-rw-r--r--  .ahub/tcchecker-tca/config.yaml  9
-rw-r--r--  .gitattributes  18
-rw-r--r--  compiler/angkor/CMakeLists.txt  4
-rw-r--r--  compiler/arser/tests/arser.test.cpp  65
-rw-r--r--  compiler/circle-eval-diff/CMakeLists.txt  34
-rw-r--r--  compiler/circle-eval-diff/README.md  51
-rw-r--r--  compiler/circle-eval-diff/driver/Driver.cpp  156
-rw-r--r--  compiler/circle-eval-diff/include/CircleEvalDiff.h  74
-rw-r--r--  compiler/circle-eval-diff/requires.cmake  7
-rw-r--r--  compiler/circle-eval-diff/src/CircleEvalDiff.cpp  97
-rw-r--r--  compiler/circle-eval-diff/src/MetricPrinter.cpp  185
-rw-r--r--  compiler/circle-eval-diff/src/MetricPrinter.h  90
-rw-r--r--  compiler/circle-eval-diff/src/MetricPrinter.test.cpp  236
-rw-r--r--  compiler/circle-eval-diff/src/ModuleEvalDiff.cpp  216
-rw-r--r--  compiler/circle-eval-diff/src/ModuleEvalDiff.h  67
-rw-r--r--  compiler/circle-eval-diff/src/Tensor.cpp  72
-rw-r--r--  compiler/circle-eval-diff/src/Tensor.h  81
-rw-r--r--  compiler/circle-eval-diff/src/Tensor.test.cpp  101
-rw-r--r--  compiler/circle-execution-plan/CMakeLists.txt  6
-rw-r--r--  compiler/circle-execution-plan/README.md  5
-rw-r--r--  compiler/circle-execution-plan/pal/IScratchpadHelper.h  51
-rw-r--r--  compiler/circle-execution-plan/pal/ScratchpadHelperCMSISNN.h  187
-rw-r--r--  compiler/circle-execution-plan/pal/ScratchpadHelperLinux.h  137
-rw-r--r--  compiler/circle-execution-plan/pal/ScratchpadHelperMCU.h  88
-rw-r--r--  compiler/circle-execution-plan/pal/TargetPlatform.h (renamed from runtime/onert/backend/gpu_cl/open_cl/Status.h)  33
-rw-r--r--  compiler/circle-execution-plan/src/CircleExecutionPlan.cpp  47
-rw-r--r--  compiler/circle-execution-plan/src/ExecutionPlanner.cpp  174
-rw-r--r--  compiler/circle-execution-plan/src/ExecutionPlanner.h  67
-rw-r--r--  compiler/circle-inspect/CMakeLists.txt  7
-rw-r--r--  compiler/circle-inspect/README.md  16
-rw-r--r--  compiler/circle-inspect/driver/Driver.cpp  6
-rw-r--r--  compiler/circle-inspect/requires.cmake  2
-rw-r--r--  compiler/circle-inspect/src/Dump.cpp  25
-rw-r--r--  compiler/circle-inspect/src/Dump.h  9
-rw-r--r--  compiler/circle-inspect/src/Reader.cpp  72
-rw-r--r--  compiler/circle-inspect/src/Reader.h  8
-rw-r--r--  compiler/circle-opselector/README.md  42
-rw-r--r--  compiler/circle-part-value-test/CMakeLists.txt  6
-rwxr-xr-x  compiler/circle-part-value-test/part_eval_one.py  86
-rw-r--r--  compiler/circle-part-value-test/parts/Net_UnpackAdd_001.001.part  7
-rw-r--r--  compiler/circle-part-value-test/parts/Net_UnpackAdd_001.002.part  7
-rw-r--r--  compiler/circle-part-value-test/parts/Net_UnpackAdd_001.part  7
-rw-r--r--  compiler/circle-part-value-test/parts/Part_Mul_Sqrt_FC_nobias_000_000.part  7
-rw-r--r--  compiler/circle-part-value-test/parts/Part_Mul_Sqrt_FC_nobias_000_001.part  7
-rw-r--r--  compiler/circle-part-value-test/parts/Part_Mul_Sqrt_FC_nobias_000_002.part  7
-rw-r--r--  compiler/circle-part-value-test/parts/Part_Split_Add_000.part  7
-rw-r--r--  compiler/circle-part-value-test/parts/Part_Tanh_FC_nobias.part  7
-rw-r--r--  compiler/circle-part-value-test/parts/Part_Tanh_FC_nobias_001.part  7
-rw-r--r--  compiler/circle-part-value-test/parts/Part_Tanh_FC_nobias_002.part  7
-rw-r--r--  compiler/circle-part-value-test/parts/Part_Tanh_FC_nobias_003.part  7
-rw-r--r--  compiler/circle-part-value-test/parts/SignatureDef_MultiOut_000.part  7
-rw-r--r--  compiler/circle-part-value-test/parts/SignatureDef_MultiOut_001.part  7
-rw-r--r--  compiler/circle-part-value-test/test.lst  21
-rw-r--r--  compiler/circle-partitioner-test/CMakeLists.txt  4
-rw-r--r--  compiler/circle-partitioner-test/parts/Part_Add_SVDF_000.part  7
-rw-r--r--  compiler/circle-partitioner-test/test.lst  4
-rw-r--r--  compiler/circle-partitioner/CMakeLists.txt  19
-rw-r--r--  compiler/circle-partitioner/README.md  130
-rw-r--r--  compiler/circle-quantizer-dredd-recipe-test/CMakeLists.txt  144
-rw-r--r--  compiler/circle-quantizer-dredd-recipe-test/README.md  37
-rw-r--r--  compiler/circle-quantizer-dredd-recipe-test/requires.cmake  6
-rw-r--r--  compiler/circle-quantizer-dredd-recipe-test/test.lst  15
-rwxr-xr-x  compiler/circle-quantizer-dredd-recipe-test/testall.sh  100
-rw-r--r--  compiler/circle-quantizer/CMakeLists.txt  10
-rw-r--r--  compiler/circle-quantizer/src/CircleQuantizer.cpp  146
-rw-r--r--  compiler/circle-tensordump/CMakeLists.txt  7
-rw-r--r--  compiler/circle-tensordump/requires.cmake  2
-rw-r--r--  compiler/circle-tensordump/src/Reader.cpp  62
-rw-r--r--  compiler/circle-tensordump/src/Reader.h  6
-rw-r--r--  compiler/circle-verify/CMakeLists.txt  7
-rw-r--r--  compiler/circle-verify/requires.cmake  2
-rw-r--r--  compiler/circle2circle-dredd-recipe-test/CMakeLists.txt  4
-rw-r--r--  compiler/circle2circle/CMakeLists.txt  2
-rw-r--r--  compiler/circle2circle/requires.cmake  1
-rw-r--r--  compiler/circle2circle/src/Circle2Circle.cpp  16
-rw-r--r--  compiler/circlechef/CMakeLists.txt  6
-rw-r--r--  compiler/circlechef/circle/CMakeLists.txt  3
-rw-r--r--  compiler/circlechef/circle/src/CircleImport.cpp  35
-rw-r--r--  compiler/circlechef/circle/src/CircleImport.h  5
-rw-r--r--  compiler/circlechef/circle/src/RecipeChef.cpp  11
-rw-r--r--  compiler/circlechef/core/CMakeLists.txt  2
-rw-r--r--  compiler/circlechef/core/src/ModelChef.cpp  4
-rw-r--r--  compiler/circlechef/requires.cmake  3
-rw-r--r--  compiler/circlechef/tests/CMakeLists.txt  33
-rw-r--r--  compiler/circledump/CMakeLists.txt  9
-rw-r--r--  compiler/circledump/README.md  2
-rw-r--r--  compiler/circledump/requires.cmake  2
-rw-r--r--  compiler/circledump/src/Dump.cpp  58
-rw-r--r--  compiler/circledump/src/Load.cpp  2
-rw-r--r--  compiler/circledump/src/OpPrinter.cpp  36
-rw-r--r--  compiler/circledump/src/Read.cpp  61
-rw-r--r--  compiler/circledump/src/Read.h  9
-rw-r--r--  compiler/cli/CMakeLists.txt  8
-rw-r--r--  compiler/common-artifacts/CMakeLists.txt  117
-rw-r--r--  compiler/common-artifacts/exclude.lst  17
-rw-r--r--  compiler/common-artifacts/options.lst  6
-rw-r--r--  compiler/common-artifacts/requires.cmake  2
-rw-r--r--  compiler/common-artifacts/src/TestDataGenerator.cpp  90
-rw-r--r--  compiler/dio-hdf5/CMakeLists.txt  30
-rw-r--r--  compiler/dio-hdf5/README.md  29
-rw-r--r--  compiler/dio-hdf5/include/dio_hdf5/HDF5Importer.h  82
-rw-r--r--  compiler/dio-hdf5/requires.cmake  1
-rw-r--r--  compiler/dio-hdf5/src/HDF5Importer.cpp (renamed from compiler/record-minmax/src/HDF5Importer.cpp)  34
-rw-r--r--  compiler/dio-hdf5/src/HDF5Importer.test.cpp  134
-rwxr-xr-x  compiler/dredd-rule-lib/rule-lib.sh  17
-rw-r--r--  compiler/embedded-import-value-test/.gitignore  1
-rw-r--r--  compiler/embedded-import-value-test/CMakeLists.txt  34
-rw-r--r--  compiler/embedded-import-value-test/README.md  13
-rwxr-xr-x  compiler/embedded-import-value-test/evalverify.sh  58
-rw-r--r--  compiler/embedded-import-value-test/requires.cmake  6
-rw-r--r--  compiler/embedded-import-value-test/src/TestDriver.cpp  242
-rw-r--r--  compiler/embedded-import-value-test/test.lst  192
-rw-r--r--  compiler/enco/CMakeLists.txt  5
-rw-r--r--  compiler/enco/core/CMakeLists.txt  8
-rw-r--r--  compiler/enco/frontend/caffe/CMakeLists.txt  8
-rw-r--r--  compiler/enco/frontend/tflite/CMakeLists.txt  11
-rw-r--r--  compiler/exo/CMakeLists.txt  4
-rw-r--r--  compiler/hermes-std/CMakeLists.txt  4
-rw-r--r--  compiler/hermes-std/include/hermes/ConsoleReporter.h  4
-rw-r--r--  compiler/hermes-std/src/ConsoleReporter.cpp  52
-rw-r--r--  compiler/hermes-std/src/ConsoleReporter.test.cpp  165
-rw-r--r--  compiler/hermes/CMakeLists.txt  4
-rw-r--r--  compiler/hermes/include/hermes/core/Message.h  10
-rw-r--r--  compiler/hermes/include/hermes/core/MessageBuffer.h  3
-rw-r--r--  compiler/hermes/src/core/MessageBuffer.cpp  8
-rw-r--r--  compiler/hermes/src/core/Source.cpp  5
-rw-r--r--  compiler/locomotiv/CMakeLists.txt  4
-rw-r--r--  compiler/locop/CMakeLists.txt  4
-rw-r--r--  compiler/logo-core/CMakeLists.txt  4
-rw-r--r--  compiler/logo-ex/CMakeLists.txt  23
-rw-r--r--  compiler/logo-ex/README.md  6
-rw-r--r--  compiler/logo-ex/include/logo/ConstantFoldingPass.h (renamed from compiler/logo/include/logo/ConstantFoldingPass.h)  8
-rw-r--r--  compiler/logo-ex/include/logo/PassesEx.h  24
-rw-r--r--  compiler/logo-ex/requires.cmake  3
-rw-r--r--  compiler/logo-ex/src/Passes/ConstantFoldingPass.cpp (renamed from compiler/logo/src/Passes/ConstantFoldingPass.cpp)  2
-rw-r--r--  compiler/logo-ex/src/Passes/ConstantFoldingPass.test.cpp (renamed from compiler/logo/src/Passes/ConstantFoldingPass.test.cpp)  2
-rw-r--r--  compiler/logo-ex/src/TestHelper.h  44
-rw-r--r--  compiler/logo/CMakeLists.txt  5
-rw-r--r--  compiler/logo/include/logo/Passes.h  1
-rw-r--r--  compiler/logo/requires.cmake  1
-rw-r--r--  compiler/luci-interpreter/README.md  2
-rw-r--r--  compiler/luci-interpreter/include/luci_interpreter/GraphBuilderRegistry.h  35
-rw-r--r--  compiler/luci-interpreter/include/luci_interpreter/Interpreter.h  5
-rw-r--r--  compiler/luci-interpreter/pal/cmsisnn/KernelsToBuild.lst  4
-rw-r--r--  compiler/luci-interpreter/pal/cmsisnn/PALAveragePool2d.h  124
-rw-r--r--  compiler/luci-interpreter/pal/cmsisnn/PALConv2d.h  163
-rw-r--r--  compiler/luci-interpreter/pal/cmsisnn/PALDepthwiseConv2d.h  192
-rw-r--r--  compiler/luci-interpreter/pal/cmsisnn/PALDequantize.h  44
-rw-r--r--  compiler/luci-interpreter/pal/cmsisnn/PALFullyConnected.h  114
-rw-r--r--  compiler/luci-interpreter/pal/cmsisnn/PALMul.h  18
-rw-r--r--  compiler/luci-interpreter/pal/cmsisnn/PALQuantize.h  44
-rw-r--r--  compiler/luci-interpreter/pal/cmsisnn/PALSVDF.h  190
-rw-r--r--  compiler/luci-interpreter/pal/cmsisnn/pal.cmake  9
-rw-r--r--  compiler/luci-interpreter/pal/linux/KernelsToBuild.lst  7
-rw-r--r--  compiler/luci-interpreter/pal/linux/PALAveragePool2d.h  73
-rw-r--r--  compiler/luci-interpreter/pal/linux/PALBatchMatMul.h  67
-rw-r--r--  compiler/luci-interpreter/pal/linux/PALConv2d.h  72
-rw-r--r--  compiler/luci-interpreter/pal/linux/PALDepthwiseConv2d.h  91
-rw-r--r--  compiler/luci-interpreter/pal/linux/PALDequantize.h  34
-rw-r--r--  compiler/luci-interpreter/pal/linux/PALFullyConnected.h  61
-rw-r--r--  compiler/luci-interpreter/pal/linux/PALGather.h  35
-rw-r--r--  compiler/luci-interpreter/pal/linux/PALMul.h  28
-rw-r--r--  compiler/luci-interpreter/pal/linux/PALQuantize.h  44
-rw-r--r--  compiler/luci-interpreter/pal/linux/PALSVDF.h  90
-rw-r--r--  compiler/luci-interpreter/pal/linux/pal.cmake  30
-rw-r--r--  compiler/luci-interpreter/pal/mcu/KernelsToBuild.lst  4
-rw-r--r--  compiler/luci-interpreter/pal/mcu/PALAveragePool2d.h  73
-rw-r--r--  compiler/luci-interpreter/pal/mcu/PALConv2d.h  43
-rw-r--r--  compiler/luci-interpreter/pal/mcu/PALDepthwiseConv2d.h  91
-rw-r--r--  compiler/luci-interpreter/pal/mcu/PALDequantize.h  44
-rw-r--r--  compiler/luci-interpreter/pal/mcu/PALFullyConnected.h  61
-rw-r--r--  compiler/luci-interpreter/pal/mcu/PALMul.h  18
-rw-r--r--  compiler/luci-interpreter/pal/mcu/PALQuantize.h  44
-rw-r--r--  compiler/luci-interpreter/pal/mcu/PALSVDF.h  258
-rw-r--r--  compiler/luci-interpreter/pal/mcu/pal.cmake  4
-rw-r--r--  compiler/luci-interpreter/src/CMakeLists.txt  3
-rw-r--r--  compiler/luci-interpreter/src/Interpreter.cpp  27
-rw-r--r--  compiler/luci-interpreter/src/core/CMakeLists.txt  4
-rw-r--r--  compiler/luci-interpreter/src/core/KernelParams.h  25
-rw-r--r--  compiler/luci-interpreter/src/import/CMakeLists.txt  15
-rw-r--r--  compiler/luci-interpreter/src/import/GraphBuilderRegistry.cpp  33
-rw-r--r--  compiler/luci-interpreter/src/import/Nodes/CircleReferencingConst.cpp  113
-rw-r--r--  compiler/luci-interpreter/src/import/Nodes/CircleReferencingConst.h  39
-rw-r--r--  compiler/luci-interpreter/src/kernels/Add.cpp  38
-rw-r--r--  compiler/luci-interpreter/src/kernels/Add.h  1
-rw-r--r--  compiler/luci-interpreter/src/kernels/Add.test.cpp  93
-rw-r--r--  compiler/luci-interpreter/src/kernels/ArgMax.test.cpp  2
-rw-r--r--  compiler/luci-interpreter/src/kernels/AveragePool2D.cpp  21
-rw-r--r--  compiler/luci-interpreter/src/kernels/AveragePool2D.h  3
-rw-r--r--  compiler/luci-interpreter/src/kernels/AveragePool2D.test.cpp  29
-rw-r--r--  compiler/luci-interpreter/src/kernels/BatchMatMul.cpp  188
-rw-r--r--  compiler/luci-interpreter/src/kernels/BatchMatMul.h  49
-rw-r--r--  compiler/luci-interpreter/src/kernels/BatchMatMul.test.cpp  272
-rw-r--r--  compiler/luci-interpreter/src/kernels/BatchToSpaceND.test.cpp  2
-rw-r--r--  compiler/luci-interpreter/src/kernels/CMakeLists.txt  4
-rw-r--r--  compiler/luci-interpreter/src/kernels/Cast.test.cpp  2
-rw-r--r--  compiler/luci-interpreter/src/kernels/Concatenation.cpp  18
-rw-r--r--  compiler/luci-interpreter/src/kernels/Concatenation.test.cpp  55
-rw-r--r--  compiler/luci-interpreter/src/kernels/Conv2D.cpp  73
-rw-r--r--  compiler/luci-interpreter/src/kernels/Conv2D.h  3
-rw-r--r--  compiler/luci-interpreter/src/kernels/DepthToSpace.test.cpp  2
-rw-r--r--  compiler/luci-interpreter/src/kernels/DepthwiseConv2D.cpp  28
-rw-r--r--  compiler/luci-interpreter/src/kernels/DepthwiseConv2D.h  2
-rw-r--r--  compiler/luci-interpreter/src/kernels/DepthwiseConv2D.test.cpp  50
-rw-r--r--  compiler/luci-interpreter/src/kernels/Dequantize.cpp  79
-rw-r--r--  compiler/luci-interpreter/src/kernels/Dequantize.h  43
-rw-r--r--  compiler/luci-interpreter/src/kernels/Dequantize.test.cpp  149
-rw-r--r--  compiler/luci-interpreter/src/kernels/Div.cpp  36
-rw-r--r--  compiler/luci-interpreter/src/kernels/Div.h  1
-rw-r--r--  compiler/luci-interpreter/src/kernels/Div.test.cpp  69
-rw-r--r--  compiler/luci-interpreter/src/kernels/Equal.cpp  29
-rw-r--r--  compiler/luci-interpreter/src/kernels/Equal.h  1
-rw-r--r--  compiler/luci-interpreter/src/kernels/Equal.test.cpp  106
-rw-r--r--  compiler/luci-interpreter/src/kernels/ExpandDims.cpp  88
-rw-r--r--  compiler/luci-interpreter/src/kernels/ExpandDims.h  44
-rw-r--r--  compiler/luci-interpreter/src/kernels/ExpandDims.test.cpp  115
-rw-r--r--  compiler/luci-interpreter/src/kernels/FullyConnected.cpp  18
-rw-r--r--  compiler/luci-interpreter/src/kernels/FullyConnected.test.cpp  2
-rw-r--r--  compiler/luci-interpreter/src/kernels/Gather.cpp  139
-rw-r--r--  compiler/luci-interpreter/src/kernels/Gather.h  47
-rw-r--r--  compiler/luci-interpreter/src/kernels/Gather.test.cpp  137
-rw-r--r--  compiler/luci-interpreter/src/kernels/Greater.cpp  29
-rw-r--r--  compiler/luci-interpreter/src/kernels/Greater.h  1
-rw-r--r--  compiler/luci-interpreter/src/kernels/Greater.test.cpp  106
-rw-r--r--  compiler/luci-interpreter/src/kernels/GreaterEqual.cpp  29
-rw-r--r--  compiler/luci-interpreter/src/kernels/GreaterEqual.h  1
-rw-r--r--  compiler/luci-interpreter/src/kernels/GreaterEqual.test.cpp  105
-rw-r--r--  compiler/luci-interpreter/src/kernels/L2Normalize.test.cpp  2
-rw-r--r--  compiler/luci-interpreter/src/kernels/L2Pool2D.test.cpp  3
-rw-r--r--  compiler/luci-interpreter/src/kernels/LeakyRelu.test.cpp  2
-rw-r--r--  compiler/luci-interpreter/src/kernels/Less.cpp  29
-rw-r--r--  compiler/luci-interpreter/src/kernels/Less.h  1
-rw-r--r--  compiler/luci-interpreter/src/kernels/Less.test.cpp  106
-rw-r--r--  compiler/luci-interpreter/src/kernels/LessEqual.cpp  29
-rw-r--r--  compiler/luci-interpreter/src/kernels/LessEqual.h  1
-rw-r--r--  compiler/luci-interpreter/src/kernels/LessEqual.test.cpp  106
-rw-r--r--  compiler/luci-interpreter/src/kernels/Logistic.test.cpp  2
-rw-r--r--  compiler/luci-interpreter/src/kernels/MirrorPad.cpp  118
-rw-r--r--  compiler/luci-interpreter/src/kernels/MirrorPad.test.cpp  210
-rw-r--r--  compiler/luci-interpreter/src/kernels/Mul.cpp  37
-rw-r--r--  compiler/luci-interpreter/src/kernels/Mul.h  1
-rw-r--r--  compiler/luci-interpreter/src/kernels/Mul.test.cpp  126
-rw-r--r--  compiler/luci-interpreter/src/kernels/NotEqual.cpp  29
-rw-r--r--  compiler/luci-interpreter/src/kernels/NotEqual.h  1
-rw-r--r--  compiler/luci-interpreter/src/kernels/NotEqual.test.cpp  106
-rw-r--r--  compiler/luci-interpreter/src/kernels/OneHot.cpp  136
-rw-r--r--  compiler/luci-interpreter/src/kernels/OneHot.h  48
-rw-r--r--  compiler/luci-interpreter/src/kernels/OneHot.test.cpp  192
-rw-r--r--  compiler/luci-interpreter/src/kernels/Pack.test.cpp  2
-rw-r--r--  compiler/luci-interpreter/src/kernels/Pad.cpp  10
-rw-r--r--  compiler/luci-interpreter/src/kernels/Pad.test.cpp  26
-rw-r--r--  compiler/luci-interpreter/src/kernels/Quantize.cpp  160
-rw-r--r--  compiler/luci-interpreter/src/kernels/Quantize.h  43
-rw-r--r--  compiler/luci-interpreter/src/kernels/Quantize.test.cpp  254
-rw-r--r--  compiler/luci-interpreter/src/kernels/ResizeBilinear.test.cpp  2
-rw-r--r--  compiler/luci-interpreter/src/kernels/ResizeNearestNeighbor.test.cpp  2
-rw-r--r--  compiler/luci-interpreter/src/kernels/ReverseV2.test.cpp  2
-rw-r--r--  compiler/luci-interpreter/src/kernels/SVDF.cpp  241
-rw-r--r--  compiler/luci-interpreter/src/kernels/SVDF.h  56
-rw-r--r--  compiler/luci-interpreter/src/kernels/SVDF.test.cpp  341
-rw-r--r--  compiler/luci-interpreter/src/kernels/Slice.cpp  5
-rw-r--r--  compiler/luci-interpreter/src/kernels/Slice.test.cpp  4
-rw-r--r--  compiler/luci-interpreter/src/kernels/Softmax.test.cpp  2
-rw-r--r--  compiler/luci-interpreter/src/kernels/SpaceToBatchND.test.cpp  2
-rw-r--r--  compiler/luci-interpreter/src/kernels/SpaceToDepth.test.cpp  2
-rw-r--r--  compiler/luci-interpreter/src/kernels/Split.test.cpp  2
-rw-r--r--  compiler/luci-interpreter/src/kernels/SplitV.test.cpp  2
-rw-r--r--  compiler/luci-interpreter/src/kernels/Squeeze.test.cpp  2
-rw-r--r--  compiler/luci-interpreter/src/kernels/Sub.cpp  36
-rw-r--r--  compiler/luci-interpreter/src/kernels/Sub.h  1
-rw-r--r--  compiler/luci-interpreter/src/kernels/Sub.test.cpp  75
-rw-r--r--  compiler/luci-interpreter/src/kernels/Transpose.test.cpp  2
-rw-r--r--  compiler/luci-interpreter/src/kernels/Unpack.test.cpp  2
-rw-r--r--  compiler/luci-interpreter/src/kernels/Utils.cpp  22
-rw-r--r--  compiler/luci-interpreter/src/kernels/Utils.h  33
-rw-r--r--  compiler/luci-interpreter/src/loader/CMakeLists.txt  4
-rw-r--r--  compiler/luci-interpreter/src/loader/GraphLoader.cpp  94
-rw-r--r--  compiler/luci-interpreter/src/loader/KernelBuilder.test.cpp  48
-rw-r--r--  compiler/luci-interpreter/src/loader/nodes/AveragePool2D.cpp  22
-rw-r--r--  compiler/luci-interpreter/src/loader/nodes/BatchMatMul.cpp  72
-rw-r--r--  compiler/luci-interpreter/src/loader/nodes/Conv2D.cpp  17
-rw-r--r--  compiler/luci-interpreter/src/loader/nodes/DepthwiseConv2D.cpp  22
-rw-r--r--  compiler/luci-interpreter/src/loader/nodes/Dequantize.cpp  37
-rw-r--r--  compiler/luci-interpreter/src/loader/nodes/ExpandDims.cpp  37
-rw-r--r--  compiler/luci-interpreter/src/loader/nodes/FullyConnected.cpp  1
-rw-r--r--  compiler/luci-interpreter/src/loader/nodes/Gather.cpp  44
-rw-r--r--  compiler/luci-interpreter/src/loader/nodes/OneHot.cpp  42
-rw-r--r--  compiler/luci-interpreter/src/loader/nodes/Quantize.cpp  37
-rw-r--r--  compiler/luci-interpreter/src/loader/nodes/SVDF.cpp  93
-rw-r--r--  compiler/luci-micro/CMakeLists.txt  2
-rw-r--r--  compiler/luci-pass-value-test/CMakeLists.txt  6
-rw-r--r--  compiler/luci-pass-value-test/eval_result_verifier.py  56
-rw-r--r--  compiler/luci-pass-value-test/test.lst  4
-rw-r--r--  compiler/luci-value-test/CMakeLists.txt  74
-rwxr-xr-x  compiler/luci-value-test/evalverify.sh  4
-rwxr-xr-x  compiler/luci-value-test/evalverify_ref.sh  63
-rwxr-xr-x  compiler/luci-value-test/evalverifytol.sh  71
-rwxr-xr-x  compiler/luci-value-test/evalverifytol_ref.sh  70
-rwxr-xr-x  compiler/luci-value-test/luci_eval_verifier.py  77
-rwxr-xr-x  compiler/luci-value-test/luci_eval_verifier_ref.py  151
-rw-r--r--  compiler/luci-value-test/test.lst  106
-rw-r--r--  compiler/luci/CMakeLists.txt  4
-rw-r--r--  compiler/luci/export/CMakeLists.txt  4
-rw-r--r--  compiler/luci/export/src/CircleBuiltinTypesExtractor.h  539
-rw-r--r--  compiler/luci/export/src/CircleBuiltinTypesMappingRule.h  79
-rw-r--r--  compiler/luci/export/src/CircleExporterImpl.cpp  9
-rw-r--r--  compiler/luci/export/src/CircleExporterUtils.cpp  58
-rw-r--r--  compiler/luci/export/src/CircleExporterUtils.h  6
-rw-r--r--  compiler/luci/export/src/CircleOperationExporter.cpp  1696
-rw-r--r--  compiler/luci/export/src/CircleOperationExporter.h  2
-rw-r--r--  compiler/luci/export/src/CircleOperationExporterRule.cpp  277
-rw-r--r--  compiler/luci/export/src/CircleOperationExporterRule.h  76
-rw-r--r--  compiler/luci/export/src/CircleOps.lst  154
-rw-r--r--  compiler/luci/export/src/CircleTensorExporter.cpp  15
-rw-r--r--  compiler/luci/export/src/SerializedData.h  6
-rw-r--r--  compiler/luci/import/CMakeLists.txt  7
-rw-r--r--  compiler/luci/import/include/luci/Import/CircleReader.h  73
-rw-r--r--  compiler/luci/import/include/luci/Import/GraphBuilderRegistry.h  23
-rw-r--r--  compiler/luci/import/include/luci/Import/NodeBuilder.h  58
-rw-r--r--  compiler/luci/import/include/luci/Import/Nodes.h  2
-rw-r--r--  compiler/luci/import/include/luci/Import/Nodes/CircleConst.h  17
-rw-r--r--  compiler/luci/import/include/luci/Import/Nodes/CircleSVDF.h  37
-rw-r--r--  compiler/luci/import/include/luci/Import/Nodes/CircleVariable.h  37
-rw-r--r--  compiler/luci/import/src/CircleImportMetadata.cpp  43
-rw-r--r--  compiler/luci/import/src/CircleReader.cpp  186
-rw-r--r--  compiler/luci/import/src/GraphBuilder.cpp  15
-rw-r--r--  compiler/luci/import/src/GraphBuilderMultiOutput.cpp  20
-rw-r--r--  compiler/luci/import/src/GraphBuilderRegistry.cpp  9
-rw-r--r--  compiler/luci/import/src/Importer.cpp  78
-rw-r--r--  compiler/luci/import/src/Importer.test.cpp  50
-rw-r--r--  compiler/luci/import/src/Nodes/CircleCast.cpp  12
-rw-r--r--  compiler/luci/import/src/Nodes/CircleConst.cpp  34
-rw-r--r--  compiler/luci/import/src/Nodes/CircleCustom.cpp  8
-rw-r--r--  compiler/luci/import/src/Nodes/CircleDepthToSpace.cpp  5
-rw-r--r--  compiler/luci/import/src/Nodes/CircleDepthwiseConv2D.cpp  12
-rw-r--r--  compiler/luci/import/src/Nodes/CircleElu.cpp  10
-rw-r--r--  compiler/luci/import/src/Nodes/CircleEqual.cpp  5
-rw-r--r--  compiler/luci/import/src/Nodes/CircleExp.cpp  7
-rw-r--r--  compiler/luci/import/src/Nodes/CircleExpandDims.cpp  5
-rw-r--r--  compiler/luci/import/src/Nodes/CircleFloorDiv.cpp  17
-rw-r--r--  compiler/luci/import/src/Nodes/CircleFloorMod.cpp  9
-rw-r--r--  compiler/luci/import/src/Nodes/CircleFullyConnected.cpp  1
-rw-r--r--  compiler/luci/import/src/Nodes/CircleGatherNd.cpp  7
-rw-r--r--  compiler/luci/import/src/Nodes/CircleGreater.cpp  10
-rw-r--r--  compiler/luci/import/src/Nodes/CircleGreaterEqual.cpp  8
-rw-r--r--  compiler/luci/import/src/Nodes/CircleIf.cpp  9
-rw-r--r--  compiler/luci/import/src/Nodes/CircleLess.cpp  13
-rw-r--r--  compiler/luci/import/src/Nodes/CircleLessEqual.cpp  8
-rw-r--r--  compiler/luci/import/src/Nodes/CircleLog.cpp  7
-rw-r--r--  compiler/luci/import/src/Nodes/CircleLogicalAnd.cpp  7
-rw-r--r--  compiler/luci/import/src/Nodes/CircleLogicalNot.cpp  7
-rw-r--r--  compiler/luci/import/src/Nodes/CircleLogicalOr.cpp  7
-rw-r--r--  compiler/luci/import/src/Nodes/CircleLogistic.cpp  5
-rw-r--r--  compiler/luci/import/src/Nodes/CircleMatrixDiag.cpp  7
-rw-r--r--  compiler/luci/import/src/Nodes/CircleMatrixSetDiag.cpp  7
-rw-r--r--  compiler/luci/import/src/Nodes/CircleNonMaxSuppressionV4.cpp  22
-rw-r--r--  compiler/luci/import/src/Nodes/CircleNonMaxSuppressionV5.cpp  25
-rw-r--r--  compiler/luci/import/src/Nodes/CircleNotEqual.cpp  8
-rw-r--r--  compiler/luci/import/src/Nodes/CircleOneHot.cpp  24
-rw-r--r--  compiler/luci/import/src/Nodes/CircleReduceAny.cpp  17
-rw-r--r--  compiler/luci/import/src/Nodes/CircleReduceProd.cpp  7
-rw-r--r--  compiler/luci/import/src/Nodes/CircleReshape.cpp  7
-rw-r--r--  compiler/luci/import/src/Nodes/CircleReverseSequence.cpp  15
-rw-r--r--  compiler/luci/import/src/Nodes/CircleReverseV2.cpp  15
-rw-r--r--  compiler/luci/import/src/Nodes/CircleRound.cpp  12
-rw-r--r--  compiler/luci/import/src/Nodes/CircleRsqrt.cpp  7
-rw-r--r--  compiler/luci/import/src/Nodes/CircleSVDF.cpp  67
-rw-r--r--  compiler/luci/import/src/Nodes/CircleScatterNd.cpp  9
-rw-r--r--  compiler/luci/import/src/Nodes/CircleSegmentSum.cpp  15
-rw-r--r--  compiler/luci/import/src/Nodes/CircleSelect.cpp  7
-rw-r--r--  compiler/luci/import/src/Nodes/CircleSelectV2.cpp  14
-rw-r--r--  compiler/luci/import/src/Nodes/CircleSin.cpp  7
-rw-r--r--  compiler/luci/import/src/Nodes/CircleSquare.cpp  12
-rw-r--r--  compiler/luci/import/src/Nodes/CircleSquaredDifference.cpp  13
-rw-r--r--  compiler/luci/import/src/Nodes/CircleTanh.cpp  5
-rw-r--r--  compiler/luci/import/src/Nodes/CircleTile.cpp  10
-rw-r--r--  compiler/luci/import/src/Nodes/CircleTopKV2.cpp  7
-rw-r--r--  compiler/luci/import/src/Nodes/CircleTransposeConv.cpp  14
-rw-r--r--  compiler/luci/import/src/Nodes/CircleUnpack.cpp  11
-rw-r--r--  compiler/luci/import/src/Nodes/CircleVariable.cpp  80
-rw-r--r--  compiler/luci/import/src/Nodes/CircleWhere.cpp  12
-rw-r--r--  compiler/luci/import/src/Nodes/CircleWhile.cpp  13
-rw-r--r--  compiler/luci/import/src/ValidateHelpers.cpp  39
-rw-r--r--  compiler/luci/lang/include/luci/IR/CircleNodes.h  9
-rw-r--r--  compiler/luci/lang/include/luci/IR/CircleNodes.lst  5
-rw-r--r--  compiler/luci/lang/include/luci/IR/CircleQuantParam.h  4
-rw-r--r--  compiler/luci/lang/include/luci/IR/Nodes/CircleFullyConnected.h  4
-rw-r--r--  compiler/luci/lang/include/luci/IR/Nodes/CircleSVDF.h  70
-rw-r--r--  compiler/luci/lang/include/luci/IR/Nodes/CircleVariable.h  39
-rw-r--r--  compiler/luci/lang/src/CircleQuantParam.cpp (renamed from runtime/onert/backend/gpu_cl/open_cl/ClMemory.cc)  44
-rw-r--r--  compiler/luci/lang/src/CircleQuantParam.test.cpp  78
-rw-r--r--  compiler/luci/lang/src/Nodes/CircleFullyConnected.test.cpp  1
-rw-r--r--  compiler/luci/lang/src/Nodes/CircleSVDF.test.cpp  101
-rw-r--r--  compiler/luci/lang/src/Nodes/CircleVariable.test.cpp  61
-rw-r--r--  compiler/luci/logex/CMakeLists.txt  14
-rw-r--r--  compiler/luci/logex/src/CircleNodeSummaryBuilder.cpp  265
-rw-r--r--  compiler/luci/logex/src/CircleNodeSummaryBuilder.h  52
-rw-r--r--  compiler/luci/logex/src/CircleNodeSummaryBuilder.test.cpp  309
-rw-r--r--  compiler/luci/logex/src/CircleNodeSummaryBuilders.cpp  1128
-rw-r--r--  compiler/luci/logex/src/CircleNodeSummaryBuilders.h  821
-rw-r--r--  compiler/luci/logex/src/FormattedGraph.cpp  2194
-rw-r--r--  compiler/luci/partition/CMakeLists.txt  2
-rw-r--r--  compiler/luci/partition/src/ConnectNode.h  2
-rw-r--r--  compiler/luci/partition/src/Nodes/CircleSVDF.cpp  47
-rw-r--r--  compiler/luci/partition/src/Nodes/CircleSVDF.test.cpp  106
-rw-r--r--  compiler/luci/partition/src/Nodes/CircleVariable.cpp  27
-rw-r--r--  compiler/luci/partition/src/PartitionIRDump.cpp  11
-rw-r--r--  compiler/luci/partition/src/PartitionMerge.cpp  50
-rw-r--r--  compiler/luci/partition/src/PartitionPGroups.cpp  240
-rw-r--r--  compiler/luci/pass/CMakeLists.txt  8
-rw-r--r--  compiler/luci/pass/include/luci/CircleOptimizer.h  20
-rw-r--r--  compiler/luci/pass/include/luci/CircleQuantizer.h  97
-rw-r--r--  compiler/luci/pass/include/luci/Pass/ConvertToFakeQuantizedModelPass.h  39
-rw-r--r--  compiler/luci/pass/include/luci/Pass/CopyQuantParamPass.h  53
-rw-r--r--  compiler/luci/pass/include/luci/Pass/FoldGatherPass.h  38
-rw-r--r--  compiler/luci/pass/include/luci/Pass/PropagateQParamBackwardPass.h  42
-rw-r--r--  compiler/luci/pass/include/luci/Pass/PropagateQParamForwardPass.h (renamed from compiler/luci/pass/include/luci/Pass/PropagateQuantParamPass.h)  19
-rw-r--r--  compiler/luci/pass/include/luci/Pass/QuantizationParameters.h  11
-rw-r--r--  compiler/luci/pass/include/luci/Pass/QuantizeDequantizeWeightsPass.h  28
-rw-r--r--  compiler/luci/pass/include/luci/Pass/QuantizePreCheckerPass.h  39
-rw-r--r--  compiler/luci/pass/include/luci/Pass/QuantizeWithMinMaxPass.h  39
-rw-r--r--  compiler/luci/pass/include/luci/Pass/RemoveRedundantQuantizePass.h  37
-rw-r--r--  compiler/luci/pass/src/BatchNormPatternFinder.cpp  40
-rw-r--r--  compiler/luci/pass/src/BatchNormPatternFinder.test.cpp  107
-rw-r--r--  compiler/luci/pass/src/CircleOptimizer.cpp  224
-rw-r--r--  compiler/luci/pass/src/CircleOptimizer.test.cpp  168
-rw-r--r--  compiler/luci/pass/src/CircleQuantizer.cpp  458
-rw-r--r--  compiler/luci/pass/src/CircleQuantizer.test.cpp  191
-rw-r--r--  compiler/luci/pass/src/ConvertNCHWToNHWCPass.cpp  6
-rw-r--r--  compiler/luci/pass/src/ConvertNCHWToNHWCPass.test.cpp  36
-rw-r--r--  compiler/luci/pass/src/ConvertToFakeQuantizedModelPass.cpp  214
-rw-r--r--  compiler/luci/pass/src/ConvertToFakeQuantizedModelPass.test.cpp  277
-rw-r--r--  compiler/luci/pass/src/CopyQuantParamPass.cpp  82
-rw-r--r--  compiler/luci/pass/src/FoldGatherPass.cpp  185
-rw-r--r--  compiler/luci/pass/src/FoldGatherPass.test.cpp  214
-rw-r--r--  compiler/luci/pass/src/PropagateConcatenationQparam.test.cpp  36
-rw-r--r--  compiler/luci/pass/src/PropagateQParamBackwardPass.cpp  482
-rw-r--r--  compiler/luci/pass/src/PropagateQParamBackwardPass.test.cpp  167
-rw-r--r--  compiler/luci/pass/src/PropagateQParamForwardPass.cpp  194
-rw-r--r--  compiler/luci/pass/src/PropagateQParamForwardPass.test.cpp  260
-rw-r--r--  compiler/luci/pass/src/PropagateQuantParamPass.cpp  107
-rw-r--r--  compiler/luci/pass/src/PropagateQuantParamPass.test.cpp  125
-rw-r--r--  compiler/luci/pass/src/QuantizationUtils.cpp  158
-rw-r--r--  compiler/luci/pass/src/QuantizationUtils.h  36
-rw-r--r--  compiler/luci/pass/src/QuantizeActivation.cpp  296
-rw-r--r--  compiler/luci/pass/src/QuantizeActivation.h  165
-rw-r--r--  compiler/luci/pass/src/QuantizeBias.cpp  300
-rw-r--r--  compiler/luci/pass/src/QuantizeBias.h  56
-rw-r--r--  compiler/luci/pass/src/QuantizeDequantizeWeightsPass.cpp  259
-rw-r--r--  compiler/luci/pass/src/QuantizeDequantizeWeightsPass.test.cpp  14
-rw-r--r--  compiler/luci/pass/src/QuantizePreCheckerPass.cpp  119
-rw-r--r--  compiler/luci/pass/src/QuantizePreCheckerPass.test.cpp  401
-rw-r--r--  compiler/luci/pass/src/QuantizeWeights.cpp  394
-rw-r--r--  compiler/luci/pass/src/QuantizeWeights.h  55
-rw-r--r--  compiler/luci/pass/src/QuantizeWithMinMaxPass.cpp  1773
-rw-r--r--  compiler/luci/pass/src/QuantizeWithMinMaxPass.test.cpp  49
-rw-r--r--  compiler/luci/pass/src/QuantizedModelVerifier.cpp  70
-rw-r--r--  compiler/luci/pass/src/QuantizedModelVerifier.h  30
-rw-r--r--  compiler/luci/pass/src/QuantizedModelVerifier.test.cpp  497
-rw-r--r--  compiler/luci/pass/src/RemoveRedundantQuantizePass.cpp  104
-rw-r--r--  compiler/luci/pass/src/RemoveRedundantQuantizePass.test.cpp  166
-rw-r--r--  compiler/luci/pass/src/RemoveRedundantTransposePass.cpp  2
-rw-r--r--  compiler/luci/pass/src/RemoveRedundantTransposePass.test.cpp  25
-rw-r--r--  compiler/luci/pass/src/RemoveUnnecessaryReshapePass.cpp  19
-rw-r--r--  compiler/luci/pass/src/ReplaceMulAddWithDepthwiseConvPass.cpp  26
-rw-r--r--  compiler/luci/pass/src/ReplaceMulAddWithDepthwiseConvPass.test.cpp  46
-rw-r--r--  compiler/luci/pass/src/SubstituteSplitVToSplitPass.cpp  13
-rw-r--r--  compiler/luci/pass/src/SubstituteSqueezeToReshapePass.cpp  14
-rw-r--r--  compiler/luci/pass/src/SubstituteStridedSliceToReshapePass.cpp  2
-rw-r--r--  compiler/luci/pass/src/VerifyQuantizedBiasScale.cpp  105
-rw-r--r--  compiler/luci/pass/src/VerifyQuantizedBiasScale.h  59
-rw-r--r--  compiler/luci/pass/src/VerifyQuantizedNodeGranularity.cpp  38
-rw-r--r--  compiler/luci/pass/src/VerifyQuantizedNodeGranularity.h (renamed from compiler/luci/pass/src/VerifyQuantizedNodeChannelWiseGranularity.h)  301
-rw-r--r--  compiler/luci/pass/src/VerifyQuantizedNodeLayerWiseGranularity.h  473
-rw-r--r--  compiler/luci/pass/src/VerifyQuantizedNodeS16Type.h  516
-rw-r--r--  compiler/luci/pass/src/VerifyQuantizedNodeType.cpp  554
-rw-r--r--  compiler/luci/pass/src/VerifyQuantizedNodeType.h  157
-rw-r--r--  compiler/luci/pass/src/VerifyQuantizedNodeU8Type.h  518
-rw-r--r--  compiler/luci/pass/src/helpers/LayerInfoMap.cpp  189
-rw-r--r--  compiler/luci/pass/src/helpers/LayerInfoMap.h  33
-rw-r--r--  compiler/luci/pass/src/helpers/LayerInfoMap.test.cpp  201
-rw-r--r--  compiler/luci/requires.cmake  4
-rw-r--r--  compiler/luci/service/CMakeLists.txt  1
-rw-r--r--  compiler/luci/service/include/luci/Service/CircleShapeInference.h  7
-rw-r--r--  compiler/luci/service/include/luci/Service/CircleTypeInference.h  8
-rw-r--r--  compiler/luci/service/src/CircleCloneNode.h  2
-rw-r--r--  compiler/luci/service/src/CircleNodeClone.cpp  14
-rw-r--r--  compiler/luci/service/src/CircleShapeInferenceRule.cpp  90
-rw-r--r--  compiler/luci/service/src/CircleTypeInferenceRule.cpp  7
-rw-r--r--  compiler/luci/service/src/Nodes/CircleSVDF.cpp  37
-rw-r--r--  compiler/luci/service/src/Nodes/CircleSVDF.test.cpp  47
-rw-r--r--  compiler/luci/service/src/Nodes/CircleVariable.cpp  27
-rw-r--r--  compiler/luci/service/src/Nodes/CircleVariable.test.cpp  33
-rw-r--r--  compiler/luci/tests/CMakeLists.txt  33
-rw-r--r--  compiler/luci/tests/test.lst  4
-rw-r--r--  compiler/mio-circle/CMakeLists.txt  12
-rw-r--r--  compiler/mio-circle/include/mio_circle/Helper.h (renamed from runtime/onert/backend/gpu_cl/open_cl/GpuObject.cc)  38
-rw-r--r--  compiler/mio-circle/src/Helper.cpp  81
-rw-r--r--  compiler/mio-circle04/CMakeLists.txt  52
-rw-r--r--  compiler/mio-circle04/README.md  3
-rw-r--r--  compiler/mio-circle04/example.cpp  41
-rw-r--r--  compiler/mio-circle04/include/mio_circle/Helper.h  37
-rw-r--r--  compiler/mio-circle04/src/Helper.cpp  110
-rw-r--r--  compiler/mio-circle04/src/Helper.test.cpp  153
-rw-r--r--  compiler/mio-tflite/CMakeLists.txt  2
-rw-r--r--  compiler/mio-tflite260/CMakeLists.txt  24
-rw-r--r--  compiler/mio-tflite260/include/mio_tflite260/Helper.h (renamed from runtime/onert/backend/gpu_cl/open_cl/TensorTypeUtil.h)  32
-rw-r--r--  compiler/mio-tflite260/src/Helper.cpp  104
-rw-r--r--  compiler/mio-tflite260/src/Helper.test.cpp  159
-rw-r--r--  compiler/mio-tflite280/CMakeLists.txt  69
-rw-r--r--  compiler/mio-tflite280/README.md  3
-rw-r--r--  compiler/mio-tflite280/example.cpp  41
-rw-r--r--  compiler/mio-tflite280/include/mio_tflite280/Helper.h  37
-rw-r--r--  compiler/mio-tflite280/src/Helper.cpp  104
-rw-r--r--  compiler/mio-tflite280/src/Helper.test.cpp  159
-rw-r--r--  compiler/mir/src/mir_onnx_importer/CMakeLists.txt  4
-rw-r--r--  compiler/mir/src/mir_tflite_importer/CMakeLists.txt  2
-rw-r--r--  compiler/mir2loco/CMakeLists.txt  8
-rw-r--r--  compiler/moco-tf/CMakeLists.txt  2
-rw-r--r--  compiler/moco-tf/requires.cmake  1
-rw-r--r--  compiler/moco-tf/src/Transforms.h  1
-rw-r--r--  compiler/morph/CMakeLists.txt  8
-rw-r--r--  compiler/nest/core/CMakeLists.txt  8
-rw-r--r--  compiler/nike/CMakeLists.txt  8
-rw-r--r--  compiler/nnc/unittests/soft_backend/ModelAnalyzer.cpp  2
-rw-r--r--  compiler/nnop/CMakeLists.txt  8
-rw-r--r--  compiler/one-cmds/CMakeLists.txt  39
-rw-r--r--  compiler/one-cmds/how-to-prepare-virtualenv.txt  19
-rw-r--r--  compiler/one-cmds/how-to-use-one-commands.txt  2
-rw-r--r--  compiler/one-cmds/one-build  18
-rw-r--r--  compiler/one-cmds/one-import-bcq  9
-rw-r--r--  compiler/one-cmds/one-import-onnx  26
-rw-r--r--  compiler/one-cmds/one-import-pytorch  366
-rw-r--r--  compiler/one-cmds/one-import-tf  9
-rw-r--r--  compiler/one-cmds/one-import-tflite  7
-rw-r--r--  compiler/one-cmds/one-optimize  6
-rw-r--r--  compiler/one-cmds/one-prepare-venv  39
-rw-r--r--  compiler/one-cmds/one-quantize  82
-rw-r--r--  compiler/one-cmds/onecc  33
-rw-r--r--  compiler/one-cmds/onelib/constant.py  86
-rw-r--r--  compiler/one-cmds/onelib/make_cmd.py  100
-rwxr-xr-x  compiler/one-cmds/onnx_legalizer.py  1065
-rw-r--r--  compiler/one-cmds/tests/CMakeLists.txt  26
-rw-r--r--  compiler/one-cmds/tests/one-quantize_009.qconf.json  36
-rw-r--r--  compiler/one-cmds/tests/one-quantize_009.test  55
-rw-r--r--  compiler/one-cmds/tests/onnx-operations/CMakeLists.txt  86
-rw-r--r--  compiler/one-cmds/tests/onnx-operations/README.md  28
-rw-r--r--  compiler/one-cmds/tests/onnx-operations/prepare_test_materials.sh  26
-rw-r--r--  compiler/one-cmds/tests/onnx_legalize_run_compare.py  129
-rw-r--r--  compiler/one-cmds/tests/prepare_test_materials.sh  33
-rw-r--r--  compiler/one-cmds/tests/print_onnx_model.py  20
-rw-r--r--  compiler/one-cmds/tests/pytorch-operations/CMakeLists.txt  109
-rw-r--r--  compiler/one-cmds/tests/pytorch-operations/README.md  28
-rw-r--r--  compiler/one-cmds/tests/pytorch-operations/aux_generator.py  83
-rw-r--r--  compiler/one-cmds/tests/pytorch-operations/entire_model.test  40
-rw-r--r--  compiler/one-cmds/tests/pytorch-operations/example_generator.py  116
-rw-r--r--  compiler/one-cmds/tests/pytorch-operations/mar_state_dict_model.test  40
-rw-r--r--  compiler/one-cmds/tests/pytorch-operations/mar_torchscript_model.test  40
-rw-r--r--  compiler/one-cmds/tests/pytorch-operations/prepare_test_materials.sh  26
-rw-r--r--  compiler/one-cmds/tests/pytorch-operations/state_dict_model.test  39
-rw-r--r--  compiler/one-cmds/tests/pytorch-operations/torchscript_model.test  39
-rw-r--r--  compiler/one-cmds/utils.py  184
-rw-r--r--  compiler/oneco/CMakeLists.txt  8
-rw-r--r--  compiler/pepper-strcast/CMakeLists.txt  4
-rw-r--r--  compiler/pota-quantization-value-test/CMakeLists.txt  30
-rw-r--r--  compiler/pota-quantization-value-test/config_files/Add_002/channel/int16/qconf.json  9
-rw-r--r--  compiler/pota-quantization-value-test/config_files/Add_002/layer/uint8/qconf.json  9
-rw-r--r--  compiler/pota-quantization-value-test/config_files/AveragePool2D_000/channel/int16/qconf.json  9
-rw-r--r--  compiler/pota-quantization-value-test/config_files/AveragePool2D_000/layer/uint8/qconf.json  9
-rw-r--r--  compiler/pota-quantization-value-test/config_files/Concatenation_001/channel/int16/qconf.json  9
-rw-r--r--  compiler/pota-quantization-value-test/config_files/Concatenation_001/layer/uint8/qconf.json  9
-rw-r--r--  compiler/pota-quantization-value-test/config_files/Conv2D_004/channel/int16/qconf.json  9
-rw-r--r--  compiler/pota-quantization-value-test/config_files/Conv2D_004/layer/uint8/qconf.json  9
-rw-r--r--  compiler/pota-quantization-value-test/config_files/DepthwiseConv2D_002/channel/int16/qconf.json  9
-rw-r--r--  compiler/pota-quantization-value-test/config_files/DepthwiseConv2D_002/layer/uint8/qconf.json  9
-rw-r--r--  compiler/pota-quantization-value-test/config_files/FullyConnected_003/channel/int16/qconf.json  9
-rw-r--r--  compiler/pota-quantization-value-test/config_files/FullyConnected_003/layer/uint8/qconf.json  9
-rw-r--r--  compiler/pota-quantization-value-test/config_files/InstanceNorm_001/channel/int16/qconf.json  9
-rw-r--r--  compiler/pota-quantization-value-test/config_files/InstanceNorm_001/layer/uint8/qconf.json  9
-rw-r--r--  compiler/pota-quantization-value-test/config_files/MaxPool2D_000/channel/int16/qconf.json  9
-rw-r--r--  compiler/pota-quantization-value-test/config_files/MaxPool2D_000/layer/uint8/qconf.json  9
-rw-r--r--  compiler/pota-quantization-value-test/config_files/Mean_000/channel/int16/qconf.json  9
-rw-r--r--  compiler/pota-quantization-value-test/config_files/Mean_000/layer/uint8/qconf.json  9
-rw-r--r--  compiler/pota-quantization-value-test/config_files/Mul_001/channel/int16/qconf.json  9
-rw-r--r--  compiler/pota-quantization-value-test/config_files/Mul_001/layer/uint8/qconf.json  9
-rw-r--r--  compiler/pota-quantization-value-test/config_files/PRelu_001/channel/int16/qconf.json  9
-rw-r--r--  compiler/pota-quantization-value-test/config_files/PRelu_001/layer/uint8/qconf.json  9
-rw-r--r--  compiler/pota-quantization-value-test/config_files/ReLU_000/channel/int16/qconf.json  9
-rw-r--r--  compiler/pota-quantization-value-test/config_files/ReLU_000/layer/uint8/qconf.json  9
-rw-r--r--  compiler/pota-quantization-value-test/config_files/Split_000/channel/int16/qconf.json  14
-rw-r--r--  compiler/pota-quantization-value-test/config_files/Split_000/channel/uint8/qconf.json  14
-rw-r--r--  compiler/pota-quantization-value-test/config_files/TransposeConv_001/channel/int16/qconf.json  9
-rw-r--r--  compiler/pota-quantization-value-test/config_files/TransposeConv_001/layer/uint8/qconf.json  9
-rw-r--r--  compiler/pota-quantization-value-test/expected_outputs/Add_002_config/channel/int16/quantization/ifm1_Quantize.json  4
-rw-r--r--  compiler/pota-quantization-value-test/expected_outputs/Add_002_config/channel/int16/quantization/ifm2.json  32
-rw-r--r--  compiler/pota-quantization-value-test/expected_outputs/Add_002_config/channel/int16/quantization/ofm.json  4
-rw-r--r--  compiler/pota-quantization-value-test/expected_outputs/Add_002_config/layer/uint8/quantization/ifm1_Quantize.json  4
-rw-r--r--  compiler/pota-quantization-value-test/expected_outputs/Add_002_config/layer/uint8/quantization/ifm2.json  32
-rw-r--r--  compiler/pota-quantization-value-test/expected_outputs/Add_002_config/layer/uint8/quantization/ofm.json  4
-rw-r--r--  compiler/pota-quantization-value-test/expected_outputs/AveragePool2D_000_config/channel/int16/quantization/ifm_Quantize.json  4
-rw-r--r--  compiler/pota-quantization-value-test/expected_outputs/AveragePool2D_000_config/channel/int16/quantization/ofm.json  4
-rw-r--r--  compiler/pota-quantization-value-test/expected_outputs/AveragePool2D_000_config/layer/uint8/quantization/ifm_Quantize.json  4
-rw-r--r--  compiler/pota-quantization-value-test/expected_outputs/AveragePool2D_000_config/layer/uint8/quantization/ofm.json  4
-rw-r--r--  compiler/pota-quantization-value-test/expected_outputs/Concatenation_001_config/channel/int16/quantization/ifm1_Quantize.json  4
-rw-r--r--  compiler/pota-quantization-value-test/expected_outputs/Concatenation_001_config/channel/int16/quantization/ifm2.json  28
-rw-r--r--  compiler/pota-quantization-value-test/expected_outputs/Concatenation_001_config/channel/int16/quantization/ofm.json  4
-rw-r--r--  compiler/pota-quantization-value-test/expected_outputs/Concatenation_001_config/layer/uint8/quantization/ifm1_Quantize.json  4
-rw-r--r--  compiler/pota-quantization-value-test/expected_outputs/Concatenation_001_config/layer/uint8/quantization/ifm2.json  28
-rw-r--r--  compiler/pota-quantization-value-test/expected_outputs/Concatenation_001_config/layer/uint8/quantization/ofm.json  4
-rw-r--r--  compiler/pota-quantization-value-test/expected_outputs/Conv2D_004_config/channel/int16/fake_quantization/ker.json  48
-rw-r--r--  compiler/pota-quantization-value-test/expected_outputs/Conv2D_004_config/channel/int16/quantization/bias.json  7
-rw-r--r--  compiler/pota-quantization-value-test/expected_outputs/Conv2D_004_config/channel/int16/quantization/ifm_Quantize.json  4
-rw-r--r--  compiler/pota-quantization-value-test/expected_outputs/Conv2D_004_config/channel/int16/quantization/ker.json  52
-rw-r--r--  compiler/pota-quantization-value-test/expected_outputs/Conv2D_004_config/channel/int16/quantization/ofm.json  4
-rw-r--r--  compiler/pota-quantization-value-test/expected_outputs/Conv2D_004_config/layer/uint8/fake_quantization/ker.json  48
-rw-r--r--  compiler/pota-quantization-value-test/expected_outputs/Conv2D_004_config/layer/uint8/quantization/bias.json  10
-rw-r--r--  compiler/pota-quantization-value-test/expected_outputs/Conv2D_004_config/layer/uint8/quantization/ifm_Quantize.json  4
-rw-r--r--  compiler/pota-quantization-value-test/expected_outputs/Conv2D_004_config/layer/uint8/quantization/ker.json  61
-rw-r--r--  compiler/pota-quantization-value-test/expected_outputs/Conv2D_004_config/layer/uint8/quantization/ofm.json  4
-rw-r--r--  compiler/pota-quantization-value-test/expected_outputs/DepthwiseConv2D_002_config/channel/int16/uint8/fake_quantization/ker.json  34
-rw-r--r--  compiler/pota-quantization-value-test/expected_outputs/DepthwiseConv2D_002_config/channel/int16/uint8/quantization/bias.json  9
-rw-r--r--  compiler/pota-quantization-value-test/expected_outputs/DepthwiseConv2D_002_config/channel/int16/uint8/quantization/ifm_Quantize.json  4
-rw-r--r--  compiler/pota-quantization-value-test/expected_outputs/DepthwiseConv2D_002_config/channel/int16/uint8/quantization/ker.json  38
-rw-r--r--  compiler/pota-quantization-value-test/expected_outputs/DepthwiseConv2D_002_config/channel/int16/uint8/quantization/ofm.json  4
-rw-r--r--  compiler/pota-quantization-value-test/expected_outputs/DepthwiseConv2D_002_config/layer/uint8/fake_quantization/ker.json  34
-rw-r--r--  compiler/pota-quantization-value-test/expected_outputs/DepthwiseConv2D_002_config/layer/uint8/quantization/bias.json  14
-rw-r--r--  compiler/pota-quantization-value-test/expected_outputs/DepthwiseConv2D_002_config/layer/uint8/quantization/ifm_Quantize.json  4
-rw-r--r--  compiler/pota-quantization-value-test/expected_outputs/DepthwiseConv2D_002_config/layer/uint8/quantization/ker.json  53
-rw-r--r--  compiler/pota-quantization-value-test/expected_outputs/DepthwiseConv2D_002_config/layer/uint8/quantization/ofm.json  4
-rw-r--r--  compiler/pota-quantization-value-test/expected_outputs/FullyConnected_003_config/channel/int16/fake_quantization/weight.json  76
-rw-r--r--  compiler/pota-quantization-value-test/expected_outputs/FullyConnected_003_config/channel/int16/quantization/bias.json  9
-rw-r--r--  compiler/pota-quantization-value-test/expected_outputs/FullyConnected_003_config/channel/int16/quantization/in_Quantize.json  4
-rw-r--r--  compiler/pota-quantization-value-test/expected_outputs/FullyConnected_003_config/channel/int16/quantization/out.json  4
-rw-r--r--  compiler/pota-quantization-value-test/expected_outputs/FullyConnected_003_config/channel/int16/quantization/weight.json  80
-rw-r--r--  compiler/pota-quantization-value-test/expected_outputs/FullyConnected_003_config/layer/uint8/fake_quantization/weight.json  76
-rw-r--r--  compiler/pota-quantization-value-test/expected_outputs/FullyConnected_003_config/layer/uint8/quantization/bias.json  14
-rw-r--r--  compiler/pota-quantization-value-test/expected_outputs/FullyConnected_003_config/layer/uint8/quantization/in_Quantize.json  4
-rw-r--r--  compiler/pota-quantization-value-test/expected_outputs/FullyConnected_003_config/layer/uint8/quantization/out.json  4
-rw-r--r--  compiler/pota-quantization-value-test/expected_outputs/FullyConnected_003_config/layer/uint8/quantization/weight.json  95
-rw-r--r--  compiler/pota-quantization-value-test/expected_outputs/MaxPool2D_000_config/channel/int16/quantization/ifm_Quantize.json  4
-rw-r--r--  compiler/pota-quantization-value-test/expected_outputs/MaxPool2D_000_config/channel/int16/quantization/ofm.json  4
-rw-r--r--  compiler/pota-quantization-value-test/expected_outputs/MaxPool2D_000_config/layer/uint8/quantization/ifm_Quantize.json  4
-rw-r--r--  compiler/pota-quantization-value-test/expected_outputs/MaxPool2D_000_config/layer/uint8/quantization/ofm.json  4
-rw-r--r--  compiler/pota-quantization-value-test/expected_outputs/Mean_000_config/channel/int16/quantization/ifm_Quantize.json  4
-rw-r--r--  compiler/pota-quantization-value-test/expected_outputs/Mean_000_config/channel/int16/quantization/ofm.json  4
-rw-r--r--  compiler/pota-quantization-value-test/expected_outputs/Mean_000_config/layer/uint8/quantization/ifm_Quantize.json  4
-rw-r--r--  compiler/pota-quantization-value-test/expected_outputs/Mean_000_config/layer/uint8/quantization/ofm.json  4
-rw-r--r--  compiler/pota-quantization-value-test/expected_outputs/Mean_000_config/layer/uint8/quantization/reduction_indices.json  5
-rw-r--r--  compiler/pota-quantization-value-test/expected_outputs/Mul_001_config/channel/int16/quantization/ifm1_Quantize.json  4
-rw-r--r--  compiler/pota-quantization-value-test/expected_outputs/Mul_001_config/channel/int16/quantization/ifm2.json  32
-rw-r--r--  compiler/pota-quantization-value-test/expected_outputs/Mul_001_config/channel/int16/quantization/ofm.json  4
-rw-r--r--  compiler/pota-quantization-value-test/expected_outputs/Mul_001_config/layer/uint8/quantization/ifm1_Quantize.json  4
-rw-r--r--  compiler/pota-quantization-value-test/expected_outputs/Mul_001_config/layer/uint8/quantization/ifm2.json  32
-rw-r--r--  compiler/pota-quantization-value-test/expected_outputs/Mul_001_config/layer/uint8/quantization/ofm.json  4
-rw-r--r--  compiler/pota-quantization-value-test/expected_outputs/PRelu_001_config/channel/int16/quantization/alpha.json  13
-rw-r--r--  compiler/pota-quantization-value-test/expected_outputs/PRelu_001_config/channel/int16/quantization/ifm_Quantize.json  4
-rw-r--r--  compiler/pota-quantization-value-test/expected_outputs/PRelu_001_config/channel/int16/quantization/ofm.json  4
-rw-r--r--  compiler/pota-quantization-value-test/expected_outputs/PRelu_001_config/layer/uint8/quantization/alpha.json  21
-rw-r--r--  compiler/pota-quantization-value-test/expected_outputs/PRelu_001_config/layer/uint8/quantization/ifm_Quantize.json  4
-rw-r--r--  compiler/pota-quantization-value-test/expected_outputs/PRelu_001_config/layer/uint8/quantization/ofm.json  4
-rw-r--r--  compiler/pota-quantization-value-test/expected_outputs/ReLU_000_config/channel/int16/quantization/ifm_Quantize.json  4
-rw-r--r--  compiler/pota-quantization-value-test/expected_outputs/ReLU_000_config/channel/int16/quantization/ofm.json  4
-rw-r--r--  compiler/pota-quantization-value-test/expected_outputs/ReLU_000_config/layer/uint8/quantization/ifm_Quantize.json  4
-rw-r--r--  compiler/pota-quantization-value-test/expected_outputs/ReLU_000_config/layer/uint8/quantization/ofm.json  4
-rw-r--r--  compiler/pota-quantization-value-test/expected_outputs/Split_000_config/channel/int16/quantization/ifm_Quantize.json  4
-rw-r--r--  compiler/pota-quantization-value-test/expected_outputs/Split_000_config/channel/int16/quantization/ofm1.json  4
-rw-r--r--  compiler/pota-quantization-value-test/expected_outputs/Split_000_config/channel/int16/quantization/ofm2.json  4
-rw-r--r--  compiler/pota-quantization-value-test/expected_outputs/Split_000_config/channel/int16/quantization/split_dim.json  5
-rw-r--r--  compiler/pota-quantization-value-test/expected_outputs/Split_000_config/channel/uint8/quantization/ifm_Quantize.json  4
-rw-r--r--  compiler/pota-quantization-value-test/expected_outputs/Split_000_config/channel/uint8/quantization/ofm1.json  4
-rw-r--r--  compiler/pota-quantization-value-test/expected_outputs/Split_000_config/channel/uint8/quantization/ofm2.json  4
-rw-r--r--  compiler/pota-quantization-value-test/expected_outputs/Split_000_config/channel/uint8/quantization/split_dim.json  5
-rw-r--r--  compiler/pota-quantization-value-test/expected_outputs/TransposeConv_001_config/channel/int16/fake_quantization/ker.json  48
-rw-r--r--  compiler/pota-quantization-value-test/expected_outputs/TransposeConv_001_config/channel/int16/quantization/ifm_Quantize.json  4
-rw-r--r--  compiler/pota-quantization-value-test/expected_outputs/TransposeConv_001_config/channel/int16/quantization/ker.json  52
-rw-r--r--  compiler/pota-quantization-value-test/expected_outputs/TransposeConv_001_config/channel/int16/quantization/ofm.json  4
-rw-r--r--  compiler/pota-quantization-value-test/expected_outputs/TransposeConv_001_config/layer/uint8/fake_quantization/ker.json  48
-rw-r--r--  compiler/pota-quantization-value-test/expected_outputs/TransposeConv_001_config/layer/uint8/quantization/ifm_Quantize.json  4
-rw-r--r--  compiler/pota-quantization-value-test/expected_outputs/TransposeConv_001_config/layer/uint8/quantization/ker.json  58
-rw-r--r--  compiler/pota-quantization-value-test/expected_outputs/TransposeConv_001_config/layer/uint8/quantization/ofm.json  4
-rw-r--r--  compiler/pota-quantization-value-test/requires.cmake  2
-rw-r--r--  compiler/pota-quantization-value-test/test.lst  29
-rwxr-xr-x  compiler/pota-quantization-value-test/test_fake_wquant_with_config.sh  87
-rw-r--r--  compiler/pota-quantization-value-test/test_inputs/Add_002_config/channel/int16/0.txt  1
-rw-r--r--  compiler/pota-quantization-value-test/test_inputs/Add_002_config/channel/int16/1.txt  1
-rw-r--r--  compiler/pota-quantization-value-test/test_inputs/Add_002_config/channel/int16/2.txt  1
-rw-r--r--  compiler/pota-quantization-value-test/test_inputs/Add_002_config/channel/int16/3.txt  1
-rw-r--r--  compiler/pota-quantization-value-test/test_inputs/Add_002_config/channel/int16/4.txt  1
-rw-r--r--  compiler/pota-quantization-value-test/test_inputs/Add_002_config/layer/uint8/0.txt  1
-rw-r--r--  compiler/pota-quantization-value-test/test_inputs/Add_002_config/layer/uint8/1.txt  1
-rw-r--r--  compiler/pota-quantization-value-test/test_inputs/Add_002_config/layer/uint8/2.txt  1
-rw-r--r--  compiler/pota-quantization-value-test/test_inputs/Add_002_config/layer/uint8/3.txt  1
-rw-r--r--  compiler/pota-quantization-value-test/test_inputs/Add_002_config/layer/uint8/4.txt  1
-rw-r--r--  compiler/pota-quantization-value-test/test_inputs/AveragePool2D_000_config/channel/int16/0.txt  1
-rw-r--r--  compiler/pota-quantization-value-test/test_inputs/AveragePool2D_000_config/channel/int16/1.txt  1
-rw-r--r--  compiler/pota-quantization-value-test/test_inputs/AveragePool2D_000_config/channel/int16/2.txt  1
-rw-r--r--  compiler/pota-quantization-value-test/test_inputs/AveragePool2D_000_config/channel/int16/3.txt  1
-rw-r--r--  compiler/pota-quantization-value-test/test_inputs/AveragePool2D_000_config/channel/int16/4.txt  1
-rw-r--r--  compiler/pota-quantization-value-test/test_inputs/AveragePool2D_000_config/layer/uint8/0.txt  1
-rw-r--r--  compiler/pota-quantization-value-test/test_inputs/AveragePool2D_000_config/layer/uint8/1.txt  1
-rw-r--r--  compiler/pota-quantization-value-test/test_inputs/AveragePool2D_000_config/layer/uint8/2.txt  1
-rw-r--r--  compiler/pota-quantization-value-test/test_inputs/AveragePool2D_000_config/layer/uint8/3.txt  1
-rw-r--r--  compiler/pota-quantization-value-test/test_inputs/AveragePool2D_000_config/layer/uint8/4.txt  1
-rw-r--r--  compiler/pota-quantization-value-test/test_inputs/Concatenation_001_config/channel/int16/0.txt  1
-rw-r--r--  compiler/pota-quantization-value-test/test_inputs/Concatenation_001_config/channel/int16/1.txt  1
-rw-r--r--  compiler/pota-quantization-value-test/test_inputs/Concatenation_001_config/channel/int16/2.txt  1
-rw-r--r--  compiler/pota-quantization-value-test/test_inputs/Concatenation_001_config/channel/int16/3.txt  1
-rw-r--r--  compiler/pota-quantization-value-test/test_inputs/Concatenation_001_config/channel/int16/4.txt  1
-rw-r--r--  compiler/pota-quantization-value-test/test_inputs/Concatenation_001_config/layer/uint8/0.txt  1
-rw-r--r--  compiler/pota-quantization-value-test/test_inputs/Concatenation_001_config/layer/uint8/1.txt  1
-rw-r--r--  compiler/pota-quantization-value-test/test_inputs/Concatenation_001_config/layer/uint8/2.txt  1
-rw-r--r--  compiler/pota-quantization-value-test/test_inputs/Concatenation_001_config/layer/uint8/3.txt  1
-rw-r--r--  compiler/pota-quantization-value-test/test_inputs/Concatenation_001_config/layer/uint8/4.txt  1
-rw-r--r--  compiler/pota-quantization-value-test/test_inputs/Conv2D_004_config/channel/int16/0.txt  1
-rw-r--r--  compiler/pota-quantization-value-test/test_inputs/Conv2D_004_config/channel/int16/1.txt  1
-rw-r--r--  compiler/pota-quantization-value-test/test_inputs/Conv2D_004_config/channel/int16/2.txt  1
-rw-r--r--  compiler/pota-quantization-value-test/test_inputs/Conv2D_004_config/channel/int16/3.txt  1
-rw-r--r--  compiler/pota-quantization-value-test/test_inputs/Conv2D_004_config/channel/int16/4.txt  1
-rw-r--r--  compiler/pota-quantization-value-test/test_inputs/Conv2D_004_config/layer/uint8/0.txt  1
-rw-r--r--  compiler/pota-quantization-value-test/test_inputs/Conv2D_004_config/layer/uint8/1.txt  1
-rw-r--r--  compiler/pota-quantization-value-test/test_inputs/Conv2D_004_config/layer/uint8/2.txt  1
-rw-r--r--  compiler/pota-quantization-value-test/test_inputs/Conv2D_004_config/layer/uint8/3.txt  1
-rw-r--r--  compiler/pota-quantization-value-test/test_inputs/Conv2D_004_config/layer/uint8/4.txt  1
-rw-r--r--  compiler/pota-quantization-value-test/test_inputs/DepthwiseConv2D_002_config/channel/int16/0.txt  1
-rw-r--r--  compiler/pota-quantization-value-test/test_inputs/DepthwiseConv2D_002_config/channel/int16/1.txt  1
-rw-r--r--  compiler/pota-quantization-value-test/test_inputs/DepthwiseConv2D_002_config/channel/int16/2.txt  1
-rw-r--r--  compiler/pota-quantization-value-test/test_inputs/DepthwiseConv2D_002_config/channel/int16/3.txt  1
-rw-r--r--  compiler/pota-quantization-value-test/test_inputs/DepthwiseConv2D_002_config/channel/int16/4.txt  1
-rw-r--r--  compiler/pota-quantization-value-test/test_inputs/DepthwiseConv2D_002_config/layer/uint8/0.txt  1
-rw-r--r--  compiler/pota-quantization-value-test/test_inputs/DepthwiseConv2D_002_config/layer/uint8/1.txt  1
-rw-r--r--  compiler/pota-quantization-value-test/test_inputs/DepthwiseConv2D_002_config/layer/uint8/2.txt  1
-rw-r--r--  compiler/pota-quantization-value-test/test_inputs/DepthwiseConv2D_002_config/layer/uint8/3.txt  1
-rw-r--r--  compiler/pota-quantization-value-test/test_inputs/DepthwiseConv2D_002_config/layer/uint8/4.txt  1
-rw-r--r--  compiler/pota-quantization-value-test/test_inputs/FullyConnected_003_config/channel/int16/0.txt  1
-rw-r--r--  compiler/pota-quantization-value-test/test_inputs/FullyConnected_003_config/channel/int16/1.txt  1
-rw-r--r--  compiler/pota-quantization-value-test/test_inputs/FullyConnected_003_config/channel/int16/2.txt  1
-rw-r--r--  compiler/pota-quantization-value-test/test_inputs/FullyConnected_003_config/channel/int16/3.txt  1
-rw-r--r--  compiler/pota-quantization-value-test/test_inputs/FullyConnected_003_config/channel/int16/4.txt  1
-rw-r--r--  compiler/pota-quantization-value-test/test_inputs/FullyConnected_003_config/layer/uint8/0.txt  1
-rw-r--r--  compiler/pota-quantization-value-test/test_inputs/FullyConnected_003_config/layer/uint8/1.txt  1
-rw-r--r--  compiler/pota-quantization-value-test/test_inputs/FullyConnected_003_config/layer/uint8/2.txt  1
-rw-r--r--  compiler/pota-quantization-value-test/test_inputs/FullyConnected_003_config/layer/uint8/3.txt  1
-rw-r--r--  compiler/pota-quantization-value-test/test_inputs/FullyConnected_003_config/layer/uint8/4.txt  1
-rw-r--r--  compiler/pota-quantization-value-test/test_inputs/MaxPool2D_000_config/channel/int16/0.txt  1
-rw-r--r--  compiler/pota-quantization-value-test/test_inputs/MaxPool2D_000_config/channel/int16/1.txt  1
-rw-r--r--  compiler/pota-quantization-value-test/test_inputs/MaxPool2D_000_config/channel/int16/2.txt  1
-rw-r--r--  compiler/pota-quantization-value-test/test_inputs/MaxPool2D_000_config/channel/int16/3.txt  1
-rw-r--r--  compiler/pota-quantization-value-test/test_inputs/MaxPool2D_000_config/channel/int16/4.txt  1
-rw-r--r--  compiler/pota-quantization-value-test/test_inputs/MaxPool2D_000_config/layer/uint8/0.txt  1
-rw-r--r--  compiler/pota-quantization-value-test/test_inputs/MaxPool2D_000_config/layer/uint8/1.txt  1
-rw-r--r--  compiler/pota-quantization-value-test/test_inputs/MaxPool2D_000_config/layer/uint8/2.txt  1
-rw-r--r--  compiler/pota-quantization-value-test/test_inputs/MaxPool2D_000_config/layer/uint8/3.txt  1
-rw-r--r--  compiler/pota-quantization-value-test/test_inputs/MaxPool2D_000_config/layer/uint8/4.txt  1
-rw-r--r--  compiler/pota-quantization-value-test/test_inputs/Mean_000_config/channel/int16/0.txt  1
-rw-r--r--  compiler/pota-quantization-value-test/test_inputs/Mean_000_config/channel/int16/1.txt  1
-rw-r--r--  compiler/pota-quantization-value-test/test_inputs/Mean_000_config/channel/int16/2.txt  1
-rw-r--r--  compiler/pota-quantization-value-test/test_inputs/Mean_000_config/channel/int16/3.txt  1
-rw-r--r--  compiler/pota-quantization-value-test/test_inputs/Mean_000_config/channel/int16/4.txt  1
-rw-r--r--  compiler/pota-quantization-value-test/test_inputs/Mean_000_config/layer/uint8/0.txt  1
-rw-r--r--  compiler/pota-quantization-value-test/test_inputs/Mean_000_config/layer/uint8/1.txt  1
-rw-r--r--  compiler/pota-quantization-value-test/test_inputs/Mean_000_config/layer/uint8/2.txt  1
-rw-r--r--  compiler/pota-quantization-value-test/test_inputs/Mean_000_config/layer/uint8/3.txt  1
-rw-r--r--  compiler/pota-quantization-value-test/test_inputs/Mean_000_config/layer/uint8/4.txt  1
-rw-r--r--  compiler/pota-quantization-value-test/test_inputs/Mul_001_config/channel/int16/0.txt  1
-rw-r--r--  compiler/pota-quantization-value-test/test_inputs/Mul_001_config/channel/int16/1.txt  1
-rw-r--r--  compiler/pota-quantization-value-test/test_inputs/Mul_001_config/channel/int16/2.txt  1
-rw-r--r--  compiler/pota-quantization-value-test/test_inputs/Mul_001_config/channel/int16/3.txt  1
-rw-r--r--  compiler/pota-quantization-value-test/test_inputs/Mul_001_config/channel/int16/4.txt  1
-rw-r--r--  compiler/pota-quantization-value-test/test_inputs/Mul_001_config/layer/uint8/0.txt  1
-rw-r--r--  compiler/pota-quantization-value-test/test_inputs/Mul_001_config/layer/uint8/1.txt  1
-rw-r--r--  compiler/pota-quantization-value-test/test_inputs/Mul_001_config/layer/uint8/2.txt  1
-rw-r--r--  compiler/pota-quantization-value-test/test_inputs/Mul_001_config/layer/uint8/3.txt  1
-rw-r--r--  compiler/pota-quantization-value-test/test_inputs/Mul_001_config/layer/uint8/4.txt  1
-rw-r--r--  compiler/pota-quantization-value-test/test_inputs/PRelu_001_config/channel/int16/0.txt  1
-rw-r--r--  compiler/pota-quantization-value-test/test_inputs/PRelu_001_config/channel/int16/1.txt  1
-rw-r--r--  compiler/pota-quantization-value-test/test_inputs/PRelu_001_config/channel/int16/2.txt  1
-rw-r--r--  compiler/pota-quantization-value-test/test_inputs/PRelu_001_config/channel/int16/3.txt  1
-rw-r--r--  compiler/pota-quantization-value-test/test_inputs/PRelu_001_config/channel/int16/4.txt  1
-rw-r--r--  compiler/pota-quantization-value-test/test_inputs/PRelu_001_config/layer/uint8/0.txt  1
-rw-r--r--  compiler/pota-quantization-value-test/test_inputs/PRelu_001_config/layer/uint8/1.txt  1
-rw-r--r--  compiler/pota-quantization-value-test/test_inputs/PRelu_001_config/layer/uint8/2.txt  1
-rw-r--r--  compiler/pota-quantization-value-test/test_inputs/PRelu_001_config/layer/uint8/3.txt  1
-rw-r--r--  compiler/pota-quantization-value-test/test_inputs/PRelu_001_config/layer/uint8/4.txt  1
-rw-r--r--  compiler/pota-quantization-value-test/test_inputs/ReLU_000_config/channel/int16/0.txt  1
-rw-r--r--  compiler/pota-quantization-value-test/test_inputs/ReLU_000_config/channel/int16/1.txt  1
-rw-r--r--  compiler/pota-quantization-value-test/test_inputs/ReLU_000_config/channel/int16/2.txt  1
-rw-r--r--  compiler/pota-quantization-value-test/test_inputs/ReLU_000_config/channel/int16/3.txt  1
-rw-r--r--  compiler/pota-quantization-value-test/test_inputs/ReLU_000_config/channel/int16/4.txt  1
-rw-r--r--  compiler/pota-quantization-value-test/test_inputs/ReLU_000_config/layer/uint8/0.txt  1
-rw-r--r--  compiler/pota-quantization-value-test/test_inputs/ReLU_000_config/layer/uint8/1.txt  1
-rw-r--r--  compiler/pota-quantization-value-test/test_inputs/ReLU_000_config/layer/uint8/2.txt  1
-rw-r--r--  compiler/pota-quantization-value-test/test_inputs/ReLU_000_config/layer/uint8/3.txt  1
-rw-r--r--  compiler/pota-quantization-value-test/test_inputs/ReLU_000_config/layer/uint8/4.txt  1
-rw-r--r--  compiler/pota-quantization-value-test/test_inputs/Split_000_config/channel/int16/0.txt  1
-rw-r--r--  compiler/pota-quantization-value-test/test_inputs/Split_000_config/channel/int16/1.txt  1
-rw-r--r--  compiler/pota-quantization-value-test/test_inputs/Split_000_config/channel/int16/2.txt  1
-rw-r--r--  compiler/pota-quantization-value-test/test_inputs/Split_000_config/channel/int16/3.txt  1
-rw-r--r--  compiler/pota-quantization-value-test/test_inputs/Split_000_config/channel/int16/4.txt  1
-rw-r--r--compiler/pota-quantization-value-test/test_inputs/Split_000_config/channel/uint8/0.txt1
-rw-r--r--compiler/pota-quantization-value-test/test_inputs/Split_000_config/channel/uint8/1.txt1
-rw-r--r--compiler/pota-quantization-value-test/test_inputs/Split_000_config/channel/uint8/2.txt1
-rw-r--r--compiler/pota-quantization-value-test/test_inputs/Split_000_config/channel/uint8/3.txt1
-rw-r--r--compiler/pota-quantization-value-test/test_inputs/Split_000_config/channel/uint8/4.txt1
-rw-r--r--compiler/pota-quantization-value-test/test_inputs/TransposeConv_001_config/channel/int16/0.txt1
-rw-r--r--compiler/pota-quantization-value-test/test_inputs/TransposeConv_001_config/channel/int16/1.txt1
-rw-r--r--compiler/pota-quantization-value-test/test_inputs/TransposeConv_001_config/channel/int16/2.txt1
-rw-r--r--compiler/pota-quantization-value-test/test_inputs/TransposeConv_001_config/channel/int16/3.txt1
-rw-r--r--compiler/pota-quantization-value-test/test_inputs/TransposeConv_001_config/channel/int16/4.txt1
-rw-r--r--compiler/pota-quantization-value-test/test_inputs/TransposeConv_001_config/layer/uint8/0.txt1
-rw-r--r--compiler/pota-quantization-value-test/test_inputs/TransposeConv_001_config/layer/uint8/1.txt1
-rw-r--r--compiler/pota-quantization-value-test/test_inputs/TransposeConv_001_config/layer/uint8/2.txt1
-rw-r--r--compiler/pota-quantization-value-test/test_inputs/TransposeConv_001_config/layer/uint8/3.txt1
-rw-r--r--compiler/pota-quantization-value-test/test_inputs/TransposeConv_001_config/layer/uint8/4.txt1
-rwxr-xr-xcompiler/pota-quantization-value-test/test_quantization_with_config.sh109
-rw-r--r--compiler/pp/CMakeLists.txt4
-rw-r--r--compiler/record-minmax-conversion-test/CMakeLists.txt2
-rw-r--r--compiler/record-minmax/CMakeLists.txt10
-rw-r--r--compiler/record-minmax/requires.cmake1
-rw-r--r--compiler/record-minmax/src/HDF5Importer.h87
-rw-r--r--compiler/record-minmax/src/MinMaxObserver.cpp13
-rw-r--r--compiler/record-minmax/src/RecordMinMax.cpp106
-rw-r--r--compiler/souschef/CMakeLists.txt2
-rw-r--r--compiler/tf2tfliteV2-conversion-test/CMakeLists.txt2
-rw-r--r--compiler/tfl-inspect/CMakeLists.txt3
-rw-r--r--compiler/tfl-inspect/requires.cmake2
-rw-r--r--compiler/tfl-inspect/src/Reader.cpp74
-rw-r--r--compiler/tfl-inspect/src/Reader.h7
-rw-r--r--compiler/tfl-verify/CMakeLists.txt2
-rw-r--r--compiler/tfl-verify/requires.cmake2
-rw-r--r--compiler/tflchef/CMakeLists.txt6
-rw-r--r--compiler/tflchef/core/CMakeLists.txt2
-rw-r--r--compiler/tflchef/core/src/ModelChef.cpp8
-rw-r--r--compiler/tflchef/core/src/Op/FullyConnected.cpp1
-rw-r--r--compiler/tflchef/core/src/Op/SVDF.cpp41
-rw-r--r--compiler/tflchef/core/src/Op/SVDF.h46
-rw-r--r--compiler/tflchef/core/src/OpChef.def1
-rw-r--r--compiler/tflchef/core/src/OpChefs.h1
-rw-r--r--compiler/tflchef/proto/tflchef.proto13
-rw-r--r--compiler/tflchef/requires.cmake2
-rw-r--r--compiler/tflchef/tests/CMakeLists.txt43
-rw-r--r--compiler/tflchef/tests/signature_def_index/test.recipe3
-rw-r--r--compiler/tflchef/tests/signature_def_name/test.recipe3
-rw-r--r--compiler/tflchef/tflite/CMakeLists.txt3
-rw-r--r--compiler/tflchef/tflite/src/Op/FullyConnected.cpp1
-rw-r--r--compiler/tflchef/tflite/src/Op/SVDF.cpp59
-rw-r--r--compiler/tflchef/tflite/src/Op/SVDF.h39
-rw-r--r--compiler/tflchef/tflite/src/RecipeChef.cpp11
-rw-r--r--compiler/tflchef/tflite/src/TFliteImport.cpp49
-rw-r--r--compiler/tflchef/tflite/src/TFliteImport.h6
-rw-r--r--compiler/tflchef/tflite/src/TFliteOpChefs.h1
-rw-r--r--compiler/tflchef/tflite/src/TFliteOpRegistry.h1
-rw-r--r--compiler/tfldump/CMakeLists.txt10
-rw-r--r--compiler/tfldump/requires.cmake2
-rw-r--r--compiler/tfldump/src/Dump.cpp26
-rw-r--r--compiler/tfldump/src/Load.cpp2
-rw-r--r--compiler/tfldump/src/OpPrinter.cpp18
-rw-r--r--compiler/tfldump/src/Read.cpp72
-rw-r--r--compiler/tfldump/src/Read.h7
-rw-r--r--compiler/tflite2circle/CMakeLists.txt9
-rw-r--r--compiler/tflite2circle/requires.cmake4
-rw-r--r--compiler/tflite2circle/src/BuildBuiltinOptions.h1
-rw-r--r--compiler/tflite2circle/src/BuildBuiltinOptions/FullyConnectedOptions.cpp1
-rw-r--r--compiler/tflite2circle/src/BuildBuiltinOptions/SVDFOptions.cpp41
-rw-r--r--compiler/tflite2circle/src/BuildBuiltinOptions/SVDFOptions.h (renamed from runtime/onert/backend/gpu_cl/open_cl/AccessType.h)30
-rw-r--r--compiler/tflite2circle/src/CircleModel.cpp42
-rw-r--r--compiler/tflite2circle/src/DataLookup.cpp16
-rw-r--r--compiler/tflite2circle/src/DataLookup.h2
-rw-r--r--compiler/tflite2circle/src/TFLBuiltinOptions.lst2
-rw-r--r--compiler/vconone/CMakeLists.txt2
-rw-r--r--docs/conf.py2
-rw-r--r--docs/howto/how-to-build-compiler.md72
-rw-r--r--docs/howto/how-to-build-runtime.md4
-rw-r--r--infra/cmake/modules/ExternalBuildTools.cmake25
-rw-r--r--infra/cmake/modules/ExternalSourceTools.cmake22
-rw-r--r--infra/cmake/modules/IdentifyPlatform.cmake4
-rw-r--r--infra/cmake/packages/FlatBuffers-1.10/FlatBuffersConfig.cmake119
-rw-r--r--infra/cmake/packages/FlatBuffers-2.0/FlatBuffersConfig.cmake (renamed from infra/cmake/packages/FlatBuffers-1.12/FlatBuffersConfig.cmake)41
-rw-r--r--infra/cmake/packages/FlatBuffers-2.0/FlatBuffersConfigVersion.cmake (renamed from infra/cmake/packages/FlatBuffers-1.10/FlatBuffersConfigVersion.cmake)2
-rw-r--r--infra/cmake/packages/FlatBuffersConfig.cmake119
-rw-r--r--infra/cmake/packages/FlatBuffersSource-1.12/FlatBuffersSourceConfig.cmake21
-rw-r--r--infra/cmake/packages/FlatBuffersSource-2.0/FlatBuffersSourceConfig.cmake (renamed from infra/cmake/packages/FlatBuffersSource-1.10/FlatBuffersSourceConfig.cmake)8
-rw-r--r--infra/cmake/packages/FlatBuffersSource-2.0/FlatBuffersSourceConfigVersion.cmake (renamed from infra/cmake/packages/FlatBuffersSource-1.12/FlatBuffersSourceConfigVersion.cmake)2
-rw-r--r--infra/cmake/packages/FlatBuffersSourceConfig.cmake28
-rw-r--r--infra/cmake/packages/FlatBuffersSourceConfigVersion.cmake9
-rw-r--r--infra/cmake/packages/GTestConfig.cmake17
-rw-r--r--infra/cmake/packages/GTestSourceConfig.cmake2
-rw-r--r--infra/cmake/packages/H5Tinit.c.linux-armv7l977
-rw-r--r--infra/cmake/packages/HDF5Config.cmake16
-rw-r--r--infra/cmake/packages/HDF5Source.patch195
-rw-r--r--infra/cmake/packages/HDF5SourceConfig.cmake3
-rw-r--r--infra/cmake/packages/JsoncppConfig.cmake34
-rw-r--r--infra/cmake/packages/JsoncppSourceConfig.cmake18
-rw-r--r--infra/cmake/packages/Opencl_HeadersConfig.cmake11
-rw-r--r--infra/cmake/packages/ProtobufConfig.cmake21
-rw-r--r--infra/cmake/packages/ProtobufSource.patch18
-rw-r--r--infra/cmake/packages/ProtobufSourceConfig.cmake3
-rw-r--r--infra/cmake/packages/TensorFlowGEMMLowpSource-2.8.0/TensorFlowGEMMLowpSourceConfig.cmake20
-rw-r--r--infra/cmake/packages/TensorFlowGEMMLowpSource-2.8.0/TensorFlowGEMMLowpSourceConfigVersion.cmake (renamed from infra/cmake/packages/FlatBuffers-1.12/FlatBuffersConfigVersion.cmake)2
-rw-r--r--infra/cmake/packages/TensorFlowGpuConfig.cmake22
-rw-r--r--infra/cmake/packages/TensorFlowGpuSource/patch_for_gpu_cl_build.patch292
-rw-r--r--infra/cmake/packages/TensorFlowGpuSourceConfig.cmake74
-rw-r--r--infra/cmake/packages/TensorFlowLite-1.13.1/Lite/CMakeLists.txt7
-rw-r--r--infra/cmake/packages/TensorFlowLite-1.13.1/TensorFlowLiteConfig.cmake6
-rw-r--r--infra/cmake/packages/TensorFlowLiteGpu/CMakeLists.txt72
-rw-r--r--infra/cmake/packages/TensorFlowSource-2.8.0/TensorFlowSourceConfig.cmake18
-rw-r--r--infra/cmake/packages/TensorFlowSource-2.8.0/TensorFlowSourceConfigVersion.cmake (renamed from infra/cmake/packages/FlatBuffersSource-1.10/FlatBuffersSourceConfigVersion.cmake)2
-rw-r--r--infra/command/format118
-rw-r--r--infra/debian/compiler/control4
-rw-r--r--infra/debian/compiler/one-compiler.install4
-rw-r--r--infra/debian/compiler/postinst2
-rwxr-xr-xinfra/debian/compiler/rules2
-rw-r--r--infra/docker/bionic/Dockerfile9
-rw-r--r--infra/docker/focal/Dockerfile6
-rw-r--r--infra/nncc/CMakeLists.txt59
-rw-r--r--infra/nncc/Makefile.arm32146
-rw-r--r--infra/nncc/cmake/ApplyCompileFlags.cmake35
-rw-r--r--infra/nncc/cmake/CfgOptionFlags.cmake58
-rw-r--r--infra/nncc/cmake/buildtool/config/config_armv7l-linux.cmake24
-rw-r--r--infra/nncc/cmake/buildtool/config/config_linux.cmake11
-rw-r--r--infra/nncc/cmake/buildtool/cross/toolchain_armv7l-linux.cmake38
-rw-r--r--infra/nncc/cmake/options/options_armv7l-linux.cmake5
-rw-r--r--infra/nncc/cmake/options/options_x86_64-linux.cmake3
-rw-r--r--infra/nnfw/cmake/CfgOptionFlags.cmake7
-rw-r--r--infra/nnfw/cmake/buildtool/config/config_armv7hl-tizen.cmake22
-rw-r--r--infra/nnfw/cmake/options/options_aarch64-tizen.cmake1
-rw-r--r--infra/nnfw/cmake/options/options_armv7hl-tizen.cmake16
-rw-r--r--infra/nnfw/cmake/options/options_armv7l-linux.cmake5
-rw-r--r--infra/nnfw/cmake/options/options_armv7l-tizen.cmake6
-rw-r--r--infra/nnfw/cmake/options/options_i686-tizen.cmake1
-rw-r--r--infra/nnfw/cmake/options/options_x86_64-darwin.cmake2
-rw-r--r--infra/nnfw/cmake/options/options_x86_64-linux.cmake4
-rw-r--r--infra/nnfw/cmake/options/options_x86_64-tizen.cmake3
-rw-r--r--infra/nnfw/cmake/packages/CpuInfoConfig.cmake2
-rw-r--r--infra/nnfw/cmake/packages/FlatBuffersConfig.cmake4
-rw-r--r--infra/nnfw/cmake/packages/GTestConfig.cmake6
-rw-r--r--infra/nnfw/cmake/packages/TRIXEngineConfig.cmake42
-rw-r--r--infra/nnfw/cmake/packages/TRIXEngineConfigVersion.cmake104
-rw-r--r--infra/nnfw/cmake/packages/TRIXEngineConfigVersion.extra.cpp24
-rw-r--r--infra/nnfw/cmake/packages/TRIXEngineConfigVersion.major.cpp24
-rw-r--r--infra/nnfw/cmake/packages/TRIXEngineConfigVersion.minor.cpp24
-rw-r--r--infra/nnfw/config/gbs.conf2
-rw-r--r--infra/packaging/build13
-rw-r--r--infra/packaging/preset/2022032357
-rw-r--r--infra/packaging/preset/20220323_windows69
-rw-r--r--infra/packaging/res/tf2nnpkg.20220323109
-rw-r--r--infra/scripts/compiler_modules.sh2
-rwxr-xr-xinfra/scripts/docker_build_test_x64.sh3
-rwxr-xr-xinfra/scripts/docker_collect_nnpkg_resources.sh2
-rw-r--r--nnpackage/spec/10_packaging_and_manifest.md7
-rw-r--r--packaging/CPUINFO.tar.gz (renamed from packaging/cpuinfo.tar.gz)bin3476406 -> 3476406 bytes
-rw-r--r--packaging/GEMMLOWP.tar.gz (renamed from packaging/gemmlowp.tar.gz)bin830362 -> 830362 bytes
-rw-r--r--packaging/OOURAFFT.tar.gz (renamed from packaging/oourafft.tar.gz)bin111530 -> 111530 bytes
-rw-r--r--packaging/RUY.tar.gz (renamed from packaging/ruy.tar.gz)bin235110 -> 235110 bytes
-rw-r--r--packaging/TENSORFLOW-2.3.0-EIGEN.tar.gz (renamed from packaging/eigen.tar.gz)bin2664733 -> 2664733 bytes
-rw-r--r--packaging/TENSORFLOW_GPU.tar.gzbin0 -> 579811 bytes
-rw-r--r--packaging/gtest.tar.gzbin1287207 -> 0 bytes
-rw-r--r--packaging/nnapi_test_generated.tar.gzbin819008 -> 856612 bytes
-rw-r--r--packaging/nnfw.spec48
-rw-r--r--res/CircleSchema/0.3/circle_schema.fbs1137
-rw-r--r--res/CircleSchema/0.4/circle_schema.fbs1292
-rw-r--r--res/PyTorchExamples/examples/BatchToSpaceND/__init__.py49
-rw-r--r--res/PyTorchExamples/examples/Conv2d-pad/__init__.py18
-rw-r--r--res/PyTorchExamples/examples/Conv2d-yuv2rgb/__init__.py24
-rw-r--r--res/PyTorchExamples/examples/LSTM-bi/__init__.py28
-rw-r--r--res/PyTorchExamples/examples/LSTM-nobias/__init__.py28
-rw-r--r--res/PyTorchExamples/examples/LSTM-noinit/__init__.py24
-rw-r--r--res/PyTorchExamples/examples/MaxPool2d-am/__init__.py18
-rw-r--r--res/PyTorchExamples/examples/PixelShuffle/__init__.py18
-rw-r--r--res/PyTorchExamples/examples/RNN-bi/__init__.py27
-rw-r--r--res/PyTorchExamples/examples/RNN-nobias/__init__.py26
-rw-r--r--res/PyTorchExamples/examples/RNN-noinit/__init__.py23
-rw-r--r--res/PyTorchExamples/examples/RNN-relu/__init__.py26
-rw-r--r--res/PyTorchExamples/examples/SpaceToBatchND/__init__.py49
-rw-r--r--res/PyTorchExamples/examples/SpaceToDepth/__init__.py30
-rw-r--r--res/PyTorchExamples/examples/clamp/__init__.py17
-rw-r--r--res/PyTorchExamples/examples/interpolate/__init__.py30
-rw-r--r--res/PyTorchExamples/examples/normalize/__init__.py17
-rw-r--r--res/PyTorchExamples/examples/strided_slice/__init__.py25
-rwxr-xr-xres/PyTorchExamples/ptem.py10
-rw-r--r--res/TensorFlowLiteRecipes/FullyConnected_006/test.recipe29
-rw-r--r--res/TensorFlowLiteRecipes/FullyConnected_006/test.reverse0
-rw-r--r--res/TensorFlowLiteRecipes/Gather_000/test.recipe1
-rw-r--r--res/TensorFlowLiteRecipes/Gather_001/test.recipe27
-rw-r--r--res/TensorFlowLiteRecipes/Gather_001/test.reverse0
-rw-r--r--res/TensorFlowLiteRecipes/Net_Conv_QuantDequant_000/test.recipe1
-rw-r--r--res/TensorFlowLiteRecipes/Net_Gather_SparseToDense_AddV2_000/test.recipe131
-rw-r--r--res/TensorFlowLiteRecipes/Part_Add_SVDF_000/test.recipe82
-rw-r--r--res/TensorFlowLiteRecipes/Part_Mul_Sqrt_FC_nobias_000/test.recipe63
-rw-r--r--res/TensorFlowLiteRecipes/Part_Split_Add_000/test.recipe47
-rw-r--r--res/TensorFlowLiteRecipes/Quant_Add_000/test.recipe36
-rw-r--r--res/TensorFlowLiteRecipes/Quant_Add_000/test.rule10
-rw-r--r--res/TensorFlowLiteRecipes/Quant_Conv_Mul_Add_000/test.qconf.json11
-rw-r--r--res/TensorFlowLiteRecipes/Quant_Conv_Mul_Add_000/test.recipe92
-rw-r--r--res/TensorFlowLiteRecipes/Quant_Conv_Mul_Add_000/test.rule11
-rw-r--r--res/TensorFlowLiteRecipes/Quant_Conv_Mul_Add_001/test.qconf.json16
-rw-r--r--res/TensorFlowLiteRecipes/Quant_Conv_Mul_Add_001/test.recipe92
-rw-r--r--res/TensorFlowLiteRecipes/Quant_Conv_Mul_Add_001/test.rule14
-rw-r--r--res/TensorFlowLiteRecipes/Quant_Conv_Mul_Add_002/test.qconf.json16
-rw-r--r--res/TensorFlowLiteRecipes/Quant_Conv_Mul_Add_002/test.recipe88
-rw-r--r--res/TensorFlowLiteRecipes/Quant_Conv_Mul_Add_002/test.rule14
-rw-r--r--res/TensorFlowLiteRecipes/Quant_Split_Add_000/test.qconf.json11
-rw-r--r--res/TensorFlowLiteRecipes/Quant_Split_Add_000/test.recipe47
-rw-r--r--res/TensorFlowLiteRecipes/Quant_Split_Add_000/test.rule11
-rw-r--r--res/TensorFlowLiteRecipes/Quant_Split_Add_001/test.qconf.json11
-rw-r--r--res/TensorFlowLiteRecipes/Quant_Split_Add_001/test.recipe47
-rw-r--r--res/TensorFlowLiteRecipes/Quant_Split_Add_001/test.rule11
-rw-r--r--res/TensorFlowLiteRecipes/Quantize_001/test.recipe66
-rw-r--r--res/TensorFlowLiteRecipes/Quantize_001/test.reverse0
-rw-r--r--res/TensorFlowLiteRecipes/SVDF_000/test.recipe62
-rw-r--r--res/TensorFlowLiteRecipes/SVDF_000/test.reverse0
-rw-r--r--res/TensorFlowLiteRecipes/SVDF_001/test.recipe52
-rw-r--r--res/TensorFlowLiteRecipes/SVDF_001/test.reverse0
-rw-r--r--res/TensorFlowLiteRecipes/SignatureDef_MultiOut_000/test.recipe3
-rw-r--r--res/TensorFlowLiteRecipes/SignatureDef_MultiOut_001/test.recipe81
-rw-r--r--res/TensorFlowLiteRecipes/Sqrt_000/test.recipe1
-rw-r--r--res/TensorFlowLiteSchema/2.7.0/schema.fbs1250
-rw-r--r--res/TensorFlowLiteSchema/2.8.0/schema.fbs1264
-rw-r--r--runtime/contrib/android/api/build.gradle2
-rw-r--r--runtime/contrib/android_benchmark_app/CMakeLists.txt2
-rw-r--r--runtime/onert/api/CMakeLists.txt1
-rw-r--r--runtime/onert/api/include/nnfw.h9
-rw-r--r--runtime/onert/api/include/nnfw_version.h2
-rw-r--r--runtime/onert/api/src/nnfw_api.cc1
-rw-r--r--runtime/onert/api/src/nnfw_api_internal.cc112
-rw-r--r--runtime/onert/backend/CMakeLists.txt7
-rw-r--r--runtime/onert/backend/acl_cl/BackendContext.cc242
-rw-r--r--runtime/onert/backend/acl_cl/BackendContext.h34
-rw-r--r--runtime/onert/backend/acl_cl/ConstantInitializer.cc16
-rw-r--r--runtime/onert/backend/acl_cl/Optimizer.cc6
-rw-r--r--runtime/onert/backend/acl_common/AclBackendContext.h106
-rw-r--r--runtime/onert/backend/acl_common/AclConstantInitializer.h17
-rw-r--r--runtime/onert/backend/acl_common/AclSubTensorAnalyzer.h11
-rw-r--r--runtime/onert/backend/acl_common/AclTensorBuilder.h83
-rw-r--r--runtime/onert/backend/acl_common/CMakeLists.txt1
-rw-r--r--runtime/onert/backend/acl_neon/BackendContext.cc243
-rw-r--r--runtime/onert/backend/acl_neon/BackendContext.h35
-rw-r--r--runtime/onert/backend/acl_neon/ConstantInitializer.cc16
-rw-r--r--runtime/onert/backend/acl_neon/Optimizer.cc6
-rw-r--r--runtime/onert/backend/cl_common/CMakeLists.txt7
-rw-r--r--runtime/onert/backend/cl_common/include/cl_common/BackendContext.h236
-rw-r--r--runtime/onert/backend/cl_common/include/cl_common/LifetimeMap.h51
-rw-r--r--runtime/onert/backend/cl_common/include/cl_common/ParentInfo.h (renamed from runtime/onert/backend/gpu_cl/ParentInfo.h)10
-rw-r--r--runtime/onert/backend/cl_common/src/LifetimeMap.cc85
-rw-r--r--runtime/onert/backend/cpu/ops/OperationUtils.cc2
-rw-r--r--runtime/onert/backend/cpu/ops/OperationUtils.h44
-rw-r--r--runtime/onert/backend/gpu_cl/Backend.h20
-rw-r--r--runtime/onert/backend/gpu_cl/BackendContext.cc181
-rw-r--r--runtime/onert/backend/gpu_cl/BackendContext.h29
-rw-r--r--runtime/onert/backend/gpu_cl/CMakeLists.txt26
-rw-r--r--runtime/onert/backend/gpu_cl/ClConstantInitializer.cc3
-rw-r--r--runtime/onert/backend/gpu_cl/ClConstantInitializer.h2
-rw-r--r--runtime/onert/backend/gpu_cl/ClFunction.h19
-rw-r--r--runtime/onert/backend/gpu_cl/ClMemoryManager.h135
-rw-r--r--runtime/onert/backend/gpu_cl/ClTensorBuilder.h289
-rw-r--r--runtime/onert/backend/gpu_cl/ClTensorManager.h235
-rw-r--r--runtime/onert/backend/gpu_cl/Config.cc12
-rw-r--r--runtime/onert/backend/gpu_cl/Config.h2
-rw-r--r--runtime/onert/backend/gpu_cl/KernelGenerator.cc30
-rw-r--r--runtime/onert/backend/gpu_cl/KernelGenerator.h14
-rw-r--r--runtime/onert/backend/gpu_cl/MemoryManager.h157
-rw-r--r--runtime/onert/backend/gpu_cl/TensorBuilder.cc135
-rw-r--r--runtime/onert/backend/gpu_cl/TensorBuilder.h81
-rw-r--r--runtime/onert/backend/gpu_cl/TensorBuilderHelper.h (renamed from runtime/onert/backend/gpu_cl/open_cl/kernels/ConvCommon.h)26
-rw-r--r--runtime/onert/backend/gpu_cl/TensorManager.cc138
-rw-r--r--runtime/onert/backend/gpu_cl/TensorManager.h64
-rw-r--r--runtime/onert/backend/gpu_cl/TensorRegistry.h (renamed from runtime/onert/backend/gpu_cl/ClTensorRegistry.h)12
-rw-r--r--runtime/onert/backend/gpu_cl/ex/InferenceContextEx.h108
-rw-r--r--runtime/onert/backend/gpu_cl/open_cl/Api.cc202
-rw-r--r--runtime/onert/backend/gpu_cl/open_cl/Api.h359
-rw-r--r--runtime/onert/backend/gpu_cl/open_cl/Arguments.cc926
-rw-r--r--runtime/onert/backend/gpu_cl/open_cl/Arguments.h175
-rw-r--r--runtime/onert/backend/gpu_cl/open_cl/Buffer.cc234
-rw-r--r--runtime/onert/backend/gpu_cl/open_cl/Buffer.h121
-rw-r--r--runtime/onert/backend/gpu_cl/open_cl/ClCommandQueue.cc359
-rw-r--r--runtime/onert/backend/gpu_cl/open_cl/ClCommandQueue.h157
-rw-r--r--runtime/onert/backend/gpu_cl/open_cl/ClContext.cc177
-rw-r--r--runtime/onert/backend/gpu_cl/open_cl/ClContext.h68
-rw-r--r--runtime/onert/backend/gpu_cl/open_cl/ClDevice.cc448
-rw-r--r--runtime/onert/backend/gpu_cl/open_cl/ClDevice.h119
-rw-r--r--runtime/onert/backend/gpu_cl/open_cl/ClErrors.h48
-rw-r--r--runtime/onert/backend/gpu_cl/open_cl/ClEvent.cc88
-rw-r--r--runtime/onert/backend/gpu_cl/open_cl/ClEvent.h75
-rw-r--r--runtime/onert/backend/gpu_cl/open_cl/ClImageFormat.cc59
-rw-r--r--runtime/onert/backend/gpu_cl/open_cl/ClImageFormat.h39
-rw-r--r--runtime/onert/backend/gpu_cl/open_cl/ClKernel.cc171
-rw-r--r--runtime/onert/backend/gpu_cl/open_cl/ClKernel.h101
-rw-r--r--runtime/onert/backend/gpu_cl/open_cl/ClMemory.h100
-rw-r--r--runtime/onert/backend/gpu_cl/open_cl/ClProgram.cc224
-rw-r--r--runtime/onert/backend/gpu_cl/open_cl/ClProgram.h98
-rw-r--r--runtime/onert/backend/gpu_cl/open_cl/DataType.cc122
-rw-r--r--runtime/onert/backend/gpu_cl/open_cl/DataType.h57
-rw-r--r--runtime/onert/backend/gpu_cl/open_cl/DeviceInfo.cc383
-rw-r--r--runtime/onert/backend/gpu_cl/open_cl/DeviceInfo.h203
-rw-r--r--runtime/onert/backend/gpu_cl/open_cl/Environment.cc276
-rw-r--r--runtime/onert/backend/gpu_cl/open_cl/Environment.h90
-rw-r--r--runtime/onert/backend/gpu_cl/open_cl/GpuObject.h222
-rw-r--r--runtime/onert/backend/gpu_cl/open_cl/InferenceContext.cc71
-rw-r--r--runtime/onert/backend/gpu_cl/open_cl/InferenceContext.h143
-rw-r--r--runtime/onert/backend/gpu_cl/open_cl/InternalTensor.h106
-rw-r--r--runtime/onert/backend/gpu_cl/open_cl/LinearStorage.cc265
-rw-r--r--runtime/onert/backend/gpu_cl/open_cl/LinearStorage.h110
-rw-r--r--runtime/onert/backend/gpu_cl/open_cl/Model.h56
-rw-r--r--runtime/onert/backend/gpu_cl/open_cl/ModelHints.h67
-rw-r--r--runtime/onert/backend/gpu_cl/open_cl/OpenclWrapper.cc407
-rw-r--r--runtime/onert/backend/gpu_cl/open_cl/OpenclWrapper.h560
-rw-r--r--runtime/onert/backend/gpu_cl/open_cl/Operations.cc704
-rw-r--r--runtime/onert/backend/gpu_cl/open_cl/Operations.h586
-rw-r--r--runtime/onert/backend/gpu_cl/open_cl/Precision.cc56
-rw-r--r--runtime/onert/backend/gpu_cl/open_cl/Precision.h53
-rw-r--r--runtime/onert/backend/gpu_cl/open_cl/ProgramCache.cc97
-rw-r--r--runtime/onert/backend/gpu_cl/open_cl/ProgramCache.h112
-rw-r--r--runtime/onert/backend/gpu_cl/open_cl/Shape.cc141
-rw-r--r--runtime/onert/backend/gpu_cl/open_cl/Shape.h668
-rw-r--r--runtime/onert/backend/gpu_cl/open_cl/Spi.h94
-rw-r--r--runtime/onert/backend/gpu_cl/open_cl/StorageTypeUtil.cc149
-rw-r--r--runtime/onert/backend/gpu_cl/open_cl/StorageTypeUtil.h47
-rw-r--r--runtime/onert/backend/gpu_cl/open_cl/Tensor.cc690
-rw-r--r--runtime/onert/backend/gpu_cl/open_cl/Tensor.h142
-rw-r--r--runtime/onert/backend/gpu_cl/open_cl/TensorType.cc1116
-rw-r--r--runtime/onert/backend/gpu_cl/open_cl/TensorType.h188
-rw-r--r--runtime/onert/backend/gpu_cl/open_cl/TensorTypeUtil.cc90
-rw-r--r--runtime/onert/backend/gpu_cl/open_cl/Texture2d.cc237
-rw-r--r--runtime/onert/backend/gpu_cl/open_cl/Texture2d.h160
-rw-r--r--runtime/onert/backend/gpu_cl/open_cl/Types.h183
-rw-r--r--runtime/onert/backend/gpu_cl/open_cl/Util.cc264
-rw-r--r--runtime/onert/backend/gpu_cl/open_cl/Util.h278
-rw-r--r--runtime/onert/backend/gpu_cl/open_cl/WinogradUtil.cc178
-rw-r--r--runtime/onert/backend/gpu_cl/open_cl/WinogradUtil.h48
-rw-r--r--runtime/onert/backend/gpu_cl/open_cl/WorkgroupSelection.cc258
-rw-r--r--runtime/onert/backend/gpu_cl/open_cl/WorkgroupSelection.h59
-rw-r--r--runtime/onert/backend/gpu_cl/open_cl/kernels/Add.cc64
-rw-r--r--runtime/onert/backend/gpu_cl/open_cl/kernels/Add.h43
-rw-r--r--runtime/onert/backend/gpu_cl/open_cl/kernels/ConvBuffer1x1.cc480
-rw-r--r--runtime/onert/backend/gpu_cl/open_cl/kernels/ConvBuffer1x1.h205
-rw-r--r--runtime/onert/backend/gpu_cl/open_cl/kernels/ConvConstants.cc282
-rw-r--r--runtime/onert/backend/gpu_cl/open_cl/kernels/ConvConstants.h137
-rw-r--r--runtime/onert/backend/gpu_cl/open_cl/kernels/ConvPowervr.cc1653
-rw-r--r--runtime/onert/backend/gpu_cl/open_cl/kernels/ConvPowervr.h413
-rw-r--r--runtime/onert/backend/gpu_cl/open_cl/kernels/ConvWeightsConverter.cc143
-rw-r--r--runtime/onert/backend/gpu_cl/open_cl/kernels/ConvWeightsConverter.h68
-rw-r--r--runtime/onert/backend/gpu_cl/open_cl/kernels/Converter.cc592
-rw-r--r--runtime/onert/backend/gpu_cl/open_cl/kernels/Converter.h40
-rw-r--r--runtime/onert/backend/gpu_cl/open_cl/kernels/DepthwiseConv.cc382
-rw-r--r--runtime/onert/backend/gpu_cl/open_cl/kernels/DepthwiseConv.h233
-rw-r--r--runtime/onert/backend/gpu_cl/open_cl/kernels/DepthwiseConv3x3.cc358
-rw-r--r--runtime/onert/backend/gpu_cl/open_cl/kernels/DepthwiseConv3x3.h177
-rw-r--r--runtime/onert/backend/gpu_cl/open_cl/kernels/GpuOperation.cc385
-rw-r--r--runtime/onert/backend/gpu_cl/open_cl/kernels/GpuOperation.h203
-rw-r--r--runtime/onert/backend/gpu_cl/open_cl/kernels/Pooling.cc400
-rw-r--r--runtime/onert/backend/gpu_cl/open_cl/kernels/Pooling.h43
-rw-r--r--runtime/onert/backend/gpu_cl/open_cl/kernels/Relu.cc80
-rw-r--r--runtime/onert/backend/gpu_cl/open_cl/kernels/Relu.h40
-rw-r--r--runtime/onert/backend/gpu_cl/open_cl/kernels/Reshape.cc111
-rw-r--r--runtime/onert/backend/gpu_cl/open_cl/kernels/Reshape.h41
-rw-r--r--runtime/onert/backend/gpu_cl/open_cl/kernels/Reshapex4.cc96
-rw-r--r--runtime/onert/backend/gpu_cl/open_cl/kernels/Reshapex4.h42
-rw-r--r--runtime/onert/backend/gpu_cl/open_cl/kernels/Softmax.cc86
-rw-r--r--runtime/onert/backend/gpu_cl/open_cl/kernels/Softmax.h40
-rw-r--r--runtime/onert/backend/gpu_cl/open_cl/kernels/Softmax1x1.cc138
-rw-r--r--runtime/onert/backend/gpu_cl/open_cl/kernels/Softmax1x1.h61
-rw-r--r--runtime/onert/backend/gpu_cl/open_cl/kernels/TuningParameters.h48
-rw-r--r--runtime/onert/backend/gpu_cl/open_cl/kernels/Util.cc230
-rw-r--r--runtime/onert/backend/gpu_cl/open_cl/kernels/Util.h73
-rw-r--r--runtime/onert/backend/gpu_cl/open_cl/kernels/WorkGroupPicking.cc348
-rw-r--r--runtime/onert/backend/gpu_cl/open_cl/kernels/WorkGroupPicking.h67
-rw-r--r--runtime/onert/backend/gpu_cl/open_cl/selectors/ConvolutionSelector.cc249
-rw-r--r--runtime/onert/backend/gpu_cl/open_cl/selectors/ConvolutionSelector.h62
-rw-r--r--runtime/onert/backend/gpu_cl/open_cl/selectors/DwConvolutionSelector.cc107
-rw-r--r--runtime/onert/backend/gpu_cl/open_cl/selectors/DwConvolutionSelector.h42
-rw-r--r--runtime/onert/backend/gpu_cl/open_cl/selectors/SimpleSelectors.cc98
-rw-r--r--runtime/onert/backend/gpu_cl/open_cl/selectors/SimpleSelectors.h57
-rw-r--r--runtime/onert/backend/gpu_cl/operand/CLTensor.cc21
-rw-r--r--runtime/onert/backend/gpu_cl/operand/CLTensor.h15
-rw-r--r--runtime/onert/backend/gpu_cl/operand/ICLTensor.cc219
-rw-r--r--runtime/onert/backend/gpu_cl/operand/ICLTensor.h43
-rw-r--r--runtime/onert/backend/ruy/ops/OperationUtils.h42
-rw-r--r--runtime/onert/backend/trix/Backend.h62
-rw-r--r--runtime/onert/backend/trix/BackendContext.cc64
-rw-r--r--runtime/onert/backend/trix/BackendContext.h62
-rw-r--r--runtime/onert/backend/trix/CMakeLists.txt24
-rw-r--r--runtime/onert/backend/trix/Config.cc32
-rw-r--r--runtime/onert/backend/trix/Config.h48
-rw-r--r--runtime/onert/backend/trix/DevContext.h118
-rw-r--r--runtime/onert/backend/trix/KernelGenerator.cc83
-rw-r--r--runtime/onert/backend/trix/KernelGenerator.h61
-rw-r--r--runtime/onert/backend/trix/Tensor.h (renamed from runtime/onert/backend/acl_common/ParentInfo.h)27
-rw-r--r--runtime/onert/backend/trix/TensorBuilder.h35
-rw-r--r--runtime/onert/backend/trix/ops/BulkLayer.cc120
-rw-r--r--runtime/onert/backend/trix/ops/BulkLayer.h63
-rw-r--r--runtime/onert/backend/trix/trix.cc24
-rw-r--r--runtime/onert/backend/xnnpack/ops/OperationUtils.h39
-rw-r--r--runtime/onert/core/include/compiler/LoweredGraph.h1
-rw-r--r--runtime/onert/core/include/ir/DataType.h1
-rw-r--r--runtime/onert/core/include/ir/Operations.Include.h1
-rw-r--r--runtime/onert/core/include/ir/Operations.lst1
-rw-r--r--runtime/onert/core/include/ir/operation/Bulk.h53
-rw-r--r--runtime/onert/core/include/util/CalculateActivationRange.h64
-rw-r--r--runtime/onert/core/src/compiler/Compiler.cc124
-rw-r--r--runtime/onert/core/src/compiler/ExecutorFactory.cc113
-rw-r--r--runtime/onert/core/src/compiler/ExecutorFactory.h14
-rw-r--r--runtime/onert/core/src/compiler/LoweredGraph.cc80
-rw-r--r--runtime/onert/core/src/exec/IPermuteFunction.h5
-rw-r--r--runtime/onert/core/src/ir/DataType.cc2
-rw-r--r--runtime/onert/core/src/ir/OperationDumper.cc160
-rw-r--r--runtime/onert/core/src/ir/operation/AddN.cc3
-rw-r--r--runtime/onert/core/src/ir/operation/BCQFullyConnected.cc3
-rw-r--r--runtime/onert/core/src/ir/operation/BCQGather.cc3
-rw-r--r--runtime/onert/core/src/ir/operation/BatchToSpaceND.cc3
-rw-r--r--runtime/onert/core/src/ir/operation/BinaryArithmetic.cc4
-rw-r--r--runtime/onert/core/src/ir/operation/BroadcastTo.cc3
-rw-r--r--runtime/onert/core/src/ir/operation/Bulk.cc36
-rw-r--r--runtime/onert/core/src/ir/operation/Comparison.cc3
-rw-r--r--runtime/onert/core/src/ir/operation/Concat.cc3
-rw-r--r--runtime/onert/core/src/ir/operation/Conv2D.cc3
-rw-r--r--runtime/onert/core/src/ir/operation/ConvertFp16ToFp32.cc3
-rw-r--r--runtime/onert/core/src/ir/operation/ConvertFp32ToFp16.cc3
-rw-r--r--runtime/onert/core/src/ir/operation/DepthToSpace.cc3
-rw-r--r--runtime/onert/core/src/ir/operation/DepthwiseConv2D.cc3
-rw-r--r--runtime/onert/core/src/ir/operation/ElementwiseActivation.cc4
-rw-r--r--runtime/onert/core/src/ir/operation/ElementwiseBinary.cc4
-rw-r--r--runtime/onert/core/src/ir/operation/ElementwiseUnary.cc4
-rw-r--r--runtime/onert/core/src/ir/operation/EmbeddingLookup.cc3
-rw-r--r--runtime/onert/core/src/ir/operation/ExpandDims.cc3
-rw-r--r--runtime/onert/core/src/ir/operation/Fill.cc3
-rw-r--r--runtime/onert/core/src/ir/operation/FullyConnected.cc3
-rw-r--r--runtime/onert/core/src/ir/operation/Gather.cc3
-rw-r--r--runtime/onert/core/src/ir/operation/HashtableLookup.cc3
-rw-r--r--runtime/onert/core/src/ir/operation/InstanceNorm.cc3
-rw-r--r--runtime/onert/core/src/ir/operation/L2Normalization.cc3
-rw-r--r--runtime/onert/core/src/ir/operation/LSTM.cc3
-rw-r--r--runtime/onert/core/src/ir/operation/LocalResponseNormalization.cc3
-rw-r--r--runtime/onert/core/src/ir/operation/LogSoftmax.cc3
-rw-r--r--runtime/onert/core/src/ir/operation/MatrixBandPart.cc3
-rw-r--r--runtime/onert/core/src/ir/operation/PReLU.cc3
-rw-r--r--runtime/onert/core/src/ir/operation/Permute.cc3
-rw-r--r--runtime/onert/core/src/ir/operation/Pool2D.cc4
-rw-r--r--runtime/onert/core/src/ir/operation/Pow.cc3
-rw-r--r--runtime/onert/core/src/ir/operation/RNN.cc3
-rw-r--r--runtime/onert/core/src/ir/operation/Range.cc3
-rw-r--r--runtime/onert/core/src/ir/operation/Rank.cc3
-rw-r--r--runtime/onert/core/src/ir/operation/Reduce.cc4
-rw-r--r--runtime/onert/core/src/ir/operation/Reshape.cc3
-rw-r--r--runtime/onert/core/src/ir/operation/ResizeBilinear.cc3
-rw-r--r--runtime/onert/core/src/ir/operation/ResizeNearestNeighbor.cc3
-rw-r--r--runtime/onert/core/src/ir/operation/Reverse.cc3
-rw-r--r--runtime/onert/core/src/ir/operation/Shape.cc3
-rw-r--r--runtime/onert/core/src/ir/operation/Softmax.cc3
-rw-r--r--runtime/onert/core/src/ir/operation/SpaceToBatchND.cc3
-rw-r--r--runtime/onert/core/src/ir/operation/SpaceToDepth.cc3
-rw-r--r--runtime/onert/core/src/ir/operation/Split.cc3
-rw-r--r--runtime/onert/core/src/ir/operation/SplitV.cc3
-rw-r--r--runtime/onert/core/src/ir/operation/SquaredDifference.cc3
-rw-r--r--runtime/onert/core/src/ir/operation/StatelessRandomUniform.cc3
-rw-r--r--runtime/onert/core/src/ir/operation/StridedSlice.cc3
-rw-r--r--runtime/onert/core/src/ir/operation/Tile.cc3
-rw-r--r--runtime/onert/core/src/ir/operation/TopKV2.cc3
-rw-r--r--runtime/onert/core/src/ir/operation/Transpose.cc3
-rw-r--r--runtime/onert/core/src/ir/operation/TransposeConv.cc3
-rw-r--r--runtime/onert/core/src/ir/operation/Unpack.cc1
-rw-r--r--runtime/onert/core/src/ir/operation/While.cc1
-rw-r--r--runtime/onert/frontend/base_loader/include/base_loader.h67
-rw-r--r--runtime/onert/frontend/circle/src/circle_loader.cc28
-rw-r--r--runtime/onert/frontend/circle_schema/include/circle_schema_generated.h1732
-rw-r--r--runtime/onert/frontend/nnapi/wrapper/OperationFactory.cc309
-rw-r--r--runtime/onert/frontend/tflite/src/tflite_loader.cc38
-rw-r--r--runtime/onert/frontend/tflite/src/tflite_schema_generated.h4825
-rw-r--r--runtime/onert/frontend/trix/CMakeLists.txt21
-rw-r--r--runtime/onert/frontend/trix/include/trix_loader.h34
-rw-r--r--runtime/onert/frontend/trix/src/trix_loader.cc266
-rw-r--r--runtime/onert/frontend/trix/src/trix_loader_dummy.cc31
-rw-r--r--tests/nnapi/specs/V1_2/equal_broadcast_float_nnfw.mod.py4
-rw-r--r--tests/nnfw_api/src/CircleGen.cc7
-rw-r--r--tests/nnfw_api/src/CircleGen.h1
-rw-r--r--tests/nnfw_api/src/GenModelTests.cc2
-rw-r--r--tests/nnfw_api/src/one_op_tests/BatchToSpaceND.cc72
-rw-r--r--tests/nnfw_api/src/one_op_tests/Equal.cc108
-rw-r--r--tests/tools/nnpackage_run/CMakeLists.txt1
-rw-r--r--tests/tools/nnpackage_run/src/args.cc4
-rw-r--r--tests/tools/nnpackage_run/src/args.h4
-rw-r--r--tests/tools/nnpackage_run/src/formatter.h47
-rw-r--r--tests/tools/nnpackage_run/src/h5formatter.cc4
-rw-r--r--tests/tools/nnpackage_run/src/h5formatter.h20
-rw-r--r--tests/tools/nnpackage_run/src/nnfw_util.cc1
-rw-r--r--tests/tools/nnpackage_run/src/nnpackage_run.cc14
-rw-r--r--tests/tools/nnpackage_run/src/randomgen.cc3
-rw-r--r--tests/tools/nnpackage_run/src/rawformatter.cc103
-rw-r--r--tests/tools/nnpackage_run/src/rawformatter.h40
-rw-r--r--tests/tools/tflite_comparator/src/tflite_comparator.cc4
-rwxr-xr-xtools/cross/install_rootfs.sh8
-rw-r--r--tools/nnpackage_tool/model2nnpkg/README.md4
-rwxr-xr-xtools/nnpackage_tool/model2nnpkg/model2nnpkg.sh4
-rw-r--r--tools/tflitefile_tool/ir/README.md5
-rw-r--r--tools/tflitefile_tool/ir/__init__.py0
-rwxr-xr-xtools/tflitefile_tool/ir/graph_stats.py (renamed from tools/tflitefile_tool/graph_stats.py)33
-rw-r--r--tools/tflitefile_tool/ir/operator.py108
-rw-r--r--tools/tflitefile_tool/ir/subgraph.py170
-rw-r--r--tools/tflitefile_tool/ir/tensor.py120
-rwxr-xr-xtools/tflitefile_tool/model_parser.py95
-rwxr-xr-xtools/tflitefile_tool/operation.py209
-rwxr-xr-xtools/tflitefile_tool/operator_parser.py97
-rwxr-xr-xtools/tflitefile_tool/operator_printer.py59
-rwxr-xr-xtools/tflitefile_tool/operator_wrapping.py115
-rwxr-xr-xtools/tflitefile_tool/option_printer.py67
-rw-r--r--tools/tflitefile_tool/parser/__init__.py0
-rwxr-xr-xtools/tflitefile_tool/parser/model_parser.py31
-rw-r--r--tools/tflitefile_tool/parser/tflite/tflite_enum_str_maps.py40
-rwxr-xr-xtools/tflitefile_tool/parser/tflite/tflite_operator.py63
-rw-r--r--tools/tflitefile_tool/parser/tflite/tflite_option.py96
-rwxr-xr-xtools/tflitefile_tool/parser/tflite/tflite_parser.py112
-rwxr-xr-xtools/tflitefile_tool/parser/tflite/tflite_subgraph.py30
-rwxr-xr-xtools/tflitefile_tool/parser/tflite/tflite_tensor.py (renamed from tools/tflitefile_tool/tensor_wrapping.py)62
-rw-r--r--tools/tflitefile_tool/printer/__init__.py0
-rw-r--r--tools/tflitefile_tool/printer/string_builder.py175
-rwxr-xr-xtools/tflitefile_tool/printer/subgraph_printer.py106
-rw-r--r--tools/tflitefile_tool/saver/__init__.py0
-rwxr-xr-xtools/tflitefile_tool/saver/config_saver.py (renamed from tools/tflitefile_tool/config_saver.py)58
-rwxr-xr-xtools/tflitefile_tool/saver/model_saver.py (renamed from tools/tflitefile_tool/model_saver.py)10
-rwxr-xr-xtools/tflitefile_tool/select_operator.py49
-rwxr-xr-xtools/tflitefile_tool/subgraph_printer.py90
-rwxr-xr-xtools/tflitefile_tool/tensor_printer.py85
-rw-r--r--tools/tflitefile_tool/tests/README.md36
-rw-r--r--tools/tflitefile_tool/tests/__init__.py0
-rw-r--r--tools/tflitefile_tool/tests/main.py23
-rw-r--r--tools/tflitefile_tool/tests/test_operator.py74
-rw-r--r--tools/tflitefile_tool/tests/test_setup.py46
-rw-r--r--tools/tflitefile_tool/tests/test_string_builder.py63
-rw-r--r--tools/tflitefile_tool/tests/test_subgraph.py120
-rw-r--r--tools/tflitefile_tool/tests/test_tensor.py67
-rw-r--r--tools/tflitefile_tool/tests/test_tflite_parser.py63
1329 files changed, 53226 insertions, 38787 deletions
diff --git a/.ahub/tcchecker-tca/config.yaml b/.ahub/tcchecker-tca/config.yaml
index 40635d443..95e11d0f9 100644
--- a/.ahub/tcchecker-tca/config.yaml
+++ b/.ahub/tcchecker-tca/config.yaml
@@ -102,13 +102,15 @@ test:
testCaseFolder:
- /compiler/angkor
- /compiler/arser
- - /compiler/circle2circle
+ - /compiler/circle-partitioner
- /compiler/circle-quantizer
- /compiler/circle-tensordump
+ - /compiler/circle2circle
- /compiler/circlechef
- /compiler/circledump
- /compiler/crew
- /compiler/cwrap
+ - /compiler/dio-hdf5
- /compiler/foder
- /compiler/hermes
- /compiler/hermes-std
@@ -122,13 +124,16 @@ test:
- /compiler/luci-eval-driver
- /compiler/luci-pass-value-test
- /compiler/luci-value-test
- - /compiler/mio-circle
+ - /compiler/mio-circle04
- /compiler/mio-tflite
+ - /compiler/mio-tflite260
- /compiler/oops
- /compiler/pepper-assert
+ - /compiler/pepper-csv2vec
- /compiler/pepper-str
- /compiler/pepper-strcast
- /compiler/pp
+ - /compiler/rawdata2hdf5
- /compiler/record-minmax
- /compiler/safemain
- /compiler/souschef
diff --git a/.gitattributes b/.gitattributes
index d36985416..3ef12efd7 100644
--- a/.gitattributes
+++ b/.gitattributes
@@ -1,2 +1,16 @@
-tests/nnapi/specs/* linguist-detectable=false
-res/* linguist-detectable=false
+# Exclude from git language statistics
+tests/nnapi/specs/** linguist-detectable=false
+res/** linguist-detectable=false
+
+# Default: treat files as text
+# - normalize End-Of-Line to LF
+* text eol=lf
+
+# Binary - ignore text file setting
+*.caffemodel -text
+*.png -text
+*.pdf -text
+*.h5 -text
+*.tar.gz -text
+*.tflite -text
+*.bmp -text
diff --git a/compiler/angkor/CMakeLists.txt b/compiler/angkor/CMakeLists.txt
index 44b5e9058..7f5cb88c2 100644
--- a/compiler/angkor/CMakeLists.txt
+++ b/compiler/angkor/CMakeLists.txt
@@ -5,7 +5,9 @@ list(REMOVE_ITEM SOURCES ${TESTS})
# NOTE STATIC is deliberately used here to allow clients to use 'angkor' without installation
add_library(angkor STATIC ${HEADERS} ${SOURCES})
-set_target_properties(angkor PROPERTIES POSITION_INDEPENDENT_CODE ON)
+if (NOT NNCC_LIBRARY_NO_PIC)
+ set_target_properties(angkor PROPERTIES POSITION_INDEPENDENT_CODE ON)
+endif (NOT NNCC_LIBRARY_NO_PIC)
set_target_properties(angkor PROPERTIES LINKER_LANGUAGE CXX)
target_include_directories(angkor PUBLIC include)
target_link_libraries(angkor PRIVATE nncc_common)
diff --git a/compiler/arser/tests/arser.test.cpp b/compiler/arser/tests/arser.test.cpp
index 4e88f0cb7..63121b845 100644
--- a/compiler/arser/tests/arser.test.cpp
+++ b/compiler/arser/tests/arser.test.cpp
@@ -23,30 +23,9 @@
#include "arser/arser.h"
-using namespace arser;
+#include "Prompt.h"
-class Prompt
-{
-public:
- Prompt(const std::string &command)
- {
- std::istringstream iss(command);
- std::vector<std::string> token(std::istream_iterator<std::string>{iss},
- std::istream_iterator<std::string>());
- _arg = std::move(token);
- _argv.reserve(_arg.size());
- for (const auto &t : _arg)
- {
- _argv.push_back(const_cast<char *>(t.data()));
- }
- }
- int argc(void) const { return _argv.size(); }
- char **argv(void) { return _argv.data(); }
-
-private:
- std::vector<char *> _argv;
- std::vector<std::string> _arg;
-};
+using namespace arser;
TEST(BasicTest, option)
{
@@ -57,7 +36,7 @@ TEST(BasicTest, option)
.nargs(0)
.help("It provides additional details as to what the executable is doing");
- Prompt prompt("./executable --verbose");
+ test::Prompt prompt("./executable --verbose");
/* act */
arser.parse(prompt.argc(), prompt.argv());
/* assert */
@@ -79,7 +58,7 @@ TEST(BasicTest, OptionalArgument)
.type(arser::DataType::FLOAT)
.help("Set a frequency as you provided.");
- Prompt prompt("./radio --volume 5 --frequency 128.5");
+ test::Prompt prompt("./radio --volume 5 --frequency 128.5");
/* act */
arser.parse(prompt.argc(), prompt.argv());
/* assert */
@@ -103,7 +82,7 @@ TEST(BasicTest, NonRequiredOptionalArgument_NEG)
.type(arser::DataType::INT32)
.help("Set a volume as you provided.");
- Prompt prompt("./radio"); // empty argument
+ test::Prompt prompt("./radio"); // empty argument
/* act */
arser.parse(prompt.argc(), prompt.argv());
/* assert */
@@ -122,7 +101,7 @@ TEST(BasicTest, RequiredOptionalArgument_NEG)
.required()
.help("Set a volume as you provided.");
- Prompt prompt("./radio");
+ test::Prompt prompt("./radio");
/* act */ /* assert */
EXPECT_THROW(arser.parse(prompt.argc(), prompt.argv()), std::runtime_error);
}
@@ -134,7 +113,7 @@ TEST(BasicTest, OptionalMultipleArgument)
arser.add_argument("--add").nargs(2).type(arser::DataType::INT32_VEC).help("Add two numbers.");
- Prompt prompt("./calculator --add 3 5");
+ test::Prompt prompt("./calculator --add 3 5");
/* act */
arser.parse(prompt.argc(), prompt.argv());
/* assert */
@@ -167,8 +146,8 @@ TEST(BasicTest, MultipleOptionalArgument)
.help("give traning data to this program.")
.required();
- Prompt prompt("./ml --input_path /I/am/in.put --output_path I/am/out.put "
- "--training_data 2 43 234 3 334");
+ test::Prompt prompt("./ml --input_path /I/am/in.put --output_path I/am/out.put "
+ "--training_data 2 43 234 3 334");
/* act */
arser.parse(prompt.argc(), prompt.argv());
/* assert */
@@ -195,7 +174,7 @@ TEST(BasicTest, MultipleFloatValue)
.type(arser::DataType::FLOAT_VEC)
.help("Add two float numbers.");
- Prompt prompt("./calculator --add_float 3.2 5.4");
+ test::Prompt prompt("./calculator --add_float 3.2 5.4");
/* act */
arser.parse(prompt.argc(), prompt.argv());
/* assert */
@@ -217,7 +196,7 @@ TEST(BasicTest, MultipleStringValue)
.type(arser::DataType::STR_VEC)
.help("insert your three favorite color");
- Prompt prompt("./color_factory --three_color red blue yellow");
+ test::Prompt prompt("./color_factory --three_color red blue yellow");
/* act */
arser.parse(prompt.argc(), prompt.argv());
/* assert */
@@ -241,7 +220,7 @@ TEST(BasicTest, ExitWithFunctionCall)
arser.add_argument("--name").nargs(1).type(arser::DataType::STR).help("Name your hero");
- Prompt prompt("./hero --history");
+ test::Prompt prompt("./hero --history");
/* act */ /* assert */
EXPECT_EXIT(arser.parse(prompt.argc(), prompt.argv()), testing::ExitedWithCode(0),
"When I was young..");
@@ -258,7 +237,7 @@ TEST(BasicTest, ExitWithFunctionCallWithBind)
.help("Show version and exit")
.exit_with(std::bind(printVersion, "1.2.0"));
- Prompt prompt("./arser --version");
+ test::Prompt prompt("./arser --version");
/* act */ /* assert */
EXPECT_EXIT(arser.parse(prompt.argc(), prompt.argv()), testing::ExitedWithCode(0),
"arser version : 1.2.0");
@@ -275,7 +254,7 @@ TEST(BasicTest, ExitWithFunctionCallWithLamda)
arser.add_argument("OS").nargs(1).type(arser::DataType::STR).help("The OS you want to boot");
- Prompt prompt("./computer --shutdown");
+ test::Prompt prompt("./computer --shutdown");
/* act */ /* assert */
EXPECT_EXIT(arser.parse(prompt.argc(), prompt.argv()), testing::ExitedWithCode(0), "Good bye..");
}
@@ -315,7 +294,7 @@ TEST(BasicTest, DefaultValue)
.default_value("no name")
.help("Enter your name");
- Prompt prompt("/phone --time 1 52 34 --name arser");
+ test::Prompt prompt("/phone --time 1 52 34 --name arser");
/* act */
arser.parse(prompt.argc(), prompt.argv());
/* assert */
@@ -359,7 +338,7 @@ TEST(BasicTest, shortOption)
.help("output path of this program.")
.required(true);
- Prompt prompt("./driver -i /I/am/in.put --output_path I/am/out.put");
+ test::Prompt prompt("./driver -i /I/am/in.put --output_path I/am/out.put");
/* act */
arser.parse(prompt.argc(), prompt.argv());
/* assert */
@@ -385,7 +364,7 @@ TEST(BasicTest, shortMultipleOption)
.help("output path of this program.")
.required(true);
- Prompt prompt("./driver --in /I/am/in.put -o I/am/out.put");
+ test::Prompt prompt("./driver --in /I/am/in.put -o I/am/out.put");
/* act */
arser.parse(prompt.argc(), prompt.argv());
/* assert */
@@ -411,7 +390,7 @@ TEST(BasicTest, OptWithRequiredDuplicate_NEG)
.help("output path of this program.")
.required(true);
- Prompt prompt("./driver --in /I/am/in.put -o I/am/out.put -i /I/am/duplicate");
+ test::Prompt prompt("./driver --in /I/am/in.put -o I/am/out.put -i /I/am/duplicate");
/* act */ /* assert */
EXPECT_THROW(arser.parse(prompt.argc(), prompt.argv()), std::runtime_error);
}
@@ -432,7 +411,7 @@ TEST(BasicTest, OptWithNonRequiredDuplicate)
.help("output path of this program.")
.required(true);
- Prompt prompt("./driver --in /I/am/in.put -o I/am/out.put -i /I/am/duplicate");
+ test::Prompt prompt("./driver --in /I/am/in.put -o I/am/out.put -i /I/am/duplicate");
/* act */
arser.parse(prompt.argc(), prompt.argv());
/* assert */
@@ -449,7 +428,7 @@ TEST(BasicTest, AccumulateVectorOptions)
arser.add_argument("--specify").nargs(3).accumulated(true).type(arser::DataType::STR_VEC);
- Prompt prompt("./driver --specify a b c --specify 1 2 3");
+ test::Prompt prompt("./driver --specify a b c --specify 1 2 3");
/* act */
arser.parse(prompt.argc(), prompt.argv());
/* assert */
@@ -473,7 +452,7 @@ TEST(BasicTest, AccumulateScalarOptions)
arser.add_argument("--specify").nargs(1).accumulated(true).type(arser::DataType::FLOAT);
- Prompt prompt("./driver --specify 1 --specify 2");
+ test::Prompt prompt("./driver --specify 1 --specify 2");
/* act */
arser.parse(prompt.argc(), prompt.argv());
/* assert */
@@ -491,7 +470,7 @@ TEST(BasicTest, AccumulateScalarOptions_WrongType_NEG)
arser.add_argument("--specify").nargs(1).accumulated(true).type(arser::DataType::FLOAT);
- Prompt prompt("./driver --specify 1 --specify 2");
+ test::Prompt prompt("./driver --specify 1 --specify 2");
/* act */
arser.parse(prompt.argc(), prompt.argv());
/* assert */
diff --git a/compiler/circle-eval-diff/CMakeLists.txt b/compiler/circle-eval-diff/CMakeLists.txt
new file mode 100644
index 000000000..4d86f8097
--- /dev/null
+++ b/compiler/circle-eval-diff/CMakeLists.txt
@@ -0,0 +1,34 @@
+set(DRIVER "driver/Driver.cpp")
+
+file(GLOB_RECURSE SOURCES "src/*.cpp")
+file(GLOB_RECURSE TESTS "src/*.test.cpp")
+list(REMOVE_ITEM SOURCES ${TESTS})
+
+add_executable(circle-eval-diff ${DRIVER} ${SOURCES})
+target_include_directories(circle-eval-diff PRIVATE include)
+
+target_link_libraries(circle-eval-diff arser)
+target_link_libraries(circle-eval-diff safemain)
+target_link_libraries(circle-eval-diff foder)
+target_link_libraries(circle-eval-diff loco)
+target_link_libraries(circle-eval-diff luci_import)
+target_link_libraries(circle-eval-diff luci_lang)
+target_link_libraries(circle-eval-diff luci_interpreter)
+target_link_libraries(circle-eval-diff dio_hdf5)
+target_link_libraries(circle-eval-diff vconone)
+
+if(NOT ENABLE_TEST)
+ return()
+endif(NOT ENABLE_TEST)
+
+# circle-eval-diff is an executable, so we do not link it to the test.
+# Instead, we use TEST_SOURCES to specify the sources used for tests.
+set(TEST_SOURCES
+ "src/MetricPrinter.cpp"
+ "src/Tensor.cpp")
+
+nnas_find_package(GTest REQUIRED)
+GTest_AddTest(circle_eval_diff_test ${TESTS} ${TEST_SOURCES})
+target_include_directories(circle_eval_diff_test PRIVATE src)
+target_link_libraries(circle_eval_diff_test luci_testhelper)
+target_link_libraries(circle_eval_diff_test nncc_coverage)
diff --git a/compiler/circle-eval-diff/README.md b/compiler/circle-eval-diff/README.md
new file mode 100644
index 000000000..a3727cc6d
--- /dev/null
+++ b/compiler/circle-eval-diff/README.md
@@ -0,0 +1,51 @@
+# circle-eval-diff
+
+_circle-eval-diff_ compares inference results of two circle models.
+
+## Use cases
+
+1. _circle-eval-diff_ can be used to evaluate reconstruction errors of quantized models.
+2. _circle-eval-diff_ can be used to verify that an optimization (or any other value-preserving conversion) is safe.
+
+## Usage
+
+Run circle-eval-diff with the following arguments.
+
+--first_model: first model to compare (.circle).
+
+--second_model: second model to compare (.circle).
+
+--first_input_data: input data for the first model (.h5, directory). Random data is used if neither input data argument is given.
+
+--second_input_data: input data for the second model (.h5, directory). Random data is used if neither input data argument is given.
+
+--input_data_format: input data format (h5 (default) or directory).
+
+--metric: metric used to compare inference results (default: MAE).
+
+```
+$ ./circle-eval-diff
+ --first_model <first_model>
+ --second_model <second_model>
+ --first_input_data <first_input_data>
+ --second_input_data <second_input_data>
+ --input_data_format <data_format>
+ --metric <metric>
+```
+
+For example,
+```
+$ ./circle-eval-diff
+ --first_model A.circle
+ --second_model B.circle
+ --first_input_data A.h5
+ --second_input_data B.h5
+ --input_data_format h5
+ --metric MAE
+```
+
+It prints the MAE (Mean Absolute Error) between the inference results of A.circle (run with A.h5) and B.circle (run with B.h5).
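+
+Here, the MAE of two output tensors `a` and `b` with `N` elements each is `(1/N) * Σ|a_i - b_i|`, averaged over all test inputs.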
+
+## Note
+
+Circle models are executed by _luci-interpreter_.
diff --git a/compiler/circle-eval-diff/driver/Driver.cpp b/compiler/circle-eval-diff/driver/Driver.cpp
new file mode 100644
index 000000000..f4a12a403
--- /dev/null
+++ b/compiler/circle-eval-diff/driver/Driver.cpp
@@ -0,0 +1,156 @@
+/*
+ * Copyright (c) 2022 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "CircleEvalDiff.h"
+
+#include <arser/arser.h>
+#include <vconone/vconone.h>
+
+#include <algorithm> // std::transform
+#include <cctype>    // std::tolower
+#include <cstdlib>   // EXIT_SUCCESS
+#include <iostream>
+#include <memory>
+#include <string>
+
+using namespace circle_eval_diff;
+
+namespace
+{
+
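+// Lower-case helper so option values like "MAE"/"mae" or "H5"/"h5" match case-insensitively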
+std::string to_lower_case(std::string s)
+{
+ std::transform(s.begin(), s.end(), s.begin(), [](unsigned char c) { return std::tolower(c); });
+ return s;
+}
+
+Metric to_metric(const std::string &str)
+{
+ if (to_lower_case(str).compare("mae") == 0)
+ return Metric::MAE;
+
+ throw std::runtime_error("Unsupported metric.");
+}
+
+InputFormat to_input_format(const std::string &str)
+{
+ if (to_lower_case(str).compare("h5") == 0)
+ return InputFormat::H5;
+
+ throw std::runtime_error("Unsupported input format.");
+}
+
+void print_version(void)
+{
+ std::cout << "circle-eval-diff version " << vconone::get_string() << std::endl;
+ std::cout << vconone::get_copyright() << std::endl;
+}
+
+} // namespace
+
+int entry(const int argc, char **argv)
+{
+ arser::Arser arser("Compare inference results of two circle models");
+
+ arser.add_argument("--version")
+ .nargs(0)
+ .required(false)
+ .default_value(false)
+ .help("Show version information and exit")
+ .exit_with(print_version);
+
+ arser.add_argument("--first_model")
+ .nargs(1)
+ .type(arser::DataType::STR)
+ .required(true)
+ .help("First input model filepath");
+
+ arser.add_argument("--second_model")
+ .nargs(1)
+ .type(arser::DataType::STR)
+ .required(true)
+ .help("Second input model filepath");
+
+ arser.add_argument("--first_input_data")
+ .nargs(1)
+ .type(arser::DataType::STR)
+ .required(false)
+ .help("Input data filepath for the first model. If not given, circle-eval-diff will run with "
+ "randomly generated data");
+
+ arser.add_argument("--second_input_data")
+ .nargs(1)
+ .type(arser::DataType::STR)
+ .required(false)
+ .help("Input data filepath for the second model. If not given, circle-eval-diff will run with "
+ "randomly generated data");
+
+ arser.add_argument("--metric")
+ .nargs(1)
+ .type(arser::DataType::STR)
+ .required(false)
+ .default_value("MAE")
+ .help("Metric for comparison (default: MAE)");
+
+ arser.add_argument("--input_data_format")
+ .nargs(1)
+ .type(arser::DataType::STR)
+ .required(false)
+ .default_value("h5")
+ .help("Input data format. h5/hdf5 (default) or directory");
+
+ try
+ {
+ arser.parse(argc, argv);
+ }
+ catch (const std::runtime_error &err)
+ {
+ std::cout << err.what() << std::endl;
+ std::cout << arser;
+ return 255;
+ }
+
+ const auto first_model_path = arser.get<std::string>("--first_model");
+ const auto second_model_path = arser.get<std::string>("--second_model");
+
+ // Default values
+ std::string first_input_data_path;
+ std::string second_input_data_path;
+ std::string metric;
+ std::string input_data_format;
+
+ if (arser["--first_input_data"])
+ first_input_data_path = arser.get<std::string>("--first_input_data");
+
+ if (arser["--second_input_data"])
+ second_input_data_path = arser.get<std::string>("--second_input_data");
+
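+ // Require input data paths for both models or for neither (random data is used when neither is given)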
+ if (arser["--first_input_data"] != arser["--second_input_data"])
+ throw std::runtime_error("Input data path should be given for both first_model and "
+ "second_model, or neither must be given.");
+
+ metric = arser.get<std::string>("--metric");
+ input_data_format = arser.get<std::string>("--input_data_format");
+
+ auto ctx = std::make_unique<CircleEvalDiff::Context>();
+ {
+ ctx->first_model_path = first_model_path;
+ ctx->second_model_path = second_model_path;
+ ctx->metric = to_metric(metric);
+ ctx->input_format = to_input_format(input_data_format);
+ }
+
+ CircleEvalDiff ced(std::move(ctx));
+
+ ced.init();
+
+ ced.evalDiff(first_input_data_path, second_input_data_path);
+
+ return EXIT_SUCCESS;
+}
diff --git a/compiler/circle-eval-diff/include/CircleEvalDiff.h b/compiler/circle-eval-diff/include/CircleEvalDiff.h
new file mode 100644
index 000000000..bf6aff46d
--- /dev/null
+++ b/compiler/circle-eval-diff/include/CircleEvalDiff.h
@@ -0,0 +1,74 @@
+/*
+ * Copyright (c) 2022 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __CIRCLE_EVAL_DIFF_H__
+#define __CIRCLE_EVAL_DIFF_H__
+
+#include <luci/IR/Module.h>
+#include <luci_interpreter/Interpreter.h>
+
+#include <string>
+#include <memory>
+
+namespace circle_eval_diff
+{
+
+// Forward declaration
+class ModuleEvalDiff;
+
+enum class Metric
+{
+ Undefined, // For debugging
+ MAE,
+};
+
+enum class InputFormat
+{
+ Undefined, // For debugging
+ H5,
+ // TODO Implement Random, Directory
+};
+
+class CircleEvalDiff final
+{
+public:
+ struct Context
+ {
+ std::string first_model_path;
+ std::string second_model_path;
+ Metric metric = Metric::Undefined;
+ InputFormat input_format = InputFormat::Undefined;
+ };
+
+public:
+ CircleEvalDiff(std::unique_ptr<Context> &&ctx);
+
+ ~CircleEvalDiff();
+
+ void init();
+
+ // Evaluate two circle models for the given input data and compare the results
+ void evalDiff(const std::string &first_input_data_path,
+ const std::string &second_input_data_path) const;
+
+private:
+ std::unique_ptr<Context> _ctx;
+ std::unique_ptr<ModuleEvalDiff> _runner;
+};
+
+} // namespace circle_eval_diff
+
+#endif // __CIRCLE_EVAL_DIFF_H__
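A minimal usage sketch of this API, mirroring the driver above (the model and data
paths, and the wrapper function name, are placeholders for illustration):

#include "CircleEvalDiff.h"

#include <memory>
#include <utility>

int run_eval_diff_example(void) // hypothetical helper, not part of this patch
{
  using namespace circle_eval_diff;

  auto ctx = std::make_unique<CircleEvalDiff::Context>();
  ctx->first_model_path = "first.circle";   // placeholder
  ctx->second_model_path = "second.circle"; // placeholder
  ctx->metric = Metric::MAE;
  ctx->input_format = InputFormat::H5;

  CircleEvalDiff ced(std::move(ctx));
  ced.init(); // imports and verifies both models, then builds the runner
  ced.evalDiff("first.h5", "second.h5"); // placeholder HDF5 data files
  return 0;
}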
diff --git a/compiler/circle-eval-diff/requires.cmake b/compiler/circle-eval-diff/requires.cmake
new file mode 100644
index 000000000..cae9b7c62
--- /dev/null
+++ b/compiler/circle-eval-diff/requires.cmake
@@ -0,0 +1,7 @@
+require("loco")
+require("luci")
+require("luci-interpreter")
+require("dio-hdf5")
+require("safemain")
+require("arser")
+require("vconone")
diff --git a/compiler/circle-eval-diff/src/CircleEvalDiff.cpp b/compiler/circle-eval-diff/src/CircleEvalDiff.cpp
new file mode 100644
index 000000000..c39a11371
--- /dev/null
+++ b/compiler/circle-eval-diff/src/CircleEvalDiff.cpp
@@ -0,0 +1,97 @@
+/*
+ * Copyright (c) 2022 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "CircleEvalDiff.h"
+#include "ModuleEvalDiff.h"
+#include "MetricPrinter.h"
+
+#include <foder/FileLoader.h>
+#include <luci/Importer.h>
+
+#include <stdexcept>
+
+namespace
+{
+
+std::unique_ptr<luci::Module> import(const std::string &model_path)
+{
+ // Load model from the file
+ foder::FileLoader loader{model_path};
+ std::vector<char> model_data = loader.load();
+
+ // Verify flatbuffers
+ flatbuffers::Verifier verifier{reinterpret_cast<const uint8_t *>(model_data.data()),
+ model_data.size()};
+ if (not circle::VerifyModelBuffer(verifier))
+ {
+ throw std::runtime_error("Failed to verify circle '" + model_path + "'");
+ }
+
+ auto module = luci::Importer().importModule(circle::GetModel(model_data.data()));
+
+ if (not module)
+ throw std::runtime_error("Failed to load '" + model_path + "'");
+
+ return module;
+}
+
+} // namespace
+
+namespace circle_eval_diff
+{
+
+CircleEvalDiff::CircleEvalDiff(std::unique_ptr<Context> &&ctx)
+ : _ctx(std::move(ctx)), _runner(nullptr)
+{
+}
+
+CircleEvalDiff::~CircleEvalDiff() = default;
+
+void CircleEvalDiff::init()
+{
+ // Set metric
+ std::unique_ptr<MetricPrinter> metric;
+ switch (_ctx->metric)
+ {
+ case Metric::MAE:
+ metric = std::make_unique<MAEPrinter>();
+ break;
+ default:
+ throw std::runtime_error("Unsupported metric.");
+ }
+
+ auto first_module = import(_ctx->first_model_path);
+ auto second_module = import(_ctx->second_model_path);
+
+ // Set runner
+ switch (_ctx->input_format)
+ {
+ case InputFormat::H5:
+ _runner = std::make_unique<H5InputEvalDiff>(std::move(first_module), std::move(second_module),
+ std::move(metric));
+ break;
+ default:
+ throw std::runtime_error("Unsupported input format.");
+ }
+}
+
+void CircleEvalDiff::evalDiff(const std::string &first_input_data_path,
+ const std::string &second_input_data_path) const
+{
+ _runner->evalDiff(first_input_data_path, second_input_data_path);
+}
+
+} // namespace circle_eval_diff
diff --git a/compiler/circle-eval-diff/src/MetricPrinter.cpp b/compiler/circle-eval-diff/src/MetricPrinter.cpp
new file mode 100644
index 000000000..d65eb9b63
--- /dev/null
+++ b/compiler/circle-eval-diff/src/MetricPrinter.cpp
@@ -0,0 +1,185 @@
+/*
+ * Copyright (c) 2022 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "MetricPrinter.h"
+
+#include <luci/IR/CircleNode.h>
+
+#include <iostream>
+#include <cassert>
+#include <cmath>
+#include <stdexcept>
+
+using Tensor = circle_eval_diff::Tensor;
+
+#define THROW_UNLESS(COND, MSG) \
+ if (not(COND)) \
+ throw std::runtime_error(MSG);
+
+namespace
+{
+
+template <typename T> bool same_shape(const T a, const T b)
+{
+ if (a->rank() != b->rank())
+ return false;
+
+ for (uint32_t i = 0; i < a->rank(); i++)
+ {
+ if (not(a->dim(i) == b->dim(i)))
+ return false;
+ }
+
+ return true;
+}
+
+template <loco::DataType DT> std::shared_ptr<Tensor> to_fp32(const std::shared_ptr<Tensor> &tensor)
+{
+ assert(tensor->dtype() == DT); // FIX_CALLER_UNLESS
+
+ auto fp32_tensor = std::make_shared<Tensor>();
+ {
+ fp32_tensor->dtype(loco::DataType::FLOAT32);
+ fp32_tensor->rank(tensor->rank());
+ for (uint32_t i = 0; i < tensor->rank(); i++)
+ fp32_tensor->dim(i) = tensor->dim(i);
+
+ const auto num_elems = tensor->size<DT>();
+ fp32_tensor->size<loco::DataType::FLOAT32>(num_elems);
+ for (uint32_t i = 0; i < num_elems; i++)
+ fp32_tensor->at<loco::DataType::FLOAT32>(i) = static_cast<float>(tensor->at<DT>(i));
+ }
+ return fp32_tensor;
+}
+
+std::shared_ptr<Tensor> fp32(const std::shared_ptr<Tensor> &tensor)
+{
+ switch (tensor->dtype())
+ {
+ case loco::DataType::FLOAT32:
+ return tensor;
+ case loco::DataType::U8:
+ return to_fp32<loco::DataType::U8>(tensor);
+ case loco::DataType::S16:
+ return to_fp32<loco::DataType::S16>(tensor);
+ default:
+ throw std::runtime_error("Unsupported data type.");
+ }
+}
+
+} // namespace
+
+namespace circle_eval_diff
+{
+
+void MAEPrinter::init(const luci::Module *first, const luci::Module *second)
+{
+ THROW_UNLESS(first != nullptr, "Invalid module.");
+ THROW_UNLESS(second != nullptr, "Invalid module.");
+
+ const auto first_output = loco::output_nodes(first->graph());
+ const auto second_output = loco::output_nodes(second->graph());
+
+ assert(first_output.size() == second_output.size()); // FIX_CALLER_UNLESS
+
+ for (uint32_t i = 0; i < first_output.size(); i++)
+ {
+ const auto first_node = loco::must_cast<luci::CircleNode *>(first_output[i]);
+ const auto second_node = loco::must_cast<luci::CircleNode *>(second_output[i]);
+ assert(same_shape(first_node, second_node)); // FIX_CALLER_UNLESS
+
+ // Create tensors to store intermediate results
+ _intermediate.emplace_back();
+ _intermediate.at(i).dtype(loco::DataType::FLOAT32);
+ // NOTE Use both first_node and second_node to avoid release build break
+ _intermediate.at(i).rank(first_node->rank());
+ uint32_t num_elems = 1;
+ for (uint32_t j = 0; j < second_node->rank(); j++)
+ {
+ _intermediate.at(i).dim(j) = second_node->dim(j);
+ num_elems *= second_node->dim(j).value();
+ }
+ _intermediate.at(i).size<loco::DataType::FLOAT32>(num_elems);
+
+    // Check that the buffer is initialized with zeros
+ for (uint32_t j = 0; j < num_elems; j++)
+ assert(_intermediate.at(i).at<loco::DataType::FLOAT32>(j) == 0.0);
+
+ // Save output names for logging
+ _output_names.emplace_back(first_node->name());
+ }
+}
+
+void MAEPrinter::accum_absolute_error(uint32_t output_idx, const std::shared_ptr<Tensor> &a,
+ const std::shared_ptr<Tensor> &b)
+{
+ assert(a->dtype() == loco::DataType::FLOAT32 and
+ b->dtype() == loco::DataType::FLOAT32); // FIX_CALLER_UNLESS
+ assert(same_shape(a.get(), b.get())); // FIX_CALLER_UNLESS
+ assert(output_idx < _intermediate.size()); // FIX_CALLER_UNLESS
+
+ for (uint32_t i = 0; i < a->size<loco::DataType::FLOAT32>(); i++)
+ {
+ _intermediate.at(output_idx).at<loco::DataType::FLOAT32>(i) +=
+ std::abs(a->at<loco::DataType::FLOAT32>(i) - b->at<loco::DataType::FLOAT32>(i));
+ }
+}
+
+void MAEPrinter::accumulate(const std::vector<std::shared_ptr<Tensor>> &first,
+ const std::vector<std::shared_ptr<Tensor>> &second)
+{
+ assert(first.size() == second.size()); // FIX_CALLER_UNLESS
+ assert(first.size() == _intermediate.size()); // FIX_CALLER_UNLESS
+
+ for (uint32_t output_idx = 0; output_idx < _intermediate.size(); output_idx++)
+ {
+ const auto first_output = first[output_idx];
+ const auto second_output = second[output_idx];
+
+ // Cast data to fp32 and then compute absolute error
+ const auto fp32_first_output = fp32(first_output);
+ const auto fp32_second_output = fp32(second_output);
+
+ accum_absolute_error(output_idx, fp32_first_output, fp32_second_output);
+ }
+
+ _num_data++;
+}
+
+void MAEPrinter::dump(std::ostream &os) const
+{
+ os << "Mean Absolute Error (MAE)" << std::endl;
+
+ for (uint32_t output_idx = 0; output_idx < _intermediate.size(); output_idx++)
+ {
+ const auto name = _output_names.at(output_idx);
+ const auto &inter = _intermediate.at(output_idx);
+ assert(inter.dtype() == loco::DataType::FLOAT32); // FIX_ME_UNLESS
+ const auto elem_count = inter.size<loco::DataType::FLOAT32>();
+
+ // Compute MAE
+ float mae = 0.0;
+ for (uint32_t elem_idx = 0; elem_idx < elem_count; elem_idx++)
+ mae += inter.at<loco::DataType::FLOAT32>(elem_idx);
+
+ mae = mae / elem_count;
+ mae = mae / _num_data;
+
+ os << "MAE for " << name << " is " << mae << std::endl;
+ }
+}
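+
+// NOTE For each output, with N accumulated data records of E elements each, the
+// value printed above is MAE = (1 / (N * E)) * sum of |first - second| over all
+// records and elements: the loop sums the per-element accumulated errors, then
+// divides by the element count and by the number of records.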
+
+} // namespace circle_eval_diff
+
+#undef THROW_UNLESS
diff --git a/compiler/circle-eval-diff/src/MetricPrinter.h b/compiler/circle-eval-diff/src/MetricPrinter.h
new file mode 100644
index 000000000..b51581c31
--- /dev/null
+++ b/compiler/circle-eval-diff/src/MetricPrinter.h
@@ -0,0 +1,90 @@
+/*
+ * Copyright (c) 2022 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __CIRCLE_EVAL_DIFF_METRIC_PRINTER_H__
+#define __CIRCLE_EVAL_DIFF_METRIC_PRINTER_H__
+
+#include <luci/IR/Module.h>
+
+#include "Tensor.h"
+
+#include <vector>
+#include <iostream>
+
+namespace circle_eval_diff
+{
+
+// Abstract base class for printing metrics
+// How to use:
+//
+// MAEPrinter metric; // use a concrete child class (MetricPrinter itself is abstract)
+// metric.init(first_module, second_module); // optional initialization
+//
+// for (..) // Evaluate data one by one
+// {
+//   ..
+//   metric.accumulate(first_result, second_result); // accumulate results
+// }
+//
+// std::cout << &metric << std::endl; // print result
+class MetricPrinter
+{
+public:
+ virtual ~MetricPrinter() = default;
+
+  // A child class can override this function if necessary
+  // NOTE init can be skipped
+ virtual void init(const luci::Module *, const luci::Module *) {}
+
+ // Accumulate results of comparing the first and the second model's outputs
+ virtual void accumulate(const std::vector<std::shared_ptr<Tensor>> &first,
+ const std::vector<std::shared_ptr<Tensor>> &second) = 0;
+
+ // Dump the final result of the corresponding metric
+ virtual void dump(std::ostream &os) const = 0;
+};
+
+static inline std::ostream &operator<<(std::ostream &os, const MetricPrinter *m)
+{
+ m->dump(os);
+ return os;
+}
+
+// Mean Absolute Error
+class MAEPrinter final : public MetricPrinter
+{
+public:
+  void init(const luci::Module *first, const luci::Module *second) override;
+
+  void accumulate(const std::vector<std::shared_ptr<Tensor>> &first,
+                  const std::vector<std::shared_ptr<Tensor>> &second) override;
+
+  void dump(std::ostream &os) const override;
+
+private:
+ void accum_absolute_error(uint32_t index, const std::shared_ptr<Tensor> &a,
+ const std::shared_ptr<Tensor> &b);
+
+private:
+ // Store accumulated sum of absolute error for each output
+ std::vector<Tensor> _intermediate;
+ std::vector<std::string> _output_names;
+ uint32_t _num_data = 0;
+};
+
+} // namespace circle_eval_diff
+
+#endif // __CIRCLE_EVAL_DIFF_METRIC_PRINTER_H__
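To illustrate the extension point, here is a sketch of a hypothetical child metric
(a max-absolute-error printer, not part of this patch; it assumes FLOAT32 result
tensors, unlike MAEPrinter which converts U8/S16 results to fp32 first):

#include "MetricPrinter.h"

#include <algorithm>
#include <cmath>

namespace circle_eval_diff
{

// Hypothetical metric: tracks the largest absolute error seen across all
// outputs, elements, and data records.
class MaxAbsErrorPrinter final : public MetricPrinter
{
public:
  void accumulate(const std::vector<std::shared_ptr<Tensor>> &first,
                  const std::vector<std::shared_ptr<Tensor>> &second) override
  {
    for (uint32_t o = 0; o < first.size(); o++)
    {
      for (uint32_t i = 0; i < first[o]->size<loco::DataType::FLOAT32>(); i++)
      {
        const auto diff = std::abs(first[o]->at<loco::DataType::FLOAT32>(i) -
                                   second[o]->at<loco::DataType::FLOAT32>(i));
        _max = std::max(_max, diff);
      }
    }
  }

  void dump(std::ostream &os) const override
  {
    os << "Max absolute error is " << _max << std::endl;
  }

private:
  float _max = 0.0f;
};

} // namespace circle_eval_diff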
diff --git a/compiler/circle-eval-diff/src/MetricPrinter.test.cpp b/compiler/circle-eval-diff/src/MetricPrinter.test.cpp
new file mode 100644
index 000000000..51ca89799
--- /dev/null
+++ b/compiler/circle-eval-diff/src/MetricPrinter.test.cpp
@@ -0,0 +1,236 @@
+/*
+ * Copyright (c) 2022 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "MetricPrinter.h"
+
+#include <luci/test/TestIOGraph.h>
+
+#include <gtest/gtest.h>
+
+using Tensor = circle_eval_diff::Tensor;
+
+namespace
+{
+
+// TODO Reduce duplicate codes in ResolveCustomOpMatMulPass.cpp
+template <typename T>
+luci::CircleConst *create_const_node(loco::Graph *g, const loco::DataType dtype,
+ const std::vector<uint32_t> &shape,
+ const std::vector<T> &values)
+{
+ auto node = g->nodes()->create<luci::CircleConst>();
+ node->dtype(dtype);
+ node->rank(shape.size());
+
+ uint32_t size = 1;
+ for (uint32_t i = 0; i < shape.size(); ++i)
+ {
+ node->dim(i) = shape.at(i);
+ size *= shape.at(i);
+ }
+ node->shape_status(luci::ShapeStatus::VALID);
+
+#define INIT_VALUES(DT) \
+ { \
+ node->size<DT>(size); \
+ for (uint32_t i = 0; i < values.size(); ++i) \
+ node->at<DT>(i) = values[i]; \
+ }
+
+ switch (dtype)
+ {
+ case loco::DataType::U8:
+ INIT_VALUES(loco::DataType::U8);
+ break;
+ case loco::DataType::S16:
+ INIT_VALUES(loco::DataType::S16);
+ break;
+ case loco::DataType::S32:
+ INIT_VALUES(loco::DataType::S32);
+ break;
+ case loco::DataType::FLOAT32:
+ INIT_VALUES(loco::DataType::FLOAT32)
+ break;
+ default:
+ INTERNAL_EXN("create_const_node called with unsupported type");
+ break;
+ }
+ return node;
+}
+
+/**
+ * Simple graph that adds a constant (addition) to the input
+ *
+ * [Input] [Const] (addition)
+ * \ /
+ * [Add]
+ *
+ */
+class AddGraphlet
+{
+public:
+ AddGraphlet() = default;
+
+ void init(loco::Graph *g, float addition)
+ {
+ std::vector<float> addition_val;
+ for (uint32_t i = 0; i < 16; i++)
+ addition_val.push_back(addition);
+ _add_c = create_const_node(g, loco::DataType::FLOAT32, {1, 16}, addition_val);
+
+ _add = g->nodes()->create<luci::CircleAdd>();
+ _add->y(_add_c);
+ _add->fusedActivationFunction(luci::FusedActFunc::NONE);
+ _add->dtype(loco::DataType::FLOAT32);
+ _add->shape({1, 16});
+ _add->name("add");
+ }
+
+protected:
+ luci::CircleAdd *_add = nullptr;
+ luci::CircleConst *_add_c = nullptr;
+};
+
+class AddOneGraph : public luci::test::TestIOGraph, public AddGraphlet
+{
+public:
+ AddOneGraph() = default;
+
+ void init(void)
+ {
+ luci::test::TestIOGraph::init({1, 4}, {1, 16});
+ AddGraphlet::init(g(), 1.0);
+
+ _add->x(input());
+
+ output()->from(_add);
+ }
+
+ std::unique_ptr<loco::Graph> graph(void) { return std::move(_g); }
+};
+
+class AddTwoGraph : public luci::test::TestIOGraph, public AddGraphlet
+{
+public:
+ AddTwoGraph() = default;
+
+ void init(void)
+ {
+ luci::test::TestIOGraph::init({1, 4}, {1, 16});
+ AddGraphlet::init(g(), 2.0);
+
+ _add->x(input());
+
+ output()->from(_add);
+ }
+
+ std::unique_ptr<loco::Graph> graph(void) { return std::move(_g); }
+};
+
+// Return the number of elements of the node.
+uint32_t numElements(const luci::CircleNode *node)
+{
+ uint32_t num_elem = 1;
+ for (uint32_t i = 0; i < node->rank(); ++i)
+ num_elem *= node->dim(i).value();
+ return num_elem;
+}
+
+// Return a Tensor with the same dtype and shape as the node.
+// The buffer does not contain any data yet.
+std::shared_ptr<Tensor> create_empty_tensor(const luci::CircleNode *node)
+{
+ auto tensor = std::make_shared<Tensor>();
+ {
+ tensor->dtype(node->dtype());
+ tensor->rank(node->rank());
+ for (uint32_t i = 0; i < node->rank(); i++)
+ tensor->dim(i) = node->dim(i);
+ tensor->size<loco::DataType::FLOAT32>(numElements(node));
+ }
+
+ return tensor;
+}
+
+std::shared_ptr<Tensor> output_tensor_with_value(const luci::Module *module, float value)
+{
+ auto outputs = loco::output_nodes(module->graph());
+ assert(outputs.size() == 1);
+ auto output = *outputs.begin();
+ auto output_cnode = loco::must_cast<luci::CircleNode *>(output);
+ auto tensor = create_empty_tensor(output_cnode);
+ auto tensor_size = tensor->size<loco::DataType::FLOAT32>();
+ for (uint32_t i = 0; i < tensor_size; i++)
+ {
+ tensor->at<loco::DataType::FLOAT32>(i) = value;
+ }
+ return tensor;
+}
+
+} // namespace
+
+namespace circle_eval_diff
+{
+
+TEST(CircleEvalMetricPrinterTest, MAE_simple)
+{
+ luci::Module first;
+ AddOneGraph first_g;
+ first_g.init();
+
+ first.add(std::move(first_g.graph()));
+
+ luci::Module second;
+ AddTwoGraph second_g;
+ second_g.init();
+
+ second.add(std::move(second_g.graph()));
+
+ MAEPrinter mae;
+
+ mae.init(&first, &second);
+
+  // This test does not actually evaluate the modules, but creates
+  // fake results.
+ std::vector<std::shared_ptr<Tensor>> first_result;
+ {
+ auto output = output_tensor_with_value(&first, 1.0);
+ first_result.emplace_back(output);
+ }
+
+ std::vector<std::shared_ptr<Tensor>> second_result;
+ {
+ auto output = output_tensor_with_value(&second, 2.0);
+ second_result.emplace_back(output);
+ }
+
+ mae.accumulate(first_result, second_result);
+
+ std::stringstream ss;
+ mae.dump(ss);
+ std::string result = ss.str();
+
+ EXPECT_NE(std::string::npos, result.find("MAE for output_0 is 1"));
+}
+
+TEST(CircleEvalMetricPrinterTest, MAE_init_with_null_NEG)
+{
+ MAEPrinter mae;
+
+ EXPECT_ANY_THROW(mae.init(nullptr, nullptr));
+}
+
+} // namespace circle_eval_diff
diff --git a/compiler/circle-eval-diff/src/ModuleEvalDiff.cpp b/compiler/circle-eval-diff/src/ModuleEvalDiff.cpp
new file mode 100644
index 000000000..85f985873
--- /dev/null
+++ b/compiler/circle-eval-diff/src/ModuleEvalDiff.cpp
@@ -0,0 +1,216 @@
+/*
+ * Copyright (c) 2022 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "ModuleEvalDiff.h"
+#include "Tensor.h"
+
+#include <luci_interpreter/Interpreter.h>
+#include <dio_hdf5/HDF5Importer.h>
+
+#include <string>
+#include <stdexcept>
+#include <iostream>
+#include <cassert>
+
+using Tensor = circle_eval_diff::Tensor;
+using DataType = loco::DataType;
+using Shape = std::vector<loco::Dimension>;
+using HDF5Importer = dio::hdf5::HDF5Importer;
+
+namespace
+{
+
+// Check the type and the shape of CircleInput
+void verifyTypeShape(const luci::CircleInput *input_node, const DataType &dtype, const Shape &shape)
+{
+ // Type check
+ if (dtype != input_node->dtype())
+ throw std::runtime_error("Wrong input type.");
+
+ if (shape.size() != input_node->rank())
+ throw std::runtime_error("Input rank mismatch.");
+
+ for (uint32_t i = 0; i < shape.size(); i++)
+ {
+ if (not(shape.at(i) == input_node->dim(i)))
+ throw std::runtime_error("Input shape mismatch.");
+ }
+}
+
+// Return the number of elements of the node.
+uint32_t numElements(const luci::CircleNode *node)
+{
+ uint32_t num_elem = 1;
+ for (uint32_t i = 0; i < node->rank(); ++i)
+ num_elem *= node->dim(i).value();
+ return num_elem;
+}
+
+// Return a Tensor with the same dtype and shape as the node.
+// The buffer does not contain any data yet.
+std::shared_ptr<Tensor> createEmptyTensor(const luci::CircleNode *node)
+{
+ auto tensor = std::make_shared<Tensor>();
+ {
+ tensor->dtype(node->dtype());
+ tensor->rank(node->rank());
+ for (uint32_t i = 0; i < node->rank(); i++)
+ tensor->dim(i) = node->dim(i);
+
+ switch (node->dtype())
+ {
+ case loco::DataType::FLOAT32:
+ tensor->size<loco::DataType::FLOAT32>(numElements(node));
+ break;
+ case loco::DataType::U8:
+ tensor->size<loco::DataType::U8>(numElements(node));
+ break;
+ case loco::DataType::S16:
+ tensor->size<loco::DataType::S16>(numElements(node));
+ break;
+ case loco::DataType::S32:
+ tensor->size<loco::DataType::S32>(numElements(node));
+ break;
+ case loco::DataType::S64:
+ tensor->size<loco::DataType::S64>(numElements(node));
+ break;
+ default:
+ throw std::runtime_error("Unsupported input tensor dtype for " + node->name());
+ }
+ }
+
+ return tensor;
+}
+
+} // namespace
+
+namespace circle_eval_diff
+{
+
+void H5InputEvalDiff::evalDiff(const std::string &first_input_data_path,
+ const std::string &second_input_data_path) const
+{
+ const auto interp = std::make_unique<luci_interpreter::Interpreter>(_first_module.get());
+
+ _metric->init(_first_module.get(), _second_module.get());
+
+ try
+ {
+ HDF5Importer first_h5(first_input_data_path);
+ first_h5.importGroup("value");
+
+ HDF5Importer second_h5(second_input_data_path);
+ second_h5.importGroup("value");
+
+ const auto first_num_data = first_h5.numData();
+ const auto second_num_data = second_h5.numData();
+
+ if (first_num_data != second_num_data)
+      throw std::runtime_error(
+        "The number of data records in the first and the second data file does not match.");
+
+ if (first_num_data == 0)
+ throw std::runtime_error("Input data file does not contain any record.");
+
+ const auto first_input_nodes = loco::input_nodes(_first_module->graph());
+ const auto first_num_inputs = first_input_nodes.size();
+ const auto first_output_nodes = loco::output_nodes(_first_module->graph());
+ const auto first_num_outputs = first_output_nodes.size();
+
+ const auto second_input_nodes = loco::input_nodes(_second_module->graph());
+ const auto second_num_inputs = second_input_nodes.size();
+ const auto second_output_nodes = loco::output_nodes(_second_module->graph());
+ const auto second_num_outputs = second_output_nodes.size();
+
+ for (int32_t data_idx = 0; data_idx < first_num_data; data_idx++)
+ {
+      std::cout << "Evaluating " << data_idx << "th data" << std::endl;
+
+ if (first_num_inputs != first_h5.numInputs(data_idx) ||
+ second_num_inputs != second_h5.numInputs(data_idx))
+ throw std::runtime_error("Wrong number of inputs in " + std::to_string(data_idx) +
+ "th data.");
+
+ // Do inference and return output
+ auto eval = [&](HDF5Importer &h5, uint32_t num_inputs,
+ const std::vector<loco::Node *> &input_nodes, uint32_t num_outputs,
+ const std::vector<loco::Node *> &output_nodes) {
+ // Write input data
+ for (uint32_t input_idx = 0; input_idx < num_inputs; input_idx++)
+ {
+ const auto *input_node =
+ loco::must_cast<const luci::CircleInput *>(input_nodes[input_idx]);
+ assert(input_node->index() == input_idx);
+
+ auto tensor = createEmptyTensor(input_node);
+ if (h5.isRawData())
+ {
+ h5.readTensor(data_idx, input_idx, tensor->buffer());
+ }
+ else
+ {
+ DataType dtype;
+ Shape shape;
+ h5.readTensor(data_idx, input_idx, &dtype, &shape, tensor->buffer());
+
+ // Check the type and the shape of the input data is valid
+ verifyTypeShape(input_node, dtype, shape);
+ }
+
+ interp->writeInputTensor(input_node, tensor->buffer(), tensor->byte_size());
+ }
+
+ // Interpret
+ interp->interpret();
+
+ // Read output data
+ std::vector<std::shared_ptr<Tensor>> outputs;
+ for (uint32_t output_idx = 0; output_idx < num_outputs; output_idx++)
+ {
+ const auto *output_node =
+ loco::must_cast<const luci::CircleOutput *>(output_nodes[output_idx]);
+ assert(output_node->index() == output_idx);
+
+ auto tensor = createEmptyTensor(output_node);
+ interp->readOutputTensor(output_node, tensor->buffer(), tensor->byte_size());
+ outputs.emplace_back(tensor);
+ }
+
+ return outputs;
+ };
+
+ auto first_output =
+ eval(first_h5, first_num_inputs, first_input_nodes, first_num_outputs, first_output_nodes);
+ auto second_output = eval(second_h5, second_num_inputs, second_input_nodes,
+ second_num_outputs, second_output_nodes);
+
+ // Accumulate diffs
+ _metric->accumulate(first_output, second_output);
+ }
+
+    std::cout << "Evaluation finished. Number of data records: " << first_num_data << std::endl;
+ }
+ catch (const H5::Exception &e)
+ {
+ H5::Exception::printErrorStack();
+ throw std::runtime_error("HDF5 error occurred.");
+ }
+
+ // Print metric
+ std::cout << _metric.get() << std::endl;
+}
+
+} // namespace circle_eval_diff
diff --git a/compiler/circle-eval-diff/src/ModuleEvalDiff.h b/compiler/circle-eval-diff/src/ModuleEvalDiff.h
new file mode 100644
index 000000000..c7642f60b
--- /dev/null
+++ b/compiler/circle-eval-diff/src/ModuleEvalDiff.h
@@ -0,0 +1,67 @@
+/*
+ * Copyright (c) 2022 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __CIRCLE_EVAL_DIFF_MODULE_EVAL_DIFF_H__
+#define __CIRCLE_EVAL_DIFF_MODULE_EVAL_DIFF_H__
+
+#include "MetricPrinter.h"
+
+#include <luci/IR/Module.h>
+
+#include <memory>
+
+namespace circle_eval_diff
+{
+
+class ModuleEvalDiff
+{
+public:
+ ModuleEvalDiff(std::unique_ptr<luci::Module> &&first, std::unique_ptr<luci::Module> &&second,
+ std::unique_ptr<MetricPrinter> &&metric)
+ : _first_module(std::move(first)), _second_module(std::move(second)), _metric(std::move(metric))
+ {
+ }
+
+ virtual ~ModuleEvalDiff() = default;
+
+ // Implement this in the child class
+ virtual void evalDiff(const std::string &first_input_data_path,
+ const std::string &second_input_data_path) const = 0;
+
+protected:
+ std::unique_ptr<luci::Module> _first_module;
+ std::unique_ptr<luci::Module> _second_module;
+ std::unique_ptr<MetricPrinter> _metric;
+};
+
+class H5InputEvalDiff final : public ModuleEvalDiff
+{
+public:
+ H5InputEvalDiff(std::unique_ptr<luci::Module> &&first, std::unique_ptr<luci::Module> &&second,
+ std::unique_ptr<MetricPrinter> &&metric)
+ : ModuleEvalDiff(std::move(first), std::move(second), std::move(metric))
+ {
+ }
+
+  void evalDiff(const std::string &first_input_data_path,
+                const std::string &second_input_data_path) const override;
+};
+
+// TODO Implement ModuleEvalDiff for random input and directory input
+
+} // namespace circle_eval_diff
+
+#endif // __CIRCLE_EVAL_DIFF_MODULE_EVAL_DIFF_H__
diff --git a/compiler/circle-eval-diff/src/Tensor.cpp b/compiler/circle-eval-diff/src/Tensor.cpp
new file mode 100644
index 000000000..6710e8c3d
--- /dev/null
+++ b/compiler/circle-eval-diff/src/Tensor.cpp
@@ -0,0 +1,72 @@
+/*
+ * Copyright (c) 2022 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "Tensor.h"
+
+#include <cassert>
+#include <stdexcept>
+
+namespace circle_eval_diff
+{
+
+#define THROW_UNLESS(COND, MSG) \
+ if (not(COND)) \
+ throw std::runtime_error(MSG);
+
+template <loco::DataType DT> uint32_t Tensor::size(void) const
+{
+ assert(dtype() == DT);
+ assert(_data.size() % sizeof(typename loco::DataTypeImpl<DT>::Type) == 0);
+ return _data.size() / sizeof(typename loco::DataTypeImpl<DT>::Type);
+}
+
+template <loco::DataType DT> void Tensor::size(uint32_t l)
+{
+ assert(dtype() == DT);
+ _data.resize(l * sizeof(typename loco::DataTypeImpl<DT>::Type));
+}
+
+template <loco::DataType DT>
+const typename loco::DataTypeImpl<DT>::Type &Tensor::at(uint32_t n) const
+{
+ assert(dtype() == DT);
+ THROW_UNLESS(n < size<DT>(), "Access to out of buffer boundary.");
+ return *(reinterpret_cast<const typename loco::DataTypeImpl<DT>::Type *>(_data.data()) + n);
+}
+
+template <loco::DataType DT> typename loco::DataTypeImpl<DT>::Type &Tensor::at(uint32_t n)
+{
+ assert(dtype() == DT);
+ THROW_UNLESS(n < size<DT>(), "Access to out of buffer boundary.");
+ return *(reinterpret_cast<typename loco::DataTypeImpl<DT>::Type *>(_data.data()) + n);
+}
+
+#undef THROW_UNLESS
+
+#define INSTANTIATE(DT) \
+ template uint32_t Tensor::size<DT>(void) const; \
+ template void Tensor::size<DT>(uint32_t); \
+ template const typename loco::DataTypeImpl<DT>::Type &Tensor::at<DT>(uint32_t) const; \
+ template typename loco::DataTypeImpl<DT>::Type &Tensor::at<DT>(uint32_t);
+
+INSTANTIATE(loco::DataType::S64);
+INSTANTIATE(loco::DataType::S32);
+INSTANTIATE(loco::DataType::S16);
+INSTANTIATE(loco::DataType::U8);
+INSTANTIATE(loco::DataType::FLOAT32);
+
+#undef INSTANTIATE
+
+} // namespace circle_eval_diff
diff --git a/compiler/circle-eval-diff/src/Tensor.h b/compiler/circle-eval-diff/src/Tensor.h
new file mode 100644
index 000000000..65ab60638
--- /dev/null
+++ b/compiler/circle-eval-diff/src/Tensor.h
@@ -0,0 +1,81 @@
+/*
+ * Copyright (c) 2022 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __CIRCLE_EVAL_DIFF_TENSOR_H__
+#define __CIRCLE_EVAL_DIFF_TENSOR_H__
+
+#include <loco.h>
+
+#include <vector>
+
+namespace circle_eval_diff
+{
+
+struct TensorDataType
+{
+public:
+ const loco::DataType &dtype(void) const { return _dtype; }
+ void dtype(const loco::DataType &dtype) { _dtype = dtype; }
+
+private:
+ loco::DataType _dtype = loco::DataType::Unknown;
+};
+
+struct TensorShape
+{
+public:
+ uint32_t rank(void) const { return _dims.size(); }
+ void rank(uint32_t value) { _dims.resize(value); }
+
+ const loco::Dimension &dim(uint32_t axis) const { return _dims.at(axis); }
+ loco::Dimension &dim(uint32_t axis) { return _dims.at(axis); }
+
+ void shape(std::initializer_list<uint32_t> dims)
+ {
+ rank(dims.size());
+
+ uint32_t axis = 0;
+ for (auto d : dims)
+ {
+ dim(axis++) = d;
+ }
+ }
+
+private:
+ std::vector<loco::Dimension> _dims;
+};
+
+// Tensor has three kinds of data
+// 1. DataType (_dtype)
+// 2. Shape (_dims)
+// 3. Buffer (_data)
+struct Tensor final : public TensorShape, public TensorDataType
+{
+public:
+ template <loco::DataType DT> uint32_t size(void) const;
+ template <loco::DataType DT> void size(uint32_t size);
+ template <loco::DataType DT> const typename loco::DataTypeImpl<DT>::Type &at(uint32_t n) const;
+ template <loco::DataType DT> typename loco::DataTypeImpl<DT>::Type &at(uint32_t n);
+ uint8_t *buffer(void) { return _data.data(); }
+ uint32_t byte_size(void) const { return _data.size(); }
+
+private:
+ std::vector<uint8_t> _data;
+};
+
+} // namespace circle_eval_diff
+
+#endif // __CIRCLE_EVAL_DIFF_TENSOR_H__
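A minimal sketch of the Tensor accessors (the unit test below exercises the same
surface; the wrapper function is illustrative only):

#include "Tensor.h"

float tensor_sketch(void)
{
  circle_eval_diff::Tensor t;

  t.shape({1, 2, 3}); // rank 3, six elements
  t.dtype(loco::DataType::FLOAT32);
  t.size<loco::DataType::FLOAT32>(6); // allocates the buffer (6 * sizeof(float) bytes)

  for (uint32_t i = 0; i < 6; i++)
    t.at<loco::DataType::FLOAT32>(i) = static_cast<float>(i);

  return t.at<loco::DataType::FLOAT32>(5); // an out-of-range index would throw
}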
diff --git a/compiler/circle-eval-diff/src/Tensor.test.cpp b/compiler/circle-eval-diff/src/Tensor.test.cpp
new file mode 100644
index 000000000..3bdeaecdf
--- /dev/null
+++ b/compiler/circle-eval-diff/src/Tensor.test.cpp
@@ -0,0 +1,101 @@
+/*
+ * Copyright (c) 2022 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "Tensor.h"
+
+#include <gtest/gtest.h>
+
+using Tensor = circle_eval_diff::Tensor;
+
+namespace
+{
+
+template <loco::DataType DT> void test_out_of_buffer_range()
+{
+ Tensor t;
+
+ t.shape({1, 2, 3});
+ t.dtype(DT);
+ t.size<DT>(6);
+
+ EXPECT_ANY_THROW(t.at<DT>(6));
+}
+
+template <loco::DataType DT> void test_getter_setter()
+{
+ Tensor t;
+
+ // Check shape
+ t.shape({1, 2, 3});
+ EXPECT_EQ(3, t.rank());
+ EXPECT_EQ(1, t.dim(0));
+ EXPECT_EQ(2, t.dim(1));
+ EXPECT_EQ(3, t.dim(2));
+
+ // Check dtype
+ t.dtype(DT);
+ EXPECT_EQ(DT, t.dtype());
+
+ // Check buffer
+ t.size<DT>(6);
+ EXPECT_EQ(6 * sizeof(typename loco::DataTypeImpl<DT>::Type), t.byte_size());
+ for (uint32_t i = 0; i < 6; i++)
+ t.at<DT>(i) = i;
+
+ for (uint32_t i = 0; i < 6; i++)
+ EXPECT_EQ(i, t.at<DT>(i));
+}
+
+} // namespace
+
+TEST(CircleEvalDiffTensorTest, constructor)
+{
+ Tensor t;
+
+ EXPECT_EQ(0, t.byte_size());
+ EXPECT_EQ(0, t.rank());
+ EXPECT_EQ(loco::DataType::Unknown, t.dtype());
+}
+
+TEST(CircleEvalDiffTensorTest, getter_setter)
+{
+ test_getter_setter<loco::DataType::S64>();
+ test_getter_setter<loco::DataType::S32>();
+ test_getter_setter<loco::DataType::S16>();
+ test_getter_setter<loco::DataType::U8>();
+ test_getter_setter<loco::DataType::FLOAT32>();
+
+ SUCCEED();
+}
+
+TEST(CircleEvalDiffTensorTest, out_of_shape_range_NEG)
+{
+ Tensor t;
+ t.shape({1, 2, 2, 3});
+
+ EXPECT_ANY_THROW(t.dim(4));
+}
+
+TEST(CircleEvalDiffTensorTest, out_of_buffer_range_NEG)
+{
+ test_out_of_buffer_range<loco::DataType::S64>();
+ test_out_of_buffer_range<loco::DataType::S32>();
+ test_out_of_buffer_range<loco::DataType::S16>();
+ test_out_of_buffer_range<loco::DataType::U8>();
+ test_out_of_buffer_range<loco::DataType::FLOAT32>();
+
+ SUCCEED();
+}
diff --git a/compiler/circle-execution-plan/CMakeLists.txt b/compiler/circle-execution-plan/CMakeLists.txt
index 115d24860..2f657c171 100644
--- a/compiler/circle-execution-plan/CMakeLists.txt
+++ b/compiler/circle-execution-plan/CMakeLists.txt
@@ -1,4 +1,9 @@
set(SOURCES
+ pal/IScratchpadHelper.h
+ pal/ScratchpadHelperLinux.h
+ pal/ScratchpadHelperMCU.h
+ pal/ScratchpadHelperCMSISNN.h
+ pal/TargetPlatform.h
src/CircleExecutionPlan.cpp
src/ExecutionPlanner.cpp
src/ExecutionPlanner.h
@@ -13,4 +18,5 @@ target_link_libraries(circle_execution_plan luci_export)
target_link_libraries(circle_execution_plan luci_plan)
target_link_libraries(circle_execution_plan arser)
+target_include_directories(circle_execution_plan PUBLIC "${CMAKE_CURRENT_SOURCE_DIR}/pal")
install(TARGETS circle_execution_plan DESTINATION bin)
diff --git a/compiler/circle-execution-plan/README.md b/compiler/circle-execution-plan/README.md
index e789a55db..dbb7d4f85 100644
--- a/compiler/circle-execution-plan/README.md
+++ b/compiler/circle-execution-plan/README.md
@@ -10,13 +10,12 @@ The output circle file contains plan (`CircleNodeMemoryPlan`) information for ev
- number which determines order in which nodes will be executed
- memory offsets for node output tensors from the beginning of shared memory buffer
-In order to record and read this metadata, we use `CircleImportMetadata` and `CircleExportMetadata`.
-For this purpose we use `std::map<uint32_t, std::vector<uint32_t>> _memory_plan_table` which for each node with key ID contains encoded `CircleNodeMemoryPlan` data.
+In order to record and read this data, we use `luci::CircleNodeExecutionPlan`.
### Execution plan building
In order to build "execution plan" we use `ExecutionPlanner` class.
-The main method is `get_execution_plan()` which for each node finds and writes to its annotations
+The main method is `make_execution_plan()` which for each node finds and writes to its annotations
"execution plan". For this purpose there are two steps:
- determining the order of execution of nodes, which is stored in `_ordered_nodes` vector.
Now for this purpose there is only one default method `get_default_execution_order_plan()` that uses `loco::postorder_traversal(const std::vector<loco::Node *> &roots)`.
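In code, the tool drives this step exactly as the updated driver in this patch does:

circle_planner::ExecutionPlanner execution_planner(module->graph(), {platform_type, use_dsp});
execution_planner.make_execution_plan();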
diff --git a/compiler/circle-execution-plan/pal/IScratchpadHelper.h b/compiler/circle-execution-plan/pal/IScratchpadHelper.h
new file mode 100644
index 000000000..f5a991526
--- /dev/null
+++ b/compiler/circle-execution-plan/pal/IScratchpadHelper.h
@@ -0,0 +1,51 @@
+/*
+ * Copyright (c) 2022 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef CIRCLE_EXECUTION_PLAN_ISCRATCHPAD_HELPER_H
+#define CIRCLE_EXECUTION_PLAN_ISCRATCHPAD_HELPER_H
+
+#include <luci/IR/Nodes/CircleAveragePool2D.h>
+#include <luci/IR/Nodes/CircleBatchMatMul.h>
+#include <luci/IR/Nodes/CircleConv2D.h>
+#include <luci/IR/Nodes/CircleDepthwiseConv2D.h>
+#include <luci/IR/Nodes/CircleSVDF.h>
+#include <cstdint>
+#include <vector>
+
+namespace circle_planner
+{
+
+class IScratchpadHelper
+{
+public:
+ virtual uint32_t
+ ComputeScratchpadSizeAveragePool2d(const luci::CircleAveragePool2D *avg_pool) = 0;
+
+ virtual std::vector<uint32_t>
+ ComputeScratchpadSizeBatchMatMul(const luci::CircleBatchMatMul *batch_mat_mul) = 0;
+
+ virtual uint32_t ComputeScratchpadSizeConv2d(const luci::CircleConv2D *conv) = 0;
+
+ virtual uint32_t
+ ComputeScratchpadSizeDepthwiseConv2d(const luci::CircleDepthwiseConv2D *depthwise_conv) = 0;
+
+ virtual std::vector<uint32_t> ComputeScratchpadSizeSVDF(const luci::CircleSVDF *svdf) = 0;
+
+ virtual ~IScratchpadHelper() = default;
+};
+
+} // namespace circle_planner
+
+#endif // CIRCLE_EXECUTION_PLAN_ISCRATCHPAD_HELPER_H
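A sketch of how a caller can query a helper once one has been constructed (helper
selection from `TargetPlatform` happens inside `ExecutionPlanner`, which is not part
of this excerpt; `conv` is assumed to be a valid `luci::CircleConv2D *`):

#include "ScratchpadHelperLinux.h"

uint32_t conv_scratchpad_bytes(const luci::CircleConv2D *conv)
{
  circle_planner::ScratchpadHelperLinux helper;
  // Returns the im2col buffer size in bytes, or 0 when im2col is not needed.
  return helper.ComputeScratchpadSizeConv2d(conv);
}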
diff --git a/compiler/circle-execution-plan/pal/ScratchpadHelperCMSISNN.h b/compiler/circle-execution-plan/pal/ScratchpadHelperCMSISNN.h
new file mode 100644
index 000000000..5369c0937
--- /dev/null
+++ b/compiler/circle-execution-plan/pal/ScratchpadHelperCMSISNN.h
@@ -0,0 +1,187 @@
+/*
+ * Copyright (c) 2022 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef CIRCLE_EXECUTION_PLAN_SCRATCHPAD_HELPER_CMSISNN_H
+#define CIRCLE_EXECUTION_PLAN_SCRATCHPAD_HELPER_CMSISNN_H
+
+#include "IScratchpadHelper.h"
+#include <cassert>
+#include <stdexcept>
+
+namespace circle_planner
+{
+
+namespace
+{
+
+inline int32_t computePadding(int32_t stride, int32_t dilation_rate, int32_t in_size,
+ int32_t filter_size, int32_t out_size)
+{
+ const int32_t effective_filter_size = (filter_size - 1) * dilation_rate + 1;
+ const int32_t padding = ((out_size - 1) * stride + effective_filter_size - in_size) / 2;
+ return padding > 0 ? padding : 0;
+}
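+
+// For intuition: with stride 1, dilation 1, in_size 5, filter_size 3 and out_size 5,
+// effective_filter_size = (3 - 1) * 1 + 1 = 3 and padding = ((5 - 1) * 1 + 3 - 5) / 2 = 1.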
+
+} // namespace
+
+class ScratchpadHelperCMSISNN : public IScratchpadHelper
+{
+public:
+ explicit ScratchpadHelperCMSISNN(bool use_dsp) : _use_dsp(use_dsp)
+ {
+ // Do nothing
+ }
+
+ uint32_t ComputeScratchpadSizeAveragePool2d(const luci::CircleAveragePool2D *avg_pool) final
+ {
+ // Main logic of arm_avgpool_s8_get_buffer_size
+
+ const auto avg_pool_input = loco::must_cast<luci::CircleNode *>(avg_pool->value());
+
+ if (avg_pool_input->dtype() != loco::DataType::S8 or !_use_dsp)
+ return 0;
+
+ const auto depth = static_cast<int32_t>(avg_pool_input->dim(3).value());
+
+ return depth * sizeof(int32_t);
+ }
+
+ std::vector<uint32_t>
+ ComputeScratchpadSizeBatchMatMul(const luci::CircleBatchMatMul *batch_mat_mul) final
+ {
+ throw std::runtime_error("BatchMatMul is not currently supported for cmsisnn platform");
+ }
+
+ uint32_t ComputeScratchpadSizeConv2d(const luci::CircleConv2D *conv) final
+ {
+ // Main logic of arm_convolve_wrapper_s8_get_buffer_size
+
+ const auto dilation_height_factor = static_cast<int32_t>(conv->dilation()->h());
+ const auto dilation_width_factor = static_cast<int32_t>(conv->dilation()->w());
+
+ const auto conv_input = loco::must_cast<luci::CircleNode *>(conv->input());
+ const auto filter = loco::must_cast<luci::CircleNode *>(conv->filter());
+
+ if (dilation_width_factor != 1 or dilation_height_factor != 1 or
+ conv_input->dtype() != loco::DataType::S8)
+ {
+ return 0;
+ }
+
+ const auto input_depth = static_cast<int32_t>(conv_input->dim(3).value());
+
+ const auto input_height = static_cast<int32_t>(conv_input->dim(1).value());
+ const auto input_width = static_cast<int32_t>(conv_input->dim(2).value());
+
+ const auto filter_height = static_cast<int32_t>(filter->dim(1).value());
+ const auto filter_width = static_cast<int32_t>(filter->dim(2).value());
+
+ const auto stride_height = static_cast<int32_t>(conv->stride()->h());
+ const auto stride_width = static_cast<int32_t>(conv->stride()->w());
+
+ const auto output_height = static_cast<int32_t>(conv->dim(1).value());
+ const auto output_width = static_cast<int32_t>(conv->dim(2).value());
+
+ assert(conv_input->quantparam()->zerop.size() == 1);
+ assert(conv->quantparam()->zerop.size() == 1);
+
+ const auto padding_height = computePadding(stride_height, dilation_height_factor, input_height,
+ filter_height, output_height);
+ const auto padding_width =
+ computePadding(stride_width, dilation_width_factor, input_width, filter_width, output_width);
+
+ if ((padding_width == 0) && (padding_height == 0) && (input_depth % 4 == 0) &&
+ (stride_width == 1) && (stride_height == 1) && (filter_width == 1) && (filter_height == 1))
+ {
+ return 0;
+ }
+
+ if (_use_dsp)
+ {
+ return (2 * input_depth * filter_width * filter_height) * sizeof(int16_t);
+ }
+
+ return 0;
+ }
+
+ uint32_t
+ ComputeScratchpadSizeDepthwiseConv2d(const luci::CircleDepthwiseConv2D *depthwise_conv) final
+ {
+ // Main logic of arm_depthwise_conv_wrapper_s8_get_buffer_size
+
+ const auto dilation_height_factor = static_cast<int32_t>(depthwise_conv->dilation()->h());
+ const auto dilation_width_factor = static_cast<int32_t>(depthwise_conv->dilation()->w());
+
+ const auto depthwise_conv_input = loco::must_cast<luci::CircleNode *>(depthwise_conv->input());
+ const auto filter = loco::must_cast<luci::CircleNode *>(depthwise_conv->filter());
+
+ if (dilation_width_factor != 1 or dilation_height_factor != 1 or
+ depthwise_conv_input->dtype() != loco::DataType::S8)
+ {
+ return 0;
+ }
+
+ const auto input_depth = static_cast<int32_t>(depthwise_conv_input->dim(3).value());
+ const auto output_depth = static_cast<int32_t>(depthwise_conv->dim(3).value());
+ const auto batch_size = static_cast<int32_t>(depthwise_conv_input->dim(0).value());
+
+ if (input_depth != output_depth or batch_size != 1 or !_use_dsp)
+ return 0;
+
+ const auto filter_height = static_cast<int32_t>(filter->dim(1).value());
+ const auto filter_width = static_cast<int32_t>(filter->dim(2).value());
+
+ return input_depth * filter_height * filter_width * sizeof(int16_t);
+ }
+
+ std::vector<uint32_t> ComputeScratchpadSizeSVDF(const luci::CircleSVDF *svdf) final
+ {
+ const auto svdf_input = loco::must_cast<luci::CircleNode *>(svdf->input());
+ const auto weight_feature_input = loco::must_cast<luci::CircleNode *>(svdf->weight_feature());
+
+ if (svdf_input->dtype() == loco::DataType::FLOAT32 and
+ (weight_feature_input->dtype() == loco::DataType::S8 or
+ weight_feature_input->dtype() == loco::DataType::U8))
+ {
+      throw std::runtime_error("Hybrid type is not currently supported for cmsisnn platform");
+ }
+
+ std::vector<uint32_t> scratchpad_sizes;
+
+ const auto batch_size = svdf_input->dim(0).value();
+ const auto num_filters = weight_feature_input->dim(0).value();
+ const auto rank = svdf->svdf_rank();
+ const auto num_units = num_filters / rank;
+
+ if (svdf_input->dtype() == loco::DataType::S8)
+ {
+ scratchpad_sizes.push_back(batch_size * num_filters * sizeof(int32_t));
+ scratchpad_sizes.push_back(batch_size * num_units * sizeof(int32_t));
+ }
+ else
+ {
+ scratchpad_sizes.push_back(batch_size * num_filters * sizeof(float));
+ }
+
+ return scratchpad_sizes;
+ }
+
+private:
+ bool _use_dsp;
+};
+
+} // namespace circle_planner
+
+#endif // CIRCLE_EXECUTION_PLAN_SCRATCHPAD_HELPER_CMSISNN_H
diff --git a/compiler/circle-execution-plan/pal/ScratchpadHelperLinux.h b/compiler/circle-execution-plan/pal/ScratchpadHelperLinux.h
new file mode 100644
index 000000000..811aa67c3
--- /dev/null
+++ b/compiler/circle-execution-plan/pal/ScratchpadHelperLinux.h
@@ -0,0 +1,137 @@
+/*
+ * Copyright (c) 2022 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef CIRCLE_EXECUTION_PLAN_SCRATCHPAD_HELPER_LINUX_H
+#define CIRCLE_EXECUTION_PLAN_SCRATCHPAD_HELPER_LINUX_H
+
+#include "IScratchpadHelper.h"
+#include <loco/IR/DataTypeTraits.h>
+#include <stdexcept>
+
+namespace circle_planner
+{
+
+class ScratchpadHelperLinux : public IScratchpadHelper
+{
+public:
+ uint32_t ComputeScratchpadSizeAveragePool2d(const luci::CircleAveragePool2D *avg_pool) final
+ {
+ // for linux AveragePool2d scratchpad tensors size = 0
+ return 0;
+ }
+
+ std::vector<uint32_t>
+ ComputeScratchpadSizeBatchMatMul(const luci::CircleBatchMatMul *batch_mat_mul) final
+ {
+ const auto lhs = loco::must_cast<luci::CircleNode *>(batch_mat_mul->x());
+ const auto rhs = loco::must_cast<luci::CircleNode *>(batch_mat_mul->y());
+
+ std::vector<uint32_t> scratchpad_sizes;
+
+ // Scratchpad for lhs
+ uint32_t scratchpad_size = 1;
+    for (uint32_t i = 0; i < lhs->rank(); ++i)
+ scratchpad_size *= lhs->dim(i).value();
+
+ scratchpad_sizes.push_back(scratchpad_size * loco::size(lhs->dtype()));
+
+ // Scratchpad for rhs
+ scratchpad_size = 1;
+    for (uint32_t i = 0; i < rhs->rank(); ++i)
+ scratchpad_size *= rhs->dim(i).value();
+
+ scratchpad_sizes.push_back(scratchpad_size * loco::size(rhs->dtype()));
+
+ return scratchpad_sizes;
+ }
+
+ uint32_t ComputeScratchpadSizeConv2d(const luci::CircleConv2D *conv) final
+ {
+ const auto conv_input = loco::must_cast<luci::CircleNode *>(conv->input());
+ const auto filter = loco::must_cast<luci::CircleNode *>(conv->filter());
+
+ const uint32_t stride_height = conv->stride()->h();
+ const uint32_t stride_width = conv->stride()->w();
+
+ const uint32_t dilation_height_factor = conv->dilation()->h();
+ const uint32_t dilation_width_factor = conv->dilation()->w();
+
+ const uint32_t filter_height = filter->dim(1).value();
+ const uint32_t filter_width = filter->dim(2).value();
+
+ const bool need_dilated_im2col = dilation_height_factor != 1 || dilation_width_factor != 1;
+ const bool need_non_dilated_im2col =
+ stride_height != 1 || stride_width != 1 || filter_height != 1 || filter_width != 1;
+ const bool need_im2col = conv_input->dtype() != loco::DataType::S16 &&
+ (need_dilated_im2col || need_non_dilated_im2col);
+
+ if (!need_im2col)
+ {
+ return 0;
+ }
+
+ const uint32_t input_depth = conv_input->dim(3).value();
+ const uint32_t batches = conv_input->dim(0).value();
+
+ const uint32_t output_height = conv->dim(1).value();
+ const uint32_t output_width = conv->dim(2).value();
+
+ return batches * output_height * output_width * input_depth * filter_height * filter_width *
+ size(conv_input->dtype());
+ }
+
+ uint32_t
+ ComputeScratchpadSizeDepthwiseConv2d(const luci::CircleDepthwiseConv2D *depthwise_conv) final
+ {
+ // for linux DepthwiseConv2d scratchpad tensors size = 0
+ return 0;
+ }
+
+ std::vector<uint32_t> ComputeScratchpadSizeSVDF(const luci::CircleSVDF *svdf) final
+ {
+ const auto svdf_input = loco::must_cast<luci::CircleNode *>(svdf->input());
+ const auto weight_feature_input = loco::must_cast<luci::CircleNode *>(svdf->weight_feature());
+
+ if (svdf_input->dtype() == loco::DataType::FLOAT32 and
+ (weight_feature_input->dtype() == loco::DataType::S8 or
+ weight_feature_input->dtype() == loco::DataType::U8))
+ {
+ throw std::runtime_error("Hybrid type is not currently supported for linux platform");
+ }
+
+ std::vector<uint32_t> scratchpad_sizes;
+
+ const auto batch_size = svdf_input->dim(0).value();
+ const auto num_filters = weight_feature_input->dim(0).value();
+ const auto rank = svdf->svdf_rank();
+ const auto num_units = num_filters / rank;
+
+ if (svdf_input->dtype() == loco::DataType::S8)
+ {
+ scratchpad_sizes.push_back(batch_size * num_filters * sizeof(int32_t));
+ scratchpad_sizes.push_back(batch_size * num_units * sizeof(int32_t));
+ }
+ else
+ {
+ scratchpad_sizes.push_back(batch_size * num_filters * sizeof(float));
+ }
+
+ return scratchpad_sizes;
+ }
+};
+
+} // namespace circle_planner
+
+#endif // CIRCLE_EXECUTION_PLAN_SCRATCHPAD_HELPER_LINUX_H
diff --git a/compiler/circle-execution-plan/pal/ScratchpadHelperMCU.h b/compiler/circle-execution-plan/pal/ScratchpadHelperMCU.h
new file mode 100644
index 000000000..14b41640c
--- /dev/null
+++ b/compiler/circle-execution-plan/pal/ScratchpadHelperMCU.h
@@ -0,0 +1,88 @@
+/*
+ * Copyright (c) 2022 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef CIRCLE_EXECUTION_PLAN_SCRATCHPAD_HELPER_MCU_H
+#define CIRCLE_EXECUTION_PLAN_SCRATCHPAD_HELPER_MCU_H
+
+#include "IScratchpadHelper.h"
+
+namespace circle_planner
+{
+
+class ScratchpadHelperMCU : public IScratchpadHelper
+{
+public:
+ uint32_t ComputeScratchpadSizeAveragePool2d(const luci::CircleAveragePool2D *avg_pool) final
+ {
+ // for mcu AveragePool2d scratchpad tensors size = 0
+ return 0;
+ }
+
+ std::vector<uint32_t>
+ ComputeScratchpadSizeBatchMatMul(const luci::CircleBatchMatMul *batch_mat_mul) final
+ {
+ throw std::runtime_error("BatchMatMul is not currently supported for mcu platform");
+ }
+
+ uint32_t ComputeScratchpadSizeConv2d(const luci::CircleConv2D *) final
+ {
+ // for mcu scratchpad size = 0
+ return 0;
+ }
+
+ uint32_t
+ ComputeScratchpadSizeDepthwiseConv2d(const luci::CircleDepthwiseConv2D *depthwise_conv) final
+ {
+ // for mcu DepthwiseConv2d scratchpad tensors size = 0
+ return 0;
+ }
+
+ std::vector<uint32_t> ComputeScratchpadSizeSVDF(const luci::CircleSVDF *svdf) final
+ {
+ const auto svdf_input = loco::must_cast<luci::CircleNode *>(svdf->input());
+ const auto weight_feature_input = loco::must_cast<luci::CircleNode *>(svdf->weight_feature());
+
+ if (svdf_input->dtype() == loco::DataType::FLOAT32 and
+ (weight_feature_input->dtype() == loco::DataType::S8 or
+ weight_feature_input->dtype() == loco::DataType::U8))
+ {
+      throw std::runtime_error("Hybrid type is not currently supported for mcu platform");
+ }
+
+ std::vector<uint32_t> scratchpad_sizes;
+
+ const auto batch_size = svdf_input->dim(0).value();
+ const auto num_filters = weight_feature_input->dim(0).value();
+ const auto rank = svdf->svdf_rank();
+ const auto num_units = num_filters / rank;
+
+ if (svdf_input->dtype() == loco::DataType::S8)
+ {
+ scratchpad_sizes.push_back(batch_size * num_filters * sizeof(int32_t));
+ scratchpad_sizes.push_back(batch_size * num_units * sizeof(int32_t));
+ }
+ else
+ {
+ scratchpad_sizes.push_back(batch_size * num_filters * sizeof(float));
+ }
+
+ return scratchpad_sizes;
+ }
+};
+
+} // namespace circle_planner
+
+#endif // CIRCLE_EXECUTION_PLAN_SCRATCHPAD_HELPER_MCU_H
diff --git a/runtime/onert/backend/gpu_cl/open_cl/Status.h b/compiler/circle-execution-plan/pal/TargetPlatform.h
index 6295a7e77..538a502fe 100644
--- a/runtime/onert/backend/gpu_cl/open_cl/Status.h
+++ b/compiler/circle-execution-plan/pal/TargetPlatform.h
@@ -1,6 +1,5 @@
/*
- * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
- * Copyright 2020 The TensorFlow Authors. All Rights Reserved.
+ * Copyright (c) 2022 Samsung Electronics Co., Ltd. All Rights Reserved
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
@@ -15,15 +14,25 @@
* limitations under the License.
*/
-#ifndef __ONERT_BACKEND_GPU_CL_OPENCL_STATUS_H__
-#define __ONERT_BACKEND_GPU_CL_OPENCL_STATUS_H__
+#ifndef CIRCLE_EXECUTION_PLAN_TARGET_PLATFORM_H
+#define CIRCLE_EXECUTION_PLAN_TARGET_PLATFORM_H
-#include "absl/status/status.h" // IWYU pragma: export
-#define RETURN_IF_ERROR(s) \
- { \
- auto c = (s); \
- if (!c.ok()) \
- return c; \
- } // IWYU pragma: export
+namespace circle_planner
+{
-#endif // __ONERT_BACKEND_GPU_CL_OPENCL_STATUS_H__
+enum SupportedPlatformType
+{
+ LINUX,
+ MCU,
+ CMSISNN
+};
+
+struct TargetPlatform
+{
+ SupportedPlatformType platform_type;
+ bool use_dsp;
+};
+
+} // namespace circle_planner
+
+#endif // CIRCLE_EXECUTION_PLAN_TARGET_PLATFORM_H
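For example, the driver change below builds one of these from command-line flags;
constructed directly, it looks like:

circle_planner::TargetPlatform platform{circle_planner::SupportedPlatformType::CMSISNN, /*use_dsp=*/true};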
diff --git a/compiler/circle-execution-plan/src/CircleExecutionPlan.cpp b/compiler/circle-execution-plan/src/CircleExecutionPlan.cpp
index a54100b8c..1788124c3 100644
--- a/compiler/circle-execution-plan/src/CircleExecutionPlan.cpp
+++ b/compiler/circle-execution-plan/src/CircleExecutionPlan.cpp
@@ -35,6 +35,18 @@ int entry(int argc, char **argv)
arser.add_argument("input").nargs(1).type(arser::DataType::STR).help("Input circle model");
arser.add_argument("output").nargs(1).type(arser::DataType::STR).help("Output circle model");
+ arser.add_argument("--platform")
+ .nargs(1)
+ .type(arser::DataType::STR)
+ .required(false)
+ .default_value("linux")
+    .help("Platform name: linux, mcu or cmsisnn");
+ arser.add_argument("--use_dsp")
+ .nargs(1)
+ .type(arser::DataType::BOOL)
+ .required(false)
+ .default_value(false)
+    .help("Plan with or without DSP (currently supported only with cmsisnn)");
try
{
@@ -47,8 +59,35 @@ int entry(int argc, char **argv)
return 255;
}
- std::string input_path = arser.get<std::string>("input");
- std::string output_path = arser.get<std::string>("output");
+ const std::string input_path = arser.get<std::string>("input");
+ const std::string output_path = arser.get<std::string>("output");
+ const std::string platform_name = arser.get<std::string>("--platform");
+ const bool use_dsp = arser.get<bool>("--use_dsp");
+
+ if (platform_name != "cmsisnn" && use_dsp)
+ {
+    std::cerr << "ERROR: use_dsp is currently supported only with cmsisnn" << std::endl;
+ return EXIT_FAILURE;
+ }
+
+ circle_planner::SupportedPlatformType platform_type;
+ if (platform_name == "linux")
+ {
+ platform_type = circle_planner::SupportedPlatformType::LINUX;
+ }
+ else if (platform_name == "mcu")
+ {
+ platform_type = circle_planner::SupportedPlatformType::MCU;
+ }
+ else if (platform_name == "cmsisnn")
+ {
+ platform_type = circle_planner::SupportedPlatformType::CMSISNN;
+ }
+ else
+ {
+ std::cerr << "ERROR: Invalid platform name '" << platform_name << "'" << std::endl;
+ return EXIT_FAILURE;
+ }
foder::FileLoader file_loader{input_path};
std::vector<char> model_data;
@@ -82,8 +121,8 @@ int entry(int argc, char **argv)
auto module = importer.importModule(circle_model);
// Do main job
- luci::ExecutionPlanner execution_planner(module->graph());
- execution_planner.get_execution_plan();
+ circle_planner::ExecutionPlanner execution_planner(module->graph(), {platform_type, use_dsp});
+ execution_planner.make_execution_plan();
// Export to output Circle file
luci::CircleExporter exporter;
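Taken together, the new arguments would be exercised roughly as below; the executable name follows the module directory, and the exact boolean literal arser accepts for `--use_dsp` is an assumption:

```
$ circle-execution-plan --platform cmsisnn --use_dsp true input.circle output.circle
```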
diff --git a/compiler/circle-execution-plan/src/ExecutionPlanner.cpp b/compiler/circle-execution-plan/src/ExecutionPlanner.cpp
index c37d1e5f5..ec2ec1362 100644
--- a/compiler/circle-execution-plan/src/ExecutionPlanner.cpp
+++ b/compiler/circle-execution-plan/src/ExecutionPlanner.cpp
@@ -18,72 +18,49 @@
#include <loco/IR/Algorithm.h>
#include <luci/UserSettings.h>
-namespace luci
+namespace circle_planner
{
namespace
{
-constexpr uint32_t nodeNotAssigned = std::numeric_limits<int32_t>::max();
+constexpr uint32_t node_not_assigned = std::numeric_limits<int32_t>::max();
-uint32_t compute_output_size(Padding padding, uint32_t image_size, uint32_t filter_size,
- uint32_t stride, uint32_t dilation_rate = 1)
+bool isExecutableNode(const luci::CircleNode *node)
{
- const int32_t effective_filter_size = (filter_size - 1) * dilation_rate + 1;
- switch (padding)
+ switch (node->opcode())
{
- case Padding::SAME:
- return (image_size + stride - 1) / stride;
- case Padding::VALID:
- return (image_size + stride - effective_filter_size) / stride;
+ // The following nodes denote outputs of multiple-output nodes.
+ // The list is synchronized with the same list from luci-interpreter/src/loader/GraphLoader.cpp
+ case luci::CircleOpcode::CIRCLEIFOUT:
+ case luci::CircleOpcode::CIRCLESPLITOUT:
+ case luci::CircleOpcode::CIRCLESPLITVOUT:
+ case luci::CircleOpcode::CIRCLEUNPACKOUT:
+ case luci::CircleOpcode::CIRCLEWHILEOUT:
+ return false;
default:
- assert(false);
+ return true;
}
}
-// Method finds (if necessary) size for im2col temporary tensor.
-uint32_t compute_im2col_size(const luci::CircleConv2D *conv)
+bool isTensorProducingNode(const luci::CircleNode *node)
{
- auto conv_input = loco::must_cast<luci::CircleNode *>(conv->input());
- auto filter = loco::must_cast<luci::CircleNode *>(conv->filter());
- auto padding = (conv->padding());
- uint32_t stride_height = conv->stride()->h();
- uint32_t stride_width = conv->stride()->w();
-
- uint32_t dilation_height_factor = conv->dilation()->h();
- uint32_t dilation_width_factor = conv->dilation()->w();
-
- uint32_t filter_height = filter->dim(1).value();
- uint32_t filter_width = filter->dim(2).value();
-
- const bool need_dilated_im2col = dilation_height_factor != 1 || dilation_width_factor != 1;
- const bool need_non_dilated_im2col =
- stride_height != 1 || stride_width != 1 || filter_height != 1 || filter_width != 1;
- bool need_im2col =
- conv_input->dtype() != loco::DataType::S16 && (need_dilated_im2col || need_non_dilated_im2col);
-
- if (!need_im2col)
+ switch (node->opcode())
{
- return 0;
+ // The following nodes are multiple-output nodes. They do not produce tensors; the tensors
+ // are produced by the corresponding *Out nodes instead.
+ // The list is synchronized with the same list from luci-interpreter/src/loader/GraphLoader.cpp
+ case luci::CircleOpcode::IF:
+ case luci::CircleOpcode::SPLIT:
+ case luci::CircleOpcode::UNPACK:
+ return false;
+ default:
+ return true;
}
-
- uint32_t input_depth = conv_input->dim(3).value();
- uint32_t input_height = conv_input->dim(1).value();
- uint32_t input_width = conv_input->dim(2).value();
-
- uint32_t output_height = compute_output_size(padding, input_height, filter_height, stride_height,
- dilation_height_factor);
- uint32_t output_width =
- compute_output_size(padding, input_width, filter_width, stride_width, dilation_width_factor);
-
- uint32_t batches = conv_input->dim(0).value();
-
- return batches * output_height * output_width * input_depth * filter_height * filter_width *
- size(conv_input->dtype());
}
} // namespace
-void ExecutionPlanner::get_execution_plan()
+void ExecutionPlanner::make_execution_plan()
{
get_default_execution_order_plan();
_required_size = get_offsets_with_greedy_by_size();
@@ -106,23 +83,23 @@ void ExecutionPlanner::get_default_execution_order_plan()
void ExecutionPlanner::get_usage_interval()
{
// Initialize vectors of first and last nodes for usage interval
- _alloc_node.assign(_ordered_nodes.size(), nodeNotAssigned);
- _dealloc_node.assign(_ordered_nodes.size(), nodeNotAssigned);
+ _alloc_node.assign(_ordered_nodes.size(), node_not_assigned);
+ _dealloc_node.assign(_ordered_nodes.size(), node_not_assigned);
// Vector for counting usages
std::vector<int> usages_counts(_ordered_nodes.size(), 0);
auto allocate = [this](uint32_t node, uint32_t tensor) {
- if (_alloc_node[tensor] != nodeNotAssigned)
+ if (_alloc_node[tensor] != node_not_assigned)
{
return;
}
- assert(_dealloc_node[tensor] == nodeNotAssigned);
+ assert(_dealloc_node[tensor] == node_not_assigned);
_alloc_node[tensor] = node;
};
auto deallocate = [this](uint32_t node, uint32_t tensor) {
- assert(_dealloc_node[tensor] == nodeNotAssigned);
+ assert(_dealloc_node[tensor] == node_not_assigned);
_dealloc_node[tensor] = node;
};
@@ -158,13 +135,24 @@ void ExecutionPlanner::get_usage_interval()
for (uint32_t i = 0; i < _ordered_nodes.size(); i++)
{
const auto node = _ordered_nodes.at(i);
+ auto prev_nodes = preds(node);
if (const auto *const_node = dynamic_cast<const luci::CircleConst *>(node))
{
allocate(0, i);
}
- allocate(i, i);
+ else if (!isExecutableNode(loco::must_cast<luci::CircleNode *>(node)))
+ {
+ // If the current node is a multi-output node, then the lifetime of the current node should
+ // start when the previous node starts to live
+ auto it = std::find(_ordered_nodes.begin(), _ordered_nodes.end(), *prev_nodes.begin());
+ size_t index = std::distance(_ordered_nodes.begin(), it);
+ allocate(index, i);
+ }
+ else
+ {
+ allocate(i, i);
+ }
- auto prev_nodes = preds(node);
for (auto &prev_node : prev_nodes)
{
auto it = std::find(_ordered_nodes.begin(), _ordered_nodes.end(), prev_node);
@@ -203,7 +191,7 @@ uint32_t ExecutionPlanner::get_offsets_with_greedy_by_size()
uint32_t ExecutionPlanner::greedy_by_size_approach()
{
size_t result_size = 0;
- create_alloc_node_inform_vector(false, false, false);
+ create_alloc_node_inform_vector(_is_null_consts, _is_null_inputs, _is_null_scratchpads);
std::vector<AllocationNodeInformation> ordered_alloc_inform;
for (auto &current_node : _alloc_node_inform_vector)
{
@@ -250,22 +238,22 @@ uint32_t ExecutionPlanner::greedy_by_size_approach()
}
void ExecutionPlanner::create_alloc_node_inform_vector(bool null_consts, bool null_inputs,
- bool null_im2col)
+ bool null_scratchpad)
{
auto node_compare = [this](const AllocationNodeInformation &alloc_1,
const AllocationNodeInformation &alloc_2) {
auto idx1 = alloc_1.node_num;
auto idx2 = alloc_2.node_num;
- if (this->_alloc_node[idx1] == 0 && this->_dealloc_node[idx1] == nodeNotAssigned)
+ if (this->_alloc_node[idx1] == 0 && this->_dealloc_node[idx1] == node_not_assigned)
{
- if (this->_alloc_node[idx2] == 0 && this->_dealloc_node[idx2] == nodeNotAssigned)
+ if (this->_alloc_node[idx2] == 0 && this->_dealloc_node[idx2] == node_not_assigned)
{
return idx1 < idx2;
}
return true;
}
- if (this->_alloc_node[idx2] == 0 && this->_dealloc_node[idx2] == nodeNotAssigned)
+ if (this->_alloc_node[idx2] == 0 && this->_dealloc_node[idx2] == node_not_assigned)
{
return false;
}
@@ -305,30 +293,66 @@ void ExecutionPlanner::create_alloc_node_inform_vector(bool null_consts, bool nu
{
_alloc_node_inform_vector[i].size = 0;
}
+ else if (!isTensorProducingNode(circle_node))
+ {
+ _alloc_node_inform_vector[i].size = 0;
+ }
else
{
_alloc_node_inform_vector[i].size = node_size;
}
- // Im2col
- auto opcode = circle_node->opcode();
- if (opcode == luci::CircleOpcode::CONV_2D)
+ // Scratchpad, if needed
+ std::vector<uint32_t> scratchpad_sizes;
+ if (!null_scratchpad)
{
- auto conv = loco::must_cast<const luci::CircleConv2D *>(circle_node);
- auto im2col_size = compute_im2col_size(conv);
- if (im2col_size > 0)
+ switch (circle_node->opcode())
{
- AllocationNodeInformation temp_alloc;
-
- if (null_im2col)
+ case luci::CircleOpcode::AVERAGE_POOL_2D:
{
- temp_alloc.size = 0;
+ const auto avg_pool = loco::must_cast<const luci::CircleAveragePool2D *>(circle_node);
+ scratchpad_sizes.push_back(
+ _scratchpad_helper->ComputeScratchpadSizeAveragePool2d(avg_pool));
+ break;
}
- else
+ case luci::CircleOpcode::BATCH_MATMUL:
{
- temp_alloc.size = im2col_size;
+ const auto batch_mat_mul = loco::must_cast<const luci::CircleBatchMatMul *>(circle_node);
+ scratchpad_sizes = _scratchpad_helper->ComputeScratchpadSizeBatchMatMul(batch_mat_mul);
+ break;
}
+ case luci::CircleOpcode::CONV_2D:
+ {
+ const auto conv = loco::must_cast<const luci::CircleConv2D *>(circle_node);
+ scratchpad_sizes.push_back(_scratchpad_helper->ComputeScratchpadSizeConv2d(conv));
+ break;
+ }
+ case luci::CircleOpcode::DEPTHWISE_CONV_2D:
+ {
+ const auto depthwise_conv =
+ loco::must_cast<const luci::CircleDepthwiseConv2D *>(circle_node);
+ scratchpad_sizes.push_back(
+ _scratchpad_helper->ComputeScratchpadSizeDepthwiseConv2d(depthwise_conv));
+ break;
+ }
+ case luci::CircleOpcode::SVDF:
+ {
+ const auto svdf = loco::must_cast<const luci::CircleSVDF *>(circle_node);
+ scratchpad_sizes = _scratchpad_helper->ComputeScratchpadSizeSVDF(svdf);
+ break;
+ }
+ default:
+ break;
+ }
+ }
+
+ for (const auto scratchpad_size : scratchpad_sizes)
+ {
+ if (scratchpad_size > 0)
+ {
+ AllocationNodeInformation temp_alloc;
+ temp_alloc.size = scratchpad_size;
temp_alloc.first_node = i - 1;
temp_alloc.last_node = i + 1;
temp_alloc.node_num = i;
@@ -352,7 +376,7 @@ void ExecutionPlanner::dump_inform()
{
auto current_node_it = std::find_if(
_alloc_node_inform_vector.begin(), _alloc_node_inform_vector.end(),
- [this, i](const AllocationNodeInformation &x) { return x.node_num == i && !x.is_temp; });
+ [i](const AllocationNodeInformation &x) { return x.node_num == i && !x.is_temp; });
for (uint32_t j = 0; j < _ordered_nodes.size(); j++)
{
auto first_node = _alloc_node[j];
@@ -360,7 +384,7 @@ void ExecutionPlanner::dump_inform()
auto it = std::find_if(
_alloc_node_inform_vector.begin(), _alloc_node_inform_vector.end(),
- [this, j](const AllocationNodeInformation &x) { return x.node_num == j && !x.is_temp; });
+ [j](const AllocationNodeInformation &x) { return x.node_num == j && !x.is_temp; });
if (i >= first_node && i <= last_node)
{
current_node_it->breadth += it->size;
@@ -386,4 +410,4 @@ void ExecutionPlanner::dump_inform()
});
}
-} // namespace luci
+} // namespace circle_planner
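For readers unfamiliar with the greedy-by-size strategy the planner references, a minimal self-contained sketch follows (illustrative only, not the planner's actual data structures): tensors are visited largest-first, and each is placed at the lowest offset that does not collide with any already-placed tensor whose usage interval overlaps its own.

```cpp
#include <algorithm>
#include <cstdint>
#include <vector>

struct Tensor
{
  uint32_t size;        // bytes
  uint32_t first, last; // usage interval in execution order
  uint32_t offset = 0;  // assigned buffer offset
};

// Greedy-by-size: place the largest tensors first, each at the lowest offset
// that is free over its whole usage interval.
uint32_t plan(std::vector<Tensor> &tensors)
{
  std::vector<Tensor *> order;
  for (auto &t : tensors)
    order.push_back(&t);
  std::sort(order.begin(), order.end(),
            [](const Tensor *a, const Tensor *b) { return a->size > b->size; });

  std::vector<Tensor *> placed; // kept sorted by offset
  uint32_t required = 0;
  for (auto *t : order)
  {
    uint32_t offset = 0;
    for (auto *p : placed)
    {
      // Two tensors conflict only if their usage intervals overlap in time
      const bool time_overlap = !(t->last < p->first || p->last < t->first);
      if (time_overlap && offset + t->size > p->offset)
        offset = std::max(offset, p->offset + p->size);
    }
    t->offset = offset;
    placed.push_back(t);
    std::sort(placed.begin(), placed.end(),
              [](const Tensor *a, const Tensor *b) { return a->offset < b->offset; });
    required = std::max(required, offset + t->size);
  }
  return required; // total buffer size in bytes
}
```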
diff --git a/compiler/circle-execution-plan/src/ExecutionPlanner.h b/compiler/circle-execution-plan/src/ExecutionPlanner.h
index 8e3d9b46a..e0833c407 100644
--- a/compiler/circle-execution-plan/src/ExecutionPlanner.h
+++ b/compiler/circle-execution-plan/src/ExecutionPlanner.h
@@ -17,10 +17,15 @@
#ifndef CIRCLE_EXECUTION_PLANNER_H
#define CIRCLE_EXECUTION_PLANNER_H
+#include "TargetPlatform.h"
+#include "IScratchpadHelper.h"
+#include "ScratchpadHelperLinux.h"
+#include "ScratchpadHelperMCU.h"
+#include "ScratchpadHelperCMSISNN.h"
#include <luci/IR/Module.h>
#include <luci/Plan/CircleNodeExecutionPlan.h>
-namespace luci
+namespace circle_planner
{
// Struct with additional information for a node; it helps build the allocation plan for nodes.
struct AllocationNodeInformation
@@ -50,7 +55,7 @@ struct AllocationNodeInformation
uint32_t last_node;
// is the current node temporary or not
bool is_temp;
- // operation breadth of current node
+ // Breadth is the sum of live tensor sizes at the moment of execution of the given node
uint32_t breadth;
bool operator<(const AllocationNodeInformation &other) const { return offset < other.offset; }
@@ -60,12 +65,44 @@ class ExecutionPlanner
{
public:
ExecutionPlanner() = delete;
- explicit ExecutionPlanner(loco::Graph *graph) { _graph = graph; };
+ explicit ExecutionPlanner(loco::Graph *graph) : _graph(graph)
+ {
+ _scratchpad_helper = std::make_unique<ScratchpadHelperLinux>();
+ }
+
+ explicit ExecutionPlanner(loco::Graph *graph, TargetPlatform target_platform) : _graph(graph)
+ {
+ switch (target_platform.platform_type)
+ {
+ case LINUX:
+ _scratchpad_helper = std::make_unique<ScratchpadHelperLinux>();
+ break;
+ case MCU:
+ _scratchpad_helper = std::make_unique<ScratchpadHelperMCU>();
+ break;
+ case CMSISNN:
+ _scratchpad_helper = std::make_unique<ScratchpadHelperCMSISNN>(target_platform.use_dsp);
+ break;
+ default:
+ assert(false && "Use unsupported platform");
+ }
+ };
// Method provides execution plan, which contains execution order and
// memory offsets for all nodes in _graph.
// The plan is written into node annotations with the help of the CircleNodeExecutionPlan class.
- void get_execution_plan();
+ void make_execution_plan();
+
+ // Method changes planning mode:
+ // is_null_consts = true - constants are no longer taken into account when planning
+ // is_null_inputs = true - inputs are no longer taken into account when planning
+ // is_null_scratchpads = true - scratchpads are no longer taken into account when planning
+ void change_planning_mode(bool is_null_consts, bool is_null_inputs, bool is_null_scratchpads)
+ {
+ _is_null_consts = is_null_consts;
+ _is_null_inputs = is_null_inputs;
+ _is_null_scratchpads = is_null_scratchpads;
+ };
private:
// Method gets default execution order plan and saves it in _ordered_nodes vector.
@@ -83,18 +120,19 @@ private:
// Return: required size of buffer.
uint32_t get_offsets_with_greedy_by_size();
- // Realization of greedy by size approach to find offsets for nodes.
+ // Implementation of the greedy-by-size approach (the algorithm is described in the
+ // "EFFICIENT MEMORY MANAGEMENT FOR DEEP NEURAL NET INFERENCE" paper) to find offsets for nodes.
uint32_t greedy_by_size_approach();
// Method creates and fills _alloc_node_inform_vector with usage interval information and node sizes.
// null_consts = true - size of const nodes will be equal 0;
// null_inputs = true - size of input nodes will be equal 0;
- // null_im2col = true - size of im2col nodes will be equal 0;
- // It using if we don't want to take input(const or im2col) nodes into account
+ // null_scratchpad = true - size of scratchpad nodes will be equal 0;
+ // It is used when we don't want to take input (const or scratchpad) nodes into account
// when determining offsets and calculating the required buffer size. This is used for
// experiments.
void create_alloc_node_inform_vector(bool null_consts = false, bool null_inputs = false,
- bool null_im2col = false);
+ bool null_scratchpad = false);
// Stores additional allocation information for all nodes from _graph.
std::vector<AllocationNodeInformation> _alloc_node_inform_vector;
@@ -121,10 +159,21 @@ private:
loco::Graph *_graph;
+ // Calculates sizes of scratchpad tensors for the current platform
+ std::unique_ptr<IScratchpadHelper> _scratchpad_helper;
+
// Required memory size.
uint32_t _required_size = 0;
+
+ // Flags for choosing different planning modes:
+ // _is_null_consts = true - constants are no longer taken into account when planning
+ // _is_null_inputs = true - inputs are no longer taken into account when planning
+ // _is_null_scratchpads = true - scratchpads are no longer taken into account when planning
+ bool _is_null_consts = false;
+ bool _is_null_inputs = false;
+ bool _is_null_scratchpads = false;
};
-} // namespace luci
+} // namespace circle_planner
#endif // CIRCLE_EXECUTION_PLANNER_H
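A minimal usage sketch of the API after this change (graph loading elided; `graph` is assumed to be a valid `loco::Graph *`):

```cpp
#include "ExecutionPlanner.h"

void plan_for_cmsisnn(loco::Graph *graph)
{
  // Target CMSIS-NN with the DSP extension enabled
  circle_planner::TargetPlatform platform{circle_planner::CMSISNN, /* use_dsp */ true};

  circle_planner::ExecutionPlanner planner(graph, platform);

  // Optional: experimental mode that ignores constants when planning
  planner.change_planning_mode(/* is_null_consts */ true, /* is_null_inputs */ false,
                               /* is_null_scratchpads */ false);

  planner.make_execution_plan();
}
```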
diff --git a/compiler/circle-inspect/CMakeLists.txt b/compiler/circle-inspect/CMakeLists.txt
index d0775ea2d..10d26d191 100644
--- a/compiler/circle-inspect/CMakeLists.txt
+++ b/compiler/circle-inspect/CMakeLists.txt
@@ -1,6 +1,6 @@
-if(NOT TARGET mio_circle)
+if(NOT TARGET mio_circle04)
return()
-endif(NOT TARGET mio_circle)
+endif(NOT TARGET mio_circle04)
set(DRIVER "driver/Driver.cpp")
@@ -10,5 +10,6 @@ add_executable(circle-inspect ${DRIVER} ${SOURCES})
target_include_directories(circle-inspect PRIVATE src)
target_link_libraries(circle-inspect arser)
target_link_libraries(circle-inspect foder)
-target_link_libraries(circle-inspect mio_circle)
+target_link_libraries(circle-inspect mio_circle04)
+target_link_libraries(circle-inspect mio_circle04_helper)
target_link_libraries(circle-inspect safemain)
diff --git a/compiler/circle-inspect/README.md b/compiler/circle-inspect/README.md
index 1f76c8ede..94eea7b08 100644
--- a/compiler/circle-inspect/README.md
+++ b/compiler/circle-inspect/README.md
@@ -20,3 +20,19 @@ ADD
```
To get the count of a specific operator, use other tools like sort, uniq, etc.
+
+Tensors with `--tensor_dtype`
+- show the name and dtype of each tensor, one line at a time
+
+Example
+```
+$ circle-inspect --tensor_dtype quantized_conv2d.circle
+```
+
+Result
+```
+ifm UINT8
+weights UINT8
+bias INT32
+ofm UINT8
+```
diff --git a/compiler/circle-inspect/driver/Driver.cpp b/compiler/circle-inspect/driver/Driver.cpp
index a450fd9e0..10e185de5 100644
--- a/compiler/circle-inspect/driver/Driver.cpp
+++ b/compiler/circle-inspect/driver/Driver.cpp
@@ -35,6 +35,7 @@ int entry(int argc, char **argv)
.nargs(0)
.help("Dump Conv2D series weight operators in circle file");
arser.add_argument("--op_version").nargs(0).help("Dump versions of the operators in circle file");
+ arser.add_argument("--tensor_dtype").nargs(0).help("Dump dtype of tensors");
arser.add_argument("circle").type(arser::DataType::STR).help("Circle file to inspect");
try
@@ -48,7 +49,8 @@ int entry(int argc, char **argv)
return 255;
}
- if (!arser["--operators"] && !arser["--conv2d_weight"] && !arser["--op_version"])
+ if (!arser["--operators"] && !arser["--conv2d_weight"] && !arser["--op_version"] &&
+ !arser["--tensor_dtype"])
{
std::cout << "At least one option must be specified" << std::endl;
std::cout << arser;
@@ -63,6 +65,8 @@ int entry(int argc, char **argv)
dumps.push_back(std::make_unique<circleinspect::DumpConv2DWeight>());
if (arser["--op_version"])
dumps.push_back(std::make_unique<circleinspect::DumpOperatorVersion>());
+ if (arser["--tensor_dtype"])
+ dumps.push_back(std::make_unique<circleinspect::DumpTensorDType>());
std::string model_file = arser.get<std::string>("circle");
diff --git a/compiler/circle-inspect/requires.cmake b/compiler/circle-inspect/requires.cmake
index 81e0f0dbd..362d67cf4 100644
--- a/compiler/circle-inspect/requires.cmake
+++ b/compiler/circle-inspect/requires.cmake
@@ -1,3 +1,3 @@
require("arser")
-require("mio-circle")
+require("mio-circle04")
require("safemain")
diff --git a/compiler/circle-inspect/src/Dump.cpp b/compiler/circle-inspect/src/Dump.cpp
index 5c71afb3f..bba5e56c3 100644
--- a/compiler/circle-inspect/src/Dump.cpp
+++ b/compiler/circle-inspect/src/Dump.cpp
@@ -175,3 +175,28 @@ void DumpOperatorVersion::run(std::ostream &os, const circle::Model *model)
}
} // namespace circleinspect
+
+namespace circleinspect
+{
+
+void DumpTensorDType::run(std::ostream &os, const circle::Model *model)
+{
+ circleinspect::Reader reader(model);
+
+ const uint32_t subgraph_size = reader.num_subgraph();
+
+ for (uint32_t g = 0; g < subgraph_size; g++)
+ {
+ reader.select_subgraph(g);
+ auto tensors = reader.tensors();
+
+ for (uint32_t i = 0; i < tensors->Length(); ++i)
+ {
+ const auto tensor = tensors->Get(i);
+
+ os << reader.tensor_name(tensor) << " " << reader.tensor_dtype(tensor) << std::endl;
+ }
+ }
+}
+
+} // namespace circleinspect
diff --git a/compiler/circle-inspect/src/Dump.h b/compiler/circle-inspect/src/Dump.h
index 996c421f9..8ca6838d1 100644
--- a/compiler/circle-inspect/src/Dump.h
+++ b/compiler/circle-inspect/src/Dump.h
@@ -60,6 +60,15 @@ public:
void run(std::ostream &os, const circle::Model *model);
};
+class DumpTensorDType final : public DumpInterface
+{
+public:
+ DumpTensorDType() = default;
+
+public:
+ void run(std::ostream &os, const circle::Model *model);
+};
+
} // namespace circleinspect
#endif // __DUMP_H__
diff --git a/compiler/circle-inspect/src/Reader.cpp b/compiler/circle-inspect/src/Reader.cpp
index 7807db38a..0e2865254 100644
--- a/compiler/circle-inspect/src/Reader.cpp
+++ b/compiler/circle-inspect/src/Reader.cpp
@@ -16,66 +16,14 @@
#include "Reader.h"
+#include <mio_circle/Helper.h>
+
#include <sstream>
#include <string>
namespace circleinspect
{
-bool is_valid(const circle::OperatorCode *opcode)
-{
- circle::BuiltinOperator code = opcode->builtin_code();
- return (circle::BuiltinOperator_MIN <= code && code <= circle::BuiltinOperator_MAX);
-}
-
-bool is_custom(const circle::OperatorCode *opcode)
-{
- circle::BuiltinOperator code = opcode->builtin_code();
- return (code == circle::BuiltinOperator_CUSTOM);
-}
-
-std::string opcode_name(const circle::OperatorCode *opcode)
-{
- assert(opcode);
-
- if (!is_valid(opcode))
- {
- std::ostringstream oss;
- oss << "(invalid)";
- return oss.str();
- }
-
- if (is_custom(opcode))
- {
- if (!opcode->custom_code())
- return "(invalid custom)";
-
- std::string custom_op = "CUSTOM(";
- custom_op += opcode->custom_code()->c_str();
- custom_op += ")";
- return custom_op;
- }
-
- circle::BuiltinOperator code = opcode->builtin_code();
- return circle::EnumNameBuiltinOperator(code);
-}
-
-const char *tensor_type(const circle::Tensor *tensor)
-{
- return circle::EnumNameTensorType(tensor->type());
-}
-
-const char *tensor_name(const circle::Tensor *tensor)
-{
- static const char *kEmptyTensorName = "(noname)";
-
- auto name = tensor->name();
- if (name)
- return name->c_str();
-
- return kEmptyTensorName;
-}
-
Reader::Reader(const circle::Model *model)
{
_subgraphs = model->subgraphs();
@@ -122,7 +70,7 @@ circle::BuiltinOperator Reader::builtin_code(const circle::Operator *op) const
assert(index < _op_codes.size());
const circle::OperatorCode *opcode = _op_codes.at(index);
- return opcode->builtin_code();
+ return mio::circle::builtin_code_neutral(opcode);
}
std::string Reader::opcode_name(const circle::Operator *op) const
@@ -131,14 +79,24 @@ std::string Reader::opcode_name(const circle::Operator *op) const
assert(index < _op_codes.size());
const circle::OperatorCode *opcode = _op_codes.at(index);
- if (!is_valid(opcode))
+ if (!mio::circle::is_valid(opcode))
{
std::ostringstream oss;
oss << "(invalid: " << index << ")";
return oss.str();
}
- return circleinspect::opcode_name(opcode);
+ return mio::circle::opcode_name(opcode);
+}
+
+std::string Reader::tensor_name(const circle::Tensor *tensor) const
+{
+ return mio::circle::tensor_name(tensor);
+}
+
+std::string Reader::tensor_dtype(const circle::Tensor *tensor) const
+{
+ return mio::circle::tensor_type(tensor);
}
bool Reader::select_subgraph(uint32_t sgindex)
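The removed free functions now come from the `mio_circle04_helper` library; a rough sketch of calling them directly, assuming only the signatures visible in this diff:

```cpp
#include <mio_circle/Helper.h>

#include <iostream>

// Prints one "name dtype" line per tensor, the same way DumpTensorDType does.
void print_tensor(const circle::Tensor *tensor)
{
  std::cout << mio::circle::tensor_name(tensor) << " " << mio::circle::tensor_type(tensor)
            << std::endl;
}
```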
diff --git a/compiler/circle-inspect/src/Reader.h b/compiler/circle-inspect/src/Reader.h
index b5a99df3f..c38ec3990 100644
--- a/compiler/circle-inspect/src/Reader.h
+++ b/compiler/circle-inspect/src/Reader.h
@@ -36,12 +36,6 @@ template <typename T> std::vector<T> as_index_vector(const flatbuffers::Vector<T
return ret;
}
-bool is_valid(const circle::OperatorCode *opcode);
-bool is_custom(const circle::OperatorCode *opcode);
-std::string opcode_name(const circle::OperatorCode *opcode);
-const char *tensor_type(const circle::Tensor *tensor);
-const char *tensor_name(const circle::Tensor *tensor);
-
/**
* @brief Loads Circle file and provides helpers to access attributes
*/
@@ -71,6 +65,8 @@ public:
size_t buffer_info(uint32_t buf_idx, const uint8_t **buff_data);
circle::BuiltinOperator builtin_code(const circle::Operator *op) const;
std::string opcode_name(const circle::Operator *op) const;
+ std::string tensor_name(const circle::Tensor *tensor) const;
+ std::string tensor_dtype(const circle::Tensor *tensor) const;
public:
bool select_subgraph(uint32_t subgraph);
diff --git a/compiler/circle-opselector/README.md b/compiler/circle-opselector/README.md
index c06899ab5..5ea2d32c4 100644
--- a/compiler/circle-opselector/README.md
+++ b/compiler/circle-opselector/README.md
@@ -1,21 +1,21 @@
-# circle-opselector
-
-`circle-opselector` is a tool for creating new circle models by selecting nodes from a model.
-
-## Example
-
-### 1. Select from location numbers
-
-```bash
-./circle-opselector --by_id "1-3,5" input.circle output.circle
-```
-
-Then, output.circle which has node 1, 2, 3 and 5 will be created.
-
-### 2. Select from node names
-
-```bash
-./circle-opselector --by_name "Add_1,Sub_1,Concat_2" input.circle output.circle
-```
-
-Then, output.circle which has node Add_1, Sub_1 and Concat_2 will be created.
+# circle-opselector
+
+`circle-opselector` is a tool for creating new circle models by selecting nodes from a model.
+
+## Example
+
+### 1. Select from location numbers
+
+```bash
+./circle-opselector --by_id "1-3,5" input.circle output.circle
+```
+
+Then, output.circle which has node 1, 2, 3 and 5 will be created.
+
+### 2. Select from node names
+
+```bash
+./circle-opselector --by_name "Add_1,Sub_1,Concat_2" input.circle output.circle
+```
+
+Then, output.circle which has node Add_1, Sub_1 and Concat_2 will be created.
diff --git a/compiler/circle-part-value-test/CMakeLists.txt b/compiler/circle-part-value-test/CMakeLists.txt
index 1cfbcbd9b..0657607d2 100644
--- a/compiler/circle-part-value-test/CMakeLists.txt
+++ b/compiler/circle-part-value-test/CMakeLists.txt
@@ -82,8 +82,8 @@ foreach(IDX RANGE ${RECIPE_LENGTH_M1})
# Run partitioner
add_custom_command(OUTPUT ${PARTITIONER_CONN_JSON}
- COMMAND circle_partitioner "${PART_FILE}" "${PARTITION_NAME}.circle" "${PARTITIONER_OUTPUT_PATH}"
- DEPENDS circle_partitioner ${PART_DST_PATH} ${CIRCLE_DST_PATH}
+ COMMAND circle-partitioner "${PART_FILE}" "${PARTITION_NAME}.circle" "${PARTITIONER_OUTPUT_PATH}"
+ DEPENDS circle-partitioner ${PART_DST_PATH} ${CIRCLE_DST_PATH}
COMMENT "Parition ${RECIPE_NAME}.circle with ${PART_FILE}"
)
list(APPEND TEST_DEPS ${PARTITIONER_CONN_JSON})
@@ -106,7 +106,7 @@ add_dependencies(circle_part_value_test_prepare common_artifacts_deps)
add_test(NAME circle_part_value_test
COMMAND "${CMAKE_CURRENT_SOURCE_DIR}/part_eval_all.sh"
"${CMAKE_CURRENT_BINARY_DIR}"
- "${NNCC_OVERLAY_DIR}/venv_2_6_0"
+ "${NNCC_OVERLAY_DIR}/venv_2_8_0"
"$<TARGET_FILE:circle_part_driver>"
${PARTITION_LIST}
)
diff --git a/compiler/circle-part-value-test/part_eval_one.py b/compiler/circle-part-value-test/part_eval_one.py
index 91e32d78f..44661c78b 100755
--- a/compiler/circle-part-value-test/part_eval_one.py
+++ b/compiler/circle-part-value-test/part_eval_one.py
@@ -53,21 +53,37 @@ except:
interpreter = tf.lite.Interpreter(tflite_model)
interpreter.allocate_tensors()
+# Read SignatureDef and get output tensor id orders for remapping
+full_signatures = interpreter._get_full_signature_list()
+full_signatures_outputs_remap = None
+if full_signatures is not None:
+ signature_serving_default = full_signatures.get('serving_default', None)
+ if signature_serving_default is not None:
+ signature_outputs = signature_serving_default['outputs']
+
+ full_signatures_outputs_remap = []
+ for value in signature_outputs.values():
+ full_signatures_outputs_remap.append(value)
+
# Generate random input data.
num_inputs = len(interpreter.get_input_details())
for i in range(num_inputs):
input_details = interpreter.get_input_details()[i]
- if input_details["dtype"] == np.float32:
+ input_details_dtype = input_details["dtype"]
+ input_details_shape = input_details["shape"]
+ if input_details_dtype == np.float32:
input_data = np.array(
- np.random.random_sample(input_details["shape"]), input_details["dtype"])
- elif input_details["dtype"] == np.uint8:
+ np.random.random_sample(input_details_shape), input_details_dtype)
+ elif input_details_dtype == np.int16:
input_data = np.array(
- np.random.randint(0, 256, size=input_details["shape"]),
- input_details["dtype"])
- elif input_details["dtype"] == np.bool_:
+ np.random.randint(0, 100, size=input_details_shape), input_details_dtype)
+ elif input_details_dtype == np.uint8:
input_data = np.array(
- np.random.choice(a=[True, False], size=input_details["shape"]),
- input_details["dtype"])
+ np.random.randint(0, 256, size=input_details_shape), input_details_dtype)
+ elif input_details_dtype == np.bool_:
+ input_data = np.array(
+ np.random.choice(a=[True, False], size=input_details_shape),
+ input_details_dtype)
else:
raise SystemExit("Unsupported input dtype")
@@ -90,52 +106,42 @@ print("", flush=True)
subprocess.run(partition_command, check=True)
# Compare the results.
-for idx in range(len(interpreter.get_output_details())):
- output_details = interpreter.get_output_details()[idx]
- output_data = np.fromfile(circle_model + ".output" + str(idx),
- output_details["dtype"])
+intp_output_details = interpreter.get_output_details()
+for idx in range(len(intp_output_details)):
+ output_details = intp_output_details[idx]
+ output_dtype = output_details["dtype"]
+ output_data = np.fromfile(circle_model + ".output" + str(idx), output_dtype)
shape_file = open(circle_model + ".output" + str(idx) + ".shape", 'r')
output_shape = [int(i) for i in shape_file.read().split(',')]
luci_output_data = np.reshape(output_data, output_shape)
+ output_tensor = output_details["index"]
+ if full_signatures_outputs_remap is not None:
+ output_tensor = full_signatures_outputs_remap[idx]
+ intp_output_data = interpreter.get_tensor(output_tensor)
try:
- if output_details["dtype"] == np.uint8:
- if np.allclose(
- luci_output_data,
- interpreter.get_tensor(
- interpreter.get_output_details()[idx]["index"]),
- rtol=0,
- atol=0) == False:
+ if output_dtype == np.uint8:
+ if np.allclose(luci_output_data, intp_output_data, rtol=0, atol=0) == False:
raise SystemExit("Execution result of " + tflite_model +
" does not match with " + circle_model)
- elif output_details["dtype"] == np.float32:
+ elif output_dtype == np.float32:
if np.allclose(
- luci_output_data,
- interpreter.get_tensor(
- interpreter.get_output_details()[idx]["index"]),
- rtol=1.e-5,
- atol=1.e-5) == False:
+ luci_output_data, intp_output_data, rtol=1.e-5, atol=1.e-5) == False:
raise SystemExit("Execution result of " + tflite_model +
" does not match with " + circle_model)
- elif output_details["dtype"] == np.int64:
- if np.allclose(
- luci_output_data,
- interpreter.get_tensor(
- interpreter.get_output_details()[idx]["index"]),
- rtol=0,
- atol=0) == False:
+ elif output_dtype == np.int64:
+ if np.allclose(luci_output_data, intp_output_data, rtol=0, atol=0) == False:
raise SystemExit("Execution result of " + tflite_model +
" does not match with " + circle_model)
- elif output_details["dtype"] == np.int32:
- if np.allclose(
- luci_output_data,
- interpreter.get_tensor(
- interpreter.get_output_details()[idx]["index"]),
- rtol=0,
- atol=0) == False:
+ elif output_dtype == np.int32:
+ if np.allclose(luci_output_data, intp_output_data, rtol=0, atol=0) == False:
+ raise SystemExit("Execution result of " + tflite_model +
+ " does not match with " + circle_model)
+ elif output_dtype == np.int16:
+ if np.allclose(luci_output_data, intp_output_data, rtol=0, atol=0) == False:
raise SystemExit("Execution result of " + tflite_model +
" does not match with " + circle_model)
else:
- raise SystemExit("Unsupported data type: ", output_details["dtype"])
+ raise SystemExit("Unsupported data type: ", output_dtype)
except:
print(traceback.format_exc())
quit(255)
diff --git a/compiler/circle-part-value-test/parts/Net_UnpackAdd_001.001.part b/compiler/circle-part-value-test/parts/Net_UnpackAdd_001.001.part
new file mode 100644
index 000000000..496971e55
--- /dev/null
+++ b/compiler/circle-part-value-test/parts/Net_UnpackAdd_001.001.part
@@ -0,0 +1,7 @@
+[partition]
+backends=cpu,npu
+default=cpu
+comply=opcode
+
+[OPCODE]
+ADD=npu
diff --git a/compiler/circle-part-value-test/parts/Net_UnpackAdd_001.002.part b/compiler/circle-part-value-test/parts/Net_UnpackAdd_001.002.part
new file mode 100644
index 000000000..9913fea96
--- /dev/null
+++ b/compiler/circle-part-value-test/parts/Net_UnpackAdd_001.002.part
@@ -0,0 +1,7 @@
+[partition]
+backends=cpu,npu
+default=cpu
+comply=opcode
+
+[OPCODE]
+UNPACK=npu
diff --git a/compiler/circle-part-value-test/parts/Net_UnpackAdd_001.part b/compiler/circle-part-value-test/parts/Net_UnpackAdd_001.part
new file mode 100644
index 000000000..c63efc592
--- /dev/null
+++ b/compiler/circle-part-value-test/parts/Net_UnpackAdd_001.part
@@ -0,0 +1,7 @@
+[partition]
+backends=cpu,npu
+default=npu
+comply=opcode
+
+[OPCODE]
+UNPACK=cpu
diff --git a/compiler/circle-part-value-test/parts/Part_Mul_Sqrt_FC_nobias_000_000.part b/compiler/circle-part-value-test/parts/Part_Mul_Sqrt_FC_nobias_000_000.part
new file mode 100644
index 000000000..ad0842165
--- /dev/null
+++ b/compiler/circle-part-value-test/parts/Part_Mul_Sqrt_FC_nobias_000_000.part
@@ -0,0 +1,7 @@
+[partition]
+backends=cpu,npu
+default=npu
+comply=opcode
+
+[OPCODE]
+MUL=npu
diff --git a/compiler/circle-part-value-test/parts/Part_Mul_Sqrt_FC_nobias_000_001.part b/compiler/circle-part-value-test/parts/Part_Mul_Sqrt_FC_nobias_000_001.part
new file mode 100644
index 000000000..c82b741b0
--- /dev/null
+++ b/compiler/circle-part-value-test/parts/Part_Mul_Sqrt_FC_nobias_000_001.part
@@ -0,0 +1,7 @@
+[partition]
+backends=cpu,npu
+default=npu
+comply=opcode
+
+[OPCODE]
+SQRT=cpu
diff --git a/compiler/circle-part-value-test/parts/Part_Mul_Sqrt_FC_nobias_000_002.part b/compiler/circle-part-value-test/parts/Part_Mul_Sqrt_FC_nobias_000_002.part
new file mode 100644
index 000000000..d9d2a8e59
--- /dev/null
+++ b/compiler/circle-part-value-test/parts/Part_Mul_Sqrt_FC_nobias_000_002.part
@@ -0,0 +1,7 @@
+[partition]
+backends=cpu,npu
+default=npu
+comply=opcode
+
+[OPCODE]
+FULLY_CONNECTED=cpu
diff --git a/compiler/circle-part-value-test/parts/Part_Split_Add_000.part b/compiler/circle-part-value-test/parts/Part_Split_Add_000.part
new file mode 100644
index 000000000..91af566cd
--- /dev/null
+++ b/compiler/circle-part-value-test/parts/Part_Split_Add_000.part
@@ -0,0 +1,7 @@
+[partition]
+backends=cpu,npu
+default=npu
+comply=opcode
+
+[OPCODE]
+SPLIT=cpu
diff --git a/compiler/circle-part-value-test/parts/Part_Tanh_FC_nobias.part b/compiler/circle-part-value-test/parts/Part_Tanh_FC_nobias.part
new file mode 100644
index 000000000..d4d439d27
--- /dev/null
+++ b/compiler/circle-part-value-test/parts/Part_Tanh_FC_nobias.part
@@ -0,0 +1,7 @@
+[partition]
+backends=cpu,acl_cl
+default=cpu
+comply=opcode
+
+[OPCODE]
+DIV=acl_cl
diff --git a/compiler/circle-part-value-test/parts/Part_Tanh_FC_nobias_001.part b/compiler/circle-part-value-test/parts/Part_Tanh_FC_nobias_001.part
new file mode 100644
index 000000000..dbd174ee1
--- /dev/null
+++ b/compiler/circle-part-value-test/parts/Part_Tanh_FC_nobias_001.part
@@ -0,0 +1,7 @@
+[partition]
+backends=cpu,npu
+default=npu
+comply=opcode
+
+[OPCODE]
+TANH=cpu
diff --git a/compiler/circle-part-value-test/parts/Part_Tanh_FC_nobias_002.part b/compiler/circle-part-value-test/parts/Part_Tanh_FC_nobias_002.part
new file mode 100644
index 000000000..475439a9d
--- /dev/null
+++ b/compiler/circle-part-value-test/parts/Part_Tanh_FC_nobias_002.part
@@ -0,0 +1,7 @@
+[partition]
+backends=cpu,npu
+default=cpu
+comply=opcode
+
+[OPCODE]
+FULLY_CONNECTED=npu
diff --git a/compiler/circle-part-value-test/parts/Part_Tanh_FC_nobias_003.part b/compiler/circle-part-value-test/parts/Part_Tanh_FC_nobias_003.part
new file mode 100644
index 000000000..d9d2a8e59
--- /dev/null
+++ b/compiler/circle-part-value-test/parts/Part_Tanh_FC_nobias_003.part
@@ -0,0 +1,7 @@
+[partition]
+backends=cpu,npu
+default=npu
+comply=opcode
+
+[OPCODE]
+FULLY_CONNECTED=cpu
diff --git a/compiler/circle-part-value-test/parts/SignatureDef_MultiOut_000.part b/compiler/circle-part-value-test/parts/SignatureDef_MultiOut_000.part
new file mode 100644
index 000000000..e469eeb26
--- /dev/null
+++ b/compiler/circle-part-value-test/parts/SignatureDef_MultiOut_000.part
@@ -0,0 +1,7 @@
+[partition]
+backends=cpu,acl_cl
+default=cpu
+comply=opcode
+
+[OPCODE]
+MAXIMUM=acl_cl
diff --git a/compiler/circle-part-value-test/parts/SignatureDef_MultiOut_001.part b/compiler/circle-part-value-test/parts/SignatureDef_MultiOut_001.part
new file mode 100644
index 000000000..e469eeb26
--- /dev/null
+++ b/compiler/circle-part-value-test/parts/SignatureDef_MultiOut_001.part
@@ -0,0 +1,7 @@
+[partition]
+backends=cpu,acl_cl
+default=cpu
+comply=opcode
+
+[OPCODE]
+MAXIMUM=acl_cl
diff --git a/compiler/circle-part-value-test/test.lst b/compiler/circle-part-value-test/test.lst
index af2f5ba5c..b7a3f403a 100644
--- a/compiler/circle-part-value-test/test.lst
+++ b/compiler/circle-part-value-test/test.lst
@@ -35,3 +35,24 @@ add(Part_If_Add_Sub_001 Part_If_Add_Sub_001.001 3)
# WHILE with subgraphs
add(Part_While_000 Part_While_000 3)
add(Part_While_001 Part_While_001 3)
+
+# UNPACK with multiple outputs
+add(Net_UnpackAdd_001 Net_UnpackAdd_001 2)
+add(Net_UnpackAdd_001 Net_UnpackAdd_001.001 2)
+add(Net_UnpackAdd_001 Net_UnpackAdd_001.002 2)
+
+# Other multiple outputs
+add(Part_Split_Add_000 Part_Split_Add_000 2)
+
+# test SignatureDef, with any OPCODE
+add(SignatureDef_MultiOut_000 SignatureDef_MultiOut_000 0)
+add(SignatureDef_MultiOut_001 SignatureDef_MultiOut_001 0)
+
+# FC with nobias
+add(Part_Tanh_FC_nobias Part_Tanh_FC_nobias 1)
+add(Part_Tanh_FC_nobias Part_Tanh_FC_nobias_001 2)
+add(Part_Tanh_FC_nobias Part_Tanh_FC_nobias_002 2)
+add(Part_Tanh_FC_nobias Part_Tanh_FC_nobias_003 2)
+add(Part_Mul_Sqrt_FC_nobias_000 Part_Mul_Sqrt_FC_nobias_000_000 0)
+add(Part_Mul_Sqrt_FC_nobias_000 Part_Mul_Sqrt_FC_nobias_000_001 0)
+add(Part_Mul_Sqrt_FC_nobias_000 Part_Mul_Sqrt_FC_nobias_000_002 0)
diff --git a/compiler/circle-partitioner-test/CMakeLists.txt b/compiler/circle-partitioner-test/CMakeLists.txt
index ed8c97948..e29a66b41 100644
--- a/compiler/circle-partitioner-test/CMakeLists.txt
+++ b/compiler/circle-partitioner-test/CMakeLists.txt
@@ -57,8 +57,8 @@ foreach(IDX RANGE ${RECIPE_LENGTH_M1})
# Run partitioner
set(PART_CONN_JSON "${PART_OUT_PATH}/${PART_NAME}.conn.json")
add_custom_command(OUTPUT ${PART_CONN_JSON}
- COMMAND circle_partitioner "${PART_FILE}" "${PART_NAME}.circle" "${PART_OUT_PATH}"
- DEPENDS circle_partitioner ${CIRCLE_DST_PATH} ${PART_DST_PATH}
+ COMMAND circle-partitioner "${PART_FILE}" "${PART_NAME}.circle" "${PART_OUT_PATH}"
+ DEPENDS circle-partitioner ${CIRCLE_DST_PATH} ${PART_DST_PATH}
COMMENT "Parition ${RECIPE_NAME}.circle with ${PART_FILE}"
)
# NOTE this is checked in build time and not added with 'add_test' command
diff --git a/compiler/circle-partitioner-test/parts/Part_Add_SVDF_000.part b/compiler/circle-partitioner-test/parts/Part_Add_SVDF_000.part
new file mode 100644
index 000000000..01b8c704e
--- /dev/null
+++ b/compiler/circle-partitioner-test/parts/Part_Add_SVDF_000.part
@@ -0,0 +1,7 @@
+[partition]
+backends=cpu,acl_cl
+default=cpu
+comply=opcode
+
+[OPCODE]
+ADD=acl_cl
diff --git a/compiler/circle-partitioner-test/test.lst b/compiler/circle-partitioner-test/test.lst
index b731f8d0e..c0c185c7e 100644
--- a/compiler/circle-partitioner-test/test.lst
+++ b/compiler/circle-partitioner-test/test.lst
@@ -5,3 +5,7 @@
# add(RECIPE_NAME PART_NAME)
add(Net_InstanceNorm_003 Net_InstanceNorm_003)
+
+# NOTE SVDF partition test is done here as value test may need custom tolerance
+# TODO move Part_Add_SVDF_000 to circle-part-value-test when ready
+add(Part_Add_SVDF_000 Part_Add_SVDF_000)
diff --git a/compiler/circle-partitioner/CMakeLists.txt b/compiler/circle-partitioner/CMakeLists.txt
index 28a16c9fc..9b8f5afae 100644
--- a/compiler/circle-partitioner/CMakeLists.txt
+++ b/compiler/circle-partitioner/CMakeLists.txt
@@ -1,5 +1,24 @@
file(GLOB_RECURSE SOURCES "src/*.cpp")
+add_executable(circle-partitioner "${SOURCES}")
+target_link_libraries(circle-partitioner foder)
+target_link_libraries(circle-partitioner crew)
+target_link_libraries(circle-partitioner safemain)
+target_link_libraries(circle-partitioner luci_lang)
+target_link_libraries(circle-partitioner luci_log)
+target_link_libraries(circle-partitioner luci_import)
+target_link_libraries(circle-partitioner luci_service)
+target_link_libraries(circle-partitioner luci_pass)
+target_link_libraries(circle-partitioner luci_export)
+target_link_libraries(circle-partitioner luci_partition)
+target_link_libraries(circle-partitioner arser)
+target_link_libraries(circle-partitioner pepper_csv2vec)
+target_link_libraries(circle-partitioner vconone)
+target_link_libraries(circle-partitioner nncc_common)
+
+install(TARGETS circle-partitioner DESTINATION bin)
+
+# TODO remove circle_partitioner
add_executable(circle_partitioner "${SOURCES}")
target_link_libraries(circle_partitioner foder)
target_link_libraries(circle_partitioner crew)
diff --git a/compiler/circle-partitioner/README.md b/compiler/circle-partitioner/README.md
index 5fd312e33..2e0a98638 100644
--- a/compiler/circle-partitioner/README.md
+++ b/compiler/circle-partitioner/README.md
@@ -94,7 +94,7 @@ Net_InstanceNorm_003/
Command example
```
-./circle_partitioner Net_InstanceNorm_003.part Net_InstanceNorm_003.circle Net_InstanceNorm_003
+./circle-partitioner Net_InstanceNorm_003.part Net_InstanceNorm_003.circle Net_InstanceNorm_003
```
Result of _circle-partitioner_
@@ -163,3 +163,131 @@ as the `source` model: `[ "Input" ]`.
`Net_InstanceNorm_003.00002_acl_cl.circle` to which they should be connected.
- And `outputs` `[ "Div" ]` should be connected to `inputs` of
third model `Net_InstanceNorm_003.00003_cpu.circle`.
+
+### Execution example
+
+Consider partitioning with the backends of OneRT:
+- `cpu`, `acl_cl`, `acl_neon`, `ruy`, `xnnpack`
+
+Let's try with this command:
+```
+circle-partitioner \
+ --partition Net_InstanceNorm_003.part \
+ --backends cpu,acl_cl \
+ --default cpu \
+ Net_InstanceNorm_003.circle Net_InstanceNorm_003
+```
+
+where `Net_InstanceNorm_003.part` looks like this for the initial design:
+```
+[partition]
+backends=cpu,acl_cl
+default=cpu
+comply=opcode
+
+[OPCODE]
+ADD=acl_cl
+```
+where, in the `[partition]` section,
+- `backends` lists the available backends and can be overridden by `--backends`
+- `default` is the default backend for OpCodes not assigned in the `[OPCODE]` section and can be overridden by `--default`
+- `comply` selects which rule to apply; only `opcode` is available for now
+
+#### Use Op name to assign backend
+
+```
+[OP]
+Reduction_indices=GPU
+```
+- note that some Op names are very long, which may be inconvenient
+
+### Partitioned output
+
+#### Output files
+
+After partitioning is applied, the output files will look like these:
+- `Net_InstanceNorm_003.part.00001_cpu.circle`
+- `Net_InstanceNorm_003.part.00002_acl_cl.circle`
+- `Net_InstanceNorm_003.part.00003_cpu.circle`
+- `Net_InstanceNorm_003.part.conn.ini`
+- `Net_InstanceNorm_003.part.conn.json`
+
+Assume only the `Div` node is assigned to `acl_cl`.
+
+#### Connection information of partitioned circle files
+
+##### Format with ini
+- `Net_InstanceNorm_003.part.conn.ini` provides the connections between the partitioned circle files.
+```
+[source]
+file=Net_InstanceNorm_003.circle
+i1=Input
+o1=Add_as_terminal
+
+[models]
+m1=Net_InstanceNorm_003.part.00001_cpu.circle
+m2=Net_InstanceNorm_003.part.00002_acl_cl.circle
+m3=Net_InstanceNorm_003.part.00003_cpu.circle
+
+[Net_InstanceNorm_003.part.00001_cpu.circle]
+file=Net_InstanceNorm_003.part.00001_cpu.circle
+i1=Input
+o1=Pow
+o2=Sub
+
+[Net_InstanceNorm_003.part.00002_acl_cl.circle]
+file=Net_InstanceNorm_003.part.00002_acl_cl.circle
+i1=Sub
+i2=Pow
+o1=Div
+
+[Net_InstanceNorm_003.part.00003_cpu.circle]
+file=Net_InstanceNorm_003.part.00003_cpu.circle
+i1=Div
+o1=Add_as_terminal
+```
+
+Predefined sections
+- `source`: Source circle model information. Has `file` as filename, `iN` for inputs and `oN` for outputs.
+- `models`: Partitioned circle models. Has `mN` for model filename.
+
+Partitioned model sections
+- `iN`: inputs of this model
+- `oN`: outputs of this model
+
+In the graph diagram, the output order of `Net_InstanceNorm_003.part.00001_cpu.circle`
+looks like `Pow,Sub`, but the `Div` Op in `Net_InstanceNorm_003.part.00002_acl_cl.circle`
+requires the order `Sub,Pow`.
+
+##### Format with JSON
+- The same connection information is also provided in JSON format, `Net_InstanceNorm_003.part.conn.json`
+```json
+{
+ "source" : {
+ "file" : "Net_InstanceNorm_003.circle",
+ "inputs" : [ "Input" ],
+ "outputs" : [ "Add_as_terminal" ]
+ },
+ "parts" : [
+ {
+ "file" : "Net_InstanceNorm_003.part.00001_cpu.circle",
+ "inputs" : [ "Input" ],
+ "outputs" : [ "Pow", "Sub" ],
+ },
+ {
+ "file" : "Net_InstanceNorm_003.part.00002_acl_cl.circle",
+ "inputs" : [ "Pow", "Sub" ],
+ "outputs" : [ "Div" ]
+ },
+ {
+ "file" : "Net_InstanceNorm_003.part.00003_cpu.circle",
+ "inputs" : [ "Div" ],
+ "outputs" : [ "Add_as_terminal" ]
+ }
+ ]
+}
+```
+
+### Future work
+
+How to partition with multiple inputs?
diff --git a/compiler/circle-quantizer-dredd-recipe-test/CMakeLists.txt b/compiler/circle-quantizer-dredd-recipe-test/CMakeLists.txt
new file mode 100644
index 000000000..5ec8b6ee5
--- /dev/null
+++ b/compiler/circle-quantizer-dredd-recipe-test/CMakeLists.txt
@@ -0,0 +1,144 @@
+if(NOT ENABLE_TEST)
+ return()
+endif(NOT ENABLE_TEST)
+
+nnas_include(TargetRequire)
+
+unset(REQUIRED_TARGETS)
+list(APPEND REQUIRED_TARGETS circle-inspect)
+list(APPEND REQUIRED_TARGETS circle-verify)
+list(APPEND REQUIRED_TARGETS circle-quantizer)
+list(APPEND REQUIRED_TARGETS record-minmax)
+list(APPEND REQUIRED_TARGETS dredd_rule_lib)
+TargetRequire_Return(${REQUIRED_TARGETS})
+
+unset(TEST_DEPS)
+unset(TEST_NAMES)
+
+get_target_property(ARTIFACTS_BIN_PATH testDataGenerator BINARY_DIR)
+
+set(options USE_QCONFIG)
+set(oneValueArgs DTYPE GRANULARITY)
+set(multiValueArgs "")
+
+macro(Add RECIPE)
+ cmake_parse_arguments(ARG "${options}" "${oneValueArgs}" "${multiValueArgs}" ${ARGN})
+
+ set(QCONFIG_OPT "")
+ if(ARG_USE_QCONFIG)
+ set(QCONFIG_OPT "--config" "${ARTIFACTS_BIN_PATH}/${RECIPE}.qconf.json")
+ endif()
+
+ set(CIRCLE_PATH "${ARTIFACTS_BIN_PATH}/${RECIPE}.circle")
+ set(FAKE_QUANT_CIRCLE_PATH "${CMAKE_CURRENT_BINARY_DIR}/${RECIPE}.fq.circle")
+ set(RECORDED_CIRCLE_PATH "${CMAKE_CURRENT_BINARY_DIR}/${RECIPE}.recorded.circle")
+ set(QUANT_CIRCLE_PATH "${CMAKE_CURRENT_BINARY_DIR}/${RECIPE}.q.circle")
+
+ # Generate quantized .circle
+ add_custom_command(OUTPUT ${QUANT_CIRCLE_PATH}
+ COMMAND $<TARGET_FILE:circle-quantizer> --quantize_dequantize_weights float32 ${ARG_DTYPE} ${ARG_GRANULARITY} ${QCONFIG_OPT} ${CIRCLE_PATH} ${FAKE_QUANT_CIRCLE_PATH}
+ COMMAND $<TARGET_FILE:record-minmax> --input_model ${FAKE_QUANT_CIRCLE_PATH} --output_model ${RECORDED_CIRCLE_PATH}
+ COMMAND $<TARGET_FILE:circle-quantizer> --quantize_with_minmax float32 ${ARG_DTYPE} ${ARG_GRANULARITY} ${QCONFIG_OPT} ${RECORDED_CIRCLE_PATH} ${QUANT_CIRCLE_PATH}
+ DEPENDS
+ circle-quantizer
+ record-minmax
+ ${CIRCLE_PATH}
+ COMMENT "Generate ${RECIPE}.q.circle"
+ )
+
+ list(APPEND TEST_DEPS ${QUANT_CIRCLE_PATH})
+ list(APPEND TEST_NAMES ${RECIPE})
+endmacro(Add)
+
+# Macro to generate fully fake-quantized models
+macro(AddFakeQuant RECIPE)
+ set(CIRCLE_PATH "${ARTIFACTS_BIN_PATH}/${RECIPE}.circle")
+ # NOTE We use .q.circle because it is the convention for the output file (see testall.sh for details)
+ set(FULL_FAKE_QUANT_CIRCLE_PATH "${CMAKE_CURRENT_BINARY_DIR}/${RECIPE}.q.circle")
+
+ # Generate fully fake-quantized .circle
+ add_custom_command(OUTPUT ${FULL_FAKE_QUANT_CIRCLE_PATH}
+ COMMAND $<TARGET_FILE:circle-quantizer> --fake_quantize ${CIRCLE_PATH} ${FULL_FAKE_QUANT_CIRCLE_PATH}
+ DEPENDS
+ circle-quantizer
+ ${CIRCLE_PATH}
+ COMMENT "Generate ${RECIPE}.q.circle"
+ )
+
+ list(APPEND TEST_DEPS ${FULL_FAKE_QUANT_CIRCLE_PATH})
+ list(APPEND TEST_NAMES ${RECIPE})
+endmacro(AddFakeQuant)
+
+# Read "test.lst"
+include("test.lst")
+
+##
+## Copy testall
+##
+set(TEST_RUNNER "${CMAKE_CURRENT_BINARY_DIR}/testall.sh")
+set(TEST_RUNNER_SOURCE "${CMAKE_CURRENT_SOURCE_DIR}/testall.sh")
+
+add_custom_command(
+ OUTPUT ${TEST_RUNNER}
+ COMMAND ${CMAKE_COMMAND} -E copy "${TEST_RUNNER_SOURCE}" "${TEST_RUNNER}"
+ DEPENDS ${TEST_RUNNER_SOURCE}
+ COMMENT "Generate test runner"
+)
+
+list(APPEND TEST_DEPS "${TEST_RUNNER}")
+
+###
+### Generate test.config
+###
+set(TEST_CONFIG "${CMAKE_CURRENT_BINARY_DIR}/test.config")
+
+add_custom_command(
+ OUTPUT ${TEST_CONFIG}
+ COMMAND ${CMAKE_COMMAND} -E remove -f ${TEST_CONFIG}
+ COMMAND ${CMAKE_COMMAND} -E echo 'CIRCLE_INSPECT_PATH=\"$<TARGET_FILE:circle-inspect>\"' >> ${TEST_CONFIG}
+ COMMAND ${CMAKE_COMMAND} -E echo 'CIRCLE_VERIFY_PATH=\"$<TARGET_FILE:circle-verify>\"' >> ${TEST_CONFIG}
+ COMMAND ${CMAKE_COMMAND} -E echo 'RECORD_MINMAX_PATH=\"$<TARGET_FILE:record-minmax>\"' >> ${TEST_CONFIG}
+ COMMAND ${CMAKE_COMMAND} -E echo 'CIRCLE_QUANTIZER_PATH=\"$<TARGET_FILE:circle-quantizer>\"' >> ${TEST_CONFIG}
+ DEPENDS
+ circle-inspect
+ circle-verify
+ record-minmax
+ circle-quantizer
+ COMMENT "Generate test configuration"
+)
+
+list(APPEND TEST_DEPS "${TEST_CONFIG}")
+
+#
+# copy rule-lib.sh (a library of shell script functions)
+#
+
+# getting path for rule-lib.sh in dredd-rule-lib
+get_target_property(DREDD_RULE_LIB_DIR dredd_rule_lib BINARY_DIR)
+
+set(RULE_LIB_SOURCE_PATH "${DREDD_RULE_LIB_DIR}/rule-lib.sh")
+set(RULE_LIB_BINARY_PATH "${CMAKE_CURRENT_BINARY_DIR}/rule-lib.sh")
+
+add_custom_command(
+ OUTPUT ${RULE_LIB_BINARY_PATH}
+ COMMAND ${CMAKE_COMMAND} -E copy "${RULE_LIB_SOURCE_PATH}" "${RULE_LIB_BINARY_PATH}"
+ DEPENDS ${RULE_LIB_SOURCE_PATH}
+ COMMENT "Generate rule lib"
+)
+
+list(APPEND TEST_DEPS "${RULE_LIB_BINARY_PATH}")
+
+# Generate dependencies
+add_custom_target(circle_quantizer_dredd_recipe_test ALL DEPENDS ${TEST_DEPS})
+add_dependencies(circle_quantizer_dredd_recipe_test common_artifacts_deps)
+
+get_target_property(ARTIFACTS_BIN_PATH testDataGenerator BINARY_DIR)
+
+# Run tests
+add_test(
+ NAME circle_quantizer_dredd_recipe_test
+ COMMAND ${TEST_RUNNER}
+ ${TEST_CONFIG}
+ ${ARTIFACTS_BIN_PATH}
+ ${TEST_NAMES}
+)
diff --git a/compiler/circle-quantizer-dredd-recipe-test/README.md b/compiler/circle-quantizer-dredd-recipe-test/README.md
new file mode 100644
index 000000000..61525495a
--- /dev/null
+++ b/compiler/circle-quantizer-dredd-recipe-test/README.md
@@ -0,0 +1,37 @@
+# circle-quantizer-dredd-recipe-test
+
+It tests non-functional conditions of a quantized circle model generated by circle-quantizer.
+
+## How to add a test?
+
+1. Create a directory under `res/TensorFlowLiteRecipes/` or `res/CircleRecipes/`.
+
+2. Make a recipe (`test.recipe`) for fp32 model under the directory.
+
+3. Make a rule (`test.rule`) you want to test under the directory. (For more information on dredd-test-rules, see _dredd-rule-lib_ module.)
+
+4. Add the test to `test.lst` in this module with the `Add` macro.
+
+```
+Add(RECIPE_DIR DTYPE dtype GRANULARITY granularity USE_QCONFIG)
+```
+
+- `RECIPE_DIR`: Path to the directory where the recipe file is saved.
+- `DTYPE`: Default quantization dtype (uint8, int16)
+- `GRANULARITY`: Quantization granularity (channel, layer)
+- `USE_QCONFIG`: (Optional) Whether to use a quantization configuration file or not. If this is set, `test.qconf.json` should exist under `RECIPE_DIR`
+
+## Example
+
+```
+# TensorFlowLiteRecipes
+res/TensorFlowLiteRecipes/Quant_Conv_Mul_Add_000
+├── test.recipe # What you want to test
+├── test.rule # Non-functional conditions to be satisfied
+└── test.qconf.json # Quantization configuration file (optional)
+
+# test.lst
+...
+Add(Quant_Conv_Mul_Add_000 DTYPE uint8 GRANULARITY channel USE_QCONFIG)
+...
+```
diff --git a/compiler/circle-quantizer-dredd-recipe-test/requires.cmake b/compiler/circle-quantizer-dredd-recipe-test/requires.cmake
new file mode 100644
index 000000000..7450f7322
--- /dev/null
+++ b/compiler/circle-quantizer-dredd-recipe-test/requires.cmake
@@ -0,0 +1,6 @@
+require("circle-quantizer")
+require("record-minmax")
+require("circle-inspect")
+require("circle-verify")
+require("common-artifacts")
+require("dredd-rule-lib")
diff --git a/compiler/circle-quantizer-dredd-recipe-test/test.lst b/compiler/circle-quantizer-dredd-recipe-test/test.lst
new file mode 100644
index 000000000..188103016
--- /dev/null
+++ b/compiler/circle-quantizer-dredd-recipe-test/test.lst
@@ -0,0 +1,15 @@
+## EXAMPLE
+#
+# Add(RECIPE_DIR DTYPE dtype GRANULARITY granularity USE_QCONFIG(optional))
+# AddFakeQuant(RECIPE_DIR)
+#
+
+## TFLITE RECIPE
+
+Add(Quant_Conv_Mul_Add_000 DTYPE uint8 GRANULARITY channel USE_QCONFIG)
+Add(Quant_Conv_Mul_Add_001 DTYPE uint8 GRANULARITY channel USE_QCONFIG)
+Add(Quant_Conv_Mul_Add_002 DTYPE uint8 GRANULARITY channel USE_QCONFIG)
+Add(Quant_Split_Add_000 DTYPE uint8 GRANULARITY channel USE_QCONFIG)
+Add(Quant_Split_Add_001 DTYPE uint8 GRANULARITY channel USE_QCONFIG)
+
+AddFakeQuant(Quant_Add_000)
diff --git a/compiler/circle-quantizer-dredd-recipe-test/testall.sh b/compiler/circle-quantizer-dredd-recipe-test/testall.sh
new file mode 100755
index 000000000..e5d5cf2b8
--- /dev/null
+++ b/compiler/circle-quantizer-dredd-recipe-test/testall.sh
@@ -0,0 +1,100 @@
+#!/bin/bash
+
+# Need at least 2 arguments
+if [[ $# -lt 2 ]]; then
+ echo "USAGE: $0 ..."
+ echo
+ echo "ARGUMENTS:"
+ echo " [test.config path]"
+ echo " [WORKDIR]"
+ echo " [Prefix1]"
+ echo " [Prefix2]"
+ echo " ..."
+ exit 255
+fi
+
+WORKDIR="$( cd "$( dirname "${BASH_SOURCE[0]}" )" && pwd )"
+CONFIG_PATH="$1"; shift
+RESOURCE_DIR="$1"; shift
+
+source "${CONFIG_PATH}"
+
+echo "-- Found circle-inspect: ${CIRCLE_INSPECT_PATH}"
+echo "-- Found circle-verify: ${CIRCLE_VERIFY_PATH}"
+echo "-- Found circle-quantizer: ${CIRCLE_QUANTIZER_PATH}"
+echo "-- Found record-minmax: ${RECORD_MINMAX_PATH}"
+echo "-- Found common-artifacts: ${RESOURCE_DIR}"
+
+TESTED=()
+PASSED=()
+FAILED=()
+
+pushd ${WORKDIR}
+while [[ $# -ne 0 ]]; do
+ PREFIX="$1"; shift
+
+ TESTED+=("${PREFIX}")
+
+ PASSED_TAG="${PREFIX}.passed"
+
+ rm -f "${PASSED_TAG}"
+
+ cat > "${PREFIX}.log" <(
+ exec 2>&1
+
+ echo "-- Found circle: ${PREFIX}.q.circle"
+
+ # Exit immediately if any command fails
+ set -e
+ # Show commands
+ set -x
+
+ #
+ # Check if rule is satisfied
+ #
+
+ # Note: turn off 'command printing'. Otherwise the output will be very messy
+ set +x
+
+ # (COMPILED_FILE, INSPECT_PROG_PATH, VERIFY_PROG_PATH, ERROR_LOG) must be set for rule-lib.sh
+ COMPILED_FILE="${PREFIX}.q.circle"
+ INSPECT_PROG_PATH=${CIRCLE_INSPECT_PATH}
+ VERIFY_PROG_PATH=${CIRCLE_VERIFY_PATH}
+ ERROR_LOG="${PREFIX}.error"
+
+ rm -f "${ERROR_LOG}"
+
+ # In case of an error while running rule-lib.sh, print the error message
+ trap 'echo "** ERROR **" ; cat "${ERROR_LOG}"' ERR
+
+ source rule-lib.sh
+ source "${RESOURCE_DIR}/${PREFIX}.rule"
+
+ # unset
+ trap - ERR
+ set -x
+
+ # At this point, the exit code of all commands is 0
+ # If not 0, execution of this script ends because of "set -e"
+ touch "${PASSED_TAG}"
+ )
+
+ if [[ -f "${PASSED_TAG}" ]]; then
+ PASSED+=("$PREFIX")
+ else
+ FAILED+=("$PREFIX")
+ fi
+done
+popd
+
+if [[ ${#TESTED[@]} -ne ${#PASSED[@]} ]]; then
+ echo "FAILED"
+ for TEST in "${FAILED[@]}"
+ do
+ echo "- ${TEST}"
+ done
+ exit 255
+fi
+
+echo "PASSED"
+exit 0
diff --git a/compiler/circle-quantizer/CMakeLists.txt b/compiler/circle-quantizer/CMakeLists.txt
index a5f5f61c4..14e00972b 100644
--- a/compiler/circle-quantizer/CMakeLists.txt
+++ b/compiler/circle-quantizer/CMakeLists.txt
@@ -1,11 +1,19 @@
+nnas_find_package(Jsoncpp)
+if(NOT Jsoncpp_FOUND)
+ message(STATUS "Build jsoncpp: FAILED (missing jsoncpp)")
+ return()
+endif(NOT Jsoncpp_FOUND)
+
set (SOURCES src/CircleQuantizer.cpp)
add_executable(circle-quantizer "${SOURCES}")
+target_include_directories(circle-quantizer PRIVATE ${Jsoncpp_INCLUDE_DIRS})
+
+target_link_libraries(circle-quantizer ${Jsoncpp_STATIC_LIB})
target_link_libraries(circle-quantizer foder)
target_link_libraries(circle-quantizer safemain)
target_link_libraries(circle-quantizer oops)
target_link_libraries(circle-quantizer loco)
-target_link_libraries(circle-quantizer mio_circle)
target_link_libraries(circle-quantizer luci_import)
target_link_libraries(circle-quantizer luci_service)
target_link_libraries(circle-quantizer luci_pass)
diff --git a/compiler/circle-quantizer/src/CircleQuantizer.cpp b/compiler/circle-quantizer/src/CircleQuantizer.cpp
index 57ac30a87..e0c85cb6e 100644
--- a/compiler/circle-quantizer/src/CircleQuantizer.cpp
+++ b/compiler/circle-quantizer/src/CircleQuantizer.cpp
@@ -17,7 +17,7 @@
#include <foder/FileLoader.h>
#include <luci/Importer.h>
-#include <luci/CircleOptimizer.h>
+#include <luci/CircleQuantizer.h>
#include <luci/Service/Validate.h>
#include <luci/CircleExporter.h>
#include <luci/CircleFileExpContract.h>
@@ -26,6 +26,7 @@
#include <oops/InternalExn.h>
#include <arser/arser.h>
#include <vconone/vconone.h>
+#include <json.h>
#include <functional>
#include <iostream>
@@ -34,8 +35,41 @@
using OptionHook = std::function<int(const char **)>;
-using Algorithms = luci::CircleOptimizer::Options::Algorithm;
-using AlgorithmParameters = luci::CircleOptimizer::Options::AlgorithmParameters;
+using LayerParam = luci::CircleQuantizer::Options::LayerParam;
+using Algorithms = luci::CircleQuantizer::Options::Algorithm;
+using AlgorithmParameters = luci::CircleQuantizer::Options::AlgorithmParameters;
+
+std::vector<std::shared_ptr<LayerParam>> read_layer_params(std::string &filename)
+{
+ Json::Value root;
+ std::ifstream ifs(filename);
+
+ // Failed to open cfg file
+ if (not ifs.is_open())
+ throw std::runtime_error("Cannot open config file. " + filename);
+
+ Json::CharReaderBuilder builder;
+ JSONCPP_STRING errs;
+
+ // Failed to parse
+ if (not parseFromStream(builder, ifs, &root, &errs))
+ throw std::runtime_error("Cannot parse config file (json format). " + errs);
+
+ auto layers = root["layers"];
+ std::vector<std::shared_ptr<LayerParam>> p;
+ for (auto layer : layers)
+ {
+ auto l = std::make_shared<LayerParam>();
+ {
+ l->name = layer["name"].asString();
+ l->dtype = layer["dtype"].asString();
+ l->granularity = layer["granularity"].asString();
+ }
+ p.emplace_back(l);
+ }
+
+ return p;
+}
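
For reference, read_layer_params above expects a JSON document shaped like the following (a hypothetical example; the keys match what the code reads, and the values follow the dtype/granularity choices named in the quantization help text):

    {
      "layers" : [
        { "name" : "conv2d_1", "dtype" : "int16", "granularity" : "channel" },
        { "name" : "fc_2", "dtype" : "uint8", "granularity" : "layer" }
      ]
    }

A missing file or malformed JSON raises std::runtime_error, which entry() below reports and maps to exit code 255.
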
void print_exclusive_options(void)
{
@@ -56,15 +90,20 @@ int entry(int argc, char **argv)
{
// Simple argument parser (based on map)
std::map<std::string, OptionHook> argparse;
- luci::CircleOptimizer optimizer;
+ luci::CircleQuantizer quantizer;
- auto options = optimizer.options();
+ auto options = quantizer.options();
auto settings = luci::UserSettings::settings();
const std::string qdqw = "--quantize_dequantize_weights";
const std::string qwmm = "--quantize_with_minmax";
const std::string rq = "--requantize";
const std::string fq = "--force_quantparam";
+ const std::string cq = "--copy_quantparam";
+ const std::string fake_quant = "--fake_quantize";
+ const std::string cfg = "--config";
+
+ const std::string tf_maxpool = "--TF-style_maxpool";
const std::string gpd = "--generate_profile_data";
@@ -99,6 +138,19 @@ int entry(int argc, char **argv)
"Three arguments required: input_model_dtype(float32) "
"output_model_dtype(uint8) granularity(layer, channel)");
+ arser.add_argument(tf_maxpool)
+ .nargs(0)
+ .required(false)
+ .default_value(false)
+ .help("Force MaxPool Op to have the same input/output quantparams. NOTE: This feature can "
+ "degrade accuracy of some models");
+
+ arser.add_argument(fake_quant)
+ .nargs(0)
+ .required(false)
+ .help("Convert a quantized model to a fake-quantized model. NOTE: This feature will "
+ "generate an fp32 model.");
+
arser.add_argument(rq)
.nargs(2)
.type(arser::DataType::STR_VEC)
@@ -116,6 +168,15 @@ int entry(int argc, char **argv)
"Three arguments required: tensor_name(string), "
"scale(float) zero_point(int)");
+ arser.add_argument(cq)
+ .nargs(2)
+ .type(arser::DataType::STR_VEC)
+ .required(false)
+ .accumulated(true)
+ .help("Copy quantization parameter from a tensor to another tensor."
+ "Two arguments required: source_tensor_name(string), "
+ "destination_tensor_name(string)");
+
arser.add_argument("--input_type")
.nargs(1)
.type(arser::DataType::STR)
@@ -128,6 +189,12 @@ int entry(int argc, char **argv)
.required(false)
.help("Output type of quantized model (uint8 or int16)");
+ arser.add_argument(cfg)
+ .nargs(1)
+ .type(arser::DataType::STR)
+ .required(false)
+ .help("Path to the quantization configuration file");
+
arser.add_argument("input").nargs(1).type(arser::DataType::STR).help("Input circle model");
arser.add_argument("output").nargs(1).type(arser::DataType::STR).help("Output circle model");
@@ -146,11 +213,13 @@ int entry(int argc, char **argv)
}
{
- // only one of qdqw, qwmm, rq, fq option can be used
+    // only one of the qdqw, qwmm, rq, fq, cq, fake_quant options can be used
int32_t opt_used = arser[qdqw] ? 1 : 0;
opt_used += arser[qwmm] ? 1 : 0;
opt_used += arser[rq] ? 1 : 0;
opt_used += arser[fq] ? 1 : 0;
+ opt_used += arser[cq] ? 1 : 0;
+ opt_used += arser[fake_quant] ? 1 : 0;
if (opt_used != 1)
{
print_exclusive_options();
@@ -178,6 +247,22 @@ int entry(int argc, char **argv)
options->param(AlgorithmParameters::Quantize_input_model_dtype, values.at(0));
options->param(AlgorithmParameters::Quantize_output_model_dtype, values.at(1));
options->param(AlgorithmParameters::Quantize_granularity, values.at(2));
+
+ if (arser[cfg])
+ {
+ auto filename = arser.get<std::string>(cfg);
+ try
+ {
+ auto layer_params = read_layer_params(filename);
+
+ options->layer_params(AlgorithmParameters::Quantize_layer_params, layer_params);
+ }
+ catch (const std::runtime_error &e)
+ {
+ std::cerr << e.what() << '\n';
+ return 255;
+ }
+ }
}
if (arser[qwmm])
@@ -201,6 +286,25 @@ int entry(int argc, char **argv)
if (arser["--output_type"])
options->param(AlgorithmParameters::Quantize_output_type,
arser.get<std::string>("--output_type"));
+
+ if (arser[tf_maxpool] and arser.get<bool>(tf_maxpool))
+ options->param(AlgorithmParameters::Quantize_TF_style_maxpool, "True");
+
+ if (arser[cfg])
+ {
+ auto filename = arser.get<std::string>(cfg);
+ try
+ {
+ auto layer_params = read_layer_params(filename);
+
+ options->layer_params(AlgorithmParameters::Quantize_layer_params, layer_params);
+ }
+ catch (const std::runtime_error &e)
+ {
+ std::cerr << e.what() << '\n';
+ return 255;
+ }
+ }
}
if (arser[rq])
@@ -245,6 +349,34 @@ int entry(int argc, char **argv)
options->params(AlgorithmParameters::Quantize_zero_points, zero_points);
}
+ if (arser[cq])
+ {
+ auto values = arser.get<std::vector<std::vector<std::string>>>(cq);
+
+ std::vector<std::string> src;
+ std::vector<std::string> dst;
+
+    for (const auto &value : values)
+ {
+ if (value.size() != 2)
+ {
+ std::cerr << arser;
+ return 255;
+ }
+
+ src.push_back(value[0]);
+ dst.push_back(value[1]);
+ }
+
+ options->enable(Algorithms::CopyQuantParam);
+
+ options->params(AlgorithmParameters::Quantize_src_tensor_names, src);
+ options->params(AlgorithmParameters::Quantize_dst_tensor_names, dst);
+ }
+
+ if (arser[fake_quant])
+ options->enable(Algorithms::ConvertToFakeQuantizedModel);
+
std::string input_path = arser.get<std::string>("input");
std::string output_path = arser.get<std::string>("output");
@@ -279,7 +411,7 @@ int entry(int argc, char **argv)
auto graph = module->graph(idx);
// quantize the graph
- optimizer.quantize(graph);
+ quantizer.quantize(graph);
if (!luci::validate(graph))
{
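
Putting the new options together, a usage sketch (model, config, and tensor names are hypothetical; exactly one of the exclusive modes may be used per invocation):

    $ circle-quantizer --quantize_with_minmax float32 uint8 channel \
        --config model.qconf.json model.circle model.q.circle
    $ circle-quantizer --copy_quantparam src_tensor dst_tensor \
        model.q.circle model.copied.circle
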
diff --git a/compiler/circle-tensordump/CMakeLists.txt b/compiler/circle-tensordump/CMakeLists.txt
index 4524260c4..676aecd53 100644
--- a/compiler/circle-tensordump/CMakeLists.txt
+++ b/compiler/circle-tensordump/CMakeLists.txt
@@ -1,6 +1,6 @@
-if(NOT TARGET mio_circle)
+if(NOT TARGET mio_circle04)
return()
-endif(NOT TARGET mio_circle)
+endif(NOT TARGET mio_circle04)
nnas_find_package(HDF5 COMPONENTS STATIC QUIET)
@@ -19,7 +19,8 @@ target_include_directories(circle-tensordump PRIVATE ${HDF5_INCLUDE_DIRS})
target_link_libraries(circle-tensordump PRIVATE ${HDF5_CXX_LIBRARIES})
target_link_libraries(circle-tensordump PRIVATE arser)
target_link_libraries(circle-tensordump PRIVATE foder)
-target_link_libraries(circle-tensordump PRIVATE mio_circle)
+target_link_libraries(circle-tensordump PRIVATE mio_circle04)
+target_link_libraries(circle-tensordump PRIVATE mio_circle04_helper)
target_link_libraries(circle-tensordump PRIVATE safemain)
install(TARGETS circle-tensordump DESTINATION bin)
diff --git a/compiler/circle-tensordump/requires.cmake b/compiler/circle-tensordump/requires.cmake
index 1c754f518..183dfe227 100644
--- a/compiler/circle-tensordump/requires.cmake
+++ b/compiler/circle-tensordump/requires.cmake
@@ -1,4 +1,4 @@
require("arser")
require("foder")
-require("mio-circle")
+require("mio-circle04")
require("safemain")
diff --git a/compiler/circle-tensordump/src/Reader.cpp b/compiler/circle-tensordump/src/Reader.cpp
index 429736bfe..47b876054 100644
--- a/compiler/circle-tensordump/src/Reader.cpp
+++ b/compiler/circle-tensordump/src/Reader.cpp
@@ -16,66 +16,14 @@
#include "Reader.h"
+#include <mio_circle/Helper.h>
+
#include <sstream>
#include <string>
namespace circletensordump
{
-bool is_valid(const circle::OperatorCode *opcode)
-{
- circle::BuiltinOperator code = opcode->builtin_code();
- return (circle::BuiltinOperator_MIN <= code && code <= circle::BuiltinOperator_MAX);
-}
-
-bool is_custom(const circle::OperatorCode *opcode)
-{
- circle::BuiltinOperator code = opcode->builtin_code();
- return (code == circle::BuiltinOperator_CUSTOM);
-}
-
-std::string opcode_name(const circle::OperatorCode *opcode)
-{
- assert(opcode);
-
- if (!is_valid(opcode))
- {
- std::ostringstream oss;
- oss << "(invalid)";
- return oss.str();
- }
-
- if (is_custom(opcode))
- {
- if (!opcode->custom_code())
- return "(invalid custom)";
-
- std::string custom_op = "CUSTOM(";
- custom_op += opcode->custom_code()->c_str();
- custom_op += ")";
- return custom_op;
- }
-
- circle::BuiltinOperator code = opcode->builtin_code();
- return circle::EnumNameBuiltinOperator(code);
-}
-
-const char *tensor_type(const circle::Tensor *tensor)
-{
- return circle::EnumNameTensorType(tensor->type());
-}
-
-const char *tensor_name(const circle::Tensor *tensor)
-{
- static const char *kEmptyTensorName = "(noname)";
-
- auto name = tensor->name();
- if (name)
- return name->c_str();
-
- return kEmptyTensorName;
-}
-
Reader::Reader(const circle::Model *model)
{
_subgraphs = model->subgraphs();
@@ -122,7 +70,7 @@ circle::BuiltinOperator Reader::builtin_code(const circle::Operator *op) const
assert(index < _op_codes.size());
const circle::OperatorCode *opcode = _op_codes.at(index);
- return opcode->builtin_code();
+ return mio::circle::builtin_code_neutral(opcode);
}
std::string Reader::opcode_name(const circle::Operator *op) const
@@ -131,14 +79,14 @@ std::string Reader::opcode_name(const circle::Operator *op) const
assert(index < _op_codes.size());
const circle::OperatorCode *opcode = _op_codes.at(index);
- if (!is_valid(opcode))
+ if (!mio::circle::is_valid(opcode))
{
std::ostringstream oss;
oss << "(invalid: " << index << ")";
return oss.str();
}
- return circletensordump::opcode_name(opcode);
+ return mio::circle::opcode_name(opcode);
}
bool Reader::select_subgraph(uint32_t sgindex)
diff --git a/compiler/circle-tensordump/src/Reader.h b/compiler/circle-tensordump/src/Reader.h
index bbb039552..c868bc277 100644
--- a/compiler/circle-tensordump/src/Reader.h
+++ b/compiler/circle-tensordump/src/Reader.h
@@ -36,12 +36,6 @@ template <typename T> std::vector<T> as_index_vector(const flatbuffers::Vector<T
return ret;
}
-bool is_valid(const circle::OperatorCode *opcode);
-bool is_custom(const circle::OperatorCode *opcode);
-std::string opcode_name(const circle::OperatorCode *opcode);
-const char *tensor_type(const circle::Tensor *tensor);
-const char *tensor_name(const circle::Tensor *tensor);
-
/**
* @brief Loads Circle file and provides helpers to access attributes
*/
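
The free helper functions deleted from this reader (and from circlechef and circledump below) now come from the mio-circle04 helper library. A sketch of the declarations the new call sites assume — inferred from usage in this patch, not copied from the actual header:

    // assumed shape of <mio_circle/Helper.h> (inferred from call sites)
    namespace mio
    {
    namespace circle
    {
    ::circle::BuiltinOperator builtin_code_neutral(const ::circle::OperatorCode *opcode);
    bool is_valid(const ::circle::OperatorCode *opcode);
    bool is_custom(const ::circle::OperatorCode *opcode);
    std::string opcode_name(const ::circle::OperatorCode *opcode);
    const char *tensor_name(const ::circle::Tensor *tensor);
    const char *tensor_type(const ::circle::Tensor *tensor);
    } // namespace circle
    } // namespace mio
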
diff --git a/compiler/circle-verify/CMakeLists.txt b/compiler/circle-verify/CMakeLists.txt
index f22174865..5d0eb9468 100644
--- a/compiler/circle-verify/CMakeLists.txt
+++ b/compiler/circle-verify/CMakeLists.txt
@@ -1,13 +1,14 @@
-if(NOT TARGET mio_circle)
+if(NOT TARGET mio_circle04)
+ message(STATUS "Skip circle-verify: mio_circle04 not found")
return()
-endif(NOT TARGET mio_circle)
+endif(NOT TARGET mio_circle04)
file(GLOB_RECURSE SOURCES "src/*.cpp")
add_executable(circle-verify ${SOURCES})
target_include_directories(circle-verify PRIVATE src)
target_link_libraries(circle-verify arser)
-target_link_libraries(circle-verify mio_circle)
+target_link_libraries(circle-verify mio_circle04)
target_link_libraries(circle-verify safemain)
target_link_libraries(circle-verify cwrap)
target_link_libraries(circle-verify foder)
diff --git a/compiler/circle-verify/requires.cmake b/compiler/circle-verify/requires.cmake
index e1b7fb212..74c8f448b 100644
--- a/compiler/circle-verify/requires.cmake
+++ b/compiler/circle-verify/requires.cmake
@@ -1,5 +1,5 @@
require("arser")
-require("mio-circle")
+require("mio-circle04")
require("safemain")
require("cwrap")
require("foder")
diff --git a/compiler/circle2circle-dredd-recipe-test/CMakeLists.txt b/compiler/circle2circle-dredd-recipe-test/CMakeLists.txt
index ee73d63e3..9ccfd0008 100644
--- a/compiler/circle2circle-dredd-recipe-test/CMakeLists.txt
+++ b/compiler/circle2circle-dredd-recipe-test/CMakeLists.txt
@@ -1,3 +1,7 @@
+if(NOT ENABLE_TEST)
+ return()
+endif(NOT ENABLE_TEST)
+
nnas_include(TargetRequire)
unset(REQUIRED_TARGETS)
diff --git a/compiler/circle2circle/CMakeLists.txt b/compiler/circle2circle/CMakeLists.txt
index 358fc4e2c..cd79967b7 100644
--- a/compiler/circle2circle/CMakeLists.txt
+++ b/compiler/circle2circle/CMakeLists.txt
@@ -11,7 +11,6 @@ target_link_libraries(circle2circle oops)
target_link_libraries(circle2circle hermes)
target_link_libraries(circle2circle hermes_std)
target_link_libraries(circle2circle loco)
-target_link_libraries(circle2circle mio_circle)
target_link_libraries(circle2circle luci_env)
target_link_libraries(circle2circle luci_import)
target_link_libraries(circle2circle luci_service)
@@ -36,7 +35,6 @@ target_link_libraries(circle2circle_test oops)
target_link_libraries(circle2circle_test hermes)
target_link_libraries(circle2circle_test hermes_std)
target_link_libraries(circle2circle_test loco)
-target_link_libraries(circle2circle_test mio_circle)
target_link_libraries(circle2circle_test luci_env)
target_link_libraries(circle2circle_test luci_import)
target_link_libraries(circle2circle_test luci_service)
diff --git a/compiler/circle2circle/requires.cmake b/compiler/circle2circle/requires.cmake
index 36a9efd16..b6c61198f 100644
--- a/compiler/circle2circle/requires.cmake
+++ b/compiler/circle2circle/requires.cmake
@@ -3,7 +3,6 @@ require("loco")
require("locop")
require("logo-core")
require("safemain")
-require("mio-circle")
require("oops")
require("hermes")
require("hermes-std")
diff --git a/compiler/circle2circle/src/Circle2Circle.cpp b/compiler/circle2circle/src/Circle2Circle.cpp
index a5ddb26dc..ae677a321 100644
--- a/compiler/circle2circle/src/Circle2Circle.cpp
+++ b/compiler/circle2circle/src/Circle2Circle.cpp
@@ -104,6 +104,12 @@ int entry(int argc, char **argv)
.default_value(false)
.help("This will fold Depthwise Convolution operator with constant inputs");
+ arser.add_argument("--fold_gather")
+ .nargs(0)
+ .required(false)
+ .default_value(false)
+ .help("This will fold Gather operator");
+
arser.add_argument("--fold_sparse_to_dense")
.nargs(0)
.required(false)
@@ -203,6 +209,12 @@ int entry(int argc, char **argv)
.default_value(false)
.help("This will remove Quantize-Dequantize sequence");
+ arser.add_argument("--remove_redundant_quantize")
+ .nargs(0)
+ .required(false)
+ .default_value(false)
+ .help("This will remove redundant Quantize operators");
+
arser.add_argument("--remove_redundant_reshape")
.nargs(0)
.required(false)
@@ -452,6 +464,8 @@ int entry(int argc, char **argv)
options->enable(Algorithms::FoldDequantize);
if (arser.get<bool>("--fold_dwconv"))
options->enable(Algorithms::FoldDepthwiseConv2D);
+ if (arser.get<bool>("--fold_gather"))
+ options->enable(Algorithms::FoldGather);
if (arser.get<bool>("--fold_sparse_to_dense"))
options->enable(Algorithms::FoldSparseToDense);
if (arser.get<bool>("--forward_reshape_to_unaryop"))
@@ -484,6 +498,8 @@ int entry(int argc, char **argv)
options->enable(Algorithms::RemoveFakeQuant);
if (arser.get<bool>("--remove_quantdequant"))
options->enable(Algorithms::RemoveQuantDequantSeq);
+ if (arser.get<bool>("--remove_redundant_quantize"))
+ options->enable(Algorithms::RemoveRedundantQuantize);
if (arser.get<bool>("--remove_redundant_reshape"))
options->enable(Algorithms::RemoveRedundantReshape);
if (arser.get<bool>("--remove_redundant_transpose"))
diff --git a/compiler/circlechef/CMakeLists.txt b/compiler/circlechef/CMakeLists.txt
index 3e2ddcbb3..b124d3027 100644
--- a/compiler/circlechef/CMakeLists.txt
+++ b/compiler/circlechef/CMakeLists.txt
@@ -1,12 +1,14 @@
nnas_find_package(Protobuf QUIET)
if(NOT Protobuf_FOUND)
+ message(STATUS "circlechef: SKIP (missing Protobuf)")
return()
endif(NOT Protobuf_FOUND)
-if(NOT TARGET mio_circle)
+if(NOT TARGET mio_circle04)
+ message(STATUS "circlechef: SKIP (missing mio-circle04)")
return()
-endif(NOT TARGET mio_circle)
+endif(NOT TARGET mio_circle04)
# Recipe Parser
add_subdirectory(proto)
diff --git a/compiler/circlechef/circle/CMakeLists.txt b/compiler/circlechef/circle/CMakeLists.txt
index 98a284c30..12dc7217b 100644
--- a/compiler/circlechef/circle/CMakeLists.txt
+++ b/compiler/circlechef/circle/CMakeLists.txt
@@ -4,6 +4,7 @@ add_library(circlechef_circle STATIC ${SOURCES})
target_include_directories(circlechef_circle PUBLIC include)
target_include_directories(circlechef_circle PRIVATE src)
target_link_libraries(circlechef_circle circlechef_proto)
-target_link_libraries(circlechef_circle mio_circle)
+target_link_libraries(circlechef_circle mio_circle04)
+target_link_libraries(circlechef_circle mio_circle04_helper)
target_link_libraries(circlechef_circle cwrap)
target_link_libraries(circlechef_circle souschef)
diff --git a/compiler/circlechef/circle/src/CircleImport.cpp b/compiler/circlechef/circle/src/CircleImport.cpp
index e970fbce3..f8756ef94 100644
--- a/compiler/circlechef/circle/src/CircleImport.cpp
+++ b/compiler/circlechef/circle/src/CircleImport.cpp
@@ -18,38 +18,13 @@
#include "Convert.h"
+#include <mio_circle/Helper.h>
+
#include <sstream>
namespace circlechef
{
-const char *kEmptyTensorName = "(noname)";
-
-const char *tensor_type(const circle::Tensor *tensor)
-{
- return circle::EnumNameTensorType(tensor->type());
-}
-
-const char *tensor_name(const circle::Tensor *tensor)
-{
- auto name = tensor->name();
- if (name)
- return name->c_str();
- return kEmptyTensorName;
-}
-
-bool is_valid(const circle::OperatorCode *opcode)
-{
- circle::BuiltinOperator code = opcode->builtin_code();
- return (circle::BuiltinOperator_MIN <= code && code <= circle::BuiltinOperator_MAX);
-}
-
-bool is_custom(const circle::OperatorCode *opcode)
-{
- circle::BuiltinOperator code = opcode->builtin_code();
- return (code == circle::BuiltinOperator_CUSTOM);
-}
-
CircleImport::CircleImport(const circle::Model *model)
{
_subgraphs = model->subgraphs();
@@ -92,7 +67,7 @@ circle::BuiltinOperator CircleImport::builtin_code(const circle::Operator *op) c
assert(index < _op_codes.size());
const circle::OperatorCode *opcode = _op_codes.at(index);
- return opcode->builtin_code();
+ return mio::circle::builtin_code_neutral(opcode);
}
std::string CircleImport::opcode_name(const circle::Operator *op) const
@@ -101,14 +76,14 @@ std::string CircleImport::opcode_name(const circle::Operator *op) const
assert(index < _op_codes.size());
const circle::OperatorCode *opcode = _op_codes.at(index);
- if (!is_valid(opcode))
+ if (!mio::circle::is_valid(opcode))
{
std::ostringstream oss;
oss << "(invalid: " << index << ")";
return oss.str();
}
- if (is_custom(opcode))
+ if (mio::circle::is_custom(opcode))
{
if (!opcode->custom_code())
return "(invalid custom)";
diff --git a/compiler/circlechef/circle/src/CircleImport.h b/compiler/circlechef/circle/src/CircleImport.h
index 23ca29beb..9c1d161b6 100644
--- a/compiler/circlechef/circle/src/CircleImport.h
+++ b/compiler/circlechef/circle/src/CircleImport.h
@@ -34,11 +34,6 @@ using CircleTensors_t = flatbuffers::Vector<flatbuffers::Offset<circle::Tensor>>
using CircleBuffers_t = flatbuffers::Vector<flatbuffers::Offset<circle::Buffer>>;
using CircleOperators_t = flatbuffers::Vector<flatbuffers::Offset<circle::Operator>>;
-const char *tensor_type(const circle::Tensor *tensor);
-const char *tensor_name(const circle::Tensor *tensor);
-bool is_valid(const circle::OperatorCode *opcode);
-bool is_custom(const circle::OperatorCode *opcode);
-
/**
* @brief Loads TF lite file and provides helpers to access attributes
*/
diff --git a/compiler/circlechef/circle/src/RecipeChef.cpp b/compiler/circlechef/circle/src/RecipeChef.cpp
index cd520cbc3..e21bca8a6 100644
--- a/compiler/circlechef/circle/src/RecipeChef.cpp
+++ b/compiler/circlechef/circle/src/RecipeChef.cpp
@@ -15,6 +15,7 @@
*/
#include <circlechef/RecipeChef.h>
+#include <mio_circle/Helper.h>
#include "Convert.h"
#include "CircleImport.h"
@@ -42,7 +43,7 @@ void set_inputs(CircleImport *import, circlechef::Operation *operation, const ci
else
{
auto tensor = tensors->Get(input);
- std::string name = tensor_name(tensor);
+ std::string name = mio::circle::tensor_name(tensor);
operation->add_input(name);
}
}
@@ -56,7 +57,7 @@ void set_outputs(CircleImport *import, circlechef::Operation *operation, const c
for (auto output : outputs)
{
auto tensor = tensors->Get(output);
- std::string name = tensor_name(tensor);
+ std::string name = mio::circle::tensor_name(tensor);
operation->add_output(name);
}
}
@@ -108,7 +109,7 @@ std::unique_ptr<ModelRecipe> generate_recipe(const circle::Model *model)
::circlechef::Operand *operand = model_recipe->add_operand();
- operand->set_name(tensor_name(tensor));
+ operand->set_name(mio::circle::tensor_name(tensor));
operand->set_type(as_circlechef_type(tensor->type()));
std::vector<int32_t> dims = as_index_vector(tensor->shape());
@@ -224,14 +225,14 @@ std::unique_ptr<ModelRecipe> generate_recipe(const circle::Model *model)
for (const auto input : inputs)
{
auto tensor = tensors->Get(input);
- std::string name = tensor_name(tensor);
+ std::string name = mio::circle::tensor_name(tensor);
model_recipe->add_input(name);
}
for (const auto output : outputs)
{
auto tensor = tensors->Get(output);
- std::string name = tensor_name(tensor);
+ std::string name = mio::circle::tensor_name(tensor);
model_recipe->add_output(name);
}
diff --git a/compiler/circlechef/core/CMakeLists.txt b/compiler/circlechef/core/CMakeLists.txt
index 0e8f47483..415954767 100644
--- a/compiler/circlechef/core/CMakeLists.txt
+++ b/compiler/circlechef/core/CMakeLists.txt
@@ -7,7 +7,7 @@ target_include_directories(circlechef_core PUBLIC include)
target_include_directories(circlechef_core PRIVATE src)
target_link_libraries(circlechef_core PUBLIC circlechef_proto)
target_link_libraries(circlechef_core PUBLIC circlechef_log)
-target_link_libraries(circlechef_core PUBLIC mio_circle)
+target_link_libraries(circlechef_core PUBLIC mio_circle04)
target_link_libraries(circlechef_core PUBLIC souschef)
target_link_libraries(circlechef_core PRIVATE nncc_coverage)
diff --git a/compiler/circlechef/core/src/ModelChef.cpp b/compiler/circlechef/core/src/ModelChef.cpp
index 6975f42a3..6c5206dfc 100644
--- a/compiler/circlechef/core/src/ModelChef.cpp
+++ b/compiler/circlechef/core/src/ModelChef.cpp
@@ -520,6 +520,10 @@ GeneratedModel cook(const ::circlechef::ModelRecipe &model_recipe)
for (auto const &opcode : builtin_code_map)
{
circle::OperatorCodeBuilder code_builder{*flatbuffer_builder};
+ int8_t dep_code = 127; // BuiltinOperator_PLACEHOLDER_FOR_GREATER_OP_CODES
+ if (opcode.first < circle::BuiltinOperator_PLACEHOLDER_FOR_GREATER_OP_CODES)
+ dep_code = static_cast<int8_t>(opcode.first);
+ code_builder.add_deprecated_builtin_code(dep_code);
code_builder.add_builtin_code(opcode.first);
code_builder.add_version(opcode.second);
auto code = code_builder.Finish();
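
The extra field keeps older schema readers working: deprecated_builtin_code is an int8, so opcodes whose enum value fits below the placeholder mirror their value there, while anything at or above it stores 127 and relies on the int32 builtin_code alone. For example (illustrative values only): BuiltinOperator_ADD (0) gets dep_code 0, whereas an extended opcode numbered 128 gets dep_code 127.
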
diff --git a/compiler/circlechef/requires.cmake b/compiler/circlechef/requires.cmake
index 2106146d7..a5d6bedaa 100644
--- a/compiler/circlechef/requires.cmake
+++ b/compiler/circlechef/requires.cmake
@@ -1,9 +1,10 @@
require("arser")
require("nnkit")
require("cwrap")
-require("mio-circle")
+require("mio-circle04")
require("safemain")
require("hermes")
require("hermes-std")
require("foder")
require("souschef")
+require("circle-verify")
diff --git a/compiler/circlechef/tests/CMakeLists.txt b/compiler/circlechef/tests/CMakeLists.txt
index 773ff5403..7ae619f8b 100644
--- a/compiler/circlechef/tests/CMakeLists.txt
+++ b/compiler/circlechef/tests/CMakeLists.txt
@@ -3,6 +3,15 @@ set(CIRCLERECIPES_DIR "${CircleRecipes_DIR}")
file(GLOB RECIPES RELATIVE ${CIRCLERECIPES_DIR} "${CIRCLERECIPES_DIR}/*/test.recipe")
+set(CIRCLECHEF_FILE_PATH $<TARGET_FILE:circlechef-file>)
+set(CIRCLECHEF_REVERSE_PATH $<TARGET_FILE:circlechef-reverse>)
+if(DEFINED ENV{BUILD_HOST_EXEC})
+  # TODO find a better way to represent the host executable path
+ set(CIRCLECHEF_FILE_PATH $ENV{BUILD_HOST_EXEC}/compiler/circlechef/tools/file/circlechef-file)
+ set(CIRCLECHEF_REVERSE_PATH $ENV{BUILD_HOST_EXEC}/compiler/circlechef/tools/reverse/circlechef-reverse)
+ message(STATUS "CIRCLECHEF_FILE_PATH = ${CIRCLECHEF_FILE_PATH}")
+endif(DEFINED ENV{BUILD_HOST_EXEC})
+
foreach(RECIPE IN ITEMS ${RECIPES})
get_filename_component(RECIPE_PREFIX ${RECIPE} DIRECTORY)
@@ -18,8 +27,8 @@ foreach(RECIPE IN ITEMS ${RECIPES})
# Generate .circle
add_custom_command(OUTPUT ${RECIPE_OUTPUT_FILE}
- COMMAND circlechef-file ${RECIPE_SOURCE_FILE} ${RECIPE_OUTPUT_FILE}
- DEPENDS circlechef-file ${RECIPE_SOURCE_FILE}
+ COMMAND ${CIRCLECHEF_FILE_PATH} ${RECIPE_SOURCE_FILE} ${RECIPE_OUTPUT_FILE}
+ DEPENDS ${CIRCLECHEF_FILE_PATH} ${RECIPE_SOURCE_FILE}
COMMENT "Generating ${RECIPE_OUTPUT_FILE}")
list(APPEND TESTS ${RECIPE_PREFIX})
@@ -44,8 +53,8 @@ foreach(RECIPE IN ITEMS ${RECIPES})
# Generate .circle
add_custom_command(OUTPUT ${RECIPE_OUTPUT_FILE}
- COMMAND circlechef-file ${RECIPE_SOURCE_FILE} ${RECIPE_OUTPUT_FILE}
- DEPENDS circlechef-file ${RECIPE_SOURCE_FILE}
+ COMMAND ${CIRCLECHEF_FILE_PATH} ${RECIPE_SOURCE_FILE} ${RECIPE_OUTPUT_FILE}
+ DEPENDS ${CIRCLECHEF_FILE_PATH} ${RECIPE_SOURCE_FILE}
COMMENT "Generating ${RECIPE_OUTPUT_FILE}")
list(APPEND TESTS ${RECIPE_PREFIX})
@@ -68,16 +77,16 @@ foreach(CIRCLEFILE IN ITEMS ${GEN_CIRCLEFILES})
# Generate .gen.recipe from generated .circle
add_custom_command(OUTPUT ${RECIPE_GEN_OUTPUT_FILE}
- COMMAND circlechef-reverse ${RECIPE_OUTPUT_FILE} ${RECIPE_GEN_OUTPUT_FILE}
- DEPENDS circlechef-reverse ${RECIPE_OUTPUT_FILE}
+ COMMAND ${CIRCLECHEF_REVERSE_PATH} ${RECIPE_OUTPUT_FILE} ${RECIPE_GEN_OUTPUT_FILE}
+ DEPENDS ${CIRCLECHEF_REVERSE_PATH} ${RECIPE_OUTPUT_FILE}
COMMENT "Generating ${RECIPE_GEN_OUTPUT_FILE}")
# now we are going to generate .gen.circle from .gen.recipe
# to check that the generated .gen.recipe file is correct by using it.
# as weight values may differ, binary comparison is not acceptable.
add_custom_command(OUTPUT ${RECIPE_GEN_OUTPUT_FILE2}
- COMMAND circlechef-file ${RECIPE_GEN_OUTPUT_FILE} ${RECIPE_GEN_OUTPUT_FILE2}
- DEPENDS circlechef-file ${RECIPE_GEN_OUTPUT_FILE}
+ COMMAND ${CIRCLECHEF_FILE_PATH} ${RECIPE_GEN_OUTPUT_FILE} ${RECIPE_GEN_OUTPUT_FILE2}
+ DEPENDS ${CIRCLECHEF_FILE_PATH} ${RECIPE_GEN_OUTPUT_FILE}
COMMENT "Generating ${RECIPE_GEN_OUTPUT_FILE2}")
list(APPEND TESTS ${CIRCLE_PREFIX}.gen)
@@ -96,13 +105,13 @@ foreach(CIRCLEFILE IN ITEMS ${GEN_CIRCLEFILES})
# Generate .gen.recipe from generated .circle
add_custom_command(OUTPUT ${RECIPE_GEN_OUTPUT_FILE}
- COMMAND circlechef-reverse ${RECIPE_OUTPUT_FILE} ${RECIPE_GEN_OUTPUT_FILE}
- DEPENDS circlechef-reverse ${RECIPE_OUTPUT_FILE}
+ COMMAND ${CIRCLECHEF_REVERSE_PATH} ${RECIPE_OUTPUT_FILE} ${RECIPE_GEN_OUTPUT_FILE}
+ DEPENDS ${CIRCLECHEF_REVERSE_PATH} ${RECIPE_OUTPUT_FILE}
COMMENT "Generating ${RECIPE_GEN_OUTPUT_FILE}")
add_custom_command(OUTPUT ${RECIPE_GEN_OUTPUT_FILE2}
- COMMAND circlechef-file ${RECIPE_GEN_OUTPUT_FILE} ${RECIPE_GEN_OUTPUT_FILE2}
- DEPENDS circlechef-file ${RECIPE_GEN_OUTPUT_FILE}
+ COMMAND ${CIRCLECHEF_FILE_PATH} ${RECIPE_GEN_OUTPUT_FILE} ${RECIPE_GEN_OUTPUT_FILE2}
+ DEPENDS ${CIRCLECHEF_FILE_PATH} ${RECIPE_GEN_OUTPUT_FILE}
COMMENT "Generating ${RECIPE_GEN_OUTPUT_FILE2}")
list(APPEND TESTS ${CIRCLE_PREFIX}.gen)
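
A sketch of how BUILD_HOST_EXEC is meant to be used when cross-compiling (the path is hypothetical; it should point at a completed host build tree that already contains circlechef-file and circlechef-reverse):

    $ export BUILD_HOST_EXEC=/path/to/host/build
    $ cmake --build /path/to/target/build
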
diff --git a/compiler/circledump/CMakeLists.txt b/compiler/circledump/CMakeLists.txt
index 7848ac722..b65c06677 100644
--- a/compiler/circledump/CMakeLists.txt
+++ b/compiler/circledump/CMakeLists.txt
@@ -1,6 +1,7 @@
-if(NOT TARGET mio_circle)
+if(NOT TARGET mio_circle04)
+ message(STATUS "Skip circledump: mio_circle04 not found")
return()
-endif(NOT TARGET mio_circle)
+endif(NOT TARGET mio_circle04)
set(DRIVER "driver/Driver.cpp")
@@ -9,8 +10,8 @@ file(GLOB_RECURSE SOURCES "src/*.cpp")
add_executable(circledump ${DRIVER} ${SOURCES})
target_include_directories(circledump PRIVATE include)
target_link_libraries(circledump arser)
-target_link_libraries(circledump mio_circle)
+target_link_libraries(circledump mio_circle04)
+target_link_libraries(circledump mio_circle04_helper)
target_link_libraries(circledump safemain)
-target_link_libraries(circledump flatbuffers-1.10)
install(TARGETS circledump DESTINATION bin)
diff --git a/compiler/circledump/README.md b/compiler/circledump/README.md
index e31c2d560..d2baf26b3 100644
--- a/compiler/circledump/README.md
+++ b/compiler/circledump/README.md
@@ -65,6 +65,6 @@ O T(3) ofm
### Dependency
-- mio-circle
+- mio-circle04
- safemain
- FlatBuffers
diff --git a/compiler/circledump/requires.cmake b/compiler/circledump/requires.cmake
index 81e0f0dbd..362d67cf4 100644
--- a/compiler/circledump/requires.cmake
+++ b/compiler/circledump/requires.cmake
@@ -1,3 +1,3 @@
require("arser")
-require("mio-circle")
+require("mio-circle04")
require("safemain")
diff --git a/compiler/circledump/src/Dump.cpp b/compiler/circledump/src/Dump.cpp
index 42b4ad97a..0b256dda8 100644
--- a/compiler/circledump/src/Dump.cpp
+++ b/compiler/circledump/src/Dump.cpp
@@ -15,6 +15,7 @@
*/
#include <circledump/Dump.h>
+#include <mio_circle/Helper.h>
#include "Read.h"
#include "OpPrinter.h"
@@ -141,7 +142,7 @@ void dump_sub_graph(std::ostream &os, circleread::Reader &reader)
// dump operands(tensors)
os << "Operands: T(subgraph index : tensor index) TYPE (shape) (shape_signature) "
- << "B(buffer index) OperandName" << std::endl;
+ << "B(buffer index) (variable) OperandName" << std::endl;
for (uint32_t i = 0; i < tensors->Length(); ++i)
{
// TODO refactor to some better structure
@@ -151,7 +152,7 @@ void dump_sub_graph(std::ostream &os, circleread::Reader &reader)
if (tensor->shape())
dims = circleread::as_index_vector(tensor->shape());
- os << "T(" << reader.subgraph_index() << ":" << i << ") " << circleread::tensor_type(tensor)
+ os << "T(" << reader.subgraph_index() << ":" << i << ") " << mio::circle::tensor_type(tensor)
<< " ";
os << "(" << dims << ") ";
if (tensor->shape_signature())
@@ -160,7 +161,11 @@ void dump_sub_graph(std::ostream &os, circleread::Reader &reader)
os << "(" << dims_sig << ") ";
}
os << "B(" << tensor->buffer() << ") ";
- os << circleread::tensor_name(tensor) << std::endl;
+ if (tensor->is_variable())
+ {
+ os << "(variable) ";
+ }
+ os << mio::circle::tensor_name(tensor) << std::endl;
if (auto q_params = tensor->quantization())
{
@@ -312,7 +317,7 @@ void dump_sub_graph(std::ostream &os, circleread::Reader &reader)
if (input >= 0)
{
auto tensor = tensors->Get(input);
- os << circleread::tensor_name(tensor);
+ os << mio::circle::tensor_name(tensor);
}
os << std::endl;
}
@@ -322,7 +327,7 @@ void dump_sub_graph(std::ostream &os, circleread::Reader &reader)
if (output >= 0)
{
auto tensor = tensors->Get(output);
- os << circleread::tensor_name(tensor);
+ os << mio::circle::tensor_name(tensor);
}
os << std::endl;
}
@@ -335,14 +340,14 @@ void dump_sub_graph(std::ostream &os, circleread::Reader &reader)
for (const auto input : reader.inputs())
{
auto tensor = tensors->Get(input);
- std::string name = circleread::tensor_name(tensor);
+ std::string name = mio::circle::tensor_name(tensor);
os << "I T(" << reader.subgraph_index() << ":" << input << ") " << name << std::endl;
}
for (const auto output : reader.outputs())
{
auto tensor = tensors->Get(output);
- std::string name = circleread::tensor_name(tensor);
+ std::string name = mio::circle::tensor_name(tensor);
os << "O T(" << reader.subgraph_index() << ":" << output << ") " << name << std::endl;
}
@@ -364,6 +369,7 @@ void dump_model(std::ostream &os, const circle::Model *model)
auto opcodes = reader.opcodes();
auto buffers = reader.buffers();
auto metadata = reader.metadata();
+ auto signaturedefs = reader.signature_defs();
// dump operator_codes
os << "Operator Codes: [order] OpCodeName (OpCode Enum)" << std::endl;
@@ -371,11 +377,14 @@ void dump_model(std::ostream &os, const circle::Model *model)
for (auto opcode : opcodes)
{
circle::BuiltinOperator op_code = opcode->builtin_code();
- auto op_name = circleread::opcode_name(opcode);
+    // cast to int32_t to print as a number; int8_t would print as an ASCII character
+ int32_t dp_code = static_cast<int32_t>(opcode->deprecated_builtin_code());
+
+ auto op_name = mio::circle::opcode_name(opcode);
auto op_version = opcode->version();
os << "[" << opcode_index << "] " << op_name << " (code: " << op_code
- << ", version: " << op_version << ")" << std::endl;
+ << ", dep_code: " << dp_code << ", version: " << op_version << ")" << std::endl;
opcode_index++;
}
@@ -417,6 +426,37 @@ void dump_model(std::ostream &os, const circle::Model *model)
os << std::endl;
}
+ // dump signaturedef
+ if (signaturedefs != nullptr)
+ {
+ os << "SignatureDef" << std::endl;
+ for (uint32_t i = 0; i < signaturedefs->Length(); ++i)
+ {
+ auto sign_i = signaturedefs->Get(i);
+ os << "S(" << i << ") signature_key(" << sign_i->signature_key()->c_str() << "), sub_graph("
+ << sign_i->subgraph_index() << ")" << std::endl;
+
+ auto inputs_i = sign_i->inputs();
+ for (uint32_t t = 0; t < inputs_i->Length(); ++t)
+ {
+ auto inputs_i_t = inputs_i->Get(t);
+ os << " I(" << t << ")"
+ << " T(" << sign_i->subgraph_index() << ":" << inputs_i_t->tensor_index() << ") "
+ << inputs_i_t->name()->c_str() << std::endl;
+ }
+
+ auto outputs_i = sign_i->outputs();
+ for (uint32_t t = 0; t < outputs_i->Length(); ++t)
+ {
+ auto outputs_i_t = outputs_i->Get(t);
+ os << " O(" << t << ")"
+ << " T(" << sign_i->subgraph_index() << ":" << outputs_i_t->tensor_index() << ") "
+ << outputs_i_t->name()->c_str() << std::endl;
+ }
+ }
+ os << std::endl;
+ }
+
for (uint32_t sg = 0; sg < num_subgraph; ++sg)
{
reader.select_subgraph(sg);
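
With signature_defs wired through the reader, a dump would gain a block roughly like the following (signature key and tensor names are hypothetical):

    SignatureDef
    S(0) signature_key(serving_default), sub_graph(0)
       I(0) T(0:0) serving_default_x:0
       O(0) T(0:3) StatefulPartitionedCall:0
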
diff --git a/compiler/circledump/src/Load.cpp b/compiler/circledump/src/Load.cpp
index ec91ed189..67e7fa5a6 100644
--- a/compiler/circledump/src/Load.cpp
+++ b/compiler/circledump/src/Load.cpp
@@ -76,7 +76,7 @@ public:
{
if (_value != -1)
{
- // Close on descturction
+      // Close on destruction
close(_value);
}
}
diff --git a/compiler/circledump/src/OpPrinter.cpp b/compiler/circledump/src/OpPrinter.cpp
index 7af3ff641..02e5c26b5 100644
--- a/compiler/circledump/src/OpPrinter.cpp
+++ b/compiler/circledump/src/OpPrinter.cpp
@@ -341,6 +341,7 @@ public:
<< ") ";
os << "Activation(" << EnumNameActivationFunctionType(params->fused_activation_function())
<< ") ";
+ os << "keep_num_dims(" << params->keep_num_dims() << ") ";
os << std::endl;
}
@@ -619,6 +620,23 @@ public:
}
};
+class SVDFPrinter : public OpPrinter
+{
+public:
+ void options(const circle::Operator *op, std::ostream &os) const override
+ {
+ if (auto *params = op->builtin_options_as_SVDFOptions())
+ {
+ os << " ";
+ os << "rank(" << params->rank() << ") ";
+ os << "activation(" << EnumNameActivationFunctionType(params->fused_activation_function())
+ << ") ";
+ os << "asymmetric_quantize_inputs(" << params->asymmetric_quantize_inputs() << ") ";
+ os << std::endl;
+ }
+ }
+};
+
class TransposeConvPrinter : public OpPrinter
{
public:
@@ -754,6 +772,22 @@ public:
}
};
+class InstanceNormPrinter : public OpPrinter
+{
+public:
+ void options(const circle::Operator *op, std::ostream &os) const override
+ {
+ if (auto *params = op->builtin_options_as_InstanceNormOptions())
+ {
+ os << " ";
+ os << "epsilon(" << params->epsilon() << ") ";
+ os << "Activation(" << EnumNameActivationFunctionType(params->fused_activation_function())
+ << ") ";
+ os << std::endl;
+ }
+ }
+};
+
OpPrinterRegistry::OpPrinterRegistry()
{
_op_map[circle::BuiltinOperator_ADD] = make_unique<AddPrinter>();
@@ -824,6 +858,7 @@ OpPrinterRegistry::OpPrinterRegistry()
_op_map[circle::BuiltinOperator_STRIDED_SLICE] = make_unique<StridedSlicePrinter>();
_op_map[circle::BuiltinOperator_SUB] = make_unique<SubPrinter>();
_op_map[circle::BuiltinOperator_SUM] = make_unique<ReducerPrinter>();
+ _op_map[circle::BuiltinOperator_SVDF] = make_unique<SVDFPrinter>();
_op_map[circle::BuiltinOperator_TRANSPOSE_CONV] = make_unique<TransposeConvPrinter>();
// There is no Option for TOPK_V2
_op_map[circle::BuiltinOperator_UNIDIRECTIONAL_SEQUENCE_LSTM] =
@@ -835,6 +870,7 @@ OpPrinterRegistry::OpPrinterRegistry()
// Circle only
_op_map[circle::BuiltinOperator_BCQ_FULLY_CONNECTED] = make_unique<BCQFullyConnectedPrinter>();
_op_map[circle::BuiltinOperator_BCQ_GATHER] = make_unique<BCQGatherPrinter>();
+ _op_map[circle::BuiltinOperator_INSTANCE_NORM] = make_unique<InstanceNormPrinter>();
}
} // namespace circledump
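
With the two printers registered, the per-operator option text in a dump would read roughly as follows (values are hypothetical):

    SVDF  rank(1) activation(NONE) asymmetric_quantize_inputs(0)
    INSTANCE_NORM  epsilon(1e-05) Activation(NONE)
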
diff --git a/compiler/circledump/src/Read.cpp b/compiler/circledump/src/Read.cpp
index db8298585..3a7e98cde 100644
--- a/compiler/circledump/src/Read.cpp
+++ b/compiler/circledump/src/Read.cpp
@@ -16,72 +16,21 @@
#include "Read.h"
+#include <mio_circle/Helper.h>
+
#include <sstream>
#include <string>
namespace circleread
{
-bool is_valid(const circle::OperatorCode *opcode)
-{
- circle::BuiltinOperator code = opcode->builtin_code();
- return (circle::BuiltinOperator_MIN <= code && code <= circle::BuiltinOperator_MAX);
-}
-
-bool is_custom(const circle::OperatorCode *opcode)
-{
- circle::BuiltinOperator code = opcode->builtin_code();
- return (code == circle::BuiltinOperator_CUSTOM);
-}
-
-std::string opcode_name(const circle::OperatorCode *opcode)
-{
- assert(opcode);
-
- if (!is_valid(opcode))
- {
- std::ostringstream oss;
- oss << "(invalid)";
- return oss.str();
- }
-
- if (is_custom(opcode))
- {
- if (!opcode->custom_code())
- return "(invalid custom)";
-
- std::string custom_op = "CUSTOM(";
- custom_op += opcode->custom_code()->c_str();
- custom_op += ")";
- return custom_op;
- }
-
- circle::BuiltinOperator code = opcode->builtin_code();
- return circle::EnumNameBuiltinOperator(code);
-}
-
-const char *tensor_type(const circle::Tensor *tensor)
-{
- return circle::EnumNameTensorType(tensor->type());
-}
-
-const char *tensor_name(const circle::Tensor *tensor)
-{
- static const char *kEmptyTensorName = "(noname)";
-
- auto name = tensor->name();
- if (name)
- return name->c_str();
-
- return kEmptyTensorName;
-}
-
Reader::Reader(const circle::Model *model)
{
_version = model->version();
_subgraphs = model->subgraphs();
_buffers = model->buffers();
_metadata = model->metadata();
+ _signature_defs = model->signature_defs();
auto opcodes = model->operator_codes();
for (const ::circle::OperatorCode *opcode : *opcodes)
@@ -127,14 +76,14 @@ std::string Reader::opcode_name(const circle::Operator *op) const
assert(index < _op_codes.size());
const circle::OperatorCode *opcode = _op_codes.at(index);
- if (!is_valid(opcode))
+ if (!mio::circle::is_valid(opcode))
{
std::ostringstream oss;
oss << "(invalid: " << index << ")";
return oss.str();
}
- return circleread::opcode_name(opcode);
+ return mio::circle::opcode_name(opcode);
}
bool Reader::select_subgraph(uint32_t sgindex)
diff --git a/compiler/circledump/src/Read.h b/compiler/circledump/src/Read.h
index c61a1ab6d..05b0e5072 100644
--- a/compiler/circledump/src/Read.h
+++ b/compiler/circledump/src/Read.h
@@ -41,12 +41,6 @@ template <typename T> std::vector<T> as_index_vector(const flatbuffers::Vector<T
return ret;
}
-bool is_valid(const circle::OperatorCode *opcode);
-bool is_custom(const circle::OperatorCode *opcode);
-std::string opcode_name(const circle::OperatorCode *opcode);
-const char *tensor_type(const circle::Tensor *tensor);
-const char *tensor_name(const circle::Tensor *tensor);
-
/**
* @brief Loads Circle file and provides helpers to access attributes
*/
@@ -58,6 +52,7 @@ private:
using CircleTensors_t = flatbuffers::Vector<flatbuffers::Offset<circle::Tensor>>;
using CircleOperators_t = flatbuffers::Vector<flatbuffers::Offset<circle::Operator>>;
using CircleMetadata_t = flatbuffers::Vector<flatbuffers::Offset<circle::Metadata>>;
+ using CircleSignatureDef_t = flatbuffers::Vector<flatbuffers::Offset<circle::SignatureDef>>;
public:
Reader(const circle::Model *model);
@@ -75,6 +70,7 @@ public:
const std::vector<int32_t> &outputs() const { return _outputs; }
const circle::DataFormat &data_format() const { return _data_format; }
const CircleMetadata_t *metadata() const { return _metadata; }
+ const CircleSignatureDef_t *signature_defs() const { return _signature_defs; }
uint32_t num_subgraph() const { return _subgraphs->Length(); }
@@ -95,6 +91,7 @@ private:
const CircleTensors_t *_tensors{nullptr};
const CircleOperators_t *_operators{nullptr};
const CircleMetadata_t *_metadata{nullptr};
+ const CircleSignatureDef_t *_signature_defs{nullptr};
uint32_t _subgraph_index = 0;
std::string _subgraph_name;
diff --git a/compiler/cli/CMakeLists.txt b/compiler/cli/CMakeLists.txt
index 2ab8c0529..0fb99ddba 100644
--- a/compiler/cli/CMakeLists.txt
+++ b/compiler/cli/CMakeLists.txt
@@ -4,11 +4,11 @@ list(APPEND TESTS "src/App.test.cpp")
add_library(cli ${SOURCES})
target_include_directories(cli PUBLIC include)
-nnas_find_package(GTest QUIET)
-
-if(NOT GTest_FOUND)
+if(NOT ENABLE_TEST)
return()
-endif(NOT GTest_FOUND)
+endif(NOT ENABLE_TEST)
+
+nnas_find_package(GTest QUIET)
GTest_AddTEst(cli_test ${TESTS})
target_link_libraries(cli_test cli)
diff --git a/compiler/common-artifacts/CMakeLists.txt b/compiler/common-artifacts/CMakeLists.txt
index 6de634a25..404149c15 100644
--- a/compiler/common-artifacts/CMakeLists.txt
+++ b/compiler/common-artifacts/CMakeLists.txt
@@ -1,82 +1,63 @@
#[[ Generate common python virtual environment ]]
-find_package(PythonInterp 3 QUIET)
-find_package(PythonLibs 3 QUIET)
+find_package(PythonInterp 3.8 QUIET)
+find_package(PythonLibs 3.8 QUIET)
if(NOT ${PYTHONINTERP_FOUND})
message(STATUS "Build common-artifacts: FALSE (Python3 is missing)")
return()
endif()
-if(${PYTHON_VERSION_MINOR} LESS 3)
- message(STATUS "Build common-artifacts: FALSE (You need to install Python version higher than 3.3)")
+if(${PYTHON_VERSION_MINOR} LESS 8)
+ message(STATUS "Build common-artifacts: FALSE (You need to install Python version higher than 3.8)")
return()
endif()
-# Create python virtual environment with tensorflow 1.13.2
-set(VIRTUALENV_OVERLAY_TF_1_13_2 "${NNCC_OVERLAY_DIR}/venv_1_13_2")
-
-# Create python virtual environment with tensorflow 2.3.0
-set(VIRTUALENV_OVERLAY_TF_2_3_0 "${NNCC_OVERLAY_DIR}/venv_2_3_0")
# Create python virtual environment with tensorflow 2.6.0
set(VIRTUALENV_OVERLAY_TF_2_6_0 "${NNCC_OVERLAY_DIR}/venv_2_6_0")
add_custom_command(
- OUTPUT ${VIRTUALENV_OVERLAY_TF_1_13_2}
- COMMAND ${PYTHON_EXECUTABLE} -m venv ${VIRTUALENV_OVERLAY_TF_1_13_2}
-)
-
-add_custom_command(
- OUTPUT ${VIRTUALENV_OVERLAY_TF_2_3_0}
- COMMAND ${PYTHON_EXECUTABLE} -m venv ${VIRTUALENV_OVERLAY_TF_2_3_0}
-)
-add_custom_command(
OUTPUT ${VIRTUALENV_OVERLAY_TF_2_6_0}
COMMAND ${PYTHON_EXECUTABLE} -m venv ${VIRTUALENV_OVERLAY_TF_2_6_0}
)
-# Create requirements.txt and install required pip packages
-set(REQUIREMENTS_FILE "requirements.txt")
-set(REQUIREMENTS_OVERLAY_PATH_TF_1_13_2 "${VIRTUALENV_OVERLAY_TF_1_13_2}/${REQUIREMENTS_FILE}")
-set(REQUIREMENTS_OVERLAY_PATH_TF_2_3_0 "${VIRTUALENV_OVERLAY_TF_2_3_0}/${REQUIREMENTS_FILE}")
-set(REQUIREMENTS_OVERLAY_PATH_TF_2_6_0 "${VIRTUALENV_OVERLAY_TF_2_6_0}/${REQUIREMENTS_FILE}")
+# Create python virtual environment with tensorflow 2.8.0
+set(VIRTUALENV_OVERLAY_TF_2_8_0 "${NNCC_OVERLAY_DIR}/venv_2_8_0")
-# TODO remove version number of '--upgrade pip==20.2.1 setuptools==49.3.0'
-# NOTE adding version is for temporary hotfix of setuptools 50.x.y version
add_custom_command(
- OUTPUT ${REQUIREMENTS_OVERLAY_PATH_TF_1_13_2}
- COMMAND ${CMAKE_COMMAND} -E echo "tensorflow==1.13.2" > ${REQUIREMENTS_OVERLAY_PATH_TF_1_13_2}
- COMMAND ${VIRTUALENV_OVERLAY_TF_1_13_2}/bin/python -m pip --default-timeout=1000 install --upgrade pip==20.2.1 setuptools==49.3.0
- COMMAND ${VIRTUALENV_OVERLAY_TF_1_13_2}/bin/python -m pip --default-timeout=1000 install -r ${REQUIREMENTS_OVERLAY_PATH_TF_1_13_2} --upgrade
- DEPENDS ${VIRTUALENV_OVERLAY_TF_1_13_2}
+ OUTPUT ${VIRTUALENV_OVERLAY_TF_2_8_0}
+ COMMAND ${PYTHON_EXECUTABLE} -m venv ${VIRTUALENV_OVERLAY_TF_2_8_0}
)
-add_custom_command(
- OUTPUT ${REQUIREMENTS_OVERLAY_PATH_TF_2_3_0}
- COMMAND ${CMAKE_COMMAND} -E remove -f ${REQUIREMENTS_OVERLAY_PATH_TF_2_3_0}
- COMMAND ${CMAKE_COMMAND} -E echo "tensorflow-cpu==2.3.0" >> ${REQUIREMENTS_OVERLAY_PATH_TF_2_3_0}
- COMMAND ${CMAKE_COMMAND} -E echo "flatbuffers==1.12" >> ${REQUIREMENTS_OVERLAY_PATH_TF_2_3_0}
- COMMAND ${VIRTUALENV_OVERLAY_TF_2_3_0}/bin/python -m pip --default-timeout=1000 install --upgrade pip==20.2.1 setuptools==49.3.0
- COMMAND ${VIRTUALENV_OVERLAY_TF_2_3_0}/bin/python -m pip --default-timeout=1000 install -r ${REQUIREMENTS_OVERLAY_PATH_TF_2_3_0} --upgrade
- DEPENDS ${VIRTUALENV_OVERLAY_TF_2_3_0}
-)
+# Create requirements.txt and install required pip packages
+set(REQUIREMENTS_FILE "requirements.txt")
+set(REQUIREMENTS_OVERLAY_PATH_TF_2_6_0 "${VIRTUALENV_OVERLAY_TF_2_6_0}/${REQUIREMENTS_FILE}")
+set(REQUIREMENTS_OVERLAY_PATH_TF_2_8_0 "${VIRTUALENV_OVERLAY_TF_2_8_0}/${REQUIREMENTS_FILE}")
add_custom_command(
OUTPUT ${REQUIREMENTS_OVERLAY_PATH_TF_2_6_0}
COMMAND ${CMAKE_COMMAND} -E remove -f ${REQUIREMENTS_OVERLAY_PATH_TF_2_6_0}
COMMAND ${CMAKE_COMMAND} -E echo "tensorflow-cpu==2.6.0" >> ${REQUIREMENTS_OVERLAY_PATH_TF_2_6_0}
COMMAND ${CMAKE_COMMAND} -E echo "flatbuffers==1.12" >> ${REQUIREMENTS_OVERLAY_PATH_TF_2_6_0}
- COMMAND ${VIRTUALENV_OVERLAY_TF_2_6_0}/bin/python -m pip --default-timeout=1000 install --upgrade pip==20.2.1 setuptools==49.3.0
- COMMAND ${VIRTUALENV_OVERLAY_TF_2_6_0}/bin/python -m pip --default-timeout=1000 install -r ${REQUIREMENTS_OVERLAY_PATH_TF_2_6_0} --upgrade
+ COMMAND ${VIRTUALENV_OVERLAY_TF_2_6_0}/bin/python3.8 -m pip --default-timeout=1000 install --upgrade pip setuptools
+ COMMAND ${VIRTUALENV_OVERLAY_TF_2_6_0}/bin/python3.8 -m pip --default-timeout=1000 install -r ${REQUIREMENTS_OVERLAY_PATH_TF_2_6_0} --upgrade
DEPENDS ${VIRTUALENV_OVERLAY_TF_2_6_0}
)
+add_custom_command(
+ OUTPUT ${REQUIREMENTS_OVERLAY_PATH_TF_2_8_0}
+ COMMAND ${CMAKE_COMMAND} -E remove -f ${REQUIREMENTS_OVERLAY_PATH_TF_2_8_0}
+ COMMAND ${CMAKE_COMMAND} -E echo "tensorflow-cpu==2.8.0" >> ${REQUIREMENTS_OVERLAY_PATH_TF_2_8_0}
+ COMMAND ${CMAKE_COMMAND} -E echo "flatbuffers==1.12" >> ${REQUIREMENTS_OVERLAY_PATH_TF_2_8_0}
+ COMMAND ${VIRTUALENV_OVERLAY_TF_2_8_0}/bin/python3.8 -m pip --default-timeout=1000 install --upgrade pip setuptools
+ COMMAND ${VIRTUALENV_OVERLAY_TF_2_8_0}/bin/python3.8 -m pip --default-timeout=1000 install -r ${REQUIREMENTS_OVERLAY_PATH_TF_2_8_0} --upgrade
+ DEPENDS ${VIRTUALENV_OVERLAY_TF_2_8_0}
+)
+
add_custom_target(common_artifacts_python_deps ALL
- DEPENDS ${VIRTUALENV_OVERLAY_TF_1_13_2}
- ${VIRTUALENV_OVERLAY_TF_2_3_0}
- ${VIRTUALENV_OVERLAY_TF_2_6_0}
- ${REQUIREMENTS_OVERLAY_PATH_TF_1_13_2}
- ${REQUIREMENTS_OVERLAY_PATH_TF_2_3_0}
+ DEPENDS ${VIRTUALENV_OVERLAY_TF_2_6_0}
+ ${VIRTUALENV_OVERLAY_TF_2_8_0}
${REQUIREMENTS_OVERLAY_PATH_TF_2_6_0}
+ ${REQUIREMENTS_OVERLAY_PATH_TF_2_8_0}
)
#[[ Generate common resources ]]
@@ -97,7 +78,6 @@ target_link_libraries(testDataGenerator PRIVATE arser)
target_link_libraries(testDataGenerator PRIVATE foder)
target_link_libraries(testDataGenerator PRIVATE luci_import)
target_link_libraries(testDataGenerator PRIVATE luci_interpreter)
-target_link_libraries(testDataGenerator PRIVATE mio_circle)
target_link_libraries(testDataGenerator PRIVATE safemain)
unset(TEST_DEPS)
@@ -109,6 +89,7 @@ set(TFLITE_RECIPE_REPO "${TensorFlowLiteRecipes_DIR}")
set(CIRCLE_RECIPE_REPO "${CircleRecipes_DIR}")
set(TEST_RECIPE_FILENAME "test.recipe")
set(TEST_RULE_FILENAME "test.rule")
+set(TEST_QCONFIG_FILENAME "test.qconf.json")
set(MODEL2NNPKG "${NNAS_PROJECT_SOURCE_DIR}/tools/nnpackage_tool/model2nnpkg/model2nnpkg.sh")
# Get test case list
@@ -140,12 +121,20 @@ endmacro()
include("exclude.lst")
+# TODO revise to use variadic arguments
+macro(tcgenerate_option NAME OPTION ARG1 ARG2 ARG3)
+ set(TCGEN_OPT_${NAME} ${OPTION} ${ARG1} ${ARG2} ${ARG3})
+endmacro()
+
+include("options.lst")
+
foreach(RECIPE IN ITEMS ${RECIPES})
unset(OPT_FORMAT)
unset(MODEL_FORMAT)
set(RECIPE_FILE "${RECIPE}.recipe")
set(RULE_FILE "${RECIPE}.rule")
+ set(QCONFIG_FILE "${RECIPE}.qconf.json")
set(TFLITE_RECIPE_SOURCE_PATH "${TFLITE_RECIPE_REPO}/${RECIPE}/${TEST_RECIPE_FILENAME}")
set(CIRCLE_RECIPE_SOURCE_PATH "${CIRCLE_RECIPE_REPO}/${RECIPE}/${TEST_RECIPE_FILENAME}")
@@ -174,8 +163,20 @@ foreach(RECIPE IN ITEMS ${RECIPES})
set(RULE_SOURCE_PATH ${CIRCLE_RULE_SOURCE_PATH})
endif()
+ set(TFLITE_QCONFIG_SOURCE_PATH "${TFLITE_RECIPE_REPO}/${RECIPE}/${TEST_QCONFIG_FILENAME}")
+ set(CIRCLE_QCONFIG_SOURCE_PATH "${CIRCLE_RECIPE_REPO}/${RECIPE}/${TEST_QCONFIG_FILENAME}")
+
+ unset(QCONFIG_SOURCE_PATH)
+ if(EXISTS "${TFLITE_QCONFIG_SOURCE_PATH}")
+ set(QCONFIG_SOURCE_PATH ${TFLITE_QCONFIG_SOURCE_PATH})
+ endif()
+ if(EXISTS "${CIRCLE_QCONFIG_SOURCE_PATH}")
+ set(QCONFIG_SOURCE_PATH ${CIRCLE_QCONFIG_SOURCE_PATH})
+ endif()
+
set(RECIPE_BINARY_PATH "${CMAKE_CURRENT_BINARY_DIR}/${RECIPE_FILE}")
set(RULE_BINARY_PATH "${CMAKE_CURRENT_BINARY_DIR}/${RULE_FILE}")
+ set(QCONFIG_BINARY_PATH "${CMAKE_CURRENT_BINARY_DIR}/${QCONFIG_FILE}")
set(TFLITE_FILE "${RECIPE}.tflite")
set(TFLITE_OUTPUT_PATH "${CMAKE_CURRENT_BINARY_DIR}/${TFLITE_FILE}")
@@ -200,6 +201,16 @@ foreach(RECIPE IN ITEMS ${RECIPES})
list(APPEND TEST_DEPS ${RULE_BINARY_PATH})
endif()
+ if(DEFINED QCONFIG_SOURCE_PATH)
+ # Copy .qconf.json
+ add_custom_command(OUTPUT ${QCONFIG_BINARY_PATH}
+ COMMAND ${CMAKE_COMMAND} -E copy "${QCONFIG_SOURCE_PATH}" "${QCONFIG_BINARY_PATH}"
+ DEPENDS ${QCONFIG_SOURCE_PATH}
+ COMMENT "Generate ${QCONFIG_FILE}"
+ )
+ list(APPEND TEST_DEPS ${QCONFIG_BINARY_PATH})
+ endif()
+
if(${MODEL_FORMAT} STREQUAL "tflite")
# Generate .tflite
add_custom_command(OUTPUT ${TFLITE_OUTPUT_PATH}
@@ -274,11 +285,21 @@ foreach(RECIPE IN ITEMS ${RECIPES})
)
list(APPEND TEST_DEPS ${TC_DIRECTORY})
+  # set ADDITIONAL_OPTIONS to empty (the space before the closing parenthesis is intentional)
+ set(ADDITIONAL_OPTIONS )
+ if(DEFINED TCGEN_OPT_${RECIPE})
+ set(ADDITIONAL_OPTIONS ${ADDITIONAL_OPTIONS} ${TCGEN_OPT_${RECIPE}})
+ endif()
+
# Generate input.h5, expected.h5
set(INPUT_HDF5_FILE "${TC_DIRECTORY}/input.h5")
set(EXPECTED_HDF5_FILE "${TC_DIRECTORY}/expected.h5")
add_custom_command(OUTPUT ${INPUT_HDF5_FILE} ${EXPECTED_HDF5_FILE}
- COMMAND $<TARGET_FILE:testDataGenerator> --input_data ${INPUT_HDF5_FILE} --expected_data ${EXPECTED_HDF5_FILE} ${MODEL_FILE}
+ COMMAND $<TARGET_FILE:testDataGenerator>
+ --input_data ${INPUT_HDF5_FILE}
+ --expected_data ${EXPECTED_HDF5_FILE}
+ ${ADDITIONAL_OPTIONS}
+ ${MODEL_FILE}
DEPENDS $<TARGET_FILE:testDataGenerator> ${MODEL_FILE} ${TC_DIRECTORY}
COMMENT "Generate input.h5 and expected.h5 in ${NNPKG_FILE}/metadata/tc"
)
diff --git a/compiler/common-artifacts/exclude.lst b/compiler/common-artifacts/exclude.lst
index f32e00413..92b07fde8 100644
--- a/compiler/common-artifacts/exclude.lst
+++ b/compiler/common-artifacts/exclude.lst
@@ -14,7 +14,6 @@ optimize(UnidirectionalSequenceLSTM_001) # This recipe contains is_variable Tens
tcgenerate(Abs_000)
tcgenerate(AddN_000)
tcgenerate(Add_001) # runtime doesn't support
-tcgenerate(Add_U8_000)
tcgenerate(Add_STR_000) # STRING is not supported
tcgenerate(Add_STR_001) # STRING is not supported
tcgenerate(All_000)
@@ -26,32 +25,24 @@ tcgenerate(ArgMin_U8_000)
tcgenerate(ArgMin_U8_001)
tcgenerate(ArgMin_U8_002)
tcgenerate(ArgMin_U8_003)
-tcgenerate(BatchMatMul_000)
tcgenerate(BatchMatMulV2_000)
tcgenerate(BatchMatMulV2_001)
tcgenerate(BatchToSpaceND_000)
tcgenerate(BroadcastTo_000) # luci-interpreter doesn't support custom operator
-tcgenerate(Cast_000)
-tcgenerate(Cast_001)
tcgenerate(Ceil_000)
tcgenerate(Conv2D_003) # runtime doesn't support dilation
tcgenerate(Cos_000)
tcgenerate(DepthwiseConv2D_001) # runtime doesn't support dilation
tcgenerate(DepthwiseConv2D_003) # runtime doesn't support dilation
tcgenerate(DepthwiseConv2D_U8_001) # luci-interpreter doesn't support channel-wise quantization yet
-tcgenerate(Dequantize_000) # runtime and luci-interpreter doesn't support Dequantize op yet
-tcgenerate(ExpandDims_000)
-tcgenerate(ExpandDims_001)
-tcgenerate(ExpandDims_002)
-tcgenerate(ExpandDims_003)
-tcgenerate(ExpandDims_004)
+tcgenerate(ExpandDims_001) # luci-interpreter doesn't support undefined shape
+tcgenerate(ExpandDims_002) # luci-interpreter doesn't support undefined shape
tcgenerate(FakeQuant_000) # runtime and luci-interpreter doesn't support yet
tcgenerate(Fill_000)
tcgenerate(Fill_001)
tcgenerate(FloorMod_000)
tcgenerate(FloorMod_001)
tcgenerate(FullyConnected_U8_000)
-tcgenerate(Gather_000)
tcgenerate(GatherNd_000)
tcgenerate(GatherNd_001)
tcgenerate(L2Pool2D_U8_000)
@@ -75,8 +66,8 @@ tcgenerate(Mul_U8_000)
tcgenerate(Neg_000)
tcgenerate(Net_BroadcastTo_AddV2_001) # luci-interpreter doesn't support custom operator
tcgenerate(Net_Conv_FakeQuant_000) # luci-interpreter doesn't support FakeQuant yet
-tcgenerate(Net_Conv_QuantDequant_000) # luci-interpreter doesn't support Quantize/Dequantize yet
tcgenerate(Net_Dangle_001)
+tcgenerate(Net_Gather_SparseToDense_AddV2_000) # luci-interpreter doesn't support custom operator
tcgenerate(Net_ZeroDim_001) # luci-interpreter doesn't support zero dim
tcgenerate(OneHot_000)
tcgenerate(OneHot_001)
@@ -157,13 +148,11 @@ tcgenerate(While_001) # Needs luci-interpreter int32_t support for ADD, EQUAL
tcgenerate(While_002) # Needs luci-interpreter int32_t support for ADD, EQUAL
tcgenerate(While_003) # Needs luci-interpreter int32_t support for ADD, EQUAL, and dynamic shape for WHILE
tcgenerate(YUV_TO_RGB_000)
-tcgenerate(YUV_TO_RGB_U8_000)
tcgenerate(ZerosLike_000)
## CircleRecipes
tcgenerate(BCQFullyConnected_000)
tcgenerate(BCQFullyConnected_001)
tcgenerate(BCQGather_000)
-tcgenerate(CircleBatchMatMul_000)
tcgenerate(InstanceNorm_000)
tcgenerate(InstanceNorm_001)
diff --git a/compiler/common-artifacts/options.lst b/compiler/common-artifacts/options.lst
new file mode 100644
index 000000000..5e0ff9da5
--- /dev/null
+++ b/compiler/common-artifacts/options.lst
@@ -0,0 +1,6 @@
+## Additional Options for test recipe
+
+#[[ tcgenerate_option : add additional option(s) for generation ]]
+
+# make valid 'indices' input value
+tcgenerate_option(Gather_001 --input_range indices 0 3)
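
With this entry, tcgenerate_option defines TCGEN_OPT_Gather_001, which the common-artifacts CMakeLists splices into the generator command as ADDITIONAL_OPTIONS; the resulting invocation looks roughly like this (paths abbreviated):

    testDataGenerator --input_data input.h5 --expected_data expected.h5 \
        --input_range indices 0 3 Gather_001.circle
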
diff --git a/compiler/common-artifacts/requires.cmake b/compiler/common-artifacts/requires.cmake
index d7bed21fe..cc07e17f6 100644
--- a/compiler/common-artifacts/requires.cmake
+++ b/compiler/common-artifacts/requires.cmake
@@ -4,6 +4,6 @@ require("circlechef")
require("foder")
require("luci")
require("luci-interpreter")
-require("mio-circle")
require("safemain")
require("tflchef")
+require("tflite2circle")
diff --git a/compiler/common-artifacts/src/TestDataGenerator.cpp b/compiler/common-artifacts/src/TestDataGenerator.cpp
index b00e93e88..33cecbbe2 100644
--- a/compiler/common-artifacts/src/TestDataGenerator.cpp
+++ b/compiler/common-artifacts/src/TestDataGenerator.cpp
@@ -18,7 +18,6 @@
#include <foder/FileLoader.h>
#include <luci/Importer.h>
#include <luci_interpreter/Interpreter.h>
-#include <mio/circle/schema_generated.h>
#include <H5Cpp.h>
@@ -27,6 +26,9 @@
#include <memory>
#include <random>
#include <string>
+#include <vector>
+#include <cassert>
+#include <cstdlib>
namespace
{
@@ -43,6 +45,8 @@ H5::PredType hdf5_dtype_cast(const loco::DataType loco_dtype)
{
case loco::DataType::U8:
return H5::PredType::NATIVE_UINT8;
+ case loco::DataType::S16:
+ return H5::PredType::NATIVE_INT16;
case loco::DataType::S32:
return H5::PredType::NATIVE_INT32;
case loco::DataType::S64:
@@ -56,7 +60,7 @@ H5::PredType hdf5_dtype_cast(const loco::DataType loco_dtype)
}
}
-template <typename T> void geneate_random_data(std::mt19937 &gen, void *data, uint32_t size)
+template <typename T> void generate_random_data(std::mt19937 &gen, void *data, uint32_t size)
{
std::normal_distribution<float> distrib(0, 2); // mean(0), stddev(2)
for (uint32_t i = 0; i < size; i++)
@@ -65,7 +69,7 @@ template <typename T> void geneate_random_data(std::mt19937 &gen, void *data, ui
}
}
-template <> void geneate_random_data<bool>(std::mt19937 &gen, void *data, uint32_t size)
+template <> void generate_random_data<bool>(std::mt19937 &gen, void *data, uint32_t size)
{
std::normal_distribution<float> distrib(0, 2); // mean(0), stddev(2)
for (uint32_t i = 0; i < size; i++)
@@ -74,6 +78,20 @@ template <> void geneate_random_data<bool>(std::mt19937 &gen, void *data, uint32
}
}
+template <typename T>
+void generate_random_range(void *data, uint32_t size, int32_t range_min, int32_t range_max)
+{
+ assert(range_min <= range_max);
+
+ for (uint32_t i = 0; i < size; i++)
+ {
+    // +1 makes the generated value fall in the inclusive range [range_min, range_max]
+ int32_t range = range_max - range_min + 1;
+ int32_t value = (rand() % range) + range_min;
+ static_cast<T *>(data)[i] = static_cast<T>(value);
+ }
+}
+
void fill_random_data(void *data, uint32_t size, loco::DataType dtype, uint32_t seed)
{
std::mt19937 gen(seed); // standard mersenne_twister_engine seeded with rd()
@@ -81,19 +99,38 @@ void fill_random_data(void *data, uint32_t size, loco::DataType dtype, uint32_t
switch (dtype)
{
case loco::DataType::U8:
- geneate_random_data<uint8_t>(gen, data, size);
+ generate_random_data<uint8_t>(gen, data, size);
+ break;
+ case loco::DataType::S16:
+ generate_random_data<int16_t>(gen, data, size);
break;
case loco::DataType::S32:
- geneate_random_data<int32_t>(gen, data, size);
+ generate_random_data<int32_t>(gen, data, size);
break;
case loco::DataType::S64:
- geneate_random_data<int64_t>(gen, data, size);
+ generate_random_data<int64_t>(gen, data, size);
break;
case loco::DataType::FLOAT32:
- geneate_random_data<float>(gen, data, size);
+ generate_random_data<float>(gen, data, size);
break;
case loco::DataType::BOOL:
- geneate_random_data<bool>(gen, data, size);
+ generate_random_data<bool>(gen, data, size);
+ break;
+ default:
+ throw std::runtime_error("NYI data type.");
+ }
+}
+
+void fill_random_range(void *data, uint32_t size, loco::DataType dtype, int32_t range_min,
+ int32_t range_max)
+{
+ switch (dtype)
+ {
+ case loco::DataType::S32:
+ generate_random_range<int32_t>(data, size, range_min, range_max);
+ break;
+ case loco::DataType::S64:
+ generate_random_range<int64_t>(data, size, range_min, range_max);
break;
default:
throw std::runtime_error("NYI data type.");
@@ -120,6 +157,11 @@ int entry(int argc, char **argv)
.required(false)
.nargs(0)
.help("Put a fixed seed into the random number generator");
+ arser.add_argument("--input_range")
+ .required(false)
+ .nargs(3)
+ .type(arser::DataType::STR_VEC)
+ .help("Set random number range [min max] for the input as 'name min max'");
try
{
@@ -176,6 +218,24 @@ int entry(int argc, char **argv)
std::unique_ptr<H5::Group> output_value_group =
std::make_unique<H5::Group>(output_file.createGroup("value"));
+ std::string range_name;
+ int32_t range_min = 0;
+ int32_t range_max = 0;
+ bool range_check = false;
+ bool range_input_found = false;
+ if (arser["--input_range"])
+ {
+ // NOTE limitation: we can only set one input range
+ // TODO expand this for multiple inputs
+ std::vector<std::string> values = arser.get<std::vector<std::string>>("--input_range");
+ assert(values.size() == 3);
+ range_name = values.at(0);
+ // TODO add check for valid numbers
+ range_min = std::atoi(values.at(1).c_str());
+ range_max = std::atoi(values.at(2).c_str());
+ range_check = true;
+ }
+
std::random_device rd; // used to obtain a seed for the random number engine
uint32_t input_index = 0;
// TODO remove indentation
@@ -187,6 +247,7 @@ int entry(int argc, char **argv)
{
const auto *input_node = dynamic_cast<const luci::CircleInput *>(node);
std::string name = input_node->name();
+ assert(not name.empty());
if (name.find(":") == std::string::npos)
name += ":0";
@@ -217,7 +278,12 @@ int entry(int argc, char **argv)
std::vector<int8_t> data(byte_size);
// generate random data
- if (arser["--fixed_seed"])
+ if (range_name == input_node->name())
+ {
+ fill_random_range(data.data(), data_size, input_node->dtype(), range_min, range_max);
+ range_input_found = true;
+ }
+ else if (arser["--fixed_seed"])
fill_random_data(data.data(), data_size, input_node->dtype(), 0);
else
fill_random_data(data.data(), data_size, input_node->dtype(), rd());
@@ -230,6 +296,12 @@ int entry(int argc, char **argv)
}
}
+ if (range_check && not range_input_found)
+ {
+ std::cerr << "ERROR: input_range for input [" << range_name << "] not found." << std::endl;
+ return EXIT_FAILURE;
+ }
+
interpreter.interpret();
// dump output data into hdf5 file
diff --git a/compiler/dio-hdf5/CMakeLists.txt b/compiler/dio-hdf5/CMakeLists.txt
new file mode 100644
index 000000000..199c0d59d
--- /dev/null
+++ b/compiler/dio-hdf5/CMakeLists.txt
@@ -0,0 +1,30 @@
+nnas_find_package(HDF5 COMPONENTS STATIC QUIET)
+
+if(NOT HDF5_FOUND)
+ message(STATUS "Build dio_hdf5: FAILED (missing HDF5)")
+ return()
+endif(NOT HDF5_FOUND)
+
+file(GLOB_RECURSE SOURCES "src/*.cpp")
+file(GLOB_RECURSE TESTS "src/*.test.cpp")
+list(REMOVE_ITEM SOURCES ${TESTS})
+
+add_library(dio_hdf5 SHARED ${SOURCES})
+target_include_directories(dio_hdf5 PUBLIC include)
+target_include_directories(dio_hdf5 PUBLIC ${HDF5_INCLUDE_DIRS})
+target_link_libraries(dio_hdf5 PUBLIC ${HDF5_CXX_LIBRARIES})
+target_link_libraries(dio_hdf5 PUBLIC loco)
+
+install(TARGETS dio_hdf5 DESTINATION lib)
+install(DIRECTORY include/ DESTINATION include
+ FILES_MATCHING PATTERN "*.h")
+
+if(NOT ENABLE_TEST)
+ return()
+endif(NOT ENABLE_TEST)
+
+nnas_find_package(GTest REQUIRED)
+
+GTest_AddTest(dio_hdf5_test ${TESTS})
+target_include_directories(dio_hdf5_test PRIVATE include)
+target_link_libraries(dio_hdf5_test dio_hdf5)
diff --git a/compiler/dio-hdf5/README.md b/compiler/dio-hdf5/README.md
new file mode 100644
index 000000000..aa2398ce8
--- /dev/null
+++ b/compiler/dio-hdf5/README.md
@@ -0,0 +1,29 @@
+# dio-hdf5
+
+_dio-hdf5_ is a library that helps load HDF5 files (_dio_ stands for data I/O).
+
+The HDF5 file should have the following structure.
+
+```
+Group "/"
+ > Group <group_name>
+ > Group <data_idx>
+ > Dataset <input_idx>
+```
+
+## Example
+
+```cpp
+dio_hdf5::HDF5Importer h5{input_path};
+
+h5.importGroup("value");
+
+// Prepare buffer
+const uint32_t input_byte_size = 16;
+std::vector<char> buffer(input_byte_size);
+
+// Write the first input of the first data to buffer
+h5.readTensor(0, 0, buffer.data());
+
+DO_SOMETHING_WITH(buffer);
+```
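+
+A sketch of reading a tensor together with its data type and shape, assuming the same
+`h5` object and buffer as above:
+
+```cpp
+loco::DataType dtype;
+std::vector<loco::Dimension> shape;
+
+// Write the first input of the first data to buffer, and its dtype and shape to the pointers
+h5.readTensor(0, 0, &dtype, &shape, buffer.data());
+```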
diff --git a/compiler/dio-hdf5/include/dio_hdf5/HDF5Importer.h b/compiler/dio-hdf5/include/dio_hdf5/HDF5Importer.h
new file mode 100644
index 000000000..aafcfbbf3
--- /dev/null
+++ b/compiler/dio-hdf5/include/dio_hdf5/HDF5Importer.h
@@ -0,0 +1,82 @@
+/*
+ * Copyright (c) 2022 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __DIO_HDF5_H__
+#define __DIO_HDF5_H__
+
+#include <H5Cpp.h>
+
+#include <loco.h>
+
+#include <string>
+#include <vector>
+
+namespace dio
+{
+namespace hdf5
+{
+
+// HDF5Importer reads input data saved in an HDF5 file at the given path.
+// The hierarchy of the HDF5 file is as follows.
+// Group "/"
+// > Group <group_name>
+// > Group <data_idx>
+// > Dataset <input_idx>
+// data_idx : index of the data (a dataset file can contain multiple data entries)
+// input_idx : index of the input (a DNN model can have multiple inputs)
+// Ex: the j'th input of the i'th data of group 'value' can be accessed by "/value/i/j"
+class HDF5Importer final
+{
+public:
+ explicit HDF5Importer(const std::string &path);
+
+public:
+ /**
+ * @note importGroup has to be called before readTensor is called.
+ * Otherwise, readTensor will throw an exception.
+ */
+ void importGroup(const std::string &group) { _group = _file.openGroup(group); }
+
+ /**
+ * @brief Read tensor data from file and store it into buffer
+ * @details A tensor in the file can be retrieved with (data_idx, input_idx)
+ * @param data_idx : index of the data
+ * @param input_idx : index of the input
+ * @param dtype : pointer to write the tensor's data type
+ * @param shape : pointer to write the tensor's shape
+ * @param buffer : pointer to write the tensor's data
+ */
+ void readTensor(int32_t data_idx, int32_t input_idx, loco::DataType *dtype,
+ std::vector<loco::Dimension> *shape, void *buffer);
+
+ // Read a raw tensor (no type/shape is specified)
+ void readTensor(int32_t data_idx, int32_t input_idx, void *buffer);
+
+ bool isRawData() { return _group.attrExists("rawData"); }
+
+ int32_t numData() { return _group.getNumObjs(); }
+
+ int32_t numInputs(int32_t data_idx);
+
+private:
+ H5::H5File _file;
+ H5::Group _group;
+};
+
+} // namespace hdf5
+} // namespace dio
+
+#endif // __DIO_HDF5_H__
diff --git a/compiler/dio-hdf5/requires.cmake b/compiler/dio-hdf5/requires.cmake
new file mode 100644
index 000000000..44f6870da
--- /dev/null
+++ b/compiler/dio-hdf5/requires.cmake
@@ -0,0 +1 @@
+require("loco")
diff --git a/compiler/record-minmax/src/HDF5Importer.cpp b/compiler/dio-hdf5/src/HDF5Importer.cpp
index cfb270ce0..9ae556b77 100644
--- a/compiler/record-minmax/src/HDF5Importer.cpp
+++ b/compiler/dio-hdf5/src/HDF5Importer.cpp
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ * Copyright (c) 2022 Samsung Electronics Co., Ltd. All Rights Reserved
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
@@ -14,16 +14,17 @@
* limitations under the License.
*/
-#include "HDF5Importer.h"
+#include "dio_hdf5/HDF5Importer.h"
#include <H5Cpp.h>
#include <string>
+#include <vector>
#include <cassert>
#include <stdexcept>
-using Shape = luci_interpreter::Shape;
-using DataType = luci_interpreter::DataType;
+using Shape = std::vector<loco::Dimension>;
+using DataType = loco::DataType;
namespace
{
@@ -36,10 +37,10 @@ Shape toInternalShape(const H5::DataSpace &dataspace)
dims.resize(rank, 0);
dataspace.getSimpleExtentDims(dims.data());
- Shape res(rank);
+ Shape res;
for (int axis = 0; axis < rank; ++axis)
{
- res.dim(axis) = dims[axis];
+ res.emplace_back(dims[axis]);
}
return res;
@@ -108,18 +109,28 @@ void readTensorData(H5::DataSet &tensor, int64_t *buffer)
} // namespace
-namespace record_minmax
+namespace dio
{
+namespace hdf5
+{
+
+HDF5Importer::HDF5Importer(const std::string &path)
+{
+ if (_file.isHdf5(path) == false)
+ throw std::runtime_error("Given data file is not HDF5");
+
+ _file = H5::H5File(path, H5F_ACC_RDONLY);
+}
int32_t HDF5Importer::numInputs(int32_t record_idx)
{
- auto records = _value_grp.openGroup(std::to_string(record_idx));
+ auto records = _group.openGroup(std::to_string(record_idx));
return records.getNumObjs();
}
void HDF5Importer::readTensor(int32_t record_idx, int32_t input_idx, void *buffer)
{
- auto record = _value_grp.openGroup(std::to_string(record_idx));
+ auto record = _group.openGroup(std::to_string(record_idx));
auto tensor = record.openDataSet(std::to_string(input_idx));
readTensorData(tensor, static_cast<uint8_t *>(buffer));
@@ -128,7 +139,7 @@ void HDF5Importer::readTensor(int32_t record_idx, int32_t input_idx, void *buffe
void HDF5Importer::readTensor(int32_t record_idx, int32_t input_idx, DataType *dtype, Shape *shape,
void *buffer)
{
- auto record = _value_grp.openGroup(std::to_string(record_idx));
+ auto record = _group.openGroup(std::to_string(record_idx));
auto tensor = record.openDataSet(std::to_string(input_idx));
auto tensor_dtype = tensor.getDataType();
@@ -156,4 +167,5 @@ void HDF5Importer::readTensor(int32_t record_idx, int32_t input_idx, DataType *d
}
}
-} // namespace record_minmax
+} // namespace hdf5
+} // namespace dio
diff --git a/compiler/dio-hdf5/src/HDF5Importer.test.cpp b/compiler/dio-hdf5/src/HDF5Importer.test.cpp
new file mode 100644
index 000000000..61a027fc5
--- /dev/null
+++ b/compiler/dio-hdf5/src/HDF5Importer.test.cpp
@@ -0,0 +1,134 @@
+/*
+ * Copyright (c) 2022 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "dio_hdf5/HDF5Importer.h"
+
+#include <loco.h>
+
+#include <H5Cpp.h>
+
+#include <cstdio>
+
+#include <gtest/gtest.h>
+
+using HDF5Importer = dio::hdf5::HDF5Importer;
+using Shape = std::vector<loco::Dimension>;
+using DataType = loco::DataType;
+
+namespace
+{
+
+const std::string file_name("dio_hdf5_test.h5");
+
+void createFile()
+{
+ // If the file already exists, remove it.
+ if (auto f = fopen(file_name.c_str(), "r"))
+ {
+ fclose(f);
+ if (remove(file_name.c_str()) != 0)
+ throw std::runtime_error("Error deleting file.");
+ }
+
+ const auto rank = 3;
+ hsize_t dim[3] = {1, 2, 3};
+ H5::DataSpace space(rank, dim);
+
+ float data[] = {0, 1, 2, 3, 4, 5};
+
+ // Create test file in the current directory
+ H5::H5File file(file_name, H5F_ACC_TRUNC);
+ {
+ file.createGroup("/value");
+ file.createGroup("/value/0");
+ H5::DataSet dataset(file.createDataSet("/value/0/0", H5::PredType::IEEE_F32BE, space));
+ dataset.write(data, H5::PredType::IEEE_F32LE);
+ }
+}
+
+} // namespace
+
+TEST(dio_hdf5_test, read_with_type_shape)
+{
+ createFile();
+
+ HDF5Importer h5(::file_name);
+
+ h5.importGroup("value");
+
+ std::vector<float> buffer(6);
+
+ DataType dtype;
+ Shape shape;
+ h5.readTensor(0, 0, &dtype, &shape, buffer.data());
+
+ for (uint32_t i = 0; i < 6; i++)
+ EXPECT_EQ(i, buffer[i]);
+
+ EXPECT_EQ(DataType::FLOAT32, dtype);
+ EXPECT_EQ(3, shape.size());
+ EXPECT_EQ(1, shape[0]);
+ EXPECT_EQ(2, shape[1]);
+ EXPECT_EQ(3, shape[2]);
+}
+
+TEST(dio_hdf5_test, wrong_path_NEG)
+{
+ const std::string wrong_path = "not_existing_file_for_dio_hdf5_test";
+
+ EXPECT_ANY_THROW(HDF5Importer h5(wrong_path));
+}
+
+TEST(dio_hdf5_test, wrong_group_name_NEG)
+{
+ createFile();
+
+ HDF5Importer h5(::file_name);
+
+ EXPECT_ANY_THROW(h5.importGroup("wrong"));
+}
+
+TEST(dio_hdf5_test, data_out_of_index_NEG)
+{
+ createFile();
+
+ HDF5Importer h5(::file_name);
+
+ h5.importGroup("value");
+
+ std::vector<float> buffer(6);
+
+ DataType dtype;
+ Shape shape;
+ // Read non-existing data (data_idx = 1)
+ EXPECT_ANY_THROW(h5.readTensor(1, 0, &dtype, &shape, buffer.data()));
+}
+
+TEST(dio_hdf5_test, input_out_of_index_NEG)
+{
+ createFile();
+
+ HDF5Importer h5(::file_name);
+
+ h5.importGroup("value");
+
+ std::vector<float> buffer(6);
+
+ DataType dtype;
+ Shape shape;
+ // Read non-existing input (input_idx = 1)
+ EXPECT_ANY_THROW(h5.readTensor(0, 1, &dtype, &shape, buffer.data()));
+}
diff --git a/compiler/dredd-rule-lib/rule-lib.sh b/compiler/dredd-rule-lib/rule-lib.sh
index 9254cc9a7..c25dc5fb4 100755
--- a/compiler/dredd-rule-lib/rule-lib.sh
+++ b/compiler/dredd-rule-lib/rule-lib.sh
@@ -217,4 +217,21 @@ op_version()
echo ${ACTUAL}
}
+tensor_dtype()
+{
+ argc_check $# 1
+ file_path_check ${COMPILED_FILE}
+ file_path_check ${INSPECT_PROG_PATH}
+
+ set -o pipefail
+
+ ACTUAL=`init_error_log ; \
+ ${INSPECT_PROG_PATH} --tensor_dtype ${COMPILED_FILE} | \
+ awk -v tensor_name="$1" '{ if ($1 == tensor_name) print $2}'`
+
+ check_success_exit_code $? 0
+
+ echo ${ACTUAL}
+}
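+
+# Example (sketch; "ofm" is a hypothetical tensor name, and COMPILED_FILE and
+# INSPECT_PROG_PATH are assumed to be set by the test harness):
+#   DTYPE=$(tensor_dtype "ofm")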
+
# TODO define more quality test functions
diff --git a/compiler/embedded-import-value-test/.gitignore b/compiler/embedded-import-value-test/.gitignore
new file mode 100644
index 000000000..8dbfa9012
--- /dev/null
+++ b/compiler/embedded-import-value-test/.gitignore
@@ -0,0 +1 @@
+/test.local.lst
diff --git a/compiler/embedded-import-value-test/CMakeLists.txt b/compiler/embedded-import-value-test/CMakeLists.txt
new file mode 100644
index 000000000..785edfc7d
--- /dev/null
+++ b/compiler/embedded-import-value-test/CMakeLists.txt
@@ -0,0 +1,34 @@
+if(NOT ENABLE_TEST)
+ return()
+endif(NOT ENABLE_TEST)
+
+set(SRCS_TEST_DRIVER src/TestDriver.cpp)
+
+# create driver
+add_executable(test_driver ${SRCS_TEST_DRIVER})
+target_link_libraries(test_driver PRIVATE luci_interpreter_import)
+target_link_libraries(test_driver PRIVATE luci_interpreter)
+target_link_libraries(test_driver PRIVATE safemain)
+
+unset(EMBEDDED_IMPORT_VALUE_TESTS)
+
+macro(addeval NAME)
+ list(APPEND EMBEDDED_IMPORT_VALUE_TESTS ${NAME})
+endmacro(addeval)
+
+# Read "test.lst"
+include("test.lst")
+# Read "test.local.lst" if exists
+include("test.local.lst" OPTIONAL)
+
+# Generate dependencies
+add_custom_target(embedded_import_testfiles ALL DEPENDS ${TESTFILES})
+
+get_target_property(ARTIFACTS_BIN_PATH testDataGenerator BINARY_DIR)
+
+add_test(NAME embedded_import_value_test
+ COMMAND "${CMAKE_CURRENT_SOURCE_DIR}/evalverify.sh"
+ "${CMAKE_CURRENT_BINARY_DIR}"
+ "${ARTIFACTS_BIN_PATH}"
+ ${EMBEDDED_IMPORT_VALUE_TESTS}
+)
diff --git a/compiler/embedded-import-value-test/README.md b/compiler/embedded-import-value-test/README.md
new file mode 100644
index 000000000..71a95486f
--- /dev/null
+++ b/compiler/embedded-import-value-test/README.md
@@ -0,0 +1,13 @@
+# embedded-import-value-test
+
+`embedded-import-value-test` checks that models imported with and without constant copying produce the same output values.
+
+The test proceeds as follows:
+
+1. Generate random input for the provided circle model.
+
+2. Import the circle model into luci in two modes (see the sketch below):
+ - With constant copying (default mode).
+ - Without constant copying (experimental feature).
+
+3. Compare the execution result of both modes. The result must be the same.
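+
+The two modes differ only in the `GraphBuilderSource` given to `luci::Importer`. A minimal
+sketch (assuming `circle_model` is a circle model already loaded into memory):
+
+```cpp
+// default mode: constant buffers are copied into the imported module
+const auto default_source = &luci::GraphBuilderRegistry::get();
+const auto module_copy = luci::Importer(default_source).importModule(circle_model);
+
+// experimental mode: constants reference the model buffer without copying
+const auto optimized_source = luci_interpreter::source_without_constant_copying();
+const auto module_nocopy = luci::Importer(optimized_source.get()).importModule(circle_model);
+```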
diff --git a/compiler/embedded-import-value-test/evalverify.sh b/compiler/embedded-import-value-test/evalverify.sh
new file mode 100755
index 000000000..a99e76f3e
--- /dev/null
+++ b/compiler/embedded-import-value-test/evalverify.sh
@@ -0,0 +1,58 @@
+#!/bin/bash
+
+# This script verifies that models imported without constant copying execute correctly in luci_interpreter
+#
+# HOW TO USE
+#
+# ./evalverify.sh <path/to/bin_dir> <path/to/work_dir> <TEST 1> <TEST 2> ...
+# bin_dir : build directory of embedded-import-value-test (ex: build/compiler/embedded-import-value-test)
+# work_dir : artifacts directory where test materials exist
+
+BINDIR="$1"; shift
+WORKDIR="$1"; shift
+TEST_DRIVER_PATH="${BINDIR}/test_driver"
+TEST_RESULT_DIR="${BINDIR}/result"
+
+TESTED=()
+PASSED=()
+FAILED=()
+
+mkdir -p "${TEST_RESULT_DIR}"
+for TESTCASE in "$@"; do
+ TESTED+=("${TESTCASE}")
+
+ TESTCASE_FILE="${WORKDIR}/${TESTCASE}"
+ TEST_RESULT_FILE="${TEST_RESULT_DIR}/${TESTCASE}"
+
+ PASSED_TAG="${TEST_RESULT_FILE}.passed"
+ rm -f "${PASSED_TAG}"
+
+ cat > "${TEST_RESULT_FILE}.log" <(
+ exec 2>&1
+ set -ex
+
+ "${TEST_DRIVER_PATH}" --model "${TESTCASE_FILE}.circle"
+
+ if [[ $? -eq 0 ]]; then
+ touch "${PASSED_TAG}"
+ fi
+ )
+
+ if [[ -f "${PASSED_TAG}" ]]; then
+ PASSED+=("${TESTCASE}")
+ else
+ FAILED+=("${TESTCASE}")
+ fi
+done
+
+if [[ ${#TESTED[@]} -ne ${#PASSED[@]} ]]; then
+ echo "FAILED"
+ for TEST in "${FAILED[@]}"
+ do
+ echo "- ${TEST}"
+ done
+ exit 255
+fi
+
+echo "PASSED"
+exit 0
diff --git a/compiler/embedded-import-value-test/requires.cmake b/compiler/embedded-import-value-test/requires.cmake
new file mode 100644
index 000000000..f8af5f27e
--- /dev/null
+++ b/compiler/embedded-import-value-test/requires.cmake
@@ -0,0 +1,6 @@
+require("common-artifacts")
+require("luci")
+require("luci-interpreter")
+require("safemain")
+require("oops")
+require("loco")
diff --git a/compiler/embedded-import-value-test/src/TestDriver.cpp b/compiler/embedded-import-value-test/src/TestDriver.cpp
new file mode 100644
index 000000000..63fd745eb
--- /dev/null
+++ b/compiler/embedded-import-value-test/src/TestDriver.cpp
@@ -0,0 +1,242 @@
+/*
+ * Copyright (c) 2022 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include <luci_interpreter/GraphBuilderRegistry.h>
+#include <luci_interpreter/Interpreter.h>
+
+#include <luci/Importer.h>
+
+#include <cstdlib>
+#include <fstream>
+#include <iostream>
+#include <vector>
+#include <string>
+#include <random>
+
+namespace
+{
+
+uint32_t tensor_size_of(const luci::CircleNode *node)
+{
+ uint32_t tensor_size = loco::size(node->dtype());
+ for (uint32_t i = 0; i < node->rank(); ++i)
+ tensor_size *= node->dim(i).value();
+ return tensor_size;
+}
+
+std::vector<uint8_t> random_data_for(const luci::CircleInput *node)
+{
+ // allocate data buffer
+ std::vector<uint8_t> inputs_data(tensor_size_of(node));
+ auto *buffer = inputs_data.data();
+
+ // define size of buffer in elements
+ const auto dtype = node->dtype();
+ assert(inputs_data.size() % loco::size(dtype) == 0); // FIX ME UNLESS
+ const auto element_count = inputs_data.size() / loco::size(dtype);
+
+ // random generator engine
+ std::random_device device;
+ std::mt19937 engine{device()};
+
+ // fill buffer with random data
+ switch (node->dtype())
+ {
+ case loco::DataType::FLOAT32:
+ {
+ auto element_buffer = reinterpret_cast<float *>(buffer);
+
+ std::uniform_real_distribution<float> distrib(-3, 3);
+ const auto generator = [&distrib, &engine]() { return distrib(engine); };
+ std::generate(element_buffer, element_buffer + element_count, generator);
+
+ break;
+ }
+ case loco::DataType::U8:
+ {
+ auto element_buffer = buffer;
+
+ std::uniform_int_distribution<uint8_t> distrib(100, 200);
+ const auto generator = [&distrib, &engine]() { return distrib(engine); };
+ std::generate(element_buffer, element_buffer + element_count, generator);
+
+ break;
+ }
+ case loco::DataType::S16:
+ {
+ auto element_buffer = reinterpret_cast<int16_t *>(buffer);
+
+ std::uniform_int_distribution<int16_t> distrib(0, 100);
+ const auto generator = [&distrib, &engine]() { return distrib(engine); };
+ std::generate(element_buffer, element_buffer + element_count, generator);
+
+ break;
+ }
+ case loco::DataType::S32:
+ {
+ auto element_buffer = reinterpret_cast<int32_t *>(buffer);
+
+ std::uniform_int_distribution<int32_t> distrib(0, 100);
+ const auto generator = [&distrib, &engine]() { return distrib(engine); };
+ std::generate(element_buffer, element_buffer + element_count, generator);
+
+ break;
+ }
+ case loco::DataType::BOOL:
+ {
+ // bool data is stored as uint8_t values restricted to the [0, 1] range
+ auto element_buffer = buffer;
+
+ std::uniform_int_distribution<uint8_t> distrib(0, 1);
+ const auto generator = [&distrib, &engine]() { return distrib(engine); };
+ std::generate(element_buffer, element_buffer + element_count, generator);
+
+ break;
+ }
+ default:
+ // TODO Support other dtypes
+ throw std::runtime_error("Data type not supported yet");
+ }
+
+ return inputs_data;
+}
+
+} // namespace
+
+int entry(int argc, char **argv)
+{
+ // check arguments
+ if (argc != 3 || std::string(argv[1]) != "--model")
+ {
+ std::cerr << "Usage: " << argv[0] << " --model <path/to/model>" << std::endl;
+ return EXIT_FAILURE;
+ }
+
+ // open file with model
+ const auto model_file = std::string(argv[2]);
+ std::ifstream fs(model_file, std::ifstream::binary);
+ if (fs.fail())
+ {
+ std::cerr << "Cannot open model file \"" << model_file << "\"." << std::endl;
+ return EXIT_FAILURE;
+ }
+
+ // create constant circle model
+ const std::vector<char> model_buffer((std::istreambuf_iterator<char>(fs)),
+ std::istreambuf_iterator<char>());
+ const auto circle_model = circle::GetModel(model_buffer.data());
+
+ // create random model's inputs
+ std::vector<std::vector<uint8_t>> inputs_data;
+ {
+ // model inputs
+ auto model = luci::Importer(nullptr).importModule(circle_model);
+ const auto inputs = loco::input_nodes(model->graph());
+
+ // create random data for each input
+ for (const auto *input : inputs)
+ {
+ const auto input_node = loco::must_cast<const luci::CircleInput *>(input);
+ inputs_data.emplace_back(random_data_for(input_node));
+ }
+ }
+
+ // interpret given module
+ const auto interpret_module_and_compute_output =
+ [&](const std::unique_ptr<luci::Module> &module) {
+ // create interpreter
+ luci_interpreter::Interpreter interpreter(module.get());
+
+ // model's input and output nodes
+ const auto input_nodes = loco::input_nodes(module->graph());
+ const auto output_nodes = loco::output_nodes(module->graph());
+
+ // set inputs
+ for (uint32_t i = 0; i < input_nodes.size(); ++i)
+ {
+ const auto input_node = loco::must_cast<const luci::CircleInput *>(input_nodes[i]);
+ const auto &data = inputs_data.at(i);
+ interpreter.writeInputTensor(input_node, data.data(), data.size());
+ }
+
+ // do inference
+ interpreter.interpret();
+
+ // read outputs
+ std::vector<std::vector<uint8_t>> outputs_data;
+ for (const auto *node : output_nodes)
+ {
+ const auto output_node = loco::must_cast<const luci::CircleOutput *>(node);
+
+ // allocate output buffer
+ outputs_data.emplace_back(tensor_size_of(output_node));
+
+ auto &data = outputs_data.back();
+ interpreter.readOutputTensor(output_node, data.data(), data.size());
+ }
+
+ return outputs_data;
+ };
+
+ // import with copying, execute and save
+ std::vector<std::vector<uint8_t>> outputs_data_1;
+ {
+ const auto default_source = &luci::GraphBuilderRegistry::get();
+ const auto module = luci::Importer(default_source).importModule(circle_model);
+ if (not module)
+ {
+ std::cerr << "Fail to import model with constant copying." << std::endl;
+ return EXIT_FAILURE;
+ }
+
+ outputs_data_1 = interpret_module_and_compute_output(module);
+ }
+
+ // import without copying, execute and save
+ std::vector<std::vector<uint8_t>> outputs_data_2;
+ {
+ const auto optimized_source = luci_interpreter::source_without_constant_copying();
+ const auto module = luci::Importer(optimized_source.get()).importModule(circle_model);
+ if (not module)
+ {
+ std::cerr << "Fail to import model without constant copying." << std::endl;
+ return EXIT_FAILURE;
+ }
+
+ outputs_data_2 = interpret_module_and_compute_output(module);
+ }
+
+ // check all tensors are equal
+ assert(outputs_data_1.size() == outputs_data_2.size());
+ for (uint32_t n = 0; n < outputs_data_1.size(); ++n)
+ {
+ const auto &output_1 = outputs_data_1.at(n);
+ const auto &output_2 = outputs_data_2.at(n);
+ assert(output_1.size() == output_2.size());
+
+ for (uint32_t o = 0; o < output_1.size(); ++o)
+ {
+ if (output_1[o] != output_2[o])
+ {
+ std::cerr << "Values mismatch in model's output number " << n << std::endl;
+ return EXIT_FAILURE;
+ }
+ }
+ }
+
+ std::cout << "[TEST PASSED]" << std::endl;
+ return EXIT_SUCCESS;
+}
diff --git a/compiler/embedded-import-value-test/test.lst b/compiler/embedded-import-value-test/test.lst
new file mode 100644
index 000000000..924a60dcc
--- /dev/null
+++ b/compiler/embedded-import-value-test/test.lst
@@ -0,0 +1,192 @@
+#addeval(Abs_000)
+addeval(Add_000)
+#addeval(Add_001)
+addeval(Add_U8_000)
+#addeval(AddN_000)
+addeval(ArgMax_000)
+addeval(ArgMax_001)
+addeval(ArgMax_002)
+addeval(ArgMax_003)
+addeval(ArgMax_U8_000)
+addeval(ArgMax_U8_001)
+addeval(ArgMax_U8_002)
+addeval(ArgMax_U8_003)
+#addeval(ArgMin_000)
+#addeval(ArgMin_001)
+#addeval(ArgMin_002)
+#addeval(ArgMin_003)
+#addeval(ArgMin_U8_000)
+#addeval(ArgMin_U8_001)
+#addeval(ArgMin_U8_002)
+#addeval(ArgMin_U8_003)
+addeval(AveragePool2D_000)
+#addeval(BatchMatMul_000)
+#addeval(BatchMatMulV2_000)
+#addeval(BatchMatMulV2_001)
+#addeval(BatchToSpaceND_000)
+addeval(Cast_000)
+addeval(Cast_001)
+#addeval(Ceil_000)
+addeval(Concatenation_000)
+addeval(Concatenation_U8_000)
+addeval(Conv2D_000)
+addeval(Conv2D_001)
+addeval(Conv2D_002)
+addeval(Conv2D_003)
+addeval(Conv2D_U8_000)
+addeval(Conv2D_U8_001)
+#addeval(Cos_000)
+addeval(DepthToSpace_000)
+addeval(DepthwiseConv2D_000)
+addeval(DepthwiseConv2D_U8_000)
+#addeval(DepthwiseConv2D_U8_001)
+addeval(DepthwiseConv2D_001)
+addeval(Div_000)
+addeval(ELU_000)
+addeval(Equal_000)
+addeval(Exp_000)
+#addeval(ExpandDims_000)
+#addeval(ExpandDims_001)
+#addeval(ExpandDims_002)
+#addeval(ExpandDims_003)
+#addeval(Fill_000)
+#addeval(Fill_001)
+addeval(Floor_000)
+#addeval(FloorDiv_000)
+#addeval(FloorDiv_001)
+#addeval(FloorMod_000)
+#addeval(FloorMod_001)
+addeval(FullyConnected_000)
+addeval(FullyConnected_001)
+addeval(FullyConnected_002)
+#addeval(FullyConnected_U8_000)
+addeval(Gather_000)
+#addeval(GatherNd_000)
+#addeval(Greater_000)
+#addeval(GreaterEqual_000)
+addeval(If_000)
+addeval(If_001)
+addeval(L2Normalize_000)
+addeval(L2Pool2D_000)
+#addeval(L2Pool2D_U8_000)
+addeval(LeakyRelu_000)
+addeval(Less_000)
+addeval(LessEqual_000)
+addeval(LocalResponseNormalization_000)
+#addeval(Log_000)
+addeval(LogicalAnd_000)
+addeval(LogicalNot_000)
+addeval(LogicalOr_000)
+addeval(Logistic_000)
+addeval(LogSoftmax_000)
+#addeval(MatMul_000)
+#addeval(MatrixDiag_000)
+#addeval(MatrixSetDiag_000)
+addeval(Maximum_000)
+addeval(MaxPool2D_000)
+addeval(MaxPool2D_U8_000)
+addeval(Mean_000)
+addeval(Mean_001)
+#addeval(Mean_U8_000)
+#addeval(Minimum_000)
+#addeval(MirrorPad_000)
+addeval(Mul_000)
+#addeval(Mul_U8_000)
+addeval(Neg_000)
+addeval(NotEqual_000)
+addeval(OneHot_000)
+addeval(OneHot_001)
+addeval(OneHot_002)
+#addeval(OneHot_003)
+addeval(Pack_000)
+addeval(Pack_U8_000)
+addeval(Pad_000)
+addeval(Pad_U8_000)
+addeval(Pow_000)
+addeval(PRelu_000)
+#addeval(Range_000)
+#addeval(Rank_000)
+#addeval(ReduceAny_000)
+#addeval(ReduceAny_001)
+#addeval(ReduceAny_002)
+#addeval(ReduceAny_003)
+#addeval(ReduceMax_000)
+#addeval(ReduceMin_000)
+#addeval(ReduceProd_000)
+#addeval(ReduceProd_001)
+#addeval(ReduceProd_002)
+#addeval(ReduceProd_003)
+addeval(ReLU_000)
+addeval(ReLU6_000)
+#addeval(ReLUN1To1_000)
+addeval(Reshape_000)
+addeval(Reshape_001)
+addeval(Reshape_002)
+#addeval(Reshape_003)
+addeval(Reshape_U8_000)
+addeval(ResizeBilinear_000)
+addeval(ResizeNearestNeighbor_000)
+#addeval(ReverseSequence_000)
+#addeval(ReverseV2_000)
+#addeval(Round_000)
+addeval(Rsqrt_000)
+#addeval(ScatterNd_000)
+#addeval(SegmentSum_000)
+#addeval(Select_000)
+#addeval(Select_001)
+#addeval(Select_002)
+#addeval(SelectV2_000)
+#addeval(SelectV2_001)
+#addeval(SelectV2_002)
+#addeval(Shape_000)
+addeval(SignatureDef_MultiOut_000)
+addeval(SignatureDef_MultiOut_001)
+#addeval(Sin_000)
+addeval(Slice_000)
+addeval(Softmax_000)
+addeval(Softmax_U8_000)
+addeval(SpaceToBatchND_000)
+addeval(SpaceToBatchND_001)
+addeval(SpaceToBatchND_002)
+addeval(SpaceToBatchND_003)
+addeval(SpaceToDepth_000)
+#addeval(SparseToDense_000)
+addeval(Split_000)
+addeval(SplitV_000)
+addeval(Sqrt_000)
+addeval(Square_000)
+addeval(SquaredDifference_000)
+addeval(Squeeze_000)
+addeval(Squeeze_001)
+addeval(StridedSlice_000)
+addeval(StridedSlice_001)
+addeval(StridedSlice_002)
+addeval(Sub_000)
+addeval(Sub_U8_000)
+#addeval(Sum_000)
+#addeval(Sum_001)
+addeval(SVDF_000)
+addeval(SVDF_001)
+addeval(Tanh_000)
+#addeval(Tile_000)
+#addeval(Tile_U8_000)
+#addeval(TopKV2_000)
+#addeval(TopKV2_001)
+addeval(Transpose_000)
+addeval(TransposeConv_000)
+addeval(Unpack_000)
+addeval(Unpack_001)
+addeval(Unpack_002)
+addeval(Unpack_003)
+#addeval(Where_000)
+#addeval(Where_001)
+#addeval(While_000)
+#addeval(While_001)
+#addeval(While_002)
+#addeval(While_003)
+addeval(YUV_TO_RGB_U8_000)
+#addeval(ZerosLike_000)
+
+# Simple Network test
+addeval(Part_While_000)
+addeval(Part_While_001)
diff --git a/compiler/enco/CMakeLists.txt b/compiler/enco/CMakeLists.txt
index 17300e25e..3702f9501 100644
--- a/compiler/enco/CMakeLists.txt
+++ b/compiler/enco/CMakeLists.txt
@@ -1,4 +1,9 @@
add_subdirectory(core)
add_subdirectory(frontend)
add_subdirectory(cli)
+
+if(NOT ENABLE_TEST)
+ return()
+endif(NOT ENABLE_TEST)
+
add_subdirectory(test)
diff --git a/compiler/enco/core/CMakeLists.txt b/compiler/enco/core/CMakeLists.txt
index 25dad2bc6..19a64231a 100644
--- a/compiler/enco/core/CMakeLists.txt
+++ b/compiler/enco/core/CMakeLists.txt
@@ -20,11 +20,11 @@ target_link_libraries(enco_core PRIVATE morph)
# Let's use nncc project-wide build options
target_link_libraries(enco_core PRIVATE nncc_common)
-nnas_find_package(GTest QUIET)
-
-if(NOT GTest_FOUND)
+if(NOT ENABLE_TEST)
return()
-endif(NOT GTest_FOUND)
+endif(NOT ENABLE_TEST)
+
+nnas_find_package(GTest QUIET)
add_executable(enco_core_test ${TESTS})
target_include_directories(enco_core_test PRIVATE src)
diff --git a/compiler/enco/frontend/caffe/CMakeLists.txt b/compiler/enco/frontend/caffe/CMakeLists.txt
index 9722392a1..baf7f7bd6 100644
--- a/compiler/enco/frontend/caffe/CMakeLists.txt
+++ b/compiler/enco/frontend/caffe/CMakeLists.txt
@@ -17,11 +17,11 @@ target_link_libraries(enco_caffe_frontend enco_intf_cmdline)
target_link_libraries(enco_caffe_frontend morph)
target_link_libraries(enco_caffe_frontend caffeproto)
-nnas_find_package(GTest QUIET)
-
-if(NOT GTest_FOUND)
+if(NOT ENABLE_TEST)
return()
-endif(NOT GTest_FOUND)
+endif(NOT ENABLE_TEST)
+
+nnas_find_package(GTest QUIET)
nnas_find_package(Caffe QUIET)
diff --git a/compiler/enco/frontend/tflite/CMakeLists.txt b/compiler/enco/frontend/tflite/CMakeLists.txt
index b2de2b34b..995e66f81 100644
--- a/compiler/enco/frontend/tflite/CMakeLists.txt
+++ b/compiler/enco/frontend/tflite/CMakeLists.txt
@@ -1,4 +1,4 @@
-nnas_find_package(FlatBuffers EXACT 1.10 QUIET)
+nnas_find_package(FlatBuffers EXACT 2.0 QUIET)
if(NOT FlatBuffers_FOUND)
return()
@@ -17,16 +17,15 @@ add_library(enco_tflite_frontend SHARED ${SOURCES})
target_include_directories(enco_tflite_frontend PRIVATE src)
target_link_libraries(enco_tflite_frontend enco_intf_frontend)
target_link_libraries(enco_tflite_frontend enco_intf_cmdline)
-target_link_libraries(enco_tflite_frontend flatbuffers-1.10)
target_link_libraries(enco_tflite_frontend enco_tflite_schema)
target_link_libraries(enco_tflite_frontend morph)
target_link_libraries(enco_tflite_frontend cwrap)
-nnas_find_package(GTest QUIET)
-
-if(NOT GTest_FOUND)
+if(NOT ENABLE_TEST)
return()
-endif(NOT GTest_FOUND)
+endif(NOT ENABLE_TEST)
+
+nnas_find_package(GTest QUIET)
add_executable(enco_tflite_frontend_test ${TESTS})
target_include_directories(enco_tflite_frontend_test PRIVATE src)
diff --git a/compiler/exo/CMakeLists.txt b/compiler/exo/CMakeLists.txt
index 9d02f7cba..645db714c 100644
--- a/compiler/exo/CMakeLists.txt
+++ b/compiler/exo/CMakeLists.txt
@@ -1,4 +1,4 @@
-nnas_find_package(FlatBuffers EXACT 1.10 QUIET)
+nnas_find_package(FlatBuffers EXACT 2.0 QUIET)
if(NOT FlatBuffers_FOUND)
message(STATUS "Build exo: FALSE (missing FlatBuffers)")
@@ -15,7 +15,7 @@ endif(NOT TensorFlowSource_FOUND)
message(STATUS "Build exo: TRUE")
set(TFLITE_SCHEMA_DIR "${TensorFlowSource_DIR}/tensorflow/lite/schema")
-set(CIRCLE_SCHEMA_DIR "${NNAS_PROJECT_SOURCE_DIR}/nnpackage/schema")
+set(CIRCLE_SCHEMA_DIR "${NNAS_PROJECT_SOURCE_DIR}/res/CircleSchema/0.3")
FlatBuffers_Target(exo_tflite_fbs
OUTPUT_DIR "${CMAKE_CURRENT_BINARY_DIR}/gen"
diff --git a/compiler/hermes-std/CMakeLists.txt b/compiler/hermes-std/CMakeLists.txt
index 8fce31953..673d7056c 100644
--- a/compiler/hermes-std/CMakeLists.txt
+++ b/compiler/hermes-std/CMakeLists.txt
@@ -3,7 +3,9 @@ file(GLOB_RECURSE TESTS "src/*.test.cpp")
list(REMOVE_ITEM SOURCES ${TESTS})
add_library(hermes_std STATIC ${SOURCES})
-set_target_properties(hermes_std PROPERTIES POSITION_INDEPENDENT_CODE ON)
+if (NOT NNCC_LIBRARY_NO_PIC)
+ set_target_properties(hermes_std PROPERTIES POSITION_INDEPENDENT_CODE ON)
+endif(NOT NNCC_LIBRARY_NO_PIC)
target_include_directories(hermes_std PUBLIC include)
target_link_libraries(hermes_std PUBLIC hermes)
target_link_libraries(hermes_std PRIVATE pepper_strcast)
diff --git a/compiler/hermes-std/include/hermes/ConsoleReporter.h b/compiler/hermes-std/include/hermes/ConsoleReporter.h
index e09dd5785..c55e46a17 100644
--- a/compiler/hermes-std/include/hermes/ConsoleReporter.h
+++ b/compiler/hermes-std/include/hermes/ConsoleReporter.h
@@ -28,6 +28,10 @@ namespace hermes
struct ConsoleReporter final : public hermes::Sink
{
void notify(const Message *m) final;
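+ /// Enable colored output; setting the ONE_HERMES_COLOR environment variable to "1" or "ON" also turns it on.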
+ void set_colored_mode(bool is_colored) { _is_colored = is_colored; }
+
+private:
+ bool _is_colored = false;
};
} // namespace hermes
diff --git a/compiler/hermes-std/src/ConsoleReporter.cpp b/compiler/hermes-std/src/ConsoleReporter.cpp
index 3cc9f09ed..524ed59d8 100644
--- a/compiler/hermes-std/src/ConsoleReporter.cpp
+++ b/compiler/hermes-std/src/ConsoleReporter.cpp
@@ -17,16 +17,68 @@
#include "hermes/ConsoleReporter.h"
#include <iostream>
+#include <cstdlib>
+#include <string>
namespace hermes
{
+static constexpr const char *kTermColorRedTextCode = "\033[0;31m";
+static constexpr const char *kTermColorGreenTextCode = "\033[0;32m";
+static constexpr const char *kTermColorOrangeTextCode = "\033[0;33m";
+static constexpr const char *kTermColorBlueTextCode = "\033[0;34m";
+static constexpr const char *kTermColorMagentaTextCode = "\033[0;35m";
+static constexpr const char *kTermColorCyanTextCode = "\033[0;36m";
+static constexpr const char *kTermColorWhiteTextCode = "\033[0;37m";
+
+static constexpr const char *kTermBoldTextCode = "\033[1m";
+static constexpr const char *kTermUnderlineTextCode = "\033[4m";
+static constexpr const char *kTermInverseTextCode = "\033[7m";
+static constexpr const char *kTermBoldOffTextCode = "\033[21m";
+static constexpr const char *kTermUnderlineOffTextCode = "\033[24m";
+static constexpr const char *kTermInverseOffTextCode = "\033[27m";
+
+static constexpr const char *kTermColorResetAllCode = "\033[0m";
+
void ConsoleReporter::notify(const hermes::Message *m)
{
+ const char *env_color_p = std::getenv("ONE_HERMES_COLOR");
+ if (env_color_p)
+ {
+ auto env_color_str = std::string(env_color_p);
+ if ((env_color_str == "1") or (env_color_str == "ON"))
+ _is_colored = true;
+ }
+
+ if (_is_colored)
+ {
+ switch (m->get_severity())
+ {
+ case FATAL:
+ std::cout << kTermColorRedTextCode << kTermBoldTextCode << kTermUnderlineTextCode;
+ break;
+ case ERROR:
+ std::cout << kTermColorRedTextCode;
+ break;
+ case WARN:
+ std::cout << kTermColorOrangeTextCode;
+ break;
+ case INFO:
+ std::cout << kTermColorGreenTextCode;
+ break;
+ case VERBOSE:
+ std::cout << kTermColorResetAllCode;
+ break;
+ }
+ }
for (uint32_t n = 0; n < m->text()->lines(); ++n)
{
std::cout << m->text()->line(n) << std::endl;
}
+ if (_is_colored)
+ {
+ std::cout << kTermColorResetAllCode;
+ }
}
} // namespace hermes
diff --git a/compiler/hermes-std/src/ConsoleReporter.test.cpp b/compiler/hermes-std/src/ConsoleReporter.test.cpp
index a65585a6a..d959ff3d9 100644
--- a/compiler/hermes-std/src/ConsoleReporter.test.cpp
+++ b/compiler/hermes-std/src/ConsoleReporter.test.cpp
@@ -43,3 +43,168 @@ TEST(ConsoleReporterTest, notify)
ASSERT_NO_THROW(r.notify(&m));
}
+
+TEST(ConsoleReporterTest, notify_fatal)
+{
+ hermes::Message m;
+ {
+ std::stringstream ss;
+
+ ss << "This message is colored as FATAL" << std::endl;
+
+ m.text(std::make_unique<hermes::MessageText>(ss), hermes::FATAL);
+ }
+
+ hermes::ConsoleReporter r;
+
+ r.set_colored_mode(true);
+ ASSERT_NO_THROW(r.notify(&m));
+}
+
+TEST(ConsoleReporterTest, notify_error)
+{
+ hermes::Message m;
+ {
+ std::stringstream ss;
+
+ ss << "This message is colored as ERROR" << std::endl;
+
+ m.text(std::make_unique<hermes::MessageText>(ss), hermes::ERROR);
+ }
+
+ hermes::ConsoleReporter r;
+
+ r.set_colored_mode(true);
+ ASSERT_NO_THROW(r.notify(&m));
+}
+
+TEST(ConsoleReporterTest, notify_warn)
+{
+ hermes::Message m;
+ {
+ std::stringstream ss;
+
+ ss << "This message is colored as WARN" << std::endl;
+
+ m.text(std::make_unique<hermes::MessageText>(ss), hermes::WARN);
+ }
+
+ hermes::ConsoleReporter r;
+
+ r.set_colored_mode(true);
+ ASSERT_NO_THROW(r.notify(&m));
+}
+
+TEST(ConsoleReporterTest, notify_info)
+{
+ hermes::Message m;
+ {
+ std::stringstream ss;
+
+ ss << "This message is colored as INFO" << std::endl;
+
+ m.text(std::make_unique<hermes::MessageText>(ss), hermes::INFO);
+ }
+
+ hermes::ConsoleReporter r;
+
+ r.set_colored_mode(true);
+ ASSERT_NO_THROW(r.notify(&m));
+}
+
+TEST(ConsoleReporterTest, notify_verbose)
+{
+ hermes::Message m;
+ {
+ std::stringstream ss;
+
+ ss << "This message is colored as VERBOSE" << std::endl;
+
+ m.text(std::make_unique<hermes::MessageText>(ss), hermes::VERBOSE);
+ }
+
+ hermes::ConsoleReporter r;
+
+ r.set_colored_mode(true);
+ ASSERT_NO_THROW(r.notify(&m));
+}
+
+TEST(ConsoleReporterTest, notify_fatal_NEG)
+{
+ hermes::Message m;
+ {
+ std::stringstream ss;
+
+ ss << "This message is not colored as FATAL" << std::endl;
+
+ m.text(std::make_unique<hermes::MessageText>(ss), hermes::FATAL);
+ }
+
+ hermes::ConsoleReporter r;
+
+ ASSERT_NO_THROW(r.notify(&m));
+}
+
+TEST(ConsoleReporterTest, notify_error_NEG)
+{
+ hermes::Message m;
+ {
+ std::stringstream ss;
+
+ ss << "This message is not colored as ERROR" << std::endl;
+
+ m.text(std::make_unique<hermes::MessageText>(ss), hermes::ERROR);
+ }
+
+ hermes::ConsoleReporter r;
+
+ ASSERT_NO_THROW(r.notify(&m));
+}
+
+TEST(ConsoleReporterTest, notify_warn_NEG)
+{
+ hermes::Message m;
+ {
+ std::stringstream ss;
+
+ ss << "This message is not colored as WARN" << std::endl;
+
+ m.text(std::make_unique<hermes::MessageText>(ss), hermes::WARN);
+ }
+
+ hermes::ConsoleReporter r;
+
+ ASSERT_NO_THROW(r.notify(&m));
+}
+
+TEST(ConsoleReporterTest, notify_info_NEG)
+{
+ hermes::Message m;
+ {
+ std::stringstream ss;
+
+ ss << "This message is not colored as INFO" << std::endl;
+
+ m.text(std::make_unique<hermes::MessageText>(ss), hermes::INFO);
+ }
+
+ hermes::ConsoleReporter r;
+
+ ASSERT_NO_THROW(r.notify(&m));
+}
+
+TEST(ConsoleReporterTest, notify_verbose_NEG)
+{
+ hermes::Message m;
+ {
+ std::stringstream ss;
+
+ ss << "This message is not colored as VERBOSE" << std::endl;
+
+ m.text(std::make_unique<hermes::MessageText>(ss), hermes::VERBOSE);
+ }
+
+ hermes::ConsoleReporter r;
+
+ ASSERT_NO_THROW(r.notify(&m));
+}
diff --git a/compiler/hermes/CMakeLists.txt b/compiler/hermes/CMakeLists.txt
index e1a71c2b4..d33e2d735 100644
--- a/compiler/hermes/CMakeLists.txt
+++ b/compiler/hermes/CMakeLists.txt
@@ -3,7 +3,9 @@ file(GLOB_RECURSE TESTS "src/*.test.cpp")
list(REMOVE_ITEM SOURCES ${TESTS})
add_library(hermes STATIC ${SOURCES})
-set_target_properties(hermes PROPERTIES POSITION_INDEPENDENT_CODE ON)
+if (NOT NNCC_LIBRARY_NO_PIC)
+ set_target_properties(hermes PROPERTIES POSITION_INDEPENDENT_CODE ON)
+endif(NOT NNCC_LIBRARY_NO_PIC)
target_include_directories(hermes PUBLIC include)
# Let's apply nncc common compile options
#
diff --git a/compiler/hermes/include/hermes/core/Message.h b/compiler/hermes/include/hermes/core/Message.h
index 460163f64..d76f0eb6f 100644
--- a/compiler/hermes/include/hermes/core/Message.h
+++ b/compiler/hermes/include/hermes/core/Message.h
@@ -17,6 +17,8 @@
#ifndef __HERMES_MESSAGE_H__
#define __HERMES_MESSAGE_H__
+#include "Severity.h"
+
#include <memory>
#include <sstream>
#include <string>
@@ -48,7 +50,6 @@ private:
* @brief Message with metadata
*
* TODO Add "Timestamp" field
- * TODO Add "Severity" field
* TODO Support extensible "attribute" annotation
*/
class Message final
@@ -58,10 +59,17 @@ public:
public:
void text(std::unique_ptr<MessageText> &&text) { _text = std::move(text); }
+ void text(std::unique_ptr<MessageText> &&text, SeverityCategory severity)
+ {
+ _text = std::move(text);
+ _severity = severity;
+ }
const MessageText *text(void) const { return _text.get(); }
+ SeverityCategory get_severity(void) const { return _severity; }
private:
std::unique_ptr<MessageText> _text;
+ SeverityCategory _severity = SeverityCategory::INFO;
};
} // namespace hermes
diff --git a/compiler/hermes/include/hermes/core/MessageBuffer.h b/compiler/hermes/include/hermes/core/MessageBuffer.h
index a2f1de74d..1e2e9b9dc 100644
--- a/compiler/hermes/include/hermes/core/MessageBuffer.h
+++ b/compiler/hermes/include/hermes/core/MessageBuffer.h
@@ -18,6 +18,7 @@
#define __HERMES_MESSAGE_BUFFER_H__
#include "hermes/core/MessageBus.h"
+#include "hermes/core/Severity.h"
#include <ostream>
#include <sstream>
@@ -34,6 +35,7 @@ class MessageBuffer final
{
public:
MessageBuffer(MessageBus *);
+ MessageBuffer(MessageBus *bus, SeverityCategory severity);
~MessageBuffer();
public:
@@ -41,6 +43,7 @@ public:
private:
MessageBus *_bus;
+ SeverityCategory _severity = SeverityCategory::INFO;
/// @brief Content buffer
std::stringstream _ss;
diff --git a/compiler/hermes/src/core/MessageBuffer.cpp b/compiler/hermes/src/core/MessageBuffer.cpp
index a4ff4eeff..ce1f176d9 100644
--- a/compiler/hermes/src/core/MessageBuffer.cpp
+++ b/compiler/hermes/src/core/MessageBuffer.cpp
@@ -26,13 +26,19 @@ MessageBuffer::MessageBuffer(MessageBus *bus) : _bus{bus}
// DO NOTHING
}
+MessageBuffer::MessageBuffer(MessageBus *bus, SeverityCategory severity)
+ : _bus{bus}, _severity{severity}
+{
+ // DO NOTHING
+}
+
MessageBuffer::~MessageBuffer()
{
// NOTE The current implementation is unsafe as it may throw an exception.
// TODO Find a better safe implementation.
auto msg = std::make_unique<Message>();
- msg->text(std::make_unique<MessageText>(_ss));
+ msg->text(std::make_unique<MessageText>(_ss), _severity);
_bus->post(std::move(msg));
}
diff --git a/compiler/hermes/src/core/Source.cpp b/compiler/hermes/src/core/Source.cpp
index d124f4430..cb60d9a31 100644
--- a/compiler/hermes/src/core/Source.cpp
+++ b/compiler/hermes/src/core/Source.cpp
@@ -60,10 +60,9 @@ void Source::deactivate(void)
void Source::reload(const Config *c) { c->configure(this, _setting); }
-std::unique_ptr<MessageBuffer> Source::buffer(const Severity &) const
+std::unique_ptr<MessageBuffer> Source::buffer(const Severity &severity) const
{
- // TODO Pass Severity
- return std::make_unique<MessageBuffer>(_bus);
+ return std::make_unique<MessageBuffer>(_bus, severity.category());
}
} // namespace hermes
diff --git a/compiler/locomotiv/CMakeLists.txt b/compiler/locomotiv/CMakeLists.txt
index 308f48619..34835e483 100644
--- a/compiler/locomotiv/CMakeLists.txt
+++ b/compiler/locomotiv/CMakeLists.txt
@@ -3,7 +3,9 @@ file(GLOB_RECURSE TESTS "src/*.test.cpp")
list(REMOVE_ITEM SOURCES ${TESTS})
add_library(locomotiv STATIC ${SOURCES})
-set_target_properties(locomotiv PROPERTIES POSITION_INDEPENDENT_CODE ON)
+if (NOT NNCC_LIBRARY_NO_PIC)
+ set_target_properties(locomotiv PROPERTIES POSITION_INDEPENDENT_CODE ON)
+endif (NOT NNCC_LIBRARY_NO_PIC)
target_include_directories(locomotiv PUBLIC include)
target_include_directories(locomotiv PRIVATE src)
target_link_libraries(locomotiv PUBLIC loco)
diff --git a/compiler/locop/CMakeLists.txt b/compiler/locop/CMakeLists.txt
index f02fb1a72..43ec41af4 100644
--- a/compiler/locop/CMakeLists.txt
+++ b/compiler/locop/CMakeLists.txt
@@ -3,7 +3,9 @@ file(GLOB_RECURSE TESTS "src/*.test.cpp")
list(REMOVE_ITEM SOURCES ${TESTS})
add_library(locop STATIC ${SOURCES})
-set_target_properties(locop PROPERTIES POSITION_INDEPENDENT_CODE ON)
+if (NOT NNCC_LIBRARY_NO_PIC)
+ set_target_properties(locop PROPERTIES POSITION_INDEPENDENT_CODE ON)
+endif(NOT NNCC_LIBRARY_NO_PIC)
target_include_directories(locop PUBLIC include)
target_link_libraries(locop PUBLIC loco)
# Let's apply nncc common compile options
diff --git a/compiler/logo-core/CMakeLists.txt b/compiler/logo-core/CMakeLists.txt
index 3bc71dbd0..374794f90 100644
--- a/compiler/logo-core/CMakeLists.txt
+++ b/compiler/logo-core/CMakeLists.txt
@@ -3,7 +3,9 @@ file(GLOB_RECURSE TESTS "src/*.test.cpp")
list(REMOVE_ITEM SOURCES ${TESTS})
add_library(logo_core STATIC ${SOURCES})
-set_target_properties(logo_core PROPERTIES POSITION_INDEPENDENT_CODE ON)
+if (NOT NNCC_LIBRARY_NO_PIC)
+ set_target_properties(logo_core PROPERTIES POSITION_INDEPENDENT_CODE ON)
+endif(NOT NNCC_LIBRARY_NO_PIC)
target_include_directories(logo_core PRIVATE src)
target_include_directories(logo_core PUBLIC include)
target_link_libraries(logo_core PUBLIC loco)
diff --git a/compiler/logo-ex/CMakeLists.txt b/compiler/logo-ex/CMakeLists.txt
new file mode 100644
index 000000000..31d76025e
--- /dev/null
+++ b/compiler/logo-ex/CMakeLists.txt
@@ -0,0 +1,23 @@
+file(GLOB_RECURSE SOURCES "src/*.cpp")
+file(GLOB_RECURSE TESTS "src/*.test.cpp")
+list(REMOVE_ITEM SOURCES ${TESTS})
+
+add_library(logo_ex STATIC ${SOURCES})
+if (NOT NNCC_LIBRARY_NO_PIC)
+ set_target_properties(logo_ex PROPERTIES POSITION_INDEPENDENT_CODE ON)
+endif(NOT NNCC_LIBRARY_NO_PIC)
+target_include_directories(logo_ex PRIVATE src)
+target_include_directories(logo_ex PUBLIC include)
+target_link_libraries(logo_ex PUBLIC loco)
+target_link_libraries(logo_ex PUBLIC logo_core)
+target_link_libraries(logo_ex PRIVATE locomotiv)
+
+if(NOT ENABLE_TEST)
+ return()
+endif(NOT ENABLE_TEST)
+
+nnas_find_package(GTest REQUIRED)
+
+GTest_AddTest(logo_ex_test ${TESTS})
+target_include_directories(logo_ex_test PRIVATE src)
+target_link_libraries(logo_ex_test logo_ex)
diff --git a/compiler/logo-ex/README.md b/compiler/logo-ex/README.md
new file mode 100644
index 000000000..8ea55a202
--- /dev/null
+++ b/compiler/logo-ex/README.md
@@ -0,0 +1,6 @@
+# logo-ex
+
+_logo-ex_ provides extended _loco_ graph passes for transformation and optimization
+that rely on _locomotiv_.
+
+NOTE: this code was extracted from commit f2e7c38dcc601cb290c380d8314a3ae627923f58
diff --git a/compiler/logo/include/logo/ConstantFoldingPass.h b/compiler/logo-ex/include/logo/ConstantFoldingPass.h
index 99ccdc315..9143ae49b 100644
--- a/compiler/logo/include/logo/ConstantFoldingPass.h
+++ b/compiler/logo-ex/include/logo/ConstantFoldingPass.h
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
+ * Copyright (c) 2022 Samsung Electronics Co., Ltd. All Rights Reserved
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
@@ -14,8 +14,8 @@
* limitations under the License.
*/
-#ifndef __LOGO_CONSTANT_FOLDING_PASS_H__
-#define __LOGO_CONSTANT_FOLDING_PASS_H__
+#ifndef __LOGO_EX_CONSTANT_FOLDING_PASS_H__
+#define __LOGO_EX_CONSTANT_FOLDING_PASS_H__
#include <logo/Pass.h>
@@ -38,4 +38,4 @@ public:
} // namespace logo
-#endif // __LOGO_CONSTANT_FOLDING_PASS_H__
+#endif // __LOGO_EX_CONSTANT_FOLDING_PASS_H__
diff --git a/compiler/logo-ex/include/logo/PassesEx.h b/compiler/logo-ex/include/logo/PassesEx.h
new file mode 100644
index 000000000..8bdf93bd9
--- /dev/null
+++ b/compiler/logo-ex/include/logo/PassesEx.h
@@ -0,0 +1,24 @@
+/*
+ * Copyright (c) 2022 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __LOGO_PASSES_EX_H__
+#define __LOGO_PASSES_EX_H__
+
+// Please keep this in alphabetical order
+
+#include <logo/ConstantFoldingPass.h>
+
+#endif // __LOGO_PASSES_EX_H__
diff --git a/compiler/logo-ex/requires.cmake b/compiler/logo-ex/requires.cmake
new file mode 100644
index 000000000..c76183353
--- /dev/null
+++ b/compiler/logo-ex/requires.cmake
@@ -0,0 +1,3 @@
+require("loco")
+require("logo-core")
+require("locomotiv")
diff --git a/compiler/logo/src/Passes/ConstantFoldingPass.cpp b/compiler/logo-ex/src/Passes/ConstantFoldingPass.cpp
index 2bd4759ca..97d75458b 100644
--- a/compiler/logo/src/Passes/ConstantFoldingPass.cpp
+++ b/compiler/logo-ex/src/Passes/ConstantFoldingPass.cpp
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
+ * Copyright (c) 2022 Samsung Electronics Co., Ltd. All Rights Reserved
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
diff --git a/compiler/logo/src/Passes/ConstantFoldingPass.test.cpp b/compiler/logo-ex/src/Passes/ConstantFoldingPass.test.cpp
index 5d222eb00..ba571a7f6 100644
--- a/compiler/logo/src/Passes/ConstantFoldingPass.test.cpp
+++ b/compiler/logo-ex/src/Passes/ConstantFoldingPass.test.cpp
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
+ * Copyright (c) 2022 Samsung Electronics Co., Ltd. All Rights Reserved
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
diff --git a/compiler/logo-ex/src/TestHelper.h b/compiler/logo-ex/src/TestHelper.h
new file mode 100644
index 000000000..07e3b20aa
--- /dev/null
+++ b/compiler/logo-ex/src/TestHelper.h
@@ -0,0 +1,44 @@
+/*
+ * Copyright (c) 2022 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __TEST_HELPER_H__
+#define __TEST_HELPER_H__
+
+#include <loco.h>
+
+namespace logo
+{
+namespace test
+{
+
+template <typename T> T *find_first_node_by_type(loco::Graph *g)
+{
+ T *first_node = nullptr;
+
+ for (auto node : loco::postorder_traversal(loco::output_nodes(g)))
+ {
+ first_node = dynamic_cast<T *>(node);
+ if (first_node != nullptr)
+ break;
+ }
+
+ return first_node;
+}
+
+} // namespace test
+} // namespace logo
+
+#endif // __TEST_HELPER_H__
diff --git a/compiler/logo/CMakeLists.txt b/compiler/logo/CMakeLists.txt
index a8efd9b03..e6a6f907f 100644
--- a/compiler/logo/CMakeLists.txt
+++ b/compiler/logo/CMakeLists.txt
@@ -3,12 +3,13 @@ file(GLOB_RECURSE TESTS "src/*.test.cpp")
list(REMOVE_ITEM SOURCES ${TESTS})
add_library(logo STATIC ${SOURCES})
-set_target_properties(logo PROPERTIES POSITION_INDEPENDENT_CODE ON)
+if (NOT NNCC_LIBRARY_NO_PIC)
+ set_target_properties(logo PROPERTIES POSITION_INDEPENDENT_CODE ON)
+endif(NOT NNCC_LIBRARY_NO_PIC)
target_include_directories(logo PRIVATE src)
target_include_directories(logo PUBLIC include)
target_link_libraries(logo PUBLIC loco)
target_link_libraries(logo PUBLIC logo_core)
-target_link_libraries(logo PRIVATE locomotiv)
if(NOT ENABLE_TEST)
return()
diff --git a/compiler/logo/include/logo/Passes.h b/compiler/logo/include/logo/Passes.h
index 636251e45..06fd3212b 100644
--- a/compiler/logo/include/logo/Passes.h
+++ b/compiler/logo/include/logo/Passes.h
@@ -19,7 +19,6 @@
// Please keep this in alphabetical order
-#include <logo/ConstantFoldingPass.h>
#include <logo/RemoveDeadNodePass.h>
#include <logo/RemoveForwardNodePass.h>
#include <logo/ReorderDecodePass.h>
diff --git a/compiler/logo/requires.cmake b/compiler/logo/requires.cmake
index c76183353..3e4d227cd 100644
--- a/compiler/logo/requires.cmake
+++ b/compiler/logo/requires.cmake
@@ -1,3 +1,2 @@
require("loco")
require("logo-core")
-require("locomotiv")
diff --git a/compiler/luci-interpreter/README.md b/compiler/luci-interpreter/README.md
index 4a9a34e6d..77ec5c81c 100644
--- a/compiler/luci-interpreter/README.md
+++ b/compiler/luci-interpreter/README.md
@@ -111,7 +111,7 @@ Note that one memory manager could be shared between multiple interpreter instan
List of predefined memory managers:
- `SimpleMemoryManager` A simple wrapper around new/delete; the default.
-- `TestMemoryManager` Memorizes all allocated memory and releases it in Manager desctuctor, used in kernel unit tests.
+- `TestMemoryManager` Memorizes all allocated memory and releases it in Manager destructor, used in kernel unit tests.
- `BuddyMemoryManager` Implements Buddy algorithm, uses external buffer for tensor data allocations, does not need new/delete.
- `StaticMemoryManager` Uses precomputed memory allocation plan. Requires preparation with MemoryPlanner, but could reduce memory consumption in restricted environments (like MCUs).
diff --git a/compiler/luci-interpreter/include/luci_interpreter/GraphBuilderRegistry.h b/compiler/luci-interpreter/include/luci_interpreter/GraphBuilderRegistry.h
new file mode 100644
index 000000000..375b1ae20
--- /dev/null
+++ b/compiler/luci-interpreter/include/luci_interpreter/GraphBuilderRegistry.h
@@ -0,0 +1,35 @@
+/*
+ * Copyright (c) 2022 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __LUCI_INTERPRETER_GRAPH_BUILDER_REGISTRY__
+#define __LUCI_INTERPRETER_GRAPH_BUILDER_REGISTRY__
+
+#include <luci/Import/GraphBuilderRegistry.h>
+
+namespace luci_interpreter
+{
+
+/**
+ * @brief Creates and returns a GraphBuilderSource that avoids copying constant buffers
+ *        from the model file.
+ *
+ * @warning Use this source only when the model buffer outlives the Interpreter.
+ */
+std::unique_ptr<luci::GraphBuilderSource> source_without_constant_copying();
+
+} // namespace luci_interpreter
+
+#endif // __LUCI_INTERPRETER_GRAPH_BUILDER_REGISTRY__
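The intended call site pairs this source with the importer, so constant tensors reference the flatbuffer instead of being copied. A sketch, assuming `luci::Importer` accepts a `GraphBuilderSource *` (as in luci's Import API) and that the raw model buffer outlives anything built on the imported module:

```cpp
#include <luci_interpreter/GraphBuilderRegistry.h>
#include <luci/Importer.h>

#include <memory>

std::unique_ptr<luci::Module> import_without_copies(const circle::Model *circle_model)
{
  // Keep the returned source alive while importing.
  auto source = luci_interpreter::source_without_constant_copying();
  luci::Importer importer(source.get());
  // Caller must keep the underlying model buffer alive as long as the
  // resulting Module (and any Interpreter built on it) is in use.
  return importer.importModule(circle_model);
}
```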
diff --git a/compiler/luci-interpreter/include/luci_interpreter/Interpreter.h b/compiler/luci-interpreter/include/luci_interpreter/Interpreter.h
index 7dee8a7f2..8e2f457a5 100644
--- a/compiler/luci-interpreter/include/luci_interpreter/Interpreter.h
+++ b/compiler/luci-interpreter/include/luci_interpreter/Interpreter.h
@@ -50,7 +50,9 @@ public:
class Interpreter
{
public:
- explicit Interpreter(const luci::Module *module, IMemoryManager *memory_manager = nullptr);
+ explicit Interpreter(const luci::Module *module);
+
+ explicit Interpreter(const luci::Module *module, IMemoryManager *memory_manager);
~Interpreter();
@@ -69,7 +71,6 @@ private:
// the order of deletion in the destructor
std::unique_ptr<IMemoryManager> _default_memory_manager = nullptr;
std::unique_ptr<class RuntimeModule> _runtime_module;
- IMemoryManager *_memory_manager = nullptr;
// Observer functionality support.
std::unique_ptr<struct RuntimeToIR> _runtime_to_ir;
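Read together with the `_default_memory_manager` member, the split means: the one-argument constructor creates and owns a default memory manager, while the two-argument form borrows a caller-owned one. A hedged sketch of the two paths:

```cpp
void construction_paths(const luci::Module *module)
{
  // Path 1: the interpreter creates and owns a default memory manager.
  luci_interpreter::Interpreter owning(module);

  // Path 2: the caller supplies the manager and must keep it alive at least
  // as long as the interpreter (assumed: SimpleMemoryManager default ctor).
  luci_interpreter::SimpleMemoryManager borrowed_manager;
  luci_interpreter::Interpreter borrowing(module, &borrowed_manager);
}
```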
diff --git a/compiler/luci-interpreter/pal/cmsisnn/KernelsToBuild.lst b/compiler/luci-interpreter/pal/cmsisnn/KernelsToBuild.lst
index 771974afe..d134a6b95 100644
--- a/compiler/luci-interpreter/pal/cmsisnn/KernelsToBuild.lst
+++ b/compiler/luci-interpreter/pal/cmsisnn/KernelsToBuild.lst
@@ -7,9 +7,11 @@ REGISTER_KERNEL(Concatenation)
REGISTER_KERNEL(Conv2D)
REGISTER_KERNEL(DepthToSpace)
REGISTER_KERNEL(DepthwiseConv2D)
+REGISTER_KERNEL(Dequantize)
REGISTER_KERNEL(Div)
REGISTER_KERNEL(Elu)
REGISTER_KERNEL(Exp)
+REGISTER_KERNEL(ExpandDims)
REGISTER_KERNEL(Floor)
REGISTER_KERNEL(FloorDiv)
REGISTER_KERNEL(Equal)
@@ -37,6 +39,7 @@ REGISTER_KERNEL(NotEqual)
REGISTER_KERNEL(Pad)
REGISTER_KERNEL(PadV2)
REGISTER_KERNEL(PRelu)
+REGISTER_KERNEL(Quantize)
REGISTER_KERNEL(Reshape)
REGISTER_KERNEL(ResizeBilinear)
REGISTER_KERNEL(ResizeNearestNeighbor)
@@ -50,6 +53,7 @@ REGISTER_KERNEL(Square)
REGISTER_KERNEL(SquaredDifference)
REGISTER_KERNEL(Squeeze)
REGISTER_KERNEL(Sub)
+REGISTER_KERNEL(SVDF)
REGISTER_KERNEL(Tanh)
REGISTER_KERNEL(Transpose)
REGISTER_KERNEL(TransposeConv)
diff --git a/compiler/luci-interpreter/pal/cmsisnn/PALAveragePool2d.h b/compiler/luci-interpreter/pal/cmsisnn/PALAveragePool2d.h
new file mode 100644
index 000000000..a274afb7e
--- /dev/null
+++ b/compiler/luci-interpreter/pal/cmsisnn/PALAveragePool2d.h
@@ -0,0 +1,124 @@
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef LUCI_INTERPRETER_PAL_AVERAGEPOOL2D_H
+#define LUCI_INTERPRETER_PAL_AVERAGEPOOL2D_H
+
+#include <tensorflow/lite/kernels/internal/reference/integer_ops/pooling.h>
+#include <tensorflow/lite/kernels/internal/reference/pooling.h>
+#include <arm_nn_types.h>
+#include <arm_nnfunctions.h>
+
+namespace luci_interpreter_pal
+{
+template <typename T>
+static inline void AveragePool(const tflite::PoolParams &params,
+ const tflite::RuntimeShape &input_shape, const T *input_data,
+ const tflite::RuntimeShape &output_shape, T *output_data,
+ const tflite::RuntimeShape &scratchpad_shape, T *scratchpad_data)
+{
+ {
+ // MARK: At this moment this operation is not supported
+ assert(false && "AveragePool NYI");
+ (void)params;
+ (void)input_shape;
+ (void)input_data;
+ (void)output_shape;
+ (void)output_data;
+ (void)scratchpad_shape;
+ (void)scratchpad_data;
+ }
+}
+
+template <>
+inline void AveragePool<int8_t>(const tflite::PoolParams &params,
+ const tflite::RuntimeShape &input_shape, const int8_t *input_data,
+ const tflite::RuntimeShape &output_shape, int8_t *output_data,
+ const tflite::RuntimeShape &scratchpad_shape,
+ int8_t *scratchpad_data)
+{
+ assert(input_shape.DimensionsCount() == 4);
+ assert(output_shape.DimensionsCount() == 4);
+ assert(scratchpad_data != nullptr);
+
+ const int32_t batches = tflite::MatchingDim(input_shape, 0, output_shape, 0);
+ assert(batches == 1);
+
+ const int depth = tflite::MatchingDim(input_shape, 3, output_shape, 3);
+
+ cmsis_nn_dims input_dims;
+ input_dims.n = 1;
+ input_dims.h = input_shape.Dims(1);
+ input_dims.w = input_shape.Dims(2);
+ input_dims.c = depth;
+
+ cmsis_nn_dims output_dims;
+ output_dims.n = 1;
+ output_dims.h = output_shape.Dims(1);
+ output_dims.w = output_shape.Dims(2);
+ output_dims.c = depth;
+
+ cmsis_nn_pool_params pool_params;
+ pool_params.stride.h = params.stride_height;
+ pool_params.stride.w = params.stride_width;
+ pool_params.padding.h = params.padding_values.height;
+ pool_params.padding.w = params.padding_values.width;
+ pool_params.activation.min = params.quantized_activation_min;
+ pool_params.activation.max = params.quantized_activation_max;
+
+ cmsis_nn_dims filter_dims;
+ filter_dims.n = 1;
+ filter_dims.h = params.filter_height;
+ filter_dims.w = params.filter_width;
+ filter_dims.c = 1;
+
+ cmsis_nn_context ctx;
+ ctx.buf = scratchpad_data;
+ ctx.size = scratchpad_shape.Dims(0);
+ auto res = arm_avgpool_s8(&ctx, &pool_params, &input_dims, input_data, &filter_dims, &output_dims,
+ output_data);
+ assert(res == ARM_MATH_SUCCESS);
+}
+
+static inline void SetupScratchpadTensor(luci_interpreter::Tensor *scratchpad,
+ const luci_interpreter::DataType &input_data_type,
+ const tflite::RuntimeShape &input_shape,
+ const tflite::RuntimeShape &output_shape)
+
+{
+ if (input_data_type == luci_interpreter::DataType::S8)
+ {
+ assert(input_shape.DimensionsCount() == 4);
+ assert(output_shape.DimensionsCount() == 4);
+
+ const int32_t output_width = output_shape.Dims(2);
+ const int32_t depth = tflite::MatchingDim(input_shape, 3, output_shape, 3);
+
+ const int32_t buf_size = arm_avgpool_s8_get_buffer_size(output_width, depth);
+ auto data_type_size = static_cast<int32_t>(luci_interpreter::getDataTypeSize(input_data_type));
+
+ luci_interpreter::Shape scratchpad_shape{buf_size * data_type_size};
+ scratchpad->resize(scratchpad_shape);
+ }
+ else
+ {
+ scratchpad->set_allocatable(false);
+ }
+}
+
+} // namespace luci_interpreter_pal
+
+#endif // LUCI_INTERPRETER_PAL_AVERAGEPOOL2D_H
diff --git a/compiler/luci-interpreter/pal/cmsisnn/PALConv2d.h b/compiler/luci-interpreter/pal/cmsisnn/PALConv2d.h
index 0a8ae4e48..cfb84ea60 100644
--- a/compiler/luci-interpreter/pal/cmsisnn/PALConv2d.h
+++ b/compiler/luci-interpreter/pal/cmsisnn/PALConv2d.h
@@ -19,6 +19,8 @@
#include <tensorflow/lite/kernels/internal/reference/conv.h>
#include <tensorflow/lite/kernels/internal/reference/integer_ops/conv.h>
+#include <arm_nn_types.h>
+#include <arm_nnfunctions.h>
namespace luci_interpreter_pal
{
@@ -26,11 +28,11 @@ static inline void Conv(const tflite::ConvParams &params, const tflite::RuntimeS
const float *input_data, const tflite::RuntimeShape &filter_shape,
const float *filter_data, const tflite::RuntimeShape &bias_shape,
const float *bias_data, const tflite::RuntimeShape &output_shape,
- float *output_data, const tflite::RuntimeShape &im2col_shape,
- float *im2col_data)
+ float *output_data, const tflite::RuntimeShape &scratchpad_shape,
+ float *scratchpad_data)
{
- (void)im2col_shape;
- (void)im2col_data;
+ (void)scratchpad_shape;
+ (void)scratchpad_data;
tflite::reference_ops::Conv(params, input_shape, input_data, filter_shape, filter_data,
bias_shape, bias_data, output_shape, output_data,
tflite::RuntimeShape(), nullptr);
@@ -40,14 +42,14 @@ static inline void Conv(const tflite::ConvParams &params, const tflite::RuntimeS
const uint8 *input_data, const tflite::RuntimeShape &filter_shape,
const uint8 *filter_data, const tflite::RuntimeShape &bias_shape,
const int32 *bias_data, const tflite::RuntimeShape &output_shape,
- uint8 *output_data, const tflite::RuntimeShape &im2col_shape,
- uint8 *im2col_data)
+ uint8 *output_data, const tflite::RuntimeShape &scratchpad_shape,
+ uint8 *scratchpad_data)
{
- (void)im2col_shape;
- (void)im2col_data;
+ (void)scratchpad_shape;
+ (void)scratchpad_data;
tflite::reference_ops::Conv(params, input_shape, input_data, filter_shape, filter_data,
- bias_shape, bias_data, output_shape, output_data, im2col_shape,
- im2col_data, nullptr);
+ bias_shape, bias_data, output_shape, output_data, scratchpad_shape,
+ scratchpad_data, nullptr);
}
static inline void ConvPerChannel(const tflite::ConvParams &params, const int32_t *mult,
@@ -55,14 +57,141 @@ static inline void ConvPerChannel(const tflite::ConvParams &params, const int32_
const int8 *input_data, const tflite::RuntimeShape &filter_shape,
const int8 *filter_data, const tflite::RuntimeShape &bias_shape,
const int32 *bias_data, const tflite::RuntimeShape &output_shape,
- int8 *output_data, const tflite::RuntimeShape &im2col_shape,
- int8 *im2col_data)
+ int8 *output_data, const tflite::RuntimeShape &scratchpad_shape,
+ int8 *scratchpad_data)
{
- (void)im2col_shape;
- (void)im2col_data;
- tflite::reference_integer_ops::ConvPerChannel(params, mult, shifts, input_shape, input_data,
- filter_shape, filter_data, bias_shape, bias_data,
- output_shape, output_data);
+ if (scratchpad_data)
+ {
+ cmsis_nn_conv_params conv_params;
+ conv_params.dilation.h = params.dilation_height_factor;
+ conv_params.dilation.w = params.dilation_width_factor;
+
+ assert(conv_params.dilation.h == 1);
+ assert(conv_params.dilation.w == 1);
+
+ conv_params.input_offset = params.input_offset;
+ conv_params.output_offset = params.output_offset;
+ conv_params.stride.h = params.stride_height;
+ conv_params.stride.w = params.stride_width;
+ conv_params.padding.h = params.padding_values.height;
+ conv_params.padding.w = params.padding_values.width;
+ conv_params.activation.min = params.quantized_activation_min;
+ conv_params.activation.max = params.quantized_activation_max;
+
+ cmsis_nn_per_channel_quant_params quant_params;
+ quant_params.multiplier = const_cast<int32_t *>(mult);
+ quant_params.shift = const_cast<int32_t *>(shifts);
+
+ assert(conv_params.activation.min <= conv_params.activation.max);
+ assert(input_shape.DimensionsCount() == 4);
+ assert(filter_shape.DimensionsCount() == 4);
+ assert(output_shape.DimensionsCount() == 4);
+ const int batch_size = tflite::MatchingDim(input_shape, 0, output_shape, 0);
+ const int input_depth = tflite::MatchingDim(input_shape, 3, filter_shape, 3);
+ const int output_depth = tflite::MatchingDim(filter_shape, 0, output_shape, 3);
+ if (bias_data)
+ {
+ assert(bias_shape.FlatSize() == output_depth);
+ }
+
+ cmsis_nn_dims input_dims;
+ input_dims.n = batch_size;
+ input_dims.h = input_shape.Dims(1);
+ input_dims.w = input_shape.Dims(2);
+ input_dims.c = input_depth;
+
+ cmsis_nn_dims filter_dims;
+ filter_dims.n = output_depth;
+ filter_dims.h = filter_shape.Dims(1);
+ filter_dims.w = filter_shape.Dims(2);
+ filter_dims.c = input_depth;
+
+ cmsis_nn_dims bias_dims;
+ bias_dims.n = 1;
+ bias_dims.h = 1;
+ bias_dims.w = 1;
+ bias_dims.c = output_depth;
+
+ cmsis_nn_dims output_dims;
+ output_dims.n = batch_size;
+ output_dims.h = output_shape.Dims(1);
+ output_dims.w = output_shape.Dims(2);
+ output_dims.c = output_depth;
+
+ cmsis_nn_context ctx;
+ ctx.buf = scratchpad_data;
+ ctx.size = scratchpad_shape.Dims(0);
+
+ auto res = arm_convolve_wrapper_s8(&ctx, &conv_params, &quant_params, &input_dims, input_data,
+ &filter_dims, filter_data, &bias_dims, bias_data,
+ &output_dims, output_data);
+ assert(res == ARM_MATH_SUCCESS);
+ }
+ else
+ {
+ tflite::reference_integer_ops::ConvPerChannel(params, mult, shifts, input_shape, input_data,
+ filter_shape, filter_data, bias_shape, bias_data,
+ output_shape, output_data);
+ }
+}
+
+static inline void SetupScratchpadTensor(luci_interpreter::Tensor *scratchpad,
+ const luci_interpreter::DataType &input_data_type,
+ const tflite::ConvParams &params,
+ const tflite::RuntimeShape &input_shape,
+ const tflite::RuntimeShape &filter_shape,
+ const tflite::RuntimeShape &output_shape)
+{
+ cmsis_nn_conv_params conv_params;
+ conv_params.dilation.h = params.dilation_height_factor;
+ conv_params.dilation.w = params.dilation_width_factor;
+
+ if (input_data_type == loco::DataType::S8 && conv_params.dilation.h == 1 &&
+ conv_params.dilation.w == 1)
+ {
+ const int32_t batches = tflite::MatchingDim(input_shape, 0, output_shape, 0);
+ const int32_t input_depth = tflite::MatchingDim(input_shape, 3, filter_shape, 3);
+ const int32_t output_depth = tflite::MatchingDim(filter_shape, 0, output_shape, 3);
+ const int32_t filter_height = filter_shape.Dims(1);
+ const int32_t filter_width = filter_shape.Dims(2);
+ const int32_t output_height = output_shape.Dims(1);
+ const int32_t output_width = output_shape.Dims(2);
+
+ conv_params.input_offset = params.input_offset;
+ conv_params.output_offset = params.output_offset;
+ conv_params.stride.h = params.stride_height;
+ conv_params.stride.w = params.stride_width;
+ conv_params.padding.h = params.padding_values.height;
+ conv_params.padding.w = params.padding_values.width;
+
+ cmsis_nn_dims input_dims;
+ input_dims.n = batches;
+ input_dims.h = input_shape.Dims(1);
+ input_dims.w = input_shape.Dims(2);
+ input_dims.c = input_depth;
+
+ cmsis_nn_dims filter_dims;
+ filter_dims.n = output_depth;
+ filter_dims.h = filter_height;
+ filter_dims.w = filter_width;
+ filter_dims.c = input_depth;
+
+ cmsis_nn_dims output_dims;
+ output_dims.n = batches;
+ output_dims.h = output_height;
+ output_dims.w = output_width;
+ output_dims.c = output_depth;
+
+ const int32_t buf_size = arm_convolve_wrapper_s8_get_buffer_size(&conv_params, &input_dims,
+ &filter_dims, &output_dims);
+
+ luci_interpreter::Shape scratchpad_shape{buf_size};
+ scratchpad->resize(scratchpad_shape);
+ }
+ else
+ {
+ scratchpad->set_allocatable(false);
+ }
}
} // namespace luci_interpreter_pal
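The same two-phase pattern recurs across these CMSIS-NN PAL headers: at configure time `SetupScratchpadTensor` either sizes the scratchpad from `arm_convolve_wrapper_s8_get_buffer_size` or marks it non-allocatable, and at execute time a non-null scratchpad buffer selects the CMSIS-NN path while a null one falls back to the reference kernel. A hedged configure-time sketch of the kernel side:

```cpp
#include "PALConv2d.h"

// Configure-time sketch: size the scratchpad once shapes are known.
// (Tensor creation is elided; shapes/params mirror the kernel's inputs.)
void prepare(luci_interpreter::Tensor *scratchpad,
             const tflite::ConvParams &params,
             const tflite::RuntimeShape &input_shape,
             const tflite::RuntimeShape &filter_shape,
             const tflite::RuntimeShape &output_shape)
{
  luci_interpreter_pal::SetupScratchpadTensor(scratchpad, luci_interpreter::DataType::S8,
                                              params, input_shape, filter_shape,
                                              output_shape);
  // At execute time, passing the resulting buffer (or nullptr when the tensor
  // was marked non-allocatable) to ConvPerChannel selects between
  // arm_convolve_wrapper_s8 and the portable reference kernel.
}
```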
diff --git a/compiler/luci-interpreter/pal/cmsisnn/PALDepthwiseConv2d.h b/compiler/luci-interpreter/pal/cmsisnn/PALDepthwiseConv2d.h
new file mode 100644
index 000000000..120dcd803
--- /dev/null
+++ b/compiler/luci-interpreter/pal/cmsisnn/PALDepthwiseConv2d.h
@@ -0,0 +1,191 @@
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef LUCI_INTERPRETER_PAL_DEPTHWISECONV2D_H
+#define LUCI_INTERPRETER_PAL_DEPTHWISECONV2D_H
+
+#include <tensorflow/lite/kernels/internal/reference/depthwiseconv_float.h>
+#include <tensorflow/lite/kernels/internal/reference/depthwiseconv_uint8.h>
+#include <tensorflow/lite/kernels/internal/reference/integer_ops/depthwise_conv.h>
+#include <arm_nnfunctions.h>
+
+namespace luci_interpreter_pal
+{
+template <typename T>
+static inline void
+DepthwiseConvPerChannel(const tflite::DepthwiseParams &params, const int32_t *output_multiplier,
+ const int32_t *output_shift, const tflite::RuntimeShape &input_shape,
+ const T *input_data, const tflite::RuntimeShape &filter_shape,
+ const T *filter_data, const tflite::RuntimeShape &bias_shape,
+ const int32_t *bias_data, const tflite::RuntimeShape &output_shape,
+ T *output_data, const tflite::RuntimeShape &scratchpad_shape,
+ T *scratchpad_data)
+{
+ {
+ // MARK: At this moment this operation is not supported
+ assert(false && "DepthwiseConvPerChannel NYI");
+ (void)params;
+ (void)output_multiplier;
+ (void)output_shift;
+ (void)input_shape;
+ (void)input_data;
+ (void)filter_shape;
+ (void)filter_data;
+ (void)bias_shape;
+ (void)bias_data;
+ (void)output_shape;
+ (void)output_data;
+ (void)scratchpad_shape;
+ (void)scratchpad_data;
+ }
+}
+
+template <>
+inline void DepthwiseConvPerChannel<int8_t>(
+ const tflite::DepthwiseParams &params, const int32_t *output_multiplier,
+ const int32_t *output_shift, const tflite::RuntimeShape &input_shape, const int8_t *input_data,
+ const tflite::RuntimeShape &filter_shape, const int8_t *filter_data,
+ const tflite::RuntimeShape &bias_shape, const int32_t *bias_data,
+ const tflite::RuntimeShape &output_shape, int8_t *output_data,
+ const tflite::RuntimeShape &scratchpad_shape, int8_t *scratchpad_data)
+{
+ if (scratchpad_data)
+ {
+ cmsis_nn_dw_conv_params dw_conv_params;
+ dw_conv_params.dilation.h = params.dilation_height_factor;
+ dw_conv_params.dilation.w = params.dilation_width_factor;
+ assert(dw_conv_params.dilation.h == 1);
+ assert(dw_conv_params.dilation.w == 1);
+
+ dw_conv_params.input_offset = params.input_offset;
+ dw_conv_params.output_offset = params.output_offset;
+ dw_conv_params.stride.h = params.stride_height;
+ dw_conv_params.stride.w = params.stride_width;
+ dw_conv_params.padding.h = params.padding_values.height;
+ dw_conv_params.padding.w = params.padding_values.width;
+
+ dw_conv_params.activation.min = params.quantized_activation_min;
+ dw_conv_params.activation.max = params.quantized_activation_max;
+ dw_conv_params.ch_mult = params.depth_multiplier;
+
+ cmsis_nn_per_channel_quant_params quant_params;
+ int32_t output_multiplier = params.output_multiplier;
+ int32_t output_shift = params.output_shift;
+
+ quant_params.multiplier = &output_multiplier;
+ quant_params.shift = &output_shift;
+
+ assert(dw_conv_params.activation.min <= dw_conv_params.activation.max);
+ const int batch_size = tflite::MatchingDim(input_shape, 0, output_shape, 0);
+ const int output_depth = tflite::MatchingDim(filter_shape, 3, output_shape, 3);
+ if (bias_data)
+ {
+ assert(bias_shape.FlatSize() == output_depth);
+ }
+
+ cmsis_nn_dims input_dims;
+ input_dims.n = batch_size;
+ input_dims.h = input_shape.Dims(1);
+ input_dims.w = input_shape.Dims(2);
+ input_dims.c = input_shape.Dims(3);
+
+ cmsis_nn_dims filter_dims;
+ filter_dims.n = filter_shape.Dims(0);
+ filter_dims.h = filter_shape.Dims(1);
+ filter_dims.w = filter_shape.Dims(2);
+ filter_dims.c = output_depth;
+
+ cmsis_nn_dims bias_dims;
+ bias_dims.n = 1;
+ bias_dims.h = 1;
+ bias_dims.w = 1;
+ bias_dims.c = output_depth;
+
+ cmsis_nn_dims output_dims;
+ output_dims.n = batch_size;
+ output_dims.h = output_shape.Dims(1);
+ output_dims.w = output_shape.Dims(2);
+ output_dims.c = output_depth;
+
+ cmsis_nn_context ctx;
+ ctx.buf = scratchpad_data;
+ ctx.size = scratchpad_shape.Dims(0);
+
+ auto res = arm_depthwise_conv_wrapper_s8(&ctx, &dw_conv_params, &quant_params, &input_dims,
+ input_data, &filter_dims, filter_data, &bias_dims,
+ bias_data, &output_dims, output_data);
+ assert(res == ARM_MATH_SUCCESS);
+ }
+ else
+ {
+ tflite::reference_integer_ops::DepthwiseConvPerChannel(
+ params, output_multiplier, output_shift, input_shape, input_data, filter_shape, filter_data,
+ bias_shape, bias_data, output_shape, output_data);
+ }
+}
+
+static inline void SetupScratchpadTensor(luci_interpreter::Tensor *scratchpad,
+ const tflite::DepthwiseParams &params,
+ const luci_interpreter::DataType &input_data_type,
+ const tflite::RuntimeShape &input_shape,
+ const tflite::RuntimeShape &filter_shape,
+ const tflite::RuntimeShape &output_shape)
+{
+ cmsis_nn_dw_conv_params dw_conv_params;
+ dw_conv_params.dilation.h = params.dilation_height_factor;
+ dw_conv_params.dilation.w = params.dilation_width_factor;
+
+ if (input_data_type == loco::DataType::S8 && dw_conv_params.dilation.h == 1 &&
+ dw_conv_params.dilation.w == 1)
+ {
+ const int batch_size = tflite::MatchingDim(input_shape, 0, output_shape, 0);
+ const int output_depth = tflite::MatchingDim(filter_shape, 3, output_shape, 3);
+
+ cmsis_nn_dims input_dims;
+ input_dims.n = batch_size;
+ input_dims.h = input_shape.Dims(1);
+ input_dims.w = input_shape.Dims(2);
+ input_dims.c = input_shape.Dims(3);
+
+ cmsis_nn_dims filter_dims;
+ filter_dims.n = filter_shape.Dims(0);
+ filter_dims.h = filter_shape.Dims(1);
+ filter_dims.w = filter_shape.Dims(2);
+ filter_dims.c = output_depth;
+
+ cmsis_nn_dims output_dims;
+ output_dims.n = batch_size;
+ output_dims.h = output_shape.Dims(1);
+ output_dims.w = output_shape.Dims(2);
+ output_dims.c = output_depth;
+
+ const int32_t buf_size = arm_depthwise_conv_wrapper_s8_get_buffer_size(
+ &dw_conv_params, &input_dims, &filter_dims, &output_dims);
+
+ auto data_type_size = static_cast<int32_t>(luci_interpreter::getDataTypeSize(input_data_type));
+
+ luci_interpreter::Shape scratchpad_shape{buf_size * data_type_size};
+ scratchpad->resize(scratchpad_shape);
+ }
+ else
+ {
+ scratchpad->set_allocatable(false);
+ }
+}
+
+} // namespace luci_interpreter_pal
+
+#endif // LUCI_INTERPRETER_PAL_DEPTHWISECONV2D_H
diff --git a/compiler/luci-interpreter/pal/cmsisnn/PALDequantize.h b/compiler/luci-interpreter/pal/cmsisnn/PALDequantize.h
new file mode 100644
index 000000000..15ff0327b
--- /dev/null
+++ b/compiler/luci-interpreter/pal/cmsisnn/PALDequantize.h
@@ -0,0 +1,44 @@
+/*
+ * Copyright (c) 2022 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef LUCI_INTERPRETER_PAL_DEQUANTIZE_H
+#define LUCI_INTERPRETER_PAL_DEQUANTIZE_H
+
+#include "tensorflow/lite/kernels/internal/reference/integer_ops/dequantize.h"
+#include "tensorflow/lite/kernels/internal/reference/reference_ops.h"
+
+namespace luci_interpreter_pal
+{
+
+template <typename T>
+static inline void Dequantize(tflite::DequantizationParams &params,
+ const tflite::RuntimeShape &input_shape, const T *input_data,
+ const tflite::RuntimeShape &output_shape, float *output_data)
+{
+ tflite::reference_integer_ops::Dequantize<T>(params, input_shape, input_data, output_shape,
+ output_data);
+}
+
+static inline void Dequantize(tflite::DequantizationParams &params,
+ const tflite::RuntimeShape &input_shape, const uint8_t *input_data,
+ const tflite::RuntimeShape &output_shape, float *output_data)
+{
+ tflite::reference_ops::Dequantize(params, input_shape, input_data, output_shape, output_data);
+}
+
+} // namespace luci_interpreter_pal
+
+#endif // LUCI_INTERPRETER_PAL_DEQUANTIZE_H
diff --git a/compiler/luci-interpreter/pal/cmsisnn/PALFullyConnected.h b/compiler/luci-interpreter/pal/cmsisnn/PALFullyConnected.h
new file mode 100644
index 000000000..32e905761
--- /dev/null
+++ b/compiler/luci-interpreter/pal/cmsisnn/PALFullyConnected.h
@@ -0,0 +1,115 @@
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef LUCI_INTERPRETER_PAL_FULLYCONNECTED_H
+#define LUCI_INTERPRETER_PAL_FULLYCONNECTED_H
+
+#include <tensorflow/lite/kernels/internal/reference/fully_connected.h>
+#include <tensorflow/lite/kernels/internal/reference/integer_ops/fully_connected.h>
+#include <arm_nnfunctions.h>
+#include <memory>
+
+namespace luci_interpreter_pal
+{
+template <typename T>
+static inline void FullyConnected(const tflite::FullyConnectedParams &params,
+ const tflite::RuntimeShape &input_shape, const T *input_data,
+ const tflite::RuntimeShape &filter_shape, const T *filter_data,
+ const tflite::RuntimeShape &bias_shape, const int32_t *bias_data,
+ const tflite::RuntimeShape &output_shape, T *output_data)
+{
+ {
+    // MARK: At this moment this operation is not supported
+ assert(false && "FullyConnected NYI");
+ (void)params;
+ (void)input_shape;
+ (void)input_data;
+ (void)filter_shape;
+ (void)filter_data;
+ (void)bias_shape;
+ (void)bias_data;
+ (void)output_shape;
+ (void)output_data;
+ }
+}
+
+template <>
+inline void
+FullyConnected<int8_t>(const tflite::FullyConnectedParams &params,
+ const tflite::RuntimeShape &input_shape, const int8_t *input_data,
+ const tflite::RuntimeShape &filter_shape, const int8_t *filter_data,
+ const tflite::RuntimeShape &bias_shape, const int32_t *bias_data,
+ const tflite::RuntimeShape &output_shape, int8_t *output_data)
+{
+ assert(output_shape.DimensionsCount() == 2);
+
+ const int batches = output_shape.Dims(0);
+ const int output_depth = output_shape.Dims(1);
+
+ const int filter_dim_count = filter_shape.DimensionsCount();
+ const int accum_depth = filter_shape.Dims(filter_dim_count - 1);
+
+ cmsis_nn_fc_params fc_params;
+ fc_params.input_offset = params.input_offset;
+ fc_params.output_offset = params.output_offset;
+ fc_params.filter_offset = params.weights_offset;
+ fc_params.activation.min = params.quantized_activation_min;
+ fc_params.activation.max = params.quantized_activation_max;
+
+ cmsis_nn_per_tensor_quant_params quant_params;
+ quant_params.multiplier = params.output_multiplier;
+ quant_params.shift = params.output_shift;
+
+ cmsis_nn_dims input_dims;
+ input_dims.n = batches;
+ input_dims.h = 1;
+ input_dims.w = 1;
+ input_dims.c = accum_depth;
+
+ cmsis_nn_dims filter_dims;
+ filter_dims.n = accum_depth;
+ filter_dims.h = 1;
+ filter_dims.w = 1;
+ filter_dims.c = output_depth;
+
+ cmsis_nn_dims bias_dims;
+ bias_dims.n = 1;
+ bias_dims.h = 1;
+ bias_dims.w = 1;
+ bias_dims.c = output_depth;
+
+ cmsis_nn_dims output_dims;
+ output_dims.n = batches;
+ output_dims.h = 1;
+ output_dims.w = 1;
+ output_dims.c = output_depth;
+
+ int32_t buf_size = arm_fully_connected_s8_get_buffer_size(&filter_dims);
+ auto buffer = std::make_unique<int8_t[]>(buf_size);
+ assert(buffer != nullptr);
+
+ cmsis_nn_context ctx;
+ ctx.buf = buffer.get();
+ ctx.size = buf_size;
+
+ auto res =
+ arm_fully_connected_s8(&ctx, &fc_params, &quant_params, &input_dims, input_data, &filter_dims,
+ filter_data, &bias_dims, bias_data, &output_dims, output_data);
+ assert(res == ARM_MATH_SUCCESS);
+}
+} // namespace luci_interpreter_pal
+
+#endif // LUCI_INTERPRETER_PAL_FULLYCONNECTED_H
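One difference from the convolution kernels above: the CMSIS-NN context buffer is heap-allocated on every invocation via `std::make_unique` instead of coming from a pre-sized scratchpad tensor. Since `arm_fully_connected_s8_get_buffer_size` depends only on `filter_dims`, the allocation could be hoisted; a sketch of that alternative (the surrounding plumbing is assumed):

```cpp
#include <arm_nnfunctions.h>

#include <cstdint>
#include <vector>

// Reusable buffer sized once after shapes are known, then shared across calls.
struct FcScratch
{
  std::vector<int8_t> buf;
  cmsis_nn_context ctx{};

  void prepare(const cmsis_nn_dims &filter_dims)
  {
    const int32_t size = arm_fully_connected_s8_get_buffer_size(&filter_dims);
    buf.resize(static_cast<size_t>(size));
    ctx.buf = buf.data();
    ctx.size = size;
  }
};
```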
diff --git a/compiler/luci-interpreter/pal/cmsisnn/PALMul.h b/compiler/luci-interpreter/pal/cmsisnn/PALMul.h
index 2b46b100c..347a97a83 100644
--- a/compiler/luci-interpreter/pal/cmsisnn/PALMul.h
+++ b/compiler/luci-interpreter/pal/cmsisnn/PALMul.h
@@ -21,21 +21,21 @@
namespace luci_interpreter_pal
{
+template <typename T>
static inline void Mul(tflite::ArithmeticParams &params, const tflite::RuntimeShape &input1_shape,
- const float *input1_data, const tflite::RuntimeShape &input2_shape,
- const float *input2_data, const tflite::RuntimeShape &output_shape,
- float *output_data)
+ const T *input1_data, const tflite::RuntimeShape &input2_shape,
+ const T *input2_data, const tflite::RuntimeShape &output_shape,
+ T *output_data)
{
tflite::reference_ops::BroadcastMul4DSlow(params, input1_shape, input1_data, input2_shape,
input2_data, output_shape, output_data);
}
-static inline void BroadcastMul4DSlow(tflite::ArithmeticParams &params,
- const tflite::RuntimeShape &input1_shape,
- const float *input1_data,
- const tflite::RuntimeShape &input2_shape,
- const float *input2_data,
- const tflite::RuntimeShape &output_shape, float *output_data)
+template <typename T>
+static inline void
+BroadcastMul4DSlow(tflite::ArithmeticParams &params, const tflite::RuntimeShape &input1_shape,
+ const T *input1_data, const tflite::RuntimeShape &input2_shape,
+ const T *input2_data, const tflite::RuntimeShape &output_shape, T *output_data)
{
tflite::reference_ops::BroadcastMul4DSlow(params, input1_shape, input1_data, input2_shape,
input2_data, output_shape, output_data);
diff --git a/compiler/luci-interpreter/pal/cmsisnn/PALQuantize.h b/compiler/luci-interpreter/pal/cmsisnn/PALQuantize.h
new file mode 100644
index 000000000..6046789ae
--- /dev/null
+++ b/compiler/luci-interpreter/pal/cmsisnn/PALQuantize.h
@@ -0,0 +1,44 @@
+/*
+ * Copyright (c) 2022 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef LUCI_INTERPRETER_PAL_QUANTIZE_H
+#define LUCI_INTERPRETER_PAL_QUANTIZE_H
+
+#include "tensorflow/lite/kernels/internal/reference/reference_ops.h"
+
+namespace luci_interpreter_pal
+{
+template <typename T>
+static inline void Quantize(tflite::QuantizationParams &params,
+ const tflite::RuntimeShape &input_shape, const float *input_data,
+ const tflite::RuntimeShape &output_shape, T *output_data)
+{
+ tflite::reference_ops::AffineQuantize(params, input_shape, input_data, output_shape, output_data);
+}
+
+template <typename Input, typename Output>
+static inline void Requantize(const Input *input_data, int32_t size,
+ int32_t effective_scale_multiplier, int32_t effective_scale_shift,
+ int32_t input_zero_point, int32_t output_zero_point,
+ Output *output_data)
+{
+ tflite::reference_ops::Requantize(input_data, size, effective_scale_multiplier,
+ effective_scale_shift, input_zero_point, output_zero_point,
+ output_data);
+}
+} // namespace luci_interpreter_pal
+
+#endif // LUCI_INTERPRETER_PAL_QUANTIZE_H
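For orientation, `AffineQuantize` applies the usual mapping `q = round(x / scale) + zero_point`, clamped to the target type's range, and `Requantize` performs the same rescaling with a precomputed integer multiplier/shift pair instead of a float scale. A plain restatement (not the TFLite kernel itself):

```cpp
#include <algorithm>
#include <cmath>
#include <cstdint>

int8_t affine_quantize(float x, float scale, int32_t zero_point)
{
  int32_t q = static_cast<int32_t>(std::round(x / scale)) + zero_point;
  return static_cast<int8_t>(std::min(127, std::max(-128, q)));
}

// Example: scale = 0.5, zero_point = 10 -> affine_quantize(3.0f, 0.5f, 10) == 16.
```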
diff --git a/compiler/luci-interpreter/pal/cmsisnn/PALSVDF.h b/compiler/luci-interpreter/pal/cmsisnn/PALSVDF.h
new file mode 100644
index 000000000..a4a5b2a78
--- /dev/null
+++ b/compiler/luci-interpreter/pal/cmsisnn/PALSVDF.h
@@ -0,0 +1,191 @@
+/*
+ * Copyright (c) 2022 Samsung Electronics Co., Ltd. All Rights Reserved
+ * Copyright 2020 The TensorFlow Authors. All Rights Reserved.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef LUCI_INTERPRETER_PAL_SVDF_H
+#define LUCI_INTERPRETER_PAL_SVDF_H
+
+#include <arm_nn_types.h>
+#include <arm_nnfunctions.h>
+#include <stdexcept>
+
+namespace luci_interpreter_pal
+{
+static inline void
+IntegerSVDF(const TfLiteSVDFParams &params, const tflite::RuntimeShape &input_shape,
+ const int8_t *input_data, const tflite::RuntimeShape &weight_feature_shape,
+ const int8_t *weight_feature_data, const tflite::RuntimeShape &weight_time_shape,
+ const int16_t *weight_time_data, const tflite::RuntimeShape &bias_shape,
+ const int32_t *bias_data, int16_t *activation_state_data,
+ const tflite::RuntimeShape &output_shape, int8_t *output_data, int32_t *scratchpad_data,
+ int32_t *output_temp_data, int32_t scale_1_a, int scale_1_b, int32_t scale_2_a,
+ int scale_2_b, int32_t input_zp, int32_t output_zp)
+{
+ const int32_t rank = params.rank;
+ const int32_t batch_size = input_shape.Dims(0);
+ const int32_t num_filters = weight_feature_shape.Dims(0);
+ const int32_t memory_size = weight_time_shape.Dims(1);
+
+ cmsis_nn_dims input_dims;
+ input_dims.n = input_shape.Dims(0);
+ input_dims.h = input_shape.Dims(1);
+
+ cmsis_nn_dims weights_feature_dims;
+ weights_feature_dims.n = weight_feature_shape.Dims(0);
+ weights_feature_dims.h = weight_feature_shape.Dims(1);
+
+ cmsis_nn_dims weights_time_dims;
+ weights_time_dims.n = weight_time_shape.Dims(0);
+ weights_time_dims.h = weight_time_shape.Dims(1);
+
+ cmsis_nn_dims bias_dims;
+ bias_dims.n = bias_shape.Dims(0);
+
+ cmsis_nn_dims state_dims;
+ state_dims.n = batch_size;
+ state_dims.h = memory_size * num_filters;
+
+ cmsis_nn_dims output_dims;
+ output_dims.n = output_shape.Dims(0);
+ output_dims.h = output_shape.Dims(1);
+
+ cmsis_nn_svdf_params svdf_params;
+  svdf_params.rank = rank;
+ svdf_params.input_offset = input_zp;
+ svdf_params.output_offset = output_zp;
+
+ svdf_params.input_activation.min = INT16_MIN;
+ svdf_params.input_activation.max = INT16_MAX;
+
+ svdf_params.output_activation.min = INT8_MIN;
+ svdf_params.output_activation.max = INT8_MAX;
+
+ cmsis_nn_per_tensor_quant_params in_quant_params;
+ in_quant_params.multiplier = scale_1_a;
+ in_quant_params.shift = scale_1_b;
+
+ cmsis_nn_per_tensor_quant_params out_quant_params;
+ out_quant_params.multiplier = scale_2_a;
+ out_quant_params.shift = scale_2_b;
+
+ cmsis_nn_context scratch_ctx;
+ scratch_ctx.buf = scratchpad_data;
+
+ cmsis_nn_context scratch_output_ctx;
+ scratch_output_ctx.buf = output_temp_data;
+
+ arm_svdf_s8(&scratch_ctx, &scratch_output_ctx, &svdf_params, &in_quant_params, &out_quant_params,
+ &input_dims, input_data, &state_dims, activation_state_data, &weights_feature_dims,
+ weight_feature_data, &weights_time_dims, weight_time_data, &bias_dims, bias_data,
+ &output_dims, output_data);
+}
+static inline void
+FloatSVDF(const TfLiteSVDFParams &params, const tflite::RuntimeShape &input_shape,
+ const float *input_data, const tflite::RuntimeShape &weight_feature_shape,
+ const float *weight_feature_data, const tflite::RuntimeShape &weight_time_shape,
+ const float *weight_time_data, const tflite::RuntimeShape &bias_shape,
+ const float *bias_data, float *scratchpad_data, float *activation_state_data,
+ const tflite::RuntimeShape &output_shape, float *output_data)
+{
+ const int32_t rank = params.rank;
+ const int32_t batch_size = input_shape.Dims(0);
+ const int32_t input_size = input_shape.Dims(1);
+ const int32_t num_filters = weight_feature_shape.Dims(0);
+ const int32_t num_units = num_filters / rank;
+ const int32_t memory_size = weight_time_shape.Dims(1);
+
+ // Left shift the activation_state.
+ {
+ float *new_state_start = activation_state_data;
+ const float *old_state_start = activation_state_data + 1;
+ const float *old_state_end = activation_state_data + batch_size * num_filters * memory_size;
+ while (old_state_start != old_state_end)
+ {
+ *new_state_start++ = *old_state_start++;
+ }
+ }
+
+ // Note: no need to clear the latest activation, matmul is not accumulative.
+
+ // Compute conv1d(inputs, weights_feature).
+ // The activation_state's rightmost column is used to save current cycle
+ // activation. This is achieved by starting at state_ptr[memory_size - 1] and
+ // having the stride equal to memory_size.
+
+ // Perform batched matrix vector multiply operation:
+ {
+ const float *matrix = weight_feature_data;
+ const float *vector = input_data;
+ float *result = &activation_state_data[memory_size - 1];
+ float *result_in_batch = result;
+ for (int i = 0; i < batch_size; ++i)
+ {
+ const float *matrix_ptr = matrix;
+ for (int j = 0; j < num_filters; ++j)
+ {
+ float dot_prod = 0.0f;
+ const float *vector_in_batch = vector + i * input_size;
+ for (int k = 0; k < input_size; ++k)
+ {
+ dot_prod += *matrix_ptr++ * *vector_in_batch++;
+ }
+ *result_in_batch = dot_prod;
+ result_in_batch += memory_size;
+ }
+ }
+ }
+
+ tflite::reference_ops::ApplyTimeWeightsBiasAndActivation(
+ batch_size, memory_size, num_filters, num_units, rank, weight_time_data, bias_data,
+ params.activation, activation_state_data, scratchpad_data, output_data);
+}
+
+static inline void SetupScratchpadTensor(
+ const luci_interpreter::DataType &input_data_type,
+ const luci_interpreter::DataType &weight_feature_data_type,
+ luci_interpreter::Tensor *scratchpad_1, luci_interpreter::Tensor *scratchpad_2,
+ luci_interpreter::Tensor *scratchpad_3, luci_interpreter::Tensor *scratchpad_4,
+ luci_interpreter::Tensor *scratchpad_5, luci_interpreter::Tensor *scratchpad_6,
+ const luci_interpreter::Shape input_shape, const luci_interpreter::Shape weight_time_shape,
+ const int32_t batch_size, const int32_t num_filters, const int32_t num_units)
+{
+ if (input_data_type == loco::DataType::FLOAT32 &&
+ (weight_feature_data_type == loco::DataType::S8 ||
+ weight_feature_data_type == loco::DataType::U8))
+ {
+ (void)input_shape;
+ (void)weight_time_shape;
+ (void)scratchpad_3;
+ (void)scratchpad_4;
+ (void)scratchpad_5;
+ (void)scratchpad_6;
+
+ throw std::runtime_error("Hybrid type is not supported for cmsisnn");
+ }
+
+ // Resize scratchpad_1 tensor
+ scratchpad_1->resize({batch_size, num_filters});
+
+ if (input_data_type == loco::DataType::S8)
+ {
+ // Resize scratchpad_2 for full_integer op
+ scratchpad_2->resize({batch_size, num_units});
+ }
+}
+
+} // namespace luci_interpreter_pal
+
+#endif // LUCI_INTERPRETER_PAL_SVDF_H
diff --git a/compiler/luci-interpreter/pal/cmsisnn/pal.cmake b/compiler/luci-interpreter/pal/cmsisnn/pal.cmake
index 9a25a3c5d..a68b363d9 100644
--- a/compiler/luci-interpreter/pal/cmsisnn/pal.cmake
+++ b/compiler/luci-interpreter/pal/cmsisnn/pal.cmake
@@ -42,9 +42,12 @@ macro(add_pal_to_target TGT)
"${TensorFlowSource_DIR}")
target_include_directories(${TGT} PRIVATE ${LUCI_INTERPRETER_PAL_DIR})
- set(PAL_SOURCES ${TensorFlowSource_DIR}/tensorflow/lite/kernels/internal/quantization_util.cc)
+ file(GLOB_RECURSE PAL_SOURCES "${CMSISSource_DIR}/CMSIS/NN/Source/*.c")
+ list(APPEND PAL_SOURCES ${TensorFlowSource_DIR}/tensorflow/lite/kernels/internal/quantization_util.cc
+ ${TensorFlowSource_DIR}/tensorflow/lite/kernels/internal/tensor_utils.cc
+ ${TensorFlowSource_DIR}/tensorflow/lite/kernels/internal/reference/portable_tensor_utils.cc)
add_library(luci_interpreter_cmsisnn_pal STATIC ${PAL_SOURCES})
- set_target_properties(luci_interpreter_cmsisnn_pal PROPERTIES POSITION_INDEPENDENT_CODE ON)
+ set_property(TARGET luci_interpreter_cmsisnn_pal PROPERTY POSITION_INDEPENDENT_CODE ON)
target_include_directories(luci_interpreter_cmsisnn_pal PRIVATE
"${TensorFlowRuySource_DIR}"
"${TensorFlowGEMMLowpSource_DIR}"
@@ -53,7 +56,7 @@ macro(add_pal_to_target TGT)
)
add_subdirectory(${CMSISSource_DIR}/CMSIS/NN ${CMAKE_CURRENT_BINARY_DIR}/CMSISNN)
- target_include_directories(luci_interpreter_cmsisnn_pal PRIVATE
+ target_include_directories(luci_interpreter_cmsisnn_pal PUBLIC
"${CMSISSource_DIR}/CMSIS/NN/Include"
"${CMSISSource_DIR}/CMSIS/DSP/Include"
"${CMSISSource_DIR}/CMSIS/Core/Include")
diff --git a/compiler/luci-interpreter/pal/linux/KernelsToBuild.lst b/compiler/luci-interpreter/pal/linux/KernelsToBuild.lst
index 9d541276c..428b15ee0 100644
--- a/compiler/luci-interpreter/pal/linux/KernelsToBuild.lst
+++ b/compiler/luci-interpreter/pal/linux/KernelsToBuild.lst
@@ -1,19 +1,23 @@
REGISTER_KERNEL(Add)
REGISTER_KERNEL(ArgMax)
REGISTER_KERNEL(AveragePool2D)
+REGISTER_KERNEL(BatchMatMul)
REGISTER_KERNEL(BatchToSpaceND)
REGISTER_KERNEL(Cast)
REGISTER_KERNEL(Concatenation)
REGISTER_KERNEL(Conv2D)
REGISTER_KERNEL(DepthToSpace)
REGISTER_KERNEL(DepthwiseConv2D)
+REGISTER_KERNEL(Dequantize)
REGISTER_KERNEL(Div)
REGISTER_KERNEL(Elu)
REGISTER_KERNEL(Exp)
+REGISTER_KERNEL(ExpandDims)
REGISTER_KERNEL(Floor)
REGISTER_KERNEL(FloorDiv)
REGISTER_KERNEL(Equal)
REGISTER_KERNEL(FullyConnected)
+REGISTER_KERNEL(Gather)
REGISTER_KERNEL(Greater)
REGISTER_KERNEL(GreaterEqual)
REGISTER_KERNEL(If)
@@ -37,11 +41,13 @@ REGISTER_KERNEL(MirrorPad)
REGISTER_KERNEL(Mul)
REGISTER_KERNEL(Neg)
REGISTER_KERNEL(NotEqual)
+REGISTER_KERNEL(OneHot)
REGISTER_KERNEL(Pack)
REGISTER_KERNEL(Pad)
REGISTER_KERNEL(PadV2)
REGISTER_KERNEL(Pow)
REGISTER_KERNEL(PRelu)
+REGISTER_KERNEL(Quantize)
REGISTER_KERNEL(Relu)
REGISTER_KERNEL(Relu6)
REGISTER_KERNEL(Reshape)
@@ -61,6 +67,7 @@ REGISTER_KERNEL(Square)
REGISTER_KERNEL(SquaredDifference)
REGISTER_KERNEL(Squeeze)
REGISTER_KERNEL(Sub)
+REGISTER_KERNEL(SVDF)
REGISTER_KERNEL(Tanh)
REGISTER_KERNEL(Transpose)
REGISTER_KERNEL(TransposeConv)
diff --git a/compiler/luci-interpreter/pal/linux/PALAveragePool2d.h b/compiler/luci-interpreter/pal/linux/PALAveragePool2d.h
new file mode 100644
index 000000000..cce30601f
--- /dev/null
+++ b/compiler/luci-interpreter/pal/linux/PALAveragePool2d.h
@@ -0,0 +1,73 @@
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef LUCI_INTERPRETER_PAL_AVERAGEPOOL2D_H
+#define LUCI_INTERPRETER_PAL_AVERAGEPOOL2D_H
+
+#include <tensorflow/lite/kernels/internal/reference/integer_ops/pooling.h>
+#include <tensorflow/lite/kernels/internal/reference/pooling.h>
+
+namespace luci_interpreter_pal
+{
+template <typename T>
+static inline void AveragePool(const tflite::PoolParams &params,
+ const tflite::RuntimeShape &input_shape, const T *input_data,
+ const tflite::RuntimeShape &output_shape, T *output_data,
+ const tflite::RuntimeShape &scratchpad_shape, T *scratchpad_data)
+{
+ {
+    // MARK: At this moment this operation is not supported
+ assert(false && "AveragePool NYI");
+ (void)params;
+ (void)input_shape;
+ (void)input_data;
+ (void)output_shape;
+ (void)output_data;
+ (void)scratchpad_shape;
+ (void)scratchpad_data;
+ }
+}
+
+template <>
+inline void AveragePool<int8_t>(const tflite::PoolParams &params,
+ const tflite::RuntimeShape &input_shape, const int8_t *input_data,
+ const tflite::RuntimeShape &output_shape, int8_t *output_data,
+ const tflite::RuntimeShape &scratchpad_shape,
+ int8_t *scratchpad_data)
+{
+ (void)scratchpad_shape;
+ (void)scratchpad_data;
+
+ tflite::reference_integer_ops::AveragePool(params, input_shape, input_data, output_shape,
+ output_data);
+}
+
+static inline void SetupScratchpadTensor(luci_interpreter::Tensor *scratchpad,
+ const luci_interpreter::DataType &input_data_type,
+ const tflite::RuntimeShape &input_shape,
+ const tflite::RuntimeShape &output_shape)
+
+{
+ (void)input_data_type;
+ (void)input_shape;
+ (void)output_shape;
+
+ scratchpad->set_allocatable(false);
+}
+
+} // namespace luci_interpreter_pal
+
+#endif // LUCI_INTERPRETER_PAL_AVERAGEPOOL2D_H
diff --git a/compiler/luci-interpreter/pal/linux/PALBatchMatMul.h b/compiler/luci-interpreter/pal/linux/PALBatchMatMul.h
new file mode 100644
index 000000000..3894f2d92
--- /dev/null
+++ b/compiler/luci-interpreter/pal/linux/PALBatchMatMul.h
@@ -0,0 +1,67 @@
+/*
+ * Copyright (c) 2022 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef LUCI_INTERPRETER_PAL_BATCHMATMUL_H
+#define LUCI_INTERPRETER_PAL_BATCHMATMUL_H
+
+#include <tensorflow/lite/kernels/internal/reference/batch_matmul.h>
+
+namespace luci_interpreter_pal
+{
+inline void BatchMatMul(const tflite::RuntimeShape &lhs_shape, const float *lhs_data,
+ const tflite::RuntimeShape &rhs_shape, const float *rhs_data,
+ const tflite::RuntimeShape &output_shape, float *output_data)
+{
+ tflite::reference_ops::BatchMatMul(lhs_shape, lhs_data, rhs_shape, rhs_data, output_shape,
+ output_data);
+}
+
+static inline void SetupScratchpadTensor(luci_interpreter::Tensor *lhs_scratchpad,
+ luci_interpreter::Tensor *rhs_scratchpad,
+ const tflite::RuntimeShape &lhs_shape,
+ const tflite::RuntimeShape &rhs_shape)
+{
+ // Scratchpad for transposed LHS
+ {
+ auto lhs_rank = lhs_shape.DimensionsCount();
+ luci_interpreter::Shape scratchpad_size(lhs_rank);
+ for (int i = 0; i < lhs_rank - 2; ++i)
+ {
+ scratchpad_size.dim(i) = lhs_shape.Dims(i);
+ }
+ scratchpad_size.dim(lhs_rank - 2) = lhs_shape.Dims(lhs_rank - 1);
+ scratchpad_size.dim(lhs_rank - 1) = lhs_shape.Dims(lhs_rank - 2);
+
+ lhs_scratchpad->resize(scratchpad_size);
+ }
+ // Scratchpad for transposed RHS
+ {
+ auto rhs_rank = rhs_shape.DimensionsCount();
+ luci_interpreter::Shape scratchpad_size(rhs_rank);
+ for (int i = 0; i < rhs_rank - 2; ++i)
+ {
+ scratchpad_size.dim(i) = rhs_shape.Dims(i);
+ }
+ scratchpad_size.dim(rhs_rank - 2) = rhs_shape.Dims(rhs_rank - 1);
+ scratchpad_size.dim(rhs_rank - 1) = rhs_shape.Dims(rhs_rank - 2);
+
+ rhs_scratchpad->resize(scratchpad_size);
+ }
+}
+
+} // namespace luci_interpreter_pal
+
+#endif // LUCI_INTERPRETER_PAL_BATCHMATMUL_H
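Both scratchpads take the corresponding operand's shape with the two innermost dimensions swapped, i.e. space for a transposed copy that the TFLite reference `BatchMatMul` can fill when it needs an operand in the opposite layout. The rule in isolation:

```cpp
#include <utility>
#include <vector>

// Swap the two innermost dimensions, mirroring the loops above.
// e.g. lhs {2, 3, 5} -> lhs scratchpad {2, 5, 3}
std::vector<int> transposed_scratchpad_shape(std::vector<int> shape)
{
  const size_t rank = shape.size(); // assumed rank >= 2, as in the PAL code
  std::swap(shape[rank - 2], shape[rank - 1]);
  return shape;
}
```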
diff --git a/compiler/luci-interpreter/pal/linux/PALConv2d.h b/compiler/luci-interpreter/pal/linux/PALConv2d.h
index 2550dd5d7..985a15f39 100644
--- a/compiler/luci-interpreter/pal/linux/PALConv2d.h
+++ b/compiler/luci-interpreter/pal/linux/PALConv2d.h
@@ -26,14 +26,24 @@ static inline void Conv(const tflite::ConvParams &params, const tflite::RuntimeS
const float *input_data, const tflite::RuntimeShape &filter_shape,
const float *filter_data, const tflite::RuntimeShape &bias_shape,
const float *bias_data, const tflite::RuntimeShape &output_shape,
- float *output_data, const tflite::RuntimeShape &im2col_shape,
- float *im2col_data)
+ float *output_data, const tflite::RuntimeShape &scratchpad_shape,
+ float *scratchpad_data)
{
- if (im2col_data)
+ (void)scratchpad_shape;
+ if (scratchpad_data)
{
+ const int32_t batches = tflite::MatchingDim(input_shape, 0, output_shape, 0);
+ const int32_t input_depth = tflite::MatchingDim(input_shape, 3, filter_shape, 3);
+ const int32_t output_height = output_shape.Dims(1);
+ const int32_t output_width = output_shape.Dims(2);
+ const int32_t filter_height = filter_shape.Dims(1);
+ const int32_t filter_width = filter_shape.Dims(2);
+ tflite::RuntimeShape im2col_shape{batches, output_height, output_width,
+ input_depth * filter_height * filter_width};
+
tflite::optimized_ops::Conv(params, input_shape, input_data, filter_shape, filter_data,
bias_shape, bias_data, output_shape, output_data, im2col_shape,
- im2col_data);
+ scratchpad_data);
}
else
tflite::reference_ops::Conv(params, input_shape, input_data, filter_shape, filter_data,
@@ -45,8 +55,8 @@ static inline void Conv(const tflite::ConvParams &params, const tflite::RuntimeS
const uint8 *input_data, const tflite::RuntimeShape &filter_shape,
const uint8 *filter_data, const tflite::RuntimeShape &bias_shape,
const int32 *bias_data, const tflite::RuntimeShape &output_shape,
- uint8 *output_data, const tflite::RuntimeShape &im2col_shape,
- uint8 *im2col_data)
+ uint8 *output_data, const tflite::RuntimeShape &scratchpad_shape,
+ uint8 *scratchpad_data)
{
// TODO This should only be done once (although it takes only a few microseconds).
// Also, the user should be able to adjust the number of threads.
@@ -54,8 +64,8 @@ static inline void Conv(const tflite::ConvParams &params, const tflite::RuntimeS
gemmlowp_context->set_max_num_threads(static_cast<int>(std::thread::hardware_concurrency()));
tflite::reference_ops::Conv(params, input_shape, input_data, filter_shape, filter_data,
- bias_shape, bias_data, output_shape, output_data, im2col_shape,
- im2col_data, gemmlowp_context.get());
+ bias_shape, bias_data, output_shape, output_data, scratchpad_shape,
+ scratchpad_data, gemmlowp_context.get());
}
static inline void ConvPerChannel(const tflite::ConvParams &params, const int32_t *mult,
@@ -63,17 +73,55 @@ static inline void ConvPerChannel(const tflite::ConvParams &params, const int32_
const int8 *input_data, const tflite::RuntimeShape &filter_shape,
const int8 *filter_data, const tflite::RuntimeShape &bias_shape,
const int32 *bias_data, const tflite::RuntimeShape &output_shape,
- int8 *output_data, const tflite::RuntimeShape &im2col_shape,
- int8 *im2col_data)
+ int8 *output_data, const tflite::RuntimeShape &scratchpad_shape,
+ int8 *scratchpad_data)
{
- (void)im2col_shape;
- (void)im2col_data;
+ (void)scratchpad_shape;
+ (void)scratchpad_data;
// TODO enable optimized version
tflite::reference_integer_ops::ConvPerChannel(params, mult, shifts, input_shape, input_data,
filter_shape, filter_data, bias_shape, bias_data,
output_shape, output_data);
}
+static inline void SetupScratchpadTensor(luci_interpreter::Tensor *scratchpad,
+ const luci_interpreter::DataType &input_data_type,
+ const tflite::ConvParams &params,
+ const tflite::RuntimeShape &input_shape,
+ const tflite::RuntimeShape &filter_shape,
+ const tflite::RuntimeShape &output_shape)
+{
+ const int32_t filter_height = filter_shape.Dims(1);
+ const int32_t filter_width = filter_shape.Dims(2);
+
+ // Allocate tensor for scratchpad, if needed.
+ // The checks here should be aligned with the actual implementation.
+ const bool need_dilated_scratchpad =
+ params.dilation_height_factor != 1 || params.dilation_width_factor != 1;
+ const bool need_non_dilated_scratchpad = params.stride_height != 1 || params.stride_width != 1 ||
+ filter_height != 1 || filter_width != 1;
+ auto _need_scratchpad = input_data_type != luci_interpreter::DataType::S16 &&
+ (need_dilated_scratchpad || need_non_dilated_scratchpad);
+
+ if (_need_scratchpad)
+ {
+ const int32_t batches = tflite::MatchingDim(input_shape, 0, output_shape, 0);
+ const int32_t input_depth = tflite::MatchingDim(input_shape, 3, filter_shape, 3);
+ const int32_t output_height = output_shape.Dims(1);
+ const int32_t output_width = output_shape.Dims(2);
+
+ auto data_type_size = static_cast<int32_t>(luci_interpreter::getDataTypeSize(input_data_type));
+ int32_t scratchpad_size = batches * output_width * output_height * input_depth * filter_height *
+ filter_width * data_type_size;
+ luci_interpreter::Shape scratchpad_shape{scratchpad_size};
+ scratchpad->resize(scratchpad_shape);
+ }
+ else
+ {
+ scratchpad->set_allocatable(false);
+ }
+}
+
} // namespace luci_interpreter_pal
#endif // LUCI_INTERPRETER_PAL_CONV2D_H
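The scratchpad being sized here is the im2col buffer: one patch of `input_depth * filter_height * filter_width` elements per output position, across all batches. A quick arithmetic check with illustrative numbers:

```cpp
#include <cstdint>

// batches=1, output 112x112, input_depth=3, 7x7 filter, 4-byte float elements:
constexpr int64_t kIm2colBytes = 1LL * 112 * 112 * 3 * 7 * 7 * 4;
static_assert(kIm2colBytes == 7375872, "about 7.0 MiB of im2col scratch");
```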
diff --git a/compiler/luci-interpreter/pal/linux/PALDepthwiseConv2d.h b/compiler/luci-interpreter/pal/linux/PALDepthwiseConv2d.h
new file mode 100644
index 000000000..c9d1a2948
--- /dev/null
+++ b/compiler/luci-interpreter/pal/linux/PALDepthwiseConv2d.h
@@ -0,0 +1,90 @@
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef LUCI_INTERPRETER_PAL_DEPTHWISECONV2D_H
+#define LUCI_INTERPRETER_PAL_DEPTHWISECONV2D_H
+
+#include <tensorflow/lite/kernels/internal/reference/depthwiseconv_float.h>
+#include <tensorflow/lite/kernels/internal/reference/depthwiseconv_uint8.h>
+#include <tensorflow/lite/kernels/internal/reference/integer_ops/depthwise_conv.h>
+
+namespace luci_interpreter_pal
+{
+template <typename T>
+static inline void
+DepthwiseConvPerChannel(const tflite::DepthwiseParams &params, const int32_t *output_multiplier,
+ const int32_t *output_shift, const tflite::RuntimeShape &input_shape,
+ const T *input_data, const tflite::RuntimeShape &filter_shape,
+ const T *filter_data, const tflite::RuntimeShape &bias_shape,
+ const int32_t *bias_data, const tflite::RuntimeShape &output_shape,
+ T *output_data, const tflite::RuntimeShape &scratchpad_shape,
+ T *scratchpad_data)
+{
+ {
+ // MARK: At this moment this operation is not supported
+ assert(false && "DepthwiseConvPerChannel NYI");
+ (void)params;
+ (void)output_multiplier;
+ (void)output_shift;
+ (void)input_shape;
+ (void)input_data;
+ (void)filter_shape;
+ (void)filter_data;
+ (void)bias_shape;
+ (void)bias_data;
+ (void)output_shape;
+ (void)output_data;
+ (void)scratchpad_shape;
+ (void)scratchpad_data;
+ }
+}
+
+template <>
+inline void DepthwiseConvPerChannel<int8_t>(
+ const tflite::DepthwiseParams &params, const int32_t *output_multiplier,
+ const int32_t *output_shift, const tflite::RuntimeShape &input_shape, const int8_t *input_data,
+ const tflite::RuntimeShape &filter_shape, const int8_t *filter_data,
+ const tflite::RuntimeShape &bias_shape, const int32_t *bias_data,
+ const tflite::RuntimeShape &output_shape, int8_t *output_data,
+ const tflite::RuntimeShape &scratchpad_shape, int8_t *scratchpad_data)
+{
+ (void)scratchpad_shape;
+ (void)scratchpad_data;
+ tflite::reference_integer_ops::DepthwiseConvPerChannel(
+ params, output_multiplier, output_shift, input_shape, input_data, filter_shape, filter_data,
+ bias_shape, bias_data, output_shape, output_data);
+}
+
+static inline void SetupScratchpadTensor(luci_interpreter::Tensor *scratchpad,
+ const tflite::DepthwiseParams &params,
+ const luci_interpreter::DataType &input_data_type,
+ const tflite::RuntimeShape &input_shape,
+ const tflite::RuntimeShape &filter_shape,
+ const tflite::RuntimeShape &output_shape)
+
+{
+ (void)params;
+ (void)input_data_type;
+ (void)input_shape;
+ (void)filter_shape;
+ (void)output_shape;
+
+ scratchpad->set_allocatable(false);
+}
+
+} // namespace luci_interpreter_pal
+
+#endif // LUCI_INTERPRETER_PAL_DEPTHWISECONV2D_H
diff --git a/compiler/luci-interpreter/pal/linux/PALDequantize.h b/compiler/luci-interpreter/pal/linux/PALDequantize.h
new file mode 100644
index 000000000..3af6d0777
--- /dev/null
+++ b/compiler/luci-interpreter/pal/linux/PALDequantize.h
@@ -0,0 +1,34 @@
+/*
+ * Copyright (c) 2022 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef LUCI_INTERPRETER_PAL_DEQUANTIZE_H
+#define LUCI_INTERPRETER_PAL_DEQUANTIZE_H
+
+#include <tensorflow/lite/kernels/internal/optimized/optimized_ops.h>
+
+namespace luci_interpreter_pal
+{
+template <typename T>
+static inline void Dequantize(tflite::DequantizationParams &params,
+ const tflite::RuntimeShape &input_shape, const T *input_data,
+ const tflite::RuntimeShape &output_shape, float *output_data)
+{
+ tflite::optimized_ops::Dequantize(params, input_shape, input_data, output_shape, output_data);
+}
+
+} // namespace luci_interpreter_pal
+
+#endif // LUCI_INTERPRETER_PAL_DEQUANTIZE_H
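Dequantize maps quantized integers back to float with the affine formula float = scale * (q - zero_point). A standalone sketch of that computation (illustrative names; the actual kernel receives scale and zero point through tflite::DequantizationParams):

#include <cstdint>
#include <vector>

std::vector<float> dequantize_sketch(const std::vector<int8_t> &q, float scale,
                                     int32_t zero_point)
{
  std::vector<float> out(q.size());
  for (size_t i = 0; i < q.size(); ++i)
    out[i] = scale * (static_cast<int32_t>(q[i]) - zero_point); // affine dequantization
  return out;
}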
diff --git a/compiler/luci-interpreter/pal/linux/PALFullyConnected.h b/compiler/luci-interpreter/pal/linux/PALFullyConnected.h
new file mode 100644
index 000000000..62970dbf7
--- /dev/null
+++ b/compiler/luci-interpreter/pal/linux/PALFullyConnected.h
@@ -0,0 +1,61 @@
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef LUCI_INTERPRETER_PAL_FULLYCONNECTED_H
+#define LUCI_INTERPRETER_PAL_FULLYCONNECTED_H
+
+#include <tensorflow/lite/kernels/internal/reference/fully_connected.h>
+#include <tensorflow/lite/kernels/internal/reference/integer_ops/fully_connected.h>
+
+namespace luci_interpreter_pal
+{
+template <typename T>
+static inline void FullyConnected(const tflite::FullyConnectedParams &params,
+ const tflite::RuntimeShape &input_shape, const T *input_data,
+ const tflite::RuntimeShape &filter_shape, const T *filter_data,
+ const tflite::RuntimeShape &bias_shape, const int32_t *bias_data,
+ const tflite::RuntimeShape &output_shape, T *output_data)
+{
+ {
+ // MARK: At this moment this operation is not supported
+ assert(false && "FullyConnected NYI");
+ (void)params;
+ (void)input_shape;
+ (void)input_data;
+ (void)filter_shape;
+ (void)filter_data;
+ (void)bias_shape;
+ (void)bias_data;
+ (void)output_shape;
+ (void)output_data;
+ }
+}
+
+template <>
+inline void
+FullyConnected<int8_t>(const tflite::FullyConnectedParams &params,
+ const tflite::RuntimeShape &input_shape, const int8_t *input_data,
+ const tflite::RuntimeShape &filter_shape, const int8_t *filter_data,
+ const tflite::RuntimeShape &bias_shape, const int32_t *bias_data,
+ const tflite::RuntimeShape &output_shape, int8_t *output_data)
+{
+ tflite::reference_integer_ops::FullyConnected(params, input_shape, input_data, filter_shape,
+ filter_data, bias_shape, bias_data, output_shape,
+ output_data);
+}
+} // namespace luci_interpreter_pal
+
+#endif // LUCI_INTERPRETER_PAL_FULLYCONNECTED_H
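For each batch row and output unit, the int8_t path accumulates (input - input_zero_point) * filter into int32, adds the bias, and rescales back to int8. A simplified sketch with a double effective scale (input_scale * filter_scale / output_scale) standing in for the fixed-point multiplier/shift of the real kernel; all names are illustrative:

#include <algorithm>
#include <cmath>
#include <cstdint>

void fully_connected_s8_sketch(const int8_t *input, const int8_t *filter, const int32_t *bias,
                               int8_t *output, int batches, int in_depth, int out_depth,
                               int32_t input_zp, int32_t output_zp, double effective_scale)
{
  for (int b = 0; b < batches; ++b)
    for (int o = 0; o < out_depth; ++o)
    {
      int32_t acc = bias ? bias[o] : 0; // bias is optional
      for (int i = 0; i < in_depth; ++i)
        acc += (input[b * in_depth + i] - input_zp) * filter[o * in_depth + i];
      int32_t q = static_cast<int32_t>(std::lround(acc * effective_scale)) + output_zp;
      output[b * out_depth + o] = static_cast<int8_t>(std::min(127, std::max(-128, q)));
    }
}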
diff --git a/compiler/luci-interpreter/pal/linux/PALGather.h b/compiler/luci-interpreter/pal/linux/PALGather.h
new file mode 100644
index 000000000..49ac35f93
--- /dev/null
+++ b/compiler/luci-interpreter/pal/linux/PALGather.h
@@ -0,0 +1,35 @@
+/*
+ * Copyright (c) 2022 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef LUCI_INTERPRETER_PAL_GATHER_H
+#define LUCI_INTERPRETER_PAL_GATHER_H
+
+#include <tensorflow/lite/kernels/internal/optimized/optimized_ops.h>
+
+namespace luci_interpreter_pal
+{
+template <typename T, typename CoordsT = int32>
+static inline void Gather(const tflite::GatherParams &op_params,
+ const tflite::RuntimeShape &input_shape, const T *input_data,
+ const tflite::RuntimeShape &coords_shape, const CoordsT *coords_data,
+ const tflite::RuntimeShape &output_shape, T *output_data)
+{
+ tflite::optimized_ops::Gather(op_params, input_shape, input_data, coords_shape, coords_data,
+ output_shape, output_data);
+}
+} // namespace luci_interpreter_pal
+
+#endif // LUCI_INTERPRETER_PAL_GATHER_H
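Gather copies whole slices of the input, one per coordinate. A minimal sketch for the axis == 0 case (illustrative; the TFLite kernel additionally handles an arbitrary axis and batch_dims):

#include <cstdint>
#include <vector>

template <typename T>
std::vector<T> gather_axis0_sketch(const std::vector<T> &input, int64_t slice_size,
                                   const std::vector<int32_t> &coords)
{
  std::vector<T> out;
  out.reserve(coords.size() * slice_size);
  for (int32_t c : coords) // each coordinate selects one input slice
    out.insert(out.end(), input.begin() + c * slice_size,
               input.begin() + (c + 1) * slice_size);
  return out;
}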
diff --git a/compiler/luci-interpreter/pal/linux/PALMul.h b/compiler/luci-interpreter/pal/linux/PALMul.h
index cfaec1b58..a8a9d4abc 100644
--- a/compiler/luci-interpreter/pal/linux/PALMul.h
+++ b/compiler/luci-interpreter/pal/linux/PALMul.h
@@ -21,21 +21,31 @@
namespace luci_interpreter_pal
{
+template <typename T>
static inline void Mul(tflite::ArithmeticParams &params, const tflite::RuntimeShape &input1_shape,
- const float *input1_data, const tflite::RuntimeShape &input2_shape,
- const float *input2_data, const tflite::RuntimeShape &output_shape,
- float *output_data)
+ const T *input1_data, const tflite::RuntimeShape &input2_shape,
+ const T *input2_data, const tflite::RuntimeShape &output_shape,
+ T *output_data)
{
tflite::optimized_ops::Mul(params, input1_shape, input1_data, input2_shape, input2_data,
output_shape, output_data);
}
-static inline void BroadcastMul4DSlow(tflite::ArithmeticParams &params,
- const tflite::RuntimeShape &input1_shape,
- const float *input1_data,
- const tflite::RuntimeShape &input2_shape,
- const float *input2_data,
- const tflite::RuntimeShape &output_shape, float *output_data)
+template <>
+inline void Mul(tflite::ArithmeticParams &params, const tflite::RuntimeShape &input1_shape,
+ const int64_t *input1_data, const tflite::RuntimeShape &input2_shape,
+ const int64_t *input2_data, const tflite::RuntimeShape &output_shape,
+ int64_t *output_data)
+{
+ tflite::optimized_ops::BroadcastMul4DSlow(params, input1_shape, input1_data, input2_shape,
+ input2_data, output_shape, output_data);
+}
+
+template <typename T>
+static inline void
+BroadcastMul4DSlow(tflite::ArithmeticParams &params, const tflite::RuntimeShape &input1_shape,
+ const T *input1_data, const tflite::RuntimeShape &input2_shape,
+ const T *input2_data, const tflite::RuntimeShape &output_shape, T *output_data)
{
tflite::optimized_ops::BroadcastMul4DSlow(params, input1_shape, input1_data, input2_shape,
input2_data, output_shape, output_data);
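The new int64_t specialization of Mul falls back to BroadcastMul4DSlow, presumably because the optimized element-wise Mul has no int64 path. For same-shape inputs both routes reduce to an element-wise product followed by the fused-activation clamp; a sketch of that core (broadcasting omitted, names illustrative):

#include <algorithm>

template <typename T>
void mul_same_shape_sketch(const T *in1, const T *in2, T *out, int size, T act_min, T act_max)
{
  for (int i = 0; i < size; ++i)
    out[i] = std::min(act_max, std::max(act_min, in1[i] * in2[i])); // multiply, then clamp
}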
diff --git a/compiler/luci-interpreter/pal/linux/PALQuantize.h b/compiler/luci-interpreter/pal/linux/PALQuantize.h
new file mode 100644
index 000000000..bf1d7954e
--- /dev/null
+++ b/compiler/luci-interpreter/pal/linux/PALQuantize.h
@@ -0,0 +1,44 @@
+/*
+ * Copyright (c) 2022 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef LUCI_INTERPRETER_PAL_QUANTIZE_H
+#define LUCI_INTERPRETER_PAL_QUANTIZE_H
+
+#include <tensorflow/lite/kernels/internal/optimized/optimized_ops.h>
+
+namespace luci_interpreter_pal
+{
+template <typename T>
+static inline void Quantize(tflite::QuantizationParams &params,
+ const tflite::RuntimeShape &input_shape, const float *input_data,
+ const tflite::RuntimeShape &output_shape, T *output_data)
+{
+ tflite::optimized_ops::AffineQuantize(params, input_shape, input_data, output_shape, output_data);
+}
+
+template <typename Input, typename Output>
+static inline void Requantize(const Input *input_data, int32_t size,
+ int32_t effective_scale_multiplier, int32_t effective_scale_shift,
+ int32_t input_zero_point, int32_t output_zero_point,
+ Output *output_data)
+{
+ tflite::optimized_ops::Requantize(input_data, size, effective_scale_multiplier,
+ effective_scale_shift, input_zero_point, output_zero_point,
+ output_data);
+}
+} // namespace luci_interpreter_pal
+
+#endif // LUCI_INTERPRETER_PAL_QUANTIZE_H
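Requantize converts between two quantized encodings; the effective scale input_scale / output_scale arrives as a 32-bit multiplier plus a power-of-two shift. A sketch of the math for one uint8 -> int8 element, with double arithmetic in place of the fixed-point pair (illustrative):

#include <algorithm>
#include <cmath>
#include <cstdint>

int8_t requantize_one_sketch(uint8_t q_in, int32_t in_zp, int32_t out_zp, double effective_scale)
{
  int32_t centered = static_cast<int32_t>(q_in) - in_zp; // remove old zero point
  int32_t q = static_cast<int32_t>(std::lround(centered * effective_scale)) + out_zp;
  return static_cast<int8_t>(std::min(127, std::max(-128, q))); // clamp to int8
}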
diff --git a/compiler/luci-interpreter/pal/linux/PALSVDF.h b/compiler/luci-interpreter/pal/linux/PALSVDF.h
new file mode 100644
index 000000000..0ffba14f0
--- /dev/null
+++ b/compiler/luci-interpreter/pal/linux/PALSVDF.h
@@ -0,0 +1,90 @@
+/*
+ * Copyright (c) 2022 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef LUCI_INTERPRETER_PAL_SVDF_H
+#define LUCI_INTERPRETER_PAL_SVDF_H
+
+#include <tensorflow/lite/kernels/internal/reference/svdf.h>
+
+namespace luci_interpreter_pal
+{
+static inline void
+IntegerSVDF(const TfLiteSVDFParams &params, const tflite::RuntimeShape &input_shape,
+ const int8_t *input_data, const tflite::RuntimeShape &weight_feature_shape,
+ const int8_t *weight_feature_data, const tflite::RuntimeShape &weight_time_shape,
+ const int16_t *weight_time_data, const tflite::RuntimeShape &bias_shape,
+ const int32_t *bias_data, int16_t *activation_state_data,
+ const tflite::RuntimeShape &output_shape, int8_t *output_data, int32_t *scratchpad_data,
+ int32_t *output_temp_data, int32_t scale_1_a, int scale_1_b, int32_t scale_2_a,
+ int scale_2_b, int32_t input_zp, int32_t output_zp)
+{
+ tflite::reference_ops::EvalIntegerSVDF(&params, input_shape, input_data, weight_feature_shape,
+ weight_feature_data, weight_time_shape, weight_time_data,
+ bias_shape, bias_data, activation_state_data, output_shape,
+ output_data, scratchpad_data, output_temp_data, scale_1_a,
+ scale_1_b, scale_2_a, scale_2_b, input_zp, output_zp);
+}
+static inline void
+FloatSVDF(const TfLiteSVDFParams &params, const tflite::RuntimeShape &input_shape,
+ const float *input_data, const tflite::RuntimeShape &weight_feature_shape,
+ const float *weight_feature_data, const tflite::RuntimeShape &weight_time_shape,
+ const float *weight_time_data, const tflite::RuntimeShape &bias_shape,
+ const float *bias_data, float *scratchpad_data, float *activation_state_data,
+ const tflite::RuntimeShape &output_shape, float *output_data)
+{
+ tflite::reference_ops::EvalFloatSVDF(&params, input_shape, input_data, weight_feature_shape,
+ weight_feature_data, weight_time_shape, weight_time_data,
+ bias_shape, bias_data, scratchpad_data,
+ activation_state_data, output_shape, output_data);
+}
+
+static inline void SetupScratchpadTensor(
+ const luci_interpreter::DataType &input_data_type,
+ const luci_interpreter::DataType &weight_feature_data_type,
+ luci_interpreter::Tensor *scratchpad_1, luci_interpreter::Tensor *scratchpad_2,
+ luci_interpreter::Tensor *scratchpad_3, luci_interpreter::Tensor *scratchpad_4,
+ luci_interpreter::Tensor *scratchpad_5, luci_interpreter::Tensor *scratchpad_6,
+ const luci_interpreter::Shape input_shape, const luci_interpreter::Shape weight_time_shape,
+ const int32_t batch_size, const int32_t num_filters, const int32_t num_units)
+{
+
+ if (input_data_type == loco::DataType::FLOAT32 &&
+ (weight_feature_data_type == loco::DataType::S8 ||
+ weight_feature_data_type == loco::DataType::U8))
+ {
+ (void)input_shape;
+ (void)weight_time_shape;
+ (void)scratchpad_3;
+ (void)scratchpad_4;
+ (void)scratchpad_5;
+ (void)scratchpad_6;
+
+ throw std::runtime_error("Hybrid type is not currently supported for linux platform");
+ }
+
+ // Resize scratchpad_1 tensor
+ scratchpad_1->resize({batch_size, num_filters});
+
+ if (input_data_type == loco::DataType::S8)
+ {
+ // Resize scratchpad_2 for full_integer op
+ scratchpad_2->resize({batch_size, num_units});
+ }
+}
+
+} // namespace luci_interpreter_pal
+
+#endif // LUCI_INTERPRETER_PAL_SVDF_H
diff --git a/compiler/luci-interpreter/pal/linux/pal.cmake b/compiler/luci-interpreter/pal/linux/pal.cmake
index 84349e0bf..185700cf9 100644
--- a/compiler/luci-interpreter/pal/linux/pal.cmake
+++ b/compiler/luci-interpreter/pal/linux/pal.cmake
@@ -40,7 +40,35 @@ macro(add_pal_to_target TGT)
# TODO put it back, I changed my mind.
# instead add sources with visitors in this library
- set(PAL_SOURCES ${TensorFlowSource_DIR}/tensorflow/lite/kernels/internal/quantization_util.cc)
+ set(PAL_SOURCES ${TensorFlowSource_DIR}/tensorflow/lite/kernels/internal/tensor_utils.cc
+ ${TensorFlowSource_DIR}/tensorflow/lite/kernels/internal/reference/portable_tensor_utils.cc
+ ${TensorFlowSource_DIR}/tensorflow/lite/kernels/internal/quantization_util.cc)
+
+ if(BUILD_ARM32_NEON)
+ # NOTE may need to revise this list for version upgrade
+ set(PAL_SOURCES ${PAL_SOURCES}
+ ${TensorFlowSource_DIR}/tensorflow/lite/kernels/internal/optimized/neon_tensor_utils.cc
+ ${TensorFlowSource_DIR}/tensorflow/lite/kernels/internal/optimized/cpu_check.cc
+ ${TensorFlowRuySource_DIR}/ruy/allocator.cc
+ ${TensorFlowRuySource_DIR}/ruy/block_map.cc
+ ${TensorFlowRuySource_DIR}/ruy/blocking_counter.cc
+ ${TensorFlowRuySource_DIR}/ruy/context_get_ctx.cc
+ ${TensorFlowRuySource_DIR}/ruy/cpuinfo.cc
+ ${TensorFlowRuySource_DIR}/ruy/ctx.cc
+ ${TensorFlowRuySource_DIR}/ruy/denormal.cc
+ ${TensorFlowRuySource_DIR}/ruy/frontend.cc
+ ${TensorFlowRuySource_DIR}/ruy/pack_arm.cc
+ ${TensorFlowRuySource_DIR}/ruy/prepacked_cache.cc
+ ${TensorFlowRuySource_DIR}/ruy/prepare_packed_matrices.cc
+ ${TensorFlowRuySource_DIR}/ruy/system_aligned_alloc.cc
+ ${TensorFlowRuySource_DIR}/ruy/thread_pool.cc
+ ${TensorFlowRuySource_DIR}/ruy/trmul.cc
+ ${TensorFlowRuySource_DIR}/ruy/tune.cc
+ ${TensorFlowRuySource_DIR}/ruy/wait.cc
+ ${TensorFlowRuySource_DIR}/ruy/kernel_arm32.cc
+ )
+ endif(BUILD_ARM32_NEON)
+
add_library(luci_interpreter_linux_pal STATIC ${PAL_SOURCES})
set_target_properties(luci_interpreter_linux_pal PROPERTIES POSITION_INDEPENDENT_CODE ON)
target_include_directories(luci_interpreter_linux_pal SYSTEM PRIVATE
diff --git a/compiler/luci-interpreter/pal/mcu/KernelsToBuild.lst b/compiler/luci-interpreter/pal/mcu/KernelsToBuild.lst
index 771974afe..d134a6b95 100644
--- a/compiler/luci-interpreter/pal/mcu/KernelsToBuild.lst
+++ b/compiler/luci-interpreter/pal/mcu/KernelsToBuild.lst
@@ -7,9 +7,11 @@ REGISTER_KERNEL(Concatenation)
REGISTER_KERNEL(Conv2D)
REGISTER_KERNEL(DepthToSpace)
REGISTER_KERNEL(DepthwiseConv2D)
+REGISTER_KERNEL(Dequantize)
REGISTER_KERNEL(Div)
REGISTER_KERNEL(Elu)
REGISTER_KERNEL(Exp)
+REGISTER_KERNEL(ExpandDims)
REGISTER_KERNEL(Floor)
REGISTER_KERNEL(FloorDiv)
REGISTER_KERNEL(Equal)
@@ -37,6 +39,7 @@ REGISTER_KERNEL(NotEqual)
REGISTER_KERNEL(Pad)
REGISTER_KERNEL(PadV2)
REGISTER_KERNEL(PRelu)
+REGISTER_KERNEL(Quantize)
REGISTER_KERNEL(Reshape)
REGISTER_KERNEL(ResizeBilinear)
REGISTER_KERNEL(ResizeNearestNeighbor)
@@ -50,6 +53,7 @@ REGISTER_KERNEL(Square)
REGISTER_KERNEL(SquaredDifference)
REGISTER_KERNEL(Squeeze)
REGISTER_KERNEL(Sub)
+REGISTER_KERNEL(SVDF)
REGISTER_KERNEL(Tanh)
REGISTER_KERNEL(Transpose)
REGISTER_KERNEL(TransposeConv)
diff --git a/compiler/luci-interpreter/pal/mcu/PALAveragePool2d.h b/compiler/luci-interpreter/pal/mcu/PALAveragePool2d.h
new file mode 100644
index 000000000..cce30601f
--- /dev/null
+++ b/compiler/luci-interpreter/pal/mcu/PALAveragePool2d.h
@@ -0,0 +1,73 @@
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef LUCI_INTERPRETER_PAL_AVERAGEPOOL2D_H
+#define LUCI_INTERPRETER_PAL_AVERAGEPOOL2D_H
+
+#include <tensorflow/lite/kernels/internal/reference/integer_ops/pooling.h>
+#include <tensorflow/lite/kernels/internal/reference/pooling.h>
+
+namespace luci_interpreter_pal
+{
+template <typename T>
+static inline void AveragePool(const tflite::PoolParams &params,
+ const tflite::RuntimeShape &input_shape, const T *input_data,
+ const tflite::RuntimeShape &output_shape, T *output_data,
+ const tflite::RuntimeShape &scratchpad_shape, T *scratchpad_data)
+{
+ {
+ // MARK: At this moment this operation is not supported
+ assert(false && "AveragePool NYI");
+ (void)params;
+ (void)input_shape;
+ (void)input_data;
+ (void)output_shape;
+ (void)output_data;
+ (void)scratchpad_shape;
+ (void)scratchpad_data;
+ }
+}
+
+template <>
+inline void AveragePool<int8_t>(const tflite::PoolParams &params,
+ const tflite::RuntimeShape &input_shape, const int8_t *input_data,
+ const tflite::RuntimeShape &output_shape, int8_t *output_data,
+ const tflite::RuntimeShape &scratchpad_shape,
+ int8_t *scratchpad_data)
+{
+ (void)scratchpad_shape;
+ (void)scratchpad_data;
+
+ tflite::reference_integer_ops::AveragePool(params, input_shape, input_data, output_shape,
+ output_data);
+}
+
+static inline void SetupScratchpadTensor(luci_interpreter::Tensor *scratchpad,
+ const luci_interpreter::DataType &input_data_type,
+ const tflite::RuntimeShape &input_shape,
+ const tflite::RuntimeShape &output_shape)
+{
+ (void)input_data_type;
+ (void)input_shape;
+ (void)output_shape;
+
+ scratchpad->set_allocatable(false);
+}
+
+} // namespace luci_interpreter_pal
+
+#endif // LUCI_INTERPRETER_PAL_AVERAGEPOOL2D_H
diff --git a/compiler/luci-interpreter/pal/mcu/PALConv2d.h b/compiler/luci-interpreter/pal/mcu/PALConv2d.h
index 0a8ae4e48..13976877a 100644
--- a/compiler/luci-interpreter/pal/mcu/PALConv2d.h
+++ b/compiler/luci-interpreter/pal/mcu/PALConv2d.h
@@ -26,11 +26,11 @@ static inline void Conv(const tflite::ConvParams &params, const tflite::RuntimeS
const float *input_data, const tflite::RuntimeShape &filter_shape,
const float *filter_data, const tflite::RuntimeShape &bias_shape,
const float *bias_data, const tflite::RuntimeShape &output_shape,
- float *output_data, const tflite::RuntimeShape &im2col_shape,
- float *im2col_data)
+ float *output_data, const tflite::RuntimeShape &scratchpad_shape,
+ float *scratchpad_data)
{
- (void)im2col_shape;
- (void)im2col_data;
+ (void)scratchpad_shape;
+ (void)scratchpad_data;
tflite::reference_ops::Conv(params, input_shape, input_data, filter_shape, filter_data,
bias_shape, bias_data, output_shape, output_data,
tflite::RuntimeShape(), nullptr);
@@ -40,14 +40,14 @@ static inline void Conv(const tflite::ConvParams &params, const tflite::RuntimeS
const uint8 *input_data, const tflite::RuntimeShape &filter_shape,
const uint8 *filter_data, const tflite::RuntimeShape &bias_shape,
const int32 *bias_data, const tflite::RuntimeShape &output_shape,
- uint8 *output_data, const tflite::RuntimeShape &im2col_shape,
- uint8 *im2col_data)
+ uint8 *output_data, const tflite::RuntimeShape &scratchpad_shape,
+ uint8 *scratchpad_data)
{
- (void)im2col_shape;
- (void)im2col_data;
+ (void)scratchpad_shape;
+ (void)scratchpad_data;
tflite::reference_ops::Conv(params, input_shape, input_data, filter_shape, filter_data,
- bias_shape, bias_data, output_shape, output_data, im2col_shape,
- im2col_data, nullptr);
+ bias_shape, bias_data, output_shape, output_data, scratchpad_shape,
+ scratchpad_data, nullptr);
}
static inline void ConvPerChannel(const tflite::ConvParams &params, const int32_t *mult,
@@ -55,16 +55,31 @@ static inline void ConvPerChannel(const tflite::ConvParams &params, const int32_
const int8 *input_data, const tflite::RuntimeShape &filter_shape,
const int8 *filter_data, const tflite::RuntimeShape &bias_shape,
const int32 *bias_data, const tflite::RuntimeShape &output_shape,
- int8 *output_data, const tflite::RuntimeShape &im2col_shape,
- int8 *im2col_data)
+ int8 *output_data, const tflite::RuntimeShape &scratchpad_shape,
+ int8 *scratchpad_data)
{
- (void)im2col_shape;
- (void)im2col_data;
+ (void)scratchpad_shape;
+ (void)scratchpad_data;
tflite::reference_integer_ops::ConvPerChannel(params, mult, shifts, input_shape, input_data,
filter_shape, filter_data, bias_shape, bias_data,
output_shape, output_data);
}
+static inline void SetupScratchpadTensor(luci_interpreter::Tensor *scratchpad,
+ const luci_interpreter::DataType &input_data_type,
+ const tflite::ConvParams &params,
+ const tflite::RuntimeShape &input_shape,
+ const tflite::RuntimeShape &filter_shape,
+ const tflite::RuntimeShape &output_shape)
+{
+ (void)input_data_type;
+ (void)params;
+ (void)input_shape;
+ (void)filter_shape;
+ (void)output_shape;
+ scratchpad->set_allocatable(false);
+}
+
} // namespace luci_interpreter_pal
#endif // LUCI_INTERPRETER_PAL_CONV2D_H
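The rename from im2col to scratchpad makes the buffer a generic workspace; on MCU it is never used, and SetupScratchpadTensor marks it non-allocatable so no memory is reserved. For context, a sketch of how an im2col workspace would be sized on a platform that does use one (illustrative sizing only):

#include <cstdint>

// One input patch is materialized per output element, so the workspace grows
// with both the output size and the filter volume.
int64_t im2col_bytes_sketch(int batches, int out_h, int out_w, int filter_h, int filter_w,
                            int in_channels, int bytes_per_element)
{
  return static_cast<int64_t>(batches) * out_h * out_w * filter_h * filter_w * in_channels *
         bytes_per_element;
}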
diff --git a/compiler/luci-interpreter/pal/mcu/PALDepthwiseConv2d.h b/compiler/luci-interpreter/pal/mcu/PALDepthwiseConv2d.h
new file mode 100644
index 000000000..c9d1a2948
--- /dev/null
+++ b/compiler/luci-interpreter/pal/mcu/PALDepthwiseConv2d.h
@@ -0,0 +1,91 @@
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef LUCI_INTERPRETER_PAL_DEPTHWISECONV2D_H
+#define LUCI_INTERPRETER_PAL_DEPTHWISECONV2D_H
+
+#include <tensorflow/lite/kernels/internal/reference/depthwiseconv_float.h>
+#include <tensorflow/lite/kernels/internal/reference/depthwiseconv_uint8.h>
+#include <tensorflow/lite/kernels/internal/reference/integer_ops/depthwise_conv.h>
+
+namespace luci_interpreter_pal
+{
+template <typename T>
+static inline void
+DepthwiseConvPerChannel(const tflite::DepthwiseParams &params, const int32_t *output_multiplier,
+ const int32_t *output_shift, const tflite::RuntimeShape &input_shape,
+ const T *input_data, const tflite::RuntimeShape &filter_shape,
+ const T *filter_data, const tflite::RuntimeShape &bias_shape,
+ const int32_t *bias_data, const tflite::RuntimeShape &output_shape,
+ T *output_data, const tflite::RuntimeShape &scratchpad_shape,
+ T *scratchpad_data)
+{
+ {
+ // MARK: At this moment this operation is not supported
+ assert(false && "DepthwiseConvPerChannel NYI");
+ (void)params;
+ (void)output_multiplier;
+ (void)output_shift;
+ (void)input_shape;
+ (void)input_data;
+ (void)filter_shape;
+ (void)filter_data;
+ (void)bias_shape;
+ (void)bias_data;
+ (void)output_shape;
+ (void)output_data;
+ (void)scratchpad_shape;
+ (void)scratchpad_data;
+ }
+}
+
+template <>
+inline void DepthwiseConvPerChannel<int8_t>(
+ const tflite::DepthwiseParams &params, const int32_t *output_multiplier,
+ const int32_t *output_shift, const tflite::RuntimeShape &input_shape, const int8_t *input_data,
+ const tflite::RuntimeShape &filter_shape, const int8_t *filter_data,
+ const tflite::RuntimeShape &bias_shape, const int32_t *bias_data,
+ const tflite::RuntimeShape &output_shape, int8_t *output_data,
+ const tflite::RuntimeShape &scratchpad_shape, int8_t *scratchpad_data)
+{
+ (void)scratchpad_shape;
+ (void)scratchpad_data;
+ tflite::reference_integer_ops::DepthwiseConvPerChannel(
+ params, output_multiplier, output_shift, input_shape, input_data, filter_shape, filter_data,
+ bias_shape, bias_data, output_shape, output_data);
+}
+
+static inline void SetupScratchpadTensor(luci_interpreter::Tensor *scratchpad,
+ const tflite::DepthwiseParams &params,
+ const luci_interpreter::DataType &input_data_type,
+ const tflite::RuntimeShape &input_shape,
+ const tflite::RuntimeShape &filter_shape,
+ const tflite::RuntimeShape &output_shape)
+{
+ (void)params;
+ (void)input_data_type;
+ (void)input_shape;
+ (void)filter_shape;
+ (void)output_shape;
+
+ scratchpad->set_allocatable(false);
+}
+
+} // namespace luci_interpreter_pal
+
+#endif // LUCI_INTERPRETER_PAL_DEPTHWISECONV2D_H
diff --git a/compiler/luci-interpreter/pal/mcu/PALDequantize.h b/compiler/luci-interpreter/pal/mcu/PALDequantize.h
new file mode 100644
index 000000000..15ff0327b
--- /dev/null
+++ b/compiler/luci-interpreter/pal/mcu/PALDequantize.h
@@ -0,0 +1,44 @@
+/*
+ * Copyright (c) 2022 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef LUCI_INTERPRETER_PAL_DEQUANTIZE_H
+#define LUCI_INTERPRETER_PAL_DEQUANTIZE_H
+
+#include "tensorflow/lite/kernels/internal/reference/integer_ops/dequantize.h"
+#include "tensorflow/lite/kernels/internal/reference/reference_ops.h"
+
+namespace luci_interpreter_pal
+{
+
+template <typename T>
+static inline void Dequantize(tflite::DequantizationParams &params,
+ const tflite::RuntimeShape &input_shape, const T *input_data,
+ const tflite::RuntimeShape &output_shape, float *output_data)
+{
+ tflite::reference_integer_ops::Dequantize<T>(params, input_shape, input_data, output_shape,
+ output_data);
+}
+
+static inline void Dequantize(tflite::DequantizationParams &params,
+ const tflite::RuntimeShape &input_shape, const uint8_t *input_data,
+ const tflite::RuntimeShape &output_shape, float *output_data)
+{
+ tflite::reference_ops::Dequantize(params, input_shape, input_data, output_shape, output_data);
+}
+
+} // namespace luci_interpreter_pal
+
+#endif // LUCI_INTERPRETER_PAL_DEQUANTIZE_H
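Dispatch in this header rests on ordinary overload resolution: for uint8_t data the non-template overload is an exact match and beats the template, so uint8 tensors take the reference_ops path while int8/int16 instantiate the integer_ops template. A self-contained demonstration of that rule:

#include <cstdint>
#include <iostream>

template <typename T> const char *which(const T *) { return "template (integer_ops)"; }
const char *which(const uint8_t *) { return "overload (reference_ops)"; }

int main()
{
  int8_t s8 = 0;
  uint8_t u8 = 0;
  std::cout << which(&s8) << "\n"; // prints: template (integer_ops)
  std::cout << which(&u8) << "\n"; // prints: overload (reference_ops)
}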
diff --git a/compiler/luci-interpreter/pal/mcu/PALFullyConnected.h b/compiler/luci-interpreter/pal/mcu/PALFullyConnected.h
new file mode 100644
index 000000000..048624d74
--- /dev/null
+++ b/compiler/luci-interpreter/pal/mcu/PALFullyConnected.h
@@ -0,0 +1,61 @@
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef LUCI_INTERPRETER_PAL_FULLYCONNECTED_H
+#define LUCI_INTERPRETER_PAL_FULLYCONNECTED_H
+
+#include <tensorflow/lite/kernels/internal/reference/fully_connected.h>
+#include <tensorflow/lite/kernels/internal/reference/integer_ops/fully_connected.h>
+
+namespace luci_interpreter_pal
+{
+template <typename T>
+static inline void FullyConnected(const tflite::FullyConnectedParams &params,
+ const tflite::RuntimeShape &input_shape, const T *input_data,
+ const tflite::RuntimeShape &filter_shape, const T *filter_data,
+ const tflite::RuntimeShape &bias_shape, const int32_t *bias_data,
+ const tflite::RuntimeShape &output_shape, T *output_data)
+{
+ {
+ // MARK: At this moment this operation is not supported
+ assert(false && "FullyConnected NYI");
+ (void)params;
+ (void)input_shape;
+ (void)input_data;
+ (void)filter_shape;
+ (void)filter_data;
+ (void)bias_shape;
+ (void)bias_data;
+ (void)output_shape;
+ (void)output_data;
+ }
+}
+
+template <>
+inline void
+FullyConnected<int8_t>(const tflite::FullyConnectedParams &params,
+ const tflite::RuntimeShape &input_shape, const int8_t *input_data,
+ const tflite::RuntimeShape &filter_shape, const int8_t *filter_data,
+ const tflite::RuntimeShape &bias_shape, const int32_t *bias_data,
+ const tflite::RuntimeShape &output_shape, int8_t *output_data)
+{
+ tflite::reference_integer_ops::FullyConnected(params, input_shape, input_data, filter_shape,
+ filter_data, bias_shape, bias_data, output_shape,
+ output_data);
+}
+} // namespace luci_interpreter_pal
+
+#endif // LUCI_INTERPRETER_PAL_FULLYCONNECTED_H
diff --git a/compiler/luci-interpreter/pal/mcu/PALMul.h b/compiler/luci-interpreter/pal/mcu/PALMul.h
index 2b46b100c..347a97a83 100644
--- a/compiler/luci-interpreter/pal/mcu/PALMul.h
+++ b/compiler/luci-interpreter/pal/mcu/PALMul.h
@@ -21,21 +21,21 @@
namespace luci_interpreter_pal
{
+template <typename T>
static inline void Mul(tflite::ArithmeticParams &params, const tflite::RuntimeShape &input1_shape,
- const float *input1_data, const tflite::RuntimeShape &input2_shape,
- const float *input2_data, const tflite::RuntimeShape &output_shape,
- float *output_data)
+ const T *input1_data, const tflite::RuntimeShape &input2_shape,
+ const T *input2_data, const tflite::RuntimeShape &output_shape,
+ T *output_data)
{
tflite::reference_ops::BroadcastMul4DSlow(params, input1_shape, input1_data, input2_shape,
input2_data, output_shape, output_data);
}
-static inline void BroadcastMul4DSlow(tflite::ArithmeticParams &params,
- const tflite::RuntimeShape &input1_shape,
- const float *input1_data,
- const tflite::RuntimeShape &input2_shape,
- const float *input2_data,
- const tflite::RuntimeShape &output_shape, float *output_data)
+template <typename T>
+static inline void
+BroadcastMul4DSlow(tflite::ArithmeticParams &params, const tflite::RuntimeShape &input1_shape,
+ const T *input1_data, const tflite::RuntimeShape &input2_shape,
+ const T *input2_data, const tflite::RuntimeShape &output_shape, T *output_data)
{
tflite::reference_ops::BroadcastMul4DSlow(params, input1_shape, input1_data, input2_shape,
input2_data, output_shape, output_data);
diff --git a/compiler/luci-interpreter/pal/mcu/PALQuantize.h b/compiler/luci-interpreter/pal/mcu/PALQuantize.h
new file mode 100644
index 000000000..6046789ae
--- /dev/null
+++ b/compiler/luci-interpreter/pal/mcu/PALQuantize.h
@@ -0,0 +1,44 @@
+/*
+ * Copyright (c) 2022 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef LUCI_INTERPRETER_PAL_QUANTIZE_H
+#define LUCI_INTERPRETER_PAL_QUANTIZE_H
+
+#include "tensorflow/lite/kernels/internal/reference/reference_ops.h"
+
+namespace luci_interpreter_pal
+{
+template <typename T>
+static inline void Quantize(tflite::QuantizationParams &params,
+ const tflite::RuntimeShape &input_shape, const float *input_data,
+ const tflite::RuntimeShape &output_shape, T *output_data)
+{
+ tflite::reference_ops::AffineQuantize(params, input_shape, input_data, output_shape, output_data);
+}
+
+template <typename Input, typename Output>
+static inline void Requantize(const Input *input_data, int32_t size,
+ int32_t effective_scale_multiplier, int32_t effective_scale_shift,
+ int32_t input_zero_point, int32_t output_zero_point,
+ Output *output_data)
+{
+ tflite::reference_ops::Requantize(input_data, size, effective_scale_multiplier,
+ effective_scale_shift, input_zero_point, output_zero_point,
+ output_data);
+}
+} // namespace luci_interpreter_pal
+
+#endif // LUCI_INTERPRETER_PAL_QUANTIZE_H
diff --git a/compiler/luci-interpreter/pal/mcu/PALSVDF.h b/compiler/luci-interpreter/pal/mcu/PALSVDF.h
new file mode 100644
index 000000000..3bba668fb
--- /dev/null
+++ b/compiler/luci-interpreter/pal/mcu/PALSVDF.h
@@ -0,0 +1,258 @@
+/*
+ * Copyright (c) 2022 Samsung Electronics Co., Ltd. All Rights Reserved
+ * Copyright 2020 The TensorFlow Authors. All Rights Reserved.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef LUCI_INTERPRETER_PAL_SVDF_H
+#define LUCI_INTERPRETER_PAL_SVDF_H
+
+#include <tensorflow/lite/kernels/internal/reference/svdf.h>
+
+namespace luci_interpreter_pal
+{
+static inline void
+IntegerSVDF(const TfLiteSVDFParams &params, const tflite::RuntimeShape &input_shape,
+ const int8_t *input_data, const tflite::RuntimeShape &weight_feature_shape,
+ const int8_t *weight_feature_data, const tflite::RuntimeShape &weight_time_shape,
+ const int16_t *weight_time_data, const tflite::RuntimeShape &bias_shape,
+ const int32_t *bias_data, int16_t *activation_state_data,
+ const tflite::RuntimeShape &output_shape, int8_t *output_data, int32_t *scratchpad_data,
+ int32_t *output_temp_data, int32_t scale_1_a, int scale_1_b, int32_t scale_2_a,
+ int scale_2_b, int32_t input_zp, int32_t output_zp)
+{
+ const int n_rank = params.rank;
+ const int n_batch = input_shape.Dims(0);
+ const int n_input = input_shape.Dims(1);
+ const int n_filter = weight_feature_shape.Dims(0);
+ const int n_unit = n_filter / n_rank;
+ const int n_memory = weight_time_shape.Dims(1);
+
+ // Left shift the activation_state.
+ {
+ int16_t *new_state_start = activation_state_data;
+ const int16_t *old_state_start = activation_state_data + 1;
+ const int16_t *old_state_end = activation_state_data + n_batch * n_filter * n_memory;
+ while (old_state_start != old_state_end)
+ {
+ *new_state_start++ = *old_state_start++;
+ }
+ }
+
+ // Note: no need to clear the latest activation, since the matmul does not accumulate.
+
+ // Feature matmul.
+ {
+ const int32_t output_max = std::numeric_limits<int16_t>::max();
+ const int32_t output_min = std::numeric_limits<int16_t>::min();
+ int16_t *result_in_batch = activation_state_data + (n_memory - 1);
+ for (int b = 0; b < n_batch; b++)
+ {
+ const int8_t *matrix_ptr = weight_feature_data;
+ for (int r = 0; r < n_filter; r++)
+ {
+ int32_t dot_prod = 0;
+ const int8_t *vector_in_batch = input_data + b * n_input;
+ for (int c = 0; c < n_input; c++)
+ {
+ dot_prod += *matrix_ptr++ * (*vector_in_batch++ - input_zp);
+ }
+ dot_prod = tflite::MultiplyByQuantizedMultiplier(dot_prod, scale_1_a, scale_1_b);
+ dot_prod = std::min(std::max(output_min, dot_prod), output_max);
+ // This assumes the state is symmetrically quantized. Otherwise the newest
+ // state entry would have to be initialized to its zero point and accumulate
+ // the dot_prod.
+ // Equivalent to the following:
+ //   result_in_batch = zero point, which happens to be zero.
+ //   result_in_batch += dot_prod.
+ *result_in_batch = dot_prod;
+ result_in_batch += n_memory;
+ }
+ }
+ }
+
+ // Time.
+ {
+ for (int b = 0; b < n_batch; ++b)
+ {
+ int32_t *scratch_ptr_batch = scratchpad_data + b * n_filter;
+
+ // Perform batched vector dot product:
+ const int16_t *vector1_ptr = weight_time_data;
+ const int16_t *vector2_ptr = activation_state_data + b * n_memory * n_filter;
+
+ for (int i = 0; i < n_filter; i++)
+ {
+ *scratch_ptr_batch = 0;
+ for (int j = 0; j < n_memory; j++)
+ {
+ *scratch_ptr_batch += *vector1_ptr++ * *vector2_ptr++;
+ }
+ scratch_ptr_batch++;
+ }
+ }
+ }
+
+ // Reduce, add bias, rescale, activation.
+ {
+ // Add bias.
+ if (bias_data)
+ {
+ // Vector batch assign:
+ for (int i = 0; i < n_batch; ++i)
+ {
+ int32_t *output_ptr = output_temp_data + i * n_unit;
+ const int32_t *bias_ptr = bias_data;
+ for (int j = 0; j < n_unit; ++j)
+ {
+ *output_ptr++ = *bias_ptr++;
+ }
+ }
+ }
+ else
+ {
+ int32_t *output_ptr = output_temp_data;
+ for (int i = 0; i < n_batch * n_unit; ++i)
+ {
+ *output_ptr++ = 0;
+ }
+ }
+
+ // Reduce.
+ for (int b = 0; b < n_batch; ++b)
+ {
+ int32_t *output_temp_ptr = output_temp_data + b * n_unit;
+ int32_t *scratch_ptr_batch = scratchpad_data + b * n_filter;
+
+ // Reduction sum vector
+ for (int i = 0; i < n_unit; ++i)
+ {
+ for (int j = 0; j < n_rank; ++j)
+ {
+ output_temp_ptr[i] += *scratch_ptr_batch++;
+ }
+ }
+ }
+
+ // Rescale.
+ const int32_t output_max = std::numeric_limits<int8_t>::max();
+ const int32_t output_min = std::numeric_limits<int8_t>::min();
+ for (int i = 0; i < n_batch * n_unit; ++i)
+ {
+ int32_t x1 = output_temp_data[i];
+ int32_t x2 = tflite::MultiplyByQuantizedMultiplier(x1, scale_2_a, scale_2_b);
+ int32_t x3 = x2 + output_zp;
+ int32_t x4 = std::min(std::max(output_min, x3), output_max);
+ output_data[i] = static_cast<int8_t>(x4);
+ }
+ }
+}
+static inline void
+FloatSVDF(const TfLiteSVDFParams &params, const tflite::RuntimeShape &input_shape,
+ const float *input_data, const tflite::RuntimeShape &weight_feature_shape,
+ const float *weight_feature_data, const tflite::RuntimeShape &weight_time_shape,
+ const float *weight_time_data, const tflite::RuntimeShape &bias_shape,
+ const float *bias_data, float *scratchpad_data, float *activation_state_data,
+ const tflite::RuntimeShape &output_shape, float *output_data)
+{
+ const int32_t rank = params.rank;
+ const int32_t batch_size = input_shape.Dims(0);
+ const int32_t input_size = input_shape.Dims(1);
+ const int32_t num_filters = weight_feature_shape.Dims(0);
+ const int32_t num_units = num_filters / rank;
+ const int32_t memory_size = weight_time_shape.Dims(1);
+
+ // Left shift the activation_state.
+ {
+ float *new_state_start = activation_state_data;
+ const float *old_state_start = activation_state_data + 1;
+ const float *old_state_end = activation_state_data + batch_size * num_filters * memory_size;
+ while (old_state_start != old_state_end)
+ {
+ *new_state_start++ = *old_state_start++;
+ }
+ }
+
+ // Note: no need to clear the latest activation, since the matmul does not accumulate.
+
+ // Compute conv1d(inputs, weights_feature).
+ // The activation_state's rightmost column is used to save current cycle
+ // activation. This is achieved by starting at state_ptr[memory_size - 1] and
+ // having the stride equal to memory_size.
+
+ // Perform batched matrix vector multiply operation:
+ {
+ const float *matrix = weight_feature_data;
+ const float *vector = input_data;
+ float *result = &activation_state_data[memory_size - 1];
+ float *result_in_batch = result;
+ for (int i = 0; i < batch_size; ++i)
+ {
+ const float *matrix_ptr = matrix;
+ for (int j = 0; j < num_filters; ++j)
+ {
+ float dot_prod = 0.0f;
+ const float *vector_in_batch = vector + i * input_size;
+ for (int k = 0; k < input_size; ++k)
+ {
+ dot_prod += *matrix_ptr++ * *vector_in_batch++;
+ }
+ *result_in_batch = dot_prod;
+ result_in_batch += memory_size;
+ }
+ }
+ }
+
+ tflite::reference_ops::ApplyTimeWeightsBiasAndActivation(
+ batch_size, memory_size, num_filters, num_units, rank, weight_time_data, bias_data,
+ params.activation, activation_state_data, scratchpad_data, output_data);
+}
+
+static inline void SetupScratchpadTensor(
+ const luci_interpreter::DataType &input_data_type,
+ const luci_interpreter::DataType &weight_feature_data_type,
+ luci_interpreter::Tensor *scratchpad_1, luci_interpreter::Tensor *scratchpad_2,
+ luci_interpreter::Tensor *scratchpad_3, luci_interpreter::Tensor *scratchpad_4,
+ luci_interpreter::Tensor *scratchpad_5, luci_interpreter::Tensor *scratchpad_6,
+ const luci_interpreter::Shape input_shape, const luci_interpreter::Shape weight_time_shape,
+ const int32_t batch_size, const int32_t num_filters, const int32_t num_units)
+{
+
+ if (input_data_type == loco::DataType::FLOAT32 &&
+ (weight_feature_data_type == loco::DataType::S8 ||
+ weight_feature_data_type == loco::DataType::U8))
+ {
+ (void)input_shape;
+ (void)weight_time_shape;
+ (void)scratchpad_3;
+ (void)scratchpad_4;
+ (void)scratchpad_5;
+ (void)scratchpad_6;
+
+ throw std::runtime_error("Hybrid type is not currently supported for mcu platform");
+ }
+
+ // Resize scratchpad_1 tensor
+ scratchpad_1->resize({batch_size, num_filters});
+
+ if (input_data_type == loco::DataType::S8)
+ {
+ // Resize scratchpad_2 for full_integer op
+ scratchpad_2->resize({batch_size, num_units});
+ }
+}
+
+} // namespace luci_interpreter_pal
+
+#endif // LUCI_INTERPRETER_PAL_SVDF_H
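IntegerSVDF rescales its accumulators with tflite::MultiplyByQuantizedMultiplier(x, a, b), which computes approximately x * a * 2^b / 2^31 with rounding. A simplified 64-bit sketch of that fixed-point rescale (the real helper uses a saturating doubling high-multiply plus rounding shifts, which this sketch glosses over):

#include <cstdint>

int32_t multiply_by_quantized_multiplier_sketch(int32_t x, int32_t multiplier, int shift)
{
  int64_t prod = static_cast<int64_t>(x) * multiplier; // Q31 fixed-point product
  int total_shift = 31 - shift;                        // assumes shift < 31
  int64_t round = int64_t{1} << (total_shift - 1);
  return static_cast<int32_t>((prod + round) >> total_shift);
}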
diff --git a/compiler/luci-interpreter/pal/mcu/pal.cmake b/compiler/luci-interpreter/pal/mcu/pal.cmake
index a479d407b..907d51de6 100644
--- a/compiler/luci-interpreter/pal/mcu/pal.cmake
+++ b/compiler/luci-interpreter/pal/mcu/pal.cmake
@@ -39,7 +39,9 @@ macro(add_pal_to_target TGT)
# TODO put it back, I changed my mind.
# instead add sources with visitors in this library
- set(PAL_SOURCES ${TensorFlowSource_DIR}/tensorflow/lite/kernels/internal/quantization_util.cc)
+ set(PAL_SOURCES ${TensorFlowSource_DIR}/tensorflow/lite/kernels/internal/quantization_util.cc
+ ${TensorFlowSource_DIR}/tensorflow/lite/kernels/internal/tensor_utils.cc
+ ${TensorFlowSource_DIR}/tensorflow/lite/kernels/internal/reference/portable_tensor_utils.cc)
add_library(luci_interpreter_mcu_pal STATIC ${PAL_SOURCES})
set_target_properties(luci_interpreter_mcu_pal PROPERTIES POSITION_INDEPENDENT_CODE ON)
target_include_directories(luci_interpreter_mcu_pal PRIVATE
diff --git a/compiler/luci-interpreter/src/CMakeLists.txt b/compiler/luci-interpreter/src/CMakeLists.txt
index e37150336..997b75a84 100644
--- a/compiler/luci-interpreter/src/CMakeLists.txt
+++ b/compiler/luci-interpreter/src/CMakeLists.txt
@@ -13,6 +13,7 @@ set(LUCI_INTERPRETER_BINARY "luci_interpreter${LUCI_INTERPRETER_SUFFIX}")
set(LUCI_INTERPRETER_CORE "luci_interpreter_core${LUCI_INTERPRETER_SUFFIX}")
set(LUCI_INTERPRETER_KERNELS "luci_interpreter_kernels${LUCI_INTERPRETER_SUFFIX}")
set(LUCI_INTERPRETER_LOADER "luci_interpreter_loader${LUCI_INTERPRETER_SUFFIX}")
+set(LUCI_INTERPRETER_IMPORT "luci_interpreter_import${LUCI_INTERPRETER_SUFFIX}")
add_subdirectory(core)
message(STATUS "LUCI INTERPRETER CORE")
@@ -20,6 +21,8 @@ add_subdirectory(kernels)
message(STATUS "LUCI INTERPRETER KERNELS")
add_subdirectory(loader)
message(STATUS "LUCI INTERPRETER LOADER")
+add_subdirectory(import)
+message(STATUS "LUCI INTERPRETER IMPORT")
message(STATUS "LUCI INTERPTER INITALIZED")
diff --git a/compiler/luci-interpreter/src/Interpreter.cpp b/compiler/luci-interpreter/src/Interpreter.cpp
index 1b8792a6c..8cf272efd 100644
--- a/compiler/luci-interpreter/src/Interpreter.cpp
+++ b/compiler/luci-interpreter/src/Interpreter.cpp
@@ -70,25 +70,30 @@ private:
} // namespace
+Interpreter::Interpreter(const luci::Module *module)
+{
+ _runtime_to_ir = std::make_unique<RuntimeToIR>();
+ _event_notifier = std::make_unique<EventNotifierImpl>(*_runtime_to_ir, _observers);
+ _runtime_module = std::make_unique<RuntimeModule>(_event_notifier.get());
+
+ _default_memory_manager = std::make_unique<SimpleMemoryManager>();
+
+ ModuleLoader loader(module, _runtime_module.get(), *_runtime_to_ir, _node_to_tensor,
+ _default_memory_manager.get());
+ loader.load();
+}
+
Interpreter::Interpreter(const luci::Module *module,
luci_interpreter::IMemoryManager *memory_manager)
{
+ assert(memory_manager && "Use Interpreter::Interpreter(module) constructor instead");
+
_runtime_to_ir = std::make_unique<RuntimeToIR>();
_event_notifier = std::make_unique<EventNotifierImpl>(*_runtime_to_ir, _observers);
_runtime_module = std::make_unique<RuntimeModule>(_event_notifier.get());
- if (memory_manager == nullptr)
- {
- _default_memory_manager = std::make_unique<SimpleMemoryManager>();
- _memory_manager = _default_memory_manager.get();
- }
- else
- {
- _memory_manager = memory_manager;
- }
-
ModuleLoader loader(module, _runtime_module.get(), *_runtime_to_ir, _node_to_tensor,
- _memory_manager);
+ memory_manager);
loader.load();
}
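A usage sketch of the two constructors (header paths and the SimpleMemoryManager type assumed from the surrounding sources; the module itself comes from an importer elsewhere): the new single-argument form owns a default manager internally, while the two-argument form now insists on a non-null one.

#include <luci_interpreter/Interpreter.h>
#include <luci_interpreter/SimpleMemoryManager.h>

void construct_both_ways(const luci::Module *module)
{
  // Owns its SimpleMemoryManager; nothing else to manage.
  luci_interpreter::Interpreter owns_manager(module);

  // Caller-provided manager; must be non-null and outlive the interpreter.
  luci_interpreter::SimpleMemoryManager mm;
  luci_interpreter::Interpreter borrows_manager(module, &mm);
}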
diff --git a/compiler/luci-interpreter/src/core/CMakeLists.txt b/compiler/luci-interpreter/src/core/CMakeLists.txt
index 4430cba11..c2471e01c 100644
--- a/compiler/luci-interpreter/src/core/CMakeLists.txt
+++ b/compiler/luci-interpreter/src/core/CMakeLists.txt
@@ -10,7 +10,9 @@ set(SOURCES
Tensor.cpp)
add_library(${LUCI_INTERPRETER_CORE} STATIC ${SOURCES})
-set_target_properties(${LUCI_INTERPRETER_CORE} PROPERTIES POSITION_INDEPENDENT_CODE ON)
+if (NOT NNCC_LIBRARY_NO_PIC)
+ set_target_properties(${LUCI_INTERPRETER_CORE} PROPERTIES POSITION_INDEPENDENT_CODE ON)
+endif(NOT NNCC_LIBRARY_NO_PIC)
target_include_directories(${LUCI_INTERPRETER_CORE} PUBLIC "${LUCI_INTERPRETER_INCLUDE_DIR}")
target_include_directories(${LUCI_INTERPRETER_CORE} PUBLIC "${LUCI_INTERPRETER_SOURCE_DIR}")
target_link_libraries(${LUCI_INTERPRETER_CORE} PUBLIC luci_lang)
diff --git a/compiler/luci-interpreter/src/core/KernelParams.h b/compiler/luci-interpreter/src/core/KernelParams.h
index ee0390fcc..958fd4b74 100644
--- a/compiler/luci-interpreter/src/core/KernelParams.h
+++ b/compiler/luci-interpreter/src/core/KernelParams.h
@@ -43,6 +43,12 @@ struct ArgMaxParams
DataType output_type;
};
+struct BatchMatMulParams
+{
+ bool adj_x;
+ bool adj_y;
+};
+
struct ConcatenationParams
{
int axis;
@@ -83,6 +89,13 @@ struct DivParams
struct FullyConnectedParams
{
Activation activation;
+ bool keep_num_dims = false;
+};
+
+struct GatherParams
+{
+ int32_t axis;
+ int32_t batch_dims;
};
struct InstanceNormParams
@@ -119,6 +132,11 @@ struct MulParams
Activation activation;
};
+struct OneHotParams
+{
+ int32_t axis;
+};
+
struct PackParams
{
int32_t values_count;
@@ -157,6 +175,13 @@ struct SubParams
Activation activation;
};
+struct SVDFParams
+{
+ bool asymmetric_quantize_inputs;
+ int32_t svdf_rank;
+ Activation activation;
+};
+
struct SpaceToDepthParams
{
int block_size;
diff --git a/compiler/luci-interpreter/src/import/CMakeLists.txt b/compiler/luci-interpreter/src/import/CMakeLists.txt
new file mode 100644
index 000000000..dd9733f92
--- /dev/null
+++ b/compiler/luci-interpreter/src/import/CMakeLists.txt
@@ -0,0 +1,15 @@
+set(SOURCES
+ "${LUCI_INTERPRETER_INCLUDE_DIR}/luci_interpreter/GraphBuilderRegistry.h"
+ GraphBuilderRegistry.cpp)
+
+# include specific builders
+file(GLOB_RECURSE NODES "Nodes/*")
+list(APPEND SOURCES ${NODES})
+
+add_library(${LUCI_INTERPRETER_IMPORT} STATIC ${SOURCES})
+if (NOT NNCC_LIBRARY_NO_PIC)
+ set_target_properties(${LUCI_INTERPRETER_IMPORT} PROPERTIES POSITION_INDEPENDENT_CODE ON)
+endif(NOT NNCC_LIBRARY_NO_PIC)
+
+target_include_directories(${LUCI_INTERPRETER_IMPORT} PUBLIC "${LUCI_INTERPRETER_INCLUDE_DIR}")
+target_link_libraries(${LUCI_INTERPRETER_IMPORT} PUBLIC luci_import)
diff --git a/compiler/luci-interpreter/src/import/GraphBuilderRegistry.cpp b/compiler/luci-interpreter/src/import/GraphBuilderRegistry.cpp
new file mode 100644
index 000000000..a33bca6a4
--- /dev/null
+++ b/compiler/luci-interpreter/src/import/GraphBuilderRegistry.cpp
@@ -0,0 +1,33 @@
+/*
+ * Copyright (c) 2022 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+#include "luci_interpreter/GraphBuilderRegistry.h"
+#include "Nodes/CircleReferencingConst.h"
+
+namespace luci_interpreter
+{
+
+std::unique_ptr<luci::GraphBuilderSource> source_without_constant_copying()
+{
+ auto builder = std::make_unique<luci::GraphBuilderRegistry>();
+ {
+ // redefine NodeBuilder of BUFFER type
+ builder->add(std::make_unique<CircleReferencingConstNodeBuilder>());
+ }
+
+ return builder;
+}
+
+} // namespace luci_interpreter
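A sketch of how this source might be consumed (luci::Importer API assumed): passing it to the importer makes BUFFER tensors build as referencing customs, so constant data is referenced in place instead of being copied into CircleConst nodes.

#include <luci_interpreter/GraphBuilderRegistry.h>
#include <luci/Importer.h>
#include <memory>

std::unique_ptr<luci::Module> import_without_copies(const circle::Model *model)
{
  auto source = luci_interpreter::source_without_constant_copying();
  luci::Importer importer(source.get()); // builders from `source` override the defaults
  return importer.importModule(model);
}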
diff --git a/compiler/luci-interpreter/src/import/Nodes/CircleReferencingConst.cpp b/compiler/luci-interpreter/src/import/Nodes/CircleReferencingConst.cpp
new file mode 100644
index 000000000..14e90f240
--- /dev/null
+++ b/compiler/luci-interpreter/src/import/Nodes/CircleReferencingConst.cpp
@@ -0,0 +1,113 @@
+/*
+ * Copyright (c) 2022 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "CircleReferencingConst.h"
+
+#include <vector>
+
+namespace
+{
+
+// helper struct describing the data loaded into custom_options of a CircleReferencingConst node
+struct ConstDataReference
+{
+ const uint8_t *data = nullptr;
+ uint32_t size = 0;
+};
+
+} // namespace
+
+namespace luci_interpreter
+{
+using namespace luci;
+
+CircleNode *CircleReferencingConstNodeBuilder::build(TensorIndex tensor_index,
+ GraphBuilderContext *context) const
+{
+ assert(tensor_index >= 0);
+
+ const auto graph = context->graph();
+ const auto reader = context->reader();
+ const auto tensors = reader->tensors();
+ auto const const_tensor = tensors[tensor_index];
+ assert(const_tensor != nullptr);
+ if (const_tensor->is_variable())
+ {
+ // Create CircleVariable for variable
+ return nullptr;
+ }
+
+ auto const buffer = wrap(reader->buffers()[const_tensor->buffer()]->data());
+ auto const const_dims = wrap(const_tensor->shape()); // in NHWC
+ if (const_dims.empty() && buffer.empty())
+ {
+ // unknown-shape or scalar tensor
+ return nullptr;
+ }
+
+ // if tensor_index is used as an output of some other operator, this is not a constant
+ auto tensoroutputs = context->tensoroutputs();
+ if (tensoroutputs->find(tensor_index))
+ {
+ // other operator output tensor
+ return nullptr;
+ }
+
+ uint32_t num_elements = 1;
+ for (uint32_t r = 0; r < const_dims.size(); ++r)
+ {
+ num_elements = num_elements * const_dims[r];
+ }
+
+ if (buffer.empty() && num_elements > 0)
+ {
+ // normal empty tensor
+ return nullptr;
+ }
+
+ // create CircleReferencingConst
+ auto custom_node = graph->nodes()->create<CircleCustom>(0, 1);
+ {
+ custom_node->custom_code("CircleReferencingConst");
+
+ copy_tensor_attributes(const_tensor, custom_node);
+ custom_node->shape_status(luci::ShapeStatus::VALID);
+
+ // custom_options stores the buffer size and the pointer to the buffer's data
+ {
+ std::vector<uint8_t> custom_options(sizeof(ConstDataReference));
+ {
+ auto &const_data_ref = *reinterpret_cast<ConstDataReference *>(custom_options.data());
+ const_data_ref = {buffer.data(), buffer.size()};
+ }
+ custom_node->custom_options(custom_options);
+ }
+ }
+
+ // The output of the CircleCustom node is exposed through a CircleCustomOut node
+ auto out_node = graph->nodes()->create<CircleCustomOut>();
+ {
+ out_node->index(0);
+ out_node->input(custom_node);
+
+ copy_tensor_attributes(const_tensor, out_node);
+ out_node->shape_status(luci::ShapeStatus::VALID);
+ }
+
+ return out_node;
+}
+
+} // namespace luci_interpreter
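A counterpart sketch for consumers of such nodes: recovering the packed reference from custom_options (hypothetical helper mirroring the ConstDataReference layout above; memcpy avoids the aliasing cast used when packing):

#include <cstdint>
#include <cstring>
#include <vector>

struct ConstDataReference
{
  const uint8_t *data = nullptr;
  uint32_t size = 0;
};

ConstDataReference unpack_reference_sketch(const std::vector<uint8_t> &custom_options)
{
  ConstDataReference ref{};
  if (custom_options.size() == sizeof(ConstDataReference))
    std::memcpy(&ref, custom_options.data(), sizeof(ref)); // bytes back into the struct
  return ref;
}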
diff --git a/compiler/luci-interpreter/src/import/Nodes/CircleReferencingConst.h b/compiler/luci-interpreter/src/import/Nodes/CircleReferencingConst.h
new file mode 100644
index 000000000..ed8f95124
--- /dev/null
+++ b/compiler/luci-interpreter/src/import/Nodes/CircleReferencingConst.h
@@ -0,0 +1,39 @@
+/*
+ * Copyright (c) 2022 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __LUCI_INTERPRETER_IMPORT_OP_CIRCLE_REFERENCING_CONST_H__
+#define __LUCI_INTERPRETER_IMPORT_OP_CIRCLE_REFERENCING_CONST_H__
+
+#include <luci/Import/NodeBuilder.h>
+
+#include <luci/IR/Nodes/CircleConst.h>
+
+namespace luci_interpreter
+{
+using namespace luci;
+
+/**
+ * @brief Builder that creates a CircleCustom node holding a pointer to the constant data of a tensor with a buffer.
+ */
+class CircleReferencingConstNodeBuilder : public TypedNodeBuilder<NodeBuilderType::BUFFER>
+{
+public:
+ CircleNode *build(TensorIndex tensor_index, GraphBuilderContext *ctx) const final;
+};
+
+} // namespace luci_interpreter
+
+#endif // __LUCI_INTERPRETER_IMPORT_OP_CIRCLE_REFERENCING_CONST_H__
diff --git a/compiler/luci-interpreter/src/kernels/Add.cpp b/compiler/luci-interpreter/src/kernels/Add.cpp
index 7381c3849..d7bf3084f 100644
--- a/compiler/luci-interpreter/src/kernels/Add.cpp
+++ b/compiler/luci-interpreter/src/kernels/Add.cpp
@@ -38,8 +38,11 @@ Add::Add(const Tensor *input1, const Tensor *input2, Tensor *output, const AddPa
void Add::configure()
{
LUCI_INTERPRETER_CHECK(input1()->element_type() == input2()->element_type());
+ LUCI_INTERPRETER_CHECK(input1()->element_type() == output()->element_type());
if (input1()->element_type() == DataType::S16)
{
+ LUCI_INTERPRETER_CHECK(input1()->zero_points().size() == 1 &&
+ input2()->zero_points().size() == 1);
LUCI_INTERPRETER_CHECK(input1()->zero_point() == 0 && input2()->zero_point() == 0 &&
output()->zero_point() == 0);
}
@@ -54,6 +57,12 @@ void Add::execute() const
case DataType::FLOAT32:
evalFloat();
break;
+ case DataType::S64:
+ evalInteger<int64_t>();
+ break;
+ case DataType::S32:
+ evalInteger<int32_t>();
+ break;
case DataType::U8:
evalQuantized();
break;
@@ -67,13 +76,8 @@ void Add::execute() const
void Add::evalFloat() const
{
- float activation_min{};
- float activation_max{};
- calculateActivationRange(_params.activation, &activation_min, &activation_max);
-
tflite::ArithmeticParams params{};
- params.float_activation_min = activation_min;
- params.float_activation_max = activation_max;
+ fillArithmeticActivationRange<float>(params, _params.activation);
const bool need_broadcast = tflite::reference_ops::ProcessBroadcastShapes(
getTensorShape(input1()), getTensorShape(input2()), &params);
@@ -92,6 +96,28 @@ void Add::evalFloat() const
}
}
+template <typename T> void Add::evalInteger() const
+{
+ tflite::ArithmeticParams params{};
+ fillArithmeticActivationRange<T>(params, _params.activation);
+
+ const bool need_broadcast = tflite::reference_ops::ProcessBroadcastShapes(
+ getTensorShape(input1()), getTensorShape(input2()), &params);
+
+ if (need_broadcast)
+ {
+ tflite::reference_ops::BroadcastAdd4DSlow(
+ params, getTensorShape(input1()), getTensorData<T>(input1()), getTensorShape(input2()),
+ getTensorData<T>(input2()), getTensorShape(output()), getTensorData<T>(output()));
+ }
+ else
+ {
+ tflite::reference_ops::Add(params, getTensorShape(input1()), getTensorData<T>(input1()),
+ getTensorShape(input2()), getTensorData<T>(input2()),
+ getTensorShape(output()), getTensorData<T>(output()));
+ }
+}
+
void Add::evalQuantized() const
{
const auto input1_scale = static_cast<double>(input1()->scale());
diff --git a/compiler/luci-interpreter/src/kernels/Add.h b/compiler/luci-interpreter/src/kernels/Add.h
index 79518845d..91d95b6af 100644
--- a/compiler/luci-interpreter/src/kernels/Add.h
+++ b/compiler/luci-interpreter/src/kernels/Add.h
@@ -39,6 +39,7 @@ public:
private:
void evalFloat() const;
+ template <typename T> void evalInteger() const;
void evalQuantized() const;
void evalQuantizedS16() const;
};
diff --git a/compiler/luci-interpreter/src/kernels/Add.test.cpp b/compiler/luci-interpreter/src/kernels/Add.test.cpp
index 847b65667..b8b1c3089 100644
--- a/compiler/luci-interpreter/src/kernels/Add.test.cpp
+++ b/compiler/luci-interpreter/src/kernels/Add.test.cpp
@@ -166,6 +166,69 @@ TEST_F(AddTest, Float)
}
}
+template <loco::DataType DType> void CheckInteger(luci_interpreter::IMemoryManager *memory_manager)
+{
+ using dtype = typename loco::DataTypeImpl<DType>::Type;
+ Shape base_shape = {2, 3, 1, 2};
+ std::vector<Shape> test_shapes{{1, 1, 3, 2}, {1, 3, 1, 2}, {2, 1, 3, 1}, {2, 3, 1, 1}};
+ std::vector<std::vector<dtype>> test_outputs = {
+ {3, 3, 0, 1, 0, 8, 5, 1, 0, 0, 2, 6, 8, 0, 1, 0, 5, 1,
+ 5, 4, 0, 2, 2, 9, 11, 0, 4, 0, 8, 5, 11, 2, 4, 0, 8, 7},
+ {3, 3, 0, 0, 5, 1, 5, 4, 4, 0, 8, 7},
+ {3, 6, 0, 3, 0, 0, 5, 4, 2, 1, 0, 0, 8, 0, 5, 0, 1, 0,
+ 0, 2, 2, 4, 7, 9, 6, 0, 8, 0, 13, 5, 6, 0, 8, 2, 13, 7},
+ {3, 6, 2, 1, 1, 0, 0, 2, 8, 0, 13, 7}};
+ std::vector<dtype> input1_data{-1, 2, 1, 0, 4, -5, 1, 3, 7, -1, 7, 1};
+ std::vector<dtype> input2_data{4, 1, -3, -1, 1, 6};
+ for (size_t i = 0; i < test_shapes.size(); ++i)
+ {
+ Tensor input1_tensor = makeInputTensor<DType>(base_shape, input1_data, memory_manager);
+ Tensor input2_tensor = makeInputTensor<DType>(test_shapes[i], input2_data, memory_manager);
+ Tensor output_tensor = makeOutputTensor(DType);
+
+ AddParams params{};
+ params.activation = Activation::RELU;
+
+ Add kernel(&input1_tensor, &input2_tensor, &output_tensor, params);
+ kernel.configure();
+ memory_manager->allocate_memory(output_tensor);
+ kernel.execute();
+
+ EXPECT_THAT(extractTensorData<dtype>(output_tensor), test_outputs[i])
+ << "With shape number " << i;
+ }
+ // Re-run with exchanged inputs.
+ for (size_t i = 0; i < test_shapes.size(); ++i)
+ {
+ Tensor input1_tensor = makeInputTensor<DType>(test_shapes[i], input2_data, memory_manager);
+ Tensor input2_tensor = makeInputTensor<DType>(base_shape, input1_data, memory_manager);
+ Tensor output_tensor = makeOutputTensor(DType);
+
+ AddParams params{};
+ params.activation = Activation::RELU;
+
+ Add kernel(&input1_tensor, &input2_tensor, &output_tensor, params);
+ kernel.configure();
+ memory_manager->allocate_memory(output_tensor);
+ kernel.execute();
+
+ EXPECT_THAT(extractTensorData<dtype>(output_tensor), test_outputs[i])
+ << "With shape number " << i;
+ }
+}
+
+TEST_F(AddTest, SInt32)
+{
+ CheckInteger<loco::DataType::S32>(_memory_manager.get());
+ SUCCEED();
+}
+
+TEST_F(AddTest, SInt64)
+{
+ CheckInteger<loco::DataType::S64>(_memory_manager.get());
+ SUCCEED();
+}
+
TEST_F(AddTest, SInt16)
{
Shape base_shape = {2, 3, 1, 2};
@@ -248,11 +311,24 @@ TEST_F(AddTest, Input_Output_Type_NEG)
EXPECT_ANY_THROW(kernel.configure());
}
-TEST_F(AddTest, Invalid_Input_Type_NEG)
+TEST_F(AddTest, Invalid_Output_Type_NEG)
{
Tensor input1_tensor = makeInputTensor<DataType::S64>({1}, {1}, _memory_manager.get());
Tensor input2_tensor = makeInputTensor<DataType::S64>({1}, {2}, _memory_manager.get());
- Tensor output_tensor = makeOutputTensor(DataType::S64);
+ Tensor output_tensor = makeOutputTensor(DataType::S32);
+
+ AddParams params{};
+ params.activation = Activation::RELU;
+
+ Add kernel(&input1_tensor, &input2_tensor, &output_tensor, params);
+ EXPECT_ANY_THROW(kernel.configure());
+}
+
+TEST_F(AddTest, Invalid_Input_Type_NEG)
+{
+ Tensor input1_tensor = makeInputTensor<DataType::U64>({1}, {1}, _memory_manager.get());
+ Tensor input2_tensor = makeInputTensor<DataType::U64>({1}, {2}, _memory_manager.get());
+ Tensor output_tensor = makeOutputTensor(DataType::U64);
AddParams params{};
params.activation = Activation::RELU;
@@ -263,6 +339,19 @@ TEST_F(AddTest, Invalid_Input_Type_NEG)
EXPECT_ANY_THROW(kernel.execute());
}
+TEST_F(AddTest, Invalid_Quantization_NEG)
+{
+ Tensor input1_tensor = makeInputTensor<DataType::S16>({1}, {1}, _memory_manager.get());
+ Tensor input2_tensor = makeInputTensor<DataType::S16>({1}, {2}, _memory_manager.get());
+ Tensor output_tensor = makeOutputTensor(DataType::S16);
+
+ AddParams params{};
+ params.activation = Activation::NONE;
+
+ Add kernel(&input1_tensor, &input2_tensor, &output_tensor, params);
+ EXPECT_ANY_THROW(kernel.configure());
+}
+
} // namespace
} // namespace kernels
} // namespace luci_interpreter
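
The expected vectors in CheckInteger come from a broadcast add followed by the RELU
clamp. A worked check of the first four entries for test_shapes[0] = {1, 1, 3, 2},
where input1 starts {-1, 2, ...} and input2 starts {4, 1, -3, -1, ...}:

    #include <algorithm>
    inline int relu(int x) { return std::max(0, x); }
    // relu(-1 + 4) == 3, relu(2 + 1) == 3   -> leading expected entries {3, 3, ...}
    // relu(-1 - 3) == 0, relu(2 - 1) == 1   -> next expected entries {0, 1, ...}
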
diff --git a/compiler/luci-interpreter/src/kernels/ArgMax.test.cpp b/compiler/luci-interpreter/src/kernels/ArgMax.test.cpp
index 119c69ccf..474f4b321 100644
--- a/compiler/luci-interpreter/src/kernels/ArgMax.test.cpp
+++ b/compiler/luci-interpreter/src/kernels/ArgMax.test.cpp
@@ -57,7 +57,7 @@ template <typename T> class ArgMaxTest : public ::testing::Test
};
using DataTypes = ::testing::Types<float, uint8_t>;
-TYPED_TEST_CASE(ArgMaxTest, DataTypes);
+TYPED_TEST_SUITE(ArgMaxTest, DataTypes);
TYPED_TEST(ArgMaxTest, Simple)
{
diff --git a/compiler/luci-interpreter/src/kernels/AveragePool2D.cpp b/compiler/luci-interpreter/src/kernels/AveragePool2D.cpp
index 5545fb4d4..d3bade9e4 100644
--- a/compiler/luci-interpreter/src/kernels/AveragePool2D.cpp
+++ b/compiler/luci-interpreter/src/kernels/AveragePool2D.cpp
@@ -18,8 +18,7 @@
#include "kernels/Utils.h"
-#include <tensorflow/lite/kernels/internal/reference/integer_ops/pooling.h>
-#include <tensorflow/lite/kernels/internal/reference/pooling.h>
+#include "PALAveragePool2d.h"
#include <stdexcept>
@@ -29,8 +28,9 @@ namespace luci_interpreter
namespace kernels
{
-AveragePool2D::AveragePool2D(const Tensor *input, Tensor *output, const Pool2DParams &params)
- : KernelWithParams<Pool2DParams>({input}, {output}, params)
+AveragePool2D::AveragePool2D(const Tensor *input, Tensor *output, Tensor *scratchpad,
+ const Pool2DParams &params)
+ : KernelWithParams<Pool2DParams>({input}, {output, scratchpad}, params)
{
}
@@ -76,6 +76,10 @@ void AveragePool2D::configure()
LUCI_INTERPRETER_CHECK(output()->zero_point() == input()->zero_point());
}
output()->resize({batches, output_height, output_width, depth});
+
+ auto scratchpad = getOutputTensors()[1];
+ luci_interpreter_pal::SetupScratchpadTensor(scratchpad, input()->element_type(),
+ getTensorShape(input()), getTensorShape(output()));
}
void AveragePool2D::execute() const
@@ -155,9 +159,14 @@ void AveragePool2D::evalSInt8() const
params.quantized_activation_min = activation_min;
params.quantized_activation_max = activation_max;
- tflite::reference_integer_ops::AveragePool(
+ auto scratchpad = getOutputTensors()[1];
+ int8_t *scratchpad_data = nullptr;
+ if (scratchpad->is_allocatable())
+ scratchpad_data = scratchpad->data<int8_t>();
+
+ luci_interpreter_pal::AveragePool<int8_t>(
params, getTensorShape(input()), getTensorData<int8_t>(input()), getTensorShape(output()),
- getTensorData<int8_t>(output()));
+ getTensorData<int8_t>(output()), getTensorShape(scratchpad), scratchpad_data);
}
void AveragePool2D::evalSInt16() const
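
The scratchpad change above follows a pattern repeated across these kernels: the
scratchpad rides along as a second output tensor, configure() asks the PAL to size it
(or mark it non-allocatable), and eval dereferences it only when memory was actually
granted. A hedged sketch of a PAL stub for a platform that needs no scratch memory;
the real per-platform bodies live in the PAL headers, not in this diff:

    namespace luci_interpreter_pal
    {
    inline void SetupScratchpadTensor(luci_interpreter::Tensor *scratchpad,
                                      luci_interpreter::DataType /*input_data_type*/,
                                      const tflite::RuntimeShape & /*input_shape*/,
                                      const tflite::RuntimeShape & /*output_shape*/)
    {
      // No scratch needed: eval will then pass scratchpad_data == nullptr.
      scratchpad->set_allocatable(false);
    }
    } // namespace luci_interpreter_pal
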
diff --git a/compiler/luci-interpreter/src/kernels/AveragePool2D.h b/compiler/luci-interpreter/src/kernels/AveragePool2D.h
index b98367f31..2c8fe16e7 100644
--- a/compiler/luci-interpreter/src/kernels/AveragePool2D.h
+++ b/compiler/luci-interpreter/src/kernels/AveragePool2D.h
@@ -28,7 +28,8 @@ namespace kernels
class AveragePool2D : public KernelWithParams<Pool2DParams>
{
public:
- AveragePool2D(const Tensor *input, Tensor *output, const Pool2DParams &params);
+ AveragePool2D(const Tensor *input, Tensor *output, Tensor *scratchpad,
+ const Pool2DParams &params);
const Tensor *input() const { return _inputs[0]; }
Tensor *output() const { return _outputs[0]; }
diff --git a/compiler/luci-interpreter/src/kernels/AveragePool2D.test.cpp b/compiler/luci-interpreter/src/kernels/AveragePool2D.test.cpp
index 7ed421129..478bfa68e 100644
--- a/compiler/luci-interpreter/src/kernels/AveragePool2D.test.cpp
+++ b/compiler/luci-interpreter/src/kernels/AveragePool2D.test.cpp
@@ -46,6 +46,7 @@ TEST_F(AveragePool2DTest, Float)
Tensor input_tensor =
makeInputTensor<DataType::FLOAT32>(input_shape, input_data, _memory_manager.get());
Tensor output_tensor = makeOutputTensor(DataType::FLOAT32);
+ Tensor scratchpad(DataType::FLOAT32, Shape({}), {}, "");
Pool2DParams params{};
params.padding = Padding::VALID;
@@ -55,8 +56,9 @@ TEST_F(AveragePool2DTest, Float)
params.stride_width = 2;
params.activation = Activation::RELU6;
- AveragePool2D kernel(&input_tensor, &output_tensor, params);
+ AveragePool2D kernel(&input_tensor, &output_tensor, &scratchpad, params);
kernel.configure();
+ _memory_manager->allocate_memory(scratchpad);
_memory_manager->allocate_memory(output_tensor);
kernel.execute();
@@ -78,6 +80,7 @@ TEST_F(AveragePool2DTest, Uint8_0)
Tensor input_tensor = makeInputTensor<DataType::U8>(
{1, 2, 4, 1}, quant_param.first, quant_param.second, input_data, _memory_manager.get());
Tensor output_tensor = makeOutputTensor(DataType::U8, quant_param.first, quant_param.second);
+ Tensor scratchpad(DataType::U8, Shape({}), {}, "");
Pool2DParams params{};
params.padding = Padding::VALID;
@@ -87,8 +90,9 @@ TEST_F(AveragePool2DTest, Uint8_0)
params.stride_width = 2;
params.activation = Activation::RELU6;
- AveragePool2D kernel(&input_tensor, &output_tensor, params);
+ AveragePool2D kernel(&input_tensor, &output_tensor, &scratchpad, params);
kernel.configure();
+ _memory_manager->allocate_memory(scratchpad);
_memory_manager->allocate_memory(output_tensor);
kernel.execute();
@@ -107,6 +111,7 @@ TEST_F(AveragePool2DTest, Uint8_1)
Tensor input_tensor = makeInputTensor<DataType::U8>(
{1, 2, 4, 1}, quant_param.first, quant_param.second, input_data, _memory_manager.get());
Tensor output_tensor = makeOutputTensor(DataType::U8, quant_param.first, quant_param.second);
+ Tensor scratchpad(DataType::U8, Shape({}), {}, "");
Pool2DParams params{};
params.padding = Padding::VALID;
@@ -116,9 +121,10 @@ TEST_F(AveragePool2DTest, Uint8_1)
params.stride_width = 2;
params.activation = Activation::RELU6;
- AveragePool2D kernel(&input_tensor, &output_tensor, params);
+ AveragePool2D kernel(&input_tensor, &output_tensor, &scratchpad, params);
kernel.configure();
_memory_manager->allocate_memory(output_tensor);
+ _memory_manager->allocate_memory(scratchpad);
kernel.execute();
EXPECT_THAT(dequantizeTensorData(output_tensor), FloatArrayNear({2.75, 6.0}));
@@ -141,6 +147,7 @@ TEST_F(AveragePool2DTest, SInt16)
Tensor input_tensor =
makeInputTensor<DataType::S16>(input_shape, 0.5, 0, input_data, _memory_manager.get());
Tensor output_tensor = makeOutputTensor(DataType::S16, 0.5, 0);
+ Tensor scratchpad(DataType::S16, Shape({}), {}, "");
Pool2DParams params{};
params.padding = Padding::VALID;
@@ -150,8 +157,9 @@ TEST_F(AveragePool2DTest, SInt16)
params.stride_width = 2;
params.activation = Activation::RELU6;
- AveragePool2D kernel(&input_tensor, &output_tensor, params);
+ AveragePool2D kernel(&input_tensor, &output_tensor, &scratchpad, params);
kernel.configure();
+ _memory_manager->allocate_memory(scratchpad);
_memory_manager->allocate_memory(output_tensor);
kernel.execute();
@@ -174,6 +182,7 @@ TEST_F(AveragePool2DTest, SInt8)
Tensor input_tensor = makeInputTensor<DataType::S8>(
input_shape, quant_param.first, quant_param.second, input_data, _memory_manager.get());
Tensor output_tensor = makeOutputTensor(DataType::S8, quant_param.first, quant_param.second);
+ Tensor scratchpad(DataType::S8, Shape({}), {}, "");
Pool2DParams params{};
params.padding = Padding::VALID;
@@ -183,8 +192,9 @@ TEST_F(AveragePool2DTest, SInt8)
params.stride_width = 2;
params.activation = Activation::RELU6;
- AveragePool2D kernel(&input_tensor, &output_tensor, params);
+ AveragePool2D kernel(&input_tensor, &output_tensor, &scratchpad, params);
kernel.configure();
+ _memory_manager->allocate_memory(scratchpad);
_memory_manager->allocate_memory(output_tensor);
kernel.execute();
@@ -203,6 +213,7 @@ TEST_F(AveragePool2DTest, Invalid_Input_Shape_NEG)
Tensor input_tensor =
makeInputTensor<DataType::FLOAT32>(input_shape, input_data, _memory_manager.get());
Tensor output_tensor = makeOutputTensor(DataType::FLOAT32);
+ Tensor scratchpad(DataType::FLOAT32, Shape({}), {}, "");
Pool2DParams params{};
params.padding = Padding::VALID;
@@ -212,7 +223,7 @@ TEST_F(AveragePool2DTest, Invalid_Input_Shape_NEG)
params.stride_width = 2;
params.activation = Activation::RELU6;
- AveragePool2D kernel(&input_tensor, &output_tensor, params);
+ AveragePool2D kernel(&input_tensor, &output_tensor, &scratchpad, params);
EXPECT_ANY_THROW(kernel.configure());
}
@@ -227,6 +238,7 @@ TEST_F(AveragePool2DTest, In_Out_Type_NEG)
Tensor input_tensor =
makeInputTensor<DataType::FLOAT32>(input_shape, input_data, _memory_manager.get());
Tensor output_tensor = makeOutputTensor(DataType::U8);
+ Tensor scratchpad(DataType::FLOAT32, Shape({}), {}, "");
Pool2DParams params{};
params.padding = Padding::VALID;
@@ -236,7 +248,7 @@ TEST_F(AveragePool2DTest, In_Out_Type_NEG)
params.stride_width = 2;
params.activation = Activation::RELU6;
- AveragePool2D kernel(&input_tensor, &output_tensor, params);
+ AveragePool2D kernel(&input_tensor, &output_tensor, &scratchpad, params);
EXPECT_ANY_THROW(kernel.configure());
}
@@ -252,6 +264,7 @@ TEST_F(AveragePool2DTest, Quant_Param_NEG)
Tensor input_tensor = makeInputTensor<DataType::U8>(
{1, 2, 4, 1}, quant_param1.first, quant_param1.second, input_data, _memory_manager.get());
Tensor output_tensor = makeOutputTensor(DataType::U8, quant_param2.first, quant_param2.second);
+ Tensor scratchpad(DataType::U8, Shape({}), {}, "");
Pool2DParams params{};
params.padding = Padding::VALID;
@@ -261,7 +274,7 @@ TEST_F(AveragePool2DTest, Quant_Param_NEG)
params.stride_width = 2;
params.activation = Activation::RELU6;
- AveragePool2D kernel(&input_tensor, &output_tensor, params);
+ AveragePool2D kernel(&input_tensor, &output_tensor, &scratchpad, params);
EXPECT_ANY_THROW(kernel.configure());
}
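
One ordering detail the tests above all share: configure() runs before the scratchpad
is allocated, because configure() is what sizes the scratchpad (or marks it
non-allocatable), and allocation must see the final shape:

    kernel.configure();                              // sizes or disables the scratchpad
    _memory_manager->allocate_memory(scratchpad);    // safe only after configure()
    _memory_manager->allocate_memory(output_tensor);
    kernel.execute();
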
diff --git a/compiler/luci-interpreter/src/kernels/BatchMatMul.cpp b/compiler/luci-interpreter/src/kernels/BatchMatMul.cpp
new file mode 100644
index 000000000..24ca22996
--- /dev/null
+++ b/compiler/luci-interpreter/src/kernels/BatchMatMul.cpp
@@ -0,0 +1,188 @@
+/*
+ * Copyright (c) 2022 Samsung Electronics Co., Ltd. All Rights Reserved
+ * Copyright 2020 The TensorFlow Authors. All Rights Reserved.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "kernels/BatchMatMul.h"
+#include "kernels/Utils.h"
+
+#include "PALBatchMatMul.h"
+
+#include <tensorflow/lite/kernels/internal/reference/transpose.h>
+
+#include <stdexcept>
+
+namespace
+{
+
+tflite::RuntimeShape SwapRowColumnDims(const tflite::RuntimeShape &shape)
+{
+ tflite::RuntimeShape swapped_shape(shape);
+ const int32_t dims = shape.DimensionsCount();
+ swapped_shape.SetDim(dims - 2, shape.Dims(dims - 1));
+ swapped_shape.SetDim(dims - 1, shape.Dims(dims - 2));
+ return swapped_shape;
+}
+
+} // namespace
+
+namespace luci_interpreter
+{
+namespace kernels
+{
+
+BatchMatMul::BatchMatMul(const Tensor *x, const Tensor *y, Tensor *output, Tensor *x_tmp,
+ Tensor *y_tmp, const BatchMatMulParams &params)
+ : KernelWithParams({x, y}, {output, x_tmp, y_tmp}, params)
+{
+}
+
+void BatchMatMul::configure()
+{
+ auto lhs = x();
+ auto rhs = y();
+ auto adj_x = params().adj_x;
+ auto adj_y = params().adj_y;
+
+ // TODO Support non-float types
+ if (lhs->element_type() != DataType::FLOAT32 || rhs->element_type() != DataType::FLOAT32)
+ throw std::runtime_error("Unsupported type.");
+
+ LUCI_INTERPRETER_CHECK(lhs->element_type() == rhs->element_type());
+
+ auto lhs_rank = lhs->shape().num_dims();
+ auto rhs_rank = rhs->shape().num_dims();
+ LUCI_INTERPRETER_CHECK(lhs_rank >= 2 && lhs_rank <= 4);
+ LUCI_INTERPRETER_CHECK(rhs_rank >= 2 && rhs_rank <= 4);
+
+ auto lhs_scratchpad = temp_lhs();
+ auto rhs_scratchpad = temp_rhs();
+ luci_interpreter_pal::SetupScratchpadTensor(lhs_scratchpad, rhs_scratchpad, getTensorShape(lhs),
+ getTensorShape(rhs));
+
+ auto output_rank = std::max(lhs_rank, rhs_rank);
+
+ auto extended_lhs_shape = tflite::RuntimeShape::ExtendedShape(output_rank, getTensorShape(lhs));
+ auto extended_rhs_shape = tflite::RuntimeShape::ExtendedShape(output_rank, getTensorShape(rhs));
+
+ // Ensure any batch dimensions obey broadcasting rules.
+ for (int i = 0; i < output_rank - 2; ++i)
+ {
+ const int lhs_dim = extended_lhs_shape.Dims(i);
+ const int rhs_dim = extended_rhs_shape.Dims(i);
+ if (lhs_dim != rhs_dim)
+ {
+ if (lhs_dim != 1)
+ {
+ LUCI_INTERPRETER_CHECK(rhs_dim == 1);
+ }
+ }
+ }
+
+ // Ensure other dimensions work for matrix multiplication.
+ int accum_dim_lhs =
+ adj_x ? extended_lhs_shape.Dims(output_rank - 2) : extended_lhs_shape.Dims(output_rank - 1);
+ int accum_dim_rhs =
+ adj_y ? extended_rhs_shape.Dims(output_rank - 1) : extended_rhs_shape.Dims(output_rank - 2);
+ LUCI_INTERPRETER_CHECK(accum_dim_lhs == accum_dim_rhs);
+
+ Shape output_shape(output_rank);
+ // Fill in any broadcast dimensions.
+ for (int i = 0; i < output_rank - 2; ++i)
+ {
+ const int lhs_dim = extended_lhs_shape.Dims(i);
+ const int rhs_dim = extended_rhs_shape.Dims(i);
+ int broadcast_dim = lhs_dim;
+ if ((lhs_dim != rhs_dim) && (lhs_dim == 1))
+ {
+ broadcast_dim = rhs_dim;
+ }
+ output_shape.dim(i) = broadcast_dim;
+ }
+ // Fill in the matmul dimensions.
+ int lhs_rows_index = adj_x ? output_rank - 1 : output_rank - 2;
+ int rhs_cols_index = adj_y ? output_rank - 2 : output_rank - 1;
+
+ output_shape.dim(output_rank - 2) = extended_lhs_shape.Dims(lhs_rows_index);
+ output_shape.dim(output_rank - 1) = extended_rhs_shape.Dims(rhs_cols_index);
+
+ output()->resize(output_shape);
+}
+
+void TransposeRowsColumns(const Tensor *tensor_in, Tensor *tensor_out)
+{
+ tflite::RuntimeShape transposed_shape(getTensorShape(tensor_in));
+ tflite::RuntimeShape shape(getTensorShape(tensor_in));
+ tflite::TransposeParams params;
+ int rank = shape.DimensionsCount();
+ params.perm_count = rank;
+ for (int i = 0; i < rank - 2; ++i)
+ {
+ params.perm[i] = i;
+ }
+ // Transpose the last two dimensions.
+ params.perm[rank - 2] = rank - 1;
+ params.perm[rank - 1] = rank - 2;
+ transposed_shape.SetDim(rank - 1, shape.Dims(rank - 2));
+ transposed_shape.SetDim(rank - 2, shape.Dims(rank - 1));
+ switch (tensor_in->element_type())
+ {
+ case DataType::FLOAT32:
+ tflite::reference_ops::Transpose(params, shape, getTensorData<float>(tensor_in),
+ transposed_shape, getTensorData<float>(tensor_out));
+ break;
+ default:
+ throw std::runtime_error("Only suppport fp32 BatchMatMul for now.");
+ }
+}
+
+void BatchMatMul::execute() const
+{
+ auto lhs = x();
+ auto rhs = y();
+
+ bool adj_x = params().adj_x;
+ bool adj_y = params().adj_y;
+
+ auto orig_lhs_shape = getTensorShape(lhs);
+ auto orig_rhs_shape = getTensorShape(rhs);
+
+ auto rhs_tensor = adj_y ? rhs : temp_rhs();
+ auto lhs_tensor = adj_x ? temp_lhs() : lhs;
+ if (not adj_y)
+ {
+ TransposeRowsColumns(rhs, temp_rhs());
+ }
+ if (adj_x)
+ {
+ TransposeRowsColumns(lhs, temp_lhs());
+ }
+ tflite::RuntimeShape rhs_shape = adj_y ? orig_rhs_shape : SwapRowColumnDims(orig_rhs_shape);
+ tflite::RuntimeShape lhs_shape = adj_x ? orig_lhs_shape : SwapRowColumnDims(orig_lhs_shape);
+
+ switch (x()->element_type())
+ {
+ case DataType::FLOAT32:
+ luci_interpreter_pal::BatchMatMul(rhs_shape, getTensorData<float>(rhs_tensor), lhs_shape,
+ getTensorData<float>(lhs_tensor), getTensorShape(output()),
+ getTensorData<float>(output()));
+ break;
+ default:
+ throw std::runtime_error("Unsupported type.");
+ }
+}
+
+} // namespace kernels
+} // namespace luci_interpreter
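
A worked shape example for configure() above, using the operands from the
Float_DiffBatch test: lhs {2, 1, 6}, rhs {1, 6, 4}, adj_x = adj_y = false.

    // batch dims:  lhs 2 vs rhs 1 -> the 1 broadcasts, output batch = 2
    // accum dims:  lhs last dim (6) == rhs second-to-last dim (6) -> valid matmul
    // matmul dims: lhs rows = 1, rhs cols = 4
    // output:      {2, 1, 4}

In execute(), whichever operand is not already in adjoint form gets transposed into
its temp tensor (rhs_tensor = adj_y ? rhs : temp_rhs()), since the PAL BatchMatMul
apparently consumes row/column-swapped layouts; that is why temp_lhs()/temp_rhs()
exist as extra output tensors, and why adj_y = true lets the kernel skip the rhs
transpose entirely.
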
diff --git a/compiler/luci-interpreter/src/kernels/BatchMatMul.h b/compiler/luci-interpreter/src/kernels/BatchMatMul.h
new file mode 100644
index 000000000..744f49795
--- /dev/null
+++ b/compiler/luci-interpreter/src/kernels/BatchMatMul.h
@@ -0,0 +1,49 @@
+/*
+ * Copyright (c) 2022 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef LUCI_INTERPRETER_KERNELS_BATCHMATMUL_H
+#define LUCI_INTERPRETER_KERNELS_BATCHMATMUL_H
+
+#include "core/Kernel.h"
+#include "core/KernelParams.h"
+
+namespace luci_interpreter
+{
+namespace kernels
+{
+
+class BatchMatMul : public KernelWithParams<BatchMatMulParams>
+{
+public:
+ BatchMatMul(const Tensor *x, const Tensor *y, Tensor *output, Tensor *x_tmp, Tensor *y_tmp,
+ const BatchMatMulParams &params);
+
+ const Tensor *x() const { return _inputs[0]; }
+ const Tensor *y() const { return _inputs[1]; }
+ Tensor *output() const { return _outputs[0]; }
+
+ void configure() override;
+ void execute() const override;
+
+private:
+ Tensor *temp_lhs() const { return _outputs[1]; }
+ Tensor *temp_rhs() const { return _outputs[2]; }
+};
+
+} // namespace kernels
+} // namespace luci_interpreter
+
+#endif // LUCI_INTERPRETER_KERNELS_BATCHMATMUL_H
diff --git a/compiler/luci-interpreter/src/kernels/BatchMatMul.test.cpp b/compiler/luci-interpreter/src/kernels/BatchMatMul.test.cpp
new file mode 100644
index 000000000..edfa3a685
--- /dev/null
+++ b/compiler/luci-interpreter/src/kernels/BatchMatMul.test.cpp
@@ -0,0 +1,272 @@
+/*
+ * Copyright (c) 2022 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "kernels/BatchMatMul.h"
+#include "kernels/TestUtils.h"
+#include "luci_interpreter/TestMemoryManager.h"
+
+namespace luci_interpreter
+{
+namespace kernels
+{
+namespace
+{
+
+using namespace testing;
+
+class BatchMatMulTest : public ::testing::Test
+{
+protected:
+ void SetUp() override { _memory_manager = std::make_unique<TestMemoryManager>(); }
+
+ std::unique_ptr<IMemoryManager> _memory_manager;
+};
+
+TEST_F(BatchMatMulTest, Float)
+{
+ std::vector<float> lhs_data = {1, 2, 3, 4, 5, 6};
+ std::vector<float> rhs_data = {7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18};
+ Tensor lhs_tensor =
+ makeInputTensor<DataType::FLOAT32>({1, 2, 3}, lhs_data, _memory_manager.get());
+ Tensor rhs_tensor =
+ makeInputTensor<DataType::FLOAT32>({1, 3, 4}, rhs_data, _memory_manager.get());
+ Tensor output_tensor = makeOutputTensor(DataType::FLOAT32);
+ Tensor lhs_scratch(DataType::FLOAT32, Shape({}), {}, "");
+ Tensor rhs_scratch(DataType::FLOAT32, Shape({}), {}, "");
+
+ BatchMatMulParams params;
+ params.adj_x = false;
+ params.adj_y = false;
+
+ BatchMatMul kernel(&lhs_tensor, &rhs_tensor, &output_tensor, &lhs_scratch, &rhs_scratch, params);
+ kernel.configure();
+ _memory_manager->allocate_memory(lhs_scratch);
+ _memory_manager->allocate_memory(rhs_scratch);
+ _memory_manager->allocate_memory(output_tensor);
+ kernel.execute();
+
+ EXPECT_THAT(extractTensorData<float>(output_tensor),
+ FloatArrayNear({74., 80., 86., 92., 173., 188., 203., 218.}));
+ EXPECT_THAT(extractTensorShape(output_tensor), ::testing::ElementsAreArray({1, 2, 4}));
+}
+
+TEST_F(BatchMatMulTest, Float_SimpleRHSAdjoint)
+{
+ std::vector<float> lhs_data = {1, 2, 3, 4, 5, 6};
+ std::vector<float> rhs_data = {7, 11, 15, 8, 12, 16, 9, 13, 17, 10, 14, 18};
+ Tensor lhs_tensor =
+ makeInputTensor<DataType::FLOAT32>({1, 2, 3}, lhs_data, _memory_manager.get());
+ Tensor rhs_tensor =
+ makeInputTensor<DataType::FLOAT32>({1, 4, 3}, rhs_data, _memory_manager.get());
+ Tensor output_tensor = makeOutputTensor(DataType::FLOAT32);
+ Tensor lhs_scratch(DataType::FLOAT32, Shape({}), {}, "");
+ Tensor rhs_scratch(DataType::FLOAT32, Shape({}), {}, "");
+
+ BatchMatMulParams params;
+ params.adj_x = false;
+ params.adj_y = true;
+
+ BatchMatMul kernel(&lhs_tensor, &rhs_tensor, &output_tensor, &lhs_scratch, &rhs_scratch, params);
+ kernel.configure();
+ _memory_manager->allocate_memory(lhs_scratch);
+ _memory_manager->allocate_memory(rhs_scratch);
+ _memory_manager->allocate_memory(output_tensor);
+ kernel.execute();
+
+ EXPECT_THAT(extractTensorData<float>(output_tensor),
+ FloatArrayNear({74., 80., 86., 92., 173., 188., 203., 218.}));
+ EXPECT_THAT(extractTensorShape(output_tensor), ::testing::ElementsAreArray({1, 2, 4}));
+}
+
+TEST_F(BatchMatMulTest, Float_SimpleLHSAdjoint)
+{
+ std::vector<float> lhs_data = {1, 4, 2, 5, 3, 6};
+ std::vector<float> rhs_data = {7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18};
+ Tensor lhs_tensor =
+ makeInputTensor<DataType::FLOAT32>({1, 3, 2}, lhs_data, _memory_manager.get());
+ Tensor rhs_tensor =
+ makeInputTensor<DataType::FLOAT32>({1, 3, 4}, rhs_data, _memory_manager.get());
+ Tensor output_tensor = makeOutputTensor(DataType::FLOAT32);
+ Tensor lhs_scratch(DataType::FLOAT32, Shape({}), {}, "");
+ Tensor rhs_scratch(DataType::FLOAT32, Shape({}), {}, "");
+
+ BatchMatMulParams params;
+ params.adj_x = true;
+ params.adj_y = false;
+
+ BatchMatMul kernel(&lhs_tensor, &rhs_tensor, &output_tensor, &lhs_scratch, &rhs_scratch, params);
+ kernel.configure();
+ _memory_manager->allocate_memory(lhs_scratch);
+ _memory_manager->allocate_memory(rhs_scratch);
+ _memory_manager->allocate_memory(output_tensor);
+ kernel.execute();
+
+ EXPECT_THAT(extractTensorData<float>(output_tensor),
+ FloatArrayNear({74., 80., 86., 92., 173., 188., 203., 218.}));
+ EXPECT_THAT(extractTensorShape(output_tensor), ::testing::ElementsAreArray({1, 2, 4}));
+}
+
+TEST_F(BatchMatMulTest, Float_BatchSizeTwo)
+{
+ std::vector<float> lhs_data = {1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12};
+ std::vector<float> rhs_data = {7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18,
+ 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30};
+ Tensor lhs_tensor =
+ makeInputTensor<DataType::FLOAT32>({2, 2, 3}, lhs_data, _memory_manager.get());
+ Tensor rhs_tensor =
+ makeInputTensor<DataType::FLOAT32>({2, 3, 4}, rhs_data, _memory_manager.get());
+ Tensor output_tensor = makeOutputTensor(DataType::FLOAT32);
+ Tensor lhs_scratch(DataType::FLOAT32, Shape({}), {}, "");
+ Tensor rhs_scratch(DataType::FLOAT32, Shape({}), {}, "");
+
+ BatchMatMulParams params;
+ params.adj_x = false;
+ params.adj_y = false;
+
+ BatchMatMul kernel(&lhs_tensor, &rhs_tensor, &output_tensor, &lhs_scratch, &rhs_scratch, params);
+ kernel.configure();
+ _memory_manager->allocate_memory(lhs_scratch);
+ _memory_manager->allocate_memory(rhs_scratch);
+ _memory_manager->allocate_memory(output_tensor);
+ kernel.execute();
+
+ EXPECT_THAT(extractTensorData<float>(output_tensor),
+ FloatArrayNear({74., 80., 86., 92., 173., 188., 203., 218., 560., 584., 608., 632.,
+ 767., 800., 833., 866.}));
+ EXPECT_THAT(extractTensorShape(output_tensor), ::testing::ElementsAreArray({2, 2, 4}));
+}
+
+TEST_F(BatchMatMulTest, Float_DiffBatch)
+{
+ std::vector<float> lhs_data = {1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12};
+ std::vector<float> rhs_data = {7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18,
+ 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30};
+ Tensor lhs_tensor =
+ makeInputTensor<DataType::FLOAT32>({2, 1, 6}, lhs_data, _memory_manager.get());
+ Tensor rhs_tensor =
+ makeInputTensor<DataType::FLOAT32>({1, 6, 4}, rhs_data, _memory_manager.get());
+ Tensor output_tensor = makeOutputTensor(DataType::FLOAT32);
+ Tensor lhs_scratch(DataType::FLOAT32, Shape({}), {}, "");
+ Tensor rhs_scratch(DataType::FLOAT32, Shape({}), {}, "");
+
+ BatchMatMulParams params;
+ params.adj_x = false;
+ params.adj_y = false;
+
+ BatchMatMul kernel(&lhs_tensor, &rhs_tensor, &output_tensor, &lhs_scratch, &rhs_scratch, params);
+ kernel.configure();
+ _memory_manager->allocate_memory(lhs_scratch);
+ _memory_manager->allocate_memory(rhs_scratch);
+ _memory_manager->allocate_memory(output_tensor);
+ kernel.execute();
+
+ EXPECT_THAT(extractTensorData<float>(output_tensor),
+ FloatArrayNear({427., 448., 469., 490., 1039., 1096., 1153., 1210.}));
+ EXPECT_THAT(extractTensorShape(output_tensor), ::testing::ElementsAreArray({2, 1, 4}));
+}
+
+TEST_F(BatchMatMulTest, Invalid_Shape_NEG)
+{
+ Tensor lhs_tensor =
+ makeInputTensor<DataType::FLOAT32>({1, 2, 2}, {1, 2, 3, 4}, _memory_manager.get());
+ Tensor rhs_tensor =
+ makeInputTensor<DataType::FLOAT32>({1, 3, 2}, {5, 6, 7, 8, 9, 10}, _memory_manager.get());
+ Tensor output_tensor = makeOutputTensor(DataType::FLOAT32);
+ Tensor lhs_scratch(DataType::FLOAT32, Shape({}), {}, "");
+ Tensor rhs_scratch(DataType::FLOAT32, Shape({}), {}, "");
+
+ BatchMatMulParams params;
+ params.adj_x = false;
+ params.adj_y = false;
+
+ BatchMatMul kernel(&lhs_tensor, &rhs_tensor, &output_tensor, &lhs_scratch, &rhs_scratch, params);
+ EXPECT_ANY_THROW(kernel.configure());
+}
+
+TEST_F(BatchMatMulTest, Invalid_Batch_NEG)
+{
+ Tensor lhs_tensor =
+ makeInputTensor<DataType::FLOAT32>({2, 1, 3}, {1, 2, 3, 4, 5, 6}, _memory_manager.get());
+ Tensor rhs_tensor = makeInputTensor<DataType::FLOAT32>({3, 3, 1}, {5, 6, 7, 8, 9, 10, 11, 12, 13},
+ _memory_manager.get());
+ Tensor output_tensor = makeOutputTensor(DataType::FLOAT32);
+ Tensor lhs_scratch(DataType::FLOAT32, Shape({}), {}, "");
+ Tensor rhs_scratch(DataType::FLOAT32, Shape({}), {}, "");
+
+ BatchMatMulParams params;
+ params.adj_x = false;
+ params.adj_y = false;
+
+ BatchMatMul kernel(&lhs_tensor, &rhs_tensor, &output_tensor, &lhs_scratch, &rhs_scratch, params);
+ EXPECT_ANY_THROW(kernel.configure());
+}
+
+TEST_F(BatchMatMulTest, Invalid_Rank_NEG)
+{
+ Tensor lhs_tensor = makeInputTensor<DataType::FLOAT32>({4}, {1, 2, 3, 4}, _memory_manager.get());
+ Tensor rhs_tensor = makeInputTensor<DataType::FLOAT32>({1, 4, 2}, {5, 6, 7, 8, 9, 10, 11, 12},
+ _memory_manager.get());
+ Tensor output_tensor = makeOutputTensor(DataType::FLOAT32);
+ Tensor lhs_scratch(DataType::FLOAT32, Shape({}), {}, "");
+ Tensor rhs_scratch(DataType::FLOAT32, Shape({}), {}, "");
+
+ BatchMatMulParams params;
+ params.adj_x = false;
+ params.adj_y = false;
+
+ BatchMatMul kernel(&lhs_tensor, &rhs_tensor, &output_tensor, &lhs_scratch, &rhs_scratch, params);
+ EXPECT_ANY_THROW(kernel.configure());
+}
+
+TEST_F(BatchMatMulTest, Invalid_Rank2_NEG)
+{
+ Tensor lhs_tensor =
+ makeInputTensor<DataType::FLOAT32>({1, 1, 1, 1, 4}, {1, 2, 3, 4}, _memory_manager.get());
+ Tensor rhs_tensor = makeInputTensor<DataType::FLOAT32>({1, 4, 2}, {5, 6, 7, 8, 9, 10, 11, 12},
+ _memory_manager.get());
+ Tensor output_tensor = makeOutputTensor(DataType::FLOAT32);
+ Tensor lhs_scratch(DataType::FLOAT32, Shape({}), {}, "");
+ Tensor rhs_scratch(DataType::FLOAT32, Shape({}), {}, "");
+
+ BatchMatMulParams params;
+ params.adj_x = false;
+ params.adj_y = false;
+
+ BatchMatMul kernel(&lhs_tensor, &rhs_tensor, &output_tensor, &lhs_scratch, &rhs_scratch, params);
+ EXPECT_ANY_THROW(kernel.configure());
+}
+
+TEST_F(BatchMatMulTest, TypeMisMatch_NEG)
+{
+ Tensor lhs_tensor =
+ makeInputTensor<DataType::U8>({1, 2, 3}, {1, 2, 3, 4, 5, 6}, _memory_manager.get());
+ Tensor rhs_tensor =
+ makeInputTensor<DataType::FLOAT32>({1, 3, 2}, {5, 6, 7, 8, 9, 10}, _memory_manager.get());
+ Tensor output_tensor = makeOutputTensor(DataType::FLOAT32);
+ Tensor lhs_scratch(DataType::U8, Shape({}), {}, "");
+ Tensor rhs_scratch(DataType::FLOAT32, Shape({}), {}, "");
+
+ BatchMatMulParams params;
+ params.adj_x = false;
+ params.adj_y = false;
+
+ BatchMatMul kernel(&lhs_tensor, &rhs_tensor, &output_tensor, &lhs_scratch, &rhs_scratch, params);
+ EXPECT_ANY_THROW(kernel.configure());
+}
+
+} // namespace
+} // namespace kernels
+} // namespace luci_interpreter
diff --git a/compiler/luci-interpreter/src/kernels/BatchToSpaceND.test.cpp b/compiler/luci-interpreter/src/kernels/BatchToSpaceND.test.cpp
index f3a344974..52647a763 100644
--- a/compiler/luci-interpreter/src/kernels/BatchToSpaceND.test.cpp
+++ b/compiler/luci-interpreter/src/kernels/BatchToSpaceND.test.cpp
@@ -58,7 +58,7 @@ template <typename T> class BatchToSpaceNDTest : public ::testing::Test
};
using DataTypes = ::testing::Types<float, uint8_t>;
-TYPED_TEST_CASE(BatchToSpaceNDTest, DataTypes);
+TYPED_TEST_SUITE(BatchToSpaceNDTest, DataTypes);
TYPED_TEST(BatchToSpaceNDTest, Simple)
{
diff --git a/compiler/luci-interpreter/src/kernels/CMakeLists.txt b/compiler/luci-interpreter/src/kernels/CMakeLists.txt
index 1b7d0f66a..9f4ba0e0b 100644
--- a/compiler/luci-interpreter/src/kernels/CMakeLists.txt
+++ b/compiler/luci-interpreter/src/kernels/CMakeLists.txt
@@ -15,7 +15,9 @@ endmacro(REGISTER_KERNEL)
include(${KERNEL_REGISTER_FILE})
add_library(${LUCI_INTERPRETER_KERNELS} STATIC ${SOURCES})
-set_target_properties(${LUCI_INTERPRETER_KERNELS} PROPERTIES POSITION_INDEPENDENT_CODE ON)
+if (NOT NNCC_LIBRARY_NO_PIC)
+ set_target_properties(${LUCI_INTERPRETER_KERNELS} PROPERTIES POSITION_INDEPENDENT_CODE ON)
+endif(NOT NNCC_LIBRARY_NO_PIC)
target_include_directories(${LUCI_INTERPRETER_KERNELS} PUBLIC ${LUCI_INTERPRETER_SOURCE_DIR})
target_link_libraries(${LUCI_INTERPRETER_KERNELS} PUBLIC ${LUCI_INTERPRETER_CORE})
diff --git a/compiler/luci-interpreter/src/kernels/Cast.test.cpp b/compiler/luci-interpreter/src/kernels/Cast.test.cpp
index 731260522..4713ad34c 100644
--- a/compiler/luci-interpreter/src/kernels/Cast.test.cpp
+++ b/compiler/luci-interpreter/src/kernels/Cast.test.cpp
@@ -79,7 +79,7 @@ template <typename T> class CastTest : public ::testing::Test
using IntDataTypes =
::testing::Types<uint8_t, uint16_t, uint32_t, uint64_t, int8_t, int16_t, int32_t, int64_t>;
-TYPED_TEST_CASE(CastTest, IntDataTypes);
+TYPED_TEST_SUITE(CastTest, IntDataTypes);
TYPED_TEST(CastTest, FloatToInt)
{
diff --git a/compiler/luci-interpreter/src/kernels/Concatenation.cpp b/compiler/luci-interpreter/src/kernels/Concatenation.cpp
index 7cfdf34b9..46ee5941e 100644
--- a/compiler/luci-interpreter/src/kernels/Concatenation.cpp
+++ b/compiler/luci-interpreter/src/kernels/Concatenation.cpp
@@ -69,11 +69,21 @@ void Concatenation::configure()
Shape output_shape = t0->shape();
output_shape.dim(axis) = sum_axis;
- // TODO S8 type needs more checking: quantization parameters of all input tensors and the output
- // tensor should be the same. Note that there is no such requirement for U8 type.
- if (t0->element_type() == DataType::S8)
- throw std::runtime_error("Unsupported type.");
+ // If the input tensors are of INT8 type, the quantization parameters of all input
+ // tensors and of the output tensor should be the same.
+ for (auto current_tensor : _inputs)
+ {
+ if (current_tensor->element_type() == DataType::S8)
+ {
+ LUCI_INTERPRETER_CHECK(current_tensor->quantized_dimension() ==
+ output()->quantized_dimension());
+ LUCI_INTERPRETER_CHECK(current_tensor->zero_points().size() ==
+ current_tensor->scales().size());
+ LUCI_INTERPRETER_CHECK(current_tensor->zero_points() == output()->zero_points());
+ LUCI_INTERPRETER_CHECK(current_tensor->scales() == output()->scales());
+ }
+ }
output()->resize(output_shape);
}
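
The rationale for the new S8 checks: a quantized byte q only has meaning through
real_value = scale * (q - zero_point), and concatenation copies raw bytes, so the
mapping must be identical across all inputs and the output. A small illustration with
made-up parameter values:

    #include <cstdint>
    inline float dequant(int8_t q, float scale, int32_t zp) { return scale * (q - zp); }
    // dequant(10, 0.1f,  1) == 0.9f   // value under the input's params
    // dequant(10, 0.2f, -1) == 2.2f   // same byte under different params: silently wrong
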
diff --git a/compiler/luci-interpreter/src/kernels/Concatenation.test.cpp b/compiler/luci-interpreter/src/kernels/Concatenation.test.cpp
index e4b50611a..f893b38fd 100644
--- a/compiler/luci-interpreter/src/kernels/Concatenation.test.cpp
+++ b/compiler/luci-interpreter/src/kernels/Concatenation.test.cpp
@@ -183,12 +183,12 @@ TEST_F(ConcatenationTest, Mismatching_Input_Dimension_NEG)
EXPECT_ANY_THROW(kernel.configure());
}
-TEST_F(ConcatenationTest, Unsupported_Configure_Type_NEG)
+TEST_F(ConcatenationTest, Int8_Mismatching_Input_Type_NEG)
{
- std::vector<int8_t> input1_data{1, 2, 3, 4, 5, 6};
- std::vector<int8_t> input2_data{7, 8, 9, 10, 11, 12};
- Tensor input1_tensor = makeInputTensor<DataType::S8>({2, 3}, input1_data, _memory_manager.get());
- Tensor input2_tensor = makeInputTensor<DataType::S8>({2, 3}, input2_data, _memory_manager.get());
+ std::vector<uint8_t> input1_data{1, 2, 3, 4};
+ std::vector<int8_t> input2_data{5, 6, 7, 8};
+ Tensor input1_tensor = makeInputTensor<DataType::U8>({2, 2}, input1_data, _memory_manager.get());
+ Tensor input2_tensor = makeInputTensor<DataType::S8>({2, 2}, input2_data, _memory_manager.get());
Tensor output_tensor = makeOutputTensor(DataType::S8);
ConcatenationParams params{};
@@ -199,6 +199,51 @@ TEST_F(ConcatenationTest, Unsupported_Configure_Type_NEG)
EXPECT_ANY_THROW(kernel.configure());
}
+TEST_F(ConcatenationTest, Int8_Mismatching_Input_Output_Quant_Params_NEG)
+{
+ std::vector<float> input1_data{1, 2, 3, 4, 5, 6};
+ std::vector<float> input2_data{7, 8, 9, 10, 11, 12};
+ int quantized_dimension = 3;
+ std::vector<float> scales{0.1, 0.2, 0.3};
+ std::vector<int32_t> zero_points{1, -1, 1};
+
+ Tensor input1_tensor = makeInputTensor<DataType::S8>(
+ {1, 1, 2, 3}, scales, zero_points, quantized_dimension, input1_data, _memory_manager.get());
+ Tensor input2_tensor = makeInputTensor<DataType::S8>(
+ {1, 1, 2, 3}, scales, zero_points, quantized_dimension, input2_data, _memory_manager.get());
+ Tensor output_tensor = makeOutputTensor(DataType::S8, scales.at(0), zero_points.at(0));
+ ConcatenationParams params{};
+
+ params.axis = -1;
+ params.activation = luci::FusedActFunc::NONE;
+
+ Concatenation kernel({&input1_tensor, &input2_tensor}, &output_tensor, params);
+ EXPECT_ANY_THROW(kernel.configure());
+}
+
+TEST_F(ConcatenationTest, Int8_Mismatching_Zero_Point_NEG)
+{
+ std::vector<float> input1_data{1, 2, 3, 4};
+ std::vector<float> input2_data{5, 6, 7, 8};
+ float scale = 0.1;
+ int32_t zero_point_1 = 1;
+ int32_t zero_point_2 = -1;
+
+ Tensor input1_tensor =
+ makeInputTensor<DataType::S8>({2, 2}, scale, zero_point_1, input1_data, _memory_manager.get());
+ Tensor input2_tensor =
+ makeInputTensor<DataType::S8>({2, 2}, scale, zero_point_2, input2_data, _memory_manager.get());
+
+ Tensor output_tensor = makeOutputTensor(DataType::S8, scale, zero_point_1);
+ ConcatenationParams params{};
+
+ params.axis = -1;
+ params.activation = luci::FusedActFunc::NONE;
+
+ Concatenation kernel({&input1_tensor, &input2_tensor}, &output_tensor, params);
+ EXPECT_ANY_THROW(kernel.configure());
+}
+
// TODO: Remove this test when concat w/ fused_activation is supported
TEST_F(ConcatenationTest, With_Fused_Activation_NEG)
{
diff --git a/compiler/luci-interpreter/src/kernels/Conv2D.cpp b/compiler/luci-interpreter/src/kernels/Conv2D.cpp
index 5647f4c44..234f95425 100644
--- a/compiler/luci-interpreter/src/kernels/Conv2D.cpp
+++ b/compiler/luci-interpreter/src/kernels/Conv2D.cpp
@@ -30,8 +30,8 @@ namespace kernels
{
Conv2D::Conv2D(const Tensor *input, const Tensor *filter, const Tensor *bias, Tensor *output,
- Tensor *im2col, const Conv2DParams &params)
- : KernelWithParams<Conv2DParams>({input, filter, bias}, {output, im2col}, params)
+ Tensor *scratchpad, const Conv2DParams &params)
+ : KernelWithParams<Conv2DParams>({input, filter, bias}, {output, scratchpad}, params)
{
}
@@ -108,27 +108,18 @@ void Conv2D::configure()
output()->resize({batches, output_height, output_width, output_depth});
- // Allocate tensor for Im2Col, if needed.
- // The checks here should be aligned with the actual implementation.
- const bool need_dilated_im2col =
- _params.dilation_height_factor != 1 || _params.dilation_width_factor != 1;
- const bool need_non_dilated_im2col = _params.stride_height != 1 || _params.stride_width != 1 ||
- filter_height != 1 || filter_width != 1;
- _need_im2col =
- input()->element_type() != DataType::S16 && (need_dilated_im2col || need_non_dilated_im2col);
- if (_need_im2col)
- {
- const int input_depth = input_shape.dim(3);
- Shape im2col_shape{batches, output_height, output_width,
- input_depth * filter_height * filter_width};
- auto im2col = getOutputTensors()[1];
- im2col->resize(im2col_shape);
- }
- else
- {
- auto im2col = getOutputTensors()[1];
- im2col->set_allocatable(false);
- }
+ // Allocate tensor for scratchpad, if needed.
+ tflite::ConvParams params{};
+ params.padding_values.height = _padding_height;
+ params.padding_values.width = _padding_width;
+ params.stride_height = _params.stride_height;
+ params.stride_width = _params.stride_width;
+ params.dilation_height_factor = _params.dilation_height_factor;
+ params.dilation_width_factor = _params.dilation_width_factor;
+ auto scratchpad = getOutputTensors()[1];
+ luci_interpreter_pal::SetupScratchpadTensor(scratchpad, input()->element_type(), params,
+ getTensorShape(input()), getTensorShape(filter()),
+ getTensorShape(output()));
switch (_params.activation)
{
@@ -193,16 +184,16 @@ void Conv2D::evalFloat() const
params.float_activation_min = activation_min;
params.float_activation_max = activation_max;
- float *im2col_data = nullptr;
- auto im2col = getOutputTensors()[1];
- if (_need_im2col)
- {
- im2col_data = im2col->data<float>();
- }
- luci_interpreter_pal::Conv(
- params, getTensorShape(input()), getTensorData<float>(input()), getTensorShape(filter()),
- getTensorData<float>(filter()), getTensorShape(bias()), getTensorData<float>(bias()),
- getTensorShape(output()), getTensorData<float>(output()), getTensorShape(im2col), im2col_data);
+ auto scratchpad = getOutputTensors()[1];
+ float *scratchpad_data = nullptr;
+ if (scratchpad->is_allocatable())
+ scratchpad_data = scratchpad->data<float>();
+
+ luci_interpreter_pal::Conv(params, getTensorShape(input()), getTensorData<float>(input()),
+ getTensorShape(filter()), getTensorData<float>(filter()),
+ getTensorShape(bias()), getTensorData<float>(bias()),
+ getTensorShape(output()), getTensorData<float>(output()),
+ getTensorShape(scratchpad), scratchpad_data);
}
void Conv2D::evalQuantized() const
@@ -236,12 +227,12 @@ void Conv2D::evalQuantized() const
params.quantized_activation_min = activation_min;
params.quantized_activation_max = activation_max;
- auto im2col = getOutputTensors()[1];
+ auto scratchpad = getOutputTensors()[1];
luci_interpreter_pal::Conv(params, getTensorShape(input()), getTensorData<uint8_t>(input()),
getTensorShape(filter()), getTensorData<uint8_t>(filter()),
getTensorShape(bias()), getTensorData<int32_t>(bias()),
getTensorShape(output()), getTensorData<uint8_t>(output()),
- getTensorShape(im2col), getTensorData<uint8_t>(im2col));
+ getTensorShape(scratchpad), getTensorData<uint8_t>(scratchpad));
}
void Conv2D::evalQuantizedPerChannel() const
@@ -364,18 +355,16 @@ void Conv2D::evalQuantizedS8PerChannel() const
std::back_inserter(multipliers),
[](ChannelQuantMultipliers cm) { return cm.multiplier; });
- int8_t *im2col_data = nullptr;
- auto im2col = getOutputTensors()[1];
- if (_need_im2col)
- {
- im2col_data = im2col->data<int8_t>();
- }
+ auto scratchpad = getOutputTensors()[1];
+ int8_t *scratchpad_data = nullptr;
+ if (scratchpad->is_allocatable())
+ scratchpad_data = scratchpad->data<int8_t>();
luci_interpreter_pal::ConvPerChannel(
params, multipliers.data(), shifts.data(), getTensorShape(input()),
getTensorData<int8_t>(input()), getTensorShape(filter()), getTensorData<int8_t>(filter()),
getTensorShape(bias()), getTensorData<int32_t>(bias()), getTensorShape(output()),
- getTensorData<int8_t>(output()), getTensorShape(im2col), im2col_data);
+ getTensorData<int8_t>(output()), getTensorShape(scratchpad), scratchpad_data);
}
void Conv2D::evalQuantizedS16() const
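
The im2col bookkeeping removed above did not disappear; it moved behind the PAL so
each platform can decide whether Conv2D needs a scratch buffer at all. A hedged sketch
of what a Linux-style SetupScratchpadTensor body might look like, mirroring the
deleted logic (an assumption; the real bodies live in the per-platform PAL headers):

    // Conv filter shape is {out_depth, filter_h, filter_w, in_depth}.
    const bool need_dilated_im2col =
      params.dilation_height_factor != 1 || params.dilation_width_factor != 1;
    const bool need_non_dilated_im2col =
      params.stride_height != 1 || params.stride_width != 1 ||
      filter_shape.Dims(1) != 1 || filter_shape.Dims(2) != 1;
    if (input_data_type != luci_interpreter::DataType::S16 &&
        (need_dilated_im2col || need_non_dilated_im2col))
      scratchpad->resize({input_shape.Dims(0), output_shape.Dims(1), output_shape.Dims(2),
                          input_shape.Dims(3) * filter_shape.Dims(1) * filter_shape.Dims(2)});
    else
      scratchpad->set_allocatable(false);
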
diff --git a/compiler/luci-interpreter/src/kernels/Conv2D.h b/compiler/luci-interpreter/src/kernels/Conv2D.h
index 5f1317638..330bf3a2a 100644
--- a/compiler/luci-interpreter/src/kernels/Conv2D.h
+++ b/compiler/luci-interpreter/src/kernels/Conv2D.h
@@ -31,7 +31,7 @@ class Conv2D : public KernelWithParams<Conv2DParams>
{
public:
Conv2D(const Tensor *input, const Tensor *filter, const Tensor *bias, Tensor *output,
- Tensor *im2col, const Conv2DParams &params);
+ Tensor *scratchpad, const Conv2DParams &params);
const Tensor *input() const { return _inputs[0]; }
const Tensor *filter() const { return _inputs[1]; }
@@ -49,7 +49,6 @@ private:
void evalQuantizedS16() const;
private:
- bool _need_im2col = false;
int32_t _padding_height{};
int32_t _padding_width{};
};
diff --git a/compiler/luci-interpreter/src/kernels/DepthToSpace.test.cpp b/compiler/luci-interpreter/src/kernels/DepthToSpace.test.cpp
index 9b1c09ba9..88e6e07f1 100644
--- a/compiler/luci-interpreter/src/kernels/DepthToSpace.test.cpp
+++ b/compiler/luci-interpreter/src/kernels/DepthToSpace.test.cpp
@@ -32,7 +32,7 @@ template <typename T> class DepthToSpaceTest : public ::testing::Test
};
using DataTypes = ::testing::Types<float, uint8_t>;
-TYPED_TEST_CASE(DepthToSpaceTest, DataTypes);
+TYPED_TEST_SUITE(DepthToSpaceTest, DataTypes);
TYPED_TEST(DepthToSpaceTest, SimpleCase)
{
diff --git a/compiler/luci-interpreter/src/kernels/DepthwiseConv2D.cpp b/compiler/luci-interpreter/src/kernels/DepthwiseConv2D.cpp
index f2dbf6c68..c554c309d 100644
--- a/compiler/luci-interpreter/src/kernels/DepthwiseConv2D.cpp
+++ b/compiler/luci-interpreter/src/kernels/DepthwiseConv2D.cpp
@@ -18,9 +18,7 @@
#include "kernels/Utils.h"
-#include <tensorflow/lite/kernels/internal/reference/depthwiseconv_float.h>
-#include <tensorflow/lite/kernels/internal/reference/depthwiseconv_uint8.h>
-#include <tensorflow/lite/kernels/internal/reference/integer_ops/depthwise_conv.h>
+#include "PALDepthwiseConv2d.h"
#include <stdexcept>
@@ -30,8 +28,9 @@ namespace kernels
{
DepthwiseConv2D::DepthwiseConv2D(const Tensor *input, const Tensor *filter, const Tensor *bias,
- Tensor *output, const DepthwiseConv2DParams &params)
- : KernelWithParams<DepthwiseConv2DParams>({input, filter, bias}, {output}, params)
+ Tensor *output, Tensor *scratchpad,
+ const DepthwiseConv2DParams &params)
+ : KernelWithParams<DepthwiseConv2DParams>({input, filter, bias}, {output, scratchpad}, params)
{
}
@@ -109,6 +108,16 @@ void DepthwiseConv2D::configure()
filter_width, output_width);
output()->resize({batches, output_height, output_width, channels_out});
+
+ tflite::DepthwiseParams params{};
+
+ params.dilation_height_factor = _params.dilation_height_factor;
+ params.dilation_width_factor = _params.dilation_width_factor;
+
+ auto scratchpad = getOutputTensors()[1];
+ luci_interpreter_pal::SetupScratchpadTensor(scratchpad, params, input()->element_type(),
+ getTensorShape(input()), getTensorShape(filter()),
+ getTensorShape(output()));
}
void DepthwiseConv2D::execute() const
@@ -337,11 +346,16 @@ void DepthwiseConv2D::evalQuantizedS8PerChannel() const
std::back_inserter(multipliers),
[](ChannelQuantMultipliers cm) { return cm.multiplier; });
- tflite::reference_integer_ops::DepthwiseConvPerChannel(
+ auto scratchpad = getOutputTensors()[1];
+ int8_t *scratchpad_data = nullptr;
+ if (scratchpad->is_allocatable())
+ scratchpad_data = scratchpad->data<int8_t>();
+
+ luci_interpreter_pal::DepthwiseConvPerChannel<int8_t>(
params, multipliers.data(), shifts.data(), getTensorShape(input()),
getTensorData<int8_t>(input()), getTensorShape(filter()), getTensorData<int8_t>(filter()),
getTensorShape(bias()), getTensorData<int32_t>(bias()), getTensorShape(output()),
- getTensorData<int8_t>(output()));
+ getTensorData<int8_t>(output()), getTensorShape(scratchpad), scratchpad_data);
}
void DepthwiseConv2D::evalQuantizedS16() const
diff --git a/compiler/luci-interpreter/src/kernels/DepthwiseConv2D.h b/compiler/luci-interpreter/src/kernels/DepthwiseConv2D.h
index 6cffd6583..3d1faf6c1 100644
--- a/compiler/luci-interpreter/src/kernels/DepthwiseConv2D.h
+++ b/compiler/luci-interpreter/src/kernels/DepthwiseConv2D.h
@@ -29,7 +29,7 @@ class DepthwiseConv2D : public KernelWithParams<DepthwiseConv2DParams>
{
public:
DepthwiseConv2D(const Tensor *input, const Tensor *filter, const Tensor *bias, Tensor *output,
- const DepthwiseConv2DParams &params);
+ Tensor *scratchpad, const DepthwiseConv2DParams &params);
const Tensor *input() const { return _inputs[0]; }
const Tensor *filter() const { return _inputs[1]; }
diff --git a/compiler/luci-interpreter/src/kernels/DepthwiseConv2D.test.cpp b/compiler/luci-interpreter/src/kernels/DepthwiseConv2D.test.cpp
index 74975899a..6b4673f3e 100644
--- a/compiler/luci-interpreter/src/kernels/DepthwiseConv2D.test.cpp
+++ b/compiler/luci-interpreter/src/kernels/DepthwiseConv2D.test.cpp
@@ -59,6 +59,7 @@ TEST_F(DepthwiseConv2DTest, Float)
makeInputTensor<DataType::FLOAT32>(filter_shape, filter_data, _memory_manager.get());
Tensor bias_tensor =
makeInputTensor<DataType::FLOAT32>(bias_shape, bias_data, _memory_manager.get());
+ Tensor scratchpad(DataType::FLOAT32, Shape({}), {}, "");
Tensor output_tensor = makeOutputTensor(DataType::FLOAT32);
DepthwiseConv2DParams params{};
@@ -70,8 +71,10 @@ TEST_F(DepthwiseConv2DTest, Float)
params.dilation_width_factor = 1;
params.activation = Activation::RELU;
- DepthwiseConv2D kernel(&input_tensor, &filter_tensor, &bias_tensor, &output_tensor, params);
+ DepthwiseConv2D kernel(&input_tensor, &filter_tensor, &bias_tensor, &output_tensor, &scratchpad,
+ params);
kernel.configure();
+ _memory_manager->allocate_memory(scratchpad);
_memory_manager->allocate_memory(output_tensor);
kernel.execute();
@@ -111,6 +114,7 @@ TEST_F(DepthwiseConv2DTest, Uint8)
{4}, input_quant_param.first * input_quant_param.first, 0, bias_data, _memory_manager.get());
Tensor output_tensor =
makeOutputTensor(DataType::U8, output_quant_param.first, output_quant_param.second);
+ Tensor scratchpad(DataType::FLOAT32, Shape({}), {}, "");
DepthwiseConv2DParams params{};
params.padding = Padding::VALID;
@@ -121,9 +125,11 @@ TEST_F(DepthwiseConv2DTest, Uint8)
params.dilation_width_factor = 1;
params.activation = Activation::NONE;
- DepthwiseConv2D kernel(&input_tensor, &filter_tensor, &bias_tensor, &output_tensor, params);
+ DepthwiseConv2D kernel(&input_tensor, &filter_tensor, &bias_tensor, &output_tensor, &scratchpad,
+ params);
kernel.configure();
_memory_manager->allocate_memory(output_tensor);
+ _memory_manager->allocate_memory(scratchpad);
kernel.execute();
std::vector<float> ref_output_data{
@@ -166,6 +172,7 @@ TEST_F(DepthwiseConv2DTest, SInt16)
Tensor bias_tensor =
makeInputTensor<DataType::S64>(bias_shape, 0.25 * 0.2, 0, bias_data, _memory_manager.get());
Tensor output_tensor = makeOutputTensor(DataType::S16, 0.5, 0);
+ Tensor scratchpad(DataType::S64, Shape({}), {}, "");
DepthwiseConv2DParams params{};
params.padding = Padding::VALID;
@@ -176,9 +183,11 @@ TEST_F(DepthwiseConv2DTest, SInt16)
params.dilation_width_factor = 1;
params.activation = Activation::RELU;
- DepthwiseConv2D kernel(&input_tensor, &filter_tensor, &bias_tensor, &output_tensor, params);
+ DepthwiseConv2D kernel(&input_tensor, &filter_tensor, &bias_tensor, &output_tensor, &scratchpad,
+ params);
kernel.configure();
_memory_manager->allocate_memory(output_tensor);
+ _memory_manager->allocate_memory(scratchpad);
kernel.execute();
EXPECT_THAT(extractTensorShape(output_tensor), ::testing::ElementsAreArray(ref_output_shape));
@@ -224,6 +233,7 @@ TEST_F(DepthwiseConv2DTest, SInt16_CWQ_weights)
Tensor bias_tensor = makeInputTensor<DataType::S64>(bias_shape, bias_scales, zerop, 0, bias_data,
_memory_manager.get());
Tensor output_tensor = makeOutputTensor(DataType::S16, 0.5, 0);
+ Tensor scratchpad(DataType::S16, Shape({}), {}, "");
DepthwiseConv2DParams params{};
params.padding = Padding::VALID;
@@ -234,9 +244,11 @@ TEST_F(DepthwiseConv2DTest, SInt16_CWQ_weights)
params.dilation_width_factor = 1;
params.activation = Activation::RELU;
- DepthwiseConv2D kernel(&input_tensor, &filter_tensor, &bias_tensor, &output_tensor, params);
+ DepthwiseConv2D kernel(&input_tensor, &filter_tensor, &bias_tensor, &output_tensor, &scratchpad,
+ params);
kernel.configure();
_memory_manager->allocate_memory(output_tensor);
+ _memory_manager->allocate_memory(scratchpad);
kernel.execute();
EXPECT_THAT(extractTensorShape(output_tensor), ::testing::ElementsAreArray(ref_output_shape));
@@ -299,6 +311,7 @@ TEST_F(DepthwiseConv2DTest, Uint8_CWQ_weights)
_memory_manager.get());
Tensor output_tensor =
makeOutputTensor(DataType::U8, output_quant_param.first, output_quant_param.second);
+ Tensor scratchpad(DataType::U8, Shape({}), {}, "");
DepthwiseConv2DParams params{};
params.padding = Padding::VALID;
@@ -309,9 +322,11 @@ TEST_F(DepthwiseConv2DTest, Uint8_CWQ_weights)
params.dilation_width_factor = 1;
params.activation = Activation::NONE;
- DepthwiseConv2D kernel(&input_tensor, &filter_tensor, &bias_tensor, &output_tensor, params);
+ DepthwiseConv2D kernel(&input_tensor, &filter_tensor, &bias_tensor, &output_tensor, &scratchpad,
+ params);
kernel.configure();
_memory_manager->allocate_memory(output_tensor);
+ _memory_manager->allocate_memory(scratchpad);
kernel.execute();
EXPECT_THAT(extractTensorShape(output_tensor), ::testing::ElementsAreArray(ref_output_shape));
@@ -375,6 +390,7 @@ TEST_F(DepthwiseConv2DTest, SInt8_CWQ_weights)
_memory_manager.get());
Tensor output_tensor =
makeOutputTensor(DataType::S8, output_quant_param.first, output_quant_param.second);
+ Tensor scratchpad(DataType::S8, Shape({}), {}, "");
DepthwiseConv2DParams params{};
params.padding = Padding::VALID;
@@ -385,9 +401,11 @@ TEST_F(DepthwiseConv2DTest, SInt8_CWQ_weights)
params.dilation_width_factor = 1;
params.activation = Activation::NONE;
- DepthwiseConv2D kernel(&input_tensor, &filter_tensor, &bias_tensor, &output_tensor, params);
+ DepthwiseConv2D kernel(&input_tensor, &filter_tensor, &bias_tensor, &output_tensor, &scratchpad,
+ params);
kernel.configure();
_memory_manager->allocate_memory(output_tensor);
+ _memory_manager->allocate_memory(scratchpad);
kernel.execute();
EXPECT_THAT(extractTensorShape(output_tensor), ::testing::ElementsAreArray(ref_output_shape));
@@ -419,6 +437,7 @@ TEST_F(DepthwiseConv2DTest, InvalidBiasType_NEG)
makeInputTensor<DataType::FLOAT32>(filter_shape, filter_data, _memory_manager.get());
Tensor bias_tensor = makeInputTensor<DataType::S32>(bias_shape, bias_data, _memory_manager.get());
Tensor output_tensor = makeOutputTensor(DataType::FLOAT32);
+ Tensor scratchpad(DataType::FLOAT32, Shape({}), {}, "");
DepthwiseConv2DParams params{};
params.padding = Padding::VALID;
@@ -429,7 +448,8 @@ TEST_F(DepthwiseConv2DTest, InvalidBiasType_NEG)
params.dilation_width_factor = 1;
params.activation = Activation::RELU;
- DepthwiseConv2D kernel(&input_tensor, &filter_tensor, &bias_tensor, &output_tensor, params);
+ DepthwiseConv2D kernel(&input_tensor, &filter_tensor, &bias_tensor, &output_tensor, &scratchpad,
+ params);
EXPECT_ANY_THROW(kernel.configure());
}
@@ -458,6 +478,7 @@ TEST_F(DepthwiseConv2DTest, InOutTypeMismatch_NEG)
Tensor bias_tensor =
makeInputTensor<DataType::FLOAT32>(bias_shape, bias_data, _memory_manager.get());
Tensor output_tensor = makeOutputTensor(DataType::U8);
+ Tensor scratchpad(DataType::U8, Shape({}), {}, "");
DepthwiseConv2DParams params{};
params.padding = Padding::VALID;
@@ -468,7 +489,8 @@ TEST_F(DepthwiseConv2DTest, InOutTypeMismatch_NEG)
params.dilation_width_factor = 1;
params.activation = Activation::RELU;
- DepthwiseConv2D kernel(&input_tensor, &filter_tensor, &bias_tensor, &output_tensor, params);
+ DepthwiseConv2D kernel(&input_tensor, &filter_tensor, &bias_tensor, &output_tensor, &scratchpad,
+ params);
EXPECT_ANY_THROW(kernel.configure());
}
@@ -497,6 +519,7 @@ TEST_F(DepthwiseConv2DTest, InvalidInputShape_NEG)
Tensor bias_tensor =
makeInputTensor<DataType::FLOAT32>(bias_shape, bias_data, _memory_manager.get());
Tensor output_tensor = makeOutputTensor(DataType::FLOAT32);
+ Tensor scratchpad(DataType::FLOAT32, Shape({}), {}, "");
DepthwiseConv2DParams params{};
params.padding = Padding::VALID;
@@ -507,7 +530,8 @@ TEST_F(DepthwiseConv2DTest, InvalidInputShape_NEG)
params.dilation_width_factor = 1;
params.activation = Activation::RELU;
- DepthwiseConv2D kernel(&input_tensor, &filter_tensor, &bias_tensor, &output_tensor, params);
+ DepthwiseConv2D kernel(&input_tensor, &filter_tensor, &bias_tensor, &output_tensor, &scratchpad,
+ params);
EXPECT_ANY_THROW(kernel.configure());
}
@@ -536,6 +560,7 @@ TEST_F(DepthwiseConv2DTest, InvalidFilterShape_NEG)
Tensor bias_tensor =
makeInputTensor<DataType::FLOAT32>(bias_shape, bias_data, _memory_manager.get());
Tensor output_tensor = makeOutputTensor(DataType::FLOAT32);
+ Tensor scratchpad(DataType::FLOAT32, Shape({}), {}, "");
DepthwiseConv2DParams params{};
params.padding = Padding::VALID;
@@ -546,7 +571,8 @@ TEST_F(DepthwiseConv2DTest, InvalidFilterShape_NEG)
params.dilation_width_factor = 1;
params.activation = Activation::RELU;
- DepthwiseConv2D kernel(&input_tensor, &filter_tensor, &bias_tensor, &output_tensor, params);
+ DepthwiseConv2D kernel(&input_tensor, &filter_tensor, &bias_tensor, &output_tensor, &scratchpad,
+ params);
EXPECT_ANY_THROW(kernel.configure());
}
@@ -575,6 +601,7 @@ TEST_F(DepthwiseConv2DTest, InvalidBiasDim_NEG)
Tensor bias_tensor =
makeInputTensor<DataType::FLOAT32>(bias_shape, bias_data, _memory_manager.get());
Tensor output_tensor = makeOutputTensor(DataType::FLOAT32);
+ Tensor scratchpad(DataType::FLOAT32, Shape({}), {}, "");
DepthwiseConv2DParams params{};
params.padding = Padding::VALID;
@@ -585,7 +612,8 @@ TEST_F(DepthwiseConv2DTest, InvalidBiasDim_NEG)
params.dilation_width_factor = 1;
params.activation = Activation::RELU;
- DepthwiseConv2D kernel(&input_tensor, &filter_tensor, &bias_tensor, &output_tensor, params);
+ DepthwiseConv2D kernel(&input_tensor, &filter_tensor, &bias_tensor, &output_tensor, &scratchpad,
+ params);
EXPECT_ANY_THROW(kernel.configure());
}
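
Every DepthwiseConv2D test above now follows the same scratchpad pattern: create a placeholder tensor with an empty shape, hand it to the kernel, and allocate it only after configure() has run (which presumably sizes it for the chosen backend). Condensed from the hunks above for reference:

    Tensor scratchpad(DataType::FLOAT32, Shape({}), {}, "");
    DepthwiseConv2D kernel(&input_tensor, &filter_tensor, &bias_tensor, &output_tensor,
                           &scratchpad, params);
    kernel.configure();
    _memory_manager->allocate_memory(output_tensor);
    _memory_manager->allocate_memory(scratchpad); // presumably sized during configure()
    kernel.execute();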
diff --git a/compiler/luci-interpreter/src/kernels/Dequantize.cpp b/compiler/luci-interpreter/src/kernels/Dequantize.cpp
new file mode 100644
index 000000000..96399e5c7
--- /dev/null
+++ b/compiler/luci-interpreter/src/kernels/Dequantize.cpp
@@ -0,0 +1,79 @@
+/*
+ * Copyright (c) 2022 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "kernels/Dequantize.h"
+#include "kernels/Utils.h"
+#include "PALDequantize.h"
+
+namespace luci_interpreter
+{
+namespace kernels
+{
+
+Dequantize::Dequantize(const Tensor *input, Tensor *output) : Kernel({input}, {output}) {}
+
+void Dequantize::configure()
+{
+ LUCI_INTERPRETER_CHECK(input()->element_type() == loco::DataType::S8 ||
+ input()->element_type() == loco::DataType::U8 ||
+ input()->element_type() == loco::DataType::S16);
+
+ LUCI_INTERPRETER_CHECK(input()->scales().size() == 1);
+
+ if (input()->element_type() == loco::DataType::S16)
+ LUCI_INTERPRETER_CHECK(input()->zero_point() == 0);
+
+ LUCI_INTERPRETER_CHECK(output()->element_type() == loco::DataType::FLOAT32);
+
+ output()->resize(input()->shape());
+}
+
+void Dequantize::execute() const
+{
+ tflite::DequantizationParams op_params;
+ op_params.zero_point = input()->zero_point();
+ op_params.scale = input()->scale();
+
+ switch (input()->element_type())
+ {
+ case loco::DataType::U8:
+ {
+ luci_interpreter_pal::Dequantize(op_params, getTensorShape(input()),
+ getTensorData<uint8_t>(input()), getTensorShape(output()),
+ getTensorData<float>(output()));
+ break;
+ }
+ case loco::DataType::S8:
+ {
+ luci_interpreter_pal::Dequantize(op_params, getTensorShape(input()),
+ getTensorData<int8_t>(input()), getTensorShape(output()),
+ getTensorData<float>(output()));
+ break;
+ }
+ case loco::DataType::S16:
+ {
+ luci_interpreter_pal::Dequantize(op_params, getTensorShape(input()),
+ getTensorData<int16_t>(input()), getTensorShape(output()),
+ getTensorData<float>(output()));
+ break;
+ }
+ default:
+ throw std::runtime_error("Unsupported type.");
+ }
+}
+
+} // namespace kernels
+} // namespace luci_interpreter
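
Dequantization maps a quantized value back to float as value = scale * (q - zero_point); the S16 path additionally requires a zero point of 0 (symmetric quantization). A minimal standalone check (not patch code) against the U8 test data further below:

    #include <cassert>
    #include <cstdint>

    // value = scale * (q - zero_point)
    float dequantize(int32_t q, float scale, int32_t zero_point)
    {
      return scale * static_cast<float>(q - zero_point);
    }

    int main()
    {
      // The Uint8 test below uses scale 0.5 and zero_point 127.
      assert(dequantize(0, 0.5f, 127) == -63.5f);
      assert(dequantize(255, 0.5f, 127) == 64.0f);
    }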
diff --git a/compiler/luci-interpreter/src/kernels/Dequantize.h b/compiler/luci-interpreter/src/kernels/Dequantize.h
new file mode 100644
index 000000000..5565df0e4
--- /dev/null
+++ b/compiler/luci-interpreter/src/kernels/Dequantize.h
@@ -0,0 +1,43 @@
+/*
+ * Copyright (c) 2022 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef LUCI_INTERPRETER_KERNELS_DEQUANTIZE_H
+#define LUCI_INTERPRETER_KERNELS_DEQUANTIZE_H
+
+#include "core/Kernel.h"
+#include "core/KernelParams.h"
+
+namespace luci_interpreter
+{
+namespace kernels
+{
+
+class Dequantize : public Kernel
+{
+public:
+ Dequantize(const Tensor *input, Tensor *output);
+
+ const Tensor *input() const { return _inputs[0]; }
+ Tensor *output() const { return _outputs[0]; }
+
+ void configure() override;
+ void execute() const override;
+};
+
+} // namespace kernels
+} // namespace luci_interpreter
+
+#endif // LUCI_INTERPRETER_KERNELS_DEQUANTIZE_H
diff --git a/compiler/luci-interpreter/src/kernels/Dequantize.test.cpp b/compiler/luci-interpreter/src/kernels/Dequantize.test.cpp
new file mode 100644
index 000000000..0cab633d6
--- /dev/null
+++ b/compiler/luci-interpreter/src/kernels/Dequantize.test.cpp
@@ -0,0 +1,149 @@
+/*
+ * Copyright (c) 2022 Samsung Electronics Co., Ltd. All Rights Reserved
+ * Copyright 2019 The TensorFlow Authors. All Rights Reserved.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "kernels/Dequantize.h"
+#include "kernels/TestUtils.h"
+#include "luci_interpreter/TestMemoryManager.h"
+
+namespace luci_interpreter
+{
+namespace kernels
+{
+namespace
+{
+
+using namespace testing;
+
+class DequantizeTest : public ::testing::Test
+{
+protected:
+ void SetUp() override { _memory_manager = std::make_unique<TestMemoryManager>(); }
+
+ std::unique_ptr<IMemoryManager> _memory_manager;
+};
+
+TEST_F(DequantizeTest, Uint8)
+{
+ std::vector<uint8_t> input_data{0, 1, 2, 3, 4, 251, 252, 253, 254, 255};
+
+ std::vector<float> ref_output_data{-63.5, -63, -62.5, -62, -61.5, 62, 62.5, 63, 63.5, 64};
+
+ Tensor input_tensor(loco::DataType::U8, {2, 5}, {{0.5}, {127}}, "");
+
+ _memory_manager->allocate_memory(input_tensor);
+ input_tensor.writeData(input_data.data(), input_data.size() * sizeof(uint8_t));
+
+ Tensor output_tensor = makeOutputTensor(DataType::FLOAT32);
+
+ Dequantize kernel(&input_tensor, &output_tensor);
+ kernel.configure();
+ _memory_manager->allocate_memory(output_tensor);
+ kernel.execute();
+
+ EXPECT_THAT(extractTensorData<float>(output_tensor),
+ ::testing::ElementsAreArray(ref_output_data));
+ EXPECT_THAT(extractTensorShape(output_tensor), ::testing::ElementsAreArray({2, 5}));
+}
+
+TEST_F(DequantizeTest, Sint8)
+{
+ std::vector<int8_t> input_data{-128, -127, -126, -125, -124, 123, 124, 125, 126, 127};
+
+ std::vector<float> ref_output_data{-63.5, -63, -62.5, -62, -61.5, 62, 62.5, 63, 63.5, 64};
+
+ Tensor input_tensor(loco::DataType::S8, {2, 5}, {{0.5}, {-1}}, "");
+
+ _memory_manager->allocate_memory(input_tensor);
+ input_tensor.writeData(input_data.data(), input_data.size() * sizeof(int8_t));
+
+ Tensor output_tensor = makeOutputTensor(DataType::FLOAT32);
+
+ Dequantize kernel(&input_tensor, &output_tensor);
+ kernel.configure();
+ _memory_manager->allocate_memory(output_tensor);
+ kernel.execute();
+
+ EXPECT_THAT(extractTensorData<float>(output_tensor),
+ ::testing::ElementsAreArray(ref_output_data));
+ EXPECT_THAT(extractTensorShape(output_tensor), ::testing::ElementsAreArray({2, 5}));
+}
+
+TEST_F(DequantizeTest, Sint16)
+{
+ std::vector<int16_t> input_data{-129, -126, -125, -124, -123, 124, 125, 126, 127, 131};
+
+ std::vector<float> ref_output_data{-64.5, -63, -62.5, -62, -61.5, 62, 62.5, 63, 63.5, 65.5};
+
+ Tensor input_tensor(loco::DataType::S16, {2, 5}, {{0.5}, {0}}, "");
+
+ _memory_manager->allocate_memory(input_tensor);
+ input_tensor.writeData(input_data.data(), input_data.size() * sizeof(int16_t));
+
+ Tensor output_tensor = makeOutputTensor(DataType::FLOAT32);
+
+ Dequantize kernel(&input_tensor, &output_tensor);
+ kernel.configure();
+ _memory_manager->allocate_memory(output_tensor);
+ kernel.execute();
+
+ EXPECT_THAT(extractTensorData<float>(output_tensor),
+ ::testing::ElementsAreArray(ref_output_data));
+ EXPECT_THAT(extractTensorShape(output_tensor), ::testing::ElementsAreArray({2, 5}));
+}
+
+TEST_F(DequantizeTest, InvalidInputType_NEG)
+{
+ std::vector<float> input_data{-129, -126, -125, -124, -123, 124, 125, 126, 127, 131};
+
+ Tensor input_tensor =
+ makeInputTensor<DataType::FLOAT32>({2, 5}, input_data, _memory_manager.get());
+ Tensor output_tensor = makeOutputTensor(DataType::FLOAT32);
+
+ Dequantize kernel(&input_tensor, &output_tensor);
+ EXPECT_ANY_THROW(kernel.configure());
+}
+
+TEST_F(DequantizeTest, InvalidOutputType_NEG)
+{
+ std::vector<int16_t> input_data{-129, -126, -125, -124, -123, 124, 125, 126, 127, 131};
+
+ Tensor input_tensor(loco::DataType::S16, {2, 5}, {{0.5}, {0}}, "");
+
+ _memory_manager->allocate_memory(input_tensor);
+ input_tensor.writeData(input_data.data(), input_data.size() * sizeof(int16_t));
+
+ Tensor output_tensor = makeOutputTensor(DataType::S8, /*scale*/ 0.5, /*zero_point*/ -1);
+
+ Dequantize kernel(&input_tensor, &output_tensor);
+ EXPECT_ANY_THROW(kernel.configure());
+}
+
+TEST_F(DequantizeTest, InvalidInputZeroPoint_NEG)
+{
+ std::vector<float> input_data{1, 2, 3, 4, 5, 6, 7, 8, 9, 10};
+
+ Tensor input_tensor =
+ makeInputTensor<DataType::S16>({2, 5}, 0.5, -1, input_data, _memory_manager.get());
+ Tensor output_tensor = makeOutputTensor(DataType::FLOAT32);
+
+ Dequantize kernel(&input_tensor, &output_tensor);
+ EXPECT_ANY_THROW(kernel.configure());
+}
+
+} // namespace
+} // namespace kernels
+} // namespace luci_interpreter
diff --git a/compiler/luci-interpreter/src/kernels/Div.cpp b/compiler/luci-interpreter/src/kernels/Div.cpp
index 0e52ba1f0..dd1532278 100644
--- a/compiler/luci-interpreter/src/kernels/Div.cpp
+++ b/compiler/luci-interpreter/src/kernels/Div.cpp
@@ -46,6 +46,12 @@ void Div::execute() const
case DataType::FLOAT32:
evalFloat();
break;
+ case DataType::S64:
+ evalInteger<int64_t>();
+ break;
+ case DataType::S32:
+ evalInteger<int32_t>();
+ break;
case DataType::U8:
evalQuantized();
break;
@@ -56,13 +62,9 @@ void Div::execute() const
void Div::evalFloat() const
{
- float activation_min{};
- float activation_max{};
- calculateActivationRange(_params.activation, &activation_min, &activation_max);
-
tflite::ArithmeticParams params{};
- params.float_activation_min = activation_min;
- params.float_activation_max = activation_max;
+ fillArithmeticActivationRange<float>(params, _params.activation);
+
const bool need_broadcast = tflite::reference_ops::ProcessBroadcastShapes(
getTensorShape(input1()), getTensorShape(input2()), &params);
@@ -80,6 +82,28 @@ void Div::evalFloat() const
}
}
+template <typename T> void Div::evalInteger() const
+{
+ tflite::ArithmeticParams params{};
+ fillArithmeticActivationRange<T>(params, _params.activation);
+
+ const bool need_broadcast = tflite::reference_ops::ProcessBroadcastShapes(
+ getTensorShape(input1()), getTensorShape(input2()), &params);
+
+ if (need_broadcast)
+ {
+ tflite::reference_ops::BroadcastDivSlow(
+ params, getTensorShape(input1()), getTensorData<T>(input1()), getTensorShape(input2()),
+ getTensorData<T>(input2()), getTensorShape(output()), getTensorData<T>(output()));
+ }
+ else
+ {
+ tflite::reference_ops::Div(params, getTensorShape(input1()), getTensorData<T>(input1()),
+ getTensorShape(input2()), getTensorData<T>(input2()),
+ getTensorShape(output()), getTensorData<T>(output()));
+ }
+}
+
void Div::evalQuantized() const
{
const auto input1_scale = static_cast<double>(input1()->scale());
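
The refactor replaces per-kernel min/max boilerplate with fillArithmeticActivationRange<T> from kernels/Utils.h. A hedged sketch of what such a helper plausibly does (field names taken from tflite::ArithmeticParams; the actual implementation lives in Utils.h):

    #include <type_traits>

    // Sketch only: dispatch the activation clamp range to the field pair
    // matching the element type T.
    template <typename T>
    void fillArithmeticActivationRangeSketch(tflite::ArithmeticParams &params,
                                             Activation activation)
    {
      T activation_min{};
      T activation_max{};
      calculateActivationRange(activation, &activation_min, &activation_max);

      if constexpr (std::is_same_v<T, float>)
      {
        params.float_activation_min = activation_min;
        params.float_activation_max = activation_max;
      }
      else if constexpr (std::is_same_v<T, int64_t>)
      {
        params.int64_activation_min = activation_min;
        params.int64_activation_max = activation_max;
      }
      else
      {
        params.quantized_activation_min = activation_min;
        params.quantized_activation_max = activation_max;
      }
    }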
diff --git a/compiler/luci-interpreter/src/kernels/Div.h b/compiler/luci-interpreter/src/kernels/Div.h
index 6040cdd02..c1bf3e10b 100644
--- a/compiler/luci-interpreter/src/kernels/Div.h
+++ b/compiler/luci-interpreter/src/kernels/Div.h
@@ -39,6 +39,7 @@ public:
private:
void evalFloat() const;
+ template <typename T> void evalInteger() const;
void evalQuantized() const;
};
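
Declaring evalInteger as a member template but defining it only in Div.cpp is safe here: its sole instantiations (int32_t and int64_t) occur in that same translation unit, so no explicit instantiation is needed. A minimal illustration of the pattern, with hypothetical names:

    #include <cstdint>

    struct K
    {
      template <typename T> void eval() const;
    };

    // Out-of-line definition in the same .cpp that uses it.
    template <typename T> void K::eval() const { /* ... */ }

    void run(const K &k)
    {
      k.eval<int32_t>(); // instantiated here, in the defining TU
      k.eval<int64_t>();
    }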
diff --git a/compiler/luci-interpreter/src/kernels/Div.test.cpp b/compiler/luci-interpreter/src/kernels/Div.test.cpp
index 021d68d06..85cd8b90a 100644
--- a/compiler/luci-interpreter/src/kernels/Div.test.cpp
+++ b/compiler/luci-interpreter/src/kernels/Div.test.cpp
@@ -134,6 +134,56 @@ TEST_F(DivTest, Uint8)
EXPECT_THAT(extractTensorShape(output_tensor), ::testing::ElementsAreArray(output_shape));
}
+template <loco::DataType DType> void checkInteger(luci_interpreter::IMemoryManager *memory_manager)
+{
+ using dtype = typename loco::DataTypeImpl<DType>::Type;
+ Shape base_shape = {2, 3, 1, 2};
+ std::vector<Shape> test_shapes{{1, 1, 3, 2}, {1, 3, 1, 2}, {2, 1, 3, 1}, {2, 3, 1, 1}};
+
+ std::vector<std::vector<dtype>> test_outputs = {{5, 6, 2, 0, 10, 3, //
+ 10, 0, 4, 5, 20, 0, //
+ 0, 0, 0, 2, 0, 0, //
+ 2, 0, 1, 10, 5, 0, //
+ 2, 3, 1, 0, 5, 1, //
+ 18, 20, 7, 0, 37, 10},
+ {5, 6, 4, 5, 0, 0, 2, 0, 1, 0, 37, 10},
+ {5, 7, 4, 6, 2, 3, 10, 0, 8, 0, 4, 0,
+ 0, 0, 0, 0, 0, 0, 0, 10, 5, 0, 1, 0,
+ 0, 0, 5, 9, 1, 1, 0, 0, 37, 50, 7, 10},
+ {5, 7, 8, 0, 0, 0, 0, 10, 5, 9, 7, 10}};
+ std::vector<dtype> input1_data{20, 30, 40, -17, -4, -7, 11, -31, 10, 19, 75, 100};
+ std::vector<dtype> input2_data{4, 5, 10, -3, 2, 10};
+ for (size_t i = 0; i < test_shapes.size(); ++i)
+ {
+ Tensor input1_tensor = makeInputTensor<DType>(base_shape, input1_data, memory_manager);
+ Tensor input2_tensor = makeInputTensor<DType>(test_shapes[i], input2_data, memory_manager);
+ Tensor output_tensor = makeOutputTensor(DType);
+
+ DivParams params{};
+ params.activation = Activation::RELU;
+
+ Div kernel(&input1_tensor, &input2_tensor, &output_tensor, params);
+ kernel.configure();
+ memory_manager->allocate_memory(output_tensor);
+ kernel.execute();
+
+ EXPECT_THAT(extractTensorData<dtype>(output_tensor), test_outputs[i])
+ << "With shape number " << i;
+ }
+}
+
+TEST_F(DivTest, SInt64)
+{
+ checkInteger<loco::DataType::S64>(_memory_manager.get());
+ SUCCEED();
+}
+
+TEST_F(DivTest, SInt32)
+{
+ checkInteger<loco::DataType::S32>(_memory_manager.get());
+ SUCCEED();
+}
+
TEST_F(DivTest, Input_Output_Type_NEG)
{
Tensor input1_tensor = makeInputTensor<DataType::FLOAT32>({1}, {1.f}, _memory_manager.get());
@@ -149,9 +199,9 @@ TEST_F(DivTest, Input_Output_Type_NEG)
TEST_F(DivTest, Invalid_Input_Type_NEG)
{
- Tensor input1_tensor = makeInputTensor<DataType::S64>({1}, {1}, _memory_manager.get());
- Tensor input2_tensor = makeInputTensor<DataType::S64>({1}, {2}, _memory_manager.get());
- Tensor output_tensor = makeOutputTensor(DataType::S64);
+ Tensor input1_tensor = makeInputTensor<DataType::U64>({1}, {1}, _memory_manager.get());
+ Tensor input2_tensor = makeInputTensor<DataType::U64>({1}, {2}, _memory_manager.get());
+ Tensor output_tensor = makeOutputTensor(DataType::U64);
DivParams params{};
params.activation = Activation::RELU;
@@ -162,6 +212,19 @@ TEST_F(DivTest, Invalid_Input_Type_NEG)
EXPECT_ANY_THROW(kernel.execute());
}
+TEST_F(DivTest, Invalid_Output_Type_NEG)
+{
+ Tensor input1_tensor = makeInputTensor<DataType::S32>({1}, {1}, _memory_manager.get());
+ Tensor input2_tensor = makeInputTensor<DataType::S32>({1}, {2}, _memory_manager.get());
+ Tensor output_tensor = makeOutputTensor(DataType::S64);
+
+ DivParams params{};
+ params.activation = Activation::RELU;
+
+ Div kernel(&input1_tensor, &input2_tensor, &output_tensor, params);
+ EXPECT_ANY_THROW(kernel.configure());
+}
+
} // namespace
} // namespace kernels
} // namespace luci_interpreter
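
The integer expectations follow C++ truncating division with the RELU clamp applied to the quotient. A standalone check (not patch code) of a few entries from the {2, 3, 1, 1} divisor-shape case:

    #include <algorithm>
    #include <cassert>
    #include <cstdint>

    // Truncating division followed by RELU, as the S32/S64 path computes.
    int32_t div_relu(int32_t a, int32_t b) { return std::max(a / b, 0); }

    int main()
    {
      assert(div_relu(30, 4) == 7);    // 7.5 truncated toward zero
      assert(div_relu(-17, 5) == 0);   // -3.4 -> -3, then clamped by RELU
      assert(div_relu(-31, -3) == 10); // 10.33 truncated toward zero
    }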
diff --git a/compiler/luci-interpreter/src/kernels/Equal.cpp b/compiler/luci-interpreter/src/kernels/Equal.cpp
index f58de1250..a57e127b7 100644
--- a/compiler/luci-interpreter/src/kernels/Equal.cpp
+++ b/compiler/luci-interpreter/src/kernels/Equal.cpp
@@ -49,6 +49,12 @@ void Equal::execute() const
case DataType::FLOAT32:
evalFloat();
break;
+ case DataType::S64:
+ evalInteger<int64_t>();
+ break;
+ case DataType::S32:
+ evalInteger<int32_t>();
+ break;
case DataType::U8:
evalQuantized();
break;
@@ -79,6 +85,29 @@ void Equal::evalFloat() const
}
}
+template <typename T> void Equal::evalInteger() const
+{
+ const auto x_data = getTensorData<T>(x());
+ const auto y_data = getTensorData<T>(y());
+ auto output_data = getTensorData<bool>(output());
+
+ tflite::ComparisonParams op_params;
+ op_params.is_broadcast = x()->shape() != y()->shape();
+
+ if (op_params.is_broadcast)
+ {
+ tflite::reference_ops::Broadcast4DSlowEqualNoScaling(op_params, getTensorShape(x()), x_data,
+ getTensorShape(y()), y_data,
+ getTensorShape(output()), output_data);
+ }
+ else
+ {
+ tflite::reference_ops::EqualNoScaling(op_params, getTensorShape(x()), x_data,
+ getTensorShape(y()), y_data, getTensorShape(output()),
+ output_data);
+ }
+}
+
void Equal::evalQuantized() const
{
const auto x_data = getTensorData<uint8_t>(x());
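
The integer path compares raw values, so the NoScaling comparison kernels apply and is_broadcast reduces to a plain shape inequality. A standalone illustration (hypothetical helper, not patch code):

    #include <cassert>
    #include <vector>

    // Mirrors `op_params.is_broadcast = x()->shape() != y()->shape()` above.
    bool needs_broadcast(const std::vector<int> &x, const std::vector<int> &y)
    {
      return x != y;
    }

    int main()
    {
      assert(needs_broadcast({4, 3}, {3})); // y is stretched across each row
      assert(!needs_broadcast({3}, {3}));   // plain elementwise loop
    }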
diff --git a/compiler/luci-interpreter/src/kernels/Equal.h b/compiler/luci-interpreter/src/kernels/Equal.h
index 11f025eac..c9be32cc0 100644
--- a/compiler/luci-interpreter/src/kernels/Equal.h
+++ b/compiler/luci-interpreter/src/kernels/Equal.h
@@ -38,6 +38,7 @@ public:
private:
void evalFloat() const;
+ template <typename T> void evalInteger() const;
void evalQuantized() const;
private:
diff --git a/compiler/luci-interpreter/src/kernels/Equal.test.cpp b/compiler/luci-interpreter/src/kernels/Equal.test.cpp
index 46a0f97d8..5870e5460 100644
--- a/compiler/luci-interpreter/src/kernels/Equal.test.cpp
+++ b/compiler/luci-interpreter/src/kernels/Equal.test.cpp
@@ -99,6 +99,82 @@ TEST_F(EqualTest, FloatBroardcast)
EXPECT_THAT(extractTensorShape(output_tensor), ::testing::ElementsAreArray({4, 3}));
}
+template <loco::DataType DType>
+void checkIntegerSimple(luci_interpreter::IMemoryManager *memory_manager)
+{
+ using dtype = typename loco::DataTypeImpl<DType>::Type;
+ dtype min_value = std::numeric_limits<dtype>::min();
+ dtype max_value = std::numeric_limits<dtype>::max();
+ std::vector<dtype> x_data{min_value, 2, max_value};
+
+ std::vector<dtype> y_data{min_value, -2, max_value};
+
+ std::vector<bool> ref_output_data{true, false, true};
+
+ Tensor x_tensor = makeInputTensor<DType>({3}, x_data, memory_manager);
+ Tensor y_tensor = makeInputTensor<DType>({3}, y_data, memory_manager);
+ Tensor output_tensor = makeOutputTensor(DataType::BOOL);
+
+ Equal kernel(&x_tensor, &y_tensor, &output_tensor);
+ kernel.configure();
+ memory_manager->allocate_memory(output_tensor);
+ kernel.execute();
+
+ EXPECT_THAT(extractTensorData<bool>(output_tensor), ::testing::ElementsAreArray(ref_output_data));
+ EXPECT_THAT(extractTensorShape(output_tensor), ::testing::ElementsAreArray({3}));
+}
+
+template <loco::DataType DType>
+void checkIntegerBroadcast(luci_interpreter::IMemoryManager *memory_manager)
+{
+ using dtype = typename loco::DataTypeImpl<DType>::Type;
+ dtype min_value = std::numeric_limits<dtype>::min();
+ dtype max_value = std::numeric_limits<dtype>::max();
+ std::vector<dtype> x_data{
+ min_value, 2, 3, // Row 1
+ 4, 5, max_value, // Row 2
+ -1, -2, -3, // Row 3
+ min_value, -2, max_value, // Row 4
+ };
+
+ std::vector<dtype> y_data{
+ min_value, -2, max_value, // Row 1
+ };
+
+ std::vector<bool> ref_output_data{
+ true, false, false, // Row 1
+ false, false, true, // Row 2
+ false, true, false, // Row 3
+ true, true, true, // Row 4
+ };
+
+ Tensor x_tensor = makeInputTensor<DType>({4, 3}, x_data, memory_manager);
+ Tensor y_tensor = makeInputTensor<DType>({3}, y_data, memory_manager);
+ Tensor output_tensor = makeOutputTensor(DataType::BOOL);
+
+ Equal kernel(&x_tensor, &y_tensor, &output_tensor);
+ kernel.configure();
+ memory_manager->allocate_memory(output_tensor);
+ kernel.execute();
+
+ EXPECT_THAT(extractTensorData<bool>(output_tensor), ::testing::ElementsAreArray(ref_output_data));
+ EXPECT_THAT(extractTensorShape(output_tensor), ::testing::ElementsAreArray({4, 3}));
+}
+
+TEST_F(EqualTest, Int32)
+{
+ checkIntegerSimple<loco::DataType::S32>(_memory_manager.get());
+ checkIntegerBroadcast<loco::DataType::S32>(_memory_manager.get());
+ SUCCEED();
+}
+
+TEST_F(EqualTest, Int64)
+{
+ checkIntegerSimple<loco::DataType::S64>(_memory_manager.get());
+ checkIntegerBroadcast<loco::DataType::S64>(_memory_manager.get());
+ SUCCEED();
+}
+
// Choose min / max in such a way that there are exactly 256 units to avoid rounding errors.
const float F_MIN = -128.0 / 128.0;
const float F_MAX = 127.0 / 128.0;
@@ -195,6 +271,36 @@ TEST_F(EqualTest, Input_Output_Type_NEG)
EXPECT_ANY_THROW(kernel.configure());
}
+TEST_F(EqualTest, Float_Broadcast_NEG)
+{
+ Tensor x_tensor = makeInputTensor<DataType::FLOAT32>({2}, {1.f, 2.f}, _memory_manager.get());
+ Tensor y_tensor = makeInputTensor<DataType::FLOAT32>({3}, {1.f, 2.f, 3.f}, _memory_manager.get());
+ Tensor output_tensor = makeOutputTensor(DataType::BOOL);
+
+ Equal kernel(&x_tensor, &y_tensor, &output_tensor);
+ ASSERT_ANY_THROW(kernel.configure());
+}
+
+TEST_F(EqualTest, Int32_Broadcast_NEG)
+{
+ Tensor x_tensor = makeInputTensor<DataType::S32>({2}, {1, 2}, _memory_manager.get());
+ Tensor y_tensor = makeInputTensor<DataType::S32>({3}, {1, 2, 3}, _memory_manager.get());
+ Tensor output_tensor = makeOutputTensor(DataType::BOOL);
+
+ Equal kernel(&x_tensor, &y_tensor, &output_tensor);
+ ASSERT_ANY_THROW(kernel.configure());
+}
+
+TEST_F(EqualTest, Int64_Broadcast_NEG)
+{
+ Tensor x_tensor = makeInputTensor<DataType::S64>({2}, {1, 2}, _memory_manager.get());
+ Tensor y_tensor = makeInputTensor<DataType::S64>({3}, {1, 2, 3}, _memory_manager.get());
+ Tensor output_tensor = makeOutputTensor(DataType::BOOL);
+
+ Equal kernel(&x_tensor, &y_tensor, &output_tensor);
+ ASSERT_ANY_THROW(kernel.configure());
+}
+
} // namespace
} // namespace kernels
} // namespace luci_interpreter
diff --git a/compiler/luci-interpreter/src/kernels/ExpandDims.cpp b/compiler/luci-interpreter/src/kernels/ExpandDims.cpp
new file mode 100644
index 000000000..ba35c99fa
--- /dev/null
+++ b/compiler/luci-interpreter/src/kernels/ExpandDims.cpp
@@ -0,0 +1,88 @@
+/*
+ * Copyright (c) 2022 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "kernels/ExpandDims.h"
+#include "kernels/Utils.h"
+
+namespace luci_interpreter
+{
+namespace kernels
+{
+
+ExpandDims::ExpandDims(const Tensor *input, const Tensor *axis, Tensor *output)
+ : Kernel({input, axis}, {output})
+{
+}
+
+void ExpandDims::configure()
+{
+ int32_t axis_value;
+
+ switch (axis()->element_type())
+ {
+ case loco::DataType::S32:
+ axis_value = *getTensorData<int32_t>(axis());
+ break;
+ case loco::DataType::S64:
+ axis_value = static_cast<int32_t>(*getTensorData<int64_t>(axis()));
+ break;
+ default:
+ throw std::runtime_error("Unsupported type.");
+ }
+
+ const auto input_shape = input()->shape();
+
+ if (axis_value < 0)
+ {
+ axis_value += input_shape.num_dims() + 1;
+ }
+
+ LUCI_INTERPRETER_CHECK(axis_value <= input_shape.num_dims() and axis_value >= 0);
+
+ Shape output_shape(input_shape.num_dims() + 1);
+ for (int32_t i = 0; i < output_shape.num_dims(); ++i)
+ {
+ if (i < axis_value)
+ {
+ output_shape.dim(i) = input_shape.dim(i);
+ }
+ else if (i == axis_value)
+ {
+ output_shape.dim(i) = 1;
+ }
+ else
+ {
+ LUCI_INTERPRETER_CHECK(i >= 1);
+ output_shape.dim(i) = input_shape.dim(i - 1);
+ }
+ }
+
+ output()->resize(output_shape);
+}
+
+void ExpandDims::execute() const
+{
+ // Inserting a size-1 dimension leaves the flat row-major layout unchanged,
+ // so the output is a plain copy of the input.
+ const auto *input_data = input()->data<void>();
+ auto *output_data = output()->data<void>();
+
+ const size_t element_size = getDataTypeSize(input()->element_type());
+ const int32_t num_elements = input()->shape().num_elements();
+ std::memcpy(output_data, input_data, num_elements * element_size);
+}
+
+} // namespace kernels
+} // namespace luci_interpreter
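
A negative axis is normalized against the output rank (input rank + 1), so axis -1 on a rank-2 input becomes 2; and because inserting a size-1 dimension does not reorder row-major data, execute() is a single memcpy. A standalone sketch of the shape rule:

    #include <cassert>
    #include <vector>

    // Not patch code: mirrors ExpandDims::configure()'s shape computation.
    std::vector<int> expand_dims_shape(std::vector<int> shape, int axis)
    {
      const int rank = static_cast<int>(shape.size());
      if (axis < 0)
        axis += rank + 1; // normalize against the output rank
      assert(axis >= 0 && axis <= rank);
      shape.insert(shape.begin() + axis, 1);
      return shape;
    }

    int main()
    {
      assert(expand_dims_shape({2, 2}, 0) == (std::vector<int>{1, 2, 2}));
      assert(expand_dims_shape({2, 2}, -1) == (std::vector<int>{2, 2, 1}));
    }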
diff --git a/compiler/luci-interpreter/src/kernels/ExpandDims.h b/compiler/luci-interpreter/src/kernels/ExpandDims.h
new file mode 100644
index 000000000..e510b1160
--- /dev/null
+++ b/compiler/luci-interpreter/src/kernels/ExpandDims.h
@@ -0,0 +1,44 @@
+/*
+ * Copyright (c) 2022 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef LUCI_INTERPRETER_KERNELS_EXPAND_DIMS_H
+#define LUCI_INTERPRETER_KERNELS_EXPAND_DIMS_H
+
+#include "core/Kernel.h"
+#include "core/KernelParams.h"
+
+namespace luci_interpreter
+{
+namespace kernels
+{
+
+class ExpandDims : public Kernel
+{
+public:
+ ExpandDims(const Tensor *input, const Tensor *axis, Tensor *output);
+
+ const Tensor *input() const { return _inputs[0]; }
+ const Tensor *axis() const { return _inputs[1]; }
+ Tensor *output() const { return _outputs[0]; }
+
+ void configure() override;
+ void execute() const override;
+};
+
+} // namespace kernels
+} // namespace luci_interpreter
+
+#endif // LUCI_INTERPRETER_KERNELS_EXPAND_DIMS_H
diff --git a/compiler/luci-interpreter/src/kernels/ExpandDims.test.cpp b/compiler/luci-interpreter/src/kernels/ExpandDims.test.cpp
new file mode 100644
index 000000000..df9eaccc0
--- /dev/null
+++ b/compiler/luci-interpreter/src/kernels/ExpandDims.test.cpp
@@ -0,0 +1,115 @@
+/*
+ * Copyright (c) 2022 Samsung Electronics Co., Ltd. All Rights Reserved
+ * Copyright 2017 The TensorFlow Authors. All Rights Reserved.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "kernels/ExpandDims.h"
+#include "kernels/TestUtils.h"
+#include "luci_interpreter/TestMemoryManager.h"
+
+namespace luci_interpreter
+{
+namespace kernels
+{
+namespace
+{
+
+using namespace testing;
+
+class ExpandDimsTest : public ::testing::Test
+{
+protected:
+ void SetUp() override { _memory_manager = std::make_unique<TestMemoryManager>(); }
+
+ std::unique_ptr<IMemoryManager> _memory_manager;
+};
+
+TEST_F(ExpandDimsTest, PositiveAxis)
+{
+ std::vector<int32_t> input_data{-1, 1, -2, 2};
+ std::initializer_list<int32_t> input_shape = {2, 2};
+
+ std::initializer_list<int32_t> axis_value = {0};
+
+ Tensor input_tensor =
+ makeInputTensor<DataType::S32>(input_shape, input_data, _memory_manager.get());
+ Tensor axis_tensor = makeInputTensor<DataType::S32>({1}, axis_value, _memory_manager.get());
+ Tensor output_tensor = makeOutputTensor(DataType::S32);
+
+ ExpandDims kernel(&input_tensor, &axis_tensor, &output_tensor);
+ kernel.configure();
+ _memory_manager->allocate_memory(output_tensor);
+ kernel.execute();
+
+ EXPECT_THAT(extractTensorData<int32_t>(output_tensor), ::testing::ElementsAreArray(input_data));
+ EXPECT_THAT(extractTensorShape(output_tensor), ::testing::ElementsAreArray({1, 2, 2}));
+}
+
+TEST_F(ExpandDimsTest, NegAxis)
+{
+ std::vector<int32_t> input_data{-1, 1, -2, 2};
+ std::initializer_list<int32_t> input_shape = {2, 2};
+
+ std::initializer_list<int32_t> axis_value = {-1};
+
+ Tensor input_tensor =
+ makeInputTensor<DataType::S32>(input_shape, input_data, _memory_manager.get());
+ Tensor axis_tensor = makeInputTensor<DataType::S32>({1}, axis_value, _memory_manager.get());
+ Tensor output_tensor = makeOutputTensor(DataType::S32);
+
+ ExpandDims kernel(&input_tensor, &axis_tensor, &output_tensor);
+ kernel.configure();
+ _memory_manager->allocate_memory(output_tensor);
+ kernel.execute();
+
+ EXPECT_THAT(extractTensorData<int32_t>(output_tensor), ::testing::ElementsAreArray(input_data));
+ EXPECT_THAT(extractTensorShape(output_tensor), ::testing::ElementsAreArray({2, 2, 1}));
+}
+
+TEST_F(ExpandDimsTest, InvalidAxisType_NEG)
+{
+ std::vector<int32_t> input_data{-1, 1, -2, 2};
+ std::initializer_list<int32_t> input_shape = {2, 2};
+
+ std::initializer_list<float> axis_value = {1.0};
+
+ Tensor input_tensor =
+ makeInputTensor<DataType::S32>(input_shape, input_data, _memory_manager.get());
+ Tensor axis_tensor = makeInputTensor<DataType::FLOAT32>({1}, axis_value, _memory_manager.get());
+ Tensor output_tensor = makeOutputTensor(DataType::S32);
+
+ ExpandDims kernel(&input_tensor, &axis_tensor, &output_tensor);
+ EXPECT_ANY_THROW(kernel.configure());
+}
+
+TEST_F(ExpandDimsTest, InvalidAxisValue_NEG)
+{
+ std::vector<int32_t> input_data{-1, 1, -2, 2};
+ std::initializer_list<int32_t> input_shape = {2, 2};
+
+ std::initializer_list<int32_t> axis_value = {3};
+
+ Tensor input_tensor =
+ makeInputTensor<DataType::S32>(input_shape, input_data, _memory_manager.get());
+ Tensor axis_tensor = makeInputTensor<DataType::S32>({1}, axis_value, _memory_manager.get());
+ Tensor output_tensor = makeOutputTensor(DataType::S32);
+
+ ExpandDims kernel(&input_tensor, &axis_tensor, &output_tensor);
+ EXPECT_ANY_THROW(kernel.configure());
+}
+
+} // namespace
+} // namespace kernels
+} // namespace luci_interpreter
diff --git a/compiler/luci-interpreter/src/kernels/FullyConnected.cpp b/compiler/luci-interpreter/src/kernels/FullyConnected.cpp
index cfe8f8bf2..bd2bb2f35 100644
--- a/compiler/luci-interpreter/src/kernels/FullyConnected.cpp
+++ b/compiler/luci-interpreter/src/kernels/FullyConnected.cpp
@@ -18,8 +18,7 @@
#include "kernels/Utils.h"
-#include <tensorflow/lite/kernels/internal/reference/fully_connected.h>
-#include <tensorflow/lite/kernels/internal/reference/integer_ops/fully_connected.h>
+#include "PALFullyConnected.h"
#include <stdexcept>
@@ -74,7 +73,18 @@ void FullyConnected::configure()
if (bias())
LUCI_INTERPRETER_CHECK(bias()->shape().num_elements() == weights()->shape().dim(0));
- output()->resize({batch_size, num_units});
+ if (params().keep_num_dims == false)
+ {
+ output()->resize({batch_size, num_units});
+ }
+ else
+ {
+ luci_interpreter::Shape output_shape(input_shape.num_dims());
+ for (int i = 0; i < input_shape.num_dims(); ++i)
+ output_shape.dim(i) = input_shape.dim(i);
+ output_shape.dim(input_shape.num_dims() - 1) = num_units;
+ output()->resize(output_shape);
+ }
}
void FullyConnected::execute() const
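
With keep_num_dims set, the output keeps the input's rank and only the innermost dimension becomes num_units; otherwise the leading dimensions fold into batch_size. A standalone sketch (assuming batch_size is the product of the leading dims, which the visible hunk does not show):

    #include <cassert>
    #include <vector>

    std::vector<int> fc_output_shape(const std::vector<int> &in, int num_units,
                                     bool keep_num_dims)
    {
      if (!keep_num_dims)
      {
        int batch_size = 1;
        for (int i = 0; i + 1 < static_cast<int>(in.size()); ++i)
          batch_size *= in[i];
        return {batch_size, num_units};
      }
      std::vector<int> out = in;
      out.back() = num_units; // only the last dimension changes
      return out;
    }

    int main()
    {
      assert(fc_output_shape({2, 3, 4}, 8, false) == (std::vector<int>{6, 8}));
      assert(fc_output_shape({2, 3, 4}, 8, true) == (std::vector<int>{2, 3, 8}));
    }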
@@ -172,7 +182,7 @@ void FullyConnected::evalQuantizedS8() const
op_params.quantized_activation_max = output_activation_max;
op_params.lhs_cacheable = false;
op_params.rhs_cacheable = false;
- tflite::reference_integer_ops::FullyConnected(
+ luci_interpreter_pal::FullyConnected<int8_t>(
op_params, getTensorShape(input()), getTensorData<int8_t>(input()), getTensorShape(weights()),
getTensorData<int8_t>(weights()), getTensorShape(bias()), getTensorData<int32_t>(bias()),
getTensorShape(output()), getTensorData<int8_t>(output()));
diff --git a/compiler/luci-interpreter/src/kernels/FullyConnected.test.cpp b/compiler/luci-interpreter/src/kernels/FullyConnected.test.cpp
index b0eda0145..4474cc4fb 100644
--- a/compiler/luci-interpreter/src/kernels/FullyConnected.test.cpp
+++ b/compiler/luci-interpreter/src/kernels/FullyConnected.test.cpp
@@ -133,7 +133,7 @@ template <typename T> class FullyConnectedTest : public ::testing::Test
};
using DataTypes = ::testing::Types<float, uint8_t, int8_t>;
-TYPED_TEST_CASE(FullyConnectedTest, DataTypes);
+TYPED_TEST_SUITE(FullyConnectedTest, DataTypes);
TYPED_TEST(FullyConnectedTest, Simple)
{
diff --git a/compiler/luci-interpreter/src/kernels/Gather.cpp b/compiler/luci-interpreter/src/kernels/Gather.cpp
new file mode 100644
index 000000000..f1256660f
--- /dev/null
+++ b/compiler/luci-interpreter/src/kernels/Gather.cpp
@@ -0,0 +1,139 @@
+/*
+ * Copyright (c) 2022 Samsung Electronics Co., Ltd. All Rights Reserved
+ * Copyright 2021 The TensorFlow Authors. All Rights Reserved.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "kernels/Gather.h"
+#include "kernels/Utils.h"
+#include "PALGather.h"
+
+#include <stdexcept>
+#include <cassert>
+
+namespace luci_interpreter
+{
+
+namespace kernels
+{
+
+Gather::Gather(const Tensor *params, const Tensor *indices, Tensor *output,
+ const GatherParams &gparams)
+ : KernelWithParams<GatherParams>({params, indices}, {output}, gparams)
+{
+}
+
+void Gather::configure()
+{
+ if (params()->element_type() == DataType::FLOAT32)
+ {
+ LUCI_INTERPRETER_CHECK(output()->element_type() == DataType::FLOAT32);
+ }
+ else
+ {
+ throw std::runtime_error("Unsupported type.");
+ }
+
+ LUCI_INTERPRETER_CHECK(indices()->element_type() == DataType::S32 ||
+ indices()->element_type() == DataType::S64);
+
+ // Refer to tensorflow/lite/kernels/gather.cc for the reference implementation.
+
+ const Shape &params_shape = params()->shape();
+ const Shape &indices_shape = indices()->shape();
+
+ int axis = _params.axis;
+ if (axis < 0)
+ {
+ axis += params_shape.num_dims();
+ }
+ LUCI_INTERPRETER_CHECK(0 <= axis && axis < params_shape.num_dims());
+
+ int batch_dims = _params.batch_dims;
+ // batch_dims must be in the range [-rank(indices), rank(indices)].
+ // A negative batch_dims is normalized by adding rank(indices) ("positions" in TFLite).
+ if (batch_dims < 0)
+ {
+ batch_dims += indices_shape.num_dims();
+ }
+ LUCI_INTERPRETER_CHECK(batch_dims <= axis);
+ LUCI_INTERPRETER_CHECK(0 <= batch_dims && batch_dims < params_shape.num_dims());
+ LUCI_INTERPRETER_CHECK(batch_dims <= indices_shape.num_dims());
+ for (int i = 0; i < batch_dims; ++i)
+ {
+ LUCI_INTERPRETER_CHECK(params_shape.dim(i) == indices_shape.dim(i));
+ }
+
+ const int num_dimensions = params_shape.num_dims() + indices_shape.num_dims() - 1 - batch_dims;
+
+ Shape output_shape(num_dimensions);
+ int output_index = 0;
+ for (int i = 0; i < axis; ++i)
+ {
+ output_shape.dim(output_index++) = params_shape.dim(i);
+ }
+ for (int i = batch_dims; i < indices_shape.num_dims(); ++i)
+ {
+ output_shape.dim(output_index++) = indices_shape.dim(i);
+ }
+ for (int i = axis + 1; i < params_shape.num_dims(); ++i)
+ {
+ output_shape.dim(output_index++) = params_shape.dim(i);
+ }
+ output()->resize(output_shape);
+}
+
+void Gather::execute() const
+{
+ switch (params()->element_type())
+ {
+ case DataType::FLOAT32:
+ evalFloat();
+ break;
+ default:
+ throw std::runtime_error("Unsupported type.");
+ }
+}
+
+void Gather::evalFloat() const
+{
+ assert(indices()->element_type() == DataType::S32 || indices()->element_type() == DataType::S64);
+
+ const auto params_data = getTensorData<float>(params());
+ auto output_data = getTensorData<float>(output());
+
+ tflite::GatherParams tparams;
+ tparams.axis = _params.axis;
+ tparams.batch_dims = _params.batch_dims;
+
+ if (indices()->element_type() == DataType::S32)
+ {
+ const auto indices_data = getTensorData<int32_t>(indices());
+
+ luci_interpreter_pal::Gather<float, int32_t>(tparams, getTensorShape(params()), params_data,
+ getTensorShape(indices()), indices_data,
+ getTensorShape(output()), output_data);
+ }
+ else
+ {
+ const auto indices_data = getTensorData<int64_t>(indices());
+
+ luci_interpreter_pal::Gather<float, int64_t>(tparams, getTensorShape(params()), params_data,
+ getTensorShape(indices()), indices_data,
+ getTensorShape(output()), output_data);
+ }
+}
+
+} // namespace kernels
+} // namespace luci_interpreter
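
The output rank is rank(params) + rank(indices) - 1 - batch_dims: the axis dimension of params is replaced by the non-batch dimensions of indices. A standalone sketch of the rule, checked against the tests below:

    #include <cassert>
    #include <vector>

    // Not patch code: mirrors Gather::configure()'s output-shape loops.
    std::vector<int> gather_shape(const std::vector<int> &params,
                                  const std::vector<int> &indices, int axis, int batch_dims)
    {
      std::vector<int> out;
      for (int i = 0; i < axis; ++i)
        out.push_back(params[i]);
      for (int i = batch_dims; i < static_cast<int>(indices.size()); ++i)
        out.push_back(indices[i]);
      for (int i = axis + 1; i < static_cast<int>(params.size()); ++i)
        out.push_back(params[i]);
      return out;
    }

    int main()
    {
      // Simple test: params {1, 6}, indices {4}, axis 1 -> {1, 4}.
      assert(gather_shape({1, 6}, {4}, 1, 0) == (std::vector<int>{1, 4}));
      // Simple_Batch test: params {3, 5}, indices {3, 2}, axis 1, batch_dims 1 -> {3, 2}.
      assert(gather_shape({3, 5}, {3, 2}, 1, 1) == (std::vector<int>{3, 2}));
    }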
diff --git a/compiler/luci-interpreter/src/kernels/Gather.h b/compiler/luci-interpreter/src/kernels/Gather.h
new file mode 100644
index 000000000..cc02d64fb
--- /dev/null
+++ b/compiler/luci-interpreter/src/kernels/Gather.h
@@ -0,0 +1,47 @@
+/*
+ * Copyright (c) 2022 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef LUCI_INTERPRETER_KERNELS_GATHER_H
+#define LUCI_INTERPRETER_KERNELS_GATHER_H
+
+#include "core/Kernel.h"
+#include "core/KernelParams.h"
+
+namespace luci_interpreter
+{
+namespace kernels
+{
+
+class Gather : public KernelWithParams<GatherParams>
+{
+public:
+ Gather(const Tensor *params, const Tensor *indices, Tensor *output, const GatherParams &gparams);
+
+ const Tensor *params() const { return _inputs[0]; }
+ const Tensor *indices() const { return _inputs[1]; }
+ Tensor *output() const { return _outputs[0]; }
+
+ void configure() override;
+ void execute() const override;
+
+private:
+ void evalFloat() const;
+};
+
+} // namespace kernels
+} // namespace luci_interpreter
+
+#endif // LUCI_INTERPRETER_KERNELS_GATHER_H
diff --git a/compiler/luci-interpreter/src/kernels/Gather.test.cpp b/compiler/luci-interpreter/src/kernels/Gather.test.cpp
new file mode 100644
index 000000000..4b3dda708
--- /dev/null
+++ b/compiler/luci-interpreter/src/kernels/Gather.test.cpp
@@ -0,0 +1,137 @@
+/*
+ * Copyright (c) 2022 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "kernels/Gather.h"
+#include "kernels/TestUtils.h"
+#include "luci_interpreter/TestMemoryManager.h"
+
+namespace luci_interpreter
+{
+namespace kernels
+{
+namespace
+{
+
+using namespace testing;
+
+class GatherTest : public ::testing::Test
+{
+protected:
+ void SetUp() override { _memory_manager = std::make_unique<TestMemoryManager>(); }
+
+ std::unique_ptr<IMemoryManager> _memory_manager;
+};
+
+TEST_F(GatherTest, Simple)
+{
+ std::vector<float> params_data{1.f, 2.f, 3.f, 4.f, 5.f, 6.f};
+ std::vector<int32_t> indices_data{1, 0, 1, 5};
+ std::vector<float> ref_output_data{2.f, 1.f, 2.f, 6.f};
+
+ Tensor params_tensor =
+ makeInputTensor<DataType::FLOAT32>({1, 6}, params_data, _memory_manager.get());
+ Tensor indices_tensor = makeInputTensor<DataType::S32>({4}, indices_data, _memory_manager.get());
+ Tensor output_tensor = makeOutputTensor(DataType::FLOAT32);
+ GatherParams gparams;
+
+ gparams.axis = 1;
+ gparams.batch_dims = 0;
+
+ Gather kernel(&params_tensor, &indices_tensor, &output_tensor, gparams);
+ kernel.configure();
+ _memory_manager->allocate_memory(output_tensor);
+ kernel.execute();
+
+ EXPECT_THAT(extractTensorData<float>(output_tensor),
+ ::testing::ElementsAreArray(ref_output_data));
+ EXPECT_THAT(extractTensorShape(output_tensor), ::testing::ElementsAreArray({1, 4}));
+}
+
+TEST_F(GatherTest, Simple_Batch)
+{
+ Shape params_shape = {3, 5};
+ Shape indices_shape = {3, 2};
+ std::vector<float> params_data{0., 0., 1., 0., 2., 3., 0., 0., 0., 4., 0., 5., 0., 6., 0.};
+ std::vector<int32_t> indices_data{2, 4, 0, 4, 1, 3};
+ std::vector<float> ref_output_data{1., 2., 3., 4., 5., 6.};
+
+ Tensor params_tensor =
+ makeInputTensor<DataType::FLOAT32>(params_shape, params_data, _memory_manager.get());
+ Tensor indices_tensor =
+ makeInputTensor<DataType::S32>(indices_shape, indices_data, _memory_manager.get());
+ Tensor output_tensor = makeOutputTensor(DataType::FLOAT32);
+ GatherParams gparams;
+
+ gparams.axis = 1;
+ gparams.batch_dims = 1;
+
+ Gather kernel(&params_tensor, &indices_tensor, &output_tensor, gparams);
+ kernel.configure();
+ _memory_manager->allocate_memory(output_tensor);
+ kernel.execute();
+
+ EXPECT_THAT(extractTensorData<float>(output_tensor),
+ ::testing::ElementsAreArray(ref_output_data));
+ EXPECT_THAT(extractTensorShape(output_tensor), ::testing::ElementsAreArray({3, 2}));
+}
+
+TEST_F(GatherTest, Simple_NEG)
+{
+ Tensor params_tensor = makeInputTensor<DataType::S32>({1}, {1}, _memory_manager.get());
+ Tensor indices_tensor = makeInputTensor<DataType::S32>({1}, {0}, _memory_manager.get());
+ Tensor output_tensor = makeOutputTensor(DataType::FLOAT32);
+ GatherParams gparams;
+
+ Gather kernel(&params_tensor, &indices_tensor, &output_tensor, gparams);
+ EXPECT_ANY_THROW(kernel.configure());
+}
+
+TEST_F(GatherTest, Axis_NEG)
+{
+ Tensor params_tensor = makeInputTensor<DataType::FLOAT32>({1}, {1.f}, _memory_manager.get());
+ Tensor indices_tensor = makeInputTensor<DataType::S32>({1}, {0}, _memory_manager.get());
+ Tensor output_tensor = makeOutputTensor(DataType::FLOAT32);
+ GatherParams gparams;
+
+ gparams.axis = 100;
+ gparams.batch_dims = 0;
+
+ Gather kernel(&params_tensor, &indices_tensor, &output_tensor, gparams);
+ EXPECT_ANY_THROW(kernel.configure());
+}
+
+TEST_F(GatherTest, Batch_NEG)
+{
+ std::vector<float> params_data{1.f, 2.f, 3.f, 4.f, 5.f, 6.f};
+ std::vector<int32_t> indices_data{1, 0, 1, 5};
+ std::vector<float> ref_output_data{2.f, 1.f, 2.f, 6.f};
+
+ Tensor params_tensor =
+ makeInputTensor<DataType::FLOAT32>({1, 6}, params_data, _memory_manager.get());
+ Tensor indices_tensor = makeInputTensor<DataType::S32>({4}, indices_data, _memory_manager.get());
+ Tensor output_tensor = makeOutputTensor(DataType::FLOAT32);
+ GatherParams gparams;
+
+ gparams.axis = 0;
+ gparams.batch_dims = 1;
+
+ Gather kernel(&params_tensor, &indices_tensor, &output_tensor, gparams);
+ EXPECT_ANY_THROW(kernel.configure());
+}
+
+} // namespace
+} // namespace kernels
+} // namespace luci_interpreter
diff --git a/compiler/luci-interpreter/src/kernels/Greater.cpp b/compiler/luci-interpreter/src/kernels/Greater.cpp
index f0dd2db36..5ccae3c38 100644
--- a/compiler/luci-interpreter/src/kernels/Greater.cpp
+++ b/compiler/luci-interpreter/src/kernels/Greater.cpp
@@ -49,6 +49,12 @@ void Greater::execute() const
case DataType::FLOAT32:
evalFloat();
break;
+ case DataType::S64:
+ evalInteger<int64_t>();
+ break;
+ case DataType::S32:
+ evalInteger<int32_t>();
+ break;
case DataType::U8:
evalQuantized();
break;
@@ -79,6 +85,29 @@ void Greater::evalFloat() const
}
}
+template <typename T> void Greater::evalInteger() const
+{
+ const auto x_data = getTensorData<T>(x());
+ const auto y_data = getTensorData<T>(y());
+ auto output_data = getTensorData<bool>(output());
+
+ tflite::ComparisonParams op_params;
+ op_params.is_broadcast = x()->shape() != y()->shape();
+
+ if (op_params.is_broadcast)
+ {
+ tflite::reference_ops::Broadcast4DSlowGreaterNoScaling(op_params, getTensorShape(x()), x_data,
+ getTensorShape(y()), y_data,
+ getTensorShape(output()), output_data);
+ }
+ else
+ {
+ tflite::reference_ops::GreaterNoScaling(op_params, getTensorShape(x()), x_data,
+ getTensorShape(y()), y_data, getTensorShape(output()),
+ output_data);
+ }
+}
+
void Greater::evalQuantized() const
{
const auto x_data = getTensorData<uint8_t>(x());
diff --git a/compiler/luci-interpreter/src/kernels/Greater.h b/compiler/luci-interpreter/src/kernels/Greater.h
index 877c139c9..065f76d7b 100644
--- a/compiler/luci-interpreter/src/kernels/Greater.h
+++ b/compiler/luci-interpreter/src/kernels/Greater.h
@@ -38,6 +38,7 @@ public:
private:
void evalFloat() const;
+ template <typename T> void evalInteger() const;
void evalQuantized() const;
private:
diff --git a/compiler/luci-interpreter/src/kernels/Greater.test.cpp b/compiler/luci-interpreter/src/kernels/Greater.test.cpp
index ba3925f17..a48080124 100644
--- a/compiler/luci-interpreter/src/kernels/Greater.test.cpp
+++ b/compiler/luci-interpreter/src/kernels/Greater.test.cpp
@@ -97,6 +97,82 @@ TEST_F(GreaterTest, FloatBroardcast)
EXPECT_THAT(extractTensorShape(output_tensor), ::testing::ElementsAreArray({3, 3}));
}
+template <loco::DataType DType>
+void checkIntegerSimple(luci_interpreter::IMemoryManager *memory_manager)
+{
+ using dtype = typename loco::DataTypeImpl<DType>::Type;
+ dtype min_value = std::numeric_limits<dtype>::min();
+ dtype max_value = std::numeric_limits<dtype>::max();
+ std::vector<dtype> x_data{min_value, 2, max_value};
+
+ std::vector<dtype> y_data{min_value + 1, -2, max_value};
+
+ std::vector<bool> ref_output_data{false, true, false};
+
+ Tensor x_tensor = makeInputTensor<DType>({3}, x_data, memory_manager);
+ Tensor y_tensor = makeInputTensor<DType>({3}, y_data, memory_manager);
+ Tensor output_tensor = makeOutputTensor(DataType::BOOL);
+
+ Greater kernel(&x_tensor, &y_tensor, &output_tensor);
+ kernel.configure();
+ memory_manager->allocate_memory(output_tensor);
+ kernel.execute();
+
+ EXPECT_THAT(extractTensorData<bool>(output_tensor), ::testing::ElementsAreArray(ref_output_data));
+ EXPECT_THAT(extractTensorShape(output_tensor), ::testing::ElementsAreArray({3}));
+}
+
+template <loco::DataType DType>
+void checkIntegerBroadcast(luci_interpreter::IMemoryManager *memory_manager)
+{
+ using dtype = typename loco::DataTypeImpl<DType>::Type;
+ dtype min_value = std::numeric_limits<dtype>::min();
+ dtype max_value = std::numeric_limits<dtype>::max();
+ std::vector<dtype> x_data{
+ min_value, 2, 3, // Row 1
+ 4, 5, max_value, // Row 2
+ -1, -4, -3, // Row 3
+ min_value, -2, max_value, // Row 4
+ };
+
+ std::vector<dtype> y_data{
+ min_value + 1, -2, max_value - 1, // Row 1
+ };
+
+ std::vector<bool> ref_output_data{
+ false, true, false, // Row 1
+ true, true, true, // Row 2
+ true, false, false, // Row 3
+ false, false, true, // Row 4
+ };
+
+ Tensor x_tensor = makeInputTensor<DType>({4, 3}, x_data, memory_manager);
+ Tensor y_tensor = makeInputTensor<DType>({3}, y_data, memory_manager);
+ Tensor output_tensor = makeOutputTensor(DataType::BOOL);
+
+ Greater kernel(&x_tensor, &y_tensor, &output_tensor);
+ kernel.configure();
+ memory_manager->allocate_memory(output_tensor);
+ kernel.execute();
+
+ EXPECT_THAT(extractTensorData<bool>(output_tensor), ::testing::ElementsAreArray(ref_output_data));
+ EXPECT_THAT(extractTensorShape(output_tensor), ::testing::ElementsAreArray({4, 3}));
+}
+
+TEST_F(GreaterTest, Int32)
+{
+ checkIntegerSimple<loco::DataType::S32>(_memory_manager.get());
+ checkIntegerBroadcast<loco::DataType::S32>(_memory_manager.get());
+ SUCCEED();
+}
+
+TEST_F(GreaterTest, Int64)
+{
+ checkIntegerSimple<loco::DataType::S64>(_memory_manager.get());
+ checkIntegerBroadcast<loco::DataType::S64>(_memory_manager.get());
+ SUCCEED();
+}
+
// Choose min / max in such a way that there are exactly 256 units to avoid rounding errors.
const float F_MIN = -128.0 / 128.0;
const float F_MAX = 127.0 / 128.0;
@@ -223,6 +299,36 @@ TEST_F(GreaterTest, Input_Output_Type_NEG)
EXPECT_ANY_THROW(kernel.configure());
}
+TEST_F(GreaterTest, Float_Broadcast_NEG)
+{
+ Tensor x_tensor = makeInputTensor<DataType::FLOAT32>({2}, {1.f, 2.f}, _memory_manager.get());
+ Tensor y_tensor = makeInputTensor<DataType::FLOAT32>({3}, {1.f, 2.f, 3.f}, _memory_manager.get());
+ Tensor output_tensor = makeOutputTensor(DataType::BOOL);
+
+ Greater kernel(&x_tensor, &y_tensor, &output_tensor);
+ EXPECT_ANY_THROW(kernel.configure());
+}
+
+TEST_F(GreaterTest, Int32_Broadcast_NEG)
+{
+ Tensor x_tensor = makeInputTensor<DataType::S32>({2}, {1, 2}, _memory_manager.get());
+ Tensor y_tensor = makeInputTensor<DataType::S32>({3}, {1, 2, 3}, _memory_manager.get());
+ Tensor output_tensor = makeOutputTensor(DataType::BOOL);
+
+ Greater kernel(&x_tensor, &y_tensor, &output_tensor);
+ EXPECT_ANY_THROW(kernel.configure());
+}
+
+TEST_F(GreaterTest, Int64_Broadcast_NEG)
+{
+ Tensor x_tensor = makeInputTensor<DataType::S64>({2}, {1, 2}, _memory_manager.get());
+ Tensor y_tensor = makeInputTensor<DataType::S64>({3}, {1, 2, 3}, _memory_manager.get());
+ Tensor output_tensor = makeOutputTensor(DataType::BOOL);
+
+ Greater kernel(&x_tensor, &y_tensor, &output_tensor);
+ EXPECT_ANY_THROW(kernel.configure());
+}
+
} // namespace
} // namespace kernels
} // namespace luci_interpreter
diff --git a/compiler/luci-interpreter/src/kernels/GreaterEqual.cpp b/compiler/luci-interpreter/src/kernels/GreaterEqual.cpp
index e7c1b4afe..27e42c971 100644
--- a/compiler/luci-interpreter/src/kernels/GreaterEqual.cpp
+++ b/compiler/luci-interpreter/src/kernels/GreaterEqual.cpp
@@ -52,6 +52,12 @@ void GreaterEqual::execute() const
case DataType::FLOAT32:
evalFloat();
break;
+ case DataType::S64:
+ evalInteger<int64_t>();
+ break;
+ case DataType::S32:
+ evalInteger<int32_t>();
+ break;
case DataType::U8:
evalQuantized();
break;
@@ -82,6 +88,29 @@ void GreaterEqual::evalFloat() const
}
}
+template <typename T> void GreaterEqual::evalInteger() const
+{
+ const auto x_data = getTensorData<T>(x());
+ const auto y_data = getTensorData<T>(y());
+ auto output_data = getTensorData<bool>(output());
+
+ tflite::ComparisonParams op_params;
+ op_params.is_broadcast = x()->shape() != y()->shape();
+
+ if (op_params.is_broadcast)
+ {
+ tflite::reference_ops::Broadcast4DSlowGreaterEqualNoScaling(
+ op_params, getTensorShape(x()), x_data, getTensorShape(y()), y_data, getTensorShape(output()),
+ output_data);
+ }
+ else
+ {
+ tflite::reference_ops::GreaterEqualNoScaling(op_params, getTensorShape(x()), x_data,
+ getTensorShape(y()), y_data,
+ getTensorShape(output()), output_data);
+ }
+}
+
void GreaterEqual::evalQuantized() const
{
const auto x_data = getTensorData<uint8_t>(x());
diff --git a/compiler/luci-interpreter/src/kernels/GreaterEqual.h b/compiler/luci-interpreter/src/kernels/GreaterEqual.h
index 4a0f48748..e333c30a6 100644
--- a/compiler/luci-interpreter/src/kernels/GreaterEqual.h
+++ b/compiler/luci-interpreter/src/kernels/GreaterEqual.h
@@ -38,6 +38,7 @@ public:
private:
void evalFloat() const;
+ template <typename T> void evalInteger() const;
void evalQuantized() const;
private:
diff --git a/compiler/luci-interpreter/src/kernels/GreaterEqual.test.cpp b/compiler/luci-interpreter/src/kernels/GreaterEqual.test.cpp
index a9d172301..35bf88eab 100644
--- a/compiler/luci-interpreter/src/kernels/GreaterEqual.test.cpp
+++ b/compiler/luci-interpreter/src/kernels/GreaterEqual.test.cpp
@@ -96,6 +96,81 @@ TEST_F(GreaterEqualTest, FloatBroardcast)
EXPECT_THAT(extractTensorData<bool>(output_tensor), ::testing::ElementsAreArray(ref_output_data));
EXPECT_THAT(extractTensorShape(output_tensor), ::testing::ElementsAreArray({3, 3}));
}
+template <loco::DataType DType>
+void checkIntegerSimple(luci_interpreter::IMemoryManager *memory_manager)
+{
+ using dtype = typename loco::DataTypeImpl<DType>::Type;
+ dtype min_value = std::numeric_limits<dtype>::min();
+ dtype max_value = std::numeric_limits<dtype>::max();
+ std::vector<dtype> x_data{min_value, 2, max_value};
+
+ std::vector<dtype> y_data{min_value + 1, -2, max_value};
+
+ std::vector<bool> ref_output_data{false, true, true};
+
+ Tensor x_tensor = makeInputTensor<DType>({3}, x_data, memory_manager);
+ Tensor y_tensor = makeInputTensor<DType>({3}, y_data, memory_manager);
+ Tensor output_tensor = makeOutputTensor(DataType::BOOL);
+
+ GreaterEqual kernel(&x_tensor, &y_tensor, &output_tensor);
+ kernel.configure();
+ memory_manager->allocate_memory(output_tensor);
+ kernel.execute();
+
+ EXPECT_THAT(extractTensorData<bool>(output_tensor), ::testing::ElementsAreArray(ref_output_data));
+ EXPECT_THAT(extractTensorShape(output_tensor), ::testing::ElementsAreArray({3}));
+}
+
+template <loco::DataType DType>
+void checkIntegerBroadcast(luci_interpreter::IMemoryManager *memory_manager)
+{
+ using dtype = typename loco::DataTypeImpl<DType>::Type;
+ dtype min_value = std::numeric_limits<dtype>::min();
+ dtype max_value = std::numeric_limits<dtype>::max();
+ std::vector<dtype> x_data{
+ min_value, 2, 3, // Row 1
+ 4, 5, max_value, // Row 2
+ -1, -4, -3, // Row 3
+ min_value, -2, max_value - 1, // Row 4
+ };
+
+ std::vector<dtype> y_data{
+ min_value + 1, -2, max_value - 1, // Row 1
+ };
+
+ std::vector<bool> ref_output_data{
+ false, true, false, // Row 1
+ true, true, true, // Row 2
+ true, false, false, // Row 3
+ false, true, true, // Row 4
+ };
+
+ Tensor x_tensor = makeInputTensor<DType>({4, 3}, x_data, memory_manager);
+ Tensor y_tensor = makeInputTensor<DType>({3}, y_data, memory_manager);
+ Tensor output_tensor = makeOutputTensor(DataType::BOOL);
+
+ GreaterEqual kernel(&x_tensor, &y_tensor, &output_tensor);
+ kernel.configure();
+ memory_manager->allocate_memory(output_tensor);
+ kernel.execute();
+
+ EXPECT_THAT(extractTensorData<bool>(output_tensor), ::testing::ElementsAreArray(ref_output_data));
+ EXPECT_THAT(extractTensorShape(output_tensor), ::testing::ElementsAreArray({4, 3}));
+}
+
+TEST_F(GreaterEqualTest, Int32)
+{
+ checkIntegerSimple<loco::DataType::S32>(_memory_manager.get());
+ checkIntegerBroadcast<loco::DataType::S32>(_memory_manager.get());
+ SUCCEED();
+}
+
+TEST_F(GreaterEqualTest, Int64)
+{
+ checkIntegerSimple<loco::DataType::S64>(_memory_manager.get());
+ checkIntegerBroadcast<loco::DataType::S64>(_memory_manager.get());
+ SUCCEED();
+}
// Choose min / max in such a way that there are exactly 256 units to avoid rounding errors.
const float F_MIN = -128.0 / 128.0;
@@ -223,6 +298,36 @@ TEST_F(GreaterEqualTest, Input_Output_Type_NEG)
EXPECT_ANY_THROW(kernel.configure());
}
+TEST_F(GreaterEqualTest, Float_Broadcast_NEG)
+{
+ Tensor x_tensor = makeInputTensor<DataType::FLOAT32>({2}, {1.f, 2.f}, _memory_manager.get());
+ Tensor y_tensor = makeInputTensor<DataType::FLOAT32>({3}, {1.f, 2.f, 3.f}, _memory_manager.get());
+ Tensor output_tensor = makeOutputTensor(DataType::BOOL);
+
+ GreaterEqual kernel(&x_tensor, &y_tensor, &output_tensor);
+ EXPECT_ANY_THROW(kernel.configure());
+}
+
+TEST_F(GreaterEqualTest, Int32_Broadcast_NEG)
+{
+ Tensor x_tensor = makeInputTensor<DataType::S32>({2}, {1, 2}, _memory_manager.get());
+ Tensor y_tensor = makeInputTensor<DataType::S32>({3}, {1, 2, 3}, _memory_manager.get());
+ Tensor output_tensor = makeOutputTensor(DataType::BOOL);
+
+ GreaterEqual kernel(&x_tensor, &y_tensor, &output_tensor);
+ EXPECT_ANY_THROW(kernel.configure());
+}
+
+TEST_F(GreaterEqualTest, Int64_Broadcast_NEG)
+{
+ Tensor x_tensor = makeInputTensor<DataType::S64>({2}, {1, 2}, _memory_manager.get());
+ Tensor y_tensor = makeInputTensor<DataType::S64>({3}, {1, 2, 3}, _memory_manager.get());
+ Tensor output_tensor = makeOutputTensor(DataType::BOOL);
+
+ GreaterEqual kernel(&x_tensor, &y_tensor, &output_tensor);
+ EXPECT_ANY_THROW(kernel.configure());
+}
+
} // namespace
} // namespace kernels
} // namespace luci_interpreter
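The *_Broadcast_NEG tests pair shapes {2} and {3}: under the usual right-aligned broadcast rule, two dimensions are compatible only if they are equal or one of them is 1, so configure() has to reject this pair. A minimal sketch of that rule (illustrative, NumPy-style; the helper name is hypothetical, not the repo's actual shape check):

    #include <vector>

    // Two shapes broadcast iff, aligned from the right, every dimension pair
    // is equal or contains a 1.
    static bool broadcastCompatible(const std::vector<int> &a, const std::vector<int> &b)
    {
      auto ia = a.rbegin();
      auto ib = b.rbegin();
      for (; ia != a.rend() && ib != b.rend(); ++ia, ++ib)
        if (*ia != *ib && *ia != 1 && *ib != 1)
          return false; // e.g. {2} vs {3}: 2 != 3 and neither is 1
      return true;
    }

broadcastCompatible({2}, {3}) is false, which is exactly the condition the EXPECT_ANY_THROW assertions exercise.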
diff --git a/compiler/luci-interpreter/src/kernels/L2Normalize.test.cpp b/compiler/luci-interpreter/src/kernels/L2Normalize.test.cpp
index 1e565e358..6f960e8b4 100644
--- a/compiler/luci-interpreter/src/kernels/L2Normalize.test.cpp
+++ b/compiler/luci-interpreter/src/kernels/L2Normalize.test.cpp
@@ -81,7 +81,7 @@ template <typename T> class L2NormalizeTest : public ::testing::Test
};
using DataTypes = ::testing::Types<float, uint8_t>;
-TYPED_TEST_CASE(L2NormalizeTest, DataTypes);
+TYPED_TEST_SUITE(L2NormalizeTest, DataTypes);
TYPED_TEST(L2NormalizeTest, Simple)
{
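TYPED_TEST_CASE was deprecated in GoogleTest 1.10 in favor of TYPED_TEST_SUITE; the two macros are interchangeable here, so these renames (repeated across the test files below) are mechanical. The updated pattern, as a minimal sketch with hypothetical fixture and test names:

    #include <gtest/gtest.h>

    template <typename T> class ExampleTypedTest : public ::testing::Test
    {
    };

    using TestedTypes = ::testing::Types<float, uint8_t>;
    TYPED_TEST_SUITE(ExampleTypedTest, TestedTypes); // was TYPED_TEST_CASE

    TYPED_TEST(ExampleTypedTest, DefaultIsZero)
    {
      TypeParam value{};
      EXPECT_EQ(value, TypeParam{0});
    }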
diff --git a/compiler/luci-interpreter/src/kernels/L2Pool2D.test.cpp b/compiler/luci-interpreter/src/kernels/L2Pool2D.test.cpp
index 289742a50..7245456cb 100644
--- a/compiler/luci-interpreter/src/kernels/L2Pool2D.test.cpp
+++ b/compiler/luci-interpreter/src/kernels/L2Pool2D.test.cpp
@@ -206,7 +206,8 @@ TEST_F(L2Pool2DTest, FloatPaddingSameStride)
kernel.execute();
std::vector<float> ref_output_data{3.5, 6.0, 6.5, 5.70088, 2.54951, 7.2111, 8.63134, 7.0};
- EXPECT_THAT(extractTensorData<float>(output_tensor), FloatArrayNear(ref_output_data));
+ // NOTE with NEON+ruy, error is #1=-1.14441e-05, #6=-1.81198e-05
+ EXPECT_THAT(extractTensorData<float>(output_tensor), FloatArrayNear(ref_output_data, 1.0e-4f));
// TODO make a Shape checking of output_tensor.
}
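The loosened bound is generous relative to the observed drift: the largest reported NEON+ruy error is about 1.8e-5, well inside the 1.0e-4 tolerance, so the assertion still catches real regressions. Spelled out, with the values taken from the NOTE above:

    #include <cassert>
    #include <cmath>

    int main()
    {
      const float tolerance = 1.0e-4f;
      assert(std::fabs(-1.14441e-05f) < tolerance); // element #1 under NEON+ruy
      assert(std::fabs(-1.81198e-05f) < tolerance); // element #6 under NEON+ruy
      return 0;
    }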
diff --git a/compiler/luci-interpreter/src/kernels/LeakyRelu.test.cpp b/compiler/luci-interpreter/src/kernels/LeakyRelu.test.cpp
index 6ec8a348a..0f6263b57 100644
--- a/compiler/luci-interpreter/src/kernels/LeakyRelu.test.cpp
+++ b/compiler/luci-interpreter/src/kernels/LeakyRelu.test.cpp
@@ -83,7 +83,7 @@ template <typename T> class LeakReluTest : public ::testing::Test
};
using DataTypes = ::testing::Types<float, uint8_t>;
-TYPED_TEST_CASE(LeakReluTest, DataTypes);
+TYPED_TEST_SUITE(LeakReluTest, DataTypes);
TYPED_TEST(LeakReluTest, Simple)
{
diff --git a/compiler/luci-interpreter/src/kernels/Less.cpp b/compiler/luci-interpreter/src/kernels/Less.cpp
index 041444926..8d26ff297 100644
--- a/compiler/luci-interpreter/src/kernels/Less.cpp
+++ b/compiler/luci-interpreter/src/kernels/Less.cpp
@@ -49,6 +49,12 @@ void Less::execute() const
case DataType::FLOAT32:
evalFloat();
break;
+ case DataType::S64:
+ evalInteger<int64_t>();
+ break;
+ case DataType::S32:
+ evalInteger<int32_t>();
+ break;
case DataType::U8:
evalQuantized();
break;
@@ -79,6 +85,29 @@ void Less::evalFloat() const
}
}
+template <typename T> void Less::evalInteger() const
+{
+ const auto x_data = getTensorData<T>(x());
+ const auto y_data = getTensorData<T>(y());
+ auto output_data = getTensorData<bool>(output());
+
+ tflite::ComparisonParams op_params;
+ op_params.is_broadcast = x()->shape() != y()->shape();
+
+ if (op_params.is_broadcast)
+ {
+ tflite::reference_ops::Broadcast4DSlowLessNoScaling(op_params, getTensorShape(x()), x_data,
+ getTensorShape(y()), y_data,
+ getTensorShape(output()), output_data);
+ }
+ else
+ {
+ tflite::reference_ops::LessNoScaling(op_params, getTensorShape(x()), x_data,
+ getTensorShape(y()), y_data, getTensorShape(output()),
+ output_data);
+ }
+}
+
void Less::evalQuantized() const
{
const auto x_data = getTensorData<uint8_t>(x());
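evalInteger mirrors the dispatch in evalFloat: identical input shapes take the flat elementwise kernel, anything else goes through the rank-4 slow broadcast kernel, which effectively reads the smaller operand with stride 0 along the missing axes. A standalone sketch of that behaviour for x of shape {4, 3} against y of shape {3} (illustrative only, not the TFLite kernel):

    #include <cstdint>
    #include <vector>

    // out[r][c] = x[r][c] < y[c]; y's row index is ignored (stride-0 broadcast)
    static std::vector<bool> lessBroadcastRows(const std::vector<int64_t> &x,
                                               const std::vector<int64_t> &y, int rows, int cols)
    {
      std::vector<bool> out(rows * cols);
      for (int r = 0; r < rows; ++r)
        for (int c = 0; c < cols; ++c)
          out[r * cols + c] = x[r * cols + c] < y[c];
      return out;
    }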
diff --git a/compiler/luci-interpreter/src/kernels/Less.h b/compiler/luci-interpreter/src/kernels/Less.h
index 293740e72..e27bb689c 100644
--- a/compiler/luci-interpreter/src/kernels/Less.h
+++ b/compiler/luci-interpreter/src/kernels/Less.h
@@ -38,6 +38,7 @@ public:
private:
void evalFloat() const;
+ template <typename T> void evalInteger() const;
void evalQuantized() const;
private:
diff --git a/compiler/luci-interpreter/src/kernels/Less.test.cpp b/compiler/luci-interpreter/src/kernels/Less.test.cpp
index e9d09b288..8c5963363 100644
--- a/compiler/luci-interpreter/src/kernels/Less.test.cpp
+++ b/compiler/luci-interpreter/src/kernels/Less.test.cpp
@@ -97,6 +97,82 @@ TEST_F(LessTest, FloatBroardcast)
EXPECT_THAT(extractTensorShape(output_tensor), ::testing::ElementsAreArray({3, 3}));
}
+template <loco::DataType DType>
+void checkIntegerSimple(luci_interpreter::IMemoryManager *memory_manager)
+{
+ using dtype = typename loco::DataTypeImpl<DType>::Type;
+ dtype min_value = std::numeric_limits<dtype>::min();
+ dtype max_value = std::numeric_limits<dtype>::max();
+ std::vector<dtype> x_data{min_value, 2, max_value};
+
+ std::vector<dtype> y_data{min_value + 1, -2, max_value};
+
+ std::vector<bool> ref_output_data{true, false, false};
+
+ Tensor x_tensor = makeInputTensor<DType>({3}, x_data, memory_manager);
+ Tensor y_tensor = makeInputTensor<DType>({3}, y_data, memory_manager);
+ Tensor output_tensor = makeOutputTensor(DataType::BOOL);
+
+ Less kernel(&x_tensor, &y_tensor, &output_tensor);
+ kernel.configure();
+ memory_manager->allocate_memory(output_tensor);
+ kernel.execute();
+
+ EXPECT_THAT(extractTensorData<bool>(output_tensor), ::testing::ElementsAreArray(ref_output_data));
+ EXPECT_THAT(extractTensorShape(output_tensor), ::testing::ElementsAreArray({3}));
+}
+
+template <loco::DataType DType>
+void checkIntegerBroadcast(luci_interpreter::IMemoryManager *memory_manager)
+{
+ using dtype = typename loco::DataTypeImpl<DType>::Type;
+ dtype min_value = std::numeric_limits<dtype>::min();
+ dtype max_value = std::numeric_limits<dtype>::max();
+ std::vector<dtype> x_data{
+ min_value, 2, 3, // Row 1
+ 4, 5, max_value, // Row 2
+ -1, -4, -3, // Row 3
+ min_value, -2, max_value, // Row 4
+ };
+
+ std::vector<dtype> y_data{
+ min_value + 1, -2, max_value - 1, // Row 1
+ };
+
+ std::vector<bool> ref_output_data{
+ true, false, true, // Row 1
+ false, false, false, // Row 2
+ false, true, true, // Row 3
+ true, false, false, // Row 4
+ };
+
+ Tensor x_tensor = makeInputTensor<DType>({4, 3}, x_data, memory_manager);
+ Tensor y_tensor = makeInputTensor<DType>({3}, y_data, memory_manager);
+ Tensor output_tensor = makeOutputTensor(DataType::BOOL);
+
+ Less kernel(&x_tensor, &y_tensor, &output_tensor);
+ kernel.configure();
+ memory_manager->allocate_memory(output_tensor);
+ kernel.execute();
+
+ EXPECT_THAT(extractTensorData<bool>(output_tensor), ::testing::ElementsAreArray(ref_output_data));
+ EXPECT_THAT(extractTensorShape(output_tensor), ::testing::ElementsAreArray({4, 3}));
+}
+
+TEST_F(LessTest, Int32)
+{
+ checkIntegerSimple<loco::DataType::S32>(_memory_manager.get());
+ checkIntegerBroadcast<loco::DataType::S32>(_memory_manager.get());
+ SUCCEED();
+}
+
+TEST_F(LessTest, Int64)
+{
+ checkIntegerSimple<loco::DataType::S64>(_memory_manager.get());
+ checkIntegerBroadcast<loco::DataType::S64>(_memory_manager.get());
+ SUCCEED();
+}
+
// Choose min / max in such a way that there are exactly 256 units to avoid rounding errors.
const float F_MIN = -128.0 / 128.0;
const float F_MAX = 127.0 / 128.0;
@@ -223,6 +299,36 @@ TEST_F(LessTest, Input_Output_Type_NEG)
EXPECT_ANY_THROW(kernel.configure());
}
+TEST_F(LessTest, Float_Broadcast_NEG)
+{
+ Tensor x_tensor = makeInputTensor<DataType::FLOAT32>({2}, {1.f, 2.f}, _memory_manager.get());
+ Tensor y_tensor = makeInputTensor<DataType::FLOAT32>({3}, {1.f, 2.f, 3.f}, _memory_manager.get());
+ Tensor output_tensor = makeOutputTensor(DataType::BOOL);
+
+ Less kernel(&x_tensor, &y_tensor, &output_tensor);
+ ASSERT_ANY_THROW(kernel.configure());
+}
+
+TEST_F(LessTest, Int32_Broadcast_NEG)
+{
+ Tensor x_tensor = makeInputTensor<DataType::S32>({2}, {1, 2}, _memory_manager.get());
+ Tensor y_tensor = makeInputTensor<DataType::S32>({3}, {1, 2, 3}, _memory_manager.get());
+ Tensor output_tensor = makeOutputTensor(DataType::BOOL);
+
+ Less kernel(&x_tensor, &y_tensor, &output_tensor);
+ ASSERT_ANY_THROW(kernel.configure());
+}
+
+TEST_F(LessTest, Int64_Broadcast_NEG)
+{
+ Tensor x_tensor = makeInputTensor<DataType::S64>({2}, {1, 2}, _memory_manager.get());
+ Tensor y_tensor = makeInputTensor<DataType::S64>({3}, {1, 2, 3}, _memory_manager.get());
+ Tensor output_tensor = makeOutputTensor(DataType::BOOL);
+
+ Less kernel(&x_tensor, &y_tensor, &output_tensor);
+ ASSERT_ANY_THROW(kernel.configure());
+}
+
} // namespace
} // namespace kernels
} // namespace luci_interpreter
diff --git a/compiler/luci-interpreter/src/kernels/LessEqual.cpp b/compiler/luci-interpreter/src/kernels/LessEqual.cpp
index 5f4c7f7aa..b474bc47a 100644
--- a/compiler/luci-interpreter/src/kernels/LessEqual.cpp
+++ b/compiler/luci-interpreter/src/kernels/LessEqual.cpp
@@ -49,6 +49,12 @@ void LessEqual::execute() const
case DataType::FLOAT32:
evalFloat();
break;
+ case DataType::S64:
+ evalInteger<int64_t>();
+ break;
+ case DataType::S32:
+ evalInteger<int32_t>();
+ break;
case DataType::U8:
evalQuantized();
break;
@@ -79,6 +85,29 @@ void LessEqual::evalFloat() const
}
}
+template <typename T> void LessEqual::evalInteger() const
+{
+ const auto x_data = getTensorData<T>(x());
+ const auto y_data = getTensorData<T>(y());
+ auto output_data = getTensorData<bool>(output());
+
+ tflite::ComparisonParams op_params;
+ op_params.is_broadcast = x()->shape() != y()->shape();
+
+ if (op_params.is_broadcast)
+ {
+ tflite::reference_ops::Broadcast4DSlowLessEqualNoScaling(op_params, getTensorShape(x()), x_data,
+ getTensorShape(y()), y_data,
+ getTensorShape(output()), output_data);
+ }
+ else
+ {
+ tflite::reference_ops::LessEqualNoScaling(op_params, getTensorShape(x()), x_data,
+ getTensorShape(y()), y_data, getTensorShape(output()),
+ output_data);
+ }
+}
+
void LessEqual::evalQuantized() const
{
const auto x_data = getTensorData<uint8_t>(x());
diff --git a/compiler/luci-interpreter/src/kernels/LessEqual.h b/compiler/luci-interpreter/src/kernels/LessEqual.h
index b6da1a2a8..f82ea90d4 100644
--- a/compiler/luci-interpreter/src/kernels/LessEqual.h
+++ b/compiler/luci-interpreter/src/kernels/LessEqual.h
@@ -38,6 +38,7 @@ public:
private:
void evalFloat() const;
+ template <typename T> void evalInteger() const;
void evalQuantized() const;
private:
diff --git a/compiler/luci-interpreter/src/kernels/LessEqual.test.cpp b/compiler/luci-interpreter/src/kernels/LessEqual.test.cpp
index 0558003dd..b2e2fa7a1 100644
--- a/compiler/luci-interpreter/src/kernels/LessEqual.test.cpp
+++ b/compiler/luci-interpreter/src/kernels/LessEqual.test.cpp
@@ -97,6 +97,82 @@ TEST_F(LessEqualTest, FloatBroardcast)
EXPECT_THAT(extractTensorShape(output_tensor), ::testing::ElementsAreArray({3, 3}));
}
+template <loco::DataType DType>
+void checkIntegerSimple(luci_interpreter::IMemoryManager *memory_manager)
+{
+ using dtype = typename loco::DataTypeImpl<DType>::Type;
+ dtype min_value = std::numeric_limits<dtype>::min();
+ dtype max_value = std::numeric_limits<dtype>::max();
+ std::vector<dtype> x_data{min_value, 2, max_value};
+
+ std::vector<dtype> y_data{min_value + 1, -2, max_value};
+
+ std::vector<bool> ref_output_data{true, false, true};
+
+ Tensor x_tensor = makeInputTensor<DType>({3}, x_data, memory_manager);
+ Tensor y_tensor = makeInputTensor<DType>({3}, y_data, memory_manager);
+ Tensor output_tensor = makeOutputTensor(DataType::BOOL);
+
+ LessEqual kernel(&x_tensor, &y_tensor, &output_tensor);
+ kernel.configure();
+ memory_manager->allocate_memory(output_tensor);
+ kernel.execute();
+
+ EXPECT_THAT(extractTensorData<bool>(output_tensor), ::testing::ElementsAreArray(ref_output_data));
+ EXPECT_THAT(extractTensorShape(output_tensor), ::testing::ElementsAreArray({3}));
+}
+
+template <loco::DataType DType>
+void checkIntegerBroadcast(luci_interpreter::IMemoryManager *memory_manager)
+{
+ using dtype = typename loco::DataTypeImpl<DType>::Type;
+ dtype min_value = std::numeric_limits<dtype>::min();
+ dtype max_value = std::numeric_limits<dtype>::max();
+ std::vector<dtype> x_data{
+ min_value, 2, 3, // Row 1
+ 4, 5, max_value, // Row 2
+ -1, -4, -3, // Row 3
+ min_value, -2, max_value, // Row 4
+ };
+
+ std::vector<dtype> y_data{
+ min_value + 1, -2, max_value - 1, // Row 1
+ };
+
+ std::vector<bool> ref_output_data{
+ true, false, true, // Row 1
+ false, false, false, // Row 2
+ false, true, true, // Row 3
+ true, true, false, // Row 4
+ };
+
+ Tensor x_tensor = makeInputTensor<DType>({4, 3}, x_data, memory_manager);
+ Tensor y_tensor = makeInputTensor<DType>({3}, y_data, memory_manager);
+ Tensor output_tensor = makeOutputTensor(DataType::BOOL);
+
+ LessEqual kernel(&x_tensor, &y_tensor, &output_tensor);
+ kernel.configure();
+ memory_manager->allocate_memory(output_tensor);
+ kernel.execute();
+
+ EXPECT_THAT(extractTensorData<bool>(output_tensor), ::testing::ElementsAreArray(ref_output_data));
+ EXPECT_THAT(extractTensorShape(output_tensor), ::testing::ElementsAreArray({4, 3}));
+}
+
+TEST_F(LessEqualTest, Int32)
+{
+ checkIntegerSimple<loco::DataType::S32>(_memory_manager.get());
+ checkIntegerBroadcast<loco::DataType::S32>(_memory_manager.get());
+ SUCCEED();
+}
+
+TEST_F(LessEqualTest, Int64)
+{
+ checkIntegerSimple<loco::DataType::S64>(_memory_manager.get());
+ checkIntegerBroadcast<loco::DataType::S64>(_memory_manager.get());
+ SUCCEED();
+}
+
// Choose min / max in such a way that there are exactly 256 units to avoid rounding errors.
const float F_MIN = -128.0 / 128.0;
const float F_MAX = 127.0 / 128.0;
@@ -223,6 +299,36 @@ TEST_F(LessEqualTest, Input_Output_Type_NEG)
EXPECT_ANY_THROW(kernel.configure());
}
+TEST_F(LessEqualTest, Float_Broadcast_NEG)
+{
+ Tensor x_tensor = makeInputTensor<DataType::FLOAT32>({2}, {1.f, 2.f}, _memory_manager.get());
+ Tensor y_tensor = makeInputTensor<DataType::FLOAT32>({3}, {1.f, 2.f, 3.f}, _memory_manager.get());
+ Tensor output_tensor = makeOutputTensor(DataType::BOOL);
+
+ LessEqual kernel(&x_tensor, &y_tensor, &output_tensor);
+ ASSERT_ANY_THROW(kernel.configure());
+}
+
+TEST_F(LessEqualTest, Int32_Broadcast_NEG)
+{
+ Tensor x_tensor = makeInputTensor<DataType::S32>({2}, {1, 2}, _memory_manager.get());
+ Tensor y_tensor = makeInputTensor<DataType::S32>({3}, {1, 2, 3}, _memory_manager.get());
+ Tensor output_tensor = makeOutputTensor(DataType::BOOL);
+
+ LessEqual kernel(&x_tensor, &y_tensor, &output_tensor);
+ ASSERT_ANY_THROW(kernel.configure());
+}
+
+TEST_F(LessEqualTest, Int64_Broadcast_NEG)
+{
+ Tensor x_tensor = makeInputTensor<DataType::S64>({2}, {1, 2}, _memory_manager.get());
+ Tensor y_tensor = makeInputTensor<DataType::S64>({3}, {1, 2, 3}, _memory_manager.get());
+ Tensor output_tensor = makeOutputTensor(DataType::BOOL);
+
+ LessEqual kernel(&x_tensor, &y_tensor, &output_tensor);
+ ASSERT_ANY_THROW(kernel.configure());
+}
+
} // namespace
} // namespace kernels
} // namespace luci_interpreter
diff --git a/compiler/luci-interpreter/src/kernels/Logistic.test.cpp b/compiler/luci-interpreter/src/kernels/Logistic.test.cpp
index 70227563f..5a1ea669c 100644
--- a/compiler/luci-interpreter/src/kernels/Logistic.test.cpp
+++ b/compiler/luci-interpreter/src/kernels/Logistic.test.cpp
@@ -76,7 +76,7 @@ template <typename T> class LogisticTest : public ::testing::Test
};
using DataTypes = ::testing::Types<float, uint8_t>;
-TYPED_TEST_CASE(LogisticTest, DataTypes);
+TYPED_TEST_SUITE(LogisticTest, DataTypes);
TYPED_TEST(LogisticTest, Simple)
{
diff --git a/compiler/luci-interpreter/src/kernels/MirrorPad.cpp b/compiler/luci-interpreter/src/kernels/MirrorPad.cpp
index 89049c96c..2fbeefce4 100644
--- a/compiler/luci-interpreter/src/kernels/MirrorPad.cpp
+++ b/compiler/luci-interpreter/src/kernels/MirrorPad.cpp
@@ -1,5 +1,6 @@
/*
* Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ * Copyright 2019 The TensorFlow Authors. All Rights Reserved.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
@@ -18,8 +19,6 @@
#include "kernels/Utils.h"
-#include <tensorflow/lite/kernels/internal/reference/pad.h>
-
namespace luci_interpreter
{
namespace kernels
@@ -59,44 +58,25 @@ void MirrorPad::configure()
output()->resize(output_shape);
}
+template <typename T>
+inline void MirrorPadImpl(const Tensor &input, const Tensor &paddings, MirrorPadMode mode,
+ Tensor &output);
+
void MirrorPad::execute() const
{
- const int num_dims = input()->shape().num_dims();
-
- tflite::PadParams params{};
- params.left_padding_count = num_dims;
- params.right_padding_count = num_dims;
-
- const auto *paddings_data = getTensorData<int32_t>(paddings());
- for (int i = num_dims - 1; i >= 0; --i)
- {
- params.left_padding[i] = paddings_data[i * 2];
- params.right_padding[i] = paddings_data[i * 2 + 1];
- }
-
switch (input()->element_type())
{
case DataType::FLOAT32:
{
- const float pad_value = 0;
-
- // NOTE: this implementation only obtains min-max values for quantization
- // TODO: calculate proper inference values
- tflite::reference_ops::Pad(params, getTensorShape(input()), getTensorData<float>(input()),
- &pad_value, getTensorShape(output()),
- getTensorData<float>(output()));
+ MirrorPadImpl<float>(*input(), *paddings(), params().mode, *output());
break;
}
case DataType::U8:
{
- // NOTE: this implementation only obtains min-max values for quantization
- // TODO: calculate proper inference values
assert(output()->zero_point() >= std::numeric_limits<uint8_t>::min());
assert(output()->zero_point() <= std::numeric_limits<uint8_t>::max());
- const auto pad_value = static_cast<uint8_t>(output()->zero_point());
- tflite::reference_ops::Pad(params, getTensorShape(input()), getTensorData<uint8_t>(input()),
- &pad_value, getTensorShape(output()),
- getTensorData<uint8_t>(output()));
+
+ MirrorPadImpl<uint8_t>(*input(), *paddings(), params().mode, *output());
break;
}
default:
@@ -104,5 +84,87 @@ void MirrorPad::execute() const
}
}
+template <typename T>
+inline void MirrorPadImpl(const Tensor &input, const Tensor &paddings, MirrorPadMode mode,
+ Tensor &output)
+{
+ auto const input_dims = input.shape().num_dims();
+ auto const input_data = input.data<T>();
+ auto const paddings_data = paddings.data<int32_t>();
+ auto const output_data = output.data<T>();
+
+ auto const input_b = input_dims > 3 ? input.shape().dim(input_dims - 4) : 1;
+ auto const input_h = input_dims > 2 ? input.shape().dim(input_dims - 3) : 1;
+ auto const input_w = input_dims > 1 ? input.shape().dim(input_dims - 2) : 1;
+ auto const input_d = input.shape().dim(input_dims - 1);
+
+ auto const input_h_offset = input_d * input_w;
+ auto const input_b_offset = input_h_offset * input_h;
+
+ auto const output_b = input_dims > 3 ? output.shape().dim(input_dims - 4) : 1;
+ auto const output_h = input_dims > 2 ? output.shape().dim(input_dims - 3) : 1;
+ auto const output_w = input_dims > 1 ? output.shape().dim(input_dims - 2) : 1;
+ auto const output_d = output.shape().dim(input_dims - 1);
+
+ auto const left_b_pad = paddings_data[2 * (input_dims - 4)];
+ auto const left_h_pad = paddings_data[2 * (input_dims - 3)];
+ auto const left_w_pad = paddings_data[2 * (input_dims - 2)];
+ auto const left_d_pad = paddings_data[2 * (input_dims - 1)];
+
+ auto const right_b_pad = paddings_data[2 * (input_dims - 4) + 1];
+ auto const right_h_pad = paddings_data[2 * (input_dims - 3) + 1];
+ auto const right_w_pad = paddings_data[2 * (input_dims - 2) + 1];
+ auto const right_d_pad = paddings_data[2 * (input_dims - 1) + 1];
+
+ const auto positive_mod = [](auto a, auto b) { return (a % b + b) % b; };
+ const auto offset_index = [input_d, input_h_offset, input_b_offset](auto d, auto w, auto h,
+ auto b) {
+ return d + w * input_d + h * input_h_offset + b * input_b_offset;
+ };
+
+ const auto symmetric_dim = [&positive_mod](auto i, auto left_pad, auto input) {
+ bool reflected = (((i < left_pad ? i + 1 - input : i) - left_pad) / input & 1) == 1;
+ return positive_mod(reflected ? input + left_pad - i - 1 : i - left_pad, input);
+ };
+
+ const T *in_ptr = input_data;
+ T *out_ptr = output_data;
+
+ for (int32_t b = 0; b < output_b; ++b)
+ {
+ for (int32_t h = 0; h < output_h; ++h)
+ {
+ for (int32_t w = 0; w < output_w; ++w)
+ {
+ for (int32_t d = 0; d < output_d; ++d)
+ {
+ if (b < left_b_pad || b >= output_b - right_b_pad || //
+ h < left_h_pad || h >= output_h - right_h_pad || //
+ w < left_w_pad || w >= output_w - right_w_pad || //
+ d < left_d_pad || d >= output_d - right_d_pad)
+ {
+ if (mode == MirrorPadMode::REFLECT)
+ {
+ *out_ptr++ = input_data[offset_index(
+ positive_mod(d - left_d_pad, input_d), positive_mod(w - left_w_pad, input_w),
+ positive_mod(h - left_h_pad, input_h), positive_mod(b - left_b_pad, input_b))];
+ }
+ else
+ {
+ *out_ptr++ = input_data[offset_index(
+ symmetric_dim(d, left_d_pad, input_d), symmetric_dim(w, left_w_pad, input_w),
+ symmetric_dim(h, left_h_pad, input_h), symmetric_dim(b, left_b_pad, input_b))];
+ }
+ }
+ else
+ {
+ *out_ptr++ = *in_ptr++;
+ }
+ }
+ }
+ }
+ }
+}
+
} // namespace kernels
} // namespace luci_interpreter
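The SYMMETRIC index math is easiest to follow in one dimension. For input [1, 2, 3] with paddings (1, 2) — the FloatSymmetric2Dim case below — symmetric_dim maps output positions 0..5 to source indices 0, 0, 1, 2, 2, 1, i.e. [1, 1, 2, 3, 3, 2]; REFLECT instead wraps with plain positive_mod. A standalone replay of the two lambdas:

    #include <cstdio>

    int main()
    {
      const int input = 3, left_pad = 1, output = 6; // paddings (1, 2)
      auto positive_mod = [](int a, int b) { return (a % b + b) % b; };
      for (int i = 0; i < output; ++i)
      {
        // same expression as symmetric_dim in MirrorPadImpl
        bool reflected = (((i < left_pad ? i + 1 - input : i) - left_pad) / input & 1) == 1;
        int src = positive_mod(reflected ? input + left_pad - i - 1 : i - left_pad, input);
        std::printf("out[%d] <- in[%d]\n", i, src); // prints 0, 0, 1, 2, 2, 1
      }
      return 0;
    }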
diff --git a/compiler/luci-interpreter/src/kernels/MirrorPad.test.cpp b/compiler/luci-interpreter/src/kernels/MirrorPad.test.cpp
index de9da5051..740d8cb22 100644
--- a/compiler/luci-interpreter/src/kernels/MirrorPad.test.cpp
+++ b/compiler/luci-interpreter/src/kernels/MirrorPad.test.cpp
@@ -14,4 +14,212 @@
* limitations under the License.
*/
-// TODO: Add tests for MirrorPad
+#include "kernels/MirrorPad.h"
+#include "kernels/TestUtils.h"
+#include "luci_interpreter/TestMemoryManager.h"
+
+namespace luci_interpreter
+{
+namespace kernels
+{
+namespace
+{
+
+using namespace testing;
+
+class MirrorPadTest : public ::testing::Test
+{
+protected:
+ void SetUp() override { _memory_manager = std::make_unique<TestMemoryManager>(); }
+
+ void Execute(const Tensor &input, const Tensor &padding, Tensor &output, MirrorPadMode mode)
+ {
+ MirrorPadParams params{};
+ params.mode = mode;
+
+ MirrorPad kernel(&input, &padding, &output, params);
+ kernel.configure();
+ _memory_manager->allocate_memory(output);
+ kernel.execute();
+ }
+
+ std::unique_ptr<IMemoryManager> _memory_manager;
+};
+
+TEST_F(MirrorPadTest, FloatReflect)
+{
+ Shape input_shape = {1, 2, 2, 1};
+ Shape padding_shape = {4, 2};
+
+ std::vector<float> input_data{1.0f, 2.0f, //
+ 3.0f, 4.0f}; //
+ std::vector<int> padding_data{0, 0, 2, 1, 1, 2, 0, 0};
+
+ Tensor input_tensor =
+ makeInputTensor<DataType::FLOAT32>(input_shape, input_data, _memory_manager.get());
+ Tensor padding_tensor =
+ makeInputTensor<DataType::S32>(padding_shape, padding_data, _memory_manager.get());
+
+ Tensor output_tensor = makeOutputTensor(DataType::FLOAT32);
+
+ Execute(input_tensor, padding_tensor, output_tensor, MirrorPadMode::REFLECT);
+
+ std::vector<float> ref_output_data{2.0f, 1.0f, 2.0f, 1.0f, 2.0f, //
+ 4.0f, 3.0f, 4.0f, 3.0f, 4.0f, //
+ 2.0f, 1.0f, 2.0f, 1.0f, 2.0f, //
+ 4.0f, 3.0f, 4.0f, 3.0f, 4.0f, //
+ 2.0f, 1.0f, 2.0f, 1.0f, 2.0f}; //
+ std::initializer_list<int32_t> ref_output_shape{1, 5, 5, 1};
+
+ EXPECT_THAT(extractTensorData<float>(output_tensor), FloatArrayNear(ref_output_data));
+ EXPECT_THAT(extractTensorShape(output_tensor), ::testing::ElementsAreArray(ref_output_shape));
+}
+
+TEST_F(MirrorPadTest, FloatSymmetric)
+{
+ Shape input_shape = {1, 2, 2, 1};
+ Shape padding_shape = {4, 2};
+
+ std::vector<float> input_data{1.0f, 2.0f, //
+ 3.0f, 4.0f}; //
+ std::vector<int> padding_data{0, 0, 2, 1, 1, 2, 0, 0};
+
+ Tensor input_tensor =
+ makeInputTensor<DataType::FLOAT32>(input_shape, input_data, _memory_manager.get());
+ Tensor padding_tensor =
+ makeInputTensor<DataType::S32>(padding_shape, padding_data, _memory_manager.get());
+
+ Tensor output_tensor = makeOutputTensor(DataType::FLOAT32);
+
+ Execute(input_tensor, padding_tensor, output_tensor, MirrorPadMode::SYMMETRIC);
+
+ std::vector<float> ref_output_data{3.0, 3.0, 4.0, 4.0, 3.0, //
+ 1.0, 1.0, 2.0, 2.0, 1.0, //
+ 1.0, 1.0, 2.0, 2.0, 1.0, //
+ 3.0, 3.0, 4.0, 4.0, 3.0, //
+ 3.0, 3.0, 4.0, 4.0, 3.0}; //
+ std::initializer_list<int32_t> ref_output_shape{1, 5, 5, 1};
+
+ EXPECT_THAT(extractTensorData<float>(output_tensor), FloatArrayNear(ref_output_data));
+ EXPECT_THAT(extractTensorShape(output_tensor), ::testing::ElementsAreArray(ref_output_shape));
+}
+
+TEST_F(MirrorPadTest, FloatSymmetric2Dim)
+{
+ Shape input_shape = {3, 1};
+ Shape padding_shape = {2, 2};
+
+ std::vector<float> input_data{1.0f, 2.0f, 3.0f};
+ std::vector<int> padding_data{1, 2, 0, 0};
+
+ Tensor input_tensor =
+ makeInputTensor<DataType::FLOAT32>(input_shape, input_data, _memory_manager.get());
+ Tensor padding_tensor =
+ makeInputTensor<DataType::S32>(padding_shape, padding_data, _memory_manager.get());
+
+ Tensor output_tensor = makeOutputTensor(DataType::FLOAT32);
+
+ Execute(input_tensor, padding_tensor, output_tensor, MirrorPadMode::SYMMETRIC);
+
+ std::vector<float> ref_output_data{1.0, 1.0, 2.0, 3.0, 3.0, 2.0};
+ std::initializer_list<int32_t> ref_output_shape{6, 1};
+
+ EXPECT_THAT(extractTensorData<float>(output_tensor), FloatArrayNear(ref_output_data));
+ EXPECT_THAT(extractTensorShape(output_tensor), ::testing::ElementsAreArray(ref_output_shape));
+}
+
+TEST_F(MirrorPadTest, Uint8Reflect)
+{
+ Shape input_shape = {1, 2, 3, 1};
+ Shape padding_shape = {4, 2};
+
+ float quant_tolerance = getTolerance(0.0f, 6.0f, 255);
+ std::pair<float, int32_t> quant_param = quantizationParams<uint8_t>(0.0f, 6.0f);
+
+ std::vector<float> input_data{1.0f, 2.0f, 3.0f, //
+ 4.0f, 5.0f, 6.0f}; //
+ std::vector<int> padding_data{0, 0, 2, 1, 1, 3, 0, 0};
+
+ Tensor input_tensor = makeInputTensor<DataType::U8>(
+ input_shape, quant_param.first, quant_param.second, input_data, _memory_manager.get());
+
+ Tensor padding_tensor =
+ makeInputTensor<DataType::S32>(padding_shape, padding_data, _memory_manager.get());
+
+ Tensor output_tensor = makeOutputTensor(DataType::U8, quant_param.first, quant_param.second);
+
+ Execute(input_tensor, padding_tensor, output_tensor, MirrorPadMode::REFLECT);
+
+ std::vector<float> ref_output_data{
+ 3.0f, 1.0f, 2.0f, 3.0f, 1.0f, 2.0f, 3.0f, //
+ 6.0f, 4.0f, 5.0f, 6.0f, 4.0f, 5.0f, 6.0f, //
+ 3.0f, 1.0f, 2.0f, 3.0f, 1.0f, 2.0f, 3.0f, //
+ 6.0f, 4.0f, 5.0f, 6.0f, 4.0f, 5.0f, 6.0f, //
+ 3.0f, 1.0f, 2.0f, 3.0f, 1.0f, 2.0f, 3.0f, //
+ };
+ std::initializer_list<int32_t> ref_output_shape{1, 5, 7, 1};
+
+ EXPECT_THAT(dequantizeTensorData(output_tensor),
+ FloatArrayNear(ref_output_data, quant_tolerance));
+ EXPECT_THAT(extractTensorShape(output_tensor), ::testing::ElementsAreArray(ref_output_shape));
+}
+
+TEST_F(MirrorPadTest, Uint8Symmetric)
+{
+ Shape input_shape = {1, 2, 3, 1};
+ Shape padding_shape = {4, 2};
+
+ float quant_tolerance = getTolerance(0.0f, 6.0f, 255);
+ std::pair<float, int32_t> quant_param = quantizationParams<uint8_t>(0.0f, 6.0f);
+
+ std::vector<float> input_data{1.0f, 2.0f, 3.0f, //
+ 4.0f, 5.0f, 6.0f}; //
+ std::vector<int> padding_data{0, 0, 2, 1, 1, 3, 0, 0};
+
+ Tensor input_tensor = makeInputTensor<DataType::U8>(
+ input_shape, quant_param.first, quant_param.second, input_data, _memory_manager.get());
+
+ Tensor padding_tensor =
+ makeInputTensor<DataType::S32>(padding_shape, padding_data, _memory_manager.get());
+
+ Tensor output_tensor = makeOutputTensor(DataType::U8, quant_param.first, quant_param.second);
+
+ Execute(input_tensor, padding_tensor, output_tensor, MirrorPadMode::SYMMETRIC);
+
+ std::vector<float> ref_output_data{
+ 4.0f, 4.0f, 5.0f, 6.0f, 6.0f, 5.0f, 4.0f, //
+ 1.0f, 1.0f, 2.0f, 3.0f, 3.0f, 2.0f, 1.0f, //
+ 1.0f, 1.0f, 2.0f, 3.0f, 3.0f, 2.0f, 1.0f, //
+ 4.0f, 4.0f, 5.0f, 6.0f, 6.0f, 5.0f, 4.0f, //
+ 4.0f, 4.0f, 5.0f, 6.0f, 6.0f, 5.0f, 4.0f, //
+ };
+ std::initializer_list<int32_t> ref_output_shape{1, 5, 7, 1};
+
+ EXPECT_THAT(dequantizeTensorData(output_tensor),
+ FloatArrayNear(ref_output_data, quant_tolerance));
+ EXPECT_THAT(extractTensorShape(output_tensor), ::testing::ElementsAreArray(ref_output_shape));
+}
+
+TEST_F(MirrorPadTest, UnsupportedDim_NEG)
+{
+ Tensor input_tensor =
+ makeInputTensor<DataType::FLOAT32>({1, 1, 1, 1, 1}, {1.0f}, _memory_manager.get());
+ Tensor padding_tensor =
+ makeInputTensor<DataType::S32>({5, 2}, {0, 0, 0, 0, 0, 0, 0, 0, 0, 0}, _memory_manager.get());
+ Tensor output_tensor = makeOutputTensor(DataType::FLOAT32);
+
+ EXPECT_ANY_THROW(Execute(input_tensor, padding_tensor, output_tensor, MirrorPadMode::REFLECT));
+}
+
+TEST_F(MirrorPadTest, InvalidInputType_NEG)
+{
+ Tensor input_tensor = makeInputTensor<DataType::S64>({1}, {1}, _memory_manager.get());
+ Tensor padding_tensor = makeInputTensor<DataType::S32>({1, 2}, {0, 0}, _memory_manager.get());
+ Tensor output_tensor = makeOutputTensor(DataType::S64);
+
+ EXPECT_ANY_THROW(Execute(input_tensor, padding_tensor, output_tensor, MirrorPadMode::REFLECT));
+}
+
+} // namespace
+} // namespace kernels
+} // namespace luci_interpreter
diff --git a/compiler/luci-interpreter/src/kernels/Mul.cpp b/compiler/luci-interpreter/src/kernels/Mul.cpp
index bc855de0f..531fb4fa1 100644
--- a/compiler/luci-interpreter/src/kernels/Mul.cpp
+++ b/compiler/luci-interpreter/src/kernels/Mul.cpp
@@ -42,6 +42,8 @@ void Mul::configure()
LUCI_INTERPRETER_CHECK(output()->element_type() == input1()->element_type());
if (input1()->element_type() == DataType::S16)
{
+ LUCI_INTERPRETER_CHECK(input1()->zero_points().size() == 1 &&
+ input2()->zero_points().size() == 1);
LUCI_INTERPRETER_CHECK(input1()->zero_point() == 0 && input2()->zero_point() == 0 &&
output()->zero_point() == 0);
}
@@ -56,6 +58,12 @@ void Mul::execute() const
case DataType::FLOAT32:
evalFloat();
break;
+ case DataType::S64:
+ evalInteger<int64_t>();
+ break;
+ case DataType::S32:
+ evalInteger<int32_t>();
+ break;
case DataType::S16:
evalQuantizedS16();
break;
@@ -66,13 +74,8 @@ void Mul::execute() const
void Mul::evalFloat() const
{
- float activation_min{};
- float activation_max{};
- calculateActivationRange(_params.activation, &activation_min, &activation_max);
-
tflite::ArithmeticParams params{};
- params.float_activation_min = activation_min;
- params.float_activation_max = activation_max;
+ fillArithmeticActivationRange<float>(params, _params.activation);
const bool need_broadcast = tflite::reference_ops::ProcessBroadcastShapes(
getTensorShape(input1()), getTensorShape(input2()), &params);
@@ -91,6 +94,28 @@ void Mul::evalFloat() const
}
}
+template <typename T> void Mul::evalInteger() const
+{
+ tflite::ArithmeticParams params{};
+ fillArithmeticActivationRange<T>(params, _params.activation);
+
+ const bool need_broadcast = tflite::reference_ops::ProcessBroadcastShapes(
+ getTensorShape(input1()), getTensorShape(input2()), &params);
+
+ if (need_broadcast)
+ {
+ luci_interpreter_pal::BroadcastMul4DSlow(
+ params, getTensorShape(input1()), getTensorData<T>(input1()), getTensorShape(input2()),
+ getTensorData<T>(input2()), getTensorShape(output()), getTensorData<T>(output()));
+ }
+ else
+ {
+ luci_interpreter_pal::Mul(params, getTensorShape(input1()), getTensorData<T>(input1()),
+ getTensorShape(input2()), getTensorData<T>(input2()),
+ getTensorShape(output()), getTensorData<T>(output()));
+ }
+}
+
void Mul::evalQuantizedS16() const
{
const auto input1_scale = static_cast<double>(input1()->scale());
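fillArithmeticActivationRange<T> folds the per-type activation clamp setup into one helper so the float and new integer paths can share it. Its definition is outside this excerpt; a plausible shape, assuming it wraps the existing calculateActivationRange utility (the one the removed evalFloat lines called directly):

    #include <type_traits>

    // Hypothetical reconstruction -- the real helper lives in kernels/Utils.h.
    // tflite::ArithmeticParams keeps separate min/max fields per element type.
    template <typename T>
    void fillArithmeticActivationRange(tflite::ArithmeticParams &params, Activation activation)
    {
      if (std::is_same<T, float>::value)
        calculateActivationRange(activation, &params.float_activation_min,
                                 &params.float_activation_max);
      else if (std::is_same<T, int32_t>::value)
        calculateActivationRange(activation, &params.quantized_activation_min,
                                 &params.quantized_activation_max);
      else
        calculateActivationRange(activation, &params.int64_activation_min,
                                 &params.int64_activation_max);
    }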
diff --git a/compiler/luci-interpreter/src/kernels/Mul.h b/compiler/luci-interpreter/src/kernels/Mul.h
index 2ccf60f3a..c0cf817df 100644
--- a/compiler/luci-interpreter/src/kernels/Mul.h
+++ b/compiler/luci-interpreter/src/kernels/Mul.h
@@ -42,6 +42,7 @@ public:
private:
void evalFloat() const;
+ template <typename T> void evalInteger() const;
void evalQuantizedS16() const;
};
diff --git a/compiler/luci-interpreter/src/kernels/Mul.test.cpp b/compiler/luci-interpreter/src/kernels/Mul.test.cpp
index 471f6ac86..fc0e60614 100644
--- a/compiler/luci-interpreter/src/kernels/Mul.test.cpp
+++ b/compiler/luci-interpreter/src/kernels/Mul.test.cpp
@@ -93,6 +93,78 @@ TEST_F(MulTest, Float)
}
}
+template <loco::DataType DType> void checkInteger(luci_interpreter::IMemoryManager *memory_manager)
+{
+ using dtype = typename loco::DataTypeImpl<DType>::Type;
+ Shape base_shape = {2, 3, 1, 2};
+ std::vector<Shape> test_shapes{{1, 1, 3, 2}, {1, 3, 1, 2}, {2, 1, 3, 1}, {2, 3, 1, 1}};
+
+ dtype max_value = std::numeric_limits<dtype>::max();
+ dtype res_max = max_value - max_value % 10;
+
+ std::vector<std::vector<dtype>> test_outputs = {
+ {8, 0, 20, 0, 4, 30, //
+ 16, 0, 40, 3, 8, 0, //
+ 0, 0, 0, 6, 0, 0, //
+ 4, 0, 10, 9, 2, 0, //
+ 40, 0, 100, 0, 20, 150, //
+ 28, 0, 70, 0, 14, res_max},
+ {8, 0, 40, 3, 0, 0, 4, 0, 100, 0, 14, res_max},
+ {8, 12, 0, 0, 20, 30, 16, 0, 0, 0, 40, 0, 0, 0, 0, 0, 0,
+ 0, 0, 9, 2, 0, 10, 0, 0, 0, 20, 30, 100, 150, 0, 0, 14, max_value / 10 * 2,
+ 70, res_max},
+ {8, 12, 0, 0, 0, 0, 0, 9, 20, 30, 70, res_max}};
+ std::vector<dtype> input1_data{2, 3, 4, -1, -3, -2, 1, -3, 10, 15, 7, max_value / 10};
+ std::vector<dtype> input2_data{4, 0, 10, -3, 2, 10};
+ for (size_t i = 0; i < test_shapes.size(); ++i)
+ {
+ Tensor input1_tensor = makeInputTensor<DType>(base_shape, input1_data, memory_manager);
+ Tensor input2_tensor = makeInputTensor<DType>(test_shapes[i], input2_data, memory_manager);
+ Tensor output_tensor = makeOutputTensor(DType);
+
+ MulParams params{};
+ params.activation = Activation::RELU;
+
+ Mul kernel(&input1_tensor, &input2_tensor, &output_tensor, params);
+ kernel.configure();
+ memory_manager->allocate_memory(output_tensor);
+ kernel.execute();
+
+ EXPECT_THAT(extractTensorData<dtype>(output_tensor), test_outputs[i])
+ << "With shape number " << i;
+ }
+ // Re-run with exchanged inputs.
+ for (size_t i = 0; i < test_shapes.size(); ++i)
+ {
+ Tensor input1_tensor = makeInputTensor<DType>(test_shapes[i], input2_data, memory_manager);
+ Tensor input2_tensor = makeInputTensor<DType>(base_shape, input1_data, memory_manager);
+ Tensor output_tensor = makeOutputTensor(DType);
+
+ MulParams params{};
+ params.activation = Activation::RELU;
+
+ Mul kernel(&input1_tensor, &input2_tensor, &output_tensor, params);
+ kernel.configure();
+ memory_manager->allocate_memory(output_tensor);
+ kernel.execute();
+
+ EXPECT_THAT(extractTensorData<dtype>(output_tensor), test_outputs[i])
+ << "With shape number " << i;
+ }
+}
+
+TEST_F(MulTest, SInt64)
+{
+ checkInteger<loco::DataType::S64>(_memory_manager.get());
+ SUCCEED();
+}
+
+TEST_F(MulTest, SInt32)
+{
+ checkInteger<loco::DataType::S32>(_memory_manager.get());
+ SUCCEED();
+}
+
TEST_F(MulTest, SInt16)
{
Shape base_shape = {2, 3, 1, 2};
@@ -161,6 +233,60 @@ TEST_F(MulTest, SInt16)
}
}
+TEST_F(MulTest, Input_Output_Type_NEG)
+{
+ Tensor input1_tensor = makeInputTensor<DataType::FLOAT32>({1}, {1.f}, _memory_manager.get());
+ Tensor input2_tensor = makeInputTensor<DataType::S32>({1}, {2}, _memory_manager.get());
+ Tensor output_tensor = makeOutputTensor(DataType::FLOAT32);
+
+ MulParams params{};
+ params.activation = Activation::RELU;
+
+ Mul kernel(&input1_tensor, &input2_tensor, &output_tensor, params);
+ EXPECT_ANY_THROW(kernel.configure());
+}
+
+TEST_F(MulTest, Invalid_Output_Type_NEG)
+{
+ Tensor input1_tensor = makeInputTensor<DataType::S64>({1}, {1}, _memory_manager.get());
+ Tensor input2_tensor = makeInputTensor<DataType::S64>({1}, {2}, _memory_manager.get());
+ Tensor output_tensor = makeOutputTensor(DataType::S32);
+
+ MulParams params{};
+ params.activation = Activation::RELU;
+
+ Mul kernel(&input1_tensor, &input2_tensor, &output_tensor, params);
+ EXPECT_ANY_THROW(kernel.configure());
+}
+
+TEST_F(MulTest, Invalid_Input_Type_NEG)
+{
+ Tensor input1_tensor = makeInputTensor<DataType::U64>({1}, {1}, _memory_manager.get());
+ Tensor input2_tensor = makeInputTensor<DataType::U64>({1}, {2}, _memory_manager.get());
+ Tensor output_tensor = makeOutputTensor(DataType::U64);
+
+ MulParams params{};
+ params.activation = Activation::RELU;
+
+ Mul kernel(&input1_tensor, &input2_tensor, &output_tensor, params);
+ kernel.configure();
+ _memory_manager->allocate_memory(output_tensor);
+ EXPECT_ANY_THROW(kernel.execute());
+}
+
+TEST_F(MulTest, Invalid_Quantization_NEG)
+{
+ Tensor input1_tensor = makeInputTensor<DataType::S16>({1}, {1}, _memory_manager.get());
+ Tensor input2_tensor = makeInputTensor<DataType::S16>({1}, {2}, _memory_manager.get());
+ Tensor output_tensor = makeOutputTensor(DataType::S16);
+
+ MulParams params{};
+ params.activation = Activation::NONE;
+
+ Mul kernel(&input1_tensor, &input2_tensor, &output_tensor, params);
+ EXPECT_ANY_THROW(kernel.configure());
+}
+
} // namespace
} // namespace kernels
} // namespace luci_interpreter
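In checkInteger above, the largest product pairs input1's max_value / 10 against input2's 10. Because the division truncates, multiplying back lands on the nearest multiple of 10 below the maximum, which is why the expected value is res_max = max_value - max_value % 10 rather than max_value itself. The arithmetic, spelled out for int32:

    #include <cassert>
    #include <cstdint>
    #include <limits>

    int main()
    {
      const int32_t max_value = std::numeric_limits<int32_t>::max(); // 2147483647
      // truncating division drops max_value % 10 (= 7), so the product
      // is the nearest multiple of 10 below the maximum
      assert((max_value / 10) * 10 == max_value - max_value % 10); // 2147483640
      return 0;
    }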
diff --git a/compiler/luci-interpreter/src/kernels/NotEqual.cpp b/compiler/luci-interpreter/src/kernels/NotEqual.cpp
index 99d5e0fa0..54e5eee34 100644
--- a/compiler/luci-interpreter/src/kernels/NotEqual.cpp
+++ b/compiler/luci-interpreter/src/kernels/NotEqual.cpp
@@ -49,6 +49,12 @@ void NotEqual::execute() const
case DataType::FLOAT32:
evalFloat();
break;
+ case DataType::S64:
+ evalInteger<int64_t>();
+ break;
+ case DataType::S32:
+ evalInteger<int32_t>();
+ break;
case DataType::U8:
evalQuantized();
break;
@@ -79,6 +85,29 @@ void NotEqual::evalFloat() const
}
}
+template <typename T> void NotEqual::evalInteger() const
+{
+ const auto x_data = getTensorData<T>(x());
+ const auto y_data = getTensorData<T>(y());
+ auto output_data = getTensorData<bool>(output());
+
+ tflite::ComparisonParams op_params;
+ op_params.is_broadcast = x()->shape() != y()->shape();
+
+ if (op_params.is_broadcast)
+ {
+ tflite::reference_ops::Broadcast4DSlowNotEqualNoScaling(op_params, getTensorShape(x()), x_data,
+ getTensorShape(y()), y_data,
+ getTensorShape(output()), output_data);
+ }
+ else
+ {
+ tflite::reference_ops::NotEqualNoScaling(op_params, getTensorShape(x()), x_data,
+ getTensorShape(y()), y_data, getTensorShape(output()),
+ output_data);
+ }
+}
+
void NotEqual::evalQuantized() const
{
const auto x_data = getTensorData<uint8_t>(x());
diff --git a/compiler/luci-interpreter/src/kernels/NotEqual.h b/compiler/luci-interpreter/src/kernels/NotEqual.h
index 247874df7..d2aafe893 100644
--- a/compiler/luci-interpreter/src/kernels/NotEqual.h
+++ b/compiler/luci-interpreter/src/kernels/NotEqual.h
@@ -38,6 +38,7 @@ public:
private:
void evalFloat() const;
+ template <typename T> void evalInteger() const;
void evalQuantized() const;
private:
diff --git a/compiler/luci-interpreter/src/kernels/NotEqual.test.cpp b/compiler/luci-interpreter/src/kernels/NotEqual.test.cpp
index 763f86893..45bf4022a 100644
--- a/compiler/luci-interpreter/src/kernels/NotEqual.test.cpp
+++ b/compiler/luci-interpreter/src/kernels/NotEqual.test.cpp
@@ -99,6 +99,82 @@ TEST_F(NotEqualTest, FloatBroardcast)
EXPECT_THAT(extractTensorShape(output_tensor), ::testing::ElementsAreArray({4, 3}));
}
+template <loco::DataType DType>
+void checkIntegerSimple(luci_interpreter::IMemoryManager *memory_manager)
+{
+ using dtype = typename loco::DataTypeImpl<DType>::Type;
+ dtype min_value = std::numeric_limits<dtype>::min();
+ dtype max_value = std::numeric_limits<dtype>::max();
+ std::vector<dtype> x_data{min_value, 2, max_value};
+
+ std::vector<dtype> y_data{min_value, -2, max_value};
+
+ std::vector<bool> ref_output_data{false, true, false};
+
+ Tensor x_tensor = makeInputTensor<DType>({3}, x_data, memory_manager);
+ Tensor y_tensor = makeInputTensor<DType>({3}, y_data, memory_manager);
+ Tensor output_tensor = makeOutputTensor(DataType::BOOL);
+
+ NotEqual kernel(&x_tensor, &y_tensor, &output_tensor);
+ kernel.configure();
+ memory_manager->allocate_memory(output_tensor);
+ kernel.execute();
+
+ EXPECT_THAT(extractTensorData<bool>(output_tensor), ::testing::ElementsAreArray(ref_output_data));
+ EXPECT_THAT(extractTensorShape(output_tensor), ::testing::ElementsAreArray({3}));
+}
+
+template <loco::DataType DType>
+void checkIntegerBroadcast(luci_interpreter::IMemoryManager *memory_manager)
+{
+ using dtype = typename loco::DataTypeImpl<DType>::Type;
+ dtype min_value = std::numeric_limits<dtype>::min();
+ dtype max_value = std::numeric_limits<dtype>::max();
+ std::vector<dtype> x_data{
+ min_value, 2, 3, // Row 1
+ 4, 5, max_value, // Row 2
+ -1, -2, -3, // Row 3
+ min_value, -2, max_value, // Row 4
+ };
+
+ std::vector<dtype> y_data{
+ min_value, -2, max_value, // Row 1
+ };
+
+ std::vector<bool> ref_output_data{
+ false, true, true, // Row 1
+ true, true, false, // Row 2
+ true, false, true, // Row 3
+ false, false, false, // Row 4
+ };
+
+ Tensor x_tensor = makeInputTensor<DType>({4, 3}, x_data, memory_manager);
+ Tensor y_tensor = makeInputTensor<DType>({3}, y_data, memory_manager);
+ Tensor output_tensor = makeOutputTensor(DataType::BOOL);
+
+ NotEqual kernel(&x_tensor, &y_tensor, &output_tensor);
+ kernel.configure();
+ memory_manager->allocate_memory(output_tensor);
+ kernel.execute();
+
+ EXPECT_THAT(extractTensorData<bool>(output_tensor), ::testing::ElementsAreArray(ref_output_data));
+ EXPECT_THAT(extractTensorShape(output_tensor), ::testing::ElementsAreArray({4, 3}));
+}
+
+TEST_F(NotEqualTest, Int32)
+{
+ checkIntegerSimple<loco::DataType::S32>(_memory_manager.get());
+ checkIntegerBroadcast<loco::DataType::S32>(_memory_manager.get());
+ SUCCEED();
+}
+
+TEST_F(NotEqualTest, Int64)
+{
+ checkIntegerSimple<loco::DataType::S64>(_memory_manager.get());
+ checkIntegerBroadcast<loco::DataType::S64>(_memory_manager.get());
+ SUCCEED();
+}
+
// Choose min / max in such a way that there are exactly 256 units to avoid rounding errors.
const float F_MIN = -128.0 / 128.0;
const float F_MAX = 127.0 / 128.0;
@@ -195,6 +271,36 @@ TEST_F(NotEqualTest, Input_Output_Type_NEG)
EXPECT_ANY_THROW(kernel.configure());
}
+TEST_F(NotEqualTest, Float_Broadcast_NEG)
+{
+ Tensor x_tensor = makeInputTensor<DataType::FLOAT32>({2}, {1.f, 2.f}, _memory_manager.get());
+ Tensor y_tensor = makeInputTensor<DataType::FLOAT32>({3}, {1.f, 2.f, 3.f}, _memory_manager.get());
+ Tensor output_tensor = makeOutputTensor(DataType::BOOL);
+
+ NotEqual kernel(&x_tensor, &y_tensor, &output_tensor);
+ ASSERT_ANY_THROW(kernel.configure());
+}
+
+TEST_F(NotEqualTest, Int32_Broadcast_NEG)
+{
+ Tensor x_tensor = makeInputTensor<DataType::S32>({2}, {1, 2}, _memory_manager.get());
+ Tensor y_tensor = makeInputTensor<DataType::S32>({3}, {1, 2, 3}, _memory_manager.get());
+ Tensor output_tensor = makeOutputTensor(DataType::BOOL);
+
+ NotEqual kernel(&x_tensor, &y_tensor, &output_tensor);
+ ASSERT_ANY_THROW(kernel.configure());
+}
+
+TEST_F(NotEqualTest, Int64_Broadcast_NEG)
+{
+ Tensor x_tensor = makeInputTensor<DataType::S64>({2}, {1, 2}, _memory_manager.get());
+ Tensor y_tensor = makeInputTensor<DataType::S64>({3}, {1, 2, 3}, _memory_manager.get());
+ Tensor output_tensor = makeOutputTensor(DataType::BOOL);
+
+ NotEqual kernel(&x_tensor, &y_tensor, &output_tensor);
+ ASSERT_ANY_THROW(kernel.configure());
+}
+
} // namespace
} // namespace kernels
} // namespace luci_interpreter
diff --git a/compiler/luci-interpreter/src/kernels/OneHot.cpp b/compiler/luci-interpreter/src/kernels/OneHot.cpp
new file mode 100644
index 000000000..4d3e5f2ef
--- /dev/null
+++ b/compiler/luci-interpreter/src/kernels/OneHot.cpp
@@ -0,0 +1,136 @@
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ * Copyright 2017 The TensorFlow Authors. All Rights Reserved.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "kernels/OneHot.h"
+#include "kernels/Utils.h"
+
+namespace luci_interpreter
+{
+namespace kernels
+{
+
+namespace
+{
+
+template <typename T>
+void OneHotComputeImpl(const Tensor *indices_tensor, const Tensor *on_value_tensor,
+ const Tensor *off_value_tensor, int32_t depth, int32_t axis,
+ Tensor *output_tensor)
+{
+ // take the input shape and resolve a negative axis (-1 appends the one-hot axis as the innermost dimension)
+ auto const &input_shape = indices_tensor->shape();
+ axis = axis == -1 ? input_shape.num_dims() : axis;
+
+ // TODO support other integer input types
+ auto const *indices = getTensorData<int32_t>(indices_tensor);
+ auto const on_value = getTensorData<T>(on_value_tensor)[0];
+ auto const off_value = getTensorData<T>(off_value_tensor)[0];
+ auto *output = getTensorData<T>(output_tensor);
+
+ // prefix_dim_size == # of elements before the axis
+ // depth == # of elements per axis
+ // suffix_dim_size == # of elements after the axis
+ auto prefix_dim_size = 1;
+ for (int32_t i = 0; i < axis; ++i)
+ {
+ prefix_dim_size *= input_shape.dim(i);
+ }
+ assert(prefix_dim_size > 0);
+ auto const suffix_dim_size = input_shape.num_elements() / prefix_dim_size;
+
+ // View the indices as a matrix of size:
+ // prefix_dim_size x suffix_dim_size
+ // View the output as a matrix of size:
+ // prefix_dim_size x depth x suffix_dim_size
+ // Then the output is:
+ // output(i, j, k) == (indices(i, k) == j) ? on : off
+ for (int32_t i = 0; i < prefix_dim_size; ++i)
+ for (int32_t j = 0; j < depth; ++j)
+ for (int32_t k = 0; k < suffix_dim_size; ++k, ++output)
+ *output = indices[i * suffix_dim_size + k] == j ? on_value : off_value;
+}
+
+} // namespace
+
+OneHot::OneHot(const Tensor *indices, const Tensor *depth, const Tensor *on_value,
+ const Tensor *off_value, Tensor *output, const OneHotParams &params)
+ : KernelWithParams<OneHotParams>({indices, depth, on_value, off_value}, {output}, params)
+{
+ // Do nothing
+}
+
+void OneHot::configure()
+{
+ // check types
+ LUCI_INTERPRETER_CHECK(indices()->element_type() == DataType::S32);
+ LUCI_INTERPRETER_CHECK(depth()->element_type() == DataType::S32);
+ LUCI_INTERPRETER_CHECK(on_value()->element_type() == off_value()->element_type());
+ LUCI_INTERPRETER_CHECK(output()->element_type() == on_value()->element_type());
+
+ // check shape dependent parameters
+ LUCI_INTERPRETER_CHECK(on_value()->shape().num_elements() == 1);
+ LUCI_INTERPRETER_CHECK(off_value()->shape().num_elements() == 1);
+ LUCI_INTERPRETER_CHECK(depth()->shape().num_elements() == 1);
+ LUCI_INTERPRETER_CHECK(params().axis >= -1 && params().axis <= indices()->shape().num_dims());
+
+ // define parameters that affect the output shape
+ auto const depth_value = getTensorData<int32_t>(depth())[0];
+ auto const &input_shape = indices()->shape();
+ auto const input_dims = input_shape.num_dims();
+ auto const axis = params().axis == -1 ? input_dims : params().axis;
+
+ // define output shape
+ Shape output_shape(input_shape.num_dims() + 1);
+ {
+ for (int32_t d = 0; d < axis; ++d)
+ output_shape.dim(d) = input_shape.dim(d);
+
+ output_shape.dim(axis) = depth_value;
+
+ for (int32_t d = axis + 1; d < output_shape.num_dims(); ++d)
+ output_shape.dim(d) = input_shape.dim(d - 1);
+ }
+
+ // reshape output
+ output()->resize(output_shape);
+}
+
+void OneHot::execute() const
+{
+ auto const depth_value = getTensorData<int32_t>(depth())[0];
+ auto const axis = params().axis;
+
+ switch (output()->element_type())
+ {
+ case loco::DataType::FLOAT32:
+ OneHotComputeImpl<float>(indices(), on_value(), off_value(), depth_value, axis, output());
+ break;
+ case loco::DataType::U8:
+ OneHotComputeImpl<uint8_t>(indices(), on_value(), off_value(), depth_value, axis, output());
+ break;
+ case loco::DataType::S16:
+ OneHotComputeImpl<int16_t>(indices(), on_value(), off_value(), depth_value, axis, output());
+ break;
+ default:
+ // TODO Support other data types
+ throw std::runtime_error("Not supported, yet!");
+ break;
+ }
+}
+
+} // namespace kernels
+} // namespace luci_interpreter
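The prefix/depth/suffix view makes the indexing concrete. For the BasicPattern test below, with indices of shape {2, 3}, depth 4, and axis 1: prefix_dim_size = 2 (dims before the axis), suffix_dim_size = 3 (dims after), the output is {2, 4, 3}, and output(i, j, k) = (indices(i, k) == j); indices outside [0, depth) such as 5 and 7 never match, so those columns stay off. A standalone replay:

    #include <cstdio>
    #include <vector>

    int main()
    {
      const int prefix = 2, depth = 4, suffix = 3;      // indices shape {2, 3}, axis 1
      const std::vector<int> indices{0, 3, 5, 7, 3, 0}; // same data as BasicPattern
      std::vector<int> out;
      for (int i = 0; i < prefix; ++i)
        for (int j = 0; j < depth; ++j)
          for (int k = 0; k < suffix; ++k)
            out.push_back(indices[i * suffix + k] == j ? 1 : 0);
      // out now equals the axis = 1 expectation in OneHot.test.cpp (24 values)
      std::printf("%zu values\n", out.size());
      return 0;
    }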
diff --git a/compiler/luci-interpreter/src/kernels/OneHot.h b/compiler/luci-interpreter/src/kernels/OneHot.h
new file mode 100644
index 000000000..572f857ae
--- /dev/null
+++ b/compiler/luci-interpreter/src/kernels/OneHot.h
@@ -0,0 +1,48 @@
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef LUCI_INTERPRETER_KERNELS_ONEHOT_H
+#define LUCI_INTERPRETER_KERNELS_ONEHOT_H
+
+#include "core/Kernel.h"
+#include "core/KernelParams.h"
+
+namespace luci_interpreter
+{
+namespace kernels
+{
+
+class OneHot : public KernelWithParams<OneHotParams>
+{
+public:
+ OneHot(const Tensor *indices, const Tensor *depth, const Tensor *on_value,
+ const Tensor *off_value, Tensor *output, const OneHotParams &params);
+
+ const Tensor *indices() const { return _inputs[0]; }
+ const Tensor *depth() const { return _inputs[1]; }
+ const Tensor *on_value() const { return _inputs[2]; }
+ const Tensor *off_value() const { return _inputs[3]; }
+
+ Tensor *output() const { return _outputs[0]; }
+
+ void configure() override;
+ void execute() const override;
+};
+
+} // namespace kernels
+} // namespace luci_interpreter
+
+#endif // LUCI_INTERPRETER_KERNELS_ONEHOT_H
diff --git a/compiler/luci-interpreter/src/kernels/OneHot.test.cpp b/compiler/luci-interpreter/src/kernels/OneHot.test.cpp
new file mode 100644
index 000000000..45b6968fa
--- /dev/null
+++ b/compiler/luci-interpreter/src/kernels/OneHot.test.cpp
@@ -0,0 +1,192 @@
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "kernels/OneHot.h"
+#include "kernels/TestUtils.h"
+#include "luci_interpreter/TestMemoryManager.h"
+
+namespace luci_interpreter
+{
+namespace kernels
+{
+namespace
+{
+
+using namespace testing;
+
+template <typename T1, typename T2>
+void Check(std::initializer_list<int32_t> input_shape, std::initializer_list<int32_t> output_shape,
+ std::initializer_list<T1> input_data, std::initializer_list<int32_t> depth_data,
+ std::initializer_list<T2> on_value_data, std::initializer_list<T2> off_value_data,
+ int32_t axis, std::initializer_list<T2> output_data)
+{
+ std::unique_ptr<IMemoryManager> memory_manager = std::make_unique<TestMemoryManager>();
+
+ constexpr auto input_type = getElementType<T1>();
+ constexpr auto output_type = getElementType<T2>();
+
+ Tensor input_tensor = makeInputTensor<input_type>(input_shape, input_data, memory_manager.get());
+ Tensor depth_tensor = makeInputTensor<DataType::S32>({}, depth_data, memory_manager.get());
+ Tensor on_value_tensor = makeInputTensor<output_type>({}, on_value_data, memory_manager.get());
+ Tensor off_value_tensor = makeInputTensor<output_type>({}, off_value_data, memory_manager.get());
+ Tensor output_tensor = makeOutputTensor(output_type);
+
+ OneHotParams params{};
+ params.axis = axis;
+
+ OneHot kernel(&input_tensor, &depth_tensor, &on_value_tensor, &off_value_tensor, &output_tensor,
+ params);
+ kernel.configure();
+ memory_manager->allocate_memory(output_tensor);
+ kernel.execute();
+
+ EXPECT_THAT(extractTensorShape(output_tensor), output_shape);
+ EXPECT_THAT(extractTensorData<T2>(output_tensor), ::testing::ElementsAreArray(output_data));
+}
+
+template <typename T> class OneHotTest : public ::testing::Test
+{
+};
+
+using DataTypes = ::testing::Types<float, uint8_t, int16_t>;
+TYPED_TEST_SUITE(OneHotTest, DataTypes);
+
+TYPED_TEST(OneHotTest, BasicPattern)
+{
+ // axis 0
+ Check<int32_t, TypeParam>(/*input_shape=*/{2, 3}, /*output_shape=*/{4, 2, 3},
+ /*input_data=*/
+ {
+ 0, 3, 5, //
+ 7, 3, 0, //
+ },
+ /*depth_data=*/{4}, /*on_value_data=*/{1}, /*off_value_data=*/{0},
+ /*axis=*/0,
+ /*output_data=*/
+ {
+ 1, 0, 0, //
+ 0, 0, 1, //
+
+ 0, 0, 0, //
+ 0, 0, 0, //
+
+ 0, 0, 0, //
+ 0, 0, 0, //
+
+ 0, 1, 0, //
+ 0, 1, 0, //
+ });
+ // axis 1
+ Check<int32_t, TypeParam>(/*input_shape=*/{2, 3}, /*output_shape=*/{2, 4, 3},
+ /*input_data=*/
+ {
+ 0, 3, 5, //
+ 7, 3, 0, //
+ },
+ /*depth_data=*/{4}, /*on_value_data=*/{1}, /*off_value_data=*/{0},
+ /*axis=*/1,
+ /*output_data=*/
+ {
+ 1, 0, 0, //
+ 0, 0, 0, //
+ 0, 0, 0, //
+ 0, 1, 0, //
+
+ 0, 0, 1, //
+ 0, 0, 0, //
+ 0, 0, 0, //
+ 0, 1, 0, //
+ });
+ // axis -1
+ Check<int32_t, TypeParam>(/*input_shape=*/{2, 3}, /*output_shape=*/{2, 3, 4},
+ /*input_data=*/
+ {
+ 0, 3, 5, //
+ 7, 3, 0, //
+ },
+ /*depth_data=*/{4}, /*on_value_data=*/{1}, /*off_value_data=*/{0},
+ /*axis=*/-1,
+ /*output_data=*/
+ {
+ 1, 0, 0, 0, //
+ 0, 0, 0, 1, //
+ 0, 0, 0, 0, //
+
+ 0, 0, 0, 0, //
+ 0, 0, 0, 1, //
+ 1, 0, 0, 0, //
+ });
+}
+
+TEST(OneHotTest, UnsupportedInputType_NEG)
+{
+ std::unique_ptr<IMemoryManager> memory_manager = std::make_unique<TestMemoryManager>();
+
+ // input type should be integer
+ Tensor input_tensor = makeInputTensor<DataType::FLOAT32>({1}, {0}, memory_manager.get());
+
+ Tensor depth_tensor = makeInputTensor<DataType::S32>({}, {1}, memory_manager.get());
+ Tensor on_value_tensor = makeInputTensor<DataType::FLOAT32>({}, {1.0}, memory_manager.get());
+ Tensor off_value_tensor = makeInputTensor<DataType::FLOAT32>({}, {0.0}, memory_manager.get());
+ Tensor output_tensor = makeOutputTensor(DataType::FLOAT32);
+
+ OneHotParams params = {-1};
+
+ OneHot kernel(&input_tensor, &depth_tensor, &on_value_tensor, &off_value_tensor, &output_tensor,
+ params);
+ EXPECT_ANY_THROW(kernel.configure());
+}
+
+TEST(OneHotTest, OutputTypeMismatch_NEG)
+{
+ std::unique_ptr<IMemoryManager> memory_manager = std::make_unique<TestMemoryManager>();
+
+ Tensor input_tensor = makeInputTensor<DataType::S32>({1}, {0}, memory_manager.get());
+ Tensor depth_tensor = makeInputTensor<DataType::S32>({}, {1}, memory_manager.get());
+
+ // types of on_value, off_value, and output_tensor should be the same
+ Tensor on_value_tensor = makeInputTensor<DataType::FLOAT32>({}, {1.0}, memory_manager.get());
+ Tensor off_value_tensor = makeInputTensor<DataType::FLOAT32>({}, {0.0}, memory_manager.get());
+ Tensor output_tensor = makeOutputTensor(DataType::S16);
+
+ OneHotParams params = {-1};
+
+ OneHot kernel(&input_tensor, &depth_tensor, &on_value_tensor, &off_value_tensor, &output_tensor,
+ params);
+ EXPECT_ANY_THROW(kernel.configure());
+}
+
+TEST(OneHotTest, InvalidAxis_NEG)
+{
+ std::unique_ptr<IMemoryManager> memory_manager = std::make_unique<TestMemoryManager>();
+
+ Tensor input_tensor = makeInputTensor<DataType::S32>({1}, {0}, memory_manager.get());
+ Tensor depth_tensor = makeInputTensor<DataType::S32>({}, {1}, memory_manager.get());
+ Tensor on_value_tensor = makeInputTensor<DataType::FLOAT32>({}, {1.0}, memory_manager.get());
+ Tensor off_value_tensor = makeInputTensor<DataType::FLOAT32>({}, {0.0}, memory_manager.get());
+ Tensor output_tensor = makeOutputTensor(DataType::FLOAT32);
+
+ // axis should be in [-1, input_shape.rank]
+ OneHotParams params = {-2};
+
+ OneHot kernel(&input_tensor, &depth_tensor, &on_value_tensor, &off_value_tensor, &output_tensor,
+ params);
+ EXPECT_ANY_THROW(kernel.configure());
+}
+
+} // namespace
+} // namespace kernels
+} // namespace luci_interpreter
diff --git a/compiler/luci-interpreter/src/kernels/Pack.test.cpp b/compiler/luci-interpreter/src/kernels/Pack.test.cpp
index 90a0f894e..2404e4303 100644
--- a/compiler/luci-interpreter/src/kernels/Pack.test.cpp
+++ b/compiler/luci-interpreter/src/kernels/Pack.test.cpp
@@ -80,7 +80,7 @@ template <typename T> class PackTest : public ::testing::Test
};
using DataTypes = ::testing::Types<uint8_t, float>;
-TYPED_TEST_CASE(PackTest, DataTypes);
+TYPED_TEST_SUITE(PackTest, DataTypes);
TYPED_TEST(PackTest, ThreeInputs)
{
diff --git a/compiler/luci-interpreter/src/kernels/Pad.cpp b/compiler/luci-interpreter/src/kernels/Pad.cpp
index 700448e7a..fe172884b 100644
--- a/compiler/luci-interpreter/src/kernels/Pad.cpp
+++ b/compiler/luci-interpreter/src/kernels/Pad.cpp
@@ -93,6 +93,16 @@ void Pad::execute() const
getTensorData<uint8_t>(output()));
break;
}
+ case DataType::S8:
+ {
+ assert(output()->zero_point() >= std::numeric_limits<int8_t>::min());
+ assert(output()->zero_point() <= std::numeric_limits<int8_t>::max());
+ const auto pad_value = static_cast<int8_t>(output()->zero_point());
+ tflite::reference_ops::Pad(params, getTensorShape(input()), getTensorData<int8_t>(input()),
+ &pad_value, getTensorShape(output()),
+ getTensorData<int8_t>(output()));
+ break;
+ }
default:
throw std::runtime_error("Unsupported type.");
}
diff --git a/compiler/luci-interpreter/src/kernels/Pad.test.cpp b/compiler/luci-interpreter/src/kernels/Pad.test.cpp
index 7994263e2..dd3ce947c 100644
--- a/compiler/luci-interpreter/src/kernels/Pad.test.cpp
+++ b/compiler/luci-interpreter/src/kernels/Pad.test.cpp
@@ -54,6 +54,32 @@ TEST(Pad, Uint8)
EXPECT_THAT(extractTensorShape(output_tensor), ::testing::ElementsAreArray({1, 4, 7, 1}));
}
+TEST(Pad, Int8)
+{
+ std::unique_ptr<IMemoryManager> memory_manager = std::make_unique<TestMemoryManager>();
+ float kQuantizedTolerance = GetTolerance(-1.0, 1.0);
+ std::pair<float, int32_t> quant_param = quantizationParams<int8_t>(-1.0f, 1.0f);
+ std::vector<float> input_data{-0.2, 0.4, 0.5, -0.7, -0.1, -0.9, 0.7, 0.1, 0.2};
+ std::vector<int32_t> paddings_data{0, 0, 1, 2, 2, 1, 0, 0};
+ Tensor input_tensor = makeInputTensor<DataType::S8>(
+ {1, 3, 3, 1}, quant_param.first, quant_param.second, input_data, memory_manager.get());
+ Tensor paddings_tensor =
+ makeInputTensor<DataType::S32>({4, 2}, paddings_data, memory_manager.get());
+ Tensor output_tensor = makeOutputTensor(DataType::S8, quant_param.first, quant_param.second);
+
+ Pad kernel(&input_tensor, &paddings_tensor, &output_tensor);
+ kernel.configure();
+ memory_manager->allocate_memory(output_tensor);
+ kernel.execute();
+
+ std::vector<float> ref_output_data{0, 0, 0, 0, 0, 0, 0, 0, -0.2, 0.4, 0.5, 0,
+ 0, 0, -0.7, -0.1, -0.9, 0, 0, 0, 0.7, 0.1, 0.2, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0};
+ EXPECT_THAT(dequantizeTensorData(output_tensor),
+ FloatArrayNear(ref_output_data, kQuantizedTolerance));
+ EXPECT_THAT(extractTensorShape(output_tensor), ::testing::ElementsAreArray({1, 6, 6, 1}));
+}
+
TEST(Pad, Float)
{
std::unique_ptr<IMemoryManager> memory_manager = std::make_unique<TestMemoryManager>();
diff --git a/compiler/luci-interpreter/src/kernels/Quantize.cpp b/compiler/luci-interpreter/src/kernels/Quantize.cpp
new file mode 100644
index 000000000..0c8544a65
--- /dev/null
+++ b/compiler/luci-interpreter/src/kernels/Quantize.cpp
@@ -0,0 +1,160 @@
+/*
+ * Copyright (c) 2022 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "kernels/Quantize.h"
+#include "kernels/Utils.h"
+#include "PALQuantize.h"
+
+namespace luci_interpreter
+{
+namespace kernels
+{
+
+namespace
+{
+
+template <typename input_dtype> void call_requantize(const Tensor *input, Tensor *output)
+{
+ int32_t multiplier;
+ int shift;
+
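+ // Requantization rescales by input_scale / output_scale; the ratio is
+ // decomposed into an integer multiplier and shift for fixed-point math.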
+ const double effective_output_scale = input->scale() / output->scale();
+ quantizeMultiplier(effective_output_scale, &multiplier, &shift);
+
+ const auto input_shape = getTensorShape(input);
+ const auto output_shape = getTensorShape(output);
+ const auto size = tflite::MatchingFlatSize(input_shape, output_shape);
+
+ const auto input_data = getTensorData<input_dtype>(input);
+
+ switch (output->element_type())
+ {
+ case loco::DataType::S8:
+ luci_interpreter_pal::Requantize(input_data, size, multiplier, shift, input->zero_point(),
+ output->zero_point(), getTensorData<int8_t>(output));
+ break;
+ case loco::DataType::U8:
+ luci_interpreter_pal::Requantize(input_data, size, multiplier, shift, input->zero_point(),
+ output->zero_point(), getTensorData<uint8_t>(output));
+ break;
+ case loco::DataType::S16:
+ luci_interpreter_pal::Requantize(input_data, size, multiplier, shift, input->zero_point(),
+ output->zero_point(), getTensorData<int16_t>(output));
+ break;
+ default:
+ throw std::runtime_error("Unsupported quantized type, yet!");
+ }
+}
+
+} // namespace
+
+Quantize::Quantize(const Tensor *input, Tensor *output) : Kernel({input}, {output}) {}
+
+void Quantize::configure()
+{
+
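+ // 16-bit quantization is symmetric, so its zero point must be zero.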
+ if (input()->element_type() == loco::DataType::S16)
+ LUCI_INTERPRETER_CHECK(input()->zero_point() == 0);
+
+ switch (input()->element_type())
+ {
+ case loco::DataType::FLOAT32:
+ {
+ LUCI_INTERPRETER_CHECK(output()->element_type() == loco::DataType::U8 ||
+ output()->element_type() == loco::DataType::S8 ||
+ output()->element_type() == loco::DataType::S16);
+ break;
+ }
+ case loco::DataType::S16:
+ case loco::DataType::S8:
+ case loco::DataType::U8:
+ {
+ LUCI_INTERPRETER_CHECK(output()->element_type() == loco::DataType::S8 ||
+ output()->element_type() == loco::DataType::U8 ||
+ output()->element_type() == loco::DataType::S16);
+ if (output()->element_type() == loco::DataType::S16)
+ {
+ LUCI_INTERPRETER_CHECK(output()->zero_point() == 0);
+ }
+ break;
+ }
+ default:
+ throw std::runtime_error("Unsupported type");
+ }
+
+ output()->resize(input()->shape());
+}
+
+void Quantize::execute() const
+{
+ switch (input()->element_type())
+ {
+ case loco::DataType::FLOAT32:
+ {
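+ // q = round(x / scale) + zero_point, clamped to the target type's range.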
+ tflite::QuantizationParams op_params;
+ op_params.zero_point = output()->zero_point();
+ op_params.scale = output()->scale();
+ const auto input_data = getTensorData<float>(input());
+
+ switch (output()->element_type())
+ {
+ case loco::DataType::S8:
+ {
+ luci_interpreter_pal::Quantize(op_params, getTensorShape(input()), input_data,
+ getTensorShape(output()), getTensorData<int8_t>(output()));
+ break;
+ }
+ case loco::DataType::U8:
+ {
+ luci_interpreter_pal::Quantize(op_params, getTensorShape(input()), input_data,
+ getTensorShape(output()),
+ getTensorData<uint8_t>(output()));
+ break;
+ }
+ case loco::DataType::S16:
+ {
+ luci_interpreter_pal::Quantize(op_params, getTensorShape(input()), input_data,
+ getTensorShape(output()),
+ getTensorData<int16_t>(output()));
+ break;
+ }
+ default:
+ throw std::runtime_error("Unsupported type.");
+ }
+ break;
+ }
+ case loco::DataType::S16:
+ {
+ call_requantize<int16_t>(input(), output());
+ break;
+ }
+ case loco::DataType::S8:
+ {
+ call_requantize<int8_t>(input(), output());
+ break;
+ }
+ case loco::DataType::U8:
+ {
+ call_requantize<uint8_t>(input(), output());
+ break;
+ }
+ default:
+ throw std::runtime_error("Unsupported type.");
+ }
+}
+
+} // namespace kernels
+} // namespace luci_interpreter
diff --git a/compiler/luci-interpreter/src/kernels/Quantize.h b/compiler/luci-interpreter/src/kernels/Quantize.h
new file mode 100644
index 000000000..006c5366f
--- /dev/null
+++ b/compiler/luci-interpreter/src/kernels/Quantize.h
@@ -0,0 +1,43 @@
+/*
+ * Copyright (c) 2022 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef LUCI_INTERPRETER_KERNELS_QUANTIZE_H
+#define LUCI_INTERPRETER_KERNELS_QUANTIZE_H
+
+#include "core/Kernel.h"
+#include "core/KernelParams.h"
+
+namespace luci_interpreter
+{
+namespace kernels
+{
+
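+// Typical usage (as exercised by the kernel tests):
+//   Quantize kernel(&input_tensor, &output_tensor);
+//   kernel.configure(); // validates types and resizes the output
+//   kernel.execute();   // quantizes float input or requantizes quantized input
+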
+class Quantize : public Kernel
+{
+public:
+ Quantize(const Tensor *input, Tensor *output);
+
+ const Tensor *input() const { return _inputs[0]; }
+ Tensor *output() const { return _outputs[0]; }
+
+ void configure() override;
+ void execute() const override;
+};
+
+} // namespace kernels
+} // namespace luci_interpreter
+
+#endif // LUCI_INTERPRETER_KERNELS_QUANTIZE_H
diff --git a/compiler/luci-interpreter/src/kernels/Quantize.test.cpp b/compiler/luci-interpreter/src/kernels/Quantize.test.cpp
new file mode 100644
index 000000000..22e67fe3f
--- /dev/null
+++ b/compiler/luci-interpreter/src/kernels/Quantize.test.cpp
@@ -0,0 +1,254 @@
+/*
+ * Copyright (c) 2022 Samsung Electronics Co., Ltd. All Rights Reserved
+ * Copyright 2019 The TensorFlow Authors. All Rights Reserved.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "kernels/Quantize.h"
+#include "kernels/TestUtils.h"
+#include "luci_interpreter/TestMemoryManager.h"
+
+namespace luci_interpreter
+{
+namespace kernels
+{
+namespace
+{
+
+using namespace testing;
+
+class QuantizeTest : public ::testing::Test
+{
+protected:
+ void SetUp() override { _memory_manager = std::make_unique<TestMemoryManager>(); }
+
+ std::unique_ptr<IMemoryManager> _memory_manager;
+};
+
+TEST_F(QuantizeTest, FloatUint8)
+{
+ std::vector<float> input_data{-63.5, -63, -62.5, -62, -61.5, 62, 62.5, 63, 63.5, 64};
+
+ std::vector<uint8_t> ref_output_data{0, 1, 2, 3, 4, 251, 252, 253, 254, 255};
+
+ Tensor input_tensor =
+ makeInputTensor<DataType::FLOAT32>({2, 5}, input_data, _memory_manager.get());
+ Tensor output_tensor = makeOutputTensor(DataType::U8, /*scale*/ 0.5, /*zero_point*/ 127);
+
+ Quantize kernel(&input_tensor, &output_tensor);
+ kernel.configure();
+ _memory_manager->allocate_memory(output_tensor);
+ kernel.execute();
+
+ EXPECT_THAT(extractTensorData<uint8_t>(output_tensor),
+ ::testing::ElementsAreArray(ref_output_data));
+ EXPECT_THAT(extractTensorShape(output_tensor), ::testing::ElementsAreArray({2, 5}));
+}
+
+TEST_F(QuantizeTest, FloatInt8)
+{
+ std::vector<float> input_data{-63.5, -63, -62.5, -62, -61.5, 62, 62.5, 63, 63.5, 64};
+
+ std::vector<int8_t> ref_output_data{-128, -127, -126, -125, -124, 123, 124, 125, 126, 127};
+
+ Tensor input_tensor =
+ makeInputTensor<DataType::FLOAT32>({2, 5}, input_data, _memory_manager.get());
+ Tensor output_tensor = makeOutputTensor(DataType::S8, /*scale*/ 0.5, /*zero_point*/ -1);
+
+ Quantize kernel(&input_tensor, &output_tensor);
+ kernel.configure();
+ _memory_manager->allocate_memory(output_tensor);
+ kernel.execute();
+
+ EXPECT_THAT(extractTensorData<int8_t>(output_tensor),
+ ::testing::ElementsAreArray(ref_output_data));
+ EXPECT_THAT(extractTensorShape(output_tensor), ::testing::ElementsAreArray({2, 5}));
+}
+
+TEST_F(QuantizeTest, FloatInt16)
+{
+ std::vector<float> input_data{-63.5, -63, -3, -2, -1, 1, 2, 3, 63.5, 64};
+
+ std::vector<int16_t> ref_output_data{-12700, -12600, -600, -400, -200,
+ 200, 400, 600, 12700, 12800};
+
+ Tensor input_tensor =
+ makeInputTensor<DataType::FLOAT32>({2, 5}, input_data, _memory_manager.get());
+ Tensor output_tensor = makeOutputTensor(DataType::S16, /*scale*/ 0.005, /*zero_point*/ 0);
+
+ Quantize kernel(&input_tensor, &output_tensor);
+ kernel.configure();
+ _memory_manager->allocate_memory(output_tensor);
+ kernel.execute();
+
+ EXPECT_THAT(extractTensorData<int16_t>(output_tensor),
+ ::testing::ElementsAreArray(ref_output_data));
+ EXPECT_THAT(extractTensorShape(output_tensor), ::testing::ElementsAreArray({2, 5}));
+}
+
+TEST_F(QuantizeTest, Int16Int16)
+{
+ std::vector<float> input_data{1, 2, 3, 4, 5, 6, 7, 8, 9, 10};
+
+ std::vector<int16_t> ref_output_data{2, 4, 6, 8, 10, 12, 14, 16, 18, 20};
+
+ Tensor input_tensor = makeInputTensor<DataType::S16>(
+ {1, 1, 2, 5}, /*scale*/ 1.0, /*zero_point*/ 0, input_data, _memory_manager.get());
+ Tensor output_tensor = makeOutputTensor(DataType::S16, /*scale*/ 0.5, /*zero_point*/ 0);
+
+ Quantize kernel(&input_tensor, &output_tensor);
+ kernel.configure();
+ _memory_manager->allocate_memory(output_tensor);
+ kernel.execute();
+
+ EXPECT_THAT(extractTensorData<int16_t>(output_tensor),
+ ::testing::ElementsAreArray(ref_output_data));
+ EXPECT_THAT(extractTensorShape(output_tensor), ::testing::ElementsAreArray({1, 1, 2, 5}));
+}
+
+TEST_F(QuantizeTest, Int8Int8)
+{
+ std::vector<float> input_data{1, 2, 3, 4, 5, 6, 7, 8, 9, 10};
+
+ std::vector<int8_t> ref_output_data{1, 3, 5, 7, 9, 11, 13, 15, 17, 19};
+
+ Tensor input_tensor = makeInputTensor<DataType::S8>(
+ {1, 1, 2, 5}, /*scale*/ 0.5, /*zero_point*/ -1, input_data, _memory_manager.get());
+ Tensor output_tensor = makeOutputTensor(DataType::S8, /*scale*/ 0.5, /*zero_point*/ -1);
+
+ Quantize kernel(&input_tensor, &output_tensor);
+ kernel.configure();
+ _memory_manager->allocate_memory(output_tensor);
+ kernel.execute();
+
+ EXPECT_THAT(extractTensorData<int8_t>(output_tensor),
+ ::testing::ElementsAreArray(ref_output_data));
+ EXPECT_THAT(extractTensorShape(output_tensor), ::testing::ElementsAreArray({1, 1, 2, 5}));
+}
+
+TEST_F(QuantizeTest, Uint8Uint8)
+{
+ std::vector<float> input_data{1, 2, 3, 4, 5, 6, 7, 8, 9, 10};
+
+ std::vector<uint8_t> ref_output_data{129, 131, 133, 135, 137, 139, 141, 143, 145, 147};
+
+ Tensor input_tensor = makeInputTensor<DataType::U8>(
+ {1, 1, 2, 5}, /*scale*/ 0.5, /*zero_point*/ 127, input_data, _memory_manager.get());
+ Tensor output_tensor = makeOutputTensor(DataType::U8, /*scale*/ 0.5, /*zero_point*/ 127);
+
+ Quantize kernel(&input_tensor, &output_tensor);
+ kernel.configure();
+ _memory_manager->allocate_memory(output_tensor);
+ kernel.execute();
+
+ EXPECT_THAT(extractTensorData<uint8_t>(output_tensor),
+ ::testing::ElementsAreArray(ref_output_data));
+ EXPECT_THAT(extractTensorShape(output_tensor), ::testing::ElementsAreArray({1, 1, 2, 5}));
+}
+
+TEST_F(QuantizeTest, Int16Int8)
+{
+ std::vector<float> input_data{1, 2, 3, 4, 5, 6, 7, 8, 9, 10};
+
+ std::vector<int8_t> ref_output_data{1, 3, 5, 7, 9, 11, 13, 15, 17, 19};
+
+ Tensor input_tensor = makeInputTensor<DataType::S16>(
+ {1, 1, 2, 5}, /*scale*/ 1.0, /*zero_point*/ 0, input_data, _memory_manager.get());
+ Tensor output_tensor = makeOutputTensor(DataType::S8, /*scale*/ 0.5, /*zero_point*/ -1);
+
+ Quantize kernel(&input_tensor, &output_tensor);
+ kernel.configure();
+ _memory_manager->allocate_memory(output_tensor);
+ kernel.execute();
+
+ EXPECT_THAT(extractTensorData<int8_t>(output_tensor),
+ ::testing::ElementsAreArray(ref_output_data));
+ EXPECT_THAT(extractTensorShape(output_tensor), ::testing::ElementsAreArray({1, 1, 2, 5}));
+}
+
+TEST_F(QuantizeTest, InvalidInputType_NEG)
+{
+ std::vector<float> input_data{1, 2, 3, 4, 5, 6, 7, 8, 9, 10};
+
+ Tensor input_tensor =
+ makeInputTensor<DataType::S32>({1, 1, 2, 5}, 0.5, 0, input_data, _memory_manager.get());
+ Tensor output_tensor = makeOutputTensor(DataType::S8, /*scale*/ 0.5, /*zero_point*/ -1);
+
+ Quantize kernel(&input_tensor, &output_tensor);
+ EXPECT_ANY_THROW(kernel.configure());
+}
+
+TEST_F(QuantizeTest, InvalidOutputTypeForFloatInput_NEG)
+{
+ std::vector<float> input_data{1, 2, 3, 4, 5, 6, 7, 8, 9, 10};
+
+ Tensor input_tensor =
+ makeInputTensor<DataType::FLOAT32>({1, 1, 2, 5}, input_data, _memory_manager.get());
+ Tensor output_tensor = makeOutputTensor(DataType::FLOAT32);
+
+ Quantize kernel(&input_tensor, &output_tensor);
+ EXPECT_ANY_THROW(kernel.configure());
+}
+
+TEST_F(QuantizeTest, InvalidOutputTypeForInt16Input_NEG)
+{
+ std::vector<float> input_data{1, 2, 3, 4, 5, 6, 7, 8, 9, 10};
+
+ Tensor input_tensor =
+ makeInputTensor<DataType::S16>({1, 1, 2, 5}, 0.5, 0, input_data, _memory_manager.get());
+ Tensor output_tensor = makeOutputTensor(DataType::FLOAT32);
+
+ Quantize kernel(&input_tensor, &output_tensor);
+ EXPECT_ANY_THROW(kernel.configure());
+}
+
+TEST_F(QuantizeTest, InvalidOutputTypeForInt8Input_NEG)
+{
+ std::vector<float> input_data{1, 2, 3, 4, 5, 6, 7, 8, 9, 10};
+
+ Tensor input_tensor =
+ makeInputTensor<DataType::S8>({1, 1, 2, 5}, 0.5, 0, input_data, _memory_manager.get());
+ Tensor output_tensor = makeOutputTensor(DataType::FLOAT32);
+
+ Quantize kernel(&input_tensor, &output_tensor);
+ EXPECT_ANY_THROW(kernel.configure());
+}
+
+TEST_F(QuantizeTest, InvalidOutputTypeForUint8Input_NEG)
+{
+ std::vector<float> input_data{1, 2, 3, 4, 5, 6, 7, 8, 9, 10};
+
+ Tensor input_tensor =
+ makeInputTensor<DataType::U8>({1, 1, 2, 5}, 0.5, 0, input_data, _memory_manager.get());
+ Tensor output_tensor = makeOutputTensor(DataType::S32);
+
+ Quantize kernel(&input_tensor, &output_tensor);
+ EXPECT_ANY_THROW(kernel.configure());
+}
+
+TEST_F(QuantizeTest, InvalidInputZeroPoint_NEG)
+{
+ std::vector<float> input_data{1, 2, 3, 4, 5, 6, 7, 8, 9, 10};
+
+ Tensor input_tensor =
+ makeInputTensor<DataType::S16>({1, 1, 2, 5}, 0.5, -1, input_data, _memory_manager.get());
+ Tensor output_tensor = makeOutputTensor(DataType::S16, 0.5, 0);
+
+ Quantize kernel(&input_tensor, &output_tensor);
+ EXPECT_ANY_THROW(kernel.configure());
+}
+
+} // namespace
+} // namespace kernels
+} // namespace luci_interpreter
diff --git a/compiler/luci-interpreter/src/kernels/ResizeBilinear.test.cpp b/compiler/luci-interpreter/src/kernels/ResizeBilinear.test.cpp
index 7af20f8c4..933a1128c 100644
--- a/compiler/luci-interpreter/src/kernels/ResizeBilinear.test.cpp
+++ b/compiler/luci-interpreter/src/kernels/ResizeBilinear.test.cpp
@@ -90,7 +90,7 @@ template <typename T> class ResizeBilinearTest : public ::testing::Test
};
using DataTypes = ::testing::Types<float, uint8_t>;
-TYPED_TEST_CASE(ResizeBilinearTest, DataTypes);
+TYPED_TEST_SUITE(ResizeBilinearTest, DataTypes);
TYPED_TEST(ResizeBilinearTest, SimpleTest)
{
diff --git a/compiler/luci-interpreter/src/kernels/ResizeNearestNeighbor.test.cpp b/compiler/luci-interpreter/src/kernels/ResizeNearestNeighbor.test.cpp
index 0e9017c78..7ade02a6f 100644
--- a/compiler/luci-interpreter/src/kernels/ResizeNearestNeighbor.test.cpp
+++ b/compiler/luci-interpreter/src/kernels/ResizeNearestNeighbor.test.cpp
@@ -92,7 +92,7 @@ template <typename T> class ResizeNearestNeighborTest : public ::testing::Test
};
using DataTypes = ::testing::Types<float, uint8_t>;
-TYPED_TEST_CASE(ResizeNearestNeighborTest, DataTypes);
+TYPED_TEST_SUITE(ResizeNearestNeighborTest, DataTypes);
TYPED_TEST(ResizeNearestNeighborTest, SimpleTest)
{
diff --git a/compiler/luci-interpreter/src/kernels/ReverseV2.test.cpp b/compiler/luci-interpreter/src/kernels/ReverseV2.test.cpp
index 2bd94875b..c0025faca 100644
--- a/compiler/luci-interpreter/src/kernels/ReverseV2.test.cpp
+++ b/compiler/luci-interpreter/src/kernels/ReverseV2.test.cpp
@@ -33,7 +33,7 @@ template <typename T> class ReverseV2Test : public ::testing::Test
};
using DataTypes = ::testing::Types<float, uint8_t>;
-TYPED_TEST_CASE(ReverseV2Test, DataTypes);
+TYPED_TEST_SUITE(ReverseV2Test, DataTypes);
TYPED_TEST(ReverseV2Test, MultiDimensions)
{
diff --git a/compiler/luci-interpreter/src/kernels/SVDF.cpp b/compiler/luci-interpreter/src/kernels/SVDF.cpp
new file mode 100644
index 000000000..40d79aaa3
--- /dev/null
+++ b/compiler/luci-interpreter/src/kernels/SVDF.cpp
@@ -0,0 +1,241 @@
+/*
+ * Copyright (c) 2022 Samsung Electronics Co., Ltd. All Rights Reserved
+ * Copyright 2017 The TensorFlow Authors. All Rights Reserved.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "kernels/SVDF.h"
+#include "kernels/Utils.h"
+#include "PALSVDF.h"
+
+#include <tensorflow/lite/kernels/internal/quantization_util.h>
+
+namespace luci_interpreter
+{
+namespace kernels
+{
+
+namespace
+{
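+// Maps luci's fused activation enum to the TFLite equivalent expected by the PAL kernels.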
+TfLiteFusedActivation get_tflite_activation(Activation activation)
+{
+ switch (activation)
+ {
+ case luci::FusedActFunc::RELU:
+ return kTfLiteActRelu;
+ case luci::FusedActFunc::RELU6:
+ return kTfLiteActRelu6;
+ case luci::FusedActFunc::RELU_N1_TO_1:
+ return kTfLiteActReluN1To1;
+ case luci::FusedActFunc::TANH:
+ return kTfLiteActTanh;
+ case luci::FusedActFunc::SIGN_BIT:
+ return kTfLiteActSignBit;
+ case luci::FusedActFunc::NONE:
+ return kTfLiteActNone;
+ default:
+ throw std::runtime_error("Unsupported activation type");
+ }
+}
+} // namespace
+
+SVDF::SVDF(const Tensor *input, const Tensor *weight_feature, const Tensor *weight_time,
+ const Tensor *bias, const Tensor *input_activation_state, Tensor *output,
+ Tensor *scratchpad_activation_state, Tensor *scratchpad_1, Tensor *scratchpad_2,
+ Tensor *scratchpad_3, Tensor *scratchpad_4, Tensor *scratchpad_5, Tensor *scratchpad_6,
+ const SVDFParams &params)
+ : KernelWithParams<SVDFParams>({input, weight_feature, weight_time, bias, input_activation_state},
+ {output, scratchpad_activation_state, scratchpad_1, scratchpad_2,
+ scratchpad_3, scratchpad_4, scratchpad_5, scratchpad_6},
+ params)
+{
+ // Do nothing
+}
+
+void SVDF::configure()
+{
+ const Shape &input_shape = input()->shape();
+ const Shape &weight_features_shape = weight_feature()->shape();
+ const Shape &weight_time_shape = weight_time()->shape();
+
+ // Validate Input Tensor:
+ LUCI_INTERPRETER_CHECK(input()->element_type() == loco::DataType::FLOAT32 ||
+ input()->element_type() == loco::DataType::S8);
+ LUCI_INTERPRETER_CHECK(input_shape.num_dims() == 2);
+
+ // Validate inputs and output types
+ if (input()->element_type() == loco::DataType::S8)
+ {
+ LUCI_INTERPRETER_CHECK(weight_feature()->element_type() == loco::DataType::S8);
+ LUCI_INTERPRETER_CHECK(weight_time()->element_type() == loco::DataType::S16 ||
+ weight_time()->element_type() == loco::DataType::S8);
+ if (bias())
+ LUCI_INTERPRETER_CHECK(bias()->element_type() == loco::DataType::S32);
+
+ LUCI_INTERPRETER_CHECK(input_activation_state()->element_type() == loco::DataType::S16 ||
+ input_activation_state()->element_type() == loco::DataType::S8);
+ LUCI_INTERPRETER_CHECK(output()->element_type() == loco::DataType::S8);
+
+ // Note: TFLite currently supports only the ReLU activation for integer SVDF
+ LUCI_INTERPRETER_CHECK(params().activation == luci::FusedActFunc::RELU);
+ }
+ else if (weight_feature()->element_type() == loco::DataType::FLOAT32)
+ {
+ LUCI_INTERPRETER_CHECK(weight_feature()->element_type() == loco::DataType::FLOAT32);
+ LUCI_INTERPRETER_CHECK(weight_time()->element_type() == loco::DataType::FLOAT32);
+ LUCI_INTERPRETER_CHECK(input_activation_state()->element_type() == loco::DataType::FLOAT32);
+ if (bias())
+ LUCI_INTERPRETER_CHECK(bias()->element_type() == loco::DataType::FLOAT32);
+ LUCI_INTERPRETER_CHECK(output()->element_type() == loco::DataType::FLOAT32);
+ }
+ else if ((weight_feature()->element_type() == loco::DataType::U8 ||
+ weight_feature()->element_type() == loco::DataType::S8) &&
+ input()->element_type() == loco::DataType::FLOAT32)
+ {
+ // TODO: support hybrid SVDF op
+ throw std::runtime_error("Hybrid type is not currently supported");
+ }
+ else
+ {
+ throw std::runtime_error("Unsupported type.");
+ }
+
+ // Check that the tensor parameters are consistent with one another and with
+ // the input configuration.
+ const int rank = params().svdf_rank;
+ const int batch_size = input_shape.dim(0);
+ const int num_filters = weight_features_shape.dim(0);
+ LUCI_INTERPRETER_CHECK(rank != 0);
+ LUCI_INTERPRETER_CHECK(num_filters % rank == 0);
+
+ const int num_units = num_filters / rank;
+ const int memory_size = weight_time_shape.dim(1);
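+ // Each of the num_units outputs is driven by `rank` filters; memory_size is
+ // the number of past time steps kept per filter in the activation state.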
+
+ // Validate Weight_Feature Input Tensor:
+ LUCI_INTERPRETER_CHECK(weight_features_shape.num_dims() == 2);
+ LUCI_INTERPRETER_CHECK(weight_features_shape.dim(1) == input_shape.dim(1));
+
+ // Validate Weight_Time Input Tensor:
+ LUCI_INTERPRETER_CHECK(weight_time_shape.num_dims() == 2);
+ LUCI_INTERPRETER_CHECK(weight_time_shape.dim(0) == num_filters);
+
+ // Validate Bias
+ if (bias())
+ LUCI_INTERPRETER_CHECK(bias()->shape().dim(0) == num_units);
+
+ // Validate Input Activation State
+ LUCI_INTERPRETER_CHECK(input_activation_state()->shape().num_dims() == 2);
+ LUCI_INTERPRETER_CHECK(input_activation_state()->shape().dim(0) == batch_size);
+ LUCI_INTERPRETER_CHECK(input_activation_state()->shape().dim(1) == memory_size * num_filters);
+
+ // Resize scratchpad_activation_state to match input_activation_state
+ auto scratchpad_activation_state = getOutputTensors()[1];
+ scratchpad_activation_state->resize({batch_size, memory_size * num_filters});
+
+ // Resize output tensor
+ output()->resize({batch_size, num_units});
+
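+ // Let the platform abstraction layer size whatever scratchpads the selected
+ // backend needs for this input/weight type combination.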
+ luci_interpreter_pal::SetupScratchpadTensor(
+ input()->element_type(), weight_feature()->element_type(), getOutputTensors()[2],
+ getOutputTensors()[3], getOutputTensors()[4], getOutputTensors()[5], getOutputTensors()[6],
+ getOutputTensors()[7], input_shape, weight_time_shape, batch_size, num_filters, num_units);
+}
+
+void SVDF::execute() const
+{
+ switch (weight_feature()->element_type())
+ {
+ case loco::DataType::FLOAT32:
+ evalFloat();
+ break;
+ case loco::DataType::S8:
+ {
+ if (input()->element_type() == loco::DataType::S8)
+ evalInteger();
+ else
+ // TODO: support hybrid SVDF op
+ throw std::runtime_error("Hybrid type is not currently supported");
+ break;
+ }
+ default:
+ throw std::runtime_error("Unsupported type");
+ }
+}
+
+void SVDF::evalInteger() const
+{
+ const auto effective_scale_1 = static_cast<double>(input()->scale() * weight_feature()->scale() /
+ input_activation_state()->scale());
+ const auto effective_scale_2 = static_cast<double>(input_activation_state()->scale() *
+ weight_time()->scale() / output()->scale());
+
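+ // Fold the input, weight and state scales into two fixed-point multipliers:
+ // one for the feature matmul and one for the time matmul.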
+ int32_t effective_scale_1_a;
+ int effective_scale_1_b;
+ int32_t effective_scale_2_a;
+ int effective_scale_2_b;
+
+ tflite::QuantizeMultiplier(effective_scale_1, &effective_scale_1_a, &effective_scale_1_b);
+ tflite::QuantizeMultiplier(effective_scale_2, &effective_scale_2_a, &effective_scale_2_b);
+
+ TfLiteSVDFParams params_svdf{};
+ params_svdf.asymmetric_quantize_inputs = params().asymmetric_quantize_inputs;
+ params_svdf.rank = params().svdf_rank;
+ params_svdf.activation = get_tflite_activation(params().activation);
+
+ auto scratchpad_activation_state = getOutputTensors()[1];
+ // Note: the activation_state variable input tensor is expected to be reset to
+ // zero and to have no buffer of its own, so the scratchpad copy is used instead
+ auto scratchpad_data = getTensorData<int16_t>(scratchpad_activation_state);
+ std::fill_n(scratchpad_data, scratchpad_activation_state->shape().num_elements(), 0);
+
+ auto scratchpad = getOutputTensors()[2];
+ auto output_temp = getOutputTensors()[3];
+
+ int32_t input_zp = input()->zero_point();
+ int32_t output_zp = output()->zero_point();
+ luci_interpreter_pal::IntegerSVDF(
+ params_svdf, getTensorShape(input()), getTensorData<int8_t>(input()),
+ getTensorShape(weight_feature()), getTensorData<int8_t>(weight_feature()),
+ getTensorShape(weight_time()), getTensorData<int16_t>(weight_time()), getTensorShape(bias()),
+ getTensorData<int32_t>(bias()), scratchpad_data, getTensorShape(output()),
+ getTensorData<int8_t>(output()), getTensorData<int32_t>(scratchpad),
+ getTensorData<int32_t>(output_temp), effective_scale_1_a, effective_scale_1_b,
+ effective_scale_2_a, effective_scale_2_b, input_zp, output_zp);
+}
+
+void SVDF::evalFloat() const
+{
+ TfLiteSVDFParams params_svdf{};
+ params_svdf.asymmetric_quantize_inputs = params().asymmetric_quantize_inputs;
+ params_svdf.rank = params().svdf_rank;
+ params_svdf.activation = get_tflite_activation(params().activation);
+
+ auto scratchpad_activation_state = getOutputTensors()[1];
+ // Note: the activation_state variable input tensor is expected to be reset to
+ // zero and to have no buffer of its own, so the scratchpad copy is used instead
+ auto scratchpad_data = getTensorData<float>(scratchpad_activation_state);
+ std::fill_n(scratchpad_data, scratchpad_activation_state->shape().num_elements(), 0);
+
+ auto scratchpad_1 = getOutputTensors()[2];
+
+ luci_interpreter_pal::FloatSVDF(
+ params_svdf, getTensorShape(input()), getTensorData<float>(input()),
+ getTensorShape(weight_feature()), getTensorData<float>(weight_feature()),
+ getTensorShape(weight_time()), getTensorData<float>(weight_time()), getTensorShape(bias()),
+ getTensorData<float>(bias()), getTensorData<float>(scratchpad_1), scratchpad_data,
+ getTensorShape(output()), getTensorData<float>(output()));
+}
+
+} // namespace kernels
+} // namespace luci_interpreter
diff --git a/compiler/luci-interpreter/src/kernels/SVDF.h b/compiler/luci-interpreter/src/kernels/SVDF.h
new file mode 100644
index 000000000..335a6cd8f
--- /dev/null
+++ b/compiler/luci-interpreter/src/kernels/SVDF.h
@@ -0,0 +1,56 @@
+/*
+ * Copyright (c) 2022 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef LUCI_INTERPRETER_KERNELS_SVDF_H
+#define LUCI_INTERPRETER_KERNELS_SVDF_H
+
+#include "core/Kernel.h"
+#include "core/KernelParams.h"
+
+namespace luci_interpreter
+{
+namespace kernels
+{
+
+class SVDF : public KernelWithParams<SVDFParams>
+{
+public:
+ SVDF(const Tensor *input, const Tensor *weight_feature, const Tensor *weight_time,
+ const Tensor *bias, const Tensor *input_activation_state, Tensor *output,
+ Tensor *scratchpad_activation_state, Tensor *scratchpad_1, Tensor *scratchpad_2,
+ Tensor *scratchpad_3, Tensor *scratchpad_4, Tensor *scratchpad_5, Tensor *scratchpad_6,
+ const SVDFParams &params);
+
+ const Tensor *input() const { return _inputs[0]; }
+ const Tensor *weight_feature() const { return _inputs[1]; }
+ const Tensor *weight_time() const { return _inputs[2]; }
+ const Tensor *bias() const { return _inputs[3]; }
+ const Tensor *input_activation_state() const { return _inputs[4]; }
+
+ Tensor *output() const { return _outputs[0]; }
+
+ void configure() override;
+ void execute() const override;
+
+private:
+ void evalFloat() const;
+ void evalInteger() const;
+};
+
+} // namespace kernels
+} // namespace luci_interpreter
+
+#endif // LUCI_INTERPRETER_KERNELS_SVDF_H
diff --git a/compiler/luci-interpreter/src/kernels/SVDF.test.cpp b/compiler/luci-interpreter/src/kernels/SVDF.test.cpp
new file mode 100644
index 000000000..82bd9b009
--- /dev/null
+++ b/compiler/luci-interpreter/src/kernels/SVDF.test.cpp
@@ -0,0 +1,341 @@
+/*
+ * Copyright (c) 2022 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "kernels/SVDF.h"
+#include "kernels/TestUtils.h"
+#include "luci_interpreter/TestMemoryManager.h"
+
+namespace luci_interpreter
+{
+namespace kernels
+{
+namespace
+{
+
+using namespace testing;
+
+class SVDFTest : public ::testing::Test
+{
+protected:
+ void SetUp() override { _memory_manager = std::make_unique<TestMemoryManager>(); }
+
+ std::unique_ptr<IMemoryManager> _memory_manager;
+};
+
+TEST_F(SVDFTest, FullIntegerTest)
+{
+ const int32_t batches = 2;
+ const int32_t input_size = 3;
+ const int32_t units = 4;
+ const int32_t memory_size = 10;
+ const int32_t rank = 1;
+ const int32_t num_filters = units * rank;
+
+ Shape input_shape{batches, input_size};
+ Shape weight_feature_shape{num_filters, input_size};
+ Shape weight_time_shape{num_filters, memory_size};
+ Shape bias_shape{units};
+ Shape activation_state_shape{batches, memory_size * num_filters};
+
+ std::vector<float> input_data{0.49837467, 0.19278903, 0.26584083,
+ 0.17660543, 0.52949083, -0.77931279};
+
+ std::vector<float> weight_feature_data{-0.31930989, -0.36118156, 0.0079667, 0.37613347,
+ 0.22197971, 0.12416199, 0.27901134, 0.27557442,
+ 0.3905206, -0.36137494, -0.06634006, -0.10640851};
+
+ std::vector<float> weight_time_data{
+ -0.31930989, 0.37613347, 0.27901134, -0.36137494, -0.36118156,
+ 0.22197971, 0.27557442, -0.06634006, 0.0079667, 0.12416199,
+
+ 0.3905206, -0.10640851, -0.0976817, 0.15294972, 0.39635518,
+ -0.02702999, 0.39296314, 0.15785322, 0.21931258, 0.31053296,
+
+ -0.36916667, 0.38031587, -0.21580373, 0.27072677, 0.23622236,
+ 0.34936687, 0.18174365, 0.35907319, -0.17493086, 0.324846,
+
+ -0.10781813, 0.27201805, 0.14324132, -0.23681851, -0.27115166,
+ -0.01580888, -0.14943552, 0.15465137, 0.09784451, -0.0337657};
+
+ std::vector<float> bias_data{-0.0976817, 0.15294972, 0.39635518, -0.02702999};
+
+ std::pair<float, int32_t> input_quant_param = quantizationParams<int8_t>(-1, 1);
+ std::pair<float, int32_t> weight_feature_quant_param = quantizationParams<int8_t>(-0.5, 0.5);
+ std::pair<float, int32_t> weight_time_quant_param = quantizationParams<int16_t>(-1, 1);
+ std::pair<float, int32_t> bias_quant_param = quantizationParams<int32_t>(-512, 512);
+ std::pair<float, int32_t> activation_state_quant_param = quantizationParams<int16_t>(-16, 16);
+
+ std::pair<float, int32_t> output_quant_param = quantizationParams<int8_t>(-0.5, 0.5);
+
+ Tensor input_tensor =
+ makeInputTensor<DataType::S8>(input_shape, input_quant_param.first, input_quant_param.second,
+ input_data, _memory_manager.get());
+ Tensor weight_feature_tensor = makeInputTensor<DataType::S8>(
+ weight_feature_shape, weight_feature_quant_param.first, weight_feature_quant_param.second,
+ weight_feature_data, _memory_manager.get());
+ Tensor weight_time_tensor = makeInputTensor<DataType::S16>(
+ weight_time_shape, weight_time_quant_param.first, weight_time_quant_param.second,
+ weight_time_data, _memory_manager.get());
+ Tensor bias_tensor = makeInputTensor<DataType::S32>(
+ bias_shape, bias_quant_param.first, bias_quant_param.second, bias_data, _memory_manager.get());
+ Tensor activation_state_tensor = makeOutputTensor(
+ DataType::S16, activation_state_quant_param.first, activation_state_quant_param.second);
+ activation_state_tensor.resize(activation_state_shape);
+ Tensor output_tensor =
+ makeOutputTensor(DataType::S8, output_quant_param.first, output_quant_param.second);
+
+ Tensor scratchpad_activation_state(DataType::S16, Shape({}), {}, "");
+ Tensor scratchpad_1(DataType::S32, Shape({}), {}, "");
+ Tensor scratchpad_2(DataType::S32, Shape({}), {}, "");
+ Tensor scratchpad_3(DataType::FLOAT32, Shape({}), {}, "");
+ Tensor scratchpad_4(DataType::FLOAT32, Shape({}), {}, "");
+ Tensor scratchpad_5(DataType::FLOAT32, Shape({}), {}, "");
+ Tensor scratchpad_6(DataType::FLOAT32, Shape({}), {}, "");
+
+ SVDFParams params{};
+ params.activation = Activation::RELU;
+ params.asymmetric_quantize_inputs = false;
+ params.svdf_rank = rank;
+
+ SVDF kernel(&input_tensor, &weight_feature_tensor, &weight_time_tensor, &bias_tensor,
+ &activation_state_tensor, &output_tensor, &scratchpad_activation_state, &scratchpad_1,
+ &scratchpad_2, &scratchpad_3, &scratchpad_4, &scratchpad_5, &scratchpad_6, params);
+ kernel.configure();
+ _memory_manager->allocate_memory(output_tensor);
+ _memory_manager->allocate_memory(scratchpad_activation_state);
+ _memory_manager->allocate_memory(scratchpad_1);
+ _memory_manager->allocate_memory(scratchpad_2);
+ _memory_manager->allocate_memory(scratchpad_3);
+ _memory_manager->allocate_memory(scratchpad_4);
+ _memory_manager->allocate_memory(scratchpad_5);
+ _memory_manager->allocate_memory(scratchpad_6);
+ kernel.execute();
+
+ std::vector<int8_t> ref_output_data{-9, 24, 31, 1, -10, 10, -3, 0};
+
+ std::vector<int32_t> ref_output_shape{batches, units};
+ EXPECT_THAT(extractTensorData<int8_t>(output_tensor), ref_output_data);
+ EXPECT_THAT(extractTensorShape(output_tensor), ::testing::ElementsAreArray(ref_output_shape));
+}
+
+TEST_F(SVDFTest, FloatTest)
+{
+ const int32_t batches = 2;
+ const int32_t input_size = 3;
+ const int32_t units = 4;
+ const int32_t memory_size = 10;
+ const int32_t rank = 1;
+ const int32_t num_filters = units * rank;
+
+ Shape input_shape{batches, input_size};
+ Shape weight_feature_shape{num_filters, input_size};
+ Shape weight_time_shape{num_filters, memory_size};
+ Shape activation_state_shape{batches, memory_size * num_filters};
+
+ std::vector<float> input_data{0.12609188, -0.46347019, -0.89598465,
+ 0.35867718, 0.36897406, 0.73463392};
+
+ std::vector<float> weight_feature_data{-0.31930989, -0.36118156, 0.0079667, 0.37613347,
+ 0.22197971, 0.12416199, 0.27901134, 0.27557442,
+ 0.3905206, -0.36137494, -0.06634006, -0.10640851};
+
+ std::vector<float> weight_time_data{
+ -0.31930989, 0.37613347, 0.27901134, -0.36137494, -0.36118156,
+ 0.22197971, 0.27557442, -0.06634006, 0.0079667, 0.12416199,
+
+ 0.3905206, -0.10640851, -0.0976817, 0.15294972, 0.39635518,
+ -0.02702999, 0.39296314, 0.15785322, 0.21931258, 0.31053296,
+
+ -0.36916667, 0.38031587, -0.21580373, 0.27072677, 0.23622236,
+ 0.34936687, 0.18174365, 0.35907319, -0.17493086, 0.324846,
+
+ -0.10781813, 0.27201805, 0.14324132, -0.23681851, -0.27115166,
+ -0.01580888, -0.14943552, 0.15465137, 0.09784451, -0.0337657};
+
+ Tensor input_tensor =
+ makeInputTensor<DataType::FLOAT32>(input_shape, input_data, _memory_manager.get());
+ Tensor weight_feature_tensor = makeInputTensor<DataType::FLOAT32>(
+ weight_feature_shape, weight_feature_data, _memory_manager.get());
+ Tensor weight_time_tensor =
+ makeInputTensor<DataType::FLOAT32>(weight_time_shape, weight_time_data, _memory_manager.get());
+ Tensor activation_state_tensor = makeOutputTensor(DataType::FLOAT32);
+ activation_state_tensor.resize(activation_state_shape);
+ Tensor output_tensor = makeOutputTensor(DataType::FLOAT32);
+
+ Tensor scratchpad_activation_state(DataType::FLOAT32, Shape({}), {}, "");
+ Tensor scratchpad_1(DataType::FLOAT32, Shape({}), {}, "");
+ Tensor scratchpad_2(DataType::FLOAT32, Shape({}), {}, "");
+ Tensor scratchpad_3(DataType::FLOAT32, Shape({}), {}, "");
+ Tensor scratchpad_4(DataType::FLOAT32, Shape({}), {}, "");
+ Tensor scratchpad_5(DataType::FLOAT32, Shape({}), {}, "");
+ Tensor scratchpad_6(DataType::FLOAT32, Shape({}), {}, "");
+
+ SVDFParams params{};
+ params.activation = Activation::NONE;
+ params.asymmetric_quantize_inputs = false;
+ params.svdf_rank = rank;
+
+ SVDF kernel(&input_tensor, &weight_feature_tensor, &weight_time_tensor, nullptr,
+ &activation_state_tensor, &output_tensor, &scratchpad_activation_state, &scratchpad_1,
+ &scratchpad_2, &scratchpad_3, &scratchpad_4, &scratchpad_5, &scratchpad_6, params);
+ kernel.configure();
+ _memory_manager->allocate_memory(output_tensor);
+ _memory_manager->allocate_memory(scratchpad_activation_state);
+ _memory_manager->allocate_memory(scratchpad_1);
+ _memory_manager->allocate_memory(scratchpad_2);
+ _memory_manager->allocate_memory(scratchpad_3);
+ _memory_manager->allocate_memory(scratchpad_4);
+ _memory_manager->allocate_memory(scratchpad_5);
+ _memory_manager->allocate_memory(scratchpad_6);
+ kernel.execute();
+
+ std::vector<float> ref_output_data{0.014899, -0.0517661, -0.143725, -0.00271883,
+ -0.03004015, 0.09565311, 0.1587342, 0.00784263};
+
+ std::vector<int32_t> ref_output_shape{batches, units};
+ const float tolerance = 1e-5;
+ EXPECT_THAT(extractTensorData<float>(output_tensor), FloatArrayNear(ref_output_data, tolerance));
+ EXPECT_THAT(extractTensorShape(output_tensor), ::testing::ElementsAreArray(ref_output_shape));
+}
+
+TEST_F(SVDFTest, Unsupported_Type_Configure_NEG)
+{
+ const int32_t batches = 2;
+ const int32_t input_size = 3;
+ const int32_t units = 4;
+ const int32_t memory_size = 10;
+ const int32_t rank = 1;
+ const int32_t num_filters = units * rank;
+
+ Shape input_shape{batches, input_size};
+ Shape weight_feature_shape{num_filters, input_size};
+ Shape weight_time_shape{num_filters, memory_size};
+ Shape activation_state_shape{batches, memory_size * num_filters};
+
+ std::vector<int32_t> input_data{0, 1, 3, 4, 4, -2};
+
+ std::vector<float> weight_feature_data{-0.31930989, -0.36118156, 0.0079667, 0.37613347,
+ 0.22197971, 0.12416199, 0.27901134, 0.27557442,
+ 0.3905206, -0.36137494, -0.06634006, -0.10640851};
+
+ std::vector<float> weight_time_data{
+ -0.31930989, 0.37613347, 0.27901134, -0.36137494, -0.36118156,
+ 0.22197971, 0.27557442, -0.06634006, 0.0079667, 0.12416199,
+
+ 0.3905206, -0.10640851, -0.0976817, 0.15294972, 0.39635518,
+ -0.02702999, 0.39296314, 0.15785322, 0.21931258, 0.31053296,
+
+ -0.36916667, 0.38031587, -0.21580373, 0.27072677, 0.23622236,
+ 0.34936687, 0.18174365, 0.35907319, -0.17493086, 0.324846,
+
+ -0.10781813, 0.27201805, 0.14324132, -0.23681851, -0.27115166,
+ -0.01580888, -0.14943552, 0.15465137, 0.09784451, -0.0337657};
+
+ Tensor input_tensor =
+ makeInputTensor<DataType::S32>(input_shape, input_data, _memory_manager.get());
+ Tensor weight_feature_tensor = makeInputTensor<DataType::FLOAT32>(
+ weight_feature_shape, weight_feature_data, _memory_manager.get());
+ Tensor weight_time_tensor =
+ makeInputTensor<DataType::FLOAT32>(weight_time_shape, weight_time_data, _memory_manager.get());
+ Tensor activation_state_tensor = makeOutputTensor(DataType::FLOAT32);
+ activation_state_tensor.resize(activation_state_shape);
+ Tensor output_tensor = makeOutputTensor(DataType::FLOAT32);
+
+ Tensor scratchpad_activation_state(DataType::FLOAT32, Shape({}), {}, "");
+ Tensor scratchpad_1(DataType::FLOAT32, Shape({}), {}, "");
+ Tensor scratchpad_2(DataType::FLOAT32, Shape({}), {}, "");
+ Tensor scratchpad_3(DataType::FLOAT32, Shape({}), {}, "");
+ Tensor scratchpad_4(DataType::FLOAT32, Shape({}), {}, "");
+ Tensor scratchpad_5(DataType::FLOAT32, Shape({}), {}, "");
+ Tensor scratchpad_6(DataType::FLOAT32, Shape({}), {}, "");
+
+ SVDFParams params{};
+ params.activation = Activation::NONE;
+ params.asymmetric_quantize_inputs = false;
+ params.svdf_rank = rank;
+
+ SVDF kernel(&input_tensor, &weight_feature_tensor, &weight_time_tensor, nullptr,
+ &activation_state_tensor, &output_tensor, &scratchpad_activation_state, &scratchpad_1,
+ &scratchpad_2, &scratchpad_3, &scratchpad_4, &scratchpad_5, &scratchpad_6, params);
+ EXPECT_ANY_THROW(kernel.configure());
+}
+
+TEST_F(SVDFTest, Invalid_Input_Shape_NEG)
+{
+ const int32_t batches = 2;
+ const int32_t right_input_size = 3;
+ const int32_t wrong_input_size = 4;
+ const int32_t units = 4;
+ const int32_t memory_size = 10;
+ const int32_t rank = 1;
+ const int32_t num_filters = units * rank;
+
+ Shape input_shape{batches, wrong_input_size};
+ Shape weight_feature_shape{num_filters, right_input_size};
+ Shape weight_time_shape{num_filters, memory_size};
+ Shape activation_state_shape{batches, memory_size * num_filters};
+
+ std::vector<float> input_data{0, 1, 3, 2, 4, 4, -2, 1};
+
+ std::vector<float> weight_feature_data{-0.31930989, -0.36118156, 0.0079667, 0.37613347,
+ 0.22197971, 0.12416199, 0.27901134, 0.27557442,
+ 0.3905206, -0.36137494, -0.06634006, -0.10640851};
+
+ std::vector<float> weight_time_data{
+ -0.31930989, 0.37613347, 0.27901134, -0.36137494, -0.36118156,
+ 0.22197971, 0.27557442, -0.06634006, 0.0079667, 0.12416199,
+
+ 0.3905206, -0.10640851, -0.0976817, 0.15294972, 0.39635518,
+ -0.02702999, 0.39296314, 0.15785322, 0.21931258, 0.31053296,
+
+ -0.36916667, 0.38031587, -0.21580373, 0.27072677, 0.23622236,
+ 0.34936687, 0.18174365, 0.35907319, -0.17493086, 0.324846,
+
+ -0.10781813, 0.27201805, 0.14324132, -0.23681851, -0.27115166,
+ -0.01580888, -0.14943552, 0.15465137, 0.09784451, -0.0337657};
+
+ Tensor input_tensor =
+ makeInputTensor<DataType::FLOAT32>(input_shape, input_data, _memory_manager.get());
+ Tensor weight_feature_tensor = makeInputTensor<DataType::FLOAT32>(
+ weight_feature_shape, weight_feature_data, _memory_manager.get());
+ Tensor weight_time_tensor =
+ makeInputTensor<DataType::FLOAT32>(weight_time_shape, weight_time_data, _memory_manager.get());
+ Tensor activation_state_tensor = makeOutputTensor(DataType::FLOAT32);
+ activation_state_tensor.resize(activation_state_shape);
+ Tensor output_tensor = makeOutputTensor(DataType::FLOAT32);
+
+ Tensor scratchpad_activation_state(DataType::FLOAT32, Shape({}), {}, "");
+ Tensor scratchpad_1(DataType::FLOAT32, Shape({}), {}, "");
+ Tensor scratchpad_2(DataType::FLOAT32, Shape({}), {}, "");
+ Tensor scratchpad_3(DataType::FLOAT32, Shape({}), {}, "");
+ Tensor scratchpad_4(DataType::FLOAT32, Shape({}), {}, "");
+ Tensor scratchpad_5(DataType::FLOAT32, Shape({}), {}, "");
+ Tensor scratchpad_6(DataType::FLOAT32, Shape({}), {}, "");
+
+ SVDFParams params{};
+ params.activation = Activation::NONE;
+ params.asymmetric_quantize_inputs = false;
+ params.svdf_rank = rank;
+
+ SVDF kernel(&input_tensor, &weight_feature_tensor, &weight_time_tensor, nullptr,
+ &activation_state_tensor, &output_tensor, &scratchpad_activation_state, &scratchpad_1,
+ &scratchpad_2, &scratchpad_3, &scratchpad_4, &scratchpad_5, &scratchpad_6, params);
+ EXPECT_ANY_THROW(kernel.configure());
+}
+
+} // namespace
+} // namespace kernels
+} // namespace luci_interpreter
diff --git a/compiler/luci-interpreter/src/kernels/Slice.cpp b/compiler/luci-interpreter/src/kernels/Slice.cpp
index 37a834a18..2fe2c5471 100644
--- a/compiler/luci-interpreter/src/kernels/Slice.cpp
+++ b/compiler/luci-interpreter/src/kernels/Slice.cpp
@@ -139,6 +139,11 @@ void Slice::execute() const
getTensorData<uint8_t>(input()), getTensorShape(output()),
getTensorData<uint8_t>(output()));
break;
+ case DataType::S8:
+ luci_interpreter_pal::Slice(op_params, getTensorShape(input()),
+ getTensorData<int8_t>(input()), getTensorShape(output()),
+ getTensorData<int8_t>(output()));
+ break;
default:
throw std::runtime_error("Unsupported input type.");
}
diff --git a/compiler/luci-interpreter/src/kernels/Slice.test.cpp b/compiler/luci-interpreter/src/kernels/Slice.test.cpp
index 3e0d0b0d7..517982990 100644
--- a/compiler/luci-interpreter/src/kernels/Slice.test.cpp
+++ b/compiler/luci-interpreter/src/kernels/Slice.test.cpp
@@ -31,8 +31,8 @@ template <typename T> class SliceTest : public ::testing::Test
{
};
-using DataTypes = ::testing::Types<float, uint8_t>;
-TYPED_TEST_CASE(SliceTest, DataTypes);
+using DataTypes = ::testing::Types<float, uint8_t, int8_t>;
+TYPED_TEST_SUITE(SliceTest, DataTypes);
TYPED_TEST(SliceTest, SimpleTest)
{
diff --git a/compiler/luci-interpreter/src/kernels/Softmax.test.cpp b/compiler/luci-interpreter/src/kernels/Softmax.test.cpp
index 9de40b6ec..08e70672d 100644
--- a/compiler/luci-interpreter/src/kernels/Softmax.test.cpp
+++ b/compiler/luci-interpreter/src/kernels/Softmax.test.cpp
@@ -93,7 +93,7 @@ template <typename T> class SoftmaxTest : public ::testing::Test
};
using DataTypes = ::testing::Types<float, uint8_t, int8_t>;
-TYPED_TEST_CASE(SoftmaxTest, DataTypes);
+TYPED_TEST_SUITE(SoftmaxTest, DataTypes);
TYPED_TEST(SoftmaxTest, Simple)
{
diff --git a/compiler/luci-interpreter/src/kernels/SpaceToBatchND.test.cpp b/compiler/luci-interpreter/src/kernels/SpaceToBatchND.test.cpp
index e06501c8c..3a8b0a812 100644
--- a/compiler/luci-interpreter/src/kernels/SpaceToBatchND.test.cpp
+++ b/compiler/luci-interpreter/src/kernels/SpaceToBatchND.test.cpp
@@ -90,7 +90,7 @@ template <typename T> class SpaceToBatchNDTest : public ::testing::Test
};
using DataTypes = ::testing::Types<float, uint8_t>;
-TYPED_TEST_CASE(SpaceToBatchNDTest, DataTypes);
+TYPED_TEST_SUITE(SpaceToBatchNDTest, DataTypes);
TYPED_TEST(SpaceToBatchNDTest, Simple)
{
diff --git a/compiler/luci-interpreter/src/kernels/SpaceToDepth.test.cpp b/compiler/luci-interpreter/src/kernels/SpaceToDepth.test.cpp
index 735c010b9..4af488618 100644
--- a/compiler/luci-interpreter/src/kernels/SpaceToDepth.test.cpp
+++ b/compiler/luci-interpreter/src/kernels/SpaceToDepth.test.cpp
@@ -32,7 +32,7 @@ template <typename T> class SpaceToDepthTest : public ::testing::Test
};
using DataTypes = ::testing::Types<float, uint8_t>;
-TYPED_TEST_CASE(SpaceToDepthTest, DataTypes);
+TYPED_TEST_SUITE(SpaceToDepthTest, DataTypes);
TYPED_TEST(SpaceToDepthTest, SimpleCase)
{
diff --git a/compiler/luci-interpreter/src/kernels/Split.test.cpp b/compiler/luci-interpreter/src/kernels/Split.test.cpp
index 74d57aed3..283cd9aa9 100644
--- a/compiler/luci-interpreter/src/kernels/Split.test.cpp
+++ b/compiler/luci-interpreter/src/kernels/Split.test.cpp
@@ -73,7 +73,7 @@ template <typename T> class SplitTest : public ::testing::Test
};
using DataTypes = ::testing::Types<float, uint8_t>;
-TYPED_TEST_CASE(SplitTest, DataTypes);
+TYPED_TEST_SUITE(SplitTest, DataTypes);
TYPED_TEST(SplitTest, FourDimensional)
{
diff --git a/compiler/luci-interpreter/src/kernels/SplitV.test.cpp b/compiler/luci-interpreter/src/kernels/SplitV.test.cpp
index aac0567d7..035bc2122 100644
--- a/compiler/luci-interpreter/src/kernels/SplitV.test.cpp
+++ b/compiler/luci-interpreter/src/kernels/SplitV.test.cpp
@@ -77,7 +77,7 @@ template <typename T> class SplitVTest : public ::testing::Test
};
using DataTypes = ::testing::Types<float, uint8_t, int16_t>;
-TYPED_TEST_CASE(SplitVTest, DataTypes);
+TYPED_TEST_SUITE(SplitVTest, DataTypes);
TYPED_TEST(SplitVTest, ThreeDimensional)
{
diff --git a/compiler/luci-interpreter/src/kernels/Squeeze.test.cpp b/compiler/luci-interpreter/src/kernels/Squeeze.test.cpp
index d3326fe98..1bc0b6459 100644
--- a/compiler/luci-interpreter/src/kernels/Squeeze.test.cpp
+++ b/compiler/luci-interpreter/src/kernels/Squeeze.test.cpp
@@ -56,7 +56,7 @@ template <typename T> class SqueezeTest : public ::testing::Test
};
using DataTypes = ::testing::Types<float, uint8_t>;
-TYPED_TEST_CASE(SqueezeTest, DataTypes);
+TYPED_TEST_SUITE(SqueezeTest, DataTypes);
TYPED_TEST(SqueezeTest, TotalTest)
{
diff --git a/compiler/luci-interpreter/src/kernels/Sub.cpp b/compiler/luci-interpreter/src/kernels/Sub.cpp
index 603c62d0f..24b6a72e5 100644
--- a/compiler/luci-interpreter/src/kernels/Sub.cpp
+++ b/compiler/luci-interpreter/src/kernels/Sub.cpp
@@ -37,6 +37,7 @@ Sub::Sub(const Tensor *input1, const Tensor *input2, Tensor *output, const SubPa
void Sub::configure()
{
LUCI_INTERPRETER_CHECK(!(input1()->element_type() != input2()->element_type()))
+ LUCI_INTERPRETER_CHECK(!(input1()->element_type() != output()->element_type()))
output()->resize(calculateShapeForBroadcast(input1()->shape(), input2()->shape()));
}
@@ -47,6 +48,12 @@ void Sub::execute() const
case DataType::FLOAT32:
evalFloat();
break;
+ case DataType::S64:
+ evalInteger<int64_t>();
+ break;
+ case DataType::S32:
+ evalInteger<int32_t>();
+ break;
case DataType::U8:
evalQuantized();
break;
@@ -57,13 +64,8 @@ void Sub::execute() const
void Sub::evalFloat() const
{
- float activation_min{};
- float activation_max{};
- calculateActivationRange(_params.activation, &activation_min, &activation_max);
-
tflite::ArithmeticParams params{};
- params.float_activation_min = activation_min;
- params.float_activation_max = activation_max;
+ fillArithmeticActivationRange<float>(params, _params.activation);
const bool need_broadcast = tflite::reference_ops::ProcessBroadcastShapes(
getTensorShape(input1()), getTensorShape(input2()), &params);
@@ -82,6 +84,28 @@ void Sub::evalFloat() const
}
}
+template <typename T> void Sub::evalInteger() const
+{
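+ // Mirrors evalFloat(), but with the fused-activation bounds computed for T.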
+ tflite::ArithmeticParams params{};
+ fillArithmeticActivationRange<T>(params, _params.activation);
+
+ const bool need_broadcast = tflite::reference_ops::ProcessBroadcastShapes(
+ getTensorShape(input1()), getTensorShape(input2()), &params);
+
+ if (need_broadcast)
+ {
+ tflite::reference_ops::BroadcastSubSlow(
+ params, getTensorShape(input1()), getTensorData<T>(input1()), getTensorShape(input2()),
+ getTensorData<T>(input2()), getTensorShape(output()), getTensorData<T>(output()));
+ }
+ else
+ {
+ tflite::reference_ops::Sub(params, getTensorShape(input1()), getTensorData<T>(input1()),
+ getTensorShape(input2()), getTensorData<T>(input2()),
+ getTensorShape(output()), getTensorData<T>(output()));
+ }
+}
+
void Sub::evalQuantized() const
{
const auto input1_scale = static_cast<double>(input1()->scale());
diff --git a/compiler/luci-interpreter/src/kernels/Sub.h b/compiler/luci-interpreter/src/kernels/Sub.h
index d7940b5c6..23952b3bd 100644
--- a/compiler/luci-interpreter/src/kernels/Sub.h
+++ b/compiler/luci-interpreter/src/kernels/Sub.h
@@ -39,6 +39,7 @@ public:
private:
void evalFloat() const;
+ template <typename T> void evalInteger() const;
void evalQuantized() const;
};
diff --git a/compiler/luci-interpreter/src/kernels/Sub.test.cpp b/compiler/luci-interpreter/src/kernels/Sub.test.cpp
index c189f4481..9abafd49a 100644
--- a/compiler/luci-interpreter/src/kernels/Sub.test.cpp
+++ b/compiler/luci-interpreter/src/kernels/Sub.test.cpp
@@ -162,6 +162,51 @@ TEST_F(SubTest, Float)
}
}
+template <loco::DataType DType> void CheckInteger(luci_interpreter::IMemoryManager *memory_manager)
+{
+ using dtype = typename loco::DataTypeImpl<DType>::Type;
+ Shape base_shape = {2, 3, 1, 2};
+ std::vector<Shape> test_shapes{{1, 1, 3, 2}, {1, 3, 1, 2}, {2, 1, 3, 1}, {2, 3, 1, 1}};
+ std::vector<std::vector<dtype>> test_outputs = {
+ {0, 1, 2, 3, 0, 0, 0, 0, 4, 1, 0, 0, 0, 0, 7, 0, 3, 0,
+ 0, 2, 4, 4, 0, 0, 3, 0, 10, 0, 6, 0, 3, 0, 10, 2, 6, 0},
+ {0, 1, 4, 1, 3, 0, 0, 2, 10, 0, 6, 0},
+ {0, 0, 0, 1, 2, 5, 0, 0, 0, 0, 4, 3, 0, 0, 3, 0, 7, 0,
+ 2, 4, 0, 2, 0, 0, 8, 0, 6, 0, 1, 0, 8, 2, 6, 0, 1, 0},
+ {0, 0, 0, 0, 7, 0, 2, 4, 6, 0, 1, 0}};
+ std::vector<dtype> input1_data{-1, 2, 1, 0, 4, -5, 1, 3, 7, -1, 7, 1};
+ std::vector<dtype> input2_data{4, 1, -3, -1, 1, 6};
+ for (size_t i = 0; i < test_shapes.size(); ++i)
+ {
+ Tensor input1_tensor = makeInputTensor<DType>(base_shape, input1_data, memory_manager);
+ Tensor input2_tensor = makeInputTensor<DType>(test_shapes[i], input2_data, memory_manager);
+ Tensor output_tensor = makeOutputTensor(DType);
+
+ SubParams params{};
+ params.activation = Activation::RELU;
+
+ Sub kernel(&input1_tensor, &input2_tensor, &output_tensor, params);
+ kernel.configure();
+ memory_manager->allocate_memory(output_tensor);
+ kernel.execute();
+
+ EXPECT_THAT(extractTensorData<dtype>(output_tensor), test_outputs[i])
+ << "With shape number " << i;
+ }
+}
+
+TEST_F(SubTest, SInt32)
+{
+ CheckInteger<loco::DataType::S32>(_memory_manager.get());
+ SUCCEED();
+}
+
+TEST_F(SubTest, SInt64)
+{
+ CheckInteger<loco::DataType::S64>(_memory_manager.get());
+ SUCCEED();
+}
+
TEST_F(SubTest, Input_Output_Type_NEG)
{
Tensor input1_tensor = makeInputTensor<DataType::FLOAT32>({1}, {1.f}, _memory_manager.get());
@@ -175,11 +220,24 @@ TEST_F(SubTest, Input_Output_Type_NEG)
EXPECT_ANY_THROW(kernel.configure());
}
-TEST_F(SubTest, Invalid_Input_Type_NEG)
+TEST_F(SubTest, Invalid_Output_Type_NEG)
{
Tensor input1_tensor = makeInputTensor<DataType::S64>({1}, {1}, _memory_manager.get());
Tensor input2_tensor = makeInputTensor<DataType::S64>({1}, {2}, _memory_manager.get());
- Tensor output_tensor = makeOutputTensor(DataType::S64);
+ Tensor output_tensor = makeOutputTensor(DataType::S32);
+
+ SubParams params{};
+ params.activation = Activation::RELU;
+
+ Sub kernel(&input1_tensor, &input2_tensor, &output_tensor, params);
+ EXPECT_ANY_THROW(kernel.configure());
+}
+
+TEST_F(SubTest, Invalid_Input_Type_NEG)
+{
+ Tensor input1_tensor = makeInputTensor<DataType::U64>({1}, {1}, _memory_manager.get());
+ Tensor input2_tensor = makeInputTensor<DataType::U64>({1}, {2}, _memory_manager.get());
+ Tensor output_tensor = makeOutputTensor(DataType::U64);
SubParams params{};
params.activation = Activation::RELU;
@@ -190,6 +248,19 @@ TEST_F(SubTest, Invalid_Input_Type_NEG)
EXPECT_ANY_THROW(kernel.execute());
}
+TEST_F(SubTest, Mismatching_Input_Int_Types_NEG)
+{
+ Tensor input1_tensor = makeInputTensor<DataType::S32>({1}, {1}, _memory_manager.get());
+ Tensor input2_tensor = makeInputTensor<DataType::S64>({1}, {2}, _memory_manager.get());
+ Tensor output_tensor = makeOutputTensor(DataType::S32);
+
+ SubParams params{};
+ params.activation = Activation::NONE;
+
+ Sub kernel(&input1_tensor, &input2_tensor, &output_tensor, params);
+ EXPECT_ANY_THROW(kernel.configure());
+}
+
} // namespace
} // namespace kernels
} // namespace luci_interpreter
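Note: the expected values in CheckInteger above follow directly from elementwise subtraction plus the fused RELU clamp. A stand-alone sanity check for the first element (plain C++, independent of the interpreter):

    // With base shape {2, 3, 1, 2} broadcast against {1, 1, 3, 2}, output[0]
    // pairs input1_data[0] with input2_data[0]: RELU(-1 - 4) == 0, which is
    // test_outputs[0][0] above.
    #include <algorithm>
    #include <cstdint>
    #include <iostream>

    int main()
    {
      const int32_t lhs = -1; // input1_data[0]
      const int32_t rhs = 4;  // input2_data[0]
      const int32_t result = std::max<int32_t>(lhs - rhs, 0); // fused RELU
      std::cout << result << '\n'; // prints 0
      return result == 0 ? 0 : 1;
    }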
diff --git a/compiler/luci-interpreter/src/kernels/Transpose.test.cpp b/compiler/luci-interpreter/src/kernels/Transpose.test.cpp
index 107179910..43be8f8b9 100644
--- a/compiler/luci-interpreter/src/kernels/Transpose.test.cpp
+++ b/compiler/luci-interpreter/src/kernels/Transpose.test.cpp
@@ -52,7 +52,7 @@ template <typename T> class TransposeTest : public ::testing::Test
};
using DataTypes = ::testing::Types<float, uint8_t>;
-TYPED_TEST_CASE(TransposeTest, DataTypes);
+TYPED_TEST_SUITE(TransposeTest, DataTypes);
TYPED_TEST(TransposeTest, Small3D)
{
diff --git a/compiler/luci-interpreter/src/kernels/Unpack.test.cpp b/compiler/luci-interpreter/src/kernels/Unpack.test.cpp
index 4f22c9f30..9384ddc83 100644
--- a/compiler/luci-interpreter/src/kernels/Unpack.test.cpp
+++ b/compiler/luci-interpreter/src/kernels/Unpack.test.cpp
@@ -75,7 +75,7 @@ template <typename T> class UnpackTest : public ::testing::Test
};
using DataTypes = ::testing::Types<float, uint8_t>;
-TYPED_TEST_CASE(UnpackTest, DataTypes);
+TYPED_TEST_SUITE(UnpackTest, DataTypes);
TYPED_TEST(UnpackTest, ThreeOutputs)
{
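Note: this rename and the identical one in Transpose.test.cpp above track googletest's deprecation of the TYPED_TEST_CASE macro in favor of TYPED_TEST_SUITE; behavior is unchanged. A minimal skeleton with the current spelling (names illustrative, not from the repository):

    #include <gtest/gtest.h>
    #include <cstdint>

    template <typename T> class RoundTripTest : public ::testing::Test
    {
    };

    using RoundTripTypes = ::testing::Types<float, uint8_t>;
    TYPED_TEST_SUITE(RoundTripTest, RoundTripTypes);

    TYPED_TEST(RoundTripTest, Identity) { EXPECT_EQ(TypeParam(1), TypeParam(1)); }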
diff --git a/compiler/luci-interpreter/src/kernels/Utils.cpp b/compiler/luci-interpreter/src/kernels/Utils.cpp
index 586cfa1e1..5d8e5db83 100644
--- a/compiler/luci-interpreter/src/kernels/Utils.cpp
+++ b/compiler/luci-interpreter/src/kernels/Utils.cpp
@@ -27,17 +27,18 @@ namespace luci_interpreter
namespace kernels
{
-void calculateActivationRange(Activation activation, float *activation_min, float *activation_max)
+template <typename T>
+void calculateActivationRange(Activation activation, T *activation_min, T *activation_max)
{
switch (activation)
{
case Activation::NONE:
- *activation_min = std::numeric_limits<float>::lowest();
- *activation_max = std::numeric_limits<float>::max();
+ *activation_min = std::numeric_limits<T>::lowest();
+ *activation_max = std::numeric_limits<T>::max();
break;
case Activation::RELU:
*activation_min = 0;
- *activation_max = std::numeric_limits<float>::max();
+ *activation_max = std::numeric_limits<T>::max();
break;
case Activation::RELU_N1_TO_1:
*activation_min = -1;
@@ -52,6 +53,13 @@ void calculateActivationRange(Activation activation, float *activation_min, floa
}
}
+template void calculateActivationRange(Activation activation, float *activation_min,
+ float *activation_max);
+template void calculateActivationRange(Activation activation, int32_t *activation_min,
+ int32_t *activation_max);
+template void calculateActivationRange(Activation activation, int64_t *activation_min,
+ int64_t *activation_max);
+
static void calculateActivationRangeQuantizedImpl(Activation activation, int32_t qmin, int32_t qmax,
const Tensor *output, int32_t *activation_min,
int32_t *activation_max)
@@ -175,7 +183,11 @@ Shape calculateShapeForBroadcast(const Shape &input1_shape, const Shape &input2_
{
const int32_t input1_dim = i < num_input1_dims ? input1_shape.dim(num_input1_dims - i - 1) : 1;
const int32_t input2_dim = i < num_input2_dims ? input2_shape.dim(num_input2_dims - i - 1) : 1;
- assert(input1_dim == input2_dim || input1_dim == 1 || input2_dim == 1);
+
+ bool need_broadcast = input1_dim != input2_dim;
+ bool can_broadcast = input1_dim == 1 || input2_dim == 1;
+ LUCI_INTERPRETER_CHECK(!need_broadcast || can_broadcast);
+
output_shape.dim(num_out_dims - i - 1) = std::max(input1_dim, input2_dim);
}
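Note: the hunk above promotes an assert to LUCI_INTERPRETER_CHECK, so incompatible shapes now fail with a recoverable error in release builds as well. The rule enforced is the usual NumPy-style broadcast rule; a self-contained restatement:

    // Aligned from the trailing dimension, two dims are compatible iff they
    // are equal or one of them is 1; the output takes the larger dim.
    #include <algorithm>
    #include <cstdint>
    #include <stdexcept>
    #include <vector>

    std::vector<int32_t> broadcastShape(const std::vector<int32_t> &a, const std::vector<int32_t> &b)
    {
      const size_t n = std::max(a.size(), b.size());
      std::vector<int32_t> out(n);
      for (size_t i = 0; i < n; ++i)
      {
        const int32_t da = i < a.size() ? a[a.size() - i - 1] : 1;
        const int32_t db = i < b.size() ? b[b.size() - i - 1] : 1;
        if (da != db && da != 1 && db != 1)
          throw std::runtime_error("shapes are not broadcast-compatible");
        out[n - i - 1] = std::max(da, db);
      }
      return out;
    }

    int main()
    {
      const auto out = broadcastShape({2, 3, 1, 2}, {1, 1, 3, 2});
      return out == std::vector<int32_t>({2, 3, 3, 2}) ? 0 : 1;
    }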
diff --git a/compiler/luci-interpreter/src/kernels/Utils.h b/compiler/luci-interpreter/src/kernels/Utils.h
index 817a42f83..ebeb20e66 100644
--- a/compiler/luci-interpreter/src/kernels/Utils.h
+++ b/compiler/luci-interpreter/src/kernels/Utils.h
@@ -76,11 +76,42 @@ inline int32_t calcOffset(const Shape &shape, int32_t d0, int32_t d1, int32_t d2
return ((d0 * shape.dim(1) + d1) * shape.dim(2) + d2) * shape.dim(3) + d3;
}
-void calculateActivationRange(Activation activation, float *activation_min, float *activation_max);
+template <typename T>
+void calculateActivationRange(Activation activation, T *activation_min, T *activation_max);
void calculateActivationRangeQuantized(Activation activation, const Tensor *output,
int32_t *activation_min, int32_t *activation_max);
+template <typename T> constexpr bool one_of_types() { return false; }
+
+// Checks whether T is one of the {U, Other...} types
+template <typename T, typename U, typename... Other> constexpr bool one_of_types()
+{
+ return std::is_same<T, U>::value || one_of_types<T, Other...>();
+}
+
+/**
+ * Fills the activation min and max parameters for the given data type and activation.
+ *
+ * T is a template parameter, so after optimization this code is left with only the
+ * required branch.
+ *
+ * @tparam T data type of the arithmetic operation's output tensor
+ * @param p tflite params to fill
+ * @param act luci_interpreter::Activation of the arithmetic operation
+ */
+template <typename T>
+void fillArithmeticActivationRange(tflite::ArithmeticParams &p, Activation act)
+{
+ static_assert(one_of_types<T, float, int32_t, int64_t>(), "Unsupported dtype");
+
+ if (std::is_same<T, float>::value)
+ calculateActivationRange(act, &p.float_activation_min, &p.float_activation_max);
+  else if (std::is_same<T, int32_t>::value)
+ calculateActivationRange(act, &p.quantized_activation_min, &p.quantized_activation_max);
+ else
+ calculateActivationRange(act, &p.int64_activation_min, &p.int64_activation_max);
+}
+
// Decompose a double multiplier into a Q0.31 int32 representation of its
// significand, and shift representation of its exponent.
//
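Note: fillArithmeticActivationRange dispatches on T at compile time, so the optimizer keeps only the branch for the instantiated type (hence the else-if chain above: without it the float case would also execute the trailing else). A self-contained analogue, with Params standing in for tflite::ArithmeticParams and the Activation handling elided:

    #include <cstdint>
    #include <limits>
    #include <type_traits>

    struct Params
    {
      float float_min, float_max;
      int32_t quant_min, quant_max;
      int64_t int64_min, int64_max;
    };

    template <typename T> void fillRange(Params &p)
    {
      static_assert(std::is_same<T, float>::value || std::is_same<T, int32_t>::value ||
                      std::is_same<T, int64_t>::value,
                    "Unsupported dtype");
      if (std::is_same<T, float>::value)
      {
        p.float_min = std::numeric_limits<float>::lowest();
        p.float_max = std::numeric_limits<float>::max();
      }
      else if (std::is_same<T, int32_t>::value)
      {
        p.quant_min = std::numeric_limits<int32_t>::lowest();
        p.quant_max = std::numeric_limits<int32_t>::max();
      }
      else
      {
        p.int64_min = std::numeric_limits<int64_t>::lowest();
        p.int64_max = std::numeric_limits<int64_t>::max();
      }
    }

    int main()
    {
      Params p{};
      fillRange<int32_t>(p);
      return p.quant_max == std::numeric_limits<int32_t>::max() ? 0 : 1;
    }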
diff --git a/compiler/luci-interpreter/src/loader/CMakeLists.txt b/compiler/luci-interpreter/src/loader/CMakeLists.txt
index 2cde99f5d..292771592 100644
--- a/compiler/luci-interpreter/src/loader/CMakeLists.txt
+++ b/compiler/luci-interpreter/src/loader/CMakeLists.txt
@@ -17,7 +17,9 @@ endmacro(REGISTER_KERNEL)
include(${KERNEL_REGISTER_FILE})
add_library(${LUCI_INTERPRETER_LOADER} STATIC ${SOURCES})
-set_target_properties(${LUCI_INTERPRETER_LOADER} PROPERTIES POSITION_INDEPENDENT_CODE ON)
+if (NOT NNCC_LIBRARY_NO_PIC)
+ set_target_properties(${LUCI_INTERPRETER_LOADER} PROPERTIES POSITION_INDEPENDENT_CODE ON)
+endif(NOT NNCC_LIBRARY_NO_PIC)
target_include_directories(${LUCI_INTERPRETER_LOADER} PUBLIC "${LUCI_INTERPRETER_PAL_DIR}")
target_include_directories(${LUCI_INTERPRETER_LOADER} PUBLIC "${LUCI_INTERPRETER_SOURCE_DIR}")
diff --git a/compiler/luci-interpreter/src/loader/GraphLoader.cpp b/compiler/luci-interpreter/src/loader/GraphLoader.cpp
index a14442ed5..dba39050c 100644
--- a/compiler/luci-interpreter/src/loader/GraphLoader.cpp
+++ b/compiler/luci-interpreter/src/loader/GraphLoader.cpp
@@ -73,6 +73,26 @@ const void *getNodeData(const luci::CircleConst *node, size_t *data_size)
}
}
+const void *getNodeData(const luci::CircleCustom *node, size_t *data_size)
+{
+ if (node->custom_code() != "CircleReferencingConst")
+ return nullptr;
+
+  // helper struct which describes the data loaded into custom_options of a CircleReferencingConst node
+ // TODO move this struct to header
+ struct ConstDataReference
+ {
+ const uint8_t *data = nullptr;
+ uint32_t size = 0;
+ };
+
+ const auto &custom_options = node->custom_options();
+ const auto &const_data_ref = *reinterpret_cast<const ConstDataReference *>(custom_options.data());
+
+ *data_size = const_data_ref.size;
+ return const_data_ref.data;
+}
+
bool isExecutableNode(const luci::CircleNode *node)
{
switch (node->opcode())
@@ -83,12 +103,30 @@ bool isExecutableNode(const luci::CircleNode *node)
case luci::CircleOpcode::CIRCLEOUTPUT:
case luci::CircleOpcode::CIRCLEOUTPUTEXCLUDE:
// The following nodes denote outputs of multiple-output nodes.
+ case luci::CircleOpcode::CIRCLEBIDIRECTIONAL_SEQUENCE_LSTM_OUT:
+ case luci::CircleOpcode::CIRCLECUSTOMOUT:
case luci::CircleOpcode::CIRCLEIFOUT:
+ case luci::CircleOpcode::CIRCLENONMAXSUPPRESSIONV4OUT:
+ case luci::CircleOpcode::CIRCLENONMAXSUPPRESSIONV5OUT:
case luci::CircleOpcode::CIRCLESPLITOUT:
case luci::CircleOpcode::CIRCLESPLITVOUT:
+ case luci::CircleOpcode::CIRCLETOPKV2OUT:
+ case luci::CircleOpcode::CIRCLEUNIQUEOUT:
case luci::CircleOpcode::CIRCLEUNPACKOUT:
+ case luci::CircleOpcode::CIRCLEVARIABLE:
case luci::CircleOpcode::CIRCLEWHILEOUT:
return false;
+    // Custom nodes may be executable or non-executable
+ case luci::CircleOpcode::CUSTOM:
+ {
+ auto const custom_node = loco::must_cast<const luci::CircleCustom *>(node);
+
+ // TODO handle more non-executable Custom ops here
+ if (custom_node->custom_code() == "CircleReferencingConst")
+ return false;
+
+ return true;
+ }
default:
return true;
}
@@ -102,15 +140,34 @@ bool isTensorProducingNode(const luci::CircleNode *node)
case luci::CircleOpcode::CIRCLEOUTPUT:
    // The following nodes are multiple-output nodes. They do not produce tensors; the tensors
    // are produced by the corresponding *Out nodes instead.
+ case luci::CircleOpcode::BIDIRECTIONAL_SEQUENCE_LSTM:
+ case luci::CircleOpcode::CUSTOM:
case luci::CircleOpcode::IF:
+ case luci::CircleOpcode::NON_MAX_SUPPRESSION_V4:
+ case luci::CircleOpcode::NON_MAX_SUPPRESSION_V5:
case luci::CircleOpcode::SPLIT:
+ case luci::CircleOpcode::SPLIT_V:
+ case luci::CircleOpcode::TOPK_V2:
+ case luci::CircleOpcode::UNIQUE:
case luci::CircleOpcode::UNPACK:
+ case luci::CircleOpcode::WHILE:
return false;
default:
return true;
}
}
+bool isSupportedCustomNode(const luci::CircleNode *node)
+{
+ const auto custom_node = loco::must_cast<const luci::CircleCustom *>(node);
+
+ // TODO handle more Custom ops here
+ if (custom_node->custom_code() == "CircleReferencingConst")
+ return true;
+
+ return false;
+}
+
} // namespace
GraphLoader::GraphLoader(
@@ -129,18 +186,25 @@ void GraphLoader::loadTensors()
{
const auto *node = loco::must_cast<const luci::CircleNode *>(_graph->nodes()->at(i));
+ if (node->opcode() == luci::CircleOpcode::CUSTOM && !isSupportedCustomNode(node))
+ throw std::runtime_error("Unknown Custom Node, yet.");
+
if (!isTensorProducingNode(node))
continue;
- // Only Input and Const nodes have shapes. Shapes of intermediate tensors will be inferred.
+ // Only Input, Const, Custom and Variable nodes have shapes. Shapes of intermediate tensors will
+ // be inferred.
Shape shape{};
- if (const auto *input_node = dynamic_cast<const luci::CircleInput *>(node))
+ switch (node->opcode())
{
- shape = getNodeShape(input_node);
- }
- else if (const auto *const_node = dynamic_cast<const luci::CircleConst *>(node))
- {
- shape = getNodeShape(const_node);
+ case luci::CircleOpcode::CIRCLECONST:
+ case luci::CircleOpcode::CIRCLECUSTOMOUT:
+ case luci::CircleOpcode::CIRCLEINPUT:
+ case luci::CircleOpcode::CIRCLEVARIABLE:
+ shape = getNodeShape(node);
+ break;
+ default:
+ break;
}
AffineQuantization quantization;
@@ -175,6 +239,22 @@ void GraphLoader::loadTensors()
tensor->writeData(const_data, data_size);
}
}
+ else if (const auto *custom_out_node = dynamic_cast<const luci::CircleCustomOut *>(node))
+ {
+ const auto *custom_node =
+ loco::must_cast<const luci::CircleCustom *>(custom_out_node->input());
+
+ if (custom_node->custom_code() == "CircleReferencingConst")
+ {
+ size_t data_size{};
+ const void *const_data = getNodeData(custom_node, &data_size);
+ if (const_data != nullptr)
+ {
+ _memory_manager->allocate_memory(*tensor);
+ tensor->writeData(const_data, data_size);
+ }
+ }
+ }
_node_to_tensor.emplace(node, tensor.get());
_runtime_to_ir.tensor_to_node.emplace(tensor.get(), node);
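Note: getNodeData above reinterprets the raw custom_options bytes as a ConstDataReference. A hypothetical producer-side counterpart, showing how those bytes could be packed so the loader's reinterpret_cast reads them back (this assumes producer and loader share one process, so the struct layout matches and the pointed-to buffer outlives the node; names other than ConstDataReference are illustrative):

    #include <cstdint>
    #include <cstring>
    #include <vector>

    struct ConstDataReference
    {
      const uint8_t *data = nullptr;
      uint32_t size = 0;
    };

    std::vector<uint8_t> packConstDataReference(const uint8_t *data, uint32_t size)
    {
      const ConstDataReference ref{data, size};
      std::vector<uint8_t> custom_options(sizeof(ref));
      std::memcpy(custom_options.data(), &ref, sizeof(ref)); // byte-copy the struct
      return custom_options;
    }

    int main()
    {
      static const uint8_t payload[4] = {1, 2, 3, 4};
      const auto options = packConstDataReference(payload, sizeof(payload));
      // Read back the same way the loader does
      const auto &ref = *reinterpret_cast<const ConstDataReference *>(options.data());
      return (ref.data == payload && ref.size == 4) ? 0 : 1;
    }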
diff --git a/compiler/luci-interpreter/src/loader/KernelBuilder.test.cpp b/compiler/luci-interpreter/src/loader/KernelBuilder.test.cpp
index 7a457a62f..b221b6921 100644
--- a/compiler/luci-interpreter/src/loader/KernelBuilder.test.cpp
+++ b/compiler/luci-interpreter/src/loader/KernelBuilder.test.cpp
@@ -21,6 +21,7 @@
#include <kernels/Add.h>
#include <kernels/ArgMax.h>
#include <kernels/AveragePool2D.h>
+#include <kernels/BatchMatMul.h>
#include <kernels/Cast.h>
#include <kernels/Concatenation.h>
#include <kernels/Conv2D.h>
@@ -54,6 +55,7 @@
#include <kernels/Mul.h>
#include <kernels/Neg.h>
#include <kernels/NotEqual.h>
+#include <kernels/OneHot.h>
#include <kernels/Pad.h>
#include <kernels/PadV2.h>
#include <kernels/Pow.h>
@@ -209,6 +211,27 @@ TEST_F(KernelBuilderTest, AveragePool2D)
EXPECT_THAT(kernel->params().activation, Eq(op->fusedActivationFunction()));
}
+TEST_F(KernelBuilderTest, BatchMatMul)
+{
+ auto *lhs = createInputNode();
+ auto *rhs = createInputNode();
+
+ auto *op = createNode<luci::CircleBatchMatMul>();
+ op->x(lhs);
+ op->y(rhs);
+ op->adj_x(false);
+ op->adj_y(false);
+
+ auto kernel = buildKernel<kernels::BatchMatMul>(op);
+ ASSERT_THAT(kernel, NotNull());
+
+ checkTensor(kernel->x(), lhs);
+ checkTensor(kernel->y(), rhs);
+ checkTensor(kernel->output(), op);
+ EXPECT_THAT(kernel->params().adj_x, Eq(op->adj_x()));
+ EXPECT_THAT(kernel->params().adj_y, Eq(op->adj_y()));
+}
+
TEST_F(KernelBuilderTest, Cast)
{
auto *input = createInputNode();
@@ -832,6 +855,31 @@ TEST_F(KernelBuilderTest, NotEqual)
checkTensor(kernel->output(), op);
}
+TEST_F(KernelBuilderTest, OneHot)
+{
+ auto *indices = createInputNode();
+ auto *depth = createInputNode();
+ auto *on_value = createInputNode();
+ auto *off_value = createInputNode();
+ auto axis = 1;
+
+ auto *op = createNode<luci::CircleOneHot>();
+ op->indices(indices);
+ op->depth(depth);
+ op->on_value(on_value);
+ op->off_value(off_value);
+ op->axis(axis);
+
+ auto kernel = buildKernel<kernels::OneHot>(op);
+ ASSERT_THAT(kernel, NotNull());
+
+ checkTensor(kernel->indices(), indices);
+ checkTensor(kernel->depth(), depth);
+ checkTensor(kernel->on_value(), on_value);
+ checkTensor(kernel->off_value(), off_value);
+ EXPECT_THAT(kernel->params().axis, Eq(op->axis()));
+}
+
TEST_F(KernelBuilderTest, Pad)
{
auto *input = createInputNode();
diff --git a/compiler/luci-interpreter/src/loader/nodes/AveragePool2D.cpp b/compiler/luci-interpreter/src/loader/nodes/AveragePool2D.cpp
index 5bc37bd4a..efb011257 100644
--- a/compiler/luci-interpreter/src/loader/nodes/AveragePool2D.cpp
+++ b/compiler/luci-interpreter/src/loader/nodes/AveragePool2D.cpp
@@ -17,6 +17,7 @@
#include "Builders.h"
#include "kernels/AveragePool2D.h"
+#include <luci/Plan/CircleNodeExecutionPlan.h>
namespace luci_interpreter
{
@@ -40,7 +41,26 @@ std::unique_ptr<Kernel> build_kernel_CircleAveragePool2D(const luci::CircleNode
params.stride_width = node->stride()->w();
params.activation = node->fusedActivationFunction();
- return std::make_unique<kernels::AveragePool2D>(input, output, params);
+  // It is unknown what data will be stored in the scratchpad tensor,
+  // so UINT8 is used as the most general option
+ auto scratchpad = std::make_unique<Tensor>(DataType::U8, Shape({}), AffineQuantization{}, "");
+ scratchpad->set_observable(false);
+ scratchpad->set_data_buffer(nullptr);
+ // If node has execution plan then read memory offsets for scratchpad temporary tensor
+ // from the beginning of shared memory buffer.
+ // Used in Static Memory Manager.
+ // TODO move tensors offset initialization to one place
+ if (luci::has_execution_plan(node))
+ {
+ const auto execution_plan = luci::get_execution_plan(node);
+ // Check whether the offset for the current CircleConv2D temporary was found.
+ if (execution_plan.offsets().size() > 1)
+ // If this is true, then we keep this offset in scratchpad.
+ scratchpad->set_offset(execution_plan.offsets().at(1));
+ }
+ Tensor *tmp = helper.getRuntimeGraph(node->graph())->addTensor(std::move(scratchpad));
+
+ return std::make_unique<kernels::AveragePool2D>(input, output, tmp, params);
}
} // namespace luci_interpreter
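Note: the scratchpad created above is only a placeholder (shape {}, no data buffer, not observable) whose real size is determined by the kernel at configure() time; the same pattern repeats in the BatchMatMul, Conv2D and DepthwiseConv2D builders below. A self-contained mini-model of that life cycle (names illustrative, not the repository API):

    #include <cstddef>
    #include <cstdint>
    #include <vector>

    struct Scratchpad
    {
      std::vector<uint8_t> bytes; // empty until the configure-time resize
      bool observable = false;    // never surfaced as a model output
      void resize(size_t n) { bytes.resize(n); }
    };

    int main()
    {
      Scratchpad sp;       // loader: shape {}, no buffer attached yet
      sp.resize(4 * 1024); // kernel configure(): the needed size is now known
      return sp.bytes.size() == 4 * 1024 ? 0 : 1;
    }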
diff --git a/compiler/luci-interpreter/src/loader/nodes/BatchMatMul.cpp b/compiler/luci-interpreter/src/loader/nodes/BatchMatMul.cpp
new file mode 100644
index 000000000..aae3dbab1
--- /dev/null
+++ b/compiler/luci-interpreter/src/loader/nodes/BatchMatMul.cpp
@@ -0,0 +1,72 @@
+/*
+ * Copyright (c) 2022 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "Builders.h"
+
+#include "kernels/BatchMatMul.h"
+#include <luci/Plan/CircleNodeExecutionPlan.h>
+
+namespace luci_interpreter
+{
+
+std::unique_ptr<Kernel> build_kernel_CircleBatchMatMul(const luci::CircleNode *circle_node,
+ KernelBuilderHelper &helper)
+{
+ const auto *node = dynamic_cast<const luci::CircleBatchMatMul *>(circle_node);
+ if (node == nullptr)
+ throw std::runtime_error("wrong builder for operation");
+ assert(node->arity() == 2);
+
+ const Tensor *lhs = helper.getInputTensor(node->x());
+ const Tensor *rhs = helper.getInputTensor(node->y());
+ Tensor *output = helper.getOutputTensor(node);
+
+ auto lhs_scratchpad =
+ std::make_unique<Tensor>(lhs->element_type(), Shape({}), AffineQuantization{}, "");
+ lhs_scratchpad->set_observable(false);
+ lhs_scratchpad->set_data_buffer(nullptr);
+ auto rhs_scratchpad =
+ std::make_unique<Tensor>(rhs->element_type(), Shape({}), AffineQuantization{}, "");
+ rhs_scratchpad->set_observable(false);
+ rhs_scratchpad->set_data_buffer(nullptr);
+ // If node has execution plan then read memory offsets for scratchpad temporary tensor
+ // from the beginning of shared memory buffer.
+ // Used in Static Memory Manager.
+ // TODO move tensors offset initialization to one place
+ if (luci::has_execution_plan(node))
+ {
+ const auto execution_plan = luci::get_execution_plan(node);
+ // Check whether the offset for the current BatchMatMul temporary was found.
+ if (execution_plan.offsets().size() > 1)
+ {
+ assert(execution_plan.offsets().size() == 3);
+
+ // If this is true, then we keep this offset in scratchpad.
+ lhs_scratchpad->set_offset(execution_plan.offsets().at(1));
+ rhs_scratchpad->set_offset(execution_plan.offsets().at(2));
+ }
+ }
+ Tensor *lhs_tmp = helper.getRuntimeGraph(node->graph())->addTensor(std::move(lhs_scratchpad));
+ Tensor *rhs_tmp = helper.getRuntimeGraph(node->graph())->addTensor(std::move(rhs_scratchpad));
+
+ BatchMatMulParams params;
+ params.adj_x = node->adj_x();
+ params.adj_y = node->adj_y();
+
+ return std::make_unique<kernels::BatchMatMul>(lhs, rhs, output, lhs_tmp, rhs_tmp, params);
+}
+
+} // namespace luci_interpreter
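Note: the assert above implies the offset layout assumed under the static memory planner: offsets().at(0) for the output, .at(1) and .at(2) for the lhs/rhs scratchpads (inferred from the code, not a documented contract). For background, adj_x/adj_y follow the TFLite convention of transposing the trailing two dimensions of the respective operand before the per-batch multiply; a minimal 2-D multiply with both flags false:

    #include <array>
    #include <iostream>

    int main()
    {
      // C = A x B with A = [[1,2],[3,4]] and B = [[5,6],[7,8]]
      const std::array<std::array<int, 2>, 2> A{{{1, 2}, {3, 4}}};
      const std::array<std::array<int, 2>, 2> B{{{5, 6}, {7, 8}}};
      std::array<std::array<int, 2>, 2> C{};
      for (int i = 0; i < 2; ++i)
        for (int j = 0; j < 2; ++j)
          for (int k = 0; k < 2; ++k)
            C[i][j] += A[i][k] * B[k][j];
      std::cout << C[0][0] << ' ' << C[1][1] << '\n'; // 19 50
      return 0;
    }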
diff --git a/compiler/luci-interpreter/src/loader/nodes/Conv2D.cpp b/compiler/luci-interpreter/src/loader/nodes/Conv2D.cpp
index 22fd1aca4..b48d97d19 100644
--- a/compiler/luci-interpreter/src/loader/nodes/Conv2D.cpp
+++ b/compiler/luci-interpreter/src/loader/nodes/Conv2D.cpp
@@ -35,11 +35,12 @@ std::unique_ptr<Kernel> build_kernel_CircleConv2D(const luci::CircleNode *circle
const Tensor *bias = helper.getOptionalInputTensor(node->bias());
Tensor *output = helper.getOutputTensor(node);
- auto im2col =
- std::make_unique<Tensor>(input->element_type(), Shape({}), AffineQuantization{}, "");
- im2col->set_observable(false);
- im2col->set_data_buffer(nullptr);
- // If node has execution plan then read memory offsets for im2col temporary tensor
+  // It is unknown what data will be stored in the scratchpad tensor,
+  // so UINT8 is used as the most general option
+ auto scratchpad = std::make_unique<Tensor>(DataType::U8, Shape({}), AffineQuantization{}, "");
+ scratchpad->set_observable(false);
+ scratchpad->set_data_buffer(nullptr);
+ // If node has execution plan then read memory offsets for scratchpad temporary tensor
// from the beginning of shared memory buffer.
// Used in Static Memory Manager.
// TODO move tensors offset initialization to one place
@@ -48,10 +49,10 @@ std::unique_ptr<Kernel> build_kernel_CircleConv2D(const luci::CircleNode *circle
const auto execution_plan = luci::get_execution_plan(node);
// Check whether the offset for the current CircleConv2D temporary was found.
if (execution_plan.offsets().size() > 1)
- // If this is true, then we keep this offset in im2col.
- im2col->set_offset(execution_plan.offsets().at(1));
+ // If this is true, then we keep this offset in scratchpad.
+ scratchpad->set_offset(execution_plan.offsets().at(1));
}
- Tensor *tmp = helper.getRuntimeGraph(node->graph())->addTensor(std::move(im2col));
+ Tensor *tmp = helper.getRuntimeGraph(node->graph())->addTensor(std::move(scratchpad));
Conv2DParams params{};
params.padding = node->padding();
diff --git a/compiler/luci-interpreter/src/loader/nodes/DepthwiseConv2D.cpp b/compiler/luci-interpreter/src/loader/nodes/DepthwiseConv2D.cpp
index c2f0346a2..db26ecf2e 100644
--- a/compiler/luci-interpreter/src/loader/nodes/DepthwiseConv2D.cpp
+++ b/compiler/luci-interpreter/src/loader/nodes/DepthwiseConv2D.cpp
@@ -17,6 +17,7 @@
#include "Builders.h"
#include "kernels/DepthwiseConv2D.h"
+#include <luci/Plan/CircleNodeExecutionPlan.h>
namespace luci_interpreter
{
@@ -43,7 +44,26 @@ std::unique_ptr<Kernel> build_kernel_CircleDepthwiseConv2D(const luci::CircleNod
params.dilation_width_factor = node->dilation()->w();
params.activation = node->fusedActivationFunction();
- return std::make_unique<kernels::DepthwiseConv2D>(input, filter, bias, output, params);
+  // It is unknown what data will be stored in the scratchpad tensor,
+  // so UINT8 is used as the most general option
+ auto scratchpad = std::make_unique<Tensor>(DataType::U8, Shape({}), AffineQuantization{}, "");
+ scratchpad->set_observable(false);
+ scratchpad->set_data_buffer(nullptr);
+ // If node has execution plan then read memory offsets for scratchpad temporary tensor
+ // from the beginning of shared memory buffer.
+ // Used in Static Memory Manager.
+ // TODO move tensors offset initialization to one place
+ if (luci::has_execution_plan(node))
+ {
+ const auto execution_plan = luci::get_execution_plan(node);
+    // Check whether the offset for the current CircleDepthwiseConv2D temporary was found.
+ if (execution_plan.offsets().size() > 1)
+ // If this is true, then we keep this offset in scratchpad.
+ scratchpad->set_offset(execution_plan.offsets().at(1));
+ }
+ Tensor *tmp = helper.getRuntimeGraph(node->graph())->addTensor(std::move(scratchpad));
+
+ return std::make_unique<kernels::DepthwiseConv2D>(input, filter, bias, output, tmp, params);
}
} // namespace luci_interpreter
diff --git a/compiler/luci-interpreter/src/loader/nodes/Dequantize.cpp b/compiler/luci-interpreter/src/loader/nodes/Dequantize.cpp
new file mode 100644
index 000000000..4aae56469
--- /dev/null
+++ b/compiler/luci-interpreter/src/loader/nodes/Dequantize.cpp
@@ -0,0 +1,37 @@
+/*
+ * Copyright (c) 2022 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "Builders.h"
+
+#include "kernels/Dequantize.h"
+
+namespace luci_interpreter
+{
+
+std::unique_ptr<Kernel> build_kernel_CircleDequantize(const luci::CircleNode *circle_node,
+ KernelBuilderHelper &helper)
+{
+ const auto *node = dynamic_cast<const luci::CircleDequantize *>(circle_node);
+ if (node == nullptr)
+ throw std::runtime_error("wrong builder for operation");
+
+ const Tensor *input = helper.getInputTensor(node->input());
+ Tensor *output = helper.getOutputTensor(node);
+
+ return std::make_unique<kernels::Dequantize>(input, output);
+}
+
+} // namespace luci_interpreter
diff --git a/compiler/luci-interpreter/src/loader/nodes/ExpandDims.cpp b/compiler/luci-interpreter/src/loader/nodes/ExpandDims.cpp
new file mode 100644
index 000000000..9840c34e5
--- /dev/null
+++ b/compiler/luci-interpreter/src/loader/nodes/ExpandDims.cpp
@@ -0,0 +1,37 @@
+/*
+ * Copyright (c) 2022 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "Builders.h"
+
+#include "kernels/ExpandDims.h"
+
+namespace luci_interpreter
+{
+
+std::unique_ptr<Kernel> build_kernel_CircleExpandDims(const luci::CircleNode *circle_node,
+ KernelBuilderHelper &helper)
+{
+ const auto *node = loco::must_cast<const luci::CircleExpandDims *>(circle_node);
+ assert(node->arity() == 2);
+
+ const Tensor *input = helper.getInputTensor(node->input());
+ const Tensor *axis = helper.getInputTensor(node->axis());
+ Tensor *output = helper.getOutputTensor(node);
+
+ return std::make_unique<kernels::ExpandDims>(input, axis, output);
+}
+
+} // namespace luci_interpreter
diff --git a/compiler/luci-interpreter/src/loader/nodes/FullyConnected.cpp b/compiler/luci-interpreter/src/loader/nodes/FullyConnected.cpp
index 2917598fc..0b8ac44bd 100644
--- a/compiler/luci-interpreter/src/loader/nodes/FullyConnected.cpp
+++ b/compiler/luci-interpreter/src/loader/nodes/FullyConnected.cpp
@@ -36,6 +36,7 @@ std::unique_ptr<Kernel> build_kernel_CircleFullyConnected(const luci::CircleNode
FullyConnectedParams params{};
params.activation = node->fusedActivationFunction();
+ params.keep_num_dims = node->keep_num_dims();
return std::make_unique<kernels::FullyConnected>(input, weights, bias, output, params);
}
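Note: keep_num_dims mirrors the TFLite FullyConnected option of the same name; as commonly specified (stated here as background, not taken from this patch), false collapses the output to rank-2 {batch, units}, while true preserves the input rank and replaces only the last dimension with units. A small helper restating that rule:

    #include <cstdint>
    #include <numeric>
    #include <vector>

    std::vector<int32_t> fcOutputShape(const std::vector<int32_t> &input, int32_t units,
                                       bool keep_num_dims)
    {
      if (keep_num_dims)
      {
        std::vector<int32_t> out = input; // preserve rank
        out.back() = units;               // replace the feature dimension
        return out;
      }
      const int64_t elements = std::accumulate(input.begin(), input.end(), int64_t{1},
                                               [](int64_t a, int32_t d) { return a * d; });
      const int32_t batch = static_cast<int32_t>(elements / input.back());
      return {batch, units}; // collapse all leading dims into one batch dim
    }

    int main() { return fcOutputShape({2, 3, 4}, 8, false) == std::vector<int32_t>{6, 8} ? 0 : 1; }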
diff --git a/compiler/luci-interpreter/src/loader/nodes/Gather.cpp b/compiler/luci-interpreter/src/loader/nodes/Gather.cpp
new file mode 100644
index 000000000..9df9775c5
--- /dev/null
+++ b/compiler/luci-interpreter/src/loader/nodes/Gather.cpp
@@ -0,0 +1,44 @@
+/*
+ * Copyright (c) 2022 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "Builders.h"
+
+#include "kernels/Gather.h"
+
+namespace luci_interpreter
+{
+
+std::unique_ptr<Kernel> build_kernel_CircleGather(const luci::CircleNode *circle_node,
+ KernelBuilderHelper &helper)
+{
+ const auto *node = dynamic_cast<const luci::CircleGather *>(circle_node);
+ if (node == nullptr)
+ throw std::runtime_error("wrong builder for operation");
+ assert(node->arity() == 2);
+
+ const Tensor *params = helper.getInputTensor(node->params());
+ const Tensor *indices = helper.getInputTensor(node->indices());
+ Tensor *output = helper.getOutputTensor(node);
+
+ GatherParams gparams{};
+ gparams.axis = node->axis();
+ // TODO support batch_dims
+ gparams.batch_dims = 0;
+
+ return std::make_unique<kernels::Gather>(params, indices, output, gparams);
+}
+
+} // namespace luci_interpreter
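Note: with batch_dims pinned to 0 (see the TODO above), Gather reduces to plain index selection along the given axis; for axis 0 that is row selection. An illustrative stand-alone version:

    #include <iostream>
    #include <vector>

    // Gather with batch_dims == 0 and axis == 0: pick whole rows of `params` by index.
    std::vector<float> gatherRows(const std::vector<float> &params, int row_len,
                                  const std::vector<int> &indices)
    {
      std::vector<float> out;
      for (int idx : indices)
        out.insert(out.end(), params.begin() + idx * row_len,
                   params.begin() + (idx + 1) * row_len);
      return out;
    }

    int main()
    {
      const std::vector<float> p = {1, 2, 3, 4, 5, 6}; // 3 rows x 2 cols
      for (float v : gatherRows(p, 2, {2, 0}))
        std::cout << v << ' '; // 5 6 1 2
      std::cout << '\n';
      return 0;
    }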
diff --git a/compiler/luci-interpreter/src/loader/nodes/OneHot.cpp b/compiler/luci-interpreter/src/loader/nodes/OneHot.cpp
new file mode 100644
index 000000000..a40160945
--- /dev/null
+++ b/compiler/luci-interpreter/src/loader/nodes/OneHot.cpp
@@ -0,0 +1,42 @@
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "Builders.h"
+
+#include "kernels/OneHot.h"
+
+namespace luci_interpreter
+{
+
+std::unique_ptr<Kernel> build_kernel_CircleOneHot(const luci::CircleNode *circle_node,
+ KernelBuilderHelper &helper)
+{
+ const auto *node = loco::must_cast<const luci::CircleOneHot *>(circle_node);
+ assert(node->arity() == 4);
+
+ const Tensor *indices = helper.getInputTensor(node->indices());
+ const Tensor *depth = helper.getInputTensor(node->depth());
+ const Tensor *on_value = helper.getInputTensor(node->on_value());
+ const Tensor *off_value = helper.getInputTensor(node->off_value());
+ Tensor *output = helper.getOutputTensor(node);
+
+ OneHotParams params{};
+ params.axis = node->axis();
+
+ return std::make_unique<kernels::OneHot>(indices, depth, on_value, off_value, output, params);
+}
+
+} // namespace luci_interpreter
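Note: for orientation, OneHot with the default axis == -1 expands each index into a depth-length row of off_value with on_value at the index position; out-of-range indices yield an all-off row (the usual TFLite behavior, stated as background). A minimal illustration:

    #include <iostream>
    #include <vector>

    std::vector<float> oneHot(const std::vector<int> &indices, int depth, float on, float off)
    {
      std::vector<float> out(indices.size() * depth, off);
      for (size_t i = 0; i < indices.size(); ++i)
        if (indices[i] >= 0 && indices[i] < depth)
          out[i * depth + indices[i]] = on;
      return out;
    }

    int main()
    {
      for (float v : oneHot({0, 2}, 3, 1.f, 0.f))
        std::cout << v << ' '; // 1 0 0 0 0 1
      std::cout << '\n';
      return 0;
    }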
diff --git a/compiler/luci-interpreter/src/loader/nodes/Quantize.cpp b/compiler/luci-interpreter/src/loader/nodes/Quantize.cpp
new file mode 100644
index 000000000..fd9836345
--- /dev/null
+++ b/compiler/luci-interpreter/src/loader/nodes/Quantize.cpp
@@ -0,0 +1,37 @@
+/*
+ * Copyright (c) 2022 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "Builders.h"
+
+#include "kernels/Quantize.h"
+
+namespace luci_interpreter
+{
+
+std::unique_ptr<Kernel> build_kernel_CircleQuantize(const luci::CircleNode *circle_node,
+ KernelBuilderHelper &helper)
+{
+ const auto *node = dynamic_cast<const luci::CircleQuantize *>(circle_node);
+ if (node == nullptr)
+ throw std::runtime_error("wrong builder for operation");
+
+ const Tensor *input = helper.getInputTensor(node->input());
+ Tensor *output = helper.getOutputTensor(node);
+
+ return std::make_unique<kernels::Quantize>(input, output);
+}
+
+} // namespace luci_interpreter
diff --git a/compiler/luci-interpreter/src/loader/nodes/SVDF.cpp b/compiler/luci-interpreter/src/loader/nodes/SVDF.cpp
new file mode 100644
index 000000000..89528d5ee
--- /dev/null
+++ b/compiler/luci-interpreter/src/loader/nodes/SVDF.cpp
@@ -0,0 +1,93 @@
+/*
+ * Copyright (c) 2022 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "Builders.h"
+
+#include "kernels/SVDF.h"
+
+namespace luci_interpreter
+{
+
+std::unique_ptr<Kernel> build_kernel_CircleSVDF(const luci::CircleNode *circle_node,
+ KernelBuilderHelper &helper)
+{
+ const auto *node = dynamic_cast<const luci::CircleSVDF *>(circle_node);
+ if (node == nullptr)
+ throw std::runtime_error("wrong builder for operation");
+
+ const Tensor *input = helper.getInputTensor(node->input());
+ const Tensor *feature = helper.getInputTensor(node->weight_feature());
+ const Tensor *time = helper.getInputTensor(node->weight_time());
+ const Tensor *bias = helper.getOptionalInputTensor(node->bias());
+ const Tensor *input_activation_state = helper.getInputTensor(node->input_activation_state());
+ Tensor *output = helper.getOutputTensor(node);
+
+ auto scratchpad_tensor = std::make_unique<Tensor>(input_activation_state->element_type(),
+ Shape({}), AffineQuantization{}, "");
+ scratchpad_tensor->set_observable(false);
+ scratchpad_tensor->set_data_buffer(nullptr);
+ Tensor *tmp = helper.getRuntimeGraph(node->graph())->addTensor(std::move(scratchpad_tensor));
+
+ DataType data_type = input->element_type() == DataType::S8 ? DataType::S32 : DataType::FLOAT32;
+
+ scratchpad_tensor = std::make_unique<Tensor>(data_type, Shape({}), AffineQuantization{}, "");
+ scratchpad_tensor->set_observable(false);
+ scratchpad_tensor->set_data_buffer(nullptr);
+ Tensor *tmp_1 = helper.getRuntimeGraph(node->graph())->addTensor(std::move(scratchpad_tensor));
+
+ if (data_type == DataType::FLOAT32 &&
+ (feature->element_type() == DataType::S8 || feature->element_type() == DataType::U8))
+ {
+ data_type = feature->element_type();
+ }
+
+ scratchpad_tensor = std::make_unique<Tensor>(data_type, Shape({}), AffineQuantization{}, "");
+ scratchpad_tensor->set_observable(false);
+ scratchpad_tensor->set_data_buffer(nullptr);
+ Tensor *tmp_2 = helper.getRuntimeGraph(node->graph())->addTensor(std::move(scratchpad_tensor));
+
+ data_type = DataType::FLOAT32;
+
+ scratchpad_tensor = std::make_unique<Tensor>(data_type, Shape({}), AffineQuantization{}, "");
+ scratchpad_tensor->set_observable(false);
+ scratchpad_tensor->set_data_buffer(nullptr);
+ Tensor *tmp_3 = helper.getRuntimeGraph(node->graph())->addTensor(std::move(scratchpad_tensor));
+
+ scratchpad_tensor = std::make_unique<Tensor>(data_type, Shape({}), AffineQuantization{}, "");
+ scratchpad_tensor->set_observable(false);
+ scratchpad_tensor->set_data_buffer(nullptr);
+ Tensor *tmp_4 = helper.getRuntimeGraph(node->graph())->addTensor(std::move(scratchpad_tensor));
+
+ scratchpad_tensor = std::make_unique<Tensor>(data_type, Shape({}), AffineQuantization{}, "");
+ scratchpad_tensor->set_observable(false);
+ scratchpad_tensor->set_data_buffer(nullptr);
+ Tensor *tmp_5 = helper.getRuntimeGraph(node->graph())->addTensor(std::move(scratchpad_tensor));
+
+ scratchpad_tensor = std::make_unique<Tensor>(data_type, Shape({}), AffineQuantization{}, "");
+ scratchpad_tensor->set_observable(false);
+ scratchpad_tensor->set_data_buffer(nullptr);
+ Tensor *tmp_6 = helper.getRuntimeGraph(node->graph())->addTensor(std::move(scratchpad_tensor));
+
+ SVDFParams params{};
+ params.activation = node->fusedActivationFunction();
+ params.svdf_rank = node->svdf_rank();
+ params.asymmetric_quantize_inputs = node->asymmetric_quantize_inputs();
+
+ return std::make_unique<kernels::SVDF>(input, feature, time, bias, input_activation_state, output,
+ tmp, tmp_1, tmp_2, tmp_3, tmp_4, tmp_5, tmp_6, params);
+}
+
+} // namespace luci_interpreter
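Note: the builder above wires seven scratchpads whose dtypes depend on the input and weight types; a condensed, stand-alone restatement of that selection (DType stands in for the luci DataType enum; tmp_3 through tmp_6 are always FLOAT32):

    enum class DType { FLOAT32, S8, U8, S32 };

    // tmp_1 in the builder above
    DType secondScratchpadType(DType input)
    {
      return input == DType::S8 ? DType::S32 : DType::FLOAT32;
    }

    // tmp_2 in the builder above: the hybrid path keeps the quantized weights' type
    DType thirdScratchpadType(DType input, DType feature)
    {
      const DType base = secondScratchpadType(input);
      if (base == DType::FLOAT32 && (feature == DType::S8 || feature == DType::U8))
        return feature;
      return base;
    }

    int main()
    {
      return thirdScratchpadType(DType::FLOAT32, DType::S8) == DType::S8 ? 0 : 1;
    }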
diff --git a/compiler/luci-micro/CMakeLists.txt b/compiler/luci-micro/CMakeLists.txt
index 94347082c..c8a2e12e1 100644
--- a/compiler/luci-micro/CMakeLists.txt
+++ b/compiler/luci-micro/CMakeLists.txt
@@ -6,7 +6,7 @@ set(ARM_OBJCOPY "arm-none-eabi-objcopy")
find_program(ARM_C_COMPILER_PATH ${ARM_C_COMPILER})
if(NOT ARM_C_COMPILER_PATH)
- message(WARNING "ARM compiler is NOT FOUND, skipping luci-micro build")
+ message(STATUS "Build luci-micro: FALSE(ARM compiler is NOT FOUND)")
return()
endif()
diff --git a/compiler/luci-pass-value-test/CMakeLists.txt b/compiler/luci-pass-value-test/CMakeLists.txt
index b31415870..034fe5269 100644
--- a/compiler/luci-pass-value-test/CMakeLists.txt
+++ b/compiler/luci-pass-value-test/CMakeLists.txt
@@ -1,3 +1,7 @@
+if(NOT ENABLE_TEST)
+ return()
+endif(NOT ENABLE_TEST)
+
unset(TEST_DEPS)
unset(LUCI_PASS_VALUE_TESTS)
@@ -38,7 +42,7 @@ add_test(NAME luci_pass_value_test
COMMAND "${CMAKE_CURRENT_SOURCE_DIR}/eval_driver.sh"
"${CMAKE_CURRENT_BINARY_DIR}"
"${ARTIFACTS_BIN_PATH}"
- "${NNCC_OVERLAY_DIR}/venv_2_6_0"
+ "${NNCC_OVERLAY_DIR}/venv_2_8_0"
"$<TARGET_FILE:luci_eval_driver>"
${LUCI_PASS_VALUE_TESTS}
)
diff --git a/compiler/luci-pass-value-test/eval_result_verifier.py b/compiler/luci-pass-value-test/eval_result_verifier.py
index c6005edfc..0073c4db5 100644
--- a/compiler/luci-pass-value-test/eval_result_verifier.py
+++ b/compiler/luci-pass-value-test/eval_result_verifier.py
@@ -22,6 +22,18 @@ circle_model = args.circle
interpreter = tf.lite.Interpreter(tflite_model)
interpreter.allocate_tensors()
+# Read SignatureDef and get the output tensor id order for remapping
+full_signatures = interpreter._get_full_signature_list()
+full_signatures_outputs_remap = None
+if full_signatures != None:
+ signature_serving_default = full_signatures.get('serving_default', None)
+ if signature_serving_default != None:
+ signature_outputs = signature_serving_default['outputs']
+
+ full_signatures_outputs_remap = []
+ for index, (key, value) in enumerate(signature_outputs.items()):
+ full_signatures_outputs_remap.append(value)
+
# Generate random input data.
num_inputs = len(interpreter.get_input_details())
for i in range(num_inputs):
@@ -33,6 +45,10 @@ for i in range(num_inputs):
input_data = np.array(
np.random.randint(0, 256, size=input_details["shape"]),
input_details["dtype"])
+ elif input_details["dtype"] == np.int16:
+ input_data = np.array(
+ np.random.randint(0, 100, size=input_details["shape"]),
+ input_details["dtype"])
elif input_details["dtype"] == np.bool_:
input_data = np.array(
np.random.choice(a=[True, False], size=input_details["shape"]),
@@ -55,48 +71,38 @@ subprocess.run(
check=True)
# Compare the results.
-for idx in range(len(interpreter.get_output_details())):
- output_details = interpreter.get_output_details()[idx]
+intp_output_details = interpreter.get_output_details()
+for idx in range(len(intp_output_details)):
+    output_details = intp_output_details[idx]
output_data = np.fromfile(circle_model + ".output" + str(idx),
output_details["dtype"])
shape_file = open(circle_model + ".output" + str(idx) + ".shape", 'r')
output_shape = [int(i) for i in shape_file.read().split(',')]
luci_output_data = np.reshape(output_data, output_shape)
+ output_tensor = output_details["index"]
+ if full_signatures_outputs_remap != None:
+ output_tensor = full_signatures_outputs_remap[idx]
+ intp_output_data = interpreter.get_tensor(output_tensor)
try:
if output_details["dtype"] == np.uint8:
- if np.allclose(
- luci_output_data,
- interpreter.get_tensor(
- interpreter.get_output_details()[idx]["index"]),
- rtol=0,
- atol=0) == False:
+ if np.allclose(luci_output_data, intp_output_data, rtol=0, atol=0) == False:
raise SystemExit("Execution result of " + tflite_model +
" does not match with " + circle_model)
elif output_details["dtype"] == np.float32:
if np.allclose(
- luci_output_data,
- interpreter.get_tensor(
- interpreter.get_output_details()[idx]["index"]),
- rtol=1.e-5,
- atol=1.e-5) == False:
+ luci_output_data, intp_output_data, rtol=1.e-5, atol=1.e-5) == False:
raise SystemExit("Execution result of " + tflite_model +
" does not match with " + circle_model)
elif output_details["dtype"] == np.int64:
- if np.allclose(
- luci_output_data,
- interpreter.get_tensor(
- interpreter.get_output_details()[idx]["index"]),
- rtol=0,
- atol=0) == False:
+ if np.allclose(luci_output_data, intp_output_data, rtol=0, atol=0) == False:
raise SystemExit("Execution result of " + tflite_model +
" does not match with " + circle_model)
elif output_details["dtype"] == np.int32:
- if np.allclose(
- luci_output_data,
- interpreter.get_tensor(
- interpreter.get_output_details()[idx]["index"]),
- rtol=0,
- atol=0) == False:
+ if np.allclose(luci_output_data, intp_output_data, rtol=0, atol=0) == False:
+ raise SystemExit("Execution result of " + tflite_model +
+ " does not match with " + circle_model)
+ elif output_details["dtype"] == np.int16:
+ if np.allclose(luci_output_data, intp_output_data, rtol=0, atol=0) == False:
raise SystemExit("Execution result of " + tflite_model +
" does not match with " + circle_model)
else:
diff --git a/compiler/luci-pass-value-test/test.lst b/compiler/luci-pass-value-test/test.lst
index 9c408887d..67476c644 100644
--- a/compiler/luci-pass-value-test/test.lst
+++ b/compiler/luci-pass-value-test/test.lst
@@ -29,3 +29,7 @@ addeval(Net_InstanceNorm_001 fuse_instnorm)
addeval(Net_InstanceNorm_002 fuse_instnorm)
addeval(Net_InstanceNorm_003 fuse_instnorm)
addeval(Net_StridedSlice_StridedSlice_000 remove_unnecessary_strided_slice)
+
+# test SignatureDef with any optimization
+#addeval(SignatureDef_MultiOut_000 fuse_instnorm)
+#addeval(SignatureDef_MultiOut_001 fuse_instnorm)
diff --git a/compiler/luci-value-test/CMakeLists.txt b/compiler/luci-value-test/CMakeLists.txt
index 3c7185b80..ebf9c5926 100644
--- a/compiler/luci-value-test/CMakeLists.txt
+++ b/compiler/luci-value-test/CMakeLists.txt
@@ -1,9 +1,18 @@
+if(NOT ENABLE_TEST)
+ return()
+endif(NOT ENABLE_TEST)
+
unset(LUCI_VALUE_TESTS)
+unset(LUCI_VALUE_TESTS_TOL)
macro(addeval NAME)
list(APPEND LUCI_VALUE_TESTS ${NAME})
endmacro(addeval)
+macro(addevaltol NAME RTOL ATOL)
+ list(APPEND LUCI_VALUE_TESTS_TOL ${NAME} ${RTOL} ${ATOL})
+endmacro(addevaltol)
+
# Read "test.lst"
include("test.lst")
# Read "test.local.lst" if exists
@@ -12,13 +21,60 @@ include("test.local.lst" OPTIONAL)
# Generate dependencies
add_custom_target(luci_eval_testfiles ALL DEPENDS ${TESTFILES})
-get_target_property(ARTIFACTS_BIN_PATH testDataGenerator BINARY_DIR)
+if(NOT CMAKE_CROSSCOMPILING)
+
+ get_target_property(ARTIFACTS_BIN_PATH testDataGenerator BINARY_DIR)
+
+ add_test(NAME luci_value_test
+ COMMAND "${CMAKE_CURRENT_SOURCE_DIR}/evalverify.sh"
+ "${CMAKE_CURRENT_BINARY_DIR}"
+ "${ARTIFACTS_BIN_PATH}"
+ "${NNCC_OVERLAY_DIR}/venv_2_8_0"
+ "$<TARGET_FILE:luci_eval_driver>"
+ ${LUCI_VALUE_TESTS}
+ )
+
+ if(DEFINED LUCI_VALUE_TESTS_TOL)
+ add_test(NAME luci_value_tol_test
+ COMMAND "${CMAKE_CURRENT_SOURCE_DIR}/evalverifytol.sh"
+ "${CMAKE_CURRENT_BINARY_DIR}"
+ "${ARTIFACTS_BIN_PATH}"
+ "${NNCC_OVERLAY_DIR}/venv_2_8_0"
+ "$<TARGET_FILE:luci_eval_driver>"
+ ${LUCI_VALUE_TESTS_TOL}
+ )
+ endif()
+
+else(NOT CMAKE_CROSSCOMPILING)
+  # NOTE The target test is carried out using reference input/output data from
+  #      host test results. This is because it would be difficult to prepare
+  #      TensorFlow Lite for the target device.
+  #      Thus, one must run the host test first, then run the test on the target
+  #      device with the result files from the host test.
+
+ if(NOT DEFINED ENV{BUILD_HOST_EXEC})
+ message(STATUS "BUILD_HOST_EXEC not set: Skip luci-value-test")
+ return()
+ endif(NOT DEFINED ENV{BUILD_HOST_EXEC})
+
+ set(ARTIFACTS_BIN_PATH $ENV{BUILD_HOST_EXEC}/compiler/common-artifacts)
+
+ add_test(NAME luci_value_cross_test
+ COMMAND "${CMAKE_CURRENT_SOURCE_DIR}/evalverify_ref.sh"
+ "${CMAKE_CURRENT_BINARY_DIR}"
+ "${ARTIFACTS_BIN_PATH}"
+ "$<TARGET_FILE:luci_eval_driver>"
+ ${LUCI_VALUE_TESTS}
+ )
+
+ if(DEFINED LUCI_VALUE_TESTS_TOL)
+ add_test(NAME luci_value_cross_tol_test
+ COMMAND "${CMAKE_CURRENT_SOURCE_DIR}/evalverifytol_ref.sh"
+ "${CMAKE_CURRENT_BINARY_DIR}"
+ "${ARTIFACTS_BIN_PATH}"
+ "$<TARGET_FILE:luci_eval_driver>"
+ ${LUCI_VALUE_TESTS_TOL}
+ )
+ endif()
-add_test(NAME luci_value_test
- COMMAND "${CMAKE_CURRENT_SOURCE_DIR}/evalverify.sh"
- "${CMAKE_CURRENT_BINARY_DIR}"
- "${ARTIFACTS_BIN_PATH}"
- "${NNCC_OVERLAY_DIR}/venv_2_6_0"
- "$<TARGET_FILE:luci_eval_driver>"
- ${LUCI_VALUE_TESTS}
-)
+endif(NOT CMAKE_CROSSCOMPILING)
diff --git a/compiler/luci-value-test/evalverify.sh b/compiler/luci-value-test/evalverify.sh
index 01c4bce46..3d2091176 100755
--- a/compiler/luci-value-test/evalverify.sh
+++ b/compiler/luci-value-test/evalverify.sh
@@ -4,10 +4,12 @@
#
# HOW TO USE
#
-# ./evalverify.sh <path/to/bin_dir> <path/to/work_dir> <path/to/venv_dir> <TEST 1> <TEST 2> ...
+# ./evalverify.sh <path/to/bin_dir> <path/to/work_dir> <path/to/venv_dir> <path/to/eval_driver> \
+# <TEST 1> <TEST 2> ...
# bin_dir : build directory of luci-value-test (ex: build/compiler/luci-value-test)
# work_dir : artifacts directoy where test materials exist
# venv_dir : python virtual environment home directory
+# eval_driver : luci_eval_driver path for evaluation
VERIFY_SOURCE_PATH="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
VERIFY_SCRIPT_PATH="${VERIFY_SOURCE_PATH}/luci_eval_verifier.py"
diff --git a/compiler/luci-value-test/evalverify_ref.sh b/compiler/luci-value-test/evalverify_ref.sh
new file mode 100755
index 000000000..f1e538aa3
--- /dev/null
+++ b/compiler/luci-value-test/evalverify_ref.sh
@@ -0,0 +1,63 @@
+#!/bin/bash
+
+# This script verifies the basic behavior of luci interpreter
+#
+# HOW TO USE
+#
+# ./evalverify_ref.sh <path/to/bin_dir> <path/to/ref_dir> <path/to/eval_driver> \
+# <TEST 1> <TEST 2> ...
+# bin_dir : build directory of luci-value-test (ex: build/compiler/luci-value-test)
+# ref_dir : artifacts directory where reference test materials exist
+# eval_driver : luci_eval_driver path for evaluation
+
+VERIFY_SOURCE_PATH="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
+VERIFY_SCRIPT_PATH="${VERIFY_SOURCE_PATH}/luci_eval_verifier_ref.py"
+BINDIR="$1"; shift
+REFDIR="$1"; shift
+INTERPRETER_DRIVER_PATH="$1"; shift
+
+TESTED=()
+PASSED=()
+FAILED=()
+
+for TESTCASE in "$@"; do
+ TESTED+=("${TESTCASE}")
+
+ TESTCASE_FILE="${REFDIR}/${TESTCASE}"
+ TEST_RESULT_FILE="${BINDIR}/${TESTCASE}"
+
+ PASSED_TAG="${TEST_RESULT_FILE}.passed"
+ rm -f "${PASSED_TAG}"
+
+ cat > "${TEST_RESULT_FILE}.log" <(
+ exec 2>&1
+ set -ex
+
+ "python3" "${VERIFY_SCRIPT_PATH}" \
+ --driver "${INTERPRETER_DRIVER_PATH}" \
+ --model_ref "${TESTCASE_FILE}" \
+ --work_path "${TEST_RESULT_FILE}"
+
+ if [[ $? -eq 0 ]]; then
+ touch "${PASSED_TAG}"
+ fi
+ )
+
+ if [[ -f "${PASSED_TAG}" ]]; then
+ PASSED+=("${TESTCASE}")
+ else
+ FAILED+=("${TESTCASE}")
+ fi
+done
+
+if [[ ${#TESTED[@]} -ne ${#PASSED[@]} ]]; then
+ echo "FAILED"
+ for TEST in "${FAILED[@]}"
+ do
+ echo "- ${TEST}"
+ done
+ exit 255
+fi
+
+echo "PASSED"
+exit 0
diff --git a/compiler/luci-value-test/evalverifytol.sh b/compiler/luci-value-test/evalverifytol.sh
new file mode 100755
index 000000000..92094055a
--- /dev/null
+++ b/compiler/luci-value-test/evalverifytol.sh
@@ -0,0 +1,71 @@
+#!/bin/bash
+
+# This script verifies the basic behavior of luci interpreter
+#
+# HOW TO USE
+#
+# ./evalverifytol.sh <path/to/bin_dir> <path/to/work_dir> <path/to/venv_dir> <path/to/eval_driver> \
+# <TEST 1> <RTOL 1> <ATOL 1> <TEST 2> <RTOL 2> <ATOL 2> ...
+# bin_dir : build directory of luci-value-test (ex: build/compiler/luci-value-test)
+# work_dir : artifacts directory where test materials exist
+# venv_dir : python virtual environment home directory
+# eval_driver : luci_eval_driver path for evaluation
+
+VERIFY_SOURCE_PATH="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
+VERIFY_SCRIPT_PATH="${VERIFY_SOURCE_PATH}/luci_eval_verifier.py"
+BINDIR="$1"; shift
+WORKDIR="$1"; shift
+VIRTUALENV="$1"; shift
+INTERPRETER_DRIVER_PATH="$1"; shift
+
+TESTED=()
+PASSED=()
+FAILED=()
+
+while (( "$#" >= 3 )); do
+ TESTCASE=$1
+ RTOLERANCE=$2
+ ATOLERANCE=$3
+ shift 3
+
+ TESTED+=("${TESTCASE}")
+
+ TESTCASE_FILE="${WORKDIR}/${TESTCASE}"
+ TEST_RESULT_FILE="${BINDIR}/${TESTCASE}"
+
+ PASSED_TAG="${TEST_RESULT_FILE}.passed"
+ rm -f "${PASSED_TAG}"
+
+ cat > "${TEST_RESULT_FILE}.log" <(
+ exec 2>&1
+ set -ex
+
+ source "${VIRTUALENV}/bin/activate"
+ "${VIRTUALENV}/bin/python" "${VERIFY_SCRIPT_PATH}" \
+ --driver "${INTERPRETER_DRIVER_PATH}" \
+ --model "${TESTCASE_FILE}" \
+ --rtolf32 "${RTOLERANCE}" \
+ --atolf32 "${ATOLERANCE}"
+
+ if [[ $? -eq 0 ]]; then
+ touch "${PASSED_TAG}"
+ fi
+ )
+
+ if [[ -f "${PASSED_TAG}" ]]; then
+ PASSED+=("${TESTCASE}")
+ else
+ FAILED+=("${TESTCASE}")
+ fi
+done
+
+if [[ ${#TESTED[@]} -ne ${#PASSED[@]} ]]; then
+ echo "FAILED"
+ for TEST in "${FAILED[@]}"
+ do
+ echo "- ${TEST}"
+ done
+ exit 255
+fi
+
+echo "PASSED"
+exit 0
diff --git a/compiler/luci-value-test/evalverifytol_ref.sh b/compiler/luci-value-test/evalverifytol_ref.sh
new file mode 100755
index 000000000..cc7267b18
--- /dev/null
+++ b/compiler/luci-value-test/evalverifytol_ref.sh
@@ -0,0 +1,70 @@
+#!/bin/bash
+
+# This script verifies the basic behavior of luci interpreter
+#
+# HOW TO USE
+#
+# ./evalverifytol_ref.sh <path/to/bin_dir> <path/to/ref_dir> <path/to/eval_driver> \
+# <TEST 1> <RTOL 1> <ATOL 1> <TEST 2> <RTOL 2> <ATOL 2> ...
+# bin_dir : build directory of luci-value-test (ex: build/compiler/luci-value-test)
+# ref_dir : artifacts directory where reference test materials exist
+# eval_driver : luci_eval_driver path for evaluation
+
+VERIFY_SOURCE_PATH="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
+VERIFY_SCRIPT_PATH="${VERIFY_SOURCE_PATH}/luci_eval_verifier_ref.py"
+BINDIR="$1"; shift
+REFDIR="$1"; shift
+INTERPRETER_DRIVER_PATH="$1"; shift
+
+TESTED=()
+PASSED=()
+FAILED=()
+
+while (( "$#" >= 3 )); do
+ TESTCASE=$1
+ RTOLERANCE=$2
+ ATOLERANCE=$3
+ shift 3
+
+ TESTED+=("${TESTCASE}")
+
+ TESTCASE_FILE="${REFDIR}/${TESTCASE}"
+ TEST_RESULT_FILE="${BINDIR}/${TESTCASE}"
+
+ PASSED_TAG="${TEST_RESULT_FILE}.passed"
+ rm -f "${PASSED_TAG}"
+
+ cat > "${TEST_RESULT_FILE}.log" <(
+ exec 2>&1
+ set -ex
+
+ "python3" "${VERIFY_SCRIPT_PATH}" \
+ --driver "${INTERPRETER_DRIVER_PATH}" \
+ --model_ref "${TESTCASE_FILE}" \
+ --work_path "${TEST_RESULT_FILE}" \
+ --rtolf32 "${RTOLERANCE}" \
+ --atolf32 "${ATOLERANCE}"
+
+ if [[ $? -eq 0 ]]; then
+ touch "${PASSED_TAG}"
+ fi
+ )
+
+ if [[ -f "${PASSED_TAG}" ]]; then
+ PASSED+=("${TESTCASE}")
+ else
+ FAILED+=("${TESTCASE}")
+ fi
+done
+
+if [[ ${#TESTED[@]} -ne ${#PASSED[@]} ]]; then
+ echo "FAILED"
+ for TEST in "${FAILED[@]}"
+ do
+ echo "- ${TEST}"
+ done
+ exit 255
+fi
+
+echo "PASSED"
+exit 0
diff --git a/compiler/luci-value-test/luci_eval_verifier.py b/compiler/luci-value-test/luci_eval_verifier.py
index a76bd1403..560e34fca 100755
--- a/compiler/luci-value-test/luci_eval_verifier.py
+++ b/compiler/luci-value-test/luci_eval_verifier.py
@@ -14,16 +14,41 @@ import traceback
parser = argparse.ArgumentParser()
parser.add_argument('--driver', type=str, required=True)
parser.add_argument('--model', type=str, required=True)
+parser.add_argument('--rtolf32', type=str, required=False)
+parser.add_argument('--atolf32', type=str, required=False)
args = parser.parse_args()
driver = args.driver
tflite_model = args.model + ".tflite"
circle_model = args.model + ".circle"
+rtolf32 = 1e-5
+atolf32 = 1e-5
+try:
+ if args.rtolf32 != None:
+ rtolf32 = float(args.rtolf32)
+ if args.atolf32 != None:
+ atolf32 = float(args.atolf32)
+except ValueError:
+ print("rtolf32 or atolf32 is not a number")
+ quit(128)
+
# Build TFLite interpreter.
interpreter = tf.lite.Interpreter(tflite_model)
interpreter.allocate_tensors()
+# Read SignatureDef and get the output tensor id order for remapping
+full_signatures = interpreter._get_full_signature_list()
+full_signatures_outputs_remap = None
+if full_signatures != None:
+ signature_serving_default = full_signatures.get('serving_default', None)
+ if signature_serving_default != None:
+ signature_outputs = signature_serving_default['outputs']
+
+ full_signatures_outputs_remap = []
+ for index, (key, value) in enumerate(signature_outputs.items()):
+ full_signatures_outputs_remap.append(value)
+
# Generate random input data.
num_inputs = len(interpreter.get_input_details())
for i in range(num_inputs):
@@ -31,19 +56,40 @@ for i in range(num_inputs):
if input_details["dtype"] == np.float32:
input_data = np.array(
np.random.random_sample(input_details["shape"]), input_details["dtype"])
+ input_dtype = "float32"
elif input_details["dtype"] == np.uint8:
input_data = np.array(
np.random.randint(0, 256, size=input_details["shape"]),
input_details["dtype"])
+ input_dtype = "uint8"
+ elif input_details["dtype"] == np.int16:
+ input_data = np.array(
+ np.random.randint(0, 100, size=input_details["shape"]),
+ input_details["dtype"])
+ input_dtype = "int16"
+ elif input_details["dtype"] == np.int32:
+ input_data = np.array(
+ np.random.randint(0, 100, size=input_details["shape"]),
+ input_details["dtype"])
+ input_dtype = "int32"
+ elif input_details["dtype"] == np.int64:
+ input_data = np.array(
+ np.random.randint(0, 100, size=input_details["shape"]),
+ input_details["dtype"])
+ input_dtype = "int64"
elif input_details["dtype"] == np.bool_:
input_data = np.array(
np.random.choice(a=[True, False], size=input_details["shape"]),
input_details["dtype"])
+ input_dtype = "bool"
else:
raise SystemExit("Unsupported input dtype")
interpreter.set_tensor(input_details["index"], input_data)
input_data.tofile(circle_model + ".input" + str(i))
+ input_details["shape"].tofile(circle_model + ".input" + str(i) + ".shape", sep=',')
+ with open(circle_model + ".input" + str(i) + ".dtype", 'w') as dtype_file:
+ dtype_file.write(input_dtype)
# Do inference
interpreter.invoke()
@@ -57,34 +103,57 @@ subprocess.run(
check=True)
# Compare the results.
-for idx in range(len(interpreter.get_output_details())):
- output_details = interpreter.get_output_details()[idx]
+intp_output_details = interpreter.get_output_details()
+for idx in range(len(intp_output_details)):
+    output_details = intp_output_details[idx]
output_data = np.fromfile(circle_model + ".output" + str(idx),
output_details["dtype"])
shape_file = open(circle_model + ".output" + str(idx) + ".shape", 'r')
output_shape = [int(i) for i in shape_file.read().split(',')]
luci_output_data = np.reshape(output_data, output_shape)
- intp_output_data = interpreter.get_tensor(output_details["index"])
+ output_tensor = output_details["index"]
+ if full_signatures_outputs_remap != None:
+ output_tensor = full_signatures_outputs_remap[idx]
+ intp_output_data = interpreter.get_tensor(output_tensor)
try:
if output_details["dtype"] == np.uint8:
if np.allclose(luci_output_data, intp_output_data, rtol=0, atol=0) == False:
raise SystemExit("Execution result of " + tflite_model +
" does not match with " + circle_model)
+ output_dtype = "uint8"
elif output_details["dtype"] == np.float32:
if np.allclose(
- luci_output_data, intp_output_data, rtol=1.e-5, atol=1.e-5) == False:
+ luci_output_data, intp_output_data, rtol=rtolf32,
+ atol=atolf32) == False:
raise SystemExit("Execution result of " + tflite_model +
" does not match with " + circle_model)
+ output_dtype = "float32"
elif output_details["dtype"] == np.int64:
if np.allclose(luci_output_data, intp_output_data, rtol=0, atol=0) == False:
raise SystemExit("Execution result of " + tflite_model +
" does not match with " + circle_model)
+ output_dtype = "int64"
elif output_details["dtype"] == np.int32:
if np.allclose(luci_output_data, intp_output_data, rtol=0, atol=0) == False:
raise SystemExit("Execution result of " + tflite_model +
" does not match with " + circle_model)
+ output_dtype = "int32"
+ elif output_details["dtype"] == np.int16:
+ if np.allclose(luci_output_data, intp_output_data, rtol=0, atol=0) == False:
+ raise SystemExit("Execution result of " + tflite_model +
+ " does not match with " + circle_model)
+ output_dtype = "int16"
+ elif output_details["dtype"] == np.bool_:
+ if np.allclose(luci_output_data, intp_output_data, rtol=0, atol=0) == False:
+ raise SystemExit("Execution result of " + tflite_model +
+ " does not match with " + circle_model)
+ output_dtype = "bool"
else:
raise SystemExit("Unsupported data type: ", output_details["dtype"])
+
+ # save outputN.dtype file
+ with open(circle_model + ".output" + str(idx) + ".dtype", 'w') as dtype_file:
+ dtype_file.write(output_dtype)
except:
print(traceback.format_exc())
quit(255)
diff --git a/compiler/luci-value-test/luci_eval_verifier_ref.py b/compiler/luci-value-test/luci_eval_verifier_ref.py
new file mode 100755
index 000000000..5313e336e
--- /dev/null
+++ b/compiler/luci-value-test/luci_eval_verifier_ref.py
@@ -0,0 +1,151 @@
+#!/usr/bin/env python3
+import numpy as np
+import subprocess
+import argparse
+import traceback
+import os
+
+#
+# This script compares the execution result of luci-interpreter with that from ref_model path
+#
+# Basic usage:
+# luci_eval_verifier_ref.py --driver build/compiler/luci-eval-driver/luci_eval_driver
+# --ref_model ref_model_path --model this_model_path
+# Assumption:
+# these files exist, each with the purpose noted below
+# - ref_model_path.circle; circle model
+# - ref_model_path.circle.inputN; N'th input numpy data
+# - ref_model_path.circle.inputN.dtype; N'th input data type in text
+# - ref_model_path.circle.inputN.shape; N'th input data shape in CSV
+# - ref_model_path.circle.outputN; N'th output numpy data
+# - ref_model_path.circle.outputN.dtype; N'th output data type in text
+# - ref_model_path.circle.outputN.shape; N'th output data shape in CSV
+
+
+def dtype_from_file(file_path):
+ with open(file_path, 'r') as dtype_file:
+ dtype_str = dtype_file.read()
+ if dtype_str == "float32":
+ return np.float32
+ if dtype_str == "uint8":
+ return np.uint8
+ if dtype_str == "int16":
+ return np.int16
+ if dtype_str == "int32":
+ return np.int32
+ if dtype_str == "int64":
+ return np.int64
+ if dtype_str == "bool":
+ return np.bool_
+ raise SystemExit("Unsupported dtype from file", dtype_str)
+
+
+parser = argparse.ArgumentParser()
+parser.add_argument('--driver', type=str, required=True)
+parser.add_argument('--model_ref', type=str, required=True)
+parser.add_argument('--work_path', type=str, required=True)
+parser.add_argument('--rtolf32', type=str, required=False)
+parser.add_argument('--atolf32', type=str, required=False)
+args = parser.parse_args()
+
+driver = args.driver
+circle_model_ref = args.model_ref + ".circle"
+circle_model = args.work_path + ".circle"
+# circle_model is named this way to follow the existing luci_eval_verifier.py
+
+rtolf32 = 1e-5
+atolf32 = 1e-5
+try:
+ if args.rtolf32 != None:
+ rtolf32 = float(args.rtolf32)
+ if args.atolf32 != None:
+ atolf32 = float(args.atolf32)
+except ValueError:
+ print("rtolf32 or atolf32 is not a number")
+ quit(128)
+
+# get the number of inputs by checking the existence of model.inputN files
+check_input = 0
+while True:
+ input_file_path = circle_model_ref + ".input" + str(check_input)
+ if not os.path.isfile(input_file_path):
+ num_inputs = check_input
+ break
+ check_input = check_input + 1
+
+if num_inputs == 0:
+ print("input file not exist for", circle_model_ref)
+ quit(128)
+
+# get the number of outputs by checking the existence of model.outputN files
+check_output = 0
+while True:
+ output_file_path = circle_model_ref + ".output" + str(check_output)
+ if not os.path.isfile(output_file_path):
+ num_outputs = check_output
+ break
+ check_output = check_output + 1
+
+if num_outputs == 0:
+ print("output file not exist for", circle_model_ref)
+ quit(128)
+
+# Execute luci interpreter with reference input
+subprocess.run(
+ [
+ driver, circle_model_ref,
+ str(num_inputs), circle_model_ref + ".input", circle_model + ".output"
+ ],
+ check=True)
+
+# Compare the results.
+for idx in range(num_outputs):
+ output_dtype = dtype_from_file(circle_model_ref + ".output" + str(idx) + ".dtype")
+ shape_file = open(circle_model_ref + ".output" + str(idx) + ".shape", 'r')
+ output_shape = [int(i) for i in shape_file.read().split(',')]
+
+ output_data_ref = np.fromfile(circle_model_ref + ".output" + str(idx), output_dtype)
+ luci_output_data_ref = np.reshape(output_data_ref, output_shape)
+
+ output_data = np.fromfile(circle_model + ".output" + str(idx), output_dtype)
+ luci_output_data = np.reshape(output_data, output_shape)
+
+ try:
+ if output_dtype == np.uint8:
+ if np.allclose(
+ luci_output_data, luci_output_data_ref, rtol=0, atol=0) == False:
+ raise SystemExit("Execution result of " + circle_model_ref +
+ " does not match with " + circle_model)
+ elif output_dtype == np.float32:
+ if np.allclose(
+ luci_output_data, luci_output_data_ref, rtol=rtolf32,
+ atol=atolf32) == False:
+ raise SystemExit("Execution result of " + circle_model_ref +
+ " does not match with " + circle_model)
+ elif output_dtype == np.int64:
+ if np.allclose(
+ luci_output_data, luci_output_data_ref, rtol=0, atol=0) == False:
+ raise SystemExit("Execution result of " + circle_model_ref +
+ " does not match with " + circle_model)
+ elif output_dtype == np.int32:
+ if np.allclose(
+ luci_output_data, luci_output_data_ref, rtol=0, atol=0) == False:
+ raise SystemExit("Execution result of " + circle_model_ref +
+ " does not match with " + circle_model)
+ elif output_dtype == np.int16:
+ if np.allclose(
+ luci_output_data, luci_output_data_ref, rtol=0, atol=0) == False:
+ raise SystemExit("Execution result of " + circle_model_ref +
+ " does not match with " + circle_model)
+ elif output_dtype == np.bool_:
+ if np.allclose(
+ luci_output_data, luci_output_data_ref, rtol=0, atol=0) == False:
+ raise SystemExit("Execution result of " + circle_model_ref +
+ " does not match with " + circle_model)
+ else:
+ raise SystemExit("Unsupported data type: ", output_dtype)
+ except:
+ print(traceback.format_exc())
+ quit(255)
+
+quit(0)
diff --git a/compiler/luci-value-test/test.lst b/compiler/luci-value-test/test.lst
index 2b5c93fa3..f62b72919 100644
--- a/compiler/luci-value-test/test.lst
+++ b/compiler/luci-value-test/test.lst
@@ -20,90 +20,90 @@ addeval(ArgMax_U8_003)
#addeval(ArgMin_U8_002)
#addeval(ArgMin_U8_003)
addeval(AveragePool2D_000)
-#addeval(BatchMatMul_000)
+addeval(BatchMatMul_000)
#addeval(BatchMatMulV2_000)
#addeval(BatchMatMulV2_001)
#addeval(BatchToSpaceND_000)
-#addeval(Cast_000)
-#addeval(Cast_001)
+addeval(Cast_000)
+addeval(Cast_001)
#addeval(Ceil_000)
addeval(Concatenation_000)
addeval(Concatenation_U8_000)
addeval(Conv2D_000)
addeval(Conv2D_001)
addeval(Conv2D_002)
-#addeval(Conv2D_003)
+addeval(Conv2D_003)
addeval(Conv2D_U8_000)
addeval(Conv2D_U8_001)
#addeval(Cos_000)
-#addeval(DepthToSpace_000)
+addeval(DepthToSpace_000)
addeval(DepthwiseConv2D_000)
addeval(DepthwiseConv2D_U8_000)
#addeval(DepthwiseConv2D_U8_001)
addeval(DepthwiseConv2D_001)
-#addeval(Div_000)
+addeval(Div_000)
addeval(ELU_000)
-#addeval(Equal_000)
-#addeval(Exp_000)
+addeval(Equal_000)
+addeval(Exp_000)
#addeval(ExpandDims_000)
#addeval(ExpandDims_001)
#addeval(ExpandDims_002)
#addeval(ExpandDims_003)
#addeval(Fill_000)
#addeval(Fill_001)
-#addeval(Floor_000)
-#addeval(FloorDiv_000)
-#addeval(FloorDiv_001)
+addeval(Floor_000)
+addeval(FloorDiv_000)
+addeval(FloorDiv_001)
#addeval(FloorMod_000)
#addeval(FloorMod_001)
addeval(FullyConnected_000)
addeval(FullyConnected_001)
addeval(FullyConnected_002)
#addeval(FullyConnected_U8_000)
-#addeval(Gather_000)
+addeval(Gather_000)
#addeval(GatherNd_000)
#addeval(Greater_000)
-#addeval(GreaterEqual_000)
+addeval(GreaterEqual_000)
addeval(If_000)
addeval(If_001)
addeval(L2Normalize_000)
addeval(L2Pool2D_000)
#addeval(L2Pool2D_U8_000)
addeval(LeakyRelu_000)
-#addeval(Less_000)
-#addeval(LessEqual_000)
+addeval(Less_000)
+addeval(LessEqual_000)
addeval(LocalResponseNormalization_000)
#addeval(Log_000)
-#addeval(LogicalAnd_000)
-#addeval(LogicalNot_000)
-#addeval(LogicalOr_000)
+addeval(LogicalAnd_000)
+addeval(LogicalNot_000)
+addeval(LogicalOr_000)
addeval(Logistic_000)
-#addeval(LogSoftmax_000)
+addeval(LogSoftmax_000)
#addeval(MatMul_000)
#addeval(MatrixDiag_000)
#addeval(MatrixSetDiag_000)
-#addeval(Maximum_000)
+addeval(Maximum_000)
addeval(MaxPool2D_000)
addeval(MaxPool2D_U8_000)
addeval(Mean_000)
addeval(Mean_001)
-#addeval(Mean_U8_000)
-#addeval(Minimum_000)
+addeval(Mean_U8_000)
+addeval(Minimum_000)
#addeval(MirrorPad_000)
addeval(Mul_000)
#addeval(Mul_U8_000)
-#addeval(Neg_000)
-#addeval(NotEqual_000)
-#addeval(OneHot_000)
-#addeval(OneHot_001)
-#addeval(OneHot_002)
+addeval(Neg_000)
+addeval(NotEqual_000)
+addeval(OneHot_000)
+addeval(OneHot_001)
+addeval(OneHot_002)
#addeval(OneHot_003)
-#addeval(Pack_000)
-#addeval(Pack_U8_000)
+addeval(Pack_000)
+addeval(Pack_U8_000)
addeval(Pad_000)
addeval(Pad_U8_000)
-#addeval(Pow_000)
-#addeval(PRelu_000)
+addeval(Pow_000)
+addeval(PRelu_000)
#addeval(Range_000)
#addeval(Rank_000)
#addeval(ReduceAny_000)
@@ -116,20 +116,20 @@ addeval(Pad_U8_000)
#addeval(ReduceProd_001)
#addeval(ReduceProd_002)
#addeval(ReduceProd_003)
-#addeval(ReLU_000)
-#addeval(ReLU6_000)
+addeval(ReLU_000)
+addeval(ReLU6_000)
#addeval(ReLUN1To1_000)
addeval(Reshape_000)
addeval(Reshape_001)
addeval(Reshape_002)
#addeval(Reshape_003)
addeval(Reshape_U8_000)
-#addeval(ResizeBilinear_000)
-#addeval(ResizeNearestNeighbor_000)
+addeval(ResizeBilinear_000)
+addeval(ResizeNearestNeighbor_000)
#addeval(ReverseSequence_000)
#addeval(ReverseV2_000)
#addeval(Round_000)
-#addeval(Rsqrt_000)
+addeval(Rsqrt_000)
#addeval(ScatterNd_000)
#addeval(SegmentSum_000)
#addeval(Select_000)
@@ -139,37 +139,39 @@ addeval(Reshape_U8_000)
#addeval(SelectV2_001)
#addeval(SelectV2_002)
#addeval(Shape_000)
+addeval(SignatureDef_MultiOut_000)
+addeval(SignatureDef_MultiOut_001)
#addeval(Sin_000)
addeval(Slice_000)
addeval(Softmax_000)
-#addeval(Softmax_U8_000)
-#addeval(SpaceToBatchND_000)
-#addeval(SpaceToBatchND_001)
-#addeval(SpaceToBatchND_002)
-#addeval(SpaceToBatchND_003)
+addeval(Softmax_U8_000)
+addeval(SpaceToBatchND_000)
+addeval(SpaceToBatchND_001)
+addeval(SpaceToBatchND_002)
+addeval(SpaceToBatchND_003)
addeval(SpaceToDepth_000)
#addeval(SparseToDense_000)
addeval(Split_000)
-#addeval(SplitV_000)
-#addeval(Sqrt_000)
-#addeval(Square_000)
-#addeval(SquaredDifference_000)
+addeval(SplitV_000)
+addeval(Sqrt_000)
+addeval(Square_000)
+addeval(SquaredDifference_000)
addeval(Squeeze_000)
addeval(Squeeze_001)
addeval(StridedSlice_000)
addeval(StridedSlice_001)
addeval(StridedSlice_002)
-#addeval(Sub_000)
-#addeval(Sub_U8_000)
+addeval(Sub_000)
+addeval(Sub_U8_000)
#addeval(Sum_000)
#addeval(Sum_001)
-#addeval(Tanh_000)
+addeval(Tanh_000)
#addeval(Tile_000)
#addeval(Tile_U8_000)
#addeval(TopKV2_000)
#addeval(TopKV2_001)
addeval(Transpose_000)
-#addeval(TransposeConv_000)
+addeval(TransposeConv_000)
addeval(Unpack_000)
addeval(Unpack_001)
addeval(Unpack_002)
@@ -180,9 +182,13 @@ addeval(Unpack_003)
#addeval(While_001)
#addeval(While_002)
#addeval(While_003)
-#addeval(YUV_TO_RGB_U8_000)
+addeval(YUV_TO_RGB_U8_000)
#addeval(ZerosLike_000)
# Simple Network test
addeval(Part_While_000)
addeval(Part_While_001)
+
+# Tests with tolerance
+addevaltol(SVDF_000 8e-3 8e-3)
+addevaltol(SVDF_001 8e-3 8e-3)
diff --git a/compiler/luci/CMakeLists.txt b/compiler/luci/CMakeLists.txt
index b92eefb40..460dc7b23 100644
--- a/compiler/luci/CMakeLists.txt
+++ b/compiler/luci/CMakeLists.txt
@@ -23,4 +23,8 @@ add_subdirectory(import)
add_subdirectory(export)
add_subdirectory(tester)
+if(NOT ENABLE_TEST)
+ return()
+endif(NOT ENABLE_TEST)
+
add_subdirectory(tests)
diff --git a/compiler/luci/export/CMakeLists.txt b/compiler/luci/export/CMakeLists.txt
index a267d0e1f..f46181eb6 100644
--- a/compiler/luci/export/CMakeLists.txt
+++ b/compiler/luci/export/CMakeLists.txt
@@ -12,7 +12,7 @@ target_include_directories(luci_export PUBLIC include)
target_link_libraries(luci_export PRIVATE luci_lang)
target_link_libraries(luci_export PRIVATE luci_service)
target_link_libraries(luci_export PRIVATE luci_pass)
-target_link_libraries(luci_export PRIVATE mio_circle)
+target_link_libraries(luci_export PRIVATE mio_circle04)
target_link_libraries(luci_export PRIVATE luci_env)
target_link_libraries(luci_export PRIVATE luci_log)
target_link_libraries(luci_export PRIVATE luci_logex)
@@ -36,6 +36,6 @@ target_include_directories(luci_export_test PRIVATE src)
target_link_libraries(luci_export_test luci_export)
target_link_libraries(luci_export_test luci_plan)
target_link_libraries(luci_export_test luci_lang)
-target_link_libraries(luci_export_test mio_circle)
+target_link_libraries(luci_export_test mio_circle04)
target_link_libraries(luci_export_test luci_env)
target_link_libraries(luci_export_test oops)
diff --git a/compiler/luci/export/src/CircleBuiltinTypesExtractor.h b/compiler/luci/export/src/CircleBuiltinTypesExtractor.h
new file mode 100644
index 000000000..0ff21a34b
--- /dev/null
+++ b/compiler/luci/export/src/CircleBuiltinTypesExtractor.h
@@ -0,0 +1,539 @@
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __CIRCLE_BUILTIN_TYPES_EXTRACTOR_H__
+#define __CIRCLE_BUILTIN_TYPES_EXTRACTOR_H__
+
+#include "CircleExporterUtils.h"
+
+#include <luci/IR/CircleNode.h>
+#include <luci/IR/CircleNodes.h>
+#include <luci/IR/CircleNodeVisitor.h>
+
+#include <flatbuffers/flexbuffers.h>
+
+namespace luci
+{
+
+// NOTE Virtual nodes are not circle builtin operators.
+// Therefore, they are not defined here.
+class BuiltinOptionsExtractor final
+ : public luci::CircleNodeMutableVisitor<flatbuffers::Offset<void>>
+{
+public:
+ BuiltinOptionsExtractor(flatbuffers::FlatBufferBuilder &builder) : _builder{builder}
+ {
+ // DO NOTHING
+ }
+
+public:
+ flatbuffers::Offset<void> visit(luci::CircleAbs *)
+ {
+ return circle::CreateAbsOptions(_builder).Union();
+ }
+ flatbuffers::Offset<void> visit(luci::CircleAdd *node)
+ {
+ return circle::CreateAddOptions(_builder, to_circle_actfunc(node->fusedActivationFunction()))
+ .Union();
+ }
+ flatbuffers::Offset<void> visit(luci::CircleAddN *)
+ {
+ return circle::CreateAddNOptions(_builder).Union();
+ }
+ flatbuffers::Offset<void> visit(luci::CircleArgMax *node)
+ {
+ return circle::CreateArgMaxOptions(_builder, luci::to_circle_tensortype(node->output_type()))
+ .Union();
+ }
+ flatbuffers::Offset<void> visit(luci::CircleArgMin *node)
+ {
+ return circle::CreateArgMinOptions(_builder, luci::to_circle_tensortype(node->output_type()))
+ .Union();
+ }
+ flatbuffers::Offset<void> visit(luci::CircleAveragePool2D *node)
+ {
+ return circle::CreatePool2DOptions(_builder, getOpPadding(node->padding()), node->stride()->w(),
+ node->stride()->h(), node->filter()->w(),
+ node->filter()->h(),
+ to_circle_actfunc(node->fusedActivationFunction()))
+ .Union();
+ }
+ flatbuffers::Offset<void> visit(luci::CircleBatchMatMul *node)
+ {
+ return circle::CreateBatchMatMulOptions(_builder, node->adj_x(), node->adj_y()).Union();
+ }
+ flatbuffers::Offset<void> visit(luci::CircleBatchToSpaceND *)
+ {
+ return circle::CreateBatchToSpaceNDOptions(_builder).Union();
+ }
+ flatbuffers::Offset<void> visit(luci::CircleBidirectionalSequenceLSTM *node)
+ {
+ return circle::CreateBidirectionalSequenceLSTMOptions(
+ _builder, to_circle_actfunc(node->fusedActivationFunction()), node->cell_clip(),
+ node->proj_clip(), node->merge_outputs(), node->time_major(),
+ node->asymmetric_quantize_inputs())
+ .Union();
+ }
+ flatbuffers::Offset<void> visit(luci::CircleCast *node)
+ {
+ if (node->out_data_type() == loco::DataType::Unknown)
+ return _no_option;
+ else
+ return circle::CreateCastOptions(_builder, luci::to_circle_tensortype(node->in_data_type()),
+ luci::to_circle_tensortype(node->out_data_type()))
+ .Union();
+ }
+ flatbuffers::Offset<void> visit(luci::CircleCeil *) { return _no_option; }
+ flatbuffers::Offset<void> visit(luci::CircleConcatenation *node)
+ {
+ return circle::CreateConcatenationOptions(_builder, node->axis(),
+ to_circle_actfunc(node->fusedActivationFunction()))
+ .Union();
+ }
+ // CircleConst is not a virtual node, but it is also not a builtin operator, so there is no
+ // flatbuffers::Offset<void> visit(luci::CircleConst *)
+ flatbuffers::Offset<void> visit(luci::CircleConv2D *node)
+ {
+ return circle::CreateConv2DOptions(_builder, getOpPadding(node->padding()), node->stride()->w(),
+ node->stride()->h(),
+ to_circle_actfunc(node->fusedActivationFunction()),
+ node->dilation()->w(), node->dilation()->h())
+ .Union();
+ }
+ flatbuffers::Offset<void> visit(luci::CircleCos *)
+ {
+ return circle::CreateCosOptions(_builder).Union();
+ }
+ flatbuffers::Offset<void> visit(luci::CircleCustom *) { return _no_option; }
+ flatbuffers::Offset<void> visit(luci::CircleDepthToSpace *node)
+ {
+ return circle::CreateDepthToSpaceOptions(_builder, node->block_size()).Union();
+ }
+ flatbuffers::Offset<void> visit(luci::CircleDepthwiseConv2D *node)
+ {
+ return circle::CreateDepthwiseConv2DOptions(
+ _builder, getOpPadding(node->padding()), node->stride()->w(), node->stride()->h(),
+ node->depthMultiplier(), to_circle_actfunc(node->fusedActivationFunction()),
+ node->dilation()->w(), node->dilation()->h())
+ .Union();
+ }
+ flatbuffers::Offset<void> visit(luci::CircleDequantize *) { return _no_option; }
+ flatbuffers::Offset<void> visit(luci::CircleDiv *node)
+ {
+ return circle::CreateDivOptions(_builder, to_circle_actfunc(node->fusedActivationFunction()))
+ .Union();
+ }
+ flatbuffers::Offset<void> visit(luci::CircleElu *) { return _no_option; }
+ flatbuffers::Offset<void> visit(luci::CircleEqual *)
+ {
+ return circle::CreateEqualOptions(_builder).Union();
+ }
+ flatbuffers::Offset<void> visit(luci::CircleExp *)
+ {
+ return circle::CreateExpOptions(_builder).Union();
+ }
+ flatbuffers::Offset<void> visit(luci::CircleExpandDims *)
+ {
+ return circle::CreateExpandDimsOptions(_builder).Union();
+ }
+ flatbuffers::Offset<void> visit(luci::CircleFakeQuant *node)
+ {
+ return circle::CreateFakeQuantOptions(_builder, node->min(), node->max(), node->num_bits(),
+ node->narrow_range())
+ .Union();
+ }
+ flatbuffers::Offset<void> visit(luci::CircleFill *)
+ {
+ return circle::CreateFillOptions(_builder).Union();
+ }
+ flatbuffers::Offset<void> visit(luci::CircleFloor *) { return _no_option; }
+ flatbuffers::Offset<void> visit(luci::CircleFloorDiv *)
+ {
+ return circle::CreateFloorDivOptions(_builder).Union();
+ }
+ flatbuffers::Offset<void> visit(luci::CircleFloorMod *)
+ {
+ return circle::CreateFloorModOptions(_builder).Union();
+ }
+ flatbuffers::Offset<void> visit(luci::CircleFullyConnected *node)
+ {
+ return circle::CreateFullyConnectedOptions(
+ _builder, to_circle_actfunc(node->fusedActivationFunction()),
+ to_circle_weightsformat(node->weights_format()), node->keep_num_dims())
+ .Union();
+ }
+ flatbuffers::Offset<void> visit(luci::CircleGather *node)
+ {
+ return circle::CreateGatherOptions(_builder, node->axis()).Union();
+ }
+ flatbuffers::Offset<void> visit(luci::CircleGatherNd *)
+ {
+ return circle::CreateGatherNdOptions(_builder).Union();
+ }
+ flatbuffers::Offset<void> visit(luci::CircleGreater *)
+ {
+ return circle::CreateGreaterOptions(_builder).Union();
+ }
+ flatbuffers::Offset<void> visit(luci::CircleGreaterEqual *)
+ {
+ return circle::CreateGreaterEqualOptions(_builder).Union();
+ }
+ flatbuffers::Offset<void> visit(luci::CircleIf *node)
+ {
+ return circle::CreateIfOptions(_builder, node->then_branch(), node->else_branch()).Union();
+ }
+ flatbuffers::Offset<void> visit(luci::CircleL2Normalize *node)
+ {
+ return circle::CreateL2NormOptions(_builder, to_circle_actfunc(node->fusedActivationFunction()))
+ .Union();
+ }
+ flatbuffers::Offset<void> visit(luci::CircleL2Pool2D *node)
+ {
+ return circle::CreatePool2DOptions(_builder, getOpPadding(node->padding()), node->stride()->w(),
+ node->stride()->h(), node->filter()->w(),
+ node->filter()->h(),
+ to_circle_actfunc(node->fusedActivationFunction()))
+ .Union();
+ }
+ flatbuffers::Offset<void> visit(luci::CircleLeakyRelu *node)
+ {
+ return circle::CreateLeakyReluOptions(_builder, node->alpha()).Union();
+ }
+ flatbuffers::Offset<void> visit(luci::CircleLess *)
+ {
+ return circle::CreateLessOptions(_builder).Union();
+ }
+ flatbuffers::Offset<void> visit(luci::CircleLessEqual *)
+ {
+ return circle::CreateLessEqualOptions(_builder).Union();
+ }
+ flatbuffers::Offset<void> visit(luci::CircleLocalResponseNormalization *node)
+ {
+ return circle::CreateLocalResponseNormalizationOptions(_builder, node->radius(), node->bias(),
+ node->alpha(), node->beta())
+ .Union();
+ }
+ flatbuffers::Offset<void> visit(luci::CircleLog *) { return _no_option; }
+ flatbuffers::Offset<void> visit(luci::CircleLogicalAnd *)
+ {
+ return circle::CreateLogicalAndOptions(_builder).Union();
+ }
+ flatbuffers::Offset<void> visit(luci::CircleLogicalNot *)
+ {
+ return circle::CreateLogicalNotOptions(_builder).Union();
+ }
+ flatbuffers::Offset<void> visit(luci::CircleLogicalOr *)
+ {
+ return circle::CreateLogicalOrOptions(_builder).Union();
+ }
+ flatbuffers::Offset<void> visit(luci::CircleLogistic *) { return _no_option; }
+ flatbuffers::Offset<void> visit(luci::CircleLogSoftmax *)
+ {
+ return circle::CreateLogSoftmaxOptions(_builder).Union();
+ }
+ flatbuffers::Offset<void> visit(luci::CircleMatrixDiag *)
+ {
+ return circle::CreateMatrixDiagOptions(_builder).Union();
+ }
+ flatbuffers::Offset<void> visit(luci::CircleMatrixSetDiag *)
+ {
+ return circle::CreateMatrixSetDiagOptions(_builder).Union();
+ }
+ flatbuffers::Offset<void> visit(luci::CircleMaximum *)
+ {
+ return circle::CreateMaximumMinimumOptions(_builder).Union();
+ }
+ flatbuffers::Offset<void> visit(luci::CircleMaxPool2D *node)
+ {
+ return circle::CreatePool2DOptions(_builder, getOpPadding(node->padding()), node->stride()->w(),
+ node->stride()->h(), node->filter()->w(),
+ node->filter()->h(),
+ to_circle_actfunc(node->fusedActivationFunction()))
+ .Union();
+ }
+ flatbuffers::Offset<void> visit(luci::CircleMean *node)
+ {
+ return circle::CreateReducerOptions(_builder, node->keep_dims()).Union();
+ }
+ flatbuffers::Offset<void> visit(luci::CircleMinimum *)
+ {
+ return circle::CreateMaximumMinimumOptions(_builder).Union();
+ }
+ flatbuffers::Offset<void> visit(luci::CircleMirrorPad *node)
+ {
+ return circle::CreateMirrorPadOptions(_builder, to_circle_mirrorpadmode(node->mode())).Union();
+ }
+ flatbuffers::Offset<void> visit(luci::CircleMul *node)
+ {
+ return circle::CreateMulOptions(_builder, to_circle_actfunc(node->fusedActivationFunction()))
+ .Union();
+ }
+ flatbuffers::Offset<void> visit(luci::CircleNeg *)
+ {
+ return circle::CreateNegOptions(_builder).Union();
+ }
+ flatbuffers::Offset<void> visit(luci::CircleNonMaxSuppressionV4 *)
+ {
+ return circle::CreateNonMaxSuppressionV4Options(_builder).Union();
+ }
+ flatbuffers::Offset<void> visit(luci::CircleNonMaxSuppressionV5 *)
+ {
+ return circle::CreateNonMaxSuppressionV5Options(_builder).Union();
+ }
+ flatbuffers::Offset<void> visit(luci::CircleNotEqual *)
+ {
+ return circle::CreateNotEqualOptions(_builder).Union();
+ }
+ flatbuffers::Offset<void> visit(luci::CircleOneHot *node)
+ {
+ return circle::CreateOneHotOptions(_builder, node->axis()).Union();
+ }
+ flatbuffers::Offset<void> visit(luci::CirclePack *node)
+ {
+ return circle::CreatePackOptions(_builder, node->values_count(), node->axis()).Union();
+ }
+ flatbuffers::Offset<void> visit(luci::CirclePad *)
+ {
+ return circle::CreatePadOptions(_builder).Union();
+ }
+ flatbuffers::Offset<void> visit(luci::CirclePadV2 *)
+ {
+ return circle::CreatePadV2Options(_builder).Union();
+ }
+ flatbuffers::Offset<void> visit(luci::CirclePow *)
+ {
+ return circle::CreatePowOptions(_builder).Union();
+ }
+ flatbuffers::Offset<void> visit(luci::CirclePRelu *) { return _no_option; }
+ flatbuffers::Offset<void> visit(luci::CircleQuantize *) { return _no_option; }
+ flatbuffers::Offset<void> visit(luci::CircleRange *)
+ {
+ return circle::CreateRangeOptions(_builder).Union();
+ }
+ flatbuffers::Offset<void> visit(luci::CircleRank *)
+ {
+ return circle::CreateRankOptions(_builder).Union();
+ }
+ flatbuffers::Offset<void> visit(luci::CircleReduceAny *node)
+ {
+ return circle::CreateReducerOptions(_builder, node->keep_dims()).Union();
+ }
+ flatbuffers::Offset<void> visit(luci::CircleReduceMax *node)
+ {
+ return circle::CreateReducerOptions(_builder, node->keep_dims()).Union();
+ }
+ flatbuffers::Offset<void> visit(luci::CircleReduceMin *node)
+ {
+ return circle::CreateReducerOptions(_builder, node->keep_dims()).Union();
+ }
+ flatbuffers::Offset<void> visit(luci::CircleReduceProd *node)
+ {
+ return circle::CreateReducerOptions(_builder, node->keep_dims()).Union();
+ }
+ flatbuffers::Offset<void> visit(luci::CircleRelu *) { return _no_option; }
+ flatbuffers::Offset<void> visit(luci::CircleRelu6 *) { return _no_option; }
+ flatbuffers::Offset<void> visit(luci::CircleReluN1To1 *) { return _no_option; }
+ flatbuffers::Offset<void> visit(luci::CircleReshape *node)
+ {
+ auto new_shape = _builder.CreateVector<int32_t>(
+ node->newShape()->rank(), [node](size_t i) { return node->newShape()->dim(i); });
+ return circle::CreateReshapeOptions(_builder, new_shape).Union();
+ }
+ flatbuffers::Offset<void> visit(luci::CircleResizeBilinear *node)
+ {
+ return circle::CreateResizeBilinearOptions(_builder, node->align_corners(),
+ node->half_pixel_centers())
+ .Union();
+ }
+ flatbuffers::Offset<void> visit(luci::CircleResizeNearestNeighbor *node)
+ {
+ return circle::CreateResizeNearestNeighborOptions(_builder, node->align_corners()).Union();
+ }
+ flatbuffers::Offset<void> visit(luci::CircleReverseSequence *node)
+ {
+ return circle::CreateReverseSequenceOptions(_builder, node->seq_axis(), node->batch_axis())
+ .Union();
+ }
+ flatbuffers::Offset<void> visit(luci::CircleReverseV2 *)
+ {
+ return circle::CreateReverseV2Options(_builder).Union();
+ }
+ flatbuffers::Offset<void> visit(luci::CircleRound *) { return _no_option; }
+ flatbuffers::Offset<void> visit(luci::CircleRsqrt *) { return _no_option; }
+ flatbuffers::Offset<void> visit(luci::CircleScatterNd *)
+ {
+ return circle::CreateScatterNdOptions(_builder).Union();
+ }
+ flatbuffers::Offset<void> visit(luci::CircleSegmentSum *)
+ {
+ return circle::CreateSegmentSumOptions(_builder).Union();
+ }
+ flatbuffers::Offset<void> visit(luci::CircleSelect *)
+ {
+ return circle::CreateSelectOptions(_builder).Union();
+ }
+ flatbuffers::Offset<void> visit(luci::CircleSelectV2 *)
+ {
+ return circle::CreateSelectV2Options(_builder).Union();
+ }
+ flatbuffers::Offset<void> visit(luci::CircleShape *node)
+ {
+ return circle::CreateShapeOptions(_builder, luci::to_circle_tensortype(node->out_type()))
+ .Union();
+ }
+ flatbuffers::Offset<void> visit(luci::CircleSin *) { return _no_option; }
+ flatbuffers::Offset<void> visit(luci::CircleSlice *)
+ {
+ return circle::CreateSliceOptions(_builder).Union();
+ }
+ flatbuffers::Offset<void> visit(luci::CircleSoftmax *node)
+ {
+ return circle::CreateSoftmaxOptions(_builder, node->beta()).Union();
+ }
+ flatbuffers::Offset<void> visit(luci::CircleSpaceToBatchND *)
+ {
+ return circle::CreateSpaceToBatchNDOptions(_builder).Union();
+ }
+ flatbuffers::Offset<void> visit(luci::CircleSpaceToDepth *node)
+ {
+ return circle::CreateSpaceToDepthOptions(_builder, node->block_size()).Union();
+ }
+ flatbuffers::Offset<void> visit(luci::CircleSparseToDense *node)
+ {
+ return circle::CreateSparseToDenseOptions(_builder, node->validate_indices()).Union();
+ }
+ flatbuffers::Offset<void> visit(luci::CircleSplit *node)
+ {
+ return circle::CreateSplitOptions(_builder, node->num_split()).Union();
+ }
+ flatbuffers::Offset<void> visit(luci::CircleSplitV *node)
+ {
+ return circle::CreateSplitVOptions(_builder, node->num_split()).Union();
+ }
+ flatbuffers::Offset<void> visit(luci::CircleSqrt *) { return _no_option; }
+ flatbuffers::Offset<void> visit(luci::CircleSquare *)
+ {
+ return circle::CreateSquareOptions(_builder).Union();
+ }
+ flatbuffers::Offset<void> visit(luci::CircleSquaredDifference *)
+ {
+ return circle::CreateSquaredDifferenceOptions(_builder).Union();
+ }
+ flatbuffers::Offset<void> visit(luci::CircleSqueeze *node)
+ {
+ auto squeeze_dims = _builder.CreateVector<int32_t>(node->squeeze_dims());
+ return circle::CreateSqueezeOptions(_builder, squeeze_dims).Union();
+ }
+ flatbuffers::Offset<void> visit(luci::CircleStridedSlice *node)
+ {
+ return circle::CreateStridedSliceOptions(_builder, node->begin_mask(), node->end_mask(),
+ node->ellipsis_mask(), node->new_axis_mask(),
+ node->shrink_axis_mask())
+ .Union();
+ }
+ flatbuffers::Offset<void> visit(luci::CircleSub *node)
+ {
+ return circle::CreateSubOptions(_builder, to_circle_actfunc(node->fusedActivationFunction()))
+ .Union();
+ }
+ flatbuffers::Offset<void> visit(luci::CircleSum *node)
+ {
+ return circle::CreateReducerOptions(_builder, node->keep_dims()).Union();
+ }
+ flatbuffers::Offset<void> visit(luci::CircleSVDF *node)
+ {
+ return circle::CreateSVDFOptions(_builder, node->svdf_rank(),
+ to_circle_actfunc(node->fusedActivationFunction()),
+ node->asymmetric_quantize_inputs())
+ .Union();
+ }
+ flatbuffers::Offset<void> visit(luci::CircleTanh *) { return _no_option; }
+ flatbuffers::Offset<void> visit(luci::CircleTile *)
+ {
+ return circle::CreateTileOptions(_builder).Union();
+ }
+ flatbuffers::Offset<void> visit(luci::CircleTopKV2 *)
+ {
+ return circle::CreateTopKV2Options(_builder).Union();
+ }
+ flatbuffers::Offset<void> visit(luci::CircleTranspose *)
+ {
+ return circle::CreateTransposeOptions(_builder).Union();
+ }
+ flatbuffers::Offset<void> visit(luci::CircleTransposeConv *node)
+ {
+ return circle::CreateTransposeConvOptions(_builder, getOpPadding(node->padding()),
+ node->stride()->w(), node->stride()->h())
+ .Union();
+ }
+ flatbuffers::Offset<void> visit(luci::CircleUnidirectionalSequenceLSTM *node)
+ {
+ return circle::CreateUnidirectionalSequenceLSTMOptions(
+ _builder, to_circle_actfunc(node->fusedActivationFunction()), node->cell_clip(),
+ node->proj_clip(), node->time_major(), node->asymmetric_quantize_inputs())
+ .Union();
+ }
+ flatbuffers::Offset<void> visit(luci::CircleUnique *node)
+ {
+ return circle::CreateUniqueOptions(_builder, luci::to_circle_tensortype(node->idx_out_type()))
+ .Union();
+ }
+ flatbuffers::Offset<void> visit(luci::CircleUnpack *node)
+ {
+ return circle::CreateUnpackOptions(_builder, node->num(), node->axis()).Union();
+ }
+ flatbuffers::Offset<void> visit(luci::CircleWhere *)
+ {
+ return circle::CreateWhereOptions(_builder).Union();
+ }
+ flatbuffers::Offset<void> visit(luci::CircleWhile *node)
+ {
+ return circle::CreateWhileOptions(_builder, node->cond_branch(), node->body_branch()).Union();
+ }
+ flatbuffers::Offset<void> visit(luci::CircleZerosLike *)
+ {
+ return circle::CreateZerosLikeOptions(_builder).Union();
+ }
+ // Circle only
+ flatbuffers::Offset<void> visit(luci::CircleBCQFullyConnected *node)
+ {
+ return circle::CreateBCQFullyConnectedOptions(
+ _builder, node->weights_hidden_size(),
+ to_circle_actfunc(node->fusedActivationFunction()))
+ .Union();
+ }
+ flatbuffers::Offset<void> visit(luci::CircleBCQGather *node)
+ {
+ return circle::CreateBCQGatherOptions(_builder, node->input_hidden_size(), node->axis())
+ .Union();
+ }
+ flatbuffers::Offset<void> visit(luci::CircleInstanceNorm *node)
+ {
+ return circle::CreateInstanceNormOptions(_builder, node->epsilon(),
+ to_circle_actfunc(node->fusedActivationFunction()))
+ .Union();
+ }
+
+protected:
+ flatbuffers::FlatBufferBuilder &_builder;
+
+private:
+ const flatbuffers::Offset<void> _no_option = 0;
+};
+
+} // namespace luci
+
+#endif // __CIRCLE_BUILTIN_TYPES_EXTRACTOR_H__
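BuiltinOptionsExtractor follows the usual luci visitor pattern: a node is dispatched through accept(), and the matching visit() overload builds the flatbuffers options union. A minimal usage sketch, assuming `node` is a valid luci::CircleNode pointer (hypothetical here; the real call site is the exporter rule introduced later in this change):

  flatbuffers::FlatBufferBuilder builder;
  luci::BuiltinOptionsExtractor options_extractor(builder);
  // accept() dispatches on the concrete node type; option-less operators
  // (e.g. CircleCeil) come back as _no_option, i.e. a zero offset.
  flatbuffers::Offset<void> options = node->accept(&options_extractor);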
diff --git a/compiler/luci/export/src/CircleBuiltinTypesMappingRule.h b/compiler/luci/export/src/CircleBuiltinTypesMappingRule.h
new file mode 100644
index 000000000..6f7c0f70e
--- /dev/null
+++ b/compiler/luci/export/src/CircleBuiltinTypesMappingRule.h
@@ -0,0 +1,79 @@
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __CIRCLE_EXPORT_BUILTIN_TYPES_MAPPING_RULE_H__
+#define __CIRCLE_EXPORT_BUILTIN_TYPES_MAPPING_RULE_H__
+
+#include <luci/IR/CircleNode.h>
+#include <luci/IR/CircleNodes.h>
+#include <luci/IR/CircleNodeVisitor.h>
+
+namespace luci
+{
+
+class BuiltinOperatorMappingRule final : public CircleNodeVisitor<circle::BuiltinOperator>
+{
+public:
+ BuiltinOperatorMappingRule()
+ {
+ // DO NOTHING
+ }
+
+public:
+ static BuiltinOperatorMappingRule &get()
+ {
+ static BuiltinOperatorMappingRule instance;
+ return instance;
+ }
+
+public:
+#define CIRCLE_NODE(CIRCLE_NODE, OP, OPTION) \
+ circle::BuiltinOperator visit(const CIRCLE_NODE *) final { return circle::OP; }
+// Virtual nodes are not circle builtin operators
+#define CIRCLE_VNODE(CIRCLE_NODE)
+#include "CircleOps.lst"
+#undef CIRCLE_VNODE
+#undef CIRCLE_NODE
+};
+
+class BuiltinOptionsMappingRule final : public CircleNodeVisitor<circle::BuiltinOptions>
+{
+public:
+ BuiltinOptionsMappingRule()
+ {
+ // DO NOTHING
+ }
+
+public:
+ static BuiltinOptionsMappingRule &get()
+ {
+ static BuiltinOptionsMappingRule instance;
+ return instance;
+ }
+
+public:
+#define CIRCLE_NODE(CIRCLE_NODE, OP, OPTION) \
+ circle::BuiltinOptions visit(const CIRCLE_NODE *) final { return circle::OPTION; }
+// Virtual nodes are not circle builtin operators
+#define CIRCLE_VNODE(CIRCLE_NODE)
+#include "CircleOps.lst"
+#undef CIRCLE_VNODE
+#undef CIRCLE_NODE
+};
+
+} // namespace luci
+
+#endif // __CIRCLE_EXPORT_BUILTIN_TYPES_MAPPING_RULE_H__
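Both mapping rules get their visit() bodies from the CircleOps.lst X-macro include. As an illustration only, assuming an entry shaped like CIRCLE_NODE(CircleAdd, BuiltinOperator_ADD, BuiltinOptions_AddOptions) (the actual entry format is defined in CircleOps.lst, which this diff does not show), the preprocessor would generate:

  // In BuiltinOperatorMappingRule:
  circle::BuiltinOperator visit(const CircleAdd *) final { return circle::BuiltinOperator_ADD; }
  // In BuiltinOptionsMappingRule:
  circle::BuiltinOptions visit(const CircleAdd *) final { return circle::BuiltinOptions_AddOptions; }
  // CIRCLE_VNODE entries expand to nothing, so virtual nodes get no visit()
  // overload here and fall back to the visitor's default behavior.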
diff --git a/compiler/luci/export/src/CircleExporterImpl.cpp b/compiler/luci/export/src/CircleExporterImpl.cpp
index 5868c176c..083add9be 100644
--- a/compiler/luci/export/src/CircleExporterImpl.cpp
+++ b/compiler/luci/export/src/CircleExporterImpl.cpp
@@ -79,14 +79,19 @@ encodeOperatorCodes(FlatBufferBuilder &builder, std::unordered_map<luci::OpCode,
for (auto it : opcodes)
{
uint32_t idx = it.second;
+ int8_t dep_code = 127; // BuiltinOperator_PLACEHOLDER_FOR_GREATER_OP_CODES
+ if (it.first.opcode < BuiltinOperator_PLACEHOLDER_FOR_GREATER_OP_CODES)
+ dep_code = static_cast<int8_t>(it.first.opcode);
if (it.first.opcode != BuiltinOperator_CUSTOM)
{
- operator_codes_vec[idx] = CreateOperatorCode(builder, it.first.opcode, 0, it.first.version);
+ operator_codes_vec[idx] =
+ CreateOperatorCode(builder, dep_code, 0, it.first.version, it.first.opcode);
}
else
{
operator_codes_vec[idx] =
- CreateOperatorCode(builder, it.first.opcode, builder.CreateString(it.first.custom_code));
+ CreateOperatorCode(builder, dep_code, builder.CreateString(it.first.custom_code),
+ it.first.version, it.first.opcode);
}
}
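The dep_code value above keeps exported models loadable by readers of the older schema: the legacy OperatorCode field is an int8, so any opcode at or above BuiltinOperator_PLACEHOLDER_FOR_GREATER_OP_CODES (127) stores the placeholder there, while the real opcode is written to the extended 32-bit builtin_code field passed as the last argument. A standalone sketch of just that mapping (not part of the exporter's API):

  // What goes into the legacy int8 deprecated_builtin_code field.
  int8_t deprecated_builtin_code_of(circle::BuiltinOperator opcode)
  {
    if (opcode < circle::BuiltinOperator_PLACEHOLDER_FOR_GREATER_OP_CODES)
      return static_cast<int8_t>(opcode);
    // Opcodes >= 127 are only representable in the extended builtin_code field.
    return static_cast<int8_t>(circle::BuiltinOperator_PLACEHOLDER_FOR_GREATER_OP_CODES);
  }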
diff --git a/compiler/luci/export/src/CircleExporterUtils.cpp b/compiler/luci/export/src/CircleExporterUtils.cpp
index 3a7ba304f..9473c2c4e 100644
--- a/compiler/luci/export/src/CircleExporterUtils.cpp
+++ b/compiler/luci/export/src/CircleExporterUtils.cpp
@@ -15,6 +15,7 @@
*/
#include "CircleExporterUtils.h"
+#include "CircleBuiltinTypesMappingRule.h"
#include <oops/InternalExn.h>
@@ -163,36 +164,63 @@ circle::SparseIndexVector to_circle_sparse_index_vector_type(luci::SparseIndexVe
}
}
-} // namespace luci
+circle::BuiltinOperator circle_builtin_operator(const luci::CircleNode *node)
+{
+ return node->accept(&BuiltinOperatorMappingRule::get());
+}
-namespace luci
+circle::BuiltinOptions circle_builtin_options(const luci::CircleNode *node)
{
+ if (auto cast = dynamic_cast<const luci::CircleCast *>(node))
+ {
+ return (cast->out_data_type() == loco::DataType::Unknown) ? circle::BuiltinOptions_NONE
+ : circle::BuiltinOptions_CastOptions;
+ }
-uint32_t SerializedModelData::registerBuiltinOpcode(circle::BuiltinOperator builtin_code,
- const int32_t op_version)
+ return node->accept(&BuiltinOptionsMappingRule::get());
+}
+
+std::string circle_custom_code(const luci::CircleNode *node)
{
- assert(op_version > 0);
+ if (auto custom_node = dynamic_cast<const luci::CircleCustom *>(node))
+ {
+ return custom_node->custom_code();
+ }
- auto it = _operator_codes.find(OpCode{builtin_code, "", op_version});
- if (it != _operator_codes.end())
+ return "";
+}
+
+flatbuffers::Offset<flatbuffers::Vector<uint8_t>>
+circle_custom_options(flatbuffers::FlatBufferBuilder &fb, const luci::CircleNode *node)
+{
+ if (auto custom_node = dynamic_cast<const luci::CircleCustom *>(node))
{
- return it->second;
+ std::vector<uint8_t> custom_options_vec{custom_node->custom_options().begin(),
+ custom_node->custom_options().end()};
+ return fb.CreateVector(custom_options_vec);
}
- auto idx = static_cast<uint32_t>(_operator_codes.size());
- _operator_codes.emplace(OpCode{builtin_code, "", op_version}, idx);
- return idx;
+
+ return 0;
}
-uint32_t SerializedModelData::registerCustomOpcode(const std::string &custom_code)
+} // namespace luci
+
+namespace luci
{
- const circle::BuiltinOperator builtin_code = circle::BuiltinOperator_CUSTOM;
- auto it = _operator_codes.find(OpCode{builtin_code, custom_code});
+
+uint32_t SerializedModelData::registerBuiltinOpcode(circle::BuiltinOperator builtin_code,
+ const std::string &custom_code,
+ const int32_t op_version)
+{
+ assert(op_version > 0);
+
+ auto it = _operator_codes.find(OpCode{builtin_code, custom_code, op_version});
if (it != _operator_codes.end())
{
return it->second;
}
auto idx = static_cast<uint32_t>(_operator_codes.size());
- _operator_codes.emplace(OpCode{builtin_code, custom_code}, idx);
+ _operator_codes.emplace(OpCode{builtin_code, custom_code, op_version}, idx);
return idx;
}
diff --git a/compiler/luci/export/src/CircleExporterUtils.h b/compiler/luci/export/src/CircleExporterUtils.h
index 95310b353..4a4c54a69 100644
--- a/compiler/luci/export/src/CircleExporterUtils.h
+++ b/compiler/luci/export/src/CircleExporterUtils.h
@@ -39,6 +39,12 @@ flatbuffers::Offset<void> to_circle_sparse_index_vector(flatbuffers::FlatBufferB
const SparseIndexVector &sparse_idx_vec);
circle::SparseIndexVector to_circle_sparse_index_vector_type(luci::SparseIndexVectorType type);
+circle::BuiltinOperator circle_builtin_operator(const luci::CircleNode *node);
+circle::BuiltinOptions circle_builtin_options(const luci::CircleNode *node);
+std::string circle_custom_code(const luci::CircleNode *node);
+flatbuffers::Offset<flatbuffers::Vector<uint8_t>>
+circle_custom_options(flatbuffers::FlatBufferBuilder &fb, const luci::CircleNode *node);
+
} // namespace luci
namespace luci
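Taken together, these helpers let one generic exporter rule replace the per-operator export_node() overloads deleted from CircleOperationExporter.cpp below. A sketch of the assumed wiring (the actual implementation lives in the new CircleOperationExporterRule; the shape below is hypothetical):

  // Sketch only: register the (builtin, custom, version) opcode, then emit
  // one circle::Operator with its options union and custom options.
  luci::BuiltinOptionsExtractor options_ext(builder);
  uint32_t op_idx = md.registerBuiltinOpcode(luci::circle_builtin_operator(node),
                                             luci::circle_custom_code(node),
                                             node->op_version());
  auto op_offset = circle::CreateOperator(builder, op_idx, inputs, outputs,
                                          luci::circle_builtin_options(node),
                                          node->accept(&options_ext),
                                          luci::circle_custom_options(builder, node));
  gd._operators.push_back(op_offset);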
diff --git a/compiler/luci/export/src/CircleOperationExporter.cpp b/compiler/luci/export/src/CircleOperationExporter.cpp
index be64a52d4..b300a7fcf 100644
--- a/compiler/luci/export/src/CircleOperationExporter.cpp
+++ b/compiler/luci/export/src/CircleOperationExporter.cpp
@@ -15,1686 +15,30 @@
*/
#include "CircleOperationExporter.h"
-#include "CircleExporterUtils.h"
-#include "Check.h"
+#include "CircleOperationExporterRule.h"
#include <luci/IR/CircleNode.h>
-#include <luci/IR/CircleNodes.h>
-#include <luci/IR/CircleNodeVisitor.h>
#include <luci/Profile/CircleNodeOrigin.h>
#include <luci/Plan/CircleNodeExecutionPlan.h>
-#include <luci/UserSettings.h>
-#include <luci/Log.h>
+#include <loco/IR/Algorithm.h>
-#include <loco/IR/CanonicalNodeVisitor.h>
-#include <oops/InternalExn.h>
-
-#include <flatbuffers/flexbuffers.h>
-
-using namespace flatbuffers;
-using namespace circle;
-
-namespace
-{
-
-using namespace luci;
-
-struct ExportContext
-{
- FlatBufferBuilder &builder;
- SerializedModelData &md;
- SerializedGraphData &gd;
-};
-
-/**
- * @brief Exports CircleMaxPool2D or CircleAveragePool2D
- *
- * @note CirclePool2D should be one of CircleMaxPool2D or CircleAveragePool2D
- */
-template <class CirclePool2D>
-void export_pool_2d(ExportContext &ctx, CirclePool2D *node, circle::BuiltinOperator builtin_op)
-{
- LUCI_ASSERT(builtin_op == circle::BuiltinOperator_MAX_POOL_2D ||
- builtin_op == circle::BuiltinOperator_L2_POOL_2D ||
- builtin_op == circle::BuiltinOperator_AVERAGE_POOL_2D,
- "Should be L2Pool, MaxPool or AvgPool");
- LUCI_ASSERT(node->padding() != luci::Padding::UNDEFINED, "Padding is not set");
-
- uint32_t op_idx = ctx.md.registerBuiltinOpcode(builtin_op, node->op_version());
- std::vector<int32_t> inputs_vec{get_tensor_index(node->value())};
- std::vector<int32_t> outputs_vec{get_tensor_index(static_cast<loco::Node *>(node))};
- auto inputs = ctx.builder.CreateVector(inputs_vec);
- auto outputs = ctx.builder.CreateVector(outputs_vec);
-
- circle::Padding padding = getOpPadding(node->padding());
-
- auto options = CreatePool2DOptions(ctx.builder, padding, node->stride()->w(), node->stride()->h(),
- node->filter()->w(), node->filter()->h(),
- to_circle_actfunc(node->fusedActivationFunction()));
- auto op_offset = CreateOperator(ctx.builder, op_idx, inputs, outputs,
- circle::BuiltinOptions_Pool2DOptions, options.Union());
- ctx.gd._operators.push_back(op_offset);
-}
-
-/**
- * @brief export simple nodes
- */
-void export_node(ExportContext &ctx, loco::Node *node, circle::BuiltinOperator bop,
- circle::BuiltinOptions bot, flatbuffers::Offset<void> options_offset)
-{
- uint32_t op_idx =
- ctx.md.registerBuiltinOpcode(bop, loco::must_cast<luci::CircleNode *>(node)->op_version());
- std::vector<int32_t> inputs_vec;
- std::vector<int32_t> outputs_vec{get_tensor_index(node)};
- for (uint32_t i = 0; i < node->arity(); ++i)
- inputs_vec.push_back(get_tensor_index(node->arg(i)));
- auto inputs = ctx.builder.CreateVector(inputs_vec);
- auto outputs = ctx.builder.CreateVector(outputs_vec);
- auto op_offset = CreateOperator(ctx.builder, op_idx, inputs, outputs, bot, options_offset);
- ctx.gd._operators.push_back(op_offset);
-}
-
-/**
- * @brief export simple nodes having void options
- */
-void export_node(ExportContext &ctx, loco::Node *node, circle::BuiltinOperator bop)
-{
- uint32_t op_idx =
- ctx.md.registerBuiltinOpcode(bop, loco::must_cast<luci::CircleNode *>(node)->op_version());
- std::vector<int32_t> inputs_vec;
- std::vector<int32_t> outputs_vec{get_tensor_index(static_cast<loco::Node *>(node))};
- for (uint32_t i = 0; i < node->arity(); ++i)
- inputs_vec.push_back(get_tensor_index(node->arg(i)));
- auto inputs = ctx.builder.CreateVector(inputs_vec);
- auto outputs = ctx.builder.CreateVector(outputs_vec);
- auto op_offset = CreateOperator(ctx.builder, op_idx, inputs, outputs);
- ctx.gd._operators.push_back(op_offset);
-}
-
-void export_node(ExportContext &ctx, luci::CircleAddN *node)
-{
- uint32_t op_idx = ctx.md.registerBuiltinOpcode(circle::BuiltinOperator_ADD_N, node->op_version());
- std::vector<int32_t> inputs_vec;
- std::vector<int32_t> outputs_vec{get_tensor_index(static_cast<loco::Node *>(node))};
-
- for (uint32_t i = 0; i < node->arity(); ++i)
- inputs_vec.push_back(get_tensor_index(node->inputs(i)));
-
- auto inputs = ctx.builder.CreateVector(inputs_vec);
- auto outputs = ctx.builder.CreateVector(outputs_vec);
- auto options = CreateAddNOptions(ctx.builder);
- auto op_offset = CreateOperator(ctx.builder, op_idx, inputs, outputs,
- circle::BuiltinOptions_AddNOptions, options.Union());
- ctx.gd._operators.push_back(op_offset);
-}
-
-void export_node(ExportContext &ctx, luci::CircleCast *node)
-{
- uint32_t op_idx = ctx.md.registerBuiltinOpcode(circle::BuiltinOperator_CAST, node->op_version());
- std::vector<int32_t> inputs_vec{get_tensor_index(node->x())};
- std::vector<int32_t> outputs_vec{get_tensor_index(static_cast<loco::Node *>(node))};
- auto inputs = ctx.builder.CreateVector(inputs_vec);
- auto outputs = ctx.builder.CreateVector(outputs_vec);
-
- flatbuffers::Offset<Operator> op_offset;
- if (node->out_data_type() != loco::DataType::Unknown)
- {
- auto options = CreateCastOptions(ctx.builder, to_circle_tensortype(node->in_data_type()),
- to_circle_tensortype(node->out_data_type()));
- op_offset = CreateOperator(ctx.builder, op_idx, inputs, outputs,
- circle::BuiltinOptions_CastOptions, options.Union());
- }
- else
- {
- op_offset = CreateOperator(ctx.builder, op_idx, inputs, outputs);
- }
- ctx.gd._operators.push_back(op_offset);
-}
-
-void export_node(ExportContext &ctx, luci::CircleConcatenation *node)
-{
- uint32_t op_idx =
- ctx.md.registerBuiltinOpcode(circle::BuiltinOperator_CONCATENATION, node->op_version());
- std::vector<int32_t> inputs_vec;
- std::vector<int32_t> outputs_vec{get_tensor_index(static_cast<loco::Node *>(node))};
-
- for (uint32_t i = 0; i < node->numValues(); ++i)
- inputs_vec.push_back(get_tensor_index(node->values(i)));
-
- auto inputs = ctx.builder.CreateVector(inputs_vec);
- auto outputs = ctx.builder.CreateVector(outputs_vec);
- auto options = CreateConcatenationOptions(ctx.builder, node->axis(),
- to_circle_actfunc(node->fusedActivationFunction()));
- auto op_offset = CreateOperator(ctx.builder, op_idx, inputs, outputs,
- circle::BuiltinOptions_ConcatenationOptions, options.Union());
- ctx.gd._operators.push_back(op_offset);
-}
-
-void export_node(ExportContext &ctx, luci::CircleCustom *node)
-{
- auto custom_outputs = loco::succs(node);
- assert(custom_outputs.size() == node->numOutputs());
-
- uint32_t op_idx = ctx.md.registerCustomOpcode(node->custom_code());
- std::vector<int32_t> inputs_vec;
- std::vector<int32_t> outputs_vec;
-
- for (uint32_t index = 0; index < node->numInputs(); index++)
- {
- inputs_vec.push_back(get_tensor_index(node->inputs(index)));
- }
- for (uint32_t index = 0; index < custom_outputs.size(); index++)
- {
- // store in order of index
- bool found = false;
- for (auto out : custom_outputs)
- {
- auto custom_out = loco::must_cast<luci::CircleCustomOut *>(out);
- if (custom_out->index() == static_cast<int32_t>(index))
- {
- outputs_vec.push_back(get_tensor_index(custom_out));
- found = true;
- break;
- }
- }
- if (!found)
- {
- INTERNAL_EXN("Invalid Custom output");
- }
- }
-
- auto inputs = ctx.builder.CreateVector(inputs_vec);
- auto outputs = ctx.builder.CreateVector(outputs_vec);
- flatbuffers::Offset<flatbuffers::Vector<uint8_t>> circle_custom_options;
- std::vector<uint8_t> custom_options_vec{node->custom_options().begin(),
- node->custom_options().end()};
- circle_custom_options = ctx.builder.CreateVector(custom_options_vec);
- auto op_offset = CreateOperator(ctx.builder, op_idx, inputs, outputs, circle::BuiltinOptions_NONE,
- flatbuffers::Offset<void>(), circle_custom_options);
- ctx.gd._operators.push_back(op_offset);
-}
-
-void export_node(ExportContext &ctx, luci::CircleIf *node)
-{
- auto if_outs = loco::succs(node);
- assert(if_outs.size() == node->output_count());
-
- uint32_t op_idx = ctx.md.registerBuiltinOpcode(circle::BuiltinOperator_IF, node->op_version());
- std::vector<int32_t> inputs_vec;
- std::vector<int32_t> outputs_vec;
-
- inputs_vec.push_back(get_tensor_index(node->cond()));
- for (uint32_t idx = 0; idx < node->input_count(); ++idx)
- inputs_vec.push_back(get_tensor_index(node->input(idx)));
-
- for (uint32_t idx = 0; idx < node->output_count(); ++idx)
- {
- // store in order of index
- bool found = false;
- for (auto out : if_outs)
- {
- auto if_out = loco::must_cast<luci::CircleIfOut *>(out);
- if (if_out->index() == static_cast<int32_t>(idx))
- {
- outputs_vec.push_back(get_tensor_index(if_out));
- found = true;
- break;
- }
- }
- if (!found)
- {
- INTERNAL_EXN("Invalid CircleIf output");
- }
- }
-
- auto inputs = ctx.builder.CreateVector(inputs_vec);
- auto outputs = ctx.builder.CreateVector(outputs_vec);
- auto options = CreateIfOptions(ctx.builder, node->then_branch(), node->else_branch());
- auto op_offset = CreateOperator(ctx.builder, op_idx, inputs, outputs,
- circle::BuiltinOptions_IfOptions, options.Union());
- ctx.gd._operators.push_back(op_offset);
-}
-
-void export_node(ExportContext &ctx, luci::CircleNonMaxSuppressionV4 *node)
-{
- auto nms_outs = loco::succs(node);
- assert(nms_outs.size() == 2);
-
- uint32_t op_idx = ctx.md.registerBuiltinOpcode(circle::BuiltinOperator_NON_MAX_SUPPRESSION_V4,
- node->op_version());
- std::vector<int32_t> inputs_vec{
- get_tensor_index(node->boxes()), get_tensor_index(node->scores()),
- get_tensor_index(node->max_output_size()), get_tensor_index(node->iou_threshold()),
- get_tensor_index(node->score_threshold()),
- };
- std::vector<int32_t> outputs_vec;
-
- for (uint32_t idx = 0; idx < nms_outs.size(); ++idx)
- {
- // store in order of index
- bool found = false;
- for (auto out : nms_outs)
- {
- auto nms_out = loco::must_cast<luci::CircleNonMaxSuppressionV4Out *>(out);
- if (nms_out->index() == static_cast<int32_t>(idx))
- {
- outputs_vec.push_back(get_tensor_index(nms_out));
- found = true;
- break;
- }
- }
- if (!found)
- {
- INTERNAL_EXN("Invalid NonMaxSuppressionV4 output");
- }
- }
-
- auto inputs = ctx.builder.CreateVector(inputs_vec);
- auto outputs = ctx.builder.CreateVector(outputs_vec);
- auto options = CreateNonMaxSuppressionV4Options(ctx.builder);
- auto op_offset =
- CreateOperator(ctx.builder, op_idx, inputs, outputs,
- circle::BuiltinOptions_NonMaxSuppressionV4Options, options.Union());
- ctx.gd._operators.push_back(op_offset);
-}
-
-void export_node(ExportContext &ctx, luci::CircleNonMaxSuppressionV5 *node)
-{
- auto nms_outs = loco::succs(node);
- assert(nms_outs.size() == 3);
-
- uint32_t op_idx = ctx.md.registerBuiltinOpcode(circle::BuiltinOperator_NON_MAX_SUPPRESSION_V5,
- node->op_version());
- std::vector<int32_t> inputs_vec{
- get_tensor_index(node->boxes()), get_tensor_index(node->scores()),
- get_tensor_index(node->max_output_size()), get_tensor_index(node->iou_threshold()),
- get_tensor_index(node->score_threshold()), get_tensor_index(node->soft_nms_sigma()),
- };
- std::vector<int32_t> outputs_vec;
-
- for (uint32_t idx = 0; idx < nms_outs.size(); ++idx)
- {
- // store in order of index
- bool found = false;
- for (auto out : nms_outs)
- {
- auto nms_out = loco::must_cast<luci::CircleNonMaxSuppressionV5Out *>(out);
- if (nms_out->index() == static_cast<int32_t>(idx))
- {
- outputs_vec.push_back(get_tensor_index(nms_out));
- found = true;
- break;
- }
- }
- if (!found)
- {
- INTERNAL_EXN("Invalid NonMaxSuppressionV5 output");
- }
- }
-
- auto inputs = ctx.builder.CreateVector(inputs_vec);
- auto outputs = ctx.builder.CreateVector(outputs_vec);
- auto options = CreateNonMaxSuppressionV5Options(ctx.builder);
- auto op_offset =
- CreateOperator(ctx.builder, op_idx, inputs, outputs,
- circle::BuiltinOptions_NonMaxSuppressionV5Options, options.Union());
- ctx.gd._operators.push_back(op_offset);
-}
-
-void export_node(ExportContext &ctx, luci::CircleReverseV2 *node)
-{
- uint32_t op_idx =
- ctx.md.registerBuiltinOpcode(circle::BuiltinOperator_REVERSE_V2, node->op_version());
- std::vector<int32_t> inputs_vec{get_tensor_index(node->tensor()), get_tensor_index(node->axis())};
- std::vector<int32_t> outputs_vec{get_tensor_index(static_cast<loco::Node *>(node))};
- auto inputs = ctx.builder.CreateVector(inputs_vec);
- auto outputs = ctx.builder.CreateVector(outputs_vec);
- auto options = CreateReverseV2Options(ctx.builder);
- auto op_offset = CreateOperator(ctx.builder, op_idx, inputs, outputs,
- circle::BuiltinOptions_ReverseSequenceOptions, options.Union());
- ctx.gd._operators.push_back(op_offset);
-}
-
-void export_node(ExportContext &ctx, luci::CircleSplit *node)
-{
- auto split_outs = loco::succs(node);
- assert(int32_t(split_outs.size()) == node->num_split());
-
- uint32_t op_idx = ctx.md.registerBuiltinOpcode(circle::BuiltinOperator_SPLIT, node->op_version());
- // NOTE BuiltinOperator_SPLIT input is placed at second position
- std::vector<int32_t> inputs_vec{get_tensor_index(node->split_dim()),
- get_tensor_index(node->input())};
- std::vector<int32_t> outputs_vec;
-
- for (int32_t index = 0; index < node->num_split(); index++)
- {
- // store in order of index
- bool found = false;
- for (auto out : split_outs)
- {
- auto split_out = loco::must_cast<luci::CircleSplitOut *>(out);
- if (split_out->index() == index)
- {
- outputs_vec.push_back(get_tensor_index(split_out));
- found = true;
- break;
- }
- }
- if (!found)
- {
- INTERNAL_EXN("Invalid Split output");
- }
- }
-
- auto inputs = ctx.builder.CreateVector(inputs_vec);
- auto outputs = ctx.builder.CreateVector(outputs_vec);
- auto options = CreateSplitOptions(ctx.builder, node->num_split());
- auto op_offset = CreateOperator(ctx.builder, op_idx, inputs, outputs,
- circle::BuiltinOptions_SplitOptions, options.Union());
- ctx.gd._operators.push_back(op_offset);
-}
-
-void export_node(ExportContext &ctx, luci::CircleSplitV *node)
-{
- auto split_outs = loco::succs(node);
- assert(int32_t(split_outs.size()) == node->num_split());
-
- uint32_t op_idx =
- ctx.md.registerBuiltinOpcode(circle::BuiltinOperator_SPLIT_V, node->op_version());
- std::vector<int32_t> inputs_vec{get_tensor_index(node->input()),
- get_tensor_index(node->size_splits()),
- get_tensor_index(node->split_dim())};
- std::vector<int32_t> outputs_vec;
-
- for (int32_t index = 0; index < node->num_split(); index++)
- {
- // store in order of index
- bool found = false;
- for (auto out : split_outs)
- {
- auto split_out = loco::must_cast<luci::CircleSplitVOut *>(out);
- if (split_out->index() == index)
- {
- outputs_vec.push_back(get_tensor_index(split_out));
- found = true;
- break;
- }
- }
- if (!found)
- {
- INTERNAL_EXN("Invalid SplitV output");
- }
- }
-
- auto inputs = ctx.builder.CreateVector(inputs_vec);
- auto outputs = ctx.builder.CreateVector(outputs_vec);
- auto options = CreateSplitVOptions(ctx.builder, node->num_split());
- auto op_offset = CreateOperator(ctx.builder, op_idx, inputs, outputs,
- circle::BuiltinOptions_SplitVOptions, options.Union());
- ctx.gd._operators.push_back(op_offset);
-}
-
-void export_node(ExportContext &ctx, luci::CircleTopKV2 *node)
-{
- auto topkv2_outs = loco::succs(node);
- int outs_count = int32_t(topkv2_outs.size());
- assert(outs_count == 2);
-
- uint32_t op_idx =
- ctx.md.registerBuiltinOpcode(circle::BuiltinOperator_TOPK_V2, node->op_version());
- std::vector<int32_t> inputs_vec{get_tensor_index(node->input()), get_tensor_index(node->k())};
- std::vector<int32_t> outputs_vec;
-
- for (int32_t index = 0; index < outs_count; index++)
- {
- // store in order of index
- bool found = false;
- for (auto out : topkv2_outs)
- {
- auto topkv2_out = loco::must_cast<luci::CircleTopKV2Out *>(out);
- if (topkv2_out->index() == index)
- {
- outputs_vec.push_back(get_tensor_index(topkv2_out));
- found = true;
- break;
- }
- }
- if (!found)
- {
- INTERNAL_EXN("Invalid TopKV2 output");
- }
- }
-
- auto inputs = ctx.builder.CreateVector(inputs_vec);
- auto outputs = ctx.builder.CreateVector(outputs_vec);
- auto options = CreateTopKV2Options(ctx.builder);
- auto op_offset = CreateOperator(ctx.builder, op_idx, inputs, outputs,
- circle::BuiltinOptions_TopKV2Options, options.Union());
- ctx.gd._operators.push_back(op_offset);
-}
-
-void export_node(ExportContext &ctx, luci::CircleUnique *node)
-{
- auto unique_outs = loco::succs(node);
- assert(int32_t(unique_outs.size()) == 2);
- uint32_t op_idx =
- ctx.md.registerBuiltinOpcode(circle::BuiltinOperator_UNIQUE, node->op_version());
-
- std::vector<int32_t> inputs_vec{get_tensor_index(node->input())};
- std::vector<int32_t> outputs_vec;
-
- for (int32_t index = 0; index < 2; index++)
- {
- // store in order of index
- bool found = false;
- for (auto out : unique_outs)
- {
- auto unique_out = loco::must_cast<luci::CircleUniqueOut *>(out);
- if (unique_out->index() == index)
- {
- outputs_vec.push_back(get_tensor_index(unique_out));
- found = true;
- break;
- }
- }
- if (!found)
- {
- INTERNAL_EXN("Invalid Unique output");
- }
- }
-
- auto inputs = ctx.builder.CreateVector(inputs_vec);
- auto outputs = ctx.builder.CreateVector(outputs_vec);
- auto options = CreateUniqueOptions(ctx.builder, to_circle_tensortype(node->idx_out_type()));
- auto op_offset = CreateOperator(ctx.builder, op_idx, inputs, outputs,
- circle::BuiltinOptions_UniqueOptions, options.Union());
- ctx.gd._operators.push_back(op_offset);
-}
-
-void export_node(ExportContext &ctx, luci::CircleUnpack *node)
-{
- LOGGER(l);
- auto settings = luci::UserSettings::settings();
-
- auto unpack_outs = loco::succs(node);
- // NOTE real models may not use all of the outputs
- if (static_cast<int32_t>(unpack_outs.size()) != node->num())
- {
- if (settings->get(luci::UserSettings::Key::DisableValidation))
- {
- WARN(l) << "Warning: export Unpack(" << node->name() << ") 'num' not same as outputs";
- }
- else
- assert(false);
- }
-
- uint32_t op_idx =
- ctx.md.registerBuiltinOpcode(circle::BuiltinOperator_UNPACK, node->op_version());
- std::vector<int32_t> inputs_vec{get_tensor_index(node->value())};
- std::vector<int32_t> outputs_vec;
-
- for (int32_t index = 0; index < node->num(); index++)
- {
- // store in order of index
- bool found = false;
- for (auto out : unpack_outs)
- {
- auto unpack_out = loco::must_cast<luci::CircleUnpackOut *>(out);
- if (unpack_out->index() == index)
- {
- outputs_vec.push_back(get_tensor_index(unpack_out));
- found = true;
- break;
- }
- }
- // NOTE real models may not use all of the outputs
- if (!found)
- {
- if (settings->get(luci::UserSettings::Key::DisableValidation))
- {
- WARN(l) << "Warning: export Unpack(" << node->name() << ") output " << index << " not used";
- }
- else
- assert(false);
- }
- }
-
- auto inputs = ctx.builder.CreateVector(inputs_vec);
- auto outputs = ctx.builder.CreateVector(outputs_vec);
- auto options = CreateUnpackOptions(ctx.builder, node->num(), node->axis());
- auto op_offset = CreateOperator(ctx.builder, op_idx, inputs, outputs,
- circle::BuiltinOptions_UnpackOptions, options.Union());
- ctx.gd._operators.push_back(op_offset);
-}
-
-void export_node(ExportContext &ctx, luci::CircleWhile *node)
-{
- auto while_outs = loco::succs(node);
- assert(while_outs.size() == node->output_count());
-
- uint32_t op_idx = ctx.md.registerBuiltinOpcode(circle::BuiltinOperator_WHILE, node->op_version());
- std::vector<int32_t> inputs_vec;
- std::vector<int32_t> outputs_vec;
-
- for (uint32_t idx = 0; idx < node->input_count(); ++idx)
- inputs_vec.push_back(get_tensor_index(node->input(idx)));
-
- for (uint32_t idx = 0; idx < node->output_count(); ++idx)
- {
- // store in order of index
- bool found = false;
- for (auto out : while_outs)
- {
- auto while_out = loco::must_cast<luci::CircleWhileOut *>(out);
- if (while_out->index() == static_cast<int32_t>(idx))
- {
- outputs_vec.push_back(get_tensor_index(while_out));
- found = true;
- break;
- }
- }
- if (!found)
- {
- INTERNAL_EXN("Invalid CircleWhile output");
- }
- }
-
- auto inputs = ctx.builder.CreateVector(inputs_vec);
- auto outputs = ctx.builder.CreateVector(outputs_vec);
- auto options = CreateWhileOptions(ctx.builder, node->cond_branch(), node->body_branch());
- auto op_offset = CreateOperator(ctx.builder, op_idx, inputs, outputs,
- circle::BuiltinOptions_WhileOptions, options.Union());
- ctx.gd._operators.push_back(op_offset);
-}
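
Every multiple-output exporter above repeats the same pattern: for each expected output index, scan all graph successors until the matching `Circle*Out` node turns up, then record its tensor index. A self-contained sketch of that ordering step, using a hypothetical `Out` record in place of the luci node types:

```cpp
#include <cassert>
#include <cstdint>
#include <stdexcept>
#include <vector>

// Hypothetical stand-in for a Circle*Out successor node.
struct Out
{
  int32_t index;  // position among the operator's outputs
  int32_t tensor; // tensor table index assigned during export
};

// Order successors by output index, as the exporters above do:
// for each slot, scan all successors until the match is found.
std::vector<int32_t> order_by_index(const std::vector<Out> &outs, int32_t count)
{
  std::vector<int32_t> ordered;
  for (int32_t index = 0; index < count; ++index)
  {
    bool found = false;
    for (const auto &out : outs)
    {
      if (out.index == index)
      {
        ordered.push_back(out.tensor);
        found = true;
        break;
      }
    }
    if (!found)
      throw std::runtime_error("missing output index"); // mirrors INTERNAL_EXN
  }
  return ordered;
}

int main()
{
  // Successors arrive in arbitrary order; the result is index-ordered.
  std::vector<Out> outs{{1, 42}, {0, 7}};
  auto ordered = order_by_index(outs, 2);
  assert(ordered[0] == 7 && ordered[1] == 42);
  return 0;
}
```

The scan is O(count × successors); the `OutputVectorExtractor` introduced later in this patch does the same job in a single pass.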
-
-class ExportHelper
-{
-public:
- ExportHelper(ExportContext &ctx) : _ctx{ctx}
- {
- // DO NOTHING
- }
-
-protected:
- /**
- * @brief export simple nodes
- */
- void export_simple(loco::Node *node, circle::BuiltinOperator bop, circle::BuiltinOptions bot,
- flatbuffers::Offset<void> options_offset)
- {
- export_node(_ctx, node, bop, bot, options_offset);
- }
-
- /**
- * @brief export simple nodes having void options
- */
- void export_simple(loco::Node *node, circle::BuiltinOperator bop)
- {
- export_node(_ctx, node, bop);
- }
-
-protected:
- ExportContext &_ctx;
-};
-
-enum class OE
-{
- ABC,
- DEF,
- GHIJ,
- KLMN,
- OPQR,
- STUV,
- WXYZ,
- CIRC, // circle only
- VIRT, // virtual
-};
-
-class OperationExporter final : public ExportHelper
-{
-public:
- OperationExporter(ExportContext &ctx) : ExportHelper(ctx)
- {
- // DO NOTHING
- }
-
-public:
- void export_node(luci::CircleNode *);
-};
-
-template <OE oe> class OpExporterLet;
-
-template <>
-class OpExporterLet<OE::ABC> final : public luci::CircleNodeMutableVisitor<void>,
- public ExportHelper
-{
-public:
- OpExporterLet(ExportContext &ctx) : ExportHelper(ctx)
- {
- // DO NOTHING
- }
-
-public:
- // NOTE visit for luci::CircleNode is added NOT to throw NYI
- void visit(luci::CircleNode *) final {}
-
-public:
- void visit(luci::CircleAbs *) final;
- void visit(luci::CircleAdd *) final;
- void visit(luci::CircleAddN *) final;
- void visit(luci::CircleArgMax *) final;
- void visit(luci::CircleArgMin *) final;
- void visit(luci::CircleAveragePool2D *) final;
- void visit(luci::CircleBatchMatMul *) final;
- void visit(luci::CircleBatchToSpaceND *) final;
- void visit(luci::CircleBidirectionalSequenceLSTM *) final;
- void visit(luci::CircleCast *) final;
- void visit(luci::CircleCeil *) final;
- void visit(luci::CircleConcatenation *) final;
- void visit(luci::CircleConst *) final{/* skip, everything is done in exportOpDefinedTensors */};
- void visit(luci::CircleConv2D *) final;
- void visit(luci::CircleCos *) final;
- void visit(luci::CircleCustom *) final;
-};
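
The empty `visit(luci::CircleNode *)` override matters: the generic handler of `CircleNodeMutableVisitor` throws NYI by default, and every per-type visit falls back to it, so a group visitor that only implements the A–C names must swallow everything else for the multi-group dispatch below to work. A minimal standalone model of that fall-through, with hypothetical node types in place of the luci hierarchy:

```cpp
#include <iostream>
#include <stdexcept>

struct NodeVisitor;

struct Node
{
  virtual ~Node() = default;
  virtual void accept(NodeVisitor &v) = 0;
};

struct AbsNode;
struct MulNode;

struct NodeVisitor
{
  virtual ~NodeVisitor() = default;
  // Generic handler: the base visitor rejects anything unhandled ("NYI").
  virtual void visit(Node &) { throw std::runtime_error("NYI"); }
  virtual void visit(AbsNode &n);
  virtual void visit(MulNode &n);
};

struct AbsNode : Node
{
  void accept(NodeVisitor &v) override { v.visit(*this); }
};

struct MulNode : Node
{
  void accept(NodeVisitor &v) override { v.visit(*this); }
};

// By default every concrete visit delegates to the throwing generic one.
void NodeVisitor::visit(AbsNode &n) { visit(static_cast<Node &>(n)); }
void NodeVisitor::visit(MulNode &n) { visit(static_cast<Node &>(n)); }

// A "group" that owns only AbsNode: the no-op generic override makes all
// other node types fall through silently instead of throwing.
struct AbcGroup : NodeVisitor
{
  void visit(Node &) override {}
  void visit(AbsNode &) override { std::cout << "exported Abs\n"; }
};

int main()
{
  AbcGroup group;
  AbsNode abs_node;
  MulNode mul_node;
  abs_node.accept(group); // handled by this group
  mul_node.accept(group); // ignored here; another group handles it
  return 0;
}
```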
-
-template <>
-class OpExporterLet<OE::DEF> final : public luci::CircleNodeMutableVisitor<void>,
- public ExportHelper
-{
-public:
- OpExporterLet(ExportContext &ctx) : ExportHelper(ctx)
- {
- // DO NOTHING
- }
-
-public:
- void visit(luci::CircleNode *) final {}
-
-public:
- void visit(luci::CircleDepthToSpace *) final;
- void visit(luci::CircleDepthwiseConv2D *) final;
- void visit(luci::CircleDequantize *) final;
- void visit(luci::CircleDiv *) final;
- void visit(luci::CircleElu *) final;
- void visit(luci::CircleEqual *) final;
- void visit(luci::CircleExp *) final;
- void visit(luci::CircleExpandDims *) final;
- void visit(luci::CircleFakeQuant *) final;
- void visit(luci::CircleFill *) final;
- void visit(luci::CircleFloor *) final;
- void visit(luci::CircleFloorDiv *) final;
- void visit(luci::CircleFloorMod *) final;
- void visit(luci::CircleFullyConnected *) final;
-};
-
-template <>
-class OpExporterLet<OE::GHIJ> final : public luci::CircleNodeMutableVisitor<void>,
- public ExportHelper
-{
-public:
- OpExporterLet(ExportContext &ctx) : ExportHelper(ctx)
- {
- // DO NOTHING
- }
-
-public:
- void visit(luci::CircleNode *) final {}
-
-public:
- void visit(luci::CircleGather *) final;
- void visit(luci::CircleGatherNd *) final;
- void visit(luci::CircleGreater *) final;
- void visit(luci::CircleGreaterEqual *) final;
- void visit(luci::CircleIf *) final;
-};
-
-template <>
-class OpExporterLet<OE::KLMN> final : public luci::CircleNodeMutableVisitor<void>,
- public ExportHelper
-{
-public:
- OpExporterLet(ExportContext &ctx) : ExportHelper(ctx)
- {
- // DO NOTHING
- }
-
-public:
- void visit(luci::CircleNode *) final {}
-
-public:
- void visit(luci::CircleL2Normalize *) final;
- void visit(luci::CircleL2Pool2D *) final;
- void visit(luci::CircleLeakyRelu *) final;
- void visit(luci::CircleLess *) final;
- void visit(luci::CircleLessEqual *) final;
- void visit(luci::CircleLocalResponseNormalization *) final;
- void visit(luci::CircleLog *) final;
- void visit(luci::CircleLogicalAnd *) final;
- void visit(luci::CircleLogicalNot *) final;
- void visit(luci::CircleLogicalOr *) final;
- void visit(luci::CircleLogistic *) final;
- void visit(luci::CircleLogSoftmax *) final;
- void visit(luci::CircleMatrixDiag *) final;
- void visit(luci::CircleMatrixSetDiag *) final;
- void visit(luci::CircleMaximum *) final;
- void visit(luci::CircleMaxPool2D *) final;
- void visit(luci::CircleMean *) final;
- void visit(luci::CircleMinimum *) final;
- void visit(luci::CircleMirrorPad *) final;
- void visit(luci::CircleMul *) final;
- void visit(luci::CircleNeg *) final;
- void visit(luci::CircleNonMaxSuppressionV4 *) final;
- void visit(luci::CircleNonMaxSuppressionV5 *) final;
- void visit(luci::CircleNotEqual *) final;
-};
-
-template <>
-class OpExporterLet<OE::OPQR> final : public luci::CircleNodeMutableVisitor<void>,
- public ExportHelper
-{
-public:
- OpExporterLet(ExportContext &ctx) : ExportHelper(ctx)
- {
- // DO NOTHING
- }
-
-public:
- void visit(luci::CircleNode *) final {}
-
-public:
- void visit(luci::CircleOneHot *) final;
- void visit(luci::CirclePack *) final;
- void visit(luci::CirclePad *) final;
- void visit(luci::CirclePadV2 *) final;
- void visit(luci::CirclePow *) final;
- void visit(luci::CirclePRelu *) final;
- void visit(luci::CircleQuantize *) final;
- void visit(luci::CircleRange *) final;
- void visit(luci::CircleRank *) final;
- void visit(luci::CircleReduceAny *) final;
- void visit(luci::CircleReduceMax *) final;
- void visit(luci::CircleReduceMin *) final;
- void visit(luci::CircleReduceProd *) final;
- void visit(luci::CircleRelu *) final;
- void visit(luci::CircleRelu6 *) final;
- void visit(luci::CircleReluN1To1 *) final;
- void visit(luci::CircleReshape *) final;
- void visit(luci::CircleResizeBilinear *) final;
- void visit(luci::CircleResizeNearestNeighbor *) final;
- void visit(luci::CircleReverseSequence *) final;
- void visit(luci::CircleReverseV2 *) final;
- void visit(luci::CircleRound *) final;
- void visit(luci::CircleRsqrt *) final;
-};
-
-template <>
-class OpExporterLet<OE::STUV> final : public luci::CircleNodeMutableVisitor<void>,
- public ExportHelper
-{
-public:
- OpExporterLet(ExportContext &ctx) : ExportHelper(ctx)
- {
- // DO NOTHING
- }
-
-public:
- void visit(luci::CircleNode *) final {}
-
-public:
- void visit(luci::CircleScatterNd *) final;
- void visit(luci::CircleSegmentSum *) final;
- void visit(luci::CircleSelect *) final;
- void visit(luci::CircleSelectV2 *) final;
- void visit(luci::CircleShape *) final;
- void visit(luci::CircleSin *) final;
- void visit(luci::CircleSlice *) final;
- void visit(luci::CircleSoftmax *) final;
- void visit(luci::CircleSpaceToBatchND *) final;
- void visit(luci::CircleSpaceToDepth *) final;
- void visit(luci::CircleSparseToDense *) final;
- void visit(luci::CircleSplit *) final;
- void visit(luci::CircleSplitV *) final;
- void visit(luci::CircleSqrt *) final;
- void visit(luci::CircleSquare *) final;
- void visit(luci::CircleSquaredDifference *) final;
- void visit(luci::CircleSqueeze *) final;
- void visit(luci::CircleStridedSlice *) final;
- void visit(luci::CircleSub *) final;
- void visit(luci::CircleSum *) final;
- void visit(luci::CircleTanh *) final;
- void visit(luci::CircleTile *) final;
- void visit(luci::CircleTopKV2 *) final;
- void visit(luci::CircleTranspose *) final;
- void visit(luci::CircleTransposeConv *) final;
- void visit(luci::CircleUnidirectionalSequenceLSTM *) final;
- void visit(luci::CircleUnique *) final;
- void visit(luci::CircleUnpack *) final;
-};
-
-template <>
-class OpExporterLet<OE::WXYZ> final : public luci::CircleNodeMutableVisitor<void>,
- public ExportHelper
-{
-public:
- OpExporterLet(ExportContext &ctx) : ExportHelper(ctx)
- {
- // DO NOTHING
- }
-
-public:
- void visit(luci::CircleNode *) final {}
-
-public:
- void visit(luci::CircleWhere *) final;
- void visit(luci::CircleWhile *) final;
- void visit(luci::CircleZerosLike *) final;
-};
-
-template <>
-class OpExporterLet<OE::CIRC> final : public luci::CircleNodeMutableVisitor<void>,
- public ExportHelper
-{
-public:
- OpExporterLet(ExportContext &ctx) : ExportHelper(ctx)
- {
- // DO NOTHING
- }
-
-public:
- void visit(luci::CircleNode *) final {}
-
-public:
- // Circle only
- void visit(luci::CircleBCQFullyConnected *) final;
- void visit(luci::CircleBCQGather *) final;
- void visit(luci::CircleInstanceNorm *) final;
-};
-
-template <>
-class OpExporterLet<OE::VIRT> final : public luci::CircleNodeMutableVisitor<void>,
- public ExportHelper
-{
-public:
- OpExporterLet(ExportContext &ctx) : ExportHelper(ctx)
- {
- // DO NOTHING
- }
-
-public:
- void visit(luci::CircleNode *) final {}
-
-public:
- // Virtual
- void visit(luci::CircleInput *) final {}
- void visit(luci::CircleOutput *) final {}
- void visit(luci::CircleOutputDummy *) final {}
- void visit(luci::CircleOutputExclude *) final {}
- // Virtual for multiple-outputs
- void visit(luci::CircleBidirectionalSequenceLSTMOut *) final {}
- void visit(luci::CircleCustomOut *) final {}
- void visit(luci::CircleIfOut *) final {}
- void visit(luci::CircleNonMaxSuppressionV4Out *) final {}
- void visit(luci::CircleNonMaxSuppressionV5Out *) final {}
- void visit(luci::CircleSplitOut *) final {}
- void visit(luci::CircleSplitVOut *) final {}
- void visit(luci::CircleTopKV2Out *) final {}
- void visit(luci::CircleUniqueOut *) final {}
- void visit(luci::CircleUnpackOut *) final {}
- void visit(luci::CircleWhileOut *) final {}
-};
-
-void OperationExporter::export_node(luci::CircleNode *node)
-{
- // TODO revise return type to bool and return if handled
-#define VISIT_OE(GRP) \
- do \
- { \
- OpExporterLet<OE::GRP> oe(_ctx); \
- node->accept(&oe); \
- } while (false)
-
- VISIT_OE(ABC);
- VISIT_OE(DEF);
- VISIT_OE(GHIJ);
- VISIT_OE(KLMN);
- VISIT_OE(OPQR);
- VISIT_OE(STUV);
- VISIT_OE(WXYZ);
- VISIT_OE(CIRC);
- VISIT_OE(VIRT);
-
-#undef VISIT_OE
-}
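
`VISIT_OE(ABC)` expands to the statement below; the `do { } while (false)` wrapper makes the macro behave as a single statement, so it composes safely with `if`/`else` and requires the trailing semicolon. Each node is offered to all nine groups in turn and, thanks to the no-op fallbacks, handled by at most one of them; the TODO above notes that short-circuiting after a hit is future work.

```cpp
// Literal expansion of VISIT_OE(ABC):
do
{
  OpExporterLet<OE::ABC> oe(_ctx);
  node->accept(&oe);
} while (false);
```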
-
-void OpExporterLet<OE::ABC>::visit(luci::CircleAbs *node)
-{
- export_simple(node, circle::BuiltinOperator_ABS, circle::BuiltinOptions_AbsOptions,
- CreateAbsOptions(_ctx.builder).Union());
-}
-
-void OpExporterLet<OE::ABC>::visit(luci::CircleAdd *node)
-{
- export_simple(
- node, circle::BuiltinOperator_ADD, circle::BuiltinOptions_AddOptions,
- CreateAddOptions(_ctx.builder, to_circle_actfunc(node->fusedActivationFunction())).Union());
-}
-
-void OpExporterLet<OE::ABC>::visit(luci::CircleAddN *node) { export_node(_ctx, node); }
-
-void OpExporterLet<OE::ABC>::visit(luci::CircleArgMax *node)
-{
- export_simple(
- node, circle::BuiltinOperator_ARG_MAX, circle::BuiltinOptions_ArgMaxOptions,
- CreateArgMaxOptions(_ctx.builder, to_circle_tensortype(node->output_type())).Union());
-}
-
-void OpExporterLet<OE::ABC>::visit(luci::CircleArgMin *node)
-{
- export_simple(
- node, circle::BuiltinOperator_ARG_MIN, circle::BuiltinOptions_ArgMinOptions,
- CreateArgMinOptions(_ctx.builder, to_circle_tensortype(node->output_type())).Union());
-}
-
-void OpExporterLet<OE::ABC>::visit(luci::CircleAveragePool2D *node)
-{
- export_pool_2d<luci::CircleAveragePool2D>(_ctx, node, circle::BuiltinOperator_AVERAGE_POOL_2D);
-}
-
-void OpExporterLet<OE::ABC>::visit(luci::CircleBatchMatMul *node)
-{
- export_simple(node, circle::BuiltinOperator_BATCH_MATMUL,
- circle::BuiltinOptions_BatchMatMulOptions,
- CreateBatchMatMulOptions(_ctx.builder, node->adj_x(), node->adj_y()).Union());
-}
-
-void OpExporterLet<OE::ABC>::visit(luci::CircleBidirectionalSequenceLSTM *node)
-{
- auto bidi_lstm_outs = loco::succs(node);
- assert((bidi_lstm_outs.size() == 1) || (bidi_lstm_outs.size() == 2));
- uint32_t op_idx = _ctx.md.registerBuiltinOpcode(
- circle::BuiltinOperator_BIDIRECTIONAL_SEQUENCE_LSTM, node->op_version());
-
- std::vector<int32_t> inputs_vec{get_tensor_index(node->input())};
- std::vector<int32_t> outputs_vec;
-
- for (int32_t index = 0; index < 2; index++)
- {
- // store in order of index
- bool found = false;
- for (auto out : bidi_lstm_outs)
- {
- auto bidi_lstm_out = loco::must_cast<luci::CircleBidirectionalSequenceLSTMOut *>(out);
- if (bidi_lstm_out->index() == index)
- {
- outputs_vec.push_back(get_tensor_index(bidi_lstm_out));
- found = true;
- break;
- }
- }
- if (!found)
- {
- INTERNAL_EXN("Invalid BidirectionalSequenceLSTM output");
- }
- }
-
- auto inputs = _ctx.builder.CreateVector(inputs_vec);
- auto outputs = _ctx.builder.CreateVector(outputs_vec);
- auto options = CreateBidirectionalSequenceLSTMOptions(
- _ctx.builder, to_circle_actfunc(node->fusedActivationFunction()), node->cell_clip(),
- node->proj_clip(), node->merge_outputs(), node->time_major(),
- node->asymmetric_quantize_inputs());
- auto op_offset =
- CreateOperator(_ctx.builder, op_idx, inputs, outputs,
- circle::BuiltinOptions_BidirectionalSequenceLSTMOptions, options.Union());
- _ctx.gd._operators.push_back(op_offset);
-}
-
-void OpExporterLet<OE::ABC>::visit(luci::CircleCast *node) { export_node(_ctx, node); }
-
-void OpExporterLet<OE::ABC>::visit(luci::CircleCeil *node)
-{
- export_simple(node, circle::BuiltinOperator_CEIL);
-}
-
-void OpExporterLet<OE::ABC>::visit(luci::CircleConcatenation *node) { export_node(_ctx, node); }
-
-void OpExporterLet<OE::ABC>::visit(luci::CircleBatchToSpaceND *node)
-{
- export_simple(node, circle::BuiltinOperator_BATCH_TO_SPACE_ND,
- circle::BuiltinOptions_BatchToSpaceNDOptions,
- CreateBatchToSpaceNDOptions(_ctx.builder).Union());
-}
-
-void OpExporterLet<OE::ABC>::visit(luci::CircleConv2D *node)
-{
- export_simple(node, circle::BuiltinOperator_CONV_2D, circle::BuiltinOptions_Conv2DOptions,
- CreateConv2DOptions(_ctx.builder, getOpPadding(node->padding()),
- node->stride()->w(), node->stride()->h(),
- to_circle_actfunc(node->fusedActivationFunction()),
- node->dilation()->w(), node->dilation()->h())
- .Union());
-}
-
-void OpExporterLet<OE::ABC>::visit(luci::CircleCos *node)
-{
- export_simple(node, circle::BuiltinOperator_COS, circle::BuiltinOptions_CosOptions,
- CreateCosOptions(_ctx.builder).Union());
-}
-
-void OpExporterLet<OE::ABC>::visit(luci::CircleCustom *node) { export_node(_ctx, node); }
-
-void OpExporterLet<OE::DEF>::visit(luci::CircleDepthToSpace *node)
-{
- export_simple(node, circle::BuiltinOperator_DEPTH_TO_SPACE,
- circle::BuiltinOptions_DepthToSpaceOptions,
- CreateDepthToSpaceOptions(_ctx.builder, node->block_size()).Union());
-}
-
-void OpExporterLet<OE::DEF>::visit(luci::CircleDepthwiseConv2D *node)
-{
- export_simple(
- node, circle::BuiltinOperator_DEPTHWISE_CONV_2D, circle::BuiltinOptions_DepthwiseConv2DOptions,
- CreateDepthwiseConv2DOptions(_ctx.builder, getOpPadding(node->padding()), node->stride()->w(),
- node->stride()->h(), node->depthMultiplier(),
- to_circle_actfunc(node->fusedActivationFunction()),
- node->dilation()->w(), node->dilation()->h())
- .Union());
-}
-
-void OpExporterLet<OE::DEF>::visit(luci::CircleDequantize *node)
-{
- export_simple(node, circle::BuiltinOperator_DEQUANTIZE);
-}
-
-void OpExporterLet<OE::DEF>::visit(luci::CircleDiv *node)
-{
- export_simple(
- node, circle::BuiltinOperator_DIV, circle::BuiltinOptions_DivOptions,
- CreateDivOptions(_ctx.builder, to_circle_actfunc(node->fusedActivationFunction())).Union());
-}
-
-void OpExporterLet<OE::DEF>::visit(luci::CircleElu *node)
-{
- export_simple(node, circle::BuiltinOperator_ELU);
-}
-
-void OpExporterLet<OE::DEF>::visit(luci::CircleEqual *node)
-{
- export_simple(node, circle::BuiltinOperator_EQUAL, circle::BuiltinOptions_EqualOptions,
- CreateEqualOptions(_ctx.builder).Union());
-}
-
-void OpExporterLet<OE::DEF>::visit(luci::CircleExp *node)
-{
- export_simple(node, circle::BuiltinOperator_EXP, circle::BuiltinOptions_ExpOptions,
- CreateExpOptions(_ctx.builder).Union());
-}
-
-void OpExporterLet<OE::DEF>::visit(luci::CircleExpandDims *node)
-{
- export_simple(node, circle::BuiltinOperator_EXPAND_DIMS, circle::BuiltinOptions_ExpandDimsOptions,
- CreateExpandDimsOptions(_ctx.builder).Union());
-}
-
-void OpExporterLet<OE::DEF>::visit(luci::CircleFakeQuant *node)
-{
- export_simple(node, circle::BuiltinOperator_FAKE_QUANT, circle::BuiltinOptions_FakeQuantOptions,
- CreateFakeQuantOptions(_ctx.builder, node->min(), node->max(), node->num_bits(),
- node->narrow_range())
- .Union());
-}
-
-void OpExporterLet<OE::DEF>::visit(luci::CircleFill *node)
-{
- export_simple(node, circle::BuiltinOperator_FILL, circle::BuiltinOptions_FillOptions,
- CreateFillOptions(_ctx.builder).Union());
-}
-
-void OpExporterLet<OE::DEF>::visit(luci::CircleFloor *node)
-{
- export_simple(node, circle::BuiltinOperator_FLOOR);
-}
-
-void OpExporterLet<OE::DEF>::visit(luci::CircleFloorDiv *node)
-{
- export_simple(node, circle::BuiltinOperator_FLOOR_DIV, circle::BuiltinOptions_FloorDivOptions,
- CreateFloorDivOptions(_ctx.builder).Union());
-}
-
-void OpExporterLet<OE::DEF>::visit(luci::CircleFloorMod *node)
-{
- export_simple(node, circle::BuiltinOperator_FLOOR_MOD, circle::BuiltinOptions_FloorModOptions,
- CreateFloorModOptions(_ctx.builder).Union());
-}
-
-void OpExporterLet<OE::DEF>::visit(luci::CircleFullyConnected *node)
-{
- export_simple(
- node, circle::BuiltinOperator_FULLY_CONNECTED, circle::BuiltinOptions_FullyConnectedOptions,
- CreateFullyConnectedOptions(_ctx.builder, to_circle_actfunc(node->fusedActivationFunction()),
- to_circle_weightsformat(node->weights_format()))
- .Union());
-}
-
-void OpExporterLet<OE::GHIJ>::visit(luci::CircleGather *node)
-{
- export_simple(node, circle::BuiltinOperator_GATHER, circle::BuiltinOptions_GatherOptions,
- CreateGatherOptions(_ctx.builder, node->axis()).Union());
-}
-
-void OpExporterLet<OE::GHIJ>::visit(luci::CircleGatherNd *node)
-{
- export_simple(node, circle::BuiltinOperator_GATHER_ND, circle::BuiltinOptions_GatherNdOptions,
- CreateGatherNdOptions(_ctx.builder).Union());
-}
-
-void OpExporterLet<OE::GHIJ>::visit(luci::CircleGreater *node)
-{
- export_simple(node, circle::BuiltinOperator_GREATER, circle::BuiltinOptions_GreaterOptions,
- CreateGreaterOptions(_ctx.builder).Union());
-}
-
-void OpExporterLet<OE::GHIJ>::visit(luci::CircleGreaterEqual *node)
-{
- export_simple(node, circle::BuiltinOperator_GREATER_EQUAL,
- circle::BuiltinOptions_GreaterEqualOptions,
- CreateGreaterEqualOptions(_ctx.builder).Union());
-}
-
-void OpExporterLet<OE::GHIJ>::visit(luci::CircleIf *node) { export_node(_ctx, node); }
-
-void OpExporterLet<OE::KLMN>::visit(luci::CircleL2Normalize *node)
-{
- export_simple(
- node, circle::BuiltinOperator_L2_NORMALIZATION, circle::BuiltinOptions_L2NormOptions,
- CreateL2NormOptions(_ctx.builder, to_circle_actfunc(node->fusedActivationFunction())).Union());
-}
-
-void OpExporterLet<OE::KLMN>::visit(luci::CircleL2Pool2D *node)
-{
- export_pool_2d<luci::CircleL2Pool2D>(_ctx, node, circle::BuiltinOperator_L2_POOL_2D);
-}
-
-void OpExporterLet<OE::KLMN>::visit(luci::CircleLeakyRelu *node)
-{
- export_simple(node, circle::BuiltinOperator_LEAKY_RELU, circle::BuiltinOptions_LeakyReluOptions,
- CreateLeakyReluOptions(_ctx.builder, node->alpha()).Union());
-}
-
-void OpExporterLet<OE::KLMN>::visit(luci::CircleLess *node)
-{
- export_simple(node, circle::BuiltinOperator_LESS, circle::BuiltinOptions_LessOptions,
- CreateLessOptions(_ctx.builder).Union());
-}
-
-void OpExporterLet<OE::KLMN>::visit(luci::CircleLessEqual *node)
-{
- export_simple(node, circle::BuiltinOperator_LESS_EQUAL, circle::BuiltinOptions_LessEqualOptions,
- CreateLessEqualOptions(_ctx.builder).Union());
-}
-
-void OpExporterLet<OE::KLMN>::visit(luci::CircleLocalResponseNormalization *node)
-{
- export_simple(node, circle::BuiltinOperator_LOCAL_RESPONSE_NORMALIZATION,
- circle::BuiltinOptions_LocalResponseNormalizationOptions,
- CreateLocalResponseNormalizationOptions(_ctx.builder, node->radius(), node->bias(),
- node->alpha(), node->beta())
- .Union());
-}
-
-void OpExporterLet<OE::KLMN>::visit(luci::CircleLog *node)
-{
- export_simple(node, circle::BuiltinOperator_LOG);
-}
-
-void OpExporterLet<OE::KLMN>::visit(luci::CircleLogicalAnd *node)
-{
- export_simple(node, circle::BuiltinOperator_LOGICAL_AND, circle::BuiltinOptions_LogicalAndOptions,
- CreateLogicalAndOptions(_ctx.builder).Union());
-}
-
-void OpExporterLet<OE::KLMN>::visit(luci::CircleLogicalNot *node)
-{
- export_simple(node, circle::BuiltinOperator_LOGICAL_NOT, circle::BuiltinOptions_LogicalNotOptions,
- CreateLogicalNotOptions(_ctx.builder).Union());
-}
-
-void OpExporterLet<OE::KLMN>::visit(luci::CircleLogicalOr *node)
-{
- export_simple(node, circle::BuiltinOperator_LOGICAL_OR, circle::BuiltinOptions_LogicalOrOptions,
- CreateLogicalOrOptions(_ctx.builder).Union());
-}
-
-void OpExporterLet<OE::KLMN>::visit(luci::CircleLogistic *node)
-{
- export_simple(node, circle::BuiltinOperator_LOGISTIC);
-}
-
-void OpExporterLet<OE::KLMN>::visit(luci::CircleLogSoftmax *node)
-{
- export_simple(node, circle::BuiltinOperator_LOG_SOFTMAX, circle::BuiltinOptions_LogSoftmaxOptions,
- CreateLogSoftmaxOptions(_ctx.builder).Union());
-}
-
-void OpExporterLet<OE::KLMN>::visit(luci::CircleMatrixDiag *node)
-{
- export_simple(node, circle::BuiltinOperator_MATRIX_DIAG, circle::BuiltinOptions_MatrixDiagOptions,
- CreateMatrixDiagOptions(_ctx.builder).Union());
-}
-
-void OpExporterLet<OE::KLMN>::visit(luci::CircleMatrixSetDiag *node)
-{
- export_simple(node, circle::BuiltinOperator_MATRIX_SET_DIAG,
- circle::BuiltinOptions_MatrixSetDiagOptions,
- CreateMatrixSetDiagOptions(_ctx.builder).Union());
-}
-
-void OpExporterLet<OE::KLMN>::visit(luci::CircleMaximum *node)
-{
- export_simple(node, circle::BuiltinOperator_MAXIMUM, circle::BuiltinOptions_MaximumMinimumOptions,
- CreateMaximumMinimumOptions(_ctx.builder).Union());
-}
-
-void OpExporterLet<OE::KLMN>::visit(luci::CircleMaxPool2D *node)
-{
- export_pool_2d<luci::CircleMaxPool2D>(_ctx, node, circle::BuiltinOperator_MAX_POOL_2D);
-}
-
-void OpExporterLet<OE::KLMN>::visit(luci::CircleMean *node)
-{
- export_simple(node, circle::BuiltinOperator_MEAN, circle::BuiltinOptions_ReducerOptions,
- CreateReducerOptions(_ctx.builder, node->keep_dims()).Union());
-}
-
-void OpExporterLet<OE::KLMN>::visit(luci::CircleMinimum *node)
-{
- export_simple(node, circle::BuiltinOperator_MINIMUM, circle::BuiltinOptions_MaximumMinimumOptions,
- CreateMaximumMinimumOptions(_ctx.builder).Union());
-}
-
-void OpExporterLet<OE::KLMN>::visit(luci::CircleMirrorPad *node)
-{
- export_simple(
- node, circle::BuiltinOperator_MIRROR_PAD, circle::BuiltinOptions_MirrorPadOptions,
- CreateMirrorPadOptions(_ctx.builder, to_circle_mirrorpadmode(node->mode())).Union());
-}
-
-void OpExporterLet<OE::KLMN>::visit(luci::CircleMul *node)
-{
- export_simple(
- node, circle::BuiltinOperator_MUL, circle::BuiltinOptions_MulOptions,
- CreateMulOptions(_ctx.builder, to_circle_actfunc(node->fusedActivationFunction())).Union());
-}
-
-void OpExporterLet<OE::KLMN>::visit(luci::CircleNeg *node)
-{
- export_simple(node, circle::BuiltinOperator_NEG, circle::BuiltinOptions_NegOptions,
- CreateNegOptions(_ctx.builder).Union());
-}
-
-void OpExporterLet<OE::KLMN>::visit(luci::CircleNonMaxSuppressionV4 *node)
-{
- export_node(_ctx, node);
-}
-
-void OpExporterLet<OE::KLMN>::visit(luci::CircleNonMaxSuppressionV5 *node)
-{
- export_node(_ctx, node);
-}
-
-void OpExporterLet<OE::KLMN>::visit(luci::CircleNotEqual *node)
-{
- export_simple(node, circle::BuiltinOperator_NOT_EQUAL, circle::BuiltinOptions_NotEqualOptions,
- CreateNotEqualOptions(_ctx.builder).Union());
-}
-
-void OpExporterLet<OE::OPQR>::visit(luci::CircleOneHot *node)
-{
- export_simple(node, circle::BuiltinOperator_ONE_HOT, circle::BuiltinOptions_OneHotOptions,
- CreateOneHotOptions(_ctx.builder, node->axis()).Union());
-}
-
-void OpExporterLet<OE::OPQR>::visit(luci::CirclePack *node)
-{
- export_simple(node, circle::BuiltinOperator_PACK, circle::BuiltinOptions_PackOptions,
- CreatePackOptions(_ctx.builder, node->values_count(), node->axis()).Union());
-}
-
-void OpExporterLet<OE::OPQR>::visit(luci::CirclePad *node)
-{
- export_simple(node, circle::BuiltinOperator_PAD, circle::BuiltinOptions_PadOptions,
- CreatePadOptions(_ctx.builder).Union());
-}
-
-void OpExporterLet<OE::OPQR>::visit(luci::CirclePadV2 *node)
-{
- export_simple(node, circle::BuiltinOperator_PADV2, circle::BuiltinOptions_PadV2Options,
- CreatePadV2Options(_ctx.builder).Union());
-}
-
-void OpExporterLet<OE::OPQR>::visit(luci::CirclePow *node)
-{
- export_simple(node, circle::BuiltinOperator_POW, circle::BuiltinOptions_PowOptions,
- CreatePowOptions(_ctx.builder).Union());
-}
-
-void OpExporterLet<OE::OPQR>::visit(luci::CirclePRelu *node)
-{
- export_simple(node, circle::BuiltinOperator_PRELU);
-}
-
-void OpExporterLet<OE::OPQR>::visit(luci::CircleQuantize *node)
-{
- export_simple(node, circle::BuiltinOperator_QUANTIZE);
-}
-
-void OpExporterLet<OE::OPQR>::visit(luci::CircleRange *node)
-{
- export_simple(node, circle::BuiltinOperator_RANGE, circle::BuiltinOptions_RangeOptions,
- CreateRangeOptions(_ctx.builder).Union());
-}
-
-void OpExporterLet<OE::OPQR>::visit(luci::CircleRank *node)
-{
- export_simple(node, circle::BuiltinOperator_RANK, circle::BuiltinOptions_RankOptions,
- CreateRankOptions(_ctx.builder).Union());
-}
-
-void OpExporterLet<OE::OPQR>::visit(luci::CircleReduceAny *node)
-{
- export_simple(node, circle::BuiltinOperator_REDUCE_ANY, circle::BuiltinOptions_ReducerOptions,
- CreateReducerOptions(_ctx.builder, node->keep_dims()).Union());
-}
-
-void OpExporterLet<OE::OPQR>::visit(luci::CircleReduceMax *node)
-{
- export_simple(node, circle::BuiltinOperator_REDUCE_MAX, circle::BuiltinOptions_ReducerOptions,
- CreateReducerOptions(_ctx.builder, node->keep_dims()).Union());
-}
-
-void OpExporterLet<OE::OPQR>::visit(luci::CircleReduceMin *node)
-{
- export_simple(node, circle::BuiltinOperator_REDUCE_MIN, circle::BuiltinOptions_ReducerOptions,
- CreateReducerOptions(_ctx.builder, node->keep_dims()).Union());
-}
-
-void OpExporterLet<OE::OPQR>::visit(luci::CircleReduceProd *node)
-{
- export_simple(node, circle::BuiltinOperator_REDUCE_PROD, circle::BuiltinOptions_ReducerOptions,
- CreateReducerOptions(_ctx.builder, node->keep_dims()).Union());
-}
-
-void OpExporterLet<OE::OPQR>::visit(luci::CircleRelu *node)
-{
- export_simple(node, circle::BuiltinOperator_RELU);
-}
-
-void OpExporterLet<OE::OPQR>::visit(luci::CircleRelu6 *node)
-{
- export_simple(node, circle::BuiltinOperator_RELU6);
-}
-
-void OpExporterLet<OE::OPQR>::visit(luci::CircleReluN1To1 *node)
-{
- export_simple(node, circle::BuiltinOperator_RELU_N1_TO_1);
-}
-
-void OpExporterLet<OE::OPQR>::visit(luci::CircleReshape *node)
-{
- auto new_shape = _ctx.builder.CreateVector<int32_t>(
- node->newShape()->rank(), [node](size_t i) { return node->newShape()->dim(i); });
-
- export_simple(node, circle::BuiltinOperator_RESHAPE, circle::BuiltinOptions_ReshapeOptions,
- CreateReshapeOptions(_ctx.builder, new_shape).Union());
-}
-
-void OpExporterLet<OE::OPQR>::visit(luci::CircleResizeBilinear *node)
-{
- export_simple(
- node, circle::BuiltinOperator_RESIZE_BILINEAR, circle::BuiltinOptions_ResizeBilinearOptions,
- CreateResizeBilinearOptions(_ctx.builder, node->align_corners(), node->half_pixel_centers())
- .Union());
-}
-
-void OpExporterLet<OE::OPQR>::visit(luci::CircleResizeNearestNeighbor *node)
-{
- export_simple(node, circle::BuiltinOperator_RESIZE_NEAREST_NEIGHBOR,
- circle::BuiltinOptions_ResizeNearestNeighborOptions,
- CreateResizeNearestNeighborOptions(_ctx.builder, node->align_corners()).Union());
-}
-
-void OpExporterLet<OE::OPQR>::visit(luci::CircleReverseSequence *node)
-{
- export_simple(
- node, circle::BuiltinOperator_REVERSE_SEQUENCE, circle::BuiltinOptions_ReverseSequenceOptions,
- CreateReverseSequenceOptions(_ctx.builder, node->seq_axis(), node->batch_axis()).Union());
-}
-
-void OpExporterLet<OE::OPQR>::visit(luci::CircleReverseV2 *node) { export_node(_ctx, node); }
-
-void OpExporterLet<OE::OPQR>::visit(luci::CircleRound *node)
-{
- export_simple(node, circle::BuiltinOperator_ROUND);
-}
-
-void OpExporterLet<OE::OPQR>::visit(luci::CircleRsqrt *node)
-{
- export_simple(node, circle::BuiltinOperator_RSQRT);
-}
-
-void OpExporterLet<OE::STUV>::visit(luci::CircleScatterNd *node)
-{
- export_simple(node, circle::BuiltinOperator_SCATTER_ND, circle::BuiltinOptions_ScatterNdOptions,
- CreateScatterNdOptions(_ctx.builder).Union());
-}
-
-void OpExporterLet<OE::STUV>::visit(luci::CircleSegmentSum *node)
-{
- export_simple(node, circle::BuiltinOperator_SEGMENT_SUM, circle::BuiltinOptions_SegmentSumOptions,
- CreateSegmentSumOptions(_ctx.builder).Union());
-}
-
-void OpExporterLet<OE::STUV>::visit(luci::CircleSelect *node)
-{
- export_simple(node, circle::BuiltinOperator_SELECT, circle::BuiltinOptions_SelectOptions,
- CreateSelectOptions(_ctx.builder).Union());
-}
-
-void OpExporterLet<OE::STUV>::visit(luci::CircleSelectV2 *node)
-{
- export_simple(node, circle::BuiltinOperator_SELECT_V2, circle::BuiltinOptions_SelectV2Options,
- CreateSelectV2Options(_ctx.builder).Union());
-}
-
-void OpExporterLet<OE::STUV>::visit(luci::CircleShape *node)
-{
- export_simple(node, circle::BuiltinOperator_SHAPE, circle::BuiltinOptions_ShapeOptions,
- CreateShapeOptions(_ctx.builder, to_circle_tensortype(node->out_type())).Union());
-}
-
-void OpExporterLet<OE::STUV>::visit(luci::CircleSin *node)
-{
- export_simple(node, circle::BuiltinOperator_SIN);
-}
-
-void OpExporterLet<OE::STUV>::visit(luci::CircleSlice *node)
-{
- export_simple(node, circle::BuiltinOperator_SLICE, circle::BuiltinOptions_SliceOptions,
- CreateSliceOptions(_ctx.builder).Union());
-}
-
-void OpExporterLet<OE::STUV>::visit(luci::CircleSoftmax *node)
-{
- export_simple(node, circle::BuiltinOperator_SOFTMAX, circle::BuiltinOptions_SoftmaxOptions,
- CreateSoftmaxOptions(_ctx.builder, node->beta()).Union());
-}
-
-void OpExporterLet<OE::STUV>::visit(luci::CircleSpaceToBatchND *node)
-{
- export_simple(node, circle::BuiltinOperator_SPACE_TO_BATCH_ND,
- circle::BuiltinOptions_SpaceToBatchNDOptions,
- CreateSpaceToBatchNDOptions(_ctx.builder).Union());
-}
-
-void OpExporterLet<OE::STUV>::visit(luci::CircleSpaceToDepth *node)
-{
- export_simple(node, circle::BuiltinOperator_SPACE_TO_DEPTH,
- circle::BuiltinOptions_SpaceToDepthOptions,
- CreateSpaceToDepthOptions(_ctx.builder, node->block_size()).Union());
-}
-
-void OpExporterLet<OE::STUV>::visit(luci::CircleSparseToDense *node)
-{
- export_simple(node, circle::BuiltinOperator_SPARSE_TO_DENSE,
- circle::BuiltinOptions_SparseToDenseOptions,
- CreateSparseToDenseOptions(_ctx.builder, node->validate_indices()).Union());
-}
-
-void OpExporterLet<OE::STUV>::visit(luci::CircleSplit *node) { export_node(_ctx, node); }
-
-void OpExporterLet<OE::STUV>::visit(luci::CircleSplitV *node) { export_node(_ctx, node); }
-
-void OpExporterLet<OE::STUV>::visit(luci::CircleSqrt *node)
-{
- export_simple(node, circle::BuiltinOperator_SQRT);
-}
-
-void OpExporterLet<OE::STUV>::visit(luci::CircleSquare *node)
-{
- export_simple(node, circle::BuiltinOperator_SQUARE, circle::BuiltinOptions_SquareOptions,
- CreateSquareOptions(_ctx.builder).Union());
-}
-
-void OpExporterLet<OE::STUV>::visit(luci::CircleSquaredDifference *node)
-{
- export_simple(node, circle::BuiltinOperator_SQUARED_DIFFERENCE,
- circle::BuiltinOptions_SquaredDifferenceOptions,
- CreateSquaredDifferenceOptions(_ctx.builder).Union());
-}
-
-void OpExporterLet<OE::STUV>::visit(luci::CircleSqueeze *node)
-{
- auto squeeze_dims = _ctx.builder.CreateVector<int32_t>(node->squeeze_dims());
- export_simple(node, circle::BuiltinOperator_SQUEEZE, circle::BuiltinOptions_SqueezeOptions,
- CreateSqueezeOptions(_ctx.builder, squeeze_dims).Union());
-}
-
-void OpExporterLet<OE::STUV>::visit(luci::CircleStridedSlice *node)
-{
- export_simple(node, circle::BuiltinOperator_STRIDED_SLICE,
- circle::BuiltinOptions_StridedSliceOptions,
- CreateStridedSliceOptions(_ctx.builder, node->begin_mask(), node->end_mask(),
- node->ellipsis_mask(), node->new_axis_mask(),
- node->shrink_axis_mask())
- .Union());
-}
-
-void OpExporterLet<OE::STUV>::visit(luci::CircleSub *node)
-{
- export_simple(
- node, circle::BuiltinOperator_SUB, circle::BuiltinOptions_SubOptions,
- CreateSubOptions(_ctx.builder, to_circle_actfunc(node->fusedActivationFunction())).Union());
-}
-
-void OpExporterLet<OE::STUV>::visit(luci::CircleSum *node)
-{
- export_simple(node, circle::BuiltinOperator_SUM, circle::BuiltinOptions_ReducerOptions,
- CreateReducerOptions(_ctx.builder, node->keep_dims()).Union());
-}
-
-void OpExporterLet<OE::STUV>::visit(luci::CircleTanh *node)
-{
- export_simple(node, circle::BuiltinOperator_TANH);
-}
-
-void OpExporterLet<OE::STUV>::visit(luci::CircleTile *node)
-{
- export_simple(node, circle::BuiltinOperator_TILE, circle::BuiltinOptions_TileOptions,
- CreateTileOptions(_ctx.builder).Union());
-}
-
-void OpExporterLet<OE::STUV>::visit(luci::CircleTopKV2 *node) { export_node(_ctx, node); }
-
-void OpExporterLet<OE::STUV>::visit(luci::CircleTranspose *node)
-{
- export_simple(node, circle::BuiltinOperator_TRANSPOSE, circle::BuiltinOptions_TransposeOptions,
- CreateTransposeOptions(_ctx.builder).Union());
-}
-
-void OpExporterLet<OE::STUV>::visit(luci::CircleTransposeConv *node)
-{
- export_simple(node, circle::BuiltinOperator_TRANSPOSE_CONV,
- circle::BuiltinOptions_TransposeConvOptions,
- CreateTransposeConvOptions(_ctx.builder, getOpPadding(node->padding()),
- node->stride()->w(), node->stride()->h())
- .Union());
-}
-
-void OpExporterLet<OE::STUV>::visit(luci::CircleUnidirectionalSequenceLSTM *node)
-{
- export_simple(node, circle::BuiltinOperator_UNIDIRECTIONAL_SEQUENCE_LSTM,
- circle::BuiltinOptions_UnidirectionalSequenceLSTMOptions,
- CreateUnidirectionalSequenceLSTMOptions(
- _ctx.builder, to_circle_actfunc(node->fusedActivationFunction()),
- node->cell_clip(), node->proj_clip(), node->time_major(),
- node->asymmetric_quantize_inputs())
- .Union());
-}
-
-void OpExporterLet<OE::STUV>::visit(luci::CircleUnique *node) { export_node(_ctx, node); }
-
-void OpExporterLet<OE::STUV>::visit(luci::CircleUnpack *node) { export_node(_ctx, node); }
-
-void OpExporterLet<OE::WXYZ>::visit(luci::CircleWhere *node)
-{
- export_simple(node, circle::BuiltinOperator_WHERE, circle::BuiltinOptions_WhereOptions,
- CreateWhereOptions(_ctx.builder).Union());
-}
-
-void OpExporterLet<OE::WXYZ>::visit(luci::CircleWhile *node) { export_node(_ctx, node); }
-
-void OpExporterLet<OE::WXYZ>::visit(luci::CircleZerosLike *node)
-{
- export_simple(node, circle::BuiltinOperator_ZEROS_LIKE, circle::BuiltinOptions_ZerosLikeOptions,
- CreateZerosLikeOptions(_ctx.builder).Union());
-}
-
-void OpExporterLet<OE::CIRC>::visit(luci::CircleBCQFullyConnected *node)
-{
- export_simple(node, circle::BuiltinOperator_BCQ_FULLY_CONNECTED,
- circle::BuiltinOptions_BCQFullyConnectedOptions,
- CreateBCQFullyConnectedOptions(_ctx.builder, node->weights_hidden_size(),
- to_circle_actfunc(node->fusedActivationFunction()))
- .Union());
-}
-
-void OpExporterLet<OE::CIRC>::visit(luci::CircleBCQGather *node)
-{
- export_simple(
- node, circle::BuiltinOperator_BCQ_GATHER, circle::BuiltinOptions_BCQGatherOptions,
- CreateBCQGatherOptions(_ctx.builder, node->input_hidden_size(), node->axis()).Union());
-}
-
-void OpExporterLet<OE::CIRC>::visit(luci::CircleInstanceNorm *node)
+namespace luci
{
- export_simple(node, circle::BuiltinOperator_INSTANCE_NORM,
- circle::BuiltinOptions_InstanceNormOptions,
- CreateInstanceNormOptions(_ctx.builder, node->epsilon(),
- to_circle_actfunc(node->fusedActivationFunction()))
- .Union());
-}
-void exportNode(loco::Node *node, flatbuffers::FlatBufferBuilder &builder, SerializedModelData &md,
- SerializedGraphData &gd, uint32_t node_position)
+void exportNodes(loco::Graph *g, flatbuffers::FlatBufferBuilder &builder, SerializedModelData &md,
+ SerializedGraphData &gd)
{
- if (auto circle_node = dynamic_cast<luci::CircleNode *>(node))
+ uint32_t node_position = 0;
+ for (auto node : loco::postorder_traversal(loco::output_nodes(g)))
{
ExportContext ctx{builder, md, gd};
- OperationExporter exporter{ctx};
+ OperationExporterRule exporter_rule{ctx};
+
+ auto circle_node = loco::must_cast<luci::CircleNode *>(node);
+ circle_node->accept(&exporter_rule);
const auto ops_size = gd._operators.size();
- exporter.export_node(circle_node);
if (has_origin(circle_node) && ops_size != gd._operators.size())
{
const auto node_id = gd._operators.size() - 1;
@@ -1716,25 +60,7 @@ void exportNode(loco::Node *node, flatbuffers::FlatBufferBuilder &builder, Seria
}
md._metadata.add_execution_plan_table(node_position, execution_plan_vector);
}
- }
- else
- {
- INTERNAL_EXN("Node with unsupported dialect found");
- }
-}
-} // namespace
-
-namespace luci
-{
-
-void exportNodes(loco::Graph *g, FlatBufferBuilder &builder, SerializedModelData &md,
- SerializedGraphData &gd)
-{
- uint32_t node_position = 0;
- for (auto node : loco::postorder_traversal(loco::output_nodes(g)))
- {
- exportNode(node, builder, md, gd, node_position);
node_position++;
}
}
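
The rewritten `exportNodes` inlines the per-node logic and keeps one subtle guard: origin and execution-plan metadata are recorded only when the visit actually appended an operator, which is how virtual nodes (inputs, outputs, `Circle*Out`) are skipped. A small standalone sketch of that guard:

```cpp
#include <cstddef>
#include <iostream>
#include <vector>

// Sketch of the "did the visitor emit an operator?" guard in exportNodes:
// metadata is attached only when the operator table actually grew.
int main()
{
  std::vector<int> operators; // stand-in for gd._operators

  auto export_one = [&](bool emits) {
    const std::size_t ops_size = operators.size();
    if (emits)
      operators.push_back(0); // a rule visit that appends an operator
    // Virtual nodes append nothing, so ops_size stays equal and no
    // origin/execution-plan entry is written for them.
    if (ops_size != operators.size())
      std::cout << "metadata for operator " << operators.size() - 1 << "\n";
  };

  export_one(true);  // real operator: metadata row written
  export_one(false); // virtual node: silently skipped
  return 0;
}
```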
diff --git a/compiler/luci/export/src/CircleOperationExporter.h b/compiler/luci/export/src/CircleOperationExporter.h
index de6abfc54..f2b3cfd6b 100644
--- a/compiler/luci/export/src/CircleOperationExporter.h
+++ b/compiler/luci/export/src/CircleOperationExporter.h
@@ -17,7 +17,7 @@
#ifndef __CIRCLE_OPERATION_EXPORTER_H__
#define __CIRCLE_OPERATION_EXPORTER_H__
-#include "CircleExporterUtils.h"
+#include "SerializedData.h"
#include <loco/IR/Graph.h>
diff --git a/compiler/luci/export/src/CircleOperationExporterRule.cpp b/compiler/luci/export/src/CircleOperationExporterRule.cpp
new file mode 100644
index 000000000..8dc59fa9c
--- /dev/null
+++ b/compiler/luci/export/src/CircleOperationExporterRule.cpp
@@ -0,0 +1,277 @@
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "CircleOperationExporterRule.h"
+#include "CircleBuiltinTypesExtractor.h"
+#include "Check.h"
+
+#include <loco/IR/Graph.h>
+#include <luci/IR/CircleNode.h>
+#include <luci/IR/CircleNodes.h>
+#include <luci/IR/CircleNodeVisitor.h>
+#include <oops/InternalExn.h>
+
+#include <vector>
+
+namespace
+{
+class OutputVectorExtractor final : public luci::CircleNodeMutableVisitor<std::vector<int32_t>>
+{
+public:
+ OutputVectorExtractor()
+ {
+ // DO NOTHING
+ }
+
+public:
+ std::vector<int32_t> visit(luci::CircleNode *node) final
+ {
+ std::vector<int32_t> outputs_vec{luci::get_tensor_index(node)};
+ return outputs_vec;
+ }
+
+ std::vector<int32_t> visit(luci::CircleBidirectionalSequenceLSTM *node) final
+ {
+ auto bidi_lstm_outs = loco::succs(node);
+ assert((bidi_lstm_outs.size() == 1) || (bidi_lstm_outs.size() == 2));
+
+ std::vector<int32_t> outputs_vec(bidi_lstm_outs.size());
+
+ for (auto out : bidi_lstm_outs)
+ {
+ auto bidi_lstm_out = loco::must_cast<luci::CircleBidirectionalSequenceLSTMOut *>(out);
+ if (bidi_lstm_out->index() >= int32_t(bidi_lstm_outs.size()))
+ INTERNAL_EXN("Invalid BidirectionalSequenceLSTM output");
+ outputs_vec[bidi_lstm_out->index()] = luci::get_tensor_index(bidi_lstm_out);
+ }
+
+ return outputs_vec;
+ }
+
+ std::vector<int32_t> visit(luci::CircleCustom *node) final
+ {
+ auto custom_outputs = loco::succs(node);
+ assert(custom_outputs.size() == node->numOutputs());
+
+ std::vector<int32_t> outputs_vec(node->numOutputs());
+
+ for (auto out : custom_outputs)
+ {
+ auto custom_out = loco::must_cast<luci::CircleCustomOut *>(out);
+ if (custom_out->index() >= int32_t(node->numOutputs()))
+ INTERNAL_EXN("Invalid Custom output");
+ outputs_vec[custom_out->index()] = luci::get_tensor_index(custom_out);
+ }
+
+ return outputs_vec;
+ }
+
+ std::vector<int32_t> visit(luci::CircleIf *node) final
+ {
+ auto if_outs = loco::succs(node);
+ assert(if_outs.size() == node->output_count());
+
+ std::vector<int32_t> outputs_vec(node->output_count());
+
+ for (auto out : if_outs)
+ {
+ auto if_out = loco::must_cast<luci::CircleIfOut *>(out);
+ if (if_out->index() >= int32_t(node->output_count()))
+ INTERNAL_EXN("Invalid If output");
+ outputs_vec[if_out->index()] = luci::get_tensor_index(if_out);
+ }
+
+ return outputs_vec;
+ }
+
+ std::vector<int32_t> visit(luci::CircleNonMaxSuppressionV4 *node) final
+ {
+ auto nms_outs = loco::succs(node);
+ assert(nms_outs.size() == 2);
+
+ std::vector<int32_t> outputs_vec(2);
+
+ for (auto out : nms_outs)
+ {
+ auto nms_out = loco::must_cast<luci::CircleNonMaxSuppressionV4Out *>(out);
+ if (nms_out->index() >= 2)
+ INTERNAL_EXN("Invalid NonMaxSuppressionV4 output");
+ outputs_vec[nms_out->index()] = luci::get_tensor_index(nms_out);
+ }
+
+ return outputs_vec;
+ }
+
+ std::vector<int32_t> visit(luci::CircleNonMaxSuppressionV5 *node) final
+ {
+ auto nms_outs = loco::succs(node);
+ assert(nms_outs.size() == 3);
+
+ std::vector<int32_t> outputs_vec(3);
+
+ for (auto out : nms_outs)
+ {
+ auto nms_out = loco::must_cast<luci::CircleNonMaxSuppressionV5Out *>(out);
+ if (nms_out->index() >= 3)
+ INTERNAL_EXN("Invalid NonMaxSuppressionV5 output");
+ outputs_vec[nms_out->index()] = luci::get_tensor_index(nms_out);
+ }
+
+ return outputs_vec;
+ }
+
+ std::vector<int32_t> visit(luci::CircleSplit *node) final
+ {
+ auto split_outs = loco::succs(node);
+ assert(int32_t(split_outs.size()) == node->num_split());
+
+ std::vector<int32_t> outputs_vec(node->num_split());
+
+ for (auto out : split_outs)
+ {
+ auto split_out = loco::must_cast<luci::CircleSplitOut *>(out);
+ if (split_out->index() >= node->num_split())
+ INTERNAL_EXN("Invalid Split output");
+ outputs_vec[split_out->index()] = luci::get_tensor_index(split_out);
+ }
+
+ return outputs_vec;
+ }
+
+ std::vector<int32_t> visit(luci::CircleSplitV *node) final
+ {
+ auto split_outs = loco::succs(node);
+ assert(int32_t(split_outs.size()) == node->num_split());
+
+ std::vector<int32_t> outputs_vec(node->num_split());
+
+ for (auto out : split_outs)
+ {
+ auto split_out = loco::must_cast<luci::CircleSplitVOut *>(out);
+ if (split_out->index() >= node->num_split())
+ INTERNAL_EXN("Invalid SplitV output");
+ outputs_vec[split_out->index()] = luci::get_tensor_index(split_out);
+ }
+
+ return outputs_vec;
+ }
+
+ std::vector<int32_t> visit(luci::CircleTopKV2 *node) final
+ {
+ auto topkv2_outs = loco::succs(node);
+ assert(topkv2_outs.size() == 2);
+
+ std::vector<int32_t> outputs_vec(2);
+
+ for (auto out : topkv2_outs)
+ {
+ auto topkv2_out = loco::must_cast<luci::CircleTopKV2Out *>(out);
+ if (topkv2_out->index() >= 2)
+ INTERNAL_EXN("Invalid TopKV2 output");
+ outputs_vec[topkv2_out->index()] = luci::get_tensor_index(topkv2_out);
+ }
+
+ return outputs_vec;
+ }
+
+ std::vector<int32_t> visit(luci::CircleUnique *node) final
+ {
+ auto unique_outs = loco::succs(node);
+ assert(unique_outs.size() == 2);
+
+ std::vector<int32_t> outputs_vec(2);
+
+ for (auto out : unique_outs)
+ {
+ auto unique_out = loco::must_cast<luci::CircleUniqueOut *>(out);
+ if (unique_out->index() >= 2)
+ INTERNAL_EXN("Invalid Unique output");
+ outputs_vec[unique_out->index()] = luci::get_tensor_index(unique_out);
+ }
+
+ return outputs_vec;
+ }
+
+ std::vector<int32_t> visit(luci::CircleUnpack *node) final
+ {
+ auto unpack_outs = loco::succs(node);
+ assert(int32_t(unpack_outs.size()) == node->num());
+
+ std::vector<int32_t> outputs_vec(node->num());
+
+ for (auto out : unpack_outs)
+ {
+ auto unpack_out = loco::must_cast<luci::CircleUnpackOut *>(out);
+ if (unpack_out->index() >= node->num())
+ INTERNAL_EXN("Invalid Unpack output");
+ outputs_vec[unpack_out->index()] = luci::get_tensor_index(unpack_out);
+ }
+
+ return outputs_vec;
+ }
+
+ std::vector<int32_t> visit(luci::CircleWhile *node) final
+ {
+ auto while_outs = loco::succs(node);
+ assert(while_outs.size() == node->output_count());
+
+ std::vector<int32_t> outputs_vec(node->output_count());
+
+ for (auto out : while_outs)
+ {
+ auto while_out = loco::must_cast<luci::CircleWhileOut *>(out);
+ if (while_out->index() >= int32_t(node->output_count()))
+ INTERNAL_EXN("Invalid While output");
+ outputs_vec[while_out->index()] = luci::get_tensor_index(while_out);
+ }
+
+ return outputs_vec;
+ }
+};
+
+} // namespace
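
Each extractor visit above replaces the old nested search with direct placement: one pass over the successors, writing each tensor index straight into its slot, with a bounds check standing in for the old "not found" failure (the `assert` on the successor count covers missing slots). The earlier sketch, restated in single-pass form with the same hypothetical `Out` record:

```cpp
#include <cassert>
#include <cstdint>
#include <stdexcept>
#include <vector>

// Hypothetical stand-in for a Circle*Out successor node, as before.
struct Out
{
  int32_t index;
  int32_t tensor;
};

// Single-pass placement: every successor writes its tensor index straight
// into its slot. O(successors) instead of O(count * successors).
std::vector<int32_t> place_by_index(const std::vector<Out> &outs)
{
  std::vector<int32_t> ordered(outs.size());
  for (const auto &out : outs)
  {
    if (out.index < 0 || out.index >= static_cast<int32_t>(outs.size()))
      throw std::runtime_error("invalid output index");
    ordered[out.index] = out.tensor;
  }
  return ordered;
}

int main()
{
  std::vector<Out> outs{{1, 42}, {0, 7}};
  auto ordered = place_by_index(outs);
  assert(ordered[0] == 7 && ordered[1] == 42);
  return 0;
}
```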
+
+namespace luci
+{
+
+void OperationExporterRule::visit(luci::CircleNode *node)
+{
+ auto op_idx = _ctx.md.registerBuiltinOpcode(circle_builtin_operator(node),
+ circle_custom_code(node), node->op_version());
+
+ std::vector<int32_t> inputs_vec;
+ for (uint32_t i = 0; i < node->arity(); ++i)
+ inputs_vec.push_back(luci::get_tensor_index(node->arg(i)));
+ auto inputs = _ctx.builder.CreateVector(inputs_vec);
+
+ OutputVectorExtractor outputs_vec_extractor;
+ auto outputs_vec = node->accept(&outputs_vec_extractor);
+ auto outputs = _ctx.builder.CreateVector(outputs_vec);
+
+ auto builtin_options = circle_builtin_options(node);
+
+ luci::BuiltinOptionsExtractor builtin_options_extractor(_ctx.builder);
+ auto options_offset = node->accept(&builtin_options_extractor);
+
+  // For nodes other than CircleCustom, circle_custom_options returns a null offset (0)
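MARKER-UNUSED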
+ auto custom_options = circle_custom_options(_ctx.builder, node);
+
+ auto op_offset = circle::CreateOperator(_ctx.builder, op_idx, inputs, outputs, builtin_options,
+ options_offset, custom_options);
+ _ctx.gd._operators.push_back(op_offset);
+}
+
+} // namespace luci
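
This single generic `visit` replaces the hundreds of per-operator overloads deleted above: operator code, options type, options payload, and custom options all come from per-node lookups (`circle_builtin_operator`, `circle_builtin_options`, `BuiltinOptionsExtractor`, `circle_custom_options`). A toy, fully hypothetical analogue of the table-driven idea, independent of the luci API:

```cpp
#include <iostream>
#include <string>
#include <unordered_map>

// Toy analogue of the table-driven rule (hypothetical names throughout):
// one generic export path doing lookups instead of one function per op.
struct OpInfo
{
  std::string builtin_operator;
  std::string builtin_options;
};

const std::unordered_map<std::string, OpInfo> kOpTable = {
  {"Abs", {"BuiltinOperator_ABS", "BuiltinOptions_AbsOptions"}},
  {"Add", {"BuiltinOperator_ADD", "BuiltinOptions_AddOptions"}},
};

void export_generic(const std::string &node_kind)
{
  const auto &info = kOpTable.at(node_kind); // one lookup replaces one overload
  std::cout << node_kind << " -> " << info.builtin_operator << " / "
            << info.builtin_options << "\n";
}

int main()
{
  export_generic("Abs");
  export_generic("Add");
  return 0;
}
```

In the real code the tables are generated from `CircleOps.lst`, added below.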
diff --git a/compiler/luci/export/src/CircleOperationExporterRule.h b/compiler/luci/export/src/CircleOperationExporterRule.h
new file mode 100644
index 000000000..23e7546cf
--- /dev/null
+++ b/compiler/luci/export/src/CircleOperationExporterRule.h
@@ -0,0 +1,76 @@
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __CIRCLE_OPERATION_EXPORTER_RULE_H__
+#define __CIRCLE_OPERATION_EXPORTER_RULE_H__
+
+#include "CircleOperationExporter.h"
+
+#include <luci/IR/CircleNode.h>
+#include <luci/IR/CircleNodes.h>
+#include <luci/IR/CircleNodeVisitor.h>
+
+namespace luci
+{
+
+struct ExportContext
+{
+ flatbuffers::FlatBufferBuilder &builder;
+ luci::SerializedModelData &md;
+ luci::SerializedGraphData &gd;
+};
+
+class OperationExporterRule final : public luci::CircleNodeMutableVisitor<void>
+{
+public:
+ OperationExporterRule(ExportContext &ctx) : _ctx{ctx}
+ {
+ // DO NOTHING
+ }
+
+public:
+ // Default export rule
+ void visit(luci::CircleNode *node) final;
+
+ // Non-virtual
+  void visit(luci::CircleConst *) final { /* skip, everything is done in exportOpDefinedTensors */ }
+
+ // Virtual
+ void visit(luci::CircleInput *) final {}
+ void visit(luci::CircleOutput *) final {}
+ void visit(luci::CircleOutputDummy *) final {}
+ void visit(luci::CircleOutputExclude *) final {}
+ // Virtual for multiple-outputs
+ void visit(luci::CircleBidirectionalSequenceLSTMOut *) final {}
+ void visit(luci::CircleCustomOut *) final {}
+ void visit(luci::CircleIfOut *) final {}
+ void visit(luci::CircleNonMaxSuppressionV4Out *) final {}
+ void visit(luci::CircleNonMaxSuppressionV5Out *) final {}
+ void visit(luci::CircleSplitOut *) final {}
+ void visit(luci::CircleSplitVOut *) final {}
+ void visit(luci::CircleTopKV2Out *) final {}
+ void visit(luci::CircleUniqueOut *) final {}
+ void visit(luci::CircleUnpackOut *) final {}
+ void visit(luci::CircleVariable *) final {}
+ void visit(luci::CircleWhileOut *) final {}
+
+protected:
+ ExportContext &_ctx;
+};
+
+} // namespace luci
+
+#endif // __CIRCLE_OPERATION_EXPORTER_RULE_H__
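
A design note on `ExportContext`: it is an aggregate of references, so `ExportContext ctx{builder, md, gd};` binds rather than copies, and the builder and serialized-data objects must outlive every rule constructed over the context. A minimal sketch of that shape, with a hypothetical one-member context:

```cpp
#include <iostream>

// ExportContext-shaped aggregate of references (hypothetical one-member
// version). The referenced objects must outlive the context; copying the
// context only aliases them.
struct Ctx
{
  int &counter;
};

int main()
{
  int counter = 0;
  Ctx ctx{counter};             // same shape as ExportContext{builder, md, gd}
  ctx.counter += 1;             // writes through the reference
  std::cout << counter << "\n"; // prints 1
  return 0;
}
```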
diff --git a/compiler/luci/export/src/CircleOps.lst b/compiler/luci/export/src/CircleOps.lst
new file mode 100644
index 000000000..1b6909303
--- /dev/null
+++ b/compiler/luci/export/src/CircleOps.lst
@@ -0,0 +1,154 @@
+#ifndef CIRCLE_NODE
+#error "Define CIRCLE_NODE"
+#endif // CIRCLE_NODE
+
+#ifndef CIRCLE_VNODE
+#error "Define CIRCLE_VNODE"
+#endif // CIRCLE_VNODE
+
+//
+// PLEASE SORT NODE DECLS IN ALPHABETICAL ORDER
+//
+// NOTE : CIRCLE_VNODE does not take any additional parameters
+//        because virtual nodes are not circle builtin operators.
+//        Please add parameters when they are needed.
+//
+// CIRCLE_NODE(CircleNode, circle::BuiltinOperator, circle::BuiltinOptions)
+// CIRCLE_VNODE(CircleNode)
+//
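
The `#error` guards at the top enforce the X-macro contract: an includer defines `CIRCLE_NODE`/`CIRCLE_VNODE` to whatever expansion it needs, includes the list, then undefines them, so one file feeds every consumer (opcode lookup, options lookup, and so on). A self-contained sketch of the pattern over a hypothetical two-entry list, not the real CircleOps.lst:

```cpp
#include <iostream>
#include <string>

// ops.lst would normally live in its own file; inlined via #define here
// for a self-contained example.
#define OPS_LIST                          \
  CIRCLE_NODE(Abs, "BuiltinOperator_ABS") \
  CIRCLE_NODE(Add, "BuiltinOperator_ADD")

// Consumer 1: generate an enum of node kinds from the list.
enum class NodeKind
{
#define CIRCLE_NODE(CLS, OP) CLS,
  OPS_LIST
#undef CIRCLE_NODE
};

// Consumer 2: generate a kind -> operator-name mapping from the same list.
std::string builtin_operator(NodeKind kind)
{
  switch (kind)
  {
#define CIRCLE_NODE(CLS, OP) \
  case NodeKind::CLS:        \
    return OP;
    OPS_LIST
#undef CIRCLE_NODE
  }
  return "unknown"; // unreachable for valid kinds
}

int main()
{
  std::cout << builtin_operator(NodeKind::Add) << "\n"; // BuiltinOperator_ADD
  return 0;
}
```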
+
+CIRCLE_NODE(CircleAbs, BuiltinOperator_ABS, BuiltinOptions_AbsOptions)
+CIRCLE_NODE(CircleAdd, BuiltinOperator_ADD, BuiltinOptions_AddOptions)
+CIRCLE_NODE(CircleAddN, BuiltinOperator_ADD_N, BuiltinOptions_AddNOptions)
+CIRCLE_NODE(CircleArgMax, BuiltinOperator_ARG_MAX, BuiltinOptions_ArgMaxOptions)
+CIRCLE_NODE(CircleArgMin, BuiltinOperator_ARG_MIN, BuiltinOptions_ArgMinOptions)
+CIRCLE_NODE(CircleAveragePool2D, BuiltinOperator_AVERAGE_POOL_2D, BuiltinOptions_Pool2DOptions)
+CIRCLE_NODE(CircleBatchMatMul, BuiltinOperator_BATCH_MATMUL, BuiltinOptions_BatchMatMulOptions)
+CIRCLE_NODE(CircleBatchToSpaceND, BuiltinOperator_BATCH_TO_SPACE_ND, BuiltinOptions_BatchToSpaceNDOptions)
+CIRCLE_NODE(CircleBidirectionalSequenceLSTM, BuiltinOperator_BIDIRECTIONAL_SEQUENCE_LSTM, BuiltinOptions_BidirectionalSequenceLSTMOptions)
+CIRCLE_NODE(CircleCast, BuiltinOperator_CAST, BuiltinOptions_CastOptions)
+CIRCLE_NODE(CircleCeil, BuiltinOperator_CEIL, BuiltinOptions_NONE)
+CIRCLE_NODE(CircleConcatenation, BuiltinOperator_CONCATENATION, BuiltinOptions_ConcatenationOptions)
+CIRCLE_NODE(CircleConv2D, BuiltinOperator_CONV_2D, BuiltinOptions_Conv2DOptions)
+CIRCLE_NODE(CircleCos, BuiltinOperator_COS, BuiltinOptions_CosOptions)
+CIRCLE_NODE(CircleCustom, BuiltinOperator_CUSTOM, BuiltinOptions_NONE)
+CIRCLE_NODE(CircleDepthToSpace, BuiltinOperator_DEPTH_TO_SPACE, BuiltinOptions_DepthToSpaceOptions)
+CIRCLE_NODE(CircleDepthwiseConv2D, BuiltinOperator_DEPTHWISE_CONV_2D, BuiltinOptions_DepthwiseConv2DOptions)
+CIRCLE_NODE(CircleDequantize, BuiltinOperator_DEQUANTIZE, BuiltinOptions_DequantizeOptions)
+CIRCLE_NODE(CircleDiv, BuiltinOperator_DIV, BuiltinOptions_DivOptions)
+CIRCLE_NODE(CircleElu, BuiltinOperator_ELU, BuiltinOptions_NONE)
+CIRCLE_NODE(CircleEqual, BuiltinOperator_EQUAL, BuiltinOptions_EqualOptions)
+CIRCLE_NODE(CircleExp, BuiltinOperator_EXP, BuiltinOptions_ExpOptions)
+CIRCLE_NODE(CircleExpandDims, BuiltinOperator_EXPAND_DIMS, BuiltinOptions_ExpandDimsOptions)
+CIRCLE_NODE(CircleFakeQuant, BuiltinOperator_FAKE_QUANT, BuiltinOptions_FakeQuantOptions)
+CIRCLE_NODE(CircleFill, BuiltinOperator_FILL, BuiltinOptions_FillOptions)
+CIRCLE_NODE(CircleFloor, BuiltinOperator_FLOOR, BuiltinOptions_NONE)
+CIRCLE_NODE(CircleFloorDiv, BuiltinOperator_FLOOR_DIV, BuiltinOptions_FloorDivOptions)
+CIRCLE_NODE(CircleFloorMod, BuiltinOperator_FLOOR_MOD, BuiltinOptions_FloorModOptions)
+CIRCLE_NODE(CircleFullyConnected, BuiltinOperator_FULLY_CONNECTED, BuiltinOptions_FullyConnectedOptions)
+CIRCLE_NODE(CircleGather, BuiltinOperator_GATHER, BuiltinOptions_GatherOptions)
+CIRCLE_NODE(CircleGatherNd, BuiltinOperator_GATHER_ND, BuiltinOptions_GatherNdOptions)
+CIRCLE_NODE(CircleGreater, BuiltinOperator_GREATER, BuiltinOptions_GreaterOptions)
+CIRCLE_NODE(CircleGreaterEqual, BuiltinOperator_GREATER_EQUAL, BuiltinOptions_GreaterEqualOptions)
+CIRCLE_NODE(CircleIf, BuiltinOperator_IF, BuiltinOptions_IfOptions)
+CIRCLE_NODE(CircleL2Normalize, BuiltinOperator_L2_NORMALIZATION, BuiltinOptions_L2NormOptions)
+CIRCLE_NODE(CircleL2Pool2D, BuiltinOperator_L2_POOL_2D, BuiltinOptions_Pool2DOptions)
+CIRCLE_NODE(CircleLeakyRelu, BuiltinOperator_LEAKY_RELU, BuiltinOptions_LeakyReluOptions)
+CIRCLE_NODE(CircleLess, BuiltinOperator_LESS, BuiltinOptions_LessOptions)
+CIRCLE_NODE(CircleLessEqual, BuiltinOperator_LESS_EQUAL, BuiltinOptions_LessEqualOptions)
+CIRCLE_NODE(CircleLocalResponseNormalization, BuiltinOperator_LOCAL_RESPONSE_NORMALIZATION, BuiltinOptions_LocalResponseNormalizationOptions)
+CIRCLE_NODE(CircleLog, BuiltinOperator_LOG, BuiltinOptions_NONE)
+CIRCLE_NODE(CircleLogicalAnd, BuiltinOperator_LOGICAL_AND, BuiltinOptions_LogicalAndOptions)
+CIRCLE_NODE(CircleLogicalNot, BuiltinOperator_LOGICAL_NOT, BuiltinOptions_LogicalNotOptions)
+CIRCLE_NODE(CircleLogicalOr, BuiltinOperator_LOGICAL_OR, BuiltinOptions_LogicalOrOptions)
+CIRCLE_NODE(CircleLogistic, BuiltinOperator_LOGISTIC, BuiltinOptions_NONE)
+CIRCLE_NODE(CircleLogSoftmax, BuiltinOperator_LOG_SOFTMAX, BuiltinOptions_LogSoftmaxOptions)
+CIRCLE_NODE(CircleMatrixDiag, BuiltinOperator_MATRIX_DIAG, BuiltinOptions_MatrixDiagOptions)
+CIRCLE_NODE(CircleMatrixSetDiag, BuiltinOperator_MATRIX_SET_DIAG, BuiltinOptions_MatrixSetDiagOptions)
+CIRCLE_NODE(CircleMaximum, BuiltinOperator_MAXIMUM, BuiltinOptions_MaximumMinimumOptions)
+CIRCLE_NODE(CircleMaxPool2D, BuiltinOperator_MAX_POOL_2D, BuiltinOptions_Pool2DOptions)
+CIRCLE_NODE(CircleMean, BuiltinOperator_MEAN, BuiltinOptions_ReducerOptions)
+CIRCLE_NODE(CircleMinimum, BuiltinOperator_MINIMUM, BuiltinOptions_MaximumMinimumOptions)
+CIRCLE_NODE(CircleMirrorPad, BuiltinOperator_MIRROR_PAD, BuiltinOptions_MirrorPadOptions)
+CIRCLE_NODE(CircleMul, BuiltinOperator_MUL, BuiltinOptions_MulOptions)
+CIRCLE_NODE(CircleNeg, BuiltinOperator_NEG, BuiltinOptions_NegOptions)
+CIRCLE_NODE(CircleNonMaxSuppressionV4, BuiltinOperator_NON_MAX_SUPPRESSION_V4, BuiltinOptions_NonMaxSuppressionV4Options)
+CIRCLE_NODE(CircleNonMaxSuppressionV5, BuiltinOperator_NON_MAX_SUPPRESSION_V5, BuiltinOptions_NonMaxSuppressionV5Options)
+CIRCLE_NODE(CircleNotEqual, BuiltinOperator_NOT_EQUAL, BuiltinOptions_NotEqualOptions)
+CIRCLE_NODE(CircleOneHot, BuiltinOperator_ONE_HOT, BuiltinOptions_OneHotOptions)
+CIRCLE_NODE(CirclePack, BuiltinOperator_PACK, BuiltinOptions_PackOptions)
+CIRCLE_NODE(CirclePad, BuiltinOperator_PAD, BuiltinOptions_PadOptions)
+CIRCLE_NODE(CirclePadV2, BuiltinOperator_PADV2, BuiltinOptions_PadV2Options)
+CIRCLE_NODE(CirclePow, BuiltinOperator_POW, BuiltinOptions_PowOptions)
+CIRCLE_NODE(CirclePRelu, BuiltinOperator_PRELU, BuiltinOptions_NONE)
+CIRCLE_NODE(CircleQuantize, BuiltinOperator_QUANTIZE, BuiltinOptions_QuantizeOptions)
+CIRCLE_NODE(CircleRange, BuiltinOperator_RANGE, BuiltinOptions_RangeOptions)
+CIRCLE_NODE(CircleRank, BuiltinOperator_RANK, BuiltinOptions_RankOptions)
+CIRCLE_NODE(CircleReduceAny, BuiltinOperator_REDUCE_ANY, BuiltinOptions_ReducerOptions)
+CIRCLE_NODE(CircleReduceMax, BuiltinOperator_REDUCE_MAX, BuiltinOptions_ReducerOptions)
+CIRCLE_NODE(CircleReduceMin, BuiltinOperator_REDUCE_MIN, BuiltinOptions_ReducerOptions)
+CIRCLE_NODE(CircleReduceProd, BuiltinOperator_REDUCE_PROD, BuiltinOptions_ReducerOptions)
+CIRCLE_NODE(CircleRelu, BuiltinOperator_RELU, BuiltinOptions_NONE)
+CIRCLE_NODE(CircleRelu6, BuiltinOperator_RELU6, BuiltinOptions_NONE)
+CIRCLE_NODE(CircleReluN1To1, BuiltinOperator_RELU_N1_TO_1, BuiltinOptions_NONE)
+CIRCLE_NODE(CircleReshape, BuiltinOperator_RESHAPE, BuiltinOptions_ReshapeOptions)
+CIRCLE_NODE(CircleResizeBilinear, BuiltinOperator_RESIZE_BILINEAR, BuiltinOptions_ResizeBilinearOptions)
+CIRCLE_NODE(CircleResizeNearestNeighbor, BuiltinOperator_RESIZE_NEAREST_NEIGHBOR, BuiltinOptions_ResizeNearestNeighborOptions)
+CIRCLE_NODE(CircleReverseSequence, BuiltinOperator_REVERSE_SEQUENCE, BuiltinOptions_ReverseSequenceOptions)
+CIRCLE_NODE(CircleReverseV2, BuiltinOperator_REVERSE_V2, BuiltinOptions_ReverseV2Options)
+CIRCLE_NODE(CircleRound, BuiltinOperator_ROUND, BuiltinOptions_NONE)
+CIRCLE_NODE(CircleRsqrt, BuiltinOperator_RSQRT, BuiltinOptions_NONE)
+CIRCLE_NODE(CircleScatterNd, BuiltinOperator_SCATTER_ND, BuiltinOptions_ScatterNdOptions)
+CIRCLE_NODE(CircleSegmentSum, BuiltinOperator_SEGMENT_SUM, BuiltinOptions_SegmentSumOptions)
+CIRCLE_NODE(CircleSelect, BuiltinOperator_SELECT, BuiltinOptions_SelectOptions)
+CIRCLE_NODE(CircleSelectV2, BuiltinOperator_SELECT_V2, BuiltinOptions_SelectV2Options)
+CIRCLE_NODE(CircleShape, BuiltinOperator_SHAPE, BuiltinOptions_ShapeOptions)
+CIRCLE_NODE(CircleSin, BuiltinOperator_SIN, BuiltinOptions_NONE)
+CIRCLE_NODE(CircleSlice, BuiltinOperator_SLICE, BuiltinOptions_SliceOptions)
+CIRCLE_NODE(CircleSoftmax, BuiltinOperator_SOFTMAX, BuiltinOptions_SoftmaxOptions)
+CIRCLE_NODE(CircleSpaceToBatchND, BuiltinOperator_SPACE_TO_BATCH_ND, BuiltinOptions_SpaceToBatchNDOptions)
+CIRCLE_NODE(CircleSpaceToDepth, BuiltinOperator_SPACE_TO_DEPTH, BuiltinOptions_SpaceToDepthOptions)
+CIRCLE_NODE(CircleSparseToDense, BuiltinOperator_SPARSE_TO_DENSE, BuiltinOptions_SparseToDenseOptions)
+CIRCLE_NODE(CircleSplit, BuiltinOperator_SPLIT, BuiltinOptions_SplitOptions)
+CIRCLE_NODE(CircleSplitV, BuiltinOperator_SPLIT_V, BuiltinOptions_SplitVOptions)
+CIRCLE_NODE(CircleSqrt, BuiltinOperator_SQRT, BuiltinOptions_NONE)
+CIRCLE_NODE(CircleSquare, BuiltinOperator_SQUARE, BuiltinOptions_SquareOptions)
+CIRCLE_NODE(CircleSquaredDifference, BuiltinOperator_SQUARED_DIFFERENCE, BuiltinOptions_SquaredDifferenceOptions)
+CIRCLE_NODE(CircleSqueeze, BuiltinOperator_SQUEEZE, BuiltinOptions_SqueezeOptions)
+CIRCLE_NODE(CircleStridedSlice, BuiltinOperator_STRIDED_SLICE, BuiltinOptions_StridedSliceOptions)
+CIRCLE_NODE(CircleSub, BuiltinOperator_SUB, BuiltinOptions_SubOptions)
+CIRCLE_NODE(CircleSum, BuiltinOperator_SUM, BuiltinOptions_ReducerOptions)
+CIRCLE_NODE(CircleSVDF, BuiltinOperator_SVDF, BuiltinOptions_SVDFOptions)
+CIRCLE_NODE(CircleTanh, BuiltinOperator_TANH, BuiltinOptions_NONE)
+CIRCLE_NODE(CircleTile, BuiltinOperator_TILE, BuiltinOptions_TileOptions)
+CIRCLE_NODE(CircleTopKV2, BuiltinOperator_TOPK_V2, BuiltinOptions_TopKV2Options)
+CIRCLE_NODE(CircleTranspose, BuiltinOperator_TRANSPOSE, BuiltinOptions_TransposeOptions)
+CIRCLE_NODE(CircleTransposeConv, BuiltinOperator_TRANSPOSE_CONV, BuiltinOptions_TransposeConvOptions)
+CIRCLE_NODE(CircleUnidirectionalSequenceLSTM, BuiltinOperator_UNIDIRECTIONAL_SEQUENCE_LSTM, BuiltinOptions_UnidirectionalSequenceLSTMOptions)
+CIRCLE_NODE(CircleUnique, BuiltinOperator_UNIQUE, BuiltinOptions_UniqueOptions)
+CIRCLE_NODE(CircleUnpack, BuiltinOperator_UNPACK, BuiltinOptions_UnpackOptions)
+CIRCLE_NODE(CircleWhere, BuiltinOperator_WHERE, BuiltinOptions_WhereOptions)
+CIRCLE_NODE(CircleWhile, BuiltinOperator_WHILE, BuiltinOptions_WhileOptions)
+CIRCLE_NODE(CircleZerosLike, BuiltinOperator_ZEROS_LIKE, BuiltinOptions_ZerosLikeOptions)
+// Circle Only
+CIRCLE_NODE(CircleBCQFullyConnected, BuiltinOperator_BCQ_FULLY_CONNECTED, BuiltinOptions_BCQFullyConnectedOptions)
+CIRCLE_NODE(CircleBCQGather, BuiltinOperator_BCQ_GATHER, BuiltinOptions_BCQGatherOptions)
+CIRCLE_NODE(CircleInstanceNorm, BuiltinOperator_INSTANCE_NORM, BuiltinOptions_InstanceNormOptions)
+// Virtual node(s)
+CIRCLE_VNODE(CircleBidirectionalSequenceLSTMOut)
+CIRCLE_VNODE(CircleConst)
+CIRCLE_VNODE(CircleInput)
+CIRCLE_VNODE(CircleOutput)
+CIRCLE_VNODE(CircleOutputDummy)
+CIRCLE_VNODE(CircleOutputExclude)
+CIRCLE_VNODE(CircleCustomOut)
+CIRCLE_VNODE(CircleIfOut)
+CIRCLE_VNODE(CircleNonMaxSuppressionV4Out)
+CIRCLE_VNODE(CircleNonMaxSuppressionV5Out)
+CIRCLE_VNODE(CircleSplitOut)
+CIRCLE_VNODE(CircleSplitVOut)
+CIRCLE_VNODE(CircleTopKV2Out)
+CIRCLE_VNODE(CircleUniqueOut)
+CIRCLE_VNODE(CircleUnpackOut)
+CIRCLE_VNODE(CircleVariable)
+CIRCLE_VNODE(CircleWhileOut)
diff --git a/compiler/luci/export/src/CircleTensorExporter.cpp b/compiler/luci/export/src/CircleTensorExporter.cpp
index 615402aa8..b3bb850cc 100644
--- a/compiler/luci/export/src/CircleTensorExporter.cpp
+++ b/compiler/luci/export/src/CircleTensorExporter.cpp
@@ -67,6 +67,9 @@ public:
luci::SparsityParam *sparsityparam(void) const { return _sparsityparam; }
void sparsityparam(luci::SparsityParam *sp) { _sparsityparam = sp; }
+ bool is_variable(void) const { return _is_variable; }
+ void is_variable(bool v) { _is_variable = v; }
+
private:
std::string _name;
@@ -77,6 +80,8 @@ private:
luci::CircleConst *_content = nullptr;
luci::CircleQuantParam *_quantparam = nullptr;
luci::SparsityParam *_sparsityparam = nullptr;
+
+ bool _is_variable = false;
};
class CircleTensorContext
@@ -145,6 +150,8 @@ void allocateCircleTensorInfo(CircleNode *node, CircleTensorContext &ctx)
tensor_info.quantparam(node->quantparam());
tensor_info.sparsityparam(node->sparsityparam());
+ tensor_info.is_variable(dynamic_cast<luci::CircleVariable *>(node) != nullptr);
+
set_tensor_index(node, tensor_index);
ctx.emplace_back(tensor_info);
@@ -592,9 +599,11 @@ void exportOpDefinedTensor(const CircleTensorInfo &info, FlatBufferBuilder &buil
auto buffer_id = get_buffer_id(builder, md, info.content());
auto name_offset = builder.CreateString(info.name());
- auto tensor_offset =
- CreateTensor(builder, shape_offset, info.dtype(), buffer_id, name_offset, quantparam,
- /*is_variable*/ false, sparsityparam, shape_signature_offset);
+
+ auto is_variable = info.is_variable();
+
+ auto tensor_offset = CreateTensor(builder, shape_offset, info.dtype(), buffer_id, name_offset,
+ quantparam, is_variable, sparsityparam, shape_signature_offset);
gd._tensors.push_back(tensor_offset);
}
diff --git a/compiler/luci/export/src/SerializedData.h b/compiler/luci/export/src/SerializedData.h
index a945eecf7..136a8ac49 100644
--- a/compiler/luci/export/src/SerializedData.h
+++ b/compiler/luci/export/src/SerializedData.h
@@ -23,7 +23,7 @@
#include <luci/IR/ExecutionPlanTable.h>
#include <vector>
-
+#include <string>
#include <unordered_map>
#include <map>
@@ -131,8 +131,8 @@ struct SerializedModelData final
* @param builtin_code
* @return idx of opcode in table of opcodes (see schema)
*/
- uint32_t registerBuiltinOpcode(circle::BuiltinOperator builtin_code, const int32_t op_version);
- uint32_t registerCustomOpcode(const std::string &custom_op);
+ uint32_t registerBuiltinOpcode(circle::BuiltinOperator builtin_code,
+ const std::string &custom_code, const int32_t op_version);
};
// Prerequisites for circle::Model object creation
diff --git a/compiler/luci/import/CMakeLists.txt b/compiler/luci/import/CMakeLists.txt
index 6630cab9f..1b2db23ae 100644
--- a/compiler/luci/import/CMakeLists.txt
+++ b/compiler/luci/import/CMakeLists.txt
@@ -12,13 +12,14 @@ target_include_directories(luci_import PUBLIC include)
target_link_libraries(luci_import PUBLIC luci_lang)
target_link_libraries(luci_import PUBLIC luci_profile)
target_link_libraries(luci_import PUBLIC luci_plan)
-target_link_libraries(luci_import PUBLIC mio_circle)
+target_link_libraries(luci_import PUBLIC mio_circle04)
target_link_libraries(luci_import PRIVATE luci_env)
target_link_libraries(luci_import PRIVATE luci_log)
target_link_libraries(luci_import PRIVATE luci_logex)
target_link_libraries(luci_import PRIVATE nncc_common)
target_link_libraries(luci_import PRIVATE locop)
target_link_libraries(luci_import PRIVATE oops)
+target_link_libraries(luci_import PRIVATE mio_circle04_helper)
install(TARGETS luci_import DESTINATION lib)
install(DIRECTORY include/ DESTINATION include
FILES_MATCHING PATTERN "*.h")
@@ -32,7 +33,3 @@ nnas_find_package(GTest REQUIRED)
GTest_AddTest(luci_import_test ${TESTS})
target_include_directories(luci_import_test PRIVATE src)
target_link_libraries(luci_import_test luci_import)
-target_link_libraries(luci_import_test oops)
-target_link_libraries(luci_import_test luci_plan)
-target_link_libraries(luci_import_test luci_lang)
-target_link_libraries(luci_import_test mio_circle)
diff --git a/compiler/luci/import/include/luci/Import/CircleReader.h b/compiler/luci/import/include/luci/Import/CircleReader.h
index fb38ba90b..a0519f661 100644
--- a/compiler/luci/import/include/luci/Import/CircleReader.h
+++ b/compiler/luci/import/include/luci/Import/CircleReader.h
@@ -35,19 +35,7 @@
namespace luci
{
-bool is_valid(const circle::OperatorCodeT &opcode);
-bool is_valid(const circle::OperatorCode *opcode);
-
-bool is_custom(const circle::OperatorCodeT &opcode);
-bool is_custom(const circle::OperatorCode *opcode);
-
-std::string opcode_name(const circle::OperatorCodeT &opcode);
-std::string opcode_name(const circle::OperatorCode *opcode);
-
-const char *tensor_name(const circle::TensorT &tensor);
const char *tensor_name(const circle::Tensor *tensor);
-
-const circle::QuantizationParametersT *tensor_quantization(const circle::TensorT &tensor);
const circle::QuantizationParameters *tensor_quantization(const circle::Tensor *tensor);
loco::DataType luci_datatype(circle::TensorType type);
@@ -57,14 +45,13 @@ MirrorPadMode luci_mirrorpad_mode(const circle::MirrorPadMode mode);
luci::CircleFullyConnected::WeightsFormat
luci_weights_format(const circle::FullyConnectedOptionsWeightsFormat weights_format);
std::unique_ptr<CircleQuantParam>
-luci_quantparam(const circle::QuantizationParametersT *quantization);
-std::unique_ptr<CircleQuantParam>
luci_quantparam(const circle::QuantizationParameters *quantization);
/// @brief Copy common tensor attributes such as name, type, etc. to node.
-void copy_tensor_attributes(const circle::TensorT &tensor, CircleNode *node);
void copy_tensor_attributes(const circle::Tensor *tensor, CircleNode *node);
+std::string fb_string2std_string(const flatbuffers::String *fb_str);
+
/**
* @brief Wrapper to use flatbuffers::Vector pointer as std::vector entity
*/
@@ -101,13 +88,6 @@ template <typename T> VectorWrapper<T> wrap(const flatbuffers::Vector<T> *vec)
*/
class CircleReader
{
-private: // unpack API
- using CircleBuffers_t = std::vector<std::unique_ptr<circle::BufferT>>;
- using CircleTensors_t = std::vector<std::unique_ptr<circle::TensorT>>;
- using CircleOperators_t = std::vector<std::unique_ptr<circle::OperatorT>>;
- using CircleOperatorCodes_t = std::vector<std::unique_ptr<circle::OperatorCodeT>>;
- using CircleMetadata_t = std::vector<std::unique_ptr<circle::MetadataT>>;
-
private: // direct API
using CircleBuffers = VectorWrapper<flatbuffers::Offset<circle::Buffer>>;
using CircleTensors = VectorWrapper<flatbuffers::Offset<circle::Tensor>>;
@@ -115,40 +95,21 @@ private: // direct API
using CircleOperatorCodes = VectorWrapper<flatbuffers::Offset<circle::OperatorCode>>;
using CircleMetadataSet = VectorWrapper<flatbuffers::Offset<circle::Metadata>>;
- using CircleSubGraphsPtr_t = flatbuffers::Vector<flatbuffers::Offset<circle::SubGraph>>;
- using CircleTensorsPtr_t = flatbuffers::Vector<flatbuffers::Offset<circle::Tensor>>;
-
public:
CircleReader() = default;
-public: // unpack API
- const CircleOperatorCodes_t &opcodes() const { return _model->operator_codes; }
- const CircleBuffers_t &buffers() const { return _model->buffers; }
- const CircleTensors_t &tensors() const { return _current_subgraph->tensors; }
- const CircleOperators_t &operators() const { return _current_subgraph->operators; }
- const std::vector<int32_t> &inputs() const { return _current_subgraph->inputs; }
- const std::vector<int32_t> &outputs() const { return _current_subgraph->outputs; }
- const std::string &name() const { return _current_subgraph->name; }
- const circle::DataFormat &data_format() const { return _current_subgraph->data_format; }
- const CircleMetadata_t &metadata() const { return _model->metadata; }
-
- const CircleTensorsPtr_t *tensors_ptr() const { return _tensors_ptr; }
-
- uint32_t num_subgraph() const { return _model->subgraphs.size(); }
-
- circle::BuiltinOperator builtin_code(const circle::OperatorT &op) const;
- std::string opcode_name(const circle::OperatorT &op) const;
-
public: // direct API
- CircleOperatorCodes native_opcodes() const { return wrap(_native_model->operator_codes()); }
- CircleBuffers native_buffers() const { return wrap(_native_model->buffers()); }
- CircleTensors native_tensors() const { return wrap(_native_subgraph->tensors()); }
- CircleOperators native_operators() const { return wrap(_native_subgraph->operators()); }
- VectorWrapper<int32_t> native_inputs() const { return wrap(_native_subgraph->inputs()); }
- VectorWrapper<int32_t> native_outputs() const { return wrap(_native_subgraph->outputs()); }
- std::string native_name() const { return _native_subgraph->name()->str(); }
- circle::DataFormat native_data_format() const { return _native_subgraph->data_format(); }
- CircleMetadataSet native_metadata() const { return wrap(_native_model->metadata()); }
+ CircleOperatorCodes opcodes() const { return wrap(_model->operator_codes()); }
+ CircleBuffers buffers() const { return wrap(_model->buffers()); }
+ CircleTensors tensors() const { return wrap(_current_subgraph->tensors()); }
+ CircleOperators operators() const { return wrap(_current_subgraph->operators()); }
+ VectorWrapper<int32_t> inputs() const { return wrap(_current_subgraph->inputs()); }
+ VectorWrapper<int32_t> outputs() const { return wrap(_current_subgraph->outputs()); }
+ std::string name() const { return fb_string2std_string(_current_subgraph->name()); }
+ circle::DataFormat data_format() const { return _current_subgraph->data_format(); }
+ CircleMetadataSet metadata() const { return wrap(_model->metadata()); }
+
+ uint32_t num_subgraph() const { return wrap(_model->subgraphs()).size(); }
circle::BuiltinOperator builtin_code(const circle::Operator *op) const;
std::string opcode_name(const circle::Operator *op) const;
@@ -158,12 +119,8 @@ public:
bool select_subgraph(uint32_t subgraph);
private:
- std::unique_ptr<const circle::ModelT> _model;
- const circle::SubGraphT *_current_subgraph{nullptr};
-
- const circle::Model *_native_model{nullptr};
- const CircleTensorsPtr_t *_tensors_ptr{nullptr};
- const circle::SubGraph *_native_subgraph{nullptr};
+ const circle::Model *_model{nullptr};
+ const circle::SubGraph *_current_subgraph{nullptr};
};
} // namespace luci
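
With the unpack API removed, every accessor above returns either a raw schema pointer or a VectorWrapper view, so no UnPack copy is made. A minimal usage sketch, assuming circle_model is a const circle::Model * obtained via circle::GetModel on a loaded flatbuffer:

    luci::CircleReader reader;
    reader.parse(circle_model);  // keeps the raw circle::Model pointer
    reader.select_subgraph(0);   // points _current_subgraph at subgraphs()->Get(0)

    const auto tensors = reader.tensors();  // VectorWrapper over the fb Vector
    for (uint32_t i = 0; i < tensors.size(); ++i)
    {
      const circle::Tensor *tensor = tensors[i];     // direct schema pointer
      const char *name = luci::tensor_name(tensor);  // "(noname)" when unset
      (void)name;
    }
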
diff --git a/compiler/luci/import/include/luci/Import/GraphBuilderRegistry.h b/compiler/luci/import/include/luci/Import/GraphBuilderRegistry.h
index b8dc22fdd..93e34a56b 100644
--- a/compiler/luci/import/include/luci/Import/GraphBuilderRegistry.h
+++ b/compiler/luci/import/include/luci/Import/GraphBuilderRegistry.h
@@ -18,6 +18,7 @@
#define __LUCI_IMPORT_GRAPH_BUILDER_REGISTRY_H__
#include "GraphBuilderBase.h"
+#include "NodeBuilder.h"
#include <map>
@@ -32,6 +33,11 @@ struct GraphBuilderSource
* @brief Returns registered GraphBuilder pointer for operator (nullptr if not present)
*/
virtual const GraphBuilderBase *lookup(const circle::BuiltinOperator &op) const = 0;
+
+ /**
+ * @brief Returns registered NodeBuilderBase pointer for type (nullptr if not present)
+ */
+ virtual const NodeBuilderBase *lookup(const NodeBuilderType type) const = 0;
};
/**
@@ -61,6 +67,17 @@ public:
return _builder_map.at(op).get();
}
+ /**
+ * @brief Returns registered NodeBuilderBase pointer for type or nullptr if not registered
+ */
+ const NodeBuilderBase *lookup(const NodeBuilderType type) const final
+ {
+ if (_node_builders.find(type) == _node_builders.end())
+ return (_parent == nullptr) ? nullptr : _parent->lookup(type);
+
+ return _node_builders.at(type).get();
+ }
+
static GraphBuilderRegistry &get()
{
static GraphBuilderRegistry me;
@@ -73,11 +90,17 @@ public:
_builder_map[op] = std::move(builder);
}
+ void add(std::unique_ptr<NodeBuilderBase> &&builder)
+ {
+ _node_builders[builder->builder_type()] = std::move(builder);
+ }
+
private:
const GraphBuilderSource *_parent = nullptr;
private:
std::map<const circle::BuiltinOperator, std::unique_ptr<GraphBuilderBase>> _builder_map;
+ std::map<const NodeBuilderType, std::unique_ptr<NodeBuilderBase>> _node_builders;
};
} // namespace luci
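
Both lookup overloads fall back to _parent on a miss, so a child registry can override a subset of builders while inheriting the rest from GraphBuilderRegistry::get(). A short sketch of the two dispatch paths, assuming the default registry populated later in this patch:

    auto &registry = luci::GraphBuilderRegistry::get();

    // operator dispatch: keyed by the BuiltinOperator of a circle::Operator
    const luci::GraphBuilderBase *op_builder = registry.lookup(circle::BuiltinOperator_ADD);

    // tensor dispatch: keyed by NodeBuilderType, e.g. BUFFER for constant tensors
    const luci::NodeBuilderBase *buf_builder = registry.lookup(luci::NodeBuilderType::BUFFER);
    assert(op_builder != nullptr && buf_builder != nullptr);
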
diff --git a/compiler/luci/import/include/luci/Import/NodeBuilder.h b/compiler/luci/import/include/luci/Import/NodeBuilder.h
new file mode 100644
index 000000000..440b491b0
--- /dev/null
+++ b/compiler/luci/import/include/luci/Import/NodeBuilder.h
@@ -0,0 +1,58 @@
+/*
+ * Copyright (c) 2022 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __LUCI_IMPORT_NODE_BUILDER_H__
+#define __LUCI_IMPORT_NODE_BUILDER_H__
+
+#include "GraphBuilderContext.h"
+#include "GraphBuilderBase.h"
+
+#include <mio/circle/schema_generated.h>
+
+namespace luci
+{
+
+/**
+ * @brief Tensor types which require a separate node
+ */
+enum class NodeBuilderType
+{
+ BUFFER,
+ // TODO Extend this enum if a new type of NodeBuilderBase is needed
+};
+
+/**
+ * @brief Creates nodes from the given Tensor and context
+ */
+class NodeBuilderBase
+{
+public:
+ virtual CircleNode *build(TensorIndex tensor_idx, GraphBuilderContext *context) const = 0;
+ virtual NodeBuilderType builder_type() const = 0;
+
+ // virtual dtor: builders are owned and deleted through NodeBuilderBase pointers
+ virtual ~NodeBuilderBase() = default;
+};
+
+/**
+ * @brief Placeholder for builders of tensors with different types
+ */
+template <NodeBuilderType Type> class TypedNodeBuilder : public NodeBuilderBase
+{
+public:
+ NodeBuilderType builder_type() const final { return Type; }
+};
+
+} // namespace luci
+
+#endif // __LUCI_IMPORT_NODE_BUILDER_H__
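
Extending this means subclassing TypedNodeBuilder with the matching enum value and handing an instance to the registry. A hypothetical sketch; MyBufferNodeBuilder is illustrative only, while CircleConstNodeBuilder below follows the same shape:

    class MyBufferNodeBuilder final : public luci::TypedNodeBuilder<luci::NodeBuilderType::BUFFER>
    {
    public:
      luci::CircleNode *build(luci::TensorIndex idx, luci::GraphBuilderContext *ctx) const final
      {
        (void)idx;
        (void)ctx;
        return nullptr;  // nullptr means "this tensor is not handled here"
      }
    };

    // add() keys on builder_type(), so one builder per NodeBuilderType
    luci::GraphBuilderRegistry::get().add(std::make_unique<MyBufferNodeBuilder>());
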
diff --git a/compiler/luci/import/include/luci/Import/Nodes.h b/compiler/luci/import/include/luci/Import/Nodes.h
index f7d22e7aa..7a5045ede 100644
--- a/compiler/luci/import/include/luci/Import/Nodes.h
+++ b/compiler/luci/import/include/luci/Import/Nodes.h
@@ -122,6 +122,7 @@
#include "Nodes/CircleStridedSlice.h"
#include "Nodes/CircleSub.h"
#include "Nodes/CircleSum.h"
+#include "Nodes/CircleSVDF.h"
#include "Nodes/CircleTanh.h"
#include "Nodes/CircleTile.h"
#include "Nodes/CircleTopKV2.h"
@@ -130,6 +131,7 @@
#include "Nodes/CircleUnidirectionalSequenceLSTM.h"
#include "Nodes/CircleUnique.h"
#include "Nodes/CircleUnpack.h"
+#include "Nodes/CircleVariable.h"
#include "Nodes/CircleWhere.h"
#include "Nodes/CircleWhile.h"
#include "Nodes/CircleZerosLike.h"
diff --git a/compiler/luci/import/include/luci/Import/Nodes/CircleConst.h b/compiler/luci/import/include/luci/Import/Nodes/CircleConst.h
index 7d4f10a59..9e50ddbde 100644
--- a/compiler/luci/import/include/luci/Import/Nodes/CircleConst.h
+++ b/compiler/luci/import/include/luci/Import/Nodes/CircleConst.h
@@ -17,20 +17,21 @@
#ifndef __LUCI_IMPORT_OP_CIRCLE_CONST_H__
#define __LUCI_IMPORT_OP_CIRCLE_CONST_H__
-#include "luci/Import/GraphBuilderContext.h"
+#include "luci/Import/NodeBuilder.h"
#include <luci/IR/Nodes/CircleConst.h>
-/*
- * @note Circle does not have Const operator.
- * Methods here provide helper that creates CircleConst from
- * Tensor and Buffer in circle flatbuffer file.
- */
-
namespace luci
{
-CircleConst *create_circleconst(GraphBuilderContext *context, int32_t tensor_index);
+/**
+ * @brief Builder that creates a CircleConst node from a Tensor with a buffer.
+ */
+class CircleConstNodeBuilder : public TypedNodeBuilder<NodeBuilderType::BUFFER>
+{
+public:
+ CircleNode *build(TensorIndex tensor_index, GraphBuilderContext *ctx) const final;
+};
} // namespace luci
diff --git a/compiler/luci/import/include/luci/Import/Nodes/CircleSVDF.h b/compiler/luci/import/include/luci/Import/Nodes/CircleSVDF.h
new file mode 100644
index 000000000..a91f66019
--- /dev/null
+++ b/compiler/luci/import/include/luci/Import/Nodes/CircleSVDF.h
@@ -0,0 +1,37 @@
+/*
+ * Copyright (c) 2022 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __LUCI_IMPORT_OP_CIRCLE_SVDF_H__
+#define __LUCI_IMPORT_OP_CIRCLE_SVDF_H__
+
+#include "luci/Import/GraphBuilder.h"
+
+namespace luci
+{
+
+class CircleSVDFBuilder : public GraphBuilder
+{
+public:
+ bool validate(const ValidateArgs &args) const final;
+
+private:
+ CircleNode *build_node(const circle::OperatorT &op, const std::vector<CircleNode *> &inputs,
+ loco::Graph *graph) const final;
+};
+
+} // namespace luci
+
+#endif // __LUCI_IMPORT_OP_CIRCLE_SVDF_H__
diff --git a/compiler/luci/import/include/luci/Import/Nodes/CircleVariable.h b/compiler/luci/import/include/luci/Import/Nodes/CircleVariable.h
new file mode 100644
index 000000000..4d8961fa5
--- /dev/null
+++ b/compiler/luci/import/include/luci/Import/Nodes/CircleVariable.h
@@ -0,0 +1,37 @@
+/*
+ * Copyright (c) 2022 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __LUCI_IMPORT_OP_CIRCLE_VARIABLE_H__
+#define __LUCI_IMPORT_OP_CIRCLE_VARIABLE_H__
+
+#include "luci/Import/GraphBuilderContext.h"
+
+#include <luci/IR/Nodes/CircleVariable.h>
+
+/*
+ * @note Circle has no dedicated operator for variable tensors.
+ * The helper here creates a CircleVariable node from a Tensor
+ * whose is_variable field is true.
+ */
+
+namespace luci
+{
+
+CircleVariable *create_circlevariable(GraphBuilderContext *context, int32_t tensor_index);
+
+} // namespace luci
+
+#endif // __LUCI_IMPORT_OP_CIRCLE_VARIABLE_H__
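
The matching source file is not part of this excerpt; based on how Importer.cpp and the CircleConst builder below use the helper, a plausible sketch of its body (an assumption, not the patch's actual code):

    luci::CircleVariable *create_circlevariable_sketch(luci::GraphBuilderContext *context,
                                                       int32_t tensor_index)
    {
      const auto tensors = context->reader()->tensors();
      const auto tensor = tensors[tensor_index];
      assert(tensor != nullptr);

      // only tensors flagged is_variable become CircleVariable nodes
      if (not tensor->is_variable())
        return nullptr;

      auto *node = context->graph()->nodes()->create<luci::CircleVariable>();
      luci::copy_tensor_attributes(tensor, node);
      return node;
    }
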
diff --git a/compiler/luci/import/src/CircleImportMetadata.cpp b/compiler/luci/import/src/CircleImportMetadata.cpp
index 42dcebdaa..9c1fe7356 100644
--- a/compiler/luci/import/src/CircleImportMetadata.cpp
+++ b/compiler/luci/import/src/CircleImportMetadata.cpp
@@ -21,8 +21,10 @@
namespace
{
-uint32_t read_u32(const std::vector<uint8_t> &buffer, uint32_t idx)
+template <typename VECTORTYPE> uint32_t read_u32(const VECTORTYPE &buffer, uint32_t idx)
{
+ static_assert(std::is_same<typename VECTORTYPE::value_type, uint8_t>::value, "Types mismatch!");
+
uint32_t val = 0;
val += (buffer.at(idx + 0) << 0 * 8);
val += (buffer.at(idx + 1) << 1 * 8);
@@ -37,9 +39,11 @@ namespace
{
// 'source_table' is decoded to std::map<uint32_t, std::string> format.
-const std::map<uint32_t, std::string>
-decoded_source_table(const std::vector<uint8_t> &source_table_data)
+template <typename VECTORTYPE>
+const std::map<uint32_t, std::string> decoded_source_table(const VECTORTYPE &source_table_data)
{
+ static_assert(std::is_same<typename VECTORTYPE::value_type, uint8_t>::value, "Types mismatch!");
+
std::map<uint32_t, std::string> source_id_name_map;
uint32_t idx = 0;
@@ -86,9 +90,11 @@ decoded_source_table(const std::vector<uint8_t> &source_table_data)
}
// 'op_table' is decoded to std::map<uint32_t, std::set<uint32_t>> format.
-const std::map<uint32_t, std::set<uint32_t>>
-decoded_op_table(const std::vector<uint8_t> &op_table_data)
+template <typename VECTORTYPE>
+const std::map<uint32_t, std::set<uint32_t>> decoded_op_table(const VECTORTYPE &op_table_data)
{
+ static_assert(std::is_same<typename VECTORTYPE::value_type, uint8_t>::value, "Types mismatch!");
+
std::map<uint32_t, std::set<uint32_t>> node_source_ids_map;
uint32_t idx = 0;
@@ -135,9 +141,11 @@ decoded_op_table(const std::vector<uint8_t> &op_table_data)
}
// 'execution_plan_table' is decoded to std::map<uint32_t, std::vector<uint32_t>> format.
-const luci::ExecutionPlanTable
-decoded_execution_plan(const std::vector<uint8_t> &execution_plan_data)
+template <typename VECTORTYPE>
+const luci::ExecutionPlanTable decoded_execution_plan(const VECTORTYPE &execution_plan_data)
{
+ static_assert(std::is_same<typename VECTORTYPE::value_type, uint8_t>::value, "Types mismatch!");
+
luci::ExecutionPlanTable execution_plan_table;
uint32_t idx = 0;
@@ -156,6 +164,10 @@ decoded_execution_plan(const std::vector<uint8_t> &execution_plan_data)
idx += sizeof(uint32_t);
uint32_t size = read_u32(execution_plan_data, idx);
+
+ if (size == 0)
+ throw std::runtime_error("Op table decode error : empty execution plan entry");
+
idx += sizeof(uint32_t);
if (idx + sizeof(uint32_t) * size > execution_plan_data.size())
@@ -190,19 +202,22 @@ namespace luci
CircleImportMetadata::CircleImportMetadata(const luci::CircleReader &reader)
{
- const auto &metadata = reader.metadata();
+ const auto metadata = reader.metadata();
for (uint32_t i = 0; i < metadata.size(); ++i)
{
- const circle::MetadataT &meta = *metadata[i];
+ const auto *meta = metadata[i];
+ assert(meta != nullptr);
- assert(meta.buffer < reader.buffers().size());
- const std::vector<uint8_t> &buffer = reader.buffers()[meta.buffer]->data;
+ assert(meta->buffer() < reader.buffers().size());
+ assert(reader.buffers()[meta->buffer()] != nullptr);
+ const auto buffer = luci::wrap(reader.buffers()[meta->buffer()]->data());
- if (meta.name.compare("ONE_op_table") == 0)
+ assert(meta->name() != nullptr);
+ if (meta->name()->str().compare("ONE_op_table") == 0)
_op_table = decoded_op_table(buffer);
- else if (meta.name.compare("ONE_source_table") == 0)
+ else if (meta->name()->str().compare("ONE_source_table") == 0)
_source_table = decoded_source_table(buffer);
- else if (meta.name.compare("ONE_execution_plan_table") == 0)
+ else if (meta->name()->str().compare("ONE_execution_plan_table") == 0)
_execution_plan_table = decoded_execution_plan(buffer);
}
}
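
All three metadata tables are flat little-endian u32 streams, which is what the templated read_u32 above reassembles byte by byte. A sketch of producing one execution-plan entry in that layout; append_u32 is an illustrative helper, and the value semantics (order first, then offsets) follow the CircleNodeExecutionPlan construction in Importer.cpp below:

    #include <cstdint>
    #include <vector>

    void append_u32(std::vector<uint8_t> &buf, uint32_t v)
    {
      for (uint32_t i = 0; i < 4; ++i)
        buf.push_back(static_cast<uint8_t>((v >> (8 * i)) & 0xFF));
    }

    std::vector<uint8_t> make_plan_buffer()
    {
      std::vector<uint8_t> data;
      append_u32(data, 1);   // number of entries
      append_u32(data, 0);   // entry id: node position in the graph
      append_u32(data, 2);   // entry size: must be > 0 after this patch
      append_u32(data, 5);   // first value: order in the execution plan
      append_u32(data, 100); // second value: a memory offset
      return data;
    }
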
diff --git a/compiler/luci/import/src/CircleReader.cpp b/compiler/luci/import/src/CircleReader.cpp
index 14917ba06..a42c3f913 100644
--- a/compiler/luci/import/src/CircleReader.cpp
+++ b/compiler/luci/import/src/CircleReader.cpp
@@ -16,6 +16,9 @@
#include "luci/Import/CircleReader.h"
+#include <mio_circle/Helper.h>
+
+#include <algorithm>
#include <memory>
#include <sstream>
#include <string>
@@ -23,103 +26,14 @@
namespace luci
{
-bool is_valid(const circle::OperatorCodeT &opcode)
-{
- circle::BuiltinOperator code = opcode.builtin_code;
- return (circle::BuiltinOperator_MIN <= code && code <= circle::BuiltinOperator_MAX);
-}
-
-bool is_valid(const circle::OperatorCode *opcode)
-{
- assert(opcode != nullptr);
- circle::BuiltinOperator code = opcode->builtin_code();
- return (circle::BuiltinOperator_MIN <= code && code <= circle::BuiltinOperator_MAX);
-}
-
-bool is_custom(const circle::OperatorCodeT &opcode)
-{
- circle::BuiltinOperator code = opcode.builtin_code;
- return (code == circle::BuiltinOperator_CUSTOM);
-}
-
-bool is_custom(const circle::OperatorCode *opcode)
-{
- assert(opcode != nullptr);
- circle::BuiltinOperator code = opcode->builtin_code();
- return (code == circle::BuiltinOperator_CUSTOM);
-}
-
-std::string opcode_name(const circle::OperatorCodeT &opcode)
-{
- if (!is_valid(opcode))
- {
- std::ostringstream oss;
- oss << "(invalid)";
- return oss.str();
- }
-
- if (is_custom(opcode))
- {
- if (opcode.custom_code.empty())
- return "(invalid custom)";
-
- return opcode.custom_code;
- }
-
- circle::BuiltinOperator code = opcode.builtin_code;
- return circle::EnumNameBuiltinOperator(code);
-}
-
-std::string opcode_name(const circle::OperatorCode *opcode)
-{
- assert(opcode != nullptr);
-
- if (!is_valid(opcode))
- {
- std::ostringstream oss;
- oss << "(invalid)";
- return oss.str();
- }
-
- if (is_custom(opcode))
- {
- auto custom_code = opcode->custom_code()->str();
- if (custom_code.empty())
- return "(invalid custom)";
-
- return custom_code;
- }
-
- circle::BuiltinOperator code = opcode->builtin_code();
- return circle::EnumNameBuiltinOperator(code);
-}
-
-const char *tensor_name(const circle::TensorT &tensor)
-{
- static const char *kEmptyTensorName = "(noname)";
-
- if (!tensor.name.empty())
- return tensor.name.c_str();
-
- return kEmptyTensorName;
-}
-
const char *tensor_name(const circle::Tensor *tensor)
{
assert(tensor != nullptr);
- static const char *kEmptyTensorName = "(noname)";
- const auto tensor_name = tensor->name()->c_str();
-
- if (!std::string(tensor_name).empty())
- return tensor_name;
+ if (tensor->name() == nullptr || std::string(tensor->name()->c_str()).empty())
+ return "(noname)";
- return kEmptyTensorName;
-}
-
-const circle::QuantizationParametersT *tensor_quantization(const circle::TensorT &tensor)
-{
- return tensor.quantization.get();
+ return tensor->name()->c_str();
}
const circle::QuantizationParameters *tensor_quantization(const circle::Tensor *tensor)
@@ -334,41 +248,6 @@ std::unique_ptr<SparsityParam> luci_sparsityparam(const circle::SparsityParamete
return luci_sparsityparam(&sparsity);
}
-void copy_tensor_attributes(const circle::TensorT &tensor, CircleNode *node)
-{
- node->name(tensor_name(tensor));
- node->dtype(luci_datatype(tensor.type));
-
- assert(tensor.shape_signature.size() == 0 ||
- tensor.shape_signature.size() == tensor.shape.size());
-
- std::vector<int32_t> dims = tensor.shape; // in NHWC
- node->rank(dims.size());
- for (uint32_t r = 0; r < dims.size(); ++r)
- {
- if (tensor.shape_signature.size() > 0 && tensor.shape_signature.at(r) == -1)
- node->dim(r).unset();
- else
- node->dim(r).set(dims[r]);
- }
-
- const auto *quantization = tensor.quantization.get();
- if (quantization != nullptr)
- {
- auto quantparam = luci_quantparam(quantization);
- if (quantparam)
- node->quantparam(std::move(quantparam));
- }
-
- const auto *sparsity = tensor.sparsity.get();
- if (sparsity != nullptr)
- {
- auto sparsityparam = luci_sparsityparam(sparsity);
- if (sparsityparam)
- node->sparsityparam(std::move(sparsityparam));
- }
-}
-
void copy_tensor_attributes(const circle::Tensor *tensor, CircleNode *node)
{
assert(tensor != nullptr);
@@ -408,63 +287,60 @@ void copy_tensor_attributes(const circle::Tensor *tensor, CircleNode *node)
}
}
-circle::BuiltinOperator CircleReader::builtin_code(const circle::OperatorT &op) const
+std::string fb_string2std_string(const flatbuffers::String *fb_str)
{
- const auto &op_codes = opcodes();
- uint32_t index = op.opcode_index;
+ return fb_str == nullptr ? "" : fb_str->str();
+}
+
+circle::BuiltinOperator CircleReader::builtin_code(const circle::Operator *op) const
+{
+ assert(op != nullptr);
+
+ const auto op_codes = opcodes();
+ uint32_t index = op->opcode_index();
assert(index < op_codes.size());
- const circle::OperatorCodeT &opcode = *op_codes[index];
+ const auto opcode = op_codes[index];
+ assert(opcode != nullptr);
- return opcode.builtin_code;
+ return mio::circle::builtin_code_neutral(opcode);
}
-std::string CircleReader::opcode_name(const circle::OperatorT &op) const
+std::string CircleReader::opcode_name(const circle::Operator *op) const
{
- const auto &op_codes = opcodes();
- uint32_t index = op.opcode_index;
- assert(index < op_codes.size());
- const circle::OperatorCodeT &opcode = *op_codes[index];
+ assert(op != nullptr);
- if (!is_valid(opcode))
- {
- std::ostringstream oss;
- oss << "(invalid: " << index << ")";
- return oss.str();
- }
+ const auto op_codes = opcodes();
+ uint32_t index = op->opcode_index();
+ assert(index < op_codes.size());
+ const auto opcode = op_codes[index];
- return ::luci::opcode_name(opcode);
+ return mio::circle::opcode_name(opcode);
}
bool CircleReader::parse(const circle::Model *model)
{
assert(model != nullptr);
- _model.reset(model->UnPack());
-
// for direct pointer access
- _native_model = model;
+ _model = model;
return true;
}
bool CircleReader::select_subgraph(uint32_t sgindex)
{
- if (_model->subgraphs.size() <= sgindex)
+ if (num_subgraph() <= sgindex)
{
assert(false);
return false;
}
- _current_subgraph = _model->subgraphs[sgindex].get();
-
// for direct pointer access
- auto subgraphs = _native_model->subgraphs();
+ auto subgraphs = _model->subgraphs();
assert(subgraphs != nullptr);
- _native_subgraph = subgraphs->Get(sgindex);
- assert(_native_subgraph != nullptr);
-
- _tensors_ptr = _native_subgraph->tensors();
+ _current_subgraph = subgraphs->Get(sgindex);
+ assert(_current_subgraph != nullptr);
return true;
}
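
builtin_code and opcode_name now delegate to the mio_circle04 helpers. For the 0.4 schema, builtin_code_neutral has to reconcile the legacy int8 deprecated_builtin_code field with the extended int32 builtin_code field; a sketch of the usual resolution rule, assuming the 127-placeholder convention carried over from the TFLite schema, which is also why the test model later in this patch sets both fields:

    circle::BuiltinOperator builtin_code_neutral_sketch(const circle::OperatorCode *opcode)
    {
      assert(opcode != nullptr);
      // 127 in the legacy int8 field means "look at the extended field"
      if (opcode->deprecated_builtin_code() == 127)
        return opcode->builtin_code();
      return static_cast<circle::BuiltinOperator>(opcode->deprecated_builtin_code());
    }
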
diff --git a/compiler/luci/import/src/GraphBuilder.cpp b/compiler/luci/import/src/GraphBuilder.cpp
index 356501c2f..59a08b546 100644
--- a/compiler/luci/import/src/GraphBuilder.cpp
+++ b/compiler/luci/import/src/GraphBuilder.cpp
@@ -29,10 +29,9 @@ CircleNode *GraphBuilder::build(const circle::OperatorT &op, GraphBuilderContext
const std::vector<int32_t> &inputs = op.inputs;
const std::vector<int32_t> &outputs = op.outputs;
- const auto &tensors = context->reader()->tensors();
- const auto &opcodes = context->reader()->opcodes();
- auto tensors_ptr = context->reader()->tensors_ptr();
- assert(tensors_ptr != nullptr);
+ const auto tensors = context->reader()->tensors();
+ const auto opcodes = context->reader()->opcodes();
+ assert(!tensors.null());
std::vector<CircleNode *> input_nodes;
for (const int32_t input_tensor_index : inputs)
@@ -60,16 +59,18 @@ CircleNode *GraphBuilder::build(const circle::OperatorT &op, GraphBuilderContext
// Set up node parameters.
assert(outputs.size() == 1);
{
- const circle::TensorT &output_tensor = *tensors[outputs[0]];
+ const auto output_tensor = tensors[outputs[0]];
+ assert(output_tensor != nullptr);
copy_tensor_attributes(output_tensor, node);
// mark shape_status
- if (tensors_ptr->Get(outputs[0])->shape() == nullptr)
+ if (output_tensor->shape() == nullptr)
node->shape_status(ShapeStatus::NOSHAPE);
else
node->shape_status(ShapeStatus::VALID);
// mark operator version
- node->op_version(opcodes[op.opcode_index].get()->version);
+ assert(opcodes[op.opcode_index] != nullptr);
+ node->op_version(opcodes[op.opcode_index]->version());
}
// Register node's only output.
diff --git a/compiler/luci/import/src/GraphBuilderMultiOutput.cpp b/compiler/luci/import/src/GraphBuilderMultiOutput.cpp
index be553f4c0..4df8d1e5a 100644
--- a/compiler/luci/import/src/GraphBuilderMultiOutput.cpp
+++ b/compiler/luci/import/src/GraphBuilderMultiOutput.cpp
@@ -30,10 +30,9 @@ CircleNode *GraphBuilderMultiOutput::build(const circle::OperatorT &op,
const std::vector<int32_t> &inputs = op.inputs;
const std::vector<int32_t> &outputs = op.outputs;
- const auto &tensors = context->reader()->tensors();
- const auto &opcodes = context->reader()->opcodes();
- auto tensors_ptr = context->reader()->tensors_ptr();
- assert(tensors_ptr != nullptr);
+ const auto tensors = context->reader()->tensors();
+ const auto opcodes = context->reader()->opcodes();
+ assert(!tensors.null());
std::vector<CircleNode *> input_nodes;
for (const int32_t input_tensor_index : inputs)
@@ -64,12 +63,14 @@ CircleNode *GraphBuilderMultiOutput::build(const circle::OperatorT &op,
if (output_count > 0)
{
// Let's use attributes from output 0 for this node
- const circle::TensorT &output_tensor = *tensors[outputs[0]];
+ const auto output_tensor = tensors[outputs[0]];
+ assert(output_tensor != nullptr);
node->name(tensor_name(output_tensor));
- node->dtype(luci_datatype(output_tensor.type));
+ node->dtype(luci_datatype(output_tensor->type()));
// mark operator version
- node->op_version(opcodes[op.opcode_index].get()->version);
+ assert(opcodes[op.opcode_index] != nullptr);
+ node->op_version(opcodes[op.opcode_index]->version());
// NOTE We don't set quantization for multiple output nodes but to virtual outputs
}
@@ -77,7 +78,8 @@ CircleNode *GraphBuilderMultiOutput::build(const circle::OperatorT &op,
// Create virtual outputs of Virtual Output node(s)
for (uint32_t n = 0; n < output_count; ++n)
{
- const circle::TensorT &output_tensor = *tensors[outputs[n]];
+ const auto output_tensor = tensors[outputs[n]];
+ assert(output_tensor != nullptr);
BuildOutArgs boa(node, n);
auto *nodeout = build_out(boa);
@@ -85,7 +87,7 @@ CircleNode *GraphBuilderMultiOutput::build(const circle::OperatorT &op,
copy_tensor_attributes(output_tensor, nodeout);
// NOTE name of CxxxOut nodes may have same name
// mark shape_status
- if (tensors_ptr->Get(outputs[n])->shape() == nullptr)
+ if (output_tensor->shape() == nullptr)
nodeout->shape_status(ShapeStatus::NOSHAPE);
else
nodeout->shape_status(ShapeStatus::VALID);
diff --git a/compiler/luci/import/src/GraphBuilderRegistry.cpp b/compiler/luci/import/src/GraphBuilderRegistry.cpp
index df07d9e48..fe2d830e9 100644
--- a/compiler/luci/import/src/GraphBuilderRegistry.cpp
+++ b/compiler/luci/import/src/GraphBuilderRegistry.cpp
@@ -131,6 +131,7 @@ GraphBuilderRegistry::GraphBuilderRegistry()
CIRCLE_NODE(STRIDED_SLICE, CircleStridedSliceGraphBuilder); // 45
CIRCLE_NODE(SUB, CircleSubGraphBuilder); // 41
CIRCLE_NODE(SUM, CircleSumGraphBuilder); // 74
+ CIRCLE_NODE(SVDF, CircleSVDFBuilder); // 27
CIRCLE_NODE(TANH, CircleTanhGraphBuilder); // 28
CIRCLE_NODE(TILE, CircleTileGraphBuilder); // 69
CIRCLE_NODE(TOPK_V2, CircleTopKV2GraphBuilder); // 48
@@ -150,7 +151,6 @@ GraphBuilderRegistry::GraphBuilderRegistry()
// BuiltinOperator_LSH_PROJECTION = 15,
// BuiltinOperator_LSTM = 16,
// BuiltinOperator_RNN = 24,
- // BuiltinOperator_SVDF = 27,
// BuiltinOperator_CONCAT_EMBEDDINGS = 29,
// BuiltinOperator_SKIP_GRAM = 30,
// BuiltinOperator_CALL = 31,
@@ -161,6 +161,13 @@ GraphBuilderRegistry::GraphBuilderRegistry()
// BuiltinOperator_ARG_MAX = 56,
// BuiltinOperator_HARD_SWISH = 117,
// BuiltinOperator_DENSIFY = 124,
+
+ // Register builders for nodes which are not handled by the builders registered above.
+#define CIRCLE_NODE(CLASS) add(std::make_unique<CLASS>())
+
+ CIRCLE_NODE(CircleConstNodeBuilder);
+
+#undef CIRCLE_NODE
}
} // namespace luci
diff --git a/compiler/luci/import/src/Importer.cpp b/compiler/luci/import/src/Importer.cpp
index 3f7f78591..15de03df2 100644
--- a/compiler/luci/import/src/Importer.cpp
+++ b/compiler/luci/import/src/Importer.cpp
@@ -23,6 +23,7 @@
#include "luci/Import/GraphBuilderRegistry.h"
#include "luci/Import/CircleReader.h"
#include "luci/Import/Nodes/CircleConst.h"
+#include "luci/Import/Nodes/CircleVariable.h"
#include <luci/IR/Module.h>
#include <luci/IR/CircleNodes.h>
@@ -50,18 +51,18 @@ void convert_graph(const luci::GraphBuilderSource &source, luci::CircleReader &r
luci::GraphBuilderContext gb_context(graph, &reader, nodefinder.get(), tensoroutputs.get());
- const auto &operators = reader.operators();
- const auto &tensors = reader.tensors();
- auto tensors_ptr = reader.tensors_ptr();
- assert(tensors_ptr != nullptr);
+ const auto operators = reader.operators();
+ const auto tensors = reader.tensors();
+ assert(!tensors.null());
auto circle_metadata = std::make_unique<luci::CircleImportMetadata>(reader);
// build a cache to identify if a tensor is output of an operator
// if this is set, we should not create a CircleConst for this tensor
for (uint32_t i = 0; i < operators.size(); ++i)
{
- const circle::OperatorT &op = *operators[i];
- const auto &outputs = op.outputs;
+ const auto op = operators[i];
+ assert(op != nullptr);
+ const auto outputs = luci::wrap(op->outputs());
for (uint32_t j = 0; j < outputs.size(); ++j)
{
@@ -77,10 +78,11 @@ void convert_graph(const luci::GraphBuilderSource &source, luci::CircleReader &r
{
auto input_node = graph->nodes()->create<luci::CircleInput>();
assert(input_node != nullptr);
- const circle::TensorT &tensor = *tensors[input];
+ const auto tensor = tensors[input];
+ assert(tensor != nullptr);
luci::copy_tensor_attributes(tensor, input_node);
- if (tensors_ptr->Get(input)->shape() == nullptr)
+ if (tensor->shape() == nullptr)
input_node->shape_status(luci::ShapeStatus::NOSHAPE);
else
input_node->shape_status(luci::ShapeStatus::VALID);
@@ -101,16 +103,18 @@ void convert_graph(const luci::GraphBuilderSource &source, luci::CircleReader &r
// Data type
graph_input->dtype(input_node->dtype());
- assert(tensor.shape_signature.size() == 0 ||
- tensor.shape_signature.size() == tensor.shape.size());
+ const auto tensor_shape_signature = luci::wrap(tensor->shape_signature());
+ const auto tensor_shape = luci::wrap(tensor->shape());
+ assert(tensor_shape_signature.size() == 0 ||
+ tensor_shape_signature.size() == tensor_shape.size());
// Shape of GraphInput
auto input_shape = std::make_unique<loco::TensorShape>();
- const std::vector<int32_t> &input_dims = tensor.shape; // in NHWC
+ const auto &input_dims = tensor_shape; // in NHWC
input_shape->rank(input_dims.size());
for (uint32_t r = 0; r < input_dims.size(); ++r)
{
- if (tensor.shape_signature.size() > 0 && tensor.shape_signature.at(r) == -1)
+ if (tensor_shape_signature.size() > 0 && tensor_shape_signature.at(r) == -1)
input_shape->dim(r).unset();
else
input_shape->dim(r).set(input_dims[r]);
@@ -118,15 +122,28 @@ void convert_graph(const luci::GraphBuilderSource &source, luci::CircleReader &r
graph_input->shape(std::move(input_shape));
}
- // Create CircleConst nodes for constant tensors.
+ // Create CircleNodes for constant tensors.
// NOTE Origin is intentionally not provided for constants.
+ auto const_builder = source.lookup(luci::NodeBuilderType::BUFFER);
+ if (not const_builder)
+ throw oops::UserExn("Not supported", "tensor with buffer builder");
+
for (uint32_t i = 0; i < tensors.size(); ++i)
{
- luci::CircleConst *const_node = luci::create_circleconst(&gb_context, i);
+ auto *const_node = const_builder->build(i, &gb_context);
if (const_node != nullptr)
nodefinder->enroll(i, const_node);
}
+ // Create CircleVariable nodes for variable tensors
+ // TODO Add Origin if needed, skip for now
+ for (uint32_t i = 0; i < tensors.size(); ++i)
+ {
+ luci::CircleVariable *variable_node = luci::create_circlevariable(&gb_context, i);
+ if (variable_node != nullptr)
+ nodefinder->enroll(i, variable_node);
+ }
+
// Import the operators.
// Note that operators in model are stored in execution order. This means that when importing
// an operator, its input operators have already been imported. We exploit this fact to set up
@@ -134,18 +151,23 @@ void convert_graph(const luci::GraphBuilderSource &source, luci::CircleReader &r
auto origin_table = circle_metadata->origin_table();
for (uint32_t i = 0; i < operators.size(); ++i)
{
- const circle::OperatorT &op = *operators[i];
+ const auto op = operators[i];
+ assert(op != nullptr);
circle::BuiltinOperator builtincode = reader.builtin_code(op);
if (const auto *builder = source.lookup(builtincode))
{
- luci::GraphBuilder::ValidateArgs args(op, reader);
+ // create a temporary unpack-API object for the legacy builder interface
+ circle::OperatorT oper_t;
+ op->UnPackTo(&oper_t);
+
+ luci::GraphBuilder::ValidateArgs args(oper_t, reader);
if (!builder->validate(args))
{
throw oops::UserExn("Invalid operator", reader.opcode_name(op));
}
- auto built_op = builder->build(op, &gb_context);
+ auto built_op = builder->build(oper_t, &gb_context);
set_node_id(built_op, i);
if (origin_table.find(i) != origin_table.end())
add_origin(built_op, origin_table.at(i));
@@ -161,7 +183,8 @@ void convert_graph(const luci::GraphBuilderSource &source, luci::CircleReader &r
// graph outputs
for (auto output : reader.outputs())
{
- const circle::TensorT &tensor = *tensors[output];
+ const auto tensor = tensors[output];
+ assert(tensor != nullptr);
auto output_node = graph->nodes()->create<luci::CircleOutput>();
assert(output_node != nullptr);
@@ -178,7 +201,7 @@ void convert_graph(const luci::GraphBuilderSource &source, luci::CircleReader &r
output_node->from(output_dummy);
luci::copy_tensor_attributes(tensor, output_dummy);
- if (tensors_ptr->Get(output)->shape() == nullptr)
+ if (tensor->shape() == nullptr)
output_dummy->shape_status(luci::ShapeStatus::NOSHAPE);
else
output_dummy->shape_status(luci::ShapeStatus::VALID);
@@ -197,16 +220,18 @@ void convert_graph(const luci::GraphBuilderSource &source, luci::CircleReader &r
// Set GraphInputOutputIndex for graph
output_node->index(graph_output->index());
- assert(tensor.shape_signature.size() == 0 ||
- tensor.shape_signature.size() == tensor.shape.size());
+ const auto tensor_shape_signature = luci::wrap(tensor->shape_signature());
+ const auto tensor_shape = luci::wrap(tensor->shape());
+ assert(tensor_shape_signature.size() == 0 ||
+ tensor_shape_signature.size() == tensor_shape.size());
// Shape of Output
auto output_shape = std::make_unique<loco::TensorShape>();
- const std::vector<int32_t> &output_dims = tensor.shape; // in NHWC
+ const auto &output_dims = tensor_shape; // in NHWC
output_shape->rank(output_dims.size());
for (uint32_t r = 0; r < output_dims.size(); ++r)
{
- if (tensor.shape_signature.size() > 0 && tensor.shape_signature.at(r) == -1)
+ if (tensor_shape_signature.size() > 0 && tensor_shape_signature.at(r) == -1)
output_shape->dim(r).unset();
else
output_shape->dim(r).set(output_dims[r]);
@@ -214,7 +239,7 @@ void convert_graph(const luci::GraphBuilderSource &source, luci::CircleReader &r
graph_output->shape(std::move(output_shape));
// Data type
- auto dtype = luci::luci_datatype(tensor.type);
+ auto dtype = luci::luci_datatype(tensor->type());
graph_output->dtype(dtype);
}
}
@@ -355,7 +380,12 @@ std::unique_ptr<Module> Importer::importModule(const circle::Model *model) const
{
if (auto circle_node = dynamic_cast<luci::CircleNode *>(node))
{
+ if (execution_plan_table.count(node_position) == 0)
+ continue;
+
auto node_plan = execution_plan_table[node_position];
+ assert(node_plan.size() > 0);
+
luci::add_execution_plan(
circle_node,
luci::CircleNodeExecutionPlan(
diff --git a/compiler/luci/import/src/Importer.test.cpp b/compiler/luci/import/src/Importer.test.cpp
index d963b4d49..91e4860ea 100644
--- a/compiler/luci/import/src/Importer.test.cpp
+++ b/compiler/luci/import/src/Importer.test.cpp
@@ -23,7 +23,7 @@
#include <mio/circle/schema_generated.h>
#include <flatbuffers/flatbuffers.h>
-TEST(TensorFlowLiteImport, Dummy)
+TEST(CircleImport, Dummy)
{
luci::Importer import;
@@ -68,6 +68,7 @@ struct BasicCircleModel
{
uint32_t id = model->operator_codes.size();
model->operator_codes.push_back(std::make_unique<circle::OperatorCodeT>());
+ model->operator_codes[id]->deprecated_builtin_code = opcode;
model->operator_codes[id]->builtin_code = opcode;
model->operator_codes[id]->version = 1;
return id;
@@ -179,7 +180,7 @@ struct SimpleRELUModel : public BasicCircleModel
/**
* This test checks that one op RELU model with execution plan is successfully imported
*/
-TEST(TensorFlowLiteImport, simple_plan)
+TEST(CircleImport, simple_plan)
{
SimpleRELUModel model;
auto metadata_buffer_id = model.add_buffer();
@@ -240,7 +241,7 @@ TEST(TensorFlowLiteImport, simple_plan)
/**
* This test checks that model with incomplete execution plan is successfully imported
*/
-TEST(TensorFlowLiteImport, DISABLED_incomplete_plan_NEG)
+TEST(CircleImport, incomplete_plan_NEG)
{
SimpleRELUModel model;
auto metadata_buffer_id = model.add_buffer();
@@ -287,7 +288,7 @@ TEST(TensorFlowLiteImport, DISABLED_incomplete_plan_NEG)
/**
* This test checks that corrupted execution plan induce exception
*/
-TEST(TensorFlowLiteImport, corrupted_plan_NEG)
+TEST(CircleImport, corrupted_plan_NEG)
{
SimpleRELUModel model;
auto metadata_buffer_id = model.add_buffer();
@@ -309,3 +310,44 @@ TEST(TensorFlowLiteImport, corrupted_plan_NEG)
ASSERT_ANY_THROW(import.importModule(model_ptr));
}
+
+/**
+ * This test checks that an empty execution plan entry induces an exception
+ */
+TEST(CircleImport, corrupted_plan_entry_NEG)
+{
+ SimpleRELUModel model;
+ auto metadata_buffer_id = model.add_buffer();
+ model.add_plan_metadata(metadata_buffer_id);
+
+ model.add_plan_entry(metadata_buffer_id, 1, {100});
+
+ // add corrupted entry with 0 size
+ {
+ auto &buffer = model.model->buffers[metadata_buffer_id]->data;
+ auto old_size = buffer.size();
+
+ // Allocate space for new entry:
+ // 4 bytes for entry id
+ // 4 bytes for entry size
+ buffer.resize(old_size + 8);
+ uint32_t *number_of_entries_ptr = reinterpret_cast<uint32_t *>(buffer.data());
+ *number_of_entries_ptr += 1;
+
+ uint32_t *entry_data_ptr = reinterpret_cast<uint32_t *>(buffer.data() + old_size);
+
+ entry_data_ptr[0] = *number_of_entries_ptr - 1; // entry id
+ entry_data_ptr[1] = 0; // entry size
+ }
+
+ model.add_plan_entry(metadata_buffer_id, 3, {200});
+
+ flatbuffers::FlatBufferBuilder fbb;
+ auto model_offset = circle::Model::Pack(fbb, model.model.get(), nullptr);
+ circle::FinishModelBuffer(fbb, model_offset);
+
+ auto model_ptr = circle::GetModel(fbb.GetBufferPointer());
+ luci::Importer import;
+
+ ASSERT_ANY_THROW(import.importModule(model_ptr));
+}
diff --git a/compiler/luci/import/src/Nodes/CircleCast.cpp b/compiler/luci/import/src/Nodes/CircleCast.cpp
index 3e8c08bfa..acde823b1 100644
--- a/compiler/luci/import/src/Nodes/CircleCast.cpp
+++ b/compiler/luci/import/src/Nodes/CircleCast.cpp
@@ -42,12 +42,14 @@ bool CircleCastGraphBuilder::validate(const ValidateArgs &args) const
const auto *options = args.op.builtin_options.AsCastOptions();
if (options != nullptr)
{
- const auto &tensors = args.reader.tensors();
- const circle::TensorT &output_tensor = *tensors[outputs[0]];
+ const auto tensors = args.reader.tensors();
+ const auto output_tensor = tensors[outputs[0]];
+ assert(output_tensor != nullptr);
auto name = tensor_name(output_tensor);
- const auto &tensor_in = tensors.at(inputs.at(0));
- if (tensor_in->type != options->in_data_type)
+ const auto tensor_in = tensors.at(inputs.at(0));
+ assert(tensor_in != nullptr);
+ if (tensor_in->type() != options->in_data_type)
{
if (settings->get(luci::UserSettings::Key::DisableValidation))
{
@@ -57,7 +59,7 @@ bool CircleCastGraphBuilder::validate(const ValidateArgs &args) const
return false;
}
const auto &tensor_out = tensors.at(outputs[0]);
- if (tensor_out->type != options->out_data_type)
+ if (tensor_out->type() != options->out_data_type)
{
if (settings->get(luci::UserSettings::Key::DisableValidation))
{
diff --git a/compiler/luci/import/src/Nodes/CircleConst.cpp b/compiler/luci/import/src/Nodes/CircleConst.cpp
index 11fbb4e54..a4f190dd9 100644
--- a/compiler/luci/import/src/Nodes/CircleConst.cpp
+++ b/compiler/luci/import/src/Nodes/CircleConst.cpp
@@ -30,10 +30,10 @@
namespace
{
-std::ostream &operator<<(std::ostream &os, const std::vector<int32_t> &vect)
+std::ostream &operator<<(std::ostream &os, const luci::VectorWrapper<int32_t> &vect)
{
uint32_t seq = 0;
- for (auto &v : vect)
+ for (const auto &v : vect)
{
if (seq)
os << ", ";
@@ -46,7 +46,8 @@ std::ostream &operator<<(std::ostream &os, const std::vector<int32_t> &vect)
using namespace luci;
template <loco::DataType DT>
-void copy_data(const std::vector<uint8_t> &raw_data, uint32_t num_elements, CircleConst *const_node)
+void copy_data(const VectorWrapper<uint8_t> &raw_data, uint32_t num_elements,
+ CircleConst *const_node)
{
using T = typename loco::DataTypeImpl<DT>::Type;
@@ -67,8 +68,8 @@ void copy_data(const std::vector<uint8_t> &raw_data, uint32_t num_elements, Circ
}
template <>
-void copy_data<loco::DataType::STRING>(const std::vector<uint8_t> &raw_data, uint32_t num_elements,
- CircleConst *const_node)
+void copy_data<loco::DataType::STRING>(const VectorWrapper<uint8_t> &raw_data,
+ uint32_t num_elements, CircleConst *const_node)
{
assert(const_node->sparsityparam() == nullptr);
@@ -106,17 +107,26 @@ void copy_data<loco::DataType::STRING>(const std::vector<uint8_t> &raw_data, uin
namespace luci
{
-CircleConst *create_circleconst(GraphBuilderContext *context, int32_t tensor_index)
+CircleNode *CircleConstNodeBuilder::build(TensorIndex tensor_index,
+ GraphBuilderContext *context) const
{
+ assert(tensor_index >= 0);
LOGGER(l);
auto graph = context->graph();
auto reader = context->reader();
- const auto &tensors = reader->tensors();
- const circle::TensorT &const_tensor = *tensors[tensor_index];
+ const auto tensors = reader->tensors();
+ const auto const_tensor = tensors[tensor_index];
+ assert(const_tensor != nullptr);
+ if (const_tensor->is_variable())
+ {
+ // variable tensors are handled by CircleVariable, not CircleConst
+ return nullptr;
+ }
- const std::vector<uint8_t> &buffer = reader->buffers()[const_tensor.buffer]->data;
- std::vector<int32_t> const_dims = const_tensor.shape; // in NHWC
+ assert(reader->buffers()[const_tensor->buffer()] != nullptr);
+ const auto buffer = wrap(reader->buffers()[const_tensor->buffer()]->data());
+ const auto const_dims = wrap(const_tensor->shape()); // in NHWC
if (const_dims.size() == 0 && buffer.empty())
{
// unknown shape tensor and scalar tensor
@@ -150,7 +160,7 @@ CircleConst *create_circleconst(GraphBuilderContext *context, int32_t tensor_ind
<< const_dims << std::endl;
if (num_elements > 0)
{
- switch (luci_datatype(const_tensor.type))
+ switch (luci_datatype(const_tensor->type()))
{
case loco::DataType::FLOAT32:
copy_data<loco::DataType::FLOAT32>(buffer, num_elements, const_node);
@@ -186,7 +196,7 @@ CircleConst *create_circleconst(GraphBuilderContext *context, int32_t tensor_ind
default:
throw oops::UserExn("Unsupported tensor type",
- circle::EnumNameTensorType(const_tensor.type));
+ circle::EnumNameTensorType(const_tensor->type()));
}
}
diff --git a/compiler/luci/import/src/Nodes/CircleCustom.cpp b/compiler/luci/import/src/Nodes/CircleCustom.cpp
index 01ac3e2a0..4e78d5fb7 100644
--- a/compiler/luci/import/src/Nodes/CircleCustom.cpp
+++ b/compiler/luci/import/src/Nodes/CircleCustom.cpp
@@ -39,13 +39,15 @@ CircleNode *CircleCustomGraphBuilder::build_node(const BuildNodeArgs &bna) const
node->inputs(idx, bna.input_nodes[idx]);
}
- const auto &opcodes = bna.context->reader()->opcodes();
+ const auto opcodes = bna.context->reader()->opcodes();
const uint32_t opcode_index = bna.op.opcode_index;
- const circle::OperatorCodeT &opcode = *opcodes[opcode_index];
+ const auto opcode = opcodes[opcode_index];
+ assert(opcode != nullptr);
node->custom_options(
std::vector<uint8_t>{bna.op.custom_options.begin(), bna.op.custom_options.end()});
- node->custom_code(opcode.custom_code);
+ assert(opcode->custom_code() != nullptr);
+ node->custom_code(opcode->custom_code()->c_str());
// NOTE Operator version of custom is always 1
diff --git a/compiler/luci/import/src/Nodes/CircleDepthToSpace.cpp b/compiler/luci/import/src/Nodes/CircleDepthToSpace.cpp
index 49eb30a83..83fc2e37d 100644
--- a/compiler/luci/import/src/Nodes/CircleDepthToSpace.cpp
+++ b/compiler/luci/import/src/Nodes/CircleDepthToSpace.cpp
@@ -34,9 +34,10 @@ bool CircleDepthToSpaceGraphBuilder::validate(const ValidateArgs &args) const
const auto &outputs = args.op.outputs;
const auto *options = args.op.builtin_options.AsDepthToSpaceOptions();
- const auto &tensors = args.reader.tensors();
+ const auto tensors = args.reader.tensors();
+ assert(tensors[outputs[0]] != nullptr && tensors[inputs.at(0)] != nullptr);
- if (tensors[outputs[0]]->type != tensors[inputs.at(0)]->type)
+ if (tensors[outputs[0]]->type() != tensors[inputs.at(0)]->type())
{
return false;
}
diff --git a/compiler/luci/import/src/Nodes/CircleDepthwiseConv2D.cpp b/compiler/luci/import/src/Nodes/CircleDepthwiseConv2D.cpp
index 727487c6a..a24e4160d 100644
--- a/compiler/luci/import/src/Nodes/CircleDepthwiseConv2D.cpp
+++ b/compiler/luci/import/src/Nodes/CircleDepthwiseConv2D.cpp
@@ -32,19 +32,21 @@ bool CircleDepthwiseConv2DGraphBuilder::validate(const ValidateArgs &args) const
if (args.op.outputs.size() != 1)
return false;
- const auto &tensors = args.reader.tensors();
+ const auto tensors = args.reader.tensors();
// input shape
- const auto &input = tensors.at(args.op.inputs.at(0));
- const auto &input_shape = input->shape;
+ const auto input = tensors.at(args.op.inputs.at(0));
+ assert(input != nullptr);
+ const auto input_shape = wrap(input->shape());
// input shape must be rank 4
if (input_shape.size() != 4)
return false;
// filter shape
- const auto &filter = tensors.at(args.op.inputs.at(1));
- const auto &filter_shape = filter->shape;
+ const auto filter = tensors.at(args.op.inputs.at(1));
+ assert(filter != nullptr);
+ const auto filter_shape = wrap(filter->shape());
// filter shape must be rank 4
if (filter_shape.size() != 4)
diff --git a/compiler/luci/import/src/Nodes/CircleElu.cpp b/compiler/luci/import/src/Nodes/CircleElu.cpp
index 41696a65a..e5d7a4c7a 100644
--- a/compiler/luci/import/src/Nodes/CircleElu.cpp
+++ b/compiler/luci/import/src/Nodes/CircleElu.cpp
@@ -31,10 +31,11 @@ bool CircleEluGraphBuilder::validate(const ValidateArgs &args) const
const auto &inputs = args.op.inputs;
const auto &outputs = args.op.outputs;
- const auto &tensors = args.reader.tensors();
- const auto &tensor = tensors.at(inputs.at(0));
+ const auto tensors = args.reader.tensors();
+ const auto tensor = tensors.at(inputs.at(0));
+ assert(tensor != nullptr);
- switch (tensor->type)
+ switch (tensor->type())
{
case circle::TensorType_FLOAT64:
break;
@@ -48,7 +49,8 @@ bool CircleEluGraphBuilder::validate(const ValidateArgs &args) const
return false;
}
- if (tensors[outputs[0]]->type != tensor->type)
+ assert(tensors[outputs[0]] != nullptr);
+ if (tensors[outputs[0]]->type() != tensor->type())
return false;
return true;
diff --git a/compiler/luci/import/src/Nodes/CircleEqual.cpp b/compiler/luci/import/src/Nodes/CircleEqual.cpp
index 4909692b4..b326d9b5d 100644
--- a/compiler/luci/import/src/Nodes/CircleEqual.cpp
+++ b/compiler/luci/import/src/Nodes/CircleEqual.cpp
@@ -29,9 +29,10 @@ bool CircleEqualGraphBuilder::validate(const ValidateArgs &args) const
return false;
const auto &inputs = args.op.inputs;
- const auto &tensors = args.reader.tensors();
+ const auto tensors = args.reader.tensors();
- return tensors[inputs.at(0)]->type == tensors[inputs.at(1)]->type;
+ assert(tensors[inputs.at(0)] != nullptr && tensors[inputs.at(1)] != nullptr);
+ return tensors[inputs.at(0)]->type() == tensors[inputs.at(1)]->type();
}
CircleNode *CircleEqualGraphBuilder::build_node(const circle::OperatorT &,
diff --git a/compiler/luci/import/src/Nodes/CircleExp.cpp b/compiler/luci/import/src/Nodes/CircleExp.cpp
index 5bb7bb664..82c26f0e5 100644
--- a/compiler/luci/import/src/Nodes/CircleExp.cpp
+++ b/compiler/luci/import/src/Nodes/CircleExp.cpp
@@ -30,9 +30,10 @@ bool CircleExpGraphBuilder::validate(const ValidateArgs &args) const
const auto &inputs = args.op.inputs;
// input type check
- const auto &tensors = args.reader.tensors();
- const auto &tensor = tensors.at(inputs.at(0));
- switch (tensor->type)
+ const auto tensors = args.reader.tensors();
+ const auto tensor = tensors.at(inputs.at(0));
+ assert(tensor != nullptr);
+ switch (tensor->type())
{
case circle::TensorType_FLOAT16:
case circle::TensorType_FLOAT32:
diff --git a/compiler/luci/import/src/Nodes/CircleExpandDims.cpp b/compiler/luci/import/src/Nodes/CircleExpandDims.cpp
index ee0fbdc7e..67d9b7e9e 100644
--- a/compiler/luci/import/src/Nodes/CircleExpandDims.cpp
+++ b/compiler/luci/import/src/Nodes/CircleExpandDims.cpp
@@ -29,9 +29,10 @@ bool CircleExpandDimsGraphBuilder::validate(const ValidateArgs &args) const
return false;
const auto &inputs = args.op.inputs;
- const auto &tensors = args.reader.tensors();
+ const auto tensors = args.reader.tensors();
- return tensors[inputs.at(1)]->type == circle::TensorType_INT32;
+ assert(tensors[inputs.at(1)] != nullptr);
+ return tensors[inputs.at(1)]->type() == circle::TensorType_INT32;
}
CircleNode *CircleExpandDimsGraphBuilder::build_node(const circle::OperatorT &,
diff --git a/compiler/luci/import/src/Nodes/CircleFloorDiv.cpp b/compiler/luci/import/src/Nodes/CircleFloorDiv.cpp
index ce329326a..67eeddf91 100644
--- a/compiler/luci/import/src/Nodes/CircleFloorDiv.cpp
+++ b/compiler/luci/import/src/Nodes/CircleFloorDiv.cpp
@@ -30,15 +30,18 @@ bool CircleFloorDivGraphBuilder::validate(const ValidateArgs &args) const
const auto &inputs = args.op.inputs;
const auto &outputs = args.op.outputs;
- const auto &tensors = args.reader.tensors();
- const auto &tensor_in_0 = tensors.at(inputs.at(0));
- const auto &tensor_in_1 = tensors.at(inputs.at(1));
- const auto &tensor_out = tensors.at(outputs[0]);
-
- if (tensor_in_0->type != tensor_in_1->type)
+ const auto tensors = args.reader.tensors();
+ const auto tensor_in_0 = tensors.at(inputs.at(0));
+ const auto tensor_in_1 = tensors.at(inputs.at(1));
+ const auto tensor_out = tensors.at(outputs[0]);
+ assert(tensor_in_0 != nullptr);
+ assert(tensor_in_1 != nullptr);
+ assert(tensor_out != nullptr);
+
+ if (tensor_in_0->type() != tensor_in_1->type())
return false;
- if (tensor_out->type != tensor_in_1->type)
+ if (tensor_out->type() != tensor_in_1->type())
{
return false;
}
diff --git a/compiler/luci/import/src/Nodes/CircleFloorMod.cpp b/compiler/luci/import/src/Nodes/CircleFloorMod.cpp
index d8420a43c..d2a275b62 100644
--- a/compiler/luci/import/src/Nodes/CircleFloorMod.cpp
+++ b/compiler/luci/import/src/Nodes/CircleFloorMod.cpp
@@ -29,10 +29,11 @@ bool CircleFloorModGraphBuilder::validate(const ValidateArgs &args) const
return false;
const auto &inputs = args.op.inputs;
- const auto &tensors = args.reader.tensors();
- const auto &tensor_in_0 = tensors.at(inputs.at(0));
- const auto &tensor_in_1 = tensors.at(inputs.at(1));
- if (tensor_in_0->type != tensor_in_1->type)
+ const auto tensors = args.reader.tensors();
+ const auto tensor_in_0 = tensors.at(inputs.at(0));
+ const auto tensor_in_1 = tensors.at(inputs.at(1));
+ assert(tensor_in_0 != nullptr && tensor_in_1 != nullptr);
+ if (tensor_in_0->type() != tensor_in_1->type())
return false;
// TODO dtype check
diff --git a/compiler/luci/import/src/Nodes/CircleFullyConnected.cpp b/compiler/luci/import/src/Nodes/CircleFullyConnected.cpp
index 58750d79a..cc7be1693 100644
--- a/compiler/luci/import/src/Nodes/CircleFullyConnected.cpp
+++ b/compiler/luci/import/src/Nodes/CircleFullyConnected.cpp
@@ -42,6 +42,7 @@ CircleNode *CircleFullyConnectedGraphBuilder::build_node(const circle::OperatorT
const auto *options = op.builtin_options.AsFullyConnectedOptions();
node->fusedActivationFunction(luci_actfunc(options->fused_activation_function));
node->weights_format(luci_weights_format(options->weights_format));
+ node->keep_num_dims(options->keep_num_dims);
return node;
}
diff --git a/compiler/luci/import/src/Nodes/CircleGatherNd.cpp b/compiler/luci/import/src/Nodes/CircleGatherNd.cpp
index a4bb26a10..d336878ad 100644
--- a/compiler/luci/import/src/Nodes/CircleGatherNd.cpp
+++ b/compiler/luci/import/src/Nodes/CircleGatherNd.cpp
@@ -31,10 +31,11 @@ bool CircleGatherNdGraphBuilder::validate(const ValidateArgs &args) const
return false;
const auto &inputs = args.op.inputs;
- auto &indices_tensor = args.reader.tensors()[inputs.at(1)];
+ auto indices_tensor = args.reader.tensors()[inputs.at(1)];
+ assert(indices_tensor != nullptr);
- if (!(indices_tensor->type == circle::TensorType::TensorType_INT32 ||
- indices_tensor->type == circle::TensorType::TensorType_INT64))
+ if (!(indices_tensor->type() == circle::TensorType::TensorType_INT32 ||
+ indices_tensor->type() == circle::TensorType::TensorType_INT64))
{
return false;
}
diff --git a/compiler/luci/import/src/Nodes/CircleGreater.cpp b/compiler/luci/import/src/Nodes/CircleGreater.cpp
index f9c00346c..7f031b0ba 100644
--- a/compiler/luci/import/src/Nodes/CircleGreater.cpp
+++ b/compiler/luci/import/src/Nodes/CircleGreater.cpp
@@ -37,17 +37,19 @@ bool CircleGreaterGraphBuilder::validate(const ValidateArgs &args) const
const auto &inputs = args.op.inputs;
const auto &outputs = args.op.outputs;
- const auto &tensors = args.reader.tensors();
+ const auto tensors = args.reader.tensors();
- if (tensors[inputs.at(0)]->type != tensors[inputs.at(1)]->type)
+ assert(tensors[inputs.at(0)] != nullptr && tensors[inputs.at(1)] != nullptr);
+ if (tensors[inputs.at(0)]->type() != tensors[inputs.at(1)]->type())
return false;
// NOTE: some real models do have an output dtype that is NOT BOOL
- if (tensors[outputs[0]]->type != circle::TensorType_BOOL)
+ assert(tensors[outputs[0]] != nullptr);
+ if (tensors[outputs[0]]->type() != circle::TensorType_BOOL)
{
if (settings->get(luci::UserSettings::Key::DisableValidation))
{
- const circle::TensorT &output_tensor = *tensors[outputs[0]];
+ const auto output_tensor = tensors[outputs[0]];
auto name = tensor_name(output_tensor);
WARN(l) << "Warning: import Greater(" << name << ") output dtype is not boolean";
}
diff --git a/compiler/luci/import/src/Nodes/CircleGreaterEqual.cpp b/compiler/luci/import/src/Nodes/CircleGreaterEqual.cpp
index e20038fd9..ac4ce62f5 100644
--- a/compiler/luci/import/src/Nodes/CircleGreaterEqual.cpp
+++ b/compiler/luci/import/src/Nodes/CircleGreaterEqual.cpp
@@ -30,14 +30,16 @@ bool CircleGreaterEqualGraphBuilder::validate(const ValidateArgs &args) const
const auto &inputs = args.op.inputs;
const auto &outputs = args.op.outputs;
- const auto &tensors = args.reader.tensors();
+ const auto tensors = args.reader.tensors();
- if (tensors[inputs.at(0)]->type != tensors[inputs.at(1)]->type)
+ assert(tensors[inputs.at(0)] != nullptr && tensors[inputs.at(1)] != nullptr);
+ if (tensors[inputs.at(0)]->type() != tensors[inputs.at(1)]->type())
{
return false;
}
- return tensors[outputs[0]]->type == circle::TensorType::TensorType_BOOL;
+ assert(tensors[outputs[0]] != nullptr);
+ return tensors[outputs[0]]->type() == circle::TensorType::TensorType_BOOL;
}
CircleNode *CircleGreaterEqualGraphBuilder::build_node(const circle::OperatorT &,
diff --git a/compiler/luci/import/src/Nodes/CircleIf.cpp b/compiler/luci/import/src/Nodes/CircleIf.cpp
index ffdbf0b79..e8a50ff32 100644
--- a/compiler/luci/import/src/Nodes/CircleIf.cpp
+++ b/compiler/luci/import/src/Nodes/CircleIf.cpp
@@ -42,12 +42,13 @@ bool CircleIfGraphBuilder::validate(const ValidateArgs &args) const
return false;
// input 0 should be BOOL type
- const auto &tensors = args.reader.tensors();
- const auto &tensor = tensors.at(inputs.at(0));
- if (tensor->type != circle::TensorType_BOOL)
+ const auto tensors = args.reader.tensors();
+ const auto tensor = tensors.at(inputs.at(0));
+ assert(tensor != nullptr);
+ if (tensor->type() != circle::TensorType_BOOL)
return false;
- const auto &shape = tensor->shape;
+ const auto shape = wrap(tensor->shape());
if (shape.size() != 1 && shape.size() != 0)
return false;
diff --git a/compiler/luci/import/src/Nodes/CircleLess.cpp b/compiler/luci/import/src/Nodes/CircleLess.cpp
index f9b99bebe..5c5ae51e1 100644
--- a/compiler/luci/import/src/Nodes/CircleLess.cpp
+++ b/compiler/luci/import/src/Nodes/CircleLess.cpp
@@ -30,10 +30,11 @@ bool CircleLessGraphBuilder::validate(const ValidateArgs &args) const
const auto &inputs = args.op.inputs;
const auto &outputs = args.op.outputs;
- const auto &tensors = args.reader.tensors();
- const auto &tensor = tensors.at(inputs.at(0));
+ const auto tensors = args.reader.tensors();
+ const auto tensor = tensors.at(inputs.at(0));
+ assert(tensor != nullptr);
- switch (tensor->type)
+ switch (tensor->type())
{
case circle::TensorType_FLOAT32:
case circle::TensorType_FLOAT64:
@@ -48,12 +49,14 @@ bool CircleLessGraphBuilder::validate(const ValidateArgs &args) const
return false;
}
- if (tensors[inputs.at(1)]->type != tensor->type)
+ assert(tensors[inputs.at(1)] != nullptr);
+ if (tensors[inputs.at(1)]->type() != tensor->type())
{
return false;
}
- return tensors[outputs[0]]->type == circle::TensorType_BOOL;
+ assert(tensors[outputs[0]] != nullptr);
+ return tensors[outputs[0]]->type() == circle::TensorType_BOOL;
}
CircleNode *CircleLessGraphBuilder::build_node(const circle::OperatorT &,
diff --git a/compiler/luci/import/src/Nodes/CircleLessEqual.cpp b/compiler/luci/import/src/Nodes/CircleLessEqual.cpp
index bb1712137..8a2aea8db 100644
--- a/compiler/luci/import/src/Nodes/CircleLessEqual.cpp
+++ b/compiler/luci/import/src/Nodes/CircleLessEqual.cpp
@@ -30,14 +30,16 @@ bool CircleLessEqualGraphBuilder::validate(const ValidateArgs &args) const
const auto &inputs = args.op.inputs;
const auto &outputs = args.op.outputs;
- const auto &tensors = args.reader.tensors();
+ const auto tensors = args.reader.tensors();
- if (tensors[inputs.at(0)]->type != tensors[inputs.at(1)]->type)
+ assert(tensors[inputs.at(0)] != nullptr && tensors[inputs.at(1)] != nullptr);
+ if (tensors[inputs.at(0)]->type() != tensors[inputs.at(1)]->type())
{
return false;
}
- return tensors[outputs[0]]->type == circle::TensorType::TensorType_BOOL;
+ assert(tensors[outputs[0]] != nullptr);
+ return tensors[outputs[0]]->type() == circle::TensorType::TensorType_BOOL;
}
CircleNode *CircleLessEqualGraphBuilder::build_node(const circle::OperatorT &,
diff --git a/compiler/luci/import/src/Nodes/CircleLog.cpp b/compiler/luci/import/src/Nodes/CircleLog.cpp
index 26b575070..f41926829 100644
--- a/compiler/luci/import/src/Nodes/CircleLog.cpp
+++ b/compiler/luci/import/src/Nodes/CircleLog.cpp
@@ -32,9 +32,10 @@ bool CircleLogGraphBuilder::validate(const ValidateArgs &args) const
// input type check
// Must be one of bfloat16, half, float32, float64, complex64, complex128.
// Currently, circle supports half (float16), float32, float64, complex64.
- const auto &tensors = args.reader.tensors();
- const auto &tensor = tensors.at(inputs.at(0));
- switch (tensor->type)
+ const auto tensors = args.reader.tensors();
+ const auto tensor = tensors.at(inputs.at(0));
+ assert(tensor != nullptr);
+ switch (tensor->type())
{
case circle::TensorType_FLOAT16:
case circle::TensorType_FLOAT32:
diff --git a/compiler/luci/import/src/Nodes/CircleLogicalAnd.cpp b/compiler/luci/import/src/Nodes/CircleLogicalAnd.cpp
index b13fc2735..b61fb6f3e 100644
--- a/compiler/luci/import/src/Nodes/CircleLogicalAnd.cpp
+++ b/compiler/luci/import/src/Nodes/CircleLogicalAnd.cpp
@@ -30,11 +30,12 @@ bool CircleLogicalAndGraphBuilder::validate(const ValidateArgs &args) const
// Only BOOL type is allowed for inputs
const auto &inputs = args.op.inputs;
- const auto &tensors = args.reader.tensors();
+ const auto tensors = args.reader.tensors();
for (auto input : inputs)
{
- const auto &tensor = tensors.at(input);
- if (tensor->type != circle::TensorType::TensorType_BOOL)
+ const auto tensor = tensors.at(input);
+ assert(tensor != nullptr);
+ if (tensor->type() != circle::TensorType::TensorType_BOOL)
return false;
}
diff --git a/compiler/luci/import/src/Nodes/CircleLogicalNot.cpp b/compiler/luci/import/src/Nodes/CircleLogicalNot.cpp
index f68218349..43e9ed39f 100644
--- a/compiler/luci/import/src/Nodes/CircleLogicalNot.cpp
+++ b/compiler/luci/import/src/Nodes/CircleLogicalNot.cpp
@@ -30,9 +30,10 @@ bool CircleLogicalNotGraphBuilder::validate(const ValidateArgs &args) const
// Only BOOL type is allowed for the input
const auto &inputs = args.op.inputs;
- const auto &tensors = args.reader.tensors();
- const auto &tensor = tensors.at(inputs.at(0));
- if (tensor->type != circle::TensorType::TensorType_BOOL)
+ const auto tensors = args.reader.tensors();
+ const auto tensor = tensors.at(inputs.at(0));
+ assert(tensor != nullptr);
+ if (tensor->type() != circle::TensorType::TensorType_BOOL)
return false;
return true;
diff --git a/compiler/luci/import/src/Nodes/CircleLogicalOr.cpp b/compiler/luci/import/src/Nodes/CircleLogicalOr.cpp
index 8c9023dd3..6354e7dc1 100644
--- a/compiler/luci/import/src/Nodes/CircleLogicalOr.cpp
+++ b/compiler/luci/import/src/Nodes/CircleLogicalOr.cpp
@@ -30,11 +30,12 @@ bool CircleLogicalOrGraphBuilder::validate(const ValidateArgs &args) const
// Only BOOL type is allowed for inputs
const auto &inputs = args.op.inputs;
- const auto &tensors = args.reader.tensors();
+ const auto tensors = args.reader.tensors();
for (auto input : inputs)
{
- const auto &tensor = tensors.at(input);
- if (tensor->type != circle::TensorType::TensorType_BOOL)
+ const auto tensor = tensors.at(input);
+ assert(tensor != nullptr);
+ if (tensor->type() != circle::TensorType::TensorType_BOOL)
return false;
}
diff --git a/compiler/luci/import/src/Nodes/CircleLogistic.cpp b/compiler/luci/import/src/Nodes/CircleLogistic.cpp
index 0f92a9bb4..b0d08e039 100644
--- a/compiler/luci/import/src/Nodes/CircleLogistic.cpp
+++ b/compiler/luci/import/src/Nodes/CircleLogistic.cpp
@@ -30,8 +30,9 @@ bool CircleLogisticGraphBuilder::validate(const ValidateArgs &args) const
const auto &inputs = args.op.inputs;
const auto &outputs = args.op.outputs;
- const auto &tensors = args.reader.tensors();
- if (tensors.at(inputs.at(0))->type != tensors.at(outputs[0])->type)
+ const auto tensors = args.reader.tensors();
+ assert(tensors.at(inputs.at(0)) != nullptr && tensors.at(outputs[0]) != nullptr);
+ if (tensors.at(inputs.at(0))->type() != tensors.at(outputs[0])->type())
return false;
return true;
diff --git a/compiler/luci/import/src/Nodes/CircleMatrixDiag.cpp b/compiler/luci/import/src/Nodes/CircleMatrixDiag.cpp
index 590a07f2d..384b98586 100644
--- a/compiler/luci/import/src/Nodes/CircleMatrixDiag.cpp
+++ b/compiler/luci/import/src/Nodes/CircleMatrixDiag.cpp
@@ -30,10 +30,11 @@ bool CircleMatrixDiagGraphBuilder::validate(const ValidateArgs &args) const
const auto &inputs = args.op.inputs;
const auto &outputs = args.op.outputs;
- const auto &tensors = args.reader.tensors();
- const auto &tensor = tensors.at(inputs.at(0));
+ const auto tensors = args.reader.tensors();
+ const auto tensor = tensors.at(inputs.at(0));
- if (tensors[outputs[0]]->type != tensor->type)
+ assert(tensors[outputs[0]] != nullptr && tensor != nullptr);
+ if (tensors[outputs[0]]->type() != tensor->type())
return false;
return true;
diff --git a/compiler/luci/import/src/Nodes/CircleMatrixSetDiag.cpp b/compiler/luci/import/src/Nodes/CircleMatrixSetDiag.cpp
index edd7d2ae2..64870c057 100644
--- a/compiler/luci/import/src/Nodes/CircleMatrixSetDiag.cpp
+++ b/compiler/luci/import/src/Nodes/CircleMatrixSetDiag.cpp
@@ -30,10 +30,11 @@ bool CircleMatrixSetDiagGraphBuilder::validate(const ValidateArgs &args) const
const auto &inputs = args.op.inputs;
const auto &outputs = args.op.outputs;
- const auto &tensors = args.reader.tensors();
- const auto &tensor = tensors.at(inputs.at(0));
+ const auto tensors = args.reader.tensors();
+ const auto tensor = tensors.at(inputs.at(0));
- if (tensors[outputs[0]]->type != tensor->type)
+ assert(tensors[outputs[0]] != nullptr && tensor != nullptr);
+ if (tensors[outputs[0]]->type() != tensor->type())
return false;
return true;
diff --git a/compiler/luci/import/src/Nodes/CircleNonMaxSuppressionV4.cpp b/compiler/luci/import/src/Nodes/CircleNonMaxSuppressionV4.cpp
index d3d69506b..e86f2ba81 100644
--- a/compiler/luci/import/src/Nodes/CircleNonMaxSuppressionV4.cpp
+++ b/compiler/luci/import/src/Nodes/CircleNonMaxSuppressionV4.cpp
@@ -35,20 +35,26 @@ bool CircleNonMaxSuppressionV4GraphBuilder::validate(const ValidateArgs &args) c
if (outputs.size() != 2)
return false;
- const auto &tensors = args.reader.tensors();
- const auto &boxes_tensor = tensors.at(inputs[0]);
- if (boxes_tensor->shape.size() != 2)
+ const auto tensors = args.reader.tensors();
+ const auto boxes_tensor = tensors.at(inputs[0]);
+ assert(boxes_tensor != nullptr);
+ const auto boxes_tensor_shape = wrap(boxes_tensor->shape());
+ if (boxes_tensor_shape.size() != 2)
return false;
- if (boxes_tensor->shape.at(1) != 4)
+ if (boxes_tensor_shape.at(1) != 4)
return false;
- if (boxes_tensor->shape.at(0) != tensors.at(inputs[1])->shape.at(0))
+ assert(tensors.at(inputs[1]) != nullptr);
+ if (boxes_tensor_shape.at(0) != wrap(tensors.at(inputs[1])->shape()).at(0))
return false;
- if (tensors.at(inputs[2])->type != circle::TensorType_INT32)
+ assert(tensors.at(inputs[2]) != nullptr);
+ if (tensors.at(inputs[2])->type() != circle::TensorType_INT32)
return false;
- if (tensors.at(inputs[3])->type != circle::TensorType_FLOAT32)
+ assert(tensors.at(inputs[3]) != nullptr);
+ if (tensors.at(inputs[3])->type() != circle::TensorType_FLOAT32)
return false;
- if (tensors.at(inputs[4])->type != circle::TensorType_FLOAT32)
+ assert(tensors.at(inputs[4]) != nullptr);
+ if (tensors.at(inputs[4])->type() != circle::TensorType_FLOAT32)
return false;
return true;
diff --git a/compiler/luci/import/src/Nodes/CircleNonMaxSuppressionV5.cpp b/compiler/luci/import/src/Nodes/CircleNonMaxSuppressionV5.cpp
index d797d4cb7..a60eed4e4 100644
--- a/compiler/luci/import/src/Nodes/CircleNonMaxSuppressionV5.cpp
+++ b/compiler/luci/import/src/Nodes/CircleNonMaxSuppressionV5.cpp
@@ -35,22 +35,29 @@ bool CircleNonMaxSuppressionV5GraphBuilder::validate(const ValidateArgs &args) c
if (outputs.size() != 3)
return false;
- const auto &tensors = args.reader.tensors();
- const auto &boxes_tensor = tensors.at(inputs[0]);
- if (boxes_tensor->shape.size() != 2)
+ const auto tensors = args.reader.tensors();
+ const auto boxes_tensor = tensors.at(inputs[0]);
+ assert(boxes_tensor != nullptr);
+ const auto boxes_tensor_shape = wrap(boxes_tensor->shape());
+ if (boxes_tensor_shape.size() != 2)
return false;
- if (boxes_tensor->shape.at(1) != 4)
+ if (boxes_tensor_shape.at(1) != 4)
return false;
- if (boxes_tensor->shape.at(0) != tensors.at(inputs[1])->shape.at(0))
+ assert(tensors.at(inputs[1]) != nullptr);
+ if (boxes_tensor_shape.at(0) != wrap(tensors.at(inputs[1])->shape()).at(0))
return false;
- if (tensors.at(inputs[2])->type != circle::TensorType_INT32)
+ assert(tensors.at(inputs[2]) != nullptr);
+ if (tensors.at(inputs[2])->type() != circle::TensorType_INT32)
return false;
- if (tensors.at(inputs[3])->type != circle::TensorType_FLOAT32)
+ assert(tensors.at(inputs[3]) != nullptr);
+ if (tensors.at(inputs[3])->type() != circle::TensorType_FLOAT32)
return false;
- if (tensors.at(inputs[4])->type != circle::TensorType_FLOAT32)
+ assert(tensors.at(inputs[4]) != nullptr);
+ if (tensors.at(inputs[4])->type() != circle::TensorType_FLOAT32)
return false;
- if (tensors.at(inputs[5])->type != circle::TensorType_FLOAT32)
+ assert(tensors.at(inputs[5]) != nullptr);
+ if (tensors.at(inputs[5])->type() != circle::TensorType_FLOAT32)
return false;
return true;
diff --git a/compiler/luci/import/src/Nodes/CircleNotEqual.cpp b/compiler/luci/import/src/Nodes/CircleNotEqual.cpp
index a0b8f9e4f..3f5c1e033 100644
--- a/compiler/luci/import/src/Nodes/CircleNotEqual.cpp
+++ b/compiler/luci/import/src/Nodes/CircleNotEqual.cpp
@@ -30,14 +30,16 @@ bool CircleNotEqualGraphBuilder::validate(const ValidateArgs &args) const
const auto &inputs = args.op.inputs;
const auto &outputs = args.op.outputs;
- const auto &tensors = args.reader.tensors();
+ const auto tensors = args.reader.tensors();
- if (tensors[inputs.at(0)]->type != tensors[inputs.at(1)]->type)
+ assert(tensors[inputs.at(0)] != nullptr && tensors[inputs.at(1)] != nullptr);
+ if (tensors[inputs.at(0)]->type() != tensors[inputs.at(1)]->type())
{
return false;
}
- return tensors[outputs[0]]->type == circle::TensorType::TensorType_BOOL;
+ assert(tensors[outputs[0]] != nullptr);
+ return tensors[outputs[0]]->type() == circle::TensorType::TensorType_BOOL;
}
CircleNode *CircleNotEqualGraphBuilder::build_node(const circle::OperatorT &,
diff --git a/compiler/luci/import/src/Nodes/CircleOneHot.cpp b/compiler/luci/import/src/Nodes/CircleOneHot.cpp
index 3952cc21a..6e5f8e16f 100644
--- a/compiler/luci/import/src/Nodes/CircleOneHot.cpp
+++ b/compiler/luci/import/src/Nodes/CircleOneHot.cpp
@@ -32,21 +32,25 @@ bool CircleOneHotGraphBuilder::validate(const ValidateArgs &args) const
const auto &inputs = args.op.inputs;
const auto *options = args.op.builtin_options.AsOneHotOptions();
- const auto &tensors = args.reader.tensors();
- const auto &indices = tensors.at(inputs.at(0));
- const auto &depth = tensors.at(inputs.at(1));
- const auto &on_value = tensors.at(inputs.at(2));
- const auto &off_value = tensors.at(inputs.at(3));
+ const auto tensors = args.reader.tensors();
+ const auto indices = tensors.at(inputs.at(0));
+ const auto depth = tensors.at(inputs.at(1));
+ const auto on_value = tensors.at(inputs.at(2));
+ const auto off_value = tensors.at(inputs.at(3));
+ assert(indices != nullptr);
+ assert(depth != nullptr);
+ assert(on_value != nullptr);
+ assert(off_value != nullptr);
- if (options->axis < -1 || options->axis > static_cast<int32_t>(indices->shape.size()))
+ if (options->axis < -1 || options->axis > static_cast<int32_t>(wrap(indices->shape()).size()))
return false;
- if (depth->shape.size() != 0)
+ if (wrap(depth->shape()).size() != 0)
return false;
- if (on_value->shape.size() != 0)
+ if (wrap(on_value->shape()).size() != 0)
return false;
- if (off_value->shape.size() != 0)
+ if (wrap(off_value->shape()).size() != 0)
return false;
- if (on_value->type != off_value->type)
+ if (on_value->type() != off_value->type())
return false;
return true;
diff --git a/compiler/luci/import/src/Nodes/CircleReduceAny.cpp b/compiler/luci/import/src/Nodes/CircleReduceAny.cpp
index 13205dd7a..ebe2368e0 100644
--- a/compiler/luci/import/src/Nodes/CircleReduceAny.cpp
+++ b/compiler/luci/import/src/Nodes/CircleReduceAny.cpp
@@ -28,17 +28,20 @@ bool CircleReduceAnyGraphBuilder::validate(const ValidateArgs &args) const
const auto &inputs = args.op.inputs;
const auto &outputs = args.op.outputs;
- const auto &tensors = args.reader.tensors();
- const auto &tensor_0 = tensors.at(inputs.at(0));
- const auto &tensor_1 = tensors.at(inputs.at(1));
- const auto &tensor_o = tensors.at(outputs[0]);
+ const auto tensors = args.reader.tensors();
+ const auto tensor_0 = tensors.at(inputs.at(0));
+ const auto tensor_1 = tensors.at(inputs.at(1));
+ const auto tensor_o = tensors.at(outputs[0]);
+ assert(tensor_0 != nullptr);
+ assert(tensor_1 != nullptr);
+ assert(tensor_o != nullptr);
- if (tensor_0->type != circle::TensorType_BOOL)
+ if (tensor_0->type() != circle::TensorType_BOOL)
return false;
- if (tensor_o->type != circle::TensorType_BOOL)
+ if (tensor_o->type() != circle::TensorType_BOOL)
return false;
- switch (tensor_1->type)
+ switch (tensor_1->type())
{
case circle::TensorType_INT32:
case circle::TensorType_INT64:
diff --git a/compiler/luci/import/src/Nodes/CircleReduceProd.cpp b/compiler/luci/import/src/Nodes/CircleReduceProd.cpp
index 3549c1a18..3b874b7c9 100644
--- a/compiler/luci/import/src/Nodes/CircleReduceProd.cpp
+++ b/compiler/luci/import/src/Nodes/CircleReduceProd.cpp
@@ -27,13 +27,14 @@ bool CircleReduceProdGraphBuilder::validate(const ValidateArgs &args) const
return false;
const auto &inputs = args.op.inputs;
- const auto &tensors = args.reader.tensors();
- const auto &tensor_1 = tensors.at(inputs.at(1));
+ const auto tensors = args.reader.tensors();
+ const auto tensor_1 = tensors.at(inputs.at(1));
+ assert(tensor_1 != nullptr);
// TODO check input types
// Check for reduction_indices types
- switch (tensor_1->type)
+ switch (tensor_1->type())
{
case circle::TensorType_INT32:
case circle::TensorType_INT64:
diff --git a/compiler/luci/import/src/Nodes/CircleReshape.cpp b/compiler/luci/import/src/Nodes/CircleReshape.cpp
index 401dff0fc..3421620ce 100644
--- a/compiler/luci/import/src/Nodes/CircleReshape.cpp
+++ b/compiler/luci/import/src/Nodes/CircleReshape.cpp
@@ -34,12 +34,13 @@ bool CircleReshapeGraphBuilder::validate(const ValidateArgs &args) const
if (args.op.inputs.size() == 2)
{
const auto &inputs = args.op.inputs;
- const auto &tensors = args.reader.tensors();
- const auto &tensor_in = tensors.at(inputs.at(1));
+ const auto tensors = args.reader.tensors();
+ const auto tensor_in = tensors.at(inputs.at(1));
+ assert(tensor_in != nullptr);
// NOTE fix this if there is any other case
// TensorFlow Lite and Circle support only S32
- if (tensor_in->type != circle::TensorType::TensorType_INT32)
+ if (tensor_in->type() != circle::TensorType::TensorType_INT32)
return false;
}
diff --git a/compiler/luci/import/src/Nodes/CircleReverseSequence.cpp b/compiler/luci/import/src/Nodes/CircleReverseSequence.cpp
index 2fbb7a87c..c9cc792bb 100644
--- a/compiler/luci/import/src/Nodes/CircleReverseSequence.cpp
+++ b/compiler/luci/import/src/Nodes/CircleReverseSequence.cpp
@@ -30,12 +30,15 @@ bool CircleReverseSequenceGraphBuilder::validate(const ValidateArgs &args) const
const auto &inputs = args.op.inputs;
const auto &outputs = args.op.outputs;
- const auto &tensors = args.reader.tensors();
- const auto &tensor_in = tensors.at(inputs.at(0));
- const auto &tensor_lengths = tensors.at(inputs.at(1));
- const auto &tensor_out = tensors.at(outputs[0]);
+ const auto tensors = args.reader.tensors();
+ const auto tensor_in = tensors.at(inputs.at(0));
+ const auto tensor_lengths = tensors.at(inputs.at(1));
+ const auto tensor_out = tensors.at(outputs[0]);
+ assert(tensor_in != nullptr);
+ assert(tensor_lengths != nullptr);
+ assert(tensor_out != nullptr);
- switch (tensor_lengths->type)
+ switch (tensor_lengths->type())
{
case circle::TensorType_INT32:
case circle::TensorType_INT64:
@@ -44,7 +47,7 @@ bool CircleReverseSequenceGraphBuilder::validate(const ValidateArgs &args) const
return false;
}
- if (tensor_in->type != tensor_out->type)
+ if (tensor_in->type() != tensor_out->type())
return false;
return true;
diff --git a/compiler/luci/import/src/Nodes/CircleReverseV2.cpp b/compiler/luci/import/src/Nodes/CircleReverseV2.cpp
index ca7653201..c19a0fdd2 100644
--- a/compiler/luci/import/src/Nodes/CircleReverseV2.cpp
+++ b/compiler/luci/import/src/Nodes/CircleReverseV2.cpp
@@ -30,12 +30,15 @@ bool CircleReverseV2GraphBuilder::validate(const ValidateArgs &args) const
const auto &inputs = args.op.inputs;
const auto &outputs = args.op.outputs;
- const auto &tensors = args.reader.tensors();
- const auto &tensor_in = tensors.at(inputs.at(0));
- const auto &tensor_axis = tensors.at(inputs.at(1));
- const auto &tensor_out = tensors.at(outputs[0]);
+ const auto tensors = args.reader.tensors();
+ const auto tensor_in = tensors.at(inputs.at(0));
+ const auto tensor_axis = tensors.at(inputs.at(1));
+ const auto tensor_out = tensors.at(outputs[0]);
+ assert(tensor_in != nullptr);
+ assert(tensor_axis != nullptr);
+ assert(tensor_out != nullptr);
- switch (tensor_axis->type)
+ switch (tensor_axis->type())
{
case circle::TensorType_INT32:
case circle::TensorType_INT64:
@@ -44,7 +47,7 @@ bool CircleReverseV2GraphBuilder::validate(const ValidateArgs &args) const
return false;
}
- if (tensor_out->type != tensor_in->type)
+ if (tensor_out->type() != tensor_in->type())
return false;
return true;
diff --git a/compiler/luci/import/src/Nodes/CircleRound.cpp b/compiler/luci/import/src/Nodes/CircleRound.cpp
index d13e0fafe..08cfae6c2 100644
--- a/compiler/luci/import/src/Nodes/CircleRound.cpp
+++ b/compiler/luci/import/src/Nodes/CircleRound.cpp
@@ -33,11 +33,13 @@ bool CircleRoundGraphBuilder::validate(const ValidateArgs &args) const
// Must be one of the following types
// bfloat16, half (float16), float32, float64, complex64, complex128
// Currently, circle supports float16, float32, complex64
- const auto &tensors = args.reader.tensors();
- const auto &tensor_in = tensors.at(inputs.at(0));
- const auto &tensor_out = tensors.at(outputs[0]);
+ const auto tensors = args.reader.tensors();
+ const auto tensor_in = tensors.at(inputs.at(0));
+ const auto tensor_out = tensors.at(outputs[0]);
+ assert(tensor_in != nullptr);
+ assert(tensor_out != nullptr);
- switch (tensor_in->type)
+ switch (tensor_in->type())
{
case circle::TensorType_FLOAT16:
case circle::TensorType_FLOAT32:
@@ -49,7 +51,7 @@ bool CircleRoundGraphBuilder::validate(const ValidateArgs &args) const
return false;
}
- if (tensor_out->type != tensor_in->type)
+ if (tensor_out->type() != tensor_in->type())
return false;
return true;
diff --git a/compiler/luci/import/src/Nodes/CircleRsqrt.cpp b/compiler/luci/import/src/Nodes/CircleRsqrt.cpp
index a9ca90832..e3bc68f8b 100644
--- a/compiler/luci/import/src/Nodes/CircleRsqrt.cpp
+++ b/compiler/luci/import/src/Nodes/CircleRsqrt.cpp
@@ -32,9 +32,10 @@ bool CircleRsqrtGraphBuilder::validate(const ValidateArgs &args) const
// Must be one of the following types
// bfloat16, half (float16), float32, float64, complex64, complex128
// Currently, circle supports float16, float32, complex64
- const auto &tensors = args.reader.tensors();
- const auto &tensor = tensors.at(inputs.at(0));
- switch (tensor->type)
+ const auto tensors = args.reader.tensors();
+ const auto tensor = tensors.at(inputs.at(0));
+ assert(tensor != nullptr);
+ switch (tensor->type())
{
case circle::TensorType_UINT8:
case circle::TensorType_INT16:
diff --git a/compiler/luci/import/src/Nodes/CircleSVDF.cpp b/compiler/luci/import/src/Nodes/CircleSVDF.cpp
new file mode 100644
index 000000000..83a025177
--- /dev/null
+++ b/compiler/luci/import/src/Nodes/CircleSVDF.cpp
@@ -0,0 +1,67 @@
+/*
+ * Copyright (c) 2022 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "luci/Import/Nodes/CircleSVDF.h"
+
+#include <luci/IR/Nodes/CircleSVDF.h>
+
+#include <loco.h>
+
+namespace luci
+{
+
+bool CircleSVDFBuilder::validate(const ValidateArgs &args) const
+{
+ const auto &inputs = args.op.inputs;
+ if (!(inputs.size() == 4 || inputs.size() == 5))
+ return false;
+
+ return true;
+}
+
+CircleNode *CircleSVDFBuilder::build_node(const circle::OperatorT &op,
+ const std::vector<CircleNode *> &inputs,
+ loco::Graph *graph) const
+{
+ auto *node = graph->nodes()->create<CircleSVDF>();
+ node->input(inputs.at(0));
+ node->weight_feature(inputs.at(1));
+ node->weight_time(inputs.at(2));
+ if (inputs.size() == 4)
+ {
+ auto *bias = graph->nodes()->create<CircleOutputExclude>();
+ // CircleOutputExclude doesn't need a type, but since all nodes must have a type,
+ // a dummy type is inserted.
+ bias->dtype(inputs.at(0)->dtype());
+ node->bias(bias);
+
+ node->input_activation_state(inputs.at(3));
+ }
+ else
+ {
+ node->bias(inputs.at(3));
+ node->input_activation_state(inputs.at(4));
+ }
+
+ const auto *options = op.builtin_options.AsSVDFOptions();
+ node->svdf_rank(options->rank);
+ node->fusedActivationFunction(luci_actfunc(options->fused_activation_function));
+ node->asymmetric_quantize_inputs(options->asymmetric_quantize_inputs);
+
+ return node;
+}
+
+} // namespace luci
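The builder above accepts both operator layouts Circle allows for SVDF; a sketch of the input-index mapping it implements, taken directly from build_node() above:

// Input-index mapping implemented by CircleSVDFBuilder::build_node():
//   4 inputs: 0 input, 1 weight_feature, 2 weight_time, 3 input_activation_state
//             (bias absent; a CircleOutputExclude dummy node is wired in instead)
//   5 inputs: 0 input, 1 weight_feature, 2 weight_time, 3 bias, 4 input_activation_state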
diff --git a/compiler/luci/import/src/Nodes/CircleScatterNd.cpp b/compiler/luci/import/src/Nodes/CircleScatterNd.cpp
index f8c175110..ebe252527 100644
--- a/compiler/luci/import/src/Nodes/CircleScatterNd.cpp
+++ b/compiler/luci/import/src/Nodes/CircleScatterNd.cpp
@@ -30,14 +30,15 @@ bool CircleScatterNdGraphBuilder::validate(const ValidateArgs &args) const
const auto &inputs = args.op.inputs;
// indices must have the same type as shape
- const auto &tensors = args.reader.tensors();
+ const auto tensors = args.reader.tensors();
- if (tensors[inputs.at(0)]->type != tensors[inputs.at(2)]->type)
+ assert(tensors[inputs.at(0)] != nullptr && tensors[inputs.at(2)] != nullptr);
+ if (tensors[inputs.at(0)]->type() != tensors[inputs.at(2)]->type())
return false;
// indices must be either int32 or int64
- if (tensors[inputs.at(0)]->type != circle::TensorType_INT32 &&
- tensors[inputs.at(0)]->type != circle::TensorType_INT64)
+ if (tensors[inputs.at(0)]->type() != circle::TensorType_INT32 &&
+ tensors[inputs.at(0)]->type() != circle::TensorType_INT64)
return false;
return true;
diff --git a/compiler/luci/import/src/Nodes/CircleSegmentSum.cpp b/compiler/luci/import/src/Nodes/CircleSegmentSum.cpp
index bfa333e8d..01d1aab44 100644
--- a/compiler/luci/import/src/Nodes/CircleSegmentSum.cpp
+++ b/compiler/luci/import/src/Nodes/CircleSegmentSum.cpp
@@ -30,12 +30,15 @@ bool CircleSegmentSumGraphBuilder::validate(const ValidateArgs &args) const
const auto &inputs = args.op.inputs;
const auto &outputs = args.op.outputs;
- const auto &tensors = args.reader.tensors();
- const auto &tensor_in = tensors.at(inputs.at(0));
- const auto &tensor_out = tensors.at(outputs[0]);
- const auto &tensor_ids = tensors.at(inputs.at(1));
+ const auto tensors = args.reader.tensors();
+ const auto tensor_in = tensors.at(inputs.at(0));
+ const auto tensor_out = tensors.at(outputs[0]);
+ const auto tensor_ids = tensors.at(inputs.at(1));
+ assert(tensor_in != nullptr);
+ assert(tensor_out != nullptr);
+ assert(tensor_ids != nullptr);
- switch (tensor_ids->type)
+ switch (tensor_ids->type())
{
case circle::TensorType_INT32:
case circle::TensorType_INT64:
@@ -44,7 +47,7 @@ bool CircleSegmentSumGraphBuilder::validate(const ValidateArgs &args) const
return false;
}
- if (tensor_out->type != tensor_in->type)
+ if (tensor_out->type() != tensor_in->type())
{
return false;
}
diff --git a/compiler/luci/import/src/Nodes/CircleSelect.cpp b/compiler/luci/import/src/Nodes/CircleSelect.cpp
index 36a5fa8a8..002f62f6c 100644
--- a/compiler/luci/import/src/Nodes/CircleSelect.cpp
+++ b/compiler/luci/import/src/Nodes/CircleSelect.cpp
@@ -29,9 +29,10 @@ bool CircleSelectGraphBuilder::validate(const ValidateArgs &args) const
return false;
const auto &inputs = args.op.inputs;
- const auto &tensors = args.reader.tensors();
- const auto &tensor = tensors.at(inputs.at(0));
- if (tensor->type != circle::TensorType_BOOL)
+ const auto tensors = args.reader.tensors();
+ const auto tensor = tensors.at(inputs.at(0));
+ assert(tensor != nullptr);
+ if (tensor->type() != circle::TensorType_BOOL)
return false;
// TODO check dtypes for input 1, 2
diff --git a/compiler/luci/import/src/Nodes/CircleSelectV2.cpp b/compiler/luci/import/src/Nodes/CircleSelectV2.cpp
index 556c8fa33..062fdc143 100644
--- a/compiler/luci/import/src/Nodes/CircleSelectV2.cpp
+++ b/compiler/luci/import/src/Nodes/CircleSelectV2.cpp
@@ -29,14 +29,16 @@ bool CircleSelectV2GraphBuilder::validate(const ValidateArgs &args) const
return false;
const auto &inputs = args.op.inputs;
- const auto &tensors = args.reader.tensors();
- const auto &condition = tensors.at(inputs.at(0));
- if (condition->type != circle::TensorType_BOOL)
+ const auto tensors = args.reader.tensors();
+ const auto condition = tensors.at(inputs.at(0));
+ assert(condition != nullptr);
+ if (condition->type() != circle::TensorType_BOOL)
return false;
- const auto &t = tensors.at(inputs.at(1));
- const auto &e = tensors.at(inputs.at(2));
- if (t->type != e->type)
+ const auto t = tensors.at(inputs.at(1));
+ const auto e = tensors.at(inputs.at(2));
+ assert(t != nullptr && e != nullptr);
+ if (t->type() != e->type())
return false;
return true;
diff --git a/compiler/luci/import/src/Nodes/CircleSin.cpp b/compiler/luci/import/src/Nodes/CircleSin.cpp
index 22f461123..51ebf0355 100644
--- a/compiler/luci/import/src/Nodes/CircleSin.cpp
+++ b/compiler/luci/import/src/Nodes/CircleSin.cpp
@@ -30,9 +30,10 @@ bool CircleSinGraphBuilder::validate(const ValidateArgs &args) const
const auto &inputs = args.op.inputs;
// input type check
- const auto &tensors = args.reader.tensors();
- const auto &tensor = tensors.at(inputs.at(0));
- switch (tensor->type)
+ const auto tensors = args.reader.tensors();
+ const auto tensor = tensors.at(inputs.at(0));
+ assert(tensor != nullptr);
+ switch (tensor->type())
{
case circle::TensorType_FLOAT16:
case circle::TensorType_FLOAT32:
diff --git a/compiler/luci/import/src/Nodes/CircleSquare.cpp b/compiler/luci/import/src/Nodes/CircleSquare.cpp
index 7ff2b84e6..bec84b4c0 100644
--- a/compiler/luci/import/src/Nodes/CircleSquare.cpp
+++ b/compiler/luci/import/src/Nodes/CircleSquare.cpp
@@ -29,13 +29,13 @@ bool CircleSquareGraphBuilder::validate(const ValidateArgs &args) const
return false;
const auto &inputs = args.op.inputs;
- // Must be one of the following types
- // bfloat16, half (float16), float32, float64, complex64, complex128
- // Currently, circle supports float16, float32, complex64
- const auto &tensors = args.reader.tensors();
- const auto &tensor = tensors.at(inputs.at(0));
- switch (tensor->type)
+ const auto tensors = args.reader.tensors();
+ const auto tensor = tensors.at(inputs.at(0));
+ assert(tensor != nullptr);
+ switch (tensor->type())
{
+ case circle::TensorType_UINT8:
+ case circle::TensorType_INT16:
case circle::TensorType_INT32:
case circle::TensorType_INT64:
case circle::TensorType_FLOAT16:
diff --git a/compiler/luci/import/src/Nodes/CircleSquaredDifference.cpp b/compiler/luci/import/src/Nodes/CircleSquaredDifference.cpp
index 33440d5ab..1983465d3 100644
--- a/compiler/luci/import/src/Nodes/CircleSquaredDifference.cpp
+++ b/compiler/luci/import/src/Nodes/CircleSquaredDifference.cpp
@@ -32,9 +32,10 @@ bool CircleSquaredDifferenceGraphBuilder::validate(const ValidateArgs &args) con
const auto &outputs = args.op.outputs;
// Inputs must be one of the following types
// bfloat16, half(float16), float32, float64, int32, int64, complex64, complex128
- const auto &tensors = args.reader.tensors();
- const auto &tensor = tensors.at(inputs.at(0));
- switch (tensor->type)
+ const auto tensors = args.reader.tensors();
+ const auto tensor = tensors.at(inputs.at(0));
+ assert(tensor != nullptr);
+ switch (tensor->type())
{
case circle::TensorType_FLOAT16:
case circle::TensorType_FLOAT32:
@@ -53,11 +54,13 @@ bool CircleSquaredDifferenceGraphBuilder::validate(const ValidateArgs &args) con
}
// Input types must match
- if (tensors.at(inputs.at(0))->type != tensors.at(inputs.at(1))->type)
+ assert(tensors.at(inputs.at(0)) != nullptr && tensors.at(inputs.at(1)) != nullptr);
+ if (tensors.at(inputs.at(0))->type() != tensors.at(inputs.at(1))->type())
return false;
// Input and output types must match
- if (tensors.at(inputs.at(0))->type != tensors.at(outputs[0])->type)
+ assert(tensors.at(outputs[0]) != nullptr);
+ if (tensors.at(inputs.at(0))->type() != tensors.at(outputs[0])->type())
return false;
return true;
diff --git a/compiler/luci/import/src/Nodes/CircleTanh.cpp b/compiler/luci/import/src/Nodes/CircleTanh.cpp
index 95625a0e4..80a0e887f 100644
--- a/compiler/luci/import/src/Nodes/CircleTanh.cpp
+++ b/compiler/luci/import/src/Nodes/CircleTanh.cpp
@@ -30,8 +30,9 @@ bool CircleTanhGraphBuilder::validate(const ValidateArgs &args) const
const auto &inputs = args.op.inputs;
const auto &outputs = args.op.outputs;
- const auto &tensors = args.reader.tensors();
- if (tensors.at(inputs.at(0))->type != tensors.at(outputs[0])->type)
+ const auto tensors = args.reader.tensors();
+ assert(tensors.at(inputs.at(0)) != nullptr && tensors.at(outputs[0]) != nullptr);
+ if (tensors.at(inputs.at(0))->type() != tensors.at(outputs[0])->type())
return false;
return true;
diff --git a/compiler/luci/import/src/Nodes/CircleTile.cpp b/compiler/luci/import/src/Nodes/CircleTile.cpp
index 6da44130c..c41a6ba3f 100644
--- a/compiler/luci/import/src/Nodes/CircleTile.cpp
+++ b/compiler/luci/import/src/Nodes/CircleTile.cpp
@@ -32,9 +32,10 @@ bool CircleTileGraphBuilder::validate(const ValidateArgs &args) const
auto outputs = args.op.outputs;
// Multiples (inputs.at(1)) must be one of the following types
// int32, int64
- const auto &tensors = args.reader.tensors();
- const auto &tensor = tensors.at(inputs.at(1));
- switch (tensor->type)
+ const auto tensors = args.reader.tensors();
+ const auto tensor = tensors.at(inputs.at(1));
+ assert(tensor != nullptr);
+ switch (tensor->type())
{
case circle::TensorType_INT32:
case circle::TensorType_INT64:
@@ -44,7 +45,8 @@ bool CircleTileGraphBuilder::validate(const ValidateArgs &args) const
}
// Type of input and output must be the same
- if (tensors.at(inputs.at(0))->type != tensors.at(outputs[0])->type)
+ assert(tensors.at(inputs.at(0)) != nullptr && tensors.at(outputs[0]) != nullptr);
+ if (tensors.at(inputs.at(0))->type() != tensors.at(outputs[0])->type())
return false;
return true;
diff --git a/compiler/luci/import/src/Nodes/CircleTopKV2.cpp b/compiler/luci/import/src/Nodes/CircleTopKV2.cpp
index 49f858798..9f9173738 100644
--- a/compiler/luci/import/src/Nodes/CircleTopKV2.cpp
+++ b/compiler/luci/import/src/Nodes/CircleTopKV2.cpp
@@ -35,9 +35,10 @@ bool CircleTopKV2GraphBuilder::validate(const ValidateArgs &args) const
if (outputs.size() != 2)
return false;
- const auto &tensors = args.reader.tensors();
- const auto &tensor = tensors.at(inputs.at(1));
- if (tensor->type != circle::TensorType_INT32)
+ const auto tensors = args.reader.tensors();
+ const auto tensor = tensors.at(inputs.at(1));
+ assert(tensor != nullptr);
+ if (tensor->type() != circle::TensorType_INT32)
return false;
return true;
diff --git a/compiler/luci/import/src/Nodes/CircleTransposeConv.cpp b/compiler/luci/import/src/Nodes/CircleTransposeConv.cpp
index 5a60e2f54..041983dac 100644
--- a/compiler/luci/import/src/Nodes/CircleTransposeConv.cpp
+++ b/compiler/luci/import/src/Nodes/CircleTransposeConv.cpp
@@ -31,11 +31,13 @@ bool CircleTransposeConvGraphBuilder::validate(const ValidateArgs &args) const
return false;
const auto &inputs = args.op.inputs;
- const auto &tensors = args.reader.tensors();
- const auto &filter_tensor = tensors.at(inputs.at(1));
- const auto &filter_shape = filter_tensor.get()->shape;
- const auto &ifm_tensor = tensors.at(inputs.at(2));
- const auto &ifm_shape = ifm_tensor.get()->shape;
+ const auto tensors = args.reader.tensors();
+ const auto filter_tensor = tensors.at(inputs.at(1));
+ assert(filter_tensor != nullptr);
+ const auto filter_shape = wrap(filter_tensor->shape());
+ const auto ifm_tensor = tensors.at(inputs.at(2));
+ assert(ifm_tensor != nullptr);
+ const auto ifm_shape = wrap(ifm_tensor->shape());
// ifm and filters must be 4-D tensors
if (ifm_shape.size() != 4)
@@ -45,7 +47,7 @@ bool CircleTransposeConvGraphBuilder::validate(const ValidateArgs &args) const
// input shape : [batch, height, width, in_channels]
// filters shape : [output_channels, height, width, in_channels]
- if (ifm_tensor.get()->shape.at(3) != filter_tensor.get()->shape.at(3))
+ if (ifm_shape.at(3) != filter_shape.at(3))
return false;
return true;
diff --git a/compiler/luci/import/src/Nodes/CircleUnpack.cpp b/compiler/luci/import/src/Nodes/CircleUnpack.cpp
index 9bfc76b57..6b3401609 100644
--- a/compiler/luci/import/src/Nodes/CircleUnpack.cpp
+++ b/compiler/luci/import/src/Nodes/CircleUnpack.cpp
@@ -46,8 +46,8 @@ bool CircleUnpackGraphBuilder::validate(const ValidateArgs &args) const
{
if (settings->get(luci::UserSettings::Key::DisableValidation))
{
- const auto &tensors = args.reader.tensors();
- const circle::TensorT &output_tensor = *tensors[outputs[0]];
+ const auto tensors = args.reader.tensors();
+ const auto output_tensor = tensors[outputs[0]];
auto name = tensor_name(output_tensor);
WARN(l) << "Warning: import Unpack(" << name << ") 'num' is not same as outputs used";
}
@@ -58,9 +58,10 @@ bool CircleUnpackGraphBuilder::validate(const ValidateArgs &args) const
if (options->num < 0)
return false;
- const auto &tensors = args.reader.tensors();
- const auto &tensor = tensors.at(inputs.at(0));
- const auto &shape = tensor->shape;
+ const auto tensors = args.reader.tensors();
+ const auto tensor = tensors.at(inputs.at(0));
+ assert(tensor != nullptr);
+ const auto shape = wrap(tensor->shape());
auto shape_size = static_cast<int32_t>(shape.size());
if (shape_size > 0)
{
diff --git a/compiler/luci/import/src/Nodes/CircleVariable.cpp b/compiler/luci/import/src/Nodes/CircleVariable.cpp
new file mode 100644
index 000000000..23ae9e7be
--- /dev/null
+++ b/compiler/luci/import/src/Nodes/CircleVariable.cpp
@@ -0,0 +1,80 @@
+/*
+ * Copyright (c) 2022 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "luci/Import/Nodes/CircleVariable.h"
+
+#include <luci/IR/Nodes/CircleVariable.h>
+#include <luci/Log.h>
+
+#include <cassert>
+#include <ostream>
+#include <string>
+#include <vector>
+
+namespace
+{
+
+std::ostream &operator<<(std::ostream &os, const luci::VectorWrapper<int32_t> &vect)
+{
+ uint32_t seq = 0;
+ for (const auto &v : vect)
+ {
+ if (seq)
+ os << ", ";
+ os << v;
+ seq++;
+ }
+ return os;
+}
+
+} // namespace
+
+namespace luci
+{
+
+CircleVariable *create_circlevariable(GraphBuilderContext *context, int32_t tensor_index)
+{
+ LOGGER(l);
+
+ auto graph = context->graph();
+ auto reader = context->reader();
+ const auto tensors = reader->tensors();
+ const auto variable_tensor = tensors[tensor_index];
+ assert(variable_tensor != nullptr);
+
+ if (not variable_tensor->is_variable())
+ {
+ // not a variable
+ return nullptr;
+ }
+ {
+ // check that there is no buffer, as variable buffers are not supported for now
+ // TODO use buffer when this is enabled in Kernel
+ assert(reader->buffers()[variable_tensor->buffer()] != nullptr);
+ assert(reader->buffers()[variable_tensor->buffer()]->data() == nullptr);
+ }
+
+ auto variable_node = graph->nodes()->create<CircleVariable>();
+ copy_tensor_attributes(variable_tensor, variable_node);
+ variable_node->shape_status(luci::ShapeStatus::VALID);
+
+ INFO(l) << "[luci] NodeFinder variable node(" << tensor_index << ") -> " << variable_node << " "
+ << wrap(variable_tensor->shape()) << std::endl;
+
+ return variable_node;
+}
+
+} // namespace luci
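A hedged sketch of how create_circlevariable is presumably called during import: the surrounding loop and the register_node() step are illustrative assumptions, not part of this patch; only the nullptr contract (non-variable tensors yield nullptr) comes from the code above.

// Hypothetical call site: probe each tensor index, register the node on success.
for (int32_t tensor_index = 0; tensor_index < num_tensors; ++tensor_index)
{
  // create_circlevariable() returns nullptr when the tensor is not is_variable()
  if (auto *variable_node = luci::create_circlevariable(context, tensor_index))
    register_node(tensor_index, variable_node); // register_node() is an assumed helper
}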
diff --git a/compiler/luci/import/src/Nodes/CircleWhere.cpp b/compiler/luci/import/src/Nodes/CircleWhere.cpp
index 8e4f1a0c4..bc6199ace 100644
--- a/compiler/luci/import/src/Nodes/CircleWhere.cpp
+++ b/compiler/luci/import/src/Nodes/CircleWhere.cpp
@@ -30,14 +30,16 @@ bool CircleWhereGraphBuilder::validate(const ValidateArgs &args) const
const auto &inputs = args.op.inputs;
const auto &outputs = args.op.outputs;
- const auto &tensors = args.reader.tensors();
- const auto &tensor_condition = tensors.at(inputs.at(0));
- const auto &tensor_out = tensors.at(outputs[0]);
+ const auto tensors = args.reader.tensors();
+ const auto tensor_condition = tensors.at(inputs.at(0));
+ const auto tensor_out = tensors.at(outputs[0]);
+ assert(tensor_condition != nullptr);
+ assert(tensor_out != nullptr);
- if (tensor_condition->type != circle::TensorType_BOOL)
+ if (tensor_condition->type() != circle::TensorType_BOOL)
return false;
- if (tensor_out->type != circle::TensorType_INT64)
+ if (tensor_out->type() != circle::TensorType_INT64)
return false;
return true;
diff --git a/compiler/luci/import/src/Nodes/CircleWhile.cpp b/compiler/luci/import/src/Nodes/CircleWhile.cpp
index 26147562f..27a392b2a 100644
--- a/compiler/luci/import/src/Nodes/CircleWhile.cpp
+++ b/compiler/luci/import/src/Nodes/CircleWhile.cpp
@@ -67,8 +67,8 @@ CircleNode *CircleWhileGraphBuilder::build(const circle::OperatorT &op,
const std::vector<int32_t> &inputs = op.inputs;
const std::vector<int32_t> &outputs = op.outputs;
- const auto &tensors = context->reader()->tensors();
- const auto &opcodes = context->reader()->opcodes();
+ const auto tensors = context->reader()->tensors();
+ const auto opcodes = context->reader()->opcodes();
std::vector<CircleNode *> input_nodes;
for (const int32_t input_tensor_index : inputs)
@@ -96,9 +96,11 @@ CircleNode *CircleWhileGraphBuilder::build(const circle::OperatorT &op,
assert(outputs.size() > 0);
{
// Let's use the name of output 0 as the While name
- const circle::TensorT &output_tensor = *tensors[outputs[0]];
+ const auto output_tensor = tensors[outputs[0]];
+ assert(output_tensor != nullptr);
node->name(tensor_name(output_tensor));
- node->op_version(opcodes[op.opcode_index].get()->version);
+ assert(opcodes[op.opcode_index] != nullptr);
+ node->op_version(opcodes[op.opcode_index]->version());
// NOTE We don't set quantization for While itself but on its virtual outputs
}
@@ -106,7 +108,8 @@ CircleNode *CircleWhileGraphBuilder::build(const circle::OperatorT &op,
// Create virtual outputs of While
for (uint32_t n = 0; n < output_count; ++n)
{
- const circle::TensorT &output_tensor = *tensors[outputs[n]];
+ const auto output_tensor = tensors[outputs[n]];
+ assert(output_tensor != nullptr);
auto *nodeout = graph->nodes()->create<CircleWhileOut>();
diff --git a/compiler/luci/import/src/ValidateHelpers.cpp b/compiler/luci/import/src/ValidateHelpers.cpp
index 27306ba90..fc027704b 100644
--- a/compiler/luci/import/src/ValidateHelpers.cpp
+++ b/compiler/luci/import/src/ValidateHelpers.cpp
@@ -26,9 +26,10 @@ bool validate_batch_space_nd(const GraphBuilderBase::ValidateArgs &args)
return false;
// inputs 1 and 2 should have INT32/INT64 type
- const auto &tensors = args.reader.tensors();
- const auto &tensor_1 = tensors.at(inputs.at(1));
- switch (tensor_1->type)
+ const auto tensors = args.reader.tensors();
+ const auto tensor_1 = tensors.at(inputs.at(1));
+ assert(tensor_1 != nullptr);
+ switch (tensor_1->type())
{
case circle::TensorType_INT32:
case circle::TensorType_INT64:
@@ -36,8 +37,9 @@ bool validate_batch_space_nd(const GraphBuilderBase::ValidateArgs &args)
default:
return false;
}
- const auto &tensor_2 = tensors.at(inputs.at(2));
- switch (tensor_2->type)
+ const auto tensor_2 = tensors.at(inputs.at(2));
+ assert(tensor_2 != nullptr);
+ switch (tensor_2->type())
{
case circle::TensorType_INT32:
case circle::TensorType_INT64:
@@ -47,8 +49,9 @@ bool validate_batch_space_nd(const GraphBuilderBase::ValidateArgs &args)
}
// Only input shape dimensions 3 and 4 are supported
- const auto &tensor_0 = tensors.at(inputs.at(0));
- const auto t_0_s = tensor_0->shape.size();
+ const auto tensor_0 = tensors.at(inputs.at(0));
+ assert(tensor_0 != nullptr);
+ const auto t_0_s = wrap(tensor_0->shape()).size();
if (t_0_s != 3 && t_0_s != 4)
return false;
@@ -68,10 +71,10 @@ bool validate_minmax(const GraphBuilderBase::ValidateArgs &args)
if (outputs.size() != 1)
return false;
- const auto &tensors = args.reader.tensors();
- const auto &tensor = tensors.at(inputs.at(0));
-
- switch (tensor->type)
+ const auto tensors = args.reader.tensors();
+ const auto tensor = tensors.at(inputs.at(0));
+ assert(tensor != nullptr);
+ switch (tensor->type())
{
case circle::TensorType_FLOAT16:
case circle::TensorType_FLOAT32:
@@ -84,10 +87,12 @@ bool validate_minmax(const GraphBuilderBase::ValidateArgs &args)
return false;
}
- if (tensors[inputs.at(1)]->type != tensor->type)
+ assert(tensors[inputs.at(1)] != nullptr);
+ if (tensors[inputs.at(1)]->type() != tensor->type())
return false;
- if (tensors[outputs[0]]->type != tensor->type)
+ assert(tensors[outputs[0]] != nullptr);
+ if (tensors[outputs[0]]->type() != tensor->type())
return false;
return true;
@@ -104,10 +109,10 @@ bool validate_reduce_minmax(const GraphBuilderBase::ValidateArgs &args)
if (outputs.size() != 1)
return false;
- const auto &tensors = args.reader.tensors();
- const auto &tensor_axis = tensors.at(inputs.at(1));
-
- switch (tensor_axis->type)
+ const auto tensors = args.reader.tensors();
+ const auto tensor_axis = tensors.at(inputs.at(1));
+ assert(tensor_axis != nullptr);
+ switch (tensor_axis->type())
{
case circle::TensorType_INT32:
case circle::TensorType_INT64:
diff --git a/compiler/luci/lang/include/luci/IR/CircleNodes.h b/compiler/luci/lang/include/luci/IR/CircleNodes.h
index a313f9d5b..d89ea03cc 100644
--- a/compiler/luci/lang/include/luci/IR/CircleNodes.h
+++ b/compiler/luci/lang/include/luci/IR/CircleNodes.h
@@ -29,7 +29,6 @@
#include "Nodes/CircleCast.h"
#include "Nodes/CircleCeil.h"
#include "Nodes/CircleConcatenation.h"
-#include "Nodes/CircleConst.h"
#include "Nodes/CircleConv2D.h"
#include "Nodes/CircleCos.h"
#include "Nodes/CircleCustom.h"
@@ -119,6 +118,7 @@
#include "Nodes/CircleStridedSlice.h"
#include "Nodes/CircleSub.h"
#include "Nodes/CircleSum.h"
+#include "Nodes/CircleSVDF.h"
#include "Nodes/CircleTanh.h"
#include "Nodes/CircleTile.h"
#include "Nodes/CircleTopKV2.h"
@@ -135,18 +135,21 @@
#include "Nodes/CircleBCQGather.h"
#include "Nodes/CircleInstanceNorm.h"
// Virtual nodes
+#include "Nodes/CircleConst.h"
#include "Nodes/CircleInput.h"
#include "Nodes/CircleOutput.h"
+#include "Nodes/CircleVariable.h"
+// Multi-output virtual nodes
#include "Nodes/CircleBidirectionalSequenceLSTMOut.h"
#include "Nodes/CircleCustomOut.h"
#include "Nodes/CircleIfOut.h"
#include "Nodes/CircleNonMaxSuppressionV4Out.h"
#include "Nodes/CircleNonMaxSuppressionV5Out.h"
-#include "Nodes/CircleUnpackOut.h"
-#include "Nodes/CircleUniqueOut.h"
#include "Nodes/CircleSplitOut.h"
#include "Nodes/CircleSplitVOut.h"
#include "Nodes/CircleTopKV2Out.h"
+#include "Nodes/CircleUniqueOut.h"
+#include "Nodes/CircleUnpackOut.h"
#include "Nodes/CircleWhileOut.h"
#include <loco/IR/Graph.h>
diff --git a/compiler/luci/lang/include/luci/IR/CircleNodes.lst b/compiler/luci/lang/include/luci/IR/CircleNodes.lst
index 914aa16e4..1472008df 100644
--- a/compiler/luci/lang/include/luci/IR/CircleNodes.lst
+++ b/compiler/luci/lang/include/luci/IR/CircleNodes.lst
@@ -116,6 +116,7 @@ CIRCLE_NODE(SQUEEZE, CircleSqueeze)
CIRCLE_NODE(STRIDED_SLICE, CircleStridedSlice)
CIRCLE_NODE(SUB, CircleSub)
CIRCLE_NODE(SUM, CircleSum)
+CIRCLE_NODE(SVDF, CircleSVDF)
CIRCLE_NODE(TANH, CircleTanh)
CIRCLE_NODE(TILE, CircleTile)
CIRCLE_NODE(TOPK_V2, CircleTopKV2)
@@ -132,12 +133,14 @@ CIRCLE_NODE(BCQ_FULLY_CONNECTED, CircleBCQFullyConnected)
CIRCLE_NODE(BCQ_GATHER, CircleBCQGather)
CIRCLE_NODE(INSTANCE_NORM, CircleInstanceNorm)
// Virtual node(s)
-CIRCLE_VNODE(CIRCLEBIDIRECTIONAL_SEQUENCE_LSTM_OUT, CircleBidirectionalSequenceLSTMOut)
CIRCLE_VNODE(CIRCLECONST, CircleConst)
CIRCLE_VNODE(CIRCLEINPUT, CircleInput)
CIRCLE_VNODE(CIRCLEOUTPUT, CircleOutput)
CIRCLE_VNODE(CIRCLEOUTPUTDUMMY, CircleOutputDummy)
CIRCLE_VNODE(CIRCLEOUTPUTEXCLUDE, CircleOutputExclude)
+CIRCLE_VNODE(CIRCLEVARIABLE, CircleVariable)
+// Multi-output virtual nodes
+CIRCLE_VNODE(CIRCLEBIDIRECTIONAL_SEQUENCE_LSTM_OUT, CircleBidirectionalSequenceLSTMOut)
CIRCLE_VNODE(CIRCLECUSTOMOUT, CircleCustomOut)
CIRCLE_VNODE(CIRCLEIFOUT, CircleIfOut)
CIRCLE_VNODE(CIRCLENONMAXSUPPRESSIONV4OUT, CircleNonMaxSuppressionV4Out)
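
CircleNodes.lst is an X-macro list: each consumer defines CIRCLE_NODE/CIRCLE_VNODE, includes the list, and undefines both, so the reordering above only changes the order of the generated cases, not behavior. A sketch of a typical consumer, mirroring the circle_opname() helper added in compiler/luci/logex below:

```cpp
#include <luci/IR/CircleOpcode.h>

#include <string>

std::string opcode_name(luci::CircleOpcode opcode)
{
  switch (opcode)
  {
#define CIRCLE_NODE(OPCODE, CLASS) \
  case luci::CircleOpcode::OPCODE: \
    return #OPCODE;
#define CIRCLE_VNODE CIRCLE_NODE // virtual nodes expand identically
#include <luci/IR/CircleNodes.lst>
#undef CIRCLE_VNODE
#undef CIRCLE_NODE
    default:
      return "Invalid";
  }
}
```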
diff --git a/compiler/luci/lang/include/luci/IR/CircleQuantParam.h b/compiler/luci/lang/include/luci/IR/CircleQuantParam.h
index 694437303..8afc80a76 100644
--- a/compiler/luci/lang/include/luci/IR/CircleQuantParam.h
+++ b/compiler/luci/lang/include/luci/IR/CircleQuantParam.h
@@ -32,6 +32,10 @@ struct CircleQuantParam
int32_t quantized_dimension{0};
};
+struct CircleNode;
+
+void copy_quantparam(const luci::CircleNode *src, luci::CircleNode *dst);
+
} // namespace luci
#endif // __LUCI_IR_CIRCLEQUANTPARAM_H__
diff --git a/compiler/luci/lang/include/luci/IR/Nodes/CircleFullyConnected.h b/compiler/luci/lang/include/luci/IR/Nodes/CircleFullyConnected.h
index 2862cadb2..dc5aeb267 100644
--- a/compiler/luci/lang/include/luci/IR/Nodes/CircleFullyConnected.h
+++ b/compiler/luci/lang/include/luci/IR/Nodes/CircleFullyConnected.h
@@ -58,8 +58,12 @@ public:
WeightsFormat weights_format(void) const { return _weights_format; }
void weights_format(WeightsFormat weights_format) { _weights_format = weights_format; }
+ bool keep_num_dims(void) const { return _keep_num_dims; }
+ void keep_num_dims(bool keep_num_dims) { _keep_num_dims = keep_num_dims; }
+
private:
WeightsFormat _weights_format{WeightsFormat::DEFAULT};
+ bool _keep_num_dims{false};
};
} // namespace luci
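
keep_num_dims mirrors the option of the same name in the TFLite/Circle FullyConnectedOptions; the false default preserves the prior flatten-to-2D behavior. A minimal sketch of setting it (graph setup elided; the operand nodes are hypothetical):

```cpp
auto *fc = g->nodes()->create<luci::CircleFullyConnected>();
fc->input(input_node);
fc->weights(weights_node);
fc->bias(bias_node);
fc->fusedActivationFunction(luci::FusedActFunc::NONE);
fc->keep_num_dims(true); // keep the input rank instead of collapsing to 2-D
```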
diff --git a/compiler/luci/lang/include/luci/IR/Nodes/CircleSVDF.h b/compiler/luci/lang/include/luci/IR/Nodes/CircleSVDF.h
new file mode 100644
index 000000000..839d11e04
--- /dev/null
+++ b/compiler/luci/lang/include/luci/IR/Nodes/CircleSVDF.h
@@ -0,0 +1,70 @@
+/*
+ * Copyright (c) 2022 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __LUCI_IR_CIRCLE_SVDF_H__
+#define __LUCI_IR_CIRCLE_SVDF_H__
+
+#include "luci/IR/CircleNodeDecl.h"
+#include "luci/IR/CircleOpcode.h"
+
+#include "luci/IR/LuciNodeMixins.h"
+
+namespace luci
+{
+
+/**
+ * @brief SVDF in Circle
+ */
+class CircleSVDF final : public FixedArityNode<5, CircleNodeImpl<CircleOpcode::SVDF>>,
+ public CircleNodeMixin<CircleNodeTrait::FusedActFunc>
+{
+public:
+ CircleSVDF() = default;
+
+public:
+ loco::Node *input(void) const { return at(0)->node(); }
+ void input(loco::Node *node) { at(0)->node(node); }
+
+ loco::Node *weight_feature(void) const { return at(1)->node(); }
+ void weight_feature(loco::Node *node) { at(1)->node(node); }
+
+ loco::Node *weight_time(void) const { return at(2)->node(); }
+ void weight_time(loco::Node *node) { at(2)->node(node); }
+
+ loco::Node *bias(void) const { return at(3)->node(); }
+ void bias(loco::Node *node) { at(3)->node(node); }
+
+ loco::Node *input_activation_state(void) const { return at(4)->node(); }
+ void input_activation_state(loco::Node *node) { at(4)->node(node); }
+
+public:
+ bool asymmetric_quantize_inputs() const { return _asymmetric_quantize_inputs; }
+ void asymmetric_quantize_inputs(bool asymmetric_quantize_inputs)
+ {
+ _asymmetric_quantize_inputs = asymmetric_quantize_inputs;
+ }
+
+ int32_t svdf_rank() const { return _rank; }
+ void svdf_rank(int32_t svdf_rank) { _rank = svdf_rank; }
+
+private:
+ bool _asymmetric_quantize_inputs = false;
+ int32_t _rank = 0;
+};
+
+} // namespace luci
+
+#endif // __LUCI_IR_CIRCLE_SVDF_H__
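
The five fixed inputs follow the TFLite SVDF operand order. A minimal construction sketch, assuming a loco::Graph `g` and pre-built operand nodes (all names hypothetical):

```cpp
auto *svdf = g->nodes()->create<luci::CircleSVDF>();
svdf->input(input_node);
svdf->weight_feature(weight_feature_node);
svdf->weight_time(weight_time_node);
svdf->bias(bias_node);
svdf->input_activation_state(state_node);
svdf->fusedActivationFunction(luci::FusedActFunc::RELU);
svdf->svdf_rank(1);
svdf->asymmetric_quantize_inputs(false);
```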
diff --git a/compiler/luci/lang/include/luci/IR/Nodes/CircleVariable.h b/compiler/luci/lang/include/luci/IR/Nodes/CircleVariable.h
new file mode 100644
index 000000000..8c15b66c9
--- /dev/null
+++ b/compiler/luci/lang/include/luci/IR/Nodes/CircleVariable.h
@@ -0,0 +1,39 @@
+/*
+ * Copyright (c) 2022 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __LUCI_IR_CIRCLE_VARIABLE_H__
+#define __LUCI_IR_CIRCLE_VARIABLE_H__
+
+#include "luci/IR/CircleNodeDecl.h"
+#include "luci/IR/CircleOpcode.h"
+
+#include "luci/IR/CircleNodeMixins.h"
+
+namespace luci
+{
+
+/**
+ * @brief Virtual CircleVariable node in Circle for a 'variable' Tensor
+ */
+class CircleVariable final : public FixedArityNode<0, CircleNodeImpl<CircleOpcode::CIRCLEVARIABLE>>
+{
+public:
+ CircleVariable() = default;
+};
+
+} // namespace luci
+
+#endif // __LUCI_IR_CIRCLE_VARIABLE_H__
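
Since CircleVariable has fixed arity 0, creating one is just allocation plus the usual CircleNode annotations. A sketch following the annotation pattern used in CircleQuantParam.test.cpp below (the name and shape are arbitrary):

```cpp
auto *var = g->nodes()->create<luci::CircleVariable>();
var->name("state");
var->dtype(loco::DataType::FLOAT32);
var->rank(1);
var->dim(0).set(4);
var->shape_status(luci::ShapeStatus::VALID);
```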
diff --git a/runtime/onert/backend/gpu_cl/open_cl/ClMemory.cc b/compiler/luci/lang/src/CircleQuantParam.cpp
index fd3bc5579..89671d3c3 100644
--- a/runtime/onert/backend/gpu_cl/open_cl/ClMemory.cc
+++ b/compiler/luci/lang/src/CircleQuantParam.cpp
@@ -1,6 +1,5 @@
/*
* Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
- * Copyright 2019 The TensorFlow Authors. All Rights Reserved.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
@@ -15,32 +14,33 @@
* limitations under the License.
*/
-#include "ClMemory.h"
+#include "luci/IR/CircleQuantParam.h"
+#include "luci/IR/CircleNode.h"
-namespace onert
-{
-namespace backend
-{
-namespace gpu_cl
+#include <memory>
+
+namespace luci
{
-cl_mem_flags ToClMemFlags(AccessType access_type)
+/**
+ * @brief copy CircleQuantParam of src to dst
+ */
+void copy_quantparam(const luci::CircleNode *src, luci::CircleNode *dst)
{
- switch (access_type)
+ auto q = src->quantparam();
+ if (q == nullptr)
+ dst->quantparam(nullptr);
+ else
{
- case AccessType::READ:
- return CL_MEM_READ_ONLY;
- case AccessType::WRITE:
- return CL_MEM_WRITE_ONLY;
- case AccessType::READ_WRITE:
- return CL_MEM_READ_WRITE;
- default:
- throw std::runtime_error("Invalid AccessType");
- }
+ auto qparam = std::make_unique<luci::CircleQuantParam>();
+ qparam->scale = q->scale;
+ qparam->zerop = q->zerop;
+ qparam->min = q->min;
+ qparam->max = q->max;
+ qparam->quantized_dimension = q->quantized_dimension;
- return CL_MEM_READ_ONLY; // unreachable
+ dst->quantparam(std::move(qparam));
+ }
}
-} // namespace gpu_cl
-} // namespace backend
-} // namespace onert
+} // namespace luci
diff --git a/compiler/luci/lang/src/CircleQuantParam.test.cpp b/compiler/luci/lang/src/CircleQuantParam.test.cpp
new file mode 100644
index 000000000..520ca05cc
--- /dev/null
+++ b/compiler/luci/lang/src/CircleQuantParam.test.cpp
@@ -0,0 +1,78 @@
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+// NOTE any node will do for testing
+#include "luci/IR/Nodes/CircleAdd.h"
+
+#include <loco/IR/Graph.h>
+
+#include <gtest/gtest.h>
+
+namespace
+{
+
+luci::CircleAdd *build_simple_add_graph(loco::Graph *g)
+{
+ auto node = g->nodes()->create<luci::CircleAdd>();
+
+ node->name("name");
+ node->dtype(loco::DataType::FLOAT32);
+ node->rank(1);
+ node->dim(0).set(3);
+ node->shape_status(luci::ShapeStatus::VALID);
+ node->fusedActivationFunction(luci::FusedActFunc::NONE);
+
+ auto qparam = std::make_unique<luci::CircleQuantParam>();
+ qparam->scale = {1.0};
+ qparam->zerop = {0};
+ qparam->min = {0.0};
+ qparam->max = {1.0};
+ qparam->quantized_dimension = 0;
+ node->quantparam(std::move(qparam));
+
+ return node;
+}
+
+} // namespace
+
+TEST(CircleNodeCloneTest, copy_quantparam)
+{
+ auto g = loco::make_graph();
+ auto node = build_simple_add_graph(g.get());
+
+ auto copy = g->nodes()->create<luci::CircleAdd>();
+ luci::copy_quantparam(node, copy);
+
+ const auto *qparam_node = node->quantparam();
+ const auto *qparam_copy = copy->quantparam();
+ ASSERT_EQ(qparam_node->scale, qparam_copy->scale);
+ ASSERT_EQ(qparam_node->zerop, qparam_copy->zerop);
+ ASSERT_EQ(qparam_node->quantized_dimension, qparam_copy->quantized_dimension);
+}
+
+TEST(CircleNodeCloneTest, copy_quantparam_NEG)
+{
+ auto g = loco::make_graph();
+ auto node = build_simple_add_graph(g.get());
+
+ node->quantparam(nullptr);
+
+ auto copy = g->nodes()->create<luci::CircleAdd>();
+ luci::copy_quantparam(node, copy);
+
+ const auto *qparam_copy = copy->quantparam();
+ ASSERT_EQ(qparam_copy, nullptr);
+}
diff --git a/compiler/luci/lang/src/Nodes/CircleFullyConnected.test.cpp b/compiler/luci/lang/src/Nodes/CircleFullyConnected.test.cpp
index bb0e3c51b..15a780085 100644
--- a/compiler/luci/lang/src/Nodes/CircleFullyConnected.test.cpp
+++ b/compiler/luci/lang/src/Nodes/CircleFullyConnected.test.cpp
@@ -32,6 +32,7 @@ TEST(CircleFullyConnectedTest, constructor)
ASSERT_EQ(nullptr, fc_node.weights());
ASSERT_EQ(nullptr, fc_node.bias());
ASSERT_EQ(luci::FusedActFunc::UNDEFINED, fc_node.fusedActivationFunction());
+ ASSERT_EQ(false, fc_node.keep_num_dims());
}
TEST(CircleFullyConnectedTest, input_NEG)
diff --git a/compiler/luci/lang/src/Nodes/CircleSVDF.test.cpp b/compiler/luci/lang/src/Nodes/CircleSVDF.test.cpp
new file mode 100644
index 000000000..833ae0732
--- /dev/null
+++ b/compiler/luci/lang/src/Nodes/CircleSVDF.test.cpp
@@ -0,0 +1,101 @@
+/*
+ * Copyright (c) 2022 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "luci/IR/Nodes/CircleSVDF.h"
+
+#include "luci/IR/CircleDialect.h"
+#include "luci/IR/CircleNodeVisitor.h"
+
+#include <gtest/gtest.h>
+
+TEST(CircleSVDFTest, constructor)
+{
+ luci::CircleSVDF svdf_node;
+
+ ASSERT_EQ(luci::CircleDialect::get(), svdf_node.dialect());
+ ASSERT_EQ(luci::CircleOpcode::SVDF, svdf_node.opcode());
+
+ ASSERT_EQ(nullptr, svdf_node.input());
+ ASSERT_EQ(nullptr, svdf_node.weight_feature());
+ ASSERT_EQ(nullptr, svdf_node.weight_time());
+ ASSERT_EQ(nullptr, svdf_node.bias());
+ ASSERT_EQ(nullptr, svdf_node.input_activation_state());
+
+ ASSERT_EQ(false, svdf_node.asymmetric_quantize_inputs());
+ ASSERT_EQ(0, svdf_node.svdf_rank());
+}
+
+TEST(CircleSVDFTest, input_NEG)
+{
+ luci::CircleSVDF svdf_node;
+ luci::CircleSVDF node;
+
+ svdf_node.input(&node);
+ svdf_node.weight_feature(&node);
+ svdf_node.weight_time(&node);
+ svdf_node.bias(&node);
+ svdf_node.input_activation_state(&node);
+
+ ASSERT_NE(nullptr, svdf_node.input());
+ ASSERT_NE(nullptr, svdf_node.weight_feature());
+ ASSERT_NE(nullptr, svdf_node.weight_time());
+ ASSERT_NE(nullptr, svdf_node.bias());
+ ASSERT_NE(nullptr, svdf_node.input_activation_state());
+
+ svdf_node.input(nullptr);
+ svdf_node.weight_feature(nullptr);
+ svdf_node.weight_time(nullptr);
+ svdf_node.bias(nullptr);
+ svdf_node.input_activation_state(nullptr);
+
+ ASSERT_EQ(nullptr, svdf_node.input());
+ ASSERT_EQ(nullptr, svdf_node.weight_feature());
+ ASSERT_EQ(nullptr, svdf_node.weight_time());
+ ASSERT_EQ(nullptr, svdf_node.bias());
+ ASSERT_EQ(nullptr, svdf_node.input_activation_state());
+}
+
+TEST(CircleSVDFTest, arity_NEG)
+{
+ luci::CircleSVDF svdf_node;
+
+ ASSERT_NO_THROW(svdf_node.arg(4));
+ ASSERT_THROW(svdf_node.arg(5), std::out_of_range);
+}
+
+TEST(CircleSVDFTest, visit_mutable_NEG)
+{
+ struct TestVisitor final : public luci::CircleNodeMutableVisitor<void>
+ {
+ };
+
+ luci::CircleSVDF svdf_node;
+
+ TestVisitor tv;
+ ASSERT_THROW(svdf_node.accept(&tv), std::exception);
+}
+
+TEST(CircleSVDFTest, visit_NEG)
+{
+ struct TestVisitor final : public luci::CircleNodeVisitor<void>
+ {
+ };
+
+ luci::CircleSVDF svdf_node;
+
+ TestVisitor tv;
+ ASSERT_THROW(svdf_node.accept(&tv), std::exception);
+}
diff --git a/compiler/luci/lang/src/Nodes/CircleVariable.test.cpp b/compiler/luci/lang/src/Nodes/CircleVariable.test.cpp
new file mode 100644
index 000000000..e1864f8da
--- /dev/null
+++ b/compiler/luci/lang/src/Nodes/CircleVariable.test.cpp
@@ -0,0 +1,61 @@
+/*
+ * Copyright (c) 2022 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "luci/IR/Nodes/CircleVariable.h"
+
+#include "luci/IR/CircleDialect.h"
+#include "luci/IR/CircleNodeVisitor.h"
+
+#include <gtest/gtest.h>
+
+TEST(CircleVariableTest, constructor)
+{
+ luci::CircleVariable var_node;
+
+ ASSERT_EQ(luci::CircleDialect::get(), var_node.dialect());
+ ASSERT_EQ(luci::CircleOpcode::CIRCLEVARIABLE, var_node.opcode());
+}
+
+TEST(CircleVariableTest, arity_NEG)
+{
+ luci::CircleVariable var_node;
+
+ ASSERT_THROW(var_node.arg(0), std::out_of_range);
+}
+
+TEST(CircleVariableTest, visit_mutable_NEG)
+{
+ struct TestVisitor final : public luci::CircleNodeMutableVisitor<void>
+ {
+ };
+
+ luci::CircleVariable var_node;
+
+ TestVisitor tv;
+ ASSERT_THROW(var_node.accept(&tv), std::exception);
+}
+
+TEST(CircleVariableTest, visit_NEG)
+{
+ struct TestVisitor final : public luci::CircleNodeVisitor<void>
+ {
+ };
+
+ luci::CircleVariable var_node;
+
+ TestVisitor tv;
+ ASSERT_THROW(var_node.accept(&tv), std::exception);
+}
diff --git a/compiler/luci/logex/CMakeLists.txt b/compiler/luci/logex/CMakeLists.txt
index aed9fb79b..b8a2111dd 100644
--- a/compiler/luci/logex/CMakeLists.txt
+++ b/compiler/luci/logex/CMakeLists.txt
@@ -1,5 +1,7 @@
# TODO Find how to test logging-ex utility
file(GLOB_RECURSE SOURCES "src/*.cpp")
+file(GLOB_RECURSE TESTS "src/*.test.cpp")
+list(REMOVE_ITEM SOURCES ${TESTS})
if (NOT LUCI_LIBRARY_TYPE)
set(LUCI_LIBRARY_TYPE "SHARED")
@@ -13,7 +15,17 @@ target_link_libraries(luci_logex PRIVATE luci_log)
target_link_libraries(luci_logex PRIVATE luci_lang)
target_link_libraries(luci_logex PRIVATE hermes_std)
target_link_libraries(luci_logex PRIVATE nncc_common)
-target_link_libraries(luci_logex PRIVATE pepper_str)
install(TARGETS luci_logex DESTINATION lib)
install(DIRECTORY include/ DESTINATION include
FILES_MATCHING PATTERN "*.h")
+
+if(NOT ENABLE_TEST)
+ return()
+endif(NOT ENABLE_TEST)
+
+nnas_find_package(GTest REQUIRED)
+
+GTest_AddTest(luci_logex_test ${TESTS})
+target_include_directories(luci_logex_test PRIVATE src)
+target_link_libraries(luci_logex_test luci_logex)
+target_link_libraries(luci_logex_test luci_lang)
diff --git a/compiler/luci/logex/src/CircleNodeSummaryBuilder.cpp b/compiler/luci/logex/src/CircleNodeSummaryBuilder.cpp
new file mode 100644
index 000000000..eff0830b4
--- /dev/null
+++ b/compiler/luci/logex/src/CircleNodeSummaryBuilder.cpp
@@ -0,0 +1,265 @@
+/*
+ * Copyright (c) 2022 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "CircleNodeSummaryBuilder.h"
+#include "CircleNodeSummaryBuilders.h"
+
+#include <luci/IR/CircleDialect.h>
+
+#include <memory>
+
+namespace
+{
+
+std::string circle_opname(luci::CircleOpcode opcode)
+{
+ static const std::string prefix{"circle."};
+
+ switch (opcode)
+ {
+#define CIRCLE_NODE(OPCODE, CLASS) \
+ case luci::CircleOpcode::OPCODE: \
+ return prefix + #OPCODE;
+#define CIRCLE_VNODE CIRCLE_NODE
+#include <luci/IR/CircleNodes.lst>
+#undef CIRCLE_VNODE
+#undef CIRCLE_NODE
+ default:
+ break;
+ }
+
+ return prefix + "Invalid";
+}
+
+} // namespace
+
+namespace luci
+{
+
+bool CircleNodeSummaryBuilder::build(const loco::Node *node, const locop::SymbolTable *tbl,
+ locop::NodeSummary &s)
+{
+ if (node->dialect() != luci::CircleDialect::get())
+ return false;
+
+ auto ptr_to_str = [](const void *ptr) {
+ std::stringstream ss;
+ ss << ptr;
+ return ss.str();
+ };
+
+ auto circle_node = loco::must_cast<const luci::CircleNode *>(node);
+ if (const auto builder = create_builder(circle_node))
+ {
+ if (!builder->validate(circle_node))
+ {
+ s.state(locop::NodeDesc::State::Invalid);
+ return false;
+ }
+
+ auto input_names = builder->get_input_names(circle_node);
+ assert(node->arity() == input_names.size());
+ for (uint32_t i = 0; i < node->arity(); ++i)
+ s.args().append(input_names.at(i), tbl->lookup(node->arg(i)));
+
+ builder->build_attributes(circle_node, s);
+ builder->update_status(s);
+
+ s.opname(circle_opname(circle_node->opcode()));
+ s.comments().append("[" + circle_node->name() + "] = " + ptr_to_str(node));
+
+ return true;
+ }
+ else
+ {
+ // When no SummaryBuilder is implemented for this node, return false
+ return false;
+ }
+}
+
+bool CircleNodeSummaryBuilder::validate(const luci::CircleNode *) { return true; }
+
+std::vector<std::string> CircleNodeSummaryBuilder::get_input_names(const luci::CircleNode *)
+{
+ // Return empty names by default
+ return std::vector<std::string>();
+}
+
+void CircleNodeSummaryBuilder::build_attributes(const luci::CircleNode *, locop::NodeSummary &)
+{
+ // Do nothing by default
+}
+
+void CircleNodeSummaryBuilder::update_status(locop::NodeSummary &s)
+{
+ s.state(locop::NodeDesc::State::Complete);
+}
+
+std::unique_ptr<CircleNodeSummaryBuilder>
+CircleNodeSummaryBuilder::create_builder(const luci::CircleNode *node)
+{
+ switch (node->opcode())
+ {
+#define CIRCLE_NODE(OPCODE, CLASS) \
+ case luci::CircleOpcode::OPCODE: \
+ { \
+ return std::make_unique<CLASS>(); \
+ }
+
+ CIRCLE_NODE(ABS, CircleAbsSummaryBuilder)
+ CIRCLE_NODE(ADD, CircleAddSummaryBuilder)
+ CIRCLE_NODE(ADD_N, CircleAddNSummaryBuilder)
+ CIRCLE_NODE(ARG_MAX, CircleArgMaxSummaryBuilder)
+ CIRCLE_NODE(ARG_MIN, CircleArgMinSummaryBuilder)
+ CIRCLE_NODE(AVERAGE_POOL_2D, CircleAveragePool2DSummaryBuilder)
+ CIRCLE_NODE(BATCH_MATMUL, CircleBatchMatMulSummaryBuilder)
+ CIRCLE_NODE(BATCH_TO_SPACE_ND, CircleBatchToSpaceNDSummaryBuilder)
+ CIRCLE_NODE(BCQ_FULLY_CONNECTED, CircleBCQFullyConnectedSummaryBuilder)
+ CIRCLE_NODE(BCQ_GATHER, CircleBCQGatherSummaryBuilder)
+ CIRCLE_NODE(BIDIRECTIONAL_SEQUENCE_LSTM, CircleBidirectionalSequenceLSTMSummaryBuilder)
+ CIRCLE_NODE(CAST, CircleCastSummaryBuilder)
+ CIRCLE_NODE(CEIL, CircleCeilSummaryBuilder)
+ CIRCLE_NODE(CONCATENATION, CircleConcatenationSummaryBuilder)
+ CIRCLE_NODE(CIRCLECONST, CircleConstSummaryBuilder)
+ CIRCLE_NODE(CONV_2D, CircleConv2DSummaryBuilder)
+ CIRCLE_NODE(COS, CircleCosSummaryBuilder)
+ CIRCLE_NODE(CUSTOM, CircleCustomSummaryBuilder)
+ CIRCLE_NODE(DEPTH_TO_SPACE, CircleDepthToSpaceSummaryBuilder)
+ CIRCLE_NODE(DEPTHWISE_CONV_2D, CircleDepthwiseConv2DSummaryBuilder)
+ CIRCLE_NODE(DEQUANTIZE, CircleDequantizeSummaryBuilder)
+ CIRCLE_NODE(DIV, CircleDivSummaryBuilder)
+ CIRCLE_NODE(ELU, CircleEluSummaryBuilder)
+ CIRCLE_NODE(EQUAL, CircleEqualSummaryBuilder)
+ CIRCLE_NODE(EXP, CircleExpSummaryBuilder)
+ CIRCLE_NODE(EXPAND_DIMS, CircleExpandDimsSummaryBuilder)
+ CIRCLE_NODE(FAKE_QUANT, CircleFakeQuantSummaryBuilder)
+ CIRCLE_NODE(FILL, CircleFillSummaryBuilder)
+ CIRCLE_NODE(FLOOR, CircleFloorSummaryBuilder)
+ CIRCLE_NODE(FLOOR_DIV, CircleFloorDivSummaryBuilder)
+ CIRCLE_NODE(FLOOR_MOD, CircleFloorModSummaryBuilder)
+ CIRCLE_NODE(FULLY_CONNECTED, CircleFullyConnectedSummaryBuilder)
+ CIRCLE_NODE(GATHER, CircleGatherSummaryBuilder)
+ CIRCLE_NODE(GATHER_ND, CircleGatherNdSummaryBuilder)
+ CIRCLE_NODE(GREATER, CircleGreaterSummaryBuilder)
+ CIRCLE_NODE(GREATER_EQUAL, CircleGreaterEqualSummaryBuilder)
+ CIRCLE_NODE(IF, CircleIfSummaryBuilder)
+ CIRCLE_NODE(INSTANCE_NORM, CircleInstanceNormSummaryBuilder)
+ CIRCLE_NODE(L2_NORMALIZATION, CircleL2NormalizeSummaryBuilder)
+ CIRCLE_NODE(L2_POOL_2D, CircleL2Pool2DSummaryBuilder)
+ CIRCLE_NODE(LEAKY_RELU, CircleLeakyReluSummaryBuilder)
+ CIRCLE_NODE(LESS, CircleLessSummaryBuilder)
+ CIRCLE_NODE(LESS_EQUAL, CircleLessEqualSummaryBuilder)
+ CIRCLE_NODE(LOCAL_RESPONSE_NORMALIZATION, CircleLocalResponseNormalizationSummaryBuilder)
+ CIRCLE_NODE(LOG, CircleLogSummaryBuilder)
+ CIRCLE_NODE(LOGICAL_AND, CircleLogicalAndSummaryBuilder)
+ CIRCLE_NODE(LOGICAL_NOT, CircleLogicalNotSummaryBuilder)
+ CIRCLE_NODE(LOGICAL_OR, CircleLogicalOrSummaryBuilder)
+ CIRCLE_NODE(LOGISTIC, CircleLogisticSummaryBuilder)
+ CIRCLE_NODE(LOG_SOFTMAX, CircleLogSoftmaxSummaryBuilder)
+ CIRCLE_NODE(MATRIX_DIAG, CircleMatrixDiagSummaryBuilder)
+ CIRCLE_NODE(MATRIX_SET_DIAG, CircleMatrixSetDiagSummaryBuilder)
+ CIRCLE_NODE(MAXIMUM, CircleMaximumSummaryBuilder)
+ CIRCLE_NODE(MAX_POOL_2D, CircleMaxPool2DSummaryBuilder)
+ CIRCLE_NODE(MEAN, CircleMeanSummaryBuilder)
+ CIRCLE_NODE(MINIMUM, CircleMinimumSummaryBuilder)
+ CIRCLE_NODE(MIRROR_PAD, CircleMirrorPadSummaryBuilder)
+ CIRCLE_NODE(MUL, CircleMulSummaryBuilder)
+ CIRCLE_NODE(NEG, CircleNegSummaryBuilder)
+ CIRCLE_NODE(NON_MAX_SUPPRESSION_V4, CircleNonMaxSuppressionV4SummaryBuilder)
+ CIRCLE_NODE(NON_MAX_SUPPRESSION_V5, CircleNonMaxSuppressionV5SummaryBuilder)
+ CIRCLE_NODE(NOT_EQUAL, CircleNotEqualSummaryBuilder)
+ CIRCLE_NODE(ONE_HOT, CircleOneHotSummaryBuilder)
+ CIRCLE_NODE(PACK, CirclePackSummaryBuilder)
+ CIRCLE_NODE(PAD, CirclePadSummaryBuilder)
+ CIRCLE_NODE(PADV2, CirclePadV2SummaryBuilder)
+ CIRCLE_NODE(POW, CirclePowSummaryBuilder)
+ CIRCLE_NODE(PRELU, CirclePReluSummaryBuilder)
+ CIRCLE_NODE(QUANTIZE, CircleQuantizeSummaryBuilder)
+ CIRCLE_NODE(RANGE, CircleRangeSummaryBuilder)
+ CIRCLE_NODE(RANK, CircleRankSummaryBuilder)
+ CIRCLE_NODE(REDUCE_ANY, CircleReduceAnySummaryBuilder)
+ CIRCLE_NODE(REDUCE_MAX, CircleReduceMaxSummaryBuilder)
+ CIRCLE_NODE(REDUCE_MIN, CircleReduceMinSummaryBuilder)
+ CIRCLE_NODE(REDUCE_PROD, CircleReduceProdSummaryBuilder)
+ CIRCLE_NODE(RELU, CircleReluSummaryBuilder)
+ CIRCLE_NODE(RELU6, CircleRelu6SummaryBuilder)
+ CIRCLE_NODE(RELU_N1_TO_1, CircleReluN1To1SummaryBuilder)
+ CIRCLE_NODE(RESHAPE, CircleReshapeSummaryBuilder)
+ CIRCLE_NODE(RESIZE_BILINEAR, CircleResizeBilinearSummaryBuilder)
+ CIRCLE_NODE(RESIZE_NEAREST_NEIGHBOR, CircleResizeNearestNeighborSummaryBuilder)
+ CIRCLE_NODE(REVERSE_SEQUENCE, CircleReverseSequenceSummaryBuilder)
+ CIRCLE_NODE(REVERSE_V2, CircleReverseV2SummaryBuilder)
+ CIRCLE_NODE(ROUND, CircleRoundSummaryBuilder)
+ CIRCLE_NODE(RSQRT, CircleRsqrtSummaryBuilder)
+ CIRCLE_NODE(SCATTER_ND, CircleScatterNdSummaryBuilder)
+ CIRCLE_NODE(SEGMENT_SUM, CircleSegmentSumSummaryBuilder)
+ CIRCLE_NODE(SELECT, CircleSelectSummaryBuilder)
+ CIRCLE_NODE(SELECT_V2, CircleSelectV2SummaryBuilder)
+ CIRCLE_NODE(SHAPE, CircleShapeSummaryBuilder)
+ CIRCLE_NODE(SIN, CircleSinSummaryBuilder)
+ CIRCLE_NODE(SLICE, CircleSliceSummaryBuilder)
+ CIRCLE_NODE(SOFTMAX, CircleSoftmaxSummaryBuilder)
+ CIRCLE_NODE(SPACE_TO_BATCH_ND, CircleSpaceToBatchNDSummaryBuilder)
+ CIRCLE_NODE(SPACE_TO_DEPTH, CircleSpaceToDepthSummaryBuilder)
+ CIRCLE_NODE(SPARSE_TO_DENSE, CircleSparseToDenseSummaryBuilder)
+ CIRCLE_NODE(SPLIT, CircleSplitSummaryBuilder)
+ CIRCLE_NODE(SPLIT_V, CircleSplitVSummaryBuilder)
+ CIRCLE_NODE(SQRT, CircleSqrtSummaryBuilder)
+ CIRCLE_NODE(SQUARE, CircleSquareSummaryBuilder)
+ CIRCLE_NODE(SQUARED_DIFFERENCE, CircleSquaredDifferenceSummaryBuilder)
+ CIRCLE_NODE(SQUEEZE, CircleSqueezeSummaryBuilder)
+ CIRCLE_NODE(STRIDED_SLICE, CircleStridedSliceSummaryBuilder)
+ CIRCLE_NODE(SUB, CircleSubSummaryBuilder)
+ CIRCLE_NODE(SUM, CircleSumSummaryBuilder)
+ CIRCLE_NODE(SVDF, CircleSVDFSummaryBuilder)
+ CIRCLE_NODE(TANH, CircleTanhSummaryBuilder)
+ CIRCLE_NODE(TILE, CircleTileSummaryBuilder)
+ CIRCLE_NODE(TOPK_V2, CircleTopKV2SummaryBuilder)
+ CIRCLE_NODE(TRANSPOSE, CircleTransposeSummaryBuilder)
+ CIRCLE_NODE(TRANSPOSE_CONV, CircleTransposeConvSummaryBuilder)
+ CIRCLE_NODE(UNIDIRECTIONAL_SEQUENCE_LSTM, CircleUnidirectionalSequenceLSTMSummaryBuilder)
+ CIRCLE_NODE(UNIQUE, CircleUniqueSummaryBuilder)
+ CIRCLE_NODE(UNPACK, CircleUnpackSummaryBuilder)
+ CIRCLE_NODE(WHERE, CircleWhereSummaryBuilder)
+ CIRCLE_NODE(WHILE, CircleWhileSummaryBuilder)
+ CIRCLE_NODE(ZEROS_LIKE, CircleZerosLikeSummaryBuilder)
+
+ CIRCLE_NODE(CIRCLEBIDIRECTIONAL_SEQUENCE_LSTM_OUT,
+ CircleBidirectionalSequenceLSTMOutSummaryBuilder)
+ CIRCLE_NODE(CIRCLECUSTOMOUT, CircleCustomOutSummaryBuilder)
+ CIRCLE_NODE(CIRCLEIFOUT, CircleIfOutSummaryBuilder)
+ CIRCLE_NODE(CIRCLEINPUT, CircleInputSummaryBuilder)
+ CIRCLE_NODE(CIRCLENONMAXSUPPRESSIONV4OUT, CircleNonMaxSuppressionV4OutSummaryBuilder)
+ CIRCLE_NODE(CIRCLENONMAXSUPPRESSIONV5OUT, CircleNonMaxSuppressionV5OutSummaryBuilder)
+ CIRCLE_NODE(CIRCLEOUTPUT, CircleOutputSummaryBuilder)
+ CIRCLE_NODE(CIRCLEOUTPUTDUMMY, CircleOutputDummySummaryBuilder)
+ CIRCLE_NODE(CIRCLEOUTPUTEXCLUDE, CircleOutputExcludeSummaryBuilder)
+ CIRCLE_NODE(CIRCLESPLITOUT, CircleSplitOutSummaryBuilder)
+ CIRCLE_NODE(CIRCLESPLITVOUT, CircleSplitVOutSummaryBuilder)
+ CIRCLE_NODE(CIRCLETOPKV2OUT, CircleTopKV2OutSummaryBuilder)
+ CIRCLE_NODE(CIRCLEUNIQUEOUT, CircleUniqueOutSummaryBuilder)
+ CIRCLE_NODE(CIRCLEUNPACKOUT, CircleUnpackOutSummaryBuilder)
+ CIRCLE_NODE(CIRCLEVARIABLE, CircleVariableSummaryBuilder)
+ CIRCLE_NODE(CIRCLEWHILEOUT, CircleWhileOutSummaryBuilder)
+
+ default:
+ return nullptr;
+
+#undef CIRCLE_NODE
+ }
+}
+
+} // namespace luci
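
A hedged usage sketch of the entry point: build() only fills the summary when a builder exists for the opcode and the node passes validation (`tbl` is any locop::SymbolTable implementation; `node` is any loco::Node* in the dumped graph):

```cpp
locop::NodeSummary s;
luci::CircleNodeSummaryBuilder builder;
if (builder.build(node, &tbl, s))
{
  // s now carries "circle.<OPCODE>", the named inputs, the attributes,
  // and a comment of the form "[<node name>] = <node pointer>"
}
```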
diff --git a/compiler/luci/logex/src/CircleNodeSummaryBuilder.h b/compiler/luci/logex/src/CircleNodeSummaryBuilder.h
new file mode 100644
index 000000000..e21d77310
--- /dev/null
+++ b/compiler/luci/logex/src/CircleNodeSummaryBuilder.h
@@ -0,0 +1,52 @@
+/*
+ * Copyright (c) 2022 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __LUCI_LOGEX_CIRCLE_NODE_SUMMARY_BUILDER__
+#define __LUCI_LOGEX_CIRCLE_NODE_SUMMARY_BUILDER__
+
+#include <luci/IR/CircleNode.h>
+#include <locop/NodeSummary.h>
+#include <locop/SymbolTable.h>
+
+#include <memory>
+#include <sstream>
+#include <vector>
+
+namespace luci
+{
+
+class CircleNodeSummaryBuilder
+{
+public:
+ bool build(const loco::Node *node, const locop::SymbolTable *tbl, locop::NodeSummary &s);
+
+private:
+ /**
+ * @brief Template methods for building a node summary.
+ * The default behavior builds a summary for a node with no inputs.
+ */
+ virtual bool validate(const luci::CircleNode *node);
+ virtual std::vector<std::string> get_input_names(const luci::CircleNode *node);
+ virtual void build_attributes(const luci::CircleNode *node, locop::NodeSummary &s);
+ virtual void update_status(locop::NodeSummary &s);
+
+private:
+ std::unique_ptr<CircleNodeSummaryBuilder> create_builder(const luci::CircleNode *node);
+};
+
+} // namespace luci
+
+#endif // __LUCI_LOGEX_CIRCLE_NODE_SUMMARY_BUILDER__
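
Supporting a new opcode therefore needs two pieces: a derived class overriding whichever private template methods deviate from the defaults, and a CIRCLE_NODE entry in create_builder()'s switch. A hypothetical sketch:

```cpp
// Hypothetical builder; any method left un-overridden keeps the default
// behavior (no inputs, no attributes, Complete status).
class CircleFooSummaryBuilder final : public luci::CircleNodeSummaryBuilder
{
private:
  std::vector<std::string> get_input_names(const luci::CircleNode *) { return {"input"}; }
};

// ...and in CircleNodeSummaryBuilder::create_builder():
//   CIRCLE_NODE(FOO, CircleFooSummaryBuilder)
```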
diff --git a/compiler/luci/logex/src/CircleNodeSummaryBuilder.test.cpp b/compiler/luci/logex/src/CircleNodeSummaryBuilder.test.cpp
new file mode 100644
index 000000000..89ea213e0
--- /dev/null
+++ b/compiler/luci/logex/src/CircleNodeSummaryBuilder.test.cpp
@@ -0,0 +1,309 @@
+/*
+ * Copyright (c) 2022 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "CircleNodeSummaryBuilder.h"
+
+#include <luci/IR/CircleNodes.h>
+#include <locop/NodeSummary.h>
+#include <locop/SymbolTable.h>
+
+#include <gtest/gtest.h>
+
+namespace
+{
+
+class MockSymbolTable : public locop::SymbolTable
+{
+ std::string lookup(const loco::Node *) const override
+ {
+ return "Do nothing because it is mocking Symbol Table!";
+ }
+};
+
+class CircleNodeSummaryBuilderTest : public ::testing::Test
+{
+protected:
+ bool mock_build(const loco::Node *node)
+ {
+ return luci::CircleNodeSummaryBuilder().build(node, &_tbl, _s);
+ }
+
+protected:
+ MockSymbolTable _tbl;
+ locop::NodeSummary _s;
+};
+
+} // namespace
+
+TEST_F(CircleNodeSummaryBuilderTest, Add_validate)
+{
+ luci::CircleAdd node;
+ node.fusedActivationFunction(luci::FusedActFunc::RELU);
+ EXPECT_TRUE(mock_build(&node));
+}
+
+TEST_F(CircleNodeSummaryBuilderTest, Add_validate_fused_NEG)
+{
+ luci::CircleAdd node;
+ node.fusedActivationFunction(luci::FusedActFunc::UNDEFINED);
+ EXPECT_FALSE(mock_build(&node));
+}
+
+TEST_F(CircleNodeSummaryBuilderTest, AveragePool2D_validate)
+{
+ luci::CircleAveragePool2D node;
+ node.fusedActivationFunction(luci::FusedActFunc::RELU);
+ node.padding(luci::Padding::SAME);
+ EXPECT_TRUE(mock_build(&node));
+}
+
+TEST_F(CircleNodeSummaryBuilderTest, AveragePool2D_validate_fused_NEG)
+{
+ luci::CircleAveragePool2D node;
+ node.fusedActivationFunction(luci::FusedActFunc::UNDEFINED);
+ node.padding(luci::Padding::SAME);
+ EXPECT_FALSE(mock_build(&node));
+}
+
+TEST_F(CircleNodeSummaryBuilderTest, AveragePool2D_validate_padding_NEG)
+{
+ luci::CircleAveragePool2D node;
+ node.fusedActivationFunction(luci::FusedActFunc::RELU);
+ node.padding(luci::Padding::UNDEFINED);
+ EXPECT_FALSE(mock_build(&node));
+}
+
+TEST_F(CircleNodeSummaryBuilderTest, BCQFullyConnected_validate)
+{
+ luci::CircleBCQFullyConnected node;
+ node.fusedActivationFunction(luci::FusedActFunc::RELU);
+ EXPECT_TRUE(mock_build(&node));
+}
+
+TEST_F(CircleNodeSummaryBuilderTest, BCQFullyConnected_validate_fused_NEG)
+{
+ luci::CircleBCQFullyConnected node;
+ node.fusedActivationFunction(luci::FusedActFunc::UNDEFINED);
+ EXPECT_FALSE(mock_build(&node));
+}
+
+TEST_F(CircleNodeSummaryBuilderTest, Concatenation_validate)
+{
+ luci::CircleConcatenation node(2);
+ node.fusedActivationFunction(luci::FusedActFunc::RELU);
+ EXPECT_TRUE(mock_build(&node));
+}
+
+TEST_F(CircleNodeSummaryBuilderTest, Concatenation_validate_fused_NEG)
+{
+ luci::CircleConcatenation node(2);
+ node.fusedActivationFunction(luci::FusedActFunc::UNDEFINED);
+ EXPECT_FALSE(mock_build(&node));
+}
+
+TEST_F(CircleNodeSummaryBuilderTest, Conv2D_validate)
+{
+ luci::CircleConv2D node;
+ node.fusedActivationFunction(luci::FusedActFunc::RELU);
+ node.padding(luci::Padding::SAME);
+ EXPECT_TRUE(mock_build(&node));
+}
+
+TEST_F(CircleNodeSummaryBuilderTest, Conv2D_validate_fused_NEG)
+{
+ luci::CircleConv2D node;
+ node.fusedActivationFunction(luci::FusedActFunc::UNDEFINED);
+ node.padding(luci::Padding::SAME);
+ EXPECT_FALSE(mock_build(&node));
+}
+
+TEST_F(CircleNodeSummaryBuilderTest, Conv2D_validate_padding_NEG)
+{
+ luci::CircleConv2D node;
+ node.fusedActivationFunction(luci::FusedActFunc::RELU);
+ node.padding(luci::Padding::UNDEFINED);
+ EXPECT_FALSE(mock_build(&node));
+}
+
+TEST_F(CircleNodeSummaryBuilderTest, DepthwiseConv2D_validate)
+{
+ luci::CircleDepthwiseConv2D node;
+ node.fusedActivationFunction(luci::FusedActFunc::RELU);
+ node.padding(luci::Padding::SAME);
+ EXPECT_TRUE(mock_build(&node));
+}
+
+TEST_F(CircleNodeSummaryBuilderTest, DepthwiseConv2D_validate_fused_NEG)
+{
+ luci::CircleDepthwiseConv2D node;
+ node.fusedActivationFunction(luci::FusedActFunc::UNDEFINED);
+ node.padding(luci::Padding::SAME);
+ EXPECT_FALSE(mock_build(&node));
+}
+
+TEST_F(CircleNodeSummaryBuilderTest, DepthwiseConv2D_validate_padding_NEG)
+{
+ luci::CircleDepthwiseConv2D node;
+ node.fusedActivationFunction(luci::FusedActFunc::RELU);
+ node.padding(luci::Padding::UNDEFINED);
+ EXPECT_FALSE(mock_build(&node));
+}
+
+TEST_F(CircleNodeSummaryBuilderTest, FullyConnected_validate)
+{
+ luci::CircleFullyConnected node;
+ node.fusedActivationFunction(luci::FusedActFunc::RELU);
+ EXPECT_TRUE(mock_build(&node));
+}
+
+TEST_F(CircleNodeSummaryBuilderTest, FullyConnected_validate_fused_NEG)
+{
+ luci::CircleFullyConnected node;
+ node.fusedActivationFunction(luci::FusedActFunc::UNDEFINED);
+ EXPECT_FALSE(mock_build(&node));
+}
+
+TEST_F(CircleNodeSummaryBuilderTest, InstanceNorm_validate)
+{
+ luci::CircleInstanceNorm node;
+ node.fusedActivationFunction(luci::FusedActFunc::RELU);
+ EXPECT_TRUE(mock_build(&node));
+}
+
+TEST_F(CircleNodeSummaryBuilderTest, InstanceNorm_validate_fused_NEG)
+{
+ luci::CircleInstanceNorm node;
+ node.fusedActivationFunction(luci::FusedActFunc::UNDEFINED);
+ EXPECT_FALSE(mock_build(&node));
+}
+
+TEST_F(CircleNodeSummaryBuilderTest, L2Normalize_validate)
+{
+ luci::CircleL2Normalize node;
+ node.fusedActivationFunction(luci::FusedActFunc::RELU);
+ EXPECT_TRUE(mock_build(&node));
+}
+
+TEST_F(CircleNodeSummaryBuilderTest, L2Normalize_validate_fused_NEG)
+{
+ luci::CircleL2Normalize node;
+ node.fusedActivationFunction(luci::FusedActFunc::UNDEFINED);
+ EXPECT_FALSE(mock_build(&node));
+}
+
+TEST_F(CircleNodeSummaryBuilderTest, L2Pool2D_validate)
+{
+ luci::CircleL2Pool2D node;
+ node.fusedActivationFunction(luci::FusedActFunc::RELU);
+ node.padding(luci::Padding::SAME);
+ EXPECT_TRUE(mock_build(&node));
+}
+
+TEST_F(CircleNodeSummaryBuilderTest, L2Pool2D_validate_fused_NEG)
+{
+ luci::CircleL2Pool2D node;
+ node.fusedActivationFunction(luci::FusedActFunc::UNDEFINED);
+ node.padding(luci::Padding::SAME);
+ EXPECT_FALSE(mock_build(&node));
+}
+
+TEST_F(CircleNodeSummaryBuilderTest, L2Pool2D_validate_padding_NEG)
+{
+ luci::CircleL2Pool2D node;
+ node.fusedActivationFunction(luci::FusedActFunc::RELU);
+ node.padding(luci::Padding::UNDEFINED);
+ EXPECT_FALSE(mock_build(&node));
+}
+
+TEST_F(CircleNodeSummaryBuilderTest, MaxPool2D_validate)
+{
+ luci::CircleMaxPool2D node;
+ node.fusedActivationFunction(luci::FusedActFunc::RELU);
+ node.padding(luci::Padding::SAME);
+ EXPECT_TRUE(mock_build(&node));
+}
+
+TEST_F(CircleNodeSummaryBuilderTest, MaxPool2D_validate_fused_NEG)
+{
+ luci::CircleMaxPool2D node;
+ node.fusedActivationFunction(luci::FusedActFunc::UNDEFINED);
+ node.padding(luci::Padding::SAME);
+ EXPECT_FALSE(mock_build(&node));
+}
+
+TEST_F(CircleNodeSummaryBuilderTest, MaxPool2D_validate_padding_NEG)
+{
+ luci::CircleMaxPool2D node;
+ node.fusedActivationFunction(luci::FusedActFunc::RELU);
+ node.padding(luci::Padding::UNDEFINED);
+ EXPECT_FALSE(mock_build(&node));
+}
+
+TEST_F(CircleNodeSummaryBuilderTest, MirrorPad_validate)
+{
+ luci::CircleMirrorPad node;
+ node.mode(luci::MirrorPadMode::REFLECT);
+ EXPECT_TRUE(mock_build(&node));
+}
+
+TEST_F(CircleNodeSummaryBuilderTest, MirrorPad_validate_mirror_padding_NEG)
+{
+ luci::CircleMirrorPad node;
+ node.mode(luci::MirrorPadMode::UNDEFINED);
+ EXPECT_FALSE(mock_build(&node));
+}
+
+TEST_F(CircleNodeSummaryBuilderTest, Mul_validate)
+{
+ luci::CircleMul node;
+ node.fusedActivationFunction(luci::FusedActFunc::RELU);
+ EXPECT_TRUE(mock_build(&node));
+}
+
+TEST_F(CircleNodeSummaryBuilderTest, Mul_validate_fused_NEG)
+{
+ luci::CircleMul node;
+ node.fusedActivationFunction(luci::FusedActFunc::UNDEFINED);
+ EXPECT_FALSE(mock_build(&node));
+}
+
+TEST_F(CircleNodeSummaryBuilderTest, SVDF_validate)
+{
+ luci::CircleSVDF node;
+ node.fusedActivationFunction(luci::FusedActFunc::RELU);
+ EXPECT_TRUE(mock_build(&node));
+}
+
+TEST_F(CircleNodeSummaryBuilderTest, SVDF_validate_fused_NEG)
+{
+ luci::CircleSVDF node;
+ node.fusedActivationFunction(luci::FusedActFunc::UNDEFINED);
+ EXPECT_FALSE(mock_build(&node));
+}
+
+TEST_F(CircleNodeSummaryBuilderTest, TransposeConv_validate)
+{
+ luci::CircleTransposeConv node;
+ node.padding(luci::Padding::SAME);
+ EXPECT_TRUE(mock_build(&node));
+}
+
+TEST_F(CircleNodeSummaryBuilderTest, TransposeConv_validate_padding_NEG)
+{
+ luci::CircleTransposeConv node;
+ node.padding(luci::Padding::UNDEFINED);
+ EXPECT_FALSE(mock_build(&node));
+}
diff --git a/compiler/luci/logex/src/CircleNodeSummaryBuilders.cpp b/compiler/luci/logex/src/CircleNodeSummaryBuilders.cpp
new file mode 100644
index 000000000..6df9270e3
--- /dev/null
+++ b/compiler/luci/logex/src/CircleNodeSummaryBuilders.cpp
@@ -0,0 +1,1128 @@
+/*
+ * Copyright (c) 2022 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "CircleNodeSummaryBuilders.h"
+
+#include <luci/IR/CircleNode.h>
+#include <luci/IR/CircleNodes.h>
+#include <loco/IR/Node.h>
+
+#include <string>
+#include <vector>
+
+namespace
+{
+
+std::string to_str(loco::DataType type)
+{
+ switch (type)
+ {
+ case loco::DataType::U8:
+ return "UINT8";
+ case loco::DataType::U16:
+ return "UINT16";
+ case loco::DataType::U32:
+ return "UINT32";
+ case loco::DataType::U64:
+ return "UINT64";
+
+ case loco::DataType::S8:
+ return "INT8";
+ case loco::DataType::S16:
+ return "INT16";
+ case loco::DataType::S32:
+ return "INT32";
+ case loco::DataType::S64:
+ return "INT64";
+
+ case loco::DataType::FLOAT16:
+ return "FLOAT16";
+ case loco::DataType::FLOAT32:
+ return "FLOAT32";
+ case loco::DataType::FLOAT64:
+ return "FLOAT64";
+
+ case loco::DataType::BOOL:
+ return "BOOL";
+
+ default:
+ return "Error";
+ }
+}
+
+std::string to_str(bool value) { return value ? "true" : "false"; }
+
+std::string to_str(luci::FusedActFunc fused)
+{
+ switch (fused)
+ {
+ case luci::FusedActFunc::NONE:
+ return "NONE";
+ case luci::FusedActFunc::RELU:
+ return "RELU";
+ case luci::FusedActFunc::RELU_N1_TO_1:
+ return "RELU_N1_TO_1";
+ case luci::FusedActFunc::RELU6:
+ return "RELU6";
+ case luci::FusedActFunc::TANH:
+ return "TANH";
+ case luci::FusedActFunc::SIGN_BIT:
+ return "SIGN_BIT";
+ default:
+ return "Error";
+ }
+}
+
+std::string to_str(luci::Padding padding)
+{
+ switch (padding)
+ {
+ case luci::Padding::SAME:
+ return "SAME";
+ case luci::Padding::VALID:
+ return "VALID";
+ default:
+ return "Error";
+ }
+}
+
+std::string to_str(const luci::Stride *stride)
+{
+ return std::to_string(stride->h()) + "," + std::to_string(stride->w());
+}
+
+std::string to_str(const luci::Filter *filter)
+{
+ return std::to_string(filter->h()) + "," + std::to_string(filter->w());
+}
+
+std::string to_str(luci::MirrorPadMode mode)
+{
+ switch (mode)
+ {
+ case luci::MirrorPadMode::REFLECT:
+ return "REFLECT";
+ case luci::MirrorPadMode::SYMMETRIC:
+ return "SYMMETRIC";
+ default:
+ return "Error";
+ }
+}
+
+} // namespace
+
+namespace luci
+{
+
+std::vector<std::string> CircleNodeWithXSummaryBuilder::get_input_names(const luci::CircleNode *)
+{
+ return {"x"};
+}
+
+std::vector<std::string>
+CircleNodeWithINPUTSummaryBuilder::get_input_names(const luci::CircleNode *)
+{
+ return {"input"};
+}
+
+std::vector<std::string> CircleNodeWithXYSummaryBuilder::get_input_names(const luci::CircleNode *)
+{
+ return {"x", "y"};
+}
+
+std::vector<std::string>
+CircleNodeWithFEATURESSummaryBuilder::get_input_names(const luci::CircleNode *)
+{
+ return {"features"};
+}
+
+} // namespace luci
+
+namespace luci
+{
+
+bool CircleAddSummaryBuilder::validate(const luci::CircleNode *node)
+{
+ auto add = loco::must_cast<const luci::CircleAdd *>(node);
+ if (add->fusedActivationFunction() == luci::FusedActFunc::UNDEFINED)
+ return false;
+
+ return true;
+}
+
+void CircleAddSummaryBuilder::build_attributes(const luci::CircleNode *node, locop::NodeSummary &s)
+{
+ auto add = loco::must_cast<const luci::CircleAdd *>(node);
+ s.args().append("fused_activation_function", to_str(add->fusedActivationFunction()));
+}
+
+std::vector<std::string> CircleAddNSummaryBuilder::get_input_names(const luci::CircleNode *node)
+{
+ return std::vector<std::string>(node->arity(), "inputs");
+}
+
+std::vector<std::string> CircleArgMaxSummaryBuilder::get_input_names(const luci::CircleNode *)
+{
+ return {"input", "dimension"};
+}
+
+void CircleArgMaxSummaryBuilder::build_attributes(const luci::CircleNode *node,
+ locop::NodeSummary &s)
+{
+ auto argmax = loco::must_cast<const luci::CircleArgMax *>(node);
+ s.args().append("output_type", to_str(argmax->output_type()));
+}
+
+std::vector<std::string> CircleArgMinSummaryBuilder::get_input_names(const luci::CircleNode *)
+{
+ return {"input", "dimension"};
+}
+
+void CircleArgMinSummaryBuilder::build_attributes(const luci::CircleNode *node,
+ locop::NodeSummary &s)
+{
+ auto argmin = loco::must_cast<const luci::CircleArgMin *>(node);
+ s.args().append("output_type", to_str(argmin->output_type()));
+}
+
+bool CircleAveragePool2DSummaryBuilder::validate(const luci::CircleNode *node)
+{
+ auto avgpool = loco::must_cast<const luci::CircleAveragePool2D *>(node);
+ if (avgpool->fusedActivationFunction() == luci::FusedActFunc::UNDEFINED)
+ return false;
+ if (avgpool->padding() == luci::Padding::UNDEFINED)
+ return false;
+
+ return true;
+}
+
+std::vector<std::string>
+CircleAveragePool2DSummaryBuilder::get_input_names(const luci::CircleNode *)
+{
+ return {"value"};
+}
+
+void CircleAveragePool2DSummaryBuilder::build_attributes(const luci::CircleNode *node,
+ locop::NodeSummary &s)
+{
+ auto avgpool = loco::must_cast<const luci::CircleAveragePool2D *>(node);
+ s.args().append("filter(h,w)", to_str(avgpool->filter()));
+ s.args().append("stride(h,w)", to_str(avgpool->stride()));
+ s.args().append("padding", to_str(avgpool->padding()));
+ s.args().append("fused_activation_function", to_str(avgpool->fusedActivationFunction()));
+}
+
+void CircleBatchMatMulSummaryBuilder::build_attributes(const luci::CircleNode *node,
+ locop::NodeSummary &s)
+{
+ auto batchmatmul = loco::must_cast<const luci::CircleBatchMatMul *>(node);
+ s.args().append("adj_x", to_str(batchmatmul->adj_x()));
+ s.args().append("adj_y", to_str(batchmatmul->adj_y()));
+}
+
+std::vector<std::string>
+CircleBatchToSpaceNDSummaryBuilder::get_input_names(const luci::CircleNode *)
+{
+ return {"input", "block_shape", "crops"};
+}
+
+bool CircleBCQFullyConnectedSummaryBuilder::validate(const luci::CircleNode *node)
+{
+ auto bcq_fc = loco::must_cast<const luci::CircleBCQFullyConnected *>(node);
+ if (bcq_fc->fusedActivationFunction() == luci::FusedActFunc::UNDEFINED)
+ return false;
+
+ return true;
+}
+
+std::vector<std::string>
+CircleBCQFullyConnectedSummaryBuilder::get_input_names(const luci::CircleNode *)
+{
+ return {"input", "weights_scales", "weights_binary", "bias", "weights_clusters"};
+}
+
+void CircleBCQFullyConnectedSummaryBuilder::build_attributes(const luci::CircleNode *node,
+ locop::NodeSummary &s)
+{
+ auto bcq_fc = loco::must_cast<const luci::CircleBCQFullyConnected *>(node);
+ s.args().append("fused_activation_function", to_str(bcq_fc->fusedActivationFunction()));
+ s.args().append("weights_hidden_size", std::to_string(bcq_fc->weights_hidden_size()));
+}
+
+std::vector<std::string> CircleBCQGatherSummaryBuilder::get_input_names(const luci::CircleNode *)
+{
+ return {"input_scales", "input_binary", "indices", "input_clusters"};
+}
+
+void CircleBCQGatherSummaryBuilder::build_attributes(const luci::CircleNode *node,
+ locop::NodeSummary &s)
+{
+ auto bcq_gather = loco::must_cast<const luci::CircleBCQGather *>(node);
+ s.args().append("axis", std::to_string(bcq_gather->axis()));
+ s.args().append("input_hidden_size", std::to_string(bcq_gather->input_hidden_size()));
+}
+
+std::vector<std::string>
+CircleBidirectionalSequenceLSTMSummaryBuilder::get_input_names(const luci::CircleNode *)
+{
+ return {"input",
+ "fw_input_to_input_weights",
+ "fw_input_to_forget_weights",
+ "fw_input_to_cell_weights",
+ "fw_input_to_output_weights",
+ "fw_recurrent_to_input_weights",
+ "fw_recurrent_to_forget_weights",
+ "fw_recurrent_to_cell_weights",
+ "fw_recurrent_to_output_weights",
+ "fw_cell_to_input_weights",
+ "fw_cell_to_forget_weights",
+ "fw_cell_to_output_weights",
+ "fw_input_gate_bias",
+ "fw_forget_gate_bias",
+ "fw_cell_gate_bias",
+ "fw_output_gate_bias",
+ "fw_projection_weights",
+ "fw_projection_bias",
+ "bw_input_to_input_weights",
+ "bw_input_to_forget_weights",
+ "bw_input_to_cell_weights",
+ "bw_input_to_output_weights",
+ "bw_recurrent_to_input_weights",
+ "bw_recurrent_to_forget_weights",
+ "bw_recurrent_to_cell_weights",
+ "bw_recurrent_to_output_weights",
+ "bw_cell_to_input_weights",
+ "bw_cell_to_forget_weights",
+ "bw_cell_to_output_weights",
+ "bw_input_gate_bias",
+ "bw_forget_gate_bias",
+ "bw_cell_gate_bias",
+ "bw_output_gate_bias",
+ "bw_projection_weights",
+ "bw_projection_bias",
+ "fw_activation_state",
+ "fw_cell_state",
+ "bw_activation_state",
+ "bw_cell_state",
+ "auxillary_input",
+ "fw_auxillary_input_to_input_weights",
+ "fw_auxillary_input_to_forget_weights",
+ "fw_auxillary_input_to_cell_weights",
+ "fw_auxillary_input_to_output_weights",
+ "bw_auxillary_input_to_input_weights",
+ "bw_auxillary_input_to_forget_weights",
+ "bw_auxillary_input_to_cell_weights",
+ "bw_auxillary_input_to_output_weights"};
+}
+
+void CircleBidirectionalSequenceLSTMSummaryBuilder::build_attributes(const luci::CircleNode *node,
+ locop::NodeSummary &s)
+{
+ auto lstm = loco::must_cast<const luci::CircleBidirectionalSequenceLSTM *>(node);
+ s.args().append("cell_clip", to_str(lstm->cell_clip()));
+ s.args().append("proj_clip", to_str(lstm->proj_clip()));
+ s.args().append("merge_outputs", to_str(lstm->merge_outputs()));
+ s.args().append("time_major", to_str(lstm->time_major()));
+ s.args().append("asymmetric_quantize_inputs", to_str(lstm->asymmetric_quantize_inputs()));
+}
+
+std::vector<std::string> CircleCastSummaryBuilder::get_input_names(const luci::CircleNode *)
+{
+ return {"x"};
+}
+
+void CircleCastSummaryBuilder::build_attributes(const luci::CircleNode *node, locop::NodeSummary &s)
+{
+ auto cast = loco::must_cast<const luci::CircleCast *>(node);
+ s.args().append("in_data_type", to_str(cast->in_data_type()));
+ s.args().append("out_data_type", to_str(cast->out_data_type()));
+}
+
+bool CircleConcatenationSummaryBuilder::validate(const luci::CircleNode *node)
+{
+ auto concat = loco::must_cast<const luci::CircleConcatenation *>(node);
+ if (concat->fusedActivationFunction() == luci::FusedActFunc::UNDEFINED)
+ return false;
+
+ return true;
+}
+
+std::vector<std::string>
+CircleConcatenationSummaryBuilder::get_input_names(const luci::CircleNode *node)
+{
+ return std::vector<std::string>(node->arity(), "values");
+}
+
+void CircleConcatenationSummaryBuilder::build_attributes(const luci::CircleNode *node,
+ locop::NodeSummary &s)
+{
+ auto concat = loco::must_cast<const luci::CircleConcatenation *>(node);
+ s.args().append("axis", std::to_string(concat->axis()));
+ s.args().append("fused_activation_function", to_str(concat->fusedActivationFunction()));
+}
+
+void CircleConstSummaryBuilder::update_status(locop::NodeSummary &s)
+{
+ s.state(locop::NodeDesc::State::PartiallyKnown);
+}
+
+bool CircleConv2DSummaryBuilder::validate(const luci::CircleNode *node)
+{
+ auto conv2d = loco::must_cast<const luci::CircleConv2D *>(node);
+ if (conv2d->fusedActivationFunction() == luci::FusedActFunc::UNDEFINED)
+ return false;
+ if (conv2d->padding() == luci::Padding::UNDEFINED)
+ return false;
+
+ return true;
+}
+
+std::vector<std::string> CircleConv2DSummaryBuilder::get_input_names(const luci::CircleNode *)
+{
+ return {"input", "filter", "bias"};
+}
+
+void CircleConv2DSummaryBuilder::build_attributes(const luci::CircleNode *node,
+ locop::NodeSummary &s)
+{
+ auto conv2d = loco::must_cast<const luci::CircleConv2D *>(node);
+ s.args().append("stride(h,w)", to_str(conv2d->stride()));
+ s.args().append("dilation(h,w)", to_str(conv2d->dilation()));
+ s.args().append("padding", to_str(conv2d->padding()));
+ s.args().append("fused_activation_function", to_str(conv2d->fusedActivationFunction()));
+}
+
+std::vector<std::string> CircleCustomSummaryBuilder::get_input_names(const luci::CircleNode *node)
+{
+ auto input_names = std::vector<std::string>();
+ for (uint32_t i = 0; i < node->arity(); ++i)
+ input_names.push_back("input" + std::to_string(i));
+ return input_names;
+}
+
+void CircleCustomSummaryBuilder::build_attributes(const luci::CircleNode *node,
+ locop::NodeSummary &s)
+{
+ auto custom = loco::must_cast<const luci::CircleCustom *>(node);
+ s.args().append("custom_code", custom->custom_code());
+}
+
+void CircleDepthToSpaceSummaryBuilder::build_attributes(const luci::CircleNode *node,
+ locop::NodeSummary &s)
+{
+ auto depth_to_space = loco::must_cast<const luci::CircleDepthToSpace *>(node);
+ s.args().append("block_size", std::to_string(depth_to_space->block_size()));
+}
+
+bool CircleDepthwiseConv2DSummaryBuilder::validate(const luci::CircleNode *node)
+{
+ auto dw_conv2d = loco::must_cast<const luci::CircleDepthwiseConv2D *>(node);
+ if (dw_conv2d->fusedActivationFunction() == luci::FusedActFunc::UNDEFINED)
+ return false;
+ if (dw_conv2d->padding() == luci::Padding::UNDEFINED)
+ return false;
+
+ return true;
+}
+
+std::vector<std::string>
+CircleDepthwiseConv2DSummaryBuilder::get_input_names(const luci::CircleNode *)
+{
+ return {"input", "filter", "bias"};
+}
+
+void CircleDepthwiseConv2DSummaryBuilder::build_attributes(const luci::CircleNode *node,
+ locop::NodeSummary &s)
+{
+ auto dw_conv2d = loco::must_cast<const luci::CircleDepthwiseConv2D *>(node);
+ s.args().append("stride(h,w)", to_str(dw_conv2d->stride()));
+ s.args().append("dilation(h,w)", to_str(dw_conv2d->dilation()));
+ s.args().append("padding", to_str(dw_conv2d->padding()));
+ s.args().append("depthMultiplier", std::to_string(dw_conv2d->depthMultiplier()));
+ s.args().append("fused_activation_function", to_str(dw_conv2d->fusedActivationFunction()));
+}
+
+std::vector<std::string> CircleExpandDimsSummaryBuilder::get_input_names(const luci::CircleNode *)
+{
+ return {"input", "axis"};
+}
+
+std::vector<std::string> CircleFakeQuantSummaryBuilder::get_input_names(const luci::CircleNode *)
+{
+ return {"inputs"};
+}
+
+void CircleFakeQuantSummaryBuilder::build_attributes(const luci::CircleNode *node,
+ locop::NodeSummary &s)
+{
+ auto fake_quant = loco::must_cast<const luci::CircleFakeQuant *>(node);
+ s.args().append("min", std::to_string(fake_quant->min()));
+ s.args().append("max", std::to_string(fake_quant->max()));
+ s.args().append("num_bits", std::to_string(fake_quant->num_bits()));
+ s.args().append("narrow_range", to_str(fake_quant->narrow_range()));
+}
+
+std::vector<std::string> CircleFillSummaryBuilder::get_input_names(const luci::CircleNode *)
+{
+ return {"dims", "value"};
+}
+
+bool CircleFullyConnectedSummaryBuilder::validate(const luci::CircleNode *node)
+{
+ auto fc = loco::must_cast<const luci::CircleFullyConnected *>(node);
+ if (fc->fusedActivationFunction() == luci::FusedActFunc::UNDEFINED)
+ return false;
+
+ return true;
+}
+
+std::vector<std::string>
+CircleFullyConnectedSummaryBuilder::get_input_names(const luci::CircleNode *)
+{
+ return {"input", "weights", "bias"};
+}
+
+void CircleFullyConnectedSummaryBuilder::build_attributes(const luci::CircleNode *node,
+ locop::NodeSummary &s)
+{
+ auto fc = loco::must_cast<const luci::CircleFullyConnected *>(node);
+ s.args().append("fused_activation_function", to_str(fc->fusedActivationFunction()));
+}
+
+std::vector<std::string> CircleGatherSummaryBuilder::get_input_names(const luci::CircleNode *)
+{
+ return {"params", "indices"};
+}
+
+void CircleGatherSummaryBuilder::build_attributes(const luci::CircleNode *node,
+ locop::NodeSummary &s)
+{
+ auto gather = loco::must_cast<const luci::CircleGather *>(node);
+ s.args().append("axis", std::to_string(gather->axis()));
+}
+
+std::vector<std::string> CircleGatherNdSummaryBuilder::get_input_names(const luci::CircleNode *)
+{
+ return {"params", "indices"};
+}
+
+std::vector<std::string> CircleIfSummaryBuilder::get_input_names(const luci::CircleNode *node)
+{
+ auto circle_if = loco::must_cast<const luci::CircleIf *>(node);
+
+ auto input_names = std::vector<std::string>();
+ input_names.push_back("cond");
+ for (uint32_t i = 0; i < circle_if->input_count(); ++i)
+ input_names.push_back("input");
+
+ return input_names;
+}
+
+void CircleIfSummaryBuilder::build_attributes(const luci::CircleNode *node, locop::NodeSummary &s)
+{
+ auto circle_if = loco::must_cast<const luci::CircleIf *>(node);
+
+ if (circle_if->then_graph() != nullptr)
+ s.args().append("then_graph", circle_if->then_graph()->name());
+ else
+ s.args().append("then_branch", std::to_string(circle_if->then_branch()));
+
+ if (circle_if->else_graph() != nullptr)
+ s.args().append("else_graph", circle_if->else_graph()->name());
+ else
+ s.args().append("else_branch", std::to_string(circle_if->else_branch()));
+}
+
+bool CircleInstanceNormSummaryBuilder::validate(const luci::CircleNode *node)
+{
+ auto instnorm = loco::must_cast<const luci::CircleInstanceNorm *>(node);
+ if (instnorm->fusedActivationFunction() == luci::FusedActFunc::UNDEFINED)
+ return false;
+
+ return true;
+}
+
+std::vector<std::string> CircleInstanceNormSummaryBuilder::get_input_names(const luci::CircleNode *)
+{
+ return {"input", "gamma", "beta"};
+}
+
+void CircleInstanceNormSummaryBuilder::build_attributes(const luci::CircleNode *node,
+ locop::NodeSummary &s)
+{
+ auto instnorm = loco::must_cast<const luci::CircleInstanceNorm *>(node);
+ s.args().append("epsilon", std::to_string(instnorm->epsilon()));
+ s.args().append("fused_activation_function", to_str(instnorm->fusedActivationFunction()));
+}
+
+bool CircleL2NormalizeSummaryBuilder::validate(const luci::CircleNode *node)
+{
+ auto l2norm = loco::must_cast<const luci::CircleL2Normalize *>(node);
+ if (l2norm->fusedActivationFunction() == luci::FusedActFunc::UNDEFINED)
+ return false;
+
+ return true;
+}
+
+std::vector<std::string> CircleL2NormalizeSummaryBuilder::get_input_names(const luci::CircleNode *)
+{
+ return {"x"};
+}
+
+void CircleL2NormalizeSummaryBuilder::build_attributes(const luci::CircleNode *node,
+ locop::NodeSummary &s)
+{
+ auto l2norm = loco::must_cast<const luci::CircleL2Normalize *>(node);
+ s.args().append("fused_activation_function", to_str(l2norm->fusedActivationFunction()));
+}
+
+bool CircleL2Pool2DSummaryBuilder::validate(const luci::CircleNode *node)
+{
+ auto l2pool = loco::must_cast<const luci::CircleL2Pool2D *>(node);
+ if (l2pool->fusedActivationFunction() == luci::FusedActFunc::UNDEFINED)
+ return false;
+ if (l2pool->padding() == luci::Padding::UNDEFINED)
+ return false;
+
+ return true;
+}
+
+std::vector<std::string> CircleL2Pool2DSummaryBuilder::get_input_names(const luci::CircleNode *)
+{
+ return {"value"};
+}
+
+void CircleL2Pool2DSummaryBuilder::build_attributes(const luci::CircleNode *node,
+ locop::NodeSummary &s)
+{
+ auto l2pool = loco::must_cast<const luci::CircleL2Pool2D *>(node);
+ s.args().append("filter(h,w)", to_str(l2pool->filter()));
+ s.args().append("stride(h,w)", to_str(l2pool->stride()));
+ s.args().append("padding", to_str(l2pool->padding()));
+ s.args().append("fused_activation_function", to_str(l2pool->fusedActivationFunction()));
+}
+
+void CircleLeakyReluSummaryBuilder::build_attributes(const luci::CircleNode *node,
+ locop::NodeSummary &s)
+{
+ auto leaky_relu = loco::must_cast<const luci::CircleLeakyRelu *>(node);
+ s.args().append("alpha", std::to_string(leaky_relu->alpha()));
+}
+
+void CircleLocalResponseNormalizationSummaryBuilder::build_attributes(const luci::CircleNode *node,
+ locop::NodeSummary &s)
+{
+ auto lrn = loco::must_cast<const luci::CircleLocalResponseNormalization *>(node);
+ s.args().append("radius", std::to_string(lrn->radius()));
+ s.args().append("bias", std::to_string(lrn->bias()));
+ s.args().append("alpha", std::to_string(lrn->alpha()));
+ s.args().append("beta", std::to_string(lrn->beta()));
+}
+
+std::vector<std::string> CircleLogSoftmaxSummaryBuilder::get_input_names(const luci::CircleNode *)
+{
+ return {"logits"};
+}
+
+std::vector<std::string> CircleMatrixDiagSummaryBuilder::get_input_names(const luci::CircleNode *)
+{
+ return {"diagonal"};
+}
+
+std::vector<std::string>
+CircleMatrixSetDiagSummaryBuilder::get_input_names(const luci::CircleNode *)
+{
+ return {"input", "diagonal"};
+}
+
+bool CircleMaxPool2DSummaryBuilder::validate(const luci::CircleNode *node)
+{
+ auto maxpool = loco::must_cast<const luci::CircleMaxPool2D *>(node);
+ if (maxpool->fusedActivationFunction() == luci::FusedActFunc::UNDEFINED)
+ return false;
+ if (maxpool->padding() == luci::Padding::UNDEFINED)
+ return false;
+
+ return true;
+}
+
+std::vector<std::string> CircleMaxPool2DSummaryBuilder::get_input_names(const luci::CircleNode *)
+{
+ return {"value"};
+}
+
+void CircleMaxPool2DSummaryBuilder::build_attributes(const luci::CircleNode *node,
+ locop::NodeSummary &s)
+{
+ auto maxpool = loco::must_cast<const luci::CircleMaxPool2D *>(node);
+ s.args().append("filter(h,w)", to_str(maxpool->filter()));
+ s.args().append("stride(h,w)", to_str(maxpool->stride()));
+ s.args().append("padding", to_str(maxpool->padding()));
+ s.args().append("fused_activation_function", to_str(maxpool->fusedActivationFunction()));
+}
+
+bool CircleMirrorPadSummaryBuilder::validate(const luci::CircleNode *node)
+{
+ auto mirror_pad = loco::must_cast<const luci::CircleMirrorPad *>(node);
+ if (mirror_pad->mode() == luci::MirrorPadMode::UNDEFINED)
+ return false;
+
+ return true;
+}
+
+std::vector<std::string> CircleMirrorPadSummaryBuilder::get_input_names(const luci::CircleNode *)
+{
+ return {"input", "paddings"};
+}
+
+void CircleMirrorPadSummaryBuilder::build_attributes(const luci::CircleNode *node,
+ locop::NodeSummary &s)
+{
+ auto mirror_pad = loco::must_cast<const luci::CircleMirrorPad *>(node);
+ s.args().append("mode", to_str(mirror_pad->mode()));
+}
+
+bool CircleMulSummaryBuilder::validate(const luci::CircleNode *node)
+{
+ auto mul = loco::must_cast<const luci::CircleMul *>(node);
+ if (mul->fusedActivationFunction() == luci::FusedActFunc::UNDEFINED)
+ return false;
+
+ return true;
+}
+
+void CircleMulSummaryBuilder::build_attributes(const luci::CircleNode *node, locop::NodeSummary &s)
+{
+ auto mul = loco::must_cast<const luci::CircleMul *>(node);
+ s.args().append("fused_activation_function", to_str(mul->fusedActivationFunction()));
+}
+
+std::vector<std::string>
+CircleNonMaxSuppressionV4SummaryBuilder::get_input_names(const luci::CircleNode *)
+{
+ return {"boxes", "scores", "max_output_size", "iou_threshold", "score_threshold"};
+}
+
+std::vector<std::string>
+CircleNonMaxSuppressionV5SummaryBuilder::get_input_names(const luci::CircleNode *)
+{
+ return {"boxes", "scores", "max_output_size",
+ "iou_threshold", "score_threshold", "soft_nms_sigma"};
+}
+
+std::vector<std::string> CircleOneHotSummaryBuilder::get_input_names(const luci::CircleNode *)
+{
+ return {"indices", "depth", "on_value", "off_value"};
+}
+
+void CircleOneHotSummaryBuilder::build_attributes(const luci::CircleNode *node,
+ locop::NodeSummary &s)
+{
+ auto onehot = loco::must_cast<const luci::CircleOneHot *>(node);
+ s.args().append("axis", std::to_string(onehot->axis()));
+}
+
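+// Pack takes a variadic number of operands; label each one "values".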
+std::vector<std::string> CirclePackSummaryBuilder::get_input_names(const luci::CircleNode *node)
+{
+ return std::vector<std::string>(node->arity(), "values");
+}
+
+void CirclePackSummaryBuilder::build_attributes(const luci::CircleNode *node, locop::NodeSummary &s)
+{
+ auto pack = loco::must_cast<const luci::CirclePack *>(node);
+ s.args().append("values_count", std::to_string(pack->values_count()));
+ s.args().append("axis", std::to_string(pack->axis()));
+}
+
+std::vector<std::string> CirclePadSummaryBuilder::get_input_names(const luci::CircleNode *)
+{
+ return {"input", "paddings"};
+}
+
+std::vector<std::string> CirclePadV2SummaryBuilder::get_input_names(const luci::CircleNode *)
+{
+ return {"input", "paddings", "constant_values"};
+}
+
+std::vector<std::string> CirclePReluSummaryBuilder::get_input_names(const luci::CircleNode *)
+{
+ return {"input", "alpha"};
+}
+
+std::vector<std::string> CircleRangeSummaryBuilder::get_input_names(const luci::CircleNode *)
+{
+ return {"start", "limit", "delta"};
+}
+
+std::vector<std::string> CircleReshapeSummaryBuilder::get_input_names(const luci::CircleNode *)
+{
+ return {"tensor", "shape"};
+}
+
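+// Reshape's summary omits the new_shape attribute, so it is marked
+// PartiallyKnown rather than Complete.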
+void CircleReshapeSummaryBuilder::update_status(locop::NodeSummary &s)
+{
+ s.state(locop::NodeDesc::State::PartiallyKnown);
+}
+
+std::vector<std::string>
+CircleResizeBilinearSummaryBuilder::get_input_names(const luci::CircleNode *)
+{
+ return {"input", "size"};
+}
+
+void CircleResizeBilinearSummaryBuilder::build_attributes(const luci::CircleNode *node,
+ locop::NodeSummary &s)
+{
+ auto resize_bilinear = loco::must_cast<const luci::CircleResizeBilinear *>(node);
+ s.args().append("align_corners", to_str(resize_bilinear->align_corners()));
+ s.args().append("half_pixel_centers", to_str(resize_bilinear->half_pixel_centers()));
+}
+
+std::vector<std::string>
+CircleResizeNearestNeighborSummaryBuilder::get_input_names(const luci::CircleNode *)
+{
+ return {"input", "size"};
+}
+
+void CircleResizeNearestNeighborSummaryBuilder::build_attributes(const luci::CircleNode *node,
+ locop::NodeSummary &s)
+{
+ auto resize_nn = loco::must_cast<const luci::CircleResizeNearestNeighbor *>(node);
+ s.args().append("align_corners", to_str(resize_nn->align_corners()));
+}
+
+std::vector<std::string>
+CircleReverseSequenceSummaryBuilder::get_input_names(const luci::CircleNode *)
+{
+ return {"input", "seq_lengths"};
+}
+
+void CircleReverseSequenceSummaryBuilder::build_attributes(const luci::CircleNode *node,
+ locop::NodeSummary &s)
+{
+ auto reverse_seq = loco::must_cast<const luci::CircleReverseSequence *>(node);
+ s.args().append("seq_axis", std::to_string(reverse_seq->seq_axis()));
+ s.args().append("batch_axis", std::to_string(reverse_seq->batch_axis()));
+}
+
+std::vector<std::string> CircleReverseV2SummaryBuilder::get_input_names(const luci::CircleNode *)
+{
+ return {"tensor", "axis"};
+}
+
+std::vector<std::string> CircleScatterNdSummaryBuilder::get_input_names(const luci::CircleNode *)
+{
+ return {"indices", "updates", "shape"};
+}
+
+std::vector<std::string> CircleSegmentSumSummaryBuilder::get_input_names(const luci::CircleNode *)
+{
+ return {"input", "segment_ids"};
+}
+
+std::vector<std::string> CircleSelectSummaryBuilder::get_input_names(const luci::CircleNode *)
+{
+ return {"condition", "t", "e"};
+}
+
+std::vector<std::string> CircleSelectV2SummaryBuilder::get_input_names(const luci::CircleNode *)
+{
+ return {"condition", "t", "e"};
+}
+
+void CircleShapeSummaryBuilder::build_attributes(const luci::CircleNode *node,
+ locop::NodeSummary &s)
+{
+ auto shape = loco::must_cast<const luci::CircleShape *>(node);
+ s.args().append("out_type", to_str(shape->out_type()));
+}
+
+std::vector<std::string> CircleSliceSummaryBuilder::get_input_names(const luci::CircleNode *)
+{
+ return {"input", "begin", "size"};
+}
+
+std::vector<std::string> CircleSoftmaxSummaryBuilder::get_input_names(const luci::CircleNode *)
+{
+ return {"logits"};
+}
+
+void CircleSoftmaxSummaryBuilder::build_attributes(const luci::CircleNode *node,
+ locop::NodeSummary &s)
+{
+ auto softmax = loco::must_cast<const luci::CircleSoftmax *>(node);
+ s.args().append("beta", to_str(softmax->beta()));
+}
+
+std::vector<std::string>
+CircleSpaceToBatchNDSummaryBuilder::get_input_names(const luci::CircleNode *)
+{
+ return {"input", "block_shape", "paddings"};
+}
+
+void CircleSpaceToDepthSummaryBuilder::build_attributes(const luci::CircleNode *node,
+ locop::NodeSummary &s)
+{
+ auto space_to_depth = loco::must_cast<const luci::CircleSpaceToDepth *>(node);
+ s.args().append("block_size", to_str(space_to_depth->block_size()));
+}
+
+std::vector<std::string>
+CircleSparseToDenseSummaryBuilder::get_input_names(const luci::CircleNode *)
+{
+ return {"indices", "output_shape", "values", "default_value"};
+}
+
+void CircleSparseToDenseSummaryBuilder::build_attributes(const luci::CircleNode *node,
+ locop::NodeSummary &s)
+{
+ auto sparse_to_dense = loco::must_cast<const luci::CircleSparseToDense *>(node);
+ s.args().append("validate_indices", to_str(sparse_to_dense->validate_indices()));
+}
+
+std::vector<std::string> CircleSplitSummaryBuilder::get_input_names(const luci::CircleNode *)
+{
+ return {"split_dim", "input"};
+}
+
+void CircleSplitSummaryBuilder::build_attributes(const luci::CircleNode *node,
+ locop::NodeSummary &s)
+{
+ auto split = loco::must_cast<const luci::CircleSplit *>(node);
+ s.args().append("num_split", std::to_string(split->num_split()));
+}
+
+std::vector<std::string> CircleSplitVSummaryBuilder::get_input_names(const luci::CircleNode *)
+{
+ return {"input", "size_splits", "split_dim"};
+}
+
+void CircleSplitVSummaryBuilder::build_attributes(const luci::CircleNode *node,
+ locop::NodeSummary &s)
+{
+ auto split_v = loco::must_cast<const luci::CircleSplitV *>(node);
+ s.args().append("num_split", std::to_string(split_v->num_split()));
+}
+
+void CircleSqueezeSummaryBuilder::build_attributes(const luci::CircleNode *node,
+ locop::NodeSummary &s)
+{
+ auto squeeze = loco::must_cast<const luci::CircleSqueeze *>(node);
+
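+ // Render squeeze_dims as a parenthesized, comma-separated list, e.g. "(0, 2)"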
+ std::string squeeze_dims = "(";
+ for (size_t i = 0; i < squeeze->squeeze_dims().size(); ++i)
+ {
+ if (i != 0)
+ squeeze_dims += ", ";
+ squeeze_dims += std::to_string(squeeze->squeeze_dims().at(i));
+ }
+ squeeze_dims += ")";
+
+ s.args().append("squeeze_dims", squeeze_dims);
+}
+
+std::vector<std::string> CircleStridedSliceSummaryBuilder::get_input_names(const luci::CircleNode *)
+{
+ return {"input", "begin", "end", "strides"};
+}
+
+void CircleStridedSliceSummaryBuilder::build_attributes(const luci::CircleNode *node,
+ locop::NodeSummary &s)
+{
+ auto strided_slice = loco::must_cast<const luci::CircleStridedSlice *>(node);
+ s.args().append("begin_mask", std::to_string(strided_slice->begin_mask()));
+ s.args().append("end_mask", std::to_string(strided_slice->end_mask()));
+ s.args().append("ellipsis_mask", std::to_string(strided_slice->ellipsis_mask()));
+ s.args().append("new_axis_mask", std::to_string(strided_slice->new_axis_mask()));
+ s.args().append("shrink_axis_mask", std::to_string(strided_slice->shrink_axis_mask()));
+}
+
+bool CircleSVDFSummaryBuilder::validate(const luci::CircleNode *node)
+{
+ auto svdf = loco::must_cast<const luci::CircleSVDF *>(node);
+ if (svdf->fusedActivationFunction() == luci::FusedActFunc::UNDEFINED)
+ return false;
+
+ return true;
+}
+
+std::vector<std::string> CircleSVDFSummaryBuilder::get_input_names(const luci::CircleNode *)
+{
+ return {"input", "weight_feature", "weight_time", "bias", "State"};
+}
+
+void CircleSVDFSummaryBuilder::build_attributes(const luci::CircleNode *node, locop::NodeSummary &s)
+{
+ auto svdf = loco::must_cast<const luci::CircleSVDF *>(node);
+ s.args().append("rank", to_str(svdf->svdf_rank()));
+ s.args().append("asymmetric_quantize_inputs", to_str(svdf->asymmetric_quantize_inputs()));
+ s.args().append("fused_activation_function", to_str(svdf->fusedActivationFunction()));
+}
+
+std::vector<std::string> CircleTileSummaryBuilder::get_input_names(const luci::CircleNode *)
+{
+ return {"input", "multiples"};
+}
+
+std::vector<std::string> CircleTopKV2SummaryBuilder::get_input_names(const luci::CircleNode *)
+{
+ return {"input", "k"};
+}
+
+std::vector<std::string> CircleTransposeSummaryBuilder::get_input_names(const luci::CircleNode *)
+{
+ return {"a", "perm"};
+}
+
+bool CircleTransposeConvSummaryBuilder::validate(const luci::CircleNode *node)
+{
+ auto transpose_conv = loco::must_cast<const luci::CircleTransposeConv *>(node);
+ if (transpose_conv->padding() == luci::Padding::UNDEFINED)
+ return false;
+
+ return true;
+}
+
+std::vector<std::string>
+CircleTransposeConvSummaryBuilder::get_input_names(const luci::CircleNode *)
+{
+ return {"inputSizes", "filter", "outBackProp", "bias"};
+}
+
+void CircleTransposeConvSummaryBuilder::build_attributes(const luci::CircleNode *node,
+ locop::NodeSummary &s)
+{
+ auto transpose_conv = loco::must_cast<const luci::CircleTransposeConv *>(node);
+ s.args().append("stride(h,w)", to_str(transpose_conv->stride()));
+ s.args().append("padding", to_str(transpose_conv->padding()));
+}
+
+std::vector<std::string>
+CircleUnidirectionalSequenceLSTMSummaryBuilder::get_input_names(const luci::CircleNode *)
+{
+ return {"input",
+ "input_to_input_weights",
+ "input_to_forget_weights",
+ "input_to_cell_weights",
+ "input_to_output_weights",
+ "recurrent_to_input_weights",
+ "recurrent_to_forget_weights",
+ "recurrent_to_cell_weights",
+ "recurrent_to_output_weights",
+ "cell_to_input_weights",
+ "cell_to_forget_weights",
+ "cell_to_output_weights",
+ "input_gate_bias",
+ "forget_gate_bias",
+ "cell_gate_bias",
+ "output_gate_bias",
+ "projection_weights",
+ "projection_bias",
+ "activation_state",
+ "cell_state",
+ "input_layer_norm_coefficients",
+ "forget_layer_norm_coefficients",
+ "cell_layer_norm_coefficients",
+ "output_layer_norm_coefficients"};
+}
+
+void CircleUnidirectionalSequenceLSTMSummaryBuilder::build_attributes(const luci::CircleNode *node,
+ locop::NodeSummary &s)
+{
+ auto lstm = loco::must_cast<const luci::CircleUnidirectionalSequenceLSTM *>(node);
+ s.args().append("cell_clip", to_str(lstm->cell_clip()));
+ s.args().append("proj_clip", to_str(lstm->proj_clip()));
+ s.args().append("time_major", to_str(lstm->time_major()));
+ s.args().append("asymmetric_quantize_inputs", to_str(lstm->asymmetric_quantize_inputs()));
+}
+
+void CircleUniqueSummaryBuilder::build_attributes(const luci::CircleNode *node,
+ locop::NodeSummary &s)
+{
+ auto unique = loco::must_cast<const luci::CircleUnique *>(node);
+ s.args().append("idx_out_type", to_str(unique->idx_out_type()));
+}
+
+std::vector<std::string> CircleUnpackSummaryBuilder::get_input_names(const luci::CircleNode *)
+{
+ return {"value"};
+}
+
+void CircleUnpackSummaryBuilder::build_attributes(const luci::CircleNode *node,
+ locop::NodeSummary &s)
+{
+ auto unpack = loco::must_cast<const luci::CircleUnpack *>(node);
+ s.args().append("num", std::to_string(unpack->num()));
+ s.args().append("axis", std::to_string(unpack->axis()));
+}
+
+std::vector<std::string> CircleWhereSummaryBuilder::get_input_names(const luci::CircleNode *)
+{
+ return {"condition"};
+}
+
+std::vector<std::string> CircleWhileSummaryBuilder::get_input_names(const luci::CircleNode *node)
+{
+ auto circle_while = loco::must_cast<const luci::CircleWhile *>(node);
+
+ auto input_names = std::vector<std::string>();
+ for (uint32_t i = 0; i < circle_while->input_count(); ++i)
+ input_names.push_back("input");
+
+ return input_names;
+}
+
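+// Like If, While refers to its cond/body either as linked subgraphs or as raw
+// subgraph indices.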
+void CircleWhileSummaryBuilder::build_attributes(const luci::CircleNode *node,
+ locop::NodeSummary &s)
+{
+ auto circle_while = loco::must_cast<const luci::CircleWhile *>(node);
+
+ if (circle_while->cond_graph() != nullptr)
+ s.args().append("then_graph", circle_while->cond_graph()->name());
+ else
+ s.args().append("then_branch", std::to_string(circle_while->cond_branch()));
+
+ if (circle_while->body_graph() != nullptr)
+ s.args().append("else_graph", circle_while->body_graph()->name());
+ else
+ s.args().append("else_branch", std::to_string(circle_while->body_branch()));
+}
+
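+// Builders for graph outputs and virtual output nodes; each names its single
+// input.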
+std::vector<std::string> CircleOutputSummaryBuilder::get_input_names(const luci::CircleNode *)
+{
+ return {"from"};
+}
+
+std::vector<std::string> CircleTopKV2OutSummaryBuilder::get_input_names(const luci::CircleNode *)
+{
+ return {"topkv2"};
+}
+
+std::vector<std::string> CircleUniqueOutSummaryBuilder::get_input_names(const luci::CircleNode *)
+{
+ return {"unique"};
+}
+
+std::vector<std::string> CircleUnpackOutSummaryBuilder::get_input_names(const luci::CircleNode *)
+{
+ return {"unpack"};
+}
+
+std::vector<std::string> CircleWhileOutSummaryBuilder::get_input_names(const luci::CircleNode *)
+{
+ return {"while"};
+}
+
+} // namespace luci
diff --git a/compiler/luci/logex/src/CircleNodeSummaryBuilders.h b/compiler/luci/logex/src/CircleNodeSummaryBuilders.h
new file mode 100644
index 000000000..6cd24b7f1
--- /dev/null
+++ b/compiler/luci/logex/src/CircleNodeSummaryBuilders.h
@@ -0,0 +1,821 @@
+/*
+ * Copyright (c) 2022 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __LUCI_LOGEX_CIRCLE_NODE_SUMMARY_BUILDERS__
+#define __LUCI_LOGEX_CIRCLE_NODE_SUMMARY_BUILDERS__
+
+#include "CircleNodeSummaryBuilder.h"
+
+#include <luci/IR/CircleNode.h>
+
+#include <string>
+#include <vector>
+
+namespace luci
+{
+
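+// Thin base builders for operations that share a single input-naming
+// convention: "x", "input", "x"/"y", and "features" respectively.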
+class CircleNodeWithXSummaryBuilder : public CircleNodeSummaryBuilder
+{
+private:
+ std::vector<std::string> get_input_names(const luci::CircleNode *);
+};
+
+class CircleNodeWithINPUTSummaryBuilder : public CircleNodeSummaryBuilder
+{
+private:
+ std::vector<std::string> get_input_names(const luci::CircleNode *);
+};
+
+class CircleNodeWithXYSummaryBuilder : public CircleNodeSummaryBuilder
+{
+private:
+ std::vector<std::string> get_input_names(const luci::CircleNode *);
+};
+
+class CircleNodeWithFEATURESSummaryBuilder : public CircleNodeSummaryBuilder
+{
+private:
+ std::vector<std::string> get_input_names(const luci::CircleNode *);
+};
+
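+// Shared builder for reduction operations (Mean, Sum, ReduceMax, ...): all
+// take "input" and "reduction_indices" and carry a keep_dims attribute.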
+template <class REDUCER_NODE>
+class CircleNodeWithReducerSummaryBuilder : public CircleNodeSummaryBuilder
+{
+private:
+ std::vector<std::string> get_input_names(const luci::CircleNode *)
+ {
+ return {"input", "reduction_indices"};
+ }
+
+ void build_attributes(const luci::CircleNode *node, locop::NodeSummary &s)
+ {
+ auto reducer = loco::must_cast<const REDUCER_NODE *>(node);
+ s.args().append("keep_dims", reducer->keep_dims() ? "true" : "false");
+ }
+};
+
+} // namespace luci
+
+namespace luci
+{
+
+class CircleAbsSummaryBuilder final : public CircleNodeWithXSummaryBuilder
+{
+};
+
+class CircleAddSummaryBuilder final : public CircleNodeWithXYSummaryBuilder
+{
+private:
+ bool validate(const luci::CircleNode *node);
+ void build_attributes(const luci::CircleNode *node, locop::NodeSummary &s);
+};
+
+class CircleAddNSummaryBuilder final : public CircleNodeSummaryBuilder
+{
+private:
+ std::vector<std::string> get_input_names(const luci::CircleNode *node);
+};
+
+class CircleArgMaxSummaryBuilder final : public CircleNodeSummaryBuilder
+{
+private:
+ std::vector<std::string> get_input_names(const luci::CircleNode *);
+ void build_attributes(const luci::CircleNode *node, locop::NodeSummary &s);
+};
+
+class CircleArgMinSummaryBuilder final : public CircleNodeSummaryBuilder
+{
+private:
+ std::vector<std::string> get_input_names(const luci::CircleNode *);
+ void build_attributes(const luci::CircleNode *node, locop::NodeSummary &s);
+};
+
+class CircleAveragePool2DSummaryBuilder final : public CircleNodeSummaryBuilder
+{
+private:
+ bool validate(const luci::CircleNode *node);
+ std::vector<std::string> get_input_names(const luci::CircleNode *);
+ void build_attributes(const luci::CircleNode *node, locop::NodeSummary &s);
+};
+
+class CircleBatchMatMulSummaryBuilder final : public CircleNodeWithXYSummaryBuilder
+{
+private:
+ void build_attributes(const luci::CircleNode *node, locop::NodeSummary &s);
+};
+
+class CircleBatchToSpaceNDSummaryBuilder final : public CircleNodeSummaryBuilder
+{
+private:
+ std::vector<std::string> get_input_names(const luci::CircleNode *);
+};
+
+class CircleBCQFullyConnectedSummaryBuilder final : public CircleNodeSummaryBuilder
+{
+private:
+ bool validate(const luci::CircleNode *node);
+ std::vector<std::string> get_input_names(const luci::CircleNode *);
+ void build_attributes(const luci::CircleNode *node, locop::NodeSummary &s);
+};
+
+class CircleBCQGatherSummaryBuilder final : public CircleNodeSummaryBuilder
+{
+private:
+ std::vector<std::string> get_input_names(const luci::CircleNode *);
+ void build_attributes(const luci::CircleNode *node, locop::NodeSummary &s);
+};
+
+class CircleBidirectionalSequenceLSTMSummaryBuilder final : public CircleNodeSummaryBuilder
+{
+private:
+ std::vector<std::string> get_input_names(const luci::CircleNode *);
+ void build_attributes(const luci::CircleNode *node, locop::NodeSummary &s);
+};
+
+class CircleCastSummaryBuilder final : public CircleNodeSummaryBuilder
+{
+private:
+ std::vector<std::string> get_input_names(const luci::CircleNode *);
+ void build_attributes(const luci::CircleNode *node, locop::NodeSummary &s);
+};
+
+class CircleCeilSummaryBuilder final : public CircleNodeWithXSummaryBuilder
+{
+};
+
+class CircleConcatenationSummaryBuilder final : public CircleNodeSummaryBuilder
+{
+private:
+ bool validate(const luci::CircleNode *node);
+ std::vector<std::string> get_input_names(const luci::CircleNode *node);
+ void build_attributes(const luci::CircleNode *node, locop::NodeSummary &s);
+};
+
+class CircleConstSummaryBuilder final : public CircleNodeSummaryBuilder
+{
+private:
+ void update_status(locop::NodeSummary &s);
+};
+
+class CircleConv2DSummaryBuilder final : public CircleNodeSummaryBuilder
+{
+private:
+ bool validate(const luci::CircleNode *node);
+ std::vector<std::string> get_input_names(const luci::CircleNode *);
+ void build_attributes(const luci::CircleNode *node, locop::NodeSummary &s);
+};
+
+class CircleCosSummaryBuilder final : public CircleNodeWithXSummaryBuilder
+{
+};
+
+class CircleCustomSummaryBuilder final : public CircleNodeSummaryBuilder
+{
+private:
+ std::vector<std::string> get_input_names(const luci::CircleNode *node);
+ void build_attributes(const luci::CircleNode *node, locop::NodeSummary &s);
+};
+
+class CircleDepthToSpaceSummaryBuilder final : public CircleNodeWithINPUTSummaryBuilder
+{
+private:
+ void build_attributes(const luci::CircleNode *node, locop::NodeSummary &s);
+};
+
+class CircleDepthwiseConv2DSummaryBuilder final : public CircleNodeSummaryBuilder
+{
+private:
+ bool validate(const luci::CircleNode *node);
+ std::vector<std::string> get_input_names(const luci::CircleNode *);
+ void build_attributes(const luci::CircleNode *node, locop::NodeSummary &s);
+};
+
+class CircleDequantizeSummaryBuilder final : public CircleNodeWithINPUTSummaryBuilder
+{
+};
+
+class CircleDivSummaryBuilder final : public CircleNodeWithXYSummaryBuilder
+{
+};
+
+class CircleEluSummaryBuilder final : public CircleNodeWithFEATURESSummaryBuilder
+{
+};
+
+class CircleEqualSummaryBuilder final : public CircleNodeWithXYSummaryBuilder
+{
+};
+
+class CircleExpSummaryBuilder final : public CircleNodeWithXSummaryBuilder
+{
+};
+
+class CircleExpandDimsSummaryBuilder final : public CircleNodeSummaryBuilder
+{
+private:
+ std::vector<std::string> get_input_names(const luci::CircleNode *);
+};
+
+class CircleFakeQuantSummaryBuilder final : public CircleNodeSummaryBuilder
+{
+private:
+ std::vector<std::string> get_input_names(const luci::CircleNode *);
+ void build_attributes(const luci::CircleNode *node, locop::NodeSummary &s);
+};
+
+class CircleFillSummaryBuilder final : public CircleNodeSummaryBuilder
+{
+private:
+ std::vector<std::string> get_input_names(const luci::CircleNode *);
+};
+
+class CircleFloorSummaryBuilder final : public CircleNodeWithXSummaryBuilder
+{
+};
+
+class CircleFloorDivSummaryBuilder final : public CircleNodeWithXYSummaryBuilder
+{
+};
+
+class CircleFloorModSummaryBuilder final : public CircleNodeWithXYSummaryBuilder
+{
+};
+
+class CircleFullyConnectedSummaryBuilder final : public CircleNodeSummaryBuilder
+{
+private:
+ bool validate(const luci::CircleNode *node);
+ std::vector<std::string> get_input_names(const luci::CircleNode *);
+ void build_attributes(const luci::CircleNode *node, locop::NodeSummary &s);
+};
+
+class CircleGatherSummaryBuilder final : public CircleNodeSummaryBuilder
+{
+private:
+ std::vector<std::string> get_input_names(const luci::CircleNode *);
+ void build_attributes(const luci::CircleNode *node, locop::NodeSummary &s);
+};
+
+class CircleGatherNdSummaryBuilder final : public CircleNodeSummaryBuilder
+{
+private:
+ std::vector<std::string> get_input_names(const luci::CircleNode *);
+};
+
+class CircleGreaterSummaryBuilder final : public CircleNodeWithXYSummaryBuilder
+{
+};
+
+class CircleGreaterEqualSummaryBuilder final : public CircleNodeWithXYSummaryBuilder
+{
+};
+
+class CircleIfSummaryBuilder final : public CircleNodeSummaryBuilder
+{
+private:
+ std::vector<std::string> get_input_names(const luci::CircleNode *node);
+ void build_attributes(const luci::CircleNode *node, locop::NodeSummary &s);
+};
+
+class CircleInstanceNormSummaryBuilder final : public CircleNodeSummaryBuilder
+{
+private:
+ bool validate(const luci::CircleNode *node);
+ std::vector<std::string> get_input_names(const luci::CircleNode *);
+ void build_attributes(const luci::CircleNode *node, locop::NodeSummary &s);
+};
+
+class CircleL2NormalizeSummaryBuilder final : public CircleNodeSummaryBuilder
+{
+private:
+ bool validate(const luci::CircleNode *node);
+ std::vector<std::string> get_input_names(const luci::CircleNode *);
+ void build_attributes(const luci::CircleNode *node, locop::NodeSummary &s);
+};
+
+class CircleL2Pool2DSummaryBuilder final : public CircleNodeSummaryBuilder
+{
+private:
+ bool validate(const luci::CircleNode *node);
+ std::vector<std::string> get_input_names(const luci::CircleNode *);
+ void build_attributes(const luci::CircleNode *node, locop::NodeSummary &s);
+};
+
+class CircleLeakyReluSummaryBuilder final : public CircleNodeWithFEATURESSummaryBuilder
+{
+private:
+ void build_attributes(const luci::CircleNode *node, locop::NodeSummary &s);
+};
+
+class CircleLessSummaryBuilder final : public CircleNodeWithXYSummaryBuilder
+{
+};
+
+class CircleLessEqualSummaryBuilder final : public CircleNodeWithXYSummaryBuilder
+{
+};
+
+class CircleLocalResponseNormalizationSummaryBuilder final
+ : public CircleNodeWithINPUTSummaryBuilder
+{
+private:
+ void build_attributes(const luci::CircleNode *node, locop::NodeSummary &s);
+};
+
+class CircleLogSummaryBuilder final : public CircleNodeWithXSummaryBuilder
+{
+};
+
+class CircleLogicalAndSummaryBuilder final : public CircleNodeWithXYSummaryBuilder
+{
+};
+
+class CircleLogicalNotSummaryBuilder final : public CircleNodeWithXSummaryBuilder
+{
+};
+
+class CircleLogicalOrSummaryBuilder final : public CircleNodeWithXYSummaryBuilder
+{
+};
+
+class CircleLogisticSummaryBuilder final : public CircleNodeWithXSummaryBuilder
+{
+};
+
+class CircleLogSoftmaxSummaryBuilder final : public CircleNodeSummaryBuilder
+{
+private:
+ std::vector<std::string> get_input_names(const luci::CircleNode *);
+};
+
+class CircleMatrixDiagSummaryBuilder final : public CircleNodeSummaryBuilder
+{
+private:
+ std::vector<std::string> get_input_names(const luci::CircleNode *);
+};
+
+class CircleMatrixSetDiagSummaryBuilder final : public CircleNodeSummaryBuilder
+{
+private:
+ std::vector<std::string> get_input_names(const luci::CircleNode *);
+};
+
+class CircleMaximumSummaryBuilder final : public CircleNodeWithXYSummaryBuilder
+{
+};
+
+class CircleMaxPool2DSummaryBuilder final : public CircleNodeSummaryBuilder
+{
+private:
+ bool validate(const luci::CircleNode *node);
+ std::vector<std::string> get_input_names(const luci::CircleNode *);
+ void build_attributes(const luci::CircleNode *node, locop::NodeSummary &s);
+};
+
+class CircleMeanSummaryBuilder final : public CircleNodeWithReducerSummaryBuilder<luci::CircleMean>
+{
+};
+
+class CircleMinimumSummaryBuilder final : public CircleNodeWithXYSummaryBuilder
+{
+};
+
+class CircleMirrorPadSummaryBuilder final : public CircleNodeSummaryBuilder
+{
+private:
+ bool validate(const luci::CircleNode *node);
+ std::vector<std::string> get_input_names(const luci::CircleNode *);
+ void build_attributes(const luci::CircleNode *node, locop::NodeSummary &s);
+};
+
+class CircleMulSummaryBuilder final : public CircleNodeWithXYSummaryBuilder
+{
+private:
+ bool validate(const luci::CircleNode *node);
+ void build_attributes(const luci::CircleNode *node, locop::NodeSummary &s);
+};
+
+class CircleNegSummaryBuilder final : public CircleNodeWithXSummaryBuilder
+{
+};
+
+class CircleNonMaxSuppressionV4SummaryBuilder final : public CircleNodeSummaryBuilder
+{
+private:
+ std::vector<std::string> get_input_names(const luci::CircleNode *);
+};
+
+class CircleNonMaxSuppressionV5SummaryBuilder final : public CircleNodeSummaryBuilder
+{
+private:
+ std::vector<std::string> get_input_names(const luci::CircleNode *);
+};
+
+class CircleNotEqualSummaryBuilder final : public CircleNodeWithXYSummaryBuilder
+{
+};
+
+class CircleOneHotSummaryBuilder final : public CircleNodeSummaryBuilder
+{
+private:
+ std::vector<std::string> get_input_names(const luci::CircleNode *);
+ void build_attributes(const luci::CircleNode *node, locop::NodeSummary &s);
+};
+
+class CirclePackSummaryBuilder final : public CircleNodeSummaryBuilder
+{
+private:
+ std::vector<std::string> get_input_names(const luci::CircleNode *node);
+ void build_attributes(const luci::CircleNode *node, locop::NodeSummary &s);
+};
+
+class CirclePadSummaryBuilder final : public CircleNodeSummaryBuilder
+{
+private:
+ std::vector<std::string> get_input_names(const luci::CircleNode *);
+};
+
+class CirclePadV2SummaryBuilder final : public CircleNodeSummaryBuilder
+{
+private:
+ std::vector<std::string> get_input_names(const luci::CircleNode *);
+};
+
+class CirclePowSummaryBuilder final : public CircleNodeWithXYSummaryBuilder
+{
+};
+
+class CirclePReluSummaryBuilder final : public CircleNodeSummaryBuilder
+{
+private:
+ std::vector<std::string> get_input_names(const luci::CircleNode *);
+};
+
+class CircleQuantizeSummaryBuilder final : public CircleNodeWithINPUTSummaryBuilder
+{
+};
+
+class CircleRangeSummaryBuilder final : public CircleNodeSummaryBuilder
+{
+private:
+ std::vector<std::string> get_input_names(const luci::CircleNode *);
+};
+
+class CircleRankSummaryBuilder final : public CircleNodeWithINPUTSummaryBuilder
+{
+};
+
+class CircleReduceAnySummaryBuilder final
+ : public CircleNodeWithReducerSummaryBuilder<luci::CircleReduceAny>
+{
+};
+
+class CircleReduceMaxSummaryBuilder final
+ : public CircleNodeWithReducerSummaryBuilder<luci::CircleReduceMax>
+{
+};
+
+class CircleReduceMinSummaryBuilder final
+ : public CircleNodeWithReducerSummaryBuilder<luci::CircleReduceMin>
+{
+};
+
+class CircleReduceProdSummaryBuilder final
+ : public CircleNodeWithReducerSummaryBuilder<luci::CircleReduceProd>
+{
+};
+
+class CircleReluSummaryBuilder final : public CircleNodeWithFEATURESSummaryBuilder
+{
+};
+
+class CircleRelu6SummaryBuilder final : public CircleNodeWithFEATURESSummaryBuilder
+{
+};
+
+class CircleReluN1To1SummaryBuilder final : public CircleNodeWithFEATURESSummaryBuilder
+{
+};
+
+class CircleReshapeSummaryBuilder final : public CircleNodeSummaryBuilder
+{
+private:
+ std::vector<std::string> get_input_names(const luci::CircleNode *);
+ void update_status(locop::NodeSummary &s);
+};
+
+class CircleResizeBilinearSummaryBuilder final : public CircleNodeSummaryBuilder
+{
+private:
+ std::vector<std::string> get_input_names(const luci::CircleNode *);
+ void build_attributes(const luci::CircleNode *node, locop::NodeSummary &s);
+};
+
+class CircleResizeNearestNeighborSummaryBuilder final : public CircleNodeSummaryBuilder
+{
+private:
+ std::vector<std::string> get_input_names(const luci::CircleNode *);
+ void build_attributes(const luci::CircleNode *node, locop::NodeSummary &s);
+};
+
+class CircleReverseSequenceSummaryBuilder final : public CircleNodeSummaryBuilder
+{
+private:
+ std::vector<std::string> get_input_names(const luci::CircleNode *);
+ void build_attributes(const luci::CircleNode *node, locop::NodeSummary &s);
+};
+
+class CircleReverseV2SummaryBuilder final : public CircleNodeSummaryBuilder
+{
+private:
+ std::vector<std::string> get_input_names(const luci::CircleNode *);
+};
+
+class CircleRoundSummaryBuilder final : public CircleNodeWithXSummaryBuilder
+{
+};
+
+class CircleRsqrtSummaryBuilder final : public CircleNodeWithXSummaryBuilder
+{
+};
+
+class CircleScatterNdSummaryBuilder final : public CircleNodeSummaryBuilder
+{
+private:
+ std::vector<std::string> get_input_names(const luci::CircleNode *);
+};
+
+class CircleSegmentSumSummaryBuilder final : public CircleNodeSummaryBuilder
+{
+private:
+ std::vector<std::string> get_input_names(const luci::CircleNode *);
+};
+
+class CircleSelectSummaryBuilder final : public CircleNodeSummaryBuilder
+{
+private:
+ std::vector<std::string> get_input_names(const luci::CircleNode *);
+};
+
+class CircleSelectV2SummaryBuilder final : public CircleNodeSummaryBuilder
+{
+private:
+ std::vector<std::string> get_input_names(const luci::CircleNode *);
+};
+
+class CircleShapeSummaryBuilder final : public CircleNodeWithINPUTSummaryBuilder
+{
+private:
+ void build_attributes(const luci::CircleNode *node, locop::NodeSummary &s);
+};
+
+class CircleSinSummaryBuilder final : public CircleNodeWithXSummaryBuilder
+{
+};
+
+class CircleSliceSummaryBuilder final : public CircleNodeSummaryBuilder
+{
+private:
+ std::vector<std::string> get_input_names(const luci::CircleNode *);
+};
+
+class CircleSoftmaxSummaryBuilder final : public CircleNodeSummaryBuilder
+{
+private:
+ std::vector<std::string> get_input_names(const luci::CircleNode *);
+ void build_attributes(const luci::CircleNode *node, locop::NodeSummary &s);
+};
+
+class CircleSpaceToBatchNDSummaryBuilder final : public CircleNodeSummaryBuilder
+{
+private:
+ std::vector<std::string> get_input_names(const luci::CircleNode *);
+};
+
+class CircleSpaceToDepthSummaryBuilder final : public CircleNodeWithINPUTSummaryBuilder
+{
+private:
+ void build_attributes(const luci::CircleNode *node, locop::NodeSummary &s);
+};
+
+class CircleSparseToDenseSummaryBuilder final : public CircleNodeSummaryBuilder
+{
+private:
+ std::vector<std::string> get_input_names(const luci::CircleNode *);
+ void build_attributes(const luci::CircleNode *node, locop::NodeSummary &s);
+};
+
+class CircleSplitSummaryBuilder final : public CircleNodeSummaryBuilder
+{
+private:
+ std::vector<std::string> get_input_names(const luci::CircleNode *);
+ void build_attributes(const luci::CircleNode *node, locop::NodeSummary &s);
+};
+
+class CircleSplitVSummaryBuilder final : public CircleNodeSummaryBuilder
+{
+private:
+ std::vector<std::string> get_input_names(const luci::CircleNode *);
+ void build_attributes(const luci::CircleNode *node, locop::NodeSummary &s);
+};
+
+class CircleSqrtSummaryBuilder final : public CircleNodeWithXSummaryBuilder
+{
+};
+
+class CircleSquareSummaryBuilder final : public CircleNodeWithXSummaryBuilder
+{
+};
+
+class CircleSquaredDifferenceSummaryBuilder final : public CircleNodeWithXYSummaryBuilder
+{
+};
+
+class CircleSqueezeSummaryBuilder final : public CircleNodeWithINPUTSummaryBuilder
+{
+private:
+ void build_attributes(const luci::CircleNode *node, locop::NodeSummary &s);
+};
+
+class CircleStridedSliceSummaryBuilder final : public CircleNodeSummaryBuilder
+{
+private:
+ std::vector<std::string> get_input_names(const luci::CircleNode *);
+ void build_attributes(const luci::CircleNode *node, locop::NodeSummary &s);
+};
+
+class CircleSubSummaryBuilder final : public CircleNodeWithXYSummaryBuilder
+{
+};
+
+class CircleSumSummaryBuilder final : public CircleNodeWithReducerSummaryBuilder<luci::CircleSum>
+{
+};
+
+class CircleSVDFSummaryBuilder final : public CircleNodeSummaryBuilder
+{
+private:
+ bool validate(const luci::CircleNode *node);
+ std::vector<std::string> get_input_names(const luci::CircleNode *);
+
+ void build_attributes(const luci::CircleNode *node, locop::NodeSummary &s);
+};
+
+class CircleTanhSummaryBuilder final : public CircleNodeWithXSummaryBuilder
+{
+};
+
+class CircleTileSummaryBuilder final : public CircleNodeSummaryBuilder
+{
+private:
+ std::vector<std::string> get_input_names(const luci::CircleNode *);
+};
+
+class CircleTopKV2SummaryBuilder final : public CircleNodeSummaryBuilder
+{
+private:
+ std::vector<std::string> get_input_names(const luci::CircleNode *);
+};
+
+class CircleTransposeSummaryBuilder final : public CircleNodeSummaryBuilder
+{
+private:
+ std::vector<std::string> get_input_names(const luci::CircleNode *);
+};
+
+class CircleTransposeConvSummaryBuilder final : public CircleNodeSummaryBuilder
+{
+private:
+ bool validate(const luci::CircleNode *node);
+ std::vector<std::string> get_input_names(const luci::CircleNode *);
+ void build_attributes(const luci::CircleNode *node, locop::NodeSummary &s);
+};
+
+class CircleUnidirectionalSequenceLSTMSummaryBuilder final : public CircleNodeSummaryBuilder
+{
+private:
+ std::vector<std::string> get_input_names(const luci::CircleNode *);
+ void build_attributes(const luci::CircleNode *node, locop::NodeSummary &s);
+};
+
+class CircleUniqueSummaryBuilder final : public CircleNodeWithINPUTSummaryBuilder
+{
+private:
+ void build_attributes(const luci::CircleNode *node, locop::NodeSummary &s);
+};
+
+class CircleUnpackSummaryBuilder final : public CircleNodeSummaryBuilder
+{
+private:
+ std::vector<std::string> get_input_names(const luci::CircleNode *);
+ void build_attributes(const luci::CircleNode *node, locop::NodeSummary &s);
+};
+
+class CircleWhereSummaryBuilder final : public CircleNodeSummaryBuilder
+{
+private:
+ std::vector<std::string> get_input_names(const luci::CircleNode *);
+};
+
+class CircleWhileSummaryBuilder final : public CircleNodeSummaryBuilder
+{
+private:
+ std::vector<std::string> get_input_names(const luci::CircleNode *node);
+ void build_attributes(const luci::CircleNode *node, locop::NodeSummary &s);
+};
+
+class CircleZerosLikeSummaryBuilder final : public CircleNodeWithINPUTSummaryBuilder
+{
+};
+
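+// Builders for virtual nodes that expose the individual outputs of
+// multi-output operations.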
+class CircleBidirectionalSequenceLSTMOutSummaryBuilder final
+ : public CircleNodeWithINPUTSummaryBuilder
+{
+};
+
+class CircleCustomOutSummaryBuilder final : public CircleNodeWithINPUTSummaryBuilder
+{
+};
+
+class CircleIfOutSummaryBuilder final : public CircleNodeWithINPUTSummaryBuilder
+{
+};
+
+class CircleInputSummaryBuilder final : public CircleNodeSummaryBuilder
+{
+};
+
+class CircleNonMaxSuppressionV4OutSummaryBuilder final : public CircleNodeWithINPUTSummaryBuilder
+{
+};
+
+class CircleNonMaxSuppressionV5OutSummaryBuilder final : public CircleNodeWithINPUTSummaryBuilder
+{
+};
+
+class CircleOutputSummaryBuilder final : public CircleNodeSummaryBuilder
+{
+private:
+ std::vector<std::string> get_input_names(const luci::CircleNode *);
+};
+
+class CircleOutputDummySummaryBuilder final : public CircleNodeSummaryBuilder
+{
+};
+
+class CircleOutputExcludeSummaryBuilder final : public CircleNodeSummaryBuilder
+{
+};
+
+class CircleSplitOutSummaryBuilder final : public CircleNodeWithINPUTSummaryBuilder
+{
+};
+
+class CircleSplitVOutSummaryBuilder final : public CircleNodeWithINPUTSummaryBuilder
+{
+};
+
+class CircleTopKV2OutSummaryBuilder final : public CircleNodeSummaryBuilder
+{
+private:
+ std::vector<std::string> get_input_names(const luci::CircleNode *);
+};
+
+class CircleUniqueOutSummaryBuilder final : public CircleNodeSummaryBuilder
+{
+private:
+ std::vector<std::string> get_input_names(const luci::CircleNode *);
+};
+
+class CircleUnpackOutSummaryBuilder final : public CircleNodeSummaryBuilder
+{
+private:
+ std::vector<std::string> get_input_names(const luci::CircleNode *);
+};
+
+class CircleVariableSummaryBuilder final : public CircleNodeSummaryBuilder
+{
+};
+
+class CircleWhileOutSummaryBuilder final : public CircleNodeSummaryBuilder
+{
+private:
+ std::vector<std::string> get_input_names(const luci::CircleNode *);
+};
+
+} // namespace luci
+
+#endif // __LUCI_LOGEX_CIRCLE_NODE_SUMMARY_BUILDERS__
diff --git a/compiler/luci/logex/src/FormattedGraph.cpp b/compiler/luci/logex/src/FormattedGraph.cpp
index 0588ed79e..d3b2170b0 100644
--- a/compiler/luci/logex/src/FormattedGraph.cpp
+++ b/compiler/luci/logex/src/FormattedGraph.cpp
@@ -14,6 +14,7 @@
* limitations under the License.
*/
+#include "CircleNodeSummaryBuilder.h"
#include "luci/FormattedGraph.h"
#include <luci/IR/CircleDialect.h>
@@ -25,2179 +26,6 @@
#include <sstream>
#include <vector>
-using namespace luci;
-/**
- * @brief dump std::vector<int64_t> values to stream
- */
-std::ostream &operator<<(std::ostream &os, const std::vector<int64_t> &vi64)
-{
- for (auto vi : vi64)
- {
- os << vi << " ";
- }
- return os;
-}
-
-// For TF lite
-namespace
-{
-
-const char *to_str(loco::DataType type)
-{
- switch (type)
- {
- case loco::DataType::U8:
- return "UINT8";
- case loco::DataType::U16:
- return "UINT16";
- case loco::DataType::U32:
- return "UINT32";
- case loco::DataType::U64:
- return "UINT64";
-
- case loco::DataType::S8:
- return "INT8";
- case loco::DataType::S16:
- return "INT16";
- case loco::DataType::S32:
- return "INT32";
- case loco::DataType::S64:
- return "INT64";
-
- case loco::DataType::FLOAT16:
- return "FLOAT16";
- case loco::DataType::FLOAT32:
- return "FLOAT32";
- case loco::DataType::FLOAT64:
- return "FLOAT64";
-
- case loco::DataType::BOOL:
- return "BOOL";
-
- default:
- return "Error";
- }
-}
-
-const char *to_str(bool value) { return value ? "true" : "false"; }
-
-const char *to_str(luci::FusedActFunc fused)
-{
- switch (fused)
- {
- case luci::FusedActFunc::NONE:
- return "NONE";
- case luci::FusedActFunc::RELU:
- return "RELU";
- case luci::FusedActFunc::RELU_N1_TO_1:
- return "RELU_N1_TO_1";
- case luci::FusedActFunc::RELU6:
- return "RELU6";
- case luci::FusedActFunc::TANH:
- return "TANH";
- case luci::FusedActFunc::SIGN_BIT:
- return "SIGN_BIT";
- default:
- return "Error";
- }
-}
-
-const char *to_str(luci::Padding padding)
-{
- switch (padding)
- {
- case luci::Padding::SAME:
- return "SAME";
- case luci::Padding::VALID:
- return "VALID";
- default:
- return "Error";
- }
-}
-
-const char *to_str(luci::MirrorPadMode mode)
-{
- switch (mode)
- {
- case luci::MirrorPadMode::REFLECT:
- return "REFLECT";
- case luci::MirrorPadMode::SYMMETRIC:
- return "SYMMETRIC";
- default:
- return "Error";
- }
-}
-
-std::string to_str(const luci::Stride *stride)
-{
- return pepper::str(stride->h(), ",", stride->w());
-}
-
-std::string to_str(const luci::Filter *filter)
-{
- return pepper::str(filter->h(), ",", filter->w());
-}
-
-std::string circle_opname(uint32_t opnum)
-{
- static const std::string prefix{"circle."};
-
- switch (static_cast<luci::CircleOpcode>(opnum))
- {
-#define CIRCLE_NODE(OPCODE, CLASS) \
- case luci::CircleOpcode::OPCODE: \
- return prefix + #OPCODE;
-#define CIRCLE_VNODE CIRCLE_NODE
-#include <luci/IR/CircleNodes.lst>
-#undef CIRCLE_VNODE
-#undef CIRCLE_NODE
- default:
- break;
- };
-
- return prefix + "Invalid";
-}
-
-// CircleNodeSummaryBuilder with default implementation
-class CircleNodeSummaryBuilderBase : public locop::NodeSummaryBuilder
-{
-public:
- CircleNodeSummaryBuilderBase(const locop::SymbolTable *tbl) : _tbl{tbl}
- {
- // DO NOTHING
- }
-
-public:
- bool build(const loco::Node *, locop::NodeSummary &s) const final;
-
-protected:
-#define CIRCLE_NODE(OPCODE, CLASS) \
- virtual bool summary(const CLASS *, locop::NodeSummary &) const { return false; }
-#define CIRCLE_VNODE CIRCLE_NODE
-#include <luci/IR/CircleNodes.lst>
-#undef CIRCLE_VNODE
-#undef CIRCLE_NODE
-
-protected:
- const locop::SymbolTable *tbl(void) const { return _tbl; }
-
-private:
- const locop::SymbolTable *_tbl;
-};
-
-template <class CIRCLENODE>
-bool use_x(const locop::SymbolTable *tbl, const CIRCLENODE *node, locop::NodeSummary &s)
-{
- s.args().append("x", tbl->lookup(node->x()));
- s.state(locop::NodeSummary::State::Complete);
- return true;
-}
-
-template <class CIRCLENODE>
-bool use_input(const locop::SymbolTable *tbl, const CIRCLENODE *node, locop::NodeSummary &s)
-{
- s.args().append("input", tbl->lookup(node->input()));
- s.state(locop::NodeSummary::State::Complete);
- return true;
-}
-
-template <class CIRCLENODE>
-bool use_features(const locop::SymbolTable *tbl, const CIRCLENODE *node, locop::NodeSummary &s)
-{
- s.args().append("features", tbl->lookup(node->features()));
- s.state(locop::NodeSummary::State::Complete);
- return true;
-}
-
-template <class CIRCLENODE>
-bool use_xy(const locop::SymbolTable *tbl, const CIRCLENODE *node, locop::NodeSummary &s)
-{
- s.args().append("x", tbl->lookup(node->x()));
- s.args().append("y", tbl->lookup(node->y()));
- s.state(locop::NodeSummary::State::Complete);
- return true;
-}
-
-template <class CIRCLENODE>
-bool use_xy_act(const locop::SymbolTable *tbl, const CIRCLENODE *node, locop::NodeSummary &s)
-{
- assert(node->fusedActivationFunction() != luci::FusedActFunc::UNDEFINED);
-
- s.args().append("x", tbl->lookup(node->x()));
- s.args().append("y", tbl->lookup(node->y()));
- s.args().append("fused_activation_function", to_str(node->fusedActivationFunction()));
- s.state(locop::NodeSummary::State::Complete);
- return true;
-}
-
-template <class CIRCLENODE>
-bool use_reducer(const locop::SymbolTable *tbl, const CIRCLENODE *node, locop::NodeSummary &s)
-{
- s.args().append("input", tbl->lookup(node->input()));
- s.args().append("reduction_indices", tbl->lookup(node->reduction_indices()));
- s.args().append("keep_dims", node->keep_dims() ? "true" : "false");
- s.state(locop::NodeSummary::State::Complete);
- return true;
-}
-
-template <class CIRCLENODE>
-bool use_ido(const locop::SymbolTable *tbl, const CIRCLENODE *node, locop::NodeSummary &s)
-{
- s.args().append("input", tbl->lookup(node->input()));
- s.args().append("dimension", tbl->lookup(node->dimension()));
- s.args().append("output_type", to_str(node->output_type()));
- s.state(locop::NodeSummary::State::Complete);
- return true;
-}
-
-bool summary_node(const locop::SymbolTable *tbl, const luci::CircleAddN *node,
- locop::NodeSummary &s)
-{
- for (uint32_t i = 0; i < node->arity(); ++i)
- s.args().append("inputs", tbl->lookup(node->inputs(i)));
- s.state(locop::NodeSummary::State::Complete);
- return true;
-}
-
-bool summary_node(const locop::SymbolTable *tbl, const luci::CircleAveragePool2D *node,
- locop::NodeSummary &s)
-{
- assert(node->fusedActivationFunction() != luci::FusedActFunc::UNDEFINED);
-
- s.args().append("value", tbl->lookup(node->value()));
- s.args().append("filter(h,w)", to_str(node->filter()));
- s.args().append("stride(h,w)", to_str(node->stride()));
- s.args().append("padding", to_str(node->padding()));
- s.args().append("fused", to_str(node->fusedActivationFunction()));
- s.state(locop::NodeSummary::State::Complete);
- return true;
-}
-
-bool summary_node(const locop::SymbolTable *tbl, const luci::CircleBatchMatMul *node,
- locop::NodeSummary &s)
-{
- s.args().append("x", tbl->lookup(node->x()));
- s.args().append("y", tbl->lookup(node->y()));
- s.args().append("adj_x", to_str(node->adj_x()));
- s.args().append("adj_y", to_str(node->adj_y()));
- s.state(locop::NodeSummary::State::Complete);
- return true;
-}
-
-bool summary_node(const locop::SymbolTable *tbl, const luci::CircleBatchToSpaceND *node,
- locop::NodeSummary &s)
-{
- s.args().append("input", tbl->lookup(node->input()));
- s.args().append("block_shape", tbl->lookup(node->block_shape()));
- s.args().append("crops", tbl->lookup(node->crops()));
- s.state(locop::NodeSummary::State::Complete);
- return true;
-}
-
-bool summary_node(const locop::SymbolTable *tbl, const luci::CircleBidirectionalSequenceLSTM *node,
- locop::NodeSummary &s)
-{
- s.args().append("input", tbl->lookup(node->input()));
-
- s.args().append("fw_input_to_input_weights", tbl->lookup(node->fw_input_to_input_weights()));
- s.args().append("fw_input_to_forget_weights", tbl->lookup(node->fw_input_to_forget_weights()));
- s.args().append("fw_input_to_cell_weights", tbl->lookup(node->fw_input_to_cell_weights()));
- s.args().append("fw_input_to_output_weights", tbl->lookup(node->fw_input_to_output_weights()));
-
- s.args().append("fw_recurrent_to_input_weights",
- tbl->lookup(node->fw_recurrent_to_input_weights()));
- s.args().append("fw_recurrent_to_forget_weights",
- tbl->lookup(node->fw_recurrent_to_forget_weights()));
- s.args().append("fw_recurrent_to_cell_weights",
- tbl->lookup(node->fw_recurrent_to_cell_weights()));
- s.args().append("fw_recurrent_to_output_weights",
- tbl->lookup(node->fw_recurrent_to_output_weights()));
-
- s.args().append("fw_cell_to_input_weights", tbl->lookup(node->fw_cell_to_input_weights()));
- s.args().append("fw_cell_to_forget_weights", tbl->lookup(node->fw_cell_to_forget_weights()));
- s.args().append("fw_cell_to_output_weights", tbl->lookup(node->fw_cell_to_output_weights()));
-
- s.args().append("fw_input_gate_bias", tbl->lookup(node->fw_input_gate_bias()));
- s.args().append("fw_forget_gate_bias", tbl->lookup(node->fw_forget_gate_bias()));
- s.args().append("fw_cell_gate_bias", tbl->lookup(node->fw_cell_gate_bias()));
- s.args().append("fw_output_gate_bias", tbl->lookup(node->fw_output_gate_bias()));
-
- s.args().append("fw_projection_weights", tbl->lookup(node->fw_projection_weights()));
- s.args().append("fw_projection_bias", tbl->lookup(node->fw_projection_bias()));
-
- s.args().append("bw_input_to_input_weights", tbl->lookup(node->bw_input_to_input_weights()));
- s.args().append("bw_input_to_forget_weights", tbl->lookup(node->bw_input_to_forget_weights()));
- s.args().append("bw_input_to_cell_weights", tbl->lookup(node->bw_input_to_cell_weights()));
- s.args().append("bw_input_to_output_weights", tbl->lookup(node->bw_input_to_output_weights()));
-
- s.args().append("bw_recurrent_to_input_weights",
- tbl->lookup(node->bw_recurrent_to_input_weights()));
- s.args().append("bw_recurrent_to_forget_weights",
- tbl->lookup(node->bw_recurrent_to_forget_weights()));
- s.args().append("bw_recurrent_to_cell_weights",
- tbl->lookup(node->bw_recurrent_to_cell_weights()));
- s.args().append("bw_recurrent_to_output_weights",
- tbl->lookup(node->bw_recurrent_to_output_weights()));
-
- s.args().append("bw_cell_to_input_weights", tbl->lookup(node->bw_cell_to_input_weights()));
- s.args().append("bw_cell_to_forget_weights", tbl->lookup(node->bw_cell_to_forget_weights()));
- s.args().append("bw_cell_to_output_weights", tbl->lookup(node->bw_cell_to_output_weights()));
-
- s.args().append("bw_input_gate_bias", tbl->lookup(node->bw_input_gate_bias()));
- s.args().append("bw_forget_gate_bias", tbl->lookup(node->bw_forget_gate_bias()));
- s.args().append("bw_cell_gate_bias", tbl->lookup(node->bw_cell_gate_bias()));
- s.args().append("bw_output_gate_bias", tbl->lookup(node->bw_output_gate_bias()));
-
- s.args().append("bw_projection_weights", tbl->lookup(node->bw_projection_weights()));
- s.args().append("bw_projection_bias", tbl->lookup(node->bw_projection_bias()));
-
- s.args().append("fw_activation_state", tbl->lookup(node->fw_activation_state()));
- s.args().append("fw_cell_state", tbl->lookup(node->fw_cell_state()));
- s.args().append("bw_activation_state", tbl->lookup(node->bw_activation_state()));
- s.args().append("bw_cell_state", tbl->lookup(node->bw_cell_state()));
-
- s.args().append("auxillary_input", tbl->lookup(node->auxillary_input()));
- s.args().append("fw_auxillary_input_to_input_weights",
- tbl->lookup(node->fw_auxillary_input_to_input_weights()));
- s.args().append("fw_auxillary_input_to_forget_weights",
- tbl->lookup(node->fw_auxillary_input_to_forget_weights()));
- s.args().append("fw_auxillary_input_to_cell_weights",
- tbl->lookup(node->fw_auxillary_input_to_cell_weights()));
- s.args().append("fw_auxillary_input_to_output_weights",
- tbl->lookup(node->fw_auxillary_input_to_output_weights()));
- s.args().append("bw_auxillary_input_to_input_weights",
- tbl->lookup(node->bw_auxillary_input_to_input_weights()));
- s.args().append("bw_auxillary_input_to_forget_weights",
- tbl->lookup(node->bw_auxillary_input_to_forget_weights()));
- s.args().append("bw_auxillary_input_to_cell_weights",
- tbl->lookup(node->bw_auxillary_input_to_cell_weights()));
- s.args().append("bw_auxillary_input_to_output_weights",
- tbl->lookup(node->bw_auxillary_input_to_output_weights()));
-
- s.args().append("cell_clip", to_str(node->cell_clip()));
- s.args().append("proj_clip", to_str(node->proj_clip()));
- s.args().append("merge_outputs", to_str(node->merge_outputs()));
- s.args().append("time_major", to_str(node->time_major()));
- s.args().append("asymmetric_quantize_inputs", to_str(node->asymmetric_quantize_inputs()));
-
- s.state(locop::NodeSummary::State::Complete);
- return true;
-}
-
-bool summary_node(const locop::SymbolTable *tbl, const luci::CircleCast *node,
- locop::NodeSummary &s)
-{
- s.args().append("x", tbl->lookup(node->x()));
- s.args().append("in_data_type", to_str(node->in_data_type()));
- s.args().append("out_data_type", to_str(node->out_data_type()));
- s.state(locop::NodeSummary::State::Complete);
- return true;
-}
-
-bool summary_node(const locop::SymbolTable *tbl, const luci::CircleConcatenation *node,
- locop::NodeSummary &s)
-{
- assert(node->fusedActivationFunction() != luci::FusedActFunc::UNDEFINED);
-
- for (uint32_t i = 0; i < node->numValues(); ++i)
- s.args().append("values", tbl->lookup(node->values(i)));
- s.args().append("axis", pepper::str(node->axis()));
- s.args().append("fused", to_str(node->fusedActivationFunction()));
- s.state(locop::NodeSummary::State::Complete);
- return true;
-}
-
-bool summary_node(const locop::SymbolTable *tbl, const luci::CircleConv2D *node,
- locop::NodeSummary &s)
-{
- assert(node->fusedActivationFunction() != luci::FusedActFunc::UNDEFINED);
- assert(node->padding() != luci::Padding::UNDEFINED);
-
- s.args().append("input", tbl->lookup(node->input()));
- s.args().append("filter", tbl->lookup(node->filter()));
- s.args().append("bias", tbl->lookup(node->bias()));
- s.args().append("stride(h,w)", to_str(node->stride()));
- s.args().append("dilation(h,w)", to_str(node->dilation()));
- s.args().append("padding", to_str(node->padding()));
- s.args().append("fused", to_str(node->fusedActivationFunction()));
- s.state(locop::NodeSummary::State::Complete);
- return true;
-}
-
-bool summary_node(const locop::SymbolTable *tbl, const luci::CircleCustom *node,
- locop::NodeSummary &s)
-{
- for (uint32_t i = 0; i < node->numInputs(); i++)
- {
- s.args().append("input" + std::to_string(i), tbl->lookup(node->inputs(i)));
- }
- s.args().append("custom_code", node->custom_code());
- s.state(locop::NodeSummary::State::Complete);
- return true;
-}
-
-bool summary_node(const locop::SymbolTable *tbl, const luci::CircleDepthToSpace *node,
- locop::NodeSummary &s)
-{
- s.args().append("input", tbl->lookup(node->input()));
- s.args().append("block_size", std::to_string(node->block_size()));
- s.state(locop::NodeSummary::State::Complete);
- return true;
-}
-
-bool summary_node(const locop::SymbolTable *tbl, const luci::CircleDepthwiseConv2D *node,
- locop::NodeSummary &s)
-{
- assert(node->fusedActivationFunction() != luci::FusedActFunc::UNDEFINED);
- assert(node->padding() != luci::Padding::UNDEFINED);
-
- s.args().append("input", tbl->lookup(node->input()));
- s.args().append("filter", tbl->lookup(node->filter()));
- s.args().append("bias", tbl->lookup(node->bias()));
- s.args().append("stride(h,w)", to_str(node->stride()));
- s.args().append("dilation(h,w)", to_str(node->dilation()));
- s.args().append("padding", to_str(node->padding()));
- s.args().append("depthMultiplier", std::to_string(node->depthMultiplier()));
- s.args().append("fused", to_str(node->fusedActivationFunction()));
- s.state(locop::NodeSummary::State::Complete);
- return true;
-}
-
-bool summary_node(const locop::SymbolTable *tbl, const luci::CircleExpandDims *node,
- locop::NodeSummary &s)
-{
- s.args().append("input", tbl->lookup(node->input()));
- s.args().append("axis", tbl->lookup(node->axis()));
- s.state(locop::NodeSummary::State::Complete);
- return true;
-}
-
-bool summary_node(const locop::SymbolTable *tbl, const luci::CircleFakeQuant *node,
- locop::NodeSummary &s)
-{
- s.args().append("inputs", tbl->lookup(node->inputs()));
- s.args().append("min", pepper::str(node->min()));
- s.args().append("max", pepper::str(node->max()));
- s.args().append("num_bits", pepper::str(node->num_bits()));
- s.args().append("narrow_range", node->narrow_range() ? "true" : "false");
- s.state(locop::NodeSummary::State::Complete);
- return true;
-}
-
-bool summary_node(const locop::SymbolTable *tbl, const luci::CircleFill *node,
- locop::NodeSummary &s)
-{
- s.args().append("dims", tbl->lookup(node->dims()));
- s.args().append("value", tbl->lookup(node->value()));
- s.state(locop::NodeSummary::State::Complete);
- return true;
-}
-
-bool summary_node(const locop::SymbolTable *tbl, const luci::CircleFullyConnected *node,
- locop::NodeSummary &s)
-{
- assert(node->fusedActivationFunction() != luci::FusedActFunc::UNDEFINED);
-
- s.args().append("input", tbl->lookup(node->input()));
- s.args().append("weights", tbl->lookup(node->weights()));
- s.args().append("bias", tbl->lookup(node->bias()));
- s.args().append("fused", to_str(node->fusedActivationFunction()));
- s.state(locop::NodeSummary::State::Complete);
- return true;
-}
-
-bool summary_node(const locop::SymbolTable *tbl, const luci::CircleGather *node,
- locop::NodeSummary &s)
-{
- s.args().append("params", tbl->lookup(node->params()));
- s.args().append("indices", tbl->lookup(node->indices()));
- s.args().append("axis", pepper::str(node->axis()));
- s.state(locop::NodeSummary::State::Complete);
- return true;
-}
-
-bool summary_node(const locop::SymbolTable *tbl, const luci::CircleGatherNd *node,
- locop::NodeSummary &s)
-{
- s.args().append("params", tbl->lookup(node->params()));
- s.args().append("indices", tbl->lookup(node->indices()));
- s.state(locop::NodeSummary::State::Complete);
- return true;
-}
-
-bool summary_node(const locop::SymbolTable *tbl, const luci::CircleIf *node, locop::NodeSummary &s)
-{
- s.args().append("cond", tbl->lookup(node->cond()));
- for (uint32_t i = 0; i < node->input_count(); ++i)
- s.args().append("input", tbl->lookup(node->input(i)));
-
- if (node->then_graph() != nullptr)
- s.args().append("then_graph", node->then_graph()->name());
- else
- s.args().append("then_branch", pepper::str(node->then_branch()));
-
- if (node->else_graph() != nullptr)
- s.args().append("else_graph", node->else_graph()->name());
- else
- s.args().append("else_branch", pepper::str(node->else_branch()));
-
- s.state(locop::NodeSummary::State::Complete);
- return true;
-}
-
-bool summary_node(const locop::SymbolTable *tbl, const luci::CircleL2Normalize *node,
- locop::NodeSummary &s)
-{
- s.args().append("x", tbl->lookup(node->x()));
- s.args().append("fused_activation_function", to_str(node->fusedActivationFunction()));
- s.state(locop::NodeSummary::State::Complete);
- return true;
-}
-
-bool summary_node(const locop::SymbolTable *tbl, const luci::CircleL2Pool2D *node,
- locop::NodeSummary &s)
-{
- assert(node->fusedActivationFunction() != luci::FusedActFunc::UNDEFINED);
-
- s.args().append("value", tbl->lookup(node->value()));
- s.args().append("filter(h,w)", to_str(node->filter()));
- s.args().append("stride(h,w)", to_str(node->stride()));
- s.args().append("padding", to_str(node->padding()));
- s.args().append("fused", to_str(node->fusedActivationFunction()));
- s.state(locop::NodeSummary::State::Complete);
- return true;
-}
-
-bool summary_node(const locop::SymbolTable *tbl, const luci::CircleLeakyRelu *node,
- locop::NodeSummary &s)
-{
- s.args().append("features", tbl->lookup(node->features()));
- s.args().append("alpha", std::to_string(node->alpha()));
- s.state(locop::NodeSummary::State::Complete);
- return true;
-}
-
-bool summary_node(const locop::SymbolTable *tbl, const luci::CircleLocalResponseNormalization *node,
- locop::NodeSummary &s)
-{
- s.args().append("input", tbl->lookup(node->input()));
- s.args().append("radius", pepper::str(node->radius()));
- s.args().append("bias", pepper::str(node->bias()));
- s.args().append("alpha", pepper::str(node->alpha()));
- s.args().append("beta", pepper::str(node->beta()));
- s.state(locop::NodeSummary::State::Complete);
- return true;
-}
-
-bool summary_node(const locop::SymbolTable *tbl, const luci::CircleLogSoftmax *node,
- locop::NodeSummary &s)
-{
- s.args().append("logits", tbl->lookup(node->logits()));
- s.state(locop::NodeSummary::State::Complete);
- return true;
-}
-
-bool summary_node(const locop::SymbolTable *tbl, const luci::CircleMatrixDiag *node,
- locop::NodeSummary &s)
-{
- s.args().append("diagonal", tbl->lookup(node->diagonal()));
- s.state(locop::NodeSummary::State::Complete);
- return true;
-}
-
-bool summary_node(const locop::SymbolTable *tbl, const luci::CircleMatrixSetDiag *node,
- locop::NodeSummary &s)
-{
- s.args().append("input", tbl->lookup(node->input()));
- s.args().append("diagonal", tbl->lookup(node->diagonal()));
- s.state(locop::NodeSummary::State::Complete);
- return true;
-}
-
-bool summary_node(const locop::SymbolTable *tbl, const luci::CircleMaxPool2D *node,
- locop::NodeSummary &s)
-{
- assert(node->fusedActivationFunction() != luci::FusedActFunc::UNDEFINED);
-
- s.args().append("value", tbl->lookup(node->value()));
- s.args().append("filter(h,w)", to_str(node->filter()));
- s.args().append("stride(h,w)", to_str(node->stride()));
- s.args().append("padding", to_str(node->padding()));
- s.args().append("fused", to_str(node->fusedActivationFunction()));
- s.state(locop::NodeSummary::State::Complete);
- return true;
-}
-
-bool summary_node(const locop::SymbolTable *tbl, const luci::CircleMirrorPad *node,
- locop::NodeSummary &s)
-{
- s.args().append("input", tbl->lookup(node->input()));
- s.args().append("paddings", tbl->lookup(node->paddings()));
- s.args().append("mode", to_str(node->mode()));
- s.state(locop::NodeSummary::State::Complete);
- return true;
-}
-
-bool summary_node(const locop::SymbolTable *tbl, const luci::CircleNonMaxSuppressionV4 *node,
- locop::NodeSummary &s)
-{
- s.args().append("boxes", tbl->lookup(node->boxes()));
- s.args().append("scores", tbl->lookup(node->scores()));
- s.args().append("max_output_size", tbl->lookup(node->max_output_size()));
- s.args().append("iou_threshold", tbl->lookup(node->iou_threshold()));
- s.args().append("score_threshold", tbl->lookup(node->score_threshold()));
- s.state(locop::NodeSummary::State::Complete);
- return true;
-}
-
-bool summary_node(const locop::SymbolTable *tbl, const luci::CircleNonMaxSuppressionV5 *node,
- locop::NodeSummary &s)
-{
- s.args().append("boxes", tbl->lookup(node->boxes()));
- s.args().append("scores", tbl->lookup(node->scores()));
- s.args().append("max_output_size", tbl->lookup(node->max_output_size()));
- s.args().append("iou_threshold", tbl->lookup(node->iou_threshold()));
- s.args().append("score_threshold", tbl->lookup(node->score_threshold()));
- s.args().append("soft_nms_sigma", tbl->lookup(node->soft_nms_sigma()));
- s.state(locop::NodeSummary::State::Complete);
- return true;
-}
-
-bool summary_node(const locop::SymbolTable *tbl, const luci::CircleOneHot *node,
- locop::NodeSummary &s)
-{
- s.args().append("indices", tbl->lookup(node->indices()));
- s.args().append("depth", tbl->lookup(node->depth()));
- s.args().append("on_value", tbl->lookup(node->on_value()));
- s.args().append("off_value", tbl->lookup(node->off_value()));
- s.args().append("axis", pepper::str(node->axis()));
-
- s.state(locop::NodeSummary::State::Complete);
- return true;
-}
-
-bool summary_node(const locop::SymbolTable *tbl, const luci::CirclePack *node,
- locop::NodeSummary &s)
-{
- for (uint32_t i = 0; i < node->values_count(); ++i)
- s.args().append("values", tbl->lookup(node->values(i)));
- s.args().append("values_count", pepper::str(node->values_count()));
- s.args().append("axis", pepper::str(node->axis()));
- s.state(locop::NodeSummary::State::Complete);
- return true;
-}
-
-bool summary_node(const locop::SymbolTable *tbl, const luci::CirclePad *node, locop::NodeSummary &s)
-{
- s.args().append("input", tbl->lookup(node->input()));
- s.args().append("paddings", tbl->lookup(node->paddings()));
- s.state(locop::NodeSummary::State::Complete);
- return true;
-}
-
-bool summary_node(const locop::SymbolTable *tbl, const luci::CirclePadV2 *node,
- locop::NodeSummary &s)
-{
- s.args().append("input", tbl->lookup(node->input()));
- s.args().append("paddings", tbl->lookup(node->paddings()));
- s.args().append("constant_values", tbl->lookup(node->constant_values()));
- s.state(locop::NodeSummary::State::Complete);
- return true;
-}
-
-bool summary_node(const locop::SymbolTable *tbl, const luci::CirclePRelu *node,
- locop::NodeSummary &s)
-{
- s.args().append("input", tbl->lookup(node->input()));
- s.args().append("alpha", tbl->lookup(node->alpha()));
- s.state(locop::NodeSummary::State::Complete);
- return true;
-}
-
-bool summary_node(const locop::SymbolTable *tbl, const luci::CircleRange *node,
- locop::NodeSummary &s)
-{
- s.args().append("start", tbl->lookup(node->start()));
- s.args().append("limit", tbl->lookup(node->limit()));
- s.args().append("delta", tbl->lookup(node->delta()));
- s.state(locop::NodeSummary::State::Complete);
- return true;
-}
-
-bool summary_node(const locop::SymbolTable *tbl, const luci::CircleReshape *node,
- locop::NodeSummary &s)
-{
- s.args().append("tensor", tbl->lookup(node->tensor()));
- s.args().append("shape", tbl->lookup(node->shape()));
- // TODO Show newShape info
- s.state(locop::NodeSummary::State::PartiallyKnown);
- return true;
-}
-
-bool summary_node(const locop::SymbolTable *tbl, const luci::CircleResizeBilinear *node,
- locop::NodeSummary &s)
-{
- s.args().append("input", tbl->lookup(node->input()));
- s.args().append("size", tbl->lookup(node->size()));
- s.args().append("align_corners", node->align_corners() ? "true" : "false");
- s.args().append("half_pixel_centers", node->half_pixel_centers() ? "true" : "false");
- s.state(locop::NodeSummary::State::Complete);
- return true;
-}
-
-bool summary_node(const locop::SymbolTable *tbl, const luci::CircleResizeNearestNeighbor *node,
- locop::NodeSummary &s)
-{
- s.args().append("input", tbl->lookup(node->input()));
- s.args().append("size", tbl->lookup(node->size()));
- s.args().append("align_corners", node->align_corners() ? "true" : "false");
- s.state(locop::NodeSummary::State::Complete);
- return true;
-}
-
-bool summary_node(const locop::SymbolTable *tbl, const luci::CircleReverseSequence *node,
- locop::NodeSummary &s)
-{
- s.args().append("input", tbl->lookup(node->input()));
- s.args().append("seq_lengths", tbl->lookup(node->seq_lengths()));
- s.args().append("seq_axis", std::to_string(node->seq_axis()));
- s.args().append("batch_axis", std::to_string(node->batch_axis()));
- s.state(locop::NodeSummary::State::Complete);
- return true;
-}
-
-bool summary_node(const locop::SymbolTable *tbl, const luci::CircleReverseV2 *node,
- locop::NodeSummary &s)
-{
- s.args().append("tensor", tbl->lookup(node->tensor()));
- s.args().append("axis", tbl->lookup(node->axis()));
- s.state(locop::NodeSummary::State::Complete);
- return true;
-}
-
-bool summary_node(const locop::SymbolTable *tbl, const luci::CircleScatterNd *node,
- locop::NodeSummary &s)
-{
- s.args().append("indices", tbl->lookup(node->indices()));
- s.args().append("updates", tbl->lookup(node->updates()));
- s.args().append("shape", tbl->lookup(node->shape()));
- s.state(locop::NodeSummary::State::Complete);
- return true;
-}
-
-bool summary_node(const locop::SymbolTable *tbl, const luci::CircleSegmentSum *node,
- locop::NodeSummary &s)
-{
- s.args().append("input", tbl->lookup(node->input()));
- s.args().append("segment_ids", tbl->lookup(node->segment_ids()));
- s.state(locop::NodeSummary::State::Complete);
- return true;
-}
-
-bool summary_node(const locop::SymbolTable *tbl, const luci::CircleSelect *node,
- locop::NodeSummary &s)
-{
- s.args().append("condition", tbl->lookup(node->condition()));
- s.args().append("t", tbl->lookup(node->t()));
- s.args().append("e", tbl->lookup(node->e()));
- s.state(locop::NodeSummary::State::Complete);
- return true;
-}
-
-bool summary_node(const locop::SymbolTable *tbl, const luci::CircleSelectV2 *node,
- locop::NodeSummary &s)
-{
- s.args().append("condition", tbl->lookup(node->condition()));
- s.args().append("t", tbl->lookup(node->t()));
- s.args().append("e", tbl->lookup(node->e()));
- s.state(locop::NodeSummary::State::Complete);
- return true;
-}
-
-bool summary_node(const locop::SymbolTable *tbl, const luci::CircleShape *node,
- locop::NodeSummary &s)
-{
- s.args().append("input", tbl->lookup(node->input()));
- s.args().append("out_type", to_str(node->out_type()));
- s.state(locop::NodeSummary::State::Complete);
- return true;
-}
-
-bool summary_node(const locop::SymbolTable *tbl, const luci::CircleSlice *node,
- locop::NodeSummary &s)
-{
- s.args().append("input", tbl->lookup(node->input()));
- s.args().append("begin", tbl->lookup(node->begin()));
- s.args().append("size", tbl->lookup(node->size()));
- s.state(locop::NodeSummary::State::Complete);
- return true;
-}
-
-bool summary_node(const locop::SymbolTable *tbl, const luci::CircleSoftmax *node,
- locop::NodeSummary &s)
-{
- s.args().append("logits", tbl->lookup(node->logits()));
- s.args().append("beta", pepper::str(node->beta()));
- s.state(locop::NodeSummary::State::Complete);
- return true;
-}
-
-bool summary_node(const locop::SymbolTable *tbl, const luci::CircleSpaceToBatchND *node,
- locop::NodeSummary &s)
-{
- s.args().append("input", tbl->lookup(node->input()));
- s.args().append("block_shape", tbl->lookup(node->block_shape()));
- s.args().append("paddings", tbl->lookup(node->paddings()));
- s.state(locop::NodeSummary::State::Complete);
- return true;
-}
-
-bool summary_node(const locop::SymbolTable *tbl, const luci::CircleSpaceToDepth *node,
- locop::NodeSummary &s)
-{
- s.args().append("input", tbl->lookup(node->input()));
- s.args().append("block_size", pepper::str(node->block_size()));
- s.state(locop::NodeSummary::State::Complete);
- return true;
-}
-
-bool summary_node(const locop::SymbolTable *tbl, const luci::CircleSparseToDense *node,
- locop::NodeSummary &s)
-{
- s.args().append("indices", tbl->lookup(node->indices()));
- s.args().append("output_shape", tbl->lookup(node->output_shape()));
- s.args().append("values", tbl->lookup(node->values()));
- s.args().append("default_value", tbl->lookup(node->default_value()));
- s.args().append("Validate_indices", pepper::str(node->validate_indices()));
- s.state(locop::NodeSummary::State::Complete);
- return true;
-}
-
-bool summary_node(const locop::SymbolTable *tbl, const luci::CircleSplit *node,
- locop::NodeSummary &s)
-{
- s.args().append("split_dim", tbl->lookup(node->split_dim()));
- s.args().append("input", tbl->lookup(node->input()));
- s.args().append("num_split", pepper::str(node->num_split()));
- s.state(locop::NodeSummary::State::Complete);
- return true;
-}
-
-bool summary_node(const locop::SymbolTable *tbl, const luci::CircleSplitV *node,
- locop::NodeSummary &s)
-{
- s.args().append("input", tbl->lookup(node->input()));
- s.args().append("size_splits", tbl->lookup(node->size_splits()));
- s.args().append("split_dim", tbl->lookup(node->split_dim()));
- s.args().append("num_split", pepper::str(node->num_split()));
- s.state(locop::NodeSummary::State::Complete);
- return true;
-}
-
-bool summary_node(const locop::SymbolTable *tbl, const luci::CircleSqueeze *node,
- locop::NodeSummary &s)
-{
- s.args().append("input", tbl->lookup(node->input()));
-
-  // Seed via '<<': stringstream{"("} would leave the put pointer at offset 0,
-  // letting the first dim overwrite the '('.
-  std::stringstream ss;
-  ss << "(";
- for (size_t i = 0; i < node->squeeze_dims().size(); ++i)
- {
- if (i != 0)
- ss << ", ";
- ss << node->squeeze_dims()[i];
- }
- ss << ")";
- s.args().append("squeeze_dims", ss.str());
- s.state(locop::NodeSummary::State::Complete);
- return true;
-}
-
-bool summary_node(const locop::SymbolTable *tbl, const luci::CircleStridedSlice *node,
- locop::NodeSummary &s)
-{
- s.args().append("input", tbl->lookup(node->input()));
- s.args().append("begin", tbl->lookup(node->begin()));
- s.args().append("end", tbl->lookup(node->end()));
- s.args().append("strides", tbl->lookup(node->strides()));
- s.args().append("begin_mask", pepper::str(node->begin_mask()));
- s.args().append("end_mask", pepper::str(node->end_mask()));
- s.args().append("ellipsis_mask", pepper::str(node->ellipsis_mask()));
- s.args().append("new_axis_mask", pepper::str(node->new_axis_mask()));
- s.args().append("shrink_axis_mask", pepper::str(node->shrink_axis_mask()));
- s.state(locop::NodeSummary::State::Complete);
- return true;
-}
-
-bool summary_node(const locop::SymbolTable *tbl, const luci::CircleTile *node,
- locop::NodeSummary &s)
-{
- s.args().append("input", tbl->lookup(node->input()));
- s.args().append("multiples", tbl->lookup(node->multiples()));
- s.state(locop::NodeSummary::State::Complete);
- return true;
-}
-
-bool summary_node(const locop::SymbolTable *tbl, const luci::CircleTopKV2 *node,
- locop::NodeSummary &s)
-{
- s.args().append("input", tbl->lookup(node->input()));
- s.args().append("k", tbl->lookup(node->k()));
- s.state(locop::NodeSummary::State::Complete);
- return true;
-}
-
-bool summary_node(const locop::SymbolTable *tbl, const luci::CircleTranspose *node,
- locop::NodeSummary &s)
-{
- s.args().append("a", tbl->lookup(node->a()));
- s.args().append("perm", tbl->lookup(node->perm()));
- s.state(locop::NodeSummary::State::Complete);
- return true;
-}
-
-bool summary_node(const locop::SymbolTable *tbl, const luci::CircleTransposeConv *node,
- locop::NodeSummary &s)
-{
- assert(node->padding() != luci::Padding::UNDEFINED);
-
- s.args().append("inputSizes", tbl->lookup(node->inputSizes()));
- s.args().append("filter", tbl->lookup(node->filter()));
- s.args().append("outBackprop", tbl->lookup(node->outBackprop()));
- s.args().append("bias", tbl->lookup(node->bias()));
- s.args().append("stride(h,w)", to_str(node->stride()));
- s.args().append("padding", to_str(node->padding()));
- s.state(locop::NodeSummary::State::Complete);
- return true;
-}
-
-bool summary_node(const locop::SymbolTable *tbl, const luci::CircleUnidirectionalSequenceLSTM *node,
- locop::NodeSummary &s)
-{
- s.args().append("input", tbl->lookup(node->input()));
-
- s.args().append("input_to_input_weights", tbl->lookup(node->input_to_input_weights()));
- s.args().append("input_to_forget_weights", tbl->lookup(node->input_to_forget_weights()));
- s.args().append("input_to_cell_weights", tbl->lookup(node->input_to_cell_weights()));
- s.args().append("input_to_output_weights", tbl->lookup(node->input_to_output_weights()));
-
- s.args().append("recurrent_to_input_weights", tbl->lookup(node->recurrent_to_input_weights()));
- s.args().append("recurrent_to_forget_weights", tbl->lookup(node->recurrent_to_forget_weights()));
- s.args().append("recurrent_to_cell_weights", tbl->lookup(node->recurrent_to_cell_weights()));
- s.args().append("recurrent_to_output_weights", tbl->lookup(node->recurrent_to_output_weights()));
-
- s.args().append("cell_to_input_weights", tbl->lookup(node->cell_to_input_weights()));
- s.args().append("cell_to_forget_weights", tbl->lookup(node->cell_to_forget_weights()));
- s.args().append("cell_to_output_weights", tbl->lookup(node->cell_to_output_weights()));
-
- s.args().append("input_gate_bias", tbl->lookup(node->input_gate_bias()));
- s.args().append("forget_gate_bias", tbl->lookup(node->forget_gate_bias()));
- s.args().append("cell_gate_bias", tbl->lookup(node->cell_gate_bias()));
- s.args().append("output_gate_bias", tbl->lookup(node->output_gate_bias()));
-
- s.args().append("projection_weights", tbl->lookup(node->projection_weights()));
- s.args().append("projection_bias", tbl->lookup(node->projection_bias()));
-
- s.args().append("activation_state", tbl->lookup(node->activation_state()));
- s.args().append("cell_state", tbl->lookup(node->cell_state()));
-
- s.args().append("input_layer_norm_coefficients",
- tbl->lookup(node->input_layer_norm_coefficients()));
- s.args().append("forget_layer_norm_coefficients",
- tbl->lookup(node->forget_layer_norm_coefficients()));
- s.args().append("cell_layer_norm_coefficients",
- tbl->lookup(node->cell_layer_norm_coefficients()));
- s.args().append("output_layer_norm_coefficients",
- tbl->lookup(node->output_layer_norm_coefficients()));
-
- s.args().append("cell_clip", to_str(node->cell_clip()));
- s.args().append("proj_clip", to_str(node->proj_clip()));
- s.args().append("time_major", to_str(node->time_major()));
- s.args().append("asymmetric_quantize_inputs", to_str(node->asymmetric_quantize_inputs()));
-
- s.state(locop::NodeSummary::State::Complete);
- return true;
-}
-
-bool summary_node(const locop::SymbolTable *tbl, const luci::CircleUnique *node,
- locop::NodeSummary &s)
-{
- s.args().append("input", tbl->lookup(node->input()));
- s.args().append("idx_out_type", to_str(node->idx_out_type()));
- s.state(locop::NodeSummary::State::Complete);
- return true;
-}
-
-bool summary_node(const locop::SymbolTable *tbl, const luci::CircleUnpack *node,
- locop::NodeSummary &s)
-{
- s.args().append("value", tbl->lookup(node->value()));
- s.args().append("num", pepper::str(node->num()));
- s.args().append("axis", pepper::str(node->axis()));
- s.state(locop::NodeSummary::State::Complete);
- return true;
-}
-
-bool summary_node(const locop::SymbolTable *tbl, const luci::CircleWhere *node,
- locop::NodeSummary &s)
-{
- s.args().append("condition", tbl->lookup(node->condition()));
- s.state(locop::NodeSummary::State::Complete);
- return true;
-}
-
-bool summary_node(const locop::SymbolTable *tbl, const luci::CircleWhile *node,
- locop::NodeSummary &s)
-{
- for (uint32_t i = 0; i < node->input_count(); ++i)
- s.args().append("input", tbl->lookup(node->input(i)));
-
- if (node->cond_graph() != nullptr)
- s.args().append("cond_graph", node->cond_graph()->name());
- else
- s.args().append("cond_branch", pepper::str(node->cond_branch()));
-
- if (node->body_graph() != nullptr)
- s.args().append("body_graph", node->body_graph()->name());
- else
- s.args().append("body_branch", pepper::str(node->body_branch()));
-
- s.state(locop::NodeSummary::State::Complete);
- return true;
-}
-
-bool summary_node(const locop::SymbolTable *tbl, const luci::CircleTopKV2Out *node,
- locop::NodeSummary &s)
-{
- s.args().append("topkv2", tbl->lookup(node->input()));
- s.state(locop::NodeSummary::State::Complete);
- return true;
-}
-
-bool summary_node(const locop::SymbolTable *tbl, const luci::CircleUniqueOut *node,
- locop::NodeSummary &s)
-{
- s.args().append("unique", tbl->lookup(node->input()));
- s.state(locop::NodeSummary::State::Complete);
- return true;
-}
-
-bool summary_node(const locop::SymbolTable *tbl, const luci::CircleUnpackOut *node,
- locop::NodeSummary &s)
-{
- s.args().append("unpack", tbl->lookup(node->input()));
- s.state(locop::NodeSummary::State::Complete);
- return true;
-}
-
-bool summary_node(const locop::SymbolTable *tbl, const luci::CircleWhileOut *node,
- locop::NodeSummary &s)
-{
- s.args().append("while", tbl->lookup(node->input()));
- s.state(locop::NodeSummary::State::Complete);
- return true;
-}
-
-bool summary_node(const locop::SymbolTable *tbl, const luci::CircleOutput *node,
- locop::NodeSummary &s)
-{
- s.args().append("from", tbl->lookup(node->from()));
- s.state(locop::NodeSummary::State::Complete);
- return true;
-}
-
-bool summary_node(const locop::SymbolTable *, const luci::CircleOutputDummy *,
- locop::NodeSummary &s)
-{
- s.state(locop::NodeSummary::State::Complete);
- return true;
-}
-
-bool summary_node(const locop::SymbolTable *, const luci::CircleOutputExclude *,
- locop::NodeSummary &s)
-{
- s.state(locop::NodeSummary::State::Complete);
- return true;
-}
-
-bool summary_node(const locop::SymbolTable *tbl, const luci::CircleBCQFullyConnected *node,
- locop::NodeSummary &s)
-{
- assert(node->fusedActivationFunction() != luci::FusedActFunc::UNDEFINED);
- s.args().append("input", tbl->lookup(node->input()));
- s.args().append("weights_scales", tbl->lookup(node->weights_scales()));
- s.args().append("weights_binary", tbl->lookup(node->weights_binary()));
- s.args().append("bias", tbl->lookup(node->bias()));
- s.args().append("weights_clusters", tbl->lookup(node->weights_clusters()));
- s.args().append("fused", to_str(node->fusedActivationFunction()));
- s.args().append("weights_hidden_size", pepper::str(node->weights_hidden_size()));
- s.state(locop::NodeSummary::State::Complete);
- return true;
-}
-
-bool summary_node(const locop::SymbolTable *tbl, const luci::CircleBCQGather *node,
- locop::NodeSummary &s)
-{
- s.args().append("input_scales", tbl->lookup(node->input_scales()));
- s.args().append("input_binary", tbl->lookup(node->input_binary()));
- s.args().append("indices", tbl->lookup(node->indices()));
- s.args().append("input_clusters", tbl->lookup(node->input_clusters()));
- s.args().append("axis", pepper::str(node->axis()));
- s.args().append("input_hidden_size", pepper::str(node->input_hidden_size()));
- s.state(locop::NodeSummary::State::Complete);
- return true;
-}
-
-bool summary_node(const locop::SymbolTable *tbl, const luci::CircleInstanceNorm *node,
- locop::NodeSummary &s)
-{
- auto fused = node->fusedActivationFunction();
- assert(fused != luci::FusedActFunc::UNDEFINED);
-
- s.args().append("input", tbl->lookup(node->input()));
- s.args().append("gamma", tbl->lookup(node->gamma()));
- s.args().append("beta", tbl->lookup(node->beta()));
- s.args().append("epsilon", pepper::str(node->epsilon()));
- s.args().append("fused_activation_function", to_str(fused));
- s.state(locop::NodeSummary::State::Complete);
- return true;
-}
-
-// SummaryBuilderLet type
-enum class SB
-{
- ABC,
- DEF,
- GHIJ,
- KLMN,
- OPQR,
- STUV,
- WXYZ,
- CIRC, // circle only
- VIRT, // virtual
-};
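
The SB enum buckets node classes alphabetically, with CIRC for circle-only extensions and VIRT for virtual nodes (graph I/O and multi-output helpers), purely so each SummaryBuilderLet specialization stays a manageable size. The dispatcher then tries each bucket in turn, as the BUILD_GRP macro removed in the second hunk of this file's diff shows:

    // Effect of BUILD_GRP (see the second hunk below), unrolled:
    // if (SummaryBuilderLet<SB::ABC>(_tbl).build(node, s)) return true;
    // if (SummaryBuilderLet<SB::DEF>(_tbl).build(node, s)) return true;
    // ... and so on through SB::VIRT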
-
-template <SB sb> class SummaryBuilderLet;
-
-#define IMPLEMENT(CLASS) bool summary(const CLASS *, locop::NodeSummary &) const final;
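
For reference, IMPLEMENT(luci::CircleAbs) in the first specialization below expands to the member declaration

    bool summary(const luci::CircleAbs *, locop::NodeSummary &) const final;

so each class body is just the list of node types its bucket handles, with the definitions supplied out-of-line after the classes.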
-
-template <> class SummaryBuilderLet<SB::ABC> final : public CircleNodeSummaryBuilderBase
-{
-public:
- SummaryBuilderLet(const locop::SymbolTable *tbl) : CircleNodeSummaryBuilderBase(tbl)
- {
- // DO NOTHING
- }
-
-private:
- IMPLEMENT(luci::CircleAbs)
- IMPLEMENT(luci::CircleAdd)
- IMPLEMENT(luci::CircleAddN)
- IMPLEMENT(luci::CircleArgMax)
- IMPLEMENT(luci::CircleArgMin)
- IMPLEMENT(luci::CircleAveragePool2D)
- IMPLEMENT(luci::CircleBatchMatMul)
- IMPLEMENT(luci::CircleBatchToSpaceND)
- IMPLEMENT(luci::CircleBidirectionalSequenceLSTM)
- IMPLEMENT(luci::CircleCast)
- IMPLEMENT(luci::CircleCeil)
- IMPLEMENT(luci::CircleConcatenation)
- IMPLEMENT(luci::CircleConst)
- IMPLEMENT(luci::CircleConv2D)
- IMPLEMENT(luci::CircleCos)
- IMPLEMENT(luci::CircleCustom)
-};
-
-template <> class SummaryBuilderLet<SB::DEF> final : public CircleNodeSummaryBuilderBase
-{
-public:
- SummaryBuilderLet(const locop::SymbolTable *tbl) : CircleNodeSummaryBuilderBase(tbl)
- {
- // DO NOTHING
- }
-
-private:
- IMPLEMENT(luci::CircleDepthToSpace)
- IMPLEMENT(luci::CircleDepthwiseConv2D)
- IMPLEMENT(luci::CircleDequantize)
- IMPLEMENT(luci::CircleDiv)
- IMPLEMENT(luci::CircleElu)
- IMPLEMENT(luci::CircleEqual)
- IMPLEMENT(luci::CircleExp)
- IMPLEMENT(luci::CircleExpandDims)
- IMPLEMENT(luci::CircleFakeQuant)
- IMPLEMENT(luci::CircleFill)
- IMPLEMENT(luci::CircleFloor)
- IMPLEMENT(luci::CircleFloorDiv)
- IMPLEMENT(luci::CircleFloorMod)
- IMPLEMENT(luci::CircleFullyConnected)
-};
-
-template <> class SummaryBuilderLet<SB::GHIJ> final : public CircleNodeSummaryBuilderBase
-{
-public:
- SummaryBuilderLet(const locop::SymbolTable *tbl) : CircleNodeSummaryBuilderBase(tbl)
- {
- // DO NOTHING
- }
-
-private:
- IMPLEMENT(luci::CircleGather)
- IMPLEMENT(luci::CircleGatherNd)
- IMPLEMENT(luci::CircleGreater)
- IMPLEMENT(luci::CircleGreaterEqual)
- IMPLEMENT(luci::CircleIf)
-};
-
-template <> class SummaryBuilderLet<SB::KLMN> final : public CircleNodeSummaryBuilderBase
-{
-public:
- SummaryBuilderLet(const locop::SymbolTable *tbl) : CircleNodeSummaryBuilderBase(tbl)
- {
- // DO NOTHING
- }
-
-private:
- IMPLEMENT(luci::CircleL2Normalize)
- IMPLEMENT(luci::CircleL2Pool2D)
- IMPLEMENT(luci::CircleLeakyRelu)
- IMPLEMENT(luci::CircleLess)
- IMPLEMENT(luci::CircleLessEqual)
- IMPLEMENT(luci::CircleLocalResponseNormalization)
- IMPLEMENT(luci::CircleLog)
- IMPLEMENT(luci::CircleLogicalAnd)
- IMPLEMENT(luci::CircleLogicalNot)
- IMPLEMENT(luci::CircleLogicalOr)
- IMPLEMENT(luci::CircleLogistic)
- IMPLEMENT(luci::CircleLogSoftmax)
- IMPLEMENT(luci::CircleMatrixDiag)
- IMPLEMENT(luci::CircleMatrixSetDiag)
- IMPLEMENT(luci::CircleMaximum)
- IMPLEMENT(luci::CircleMaxPool2D)
- IMPLEMENT(luci::CircleMean)
- IMPLEMENT(luci::CircleMinimum)
- IMPLEMENT(luci::CircleMirrorPad)
- IMPLEMENT(luci::CircleMul)
- IMPLEMENT(luci::CircleNeg)
- IMPLEMENT(luci::CircleNonMaxSuppressionV4)
- IMPLEMENT(luci::CircleNonMaxSuppressionV5)
- IMPLEMENT(luci::CircleNotEqual)
-};
-
-template <> class SummaryBuilderLet<SB::OPQR> final : public CircleNodeSummaryBuilderBase
-{
-public:
- SummaryBuilderLet(const locop::SymbolTable *tbl) : CircleNodeSummaryBuilderBase(tbl)
- {
- // DO NOTHING
- }
-
-private:
- IMPLEMENT(luci::CircleOneHot)
- IMPLEMENT(luci::CirclePack)
- IMPLEMENT(luci::CirclePad)
- IMPLEMENT(luci::CirclePadV2)
- IMPLEMENT(luci::CirclePow)
- IMPLEMENT(luci::CirclePRelu)
- IMPLEMENT(luci::CircleQuantize)
- IMPLEMENT(luci::CircleRange)
- IMPLEMENT(luci::CircleRank)
- IMPLEMENT(luci::CircleReduceAny)
- IMPLEMENT(luci::CircleReduceMax)
- IMPLEMENT(luci::CircleReduceMin)
- IMPLEMENT(luci::CircleReduceProd)
- IMPLEMENT(luci::CircleRelu)
- IMPLEMENT(luci::CircleRelu6)
- IMPLEMENT(luci::CircleReluN1To1)
- IMPLEMENT(luci::CircleReshape)
- IMPLEMENT(luci::CircleResizeBilinear)
- IMPLEMENT(luci::CircleResizeNearestNeighbor)
- IMPLEMENT(luci::CircleReverseSequence)
- IMPLEMENT(luci::CircleReverseV2)
- IMPLEMENT(luci::CircleRound)
- IMPLEMENT(luci::CircleRsqrt)
-};
-
-template <> class SummaryBuilderLet<SB::STUV> final : public CircleNodeSummaryBuilderBase
-{
-public:
- SummaryBuilderLet(const locop::SymbolTable *tbl) : CircleNodeSummaryBuilderBase(tbl)
- {
- // DO NOTHING
- }
-
-private:
- IMPLEMENT(luci::CircleScatterNd)
- IMPLEMENT(luci::CircleSegmentSum)
- IMPLEMENT(luci::CircleSelect)
- IMPLEMENT(luci::CircleSelectV2)
- IMPLEMENT(luci::CircleShape)
- IMPLEMENT(luci::CircleSin)
- IMPLEMENT(luci::CircleSlice)
- IMPLEMENT(luci::CircleSoftmax)
- IMPLEMENT(luci::CircleSpaceToBatchND)
- IMPLEMENT(luci::CircleSpaceToDepth)
- IMPLEMENT(luci::CircleSparseToDense)
- IMPLEMENT(luci::CircleSplit)
- IMPLEMENT(luci::CircleSplitV)
- IMPLEMENT(luci::CircleSqrt)
- IMPLEMENT(luci::CircleSquare)
- IMPLEMENT(luci::CircleSquaredDifference)
- IMPLEMENT(luci::CircleSqueeze)
- IMPLEMENT(luci::CircleStridedSlice)
- IMPLEMENT(luci::CircleSub)
- IMPLEMENT(luci::CircleSum)
- IMPLEMENT(luci::CircleTanh)
- IMPLEMENT(luci::CircleTile)
- IMPLEMENT(luci::CircleTopKV2)
- IMPLEMENT(luci::CircleTranspose)
- IMPLEMENT(luci::CircleTransposeConv)
- IMPLEMENT(luci::CircleUnidirectionalSequenceLSTM)
- IMPLEMENT(luci::CircleUnique)
- IMPLEMENT(luci::CircleUnpack)
-};
-
-template <> class SummaryBuilderLet<SB::WXYZ> final : public CircleNodeSummaryBuilderBase
-{
-public:
- SummaryBuilderLet(const locop::SymbolTable *tbl) : CircleNodeSummaryBuilderBase(tbl)
- {
- // DO NOTHING
- }
-
-private:
- IMPLEMENT(luci::CircleWhere)
- IMPLEMENT(luci::CircleWhile)
- IMPLEMENT(luci::CircleZerosLike)
-};
-
-template <> class SummaryBuilderLet<SB::CIRC> final : public CircleNodeSummaryBuilderBase
-{
-public:
- SummaryBuilderLet(const locop::SymbolTable *tbl) : CircleNodeSummaryBuilderBase(tbl)
- {
- // DO NOTHING
- }
-
-private:
- IMPLEMENT(luci::CircleBCQFullyConnected)
- IMPLEMENT(luci::CircleBCQGather)
- IMPLEMENT(luci::CircleInstanceNorm)
-};
-
-template <> class SummaryBuilderLet<SB::VIRT> final : public CircleNodeSummaryBuilderBase
-{
-public:
- SummaryBuilderLet(const locop::SymbolTable *tbl) : CircleNodeSummaryBuilderBase(tbl)
- {
- // DO NOTHING
- }
-
-private:
- IMPLEMENT(luci::CircleInput)
- IMPLEMENT(luci::CircleOutput)
- IMPLEMENT(luci::CircleCustomOut)
- IMPLEMENT(luci::CircleIfOut)
- IMPLEMENT(luci::CircleNonMaxSuppressionV4Out)
- IMPLEMENT(luci::CircleNonMaxSuppressionV5Out)
- IMPLEMENT(luci::CircleOutputDummy)
- IMPLEMENT(luci::CircleOutputExclude)
- IMPLEMENT(luci::CircleSplitOut)
- IMPLEMENT(luci::CircleSplitVOut)
- IMPLEMENT(luci::CircleTopKV2Out)
- IMPLEMENT(luci::CircleUniqueOut)
- IMPLEMENT(luci::CircleUnpackOut)
- IMPLEMENT(luci::CircleWhileOut)
-};
-
-#undef IMPLEMENT
-
-bool CircleNodeSummaryBuilderBase::build(const loco::Node *node, locop::NodeSummary &s) const
-{
- if (node->dialect() != luci::CircleDialect::get())
- return false;
-
- auto ptr_to_str = [](const void *ptr) {
- std::stringstream ss;
- ss << ptr;
- return ss.str();
- };
-
- auto add_comment = [&]() {
- auto cnode = loco::must_cast<const luci::CircleNode *>(node);
- s.opname(circle_opname(node->opnum()));
- s.comments().append("[" + cnode->name() + "] = " + ptr_to_str(node));
- };
-
-#define CIRCLE_NODE(OPCODE, CLASS) \
- if (dynamic_cast<const CLASS *>(node)) \
- { \
- if (summary(dynamic_cast<const CLASS *>(node), s)) \
- { \
- add_comment(); \
- return true; \
- } \
- }
-#define CIRCLE_VNODE CIRCLE_NODE
-#include <luci/IR/CircleNodes.lst>
-#undef CIRCLE_VNODE
-#undef CIRCLE_NODE
-
- return false;
-}
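
build() dispatches with an X-macro pass over <luci/IR/CircleNodes.lst>: each CIRCLE_NODE(OPCODE, CLASS) entry in that list expands the probe above, and CIRCLE_VNODE is aliased to the same thing so virtual nodes are probed too. For a single entry such as CIRCLE_NODE(ABS, luci::CircleAbs), the expansion is, in effect:

    if (dynamic_cast<const luci::CircleAbs *>(node))
    {
      if (summary(dynamic_cast<const luci::CircleAbs *>(node), s))
      {
        add_comment();
        return true;
      }
    }

Note the successful cast is computed twice; caching it in a local would save the second dynamic_cast, at the cost of a name that must be unique per expansion.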
-
-bool SummaryBuilderLet<SB::ABC>::summary(const luci::CircleAbs *node, locop::NodeSummary &s) const
-{
- return use_x(tbl(), node, s);
-}
-
-bool SummaryBuilderLet<SB::ABC>::summary(const luci::CircleAdd *node, locop::NodeSummary &s) const
-{
- return use_xy_act(tbl(), node, s);
-}
-
-bool SummaryBuilderLet<SB::ABC>::summary(const luci::CircleAddN *node, locop::NodeSummary &s) const
-{
- return summary_node(tbl(), node, s);
-}
-
-bool SummaryBuilderLet<SB::ABC>::summary(const luci::CircleArgMax *node,
- locop::NodeSummary &s) const
-{
- return use_ido(tbl(), node, s);
-}
-
-bool SummaryBuilderLet<SB::ABC>::summary(const luci::CircleArgMin *node,
- locop::NodeSummary &s) const
-{
- return use_ido(tbl(), node, s);
-}
-
-bool SummaryBuilderLet<SB::ABC>::summary(const luci::CircleAveragePool2D *node,
- locop::NodeSummary &s) const
-{
- return summary_node(tbl(), node, s);
-}
-
-bool SummaryBuilderLet<SB::ABC>::summary(const luci::CircleBatchMatMul *node,
- locop::NodeSummary &s) const
-{
- return summary_node(tbl(), node, s);
-}
-
-bool SummaryBuilderLet<SB::ABC>::summary(const luci::CircleBatchToSpaceND *node,
- locop::NodeSummary &s) const
-{
- return summary_node(tbl(), node, s);
-}
-
-bool SummaryBuilderLet<SB::ABC>::summary(const luci::CircleBidirectionalSequenceLSTM *node,
- locop::NodeSummary &s) const
-{
- return summary_node(tbl(), node, s);
-}
-
-bool SummaryBuilderLet<SB::ABC>::summary(const luci::CircleCast *node, locop::NodeSummary &s) const
-{
- return summary_node(tbl(), node, s);
-}
-
-bool SummaryBuilderLet<SB::ABC>::summary(const luci::CircleCeil *node, locop::NodeSummary &s) const
-{
- return use_x(tbl(), node, s);
-}
-
-bool SummaryBuilderLet<SB::ABC>::summary(const luci::CircleConcatenation *node,
- locop::NodeSummary &s) const
-{
- return summary_node(tbl(), node, s);
-}
-
-bool SummaryBuilderLet<SB::ABC>::summary(const luci::CircleConst *, locop::NodeSummary &s) const
-{
- s.state(locop::NodeSummary::State::PartiallyKnown);
- return true;
-}
-
-bool SummaryBuilderLet<SB::ABC>::summary(const luci::CircleConv2D *node,
- locop::NodeSummary &s) const
-{
- return summary_node(tbl(), node, s);
-}
-
-bool SummaryBuilderLet<SB::ABC>::summary(const luci::CircleCos *node, locop::NodeSummary &s) const
-{
- return use_x(tbl(), node, s);
-}
-
-bool SummaryBuilderLet<SB::ABC>::summary(const luci::CircleCustom *node,
- locop::NodeSummary &s) const
-{
- return summary_node(tbl(), node, s);
-}
-
-bool SummaryBuilderLet<SB::DEF>::summary(const luci::CircleDepthToSpace *node,
- locop::NodeSummary &s) const
-{
- return summary_node(tbl(), node, s);
-}
-
-bool SummaryBuilderLet<SB::DEF>::summary(const luci::CircleDepthwiseConv2D *node,
- locop::NodeSummary &s) const
-{
- return summary_node(tbl(), node, s);
-}
-
-bool SummaryBuilderLet<SB::DEF>::summary(const luci::CircleDequantize *node,
- locop::NodeSummary &s) const
-{
- return use_input(tbl(), node, s);
-}
-
-bool SummaryBuilderLet<SB::DEF>::summary(const luci::CircleDiv *node, locop::NodeSummary &s) const
-{
- return use_xy(tbl(), node, s);
-}
-
-bool SummaryBuilderLet<SB::DEF>::summary(const luci::CircleElu *node, locop::NodeSummary &s) const
-{
- return use_features(tbl(), node, s);
-}
-
-bool SummaryBuilderLet<SB::DEF>::summary(const luci::CircleEqual *node, locop::NodeSummary &s) const
-{
- return use_xy(tbl(), node, s);
-}
-
-bool SummaryBuilderLet<SB::DEF>::summary(const luci::CircleExp *node, locop::NodeSummary &s) const
-{
- return use_x(tbl(), node, s);
-}
-
-bool SummaryBuilderLet<SB::DEF>::summary(const luci::CircleExpandDims *node,
- locop::NodeSummary &s) const
-{
- return summary_node(tbl(), node, s);
-}
-
-bool SummaryBuilderLet<SB::DEF>::summary(const luci::CircleFakeQuant *node,
- locop::NodeSummary &s) const
-{
- return summary_node(tbl(), node, s);
-}
-
-bool SummaryBuilderLet<SB::DEF>::summary(const luci::CircleFill *node, locop::NodeSummary &s) const
-{
- return summary_node(tbl(), node, s);
-}
-
-bool SummaryBuilderLet<SB::DEF>::summary(const luci::CircleFloor *node, locop::NodeSummary &s) const
-{
- return use_x(tbl(), node, s);
-}
-
-bool SummaryBuilderLet<SB::DEF>::summary(const luci::CircleFloorDiv *node,
- locop::NodeSummary &s) const
-{
- return use_xy(tbl(), node, s);
-}
-
-bool SummaryBuilderLet<SB::DEF>::summary(const luci::CircleFloorMod *node,
- locop::NodeSummary &s) const
-{
- return use_xy(tbl(), node, s);
-}
-
-bool SummaryBuilderLet<SB::DEF>::summary(const luci::CircleFullyConnected *node,
- locop::NodeSummary &s) const
-{
- return summary_node(tbl(), node, s);
-}
-
-bool SummaryBuilderLet<SB::GHIJ>::summary(const luci::CircleGather *node,
- locop::NodeSummary &s) const
-{
- return summary_node(tbl(), node, s);
-}
-
-bool SummaryBuilderLet<SB::GHIJ>::summary(const luci::CircleGatherNd *node,
- locop::NodeSummary &s) const
-{
- return summary_node(tbl(), node, s);
-}
-
-bool SummaryBuilderLet<SB::GHIJ>::summary(const luci::CircleGreater *node,
- locop::NodeSummary &s) const
-{
- return use_xy(tbl(), node, s);
-}
-
-bool SummaryBuilderLet<SB::GHIJ>::summary(const luci::CircleGreaterEqual *node,
- locop::NodeSummary &s) const
-{
- return use_xy(tbl(), node, s);
-}
-
-bool SummaryBuilderLet<SB::GHIJ>::summary(const luci::CircleIf *node, locop::NodeSummary &s) const
-{
- return summary_node(tbl(), node, s);
-}
-
-bool SummaryBuilderLet<SB::KLMN>::summary(const luci::CircleL2Normalize *node,
- locop::NodeSummary &s) const
-{
- return summary_node(tbl(), node, s);
-}
-
-bool SummaryBuilderLet<SB::KLMN>::summary(const luci::CircleL2Pool2D *node,
- locop::NodeSummary &s) const
-{
- return summary_node(tbl(), node, s);
-}
-
-bool SummaryBuilderLet<SB::KLMN>::summary(const luci::CircleLess *node, locop::NodeSummary &s) const
-{
- return use_xy(tbl(), node, s);
-}
-
-bool SummaryBuilderLet<SB::KLMN>::summary(const luci::CircleLessEqual *node,
- locop::NodeSummary &s) const
-{
- return use_xy(tbl(), node, s);
-}
-
-bool SummaryBuilderLet<SB::KLMN>::summary(const luci::CircleLeakyRelu *node,
- locop::NodeSummary &s) const
-{
- return summary_node(tbl(), node, s);
-}
-
-bool SummaryBuilderLet<SB::KLMN>::summary(const luci::CircleLocalResponseNormalization *node,
- locop::NodeSummary &s) const
-{
- return summary_node(tbl(), node, s);
-}
-
-bool SummaryBuilderLet<SB::KLMN>::summary(const luci::CircleLog *node, locop::NodeSummary &s) const
-{
- return use_x(tbl(), node, s);
-}
-
-bool SummaryBuilderLet<SB::KLMN>::summary(const luci::CircleLogicalAnd *node,
- locop::NodeSummary &s) const
-{
- return use_xy(tbl(), node, s);
-}
-
-bool SummaryBuilderLet<SB::KLMN>::summary(const luci::CircleLogicalNot *node,
- locop::NodeSummary &s) const
-{
- return use_x(tbl(), node, s);
-}
-
-bool SummaryBuilderLet<SB::KLMN>::summary(const luci::CircleLogicalOr *node,
- locop::NodeSummary &s) const
-{
- return use_xy(tbl(), node, s);
-}
-
-bool SummaryBuilderLet<SB::KLMN>::summary(const luci::CircleLogistic *node,
- locop::NodeSummary &s) const
-{
- return use_x(tbl(), node, s);
-}
-
-bool SummaryBuilderLet<SB::KLMN>::summary(const luci::CircleLogSoftmax *node,
- locop::NodeSummary &s) const
-{
- return summary_node(tbl(), node, s);
-}
-
-bool SummaryBuilderLet<SB::KLMN>::summary(const luci::CircleMatrixDiag *node,
- locop::NodeSummary &s) const
-{
- return summary_node(tbl(), node, s);
-}
-
-bool SummaryBuilderLet<SB::KLMN>::summary(const luci::CircleMatrixSetDiag *node,
- locop::NodeSummary &s) const
-{
- return summary_node(tbl(), node, s);
-}
-
-bool SummaryBuilderLet<SB::KLMN>::summary(const luci::CircleMaximum *node,
- locop::NodeSummary &s) const
-{
- return use_xy(tbl(), node, s);
-}
-
-bool SummaryBuilderLet<SB::KLMN>::summary(const luci::CircleMaxPool2D *node,
- locop::NodeSummary &s) const
-{
- return summary_node(tbl(), node, s);
-}
-
-bool SummaryBuilderLet<SB::KLMN>::summary(const luci::CircleMean *node, locop::NodeSummary &s) const
-{
- return use_reducer(tbl(), node, s);
-}
-
-bool SummaryBuilderLet<SB::KLMN>::summary(const luci::CircleMinimum *node,
- locop::NodeSummary &s) const
-{
- return use_xy(tbl(), node, s);
-}
-
-bool SummaryBuilderLet<SB::KLMN>::summary(const luci::CircleMirrorPad *node,
- locop::NodeSummary &s) const
-{
- return summary_node(tbl(), node, s);
-}
-
-bool SummaryBuilderLet<SB::KLMN>::summary(const luci::CircleMul *node, locop::NodeSummary &s) const
-{
- return use_xy_act(tbl(), node, s);
-}
-
-bool SummaryBuilderLet<SB::KLMN>::summary(const luci::CircleNeg *node, locop::NodeSummary &s) const
-{
- return use_x(tbl(), node, s);
-}
-
-bool SummaryBuilderLet<SB::KLMN>::summary(const luci::CircleNonMaxSuppressionV4 *node,
- locop::NodeSummary &s) const
-{
- return summary_node(tbl(), node, s);
-}
-
-bool SummaryBuilderLet<SB::KLMN>::summary(const luci::CircleNonMaxSuppressionV5 *node,
- locop::NodeSummary &s) const
-{
- return summary_node(tbl(), node, s);
-}
-
-bool SummaryBuilderLet<SB::KLMN>::summary(const luci::CircleNotEqual *node,
- locop::NodeSummary &s) const
-{
- return use_xy(tbl(), node, s);
-}
-
-bool SummaryBuilderLet<SB::OPQR>::summary(const luci::CircleOneHot *node,
- locop::NodeSummary &s) const
-{
- return summary_node(tbl(), node, s);
-}
-
-bool SummaryBuilderLet<SB::OPQR>::summary(const luci::CirclePack *node, locop::NodeSummary &s) const
-{
- return summary_node(tbl(), node, s);
-}
-
-bool SummaryBuilderLet<SB::OPQR>::summary(const luci::CirclePad *node, locop::NodeSummary &s) const
-{
- return summary_node(tbl(), node, s);
-}
-
-bool SummaryBuilderLet<SB::OPQR>::summary(const luci::CirclePadV2 *node,
- locop::NodeSummary &s) const
-{
- return summary_node(tbl(), node, s);
-}
-
-bool SummaryBuilderLet<SB::OPQR>::summary(const luci::CirclePow *node, locop::NodeSummary &s) const
-{
- return use_xy(tbl(), node, s);
-}
-
-bool SummaryBuilderLet<SB::OPQR>::summary(const luci::CirclePRelu *node,
- locop::NodeSummary &s) const
-{
- return summary_node(tbl(), node, s);
-}
-
-bool SummaryBuilderLet<SB::OPQR>::summary(const luci::CircleQuantize *node,
- locop::NodeSummary &s) const
-{
- return use_input(tbl(), node, s);
-}
-
-bool SummaryBuilderLet<SB::OPQR>::summary(const luci::CircleRange *node,
- locop::NodeSummary &s) const
-{
- return summary_node(tbl(), node, s);
-}
-
-bool SummaryBuilderLet<SB::OPQR>::summary(const luci::CircleRank *node, locop::NodeSummary &s) const
-{
- return use_input(tbl(), node, s);
-}
-
-bool SummaryBuilderLet<SB::OPQR>::summary(const luci::CircleReduceAny *node,
- locop::NodeSummary &s) const
-{
- return use_reducer(tbl(), node, s);
-}
-
-bool SummaryBuilderLet<SB::OPQR>::summary(const luci::CircleReduceMax *node,
- locop::NodeSummary &s) const
-{
- return use_reducer(tbl(), node, s);
-}
-
-bool SummaryBuilderLet<SB::OPQR>::summary(const luci::CircleReduceMin *node,
- locop::NodeSummary &s) const
-{
- return use_reducer(tbl(), node, s);
-}
-
-bool SummaryBuilderLet<SB::OPQR>::summary(const luci::CircleReduceProd *node,
- locop::NodeSummary &s) const
-{
- return use_reducer(tbl(), node, s);
-}
-
-bool SummaryBuilderLet<SB::OPQR>::summary(const luci::CircleRelu *node, locop::NodeSummary &s) const
-{
- return use_features(tbl(), node, s);
-}
-
-bool SummaryBuilderLet<SB::OPQR>::summary(const luci::CircleRelu6 *node,
- locop::NodeSummary &s) const
-{
- return use_features(tbl(), node, s);
-}
-
-bool SummaryBuilderLet<SB::OPQR>::summary(const luci::CircleReluN1To1 *node,
- locop::NodeSummary &s) const
-{
- return use_features(tbl(), node, s);
-}
-
-bool SummaryBuilderLet<SB::OPQR>::summary(const luci::CircleReshape *node,
- locop::NodeSummary &s) const
-{
- return summary_node(tbl(), node, s);
-}
-
-bool SummaryBuilderLet<SB::OPQR>::summary(const luci::CircleResizeBilinear *node,
- locop::NodeSummary &s) const
-{
- return summary_node(tbl(), node, s);
-}
-
-bool SummaryBuilderLet<SB::OPQR>::summary(const luci::CircleResizeNearestNeighbor *node,
- locop::NodeSummary &s) const
-{
- return summary_node(tbl(), node, s);
-}
-
-bool SummaryBuilderLet<SB::OPQR>::summary(const luci::CircleReverseSequence *node,
- locop::NodeSummary &s) const
-{
- return summary_node(tbl(), node, s);
-}
-
-bool SummaryBuilderLet<SB::OPQR>::summary(const luci::CircleReverseV2 *node,
- locop::NodeSummary &s) const
-{
- return summary_node(tbl(), node, s);
-}
-
-bool SummaryBuilderLet<SB::OPQR>::summary(const luci::CircleRound *node,
- locop::NodeSummary &s) const
-{
- return use_x(tbl(), node, s);
-}
-
-bool SummaryBuilderLet<SB::OPQR>::summary(const luci::CircleRsqrt *node,
- locop::NodeSummary &s) const
-{
- return use_x(tbl(), node, s);
-}
-
-bool SummaryBuilderLet<SB::STUV>::summary(const luci::CircleScatterNd *node,
- locop::NodeSummary &s) const
-{
- return summary_node(tbl(), node, s);
-}
-
-bool SummaryBuilderLet<SB::STUV>::summary(const luci::CircleSegmentSum *node,
- locop::NodeSummary &s) const
-{
- return summary_node(tbl(), node, s);
-}
-
-bool SummaryBuilderLet<SB::STUV>::summary(const luci::CircleSelect *node,
- locop::NodeSummary &s) const
-{
- return summary_node(tbl(), node, s);
-}
-
-bool SummaryBuilderLet<SB::STUV>::summary(const luci::CircleSelectV2 *node,
- locop::NodeSummary &s) const
-{
- return summary_node(tbl(), node, s);
-}
-
-bool SummaryBuilderLet<SB::STUV>::summary(const luci::CircleShape *node,
- locop::NodeSummary &s) const
-{
- return summary_node(tbl(), node, s);
-}
-
-bool SummaryBuilderLet<SB::STUV>::summary(const luci::CircleSin *node, locop::NodeSummary &s) const
-{
- return use_x(tbl(), node, s);
-}
-
-bool SummaryBuilderLet<SB::STUV>::summary(const luci::CircleSlice *node,
- locop::NodeSummary &s) const
-{
- return summary_node(tbl(), node, s);
-}
-
-bool SummaryBuilderLet<SB::STUV>::summary(const luci::CircleSoftmax *node,
- locop::NodeSummary &s) const
-{
- return summary_node(tbl(), node, s);
-}
-
-bool SummaryBuilderLet<SB::STUV>::summary(const luci::CircleSpaceToBatchND *node,
- locop::NodeSummary &s) const
-{
- return summary_node(tbl(), node, s);
-}
-
-bool SummaryBuilderLet<SB::STUV>::summary(const luci::CircleSpaceToDepth *node,
- locop::NodeSummary &s) const
-{
- return summary_node(tbl(), node, s);
-}
-
-bool SummaryBuilderLet<SB::STUV>::summary(const luci::CircleSparseToDense *node,
- locop::NodeSummary &s) const
-{
- return summary_node(tbl(), node, s);
-}
-
-bool SummaryBuilderLet<SB::STUV>::summary(const luci::CircleSplit *node,
- locop::NodeSummary &s) const
-{
- return summary_node(tbl(), node, s);
-}
-
-bool SummaryBuilderLet<SB::STUV>::summary(const luci::CircleSplitV *node,
- locop::NodeSummary &s) const
-{
- return summary_node(tbl(), node, s);
-}
-
-bool SummaryBuilderLet<SB::STUV>::summary(const luci::CircleSqrt *node, locop::NodeSummary &s) const
-{
- return use_x(tbl(), node, s);
-}
-
-bool SummaryBuilderLet<SB::STUV>::summary(const luci::CircleSquare *node,
- locop::NodeSummary &s) const
-{
- return use_x(tbl(), node, s);
-}
-
-bool SummaryBuilderLet<SB::STUV>::summary(const luci::CircleSquaredDifference *node,
- locop::NodeSummary &s) const
-{
- return use_xy(tbl(), node, s);
-}
-
-bool SummaryBuilderLet<SB::STUV>::summary(const luci::CircleSqueeze *node,
- locop::NodeSummary &s) const
-{
- return summary_node(tbl(), node, s);
-}
-
-bool SummaryBuilderLet<SB::STUV>::summary(const luci::CircleStridedSlice *node,
- locop::NodeSummary &s) const
-{
- return summary_node(tbl(), node, s);
-}
-
-bool SummaryBuilderLet<SB::STUV>::summary(const luci::CircleSub *node, locop::NodeSummary &s) const
-{
- return use_xy(tbl(), node, s);
-}
-
-bool SummaryBuilderLet<SB::STUV>::summary(const luci::CircleSum *node, locop::NodeSummary &s) const
-{
- return use_reducer(tbl(), node, s);
-}
-
-bool SummaryBuilderLet<SB::STUV>::summary(const luci::CircleTanh *node, locop::NodeSummary &s) const
-{
- return use_x(tbl(), node, s);
-}
-
-bool SummaryBuilderLet<SB::STUV>::summary(const luci::CircleTile *node, locop::NodeSummary &s) const
-{
- return summary_node(tbl(), node, s);
-}
-
-bool SummaryBuilderLet<SB::STUV>::summary(const luci::CircleTopKV2 *node,
- locop::NodeSummary &s) const
-{
- return summary_node(tbl(), node, s);
-}
-
-bool SummaryBuilderLet<SB::STUV>::summary(const luci::CircleTranspose *node,
- locop::NodeSummary &s) const
-{
- return summary_node(tbl(), node, s);
-}
-
-bool SummaryBuilderLet<SB::STUV>::summary(const luci::CircleTransposeConv *node,
- locop::NodeSummary &s) const
-{
- return summary_node(tbl(), node, s);
-}
-
-bool SummaryBuilderLet<SB::STUV>::summary(const luci::CircleUnidirectionalSequenceLSTM *node,
- locop::NodeSummary &s) const
-{
- return summary_node(tbl(), node, s);
-}
-
-bool SummaryBuilderLet<SB::STUV>::summary(const luci::CircleUnique *node,
- locop::NodeSummary &s) const
-{
- return summary_node(tbl(), node, s);
-}
-
-bool SummaryBuilderLet<SB::STUV>::summary(const luci::CircleUnpack *node,
- locop::NodeSummary &s) const
-{
- return summary_node(tbl(), node, s);
-}
-
-bool SummaryBuilderLet<SB::WXYZ>::summary(const luci::CircleWhere *node,
- locop::NodeSummary &s) const
-{
- return summary_node(tbl(), node, s);
-}
-
-bool SummaryBuilderLet<SB::WXYZ>::summary(const luci::CircleWhile *node,
- locop::NodeSummary &s) const
-{
- return summary_node(tbl(), node, s);
-}
-
-bool SummaryBuilderLet<SB::WXYZ>::summary(const luci::CircleZerosLike *node,
- locop::NodeSummary &s) const
-{
- return use_input(tbl(), node, s);
-}
-
-bool SummaryBuilderLet<SB::CIRC>::summary(const luci::CircleBCQFullyConnected *node,
- locop::NodeSummary &s) const
-{
- return summary_node(tbl(), node, s);
-}
-
-bool SummaryBuilderLet<SB::CIRC>::summary(const luci::CircleBCQGather *node,
- locop::NodeSummary &s) const
-{
- return summary_node(tbl(), node, s);
-}
-
-bool SummaryBuilderLet<SB::CIRC>::summary(const luci::CircleInstanceNorm *node,
- locop::NodeSummary &s) const
-{
- return summary_node(tbl(), node, s);
-}
-
-bool SummaryBuilderLet<SB::VIRT>::summary(const luci::CircleInput *, locop::NodeSummary &s) const
-{
- s.state(locop::NodeSummary::State::Complete);
- return true;
-}
-
-bool SummaryBuilderLet<SB::VIRT>::summary(const luci::CircleOutput *node,
- locop::NodeSummary &s) const
-{
- return summary_node(tbl(), node, s);
-}
-
-bool SummaryBuilderLet<SB::VIRT>::summary(const luci::CircleCustomOut *node,
- locop::NodeSummary &s) const
-{
- return use_input(tbl(), node, s);
-}
-
-bool SummaryBuilderLet<SB::VIRT>::summary(const luci::CircleIfOut *node,
- locop::NodeSummary &s) const
-{
- return use_input(tbl(), node, s);
-}
-
-bool SummaryBuilderLet<SB::VIRT>::summary(const luci::CircleNonMaxSuppressionV4Out *node,
- locop::NodeSummary &s) const
-{
- return use_input(tbl(), node, s);
-}
-
-bool SummaryBuilderLet<SB::VIRT>::summary(const luci::CircleNonMaxSuppressionV5Out *node,
- locop::NodeSummary &s) const
-{
- return use_input(tbl(), node, s);
-}
-
-bool SummaryBuilderLet<SB::VIRT>::summary(const luci::CircleOutputDummy *node,
- locop::NodeSummary &s) const
-{
- return summary_node(tbl(), node, s);
-}
-
-bool SummaryBuilderLet<SB::VIRT>::summary(const luci::CircleOutputExclude *node,
- locop::NodeSummary &s) const
-{
- return summary_node(tbl(), node, s);
-}
-
-bool SummaryBuilderLet<SB::VIRT>::summary(const luci::CircleSplitOut *node,
- locop::NodeSummary &s) const
-{
- return use_input(tbl(), node, s);
-}
-
-bool SummaryBuilderLet<SB::VIRT>::summary(const luci::CircleSplitVOut *node,
- locop::NodeSummary &s) const
-{
- return use_input(tbl(), node, s);
-}
-
-bool SummaryBuilderLet<SB::VIRT>::summary(const luci::CircleTopKV2Out *node,
- locop::NodeSummary &s) const
-{
- return summary_node(tbl(), node, s);
-}
-
-bool SummaryBuilderLet<SB::VIRT>::summary(const luci::CircleUniqueOut *node,
- locop::NodeSummary &s) const
-{
- return summary_node(tbl(), node, s);
-}
-
-bool SummaryBuilderLet<SB::VIRT>::summary(const luci::CircleUnpackOut *node,
- locop::NodeSummary &s) const
-{
- return summary_node(tbl(), node, s);
-}
-
-bool SummaryBuilderLet<SB::VIRT>::summary(const luci::CircleWhileOut *node,
- locop::NodeSummary &s) const
-{
- return summary_node(tbl(), node, s);
-}
-
-} // namespace
-
namespace luci
{
@@ -2208,22 +36,10 @@ bool NodeSummaryBuilder::build(const loco::Node *node, locop::NodeSummary &s) co
return true;
}
-#define BUILD_GRP(GRP) \
- do \
- { \
- if (SummaryBuilderLet<SB::GRP>(_tbl).build(node, s)) \
- return true; \
- } while (false)
-
- BUILD_GRP(ABC);
- BUILD_GRP(DEF);
- BUILD_GRP(GHIJ);
- BUILD_GRP(KLMN);
- BUILD_GRP(OPQR);
- BUILD_GRP(STUV);
- BUILD_GRP(WXYZ);
- BUILD_GRP(CIRC);
- BUILD_GRP(VIRT);
+ if (CircleNodeSummaryBuilder().build(node, _tbl, s))
+ {
+ return true;
+ }
return false;
}
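
The hunk above is the other half of the removal: the per-bucket BUILD_GRP probes give way to a single CircleNodeSummaryBuilder facade that owns the per-node dispatch internally. Taking the signature directly from the hunk, the whole call site reduces to:

    // New call shape, as introduced by this hunk:
    locop::NodeSummary s;
    if (CircleNodeSummaryBuilder().build(node, _tbl, s))
      return true; // summary filled in
    return false;  // not handled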
diff --git a/compiler/luci/partition/CMakeLists.txt b/compiler/luci/partition/CMakeLists.txt
index ec8e0b0d6..f28207df2 100644
--- a/compiler/luci/partition/CMakeLists.txt
+++ b/compiler/luci/partition/CMakeLists.txt
@@ -13,7 +13,7 @@ target_link_libraries(luci_partition PUBLIC luci_lang)
target_link_libraries(luci_partition PRIVATE luci_service)
target_link_libraries(luci_partition PRIVATE luci_log)
target_link_libraries(luci_partition PRIVATE luci_logex)
-target_link_libraries(luci_partition PRIVATE mio_circle)
+target_link_libraries(luci_partition PRIVATE mio_circle04)
target_link_libraries(luci_partition PRIVATE nncc_common)
target_link_libraries(luci_partition PRIVATE pepper_csv2vec)
target_link_libraries(luci_partition PRIVATE oops)
diff --git a/compiler/luci/partition/src/ConnectNode.h b/compiler/luci/partition/src/ConnectNode.h
index ebbff7a6a..e60567c69 100644
--- a/compiler/luci/partition/src/ConnectNode.h
+++ b/compiler/luci/partition/src/ConnectNode.h
@@ -161,6 +161,7 @@ public:
void visit(const luci::CircleSquaredDifference *) final;
void visit(const luci::CircleSqueeze *) final;
void visit(const luci::CircleStridedSlice *) final;
+ void visit(const luci::CircleSVDF *) final;
void visit(const luci::CircleSub *) final;
void visit(const luci::CircleSum *) final;
void visit(const luci::CircleTanh *) final;
@@ -197,6 +198,7 @@ public:
void visit(const luci::CircleTopKV2Out *) final;
void visit(const luci::CircleUniqueOut *) final;
void visit(const luci::CircleUnpackOut *) final;
+ void visit(const luci::CircleVariable *) final;
void visit(const luci::CircleWhileOut *) final;
public:
diff --git a/compiler/luci/partition/src/Nodes/CircleSVDF.cpp b/compiler/luci/partition/src/Nodes/CircleSVDF.cpp
new file mode 100644
index 000000000..f661a794c
--- /dev/null
+++ b/compiler/luci/partition/src/Nodes/CircleSVDF.cpp
@@ -0,0 +1,47 @@
+/*
+ * Copyright (c) 2022 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "ConnectNode.h"
+
+namespace
+{
+
+void connect(luci::ConnectNode *cn, const luci::CircleSVDF *node)
+{
+ auto *cloned = loco::must_cast<luci::CircleSVDF *>(cn->find_clone(node));
+
+ luci::CircleNode *input = loco::must_cast<luci::CircleNode *>(node->input());
+ luci::CircleNode *weight_feature = loco::must_cast<luci::CircleNode *>(node->weight_feature());
+ luci::CircleNode *weight_time = loco::must_cast<luci::CircleNode *>(node->weight_time());
+ luci::CircleNode *bias = loco::must_cast<luci::CircleNode *>(node->bias());
+ luci::CircleNode *input_activation_state =
+ loco::must_cast<luci::CircleNode *>(node->input_activation_state());
+
+ cloned->input(cn->find_clone(input));
+ cloned->weight_feature(cn->find_clone(weight_feature));
+ cloned->weight_time(cn->find_clone(weight_time));
+ cloned->bias(cn->find_clone(bias));
+ cloned->input_activation_state(cn->find_clone(input_activation_state));
+}
+
+} // namespace
+
+namespace luci
+{
+
+void ConnectNode::visit(const luci::CircleSVDF *node) { connect(this, node); }
+
+} // namespace luci
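
The connect() helper above follows the partitioner's general clone-and-rewire pattern: every operand of the original node is mapped to its clone before being wired into the cloned node. A minimal standalone sketch of that pattern, with simplified types (Node and the clone map are illustrative, not luci API):

#include <cassert>
#include <cstddef>
#include <map>
#include <vector>

struct Node
{
  std::vector<Node *> args; // inputs of the node
};

// Point each clone's inputs at the clones of the original's inputs.
// An input that was never cloned trips the assert, mirroring a find_clone failure.
void connect_clones(const std::map<Node *, Node *> &clone_of)
{
  for (const auto &pair : clone_of)
  {
    const Node *orig = pair.first;
    Node *clone = pair.second;
    clone->args.resize(orig->args.size());
    for (std::size_t i = 0; i < orig->args.size(); ++i)
    {
      auto it = clone_of.find(orig->args[i]);
      assert(it != clone_of.end());
      clone->args[i] = it->second;
    }
  }
}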
diff --git a/compiler/luci/partition/src/Nodes/CircleSVDF.test.cpp b/compiler/luci/partition/src/Nodes/CircleSVDF.test.cpp
new file mode 100644
index 000000000..5fae5206e
--- /dev/null
+++ b/compiler/luci/partition/src/Nodes/CircleSVDF.test.cpp
@@ -0,0 +1,106 @@
+/*
+ * Copyright (c) 2022 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "ConnectNode.h"
+
+#include "ConnectNode.test.h"
+
+#include <luci/Service/CircleNodeClone.h>
+
+#include <gtest/gtest.h>
+
+namespace
+{
+
+using namespace luci::test;
+
+class NodeGraphlet : public NodeGraphletT<luci::CircleSVDF>
+{
+public:
+ NodeGraphlet() = default;
+
+public:
+ void init(loco::Graph *g)
+ {
+ NodeGraphletT<luci::CircleSVDF>::init(g);
+
+ _node->fusedActivationFunction(luci::FusedActFunc::RELU);
+ }
+};
+
+class TestNodeGraph : public TestIsOGraph<5>, public NodeGraphlet
+{
+public:
+ TestNodeGraph() = default;
+
+public:
+ void init(const ShapeU32 shape)
+ {
+ TestIsOGraph<5>::init({shape, shape, shape, shape, shape}, shape);
+ NodeGraphlet::init(g());
+
+ node()->input(input(0));
+ node()->weight_feature(input(1));
+ node()->weight_time(input(2));
+ node()->bias(input(3));
+ node()->input_activation_state(input(4));
+
+ output()->from(node());
+ }
+};
+
+} // namespace
+
+TEST(ConnectNodeTest, connect_SVDF)
+{
+ TestNodeGraph tng;
+ tng.init({2, 3});
+
+ ConnectionTestHelper cth;
+ cth.prepare_inputs(&tng);
+
+ auto *node = tng.node();
+ ASSERT_NO_THROW(loco::must_cast<luci::CircleSVDF *>(node));
+
+ auto *clone = luci::clone_node(node, cth.graph_clone());
+ ASSERT_NO_THROW(loco::must_cast<luci::CircleSVDF *>(clone));
+
+ cth.clone_connect(node, clone);
+
+ ASSERT_EQ(5, clone->arity());
+ ASSERT_EQ(cth.inputs(0), clone->arg(0));
+ ASSERT_EQ(cth.inputs(1), clone->arg(1));
+ ASSERT_EQ(cth.inputs(2), clone->arg(2));
+ ASSERT_EQ(cth.inputs(3), clone->arg(3));
+ ASSERT_EQ(cth.inputs(4), clone->arg(4));
+}
+
+TEST(ConnectNodeTest, connect_SVDF_NEG)
+{
+ TestNodeGraph tng;
+ tng.init({2, 3});
+
+ ConnectionTestHelper cth;
+ cth.prepare_inputs_miss(&tng);
+
+ auto *node = tng.node();
+ ASSERT_NO_THROW(loco::must_cast<luci::CircleSVDF *>(node));
+
+ auto *clone = luci::clone_node(node, cth.graph_clone());
+ ASSERT_NO_THROW(loco::must_cast<luci::CircleSVDF *>(clone));
+
+ EXPECT_ANY_THROW(cth.clone_connect(node, clone));
+}
diff --git a/compiler/luci/partition/src/Nodes/CircleVariable.cpp b/compiler/luci/partition/src/Nodes/CircleVariable.cpp
new file mode 100644
index 000000000..f7f6f21fd
--- /dev/null
+++ b/compiler/luci/partition/src/Nodes/CircleVariable.cpp
@@ -0,0 +1,27 @@
+/*
+ * Copyright (c) 2022 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "ConnectNode.h"
+
+namespace luci
+{
+
+void ConnectNode::visit(const luci::CircleVariable *)
+{
+ // Nothing to do
+}
+
+} // namespace luci
diff --git a/compiler/luci/partition/src/PartitionIRDump.cpp b/compiler/luci/partition/src/PartitionIRDump.cpp
index 4f2c26800..0fabfc416 100644
--- a/compiler/luci/partition/src/PartitionIRDump.cpp
+++ b/compiler/luci/partition/src/PartitionIRDump.cpp
@@ -32,18 +32,18 @@ void dump(std::ostream &os, const PNode *pnode)
void dump(std::ostream &os, const PGroup *pgroup)
{
os << "--- PGroup: " << pgroup->group << std::endl;
- os << "Input(s): ";
+ os << "Input(s): [ ";
for (auto &node_in : pgroup->inputs)
os << node_in->name() << " ";
- os << std::endl;
+ os << "]" << std::endl;
for (auto &pnode : pgroup->pnodes)
{
dump(os, pnode.get());
}
- os << "Output(s): ";
+ os << "Output(s): [ ";
for (auto &node_out : pgroup->outputs)
os << node_out->name() << " ";
- os << std::endl;
+ os << "]" << std::endl;
}
void dump(std::ostream &os, const PGroups *pgroups)
@@ -57,7 +57,8 @@ void dump(std::ostream &os, const PGroups *pgroups)
{
auto node = it->first;
auto group = it->second;
- os << " Node: " << node << "(" << node->name() << "): " << group << std::endl;
+ os << " Node: " << node << "(" << luci::opcode_name(node) << "," << node->name()
+ << "): " << group << std::endl;
}
}
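
The bracketed form makes empty input/output lists visible at a glance. A minimal standalone sketch of the format (the names are illustrative):

#include <iostream>
#include <string>
#include <vector>

int main()
{
  std::vector<std::string> inputs{"mul_1", "add_2"};
  std::cout << "Input(s): [ ";
  for (const auto &name : inputs)
    std::cout << name << " ";
  std::cout << "]" << std::endl; // prints: Input(s): [ mul_1 add_2 ]
  return 0;
}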
diff --git a/compiler/luci/partition/src/PartitionMerge.cpp b/compiler/luci/partition/src/PartitionMerge.cpp
index c517bf93f..4c3971bd8 100644
--- a/compiler/luci/partition/src/PartitionMerge.cpp
+++ b/compiler/luci/partition/src/PartitionMerge.cpp
@@ -58,9 +58,6 @@ bool is_input_same(const luci::PGroup *pgroup, const luci::PGroups *pgroups)
// we need to clone this CircleConst for each graph of the group.
if (dynamic_cast<const luci::CircleConst *>(input) != nullptr)
continue;
- // Skip also for OutputExclude
- if (dynamic_cast<const luci::CircleOutputExclude *>(input) != nullptr)
- continue;
auto input_group = pgroups->group_of(input);
// NOTE: all the nodes should be registered and return should be valid group.
@@ -87,7 +84,7 @@ bool is_input_same(const luci::PGroup *pgroup, const luci::PGroups *pgroups)
input_pgroup = pgroup_input;
else
{
- if (input_pgroup != pgroup_input)
+ if (input_pgroup->group != pgroup_input->group)
return false;
}
}
@@ -96,6 +93,48 @@ bool is_input_same(const luci::PGroup *pgroup, const luci::PGroups *pgroups)
}
/**
+ * @brief return true if every output of pgroup is fed to nodes of the same group
+ * @note  pgroups is used to find the group of each node that uses an output
+ * ex)
+ * /-- pgroup_user_1 (grp_1)
+ * --- pgroup
+ * \-- pgroup_user_2 (grp_2)
+ *
+ * return false if grp_1 != grp_2
+ */
+bool is_output_same(const luci::PGroup *pgroup, const luci::PGroups *pgroups)
+{
+ assert(pgroups != nullptr);
+ assert(pgroup != nullptr);
+
+ for (auto &output : pgroup->outputs)
+ {
+ // get output_group
+ auto output_group = pgroups->group_of(output);
+ assert(not output_group.empty());
+ if (output_group.empty())
+ output_group = pgroups->default_group;
+
+ // find all PGroup that uses output
+ for (auto &pgroup_user : pgroups->pgroups)
+ {
+ for (auto &user_inputs : pgroup_user->inputs)
+ {
+ if (output == user_inputs)
+ {
+ // OK, these are connected, check group is same
+ if (pgroup_user->group != output_group)
+ return false;
+ }
+ }
+ }
+ }
+
+ return true;
+}
+
+/**
* @brief merge pgroup into pgroup_i
* @note output of pgroup_i should be input of pgroup
*/
@@ -191,6 +230,9 @@ std::unique_ptr<luci::PGroups> merge_pgroups(const luci::PGroups *s_pgroups)
// skip if there are multiple inputs but inputs differ in group
if (!is_input_same(pgroup.get(), d_pgroups.get()))
continue;
+ // skip if outputs of pgroup_i are fed to users in different groups
+ if (!is_output_same(pgroup_i.get(), d_pgroups.get()))
+ continue;
// TODO add more condition may be needed
merge_into(pgroup.get(), pgroup_i.get());
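
The new is_output_same() guard keeps a merge from gluing a producer to one consumer group while another group still reads its outputs. A standalone sketch of the check, with simplified types (User and the group strings are illustrative):

#include <string>
#include <vector>

struct User
{
  std::string group;
};

// Mirrors the guard above: merging is allowed only when every consumer of the
// producer's outputs lives in the same group as the output itself.
bool all_users_same_group(const std::vector<User> &users, const std::string &output_group)
{
  for (const auto &user : users)
    if (user.group != output_group)
      return false;
  return true;
}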
diff --git a/compiler/luci/partition/src/PartitionPGroups.cpp b/compiler/luci/partition/src/PartitionPGroups.cpp
index 0080873e6..eaeacf9c4 100644
--- a/compiler/luci/partition/src/PartitionPGroups.cpp
+++ b/compiler/luci/partition/src/PartitionPGroups.cpp
@@ -46,6 +46,9 @@ public:
bool visit(const luci::CircleUniqueOut *) final { return true; }
bool visit(const luci::CircleUnpackOut *) final { return true; }
bool visit(const luci::CircleWhileOut *) final { return true; }
+ // For virtual nodes that represent unused inputs
+ bool visit(const luci::CircleOutputExclude *) final { return true; }
+ bool visit(const luci::CircleVariable *) final { return true; }
// TODO add all virtual nodes
// default is false
@@ -69,59 +72,80 @@ bool check_allocate_partition(const luci::CircleNode *node)
return true;
}
-class FindGroupToFollow final : public luci::CircleNodeVisitor<const std::string &>
+} // namespace
+
+namespace
{
-public:
- FindGroupToFollow(const luci::PartitionTable &partition, luci::PGroups *pgroups)
- : _partition(partition), _pgroups(pgroups)
- {
- // NOTHING TODO
- }
-private:
- const std::string &groupof(const luci::CircleNode *input) const
+std::string group_from_partition(const luci::CircleNode *node,
+ const luci::PartitionTable &partition)
+{
+ LOGGER(l);
+
+ auto group = partition.default_group;
+
+ std::string opcodename; // opcodename or opname
+
+ switch (partition.comply)
{
- auto group = _pgroups->node2group[input];
- assert(not group.empty());
- if (group.empty())
- return _partition.default_group;
- return _pgroups->node2group[input];
+ case luci::PartitionTable::COMPLY::OPCODE:
+ {
+ opcodename = luci::opcode_name(node);
+ assert(!opcodename.empty());
+
+ auto it = partition.byopcodes.find(opcodename);
+ if (it != partition.byopcodes.end())
+ group = it->second;
+ break;
+ }
+ case luci::PartitionTable::COMPLY::OPNAME:
+ {
+ opcodename = node->name();
+ assert(!opcodename.empty());
+
+ auto it = partition.byopnames.find(opcodename);
+ if (it != partition.byopnames.end())
+ group = it->second;
+ break;
+ }
+
+ default:
+ throw std::runtime_error("Unsupported partition.comply");
}
+ INFO(l) << "Op: " << node->name() << ": " << opcodename << ", " << node << ", " << group
+ << std::endl;
+
+ return group;
+}
+
+class IsVirtualInputNode final : public luci::CircleNodeVisitor<bool>
+{
public:
-#define IMPLEMENT(CLASS) \
- const std::string &visit(const luci::CLASS *node) final \
- { \
- auto input = loco::must_cast<luci::CircleNode *>(node->input()); \
- return groupof(input); \
- }
+ // TODO check CircleOutputDummy
+ bool visit(const luci::CircleOutputExclude *) final { return true; }
+ bool visit(const luci::CircleVariable *) final { return true; }
- IMPLEMENT(CircleCustomOut);
- IMPLEMENT(CircleIfOut);
- IMPLEMENT(CircleNonMaxSuppressionV4Out);
- IMPLEMENT(CircleNonMaxSuppressionV5Out);
- IMPLEMENT(CircleSplitOut);
- IMPLEMENT(CircleSplitVOut);
- IMPLEMENT(CircleTopKV2Out);
- IMPLEMENT(CircleUniqueOut);
- IMPLEMENT(CircleUnpackOut);
- IMPLEMENT(CircleWhileOut);
-
-#undef IMPLEMENT
-
- // return empty for nothing to do
- const std::string &visit(const luci::CircleNode *) final { return _empty_str; }
-
-private:
- const luci::PartitionTable &_partition;
- luci::PGroups *_pgroups = nullptr;
- std::string _empty_str;
+ // default is false
+ bool visit(const luci::CircleNode *) final { return false; }
};
-} // namespace
-
-namespace
+class IsMultiOutputNode final : public luci::CircleNodeVisitor<bool>
{
+public:
+ bool visit(const luci::CircleCustom *) final { return true; }
+ bool visit(const luci::CircleIf *) final { return true; }
+ bool visit(const luci::CircleNonMaxSuppressionV4 *) final { return true; }
+ bool visit(const luci::CircleNonMaxSuppressionV5 *) final { return true; }
+ bool visit(const luci::CircleSplit *) final { return true; }
+ bool visit(const luci::CircleSplitV *) final { return true; }
+ bool visit(const luci::CircleTopKV2 *) final { return true; }
+ bool visit(const luci::CircleUnique *) final { return true; }
+ bool visit(const luci::CircleUnpack *) final { return true; }
+ bool visit(const luci::CircleWhile *) final { return true; }
+ // default is false
+ bool visit(const luci::CircleNode *) final { return false; }
+};
void append(luci::CircleNode *node, luci::PGroups *pgroups, const std::string &group, uint32_t idx)
{
@@ -136,17 +160,56 @@ void append(luci::CircleNode *node, luci::PGroups *pgroups, const std::string &g
pgroup->pnodes.push_back(std::move(pnode));
+ IsVirtualInputNode queryvi;
// Set input of PGroup
for (uint32_t in = 0; in < node->arity(); ++in)
{
auto input = loco::must_cast<luci::CircleNode *>(node->arg(in));
- // this input maybe CircleInput in source graph
- // --> not confident this is safe
- pgroup->inputs.push_back(input);
+ if (input->accept(&queryvi))
+ {
+ auto pnode = std::make_unique<luci::PNode>();
+ pnode->node = input;
+ pnode->group = group;
+ pnode->pgroup = pgroup.get();
+
+ pgroup->pnodes.push_back(std::move(pnode));
+
+ pgroups->node2group[input] = group;
+ }
+ else
+ {
+ // this input may be CircleInput in the source graph
+ // --> not confident this is safe
+ pgroup->inputs.push_back(input);
+ }
+ }
+
+ IsMultiOutputNode query;
+ if (node->accept(&query))
+ {
+ // Include CircleXXXOut virtual nodes in this group
+ auto succs = loco::succs(node);
+ for (auto &succ_node : succs)
+ {
+ auto nodeout = loco::must_cast<luci::CircleNode *>(succ_node);
+
+ auto pnode = std::make_unique<luci::PNode>();
+ pnode->node = nodeout;
+ pnode->group = group;
+ pnode->pgroup = pgroup.get();
+
+ pgroup->pnodes.push_back(std::move(pnode));
+
+ pgroups->node2group[nodeout] = group;
+
+ pgroup->outputs.push_back(nodeout);
+ }
+ }
+ else
+ {
+ // Set output of PGroup: node itself
+ pgroup->outputs.push_back(node);
}
- // Set output of PGroup: node itself or multiple virtual outputs
- // TODO support multiple virtual outputs
- pgroup->outputs.push_back(node);
pgroups->node2group[node] = group;
pgroups->id2pgroup[pgroup->id] = pgroup.get();
@@ -182,70 +245,9 @@ std::unique_ptr<luci::PGroups> produce_pgroups(const luci::Module *source,
// check if node is normal node that we are interested
if (check_allocate_partition(node))
{
- auto group = partition.default_group;
-
- std::string opcodename; // opcodename or opname
-
- switch (partition.comply)
- {
- case luci::PartitionTable::COMPLY::OPCODE:
- {
- opcodename = luci::opcode_name(node);
- assert(!opcodename.empty());
-
- auto it = partition.byopcodes.find(opcodename);
- if (it != partition.byopcodes.end())
- group = it->second;
- break;
- }
- case luci::PartitionTable::COMPLY::OPNAME:
- {
- opcodename = node->name();
- assert(!opcodename.empty());
-
- auto it = partition.byopnames.find(opcodename);
- if (it != partition.byopnames.end())
- group = it->second;
- break;
- }
-
- default:
- throw std::runtime_error("Unsupported partition.comply");
- }
-
- INFO(l) << "Op: " << node->name() << ": " << opcodename << ", " << node << ", " << group
- << std::endl;
+ auto group = group_from_partition(node, partition);
append(node, pgroups.get(), group, idx);
-#if 0
- auto pgroup = std::make_unique<luci::PGroup>();
- pgroup->group = group;
- pgroup->id = idx + 1;
-
- auto pnode = std::make_unique<luci::PNode>();
- pnode->node = node;
- pnode->group = group;
- pnode->pgroup = pgroup.get();
-
- pgroup->pnodes.push_back(std::move(pnode));
-
- // Set input of PGroup
- for (uint32_t in = 0; in < node->arity(); ++in)
- {
- auto input = loco::must_cast<luci::CircleNode *>(node->arg(in));
- // this input maybe CircleInput in source graph
- // --> not confident this is safe
- pgroup->inputs.push_back(input);
- }
- // Set output of PGroup: node itself or multiple virtual outputs
- // TODO support multiple virtual outputs
- pgroup->outputs.push_back(node);
-
- pgroups->node2group[node] = group;
- pgroups->id2pgroup[pgroup->id] = pgroup.get();
-
- pgroups->pgroups.push_back(std::move(pgroup));
-#endif
}
else
{
@@ -255,22 +257,6 @@ std::unique_ptr<luci::PGroups> produce_pgroups(const luci::Module *source,
}
}
- // handle for virtual nodes like multiple outputs
- // these nodes should follow group of the input
- for (uint32_t idx = 0; idx < nodes->size(); ++idx)
- {
- auto node = loco::must_cast<luci::CircleNode *>(nodes->at(idx));
-
- // for virtual nodes like CircleUnpackOut should follow it's input (owner)
- // or just set to default
- FindGroupToFollow query(partition, pgroups.get());
- const auto &group = node->accept(&query);
- if (not group.empty())
- {
- append(node, pgroups.get(), group, idx);
- }
- }
-
return std::move(pgroups);
}
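
group_from_partition() reduces to a keyed lookup with a default: the key is either the opcode name or the op name, depending on partition.comply. A standalone sketch of that lookup (types simplified):

#include <map>
#include <string>

// Returns the mapped group for `key` (an opcode name or op name), or the
// default group when the partition table has no entry for it.
std::string group_of(const std::map<std::string, std::string> &table, const std::string &key,
                     const std::string &default_group)
{
  auto it = table.find(key);
  return (it != table.end()) ? it->second : default_group;
}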
diff --git a/compiler/luci/pass/CMakeLists.txt b/compiler/luci/pass/CMakeLists.txt
index b8b406a38..5237c6d3f 100644
--- a/compiler/luci/pass/CMakeLists.txt
+++ b/compiler/luci/pass/CMakeLists.txt
@@ -1,4 +1,4 @@
-nnas_find_package(FlatBuffers EXACT 1.12 QUIET)
+nnas_find_package(FlatBuffers EXACT 2.0 QUIET)
if(NOT FlatBuffers_FOUND)
message(STATUS "FlatBuffers NOT FOUND")
return()
@@ -23,11 +23,11 @@ target_link_libraries(luci_pass PRIVATE luci_log)
target_link_libraries(luci_pass PRIVATE luci_service)
target_link_libraries(luci_pass PRIVATE luci_logex)
target_link_libraries(luci_pass PRIVATE luci_profile)
-target_link_libraries(luci_pass PRIVATE mio_tflite260_inc)
+target_link_libraries(luci_pass PRIVATE mio_tflite280_inc)
target_link_libraries(luci_pass PRIVATE nncc_common)
target_link_libraries(luci_pass PRIVATE pepper_csv2vec)
target_link_libraries(luci_pass PRIVATE oops)
-target_link_libraries(luci_pass PRIVATE flatbuffers-1.12)
+target_link_libraries(luci_pass PRIVATE flatbuffers-2.0)
install(TARGETS luci_pass DESTINATION lib)
install(DIRECTORY include/ DESTINATION include
FILES_MATCHING PATTERN "*.h")
@@ -43,5 +43,5 @@ target_include_directories(luci_pass_test PRIVATE src)
target_link_libraries(luci_pass_test luci_pass)
target_link_libraries(luci_pass_test luci_lang)
target_link_libraries(luci_pass_test luci_testhelper)
-target_link_libraries(luci_pass_test flatbuffers-1.12)
+target_link_libraries(luci_pass_test flatbuffers-2.0)
#target_link_libraries(luci_pass_test oops)
diff --git a/compiler/luci/pass/include/luci/CircleOptimizer.h b/compiler/luci/pass/include/luci/CircleOptimizer.h
index 658563ecf..c803898f6 100644
--- a/compiler/luci/pass/include/luci/CircleOptimizer.h
+++ b/compiler/luci/pass/include/luci/CircleOptimizer.h
@@ -47,15 +47,12 @@ public:
ResolveCustomOpBatchMatMul,
ResolveCustomOpMatMul,
ResolveCustomOpMaxPoolWithArgmax,
- QuantizeDequantizeWeights,
- QuantizeWithMinMax,
- Requantize,
FoldAddV2,
FoldCast,
FoldDepthwiseConv2D,
FoldDequantize,
+ FoldGather,
FoldSparseToDense,
- ForceQuantParam,
ForwardReshapeToUnaryOp,
SparsifyTensorPass,
FusePreActivationBatchNorm,
@@ -79,6 +76,7 @@ public:
TransformMinReluToRelu6Pass,
SubstituteStridedSliceToReshape,
SubstituteTransposeToReshape,
+ RemoveRedundantQuantize,
RemoveRedundantReshape,
RemoveFakeQuant,
RemoveQuantDequantSeq,
@@ -86,16 +84,6 @@ public:
enum AlgorithmParameters
{
- // quantize
- Quantize_input_model_dtype,
- Quantize_output_model_dtype,
- Quantize_granularity, // layer-wise or channel-wise
- Quantize_tensor_names,
- Quantize_scales,
- Quantize_zero_points,
- Quantize_input_type,
- Quantize_output_type,
-
// sparsify
Sparsify_tensor_name,
Sparsify_traversal_order,
@@ -114,8 +102,6 @@ public:
virtual bool query(Algorithm) = 0;
virtual void param(AlgorithmParameters, const std::string &) = 0;
virtual const std::string param(AlgorithmParameters) const = 0;
- virtual void params(AlgorithmParameters, std::vector<std::string> &) = 0;
- virtual std::vector<std::string> params(AlgorithmParameters) const = 0;
};
public:
@@ -127,8 +113,6 @@ public:
void optimize(loco::Graph *) const;
- void quantize(loco::Graph *) const;
-
void sparsify(loco::Graph *) const;
private:
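
With quantization split out into CircleQuantizer, CircleOptimizer keeps the enable/param/optimize flow for graph-level rewrites only. A hedged usage sketch, assuming a loaded loco::Graph *graph:

luci::CircleOptimizer optimizer;
auto options = optimizer.options();
options->enable(luci::CircleOptimizer::Options::Algorithm::FoldGather);
options->enable(luci::CircleOptimizer::Options::Algorithm::RemoveRedundantQuantize);
optimizer.optimize(graph); // runs only the enabled passes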
diff --git a/compiler/luci/pass/include/luci/CircleQuantizer.h b/compiler/luci/pass/include/luci/CircleQuantizer.h
new file mode 100644
index 000000000..4e7074d98
--- /dev/null
+++ b/compiler/luci/pass/include/luci/CircleQuantizer.h
@@ -0,0 +1,97 @@
+/*
+ * Copyright (c) 2022 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __LUCI_CIRCLE_QUANTIZER_H__
+#define __LUCI_CIRCLE_QUANTIZER_H__
+
+#include <loco.h>
+
+#include <memory>
+#include <string>
+#include <vector>
+
+namespace luci
+{
+
+class CircleQuantizer final
+{
+public:
+ struct Options
+ {
+ struct LayerParam
+ {
+ std::string name;
+ std::string dtype;
+ std::string granularity;
+ };
+
+ enum Algorithm
+ {
+ QuantizeDequantizeWeights,
+ QuantizeWithMinMax,
+ Requantize,
+ CopyQuantParam,
+ ForceQuantParam,
+ ConvertToFakeQuantizedModel,
+ };
+
+ enum AlgorithmParameters
+ {
+ // quantize
+ Quantize_input_model_dtype,
+ Quantize_output_model_dtype,
+ Quantize_granularity, // layer-wise or channel-wise
+ Quantize_tensor_names,
+ Quantize_scales,
+ Quantize_zero_points,
+ Quantize_layer_params,
+
+ // copy_quantparam
+ Quantize_src_tensor_names,
+ Quantize_dst_tensor_names,
+
+ Quantize_input_type,
+ Quantize_output_type,
+ Quantize_TF_style_maxpool,
+ };
+
+ virtual ~Options() = default;
+
+ virtual void enable(Algorithm) = 0;
+ virtual bool query(Algorithm) = 0;
+ virtual void param(AlgorithmParameters, const std::string &) = 0;
+ virtual const std::string param(AlgorithmParameters) const = 0;
+ virtual void params(AlgorithmParameters, std::vector<std::string> &) = 0;
+ virtual std::vector<std::string> params(AlgorithmParameters) const = 0;
+
+ // Quantization parameters for multiple layers
+ virtual void layer_params(AlgorithmParameters, std::vector<std::shared_ptr<LayerParam>> &) = 0;
+ virtual std::vector<std::shared_ptr<LayerParam>> layer_params(AlgorithmParameters) const = 0;
+ };
+
+public:
+ // TODO maybe caller can provide Options as ctor parameters
+ Options *options(void);
+
+public:
+ void quantize(loco::Graph *) const;
+
+private:
+ std::unique_ptr<Options> _options;
+};
+
+} // namespace luci
+
+#endif // __LUCI_CIRCLE_QUANTIZER_H__
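
A hedged usage sketch of the new quantizer front-end, assuming a loaded loco::Graph *graph; the parameter strings follow the conventions used elsewhere in this diff ("float32", "uint8", "channel"):

using Options = luci::CircleQuantizer::Options;

luci::CircleQuantizer quantizer;
auto options = quantizer.options();
options->enable(Options::Algorithm::QuantizeWithMinMax);
options->param(Options::AlgorithmParameters::Quantize_input_model_dtype, "float32");
options->param(Options::AlgorithmParameters::Quantize_output_model_dtype, "uint8");
options->param(Options::AlgorithmParameters::Quantize_granularity, "channel");
quantizer.quantize(graph);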
diff --git a/compiler/luci/pass/include/luci/Pass/ConvertToFakeQuantizedModelPass.h b/compiler/luci/pass/include/luci/Pass/ConvertToFakeQuantizedModelPass.h
new file mode 100644
index 000000000..91dd2300e
--- /dev/null
+++ b/compiler/luci/pass/include/luci/Pass/ConvertToFakeQuantizedModelPass.h
@@ -0,0 +1,39 @@
+/*
+ * Copyright (c) 2022 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __LUCI_CONVERT_TO_FAKE_QUANTIZED_MODEL_PASS_H__
+#define __LUCI_CONVERT_TO_FAKE_QUANTIZED_MODEL_PASS_H__
+
+#include <logo/Pass.h>
+
+namespace luci
+{
+
+/**
+ * @brief Class to convert a quantized model to a fake-quantized fp32 model.
+ */
+struct ConvertToFakeQuantizedModelPass final : public logo::Pass
+{
+ ConvertToFakeQuantizedModelPass() {}
+
+ const char *name(void) const final { return "luci::ConvertToFakeQuantizedModelPass"; }
+
+ bool run(loco::Graph *g) final;
+};
+
+} // namespace luci
+
+#endif // __LUCI_CONVERT_TO_FAKE_QUANTIZED_MODEL_PASS_H__
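
Fake quantization keeps fp32 values but snaps them onto the quantized grid, i.e. dequantize(quantize(x)). A standalone sketch for one uint8 value (scale and zero point are assumed given):

#include <algorithm>
#include <cmath>
#include <cstdint>

// fq(x) = dequant(quant(x)): the result is fp32 but carries uint8 precision.
float fake_quantize(float x, float scale, int32_t zero_point)
{
  int32_t q = static_cast<int32_t>(std::round(x / scale)) + zero_point;
  q = std::min<int32_t>(255, std::max<int32_t>(0, q)); // clamp to uint8 range
  return static_cast<float>(q - zero_point) * scale;
}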
diff --git a/compiler/luci/pass/include/luci/Pass/CopyQuantParamPass.h b/compiler/luci/pass/include/luci/Pass/CopyQuantParamPass.h
new file mode 100644
index 000000000..18c9cd56a
--- /dev/null
+++ b/compiler/luci/pass/include/luci/Pass/CopyQuantParamPass.h
@@ -0,0 +1,53 @@
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __LUCI_COPY_QUANT_PARAM_PASS_H__
+#define __LUCI_COPY_QUANT_PARAM_PASS_H__
+
+#include <loco.h>
+
+#include <logo/Pass.h>
+
+#include <string>
+#include <vector>
+
+namespace luci
+{
+
+/**
+ * @brief Pass to copy quantparam (scale, zerop) of a tensor to another tensor
+ */
+class CopyQuantParamPass : public logo::Pass
+{
+public:
+ using TensorVector = std::vector<std::string>;
+
+public:
+ CopyQuantParamPass(TensorVector &src_tensors, TensorVector &dst_tensors)
+ : _src_tensors{src_tensors}, _dst_tensors{dst_tensors}
+ {
+ // DO NOTHING
+ }
+ virtual const char *name(void) const { return "luci::CopyQuantParamPass"; }
+
+public:
+ bool run(loco::Graph *graph);
+
+private:
+ TensorVector _src_tensors;
+ TensorVector _dst_tensors;
+};
+
+} // namespace luci
+
+#endif //__LUCI_COPY_QUANT_PARAM_PASS_H__
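
A hedged usage sketch, assuming a loaded loco::Graph *graph; the tensor names are hypothetical:

luci::CopyQuantParamPass::TensorVector src_tensors{"conv1_out"};
luci::CopyQuantParamPass::TensorVector dst_tensors{"concat_out"};
luci::CopyQuantParamPass pass(src_tensors, dst_tensors);
pass.run(graph); // copies scale/zerop of conv1_out onto concat_out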
diff --git a/compiler/luci/pass/include/luci/Pass/FoldGatherPass.h b/compiler/luci/pass/include/luci/Pass/FoldGatherPass.h
new file mode 100644
index 000000000..de08c8845
--- /dev/null
+++ b/compiler/luci/pass/include/luci/Pass/FoldGatherPass.h
@@ -0,0 +1,38 @@
+/*
+ * Copyright (c) 2022 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __LUCI_FOLD_GATHER_PASS_H__
+#define __LUCI_FOLD_GATHER_PASS_H__
+
+#include <logo/Pass.h>
+
+namespace luci
+{
+
+/**
+ * @brief Class to fold Gather to a constant tensor
+ */
+struct FoldGatherPass final : public logo::Pass
+{
+ const char *name(void) const final { return "luci::FoldGatherPass"; }
+
+ bool run(loco::Graph *g) final;
+};
+
+} // namespace luci
+
+#endif // __LUCI_FOLD_GATHER_PASS_H__
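
Folding a Gather whose params and indices are both constant amounts to materializing the indexed selection at compile time. A standalone sketch over axis 0 (flat float params assumed):

#include <cstddef>
#include <cstdint>
#include <vector>

// out[i] = params[indices[i]] : the folded constant replaces the Gather node.
std::vector<float> fold_gather(const std::vector<float> &params,
                               const std::vector<int32_t> &indices)
{
  std::vector<float> out;
  out.reserve(indices.size());
  for (auto index : indices)
    out.push_back(params.at(static_cast<std::size_t>(index)));
  return out;
}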
diff --git a/compiler/luci/pass/include/luci/Pass/PropagateQParamBackwardPass.h b/compiler/luci/pass/include/luci/Pass/PropagateQParamBackwardPass.h
new file mode 100644
index 000000000..0c489fc30
--- /dev/null
+++ b/compiler/luci/pass/include/luci/Pass/PropagateQParamBackwardPass.h
@@ -0,0 +1,42 @@
+/*
+ * Copyright (c) 2022 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __LUCI_PROPAGATE_QPARAM_BACKWARD_PASS_H__
+#define __LUCI_PROPAGATE_QPARAM_BACKWARD_PASS_H__
+
+#include <logo/Pass.h>
+
+namespace luci
+{
+
+/**
+ * @brief Class to propagate quantization parameters of an operator's output to input
+ */
+struct PropagateQParamBackwardPass final : public logo::Pass
+{
+ PropagateQParamBackwardPass(loco::DataType output) : _output_model_dtype(output) {}
+
+ const char *name(void) const final { return "luci::PropagateQParamBackwardPass"; }
+
+ bool run(loco::Graph *g) final;
+
+private:
+ loco::DataType _output_model_dtype;
+};
+
+} // namespace luci
+
+#endif // __LUCI_PROPAGATE_QPARAM_BACKWARD_PASS_H__
diff --git a/compiler/luci/pass/include/luci/Pass/PropagateQuantParamPass.h b/compiler/luci/pass/include/luci/Pass/PropagateQParamForwardPass.h
index 7e0c44b8c..952bd9614 100644
--- a/compiler/luci/pass/include/luci/Pass/PropagateQuantParamPass.h
+++ b/compiler/luci/pass/include/luci/Pass/PropagateQParamForwardPass.h
@@ -14,8 +14,8 @@
* limitations under the License.
*/
-#ifndef __LUCI_PROPAGATE_QUANT_PARAM_PASS_H__
-#define __LUCI_PROPAGATE_QUANT_PARAM_PASS_H__
+#ifndef __LUCI_PROPAGATE_QPARAM_FORWARD_PASS_H__
+#define __LUCI_PROPAGATE_QPARAM_FORWARD_PASS_H__
#include <logo/Pass.h>
@@ -23,15 +23,22 @@ namespace luci
{
/**
- * @brief Class to propagate quantization parameters of an operator's output to input
+ * @brief Class to propagate quantization parameters of an operator's input to output
*/
-struct PropagateQuantParamPass final : public logo::Pass
+struct PropagateQParamForwardPass final : public logo::Pass
{
- const char *name(void) const final { return "luci::PropagateQuantParamPass"; }
+ PropagateQParamForwardPass(bool TF_style_maxpool) : _TF_style_maxpool(TF_style_maxpool) {}
+
+ PropagateQParamForwardPass() {}
+
+ const char *name(void) const final { return "luci::PropagateQParamForwardPass"; }
bool run(loco::Graph *g) final;
+
+private:
+ bool _TF_style_maxpool = false;
};
} // namespace luci
-#endif // __LUCI_PROPAGATE_QUANT_PARAM_PASS_H__
+#endif // __LUCI_PROPAGATE_QPARAM_FORWARD_PASS_H__
diff --git a/compiler/luci/pass/include/luci/Pass/QuantizationParameters.h b/compiler/luci/pass/include/luci/Pass/QuantizationParameters.h
index 5c9cd427f..30c8db058 100644
--- a/compiler/luci/pass/include/luci/Pass/QuantizationParameters.h
+++ b/compiler/luci/pass/include/luci/Pass/QuantizationParameters.h
@@ -17,6 +17,10 @@
#ifndef __LUCI_QUANTIZATION_PARAMETERS_H__
#define __LUCI_QUANTIZATION_PARAMETERS_H__
+#include <loco.h>
+
+#include <string>
+
namespace luci
{
@@ -26,6 +30,13 @@ enum QuantizationGranularity
ChannelWise = 1,
};
+struct LayerInfo
+{
+ std::string name;
+ loco::DataType dtype;
+ QuantizationGranularity granularity;
+};
+
} // namespace luci
#endif // __LUCI_QUANTIZATION_PARAMETERS_H__
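
LayerInfo lets callers override dtype and granularity per layer. A hedged sketch of building such a list (the layer name is hypothetical):

std::vector<luci::LayerInfo> layers_info;
layers_info.push_back({"conv_block1/conv1", loco::DataType::S16,
                       luci::QuantizationGranularity::ChannelWise});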
diff --git a/compiler/luci/pass/include/luci/Pass/QuantizeDequantizeWeightsPass.h b/compiler/luci/pass/include/luci/Pass/QuantizeDequantizeWeightsPass.h
index 68765ec5b..1825ee1aa 100644
--- a/compiler/luci/pass/include/luci/Pass/QuantizeDequantizeWeightsPass.h
+++ b/compiler/luci/pass/include/luci/Pass/QuantizeDequantizeWeightsPass.h
@@ -32,12 +32,30 @@ namespace luci
class QuantizeDequantizeWeightsPass : public logo::Pass
{
public:
+ struct Context
+ {
+ loco::DataType input_model_dtype = loco::DataType::Unknown;
+ loco::DataType output_model_dtype = loco::DataType::Unknown;
+ QuantizationGranularity granularity = QuantizationGranularity::ChannelWise;
+ std::vector<LayerInfo> layers_info;
+ };
+
+public:
+ QuantizeDequantizeWeightsPass(std::unique_ptr<Context> &&ctx) : _ctx{std::move(ctx)}
+ {
+ // DO NOTHING
+ }
+
+public:
QuantizeDequantizeWeightsPass(loco::DataType input_model_dtype, loco::DataType output_model_dtype,
QuantizationGranularity granularity)
- : _input_model_dtype{input_model_dtype}, _output_model_dtype{output_model_dtype}, _granularity{
- granularity}
{
- // DO NOTHING
+ _ctx = std::make_unique<Context>();
+ {
+ _ctx->input_model_dtype = input_model_dtype;
+ _ctx->output_model_dtype = output_model_dtype;
+ _ctx->granularity = granularity;
+ }
}
virtual const char *name(void) const { return "luci::QuantizeDequantizeWeightsPass"; }
@@ -45,9 +63,7 @@ public:
bool run(loco::Graph *graph);
private:
- loco::DataType _input_model_dtype;
- loco::DataType _output_model_dtype;
- QuantizationGranularity _granularity;
+ std::unique_ptr<Context> _ctx;
};
} // namespace luci
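
A hedged sketch of the new Context-style construction, assuming a loaded loco::Graph *graph:

auto ctx = std::make_unique<luci::QuantizeDequantizeWeightsPass::Context>();
ctx->input_model_dtype = loco::DataType::FLOAT32;
ctx->output_model_dtype = loco::DataType::U8;
ctx->granularity = luci::QuantizationGranularity::ChannelWise;
// ctx->layers_info may carry per-layer LayerInfo overrides

luci::QuantizeDequantizeWeightsPass fake_quantizer(std::move(ctx));
fake_quantizer.run(graph);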
diff --git a/compiler/luci/pass/include/luci/Pass/QuantizePreCheckerPass.h b/compiler/luci/pass/include/luci/Pass/QuantizePreCheckerPass.h
new file mode 100644
index 000000000..c852f88e0
--- /dev/null
+++ b/compiler/luci/pass/include/luci/Pass/QuantizePreCheckerPass.h
@@ -0,0 +1,39 @@
+/*
+ * Copyright (c) 2022 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __LUCI_QUANTIZE_PRE_CHECKER_PASS_H__
+#define __LUCI_QUANTIZE_PRE_CHECKER_PASS_H__
+
+#include <logo/Pass.h>
+
+namespace luci
+{
+
+/**
+ * @brief Pass to verify that the input model has a form acceptable to the quantizer
+ */
+class QuantizePreCheckerPass : public logo::Pass
+{
+public:
+ const char *name(void) const final { return "luci::QuantizePreCheckerPass"; }
+
+public:
+ bool run(loco::Graph *graph) final;
+};
+
+} // namespace luci
+
+#endif //__LUCI_QUANTIZE_PRE_CHECKER_PASS_H__
diff --git a/compiler/luci/pass/include/luci/Pass/QuantizeWithMinMaxPass.h b/compiler/luci/pass/include/luci/Pass/QuantizeWithMinMaxPass.h
index 648abad70..ea6db85d1 100644
--- a/compiler/luci/pass/include/luci/Pass/QuantizeWithMinMaxPass.h
+++ b/compiler/luci/pass/include/luci/Pass/QuantizeWithMinMaxPass.h
@@ -23,6 +23,8 @@
#include <luci/Pass/QuantizationParameters.h>
+#include <vector>
+
namespace luci
{
@@ -31,26 +33,41 @@ namespace luci
*/
class QuantizeWithMinMaxPass : public logo::Pass
{
+public:
+ struct Context
+ {
+ loco::DataType input_model_dtype = loco::DataType::Unknown;
+ loco::DataType output_model_dtype = loco::DataType::Unknown;
+ QuantizationGranularity granularity = QuantizationGranularity::ChannelWise;
+ loco::DataType input_type = loco::DataType::Unknown;
+ loco::DataType output_type = loco::DataType::Unknown;
+ bool TF_style_maxpool = false;
+ std::vector<LayerInfo> layers_info;
+ };
+
// For backward-compatibility
// TODO Remove this constructor
public:
QuantizeWithMinMaxPass(loco::DataType input_model_dtype, loco::DataType output_model_dtype,
QuantizationGranularity granularity)
- : _input_model_dtype{input_model_dtype}, _output_model_dtype{output_model_dtype},
- _granularity{granularity}, _input_type{output_model_dtype}, _output_type{output_model_dtype}
{
- // DO NOTHING
+ _ctx = std::make_unique<Context>();
+ {
+ _ctx->input_model_dtype = input_model_dtype;
+ _ctx->output_model_dtype = output_model_dtype;
+ _ctx->granularity = granularity;
+ _ctx->input_type = output_model_dtype;
+ _ctx->output_type = output_model_dtype;
+ _ctx->TF_style_maxpool = false;
+ }
}
public:
- QuantizeWithMinMaxPass(loco::DataType input_model_dtype, loco::DataType output_model_dtype,
- QuantizationGranularity granularity, loco::DataType input_type,
- loco::DataType output_type)
- : _input_model_dtype{input_model_dtype}, _output_model_dtype{output_model_dtype},
- _granularity{granularity}, _input_type{input_type}, _output_type{output_type}
+ QuantizeWithMinMaxPass(std::unique_ptr<Context> &&ctx) : _ctx{std::move(ctx)}
{
// DO NOTHING
}
+
virtual const char *name(void) const { return "luci::QuantizeWithMinMaxPass"; }
public:
@@ -61,11 +78,7 @@ private:
void set_output_type(loco::Graph *graph) const;
private:
- loco::DataType _input_model_dtype;
- loco::DataType _output_model_dtype;
- QuantizationGranularity _granularity;
- loco::DataType _input_type;
- loco::DataType _output_type;
+ std::unique_ptr<Context> _ctx;
};
} // namespace luci
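
The Context bundles what used to be five constructor arguments plus the new TF_style_maxpool flag and per-layer overrides. A hedged usage sketch, assuming a loaded loco::Graph *graph:

auto ctx = std::make_unique<luci::QuantizeWithMinMaxPass::Context>();
ctx->input_model_dtype = loco::DataType::FLOAT32;
ctx->output_model_dtype = loco::DataType::U8;
ctx->granularity = luci::QuantizationGranularity::ChannelWise;
ctx->input_type = loco::DataType::U8;
ctx->output_type = loco::DataType::U8;
ctx->TF_style_maxpool = false;

luci::QuantizeWithMinMaxPass quantizer(std::move(ctx));
quantizer.run(graph);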
diff --git a/compiler/luci/pass/include/luci/Pass/RemoveRedundantQuantizePass.h b/compiler/luci/pass/include/luci/Pass/RemoveRedundantQuantizePass.h
new file mode 100644
index 000000000..3e76bcdc3
--- /dev/null
+++ b/compiler/luci/pass/include/luci/Pass/RemoveRedundantQuantizePass.h
@@ -0,0 +1,37 @@
+/*
+ * Copyright (c) 2022 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __LUCI_REMOVE_REDUNDANT_QUANTIZE_PASS_H__
+#define __LUCI_REMOVE_REDUNDANT_QUANTIZE_PASS_H__
+
+#include <logo/Pass.h>
+
+namespace luci
+{
+
+/**
+ * @brief Class to remove redundant quantize operations
+ */
+struct RemoveRedundantQuantizePass final : public logo::Pass
+{
+ const char *name(void) const final { return "luci::RemoveRedundantQuantizePass"; }
+
+ bool run(loco::Graph *g) final;
+};
+
+} // namespace luci
+
+#endif // __LUCI_REMOVE_REDUNDANT_QUANTIZE_PASS_H__
diff --git a/compiler/luci/pass/src/BatchNormPatternFinder.cpp b/compiler/luci/pass/src/BatchNormPatternFinder.cpp
index c1a06bfda..e3f126b15 100644
--- a/compiler/luci/pass/src/BatchNormPatternFinder.cpp
+++ b/compiler/luci/pass/src/BatchNormPatternFinder.cpp
@@ -44,10 +44,26 @@ bool is_batchnorm_add(const luci::CircleAdd *add, luci::CircleMul *&mul, luci::C
return false;
}
- if (constant->rank() != 1)
+ uint32_t channel_dim = 0;
+
+ if (constant->rank() == 1)
+ {
+ channel_dim = constant->dim(0).value();
+ }
+ else if (constant->rank() == 4)
+ {
+ for (uint32_t i = 0; i < 3; i++)
+ {
+ if (constant->dim(i).value() != 1)
+ return false;
+ }
+ channel_dim = constant->dim(3).value();
+ }
+ else
+ {
return false;
+ }
- auto channel_dim = constant->dim(0);
// Assumption: Layout is channel-last
if (!(channel_dim == add->dim(add->rank() - 1)))
return false;
@@ -90,10 +106,26 @@ bool is_batchnorm_mul(const luci::CircleMul *mul, luci::CircleNode *&pred_node,
return false;
}
- if (constant->rank() != 1)
+ uint32_t channel_dim = 0;
+
+ if (constant->rank() == 1)
+ {
+ channel_dim = constant->dim(0).value();
+ }
+ else if (constant->rank() == 4)
+ {
+ for (uint32_t i = 0; i < 3; i++)
+ {
+ if (constant->dim(i).value() != 1)
+ return false;
+ }
+ channel_dim = constant->dim(3).value();
+ }
+ else
+ {
return false;
+ }
- auto channel_dim = constant->dim(0);
// Assumption: Layout is channel-last
if (!(channel_dim == mul->dim(mul->rank() - 1)))
return false;
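
Both finders now accept the per-channel constant in rank-1 {C} or rank-4 {1, 1, 1, C} form (channel-last layout assumed). A standalone sketch of the shared shape check:

#include <cstdint>
#include <vector>

// Returns true and sets `channel` when dims describe a per-channel constant.
bool channel_dim_of(const std::vector<uint32_t> &dims, uint32_t &channel)
{
  if (dims.size() == 1)
  {
    channel = dims[0];
    return true;
  }
  if (dims.size() == 4 && dims[0] == 1 && dims[1] == 1 && dims[2] == 1)
  {
    channel = dims[3];
    return true;
  }
  return false;
}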
diff --git a/compiler/luci/pass/src/BatchNormPatternFinder.test.cpp b/compiler/luci/pass/src/BatchNormPatternFinder.test.cpp
index 08e7fac1c..cc8c5615f 100644
--- a/compiler/luci/pass/src/BatchNormPatternFinder.test.cpp
+++ b/compiler/luci/pass/src/BatchNormPatternFinder.test.cpp
@@ -50,7 +50,7 @@ public:
auto channel_size = *last_it;
_add->shape(shape);
- _add_beta->shape({channel_size});
+ set_beta_shape(channel_size);
_add_beta->size<loco::DataType::FLOAT32>(channel_size);
for (uint32_t i = 0; i < channel_size; i++)
_add_beta->at<loco::DataType::FLOAT32>(i) = i;
@@ -63,10 +63,23 @@ public:
luci::CircleAdd *add() { return _add; }
protected:
+ virtual void set_beta_shape(uint32_t channel) = 0;
+
+protected:
luci::CircleAdd *_add = nullptr;
luci::CircleConst *_add_beta = nullptr;
};
+class AddRank1BetaGraphlet : public AddBetaGraphlet
+{
+ void set_beta_shape(uint32_t channel) final { _add_beta->shape({channel}); }
+};
+
+class AddRank4BetaGraphlet : public AddBetaGraphlet
+{
+ void set_beta_shape(uint32_t channel) final { _add_beta->shape({1, 1, 1, channel}); }
+};
+
/**
* @brief Graphlet with Mul and Const as gamma from BatchNorm
*/
@@ -90,7 +103,7 @@ public:
auto channel_size = *last_it;
_mul->shape(shape);
- _mul_gamma->shape({channel_size});
+ set_gamma_shape(channel_size);
_mul_gamma->size<loco::DataType::FLOAT32>(channel_size);
for (uint32_t i = 0; i < channel_size; i++)
_mul_gamma->at<loco::DataType::FLOAT32>(i) = i;
@@ -103,14 +116,27 @@ public:
luci::CircleMul *mul(void) { return _mul; }
protected:
+ virtual void set_gamma_shape(uint32_t channel) = 0;
+
+protected:
luci::CircleMul *_mul = nullptr;
luci::CircleConst *_mul_gamma = nullptr;
};
+class MulRank1GammaGraphlet : public MulGammaGraphlet
+{
+ void set_gamma_shape(uint32_t channel) final { _mul_gamma->shape({channel}); }
+};
+
+class MulRank4GammaGraphlet : public MulGammaGraphlet
+{
+ void set_gamma_shape(uint32_t channel) final { _mul_gamma->shape({1, 1, 1, channel}); }
+};
+
/**
* @brief Graph of Mul-Add pattern from BatchNorm
*/
-class MulAddGraph : public TestIOGraph, public AddBetaGraphlet, public MulGammaGraphlet
+class MulAddGraph : public TestIOGraph, public AddRank1BetaGraphlet, public MulRank1GammaGraphlet
{
public:
MulAddGraph() = default;
@@ -118,8 +144,30 @@ public:
void init(const ShapeU32 shape_in, const ShapeU32 shape_out)
{
TestIOGraph::init(shape_in, shape_out);
- MulGammaGraphlet::init(g(), shape_in, luci::FusedActFunc::NONE);
- AddBetaGraphlet::init(g(), shape_out, luci::FusedActFunc::RELU);
+ MulRank1GammaGraphlet::init(g(), shape_in, luci::FusedActFunc::NONE);
+ AddRank1BetaGraphlet::init(g(), shape_out, luci::FusedActFunc::RELU);
+
+ // connect network
+ _mul->x(input());
+ _mul->y(_mul_gamma);
+ _add->x(_mul);
+ _add->y(_add_beta);
+ output()->from(_add);
+ }
+};
+
+class MulAddRank4Graph : public TestIOGraph,
+ public AddRank4BetaGraphlet,
+ public MulRank4GammaGraphlet
+{
+public:
+ MulAddRank4Graph() = default;
+
+ void init(const ShapeU32 shape_in, const ShapeU32 shape_out)
+ {
+ TestIOGraph::init(shape_in, shape_out);
+ MulRank4GammaGraphlet::init(g(), shape_in, luci::FusedActFunc::NONE);
+ AddRank4BetaGraphlet::init(g(), shape_out, luci::FusedActFunc::RELU);
// connect network
_mul->x(input());
@@ -133,7 +181,7 @@ public:
/**
* @brief Graph of Add with Const
*/
-class AddGraph : public TestIOGraph, public AddBetaGraphlet
+class AddGraph : public TestIOGraph, public AddRank1BetaGraphlet
{
public:
AddGraph() = default;
@@ -141,7 +189,24 @@ public:
void init(const ShapeU32 shape_in, const ShapeU32 shape_out)
{
TestIOGraph::init(shape_in, shape_out);
- AddBetaGraphlet::init(g(), shape_in, luci::FusedActFunc::RELU);
+ AddRank1BetaGraphlet::init(g(), shape_in, luci::FusedActFunc::RELU);
+
+ // connect network
+ _add->x(input());
+ _add->y(_add_beta);
+ output()->from(_add);
+ }
+};
+
+class AddRank4Graph : public TestIOGraph, public AddRank4BetaGraphlet
+{
+public:
+ AddRank4Graph() = default;
+
+ void init(const ShapeU32 shape_in, const ShapeU32 shape_out)
+ {
+ TestIOGraph::init(shape_in, shape_out);
+ AddRank4BetaGraphlet::init(g(), shape_in, luci::FusedActFunc::RELU);
// connect network
_add->x(input());
@@ -160,6 +225,7 @@ public:
protected:
luci::test::MulAddGraph _mag;
+ luci::test::MulAddRank4Graph _mag_r4;
};
class BatchNormPatternFinderAddTest : public ::testing::Test
@@ -169,6 +235,7 @@ public:
protected:
luci::test::AddGraph _ag;
+ luci::test::AddRank4Graph _ag_r4;
};
TEST_F(BatchNormPatternFinderMulAddTest, is_batchnorm_add)
@@ -192,6 +259,19 @@ TEST_F(BatchNormPatternFinderMulAddTest, is_batchnorm_add2)
ASSERT_TRUE(res);
}
+TEST_F(BatchNormPatternFinderMulAddTest, is_batchnorm_add_rank4)
+{
+ _mag_r4.init({1, 16, 16, 4}, {1, 16, 16, 4});
+
+ luci::CircleMul *mul = nullptr;
+ luci::CircleConst *beta = nullptr;
+
+ auto res = luci::is_batchnorm_add(_mag_r4.add(), mul, beta);
+ ASSERT_TRUE(res);
+ ASSERT_NE(nullptr, mul);
+ ASSERT_NE(nullptr, beta);
+}
+
TEST_F(BatchNormPatternFinderAddTest, is_batchnorm_add_NEG)
{
_ag.init({1, 16, 16, 4}, {1, 16, 16, 4});
@@ -215,3 +295,16 @@ TEST_F(BatchNormPatternFinderMulAddTest, is_batchnorm_mul)
ASSERT_NE(nullptr, pred);
ASSERT_NE(nullptr, gamma);
}
+
+TEST_F(BatchNormPatternFinderMulAddTest, is_batchnorm_mul_rank4)
+{
+ _mag_r4.init({1, 16, 16, 4}, {1, 16, 16, 4});
+
+ luci::CircleNode *pred = nullptr;
+ luci::CircleConst *gamma = nullptr;
+
+ auto res = luci::is_batchnorm_mul(_mag_r4.mul(), pred, gamma);
+ ASSERT_TRUE(res);
+ ASSERT_NE(nullptr, pred);
+ ASSERT_NE(nullptr, gamma);
+}
diff --git a/compiler/luci/pass/src/CircleOptimizer.cpp b/compiler/luci/pass/src/CircleOptimizer.cpp
index 75f04b3b5..6dbb22d7c 100644
--- a/compiler/luci/pass/src/CircleOptimizer.cpp
+++ b/compiler/luci/pass/src/CircleOptimizer.cpp
@@ -22,9 +22,9 @@
#include "luci/Pass/FoldCastPass.h"
#include "luci/Pass/FoldDepthwiseConv2DPass.h"
#include "luci/Pass/FoldDequantizePass.h"
+#include "luci/Pass/FoldGatherPass.h"
#include "luci/Pass/FoldSparseToDensePass.h"
#include "luci/Pass/ForwardReshapeToUnaryOpPass.h"
-#include "luci/Pass/ForceQuantParamPass.h"
#include "luci/Pass/FuseActivationFunctionPass.h"
#include "luci/Pass/FuseAddWithFullyConnectedPass.h"
#include "luci/Pass/FuseAddWithTConvPass.h"
@@ -37,11 +37,11 @@
#include "luci/Pass/FusePreActivationBatchNormPass.h"
#include "luci/Pass/FuseTransposeWithMeanPass.h"
#include "luci/Pass/MakeBatchNormGammaPositivePass.h"
-#include "luci/Pass/PropagateQuantParamPass.h"
#include "luci/Pass/RemoveFakeQuantPass.h"
#include "luci/Pass/RemoveQuantDequantSeqPass.h"
#include "luci/Pass/RemoveRedundantReshapePass.h"
#include "luci/Pass/RemoveRedundantTransposePass.h"
+#include "luci/Pass/RemoveRedundantQuantizePass.h"
#include "luci/Pass/RemoveUnnecessaryReshapePass.h"
#include "luci/Pass/RemoveUnnecessarySlicePass.h"
#include "luci/Pass/RemoveUnnecessaryStridedSlicePass.h"
@@ -52,9 +52,6 @@
#include "luci/Pass/ResolveCustomOpBatchMatMulPass.h"
#include "luci/Pass/ResolveCustomOpMatMulPass.h"
#include "luci/Pass/ResolveCustomOpMaxPoolWithArgmaxPass.h"
-#include "luci/Pass/RequantizePass.h"
-#include "luci/Pass/QuantizeWithMinMaxPass.h"
-#include "luci/Pass/QuantizeDequantizeWeightsPass.h"
#include "luci/Pass/SparsifyTensorPass.h"
#include "luci/Pass/ShuffleWeightTo16x1Float32Pass.h"
#include "luci/Pass/SubstitutePackToReshapePass.h"
@@ -75,9 +72,6 @@
#include "ModulePhase.h"
#include "ProgressReporter.h"
-#include "helpers/Strings.h"
-
-#include "QuantizedModelVerifier.h"
#include <luci/IR/CircleNodes.h>
#include <logo/Phase.h>
@@ -91,37 +85,17 @@ namespace
using namespace luci;
-template <typename T> T lexical_cast(const std::string &str)
-{
- std::istringstream ss;
- ss.str(str);
- T data;
- ss >> data;
- return data;
-}
-
-template <typename T> std::vector<T> lexical_cast(std::vector<std::string> &sv)
-{
- std::vector<T> result;
- std::transform(sv.begin(), sv.end(), std::back_inserter(result),
- [](std::string str) -> T { return lexical_cast<T>(str); });
- return result;
-}
-
class OptimizeOptionsImpl final : public luci::CircleOptimizer::Options
{
public:
void enable(Algorithm) final;
void param(AlgorithmParameters, const std::string &) final;
const std::string param(AlgorithmParameters) const final;
- void params(AlgorithmParameters, std::vector<std::string> &) final;
- std::vector<std::string> params(AlgorithmParameters) const final;
bool query(Algorithm) final;
private:
std::vector<Algorithm> _algorithms;
std::map<AlgorithmParameters, const std::string> _algorithm_params;
- std::map<AlgorithmParameters, std::vector<std::string>> _multiple_params;
};
void OptimizeOptionsImpl::enable(Algorithm algo) { _algorithms.push_back(algo); }
@@ -144,24 +118,6 @@ const std::string OptimizeOptionsImpl::param(AlgorithmParameters param) const
}
}
-void OptimizeOptionsImpl::params(AlgorithmParameters param, std::vector<std::string> &vec)
-{
- _multiple_params[param] = vec;
-}
-
-std::vector<std::string> OptimizeOptionsImpl::params(AlgorithmParameters param) const
-{
- auto param_vec = _multiple_params.find(param);
- if (param_vec != _multiple_params.end())
- {
- return param_vec->second;
- }
- else
- {
- return std::vector<std::string>();
- }
-}
-
bool OptimizeOptionsImpl::query(Algorithm algo)
{
std::vector<Algorithm>::iterator it = std::find(_algorithms.begin(), _algorithms.end(), algo);
@@ -312,6 +268,10 @@ void CircleOptimizer::optimize(loco::Graph *g) const
{
phase.emplace_back(std::make_unique<luci::FoldDequantizePass>());
}
+ if (_options->query(Options::Algorithm::FoldGather))
+ {
+ phase.emplace_back(std::make_unique<luci::FoldGatherPass>());
+ }
if (_options->query(Options::Algorithm::FoldSparseToDense))
{
phase.emplace_back(std::make_unique<luci::FoldSparseToDensePass>());
@@ -368,6 +328,10 @@ void CircleOptimizer::optimize(loco::Graph *g) const
{
phase.emplace_back(std::make_unique<luci::RemoveRedundantTransposePass>());
}
+ if (_options->query(Options::Algorithm::RemoveRedundantQuantize))
+ {
+ phase.emplace_back(std::make_unique<luci::RemoveRedundantQuantizePass>());
+ }
if (_options->query(Options::Algorithm::ReplaceMulAddWithDepthwiseConv))
{
phase.emplace_back(std::make_unique<luci::ReplaceMulAddWithDepthwiseConvPass>());
@@ -417,174 +381,6 @@ void CircleOptimizer::optimize(loco::Graph *g) const
phase_runner.run(phase);
}
-void CircleOptimizer::quantize(loco::Graph *g) const
-{
- // Fake quantization of weights
- if (_options->query(Options::Algorithm::QuantizeDequantizeWeights))
- {
- static const std::vector<std::string> fakeq_supported_input_model_dtype{"float32"};
- static const std::vector<std::string> fakeq_supported_output_model_dtype{"uint8", "int16"};
- static const std::vector<std::string> fakeq_supported_granularity{"layer", "channel"};
-
- auto input_model_dtype =
- _options->param(Options::AlgorithmParameters::Quantize_input_model_dtype);
- auto output_model_dtype =
- _options->param(Options::AlgorithmParameters::Quantize_output_model_dtype);
- auto granularity = _options->param(Options::AlgorithmParameters::Quantize_granularity);
-
- if (!in_array(to_lower_case(input_model_dtype), fakeq_supported_input_model_dtype))
- throw std::runtime_error("Unsupported input type. List of supported input type: " +
- to_string(fakeq_supported_input_model_dtype));
-
- if (!in_array(to_lower_case(output_model_dtype), fakeq_supported_output_model_dtype))
- throw std::runtime_error("Unsupported output type. List of supported output type: " +
- to_string(fakeq_supported_output_model_dtype));
-
- if (!in_array(to_lower_case(granularity), fakeq_supported_granularity))
- throw std::runtime_error("Unsupported granularity. List of supported granularity: " +
- to_string(fakeq_supported_granularity));
-
- if (str_to_granularity(granularity) == QuantizationGranularity::LayerWise &&
- str_to_dtype(output_model_dtype) != loco::DataType::U8)
- throw std::runtime_error("Layer-wise quantization only supports uint8 dtype.");
-
- // Clear existing quantparams before doing fake quantization
- for (auto node : loco::active_nodes(loco::output_nodes(g)))
- {
- auto circle_node = loco::must_cast<luci::CircleNode *>(node);
- if (circle_node->quantparam() != nullptr)
- circle_node->quantparam(nullptr);
- }
-
- luci::QuantizeDequantizeWeightsPass fake_quantizer(str_to_dtype(input_model_dtype),
- str_to_dtype(output_model_dtype),
- str_to_granularity(granularity));
- fake_quantizer.run(g);
- }
-
- // Actual quantization of weights, bias, and activation
- if (_options->query(Options::Algorithm::QuantizeWithMinMax))
- {
- static const std::vector<std::string> qwmm_supported_input_model_dtype{"float32"};
- static const std::vector<std::string> qwmm_supported_output_model_dtype{"uint8", "int16"};
- static const std::vector<std::string> qwmm_supported_granularity{"layer", "channel"};
- static const std::vector<std::string> qwmm_supported_input_type{"uint8", "int16"};
- static const std::vector<std::string> qwmm_supported_output_type{"uint8", "int16"};
-
- auto input_model_dtype =
- _options->param(Options::AlgorithmParameters::Quantize_input_model_dtype);
- auto output_model_dtype =
- _options->param(Options::AlgorithmParameters::Quantize_output_model_dtype);
- auto granularity = _options->param(Options::AlgorithmParameters::Quantize_granularity);
- auto input_type = _options->param(Options::AlgorithmParameters::Quantize_input_type);
- if (input_type.empty())
- input_type = output_model_dtype;
- auto output_type = _options->param(Options::AlgorithmParameters::Quantize_output_type);
- if (output_type.empty())
- output_type = output_model_dtype;
-
- if (!in_array(to_lower_case(input_model_dtype), qwmm_supported_input_model_dtype))
- throw std::runtime_error("Unsupported input type. List of supported input types: " +
- to_string(qwmm_supported_input_model_dtype));
-
- if (!in_array(to_lower_case(output_model_dtype), qwmm_supported_output_model_dtype))
- throw std::runtime_error("Unsupported output type. List of supported output types: " +
- to_string(qwmm_supported_output_model_dtype));
-
- if (!in_array(to_lower_case(granularity), qwmm_supported_granularity))
- throw std::runtime_error("Unsupported granularity. List of supported granularity: " +
- to_string(qwmm_supported_granularity));
-
- if (!in_array(to_lower_case(input_type), qwmm_supported_input_type))
- throw std::runtime_error("Unsupported input type. List of supported input types: " +
- to_string(qwmm_supported_input_type));
-
- if (!in_array(to_lower_case(output_type), qwmm_supported_output_type))
- throw std::runtime_error("Unsupported output type. List of supported output types: " +
- to_string(qwmm_supported_output_type));
-
- if (str_to_granularity(granularity) == QuantizationGranularity::LayerWise &&
- str_to_dtype(output_model_dtype) != loco::DataType::U8)
- throw std::runtime_error("Layer-wise quantization only supports uint8 dtype.");
-
- luci::QuantizeWithMinMaxPass quantizer(
- str_to_dtype(input_model_dtype), str_to_dtype(output_model_dtype),
- str_to_granularity(granularity), str_to_dtype(input_type), str_to_dtype(output_type));
- quantizer.run(g);
-
- // Post-quantization optimizations
- logo::Phase phase;
-
- phase.emplace_back(std::make_unique<luci::PropagateQuantParamPass>());
-
- phase.emplace_back(std::make_unique<luci::CircleShapeInferencePass>());
- phase.emplace_back(std::make_unique<luci::CircleTypeInferencePass>());
- phase.emplace_back(std::make_unique<logo::RemoveDeadNodeWithQueryPass>());
-
- ProgressReporter prog(g, logo::PhaseStrategy::Saturate);
- logo::PhaseRunner<logo::PhaseStrategy::Saturate> phase_runner{g};
- phase_runner.attach(&prog);
- phase_runner.run(phase);
-
- // Verify the type/granularity of the quantized model
- luci::QuantizedModelVerifier verifier(str_to_dtype(output_model_dtype),
- str_to_granularity(granularity));
- verifier.verify(g);
- }
-
- // Requantize
- if (_options->query(Options::Algorithm::Requantize))
- {
- static const std::vector<std::string> rq_supported_input_model_dtype{"int8"};
- static const std::vector<std::string> rq_supported_output_model_dtype{"uint8"};
-
- auto input_model_dtype =
- _options->param(Options::AlgorithmParameters::Quantize_input_model_dtype);
- auto output_model_dtype =
- _options->param(Options::AlgorithmParameters::Quantize_output_model_dtype);
-
- if (!in_array(to_lower_case(input_model_dtype), rq_supported_input_model_dtype))
- throw std::runtime_error("Unsupported input type. List of supported input types: " +
- to_string(rq_supported_input_model_dtype));
-
- if (!in_array(to_lower_case(output_model_dtype), rq_supported_output_model_dtype))
- throw std::runtime_error("Unsupported output type. List of supported output types: " +
- to_string(rq_supported_output_model_dtype));
-
- luci::RequantizePass requantizer(str_to_dtype(input_model_dtype),
- str_to_dtype(output_model_dtype));
- requantizer.run(g);
- }
-
- // Force to write quantparam to specified tensors
- // NOTE Only per-tensor (not per-channel) qparam can be written
- if (_options->query(Options::Algorithm::ForceQuantParam))
- {
- ForceQuantParamPass::TensorVector tensors =
- _options->params(Options::AlgorithmParameters::Quantize_tensor_names);
- auto str_scales = _options->params(Options::AlgorithmParameters::Quantize_scales);
- auto str_zero_points = _options->params(Options::AlgorithmParameters::Quantize_zero_points);
-
- // Cast scales/zero_points to proper types
- ForceQuantParamPass::ScaleVector scales = lexical_cast<float>(str_scales);
- ForceQuantParamPass::ZPVector zero_points = lexical_cast<int64_t>(str_zero_points);
-
- ForceQuantParamPass fq(tensors, scales, zero_points);
- fq.run(g);
- }
-
- logo::Phase phase;
-
- // Do Shape/Type inference
- phase.emplace_back(std::make_unique<luci::CircleShapeInferencePass>());
- phase.emplace_back(std::make_unique<luci::CircleTypeInferencePass>());
-
- ProgressReporter prog(g, logo::PhaseStrategy::Saturate);
- logo::PhaseRunner<logo::PhaseStrategy::Saturate> phase_runner{g};
- phase_runner.attach(&prog);
- phase_runner.run(phase);
-}
-
void CircleOptimizer::sparsify(loco::Graph *g) const
{
if (_options->query(Options::Algorithm::SparsifyTensorPass))
diff --git a/compiler/luci/pass/src/CircleOptimizer.test.cpp b/compiler/luci/pass/src/CircleOptimizer.test.cpp
index a1b5c7f80..041fc7d75 100644
--- a/compiler/luci/pass/src/CircleOptimizer.test.cpp
+++ b/compiler/luci/pass/src/CircleOptimizer.test.cpp
@@ -71,171 +71,3 @@ TEST(CircleOptimizerTest, sparsify_simple)
SUCCEED();
}
-
-TEST(CircleOptimizerTest, quantize_quantdequant_simple)
-{
- loco::Graph g;
- luci::CircleOptimizer o;
-
- auto options = o.options();
-
- options->enable(Algorithms::QuantizeDequantizeWeights);
- options->param(AlgorithmParameters::Quantize_input_model_dtype, "float32");
- options->param(AlgorithmParameters::Quantize_output_model_dtype, "uint8");
- options->param(AlgorithmParameters::Quantize_granularity, "layer");
-
- o.quantize(&g);
-
- SUCCEED();
-}
-
-TEST(CircleOptimizerTest, quantize_quantdequant_input_NEG)
-{
- loco::Graph g;
- luci::CircleOptimizer o;
-
- auto options = o.options();
-
- options->enable(Algorithms::QuantizeDequantizeWeights);
- options->param(AlgorithmParameters::Quantize_input_model_dtype, "invalid");
- options->param(AlgorithmParameters::Quantize_output_model_dtype, "uint8");
- options->param(AlgorithmParameters::Quantize_granularity, "layer");
-
- EXPECT_THROW(o.quantize(&g), std::runtime_error);
-}
-
-TEST(CircleOptimizerTest, quantize_quantdequant_output_NEG)
-{
- loco::Graph g;
- luci::CircleOptimizer o;
-
- auto options = o.options();
-
- options->enable(Algorithms::QuantizeDequantizeWeights);
- options->param(AlgorithmParameters::Quantize_input_model_dtype, "float32");
- options->param(AlgorithmParameters::Quantize_output_model_dtype, "invalid");
- options->param(AlgorithmParameters::Quantize_granularity, "layer");
-
- EXPECT_THROW(o.quantize(&g), std::runtime_error);
-}
-
-TEST(CircleOptimizerTest, quantize_quantdequant_gran_NEG)
-{
- loco::Graph g;
- luci::CircleOptimizer o;
-
- auto options = o.options();
-
- options->enable(Algorithms::QuantizeDequantizeWeights);
- options->param(AlgorithmParameters::Quantize_input_model_dtype, "float32");
- options->param(AlgorithmParameters::Quantize_output_model_dtype, "uint8");
- options->param(AlgorithmParameters::Quantize_granularity, "invalid");
-
- EXPECT_THROW(o.quantize(&g), std::runtime_error);
-}
-
-TEST(CircleOptimizerTest, quantize_minmax_simple)
-{
- loco::Graph g;
- luci::CircleOptimizer o;
-
- auto options = o.options();
-
- options->enable(Algorithms::QuantizeWithMinMax);
- options->param(AlgorithmParameters::Quantize_input_model_dtype, "float32");
- options->param(AlgorithmParameters::Quantize_output_model_dtype, "uint8");
- options->param(AlgorithmParameters::Quantize_granularity, "layer");
-
- o.quantize(&g);
-
- SUCCEED();
-}
-
-TEST(CircleOptimizerTest, quantize_minmax_input_NEG)
-{
- loco::Graph g;
- luci::CircleOptimizer o;
-
- auto options = o.options();
-
- options->enable(Algorithms::QuantizeWithMinMax);
- options->param(AlgorithmParameters::Quantize_input_model_dtype, "invalid");
- options->param(AlgorithmParameters::Quantize_output_model_dtype, "uint8");
- options->param(AlgorithmParameters::Quantize_granularity, "layer");
-
- EXPECT_THROW(o.quantize(&g), std::runtime_error);
-}
-
-TEST(CircleOptimizerTest, quantize_minmax_output_NEG)
-{
- loco::Graph g;
- luci::CircleOptimizer o;
-
- auto options = o.options();
-
- options->enable(Algorithms::QuantizeWithMinMax);
- options->param(AlgorithmParameters::Quantize_input_model_dtype, "float32");
- options->param(AlgorithmParameters::Quantize_output_model_dtype, "invalid");
- options->param(AlgorithmParameters::Quantize_granularity, "layer");
-
- EXPECT_THROW(o.quantize(&g), std::runtime_error);
-}
-
-TEST(CircleOptimizerTest, quantize_minmax_gran_NEG)
-{
- loco::Graph g;
- luci::CircleOptimizer o;
-
- auto options = o.options();
-
- options->enable(Algorithms::QuantizeWithMinMax);
- options->param(AlgorithmParameters::Quantize_input_model_dtype, "float32");
- options->param(AlgorithmParameters::Quantize_output_model_dtype, "uint8");
- options->param(AlgorithmParameters::Quantize_granularity, "invalid");
-
- EXPECT_THROW(o.quantize(&g), std::runtime_error);
-}
-
-TEST(CircleOptimizerTest, quantize_requant_simple)
-{
- loco::Graph g;
- luci::CircleOptimizer o;
-
- auto options = o.options();
-
- options->enable(Algorithms::Requantize);
- options->param(AlgorithmParameters::Quantize_input_model_dtype, "int8");
- options->param(AlgorithmParameters::Quantize_output_model_dtype, "uint8");
-
- o.quantize(&g);
-
- SUCCEED();
-}
-
-TEST(CircleOptimizerTest, quantize_requant_input_NEG)
-{
- loco::Graph g;
- luci::CircleOptimizer o;
-
- auto options = o.options();
-
- options->enable(Algorithms::Requantize);
- options->param(AlgorithmParameters::Quantize_input_model_dtype, "invalid");
- options->param(AlgorithmParameters::Quantize_output_model_dtype, "uint8");
-
- EXPECT_THROW(o.quantize(&g), std::runtime_error);
-}
-
-TEST(CircleOptimizerTest, quantize_requant_output_NEG)
-{
- loco::Graph g;
- luci::CircleOptimizer o;
-
- auto options = o.options();
-
- options->enable(Algorithms::Requantize);
- options->param(AlgorithmParameters::Quantize_input_model_dtype, "int8");
- options->param(AlgorithmParameters::Quantize_output_model_dtype, "invalid");
-
- EXPECT_THROW(o.quantize(&g), std::runtime_error);
-}
diff --git a/compiler/luci/pass/src/CircleQuantizer.cpp b/compiler/luci/pass/src/CircleQuantizer.cpp
new file mode 100644
index 000000000..ce38a90b9
--- /dev/null
+++ b/compiler/luci/pass/src/CircleQuantizer.cpp
@@ -0,0 +1,458 @@
+/*
+ * Copyright (c) 2022 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "luci/CircleQuantizer.h"
+
+#include "luci/Pass/CopyQuantParamPass.h"
+#include "luci/Pass/ForceQuantParamPass.h"
+#include "luci/Pass/PropagateQParamForwardPass.h"
+#include "luci/Pass/RequantizePass.h"
+#include "luci/Pass/ConvertToFakeQuantizedModelPass.h"
+#include "luci/Pass/FoldDequantizePass.h"
+#include "luci/Pass/QuantizePreCheckerPass.h"
+#include "luci/Pass/QuantizeWithMinMaxPass.h"
+#include "luci/Pass/QuantizeDequantizeWeightsPass.h"
+
+#include "luci/Pass/CircleShapeInferencePass.h"
+#include "luci/Pass/CircleTypeInferencePass.h"
+
+// logo passes
+#include <logo/RemoveDeadNodeWithQueryPass.h>
+
+#include "ProgressReporter.h"
+#include "helpers/Strings.h"
+
+#include "QuantizedModelVerifier.h"
+
+#include <luci/IR/CircleNode.h>
+#include <logo/Phase.h>
+
+#include <memory>
+
+namespace
+{
+
+using namespace luci;
+using LayerParam = luci::CircleQuantizer::Options::LayerParam;
+
+template <typename T> T lexical_cast(const std::string &str)
+{
+ std::istringstream ss;
+ ss.str(str);
+ T data;
+ ss >> data;
+ return data;
+}
+
+template <typename T> std::vector<T> lexical_cast(std::vector<std::string> &sv)
+{
+ std::vector<T> result;
+ std::transform(sv.begin(), sv.end(), std::back_inserter(result),
+ [](std::string str) -> T { return lexical_cast<T>(str); });
+ return result;
+}
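+
+// Illustrative use of the helpers above (a sketch, not part of the pass
+// logic): the scalar overload parses a single string, the vector overload
+// maps it over a list. ForceQuantParamPass below consumes them this way:
+//
+//   float s = lexical_cast<float>("0.5");                  // -> 0.5f
+//   std::vector<std::string> raw{"1", "2", "3"};
+//   std::vector<int64_t> zps = lexical_cast<int64_t>(raw); // -> {1, 2, 3}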
+
+class QuantizeOptionsImpl final : public luci::CircleQuantizer::Options
+{
+public:
+ void enable(Algorithm) final;
+ void param(AlgorithmParameters, const std::string &) final;
+ const std::string param(AlgorithmParameters) const final;
+ void params(AlgorithmParameters, std::vector<std::string> &) final;
+ std::vector<std::string> params(AlgorithmParameters) const final;
+ void layer_params(AlgorithmParameters, std::vector<std::shared_ptr<LayerParam>> &) final;
+ std::vector<std::shared_ptr<LayerParam>> layer_params(AlgorithmParameters) const final;
+ bool query(Algorithm) final;
+
+private:
+ std::vector<Algorithm> _algorithms;
+ std::map<AlgorithmParameters, const std::string> _algorithm_params;
+ std::map<AlgorithmParameters, std::vector<std::string>> _multiple_params;
+ std::map<AlgorithmParameters, std::vector<std::shared_ptr<LayerParam>>> _layer_params;
+};
+
+void QuantizeOptionsImpl::enable(Algorithm algo) { _algorithms.push_back(algo); }
+
+void QuantizeOptionsImpl::param(AlgorithmParameters param, const std::string &str)
+{
+ _algorithm_params.insert(std::pair<AlgorithmParameters, const std::string>(param, str));
+}
+
+const std::string QuantizeOptionsImpl::param(AlgorithmParameters param) const
+{
+ auto param_str = _algorithm_params.find(param);
+ if (param_str != _algorithm_params.end())
+ {
+ return param_str->second;
+ }
+ else
+ {
+ return std::string();
+ }
+}
+
+void QuantizeOptionsImpl::params(AlgorithmParameters param, std::vector<std::string> &vec)
+{
+ _multiple_params[param] = vec;
+}
+
+std::vector<std::string> QuantizeOptionsImpl::params(AlgorithmParameters param) const
+{
+ auto param_vec = _multiple_params.find(param);
+ if (param_vec != _multiple_params.end())
+ {
+ return param_vec->second;
+ }
+ else
+ {
+ return std::vector<std::string>();
+ }
+}
+
+void QuantizeOptionsImpl::layer_params(AlgorithmParameters param,
+ std::vector<std::shared_ptr<LayerParam>> &vec)
+{
+ _layer_params[param] = vec;
+}
+
+std::vector<std::shared_ptr<LayerParam>>
+QuantizeOptionsImpl::layer_params(AlgorithmParameters param) const
+{
+ auto param_vec = _layer_params.find(param);
+ if (param_vec != _layer_params.end())
+ {
+ return param_vec->second;
+ }
+ else
+ {
+ return std::vector<std::shared_ptr<LayerParam>>();
+ }
+}
+
+bool QuantizeOptionsImpl::query(Algorithm algo)
+{
+ std::vector<Algorithm>::iterator it = std::find(_algorithms.begin(), _algorithms.end(), algo);
+ if (it == _algorithms.end())
+ return false;
+
+ return true;
+}
+
+} // namespace
+
+namespace luci
+{
+
+CircleQuantizer::Options *CircleQuantizer::options(void)
+{
+ if (_options == nullptr)
+ {
+ _options = std::make_unique<QuantizeOptionsImpl>();
+ }
+
+ return _options.get();
+}
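+
+// Typical call sequence (a sketch; it mirrors the usage exercised by the
+// unit tests in CircleQuantizer.test.cpp, `graph` being a loco::Graph *):
+//
+//   luci::CircleQuantizer quantizer;
+//   auto opts = quantizer.options();
+//   opts->enable(CircleQuantizer::Options::Algorithm::QuantizeWithMinMax);
+//   opts->param(CircleQuantizer::Options::AlgorithmParameters::Quantize_input_model_dtype,
+//               "float32");
+//   opts->param(CircleQuantizer::Options::AlgorithmParameters::Quantize_output_model_dtype,
+//               "uint8");
+//   opts->param(CircleQuantizer::Options::AlgorithmParameters::Quantize_granularity, "layer");
+//   quantizer.quantize(graph);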
+
+void CircleQuantizer::quantize(loco::Graph *g) const
+{
+ // Fake quantization of weights
+ if (_options->query(Options::Algorithm::QuantizeDequantizeWeights))
+ {
+ static const std::vector<std::string> fakeq_supported_input_model_dtype{"float32"};
+ static const std::vector<std::string> fakeq_supported_output_model_dtype{"uint8", "int16"};
+ static const std::vector<std::string> fakeq_supported_granularity{"layer", "channel"};
+
+ auto input_model_dtype =
+ _options->param(Options::AlgorithmParameters::Quantize_input_model_dtype);
+ auto output_model_dtype =
+ _options->param(Options::AlgorithmParameters::Quantize_output_model_dtype);
+ auto granularity = _options->param(Options::AlgorithmParameters::Quantize_granularity);
+ auto layer_params = _options->layer_params(Options::AlgorithmParameters::Quantize_layer_params);
+
+ if (!in_array(to_lower_case(input_model_dtype), fakeq_supported_input_model_dtype))
+ throw std::runtime_error("Unsupported input type. List of supported input type: " +
+ to_string(fakeq_supported_input_model_dtype));
+
+ if (!in_array(to_lower_case(output_model_dtype), fakeq_supported_output_model_dtype))
+ throw std::runtime_error("Unsupported output type. List of supported output type: " +
+ to_string(fakeq_supported_output_model_dtype));
+
+ if (!in_array(to_lower_case(granularity), fakeq_supported_granularity))
+ throw std::runtime_error("Unsupported granularity. List of supported granularity: " +
+ to_string(fakeq_supported_granularity));
+
+ if (str_to_granularity(granularity) == QuantizationGranularity::LayerWise &&
+ str_to_dtype(output_model_dtype) != loco::DataType::U8)
+ throw std::runtime_error("Layer-wise quantization only supports uint8 dtype.");
+
+ // Check dtype/granularity of layer params
+ for (auto layer_param : layer_params)
+ {
+ auto name = layer_param->name;
+ if (!in_array(to_lower_case(layer_param->dtype), fakeq_supported_output_model_dtype))
+ {
+ throw std::runtime_error("Unsupported dtype in " + name + ". List of supported dtype: " +
+ to_string(fakeq_supported_output_model_dtype));
+ }
+ if (!in_array(to_lower_case(layer_param->granularity), fakeq_supported_granularity))
+ {
+ throw std::runtime_error(
+ "Unsupported granularity in " + name +
+ ". List of supported granularity: " + to_string(fakeq_supported_granularity));
+ }
+ }
+
+ // Clear existing quantparams before doing fake quantization
+ for (auto node : loco::active_nodes(loco::output_nodes(g)))
+ {
+ auto circle_node = loco::must_cast<luci::CircleNode *>(node);
+ if (circle_node->quantparam() != nullptr)
+ circle_node->quantparam(nullptr);
+ }
+
+ auto ctx = std::make_unique<luci::QuantizeDequantizeWeightsPass::Context>();
+ {
+ ctx->input_model_dtype = str_to_dtype(input_model_dtype);
+ ctx->output_model_dtype = str_to_dtype(output_model_dtype);
+ ctx->granularity = str_to_granularity(granularity);
+
+ for (auto layer_param : layer_params)
+ {
+ LayerInfo info;
+ {
+ info.name = layer_param->name;
+ info.dtype = str_to_dtype(layer_param->dtype);
+ info.granularity = str_to_granularity(layer_param->granularity);
+ }
+ ctx->layers_info.emplace_back(info);
+ }
+ }
+
+ luci::QuantizeDequantizeWeightsPass fake_quantizer(std::move(ctx));
+
+ fake_quantizer.run(g);
+ }
+
+ // Actual quantization of weights, bias, and activation
+ if (_options->query(Options::Algorithm::QuantizeWithMinMax))
+ {
+ static const std::vector<std::string> qwmm_supported_input_model_dtype{"float32"};
+ static const std::vector<std::string> qwmm_supported_output_model_dtype{"uint8", "int16"};
+ static const std::vector<std::string> qwmm_supported_granularity{"layer", "channel"};
+ static const std::vector<std::string> qwmm_supported_input_type{"uint8", "int16"};
+ static const std::vector<std::string> qwmm_supported_output_type{"uint8", "int16"};
+
+ auto input_model_dtype =
+ _options->param(Options::AlgorithmParameters::Quantize_input_model_dtype);
+ auto output_model_dtype =
+ _options->param(Options::AlgorithmParameters::Quantize_output_model_dtype);
+ auto granularity = _options->param(Options::AlgorithmParameters::Quantize_granularity);
+ auto input_type = _options->param(Options::AlgorithmParameters::Quantize_input_type);
+ if (input_type.empty())
+ input_type = output_model_dtype;
+ auto output_type = _options->param(Options::AlgorithmParameters::Quantize_output_type);
+ if (output_type.empty())
+ output_type = output_model_dtype;
+
+ bool TF_style_maxpool =
+ _options->param(Options::AlgorithmParameters::Quantize_TF_style_maxpool) == "True";
+
+ auto layer_params = _options->layer_params(Options::AlgorithmParameters::Quantize_layer_params);
+
+ if (!in_array(to_lower_case(input_model_dtype), qwmm_supported_input_model_dtype))
+ throw std::runtime_error("Unsupported input type. List of supported input types: " +
+ to_string(qwmm_supported_input_model_dtype));
+
+ if (!in_array(to_lower_case(output_model_dtype), qwmm_supported_output_model_dtype))
+ throw std::runtime_error("Unsupported output type. List of supported output types: " +
+ to_string(qwmm_supported_output_model_dtype));
+
+ if (!in_array(to_lower_case(granularity), qwmm_supported_granularity))
+ throw std::runtime_error("Unsupported granularity. List of supported granularity: " +
+ to_string(qwmm_supported_granularity));
+
+ if (!in_array(to_lower_case(input_type), qwmm_supported_input_type))
+ throw std::runtime_error("Unsupported input type. List of supported input types: " +
+ to_string(qwmm_supported_input_type));
+
+ if (!in_array(to_lower_case(output_type), qwmm_supported_output_type))
+ throw std::runtime_error("Unsupported output type. List of supported output types: " +
+ to_string(qwmm_supported_output_type));
+
+ if (str_to_granularity(granularity) == QuantizationGranularity::LayerWise &&
+ str_to_dtype(output_model_dtype) != loco::DataType::U8)
+ throw std::runtime_error("Layer-wise quantization only supports uint8 dtype.");
+
+ // Check dtype/granularity of layer params
+ for (auto layer_param : layer_params)
+ {
+ auto name = layer_param->name;
+ if (!in_array(to_lower_case(layer_param->dtype), qwmm_supported_output_model_dtype))
+ {
+ throw std::runtime_error("Unsupported dtype in " + name + ". List of supported dtype: " +
+ to_string(qwmm_supported_output_model_dtype));
+ }
+ if (!in_array(to_lower_case(layer_param->granularity), qwmm_supported_granularity))
+ {
+ throw std::runtime_error(
+ "Unsupported granularity in " + name +
+ ". List of supported granularity: " + to_string(qwmm_supported_granularity));
+ }
+ }
+
+ // Input model checker for quantization
+ luci::QuantizePreCheckerPass input_model_checker{};
+ input_model_checker.run(g);
+
+ auto ctx = std::make_unique<luci::QuantizeWithMinMaxPass::Context>();
+ {
+ ctx->input_model_dtype = str_to_dtype(input_model_dtype);
+ ctx->output_model_dtype = str_to_dtype(output_model_dtype);
+ ctx->granularity = str_to_granularity(granularity);
+ ctx->input_type = str_to_dtype(input_type);
+ ctx->output_type = str_to_dtype(output_type);
+ ctx->TF_style_maxpool = TF_style_maxpool;
+
+ for (auto layer_param : layer_params)
+ {
+ LayerInfo info;
+ {
+ info.name = layer_param->name;
+ info.dtype = str_to_dtype(layer_param->dtype);
+ info.granularity = str_to_granularity(layer_param->granularity);
+ }
+ ctx->layers_info.emplace_back(info);
+ }
+ }
+
+ luci::QuantizeWithMinMaxPass quantizer(std::move(ctx));
+
+ quantizer.run(g);
+
+ auto verify_ctx = std::make_unique<luci::QuantizedModelVerifier::Context>();
+ {
+ verify_ctx->output_model_dtype = str_to_dtype(output_model_dtype);
+ verify_ctx->granularity = str_to_granularity(granularity);
+ verify_ctx->input_type = str_to_dtype(input_type);
+ verify_ctx->output_type = str_to_dtype(output_type);
+ verify_ctx->TF_style_maxpool = TF_style_maxpool;
+
+ for (auto layer_param : layer_params)
+ {
+ LayerInfo info;
+ {
+ info.name = layer_param->name;
+ info.dtype = str_to_dtype(layer_param->dtype);
+ info.granularity = str_to_granularity(layer_param->granularity);
+ }
+ verify_ctx->layers_info.emplace_back(info);
+ }
+ }
+
+ // Verify the type/granularity of the quantized model
+ luci::QuantizedModelVerifier verifier(std::move(verify_ctx));
+
+ verifier.verify(g);
+ }
+
+ // Requantize
+ if (_options->query(Options::Algorithm::Requantize))
+ {
+ static const std::vector<std::string> rq_supported_input_model_dtype{"int8"};
+ static const std::vector<std::string> rq_supported_output_model_dtype{"uint8"};
+
+ auto input_model_dtype =
+ _options->param(Options::AlgorithmParameters::Quantize_input_model_dtype);
+ auto output_model_dtype =
+ _options->param(Options::AlgorithmParameters::Quantize_output_model_dtype);
+
+ if (!in_array(to_lower_case(input_model_dtype), rq_supported_input_model_dtype))
+ throw std::runtime_error("Unsupported input type. List of supported input types: " +
+ to_string(rq_supported_input_model_dtype));
+
+ if (!in_array(to_lower_case(output_model_dtype), rq_supported_output_model_dtype))
+ throw std::runtime_error("Unsupported output type. List of supported output types: " +
+ to_string(rq_supported_output_model_dtype));
+
+ luci::RequantizePass requantizer(str_to_dtype(input_model_dtype),
+ str_to_dtype(output_model_dtype));
+ requantizer.run(g);
+ }
+
+  // Forcibly write quantparam to the specified tensors
+ // NOTE Only per-tensor (not per-channel) qparam can be written
+ if (_options->query(Options::Algorithm::ForceQuantParam))
+ {
+ ForceQuantParamPass::TensorVector tensors =
+ _options->params(Options::AlgorithmParameters::Quantize_tensor_names);
+ auto str_scales = _options->params(Options::AlgorithmParameters::Quantize_scales);
+ auto str_zero_points = _options->params(Options::AlgorithmParameters::Quantize_zero_points);
+
+ // Cast scales/zero_points to proper types
+ ForceQuantParamPass::ScaleVector scales = lexical_cast<float>(str_scales);
+ ForceQuantParamPass::ZPVector zero_points = lexical_cast<int64_t>(str_zero_points);
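+    // NOTE The three vectors are expected to be parallel: tensors[i] receives
+    // scale scales[i] and zero point zero_points[i]. For example (hypothetical
+    // input), tensor_names {"ofm"}, scales {"0.5"}, zero_points {"0"} would
+    // write {scale = 0.5, zerop = 0} to the tensor named "ofm".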
+
+ ForceQuantParamPass fq(tensors, scales, zero_points);
+ fq.run(g);
+ }
+
+ // Copy quantparam of a tensor to another tensor
+ if (_options->query(Options::Algorithm::CopyQuantParam))
+ {
+ CopyQuantParamPass::TensorVector src_tensors =
+ _options->params(Options::AlgorithmParameters::Quantize_src_tensor_names);
+ CopyQuantParamPass::TensorVector dst_tensors =
+ _options->params(Options::AlgorithmParameters::Quantize_dst_tensor_names);
+
+ CopyQuantParamPass cq(src_tensors, dst_tensors);
+ cq.run(g);
+ }
+
+ // Convert quantized model to fake-quantized model
+ if (_options->query(Options::Algorithm::ConvertToFakeQuantizedModel))
+ {
+ luci::ConvertToFakeQuantizedModelPass fake_quantizer;
+ fake_quantizer.run(g);
+
+ logo::Phase phase;
+
+ // Default passes
+ phase.emplace_back(std::make_unique<logo::RemoveDeadNodeWithQueryPass>());
+ phase.emplace_back(std::make_unique<luci::CircleShapeInferencePass>());
+ phase.emplace_back(std::make_unique<luci::CircleTypeInferencePass>());
+
+ // Fold Dequantize Ops generated during fake quantization
+ phase.emplace_back(std::make_unique<luci::FoldDequantizePass>());
+
+ ProgressReporter prog(g, logo::PhaseStrategy::Restart);
+ logo::PhaseRunner<logo::PhaseStrategy::Restart> phase_runner{g};
+ phase_runner.attach(&prog);
+ phase_runner.run(phase);
+ }
+
+ logo::Phase phase;
+
+ // Do Shape/Type inference
+ phase.emplace_back(std::make_unique<luci::CircleShapeInferencePass>());
+ phase.emplace_back(std::make_unique<luci::CircleTypeInferencePass>());
+
+ ProgressReporter prog(g, logo::PhaseStrategy::Saturate);
+ logo::PhaseRunner<logo::PhaseStrategy::Saturate> phase_runner{g};
+ phase_runner.attach(&prog);
+ phase_runner.run(phase);
+}
+
+} // namespace luci
diff --git a/compiler/luci/pass/src/CircleQuantizer.test.cpp b/compiler/luci/pass/src/CircleQuantizer.test.cpp
new file mode 100644
index 000000000..5766d5fe5
--- /dev/null
+++ b/compiler/luci/pass/src/CircleQuantizer.test.cpp
@@ -0,0 +1,191 @@
+/*
+ * Copyright (c) 2022 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "luci/CircleQuantizer.h"
+
+#include <gtest/gtest.h>
+
+using namespace luci;
+using Algorithms = luci::CircleQuantizer::Options::Algorithm;
+using AlgorithmParameters = luci::CircleQuantizer::Options::AlgorithmParameters;
+
+TEST(CircleQuantizerTest, quantize_quantdequant_simple)
+{
+ loco::Graph g;
+ luci::CircleQuantizer o;
+
+ auto options = o.options();
+
+ options->enable(Algorithms::QuantizeDequantizeWeights);
+ options->param(AlgorithmParameters::Quantize_input_model_dtype, "float32");
+ options->param(AlgorithmParameters::Quantize_output_model_dtype, "uint8");
+ options->param(AlgorithmParameters::Quantize_granularity, "layer");
+
+ o.quantize(&g);
+
+ SUCCEED();
+}
+
+TEST(CircleQuantizerTest, quantize_quantdequant_input_NEG)
+{
+ loco::Graph g;
+ luci::CircleQuantizer o;
+
+ auto options = o.options();
+
+ options->enable(Algorithms::QuantizeDequantizeWeights);
+ options->param(AlgorithmParameters::Quantize_input_model_dtype, "invalid");
+ options->param(AlgorithmParameters::Quantize_output_model_dtype, "uint8");
+ options->param(AlgorithmParameters::Quantize_granularity, "layer");
+
+ EXPECT_THROW(o.quantize(&g), std::runtime_error);
+}
+
+TEST(CircleQuantizerTest, quantize_quantdequant_output_NEG)
+{
+ loco::Graph g;
+ luci::CircleQuantizer o;
+
+ auto options = o.options();
+
+ options->enable(Algorithms::QuantizeDequantizeWeights);
+ options->param(AlgorithmParameters::Quantize_input_model_dtype, "float32");
+ options->param(AlgorithmParameters::Quantize_output_model_dtype, "invalid");
+ options->param(AlgorithmParameters::Quantize_granularity, "layer");
+
+ EXPECT_THROW(o.quantize(&g), std::runtime_error);
+}
+
+TEST(CircleQuantizerTest, quantize_quantdequant_gran_NEG)
+{
+ loco::Graph g;
+ luci::CircleQuantizer o;
+
+ auto options = o.options();
+
+ options->enable(Algorithms::QuantizeDequantizeWeights);
+ options->param(AlgorithmParameters::Quantize_input_model_dtype, "float32");
+ options->param(AlgorithmParameters::Quantize_output_model_dtype, "uint8");
+ options->param(AlgorithmParameters::Quantize_granularity, "invalid");
+
+ EXPECT_THROW(o.quantize(&g), std::runtime_error);
+}
+
+TEST(CircleQuantizerTest, quantize_minmax_simple)
+{
+ loco::Graph g;
+ luci::CircleQuantizer o;
+
+ auto options = o.options();
+
+ options->enable(Algorithms::QuantizeWithMinMax);
+ options->param(AlgorithmParameters::Quantize_input_model_dtype, "float32");
+ options->param(AlgorithmParameters::Quantize_output_model_dtype, "uint8");
+ options->param(AlgorithmParameters::Quantize_granularity, "layer");
+
+ o.quantize(&g);
+
+ SUCCEED();
+}
+
+TEST(CircleQuantizerTest, quantize_minmax_input_NEG)
+{
+ loco::Graph g;
+ luci::CircleQuantizer o;
+
+ auto options = o.options();
+
+ options->enable(Algorithms::QuantizeWithMinMax);
+ options->param(AlgorithmParameters::Quantize_input_model_dtype, "invalid");
+ options->param(AlgorithmParameters::Quantize_output_model_dtype, "uint8");
+ options->param(AlgorithmParameters::Quantize_granularity, "layer");
+
+ EXPECT_THROW(o.quantize(&g), std::runtime_error);
+}
+
+TEST(CircleQuantizerTest, quantize_minmax_output_NEG)
+{
+ loco::Graph g;
+ luci::CircleQuantizer o;
+
+ auto options = o.options();
+
+ options->enable(Algorithms::QuantizeWithMinMax);
+ options->param(AlgorithmParameters::Quantize_input_model_dtype, "float32");
+ options->param(AlgorithmParameters::Quantize_output_model_dtype, "invalid");
+ options->param(AlgorithmParameters::Quantize_granularity, "layer");
+
+ EXPECT_THROW(o.quantize(&g), std::runtime_error);
+}
+
+TEST(CircleQuantizerTest, quantize_minmax_gran_NEG)
+{
+ loco::Graph g;
+ luci::CircleQuantizer o;
+
+ auto options = o.options();
+
+ options->enable(Algorithms::QuantizeWithMinMax);
+ options->param(AlgorithmParameters::Quantize_input_model_dtype, "float32");
+ options->param(AlgorithmParameters::Quantize_output_model_dtype, "uint8");
+ options->param(AlgorithmParameters::Quantize_granularity, "invalid");
+
+ EXPECT_THROW(o.quantize(&g), std::runtime_error);
+}
+
+TEST(CircleQuantizerTest, quantize_requant_simple)
+{
+ loco::Graph g;
+ luci::CircleQuantizer o;
+
+ auto options = o.options();
+
+ options->enable(Algorithms::Requantize);
+ options->param(AlgorithmParameters::Quantize_input_model_dtype, "int8");
+ options->param(AlgorithmParameters::Quantize_output_model_dtype, "uint8");
+
+ o.quantize(&g);
+
+ SUCCEED();
+}
+
+TEST(CircleQuantizerTest, quantize_requant_input_NEG)
+{
+ loco::Graph g;
+ luci::CircleQuantizer o;
+
+ auto options = o.options();
+
+ options->enable(Algorithms::Requantize);
+ options->param(AlgorithmParameters::Quantize_input_model_dtype, "invalid");
+ options->param(AlgorithmParameters::Quantize_output_model_dtype, "uint8");
+
+ EXPECT_THROW(o.quantize(&g), std::runtime_error);
+}
+
+TEST(CircleQuantizerTest, quantize_requant_output_NEG)
+{
+ loco::Graph g;
+ luci::CircleQuantizer o;
+
+ auto options = o.options();
+
+ options->enable(Algorithms::Requantize);
+ options->param(AlgorithmParameters::Quantize_input_model_dtype, "int8");
+ options->param(AlgorithmParameters::Quantize_output_model_dtype, "invalid");
+
+ EXPECT_THROW(o.quantize(&g), std::runtime_error);
+}
diff --git a/compiler/luci/pass/src/ConvertNCHWToNHWCPass.cpp b/compiler/luci/pass/src/ConvertNCHWToNHWCPass.cpp
index 270714049..ce4f54035 100644
--- a/compiler/luci/pass/src/ConvertNCHWToNHWCPass.cpp
+++ b/compiler/luci/pass/src/ConvertNCHWToNHWCPass.cpp
@@ -228,6 +228,9 @@ bool check_4d_reshape(loco::Node *node, const std::vector<int32_t> indices)
if (input->shape_status() != luci::ShapeStatus::VALID)
return false;
+ if (input->rank() != 4)
+ return false;
+
if (reshape->shape_status() != luci::ShapeStatus::VALID)
return false;
@@ -804,6 +807,8 @@ class ConvertNCHWToNHWC final : public luci::CircleNodeMutableVisitor<bool>
return true;
}
+ bool visit(luci::CircleElu *node) { return convert_unary_features<luci::CircleElu>(node); }
+
bool visit(luci::CircleLeakyRelu *node)
{
return convert_unary_features<luci::CircleLeakyRelu>(node);
@@ -1240,6 +1245,7 @@ bool ConvertNCHWToNHWCPass::run(loco::Graph *g)
break;
case luci::CircleOpcode::ADD:
case luci::CircleOpcode::CONCATENATION:
+ case luci::CircleOpcode::ELU:
case luci::CircleOpcode::LEAKY_RELU:
case luci::CircleOpcode::LOGISTIC:
case luci::CircleOpcode::MAXIMUM:
diff --git a/compiler/luci/pass/src/ConvertNCHWToNHWCPass.test.cpp b/compiler/luci/pass/src/ConvertNCHWToNHWCPass.test.cpp
index c9412fbb1..dd81d1380 100644
--- a/compiler/luci/pass/src/ConvertNCHWToNHWCPass.test.cpp
+++ b/compiler/luci/pass/src/ConvertNCHWToNHWCPass.test.cpp
@@ -264,6 +264,22 @@ public:
luci::CircleConst *input2 = nullptr;
};
+class EluGraph final : public SimpleGraph
+{
+protected:
+ loco::Node *insertGraphBody(loco::Node *input) override
+ {
+ elu = g.nodes()->create<luci::CircleElu>();
+ elu->features(input);
+ elu->name("elu");
+
+ return elu;
+ }
+
+public:
+ luci::CircleElu *elu = nullptr;
+};
+
class LeakyReluGraph final : public SimpleGraph
{
protected:
@@ -941,6 +957,26 @@ TEST(ConvertNCHWToNHWC, Concatenation)
EXPECT_EQ(3, g.concat->axis());
}
+TEST(ConvertNCHWToNHWC, Elu)
+{
+ EluGraph g;
+ g.init();
+
+ run_phase(&g.g, true, true);
+
+ check_pre_trans(g.elu->features());
+
+ auto elu_succs = loco::succs(g.elu);
+ EXPECT_EQ(1, elu_succs.size());
+ check_post_trans(*elu_succs.begin());
+
+ // Check elu shape
+ EXPECT_EQ(1, g.elu->dim(0).value());
+ EXPECT_EQ(4, g.elu->dim(1).value());
+ EXPECT_EQ(4, g.elu->dim(2).value());
+ EXPECT_EQ(16, g.elu->dim(3).value());
+}
+
TEST(ConvertNCHWToNHWC, LeakyRelu)
{
LeakyReluGraph g;
diff --git a/compiler/luci/pass/src/ConvertToFakeQuantizedModelPass.cpp b/compiler/luci/pass/src/ConvertToFakeQuantizedModelPass.cpp
new file mode 100644
index 000000000..11970fff5
--- /dev/null
+++ b/compiler/luci/pass/src/ConvertToFakeQuantizedModelPass.cpp
@@ -0,0 +1,214 @@
+/*
+ * Copyright (c) 2022 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "luci/Pass/ConvertToFakeQuantizedModelPass.h"
+#include "luci/Pass/QuantizationParameters.h"
+
+#include "QuantizationUtils.h"
+
+#include <luci/Profile/CircleNodeOrigin.h>
+#include <luci/IR/CircleNodes.h>
+#include <luci/IR/CircleNodeVisitor.h>
+#include <luci/Log.h>
+
+namespace
+{
+
+// Create a Quantize Op whose dtype/shape/qparam are the same as node's
+luci::CircleQuantize *create_quantize(luci::CircleNode *node)
+{
+ auto quantize = node->graph()->nodes()->create<luci::CircleQuantize>();
+ quantize->name(node->name() + "_Quantize");
+ quantize->dtype(node->dtype());
+ quantize->rank(node->rank());
+ for (uint32_t i = 0; i < node->rank(); i++)
+ quantize->dim(i).set(node->dim(i).value());
+
+ quantize->shape_status(luci::ShapeStatus::VALID);
+
+ copy_quantparam(node, quantize);
+
+ luci::add_origin(quantize, luci::get_origin(node));
+
+ return quantize;
+}
+
+// Create a Dequantize Op whose shape is the same as node's
+luci::CircleDequantize *create_dequantize(luci::CircleNode *node)
+{
+ auto dequantize = node->graph()->nodes()->create<luci::CircleDequantize>();
+ dequantize->name(node->name() + "_Dequantize");
+ dequantize->dtype(loco::DataType::FLOAT32);
+ dequantize->rank(node->rank());
+ for (uint32_t i = 0; i < node->rank(); i++)
+ dequantize->dim(i).set(node->dim(i).value());
+
+ dequantize->shape_status(luci::ShapeStatus::VALID);
+
+ luci::add_origin(dequantize, luci::get_origin(node));
+
+ return dequantize;
+}
+
+// Return true if node is quantized activation
+// 1. dtype is u8 or s16
+// 2. node has qparam
+bool is_quant_act(const luci::CircleNode *node)
+{
+ if (node->dtype() != loco::DataType::U8 and node->dtype() != loco::DataType::S16)
+ return false;
+
+ if (not node->quantparam())
+ return false;
+
+ return true;
+}
+
+// Return true if node is quantized const
+// 1. dtype is not fp32
+// 2. node has qparam
+// NOTE Quantized const can have the following types
+// u8 (weights, activation), s16 (weights, activation), s32 (bias), s64 (bias)
+bool is_quant_const(const luci::CircleConst *node)
+{
+ if (node->dtype() == loco::DataType::FLOAT32)
+ return false;
+
+ if (not node->quantparam())
+ return false;
+
+ return true;
+}
+
+// Insert dequantize Op after node
+void insert_dequantize(loco::Node *lnode)
+{
+ auto node = loco::must_cast<luci::CircleNode *>(lnode);
+ auto dequant = create_dequantize(node);
+ loco::replace(node).with(dequant);
+ dequant->input(node);
+}
+
+// Insert quantize Op after node and return the quantize Op
+luci::CircleQuantize *insert_quantize(loco::Node *lnode)
+{
+ auto node = loco::must_cast<luci::CircleNode *>(lnode);
+ auto quant = create_quantize(node);
+ loco::replace(node).with(quant);
+ quant->input(node);
+ return quant;
+}
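+
+// Rewiring sketch for the two helpers above. loco::replace(node).with(x)
+// redirects node's existing successors to x before node is wired as x's
+// input; the statement order matters, since the reverse order would also
+// rewrite x's own input edge and leave x feeding itself.
+//
+//   before:  node -> succ1, succ2
+//   after :  node -> quant -> succ1, succ2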
+
+// Dequantize node
+void dequantize(luci::CircleNode *node)
+{
+ node->dtype(loco::DataType::FLOAT32);
+ node->quantparam(nullptr);
+}
+
+// Do fake quantization on quantized activation
+// 1. Insert Quantize-Dequantize Ops
+// 2. Update dtype/quantparam of node
+void fq_activation(luci::CircleNode *node)
+{
+ if (not is_quant_act(node))
+ return;
+
+ auto quant = insert_quantize(node);
+ insert_dequantize(quant);
+
+ dequantize(node);
+}
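+
+// Resulting pattern for a quantized (u8/s16) activation, e.g. a u8 Conv2D
+// (see the BEFORE/AFTER diagrams in the test file for the full picture):
+//
+//   before:  Conv2D(u8, qparam) -> succ
+//   after :  Conv2D(fp32) -> Quantize(u8, qparam) -> Dequantize(fp32) -> succ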
+
+#define RETURN_UNLESS(COND) \
+ if (not(COND)) \
+ return;
+
+// Visitor to do fake quantization for each Op
+// For non-const activation, insert Quantize-Dequantize after the ofm
+// For quantized const, insert Dequantize after the const
+struct FakeQuantize final : public luci::CircleNodeMutableVisitor<void>
+{
+ void visit(luci::CircleNode *node)
+ {
+ throw std::runtime_error("Unsupported op for fake quantization in " + node->name());
+ }
+
+ void visit(luci::CircleInput *node)
+ {
+ RETURN_UNLESS(is_quant_act(node));
+
+ auto quant = insert_quantize(node);
+ insert_dequantize(quant);
+
+ dequantize(node);
+
+ // Update graph input
+ const auto inputs = node->graph()->inputs();
+ auto graph_input = inputs->at(node->index());
+ graph_input->dtype(loco::DataType::FLOAT32);
+ }
+
+ void visit(luci::CircleOutput *node)
+ {
+ RETURN_UNLESS(is_quant_act(node));
+
+ dequantize(node);
+
+ // Update graph output
+ const auto outputs = node->graph()->outputs();
+ auto graph_output = outputs->at(node->index());
+ graph_output->dtype(loco::DataType::FLOAT32);
+ }
+
+ // For quantized const, insert Dequantize Op
+ void visit(luci::CircleConst *node)
+ {
+ RETURN_UNLESS(is_quant_const(node));
+
+ insert_dequantize(node);
+ }
+
+ // For non-const activation, insert Quantize-Dequantize Ops
+ // and dequantize the node
+ void visit(luci::CircleConv2D *node) { fq_activation(node); }
+ void visit(luci::CircleAdd *node) { fq_activation(node); }
+};
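+
+// NOTE Ops without a dedicated visit() overload fall through to the
+// CircleNode overload and throw. For example, visiting a quantized
+// CircleMul (not handled above) raises std::runtime_error, so the pass
+// fails loudly instead of producing a half-converted model.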
+
+#undef RETURN_UNLESS
+
+} // namespace
+
+namespace luci
+{
+
+bool ConvertToFakeQuantizedModelPass::run(loco::Graph *g)
+{
+ LOGGER(l);
+ for (auto node : loco::active_nodes(loco::output_nodes(g)))
+ {
+ auto circle_node = loco::must_cast<luci::CircleNode *>(node);
+ INFO(l) << "ConvertToFakeQuantizedModelPass visit node: " << circle_node->name() << std::endl;
+
+ FakeQuantize fq;
+ circle_node->accept(&fq);
+ }
+
+ // One time run
+ return false;
+}
+
+} // namespace luci
diff --git a/compiler/luci/pass/src/ConvertToFakeQuantizedModelPass.test.cpp b/compiler/luci/pass/src/ConvertToFakeQuantizedModelPass.test.cpp
new file mode 100644
index 000000000..560d68a74
--- /dev/null
+++ b/compiler/luci/pass/src/ConvertToFakeQuantizedModelPass.test.cpp
@@ -0,0 +1,277 @@
+/*
+ * Copyright (c) 2022 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include <logo/Phase.h>
+
+#include "luci/Pass/ConvertToFakeQuantizedModelPass.h"
+#include <luci/IR/CircleNodes.h>
+
+#include <gtest/gtest.h>
+
+namespace
+{
+
+// Check the below pattern
+// Quantize (scale, zp) -> Dequantize (node)
+void check_q_dq(loco::Node *node, float scale, int64_t zp)
+{
+ auto dequant = dynamic_cast<luci::CircleDequantize *>(node);
+ EXPECT_TRUE(dequant != nullptr);
+ auto quant = dynamic_cast<luci::CircleQuantize *>(dequant->input());
+ EXPECT_TRUE(quant != nullptr);
+ auto qparam = quant->quantparam();
+ EXPECT_EQ(scale, qparam->scale[0]);
+ EXPECT_EQ(zp, qparam->zerop[0]);
+}
+
+// Check the below pattern
+// Dequantize (node)
+void check_dq(loco::Node *node)
+{
+ auto dequant = dynamic_cast<luci::CircleDequantize *>(node);
+ EXPECT_TRUE(dequant != nullptr);
+}
+
+void set_qparam(luci::CircleNode *node, float scale, int64_t zp)
+{
+ auto qparam = std::make_unique<luci::CircleQuantParam>();
+ {
+ qparam->scale.push_back(scale);
+ qparam->zerop.push_back(zp);
+ }
+ node->quantparam(std::move(qparam));
+}
+
+/**
+ * SimpleGraph for testing
+ * - Child class should implement insertGraphBody()
+ *
+ * Example (U8ConvGraph inherits SimpleGraph and creates a Conv2D Op)
+ *
+ * BEFORE
+ * - A model is quantized (ex: u8)
+ *
+ * [Input(u8)] [Filter(u8)] [Bias(s32)]
+ * \ | /
+ * \ | /
+ * \ | /
+ * [Conv2D(u8)]
+ * |
+ * [Output(u8)]
+ *
+ * AFTER
+ * - Ops are converted to fp32
+ * - Quantize/Dequantize Ops are inserted properly
+ * - Q-DQ is inserted after non-const activation
+ * - DQ is inserted after const
+ *
+ * [Input(u8)]
+ * |
+ * [Quant(u8)] [Filter(u8)] [Bias(s32)]
+ * | | |
+ * [Dequant(fp32)] [Dequant(fp32)] [Dequant(fp32)]
+ * \ | /
+ * \ | /
+ * \ | /
+ * [Conv2D(fp32)]
+ * |
+ * [Quant(u8)]
+ * |
+ * [Dequant(fp32)]
+ * |
+ * [Output(fp32)]
+ */
+template <loco::DataType T> class SimpleGraph
+{
+public:
+ void init()
+ {
+ input = g.nodes()->create<luci::CircleInput>();
+ output = g.nodes()->create<luci::CircleOutput>();
+ input->name("input");
+ output->name("output");
+
+ auto graph_input = g.inputs()->create();
+ input->index(graph_input->index());
+ auto graph_output = g.outputs()->create();
+ output->index(graph_output->index());
+
+ graph_input->dtype(T);
+ input->dtype(T);
+ output->dtype(T);
+ graph_output->dtype(T);
+
+ graph_input->shape({1, 4, 4, 4});
+ input->shape({1, 4, 4, 4});
+ output->shape({1, 4, 4, 4});
+ graph_output->shape({1, 4, 4, 4});
+
+ set_qparam(input, 1.0, 0);
+ set_qparam(output, 1.0, 0);
+
+ auto graph_body = insertGraphBody(input);
+ output->from(graph_body);
+ }
+
+ virtual ~SimpleGraph() = default;
+
+protected:
+ virtual loco::Node *insertGraphBody(loco::Node *input) = 0;
+
+public:
+ loco::Graph g;
+ luci::CircleInput *input = nullptr;
+ luci::CircleOutput *output = nullptr;
+};
+
+class U8ConvGraph final : public SimpleGraph<loco::DataType::U8>
+{
+protected:
+ loco::Node *insertGraphBody(loco::Node *input) override
+ {
+ conv = g.nodes()->create<luci::CircleConv2D>();
+ weights = g.nodes()->create<luci::CircleConst>();
+ bias = g.nodes()->create<luci::CircleConst>();
+
+ conv->dtype(loco::DataType::U8);
+ weights->dtype(loco::DataType::U8);
+ bias->dtype(loco::DataType::S32);
+
+ conv->shape({1, 4, 4, 4});
+ weights->shape({4, 1, 1, 4});
+ bias->shape({4});
+
+ weights->size<loco::DataType::U8>(16);
+ for (uint32_t i = 0; i < 16; i++)
+ weights->at<loco::DataType::U8>(i) = i;
+
+ bias->size<loco::DataType::S32>(4);
+ for (uint32_t i = 0; i < 4; i++)
+ bias->at<loco::DataType::S32>(i) = i;
+
+ set_qparam(conv, 2.0, 127);
+ set_qparam(weights, 2.0, 127);
+ set_qparam(bias, 2.0, 127);
+
+ conv->input(input);
+ conv->filter(weights);
+ conv->bias(bias);
+
+ conv->name("conv");
+ weights->name("weights");
+ bias->name("bias");
+
+ return conv;
+ }
+
+public:
+ luci::CircleConv2D *conv = nullptr;
+ luci::CircleConst *weights = nullptr;
+ luci::CircleConst *bias = nullptr;
+};
+
+class FP32ConvGraph final : public SimpleGraph<loco::DataType::FLOAT32>
+{
+protected:
+ loco::Node *insertGraphBody(loco::Node *input) override
+ {
+ conv = g.nodes()->create<luci::CircleConv2D>();
+ weights = g.nodes()->create<luci::CircleConst>();
+ bias = g.nodes()->create<luci::CircleConst>();
+
+ conv->dtype(loco::DataType::FLOAT32);
+ weights->dtype(loco::DataType::FLOAT32);
+ bias->dtype(loco::DataType::FLOAT32);
+
+ conv->shape({1, 4, 4, 4});
+ weights->shape({4, 1, 1, 4});
+ bias->shape({4});
+
+ weights->size<loco::DataType::FLOAT32>(16);
+ for (uint32_t i = 0; i < 16; i++)
+ weights->at<loco::DataType::FLOAT32>(i) = i;
+
+ bias->size<loco::DataType::FLOAT32>(4);
+ for (uint32_t i = 0; i < 4; i++)
+ bias->at<loco::DataType::FLOAT32>(i) = i;
+
+ conv->input(input);
+ conv->filter(weights);
+ conv->bias(bias);
+
+ conv->name("conv");
+ weights->name("weights");
+ bias->name("bias");
+
+ return conv;
+ }
+
+public:
+ luci::CircleConv2D *conv = nullptr;
+ luci::CircleConst *weights = nullptr;
+ luci::CircleConst *bias = nullptr;
+};
+
+} // namespace
+
+TEST(ConvertToFakeQuantizedModelTest, U8Conv2D)
+{
+ U8ConvGraph g;
+ g.init();
+
+ luci::ConvertToFakeQuantizedModelPass fq;
+ fq.run(&g.g);
+
+ // Check ifm
+ check_q_dq(g.conv->input(), 1.0, 0);
+
+ // Check weights
+ check_dq(g.conv->filter());
+
+ // Check bias
+ check_dq(g.conv->bias());
+
+ // Check ofm
+ check_q_dq(g.output->from(), 2.0, 127);
+
+ SUCCEED();
+}
+
+TEST(ConvertToFakeQuantizedModelTest, F32Conv2D_NEG)
+{
+ FP32ConvGraph g;
+ g.init();
+
+ luci::ConvertToFakeQuantizedModelPass fq;
+ fq.run(&g.g);
+
+ uint32_t dequant_count = 0;
+ uint32_t quant_count = 0;
+
+ for (auto node : loco::active_nodes(loco::output_nodes(&g.g)))
+ {
+ auto cnode = loco::must_cast<luci::CircleNode *>(node);
+ auto opcode = cnode->opcode();
+ if (opcode == luci::CircleOpcode::DEQUANTIZE)
+ dequant_count++;
+ if (opcode == luci::CircleOpcode::QUANTIZE)
+ quant_count++;
+ }
+
+ // Check no quant/dequant Op is inserted
+ EXPECT_EQ(0, quant_count);
+ EXPECT_EQ(0, dequant_count);
+}
diff --git a/compiler/luci/pass/src/CopyQuantParamPass.cpp b/compiler/luci/pass/src/CopyQuantParamPass.cpp
new file mode 100644
index 000000000..9b1bb0ea9
--- /dev/null
+++ b/compiler/luci/pass/src/CopyQuantParamPass.cpp
@@ -0,0 +1,82 @@
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "luci/Pass/CopyQuantParamPass.h"
+
+#include <luci/IR/CircleNodes.h>
+#include <luci/Log.h>
+
+namespace luci
+{
+
+namespace
+{
+
+struct SrcDst
+{
+ CircleNode *src = nullptr;
+ CircleNode *dst = nullptr;
+};
+
+} // namespace
+
+bool CopyQuantParamPass::run(loco::Graph *g)
+{
+ LOGGER(l);
+
+ INFO(l) << "CopyQuantParamPass Start" << std::endl;
+
+ if (_src_tensors.size() != _dst_tensors.size())
+ throw std::runtime_error("The numbers of Source/Destination tensors do not match.");
+
+ // Return src/dst CircleNodes
+ auto get_src_dst = [&g](std::string src, std::string dst) {
+ SrcDst src_dst;
+ for (auto node : loco::active_nodes(loco::output_nodes(g)))
+ {
+ auto const cnode = loco::must_cast<CircleNode *>(node);
+ auto const name = cnode->name();
+ if (name == src)
+ src_dst.src = cnode;
+
+ if (name == dst)
+ src_dst.dst = cnode;
+ }
+ return src_dst;
+ };
+
+ for (uint32_t i = 0; i < _src_tensors.size(); i++)
+ {
+ auto src = _src_tensors[i];
+ auto dst = _dst_tensors[i];
+
+ auto nodes = get_src_dst(src, dst);
+ if (not nodes.src)
+ throw std::runtime_error("The tensor named " + src + " does not exist.");
+
+ if (not nodes.dst)
+ throw std::runtime_error("The tensor named " + dst + " does not exist.");
+
+ copy_quantparam(nodes.src, nodes.dst);
+
+ INFO(l) << "Quantparam of " << src << " is copied to " << dst << std::endl;
+ }
+
+ INFO(l) << "CopyQuantParamPass End" << std::endl;
+
+ return false; // one time run
+}
+
+} // namespace luci
diff --git a/compiler/luci/pass/src/FoldGatherPass.cpp b/compiler/luci/pass/src/FoldGatherPass.cpp
new file mode 100644
index 000000000..f179d74bd
--- /dev/null
+++ b/compiler/luci/pass/src/FoldGatherPass.cpp
@@ -0,0 +1,185 @@
+/*
+ * Copyright (c) 2022 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "luci/Pass/FoldGatherPass.h"
+#include "CircleOptimizerUtils.h"
+
+#include <luci/IR/CircleNodes.h>
+
+namespace
+{
+
+/**
+ * Fold to const if
+ *
+ * 1. params: const and dtype = S32 or S64
+ * 2. indices: const and dtype = S32 or S64
+ *
+ * BEFORE
+ *
+ * [CircleConst] [CircleConst]
+ * | |
+ * +---------[Gather]---------+
+ *
+ * AFTER
+ *
+ * [CircleConst]
+ *
+ **/
+template <loco::DataType InputT, loco::DataType IndexT>
+bool fold_gather(luci::CircleGather *gather_node)
+{
+ const auto params = loco::must_cast<luci::CircleConst *>(gather_node->params());
+ const auto indices = loco::must_cast<luci::CircleConst *>(gather_node->indices());
+
+ const auto rank = params->rank();
+ auto axis = gather_node->axis();
+ if (axis < 0)
+ {
+ axis += static_cast<int32_t>(rank);
+ }
+
+ if (axis < 0 or axis >= static_cast<int32_t>(rank))
+ throw std::runtime_error("Unsupported axis value");
+
+ const auto name = gather_node->name();
+ assert(name.length() > 0);
+
+ auto constant = gather_node->graph()->nodes()->create<luci::CircleConst>();
+ constant->dtype(InputT);
+ constant->name(name + "_folded");
+
+ constant->rank(rank + indices->rank() - 1);
+
+ assert(constant->rank() > 0);
+
+ std::vector<uint32_t> shape;
+ for (uint32_t i = 0; i < rank; ++i)
+ {
+ if (i != static_cast<uint32_t>(axis))
+ {
+ const auto dim = params->dim(i).value();
+ shape.push_back(dim);
+ }
+ else
+ {
+ for (uint32_t j = 0; j < indices->rank(); ++j)
+ {
+ const auto dim = indices->dim(j).value();
+ shape.push_back(dim);
+ }
+ }
+ }
+
+ uint32_t size = 1;
+ for (uint32_t i = 0; i < shape.size(); ++i)
+ {
+ constant->dim(i).set(shape.at(i));
+ size *= shape.at(i);
+ }
+
+ constant->size<InputT>(size);
+
+ uint32_t outer_size = 1;
+ for (uint32_t i = 0; i < static_cast<uint32_t>(axis); ++i)
+ {
+ outer_size *= params->dim(i).value();
+ }
+
+ uint32_t inner_size = 1;
+ for (uint32_t i = axis + 1; i < rank; ++i)
+ {
+ inner_size *= params->dim(i).value();
+ }
+
+ uint32_t coord_size = 1;
+ for (uint32_t i = 0; i < indices->rank(); ++i)
+ {
+ coord_size *= indices->dim(i).value();
+ }
+
+ const auto axis_size = params->dim(axis).value();
+
+ for (uint32_t outer = 0; outer < outer_size; ++outer)
+ {
+ for (uint32_t i = 0; i < coord_size; ++i)
+ {
+ constant->at<InputT>((outer * coord_size + i) * inner_size) =
+ params->at<InputT>((outer * axis_size + indices->at<IndexT>(i)) * inner_size);
+ }
+ }
+ loco::replace(gather_node).with(constant);
+
+ return true;
+}
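+
+// Worked example of the index arithmetic above (it matches the
+// S32FoldGatherTwoDimsTest case in FoldGatherPass.test.cpp):
+//
+//   params shape [2, 3], values {0, 1, 2, 3, 4, 5}, axis = 1, indices {2, 1}
+//   -> outer_size = 2, inner_size = 1, coord_size = 2, axis_size = 3
+//   constant[0] = params[0 * 3 + 2] = 2;  constant[1] = params[0 * 3 + 1] = 1
+//   constant[2] = params[1 * 3 + 2] = 5;  constant[3] = params[1 * 3 + 1] = 4
+//   -> folded const has shape [2, 2] and values {2, 1, 5, 4}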
+
+bool fold_gather(luci::CircleGather *gather_node)
+{
+ const auto params = dynamic_cast<luci::CircleConst *>(gather_node->params());
+ if (not params)
+ return false;
+
+ const auto indices = dynamic_cast<luci::CircleConst *>(gather_node->indices());
+ if (not indices)
+ return false;
+
+ // TODO: support more types
+ if (params->dtype() != loco::DataType::S32 and params->dtype() != loco::DataType::S64)
+ return false;
+
+ if (indices->dtype() != loco::DataType::S32 and indices->dtype() != loco::DataType::S64)
+ throw std::runtime_error("Unsupported type");
+
+ if (params->dtype() == loco::DataType::S64)
+ {
+ if (indices->dtype() == loco::DataType::S64)
+ return fold_gather<loco::DataType::S64, loco::DataType::S64>(gather_node);
+ else
+ return fold_gather<loco::DataType::S64, loco::DataType::S32>(gather_node);
+ }
+ else
+ {
+ if (indices->dtype() == loco::DataType::S64)
+ return fold_gather<loco::DataType::S32, loco::DataType::S64>(gather_node);
+ else
+ return fold_gather<loco::DataType::S32, loco::DataType::S32>(gather_node);
+ }
+}
+
+} // namespace
+
+namespace luci
+{
+
+/**
+ * Constant Folding for Gather Op
+ **/
+bool FoldGatherPass::run(loco::Graph *g)
+{
+ bool changed = false;
+ for (auto node : loco::active_nodes(loco::output_nodes(g)))
+ {
+ if (auto gather_node = dynamic_cast<luci::CircleGather *>(node))
+ {
+ if (fold_gather(gather_node))
+ changed = true;
+ }
+ }
+
+ return changed;
+}
+
+} // namespace luci
diff --git a/compiler/luci/pass/src/FoldGatherPass.test.cpp b/compiler/luci/pass/src/FoldGatherPass.test.cpp
new file mode 100644
index 000000000..b02c034a5
--- /dev/null
+++ b/compiler/luci/pass/src/FoldGatherPass.test.cpp
@@ -0,0 +1,214 @@
+/*
+ * Copyright (c) 2022 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "luci/Pass/FoldGatherPass.h"
+#include "PassTestGraphs.h"
+
+#include <luci/IR/CircleNodes.h>
+
+#include <gtest/gtest.h>
+
+namespace
+{
+
+/**
+ *
+ * Graph that has a Gather S64 Op with const inputs
+ *
+ * BEFORE
+ * params: [Const] (shape: [3], values: [1, 2, 3])
+ * indices: [Const] (shape: [1], values: [1])
+ *
+ * [params] [indices]
+ * | |
+ * ---[Gather]---
+ *
+ * AFTER
+ * [Const] (shape: [1], values: [2])
+ *
+ */
+class S64FoldGatherSimpleTest : public luci::ConstantFoldingAddTestGraph, public ::testing::Test
+{
+public:
+ S64FoldGatherSimpleTest() : luci::ConstantFoldingAddTestGraph({1}, loco::DataType::S64) {}
+
+ virtual void SetUp() { init(); }
+
+ loco::Node *createFoldedPattern() override
+ {
+ _gather = _g.nodes()->create<luci::CircleGather>();
+ _params = _g.nodes()->create<luci::CircleConst>();
+ _indices = _g.nodes()->create<luci::CircleConst>();
+
+ _gather->dtype(loco::DataType::S64);
+ _params->dtype(loco::DataType::S64);
+ _indices->dtype(loco::DataType::S64);
+
+ _params->shape({3});
+ _indices->shape({1});
+
+ _params->size<loco::DataType::S64>(3);
+ _params->at<loco::DataType::S64>(0) = 1;
+ _params->at<loco::DataType::S64>(1) = 2;
+ _params->at<loco::DataType::S64>(2) = 3;
+
+ _indices->size<loco::DataType::S64>(1);
+ _indices->at<loco::DataType::S64>(0) = 1;
+
+ _gather->params(_params);
+ _gather->indices(_indices);
+
+ _gather->name("gather");
+ _params->name("params");
+ _indices->name("indices");
+
+ return _gather;
+ }
+
+protected:
+ luci::CircleGather *_gather = nullptr;
+ luci::CircleConst *_params = nullptr;
+ luci::CircleConst *_indices = nullptr;
+};
+
+/**
+ *
+ * Graph that has a Gather S32 Op with axis = 1 and with const inputs
+ *
+ * BEFORE
+ * params: [Const] (shape: [2, 3], values: [0, 1, 2, 3, 4, 5])
+ * indices: [Const] (shape: [2], values: [2, 1])
+ *
+ * [params] [indices]
+ * | |
+ * ---[Gather]---
+ *
+ * AFTER
+ * [Const] (shape: [2, 2], values: [2, 1, 5, 4])
+ *
+ */
+
+class S32FoldGatherTwoDimsTest : public luci::ConstantFoldingAddTestGraph, public ::testing::Test
+{
+public:
+ S32FoldGatherTwoDimsTest() : luci::ConstantFoldingAddTestGraph({4, 2}, loco::DataType::S32) {}
+
+ virtual void SetUp() { init(); }
+
+ loco::Node *createFoldedPattern() override
+ {
+ _gather = _g.nodes()->create<luci::CircleGather>();
+ _params = _g.nodes()->create<luci::CircleConst>();
+ _indices = _g.nodes()->create<luci::CircleConst>();
+
+ _gather->dtype(loco::DataType::S32);
+ _params->dtype(loco::DataType::S32);
+ _indices->dtype(loco::DataType::S32);
+
+ _params->shape({2, 3});
+ _indices->shape({2});
+
+ _params->size<loco::DataType::S32>(6);
+ _params->at<loco::DataType::S32>(0) = 0;
+ _params->at<loco::DataType::S32>(1) = 1;
+ _params->at<loco::DataType::S32>(2) = 2;
+ _params->at<loco::DataType::S32>(3) = 3;
+ _params->at<loco::DataType::S32>(4) = 4;
+ _params->at<loco::DataType::S32>(5) = 5;
+
+ _indices->size<loco::DataType::S32>(2);
+ _indices->at<loco::DataType::S32>(0) = 2;
+ _indices->at<loco::DataType::S32>(1) = 1;
+
+ _gather->params(_params);
+ _gather->indices(_indices);
+
+ _gather->axis(1);
+
+ _gather->name("gather");
+ _params->name("params");
+ _indices->name("indices");
+
+ return _gather;
+ }
+
+protected:
+ luci::CircleGather *_gather = nullptr;
+ luci::CircleConst *_params = nullptr;
+ luci::CircleConst *_indices = nullptr;
+};
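+
+// Worked derivation of the expected result above (illustrative only): for
+// axis = 1, each output element is out[i][j] = params[i][indices[j]]. With
+// params = [[0, 1, 2], [3, 4, 5]] and indices = [2, 1]:
+//   out[0] = {params[0][2], params[0][1]} = {2, 1}
+//   out[1] = {params[1][2], params[1][1]} = {5, 4}
+// giving the folded constant [2, 1, 5, 4] in row-major order.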
+
+} // namespace
+
+TEST(FoldGatherTest, name)
+{
+ luci::FoldGatherPass pass;
+ auto const name = pass.name();
+ ASSERT_NE(nullptr, name);
+}
+
+TEST_F(S64FoldGatherSimpleTest, fold_gather_simple)
+{
+ luci::FoldGatherPass pass;
+ while (pass.run(graph()))
+ ;
+
+ auto folded_const = getFoldedPattern();
+ EXPECT_NE(nullptr, folded_const);
+
+  // Check type, shape, and values of folded const
+ EXPECT_EQ(loco::DataType::S64, folded_const->dtype());
+ EXPECT_EQ(1, folded_const->rank());
+ EXPECT_EQ(1, folded_const->dim(0).value());
+ EXPECT_EQ(2, folded_const->at<loco::DataType::S64>(0));
+}
+
+TEST_F(S32FoldGatherTwoDimsTest, fold_gather_with_two_dim)
+{
+ luci::FoldGatherPass pass;
+ while (pass.run(graph()))
+ ;
+
+ auto folded_const = getFoldedPattern();
+ EXPECT_NE(nullptr, folded_const);
+
+  // Check type, shape, and values of folded const
+ EXPECT_EQ(loco::DataType::S32, folded_const->dtype());
+ EXPECT_EQ(2, folded_const->rank());
+ EXPECT_EQ(2, folded_const->dim(0).value());
+ EXPECT_EQ(2, folded_const->dim(1).value());
+
+ EXPECT_EQ(2, folded_const->at<loco::DataType::S32>(0));
+ EXPECT_EQ(1, folded_const->at<loco::DataType::S32>(1));
+ EXPECT_EQ(5, folded_const->at<loco::DataType::S32>(2));
+ EXPECT_EQ(4, folded_const->at<loco::DataType::S32>(3));
+}
+
+TEST_F(S64FoldGatherSimpleTest, illegal_input_NEG)
+{
+ _indices->dtype(loco::DataType::FLOAT32);
+
+ luci::FoldGatherPass pass;
+ EXPECT_ANY_THROW(pass.run(graph()));
+}
+
+TEST_F(S64FoldGatherSimpleTest, illegal_axis_NEG)
+{
+ _gather->axis(1);
+
+ luci::FoldGatherPass pass;
+ EXPECT_ANY_THROW(pass.run(graph()));
+}
diff --git a/compiler/luci/pass/src/PropagateConcatenationQparam.test.cpp b/compiler/luci/pass/src/PropagateConcatenationQparam.test.cpp
index de973a431..68136b244 100644
--- a/compiler/luci/pass/src/PropagateConcatenationQparam.test.cpp
+++ b/compiler/luci/pass/src/PropagateConcatenationQparam.test.cpp
@@ -186,12 +186,12 @@ TEST(PropagateConcatenationQparam, propagate_concat_quantparam_u8)
// (1) normal case: qparam is propagated to input_1 and input_2
// (2) input used by other Op: input_1 is an input of input_2. qparam is propagated only to
// input_2
- // (3) subsequent concat: input_1 is concat. qparam is propagated only to input_2
+ // (3) subsequent concat: input_1 is concat. qparam is propagated to subsequent concat
// (4) const input: input_1 is const. constant values are quantized
// normal case: qparam of concat_node is propagated to input_1 and input_2
SimpleConcatGraph g(loco::DataType::U8);
- luci::propagate_concat_quantparam(&g.concat_node, loco::DataType::U8);
+ luci::propagate_concat_quantparam(&g.concat_node);
EXPECT_FLOAT_EQ(3.14, g.concat_node.quantparam()->scale[0]);
EXPECT_EQ(77, g.concat_node.quantparam()->zerop[0]);
EXPECT_FLOAT_EQ(3.14, g.input_1.quantparam()->scale[0]);
@@ -202,7 +202,7 @@ TEST(PropagateConcatenationQparam, propagate_concat_quantparam_u8)
// input_1 is an input of input_2. qparam is propagated only to input_2
SimpleConcatGraph g2(loco::DataType::U8);
g2.input_2.input(&g2.input_1);
- luci::propagate_concat_quantparam(&g2.concat_node, loco::DataType::U8);
+ luci::propagate_concat_quantparam(&g2.concat_node);
EXPECT_FLOAT_EQ(3.14, g2.concat_node.quantparam()->scale[0]);
EXPECT_EQ(77, g2.concat_node.quantparam()->zerop[0]);
EXPECT_FLOAT_EQ(1.0, g2.input_1.quantparam()->scale[0]);
@@ -210,19 +210,19 @@ TEST(PropagateConcatenationQparam, propagate_concat_quantparam_u8)
EXPECT_FLOAT_EQ(3.14, g2.input_2.quantparam()->scale[0]);
EXPECT_EQ(77, g2.input_2.quantparam()->zerop[0]);
- // input_1 is concat. qparam is propagated only to input_2
+ // input_1 is concat. qparam is propagated to subsequent concat
SubsequentConcatGraph sg(loco::DataType::U8);
- luci::propagate_concat_quantparam(&sg.concat_node, loco::DataType::U8);
+ luci::propagate_concat_quantparam(&sg.concat_node);
EXPECT_FLOAT_EQ(3.14, sg.concat_node.quantparam()->scale[0]);
EXPECT_EQ(77, sg.concat_node.quantparam()->zerop[0]);
- EXPECT_FLOAT_EQ(1.0, sg.input_1.quantparam()->scale[0]);
- EXPECT_EQ(1, sg.input_1.quantparam()->zerop[0]);
+ EXPECT_FLOAT_EQ(3.14, sg.input_1.quantparam()->scale[0]);
+ EXPECT_EQ(77, sg.input_1.quantparam()->zerop[0]);
EXPECT_FLOAT_EQ(3.14, sg.input_2.quantparam()->scale[0]);
EXPECT_EQ(77, sg.input_2.quantparam()->zerop[0]);
// input_1 is const. const values are quantized with the qparam of concat
ConstInputConcatGraph cg(loco::DataType::U8);
- luci::propagate_concat_quantparam(cg.concat_node, loco::DataType::U8);
+ luci::propagate_concat_quantparam(cg.concat_node);
EXPECT_FLOAT_EQ(0.1, cg.concat_node->quantparam()->scale[0]);
EXPECT_EQ(10, cg.concat_node->quantparam()->zerop[0]);
const auto cg_input_1 = loco::must_cast<luci::CircleConst *>(cg.concat_node->values(0));
@@ -248,7 +248,7 @@ TEST(PropagateConcatenationQparam, propagate_concat_quantparam_u8_NEG)
// concat has fused activation function
g.concat_node.fusedActivationFunction(luci::FusedActFunc::RELU);
- luci::propagate_concat_quantparam(&g.concat_node, loco::DataType::U8);
+ luci::propagate_concat_quantparam(&g.concat_node);
EXPECT_FLOAT_EQ(3.14, g.concat_node.quantparam()->scale[0]);
EXPECT_EQ(77, g.concat_node.quantparam()->zerop[0]);
EXPECT_FLOAT_EQ(1.0, g.input_1.quantparam()->scale[0]);
@@ -261,7 +261,7 @@ TEST(PropagateConcatenationQparam, propagate_concat_quantparam_u8_NEG)
// const values are quantized using its min/max
ConstInputConcatGraph cg(loco::DataType::U8);
cg.concat_node->fusedActivationFunction(luci::FusedActFunc::RELU);
- luci::propagate_concat_quantparam(cg.concat_node, loco::DataType::U8);
+ luci::propagate_concat_quantparam(cg.concat_node);
EXPECT_FLOAT_EQ(0.1, cg.concat_node->quantparam()->scale[0]);
EXPECT_EQ(10, cg.concat_node->quantparam()->zerop[0]);
const auto cg_input_1 = loco::must_cast<luci::CircleConst *>(cg.concat_node->values(0));
@@ -283,12 +283,12 @@ TEST(PropagateConcatenationQparam, propagate_concat_quantparam_i16)
// (1) normal case: qparam is propagated to input_1 and input_2
// (2) input used by other Op: input_1 is an input of input_2. qparam is propagated only to
// input_2
- // (3) subsequent concat: input_1 is concat. qparam is propagated only to input_2
+ // (3) subsequent concat: input_1 is concat. qparam is propagated to subsequent concat
// (4) const input: input_1 is const. constant values are quantized
// normal case: qparam of concat_node is propagated to input_1 and input_2
SimpleConcatGraph g(loco::DataType::S16);
- luci::propagate_concat_quantparam(&g.concat_node, loco::DataType::S16);
+ luci::propagate_concat_quantparam(&g.concat_node);
EXPECT_FLOAT_EQ(3.14, g.concat_node.quantparam()->scale[0]);
EXPECT_EQ(0, g.concat_node.quantparam()->zerop[0]);
EXPECT_FLOAT_EQ(3.14, g.input_1.quantparam()->scale[0]);
@@ -299,7 +299,7 @@ TEST(PropagateConcatenationQparam, propagate_concat_quantparam_i16)
// input_1 is an input of input_2. qparam is propagated only to input_2
SimpleConcatGraph g2(loco::DataType::S16);
g2.input_2.input(&g2.input_1);
- luci::propagate_concat_quantparam(&g2.concat_node, loco::DataType::S16);
+ luci::propagate_concat_quantparam(&g2.concat_node);
EXPECT_FLOAT_EQ(3.14, g2.concat_node.quantparam()->scale[0]);
EXPECT_EQ(0, g2.concat_node.quantparam()->zerop[0]);
EXPECT_FLOAT_EQ(1.0, g2.input_1.quantparam()->scale[0]);
@@ -309,17 +309,17 @@ TEST(PropagateConcatenationQparam, propagate_concat_quantparam_i16)
// input_1 is concat. qparam is propagated only to input_2
SubsequentConcatGraph sg(loco::DataType::S16);
- luci::propagate_concat_quantparam(&sg.concat_node, loco::DataType::S16);
+ luci::propagate_concat_quantparam(&sg.concat_node);
EXPECT_FLOAT_EQ(3.14, sg.concat_node.quantparam()->scale[0]);
EXPECT_EQ(0, sg.concat_node.quantparam()->zerop[0]);
- EXPECT_FLOAT_EQ(1.0, sg.input_1.quantparam()->scale[0]);
+ EXPECT_FLOAT_EQ(3.14, sg.input_1.quantparam()->scale[0]);
EXPECT_EQ(0, sg.input_1.quantparam()->zerop[0]);
EXPECT_FLOAT_EQ(3.14, sg.input_2.quantparam()->scale[0]);
EXPECT_EQ(0, sg.input_2.quantparam()->zerop[0]);
// input_1 is const. const values are quantized with the qparam of concat
ConstInputConcatGraph cg(loco::DataType::S16);
- luci::propagate_concat_quantparam(cg.concat_node, loco::DataType::S16);
+ luci::propagate_concat_quantparam(cg.concat_node);
EXPECT_FLOAT_EQ(0.1, cg.concat_node->quantparam()->scale[0]);
EXPECT_EQ(0, cg.concat_node->quantparam()->zerop[0]);
const auto cg_input_1 = loco::must_cast<luci::CircleConst *>(cg.concat_node->values(0));
@@ -345,7 +345,7 @@ TEST(PropagateConcatenationQparam, propagate_concat_quantparam_i16_NEG)
// concat has fused activation function
g.concat_node.fusedActivationFunction(luci::FusedActFunc::RELU);
- luci::propagate_concat_quantparam(&g.concat_node, loco::DataType::S16);
+ luci::propagate_concat_quantparam(&g.concat_node);
EXPECT_FLOAT_EQ(3.14, g.concat_node.quantparam()->scale[0]);
EXPECT_EQ(0, g.concat_node.quantparam()->zerop[0]);
EXPECT_FLOAT_EQ(1.0, g.input_1.quantparam()->scale[0]);
@@ -358,7 +358,7 @@ TEST(PropagateConcatenationQparam, propagate_concat_quantparam_i16_NEG)
// const values are quantized using its min/max
ConstInputConcatGraph cg(loco::DataType::S16);
cg.concat_node->fusedActivationFunction(luci::FusedActFunc::RELU);
- luci::propagate_concat_quantparam(cg.concat_node, loco::DataType::S16);
+ luci::propagate_concat_quantparam(cg.concat_node);
EXPECT_FLOAT_EQ(0.1, cg.concat_node->quantparam()->scale[0]);
EXPECT_EQ(0, cg.concat_node->quantparam()->zerop[0]);
const auto cg_input_1 = loco::must_cast<luci::CircleConst *>(cg.concat_node->values(0));
diff --git a/compiler/luci/pass/src/PropagateQParamBackwardPass.cpp b/compiler/luci/pass/src/PropagateQParamBackwardPass.cpp
new file mode 100644
index 000000000..b4975486d
--- /dev/null
+++ b/compiler/luci/pass/src/PropagateQParamBackwardPass.cpp
@@ -0,0 +1,482 @@
+/*
+ * Copyright (c) 2022 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "luci/Pass/PropagateQParamBackwardPass.h"
+#include "QuantizationUtils.h"
+
+#include <luci/IR/CircleNodes.h>
+#include <luci/IR/CircleNodeVisitor.h>
+#include <luci/Service/Nodes/CircleConst.h>
+#include <luci/Log.h>
+
+#include <cmath>
+
+namespace
+{
+
+void quant_const_values(luci::CircleConst *const_node, float scaling_factor, float zerop,
+ loco::DataType quant_type)
+{
+ uint32_t size = const_node->size<loco::DataType::FLOAT32>();
+
+ const float scaling_factor_inv = 1.0 / scaling_factor;
+ std::vector<int32_t> quantized_values(size);
+ for (uint32_t i = 0; i < size; ++i)
+ {
+ auto data = static_cast<double>(const_node->at<loco::DataType::FLOAT32>(i));
+ double quantized_data = std::round(data * scaling_factor_inv) + zerop;
+ constexpr double int_max = static_cast<double>(std::numeric_limits<int32_t>::max());
+ constexpr double int_min = static_cast<double>(std::numeric_limits<int32_t>::min());
+ quantized_data = std::min(int_max, std::max(int_min, quantized_data));
+
+ quantized_values[i] = static_cast<int32_t>(quantized_data);
+ }
+
+ switch (quant_type)
+ {
+ case loco::DataType::U8:
+ const_node->dtype(loco::DataType::U8); // change the type of tensor
+ const_node->size<loco::DataType::U8>(size); // resize tensor
+ for (uint32_t i = 0; i < size; ++i)
+ const_node->at<loco::DataType::U8>(i) = std::min(255, std::max(0, quantized_values[i]));
+ break;
+ case loco::DataType::S16:
+ assert(zerop == 0);
+ const_node->dtype(loco::DataType::S16); // change the type of tensor
+ const_node->size<loco::DataType::S16>(size); // resize tensor
+ for (uint32_t i = 0; i < size; ++i)
+ const_node->at<loco::DataType::S16>(i) =
+ std::min(32767, std::max(-32767, quantized_values[i]));
+ break;
+ default:
+ throw std::runtime_error("Unsupported data type");
+ }
+}
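+
+// Illustrative arithmetic for quant_const_values (hypothetical numbers): with
+// scaling_factor = 0.5 and zerop = 2, a float value 3.0 quantizes to
+//   round(3.0 / 0.5) + 2 = 8,
+// after which the result is clamped to the target dtype's range
+// ([0, 255] for U8, [-32767, 32767] for S16).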
+
+void overwrite_quantparam(const luci::CircleNode *source, luci::CircleNode *target)
+{
+ auto source_qparam = source->quantparam();
+ if (source_qparam == nullptr)
+ throw std::runtime_error("source quantparam is not found during overwrite");
+
+ auto target_qparam = target->quantparam();
+ if (target_qparam == nullptr)
+ {
+ auto quantparam = std::make_unique<luci::CircleQuantParam>();
+ target->quantparam(std::move(quantparam));
+ target_qparam = target->quantparam();
+
+ if (target_qparam == nullptr)
+ throw std::runtime_error("Creating new quant param failed");
+ }
+ target_qparam->min = source_qparam->min;
+ target_qparam->max = source_qparam->max;
+ target_qparam->scale = source_qparam->scale;
+ target_qparam->zerop = source_qparam->zerop;
+ target_qparam->quantized_dimension = source_qparam->quantized_dimension;
+}
+
+/**
+ * Tells whether PadV2 quantization should ignore the padding value.
+ * In that case the padding const will be quantized with the input's parameters, and probably clipped.
+ */
+bool ignore_pad_v2_const_quantization(const luci::CirclePadV2 *pad)
+{
+ // This is a workaround to quantize pad generated from MaxPoolWithArgmax operation properly
+ // TODO use metadata hints to detect this case
+ auto const_value_node = dynamic_cast<const luci::CircleConst *>(pad->arg(2));
+ if (!const_value_node)
+ return false;
+ if (const_value_node->dtype() == loco::DataType::FLOAT32)
+ {
+ float const_value = const_value_node->at<loco::DataType::FLOAT32>(0);
+ if (const_value == std::numeric_limits<float>::lowest())
+ return true;
+ }
+ return false;
+}
+
+/** EXAMPLE
+ *
+ * BEFORE
+ *
+ * [CircleNode] [CircleConst]
+ * (qparam1) (FP32)
+ * \ /
+ * \ /
+ * [CirclePack]
+ * (qparam2)
+ *
+ * AFTER
+ *
+ * [CircleNode] [CircleConst] [CircleConst] <- Dead node
+ * (qparam2) (qparam2) (FP32)
+ * \ /
+ * \ /
+ * [CirclePack]
+ * (qparam2)
+ *
+ * NOTE Quantization parameter of CirclePack (qparam2) is propagated to the inputs.
+ */
+void propagate_pack_quantparam(luci::CirclePack *pack)
+{
+ assert(pack->quantparam() != nullptr);
+
+ const auto num_inputs = pack->values_count();
+
+ for (uint32_t i = 0; i < num_inputs; i++)
+ {
+ auto node = loco::must_cast<luci::CircleNode *>(pack->arg(i));
+
+ // Quantize constant values
+ if (node->opcode() == luci::CircleOpcode::CIRCLECONST)
+ {
+ luci::CircleConst *const_node = loco::must_cast<luci::CircleConst *>(node);
+ if (const_node->dtype() != loco::DataType::FLOAT32)
+ throw std::runtime_error("Unsupported data type for constant input of pack Op");
+
+ const auto pack_qparam = pack->quantparam();
+ if (pack_qparam == nullptr)
+ throw std::runtime_error("quantparam of pack is not found during propagation");
+
+ assert(pack_qparam->scale.size() == 1);
+ assert(pack_qparam->zerop.size() == 1);
+ const auto scaling_factor = pack_qparam->scale[0];
+ const auto zerop = pack_qparam->zerop[0];
+
+ auto new_const = luci::clone(const_node);
+ quant_const_values(new_const, scaling_factor, zerop, pack->dtype());
+ pack->values(i, new_const);
+ overwrite_quantparam(pack, new_const);
+ }
+ else
+ {
+ const auto succs = loco::succs(node);
+ if (succs.size() > 1)
+ continue;
+
+ // Non-const input must have been quantized
+ assert(node->quantparam() != nullptr);
+ overwrite_quantparam(pack, node);
+ }
+ }
+}
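+
+// NOTE The const input is cloned before quantization (here and in the
+// propagation routines below), presumably so that other users of the original
+// FP32 constant are left intact; the clone is re-linked to this Op and the
+// original becomes a dead node (see the diagrams above), to be removed by a
+// later cleanup pass.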
+
+/** EXAMPLE
+ *
+ * BEFORE
+ *
+ * [CircleNode] [CircleConst] [CircleConst] [CircleNode]
+ * (S32) (S32) (FP32) (U8 qparam1)
+ * \ \ / /
+ * \ \ / /
+ * \ \ / /
+ * -------[CircleOneHot]-------
+ * (U8 qparam2)
+ *
+ * AFTER
+ *
+ * [CircleNode] [CircleConst] [CircleConst] [CircleNode] [CircleConst] <- Dead node
+ * (S32) (S32) (U8 qparam2) (U8 qparam2) (FP32)
+ * \ \ / /
+ * \ \ / /
+ * \ \ / /
+ * -------[CircleOneHot]-------
+ * (U8 qparam2)
+ *
+ * NOTE Quantization parameter of CircleOneHot (qparam2) is propagated to on_value/off_value.
+ */
+void propagate_one_hot_quantparam(luci::CircleOneHot *one_hot)
+{
+ assert(one_hot->quantparam() != nullptr);
+
+ // Propagate quantization parameters from output to inputs,
+  // to fit both input and constant_value in one quant range.
+ auto quant_input = [one_hot](void (luci::CircleOneHot::*arg_setter)(loco::Node *),
+ loco::Node *(luci::CircleOneHot::*arg_getter)() const) {
+ auto node = loco::must_cast<luci::CircleNode *>((one_hot->*arg_getter)());
+
+ // Quantize constant values
+ if (node->opcode() == luci::CircleOpcode::CIRCLECONST)
+ {
+ luci::CircleConst *const_node = loco::must_cast<luci::CircleConst *>(node);
+ if (is_quantized(const_node))
+ return;
+
+ if (const_node->dtype() != loco::DataType::FLOAT32)
+ throw std::runtime_error("Unsupported data type for constant input of OneHot Op");
+
+ const auto qparam = one_hot->quantparam();
+ if (qparam == nullptr)
+ throw std::runtime_error("quantparam of OneHot is not found during propagation");
+
+ assert(qparam->scale.size() == 1);
+ const auto scaling_factor = qparam->scale.at(0);
+ const auto zerop = qparam->zerop.at(0);
+
+ auto new_const = luci::clone(const_node);
+ quant_const_values(new_const, scaling_factor, zerop, one_hot->dtype());
+ overwrite_quantparam(one_hot, new_const);
+ (one_hot->*arg_setter)(new_const);
+ }
+ else
+ {
+ const auto succs = loco::succs(node);
+ if (succs.size() > 1)
+ return;
+
+ // Non-const input must have been quantized
+ assert(node->quantparam() != nullptr);
+ overwrite_quantparam(one_hot, node);
+ }
+ };
+
+ quant_input(&luci::CircleOneHot::on_value, &luci::CircleOneHot::on_value);
+ quant_input(&luci::CircleOneHot::off_value, &luci::CircleOneHot::off_value);
+}
+
+} // namespace
+
+namespace luci
+{
+
+/** BEFORE
+ *
+ * [CircleNode] [CircleConst]
+ * (U8 qparam1) (FP32)
+ * \ /
+ * \ /
+ * [CircleConcatenation]
+ * (U8 qparam2)
+ *
+ * AFTER
+ * [CircleNode] [CircleConst] [CircleConst] <- Dead node
+ * (U8 qparam2) (U8 qparam2) (FP32)
+ * \ /
+ * \ /
+ * [CircleConcatenation]
+ * (U8 qparam2)
+ */
+void propagate_concat_quantparam(luci::CircleConcatenation *concat)
+{
+ assert(concat->quantparam() != nullptr);
+
+ const auto num_inputs = concat->numValues();
+
+ // Quantize const inputs using their values if concat has fused act function
+ if (concat->fusedActivationFunction() != luci::FusedActFunc::NONE)
+ {
+ for (uint32_t i = 0; i < num_inputs; i++)
+ {
+ auto node = concat->arg(i);
+ auto const_node = dynamic_cast<luci::CircleConst *>(node);
+ if (const_node != nullptr)
+ {
+ auto new_const = luci::clone(const_node);
+ quant_const(new_const, concat->dtype());
+ concat->values(i, new_const);
+ }
+ }
+ return;
+ }
+
+ for (uint32_t i = 0; i < num_inputs; i++)
+ {
+ auto node = loco::must_cast<luci::CircleNode *>(concat->arg(i));
+
+ // Quantize constant values
+ if (node->opcode() == luci::CircleOpcode::CIRCLECONST)
+ {
+ luci::CircleConst *const_node = loco::must_cast<luci::CircleConst *>(node);
+
+ const auto concat_qparam = concat->quantparam();
+ assert(concat_qparam->scale.size() == 1);
+ const auto scaling_factor = concat_qparam->scale[0];
+ const auto zerop = concat_qparam->zerop[0];
+
+ auto new_const = luci::clone(const_node);
+ quant_const_values(new_const, scaling_factor, zerop, concat->dtype());
+ concat->values(i, new_const);
+ overwrite_quantparam(concat, new_const);
+ }
+ else
+ {
+ const auto succs = loco::succs(node);
+ if (succs.size() > 1)
+ continue;
+
+ // Non-const input must have been quantized
+ assert(node->quantparam() != nullptr);
+ overwrite_quantparam(concat, node);
+ }
+ }
+}
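+
+// NOTE In the fused-activation case handled at the top of the function above,
+// the output qparam of concat cannot simply be reused for the inputs, since
+// the activation may change the value range; const inputs are therefore
+// quantized with their own min/max via quant_const instead.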
+
+/** BEFORE
+ *
+ * [CircleNode] [CircleConst] [CircleConst]
+ * (U8 qparam1) (S32) (FP32)
+ * \ | /
+ * \ | /
+ * [CirclePadV2]
+ * (U8 qparam2)
+ *
+ * AFTER (case 1)
+ *
+ * By default qparam is propagated from output to inputs to meet backend requirements.
+ *
+ * [CircleNode] [CircleConst] [CircleConst] [CircleConst] <- Dead node
+ * (U8 qparam2) (S32) (U8 qparam2) (FP32)
+ * \ | /
+ * \ | /
+ * [CirclePadV2]
+ * (U8 qparam2)
+ *
+ * AFTER (case 2)
+ *
+ * In case the padding value is the lowest float value,
+ * qparam is propagated from the input to the output and the constant.
+ *
+ * This is a special case for pads constructed during optimization, needed to
+ * guarantee that an extremely large negative constant does not stretch the
+ * output quantization range.
+ *
+ * [CircleNode] [CircleConst] [CircleConst] [CircleConst] <- Dead node
+ * (U8 qparam1) (S32) (U8 qparam1) (FP32)
+ * \ | /
+ * \ | /
+ * [CirclePadV2]
+ * (U8 qparam1)
+ */
+void propagate_pad_v2_quantparam(luci::CirclePadV2 *pad_v2)
+{
+ if (ignore_pad_v2_const_quantization(pad_v2))
+ {
+    // propagate input quantization parameters from input to output and padding const value
+ auto pad_v2_input = loco::must_cast<luci::CircleNode *>(pad_v2->arg(0));
+ overwrite_quantparam(pad_v2_input, pad_v2);
+
+ auto const_value_node = loco::must_cast<luci::CircleConst *>(
+ pad_v2->arg(2)); // FIX ignore_pad_v2_const_quantization UNLESS
+ auto new_const = luci::clone(const_value_node);
+
+ const auto pad_v2_input_qparam = pad_v2_input->quantparam();
+ assert(pad_v2_input_qparam != nullptr);
+ assert(pad_v2_input_qparam->scale.size() == 1);
+ const auto scaling_factor = pad_v2_input_qparam->scale.at(0);
+ const auto zerop = pad_v2_input_qparam->zerop.at(0);
+
+ quant_const_values(new_const, scaling_factor, zerop, pad_v2->dtype());
+ overwrite_quantparam(pad_v2_input, new_const);
+ pad_v2->constant_values(new_const);
+ return;
+ }
+
+  // Propagate quantization parameters from output to inputs,
+  // to fit both input and constant_value in one quant range.
+ auto quant_input = [pad_v2](void (CirclePadV2::*arg_setter)(loco::Node *), uint32_t arg) {
+ auto node = loco::must_cast<luci::CircleNode *>(pad_v2->arg(arg));
+
+ // Quantize constant values
+ if (node->opcode() == luci::CircleOpcode::CIRCLECONST)
+ {
+ luci::CircleConst *const_node = loco::must_cast<luci::CircleConst *>(node);
+ if (is_quantized(const_node))
+ return;
+
+ if (const_node->dtype() != loco::DataType::FLOAT32)
+ throw std::runtime_error("Unsupported data type for constant input of PadV2 Op");
+
+ const auto pad_v2_qparam = pad_v2->quantparam();
+ if (pad_v2_qparam == nullptr)
+ throw std::runtime_error("quantparam of PadV2 is not found during propagation");
+
+ assert(pad_v2_qparam->scale.size() == 1);
+ const auto scaling_factor = pad_v2_qparam->scale.at(0);
+ const auto zerop = pad_v2_qparam->zerop.at(0);
+
+ auto new_const = luci::clone(const_node);
+ quant_const_values(new_const, scaling_factor, zerop, pad_v2->dtype());
+ overwrite_quantparam(pad_v2, new_const);
+ (pad_v2->*arg_setter)(new_const);
+ }
+ else
+ {
+ const auto succs = loco::succs(node);
+ if (succs.size() > 1)
+ return;
+
+ // Non-const input must have been quantized
+ assert(node->quantparam() != nullptr);
+ overwrite_quantparam(pad_v2, node);
+ }
+ };
+
+ quant_input(&CirclePadV2::input, 0);
+ quant_input(&CirclePadV2::constant_values, 2);
+}
+
+} // namespace luci
+
+namespace
+{
+
+// Visitor to propagate quantization parameters backwards
+struct PropagateQParamBackward final : public luci::CircleNodeMutableVisitor<void>
+{
+ void visit(luci::CircleNode *) {}
+
+ void visit(luci::CircleConcatenation *node) { propagate_concat_quantparam(node); }
+
+ void visit(luci::CircleOneHot *node) { propagate_one_hot_quantparam(node); }
+
+ void visit(luci::CirclePack *node) { propagate_pack_quantparam(node); }
+
+ void visit(luci::CirclePadV2 *node) { propagate_pad_v2_quantparam(node); }
+};
+
+} // namespace
+
+namespace luci
+{
+
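+// Why reverse post-order: post-order lists producers before consumers, so the
+// reversed order visits consumers first. Sketch on a chain of concats
+// (illustrative; this mirrors the subsequent-concat test):
+//
+//   input -> concat1 -> concat2 -> output
+//
+// Visiting concat2 first copies its qparam onto concat1; visiting concat1 next
+// copies the already-updated qparam onto input, so one traversal suffices for
+// arbitrarily long chains.
+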
+bool PropagateQParamBackwardPass::run(loco::Graph *g)
+{
+ LOGGER(l);
+
+ // We use reverse post-order traversal as qparam is propagated backward
+ auto nodes = loco::postorder_traversal(loco::output_nodes(g));
+ std::reverse(nodes.begin(), nodes.end());
+ for (auto node : nodes)
+ {
+ auto circle_node = loco::must_cast<luci::CircleNode *>(node);
+ INFO(l) << "PropagateQParamBackwardPass visit node: " << circle_node->name() << std::endl;
+
+ // We can't propagate non-existent qparam
+ if (circle_node->quantparam() == nullptr)
+ continue;
+
+ PropagateQParamBackward pqb;
+ circle_node->accept(&pqb);
+ }
+
+  // This pass is run only once, so return false
+  // TODO Refactor so that a meaningless value is not returned
+ return false;
+}
+
+} // namespace luci
diff --git a/compiler/luci/pass/src/PropagateQParamBackwardPass.test.cpp b/compiler/luci/pass/src/PropagateQParamBackwardPass.test.cpp
new file mode 100644
index 000000000..33af70449
--- /dev/null
+++ b/compiler/luci/pass/src/PropagateQParamBackwardPass.test.cpp
@@ -0,0 +1,167 @@
+/*
+ * Copyright (c) 2022 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "luci/Pass/PropagateQParamBackwardPass.h"
+
+#include <luci/IR/CircleNodes.h>
+
+#include <gtest/gtest.h>
+
+using namespace luci;
+
+namespace
+{
+
+void set_qparam(luci::CircleNode *node, float scale, int64_t zp)
+{
+ auto qparam = std::make_unique<luci::CircleQuantParam>();
+ qparam->scale.emplace_back(scale);
+ qparam->zerop.emplace_back(zp);
+
+ node->quantparam(std::move(qparam));
+}
+
+/**
+ * @brief Base Test Graph
+ */
+struct TestGraph
+{
+public:
+ virtual void init(void) = 0;
+};
+
+/**
+ * Graph with two concats
+ *
+ * [CircleInput] [CircleConst]
+ * \ /
+ * [CircleConcatenation] [CircleConst]
+ * | |
+ * [CircleConcatenation]
+ * |
+ * [CircleOutput]
+ *
+ * BEFORE
+ *  - Concat1 and Concat2 have different qparams
+ *
+ * AFTER
+ * - All Ops have the same qparam
+ */
+struct SubsequentConcatGraph : public TestGraph
+{
+public:
+ void init(void) final
+ {
+ // graph input and output
+ auto graph_input = g.inputs()->create();
+ auto graph_output = g.outputs()->create();
+
+ // input
+ input = g.nodes()->create<luci::CircleInput>();
+ input->index(graph_input->index());
+ input->shape({1, 4, 4, 3});
+ input->dtype(loco::DataType::U8);
+ set_qparam(input, 1.0, 1);
+
+ // const1
+ const1 = g.nodes()->create<luci::CircleConst>();
+ const1->shape({1, 4, 4, 3});
+ const1->dtype(loco::DataType::FLOAT32);
+ const1->size<loco::DataType::FLOAT32>(48);
+ for (uint32_t i = 0; i < 48; i++)
+ const1->at<loco::DataType::FLOAT32>(i) = i;
+
+ // concat1
+ concat1 = g.nodes()->create<luci::CircleConcatenation>(2);
+ concat1->shape({1, 4, 4, 6});
+ concat1->dtype(loco::DataType::U8);
+ set_qparam(concat1, 2.0, 2);
+ concat1->values(0, input);
+ concat1->values(1, const1);
+ concat1->fusedActivationFunction(luci::FusedActFunc::NONE);
+
+ // const2
+ const2 = g.nodes()->create<luci::CircleConst>();
+ const2->shape({1, 4, 4, 3});
+ const2->dtype(loco::DataType::FLOAT32);
+ const2->size<loco::DataType::FLOAT32>(48);
+ for (uint32_t i = 0; i < 48; i++)
+ const2->at<loco::DataType::FLOAT32>(i) = i;
+
+ // concat2
+ concat2 = g.nodes()->create<luci::CircleConcatenation>(2);
+ concat2->shape({1, 4, 4, 9});
+ concat2->dtype(loco::DataType::U8);
+ set_qparam(concat2, 3.0, 3);
+ concat2->values(0, concat1);
+ concat2->values(1, const2);
+ concat2->fusedActivationFunction(luci::FusedActFunc::NONE);
+
+ // output
+ output = g.nodes()->create<luci::CircleOutput>();
+ output->index(graph_output->index());
+ output->from(concat2);
+ output->shape({1, 4, 4, 9});
+ output->dtype(loco::DataType::U8);
+ set_qparam(output, 3.0, 3);
+ }
+
+public:
+ loco::Graph g;
+ CircleInput *input = nullptr;
+ CircleConcatenation *concat1 = nullptr;
+ CircleConcatenation *concat2 = nullptr;
+ CircleConst *const1 = nullptr;
+ CircleConst *const2 = nullptr;
+ CircleOutput *output = nullptr;
+};
+
+} // namespace
+
+TEST(PropagateQParamBackwardPassTest, name)
+{
+ luci::PropagateQParamBackwardPass pass(loco::DataType::U8);
+ auto const name = pass.name();
+ ASSERT_NE(nullptr, name);
+}
+
+TEST(PropagateQParamBackwardPassTest, subsequent_propagation)
+{
+ SubsequentConcatGraph graph;
+
+ graph.init();
+
+ luci::PropagateQParamBackwardPass pass(loco::DataType::U8);
+
+ pass.run(&graph.g);
+
+ EXPECT_EQ(3.0, graph.concat2->quantparam()->scale[0]);
+ EXPECT_EQ(3, graph.concat2->quantparam()->zerop[0]);
+
+ auto const2 = loco::must_cast<CircleNode *>(graph.concat2->values(1));
+ EXPECT_EQ(3.0, const2->quantparam()->scale[0]);
+ EXPECT_EQ(3, const2->quantparam()->zerop[0]);
+
+ EXPECT_EQ(3.0, graph.concat1->quantparam()->scale[0]);
+ EXPECT_EQ(3, graph.concat1->quantparam()->zerop[0]);
+
+ auto const1 = loco::must_cast<CircleNode *>(graph.concat1->values(1));
+ EXPECT_EQ(3.0, const1->quantparam()->scale[0]);
+ EXPECT_EQ(3, const1->quantparam()->zerop[0]);
+
+ EXPECT_EQ(3.0, graph.input->quantparam()->scale[0]);
+ EXPECT_EQ(3, graph.input->quantparam()->zerop[0]);
+}
diff --git a/compiler/luci/pass/src/PropagateQParamForwardPass.cpp b/compiler/luci/pass/src/PropagateQParamForwardPass.cpp
new file mode 100644
index 000000000..003e4c293
--- /dev/null
+++ b/compiler/luci/pass/src/PropagateQParamForwardPass.cpp
@@ -0,0 +1,194 @@
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "luci/Pass/PropagateQParamForwardPass.h"
+
+#include "QuantizationUtils.h"
+
+#include <luci/IR/CircleNodes.h>
+#include <luci/IR/CircleNodeVisitor.h>
+#include <luci/Log.h>
+
+#include <iostream>
+
+namespace
+{
+
+bool copy_qparam(luci::CircleQuantParam *src, luci::CircleQuantParam *dst)
+{
+ assert(src->scale.size() == dst->scale.size());
+ assert(src->zerop.size() == dst->zerop.size());
+
+ // src and dst have the same qparam
+ if (std::equal(src->scale.begin(), src->scale.end(), dst->scale.begin()) &&
+ std::equal(src->zerop.begin(), src->zerop.end(), dst->zerop.begin()) &&
+ src->quantized_dimension == dst->quantized_dimension)
+ return false;
+
+ dst->scale.assign(src->scale.begin(), src->scale.end());
+ dst->zerop.assign(src->zerop.begin(), src->zerop.end());
+ dst->quantized_dimension = src->quantized_dimension;
+ return true;
+}
+
+bool copy_qparam(luci::CircleNode *src, luci::CircleNode *dst)
+{
+ // Skip nodes that do not have quantparams
+ auto src_qparam = src->quantparam();
+ if (not src_qparam)
+ return false;
+
+ auto dst_qparam = dst->quantparam();
+ if (not dst_qparam)
+ return false;
+
+ return copy_qparam(src_qparam, dst_qparam);
+}
+
+// Visitor to propagate quantization parameters
+struct PropagateQParamForward final : public luci::CircleNodeMutableVisitor<bool>
+{
+ PropagateQParamForward() = default;
+
+ bool visit(luci::CircleNode *) { return false; }
+
+ bool visit(luci::CircleGather *node)
+ {
+ auto input_node = loco::must_cast<luci::CircleNode *>(node->params());
+ return copy_qparam(input_node, node);
+ }
+
+ bool visit(luci::CircleReshape *node)
+ {
+ auto input_node = loco::must_cast<luci::CircleNode *>(node->tensor());
+ return copy_qparam(input_node, node);
+ }
+
+ bool visit(luci::CircleTranspose *node)
+ {
+ auto input_node = loco::must_cast<luci::CircleNode *>(node->a());
+ return copy_qparam(input_node, node);
+ }
+
+ bool visit(luci::CircleStridedSlice *node)
+ {
+ auto input_node = loco::must_cast<luci::CircleNode *>(node->input());
+ return copy_qparam(input_node, node);
+ }
+
+ bool visit(luci::CircleSplitOut *node)
+ {
+ auto split = loco::must_cast<luci::CircleSplit *>(node->input());
+ auto input_node = loco::must_cast<luci::CircleNode *>(split->input());
+ return copy_qparam(input_node, node);
+ }
+
+ bool visit(luci::CircleSplitVOut *node)
+ {
+ auto splitv = loco::must_cast<luci::CircleSplitV *>(node->input());
+ auto input_node = loco::must_cast<luci::CircleNode *>(splitv->input());
+ return copy_qparam(input_node, node);
+ }
+
+ bool visit(luci::CircleUnpackOut *node)
+ {
+ auto unpack = loco::must_cast<luci::CircleUnpack *>(node->input());
+ auto input_node = loco::must_cast<luci::CircleNode *>(unpack->value());
+ return copy_qparam(input_node, node);
+ }
+
+  // Propagate qparam across Quantize op to preserve
+  // special qparams (pre-defined values, integer scale)
+ bool visit(luci::CircleQuantize *node)
+ {
+ auto input_node = loco::must_cast<luci::CircleNode *>(node->input());
+
+ // Skip if input_node is not quantized activation
+ if (input_node->dtype() != loco::DataType::U8 and input_node->dtype() != loco::DataType::S16)
+ return false;
+
+ // If input_node and node have the same dtype, Quantize op
+ // will do rescale, not requantize for mixed-precision
+ if (input_node->dtype() == node->dtype())
+ return false;
+
+ assert(node->dtype() == loco::DataType::U8 or node->dtype() == loco::DataType::S16);
+
+ auto prev_qparam = node->quantparam();
+ assert(prev_qparam);
+ assert(prev_qparam->scale.size() == 1);
+ assert(prev_qparam->zerop.size() == 1);
+
+ const auto prev_scale = prev_qparam->scale[0];
+ const auto prev_zerop = prev_qparam->zerop[0];
+
+ auto qtype = luci::activation_qtype(input_node);
+ switch (qtype)
+ {
+ case luci::ActivationQType::PreDefinedValue:
+ node->quantparam(luci::make_predefined_qparam(input_node->opcode(), node->dtype()));
+ break;
+ case luci::ActivationQType::IntScale:
+ luci::set_int_scale(node);
+ break;
+ default:
+ break;
+ }
+
+ assert(node->quantparam());
+ assert(node->quantparam()->scale.size() == 1);
+ assert(node->quantparam()->zerop.size() == 1);
+
+ const auto scale = node->quantparam()->scale[0];
+ const auto zerop = node->quantparam()->zerop[0];
+
+ // Compare qparam with saved values to detect update
+ return scale != prev_scale or zerop != prev_zerop;
+ }
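+
+  // Example of the rescale-vs-requantize distinction above (illustrative): a
+  // U8 Tanh (scale 2/256, zerop 128) feeding a Quantize with S16 output is a
+  // dtype change, so the S16 pre-defined qparam (scale 1/32768, zerop 0) is
+  // adopted; a Quantize whose input and output are both U8 merely rescales
+  // and is skipped here.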
+};
+
+} // namespace
+
+namespace luci
+{
+
+bool PropagateQParamForwardPass::run(loco::Graph *g)
+{
+ bool changed = false;
+ LOGGER(l);
+ for (auto node : loco::active_nodes(loco::output_nodes(g)))
+ {
+ auto circle_node = loco::must_cast<luci::CircleNode *>(node);
+ INFO(l) << "PropagateQParamForwardPass visit node: " << circle_node->name() << std::endl;
+
+ PropagateQParamForward pqp;
+ if (circle_node->accept(&pqp))
+ changed = true;
+
+ if (_TF_style_maxpool)
+ {
+ if (auto maxpool = dynamic_cast<luci::CircleMaxPool2D *>(node))
+ {
+ auto input = loco::must_cast<luci::CircleNode *>(maxpool->value());
+ copy_qparam(input, maxpool);
+ }
+ }
+ }
+
+ return changed;
+}
+
+} // namespace luci
diff --git a/compiler/luci/pass/src/PropagateQParamForwardPass.test.cpp b/compiler/luci/pass/src/PropagateQParamForwardPass.test.cpp
new file mode 100644
index 000000000..a734c0873
--- /dev/null
+++ b/compiler/luci/pass/src/PropagateQParamForwardPass.test.cpp
@@ -0,0 +1,260 @@
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "luci/Pass/PropagateQParamForwardPass.h"
+
+#include <luci/IR/CircleNodes.h>
+
+#include <gtest/gtest.h>
+
+namespace
+{
+
+void addQuantParam(luci::CircleNode *node, const std::vector<float> &scale,
+ const std::vector<int64_t> &zp)
+{
+ assert(node->quantparam() == nullptr);
+
+ auto quantparam = std::make_unique<luci::CircleQuantParam>();
+ quantparam->scale = scale;
+ quantparam->zerop = zp;
+ node->quantparam(std::move(quantparam));
+}
+
+/**
+ * Simple graph for test
+ *
+ * BEFORE
+ *
+ * [Conv] (qparam 1)
+ * |
+ * [Reshape] (qparam 2)
+ *
+ * AFTER
+ *
+ * [Conv] (qparam 2)
+ * |
+ * [Reshape] (qparam 2)
+ *
+ */
+class SimpleGraph
+{
+public:
+ SimpleGraph()
+ {
+ input = g.nodes()->create<luci::CircleInput>();
+ conv = g.nodes()->create<luci::CircleConv2D>();
+ reshape = g.nodes()->create<luci::CircleReshape>();
+ output = g.nodes()->create<luci::CircleOutput>();
+
+ auto graph_input = g.inputs()->create();
+ input->index(graph_input->index());
+ auto graph_output = g.outputs()->create();
+ output->index(graph_output->index());
+
+ addQuantParam(conv, {0.1, 0.2, 0.3}, {0, 10, 20});
+ addQuantParam(reshape, {0.2, 0.4, 0.6}, {-10, 0, 10});
+
+ conv->input(input);
+ reshape->tensor(conv);
+ output->from(reshape);
+ }
+
+public:
+ loco::Graph g;
+ luci::CircleInput *input = nullptr;
+ luci::CircleConv2D *conv = nullptr;
+ luci::CircleReshape *reshape = nullptr;
+ luci::CircleOutput *output = nullptr;
+};
+
+/**
+ * Test graph for forward propagation in Quantize Op
+ *
+ * BEFORE
+ *
+ * [Tanh U8] (qparam 1 - pre-defined for U8)
+ * |
+ * [Quantize S16] (qparam 2 - not pre-defined value)
+ *
+ * AFTER
+ *
+ * [Tanh U8] (qparam 1 - pre-defined for U8)
+ * |
+ * [Quantize S16] (qparam 3 - pre-defined for S16)
+ *
+ */
+class TanhQuantizeGraph
+{
+public:
+ TanhQuantizeGraph()
+ {
+ input = g.nodes()->create<luci::CircleInput>();
+ tanh = g.nodes()->create<luci::CircleTanh>();
+ quantize = g.nodes()->create<luci::CircleQuantize>();
+ output = g.nodes()->create<luci::CircleOutput>();
+
+ auto graph_input = g.inputs()->create();
+ input->index(graph_input->index());
+ auto graph_output = g.outputs()->create();
+ output->index(graph_output->index());
+
+ tanh->dtype(loco::DataType::U8);
+ quantize->dtype(loco::DataType::S16);
+
+ addQuantParam(tanh, {2.0f / 256.0f}, {128}); // pre-defined qparam for U8
+ addQuantParam(quantize, {1.0}, {0}); // not pre-defined values
+
+ tanh->x(input);
+ quantize->input(tanh);
+ output->from(quantize);
+ }
+
+public:
+ loco::Graph g;
+ luci::CircleInput *input = nullptr;
+ luci::CircleTanh *tanh = nullptr;
+ luci::CircleQuantize *quantize = nullptr;
+ luci::CircleOutput *output = nullptr;
+};
+
+/**
+ * Test graph for forward propagation in Quantize Op
+ *
+ * BEFORE
+ *
+ * [Floor U8] (qparam 1 - int scale)
+ * |
+ * [Quantize S16] (qparam 2 - not int scale)
+ *
+ * AFTER
+ *
+ * [Floor U8] (qparam 1 - int scale)
+ * |
+ * [Quantize S16] (qparam 3 - int scale)
+ *
+ */
+class FloorQuantizeGraph
+{
+public:
+ FloorQuantizeGraph()
+ {
+ input = g.nodes()->create<luci::CircleInput>();
+ floor = g.nodes()->create<luci::CircleFloor>();
+ quantize = g.nodes()->create<luci::CircleQuantize>();
+ output = g.nodes()->create<luci::CircleOutput>();
+
+ auto graph_input = g.inputs()->create();
+ input->index(graph_input->index());
+ auto graph_output = g.outputs()->create();
+ output->index(graph_output->index());
+
+ floor->dtype(loco::DataType::U8);
+ quantize->dtype(loco::DataType::S16);
+
+ addQuantParam(floor, {4.0f}, {128}); // int scale
+ addQuantParam(quantize, {0.3}, {0}); // not int scale
+
+ floor->x(input);
+ quantize->input(floor);
+ output->from(quantize);
+ }
+
+public:
+ loco::Graph g;
+ luci::CircleInput *input = nullptr;
+ luci::CircleFloor *floor = nullptr;
+ luci::CircleQuantize *quantize = nullptr;
+ luci::CircleOutput *output = nullptr;
+};
+
+} // namespace
+
+TEST(PropagateQParamForwardPassTest, name)
+{
+ luci::PropagateQParamForwardPass pass;
+ auto const name = pass.name();
+ ASSERT_NE(nullptr, name);
+}
+
+TEST(PropagateQParamForward, simple)
+{
+ SimpleGraph g;
+
+ luci::PropagateQParamForwardPass pass;
+ while (pass.run(&g.g))
+ ;
+
+ EXPECT_FLOAT_EQ(0.1, g.reshape->quantparam()->scale[0]);
+ EXPECT_FLOAT_EQ(0.2, g.reshape->quantparam()->scale[1]);
+ EXPECT_FLOAT_EQ(0.3, g.reshape->quantparam()->scale[2]);
+ EXPECT_EQ(0, g.reshape->quantparam()->zerop[0]);
+ EXPECT_EQ(10, g.reshape->quantparam()->zerop[1]);
+ EXPECT_EQ(20, g.reshape->quantparam()->zerop[2]);
+}
+
+TEST(PropagateQParamForward, wrong_op_NEG)
+{
+ SimpleGraph g;
+ g.output->from(g.conv);
+ g.reshape->drop();
+
+ luci::PropagateQParamForwardPass pass;
+ while (pass.run(&g.g))
+ ;
+
+ EXPECT_FLOAT_EQ(0.1, g.conv->quantparam()->scale[0]);
+ EXPECT_FLOAT_EQ(0.2, g.conv->quantparam()->scale[1]);
+ EXPECT_FLOAT_EQ(0.3, g.conv->quantparam()->scale[2]);
+ EXPECT_EQ(0, g.conv->quantparam()->zerop[0]);
+ EXPECT_EQ(10, g.conv->quantparam()->zerop[1]);
+ EXPECT_EQ(20, g.conv->quantparam()->zerop[2]);
+}
+
+TEST(PropagateQParamForward, tanh_predefined_value)
+{
+ TanhQuantizeGraph g;
+
+ luci::PropagateQParamForwardPass pass;
+ while (pass.run(&g.g))
+ ;
+
+ EXPECT_FLOAT_EQ(1.0f / 32768.0f, g.quantize->quantparam()->scale[0]);
+}
+
+TEST(PropagateQParamForward, floor_int_scale)
+{
+ FloorQuantizeGraph g;
+
+ luci::PropagateQParamForwardPass pass;
+ while (pass.run(&g.g))
+ ;
+
+ EXPECT_FLOAT_EQ(1.0f, g.quantize->quantparam()->scale[0]);
+}
+
+TEST(PropagateQParamForward, same_dtype_NEG)
+{
+ FloorQuantizeGraph g;
+ g.quantize->dtype(loco::DataType::U8);
+
+ luci::PropagateQParamForwardPass pass;
+ while (pass.run(&g.g))
+ ;
+
+ // Qparam is not propagated as ifm/ofm of Quantize Op have the same dtype
+ EXPECT_FLOAT_EQ(0.3f, g.quantize->quantparam()->scale[0]);
+}
diff --git a/compiler/luci/pass/src/PropagateQuantParamPass.cpp b/compiler/luci/pass/src/PropagateQuantParamPass.cpp
deleted file mode 100644
index b1cb7a418..000000000
--- a/compiler/luci/pass/src/PropagateQuantParamPass.cpp
+++ /dev/null
@@ -1,107 +0,0 @@
-/*
- * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#include "luci/Pass/PropagateQuantParamPass.h"
-
-#include <luci/IR/CircleNodes.h>
-#include <luci/IR/CircleNodeVisitor.h>
-#include <luci/Log.h>
-
-#include <iostream>
-
-namespace
-{
-
-bool copy_qparam(luci::CircleQuantParam *src, luci::CircleQuantParam *dst)
-{
- assert(src->scale.size() == dst->scale.size());
- assert(src->zerop.size() == dst->zerop.size());
-
- // src and dst have the same qparam
- if (std::equal(src->scale.begin(), src->scale.end(), dst->scale.begin()) &&
- std::equal(src->zerop.begin(), src->zerop.end(), dst->zerop.begin()) &&
- src->quantized_dimension == dst->quantized_dimension)
- return false;
-
- dst->scale.assign(src->scale.begin(), src->scale.end());
- dst->zerop.assign(src->zerop.begin(), src->zerop.end());
- dst->quantized_dimension = src->quantized_dimension;
- return true;
-}
-
-bool copy_qparam(luci::CircleNode *src, luci::CircleNode *dst)
-{
- // Skip nodes that do not have quantparams
- auto src_qparam = src->quantparam();
- if (not src_qparam)
- return false;
-
- auto dst_qparam = dst->quantparam();
- if (not dst_qparam)
- return false;
-
- return copy_qparam(src_qparam, dst_qparam);
-}
-
-// Visitor to propagate quantization parameters
-struct PropagateQuantParam final : public luci::CircleNodeMutableVisitor<bool>
-{
- PropagateQuantParam() = default;
-
- bool visit(luci::CircleNode *) { return false; }
-
- bool visit(luci::CircleReshape *node)
- {
- auto input = node->tensor();
- if (loco::succs(input).size() != 1)
- return false;
-
- auto input_node = loco::must_cast<luci::CircleNode *>(input);
- return copy_qparam(input_node, node);
- }
-
- bool visit(luci::CircleTranspose *node)
- {
- auto input_node = loco::must_cast<luci::CircleNode *>(node->a());
- return copy_qparam(input_node, node);
- }
-
- // TODO : Add more Ops (e.g., layout-changing Ops)
-};
-
-} // namespace
-
-namespace luci
-{
-
-bool PropagateQuantParamPass::run(loco::Graph *g)
-{
- bool changed = false;
- LOGGER(l);
- for (auto node : loco::active_nodes(loco::output_nodes(g)))
- {
- auto circle_node = loco::must_cast<luci::CircleNode *>(node);
- INFO(l) << "PropagateQuantParamPass visit node: " << circle_node->name() << std::endl;
-
- PropagateQuantParam pqp;
- if (circle_node->accept(&pqp))
- changed = true;
- }
-
- return changed;
-}
-
-} // namespace luci
diff --git a/compiler/luci/pass/src/PropagateQuantParamPass.test.cpp b/compiler/luci/pass/src/PropagateQuantParamPass.test.cpp
deleted file mode 100644
index 0f1564223..000000000
--- a/compiler/luci/pass/src/PropagateQuantParamPass.test.cpp
+++ /dev/null
@@ -1,125 +0,0 @@
-/*
- * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#include "luci/Pass/PropagateQuantParamPass.h"
-
-#include <luci/IR/CircleNodes.h>
-
-#include <gtest/gtest.h>
-
-namespace
-{
-
-void addQuantParam(luci::CircleNode *node, const std::vector<float> &scale,
- const std::vector<int64_t> &zp)
-{
- assert(node->quantparam() == nullptr);
-
- auto quantparam = std::make_unique<luci::CircleQuantParam>();
- quantparam->scale = scale;
- quantparam->zerop = zp;
- node->quantparam(std::move(quantparam));
-}
-
-/**
- * Simple graph for test
- *
- * BEFORE
- *
- * [Conv] (qparam 1)
- * |
- * [Reshape] (qparam 2)
- *
- * AFTER
- *
- * [Conv] (qparam 2)
- * |
- * [Reshape] (qparam 2)
- *
- */
-class SimpleGraph
-{
-public:
- SimpleGraph()
- {
- input = g.nodes()->create<luci::CircleInput>();
- conv = g.nodes()->create<luci::CircleConv2D>();
- reshape = g.nodes()->create<luci::CircleReshape>();
- output = g.nodes()->create<luci::CircleOutput>();
-
- auto graph_input = g.inputs()->create();
- input->index(graph_input->index());
- auto graph_output = g.outputs()->create();
- output->index(graph_output->index());
-
- addQuantParam(conv, {0.1, 0.2, 0.3}, {0, 10, 20});
- addQuantParam(reshape, {0.2, 0.4, 0.6}, {-10, 0, 10});
-
- conv->input(input);
- reshape->tensor(conv);
- output->from(reshape);
- }
-
-public:
- loco::Graph g;
- luci::CircleInput *input;
- luci::CircleConv2D *conv;
- luci::CircleReshape *reshape;
- luci::CircleOutput *output;
-};
-
-} // namespace
-
-TEST(PropagateQuantParamPassTest, name)
-{
- luci::PropagateQuantParamPass pass;
- auto const name = pass.name();
- ASSERT_NE(nullptr, name);
-}
-
-TEST(PropagateQuantParam, simple)
-{
- SimpleGraph g;
-
- luci::PropagateQuantParamPass pass;
- while (pass.run(&g.g))
- ;
-
- EXPECT_FLOAT_EQ(0.1, g.reshape->quantparam()->scale[0]);
- EXPECT_FLOAT_EQ(0.2, g.reshape->quantparam()->scale[1]);
- EXPECT_FLOAT_EQ(0.3, g.reshape->quantparam()->scale[2]);
- EXPECT_EQ(0, g.reshape->quantparam()->zerop[0]);
- EXPECT_EQ(10, g.reshape->quantparam()->zerop[1]);
- EXPECT_EQ(20, g.reshape->quantparam()->zerop[2]);
-}
-
-TEST(PropagateQuantParam, wrong_op_NEG)
-{
- SimpleGraph g;
- g.output->from(g.conv);
- g.reshape->drop();
-
- luci::PropagateQuantParamPass pass;
- while (pass.run(&g.g))
- ;
-
- EXPECT_FLOAT_EQ(0.1, g.conv->quantparam()->scale[0]);
- EXPECT_FLOAT_EQ(0.2, g.conv->quantparam()->scale[1]);
- EXPECT_FLOAT_EQ(0.3, g.conv->quantparam()->scale[2]);
- EXPECT_EQ(0, g.conv->quantparam()->zerop[0]);
- EXPECT_EQ(10, g.conv->quantparam()->zerop[1]);
- EXPECT_EQ(20, g.conv->quantparam()->zerop[2]);
-}
diff --git a/compiler/luci/pass/src/QuantizationUtils.cpp b/compiler/luci/pass/src/QuantizationUtils.cpp
index 2f6fed46e..ad86cedf4 100644
--- a/compiler/luci/pass/src/QuantizationUtils.cpp
+++ b/compiler/luci/pass/src/QuantizationUtils.cpp
@@ -33,43 +33,6 @@ bool is_quantized(const CircleNode *node)
node->dtype() == loco::DataType::S64); // bias (int16 quant)
}
-// Check if node is weights of conv2d, depthwise_conv2d, or fully_connected layer
-bool is_weights(CircleNode *node)
-{
- auto circle_const = dynamic_cast<CircleConst *>(node);
- if (circle_const == nullptr)
- return false;
-
- auto succs = loco::succs(node);
-
- // Node is weights if it is the weights of all of its successors
- for (auto out : succs)
- {
- bool is_weights = false;
-
- auto conv = dynamic_cast<CircleConv2D *>(out);
- if (conv != nullptr && conv->filter() == circle_const)
- is_weights = true;
-
- auto dw_conv = dynamic_cast<CircleDepthwiseConv2D *>(out);
- if (dw_conv != nullptr && dw_conv->filter() == circle_const)
- is_weights = true;
-
- auto t_conv = dynamic_cast<CircleTransposeConv *>(out);
- if (t_conv != nullptr && t_conv->filter() == circle_const && circle_const->rank() == 4)
- is_weights = true;
-
- auto fc = dynamic_cast<CircleFullyConnected *>(out);
- if (fc != nullptr && fc->weights() == circle_const)
- is_weights = true;
-
- if (!is_weights)
- return false;
- }
-
- return true;
-}
-
uint8_t fp32_to_uint8_cast(float f)
{
assert(std::numeric_limits<uint8_t>::min() <= f);
@@ -77,7 +40,6 @@ uint8_t fp32_to_uint8_cast(float f)
return static_cast<uint8_t>(f);
}
-// Per-layer quantization of weights (const tensor) using given min/max values
void asymmetric_wquant_with_minmax_per_layer(CircleConst *node, float min, float max,
float &scaling_factor, int64_t &zp, float &nudged_min,
float &nudged_max)
@@ -107,7 +69,6 @@ void asymmetric_wquant_with_minmax_per_layer(CircleConst *node, float min, float
}
}
-// Per-layer quantization of weights (const tensor) using given min/max values
void symmetric_wquant_with_minmax_per_layer(CircleConst *node, float min, float max,
float &scaling_factor, int64_t &zp, float &nudged_min,
float &nudged_max)
@@ -315,4 +276,123 @@ uint32_t cal_offset(loco::TensorShape &dimension, uint32_t *indices)
indices[2] * dimension.dim(3).value() + indices[3];
}
+ActivationQType activation_qtype(const CircleNode *node)
+{
+ auto fused_act_node = dynamic_cast<const CircleNodeMixin<CircleNodeTrait::FusedActFunc> *>(node);
+ if (fused_act_node && fused_act_node->fusedActivationFunction() == FusedActFunc::TANH)
+ return ActivationQType::PreDefinedValue;
+
+ switch (node->opcode())
+ {
+ case CircleOpcode::LOGISTIC:
+ case CircleOpcode::TANH:
+ case CircleOpcode::SOFTMAX:
+ return ActivationQType::PreDefinedValue;
+ case CircleOpcode::FLOOR:
+ case CircleOpcode::FLOOR_DIV:
+ case CircleOpcode::FLOOR_MOD:
+ case CircleOpcode::CEIL:
+ return ActivationQType::IntScale;
+ default:
+ break;
+ }
+
+ return ActivationQType::MinMax;
+}
+
+std::unique_ptr<CircleQuantParam> make_predefined_qparam(CircleOpcode opcode, loco::DataType dtype)
+{
+ auto qparam = std::make_unique<CircleQuantParam>();
+
+ auto set_qparam = [&qparam](float scale, int64_t zp) {
+ qparam->scale.emplace_back(scale);
+ qparam->zerop.emplace_back(zp);
+ };
+
+ switch (opcode)
+ {
+ case CircleOpcode::LOGISTIC:
+ if (dtype == loco::DataType::U8)
+ set_qparam(1.0f / 256.0f, 0);
+ else
+ {
+ assert(dtype == loco::DataType::S16);
+ set_qparam(1.0f / 32768.0f, 0);
+ }
+ break;
+ case CircleOpcode::TANH:
+ if (dtype == loco::DataType::U8)
+ set_qparam(2.0f / 256.0f, 128);
+ else
+ {
+ assert(dtype == loco::DataType::S16);
+ set_qparam(1.0f / 32768.0f, 0);
+ }
+ break;
+ case CircleOpcode::SOFTMAX:
+ if (dtype == loco::DataType::U8)
+ set_qparam(1.0f / 255.0f, 0);
+ else
+ {
+ assert(dtype == loco::DataType::S16);
+ set_qparam(1.0f / 32767.0f, 0);
+ }
+ break;
+ default:
+ throw std::runtime_error("Unsupported opcode with pre-defined qparam");
+ }
+  return qparam; // local unique_ptr is implicitly moved; std::move would inhibit copy elision
+}
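+
+// Illustrative derivation of the pre-defined values above: tanh outputs lie in
+// (-1, 1), so for U8 the 256 levels span a range of 2.0 (scale = 2/256) with
+// the zero point at the midpoint (128); for S16 (symmetric, zerop = 0) the
+// scale 1/32768 maps 1.0 to the top of the int16 range. Logistic outputs lie
+// in (0, 1), hence scale = 1/256 and zerop = 0 for U8.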
+
+// For nodes with integer output, we use integer scale
+void set_int_scale(luci::CircleNode *node)
+{
+ assert(node); // FIX_CALLER_UNLESS
+
+ auto qparam = node->quantparam();
+ assert(qparam); // FIX_CALLER_UNLESS
+ assert(qparam->scale.size() == 1); // FIX_CALLER_UNLESS
+
+ auto fp_scale = qparam->scale[0];
+ qparam->scale[0] = fp_scale < 1 ? 1.0f : std::round(fp_scale);
+}
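+
+// Example (hypothetical values): fp_scale = 4.3 becomes 4.0, while
+// fp_scale = 0.3 becomes 1.0, since scales below 1 are rounded up to the
+// minimum integer scale.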
+
+void quant_const(luci::CircleConst *node, loco::DataType quant_type)
+{
+ assert(node->dtype() == loco::DataType::FLOAT32);
+
+ float min = std::numeric_limits<float>::max();
+ float max = std::numeric_limits<float>::lowest();
+ for (uint32_t i = 0; i < node->size<loco::DataType::FLOAT32>(); i++)
+ {
+ auto data = node->at<loco::DataType::FLOAT32>(i);
+ min = data < min ? data : min;
+ max = data > max ? data : max;
+ }
+
+ float scaling_factor{0.0};
+ int64_t zp{0};
+ float nudged_min{0.0};
+ float nudged_max{0.0};
+
+ switch (quant_type)
+ {
+ case loco::DataType::U8:
+ asymmetric_wquant_with_minmax_per_layer(node, min, max, scaling_factor, zp, nudged_min,
+ nudged_max);
+ break;
+ case loco::DataType::S16:
+ symmetric_wquant_with_minmax_per_layer(node, min, max, scaling_factor, zp, nudged_min,
+ nudged_max);
+ break;
+ default:
+ throw std::runtime_error("Unsupported data type");
+ }
+
+ auto quantparam = std::make_unique<luci::CircleQuantParam>();
+ quantparam->scale.push_back(scaling_factor);
+ quantparam->zerop.push_back(zp);
+ node->quantparam(std::move(quantparam));
+}
+
} // namespace luci
diff --git a/compiler/luci/pass/src/QuantizationUtils.h b/compiler/luci/pass/src/QuantizationUtils.h
index 605f6a77e..cd8cec95a 100644
--- a/compiler/luci/pass/src/QuantizationUtils.h
+++ b/compiler/luci/pass/src/QuantizationUtils.h
@@ -23,33 +23,61 @@
namespace luci
{
+// Compute scale/zp using given min/max for symmetric quantization (int16)
void compute_sym_scale_zp(float min, float max, float &scaling_factor, int64_t &zp,
float &nudged_min, float &nudged_max);
+// Compute scale/zp using given min/max for asymmetric quantization (uint8)
void compute_asym_scale_zp(float min, float max, float &scaling_factor, int64_t &zp,
float &nudged_min, float &nudged_max);
+// Asymmetric per-layer quantization of weights (const tensor) using given min/max values
+// NOTE: in-place update of node data
void asymmetric_wquant_with_minmax_per_layer(CircleConst *node, float min, float max,
float &scaling_factor, int64_t &zp, float &nudged_min,
float &nudged_max);
+// Symmetric per-layer quantization of weights (const tensor) using given min/max values
+// NOTE: in-place update of node data
void symmetric_wquant_with_minmax_per_layer(CircleConst *node, float min, float max,
float &scaling_factor, int64_t &zp, float &nudged_min,
float &nudged_max);
+// Helper function to get channel dimension
+// TODO Embed this function into iterate_per_channel
bool get_channel_dim_index(CircleConst *node, loco::TensorShape &dimension,
int32_t &channel_dim_index);
+// Calculate offset of the given indices in dimension
uint32_t cal_offset(loco::TensorShape &dimension, uint32_t *indices);
-void propagate_concat_quantparam(luci::CircleConcatenation *concat, loco::DataType quant_type);
+// Backward propagation of concatenation qparam
+void propagate_concat_quantparam(luci::CircleConcatenation *concat);
-void propagate_pad_v2_quantparam(luci::CirclePadV2 *pad_v2, loco::DataType quant_type);
-
-bool is_weights(CircleNode *node);
+// Backward propagation of pad_v2 qparam
+void propagate_pad_v2_quantparam(luci::CirclePadV2 *pad_v2);
+// Return true if the node is quantized
bool is_quantized(const CircleNode *node);
+enum ActivationQType
+{
+ MinMax, // Quantize using recorded min/max
+ PreDefinedValue, // Quantize using pre-defined values
+ IntScale, // Round scale to a positive integer
+};
+
+ActivationQType activation_qtype(const CircleNode *node);
+
+// Create qparam with pre-defined values for special operators
+std::unique_ptr<CircleQuantParam> make_predefined_qparam(CircleOpcode opcode, loco::DataType dtype);
+
+// Update node's scale to a positive integer (for special Ops e.g., Floor, Ceil)
+void set_int_scale(luci::CircleNode *node);
+
+// Quantize const tensor using its min/max values
+void quant_const(luci::CircleConst *node, loco::DataType quant_type);
+
} // namespace luci
#endif // __LUCI_QUANTIZATION_UTILS_H__
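
For orientation, here is a hypothetical usage sketch of the helpers declared in this header. Only the `quant_const` call and the resulting qparam layout come from the code above; the graph setup is made up:

```cpp
// Hypothetical usage sketch of quant_const(); assumes the luci/loco headers
// are available.
#include "QuantizationUtils.h"

#include <luci/IR/CircleNodes.h>

#include <cassert>

void quantize_scalar_const(loco::Graph *g)
{
  auto c = g->nodes()->create<luci::CircleConst>();
  c->dtype(loco::DataType::FLOAT32);
  c->rank(1);
  c->dim(0).set(1);
  c->size<loco::DataType::FLOAT32>(1);
  c->at<loco::DataType::FLOAT32>(0) = 0.5f;

  // Per-layer quantization using the tensor's own min/max (both 0.5 here)
  luci::quant_const(c, loco::DataType::U8);

  assert(c->quantparam() != nullptr);
  assert(c->quantparam()->scale.size() == 1); // layer-wise: one scale
  assert(c->quantparam()->zerop.size() == 1); // layer-wise: one zero-point
}
```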
diff --git a/compiler/luci/pass/src/QuantizeActivation.cpp b/compiler/luci/pass/src/QuantizeActivation.cpp
new file mode 100644
index 000000000..149331824
--- /dev/null
+++ b/compiler/luci/pass/src/QuantizeActivation.cpp
@@ -0,0 +1,296 @@
+/*
+ * Copyright (c) 2022 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "QuantizeActivation.h"
+#include "QuantizationUtils.h"
+
+#include <luci/Service/Nodes/CircleConst.h>
+#include <luci/Log.h>
+
+#include <algorithm>
+#include <cmath>
+
+using namespace luci;
+
+namespace
+{
+
+bool has_min_max(const CircleNode *node)
+{
+ return node->quantparam() && !node->quantparam()->min.empty() && !node->quantparam()->max.empty();
+}
+
+} // namespace
+
+// QuantizeActivation
+namespace luci
+{
+
+void QuantizeActivation::visit(luci::CircleNode *node)
+{
+ LOGGER(l);
+ INFO(l) << "QuantizeActivation visit node: " << node->name() << std::endl;
+
+ // Check if this is already quantized
+ if (is_quantized(node))
+ return;
+
+ // Check if this is bool type (bool type is not quantized)
+ if (node->dtype() == loco::DataType::BOOL)
+ return;
+
+ // Check if this is const (const activation is handled by QuantizeConstInputActivation)
+ // NOTE QuantizePreChecker guarantees weights/bias are const.
+ // Update this code when we accept non-const weights/bias.
+ if (node->opcode() == luci::CircleOpcode::CIRCLECONST)
+ return;
+
+ // Check if this is activation
+ // We assume min/max are recorded only for activations
+ if (has_min_max(node))
+ {
+ // Quantize using recorded min/max
+ auto quantparam = node->quantparam();
+ assert(quantparam);
+ assert(quantparam->min.size() == 1); // only support layer-wise quant
+ assert(quantparam->max.size() == 1); // only support layer-wise quant
+ auto min = quantparam->min[0];
+ auto max = quantparam->max[0];
+
+ float scaling_factor{0};
+ int64_t zp{0};
+ float nudged_min{0};
+ float nudged_max{0};
+
+ if (output_type == loco::DataType::U8)
+ {
+ compute_asym_scale_zp(min, max, scaling_factor, zp, nudged_min, nudged_max);
+ node->dtype(loco::DataType::U8);
+ }
+ else
+ {
+ compute_sym_scale_zp(min, max, scaling_factor, zp, nudged_min, nudged_max);
+ node->dtype(loco::DataType::S16);
+ }
+
+ node->quantparam()->scale.push_back(scaling_factor);
+ node->quantparam()->zerop.push_back(zp);
+ }
+ // Fix special attributes
+ if (node->opcode() == luci::CircleOpcode::CAST)
+ {
+ auto *cast = loco::must_cast<luci::CircleCast *>(node);
+ auto *cast_input = loco::must_cast<luci::CircleNode *>(cast->x());
+
+ // make sure that cast_input is already quantized
+ assert(cast_input->dtype() != loco::DataType::FLOAT32);
+ cast->in_data_type(cast_input->dtype());
+ cast->out_data_type(cast->dtype());
+ }
+}
+
+} // namespace luci
+
+// QuantizeSpecialActivation
+namespace luci
+{
+
+void QuantizeSpecialActivation::visit(luci::CircleNode *node)
+{
+ // Nodes fused with activation functions which need special quantization
+ auto fused_act_node = dynamic_cast<CircleNodeMixin<CircleNodeTrait::FusedActFunc> *>(node);
+ if (fused_act_node != nullptr && fused_act_node->fusedActivationFunction() == FusedActFunc::TANH)
+ {
+ auto qparam = make_predefined_qparam(luci::CircleOpcode::TANH, output_type);
+ node->quantparam(std::move(qparam));
+ }
+}
+
+void QuantizeSpecialActivation::visit(luci::CircleLogistic *node)
+{
+ assert(activation_qtype(node) == luci::ActivationQType::PreDefinedValue);
+ auto qparam = make_predefined_qparam(luci::CircleOpcode::LOGISTIC, output_type);
+ node->quantparam(std::move(qparam));
+}
+
+void QuantizeSpecialActivation::visit(luci::CircleTanh *node)
+{
+ assert(activation_qtype(node) == luci::ActivationQType::PreDefinedValue);
+ auto qparam = make_predefined_qparam(luci::CircleOpcode::TANH, output_type);
+ node->quantparam(std::move(qparam));
+}
+
+void QuantizeSpecialActivation::visit(luci::CircleSoftmax *node)
+{
+ assert(activation_qtype(node) == luci::ActivationQType::PreDefinedValue);
+ auto qparam = make_predefined_qparam(luci::CircleOpcode::SOFTMAX, output_type);
+ node->quantparam(std::move(qparam));
+}
+
+void QuantizeSpecialActivation::visit(luci::CircleFloor *node)
+{
+ assert(activation_qtype(node) == luci::ActivationQType::IntScale);
+ set_int_scale(node);
+}
+
+void QuantizeSpecialActivation::visit(luci::CircleFloorDiv *node)
+{
+ assert(activation_qtype(node) == luci::ActivationQType::IntScale);
+ set_int_scale(node);
+}
+
+void QuantizeSpecialActivation::visit(luci::CircleFloorMod *node)
+{
+ assert(activation_qtype(node) == luci::ActivationQType::IntScale);
+ set_int_scale(node);
+}
+
+void QuantizeSpecialActivation::visit(luci::CircleCeil *node)
+{
+ assert(activation_qtype(node) == luci::ActivationQType::IntScale);
+ set_int_scale(node);
+}
+
+} // namespace luci
+
+// QuantizeConstInputActivation
+namespace luci
+{
+
+// Default behavior (NYI)
+void QuantizeConstInputActivation::visit(luci::CircleNode *node)
+{
+ for (uint32_t i = 0; i < node->arity(); i++)
+ {
+ auto input_node = node->arg(i);
+ auto const_node = dynamic_cast<luci::CircleConst *>(input_node);
+ if (const_node != nullptr)
+ throw std::runtime_error("Unsupported Op for const inputs");
+ }
+}
+
+// INPUT_NAME is the only activation of NODE
+#define QUANTIZE_SINGLE_CONST_INPUT(NODE, INPUT_NAME) \
+ void QuantizeConstInputActivation::visit(NODE *node) \
+ { \
+ auto input = node->INPUT_NAME(); \
+ auto const_node = dynamic_cast<luci::CircleConst *>(input); \
+ if (const_node && !is_quantized(const_node)) \
+ { \
+ auto new_const = luci::clone(const_node); \
+ quant_const(new_const, _output_type); \
+ node->INPUT_NAME(new_const); \
+ } \
+ }
+
+// INPUT_NAME1 and INPUT_NAME2 are the only activations of NODE
+#define QUANTIZE_TWO_CONST_INPUTS(NODE, INPUT_NAME1, INPUT_NAME2) \
+ void QuantizeConstInputActivation::visit(NODE *node) \
+ { \
+ auto input1 = node->INPUT_NAME1(); \
+ auto const_node1 = dynamic_cast<luci::CircleConst *>(input1); \
+ if (const_node1 && !is_quantized(const_node1)) \
+ { \
+ auto new_const1 = luci::clone(const_node1); \
+ quant_const(new_const1, _output_type); \
+ node->INPUT_NAME1(new_const1); \
+ } \
+ auto input2 = node->INPUT_NAME2(); \
+ auto const_node2 = dynamic_cast<luci::CircleConst *>(input2); \
+ if (const_node2 && !is_quantized(const_node2)) \
+ { \
+ auto new_const2 = luci::clone(const_node2); \
+ quant_const(new_const2, _output_type); \
+ node->INPUT_NAME2(new_const2); \
+ } \
+ }
+
+// Ops that receive a single activation as an input
+QUANTIZE_SINGLE_CONST_INPUT(luci::CircleArgMax, input)
+QUANTIZE_SINGLE_CONST_INPUT(luci::CircleArgMin, input)
+QUANTIZE_SINGLE_CONST_INPUT(luci::CircleBatchToSpaceND, input)
+QUANTIZE_SINGLE_CONST_INPUT(luci::CircleDepthToSpace, input)
+QUANTIZE_SINGLE_CONST_INPUT(luci::CircleElu, features)
+QUANTIZE_SINGLE_CONST_INPUT(luci::CircleExp, x)
+QUANTIZE_SINGLE_CONST_INPUT(luci::CircleFloor, x)
+QUANTIZE_SINGLE_CONST_INPUT(luci::CircleGather, params)
+QUANTIZE_SINGLE_CONST_INPUT(luci::CircleLocalResponseNormalization, input)
+QUANTIZE_SINGLE_CONST_INPUT(luci::CircleLogistic, x)
+QUANTIZE_SINGLE_CONST_INPUT(luci::CircleMean, input)
+QUANTIZE_SINGLE_CONST_INPUT(luci::CircleMirrorPad, input)
+QUANTIZE_SINGLE_CONST_INPUT(luci::CirclePad, input)
+QUANTIZE_SINGLE_CONST_INPUT(luci::CircleReduceAny, input)
+QUANTIZE_SINGLE_CONST_INPUT(luci::CircleReduceProd, input)
+QUANTIZE_SINGLE_CONST_INPUT(luci::CircleReduceMax, input)
+QUANTIZE_SINGLE_CONST_INPUT(luci::CircleReduceMin, input)
+QUANTIZE_SINGLE_CONST_INPUT(luci::CircleReshape, tensor)
+QUANTIZE_SINGLE_CONST_INPUT(luci::CircleResizeBilinear, input)
+QUANTIZE_SINGLE_CONST_INPUT(luci::CircleResizeNearestNeighbor, input)
+QUANTIZE_SINGLE_CONST_INPUT(luci::CircleReverseSequence, input)
+QUANTIZE_SINGLE_CONST_INPUT(luci::CircleRsqrt, x)
+QUANTIZE_SINGLE_CONST_INPUT(luci::CircleSlice, input)
+QUANTIZE_SINGLE_CONST_INPUT(luci::CircleSoftmax, logits)
+QUANTIZE_SINGLE_CONST_INPUT(luci::CircleSpaceToBatchND, input)
+QUANTIZE_SINGLE_CONST_INPUT(luci::CircleSpaceToDepth, input)
+QUANTIZE_SINGLE_CONST_INPUT(luci::CircleSplit, input)
+QUANTIZE_SINGLE_CONST_INPUT(luci::CircleSplitV, input)
+QUANTIZE_SINGLE_CONST_INPUT(luci::CircleSqrt, x)
+QUANTIZE_SINGLE_CONST_INPUT(luci::CircleStridedSlice, input)
+QUANTIZE_SINGLE_CONST_INPUT(luci::CircleSum, input)
+QUANTIZE_SINGLE_CONST_INPUT(luci::CircleTanh, x)
+QUANTIZE_SINGLE_CONST_INPUT(luci::CircleTile, input)
+QUANTIZE_SINGLE_CONST_INPUT(luci::CircleTopKV2, input)
+QUANTIZE_SINGLE_CONST_INPUT(luci::CircleTranspose, a)
+QUANTIZE_SINGLE_CONST_INPUT(luci::CircleUnpack, value)
+
+// Ops that receive two activations as inputs
+QUANTIZE_TWO_CONST_INPUTS(luci::CircleAdd, x, y)
+QUANTIZE_TWO_CONST_INPUTS(luci::CircleBatchMatMul, x, y)
+QUANTIZE_TWO_CONST_INPUTS(luci::CircleDiv, x, y)
+QUANTIZE_TWO_CONST_INPUTS(luci::CircleEqual, x, y)
+QUANTIZE_TWO_CONST_INPUTS(luci::CircleFloorDiv, x, y)
+QUANTIZE_TWO_CONST_INPUTS(luci::CircleGreater, x, y)
+QUANTIZE_TWO_CONST_INPUTS(luci::CircleGreaterEqual, x, y)
+QUANTIZE_TWO_CONST_INPUTS(luci::CircleLess, x, y)
+QUANTIZE_TWO_CONST_INPUTS(luci::CircleLessEqual, x, y)
+QUANTIZE_TWO_CONST_INPUTS(luci::CircleMaximum, x, y)
+QUANTIZE_TWO_CONST_INPUTS(luci::CircleMinimum, x, y)
+QUANTIZE_TWO_CONST_INPUTS(luci::CircleMul, x, y)
+QUANTIZE_TWO_CONST_INPUTS(luci::CircleNotEqual, x, y)
+QUANTIZE_TWO_CONST_INPUTS(luci::CirclePow, x, y)
+QUANTIZE_TWO_CONST_INPUTS(luci::CircleSub, x, y)
+
+// AddN has an arbitrary number of inputs
+void QuantizeConstInputActivation::visit(luci::CircleAddN *node)
+{
+ auto arity = node->arity();
+ for (uint32_t i = 0; i < arity; i++)
+ {
+ auto input_node = node->inputs(i);
+ auto const_node = dynamic_cast<luci::CircleConst *>(input_node);
+ if (const_node && !is_quantized(const_node))
+ {
+ auto new_const = luci::clone(const_node);
+ quant_const(new_const, _output_type);
+ node->inputs(i, new_const);
+ }
+ }
+}
+
+#undef QUANTIZE_SINGLE_CONST_INPUT
+#undef QUANTIZE_TWO_CONST_INPUTS
+
+} // namespace luci
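
The visitors above are plain `CircleNodeMutableVisitor` implementations, so a pass drives them node by node. A minimal driver sketch, assuming the same traversal pattern that `QuantizeDequantizeWeightsPass::run` uses later in this patch (the real wiring inside the quantizer pass may differ):

```cpp
// Hypothetical driver sketch for the visitors above.
#include "QuantizeActivation.h"

#include <loco.h>

void run_activation_quantization(loco::Graph *g)
{
  // First sweep: quantize activations from recorded min/max
  for (auto node : loco::active_nodes(loco::output_nodes(g)))
  {
    auto circle_node = loco::must_cast<luci::CircleNode *>(node);
    luci::QuantizeActivation qa(loco::DataType::FLOAT32, loco::DataType::U8);
    circle_node->accept(&qa);
  }

  // Second sweep: overwrite qparams of special Ops (Tanh, Logistic, Softmax,
  // Floor/Ceil variants) with pre-defined or integer scales
  for (auto node : loco::active_nodes(loco::output_nodes(g)))
  {
    auto circle_node = loco::must_cast<luci::CircleNode *>(node);
    luci::QuantizeSpecialActivation qsa(loco::DataType::FLOAT32, loco::DataType::U8);
    circle_node->accept(&qsa);
  }
}
```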
diff --git a/compiler/luci/pass/src/QuantizeActivation.h b/compiler/luci/pass/src/QuantizeActivation.h
new file mode 100644
index 000000000..fc32d1cde
--- /dev/null
+++ b/compiler/luci/pass/src/QuantizeActivation.h
@@ -0,0 +1,165 @@
+/*
+ * Copyright (c) 2022 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __LUCI_QUANTIZATION_ACTIVATION_H__
+#define __LUCI_QUANTIZATION_ACTIVATION_H__
+
+#include <luci/IR/CircleNodeVisitor.h>
+
+namespace luci
+{
+
+/**
+ * @brief Quantize non-const activation using recorded min/max values
+ */
+struct QuantizeActivation final : public luci::CircleNodeMutableVisitor<void>
+{
+ QuantizeActivation(loco::DataType input, loco::DataType output)
+ : input_type(input), output_type(output)
+ {
+ }
+
+ loco::DataType input_type;
+ loco::DataType output_type;
+
+ // Quantize each node using recorded min/max
+ void visit(luci::CircleNode *node);
+};
+
+/**
+ * @brief Quantize non-const activation using pre-defined scale/zp for special Ops
+ */
+struct QuantizeSpecialActivation final : public luci::CircleNodeMutableVisitor<void>
+{
+ QuantizeSpecialActivation(loco::DataType input, loco::DataType output)
+ : input_type(input), output_type(output)
+ {
+ }
+
+ loco::DataType input_type;
+ loco::DataType output_type;
+
+ void visit(luci::CircleNode *node);
+ void visit(luci::CircleLogistic *node);
+ void visit(luci::CircleTanh *node);
+ void visit(luci::CircleSoftmax *node);
+ void visit(luci::CircleFloor *node);
+ void visit(luci::CircleFloorDiv *node);
+ void visit(luci::CircleFloorMod *node);
+ void visit(luci::CircleCeil *node);
+};
+
+// Quantize constant input activation of a node
+// The input of a node is quantized if it is
+// 1. Constant (instance of CircleConst*)
+// 2. Activation (other inputs, e.g., weights, bias, axis, etc., should not be quantized here)
+struct QuantizeConstInputActivation final : public luci::CircleNodeMutableVisitor<void>
+{
+ QuantizeConstInputActivation(loco::DataType output_type) : _output_type(output_type) {}
+
+private:
+ loco::DataType _output_type;
+
+// Skip NODE
+#define SKIP(NODE) \
+ void visit(NODE *) {}
+
+ // Handled in QuantizeWeights and QuantizeBias
+ SKIP(luci::CircleConv2D)
+ SKIP(luci::CircleDepthwiseConv2D)
+ SKIP(luci::CircleFullyConnected)
+ SKIP(luci::CircleInstanceNorm)
+ SKIP(luci::CirclePRelu)
+ SKIP(luci::CircleTransposeConv)
+
+ // Handled in PropagateQParamBackwardPass
+ SKIP(luci::CircleConcatenation)
+ SKIP(luci::CirclePadV2)
+ SKIP(luci::CirclePack)
+ SKIP(luci::CircleOneHot)
+
+ // Inputs of logical Ops are bool, thus not quantized
+ SKIP(luci::CircleLogicalOr)
+ SKIP(luci::CircleLogicalAnd)
+ SKIP(luci::CircleLogicalNot)
+
+#undef SKIP
+
+ // Default behavior (NYI)
+ void visit(luci::CircleNode *node);
+
+ // Ops that receive a single activation as an input
+ void visit(luci::CircleArgMax *node);
+ void visit(luci::CircleArgMin *node);
+ void visit(luci::CircleBatchToSpaceND *node);
+ void visit(luci::CircleDepthToSpace *node);
+ void visit(luci::CircleElu *node);
+ void visit(luci::CircleExp *node);
+ void visit(luci::CircleFloor *node);
+ void visit(luci::CircleGather *node);
+ void visit(luci::CircleLocalResponseNormalization *node);
+ void visit(luci::CircleLogistic *node);
+ void visit(luci::CircleMean *node);
+ void visit(luci::CircleMirrorPad *node);
+ void visit(luci::CirclePad *node);
+ void visit(luci::CircleReduceAny *node);
+ void visit(luci::CircleReduceProd *node);
+ void visit(luci::CircleReduceMax *node);
+ void visit(luci::CircleReduceMin *node);
+ void visit(luci::CircleReshape *node);
+ void visit(luci::CircleResizeBilinear *node);
+ void visit(luci::CircleResizeNearestNeighbor *node);
+ void visit(luci::CircleReverseSequence *node);
+ void visit(luci::CircleRsqrt *node);
+ void visit(luci::CircleSlice *node);
+ void visit(luci::CircleSoftmax *node);
+ void visit(luci::CircleSpaceToBatchND *node);
+ void visit(luci::CircleSpaceToDepth *node);
+ void visit(luci::CircleSplit *node);
+ void visit(luci::CircleSplitV *node);
+ void visit(luci::CircleSqrt *node);
+ void visit(luci::CircleStridedSlice *node);
+ void visit(luci::CircleSum *node);
+ void visit(luci::CircleTanh *node);
+ void visit(luci::CircleTile *node);
+ void visit(luci::CircleTopKV2 *node);
+ void visit(luci::CircleTranspose *node);
+ void visit(luci::CircleUnpack *node);
+
+ // Ops that receive two activations as inputs
+ void visit(luci::CircleAdd *node);
+ void visit(luci::CircleBatchMatMul *node);
+ void visit(luci::CircleDiv *node);
+ void visit(luci::CircleEqual *node);
+ void visit(luci::CircleFloorDiv *node);
+ void visit(luci::CircleGreater *node);
+ void visit(luci::CircleGreaterEqual *node);
+ void visit(luci::CircleLess *node);
+ void visit(luci::CircleLessEqual *node);
+ void visit(luci::CircleMaximum *node);
+ void visit(luci::CircleMinimum *node);
+ void visit(luci::CircleMul *node);
+ void visit(luci::CircleNotEqual *node);
+ void visit(luci::CirclePow *node);
+ void visit(luci::CircleSub *node);
+
+ // AddN has an arbitrary number of inputs
+ void visit(luci::CircleAddN *node);
+};
+
+} // namespace luci
+
+#endif // __LUCI_QUANTIZATION_ACTIVATION_H__
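
A note on the pre-defined scales used by `QuantizeSpecialActivation`: bounded activations such as tanh and logistic have fixed output ranges, so their qparams need no recorded min/max. The sketch below uses the usual TFLite-style constants; the authoritative values live in `make_predefined_qparam`, and the numbers here are assumptions:

```cpp
// Sketch of plausible pre-defined qparams for bounded activations (u8).
// The struct mirrors the scale/zerop fields of CircleQuantParam so the
// example stays self-contained.
#include <cstdint>
#include <memory>
#include <vector>

struct QParam
{
  std::vector<float> scale;
  std::vector<int64_t> zerop;
};

std::unique_ptr<QParam> predefined_qparam_u8_tanh()
{
  // tanh output lies in [-1, 1): scale = 2/256, zero-point = 128 (assumed)
  auto qp = std::make_unique<QParam>();
  qp->scale.push_back(2.0f / 256.0f);
  qp->zerop.push_back(128);
  return qp;
}

std::unique_ptr<QParam> predefined_qparam_u8_logistic()
{
  // logistic output lies in [0, 1): scale = 1/256, zero-point = 0 (assumed)
  auto qp = std::make_unique<QParam>();
  qp->scale.push_back(1.0f / 256.0f);
  qp->zerop.push_back(0);
  return qp;
}
```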
diff --git a/compiler/luci/pass/src/QuantizeBias.cpp b/compiler/luci/pass/src/QuantizeBias.cpp
new file mode 100644
index 000000000..aa496232a
--- /dev/null
+++ b/compiler/luci/pass/src/QuantizeBias.cpp
@@ -0,0 +1,300 @@
+/*
+ * Copyright (c) 2022 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "QuantizeBias.h"
+#include "QuantizationUtils.h"
+
+#include <luci/Service/Nodes/CircleConst.h>
+#include <luci/Log.h>
+
+#include <algorithm>
+#include <cmath>
+
+using namespace luci;
+
+namespace
+{
+
+// struct to carry Input/Weights/Bias
+struct IWB
+{
+ CircleNode *input = nullptr;
+ CircleNode *weights = nullptr;
+ CircleConst *bias = nullptr;
+
+ IWB(loco::Node *i, loco::Node *w, loco::Node *b)
+ {
+ input = dynamic_cast<luci::CircleNode *>(i);
+ weights = dynamic_cast<luci::CircleNode *>(w);
+ bias = dynamic_cast<luci::CircleConst *>(b);
+ }
+
+ // Return true if bias can be quantized with valid input and weights
+ operator bool()
+ {
+ if (bias == nullptr || is_quantized(bias))
+ return false;
+ if (input == nullptr || weights == nullptr)
+ return false;
+ return true;
+ }
+};
+
+// Create a new const node from an existing node.
+// The new node has the following characteristics
+// type: T
+// shape: same as 'node' (given as an argument)
+// buffer size: 'size' (given as an argument)
+// Note that contents are not filled in this function.
+template <loco::DataType T>
+luci::CircleConst *create_empty_const_from(luci::CircleConst *node, uint32_t size)
+{
+ auto new_node = node->graph()->nodes()->create<CircleConst>();
+ // TODO: We don't have any naming convention for quantized nodes yet.
+ // Fix this when we have one.
+ new_node->name(node->name());
+ new_node->dtype(T);
+ new_node->rank(node->rank());
+ for (uint32_t i = 0; i < node->rank(); i++)
+ new_node->dim(i).set(node->dim(i).value());
+
+ new_node->size<T>(size);
+ new_node->shape_status(luci::ShapeStatus::VALID);
+
+ return new_node;
+}
+
+CircleConst *asym_quant_bias_per_layer(CircleConst *node, float input_scale, float weight_scale,
+ float *scaling_factor, int64_t *zp)
+{
+ float scale = input_scale * weight_scale;
+ const float scaling_factor_inv = (scale == 0) ? 0 : 1.0 / scale;
+
+ uint32_t size = node->size<loco::DataType::FLOAT32>();
+ std::vector<int32_t> quantized_values(size);
+ for (uint32_t i = 0; i < size; ++i)
+ {
+ quantized_values[i] =
+ static_cast<int32_t>(std::round(node->at<loco::DataType::FLOAT32>(i) * scaling_factor_inv));
+ }
+
+ auto new_bias = create_empty_const_from<loco::DataType::S32>(node, size);
+
+ const int32_t kMinScale = std::numeric_limits<int32_t>::lowest();
+ const int32_t kMaxScale = std::numeric_limits<int32_t>::max();
+ for (uint32_t i = 0; i < size; ++i)
+ {
+ new_bias->at<loco::DataType::S32>(i) =
+ std::min(kMaxScale, std::max(kMinScale, quantized_values[i]));
+ }
+ *scaling_factor = scale;
+ *zp = 0;
+
+ return new_bias;
+}
+
+CircleConst *quant_bias_per_channel(CircleConst *node, float input_scale,
+ std::vector<float> &weight_scale,
+ std::vector<float> &scaling_factor, std::vector<int64_t> &zp)
+{
+ float scaling_factor_inv{0};
+
+ uint32_t size = node->size<loco::DataType::FLOAT32>();
+ std::vector<int32_t> quantized_values(size);
+
+ for (uint32_t i = 0; i < size; ++i)
+ {
+ scaling_factor[i] = input_scale * weight_scale[i];
+ scaling_factor_inv = (scaling_factor[i] == 0) ? 0 : 1.0 / scaling_factor[i];
+ quantized_values[i] =
+ static_cast<int32_t>(std::round(node->at<loco::DataType::FLOAT32>(i) * scaling_factor_inv));
+ zp[i] = 0;
+ }
+
+ auto new_bias = create_empty_const_from<loco::DataType::S32>(node, size);
+
+ const int32_t kMinScale = std::numeric_limits<int32_t>::lowest();
+ const int32_t kMaxScale = std::numeric_limits<int32_t>::max();
+ for (uint32_t i = 0; i < size; ++i)
+ {
+ new_bias->at<loco::DataType::S32>(i) =
+ std::min(kMaxScale, std::max(kMinScale, quantized_values[i]));
+ }
+
+ return new_bias;
+}
+
+CircleConst *int16_quant_bias_per_channel(CircleConst *node, float input_scale,
+ std::vector<float> &weight_scale,
+ std::vector<float> &scaling_factor,
+ std::vector<int64_t> &zp)
+{
+ float scaling_factor_inv{0};
+
+ uint32_t size = node->size<loco::DataType::FLOAT32>();
+ std::vector<int64_t> quantized_values(size);
+
+ for (uint32_t i = 0; i < size; ++i)
+ {
+ scaling_factor[i] = input_scale * weight_scale[i];
+ scaling_factor_inv = (scaling_factor[i] == 0) ? 0 : 1.0 / scaling_factor[i];
+ quantized_values[i] =
+ static_cast<int64_t>(std::round(node->at<loco::DataType::FLOAT32>(i) * scaling_factor_inv));
+ zp[i] = 0;
+ }
+
+ auto new_bias = create_empty_const_from<loco::DataType::S64>(node, size);
+
+ for (uint32_t i = 0; i < size; ++i)
+ {
+ new_bias->at<loco::DataType::S64>(i) = quantized_values[i];
+ }
+
+ return new_bias;
+}
+
+} // namespace
+
+namespace luci
+{
+
+// Return a quantized bias node
+CircleConst *QuantizeBias::quantized_bias(CircleNode *input, const CircleNode *weight,
+ CircleNode *bias)
+{
+ auto const_bias = loco::must_cast<luci::CircleConst *>(bias);
+ assert(const_bias->dtype() == loco::DataType::FLOAT32);
+
+ // If input is const, it is quantized here, not in QuantizeActivation
+ if (auto const_input = dynamic_cast<luci::CircleConst *>(input))
+ {
+ quant_const(const_input, output_type);
+ }
+
+ CircleConst *new_bias = nullptr;
+
+ if (granularity == QuantizationGranularity::ChannelWise)
+ {
+ auto input_q = input->quantparam();
+ assert(input_q);
+ assert(input_q->scale.size() == 1); // input scale is layer-wise
+ auto input_scale = input_q->scale[0];
+
+ assert(weight->quantparam() != nullptr); // weight scale is channel-wise
+ auto weight_scale = weight->quantparam()->scale;
+
+ uint32_t size = const_bias->size<loco::DataType::FLOAT32>();
+ assert(size == weight_scale.size());
+ std::vector<float> scaling_factor(size);
+ std::vector<int64_t> zp(size);
+
+ if (output_type == loco::DataType::U8)
+ {
+ new_bias = quant_bias_per_channel(const_bias, input_scale, weight_scale, scaling_factor, zp);
+ }
+ else if (output_type == loco::DataType::S16)
+ {
+ new_bias =
+ int16_quant_bias_per_channel(const_bias, input_scale, weight_scale, scaling_factor, zp);
+ }
+ else
+ {
+ throw std::runtime_error("Unsupported quantization type.");
+ }
+
+ auto quantparam = std::make_unique<CircleQuantParam>();
+ quantparam->scale = scaling_factor;
+ quantparam->zerop = zp;
+ assert(new_bias->quantparam() == nullptr); // bias should not be quantized before
+ new_bias->quantparam(std::move(quantparam));
+
+ return new_bias;
+ }
+ else
+ {
+ auto input_q = input->quantparam();
+ assert(input_q);
+ assert(input_q->scale.size() == 1); // Only support per-layer quant
+ auto input_scale = input_q->scale[0];
+
+ auto weight_q = weight->quantparam();
+ assert(weight_q);
+ assert(weight_q->scale.size() == 1); // Only support per-layer quant
+ auto weight_scale = weight_q->scale[0];
+
+ float scaling_factor{0};
+ int64_t zp{0};
+ new_bias =
+ asym_quant_bias_per_layer(const_bias, input_scale, weight_scale, &scaling_factor, &zp);
+ auto quantparam = std::make_unique<CircleQuantParam>();
+ quantparam->scale.push_back(scaling_factor);
+ quantparam->zerop.push_back(zp);
+ assert(new_bias->quantparam() == nullptr); // bias should not be quantized before
+ new_bias->quantparam(std::move(quantparam));
+
+ return new_bias;
+ }
+}
+
+void QuantizeBias::visit(luci::CircleConv2D *node)
+{
+ LOGGER(l);
+ INFO(l) << "QuantizeBias QuantizeBias::visit node: " << node->name() << std::endl;
+
+ if (auto iwb = IWB(node->input(), node->filter(), node->bias()))
+ {
+ auto new_bias = quantized_bias(iwb.input, iwb.weights, iwb.bias);
+ node->bias(new_bias);
+ }
+}
+
+void QuantizeBias::visit(luci::CircleDepthwiseConv2D *node)
+{
+ LOGGER(l);
+ INFO(l) << "QuantizeBias QuantizeBias::visit node: " << node->name() << std::endl;
+
+ if (auto iwb = IWB(node->input(), node->filter(), node->bias()))
+ {
+ auto new_bias = quantized_bias(iwb.input, iwb.weights, iwb.bias);
+ node->bias(new_bias);
+ }
+}
+
+void QuantizeBias::visit(luci::CircleTransposeConv *node)
+{
+ LOGGER(l);
+ INFO(l) << "QuantizeBias QuantizeBias::visit node: " << node->name() << std::endl;
+
+ if (auto iwb = IWB(node->outBackprop(), node->filter(), node->bias()))
+ {
+ auto new_bias = quantized_bias(iwb.input, iwb.weights, iwb.bias);
+ node->bias(new_bias);
+ }
+}
+
+void QuantizeBias::visit(luci::CircleFullyConnected *node)
+{
+ LOGGER(l);
+ INFO(l) << "QuantizeBias visit node: " << node->name() << std::endl;
+
+ if (auto iwb = IWB(node->input(), node->weights(), node->bias()))
+ {
+ auto new_bias = quantized_bias(iwb.input, iwb.weights, iwb.bias);
+ node->bias(new_bias);
+ }
+}
+
+} // namespace luci
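
Numerically, the per-layer bias path above fixes the bias scale to `input_scale * weight_scale` so that the quantized bias can be added directly to the int32 accumulator of the input-times-weights product. A small self-contained illustration with made-up scales:

```cpp
#include <cmath>
#include <cstdint>
#include <cstdio>

int main()
{
  const float input_scale = 0.05f;  // example activation scale
  const float weight_scale = 0.02f; // example per-layer weight scale
  const float bias = 0.37f;         // float bias value

  // The bias scale is s_in * s_w so that the int32 MAC result
  // (q_in - zp_in) * (q_w - zp_w) and the bias share one scale
  const float bias_scale = input_scale * weight_scale; // 0.001
  const int32_t q_bias = static_cast<int32_t>(std::round(bias / bias_scale));

  std::printf("bias_scale=%f q_bias=%d\n", bias_scale, q_bias); // q_bias = 370
  return 0;
}
```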
diff --git a/compiler/luci/pass/src/QuantizeBias.h b/compiler/luci/pass/src/QuantizeBias.h
new file mode 100644
index 000000000..8de09df72
--- /dev/null
+++ b/compiler/luci/pass/src/QuantizeBias.h
@@ -0,0 +1,56 @@
+/*
+ * Copyright (c) 2022 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __LUCI_QUANTIZE_BIAS_H__
+#define __LUCI_QUANTIZE_BIAS_H__
+
+#include <luci/Pass/QuantizationParameters.h>
+#include <luci/IR/CircleNodeVisitor.h>
+
+namespace luci
+{
+
+/**
+ * @brief QuantizeBias quantizes tensors for bias
+ * @details Use input/weights scale to quantize values
+ */
+struct QuantizeBias final : public luci::CircleNodeMutableVisitor<void>
+{
+ QuantizeBias(loco::DataType input, loco::DataType output, QuantizationGranularity gr)
+ : input_type(input), output_type(output), granularity(gr)
+ {
+ }
+
+ loco::DataType input_type;
+ loco::DataType output_type;
+ QuantizationGranularity granularity;
+
+private:
+ // Return a quantized bias node
+ CircleConst *quantized_bias(CircleNode *input, const CircleNode *weight, CircleNode *bias);
+
+ void visit(luci::CircleConv2D *node);
+ void visit(luci::CircleDepthwiseConv2D *node);
+ void visit(luci::CircleTransposeConv *node);
+ void visit(luci::CircleFullyConnected *node);
+
+ // Default behavior
+ void visit(luci::CircleNode *) {}
+};
+
+} // namespace luci
+
+#endif // __LUCI_QUANTIZE_BIAS_H__
diff --git a/compiler/luci/pass/src/QuantizeDequantizeWeightsPass.cpp b/compiler/luci/pass/src/QuantizeDequantizeWeightsPass.cpp
index c8ad87e3d..c9b35e0be 100644
--- a/compiler/luci/pass/src/QuantizeDequantizeWeightsPass.cpp
+++ b/compiler/luci/pass/src/QuantizeDequantizeWeightsPass.cpp
@@ -16,9 +16,11 @@
#include "luci/Pass/QuantizeDequantizeWeightsPass.h"
#include "QuantizationUtils.h"
+#include "helpers/LayerInfoMap.h"
#include <luci/IR/CircleNodes.h>
#include <luci/IR/CircleNodeVisitor.h>
+#include <luci/Service/Nodes/CircleConst.h>
#include <luci/Log.h>
#include <loco/IR/TensorShape.h>
@@ -251,7 +253,7 @@ void asymmetric_wdequant_with_minmax_per_layer(CircleConst *node, float scaling_
* @brief QuantizeDequantizeWeights quantizes and dequantizes tensors for weights
* @details Find min/max values on the fly, quantize the model, and dequantize the model
*/
-struct QuantizeDequantizeWeights final : public luci::CircleNodeMutableVisitor<bool>
+struct QuantizeDequantizeWeights final : public luci::CircleNodeMutableVisitor<void>
{
QuantizeDequantizeWeights(loco::DataType input, loco::DataType output,
QuantizationGranularity granularity)
@@ -263,88 +265,164 @@ struct QuantizeDequantizeWeights final : public luci::CircleNodeMutableVisitor<b
loco::DataType output_type;
QuantizationGranularity granularity;
- // Quantize and dequantize input tensors of each node
- bool visit(luci::CircleNode *node)
+private:
+ // Fake quantize weights (Only u8 quantization is supported for LWQ)
+ void fake_quantize_lwq(luci::CircleConst *weights) const
{
- assert(output_type == loco::DataType::U8 || output_type == loco::DataType::S16);
- LOGGER(l);
- INFO(l) << "QuantizeDequantizeWeights visit node: " << node->name() << std::endl;
- auto arity = node->arity();
- for (uint32_t i = 0; i < arity; i++)
+ assert(output_type == loco::DataType::U8); // FIX_CALLER_UNLESS
+
+ // Find min/max per layer
+ float min = std::numeric_limits<float>::max();
+ float max = std::numeric_limits<float>::lowest();
+ for (uint32_t i = 0; i < weights->size<loco::DataType::FLOAT32>(); i++)
{
- auto input_node = node->arg(i);
- auto circle_node = loco::must_cast<luci::CircleNode *>(input_node);
+ auto data = weights->at<loco::DataType::FLOAT32>(i);
+ min = data < min ? data : min;
+ max = data > max ? data : max;
+ }
+ float scaling_factor{0};
+ int64_t zp{0};
+ float nudged_min{0};
+ float nudged_max{0};
+
+ asymmetric_wquant_with_minmax_per_layer(weights, min, max, scaling_factor, zp, nudged_min,
+ nudged_max);
+ asymmetric_wdequant_with_minmax_per_layer(weights, scaling_factor, nudged_min);
+ auto quantparam = std::make_unique<CircleQuantParam>();
+ quantparam->min.push_back(nudged_min);
+ quantparam->max.push_back(nudged_max);
+ quantparam->scale.push_back(scaling_factor);
+ quantparam->zerop.push_back(zp);
+ weights->quantparam(std::move(quantparam));
+ }
- // Check if this is already quantized
- if (is_quantized(circle_node))
- continue;
+private:
+ // Fake quantize weights (u8/s16 quantization is supported for CWQ)
+ void fake_quantize_cwq(luci::CircleConst *weights) const
+ {
+ assert(output_type == loco::DataType::U8 ||
+ output_type == loco::DataType::S16); // FIX_CALLER_UNLESS
- if (is_weights(circle_node))
- {
- auto circle_const = loco::must_cast<luci::CircleConst *>(circle_node);
+ // Find min/max per channel
+ std::vector<float> min;
+ std::vector<float> max;
- // Find min/max per channel-wise
- if (granularity == QuantizationGranularity::ChannelWise)
- {
- std::vector<float> min;
- std::vector<float> max;
-
- cal_minmax_per_channel(circle_const, min, max);
-
- std::vector<float> nudged_min(min.size());
- std::vector<float> nudged_max(min.size());
- std::vector<float> scaling_factor(min.size());
- std::vector<int64_t> zp(min.size());
-
- if (output_type == loco::DataType::U8)
- {
- asymmetric_wquant_per_channel(circle_const, min, max, scaling_factor, zp, nudged_min,
- nudged_max);
- asymmetric_wdequant_per_channel(circle_const, scaling_factor, nudged_min);
- }
- else
- {
- sym_wquant_per_channel(circle_const, min, max, scaling_factor, zp, nudged_min,
- nudged_max);
- sym_wdequant_per_channel(circle_const, scaling_factor);
- }
-
- auto quantparam = std::make_unique<CircleQuantParam>();
- quantparam->min = nudged_min;
- quantparam->max = nudged_max;
- quantparam->scale = scaling_factor;
- quantparam->zerop = zp;
- circle_node->quantparam(std::move(quantparam));
- }
- // Find min/max per layer-wise
- else
- {
- float min = std::numeric_limits<float>::max();
- float max = std::numeric_limits<float>::lowest();
- for (uint32_t i = 0; i < circle_const->size<loco::DataType::FLOAT32>(); i++)
- {
- auto data = circle_const->at<loco::DataType::FLOAT32>(i);
- min = data < min ? data : min;
- max = data > max ? data : max;
- }
- float scaling_factor{0};
- int64_t zp{0};
- float nudged_min{0};
- float nudged_max{0};
-
- asymmetric_wquant_with_minmax_per_layer(circle_const, min, max, scaling_factor, zp,
- nudged_min, nudged_max);
- asymmetric_wdequant_with_minmax_per_layer(circle_const, scaling_factor, nudged_min);
- auto quantparam = std::make_unique<CircleQuantParam>();
- quantparam->min.push_back(nudged_min);
- quantparam->max.push_back(nudged_max);
- quantparam->scale.push_back(scaling_factor);
- quantparam->zerop.push_back(zp);
- circle_node->quantparam(std::move(quantparam));
- }
- }
+ cal_minmax_per_channel(weights, min, max);
+
+ std::vector<float> nudged_min(min.size());
+ std::vector<float> nudged_max(min.size());
+ std::vector<float> scaling_factor(min.size());
+ std::vector<int64_t> zp(min.size());
+
+ if (output_type == loco::DataType::U8)
+ {
+ asymmetric_wquant_per_channel(weights, min, max, scaling_factor, zp, nudged_min, nudged_max);
+ asymmetric_wdequant_per_channel(weights, scaling_factor, nudged_min);
+ }
+ else
+ {
+ sym_wquant_per_channel(weights, min, max, scaling_factor, zp, nudged_min, nudged_max);
+ sym_wdequant_per_channel(weights, scaling_factor);
}
- return false;
+
+ auto quantparam = std::make_unique<CircleQuantParam>();
+ quantparam->min = nudged_min;
+ quantparam->max = nudged_max;
+ quantparam->scale = scaling_factor;
+ quantparam->zerop = zp;
+ weights->quantparam(std::move(quantparam));
+ }
+
+private:
+ void fake_quantize(luci::CircleConst *weights) const
+ {
+ switch (granularity)
+ {
+ case luci::QuantizationGranularity::ChannelWise:
+ fake_quantize_cwq(weights);
+ break;
+ case luci::QuantizationGranularity::LayerWise:
+ fake_quantize_lwq(weights);
+ break;
+ default:
+ throw std::invalid_argument("Unsupported granularity");
+ }
+ }
+
+private:
+ // Check if
+ // 1. node is const
+ // 2. node was not quantized
+ bool is_quantizable(loco::Node *node)
+ {
+ auto const_node = dynamic_cast<luci::CircleConst *>(node);
+ if (not const_node)
+ return false;
+
+ // Skip if this is already quantized
+ if (is_quantized(const_node))
+ return false;
+
+ return true;
+ }
+
+ // Default behavior (Do nothing)
+ void visit(luci::CircleNode *) {}
+
+ void visit(luci::CircleConv2D *node)
+ {
+ LOGGER(l);
+ INFO(l) << "QuantizeDequantizeWeights visit node: " << node->name() << std::endl;
+
+ if (not is_quantizable(node->filter()))
+ return;
+
+ auto weights = loco::must_cast<luci::CircleConst *>(node->filter());
+ auto new_weights = luci::clone(weights);
+ node->filter(new_weights);
+ fake_quantize(new_weights);
+ }
+
+ void visit(luci::CircleDepthwiseConv2D *node)
+ {
+ LOGGER(l);
+ INFO(l) << "QuantizeDequantizeWeights visit node: " << node->name() << std::endl;
+
+ if (not is_quantizable(node->filter()))
+ return;
+
+ auto weights = loco::must_cast<luci::CircleConst *>(node->filter());
+ auto new_weights = luci::clone(weights);
+ node->filter(new_weights);
+ fake_quantize(new_weights);
+ }
+
+ void visit(luci::CircleTransposeConv *node)
+ {
+ LOGGER(l);
+ INFO(l) << "QuantizeDequantizeWeights visit node: " << node->name() << std::endl;
+
+ if (not is_quantizable(node->filter()))
+ return;
+
+ auto weights = loco::must_cast<luci::CircleConst *>(node->filter());
+ auto new_weights = luci::clone(weights);
+ node->filter(new_weights);
+ fake_quantize(new_weights);
+ }
+
+ void visit(luci::CircleFullyConnected *node)
+ {
+ LOGGER(l);
+ INFO(l) << "QuantizeDequantizeWeights visit node: " << node->name() << std::endl;
+
+ if (not is_quantizable(node->weights()))
+ return;
+
+ auto weights = loco::must_cast<luci::CircleConst *>(node->weights());
+ auto new_weights = luci::clone(weights);
+ node->weights(new_weights);
+ fake_quantize(new_weights);
}
};
@@ -355,11 +433,36 @@ bool QuantizeDequantizeWeightsPass::run(loco::Graph *g)
LOGGER(l);
INFO(l) << "QuantizeDequantizeWeightsPass Start" << std::endl;
+ auto info_by_name = layer_info_map(g, _ctx->layers_info);
+
+ auto quantize_dtype = [&](const luci::CircleNode *node) {
+ auto iter = info_by_name.find(node->name());
+
+ // Return designated quantization dtype
+ if (iter != info_by_name.end())
+ return iter->second.dtype;
+
+ // Return default quantization dtype
+ return _ctx->output_model_dtype;
+ };
+
+ auto quantize_granularity = [&](const luci::CircleNode *node) {
+ auto iter = info_by_name.find(node->name());
+
+ // Return designated quantization granularity
+ if (iter != info_by_name.end())
+ return iter->second.granularity;
+
+ // Return default quantization granularity
+ return _ctx->granularity;
+ };
+
// Quantize weights
for (auto node : loco::active_nodes(loco::output_nodes(g)))
{
- QuantizeDequantizeWeights qw(_input_model_dtype, _output_model_dtype, _granularity);
auto circle_node = loco::must_cast<luci::CircleNode *>(node);
+ QuantizeDequantizeWeights qw(_ctx->input_model_dtype, quantize_dtype(circle_node),
+ quantize_granularity(circle_node));
circle_node->accept(&qw);
}
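
To see what "quantize-dequantize" does to a weight, the round trip below applies the per-layer asymmetric formulas directly. Min/max nudging is omitted, so this is a simplified sketch rather than the exact in-tree arithmetic:

```cpp
// Fake quantization = quantize then immediately dequantize, so the graph
// stays float but the weights carry the quantization error.
#include <cmath>
#include <cstdio>

int main()
{
  const float min = -1.0f, max = 3.0f;
  const float scale = (max - min) / 255.0f;

  const float w = 0.42f;                                         // original weight
  const int q = static_cast<int>(std::round((w - min) / scale)); // quantize
  const float w_fake = q * scale + min;                          // dequantize

  std::printf("w=%f q=%d w_fake=%f\n", w, q, w_fake);
  // w_fake differs from w by at most scale/2 (~0.0078 here)
  return 0;
}
```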
diff --git a/compiler/luci/pass/src/QuantizeDequantizeWeightsPass.test.cpp b/compiler/luci/pass/src/QuantizeDequantizeWeightsPass.test.cpp
index f226253c2..15f5ca7ac 100644
--- a/compiler/luci/pass/src/QuantizeDequantizeWeightsPass.test.cpp
+++ b/compiler/luci/pass/src/QuantizeDequantizeWeightsPass.test.cpp
@@ -25,3 +25,17 @@ TEST(QuantizeDequantizeWeightsPassTest, name)
auto const name = pass.name();
ASSERT_NE(nullptr, name);
}
+
+TEST(QuantizeDequantizeWeightsPassTest, name_ctx)
+{
+ auto ctx = std::make_unique<luci::QuantizeDequantizeWeightsPass::Context>();
+ {
+ ctx->input_model_dtype = loco::DataType::FLOAT32;
+ ctx->output_model_dtype = loco::DataType::U8;
+ ctx->granularity = luci::QuantizationGranularity::LayerWise;
+ }
+
+ luci::QuantizeDequantizeWeightsPass pass(std::move(ctx));
+ auto const name = pass.name();
+ ASSERT_NE(nullptr, name);
+}
diff --git a/compiler/luci/pass/src/QuantizePreCheckerPass.cpp b/compiler/luci/pass/src/QuantizePreCheckerPass.cpp
new file mode 100644
index 000000000..4b3b7e330
--- /dev/null
+++ b/compiler/luci/pass/src/QuantizePreCheckerPass.cpp
@@ -0,0 +1,119 @@
+/*
+ * Copyright (c) 2022 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "luci/Pass/QuantizePreCheckerPass.h"
+
+#include <luci/IR/CircleNodes.h>
+#include <luci/IR/CircleNodeVisitor.h>
+
+#include <luci/Log.h>
+
+namespace luci
+{
+
+namespace
+{
+
+void check_const_opcode(luci::CircleNode *node)
+{
+ if (node == nullptr)
+ return;
+
+ if (node->opcode() != luci::CircleOpcode::CIRCLECONST and
+ node->opcode() != luci::CircleOpcode::CIRCLEOUTPUTEXCLUDE)
+ {
+ throw std::runtime_error("Unsupported non const input " + node->name());
+ }
+}
+
+struct ConstInputChecker final : public luci::CircleNodeMutableVisitor<void>
+{
+// INPUT_NAME is the name of the const input of NODE
+#define CHECK_NODE_WITH_ONE_INPUT_CONST(NODE, INPUT_NAME) \
+ void visit(NODE *node) \
+ { \
+ const auto input = dynamic_cast<luci::CircleNode *>(node->INPUT_NAME()); \
+ check_const_opcode(input); \
+ }
+
+// INPUT_NAME_1 and INPUT_NAME_2 are the names of the const inputs of NODE
+#define CHECK_NODE_WITH_TWO_INPUT_CONST(NODE, INPUT_NAME_1, INPUT_NAME_2) \
+ void visit(NODE *node) \
+ { \
+ const auto input_1 = dynamic_cast<luci::CircleNode *>(node->INPUT_NAME_1()); \
+ const auto input_2 = dynamic_cast<luci::CircleNode *>(node->INPUT_NAME_2()); \
+ \
+ check_const_opcode(input_1); \
+ check_const_opcode(input_2); \
+ }
+
+// INPUT_NAME_1, INPUT_NAME_2, and INPUT_NAME_3 are the names of the const inputs of NODE
+#define CHECK_NODE_WITH_THREE_INPUT_CONST(NODE, INPUT_NAME_1, INPUT_NAME_2, INPUT_NAME_3) \
+ void visit(NODE *node) \
+ { \
+ const auto input_1 = dynamic_cast<luci::CircleNode *>(node->INPUT_NAME_1()); \
+ const auto input_2 = dynamic_cast<luci::CircleNode *>(node->INPUT_NAME_2()); \
+ const auto input_3 = dynamic_cast<luci::CircleNode *>(node->INPUT_NAME_3()); \
+ \
+ check_const_opcode(input_1); \
+ check_const_opcode(input_2); \
+ check_const_opcode(input_3); \
+ }
+
+ // Skip other circle nodes
+ void visit(luci::CircleNode *) {}
+
+ // Ops that receive one const node as input
+ CHECK_NODE_WITH_ONE_INPUT_CONST(luci::CirclePRelu, alpha)
+
+ // Ops that receive two const nodes as inputs
+ CHECK_NODE_WITH_TWO_INPUT_CONST(luci::CircleConv2D, filter, bias)
+ CHECK_NODE_WITH_TWO_INPUT_CONST(luci::CircleDepthwiseConv2D, filter, bias)
+ CHECK_NODE_WITH_TWO_INPUT_CONST(luci::CircleFullyConnected, weights, bias)
+ CHECK_NODE_WITH_TWO_INPUT_CONST(luci::CircleInstanceNorm, gamma, beta)
+
+ // Ops that receive three const nodes as inputs
+ CHECK_NODE_WITH_THREE_INPUT_CONST(luci::CircleTransposeConv, inputSizes, filter, bias)
+
+#undef CHECK_NODE_WITH_ONE_INPUT_CONST
+#undef CHECK_NODE_WITH_TWO_INPUT_CONST
+#undef CHECK_NODE_WITH_THREE_INPUT_CONST
+};
+
+} // namespace
+
+/**
+ * Verify that the input model has a form acceptable to the quantizer
+ */
+bool QuantizePreCheckerPass::run(loco::Graph *g)
+{
+ LOGGER(l);
+ INFO(l) << "QuantizePreCheckerPass Start" << std::endl;
+
+ for (auto node : loco::active_nodes(loco::output_nodes(g)))
+ {
+ // Check const inputs
+ auto circle_node = loco::must_cast<luci::CircleNode *>(node);
+ ConstInputChecker checker{};
+ circle_node->accept(&checker);
+ }
+
+ INFO(l) << "QuantizePreCheckerPass End" << std::endl;
+
+ return false; // one time run
+}
+
+} // namespace luci
diff --git a/compiler/luci/pass/src/QuantizePreCheckerPass.test.cpp b/compiler/luci/pass/src/QuantizePreCheckerPass.test.cpp
new file mode 100644
index 000000000..788353cd8
--- /dev/null
+++ b/compiler/luci/pass/src/QuantizePreCheckerPass.test.cpp
@@ -0,0 +1,401 @@
+/*
+ * Copyright (c) 2022 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "luci/Pass/QuantizePreCheckerPass.h"
+
+#include <luci/IR/CircleNodes.h>
+
+#include <gtest/gtest.h>
+
+class SimpleConv2DGraph
+{
+public:
+ SimpleConv2DGraph(bool make_valid)
+ {
+ conv2d_node = g.nodes()->create<luci::CircleConv2D>();
+ input_1 = g.nodes()->create<luci::CircleInput>();
+ filter = g.nodes()->create<luci::CircleConst>();
+
+ conv2d_node->input(input_1);
+ conv2d_node->filter(filter);
+
+ if (make_valid)
+ {
+ bias = g.nodes()->create<luci::CircleConst>();
+ conv2d_node->bias(bias);
+ }
+ else
+ {
+ input_2 = g.nodes()->create<luci::CircleInput>();
+ conv2d_node->bias(input_2);
+ }
+
+ output = g.nodes()->create<luci::CircleOutput>();
+
+ auto graph_output = g.outputs()->create();
+ output->index(graph_output->index());
+
+ output->from(conv2d_node);
+ }
+
+public:
+ loco::Graph g;
+
+private:
+ luci::CircleConv2D *conv2d_node = nullptr;
+ luci::CircleInput *input_1 = nullptr;
+ luci::CircleInput *input_2 = nullptr;
+ luci::CircleConst *filter = nullptr;
+ luci::CircleConst *bias = nullptr;
+ luci::CircleOutput *output = nullptr;
+};
+
+class SimpleDepthConv2DGraph
+{
+public:
+ SimpleDepthConv2DGraph(bool make_valid)
+ {
+ depth_conv2d_node = g.nodes()->create<luci::CircleDepthwiseConv2D>();
+ input_1 = g.nodes()->create<luci::CircleInput>();
+ filter = g.nodes()->create<luci::CircleConst>();
+
+ depth_conv2d_node->input(input_1);
+ depth_conv2d_node->filter(filter);
+
+ if (make_valid)
+ {
+ bias = g.nodes()->create<luci::CircleConst>();
+ depth_conv2d_node->bias(bias);
+ }
+ else
+ {
+ input_2 = g.nodes()->create<luci::CircleInput>();
+ depth_conv2d_node->bias(input_2);
+ }
+
+ output = g.nodes()->create<luci::CircleOutput>();
+
+ auto graph_output = g.outputs()->create();
+ output->index(graph_output->index());
+
+ output->from(depth_conv2d_node);
+ }
+
+public:
+ loco::Graph g;
+
+private:
+ luci::CircleDepthwiseConv2D *depth_conv2d_node = nullptr;
+ luci::CircleInput *input_1 = nullptr;
+ luci::CircleInput *input_2 = nullptr;
+ luci::CircleConst *filter = nullptr;
+ luci::CircleConst *bias = nullptr;
+ luci::CircleOutput *output = nullptr;
+};
+
+class SimpleFCGraph
+{
+public:
+ SimpleFCGraph(bool make_valid)
+ {
+ fc_node = g.nodes()->create<luci::CircleFullyConnected>();
+ input_1 = g.nodes()->create<luci::CircleInput>();
+ weights = g.nodes()->create<luci::CircleConst>();
+
+ fc_node->input(input_1);
+ fc_node->weights(weights);
+
+ if (make_valid)
+ {
+ bias = g.nodes()->create<luci::CircleConst>();
+ fc_node->bias(bias);
+ }
+ else
+ {
+ input_2 = g.nodes()->create<luci::CircleInput>();
+ fc_node->bias(input_2);
+ }
+
+ output = g.nodes()->create<luci::CircleOutput>();
+
+ auto graph_output = g.outputs()->create();
+ output->index(graph_output->index());
+
+ output->from(fc_node);
+ }
+
+public:
+ loco::Graph g;
+
+private:
+ luci::CircleFullyConnected *fc_node = nullptr;
+ luci::CircleInput *input_1 = nullptr;
+ luci::CircleInput *input_2 = nullptr;
+ luci::CircleConst *weights = nullptr;
+ luci::CircleConst *bias = nullptr;
+ luci::CircleOutput *output = nullptr;
+};
+
+class SimpleInstanceNormGraph
+{
+public:
+ SimpleInstanceNormGraph(bool make_valid)
+ {
+ instance_norm_node = g.nodes()->create<luci::CircleInstanceNorm>();
+ input_1 = g.nodes()->create<luci::CircleInput>();
+ gamma = g.nodes()->create<luci::CircleConst>();
+
+ instance_norm_node->input(input_1);
+ instance_norm_node->gamma(gamma);
+
+ if (make_valid)
+ {
+ beta = g.nodes()->create<luci::CircleConst>();
+ instance_norm_node->beta(beta);
+ }
+ else
+ {
+ input_2 = g.nodes()->create<luci::CircleInput>();
+ instance_norm_node->beta(input_2);
+ }
+
+ output = g.nodes()->create<luci::CircleOutput>();
+
+ auto graph_output = g.outputs()->create();
+ output->index(graph_output->index());
+
+ output->from(instance_norm_node);
+ }
+
+public:
+ loco::Graph g;
+
+private:
+ luci::CircleInstanceNorm *instance_norm_node = nullptr;
+ luci::CircleInput *input_1 = nullptr;
+ luci::CircleInput *input_2 = nullptr;
+ luci::CircleConst *gamma = nullptr;
+ luci::CircleConst *beta = nullptr;
+ luci::CircleOutput *output = nullptr;
+};
+
+class SimpleTransposeConvGraph
+{
+public:
+ SimpleTransposeConvGraph(bool make_valid)
+ {
+ transpose_conv = g.nodes()->create<luci::CircleTransposeConv>();
+ input_1 = g.nodes()->create<luci::CircleInput>();
+
+ input_sizes = g.nodes()->create<luci::CircleConst>();
+ filter = g.nodes()->create<luci::CircleConst>();
+
+ transpose_conv->outBackprop(input_1);
+ transpose_conv->filter(filter);
+ transpose_conv->inputSizes(input_sizes);
+
+ if (make_valid)
+ {
+ bias = g.nodes()->create<luci::CircleConst>();
+ transpose_conv->bias(bias);
+ }
+ else
+ {
+ input_2 = g.nodes()->create<luci::CircleInput>();
+ transpose_conv->bias(input_2);
+ }
+
+ output = g.nodes()->create<luci::CircleOutput>();
+
+ auto graph_output = g.outputs()->create();
+ output->index(graph_output->index());
+
+ output->from(transpose_conv);
+ }
+
+public:
+ loco::Graph g;
+
+private:
+ luci::CircleTransposeConv *transpose_conv = nullptr;
+ luci::CircleInput *input_1 = nullptr;
+ luci::CircleInput *input_2 = nullptr;
+ luci::CircleConst *input_sizes = nullptr;
+ luci::CircleConst *filter = nullptr;
+ luci::CircleConst *bias = nullptr;
+ luci::CircleOutput *output = nullptr;
+};
+
+class SimplePReluGraph
+{
+public:
+ SimplePReluGraph(bool make_valid)
+ {
+ prelu = g.nodes()->create<luci::CirclePRelu>();
+ input_1 = g.nodes()->create<luci::CircleInput>();
+
+ prelu->input(input_1);
+
+ if (make_valid)
+ {
+ alpha = g.nodes()->create<luci::CircleConst>();
+ prelu->alpha(alpha);
+ }
+ else
+ {
+ input_2 = g.nodes()->create<luci::CircleInput>();
+ prelu->alpha(input_2);
+ }
+
+ output = g.nodes()->create<luci::CircleOutput>();
+
+ auto graph_output = g.outputs()->create();
+ output->index(graph_output->index());
+
+ output->from(prelu);
+ }
+
+public:
+ loco::Graph g;
+
+private:
+ luci::CirclePRelu *prelu = nullptr;
+ luci::CircleInput *input_1 = nullptr;
+ luci::CircleInput *input_2 = nullptr;
+ luci::CircleConst *alpha = nullptr;
+ luci::CircleOutput *output = nullptr;
+};
+
+TEST(QuantizePreCheckerPassTest, name)
+{
+ luci::QuantizePreCheckerPass pass{};
+ auto const name = pass.name();
+ ASSERT_NE(nullptr, name);
+}
+
+// Test Conv2d
+TEST(QuantizePreCheckerPassTest, conv2d)
+{
+ SimpleConv2DGraph valid_graph(true);
+
+ luci::QuantizePreCheckerPass checker{};
+
+ EXPECT_NO_THROW(checker.run(&valid_graph.g));
+}
+
+TEST(QuantizePreCheckerPassTest, conv2d_NEG)
+{
+ SimpleConv2DGraph invalid_graph(false);
+
+ luci::QuantizePreCheckerPass checker{};
+
+ EXPECT_ANY_THROW(checker.run(&invalid_graph.g));
+}
+
+// Test DepthwiseConv2d
+TEST(QuantizePreCheckerPassTest, depthwise_conv2d)
+{
+ SimpleDepthConv2DGraph valid_graph(true);
+
+ luci::QuantizePreCheckerPass checker{};
+
+ EXPECT_NO_THROW(checker.run(&valid_graph.g));
+}
+
+TEST(QuantizePreCheckerPassTest, depthwise_conv2d_NEG)
+{
+ SimpleDepthConv2DGraph invalid_graph(false);
+
+ luci::QuantizePreCheckerPass checker{};
+
+ EXPECT_ANY_THROW(checker.run(&invalid_graph.g));
+}
+
+// Test FullyConnected
+TEST(QuantizePreCheckerPassTest, fully_connected)
+{
+ SimpleFCGraph valid_graph(true);
+
+ luci::QuantizePreCheckerPass checker{};
+
+ EXPECT_NO_THROW(checker.run(&valid_graph.g));
+}
+
+TEST(QuantizePreCheckerPassTest, fully_connected_NEG)
+{
+ SimpleFCGraph invalid_graph(false);
+
+ luci::QuantizePreCheckerPass checker{};
+
+ EXPECT_ANY_THROW(checker.run(&invalid_graph.g));
+}
+
+// Test InstanceNorm
+TEST(QuantizePreCheckerPassTest, instance_norm)
+{
+ SimpleInstanceNormGraph valid_graph(true);
+
+ luci::QuantizePreCheckerPass checker{};
+
+ EXPECT_NO_THROW(checker.run(&valid_graph.g));
+}
+
+TEST(QuantizePreCheckerPassTest, instance_norm_NEG)
+{
+ SimpleInstanceNormGraph invalid_graph(false);
+
+ luci::QuantizePreCheckerPass checker{};
+
+ EXPECT_ANY_THROW(checker.run(&invalid_graph.g));
+}
+
+// Test TransposeConv
+TEST(QuantizePreCheckerPassTest, transpose_conv)
+{
+ SimpleTransposeConvGraph valid_graph(true);
+
+ luci::QuantizePreCheckerPass checker{};
+
+ EXPECT_NO_THROW(checker.run(&valid_graph.g));
+}
+
+TEST(QuantizePreCheckerPassTest, transpose_conv_NEG)
+{
+ SimpleTransposeConvGraph invalid_graph(false);
+
+ luci::QuantizePreCheckerPass checker{};
+
+ EXPECT_ANY_THROW(checker.run(&invalid_graph.g));
+}
+
+// Test PRelu
+TEST(QuantizePreCheckerPassTest, prelu)
+{
+ SimplePReluGraph valid_graph(true);
+
+ luci::QuantizePreCheckerPass checker{};
+
+ EXPECT_NO_THROW(checker.run(&valid_graph.g));
+}
+
+TEST(QuantizePreCheckerPassTest, prelu_NEG)
+{
+ SimplePReluGraph invalid_graph(false);
+
+ luci::QuantizePreCheckerPass checker{};
+
+ EXPECT_ANY_THROW(checker.run(&invalid_graph.g));
+}
diff --git a/compiler/luci/pass/src/QuantizeWeights.cpp b/compiler/luci/pass/src/QuantizeWeights.cpp
new file mode 100644
index 000000000..11322ab44
--- /dev/null
+++ b/compiler/luci/pass/src/QuantizeWeights.cpp
@@ -0,0 +1,394 @@
+/*
+ * Copyright (c) 2022 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "QuantizeWeights.h"
+#include "QuantizationUtils.h"
+
+#include <luci/Service/Nodes/CircleConst.h>
+#include <luci/Log.h>
+
+#include <cmath>
+#include <vector>
+#include <functional>
+
+using namespace luci;
+
+namespace
+{
+
+using IterFunc = std::function<void(uint32_t *, loco::TensorShape &, int32_t)>;
+
+void iterate_per_channel(CircleConst *node, int32_t &channel_dim_index, IterFunc func)
+{
+ loco::TensorShape dimension;
+ dimension.rank(4);
+ uint32_t indices[4] = {
+ 0,
+ };
+
+ if (!get_channel_dim_index(node, dimension, channel_dim_index))
+ {
+ assert(false);
+ return;
+ }
+
+ for (indices[0] = 0; indices[0] < dimension.dim(0).value(); indices[0]++)
+ {
+ for (indices[1] = 0; indices[1] < dimension.dim(1).value(); indices[1]++)
+ {
+ for (indices[2] = 0; indices[2] < dimension.dim(2).value(); indices[2]++)
+ {
+ for (indices[3] = 0; indices[3] < dimension.dim(3).value(); indices[3]++)
+ {
+ func(indices, dimension, channel_dim_index);
+ }
+ }
+ }
+ }
+}
+
+void asym_wquant_per_channel(CircleConst *node, std::vector<float> &min,
+ std::vector<float> &scaling_factor, int32_t &channel_dim_index)
+{
+ assert(node->dtype() == loco::DataType::FLOAT32);
+
+ const int32_t kMinScale = 0;
+ const int32_t kMaxScale = 255;
+
+ uint32_t size = node->size<loco::DataType::FLOAT32>();
+ std::vector<int32_t> quantized_values(size);
+
+ auto quantize = [&](uint32_t *indices, loco::TensorShape &dimension, int32_t channel_dim_index) {
+ int channel_idx = indices[channel_dim_index];
+ const float scaling_factor_inv = 1.0 / scaling_factor[channel_idx];
+ auto data = node->at<loco::DataType::FLOAT32>(cal_offset(dimension, indices));
+ quantized_values[cal_offset(dimension, indices)] =
+ static_cast<int32_t>(std::round((data - min[channel_idx]) * scaling_factor_inv));
+ };
+
+ iterate_per_channel(node, channel_dim_index, quantize);
+
+ node->dtype(loco::DataType::U8); // change the type of tensor
+ node->size<loco::DataType::U8>(size); // resize tensor
+ for (uint32_t i = 0; i < size; ++i)
+ {
+ node->at<loco::DataType::U8>(i) = std::min(kMaxScale, std::max(kMinScale, quantized_values[i]));
+ }
+}
+
+void sym_wquant_per_channel(CircleConst *node, std::vector<float> &scaling_factor,
+ int32_t &channel_dim_index)
+{
+ assert(node->dtype() == loco::DataType::FLOAT32);
+
+ const int32_t kMaxScale = std::numeric_limits<int16_t>::max();
+ const int32_t kMinScale = -kMaxScale;
+
+ uint32_t size = node->size<loco::DataType::FLOAT32>();
+ std::vector<int32_t> quantized_values(size);
+
+ auto quantize = [&](uint32_t *indices, loco::TensorShape &dimension, int32_t channel_dim_index) {
+ int channel_idx = indices[channel_dim_index];
+ const float scaling_factor_inv = 1.0 / scaling_factor[channel_idx];
+ auto data = node->at<loco::DataType::FLOAT32>(cal_offset(dimension, indices));
+ quantized_values[cal_offset(dimension, indices)] =
+ static_cast<int32_t>(std::round(data * scaling_factor_inv));
+ };
+
+ iterate_per_channel(node, channel_dim_index, quantize);
+
+ node->dtype(loco::DataType::S16); // change the type of tensor
+ node->size<loco::DataType::S16>(size); // resize tensor
+ for (uint32_t i = 0; i < size; ++i)
+ {
+ node->at<loco::DataType::S16>(i) =
+ std::min(kMaxScale, std::max(kMinScale, quantized_values[i]));
+ }
+}
+
+void asym_wquant_per_layer(CircleConst *node, float min, float scaling_factor)
+{
+ const int32_t kMinScale = 0;
+ const int32_t kMaxScale = 255;
+
+ uint32_t size = node->size<loco::DataType::FLOAT32>();
+
+ const float scaling_factor_inv = 1.0 / scaling_factor;
+ std::vector<int32_t> quantized_values(size);
+ for (uint32_t i = 0; i < size; ++i)
+ {
+ auto data = node->at<loco::DataType::FLOAT32>(i);
+ quantized_values[i] = static_cast<int32_t>(std::round((data - min) * scaling_factor_inv));
+ }
+
+ node->dtype(loco::DataType::U8); // change the type of tensor
+ node->size<loco::DataType::U8>(size); // resize tensor
+ for (uint32_t i = 0; i < size; ++i)
+ {
+ node->at<loco::DataType::U8>(i) = std::min(kMaxScale, std::max(kMinScale, quantized_values[i]));
+ }
+}
+
+// Quantize const per channel
+//
+// The last dimension of const is the same as the dimension of channel
+// And the rest of the const dimensions should be 1
+// So, a 'single value' is quantized per channel
+//
+// Quantization spec (f: fp value, q: quantized value)
+//
+// uint8
+// Positive f: f = f * (q - 0) [q = 1, scale = f, zp = 0]
+// Negative f: f = (-f) * (q - 1) [q = 0, scale = -f, zp = 1]
+//
+// int16
+// Positive f: f = f * (q - 0) [q = 1, scale = f, zp = 0]
+// Negative f: f = (-f) * (q - 0) [q = -1, scale = -f, zp = 0]
+void quant_const_per_channel(CircleConst *node, loco::DataType quant_type)
+{
+ assert(node->dtype() == loco::DataType::FLOAT32);
+ assert(node->rank() > 0);
+
+ for (uint32_t i = 0; i < node->rank() - 1; i++)
+ {
+ // The caller must ensure the condition below holds before calling this function
+ if (node->dim(i).value() != 1)
+ throw std::runtime_error("Non-channel dimension of const node must be 1");
+ }
+
+ uint32_t size = node->size<loco::DataType::FLOAT32>();
+ assert(size == node->dim(node->rank() - 1).value());
+
+ auto quantparam = std::make_unique<CircleQuantParam>();
+ quantparam->quantized_dimension = node->rank() - 1;
+ std::vector<int32_t> quantized_data(size);
+
+ for (uint32_t i = 0; i < size; ++i)
+ {
+ auto data = node->at<loco::DataType::FLOAT32>(i);
+ if (quant_type == loco::DataType::U8)
+ {
+ if (data >= 0)
+ {
+ quantparam->scale.push_back(data);
+ quantparam->zerop.push_back(0);
+ quantized_data[i] = 1;
+ }
+ else
+ {
+ quantparam->scale.push_back(-data);
+ quantparam->zerop.push_back(1);
+ quantized_data[i] = 0;
+ }
+ }
+ else if (quant_type == loco::DataType::S16)
+ {
+ if (data >= 0)
+ {
+ quantparam->scale.push_back(data);
+ quantized_data[i] = 1;
+ }
+ else
+ {
+ quantparam->scale.push_back(-data);
+ quantized_data[i] = -1;
+ }
+ quantparam->zerop.push_back(0);
+ }
+ }
+ node->quantparam(std::move(quantparam));
+
+ switch (quant_type)
+ {
+ case loco::DataType::U8:
+ node->dtype(loco::DataType::U8);
+ node->size<loco::DataType::U8>(size);
+ for (uint32_t i = 0; i < size; ++i)
+ {
+ assert(quantized_data[i] == 0 || quantized_data[i] == 1);
+ node->at<loco::DataType::U8>(i) = quantized_data[i];
+ }
+ break;
+ case loco::DataType::S16:
+ node->dtype(loco::DataType::S16);
+ node->size<loco::DataType::S16>(size);
+ for (uint32_t i = 0; i < size; ++i)
+ {
+ assert(quantized_data[i] == -1 || quantized_data[i] == 1);
+ node->at<loco::DataType::S16>(i) = quantized_data[i];
+ }
+ break;
+ default:
+ throw std::runtime_error("Unsupported data type");
+ }
+}
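+// Worked example of the spec above (illustrative): for f = -0.25,
+//   uint8: scale = 0.25, zp = 1, q = 0  -> scale * (q - zp) = -0.25
+//   int16: scale = 0.25, zp = 0, q = -1 -> scale * (q - zp) = -0.25
+// i.e., each channel value is reproduced exactly by its own scale/zp pair.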
+
+} // namespace
+
+namespace luci
+{
+
+void QuantizeWeights::quantize_weights(luci::CircleConst *weights)
+{
+ // Find min/max per channel
+ if (granularity == QuantizationGranularity::ChannelWise)
+ {
+ auto quantparam = weights->quantparam();
+ if (quantparam == nullptr)
+ {
+ assert(false && "quantparam is nullptr");
+ return;
+ }
+
+ auto min = quantparam->min;
+ auto scaling_factor = quantparam->scale;
+ int32_t channel_dim_index = 0;
+
+ if (output_type == loco::DataType::U8)
+ {
+ asym_wquant_per_channel(weights, min, scaling_factor, channel_dim_index);
+ }
+ else
+ {
+ sym_wquant_per_channel(weights, scaling_factor, channel_dim_index);
+ }
+ quantparam->min.clear();
+ quantparam->max.clear();
+ quantparam->quantized_dimension = channel_dim_index;
+ }
+ // Find min/max per layer
+ else
+ {
+ // Quantize using recorded quantparam
+ auto quantparam = weights->quantparam();
+ assert(quantparam != nullptr);
+ assert(quantparam->min.size() == 1); // only support layer-wise quant
+ assert(quantparam->scale.size() == 1); // only support layer-wise quant
+ auto min = quantparam->min[0];
+ auto scaling_factor = quantparam->scale[0];
+ asym_wquant_per_layer(weights, min, scaling_factor);
+ quantparam->min.clear();
+ quantparam->max.clear();
+ }
+}
+void QuantizeWeights::visit(luci::CircleConv2D *node)
+{
+ LOGGER(l);
+ INFO(l) << "QuantizeWeights::visit node: " << node->name() << std::endl;
+
+ auto weights = loco::must_cast<luci::CircleConst *>(node->filter());
+ if (!is_quantized(weights))
+ {
+ auto new_weights = luci::clone(weights);
+ node->filter(new_weights);
+ quantize_weights(new_weights);
+ }
+}
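+// NOTE The clone-then-assign pattern above (repeated in the visitors below)
+// gives this operator its own copy of the weights, so a const node shared
+// by several operators is not quantized twice through one buffer (a reading
+// of the pattern; the commit does not state this rationale).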
+
+void QuantizeWeights::visit(luci::CircleDepthwiseConv2D *node)
+{
+ LOGGER(l);
+ INFO(l) << "QuantizeWeights::visit node: " << node->name() << std::endl;
+
+ auto weights = loco::must_cast<luci::CircleConst *>(node->filter());
+ if (!is_quantized(weights))
+ {
+ auto new_weights = luci::clone(weights);
+ node->filter(new_weights);
+ quantize_weights(new_weights);
+ }
+}
+
+void QuantizeWeights::visit(luci::CircleInstanceNorm *node)
+{
+ LOGGER(l);
+ INFO(l) << "QuantizeWeights::visit node: " << node->name() << std::endl;
+
+ auto gamma = loco::must_cast<luci::CircleConst *>(node->gamma());
+ auto beta = loco::must_cast<luci::CircleConst *>(node->beta());
+
+ if (!is_quantized(gamma))
+ {
+ assert(gamma->dtype() == loco::DataType::FLOAT32);
+ auto new_gamma = luci::clone(gamma);
+ if (granularity == QuantizationGranularity::LayerWise)
+ quant_const(new_gamma, output_type);
+ else if (granularity == QuantizationGranularity::ChannelWise)
+ quant_const_per_channel(new_gamma, output_type);
+ node->gamma(new_gamma);
+ }
+ if (!is_quantized(beta))
+ {
+ assert(beta->dtype() == loco::DataType::FLOAT32);
+ auto new_beta = luci::clone(beta);
+ if (granularity == QuantizationGranularity::LayerWise)
+ quant_const(new_beta, output_type);
+ else if (granularity == QuantizationGranularity::ChannelWise)
+ quant_const_per_channel(new_beta, output_type);
+ node->beta(new_beta);
+ }
+}
+
+void QuantizeWeights::visit(luci::CirclePRelu *node)
+{
+ LOGGER(l);
+ INFO(l) << "QuantizeWeights::visit node: " << node->name() << std::endl;
+
+ auto alpha = loco::must_cast<luci::CircleConst *>(node->alpha());
+
+ if (!is_quantized(alpha))
+ {
+ assert(alpha->dtype() == loco::DataType::FLOAT32);
+ auto new_alpha = luci::clone(alpha);
+ if (granularity == QuantizationGranularity::LayerWise)
+ quant_const(new_alpha, output_type);
+ else if (granularity == QuantizationGranularity::ChannelWise)
+ quant_const_per_channel(new_alpha, output_type);
+ node->alpha(new_alpha);
+ }
+}
+
+void QuantizeWeights::visit(luci::CircleTransposeConv *node)
+{
+ LOGGER(l);
+ INFO(l) << "QuantizeWeights::visit node: " << node->name() << std::endl;
+
+ auto weights = loco::must_cast<luci::CircleConst *>(node->filter());
+ if (!is_quantized(weights))
+ {
+ auto new_weights = luci::clone(weights);
+ node->filter(new_weights);
+ quantize_weights(new_weights);
+ }
+}
+
+void QuantizeWeights::visit(luci::CircleFullyConnected *node)
+{
+ LOGGER(l);
+ INFO(l) << "QuantizeWeights::visit node: " << node->name() << std::endl;
+
+ auto weights = loco::must_cast<luci::CircleConst *>(node->weights());
+ if (!is_quantized(weights))
+ {
+ auto new_weights = luci::clone(weights);
+ node->weights(new_weights);
+ quantize_weights(new_weights);
+ }
+}
+
+void QuantizeWeights::visit(luci::CircleNode *) {}
+
+} // namespace luci
diff --git a/compiler/luci/pass/src/QuantizeWeights.h b/compiler/luci/pass/src/QuantizeWeights.h
new file mode 100644
index 000000000..f62cd40f3
--- /dev/null
+++ b/compiler/luci/pass/src/QuantizeWeights.h
@@ -0,0 +1,55 @@
+/*
+ * Copyright (c) 2022 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __LUCI_QUANTIZE_WEIGHTS_H__
+#define __LUCI_QUANTIZE_WEIGHTS_H__
+
+#include <luci/Pass/QuantizationParameters.h>
+#include <luci/IR/CircleNodeVisitor.h>
+
+namespace luci
+{
+
+/**
+ * @brief QuantizeWeights quantizes tensors for weights
+ * @details Quantize weights using min/max (or scale) values recorded in quantparam
+ */
+struct QuantizeWeights final : public luci::CircleNodeMutableVisitor<void>
+{
+ QuantizeWeights(loco::DataType input, loco::DataType output, QuantizationGranularity gr)
+ : input_type(input), output_type(output), granularity(gr)
+ {
+ }
+
+ loco::DataType input_type;
+ loco::DataType output_type;
+ QuantizationGranularity granularity;
+
+private:
+ void quantize_weights(luci::CircleConst *weights);
+
+ void visit(luci::CircleConv2D *node);
+ void visit(luci::CircleDepthwiseConv2D *node);
+ void visit(luci::CircleInstanceNorm *node);
+ void visit(luci::CirclePRelu *node);
+ void visit(luci::CircleTransposeConv *node);
+ void visit(luci::CircleFullyConnected *node);
+ void visit(luci::CircleNode *);
+};
+
+} // namespace luci
+
+#endif // __LUCI_QUANTIZE_WEIGHTS_H__
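+
+// Usage sketch (illustrative, assuming the usual luci visitor driving code;
+// not part of this header): the pass applies the visitor to every active
+// node of the graph, e.g.
+//
+//   luci::QuantizeWeights qw(loco::DataType::FLOAT32, loco::DataType::U8,
+//                            luci::QuantizationGranularity::ChannelWise);
+//   for (auto node : loco::active_nodes(loco::output_nodes(graph)))
+//     loco::must_cast<luci::CircleNode *>(node)->accept(&qw);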
diff --git a/compiler/luci/pass/src/QuantizeWithMinMaxPass.cpp b/compiler/luci/pass/src/QuantizeWithMinMaxPass.cpp
index c3552ec52..d9a9d4db7 100644
--- a/compiler/luci/pass/src/QuantizeWithMinMaxPass.cpp
+++ b/compiler/luci/pass/src/QuantizeWithMinMaxPass.cpp
@@ -15,55 +15,32 @@
*/
#include "luci/Pass/QuantizeWithMinMaxPass.h"
+#include "luci/Pass/PropagateQParamForwardPass.h"
+#include "luci/Pass/PropagateQParamBackwardPass.h"
+#include "luci/Pass/RemoveRedundantQuantizePass.h"
+#include "QuantizeActivation.h"
+#include "QuantizeWeights.h"
+#include "QuantizeBias.h"
#include "QuantizationUtils.h"
+#include "ProgressReporter.h"
+#include "helpers/LayerInfoMap.h"
#include <luci/IR/CircleNodes.h>
#include <luci/IR/CircleNodeVisitor.h>
#include <luci/Service/Nodes/CircleConst.h>
#include <luci/Profile/CircleNodeOrigin.h>
#include <luci/Log.h>
+#include <logo/Phase.h>
#include <oops/UserExn.h>
#include <iostream>
#include <cmath>
-#include <functional>
namespace
{
using namespace luci;
-using IterFunc = std::function<void(uint32_t *, loco::TensorShape &, int32_t)>;
-
-void iterate_per_channel(CircleConst *node, int32_t &channel_dim_index, IterFunc func)
-{
- loco::TensorShape dimension;
- dimension.rank(4);
- uint32_t indices[4] = {
- 0,
- };
-
- if (!get_channel_dim_index(node, dimension, channel_dim_index))
- {
- assert(false);
- return;
- }
-
- for (indices[0] = 0; indices[0] < dimension.dim(0).value(); indices[0]++)
- {
- for (indices[1] = 0; indices[1] < dimension.dim(1).value(); indices[1]++)
- {
- for (indices[2] = 0; indices[2] < dimension.dim(2).value(); indices[2]++)
- {
- for (indices[3] = 0; indices[3] < dimension.dim(3).value(); indices[3]++)
- {
- func(indices, dimension, channel_dim_index);
- }
- }
- }
- }
-}
-
// Create a Quantize Op whose
// dtype is out_type
// shape is the same with node
@@ -80,7 +57,17 @@ luci::CircleQuantize *create_quantize_op(luci::CircleNode *node, loco::DataType
quantize->shape_status(luci::ShapeStatus::VALID);
auto qparam = node->quantparam();
- assert(qparam); // FIX_CALLER_UNLESS
+ assert(qparam); // FIX_CALLER_UNLESS
+
+ auto qtype = luci::activation_qtype(node);
+ if (qtype == ActivationQType::PreDefinedValue)
+ {
+ quantize->quantparam(luci::make_predefined_qparam(node->opcode(), out_type));
+ return quantize;
+ }
+
+ assert(qtype == ActivationQType::MinMax or qtype == ActivationQType::IntScale);
+
assert(qparam->min.size() == 1); // FIX_CALLER_UNLESS
assert(qparam->max.size() == 1); // FIX_CALLER_UNLESS
auto min = qparam->min[0];
@@ -104,9 +91,17 @@ luci::CircleQuantize *create_quantize_op(luci::CircleNode *node, loco::DataType
auto quantparam = std::make_unique<CircleQuantParam>();
quantparam->scale.push_back(scaling_factor);
quantparam->zerop.push_back(zp);
+ // Save original min/max (not nudged_min/max). The nudged min/max
+ // differ from the real min/max values, which would produce a wrong
+ // qparam when the quantization dtype is changed.
+ quantparam->min.push_back(min);
+ quantparam->max.push_back(max);
quantize->quantparam(std::move(quantparam));
+ if (qtype == ActivationQType::IntScale)
+ set_int_scale(quantize);
+
return quantize;
}
@@ -118,1412 +113,232 @@ namespace luci
namespace
{
-// Create a new const node from an existing node.
-// The new node has the following characteristics
-// type: T
-// shape: same with 'node' (given as an argument)
-// buffer size: 'size' (given as an argument)
-// Note that contents are not filled in this function.
-template <loco::DataType T>
-luci::CircleConst *create_empty_const_from(luci::CircleConst *node, uint32_t size)
-{
- auto new_node = node->graph()->nodes()->create<CircleConst>();
- // TODO: We don't have any naming convention for quantized nodes yet.
- // Fix this when we have one.
- new_node->name(node->name());
- new_node->dtype(T);
- new_node->rank(node->rank());
- for (uint32_t i = 0; i < node->rank(); i++)
- new_node->dim(i).set(node->dim(i).value());
-
- new_node->size<T>(size);
- new_node->shape_status(luci::ShapeStatus::VALID);
-
- return new_node;
-}
-
-void overwrite_quantparam(luci::CircleNode *source, luci::CircleNode *target)
-{
- auto source_qparam = source->quantparam();
- if (source_qparam == nullptr)
- throw std::runtime_error("source quantparam is not found during overwrite");
-
- auto target_qparam = target->quantparam();
- if (target_qparam == nullptr)
- {
- auto quantparam = std::make_unique<CircleQuantParam>();
- target->quantparam(std::move(quantparam));
- target_qparam = target->quantparam();
-
- if (target_qparam == nullptr)
- throw std::runtime_error("Creating new quant param failed");
- }
- target_qparam->min = source_qparam->min;
- target_qparam->max = source_qparam->max;
- target_qparam->scale = source_qparam->scale;
- target_qparam->zerop = source_qparam->zerop;
- target_qparam->quantized_dimension = source_qparam->quantized_dimension;
-}
-
-void quant_const_values(luci::CircleConst *const_node, float scaling_factor, float zerop,
- loco::DataType quant_type)
-{
- uint32_t size = const_node->size<loco::DataType::FLOAT32>();
-
- const float scaling_factor_inv = 1.0 / scaling_factor;
- std::vector<int32_t> quantized_values(size);
- for (uint32_t i = 0; i < size; ++i)
- {
- auto data = static_cast<double>(const_node->at<loco::DataType::FLOAT32>(i));
- double quantized_float = std::round(data * scaling_factor_inv) + zerop;
- constexpr auto int_max = static_cast<double>(std::numeric_limits<int32_t>::max());
- constexpr auto int_min = static_cast<double>(std::numeric_limits<int32_t>::min());
- quantized_float = std::min(int_max, std::max(int_min, quantized_float));
-
- quantized_values[i] = static_cast<int32_t>(quantized_float);
- }
-
- switch (quant_type)
- {
- case loco::DataType::U8:
- const_node->dtype(loco::DataType::U8); // change the type of tensor
- const_node->size<loco::DataType::U8>(size); // resize tensor
- for (uint32_t i = 0; i < size; ++i)
- const_node->at<loco::DataType::U8>(i) = std::min(255, std::max(0, quantized_values[i]));
- break;
- case loco::DataType::S16:
- assert(zerop == 0);
- const_node->dtype(loco::DataType::S16); // change the type of tensor
- const_node->size<loco::DataType::S16>(size); // resize tensor
- for (uint32_t i = 0; i < size; ++i)
- const_node->at<loco::DataType::S16>(i) =
- std::min(32767, std::max(-32767, quantized_values[i]));
- break;
- default:
- throw std::runtime_error("Unsupported data type");
- }
-}
-
-// Quantize const per channel
-//
-// The last dimension of const is the same as the dimension of channel
-// And the rest of the const dimensions should be 1
-// So, a 'single value' is quantized per channel
-//
-// Quantization spec (f: fp value, q: quantized value)
-//
-// uint8
-// Positive f: f = f * (q - 0) [q = 1, scale = f, zp = 0]
-// Negative f: f = (-f) * (q - 1) [q = 0, scale = -f, zp = 1]
-//
-// int16
-// Positive f: f = f * (q - 0) [q = 1, scale = f, zp = 0]
-// Negative f: f = (-f) * (q - 0) [q = -1, scale = -f, zp = 0]
-void quant_const_per_channel(CircleConst *node, loco::DataType quant_type)
-{
- assert(node->dtype() == loco::DataType::FLOAT32);
- assert(node->rank() > 0);
-
- for (uint32_t i = 0; i < node->rank() - 1; i++)
- {
- // Caller should call this function when the below condition is satisfied
- if (node->dim(i).value() != 1)
- throw std::runtime_error("Non-channel dimension of const node must be 1");
- }
-
- uint32_t size = node->size<loco::DataType::FLOAT32>();
- assert(size == node->dim(node->rank() - 1).value());
-
- auto quantparam = std::make_unique<CircleQuantParam>();
- quantparam->quantized_dimension = node->rank() - 1;
- std::vector<int32_t> quantized_data(size);
-
- for (uint32_t i = 0; i < size; ++i)
- {
- auto data = node->at<loco::DataType::FLOAT32>(i);
- if (quant_type == loco::DataType::U8)
- {
- if (data >= 0)
- {
- quantparam->scale.push_back(data);
- quantparam->zerop.push_back(0);
- quantized_data[i] = 1;
- }
- else
- {
- quantparam->scale.push_back(-data);
- quantparam->zerop.push_back(1);
- quantized_data[i] = 0;
- }
- }
- else if (quant_type == loco::DataType::S16)
- {
- if (data >= 0)
- {
- quantparam->scale.push_back(data);
- quantized_data[i] = 1;
- }
- else
- {
- quantparam->scale.push_back(-data);
- quantized_data[i] = -1;
- }
- quantparam->zerop.push_back(0);
- }
- }
- node->quantparam(std::move(quantparam));
-
- switch (quant_type)
- {
- case loco::DataType::U8:
- node->dtype(loco::DataType::U8);
- node->size<loco::DataType::U8>(size);
- for (uint32_t i = 0; i < size; ++i)
- {
- assert(quantized_data[i] == 0 || quantized_data[i] == 1);
- node->at<loco::DataType::U8>(i) = quantized_data[i];
- }
- break;
- case loco::DataType::S16:
- node->dtype(loco::DataType::S16);
- node->size<loco::DataType::S16>(size);
- for (uint32_t i = 0; i < size; ++i)
- {
- assert(quantized_data[i] == -1 || quantized_data[i] == 1);
- node->at<loco::DataType::S16>(i) = quantized_data[i];
- }
- break;
- default:
- throw std::runtime_error("Unsupported data type");
- }
-}
-
-void quant_const(CircleConst *node, loco::DataType quant_type)
-{
- assert(node->dtype() == loco::DataType::FLOAT32);
-
- float min = std::numeric_limits<float>::max();
- float max = std::numeric_limits<float>::lowest();
- for (uint32_t i = 0; i < node->size<loco::DataType::FLOAT32>(); i++)
- {
- auto data = node->at<loco::DataType::FLOAT32>(i);
- min = data < min ? data : min;
- max = data > max ? data : max;
- }
-
- float scaling_factor{0.0};
- int64_t zp{0};
- float nudged_min{0.0};
- float nudged_max{0.0};
-
- switch (quant_type)
- {
- case loco::DataType::U8:
- asymmetric_wquant_with_minmax_per_layer(node, min, max, scaling_factor, zp, nudged_min,
- nudged_max);
- break;
- case loco::DataType::S16:
- symmetric_wquant_with_minmax_per_layer(node, min, max, scaling_factor, zp, nudged_min,
- nudged_max);
- break;
- default:
- throw std::runtime_error("Unsupported data type");
- }
-
- auto quantparam = std::make_unique<CircleQuantParam>();
- quantparam->scale.push_back(scaling_factor);
- quantparam->zerop.push_back(zp);
- node->quantparam(std::move(quantparam));
-}
-
-// Check if the node is the bias of Conv2D, DepthwiseConv2D, FullyConnected, or TransposeConv layer
-// Returns a list of <input, weights, output> vectors for the above operators.
-// Note that it returns a 'list' because bias can be used by multiple operators.
-std::vector<std::vector<loco::Node *>> get_input_weight_output_of_bias(CircleNode *node)
-{
- std::vector<std::vector<loco::Node *>> result;
- auto circle_const = dynamic_cast<CircleConst *>(node);
- if (circle_const == nullptr)
- return result;
-
- auto succs = loco::succs(node);
-
- for (auto out : succs)
- {
- auto conv = dynamic_cast<CircleConv2D *>(out);
- if (conv != nullptr && conv->bias() == circle_const)
- {
- assert(conv->input() != nullptr);
- assert(conv->filter() != nullptr);
- result.push_back({conv->input(), conv->filter(), conv});
- continue;
- }
- auto dw_conv = dynamic_cast<CircleDepthwiseConv2D *>(out);
- if (dw_conv != nullptr && dw_conv->bias() == circle_const)
- {
- assert(dw_conv->input() != nullptr);
- assert(dw_conv->filter() != nullptr);
- result.push_back({dw_conv->input(), dw_conv->filter(), dw_conv});
- continue;
- }
- auto fc = dynamic_cast<CircleFullyConnected *>(out);
- if (fc != nullptr && fc->bias() == circle_const)
- {
- assert(fc->input() != nullptr);
- assert(fc->weights() != nullptr);
- result.push_back({fc->input(), fc->weights(), fc});
- continue;
- }
- auto tconv = dynamic_cast<CircleTransposeConv *>(out);
- if (tconv != nullptr && tconv->bias() == circle_const)
- {
- assert(tconv->outBackprop() != nullptr);
- assert(tconv->filter() != nullptr);
- result.push_back({tconv->outBackprop(), tconv->filter(), tconv});
- continue;
- }
- }
- return result;
-}
-
-CircleConst *asym_quant_bias_per_layer(CircleConst *node, float input_scale, float weight_scale,
- float *scaling_factor, int64_t *zp)
-{
- float scale = input_scale * weight_scale;
- const float scaling_factor_inv = (scale == 0) ? 0 : 1.0 / scale;
-
- uint32_t size = node->size<loco::DataType::FLOAT32>();
- std::vector<int32_t> quantized_values(size);
- for (uint32_t i = 0; i < size; ++i)
- {
- quantized_values[i] =
- static_cast<int32_t>(std::round(node->at<loco::DataType::FLOAT32>(i) * scaling_factor_inv));
- }
-
- auto new_bias = create_empty_const_from<loco::DataType::S32>(node, size);
-
- const int32_t kMinScale = std::numeric_limits<int32_t>::lowest();
- const int32_t kMaxScale = std::numeric_limits<int32_t>::max();
- for (uint32_t i = 0; i < size; ++i)
- {
- new_bias->at<loco::DataType::S32>(i) =
- std::min(kMaxScale, std::max(kMinScale, quantized_values[i]));
- }
- *scaling_factor = scale;
- *zp = 0;
-
- return new_bias;
-}
-
-CircleConst *quant_bias_per_channel(CircleConst *node, float input_scale,
- std::vector<float> &weight_scale,
- std::vector<float> &scaling_factor, std::vector<int64_t> &zp)
-{
- float scaling_factor_inv{0};
-
- uint32_t size = node->size<loco::DataType::FLOAT32>();
- std::vector<int32_t> quantized_values(size);
-
- for (uint32_t i = 0; i < size; ++i)
- {
- scaling_factor[i] = input_scale * weight_scale[i];
- scaling_factor_inv = (scaling_factor[i] == 0) ? 0 : 1.0 / scaling_factor[i];
- quantized_values[i] =
- static_cast<int32_t>(std::round(node->at<loco::DataType::FLOAT32>(i) * scaling_factor_inv));
- zp[i] = 0;
- }
-
- auto new_bias = create_empty_const_from<loco::DataType::S32>(node, size);
-
- const int32_t kMinScale = std::numeric_limits<int32_t>::lowest();
- const int32_t kMaxScale = std::numeric_limits<int32_t>::max();
- for (uint32_t i = 0; i < size; ++i)
- {
- new_bias->at<loco::DataType::S32>(i) =
- std::min(kMaxScale, std::max(kMinScale, quantized_values[i]));
- }
-
- return new_bias;
-}
-
-CircleConst *int16_quant_bias_per_channel(CircleConst *node, float input_scale,
- std::vector<float> &weight_scale,
- std::vector<float> &scaling_factor,
- std::vector<int64_t> &zp)
-{
- float scaling_factor_inv{0};
-
- uint32_t size = node->size<loco::DataType::FLOAT32>();
- std::vector<int64_t> quantized_values(size);
-
- for (uint32_t i = 0; i < size; ++i)
- {
- scaling_factor[i] = input_scale * weight_scale[i];
- scaling_factor_inv = (scaling_factor[i] == 0) ? 0 : 1.0 / scaling_factor[i];
- quantized_values[i] =
- static_cast<int64_t>(std::round(node->at<loco::DataType::FLOAT32>(i) * scaling_factor_inv));
- zp[i] = 0;
- }
-
- auto new_bias = create_empty_const_from<loco::DataType::S64>(node, size);
-
- for (uint32_t i = 0; i < size; ++i)
- {
- new_bias->at<loco::DataType::S64>(i) = quantized_values[i];
- }
-
- return new_bias;
-}
-
-bool has_min_max(const CircleNode *node)
-{
- return node->quantparam() && !node->quantparam()->min.empty() && !node->quantparam()->max.empty();
-}
-
-void sym_wquant_per_channel(CircleConst *node, std::vector<float> &scaling_factor,
- int32_t &channel_dim_index)
-{
- assert(node->dtype() == loco::DataType::FLOAT32);
-
- const int32_t kMaxScale = std::numeric_limits<int16_t>::max();
- const int32_t kMinScale = -kMaxScale;
-
- uint32_t size = node->size<loco::DataType::FLOAT32>();
- std::vector<int32_t> quantized_values(size);
-
- auto quantize = [&](uint32_t *indices, loco::TensorShape &dimension, int32_t channel_dim_index) {
- int channel_idx = indices[channel_dim_index];
- const float scaling_factor_inv = 1.0 / scaling_factor[channel_idx];
- auto data = node->at<loco::DataType::FLOAT32>(cal_offset(dimension, indices));
- quantized_values[cal_offset(dimension, indices)] =
- static_cast<int32_t>(std::round(data * scaling_factor_inv));
- };
-
- iterate_per_channel(node, channel_dim_index, quantize);
-
- node->dtype(loco::DataType::S16); // change the type of tensor
- node->size<loco::DataType::S16>(size); // resize tensor
- for (uint32_t i = 0; i < size; ++i)
- {
- node->at<loco::DataType::S16>(i) =
- std::min(kMaxScale, std::max(kMinScale, quantized_values[i]));
- }
-}
-
-void asym_wquant_per_channel(CircleConst *node, std::vector<float> &min,
- std::vector<float> &scaling_factor, int32_t &channel_dim_index)
-{
- assert(node->dtype() == loco::DataType::FLOAT32);
-
- const int32_t kMinScale = 0;
- const int32_t kMaxScale = 255;
-
- uint32_t size = node->size<loco::DataType::FLOAT32>();
- std::vector<int32_t> quantized_values(size);
-
- auto quantize = [&](uint32_t *indices, loco::TensorShape &dimension, int32_t channel_dim_index) {
- int channel_idx = indices[channel_dim_index];
- const float scaling_factor_inv = 1.0 / scaling_factor[channel_idx];
- auto data = node->at<loco::DataType::FLOAT32>(cal_offset(dimension, indices));
- quantized_values[cal_offset(dimension, indices)] =
- static_cast<int32_t>(std::round((data - min[channel_idx]) * scaling_factor_inv));
- };
-
- iterate_per_channel(node, channel_dim_index, quantize);
-
- node->dtype(loco::DataType::U8); // change the type of tensor
- node->size<loco::DataType::U8>(size); // resize tensor
- for (uint32_t i = 0; i < size; ++i)
- {
- node->at<loco::DataType::U8>(i) = std::min(kMaxScale, std::max(kMinScale, quantized_values[i]));
- }
-}
-
-void asym_wquant_per_layer(CircleConst *node, float min, float scaling_factor)
-{
- const int32_t kMinScale = 0;
- const int32_t kMaxScale = 255;
-
- uint32_t size = node->size<loco::DataType::FLOAT32>();
-
- const float scaling_factor_inv = 1.0 / scaling_factor;
- std::vector<int32_t> quantized_values(size);
- for (uint32_t i = 0; i < size; ++i)
- {
- auto data = node->at<loco::DataType::FLOAT32>(i);
- quantized_values[i] = static_cast<int32_t>(std::round((data - min) * scaling_factor_inv));
- }
-
- node->dtype(loco::DataType::U8); // change the type of tensor
- node->size<loco::DataType::U8>(size); // resize tensor
- for (uint32_t i = 0; i < size; ++i)
- {
- node->at<loco::DataType::U8>(i) = std::min(kMaxScale, std::max(kMinScale, quantized_values[i]));
- }
-}
-
-void set_bias(luci::CircleNode *node, luci::CircleConst *bias)
-{
- if (auto conv = dynamic_cast<CircleConv2D *>(node))
- conv->bias(bias);
- else if (auto dconv = dynamic_cast<CircleDepthwiseConv2D *>(node))
- dconv->bias(bias);
- else if (auto tconv = dynamic_cast<CircleTransposeConv *>(node))
- tconv->bias(bias);
- else if (auto fc = dynamic_cast<CircleFullyConnected *>(node))
- fc->bias(bias);
- else
- throw std::runtime_error("Only convolution, depthwise convolution, transposed convolution, and "
- "fully-connected layer have bias");
-}
-
-void set_act_qparam(luci::CircleNode *node, float scale, int64_t zp)
-{
- assert(node); // FIX_CALLER_UNLESS
- assert(node->quantparam()); // FIX_CALLER_UNLESS
-
- auto qparam = node->quantparam();
- assert(qparam->scale.size() == 1); // FIX_CALLER_UNLESS
- assert(qparam->zerop.size() == 1); // FIX_CALLER_UNLESS
- qparam->scale[0] = scale;
- qparam->zerop[0] = zp;
-}
-
-/**
- * @brief Manually set scale/zp of output tensor of special Ops
- */
-struct QuantizeSpecialActivation final : public luci::CircleNodeMutableVisitor<void>
-{
- QuantizeSpecialActivation(loco::DataType input, loco::DataType output)
- : input_type(input), output_type(output)
- {
- }
-
- loco::DataType input_type;
- loco::DataType output_type;
-
- void visit(luci::CircleNode *)
- {
- // Do nothing by default
- }
-
- void visit(luci::CircleLogistic *node)
- {
- if (output_type == loco::DataType::U8)
- set_act_qparam(node, 1.0f / 256.0f, 0);
- else
- {
- assert(output_type == loco::DataType::S16);
- set_act_qparam(node, 1.0f / 32768.0f, 0);
- }
- }
-
- void visit(luci::CircleTanh *node)
- {
- if (output_type == loco::DataType::U8)
- set_act_qparam(node, 2.0f / 256.0f, 128);
- else
- {
- assert(output_type == loco::DataType::S16);
- set_act_qparam(node, 1.0f / 32768.0f, 0);
- }
- }
-
- void visit(luci::CircleStridedSlice *node)
- {
- auto input = loco::must_cast<luci::CircleNode *>(node->input());
- auto i_qparam = input->quantparam();
- assert(i_qparam);
- assert(i_qparam->scale.size() == 1); // FIX_CALLER_UNLESS
- assert(i_qparam->zerop.size() == 1); // FIX_CALLER_UNLESS
- auto i_scale = i_qparam->scale[0];
- auto i_zp = i_qparam->zerop[0];
-
- set_act_qparam(node, i_scale, i_zp);
- }
-
- void visit(luci::CircleSplitOut *node)
- {
- auto split = loco::must_cast<luci::CircleSplit *>(node->input());
- auto input = loco::must_cast<luci::CircleNode *>(split->input());
- auto i_qparam = input->quantparam();
- assert(i_qparam);
- assert(i_qparam->scale.size() == 1); // FIX_CALLER_UNLESS
- assert(i_qparam->zerop.size() == 1); // FIX_CALLER_UNLESS
- auto i_scale = i_qparam->scale[0];
- auto i_zp = i_qparam->zerop[0];
-
- set_act_qparam(node, i_scale, i_zp);
- }
-
- void visit(luci::CircleSplitVOut *node)
- {
- auto splitv = loco::must_cast<luci::CircleSplitV *>(node->input());
- auto input = loco::must_cast<luci::CircleNode *>(splitv->input());
- auto i_qparam = input->quantparam();
- assert(i_qparam);
- assert(i_qparam->scale.size() == 1); // FIX_CALLER_UNLESS
- assert(i_qparam->zerop.size() == 1); // FIX_CALLER_UNLESS
- auto i_scale = i_qparam->scale[0];
- auto i_zp = i_qparam->zerop[0];
-
- set_act_qparam(node, i_scale, i_zp);
- }
-
- void visit(luci::CircleUnpackOut *node)
- {
- auto unpack = loco::must_cast<luci::CircleUnpack *>(node->input());
- auto input = loco::must_cast<luci::CircleNode *>(unpack->value());
- auto i_qparam = input->quantparam();
- assert(i_qparam);
- assert(i_qparam->scale.size() == 1); // FIX_CALLER_UNLESS
- assert(i_qparam->zerop.size() == 1); // FIX_CALLER_UNLESS
- auto i_scale = i_qparam->scale[0];
- auto i_zp = i_qparam->zerop[0];
-
- set_act_qparam(node, i_scale, i_zp);
- }
-
- // TODO Move Softmax, Floor, Ceil from QuantizeActivation to here
-};
-
/**
- * @brief QuantizeActivation quantizes tensors for activations
- * @details Quantize using recorded min/max values
+ * Insert Quantize operator for mixed-precision quantization
+ * 1. Before input feature map (only for non-const)
+ * 2. After output feature map
+ *
+ * For example, if default_dtype = U8 and op_dtype = S16,
+ * 1. Quantize Op for U8->S16 is inserted before ifm
+ * 2. Quantize Op for S16->U8 is inserted after ofm
+ *
+ * Why not insert Quantize Op for const ifm?
+ * We quantize the const tensor in a single step to preserve precision.
+ * For example, if default dtype = U8, op_dtype = S16, and op is CONV2D,
+ * We directly quantize weights to 16 bits, not 8->16 bits.
*/
-struct QuantizeActivation final : public luci::CircleNodeMutableVisitor<bool>
+struct InsertQuantizeOp final : public luci::CircleNodeMutableVisitor<void>
{
- QuantizeActivation(loco::DataType input, loco::DataType output)
- : input_type(input), output_type(output)
+ InsertQuantizeOp(loco::DataType default_dtype, loco::DataType op_dtype)
+ : _default_dtype(default_dtype), _op_dtype(op_dtype)
{
+ assert(default_dtype != op_dtype); // FIX_CALLER_UNLESS
}
- loco::DataType input_type;
- loco::DataType output_type;
+private:
+ loco::DataType _default_dtype;
+ loco::DataType _op_dtype;
- // Quantize input tensors of each node
- bool visit(luci::CircleNode *node)
+private:
+ luci::CircleQuantize *create_in_quantize(loco::Node *in, loco::Node *origin)
+ {
+ auto input = loco::must_cast<luci::CircleNode *>(in);
+ if (input->opcode() == luci::CircleOpcode::CIRCLECONST)
+ return nullptr;
+
+ auto input_quant = create_quantize_op(input, _op_dtype);
+ input_quant->input(input);
+ auto origin_node = loco::must_cast<luci::CircleNode *>(origin);
+ luci::add_origin(input_quant, luci::get_origin(origin_node));
+ return input_quant;
+ }
+
+ void insert_out_quantize(loco::Node *node)
+ {
+ auto output = loco::must_cast<luci::CircleNode *>(node);
+ assert(output->opcode() != luci::CircleOpcode::CIRCLECONST); // FIX_CALLER_UNLESS
+ auto output_quant = create_quantize_op(output, _default_dtype);
+
+ luci::add_origin(output_quant, luci::get_origin(output));
+ loco::replace(node).with(output_quant);
+ output_quant->input(node);
+ }
+
+// INPUT_NAME is the only activation of NODE
+#define INSERT_QUANTIZE_TO_UNARY_OP(NODE, INPUT_NAME) \
+ void visit(NODE *node) \
+ { \
+ if (auto input_quant = create_in_quantize(node->INPUT_NAME(), node)) \
+ node->INPUT_NAME(input_quant); \
+ \
+ insert_out_quantize(node); \
+ }
+
+// INPUT_NAME is the only activation of NODE
+#define INSERT_QUANTIZE_TO_UNARY_MULTI_OUTPUT_OP(NODE, INPUT_NAME, OUT_NAME) \
+ void visit(NODE *node) \
+ { \
+ if (auto input_quant = create_in_quantize(node->INPUT_NAME(), node)) \
+ node->INPUT_NAME(input_quant); \
+ \
+ auto out_nodes = loco::succs(node); \
+ for (auto out_node : out_nodes) \
+ { \
+ auto out_circle = loco::must_cast<OUT_NAME *>(out_node); \
+ insert_out_quantize(out_circle); \
+ } \
+ }
+
+// INPUT_NAME1 and INPUT_NAME2 are the only activations of NODE
+#define INSERT_QUANTIZE_TO_BINARY_OP(NODE, INPUT_NAME1, INPUT_NAME2) \
+ void visit(NODE *node) \
+ { \
+ if (auto input1_quant = create_in_quantize(node->INPUT_NAME1(), node)) \
+ node->INPUT_NAME1(input1_quant); \
+ \
+ if (auto input2_quant = create_in_quantize(node->INPUT_NAME2(), node)) \
+ node->INPUT_NAME2(input2_quant); \
+ \
+ insert_out_quantize(node); \
+ }
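+
+// For reference, the unary macro above expands mechanically; e.g.
+// INSERT_QUANTIZE_TO_UNARY_OP(luci::CircleTanh, x), used below, becomes:
+//
+//   void visit(luci::CircleTanh *node)
+//   {
+//     if (auto input_quant = create_in_quantize(node->x(), node))
+//       node->x(input_quant);
+//
+//     insert_out_quantize(node);
+//   }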
+
+ // Default behavior: mixed-precision quantization for this Op is not yet implemented
+ void visit(luci::CircleNode *node)
+ {
+ throw std::runtime_error("Unsupported Op for mixed-precision quantization. Layer name: " +
+ node->name());
+ }
+
+ // Skip graph outputs and virtual multi-output nodes
+ void visit(luci::CircleOutput *) {}
+ void visit(luci::CircleSplitVOut *) {}
+ void visit(luci::CircleSplitOut *) {}
+ void visit(luci::CircleTopKV2Out *) {}
+ void visit(luci::CircleUniqueOut *) {}
+ void visit(luci::CircleUnpackOut *) {}
+
+ // Ops that receive a single activation as an input
+ INSERT_QUANTIZE_TO_UNARY_OP(luci::CircleAveragePool2D, value)
+ INSERT_QUANTIZE_TO_UNARY_OP(luci::CircleBatchToSpaceND, input)
+ INSERT_QUANTIZE_TO_UNARY_OP(luci::CircleConv2D, input)
+ INSERT_QUANTIZE_TO_UNARY_OP(luci::CircleDepthToSpace, input)
+ INSERT_QUANTIZE_TO_UNARY_OP(luci::CircleDepthwiseConv2D, input)
+ INSERT_QUANTIZE_TO_UNARY_OP(luci::CircleElu, features)
+ INSERT_QUANTIZE_TO_UNARY_OP(luci::CircleExp, x)
+ INSERT_QUANTIZE_TO_UNARY_OP(luci::CircleFloor, x)
+ INSERT_QUANTIZE_TO_UNARY_OP(luci::CircleFullyConnected, input)
+ INSERT_QUANTIZE_TO_UNARY_OP(luci::CircleGather, params)
+ INSERT_QUANTIZE_TO_UNARY_OP(luci::CircleInstanceNorm, input)
+ INSERT_QUANTIZE_TO_UNARY_OP(luci::CircleLocalResponseNormalization, input)
+ INSERT_QUANTIZE_TO_UNARY_OP(luci::CircleLogistic, x)
+ INSERT_QUANTIZE_TO_UNARY_OP(luci::CircleMaxPool2D, value)
+ INSERT_QUANTIZE_TO_UNARY_OP(luci::CircleMean, input)
+ INSERT_QUANTIZE_TO_UNARY_OP(luci::CircleMirrorPad, input)
+ INSERT_QUANTIZE_TO_UNARY_OP(luci::CirclePad, input)
+ INSERT_QUANTIZE_TO_UNARY_OP(luci::CirclePadV2, input)
+ INSERT_QUANTIZE_TO_UNARY_OP(luci::CirclePRelu, input)
+ INSERT_QUANTIZE_TO_UNARY_OP(luci::CircleReduceProd, input)
+ INSERT_QUANTIZE_TO_UNARY_OP(luci::CircleReduceMax, input)
+ INSERT_QUANTIZE_TO_UNARY_OP(luci::CircleReduceMin, input)
+ INSERT_QUANTIZE_TO_UNARY_OP(luci::CircleRelu, features)
+ INSERT_QUANTIZE_TO_UNARY_OP(luci::CircleReshape, tensor)
+ INSERT_QUANTIZE_TO_UNARY_OP(luci::CircleResizeBilinear, input)
+ INSERT_QUANTIZE_TO_UNARY_OP(luci::CircleResizeNearestNeighbor, input)
+ INSERT_QUANTIZE_TO_UNARY_OP(luci::CircleReverseSequence, input)
+ INSERT_QUANTIZE_TO_UNARY_OP(luci::CircleRsqrt, x)
+ INSERT_QUANTIZE_TO_UNARY_OP(luci::CircleSlice, input)
+ INSERT_QUANTIZE_TO_UNARY_OP(luci::CircleSoftmax, logits)
+ INSERT_QUANTIZE_TO_UNARY_OP(luci::CircleSpaceToBatchND, input)
+ INSERT_QUANTIZE_TO_UNARY_OP(luci::CircleSpaceToDepth, input)
+ INSERT_QUANTIZE_TO_UNARY_OP(luci::CircleSqrt, x)
+ INSERT_QUANTIZE_TO_UNARY_OP(luci::CircleStridedSlice, input)
+ INSERT_QUANTIZE_TO_UNARY_OP(luci::CircleSum, input)
+ INSERT_QUANTIZE_TO_UNARY_OP(luci::CircleTanh, x)
+ INSERT_QUANTIZE_TO_UNARY_OP(luci::CircleTile, input)
+ INSERT_QUANTIZE_TO_UNARY_OP(luci::CircleTranspose, a)
+ INSERT_QUANTIZE_TO_UNARY_OP(luci::CircleTransposeConv, outBackprop)
+
+ // Ops that receive two activations as inputs
+ INSERT_QUANTIZE_TO_BINARY_OP(luci::CircleAdd, x, y)
+ INSERT_QUANTIZE_TO_BINARY_OP(luci::CircleBatchMatMul, x, y)
+ INSERT_QUANTIZE_TO_BINARY_OP(luci::CircleDiv, x, y)
+ INSERT_QUANTIZE_TO_BINARY_OP(luci::CircleFloorDiv, x, y)
+ INSERT_QUANTIZE_TO_BINARY_OP(luci::CircleMaximum, x, y)
+ INSERT_QUANTIZE_TO_BINARY_OP(luci::CircleMinimum, x, y)
+ INSERT_QUANTIZE_TO_BINARY_OP(luci::CircleMul, x, y)
+ INSERT_QUANTIZE_TO_BINARY_OP(luci::CircleOneHot, on_value, off_value)
+ INSERT_QUANTIZE_TO_BINARY_OP(luci::CirclePow, x, y)
+ INSERT_QUANTIZE_TO_BINARY_OP(luci::CircleSub, x, y)
+
+ // Multiple-output ops that receive a single activation as input
+ INSERT_QUANTIZE_TO_UNARY_MULTI_OUTPUT_OP(luci::CircleSplit, input, luci::CircleSplitOut)
+ INSERT_QUANTIZE_TO_UNARY_MULTI_OUTPUT_OP(luci::CircleSplitV, input, luci::CircleSplitVOut)
+ INSERT_QUANTIZE_TO_UNARY_MULTI_OUTPUT_OP(luci::CircleTopKV2, input, luci::CircleTopKV2Out)
+ INSERT_QUANTIZE_TO_UNARY_MULTI_OUTPUT_OP(luci::CircleUnique, input, luci::CircleUniqueOut)
+ INSERT_QUANTIZE_TO_UNARY_MULTI_OUTPUT_OP(luci::CircleUnpack, value, luci::CircleUnpackOut)
+
+ // AddN has an arbitrary number of inputs
+ void visit(luci::CircleAddN *node)
{
- LOGGER(l);
- INFO(l) << "QuantizeActivation visit node: " << node->name() << std::endl;
auto arity = node->arity();
for (uint32_t i = 0; i < arity; i++)
{
- auto input_node = node->arg(i);
- auto circle_node = loco::must_cast<luci::CircleNode *>(input_node);
-
- // Check if this is already quantized
- if (is_quantized(circle_node))
- continue;
-
- // Check if this is bias (bias is quantized later)
- auto iwo = get_input_weight_output_of_bias(circle_node);
- if (iwo.size() > 0)
- continue;
-
- // Check if this is bool type (bool type is not quantized)
- if (circle_node->dtype() == loco::DataType::BOOL)
- continue;
-
- // Check if this is activation
- // We assume min/max are recorded only for activations
- if (has_min_max(circle_node) && !is_weights(circle_node))
- {
- // Quantize using recorded min/max
- auto quantparam = circle_node->quantparam();
- assert(quantparam);
- assert(quantparam->min.size() == 1); // only support layer-wise quant
- assert(quantparam->max.size() == 1); // only support layer-wise quant
- auto min = quantparam->min[0];
- auto max = quantparam->max[0];
-
- // Special values
- if (circle_node->opcode() == luci::CircleOpcode::SOFTMAX)
- {
- min = 0.0f;
- max = 1.0f;
- }
-
- float scaling_factor{0};
- int64_t zp{0};
- float nudged_min{0};
- float nudged_max{0};
-
- if (output_type == loco::DataType::U8)
- {
- compute_asym_scale_zp(min, max, scaling_factor, zp, nudged_min, nudged_max);
- circle_node->dtype(loco::DataType::U8);
- }
- else
- {
- compute_sym_scale_zp(min, max, scaling_factor, zp, nudged_min, nudged_max);
- circle_node->dtype(loco::DataType::S16);
- }
-
- // Nodes fused with activation functions which need special quantization
- auto fused_act_node =
- dynamic_cast<CircleNodeMixin<CircleNodeTrait::FusedActFunc> *>(circle_node);
- if (fused_act_node != nullptr &&
- fused_act_node->fusedActivationFunction() == FusedActFunc::TANH)
- {
- if (output_type == loco::DataType::U8)
- {
- scaling_factor = 2.0f / 256.0f;
- zp = 128;
- }
- else
- {
- assert(output_type == loco::DataType::S16);
- scaling_factor = 1.0f / 32768.0f;
- zp = 0;
- }
- }
-
- // The output of these Ops should be integer, so scale should be integer
- // TODO Handle cases where the integer scale needs to be propagated
- if (circle_node->opcode() == CircleOpcode::FLOOR ||
- circle_node->opcode() == CircleOpcode::FLOOR_DIV ||
- circle_node->opcode() == CircleOpcode::FLOOR_MOD ||
- circle_node->opcode() == CircleOpcode::CEIL)
- {
- assert(scaling_factor >= 0); // FIX_ME_UNLESS
- scaling_factor = scaling_factor < 1 ? 1.0f : std::round(scaling_factor);
- }
-
- circle_node->quantparam()->scale.push_back(scaling_factor);
- circle_node->quantparam()->zerop.push_back(zp);
- }
- // Fix special attributes
- if (circle_node->opcode() == luci::CircleOpcode::CAST)
- {
- auto *cast = loco::must_cast<luci::CircleCast *>(circle_node);
- auto *cast_input = loco::must_cast<luci::CircleNode *>(cast->x());
-
- // make sure that cast_input is already quantized
- assert(cast_input->dtype() != loco::DataType::FLOAT32);
- cast->in_data_type(cast_input->dtype());
- cast->out_data_type(cast->dtype());
- }
- }
- return false;
- }
-};
-
-struct QuantizeBias final : public luci::CircleNodeMutableVisitor<bool>
-{
- QuantizeBias(loco::DataType input, loco::DataType output, QuantizationGranularity gr)
- : input_type(input), output_type(output), granularity(gr)
- {
- }
-
- loco::DataType input_type;
- loco::DataType output_type;
- QuantizationGranularity granularity;
-
- // Quantize bias node
- bool visit(luci::CircleNode *node)
- {
- // Check if this is already quantized
- if (is_quantized(node))
- return false;
-
- auto iwo_list = get_input_weight_output_of_bias(node);
-
- for (auto iwo : iwo_list)
- {
- assert(iwo.size() == 3);
-
- auto input = loco::must_cast<luci::CircleNode *>(iwo[0]);
- auto weight = loco::must_cast<luci::CircleNode *>(iwo[1]);
- auto output = loco::must_cast<luci::CircleNode *>(iwo[2]);
-
- auto const_bias = loco::must_cast<luci::CircleConst *>(node);
- assert(const_bias->dtype() == loco::DataType::FLOAT32);
-
- // If input is const, it is quantized here, not in QuantizeActivation
- if (auto const_input = dynamic_cast<luci::CircleConst *>(input))
- {
- quant_const(const_input, output_type);
- }
-
- CircleConst *new_bias = nullptr;
-
- if (granularity == QuantizationGranularity::ChannelWise)
- {
- auto input_q = input->quantparam();
- assert(input_q);
- assert(input_q->scale.size() == 1); // input scale's layer-wise
- auto input_scale = input_q->scale[0];
-
- assert(weight->quantparam() != nullptr); // weight scale's channel-wise
- auto weight_scale = weight->quantparam()->scale;
-
- uint32_t size = const_bias->size<loco::DataType::FLOAT32>();
- assert(size == weight_scale.size());
- std::vector<float> scaling_factor(size);
- std::vector<int64_t> zp(size);
-
- if (output_type == loco::DataType::U8)
- {
- new_bias =
- quant_bias_per_channel(const_bias, input_scale, weight_scale, scaling_factor, zp);
- }
- else if (output_type == loco::DataType::S16)
- {
- new_bias =
- int16_quant_bias_per_channel(const_bias, input_scale, weight_scale, scaling_factor, zp);
- }
- else
- {
- throw std::runtime_error("Unsupported quantization type.");
- }
-
- auto quantparam = std::make_unique<CircleQuantParam>();
- quantparam->scale = scaling_factor;
- quantparam->zerop = zp;
- assert(new_bias->quantparam() == nullptr); // bias should not be quantized before
- new_bias->quantparam(std::move(quantparam));
-
- set_bias(output, new_bias);
- }
- else
- {
- auto input_q = input->quantparam();
- assert(input_q);
- assert(input_q->scale.size() == 1); // Only support per-layer quant
- auto input_scale = input_q->scale[0];
-
- auto weight_q = weight->quantparam();
- assert(weight_q);
- assert(weight_q->scale.size() == 1); // Only support per-layer quant
- auto weight_scale = weight_q->scale[0];
-
- float scaling_factor{0};
- int64_t zp{0};
- new_bias =
- asym_quant_bias_per_layer(const_bias, input_scale, weight_scale, &scaling_factor, &zp);
- auto quantparam = std::make_unique<CircleQuantParam>();
- quantparam->scale.push_back(scaling_factor);
- quantparam->zerop.push_back(zp);
- assert(new_bias->quantparam() == nullptr); // bias should not be quantized before
- new_bias->quantparam(std::move(quantparam));
-
- set_bias(output, new_bias);
- }
- }
- return false;
- }
-};
-
-/**
- * @brief QuantizeWeights quantizes tensors for weights
- * @details Find min/max values on the fly and then quantize
- */
-struct QuantizeWeights final : public luci::CircleNodeMutableVisitor<bool>
-{
- QuantizeWeights(loco::DataType input, loco::DataType output, QuantizationGranularity gr)
- : input_type(input), output_type(output), granularity(gr)
- {
- }
-
- loco::DataType input_type;
- loco::DataType output_type;
- QuantizationGranularity granularity;
-
-private:
- void quantize_weights(luci::CircleConst *weights)
- {
- // Find min/max per channel-wise
- if (granularity == QuantizationGranularity::ChannelWise)
- {
- auto quantparam = weights->quantparam();
- if (quantparam == nullptr)
- {
- assert(false && "quantparam is nullptr");
- return;
- }
-
- auto min = quantparam->min;
- auto scaling_factor = quantparam->scale;
- int32_t channel_dim_index = 0;
-
- if (output_type == loco::DataType::U8)
- {
- asym_wquant_per_channel(weights, min, scaling_factor, channel_dim_index);
- }
- else
- {
- sym_wquant_per_channel(weights, scaling_factor, channel_dim_index);
- }
- quantparam->min.clear();
- quantparam->max.clear();
- quantparam->quantized_dimension = channel_dim_index;
- }
- // Find min/max per layer-wise
- else
- {
- // Quantize using recorded quantparam
- auto quantparam = weights->quantparam();
- assert(quantparam != nullptr);
- assert(quantparam->min.size() == 1); // only support layer-wise quant
- assert(quantparam->scale.size() == 1); // only support layer-wise quant
- auto min = quantparam->min[0];
- auto scaling_factor = quantparam->scale[0];
- asym_wquant_per_layer(weights, min, scaling_factor);
- quantparam->min.clear();
- quantparam->max.clear();
- }
- }
-
- bool visit(luci::CircleConv2D *node)
- {
- LOGGER(l);
- INFO(l) << "QuantizeWeights visit node: " << node->name() << std::endl;
-
- auto weights = loco::must_cast<luci::CircleConst *>(node->filter());
- if (!is_quantized(weights))
- {
- auto new_weights = luci::clone(weights);
- node->filter(new_weights);
- quantize_weights(new_weights);
- return true;
+ if (auto input_quant = create_in_quantize(node->inputs(i), node))
+ node->inputs(i, input_quant);
}
- return false;
- }
-
- bool visit(luci::CircleDepthwiseConv2D *node)
- {
- LOGGER(l);
- INFO(l) << "QuantizeWeights visit node: " << node->name() << std::endl;
- auto weights = loco::must_cast<luci::CircleConst *>(node->filter());
- if (!is_quantized(weights))
- {
- auto new_weights = luci::clone(weights);
- node->filter(new_weights);
- quantize_weights(new_weights);
- return true;
- }
- return false;
+ insert_out_quantize(node);
}
- bool visit(luci::CircleInstanceNorm *node)
+ // Concat has an arbitrary number of inputs
+ void visit(luci::CircleConcatenation *node)
{
- LOGGER(l);
- INFO(l) << "QuantizeWeights visit node: " << node->name() << std::endl;
-
- auto gamma = loco::must_cast<luci::CircleConst *>(node->gamma());
- auto beta = loco::must_cast<luci::CircleConst *>(node->beta());
-
- bool changed = false;
- if (!is_quantized(gamma))
- {
- assert(gamma->dtype() == loco::DataType::FLOAT32);
- auto new_gamma = luci::clone(gamma);
- if (granularity == QuantizationGranularity::LayerWise)
- quant_const(new_gamma, output_type);
- else if (granularity == QuantizationGranularity::ChannelWise)
- quant_const_per_channel(new_gamma, output_type);
- node->gamma(new_gamma);
- changed = true;
- }
- if (!is_quantized(beta))
- {
- assert(beta->dtype() == loco::DataType::FLOAT32);
- auto new_beta = luci::clone(beta);
- if (granularity == QuantizationGranularity::LayerWise)
- quant_const(new_beta, output_type);
- else if (granularity == QuantizationGranularity::ChannelWise)
- quant_const_per_channel(new_beta, output_type);
- node->beta(new_beta);
- changed = true;
- }
-
- return changed;
- }
-
- bool visit(luci::CirclePRelu *node)
- {
- LOGGER(l);
- INFO(l) << "QuantizeWeights visit node: " << node->name() << std::endl;
-
- auto alpha = loco::must_cast<luci::CircleConst *>(node->alpha());
-
- if (!is_quantized(alpha))
+ auto arity = node->arity();
+ for (uint32_t i = 0; i < arity; i++)
{
- assert(alpha->dtype() == loco::DataType::FLOAT32);
- auto new_alpha = luci::clone(alpha);
- if (granularity == QuantizationGranularity::LayerWise)
- quant_const(new_alpha, output_type);
- else if (granularity == QuantizationGranularity::ChannelWise)
- quant_const_per_channel(new_alpha, output_type);
- node->alpha(new_alpha);
- return true;
+ if (auto input_quant = create_in_quantize(node->values(i), node))
+ node->values(i, input_quant);
}
- return false;
+ insert_out_quantize(node);
}
- bool visit(luci::CircleTransposeConv *node)
+ // Pack has an arbitrary number of inputs
+ void visit(luci::CirclePack *node)
{
- LOGGER(l);
- INFO(l) << "QuantizeWeights visit node: " << node->name() << std::endl;
-
- auto weights = loco::must_cast<luci::CircleConst *>(node->filter());
- if (!is_quantized(weights))
+ auto arity = node->arity();
+ for (uint32_t i = 0; i < arity; i++)
{
- auto new_weights = luci::clone(weights);
- node->filter(new_weights);
- quantize_weights(new_weights);
- return true;
+ if (auto input_quant = create_in_quantize(node->values(i), node))
+ node->values(i, input_quant);
}
- return false;
- }
-
- bool visit(luci::CircleFullyConnected *node)
- {
- LOGGER(l);
- INFO(l) << "QuantizeWeights visit node: " << node->name() << std::endl;
- auto weights = loco::must_cast<luci::CircleConst *>(node->weights());
- if (!is_quantized(weights))
- {
- auto new_weights = luci::clone(weights);
- node->weights(new_weights);
- quantize_weights(new_weights);
- return true;
- }
- return false;
+ insert_out_quantize(node);
}
- bool visit(luci::CircleNode *) { return false; }
+#undef INSERT_QUANTIZE_TO_UNARY_OP
+#undef INSERT_QUANTIZE_TO_BINARY_OP
+#undef INSERT_QUANTIZE_TO_UNARY_MULTI_OUTPUT_OP
};
-/** EXAMPLE
- *
- * BEFORE
- *
- * [CircleNode] [CircleConst]
- * (qparam1) (FP32)
- * \ /
- * \ /
- * [CirclePack]
- * (qparam2)
- *
- * AFTER
- *
- * [CircleNode] [CircleConst] [CircleConst] <- Dead node
- * (qparam2) (qparam2) (FP32)
- * \ /
- * \ /
- * [CirclePack]
- * (qparam2)
- *
- * NOTE Quantization parameter of CirclePack (qparam2) is propagated to the inputs.
- */
-void propagate_pack_quantparam(luci::CirclePack *pack, loco::DataType quant_type)
-{
- assert(pack->quantparam() != nullptr);
-
- const auto num_inputs = pack->values_count();
-
- for (uint32_t i = 0; i < num_inputs; i++)
- {
- auto node = loco::must_cast<luci::CircleNode *>(pack->arg(i));
-
- // Skip if this input is PACK Op
- if (node->opcode() == luci::CircleOpcode::PACK)
- continue;
-
- // Quantize constant values
- if (node->opcode() == luci::CircleOpcode::CIRCLECONST)
- {
- luci::CircleConst *const_node = loco::must_cast<luci::CircleConst *>(node);
- if (const_node->dtype() != loco::DataType::FLOAT32)
- throw std::runtime_error("Unsupported data type for constant input of pack Op");
-
- const auto pack_qparam = pack->quantparam();
- if (pack_qparam == nullptr)
- throw std::runtime_error("quantparam of pack is not found during propagation");
-
- assert(pack_qparam->scale.size() == 1);
- assert(pack_qparam->zerop.size() == 1);
- const auto scaling_factor = pack_qparam->scale[0];
- const auto zerop = pack_qparam->zerop[0];
-
- auto new_const = luci::clone(const_node);
- quant_const_values(new_const, scaling_factor, zerop, quant_type);
- pack->values(i, new_const);
- overwrite_quantparam(pack, new_const);
- }
- else
- {
- const auto succs = loco::succs(node);
- if (succs.size() > 1)
- continue;
-
- // Non-const input must have been quantized
- assert(node->quantparam() != nullptr);
- overwrite_quantparam(pack, node);
- }
- }
-}
-
-/**
- * @brief Quantize const input tensors using min/max of const values
- */
-void quantize_const_inputs(luci::CircleNode *node, loco::DataType output_type)
-{
- auto opcode = node->opcode();
- auto arity = node->arity();
-
- loco::Node *input_node{nullptr};
- luci::CircleConst *const_node{nullptr};
-
- switch (opcode)
- {
- case luci::CircleOpcode::CONV_2D:
- case luci::CircleOpcode::DEPTHWISE_CONV_2D:
- case luci::CircleOpcode::FULLY_CONNECTED:
- case luci::CircleOpcode::INSTANCE_NORM:
- case luci::CircleOpcode::PRELU:
- case luci::CircleOpcode::TRANSPOSE_CONV:
- // Handled in QuantizeWeights and QuantizeBias
- break;
-
- case luci::CircleOpcode::CONCATENATION:
- // Handled in propagate_concat_quantparam
- break;
-
- case luci::CircleOpcode::LOGICAL_OR:
- // Inputs of logical Ops are bool, thus not quantized
- break;
-
- case luci::CircleOpcode::ARG_MAX:
- case luci::CircleOpcode::ARG_MIN:
- case luci::CircleOpcode::BATCH_TO_SPACE_ND:
- case luci::CircleOpcode::LOCAL_RESPONSE_NORMALIZATION:
- case luci::CircleOpcode::MEAN:
- case luci::CircleOpcode::MIRROR_PAD:
- case luci::CircleOpcode::PAD:
- case luci::CircleOpcode::REDUCE_ANY:
- case luci::CircleOpcode::REDUCE_PROD:
- case luci::CircleOpcode::REDUCE_MAX:
- case luci::CircleOpcode::REDUCE_MIN:
- case luci::CircleOpcode::RESHAPE:
- case luci::CircleOpcode::RESIZE_BILINEAR:
- case luci::CircleOpcode::RESIZE_NEAREST_NEIGHBOR:
- case luci::CircleOpcode::REVERSE_SEQUENCE:
- case luci::CircleOpcode::SLICE:
- case luci::CircleOpcode::SPACE_TO_BATCH_ND:
- case luci::CircleOpcode::SPLIT_V:
- case luci::CircleOpcode::STRIDED_SLICE:
- case luci::CircleOpcode::SUM:
- case luci::CircleOpcode::TILE:
- case luci::CircleOpcode::TOPK_V2:
- case luci::CircleOpcode::TRANSPOSE:
- // The second input of these Ops should not be quantized
- // Ex: axis, paddings
- input_node = node->arg(0);
- const_node = dynamic_cast<luci::CircleConst *>(input_node);
- if (const_node != nullptr && !is_quantized(const_node))
- quant_const(const_node, output_type);
- break;
-
- case luci::CircleOpcode::ADD:
- case luci::CircleOpcode::ADD_N:
- case luci::CircleOpcode::DEPTH_TO_SPACE:
- case luci::CircleOpcode::DIV:
- case luci::CircleOpcode::ELU:
- case luci::CircleOpcode::EQUAL:
- case luci::CircleOpcode::EXP:
- case luci::CircleOpcode::FLOOR:
- case luci::CircleOpcode::FLOOR_DIV:
- case luci::CircleOpcode::GREATER:
- case luci::CircleOpcode::GREATER_EQUAL:
- case luci::CircleOpcode::LESS:
- case luci::CircleOpcode::LESS_EQUAL:
- case luci::CircleOpcode::LOGISTIC:
- case luci::CircleOpcode::MAXIMUM:
- case luci::CircleOpcode::MINIMUM:
- case luci::CircleOpcode::MUL:
- case luci::CircleOpcode::NOT_EQUAL:
- case luci::CircleOpcode::POW:
- case luci::CircleOpcode::RSQRT:
- case luci::CircleOpcode::SOFTMAX:
- case luci::CircleOpcode::SPACE_TO_DEPTH:
- case luci::CircleOpcode::SQRT:
- case luci::CircleOpcode::SUB:
- case luci::CircleOpcode::TANH:
- case luci::CircleOpcode::UNPACK:
- // Quantize all const inputs using their values
- for (uint32_t i = 0; i < arity; i++)
- {
- input_node = node->arg(i);
- const_node = dynamic_cast<luci::CircleConst *>(input_node);
- if (const_node != nullptr && !is_quantized(const_node))
- quant_const(const_node, output_type);
- }
- break;
-
- case luci::CircleOpcode::SPLIT:
- // Only the second input is quantized
- // First input should not be quantized (e.g., split_dim)
- input_node = node->arg(1);
- const_node = dynamic_cast<luci::CircleConst *>(input_node);
- if (const_node != nullptr && !is_quantized(const_node))
- quant_const(const_node, output_type);
- break;
-
- case luci::CircleOpcode::PADV2:
- // First and third constant inputs are quantized
- // Second input should not be quantized (e.g., paddings)
- // Quant params are propagated either from output range to the non-constant input
- // or from input to output and constant values
- propagate_pad_v2_quantparam(loco::must_cast<CirclePadV2 *>(node), output_type);
- break;
-
- case luci::CircleOpcode::PACK:
- // Quant param is propagated from output to inputs
- propagate_pack_quantparam(loco::must_cast<CirclePack *>(node), output_type);
- break;
-
- default:
- for (uint32_t i = 0; i < arity; i++)
- {
- input_node = node->arg(i);
- const_node = dynamic_cast<luci::CircleConst *>(input_node);
- if (const_node != nullptr)
- throw std::runtime_error("Unsupported Op for const inputs");
- }
- break;
- }
-}
-
} // namespace
-/** BEFORE
- *
- * [CircleNode] [CircleConst]
- * (U8 qparam1) (FP32)
- * \ /
- * \ /
- * [CircleConcatenation]
- * (U8 qparam2)
- *
- * AFTER
- * [CircleNode] [CircleConst] [CircleConst] <- Dead node
- * (U8 qparam2) (U8 qparam2) (FP32)
- * \ /
- * \ /
- * [CircleConcatenation]
- * (U8 qparam2)
- */
-void propagate_concat_quantparam(luci::CircleConcatenation *concat, loco::DataType quant_type)
-{
- assert(concat->quantparam() != nullptr);
-
- const auto num_inputs = concat->numValues();
-
- // Quantize const inputs using their values if concat has fused act function
- if (concat->fusedActivationFunction() != luci::FusedActFunc::NONE)
- {
- for (uint32_t i = 0; i < num_inputs; i++)
- {
- auto node = concat->arg(i);
- auto const_node = dynamic_cast<luci::CircleConst *>(node);
- if (const_node != nullptr)
- {
- auto new_const = luci::clone(const_node);
- quant_const(new_const, quant_type);
- concat->values(i, new_const);
- }
- }
- return;
- }
-
- for (uint32_t i = 0; i < num_inputs; i++)
- {
- auto node = loco::must_cast<luci::CircleNode *>(concat->arg(i));
-
- // Skip if this input is CONCAT Op
- if (node->opcode() == luci::CircleOpcode::CONCATENATION)
- continue;
-
- // Quantize constant values
- if (node->opcode() == luci::CircleOpcode::CIRCLECONST)
- {
- luci::CircleConst *const_node = loco::must_cast<luci::CircleConst *>(node);
- if (const_node->dtype() != loco::DataType::FLOAT32)
- throw std::runtime_error("Unsupported data type for constant input of concatenation Op");
-
- const auto concat_qparam = concat->quantparam();
- if (concat_qparam == nullptr)
- throw std::runtime_error("quantparam of concat is not found during propagation");
-
- assert(concat_qparam->scale.size() == 1);
- const auto scaling_factor = concat_qparam->scale[0];
- const auto zerop = concat_qparam->zerop[0];
-
- auto new_const = luci::clone(const_node);
- quant_const_values(new_const, scaling_factor, zerop, quant_type);
- concat->values(i, new_const);
- overwrite_quantparam(concat, new_const);
- }
- else
- {
- const auto succs = loco::succs(node);
- if (succs.size() > 1)
- continue;
-
- // Non-const input must have been quantized
- assert(node->quantparam() != nullptr);
- overwrite_quantparam(concat, node);
- }
- }
-}
-
-/**
- * tells if pad_v2 quantization should ignore padding value
- * In that case padding const will be quantized with input parameters, and probably clipped
- */
-bool ignore_pad_v2_const_quantization(luci::CirclePadV2 *pad)
-{
- // This is a workaround to quantize pad generated from MaxPoolWithArgmax operation properly
- // TODO use metadata hints to detect this case
- auto const_value_node = dynamic_cast<luci::CircleConst *>(pad->arg(2));
- if (!const_value_node)
- return false;
- if (const_value_node->dtype() == loco::DataType::FLOAT32)
- {
- float const_value = const_value_node->at<loco::DataType::FLOAT32>(0);
- if (const_value == std::numeric_limits<float>::lowest())
- return true;
- }
- return false;
-}
-
-/** BEFORE
- *
- * [CircleNode] [CircleConst] [CircleConst]
- * (U8 qparam1) (S32) (FP32)
- * \ | /
- * \ | /
- * [CirclePadV2]
- * (U8 qparam2)
- *
- * AFTER (case 1)
- *
- * By default qparam is propagated from output to inputs to meet backend requirements.
- *
- * [CircleNode] [CircleConst] [CircleConst] [CircleConst] <- Dead node
- * (U8 qparam2) (S32) (U8 qparam2) (FP32)
- * \ | /
- * \ | /
- * [CirclePadV2]
- * (U8 qparam2)
- *
- * AFTER (case 2)
- *
- * In case padded value is the lowest float value
- * Qparam is propagated from input to output and constant.
- *
- * This is a special case for optimization constructed pad, needed to guarantee that
- * extremely large negative constant do not stretch output quantization range.
- *
- * [CircleNode] [CircleConst] [CircleConst] [CircleConst] <- Dead node
- * (U8 qparam1) (S32) (U8 qparam1) (FP32)
- * \ | /
- * \ | /
- * [CirclePadV2]
- * (U8 qparam1)
- */
-void propagate_pad_v2_quantparam(luci::CirclePadV2 *pad_v2, loco::DataType quant_type)
-{
- if (ignore_pad_v2_const_quantization(pad_v2))
- {
- // propagate input quantization paramters from input to output and padding const value
- auto pad_v2_input = loco::must_cast<luci::CircleNode *>(pad_v2->arg(0));
- overwrite_quantparam(pad_v2_input, pad_v2);
-
- auto const_value_node = loco::must_cast<luci::CircleConst *>(
- pad_v2->arg(2)); // FIX ignore_pad_v2_const_quantization UNLESS
- auto new_const = luci::clone(const_value_node);
-
- const auto pad_v2_input_qparam = pad_v2_input->quantparam();
- assert(pad_v2_input_qparam != nullptr);
- assert(pad_v2_input_qparam->scale.size() == 1);
- const auto scaling_factor = pad_v2_input_qparam->scale.at(0);
- const auto zerop = pad_v2_input_qparam->zerop.at(0);
-
- quant_const_values(new_const, scaling_factor, zerop, quant_type);
- overwrite_quantparam(pad_v2_input, new_const);
- pad_v2->constant_values(new_const);
- return;
- }
-
- // Propagate quantization paramters from output to inputs,
- // to fit both input and counstant_value in one quant range.
- auto quant_input = [pad_v2, quant_type](void (CirclePadV2::*arg_setter)(loco::Node *),
- uint32_t arg) {
- auto node = loco::must_cast<luci::CircleNode *>(pad_v2->arg(arg));
-
- // Quantize constant values
- if (node->opcode() == luci::CircleOpcode::CIRCLECONST)
- {
- luci::CircleConst *const_node = loco::must_cast<luci::CircleConst *>(node);
- if (is_quantized(const_node))
- return;
-
- if (const_node->dtype() != loco::DataType::FLOAT32)
- throw std::runtime_error("Unsupported data type for constant input of PadV2 Op");
-
- const auto pad_v2_qparam = pad_v2->quantparam();
- if (pad_v2_qparam == nullptr)
- throw std::runtime_error("quantparam of PadV2 is not found during propagation");
-
- assert(pad_v2_qparam->scale.size() == 1);
- const auto scaling_factor = pad_v2_qparam->scale.at(0);
- const auto zerop = pad_v2_qparam->zerop.at(0);
-
- auto new_const = luci::clone(const_node);
- quant_const_values(new_const, scaling_factor, zerop, quant_type);
- overwrite_quantparam(pad_v2, new_const);
- (pad_v2->*arg_setter)(new_const);
- }
- // Subsequent PadV2 Ops quant params are not propagated
- else if (node->opcode() == luci::CircleOpcode::PADV2)
- {
- return;
- }
- else
- {
- const auto succs = loco::succs(node);
- if (succs.size() > 1)
- return;
-
- // Non-const input must have been quantized
- assert(node->quantparam() != nullptr);
- overwrite_quantparam(pad_v2, node);
- }
- };
-
- quant_input(&CirclePadV2::input, 0);
- quant_input(&CirclePadV2::constant_values, 2);
-}
-
void QuantizeWithMinMaxPass::set_input_type(loco::Graph *g) const
{
auto inputs = g->inputs();
for (auto node : loco::input_nodes(g))
{
auto input = loco::must_cast<luci::CircleInput *>(node);
- if (input->dtype() == _input_type)
+ if (input->dtype() == _ctx->input_type)
continue;
// Bool type is not quantizable
if (input->dtype() == loco::DataType::BOOL)
continue;
+ if (input->dtype() == loco::DataType::S32)
+ continue;
+ if (input->dtype() == loco::DataType::S64)
+ continue;
// Insert Quantize Op
auto quant_op = create_quantize_op(input, input->dtype());
@@ -1552,22 +367,22 @@ void QuantizeWithMinMaxPass::set_input_type(loco::Graph *g) const
float nudged_min{0};
float nudged_max{0};
- if (_input_type == loco::DataType::U8)
+ if (_ctx->input_type == loco::DataType::U8)
{
compute_asym_scale_zp(min, max, scaling_factor, zp, nudged_min, nudged_max);
}
else
{
- assert(_input_type == loco::DataType::S16);
+ assert(_ctx->input_type == loco::DataType::S16);
compute_sym_scale_zp(min, max, scaling_factor, zp, nudged_min, nudged_max);
}
- input->dtype(_input_type);
+ input->dtype(_ctx->input_type);
input->quantparam()->scale[0] = scaling_factor;
input->quantparam()->zerop[0] = zp;
}
auto graph_input = inputs->at(input->index());
- graph_input->dtype(_input_type);
+ graph_input->dtype(_ctx->input_type);
}
}
@@ -1577,7 +392,7 @@ void QuantizeWithMinMaxPass::set_output_type(loco::Graph *g) const
for (auto node : loco::output_nodes(g))
{
auto output = loco::must_cast<luci::CircleOutput *>(node);
- if (output->dtype() == _output_type)
+ if (output->dtype() == _ctx->output_type)
continue;
// Bool type is not quantizable
@@ -1591,7 +406,7 @@ void QuantizeWithMinMaxPass::set_output_type(loco::Graph *g) const
continue;
// Insert Quantize Op
- auto quant_op = create_quantize_op(from, _output_type);
+ auto quant_op = create_quantize_op(from, _ctx->output_type);
loco::replace(from).with(quant_op);
quant_op->input(from);
@@ -1599,67 +414,165 @@ void QuantizeWithMinMaxPass::set_output_type(loco::Graph *g) const
luci::add_origin(quant_op, luci::get_origin(from));
auto graph_output = outputs->at(output->index());
- graph_output->dtype(_output_type);
+ graph_output->dtype(_ctx->output_type);
}
}
+/**
+ * How does QuantizeWithMinMax work?
+ *
+ * We categorize tensors into four groups:
+ * - Activation: Feature maps (both Const/Non-const)
+ * - Weights: Const tensors of specific Ops (Conv, FC, ...)
+ * - Bias: Const tensors of specific Ops (Conv, FC, ...)
+ * - Others: padding value, one_hot value, axis, ..
+ *
+ * Activation is quantized in different ways
+ * 1. For non-constant activation, quantize using recorded min/max
+ * 2. For constant activation, quantize using min/max of its value
+ * 3. For some Ops (ex: pad_v2), output qparam is used as input qparam (backward propagation)
+ * 4. For some Ops (ex: reshape), input qparam is used as output qparam (forward propagation)
+ * 5. For some Ops (ex: tanh), output qparam has pre-defined values
+ *
+ * Weights are quantized using the min/max of their values
+ *
+ * Bias is quantized using input scale (s_i) and weights scale (s_w)
+ * - Activation and weights should be quantized earlier than bias
+ *
+ * Quantization Steps
+ * 1. Quantize Activation
+ * - Quantize using recorded min/max (QuantizeActivation)
+ * - Insert Quantize Ops for mixed-precision quantization (InsertQuantizeOp)
+ * - Remove redundant Quantize Ops (RemoveRedundantQuantizePass)
+ * - Propagate qparam backward (PropagateQParamBackwardPass)
+ * - Quantize const inputs (QuantizeConstInputActivation)
+ * - Quantize using pre-defined values (QuantizeSpecialActivation)
+ * - Propagate qparam forward (PropagateQParamForwardPass)
+ * 2. Quantize Weights
+ * 3. Quantize Bias
+ * 4. Set input dtype
+ * 5. Set output dtype
+ *
+ * Why was the quantization sequence determined as above?
+ * - Activation and weights should be quantized before bias (1->2->3). Input/Output
+ * dtype can be updated at the end (4->5).
+ * - During activation quantization,
+ *   - Backward propagation is performed earlier than forward propagation. This allows
+ *     backward-propagated qparam to be overwritten during forward propagation.
+ *     We made this decision because Ops for forward propagation (reshape, transpose, ..)
+ *     are more common than those for backward propagation. TODO Check that this decision is safe.
+ * - QuantizeSpecialActivation is called before forward propagation to make sure that
+ * the pre-defined qparam values are propagated.
+ */
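To make step 3 concrete: a minimal sketch of the bias arithmetic described above, assuming symmetric S32 bias quantization with zero point 0; the helper name is hypothetical, not part of this pass.

```cpp
// Illustrative sketch only (not code from this pass): the bias rule above.
// Bias scale is the product of the input scale (s_i) and weights scale (s_w),
// assuming symmetric quantization with zero point 0.
#include <cmath>
#include <cstdint>

int32_t quantize_bias_value(float bias, float s_i /* input scale */,
                            float s_w /* weights scale */)
{
  const float s_b = s_i * s_w; // bias scale = input scale * weights scale
  return static_cast<int32_t>(std::round(bias / s_b));
}
```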
bool QuantizeWithMinMaxPass::run(loco::Graph *g)
{
LOGGER(l);
INFO(l) << "QuantizeWithMinMaxPass Start" << std::endl;
+ auto info_by_name = layer_info_map(g, _ctx->layers_info);
+
+ auto quantize_dtype = [&](const luci::CircleNode *node) {
+ auto iter = info_by_name.find(node->name());
+
+ // Return designated quantization dtype
+ if (iter != info_by_name.end())
+ return iter->second.dtype;
+
+ // Return default quantization dtype
+ return _ctx->output_model_dtype;
+ };
+
+ auto quantize_granularity = [&](const luci::CircleNode *node) {
+ auto iter = info_by_name.find(node->name());
+
+ // Return designated quantization granularity
+ if (iter != info_by_name.end())
+ return iter->second.granularity;
+
+ // Return default quantization granularity
+ return _ctx->granularity;
+ };
+
// Quantize activation
for (auto node : loco::active_nodes(loco::output_nodes(g)))
{
- QuantizeActivation qa(_input_model_dtype, _output_model_dtype);
auto circle_node = loco::must_cast<luci::CircleNode *>(node);
+ QuantizeActivation qa(_ctx->input_model_dtype, quantize_dtype(circle_node));
circle_node->accept(&qa);
}
- // Quantize weights
+ // Insert Quantize Op
for (auto node : loco::active_nodes(loco::output_nodes(g)))
{
- QuantizeWeights qw(_input_model_dtype, _output_model_dtype, _granularity);
auto circle_node = loco::must_cast<luci::CircleNode *>(node);
- circle_node->accept(&qw);
+ auto op_dtype = quantize_dtype(circle_node);
+ if (op_dtype != _ctx->output_model_dtype)
+ {
+ InsertQuantizeOp iqo(_ctx->output_model_dtype, op_dtype);
+ circle_node->accept(&iqo);
+ }
}
- // Quantize bias
+ // Remove redundant Quantize Op
+ {
+ logo::Phase phase;
+
+ phase.emplace_back(std::make_unique<luci::RemoveRedundantQuantizePass>());
+
+ ProgressReporter prog(g, logo::PhaseStrategy::Saturate);
+ logo::PhaseRunner<logo::PhaseStrategy::Saturate> phase_runner{g};
+ phase_runner.attach(&prog);
+ phase_runner.run(phase);
+ }
+
+ // Backward propagation of activation qparam
+ {
+ PropagateQParamBackwardPass pqbp(_ctx->output_model_dtype);
+ pqbp.run(g);
+ }
+
+ // Quantize const input activation
for (auto node : loco::active_nodes(loco::output_nodes(g)))
{
- QuantizeBias qb(_input_model_dtype, _output_model_dtype, _granularity);
auto circle_node = loco::must_cast<luci::CircleNode *>(node);
- circle_node->accept(&qb);
+ QuantizeConstInputActivation qcia(quantize_dtype(circle_node));
+ circle_node->accept(&qcia);
}
- // Propagate quantization parameters of concat Op
+ // Update qparam of output of special Ops
for (auto node : loco::active_nodes(loco::output_nodes(g)))
{
- auto concat = dynamic_cast<luci::CircleConcatenation *>(node);
- if (not concat)
- continue;
-
- // Propagate qparam of concat to its inputs if
- // (1) concat is uint8-quantized
- // (2) concat has no fused activation function
- // (3) the input is not concatenation Op
- // (4) the input is not produced to Ops other than concat
- propagate_concat_quantparam(concat, _output_model_dtype);
+ auto circle_node = loco::must_cast<luci::CircleNode *>(node);
+ QuantizeSpecialActivation qsa(_ctx->input_model_dtype, quantize_dtype(circle_node));
+ circle_node->accept(&qsa);
}
- // Quantize const inputs other than weights and bias
+ // Forward propagation of activation qparam
+ logo::Phase phase;
+
+ phase.emplace_back(std::make_unique<luci::PropagateQParamForwardPass>(_ctx->TF_style_maxpool));
+
+ ProgressReporter prog(g, logo::PhaseStrategy::Saturate);
+ logo::PhaseRunner<logo::PhaseStrategy::Saturate> phase_runner{g};
+ phase_runner.attach(&prog);
+ phase_runner.run(phase);
+
+ // Quantize weights
for (auto node : loco::active_nodes(loco::output_nodes(g)))
{
auto circle_node = loco::must_cast<luci::CircleNode *>(node);
- quantize_const_inputs(circle_node, _output_model_dtype);
+ QuantizeWeights qw(_ctx->input_model_dtype, quantize_dtype(circle_node),
+ quantize_granularity(circle_node));
+ circle_node->accept(&qw);
}
- // Update qparam of output of special Ops
+ // Quantize bias
for (auto node : loco::active_nodes(loco::output_nodes(g)))
{
- QuantizeSpecialActivation qsa(_input_model_dtype, _output_model_dtype);
auto circle_node = loco::must_cast<luci::CircleNode *>(node);
- circle_node->accept(&qsa);
+ QuantizeBias qb(_ctx->input_model_dtype, quantize_dtype(circle_node),
+ quantize_granularity(circle_node));
+ circle_node->accept(&qb);
}
// Update output dtype
@@ -1667,11 +580,11 @@ bool QuantizeWithMinMaxPass::run(loco::Graph *g)
for (auto node : loco::output_nodes(g))
{
auto circle_node = loco::must_cast<luci::CircleOutput *>(node);
- if (static_cast<luci::CircleNode *>(circle_node->from())->dtype() == _output_model_dtype)
+ if (static_cast<luci::CircleNode *>(circle_node->from())->dtype() == _ctx->output_model_dtype)
{
- circle_node->dtype(_output_model_dtype);
+ circle_node->dtype(_ctx->output_model_dtype);
auto graph_output = graph_outputs->at(circle_node->index());
- graph_output->dtype(_output_model_dtype);
+ graph_output->dtype(_ctx->output_model_dtype);
}
}
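For readers wiring this pass up, a usage sketch based on the Context fields exercised in the test code later in this diff; `g` is assumed to be a `loco::Graph *` whose activations already carry recorded min/max.

```cpp
// Sketch: construct QuantizeWithMinMaxPass from a Context and run it,
// quantizing a float graph to U8 with channel-wise granularity.
auto ctx = std::make_unique<luci::QuantizeWithMinMaxPass::Context>();
{
  ctx->input_model_dtype = loco::DataType::FLOAT32;
  ctx->output_model_dtype = loco::DataType::U8;
  ctx->granularity = luci::QuantizationGranularity::ChannelWise;
  ctx->input_type = loco::DataType::U8;
  ctx->output_type = loco::DataType::U8;
  ctx->TF_style_maxpool = false;
}

luci::QuantizeWithMinMaxPass pass(std::move(ctx));
pass.run(g); // g : loco::Graph *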
diff --git a/compiler/luci/pass/src/QuantizeWithMinMaxPass.test.cpp b/compiler/luci/pass/src/QuantizeWithMinMaxPass.test.cpp
index 75ec0cfd8..d5fa21ffd 100644
--- a/compiler/luci/pass/src/QuantizeWithMinMaxPass.test.cpp
+++ b/compiler/luci/pass/src/QuantizeWithMinMaxPass.test.cpp
@@ -16,8 +16,41 @@
#include "luci/Pass/QuantizeWithMinMaxPass.h"
+#include <luci/IR/CircleNodes.h>
+
#include <gtest/gtest.h>
+class SimpleConcatGraph
+{
+public:
+ SimpleConcatGraph(loco::DataType quant_type)
+ {
+ concat_node = g.nodes()->create<luci::CircleConcatenation>(2);
+ input_1 = g.nodes()->create<luci::CircleConst>();
+ input_2 = g.nodes()->create<luci::CircleConst>();
+
+ concat_node->dtype(quant_type);
+ concat_node->fusedActivationFunction(luci::FusedActFunc::NONE);
+ input_1->dtype(quant_type);
+ input_2->dtype(quant_type);
+
+ concat_node->values(0, input_1);
+ concat_node->values(1, input_2);
+ }
+
+ ~SimpleConcatGraph()
+ {
+ concat_node->values(0, nullptr);
+ concat_node->values(1, nullptr);
+ }
+
+public:
+ loco::Graph g;
+ luci::CircleConcatenation *concat_node = nullptr;
+ luci::CircleConst *input_1 = nullptr;
+ luci::CircleConst *input_2 = nullptr;
+};
+
TEST(QuantizeWithMinMaxPassTest, name)
{
luci::QuantizeWithMinMaxPass pass(loco::DataType::FLOAT32, loco::DataType::U8,
@@ -25,3 +58,19 @@ TEST(QuantizeWithMinMaxPassTest, name)
auto const name = pass.name();
ASSERT_NE(nullptr, name);
}
+
+// Test concat of integer tensors
+// Integer tensors are not quantized
+TEST(QuantizeWithMinMaxPassTest, int_concat)
+{
+ SimpleConcatGraph g(loco::DataType::S32);
+
+ luci::QuantizeWithMinMaxPass qwmm(loco::DataType::FLOAT32, loco::DataType::U8,
+ luci::QuantizationGranularity::LayerWise);
+
+ qwmm.run(&g.g);
+
+ EXPECT_EQ(nullptr, g.concat_node->quantparam());
+ EXPECT_EQ(nullptr, g.input_1->quantparam());
+ EXPECT_EQ(nullptr, g.input_2->quantparam());
+}
diff --git a/compiler/luci/pass/src/QuantizedModelVerifier.cpp b/compiler/luci/pass/src/QuantizedModelVerifier.cpp
index f02301ed1..684d5d48a 100644
--- a/compiler/luci/pass/src/QuantizedModelVerifier.cpp
+++ b/compiler/luci/pass/src/QuantizedModelVerifier.cpp
@@ -15,10 +15,10 @@
#include "QuantizedModelVerifier.h"
-#include "VerifyQuantizedNodeLayerWiseGranularity.h"
-#include "VerifyQuantizedNodeChannelWiseGranularity.h"
-#include "VerifyQuantizedNodeU8Type.h"
-#include "VerifyQuantizedNodeS16Type.h"
+#include "VerifyQuantizedNodeGranularity.h"
+#include "VerifyQuantizedNodeType.h"
+#include "VerifyQuantizedBiasScale.h"
+#include "helpers/LayerInfoMap.h"
#include <luci/IR/CircleNodes.h>
#include <luci/IR/CircleNodeVisitor.h>
@@ -28,12 +28,33 @@ namespace luci
void QuantizedModelVerifier::verify(loco::Graph *g)
{
- if (_quantized_dtype != Type::U8 && _quantized_dtype != Type::S16)
- throw std::runtime_error("Unsupported quantized dtype");
-
- if (_granularity != Granularity::ChannelWise && _granularity != Granularity::LayerWise)
+ if (_ctx->granularity != Granularity::ChannelWise && _ctx->granularity != Granularity::LayerWise)
throw std::runtime_error("Unsupported granularity");
+ auto info_by_name = layer_info_map(g, _ctx->layers_info);
+
+ auto quantize_dtype = [&](const luci::CircleNode *node) {
+ auto iter = info_by_name.find(node->name());
+
+ // Return designated quantization dtype
+ if (iter != info_by_name.end())
+ return iter->second.dtype;
+
+ // Return default quantization dtype
+ return _ctx->output_model_dtype;
+ };
+
+ auto quantize_granularity = [&](const luci::CircleNode *node) {
+ auto iter = info_by_name.find(node->name());
+
+ // Return designated quantization granularity
+ if (iter != info_by_name.end())
+ return iter->second.granularity;
+
+ // Return default quantization granularity
+ return _ctx->granularity;
+ };
+
for (auto node : loco::active_nodes(loco::output_nodes(g)))
{
auto circle_node = loco::must_cast<luci::CircleNode *>(node);
@@ -46,32 +67,17 @@ void QuantizedModelVerifier::verify(loco::Graph *g)
};
// Verify Type
- if (_quantized_dtype == Type::U8)
- {
- VerifyQuantizedNodeU8Type vt;
- if (!circle_node->accept(&vt))
- throw std::runtime_error("Wrong data type detected in " + node_name());
- }
- else if (_quantized_dtype == Type::S16)
- {
- VerifyQuantizedNodeS16Type vt;
- if (!circle_node->accept(&vt))
- throw std::runtime_error("Wrong data type detected in " + node_name());
- }
+ if (!VerifyQuantizedNodeType::create(quantize_dtype(circle_node))->verify(circle_node))
+ throw std::runtime_error("Wrong data type detected in " + node_name());
// Verify Granularity
- if (_granularity == Granularity::LayerWise)
- {
- VerifyQuantizedNodeLayerWiseGranularity vg;
- if (!circle_node->accept(&vg))
- throw std::runtime_error("Wrong granularity detected in " + node_name());
- }
- else if (_granularity == Granularity::ChannelWise)
- {
- VerifyQuantizedNodeChannelWiseGranularity vg;
- if (!circle_node->accept(&vg))
- throw std::runtime_error("Wrong granularity detected in " + node_name());
- }
+ if (!circle_node->accept(
+ VerifyQuantizedNodeGranularity::create(quantize_granularity(circle_node)).get()))
+ throw std::runtime_error("Wrong granularity detected in " + node_name());
+
+ // Verify Bias scale
+ if (!VerifyQuantizedBiasScale::create()->verify(circle_node))
+ throw std::runtime_error("Wrong bias scale detected in " + node_name());
}
}
diff --git a/compiler/luci/pass/src/QuantizedModelVerifier.h b/compiler/luci/pass/src/QuantizedModelVerifier.h
index d5fbb8e74..7409a51d7 100644
--- a/compiler/luci/pass/src/QuantizedModelVerifier.h
+++ b/compiler/luci/pass/src/QuantizedModelVerifier.h
@@ -21,6 +21,8 @@
#include <loco.h>
+#include <memory>
+
namespace luci
{
@@ -31,18 +33,40 @@ namespace luci
*/
struct QuantizedModelVerifier
{
+public:
+ struct Context
+ {
+ loco::DataType output_model_dtype = loco::DataType::Unknown;
+ QuantizationGranularity granularity = QuantizationGranularity::ChannelWise;
+ loco::DataType input_type = loco::DataType::Unknown;
+ loco::DataType output_type = loco::DataType::Unknown;
+ bool TF_style_maxpool = false;
+ std::vector<LayerInfo> layers_info;
+ };
public:
QuantizedModelVerifier(loco::DataType quantized_dtype, QuantizationGranularity granularity)
- : _quantized_dtype(quantized_dtype), _granularity(granularity)
{
+ _ctx = std::make_unique<Context>();
+ {
+ _ctx->output_model_dtype = quantized_dtype;
+ _ctx->granularity = granularity;
+ _ctx->input_type = quantized_dtype;
+ _ctx->output_type = quantized_dtype;
+ _ctx->TF_style_maxpool = false;
+ }
+ }
+
+public:
+ QuantizedModelVerifier(std::unique_ptr<Context> &&ctx) : _ctx{std::move(ctx)}
+ {
+ // DO NOTHING
}
void verify(loco::Graph *g);
private:
- loco::DataType _quantized_dtype;
- QuantizationGranularity _granularity;
+ std::unique_ptr<Context> _ctx;
};
} // namespace luci
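The verifier follows the same Context pattern as the pass; a usage sketch mirroring the test helper later in this diff (`g` is an assumed `loco::Graph *` that has already been quantized).

```cpp
// Sketch: verify a quantized graph via the Context-based constructor.
auto ctx = std::make_unique<luci::QuantizedModelVerifier::Context>();
{
  ctx->output_model_dtype = loco::DataType::U8;
  ctx->granularity = luci::QuantizationGranularity::ChannelWise;
  ctx->input_type = loco::DataType::U8;
  ctx->output_type = loco::DataType::U8;
  ctx->TF_style_maxpool = false;
}

luci::QuantizedModelVerifier verifier(std::move(ctx));
verifier.verify(g); // throws std::runtime_error on a type/granularity/bias-scale mismatch
```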
diff --git a/compiler/luci/pass/src/QuantizedModelVerifier.test.cpp b/compiler/luci/pass/src/QuantizedModelVerifier.test.cpp
index 3a6d86c33..cebafd32b 100644
--- a/compiler/luci/pass/src/QuantizedModelVerifier.test.cpp
+++ b/compiler/luci/pass/src/QuantizedModelVerifier.test.cpp
@@ -17,6 +17,7 @@
#include "QuantizedModelVerifier.h"
#include "luci/Pass/QuantizeWithMinMaxPass.h"
+#include "luci/Pass/QuantizationParameters.h"
#include <luci/test/TestIOGraph.h>
@@ -112,57 +113,77 @@ void quantize_and_verify(loco::Graph *g, Type quantized_dtype, Granularity granu
verifier.verify(g);
}
-// Helper function to reduce duplicate test codes
-// Assumption: g->output()->from() is the target node
-void quantize_and_verify_with_wrong_type(luci::test::TestIOGraph *g, Type quantized_dtype,
- Granularity granularity, Type wrong_dtype)
+void quantize_and_verify_with_layer_info(loco::Graph *g, Type quantized_dtype,
+ Granularity granularity)
{
- luci::QuantizeWithMinMaxPass pass(Type::FLOAT32, quantized_dtype, granularity);
- pass.run(g->g());
-
- auto node = loco::must_cast<luci::CircleNode *>(g->output()->from());
- node->dtype(wrong_dtype);
+ // A layer named "test" has dtype different from quantized_dtype
+ luci::LayerInfo info;
+ {
+ info.name = "test";
+ // dtype is different from quantized_dtype
+ info.dtype = quantized_dtype == Type::U8 ? Type::S16 : Type::U8;
+ info.granularity = Granularity::ChannelWise;
+ }
- luci::QuantizedModelVerifier verifier(quantized_dtype, granularity);
- verifier.verify(g->g());
-}
+ // Do quantization
+ {
+ auto ctx = std::make_unique<luci::QuantizeWithMinMaxPass::Context>();
+ {
+ ctx->input_model_dtype = Type::FLOAT32;
+ ctx->output_model_dtype = quantized_dtype;
+ ctx->granularity = granularity;
+ ctx->input_type = quantized_dtype;
+ ctx->output_type = quantized_dtype;
+ ctx->TF_style_maxpool = false;
+ ctx->layers_info.push_back(info);
+ }
-void quantize_and_verify_with_wrong_type(luci::test::TestIOGraph *g, Type quantized_dtype,
- Granularity granularity, Type wrong_dtype,
- luci::CircleNode *target)
-{
- luci::QuantizeWithMinMaxPass pass(Type::FLOAT32, quantized_dtype, granularity);
- pass.run(g->g());
+ luci::QuantizeWithMinMaxPass pass(std::move(ctx));
+ pass.run(g);
+ }
- target->dtype(wrong_dtype);
+ // Do verification
+ {
+ auto ctx = std::make_unique<luci::QuantizedModelVerifier::Context>();
+ {
+ ctx->output_model_dtype = quantized_dtype;
+ ctx->granularity = granularity;
+ ctx->input_type = quantized_dtype;
+ ctx->output_type = quantized_dtype;
+ ctx->TF_style_maxpool = false;
+ ctx->layers_info.push_back(info);
+ }
- luci::QuantizedModelVerifier verifier(quantized_dtype, granularity);
- verifier.verify(g->g());
+ luci::QuantizedModelVerifier verifier(std::move(ctx));
+ verifier.verify(g);
+ }
}
// Helper function to reduce duplicate test codes
// Assumption: g->output()->from() is the target node
-void quantize_and_verify_with_wrong_granularity(luci::test::TestIOGraph *g, Type quantized_dtype,
- Granularity granularity)
+void quantize_and_verify_with_wrong_type(luci::test::TestIOGraph *g, Type quantized_dtype,
+ Granularity granularity, Type wrong_dtype)
{
luci::QuantizeWithMinMaxPass pass(Type::FLOAT32, quantized_dtype, granularity);
pass.run(g->g());
auto node = loco::must_cast<luci::CircleNode *>(g->output()->from());
- insert_scale_zp(node, 1.0, 1);
+ node->dtype(wrong_dtype);
luci::QuantizedModelVerifier verifier(quantized_dtype, granularity);
verifier.verify(g->g());
}
// Helper function to reduce duplicate test codes
+// Assumption: g->output()->from() is the target node
void quantize_and_verify_with_wrong_granularity(luci::test::TestIOGraph *g, Type quantized_dtype,
- Granularity granularity, luci::CircleNode *target)
+ Granularity granularity)
{
luci::QuantizeWithMinMaxPass pass(Type::FLOAT32, quantized_dtype, granularity);
pass.run(g->g());
- insert_scale_zp(target, 1.0, 1);
+ auto node = loco::must_cast<luci::CircleNode *>(g->output()->from());
+ insert_scale_zp(node, 1.0, 1);
luci::QuantizedModelVerifier verifier(quantized_dtype, granularity);
verifier.verify(g->g());
@@ -230,6 +251,8 @@ public:
_instnorm->input(input());
_instnorm->gamma(_gamma);
_instnorm->beta(_beta);
+ _instnorm->fusedActivationFunction(luci::FusedActFunc::NONE);
+ _instnorm->name("test");
}
output()->from(_instnorm);
@@ -256,6 +279,7 @@ public:
_logistic = g()->nodes()->create<luci::CircleLogistic>();
{
_logistic->x(input());
+ _logistic->name("test");
}
output()->from(_logistic);
@@ -275,6 +299,7 @@ public:
_lrn = g()->nodes()->create<luci::CircleLocalResponseNormalization>();
{
_lrn->input(input());
+ _lrn->name("test");
}
output()->from(_lrn);
@@ -295,6 +320,7 @@ public:
{
_softmax->logits(input());
_softmax->beta(0.1);
+ _softmax->name("test");
}
output()->from(_softmax);
@@ -324,6 +350,7 @@ public:
_stob->input(input());
_stob->block_shape(_block_shape);
_stob->paddings(_paddings);
+ _stob->name("test");
}
output()->from(_stob);
@@ -346,6 +373,7 @@ public:
{
_stod->input(input());
_stod->block_size(2);
+ _stod->name("test");
}
output()->from(_stod);
@@ -375,6 +403,7 @@ public:
_slice->input(input());
_slice->begin(_begin);
_slice->size(_size);
+ _slice->name("test");
}
output()->from(_slice);
@@ -472,6 +501,7 @@ public:
_slice->begin(_begin);
_slice->end(_end);
_slice->strides(_strides);
+ _slice->name("test");
}
output()->from(_slice);
@@ -499,6 +529,7 @@ public:
{
_reshape->tensor(input());
_reshape->shape(_shape);
+ _reshape->name("test");
}
output()->from(_reshape);
@@ -519,6 +550,7 @@ public:
_tanh = g()->nodes()->create<luci::CircleTanh>();
{
_tanh->x(input());
+ _tanh->name("test");
}
output()->from(_tanh);
@@ -538,6 +570,7 @@ public:
_floor = g()->nodes()->create<luci::CircleFloor>();
{
_floor->x(input());
+ _floor->name("test");
}
output()->from(_floor);
@@ -601,6 +634,7 @@ public:
_btos->input(input());
_btos->block_shape(_block_shape);
_btos->crops(_crops);
+ _btos->name("test");
}
output()->from(_btos);
@@ -623,6 +657,7 @@ public:
{
_dtos->input(input());
_dtos->block_size(2);
+ _dtos->name("test");
}
output()->from(_dtos);
@@ -645,6 +680,7 @@ public:
_pack->values(0, input());
_pack->values(1, _param);
_pack->axis(0);
+ _pack->name("test");
}
output()->from(_pack);
@@ -680,6 +716,7 @@ public:
{
_pad->input(input());
_pad->paddings(_paddings);
+ _pad->name("test");
}
output()->from(_pad);
@@ -707,6 +744,7 @@ public:
_pad->input(input());
_pad->paddings(_paddings);
_pad->constant_values(_constant_values);
+ _pad->name("test");
}
output()->from(_pad);
@@ -735,6 +773,7 @@ public:
_mirror_pad->input(input());
_mirror_pad->paddings(_paddings);
_mirror_pad->mode(luci::MirrorPadMode::REFLECT);
+ _mirror_pad->name("test");
}
output()->from(_mirror_pad);
@@ -761,6 +800,7 @@ public:
{
_transpose->a(input());
_transpose->perm(_perm);
+ _transpose->name("test");
}
output()->from(_transpose);
@@ -784,6 +824,8 @@ public:
_concat->values(0, input());
_concat->values(1, _param);
_concat->axis(0);
+ _concat->fusedActivationFunction(luci::FusedActFunc::NONE);
+ _concat->name("test");
}
output()->from(_concat);
@@ -795,6 +837,54 @@ private:
luci::CircleConst *_param = nullptr;
};
+template <Type indexT> class OneHotTestGraph final : public SimpleTestGraph
+{
+public:
+ void init(void) override
+ {
+ TestIOGraph::init({32}, {32, 10});
+ {
+ // input dtype is float by default, but OneHot's input should have indexType (s32/s64)
+ input()->dtype(indexT);
+ }
+
+ _depth = g()->nodes()->template create<luci::CircleConst>();
+ {
+ _depth->dtype(loco::DataType::S32);
+ }
+
+ _on_value = g()->nodes()->template create<luci::CircleConst>();
+ {
+ _on_value->dtype(loco::DataType::FLOAT32);
+ }
+
+ _off_value = g()->nodes()->template create<luci::CircleConst>();
+ {
+ _off_value->dtype(loco::DataType::FLOAT32);
+ }
+
+ _one_hot = g()->nodes()->template create<luci::CircleOneHot>();
+ {
+ _one_hot->indices(input());
+ _one_hot->depth(_depth);
+ _one_hot->on_value(_on_value);
+ _one_hot->off_value(_off_value);
+ _one_hot->axis(-1);
+ _one_hot->dtype(loco::DataType::FLOAT32);
+ _one_hot->name("test");
+ }
+ output()->from(_one_hot);
+
+ set_minmax_to_non_const(g(), -1, 1);
+ }
+
+private:
+ luci::CircleOneHot *_one_hot = nullptr;
+ luci::CircleConst *_depth = nullptr;
+ luci::CircleConst *_on_value = nullptr;
+ luci::CircleConst *_off_value = nullptr;
+};
+
// Test graph for comparison Ops
// GREATER, GREATER_EQUAL, LESS, LESS_EQUAL, EQUAL, NOT_EQUAL
template <class Op> class ComparisonOpTestGraph final : public SimpleTestGraph
@@ -866,6 +956,7 @@ public:
{
_div->x(input());
_div->y(_const);
+ _div->name("test");
}
output()->from(_div);
@@ -893,6 +984,7 @@ public:
{
_floor_div->x(input());
_floor_div->y(_const);
+ _floor_div->name("test");
}
output()->from(_floor_div);
@@ -917,6 +1009,7 @@ public:
_rsqrt = g()->nodes()->create<luci::CircleRsqrt>();
{
_rsqrt->x(input());
+ _rsqrt->name("test");
}
output()->from(_rsqrt);
@@ -936,6 +1029,7 @@ public:
_sqrt = g()->nodes()->create<luci::CircleSqrt>();
{
_sqrt->x(input());
+ _sqrt->name("test");
}
output()->from(_sqrt);
@@ -955,6 +1049,7 @@ public:
_elu = g()->nodes()->create<luci::CircleElu>();
{
_elu->features(input());
+ _elu->name("test");
}
output()->from(_elu);
@@ -977,6 +1072,7 @@ public:
{
_pow->x(input());
_pow->y(_const);
+ _pow->name("test");
}
output()->from(_pow);
@@ -1004,6 +1100,7 @@ public:
{
_resize_bilinear->input(input());
_resize_bilinear->size(_size);
+ _resize_bilinear->name("test");
}
output()->from(_resize_bilinear);
@@ -1027,6 +1124,7 @@ public:
{
_resize_nearest_neighbor->input(input());
_resize_nearest_neighbor->size(_size);
+ _resize_nearest_neighbor->name("test");
}
output()->from(_resize_nearest_neighbor);
@@ -1067,6 +1165,62 @@ private:
luci::CircleConst *_unpack_dim = nullptr;
};
+class MulTestGraph final : public SimpleTestGraph
+{
+public:
+ void init(void) override
+ {
+ TestIOGraph::init({32}, {32});
+
+ _const = create_dummy_const<Type::FLOAT32>(g(), {32});
+ _mul = g()->nodes()->create<luci::CircleMul>();
+ {
+ _mul->x(input());
+ _mul->y(_const);
+ _mul->fusedActivationFunction(luci::FusedActFunc::NONE);
+ _mul->name("test");
+ }
+ output()->from(_mul);
+
+ set_minmax_to_non_const(g(), -1, 1);
+ }
+
+ loco::Node *x() { return _mul->x(); }
+ loco::Node *y() { return _mul->y(); }
+
+private:
+ luci::CircleMul *_mul = nullptr;
+ luci::CircleConst *_const = nullptr;
+};
+
+class AddTestGraph final : public SimpleTestGraph
+{
+public:
+ void init(void) override
+ {
+ TestIOGraph::init({32}, {32});
+
+ _const = create_dummy_const<Type::FLOAT32>(g(), {32});
+ _add = g()->nodes()->create<luci::CircleAdd>();
+ {
+ _add->x(input());
+ _add->y(_const);
+ _add->fusedActivationFunction(luci::FusedActFunc::NONE);
+ _add->name("test");
+ }
+ output()->from(_add);
+
+ set_minmax_to_non_const(g(), -1, 1);
+ }
+
+ loco::Node *x() { return _add->x(); }
+ loco::Node *y() { return _add->y(); }
+
+private:
+ luci::CircleAdd *_add = nullptr;
+ luci::CircleConst *_const = nullptr;
+};
+
} // namespace
// Quantize and verify with given configurations
@@ -1078,6 +1232,15 @@ private:
EXPECT_NO_THROW(quantize_and_verify(g.g(), type, granularity)); \
} while (0)
+// Quantize and verify with layer info
+#define TEST_WITH_LAYER_INFO(graph, type, granularity) \
+ do \
+ { \
+ graph g; \
+ g.init(); \
+ EXPECT_NO_THROW(quantize_and_verify_with_layer_info(g.g(), type, granularity)); \
+ } while (0)
+
// Quantize and verify with wrong type
#define TEST_WITH_WRONG_TYPE(graph, type, granularity, wrong_dtype) \
do \
@@ -1098,25 +1261,34 @@ private:
// Quantize and verify with wrong type
// Users can specify the test target
-#define TEST_WITH_WRONG_TYPE_TARGET(graph, type, granularity, wrong_dtype, target) \
- do \
- { \
- graph g; \
- g.init(); \
- auto node = loco::must_cast<luci::CircleNode *>(target); \
- EXPECT_ANY_THROW( \
- quantize_and_verify_with_wrong_type(&g, type, granularity, wrong_dtype, node)); \
+#define TEST_WITH_WRONG_TYPE_TARGET(graph, type, granularity, wrong_dtype, target) \
+ do \
+ { \
+ graph g; \
+ g.init(); \
+ auto node = loco::must_cast<luci::CircleNode *>(target); \
+ luci::QuantizeWithMinMaxPass pass(Type::FLOAT32, type, granularity); \
+ pass.run(g.g()); \
+ auto after_node = loco::must_cast<luci::CircleNode *>(target); \
+ after_node->dtype(wrong_dtype); \
+ luci::QuantizedModelVerifier verifier(type, granularity); \
+ EXPECT_ANY_THROW(verifier.verify(g.g())); \
} while (0)
// Quantize and verify with wrong granularity
// Users can specify the test target
-#define TEST_WITH_WRONG_GRANULARITY_TARGET(graph, type, granularity, target) \
- do \
- { \
- graph g; \
- g.init(); \
- auto node = loco::must_cast<luci::CircleNode *>(target); \
- EXPECT_ANY_THROW(quantize_and_verify_with_wrong_granularity(&g, type, granularity, node)); \
+#define TEST_WITH_WRONG_GRANULARITY_TARGET(graph, type, granularity, target) \
+ do \
+ { \
+ graph g; \
+ g.init(); \
+ auto node = loco::must_cast<luci::CircleNode *>(target); \
+ luci::QuantizeWithMinMaxPass pass(Type::FLOAT32, type, granularity); \
+ pass.run(g.g()); \
+ auto after_node = loco::must_cast<luci::CircleNode *>(target); \
+ insert_scale_zp(after_node, 1.0, 1); \
+ luci::QuantizedModelVerifier verifier(type, granularity); \
+ EXPECT_ANY_THROW(verifier.verify(g.g())); \
} while (0)
// Test a local helper function
@@ -1145,6 +1317,10 @@ TEST(QuantizedModelVerifierTest, InstanceNorm)
TEST_WITH_GRAPH(InstanceNormTestGraph, Type::U8, Granularity::LayerWise);
TEST_WITH_GRAPH(InstanceNormTestGraph, Type::U8, Granularity::ChannelWise);
TEST_WITH_GRAPH(InstanceNormTestGraph, Type::S16, Granularity::ChannelWise);
+
+ TEST_WITH_LAYER_INFO(InstanceNormTestGraph, Type::U8, Granularity::LayerWise);
+ TEST_WITH_LAYER_INFO(InstanceNormTestGraph, Type::U8, Granularity::ChannelWise);
+ TEST_WITH_LAYER_INFO(InstanceNormTestGraph, Type::S16, Granularity::ChannelWise);
SUCCEED();
}
@@ -1169,6 +1345,10 @@ TEST(QuantizedModelVerifierTest, LocalResponseNormalization)
TEST_WITH_GRAPH(LocalResponseNormalizationTestGraph, Type::U8, Granularity::LayerWise);
TEST_WITH_GRAPH(LocalResponseNormalizationTestGraph, Type::U8, Granularity::ChannelWise);
TEST_WITH_GRAPH(LocalResponseNormalizationTestGraph, Type::S16, Granularity::ChannelWise);
+
+ TEST_WITH_LAYER_INFO(LocalResponseNormalizationTestGraph, Type::U8, Granularity::LayerWise);
+ TEST_WITH_LAYER_INFO(LocalResponseNormalizationTestGraph, Type::U8, Granularity::ChannelWise);
+ TEST_WITH_LAYER_INFO(LocalResponseNormalizationTestGraph, Type::S16, Granularity::ChannelWise);
SUCCEED();
}
@@ -1199,6 +1379,10 @@ TEST(QuantizedModelVerifierTest, Logistic)
TEST_WITH_GRAPH(LogisticTestGraph, Type::U8, Granularity::LayerWise);
TEST_WITH_GRAPH(LogisticTestGraph, Type::U8, Granularity::ChannelWise);
TEST_WITH_GRAPH(LogisticTestGraph, Type::S16, Granularity::ChannelWise);
+
+ TEST_WITH_LAYER_INFO(LogisticTestGraph, Type::U8, Granularity::LayerWise);
+ TEST_WITH_LAYER_INFO(LogisticTestGraph, Type::U8, Granularity::ChannelWise);
+ TEST_WITH_LAYER_INFO(LogisticTestGraph, Type::S16, Granularity::ChannelWise);
SUCCEED();
}
@@ -1223,6 +1407,10 @@ TEST(QuantizedModelVerifierTest, Softmax)
TEST_WITH_GRAPH(SoftmaxTestGraph, Type::U8, Granularity::LayerWise);
TEST_WITH_GRAPH(SoftmaxTestGraph, Type::U8, Granularity::ChannelWise);
TEST_WITH_GRAPH(SoftmaxTestGraph, Type::S16, Granularity::ChannelWise);
+
+ TEST_WITH_LAYER_INFO(SoftmaxTestGraph, Type::U8, Granularity::LayerWise);
+ TEST_WITH_LAYER_INFO(SoftmaxTestGraph, Type::U8, Granularity::ChannelWise);
+ TEST_WITH_LAYER_INFO(SoftmaxTestGraph, Type::S16, Granularity::ChannelWise);
SUCCEED();
}
@@ -1247,6 +1435,10 @@ TEST(QuantizedModelVerifierTest, SpaceToBatchND)
TEST_WITH_GRAPH(SpaceToBatchNDTestGraph, Type::U8, Granularity::LayerWise);
TEST_WITH_GRAPH(SpaceToBatchNDTestGraph, Type::U8, Granularity::ChannelWise);
TEST_WITH_GRAPH(SpaceToBatchNDTestGraph, Type::S16, Granularity::ChannelWise);
+
+ TEST_WITH_LAYER_INFO(SpaceToBatchNDTestGraph, Type::U8, Granularity::LayerWise);
+ TEST_WITH_LAYER_INFO(SpaceToBatchNDTestGraph, Type::U8, Granularity::ChannelWise);
+ TEST_WITH_LAYER_INFO(SpaceToBatchNDTestGraph, Type::S16, Granularity::ChannelWise);
SUCCEED();
}
@@ -1271,6 +1463,10 @@ TEST(QuantizedModelVerifierTest, SpaceToDepth)
TEST_WITH_GRAPH(SpaceToDepthTestGraph, Type::U8, Granularity::LayerWise);
TEST_WITH_GRAPH(SpaceToDepthTestGraph, Type::U8, Granularity::ChannelWise);
TEST_WITH_GRAPH(SpaceToDepthTestGraph, Type::S16, Granularity::ChannelWise);
+
+ TEST_WITH_LAYER_INFO(SpaceToDepthTestGraph, Type::U8, Granularity::LayerWise);
+ TEST_WITH_LAYER_INFO(SpaceToDepthTestGraph, Type::U8, Granularity::ChannelWise);
+ TEST_WITH_LAYER_INFO(SpaceToDepthTestGraph, Type::S16, Granularity::ChannelWise);
SUCCEED();
}
@@ -1299,6 +1495,14 @@ TEST(QuantizedModelVerifierTest, Slice)
TEST_WITH_GRAPH(SliceTestGraph<Type::S64>, Type::U8, Granularity::LayerWise);
TEST_WITH_GRAPH(SliceTestGraph<Type::S64>, Type::U8, Granularity::ChannelWise);
TEST_WITH_GRAPH(SliceTestGraph<Type::S64>, Type::S16, Granularity::ChannelWise);
+
+ TEST_WITH_LAYER_INFO(SliceTestGraph<Type::S32>, Type::U8, Granularity::LayerWise);
+ TEST_WITH_LAYER_INFO(SliceTestGraph<Type::S32>, Type::U8, Granularity::ChannelWise);
+ TEST_WITH_LAYER_INFO(SliceTestGraph<Type::S32>, Type::S16, Granularity::ChannelWise);
+
+ TEST_WITH_LAYER_INFO(SliceTestGraph<Type::S64>, Type::U8, Granularity::LayerWise);
+ TEST_WITH_LAYER_INFO(SliceTestGraph<Type::S64>, Type::U8, Granularity::ChannelWise);
+ TEST_WITH_LAYER_INFO(SliceTestGraph<Type::S64>, Type::S16, Granularity::ChannelWise);
SUCCEED();
}
@@ -1379,6 +1583,10 @@ TEST(QuantizedModelVerifierTest, StridedSlice)
TEST_WITH_GRAPH(StridedSliceTestGraph, Type::U8, Granularity::LayerWise);
TEST_WITH_GRAPH(StridedSliceTestGraph, Type::U8, Granularity::ChannelWise);
TEST_WITH_GRAPH(StridedSliceTestGraph, Type::S16, Granularity::ChannelWise);
+
+ TEST_WITH_LAYER_INFO(StridedSliceTestGraph, Type::U8, Granularity::LayerWise);
+ TEST_WITH_LAYER_INFO(StridedSliceTestGraph, Type::U8, Granularity::ChannelWise);
+ TEST_WITH_LAYER_INFO(StridedSliceTestGraph, Type::S16, Granularity::ChannelWise);
SUCCEED();
}
@@ -1463,6 +1671,10 @@ TEST(QuantizedModelVerifierTest, BatchToSpaceND)
TEST_WITH_GRAPH(BatchToSpaceNDTestGraph, Type::U8, Granularity::LayerWise);
TEST_WITH_GRAPH(BatchToSpaceNDTestGraph, Type::U8, Granularity::ChannelWise);
TEST_WITH_GRAPH(BatchToSpaceNDTestGraph, Type::S16, Granularity::ChannelWise);
+
+ TEST_WITH_LAYER_INFO(BatchToSpaceNDTestGraph, Type::U8, Granularity::LayerWise);
+ TEST_WITH_LAYER_INFO(BatchToSpaceNDTestGraph, Type::U8, Granularity::ChannelWise);
+ TEST_WITH_LAYER_INFO(BatchToSpaceNDTestGraph, Type::S16, Granularity::ChannelWise);
SUCCEED();
}
@@ -1487,6 +1699,10 @@ TEST(QuantizedModelVerifierTest, DepthToSpace)
TEST_WITH_GRAPH(DepthToSpaceTestGraph, Type::U8, Granularity::LayerWise);
TEST_WITH_GRAPH(DepthToSpaceTestGraph, Type::U8, Granularity::ChannelWise);
TEST_WITH_GRAPH(DepthToSpaceTestGraph, Type::S16, Granularity::ChannelWise);
+
+ TEST_WITH_LAYER_INFO(DepthToSpaceTestGraph, Type::U8, Granularity::LayerWise);
+ TEST_WITH_LAYER_INFO(DepthToSpaceTestGraph, Type::U8, Granularity::ChannelWise);
+ TEST_WITH_LAYER_INFO(DepthToSpaceTestGraph, Type::S16, Granularity::ChannelWise);
SUCCEED();
}
@@ -1511,6 +1727,10 @@ TEST(QuantizedModelVerifierTest, Concatenation)
TEST_WITH_GRAPH(ConcatenationTestGraph, Type::U8, Granularity::LayerWise);
TEST_WITH_GRAPH(ConcatenationTestGraph, Type::U8, Granularity::ChannelWise);
TEST_WITH_GRAPH(ConcatenationTestGraph, Type::S16, Granularity::ChannelWise);
+
+ TEST_WITH_LAYER_INFO(ConcatenationTestGraph, Type::U8, Granularity::LayerWise);
+ TEST_WITH_LAYER_INFO(ConcatenationTestGraph, Type::U8, Granularity::ChannelWise);
+ TEST_WITH_LAYER_INFO(ConcatenationTestGraph, Type::S16, Granularity::ChannelWise);
SUCCEED();
}
@@ -1557,6 +1777,10 @@ TEST(QuantizedModelVerifierTest, Reshape)
TEST_WITH_GRAPH(ReshapeTestGraph, Type::U8, Granularity::LayerWise);
TEST_WITH_GRAPH(ReshapeTestGraph, Type::U8, Granularity::ChannelWise);
TEST_WITH_GRAPH(ReshapeTestGraph, Type::S16, Granularity::ChannelWise);
+
+ TEST_WITH_LAYER_INFO(ReshapeTestGraph, Type::U8, Granularity::LayerWise);
+ TEST_WITH_LAYER_INFO(ReshapeTestGraph, Type::U8, Granularity::ChannelWise);
+ TEST_WITH_LAYER_INFO(ReshapeTestGraph, Type::S16, Granularity::ChannelWise);
SUCCEED();
}
@@ -1581,6 +1805,10 @@ TEST(QuantizedModelVerifierTest, Tanh)
TEST_WITH_GRAPH(TanhTestGraph, Type::U8, Granularity::LayerWise);
TEST_WITH_GRAPH(TanhTestGraph, Type::U8, Granularity::ChannelWise);
TEST_WITH_GRAPH(TanhTestGraph, Type::S16, Granularity::ChannelWise);
+
+ TEST_WITH_LAYER_INFO(TanhTestGraph, Type::U8, Granularity::LayerWise);
+ TEST_WITH_LAYER_INFO(TanhTestGraph, Type::U8, Granularity::ChannelWise);
+ TEST_WITH_LAYER_INFO(TanhTestGraph, Type::S16, Granularity::ChannelWise);
SUCCEED();
}
@@ -1606,6 +1834,10 @@ TEST(QuantizedModelVerifierTest, Pack)
TEST_WITH_GRAPH(PackTestGraph, Type::U8, Granularity::ChannelWise);
TEST_WITH_GRAPH(PackTestGraph, Type::S16, Granularity::ChannelWise);
+ TEST_WITH_LAYER_INFO(PackTestGraph, Type::U8, Granularity::LayerWise);
+ TEST_WITH_LAYER_INFO(PackTestGraph, Type::U8, Granularity::ChannelWise);
+ TEST_WITH_LAYER_INFO(PackTestGraph, Type::S16, Granularity::ChannelWise);
+
// Test if Pack's qparam is propagated to the input
{
PackTestGraph g;
@@ -1640,6 +1872,10 @@ TEST(QuantizedModelVerifierTest, Pad)
TEST_WITH_GRAPH(PadTestGraph, Type::U8, Granularity::LayerWise);
TEST_WITH_GRAPH(PadTestGraph, Type::U8, Granularity::ChannelWise);
TEST_WITH_GRAPH(PadTestGraph, Type::S16, Granularity::ChannelWise);
+
+ TEST_WITH_LAYER_INFO(PadTestGraph, Type::U8, Granularity::LayerWise);
+ TEST_WITH_LAYER_INFO(PadTestGraph, Type::U8, Granularity::ChannelWise);
+ TEST_WITH_LAYER_INFO(PadTestGraph, Type::S16, Granularity::ChannelWise);
SUCCEED();
}
@@ -1664,6 +1900,10 @@ TEST(QuantizedModelVerifierTest, PadV2)
TEST_WITH_GRAPH(PadV2TestGraph, Type::U8, Granularity::LayerWise);
TEST_WITH_GRAPH(PadV2TestGraph, Type::U8, Granularity::ChannelWise);
TEST_WITH_GRAPH(PadV2TestGraph, Type::S16, Granularity::ChannelWise);
+
+ TEST_WITH_LAYER_INFO(PadV2TestGraph, Type::U8, Granularity::LayerWise);
+ TEST_WITH_LAYER_INFO(PadV2TestGraph, Type::U8, Granularity::ChannelWise);
+ TEST_WITH_LAYER_INFO(PadV2TestGraph, Type::S16, Granularity::ChannelWise);
SUCCEED();
}
@@ -1688,6 +1928,10 @@ TEST(QuantizedModelVerifierTest, MirrorPad)
TEST_WITH_GRAPH(MirrorPadTestGraph, Type::U8, Granularity::LayerWise);
TEST_WITH_GRAPH(MirrorPadTestGraph, Type::U8, Granularity::ChannelWise);
TEST_WITH_GRAPH(MirrorPadTestGraph, Type::S16, Granularity::ChannelWise);
+
+ TEST_WITH_LAYER_INFO(MirrorPadTestGraph, Type::U8, Granularity::LayerWise);
+ TEST_WITH_LAYER_INFO(MirrorPadTestGraph, Type::U8, Granularity::ChannelWise);
+ TEST_WITH_LAYER_INFO(MirrorPadTestGraph, Type::S16, Granularity::ChannelWise);
SUCCEED();
}
@@ -1712,6 +1956,10 @@ TEST(QuantizedModelVerifierTest, Transpose)
TEST_WITH_GRAPH(TransposeTestGraph, Type::U8, Granularity::LayerWise);
TEST_WITH_GRAPH(TransposeTestGraph, Type::U8, Granularity::ChannelWise);
TEST_WITH_GRAPH(TransposeTestGraph, Type::S16, Granularity::ChannelWise);
+
+ TEST_WITH_LAYER_INFO(TransposeTestGraph, Type::U8, Granularity::LayerWise);
+ TEST_WITH_LAYER_INFO(TransposeTestGraph, Type::U8, Granularity::ChannelWise);
+ TEST_WITH_LAYER_INFO(TransposeTestGraph, Type::S16, Granularity::ChannelWise);
SUCCEED();
}
@@ -1736,6 +1984,10 @@ TEST(QuantizedModelVerifierTest, Floor)
TEST_WITH_GRAPH(FloorTestGraph, Type::U8, Granularity::LayerWise);
TEST_WITH_GRAPH(FloorTestGraph, Type::U8, Granularity::ChannelWise);
TEST_WITH_GRAPH(FloorTestGraph, Type::S16, Granularity::ChannelWise);
+
+ TEST_WITH_LAYER_INFO(FloorTestGraph, Type::U8, Granularity::LayerWise);
+ TEST_WITH_LAYER_INFO(FloorTestGraph, Type::U8, Granularity::ChannelWise);
+ TEST_WITH_LAYER_INFO(FloorTestGraph, Type::S16, Granularity::ChannelWise);
SUCCEED();
}
@@ -1869,11 +2121,59 @@ TEST(QuantizedModelVerifierTest, NotEqual_wrong_granularity_NEG)
SUCCEED();
}
+TEST(QuantizedModelVerifierTest, OneHot)
+{
+ TEST_WITH_GRAPH(OneHotTestGraph<Type::S32>, Type::U8, Granularity::LayerWise);
+ TEST_WITH_GRAPH(OneHotTestGraph<Type::S32>, Type::U8, Granularity::ChannelWise);
+ TEST_WITH_GRAPH(OneHotTestGraph<Type::S32>, Type::S16, Granularity::ChannelWise);
+
+ TEST_WITH_GRAPH(OneHotTestGraph<Type::S64>, Type::U8, Granularity::LayerWise);
+ TEST_WITH_GRAPH(OneHotTestGraph<Type::S64>, Type::U8, Granularity::ChannelWise);
+ TEST_WITH_GRAPH(OneHotTestGraph<Type::S64>, Type::S16, Granularity::ChannelWise);
+
+ TEST_WITH_LAYER_INFO(OneHotTestGraph<Type::S32>, Type::U8, Granularity::LayerWise);
+ TEST_WITH_LAYER_INFO(OneHotTestGraph<Type::S32>, Type::U8, Granularity::ChannelWise);
+ TEST_WITH_LAYER_INFO(OneHotTestGraph<Type::S32>, Type::S16, Granularity::ChannelWise);
+
+ TEST_WITH_LAYER_INFO(OneHotTestGraph<Type::S64>, Type::U8, Granularity::LayerWise);
+ TEST_WITH_LAYER_INFO(OneHotTestGraph<Type::S64>, Type::U8, Granularity::ChannelWise);
+ TEST_WITH_LAYER_INFO(OneHotTestGraph<Type::S64>, Type::S16, Granularity::ChannelWise);
+ SUCCEED();
+}
+
+TEST(QuantizedModelVerifierTest, OneHot_wrong_input_type_NEG)
+{
+ TEST_WITH_WRONG_TYPE(OneHotTestGraph<Type::S32>, Type::U8, Granularity::LayerWise, Type::S16);
+ TEST_WITH_WRONG_TYPE(OneHotTestGraph<Type::S32>, Type::U8, Granularity::ChannelWise, Type::S16);
+ TEST_WITH_WRONG_TYPE(OneHotTestGraph<Type::S32>, Type::S16, Granularity::ChannelWise, Type::U8);
+
+ TEST_WITH_WRONG_TYPE(OneHotTestGraph<Type::S64>, Type::U8, Granularity::LayerWise, Type::S16);
+ TEST_WITH_WRONG_TYPE(OneHotTestGraph<Type::S64>, Type::U8, Granularity::ChannelWise, Type::S16);
+ TEST_WITH_WRONG_TYPE(OneHotTestGraph<Type::S64>, Type::S16, Granularity::ChannelWise, Type::U8);
+ SUCCEED();
+}
+
+TEST(QuantizedModelVerifierTest, OneHot_wrong_granularity_NEG)
+{
+ TEST_WITH_WRONG_GRANULARITY(OneHotTestGraph<Type::S32>, Type::U8, Granularity::LayerWise);
+ TEST_WITH_WRONG_GRANULARITY(OneHotTestGraph<Type::S32>, Type::U8, Granularity::ChannelWise);
+ TEST_WITH_WRONG_GRANULARITY(OneHotTestGraph<Type::S32>, Type::S16, Granularity::ChannelWise);
+
+ TEST_WITH_WRONG_GRANULARITY(OneHotTestGraph<Type::S64>, Type::U8, Granularity::LayerWise);
+ TEST_WITH_WRONG_GRANULARITY(OneHotTestGraph<Type::S64>, Type::U8, Granularity::ChannelWise);
+ TEST_WITH_WRONG_GRANULARITY(OneHotTestGraph<Type::S64>, Type::S16, Granularity::ChannelWise);
+ SUCCEED();
+}
+
TEST(QuantizedModelVerifierTest, Div)
{
TEST_WITH_GRAPH(DivTestGraph, Type::U8, Granularity::LayerWise);
TEST_WITH_GRAPH(DivTestGraph, Type::U8, Granularity::ChannelWise);
TEST_WITH_GRAPH(DivTestGraph, Type::S16, Granularity::ChannelWise);
+
+ TEST_WITH_LAYER_INFO(DivTestGraph, Type::U8, Granularity::LayerWise);
+ TEST_WITH_LAYER_INFO(DivTestGraph, Type::U8, Granularity::ChannelWise);
+ TEST_WITH_LAYER_INFO(DivTestGraph, Type::S16, Granularity::ChannelWise);
SUCCEED();
}
@@ -1902,6 +2202,10 @@ TEST(QuantizedModelVerifierTest, FloorDiv)
TEST_WITH_GRAPH(FloorDivTestGraph, Type::U8, Granularity::LayerWise);
TEST_WITH_GRAPH(FloorDivTestGraph, Type::U8, Granularity::ChannelWise);
TEST_WITH_GRAPH(FloorDivTestGraph, Type::S16, Granularity::ChannelWise);
+
+ TEST_WITH_LAYER_INFO(FloorDivTestGraph, Type::U8, Granularity::LayerWise);
+ TEST_WITH_LAYER_INFO(FloorDivTestGraph, Type::U8, Granularity::ChannelWise);
+ TEST_WITH_LAYER_INFO(FloorDivTestGraph, Type::S16, Granularity::ChannelWise);
SUCCEED();
}
@@ -1930,6 +2234,10 @@ TEST(QuantizedModelVerifierTest, Rsqrt)
TEST_WITH_GRAPH(RsqrtTestGraph, Type::U8, Granularity::LayerWise);
TEST_WITH_GRAPH(RsqrtTestGraph, Type::U8, Granularity::ChannelWise);
TEST_WITH_GRAPH(RsqrtTestGraph, Type::S16, Granularity::ChannelWise);
+
+ TEST_WITH_LAYER_INFO(RsqrtTestGraph, Type::U8, Granularity::LayerWise);
+ TEST_WITH_LAYER_INFO(RsqrtTestGraph, Type::U8, Granularity::ChannelWise);
+ TEST_WITH_LAYER_INFO(RsqrtTestGraph, Type::S16, Granularity::ChannelWise);
SUCCEED();
}
@@ -1954,6 +2262,10 @@ TEST(QuantizedModelVerifierTest, Sqrt)
TEST_WITH_GRAPH(SqrtTestGraph, Type::U8, Granularity::LayerWise);
TEST_WITH_GRAPH(SqrtTestGraph, Type::U8, Granularity::ChannelWise);
TEST_WITH_GRAPH(SqrtTestGraph, Type::S16, Granularity::ChannelWise);
+
+ TEST_WITH_LAYER_INFO(SqrtTestGraph, Type::U8, Granularity::LayerWise);
+ TEST_WITH_LAYER_INFO(SqrtTestGraph, Type::U8, Granularity::ChannelWise);
+ TEST_WITH_LAYER_INFO(SqrtTestGraph, Type::S16, Granularity::ChannelWise);
SUCCEED();
}
@@ -1978,6 +2290,10 @@ TEST(QuantizedModelVerifierTest, Elu)
TEST_WITH_GRAPH(EluTestGraph, Type::U8, Granularity::LayerWise);
TEST_WITH_GRAPH(EluTestGraph, Type::U8, Granularity::ChannelWise);
TEST_WITH_GRAPH(EluTestGraph, Type::S16, Granularity::ChannelWise);
+
+ TEST_WITH_LAYER_INFO(EluTestGraph, Type::U8, Granularity::LayerWise);
+ TEST_WITH_LAYER_INFO(EluTestGraph, Type::U8, Granularity::ChannelWise);
+ TEST_WITH_LAYER_INFO(EluTestGraph, Type::S16, Granularity::ChannelWise);
SUCCEED();
}
@@ -2002,6 +2318,10 @@ TEST(QuantizedModelVerifierTest, Pow)
TEST_WITH_GRAPH(PowTestGraph, Type::U8, Granularity::LayerWise);
TEST_WITH_GRAPH(PowTestGraph, Type::U8, Granularity::ChannelWise);
TEST_WITH_GRAPH(PowTestGraph, Type::S16, Granularity::ChannelWise);
+
+ TEST_WITH_LAYER_INFO(PowTestGraph, Type::U8, Granularity::LayerWise);
+ TEST_WITH_LAYER_INFO(PowTestGraph, Type::U8, Granularity::ChannelWise);
+ TEST_WITH_LAYER_INFO(PowTestGraph, Type::S16, Granularity::ChannelWise);
SUCCEED();
}
@@ -2030,6 +2350,10 @@ TEST(QuantizedModelVerifierTest, ResizeBilinear)
TEST_WITH_GRAPH(ResizeBilinearTestGraph, Type::U8, Granularity::LayerWise);
TEST_WITH_GRAPH(ResizeBilinearTestGraph, Type::U8, Granularity::ChannelWise);
TEST_WITH_GRAPH(ResizeBilinearTestGraph, Type::S16, Granularity::ChannelWise);
+
+ TEST_WITH_LAYER_INFO(ResizeBilinearTestGraph, Type::U8, Granularity::LayerWise);
+ TEST_WITH_LAYER_INFO(ResizeBilinearTestGraph, Type::U8, Granularity::ChannelWise);
+ TEST_WITH_LAYER_INFO(ResizeBilinearTestGraph, Type::S16, Granularity::ChannelWise);
SUCCEED();
}
@@ -2054,6 +2378,10 @@ TEST(QuantizedModelVerifierTest, ResizeNearestNeighbor)
TEST_WITH_GRAPH(ResizeNearestNeighborTestGraph, Type::U8, Granularity::LayerWise);
TEST_WITH_GRAPH(ResizeNearestNeighborTestGraph, Type::U8, Granularity::ChannelWise);
TEST_WITH_GRAPH(ResizeNearestNeighborTestGraph, Type::S16, Granularity::ChannelWise);
+
+ TEST_WITH_LAYER_INFO(ResizeNearestNeighborTestGraph, Type::U8, Granularity::LayerWise);
+ TEST_WITH_LAYER_INFO(ResizeNearestNeighborTestGraph, Type::U8, Granularity::ChannelWise);
+ TEST_WITH_LAYER_INFO(ResizeNearestNeighborTestGraph, Type::S16, Granularity::ChannelWise);
SUCCEED();
}
@@ -2099,6 +2427,93 @@ TEST(QuantizedModelVerifierTest, Unpack_wrong_granularity_NEG)
SUCCEED();
}
+TEST(QuantizedModelVerifierTest, Add)
+{
+ TEST_WITH_GRAPH(AddTestGraph, Type::U8, Granularity::LayerWise);
+ TEST_WITH_GRAPH(AddTestGraph, Type::U8, Granularity::ChannelWise);
+ TEST_WITH_GRAPH(AddTestGraph, Type::S16, Granularity::ChannelWise);
+
+ TEST_WITH_LAYER_INFO(AddTestGraph, Type::U8, Granularity::LayerWise);
+ TEST_WITH_LAYER_INFO(AddTestGraph, Type::U8, Granularity::ChannelWise);
+ TEST_WITH_LAYER_INFO(AddTestGraph, Type::S16, Granularity::ChannelWise);
+ SUCCEED();
+}
+
+TEST(QuantizedModelVerifierTest, Add_wrong_type_NEG)
+{
+ TEST_WITH_WRONG_TYPE(AddTestGraph, Type::U8, Granularity::LayerWise, Type::S16);
+ TEST_WITH_WRONG_TYPE(AddTestGraph, Type::U8, Granularity::ChannelWise, Type::S16);
+ TEST_WITH_WRONG_TYPE(AddTestGraph, Type::S16, Granularity::ChannelWise, Type::U8);
+ SUCCEED();
+}
+
+TEST(QuantizedModelVerifierTest, Add_wrong_granularity_NEG)
+{
+ TEST_WITH_WRONG_GRANULARITY_TARGET(AddTestGraph, Type::U8, Granularity::LayerWise, g.x());
+ TEST_WITH_WRONG_GRANULARITY_TARGET(AddTestGraph, Type::U8, Granularity::ChannelWise, g.x());
+ TEST_WITH_WRONG_GRANULARITY_TARGET(AddTestGraph, Type::S16, Granularity::ChannelWise, g.x());
+
+ TEST_WITH_WRONG_GRANULARITY_TARGET(AddTestGraph, Type::U8, Granularity::LayerWise, g.y());
+ TEST_WITH_WRONG_GRANULARITY_TARGET(AddTestGraph, Type::U8, Granularity::ChannelWise, g.y());
+ TEST_WITH_WRONG_GRANULARITY_TARGET(AddTestGraph, Type::S16, Granularity::ChannelWise, g.y());
+ SUCCEED();
+}
+
+TEST(QuantizedModelVerifierTest, Mul)
+{
+ TEST_WITH_GRAPH(MulTestGraph, Type::U8, Granularity::LayerWise);
+ TEST_WITH_GRAPH(MulTestGraph, Type::U8, Granularity::ChannelWise);
+ TEST_WITH_GRAPH(MulTestGraph, Type::S16, Granularity::ChannelWise);
+
+ TEST_WITH_LAYER_INFO(MulTestGraph, Type::U8, Granularity::LayerWise);
+ TEST_WITH_LAYER_INFO(MulTestGraph, Type::U8, Granularity::ChannelWise);
+ TEST_WITH_LAYER_INFO(MulTestGraph, Type::S16, Granularity::ChannelWise);
+ SUCCEED();
+}
+
+TEST(QuantizedModelVerifierTest, Mul_wrong_type_NEG)
+{
+ TEST_WITH_WRONG_TYPE(MulTestGraph, Type::U8, Granularity::LayerWise, Type::S16);
+ TEST_WITH_WRONG_TYPE(MulTestGraph, Type::U8, Granularity::ChannelWise, Type::S16);
+ TEST_WITH_WRONG_TYPE(MulTestGraph, Type::S16, Granularity::ChannelWise, Type::U8);
+ SUCCEED();
+}
+
+TEST(QuantizedModelVerifierTest, Mul_wrong_granularity_NEG)
+{
+ TEST_WITH_WRONG_GRANULARITY_TARGET(MulTestGraph, Type::U8, Granularity::LayerWise, g.x());
+ TEST_WITH_WRONG_GRANULARITY_TARGET(MulTestGraph, Type::U8, Granularity::ChannelWise, g.x());
+ TEST_WITH_WRONG_GRANULARITY_TARGET(MulTestGraph, Type::S16, Granularity::ChannelWise, g.x());
+
+ TEST_WITH_WRONG_GRANULARITY_TARGET(MulTestGraph, Type::U8, Granularity::LayerWise, g.y());
+ TEST_WITH_WRONG_GRANULARITY_TARGET(MulTestGraph, Type::U8, Granularity::ChannelWise, g.y());
+ TEST_WITH_WRONG_GRANULARITY_TARGET(MulTestGraph, Type::S16, Granularity::ChannelWise, g.y());
+ SUCCEED();
+}
+
+// TODO Add following testcases
+//
+// CircleConv2D
+//
+// CircleDepthwiseConv2D
+//
+// CirclePRelu
+//
+// CircleTransposeConv
+//
+// CircleFullyConnected
+//
+// CircleAveragePool2D
+//
+// CircleMaxPool2D
+//
+// CircleMean
+//
+// CircleRelu
+//
+// CircleCast
+//
+
#undef TEST_WITH_GRAPH
#undef TEST_WITH_WRONG_TYPE
#undef TEST_WITH_WRONG_GRANULARITY
diff --git a/compiler/luci/pass/src/RemoveRedundantQuantizePass.cpp b/compiler/luci/pass/src/RemoveRedundantQuantizePass.cpp
new file mode 100644
index 000000000..8a10ad4a0
--- /dev/null
+++ b/compiler/luci/pass/src/RemoveRedundantQuantizePass.cpp
@@ -0,0 +1,104 @@
+/*
+ * Copyright (c) 2022 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "luci/Pass/RemoveRedundantQuantizePass.h"
+
+#include <luci/IR/CircleNode.h>
+
+/**
+ * Remove redundant quantize operations. In a chain of consecutive Quantize Ops,
+ * only the last Quantize Op is valid, so we can remove the rest of the Quantize Ops.
+ *
+ * BEFORE
+ * [CircleNode_1]
+ * |
+ * [CircleQuantize, dtype_1, scale_1, zero_point_1]
+ * |
+ * [CircleQuantize, dtype_2, scale_2, zero_point_2]
+ * |
+ * [CircleNode_2]
+ *
+ * AFTER
+ * [CircleNode_1]
+ * / \
+ * / \
+ * / \
+ * / \
+ * / \
+ * [CircleQuantize, dtype_2, scale_2, zero_point_2] [CircleQuantize, dtype_1, scale_1, zero_point_1]
+ * |
+ * [CircleNode_2]
+ *
+ */
+
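+// A minimal usage sketch (hypothetical driver code, mirroring the tests of
+// other luci passes): run the pass on a loco graph until it reaches a
+// fixed point.
+//
+//   luci::RemoveRedundantQuantizePass pass;
+//   while (pass.run(graph))
+//     ; // repeat until no more redundant Quantize Ops are found
+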
+namespace
+{
+
+bool remove_redundant_quantize(luci::CircleQuantize *node)
+{
+ auto pred_node = loco::must_cast<luci::CircleNode *>(node->input());
+
+ if (node->quantparam() == nullptr or pred_node->quantparam() == nullptr)
+ return false;
+
+ if (node->quantparam()->scale.size() != 1 or node->quantparam()->zerop.size() != 1 or
+ pred_node->quantparam()->scale.size() != 1 or pred_node->quantparam()->zerop.size() != 1)
+ {
+ return false;
+ }
+
+ if (node->dtype() != pred_node->dtype() or
+ pred_node->quantparam()->scale.at(0) != node->quantparam()->scale.at(0) or
+ pred_node->quantparam()->zerop.at(0) != node->quantparam()->zerop.at(0))
+ {
+ return false;
+ }
+
+ replace(node).with(pred_node);
+
+ return true;
+}
+
+bool remove_redundant_subsequent_quantize(luci::CircleQuantize *node)
+{
+ auto pred_node = dynamic_cast<luci::CircleQuantize *>(node->input());
+ if (pred_node == nullptr)
+ return remove_redundant_quantize(node);
+
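+  // Bypass the preceding Quantize. It is not destroyed here; it simply drops
+  // out of the active graph once no output-reachable node uses it.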
+ node->input(pred_node->input());
+ return true;
+}
+
+} // namespace
+
+namespace luci
+{
+
+bool RemoveRedundantQuantizePass::run(loco::Graph *g)
+{
+ bool changed = false;
+ for (auto node : loco::postorder_traversal(loco::output_nodes(g)))
+ {
+ if (auto quantize_node = dynamic_cast<luci::CircleQuantize *>(node))
+ {
+ if (remove_redundant_subsequent_quantize(quantize_node))
+ changed = true;
+ }
+ }
+ return changed;
+}
+
+} // namespace luci
diff --git a/compiler/luci/pass/src/RemoveRedundantQuantizePass.test.cpp b/compiler/luci/pass/src/RemoveRedundantQuantizePass.test.cpp
new file mode 100644
index 000000000..d0166bd20
--- /dev/null
+++ b/compiler/luci/pass/src/RemoveRedundantQuantizePass.test.cpp
@@ -0,0 +1,166 @@
+/*
+ * Copyright (c) 2022 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "luci/Pass/RemoveRedundantQuantizePass.h"
+
+#include <luci/IR/CircleNodes.h>
+
+#include <luci/test/TestIOGraph.h>
+
+#include <gtest/gtest.h>
+
+namespace
+{
+
+using namespace luci::test;
+
+class QuantizeGraphlet
+{
+public:
+ QuantizeGraphlet() = default;
+
+public:
+ void init(loco::Graph *g)
+ {
+ _first_quantize = g->nodes()->create<luci::CircleQuantize>();
+ _first_quantize->dtype(loco::DataType::U8);
+ {
+ auto quantize_param = std::make_unique<luci::CircleQuantParam>();
+ quantize_param->scale = {0.5};
+ quantize_param->zerop = {0};
+ _first_quantize->quantparam(std::move(quantize_param));
+ }
+ _first_quantize->name("first_quantize");
+
+ _second_quantize = g->nodes()->create<luci::CircleQuantize>();
+ _second_quantize->dtype(loco::DataType::U8);
+ {
+ auto quantize_param = std::make_unique<luci::CircleQuantParam>();
+ quantize_param->scale = {0.5};
+ quantize_param->zerop = {0};
+ _second_quantize->quantparam(std::move(quantize_param));
+ }
+ _second_quantize->name("second_quantize");
+ }
+
+protected:
+ luci::CircleQuantize *_first_quantize = nullptr;
+ luci::CircleQuantize *_second_quantize = nullptr;
+};
+
+class RedundantSubsequentQuantizeGraph : public TestIOGraph, public QuantizeGraphlet
+{
+public:
+ RedundantSubsequentQuantizeGraph() = default;
+
+public:
+ void init(void)
+ {
+ TestIOGraph::init({1}, {1});
+ QuantizeGraphlet::init(g());
+
+ input()->dtype(loco::DataType::U8);
+ {
+ auto quantize_param = std::make_unique<luci::CircleQuantParam>();
+ quantize_param->scale = {1};
+ quantize_param->zerop = {1};
+ input()->quantparam(std::move(quantize_param));
+ }
+
+ _first_quantize->input(input());
+ _second_quantize->input(_first_quantize);
+
+ output()->from(_second_quantize);
+ output()->dtype(loco::DataType::U8);
+ }
+};
+
+class RedundantQuantizeGraph : public TestIOGraph, public QuantizeGraphlet
+{
+public:
+ RedundantQuantizeGraph() = default;
+
+public:
+ void init(void)
+ {
+ TestIOGraph::init({1}, {1});
+ QuantizeGraphlet::init(g());
+
+ input()->dtype(loco::DataType::U8);
+ {
+ auto quantize_param = std::make_unique<luci::CircleQuantParam>();
+ quantize_param->scale = {0.5};
+ quantize_param->zerop = {0};
+ input()->quantparam(std::move(quantize_param));
+ }
+
+ _first_quantize->input(input());
+
+ output()->from(_first_quantize);
+ output()->dtype(loco::DataType::U8);
+ }
+};
+
+} // namespace
+
+TEST(RemoveRedundantQuantizePass, name)
+{
+ luci::RemoveRedundantQuantizePass pass;
+ auto const name = pass.name();
+ ASSERT_NE(nullptr, name);
+}
+
+TEST(RemoveRedundantQuantizePass, remove_subsequent_quantize)
+{
+ RedundantSubsequentQuantizeGraph g;
+ luci::RemoveRedundantQuantizePass pass;
+
+ g.init();
+
+ EXPECT_TRUE(pass.run(g.g()));
+
+ int count = 0;
+ for (auto node : loco::active_nodes(loco::output_nodes(g.g())))
+ {
+ if (dynamic_cast<luci::CircleQuantize *>(node))
+ {
+ count++;
+ }
+ }
+
+ ASSERT_EQ(1, count);
+}
+
+TEST(RemoveRedundantQuantizePass, remove_quantize)
+{
+ RedundantQuantizeGraph g;
+ luci::RemoveRedundantQuantizePass pass;
+
+ g.init();
+
+ EXPECT_TRUE(pass.run(g.g()));
+
+ int count = 0;
+ for (auto node : loco::active_nodes(loco::output_nodes(g.g())))
+ {
+ if (dynamic_cast<luci::CircleQuantize *>(node))
+ {
+ count++;
+ }
+ }
+
+ ASSERT_EQ(0, count);
+}
diff --git a/compiler/luci/pass/src/RemoveRedundantTransposePass.cpp b/compiler/luci/pass/src/RemoveRedundantTransposePass.cpp
index 71c51ecda..75cf72795 100644
--- a/compiler/luci/pass/src/RemoveRedundantTransposePass.cpp
+++ b/compiler/luci/pass/src/RemoveRedundantTransposePass.cpp
@@ -71,7 +71,7 @@ bool remove_consecutive_transpose_function(luci::CircleTranspose *target_node)
for (uint32_t i = 0; i < pred_perm->size<loco::DataType::S32>(); i++)
{
new_const_node->at<loco::DataType::S32>(i) =
- target_perm->at<loco::DataType::S32>(pred_perm->at<loco::DataType::S32>(i));
+ pred_perm->at<loco::DataType::S32>(target_perm->at<loco::DataType::S32>(i));
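+ // i.e. composed_perm[i] = pred_perm[target_perm[i]]: e.g. pred_perm
+ // {0,3,2,1} followed by target_perm {0,2,3,1} composes to {0,2,1,3}
+ // (exercised by the type3 test case added below).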
}
new_const_node->name(name + "/Transpose/perm");
diff --git a/compiler/luci/pass/src/RemoveRedundantTransposePass.test.cpp b/compiler/luci/pass/src/RemoveRedundantTransposePass.test.cpp
index e80623499..bb8e292d4 100644
--- a/compiler/luci/pass/src/RemoveRedundantTransposePass.test.cpp
+++ b/compiler/luci/pass/src/RemoveRedundantTransposePass.test.cpp
@@ -271,6 +271,31 @@ TEST(RemoveRedundantTransposePass, remove_consecutive_transpose_function_type2)
ASSERT_EQ(2, perm->at<loco::DataType::S32>(3));
}
+TEST(RemoveRedundantTransposePass, remove_consecutive_transpose_function_type3)
+{
+ auto graph = loco::make_graph();
+ create_redundunt_transpose(graph.get(), {0, 3, 2, 1}, {0, 2, 3, 1});
+
+ luci::RemoveRedundantTransposePass pass;
+ while (pass.run(graph.get()))
+ ;
+ luci::CircleTranspose *transpose_node = nullptr;
+ for (auto node : loco::active_nodes(loco::output_nodes(graph.get())))
+ {
+ auto trans = dynamic_cast<luci::CircleTranspose *>(node);
+ if (not trans)
+ continue;
+ transpose_node = trans;
+ break;
+ }
+ ASSERT_NE(nullptr, transpose_node);
+ auto perm = loco::must_cast<luci::CircleConst *>(transpose_node->perm());
+ ASSERT_EQ(0, perm->at<loco::DataType::S32>(0));
+ ASSERT_EQ(2, perm->at<loco::DataType::S32>(1));
+ ASSERT_EQ(1, perm->at<loco::DataType::S32>(2));
+ ASSERT_EQ(3, perm->at<loco::DataType::S32>(3));
+}
+
/**
* @brief Test case that first transpose output become input of operations more than one.
*/
diff --git a/compiler/luci/pass/src/RemoveUnnecessaryReshapePass.cpp b/compiler/luci/pass/src/RemoveUnnecessaryReshapePass.cpp
index 3f0c4ee82..fb46f490d 100644
--- a/compiler/luci/pass/src/RemoveUnnecessaryReshapePass.cpp
+++ b/compiler/luci/pass/src/RemoveUnnecessaryReshapePass.cpp
@@ -58,6 +58,25 @@ bool remove_no_effect_reshape(luci::CircleNode *node)
namespace luci
{
+/**
+ * BEFORE
+ * [CircleNode]
+ * |
+ * [CircleReshape]
+ * |
+ * [CircleNode]
+ *
+ * AFTER
+ * [CircleNode]
+ * | \
+ * | [CircleReshape]
+ * |
+ * [CircleNode]
+ *
+ * NOTE
+ * This pass removes Reshape when its input and output have the same shape.
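+ *
+ * For example, a Reshape whose input and output shapes are both (1, 2, 3)
+ * changes nothing and is bypassed as shown in the AFTER diagram.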
+ */
+
bool RemoveUnnecessaryReshapePass::run(loco::Graph *g)
{
bool changed = false;
diff --git a/compiler/luci/pass/src/ReplaceMulAddWithDepthwiseConvPass.cpp b/compiler/luci/pass/src/ReplaceMulAddWithDepthwiseConvPass.cpp
index a0cc0194f..bca0a9483 100644
--- a/compiler/luci/pass/src/ReplaceMulAddWithDepthwiseConvPass.cpp
+++ b/compiler/luci/pass/src/ReplaceMulAddWithDepthwiseConvPass.cpp
@@ -26,8 +26,17 @@ namespace
luci::CircleConst *create_weights_from_gamma(luci::CircleConst *gamma)
{
- assert(gamma->rank() == 1);
- auto channel_size = gamma->dim(0).value();
+ assert(gamma->rank() == 1 or gamma->rank() == 4);
+
+ uint32_t channel_idx = gamma->rank() - 1;
+ uint32_t channel_size = gamma->dim(channel_idx).value();
+
+ // Gamma should be broadcastable in the channel direction
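+ // e.g., a {1, 1, 1, 16} gamma has channel_idx 3 and channel_size 16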
+ for (uint32_t i = 0; i < gamma->rank(); i++)
+ {
+ if (i != channel_idx)
+ assert(gamma->dim(i).value() == 1); // FIX is_batchnorm_mul UNLESS
+ }
auto name = gamma->name();
assert(name.length() > 0);
@@ -53,8 +62,17 @@ luci::CircleConst *create_weights_from_gamma(luci::CircleConst *gamma)
luci::CircleConst *create_bias_from_beta(luci::CircleConst *beta)
{
- assert(beta->rank() == 1);
- auto channel_size = beta->dim(0).value();
+ assert(beta->rank() == 1 or beta->rank() == 4);
+
+ uint32_t channel_idx = beta->rank() - 1;
+ uint32_t channel_size = beta->dim(channel_idx).value();
+
+ // Beta should be broadcastable in the channel direction
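+ // (same layout as gamma, e.g. a {1, 1, 1, 16} beta gives channel_size 16)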
+ for (uint32_t i = 0; i < beta->rank(); i++)
+ {
+ if (i != channel_idx)
+ assert(beta->dim(i).value() == 1); // FIX is_batchnorm_add UNLESS
+ }
auto name = beta->name();
assert(name.length() > 0);
diff --git a/compiler/luci/pass/src/ReplaceMulAddWithDepthwiseConvPass.test.cpp b/compiler/luci/pass/src/ReplaceMulAddWithDepthwiseConvPass.test.cpp
index 903d4dcc9..bac033112 100644
--- a/compiler/luci/pass/src/ReplaceMulAddWithDepthwiseConvPass.test.cpp
+++ b/compiler/luci/pass/src/ReplaceMulAddWithDepthwiseConvPass.test.cpp
@@ -141,6 +141,37 @@ TEST(ReplaceMulAddWithDepthwiseConv, simple)
}
}
+TEST(ReplaceMulAddWithDepthwiseConv, simple_rank4)
+{
+ SimpleGraph g;
+
+ const uint32_t channel_size = 16;
+ g.gamma->shape({1, 1, 1, channel_size});
+ g.beta->shape({1, 1, 1, channel_size});
+
+ luci::ReplaceMulAddWithDepthwiseConvPass pass;
+ while (pass.run(&g.g))
+ ;
+
+ auto dwconv = dynamic_cast<luci::CircleDepthwiseConv2D *>(g.output->from());
+ EXPECT_NE(nullptr, dwconv);
+
+ auto weights = dynamic_cast<luci::CircleConst *>(dwconv->filter());
+ auto bias = dynamic_cast<luci::CircleConst *>(dwconv->bias());
+ EXPECT_NE(nullptr, weights);
+ EXPECT_EQ(4, weights->rank());
+ EXPECT_EQ(channel_size, weights->dim(3).value());
+ EXPECT_NE(nullptr, bias);
+ EXPECT_EQ(1, bias->rank());
+ EXPECT_EQ(channel_size, bias->dim(0).value());
+
+ for (uint32_t i = 0; i < channel_size; i++)
+ {
+ EXPECT_FLOAT_EQ(i, weights->at<loco::DataType::FLOAT32>(i));
+ EXPECT_FLOAT_EQ(i, bias->at<loco::DataType::FLOAT32>(i));
+ }
+}
+
TEST(ReplaceMulAddWithDepthwiseConv, wrong_op_NEG)
{
SimpleGraph g;
@@ -154,3 +185,18 @@ TEST(ReplaceMulAddWithDepthwiseConv, wrong_op_NEG)
EXPECT_EQ(false, changed);
}
+
+TEST(ReplaceMulAddWithDepthwiseConv, rank3_NEG)
+{
+ SimpleGraph g;
+
+ g.input->shape({4, 4, 16});
+ g.mul->shape({4, 4, 16});
+ g.add->shape({4, 4, 16});
+ g.output->shape({4, 4, 16});
+
+ luci::ReplaceMulAddWithDepthwiseConvPass pass;
+ auto changed = pass.run(&g.g);
+
+ EXPECT_EQ(false, changed);
+}
diff --git a/compiler/luci/pass/src/SubstituteSplitVToSplitPass.cpp b/compiler/luci/pass/src/SubstituteSplitVToSplitPass.cpp
index 9cba9a9e7..57c386d99 100644
--- a/compiler/luci/pass/src/SubstituteSplitVToSplitPass.cpp
+++ b/compiler/luci/pass/src/SubstituteSplitVToSplitPass.cpp
@@ -24,15 +24,6 @@
namespace
{
-void copy_quantparam(luci::CircleNode *dst, const luci::CircleNode *src)
-{
- auto q = src->quantparam();
- if (q == nullptr)
- dst->quantparam(nullptr);
- else
- dst->quantparam(std::make_unique<luci::CircleQuantParam>(*q));
-}
-
// SplitV is substituted to Split if the contents of size_splits are all same
// For example,
// size_splits = [32, 32] -> substitute
@@ -67,7 +58,7 @@ bool resolve_splitv(luci::CircleSplitV *sv)
split_node->split_dim(sv->split_dim());
split_node->num_split(sv->num_split());
split_node->name(sv->name());
- copy_quantparam(split_node, sv);
+ copy_quantparam(sv, split_node);
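+ // NOTE the copy_quantparam used here (presumably the shared luci helper)
+ // takes (src, dst), the reverse of the removed local helper's (dst, src).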
luci::add_origin(split_node, luci::get_origin(sv));
auto succs = loco::succs(sv);
@@ -78,7 +69,7 @@ bool resolve_splitv(luci::CircleSplitV *sv)
so_node->input(split_node);
so_node->index(svo->index());
so_node->name(svo->name());
- copy_quantparam(so_node, svo);
+ copy_quantparam(svo, so_node);
luci::add_origin(so_node, luci::get_origin(svo));
replace(svo).with(so_node);
diff --git a/compiler/luci/pass/src/SubstituteSqueezeToReshapePass.cpp b/compiler/luci/pass/src/SubstituteSqueezeToReshapePass.cpp
index f48763782..df7266df9 100644
--- a/compiler/luci/pass/src/SubstituteSqueezeToReshapePass.cpp
+++ b/compiler/luci/pass/src/SubstituteSqueezeToReshapePass.cpp
@@ -76,18 +76,6 @@ std::vector<uint32_t> node_shape(const luci::CircleNode *input)
}
/**
- * @brief copy quantparam of src to dst
- */
-void copy_quantparam(luci::CircleNode *dst, const luci::CircleNode *src)
-{
- auto q = src->quantparam();
- if (q == nullptr)
- dst->quantparam(nullptr);
- else
- dst->quantparam(std::make_unique<luci::CircleQuantParam>(*q));
-}
-
-/**
* @brief return CircleConst ptr with values of new_shape
*/
luci::CircleConst *create_shape_const(loco::Graph *graph, const std::vector<uint32_t> &new_shape)
@@ -142,7 +130,7 @@ bool substitute_squeeze_to_reshape(luci::CircleSqueeze *squeeze)
auto graph = squeeze->graph();
auto reshape = graph->nodes()->create<luci::CircleReshape>();
auto shape_const = create_shape_const(graph, reshape_shape);
- copy_quantparam(reshape, squeeze);
+ copy_quantparam(squeeze, reshape);
reshape->name(name + "/Reshape");
luci::add_origin(reshape, luci::get_origin(squeeze));
shape_const->name(name + "/Reshape/shape");
diff --git a/compiler/luci/pass/src/SubstituteStridedSliceToReshapePass.cpp b/compiler/luci/pass/src/SubstituteStridedSliceToReshapePass.cpp
index f50f2f54f..9e1c5a4a3 100644
--- a/compiler/luci/pass/src/SubstituteStridedSliceToReshapePass.cpp
+++ b/compiler/luci/pass/src/SubstituteStridedSliceToReshapePass.cpp
@@ -124,7 +124,7 @@ bool substitute_strided_slice_to_reshape(luci::CircleStridedSlice *ss_node)
std::bitset<32> end_mask(ss_node->end_mask());
std::bitset<32> shrink_axis_mask(ss_node->shrink_axis_mask());
- uint input_rank = input_node->rank();
+ uint32_t input_rank = input_node->rank();
for (uint32_t i = 0; i < input_rank; i++)
{
if (!input_node->dim(i).known())
diff --git a/compiler/luci/pass/src/VerifyQuantizedBiasScale.cpp b/compiler/luci/pass/src/VerifyQuantizedBiasScale.cpp
new file mode 100644
index 000000000..e65d576cd
--- /dev/null
+++ b/compiler/luci/pass/src/VerifyQuantizedBiasScale.cpp
@@ -0,0 +1,105 @@
+/*
+ * Copyright (c) 2022 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "VerifyQuantizedBiasScale.h"
+
+#include <cmath>
+
+// This macro is undef at the end of the file
+#define RETURN_FALSE_UNLESS(ARG) \
+ if (not(ARG)) \
+ { \
+ return false; \
+ }
+
+namespace
+{
+
+bool same(float a, float b)
+{
+  constexpr float epsilon = 1e-10;
+  // std::fabs keeps the comparison in floating point; plain abs could bind
+  // to the integer overload.
+  return std::fabs(a - b) < epsilon;
+}
+
+// Check bias scale = input scale * weight scale
+// This function checks both LWQ and CWQ
+bool check_bias_scale(const loco::Node *input, const loco::Node *weights, const loco::Node *bias)
+{
+ auto input_node = loco::must_cast<const luci::CircleNode *>(input);
+ auto input_qparam = input_node->quantparam();
+ RETURN_FALSE_UNLESS(input_qparam != nullptr);
+
+ auto weights_node = loco::must_cast<const luci::CircleNode *>(weights);
+ auto weights_qparam = weights_node->quantparam();
+ RETURN_FALSE_UNLESS(weights_qparam != nullptr);
+
+ auto bias_node = loco::must_cast<const luci::CircleNode *>(bias);
+ auto bias_qparam = bias_node->quantparam();
+ RETURN_FALSE_UNLESS(bias_qparam != nullptr);
+
+ RETURN_FALSE_UNLESS(input_qparam->scale.size() == 1);
+ RETURN_FALSE_UNLESS(weights_qparam->scale.size() == bias_qparam->scale.size());
+
+ auto input_scale = input_qparam->scale[0];
+ for (uint32_t i = 0; i < weights_qparam->scale.size(); i++)
+ {
+ auto weights_scale = weights_qparam->scale[i];
+ auto bias_scale = bias_qparam->scale[i];
+ RETURN_FALSE_UNLESS(same(bias_scale, input_scale * weights_scale));
+ }
+ return true;
+}
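+
+// Worked example (hypothetical values): with input scale 0.5 and per-channel
+// weight scales {0.1, 0.2}, the bias must carry scales {0.05, 0.1} to pass
+// the check above.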
+
+} // namespace
+
+namespace luci
+{
+
+bool VerifyQuantizedBiasScale::visit(const luci::CircleConv2D *node)
+{
+ RETURN_FALSE_UNLESS(check_bias_scale(node->input(), node->filter(), node->bias()));
+ return true;
+}
+
+bool VerifyQuantizedBiasScale::visit(const luci::CircleDepthwiseConv2D *node)
+{
+ RETURN_FALSE_UNLESS(check_bias_scale(node->input(), node->filter(), node->bias()));
+ return true;
+}
+
+bool VerifyQuantizedBiasScale::visit(const luci::CircleFullyConnected *node)
+{
+ luci::CircleConst *bias = dynamic_cast<luci::CircleConst *>(node->bias());
+ if (bias != nullptr)
+ {
+ RETURN_FALSE_UNLESS(check_bias_scale(node->input(), node->weights(), node->bias()));
+ }
+ return true;
+}
+
+bool VerifyQuantizedBiasScale::visit(const luci::CircleTransposeConv *node)
+{
+ luci::CircleConst *bias = dynamic_cast<luci::CircleConst *>(node->bias());
+ if (bias != nullptr)
+ {
+ RETURN_FALSE_UNLESS(check_bias_scale(node->outBackprop(), node->filter(), node->bias()));
+ }
+ return true;
+}
+
+} // namespace luci
+
+#undef RETURN_FALSE_UNLESS
diff --git a/compiler/luci/pass/src/VerifyQuantizedBiasScale.h b/compiler/luci/pass/src/VerifyQuantizedBiasScale.h
new file mode 100644
index 000000000..b41f78eca
--- /dev/null
+++ b/compiler/luci/pass/src/VerifyQuantizedBiasScale.h
@@ -0,0 +1,59 @@
+/*
+ * Copyright (c) 2022 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __LUCI_VERIFY_QUANTIZED_BIAS_SCALE_H__
+#define __LUCI_VERIFY_QUANTIZED_BIAS_SCALE_H__
+
+#include <luci/IR/CircleNodes.h>
+#include <luci/IR/CircleNodeVisitor.h>
+
+#include <memory>
+
+namespace luci
+{
+
+/**
+ * @brief Verify the scale of quantized bias node
+ * @details
+ *
+ * Bias of CONV, DCONV, TCONV, FC layers should meet the following condition.
+ *
+ * bias scale = input scale * weights scale
+ */
+class VerifyQuantizedBiasScale : public luci::CircleNodeVisitor<bool>
+{
+public:
+ static std::shared_ptr<VerifyQuantizedBiasScale> create()
+ {
+ return std::make_shared<VerifyQuantizedBiasScale>();
+ }
+
+public:
+ bool verify(luci::CircleNode *node) { return node->accept(this); }
+
+private:
+ // Operators with bias
+ bool visit(const luci::CircleConv2D *node);
+ bool visit(const luci::CircleDepthwiseConv2D *node);
+ bool visit(const luci::CircleFullyConnected *node);
+ bool visit(const luci::CircleTransposeConv *node);
+
+ bool visit(const luci::CircleNode *) { return true; }
+};
+
+} // namespace luci
+
+#endif // __LUCI_VERIFY_QUANTIZED_BIAS_SCALE_H__
diff --git a/compiler/luci/pass/src/VerifyQuantizedNodeGranularity.cpp b/compiler/luci/pass/src/VerifyQuantizedNodeGranularity.cpp
new file mode 100644
index 000000000..8697090a7
--- /dev/null
+++ b/compiler/luci/pass/src/VerifyQuantizedNodeGranularity.cpp
@@ -0,0 +1,38 @@
+/*
+ * Copyright (c) 2022 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "VerifyQuantizedNodeGranularity.h"
+
+#include <luci/IR/CircleNodes.h>
+#include <luci/Pass/QuantizationParameters.h>
+
+#include <memory>
+
+namespace luci
+{
+
+std::shared_ptr<VerifyQuantizedNodeGranularity>
+VerifyQuantizedNodeGranularity::create(Granularity granularity)
+{
+ if (granularity == Granularity::ChannelWise)
+ return std::make_shared<VerifyQuantizedNodeChannelWiseGranularity>();
+ else if (granularity == Granularity::LayerWise)
+ return std::make_shared<VerifyQuantizedNodeLayerWiseGranularity>();
+ else
+ throw std::domain_error("Not supported Granularity type");
+}
+
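+// A minimal usage sketch (hypothetical caller): pick the verifier for the
+// target granularity and let each node accept it.
+//
+//   auto verifier = luci::VerifyQuantizedNodeGranularity::create(granularity);
+//   bool ok = circle_node->accept(verifier.get());
+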
+} // namespace luci
diff --git a/compiler/luci/pass/src/VerifyQuantizedNodeChannelWiseGranularity.h b/compiler/luci/pass/src/VerifyQuantizedNodeGranularity.h
index bf3ff2e8a..442183c18 100644
--- a/compiler/luci/pass/src/VerifyQuantizedNodeChannelWiseGranularity.h
+++ b/compiler/luci/pass/src/VerifyQuantizedNodeGranularity.h
@@ -1,5 +1,6 @@
/*
- * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ * Copyright (c) 2022 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
@@ -13,13 +14,15 @@
* limitations under the License.
*/
-#ifndef __LUCI_VERIFY_QUANTIZED_NODE_CHANNELWISE_GRANULARITY_H__
-#define __LUCI_VERIFY_QUANTIZED_NODE_CHANNELWISE_GRANULARITY_H__
+#ifndef __LUCI_VERIFY_QUANTIZED_NODE_GRANULARITY_H__
+#define __LUCI_VERIFY_QUANTIZED_NODE_GRANULARITY_H__
#include <luci/IR/CircleNodes.h>
#include <luci/IR/CircleNodeVisitor.h>
#include <luci/Pass/QuantizationParameters.h>
+#include <memory>
+
using Granularity = luci::QuantizationGranularity;
// This macro is undef at the end of the file
@@ -33,16 +36,19 @@ namespace luci
{
/**
- * @brief Verify the granualrity of channel-wise quantized node
+ * @brief Verify the granularity of quantized node
* @details
*
* Targets to verify
* - node's output (i.e., node itself)
* - node's inputs
*/
-struct VerifyQuantizedNodeChannelWiseGranularity final : public luci::CircleNodeVisitor<bool>
+class VerifyQuantizedNodeGranularity : public luci::CircleNodeVisitor<bool>
{
-private:
+public:
+ static std::shared_ptr<VerifyQuantizedNodeGranularity> create(Granularity granularity);
+
+protected:
bool is_lwq(const loco::Node *node)
{
auto circle_node = loco::must_cast<const luci::CircleNode *>(node);
@@ -59,48 +65,15 @@ private:
return true;
}
- uint32_t rank(const loco::Node *node)
- {
- auto circle_node = loco::must_cast<const luci::CircleNode *>(node);
- return circle_node->rank();
- }
-
- bool is_cwq_const(const loco::Node *node, uint32_t channel_dim)
- {
- auto circle_node = loco::must_cast<const luci::CircleConst *>(node);
-
- assert(channel_dim < circle_node->rank()); // FIX_CALLER_UNLESS
- auto channel_size = circle_node->dim(channel_dim).value();
-
- if (circle_node->quantparam() == nullptr)
- return false;
-
- if (circle_node->quantparam()->quantized_dimension != static_cast<int32_t>(channel_dim))
- return false;
-
- if (circle_node->quantparam()->scale.size() != channel_size)
- return false;
-
- if (circle_node->quantparam()->zerop.size() != channel_size)
- return false;
-
- return true;
- }
-
private:
- bool visit(const luci::CircleConv2D *node)
- {
- RETURN_FALSE_UNLESS(is_lwq(node))
- RETURN_FALSE_UNLESS(is_lwq(node->input()))
- RETURN_FALSE_UNLESS(is_cwq_const(node->filter(), 0))
- luci::CircleConst *bias = dynamic_cast<luci::CircleConst *>(node->bias());
- if (bias != nullptr)
- RETURN_FALSE_UNLESS(is_cwq_const(node->bias(), rank(node->bias()) - 1))
- return true;
- }
+ virtual bool visit(const luci::CircleConv2D *node) = 0;
bool visit(const luci::CircleConcatenation *node)
{
+ // Skip granularity check for concatenation of indices
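+ // (e.g., concatenating index tensors such as ArgMax outputs, which are
+ // S32/S64 and carry no quantization parameters)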
+ if (node->dtype() == loco::DataType::S32 or node->dtype() == loco::DataType::S64)
+ return true;
+
RETURN_FALSE_UNLESS(is_lwq(node))
for (uint32_t i = 0; i < node->numValues(); i++)
{
@@ -116,25 +89,9 @@ private:
return true;
}
- bool visit(const luci::CircleDepthwiseConv2D *node)
- {
- RETURN_FALSE_UNLESS(is_lwq(node))
- RETURN_FALSE_UNLESS(is_lwq(node->input()))
- RETURN_FALSE_UNLESS(is_cwq_const(node->filter(), 3))
- luci::CircleConst *bias = dynamic_cast<luci::CircleConst *>(node->bias());
- if (bias != nullptr)
- RETURN_FALSE_UNLESS(is_cwq_const(node->bias(), rank(node->bias()) - 1))
- return true;
- }
+ virtual bool visit(const luci::CircleDepthwiseConv2D *node) = 0;
- bool visit(const luci::CircleInstanceNorm *node)
- {
- RETURN_FALSE_UNLESS(is_lwq(node))
- RETURN_FALSE_UNLESS(is_lwq(node->input()))
- RETURN_FALSE_UNLESS(is_cwq_const(node->gamma(), rank(node->gamma()) - 1))
- RETURN_FALSE_UNLESS(is_cwq_const(node->beta(), rank(node->beta()) - 1))
- return true;
- }
+ virtual bool visit(const luci::CircleInstanceNorm *node) = 0;
bool visit(const luci::CirclePack *node)
{
@@ -168,37 +125,11 @@ private:
return true;
}
- bool visit(const luci::CirclePRelu *node)
- {
- RETURN_FALSE_UNLESS(is_lwq(node))
- RETURN_FALSE_UNLESS(is_lwq(node->input()))
- RETURN_FALSE_UNLESS(is_cwq_const(node->alpha(), rank(node->alpha()) - 1))
- return true;
- }
-
- bool visit(const luci::CircleTransposeConv *node)
- {
- RETURN_FALSE_UNLESS(is_lwq(node))
- RETURN_FALSE_UNLESS(is_lwq(node->outBackprop()))
- RETURN_FALSE_UNLESS(is_cwq_const(node->filter(), 0))
- luci::CircleConst *bias = dynamic_cast<luci::CircleConst *>(node->bias());
- if (bias != nullptr)
- RETURN_FALSE_UNLESS(is_cwq_const(node->bias(), rank(node->bias()) - 1))
+ virtual bool visit(const luci::CirclePRelu *node) = 0;
- return true;
- }
+ virtual bool visit(const luci::CircleTransposeConv *node) = 0;
- bool visit(const luci::CircleFullyConnected *node)
- {
- RETURN_FALSE_UNLESS(is_lwq(node))
- RETURN_FALSE_UNLESS(is_lwq(node->input()))
- RETURN_FALSE_UNLESS(is_cwq_const(node->weights(), 0))
- luci::CircleConst *bias = dynamic_cast<luci::CircleConst *>(node->bias());
- // Bias is optional (it can be CircleOutputExclude)
- if (bias != nullptr)
- RETURN_FALSE_UNLESS(is_cwq_const(node->bias(), rank(node->bias()) - 1))
- return true;
- }
+ virtual bool visit(const luci::CircleFullyConnected *node) = 0;
bool visit(const luci::CircleAdd *node)
{
@@ -258,6 +189,14 @@ private:
return true;
}
+ bool visit(const luci::CircleOneHot *node)
+ {
+ RETURN_FALSE_UNLESS(is_lwq(node));
+ RETURN_FALSE_UNLESS(is_lwq(node->off_value()));
+ RETURN_FALSE_UNLESS(is_lwq(node->on_value()));
+ return true;
+ }
+
bool visit(const luci::CircleRelu *node)
{
RETURN_FALSE_UNLESS(is_lwq(node));
@@ -480,8 +419,186 @@ private:
bool visit(const luci::CircleNode *) { return true; }
};
+class VerifyQuantizedNodeChannelWiseGranularity final : public VerifyQuantizedNodeGranularity
+{
+private:
+ uint32_t rank(const loco::Node *node)
+ {
+ auto circle_node = loco::must_cast<const luci::CircleNode *>(node);
+ return circle_node->rank();
+ }
+
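+ // A const is channel-wise quantized (CWQ) when quantized_dimension matches
+ // channel_dim and there is one scale/zerop pair per channel, e.g. a
+ // {16, 3, 3, 3} filter quantized along dim 0 needs 16 scales and 16 zerops.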
+ bool is_cwq_const(const loco::Node *node, uint32_t channel_dim)
+ {
+ auto circle_node = loco::must_cast<const luci::CircleConst *>(node);
+
+ assert(channel_dim < circle_node->rank()); // FIX_CALLER_UNLESS
+ auto channel_size = circle_node->dim(channel_dim).value();
+
+ if (circle_node->quantparam() == nullptr)
+ return false;
+
+ if (circle_node->quantparam()->quantized_dimension != static_cast<int32_t>(channel_dim))
+ return false;
+
+ if (circle_node->quantparam()->scale.size() != channel_size)
+ return false;
+
+ if (circle_node->quantparam()->zerop.size() != channel_size)
+ return false;
+
+ return true;
+ }
+
+private:
+ bool visit(const luci::CircleConv2D *node)
+ {
+ RETURN_FALSE_UNLESS(is_lwq(node))
+ RETURN_FALSE_UNLESS(is_lwq(node->input()))
+ RETURN_FALSE_UNLESS(is_cwq_const(node->filter(), 0))
+ luci::CircleConst *bias = dynamic_cast<luci::CircleConst *>(node->bias());
+ if (bias != nullptr)
+ RETURN_FALSE_UNLESS(is_cwq_const(node->bias(), rank(node->bias()) - 1))
+ return true;
+ }
+
+ bool visit(const luci::CircleDepthwiseConv2D *node)
+ {
+ RETURN_FALSE_UNLESS(is_lwq(node))
+ RETURN_FALSE_UNLESS(is_lwq(node->input()))
+ RETURN_FALSE_UNLESS(is_cwq_const(node->filter(), 3))
+ luci::CircleConst *bias = dynamic_cast<luci::CircleConst *>(node->bias());
+ if (bias != nullptr)
+ RETURN_FALSE_UNLESS(is_cwq_const(node->bias(), rank(node->bias()) - 1))
+ return true;
+ }
+
+ bool visit(const luci::CircleInstanceNorm *node)
+ {
+ RETURN_FALSE_UNLESS(is_lwq(node))
+ RETURN_FALSE_UNLESS(is_lwq(node->input()))
+ RETURN_FALSE_UNLESS(is_cwq_const(node->gamma(), rank(node->gamma()) - 1))
+ RETURN_FALSE_UNLESS(is_cwq_const(node->beta(), rank(node->beta()) - 1))
+ return true;
+ }
+
+ bool visit(const luci::CirclePRelu *node)
+ {
+ RETURN_FALSE_UNLESS(is_lwq(node))
+ RETURN_FALSE_UNLESS(is_lwq(node->input()))
+ RETURN_FALSE_UNLESS(is_cwq_const(node->alpha(), rank(node->alpha()) - 1))
+ return true;
+ }
+
+ bool visit(const luci::CircleTransposeConv *node)
+ {
+ RETURN_FALSE_UNLESS(is_lwq(node))
+ RETURN_FALSE_UNLESS(is_lwq(node->outBackprop()))
+ RETURN_FALSE_UNLESS(is_cwq_const(node->filter(), 0))
+ luci::CircleConst *bias = dynamic_cast<luci::CircleConst *>(node->bias());
+ if (bias != nullptr)
+ RETURN_FALSE_UNLESS(is_cwq_const(node->bias(), rank(node->bias()) - 1))
+
+ return true;
+ }
+
+ bool visit(const luci::CircleFullyConnected *node)
+ {
+ RETURN_FALSE_UNLESS(is_lwq(node))
+ RETURN_FALSE_UNLESS(is_lwq(node->input()))
+ RETURN_FALSE_UNLESS(is_cwq_const(node->weights(), 0))
+ luci::CircleConst *bias = dynamic_cast<luci::CircleConst *>(node->bias());
+ // Bias is optional (it can be CircleOutputExclude)
+ if (bias != nullptr)
+ RETURN_FALSE_UNLESS(is_cwq_const(node->bias(), rank(node->bias()) - 1))
+ return true;
+ }
+};
+
+class VerifyQuantizedNodeLayerWiseGranularity final : public VerifyQuantizedNodeGranularity
+{
+private:
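+ // A const is layer-wise quantized (LWQ) when it carries exactly one
+ // scale/zerop pair for the whole tensor.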
+ bool is_lwq_const(const loco::Node *node)
+ {
+ auto circle_node = loco::must_cast<const luci::CircleConst *>(node);
+
+ if (circle_node->quantparam() == nullptr)
+ return false;
+
+ if (circle_node->quantparam()->scale.size() != 1)
+ return false;
+
+ if (circle_node->quantparam()->zerop.size() != 1)
+ return false;
+
+ return true;
+ }
+
+private:
+ bool visit(const luci::CircleConv2D *node)
+ {
+ RETURN_FALSE_UNLESS(is_lwq(node))
+ RETURN_FALSE_UNLESS(is_lwq(node->input()))
+ RETURN_FALSE_UNLESS(is_lwq_const(node->filter()))
+ luci::CircleConst *bias = dynamic_cast<luci::CircleConst *>(node->bias());
+ if (bias != nullptr)
+ RETURN_FALSE_UNLESS(is_lwq_const(node->bias()))
+ return true;
+ }
+
+ bool visit(const luci::CircleDepthwiseConv2D *node)
+ {
+ RETURN_FALSE_UNLESS(is_lwq(node))
+ RETURN_FALSE_UNLESS(is_lwq(node->input()))
+ RETURN_FALSE_UNLESS(is_lwq_const(node->filter()))
+ luci::CircleConst *bias = dynamic_cast<luci::CircleConst *>(node->bias());
+ if (bias != nullptr)
+ RETURN_FALSE_UNLESS(is_lwq_const(node->bias()))
+ return true;
+ }
+
+ bool visit(const luci::CircleInstanceNorm *node)
+ {
+ RETURN_FALSE_UNLESS(is_lwq(node))
+ RETURN_FALSE_UNLESS(is_lwq(node->input()))
+ RETURN_FALSE_UNLESS(is_lwq_const(node->gamma()))
+ RETURN_FALSE_UNLESS(is_lwq_const(node->beta()))
+ return true;
+ }
+
+ bool visit(const luci::CirclePRelu *node)
+ {
+ RETURN_FALSE_UNLESS(is_lwq(node))
+ RETURN_FALSE_UNLESS(is_lwq(node->input()))
+ RETURN_FALSE_UNLESS(is_lwq_const(node->alpha()))
+ return true;
+ }
+
+ bool visit(const luci::CircleTransposeConv *node)
+ {
+ RETURN_FALSE_UNLESS(is_lwq(node))
+ RETURN_FALSE_UNLESS(is_lwq(node->outBackprop()))
+ RETURN_FALSE_UNLESS(is_lwq_const(node->filter()))
+ luci::CircleConst *bias = dynamic_cast<luci::CircleConst *>(node->bias());
+ if (bias != nullptr)
+ RETURN_FALSE_UNLESS(is_lwq_const(node->bias()))
+ return true;
+ }
+
+ bool visit(const luci::CircleFullyConnected *node)
+ {
+ RETURN_FALSE_UNLESS(is_lwq(node))
+ RETURN_FALSE_UNLESS(is_lwq(node->input()))
+ RETURN_FALSE_UNLESS(is_lwq_const(node->weights()))
+ luci::CircleConst *bias = dynamic_cast<luci::CircleConst *>(node->bias());
+ if (bias != nullptr)
+ RETURN_FALSE_UNLESS(is_lwq_const(node->bias()))
+ return true;
+ }
+};
+
} // namespace luci
#undef RETURN_FALSE_UNLESS
-#endif // __LUCI_VERIFY_QUANTIZED_NODE_CHANNELWISE_GRANULARITY_H__
+#endif // __LUCI_VERIFY_QUANTIZED_NODE_GRANULARITY_H__
diff --git a/compiler/luci/pass/src/VerifyQuantizedNodeLayerWiseGranularity.h b/compiler/luci/pass/src/VerifyQuantizedNodeLayerWiseGranularity.h
deleted file mode 100644
index 9bc8b31df..000000000
--- a/compiler/luci/pass/src/VerifyQuantizedNodeLayerWiseGranularity.h
+++ /dev/null
@@ -1,473 +0,0 @@
-/*
- * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#ifndef __LUCI_VERIFY_QUANTIZED_NODE_LAYERWISE_GRANULARITY_H__
-#define __LUCI_VERIFY_QUANTIZED_NODE_LAYERWISE_GRANULARITY_H__
-
-#include <luci/IR/CircleNodes.h>
-#include <luci/IR/CircleNodeVisitor.h>
-#include <luci/Pass/QuantizationParameters.h>
-
-using Granularity = luci::QuantizationGranularity;
-
-// This macro is undef at the end of the file
-#define RETURN_FALSE_UNLESS(ARG) \
- if (not(ARG)) \
- { \
- return false; \
- }
-
-namespace luci
-{
-
-/**
- * @brief Verify the granualrity of layer-wise quantized node
- * @details
- *
- * Targets to verify
- * - node's output (i.e., node itself)
- * - node's inputs
- */
-struct VerifyQuantizedNodeLayerWiseGranularity final : public luci::CircleNodeVisitor<bool>
-{
-private:
- bool is_lwq(const loco::Node *node)
- {
- auto circle_node = loco::must_cast<const luci::CircleNode *>(node);
-
- if (circle_node->quantparam() == nullptr)
- return false;
-
- if (circle_node->quantparam()->scale.size() != 1)
- return false;
-
- if (circle_node->quantparam()->zerop.size() != 1)
- return false;
-
- return true;
- }
-
- bool is_lwq_const(const loco::Node *node)
- {
- auto circle_node = loco::must_cast<const luci::CircleConst *>(node);
-
- if (circle_node->quantparam() == nullptr)
- return false;
-
- if (circle_node->quantparam()->scale.size() != 1)
- return false;
-
- if (circle_node->quantparam()->zerop.size() != 1)
- return false;
-
- return true;
- }
-
-private:
- bool visit(const luci::CircleConv2D *node)
- {
- RETURN_FALSE_UNLESS(is_lwq(node))
- RETURN_FALSE_UNLESS(is_lwq(node->input()))
- RETURN_FALSE_UNLESS(is_lwq_const(node->filter()))
- luci::CircleConst *bias = dynamic_cast<luci::CircleConst *>(node->bias());
- if (bias != nullptr)
- RETURN_FALSE_UNLESS(is_lwq_const(node->bias()))
- return true;
- }
-
- bool visit(const luci::CircleConcatenation *node)
- {
- RETURN_FALSE_UNLESS(is_lwq(node))
- for (uint32_t i = 0; i < node->numValues(); i++)
- {
- RETURN_FALSE_UNLESS(is_lwq(node->values(i)));
- }
- return true;
- }
-
- bool visit(const luci::CircleDepthToSpace *node)
- {
- RETURN_FALSE_UNLESS(is_lwq(node))
- RETURN_FALSE_UNLESS(is_lwq(node->input()))
- return true;
- }
-
- bool visit(const luci::CircleDepthwiseConv2D *node)
- {
- RETURN_FALSE_UNLESS(is_lwq(node))
- RETURN_FALSE_UNLESS(is_lwq(node->input()))
- RETURN_FALSE_UNLESS(is_lwq_const(node->filter()))
- luci::CircleConst *bias = dynamic_cast<luci::CircleConst *>(node->bias());
- if (bias != nullptr)
- RETURN_FALSE_UNLESS(is_lwq_const(node->bias()))
- return true;
- }
-
- bool visit(const luci::CircleInstanceNorm *node)
- {
- RETURN_FALSE_UNLESS(is_lwq(node))
- RETURN_FALSE_UNLESS(is_lwq(node->input()))
- RETURN_FALSE_UNLESS(is_lwq_const(node->gamma()))
- RETURN_FALSE_UNLESS(is_lwq_const(node->beta()))
- return true;
- }
-
- bool visit(const luci::CirclePack *node)
- {
- RETURN_FALSE_UNLESS(is_lwq(node))
- for (uint32_t i = 0; i < node->values_count(); i++)
- {
- RETURN_FALSE_UNLESS(is_lwq(node->values(i)));
- }
- return true;
- }
-
- bool visit(const luci::CirclePad *node)
- {
- RETURN_FALSE_UNLESS(is_lwq(node))
- RETURN_FALSE_UNLESS(is_lwq(node->input()))
- return true;
- }
-
- bool visit(const luci::CirclePadV2 *node)
- {
- RETURN_FALSE_UNLESS(is_lwq(node))
- RETURN_FALSE_UNLESS(is_lwq(node->input()))
- RETURN_FALSE_UNLESS(is_lwq(node->constant_values()))
- return true;
- }
-
- bool visit(const luci::CircleMirrorPad *node)
- {
- RETURN_FALSE_UNLESS(is_lwq(node))
- RETURN_FALSE_UNLESS(is_lwq(node->input()))
- return true;
- }
-
- bool visit(const luci::CirclePRelu *node)
- {
- RETURN_FALSE_UNLESS(is_lwq(node))
- RETURN_FALSE_UNLESS(is_lwq(node->input()))
- RETURN_FALSE_UNLESS(is_lwq_const(node->alpha()))
- return true;
- }
-
- bool visit(const luci::CircleTransposeConv *node)
- {
- RETURN_FALSE_UNLESS(is_lwq(node))
- RETURN_FALSE_UNLESS(is_lwq(node->outBackprop()))
- RETURN_FALSE_UNLESS(is_lwq_const(node->filter()))
- luci::CircleConst *bias = dynamic_cast<luci::CircleConst *>(node->bias());
- if (bias != nullptr)
- RETURN_FALSE_UNLESS(is_lwq_const(node->bias()))
- return true;
- }
-
- bool visit(const luci::CircleFullyConnected *node)
- {
- RETURN_FALSE_UNLESS(is_lwq(node))
- RETURN_FALSE_UNLESS(is_lwq(node->input()))
- RETURN_FALSE_UNLESS(is_lwq_const(node->weights()))
- luci::CircleConst *bias = dynamic_cast<luci::CircleConst *>(node->bias());
- if (bias != nullptr)
- RETURN_FALSE_UNLESS(is_lwq_const(node->bias()))
- return true;
- }
-
- bool visit(const luci::CircleAdd *node)
- {
- RETURN_FALSE_UNLESS(is_lwq(node))
- RETURN_FALSE_UNLESS(is_lwq(node->x()));
- RETURN_FALSE_UNLESS(is_lwq(node->y()));
- return true;
- }
-
- bool visit(const luci::CircleAveragePool2D *node)
- {
- RETURN_FALSE_UNLESS(is_lwq(node))
- RETURN_FALSE_UNLESS(is_lwq(node->value()));
- return true;
- }
-
- bool visit(const luci::CircleLogicalOr *)
- {
- // Logical OR has bool-type inputs and output
- // Nothing to be checked
- return true;
- }
-
- bool visit(const luci::CircleMaxPool2D *node)
- {
- RETURN_FALSE_UNLESS(is_lwq(node))
- RETURN_FALSE_UNLESS(is_lwq(node->value()));
- return true;
- }
-
- bool visit(const luci::CircleLocalResponseNormalization *node)
- {
- RETURN_FALSE_UNLESS(is_lwq(node))
- RETURN_FALSE_UNLESS(is_lwq(node->input()));
- return true;
- }
-
- bool visit(const luci::CircleMean *node)
- {
- RETURN_FALSE_UNLESS(is_lwq(node))
- RETURN_FALSE_UNLESS(is_lwq(node->input()));
- return true;
- }
-
- bool visit(const luci::CircleMul *node)
- {
- RETURN_FALSE_UNLESS(is_lwq(node))
- RETURN_FALSE_UNLESS(is_lwq(node->x()));
- RETURN_FALSE_UNLESS(is_lwq(node->y()));
- return true;
- }
-
- bool visit(const luci::CircleNotEqual *node)
- {
- RETURN_FALSE_UNLESS(is_lwq(node->x()));
- RETURN_FALSE_UNLESS(is_lwq(node->y()));
- return true;
- }
-
- bool visit(const luci::CircleRelu *node)
- {
- RETURN_FALSE_UNLESS(is_lwq(node))
- RETURN_FALSE_UNLESS(is_lwq(node->features()));
- return true;
- }
-
- bool visit(const luci::CircleReshape *node)
- {
- auto input = loco::must_cast<const luci::CircleNode *>(node->tensor());
- bool input_quantized = input->quantparam() != nullptr;
- bool node_quantized = node->quantparam() != nullptr;
- RETURN_FALSE_UNLESS(input_quantized == node_quantized);
- RETURN_FALSE_UNLESS(not node_quantized or is_lwq(node))
- RETURN_FALSE_UNLESS(not input_quantized or is_lwq(input));
- return true;
- }
-
- bool visit(const luci::CircleLogistic *node)
- {
- RETURN_FALSE_UNLESS(is_lwq(node));
- RETURN_FALSE_UNLESS(is_lwq(node->x()));
- return true;
- }
-
- bool visit(const luci::CircleSoftmax *node)
- {
- RETURN_FALSE_UNLESS(is_lwq(node));
- RETURN_FALSE_UNLESS(is_lwq(node->logits()));
- return true;
- }
-
- bool visit(const luci::CircleSpaceToBatchND *node)
- {
- RETURN_FALSE_UNLESS(is_lwq(node));
- RETURN_FALSE_UNLESS(is_lwq(node->input()));
- return true;
- }
-
- bool visit(const luci::CircleSpaceToDepth *node)
- {
- RETURN_FALSE_UNLESS(is_lwq(node));
- RETURN_FALSE_UNLESS(is_lwq(node->input()));
- return true;
- }
-
- bool visit(const luci::CircleSlice *node)
- {
- RETURN_FALSE_UNLESS(is_lwq(node));
- RETURN_FALSE_UNLESS(is_lwq(node->input()));
- return true;
- }
-
- bool visit(const luci::CircleSplit *node)
- {
- // node's output is the input of CircleSplitOut, thus not quantized
- RETURN_FALSE_UNLESS(is_lwq(node->input()));
- return true;
- }
-
- bool visit(const luci::CircleSplitOut *node)
- {
- RETURN_FALSE_UNLESS(is_lwq(node));
- return true;
- }
-
- bool visit(const luci::CircleSplitV *node)
- {
- // node's output is the input of CircleSplitVOut, thus not quantized
- RETURN_FALSE_UNLESS(is_lwq(node->input()));
- return true;
- }
-
- bool visit(const luci::CircleSplitVOut *node)
- {
- RETURN_FALSE_UNLESS(is_lwq(node));
- return true;
- }
-
- bool visit(const luci::CircleStridedSlice *node)
- {
- RETURN_FALSE_UNLESS(is_lwq(node));
- RETURN_FALSE_UNLESS(is_lwq(node->input()));
- return true;
- }
-
- bool visit(const luci::CircleArgMax *node)
- {
- // node's output is index, thus not quantized
- RETURN_FALSE_UNLESS(is_lwq(node->input()));
- return true;
- }
-
- bool visit(const luci::CircleBatchToSpaceND *node)
- {
- RETURN_FALSE_UNLESS(is_lwq(node));
- RETURN_FALSE_UNLESS(is_lwq(node->input()));
- return true;
- }
-
- bool visit(const luci::CircleTanh *node)
- {
- RETURN_FALSE_UNLESS(is_lwq(node));
- RETURN_FALSE_UNLESS(is_lwq(node->x()));
- return true;
- }
-
- bool visit(const luci::CircleTranspose *node)
- {
- RETURN_FALSE_UNLESS(is_lwq(node));
- RETURN_FALSE_UNLESS(is_lwq(node->a()));
- return true;
- }
-
- bool visit(const luci::CircleFloor *node)
- {
- RETURN_FALSE_UNLESS(is_lwq(node));
- RETURN_FALSE_UNLESS(is_lwq(node->x()));
- return true;
- }
-
- bool visit(const luci::CircleGreater *node)
- {
- RETURN_FALSE_UNLESS(is_lwq(node->x()));
- RETURN_FALSE_UNLESS(is_lwq(node->y()));
- return true;
- }
-
- bool visit(const luci::CircleGreaterEqual *node)
- {
- RETURN_FALSE_UNLESS(is_lwq(node->x()));
- RETURN_FALSE_UNLESS(is_lwq(node->y()));
- return true;
- }
-
- bool visit(const luci::CircleDiv *node)
- {
- RETURN_FALSE_UNLESS(is_lwq(node));
- RETURN_FALSE_UNLESS(is_lwq(node->x()));
- RETURN_FALSE_UNLESS(is_lwq(node->y()));
- return true;
- }
-
- bool visit(const luci::CircleFloorDiv *node)
- {
- RETURN_FALSE_UNLESS(is_lwq(node));
- RETURN_FALSE_UNLESS(is_lwq(node->x()));
- RETURN_FALSE_UNLESS(is_lwq(node->y()));
- return true;
- }
-
- bool visit(const luci::CircleRsqrt *node)
- {
- RETURN_FALSE_UNLESS(is_lwq(node));
- RETURN_FALSE_UNLESS(is_lwq(node->x()));
- return true;
- }
-
- bool visit(const luci::CircleSqrt *node)
- {
- RETURN_FALSE_UNLESS(is_lwq(node));
- RETURN_FALSE_UNLESS(is_lwq(node->x()));
- return true;
- }
-
- bool visit(const luci::CircleElu *node)
- {
- RETURN_FALSE_UNLESS(is_lwq(node));
- RETURN_FALSE_UNLESS(is_lwq(node->features()));
- return true;
- }
-
- bool visit(const luci::CirclePow *node)
- {
- RETURN_FALSE_UNLESS(is_lwq(node));
- RETURN_FALSE_UNLESS(is_lwq(node->x()));
- RETURN_FALSE_UNLESS(is_lwq(node->y()));
- return true;
- }
-
- bool visit(const luci::CircleResizeBilinear *node)
- {
- RETURN_FALSE_UNLESS(is_lwq(node));
- RETURN_FALSE_UNLESS(is_lwq(node->input()));
- return true;
- }
-
- bool visit(const luci::CircleResizeNearestNeighbor *node)
- {
- RETURN_FALSE_UNLESS(is_lwq(node));
- RETURN_FALSE_UNLESS(is_lwq(node->input()));
- return true;
- }
-
- bool visit(const luci::CircleUnpack *node)
- {
- // node's output is the input of CircleUnpackOut, thus not quantized
- RETURN_FALSE_UNLESS(is_lwq(node->value()));
- return true;
- }
-
- bool visit(const luci::CircleUnpackOut *node)
- {
- RETURN_FALSE_UNLESS(is_lwq(node));
- return true;
- }
-
- bool visit(const luci::CircleCast *node)
- {
- auto input = loco::must_cast<const luci::CircleNode *>(node->x());
- bool input_quantized = input->quantparam() != nullptr;
- bool node_quantized = node->quantparam() != nullptr;
- RETURN_FALSE_UNLESS(not input_quantized or is_lwq(input));
- RETURN_FALSE_UNLESS(not node_quantized or is_lwq(node));
- return true;
- }
-
- // TODO: Implement more Ops
-
- bool visit(const luci::CircleNode *) { return true; }
-};
-
-} // namespace luci
-
-#undef RETURN_FALSE_UNLESS
-
-#endif // __LUCI_VERIFY_QUANTIZED_NODE_LAYERWISE_GRANULARITY_H__
diff --git a/compiler/luci/pass/src/VerifyQuantizedNodeS16Type.h b/compiler/luci/pass/src/VerifyQuantizedNodeS16Type.h
deleted file mode 100644
index eeec7b82b..000000000
--- a/compiler/luci/pass/src/VerifyQuantizedNodeS16Type.h
+++ /dev/null
@@ -1,516 +0,0 @@
-/*
- * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#ifndef __LUCI_VERIFY_QUANTIZED_NODE_S16_TYPE_H__
-#define __LUCI_VERIFY_QUANTIZED_NODE_S16_TYPE_H__
-
-#include <luci/IR/CircleNodes.h>
-#include <luci/IR/CircleNodeVisitor.h>
-
-#include <cmath>
-
-using Type = loco::DataType;
-
-// This macro is undef at the end of the file
-#define RETURN_FALSE_UNLESS(ARG) \
- if (not(ARG)) \
- { \
- return false; \
- }
-
-namespace luci
-{
-
-/**
- * @brief Verify the data type of INT16 quantized node
- * @details
- *
- * Targets to verify
- * - node's output (i.e., node itself)
- * - node's inputs
- */
-struct VerifyQuantizedNodeS16Type final : public luci::CircleNodeVisitor<bool>
-{
-private:
- bool has_type(const loco::Node *node, Type dtype)
- {
- auto circle_node = loco::must_cast<const luci::CircleNode *>(node);
- return circle_node->dtype() == dtype;
- }
-
-private:
- bool visit(const luci::CircleConv2D *node)
- {
- RETURN_FALSE_UNLESS(has_type(node, Type::S16))
- RETURN_FALSE_UNLESS(has_type(node->input(), Type::S16))
- RETURN_FALSE_UNLESS(has_type(node->filter(), Type::S16))
- RETURN_FALSE_UNLESS(has_type(node->bias(), Type::S64))
- return true;
- }
-
- bool visit(const luci::CircleConcatenation *node)
- {
- RETURN_FALSE_UNLESS(has_type(node, Type::S16))
- for (uint32_t i = 0; i < node->numValues(); i++)
- {
- RETURN_FALSE_UNLESS(has_type(node->values(i), Type::S16))
- }
- return true;
- }
-
- bool visit(const luci::CircleDepthToSpace *node)
- {
- RETURN_FALSE_UNLESS(has_type(node, Type::S16))
- RETURN_FALSE_UNLESS(has_type(node->input(), Type::S16))
- return true;
- }
-
- bool visit(const luci::CircleDepthwiseConv2D *node)
- {
- RETURN_FALSE_UNLESS(has_type(node, Type::S16))
- RETURN_FALSE_UNLESS(has_type(node->input(), Type::S16))
- RETURN_FALSE_UNLESS(has_type(node->filter(), Type::S16))
- RETURN_FALSE_UNLESS(has_type(node->bias(), Type::S64))
- return true;
- }
-
- bool visit(const luci::CircleInstanceNorm *node)
- {
- RETURN_FALSE_UNLESS(has_type(node, Type::S16))
- RETURN_FALSE_UNLESS(has_type(node->input(), Type::S16))
- RETURN_FALSE_UNLESS(has_type(node->gamma(), Type::S16))
- RETURN_FALSE_UNLESS(has_type(node->beta(), Type::S16))
- return true;
- }
-
- bool visit(const luci::CirclePack *node)
- {
- RETURN_FALSE_UNLESS(has_type(node, Type::S16))
- for (uint32_t i = 0; i < node->values_count(); i++)
- {
- RETURN_FALSE_UNLESS(has_type(node->values(i), Type::S16))
- }
- return true;
- }
-
- bool visit(const luci::CirclePad *node)
- {
- RETURN_FALSE_UNLESS(has_type(node, Type::S16))
- RETURN_FALSE_UNLESS(has_type(node->input(), Type::S16))
- RETURN_FALSE_UNLESS(has_type(node->paddings(), Type::S32))
- return true;
- }
-
- bool visit(const luci::CirclePadV2 *node)
- {
- RETURN_FALSE_UNLESS(has_type(node, Type::S16))
- RETURN_FALSE_UNLESS(has_type(node->input(), Type::S16))
- RETURN_FALSE_UNLESS(has_type(node->paddings(), Type::S32))
- RETURN_FALSE_UNLESS(has_type(node->constant_values(), Type::S16))
- return true;
- }
-
- bool visit(const luci::CircleMirrorPad *node)
- {
- RETURN_FALSE_UNLESS(has_type(node, Type::S16))
- RETURN_FALSE_UNLESS(has_type(node->input(), Type::S16))
- RETURN_FALSE_UNLESS(has_type(node->paddings(), Type::S32))
- return true;
- }
-
- bool visit(const luci::CirclePRelu *node)
- {
- RETURN_FALSE_UNLESS(has_type(node, Type::S16))
- RETURN_FALSE_UNLESS(has_type(node->input(), Type::S16))
- RETURN_FALSE_UNLESS(has_type(node->alpha(), Type::S16))
- return true;
- }
-
- bool visit(const luci::CircleTransposeConv *node)
- {
- RETURN_FALSE_UNLESS(has_type(node, Type::S16))
- RETURN_FALSE_UNLESS(has_type(node->outBackprop(), Type::S16))
- RETURN_FALSE_UNLESS(has_type(node->filter(), Type::S16))
- luci::CircleConst *bias = dynamic_cast<luci::CircleConst *>(node->bias());
- if (bias != nullptr)
- RETURN_FALSE_UNLESS(has_type(bias, Type::S64))
- return true;
- }
-
- bool visit(const luci::CircleFullyConnected *node)
- {
- RETURN_FALSE_UNLESS(has_type(node, Type::S16))
- RETURN_FALSE_UNLESS(has_type(node->input(), Type::S16))
- RETURN_FALSE_UNLESS(has_type(node->weights(), Type::S16))
- luci::CircleConst *bias = dynamic_cast<luci::CircleConst *>(node->bias());
- if (bias != nullptr)
- RETURN_FALSE_UNLESS(has_type(bias, Type::S64))
- return true;
- }
-
- bool visit(const luci::CircleAdd *node)
- {
- RETURN_FALSE_UNLESS(has_type(node, Type::S16))
- RETURN_FALSE_UNLESS(has_type(node->x(), Type::S16))
- RETURN_FALSE_UNLESS(has_type(node->y(), Type::S16))
- return true;
- }
-
- bool visit(const luci::CircleAveragePool2D *node)
- {
- RETURN_FALSE_UNLESS(has_type(node, Type::S16))
- RETURN_FALSE_UNLESS(has_type(node->value(), Type::S16))
- return true;
- }
-
- bool visit(const luci::CircleLogicalOr *node)
- {
- RETURN_FALSE_UNLESS(has_type(node, Type::BOOL))
- RETURN_FALSE_UNLESS(has_type(node->x(), Type::BOOL))
- RETURN_FALSE_UNLESS(has_type(node->y(), Type::BOOL))
- return true;
- }
-
- bool visit(const luci::CircleMaxPool2D *node)
- {
- RETURN_FALSE_UNLESS(has_type(node, Type::S16))
- RETURN_FALSE_UNLESS(has_type(node->value(), Type::S16))
- return true;
- }
-
- bool visit(const luci::CircleLocalResponseNormalization *node)
- {
- RETURN_FALSE_UNLESS(has_type(node, Type::S16))
- RETURN_FALSE_UNLESS(has_type(node->input(), Type::S16))
- return true;
- }
-
- bool visit(const luci::CircleMean *node)
- {
- RETURN_FALSE_UNLESS(has_type(node, Type::S16))
- RETURN_FALSE_UNLESS(has_type(node->input(), Type::S16))
- RETURN_FALSE_UNLESS(has_type(node->reduction_indices(), Type::S32))
- return true;
- }
-
- bool visit(const luci::CircleMul *node)
- {
- RETURN_FALSE_UNLESS(has_type(node, Type::S16))
- RETURN_FALSE_UNLESS(has_type(node->x(), Type::S16))
- RETURN_FALSE_UNLESS(has_type(node->y(), Type::S16))
- return true;
- }
-
- bool visit(const luci::CircleNotEqual *node)
- {
- RETURN_FALSE_UNLESS(has_type(node, Type::BOOL))
- RETURN_FALSE_UNLESS(has_type(node->x(), Type::S16))
- RETURN_FALSE_UNLESS(has_type(node->y(), Type::S16))
- return true;
- }
-
- bool visit(const luci::CircleRelu *node)
- {
- RETURN_FALSE_UNLESS(has_type(node, Type::S16))
- RETURN_FALSE_UNLESS(has_type(node->features(), Type::S16))
- return true;
- }
-
- bool visit(const luci::CircleReshape *node)
- {
- if (node->quantparam())
- {
- RETURN_FALSE_UNLESS(has_type(node, Type::S16))
- RETURN_FALSE_UNLESS(has_type(node->tensor(), Type::S16))
- }
- else
- {
- RETURN_FALSE_UNLESS(has_type(node->tensor(), node->dtype()))
- }
- luci::CircleConst *shape = dynamic_cast<luci::CircleConst *>(node->shape());
- if (shape != nullptr)
- RETURN_FALSE_UNLESS(has_type(shape, Type::S32))
- return true;
- }
-
- bool visit(const luci::CircleLogistic *node)
- {
- RETURN_FALSE_UNLESS(has_type(node, Type::S16))
- RETURN_FALSE_UNLESS(has_type(node->x(), Type::S16))
-
- RETURN_FALSE_UNLESS(node->quantparam());
- RETURN_FALSE_UNLESS(node->quantparam()->scale[0] == 1.0f / 32768.0f);
- RETURN_FALSE_UNLESS(node->quantparam()->zerop[0] == 0);
- return true;
- }
-
- bool visit(const luci::CircleSoftmax *node)
- {
- RETURN_FALSE_UNLESS(has_type(node, Type::S16))
- RETURN_FALSE_UNLESS(has_type(node->logits(), Type::S16))
-
- RETURN_FALSE_UNLESS(node->quantparam());
- RETURN_FALSE_UNLESS(node->quantparam()->scale[0] == 1.0f / 32767.0f);
- RETURN_FALSE_UNLESS(node->quantparam()->zerop[0] == 0);
- return true;
- }
-
- bool visit(const luci::CircleSpaceToBatchND *node)
- {
- RETURN_FALSE_UNLESS(has_type(node, Type::S16))
- RETURN_FALSE_UNLESS(has_type(node->input(), Type::S16))
- return true;
- }
-
- bool visit(const luci::CircleSpaceToDepth *node)
- {
- RETURN_FALSE_UNLESS(has_type(node, Type::S16))
- RETURN_FALSE_UNLESS(has_type(node->input(), Type::S16))
- return true;
- }
-
- bool visit(const luci::CircleSlice *node)
- {
- RETURN_FALSE_UNLESS(has_type(node, Type::S16))
- RETURN_FALSE_UNLESS(has_type(node->input(), Type::S16))
- RETURN_FALSE_UNLESS(has_type(node->begin(), Type::S32) || has_type(node->begin(), Type::S64))
- RETURN_FALSE_UNLESS(has_type(node->size(), Type::S32) || has_type(node->size(), Type::S64))
- return true;
- }
-
- bool visit(const luci::CircleSplit *node)
- {
- // node's output is the input of CircleSplitOut, thus not quantized
- RETURN_FALSE_UNLESS(has_type(node->input(), Type::S16))
- return true;
- }
-
- bool visit(const luci::CircleSplitOut *node)
- {
- RETURN_FALSE_UNLESS(has_type(node, Type::S16))
-
- // SplitOut has the same qparam with the input of Split
- auto split = loco::must_cast<luci::CircleSplit *>(node->input());
- auto input = loco::must_cast<luci::CircleNode *>(split->input());
- RETURN_FALSE_UNLESS(node->quantparam());
- RETURN_FALSE_UNLESS(node->quantparam()->scale[0] == input->quantparam()->scale[0]);
- RETURN_FALSE_UNLESS(node->quantparam()->zerop[0] == input->quantparam()->zerop[0]);
- return true;
- }
-
- bool visit(const luci::CircleSplitV *node)
- {
- // node's output is the input of CircleSplitVOut, thus not quantized
- RETURN_FALSE_UNLESS(has_type(node->input(), Type::S16))
- return true;
- }
-
- bool visit(const luci::CircleSplitVOut *node)
- {
- RETURN_FALSE_UNLESS(has_type(node, Type::S16))
-
- // SplitVOut has the same qparam with the input of SplitV
- auto splitv = loco::must_cast<luci::CircleSplitV *>(node->input());
- auto input = loco::must_cast<luci::CircleNode *>(splitv->input());
- RETURN_FALSE_UNLESS(node->quantparam());
- RETURN_FALSE_UNLESS(node->quantparam()->scale[0] == input->quantparam()->scale[0]);
- RETURN_FALSE_UNLESS(node->quantparam()->zerop[0] == input->quantparam()->zerop[0]);
- return true;
- }
-
- bool visit(const luci::CircleStridedSlice *node)
- {
- RETURN_FALSE_UNLESS(has_type(node, Type::S16))
- RETURN_FALSE_UNLESS(has_type(node->input(), Type::S16))
-
- auto input = loco::must_cast<luci::CircleNode *>(node->input());
- RETURN_FALSE_UNLESS(node->quantparam());
- RETURN_FALSE_UNLESS(node->quantparam()->scale[0] == input->quantparam()->scale[0]);
- RETURN_FALSE_UNLESS(node->quantparam()->zerop[0] == input->quantparam()->zerop[0]);
- return true;
- }
-
- bool visit(const luci::CircleArgMax *node)
- {
- RETURN_FALSE_UNLESS(has_type(node, node->output_type()))
- RETURN_FALSE_UNLESS(has_type(node->input(), Type::S16))
- RETURN_FALSE_UNLESS(has_type(node->dimension(), Type::S32) ||
- has_type(node->dimension(), Type::S64))
- return true;
- }
-
- bool visit(const luci::CircleBatchToSpaceND *node)
- {
- RETURN_FALSE_UNLESS(has_type(node, Type::S16))
- RETURN_FALSE_UNLESS(has_type(node->input(), Type::S16))
- return true;
- }
-
- bool visit(const luci::CircleTanh *node)
- {
- RETURN_FALSE_UNLESS(has_type(node, Type::S16))
- RETURN_FALSE_UNLESS(has_type(node->x(), Type::S16))
-
- RETURN_FALSE_UNLESS(node->quantparam());
- RETURN_FALSE_UNLESS(node->quantparam()->scale[0] == 1.0f / 32768.0f);
- RETURN_FALSE_UNLESS(node->quantparam()->zerop[0] == 0);
- return true;
- }
-
- bool visit(const luci::CircleTranspose *node)
- {
- RETURN_FALSE_UNLESS(has_type(node, Type::S16))
- RETURN_FALSE_UNLESS(has_type(node->a(), Type::S16))
- RETURN_FALSE_UNLESS(has_type(node->perm(), Type::S32))
- return true;
- }
-
- bool visit(const luci::CircleFloor *node)
- {
- RETURN_FALSE_UNLESS(has_type(node, Type::S16))
- RETURN_FALSE_UNLESS(has_type(node->x(), Type::S16))
-
- // This checks the value of scale is an integer
- RETURN_FALSE_UNLESS(node->quantparam());
- RETURN_FALSE_UNLESS(std::roundf(node->quantparam()->scale[0]) == node->quantparam()->scale[0]);
- return true;
- }
-
- bool visit(const luci::CircleGreater *node)
- {
- RETURN_FALSE_UNLESS(has_type(node, Type::BOOL))
- RETURN_FALSE_UNLESS(has_type(node->x(), Type::S16))
- RETURN_FALSE_UNLESS(has_type(node->y(), Type::S16))
- return true;
- }
-
- bool visit(const luci::CircleGreaterEqual *node)
- {
- RETURN_FALSE_UNLESS(has_type(node, Type::BOOL))
- RETURN_FALSE_UNLESS(has_type(node->x(), Type::S16))
- RETURN_FALSE_UNLESS(has_type(node->y(), Type::S16))
- return true;
- }
-
- bool visit(const luci::CircleDiv *node)
- {
- RETURN_FALSE_UNLESS(has_type(node, Type::S16))
- RETURN_FALSE_UNLESS(has_type(node->x(), Type::S16))
- RETURN_FALSE_UNLESS(has_type(node->y(), Type::S16))
- return true;
- }
-
- bool visit(const luci::CircleFloorDiv *node)
- {
- RETURN_FALSE_UNLESS(has_type(node, Type::S16))
- RETURN_FALSE_UNLESS(has_type(node->x(), Type::S16))
- RETURN_FALSE_UNLESS(has_type(node->y(), Type::S16))
-
- // This checks the value of scale is an integer
- RETURN_FALSE_UNLESS(node->quantparam());
- RETURN_FALSE_UNLESS(std::roundf(node->quantparam()->scale[0]) == node->quantparam()->scale[0]);
- return true;
- }
-
- bool visit(const luci::CircleRsqrt *node)
- {
- RETURN_FALSE_UNLESS(has_type(node, Type::S16))
- RETURN_FALSE_UNLESS(has_type(node->x(), Type::S16))
- return true;
- }
-
- bool visit(const luci::CircleSqrt *node)
- {
- RETURN_FALSE_UNLESS(has_type(node, Type::S16))
- RETURN_FALSE_UNLESS(has_type(node->x(), Type::S16))
- return true;
- }
-
- bool visit(const luci::CircleElu *node)
- {
- RETURN_FALSE_UNLESS(has_type(node, Type::S16))
- RETURN_FALSE_UNLESS(has_type(node->features(), Type::S16))
- return true;
- }
-
- bool visit(const luci::CirclePow *node)
- {
- RETURN_FALSE_UNLESS(has_type(node, Type::S16))
- RETURN_FALSE_UNLESS(has_type(node->x(), Type::S16))
- RETURN_FALSE_UNLESS(has_type(node->y(), Type::S16))
- return true;
- }
-
- bool visit(const luci::CircleResizeBilinear *node)
- {
- RETURN_FALSE_UNLESS(has_type(node, Type::S16))
- RETURN_FALSE_UNLESS(has_type(node->input(), Type::S16))
- return true;
- }
-
- bool visit(const luci::CircleResizeNearestNeighbor *node)
- {
- RETURN_FALSE_UNLESS(has_type(node, Type::S16))
- RETURN_FALSE_UNLESS(has_type(node->input(), Type::S16))
- return true;
- }
-
- bool visit(const luci::CircleUnpack *node)
- {
- // node's output is the input of CircleUnpackOut, thus not quantized
- RETURN_FALSE_UNLESS(has_type(node->value(), Type::S16))
- return true;
- }
-
- bool visit(const luci::CircleUnpackOut *node)
- {
- RETURN_FALSE_UNLESS(has_type(node, Type::S16))
-
- // UnpackOut has the same qparam with the input of Unpack
- auto Unpack = loco::must_cast<luci::CircleUnpack *>(node->input());
- auto input = loco::must_cast<luci::CircleNode *>(Unpack->value());
- RETURN_FALSE_UNLESS(node->quantparam() && input->quantparam());
- RETURN_FALSE_UNLESS(node->quantparam()->scale[0] == input->quantparam()->scale[0]);
- RETURN_FALSE_UNLESS(node->quantparam()->zerop[0] == input->quantparam()->zerop[0]);
- return true;
- }
-
- bool visit(const luci::CircleCast *node)
- {
- auto *input = loco::must_cast<luci::CircleNode *>(node->x());
- RETURN_FALSE_UNLESS(has_type(input, node->in_data_type()))
-
- bool input_quantized = input->quantparam() != nullptr;
- if (input_quantized)
- RETURN_FALSE_UNLESS(has_type(input, Type::S16))
-
- RETURN_FALSE_UNLESS(has_type(node, node->out_data_type()))
-
- bool node_quantized = node->quantparam() != nullptr;
- if (node_quantized)
- RETURN_FALSE_UNLESS(has_type(node, Type::S16))
- return true;
- }
-
- // TODO: Implement more Ops
-
- bool visit(const luci::CircleNode *) { return true; }
-};
-
-} // namespace luci
-
-#undef RETURN_FALSE_UNLESS
-
-#endif // __LUCI_VERIFY_QUNTIZED_NODE_S16_TYPE_H__
diff --git a/compiler/luci/pass/src/VerifyQuantizedNodeType.cpp b/compiler/luci/pass/src/VerifyQuantizedNodeType.cpp
new file mode 100644
index 000000000..4e1c062c0
--- /dev/null
+++ b/compiler/luci/pass/src/VerifyQuantizedNodeType.cpp
@@ -0,0 +1,554 @@
+/*
+ * Copyright (c) 2022 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "VerifyQuantizedNodeType.h"
+
+#include <cmath>
+#include <memory>
+#include <stdexcept> // std::domain_error
+
+// This macro is undef at the end of the file
+#define RETURN_FALSE_UNLESS(ARG) \
+ if (not(ARG)) \
+ { \
+ return false; \
+ }
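+// e.g. RETURN_FALSE_UNLESS(has_type(node, Qtype)) expands to
+//   if (not (has_type(node, Qtype))) { return false; }
+// so each visit() below bails out on the first failed check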
+
+namespace luci
+{
+
+std::shared_ptr<VerifyQuantizedNodeType> VerifyQuantizedNodeType::create(loco::DataType dtype)
+{
+ if (dtype == loco::DataType::U8)
+ return std::make_shared<VerifyQuantizedNodeU8Type>();
+ else if (dtype == loco::DataType::S16)
+ return std::make_shared<VerifyQuantizedNodeS16Type>();
+ else
+    throw std::domain_error("Unsupported quantized type");
+}
+
+} // namespace luci
+
+namespace luci
+{
+
+template <loco::DataType Qtype, loco::DataType Btype>
+bool VerifyQuantizedNodeTypeBase<Qtype, Btype>::visit(const luci::CircleAdd *node)
+{
+ return group_has_type(node, Qtype);
+}
+
+template <loco::DataType Qtype, loco::DataType Btype>
+bool VerifyQuantizedNodeTypeBase<Qtype, Btype>::visit(const luci::CircleArgMax *node)
+{
+ RETURN_FALSE_UNLESS(has_type(node, node->output_type()))
+ RETURN_FALSE_UNLESS(has_type(node->input(), Qtype))
+ RETURN_FALSE_UNLESS(has_type(node->dimension(), loco::DataType::S32) ||
+ has_type(node->dimension(), loco::DataType::S64))
+ return true;
+}
+
+template <loco::DataType Qtype, loco::DataType Btype>
+bool VerifyQuantizedNodeTypeBase<Qtype, Btype>::visit(const luci::CircleAveragePool2D *node)
+{
+ return group_has_type(node, Qtype);
+}
+
+template <loco::DataType Qtype, loco::DataType Btype>
+bool VerifyQuantizedNodeTypeBase<Qtype, Btype>::visit(const luci::CircleBatchToSpaceND *node)
+{
+ RETURN_FALSE_UNLESS(has_type(node, Qtype))
+ RETURN_FALSE_UNLESS(has_type(node->input(), Qtype))
+ return true;
+}
+
+template <loco::DataType Qtype, loco::DataType Btype>
+bool VerifyQuantizedNodeTypeBase<Qtype, Btype>::visit(const luci::CircleCast *node)
+{
+ auto *input = loco::must_cast<luci::CircleNode *>(node->x());
+ bool input_quantized = input->quantparam() != nullptr;
+ if (input_quantized)
+ {
+ RETURN_FALSE_UNLESS(has_type(input, node->in_data_type()))
+ RETURN_FALSE_UNLESS(has_type(input, Qtype))
+ }
+
+ bool node_quantized = node->quantparam() != nullptr;
+ if (node_quantized)
+ {
+ RETURN_FALSE_UNLESS(has_type(node, node->out_data_type()))
+ RETURN_FALSE_UNLESS(has_type(node, Qtype))
+ }
+ return true;
+}
+
+template <loco::DataType Qtype, loco::DataType Btype>
+bool VerifyQuantizedNodeTypeBase<Qtype, Btype>::visit(const luci::CircleConv2D *node)
+{
+ RETURN_FALSE_UNLESS(has_type(node, Qtype))
+ RETURN_FALSE_UNLESS(has_type(node->input(), Qtype))
+ RETURN_FALSE_UNLESS(has_type(node->filter(), Qtype))
+ RETURN_FALSE_UNLESS(has_type(node->bias(), Btype))
+ return true;
+}
+
+template <loco::DataType Qtype, loco::DataType Btype>
+bool VerifyQuantizedNodeTypeBase<Qtype, Btype>::visit(const luci::CircleConcatenation *node)
+{
+ // Allow concatenation of indices
+ if (group_has_type(node, loco::DataType::S32) or group_has_type(node, loco::DataType::S64))
+ return true;
+
+ return group_has_type(node, Qtype);
+}
+
+template <loco::DataType Qtype, loco::DataType Btype>
+bool VerifyQuantizedNodeTypeBase<Qtype, Btype>::visit(const luci::CircleDepthToSpace *node)
+{
+ return group_has_type(node, Qtype);
+}
+
+template <loco::DataType Qtype, loco::DataType Btype>
+bool VerifyQuantizedNodeTypeBase<Qtype, Btype>::visit(const luci::CircleDepthwiseConv2D *node)
+{
+ RETURN_FALSE_UNLESS(has_type(node, Qtype))
+ RETURN_FALSE_UNLESS(has_type(node->input(), Qtype))
+ RETURN_FALSE_UNLESS(has_type(node->filter(), Qtype))
+ RETURN_FALSE_UNLESS(has_type(node->bias(), Btype))
+ return true;
+}
+
+template <loco::DataType Qtype, loco::DataType Btype>
+bool VerifyQuantizedNodeTypeBase<Qtype, Btype>::visit(const luci::CircleDiv *node)
+{
+ return group_has_type(node, Qtype);
+}
+
+template <loco::DataType Qtype, loco::DataType Btype>
+bool VerifyQuantizedNodeTypeBase<Qtype, Btype>::visit(const luci::CircleElu *node)
+{
+ return group_has_type(node, Qtype);
+}
+
+template <loco::DataType Qtype, loco::DataType Btype>
+bool VerifyQuantizedNodeTypeBase<Qtype, Btype>::visit(const luci::CircleFloor *node)
+{
+ RETURN_FALSE_UNLESS(group_has_type(node, Qtype));
+
+ // This checks the value of scale is an integer
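+  // (an integer scale with an integer zero-point means every dequantized value
+  // (q - zerop) * scale is a whole number, matching Floor's integer-valued output)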
+ RETURN_FALSE_UNLESS(node->quantparam());
+ RETURN_FALSE_UNLESS(std::roundf(node->quantparam()->scale[0]) == node->quantparam()->scale[0]);
+ return true;
+}
+
+template <loco::DataType Qtype, loco::DataType Btype>
+bool VerifyQuantizedNodeTypeBase<Qtype, Btype>::visit(const luci::CircleFloorDiv *node)
+{
+ RETURN_FALSE_UNLESS(group_has_type(node, Qtype));
+
+ // This checks the value of scale is an integer
+ RETURN_FALSE_UNLESS(node->quantparam());
+ RETURN_FALSE_UNLESS(std::roundf(node->quantparam()->scale[0]) == node->quantparam()->scale[0]);
+ return true;
+}
+
+template <loco::DataType Qtype, loco::DataType Btype>
+bool VerifyQuantizedNodeTypeBase<Qtype, Btype>::visit(const luci::CircleFullyConnected *node)
+{
+ RETURN_FALSE_UNLESS(has_type(node, Qtype))
+ RETURN_FALSE_UNLESS(has_type(node->input(), Qtype))
+ RETURN_FALSE_UNLESS(has_type(node->weights(), Qtype))
+ luci::CircleConst *bias = dynamic_cast<luci::CircleConst *>(node->bias());
+ if (bias != nullptr)
+ RETURN_FALSE_UNLESS(has_type(bias, Btype))
+ return true;
+}
+
+template <loco::DataType Qtype, loco::DataType Btype>
+bool VerifyQuantizedNodeTypeBase<Qtype, Btype>::visit(const luci::CircleGreater *node)
+{
+ RETURN_FALSE_UNLESS(has_type(node, loco::DataType::BOOL))
+ RETURN_FALSE_UNLESS(has_type(node->x(), Qtype))
+ RETURN_FALSE_UNLESS(has_type(node->y(), Qtype))
+ return true;
+}
+
+template <loco::DataType Qtype, loco::DataType Btype>
+bool VerifyQuantizedNodeTypeBase<Qtype, Btype>::visit(const luci::CircleGreaterEqual *node)
+{
+ RETURN_FALSE_UNLESS(has_type(node, loco::DataType::BOOL))
+ RETURN_FALSE_UNLESS(has_type(node->x(), Qtype))
+ RETURN_FALSE_UNLESS(has_type(node->y(), Qtype))
+ return true;
+}
+
+template <loco::DataType Qtype, loco::DataType Btype>
+bool VerifyQuantizedNodeTypeBase<Qtype, Btype>::visit(const luci::CircleInstanceNorm *node)
+{
+ return group_has_type(node, Qtype);
+}
+
+template <loco::DataType Qtype, loco::DataType Btype>
+bool VerifyQuantizedNodeTypeBase<Qtype, Btype>::visit(
+ const luci::CircleLocalResponseNormalization *node)
+{
+ return group_has_type(node, Qtype);
+}
+
+template <loco::DataType Qtype, loco::DataType Btype>
+bool VerifyQuantizedNodeTypeBase<Qtype, Btype>::visit(const luci::CircleLogicalOr *node)
+{
+ return group_has_type(node, loco::DataType::BOOL);
+}
+
+template <loco::DataType Qtype, loco::DataType Btype>
+bool VerifyQuantizedNodeTypeBase<Qtype, Btype>::visit(const luci::CircleMaxPool2D *node)
+{
+ return group_has_type(node, Qtype);
+}
+
+template <loco::DataType Qtype, loco::DataType Btype>
+bool VerifyQuantizedNodeTypeBase<Qtype, Btype>::visit(const luci::CircleMean *node)
+{
+ RETURN_FALSE_UNLESS(has_type(node, Qtype))
+ RETURN_FALSE_UNLESS(has_type(node->input(), Qtype))
+ RETURN_FALSE_UNLESS(has_type(node->reduction_indices(), loco::DataType::S32))
+ return true;
+}
+
+template <loco::DataType Qtype, loco::DataType Btype>
+bool VerifyQuantizedNodeTypeBase<Qtype, Btype>::visit(const luci::CircleMirrorPad *node)
+{
+ RETURN_FALSE_UNLESS(has_type(node, Qtype))
+ RETURN_FALSE_UNLESS(has_type(node->input(), Qtype))
+ RETURN_FALSE_UNLESS(has_type(node->paddings(), loco::DataType::S32))
+ return true;
+}
+
+template <loco::DataType Qtype, loco::DataType Btype>
+bool VerifyQuantizedNodeTypeBase<Qtype, Btype>::visit(const luci::CircleMul *node)
+{
+ return group_has_type(node, Qtype);
+}
+
+template <loco::DataType Qtype, loco::DataType Btype>
+bool VerifyQuantizedNodeTypeBase<Qtype, Btype>::visit(const luci::CircleNotEqual *node)
+{
+ RETURN_FALSE_UNLESS(has_type(node, loco::DataType::BOOL))
+ RETURN_FALSE_UNLESS(has_type(node->x(), Qtype))
+ RETURN_FALSE_UNLESS(has_type(node->y(), Qtype))
+ return true;
+}
+
+template <loco::DataType Qtype, loco::DataType Btype>
+bool VerifyQuantizedNodeTypeBase<Qtype, Btype>::visit(const luci::CircleOneHot *node)
+{
+ RETURN_FALSE_UNLESS(has_type(node, Qtype));
+ RETURN_FALSE_UNLESS(has_type(node->indices(), loco::DataType::S32) ||
+ has_type(node->indices(), loco::DataType::S64));
+ RETURN_FALSE_UNLESS(has_type(node->depth(), loco::DataType::S32));
+ RETURN_FALSE_UNLESS(has_type(node->on_value(), Qtype));
+ RETURN_FALSE_UNLESS(has_type(node->off_value(), Qtype));
+ return true;
+}
+
+template <loco::DataType Qtype, loco::DataType Btype>
+bool VerifyQuantizedNodeTypeBase<Qtype, Btype>::visit(const luci::CirclePack *node)
+{
+ return group_has_type(node, Qtype);
+}
+
+template <loco::DataType Qtype, loco::DataType Btype>
+bool VerifyQuantizedNodeTypeBase<Qtype, Btype>::visit(const luci::CirclePad *node)
+{
+ RETURN_FALSE_UNLESS(has_type(node, Qtype))
+ RETURN_FALSE_UNLESS(has_type(node->input(), Qtype))
+ RETURN_FALSE_UNLESS(has_type(node->paddings(), loco::DataType::S32))
+ return true;
+}
+
+template <loco::DataType Qtype, loco::DataType Btype>
+bool VerifyQuantizedNodeTypeBase<Qtype, Btype>::visit(const luci::CirclePadV2 *node)
+{
+ RETURN_FALSE_UNLESS(has_type(node, Qtype))
+ RETURN_FALSE_UNLESS(has_type(node->input(), Qtype))
+ RETURN_FALSE_UNLESS(has_type(node->paddings(), loco::DataType::S32))
+ RETURN_FALSE_UNLESS(has_type(node->constant_values(), Qtype))
+ return true;
+}
+
+template <loco::DataType Qtype, loco::DataType Btype>
+bool VerifyQuantizedNodeTypeBase<Qtype, Btype>::visit(const luci::CirclePRelu *node)
+{
+ return group_has_type(node, Qtype);
+}
+
+template <loco::DataType Qtype, loco::DataType Btype>
+bool VerifyQuantizedNodeTypeBase<Qtype, Btype>::visit(const luci::CirclePow *node)
+{
+ return group_has_type(node, Qtype);
+}
+
+template <loco::DataType Qtype, loco::DataType Btype>
+bool VerifyQuantizedNodeTypeBase<Qtype, Btype>::visit(const luci::CircleRelu *node)
+{
+ return group_has_type(node, Qtype);
+}
+
+template <loco::DataType Qtype, loco::DataType Btype>
+bool VerifyQuantizedNodeTypeBase<Qtype, Btype>::visit(const luci::CircleReshape *node)
+{
+ if (node->quantparam())
+ {
+ RETURN_FALSE_UNLESS(has_type(node, Qtype))
+ RETURN_FALSE_UNLESS(has_type(node->tensor(), Qtype))
+ }
+ else
+ {
+ RETURN_FALSE_UNLESS(has_type(node->tensor(), node->dtype()))
+ }
+ luci::CircleConst *shape = dynamic_cast<luci::CircleConst *>(node->shape());
+ if (shape != nullptr)
+ RETURN_FALSE_UNLESS(has_type(shape, loco::DataType::S32))
+ return true;
+}
+
+template <loco::DataType Qtype, loco::DataType Btype>
+bool VerifyQuantizedNodeTypeBase<Qtype, Btype>::visit(const luci::CircleResizeBilinear *node)
+{
+ RETURN_FALSE_UNLESS(has_type(node, Qtype))
+ RETURN_FALSE_UNLESS(has_type(node->input(), Qtype))
+ return true;
+}
+
+template <loco::DataType Qtype, loco::DataType Btype>
+bool VerifyQuantizedNodeTypeBase<Qtype, Btype>::visit(const luci::CircleResizeNearestNeighbor *node)
+{
+ RETURN_FALSE_UNLESS(has_type(node, Qtype))
+ RETURN_FALSE_UNLESS(has_type(node->input(), Qtype))
+ return true;
+}
+
+template <loco::DataType Qtype, loco::DataType Btype>
+bool VerifyQuantizedNodeTypeBase<Qtype, Btype>::visit(const luci::CircleRsqrt *node)
+{
+ return group_has_type(node, Qtype);
+}
+
+template <loco::DataType Qtype, loco::DataType Btype>
+bool VerifyQuantizedNodeTypeBase<Qtype, Btype>::visit(const luci::CircleSlice *node)
+{
+ RETURN_FALSE_UNLESS(has_type(node, Qtype))
+ RETURN_FALSE_UNLESS(has_type(node->input(), Qtype))
+ RETURN_FALSE_UNLESS(has_type(node->begin(), loco::DataType::S32) ||
+ has_type(node->begin(), loco::DataType::S64))
+ RETURN_FALSE_UNLESS(has_type(node->size(), loco::DataType::S32) ||
+ has_type(node->size(), loco::DataType::S64))
+ return true;
+}
+
+template <loco::DataType Qtype, loco::DataType Btype>
+bool VerifyQuantizedNodeTypeBase<Qtype, Btype>::visit(const luci::CircleSpaceToBatchND *node)
+{
+ RETURN_FALSE_UNLESS(has_type(node, Qtype))
+ RETURN_FALSE_UNLESS(has_type(node->input(), Qtype))
+ return true;
+}
+
+template <loco::DataType Qtype, loco::DataType Btype>
+bool VerifyQuantizedNodeTypeBase<Qtype, Btype>::visit(const luci::CircleSpaceToDepth *node)
+{
+ return group_has_type(node, Qtype);
+}
+
+template <loco::DataType Qtype, loco::DataType Btype>
+bool VerifyQuantizedNodeTypeBase<Qtype, Btype>::visit(const luci::CircleSplit *node)
+{
+ // node's output is the input of CircleSplitOut, thus not quantized
+ RETURN_FALSE_UNLESS(has_type(node->input(), Qtype))
+ return true;
+}
+
+template <loco::DataType Qtype, loco::DataType Btype>
+bool VerifyQuantizedNodeTypeBase<Qtype, Btype>::visit(const luci::CircleSplitOut *node)
+{
+ RETURN_FALSE_UNLESS(has_type(node, Qtype))
+
+ // SplitOut has the same qparam with the input of Split
+ auto split = loco::must_cast<luci::CircleSplit *>(node->input());
+ auto input = loco::must_cast<luci::CircleNode *>(split->input());
+ RETURN_FALSE_UNLESS(node->quantparam());
+ RETURN_FALSE_UNLESS(node->quantparam()->scale[0] == input->quantparam()->scale[0]);
+ RETURN_FALSE_UNLESS(node->quantparam()->zerop[0] == input->quantparam()->zerop[0]);
+ return true;
+}
+
+template <loco::DataType Qtype, loco::DataType Btype>
+bool VerifyQuantizedNodeTypeBase<Qtype, Btype>::visit(const luci::CircleSplitV *node)
+{
+ // node's output is the input of CircleSplitVOut, thus not quantized
+ RETURN_FALSE_UNLESS(has_type(node->input(), Qtype))
+ return true;
+}
+
+template <loco::DataType Qtype, loco::DataType Btype>
+bool VerifyQuantizedNodeTypeBase<Qtype, Btype>::visit(const luci::CircleSplitVOut *node)
+{
+ RETURN_FALSE_UNLESS(has_type(node, Qtype))
+
+ // SplitVOut has the same qparam with the input of SplitV
+ auto splitv = loco::must_cast<luci::CircleSplitV *>(node->input());
+ auto input = loco::must_cast<luci::CircleNode *>(splitv->input());
+ RETURN_FALSE_UNLESS(node->quantparam());
+ RETURN_FALSE_UNLESS(node->quantparam()->scale[0] == input->quantparam()->scale[0]);
+ RETURN_FALSE_UNLESS(node->quantparam()->zerop[0] == input->quantparam()->zerop[0]);
+ return true;
+}
+
+template <loco::DataType Qtype, loco::DataType Btype>
+bool VerifyQuantizedNodeTypeBase<Qtype, Btype>::visit(const luci::CircleSqrt *node)
+{
+ return group_has_type(node, Qtype);
+}
+
+template <loco::DataType Qtype, loco::DataType Btype>
+bool VerifyQuantizedNodeTypeBase<Qtype, Btype>::visit(const luci::CircleStridedSlice *node)
+{
+ RETURN_FALSE_UNLESS(has_type(node, Qtype))
+ RETURN_FALSE_UNLESS(has_type(node->input(), Qtype))
+
+ auto input = loco::must_cast<luci::CircleNode *>(node->input());
+ RETURN_FALSE_UNLESS(node->quantparam());
+ RETURN_FALSE_UNLESS(node->quantparam()->scale[0] == input->quantparam()->scale[0]);
+ RETURN_FALSE_UNLESS(node->quantparam()->zerop[0] == input->quantparam()->zerop[0]);
+ return true;
+}
+
+template <loco::DataType Qtype, loco::DataType Btype>
+bool VerifyQuantizedNodeTypeBase<Qtype, Btype>::visit(const luci::CircleTranspose *node)
+{
+ RETURN_FALSE_UNLESS(has_type(node, Qtype))
+ RETURN_FALSE_UNLESS(has_type(node->a(), Qtype))
+ RETURN_FALSE_UNLESS(has_type(node->perm(), loco::DataType::S32))
+ return true;
+}
+
+template <loco::DataType Qtype, loco::DataType Btype>
+bool VerifyQuantizedNodeTypeBase<Qtype, Btype>::visit(const luci::CircleTransposeConv *node)
+{
+ RETURN_FALSE_UNLESS(has_type(node, Qtype))
+ RETURN_FALSE_UNLESS(has_type(node->outBackprop(), Qtype))
+ RETURN_FALSE_UNLESS(has_type(node->filter(), Qtype))
+ luci::CircleConst *bias = dynamic_cast<luci::CircleConst *>(node->bias());
+ if (bias != nullptr)
+ RETURN_FALSE_UNLESS(has_type(bias, Btype))
+ return true;
+}
+
+template <loco::DataType Qtype, loco::DataType Btype>
+bool VerifyQuantizedNodeTypeBase<Qtype, Btype>::visit(const luci::CircleUnpack *node)
+{
+ // node's output is the input of CircleUnpackOut, thus not quantized
+ RETURN_FALSE_UNLESS(has_type(node->value(), Qtype))
+ return true;
+}
+
+template <loco::DataType Qtype, loco::DataType Btype>
+bool VerifyQuantizedNodeTypeBase<Qtype, Btype>::visit(const luci::CircleUnpackOut *node)
+{
+ RETURN_FALSE_UNLESS(has_type(node, Qtype))
+
+ // UnpackOut has the same qparam with the input of Unpack
+  auto unpack = loco::must_cast<luci::CircleUnpack *>(node->input());
+  auto input = loco::must_cast<luci::CircleNode *>(unpack->value());
+ RETURN_FALSE_UNLESS(node->quantparam() && input->quantparam());
+ RETURN_FALSE_UNLESS(node->quantparam()->scale[0] == input->quantparam()->scale[0]);
+ RETURN_FALSE_UNLESS(node->quantparam()->zerop[0] == input->quantparam()->zerop[0]);
+ return true;
+}
+
+} // namespace luci
+
+namespace luci
+{
+
+bool VerifyQuantizedNodeU8Type::visit(const luci::CircleTanh *node)
+{
+ RETURN_FALSE_UNLESS(group_has_type(node, loco::DataType::U8));
+
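+  // tanh's output range [-1, 1) is pinned to scale = 2/256 with zero-point 128:
+  // a quantized value q dequantizes to (q - 128) * 2 / 256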
+ RETURN_FALSE_UNLESS(node->quantparam());
+ RETURN_FALSE_UNLESS(node->quantparam()->scale[0] == 2.0f / 256.0f);
+ RETURN_FALSE_UNLESS(node->quantparam()->zerop[0] == 128);
+ return true;
+}
+
+bool VerifyQuantizedNodeU8Type::visit(const luci::CircleLogistic *node)
+{
+ RETURN_FALSE_UNLESS(group_has_type(node, loco::DataType::U8));
+
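+  // logistic outputs lie in (0, 1); scale = 1/256 with zero-point 0 makes the
+  // representable range [0, 255/256]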
+ RETURN_FALSE_UNLESS(node->quantparam());
+ RETURN_FALSE_UNLESS(node->quantparam()->scale[0] == 1.0f / 256.0f);
+ RETURN_FALSE_UNLESS(node->quantparam()->zerop[0] == 0);
+ return true;
+}
+
+bool VerifyQuantizedNodeU8Type::visit(const luci::CircleSoftmax *node)
+{
+ RETURN_FALSE_UNLESS(group_has_type(node, loco::DataType::U8));
+
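+  // softmax outputs lie in [0, 1]; scale = 1/255 maps the U8 maximum 255
+  // exactly to 1.0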
+ RETURN_FALSE_UNLESS(node->quantparam());
+ RETURN_FALSE_UNLESS(node->quantparam()->scale[0] == 1.0f / 255.0f);
+ RETURN_FALSE_UNLESS(node->quantparam()->zerop[0] == 0);
+ return true;
+}
+
+} // namespace luci
+
+namespace luci
+{
+
+bool VerifyQuantizedNodeS16Type::visit(const luci::CircleTanh *node)
+{
+ RETURN_FALSE_UNLESS(group_has_type(node, loco::DataType::S16));
+
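+  // symmetric S16 quantization: scale = 1/32768 with zero-point 0 covers
+  // tanh's output range [-1, 1)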
+ RETURN_FALSE_UNLESS(node->quantparam());
+ RETURN_FALSE_UNLESS(node->quantparam()->scale[0] == 1.0f / 32768.0f);
+ RETURN_FALSE_UNLESS(node->quantparam()->zerop[0] == 0);
+ return true;
+}
+
+bool VerifyQuantizedNodeS16Type::visit(const luci::CircleLogistic *node)
+{
+ RETURN_FALSE_UNLESS(group_has_type(node, loco::DataType::S16));
+
+ RETURN_FALSE_UNLESS(node->quantparam());
+ RETURN_FALSE_UNLESS(node->quantparam()->scale[0] == 1.0f / 32768.0f);
+ RETURN_FALSE_UNLESS(node->quantparam()->zerop[0] == 0);
+ return true;
+}
+
+bool VerifyQuantizedNodeS16Type::visit(const luci::CircleSoftmax *node)
+{
+ RETURN_FALSE_UNLESS(group_has_type(node, loco::DataType::S16));
+
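+  // scale = 1/32767 maps the S16 maximum exactly to softmax's upper bound 1.0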
+ RETURN_FALSE_UNLESS(node->quantparam());
+ RETURN_FALSE_UNLESS(node->quantparam()->scale[0] == 1.0f / 32767.0f);
+ RETURN_FALSE_UNLESS(node->quantparam()->zerop[0] == 0);
+ return true;
+}
+
+} // namespace luci
+
+#undef RETURN_FALSE_UNLESS
diff --git a/compiler/luci/pass/src/VerifyQuantizedNodeType.h b/compiler/luci/pass/src/VerifyQuantizedNodeType.h
new file mode 100644
index 000000000..ff1acbd6f
--- /dev/null
+++ b/compiler/luci/pass/src/VerifyQuantizedNodeType.h
@@ -0,0 +1,157 @@
+/*
+ * Copyright (c) 2022 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __LUCI_VERIFY_QUANTIZED_NODE_TYPE_H__
+#define __LUCI_VERIFY_QUANTIZED_NODE_TYPE_H__
+
+#include <luci/IR/CircleNodes.h>
+#include <luci/IR/CircleNodeVisitor.h>
+
+#include <memory> // std::shared_ptr
+
+namespace luci
+{
+
+/**
+ * @brief Verify the data type of quantized node
+ * @details
+ *
+ * Targets to verify
+ * - node's output (i.e., node itself)
+ * - node's inputs
+ */
+class VerifyQuantizedNodeType
+{
+public:
+ static std::shared_ptr<VerifyQuantizedNodeType> create(loco::DataType dtype);
+
+public:
+ virtual bool verify(luci::CircleNode *node) = 0;
+};
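+
+// Typical use (a sketch; `graph` and `report_type_mismatch` are illustrative):
+//
+//   auto verifier = luci::VerifyQuantizedNodeType::create(loco::DataType::U8);
+//   for (auto node : loco::active_nodes(loco::output_nodes(graph)))
+//   {
+//     auto circle_node = loco::must_cast<luci::CircleNode *>(node);
+//     if (not verifier->verify(circle_node))
+//       report_type_mismatch(circle_node);
+//   }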
+
+/**
+ * @brief Verify that the data types of a node and its bias match the given quantization types
+ *
+ * @tparam Qtype Quantization type of a node (e.g. Q8, Q16, ...)
+ * @tparam Btype Quantization type of a bias (e.g. S32 for Q8)
+ */
+template <loco::DataType Qtype, loco::DataType Btype>
+class VerifyQuantizedNodeTypeBase : public luci::CircleNodeVisitor<bool>,
+ public VerifyQuantizedNodeType
+{
+public:
+  bool verify(luci::CircleNode *node) override { return node->accept(this); }
+
+protected:
+ bool has_type(const loco::Node *node, loco::DataType dtype)
+ {
+ auto circle_node = loco::must_cast<const luci::CircleNode *>(node);
+ return circle_node->dtype() == dtype;
+ }
+
+  // Check whether a node and all of its inputs have the given dtype
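+  // (e.g. for a CircleAdd this covers the Add itself plus its operands
+  // arg(0) and arg(1), i.e. x and y)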
+ bool group_has_type(const loco::Node *node, loco::DataType dtype)
+ {
+ if (!has_type(node, dtype))
+ return false;
+
+ for (uint32_t i = 0; i < node->arity(); ++i)
+ if (!has_type(node->arg(i), dtype))
+ return false;
+
+ return true;
+ }
+
+private:
+ bool visit(const luci::CircleAdd *node);
+ bool visit(const luci::CircleArgMax *node);
+ bool visit(const luci::CircleAveragePool2D *node);
+ bool visit(const luci::CircleBatchToSpaceND *node);
+ bool visit(const luci::CircleCast *node);
+ bool visit(const luci::CircleConv2D *node);
+ bool visit(const luci::CircleConcatenation *node);
+ bool visit(const luci::CircleDepthToSpace *node);
+ bool visit(const luci::CircleDepthwiseConv2D *node);
+ bool visit(const luci::CircleDiv *node);
+ bool visit(const luci::CircleElu *node);
+ bool visit(const luci::CircleFloor *node);
+ bool visit(const luci::CircleFloorDiv *node);
+ bool visit(const luci::CircleFullyConnected *node);
+ bool visit(const luci::CircleGreater *node);
+ bool visit(const luci::CircleGreaterEqual *node);
+ bool visit(const luci::CircleInstanceNorm *node);
+ bool visit(const luci::CircleLocalResponseNormalization *node);
+ bool visit(const luci::CircleLogicalOr *node);
+ bool visit(const luci::CircleMaxPool2D *node);
+ bool visit(const luci::CircleMean *node);
+ bool visit(const luci::CircleMirrorPad *node);
+ bool visit(const luci::CircleMul *node);
+ bool visit(const luci::CircleNotEqual *node);
+ bool visit(const luci::CircleOneHot *node);
+ bool visit(const luci::CirclePack *node);
+ bool visit(const luci::CirclePad *node);
+ bool visit(const luci::CirclePadV2 *node);
+ bool visit(const luci::CirclePRelu *node);
+ bool visit(const luci::CirclePow *node);
+ bool visit(const luci::CircleRelu *node);
+ bool visit(const luci::CircleReshape *node);
+ bool visit(const luci::CircleResizeBilinear *node);
+ bool visit(const luci::CircleResizeNearestNeighbor *node);
+ bool visit(const luci::CircleRsqrt *node);
+ bool visit(const luci::CircleSlice *node);
+ bool visit(const luci::CircleSpaceToBatchND *node);
+ bool visit(const luci::CircleSpaceToDepth *node);
+ bool visit(const luci::CircleSplit *node);
+ bool visit(const luci::CircleSplitOut *node);
+ bool visit(const luci::CircleSplitV *node);
+ bool visit(const luci::CircleSplitVOut *node);
+ bool visit(const luci::CircleSqrt *node);
+ bool visit(const luci::CircleStridedSlice *node);
+ bool visit(const luci::CircleTranspose *node);
+ bool visit(const luci::CircleTransposeConv *node);
+ bool visit(const luci::CircleUnpack *node);
+ bool visit(const luci::CircleUnpackOut *node);
+
+  // NOTE The nodes below have different implementations per Qtype/Btype;
+  // see VerifyQuantizedNodeU8Type and VerifyQuantizedNodeS16Type.
+ // bool visit(const luci::CircleLogistic *node);
+ // bool visit(const luci::CircleSoftmax *node);
+ // bool visit(const luci::CircleTanh *node);
+
+ // TODO: Implement more Ops
+
+ bool visit(const luci::CircleNode *) { return true; }
+};
+
+class VerifyQuantizedNodeU8Type
+ : public VerifyQuantizedNodeTypeBase<loco::DataType::U8, loco::DataType::S32>
+{
+private:
+ bool visit(const luci::CircleLogistic *node);
+ bool visit(const luci::CircleSoftmax *node);
+ bool visit(const luci::CircleTanh *node);
+};
+
+class VerifyQuantizedNodeS16Type
+ : public VerifyQuantizedNodeTypeBase<loco::DataType::S16, loco::DataType::S64>
+{
+private:
+ bool visit(const luci::CircleLogistic *node);
+ bool visit(const luci::CircleSoftmax *node);
+ bool visit(const luci::CircleTanh *node);
+};
+
+} // namespace luci
+
+#endif // __LUCI_VERIFY_QUANTIZED_NODE_TYPE_H__
diff --git a/compiler/luci/pass/src/VerifyQuantizedNodeU8Type.h b/compiler/luci/pass/src/VerifyQuantizedNodeU8Type.h
deleted file mode 100644
index e7dd1b072..000000000
--- a/compiler/luci/pass/src/VerifyQuantizedNodeU8Type.h
+++ /dev/null
@@ -1,518 +0,0 @@
-/*
- * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#ifndef __LUCI_VERIFY_QUANTIZED_NODE_U8_TYPE_H__
-#define __LUCI_VERIFY_QUANTIZED_NODE_U8_TYPE_H__
-
-#include <luci/IR/CircleNodes.h>
-#include <luci/IR/CircleNodeVisitor.h>
-
-#include <cmath>
-
-using Type = loco::DataType;
-
-// This macro is undef at the end of the file
-#define RETURN_FALSE_UNLESS(ARG) \
- if (not(ARG)) \
- { \
- return false; \
- }
-
-namespace luci
-{
-
-/**
- * @brief Verify the data type of UINT8 quantized node
- * @details
- *
- * Targets to verify
- * - node's output (i.e., node itself)
- * - node's inputs
- */
-struct VerifyQuantizedNodeU8Type final : public luci::CircleNodeVisitor<bool>
-{
-private:
- bool has_type(const loco::Node *node, Type dtype)
- {
- auto circle_node = loco::must_cast<const luci::CircleNode *>(node);
- return circle_node->dtype() == dtype;
- }
-
-private:
- bool visit(const luci::CircleConv2D *node)
- {
- RETURN_FALSE_UNLESS(has_type(node, Type::U8))
- RETURN_FALSE_UNLESS(has_type(node->input(), Type::U8))
- RETURN_FALSE_UNLESS(has_type(node->filter(), Type::U8))
- RETURN_FALSE_UNLESS(has_type(node->bias(), Type::S32))
- return true;
- }
-
- bool visit(const luci::CircleConcatenation *node)
- {
- RETURN_FALSE_UNLESS(has_type(node, Type::U8))
- for (uint32_t i = 0; i < node->numValues(); i++)
- {
- RETURN_FALSE_UNLESS(has_type(node->values(i), Type::U8))
- }
- return true;
- }
-
- bool visit(const luci::CircleDepthToSpace *node)
- {
- RETURN_FALSE_UNLESS(has_type(node, Type::U8))
- RETURN_FALSE_UNLESS(has_type(node->input(), Type::U8))
- return true;
- }
-
- bool visit(const luci::CircleDepthwiseConv2D *node)
- {
- RETURN_FALSE_UNLESS(has_type(node, Type::U8))
- RETURN_FALSE_UNLESS(has_type(node->input(), Type::U8))
- RETURN_FALSE_UNLESS(has_type(node->filter(), Type::U8))
- RETURN_FALSE_UNLESS(has_type(node->bias(), Type::S32))
- return true;
- }
-
- bool visit(const luci::CircleInstanceNorm *node)
- {
- RETURN_FALSE_UNLESS(has_type(node, Type::U8))
- RETURN_FALSE_UNLESS(has_type(node->input(), Type::U8))
- RETURN_FALSE_UNLESS(has_type(node->gamma(), Type::U8))
- RETURN_FALSE_UNLESS(has_type(node->beta(), Type::U8))
- return true;
- }
-
- bool visit(const luci::CirclePack *node)
- {
- RETURN_FALSE_UNLESS(has_type(node, Type::U8))
- for (uint32_t i = 0; i < node->values_count(); i++)
- {
- RETURN_FALSE_UNLESS(has_type(node->values(i), Type::U8))
- }
- return true;
- }
-
- bool visit(const luci::CirclePad *node)
- {
- RETURN_FALSE_UNLESS(has_type(node, Type::U8))
- RETURN_FALSE_UNLESS(has_type(node->input(), Type::U8))
- RETURN_FALSE_UNLESS(has_type(node->paddings(), Type::S32))
- return true;
- }
-
- bool visit(const luci::CirclePadV2 *node)
- {
- RETURN_FALSE_UNLESS(has_type(node, Type::U8))
- RETURN_FALSE_UNLESS(has_type(node->input(), Type::U8))
- RETURN_FALSE_UNLESS(has_type(node->paddings(), Type::S32))
- RETURN_FALSE_UNLESS(has_type(node->constant_values(), Type::U8))
- return true;
- }
-
- bool visit(const luci::CircleMirrorPad *node)
- {
- RETURN_FALSE_UNLESS(has_type(node, Type::U8))
- RETURN_FALSE_UNLESS(has_type(node->input(), Type::U8))
- RETURN_FALSE_UNLESS(has_type(node->paddings(), Type::S32))
- return true;
- }
-
- bool visit(const luci::CirclePRelu *node)
- {
- RETURN_FALSE_UNLESS(has_type(node, Type::U8))
- RETURN_FALSE_UNLESS(has_type(node->input(), Type::U8))
- RETURN_FALSE_UNLESS(has_type(node->alpha(), Type::U8))
- return true;
- }
-
- bool visit(const luci::CircleTransposeConv *node)
- {
- RETURN_FALSE_UNLESS(has_type(node, Type::U8))
- RETURN_FALSE_UNLESS(has_type(node->outBackprop(), Type::U8))
- RETURN_FALSE_UNLESS(has_type(node->filter(), Type::U8))
- luci::CircleConst *bias = dynamic_cast<luci::CircleConst *>(node->bias());
- if (bias != nullptr)
- RETURN_FALSE_UNLESS(has_type(bias, Type::S32))
- return true;
- }
-
- bool visit(const luci::CircleFullyConnected *node)
- {
- RETURN_FALSE_UNLESS(has_type(node, Type::U8))
- RETURN_FALSE_UNLESS(has_type(node->input(), Type::U8))
- RETURN_FALSE_UNLESS(has_type(node->weights(), Type::U8))
- luci::CircleConst *bias = dynamic_cast<luci::CircleConst *>(node->bias());
- if (bias != nullptr)
- RETURN_FALSE_UNLESS(has_type(bias, Type::S32))
- return true;
- }
-
- bool visit(const luci::CircleAdd *node)
- {
- RETURN_FALSE_UNLESS(has_type(node, Type::U8))
- RETURN_FALSE_UNLESS(has_type(node->x(), Type::U8))
- RETURN_FALSE_UNLESS(has_type(node->y(), Type::U8))
- return true;
- }
-
- bool visit(const luci::CircleAveragePool2D *node)
- {
- RETURN_FALSE_UNLESS(has_type(node, Type::U8))
- RETURN_FALSE_UNLESS(has_type(node->value(), Type::U8))
- return true;
- }
-
- bool visit(const luci::CircleBatchToSpaceND *node)
- {
- RETURN_FALSE_UNLESS(has_type(node, Type::U8))
- RETURN_FALSE_UNLESS(has_type(node->input(), Type::U8))
- return true;
- }
-
- bool visit(const luci::CircleLogicalOr *node)
- {
- RETURN_FALSE_UNLESS(has_type(node, Type::BOOL))
- RETURN_FALSE_UNLESS(has_type(node->x(), Type::BOOL))
- RETURN_FALSE_UNLESS(has_type(node->y(), Type::BOOL))
- return true;
- }
-
- bool visit(const luci::CircleMaxPool2D *node)
- {
- RETURN_FALSE_UNLESS(has_type(node, Type::U8))
- RETURN_FALSE_UNLESS(has_type(node->value(), Type::U8))
- return true;
- }
-
- bool visit(const luci::CircleLocalResponseNormalization *node)
- {
- RETURN_FALSE_UNLESS(has_type(node, Type::U8))
- RETURN_FALSE_UNLESS(has_type(node->input(), Type::U8))
- return true;
- }
-
- bool visit(const luci::CircleMean *node)
- {
- RETURN_FALSE_UNLESS(has_type(node, Type::U8))
- RETURN_FALSE_UNLESS(has_type(node->input(), Type::U8))
- RETURN_FALSE_UNLESS(has_type(node->reduction_indices(), Type::S32))
- return true;
- }
-
- bool visit(const luci::CircleMul *node)
- {
- RETURN_FALSE_UNLESS(has_type(node, Type::U8))
- RETURN_FALSE_UNLESS(has_type(node->x(), Type::U8))
- RETURN_FALSE_UNLESS(has_type(node->y(), Type::U8))
- return true;
- }
-
- bool visit(const luci::CircleNotEqual *node)
- {
- RETURN_FALSE_UNLESS(has_type(node, Type::BOOL))
- RETURN_FALSE_UNLESS(has_type(node->x(), Type::U8))
- RETURN_FALSE_UNLESS(has_type(node->y(), Type::U8))
- return true;
- }
-
- bool visit(const luci::CircleRelu *node)
- {
- RETURN_FALSE_UNLESS(has_type(node, Type::U8))
- RETURN_FALSE_UNLESS(has_type(node->features(), Type::U8))
- return true;
- }
-
- bool visit(const luci::CircleReshape *node)
- {
- if (node->quantparam())
- {
- RETURN_FALSE_UNLESS(has_type(node, Type::U8))
- RETURN_FALSE_UNLESS(has_type(node->tensor(), Type::U8))
- }
- else
- {
- RETURN_FALSE_UNLESS(has_type(node->tensor(), node->dtype()))
- }
- luci::CircleConst *shape = dynamic_cast<luci::CircleConst *>(node->shape());
- if (shape != nullptr)
- RETURN_FALSE_UNLESS(has_type(shape, Type::S32))
- return true;
- }
-
- bool visit(const luci::CircleLogistic *node)
- {
- RETURN_FALSE_UNLESS(has_type(node, Type::U8))
- RETURN_FALSE_UNLESS(has_type(node->x(), Type::U8))
-
- RETURN_FALSE_UNLESS(node->quantparam());
- RETURN_FALSE_UNLESS(node->quantparam()->scale[0] == 1.0f / 256.0f);
- RETURN_FALSE_UNLESS(node->quantparam()->zerop[0] == 0);
- return true;
- }
-
- bool visit(const luci::CircleSoftmax *node)
- {
- RETURN_FALSE_UNLESS(has_type(node, Type::U8))
- RETURN_FALSE_UNLESS(has_type(node->logits(), Type::U8))
-
- RETURN_FALSE_UNLESS(node->quantparam());
- RETURN_FALSE_UNLESS(node->quantparam()->scale[0] == 1.0f / 255.0f);
- RETURN_FALSE_UNLESS(node->quantparam()->zerop[0] == 0);
- return true;
- }
-
- bool visit(const luci::CircleSpaceToBatchND *node)
- {
- RETURN_FALSE_UNLESS(has_type(node, Type::U8))
- RETURN_FALSE_UNLESS(has_type(node->input(), Type::U8))
- return true;
- }
-
- bool visit(const luci::CircleSpaceToDepth *node)
- {
- RETURN_FALSE_UNLESS(has_type(node, Type::U8))
- RETURN_FALSE_UNLESS(has_type(node->input(), Type::U8))
- return true;
- }
-
- bool visit(const luci::CircleSlice *node)
- {
- RETURN_FALSE_UNLESS(has_type(node, Type::U8))
- RETURN_FALSE_UNLESS(has_type(node->input(), Type::U8))
- RETURN_FALSE_UNLESS(has_type(node->begin(), Type::S32) || has_type(node->begin(), Type::S64))
- RETURN_FALSE_UNLESS(has_type(node->size(), Type::S32) || has_type(node->size(), Type::S64))
- return true;
- }
-
- bool visit(const luci::CircleSplit *node)
- {
- // node's output is the input of CircleSplitOut, thus not quantized
- RETURN_FALSE_UNLESS(has_type(node->input(), Type::U8))
- return true;
- }
-
- bool visit(const luci::CircleSplitOut *node)
- {
- RETURN_FALSE_UNLESS(has_type(node, Type::U8))
-
- // SplitOut has the same qparam with the input of Split
- auto split = loco::must_cast<luci::CircleSplit *>(node->input());
- auto input = loco::must_cast<luci::CircleNode *>(split->input());
- RETURN_FALSE_UNLESS(node->quantparam());
- RETURN_FALSE_UNLESS(node->quantparam()->scale[0] == input->quantparam()->scale[0]);
- RETURN_FALSE_UNLESS(node->quantparam()->zerop[0] == input->quantparam()->zerop[0]);
- return true;
- }
-
- bool visit(const luci::CircleSplitV *node)
- {
- // node's output is the input of CircleSplitVOut, thus not quantized
- RETURN_FALSE_UNLESS(has_type(node->input(), Type::U8))
- return true;
- }
-
- bool visit(const luci::CircleSplitVOut *node)
- {
- RETURN_FALSE_UNLESS(has_type(node, Type::U8))
-
- // SplitVOut has the same qparam with the input of SplitV
- auto splitv = loco::must_cast<luci::CircleSplitV *>(node->input());
- auto input = loco::must_cast<luci::CircleNode *>(splitv->input());
- RETURN_FALSE_UNLESS(node->quantparam());
- RETURN_FALSE_UNLESS(node->quantparam()->scale[0] == input->quantparam()->scale[0]);
- RETURN_FALSE_UNLESS(node->quantparam()->zerop[0] == input->quantparam()->zerop[0]);
- return true;
- }
-
- bool visit(const luci::CircleStridedSlice *node)
- {
- RETURN_FALSE_UNLESS(has_type(node, Type::U8))
- RETURN_FALSE_UNLESS(has_type(node->input(), Type::U8))
-
- auto input = loco::must_cast<luci::CircleNode *>(node->input());
- RETURN_FALSE_UNLESS(node->quantparam());
- RETURN_FALSE_UNLESS(node->quantparam()->scale[0] == input->quantparam()->scale[0]);
- RETURN_FALSE_UNLESS(node->quantparam()->zerop[0] == input->quantparam()->zerop[0]);
- return true;
- }
-
- bool visit(const luci::CircleArgMax *node)
- {
- RETURN_FALSE_UNLESS(has_type(node, node->output_type()))
- RETURN_FALSE_UNLESS(has_type(node->input(), Type::U8))
- RETURN_FALSE_UNLESS(has_type(node->dimension(), Type::S32) ||
- has_type(node->dimension(), Type::S64))
- return true;
- }
-
- bool visit(const luci::CircleTanh *node)
- {
- RETURN_FALSE_UNLESS(has_type(node, Type::U8))
- RETURN_FALSE_UNLESS(has_type(node->x(), Type::U8))
-
- RETURN_FALSE_UNLESS(node->quantparam());
- RETURN_FALSE_UNLESS(node->quantparam()->scale[0] == 2.0f / 256.0f);
- RETURN_FALSE_UNLESS(node->quantparam()->zerop[0] == 128);
- return true;
- }
-
- bool visit(const luci::CircleTranspose *node)
- {
- RETURN_FALSE_UNLESS(has_type(node, Type::U8))
- RETURN_FALSE_UNLESS(has_type(node->a(), Type::U8))
- RETURN_FALSE_UNLESS(has_type(node->perm(), Type::S32))
- return true;
- }
-
- bool visit(const luci::CircleFloor *node)
- {
- RETURN_FALSE_UNLESS(has_type(node, Type::U8))
- RETURN_FALSE_UNLESS(has_type(node->x(), Type::U8))
-
- // This checks the value of scale is an integer
- RETURN_FALSE_UNLESS(node->quantparam());
- RETURN_FALSE_UNLESS(std::roundf(node->quantparam()->scale[0]) == node->quantparam()->scale[0]);
- return true;
- }
-
- bool visit(const luci::CircleGreater *node)
- {
- RETURN_FALSE_UNLESS(has_type(node, Type::BOOL))
- RETURN_FALSE_UNLESS(has_type(node->x(), Type::U8))
- RETURN_FALSE_UNLESS(has_type(node->y(), Type::U8))
- return true;
- }
-
- bool visit(const luci::CircleGreaterEqual *node)
- {
- RETURN_FALSE_UNLESS(has_type(node, Type::BOOL))
- RETURN_FALSE_UNLESS(has_type(node->x(), Type::U8))
- RETURN_FALSE_UNLESS(has_type(node->y(), Type::U8))
- return true;
- }
-
- bool visit(const luci::CircleDiv *node)
- {
- RETURN_FALSE_UNLESS(has_type(node, Type::U8))
- RETURN_FALSE_UNLESS(has_type(node->x(), Type::U8))
- RETURN_FALSE_UNLESS(has_type(node->y(), Type::U8))
- return true;
- }
-
- bool visit(const luci::CircleFloorDiv *node)
- {
- RETURN_FALSE_UNLESS(has_type(node, Type::U8))
- RETURN_FALSE_UNLESS(has_type(node->x(), Type::U8))
- RETURN_FALSE_UNLESS(has_type(node->y(), Type::U8))
-
- // This checks the value of scale is an integer
- RETURN_FALSE_UNLESS(node->quantparam());
- RETURN_FALSE_UNLESS(std::roundf(node->quantparam()->scale[0]) == node->quantparam()->scale[0]);
- return true;
- }
-
- bool visit(const luci::CircleRsqrt *node)
- {
- RETURN_FALSE_UNLESS(has_type(node, Type::U8))
- RETURN_FALSE_UNLESS(has_type(node->x(), Type::U8))
- return true;
- }
-
- bool visit(const luci::CircleSqrt *node)
- {
- RETURN_FALSE_UNLESS(has_type(node, Type::U8))
- RETURN_FALSE_UNLESS(has_type(node->x(), Type::U8))
- return true;
- }
-
- bool visit(const luci::CircleElu *node)
- {
- RETURN_FALSE_UNLESS(has_type(node, Type::U8))
- RETURN_FALSE_UNLESS(has_type(node->features(), Type::U8))
- return true;
- }
-
- bool visit(const luci::CirclePow *node)
- {
- RETURN_FALSE_UNLESS(has_type(node, Type::U8))
- RETURN_FALSE_UNLESS(has_type(node->x(), Type::U8))
- RETURN_FALSE_UNLESS(has_type(node->y(), Type::U8))
- return true;
- }
-
- bool visit(const luci::CircleResizeBilinear *node)
- {
- RETURN_FALSE_UNLESS(has_type(node, Type::U8))
- RETURN_FALSE_UNLESS(has_type(node->input(), Type::U8))
- return true;
- }
-
- bool visit(const luci::CircleResizeNearestNeighbor *node)
- {
- RETURN_FALSE_UNLESS(has_type(node, Type::U8))
- RETURN_FALSE_UNLESS(has_type(node->input(), Type::U8))
- return true;
- }
-
- bool visit(const luci::CircleUnpack *node)
- {
- // node's output is the input of CircleUnpackOut, thus not quantized
- RETURN_FALSE_UNLESS(has_type(node->value(), Type::U8))
- return true;
- }
-
- bool visit(const luci::CircleUnpackOut *node)
- {
- RETURN_FALSE_UNLESS(has_type(node, Type::U8))
-
- // UnpackOut has the same qparam with the input of Unpack
- auto Unpack = loco::must_cast<luci::CircleUnpack *>(node->input());
- auto input = loco::must_cast<luci::CircleNode *>(Unpack->value());
- RETURN_FALSE_UNLESS(node->quantparam() && input->quantparam());
- RETURN_FALSE_UNLESS(node->quantparam()->scale[0] == input->quantparam()->scale[0]);
- RETURN_FALSE_UNLESS(node->quantparam()->zerop[0] == input->quantparam()->zerop[0]);
- return true;
- }
-
- bool visit(const luci::CircleCast *node)
- {
- auto *input = loco::must_cast<luci::CircleNode *>(node->x());
- bool input_quantized = input->quantparam() != nullptr;
- if (input_quantized)
- {
- RETURN_FALSE_UNLESS(has_type(input, node->in_data_type()))
- RETURN_FALSE_UNLESS(has_type(input, Type::U8))
- }
-
- bool node_quantized = node->quantparam() != nullptr;
- if (node_quantized)
- {
- RETURN_FALSE_UNLESS(has_type(node, node->out_data_type()))
- RETURN_FALSE_UNLESS(has_type(node, Type::U8))
- }
- return true;
- }
-
- // TODO: Implement more Ops
-
- bool visit(const luci::CircleNode *) { return true; }
-};
-
-} // namespace luci
-
-#undef RETURN_FALSE_UNLESS
-
-#endif // __LUCI_VERIFY_QUNTIZED_NODE_U8_TYPE_H__
diff --git a/compiler/luci/pass/src/helpers/LayerInfoMap.cpp b/compiler/luci/pass/src/helpers/LayerInfoMap.cpp
new file mode 100644
index 000000000..ac07f9ec9
--- /dev/null
+++ b/compiler/luci/pass/src/helpers/LayerInfoMap.cpp
@@ -0,0 +1,189 @@
+/*
+ * Copyright (c) 2022 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "LayerInfoMap.h"
+
+#include <luci/IR/CircleNode.h>
+
+#include <cassert>
+#include <stdexcept> // std::runtime_error
+
+namespace luci
+{
+namespace
+{
+
+bool is_multiple_output_node(const luci::CircleNode *node)
+{
+ switch (node->opcode())
+ {
+    // The following nodes have multiple outputs. Their output tensors are produced
+    // not by the nodes themselves but by the corresponding *Out nodes.
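+    // (e.g. a SPLIT node's tensors are exposed through its CircleSplitOut successors)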
+ case luci::CircleOpcode::SPLIT:
+ case luci::CircleOpcode::SPLIT_V:
+ case luci::CircleOpcode::TOPK_V2:
+ case luci::CircleOpcode::UNIQUE:
+ case luci::CircleOpcode::UNPACK:
+ return true;
+    // TODO: Support these ops
+ case luci::CircleOpcode::BIDIRECTIONAL_SEQUENCE_LSTM:
+ case luci::CircleOpcode::CUSTOM:
+ case luci::CircleOpcode::IF:
+ case luci::CircleOpcode::NON_MAX_SUPPRESSION_V4:
+ case luci::CircleOpcode::NON_MAX_SUPPRESSION_V5:
+ case luci::CircleOpcode::WHILE:
+      throw std::runtime_error("Op not supported yet");
+ default:
+ return false;
+ }
+}
+
+const luci::CircleNode *get_multi_output_node(const luci::CircleNode *node)
+{
+ if (is_multiple_output_node(node))
+ return node;
+
+ switch (node->opcode())
+ {
+ // The following nodes denote outputs of multiple-output nodes.
+ case luci::CircleOpcode::CIRCLESPLITOUT:
+ {
+ const auto split_out = loco::must_cast<const CircleSplitOut *>(node);
+ return loco::must_cast<luci::CircleNode *>(split_out->input());
+ }
+ case luci::CircleOpcode::CIRCLESPLITVOUT:
+ {
+ const auto splitv_out = loco::must_cast<const CircleSplitVOut *>(node);
+ return loco::must_cast<luci::CircleNode *>(splitv_out->input());
+ }
+ case luci::CircleOpcode::CIRCLETOPKV2OUT:
+ {
+ const auto top_kv2_out = loco::must_cast<const CircleTopKV2Out *>(node);
+ return loco::must_cast<luci::CircleNode *>(top_kv2_out->input());
+ }
+ case luci::CircleOpcode::CIRCLEUNIQUEOUT:
+ {
+ const auto unique_out = loco::must_cast<const CircleUniqueOut *>(node);
+ return loco::must_cast<luci::CircleNode *>(unique_out->input());
+ }
+ case luci::CircleOpcode::CIRCLEUNPACKOUT:
+ {
+ const auto unpack_out = loco::must_cast<const CircleUnpackOut *>(node);
+ return loco::must_cast<luci::CircleNode *>(unpack_out->input());
+ }
+ // TODO: Support these ops
+ case luci::CircleOpcode::CIRCLEBIDIRECTIONAL_SEQUENCE_LSTM_OUT:
+ case luci::CircleOpcode::CIRCLECUSTOMOUT:
+ case luci::CircleOpcode::CIRCLEIFOUT:
+ case luci::CircleOpcode::CIRCLENONMAXSUPPRESSIONV4OUT:
+ case luci::CircleOpcode::CIRCLENONMAXSUPPRESSIONV5OUT:
+ case luci::CircleOpcode::CIRCLEWHILEOUT:
+      throw std::runtime_error("Op not supported yet");
+ default:
+ return nullptr;
+ }
+}
+
+bool same_setting(const LayerInfo &left, const LayerInfo &right)
+{
+ return left.dtype == right.dtype and left.granularity == right.granularity;
+}
+
+void add_multi_output_node(LayerInfoMap &info_by_name, LayerInfo &layer_info,
+ const luci::CircleNode *node)
+{
+ assert(is_multiple_output_node(node)); // FIX_CALLER_UNLESS
+
+ const auto succs_nodes = loco::succs(node);
+ const auto name = node->name();
+
+ if (info_by_name.find(name) != info_by_name.end())
+ {
+ // Check that all outputs have equal dtype and granularity
+ for (const auto succs_node : succs_nodes)
+ {
+ const auto succs_circle_node = loco::must_cast<luci::CircleNode *>(succs_node);
+
+ const auto it = info_by_name.find(succs_circle_node->name());
+ if (it != info_by_name.end() and not same_setting(layer_info, (it->second)))
+ throw std::runtime_error("Outputs of multiple-output nodes should have equal dtype and "
+ "granularity. Check the quantization configuration file");
+ }
+ return;
+ }
+
+  // Add the multiple-output node to info_by_name
+ info_by_name[name] = {name, layer_info.dtype, layer_info.granularity};
+
+  // Add its output nodes to info_by_name
+ for (const auto succs_node : succs_nodes)
+ {
+ const auto succs_circle_node = loco::must_cast<luci::CircleNode *>(succs_node);
+ const auto succs_circle_node_name = succs_circle_node->name();
+ info_by_name[succs_circle_node_name] = {succs_circle_node_name, layer_info.dtype,
+ layer_info.granularity};
+ }
+}
+
+} // namespace
+
+LayerInfoMap layer_info_map(loco::Graph *g, std::vector<LayerInfo> &layers_info)
+{
+ LayerInfoMap info_by_name;
+
+ for (auto &&info : layers_info)
+ {
+ auto name = info.name;
+ bool found = false;
+ for (auto node : loco::active_nodes(loco::output_nodes(g)))
+ {
+ auto cnode = loco::must_cast<luci::CircleNode *>(node);
+ if (cnode->opcode() == luci::CircleOpcode::CIRCLEOUTPUT)
+ continue;
+
+ if (cnode->name() == name)
+ {
+ // Check and add multiple-output node and its outputs to info_by_name
+ if (const auto multi_output = get_multi_output_node(cnode))
+ {
+ add_multi_output_node(info_by_name, info, multi_output);
+ found = true;
+ continue;
+ }
+
+ if (info_by_name.find(name) != info_by_name.end())
+ {
+ throw std::runtime_error("Duplicate layer name " + name +
+ ". Check layer names in the quantization configuration file.");
+ }
+
+ info_by_name[name] = info;
+ found = true;
+ continue;
+ }
+ }
+
+ if (not found)
+ throw std::runtime_error("No such layer named " + name +
+ ". Check layer names in the quantization configuration file.");
+ }
+
+  // TODO Check that all names in layers_info exist in info_by_name
+  // TODO Check that names in info_by_name but not in layers_info come from virtual outputs
+
+ return info_by_name;
+}
+
+} // namespace luci
diff --git a/compiler/luci/pass/src/helpers/LayerInfoMap.h b/compiler/luci/pass/src/helpers/LayerInfoMap.h
new file mode 100644
index 000000000..bb4724a50
--- /dev/null
+++ b/compiler/luci/pass/src/helpers/LayerInfoMap.h
@@ -0,0 +1,33 @@
+/*
+ * Copyright (c) 2022 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __LUCI_PASS_HELPERS_LAYER_INFO_MAP_H__
+#define __LUCI_PASS_HELPERS_LAYER_INFO_MAP_H__
+
+#include <luci/Pass/QuantizationParameters.h>
+
+#include <unordered_map>
+
+namespace luci
+{
+
+using LayerInfoMap = std::unordered_map<std::string, luci::LayerInfo>;
+
+LayerInfoMap layer_info_map(loco::Graph *g, std::vector<LayerInfo> &layers_info);
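+
+// Example (a sketch; "conv1" and parse_layer_infos are illustrative):
+//
+//   std::vector<luci::LayerInfo> layers = parse_layer_infos(config_path);
+//   auto info_by_name = luci::layer_info_map(graph, layers);
+//   const auto &info = info_by_name.at("conv1");
+//   // info.dtype and info.granularity now drive quantization of that layer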
+
+} // namespace luci
+
+#endif // __LUCI_PASS_HELPERS_LAYER_INFO_MAP_H__
diff --git a/compiler/luci/pass/src/helpers/LayerInfoMap.test.cpp b/compiler/luci/pass/src/helpers/LayerInfoMap.test.cpp
new file mode 100644
index 000000000..2ed28eda4
--- /dev/null
+++ b/compiler/luci/pass/src/helpers/LayerInfoMap.test.cpp
@@ -0,0 +1,201 @@
+/*
+ * Copyright (c) 2022 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "LayerInfoMap.h"
+
+#include <luci/IR/CircleNode.h>
+#include <luci/test/TestIOGraph.h>
+
+#include <gtest/gtest.h>
+
+namespace
+{
+
+class SoftmaxTestGraph : public luci::test::TestIOGraph
+{
+public:
+ void init(void)
+ {
+ TestIOGraph::init({32}, {32});
+ _softmax = g()->nodes()->create<luci::CircleSoftmax>();
+ {
+ _softmax->logits(input());
+ _softmax->beta(0.1);
+ _softmax->name("test");
+ }
+ output()->from(_softmax);
+ }
+
+private:
+ luci::CircleSoftmax *_softmax = nullptr;
+};
+
+class SplitAddTestGraph : public luci::test::TestIOGraph
+{
+public:
+ void init(void)
+ {
+ TestIOGraph::init({6, 1, 2}, {3, 1, 2});
+ _split_dim = g()->nodes()->create<luci::CircleConst>();
+ {
+ _split_dim->rank(1);
+ _split_dim->dtype(loco::DataType::S32);
+ _split_dim->size<loco::DataType::S32>(1);
+      _split_dim->at<loco::DataType::S32>(0) = 0; // split along axis 0
+ _split_dim->shape({1});
+ _split_dim->name("split_dim");
+ }
+
+ _split = g()->nodes()->create<luci::CircleSplit>();
+ {
+ _split->input(input());
+ _split->num_split(2);
+ _split->split_dim(_split_dim);
+ _split->name("split0");
+ }
+
+ _split_out_1 = g()->nodes()->create<luci::CircleSplitOut>();
+ {
+ _split_out_1->input(_split);
+ _split_out_1->index(0);
+ _split_out_1->name("split0");
+ }
+
+ _split_out_2 = g()->nodes()->create<luci::CircleSplitOut>();
+ {
+ _split_out_2->input(_split);
+ _split_out_2->index(1);
+ _split_out_2->name("split1");
+ }
+
+ _add = g()->nodes()->create<luci::CircleAdd>();
+ {
+ _add->x(_split_out_1);
+ _add->y(_split_out_2);
+ _add->name("add");
+ }
+ output()->from(_add);
+ }
+
+private:
+ luci::CircleSplit *_split = nullptr;
+ luci::CircleSplitOut *_split_out_1 = nullptr;
+ luci::CircleSplitOut *_split_out_2 = nullptr;
+ luci::CircleConst *_split_dim = nullptr;
+ luci::CircleAdd *_add = nullptr;
+};
+
+} // namespace
+
+TEST(LayerInfoMapTest, simple_test)
+{
+ SoftmaxTestGraph g;
+ g.init();
+
+ luci::LayerInfo info;
+ {
+ info.name = "test";
+ info.dtype = loco::DataType::U8;
+ info.granularity = luci::QuantizationGranularity::ChannelWise;
+ }
+ std::vector<luci::LayerInfo> v;
+ v.emplace_back(info);
+ auto map = luci::layer_info_map(g.g(), v);
+
+ EXPECT_EQ("test", map["test"].name);
+ EXPECT_EQ(loco::DataType::U8, map["test"].dtype);
+ EXPECT_EQ(luci::QuantizationGranularity::ChannelWise, map["test"].granularity);
+}
+
+TEST(LayerInfoMapTest, multiple_output_node_test)
+{
+ SplitAddTestGraph g;
+ g.init();
+
+ luci::LayerInfo info;
+ {
+ info.name = "split0";
+ info.dtype = loco::DataType::U8;
+ info.granularity = luci::QuantizationGranularity::ChannelWise;
+ }
+ std::vector<luci::LayerInfo> v;
+ v.emplace_back(info);
+ auto map = luci::layer_info_map(g.g(), v);
+
+ EXPECT_EQ(map.size(), 2);
+ EXPECT_EQ("split0", map["split0"].name);
+ EXPECT_EQ("split1", map["split1"].name);
+
+ EXPECT_EQ(loco::DataType::U8, map["split0"].dtype);
+ EXPECT_EQ(luci::QuantizationGranularity::ChannelWise, map["split0"].granularity);
+}
+
+TEST(LayerInfoMapTest, invalid_layer_info_multiple_output_node_NEG)
+{
+ SplitAddTestGraph g;
+ g.init();
+
+ luci::LayerInfo info_0;
+ {
+ info_0.name = "split0";
+ info_0.dtype = loco::DataType::U8;
+ info_0.granularity = luci::QuantizationGranularity::ChannelWise;
+ }
+ luci::LayerInfo info_1;
+ {
+ info_1.name = "split1";
+ info_1.dtype = loco::DataType::S16;
+ info_1.granularity = luci::QuantizationGranularity::ChannelWise;
+ }
+ std::vector<luci::LayerInfo> v;
+ v.emplace_back(info_0);
+ v.emplace_back(info_1);
+
+ EXPECT_ANY_THROW(luci::layer_info_map(g.g(), v));
+}
+
+TEST(LayerInfoMapTest, duplicate_name_NEG)
+{
+ SoftmaxTestGraph g;
+ g.init();
+ g.input()->name("test");
+
+ luci::LayerInfo info;
+ {
+ info.name = "test";
+ info.dtype = loco::DataType::U8;
+ info.granularity = luci::QuantizationGranularity::ChannelWise;
+ }
+ std::vector<luci::LayerInfo> v;
+ v.emplace_back(info);
+ EXPECT_ANY_THROW(luci::layer_info_map(g.g(), v));
+}
+
+TEST(LayerInfoMapTest, no_name_NEG)
+{
+ SoftmaxTestGraph g;
+ g.init();
+
+ luci::LayerInfo info;
+ {
+ info.name = "noname";
+ info.dtype = loco::DataType::U8;
+ info.granularity = luci::QuantizationGranularity::ChannelWise;
+ }
+ std::vector<luci::LayerInfo> v;
+ v.emplace_back(info);
+ EXPECT_ANY_THROW(luci::layer_info_map(g.g(), v));
+}
diff --git a/compiler/luci/requires.cmake b/compiler/luci/requires.cmake
index 3ccc58128..e896188be 100644
--- a/compiler/luci/requires.cmake
+++ b/compiler/luci/requires.cmake
@@ -4,8 +4,8 @@ require("loco")
require("locop")
require("logo")
require("logo-core")
-require("mio-circle")
-require("mio-tflite")
+require("mio-circle04")
+require("mio-tflite280")
require("oops")
require("hermes")
require("hermes-std")
diff --git a/compiler/luci/service/CMakeLists.txt b/compiler/luci/service/CMakeLists.txt
index 0e6097f96..24bdfc152 100644
--- a/compiler/luci/service/CMakeLists.txt
+++ b/compiler/luci/service/CMakeLists.txt
@@ -10,7 +10,6 @@ add_library(luci_service ${LUCI_LIBRARY_TYPE} ${SOURCES})
target_include_directories(luci_service PRIVATE src)
target_include_directories(luci_service PUBLIC include)
target_link_libraries(luci_service PUBLIC luci_lang)
-target_link_libraries(luci_service PUBLIC mio_circle)
target_link_libraries(luci_service PUBLIC logo_core)
target_link_libraries(luci_service PRIVATE luci_log)
target_link_libraries(luci_service PRIVATE luci_logex)
diff --git a/compiler/luci/service/include/luci/Service/CircleShapeInference.h b/compiler/luci/service/include/luci/Service/CircleShapeInference.h
index ead12d074..2c1120941 100644
--- a/compiler/luci/service/include/luci/Service/CircleShapeInference.h
+++ b/compiler/luci/service/include/luci/Service/CircleShapeInference.h
@@ -17,11 +17,12 @@
#ifndef __LUCI_CIRCLE_SHAPE_INFERENCE_H__
#define __LUCI_CIRCLE_SHAPE_INFERENCE_H__
-#include <loco/IR/Nodes.h>
-
+#include <luci/Service/CircleShapeInferenceRule.h>
#include <luci/IR/CircleNodes.h>
#include <luci/IR/CircleNodeVisitor.h>
-#include <luci/Service/CircleShapeInferenceRule.h>
+
+#include <loco/IR/NodeShape.h>
+#include <loco/IR/TensorShape.h>
namespace luci
{
diff --git a/compiler/luci/service/include/luci/Service/CircleTypeInference.h b/compiler/luci/service/include/luci/Service/CircleTypeInference.h
index d62731380..e0ceabeac 100644
--- a/compiler/luci/service/include/luci/Service/CircleTypeInference.h
+++ b/compiler/luci/service/include/luci/Service/CircleTypeInference.h
@@ -17,13 +17,11 @@
#ifndef __LUCI_CIRCLE_TYPE_INFERENCE_H__
#define __LUCI_CIRCLE_TYPE_INFERENCE_H__
-#include <loco/IR/Nodes.h>
-
-#include <mio/circle/schema_generated.h>
-
+#include <luci/Service/CircleTypeInferenceRule.h>
#include <luci/IR/CircleNodes.h>
#include <luci/IR/CircleNodeVisitor.h>
-#include <luci/Service/CircleTypeInferenceRule.h>
+
+#include <loco/IR/DataType.h>
namespace luci
{
diff --git a/compiler/luci/service/src/CircleCloneNode.h b/compiler/luci/service/src/CircleCloneNode.h
index 3926147f5..99e4561b3 100644
--- a/compiler/luci/service/src/CircleCloneNode.h
+++ b/compiler/luci/service/src/CircleCloneNode.h
@@ -208,6 +208,7 @@ public:
luci::CircleNode *visit(const luci::CircleSquaredDifference *) final;
luci::CircleNode *visit(const luci::CircleSqueeze *) final;
luci::CircleNode *visit(const luci::CircleStridedSlice *) final;
+ luci::CircleNode *visit(const luci::CircleSVDF *) final;
luci::CircleNode *visit(const luci::CircleSub *) final;
luci::CircleNode *visit(const luci::CircleSum *) final;
luci::CircleNode *visit(const luci::CircleTanh *) final;
@@ -269,6 +270,7 @@ public:
luci::CircleNode *visit(const luci::CircleTopKV2Out *) final;
luci::CircleNode *visit(const luci::CircleUniqueOut *) final;
luci::CircleNode *visit(const luci::CircleUnpackOut *) final;
+ luci::CircleNode *visit(const luci::CircleVariable *) final;
luci::CircleNode *visit(const luci::CircleWhileOut *) final;
// Handle in CircleNode
diff --git a/compiler/luci/service/src/CircleNodeClone.cpp b/compiler/luci/service/src/CircleNodeClone.cpp
index d2033dd0c..220c6096c 100644
--- a/compiler/luci/service/src/CircleNodeClone.cpp
+++ b/compiler/luci/service/src/CircleNodeClone.cpp
@@ -14,6 +14,7 @@
* limitations under the License.
*/
+#include "luci/IR/CircleQuantParam.h"
#include "luci/Service/CircleNodeClone.h"
#include "CircleCloneNode.h"
@@ -45,18 +46,7 @@ void copy_common_attributes(const luci::CircleNode *src, luci::CircleNode *dst)
dst->shape_status(src->shape_status());
// quantparam
- const auto *quantparam = src->quantparam();
- if (quantparam != nullptr)
- {
- auto qparam = std::make_unique<luci::CircleQuantParam>();
- qparam->scale = quantparam->scale;
- qparam->zerop = quantparam->zerop;
- qparam->min = quantparam->min;
- qparam->max = quantparam->max;
- qparam->quantized_dimension = quantparam->quantized_dimension;
-
- dst->quantparam(std::move(qparam));
- }
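+ // copy_quantparam() replaces the inline copy removed above; it deep-copies
+ // scale, zerop, min, max and quantized_dimension from src to dst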
+ copy_quantparam(src, dst);
// sparsity
const auto *sparsity = src->sparsityparam();
diff --git a/compiler/luci/service/src/CircleShapeInferenceRule.cpp b/compiler/luci/service/src/CircleShapeInferenceRule.cpp
index 5d6a31050..9d156f3e2 100644
--- a/compiler/luci/service/src/CircleShapeInferenceRule.cpp
+++ b/compiler/luci/service/src/CircleShapeInferenceRule.cpp
@@ -1,5 +1,6 @@
/*
* Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ * Copyright 2017 The TensorFlow Authors. All Rights Reserved.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
@@ -196,23 +197,18 @@ template <class CIRCLENODE> loco::NodeShape broadcast_xy(const CIRCLENODE *node)
return loco::NodeShape{output_shape};
}
-template <class CIRCLENODE> loco::NodeShape use_inputs(const CIRCLENODE *node)
-{
- auto inputs_shape = luci::shape_get(node->inputs()).template as<loco::TensorShape>();
- return loco::NodeShape{inputs_shape};
-}
+#define DECLARE_USE_SINGLE(NAME) \
+ template <class CIRCLENODE> loco::NodeShape use_##NAME(const CIRCLENODE *node) \
+ { \
+ auto inputs_shape = luci::shape_get(node->NAME()).template as<loco::TensorShape>(); \
+ return loco::NodeShape{inputs_shape}; \
+ }
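+// e.g. DECLARE_USE_SINGLE(logits) declares use_logits(node), which forwards
+// the shape of node->logits() as the node's own shape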
-template <class CIRCLENODE> loco::NodeShape use_x(const CIRCLENODE *node)
-{
- auto x_shape = luci::shape_get(node->x()).template as<loco::TensorShape>();
- return loco::NodeShape{x_shape};
-}
+DECLARE_USE_SINGLE(inputs);
+DECLARE_USE_SINGLE(x);
+DECLARE_USE_SINGLE(logits);
-template <class CIRCLENODE> loco::NodeShape use_logits(const CIRCLENODE *node)
-{
- auto shape = luci::shape_get(node->logits()).template as<loco::TensorShape>();
- return loco::NodeShape{shape};
-}
+#undef DECLARE_USE_SINGLE
template <class CIRCLENODE>
loco::NodeShape use_paddings(const CIRCLENODE *node, const luci::CircleConst *paddings)
@@ -721,6 +717,8 @@ loco::NodeShape infer_fully_connected(const luci::CircleFullyConnected *node)
auto input_shape = luci::shape_get(node->input()).as<loco::TensorShape>();
auto weights_shape = luci::shape_get(node->weights()).as<loco::TensorShape>();
+// TODO Remove the following unused code
+#if 0
// Checking shape capability for fully connected layer
// Input: a tensor of at least rank 2 [D1, D2, ... Dn]
// Weight: [# of units, K]
@@ -741,6 +739,40 @@ loco::NodeShape infer_fully_connected(const luci::CircleFullyConnected *node)
out_shape.rank(2);
out_shape.dim(0) = batch_size;
out_shape.dim(1) = weights_shape.dim(0);
+#endif
+
+ loco::TensorShape out_shape;
+
+ // NOTE Some recipes in some repositories still use rank-4 inputs for FullyConnected.
+ // Until they are all fixed, the following assert stays disabled.
+ // TODO Enable the following assert once the related fixes are applied
+ // https://github.com/tensorflow/tensorflow/blob/ea33c1e7a25d8025e8ee405ad8ab7be261798d76/tensorflow/lite/kernels/fully_connected.cc#L194
+ // LUCI_ASSERT(input_shape.rank() == 2 || input_shape.rank() == 3,
+ // "Input rank of FullyConnected should be 2 or 3");
+
+ // https://github.com/tensorflow/tensorflow/blob/ea33c1e7a25d8025e8ee405ad8ab7be261798d76/tensorflow/lite/kernels/fully_connected.cc#L225
+ LUCI_ASSERT(weights_shape.rank() == 2, "Weights of FullyConnected should be 2");
+
+ // https://github.com/tensorflow/tensorflow/blob/ea33c1e7a25d8025e8ee405ad8ab7be261798d76/tensorflow/lite/kernels/fully_connected.cc#L353-L367
+ if (node->keep_num_dims())
+ {
+ out_shape.rank(input_shape.rank());
+ for (uint32_t i = 0; i < input_shape.rank(); ++i)
+ out_shape.dim(i) = input_shape.dim(i);
+ out_shape.dim(out_shape.rank() - 1) = weights_shape.dim(0);
+ }
+ else
+ {
+ uint32_t input_size = 1;
+ for (uint32_t i = 0; i < input_shape.rank(); i++)
+ {
+ input_size = input_size * input_shape.dim(i).value();
+ }
+ const uint32_t batch_size = input_size / weights_shape.dim(1).value();
+ out_shape.rank(2);
+ out_shape.dim(0) = batch_size;
+ out_shape.dim(1) = weights_shape.dim(0);
+ }
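+ // e.g. input [2, 3, 4] with weights [5, 4]: keep_num_dims=true gives [2, 3, 5];
+ // keep_num_dims=false flattens to [6, 5] (batch_size = 2*3*4 / 4)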
return loco::NodeShape{out_shape};
}
@@ -1554,6 +1586,30 @@ loco::NodeShape infer_squeeze(const luci::CircleSqueeze *node)
return loco::NodeShape{output_shape};
}
+loco::NodeShape infer_svdf(const luci::CircleSVDF *node)
+{
+ const auto ifm_shape = luci::shape_get(node->input()).as<loco::TensorShape>();
+ const auto weight_feature_shape = luci::shape_get(node->weight_feature()).as<loco::TensorShape>();
+
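+ // Shapes follow the TFLite SVDF kernel: input is [batch_size, input_size] and
+ // weight_feature is [num_filters, input_size]; num_filters must be a multiple
+ // of svdf_rank, giving num_units = num_filters / rank outputs per batch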
+ assert(ifm_shape.rank() == 2);
+ assert(weight_feature_shape.rank() == 2);
+
+ assert(ifm_shape.dim(1) == weight_feature_shape.dim(1));
+ assert(weight_feature_shape.dim(0).known());
+
+ const auto rank = node->svdf_rank();
+ const auto num_filters = weight_feature_shape.dim(0).value();
+ assert(num_filters % rank == 0);
+ const auto num_units = num_filters / rank;
+
+ loco::TensorShape ofm_shape;
+ ofm_shape.rank(2);
+ ofm_shape.dim(0) = ifm_shape.dim(0);
+ ofm_shape.dim(1) = num_units;
+
+ return loco::NodeShape{ofm_shape};
+}
+
loco::NodeShape infer_tile(const luci::CircleTile *node)
{
const loco::DataType S32 = loco::DataType::S32;
@@ -2393,6 +2449,8 @@ public:
return loco::NodeShape{output_shape};
}
+ loco::NodeShape visit(const luci::CircleSVDF *node) final { return infer_svdf(node); }
+
loco::NodeShape visit(const luci::CircleTanh *node) final { return use_x(node); }
loco::NodeShape visit(const luci::CircleTile *node) final { return infer_tile(node); }
@@ -2486,6 +2544,8 @@ public:
loco::NodeShape visit(const luci::CircleUnpackOut *node) final { return infer_unpack_out(node); }
+ loco::NodeShape visit(const luci::CircleVariable *node) final { return use_own(node); }
+
loco::NodeShape visit(const luci::CircleWhileOut *node) final { return infer_while_out(node); }
};
diff --git a/compiler/luci/service/src/CircleTypeInferenceRule.cpp b/compiler/luci/service/src/CircleTypeInferenceRule.cpp
index 5f6d46f2b..438c4a364 100644
--- a/compiler/luci/service/src/CircleTypeInferenceRule.cpp
+++ b/compiler/luci/service/src/CircleTypeInferenceRule.cpp
@@ -478,6 +478,11 @@ struct TypeInferenceAlgorithm final : public luci::CircleNodeVisitor<loco::DataT
loco::DataType visit(const luci::CircleSum *node) final { return luci::dtype_get(node->input()); }
+ loco::DataType visit(const luci::CircleSVDF *node) final
+ {
+ return luci::dtype_get(node->input());
+ }
+
loco::DataType visit(const luci::CircleTanh *node) final { return luci::dtype_get(node->x()); }
loco::DataType visit(const luci::CircleTile *node) final
@@ -605,6 +610,8 @@ struct TypeInferenceAlgorithm final : public luci::CircleNodeVisitor<loco::DataT
return loco::DataType::S32;
}
+ loco::DataType visit(const luci::CircleVariable *node) final { return node->dtype(); }
+
loco::DataType visit(const luci::CircleUniqueOut *node) final
{
if (node->index() == 0)
diff --git a/compiler/luci/service/src/Nodes/CircleSVDF.cpp b/compiler/luci/service/src/Nodes/CircleSVDF.cpp
new file mode 100644
index 000000000..d4c3ce88f
--- /dev/null
+++ b/compiler/luci/service/src/Nodes/CircleSVDF.cpp
@@ -0,0 +1,37 @@
+/*
+ * Copyright (c) 2022 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "CircleCloneNode.h"
+
+namespace luci
+{
+
+luci::CircleNode *CloneNodeLet<CN::STUV>::visit(const luci::CircleSVDF *node)
+{
+ if (node->fusedActivationFunction() == luci::FusedActFunc::UNDEFINED)
+ return nullptr;
+
+ auto *cloned = _graph->nodes()->create<luci::CircleSVDF>();
+ if (cloned != nullptr)
+ {
+ cloned->fusedActivationFunction(node->fusedActivationFunction());
+ cloned->asymmetric_quantize_inputs(node->asymmetric_quantize_inputs());
+ cloned->svdf_rank(node->svdf_rank());
+ }
+ return cloned;
+}
+
+} // namespace luci
diff --git a/compiler/luci/service/src/Nodes/CircleSVDF.test.cpp b/compiler/luci/service/src/Nodes/CircleSVDF.test.cpp
new file mode 100644
index 000000000..d6edaf1cc
--- /dev/null
+++ b/compiler/luci/service/src/Nodes/CircleSVDF.test.cpp
@@ -0,0 +1,47 @@
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "luci/Service/CircleNodeClone.h"
+
+#include <gtest/gtest.h>
+
+TEST(CloneNodeTest, clone_SVDF)
+{
+ auto g = loco::make_graph();
+ auto node_svdf = g->nodes()->create<luci::CircleSVDF>();
+ node_svdf->fusedActivationFunction(luci::FusedActFunc::RELU);
+
+ auto gc = loco::make_graph();
+ auto cloned = luci::clone_node(node_svdf, gc.get());
+ ASSERT_NE(nullptr, cloned);
+ ASSERT_EQ(gc.get(), cloned->graph());
+
+ auto cloned_svdf = dynamic_cast<luci::CircleSVDF *>(cloned);
+ ASSERT_NE(nullptr, cloned_svdf);
+ ASSERT_EQ(node_svdf->asymmetric_quantize_inputs(), cloned_svdf->asymmetric_quantize_inputs());
+ ASSERT_EQ(node_svdf->svdf_rank(), cloned_svdf->svdf_rank());
+}
+
+TEST(CloneNodeTest, clone_SVDF_NEG)
+{
+ auto g = loco::make_graph();
+ auto node_svdf = g->nodes()->create<luci::CircleSVDF>();
+ node_svdf->fusedActivationFunction(luci::FusedActFunc::UNDEFINED);
+
+ auto gc = loco::make_graph();
+ auto cloned = luci::clone_node(node_svdf, gc.get());
+ ASSERT_EQ(nullptr, cloned);
+}
diff --git a/compiler/luci/service/src/Nodes/CircleVariable.cpp b/compiler/luci/service/src/Nodes/CircleVariable.cpp
new file mode 100644
index 000000000..c1430bd3a
--- /dev/null
+++ b/compiler/luci/service/src/Nodes/CircleVariable.cpp
@@ -0,0 +1,27 @@
+/*
+ * Copyright (c) 2022 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "CircleCloneNode.h"
+
+namespace luci
+{
+
+luci::CircleNode *CloneNode::visit(const luci::CircleVariable *)
+{
+ return _graph->nodes()->create<luci::CircleVariable>();
+}
+
+} // namespace luci
diff --git a/compiler/luci/service/src/Nodes/CircleVariable.test.cpp b/compiler/luci/service/src/Nodes/CircleVariable.test.cpp
new file mode 100644
index 000000000..7d29438be
--- /dev/null
+++ b/compiler/luci/service/src/Nodes/CircleVariable.test.cpp
@@ -0,0 +1,33 @@
+/*
+ * Copyright (c) 2022 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "luci/Service/CircleNodeClone.h"
+
+#include <gtest/gtest.h>
+
+TEST(CloneNodeTest, clone_Variable)
+{
+ auto g = loco::make_graph();
+ auto node_dummy = g->nodes()->create<luci::CircleVariable>();
+
+ auto gc = loco::make_graph();
+ auto cloned = luci::clone_node(node_dummy, gc.get());
+ ASSERT_NE(nullptr, cloned);
+ ASSERT_EQ(gc.get(), cloned->graph());
+
+ auto cloned_variable = dynamic_cast<luci::CircleVariable *>(cloned);
+ ASSERT_NE(nullptr, cloned_variable);
+}
diff --git a/compiler/luci/tests/CMakeLists.txt b/compiler/luci/tests/CMakeLists.txt
index c03835823..1333efb7d 100644
--- a/compiler/luci/tests/CMakeLists.txt
+++ b/compiler/luci/tests/CMakeLists.txt
@@ -1,3 +1,14 @@
+set(CIRCLECHEF_FILE_PATH $<TARGET_FILE:circlechef-file>)
+set(TFLCHEF_FILE_PATH $<TARGET_FILE:tflchef-file>)
+set(TFLITE2CIRCLE_PATH $<TARGET_FILE:tflite2circle>)
+if(DEFINED ENV{BUILD_HOST_EXEC})
+ # TODO use a better way to represent paths to host executables
+ set(CIRCLECHEF_FILE_PATH $ENV{BUILD_HOST_EXEC}/compiler/circlechef/tools/file/circlechef-file)
+ set(TFLCHEF_FILE_PATH $ENV{BUILD_HOST_EXEC}/compiler/tflchef/tools/file/tflchef-file)
+ set(TFLITE2CIRCLE_PATH $ENV{BUILD_HOST_EXEC}/compiler/tflite2circle/tflite2circle)
+ message(STATUS "TFLITE2CIRCLE_PATH = ${TFLITE2CIRCLE_PATH}")
+endif(DEFINED ENV{BUILD_HOST_EXEC})
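+# Example (hypothetical path): BUILD_HOST_EXEC=$HOME/one/build picks up
+# $HOME/one/build/compiler/tflchef/tools/file/tflchef-file and friends, so
+# generator tools built for the host are used when cross compiling for a target.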
+
# TODO use local test.recipe files for small networks
file(GLOB RECIPES RELATIVE ${CMAKE_CURRENT_SOURCE_DIR} "*/test.recipe")
@@ -17,14 +28,14 @@ foreach(RECIPE IN ITEMS ${RECIPES})
# Generate .tflite
add_custom_command(OUTPUT "${RECIPE_OUTPUT_FILE}"
- COMMAND tflchef-file "${RECIPE_SOURCE_FILE}" "${RECIPE_OUTPUT_FILE}"
- DEPENDS tflchef-file "${RECIPE_SOURCE_FILE}"
+ COMMAND ${TFLCHEF_FILE_PATH} "${RECIPE_SOURCE_FILE}" "${RECIPE_OUTPUT_FILE}"
+ DEPENDS ${TFLCHEF_FILE_PATH} "${RECIPE_SOURCE_FILE}"
COMMENT "Generating ${RECIPE_OUTPUT_FILE}")
# Generate .circle
add_custom_command(OUTPUT "${CIRCLE_OUTPUT_FILE}"
- COMMAND tflite2circle "${RECIPE_OUTPUT_FILE}" "${CIRCLE_OUTPUT_FILE}"
- DEPENDS tflite2circle "${RECIPE_OUTPUT_FILE}"
+ COMMAND ${TFLITE2CIRCLE_PATH} "${RECIPE_OUTPUT_FILE}" "${CIRCLE_OUTPUT_FILE}"
+ DEPENDS ${TFLITE2CIRCLE_PATH} "${RECIPE_OUTPUT_FILE}"
COMMENT "Generating ${CIRCLE_OUTPUT_FILE}")
list(APPEND TESTFILES "${CIRCLE_OUTPUT_FILE}")
@@ -52,14 +63,14 @@ foreach(RECIPE IN ITEMS ${RECIPES})
# Generate .tflite
add_custom_command(OUTPUT "${RECIPE_OUTPUT_FILE}"
- COMMAND tflchef-file "${RECIPE_SOURCE_FILE}" "${RECIPE_OUTPUT_FILE}"
- DEPENDS tflchef-file "${RECIPE_SOURCE_FILE}"
+ COMMAND ${TFLCHEF_FILE_PATH} "${RECIPE_SOURCE_FILE}" "${RECIPE_OUTPUT_FILE}"
+ DEPENDS ${TFLCHEF_FILE_PATH} "${RECIPE_SOURCE_FILE}"
COMMENT "Generating ${RECIPE_OUTPUT_FILE}")
# Generate .circle
add_custom_command(OUTPUT "${CIRCLE_OUTPUT_FILE}"
- COMMAND tflite2circle "${RECIPE_OUTPUT_FILE}" "${CIRCLE_OUTPUT_FILE}"
- DEPENDS tflite2circle "${RECIPE_OUTPUT_FILE}"
+ COMMAND ${TFLITE2CIRCLE_PATH} "${RECIPE_OUTPUT_FILE}" "${CIRCLE_OUTPUT_FILE}"
+ DEPENDS ${TFLITE2CIRCLE_PATH} "${RECIPE_OUTPUT_FILE}"
COMMENT "Generating ${CIRCLE_OUTPUT_FILE}")
list(APPEND TESTFILES "${CIRCLE_OUTPUT_FILE}")
@@ -87,8 +98,8 @@ foreach(RECIPE IN ITEMS ${RECIPES2})
# Generate .circle
add_custom_command(OUTPUT "${CIRCLE_OUTPUT_FILE}"
- COMMAND circlechef-file "${RECIPE_SOURCE_FILE}" "${CIRCLE_OUTPUT_FILE}"
- DEPENDS circlechef-file "${RECIPE_SOURCE_FILE}"
+ COMMAND ${CIRCLECHEF_FILE_PATH} "${RECIPE_SOURCE_FILE}" "${CIRCLE_OUTPUT_FILE}"
+ DEPENDS ${CIRCLECHEF_FILE_PATH} "${RECIPE_SOURCE_FILE}"
COMMENT "Generating ${CIRCLE_OUTPUT_FILE}")
list(APPEND TESTFILES "${CIRCLE_OUTPUT_FILE}")
@@ -111,6 +122,8 @@ include("test.lst")
# Read "test.local.lst" if exists
include("test.local.lst" OPTIONAL)
+# NOTE $<TARGET_FILE:luci_readtester> is used as-is because the test itself must
+# also run on the target device for cross builds
add_test(NAME luci_unit_readtest
COMMAND "${CMAKE_CURRENT_SOURCE_DIR}/readverify.sh"
"${CMAKE_CURRENT_BINARY_DIR}"
diff --git a/compiler/luci/tests/test.lst b/compiler/luci/tests/test.lst
index 28ddcf672..94e723f21 100644
--- a/compiler/luci/tests/test.lst
+++ b/compiler/luci/tests/test.lst
@@ -180,6 +180,8 @@ addread(Sub_000)
addread(Sub_U8_000)
addread(Sum_000)
addread(Sum_001)
+addread(SVDF_000)
+addread(SVDF_001)
addread(Tanh_000)
addread(Tanh_U8_000)
addread(Tile_000)
@@ -403,6 +405,8 @@ addwrite(Sub_000)
addwrite(Sub_U8_000)
addwrite(Sum_000)
addwrite(Sum_001)
+addwrite(SVDF_000)
+addwrite(SVDF_001)
addwrite(Tanh_000)
addwrite(Tanh_U8_000)
addwrite(Tile_000)
diff --git a/compiler/mio-circle/CMakeLists.txt b/compiler/mio-circle/CMakeLists.txt
index fa05ef0fa..d24717343 100644
--- a/compiler/mio-circle/CMakeLists.txt
+++ b/compiler/mio-circle/CMakeLists.txt
@@ -1,13 +1,14 @@
-nnas_find_package(FlatBuffers EXACT 1.10 QUIET)
+nnas_find_package(FlatBuffers EXACT 2.0 QUIET)
if(NOT FlatBuffers_FOUND)
+ message(STATUS "mio-circle skip: FlatBuffers 2.0 NOT FOUND")
return()
endif(NOT FlatBuffers_FOUND)
message(STATUS "Build mio-circle: TRUE")
# TODO Find a better way
-set(SCHEMA_FILE "${NNAS_PROJECT_SOURCE_DIR}/nnpackage/schema/circle_schema.fbs")
+set(SCHEMA_FILE "${NNAS_PROJECT_SOURCE_DIR}/res/CircleSchema/0.3/circle_schema.fbs")
# NOTE Copy circle_schema.fbs as schema.fbs to generate "schema_generated.fbs" instead of "circle_schema_generated.fbs"
add_custom_command(OUTPUT "${CMAKE_CURRENT_BINARY_DIR}/schema.fbs"
@@ -26,3 +27,10 @@ FlatBuffers_Target(mio_circle
# This example shows how to use "mio-circle" library
add_executable(mio_circle_example example.cpp)
target_link_libraries(mio_circle_example mio_circle)
+
+file(GLOB_RECURSE SOURCES "src/*.cpp")
+
+add_library(mio_circle_helper STATIC ${SOURCES})
+target_include_directories(mio_circle_helper PRIVATE src)
+target_include_directories(mio_circle_helper PUBLIC include)
+target_link_libraries(mio_circle_helper mio_circle)
diff --git a/runtime/onert/backend/gpu_cl/open_cl/GpuObject.cc b/compiler/mio-circle/include/mio_circle/Helper.h
index 774f8151f..c0f8115fe 100644
--- a/runtime/onert/backend/gpu_cl/open_cl/GpuObject.cc
+++ b/compiler/mio-circle/include/mio_circle/Helper.h
@@ -1,6 +1,5 @@
/*
* Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
- * Copyright 2020 The TensorFlow Authors. All Rights Reserved.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
@@ -15,30 +14,23 @@
* limitations under the License.
*/
-#include "GpuObject.h"
+#ifndef __MIO_CIRCLE_HELPER_H__
+#define __MIO_CIRCLE_HELPER_H__
-namespace onert
-{
-namespace backend
+#include <mio/circle/schema_generated.h>
+
+namespace mio
{
-namespace gpu_cl
+namespace circle
{
-std::string MemoryTypeToCLType(MemoryType type)
-{
- switch (type)
- {
- case MemoryType::GLOBAL:
- return "__global";
- case MemoryType::CONSTANT:
- return "__constant";
- break;
- case MemoryType::LOCAL:
- return "__local";
- }
- return "";
-}
+bool is_valid(const ::circle::OperatorCode *opcode);
+bool is_custom(const ::circle::OperatorCode *opcode);
+std::string opcode_name(const ::circle::OperatorCode *opcode);
+const char *tensor_type(const ::circle::Tensor *tensor);
+const char *tensor_name(const ::circle::Tensor *tensor);
+
+} // namespace circle
+} // namespace mio
-} // namespace gpu_cl
-} // namespace backend
-} // namespace onert
+#endif // __MIO_CIRCLE_HELPER_H__
diff --git a/compiler/mio-circle/src/Helper.cpp b/compiler/mio-circle/src/Helper.cpp
new file mode 100644
index 000000000..6f30c8c10
--- /dev/null
+++ b/compiler/mio-circle/src/Helper.cpp
@@ -0,0 +1,81 @@
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "mio_circle/Helper.h"
+
+#include <sstream>
+
+namespace mio
+{
+namespace circle
+{
+
+bool is_valid(const ::circle::OperatorCode *opcode)
+{
+ ::circle::BuiltinOperator code = opcode->builtin_code();
+ return (::circle::BuiltinOperator_MIN <= code && code <= ::circle::BuiltinOperator_MAX);
+}
+
+bool is_custom(const ::circle::OperatorCode *opcode)
+{
+ ::circle::BuiltinOperator code = opcode->builtin_code();
+ return (code == ::circle::BuiltinOperator_CUSTOM);
+}
+
+std::string opcode_name(const ::circle::OperatorCode *opcode)
+{
+ assert(opcode);
+
+ if (!is_valid(opcode))
+ {
+ std::ostringstream oss;
+ oss << "(invalid)";
+ return oss.str();
+ }
+
+ if (is_custom(opcode))
+ {
+ if (!opcode->custom_code())
+ return "(invalid custom)";
+
+ std::string custom_op = "CUSTOM(";
+ custom_op += opcode->custom_code()->c_str();
+ custom_op += ")";
+ return custom_op;
+ }
+
+ ::circle::BuiltinOperator code = opcode->builtin_code();
+ return ::circle::EnumNameBuiltinOperator(code);
+}
+
+const char *tensor_type(const ::circle::Tensor *tensor)
+{
+ return ::circle::EnumNameTensorType(tensor->type());
+}
+
+const char *tensor_name(const ::circle::Tensor *tensor)
+{
+ static const char *kEmptyTensorName = "(noname)";
+
+ auto name = tensor->name();
+ if (name)
+ return name->c_str();
+
+ return kEmptyTensorName;
+}
+
+} // namespace circle
+} // namespace mio
diff --git a/compiler/mio-circle04/CMakeLists.txt b/compiler/mio-circle04/CMakeLists.txt
new file mode 100644
index 000000000..8ee6da44c
--- /dev/null
+++ b/compiler/mio-circle04/CMakeLists.txt
@@ -0,0 +1,52 @@
+nnas_find_package(FlatBuffers EXACT 2.0 QUIET)
+
+if(NOT FlatBuffers_FOUND)
+ message(STATUS "mio-circle04 skip: FlatBuffers 2.0 NOT FOUND")
+ return()
+endif(NOT FlatBuffers_FOUND)
+
+message(STATUS "Build mio-circle04: TRUE")
+
+# TODO Find a better way
+# TODO use nnpackage
+# set(SCHEMA_FILE "${NNAS_PROJECT_SOURCE_DIR}/nnpackage/schema/circle_schema.fbs")
+set(SCHEMA_FILE "${NNAS_PROJECT_SOURCE_DIR}/res/CircleSchema/0.4/circle_schema.fbs")
+
+# NOTE Copy circle_schema.fbs as schema.fbs to generate "schema_generated.h" instead of "circle_schema_generated.h"
+add_custom_command(OUTPUT "${CMAKE_CURRENT_BINARY_DIR}/schema.fbs"
+ COMMAND ${CMAKE_COMMAND} -E copy "${SCHEMA_FILE}" schema.fbs
+ WORKING_DIRECTORY "${CMAKE_CURRENT_BINARY_DIR}"
+ DEPENDS "${SCHEMA_FILE}"
+)
+
+FlatBuffers_Target(mio_circle04
+ OUTPUT_DIR "${CMAKE_CURRENT_BINARY_DIR}/gen/mio/circle"
+ INCLUDE_DIR "${CMAKE_CURRENT_BINARY_DIR}/gen"
+ SCHEMA_DIR "${CMAKE_CURRENT_BINARY_DIR}"
+ SCHEMA_FILES "schema.fbs"
+)
+
+# This example shows how to use "mio-circle04" library
+add_executable(mio_circle04_example example.cpp)
+target_link_libraries(mio_circle04_example mio_circle04)
+
+file(GLOB_RECURSE SOURCES "src/*.cpp")
+file(GLOB_RECURSE TESTS "src/*.test.cpp")
+list(REMOVE_ITEM SOURCES ${TESTS})
+
+add_library(mio_circle04_helper STATIC ${SOURCES})
+set_target_properties(mio_circle04_helper PROPERTIES POSITION_INDEPENDENT_CODE ON)
+target_include_directories(mio_circle04_helper PRIVATE src)
+target_include_directories(mio_circle04_helper PUBLIC include)
+target_link_libraries(mio_circle04_helper mio_circle04)
+
+if(NOT ENABLE_TEST)
+ return()
+endif(NOT ENABLE_TEST)
+
+nnas_find_package(GTest REQUIRED)
+
+GTest_AddTest(mio_circle04_helper_test ${TESTS})
+target_include_directories(mio_circle04_helper_test PRIVATE src)
+target_link_libraries(mio_circle04_helper_test mio_circle04)
+target_link_libraries(mio_circle04_helper_test mio_circle04_helper)
diff --git a/compiler/mio-circle04/README.md b/compiler/mio-circle04/README.md
new file mode 100644
index 000000000..d12dd78ff
--- /dev/null
+++ b/compiler/mio-circle04/README.md
@@ -0,0 +1,3 @@
+# mio-circle04
+
+Let's make it easy to read and write Circle models.
diff --git a/compiler/mio-circle04/example.cpp b/compiler/mio-circle04/example.cpp
new file mode 100644
index 000000000..1970f4066
--- /dev/null
+++ b/compiler/mio-circle04/example.cpp
@@ -0,0 +1,41 @@
+/*
+ * Copyright (c) 2022 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+//
+// This example shows how to include and use "mio-circle04"
+//
+#include <mio/circle/schema_generated.h>
+
+#include <fstream>
+#include <iostream>
+#include <vector>
+
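+// Usage: mio_circle04_example <model.circle>
+// Prints "Pass" if the buffer verifies as a circle Model, "Fail" (exit 255) otherwise.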
+int main(int argc, char **argv)
+{
+ std::ifstream ifs(argv[1], std::ios_base::binary);
+ std::vector<char> buf(std::istreambuf_iterator<char>{ifs}, std::istreambuf_iterator<char>{});
+
+ flatbuffers::Verifier verifier{reinterpret_cast<uint8_t *>(buf.data()), buf.size()};
+
+ if (!circle::VerifyModelBuffer(verifier))
+ {
+ std::cout << "Fail" << std::endl;
+ return 255;
+ }
+
+ std::cout << "Pass" << std::endl;
+ return 0;
+}
diff --git a/compiler/mio-circle04/include/mio_circle/Helper.h b/compiler/mio-circle04/include/mio_circle/Helper.h
new file mode 100644
index 000000000..d3ffc23e5
--- /dev/null
+++ b/compiler/mio-circle04/include/mio_circle/Helper.h
@@ -0,0 +1,37 @@
+/*
+ * Copyright (c) 2022 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __MIO_CIRCLE04_HELPER_H__
+#define __MIO_CIRCLE04_HELPER_H__
+
+#include <mio/circle/schema_generated.h>
+
+namespace mio
+{
+namespace circle
+{
+
+::circle::BuiltinOperator builtin_code_neutral(const ::circle::OperatorCode *opcode);
+bool is_valid(const ::circle::OperatorCode *opcode);
+bool is_custom(const ::circle::OperatorCode *opcode);
+std::string opcode_name(const ::circle::OperatorCode *opcode);
+const char *tensor_type(const ::circle::Tensor *tensor);
+const char *tensor_name(const ::circle::Tensor *tensor);
+
+} // namespace circle
+} // namespace mio
+
+#endif // __MIO_CIRCLE04_HELPER_H__
diff --git a/compiler/mio-circle04/src/Helper.cpp b/compiler/mio-circle04/src/Helper.cpp
new file mode 100644
index 000000000..8b8737a2d
--- /dev/null
+++ b/compiler/mio-circle04/src/Helper.cpp
@@ -0,0 +1,110 @@
+/*
+ * Copyright (c) 2022 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "mio_circle/Helper.h"
+
+#include <algorithm>
+#include <sstream>
+
+namespace mio
+{
+namespace circle
+{
+
+/**
+ * This provides a v3/v3a/v3b format-neutral BuiltinOperator.
+ * NOTE circle has negative opcode values (252~254 as uint8_t), so we cannot
+ * use std::max() as tflite does: deprecated_builtin_code can be negative
+ * while builtin_code is 0 for v0.3 files.
+ */
+::circle::BuiltinOperator builtin_code_neutral(const ::circle::OperatorCode *opcode)
+{
+ assert(opcode != nullptr);
+ if (opcode->deprecated_builtin_code() == 127)
+ {
+ assert(opcode->builtin_code() >= 127);
+ return opcode->builtin_code();
+ }
+ // There was no 255(-1) value in v0.3
+ assert(opcode->deprecated_builtin_code() != -1);
+ return static_cast<::circle::BuiltinOperator>(opcode->deprecated_builtin_code());
+}
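+// e.g. (deprecated_builtin_code=3, builtin_code=0) resolves to CONV_2D(3), and
+// (deprecated_builtin_code=127, builtin_code=128) resolves to CUMSUM(128);
+// see Helper.test.cpp for the covered cases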
+
+bool is_valid(const ::circle::OperatorCode *opcode)
+{
+ // Valid Range : BuiltinOperator_MIN <= deprecated_builtin_code <= 127
+ const int8_t deprecated_builtin_code = opcode->deprecated_builtin_code();
+ if (deprecated_builtin_code < ::circle::BuiltinOperator_MIN)
+ return false;
+ // There was no 255(-1) value in v0.3
+ if (deprecated_builtin_code == -1)
+ return false;
+
+ const ::circle::BuiltinOperator builtin_code = opcode->builtin_code();
+ if (!(::circle::BuiltinOperator_MIN <= builtin_code &&
+ builtin_code <= ::circle::BuiltinOperator_MAX))
+ return false;
+
+ return true;
+}
+
+bool is_custom(const ::circle::OperatorCode *opcode)
+{
+ ::circle::BuiltinOperator code = builtin_code_neutral(opcode);
+ return (code == ::circle::BuiltinOperator_CUSTOM);
+}
+
+std::string opcode_name(const ::circle::OperatorCode *opcode)
+{
+ assert(opcode);
+
+ if (!is_valid(opcode))
+ {
+ std::ostringstream oss;
+ oss << "(invalid)";
+ return oss.str();
+ }
+
+ if (is_custom(opcode))
+ {
+ if (!opcode->custom_code())
+ return "(invalid custom)";
+
+ std::string custom_op = "CUSTOM(";
+ custom_op += opcode->custom_code()->c_str();
+ custom_op += ")";
+ return custom_op;
+ }
+
+ ::circle::BuiltinOperator code = builtin_code_neutral(opcode);
+ return ::circle::EnumNameBuiltinOperator(code);
+}
+
+const char *tensor_type(const ::circle::Tensor *tensor)
+{
+ return ::circle::EnumNameTensorType(tensor->type());
+}
+
+const char *tensor_name(const ::circle::Tensor *tensor)
+{
+ if (tensor->name() == nullptr || std::string(tensor->name()->c_str()).empty())
+ return "(noname)";
+
+ return tensor->name()->c_str();
+}
+
+} // namespace circle
+} // namespace mio
diff --git a/compiler/mio-circle04/src/Helper.test.cpp b/compiler/mio-circle04/src/Helper.test.cpp
new file mode 100644
index 000000000..20fce0843
--- /dev/null
+++ b/compiler/mio-circle04/src/Helper.test.cpp
@@ -0,0 +1,153 @@
+/*
+ * Copyright (c) 2022 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "mio_circle/Helper.h"
+
+#include <flatbuffers/flatbuffers.h>
+#include <gtest/gtest.h>
+
+#include <vector>
+
+class mio_circle04_helper_test : public ::testing::Test
+{
+protected:
+ void initialization_finish(void)
+ {
+ _fbb.Finish(circle::CreateModelDirect(_fbb, 0, &_opcodes_vec));
+ }
+
+protected:
+ void add_operator_code(int8_t deprecated_builtin_code, const char *custom_code,
+ circle::BuiltinOperator builtin_code)
+ {
+ _opcodes_vec.push_back(circle::CreateOperatorCodeDirect(
+ _fbb, deprecated_builtin_code, custom_code, 1 /* version */, builtin_code));
+ }
+
+ const circle::OperatorCode *get_operator_code(uint8_t idx)
+ {
+ return circle::GetModel(_fbb.GetBufferPointer())->operator_codes()->Get(idx);
+ }
+
+private:
+ flatbuffers::FlatBufferBuilder _fbb;
+ std::vector<flatbuffers::Offset<circle::OperatorCode>> _opcodes_vec;
+};
+
+TEST_F(mio_circle04_helper_test, v04)
+{
+ // BuiltinOperator_ADD = 0
+ // BuiltinOperator_CONV_2D = 3
+ add_operator_code(3, "", circle::BuiltinOperator_ADD);
+ initialization_finish();
+
+ ASSERT_TRUE(mio::circle::is_valid(get_operator_code(0)));
+ ASSERT_EQ(mio::circle::builtin_code_neutral(get_operator_code(0)),
+ circle::BuiltinOperator_CONV_2D);
+ ASSERT_FALSE(mio::circle::is_custom(get_operator_code(0)));
+}
+
+TEST_F(mio_circle04_helper_test, v04_custom_old)
+{
+ // BuiltinOperator_ADD = 0
+ // BuiltinOperator_CUSTOM = 32
+ add_operator_code(32, "custom", circle::BuiltinOperator_ADD);
+ initialization_finish();
+
+ ASSERT_TRUE(mio::circle::is_valid(get_operator_code(0)));
+ ASSERT_EQ(mio::circle::builtin_code_neutral(get_operator_code(0)),
+ circle::BuiltinOperator_CUSTOM);
+ ASSERT_TRUE(mio::circle::is_custom(get_operator_code(0)));
+}
+
+TEST_F(mio_circle04_helper_test, v04_NEG)
+{
+ // BuiltinOperator_ADD = 0
+ // BuiltinOperator_CUMSUM = 128
+ // deprecated_builtin_code cannot be negative value
+ add_operator_code(128, "", circle::BuiltinOperator_ADD);
+ initialization_finish();
+
+ ASSERT_FALSE(mio::circle::is_valid(get_operator_code(0)));
+}
+
+TEST_F(mio_circle04_helper_test, v04_under127)
+{
+ // BuiltinOperator_CONV_2D = 3
+ add_operator_code(3, "", circle::BuiltinOperator_CONV_2D);
+ initialization_finish();
+
+ ASSERT_TRUE(mio::circle::is_valid(get_operator_code(0)));
+ ASSERT_EQ(mio::circle::builtin_code_neutral(get_operator_code(0)),
+ circle::BuiltinOperator_CONV_2D);
+ ASSERT_FALSE(mio::circle::is_custom(get_operator_code(0)));
+}
+
+TEST_F(mio_circle04_helper_test, v04_under127_NEG)
+{
+ // BuiltinOperator_CONV_2D = 3
+ // BuiltinOperator_CUMSUM = 128
+ // deprecated_builtin_code cannot be negative value
+ add_operator_code(128, "", circle::BuiltinOperator_CONV_2D);
+ initialization_finish();
+
+ ASSERT_FALSE(mio::circle::is_valid(get_operator_code(0)));
+}
+
+TEST_F(mio_circle04_helper_test, v04_custom)
+{
+ // BuiltinOperator_CUSTOM = 32
+ add_operator_code(32, "custom", circle::BuiltinOperator_CUSTOM);
+ initialization_finish();
+
+ ASSERT_TRUE(mio::circle::is_valid(get_operator_code(0)));
+ ASSERT_EQ(mio::circle::builtin_code_neutral(get_operator_code(0)),
+ circle::BuiltinOperator_CUSTOM);
+ ASSERT_TRUE(mio::circle::is_custom(get_operator_code(0)));
+}
+
+TEST_F(mio_circle04_helper_test, v04_custom_NEG)
+{
+ // BuiltinOperator_CUMSUM = 128
+ // deprecated_builtin_code cannot be negative value
+ add_operator_code(128, "custom", circle::BuiltinOperator_CUSTOM);
+ initialization_finish();
+
+ ASSERT_FALSE(mio::circle::is_valid(get_operator_code(0)));
+}
+
+TEST_F(mio_circle04_helper_test, v04_over127)
+{
+ // BuiltinOperator_PLACEHOLDER_FOR_GREATER_OP_CODES = 127
+ // BuiltinOperator_CUMSUM = 128
+ add_operator_code(127, "", circle::BuiltinOperator_CUMSUM);
+ initialization_finish();
+
+ ASSERT_TRUE(mio::circle::is_valid(get_operator_code(0)));
+ ASSERT_EQ(mio::circle::builtin_code_neutral(get_operator_code(0)),
+ circle::BuiltinOperator_CUMSUM);
+ ASSERT_FALSE(mio::circle::is_custom(get_operator_code(0)));
+}
+
+TEST_F(mio_circle04_helper_test, v04_over127_NEG)
+{
+ // BuiltinOperator_CUMSUM = 128
+ // deprecated_builtin_code cannot be negative value
+ add_operator_code(128, "", circle::BuiltinOperator_CUMSUM);
+ initialization_finish();
+
+ ASSERT_FALSE(mio::circle::is_valid(get_operator_code(0)));
+}
diff --git a/compiler/mio-tflite/CMakeLists.txt b/compiler/mio-tflite/CMakeLists.txt
index 4660e4003..90187b037 100644
--- a/compiler/mio-tflite/CMakeLists.txt
+++ b/compiler/mio-tflite/CMakeLists.txt
@@ -1,4 +1,4 @@
-nnas_find_package(FlatBuffers EXACT 1.10 QUIET)
+nnas_find_package(FlatBuffers EXACT 2.0 QUIET)
if(NOT FlatBuffers_FOUND)
message(STATUS "Build mio-tflite: FAILED (missing Flatbuffers)")
diff --git a/compiler/mio-tflite260/CMakeLists.txt b/compiler/mio-tflite260/CMakeLists.txt
index 39f4d9a31..f2cfeafcc 100644
--- a/compiler/mio-tflite260/CMakeLists.txt
+++ b/compiler/mio-tflite260/CMakeLists.txt
@@ -1,7 +1,7 @@
-nnas_find_package(FlatBuffers EXACT 1.12 QUIET)
+nnas_find_package(FlatBuffers EXACT 2.0 QUIET)
if(NOT FlatBuffers_FOUND)
- message(STATUS "Build mio-tflite260: FAILED (missing Flatbuffers 1.12)")
+ message(STATUS "Build mio-tflite260: FAILED (missing Flatbuffers 2.0)")
return()
endif(NOT FlatBuffers_FOUND)
@@ -47,3 +47,23 @@ endif(NOT TensorFlowGEMMLowpSource_FOUND)
add_library(mio_tflite260_inc INTERFACE)
target_include_directories(mio_tflite260_inc SYSTEM INTERFACE "${TensorFlowSource_DIR}")
target_include_directories(mio_tflite260_inc SYSTEM INTERFACE "${TensorFlowGEMMLowpSource_DIR}")
+
+file(GLOB_RECURSE SOURCES "src/*.cpp")
+file(GLOB_RECURSE TESTS "src/*.test.cpp")
+list(REMOVE_ITEM SOURCES ${TESTS})
+
+add_library(mio_tflite260_helper STATIC ${SOURCES})
+target_include_directories(mio_tflite260_helper PRIVATE src)
+target_include_directories(mio_tflite260_helper PUBLIC include)
+target_link_libraries(mio_tflite260_helper mio_tflite260)
+
+if(NOT ENABLE_TEST)
+ return()
+endif(NOT ENABLE_TEST)
+
+nnas_find_package(GTest REQUIRED)
+
+GTest_AddTest(mio_tflite260_helper_test ${TESTS})
+target_include_directories(mio_tflite260_helper_test PRIVATE src)
+target_link_libraries(mio_tflite260_helper_test mio_tflite260)
+target_link_libraries(mio_tflite260_helper_test mio_tflite260_helper)
diff --git a/runtime/onert/backend/gpu_cl/open_cl/TensorTypeUtil.h b/compiler/mio-tflite260/include/mio_tflite260/Helper.h
index f56fc3d83..cb027e604 100644
--- a/runtime/onert/backend/gpu_cl/open_cl/TensorTypeUtil.h
+++ b/compiler/mio-tflite260/include/mio_tflite260/Helper.h
@@ -1,6 +1,5 @@
/*
* Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
- * Copyright 2019 The TensorFlow Authors. All Rights Reserved.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
@@ -15,27 +14,24 @@
* limitations under the License.
*/
-#ifndef __ONERT_BACKEND_GPU_CL_OPENCL_TENSOR_TYPE_UTIL_H__
-#define __ONERT_BACKEND_GPU_CL_OPENCL_TENSOR_TYPE_UTIL_H__
+#ifndef __MIO_TFLITE260_HELPER_H__
+#define __MIO_TFLITE260_HELPER_H__
-#include "Api.h"
-#include "TensorType.h"
+#include <mio/tflite/schema_generated.h>
-namespace onert
+namespace mio
{
-namespace backend
+namespace tflite
{
-namespace gpu_cl
-{
-
-ObjectType ToObjectType(TensorStorageType type);
-
-DataLayout ToDataLayout(TensorStorageType type);
-TensorStorageType ToTensorStorageType(ObjectType object_type, DataLayout data_layout);
+::tflite::BuiltinOperator builtin_code_neutral(const ::tflite::OperatorCode *opcode);
+bool is_valid(const ::tflite::OperatorCode *opcode);
+bool is_custom(const ::tflite::OperatorCode *opcode);
+std::string opcode_name(const ::tflite::OperatorCode *opcode);
+const char *tensor_type(const ::tflite::Tensor *tensor);
+const char *tensor_name(const ::tflite::Tensor *tensor);
-} // namespace gpu_cl
-} // namespace backend
-} // namespace onert
+} // namespace tflite
+} // namespace mio
-#endif // __ONERT_BACKEND_GPU_CL_OPENCL_TENSOR_TYPE_UTIL_H__
+#endif // __MIO_TFLITE260_HELPER_H__
diff --git a/compiler/mio-tflite260/src/Helper.cpp b/compiler/mio-tflite260/src/Helper.cpp
new file mode 100644
index 000000000..9669058ea
--- /dev/null
+++ b/compiler/mio-tflite260/src/Helper.cpp
@@ -0,0 +1,104 @@
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ * Copyright 2020 The TensorFlow Authors. All Rights Reserved.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "mio_tflite260/Helper.h"
+
+#include <sstream>
+
+namespace mio
+{
+namespace tflite
+{
+
+/**
+ * This provides a v3/v3a format-neutral BuiltinOperator.
+ *
+ * This function references
+ * https://github.com/tensorflow/tensorflow/blob/7d12007d7800d3714a02e05059f3ea602d1aec78/tensorflow/lite/schema/schema_utils.cc
+ */
+::tflite::BuiltinOperator builtin_code_neutral(const ::tflite::OperatorCode *opcode)
+{
+ assert(opcode != nullptr);
+ return std::max(opcode->builtin_code(),
+ static_cast<::tflite::BuiltinOperator>(opcode->deprecated_builtin_code()));
+}
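+// e.g. a v3 file stores CONV_2D as deprecated_builtin_code=3 with builtin_code=0(ADD);
+// std::max() of the two recovers CONV_2D (see Helper.test.cpp)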
+
+bool is_valid(const ::tflite::OperatorCode *opcode)
+{
+ // Valid Range : 0 <= deprecated_builtin_code <= 127
+ const int8_t deprecated_builtin_code = opcode->deprecated_builtin_code();
+ if (deprecated_builtin_code < 0)
+ return false;
+
+ const ::tflite::BuiltinOperator builtin_code = opcode->builtin_code();
+ if (!(::tflite::BuiltinOperator_MIN <= builtin_code &&
+ builtin_code <= ::tflite::BuiltinOperator_MAX))
+ return false;
+
+ return true;
+}
+
+bool is_custom(const ::tflite::OperatorCode *opcode)
+{
+ ::tflite::BuiltinOperator code = builtin_code_neutral(opcode);
+ return (code == ::tflite::BuiltinOperator_CUSTOM);
+}
+
+std::string opcode_name(const ::tflite::OperatorCode *opcode)
+{
+ assert(opcode);
+
+ if (!is_valid(opcode))
+ {
+ std::ostringstream oss;
+ oss << "(invalid)";
+ return oss.str();
+ }
+
+ if (is_custom(opcode))
+ {
+ if (!opcode->custom_code())
+ return "(invalid custom)";
+
+ std::string custom_op = "CUSTOM(";
+ custom_op += opcode->custom_code()->c_str();
+ custom_op += ")";
+ return custom_op;
+ }
+
+ ::tflite::BuiltinOperator code = builtin_code_neutral(opcode);
+ return ::tflite::EnumNameBuiltinOperator(code);
+}
+
+const char *tensor_type(const ::tflite::Tensor *tensor)
+{
+ return ::tflite::EnumNameTensorType(tensor->type());
+}
+
+const char *tensor_name(const ::tflite::Tensor *tensor)
+{
+ static const char *kEmptyTensorName = "(noname)";
+
+ auto name = tensor->name();
+ if (name)
+ return name->c_str();
+
+ return kEmptyTensorName;
+}
+
+} // namespace tflite
+} // namespace mio
diff --git a/compiler/mio-tflite260/src/Helper.test.cpp b/compiler/mio-tflite260/src/Helper.test.cpp
new file mode 100644
index 000000000..e1ef04ca7
--- /dev/null
+++ b/compiler/mio-tflite260/src/Helper.test.cpp
@@ -0,0 +1,159 @@
+/*
+ * Copyright (c) 2022 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "mio_tflite260/Helper.h"
+
+#include <flatbuffers/flatbuffers.h>
+#include <gtest/gtest.h>
+
+#include <vector>
+
+class mio_tflite260_helper_test : public ::testing::Test
+{
+protected:
+ void initialization_finish(void)
+ {
+ _fbb.Finish(tflite::CreateModelDirect(_fbb, 0, &_opcodes_vec));
+ }
+
+protected:
+ void add_operator_code(int8_t deprecated_builtin_code, const char *custom_code,
+ tflite::BuiltinOperator builtin_code)
+ {
+ _opcodes_vec.push_back(tflite::CreateOperatorCodeDirect(
+ _fbb, deprecated_builtin_code, custom_code, 1 /* version */, builtin_code));
+ }
+
+ const tflite::OperatorCode *get_operator_code(uint8_t idx)
+ {
+ return tflite::GetModel(_fbb.GetBufferPointer())->operator_codes()->Get(idx);
+ }
+
+private:
+ flatbuffers::FlatBufferBuilder _fbb;
+ std::vector<flatbuffers::Offset<tflite::OperatorCode>> _opcodes_vec;
+};
+
+/**
+ * Extended 'builtin_code' is not in TFLite schema v3.
+ *
+ * Thus it is filled with 0(BuiltinOperator_ADD) in schema v3. Please refer to
+ * https://github.com/tensorflow/tensorflow/blob/1ab788fa8d08430be239ab970980b891ad7af494/tensorflow/lite/schema/schema_utils.cc#L28-L31
+ */
+TEST_F(mio_tflite260_helper_test, v3)
+{
+ // BuiltinOperator_ADD = 0
+ // BuiltinOperator_CONV_2D = 3
+ add_operator_code(3, "", tflite::BuiltinOperator_ADD);
+ initialization_finish();
+
+ ASSERT_TRUE(mio::tflite::is_valid(get_operator_code(0)));
+ ASSERT_EQ(mio::tflite::builtin_code_neutral(get_operator_code(0)),
+ tflite::BuiltinOperator_CONV_2D);
+ ASSERT_FALSE(mio::tflite::is_custom(get_operator_code(0)));
+}
+
+TEST_F(mio_tflite260_helper_test, v3_custom)
+{
+ // BuiltinOperator_ADD = 0
+ // BuiltinOperator_CUSTOM = 32
+ add_operator_code(32, "custom", tflite::BuiltinOperator_ADD);
+ initialization_finish();
+
+ ASSERT_TRUE(mio::tflite::is_valid(get_operator_code(0)));
+ ASSERT_EQ(mio::tflite::builtin_code_neutral(get_operator_code(0)),
+ tflite::BuiltinOperator_CUSTOM);
+ ASSERT_TRUE(mio::tflite::is_custom(get_operator_code(0)));
+}
+
+TEST_F(mio_tflite260_helper_test, v3_NEG)
+{
+ // BuiltinOperator_ADD = 0
+ // BuiltinOperator_CUMSUM = 128
+ // deprecated_builtin_code cannot be negative value
+ add_operator_code(128, "", tflite::BuiltinOperator_ADD);
+ initialization_finish();
+
+ ASSERT_FALSE(mio::tflite::is_valid(get_operator_code(0)));
+}
+
+TEST_F(mio_tflite260_helper_test, v3a_under127)
+{
+ // BuiltinOperator_CONV_2D = 3
+ add_operator_code(3, "", tflite::BuiltinOperator_CONV_2D);
+ initialization_finish();
+
+ ASSERT_TRUE(mio::tflite::is_valid(get_operator_code(0)));
+ ASSERT_EQ(mio::tflite::builtin_code_neutral(get_operator_code(0)),
+ tflite::BuiltinOperator_CONV_2D);
+ ASSERT_FALSE(mio::tflite::is_custom(get_operator_code(0)));
+}
+
+TEST_F(mio_tflite260_helper_test, v3a_under127_NEG)
+{
+ // BuiltinOperator_CONV_2D = 3
+ // BuiltinOperator_CUMSUM = 128
+ // deprecated_builtin_code cannot be negative value
+ add_operator_code(128, "", tflite::BuiltinOperator_CONV_2D);
+ initialization_finish();
+
+ ASSERT_FALSE(mio::tflite::is_valid(get_operator_code(0)));
+}
+
+TEST_F(mio_tflite260_helper_test, v3a_custom)
+{
+ // BuiltinOperator_CUSTOM = 32
+ add_operator_code(32, "custom", tflite::BuiltinOperator_CUSTOM);
+ initialization_finish();
+
+ ASSERT_TRUE(mio::tflite::is_valid(get_operator_code(0)));
+ ASSERT_EQ(mio::tflite::builtin_code_neutral(get_operator_code(0)),
+ tflite::BuiltinOperator_CUSTOM);
+ ASSERT_TRUE(mio::tflite::is_custom(get_operator_code(0)));
+}
+
+TEST_F(mio_tflite260_helper_test, v3a_custom_NEG)
+{
+ // BuiltinOperator_CUMSUM = 128
+ // deprecated_builtin_code cannot be negative value
+ add_operator_code(128, "custom", tflite::BuiltinOperator_CUSTOM);
+ initialization_finish();
+
+ ASSERT_FALSE(mio::tflite::is_valid(get_operator_code(0)));
+}
+
+TEST_F(mio_tflite260_helper_test, v3a_over127)
+{
+ // BuiltinOperator_PLACEHOLDER_FOR_GREATER_OP_CODES = 127
+ // BuiltinOperator_CUMSUM = 128
+ add_operator_code(127, "", tflite::BuiltinOperator_CUMSUM);
+ initialization_finish();
+
+ ASSERT_TRUE(mio::tflite::is_valid(get_operator_code(0)));
+ ASSERT_EQ(mio::tflite::builtin_code_neutral(get_operator_code(0)),
+ tflite::BuiltinOperator_CUMSUM);
+ ASSERT_FALSE(mio::tflite::is_custom(get_operator_code(0)));
+}
+
+TEST_F(mio_tflite260_helper_test, v3a_over127_NEG)
+{
+ // BuiltinOperator_CUMSUM = 128
+ // deprecated_builtin_code cannot be negative value
+ add_operator_code(128, "", tflite::BuiltinOperator_CUMSUM);
+ initialization_finish();
+
+ ASSERT_FALSE(mio::tflite::is_valid(get_operator_code(0)));
+}
diff --git a/compiler/mio-tflite280/CMakeLists.txt b/compiler/mio-tflite280/CMakeLists.txt
new file mode 100644
index 000000000..f48711eb7
--- /dev/null
+++ b/compiler/mio-tflite280/CMakeLists.txt
@@ -0,0 +1,69 @@
+nnas_find_package(FlatBuffers EXACT 2.0 QUIET)
+
+if(NOT FlatBuffers_FOUND)
+ message(STATUS "Build mio-tflite280: FAILED (missing Flatbuffers 2.0)")
+ return()
+endif(NOT FlatBuffers_FOUND)
+
+nnas_find_package(TensorFlowSource EXACT 2.8.0 QUIET)
+
+if(NOT TensorFlowSource_FOUND)
+ message(STATUS "Build mio-tflite280: FAILED (missing TensorFlowSource 2.8.0)")
+ return()
+endif(NOT TensorFlowSource_FOUND)
+
+message(STATUS "Build mio-tflite280: TRUE")
+
+set(SCHEMA_FILE "${TensorFlowSource_DIR}/tensorflow/lite/schema/schema.fbs")
+
+# NOTE Use a copy of schema.fbs to provide a unified way for circle as well
+add_custom_command(OUTPUT "${CMAKE_CURRENT_BINARY_DIR}/schema.fbs"
+ COMMAND ${CMAKE_COMMAND} -E copy "${SCHEMA_FILE}" schema.fbs
+ WORKING_DIRECTORY "${CMAKE_CURRENT_BINARY_DIR}"
+ DEPENDS "${SCHEMA_FILE}"
+)
+
+FlatBuffers_Target(mio_tflite280
+ OUTPUT_DIR "${CMAKE_CURRENT_BINARY_DIR}/gen/mio/tflite"
+ INCLUDE_DIR "${CMAKE_CURRENT_BINARY_DIR}/gen"
+ SCHEMA_DIR "${CMAKE_CURRENT_BINARY_DIR}"
+ SCHEMA_FILES "schema.fbs"
+)
+
+add_executable(mio_tflite280_example example.cpp)
+target_link_libraries(mio_tflite280_example mio_tflite280)
+
+# Temporary tflite validation tool to replace nnkit-tflite
+# TODO provide full tflite validation with runtime/interpreter
+add_executable(mio_tflite280_validate example.cpp)
+target_link_libraries(mio_tflite280_validate mio_tflite280)
+
+nnas_find_package(TensorFlowGEMMLowpSource EXACT 2.8.0 QUIET)
+
+if(NOT TensorFlowGEMMLowpSource_FOUND)
+ return()
+endif(NOT TensorFlowGEMMLowpSource_FOUND)
+
+add_library(mio_tflite280_inc INTERFACE)
+target_include_directories(mio_tflite280_inc SYSTEM INTERFACE "${TensorFlowSource_DIR}")
+target_include_directories(mio_tflite280_inc SYSTEM INTERFACE "${TensorFlowGEMMLowpSource_DIR}")
+
+file(GLOB_RECURSE SOURCES "src/*.cpp")
+file(GLOB_RECURSE TESTS "src/*.test.cpp")
+list(REMOVE_ITEM SOURCES ${TESTS})
+
+add_library(mio_tflite280_helper STATIC ${SOURCES})
+target_include_directories(mio_tflite280_helper PRIVATE src)
+target_include_directories(mio_tflite280_helper PUBLIC include)
+target_link_libraries(mio_tflite280_helper mio_tflite280)
+
+if(NOT ENABLE_TEST)
+ return()
+endif(NOT ENABLE_TEST)
+
+nnas_find_package(GTest REQUIRED)
+
+GTest_AddTest(mio_tflite280_helper_test ${TESTS})
+target_include_directories(mio_tflite280_helper_test PRIVATE src)
+target_link_libraries(mio_tflite280_helper_test mio_tflite280)
+target_link_libraries(mio_tflite280_helper_test mio_tflite280_helper)
diff --git a/compiler/mio-tflite280/README.md b/compiler/mio-tflite280/README.md
new file mode 100644
index 000000000..73219a7df
--- /dev/null
+++ b/compiler/mio-tflite280/README.md
@@ -0,0 +1,3 @@
+# mio-tflite280
+
+_mio-tflite280_ provides a library to access TensorFlow Lite model files based on the v2.8.0 schema.
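+
+A minimal usage sketch (a hedged example, not part of the library; it assumes the
+model file has already been read into a buffer):
+
+```cpp
+#include <mio/tflite/schema_generated.h>
+#include <mio_tflite280/Helper.h>
+
+#include <cstddef>
+#include <cstdint>
+#include <iostream>
+
+// Verify a raw .tflite buffer, then print the name of each operator code.
+bool dump_opcodes(const uint8_t *data, size_t size)
+{
+ flatbuffers::Verifier verifier{data, size};
+ if (!tflite::VerifyModelBuffer(verifier))
+ return false;
+
+ const auto *model = tflite::GetModel(data);
+ if (model->operator_codes() == nullptr)
+ return true; // nothing to print
+ for (const auto *opcode : *model->operator_codes())
+ std::cout << mio::tflite::opcode_name(opcode) << std::endl;
+ return true;
+}
+```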
diff --git a/compiler/mio-tflite280/example.cpp b/compiler/mio-tflite280/example.cpp
new file mode 100644
index 000000000..83356b943
--- /dev/null
+++ b/compiler/mio-tflite280/example.cpp
@@ -0,0 +1,41 @@
+/*
+ * Copyright (c) 2022 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+//
+// This example shows how to include and use "mio-tflite280"
+//
+#include <mio/tflite/schema_generated.h>
+
+#include <fstream>
+#include <iostream>
+#include <vector>
+
+int main(int argc, char **argv)
+{
+ if (argc != 2)
+ {
+ std::cerr << "Usage: " << argv[0] << " [tflite model path]" << std::endl;
+ return 255;
+ }
+
+ std::ifstream ifs(argv[1], std::ios_base::binary);
+ std::vector<char> buf(std::istreambuf_iterator<char>{ifs}, std::istreambuf_iterator<char>{});
+
+ flatbuffers::Verifier verifier{reinterpret_cast<uint8_t *>(buf.data()), buf.size()};
+
+ if (!tflite::VerifyModelBuffer(verifier))
+ {
+ std::cout << "Fail" << std::endl;
+ return 255;
+ }
+
+ std::cout << "Pass" << std::endl;
+ return 0;
+}
diff --git a/compiler/mio-tflite280/include/mio_tflite280/Helper.h b/compiler/mio-tflite280/include/mio_tflite280/Helper.h
new file mode 100644
index 000000000..b0fb0ace7
--- /dev/null
+++ b/compiler/mio-tflite280/include/mio_tflite280/Helper.h
@@ -0,0 +1,37 @@
+/*
+ * Copyright (c) 2022 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __MIO_TFLITE280_HELPER_H__
+#define __MIO_TFLITE280_HELPER_H__
+
+#include <mio/tflite/schema_generated.h>
+
+#include <string>
+
+namespace mio
+{
+namespace tflite
+{
+
+// Return the BuiltinOperator in a form neutral to the v3/v3a schema formats
+::tflite::BuiltinOperator builtin_code_neutral(const ::tflite::OperatorCode *opcode);
+// Check that the operator code is well-formed (see src/Helper.cpp for the valid ranges)
+bool is_valid(const ::tflite::OperatorCode *opcode);
+// Check whether the operator code denotes a custom operator
+bool is_custom(const ::tflite::OperatorCode *opcode);
+// Human-readable name: builtin enum name, "CUSTOM(<name>)", or "(invalid)"
+std::string opcode_name(const ::tflite::OperatorCode *opcode);
+// Enum name of the tensor's element type
+const char *tensor_type(const ::tflite::Tensor *tensor);
+// Tensor name, or "(noname)" when the tensor has no name
+const char *tensor_name(const ::tflite::Tensor *tensor);
+
+} // namespace tflite
+} // namespace mio
+
+#endif // __MIO_TFLITE280_HELPER_H__
diff --git a/compiler/mio-tflite280/src/Helper.cpp b/compiler/mio-tflite280/src/Helper.cpp
new file mode 100644
index 000000000..ebf0bd140
--- /dev/null
+++ b/compiler/mio-tflite280/src/Helper.cpp
@@ -0,0 +1,104 @@
+/*
+ * Copyright (c) 2022 Samsung Electronics Co., Ltd. All Rights Reserved
+ * Copyright 2020 The TensorFlow Authors. All Rights Reserved.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "mio_tflite280/Helper.h"
+
+#include <sstream>
+
+namespace mio
+{
+namespace tflite
+{
+
+/**
+ * Provide the BuiltinOperator in a form neutral to the v3/v3a schema formats
+ *
+ * This function is adapted from
+ * https://github.com/tensorflow/tensorflow/blob/7d12007d7800d3714a02e05059f3ea602d1aec78/tensorflow/lite/schema/schema_utils.cc
+ */
+::tflite::BuiltinOperator builtin_code_neutral(const ::tflite::OperatorCode *opcode)
+{
+ assert(opcode != nullptr);
+ return std::max(opcode->builtin_code(),
+ static_cast<::tflite::BuiltinOperator>(opcode->deprecated_builtin_code()));
+}
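+
+// Worked example (values mirrored from src/Helper.test.cpp):
+//   v3 file:  deprecated_builtin_code = 3 (CONV_2D), builtin_code = 0 (ADD, the v3 default)
+//             -> max(0, 3) = 3 = CONV_2D
+//   v3a file: deprecated_builtin_code = 127 (placeholder), builtin_code = 128 (CUMSUM)
+//             -> max(127, 128) = 128 = CUMSUM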
+
+bool is_valid(const ::tflite::OperatorCode *opcode)
+{
+ // Valid Range : 0 <= deprecated_builtin_code <= 127
+ const int8_t deprecated_builtin_code = opcode->deprecated_builtin_code();
+ if (deprecated_builtin_code < 0)
+ return false;
+
+ const ::tflite::BuiltinOperator builtin_code = opcode->builtin_code();
+ if (!(::tflite::BuiltinOperator_MIN <= builtin_code &&
+ builtin_code <= ::tflite::BuiltinOperator_MAX))
+ return false;
+
+ return true;
+}
+
+bool is_custom(const ::tflite::OperatorCode *opcode)
+{
+ ::tflite::BuiltinOperator code = builtin_code_neutral(opcode);
+ return (code == ::tflite::BuiltinOperator_CUSTOM);
+}
+
+std::string opcode_name(const ::tflite::OperatorCode *opcode)
+{
+ assert(opcode);
+
+ if (!is_valid(opcode))
+ {
+ std::ostringstream oss;
+ oss << "(invalid)";
+ return oss.str();
+ }
+
+ if (is_custom(opcode))
+ {
+ if (!opcode->custom_code())
+ return "(invalid custom)";
+
+ std::string custom_op = "CUSTOM(";
+ custom_op += opcode->custom_code()->c_str();
+ custom_op += ")";
+ return custom_op;
+ }
+
+ ::tflite::BuiltinOperator code = builtin_code_neutral(opcode);
+ return ::tflite::EnumNameBuiltinOperator(code);
+}
+
+const char *tensor_type(const ::tflite::Tensor *tensor)
+{
+ return ::tflite::EnumNameTensorType(tensor->type());
+}
+
+const char *tensor_name(const ::tflite::Tensor *tensor)
+{
+ static const char *kEmptyTensorName = "(noname)";
+
+ auto name = tensor->name();
+ if (name)
+ return name->c_str();
+
+ return kEmptyTensorName;
+}
+
+} // namespace tflite
+} // namespace mio
diff --git a/compiler/mio-tflite280/src/Helper.test.cpp b/compiler/mio-tflite280/src/Helper.test.cpp
new file mode 100644
index 000000000..df573bf44
--- /dev/null
+++ b/compiler/mio-tflite280/src/Helper.test.cpp
@@ -0,0 +1,159 @@
+/*
+ * Copyright (c) 2022 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "mio_tflite280/Helper.h"
+
+#include <flatbuffers/flatbuffers.h>
+#include <gtest/gtest.h>
+
+#include <vector>
+
+class mio_tflite280_helper_test : public ::testing::Test
+{
+protected:
+ void initialization_finish(void)
+ {
+ _fbb.Finish(tflite::CreateModelDirect(_fbb, 0, &_opcodes_vec));
+ }
+
+protected:
+ void add_operator_code(int8_t deprecated_builtin_code, const char *custom_code,
+ tflite::BuiltinOperator builtin_code)
+ {
+ _opcodes_vec.push_back(tflite::CreateOperatorCodeDirect(
+ _fbb, deprecated_builtin_code, custom_code, 1 /* version */, builtin_code));
+ }
+
+ const tflite::OperatorCode *get_operator_code(uint8_t idx)
+ {
+ return tflite::GetModel(_fbb.GetBufferPointer())->operator_codes()->Get(idx);
+ }
+
+private:
+ flatbuffers::FlatBufferBuilder _fbb;
+ std::vector<flatbuffers::Offset<tflite::OperatorCode>> _opcodes_vec;
+};
+
+/**
+ * Extended 'builtin_code' is not in TFLite schema v3.
+ *
+ * Thus it is filled with 0 (BuiltinOperator_ADD) in schema v3. Please refer to
+ * https://github.com/tensorflow/tensorflow/blob/1ab788fa8d08430be239ab970980b891ad7af494/tensorflow/lite/schema/schema_utils.cc#L28-L31
+ */
+TEST_F(mio_tflite280_helper_test, v3)
+{
+ // BuiltinOperator_ADD = 0
+ // BuiltinOperator_CONV_2D = 3
+ add_operator_code(3, "", tflite::BuiltinOperator_ADD);
+ initialization_finish();
+
+ ASSERT_TRUE(mio::tflite::is_valid(get_operator_code(0)));
+ ASSERT_EQ(mio::tflite::builtin_code_neutral(get_operator_code(0)),
+ tflite::BuiltinOperator_CONV_2D);
+ ASSERT_FALSE(mio::tflite::is_custom(get_operator_code(0)));
+}
+
+TEST_F(mio_tflite280_helper_test, v3_custom)
+{
+ // BuiltinOperator_ADD = 0
+ // BuiltinOperator_CUSTOM = 32
+ add_operator_code(32, "custom", tflite::BuiltinOperator_ADD);
+ initialization_finish();
+
+ ASSERT_TRUE(mio::tflite::is_valid(get_operator_code(0)));
+ ASSERT_EQ(mio::tflite::builtin_code_neutral(get_operator_code(0)),
+ tflite::BuiltinOperator_CUSTOM);
+ ASSERT_TRUE(mio::tflite::is_custom(get_operator_code(0)));
+}
+
+TEST_F(mio_tflite280_helper_test, v3_NEG)
+{
+ // BuiltinOperator_ADD = 0
+ // BuiltinOperator_CUMSUM = 128
+ // deprecated_builtin_code cannot be a negative value (128 wraps to -128 in the int8_t parameter)
+ add_operator_code(128, "", tflite::BuiltinOperator_ADD);
+ initialization_finish();
+
+ ASSERT_FALSE(mio::tflite::is_valid(get_operator_code(0)));
+}
+
+TEST_F(mio_tflite280_helper_test, v3a_under127)
+{
+ // BuiltinOperator_CONV_2D = 3
+ add_operator_code(3, "", tflite::BuiltinOperator_CONV_2D);
+ initialization_finish();
+
+ ASSERT_TRUE(mio::tflite::is_valid(get_operator_code(0)));
+ ASSERT_EQ(mio::tflite::builtin_code_neutral(get_operator_code(0)),
+ tflite::BuiltinOperator_CONV_2D);
+ ASSERT_FALSE(mio::tflite::is_custom(get_operator_code(0)));
+}
+
+TEST_F(mio_tflite280_helper_test, v3a_under127_NEG)
+{
+ // BuiltinOperator_CONV_2D = 3
+ // BuiltinOperator_CUMSUM = 128
+ // deprecated_builtin_code cannot be a negative value
+ add_operator_code(128, "", tflite::BuiltinOperator_CONV_2D);
+ initialization_finish();
+
+ ASSERT_FALSE(mio::tflite::is_valid(get_operator_code(0)));
+}
+
+TEST_F(mio_tflite280_helper_test, v3a_custom)
+{
+ // BuiltinOperator_CUSTOM = 32
+ add_operator_code(32, "custom", tflite::BuiltinOperator_CUSTOM);
+ initialization_finish();
+
+ ASSERT_TRUE(mio::tflite::is_valid(get_operator_code(0)));
+ ASSERT_EQ(mio::tflite::builtin_code_neutral(get_operator_code(0)),
+ tflite::BuiltinOperator_CUSTOM);
+ ASSERT_TRUE(mio::tflite::is_custom(get_operator_code(0)));
+}
+
+TEST_F(mio_tflite280_helper_test, v3a_custom_NEG)
+{
+ // BuiltinOperator_CUMSUM = 128
+ // deprecated_builtin_code cannot be a negative value
+ add_operator_code(128, "custom", tflite::BuiltinOperator_CUSTOM);
+ initialization_finish();
+
+ ASSERT_FALSE(mio::tflite::is_valid(get_operator_code(0)));
+}
+
+TEST_F(mio_tflite280_helper_test, v3a_over127)
+{
+ // BuiltinOperator_PLACEHOLDER_FOR_GREATER_OP_CODES = 127
+ // BuiltinOperator_CUMSUM = 128
+ add_operator_code(127, "", tflite::BuiltinOperator_CUMSUM);
+ initialization_finish();
+
+ ASSERT_TRUE(mio::tflite::is_valid(get_operator_code(0)));
+ ASSERT_EQ(mio::tflite::builtin_code_neutral(get_operator_code(0)),
+ tflite::BuiltinOperator_CUMSUM);
+ ASSERT_FALSE(mio::tflite::is_custom(get_operator_code(0)));
+}
+
+TEST_F(mio_tflite280_helper_test, v3a_over127_NEG)
+{
+ // BuiltinOperator_CUMSUM = 128
+ // deprecated_builtin_code cannot be a negative value
+ add_operator_code(128, "", tflite::BuiltinOperator_CUMSUM);
+ initialization_finish();
+
+ ASSERT_FALSE(mio::tflite::is_valid(get_operator_code(0)));
+}
diff --git a/compiler/mir/src/mir_onnx_importer/CMakeLists.txt b/compiler/mir/src/mir_onnx_importer/CMakeLists.txt
index e6eb13b93..04c22055e 100644
--- a/compiler/mir/src/mir_onnx_importer/CMakeLists.txt
+++ b/compiler/mir/src/mir_onnx_importer/CMakeLists.txt
@@ -112,6 +112,10 @@ target_include_directories(mir_onnx_importer PUBLIC ../../include/mir_onnx_impor
target_include_directories(mir_onnx_importer PRIVATE ${CMAKE_CURRENT_SOURCE_DIR})
target_link_libraries(mir_onnx_importer PUBLIC mir mir_onnx_proto PRIVATE mir_interpreter nncc_common)
+if(NOT ENABLE_TEST)
+ return()
+endif(NOT ENABLE_TEST)
+
nnas_find_package(GTest REQUIRED)
file(GLOB_RECURSE TEST_SOURCES "*.test.cpp")
diff --git a/compiler/mir/src/mir_tflite_importer/CMakeLists.txt b/compiler/mir/src/mir_tflite_importer/CMakeLists.txt
index 42eb4f8a5..6c6c28a32 100644
--- a/compiler/mir/src/mir_tflite_importer/CMakeLists.txt
+++ b/compiler/mir/src/mir_tflite_importer/CMakeLists.txt
@@ -1,4 +1,4 @@
-nnas_find_package(FlatBuffers EXACT 1.10 REQUIRED)
+nnas_find_package(FlatBuffers EXACT 2.0 REQUIRED)
if (NOT FlatBuffers_FOUND)
return()
diff --git a/compiler/mir2loco/CMakeLists.txt b/compiler/mir2loco/CMakeLists.txt
index a8a096ef4..217f1bd15 100644
--- a/compiler/mir2loco/CMakeLists.txt
+++ b/compiler/mir2loco/CMakeLists.txt
@@ -8,11 +8,11 @@ target_include_directories(mir2loco PUBLIC include)
target_link_libraries(mir2loco PUBLIC mir)
target_link_libraries(mir2loco PUBLIC loco)
-nnas_find_package(GTest QUIET)
-
-if(NOT GTest_FOUND)
+if(NOT ENABLE_TEST)
return()
-endif(NOT GTest_FOUND)
+endif(NOT ENABLE_TEST)
+
+nnas_find_package(GTest QUIET)
GTest_AddTest(mir2loco_test ${TESTS})
target_link_libraries(mir2loco_test mir2loco)
diff --git a/compiler/moco-tf/CMakeLists.txt b/compiler/moco-tf/CMakeLists.txt
index 7c42761ba..95669264f 100644
--- a/compiler/moco-tf/CMakeLists.txt
+++ b/compiler/moco-tf/CMakeLists.txt
@@ -26,6 +26,7 @@ target_link_libraries(moco_tf_frontend PRIVATE locomotiv)
target_link_libraries(moco_tf_frontend PRIVATE plier_tf)
target_link_libraries(moco_tf_frontend PRIVATE locoex_customop)
target_link_libraries(moco_tf_frontend PRIVATE logo)
+target_link_libraries(moco_tf_frontend PRIVATE logo_ex)
target_link_libraries(moco_tf_frontend PRIVATE oops)
install(TARGETS moco_tf_frontend DESTINATION lib)
@@ -46,4 +47,5 @@ target_link_libraries(moco_tf_frontend_test moco_tf_frontend)
target_link_libraries(moco_tf_frontend_test plier_tf)
target_link_libraries(moco_tf_frontend_test locoex_customop)
target_link_libraries(moco_tf_frontend_test logo)
+target_link_libraries(moco_tf_frontend_test logo_ex)
add_test(moco_tf_frontend_test moco_tf_frontend_test)
diff --git a/compiler/moco-tf/requires.cmake b/compiler/moco-tf/requires.cmake
index 90590e374..71755556c 100644
--- a/compiler/moco-tf/requires.cmake
+++ b/compiler/moco-tf/requires.cmake
@@ -9,5 +9,6 @@ require("mio-tf")
require("plier-tf")
require("locoex-customop")
require("logo")
+require("logo-ex")
require("oops")
require("bino")
diff --git a/compiler/moco-tf/src/Transforms.h b/compiler/moco-tf/src/Transforms.h
index f14b81675..a197a796e 100644
--- a/compiler/moco-tf/src/Transforms.h
+++ b/compiler/moco-tf/src/Transforms.h
@@ -21,6 +21,7 @@
#include "Transforms/TypeInferencePass.h"
#include <logo/Passes.h>
+#include <logo/PassesEx.h>
#include <moco/Pass/Passes.h>
#endif // __MOCO_TF_TRANSFORMS_H__
diff --git a/compiler/morph/CMakeLists.txt b/compiler/morph/CMakeLists.txt
index ec7da8d30..5a5ae2623 100644
--- a/compiler/morph/CMakeLists.txt
+++ b/compiler/morph/CMakeLists.txt
@@ -8,11 +8,11 @@ target_include_directories(morph PUBLIC include)
target_link_libraries(morph PRIVATE nncc_common)
target_link_libraries(morph PUBLIC angkor)
-nnas_find_package(GTest QUIET)
-
-if(NOT GTest_FOUND)
+if(NOT ENABLE_TEST)
return()
-endif(NOT GTest_FOUND)
+endif(NOT ENABLE_TEST)
+
+nnas_find_package(GTest QUIET)
add_executable(morph_test ${TESTS})
target_link_libraries(morph_test morph)
diff --git a/compiler/nest/core/CMakeLists.txt b/compiler/nest/core/CMakeLists.txt
index b603f9ae9..4f17db3b4 100644
--- a/compiler/nest/core/CMakeLists.txt
+++ b/compiler/nest/core/CMakeLists.txt
@@ -15,11 +15,11 @@ foreach(EXAMPLE_FILE IN ITEMS ${EXAMPLE_FILES})
target_link_libraries(${TARGET_NAME} nest_core)
endforeach(EXAMPLE_FILE)
-nnas_find_package(GTest QUIET)
-
-if(NOT GTest_FOUND)
+if(NOT ENABLE_TEST)
return()
-endif(NOT GTest_FOUND)
+endif(NOT ENABLE_TEST)
+
+nnas_find_package(GTest QUIET)
add_executable(nest_core_test ${TESTS})
target_link_libraries(nest_core_test gtest_main)
diff --git a/compiler/nike/CMakeLists.txt b/compiler/nike/CMakeLists.txt
index 737c73b8f..6bd3199e3 100644
--- a/compiler/nike/CMakeLists.txt
+++ b/compiler/nike/CMakeLists.txt
@@ -5,11 +5,11 @@ list(REMOVE_ITEM SOURCES ${TESTS})
add_library(nike STATIC ${SOURCES})
target_include_directories(nike PUBLIC include)
-nnas_find_package(GTest QUIET)
-
-if(NOT GTest_FOUND)
+if(NOT ENABLE_TEST)
return()
-endif(NOT GTest_FOUND)
+endif(NOT ENABLE_TEST)
+
+nnas_find_package(GTest QUIET)
GTest_AddTest(nike_test ${TESTS})
target_link_libraries(nike_test nike)
diff --git a/compiler/nnc/unittests/soft_backend/ModelAnalyzer.cpp b/compiler/nnc/unittests/soft_backend/ModelAnalyzer.cpp
index d38385e91..c2135c4be 100644
--- a/compiler/nnc/unittests/soft_backend/ModelAnalyzer.cpp
+++ b/compiler/nnc/unittests/soft_backend/ModelAnalyzer.cpp
@@ -22,6 +22,8 @@
#include <gtest/gtest.h>
+#include <algorithm>
+
using namespace std;
using namespace nnc;
using namespace mir;
diff --git a/compiler/nnop/CMakeLists.txt b/compiler/nnop/CMakeLists.txt
index 82c0e3a86..d2c8af26d 100644
--- a/compiler/nnop/CMakeLists.txt
+++ b/compiler/nnop/CMakeLists.txt
@@ -2,11 +2,11 @@ add_library(nnop INTERFACE)
target_include_directories(nnop INTERFACE include)
target_link_libraries(nnop INTERFACE angkor)
-nnas_find_package(GTest QUIET)
-
-if(NOT GTest_FOUND)
+if(NOT ENABLE_TEST)
return()
-endif(NOT GTest_FOUND)
+endif(NOT ENABLE_TEST)
+
+nnas_find_package(GTest QUIET)
file(GLOB_RECURSE TESTS "src/*.test.cpp")
diff --git a/compiler/one-cmds/CMakeLists.txt b/compiler/one-cmds/CMakeLists.txt
index 729bfa80a..8732340ae 100644
--- a/compiler/one-cmds/CMakeLists.txt
+++ b/compiler/one-cmds/CMakeLists.txt
@@ -14,6 +14,11 @@ set(ONE_COMMAND_FILES
onecc
)
+# pytorch importer is an experimental feature; it is not used in the default configuration
+if(ENABLE_ONE_IMPORT_PYTORCH)
+ list(APPEND ONE_COMMAND_FILES one-import-pytorch)
+endif(ENABLE_ONE_IMPORT_PYTORCH)
+
foreach(ONE_COMMAND IN ITEMS ${ONE_COMMAND_FILES})
set(ONE_COMMAND_FILE ${ONE_COMMAND})
@@ -41,6 +46,7 @@ set(ONE_UTILITY_FILES
one-build.template.cfg
onecc.template.cfg
utils.py
+ onnx_legalizer.py
)
foreach(ONE_UTILITY IN ITEMS ${ONE_UTILITY_FILES})
@@ -66,6 +72,39 @@ foreach(ONE_UTILITY IN ITEMS ${ONE_UTILITY_FILES})
endforeach(ONE_UTILITY)
+# make 'onelib' python directory and copy module files into it
+set(ONE_PYTHON_FILES constant.py
+ make_cmd.py)
+
+foreach(ONE_PYTHON_FILE IN ITEMS ${ONE_PYTHON_FILES})
+
+ set(ONE_PYTHON_DIR "onelib")
+ set(ONE_PYTHON_DIR_BIN "${CMAKE_CURRENT_BINARY_DIR}/${ONE_PYTHON_DIR}")
+ set(ONE_PYTHON_FILE_SRC "${CMAKE_CURRENT_SOURCE_DIR}/${ONE_PYTHON_DIR}/${ONE_PYTHON_FILE}")
+ set(ONE_PYTHON_FILE_BIN "${CMAKE_CURRENT_BINARY_DIR}/${ONE_PYTHON_DIR}/${ONE_PYTHON_FILE}")
+ set(ONE_PYTHON_TARGET "${ONE_PYTHON_FILE}_target")
+
+ add_custom_command(OUTPUT ${ONE_PYTHON_DIR_BIN}
+ COMMAND ${CMAKE_COMMAND} -E make_directory "${ONE_PYTHON_DIR_BIN}"
+ COMMENT "Generate ${ONE_PYTHON_DIR_BIN}"
+ )
+
+ add_custom_command(OUTPUT ${ONE_PYTHON_FILE_BIN}
+ COMMAND ${CMAKE_COMMAND} -E copy "${ONE_PYTHON_FILE_SRC}" "${ONE_PYTHON_FILE_BIN}"
+ DEPENDS ${ONE_PYTHON_FILE_SRC}
+ COMMENT "Generate ${ONE_PYTHON_FILE_BIN}"
+ )
+
+ add_custom_target(${ONE_PYTHON_TARGET} ALL DEPENDS ${ONE_PYTHON_DIR_BIN} ${ONE_PYTHON_FILE_BIN})
+
+ install(DIRECTORY ${ONE_PYTHON_DIR}
+ FILE_PERMISSIONS OWNER_WRITE OWNER_READ
+ GROUP_READ
+ WORLD_READ
+ DESTINATION bin)
+
+endforeach(ONE_PYTHON_FILE)
+
set(ONE_DOCUMENT_FILES
how-to-use-one-commands.txt
how-to-prepare-virtualenv.txt
diff --git a/compiler/one-cmds/how-to-prepare-virtualenv.txt b/compiler/one-cmds/how-to-prepare-virtualenv.txt
index 6d846c081..8d6007f38 100644
--- a/compiler/one-cmds/how-to-prepare-virtualenv.txt
+++ b/compiler/one-cmds/how-to-prepare-virtualenv.txt
@@ -5,7 +5,7 @@ Last update: 2020-09-15
This document explains about 'one-prepare-venv' command.
-'one-prepare-venv' will prepare python3 virtual environment with tensorflow-cpu
+'one-prepare-venv' will prepare a python3.8 virtual environment with tensorflow-cpu
version 2.3.0, recommanded 2.x version as of now, so that 'one-import-tf'
command can execute properly.
@@ -20,7 +20,7 @@ Please install these required packages before venv preparation.
$ sudo apt-get update
$ sudo apt-get upgrade
-$ sudo apt-get install python3-pip python3-venv
+$ sudo apt-get install python3.8 python3-pip python3.8-venv
How to run for Ubuntu
@@ -36,18 +36,9 @@ There will be venv folder as of result.
How to run for Windows
----------------------
-1. First, please prepare Python 3.5-3.7
-2. Open the Command Prompt as an administrator
-3. cd(change directory) to the directory where one-compiler is installed
-4. run below command
-```
-$ ONE\install\bin> python -m venv venv
-$ ONE\install\bin> cd venv/Scripts
-$ ONE\install\bin\venv/Scripts> pip.exe install -U pip
-$ ONE\install\bin\venv/Scripts> pip.exe install -U tensorflow-cpu==2.3.0
-```
-
-After running the above command, go back to MinGW and run one-compiler.
+Support for Windows is not maintained for now.
+If you need to run on Windows, please file an issue, or use Docker for Windows.
Trouble shooting
diff --git a/compiler/one-cmds/how-to-use-one-commands.txt b/compiler/one-cmds/how-to-use-one-commands.txt
index 0a0c4b14c..ebc165167 100644
--- a/compiler/one-cmds/how-to-use-one-commands.txt
+++ b/compiler/one-cmds/how-to-use-one-commands.txt
@@ -155,6 +155,7 @@ Current transformation options are
- fold_cast : This removes Cast operation which can be folded
- fold_dequantize : This removes Dequantize operation which can be folded
- fold_dwconv : This folds Depthwise Convolution operation which can be folded
+- fold_gather : This removes Gather operation which can be folded
- fold_sparse_to_dense : This removes SparseToDense operation which can be folded
- forward_reshape_to_unaryop: This will move Reshape after UnaryOp for centain condition
- fuse_add_with_fully_connected: This fuses Add operator with the preceding FullyConnected operator if possible
@@ -178,6 +179,7 @@ Current transformation options are
- generate_profile_data : This will turn on profiling data generation.
- remove_fakequant : This will remove all fakequant operators.
- remove_quantdequant : This will remove all Quantize-Dequantize sequence.
+- remove_redundant_quantize : This removes redundant quantize operators.
- remove_redundant_reshape : This fuses or removes redundant reshape operators.
- remove_redundant_transpose : This fuses or removes redundant transpose operators.
- remove_unnecessary_reshape : This removes unnecessary reshape operators.
diff --git a/compiler/one-cmds/one-build b/compiler/one-cmds/one-build
index 90dfa77b8..5c313b44b 100644
--- a/compiler/one-cmds/one-build
+++ b/compiler/one-cmds/one-build
@@ -154,25 +154,31 @@ def main():
config = _parse_cfg(args)
# verify configuration file
- drivers = [
- 'one-import-tf', 'one-import-tflite', 'one-import-bcq', 'one-import-onnx',
- 'one-optimize', 'one-quantize', 'one-pack', 'one-codegen'
+ bin_dir = os.path.dirname(os.path.realpath(__file__))
+ import_drivers_dict = _utils._detect_one_import_drivers(bin_dir)
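+ # import_drivers_dict maps a cfg section name to its driver file name, e.g.
+ # {'one-import-tf': 'one-import-tf', ...}; extra importers placed in bin_dir
+ # (such as one-import-pytorch) are detected automatically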
+ transform_drivers = [
+ 'one-optimize', 'one-quantize', 'one-pack', 'one-codegen', 'one-profile'
]
- _verify_cfg(drivers, config)
+ _verify_cfg(import_drivers_dict, config)
# verify optimization option file
_verify_opt(args)
# get sections to run
section_to_run = []
- for d in drivers:
+ for d in list(import_drivers_dict) + transform_drivers:
if _is_available_driver(config, d):
section_to_run.append(d)
# run
dir_path = os.path.dirname(os.path.realpath(__file__))
for section in section_to_run:
- driver_path = os.path.join(dir_path, _get_driver_name(section))
+ if section in import_drivers_dict:
+ # we already have the driver name in the dict
+ driver_name = import_drivers_dict[section]
+ else:
+ driver_name = _get_driver_name(section)
+ driver_path = os.path.join(dir_path, driver_name)
cmd = [driver_path, '--config', getattr(args, 'config'), '--section', section]
if section == 'one-optimize' and _utils._is_valid_attr(args, 'O'):
cmd += ['-O', getattr(args, 'O')]
diff --git a/compiler/one-cmds/one-import-bcq b/compiler/one-cmds/one-import-bcq
index 9aef6270e..ef89a9297 100644
--- a/compiler/one-cmds/one-import-bcq
+++ b/compiler/one-cmds/one-import-bcq
@@ -25,6 +25,7 @@ import subprocess
import sys
import tempfile
+import onelib.make_cmd as _make_cmd
import utils as _utils
import generate_bcq_output_arrays as _bcq_info_gen
@@ -32,6 +33,10 @@ import generate_bcq_output_arrays as _bcq_info_gen
sys.tracebacklimit = 0
+def get_driver_cfg_section():
+ return "one-import-bcq"
+
+
def _get_parser():
parser = argparse.ArgumentParser(
description='command line tool to convert TensorFlow with BCQ to circle')
@@ -155,7 +160,7 @@ def _convert(args):
tmpdir,
os.path.splitext(
os.path.basename(generate_bcq_metadata_output_path))[0]) + '.tflite'
- tf2tfliteV2_cmd = _utils._make_tf2tfliteV2_cmd(args, tf2tfliteV2_path,
+ tf2tfliteV2_cmd = _make_cmd.make_tf2tfliteV2_cmd(args, tf2tfliteV2_path,
generate_bcq_metadata_output_path,
tf2tfliteV2_output_path)
try:
@@ -171,7 +176,7 @@ def _convert(args):
# make a command to convert from tflite to circle
tflite2circle_path = os.path.join(dir_path, 'tflite2circle')
- tflite2circle_cmd = _utils._make_tflite2circle_cmd(tflite2circle_path,
+ tflite2circle_cmd = _make_cmd.make_tflite2circle_cmd(tflite2circle_path,
tf2tfliteV2_output_path,
getattr(args, 'output_path'))
diff --git a/compiler/one-cmds/one-import-onnx b/compiler/one-cmds/one-import-onnx
index 1c0c5498e..eaa136197 100644
--- a/compiler/one-cmds/one-import-onnx
+++ b/compiler/one-cmds/one-import-onnx
@@ -27,12 +27,25 @@ import tempfile
import onnx
import onnx_tf
+# ONNX legalizer is an optional feature
+# It enables conversion of some operations, but is in an experimental phase for now
+try:
+ import onnx_legalizer
+ _onnx_legalizer_enabled = True
+except ImportError:
+ _onnx_legalizer_enabled = False
+
+import onelib.make_cmd as _make_cmd
import utils as _utils
# TODO Find better way to suppress trackback on error
sys.tracebacklimit = 0
+def get_driver_cfg_section():
+ return "one-import-onnx"
+
+
def _get_parser():
parser = argparse.ArgumentParser(
description='command line tool to convert ONNX to circle')
@@ -64,6 +77,10 @@ def _get_parser():
tf2tfliteV2_group.add_argument('--model_format', default='saved_model')
tf2tfliteV2_group.add_argument('--converter_version', default='v2')
+ parser.add_argument('--unroll_rnn', action='store_true', help='Unroll RNN operators')
+ parser.add_argument(
+ '--unroll_lstm', action='store_true', help='Unroll LSTM operators')
+
# save intermediate file(s)
parser.add_argument(
'--save_intermediate',
@@ -120,6 +137,11 @@ def _convert(args):
tmpdir = os.path.dirname(logfile_path)
# convert onnx to tf saved model
onnx_model = onnx.load(getattr(args, 'input_path'))
+ if _onnx_legalizer_enabled:
+ options = onnx_legalizer.LegalizeOptions()
+ options.unroll_rnn = _utils._is_valid_attr(args, 'unroll_rnn')
+ options.unroll_lstm = _utils._is_valid_attr(args, 'unroll_lstm')
+ onnx_legalizer.legalize(onnx_model, options)
tf_savedmodel = onnx_tf.backend.prepare(onnx_model)
savedmodel_name = os.path.splitext(os.path.basename(
@@ -133,7 +155,7 @@ def _convert(args):
args.output_path))[0] + '.tflite'
tf2tfliteV2_output_path = os.path.join(tmpdir, tf2tfliteV2_output_name)
- tf2tfliteV2_cmd = _utils._make_tf2tfliteV2_cmd(
+ tf2tfliteV2_cmd = _make_cmd.make_tf2tfliteV2_cmd(
args, tf2tfliteV2_path, savedmodel_output_path, tf2tfliteV2_output_path)
f.write((' '.join(tf2tfliteV2_cmd) + '\n').encode())
@@ -143,7 +165,7 @@ def _convert(args):
# make a command to convert from tflite to circle
tflite2circle_path = os.path.join(dir_path, 'tflite2circle')
- tflite2circle_cmd = _utils._make_tflite2circle_cmd(tflite2circle_path,
+ tflite2circle_cmd = _make_cmd.make_tflite2circle_cmd(tflite2circle_path,
tf2tfliteV2_output_path,
getattr(args, 'output_path'))
diff --git a/compiler/one-cmds/one-import-pytorch b/compiler/one-cmds/one-import-pytorch
new file mode 100644
index 000000000..dbf1ba6d7
--- /dev/null
+++ b/compiler/one-cmds/one-import-pytorch
@@ -0,0 +1,366 @@
+#!/usr/bin/env bash
+''''export SCRIPT_PATH="$(cd "$(dirname "$(readlink -f "${BASH_SOURCE[0]}")")" && pwd)" # '''
+''''export PY_PATH=${SCRIPT_PATH}/venv/bin/python # '''
+''''test -f ${PY_PATH} && exec ${PY_PATH} "$0" "$@" # '''
+''''echo "Error: Virtual environment not found. Please run 'one-prepare-venv' command." # '''
+''''exit 255 # '''
+
+# Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+import argparse
+import importlib
+import inspect
+import os
+import sys
+import tempfile
+import torch
+import onnx
+import onnx_tf
+import json
+import zipfile
+
+import onnx_legalizer
+import onelib.make_cmd as _make_cmd
+import utils as _utils
+
+# TODO Find a better way to suppress traceback on error
+sys.tracebacklimit = 0
+
+
+def get_driver_spec():
+ return ("one-import-pytorch", _utils.DriverType.IMPORTER)
+
+
+def _get_parser():
+ parser = argparse.ArgumentParser(
+ description='command line tool to convert PyTorch to Circle')
+
+ _utils._add_default_arg(parser)
+
+ ## converter arguments
+ converter_group = parser.add_argument_group('converter arguments')
+
+ # input and output path.
+ converter_group.add_argument(
+ '-i', '--input_path', type=str, help='full filepath of the input file')
+ converter_group.add_argument(
+ '-p', '--python_path', type=str, help='full filepath of the python model file')
+ converter_group.add_argument(
+ '-o', '--output_path', type=str, help='full filepath of the output file')
+
+ # input arrays.
+ converter_group.add_argument(
+ '-s',
+ '--input_shapes',
+ type=str,
+ help=
+ 'shapes of input tensors, colon-separated (ex: \"1,4,4,3:1,20,20,3\")'
+ )
+ converter_group.add_argument(
+ '-t',
+ '--input_types',
+ type=str,
+ help='data types of input tensors, colon-separated (ex: float32, uint8, int32)')
+
+ # fixed options
+ tf2tflite_group = parser.add_argument_group('tf2tfliteV2 arguments')
+ tf2tflite_group.add_argument('--model_format', default='saved_model')
+ tf2tflite_group.add_argument('--converter_version', default='v2')
+
+ parser.add_argument('--unroll_rnn', action='store_true', help='Unroll RNN operators')
+ parser.add_argument('--unroll_lstm', action='store_true', help='Unroll LSTM operators')
+
+ # save intermediate file(s)
+ parser.add_argument(
+ '--save_intermediate',
+ action='store_true',
+ help='Save intermediate files to output folder')
+
+ return parser
+
+
+def _verify_arg(parser, args):
+ """verify given arguments"""
+ # check if required arguments is given
+ missing = []
+ if not _utils._is_valid_attr(args, 'input_path'):
+ missing.append('-i/--input_path')
+ if not _utils._is_valid_attr(args, 'output_path'):
+ missing.append('-o/--output_path')
+ if not _utils._is_valid_attr(args, 'input_shapes'):
+ missing.append('-s/--input_shapes')
+ if not _utils._is_valid_attr(args, 'input_types'):
+ missing.append('-t/--input_types')
+
+ if len(missing):
+ parser.error('the following arguments are required: ' + ' '.join(missing))
+
+
+def _parse_arg(parser):
+ args = parser.parse_args()
+ # print version
+ if args.version:
+ _utils._print_version_and_exit(__file__)
+
+ return args
+
+
+def _apply_verbosity(verbosity):
+ # NOTE
+ # TF_CPP_MIN_LOG_LEVEL
+ # 0 : INFO + WARNING + ERROR + FATAL
+ # 1 : WARNING + ERROR + FATAL
+ # 2 : ERROR + FATAL
+ # 3 : FATAL
+ if verbosity:
+ os.environ['TF_CPP_MIN_LOG_LEVEL'] = '0'
+ else:
+ os.environ['TF_CPP_MIN_LOG_LEVEL'] = '2'
+
+
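+# Parse a colon-separated shape string, e.g.
+#   _parse_shapes("1,4,4,3:1,20,20,3") -> [[1, 4, 4, 3], [1, 20, 20, 3]]
+# An empty entry yields [] (scalar / unspecified shape).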
+def _parse_shapes(shapes_str):
+ shapes = []
+ for shape_str in shapes_str.split(":"):
+ if shape_str != "":
+ shapes += [list(map(int, shape_str.split(",")))]
+ else:
+ shapes += [[]]
+ return shapes
+
+
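+# Parse comma-separated dtype names, e.g.
+#   _parse_types("float32, uint8") -> [torch.float32, torch.uint8]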
+def _parse_types(types_str):
+ # There is no convenient way to create a torch dtype from a string or numpy dtype, so use this lookup table
+ dtype_dict = {
+ "bool": torch.bool,
+ "uint8": torch.uint8,
+ "int8": torch.int8,
+ "int16": torch.int16,
+ "int32": torch.int32,
+ "int64": torch.int64,
+ "float16": torch.float16,
+ "float32": torch.float32,
+ "float64": torch.float64,
+ "complex64": torch.complex64,
+ "complex128": torch.complex128
+ }
+ array = types_str.split(",")
+ types = [dtype_dict[type_str.strip()] for type_str in array]
+ return types
+
+
+# merge contents of module into global namespace
+def _merge_module(module):
+ # is there an __all__? if so respect it
+ if "__all__" in module.__dict__:
+ names = module.__dict__["__all__"]
+ else:
+ # otherwise we import all names that don't begin with _
+ names = [x for x in module.__dict__ if not x.startswith("_")]
+ globals().update({k: getattr(module, k) for k in names})
+
+
+def _list_classes_from_module(module):
+ # Parsing the module to get all defined classes
+ is_member = lambda member: inspect.isclass(member) and member.__module__ == module.__name__
+ classes = [cls[1] for cls in inspect.getmembers(module, is_member)]
+ return classes
+
+
+def _extract_pytorch_model(log_file, parameters_path, python_path):
+ log_file.write(('Trying to load saved model\n').encode())
+ python_model_path = os.path.abspath(python_path)
+ module_name = os.path.basename(python_model_path)
+ module_dir = os.path.dirname(python_model_path)
+ sys.path.append(module_dir)
+ log_file.write(('Trying to load given python module\n').encode())
+ module_loader = importlib.machinery.SourceFileLoader(module_name, python_model_path)
+ module_spec = importlib.util.spec_from_loader(module_name, module_loader)
+ python_model_module = importlib.util.module_from_spec(module_spec)
+
+ try:
+ module_loader.exec_module(python_model_module)
+ except Exception:
+ raise ValueError('Failed to execute given python model file')
+
+ log_file.write(('Model python module is loaded\n').encode())
+ try:
+ # this branch assumes this parameters_path contains state_dict
+ state_dict = torch.load(parameters_path)
+ log_file.write(('Trying to find model class and fill its state dict\n').encode())
+ model_class_definitions = _list_classes_from_module(python_model_module)
+ if len(model_class_definitions) != 1:
+ raise ValueError("Expected only one class as model definition. {}".format(
+ model_class_definitions))
+ pytorch_model_class = model_class_definitions[0]
+ model = pytorch_model_class()
+ model.load_state_dict(state_dict)
+ return model
+ except Exception:
+ # this branch assumes this parameters_path contains "entire" model
+ _merge_module(python_model_module)
+ log_file.write(('Model python module is merged into main environment\n').encode())
+ model = torch.load(parameters_path)
+ log_file.write(('Pytorch model loaded\n').encode())
+ return model
+
+
+def _extract_torchscript_model(log_file, input_path):
+ # assuming this is a pytorch script
+ log_file.write(('Trying to load TorchScript model\n').encode())
+ try:
+ pytorch_model = torch.jit.load(input_path)
+ except RuntimeError as e:
+ log_file.write((str(e) + '\n').encode())
+ log_file.write(
+ 'Failed to import input file. Maybe it contains only weights? Try passing the "python_path" argument\n'.
+ encode())
+ raise
+ log_file.write(('TorchScript model is loaded\n').encode())
+ return pytorch_model
+
+
+def _extract_mar_model(log_file, tmpdir, input_path):
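+ # A .mar file (TorchServe model archive) is a zip archive holding MAR-INF/MANIFEST.json,
+ # a 'serializedFile' (state_dict or TorchScript) and, optionally, a 'modelFile' with the
+ # model class definition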
+ mar_dir_path = os.path.join(tmpdir, 'mar')
+ with zipfile.ZipFile(input_path) as zip_input:
+ zip_input.extractall(path=mar_dir_path)
+ manifest_path = os.path.join(mar_dir_path, 'MAR-INF/MANIFEST.json')
+ with open(manifest_path) as manifest_file:
+ manifest = json.load(manifest_file)
+ serialized_file = os.path.join(mar_dir_path, manifest['model']['serializedFile'])
+ if 'modelFile' in manifest['model']:
+ model_file = os.path.join(mar_dir_path, manifest['model']['modelFile'])
+ return _extract_pytorch_model(log_file, serialized_file, model_file)
+ else:
+ return _extract_torchscript_model(log_file, serialized_file)
+
+
+def _convert(args):
+ _apply_verbosity(args.verbose)
+
+ # get file path to log
+ dir_path = os.path.dirname(os.path.realpath(__file__))
+ logfile_path = os.path.realpath(args.output_path) + '.log'
+ with open(logfile_path, 'wb') as f, tempfile.TemporaryDirectory() as tmpdir:
+ # save intermediate
+ if _utils._is_valid_attr(args, 'save_intermediate'):
+ tmpdir = os.path.dirname(logfile_path)
+ # convert pytorch to onnx model
+ input_path = getattr(args, 'input_path')
+ model_file = getattr(args, 'python_path')
+
+ if input_path[-4:] == '.mar':
+ pytorch_model = _extract_mar_model(f, tmpdir, input_path)
+ elif model_file is None:
+ pytorch_model = _extract_torchscript_model(f, input_path)
+ else:
+ pytorch_model = _extract_pytorch_model(f, input_path, model_file)
+
+ input_shapes = _parse_shapes(getattr(args, 'input_shapes'))
+ input_types = _parse_types(getattr(args, 'input_types'))
+
+ if len(input_shapes) != len(input_types):
+ raise ValueError('number of input shapes and input types must be equal')
+
+ sample_inputs = []
+ for input_spec in zip(input_shapes, input_types):
+ sample_inputs += [torch.ones(input_spec[0], dtype=input_spec[1])]
+
+ f.write(('Trying to run inference on the loaded model\n').encode())
+ sample_outputs = pytorch_model(*sample_inputs)
+ f.write(('Acquired sample outputs\n').encode())
+
+ onnx_output_name = os.path.splitext(os.path.basename(
+ args.output_path))[0] + '.onnx'
+ onnx_output_path = os.path.join(tmpdir, onnx_output_name)
+
+ onnx_saved = False
+ # some operations are not supported in early opset versions, try several
+ for onnx_opset_version in range(9, 15):
+ f.write(('Trying to save onnx model using opset version ' +
+ str(onnx_opset_version) + '\n').encode())
+ try:
+ torch.onnx.export(
+ pytorch_model,
+ tuple(sample_inputs),
+ onnx_output_path,
+ example_outputs=sample_outputs,
+ opset_version=onnx_opset_version)
+ onnx_saved = True
+ break
+ except Exception:
+ f.write(('attempt failed\n').encode())
+
+ if not onnx_saved:
+ raise ValueError('Failed to save temporary onnx model')
+
+ # convert onnx to tf saved model
+ onnx_model = onnx.load(onnx_output_path)
+
+ options = onnx_legalizer.LegalizeOptions()
+ options.unroll_rnn = _utils._is_valid_attr(args, 'unroll_rnn')
+ options.unroll_lstm = _utils._is_valid_attr(args, 'unroll_lstm')
+ onnx_legalizer.legalize(onnx_model, options)
+
+ tf_savedmodel = onnx_tf.backend.prepare(onnx_model)
+
+ savedmodel_name = os.path.splitext(os.path.basename(
+ args.output_path))[0] + '.savedmodel'
+ savedmodel_output_path = os.path.join(tmpdir, savedmodel_name)
+ tf_savedmodel.export_graph(savedmodel_output_path)
+
+ # make a command to convert from tf to tflite
+ tf2tfliteV2_path = os.path.join(dir_path, 'tf2tfliteV2.py')
+ tf2tfliteV2_output_name = os.path.splitext(os.path.basename(
+ args.output_path))[0] + '.tflite'
+ tf2tfliteV2_output_path = os.path.join(tmpdir, tf2tfliteV2_output_name)
+
+ del args.input_shapes
+ tf2tfliteV2_cmd = _make_cmd.make_tf2tfliteV2_cmd(
+ args, tf2tfliteV2_path, savedmodel_output_path, tf2tfliteV2_output_path)
+
+ f.write((' '.join(tf2tfliteV2_cmd) + '\n').encode())
+
+ # convert tf to tflite
+ _utils._run(tf2tfliteV2_cmd, logfile=f)
+
+ # make a command to convert from tflite to circle
+ tflite2circle_path = os.path.join(dir_path, 'tflite2circle')
+ tflite2circle_cmd = _make_cmd.make_tflite2circle_cmd(tflite2circle_path,
+ tf2tfliteV2_output_path,
+ getattr(args, 'output_path'))
+
+ f.write((' '.join(tflite2circle_cmd) + '\n').encode())
+
+ # convert tflite to circle
+ _utils._run(tflite2circle_cmd, err_prefix="tflite2circle", logfile=f)
+
+
+def main():
+ # parse arguments
+ parser = _get_parser()
+ args = _parse_arg(parser)
+
+ # parse configuration file
+ _utils._parse_cfg(args, 'one-import-pytorch')
+
+ # verify arguments
+ _verify_arg(parser, args)
+
+ # convert
+ _convert(args)
+
+
+if __name__ == '__main__':
+ _utils._safemain(main, __file__)
diff --git a/compiler/one-cmds/one-import-tf b/compiler/one-cmds/one-import-tf
index e2294caa6..999255a34 100644
--- a/compiler/one-cmds/one-import-tf
+++ b/compiler/one-cmds/one-import-tf
@@ -25,9 +25,14 @@ import subprocess
import sys
import tempfile
+import onelib.make_cmd as _make_cmd
import utils as _utils
+def get_driver_cfg_section():
+ return "one-import-tf"
+
+
def _get_parser():
parser = argparse.ArgumentParser(
description='command line tool to convert TensorFlow to circle')
@@ -146,7 +151,7 @@ def _convert(args):
tf2tfliteV2_output_path = os.path.join(
tmpdir,
os.path.splitext(os.path.basename(args.output_path))[0]) + '.tflite'
- tf2tfliteV2_cmd = _utils._make_tf2tfliteV2_cmd(args, tf2tfliteV2_path,
+ tf2tfliteV2_cmd = _make_cmd.make_tf2tfliteV2_cmd(args, tf2tfliteV2_path,
getattr(args, 'input_path'),
tf2tfliteV2_output_path)
@@ -157,7 +162,7 @@ def _convert(args):
# make a command to convert from tflite to circle
tflite2circle_path = os.path.join(dir_path, 'tflite2circle')
- tflite2circle_cmd = _utils._make_tflite2circle_cmd(tflite2circle_path,
+ tflite2circle_cmd = _make_cmd.make_tflite2circle_cmd(tflite2circle_path,
tf2tfliteV2_output_path,
getattr(args, 'output_path'))
diff --git a/compiler/one-cmds/one-import-tflite b/compiler/one-cmds/one-import-tflite
index 7eee0484a..2d756bff6 100644
--- a/compiler/one-cmds/one-import-tflite
+++ b/compiler/one-cmds/one-import-tflite
@@ -24,12 +24,17 @@ import os
import subprocess
import sys
+import onelib.make_cmd as _make_cmd
import utils as _utils
# TODO Find better way to suppress trackback on error
sys.tracebacklimit = 0
+def get_driver_cfg_section():
+ return "one-import-tflite"
+
+
def _get_parser():
parser = argparse.ArgumentParser(
description='command line tool to convert TensorFlow lite to circle')
@@ -77,7 +82,7 @@ def _convert(args):
with open(logfile_path, 'wb') as f:
# make a command to convert from tflite to circle
tflite2circle_path = os.path.join(dir_path, 'tflite2circle')
- tflite2circle_cmd = _utils._make_tflite2circle_cmd(tflite2circle_path,
+ tflite2circle_cmd = _make_cmd.make_tflite2circle_cmd(tflite2circle_path,
getattr(args, 'input_path'),
getattr(args, 'output_path'))
diff --git a/compiler/one-cmds/one-optimize b/compiler/one-cmds/one-optimize
index a64abff19..8b1f3f7be 100644
--- a/compiler/one-cmds/one-optimize
+++ b/compiler/one-cmds/one-optimize
@@ -24,6 +24,8 @@ import os
import subprocess
import sys
+import onelib.constant as _constant
+import onelib.make_cmd as _make_cmd
import utils as _utils
# TODO Find better way to suppress trackback on error
@@ -60,7 +62,7 @@ def _get_parser():
'-o', '--output_path', type=str, help='full filepath of the output file')
# optimization pass
- for opt in _utils._CONSTANT.OPTIMIZATION_OPTS:
+ for opt in _constant.CONSTANT.OPTIMIZATION_OPTS:
# opt = (option_name, help_message)
circle2circle_group.add_argument('--' + opt[0], action='store_true', help=opt[1])
@@ -99,7 +101,7 @@ def _optimize(args):
with open(logfile_path, 'wb') as f:
# make a command to optimize circle model
circle2circle_path = os.path.join(dir_path, 'circle2circle')
- circle2circle_cmd = _utils._make_circle2circle_cmd(args, circle2circle_path,
+ circle2circle_cmd = _make_cmd.make_circle2circle_cmd(args, circle2circle_path,
getattr(args, 'input_path'),
getattr(args, 'output_path'))
diff --git a/compiler/one-cmds/one-prepare-venv b/compiler/one-cmds/one-prepare-venv
index 285191761..0f75166a7 100644
--- a/compiler/one-cmds/one-prepare-venv
+++ b/compiler/one-cmds/one-prepare-venv
@@ -26,16 +26,17 @@ VENV_PYTHON=${DRIVER_PATH}/venv/bin/python
if [ ! -f ${VENV_ACTIVATE} ]; then
# Create python virtual enviornment
- python3 -m venv "${DRIVER_PATH}/venv"
+ python3.8 -m venv "${DRIVER_PATH}/venv"
fi
# NOTE version
# - https://github.com/onnx/onnx/blob/master/docs/Versioning.md
# - https://github.com/onnx/onnx-tensorflow/blob/master/Versioning.md
-VER_TENSORFLOW=2.3.0
-VER_ONNX=1.10.1
-VER_ONNX_TF=1.9.0
+VER_TENSORFLOW=2.8.0
+VER_ONNX=1.11.0
+VER_ONNXRUNTIME=1.11.0
+VER_ONNX_TF=1.10.0
# Install tensorflow
@@ -54,18 +55,32 @@ if [[ ! -z "$ONE_PREPVENV_PIP_OPTION" ]]; then
PIP_OPTIONS+=" ${ONE_PREPVENV_PIP_OPTION} "
fi
-# TODO remove version number of 'pip==20.2.1 setuptools==49.3.0'
-# NOTE adding version is for temporary hotfix of setuptools 50.x.y version
-${VENV_PYTHON} -m pip ${PIP_OPTIONS} install -U pip==20.2.1 setuptools==49.3.0
-${VENV_PYTHON} -m pip ${PIP_OPTIONS} install tensorflow-cpu==${VER_TENSORFLOW}
-${VENV_PYTHON} -m pip ${PIP_OPTIONS} install Pillow==6.2.2
+${VENV_PYTHON} -m pip ${PIP_OPTIONS} install --upgrade pip setuptools
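+# NOTE set EXT_TENSORFLOW_WHL to install a prebuilt tensorflow wheel file
+# instead of the pypi package, e.g.
+#   EXT_TENSORFLOW_WHL=/path/to/tensorflow_cpu-2.8.0-<tags>.whl one-prepare-venv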
+if [ -n "${EXT_TENSORFLOW_WHL}" ]; then
+ ${VENV_PYTHON} -m pip ${PIP_OPTIONS} install ${EXT_TENSORFLOW_WHL}
+else
+ ${VENV_PYTHON} -m pip ${PIP_OPTIONS} install tensorflow-cpu==${VER_TENSORFLOW}
+fi
+${VENV_PYTHON} -m pip ${PIP_OPTIONS} install Pillow
+${VENV_PYTHON} -m pip ${PIP_OPTIONS} install tensorflow_probability
# Install PyTorch and ONNX related
-${VENV_PYTHON} -m pip ${PIP_OPTIONS} install torch==1.8.1+cpu -f https://download.pytorch.org/whl/torch_stable.html
+# NOTE set ONE_PREPVENV_TORCH_STABLE to override 'torch_stable.html' URL.
+# torch_stable.html points to download URL of torch wheel file(s)
+# but sometimes the server gets unstable, especially from in-house CI.
+TORCH_STABLE_URL="https://download.pytorch.org/whl/torch_stable.html"
+if [[ ! -z "$ONE_PREPVENV_TORCH_STABLE" ]]; then
+ TORCH_STABLE_URL="${ONE_PREPVENV_TORCH_STABLE}"
+fi
+${VENV_PYTHON} -m pip ${PIP_OPTIONS} install torch==1.11.0+cpu -f ${TORCH_STABLE_URL}
+
+${VENV_PYTHON} -m pip ${PIP_OPTIONS} install onnx==${VER_ONNX}
+
+${VENV_PYTHON} -m pip ${PIP_OPTIONS} install onnxruntime==${VER_ONNXRUNTIME}
# Provide install of custom onnx-tf
if [ -n "${EXT_ONNX_TF_WHL}" ]; then
- ${VENV_PYTHON} -m pip ${PIP_OPTIONS} install onnx==${VER_ONNX} ${EXT_ONNX_TF_WHL}
+ ${VENV_PYTHON} -m pip ${PIP_OPTIONS} install ${EXT_ONNX_TF_WHL}
else
- ${VENV_PYTHON} -m pip ${PIP_OPTIONS} install onnx==${VER_ONNX} onnx-tf==${VER_ONNX_TF}
+ ${VENV_PYTHON} -m pip ${PIP_OPTIONS} install onnx-tf==${VER_ONNX_TF}
fi
diff --git a/compiler/one-cmds/one-quantize b/compiler/one-cmds/one-quantize
index 22d4ddb0e..f2eff24bd 100644
--- a/compiler/one-cmds/one-quantize
+++ b/compiler/one-cmds/one-quantize
@@ -119,6 +119,18 @@ def _get_parser():
help=
"calibration algorithm for post-training quantization (supported: percentile/moving_average, default=percentile). 'percentile' mode uses the n-th percentiles as min/max values. 'moving_average' mode records the moving average of min/max."
)
+ quantization_group.add_argument(
+ '--TF-style_maxpool',
+ action='store_true',
+ help=
+ "Force MaxPool Op to have the same input/output quantparams. NOTE: This option can degrade accuracy of some models.)"
+ )
+ quantization_group.add_argument(
+ '--quant_config',
+ type=str,
+ help=
+ "Path to the quantization configuration file."
+ )
# arguments for force_quantparam option
force_quantparam_group = parser.add_argument_group(
@@ -137,6 +149,19 @@ def _get_parser():
force_quantparam_group.add_argument(
'--zero_point', type=int, action='append', help='zero point (int)')
+ # arguments for copy_quantparam option
+ copy_quantparam_group = parser.add_argument_group(
+ 'arguments for copy_quantparam option')
+
+ copy_quantparam_group.add_argument(
+ '--copy_quantparam',
+ action='store_true',
+ help='copy quantparam (scale, zero_point) of a tensor to another tensor.')
+ copy_quantparam_group.add_argument(
+ '--src_tensor_name', type=str, action='append', help='tensor name (string)')
+ copy_quantparam_group.add_argument(
+ '--dst_tensor_name', type=str, action='append', help='tensor name (string)')
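+ # e.g. to copy the quantparam of tensor 'a' to tensor 'b' (hypothetical names):
+ #   one-quantize --copy_quantparam --src_tensor_name a --dst_tensor_name b \
+ #                --input_path in.circle --output_path out.circle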
+
return parser
@@ -171,6 +196,11 @@ def _verify_arg(parser, args):
missing.append('--scale')
if not _utils._is_valid_attr(args, 'zero_point'):
missing.append('--zero_point')
+ if _utils._is_valid_attr(args, 'copy_quantparam'):
+ if not _utils._is_valid_attr(args, 'src_tensor_name'):
+ missing.append('--src_tensor_name')
+ if not _utils._is_valid_attr(args, 'dst_tensor_name'):
+ missing.append('--dst_tensor_name')
if len(missing):
parser.error('the following arguments are required: ' + ' '.join(missing))
if _utils._is_valid_attr(args, 'force_quantparam'):
@@ -180,6 +210,12 @@ def _verify_arg(parser, args):
if len(tensors) != len(scales) or len(tensors) != len(zerops):
parser.error(
'The same number of tensor_name, scale, and zero_point should be given.')
+ if _utils._is_valid_attr(args, 'copy_quantparam'):
+ src_tensors = getattr(args, 'src_tensor_name')
+ dst_tensors = getattr(args, 'dst_tensor_name')
+ if len(src_tensors) != len(dst_tensors):
+ parser.error(
+ 'The same number of src_tensor_name and dst_tensor_name should be given.')
def _parse_arg(parser):
@@ -197,6 +233,11 @@ def _quantize(args):
_write_qparam(args)
return
+ if _utils._is_valid_attr(args, 'copy_quantparam'):
+ # copy quantization parameters
+ _copy_qparam(args)
+ return
+
# get file path to log
dir_path = os.path.dirname(os.path.realpath(__file__))
logfile_path = os.path.realpath(args.output_path) + '.log'
@@ -294,12 +335,19 @@ def _quantize(args):
circle_quantizer_cmd.append(getattr(args, 'quantized_dtype'))
if _utils._is_valid_attr(args, 'granularity'):
circle_quantizer_cmd.append(getattr(args, 'granularity'))
+ if _utils._is_valid_attr(args, 'TF-style_maxpool'):
+ circle_quantizer_cmd.append('--TF-style_maxpool')
if _utils._is_valid_attr(args, 'input_type'):
circle_quantizer_cmd.append('--input_type')
circle_quantizer_cmd.append(getattr(args, 'input_type'))
if _utils._is_valid_attr(args, 'output_type'):
circle_quantizer_cmd.append('--output_type')
circle_quantizer_cmd.append(getattr(args, 'output_type'))
+ if _utils._is_valid_attr(args, 'quant_config'):
+ # NOTE --config conflicts with the --config option of onecc, so
+ # we use quant_config for one-quantize
+ circle_quantizer_cmd.append('--config')
+ circle_quantizer_cmd.append(getattr(args, 'quant_config'))
# input and output path
circle_quantizer_cmd.append(tmp_output_path_2)
if _utils._is_valid_attr(args, 'output_path'):
@@ -351,6 +399,40 @@ def _write_qparam(args):
_utils._run(circle_quantizer_cmd, err_prefix="circle_quantizer", logfile=f)
+def _copy_qparam(args):
+ # get file path to log
+ dir_path = os.path.dirname(os.path.realpath(__file__))
+ logfile_path = os.path.realpath(args.output_path) + '.log'
+
+ with open(logfile_path, 'wb') as f:
+ # get driver path
+ circle_quantizer_path = os.path.join(dir_path, 'circle-quantizer')
+
+ # make a command to write qparams to the tensors
+ circle_quantizer_cmd = [circle_quantizer_path]
+ # verbose
+ if _utils._is_valid_attr(args, 'verbose'):
+ circle_quantizer_cmd.append('--verbose')
+ if _utils._is_valid_attr(args, 'src_tensor_name'):
+ src_tensor_name = getattr(args, 'src_tensor_name')
+ if _utils._is_valid_attr(args, 'dst_tensor_name'):
+ dst_tensor_name = getattr(args, 'dst_tensor_name')
+ for (src, dst) in zip(src_tensor_name, dst_tensor_name):
+ circle_quantizer_cmd.append('--copy_quantparam')
+ circle_quantizer_cmd.append(src)
+ circle_quantizer_cmd.append(dst)
+ # input and output path
+ if _utils._is_valid_attr(args, 'input_path'):
+ circle_quantizer_cmd.append(getattr(args, 'input_path'))
+ if _utils._is_valid_attr(args, 'output_path'):
+ circle_quantizer_cmd.append(getattr(args, 'output_path'))
+
+ f.write((' '.join(circle_quantizer_cmd) + '\n').encode())
+
+ # run circle-quantizer
+ _utils._run(circle_quantizer_cmd, err_prefix="circle_quantizer", logfile=f)
+
+
def main():
# parse arguments
parser = _get_parser()
diff --git a/compiler/one-cmds/onecc b/compiler/one-cmds/onecc
index ca440d852..25682ff4b 100644
--- a/compiler/one-cmds/onecc
+++ b/compiler/one-cmds/onecc
@@ -104,10 +104,6 @@ def _verify_arg(parser, args):
def _get_driver_name(driver_name):
return {
- 'one-import-bcq': 'one-import-bcq',
- 'one-import-tf': 'one-import-tf',
- 'one-import-tflite': 'one-import-tflite',
- 'one-import-onnx': 'one-import-onnx',
'one-optimize': 'one-optimize',
'one-quantize': 'one-quantize',
'one-pack': 'one-pack',
@@ -130,19 +126,15 @@ def _is_available_driver(config, driver_name):
'onecc', driver_name)
-def _verify_cfg(driver_list, config):
+def _verify_cfg(import_driver_list, config):
if not config.has_section('onecc'):
raise ImportError('[onecc] section is required in configuration file')
import_driver_cnt = 0
- if _is_available_driver(config, 'one-import-tf'):
- import_driver_cnt += 1
- if _is_available_driver(config, 'one-import-tflite'):
- import_driver_cnt += 1
- if _is_available_driver(config, 'one-import-bcq'):
- import_driver_cnt += 1
- if _is_available_driver(config, 'one-import-onnx'):
- import_driver_cnt += 1
+ for d in import_driver_list:
+ if _is_available_driver(config, d):
+ import_driver_cnt += 1
+
if import_driver_cnt > 1:
raise AssertionError('Only one import-* driver can be executed')
@@ -170,22 +162,27 @@ def main():
config = _parse_cfg(args)
# verify configuration file
- drivers = [
- 'one-import-tf', 'one-import-tflite', 'one-import-bcq', 'one-import-onnx',
+ bin_dir = os.path.dirname(os.path.realpath(__file__))
+ import_drivers_dict = _utils._detect_one_import_drivers(bin_dir)
+ transform_drivers = [
'one-optimize', 'one-quantize', 'one-pack', 'one-codegen', 'one-profile'
]
- _verify_cfg(drivers, config)
+ _verify_cfg(import_drivers_dict, config)
# get sections to run
section_to_run = []
- for d in drivers:
+ for d in list(import_drivers_dict) + transform_drivers:
if _is_available_driver(config, d):
section_to_run.append(d)
# run
dir_path = os.path.dirname(os.path.realpath(__file__))
for section in section_to_run:
- driver_name = _get_driver_name(section)
+ if section in import_drivers_dict:
+ # we already have the driver name in the dict
+ driver_name = import_drivers_dict[section]
+ else:
+ driver_name = _get_driver_name(section)
options = ['--config', getattr(args, 'config'), '--section', section]
if _utils._is_valid_attr(args, 'verbose'):
options.append('--verbose')
diff --git a/compiler/one-cmds/onelib/constant.py b/compiler/one-cmds/onelib/constant.py
new file mode 100644
index 000000000..7ddd7382d
--- /dev/null
+++ b/compiler/one-cmds/onelib/constant.py
@@ -0,0 +1,86 @@
+#!/usr/bin/env python
+
+# Copyright (c) 2022 Samsung Electronics Co., Ltd. All Rights Reserved
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+class CONSTANT:
+ __slots__ = () # This prevents access via __dict__.
+ OPTIMIZATION_OPTS = (
+ # (OPTION_NAME, HELP_MESSAGE)
+ ('O1', 'enable O1 optimization pass'),
+ ('convert_nchw_to_nhwc',
+ 'Experimental: This will convert NCHW operators to NHWC under the assumption that input model is NCHW.'
+ ),
+ ('expand_broadcast_const', 'expand broadcastable constant node inputs'),
+ ('nchw_to_nhwc_input_shape',
+ 'convert the input shape of the model (argument for convert_nchw_to_nhwc)'),
+ ('nchw_to_nhwc_output_shape',
+ 'convert the output shape of the model (argument for convert_nchw_to_nhwc)'),
+ ('fold_add_v2', 'fold AddV2 op with constant inputs'),
+ ('fold_cast', 'fold Cast op with constant input'),
+ ('fold_dequantize', 'fold Dequantize op'),
+ ('fold_dwconv', 'fold Depthwise Convolution op with constant inputs'),
+ ('fold_gather', 'fold Gather op'),
+ ('fold_sparse_to_dense', 'fold SparseToDense op'),
+ ('forward_reshape_to_unaryop', 'Forward Reshape op'),
+ ('fuse_add_with_tconv', 'fuse Add op to Transposed Convolution op'),
+ ('fuse_add_with_fully_connected', 'fuse Add op to FullyConnected op'),
+ ('fuse_batchnorm_with_conv', 'fuse BatchNorm op to Convolution op'),
+ ('fuse_batchnorm_with_dwconv', 'fuse BatchNorm op to Depthwise Convolution op'),
+ ('fuse_batchnorm_with_tconv', 'fuse BatchNorm op to Transposed Convolution op'),
+ ('fuse_bcq', 'apply Binary Coded Quantization'),
+ ('fuse_preactivation_batchnorm',
+ 'fuse BatchNorm operators of pre-activations to Convolution op'),
+ ('fuse_mean_with_mean', 'fuse two consecutive Mean ops'),
+ ('fuse_transpose_with_mean',
+ 'fuse Mean with a preceding Transpose under certain conditions'),
+ ('make_batchnorm_gamma_positive',
+ 'change negative gamma of BatchNorm to a small positive value (1e-10).'
+ ' Note that this pass can change the execution result of the model.'
+ ' So, use it only when the impact is known to be acceptable.'),
+ ('fuse_activation_function', 'fuse Activation function to a preceding operator'),
+ ('fuse_instnorm', 'fuse ops to InstanceNorm operator'),
+ ('replace_cw_mul_add_with_depthwise_conv',
+ 'replace channel-wise Mul/Add with DepthwiseConv2D'),
+ ('remove_fakequant', 'remove FakeQuant ops'),
+ ('remove_quantdequant', 'remove Quantize-Dequantize sequence'),
+ ('remove_redundant_quantize', 'remove redundant Quantize ops'),
+ ('remove_redundant_reshape', 'fuse or remove subsequent Reshape ops'),
+ ('remove_redundant_transpose', 'fuse or remove subsequent Transpose ops'),
+ ('remove_unnecessary_reshape', 'remove unnecessary reshape ops'),
+ ('remove_unnecessary_slice', 'remove unnecessary slice ops'),
+ ('remove_unnecessary_strided_slice', 'remove unnecessary strided slice ops'),
+ ('remove_unnecessary_split', 'remove unnecessary split ops'),
+ ('resolve_customop_add', 'convert Custom(Add) op to Add op'),
+ ('resolve_customop_batchmatmul',
+ 'convert Custom(BatchMatmul) op to BatchMatmul op'),
+ ('resolve_customop_matmul', 'convert Custom(Matmul) op to Matmul op'),
+ ('resolve_customop_max_pool_with_argmax',
+ 'convert Custom(MaxPoolWithArgmax) to net of builtin operators'),
+ ('shuffle_weight_to_16x1float32',
+ 'convert weight format of FullyConnected op to SHUFFLED16x1FLOAT32.'
+ ' Note that it only converts weights whose number of rows is a multiple of 16'),
+ ('substitute_pack_to_reshape', 'convert single input Pack op to Reshape op'),
+ ('substitute_padv2_to_pad', 'convert PadV2 to Pad under certain conditions'),
+ ('substitute_splitv_to_split', 'convert SplitV to Split under certain conditions'),
+ ('substitute_squeeze_to_reshape', 'convert Squeeze to Reshape under certain conditions'),
+ ('substitute_strided_slice_to_reshape',
+ 'convert StridedSlice to Reshape under certain conditions'),
+ ('substitute_transpose_to_reshape',
+ 'convert Transpose to Reshape under certain conditions'),
+ ('transform_min_max_to_relu6', 'transform Minimum-Maximum pattern to Relu6 op'),
+ ('transform_min_relu_to_relu6', 'transform Minimum(6)-Relu pattern to Relu6 op'))
+
+
+CONSTANT = CONSTANT()
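+
+# Usage sketch (illustration only, not part of this module): consumers of this
+# table, such as option parsers, can expose every pass as a boolean flag.
+# Assuming a plain argparse-based driver:
+#
+# import argparse
+# import onelib.constant as _constant
+#
+# parser = argparse.ArgumentParser()
+# for opt_name, opt_help in _constant.CONSTANT.OPTIMIZATION_OPTS:
+# parser.add_argument('--' + opt_name, action='store_true', help=opt_help)
+# args = parser.parse_args()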
diff --git a/compiler/one-cmds/onelib/make_cmd.py b/compiler/one-cmds/onelib/make_cmd.py
new file mode 100644
index 000000000..d8380f28d
--- /dev/null
+++ b/compiler/one-cmds/onelib/make_cmd.py
@@ -0,0 +1,100 @@
+#!/usr/bin/env python
+
+# Copyright (c) 2022 Samsung Electronics Co., Ltd. All Rights Reserved
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+import os
+import sys
+
+import onelib.constant as _constant
+
+def _is_valid_attr(args, attr):
+ return hasattr(args, attr) and getattr(args, attr)
+
+
+def make_tf2tfliteV2_cmd(args, driver_path, input_path, output_path):
+ """make a command for running tf2tfliteV2.py"""
+ cmd = [sys.executable, os.path.expanduser(driver_path)]
+ # verbose
+ if _is_valid_attr(args, 'verbose'):
+ cmd.append('--verbose')
+ # model_format
+ if _is_valid_attr(args, 'model_format_cmd'):
+ cmd.append(getattr(args, 'model_format_cmd'))
+ elif _is_valid_attr(args, 'model_format'):
+ cmd.append('--' + getattr(args, 'model_format'))
+ else:
+ cmd.append('--graph_def') # default value
+ # converter version
+ if _is_valid_attr(args, 'converter_version_cmd'):
+ cmd.append(getattr(args, 'converter_version_cmd'))
+ elif _is_valid_attr(args, 'converter_version'):
+ cmd.append('--' + getattr(args, 'converter_version'))
+ else:
+ cmd.append('--v1') # default value
+ # input_path
+ if _is_valid_attr(args, 'input_path'):
+ cmd.append('--input_path')
+ cmd.append(os.path.expanduser(input_path))
+ # output_path
+ if _is_valid_attr(args, 'output_path'):
+ cmd.append('--output_path')
+ cmd.append(os.path.expanduser(output_path))
+ # input_arrays
+ if _is_valid_attr(args, 'input_arrays'):
+ cmd.append('--input_arrays')
+ cmd.append(getattr(args, 'input_arrays'))
+ # input_shapes
+ if _is_valid_attr(args, 'input_shapes'):
+ cmd.append('--input_shapes')
+ cmd.append(getattr(args, 'input_shapes'))
+ # output_arrays
+ if _is_valid_attr(args, 'output_arrays'):
+ cmd.append('--output_arrays')
+ cmd.append(getattr(args, 'output_arrays'))
+
+ return cmd
+
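+# For illustration, with the defaults above and an args namespace that only
+# sets input/output paths (driver path and file names here are hypothetical),
+# the returned command evaluates to:
+# [sys.executable, '/opt/one/tf2tfliteV2.py', '--graph_def', '--v1',
+# '--input_path', 'model.pb', '--output_path', 'model.tflite']
+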
+
+def make_tflite2circle_cmd(driver_path, input_path, output_path):
+ """make a command for running tflite2circle"""
+ cmd = [driver_path, input_path, output_path]
+ return [os.path.expanduser(c) for c in cmd]
+
+
+def make_circle2circle_cmd(args, driver_path, input_path, output_path):
+ """make a command for running circle2circle"""
+ cmd = [os.path.expanduser(c) for c in [driver_path, input_path, output_path]]
+ # profiling
+ if _is_valid_attr(args, 'generate_profile_data'):
+ cmd.append('--generate_profile_data')
+ # optimization passes (only true/false options)
+ # TODO Support options that take one or more arguments
+ for opt in _constant.CONSTANT.OPTIMIZATION_OPTS:
+ if _is_valid_attr(args, opt[0]):
+ # pass the option to the driver: ./driver --<option>
+ if type(getattr(args, opt[0])) is bool:
+ cmd.append('--' + opt[0])
+ # This check exists for the config file interface: an option is usually
+ # written as SomeOption=True, but during development a user may write
+ # SomeOption=False instead of removing the option, and such a value must
+ # not enable the pass.
+ if type(getattr(args, opt[0])) is str and not getattr(
+ args, opt[0]).lower() in ['false', '0', 'n']:
+ cmd.append('--' + opt[0])
+
+ return cmd
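+
+# Behaviour sketch (hypothetical args attribute): an option parsed from the
+# command line arrives as a bool, one read from a config file as a string,
+# and both spellings enable the pass:
+# args.fold_cast = True -> cmd gains '--fold_cast'
+# args.fold_cast = 'True' -> cmd gains '--fold_cast'
+# args.fold_cast = 'False' -> cmd unchanged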
diff --git a/compiler/one-cmds/onnx_legalizer.py b/compiler/one-cmds/onnx_legalizer.py
new file mode 100755
index 000000000..26c2b75b9
--- /dev/null
+++ b/compiler/one-cmds/onnx_legalizer.py
@@ -0,0 +1,1065 @@
+#!/usr/bin/python3
+
+# Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+import onnx
+import onnx.numpy_helper
+import sys
+import numpy as np
+import re
+
+# Transform onnx model to make it compilable with our toolchain
+#
+ # This code works with an onnx model in protobuf format. See the protocol buffers format in
+ # https://github.com/onnx/onnx/blob/96516aecd4c110b0ac57eba08ac236ebf7205728/onnx/onnx.proto3
+ #
+ # More examples of handling onnx models can be found here:
+# https://github.com/onnx/onnx/tree/96516aecd4c110b0ac57eba08ac236ebf7205728/onnx/examples
+#
+# List of transformations:
+# - Replace RNN operation with unrolled subgraph
+# - Replace LSTM operation with unrolled subgraph
+
+
+class LegalizeOptions:
+ """Controls transformations that legalizer apply
+
+ Attributes:
+ unroll_rnn (bool): default is False. If True - unrolls RNN operations
+ unroll_lstm (bool): default is False. If True - unrolls LSTM operations
+ """
+
+ unroll_rnn = False
+ unroll_lstm = False
+
+
+def _reverse_str(s):
+ return ''.join(reversed(s))
+
+
+def _parse_tensor_name(name):
+ """Splits tensor name to base part and serial number
+
+ Most tensor names have the following format: "tensor_123".
+ This function breaks such a name into two values: "tensor_" and 123.
+ Names like "321" are broken into "" and 321.
+
+ The serial number is used to create unique tensor names from a given base name.
+
+ Args:
+ name (str): tensor name
+
+ Returns:
+ tuple of str, int: base name and serial number of tensor
+ """
+ rev = _reverse_str(name)
+ m = re.match(r'(\d*)(.*)', rev)
+ if m.groups()[0] != '':
+ return (_reverse_str(m.groups()[1]), int(_reverse_str(m.groups()[0])))
+ else:
+ return (_reverse_str(m.groups()[1]), 0)
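+
+# For illustration, _parse_tensor_name behaves as follows:
+# _parse_tensor_name('tensor_123') -> ('tensor_', 123)
+# _parse_tensor_name('321') -> ('', 321)
+# _parse_tensor_name('tensor') -> ('tensor', 0)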
+
+
+class _ModelTransformerHelper:
+ """Helper for onnx model transformation
+
+ This helper is used for convenient operation replacement in an onnx model
+
+ Attributes:
+ _model (onnx.onnx_ml_pb2.ModelProto): target model that should be changed
+ _nodes_to_delete (list of onnx.onnx_ml_pb2.NodeProto): list of replaced operations
+ _insert_id (int): position at which to insert created operations (insertions must keep the node list topologically sorted)
+ _base_name_idx (dict from str to int): maps tensor "base" name to the
+ largest existing serial number. For example, if the model has tensors "t_1", "t_2", "t_4",
+ then _base_name_idx["t_"] == 4.
+ This attribute is used for unique tensor name generation.
+ """
+
+ def __init__(self, model):
+ self._model = model
+ self._nodes_to_delete = []
+ self._insert_id = 0
+ # each tensor name consists of a base name and a unique number, for example
+ # "abc_123": "abc_" is the base name and "123" the unique number;
+ # if the name contains no number, it is treated as number "0"
+
+ # mapping from base names to largest given number
+ self._base_name_idx = {}
+ # gather name information for existing tensors
+ for node in model.graph.node:
+ for t in list(node.input) + list(node.output):
+ base_name, number = _parse_tensor_name(t)
+ if base_name in self._base_name_idx:
+ self._base_name_idx[base_name] = max(self._base_name_idx[base_name],
+ number)
+ else:
+ self._base_name_idx[base_name] = number
+
+ def make_tensor_with_base_name(self, base_name):
+ """ Create unique name for given base_name
+
+ Args:
+ base_name (str): base tensor name
+
+ Returns:
+ str : unique tensor name that starts with base_name
+ """
+ if base_name in self._base_name_idx:
+ self._base_name_idx[base_name] += 1
+ return base_name + str(self._base_name_idx[base_name])
+ else:
+ self._base_name_idx[base_name] = 0
+ return base_name + '0'
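+
+ # For illustration: if the model already contains tensors "t_1", "t_2" and
+ # "t_4" (so _base_name_idx["t_"] == 4), make_tensor_with_base_name("t_")
+ # returns "t_5" and a second call returns "t_6"; an unseen base name yields
+ # "<base_name>0".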
+
+ def make_node(self, opcode, inputs, outputs, *p_args, **k_args):
+ """Create arbitrary node and insert it in graph.
+
+ Args:
+ opcode (str): opcode name of desired operation
+ inputs (list of str): names of input tensors
+ outputs (list of str or int): names of existing tensors to use as the operation's output tensors, or
+ the number of output tensors that should be created
+ p_args: additional arguments for onnx make_node helper
+ k_args: attributes for onnx node
+
+ Returns:
+ list of str: list of output tensor names
+ """
+ if isinstance(outputs, int):
+ outputs = [self.make_tensor_with_base_name('') for _ in range(outputs)]
+ assert isinstance(outputs, list)
+ node = onnx.helper.make_node(opcode, inputs, outputs, *p_args, **k_args)
+ self._model.graph.node.insert(self._insert_id, node)
+ self._insert_id += 1
+ return outputs
+
+ def make_split(self, input, split_sizes, axis):
+ """Create Split operation and insert it in graph.
+
+ Args:
+ input (str): name of input tensor
+ split_sizes (list of int): list of split sizes
+ axis (int): axis to split along
+
+ Returns:
+ list: list of output tensor names
+ """
+ return self.make_node(
+ 'Split', [input], len(split_sizes), axis=axis, split=split_sizes)
+
+ def make_concat(self, inputs, axis):
+ """Create Concat operation and insert it in graph.
+
+ Args:
+ inputs (list of str): list of tensor names to concatenate
+ axis (int): axis to concatenate along
+
+ Returns:
+ str: output tensor name
+ """
+ return self.make_node('Concat', inputs, 1, axis=axis)[0]
+
+ def make_squeeze(self, input, axes):
+ """Create Squeeze operation and insert it in graph.
+
+ Args:
+ input (str): name of input tensor
+ axes (list of int): list of size-1 dimensions to remove
+
+ Returns:
+ str: output tensor name
+ """
+ return self.make_node('Squeeze', [input], 1, axes=axes)[0]
+
+ def make_unsqueeze(self, input, axes):
+ """Create Unsqueeze operation and insert it in graph.
+
+ Args:
+ input (str): name of input tensor
+ axes (list of int): positions at which to insert size-1 dimensions
+
+ Returns:
+ str: output tensor name
+ """
+ return self.make_node('Unsqueeze', [input], 1, axes=axes)[0]
+
+ def make_gemm(self, A, B, C, trans_a=False, trans_b=False):
+ """Create Gemm operation and insert it in graph.
+
+ Result tensor contains A*B + C
+
+ Args:
+ A (str): name of tensor A
+ B (str): name of tensor B
+ C (str): name of tensor C
+ trans_a (bool): if True, transpose tensor A before multiplication
+ trans_b (bool): if True, transpose tensor B before multiplication
+
+ Returns:
+ str: output tensor name
+ """
+ return self.make_node(
+ 'Gemm', [A, B, C], 1, transA=bool(trans_a), transB=bool(trans_b))[0]
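+
+ # For illustration: make_gemm('x', 'w', 'b', trans_b=True) emits a Gemm node
+ # computing x * (w^T) + b, the pattern used by the RNN/LSTM unrolling below.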
+
+ def make_add(self, a, b):
+ """Creates Add operation and insert it in graph.
+
+ Args:
+ a (str): name of left operand tensor
+ b (str): name of right operand tensor
+
+ Returns:
+ str: output tensor name
+ """
+ return self.make_node('Add', [a, b], 1)[0]
+
+ def make_mul(self, a, b):
+ """Creates Mul operation and insert it in graph.
+
+ Args:
+ a (str): name of left operand tensor
+ b (str): name of right operand tensor
+
+ Returns:
+ str: output tensor name
+ """
+ return self.make_node('Mul', [a, b], 1)[0]
+
+ def make_clip(self, input, min, max):
+ """Create Clip operation and insert it in graph.
+
+ Args:
+ input (str): input tensor name
+ min (int/float): lower clip bound
+ max (int/float): upper clip bound
+
+ Returns:
+ str: output tensor name
+ """
+ return self.make_node('Clip', [input], 1, min=min, max=max)[0]
+
+ def make_act(self, input, act_name):
+ """Create activation function operation and insert it in graph.
+
+ Args:
+ input (str): input tensor name
+ act_name (str): name of activation function, one of ['Relu', 'Tanh', 'Sigmoid']
+
+ Returns:
+ str: output tensor name
+ """
+ assert (act_name in ['Relu', 'Tanh', 'Sigmoid'])
+ return self.make_node(act_name, [input], 1)[0]
+
+ def make_constant_tensor(self, tensor_data, base_name):
+ """Creates onnx constant tensor
+
+ Args:
+ tensor_data (numpy.ndarray): tensor data
+ base_name (str): prefix of constant tensor name
+
+ Returns:
+ str: name of created constant tensor
+ """
+ tensor = onnx.numpy_helper.from_array(tensor_data)
+ tensor.name = self.make_tensor_with_base_name(base_name)
+ self._model.graph.initializer.append(tensor)
+ return tensor.name
+
+ def mark_for_deletion(self, node):
+ self._nodes_to_delete += [node]
+
+ def get_insert_id(self):
+ return self._insert_id
+
+ def set_insert_id(self, insert_id):
+ self._insert_id = insert_id
+
+ def delete_marked_nodes(self):
+ for node in self._nodes_to_delete:
+ self._model.graph.node.remove(node)
+
+
+class _TensorInfo:
+ def __init__(self, dtype, shape):
+ self.dtype = dtype
+ self.shape = shape
+
+
+def _get_tensor_infos(model):
+ """Infer tensor shapes and dtypes
+ Args:
+ model (onnx.onnx_ml_pb2.ModelProto): model to process
+
+ Returns:
+ dict from str to _TensorInfo: maps tensor name to shape and dtype information
+ """
+
+ inferred_shape_model = onnx.shape_inference.infer_shapes(model)
+
+ infos = {}
+ for tensor in list(inferred_shape_model.graph.value_info) + list(
+ inferred_shape_model.graph.input):
+ info = _TensorInfo(tensor.type.tensor_type.elem_type, [])
+ for dim in tensor.type.tensor_type.shape.dim:
+ info.shape += [dim.dim_value]
+ infos[tensor.name] = info
+
+ for tensor in list(model.graph.initializer):
+ infos[tensor.name] = _TensorInfo(tensor.data_type, tensor.dims)
+ return infos
+
+
+def _dtype_to_np(dtype):
+ """Convert onnx dtype value to numpy dtype class
+
+ For more types see:
+ https://github.com/onnx/onnx/blob/96516aecd4c110b0ac57eba08ac236ebf7205728/onnx/onnx.proto3#L484
+
+ Args:
+ dtype (int): onnx dtype
+
+ Returns:
+ numpy data type: numpy dtype, like np.float32
+ """
+
+ if dtype == 1:
+ return np.float32
+ else:
+ raise NotImplementedError('unsupported data type')
+
+
+def _generate_one_direction_RNN(transformer, X, W, R, B, initial_h, clip, activation_name):
+ """Generate subgraph of one direction of unrolled RNN layer
+
+ Args:
+ transformer (_ModelTransformerHelper): helper for model generation
+ X (list of str): names of input tensors in sequence. Tensor shapes: [batch_size, input_size].
+ W (str): name of weight tensor
+ R (str): name of recurrence weight tensor
+ B (str): name of bias tensor
+ initial_h (str or None): name of tensor containing initial hidden state. Shape [batch_size, hidden_size]
+ clip (float or None): range which clips input of activations
+ activation_name (str): name of activation function
+
+ Returns:
+ list of str: names of the hidden state tensors, one per time step
+ """
+ # one direction RNN:
+ #
+ # For details see:
+ # https://github.com/onnx/onnx/blob/5cf5feef5ec3fd5527b2fdb6c29780e3b705059f/docs/Changelog.md#RNN-7
+ #
+ # H = f(X*(W^T) + h*(R^T) + B)
+ #
+ # H - new hidden state
+ # h - previous hidden state
+ # X - current input
+ # W - input weights matrix
+ # R - recurrent weights matrix
+ # Wb - input weights matmul bias
+ # Rb - recurrent weights matmul bias
+ # f - activation function
+
+ seq_length = len(X)
+ first_iter = 0
+ state_tensors = []
+ if initial_h is not None:
+ previous_state_tensor = initial_h
+ else:
+ first_iter = 1
+ state_tensor = transformer.make_gemm(X[0], W, B, trans_b=True)
+ if clip is not None:
+ state_tensor = transformer.make_clip(state_tensor, min=-clip, max=clip)
+ previous_state_tensor = transformer.make_act(state_tensor, activation_name)
+ state_tensors += [previous_state_tensor]
+
+ for i in range(first_iter, seq_length):
+ state_tensor = transformer.make_gemm(X[i], W, B, trans_b=True)
+ state_tensor = transformer.make_gemm(
+ previous_state_tensor, R, state_tensor, trans_b=True)
+ if clip is not None:
+ state_tensor = transformer.make_clip(state_tensor, min=-clip, max=clip)
+ previous_state_tensor = transformer.make_act(state_tensor, activation_name)
+ state_tensors += [previous_state_tensor]
+ return state_tensors
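+
+# For illustration, with X = [x0, x1] and a given initial_h the loop above
+# generates (clipping omitted):
+# h0 = f(x0*(W^T) + initial_h*(R^T) + B)
+# h1 = f(x1*(W^T) + h0*(R^T) + B)
+# and returns [h0, h1].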
+
+
+def _transform_unidirectional_RNN(transformer, original_node, x, tensor_infos, activation,
+ clip, direction, hidden_size, layout):
+ """Generate Simple (forward or reverse) unrolled RNN
+
+ Args:
+ transformer (_ModelTransformerHelper): transformation helper
+ original_node (onnx.onnx_ml_pb2.NodeProto): unidirectional RNN operation to unroll
+ x (list of str): list of input tensors (input tensor split along "time" dimension)
+ tensor_infos (dict from str to _TensorInfo): maps tensor name to its shape and dtype info
+ activation (str): name of activation function
+ clip (float or None): range which clips input of activations
+ direction (str): "forward" or "reverse"
+ hidden_size (int): size of hidden state
+ layout (int): See attribute description:
+ https://github.com/onnx/onnx/blob/5cf5feef5ec3fd5527b2fdb6c29780e3b705059f/docs/Operators.md#attributes-56
+ """
+
+ inputs = original_node.input
+ outputs = original_node.output
+ if direction == 'reverse':
+ x.reverse()
+ w = transformer.make_squeeze(inputs[1], axes=[0])
+ r = transformer.make_squeeze(inputs[2], axes=[0])
+ if len(inputs) > 3 and inputs[3] != '':
+ raw_bias_tensor = transformer.make_squeeze(inputs[3], axes=[0])
+ splitted_bias_tensors = transformer.make_split(
+ raw_bias_tensor, split_sizes=[hidden_size] * 2, axis=0)
+ b = transformer.make_add(splitted_bias_tensors[0], splitted_bias_tensors[1])
+ else:
+ data_type = _dtype_to_np(tensor_infos[inputs[2]].dtype)
+ b = transformer.make_constant_tensor(
+ np.zeros(hidden_size, dtype=data_type), "zero_bias")
+ if len(inputs) > 5 and inputs[5] != '':
+ direction_dim = layout
+ initial_h = transformer.make_squeeze(inputs[5], axes=[direction_dim])
+ else:
+ initial_h = None
+ state_tensors = _generate_one_direction_RNN(transformer, x, w, r, b, initial_h, clip,
+ activation)
+ y_direction_dim = layout + 1
+ y_h_direction_dim = layout
+ state_layout_tensors = []
+ seq_length_dim = layout
+ for state in state_tensors:
+ state_layout_tensors += [
+ transformer.make_unsqueeze(state, axes=[seq_length_dim, y_direction_dim])
+ ]
+
+ # use low-level interface to attach to existing tensors
+ Y_h = outputs[1]
+ transformer.make_node(
+ 'Unsqueeze', [state_tensors[-1]], [Y_h], axes=[y_h_direction_dim])
+ Y = outputs[0]
+ transformer.make_node(
+ 'Concat', state_layout_tensors, [Y], axis=seq_length_dim)
+
+
+def _transform_bidirectional_RNN(transformer, original_node, x, tensor_infos, activations,
+ clip, hidden_size, layout):
+ """Generate Bidirectional unrolled RNN
+
+ Args:
+ transformer (_ModelTransformerHelper): transformation helper
+ original_node (onnx.onnx_ml_pb2.NodeProto): bidirectional RNN operation to unroll
+ x (list of str): list of input tensors (input tensor split along "time" dimension)
+ tensor_infos (dict from str to _TensorInfo): maps tensor name to its shape and dtype info
+ activations (list of str): list of length 2 containing names of the forward and reverse activations
+ clip (float or None): range which clips input of activations
+ hidden_size (int): size of hidden state
+ layout (int): See attribute description:
+ https://github.com/onnx/onnx/blob/5cf5feef5ec3fd5527b2fdb6c29780e3b705059f/docs/Operators.md#attributes-56
+ """
+
+ inputs = original_node.input
+ outputs = original_node.output
+ w_bi = transformer.make_split(inputs[1], split_sizes=[1, 1], axis=0)
+ r_bi = transformer.make_split(inputs[2], split_sizes=[1, 1], axis=0)
+ w = []
+ r = []
+ for d in range(2):
+ w += [transformer.make_squeeze(w_bi[d], axes=[0])]
+ r += [transformer.make_squeeze(r_bi[d], axes=[0])]
+
+ b = []
+ if len(inputs) > 3 and inputs[3] != '':
+ raw_bias_tensors = transformer.make_split(inputs[3], split_sizes=[1, 1], axis=0)
+ for d in range(2):
+ raw_bias_tensors_squeezed = transformer.make_squeeze(
+ raw_bias_tensors[d], axes=[0])
+ splitted_bias_tensors = transformer.make_split(
+ raw_bias_tensors_squeezed, split_sizes=[hidden_size] * 2, axis=0)
+ b += [
+ transformer.make_add(splitted_bias_tensors[0], splitted_bias_tensors[1])
+ ]
+ else:
+ data_type = _dtype_to_np(tensor_infos[inputs[2]].dtype)
+ b = [
+ transformer.make_constant_tensor(
+ np.zeros(hidden_size, dtype=data_type), "zero_bias")
+ ] * 2
+ initial_h = [None, None]
+ if len(inputs) > 5 and inputs[5] != '':
+ direction_dim = layout
+ initial_h = transformer.make_split(
+ inputs[5], split_sizes=[1, 1], axis=direction_dim)
+ for d in range(2):
+ initial_h[d] = transformer.make_squeeze(initial_h[d], axes=[direction_dim])
+
+ state_f_tensors = _generate_one_direction_RNN(transformer, x, w[0], r[0], b[0],
+ initial_h[0], clip, activations[0])
+ x.reverse()
+ state_b_tensors = _generate_one_direction_RNN(transformer, x, w[1], r[1], b[1],
+ initial_h[1], clip, activations[1])
+ state_b_tensors.reverse()
+
+ y_direction_dim = layout + 1
+ y_h_direction_dim = layout
+ state_layout_tensors = []
+ seq_length_dim = layout
+ seq_length = len(x)
+ for t in range(seq_length):
+ state_f = state_f_tensors[t]
+ state_b = state_b_tensors[t]
+ state_layout_tensors_f = transformer.make_unsqueeze(
+ state_f, axes=[seq_length_dim, y_direction_dim])
+ state_layout_tensors_b = transformer.make_unsqueeze(
+ state_b, axes=[seq_length_dim, y_direction_dim])
+ state_layout_tensors += [
+ transformer.make_concat(
+ [state_layout_tensors_f, state_layout_tensors_b], axis=y_direction_dim)
+ ]
+
+ last_f_state_layout_tensor = transformer.make_unsqueeze(
+ state_f_tensors[-1], axes=[y_h_direction_dim])
+ last_b_state_layout_tensor = transformer.make_unsqueeze(
+ state_b_tensors[0], axes=[y_h_direction_dim])
+
+ # use low-level interface to attach to existing tensors
+ Y_h = outputs[1]
+ transformer.make_node(
+ 'Concat', [last_f_state_layout_tensor, last_b_state_layout_tensor], [Y_h],
+ axis=y_h_direction_dim)
+
+ Y = outputs[0]
+ transformer.make_node(
+ 'Concat', state_layout_tensors, [Y], axis=seq_length_dim)
+
+
+def _legalize_RNN(transformer, tensor_infos, node):
+ """Unroll RNN operation
+
+ Args:
+ transformer (_ModelTransformerHelper): transformation helper
+ tensor_infos (dict from str to _TensorInfo): maps tensor name to its shape and dtype info
+ node (onnx.onnx_ml_pb2.NodeProto): RNN operation to unroll
+ """
+ inputs = node.input
+ if len(inputs) > 4 and inputs[4] != '':
+ raise NotImplementedError('Variable sequence length is not supported')
+ # attributes
+ activation_alpha = []
+ activation_beta = []
+ activations = ['Tanh', 'Tanh']
+ clip = None
+ direction = 'forward'
+ hidden_size = 0
+ layout = 0
+
+ for attr in node.attribute:
+ if attr.name == 'activation_alpha':
+ activation_alpha = attr.floats
+ if attr.name == 'activation_beta':
+ activation_beta = attr.floats
+ if attr.name == 'activations':
+ activations = list(map(lambda item: item.decode('UTF-8'), list(attr.strings)))
+ if attr.name == 'clip':
+ clip = attr.f
+ if attr.name == 'direction':
+ direction = attr.s.decode('UTF-8')
+ if attr.name == 'hidden_size':
+ hidden_size = attr.i
+ if attr.name == 'layout':
+ layout = attr.i
+
+ if len(activation_alpha) > 0 or len(activation_beta) > 0:
+ raise NotImplementedError('Unsupported parameters for RNN activations')
+
+ for act in activations:
+ if act not in ['Relu', 'Tanh', 'Sigmoid']:
+ raise NotImplementedError('Unsupported activation function')
+
+ seq_length_dim = layout
+ seq_length = tensor_infos[inputs[0]].shape[seq_length_dim]
+ if hidden_size == 0:
+ hidden_size = tensor_infos[inputs[2]].shape[2]
+
+ input_split_tensor = transformer.make_split(
+ inputs[0], split_sizes=[1] * seq_length, axis=seq_length_dim)
+ x = []
+ for i in range(len(input_split_tensor)):
+ input_frame_tensor = input_split_tensor[i]
+ squeezed_frame_tensor = transformer.make_squeeze(input_frame_tensor, axes=[0])
+ x += [squeezed_frame_tensor]
+
+ if direction in ['forward', 'reverse']:
+ _transform_unidirectional_RNN(transformer, node, x, tensor_infos, activations[0],
+ clip, direction, hidden_size, layout)
+ elif direction == 'bidirectional':
+ _transform_bidirectional_RNN(transformer, node, x, tensor_infos, activations, clip,
+ hidden_size, layout)
+ else:
+ raise RuntimeError('Unknown RNN type')
+
+ transformer.mark_for_deletion(node)
+
+
+def _generate_one_direction_LSTM(transformer, X, W, R, B, initial_h, initial_c, P, clip,
+ act, dtype, hidden_size, batch_size):
+ """Generate subgraph for one direction of unrolled LSTM layer
+
+ Args:
+ transformer (_ModelTransformerHelper): helper for model generation
+ X (list of str): names of tensors in input sequence. Each tensor shape: [batch_size, input_size]
+ W (str): name of concatenated weight tensor: [input, output, forget, cell]
+ R (str): name of concatenated recurrence weights tensor: [input, output, forget, cell]
+ B (str): name of concatenated bias tensor: [input, output, forget, cell]
+ initial_h (str or None): name of tensor containing initial hidden state. Shape [batch_size, hidden_size]
+ initial_c (str or None): name of tensor containing initial cell state. Shape [batch_size, hidden_size]
+ P (str or None): name of concatenated peephole tensor: [input, output, forget]
+ clip (float or None): range which clips input of activations
+ act (dict of str): activation functions {'f': 'Sigmoid', 'g': 'Tanh', 'h': 'Tanh'}
+ dtype (numpy dtype): data type used in created LSTM operation
+ hidden_size (int): hidden dimension
+ batch_size (int): batch dimension
+
+ Returns:
+ tuple of (list of str, str): names of the per-step hidden state tensors and
+ the name of the final cell state tensor
+ """
+ # one direction LSTM:
+ #
+ # For details see:
+ # https://github.com/onnx/onnx/blob/5cf5feef5ec3fd5527b2fdb6c29780e3b705059f/docs/Changelog.md#LSTM-7
+ #
+ # it = f(Xt*(Wi^T) + Ht-1*(Ri^T) + Pi (.) Ct-1 + Wbi + Rbi)
+ # ft = f(Xt*(Wf^T) + Ht-1*(Rf^T) + Pf (.) Ct-1 + Wbf + Rbf)
+ # ct = g(Xt*(Wc^T) + Ht-1*(Rc^T) + Wbc + Rbc)
+ # Ct = ft (.) Ct-1 + it (.) ct
+ # ot = f(Xt*(Wo^T) + Ht-1*(Ro^T) + Po (.) Ct + Wbo + Rbo)
+ # Ht = ot (.) h(Ct)
+ #
+ # X - input tensor
+ # i - input gate
+ # o - output gate
+ # f - forget gate
+ # c - cell gate
+ # t - time step (t-1 means previous time step)
+ # W[iofc] - W parameter weight matrix for input, output, forget, and cell gates
+ # R[iofc] - R recurrence weight matrix for input, output, forget, and cell gates
+ # Wb[iofc] - W bias vectors for input, output, forget, and cell gates
+ # Rb[iofc] - R bias vectors for input, output, forget, and cell gates
+ # P[iof] - P peephole weight vector for input, output, and forget gates
+ # WB[iofc] - W parameter weight matrix for backward input, output, forget, and cell gates
+ # RB[iofc] - R recurrence weight matrix for backward input, output, forget, and cell gates
+ # WBb[iofc] - W bias vectors for backward input, output, forget, and cell gates
+ # RBb[iofc] - R bias vectors for backward input, output, forget, and cell gates
+ # PB[iof] - P peephole weight vector for backward input, output, and forget gates
+ # H - Hidden state
+
+ seq_length = len(X)
+ state_h_tensors = []
+
+ w_tensors = transformer.make_split(W, split_sizes=[hidden_size] * 4, axis=0)
+ W = {'i': w_tensors[0], 'o': w_tensors[1], 'f': w_tensors[2], 'c': w_tensors[3]}
+
+ r_tensors = transformer.make_split(R, split_sizes=[hidden_size] * 4, axis=0)
+ R = {'i': r_tensors[0], 'o': r_tensors[1], 'f': r_tensors[2], 'c': r_tensors[3]}
+
+ if B is not None:
+ separate_b_tensors = transformer.make_split(
+ B, split_sizes=[hidden_size] * 8, axis=0)
+ b_tensors = []
+ for i in range(4):
+ b_tensors += [
+ transformer.make_add(separate_b_tensors[i], separate_b_tensors[i + 4])
+ ]
+ else:
+ b_tensors = [
+ transformer.make_constant_tensor(
+ np.zeros((hidden_size), dtype=dtype), 'zero_b')
+ ] * 4
+ B = {'i': b_tensors[0], 'o': b_tensors[1], 'f': b_tensors[2], 'c': b_tensors[3]}
+
+ if initial_h is not None:
+ previous_h_state_tensor = initial_h
+ else:
+ previous_h_state_tensor = transformer.make_constant_tensor(
+ np.zeros((batch_size, hidden_size), dtype=dtype), 'initial_h')
+
+ if initial_c is not None:
+ previous_c_state_tensor = initial_c
+ else:
+ previous_c_state_tensor = transformer.make_constant_tensor(
+ np.zeros((batch_size, hidden_size), dtype=dtype), 'initial_c')
+
+ if P is not None:
+ p_tensors = transformer.make_split(P, split_sizes=[hidden_size] * 3, axis=0)
+ P = {'i': p_tensors[0], 'o': p_tensors[1], 'f': p_tensors[2]}
+ else:
+ zero = transformer.make_constant_tensor(
+ np.zeros((hidden_size), dtype=dtype), 'zero_peephole')
+ P = {'i': zero, 'o': zero, 'f': zero}
+
+ for i in range(seq_length):
+ # it = f(Xt*(Wi^T) + Ht-1*(Ri^T) + Pi (.) Ct-1 + Wbi + Rbi)
+ it = transformer.make_gemm(X[i], W['i'], B['i'], trans_b=True)
+ it = transformer.make_gemm(previous_h_state_tensor, R['i'], it, trans_b=True)
+ peephole_it = transformer.make_mul(P['i'], previous_c_state_tensor)
+ it = transformer.make_add(it, peephole_it)
+ if clip is not None:
+ it = transformer.make_clip(it, min=-clip, max=clip)
+ it = transformer.make_act(it, act['f'])
+
+ # ft = f(Xt*(Wf^T) + Ht-1*(Rf^T) + Pf (.) Ct-1 + Wbf + Rbf)
+ ft = transformer.make_gemm(X[i], W['f'], B['f'], trans_b=True)
+ ft = transformer.make_gemm(previous_h_state_tensor, R['f'], ft, trans_b=True)
+ peephole_ft = transformer.make_mul(P['f'], previous_c_state_tensor)
+ ft = transformer.make_add(ft, peephole_ft)
+ if clip is not None:
+ ft = transformer.make_clip(ft, min=-clip, max=clip)
+ ft = transformer.make_act(ft, act['f'])
+
+ # ct = g(Xt*(Wc^T) + Ht-1*(Rc^T) + Wbc + Rbc)
+ ct = transformer.make_gemm(X[i], W['c'], B['c'], trans_b=True)
+ ct = transformer.make_gemm(previous_h_state_tensor, R['c'], ct, trans_b=True)
+ if clip is not None:
+ ct = transformer.make_clip(ct, min=-clip, max=clip)
+ ct = transformer.make_act(ct, act['g'])
+
+ # Ct = ft (.) Ct-1 + it (.) ct
+ ft_Ct = transformer.make_mul(ft, previous_c_state_tensor)
+ it_ct = transformer.make_mul(it, ct)
+ Ct = transformer.make_add(ft_Ct, it_ct)
+ previous_c_state_tensor = Ct
+
+ # ot = f(Xt*(Wo^T) + Ht-1*(Ro^T) + Po (.) Ct + Wbo + Rbo)
+ ot = transformer.make_gemm(X[i], W['o'], B['o'], trans_b=True)
+ ot = transformer.make_gemm(previous_h_state_tensor, R['o'], ot, trans_b=True)
+ peephole_ot = transformer.make_mul(P['o'], Ct)
+ ot = transformer.make_add(ot, peephole_ot)
+ if clip is not None:
+ ot = transformer.make_clip(ot, min=-clip, max=clip)
+ ot = transformer.make_act(ot, act['f'])
+
+ # Ht = ot (.) h(Ct)
+ Ht = transformer.make_act(Ct, act['h'])
+ Ht = transformer.make_mul(ot, Ht)
+ previous_h_state_tensor = Ht
+ state_h_tensors += [Ht]
+
+ return (state_h_tensors, previous_c_state_tensor)
+
+
+def _transform_unidirectional_LSTM(transformer, original_node, x, tensor_infos,
+ activations, clip, direction, hidden_size, layout):
+ """Generate Simple (forward or reverse) unrolled LSTM
+
+ Args:
+ transformer (_ModelTransformerHelper): transformation helper
+ original_node (onnx.onnx_ml_pb2.NodeProto): unidirectional LSTM operation to unroll
+ x (list of str): list of input tensors (input tensor split along "time" dimension)
+ tensor_infos (dict from str to _TensorInfo): maps tensor name to its shape and dtype info
+ activations (list of str): list of length 3 containing names of activation functions
+ clip (float or None): range which clips input of activations
+ direction (str): "forward" or "reverse"
+ hidden_size (int): size of hidden state
+ layout (int): See attribute description:
+ https://github.com/onnx/onnx/blob/5cf5feef5ec3fd5527b2fdb6c29780e3b705059f/docs/Operators.md#attributes-37
+ """
+
+ inputs = original_node.input
+ outputs = original_node.output
+ if direction == 'reverse':
+ x.reverse()
+ w = transformer.make_squeeze(inputs[1], axes=[0])
+ r = transformer.make_squeeze(inputs[2], axes=[0])
+
+ b = None
+ if len(inputs) > 3 and inputs[3] != '':
+ b = transformer.make_squeeze(inputs[3], axes=[0])
+
+ initial_h = None
+ if len(inputs) > 5 and inputs[5] != '':
+ direction_dim = layout
+ initial_h = transformer.make_squeeze(inputs[5], axes=[direction_dim])
+
+ initial_c = None
+ if len(inputs) > 6 and inputs[6] != '':
+ direction_dim = layout
+ initial_c = transformer.make_squeeze(inputs[6], axes=[direction_dim])
+
+ p = None
+ if len(inputs) > 7 and inputs[7] != '':
+ p = transformer.make_squeeze(inputs[7], axes=[0])
+
+ dtype = _dtype_to_np(tensor_infos[inputs[0]].dtype)
+ batch_size = tensor_infos[inputs[0]].shape[1 - layout]
+
+ act = {'f': activations[0], 'g': activations[1], 'h': activations[2]}
+
+ state_h_tensors, state_c_tensor = _generate_one_direction_LSTM(
+ transformer, x, w, r, b, initial_h, initial_c, p, clip, act, dtype, hidden_size,
+ batch_size)
+
+ y_direction_dim = layout + 1
+ y_h_direction_dim = layout
+ state_layout_tensors = []
+ seq_length_dim = layout
+ for h_state in state_h_tensors:
+ state_layout_tensors += [
+ transformer.make_unsqueeze(h_state, axes=[seq_length_dim, y_direction_dim])
+ ]
+
+ # use low-level interface to attach to existing tensors
+ Y_h = outputs[1]
+ transformer.make_node(
+ 'Unsqueeze', [state_h_tensors[-1]], [Y_h], axes=[y_h_direction_dim])
+ Y_c = outputs[2]
+ transformer.make_node(
+ 'Unsqueeze', [state_c_tensor], [Y_c], axes=[y_h_direction_dim])
+ if direction == 'reverse':
+ state_layout_tensors.reverse()
+ Y = outputs[0]
+ transformer.make_node(
+ 'Concat', state_layout_tensors, [Y], axis=seq_length_dim)
+
+
+def _transform_bidirectional_LSTM(transformer, original_node, x, tensor_infos, activations,
+ clip, hidden_size, layout):
+ """Generate Bidirectional unrolled LSTM
+
+ Args:
+ transformer (_ModelTransformerHelper): transformation helper
+ original_node (onnx.onnx_ml_pb2.NodeProto): bidirectional LSTM operation to unroll
+ x (list of str): list of input tensors (input tensor split along "time" dimension)
+ tensor_infos (dict from str to _TensorInfo): maps tensor name to its shape and dtype info
+ activations (list of str): list of length 6, containing names of forward and reverse activations
+ clip (float or None): range which clips input of activations
+ hidden_size (int): size of hidden state
+ layout (int): See attribute description:
+ https://github.com/onnx/onnx/blob/5cf5feef5ec3fd5527b2fdb6c29780e3b705059f/docs/Operators.md#attributes-37
+ """
+
+ inputs = original_node.input
+ outputs = original_node.output
+
+ w = transformer.make_split(inputs[1], split_sizes=[1, 1], axis=0)
+ r = transformer.make_split(inputs[2], split_sizes=[1, 1], axis=0)
+ for d in range(2):
+ w[d] = transformer.make_squeeze(w[d], axes=[0])
+ r[d] = transformer.make_squeeze(r[d], axes=[0])
+
+ b = [None, None]
+ if len(inputs) > 3 and inputs[3] != '':
+ b = transformer.make_split(inputs[3], split_sizes=[1, 1], axis=0)
+ for d in range(2):
+ b[d] = transformer.make_squeeze(b[d], axes=[0])
+
+ initial_h = [None, None]
+ if len(inputs) > 5 and inputs[5] != '':
+ direction_dim = layout
+ initial_h = transformer.make_split(
+ inputs[5], split_sizes=[1, 1], axis=direction_dim)
+ for d in range(2):
+ initial_h[d] = transformer.make_squeeze(initial_h[d], axes=[direction_dim])
+
+ initial_c = [None, None]
+ if len(inputs) > 6 and inputs[6] != '':
+ direction_dim = layout
+ initial_c = transformer.make_split(
+ inputs[6], split_sizes=[1, 1], axis=direction_dim)
+ for d in range(2):
+ initial_c[d] = transformer.make_squeeze(initial_c[d], axes=[direction_dim])
+
+ p = [None, None]
+ if len(inputs) > 7 and inputs[7] != '':
+ p = transformer.make_split(inputs[7], split_sizes=[1, 1], axis=0)
+ for d in range(2):
+ p[d] = transformer.make_squeeze(p[d], axes=[0])
+
+ dtype = _dtype_to_np(tensor_infos[inputs[0]].dtype)
+ batch_size = tensor_infos[inputs[0]].shape[1 - layout]
+
+ act = [{
+ 'f': activations[0],
+ 'g': activations[1],
+ 'h': activations[2]
+ }, {
+ 'f': activations[3],
+ 'g': activations[4],
+ 'h': activations[5]
+ }]
+
+ state_f_h_tensors, state_f_c_tensor = _generate_one_direction_LSTM(
+ transformer, x, w[0], r[0], b[0], initial_h[0], initial_c[0], p[0], clip, act[0],
+ dtype, hidden_size, batch_size)
+ x.reverse()
+ state_b_h_tensors, state_b_c_tensor = _generate_one_direction_LSTM(
+ transformer, x, w[1], r[1], b[1], initial_h[1], initial_c[1], p[1], clip, act[1],
+ dtype, hidden_size, batch_size)
+ state_b_h_tensors.reverse()
+
+ y_direction_dim = layout + 1
+ y_c_direction_dim = layout
+ state_layout_tensors = []
+ seq_length_dim = layout
+ for f_h_state, b_h_state in zip(state_f_h_tensors, state_b_h_tensors):
+ state_f_layout_tensors = transformer.make_unsqueeze(
+ f_h_state, axes=[seq_length_dim, y_direction_dim])
+ state_b_layout_tensors = transformer.make_unsqueeze(
+ b_h_state, axes=[seq_length_dim, y_direction_dim])
+ state_layout_tensors += [
+ transformer.make_concat(
+ [state_f_layout_tensors, state_b_layout_tensors], axis=y_direction_dim)
+ ]
+
+ last_f_state_layout_tensor = transformer.make_unsqueeze(
+ state_f_h_tensors[-1], axes=[y_c_direction_dim])
+ last_b_state_layout_tensor = transformer.make_unsqueeze(
+ state_b_h_tensors[0], axes=[y_c_direction_dim])
+
+ Y_h = outputs[1]
+ transformer.make_node(
+ 'Concat', [last_f_state_layout_tensor, last_b_state_layout_tensor], [Y_h],
+ axis=y_c_direction_dim)
+
+ Y_f_c = transformer.make_unsqueeze(state_f_c_tensor, axes=[y_c_direction_dim])
+ Y_b_c = transformer.make_unsqueeze(state_b_c_tensor, axes=[y_c_direction_dim])
+ Y_c = outputs[2]
+ transformer.make_node(
+ 'Concat', [Y_f_c, Y_b_c], [Y_c], axis=y_c_direction_dim)
+
+ Y = outputs[0]
+ transformer.make_node(
+ 'Concat', state_layout_tensors, [Y], axis=seq_length_dim)
+
+
+def _legalize_LSTM(transformer, tensor_infos, node):
+ """Unroll LSTM operation
+
+ Args:
+ transformer (_ModelTransformerHelper): transformation helper
+ tensor_infos (dict from str to _TensorInfo): maps tensor name to its shape and dtype info
+ node (onnx.onnx_ml_pb2.NodeProto): LSTM operation to unroll
+ """
+ inputs = node.input
+ if len(inputs) > 4 and inputs[4] != '':
+ raise NotImplementedError('Variable sequence length is not supported')
+ # attributes
+ activation_alpha = []
+ activation_beta = []
+ activations = ['Sigmoid', 'Tanh', 'Tanh'] * 2
+ clip = None
+ direction = 'forward'
+ hidden_size = 0
+ input_forget = 0
+ layout = 0
+
+ for attr in node.attribute:
+ if attr.name == 'activation_alpha':
+ activation_alpha = attr.floats
+ if attr.name == 'activation_beta':
+ activation_beta = attr.floats
+ if attr.name == 'activations':
+ activations = list(map(lambda item: item.decode('UTF-8'), list(attr.strings)))
+ if attr.name == 'clip':
+ clip = attr.f
+ if attr.name == 'direction':
+ direction = attr.s.decode('UTF-8')
+ if attr.name == 'hidden_size':
+ hidden_size = attr.i
+ if attr.name == 'input_forget':
+ input_forget = attr.i
+ if attr.name == 'layout':
+ layout = attr.i
+
+ if len(activation_alpha) > 0 or len(activation_beta) > 0:
+ raise NotImplementedError('Unsupported parameters for LSTM activations')
+
+ for act in activations:
+ if act not in ['Relu', 'Tanh', 'Sigmoid']:
+ raise NotImplementedError('Unsupported activation function')
+
+ if input_forget != 0:
+ raise NotImplementedError('Unsupported input_forget attribute value')
+
+ seq_length_dim = layout
+ seq_length = tensor_infos[inputs[0]].shape[seq_length_dim]
+ if hidden_size == 0:
+ hidden_size = tensor_infos[inputs[2]].shape[2]
+
+ input_split_tensor = transformer.make_split(
+ inputs[0], split_sizes=[1] * seq_length, axis=seq_length_dim)
+ x = []
+ for i in range(len(input_split_tensor)):
+ input_frame_tensor = input_split_tensor[i]
+ squeezed_frame_tensor = transformer.make_squeeze(input_frame_tensor, axes=[0])
+ x += [squeezed_frame_tensor]
+
+ if direction in ['forward', 'reverse']:
+ _transform_unidirectional_LSTM(transformer, node, x, tensor_infos, activations,
+ clip, direction, hidden_size, layout)
+ elif direction == 'bidirectional':
+ _transform_bidirectional_LSTM(transformer, node, x, tensor_infos, activations,
+ clip, hidden_size, layout)
+ else:
+ raise RuntimeError('Unknown LSTM type')
+
+ transformer.mark_for_deletion(node)
+
+
+def legalize(model, options):
+ """Replace selected operations in onnx model
+
+ Replaces operations selected by the given options with different operation sequences,
+ for example replacing unsupported parts of the graph with sequences of supported operations.
+
+ Note that the graph is changed in place.
+
+ Args:
+ model (onnx.onnx_ml_pb2.ModelProto): target model
+ options (LegalizeOptions): controls which transformations are applied
+ """
+ tensor_infos = _get_tensor_infos(model)
+
+ transformer = _ModelTransformerHelper(model)
+
+ node_id = 0
+ while node_id < len(model.graph.node):
+ node = model.graph.node[node_id]
+ if node.op_type == 'RNN' and options.unroll_rnn:
+ # the generated Split operations pass sizes via the 'split' attribute,
+ # which opset 13 turned into an input
+ if model.opset_import[0].version >= 13:
+ raise NotImplementedError(
+ 'Cannot generate code for opset version 13 or greater')
+ transformer.set_insert_id(node_id)
+ _legalize_RNN(transformer, tensor_infos, node)
+ node_id = transformer.get_insert_id()
+ elif node.op_type == 'LSTM' and options.unroll_lstm:
+ if model.opset_import[0].version >= 13:
+ raise NotImplementedError(
+ 'Cannot generate code for opset version 13 or greater')
+ transformer.set_insert_id(node_id)
+ _legalize_LSTM(transformer, tensor_infos, node)
+ node_id = transformer.get_insert_id()
+ node_id += 1
+
+ transformer.delete_marked_nodes()
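+
+# Library usage sketch (the standalone mode below does the same thing;
+# 'model.onnx' is a hypothetical path):
+#
+# import onnx
+# import onnx_legalizer
+#
+# model = onnx.load('model.onnx')
+# options = onnx_legalizer.LegalizeOptions()
+# options.unroll_rnn = True
+# options.unroll_lstm = True
+# onnx_legalizer.legalize(model, options) # modifies model in place
+# onnx.save(model, 'model_legalized.onnx')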
+
+
+if __name__ == '__main__':
+ if len(sys.argv) < 3:
+ print('usage: ./legalize_onnx.py <path to input model> <path to output model>\n'
+ '\n'
+ ' In stand-alone utility mode this tool provides basic functionality.\n'
+ ' If you want more control over the applied transformations, use this legalizer as a library')
+ exit(1)
+ options = LegalizeOptions()
+ options.unroll_lstm = True
+ options.unroll_rnn = True
+ model = onnx.load(sys.argv[1])
+ legalize(model, options)
+ onnx.save(model, sys.argv[2])
diff --git a/compiler/one-cmds/tests/CMakeLists.txt b/compiler/one-cmds/tests/CMakeLists.txt
index 6f9f2847e..caea756c2 100644
--- a/compiler/one-cmds/tests/CMakeLists.txt
+++ b/compiler/one-cmds/tests/CMakeLists.txt
@@ -3,6 +3,7 @@
# Gather test scripts
file(GLOB TESTITEMS RELATIVE ${CMAKE_CURRENT_SOURCE_DIR} "./*.test")
file(GLOB CONFIGITEMS RELATIVE ${CMAKE_CURRENT_SOURCE_DIR} "./*.cfg")
+file(GLOB QCONFIGITEMS RELATIVE ${CMAKE_CURRENT_SOURCE_DIR} "./*.qconf.json")
# Create a script to run the tests at installation folder
set(DRIVER_SCRIPT "${CMAKE_CURRENT_BINARY_DIR}/runtestall.sh")
@@ -39,6 +40,11 @@ foreach(CONFIGITEM IN ITEMS ${CONFIGITEMS})
install(FILES ${CONFIGITEM} DESTINATION test)
endforeach(CONFIGITEM)
+foreach(QCONFIGITEM IN ITEMS ${QCONFIGITEMS})
+ get_filename_component(ITEM_PREFIX ${QCONFIGITEM} NAME_WE)
+ install(FILES ${QCONFIGITEM} DESTINATION test)
+endforeach(QCONFIGITEM)
+
file(APPEND "${DRIVER_SCRIPT}" "popd > /dev/null\n\n")
file(APPEND "${DRIVER_SCRIPT}"
@@ -52,6 +58,8 @@ fi\n
set(PREPARE_TEST_MATERIALS_SH "${CMAKE_CURRENT_SOURCE_DIR}/prepare_test_materials.sh")
set(PREPROCESS_IMAGES_PY "${CMAKE_CURRENT_SOURCE_DIR}/preprocess_images.py")
+set(ONNX_LEGALIZE_RUN_COMPARE "${CMAKE_CURRENT_SOURCE_DIR}/onnx_legalize_run_compare.py")
+set(PRINT_ONNX_MODEL "${CMAKE_CURRENT_SOURCE_DIR}/print_onnx_model.py")
install(FILES ${DRIVER_SCRIPT}
PERMISSIONS OWNER_WRITE OWNER_READ OWNER_EXECUTE
@@ -71,5 +79,23 @@ install(FILES ${PREPROCESS_IMAGES_PY}
WORLD_READ
DESTINATION test)
+install(FILES ${ONNX_LEGALIZE_RUN_COMPARE}
+ PERMISSIONS OWNER_WRITE OWNER_READ OWNER_EXECUTE
+ GROUP_READ GROUP_EXECUTE
+ WORLD_READ WORLD_EXECUTE
+ DESTINATION test)
+
+install(FILES ${PRINT_ONNX_MODEL}
+ PERMISSIONS OWNER_WRITE OWNER_READ OWNER_EXECUTE
+ GROUP_READ GROUP_EXECUTE
+ WORLD_READ WORLD_EXECUTE
+ DESTINATION test)
+
install(FILES ${CMAKE_CURRENT_SOURCE_DIR}/README.txt
DESTINATION test)
+
+add_subdirectory(onnx-operations)
+
+if(ENABLE_ONE_IMPORT_PYTORCH)
+ add_subdirectory(pytorch-operations)
+endif(ENABLE_ONE_IMPORT_PYTORCH)
diff --git a/compiler/one-cmds/tests/one-quantize_009.qconf.json b/compiler/one-cmds/tests/one-quantize_009.qconf.json
new file mode 100644
index 000000000..ac274e83a
--- /dev/null
+++ b/compiler/one-cmds/tests/one-quantize_009.qconf.json
@@ -0,0 +1,36 @@
+{
+ "default_quantization_dtype" : "uint8",
+ "default_granularity" : "channel",
+ "layers" : [
+ {
+ "name" : "InceptionV3/InceptionV3/Conv2d_2b_3x3/Relu;InceptionV3/InceptionV3/Conv2d_2b_3x3/BatchNorm/FusedBatchNorm;InceptionV3/InceptionV3/Mixed_6a/Branch_1/Conv2d_0a_1x1/Conv2D;InceptionV3/InceptionV3/Conv2d_2b_3x3/Conv2D",
+ "dtype" : "int16",
+ "granularity" : "channel"
+ },
+ {
+ "name" : "InceptionV3/InceptionV3/MaxPool_5a_3x3/MaxPool",
+ "dtype" : "int16",
+ "granularity" : "channel"
+ },
+ {
+ "name" : "InceptionV3/InceptionV3/Mixed_5b/concat",
+ "dtype" : "int16",
+ "granularity" : "channel"
+ },
+ {
+ "name" : "InceptionV3/InceptionV3/Mixed_5b/Branch_3/AvgPool_0a_3x3/AvgPool",
+ "dtype" : "int16",
+ "granularity" : "channel"
+ },
+ {
+ "name" : "InceptionV3/InceptionV3/Mixed_7c/concat",
+ "dtype" : "int16",
+ "granularity" : "channel"
+ },
+ {
+ "name" : "InceptionV3/Predictions/Reshape_1",
+ "dtype" : "int16",
+ "granularity" : "channel"
+ }
+ ]
+}
diff --git a/compiler/one-cmds/tests/one-quantize_009.test b/compiler/one-cmds/tests/one-quantize_009.test
new file mode 100644
index 000000000..aa0670350
--- /dev/null
+++ b/compiler/one-cmds/tests/one-quantize_009.test
@@ -0,0 +1,55 @@
+#!/bin/bash
+
+# Copyright (c) 2022 Samsung Electronics Co., Ltd. All Rights Reserved
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+filename_ext="$(basename -- $0)"
+filename="${filename_ext%.*}"
+
+trap_err_onexit()
+{
+ echo "${filename_ext} FAILED"
+ exit 255
+}
+
+trap trap_err_onexit ERR
+
+inputfile="./inception_v3.circle"
+outputfile="./inception_v3.random.quantized.mixed.circle"
+
+rm -rf ${outputfile}
+
+# to create inception_v3.circle
+if [[ ! -s ${inputfile} ]]; then
+ /bin/bash one-import_001.test > /dev/null 2>&1
+ return_code=$?
+ if [[ ${return_code} != 0 ]]; then
+ trap_err_onexit
+ fi
+fi
+
+# run test without input data
+one-quantize \
+--input_dtype float32 \
+--quantized_dtype uint8 \
+--granularity channel \
+--quant_config one-quantize_009.qconf.json \
+--input_path ${inputfile} \
+--output_path ${outputfile} > /dev/null 2>&1
+
+if [[ ! -s "${outputfile}" ]]; then
+ trap_err_onexit
+fi
+
+echo "${filename_ext} SUCCESS"
diff --git a/compiler/one-cmds/tests/onnx-operations/CMakeLists.txt b/compiler/one-cmds/tests/onnx-operations/CMakeLists.txt
new file mode 100644
index 000000000..e6b2b354a
--- /dev/null
+++ b/compiler/one-cmds/tests/onnx-operations/CMakeLists.txt
@@ -0,0 +1,86 @@
+# Install one-cmds test scripts for onnx models
+
+# Gather test scripts
+set(EXAMPLES_DIR "${NNAS_PROJECT_SOURCE_DIR}/res/PyTorchExamples/examples")
+file(GLOB TEST_EXAMPLES RELATIVE "${EXAMPLES_DIR}" "${EXAMPLES_DIR}/*")
+
+set(TEST_DST test/onnx-operations)
+
+install(DIRECTORY "${NNAS_PROJECT_SOURCE_DIR}/res/PyTorchExamples/" DESTINATION "${TEST_DST}")
+
+set(ONNX_IMPORT_OPTIONS "--unroll_rnn --unroll_lstm")
+
+foreach(TEST_ITEM IN ITEMS ${TEST_EXAMPLES})
+ set(TEST_SCRIPT "${CMAKE_CURRENT_BINARY_DIR}/${TEST_ITEM}.test")
+
+ # generate test script
+ file(WRITE "${TEST_SCRIPT}" "#!/bin/bash\n\n")
+ file(APPEND "${TEST_SCRIPT}" "filename_ext=\"\$(basename -- $0)\"\n")
+ file(APPEND "${TEST_SCRIPT}" "filename=\"\${filename_ext%.*}\"\n")
+ file(APPEND "${TEST_SCRIPT}" "trap_err_onexit()\n")
+ file(APPEND "${TEST_SCRIPT}" "{\n")
+ file(APPEND "${TEST_SCRIPT}" "echo \"\${filename_ext} FAILED\"\n")
+ file(APPEND "${TEST_SCRIPT}" "exit 255\n")
+ file(APPEND "${TEST_SCRIPT}" "}\n")
+ file(APPEND "${TEST_SCRIPT}" "trap trap_err_onexit ERR\n")
+ file(APPEND "${TEST_SCRIPT}" "outputfile=\"${TEST_ITEM}.circle\"\n")
+ file(APPEND "${TEST_SCRIPT}" "one-import-onnx --input_path=${TEST_ITEM}.onnx --output_path=${TEST_ITEM}.circle\
+ ${ONNX_IMPORT_OPTIONS} &> /dev/null\n")
+ file(APPEND "${TEST_SCRIPT}" "if [[ ! -s \"\${outputfile}\" ]]; then\n")
+ file(APPEND "${TEST_SCRIPT}" "trap_err_onexit\n")
+ file(APPEND "${TEST_SCRIPT}" "fi\n")
+ file(APPEND "${TEST_SCRIPT}" "echo \"\${filename_ext} SUCCESS\"\n")
+
+ install(FILES "${TEST_SCRIPT}" DESTINATION "${TEST_DST}")
+endforeach(TEST_ITEM)
+
+
+# Create a script to run the tests at installation folder
+set(DRIVER_SCRIPT "${CMAKE_CURRENT_BINARY_DIR}/runtestall.sh")
+
+file(WRITE "${DRIVER_SCRIPT}" "#!/bin/bash\n\n")
+file(APPEND "${DRIVER_SCRIPT}" "SCRIPT_PATH=$(cd $(dirname \${BASH_SOURCE[0]}) && pwd)\n")
+file(APPEND "${DRIVER_SCRIPT}" "pushd $SCRIPT_PATH > /dev/null\n")
+file(APPEND "${DRIVER_SCRIPT}" "rm -rf runtestall.log\n")
+file(APPEND "${DRIVER_SCRIPT}" "export PATH=$SCRIPT_PATH/../bin:$PATH\n")
+file(APPEND "${DRIVER_SCRIPT}" "if [[ $# -ge 1 ]]; then\n")
+file(APPEND "${DRIVER_SCRIPT}" " USER_PATH=$1\n")
+file(APPEND "${DRIVER_SCRIPT}" " export PATH=$USER_PATH:$PATH\n")
+file(APPEND "${DRIVER_SCRIPT}" "fi\n")
+file(APPEND "${DRIVER_SCRIPT}" "\n")
+file(APPEND "${DRIVER_SCRIPT}" "# refer https://github.com/Samsung/ONE/issues/6286\n")
+file(APPEND "${DRIVER_SCRIPT}" "set -o pipefail\n\n")
+file(APPEND "${DRIVER_SCRIPT}" "fail_count=0\n")
+file(APPEND "${DRIVER_SCRIPT}" "trap \"(( fail_count++ ))\" ERR\n\n")
+
+foreach(TEST_ITEM IN ITEMS ${TEST_EXAMPLES})
+ file(APPEND "${DRIVER_SCRIPT}" "/bin/bash \"${TEST_ITEM}.test\" | tee -a runtestall.log\n")
+endforeach(TEST_ITEM)
+
+file(APPEND "${DRIVER_SCRIPT}" "popd > /dev/null\n\n")
+
+file(APPEND "${DRIVER_SCRIPT}"
+"if [[ $fail_count != 0 ]]; then
+ echo \"$fail_count TESTS FAILED\"
+ exit 255
+else
+ echo \"ALL TESTS PASSED!\"
+fi\n
+")
+
+set(PREPARE_TEST_MATERIALS_SH "${CMAKE_CURRENT_SOURCE_DIR}/prepare_test_materials.sh")
+
+install(FILES "${DRIVER_SCRIPT}"
+ PERMISSIONS OWNER_WRITE OWNER_READ OWNER_EXECUTE
+ GROUP_READ GROUP_EXECUTE
+ WORLD_READ WORLD_EXECUTE
+ DESTINATION "${TEST_DST}")
+
+install(FILES "${PREPARE_TEST_MATERIALS_SH}"
+ PERMISSIONS OWNER_WRITE OWNER_READ OWNER_EXECUTE
+ GROUP_READ GROUP_EXECUTE
+ WORLD_READ WORLD_EXECUTE
+ DESTINATION "${TEST_DST}")
+
+install(FILES "${CMAKE_CURRENT_SOURCE_DIR}/README.md"
+ DESTINATION "${TEST_DST}")
diff --git a/compiler/one-cmds/tests/onnx-operations/README.md b/compiler/one-cmds/tests/onnx-operations/README.md
new file mode 100644
index 000000000..928fb84dd
--- /dev/null
+++ b/compiler/one-cmds/tests/onnx-operations/README.md
@@ -0,0 +1,28 @@
+## Overview
+
+This directory contains auxiliary tests for small onnx target models.
+
+Most of the models contain a single operation, but some contain multiple operations that together represent one operation with complex semantics.
+
+Models for these tests are taken from res/PyTorchExamples.
+
+## To run all tests
+
+Steps:
+1) run 'one-prepare-venv' in bin folder to prepare python virtual-env with TensorFlow
+ - you need to run this only once
+ - read 'doc/how-to-prepare-virtualenv.txt' for more information
+ ```
+ bin/one-prepare-venv
+ ```
+2) run 'test/onnx-operations/prepare_test_materials.sh' to download test material models
+ - you need to run this only once
+ - you need internet connection to download files
+ - you may need to install 'wget' and 'unzip' packages
+ ```
+ test/onnx-operations/prepare_test_materials.sh
+ ```
+3) run 'test/onnx-operations/runtestall.sh' to run the test
+ ```
+ test/onnx-operations/runtestall.sh
+ ```
diff --git a/compiler/one-cmds/tests/onnx-operations/prepare_test_materials.sh b/compiler/one-cmds/tests/onnx-operations/prepare_test_materials.sh
new file mode 100644
index 000000000..274a60f0a
--- /dev/null
+++ b/compiler/one-cmds/tests/onnx-operations/prepare_test_materials.sh
@@ -0,0 +1,26 @@
+#!/bin/bash
+
+# Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+SCRIPT_PATH="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
+pushd $SCRIPT_PATH > /dev/null
+
+for test_case in examples/*; do
+ python3 ptem.py $(basename ${test_case})
+done
+
+cp output/*.onnx .
+
+popd > /dev/null
diff --git a/compiler/one-cmds/tests/onnx_legalize_run_compare.py b/compiler/one-cmds/tests/onnx_legalize_run_compare.py
new file mode 100644
index 000000000..9b02b74af
--- /dev/null
+++ b/compiler/one-cmds/tests/onnx_legalize_run_compare.py
@@ -0,0 +1,129 @@
+# Copyright (c) 2022 Samsung Electronics Co., Ltd. All Rights Reserved
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+import onnxruntime as rt
+import onnx
+import sys
+import numpy as np
+import importlib.util
+
+
+def _generate_inputs(model):
+ """Generate random inputs for given model
+
+ Args:
+ model (onnx.onnx_ml_pb2.ModelProto): target model
+
+ Returns:
+ dict from str to numpy.ndarray: generated inputs
+ """
+ inputs = {}
+ for input in model.graph.input:
+ # check if elem type is float32
+ # list of types could be extended, this is a property of current testsuite
+ assert (
+ input.type.tensor_type.elem_type == onnx.TensorProto.DataType.Value("FLOAT"))
+ input_shape = []
+ for dim in input.type.tensor_type.shape.dim:
+ input_shape += [dim.dim_value]
+ inputs[input.name] = np.random.random(input_shape).astype(np.float32)
+ return inputs
+
+
+def _run_model(model, inputs):
+ """Run given model
+
+ Args:
+ model (onnx.onnx_ml_pb2.ModelProto): target model
+ inputs (dict from str to numpy.ndarray): sample inputs
+
+ Returns:
+ list of numpy.ndarray: inference outputs
+ """
+ output_names = list(map(lambda output: output.name, model.graph.output))
+ session = rt.InferenceSession(model.SerializeToString())
+ outputs = session.run(output_names, inputs)
+ return outputs
+
+
+def _compare_results(ref_outputs, test_outputs, tolerance):
+ """Generate random inputs for given model
+
+ Args:
+ ref_outputs (list of numpy.ndarray): reference values (original model results)
+ test_outputs (list of numpy.ndarray): tested values (modified model results)
+ tolerance (float): maximum acceptable relative difference
+
+ Returns:
+ bool: True if outputs considered equal, False otherwise
+ """
+ num_outputs = len(ref_outputs)
+ assert (len(test_outputs) == num_outputs)
+ for i in range(num_outputs):
+ if ref_outputs[i].shape != test_outputs[i].shape:
+ print("output {} shape mismatch: ref({}) vs test({})".format(
+ i, ref_outputs[i].shape, test_outputs[i].shape))
+ return False
+
+ abs_difference = np.abs(ref_outputs[i] - test_outputs[i])
+ abs_ref_maximum = np.abs(ref_outputs[i]).max()
+ peak_error = abs_difference.max() / abs_ref_maximum
+
+ if peak_error > tolerance:
+ print("output {} peak error to value ratio {} is too big".format(
+ i, peak_error))
+ return False
+ return True
+
+
+if __name__ == '__main__':
+ if len(sys.argv) < 6:
+ exit('expecting 5 arguments:\n'
+ ' - path to input model\n'
+ ' - path to "legalized" model\n'
+ ' - path to onnx_legalizer.py\n'
+ ' - base name for generated test inputs\n'
+ ' - output tolerance')
+ input_model_path = sys.argv[1]
+ output_model_path = sys.argv[2]
+ onnx_legalizer_path = sys.argv[3]
+ input_dump_path = sys.argv[4]
+ tolerance = float(sys.argv[5])
+
+ onnx_legalizer_spec = importlib.util.spec_from_file_location(
+ "onnx_legalizer", onnx_legalizer_path)
+ onnx_legalizer = importlib.util.module_from_spec(onnx_legalizer_spec)
+ onnx_legalizer_spec.loader.exec_module(onnx_legalizer)
+
+ model = onnx.load(input_model_path)
+
+ inputs = _generate_inputs(model)
+
+ for i in inputs:
+ np.save('{}_{}.npy'.format(input_dump_path, i), inputs[i])
+
+ ref_outputs = _run_model(model, inputs)
+
+ options = onnx_legalizer.LegalizeOptions()
+ options.unroll_rnn = True
+ options.unroll_lstm = True
+ onnx_legalizer.legalize(model, options)
+
+ with open(output_model_path, 'wb') as f:
+ f.write(model.SerializeToString())
+
+ test_outputs = _run_model(model, inputs)
+
+ if not _compare_results(ref_outputs, test_outputs, tolerance):
+ exit('comparison failed')
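
The comparison metric above normalizes the largest absolute difference by the largest absolute reference value. A minimal sketch of the same check in isolation (array values chosen purely for illustration):

```python
import numpy as np

# reference and test outputs (illustrative values only)
ref = np.array([1.0, -4.0, 2.0])
test = np.array([1.01, -3.98, 2.0])

# peak relative error: max |ref - test| / max |ref|
peak_error = np.abs(ref - test).max() / np.abs(ref).max()
print(peak_error)  # 0.02 / 4.0 = 0.005, within a 0.01 tolerance
```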
diff --git a/compiler/one-cmds/tests/prepare_test_materials.sh b/compiler/one-cmds/tests/prepare_test_materials.sh
index 7f269530c..c80c59834 100644
--- a/compiler/one-cmds/tests/prepare_test_materials.sh
+++ b/compiler/one-cmds/tests/prepare_test_materials.sh
@@ -91,6 +91,39 @@ if [[ ! -s "onnx_conv2d_conv2d.onnx" ]]; then
# https://github.com/Samsung/ONE/issues/5577#issuecomment-755078444
fi
+function files_missing() {
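+  # builds and runs a single "test" command of the form
+  #   test ! -s f1 -o ! -s f2 ... -o -z non_zero_string
+  # so the call succeeds iff at least one listed file is missing or empty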
+ condition="test "
+
+ for f in "${@}"; do
+ condition="${condition} ! -s ${f} -o"
+ done
+
+ # last condition is always false to properly close last "or"
+ condition="${condition} -z non_zero_string "
+ ${condition}
+}
+
+declare -a TEST_RECURRENT_MODELS=(\
+  "RNN.onnx" "RNN-nobias.onnx" "RNN-relu.onnx" "RNN-bi.onnx" "RNN-noinit.onnx"\
+  "LSTM.onnx" "LSTM-bi.onnx" "LSTM-noinit.onnx" "LSTM-nobias.onnx"
+)
+
+if files_missing "${TEST_RECURRENT_MODELS[@]}"; then
+  rm -rf test_onnx_recurrent_models.zip
+  wget https://github.com/Samsung/ONE/files/8067909/test_onnx_recurrent_models.zip
+  unzip test_onnx_recurrent_models.zip
+  # https://github.com/Samsung/ONE/issues/8395#issuecomment-1040072097
+fi
+
+declare -a NEG_TEST_RECURRENT_MODELS=("rnn_variable.onnx" "lstm_variable.onnx")
+
+if files_missing "${NEG_TEST_RECURRENT_MODELS[@]}"; then
+  rm -rf neg_test_onnx_recurrent_models.zip
+  wget https://github.com/Samsung/ONE/files/8137183/neg_test_onnx_recurrent_models.zip
+  unzip neg_test_onnx_recurrent_models.zip
+  # https://github.com/Samsung/ONE/issues/8395#issuecomment-1050364375
+fi
+
# prepare 'inception_v3.circle' file used for quantization test
inputfile="./inception_v3.pb"
outputfile="./inception_v3.circle"
diff --git a/compiler/one-cmds/tests/print_onnx_model.py b/compiler/one-cmds/tests/print_onnx_model.py
new file mode 100644
index 000000000..ecab0f6da
--- /dev/null
+++ b/compiler/one-cmds/tests/print_onnx_model.py
@@ -0,0 +1,20 @@
+# Copyright (c) 2022 Samsung Electronics Co., Ltd. All Rights Reserved
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+import onnx
+import sys
+
+if __name__ == '__main__':
+ model = onnx.load(sys.argv[1])
+ print(model)
diff --git a/compiler/one-cmds/tests/pytorch-operations/CMakeLists.txt b/compiler/one-cmds/tests/pytorch-operations/CMakeLists.txt
new file mode 100644
index 000000000..10f30a5c9
--- /dev/null
+++ b/compiler/one-cmds/tests/pytorch-operations/CMakeLists.txt
@@ -0,0 +1,109 @@
+# Install one-cmds test scripts for pytorch models
+
+# Gather test scripts
+set(EXAMPLES_DIR "${NNAS_PROJECT_SOURCE_DIR}/res/PyTorchExamples/examples")
+file(GLOB TEST_EXAMPLES RELATIVE "${EXAMPLES_DIR}" "${EXAMPLES_DIR}/*")
+file(GLOB SPECIAL_TEST_ITEMS RELATIVE ${CMAKE_CURRENT_SOURCE_DIR} "./*.test")
+
+set(TEST_DST test/pytorch-operations)
+
+install(DIRECTORY "${NNAS_PROJECT_SOURCE_DIR}/res/PyTorchExamples/" DESTINATION "${TEST_DST}")
+
+set(PYTORCH_IMPORT_OPTIONS "--unroll_rnn --unroll_lstm")
+
+foreach(TEST_ITEM IN ITEMS ${TEST_EXAMPLES})
+ set(TEST_SCRIPT "${CMAKE_CURRENT_BINARY_DIR}/${TEST_ITEM}.test")
+
+ # generate test script
+ file(WRITE "${TEST_SCRIPT}" "#!/bin/bash\n\n")
+ file(APPEND "${TEST_SCRIPT}" "filename_ext=\"\$(basename -- $0)\"\n")
+ file(APPEND "${TEST_SCRIPT}" "filename=\"\${filename_ext%.*}\"\n")
+ file(APPEND "${TEST_SCRIPT}" "trap_err_onexit()\n")
+ file(APPEND "${TEST_SCRIPT}" "{\n")
+ file(APPEND "${TEST_SCRIPT}" " echo \"\${filename_ext} FAILED\"\n")
+ file(APPEND "${TEST_SCRIPT}" " exit 255\n")
+ file(APPEND "${TEST_SCRIPT}" "}\n")
+ file(APPEND "${TEST_SCRIPT}" "trap trap_err_onexit ERR\n")
+ file(APPEND "${TEST_SCRIPT}" "outputfile=\"${TEST_ITEM}.circle\"\n")
+ file(APPEND "${TEST_SCRIPT}" "input_shapes=\$(head -n 1 ${TEST_ITEM}.spec)\n")
+ file(APPEND "${TEST_SCRIPT}" "input_types=\$(tail -n 1 ${TEST_ITEM}.spec)\n")
+ file(APPEND "${TEST_SCRIPT}" "one-import-pytorch --input_path=${TEST_ITEM}.pth --output_path=${TEST_ITEM}.circle\
+ ${PYTORCH_IMPORT_OPTIONS} --input_shapes=\${input_shapes} --input_types=\${input_types} &> /dev/null\n")
+ file(APPEND "${TEST_SCRIPT}" "if [[ ! -s \"\${outputfile}\" ]]; then\n")
+ file(APPEND "${TEST_SCRIPT}" " trap_err_onexit\n")
+ file(APPEND "${TEST_SCRIPT}" "fi\n")
+ file(APPEND "${TEST_SCRIPT}" "echo \"\${filename_ext} SUCCESS\"\n")
+
+ install(FILES "${TEST_SCRIPT}" DESTINATION "${TEST_DST}")
+endforeach(TEST_ITEM)
+
+
+# Create a script to run the tests at installation folder
+set(DRIVER_SCRIPT "${CMAKE_CURRENT_BINARY_DIR}/runtestall.sh")
+
+file(WRITE "${DRIVER_SCRIPT}" "#!/bin/bash\n\n")
+file(APPEND "${DRIVER_SCRIPT}" "SCRIPT_PATH=$(cd $(dirname \${BASH_SOURCE[0]}) && pwd)\n")
+file(APPEND "${DRIVER_SCRIPT}" "pushd $SCRIPT_PATH > /dev/null\n")
+file(APPEND "${DRIVER_SCRIPT}" "rm -rf runtestall.log\n")
+file(APPEND "${DRIVER_SCRIPT}" "export PATH=$SCRIPT_PATH/../bin:$PATH\n")
+file(APPEND "${DRIVER_SCRIPT}" "if [[ $# -ge 1 ]]; then\n")
+file(APPEND "${DRIVER_SCRIPT}" " USER_PATH=$1\n")
+file(APPEND "${DRIVER_SCRIPT}" " export PATH=$USER_PATH:$PATH\n")
+file(APPEND "${DRIVER_SCRIPT}" "fi\n")
+file(APPEND "${DRIVER_SCRIPT}" "\n")
+file(APPEND "${DRIVER_SCRIPT}" "# refer https://github.com/Samsung/ONE/issues/6286\n")
+file(APPEND "${DRIVER_SCRIPT}" "set -o pipefail\n\n")
+file(APPEND "${DRIVER_SCRIPT}" "fail_count=0\n")
+file(APPEND "${DRIVER_SCRIPT}" "trap \"(( fail_count++ ))\" ERR\n\n")
+
+foreach(TEST_ITEM IN ITEMS ${TEST_EXAMPLES})
+ file(APPEND "${DRIVER_SCRIPT}" "/bin/bash \"${TEST_ITEM}.test\" | tee -a runtestall.log\n")
+endforeach(TEST_ITEM)
+
+file(APPEND "${DRIVER_SCRIPT}" "\necho \"special test items\" | tee -a runtestall.log\n\n")
+
+foreach(TEST_ITEM IN ITEMS ${SPECIAL_TEST_ITEMS})
+ file(APPEND "${DRIVER_SCRIPT}" "/bin/bash \"${TEST_ITEM}\" | tee -a runtestall.log\n")
+endforeach(TEST_ITEM)
+
+file(APPEND "${DRIVER_SCRIPT}" "popd > /dev/null\n\n")
+
+file(APPEND "${DRIVER_SCRIPT}"
+"if [[ $fail_count != 0 ]]; then
+ echo \"$fail_count TESTS FAILED\"
+ exit 255
+else
+ echo \"ALL TESTS PASSED!\"
+fi\n
+")
+
+set(PREPARE_TEST_MATERIALS_SH "${CMAKE_CURRENT_SOURCE_DIR}/prepare_test_materials.sh")
+set(EXAMPLE_GENERATOR "${CMAKE_CURRENT_SOURCE_DIR}/example_generator.py")
+set(AUX_GENERATOR "${CMAKE_CURRENT_SOURCE_DIR}/aux_generator.py")
+
+install(FILES "${DRIVER_SCRIPT}"
+ PERMISSIONS OWNER_WRITE OWNER_READ OWNER_EXECUTE
+ GROUP_READ GROUP_EXECUTE
+ WORLD_READ WORLD_EXECUTE
+ DESTINATION "${TEST_DST}")
+
+install(FILES "${PREPARE_TEST_MATERIALS_SH}"
+ PERMISSIONS OWNER_WRITE OWNER_READ OWNER_EXECUTE
+ GROUP_READ GROUP_EXECUTE
+ WORLD_READ WORLD_EXECUTE
+ DESTINATION "${TEST_DST}")
+
+install(FILES "${EXAMPLE_GENERATOR}" "${AUX_GENERATOR}"
+ PERMISSIONS OWNER_WRITE OWNER_READ OWNER_EXECUTE
+ GROUP_READ GROUP_EXECUTE
+ WORLD_READ WORLD_EXECUTE
+ DESTINATION "${TEST_DST}")
+
+install(FILES ${SPECIAL_TEST_ITEMS}
+ PERMISSIONS OWNER_WRITE OWNER_READ OWNER_EXECUTE
+ GROUP_READ GROUP_EXECUTE
+ WORLD_READ WORLD_EXECUTE
+ DESTINATION "${TEST_DST}")
+
+install(FILES "${CMAKE_CURRENT_SOURCE_DIR}/README.md"
+ DESTINATION "${TEST_DST}")
diff --git a/compiler/one-cmds/tests/pytorch-operations/README.md b/compiler/one-cmds/tests/pytorch-operations/README.md
new file mode 100644
index 000000000..231a10eb4
--- /dev/null
+++ b/compiler/one-cmds/tests/pytorch-operations/README.md
@@ -0,0 +1,28 @@
+## Overview
+
+This directory contains auxiliary tests for small PyTorch target models.
+
+Most of the models contain a single operation, but some contain multiple operations that together represent one operation with complex semantics.
+
+Models for these tests are taken from res/PyTorchExamples.
+
+## To run all tests
+
+Steps:
+1) run 'one-prepare-venv' in the bin folder to prepare a Python virtual environment with TensorFlow
+ - you need to run this only once
+ - read 'doc/how-to-prepare-virtualenv.txt' for more information
+ ```
+ bin/one-prepare-venv
+ ```
+2) run 'test/pytorch-operations/prepare_test_materials.sh' to generate the test models
+ - you need to run this only once
+ ```
+ test/pytorch-operations/prepare_test_materials.sh
+ ```
+3) run 'test/pytorch-operations/runtestall.sh' to run the tests
+ ```
+ test/pytorch-operations/runtestall.sh
+ ```
diff --git a/compiler/one-cmds/tests/pytorch-operations/aux_generator.py b/compiler/one-cmds/tests/pytorch-operations/aux_generator.py
new file mode 100644
index 000000000..6c9afcded
--- /dev/null
+++ b/compiler/one-cmds/tests/pytorch-operations/aux_generator.py
@@ -0,0 +1,83 @@
+#!/usr/bin/env python3
+
+# Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+# PyTorch aux tests generator
+
+import torch
+import torch.nn as nn
+import json
+import zipfile
+import os
+
+
+# model
+class net_abs(nn.Module):
+ def __init__(self):
+ super().__init__()
+
+ def forward(self, input):
+ return torch.abs(input)
+
+
+if __name__ == '__main__':
+ model = net_abs()
+ # save "entire" model for entire_model.test
+ torch.save(model, 'entire_model.pth')
+
+ # save state_dict file for state_dict_model.test
+ state_dict_path = 'state_dict_model.pth'
+ torch.save(model.state_dict(), state_dict_path)
+
+ # create files for mar_torchscript_model.test
+ torchscript_path = 'torchscript_model.pth'
+ inp = torch.randn(1, 2, 3, 3)
+ traced_model = torch.jit.trace(model, inp)
+ torch.jit.save(traced_model, torchscript_path)
+ # create manifest
+ manifest = {}
+ manifest['createdOn'] = '11/11/1111 11:11:11'
+ manifest['runtime'] = 'python'
+ manifest['model'] = {}
+    manifest['model']['modelName'] = 'torchscript_model'
+ manifest['model']['serializedFile'] = torchscript_path
+ manifest['model']['handler'] = 'image_classifier'
+ manifest['model']['modelVersion'] = '1.0'
+ manifest['archiverVersion'] = '0.4.2'
+
+ with zipfile.ZipFile('mar_torchscript_model.mar', 'w') as mar_file:
+ with mar_file.open('MAR-INF/MANIFEST.json', 'w') as manifest_file:
+ manifest_file.write(json.dumps(manifest).encode())
+ mar_file.write(torchscript_path)
+
+ # create files for mar_state_dict_model.test
+ model_file_path = os.path.basename(__file__)
+ # create manifest
+ manifest = {}
+ manifest['createdOn'] = '11/11/1111 11:11:11'
+ manifest['runtime'] = 'python'
+ manifest['model'] = {}
+    manifest['model']['modelName'] = 'state_dict_model'
+ manifest['model']['serializedFile'] = state_dict_path
+ manifest['model']['handler'] = 'image_classifier'
+ manifest['model']['modelFile'] = model_file_path
+ manifest['model']['modelVersion'] = '1.0'
+ manifest['archiverVersion'] = '0.4.2'
+
+ with zipfile.ZipFile('mar_state_dict_model.mar', 'w') as mar_file:
+ with mar_file.open('MAR-INF/MANIFEST.json', 'w') as manifest_file:
+ manifest_file.write(json.dumps(manifest).encode())
+ mar_file.write(state_dict_path)
+ mar_file.write(model_file_path)
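
Since the .mar archives written above are ordinary zip files, their layout can be sanity-checked directly. A minimal sketch, reusing the file name from the generator above (illustrative only):

```python
import json
import zipfile

# open the archive produced by aux_generator.py
with zipfile.ZipFile('mar_torchscript_model.mar') as mar_file:
    print(mar_file.namelist())  # ['MAR-INF/MANIFEST.json', 'torchscript_model.pth']
    with mar_file.open('MAR-INF/MANIFEST.json') as manifest_file:
        manifest = json.load(manifest_file)
print(manifest['model']['serializedFile'])  # torchscript_model.pth
```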
diff --git a/compiler/one-cmds/tests/pytorch-operations/entire_model.test b/compiler/one-cmds/tests/pytorch-operations/entire_model.test
new file mode 100644
index 000000000..a72a56ffd
--- /dev/null
+++ b/compiler/one-cmds/tests/pytorch-operations/entire_model.test
@@ -0,0 +1,40 @@
+#!/bin/bash
+
+# Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+# Test one-import-pytorch's ability to import an NN model stored in a python file and a serialized "entire" model.
+# An "entire" model is serialized with the `torch.save(model)` method.
+
+filename_ext="$(basename -- $0)"
+filename="${filename_ext%.*}"
+
+trap_err_onexit()
+{
+ echo "${filename_ext} FAILED"
+ exit 255
+}
+
+trap trap_err_onexit ERR
+
+outputfile="entire_model.circle"
+
+# run test
+one-import-pytorch --input_path=entire_model.pth --python_path=aux_generator.py --output_path=${outputfile} --input_shapes=1,2,3,3 --input_types=float32 &> /dev/null
+
+if [[ ! -s "${outputfile}" ]]; then
+ trap_err_onexit
+fi
+
+echo "${filename_ext} SUCCESS"
diff --git a/compiler/one-cmds/tests/pytorch-operations/example_generator.py b/compiler/one-cmds/tests/pytorch-operations/example_generator.py
new file mode 100644
index 000000000..20a80c895
--- /dev/null
+++ b/compiler/one-cmds/tests/pytorch-operations/example_generator.py
@@ -0,0 +1,116 @@
+#!/usr/bin/env python3
+
+# Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+# PyTorch Example manager
+
+import torch
+import importlib
+import argparse
+import os
+
+from pathlib import Path
+
+print("PyTorch version=", torch.__version__)
+
+parser = argparse.ArgumentParser(description='Process PyTorch python examples')
+
+parser.add_argument('examples', metavar='EXAMPLES', nargs='+')
+
+args = parser.parse_args()
+
+output_folder = "./"
+
+Path(output_folder).mkdir(parents=True, exist_ok=True)
+
+
+class JitWrapper(torch.nn.Module):
+ def __init__(self, model):
+ super().__init__()
+ self.model = model
+
+ def forward(self, *args):
+ if len(args) == 1:
+ return self.model.forward(args[0])
+ else:
+ return self.model.forward(args)
+
+
+for example in args.examples:
+ print("Generate '" + example + ".pth'", end='')
+ # load example code
+    # replace '-' with '_' in the module name; otherwise PyTorch generates invalid TorchScript
+ module_name = "examples." + example.replace('-', '_')
+ module_loader = importlib.machinery.SourceFileLoader(
+ module_name, os.path.join("examples", example, "__init__.py"))
+ module_spec = importlib.util.spec_from_loader(module_name, module_loader)
+ module = importlib.util.module_from_spec(module_spec)
+ module_loader.exec_module(module)
+
+ jittable_model = JitWrapper(module._model_)
+
+ traced_model = torch.jit.trace(jittable_model, module._dummy_)
+ # save .pth
+ torch.jit.save(traced_model, output_folder + example + ".pth")
+
+ input_shapes = ""
+ input_types = ""
+
+ input_samples = module._dummy_
+ if isinstance(input_samples, torch.Tensor):
+ input_samples = [input_samples]
+ for inp_idx in range(len(input_samples)):
+ input_data = input_samples[inp_idx]
+
+ shape = input_data.shape
+ for dim in range(len(shape)):
+ input_shapes += str(shape[dim])
+ if dim != len(shape) - 1:
+ input_shapes += ","
+
+ if input_data.dtype == torch.bool:
+ input_types += "bool"
+ elif input_data.dtype == torch.uint8:
+ input_types += "uint8"
+ elif input_data.dtype == torch.int8:
+ input_types += "int8"
+ elif input_data.dtype == torch.int16:
+ input_types += "int16"
+ elif input_data.dtype == torch.int32:
+ input_types += "int32"
+    elif input_data.dtype == torch.int64:
+        input_types += "int64"
+    elif input_data.dtype == torch.float16:
+        # float16 inputs are declared as float32
+        input_types += "float32"
+ elif input_data.dtype == torch.float32:
+ input_types += "float32"
+ elif input_data.dtype == torch.float64:
+ input_types += "float64"
+ elif input_data.dtype == torch.complex64:
+ input_types += "complex64"
+ elif input_data.dtype == torch.complex128:
+ input_types += "complex128"
+ else:
+ raise ValueError('unsupported dtype')
+
+ if inp_idx != len(input_samples) - 1:
+ input_shapes += ":"
+ input_types += ","
+
+ with open(example + ".spec", "w") as spec_file:
+ print(input_shapes, file=spec_file)
+ print(input_types, file=spec_file)
+
+ print(" - Done")
diff --git a/compiler/one-cmds/tests/pytorch-operations/mar_state_dict_model.test b/compiler/one-cmds/tests/pytorch-operations/mar_state_dict_model.test
new file mode 100644
index 000000000..9892dbbed
--- /dev/null
+++ b/compiler/one-cmds/tests/pytorch-operations/mar_state_dict_model.test
@@ -0,0 +1,40 @@
+#!/bin/bash
+
+# Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+# Test one-import-pytorch's ability to import a .mar file.
+# The .mar file contains the python source of the model and a serialized state_dict.
+
+filename_ext="$(basename -- $0)"
+filename="${filename_ext%.*}"
+
+trap_err_onexit()
+{
+ echo "${filename_ext} FAILED"
+ exit 255
+}
+
+trap trap_err_onexit ERR
+
+outputfile="mar_state_dict_model.circle"
+
+# run test
+one-import-pytorch --input_path=mar_state_dict_model.mar --output_path=${outputfile} --input_shapes=1,2,3,3 --input_types=float32 &> /dev/null
+
+if [[ ! -s "${outputfile}" ]]; then
+ trap_err_onexit
+fi
+
+echo "${filename_ext} SUCCESS"
diff --git a/compiler/one-cmds/tests/pytorch-operations/mar_torchscript_model.test b/compiler/one-cmds/tests/pytorch-operations/mar_torchscript_model.test
new file mode 100644
index 000000000..3ac38a42e
--- /dev/null
+++ b/compiler/one-cmds/tests/pytorch-operations/mar_torchscript_model.test
@@ -0,0 +1,40 @@
+#!/bin/bash
+
+# Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+# Test one-import-pytorch's ability to import a .mar file.
+# The .mar file contains a TorchScript model.
+
+filename_ext="$(basename -- $0)"
+filename="${filename_ext%.*}"
+
+trap_err_onexit()
+{
+ echo "${filename_ext} FAILED"
+ exit 255
+}
+
+trap trap_err_onexit ERR
+
+outputfile="mar_torchscript_model.circle"
+
+# run test
+one-import-pytorch --input_path=mar_torchscript_model.mar --output_path=${outputfile} --input_shapes=1,2,3,3 --input_types=float32 &> /dev/null
+
+if [[ ! -s "${outputfile}" ]]; then
+ trap_err_onexit
+fi
+
+echo "${filename_ext} SUCCESS"
diff --git a/compiler/one-cmds/tests/pytorch-operations/prepare_test_materials.sh b/compiler/one-cmds/tests/pytorch-operations/prepare_test_materials.sh
new file mode 100644
index 000000000..5f38610d7
--- /dev/null
+++ b/compiler/one-cmds/tests/pytorch-operations/prepare_test_materials.sh
@@ -0,0 +1,26 @@
+#!/bin/bash
+
+# Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+SCRIPT_PATH="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
+pushd $SCRIPT_PATH > /dev/null
+
+for test_case in examples/*; do
+ python3 example_generator.py $(basename ${test_case})
+done
+
+python3 aux_generator.py
+
+popd > /dev/null
diff --git a/compiler/one-cmds/tests/pytorch-operations/state_dict_model.test b/compiler/one-cmds/tests/pytorch-operations/state_dict_model.test
new file mode 100644
index 000000000..ecd2a8112
--- /dev/null
+++ b/compiler/one-cmds/tests/pytorch-operations/state_dict_model.test
@@ -0,0 +1,39 @@
+#!/bin/bash
+
+# Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+# Test one-import-pytorch's ability to import an NN model from a .py file and a serialized state_dict file.
+
+filename_ext="$(basename -- $0)"
+filename="${filename_ext%.*}"
+
+trap_err_onexit()
+{
+ echo "${filename_ext} FAILED"
+ exit 255
+}
+
+trap trap_err_onexit ERR
+
+outputfile="state_dict_model.circle"
+
+# run test
+one-import-pytorch --input_path=state_dict_model.pth --python_path=aux_generator.py --output_path=${outputfile} --input_shapes=1,2,3,3 --input_types=float32 &> /dev/null
+
+if [[ ! -s "${outputfile}" ]]; then
+ trap_err_onexit
+fi
+
+echo "${filename_ext} SUCCESS"
diff --git a/compiler/one-cmds/tests/pytorch-operations/torchscript_model.test b/compiler/one-cmds/tests/pytorch-operations/torchscript_model.test
new file mode 100644
index 000000000..590e5b369
--- /dev/null
+++ b/compiler/one-cmds/tests/pytorch-operations/torchscript_model.test
@@ -0,0 +1,39 @@
+#!/bin/bash
+
+# Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+# Test one-import-pytorch's ability to import a TorchScript file.
+
+filename_ext="$(basename -- $0)"
+filename="${filename_ext%.*}"
+
+trap_err_onexit()
+{
+ echo "${filename_ext} FAILED"
+ exit 255
+}
+
+trap trap_err_onexit ERR
+
+outputfile="torchscript_model.circle"
+
+# run test
+one-import-pytorch --input_path=torchscript_model.pth --output_path=${outputfile} --input_shapes=1,2,3,3 --input_types=float32 &> /dev/null
+
+if [[ ! -s "${outputfile}" ]]; then
+ trap_err_onexit
+fi
+
+echo "${filename_ext} SUCCESS"
diff --git a/compiler/one-cmds/utils.py b/compiler/one-cmds/utils.py
index 5d84c2bd5..be0322aca 100644
--- a/compiler/one-cmds/utils.py
+++ b/compiler/one-cmds/utils.py
@@ -17,80 +17,13 @@
import argparse
import configparser
import glob
+import importlib
import ntpath
import os
import subprocess
import sys
-
-class _CONSTANT:
- __slots__ = () # This prevents access via __dict__.
- OPTIMIZATION_OPTS = (
- # (OPTION_NAME, HELP_MESSAGE)
- ('O1', 'enable O1 optimization pass'),
- ('convert_nchw_to_nhwc',
- 'Experimental: This will convert NCHW operators to NHWC under the assumption that input model is NCHW.'
- ),
- ('expand_broadcast_const', 'expand broadcastable constant node inputs'),
- ('nchw_to_nhwc_input_shape',
- 'convert the input shape of the model (argument for convert_nchw_to_nhwc)'),
- ('nchw_to_nhwc_output_shape',
- 'convert the output shape of the model (argument for convert_nchw_to_nhwc)'),
- ('fold_add_v2', 'fold AddV2 op with constant inputs'),
- ('fold_cast', 'fold Cast op with constant input'),
- ('fold_dequantize', 'fold Dequantize op'),
- ('fold_dwconv', 'fold Depthwise Convolution op with constant inputs'),
- ('fold_sparse_to_dense', 'fold SparseToDense op'),
- ('forward_reshape_to_unaryop', 'Forward Reshape op'),
- ('fuse_add_with_tconv', 'fuse Add op to Transposed'),
- ('fuse_add_with_fully_connected', 'fuse Add op to FullyConnected op'),
- ('fuse_batchnorm_with_conv', 'fuse BatchNorm op to Convolution op'),
- ('fuse_batchnorm_with_dwconv', 'fuse BatchNorm op to Depthwise Convolution op'),
- ('fuse_batchnorm_with_tconv', 'fuse BatchNorm op to Transposed Convolution op'),
- ('fuse_bcq', 'apply Binary Coded Quantization'),
- ('fuse_preactivation_batchnorm',
- 'fuse BatchNorm operators of pre-activations to Convolution op'),
- ('fuse_mean_with_mean', 'fuse two consecutive Mean ops'),
- ('fuse_transpose_with_mean',
- 'fuse Mean with a preceding Transpose under certain conditions'),
- ('make_batchnorm_gamma_positive',
- 'make negative gamma of BatchNorm to a small positive value (1e-10).'
- ' Note that this pass can change the execution result of the model.'
- ' So, use it only when the impact is known to be acceptable.'),
- ('fuse_activation_function', 'fuse Activation function to a preceding operator'),
- ('fuse_instnorm', 'fuse ops to InstanceNorm operator'),
- ('replace_cw_mul_add_with_depthwise_conv',
- 'replace channel-wise Mul/Add with DepthwiseConv2D'),
- ('remove_fakequant', 'remove FakeQuant ops'),
- ('remove_quantdequant', 'remove Quantize-Dequantize sequence'),
- ('remove_redundant_reshape', 'fuse or remove subsequent Reshape ops'),
- ('remove_redundant_transpose', 'fuse or remove subsequent Transpose ops'),
- ('remove_unnecessary_reshape', 'remove unnecessary reshape ops'),
- ('remove_unnecessary_slice', 'remove unnecessary slice ops'),
- ('remove_unnecessary_strided_slice', 'remove unnecessary strided slice ops'),
- ('remove_unnecessary_split', 'remove unnecessary split ops'),
- ('resolve_customop_add', 'convert Custom(Add) op to Add op'),
- ('resolve_customop_batchmatmul',
- 'convert Custom(BatchMatmul) op to BatchMatmul op'),
- ('resolve_customop_matmul', 'convert Custom(Matmul) op to Matmul op'),
- ('resolve_customop_max_pool_with_argmax',
- 'convert Custom(MaxPoolWithArgmax) to net of builtin operators'),
- ('shuffle_weight_to_16x1float32',
- 'convert weight format of FullyConnected op to SHUFFLED16x1FLOAT32.'
- ' Note that it only converts weights whose row is a multiple of 16'),
- ('substitute_pack_to_reshape', 'convert single input Pack op to Reshape op'),
- ('substitute_padv2_to_pad', 'convert certain condition PadV2 to Pad'),
- ('substitute_splitv_to_split', 'convert certain condition SplitV to Split'),
- ('substitute_squeeze_to_reshape', 'convert certain condition Squeeze to Reshape'),
- ('substitute_strided_slice_to_reshape',
- 'convert certain condition StridedSlice to Reshape'),
- ('substitute_transpose_to_reshape',
- 'convert certain condition Transpose to Reshape'),
- ('transform_min_max_to_relu6', 'transform Minimum-Maximum pattern to Relu6 op'),
- ('transform_min_relu_to_relu6', 'transform Minimum(6)-Relu pattern to Relu6 op'))
-
-
-_CONSTANT = _CONSTANT()
+import onelib.constant as _constant
def _add_default_arg(parser):
@@ -116,7 +49,10 @@ def _add_default_arg(parser):
def is_accumulated_arg(arg, driver):
if driver == "one-quantize":
- if arg == "tensor_name" or arg == "scale" or arg == "zero_point":
+ accumulables = [
+ "tensor_name", "scale", "zero_point", "src_tensor_name", "dst_tensor_name"
+ ]
+ if arg in accumulables:
return True
return False
@@ -189,83 +125,6 @@ def _parse_cfg(args, driver_name):
setattr(args, key, config[secton_to_run][key])
-def _make_tf2tfliteV2_cmd(args, driver_path, input_path, output_path):
- """make a command for running tf2tfliteV2.py"""
- cmd = [sys.executable, os.path.expanduser(driver_path)]
- # verbose
- if _is_valid_attr(args, 'verbose'):
- cmd.append('--verbose')
- # model_format
- if _is_valid_attr(args, 'model_format_cmd'):
- cmd.append(getattr(args, 'model_format_cmd'))
- elif _is_valid_attr(args, 'model_format'):
- cmd.append('--' + getattr(args, 'model_format'))
- else:
- cmd.append('--graph_def') # default value
- # converter version
- if _is_valid_attr(args, 'converter_version_cmd'):
- cmd.append(getattr(args, 'converter_version_cmd'))
- elif _is_valid_attr(args, 'converter_version'):
- cmd.append('--' + getattr(args, 'converter_version'))
- else:
- cmd.append('--v1') # default value
- # input_path
- if _is_valid_attr(args, 'input_path'):
- cmd.append('--input_path')
- cmd.append(os.path.expanduser(input_path))
- # output_path
- if _is_valid_attr(args, 'output_path'):
- cmd.append('--output_path')
- cmd.append(os.path.expanduser(output_path))
- # input_arrays
- if _is_valid_attr(args, 'input_arrays'):
- cmd.append('--input_arrays')
- cmd.append(getattr(args, 'input_arrays'))
- # input_shapes
- if _is_valid_attr(args, 'input_shapes'):
- cmd.append('--input_shapes')
- cmd.append(getattr(args, 'input_shapes'))
- # output_arrays
- if _is_valid_attr(args, 'output_arrays'):
- cmd.append('--output_arrays')
- cmd.append(getattr(args, 'output_arrays'))
-
- return cmd
-
-
-def _make_tflite2circle_cmd(driver_path, input_path, output_path):
- """make a command for running tflite2circle"""
- cmd = [driver_path, input_path, output_path]
- return [os.path.expanduser(c) for c in cmd]
-
-
-def _make_circle2circle_cmd(args, driver_path, input_path, output_path):
- """make a command for running circle2circle"""
- cmd = [os.path.expanduser(c) for c in [driver_path, input_path, output_path]]
- # profiling
- if _is_valid_attr(args, 'generate_profile_data'):
- cmd.append('--generate_profile_data')
- # optimization pass(only true/false options)
- # TODO support options whose number of arguments is more than zero
- for opt in _CONSTANT.OPTIMIZATION_OPTS:
- if _is_valid_attr(args, opt[0]):
- # ./driver --opt[0]
- if type(getattr(args, opt[0])) is bool:
- cmd.append('--' + opt[0])
- """
- This condition check is for config file interface, usually would be
- SomeOption=True
- but user can write as follows while development
- SomeOption=False
- instead of removing SomeOption option
- """
- if type(getattr(args, opt[0])) is str and not getattr(
- args, opt[0]).lower() in ['false', '0', 'n']:
- cmd.append('--' + opt[0])
-
- return cmd
-
-
def _print_version_and_exit(file_path):
"""print version of the file located in the file_path"""
script_path = os.path.realpath(file_path)
@@ -368,3 +227,34 @@ def _get_optimization_list(get_name=False):
opt_list = [_remove_suffix(s, '.cfg') for s in opt_list]
return opt_list
+
+
+def _detect_one_import_drivers(search_path):
+ """Looks for import drivers in given directory
+
+ Args:
+        search_path: path to the directory to search for import drivers
+
+ Returns:
+ dict: each entry is related to single detected driver,
+ key is a config section name, value is a driver name
+
+ """
+ import_drivers_dict = {}
+ for module_name in os.listdir(search_path):
+ full_path = os.path.join(search_path, module_name)
+ if not os.path.isfile(full_path):
+ continue
+        if not module_name.startswith("one-import-"):
+ continue
+ module_loader = importlib.machinery.SourceFileLoader(module_name, full_path)
+ module_spec = importlib.util.spec_from_loader(module_name, module_loader)
+ module = importlib.util.module_from_spec(module_spec)
+ try:
+ module_loader.exec_module(module)
+ if hasattr(module, "get_driver_cfg_section"):
+ section = module.get_driver_cfg_section()
+ import_drivers_dict[section] = module_name
+        except Exception:
+            # ignore files that fail to load as python modules
+            pass
+ return import_drivers_dict
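
For reference, a script only needs a module-level get_driver_cfg_section() function to be picked up by the scan above. A minimal sketch of such a driver file (the name 'one-import-foo' and its section name are illustrative):

```python
# hypothetical contents of a file named 'one-import-foo' in the search path

def get_driver_cfg_section():
    # the returned section name becomes the key in the detected-drivers dict
    return 'one-import-foo'
```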
diff --git a/compiler/oneco/CMakeLists.txt b/compiler/oneco/CMakeLists.txt
index 418bc27ac..951194d9d 100644
--- a/compiler/oneco/CMakeLists.txt
+++ b/compiler/oneco/CMakeLists.txt
@@ -22,11 +22,11 @@ target_link_libraries(moco_onnx_frontend PUBLIC moco_onnx_proto)
target_link_libraries(moco_onnx_frontend PUBLIC loco)
target_link_libraries(moco_onnx_frontend PRIVATE cwrap)
-nnas_find_package(GTest QUIET)
-
-if(NOT GTest_FOUND)
+if(NOT ENABLE_TEST)
return()
-endif(NOT GTest_FOUND)
+endif(NOT ENABLE_TEST)
+
+nnas_find_package(GTest QUIET)
add_executable(moco_onnx_frontend_test ${TESTS})
target_include_directories(moco_onnx_frontend_test PRIVATE src)
diff --git a/compiler/pepper-strcast/CMakeLists.txt b/compiler/pepper-strcast/CMakeLists.txt
index 5f87e9488..bcc07f482 100644
--- a/compiler/pepper-strcast/CMakeLists.txt
+++ b/compiler/pepper-strcast/CMakeLists.txt
@@ -3,7 +3,9 @@ file(GLOB_RECURSE TESTS "src/*.test.cpp")
list(REMOVE_ITEM SOURCES ${TESTS})
add_library(pepper_strcast STATIC ${SOURCES})
-set_target_properties(pepper_strcast PROPERTIES POSITION_INDEPENDENT_CODE ON)
+if (NOT NNCC_LIBRARY_NO_PIC)
+ set_target_properties(pepper_strcast PROPERTIES POSITION_INDEPENDENT_CODE ON)
+endif(NOT NNCC_LIBRARY_NO_PIC)
target_include_directories(pepper_strcast PUBLIC include)
target_link_libraries(pepper_strcast PRIVATE nncc_common)
target_link_libraries(pepper_strcast PUBLIC nncc_coverage)
diff --git a/compiler/pota-quantization-value-test/CMakeLists.txt b/compiler/pota-quantization-value-test/CMakeLists.txt
index 00ffb57de..51fd9a391 100644
--- a/compiler/pota-quantization-value-test/CMakeLists.txt
+++ b/compiler/pota-quantization-value-test/CMakeLists.txt
@@ -1,7 +1,9 @@
unset(QUANTIZATION_VALUE_TEST)
unset(QUANTIZATION_VALUE_TEST_WITH_PARAM)
+unset(QUANTIZATION_CONFIG_VALUE_TEST)
+unset(QUANTIZATION_CONFIG_VALUE_TEST_WITH_PARAM)
-nnas_find_package(FlatBuffers EXACT 1.10 QUIET)
+nnas_find_package(FlatBuffers EXACT 2.0 QUIET)
if(NOT FlatBuffers_FOUND)
message(STATUS "Build pota-quantization-value-test: FAILED (missing FlatBuffers)")
return()
@@ -12,6 +14,11 @@ macro(addTest NAME GRANULARITY DTYPE)
list(APPEND QUANTIZATION_VALUE_TEST_WITH_PARAM ${NAME} ${GRANULARITY} ${DTYPE})
endmacro(addTest)
+macro(addQConfTest NAME GRANULARITY DTYPE)
+ list(APPEND QUANTIZATION_CONFIG_VALUE_TEST ${NAME})
+ list(APPEND QUANTIZATION_CONFIG_VALUE_TEST_WITH_PARAM ${NAME} ${GRANULARITY} ${DTYPE})
+endmacro(addQConfTest)
+
# Read "test.lst"
include("test.lst")
# Read "test.local.lst" if exists
@@ -20,12 +27,12 @@ include("test.local.lst" OPTIONAL)
unset(TEST_DEPS)
get_target_property(ARTIFACTS_BIN_PATH testDataGenerator BINARY_DIR)
-get_target_property(SCHEMA_BIN_PATH mio_circle BINARY_DIR)
+get_target_property(SCHEMA_BIN_PATH mio_circle04 BINARY_DIR)
configure_file("${CMAKE_CURRENT_SOURCE_DIR}/gen_h5_explicit_inputs.py"
"${CMAKE_CURRENT_BINARY_DIR}/gen_h5_explicit_inputs.py" COPYONLY)
-set(VIRTUALENV "${NNCC_OVERLAY_DIR}/venv_2_6_0")
+set(VIRTUALENV "${NNCC_OVERLAY_DIR}/venv_2_8_0")
###
### Generate test.config
@@ -89,5 +96,22 @@ add_test(
${QUANTIZATION_VALUE_TEST_WITH_PARAM}
)
+add_test(
+ NAME pota_fake_wquant_test_with_config
+ COMMAND "${CMAKE_CURRENT_SOURCE_DIR}/test_fake_wquant_with_config.sh"
+ "${TEST_CONFIG}"
+ "${ARTIFACTS_BIN_PATH}"
+ ${QUANTIZATION_CONFIG_VALUE_TEST_WITH_PARAM}
+)
+
+add_test(
+ NAME pota_quantization_test_with_config
+ COMMAND "${CMAKE_CURRENT_SOURCE_DIR}/test_quantization_with_config.sh"
+ "${TEST_CONFIG}"
+ "${ARTIFACTS_BIN_PATH}"
+ ${QUANTIZATION_CONFIG_VALUE_TEST_WITH_PARAM}
+)
+
set_tests_properties(pota_record_minmax_test PROPERTIES DEPENDS pota_fake_wquant_test)
set_tests_properties(pota_quantization_test PROPERTIES DEPENDS pota_record_minmax_test)
+set_tests_properties(pota_quantization_test_with_config PROPERTIES DEPENDS pota_fake_wquant_test_with_config)
diff --git a/compiler/pota-quantization-value-test/config_files/Add_002/channel/int16/qconf.json b/compiler/pota-quantization-value-test/config_files/Add_002/channel/int16/qconf.json
new file mode 100644
index 000000000..838b331fd
--- /dev/null
+++ b/compiler/pota-quantization-value-test/config_files/Add_002/channel/int16/qconf.json
@@ -0,0 +1,9 @@
+{
+ "layers" : [
+ {
+ "name" : "ofm",
+ "dtype" : "uint8",
+ "granularity" : "layer"
+ }
+ ]
+}
diff --git a/compiler/pota-quantization-value-test/config_files/Add_002/layer/uint8/qconf.json b/compiler/pota-quantization-value-test/config_files/Add_002/layer/uint8/qconf.json
new file mode 100644
index 000000000..7cd6ce713
--- /dev/null
+++ b/compiler/pota-quantization-value-test/config_files/Add_002/layer/uint8/qconf.json
@@ -0,0 +1,9 @@
+{
+ "layers" : [
+ {
+ "name" : "ofm",
+ "dtype" : "int16",
+ "granularity" : "channel"
+ }
+ ]
+}
diff --git a/compiler/pota-quantization-value-test/config_files/AveragePool2D_000/channel/int16/qconf.json b/compiler/pota-quantization-value-test/config_files/AveragePool2D_000/channel/int16/qconf.json
new file mode 100644
index 000000000..838b331fd
--- /dev/null
+++ b/compiler/pota-quantization-value-test/config_files/AveragePool2D_000/channel/int16/qconf.json
@@ -0,0 +1,9 @@
+{
+ "layers" : [
+ {
+ "name" : "ofm",
+ "dtype" : "uint8",
+ "granularity" : "layer"
+ }
+ ]
+}
diff --git a/compiler/pota-quantization-value-test/config_files/AveragePool2D_000/layer/uint8/qconf.json b/compiler/pota-quantization-value-test/config_files/AveragePool2D_000/layer/uint8/qconf.json
new file mode 100644
index 000000000..7cd6ce713
--- /dev/null
+++ b/compiler/pota-quantization-value-test/config_files/AveragePool2D_000/layer/uint8/qconf.json
@@ -0,0 +1,9 @@
+{
+ "layers" : [
+ {
+ "name" : "ofm",
+ "dtype" : "int16",
+ "granularity" : "channel"
+ }
+ ]
+}
diff --git a/compiler/pota-quantization-value-test/config_files/Concatenation_001/channel/int16/qconf.json b/compiler/pota-quantization-value-test/config_files/Concatenation_001/channel/int16/qconf.json
new file mode 100644
index 000000000..838b331fd
--- /dev/null
+++ b/compiler/pota-quantization-value-test/config_files/Concatenation_001/channel/int16/qconf.json
@@ -0,0 +1,9 @@
+{
+ "layers" : [
+ {
+ "name" : "ofm",
+ "dtype" : "uint8",
+ "granularity" : "layer"
+ }
+ ]
+}
diff --git a/compiler/pota-quantization-value-test/config_files/Concatenation_001/layer/uint8/qconf.json b/compiler/pota-quantization-value-test/config_files/Concatenation_001/layer/uint8/qconf.json
new file mode 100644
index 000000000..7cd6ce713
--- /dev/null
+++ b/compiler/pota-quantization-value-test/config_files/Concatenation_001/layer/uint8/qconf.json
@@ -0,0 +1,9 @@
+{
+ "layers" : [
+ {
+ "name" : "ofm",
+ "dtype" : "int16",
+ "granularity" : "channel"
+ }
+ ]
+}
diff --git a/compiler/pota-quantization-value-test/config_files/Conv2D_004/channel/int16/qconf.json b/compiler/pota-quantization-value-test/config_files/Conv2D_004/channel/int16/qconf.json
new file mode 100644
index 000000000..838b331fd
--- /dev/null
+++ b/compiler/pota-quantization-value-test/config_files/Conv2D_004/channel/int16/qconf.json
@@ -0,0 +1,9 @@
+{
+ "layers" : [
+ {
+ "name" : "ofm",
+ "dtype" : "uint8",
+ "granularity" : "layer"
+ }
+ ]
+}
diff --git a/compiler/pota-quantization-value-test/config_files/Conv2D_004/layer/uint8/qconf.json b/compiler/pota-quantization-value-test/config_files/Conv2D_004/layer/uint8/qconf.json
new file mode 100644
index 000000000..7cd6ce713
--- /dev/null
+++ b/compiler/pota-quantization-value-test/config_files/Conv2D_004/layer/uint8/qconf.json
@@ -0,0 +1,9 @@
+{
+ "layers" : [
+ {
+ "name" : "ofm",
+ "dtype" : "int16",
+ "granularity" : "channel"
+ }
+ ]
+}
diff --git a/compiler/pota-quantization-value-test/config_files/DepthwiseConv2D_002/channel/int16/qconf.json b/compiler/pota-quantization-value-test/config_files/DepthwiseConv2D_002/channel/int16/qconf.json
new file mode 100644
index 000000000..838b331fd
--- /dev/null
+++ b/compiler/pota-quantization-value-test/config_files/DepthwiseConv2D_002/channel/int16/qconf.json
@@ -0,0 +1,9 @@
+{
+ "layers" : [
+ {
+ "name" : "ofm",
+ "dtype" : "uint8",
+ "granularity" : "layer"
+ }
+ ]
+}
diff --git a/compiler/pota-quantization-value-test/config_files/DepthwiseConv2D_002/layer/uint8/qconf.json b/compiler/pota-quantization-value-test/config_files/DepthwiseConv2D_002/layer/uint8/qconf.json
new file mode 100644
index 000000000..7cd6ce713
--- /dev/null
+++ b/compiler/pota-quantization-value-test/config_files/DepthwiseConv2D_002/layer/uint8/qconf.json
@@ -0,0 +1,9 @@
+{
+ "layers" : [
+ {
+ "name" : "ofm",
+ "dtype" : "int16",
+ "granularity" : "channel"
+ }
+ ]
+}
diff --git a/compiler/pota-quantization-value-test/config_files/FullyConnected_003/channel/int16/qconf.json b/compiler/pota-quantization-value-test/config_files/FullyConnected_003/channel/int16/qconf.json
new file mode 100644
index 000000000..174d6e9b0
--- /dev/null
+++ b/compiler/pota-quantization-value-test/config_files/FullyConnected_003/channel/int16/qconf.json
@@ -0,0 +1,9 @@
+{
+ "layers" : [
+ {
+ "name" : "out",
+ "dtype" : "uint8",
+ "granularity" : "layer"
+ }
+ ]
+}
diff --git a/compiler/pota-quantization-value-test/config_files/FullyConnected_003/layer/uint8/qconf.json b/compiler/pota-quantization-value-test/config_files/FullyConnected_003/layer/uint8/qconf.json
new file mode 100644
index 000000000..733f46e60
--- /dev/null
+++ b/compiler/pota-quantization-value-test/config_files/FullyConnected_003/layer/uint8/qconf.json
@@ -0,0 +1,9 @@
+{
+ "layers" : [
+ {
+ "name" : "out",
+ "dtype" : "int16",
+ "granularity" : "channel"
+ }
+ ]
+}
diff --git a/compiler/pota-quantization-value-test/config_files/InstanceNorm_001/channel/int16/qconf.json b/compiler/pota-quantization-value-test/config_files/InstanceNorm_001/channel/int16/qconf.json
new file mode 100644
index 000000000..838b331fd
--- /dev/null
+++ b/compiler/pota-quantization-value-test/config_files/InstanceNorm_001/channel/int16/qconf.json
@@ -0,0 +1,9 @@
+{
+ "layers" : [
+ {
+ "name" : "ofm",
+ "dtype" : "uint8",
+ "granularity" : "layer"
+ }
+ ]
+}
diff --git a/compiler/pota-quantization-value-test/config_files/InstanceNorm_001/layer/uint8/qconf.json b/compiler/pota-quantization-value-test/config_files/InstanceNorm_001/layer/uint8/qconf.json
new file mode 100644
index 000000000..7cd6ce713
--- /dev/null
+++ b/compiler/pota-quantization-value-test/config_files/InstanceNorm_001/layer/uint8/qconf.json
@@ -0,0 +1,9 @@
+{
+ "layers" : [
+ {
+ "name" : "ofm",
+ "dtype" : "int16",
+ "granularity" : "channel"
+ }
+ ]
+}
diff --git a/compiler/pota-quantization-value-test/config_files/MaxPool2D_000/channel/int16/qconf.json b/compiler/pota-quantization-value-test/config_files/MaxPool2D_000/channel/int16/qconf.json
new file mode 100644
index 000000000..838b331fd
--- /dev/null
+++ b/compiler/pota-quantization-value-test/config_files/MaxPool2D_000/channel/int16/qconf.json
@@ -0,0 +1,9 @@
+{
+ "layers" : [
+ {
+ "name" : "ofm",
+ "dtype" : "uint8",
+ "granularity" : "layer"
+ }
+ ]
+}
diff --git a/compiler/pota-quantization-value-test/config_files/MaxPool2D_000/layer/uint8/qconf.json b/compiler/pota-quantization-value-test/config_files/MaxPool2D_000/layer/uint8/qconf.json
new file mode 100644
index 000000000..7cd6ce713
--- /dev/null
+++ b/compiler/pota-quantization-value-test/config_files/MaxPool2D_000/layer/uint8/qconf.json
@@ -0,0 +1,9 @@
+{
+ "layers" : [
+ {
+ "name" : "ofm",
+ "dtype" : "int16",
+ "granularity" : "channel"
+ }
+ ]
+}
diff --git a/compiler/pota-quantization-value-test/config_files/Mean_000/channel/int16/qconf.json b/compiler/pota-quantization-value-test/config_files/Mean_000/channel/int16/qconf.json
new file mode 100644
index 000000000..838b331fd
--- /dev/null
+++ b/compiler/pota-quantization-value-test/config_files/Mean_000/channel/int16/qconf.json
@@ -0,0 +1,9 @@
+{
+ "layers" : [
+ {
+ "name" : "ofm",
+ "dtype" : "uint8",
+ "granularity" : "layer"
+ }
+ ]
+}
diff --git a/compiler/pota-quantization-value-test/config_files/Mean_000/layer/uint8/qconf.json b/compiler/pota-quantization-value-test/config_files/Mean_000/layer/uint8/qconf.json
new file mode 100644
index 000000000..7cd6ce713
--- /dev/null
+++ b/compiler/pota-quantization-value-test/config_files/Mean_000/layer/uint8/qconf.json
@@ -0,0 +1,9 @@
+{
+ "layers" : [
+ {
+ "name" : "ofm",
+ "dtype" : "int16",
+ "granularity" : "channel"
+ }
+ ]
+}
diff --git a/compiler/pota-quantization-value-test/config_files/Mul_001/channel/int16/qconf.json b/compiler/pota-quantization-value-test/config_files/Mul_001/channel/int16/qconf.json
new file mode 100644
index 000000000..838b331fd
--- /dev/null
+++ b/compiler/pota-quantization-value-test/config_files/Mul_001/channel/int16/qconf.json
@@ -0,0 +1,9 @@
+{
+ "layers" : [
+ {
+ "name" : "ofm",
+ "dtype" : "uint8",
+ "granularity" : "layer"
+ }
+ ]
+}
diff --git a/compiler/pota-quantization-value-test/config_files/Mul_001/layer/uint8/qconf.json b/compiler/pota-quantization-value-test/config_files/Mul_001/layer/uint8/qconf.json
new file mode 100644
index 000000000..7cd6ce713
--- /dev/null
+++ b/compiler/pota-quantization-value-test/config_files/Mul_001/layer/uint8/qconf.json
@@ -0,0 +1,9 @@
+{
+ "layers" : [
+ {
+ "name" : "ofm",
+ "dtype" : "int16",
+ "granularity" : "channel"
+ }
+ ]
+}
diff --git a/compiler/pota-quantization-value-test/config_files/PRelu_001/channel/int16/qconf.json b/compiler/pota-quantization-value-test/config_files/PRelu_001/channel/int16/qconf.json
new file mode 100644
index 000000000..838b331fd
--- /dev/null
+++ b/compiler/pota-quantization-value-test/config_files/PRelu_001/channel/int16/qconf.json
@@ -0,0 +1,9 @@
+{
+ "layers" : [
+ {
+ "name" : "ofm",
+ "dtype" : "uint8",
+ "granularity" : "layer"
+ }
+ ]
+}
diff --git a/compiler/pota-quantization-value-test/config_files/PRelu_001/layer/uint8/qconf.json b/compiler/pota-quantization-value-test/config_files/PRelu_001/layer/uint8/qconf.json
new file mode 100644
index 000000000..7cd6ce713
--- /dev/null
+++ b/compiler/pota-quantization-value-test/config_files/PRelu_001/layer/uint8/qconf.json
@@ -0,0 +1,9 @@
+{
+ "layers" : [
+ {
+ "name" : "ofm",
+ "dtype" : "int16",
+ "granularity" : "channel"
+ }
+ ]
+}
diff --git a/compiler/pota-quantization-value-test/config_files/ReLU_000/channel/int16/qconf.json b/compiler/pota-quantization-value-test/config_files/ReLU_000/channel/int16/qconf.json
new file mode 100644
index 000000000..838b331fd
--- /dev/null
+++ b/compiler/pota-quantization-value-test/config_files/ReLU_000/channel/int16/qconf.json
@@ -0,0 +1,9 @@
+{
+ "layers" : [
+ {
+ "name" : "ofm",
+ "dtype" : "uint8",
+ "granularity" : "layer"
+ }
+ ]
+}
diff --git a/compiler/pota-quantization-value-test/config_files/ReLU_000/layer/uint8/qconf.json b/compiler/pota-quantization-value-test/config_files/ReLU_000/layer/uint8/qconf.json
new file mode 100644
index 000000000..7cd6ce713
--- /dev/null
+++ b/compiler/pota-quantization-value-test/config_files/ReLU_000/layer/uint8/qconf.json
@@ -0,0 +1,9 @@
+{
+ "layers" : [
+ {
+ "name" : "ofm",
+ "dtype" : "int16",
+ "granularity" : "channel"
+ }
+ ]
+}
diff --git a/compiler/pota-quantization-value-test/config_files/Split_000/channel/int16/qconf.json b/compiler/pota-quantization-value-test/config_files/Split_000/channel/int16/qconf.json
new file mode 100644
index 000000000..630c3e420
--- /dev/null
+++ b/compiler/pota-quantization-value-test/config_files/Split_000/channel/int16/qconf.json
@@ -0,0 +1,14 @@
+{
+ "layers" : [
+ {
+ "name" : "ofm1",
+ "dtype" : "uint8",
+ "granularity" : "channel"
+ },
+ {
+ "name" : "ofm2",
+ "dtype" : "uint8",
+ "granularity" : "channel"
+ }
+ ]
+}
diff --git a/compiler/pota-quantization-value-test/config_files/Split_000/channel/uint8/qconf.json b/compiler/pota-quantization-value-test/config_files/Split_000/channel/uint8/qconf.json
new file mode 100644
index 000000000..cc98d7c62
--- /dev/null
+++ b/compiler/pota-quantization-value-test/config_files/Split_000/channel/uint8/qconf.json
@@ -0,0 +1,14 @@
+{
+ "layers" : [
+ {
+ "name" : "ofm1",
+ "dtype" : "int16",
+ "granularity" : "channel"
+ },
+ {
+ "name" : "ofm2",
+ "dtype" : "int16",
+ "granularity" : "channel"
+ }
+ ]
+}
diff --git a/compiler/pota-quantization-value-test/config_files/TransposeConv_001/channel/int16/qconf.json b/compiler/pota-quantization-value-test/config_files/TransposeConv_001/channel/int16/qconf.json
new file mode 100644
index 000000000..838b331fd
--- /dev/null
+++ b/compiler/pota-quantization-value-test/config_files/TransposeConv_001/channel/int16/qconf.json
@@ -0,0 +1,9 @@
+{
+ "layers" : [
+ {
+ "name" : "ofm",
+ "dtype" : "uint8",
+ "granularity" : "layer"
+ }
+ ]
+}
diff --git a/compiler/pota-quantization-value-test/config_files/TransposeConv_001/layer/uint8/qconf.json b/compiler/pota-quantization-value-test/config_files/TransposeConv_001/layer/uint8/qconf.json
new file mode 100644
index 000000000..7cd6ce713
--- /dev/null
+++ b/compiler/pota-quantization-value-test/config_files/TransposeConv_001/layer/uint8/qconf.json
@@ -0,0 +1,9 @@
+{
+ "layers" : [
+ {
+ "name" : "ofm",
+ "dtype" : "int16",
+ "granularity" : "channel"
+ }
+ ]
+}
diff --git a/compiler/pota-quantization-value-test/expected_outputs/Add_002_config/channel/int16/quantization/ifm1_Quantize.json b/compiler/pota-quantization-value-test/expected_outputs/Add_002_config/channel/int16/quantization/ifm1_Quantize.json
new file mode 100644
index 000000000..a223fa4aa
--- /dev/null
+++ b/compiler/pota-quantization-value-test/expected_outputs/Add_002_config/channel/int16/quantization/ifm1_Quantize.json
@@ -0,0 +1,4 @@
+{
+ "scale": 0.038489170372486115,
+ "zero_point": 129.0
+}
diff --git a/compiler/pota-quantization-value-test/expected_outputs/Add_002_config/channel/int16/quantization/ifm2.json b/compiler/pota-quantization-value-test/expected_outputs/Add_002_config/channel/int16/quantization/ifm2.json
new file mode 100644
index 000000000..ec6082d55
--- /dev/null
+++ b/compiler/pota-quantization-value-test/expected_outputs/Add_002_config/channel/int16/quantization/ifm2.json
@@ -0,0 +1,32 @@
+{
+ "weights": [
+ [
+ [
+ [
+ 136,
+ 153,
+ 68
+ ],
+ [
+ 51,
+ 34,
+ 221
+ ]
+ ],
+ [
+ [
+ 0,
+ 255,
+ 187
+ ],
+ [
+ 85,
+ 170,
+ 102
+ ]
+ ]
+ ]
+ ],
+ "scale": 0.05882352963089943,
+ "zero_point": 119.0
+}
diff --git a/compiler/pota-quantization-value-test/expected_outputs/Add_002_config/channel/int16/quantization/ofm.json b/compiler/pota-quantization-value-test/expected_outputs/Add_002_config/channel/int16/quantization/ofm.json
new file mode 100644
index 000000000..afa9b1a8e
--- /dev/null
+++ b/compiler/pota-quantization-value-test/expected_outputs/Add_002_config/channel/int16/quantization/ofm.json
@@ -0,0 +1,4 @@
+{
+ "scale": 0.0892433300614357,
+ "zero_point": 134.0
+}
diff --git a/compiler/pota-quantization-value-test/expected_outputs/Add_002_config/layer/uint8/quantization/ifm1_Quantize.json b/compiler/pota-quantization-value-test/expected_outputs/Add_002_config/layer/uint8/quantization/ifm1_Quantize.json
new file mode 100644
index 000000000..a7298cb58
--- /dev/null
+++ b/compiler/pota-quantization-value-test/expected_outputs/Add_002_config/layer/uint8/quantization/ifm1_Quantize.json
@@ -0,0 +1,4 @@
+{
+ "scale": 0.00014653272228315473,
+ "zero_point": 0.0
+}
diff --git a/compiler/pota-quantization-value-test/expected_outputs/Add_002_config/layer/uint8/quantization/ifm2.json b/compiler/pota-quantization-value-test/expected_outputs/Add_002_config/layer/uint8/quantization/ifm2.json
new file mode 100644
index 000000000..ab968c9fc
--- /dev/null
+++ b/compiler/pota-quantization-value-test/expected_outputs/Add_002_config/layer/uint8/quantization/ifm2.json
@@ -0,0 +1,32 @@
+{
+ "weights": [
+ [
+ [
+ [
+ 4096,
+ 8192,
+ -12288
+ ],
+ [
+ -16384,
+ -20479,
+ 24575
+ ]
+ ],
+ [
+ [
+ -28671,
+ 32767,
+ 16384
+ ],
+ [
+ -8192,
+ 12288,
+ -4096
+ ]
+ ]
+ ]
+ ],
+ "scale": 0.0002441480755805969,
+ "zero_point": 0.0
+}
diff --git a/compiler/pota-quantization-value-test/expected_outputs/Add_002_config/layer/uint8/quantization/ofm.json b/compiler/pota-quantization-value-test/expected_outputs/Add_002_config/layer/uint8/quantization/ofm.json
new file mode 100644
index 000000000..3cb0552e9
--- /dev/null
+++ b/compiler/pota-quantization-value-test/expected_outputs/Add_002_config/layer/uint8/quantization/ofm.json
@@ -0,0 +1,4 @@
+{
+ "scale": 0.00037035736022517085,
+ "zero_point": 0.0
+}
diff --git a/compiler/pota-quantization-value-test/expected_outputs/AveragePool2D_000_config/channel/int16/quantization/ifm_Quantize.json b/compiler/pota-quantization-value-test/expected_outputs/AveragePool2D_000_config/channel/int16/quantization/ifm_Quantize.json
new file mode 100644
index 000000000..0528cc9cc
--- /dev/null
+++ b/compiler/pota-quantization-value-test/expected_outputs/AveragePool2D_000_config/channel/int16/quantization/ifm_Quantize.json
@@ -0,0 +1,4 @@
+{
+ "scale": 0.03911808878183365,
+ "zero_point": 127.0
+}
diff --git a/compiler/pota-quantization-value-test/expected_outputs/AveragePool2D_000_config/channel/int16/quantization/ofm.json b/compiler/pota-quantization-value-test/expected_outputs/AveragePool2D_000_config/channel/int16/quantization/ofm.json
new file mode 100644
index 000000000..ac5da0bda
--- /dev/null
+++ b/compiler/pota-quantization-value-test/expected_outputs/AveragePool2D_000_config/channel/int16/quantization/ofm.json
@@ -0,0 +1,4 @@
+{
+ "scale": 0.027372928336262703,
+ "zero_point": 141.0
+}
diff --git a/compiler/pota-quantization-value-test/expected_outputs/AveragePool2D_000_config/layer/uint8/quantization/ifm_Quantize.json b/compiler/pota-quantization-value-test/expected_outputs/AveragePool2D_000_config/layer/uint8/quantization/ifm_Quantize.json
new file mode 100644
index 000000000..353f15a6b
--- /dev/null
+++ b/compiler/pota-quantization-value-test/expected_outputs/AveragePool2D_000_config/layer/uint8/quantization/ifm_Quantize.json
@@ -0,0 +1,4 @@
+{
+ "scale": 0.0001523942337371409,
+ "zero_point": 0.0
+}
diff --git a/compiler/pota-quantization-value-test/expected_outputs/AveragePool2D_000_config/layer/uint8/quantization/ofm.json b/compiler/pota-quantization-value-test/expected_outputs/AveragePool2D_000_config/layer/uint8/quantization/ofm.json
new file mode 100644
index 000000000..c4ace78d4
--- /dev/null
+++ b/compiler/pota-quantization-value-test/expected_outputs/AveragePool2D_000_config/layer/uint8/quantization/ofm.json
@@ -0,0 +1,4 @@
+{
+ "scale": 0.00012122748012188822,
+ "zero_point": 0.0
+}
diff --git a/compiler/pota-quantization-value-test/expected_outputs/Concatenation_001_config/channel/int16/quantization/ifm1_Quantize.json b/compiler/pota-quantization-value-test/expected_outputs/Concatenation_001_config/channel/int16/quantization/ifm1_Quantize.json
new file mode 100644
index 000000000..522880618
--- /dev/null
+++ b/compiler/pota-quantization-value-test/expected_outputs/Concatenation_001_config/channel/int16/quantization/ifm1_Quantize.json
@@ -0,0 +1,4 @@
+{
+ "scale": 0.05882352963089943,
+ "zero_point": 119.0
+}
diff --git a/compiler/pota-quantization-value-test/expected_outputs/Concatenation_001_config/channel/int16/quantization/ifm2.json b/compiler/pota-quantization-value-test/expected_outputs/Concatenation_001_config/channel/int16/quantization/ifm2.json
new file mode 100644
index 000000000..17ba25363
--- /dev/null
+++ b/compiler/pota-quantization-value-test/expected_outputs/Concatenation_001_config/channel/int16/quantization/ifm2.json
@@ -0,0 +1,28 @@
+{
+ "weights": [
+ [
+ [
+ [
+ 136,
+ 153
+ ],
+ [
+ 68,
+ 51
+ ]
+ ],
+ [
+ [
+ 34,
+ 221
+ ],
+ [
+ 0,
+ 255
+ ]
+ ]
+ ]
+ ],
+ "scale": 0.05882352963089943,
+ "zero_point": 119.0
+}
diff --git a/compiler/pota-quantization-value-test/expected_outputs/Concatenation_001_config/channel/int16/quantization/ofm.json b/compiler/pota-quantization-value-test/expected_outputs/Concatenation_001_config/channel/int16/quantization/ofm.json
new file mode 100644
index 000000000..522880618
--- /dev/null
+++ b/compiler/pota-quantization-value-test/expected_outputs/Concatenation_001_config/channel/int16/quantization/ofm.json
@@ -0,0 +1,4 @@
+{
+ "scale": 0.05882352963089943,
+ "zero_point": 119.0
+}
diff --git a/compiler/pota-quantization-value-test/expected_outputs/Concatenation_001_config/layer/uint8/quantization/ifm1_Quantize.json b/compiler/pota-quantization-value-test/expected_outputs/Concatenation_001_config/layer/uint8/quantization/ifm1_Quantize.json
new file mode 100644
index 000000000..71265a270
--- /dev/null
+++ b/compiler/pota-quantization-value-test/expected_outputs/Concatenation_001_config/layer/uint8/quantization/ifm1_Quantize.json
@@ -0,0 +1,4 @@
+{
+ "scale": 0.0002441480755805969,
+ "zero_point": 0.0
+}
diff --git a/compiler/pota-quantization-value-test/expected_outputs/Concatenation_001_config/layer/uint8/quantization/ifm2.json b/compiler/pota-quantization-value-test/expected_outputs/Concatenation_001_config/layer/uint8/quantization/ifm2.json
new file mode 100644
index 000000000..53d7cdba3
--- /dev/null
+++ b/compiler/pota-quantization-value-test/expected_outputs/Concatenation_001_config/layer/uint8/quantization/ifm2.json
@@ -0,0 +1,28 @@
+{
+ "weights": [
+ [
+ [
+ [
+ 4096,
+ 8192
+ ],
+ [
+ -12288,
+ -16384
+ ]
+ ],
+ [
+ [
+ -20479,
+ 24575
+ ],
+ [
+ -28671,
+ 32767
+ ]
+ ]
+ ]
+ ],
+ "scale": 0.0002441480755805969,
+ "zero_point": 0.0
+}
diff --git a/compiler/pota-quantization-value-test/expected_outputs/Concatenation_001_config/layer/uint8/quantization/ofm.json b/compiler/pota-quantization-value-test/expected_outputs/Concatenation_001_config/layer/uint8/quantization/ofm.json
new file mode 100644
index 000000000..71265a270
--- /dev/null
+++ b/compiler/pota-quantization-value-test/expected_outputs/Concatenation_001_config/layer/uint8/quantization/ofm.json
@@ -0,0 +1,4 @@
+{
+ "scale": 0.0002441480755805969,
+ "zero_point": 0.0
+}
diff --git a/compiler/pota-quantization-value-test/expected_outputs/Conv2D_004_config/channel/int16/fake_quantization/ker.json b/compiler/pota-quantization-value-test/expected_outputs/Conv2D_004_config/channel/int16/fake_quantization/ker.json
new file mode 100644
index 000000000..2558bb2be
--- /dev/null
+++ b/compiler/pota-quantization-value-test/expected_outputs/Conv2D_004_config/channel/int16/fake_quantization/ker.json
@@ -0,0 +1,48 @@
+{
+ "weights": [
+ [
+ [
+ [
+ 1.0039215087890625,
+ 2.007843017578125
+ ],
+ [
+ -3.0117650032043457,
+ -4.015686511993408
+ ]
+ ],
+ [
+ [
+ -5.019608497619629,
+ 6.023530006408691
+ ],
+ [
+ -7.027451515197754,
+ 7.9686279296875
+ ]
+ ]
+ ],
+ [
+ [
+ [
+ 4.01568603515625,
+ -2.007843494415283
+ ],
+ [
+ 3.0117645263671875,
+ -1.0039215087890625
+ ]
+ ],
+ [
+ [
+ -7.9686279296875,
+ -6.023530006408691
+ ],
+ [
+ 7.027451515197754,
+ 5.019608497619629
+ ]
+ ]
+ ]
+ ]
+}
diff --git a/compiler/pota-quantization-value-test/expected_outputs/Conv2D_004_config/channel/int16/quantization/bias.json b/compiler/pota-quantization-value-test/expected_outputs/Conv2D_004_config/channel/int16/quantization/bias.json
new file mode 100644
index 000000000..50d44ece7
--- /dev/null
+++ b/compiler/pota-quantization-value-test/expected_outputs/Conv2D_004_config/channel/int16/quantization/bias.json
@@ -0,0 +1,7 @@
+{
+ "weights": [
+ 4069,
+ 8138
+ ],
+ "scale": 0.0002457468386200985
+}
diff --git a/compiler/pota-quantization-value-test/expected_outputs/Conv2D_004_config/channel/int16/quantization/ifm_Quantize.json b/compiler/pota-quantization-value-test/expected_outputs/Conv2D_004_config/channel/int16/quantization/ifm_Quantize.json
new file mode 100644
index 000000000..24508860d
--- /dev/null
+++ b/compiler/pota-quantization-value-test/expected_outputs/Conv2D_004_config/channel/int16/quantization/ifm_Quantize.json
@@ -0,0 +1,4 @@
+{
+ "scale": 0.003916590008884668,
+ "zero_point": 0.0
+}
diff --git a/compiler/pota-quantization-value-test/expected_outputs/Conv2D_004_config/channel/int16/quantization/ker.json b/compiler/pota-quantization-value-test/expected_outputs/Conv2D_004_config/channel/int16/quantization/ker.json
new file mode 100644
index 000000000..b249a0ce5
--- /dev/null
+++ b/compiler/pota-quantization-value-test/expected_outputs/Conv2D_004_config/channel/int16/quantization/ker.json
@@ -0,0 +1,52 @@
+{
+ "weights": [
+ [
+ [
+ [
+ 143,
+ 159
+ ],
+ [
+ 79,
+ 63
+ ]
+ ],
+ [
+ [
+ 47,
+ 223
+ ],
+ [
+ 15,
+ 254
+ ]
+ ]
+ ],
+ [
+ [
+ [
+ 191,
+ 95
+ ],
+ [
+ 175,
+ 111
+ ]
+ ],
+ [
+ [
+ 0,
+ 31
+ ],
+ [
+ 239,
+ 207
+ ]
+ ]
+ ]
+ ],
+ "scale": 0.062745101749897,
+ "zero_point": 127.0,
+ "min": -7.9686279296875,
+ "max": 8.031373023986816
+}
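The bias scale above is not an independent quantity: for Conv2D the quantizer derives it as input_scale * kernel_scale, so the integer bias shares the scale of the integer accumulator. A quick sanity check against the three hunks above (values copied verbatim; the check itself is ours):

```python
import math

ifm_scale = 0.003916590008884668    # ifm_Quantize.json
ker_scale = 0.062745101749897       # ker.json
bias_scale = 0.0002457468386200985  # bias.json

# bias_scale == ifm_scale * ker_scale, up to float32 rounding
assert math.isclose(ifm_scale * ker_scale, bias_scale, rel_tol=1e-6)

# and the integer bias entries dequantize back to the float biases:
print([q * bias_scale for q in (4069, 8138)])  # ~[1.0, 2.0]
```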
diff --git a/compiler/pota-quantization-value-test/expected_outputs/Conv2D_004_config/channel/int16/quantization/ofm.json b/compiler/pota-quantization-value-test/expected_outputs/Conv2D_004_config/channel/int16/quantization/ofm.json
new file mode 100644
index 000000000..a2dd6681f
--- /dev/null
+++ b/compiler/pota-quantization-value-test/expected_outputs/Conv2D_004_config/channel/int16/quantization/ofm.json
@@ -0,0 +1,4 @@
+{
+ "scale": 0.037479765713214874,
+ "zero_point": 0.0
+}
diff --git a/compiler/pota-quantization-value-test/expected_outputs/Conv2D_004_config/layer/uint8/fake_quantization/ker.json b/compiler/pota-quantization-value-test/expected_outputs/Conv2D_004_config/layer/uint8/fake_quantization/ker.json
new file mode 100644
index 000000000..8817cbef7
--- /dev/null
+++ b/compiler/pota-quantization-value-test/expected_outputs/Conv2D_004_config/layer/uint8/fake_quantization/ker.json
@@ -0,0 +1,48 @@
+{
+ "weights": [
+ [
+ [
+ [
+ 1.000030517578125,
+ 2.00006103515625
+ ],
+ [
+ -3.000091552734375,
+ -4.0001220703125
+ ]
+ ],
+ [
+ [
+ -4.999908447265625,
+ 5.99993896484375
+ ],
+ [
+ -6.999969482421875,
+ 8.0
+ ]
+ ]
+ ],
+ [
+ [
+ [
+ 4.0001220703125,
+ -2.00006103515625
+ ],
+ [
+ 3.000091552734375,
+ -1.000030517578125
+ ]
+ ],
+ [
+ [
+ -8.0,
+ -5.99993896484375
+ ],
+ [
+ 6.999969482421875,
+ 4.999908447265625
+ ]
+ ]
+ ]
+ ]
+}
diff --git a/compiler/pota-quantization-value-test/expected_outputs/Conv2D_004_config/layer/uint8/quantization/bias.json b/compiler/pota-quantization-value-test/expected_outputs/Conv2D_004_config/layer/uint8/quantization/bias.json
new file mode 100644
index 000000000..b00d8d211
--- /dev/null
+++ b/compiler/pota-quantization-value-test/expected_outputs/Conv2D_004_config/layer/uint8/quantization/bias.json
@@ -0,0 +1,10 @@
+{
+ "weights": [
+ 26925029,
+ 53850057
+ ],
+ "scale": [
+ 3.714016479907864e-08,
+ 3.714016479907864e-08
+ ]
+}
diff --git a/compiler/pota-quantization-value-test/expected_outputs/Conv2D_004_config/layer/uint8/quantization/ifm_Quantize.json b/compiler/pota-quantization-value-test/expected_outputs/Conv2D_004_config/layer/uint8/quantization/ifm_Quantize.json
new file mode 100644
index 000000000..df5d06c09
--- /dev/null
+++ b/compiler/pota-quantization-value-test/expected_outputs/Conv2D_004_config/layer/uint8/quantization/ifm_Quantize.json
@@ -0,0 +1,4 @@
+{
+ "scale": 0.00015212147263810039,
+ "zero_point": 0.0
+}
diff --git a/compiler/pota-quantization-value-test/expected_outputs/Conv2D_004_config/layer/uint8/quantization/ker.json b/compiler/pota-quantization-value-test/expected_outputs/Conv2D_004_config/layer/uint8/quantization/ker.json
new file mode 100644
index 000000000..94c794fbb
--- /dev/null
+++ b/compiler/pota-quantization-value-test/expected_outputs/Conv2D_004_config/layer/uint8/quantization/ker.json
@@ -0,0 +1,61 @@
+{
+ "weights": [
+ [
+ [
+ [
+ 4096,
+ 8192
+ ],
+ [
+ -12288,
+ -16384
+ ]
+ ],
+ [
+ [
+ -20479,
+ 24575
+ ],
+ [
+ -28671,
+ 32767
+ ]
+ ]
+ ],
+ [
+ [
+ [
+ 16384,
+ -8192
+ ],
+ [
+ 12288,
+ -4096
+ ]
+ ],
+ [
+ [
+ -32767,
+ -24575
+ ],
+ [
+ 28671,
+ 20479
+ ]
+ ]
+ ]
+ ],
+ "scale": [
+ 0.00024414807580797754,
+ 0.00024414807580797754
+ ],
+ "zero_point": 0.0,
+ "min": [
+ -8.0,
+ -8.0
+ ],
+ "max": [
+ 8.0,
+ 8.0
+ ]
+}
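Although this file sits under layer/uint8, the parameters are symmetric int16, because the *_config variants override the base dtype through qconf.json: zero_point is 0 and scale = 8.0/32767 for both channels. The fake_quantization/ker.json earlier in this series is exactly this integer grid dequantized. A short reproduction (illustrative only):

```python
import numpy as np

scale = np.float32(8.0) / np.float32(32767)  # 0.00024414807580797754, as in ker.json
q = np.array([4096, 8192, -12288, -16384, -20479, 24575, -28671, 32767], dtype=np.int32)
fake = q.astype(np.float32) * scale          # first row of fake_quantization/ker.json
print(fake[0])                               # 1.0000305 (stored as 1.000030517578125)
```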
diff --git a/compiler/pota-quantization-value-test/expected_outputs/Conv2D_004_config/layer/uint8/quantization/ofm.json b/compiler/pota-quantization-value-test/expected_outputs/Conv2D_004_config/layer/uint8/quantization/ofm.json
new file mode 100644
index 000000000..e02eeb9dc
--- /dev/null
+++ b/compiler/pota-quantization-value-test/expected_outputs/Conv2D_004_config/layer/uint8/quantization/ofm.json
@@ -0,0 +1,4 @@
+{
+ "scale": 0.002048635622486472,
+ "zero_point": 0.0
+}
diff --git a/compiler/pota-quantization-value-test/expected_outputs/DepthwiseConv2D_002_config/channel/int16/uint8/fake_quantization/ker.json b/compiler/pota-quantization-value-test/expected_outputs/DepthwiseConv2D_002_config/channel/int16/uint8/fake_quantization/ker.json
new file mode 100644
index 000000000..cd3479781
--- /dev/null
+++ b/compiler/pota-quantization-value-test/expected_outputs/DepthwiseConv2D_002_config/channel/int16/uint8/fake_quantization/ker.json
@@ -0,0 +1,34 @@
+{
+ "weights": [
+ [
+ [
+ [
+ 0.9725494384765625,
+ 1.945098876953125,
+ 3.039216995239258,
+ 4.0117645263671875
+ ],
+ [
+ -8.996077537536621,
+ 9.9686279296875,
+ -10.94117546081543,
+ 12.035295486450195
+ ]
+ ],
+ [
+ [
+ 4.98431396484375,
+ 5.9568634033203125,
+ 7.050981521606445,
+ 8.023530960083008
+ ],
+ [
+ 13.007843017578125,
+ -13.980391502380371,
+ 14.95294189453125,
+ -16.04705810546875
+ ]
+ ]
+ ]
+ ]
+}
diff --git a/compiler/pota-quantization-value-test/expected_outputs/DepthwiseConv2D_002_config/channel/int16/uint8/quantization/bias.json b/compiler/pota-quantization-value-test/expected_outputs/DepthwiseConv2D_002_config/channel/int16/uint8/quantization/bias.json
new file mode 100644
index 000000000..e60ff312e
--- /dev/null
+++ b/compiler/pota-quantization-value-test/expected_outputs/DepthwiseConv2D_002_config/channel/int16/uint8/quantization/bias.json
@@ -0,0 +1,9 @@
+{
+ "weights": [
+ 2156,
+ 4312,
+ 6468,
+ 8624
+ ],
+ "scale": 0.0004638272181067826
+}
diff --git a/compiler/pota-quantization-value-test/expected_outputs/DepthwiseConv2D_002_config/channel/int16/uint8/quantization/ifm_Quantize.json b/compiler/pota-quantization-value-test/expected_outputs/DepthwiseConv2D_002_config/channel/int16/uint8/quantization/ifm_Quantize.json
new file mode 100644
index 000000000..4ec4ef2d7
--- /dev/null
+++ b/compiler/pota-quantization-value-test/expected_outputs/DepthwiseConv2D_002_config/channel/int16/uint8/quantization/ifm_Quantize.json
@@ -0,0 +1,4 @@
+{
+ "scale": 0.0038153529167175293,
+ "zero_point": 0.0
+}
diff --git a/compiler/pota-quantization-value-test/expected_outputs/DepthwiseConv2D_002_config/channel/int16/uint8/quantization/ker.json b/compiler/pota-quantization-value-test/expected_outputs/DepthwiseConv2D_002_config/channel/int16/uint8/quantization/ker.json
new file mode 100644
index 000000000..01835fbde
--- /dev/null
+++ b/compiler/pota-quantization-value-test/expected_outputs/DepthwiseConv2D_002_config/channel/int16/uint8/quantization/ker.json
@@ -0,0 +1,38 @@
+{
+ "weights": [
+ [
+ [
+ [
+ 140,
+ 148,
+ 157,
+ 165
+ ],
+ [
+ 58,
+ 214,
+ 42,
+ 231
+ ]
+ ],
+ [
+ [
+ 173,
+ 181,
+ 190,
+ 198
+ ],
+ [
+ 239,
+ 17,
+ 255,
+ 0
+ ]
+ ]
+ ]
+ ],
+ "scale": 0.12156862765550613,
+ "zero_point": 132.0,
+ "min": -16.04705810546875,
+ "max": 14.952940940856934
+}
diff --git a/compiler/pota-quantization-value-test/expected_outputs/DepthwiseConv2D_002_config/channel/int16/uint8/quantization/ofm.json b/compiler/pota-quantization-value-test/expected_outputs/DepthwiseConv2D_002_config/channel/int16/uint8/quantization/ofm.json
new file mode 100644
index 000000000..39c64f3ef
--- /dev/null
+++ b/compiler/pota-quantization-value-test/expected_outputs/DepthwiseConv2D_002_config/channel/int16/uint8/quantization/ofm.json
@@ -0,0 +1,4 @@
+{
+ "scale": 0.07362665981054306,
+ "zero_point": 0.0
+}
diff --git a/compiler/pota-quantization-value-test/expected_outputs/DepthwiseConv2D_002_config/layer/uint8/fake_quantization/ker.json b/compiler/pota-quantization-value-test/expected_outputs/DepthwiseConv2D_002_config/layer/uint8/fake_quantization/ker.json
new file mode 100644
index 000000000..20c1f6759
--- /dev/null
+++ b/compiler/pota-quantization-value-test/expected_outputs/DepthwiseConv2D_002_config/layer/uint8/fake_quantization/ker.json
@@ -0,0 +1,34 @@
+{
+ "weights": [
+ [
+ [
+ [
+ 1.00018310546875,
+ 2.0,
+ 2.99981689453125,
+ 4.0001220703125
+ ],
+ [
+ -9.00006103515625,
+ 10.0,
+ -10.99993896484375,
+ 11.9998779296875
+ ]
+ ],
+ [
+ [
+ 5.0001220703125,
+ 6.0,
+ 6.9998779296875,
+ 8.000244140625
+ ],
+ [
+ 13.0,
+ -14.0,
+ 15.0,
+ -16.0
+ ]
+ ]
+ ]
+ ]
+}
diff --git a/compiler/pota-quantization-value-test/expected_outputs/DepthwiseConv2D_002_config/layer/uint8/quantization/bias.json b/compiler/pota-quantization-value-test/expected_outputs/DepthwiseConv2D_002_config/layer/uint8/quantization/bias.json
new file mode 100644
index 000000000..632333144
--- /dev/null
+++ b/compiler/pota-quantization-value-test/expected_outputs/DepthwiseConv2D_002_config/layer/uint8/quantization/bias.json
@@ -0,0 +1,14 @@
+{
+ "weights": [
+ 17503969,
+ 32507370,
+ 45510319,
+ 56887898
+ ],
+ "scale": [
+ 5.7129901172951205e-08,
+ 6.152450895548591e-08,
+ 6.591911673802062e-08,
+ 7.031372452055533e-08
+ ]
+}
diff --git a/compiler/pota-quantization-value-test/expected_outputs/DepthwiseConv2D_002_config/layer/uint8/quantization/ifm_Quantize.json b/compiler/pota-quantization-value-test/expected_outputs/DepthwiseConv2D_002_config/layer/uint8/quantization/ifm_Quantize.json
new file mode 100644
index 000000000..7105a686d
--- /dev/null
+++ b/compiler/pota-quantization-value-test/expected_outputs/DepthwiseConv2D_002_config/layer/uint8/quantization/ifm_Quantize.json
@@ -0,0 +1,4 @@
+{
+ "scale": 0.00014399811334442347,
+ "zero_point": 0.0
+}
diff --git a/compiler/pota-quantization-value-test/expected_outputs/DepthwiseConv2D_002_config/layer/uint8/quantization/ker.json b/compiler/pota-quantization-value-test/expected_outputs/DepthwiseConv2D_002_config/layer/uint8/quantization/ker.json
new file mode 100644
index 000000000..d465a7c17
--- /dev/null
+++ b/compiler/pota-quantization-value-test/expected_outputs/DepthwiseConv2D_002_config/layer/uint8/quantization/ker.json
@@ -0,0 +1,53 @@
+{
+ "weights": [
+ [
+ [
+ [
+ 2521,
+ 4681,
+ 6553,
+ 8192
+ ],
+ [
+ -22685,
+ 23405,
+ -24029,
+ 24575
+ ]
+ ],
+ [
+ [
+ 12603,
+ 14043,
+ 15291,
+ 16384
+ ],
+ [
+ 32767,
+ -32767,
+ 32767,
+ -32767
+ ]
+ ]
+ ]
+ ],
+ "scale": [
+ 0.0003967406231879635,
+ 0.0004272591326639607,
+ 0.0004577776421399579,
+ 0.0004882961516159551
+ ],
+ "zero_point": 0.0,
+ "min": [
+ -13.0,
+ -14.0,
+ -15.0,
+ -16.0
+ ],
+ "max": [
+ 13.0,
+ 14.0,
+ 15.0,
+ 16.0
+ ]
+}
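Channel-wise quantization keeps one scale per output channel, and both arrays above follow from the recorded per-channel ranges: the kernel scale is max(|min_c|, |max_c|)/32767 for each channel c, and the bias.json scales earlier in this series are the per-channel products ifm_scale * ker_scale_c. A check with the numbers copied from the hunks above (the verification code is ours):

```python
import math

ifm_scale = 0.00014399811334442347            # ifm_Quantize.json
mins = [-13.0, -14.0, -15.0, -16.0]           # ker.json "min"
maxs = [13.0, 14.0, 15.0, 16.0]               # ker.json "max"

ker_scales = [max(abs(lo), abs(hi)) / 32767 for lo, hi in zip(mins, maxs)]
bias_scales = [ifm_scale * s for s in ker_scales]

expected = [5.7129901172951205e-08, 6.152450895548591e-08,
            6.591911673802062e-08, 7.031372452055533e-08]  # bias.json
assert all(math.isclose(a, b, rel_tol=1e-6) for a, b in zip(bias_scales, expected))
```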
diff --git a/compiler/pota-quantization-value-test/expected_outputs/DepthwiseConv2D_002_config/layer/uint8/quantization/ofm.json b/compiler/pota-quantization-value-test/expected_outputs/DepthwiseConv2D_002_config/layer/uint8/quantization/ofm.json
new file mode 100644
index 000000000..2d84cd7d8
--- /dev/null
+++ b/compiler/pota-quantization-value-test/expected_outputs/DepthwiseConv2D_002_config/layer/uint8/quantization/ofm.json
@@ -0,0 +1,4 @@
+{
+ "scale": 0.0031168656423687935,
+ "zero_point": 0.0
+}
diff --git a/compiler/pota-quantization-value-test/expected_outputs/FullyConnected_003_config/channel/int16/fake_quantization/weight.json b/compiler/pota-quantization-value-test/expected_outputs/FullyConnected_003_config/channel/int16/fake_quantization/weight.json
new file mode 100644
index 000000000..e1da53ab0
--- /dev/null
+++ b/compiler/pota-quantization-value-test/expected_outputs/FullyConnected_003_config/channel/int16/fake_quantization/weight.json
@@ -0,0 +1,76 @@
+{
+ "weights": [
+ [
+ 1.0039215087890625,
+ 2.007843017578125,
+ -3.0117650032043457,
+ -4.015686511993408,
+ -5.019608497619629,
+ 6.023530006408691,
+ -7.027451515197754,
+ 7.9686279296875,
+ 4.01568603515625,
+ -2.007843494415283,
+ 3.0117645263671875,
+ -1.0039215087890625,
+ -7.9686279296875,
+ -6.023530006408691,
+ 7.027451515197754,
+ 5.019608497619629
+ ],
+ [
+ 1.0039215087890625,
+ 2.007843017578125,
+ -3.0117650032043457,
+ -4.015686511993408,
+ -5.019608497619629,
+ 6.023530006408691,
+ -7.027451515197754,
+ 7.9686279296875,
+ 4.01568603515625,
+ -2.007843494415283,
+ 3.0117645263671875,
+ -1.0039215087890625,
+ -7.9686279296875,
+ -6.023530006408691,
+ 7.027451515197754,
+ 5.019608497619629
+ ],
+ [
+ 1.0039215087890625,
+ 2.007843017578125,
+ -3.0117650032043457,
+ -4.015686511993408,
+ -5.019608497619629,
+ 6.023530006408691,
+ -7.027451515197754,
+ 7.9686279296875,
+ 4.01568603515625,
+ -2.007843494415283,
+ 3.0117645263671875,
+ -1.0039215087890625,
+ -7.9686279296875,
+ -6.023530006408691,
+ 7.027451515197754,
+ 5.019608497619629
+ ],
+ [
+ 1.0039215087890625,
+ 2.007843017578125,
+ -3.0117650032043457,
+ -4.015686511993408,
+ -5.019608497619629,
+ 6.023530006408691,
+ -7.027451515197754,
+ 7.9686279296875,
+ 4.01568603515625,
+ -2.007843494415283,
+ 3.0117645263671875,
+ -1.0039215087890625,
+ -7.9686279296875,
+ -6.023530006408691,
+ 7.027451515197754,
+ 5.019608497619629
+ ]
+ ]
+}
diff --git a/compiler/pota-quantization-value-test/expected_outputs/FullyConnected_003_config/channel/int16/quantization/bias.json b/compiler/pota-quantization-value-test/expected_outputs/FullyConnected_003_config/channel/int16/quantization/bias.json
new file mode 100644
index 000000000..ecb49bb64
--- /dev/null
+++ b/compiler/pota-quantization-value-test/expected_outputs/FullyConnected_003_config/channel/int16/quantization/bias.json
@@ -0,0 +1,9 @@
+{
+ "weights": [
+ 415,
+ -829,
+ -1244,
+ 1658
+ ],
+ "scale": 0.00241205753304663
+}
diff --git a/compiler/pota-quantization-value-test/expected_outputs/FullyConnected_003_config/channel/int16/quantization/in_Quantize.json b/compiler/pota-quantization-value-test/expected_outputs/FullyConnected_003_config/channel/int16/quantization/in_Quantize.json
new file mode 100644
index 000000000..654824b5d
--- /dev/null
+++ b/compiler/pota-quantization-value-test/expected_outputs/FullyConnected_003_config/channel/int16/quantization/in_Quantize.json
@@ -0,0 +1,4 @@
+{
+ "scale": 0.03844216465950012,
+ "zero_point": 126.0
+}
diff --git a/compiler/pota-quantization-value-test/expected_outputs/FullyConnected_003_config/channel/int16/quantization/out.json b/compiler/pota-quantization-value-test/expected_outputs/FullyConnected_003_config/channel/int16/quantization/out.json
new file mode 100644
index 000000000..3baa42155
--- /dev/null
+++ b/compiler/pota-quantization-value-test/expected_outputs/FullyConnected_003_config/channel/int16/quantization/out.json
@@ -0,0 +1,4 @@
+{
+ "scale": 0.741962730884552,
+ "zero_point": 156.0
+}
diff --git a/compiler/pota-quantization-value-test/expected_outputs/FullyConnected_003_config/channel/int16/quantization/weight.json b/compiler/pota-quantization-value-test/expected_outputs/FullyConnected_003_config/channel/int16/quantization/weight.json
new file mode 100644
index 000000000..940224049
--- /dev/null
+++ b/compiler/pota-quantization-value-test/expected_outputs/FullyConnected_003_config/channel/int16/quantization/weight.json
@@ -0,0 +1,80 @@
+{
+ "weights": [
+ [
+ 143,
+ 159,
+ 79,
+ 63,
+ 47,
+ 223,
+ 15,
+ 254,
+ 191,
+ 95,
+ 175,
+ 111,
+ 0,
+ 31,
+ 239,
+ 207
+ ],
+ [
+ 143,
+ 159,
+ 79,
+ 63,
+ 47,
+ 223,
+ 15,
+ 254,
+ 191,
+ 95,
+ 175,
+ 111,
+ 0,
+ 31,
+ 239,
+ 207
+ ],
+ [
+ 143,
+ 159,
+ 79,
+ 63,
+ 47,
+ 223,
+ 15,
+ 254,
+ 191,
+ 95,
+ 175,
+ 111,
+ 0,
+ 31,
+ 239,
+ 207
+ ],
+ [
+ 143,
+ 159,
+ 79,
+ 63,
+ 47,
+ 223,
+ 15,
+ 254,
+ 191,
+ 95,
+ 175,
+ 111,
+ 0,
+ 31,
+ 239,
+ 207
+ ]
+ ],
+ "scale": 0.062745101749897,
+ "zero_point": 127.0,
+ "min": -7.9686279296875,
+ "max": 8.031373023986816
+}
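In this channel/int16 config run the FC weights nevertheless come out as asymmetric uint8, which is the point of the qconf.json mixing; scale and zero_point follow directly from the recorded range: scale = (max - min)/255 and zero_point = round(-min/scale). With the numbers from weight.json above:

```python
w_min, w_max = -7.9686279296875, 8.031373023986816  # weight.json "min"/"max"
scale = (w_max - w_min) / 255       # 0.0627451..., stored as 0.062745101749897
zero_point = round(-w_min / scale)  # -> 127, as stored
print(scale, zero_point)
```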
diff --git a/compiler/pota-quantization-value-test/expected_outputs/FullyConnected_003_config/layer/uint8/fake_quantization/weight.json b/compiler/pota-quantization-value-test/expected_outputs/FullyConnected_003_config/layer/uint8/fake_quantization/weight.json
new file mode 100644
index 000000000..559e537fc
--- /dev/null
+++ b/compiler/pota-quantization-value-test/expected_outputs/FullyConnected_003_config/layer/uint8/fake_quantization/weight.json
@@ -0,0 +1,76 @@
+{
+ "weights": [
+ [
+ 1.000030517578125,
+ 2.00006103515625,
+ -3.000091552734375,
+ -4.0001220703125,
+ -4.999908447265625,
+ 5.99993896484375,
+ -6.999969482421875,
+ 8.0,
+ 4.0001220703125,
+ -2.00006103515625,
+ 3.000091552734375,
+ -1.000030517578125,
+ -8.0,
+ -5.99993896484375,
+ 6.999969482421875,
+ 4.999908447265625
+ ],
+ [
+ 1.000030517578125,
+ 2.00006103515625,
+ -3.000091552734375,
+ -4.0001220703125,
+ -4.999908447265625,
+ 5.99993896484375,
+ -6.999969482421875,
+ 8.0,
+ 4.0001220703125,
+ -2.00006103515625,
+ 3.000091552734375,
+ -1.000030517578125,
+ -8.0,
+ -5.99993896484375,
+ 6.999969482421875,
+ 4.999908447265625
+ ],
+ [
+ 1.000030517578125,
+ 2.00006103515625,
+ -3.000091552734375,
+ -4.0001220703125,
+ -4.999908447265625,
+ 5.99993896484375,
+ -6.999969482421875,
+ 8.0,
+ 4.0001220703125,
+ -2.00006103515625,
+ 3.000091552734375,
+ -1.000030517578125,
+ -8.0,
+ -5.99993896484375,
+ 6.999969482421875,
+ 4.999908447265625
+ ],
+ [
+ 1.000030517578125,
+ 2.00006103515625,
+ -3.000091552734375,
+ -4.0001220703125,
+ -4.999908447265625,
+ 5.99993896484375,
+ -6.999969482421875,
+ 8.0,
+ 4.0001220703125,
+ -2.00006103515625,
+ 3.000091552734375,
+ -1.000030517578125,
+ -8.0,
+ -5.99993896484375,
+ 6.999969482421875,
+ 4.999908447265625
+ ]
+ ]
+}
diff --git a/compiler/pota-quantization-value-test/expected_outputs/FullyConnected_003_config/layer/uint8/quantization/bias.json b/compiler/pota-quantization-value-test/expected_outputs/FullyConnected_003_config/layer/uint8/quantization/bias.json
new file mode 100644
index 000000000..0186c03f4
--- /dev/null
+++ b/compiler/pota-quantization-value-test/expected_outputs/FullyConnected_003_config/layer/uint8/quantization/bias.json
@@ -0,0 +1,14 @@
+{
+ "weights": [
+ 27619368,
+ -55238737,
+ -82858105,
+ 110477474
+ ],
+ "scale": [
+ 3.620647604581258e-08,
+ 3.620647604581258e-08,
+ 3.620647604581258e-08,
+ 3.620647604581258e-08
+ ]
+}
diff --git a/compiler/pota-quantization-value-test/expected_outputs/FullyConnected_003_config/layer/uint8/quantization/in_Quantize.json b/compiler/pota-quantization-value-test/expected_outputs/FullyConnected_003_config/layer/uint8/quantization/in_Quantize.json
new file mode 100644
index 000000000..1fd68cabe
--- /dev/null
+++ b/compiler/pota-quantization-value-test/expected_outputs/FullyConnected_003_config/layer/uint8/quantization/in_Quantize.json
@@ -0,0 +1,4 @@
+{
+ "scale": 0.00014829720021225512,
+ "zero_point": 0.0
+}
diff --git a/compiler/pota-quantization-value-test/expected_outputs/FullyConnected_003_config/layer/uint8/quantization/out.json b/compiler/pota-quantization-value-test/expected_outputs/FullyConnected_003_config/layer/uint8/quantization/out.json
new file mode 100644
index 000000000..b2950218c
--- /dev/null
+++ b/compiler/pota-quantization-value-test/expected_outputs/FullyConnected_003_config/layer/uint8/quantization/out.json
@@ -0,0 +1,4 @@
+{
+ "scale": 0.003870659740641713,
+ "zero_point": 0.0
+}
diff --git a/compiler/pota-quantization-value-test/expected_outputs/FullyConnected_003_config/layer/uint8/quantization/weight.json b/compiler/pota-quantization-value-test/expected_outputs/FullyConnected_003_config/layer/uint8/quantization/weight.json
new file mode 100644
index 000000000..69254d12b
--- /dev/null
+++ b/compiler/pota-quantization-value-test/expected_outputs/FullyConnected_003_config/layer/uint8/quantization/weight.json
@@ -0,0 +1,95 @@
+{
+ "weights": [
+ [
+ 4096,
+ 8192,
+ -12288,
+ -16384,
+ -20479,
+ 24575,
+ -28671,
+ 32767,
+ 16384,
+ -8192,
+ 12288,
+ -4096,
+ -32767,
+ -24575,
+ 28671,
+ 20479
+ ],
+ [
+ 4096,
+ 8192,
+ -12288,
+ -16384,
+ -20479,
+ 24575,
+ -28671,
+ 32767,
+ 16384,
+ -8192,
+ 12288,
+ -4096,
+ -32767,
+ -24575,
+ 28671,
+ 20479
+ ],
+ [
+ 4096,
+ 8192,
+ -12288,
+ -16384,
+ -20479,
+ 24575,
+ -28671,
+ 32767,
+ 16384,
+ -8192,
+ 12288,
+ -4096,
+ -32767,
+ -24575,
+ 28671,
+ 20479
+ ],
+ [
+ 4096,
+ 8192,
+ -12288,
+ -16384,
+ -20479,
+ 24575,
+ -28671,
+ 32767,
+ 16384,
+ -8192,
+ 12288,
+ -4096,
+ -32767,
+ -24575,
+ 28671,
+ 20479
+ ]
+ ],
+ "scale": [
+ 0.00024414807580797754,
+ 0.00024414807580797754,
+ 0.00024414807580797754,
+ 0.00024414807580797754
+ ],
+ "zero_point": 0.0,
+ "min": [
+ -8.0,
+ -8.0,
+ -8.0,
+ -8.0
+ ],
+ "max": [
+ 8.0,
+ 8.0,
+ 8.0,
+ 8.0
+ ]
+}
diff --git a/compiler/pota-quantization-value-test/expected_outputs/MaxPool2D_000_config/channel/int16/quantization/ifm_Quantize.json b/compiler/pota-quantization-value-test/expected_outputs/MaxPool2D_000_config/channel/int16/quantization/ifm_Quantize.json
new file mode 100644
index 000000000..9bf6c9bff
--- /dev/null
+++ b/compiler/pota-quantization-value-test/expected_outputs/MaxPool2D_000_config/channel/int16/quantization/ifm_Quantize.json
@@ -0,0 +1,4 @@
+{
+ "scale": 0.03876218944787979,
+ "zero_point": 126.0
+}
diff --git a/compiler/pota-quantization-value-test/expected_outputs/MaxPool2D_000_config/channel/int16/quantization/ofm.json b/compiler/pota-quantization-value-test/expected_outputs/MaxPool2D_000_config/channel/int16/quantization/ofm.json
new file mode 100644
index 000000000..87de1116e
--- /dev/null
+++ b/compiler/pota-quantization-value-test/expected_outputs/MaxPool2D_000_config/channel/int16/quantization/ofm.json
@@ -0,0 +1,4 @@
+{
+ "scale": 0.029836513102054596,
+ "zero_point": 88.0
+}
diff --git a/compiler/pota-quantization-value-test/expected_outputs/MaxPool2D_000_config/layer/uint8/quantization/ifm_Quantize.json b/compiler/pota-quantization-value-test/expected_outputs/MaxPool2D_000_config/layer/uint8/quantization/ifm_Quantize.json
new file mode 100644
index 000000000..5d9052815
--- /dev/null
+++ b/compiler/pota-quantization-value-test/expected_outputs/MaxPool2D_000_config/layer/uint8/quantization/ifm_Quantize.json
@@ -0,0 +1,4 @@
+{
+ "scale": 0.00015059474390000105,
+ "zero_point": 0.0
+}
diff --git a/compiler/pota-quantization-value-test/expected_outputs/MaxPool2D_000_config/layer/uint8/quantization/ofm.json b/compiler/pota-quantization-value-test/expected_outputs/MaxPool2D_000_config/layer/uint8/quantization/ofm.json
new file mode 100644
index 000000000..25491f05d
--- /dev/null
+++ b/compiler/pota-quantization-value-test/expected_outputs/MaxPool2D_000_config/layer/uint8/quantization/ofm.json
@@ -0,0 +1,4 @@
+{
+ "scale": 0.00014986195310484618,
+ "zero_point": 0.0
+}
diff --git a/compiler/pota-quantization-value-test/expected_outputs/Mean_000_config/channel/int16/quantization/ifm_Quantize.json b/compiler/pota-quantization-value-test/expected_outputs/Mean_000_config/channel/int16/quantization/ifm_Quantize.json
new file mode 100644
index 000000000..ede36c6ad
--- /dev/null
+++ b/compiler/pota-quantization-value-test/expected_outputs/Mean_000_config/channel/int16/quantization/ifm_Quantize.json
@@ -0,0 +1,4 @@
+{
+ "scale": 0.039086975157260895,
+ "zero_point": 128.0
+}
diff --git a/compiler/pota-quantization-value-test/expected_outputs/Mean_000_config/channel/int16/quantization/ofm.json b/compiler/pota-quantization-value-test/expected_outputs/Mean_000_config/channel/int16/quantization/ofm.json
new file mode 100644
index 000000000..bd2fc7f62
--- /dev/null
+++ b/compiler/pota-quantization-value-test/expected_outputs/Mean_000_config/channel/int16/quantization/ofm.json
@@ -0,0 +1,4 @@
+{
+ "scale": 0.028692100197076797,
+ "zero_point": 131.0
+}
diff --git a/compiler/pota-quantization-value-test/expected_outputs/Mean_000_config/layer/uint8/quantization/ifm_Quantize.json b/compiler/pota-quantization-value-test/expected_outputs/Mean_000_config/layer/uint8/quantization/ifm_Quantize.json
new file mode 100644
index 000000000..18c3b0421
--- /dev/null
+++ b/compiler/pota-quantization-value-test/expected_outputs/Mean_000_config/layer/uint8/quantization/ifm_Quantize.json
@@ -0,0 +1,4 @@
+{
+ "scale": 0.00015251495642587543,
+ "zero_point": 0.0
+}
diff --git a/compiler/pota-quantization-value-test/expected_outputs/Mean_000_config/layer/uint8/quantization/ofm.json b/compiler/pota-quantization-value-test/expected_outputs/Mean_000_config/layer/uint8/quantization/ofm.json
new file mode 100644
index 000000000..145ee8fda
--- /dev/null
+++ b/compiler/pota-quantization-value-test/expected_outputs/Mean_000_config/layer/uint8/quantization/ofm.json
@@ -0,0 +1,4 @@
+{
+ "scale": 0.00013844699424225837,
+ "zero_point": 0.0
+}
diff --git a/compiler/pota-quantization-value-test/expected_outputs/Mean_000_config/layer/uint8/quantization/reduction_indices.json b/compiler/pota-quantization-value-test/expected_outputs/Mean_000_config/layer/uint8/quantization/reduction_indices.json
new file mode 100644
index 000000000..394cfb322
--- /dev/null
+++ b/compiler/pota-quantization-value-test/expected_outputs/Mean_000_config/layer/uint8/quantization/reduction_indices.json
@@ -0,0 +1,5 @@
+{
+ "weights": [
+ -1
+ ]
+}
diff --git a/compiler/pota-quantization-value-test/expected_outputs/Mul_001_config/channel/int16/quantization/ifm1_Quantize.json b/compiler/pota-quantization-value-test/expected_outputs/Mul_001_config/channel/int16/quantization/ifm1_Quantize.json
new file mode 100644
index 000000000..bbff8952d
--- /dev/null
+++ b/compiler/pota-quantization-value-test/expected_outputs/Mul_001_config/channel/int16/quantization/ifm1_Quantize.json
@@ -0,0 +1,4 @@
+{
+ "scale": 0.03780897706747055,
+ "zero_point": 131.0
+}
diff --git a/compiler/pota-quantization-value-test/expected_outputs/Mul_001_config/channel/int16/quantization/ifm2.json b/compiler/pota-quantization-value-test/expected_outputs/Mul_001_config/channel/int16/quantization/ifm2.json
new file mode 100644
index 000000000..ec6082d55
--- /dev/null
+++ b/compiler/pota-quantization-value-test/expected_outputs/Mul_001_config/channel/int16/quantization/ifm2.json
@@ -0,0 +1,32 @@
+{
+ "weights": [
+ [
+ [
+ [
+ 136,
+ 153,
+ 68
+ ],
+ [
+ 51,
+ 34,
+ 221
+ ]
+ ],
+ [
+ [
+ 0,
+ 255,
+ 187
+ ],
+ [
+ 85,
+ 170,
+ 102
+ ]
+ ]
+ ]
+ ],
+ "scale": 0.05882352963089943,
+ "zero_point": 119.0
+}
diff --git a/compiler/pota-quantization-value-test/expected_outputs/Mul_001_config/channel/int16/quantization/ofm.json b/compiler/pota-quantization-value-test/expected_outputs/Mul_001_config/channel/int16/quantization/ofm.json
new file mode 100644
index 000000000..cec0bdf9a
--- /dev/null
+++ b/compiler/pota-quantization-value-test/expected_outputs/Mul_001_config/channel/int16/quantization/ofm.json
@@ -0,0 +1,4 @@
+{
+ "scale": 0.232084259390831,
+ "zero_point": 111.0
+}
diff --git a/compiler/pota-quantization-value-test/expected_outputs/Mul_001_config/layer/uint8/quantization/ifm1_Quantize.json b/compiler/pota-quantization-value-test/expected_outputs/Mul_001_config/layer/uint8/quantization/ifm1_Quantize.json
new file mode 100644
index 000000000..f329b43be
--- /dev/null
+++ b/compiler/pota-quantization-value-test/expected_outputs/Mul_001_config/layer/uint8/quantization/ifm1_Quantize.json
@@ -0,0 +1,4 @@
+{
+ "scale": 0.0001513722527306527,
+ "zero_point": 0.0
+}
diff --git a/compiler/pota-quantization-value-test/expected_outputs/Mul_001_config/layer/uint8/quantization/ifm2.json b/compiler/pota-quantization-value-test/expected_outputs/Mul_001_config/layer/uint8/quantization/ifm2.json
new file mode 100644
index 000000000..ab968c9fc
--- /dev/null
+++ b/compiler/pota-quantization-value-test/expected_outputs/Mul_001_config/layer/uint8/quantization/ifm2.json
@@ -0,0 +1,32 @@
+{
+ "weights": [
+ [
+ [
+ [
+ 4096,
+ 8192,
+ -12288
+ ],
+ [
+ -16384,
+ -20479,
+ 24575
+ ]
+ ],
+ [
+ [
+ -28671,
+ 32767,
+ 16384
+ ],
+ [
+ -8192,
+ 12288,
+ -4096
+ ]
+ ]
+ ]
+ ],
+ "scale": 0.0002441480755805969,
+ "zero_point": 0.0
+}
diff --git a/compiler/pota-quantization-value-test/expected_outputs/Mul_001_config/layer/uint8/quantization/ofm.json b/compiler/pota-quantization-value-test/expected_outputs/Mul_001_config/layer/uint8/quantization/ofm.json
new file mode 100644
index 000000000..4b5118c3e
--- /dev/null
+++ b/compiler/pota-quantization-value-test/expected_outputs/Mul_001_config/layer/uint8/quantization/ofm.json
@@ -0,0 +1,4 @@
+{
+ "scale": 0.000991688808426261,
+ "zero_point": 0.0
+}
diff --git a/compiler/pota-quantization-value-test/expected_outputs/PRelu_001_config/channel/int16/quantization/alpha.json b/compiler/pota-quantization-value-test/expected_outputs/PRelu_001_config/channel/int16/quantization/alpha.json
new file mode 100644
index 000000000..7c001602f
--- /dev/null
+++ b/compiler/pota-quantization-value-test/expected_outputs/PRelu_001_config/channel/int16/quantization/alpha.json
@@ -0,0 +1,13 @@
+{
+ "weights": [
+ [
+ [
+ 51,
+ 153,
+ 255
+ ]
+ ]
+ ],
+ "scale": 0.0019607844296842813,
+ "zero_point": 0.0
+}
diff --git a/compiler/pota-quantization-value-test/expected_outputs/PRelu_001_config/channel/int16/quantization/ifm_Quantize.json b/compiler/pota-quantization-value-test/expected_outputs/PRelu_001_config/channel/int16/quantization/ifm_Quantize.json
new file mode 100644
index 000000000..05ce9dd2c
--- /dev/null
+++ b/compiler/pota-quantization-value-test/expected_outputs/PRelu_001_config/channel/int16/quantization/ifm_Quantize.json
@@ -0,0 +1,4 @@
+{
+ "scale": 0.03849203139543533,
+ "zero_point": 127.0
+}
diff --git a/compiler/pota-quantization-value-test/expected_outputs/PRelu_001_config/channel/int16/quantization/ofm.json b/compiler/pota-quantization-value-test/expected_outputs/PRelu_001_config/channel/int16/quantization/ofm.json
new file mode 100644
index 000000000..8f883094a
--- /dev/null
+++ b/compiler/pota-quantization-value-test/expected_outputs/PRelu_001_config/channel/int16/quantization/ofm.json
@@ -0,0 +1,4 @@
+{
+ "scale": 0.02848827838897705,
+ "zero_point": 82.0
+}
diff --git a/compiler/pota-quantization-value-test/expected_outputs/PRelu_001_config/layer/uint8/quantization/alpha.json b/compiler/pota-quantization-value-test/expected_outputs/PRelu_001_config/layer/uint8/quantization/alpha.json
new file mode 100644
index 000000000..6f99899d5
--- /dev/null
+++ b/compiler/pota-quantization-value-test/expected_outputs/PRelu_001_config/layer/uint8/quantization/alpha.json
@@ -0,0 +1,21 @@
+{
+ "weights": [
+ [
+ [
+ 1,
+ 1,
+ 1
+ ]
+ ]
+ ],
+ "scale": [
+ 0.10000000149011612,
+ 0.30000001192092896,
+ 0.5
+ ],
+ "zero_point": [
+ 0,
+ 0,
+ 0
+ ]
+}
diff --git a/compiler/pota-quantization-value-test/expected_outputs/PRelu_001_config/layer/uint8/quantization/ifm_Quantize.json b/compiler/pota-quantization-value-test/expected_outputs/PRelu_001_config/layer/uint8/quantization/ifm_Quantize.json
new file mode 100644
index 000000000..7d1f4c795
--- /dev/null
+++ b/compiler/pota-quantization-value-test/expected_outputs/PRelu_001_config/layer/uint8/quantization/ifm_Quantize.json
@@ -0,0 +1,4 @@
+{
+ "scale": 0.00015214986342471093,
+ "zero_point": 0.0
+}
diff --git a/compiler/pota-quantization-value-test/expected_outputs/PRelu_001_config/layer/uint8/quantization/ofm.json b/compiler/pota-quantization-value-test/expected_outputs/PRelu_001_config/layer/uint8/quantization/ofm.json
new file mode 100644
index 000000000..533c1e3e0
--- /dev/null
+++ b/compiler/pota-quantization-value-test/expected_outputs/PRelu_001_config/layer/uint8/quantization/ofm.json
@@ -0,0 +1,4 @@
+{
+ "scale": 0.00015159364556893706,
+ "zero_point": 0.0
+}
diff --git a/compiler/pota-quantization-value-test/expected_outputs/ReLU_000_config/channel/int16/quantization/ifm_Quantize.json b/compiler/pota-quantization-value-test/expected_outputs/ReLU_000_config/channel/int16/quantization/ifm_Quantize.json
new file mode 100644
index 000000000..3b97773ce
--- /dev/null
+++ b/compiler/pota-quantization-value-test/expected_outputs/ReLU_000_config/channel/int16/quantization/ifm_Quantize.json
@@ -0,0 +1,4 @@
+{
+ "scale": 0.03907399624586105,
+ "zero_point": 127.0
+}
diff --git a/compiler/pota-quantization-value-test/expected_outputs/ReLU_000_config/channel/int16/quantization/ofm.json b/compiler/pota-quantization-value-test/expected_outputs/ReLU_000_config/channel/int16/quantization/ofm.json
new file mode 100644
index 000000000..698a8a7ee
--- /dev/null
+++ b/compiler/pota-quantization-value-test/expected_outputs/ReLU_000_config/channel/int16/quantization/ofm.json
@@ -0,0 +1,4 @@
+{
+ "scale": 0.01955186203122139,
+ "zero_point": 0.0
+}
diff --git a/compiler/pota-quantization-value-test/expected_outputs/ReLU_000_config/layer/uint8/quantization/ifm_Quantize.json b/compiler/pota-quantization-value-test/expected_outputs/ReLU_000_config/layer/uint8/quantization/ifm_Quantize.json
new file mode 100644
index 000000000..5a52a1b7b
--- /dev/null
+++ b/compiler/pota-quantization-value-test/expected_outputs/ReLU_000_config/layer/uint8/quantization/ifm_Quantize.json
@@ -0,0 +1,4 @@
+{
+ "scale": 0.0001474507007515058,
+ "zero_point": 0.0
+}
diff --git a/compiler/pota-quantization-value-test/expected_outputs/ReLU_000_config/layer/uint8/quantization/ofm.json b/compiler/pota-quantization-value-test/expected_outputs/ReLU_000_config/layer/uint8/quantization/ofm.json
new file mode 100644
index 000000000..ff9e41ec8
--- /dev/null
+++ b/compiler/pota-quantization-value-test/expected_outputs/ReLU_000_config/layer/uint8/quantization/ofm.json
@@ -0,0 +1,4 @@
+{
+ "scale": 0.0001422425702912733,
+ "zero_point": 0.0
+}
diff --git a/compiler/pota-quantization-value-test/expected_outputs/Split_000_config/channel/int16/quantization/ifm_Quantize.json b/compiler/pota-quantization-value-test/expected_outputs/Split_000_config/channel/int16/quantization/ifm_Quantize.json
new file mode 100644
index 000000000..aaba6131c
--- /dev/null
+++ b/compiler/pota-quantization-value-test/expected_outputs/Split_000_config/channel/int16/quantization/ifm_Quantize.json
@@ -0,0 +1,4 @@
+{
+ "scale": 0.038689617067575455,
+ "zero_point": 128.0
+}
diff --git a/compiler/pota-quantization-value-test/expected_outputs/Split_000_config/channel/int16/quantization/ofm1.json b/compiler/pota-quantization-value-test/expected_outputs/Split_000_config/channel/int16/quantization/ofm1.json
new file mode 100644
index 000000000..aaba6131c
--- /dev/null
+++ b/compiler/pota-quantization-value-test/expected_outputs/Split_000_config/channel/int16/quantization/ofm1.json
@@ -0,0 +1,4 @@
+{
+ "scale": 0.038689617067575455,
+ "zero_point": 128.0
+}
diff --git a/compiler/pota-quantization-value-test/expected_outputs/Split_000_config/channel/int16/quantization/ofm2.json b/compiler/pota-quantization-value-test/expected_outputs/Split_000_config/channel/int16/quantization/ofm2.json
new file mode 100644
index 000000000..aaba6131c
--- /dev/null
+++ b/compiler/pota-quantization-value-test/expected_outputs/Split_000_config/channel/int16/quantization/ofm2.json
@@ -0,0 +1,4 @@
+{
+ "scale": 0.038689617067575455,
+ "zero_point": 128.0
+}
diff --git a/compiler/pota-quantization-value-test/expected_outputs/Split_000_config/channel/int16/quantization/split_dim.json b/compiler/pota-quantization-value-test/expected_outputs/Split_000_config/channel/int16/quantization/split_dim.json
new file mode 100644
index 000000000..ac7cde187
--- /dev/null
+++ b/compiler/pota-quantization-value-test/expected_outputs/Split_000_config/channel/int16/quantization/split_dim.json
@@ -0,0 +1,5 @@
+{
+ "weights": [
+ 0
+ ]
+}
diff --git a/compiler/pota-quantization-value-test/expected_outputs/Split_000_config/channel/uint8/quantization/ifm_Quantize.json b/compiler/pota-quantization-value-test/expected_outputs/Split_000_config/channel/uint8/quantization/ifm_Quantize.json
new file mode 100644
index 000000000..2fb0c68d8
--- /dev/null
+++ b/compiler/pota-quantization-value-test/expected_outputs/Split_000_config/channel/uint8/quantization/ifm_Quantize.json
@@ -0,0 +1,4 @@
+{
+ "scale": 0.00014983004075475037,
+ "zero_point": 0.0
+}
diff --git a/compiler/pota-quantization-value-test/expected_outputs/Split_000_config/channel/uint8/quantization/ofm1.json b/compiler/pota-quantization-value-test/expected_outputs/Split_000_config/channel/uint8/quantization/ofm1.json
new file mode 100644
index 000000000..2fb0c68d8
--- /dev/null
+++ b/compiler/pota-quantization-value-test/expected_outputs/Split_000_config/channel/uint8/quantization/ofm1.json
@@ -0,0 +1,4 @@
+{
+ "scale": 0.00014983004075475037,
+ "zero_point": 0.0
+}
diff --git a/compiler/pota-quantization-value-test/expected_outputs/Split_000_config/channel/uint8/quantization/ofm2.json b/compiler/pota-quantization-value-test/expected_outputs/Split_000_config/channel/uint8/quantization/ofm2.json
new file mode 100644
index 000000000..2fb0c68d8
--- /dev/null
+++ b/compiler/pota-quantization-value-test/expected_outputs/Split_000_config/channel/uint8/quantization/ofm2.json
@@ -0,0 +1,4 @@
+{
+ "scale": 0.00014983004075475037,
+ "zero_point": 0.0
+}
diff --git a/compiler/pota-quantization-value-test/expected_outputs/Split_000_config/channel/uint8/quantization/split_dim.json b/compiler/pota-quantization-value-test/expected_outputs/Split_000_config/channel/uint8/quantization/split_dim.json
new file mode 100644
index 000000000..ac7cde187
--- /dev/null
+++ b/compiler/pota-quantization-value-test/expected_outputs/Split_000_config/channel/uint8/quantization/split_dim.json
@@ -0,0 +1,5 @@
+{
+ "weights": [
+ 0
+ ]
+}
diff --git a/compiler/pota-quantization-value-test/expected_outputs/TransposeConv_001_config/channel/int16/fake_quantization/ker.json b/compiler/pota-quantization-value-test/expected_outputs/TransposeConv_001_config/channel/int16/fake_quantization/ker.json
new file mode 100644
index 000000000..76a0440a0
--- /dev/null
+++ b/compiler/pota-quantization-value-test/expected_outputs/TransposeConv_001_config/channel/int16/fake_quantization/ker.json
@@ -0,0 +1,48 @@
+{
+ "weights": [
+ [
+ [
+ [
+ 0.960784912109375,
+ 2.0588245391845703
+ ],
+ [
+ -3.0196075439453125,
+ -3.980391502380371
+ ],
+ [
+ 4.9411773681640625,
+ -6.039215087890625
+ ]
+ ],
+ [
+ [
+ 7.0,
+ 7.960784912109375
+ ],
+ [
+ -9.058823585510254,
+ -10.019607543945312
+ ],
+ [
+ 10.980392456054688,
+ -11.941176414489746
+ ]
+ ],
+ [
+ [
+ 13.039216995239258,
+ 14.000001907348633
+ ],
+ [
+ -14.960784912109375,
+ -16.05882453918457
+ ],
+ [
+ 17.019607543945312,
+ -17.980392456054688
+ ]
+ ]
+ ]
+ ]
+}
diff --git a/compiler/pota-quantization-value-test/expected_outputs/TransposeConv_001_config/channel/int16/quantization/ifm_Quantize.json b/compiler/pota-quantization-value-test/expected_outputs/TransposeConv_001_config/channel/int16/quantization/ifm_Quantize.json
new file mode 100644
index 000000000..dc5ca8dd5
--- /dev/null
+++ b/compiler/pota-quantization-value-test/expected_outputs/TransposeConv_001_config/channel/int16/quantization/ifm_Quantize.json
@@ -0,0 +1,4 @@
+{
+ "scale": 0.03869570419192314,
+ "zero_point": 126.0
+}
diff --git a/compiler/pota-quantization-value-test/expected_outputs/TransposeConv_001_config/channel/int16/quantization/ker.json b/compiler/pota-quantization-value-test/expected_outputs/TransposeConv_001_config/channel/int16/quantization/ker.json
new file mode 100644
index 000000000..bc150bbb0
--- /dev/null
+++ b/compiler/pota-quantization-value-test/expected_outputs/TransposeConv_001_config/channel/int16/quantization/ker.json
@@ -0,0 +1,52 @@
+{
+ "weights": [
+ [
+ [
+ [
+ 138,
+ 146
+ ],
+ [
+ 109,
+ 102
+ ],
+ [
+ 167,
+ 87
+ ]
+ ],
+ [
+ [
+ 182,
+ 189
+ ],
+ [
+ 65,
+ 58
+ ],
+ [
+ 211,
+ 44
+ ]
+ ],
+ [
+ [
+ 226,
+ 233
+ ],
+ [
+ 22,
+ 14
+ ],
+ [
+ 255,
+ 0
+ ]
+ ]
+ ]
+ ],
+ "scale": 0.13725490868091583,
+ "zero_point": 131.0,
+ "min": -17.980392456054688,
+ "max": 17.019609451293945
+}
diff --git a/compiler/pota-quantization-value-test/expected_outputs/TransposeConv_001_config/channel/int16/quantization/ofm.json b/compiler/pota-quantization-value-test/expected_outputs/TransposeConv_001_config/channel/int16/quantization/ofm.json
new file mode 100644
index 000000000..bfd862189
--- /dev/null
+++ b/compiler/pota-quantization-value-test/expected_outputs/TransposeConv_001_config/channel/int16/quantization/ofm.json
@@ -0,0 +1,4 @@
+{
+ "scale": 1.6333034038543701,
+ "zero_point": 127.0
+}
diff --git a/compiler/pota-quantization-value-test/expected_outputs/TransposeConv_001_config/layer/uint8/fake_quantization/ker.json b/compiler/pota-quantization-value-test/expected_outputs/TransposeConv_001_config/layer/uint8/fake_quantization/ker.json
new file mode 100644
index 000000000..6df24eb42
--- /dev/null
+++ b/compiler/pota-quantization-value-test/expected_outputs/TransposeConv_001_config/layer/uint8/fake_quantization/ker.json
@@ -0,0 +1,48 @@
+{
+ "weights": [
+ [
+ [
+ [
+ 0.999786376953125,
+ 2.0001220703125
+ ],
+ [
+ -2.999908447265625,
+ -4.000244140625
+ ],
+ [
+ 5.000030517578125,
+ -5.99981689453125
+ ]
+ ],
+ [
+ [
+ 7.000152587890625,
+ 7.99993896484375
+ ],
+ [
+ -9.000274658203125,
+ -10.00006103515625
+ ],
+ [
+ 10.999847412109375,
+ -12.00018310546875
+ ]
+ ],
+ [
+ [
+ 12.999969482421875,
+ 13.999755859375
+ ],
+ [
+ -15.000091552734375,
+ -15.9998779296875
+ ],
+ [
+ 17.000213623046875,
+ -18.0
+ ]
+ ]
+ ]
+ ]
+}
diff --git a/compiler/pota-quantization-value-test/expected_outputs/TransposeConv_001_config/layer/uint8/quantization/ifm_Quantize.json b/compiler/pota-quantization-value-test/expected_outputs/TransposeConv_001_config/layer/uint8/quantization/ifm_Quantize.json
new file mode 100644
index 000000000..82f7fa2b6
--- /dev/null
+++ b/compiler/pota-quantization-value-test/expected_outputs/TransposeConv_001_config/layer/uint8/quantization/ifm_Quantize.json
@@ -0,0 +1,4 @@
+{
+ "scale": 0.00015178922330960631,
+ "zero_point": 0.0
+}
diff --git a/compiler/pota-quantization-value-test/expected_outputs/TransposeConv_001_config/layer/uint8/quantization/ker.json b/compiler/pota-quantization-value-test/expected_outputs/TransposeConv_001_config/layer/uint8/quantization/ker.json
new file mode 100644
index 000000000..8d0ceb1c6
--- /dev/null
+++ b/compiler/pota-quantization-value-test/expected_outputs/TransposeConv_001_config/layer/uint8/quantization/ker.json
@@ -0,0 +1,58 @@
+{
+ "weights": [
+ [
+ [
+ [
+ 1820,
+ 3641
+ ],
+ [
+ -5461,
+ -7282
+ ],
+ [
+ 9102,
+ -10922
+ ]
+ ],
+ [
+ [
+ 12743,
+ 14563
+ ],
+ [
+ -16384,
+ -18204
+ ],
+ [
+ 20024,
+ -21845
+ ]
+ ],
+ [
+ [
+ 23665,
+ 25485
+ ],
+ [
+ -27306,
+ -29126
+ ],
+ [
+ 30947,
+ -32767
+ ]
+ ]
+ ]
+ ],
+ "scale": [
+ 0.0005493331705679495
+ ],
+ "zero_point": 0.0,
+ "min": [
+ -18.0
+ ],
+ "max": [
+ 18.0
+ ]
+}
diff --git a/compiler/pota-quantization-value-test/expected_outputs/TransposeConv_001_config/layer/uint8/quantization/ofm.json b/compiler/pota-quantization-value-test/expected_outputs/TransposeConv_001_config/layer/uint8/quantization/ofm.json
new file mode 100644
index 000000000..f370bf44d
--- /dev/null
+++ b/compiler/pota-quantization-value-test/expected_outputs/TransposeConv_001_config/layer/uint8/quantization/ofm.json
@@ -0,0 +1,4 @@
+{
+ "scale": 0.0122029148042202,
+ "zero_point": 0.0
+}
diff --git a/compiler/pota-quantization-value-test/requires.cmake b/compiler/pota-quantization-value-test/requires.cmake
index 4eb7204e1..5ce8dfb5d 100644
--- a/compiler/pota-quantization-value-test/requires.cmake
+++ b/compiler/pota-quantization-value-test/requires.cmake
@@ -2,4 +2,4 @@ require("record-minmax")
require("circle-quantizer")
require("circle-tensordump")
require("common-artifacts")
-require("mio-circle")
+require("mio-circle04")
diff --git a/compiler/pota-quantization-value-test/test.lst b/compiler/pota-quantization-value-test/test.lst
index 4beec8c0e..e169de57c 100644
--- a/compiler/pota-quantization-value-test/test.lst
+++ b/compiler/pota-quantization-value-test/test.lst
@@ -31,3 +31,32 @@ addTest(Split_000 channel int16)
addTest(TransposeConv_001 channel uint8)
addTest(TransposeConv_001 channel int16)
addTest(TransposeConv_001 layer uint8)
+
+addQConfTest(Add_002 layer uint8)
+addQConfTest(Add_002 channel int16)
+addQConfTest(AveragePool2D_000 layer uint8)
+addQConfTest(AveragePool2D_000 channel int16)
+addQConfTest(Concatenation_001 layer uint8)
+addQConfTest(Concatenation_001 channel int16)
+addQConfTest(Conv2D_004 channel int16)
+addQConfTest(Conv2D_004 layer uint8)
+addQConfTest(DepthwiseConv2D_002 channel int16)
+addQConfTest(DepthwiseConv2D_002 layer uint8)
+addQConfTest(FullyConnected_003 channel int16)
+addQConfTest(FullyConnected_003 layer uint8)
+#addQConfTest(InstanceNorm_001 layer uint8) Enable this when int16 CWQ data is ready.
+#addQConfTest(InstanceNorm_001 channel int16) Enable this when int16 CWQ data is ready.
+addQConfTest(Mean_000 layer uint8)
+addQConfTest(Mean_000 channel int16)
+addQConfTest(MaxPool2D_000 layer uint8)
+addQConfTest(MaxPool2D_000 channel int16)
+addQConfTest(Mul_001 layer uint8)
+addQConfTest(Mul_001 channel int16)
+addQConfTest(PRelu_001 layer uint8)
+addQConfTest(PRelu_001 channel int16)
+addQConfTest(ReLU_000 layer uint8)
+addQConfTest(ReLU_000 channel int16)
+addQConfTest(Split_000 channel uint8)
+addQConfTest(Split_000 channel int16)
+addQConfTest(TransposeConv_001 channel int16)
+addQConfTest(TransposeConv_001 layer uint8)
diff --git a/compiler/pota-quantization-value-test/test_fake_wquant_with_config.sh b/compiler/pota-quantization-value-test/test_fake_wquant_with_config.sh
new file mode 100755
index 000000000..070b2738e
--- /dev/null
+++ b/compiler/pota-quantization-value-test/test_fake_wquant_with_config.sh
@@ -0,0 +1,87 @@
+#!/bin/bash
+
+# This script tests fake quantization with a config file
+#
+# HOW TO USE
+#
+# ./test_fake_wquant_with_config.sh <path/to/test.config> <path/to/work_dir> <TEST 1> <TEST 2> ...
+# test.config : set ${CIRCLE_QUANTIZER_PATH} and ${CIRCLE_TENSORDUMP_PATH}
+# work_dir : build directory of pota-quantization-value-test (ex: build/compiler/pota-quantization-value-test)
+
+SOURCE_PATH="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
+COMPARE_SCRIPT_PATH="${SOURCE_PATH}/compare_tensors.py"
+CONFIG_PATH="$1"; shift
+BIN_PATH=$(dirname "${CONFIG_PATH}")
+WORKDIR="$1"; shift
+
+source "${CONFIG_PATH}"
+
+echo "-- Found CIRCLE_QUANTIZER: ${CIRCLE_QUANTIZER_PATH}"
+echo "-- Found CIRCLE_TENSORDUMP: ${CIRCLE_TENSORDUMP_PATH}"
+echo "-- Found workdir: ${WORKDIR}"
+
+TESTED=()
+PASSED=()
+FAILED=()
+
+pushd "${WORKDIR}"
+while [ "$1" != "" ]; do
+ MODELNAME=$1; shift
+ GRANULARITY=$1; shift
+ DTYPE=$1; shift
+ TESTCASE="${MODELNAME}.${GRANULARITY}.${DTYPE}"
+
+ TESTED+=("${TESTCASE}")
+
+ TESTCASE_FILE="${WORKDIR}/${TESTCASE}"
+ TEST_RESULT_FILE="${BIN_PATH}/${TESTCASE}"
+
+ PASSED_TAG="${TEST_RESULT_FILE}.fake_quantized.mixed.passed"
+ rm -f "${PASSED_TAG}"
+
+ cat > "${TEST_RESULT_FILE}_fake_quantization_with_config.log" <(
+ exec 2>&1
+ set -ex
+
+ # Run circle-quantizer with --quantize_dequantize_weights
+ "${CIRCLE_QUANTIZER_PATH}" \
+ --quantize_dequantize_weights float32 "${DTYPE}" "${GRANULARITY}" \
+ --config "${SOURCE_PATH}/config_files/${MODELNAME}/${GRANULARITY}/${DTYPE}/qconf.json" \
+ "${WORKDIR}/${MODELNAME}.circle" \
+ "${TEST_RESULT_FILE}.fake_quantized.mixed.circle"
+
+ # Dump weights values (circle-tensordump)
+ "${CIRCLE_TENSORDUMP_PATH}" \
+ "${TEST_RESULT_FILE}.fake_quantized.mixed.circle" \
+ --tensors_to_hdf5 "${TEST_RESULT_FILE}.fake_quantized.mixed.circle.h5"
+
+ # Compare result
+ "${VIRTUALENV}/bin/python" "${COMPARE_SCRIPT_PATH}" \
+ --input_h5 "${TEST_RESULT_FILE}.fake_quantized.mixed.circle.h5" \
+ --expect_dir "${SOURCE_PATH}/expected_outputs/${MODELNAME}_config/${GRANULARITY}/${DTYPE}/fake_quantization" \
+ --mode fake_quantization
+
+ if [[ $? -eq 0 ]]; then
+ touch "${PASSED_TAG}"
+ fi
+ )
+
+ if [[ -f "${PASSED_TAG}" ]]; then
+ PASSED+=("$TESTCASE")
+ else
+ FAILED+=("$TESTCASE")
+ fi
+done
+popd
+
+if [[ ${#TESTED[@]} -ne ${#PASSED[@]} ]]; then
+ echo "FAILED"
+ for TEST in "${FAILED[@]}"
+ do
+ echo "- ${TEST}"
+ done
+ exit 255
+fi
+
+echo "PASSED"
+exit 0
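The actual comparison is delegated to compare_tensors.py, invoked above with --mode fake_quantization. As a rough sketch of what that check amounts to, under the assumption (ours, not a documented contract) that circle-tensordump stores each tensor as an HDF5 group holding a "weights" dataset:

```python
import json

import h5py
import numpy as np

def compare_fake_quantization(h5_path, expect_dir, tensor, rtol=1e-5):
    # Compare one dumped tensor against its expected_outputs JSON.
    with h5py.File(h5_path, "r") as f:
        dumped = np.array(f[tensor]["weights"])  # HDF5 layout is assumed
    with open(f"{expect_dir}/{tensor}.json") as f:
        expected = np.array(json.load(f)["weights"], dtype=np.float32)
    return np.allclose(dumped, expected, rtol=rtol)

# e.g.:
# compare_fake_quantization(
#     "Conv2D_004.channel.int16.fake_quantized.mixed.circle.h5",
#     "expected_outputs/Conv2D_004_config/channel/int16/fake_quantization",
#     "ker")
```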
diff --git a/compiler/pota-quantization-value-test/test_inputs/Add_002_config/channel/int16/0.txt b/compiler/pota-quantization-value-test/test_inputs/Add_002_config/channel/int16/0.txt
new file mode 100644
index 000000000..b6e2efa3d
--- /dev/null
+++ b/compiler/pota-quantization-value-test/test_inputs/Add_002_config/channel/int16/0.txt
@@ -0,0 +1 @@
+-0.8596993, 4.8127713,-3.4127183, 4.2323627,-2.2201376,-1.5362649,-4.9921966, 0.9565166, 3.2879171,-1.3590081,-3.771852 ,-4.1042285
diff --git a/compiler/pota-quantization-value-test/test_inputs/Add_002_config/channel/int16/1.txt b/compiler/pota-quantization-value-test/test_inputs/Add_002_config/channel/int16/1.txt
new file mode 100644
index 000000000..bcf2807ba
--- /dev/null
+++ b/compiler/pota-quantization-value-test/test_inputs/Add_002_config/channel/int16/1.txt
@@ -0,0 +1 @@
+ 0.14624089, 4.7304125 , 4.833998 , 4.2321773 ,-2.0582533 ,-2.3694758 , 1.4213978 , 2.2444596 , 3.3630798 ,-0.70257574, 3.586656 ,-2.513805
diff --git a/compiler/pota-quantization-value-test/test_inputs/Add_002_config/channel/int16/2.txt b/compiler/pota-quantization-value-test/test_inputs/Add_002_config/channel/int16/2.txt
new file mode 100644
index 000000000..c3e32d2c5
--- /dev/null
+++ b/compiler/pota-quantization-value-test/test_inputs/Add_002_config/channel/int16/2.txt
@@ -0,0 +1 @@
+ 2.175218 , 0.02776978,-2.6291077 , 3.5350094 ,-1.2364857 ,-3.3151364 ,-0.92507887, 2.8038094 ,-1.8781518 , 3.6221995 , 2.4015775 ,-2.9217577
diff --git a/compiler/pota-quantization-value-test/test_inputs/Add_002_config/channel/int16/3.txt b/compiler/pota-quantization-value-test/test_inputs/Add_002_config/channel/int16/3.txt
new file mode 100644
index 000000000..a92abd4f6
--- /dev/null
+++ b/compiler/pota-quantization-value-test/test_inputs/Add_002_config/channel/int16/3.txt
@@ -0,0 +1 @@
+-1.0345451,-1.5055941,-4.144375 ,-4.727011 , 1.5841546, 4.5780725,-4.24402 ,-2.3966947,-3.0370803,-1.0234503,-0.2750057, 3.2965126
diff --git a/compiler/pota-quantization-value-test/test_inputs/Add_002_config/channel/int16/4.txt b/compiler/pota-quantization-value-test/test_inputs/Add_002_config/channel/int16/4.txt
new file mode 100644
index 000000000..2f2937fcb
--- /dev/null
+++ b/compiler/pota-quantization-value-test/test_inputs/Add_002_config/channel/int16/4.txt
@@ -0,0 +1 @@
+-2.4460397 , 2.6090143 , 4.1773095 , 0.11204174,-3.3053472 , 2.5160108 ,-3.0612547 , 1.0667087 , 2.8952355 , 3.842513 , 0.6790793 ,-0.33375
diff --git a/compiler/pota-quantization-value-test/test_inputs/Add_002_config/layer/uint8/0.txt b/compiler/pota-quantization-value-test/test_inputs/Add_002_config/layer/uint8/0.txt
new file mode 100644
index 000000000..a219546a1
--- /dev/null
+++ b/compiler/pota-quantization-value-test/test_inputs/Add_002_config/layer/uint8/0.txt
@@ -0,0 +1 @@
+-0.48516417,-4.5555663 ,-2.9907737 , 2.422857 , 1.010034 , 3.6436582 , 0.29334423,-4.0628953 , 1.0116768 , 3.0871766 , 3.3341465 , 4.3921704
diff --git a/compiler/pota-quantization-value-test/test_inputs/Add_002_config/layer/uint8/1.txt b/compiler/pota-quantization-value-test/test_inputs/Add_002_config/layer/uint8/1.txt
new file mode 100644
index 000000000..70d3139a0
--- /dev/null
+++ b/compiler/pota-quantization-value-test/test_inputs/Add_002_config/layer/uint8/1.txt
@@ -0,0 +1 @@
+-0.7787985 , 4.101575 ,-0.4839729 , 0.35971674,-4.3452406 ,-4.811665 ,-3.8693128 , 4.239326 , 0.44103175, 3.5549765 , 2.5334291 , 1.4546562
diff --git a/compiler/pota-quantization-value-test/test_inputs/Add_002_config/layer/uint8/2.txt b/compiler/pota-quantization-value-test/test_inputs/Add_002_config/layer/uint8/2.txt
new file mode 100644
index 000000000..3c38f8d5d
--- /dev/null
+++ b/compiler/pota-quantization-value-test/test_inputs/Add_002_config/layer/uint8/2.txt
@@ -0,0 +1 @@
+ 3.5943313,-1.4843192, 1.956341 ,-1.3242344, 1.5901331,-3.641623 , 4.6022506,-0.307265 ,-0.6359913,-4.0109854,-1.2064985, 1.1137954
diff --git a/compiler/pota-quantization-value-test/test_inputs/Add_002_config/layer/uint8/3.txt b/compiler/pota-quantization-value-test/test_inputs/Add_002_config/layer/uint8/3.txt
new file mode 100644
index 000000000..e89a022f5
--- /dev/null
+++ b/compiler/pota-quantization-value-test/test_inputs/Add_002_config/layer/uint8/3.txt
@@ -0,0 +1 @@
+ 3.1036437 ,-0.39538398,-0.07278133, 4.547673 , 3.9132211 , 2.6468625 ,-4.2830634 ,-2.0573084 , 2.1074655 ,-4.0634165 ,-4.55598 ,-0.7942089
diff --git a/compiler/pota-quantization-value-test/test_inputs/Add_002_config/layer/uint8/4.txt b/compiler/pota-quantization-value-test/test_inputs/Add_002_config/layer/uint8/4.txt
new file mode 100644
index 000000000..2b00832cd
--- /dev/null
+++ b/compiler/pota-quantization-value-test/test_inputs/Add_002_config/layer/uint8/4.txt
@@ -0,0 +1 @@
+-2.7745228, 1.4813256, 4.4699864, 3.7466738,-2.9847758,-4.453416 , 3.2515864,-1.2459193,-4.44965 ,-1.8452735, 4.423347 , 4.2998137
diff --git a/compiler/pota-quantization-value-test/test_inputs/AveragePool2D_000_config/channel/int16/0.txt b/compiler/pota-quantization-value-test/test_inputs/AveragePool2D_000_config/channel/int16/0.txt
new file mode 100644
index 000000000..e42cbf88b
--- /dev/null
+++ b/compiler/pota-quantization-value-test/test_inputs/AveragePool2D_000_config/channel/int16/0.txt
@@ -0,0 +1 @@
+-4.1358833e+00, 1.7854472e+00, 4.1751757e+00, 5.5915713e-01,-2.6459083e-01,-1.7176826e+00,-1.8155930e+00, 2.8710868e+00,-2.7043006e+00, 1.0959731e+00,-2.0176995e+00,-6.5950048e-01,-3.6413522e+00,-4.1966043e+00,-2.6820884e+00,-3.6055098e+00, 3.6852844e+00, 8.9128174e-02, 1.3107824e+00,-3.6425626e+00,-3.2318896e-01, 3.6238370e+00,-4.9837337e+00,-4.0550299e+00,-1.4882606e+00, 1.5547658e+00,-1.1696080e+00, 2.1651111e+00, 4.9318314e+00,-3.5928023e+00,-1.2348548e+00,-1.7002642e+00, 1.7365140e+00,-8.8151926e-01,-4.1655774e+00,-1.0166957e+00,-3.7440193e+00, 2.8588972e+00, 4.1286149e+00,-4.9504828e+00, 4.8477168e+00,-2.2587967e+00, 2.8542519e+00,-7.9565448e-01, 6.8252671e-01, 2.5875571e-01,-6.3935977e-01,-4.8547015e+00, 4.1373856e-03,-1.3893708e+00, 8.8775367e-01, 2.1222150e-01, 3.1871333e+00, 1.3869151e+00,-3.8274391e+00, 3.2623324e+00, 7.2669631e-01, 1.0303619e+00, 8.1438148e-01, 8.1272924e-01,-2.7527118e+00, 1.8215455e+00,-1.6416427e-01, 4.9103169e+00
diff --git a/compiler/pota-quantization-value-test/test_inputs/AveragePool2D_000_config/channel/int16/1.txt b/compiler/pota-quantization-value-test/test_inputs/AveragePool2D_000_config/channel/int16/1.txt
new file mode 100644
index 000000000..7caf8ce9e
--- /dev/null
+++ b/compiler/pota-quantization-value-test/test_inputs/AveragePool2D_000_config/channel/int16/1.txt
@@ -0,0 +1 @@
+-4.250757 , 1.4186406 , 0.63726735,-0.35924944, 1.9436699 , 3.2695885 , 3.6638293 , 4.5166173 , 1.3807241 ,-1.9112543 ,-1.9026492 ,-0.4800549 , 2.818216 ,-4.6390033 ,-3.8570547 , 3.6634028 ,-1.2112037 ,-1.3335027 , 1.3524677 , 2.7240725 ,-3.8335826 , 1.1397903 ,-3.1570992 ,-4.802078 , 3.8334577 , 0.23457901, 0.7132307 , 2.9887354 , 2.9702394 ,-1.4113717 ,-0.66712093, 0.77366674, 1.9308351 ,-0.45465755, 4.925366 , 2.4214447 , 2.8401468 , 0.49789894, 0.53141665,-2.7466767 , 0.2059374 ,-4.9661317 ,-4.1334467 , 1.6928389 ,-0.42529574, 1.1033608 , 4.275776 , 1.5063075 , 2.3528252 , 0.79505247, 3.9829993 ,-4.8472476 ,-1.2752185 , 3.7365675 , 1.976164 ,-4.742636 ,-2.7199092 ,-2.9191706 ,-3.181069 ,-4.489485 , 4.0847454 , 2.2164 , 0.9725334 ,-0.72566307
diff --git a/compiler/pota-quantization-value-test/test_inputs/AveragePool2D_000_config/channel/int16/2.txt b/compiler/pota-quantization-value-test/test_inputs/AveragePool2D_000_config/channel/int16/2.txt
new file mode 100644
index 000000000..7facffa57
--- /dev/null
+++ b/compiler/pota-quantization-value-test/test_inputs/AveragePool2D_000_config/channel/int16/2.txt
@@ -0,0 +1 @@
+-3.8293874 ,-0.13678598,-2.5444264 , 1.654611 ,-4.3037786 ,-3.4240584 ,-4.5642533 , 4.1250315 , 1.0469195 , 4.2802887 , 3.1617825 ,-3.1706758 ,-0.99622065, 2.7707603 , 3.7494645 ,-1.4548893 , 2.328633 , 1.7976477 ,-1.2107176 ,-2.0178459 ,-0.6488357 ,-2.9393644 , 2.8918762 , 3.6192262 ,-4.1777225 , 1.3264071 , 0.32620123, 0.7890992 ,-3.304334 , 3.4893208 , 2.5354576 ,-4.7718143 , 3.8602633 , 0.4927564 , 2.2971296 ,-0.3296792 , 2.8115997 ,-0.75152504, 0.558675 ,-2.343631 , 4.650826 ,-3.0893488 , 0.8726873 , 0.24922371, 2.7634025 , 1.0358421 ,-3.862506 ,-3.169402 ,-2.5373347 , 0.9484093 , 4.1409917 ,-4.0408096 ,-2.7231216 ,-2.548547 ,-2.6315095 , 0.8164778 ,-3.017436 , 1.1860138 ,-1.8634807 , 1.8684052 , 1.8657844 , 1.7747321 ,-3.1472425 ,-1.3989028
diff --git a/compiler/pota-quantization-value-test/test_inputs/AveragePool2D_000_config/channel/int16/3.txt b/compiler/pota-quantization-value-test/test_inputs/AveragePool2D_000_config/channel/int16/3.txt
new file mode 100644
index 000000000..0be8fdd19
--- /dev/null
+++ b/compiler/pota-quantization-value-test/test_inputs/AveragePool2D_000_config/channel/int16/3.txt
@@ -0,0 +1 @@
+-2.0492268 ,-2.2555764 ,-1.3543441 ,-3.7278662 ,-4.8601675 , 3.1095552 , 4.6319957 , 3.0211062 , 1.7870535 , 4.8839574 ,-1.3494394 , 2.635408 ,-0.24201432, 1.312397 , 0.16790341, 2.42507 ,-3.101355 , 3.1760497 ,-4.500736 ,-2.53691 , 1.064206 , 0.62096214, 2.803344 ,-4.6166744 ,-4.624786 , 3.667064 ,-1.484021 , 4.9401817 ,-3.763283 , 3.4351027 ,-2.906393 , 4.9945946 ,-3.2997096 , 3.6325612 ,-0.47211674, 0.28783202, 1.8703817 ,-4.042374 ,-3.3353784 , 4.9085765 ,-1.6753131 ,-3.4926984 ,-4.8663344 ,-4.495712 , 2.3402312 ,-1.0722051 , 0.28559962, 2.1208072 , 1.3024254 , 3.4810693 , 0.09860361, 1.695624 , 1.3901931 , 1.6858819 , 3.8231227 , 4.5972557 ,-4.6835494 , 0.5753765 ,-2.2377403 , 0.13013013,-2.1165738 ,-0.26044115,-0.653468 , 1.1010929
diff --git a/compiler/pota-quantization-value-test/test_inputs/AveragePool2D_000_config/channel/int16/4.txt b/compiler/pota-quantization-value-test/test_inputs/AveragePool2D_000_config/channel/int16/4.txt
new file mode 100644
index 000000000..7e2d618f9
--- /dev/null
+++ b/compiler/pota-quantization-value-test/test_inputs/AveragePool2D_000_config/channel/int16/4.txt
@@ -0,0 +1 @@
+ 4.397323 ,-0.51448834, 2.5729322 ,-4.3229046 , 1.149113 ,-3.8652143 ,-1.7352968 ,-0.7575065 ,-0.41720778, 4.327346 ,-4.2363043 , 0.8653738 ,-1.7511971 ,-0.7874244 ,-4.0734816 , 2.5622475 ,-3.1229742 ,-1.1783633 , 0.4017013 ,-0.76175183,-1.058416 , 1.128772 ,-3.0143378 ,-2.6688366 ,-2.575279 ,-4.326955 , 4.175434 , 4.791393 ,-1.10654 ,-4.4417224 , 3.5057635 , 1.5339037 ,-4.0297494 ,-3.7187057 ,-0.6645762 , 4.215642 , 1.6742749 , 2.5468905 , 1.73195 ,-3.3100636 ,-4.4818826 ,-2.5627983 ,-1.4624406 , 1.2433167 ,-4.005364 ,-4.3450556 ,-1.0652863 ,-1.0240986 , 3.989825 ,-4.1690702 ,-4.595108 ,-1.1154945 , 0.65749156, 2.5127344 , 2.509761 ,-4.3936505 , 3.6513395 ,-2.3340352 ,-4.3615093 , 3.5973237 , 0.9316653 , 1.9391845 , 3.6356397 , 0.8133118
diff --git a/compiler/pota-quantization-value-test/test_inputs/AveragePool2D_000_config/layer/uint8/0.txt b/compiler/pota-quantization-value-test/test_inputs/AveragePool2D_000_config/layer/uint8/0.txt
new file mode 100644
index 000000000..2a6b09b27
--- /dev/null
+++ b/compiler/pota-quantization-value-test/test_inputs/AveragePool2D_000_config/layer/uint8/0.txt
@@ -0,0 +1 @@
+-4.629505 , 1.0121975 ,-0.13417433,-2.329806 ,-3.4927373 ,-0.7574039 ,-2.2674313 , 3.1983519 , 2.4298382 ,-0.23268977, 2.0218065 ,-1.5087285 ,-1.3953347 ,-3.8100643 ,-1.7438283 , 3.9852605 , 2.9817178 ,-4.0460877 , 0.09402129, 4.3802586 ,-1.0991771 , 0.4134776 , 2.8136911 ,-3.6254618 ,-3.925183 , 4.691824 , 4.381538 ,-3.235543 ,-2.6764185 , 2.659456 ,-3.2127233 , 0.0206281 , 3.4056723 ,-1.693684 , 1.1005328 ,-3.1486542 , 0.77198106, 1.4526777 ,-2.3614178 , 4.8214664 ,-3.1486242 , 0.58941853,-4.1100698 , 4.1982718 , 1.7219902 ,-2.4375956 ,-1.7505955 , 1.7465224 ,-2.7494361 , 4.0679016 , 1.8936038 ,-4.523818 ,-3.4124248 ,-4.809946 ,-1.939553 , 4.9411273 , 1.6261404 ,-2.6846552 , 2.1339247 , 0.61396503,-1.6662381 , 2.4282491 , 2.662007 ,-0.40868336
diff --git a/compiler/pota-quantization-value-test/test_inputs/AveragePool2D_000_config/layer/uint8/1.txt b/compiler/pota-quantization-value-test/test_inputs/AveragePool2D_000_config/layer/uint8/1.txt
new file mode 100644
index 000000000..470da6c74
--- /dev/null
+++ b/compiler/pota-quantization-value-test/test_inputs/AveragePool2D_000_config/layer/uint8/1.txt
@@ -0,0 +1 @@
+ 0.70593804, 3.253847 , 1.1094694 , 0.5295975 , 0.5944647 ,-2.4391694 , 4.7912955 , 4.4374456 ,-2.942428 ,-3.5038033 ,-3.180417 , 2.1914082 ,-4.5295396 ,-3.0037553 ,-2.265191 , 0.20113531, 2.3805366 ,-0.9111223 ,-4.3170924 , 4.08436 , 1.1006241 ,-1.286977 , 4.811279 , 0.9131829 , 3.2051497 ,-2.8660698 ,-3.188871 , 1.4163305 , 4.061829 , 2.7783196 ,-3.4975152 , 3.4888391 , 2.5789826 ,-1.5264264 ,-0.13952135,-1.280177 , 2.4716458 , 2.6200528 ,-2.515086 , 3.441416 , 2.4515297 ,-0.9845471 , 0.9481396 , 1.1518412 , 1.6088997 , 1.445077 , 2.2620194 ,-2.0843177 ,-0.7263964 , 1.8159748 ,-3.3673623 , 0.2554476 ,-4.3550563 ,-1.4280493 ,-2.2702312 ,-4.7424164 ,-0.57241255,-2.813357 , 2.9161859 ,-0.9036504 , 0.00511268, 0.60724795, 4.8010454 , 1.6000834
diff --git a/compiler/pota-quantization-value-test/test_inputs/AveragePool2D_000_config/layer/uint8/2.txt b/compiler/pota-quantization-value-test/test_inputs/AveragePool2D_000_config/layer/uint8/2.txt
new file mode 100644
index 000000000..d9e048b61
--- /dev/null
+++ b/compiler/pota-quantization-value-test/test_inputs/AveragePool2D_000_config/layer/uint8/2.txt
@@ -0,0 +1 @@
+ 7.07888961e-01, 4.75798702e+00,-1.47843570e-01,-1.95845592e+00, 4.26537895e+00,-3.03711486e+00,-1.35137546e+00,-1.10638596e-01,-1.02415502e+00,-2.65345359e+00, 5.48920631e-01,-4.38003826e+00, 3.61377740e+00,-2.91408587e+00,-3.22874010e-01,-4.74363208e-01, 3.45294738e+00, 1.02204478e+00,-1.44102740e+00, 6.80687547e-01,-2.44050741e+00, 3.71395111e+00,-2.14443612e+00, 3.70928717e+00, 1.35871637e+00, 9.73374963e-01, 1.57826161e+00,-2.91381836e-01, 1.46376801e+00, 2.96391749e+00, 1.08418810e+00,-3.50718546e+00, 4.68637037e+00, 1.04839933e+00, 2.24482760e-01, 2.38816309e+00, 3.18772525e-01,-3.90284014e+00,-3.32757282e+00,-1.61143410e+00,-1.26013708e+00, 2.24948835e+00, 7.63151050e-01, 4.18296242e+00,-8.69123042e-01, 3.19850564e-01, 3.52391124e-01, 3.30018830e+00,-4.64861393e+00,-4.64479780e+00,-2.68103647e+00,-1.13277221e+00, 2.02201343e+00,-4.05572534e-01, 3.06759548e+00,-3.55881310e+00,-1.14900565e+00,-3.00835490e+00, 1.31509733e+00, 2.50206441e-01, 2.47731134e-01, 4.98673916e+00,-1.74064383e-01,-4.43180744e-03
diff --git a/compiler/pota-quantization-value-test/test_inputs/AveragePool2D_000_config/layer/uint8/3.txt b/compiler/pota-quantization-value-test/test_inputs/AveragePool2D_000_config/layer/uint8/3.txt
new file mode 100644
index 000000000..cdbf98e8a
--- /dev/null
+++ b/compiler/pota-quantization-value-test/test_inputs/AveragePool2D_000_config/layer/uint8/3.txt
@@ -0,0 +1 @@
+ 3.5591762 , 4.8821726 , 0.44271094, 4.786732 ,-2.4497197 , 2.4973536 , 2.034311 , 4.8329844 ,-3.9451184 , 4.9937835 , 2.0246332 ,-2.8319602 , 3.9617133 , 4.10946 ,-4.3191586 ,-2.8492777 ,-2.648121 ,-4.199404 ,-0.05163948,-4.7944984 , 2.8989205 , 1.4747709 ,-3.1194637 ,-2.877846 ,-0.39301065, 2.616311 , 2.6305614 , 1.7303206 , 3.6059175 ,-2.745988 , 2.5924454 , 3.0149276 , 4.0359216 ,-0.6135884 ,-2.5023808 ,-2.3395267 ,-3.0633461 ,-2.3836162 ,-4.4779797 ,-1.30866 , 1.9110863 , 0.654628 ,-4.559368 , 0.34231895,-0.8196542 , 4.7275734 , 3.2823656 ,-4.9644713 , 2.9191613 ,-3.4621727 ,-4.276584 ,-1.7153062 , 1.8820064 , 1.2659297 , 3.4141889 ,-4.905296 , 4.619848 ,-3.9501083 ,-1.5550466 , 3.6841137 , 1.7121594 , 1.9466268 , 1.5684807 , 4.5554323
diff --git a/compiler/pota-quantization-value-test/test_inputs/AveragePool2D_000_config/layer/uint8/4.txt b/compiler/pota-quantization-value-test/test_inputs/AveragePool2D_000_config/layer/uint8/4.txt
new file mode 100644
index 000000000..065d77df6
--- /dev/null
+++ b/compiler/pota-quantization-value-test/test_inputs/AveragePool2D_000_config/layer/uint8/4.txt
@@ -0,0 +1 @@
+-2.2269225 ,-1.2782103 ,-3.381931 ,-1.5229299 , 2.0681949 , 1.7630705 ,-0.81455594,-2.6558595 ,-3.4870632 ,-4.647749 , 2.4453654 ,-2.242679 ,-1.0272806 , 0.5656208 , 0.69442594,-4.4343104 ,-3.9649677 ,-3.8908577 ,-1.642287 , 3.0714357 , 1.0880747 ,-2.1665683 ,-4.0994506 , 2.004911 , 3.5922902 , 3.775 , 1.1580672 ,-1.4154137 ,-4.4964633 ,-1.696588 , 4.0220857 ,-1.2785947 ,-4.2075186 ,-4.515838 , 0.99715126, 3.0928102 ,-2.295537 ,-4.772882 ,-1.2936146 ,-2.6903791 , 0.10453273,-1.8041211 , 3.787591 , 0.9493053 ,-4.41586 , 3.4252715 ,-0.25001565, 4.655357 ,-1.8767506 , 0.00600041, 4.660605 , 2.550518 ,-3.830558 , 1.7777463 ,-0.7170577 ,-0.26554853,-3.5770113 ,-1.1354474 , 4.663121 , 3.100427 , 0.03313563,-1.7419808 ,-1.4426676 ,-3.912533
diff --git a/compiler/pota-quantization-value-test/test_inputs/Concatenation_001_config/channel/int16/0.txt b/compiler/pota-quantization-value-test/test_inputs/Concatenation_001_config/channel/int16/0.txt
new file mode 100644
index 000000000..9def1c2eb
--- /dev/null
+++ b/compiler/pota-quantization-value-test/test_inputs/Concatenation_001_config/channel/int16/0.txt
@@ -0,0 +1 @@
+0.24671102,3.271825 ,3.979895 ,1.3334678
diff --git a/compiler/pota-quantization-value-test/test_inputs/Concatenation_001_config/channel/int16/1.txt b/compiler/pota-quantization-value-test/test_inputs/Concatenation_001_config/channel/int16/1.txt
new file mode 100644
index 000000000..eaec2409f
--- /dev/null
+++ b/compiler/pota-quantization-value-test/test_inputs/Concatenation_001_config/channel/int16/1.txt
@@ -0,0 +1 @@
+ 1.9181111, 2.2396102,-2.8641696,-1.9045062
diff --git a/compiler/pota-quantization-value-test/test_inputs/Concatenation_001_config/channel/int16/2.txt b/compiler/pota-quantization-value-test/test_inputs/Concatenation_001_config/channel/int16/2.txt
new file mode 100644
index 000000000..3e05181cc
--- /dev/null
+++ b/compiler/pota-quantization-value-test/test_inputs/Concatenation_001_config/channel/int16/2.txt
@@ -0,0 +1 @@
+4.751434 ,2.8798263 ,0.15149078,2.9485583
diff --git a/compiler/pota-quantization-value-test/test_inputs/Concatenation_001_config/channel/int16/3.txt b/compiler/pota-quantization-value-test/test_inputs/Concatenation_001_config/channel/int16/3.txt
new file mode 100644
index 000000000..19d95b267
--- /dev/null
+++ b/compiler/pota-quantization-value-test/test_inputs/Concatenation_001_config/channel/int16/3.txt
@@ -0,0 +1 @@
+-1.5743442 , 0.6716824 , 0.75737774,-0.27396253
diff --git a/compiler/pota-quantization-value-test/test_inputs/Concatenation_001_config/channel/int16/4.txt b/compiler/pota-quantization-value-test/test_inputs/Concatenation_001_config/channel/int16/4.txt
new file mode 100644
index 000000000..d302e07a9
--- /dev/null
+++ b/compiler/pota-quantization-value-test/test_inputs/Concatenation_001_config/channel/int16/4.txt
@@ -0,0 +1 @@
+-1.0539489 , 1.9595883 , 0.19975437, 2.526178
diff --git a/compiler/pota-quantization-value-test/test_inputs/Concatenation_001_config/layer/uint8/0.txt b/compiler/pota-quantization-value-test/test_inputs/Concatenation_001_config/layer/uint8/0.txt
new file mode 100644
index 000000000..af1c2dff8
--- /dev/null
+++ b/compiler/pota-quantization-value-test/test_inputs/Concatenation_001_config/layer/uint8/0.txt
@@ -0,0 +1 @@
+-4.0575085 , 2.5941508 ,-2.550309 ,-0.03760919
diff --git a/compiler/pota-quantization-value-test/test_inputs/Concatenation_001_config/layer/uint8/1.txt b/compiler/pota-quantization-value-test/test_inputs/Concatenation_001_config/layer/uint8/1.txt
new file mode 100644
index 000000000..0ede613ac
--- /dev/null
+++ b/compiler/pota-quantization-value-test/test_inputs/Concatenation_001_config/layer/uint8/1.txt
@@ -0,0 +1 @@
+ 0.4857123,-4.032874 ,-3.687589 ,-1.235227
diff --git a/compiler/pota-quantization-value-test/test_inputs/Concatenation_001_config/layer/uint8/2.txt b/compiler/pota-quantization-value-test/test_inputs/Concatenation_001_config/layer/uint8/2.txt
new file mode 100644
index 000000000..b0b0392ba
--- /dev/null
+++ b/compiler/pota-quantization-value-test/test_inputs/Concatenation_001_config/layer/uint8/2.txt
@@ -0,0 +1 @@
+ 0.21878362, 3.9175916 ,-4.6141233 , 3.709655
diff --git a/compiler/pota-quantization-value-test/test_inputs/Concatenation_001_config/layer/uint8/3.txt b/compiler/pota-quantization-value-test/test_inputs/Concatenation_001_config/layer/uint8/3.txt
new file mode 100644
index 000000000..d8a8cad12
--- /dev/null
+++ b/compiler/pota-quantization-value-test/test_inputs/Concatenation_001_config/layer/uint8/3.txt
@@ -0,0 +1 @@
+-1.9645791,-1.4466153, 1.2543651,-1.0288917
diff --git a/compiler/pota-quantization-value-test/test_inputs/Concatenation_001_config/layer/uint8/4.txt b/compiler/pota-quantization-value-test/test_inputs/Concatenation_001_config/layer/uint8/4.txt
new file mode 100644
index 000000000..ca2a1c3b4
--- /dev/null
+++ b/compiler/pota-quantization-value-test/test_inputs/Concatenation_001_config/layer/uint8/4.txt
@@ -0,0 +1 @@
+-2.1611342, 2.4875243, 3.096089 ,-1.1327268
diff --git a/compiler/pota-quantization-value-test/test_inputs/Conv2D_004_config/channel/int16/0.txt b/compiler/pota-quantization-value-test/test_inputs/Conv2D_004_config/channel/int16/0.txt
new file mode 100644
index 000000000..0614b5e83
--- /dev/null
+++ b/compiler/pota-quantization-value-test/test_inputs/Conv2D_004_config/channel/int16/0.txt
@@ -0,0 +1 @@
+0.01090685,0.0581577 ,0.637094 ,0.64067715,0.26264507,0.13692169,0.9649414 ,0.5117181 ,0.18012471,0.07855253,0.6358017 ,0.62257963,0.41469443,0.93169045,0.20763828,0.7634293 ,0.75929826,0.72708374,0.23463063,0.58222896,0.6351517 ,0.68781173,0.5558012 ,0.7652179
diff --git a/compiler/pota-quantization-value-test/test_inputs/Conv2D_004_config/channel/int16/1.txt b/compiler/pota-quantization-value-test/test_inputs/Conv2D_004_config/channel/int16/1.txt
new file mode 100644
index 000000000..b1c39382f
--- /dev/null
+++ b/compiler/pota-quantization-value-test/test_inputs/Conv2D_004_config/channel/int16/1.txt
@@ -0,0 +1 @@
+0.57017624,0.08235867,0.03672464,0.40372616,0.7353964 ,0.59611887,0.7675548 ,0.21004233,0.09803218,0.20009473,0.8821493 ,0.17015271,0.14840214,0.99910176,0.37003204,0.22893582,0.43173164,0.3105084 ,0.41997132,0.43714985,0.08115962,0.71896386,0.7810953 ,0.00524598
diff --git a/compiler/pota-quantization-value-test/test_inputs/Conv2D_004_config/channel/int16/2.txt b/compiler/pota-quantization-value-test/test_inputs/Conv2D_004_config/channel/int16/2.txt
new file mode 100644
index 000000000..7e562de75
--- /dev/null
+++ b/compiler/pota-quantization-value-test/test_inputs/Conv2D_004_config/channel/int16/2.txt
@@ -0,0 +1 @@
+0.65292275,0.79842275,0.97853714,0.6711518 ,0.607567 ,0.40971732,0.74838483,0.95853555,0.32158023,0.911524 ,0.66938365,0.8573132 ,0.3047727 ,0.5561248 ,0.914098 ,0.07650814,0.37868017,0.29269257,0.19652605,0.63025194,0.61496884,0.32011527,0.8204132 ,0.21866946
diff --git a/compiler/pota-quantization-value-test/test_inputs/Conv2D_004_config/channel/int16/3.txt b/compiler/pota-quantization-value-test/test_inputs/Conv2D_004_config/channel/int16/3.txt
new file mode 100644
index 000000000..2958a7f54
--- /dev/null
+++ b/compiler/pota-quantization-value-test/test_inputs/Conv2D_004_config/channel/int16/3.txt
@@ -0,0 +1 @@
+0.4548901 ,0.56957537,0.0252368 ,0.4884317 ,0.7516498 ,0.02631272,0.22107519,0.95249426,0.34902394,0.11520014,0.808911 ,0.4148615 ,0.63615656,0.84020686,0.3633697 ,0.23993976,0.54176176,0.86938345,0.81628686,0.6380988 ,0.91891205,0.0406627 ,0.90289026,0.9429013
diff --git a/compiler/pota-quantization-value-test/test_inputs/Conv2D_004_config/channel/int16/4.txt b/compiler/pota-quantization-value-test/test_inputs/Conv2D_004_config/channel/int16/4.txt
new file mode 100644
index 000000000..fc969308e
--- /dev/null
+++ b/compiler/pota-quantization-value-test/test_inputs/Conv2D_004_config/channel/int16/4.txt
@@ -0,0 +1 @@
+0.9309136 ,0.02123719,0.64467335,0.6910113 ,0.47402772,0.54622203,0.31527275,0.81530565,0.98981965,0.36102158,0.03114039,0.1902339 ,0.45183742,0.60178596,0.4683102 ,0.59810966,0.40558222,0.5420302 ,0.72699505,0.9575108 ,0.46746576,0.08518691,0.40302262,0.69213694
diff --git a/compiler/pota-quantization-value-test/test_inputs/Conv2D_004_config/layer/uint8/0.txt b/compiler/pota-quantization-value-test/test_inputs/Conv2D_004_config/layer/uint8/0.txt
new file mode 100644
index 000000000..f82ad6704
--- /dev/null
+++ b/compiler/pota-quantization-value-test/test_inputs/Conv2D_004_config/layer/uint8/0.txt
@@ -0,0 +1 @@
+ 1.4040831 , 4.8621206 , 0.22880335,-0.3116556 , 0.260938 ,-0.61554366, 3.779648 ,-4.650609 , 3.886638 ,-0.25574106,-0.45002133, 4.9870906 ,-2.3277295 ,-4.9648423 ,-3.7695415 , 3.2857463 ,-4.5514555 ,-3.7705963 , 3.8458307 ,-4.797776 ,-3.4295716 ,-4.6026535 ,-1.4011091 , 2.8851774
diff --git a/compiler/pota-quantization-value-test/test_inputs/Conv2D_004_config/layer/uint8/1.txt b/compiler/pota-quantization-value-test/test_inputs/Conv2D_004_config/layer/uint8/1.txt
new file mode 100644
index 000000000..722337286
--- /dev/null
+++ b/compiler/pota-quantization-value-test/test_inputs/Conv2D_004_config/layer/uint8/1.txt
@@ -0,0 +1 @@
+-4.171929 ,-2.2911541 , 2.8965824 , 0.27504483,-1.6088463 ,-0.6509234 ,-3.262618 , 0.9633116 , 2.4504175 , 0.97706884, 0.4212074 , 1.4083375 ,-2.9757218 ,-3.1010823 ,-1.7146534 , 4.105306 , 0.07195274, 3.0232217 ,-2.7568955 ,-4.8887763 ,-3.4171093 ,-0.91494775, 2.5260248 , 4.74184
diff --git a/compiler/pota-quantization-value-test/test_inputs/Conv2D_004_config/layer/uint8/2.txt b/compiler/pota-quantization-value-test/test_inputs/Conv2D_004_config/layer/uint8/2.txt
new file mode 100644
index 000000000..1283a8ad1
--- /dev/null
+++ b/compiler/pota-quantization-value-test/test_inputs/Conv2D_004_config/layer/uint8/2.txt
@@ -0,0 +1 @@
+ 0.14139967, 1.9541235 ,-4.945228 ,-0.48999134, 3.7479703 , 0.29318067, 0.21036309, 4.357736 ,-4.3354783 ,-1.9236348 , 0.49615476,-1.8418436 ,-2.425741 , 4.817022 , 1.5093465 , 2.417444 ,-4.69463 , 0.3433745 ,-4.5979595 ,-3.9027495 ,-0.29977685, 4.9239326 ,-0.39175773, 1.277211
diff --git a/compiler/pota-quantization-value-test/test_inputs/Conv2D_004_config/layer/uint8/3.txt b/compiler/pota-quantization-value-test/test_inputs/Conv2D_004_config/layer/uint8/3.txt
new file mode 100644
index 000000000..c931e1752
--- /dev/null
+++ b/compiler/pota-quantization-value-test/test_inputs/Conv2D_004_config/layer/uint8/3.txt
@@ -0,0 +1 @@
+-3.692852 ,-1.0075341 ,-2.4409268 , 0.92995465,-3.1325107 , 4.028981 , 0.8446181 ,-2.2990613 , 4.0820794 , 3.1633005 , 4.1527267 ,-3.9514909 , 2.6104712 , 4.660645 ,-1.7398617 , 0.15663597,-3.6861904 ,-2.9019265 , 3.8828175 ,-2.712909 , 4.3699546 ,-3.5953352 ,-3.0655813 , 0.59767616
diff --git a/compiler/pota-quantization-value-test/test_inputs/Conv2D_004_config/layer/uint8/4.txt b/compiler/pota-quantization-value-test/test_inputs/Conv2D_004_config/layer/uint8/4.txt
new file mode 100644
index 000000000..d33c2dbec
--- /dev/null
+++ b/compiler/pota-quantization-value-test/test_inputs/Conv2D_004_config/layer/uint8/4.txt
@@ -0,0 +1 @@
+-2.8695228 , 2.865197 , 0.6635586 , 0.22709726, 2.85572 ,-4.2051144 , 1.5833759 ,-4.4277377 , 4.0004573 , 2.4766827 , 3.0412688 ,-4.8891425 ,-4.489896 , 3.0812325 , 2.1947708 , 1.6387184 , 0.31932488,-0.41092923,-0.0730476 , 0.7265327 , 4.1333 , 3.157228 , 4.7395325 , 3.4576747
diff --git a/compiler/pota-quantization-value-test/test_inputs/DepthwiseConv2D_002_config/channel/int16/0.txt b/compiler/pota-quantization-value-test/test_inputs/DepthwiseConv2D_002_config/channel/int16/0.txt
new file mode 100644
index 000000000..f4fb503ea
--- /dev/null
+++ b/compiler/pota-quantization-value-test/test_inputs/DepthwiseConv2D_002_config/channel/int16/0.txt
@@ -0,0 +1 @@
+0.4383064 ,0.8700848 ,0.86010957,0.08396256,0.7963264 ,0.4156023 ,0.28146362,0.82196397,0.9921972 ,0.09969576,0.23987265,0.6734369 ,0.5469574 ,0.20805728,0.32639247,0.76773816
diff --git a/compiler/pota-quantization-value-test/test_inputs/DepthwiseConv2D_002_config/channel/int16/1.txt b/compiler/pota-quantization-value-test/test_inputs/DepthwiseConv2D_002_config/channel/int16/1.txt
new file mode 100644
index 000000000..af4b01576
--- /dev/null
+++ b/compiler/pota-quantization-value-test/test_inputs/DepthwiseConv2D_002_config/channel/int16/1.txt
@@ -0,0 +1 @@
+0.4565062 ,0.92036587,0.47286046,0.18118097,0.5347498 ,0.91550153,0.300375 ,0.00581101,0.38686675,0.91085213,0.07278002,0.35556316,0.13014294,0.7274307 ,0.13867259,0.27517235
diff --git a/compiler/pota-quantization-value-test/test_inputs/DepthwiseConv2D_002_config/channel/int16/2.txt b/compiler/pota-quantization-value-test/test_inputs/DepthwiseConv2D_002_config/channel/int16/2.txt
new file mode 100644
index 000000000..57716034e
--- /dev/null
+++ b/compiler/pota-quantization-value-test/test_inputs/DepthwiseConv2D_002_config/channel/int16/2.txt
@@ -0,0 +1 @@
+0.6900174 ,0.28745306,0.30255774,0.5095008 ,0.6689176 ,0.4914624 ,0.92629427,0.504829 ,0.33514255,0.49005315,0.08569656,0.60965323,0.82193315,0.12380831,0.06971261,0.8822662
diff --git a/compiler/pota-quantization-value-test/test_inputs/DepthwiseConv2D_002_config/channel/int16/3.txt b/compiler/pota-quantization-value-test/test_inputs/DepthwiseConv2D_002_config/channel/int16/3.txt
new file mode 100644
index 000000000..1e03d83b0
--- /dev/null
+++ b/compiler/pota-quantization-value-test/test_inputs/DepthwiseConv2D_002_config/channel/int16/3.txt
@@ -0,0 +1 @@
+0.4240734 ,0.5430392 ,0.7536325 ,0.46065134,0.00315792,0.02719985,0.7080977 ,0.24389206,0.8114604 ,0.13292362,0.346597 ,0.70247084,0.55753845,0.01969242,0.82950485,0.66249627
diff --git a/compiler/pota-quantization-value-test/test_inputs/DepthwiseConv2D_002_config/channel/int16/4.txt b/compiler/pota-quantization-value-test/test_inputs/DepthwiseConv2D_002_config/channel/int16/4.txt
new file mode 100644
index 000000000..89ee30a6b
--- /dev/null
+++ b/compiler/pota-quantization-value-test/test_inputs/DepthwiseConv2D_002_config/channel/int16/4.txt
@@ -0,0 +1 @@
+0.31586212,0.19079527,0.9161567 ,0.8614566 ,0.9018915 ,0.34651542,0.62554437,0.05542602,0.8268219 ,0.38112178,0.9396123 ,0.49426383,0.8034765 ,0.72456217,0.5404088 ,0.8512237
diff --git a/compiler/pota-quantization-value-test/test_inputs/DepthwiseConv2D_002_config/layer/uint8/0.txt b/compiler/pota-quantization-value-test/test_inputs/DepthwiseConv2D_002_config/layer/uint8/0.txt
new file mode 100644
index 000000000..cc434b0a8
--- /dev/null
+++ b/compiler/pota-quantization-value-test/test_inputs/DepthwiseConv2D_002_config/layer/uint8/0.txt
@@ -0,0 +1 @@
+-4.0618963 ,-0.56899416,-2.6450877 , 2.4534085 , 1.98115 , 1.906561 ,-3.9617727 ,-0.6071247 , 3.1096997 , 4.4270124 ,-2.8755112 ,-1.8822336 ,-2.3567479 , 1.9797888 ,-3.5018713 , 3.429169
diff --git a/compiler/pota-quantization-value-test/test_inputs/DepthwiseConv2D_002_config/layer/uint8/1.txt b/compiler/pota-quantization-value-test/test_inputs/DepthwiseConv2D_002_config/layer/uint8/1.txt
new file mode 100644
index 000000000..2c637a1d2
--- /dev/null
+++ b/compiler/pota-quantization-value-test/test_inputs/DepthwiseConv2D_002_config/layer/uint8/1.txt
@@ -0,0 +1 @@
+-1.6089132 , 1.4328785 ,-3.2579598 ,-2.1328773 ,-2.6566415 , 2.541386 ,-4.3314023 , 0.48684084, 3.3134763 ,-2.69083 ,-0.45710313,-3.6763198 , 0.22075526,-3.159208 ,-2.1573126 , 4.1621423
diff --git a/compiler/pota-quantization-value-test/test_inputs/DepthwiseConv2D_002_config/layer/uint8/2.txt b/compiler/pota-quantization-value-test/test_inputs/DepthwiseConv2D_002_config/layer/uint8/2.txt
new file mode 100644
index 000000000..4b57fe8e0
--- /dev/null
+++ b/compiler/pota-quantization-value-test/test_inputs/DepthwiseConv2D_002_config/layer/uint8/2.txt
@@ -0,0 +1 @@
+-4.061572 , 3.0518744 , 2.694435 ,-4.720131 , 1.3782452 , 4.083631 , 4.1221976 ,-1.2299284 , 3.096133 , 3.8382158 ,-1.9518853 , 4.350529 , 0.09219506, 2.6483617 , 0.74373996, 2.7447948
diff --git a/compiler/pota-quantization-value-test/test_inputs/DepthwiseConv2D_002_config/layer/uint8/3.txt b/compiler/pota-quantization-value-test/test_inputs/DepthwiseConv2D_002_config/layer/uint8/3.txt
new file mode 100644
index 000000000..49c3022c2
--- /dev/null
+++ b/compiler/pota-quantization-value-test/test_inputs/DepthwiseConv2D_002_config/layer/uint8/3.txt
@@ -0,0 +1 @@
+ 4.68769 ,-3.2768764 , 3.1849844 , 4.497627 ,-1.2611016 ,-3.1152303 ,-0.8408633 , 0.4938034 , 4.0921655 ,-2.3150117 , 0.10100875,-3.8374226 , 4.08059 ,-0.74594986,-3.1000822 , 4.3654246
diff --git a/compiler/pota-quantization-value-test/test_inputs/DepthwiseConv2D_002_config/layer/uint8/4.txt b/compiler/pota-quantization-value-test/test_inputs/DepthwiseConv2D_002_config/layer/uint8/4.txt
new file mode 100644
index 000000000..e02c8ca16
--- /dev/null
+++ b/compiler/pota-quantization-value-test/test_inputs/DepthwiseConv2D_002_config/layer/uint8/4.txt
@@ -0,0 +1 @@
+-3.6168842 , 4.1935644 , 0.73750836, 4.6044145 , 2.8967912 ,-1.8085694 , 4.539956 ,-0.37032878, 1.9738418 , 1.5388782 ,-2.945171 ,-3.3875864 ,-4.516983 ,-3.4998245 ,-4.676514 ,-2.2738194
diff --git a/compiler/pota-quantization-value-test/test_inputs/FullyConnected_003_config/channel/int16/0.txt b/compiler/pota-quantization-value-test/test_inputs/FullyConnected_003_config/channel/int16/0.txt
new file mode 100644
index 000000000..233e5eae3
--- /dev/null
+++ b/compiler/pota-quantization-value-test/test_inputs/FullyConnected_003_config/channel/int16/0.txt
@@ -0,0 +1 @@
+ 2.7731526 , 2.451602 , 3.7535272 ,-1.2774152 , 1.5482912 , 1.3402948 , 4.4792123 ,-4.4954367 , 3.354679 ,-3.3615496 ,-4.619757 ,-3.3659618 , 4.7626247 ,-1.3596478 ,-4.835548 , 0.78964525
diff --git a/compiler/pota-quantization-value-test/test_inputs/FullyConnected_003_config/channel/int16/1.txt b/compiler/pota-quantization-value-test/test_inputs/FullyConnected_003_config/channel/int16/1.txt
new file mode 100644
index 000000000..6a126081d
--- /dev/null
+++ b/compiler/pota-quantization-value-test/test_inputs/FullyConnected_003_config/channel/int16/1.txt
@@ -0,0 +1 @@
+ 0.5400839 ,-3.2621996 ,-3.4817135 , 3.8183312 , 0.48498327, 2.9812584 , 4.111276 , 0.11223658, 4.7201405 , 2.4256718 , 1.4895477 , 4.7596602 ,-0.32709372, 1.3507305 ,-0.30043927,-1.8077502
diff --git a/compiler/pota-quantization-value-test/test_inputs/FullyConnected_003_config/channel/int16/2.txt b/compiler/pota-quantization-value-test/test_inputs/FullyConnected_003_config/channel/int16/2.txt
new file mode 100644
index 000000000..eccd2c625
--- /dev/null
+++ b/compiler/pota-quantization-value-test/test_inputs/FullyConnected_003_config/channel/int16/2.txt
@@ -0,0 +1 @@
+ 3.8758078 , 4.978636 ,-0.22925885,-2.6760504 ,-1.9160627 ,-4.609644 ,-0.9515802 , 3.558274 , 2.9096057 , 0.3340422 , 0.38608226,-0.32168412, 4.688853 ,-4.583811 ,-2.5113506 ,-4.6688786
diff --git a/compiler/pota-quantization-value-test/test_inputs/FullyConnected_003_config/channel/int16/3.txt b/compiler/pota-quantization-value-test/test_inputs/FullyConnected_003_config/channel/int16/3.txt
new file mode 100644
index 000000000..0da05277c
--- /dev/null
+++ b/compiler/pota-quantization-value-test/test_inputs/FullyConnected_003_config/channel/int16/3.txt
@@ -0,0 +1 @@
+-2.9868221 , 2.4237797 , 1.0833962 ,-0.9231426 ,-2.1091506 ,-2.6163697 ,-0.23101932,-1.9252896 , 4.7034135 , 3.1088963 ,-2.345823 ,-2.7866168 ,-3.186763 ,-4.431844 , 3.3113294 , 0.9501982
diff --git a/compiler/pota-quantization-value-test/test_inputs/FullyConnected_003_config/channel/int16/4.txt b/compiler/pota-quantization-value-test/test_inputs/FullyConnected_003_config/channel/int16/4.txt
new file mode 100644
index 000000000..ace24f7c1
--- /dev/null
+++ b/compiler/pota-quantization-value-test/test_inputs/FullyConnected_003_config/channel/int16/4.txt
@@ -0,0 +1 @@
+ 3.9716747 ,-2.254871 , 1.1943274 ,-2.212602 , 3.4311683 , 1.114989 , 4.0739036 , 0.47244295,-3.5793104 ,-3.359908 ,-4.7657595 , 2.0369127 ,-2.5619278 ,-3.4452975 ,-4.5852203 ,-1.137643
diff --git a/compiler/pota-quantization-value-test/test_inputs/FullyConnected_003_config/layer/uint8/0.txt b/compiler/pota-quantization-value-test/test_inputs/FullyConnected_003_config/layer/uint8/0.txt
new file mode 100644
index 000000000..18b34c8b1
--- /dev/null
+++ b/compiler/pota-quantization-value-test/test_inputs/FullyConnected_003_config/layer/uint8/0.txt
@@ -0,0 +1 @@
+ 1.5887886e+00,-4.7446389e+00,-8.6568648e-01,-2.9789083e+00, 4.4470620e+00,-4.6563668e+00,-3.8466794e+00, 1.8815753e-03,-2.7699089e+00, 5.2776605e-01, 3.6518128e+00,-3.0939088e+00,-3.6008542e+00, 7.2454107e-01, 2.2568390e+00,-4.4835806e+00
diff --git a/compiler/pota-quantization-value-test/test_inputs/FullyConnected_003_config/layer/uint8/1.txt b/compiler/pota-quantization-value-test/test_inputs/FullyConnected_003_config/layer/uint8/1.txt
new file mode 100644
index 000000000..d652da699
--- /dev/null
+++ b/compiler/pota-quantization-value-test/test_inputs/FullyConnected_003_config/layer/uint8/1.txt
@@ -0,0 +1 @@
+ 4.770412 ,-1.7520845 , 2.4057522 ,-0.74166125,-0.10780027, 4.5796657 ,-3.513094 ,-3.0285823 , 1.2001143 , 2.806742 ,-2.0503895 , 2.8160958 ,-1.5392824 ,-3.7772799 , 2.9158401 ,-1.0586692
diff --git a/compiler/pota-quantization-value-test/test_inputs/FullyConnected_003_config/layer/uint8/2.txt b/compiler/pota-quantization-value-test/test_inputs/FullyConnected_003_config/layer/uint8/2.txt
new file mode 100644
index 000000000..e6d6e004f
--- /dev/null
+++ b/compiler/pota-quantization-value-test/test_inputs/FullyConnected_003_config/layer/uint8/2.txt
@@ -0,0 +1 @@
+ 3.937408 ,-0.11191579, 2.2054992 , 2.847275 , 3.4895647 , 4.2361116 ,-3.2401278 ,-1.5813186 ,-4.558396 ,-0.89455926, 4.204445 , 3.5968838 , 2.773891 ,-2.9562843 ,-0.62606305,-0.03814701
diff --git a/compiler/pota-quantization-value-test/test_inputs/FullyConnected_003_config/layer/uint8/3.txt b/compiler/pota-quantization-value-test/test_inputs/FullyConnected_003_config/layer/uint8/3.txt
new file mode 100644
index 000000000..8b472058e
--- /dev/null
+++ b/compiler/pota-quantization-value-test/test_inputs/FullyConnected_003_config/layer/uint8/3.txt
@@ -0,0 +1 @@
+ 3.5032003 , 4.6036057 , 0.28915945, 4.671659 ,-1.978598 , 2.1773603 ,-0.54175234,-3.0131943 ,-2.7422159 ,-3.4361897 , 0.2850049 , 4.1412387 ,-4.86403 ,-0.67577606,-1.4206086 ,-2.357092
diff --git a/compiler/pota-quantization-value-test/test_inputs/FullyConnected_003_config/layer/uint8/4.txt b/compiler/pota-quantization-value-test/test_inputs/FullyConnected_003_config/layer/uint8/4.txt
new file mode 100644
index 000000000..bba80be5f
--- /dev/null
+++ b/compiler/pota-quantization-value-test/test_inputs/FullyConnected_003_config/layer/uint8/4.txt
@@ -0,0 +1 @@
+ 2.5063417 , 0.22874236, 2.2677753 ,-4.4159026 , 1.7464 , 4.6051064 ,-4.2867146 , 2.730521 , 1.6372519 , 0.70292765, 3.459053 ,-4.162376 , 0.36788836, 2.213299 , 4.110952 , 1.6797827
diff --git a/compiler/pota-quantization-value-test/test_inputs/MaxPool2D_000_config/channel/int16/0.txt b/compiler/pota-quantization-value-test/test_inputs/MaxPool2D_000_config/channel/int16/0.txt
new file mode 100644
index 000000000..31a2db03e
--- /dev/null
+++ b/compiler/pota-quantization-value-test/test_inputs/MaxPool2D_000_config/channel/int16/0.txt
@@ -0,0 +1 @@
+-4.1984134 , 3.7565446 , 1.3521377 ,-4.0263743 ,-1.929471 ,-3.7523155 , 1.3858393 , 4.1565247 ,-2.4681342 , 0.3598748 ,-2.0044599 , 3.7168603 , 3.6330557 , 3.0176272 ,-4.4643235 ,-0.1893698 , 3.8839848 ,-4.5703125 , 3.365731 , 4.5556674 , 4.954971 , 1.7591819 ,-0.9497736 ,-0.8527185 ,-1.1863561 ,-4.522639 ,-4.3187394 ,-3.702939 , 0.15341021, 0.8564923 , 1.9076811 , 4.2765 ,-3.7695112 ,-1.6033245 , 2.3159432 ,-1.6656336 , 1.4186145 , 4.334284 , 4.0654674 ,-4.518256 , 0.72815216, 2.5133176 ,-4.238172 , 1.0198449 ,-0.9638457 , 2.5847483 , 4.0381308 , 4.472872 , 0.11794223, 1.3358012 , 1.7975981 , 2.168553 ,-3.5131238 , 3.8412008 , 3.851232 ,-2.130775 , 3.556102 , 0.69062364,-4.668594 ,-4.619906 ,-2.87768 ,-1.0679495 ,-4.523185 , 4.184176
diff --git a/compiler/pota-quantization-value-test/test_inputs/MaxPool2D_000_config/channel/int16/1.txt b/compiler/pota-quantization-value-test/test_inputs/MaxPool2D_000_config/channel/int16/1.txt
new file mode 100644
index 000000000..2bdd62b24
--- /dev/null
+++ b/compiler/pota-quantization-value-test/test_inputs/MaxPool2D_000_config/channel/int16/1.txt
@@ -0,0 +1 @@
+ 2.9193265 , 4.315574 ,-3.7834768 , 3.4352486 , 4.1452866 ,-4.0322523 , 1.8039155 ,-4.080042 ,-1.1999705 , 4.9018297 ,-0.27180746, 1.709373 , 4.3322196 , 4.9179945 ,-3.977508 , 2.3486571 ,-0.11026379,-0.24730131, 2.3269305 , 2.1862001 , 0.92486495, 3.5822759 , 2.8370361 , 3.915398 ,-0.6385275 ,-0.02720119,-1.408676 ,-4.4472733 , 1.2901759 ,-4.60209 ,-2.9502335 ,-2.650517 ,-1.4038593 ,-2.967456 ,-2.0060933 ,-1.9603083 ,-0.4727794 ,-1.7877682 ,-3.9565926 , 1.4452418 , 2.5925353 ,-4.5134907 ,-4.195412 , 2.4681656 , 0.7140492 , 3.0753498 , 0.269442 ,-4.768041 ,-3.5370746 , 1.0272335 ,-0.7654047 ,-1.977087 , 3.1920779 , 0.37378865, 4.016262 ,-3.3201067 ,-4.7767315 ,-3.5074112 ,-4.094166 , 1.6035818 , 1.6506963 ,-3.2142932 , 4.7714067 ,-1.7164946
diff --git a/compiler/pota-quantization-value-test/test_inputs/MaxPool2D_000_config/channel/int16/2.txt b/compiler/pota-quantization-value-test/test_inputs/MaxPool2D_000_config/channel/int16/2.txt
new file mode 100644
index 000000000..8c770f61d
--- /dev/null
+++ b/compiler/pota-quantization-value-test/test_inputs/MaxPool2D_000_config/channel/int16/2.txt
@@ -0,0 +1 @@
+-1.8028042 , 1.7280815 ,-3.0464594 ,-2.810487 , 0.582805 ,-1.786865 ,-1.7263526 ,-0.36871073, 3.3955328 ,-3.9523299 ,-1.880003 , 4.9068613 , 4.6292953 , 3.9778202 ,-1.859954 , 2.8149757 , 4.5020967 ,-4.160163 , 1.9295161 ,-1.2508658 , 0.5669804 , 0.99246883,-2.4829247 , 0.88920474,-3.7942843 , 2.4626305 , 4.3087935 , 3.0680852 , 3.0893688 , 3.1640174 ,-0.41890725, 0.5377459 ,-4.0344224 ,-4.5812287 , 0.5720303 , 1.802316 ,-0.31413126, 2.9586952 , 1.1723012 ,-4.696369 ,-3.7047153 ,-1.8109767 ,-3.6122723 , 1.2727392 , 4.4057164 , 3.8347735 ,-4.739083 , 2.4655118 , 0.45258832, 4.0693913 ,-3.3486447 ,-0.64714307, 1.4990507 , 2.771129 ,-0.6109979 ,-1.0617865 , 2.0837703 ,-1.633663 , 1.8431798 ,-4.3942385 , 4.8523426 , 1.1941985 , 3.0366988 , 4.7991366
diff --git a/compiler/pota-quantization-value-test/test_inputs/MaxPool2D_000_config/channel/int16/3.txt b/compiler/pota-quantization-value-test/test_inputs/MaxPool2D_000_config/channel/int16/3.txt
new file mode 100644
index 000000000..8a4c9ebb5
--- /dev/null
+++ b/compiler/pota-quantization-value-test/test_inputs/MaxPool2D_000_config/channel/int16/3.txt
@@ -0,0 +1 @@
+-2.2375767 ,-1.1274278 , 0.18025301,-4.598087 , 1.1042122 , 3.1241179 , 1.9084688 ,-1.214722 , 4.596646 , 4.1969523 , 4.658112 , 3.143779 ,-2.6940444 ,-1.5482163 , 1.542811 ,-1.1338089 , 3.721594 , 0.24673286, 4.71102 , 2.7811737 , 1.171089 , 4.145586 ,-2.6335135 , 1.1190183 ,-3.7932637 ,-4.6548123 ,-3.10302 ,-3.392706 ,-3.856141 , 0.6618614 , 0.9668614 , 4.4293485 , 1.3193 , 4.983464 , 1.659716 ,-3.185926 , 4.8983006 , 1.6323217 , 0.18800464,-1.9328839 , 4.6031475 , 3.459718 , 4.128766 ,-3.4701612 ,-2.3796144 , 1.6752707 ,-3.6569223 , 2.922704 , 3.642789 ,-1.6817225 , 3.151759 ,-1.5401909 ,-3.8259532 , 2.4556105 ,-4.4989905 , 1.2779988 ,-0.62634754, 3.5827441 ,-0.82541114, 2.1539748 , 4.583461 , 1.2231985 ,-1.4457659 ,-2.9194565
diff --git a/compiler/pota-quantization-value-test/test_inputs/MaxPool2D_000_config/channel/int16/4.txt b/compiler/pota-quantization-value-test/test_inputs/MaxPool2D_000_config/channel/int16/4.txt
new file mode 100644
index 000000000..5110f86aa
--- /dev/null
+++ b/compiler/pota-quantization-value-test/test_inputs/MaxPool2D_000_config/channel/int16/4.txt
@@ -0,0 +1 @@
+-4.011289 , 0.9077414 ,-2.8109396 ,-4.33598 ,-2.6516347 ,-3.917852 , 3.2461808 , 1.7588768 ,-1.9439132 , 2.190185 , 1.5180751 , 0.3587409 ,-4.3434815 ,-4.1376143 , 3.750847 , 1.5820616 , 0.03843357, 4.71235 , 1.0592757 ,-1.7640393 , 0.44547582, 2.8698466 , 4.5816092 , 4.6638517 , 1.4207541 , 1.863644 , 3.6007912 , 0.6800818 ,-2.4884489 , 3.0707197 , 3.3961668 ,-4.331953 , 2.7828538 ,-0.16146964,-4.9070745 ,-2.9787786 , 0.3337284 ,-3.935533 ,-3.303555 , 2.376896 ,-4.7058997 ,-2.2409894 , 0.07352693,-2.6024988 , 4.9593167 ,-4.7717366 , 1.6590588 , 4.063875 ,-3.8855767 , 2.6274624 , 4.901856 , 4.157007 ,-3.292969 , 3.579326 , 3.9860668 ,-3.0936542 ,-4.7793274 , 0.71697485,-2.0354068 ,-2.1414943 , 3.6339438 , 0.10732502,-0.86129206, 4.4152017
diff --git a/compiler/pota-quantization-value-test/test_inputs/MaxPool2D_000_config/layer/uint8/0.txt b/compiler/pota-quantization-value-test/test_inputs/MaxPool2D_000_config/layer/uint8/0.txt
new file mode 100644
index 000000000..1a4fc3ed0
--- /dev/null
+++ b/compiler/pota-quantization-value-test/test_inputs/MaxPool2D_000_config/layer/uint8/0.txt
@@ -0,0 +1 @@
+ 2.2145607 , 0.88045335, 0.45151594, 2.852104 , 3.191637 ,-0.4578638 , 1.4858874 ,-2.1207588 ,-0.77495986,-4.1637363 , 0.83028954,-3.9974387 ,-3.3348315 , 3.7137656 ,-2.9883633 , 3.4332464 , 3.7178712 , 3.5850213 , 0.9240786 ,-0.07091421,-4.516931 , 3.965739 ,-4.828566 , 3.860382 , 0.3243482 , 1.6835089 ,-1.4710085 ,-2.6625636 , 1.942659 , 0.12808529, 1.3640044 ,-3.0124736 ,-3.646485 , 1.6046281 , 1.1087954 ,-2.4648561 ,-2.3274968 , 1.2196178 , 3.0752547 , 1.8316921 ,-2.926682 ,-2.247648 , 4.1264873 , 4.700915 ,-0.6861696 , 3.5246365 ,-2.5577545 , 1.832533 ,-4.3125343 ,-2.8579648 , 3.5299218 ,-0.67911506, 0.86782926,-2.918562 ,-3.3644724 ,-2.0097935 , 0.3721956 ,-1.3528451 , 3.8267515 , 4.916677 , 3.2055025 ,-0.64435905, 3.877367 ,-1.830818
diff --git a/compiler/pota-quantization-value-test/test_inputs/MaxPool2D_000_config/layer/uint8/1.txt b/compiler/pota-quantization-value-test/test_inputs/MaxPool2D_000_config/layer/uint8/1.txt
new file mode 100644
index 000000000..09c06c74c
--- /dev/null
+++ b/compiler/pota-quantization-value-test/test_inputs/MaxPool2D_000_config/layer/uint8/1.txt
@@ -0,0 +1 @@
+ 4.5410523 , 4.4007382 , 3.3252192 , 0.40420002,-4.7642856 , 2.0282986 , 2.32176 , 3.160375 ,-4.3348713 ,-2.324847 , 4.327631 , 3.253995 , 0.53624976,-4.4896946 , 4.0600896 , 2.697662 ,-3.0693228 ,-4.7954664 , 2.010163 , 4.5790668 , 0.00921074,-4.638007 ,-2.612561 , 4.338762 ,-1.3632652 ,-0.55081725, 4.273717 , 3.1074166 , 3.1386747 ,-4.033469 ,-0.7298752 ,-3.4973295 , 4.454913 ,-0.5148646 ,-2.4100194 , 2.7154703 , 4.1507893 , 2.3424785 ,-1.7028755 ,-2.6013496 ,-1.831555 ,-4.07971 ,-1.039077 ,-1.8733021 ,-3.885844 , 3.5691998 ,-3.8779395 ,-4.7566814 ,-3.570575 ,-3.0510366 ,-4.6841617 ,-4.751285 ,-2.9700782 , 3.4774506 ,-1.3150035 ,-3.6287053 , 2.2280993 , 4.502896 , 3.9448938 , 3.3926914 , 1.560589 , 3.3307595 , 2.6545596 , 2.0503757
diff --git a/compiler/pota-quantization-value-test/test_inputs/MaxPool2D_000_config/layer/uint8/2.txt b/compiler/pota-quantization-value-test/test_inputs/MaxPool2D_000_config/layer/uint8/2.txt
new file mode 100644
index 000000000..24b7a248f
--- /dev/null
+++ b/compiler/pota-quantization-value-test/test_inputs/MaxPool2D_000_config/layer/uint8/2.txt
@@ -0,0 +1 @@
+ 4.5630627e+00,-4.5077333e+00, 6.8117022e-03,-1.1568142e-02, 2.3568916e+00,-2.9918964e+00,-4.8542055e-01, 4.7381549e+00, 3.1183126e+00,-2.6462586e+00, 3.0083582e+00, 1.4518642e-01,-2.4764729e+00,-4.8520207e+00,-4.8022575e+00,-1.8167463e-01,-3.1106927e+00,-2.4183941e+00,-4.1466684e+00,-3.6997426e+00,-3.9788694e+00,-3.0889416e+00,-2.2332447e+00, 1.8608164e+00, 2.8619974e+00,-3.6986623e+00,-1.3749057e+00,-9.2409855e-01, 2.7646086e+00,-3.3385031e+00, 7.6255083e-01, 1.0236104e+00,-1.7077237e+00,-4.4339476e+00,-1.1930060e+00,-1.7226344e+00,-3.1680160e+00,-1.8338548e+00,-2.6412952e+00,-8.2973856e-01, 4.2303777e+00, 3.4531716e-03,-3.3162324e+00, 8.4682000e-01, 2.5807633e+00, 2.7543969e+00, 6.8153429e-01, 4.7182851e+00, 4.2617507e+00,-1.4446728e+00,-4.3752551e+00, 3.5699592e+00, 9.6946698e-01,-2.0700858e+00, 2.0899124e+00, 1.6371955e+00,-9.5873147e-01, 3.1151581e+00, 2.9369416e+00, 4.4568644e+00,-9.4711387e-01,-4.1349549e+00, 3.3031983e+00, 4.1091359e-01
diff --git a/compiler/pota-quantization-value-test/test_inputs/MaxPool2D_000_config/layer/uint8/3.txt b/compiler/pota-quantization-value-test/test_inputs/MaxPool2D_000_config/layer/uint8/3.txt
new file mode 100644
index 000000000..088eb62cd
--- /dev/null
+++ b/compiler/pota-quantization-value-test/test_inputs/MaxPool2D_000_config/layer/uint8/3.txt
@@ -0,0 +1 @@
+ 2.5168443 , 3.7492614 ,-3.7076504 , 0.49709523,-4.642194 , 1.8201847 ,-1.396746 ,-1.0660223 , 3.3333528 ,-1.7719259 ,-2.3515563 ,-2.0570705 ,-4.7125244 ,-1.593302 ,-2.1072757 ,-4.4396334 , 4.3185077 ,-2.7568438 ,-0.59535027,-3.9871383 ,-2.6216223 , 0.39957425,-1.3687986 ,-3.1157744 , 1.2557942 , 2.3428473 ,-4.906711 , 3.5663006 ,-0.46128616,-4.7818427 ,-0.8876555 , 2.5066485 ,-1.3254607 ,-3.6097736 , 1.2686944 ,-1.37061 , 4.762917 ,-3.489012 ,-2.7905307 ,-0.2612837 ,-3.3236315 , 0.8347171 , 2.5582032 , 0.42744452, 1.7428764 , 2.4122005 ,-3.6781132 , 2.8811646 ,-2.7060914 ,-0.4752588 , 0.44432116, 0.5011615 , 3.2550313 , 0.02670379, 2.6197197 ,-4.319786 ,-1.4056181 ,-3.3794782 , 0.66822946,-1.4262298 ,-0.2465175 ,-4.6432767 ,-3.580772 , 2.960096
diff --git a/compiler/pota-quantization-value-test/test_inputs/MaxPool2D_000_config/layer/uint8/4.txt b/compiler/pota-quantization-value-test/test_inputs/MaxPool2D_000_config/layer/uint8/4.txt
new file mode 100644
index 000000000..bb8129473
--- /dev/null
+++ b/compiler/pota-quantization-value-test/test_inputs/MaxPool2D_000_config/layer/uint8/4.txt
@@ -0,0 +1 @@
+-4.9356976 , 3.9426446 ,-4.746647 , 2.3674695 , 0.54803735, 3.1911538 , 0.28858757, 0.4800329 , 2.0652595 ,-4.5046906 , 0.21695825,-0.17217463, 2.4329293 ,-1.2274694 ,-0.11534467,-2.096684 , 2.6882868 ,-2.5291932 , 0.56199783,-2.0743406 , 0.95846254, 4.004705 , 0.89853394, 2.9610496 , 2.9799032 , 1.5339601 ,-1.7136513 , 2.1797504 ,-4.2055335 , 1.5059681 , 3.0828342 ,-1.7946475 ,-2.7096524 , 3.1037905 , 0.75922704,-1.1446673 ,-2.084073 ,-1.2888353 ,-1.6958839 ,-0.8388285 ,-1.0279479 , 1.1291095 , 4.080411 , 3.6791847 , 0.9237894 ,-4.70821 , 0.5730598 ,-1.3565379 ,-2.7533107 ,-0.4583869 ,-1.4416862 ,-3.6039822 ,-1.1611387 ,-2.6919081 ,-0.6557734 ,-2.9248757 , 1.4998456 , 3.2239568 , 0.23668556,-3.4410136 ,-2.3170567 , 3.66808 , 1.9004405 , 4.3537745
diff --git a/compiler/pota-quantization-value-test/test_inputs/Mean_000_config/channel/int16/0.txt b/compiler/pota-quantization-value-test/test_inputs/Mean_000_config/channel/int16/0.txt
new file mode 100644
index 000000000..182eb5290
--- /dev/null
+++ b/compiler/pota-quantization-value-test/test_inputs/Mean_000_config/channel/int16/0.txt
@@ -0,0 +1 @@
+ 3.4251418 , 1.8884782 ,-4.061519 ,-2.1329548 , 3.851976 , 3.668601 ,-0.7418167 , 2.379966 , 0.87259316,-3.96981 ,-4.627804 ,-3.3958297 , 3.025158 ,-1.299777 ,-4.322816 , 3.9173064 ,-0.55214256, 1.9224825 ,-4.8571157 ,-4.778045 , 3.3015614 , 0.56785774, 4.7985554 ,-0.4355816 , 4.9478025 , 1.7909397 ,-0.7620663 ,-0.09947702,-3.0230513 , 1.3817457 ,-4.5706887 ,-3.4097836 ,-4.7086477 ,-3.4651487 , 1.4401027 , 4.7513933 ,-1.0788624 ,-3.4946275 , 4.607974 ,-3.1215246 ,-1.4637078 ,-3.5266285 , 2.1268125 , 0.19458893, 4.058288 , 2.2452407 , 0.7575343 , 0.12213306, 4.885321 ,-1.2482406 ,-1.1034219 ,-4.054173 ,-3.6471267 , 4.774012 , 0.9450243 ,-2.5827825 ,-2.3991685 ,-2.8482654 , 0.9294943 ,-3.1165063 ,-1.6113516 , 0.04260086, 2.0987031 , 2.1601508 , 4.9740996 , 3.7719023 , 2.6817482 , 0.42131838,-1.4525859 ,-0.5124655 , 2.6313434 , 4.5606523 ,-4.6180778 , 4.788594 ,-0.8446551 ,-1.5460813 , 1.4288356 ,-1.9648911 ,-4.9766145 ,-2.405665 ,-0.30327383, 3.5204673 ,-3.848158 ,-2.6913974 ,-2.76141 , 4.336643 , 1.4205143 , 4.5898 ,-0.93183124, 4.2199287 ,-4.216924 ,-1.0979122 ,-2.3032405 ,-3.4457245 , 2.944412 , 2.137278 , 1.0326933 , 2.3116126 , 4.2138443 , 1.8283377 , 0.28901085,-1.8877143 , 0.50673705, 1.4360197 ,-2.924691 , 0.9819095 , 3.4656513 ,-2.541582 ,-1.9102442 , 3.3629627 ,-0.9675056 , 0.5937253 ,-2.4236617 ,-1.4193813 ,-0.7552614 ,-1.7121441 , 4.39647 ,-2.2712908 ,-4.3387337 , 1.5912663 , 0.8397044 , 0.17277755, 1.5272428 , 3.571715 ,-1.4471695 , 1.8623346 ,-4.3603377 , 1.2116091 , 4.960487 , 2.3681397 , 1.2925869 ,-4.3249073 , 2.4402251 ,-1.4506928 , 3.023616 ,-3.232099 ,-4.0106025 , 3.5774167 ,-0.6024932 , 1.0183483 ,-2.8215308 , 3.7395437 , 1.9100485 , 3.892712 , 4.6569633 ,-3.251774 ,-3.6923678 ,-4.8891983 ,-3.8605282 ,-4.0293036 ,-2.8199108 , 4.1668954 , 2.1569817 ,-2.9700332 ,-0.7035824 ,-0.5176811 ,-3.1826456 ,-3.334556 , 4.9103675 , 3.8513231 , 2.8609774 , 1.1845547 ,-1.4094447 ,-2.0445833 , 0.9833705 , 4.481276 , 3.83006 , 4.6240997 ,-4.268881 ,-0.85518706,-2.2650888 , 4.032545 , 0.9495817 , 1.1353155 ,-4.6551876 ,-2.2839146 , 2.6291692 ,-3.0398533 , 0.52652216,-1.8323399 ,-0.12300313, 0.46178594, 1.120684 , 1.4657134 ,-1.9794375 , 0.08941289,-4.4573083 , 2.7112565 , 4.9227715 , 2.4938288 ,-0.37153494,-4.1604757 , 4.7694197 ,-1.3021677 , 2.454714 ,-2.4902875 ,-2.760436 , 0.05183195,-2.6723208 ,-1.1471758 ,-2.2565122 , 0.20876396,-0.7288584 , 0.4386669 , 0.7846054 , 2.7294593 ,-3.836883 , 2.7501638 ,-4.775067 ,-3.2403855 ,-2.0307286 ,-1.6403166 , 4.9471517 , 1.0428456 , 2.5126355 , 3.0090203 ,-2.3476288 ,-2.9215205 , 3.8079188 , 0.83959275, 4.2670302 , 1.2338712 , 2.7329903 , 2.2549257 , 4.882931 , 0.12783106,-2.4392028 ,-2.4590807 , 4.2874207 ,-0.08333418,-3.4244132 ,-0.2235516 ,-4.23632 ,-1.3970895 , 2.1245553 ,-2.513883 ,-2.8092728 ,-1.9194845 ,-4.1932216 ,-3.7431748 ,-1.1063433 ,-3.714845 , 1.7230242 ,-0.19162221, 1.1123114 , 3.937181 , 2.6165597 ,-0.61531806, 0.44309503,-2.9260228 ,-3.1617007 , 0.0663496 , 2.4541974 ,-2.714474 , 4.2564497 , 1.2300675
diff --git a/compiler/pota-quantization-value-test/test_inputs/Mean_000_config/channel/int16/1.txt b/compiler/pota-quantization-value-test/test_inputs/Mean_000_config/channel/int16/1.txt
new file mode 100644
index 000000000..dd8037244
--- /dev/null
+++ b/compiler/pota-quantization-value-test/test_inputs/Mean_000_config/channel/int16/1.txt
@@ -0,0 +1 @@
+-4.8834 ,-4.6238756 , 2.020674 ,-2.3068821 , 3.7487323 ,-0.36079448, 0.08661745, 3.423143 , 3.3073757 ,-2.709357 , 4.4810205 , 3.4159606 , 4.1597505 ,-4.249789 , 2.3782206 ,-2.02848 , 0.90137833,-0.6249625 ,-3.5300052 ,-4.1113796 ,-3.768913 ,-3.59854 , 2.0896666 , 1.7677166 ,-2.3101497 ,-1.0116942 ,-3.7846713 , 2.4777756 , 3.413987 ,-2.1964507 , 0.08637846, 0.02552292,-1.9918599 , 0.7785565 ,-4.065995 , 0.8808776 ,-2.0446506 ,-1.8421272 , 0.42566776, 3.8834689 , 4.900111 ,-3.0617309 , 4.0613194 ,-3.3601153 , 3.678536 ,-4.1136184 ,-4.2903633 ,-2.6918027 , 3.4335177 ,-3.9272869 ,-1.6882807 ,-1.9629028 , 4.2125826 , 1.6536059 ,-1.1801353 , 4.8443203 , 2.9393198 , 0.4306524 , 4.390743 ,-4.6322317 , 2.932263 , 4.140538 , 2.7385068 , 2.620753 , 2.0725663 ,-1.3642436 ,-0.48539641,-4.2409816 ,-1.5950899 ,-1.688442 , 4.4769464 ,-1.25038 , 3.462903 , 0.5011836 , 0.981037 , 0.63532305,-3.4727957 , 4.6721544 ,-3.481392 , 2.8904114 ,-1.7057139 , 1.0501702 , 3.0799537 , 1.6698593 ,-1.3895478 , 4.487443 , 2.5352533 ,-0.19357985, 0.78166926, 3.5892236 ,-4.3259463 , 2.8381345 , 1.3652785 ,-0.40142608,-0.62102544,-3.088937 ,-4.0266094 , 4.7095647 , 2.0513067 ,-1.8115149 , 0.11062156,-4.5980725 , 2.809295 , 4.2042894 ,-3.4689455 ,-1.3418434 , 2.9026117 ,-1.6125411 , 2.153075 ,-3.4445221 , 3.4869678 , 1.8746428 , 0.8482056 , 3.0525062 , 1.715966 , 1.7684505 ,-2.0022326 ,-4.3427444 ,-3.1659825 , 1.6855526 , 3.1612136 , 2.0646648 ,-3.972224 ,-2.91726 ,-3.5450957 ,-2.7226381 ,-0.3273488 ,-2.5905557 , 3.6621993 ,-4.3285728 ,-0.6200474 , 0.08522832,-2.1981175 ,-3.4179437 , 2.5989106 ,-0.8503352 ,-3.3723786 , 3.9595454 ,-0.5431398 ,-2.6962373 , 1.9689399 ,-2.8925 ,-1.2064192 , 1.606632 , 2.2728612 ,-0.1403075 ,-4.8031726 , 0.1549256 ,-1.3698703 , 0.78889227,-2.286554 , 0.96417916,-0.10438658,-3.8131578 , 2.9322996 , 2.4103441 , 4.4864798 , 0.02176606,-1.1966147 ,-3.6921146 , 4.943659 ,-1.0050472 ,-1.2238564 ,-4.5758605 ,-2.6865735 , 1.7294792 , 4.180183 , 3.157911 ,-3.581904 ,-2.9112866 , 4.1674094 , 3.2326035 ,-2.7883985 ,-0.09154221, 0.8667318 ,-4.532571 , 0.816668 , 3.1307516 ,-4.1993947 ,-1.0503744 , 0.123965 , 0.17691068,-3.1465137 ,-1.4964765 , 3.4077635 ,-0.35415363, 1.9092371 ,-4.709203 , 1.148622 , 4.4766874 ,-2.193539 ,-3.7959206 , 1.4420112 ,-2.5300896 , 4.107192 , 3.4666913 ,-2.1158516 ,-3.182484 ,-2.8406513 ,-1.9396024 ,-2.3695247 , 3.8301885 ,-1.5032169 ,-0.48879272, 0.41695955,-1.1829228 , 4.822825 ,-2.9244933 ,-3.8178608 , 2.7742817 , 2.6998327 ,-3.1187122 , 2.508593 , 1.2989064 , 2.3436947 ,-0.39074868,-3.034766 ,-1.8690065 , 4.850296 ,-2.4549792 , 4.839528 , 2.2758777 , 2.6689568 , 3.2014422 , 3.6975234 ,-3.2566156 , 3.546554 , 1.9570364 ,-2.753807 , 2.3366053 ,-4.357898 , 4.9184504 ,-1.0057111 ,-3.8582199 , 1.2416974 , 4.355522 ,-2.7863925 , 0.4679685 , 2.6850772 , 2.9984746 , 2.434312 , 2.9931593 , 2.2637212 ,-0.18371914,-4.07688 ,-2.0402577 , 0.5173147 , 0.19596666, 4.71653 , 4.291663 ,-3.3575501 ,-1.0857964 ,-0.16504912, 3.6683955 , 2.9581416 ,-1.354989
diff --git a/compiler/pota-quantization-value-test/test_inputs/Mean_000_config/channel/int16/2.txt b/compiler/pota-quantization-value-test/test_inputs/Mean_000_config/channel/int16/2.txt
new file mode 100644
index 000000000..1295bfdba
--- /dev/null
+++ b/compiler/pota-quantization-value-test/test_inputs/Mean_000_config/channel/int16/2.txt
@@ -0,0 +1 @@
+ 1.2340723 ,-1.7371651 , 4.271641 ,-2.3332376 , 0.82301813,-3.4199295 ,-0.75806665,-2.2647665 , 2.613749 , 2.2658496 ,-2.1277714 ,-0.465433 ,-0.1323059 ,-1.9658507 ,-4.7780223 ,-4.392719 ,-0.81063855,-3.639001 ,-3.6398284 , 4.6309023 ,-0.17483327, 1.7921627 ,-1.1493484 ,-3.8145075 , 2.2367268 ,-0.40209827,-1.4159911 , 2.3032134 ,-4.154446 , 1.6760192 , 2.3430173 ,-1.386683 , 3.3363335 ,-2.976934 , 3.3983 ,-0.0069695 , 3.7025425 ,-1.8683758 , 0.72029626, 2.7558882 ,-4.4060984 , 2.553126 ,-3.5888321 , 1.8549582 ,-0.52258795, 4.6549897 , 0.8886988 ,-3.0400214 ,-3.6890693 , 3.6663766 ,-4.8026586 , 1.0636287 ,-2.9774907 , 0.39021772,-4.2414255 , 2.914968 ,-0.24334456,-4.0344954 ,-1.1011956 ,-3.8205252 , 0.05693521,-4.1379023 , 1.0584197 ,-4.0404034 , 4.841462 ,-1.2727845 , 2.6974225 ,-4.2507453 ,-2.7101111 ,-2.9800036 , 0.3082796 , 3.6763537 , 2.3277721 ,-4.9667864 ,-2.4498677 , 0.2704629 , 3.006634 ,-1.1129389 , 4.373073 ,-1.2066779 ,-3.1575904 ,-2.721046 ,-0.861226 , 1.7315729 , 2.255666 , 2.5448847 , 3.1268334 , 1.5189171 ,-3.1992466 , 0.607633 , 4.0749955 , 1.2546133 ,-1.5335796 ,-1.6200712 ,-3.9392874 , 1.053699 ,-0.87970537,-3.9218261 ,-2.2724128 , 0.82235074,-2.3400521 , 3.6467028 , 1.6891364 ,-1.6333519 , 2.2639709 ,-0.08272895,-3.076964 , 3.731091 , 3.7932968 , 2.496441 ,-4.12142 ,-2.0908666 ,-4.994248 ,-0.0429902 ,-4.6083336 ,-4.522535 , 4.717733 , 1.6715643 ,-4.779822 , 1.2919815 ,-4.6121325 ,-0.6206874 ,-2.6633883 ,-1.9632595 ,-3.2203329 ,-0.6556523 , 1.3083993 , 0.13287744, 4.599294 ,-1.1777852 ,-2.9159715 ,-0.25669238, 0.48217958,-3.9736347 ,-0.774503 ,-0.7264863 ,-3.0058725 ,-2.1682055 , 2.6579158 ,-4.4020653 , 3.0450368 , 1.3798735 ,-4.9858127 ,-4.5812607 ,-3.7349749 ,-4.4158583 , 1.631093 ,-3.0769646 ,-3.8406906 , 1.6544044 , 0.36895755,-1.8196682 ,-2.0880237 ,-3.708266 ,-2.0277069 , 1.0536597 ,-3.6726243 , 1.1704421 , 2.3201573 , 1.4994124 , 4.0197086 , 2.1001272 ,-0.39845964, 4.879206 ,-4.6042013 , 4.367211 , 2.2712052 , 2.7754369 ,-3.156667 , 4.349216 ,-4.111492 , 1.0267047 ,-2.3381946 , 4.8876834 , 4.876814 ,-0.28538027, 4.8861 ,-0.95963717, 0.46279734,-4.5789995 , 0.26168647,-0.8879058 , 2.4468584 , 1.3030591 , 3.7261188 , 3.9933589 , 2.4964094 ,-1.3851117 , 0.7147012 ,-3.8367457 , 0.79737735,-0.5907085 , 4.317288 , 0.7659837 ,-4.821792 ,-1.466433 ,-1.147227 ,-1.8638811 , 2.5115767 , 1.9449657 ,-2.4122007 ,-2.4968379 , 0.7738737 ,-1.4761454 , 4.131583 , 0.4211128 ,-2.4312468 ,-1.9722428 , 2.2810268 , 4.950381 ,-0.0406047 , 4.67312 , 0.66613483,-0.28880936, 3.2917845 , 1.6225572 , 4.809879 , 0.48241946,-3.654634 , 0.68542016, 1.3973923 , 3.479005 ,-1.4296091 , 0.64391786,-4.0887494 ,-2.186845 ,-4.5834355 ,-0.67726034, 2.4158256 ,-2.4787726 , 0.4353257 , 2.9205139 , 0.10488439, 2.0790074 ,-4.5518365 ,-3.3856661 , 3.940736 ,-1.7141095 ,-4.8946457 , 1.1085542 , 3.785141 ,-2.4175835 , 3.7720537 , 4.623048 , 2.2239215 , 0.11616404, 0.09229392,-3.637964 ,-2.334849 ,-0.95000714,-2.1338253 , 3.2281857 ,-4.0220475 , 4.7304025 ,-1.8075961 , 0.2428817
diff --git a/compiler/pota-quantization-value-test/test_inputs/Mean_000_config/channel/int16/3.txt b/compiler/pota-quantization-value-test/test_inputs/Mean_000_config/channel/int16/3.txt
new file mode 100644
index 000000000..378b5fea5
--- /dev/null
+++ b/compiler/pota-quantization-value-test/test_inputs/Mean_000_config/channel/int16/3.txt
@@ -0,0 +1 @@
+ 2.4605505 ,-2.7001262 ,-4.3874917 ,-2.9867616 ,-3.4332 , 0.76675916, 3.4377892 ,-0.6712793 , 1.8018581 , 1.8148962 , 2.0353577 ,-4.766427 , 3.2487285 , 3.886249 ,-2.8867183 ,-0.7906634 ,-4.376028 ,-4.2085958 ,-0.36025277, 0.6360799 ,-4.687723 , 4.8313313 , 3.3582768 , 2.1117954 , 0.9821817 , 3.3697798 ,-1.1784939 ,-3.1590316 ,-0.24019621, 0.20640443, 1.2808957 , 2.3346424 , 2.13951 , 0.61864626, 2.4020443 ,-1.9671458 ,-1.6852348 , 0.32225233,-2.3928862 ,-4.173372 ,-2.282281 ,-1.271318 , 3.0839682 ,-4.4726086 ,-0.635177 , 3.2710915 , 3.08071 ,-0.7311931 , 2.1444874 , 0.4102332 ,-3.332888 ,-4.8965516 , 3.903695 , 1.4920163 ,-4.041926 ,-0.3941788 , 3.6352818 ,-2.098405 ,-0.9248165 , 2.6277795 , 3.225142 ,-1.4461963 ,-4.2050753 ,-0.2213572 , 1.9704323 , 3.298732 ,-4.710403 , 3.6876736 , 2.0771818 , 1.3559113 , 1.328373 ,-4.4079022 ,-3.28067 , 3.8852313 , 2.322237 , 2.3243637 ,-1.9126451 , 4.6277676 , 1.7031307 , 0.74861574,-4.688967 , 3.9351206 ,-1.8054084 , 1.5824287 , 3.5381088 , 2.4798677 ,-3.3099444 ,-3.8518245 , 1.5562242 ,-1.9466928 , 0.08375791,-0.16754703, 2.9265418 ,-1.6599798 , 2.766202 ,-2.8269696 ,-0.19389874, 2.0869334 ,-1.5073173 ,-3.2024453 ,-3.6522708 ,-4.588111 ,-2.3425827 , 4.8709297 ,-1.4231887 , 1.0590451 ,-1.6406479 , 0.37192422, 0.7313186 , 0.3865313 ,-4.2832613 , 3.9712496 , 0.07653506, 0.2593589 ,-2.6036396 ,-0.45185068, 3.6537335 ,-0.6341783 ,-0.6381408 ,-1.0992868 , 2.766365 , 4.666631 , 4.416099 ,-3.6654727 ,-4.0626607 ,-3.4928396 ,-0.6944366 , 4.869798 , 4.2240977 , 0.9655519 ,-2.5654511 , 1.3396966 ,-3.7639391 ,-1.2369057 ,-3.7242758 ,-0.5189227 , 1.6548159 ,-2.6197302 , 4.2732763 , 2.239486 ,-4.316255 , 3.2419755 ,-1.9283817 , 0.22489135, 2.6034477 , 0.15818155, 2.0811818 , 0.836994 , 2.7832468 ,-0.68581384, 0.89475006,-3.1455147 ,-4.818614 ,-4.1738377 , 0.4281551 ,-2.935886 ,-3.7582467 , 0.58168256, 0.2854076 , 1.0492616 , 2.2415884 ,-4.4923434 ,-3.2479804 , 3.8439462 , 3.9802108 ,-0.9027783 , 1.7783072 ,-2.2782066 , 4.4638705 , 4.28735 , 4.291463 , 1.1685107 , 1.2765578 ,-3.7954235 ,-3.494621 , 4.4340134 ,-3.5995178 ,-4.3025713 , 3.3037348 ,-3.6675146 ,-1.7871013 ,-1.2922373 , 0.72924066,-4.7065907 , 2.1388702 , 2.3570008 , 3.9203117 , 0.07483537,-2.8389792 ,-1.795164 ,-4.380931 , 1.3189598 , 2.4404252 , 4.4774084 ,-1.2798066 ,-4.95842 , 1.8095461 , 4.2692375 ,-2.0918155 , 0.33083543,-3.794544 , 1.4940621 ,-3.9446015 ,-0.38208306, 0.30863285,-0.6832849 ,-2.5675633 ,-4.948772 , 1.5904989 , 3.0415509 ,-4.899339 , 0.9415345 ,-0.91124976, 4.4849253 ,-3.4605968 , 1.6737833 , 1.9091597 , 1.3111106 , 2.0829957 ,-2.1308084 ,-2.912219 , 1.1306196 , 2.231948 , 4.7522073 ,-2.1438766 ,-2.1000512 ,-0.2984778 ,-1.2093959 , 2.6259391 , 1.8113437 ,-4.137133 , 2.716111 , 3.4318748 ,-0.89123845,-3.70718 , 2.453927 ,-0.22418758,-3.098459 ,-4.4986243 , 0.85048616, 2.8023102 , 3.743153 , 0.9931644 , 3.8588202 , 1.7585737 ,-4.2855363 ,-2.5475764 ,-0.83141845,-1.9358089 , 3.1711586 , 2.4221613 ,-1.881327 ,-3.7230873 ,-4.55259 ,-0.42294836, 4.64625
diff --git a/compiler/pota-quantization-value-test/test_inputs/Mean_000_config/channel/int16/4.txt b/compiler/pota-quantization-value-test/test_inputs/Mean_000_config/channel/int16/4.txt
new file mode 100644
index 000000000..339435425
--- /dev/null
+++ b/compiler/pota-quantization-value-test/test_inputs/Mean_000_config/channel/int16/4.txt
@@ -0,0 +1 @@
+-3.37344313e+00, 2.78325319e+00,-7.30300546e-01, 1.33456266e+00, 3.96648932e+00, 4.33421373e+00,-3.11558557e+00,-3.64659280e-02,-1.73589993e+00, 4.81018400e+00,-8.32905114e-01, 2.33330703e+00, 1.85830116e+00,-4.60395622e+00, 5.26070774e-01,-4.71355534e+00,-2.97202754e+00, 3.57638383e+00, 4.50985909e+00, 2.08423686e+00,-1.85349309e+00,-2.18306184e+00,-4.65403509e+00, 4.31280661e+00, 1.16069472e+00,-4.85344124e+00, 8.40563923e-02,-1.98723459e+00,-4.29561710e+00,-2.57372570e+00,-4.22641230e+00,-4.00811911e+00,-9.61861551e-01,-2.14665198e+00, 4.18120289e+00,-3.87826174e-01,-2.86187083e-01,-4.84979200e+00,-1.34733701e+00, 1.27489030e+00, 1.98844969e+00,-4.11230135e+00,-1.61191213e+00, 2.63515592e+00, 4.35539484e+00,-1.56582773e+00,-2.45283508e+00, 1.44556177e+00,-8.56053472e-01, 3.25111747e+00, 3.58699083e+00,-2.47732449e+00, 3.64130282e+00,-4.91288567e+00, 8.97059917e-01,-2.26010180e+00, 4.91831064e+00, 4.45047706e-01, 1.88655663e+00, 3.20642543e+00, 1.38243341e+00, 9.06112790e-01, 1.15262544e+00,-2.39862514e+00,-2.87477684e+00, 7.36831248e-01, 3.18799114e+00, 1.22698748e+00, 5.63625395e-01, 1.29130912e+00,-4.89572334e+00, 2.11258578e+00,-4.55420208e+00, 4.94569272e-01,-7.08617330e-01,-1.84863120e-01,-4.81965256e+00,-1.06512284e+00, 4.79633398e-02, 2.70429182e+00, 4.78289175e+00,-2.11806059e+00, 4.23046875e+00, 3.18022132e+00,-8.39496255e-01, 3.13150501e+00,-3.24103773e-01,-7.48505890e-01,-2.45754886e+00, 4.16639376e+00, 3.25864077e+00, 3.40006447e+00,-3.77217412e+00, 2.93266010e+00, 3.33685803e+00, 1.02347994e+00,-2.22839618e+00,-1.90375733e+00, 3.24283957e+00,-4.01684284e-01,-4.45417643e+00, 3.74440104e-01, 3.33520865e+00, 6.64106190e-01, 3.84395885e+00, 2.38586918e-01,-1.51634857e-01,-2.64977455e+00,-3.45786500e+00, 4.89002228e+00,-1.07323432e+00,-2.92749858e+00,-1.76510501e+00,-3.44604325e+00,-1.89681911e+00, 4.20239258e+00,-1.75864971e+00, 2.13181686e+00, 3.90355319e-01,-4.11911535e+00, 6.61891177e-02,-4.32988214e+00,-1.42876351e+00, 3.12163901e+00,-4.56227779e+00, 4.17938662e+00, 9.63881195e-01, 4.35952139e+00, 1.61931109e+00, 4.11196423e+00, 2.25612569e+00,-4.77538586e+00,-1.72600198e+00,-4.39411783e+00,-8.98730099e-01,-1.04562032e+00,-2.81517529e+00, 3.57167959e+00, 1.90318239e+00, 2.17302442e+00,-3.79942179e+00, 2.19838643e+00,-4.16209459e+00, 4.45025682e+00, 1.68786839e-01,-2.56879544e+00, 3.60925221e+00, 1.06542781e-01,-3.48755455e+00,-6.77028894e-01,-3.51582170e+00, 3.90697241e+00, 4.49116230e+00,-1.56180394e+00, 4.96249914e+00, 9.63374436e-01, 2.72304177e+00, 8.38046610e-01,-2.91993833e+00,-9.41783428e-01, 8.00800502e-01, 3.89176035e+00, 6.70560122e-01, 2.76782703e+00,-1.37075472e+00,-3.25303817e+00,-4.41226482e+00,-8.38777184e-01, 1.73568249e+00,-1.09438455e+00,-1.08815920e+00, 1.06787062e+00, 2.04415274e+00,-2.93027782e+00,-6.86941504e-01, 3.83109421e-01,-3.49270535e+00,-2.13225913e+00,-3.61786675e+00, 1.32213378e+00,-2.89654016e+00, 4.23944092e+00, 4.53665400e+00, 4.26081800e+00,-1.95718706e+00, 4.72295076e-01,-3.08592963e+00, 2.53354859e+00, 3.80069661e+00,-1.14408419e-01, 2.39438844e+00,-4.73618507e+00, 2.35079074e+00,-1.43686843e+00, 1.32946157e+00, 1.10381134e-01,-3.49878430e+00, 2.83181930e+00, 4.57872486e+00, 2.29953095e-01, 7.19881415e-01,-2.97208834e+00, 4.11286211e+00,-3.89149117e+00, 3.83631349e+00, 4.14627981e+00,-1.14082299e-01,-6.89825296e-01,-2.55468488e+00,-4.04466152e+00, 9.95541453e-01,-2.59181118e+00,-4.60567427e+00,-4.77339029e+00,-7.36041367e-02, 1.85957468e+00,-3.42530179e+00, 4.55782986e+00,-3.29603004e+00, 3.55632234e+00, 2.40858841e+00,-2.07399082e+00,-3.96705031e+00, 4.41718817e+00, 3.19581985e+00,-3.72379017e+00,-3.76826024e+00, 6.79764748e-01,-4.43838930e+00, 2.29627752e+00, 2.34923697e+00,-4.23308420e+00, 3.80186272e+00, 8.65862250e-01, 8.44927967e-01,-1.05974531e+00, 4.70531940e+00, 1.25060010e+00, 4.82314730e+00,-4.53093815e+00, 4.51410580e+00, 4.95166332e-01,-3.45584202e+00, 1.82002666e-03,-3.27616286e+00,-2.68104935e+00, 2.39554620e+00, 2.99364328e+00,-2.57998848e+00,-4.35891914e+00, 4.64737415e+00,-5.74958742e-01, 6.47293210e-01, 1.85961032e+00, 4.49567413e+00,-4.36166048e+00
diff --git a/compiler/pota-quantization-value-test/test_inputs/Mean_000_config/layer/uint8/0.txt b/compiler/pota-quantization-value-test/test_inputs/Mean_000_config/layer/uint8/0.txt
new file mode 100644
index 000000000..e0e52c398
--- /dev/null
+++ b/compiler/pota-quantization-value-test/test_inputs/Mean_000_config/layer/uint8/0.txt
@@ -0,0 +1 @@
+ 4.5734663 , 3.96675 ,-2.7826853 , 4.377681 , 1.8424977 ,-2.8312624 , 0.65628445,-3.7023883 ,-1.8941027 , 0.53154576,-3.9718776 ,-3.3961854 ,-2.7500536 , 2.6793208 , 3.3515985 , 2.0939343 ,-4.3965416 ,-1.7462187 , 0.5660886 , 4.497879 ,-2.2529721 ,-4.8996797 ,-0.00740948,-2.941367 , 1.9482567 ,-2.462802 ,-0.7897884 , 3.1501546 , 3.1216884 ,-3.506249 , 2.871302 ,-3.964653 ,-0.40679944, 2.8930066 ,-4.783338 ,-1.8733944 , 2.2654383 ,-0.41361305,-3.7790897 ,-1.9458629 ,-2.274427 ,-2.9192872 ,-0.73215395, 2.8135974 , 2.1402152 , 4.516366 , 1.58816 ,-4.607831 ,-3.5409598 , 1.9784997 , 3.11111 , 1.0872442 ,-3.6907403 ,-4.774325 ,-4.9267297 , 1.2962086 , 2.4646177 , 2.2726526 , 4.8766675 ,-2.9272413 ,-0.06221364,-0.80498594,-2.319938 ,-3.8261194 ,-2.3452706 , 2.5408983 ,-0.80628425,-1.4547366 ,-4.4171157 , 3.1584027 , 4.2213454 , 3.0342784 , 2.0285478 , 3.4517126 , 1.870827 , 2.812075 , 1.0776864 ,-4.524331 , 3.1467574 ,-2.366355 ,-4.7368546 , 1.940347 , 4.282059 , 1.2666475 ,-4.9559174 , 2.8177614 , 1.1941892 ,-0.25412267,-2.833778 , 1.1770393 , 4.9503546 , 4.582686 ,-1.0778978 ,-2.9030416 , 3.2517505 , 1.556093 ,-3.7605543 , 0.5915735 ,-2.6323159 , 4.596147 ,-0.90292877, 2.8230112 , 4.9295835 , 3.523853 , 1.7742149 ,-2.6014073 , 2.162894 , 1.9364033 , 4.0920115 , 0.81613404, 2.4198878 ,-0.907447 ,-4.79113 ,-3.4193892 ,-0.3334577 ,-1.0439668 , 4.2233415 , 1.4482704 , 1.3646252 ,-0.9206041 , 4.4994802 ,-4.2411633 , 0.6763335 ,-1.3827848 , 1.8579848 , 1.6426222 , 0.904467 , 3.876264 ,-4.6476808 , 4.576801 ,-1.4680524 , 2.441134 , 3.2343059 , 0.23119794, 2.5640545 ,-0.7293438 , 3.7184558 ,-1.6056752 , 3.1490617 , 4.6837263 , 4.7100887 ,-2.785927 ,-0.1520597 ,-1.9914767 ,-4.00598 ,-2.7502792 , 3.7857378 , 2.8444788 , 4.9911737 , 0.29277426,-4.779576 , 3.223367 , 1.3517398 , 4.8757277 , 3.8083189 , 1.7660266 ,-2.1543872 , 4.822371 , 2.089687 ,-4.7373757 ,-2.4061642 , 2.0387447 ,-4.067881 ,-3.1757388 , 0.24974413,-0.24441184,-0.1168329 ,-0.35149318, 2.0035832 ,-4.248678 ,-1.4723817 , 3.8218668 ,-2.8085105 , 4.6995482 ,-3.0093114 ,-3.648268 ,-1.0374364 , 0.04459473, 2.3945484 ,-0.63439727, 3.3920286 , 2.403765 , 1.303556 , 3.232244 ,-0.44932058, 0.9601637 ,-3.3821623 ,-4.257736 ,-4.095783 , 0.42818338,-4.925627 ,-1.8419602 , 4.9393196 , 0.8049334 , 4.431875 , 2.8487725 , 2.1205912 , 1.7367444 ,-4.337498 ,-3.574642 ,-3.8927085 ,-0.35219863, 2.8415039 ,-0.2887568 ,-0.89806557, 2.669602 , 4.8017626 , 4.278042 ,-1.2604581 , 3.152027 , 2.1625066 , 1.5039738 ,-3.7209976 ,-0.72354925, 4.006067 ,-3.7651584 , 0.7198826 , 3.9594896 , 0.6228397 , 2.8464649 ,-0.18740664,-2.0530953 , 3.5185826 , 2.5037062 , 0.3990585 ,-4.423475 , 4.6931167 ,-1.0078553 , 0.74727917,-4.289701 , 1.697721 , 3.4963684 , 1.5796075 , 2.296678 ,-2.9379995 , 4.4748416 , 0.25155628, 4.1183267 , 0.9506131 , 1.2903908 ,-4.6828184 ,-2.309908 ,-4.2793307 ,-2.2069294 ,-4.038367 , 4.641971 ,-2.3178709 ,-2.2683682 ,-0.96986157, 2.6649144 , 2.3106637 ,-1.8052462 ,-4.9433284 , 1.7941002 , 4.80127 ,-0.06690114
diff --git a/compiler/pota-quantization-value-test/test_inputs/Mean_000_config/layer/uint8/1.txt b/compiler/pota-quantization-value-test/test_inputs/Mean_000_config/layer/uint8/1.txt
new file mode 100644
index 000000000..9a8f222e7
--- /dev/null
+++ b/compiler/pota-quantization-value-test/test_inputs/Mean_000_config/layer/uint8/1.txt
@@ -0,0 +1 @@
+ 2.2282960e+00, 1.0135865e+00,-4.1930809e+00, 5.3674412e-01,-3.2516165e+00, 1.2745492e+00, 4.2867136e+00, 1.9524460e+00,-3.6757104e+00,-3.6086998e+00,-9.4525421e-01,-3.4005399e+00, 3.3607626e+00, 4.2363039e-01,-2.5177178e+00,-3.0130227e+00,-4.1442380e+00, 4.4951862e-01,-6.4387190e-01, 4.3701029e+00,-3.6790867e+00, 3.2749624e+00,-2.2554400e+00, 1.8269253e+00, 1.8358005e+00,-6.0994375e-01, 3.5964453e+00, 4.8953295e+00,-2.6134133e+00,-3.9301482e-01, 4.0286818e+00,-8.9392501e-01, 2.6430035e+00,-1.0339550e+00,-4.2311502e+00, 5.1657695e-01,-3.0095081e+00,-3.2156844e+00, 3.0075660e+00,-2.4905038e+00, 2.2380588e+00, 4.6933036e+00,-2.7880669e+00,-3.3672907e+00, 2.5187421e+00, 2.1843061e+00,-3.9957666e+00,-4.5409918e+00,-1.7282218e+00,-4.6849327e+00, 3.1863580e+00, 2.4342964e+00,-4.5180349e+00,-2.4310455e+00,-2.6789901e+00,-1.6438740e+00, 4.9613748e+00,-3.7800386e+00,-4.4277740e+00, 1.0571244e+00,-3.3765689e-02,-6.2219787e-01, 2.1075857e+00,-2.0555353e+00, 2.6996508e+00,-3.0303302e+00,-3.8262250e+00,-4.5048919e-01, 2.6760142e+00, 3.2696848e+00, 2.8136756e+00,-2.7064829e+00, 8.5861349e-01,-1.8871003e+00,-9.5355767e-01, 2.3704410e+00, 4.8897211e-02,-4.6371531e+00, 1.5693765e+00, 3.7866819e+00,-2.9738419e+00, 1.2106347e+00,-5.8760280e-03,-6.4124316e-01, 4.2396611e-01, 4.8550687e+00,-3.0650468e+00,-1.2087260e+00,-2.4833875e+00, 2.1272743e+00,-1.8991195e-01,-3.5372739e+00,-2.3402226e+00,-1.0234243e+00, 2.8981063e+00, 8.7964945e-02, 3.2136328e+00,-3.4051507e+00,-4.5538807e+00,-4.0228786e+00,-1.8993270e-01,-4.5704255e+00, 1.8850164e+00, 9.9910229e-01,-4.8424377e+00,-3.1492932e+00, 2.3922281e+00, 4.8503261e+00,-2.1037047e+00, 3.3602579e+00, 1.3546667e+00, 1.3481154e+00,-2.3604252e+00,-1.3253393e+00,-3.5330158e-01,-2.1313765e+00, 3.1442962e+00,-1.1570807e+00,-4.5890884e+00,-4.1608801e+00, 1.8554245e+00, 2.4646142e+00,-1.8453486e+00, 3.3489871e+00,-1.1248070e+00, 3.1451607e+00,-1.4458319e+00,-2.2727523e+00,-2.0378258e+00, 2.4566815e+00, 3.8839689e-01, 4.2570353e+00, 2.3613093e+00, 1.2956337e+00,-7.5734973e-01,-1.4549307e+00, 9.3240172e-01, 4.3444591e+00,-6.4935732e-01, 2.5328317e+00,-2.3545196e+00,-4.7553263e+00, 2.6134777e+00,-2.5526178e+00,-1.7996631e+00,-2.0215256e+00,-4.6141486e+00,-1.7283168e+00, 2.5297335e-01, 3.7009020e+00,-1.9858284e+00,-3.4631619e+00,-1.5858738e+00,-2.5620985e+00, 3.2822473e+00,-3.2632313e+00,-9.0714562e-01,-2.3562717e+00, 4.4088845e+00,-3.6630182e+00, 5.5761892e-01, 1.6045070e+00,-3.6806375e-01, 4.3184443e+00,-1.3219705e+00, 1.5496376e+00,-1.5801797e+00, 2.1545045e+00,-4.0106788e+00, 3.4172714e+00,-4.2495294e+00,-6.1115064e-03,-7.2607052e-01,-7.3130745e-01,-4.4462271e+00, 4.8119636e+00,-4.7460346e+00,-3.0464313e+00,-2.8801811e+00,-1.4347218e-03, 4.4133449e+00,-3.3173063e-01, 4.3802023e+00, 2.6040417e-01,-2.5531218e+00, 3.7436140e+00,-4.1636271e+00,-3.3907690e+00,-1.4418361e+00,-3.6933661e+00,-2.6342602e+00,-3.1492887e+00,-5.5590755e-01,-1.6814464e-01,-1.0868104e+00, 4.9451909e+00, 3.4104226e+00, 1.0342516e+00, 4.7993002e+00, 1.2480364e-01, 1.6109833e-01, 2.6366503e+00, 1.6535910e+00, 4.3810592e+00, 4.4755011e+00, 4.3265424e+00,-3.1934264e-01, 9.8549920e-01, 1.9962710e-01, 2.8525822e+00,-3.7352023e+00,-1.3402178e+00, 2.5931063e+00,-2.6708813e+00,-7.6831090e-01, 3.0769660e+00, 1.4107993e+00,-1.8936746e+00,-4.7568636e+00,-1.9222193e+00, 4.7693071e+00, 2.8644614e+00, 4.1877995e+00,-3.6974251e+00, 4.5314616e-01,-7.1986055e-01, 4.8653622e+00, 1.4722897e+00,-8.6220115e-01,-4.1846976e+00, 3.7767217e+00, 3.7630556e+00,-4.5851058e-01,-4.9183292e+00,-1.8750135e+00, 1.0773923e+00,-5.2709883e-01,-9.2767686e-01,-1.3984675e+00,-2.0892789e+00,-4.3801632e+00, 4.0080590e+00, 4.2269025e+00,-1.2195336e+00,-2.2649438e+00, 4.6874623e+00,-3.8354571e+00, 5.9588730e-01,-2.8315885e+00, 3.0605823e-01, 2.1416895e+00, 1.6045133e+00,-3.3075256e+00, 4.9898911e+00, 1.7708080e-02, 3.5305614e+00
diff --git a/compiler/pota-quantization-value-test/test_inputs/Mean_000_config/layer/uint8/2.txt b/compiler/pota-quantization-value-test/test_inputs/Mean_000_config/layer/uint8/2.txt
new file mode 100644
index 000000000..1b2e33401
--- /dev/null
+++ b/compiler/pota-quantization-value-test/test_inputs/Mean_000_config/layer/uint8/2.txt
@@ -0,0 +1 @@
+ 1.9229428 , 2.1045275 , 2.0514195 , 1.7149676 ,-4.1647053 , 4.3958654 , 2.1192055 ,-2.4357705 , 2.249189 , 4.7986865 ,-1.0146881 , 2.5108647 , 0.7262246 ,-2.3110187 ,-0.434008 , 2.6220334 , 1.3261455 ,-2.0402927 , 0.6362597 , 0.12827367, 0.94167644, 1.6396433 , 2.802215 , 0.92637545,-2.8669958 , 2.1684341 , 4.7197456 ,-3.0393784 ,-1.5588902 ,-1.5589788 ,-1.2792847 ,-4.301159 , 3.6853306 , 3.5522077 ,-3.5120559 , 3.6523628 , 0.52381915,-4.3210206 , 3.1021209 ,-4.4059095 , 4.574733 ,-3.708168 ,-3.4609973 , 0.04494883, 4.6041393 , 4.6209555 ,-2.184693 , 3.3114836 , 4.0440845 ,-4.362543 ,-3.0185041 ,-3.4911432 ,-1.0443465 ,-3.1546419 ,-3.0831194 ,-1.8959469 ,-3.7653599 ,-1.8753844 , 3.969308 , 4.0960746 , 0.256032 ,-0.11065102, 4.753394 , 4.8433857 , 0.17249103, 0.44612473, 3.5996687 ,-3.7071083 , 4.15448 , 2.7609568 , 0.7979912 , 2.6985793 , 0.24981445,-0.7343978 ,-3.8946455 ,-3.4738345 ,-2.0124238 , 4.6603985 , 0.9002829 ,-2.2128618 ,-0.8752893 ,-3.0990481 , 2.770291 ,-1.4642559 , 0.4561498 , 0.5808671 , 2.4227936 ,-2.400878 , 0.6494001 , 1.0195295 ,-3.2693145 , 1.9889433 , 3.5208216 , 3.6280289 , 4.322899 ,-2.805155 , 3.7704606 , 0.6797415 , 4.442675 ,-0.5069875 , 1.3373847 , 4.6953626 ,-0.7946793 ,-2.7352958 ,-1.9969261 , 0.43059692, 2.50853 , 1.9314603 , 1.3780333 , 2.0536468 ,-1.572231 ,-4.5323825 ,-1.3175989 ,-1.5515776 ,-0.05870355, 0.32408538,-4.2935586 ,-1.561555 ,-1.7551405 ,-0.93950266, 3.2540953 ,-4.623753 ,-3.4944966 ,-0.7603045 , 0.76591074,-4.9114766 ,-2.679303 , 0.12950227, 4.094419 , 4.781908 ,-3.6946337 , 2.766349 ,-0.45678583,-2.275264 , 2.0858452 , 3.1182098 ,-1.2942638 , 4.4418044 , 2.2264028 ,-3.3838644 , 1.4427853 , 3.7365992 ,-1.1815038 , 1.4555137 , 0.22728541,-0.18817298, 3.454521 , 3.1835914 , 4.0786743 ,-1.5111316 , 1.1560454 ,-0.04693017, 0.44183066,-0.7420173 ,-1.2243766 , 3.4453049 ,-2.969513 ,-0.82397145, 4.870895 , 3.0178127 , 1.7217305 , 4.482936 , 1.9468685 , 3.9970267 , 4.7294793 , 2.9921744 , 4.470473 , 4.7626653 , 0.13104612,-4.651569 , 2.7991815 ,-4.734433 ,-2.4499187 , 1.0739365 ,-1.5583646 , 3.6531756 , 2.7731194 ,-4.72427 ,-4.5801177 ,-4.035709 , 2.5767221 ,-2.8133557 ,-1.8342617 , 3.5808434 ,-2.1022995 ,-3.5421894 ,-3.0776916 , 3.168665 ,-0.07246887,-1.2413273 , 4.7964606 ,-1.0624843 , 0.75939703, 2.5336463 ,-4.8622346 ,-4.9744167 , 2.1007512 , 1.5271608 , 0.37077245, 1.7765028 , 2.2724373 , 2.1864665 ,-0.37378153, 1.3559381 ,-1.4220421 ,-1.4756224 , 3.6143627 , 2.7846546 ,-2.5194893 , 3.005039 ,-3.6451447 ,-1.9118739 , 0.04718782,-3.0775185 ,-1.4801219 ,-2.35909 ,-0.4728799 , 4.610093 ,-4.472677 ,-4.530808 , 0.12514372, 0.05973044, 4.457302 , 3.1129916 , 3.6036162 , 4.5086145 ,-3.548999 , 0.4976606 ,-3.6525648 ,-2.1937015 ,-1.3205789 ,-2.6594079 , 4.415343 , 3.219482 ,-3.7286756 , 3.4116418 , 0.82889384,-3.0168123 , 4.382766 , 2.7633846 , 3.6949344 , 3.9806223 ,-0.6415279 ,-0.3193684 ,-1.3176754 ,-1.4990829 , 4.694691 ,-1.0581211 , 1.2103747 ,-0.26690048,-1.157015 ,-1.8951306 ,-0.8580171 ,-4.3080263 , 4.0737123 ,-1.2607352
diff --git a/compiler/pota-quantization-value-test/test_inputs/Mean_000_config/layer/uint8/3.txt b/compiler/pota-quantization-value-test/test_inputs/Mean_000_config/layer/uint8/3.txt
new file mode 100644
index 000000000..50ed09011
--- /dev/null
+++ b/compiler/pota-quantization-value-test/test_inputs/Mean_000_config/layer/uint8/3.txt
@@ -0,0 +1 @@
+ 4.9386005 , 3.7248888 , 3.3261378 , 4.8302746 ,-3.9337704 ,-4.2943096 , 0.16059242, 0.17785172,-2.4971933 ,-2.933359 ,-4.598231 , 4.7816315 ,-0.6563864 , 4.452592 , 1.8066075 , 3.1572745 , 4.500678 ,-1.1609873 ,-1.6962403 , 1.567031 ,-3.3120036 , 1.8150452 ,-2.7486987 ,-1.6800771 , 1.4895486 , 1.120401 , 1.4983965 , 4.7132416 , 0.39645562,-3.12486 ,-0.5966056 , 4.618641 , 1.225812 , 0.99017185, 3.9918585 , 1.299415 ,-1.2995726 , 4.202907 , 3.8657827 ,-4.0268126 ,-0.90370494, 0.5030568 ,-2.9651542 ,-4.1249614 ,-2.8990393 ,-4.1228724 ,-1.2640246 ,-0.72640723,-1.7128279 , 2.7710931 , 2.8189523 ,-0.8384207 , 0.71266395, 3.8393862 ,-1.7801509 ,-3.1485069 , 3.2076547 , 2.267659 ,-3.745656 ,-4.373508 , 0.86005193,-4.9145784 , 0.9253047 , 1.1243923 , 0.46507052, 1.9978004 ,-4.642887 ,-2.1898057 , 0.88199854,-2.1837327 , 1.1112527 ,-1.4548608 ,-3.5766103 ,-1.5607064 ,-3.630397 ,-1.9193211 ,-0.8931484 ,-0.2812017 ,-1.2881653 ,-2.5051243 ,-3.5648384 ,-0.5431733 ,-0.47036746,-2.8132265 ,-0.4302025 ,-4.003176 , 0.31743896,-3.074693 ,-3.3994603 , 0.62276137, 0.12920536,-2.5154057 ,-0.22098878,-2.711012 ,-0.303956 , 4.6025276 , 3.1887815 ,-0.50345755,-2.6543994 ,-0.8452558 ,-1.4075644 , 3.6716504 , 2.7388885 ,-4.9426928 , 3.5494354 , 4.777085 ,-3.3904083 ,-2.4746811 ,-2.943489 , 1.3607427 , 1.313449 ,-2.7959676 , 4.5932074 , 0.2460288 ,-1.1802251 , 0.6807028 ,-3.7335384 ,-0.30950046, 0.0558207 ,-4.7604976 ,-4.5745177 ,-3.3872643 ,-1.102581 ,-1.5612804 ,-1.2933319 , 4.5290637 ,-2.5096595 , 0.8673844 , 0.6069363 , 0.8294639 ,-0.05487671,-2.5923786 , 3.2974155 , 2.252853 ,-2.4157743 , 1.6614583 , 1.975577 ,-2.7390766 ,-0.26459846, 0.8946814 ,-3.257953 , 4.0526175 ,-1.5219783 , 4.6063023 ,-0.09599628, 3.2825923 , 2.0063279 ,-3.597641 ,-0.41604096,-2.5593333 , 1.8169669 ,-3.6998532 ,-2.3723404 , 0.4008657 , 2.1002467 , 4.9284163 , 4.6011457 ,-4.8977246 , 4.7852945 , 1.2170111 ,-1.055987 , 2.27575 , 1.0601226 ,-4.176826 , 0.08197393, 4.0421042 , 3.6263971 , 2.6941037 ,-2.644993 , 0.10439859,-4.512112 , 3.7939842 ,-4.8532767 , 0.391317 , 3.6432517 ,-3.9992728 , 0.29700363, 1.2722415 ,-2.3793647 ,-3.377246 , 2.0930648 , 2.574604 ,-1.2509564 , 0.4457573 ,-0.46469867, 2.6793416 , 0.02566718,-0.11948132,-3.1046712 ,-0.6204446 ,-4.615342 , 4.057695 , 1.1312845 ,-3.0446556 ,-1.9381613 ,-0.92255247,-3.5459394 ,-1.1972907 , 0.5879403 ,-1.2265042 ,-2.6279037 , 3.7533212 ,-0.2950134 ,-1.6104454 , 4.7811155 , 3.9216835 ,-2.2905827 ,-3.9489107 ,-4.078132 , 4.878544 ,-2.1483154 ,-3.1480436 ,-1.8742744 , 0.38310575,-4.0457416 ,-1.5423136 , 4.9426446 , 2.80434 ,-2.758338 , 1.6596367 ,-4.559686 ,-1.2686385 ,-1.2173673 , 0.49475643,-2.4956207 ,-1.5008336 ,-1.7967415 ,-1.1574938 , 2.2852411 , 1.7171949 ,-3.328038 ,-3.1454384 ,-0.41883984, 3.822312 , 1.1161699 ,-1.5137968 , 3.1651397 , 3.2411747 , 1.2685378 , 2.7408757 ,-3.078621 , 3.3460293 ,-0.34918678,-1.0433053 , 0.9397743 ,-3.9071774 , 0.68924445, 4.896269 ,-4.234603 ,-4.8659916 , 1.472339 , 4.5464644 , 0.35857418, 3.4065645 ,-1.514736 , 4.2301235
diff --git a/compiler/pota-quantization-value-test/test_inputs/Mean_000_config/layer/uint8/4.txt b/compiler/pota-quantization-value-test/test_inputs/Mean_000_config/layer/uint8/4.txt
new file mode 100644
index 000000000..163c037cf
--- /dev/null
+++ b/compiler/pota-quantization-value-test/test_inputs/Mean_000_config/layer/uint8/4.txt
@@ -0,0 +1 @@
+-0.91463715,-2.9258113 , 4.4465976 ,-0.84762925,-3.3510911 ,-0.15094744, 2.2284694 , 3.9705405 ,-1.6315348 , 4.698665 , 2.8595035 ,-2.4719086 , 4.2091336 ,-3.7003224 , 0.06198901, 4.24617 ,-3.7041452 , 1.4280707 , 0.61925036, 3.873551 , 0.3554166 , 3.0535998 ,-1.403015 , 2.5769274 , 4.0060935 ,-2.134697 , 0.61366636,-2.2069314 , 3.5629356 ,-4.94381 , 3.3054771 ,-0.42945656, 4.4868546 , 4.124087 ,-4.039486 , 0.75716823,-4.530404 ,-0.8464823 , 2.7817092 ,-4.954212 , 4.790015 , 2.5307322 , 0.635834 ,-3.393037 ,-3.7000508 ,-1.1439751 ,-2.4422479 , 3.9414582 ,-4.0586324 ,-3.5872777 , 2.2529798 , 0.50453144,-2.9947112 ,-0.76174486, 0.8427806 ,-0.90798455,-0.5518859 ,-1.1810572 , 1.2787138 ,-1.7791113 ,-4.661412 ,-3.7413049 , 0.03910514, 3.970302 ,-3.0697417 ,-4.107844 ,-1.985001 ,-2.434408 ,-3.0120797 , 0.34467867, 0.09826441, 3.1933572 , 0.09855966, 1.7976784 ,-3.3814316 ,-2.8423817 ,-4.787137 , 0.21746217,-1.8560363 ,-0.7145455 , 3.911294 , 4.6970305 ,-4.0105987 , 3.3843613 , 2.3087065 , 1.8619018 , 1.6607213 ,-4.1276345 ,-0.15251912, 3.1198032 , 1.8143575 , 2.178214 ,-4.6250186 , 4.4006424 ,-3.378407 , 3.6481302 , 4.4439235 , 4.5322957 , 2.7754776 , 1.9026359 ,-2.9371052 , 0.32501587, 4.980984 ,-3.2300677 , 4.190388 , 4.441369 , 0.8116277 ,-4.7056756 , 1.1501676 ,-0.9759702 ,-0.1920487 ,-3.2009268 , 4.654679 , 4.043145 , 4.579935 , 4.917842 ,-3.2166183 , 2.381046 , 2.3470554 , 0.04456256,-2.6785278 ,-2.1683002 ,-0.2686819 , 0.6097173 , 1.5071467 , 3.9692068 ,-3.4313831 ,-0.87708473, 3.9917011 , 0.7843428 ,-4.6622047 , 0.774621 ,-4.6538844 , 3.6392822 , 4.962988 , 1.4132729 ,-0.40482154,-1.8656421 ,-1.6113061 ,-1.3454957 , 0.40846685,-4.5410986 , 2.7158992 ,-1.8403106 ,-3.803351 , 4.406537 ,-1.5868717 , 2.7034876 ,-3.3383765 , 4.6084027 ,-1.691095 ,-0.52188784, 2.9010768 , 0.08786624, 2.7466853 ,-1.7457972 , 0.59371734,-0.1716976 ,-2.6220891 , 4.9432936 , 2.3500183 , 1.6905144 ,-2.7329378 , 4.003541 ,-1.1137847 , 3.9017355 , 0.9116626 , 4.233729 ,-2.6706429 , 3.4342804 ,-0.42729262, 1.174779 ,-4.944099 , 1.2316282 , 4.9237943 ,-2.2999635 ,-4.9210916 ,-1.9033331 , 0.43241265, 3.2149148 , 4.1269703 , 0.8590868 , 2.734273 , 1.658618 ,-2.1702065 ,-2.0058317 , 4.0706363 , 4.003833 ,-0.35835287, 2.5514262 , 1.2571276 ,-4.655018 , 3.6468434 , 0.06320113,-4.662375 , 1.0745742 ,-1.117399 , 4.167245 , 4.59434 ,-1.686359 ,-0.17328739, 0.3083307 , 3.3926466 , 2.2254786 ,-0.45468137, 2.4956248 ,-3.492782 ,-2.9805465 ,-1.0610795 ,-0.2784433 , 0.7163735 ,-3.0048254 ,-1.8024784 ,-3.3139167 ,-1.8410577 , 4.5702477 ,-3.4454951 ,-1.4504164 ,-1.7432297 ,-4.998418 ,-2.5524495 , 3.028534 , 4.075326 ,-2.2187853 ,-0.6484594 , 3.00815 ,-2.8010397 ,-4.5529976 , 1.7830837 , 0.3373458 , 0.19151935,-1.0437245 ,-3.6349878 , 1.1947471 ,-1.9664146 , 0.27316815,-0.20781417, 2.419226 , 0.02246885, 4.5222287 , 3.1069999 , 3.940458 , 4.2710595 , 3.4216619 , 2.8447206 , 2.7136886 ,-0.60954016, 2.9277234 , 3.995615 ,-0.30593097, 1.7800944 , 1.0608315 , 3.8786283 ,-2.7564247 , 1.8526665 ,-3.8638606
diff --git a/compiler/pota-quantization-value-test/test_inputs/Mul_001_config/channel/int16/0.txt b/compiler/pota-quantization-value-test/test_inputs/Mul_001_config/channel/int16/0.txt
new file mode 100644
index 000000000..e580d6f85
--- /dev/null
+++ b/compiler/pota-quantization-value-test/test_inputs/Mul_001_config/channel/int16/0.txt
@@ -0,0 +1 @@
+-4.024665 , 3.0544488,-4.5645285,-3.2134292,-2.1543078, 4.039755 ,-4.613908 , 4.2014904, 3.8222141,-4.4992657,-4.02681 ,-3.2933445
diff --git a/compiler/pota-quantization-value-test/test_inputs/Mul_001_config/channel/int16/1.txt b/compiler/pota-quantization-value-test/test_inputs/Mul_001_config/channel/int16/1.txt
new file mode 100644
index 000000000..c593dfbb6
--- /dev/null
+++ b/compiler/pota-quantization-value-test/test_inputs/Mul_001_config/channel/int16/1.txt
@@ -0,0 +1 @@
+-2.669042 , 2.479217 , 4.691815 , 1.8187722 ,-3.7656548 ,-2.0555806 ,-2.4494352 ,-3.2394514 ,-0.38215363,-1.543695 ,-0.6927158 , 2.3534324
diff --git a/compiler/pota-quantization-value-test/test_inputs/Mul_001_config/channel/int16/2.txt b/compiler/pota-quantization-value-test/test_inputs/Mul_001_config/channel/int16/2.txt
new file mode 100644
index 000000000..14520a177
--- /dev/null
+++ b/compiler/pota-quantization-value-test/test_inputs/Mul_001_config/channel/int16/2.txt
@@ -0,0 +1 @@
+ 4.036224 ,-1.2903051 , 1.2116423 , 3.92255 ,-0.48049024,-1.0290806 ,-0.9644837 , 1.3379688 ,-1.0027533 ,-1.9611529 , 3.7190473 , 0.45794436
diff --git a/compiler/pota-quantization-value-test/test_inputs/Mul_001_config/channel/int16/3.txt b/compiler/pota-quantization-value-test/test_inputs/Mul_001_config/channel/int16/3.txt
new file mode 100644
index 000000000..2238d5e9e
--- /dev/null
+++ b/compiler/pota-quantization-value-test/test_inputs/Mul_001_config/channel/int16/3.txt
@@ -0,0 +1 @@
+ 4.560488 ,-1.2475324, 1.8892838,-2.0155866,-4.968927 , 0.3717404,-0.6095849, 3.2483344,-1.2499679, 1.4237018,-3.1225715, 3.0611598
diff --git a/compiler/pota-quantization-value-test/test_inputs/Mul_001_config/channel/int16/4.txt b/compiler/pota-quantization-value-test/test_inputs/Mul_001_config/channel/int16/4.txt
new file mode 100644
index 000000000..14a91ccc9
--- /dev/null
+++ b/compiler/pota-quantization-value-test/test_inputs/Mul_001_config/channel/int16/4.txt
@@ -0,0 +1 @@
+-1.7167594, 2.116633 ,-1.3816848,-1.7106141,-3.273076 ,-4.148302 ,-2.1654181, 0.4368236, 3.4279666, 1.2954224, 1.3004405,-4.3022
diff --git a/compiler/pota-quantization-value-test/test_inputs/Mul_001_config/layer/uint8/0.txt b/compiler/pota-quantization-value-test/test_inputs/Mul_001_config/layer/uint8/0.txt
new file mode 100644
index 000000000..3b2a3c258
--- /dev/null
+++ b/compiler/pota-quantization-value-test/test_inputs/Mul_001_config/layer/uint8/0.txt
@@ -0,0 +1 @@
+ 4.9167333 , 0.9170983 ,-2.4031715 , 0.4819133 , 0.21536288,-2.0262568 , 4.364642 , 1.7851653 , 2.0982797 , 0.5736603 , 2.5769486 , 3.68285
diff --git a/compiler/pota-quantization-value-test/test_inputs/Mul_001_config/layer/uint8/1.txt b/compiler/pota-quantization-value-test/test_inputs/Mul_001_config/layer/uint8/1.txt
new file mode 100644
index 000000000..dff8a3b09
--- /dev/null
+++ b/compiler/pota-quantization-value-test/test_inputs/Mul_001_config/layer/uint8/1.txt
@@ -0,0 +1 @@
+ 3.8708763 , 3.263454 ,-4.796817 , 0.6411522 ,-3.0385532 , 0.49334133,-0.20283684,-0.88814104, 4.826072 ,-4.8037696 , 4.757636 ,-3.036691
diff --git a/compiler/pota-quantization-value-test/test_inputs/Mul_001_config/layer/uint8/2.txt b/compiler/pota-quantization-value-test/test_inputs/Mul_001_config/layer/uint8/2.txt
new file mode 100644
index 000000000..93e747284
--- /dev/null
+++ b/compiler/pota-quantization-value-test/test_inputs/Mul_001_config/layer/uint8/2.txt
@@ -0,0 +1 @@
+-3.8694625 ,-3.5254061 ,-0.23680535, 4.1042504 , 3.2534697 ,-1.8511593 ,-1.9182487 , 2.6457057 , 0.12923336, 2.618141 , 1.2465005 ,-4.4625525
diff --git a/compiler/pota-quantization-value-test/test_inputs/Mul_001_config/layer/uint8/3.txt b/compiler/pota-quantization-value-test/test_inputs/Mul_001_config/layer/uint8/3.txt
new file mode 100644
index 000000000..c924e03d9
--- /dev/null
+++ b/compiler/pota-quantization-value-test/test_inputs/Mul_001_config/layer/uint8/3.txt
@@ -0,0 +1 @@
+-2.5559328 , 1.768443 ,-1.4850446 ,-1.2771453 ,-2.7216687 , 2.80077 , 0.21637216,-0.6145739 ,-0.37175298, 3.8750615 ,-1.9910356 ,-1.657059
diff --git a/compiler/pota-quantization-value-test/test_inputs/Mul_001_config/layer/uint8/4.txt b/compiler/pota-quantization-value-test/test_inputs/Mul_001_config/layer/uint8/4.txt
new file mode 100644
index 000000000..1153c85ed
--- /dev/null
+++ b/compiler/pota-quantization-value-test/test_inputs/Mul_001_config/layer/uint8/4.txt
@@ -0,0 +1 @@
+-1.6168976 ,-3.816399 ,-0.55625045, 4.961818 , 0.19316113,-2.6601286 ,-1.6928803 , 4.1208386 ,-1.4012221 , 2.7742999 , 0.75798005,-2.5877
diff --git a/compiler/pota-quantization-value-test/test_inputs/PRelu_001_config/channel/int16/0.txt b/compiler/pota-quantization-value-test/test_inputs/PRelu_001_config/channel/int16/0.txt
new file mode 100644
index 000000000..1f2993269
--- /dev/null
+++ b/compiler/pota-quantization-value-test/test_inputs/PRelu_001_config/channel/int16/0.txt
@@ -0,0 +1 @@
+-3.3436873 ,-0.79453826, 2.2211137 , 2.6420908 ,-1.3191302 , 1.2973647 ,-4.506594 , 4.867371 ,-4.318404 , 1.6957753 ,-4.3091793 ,-3.2230556 , 4.9175825 ,-3.1527104 ,-2.6669753 ,-2.1135337 ,-3.7701926 ,-3.358504 ,-4.419803 , 3.2045574 ,-0.5828494 ,-3.5796826 ,-4.0088696 ,-4.7178082 , 2.2726505 , 2.1860175 , 3.7198956 ,-0.5788681 ,-3.7766652 ,-0.65016747, 3.707159 ,-2.240267 , 4.5772953 ,-0.54754776, 4.7143884 ,-3.196982 ,-3.6356654 , 3.7157805 , 3.1312432 , 0.58816016, 2.1710336 ,-1.600533 ,-3.689763 , 4.322089 , 0.4816874 , 2.2769346 ,-3.9072733 ,-0.58615017
diff --git a/compiler/pota-quantization-value-test/test_inputs/PRelu_001_config/channel/int16/1.txt b/compiler/pota-quantization-value-test/test_inputs/PRelu_001_config/channel/int16/1.txt
new file mode 100644
index 000000000..a19ea6696
--- /dev/null
+++ b/compiler/pota-quantization-value-test/test_inputs/PRelu_001_config/channel/int16/1.txt
@@ -0,0 +1 @@
+-1.275483 ,-3.6622071 ,-0.87433696, 0.60946655, 1.4415421 , 3.3705983 , 2.2635043 , 3.3926573 ,-0.2936643 ,-0.5169573 , 3.2535644 , 2.1269164 ,-3.4180303 , 1.0427854 ,-1.3514856 , 3.6084783 , 4.569944 ,-0.79272085, 2.9771423 ,-1.6668562 , 4.8700657 , 0.3355385 , 0.76509756, 3.5142152 ,-1.6743544 , 4.794434 ,-2.958765 ,-0.23857778, 2.4555902 , 2.459867 , 3.3922994 ,-4.350212 , 0.6286153 , 0.8139546 , 4.1676807 ,-3.3461437 , 0.69633776,-4.6548877 , 0.98267466,-4.508397 ,-1.4581255 ,-1.2289628 , 3.8701873 , 3.334336 ,-3.5611253 , 2.6133575 ,-1.0554558 ,-3.3291767
diff --git a/compiler/pota-quantization-value-test/test_inputs/PRelu_001_config/channel/int16/2.txt b/compiler/pota-quantization-value-test/test_inputs/PRelu_001_config/channel/int16/2.txt
new file mode 100644
index 000000000..7113eb52e
--- /dev/null
+++ b/compiler/pota-quantization-value-test/test_inputs/PRelu_001_config/channel/int16/2.txt
@@ -0,0 +1 @@
+-0.6250365 ,-4.798417 ,-4.214081 ,-3.625409 , 2.4391694 , 4.1856265 , 3.2472587 ,-3.20996 ,-2.3537548 , 1.3749354 , 2.5947835 ,-1.8891864 ,-3.612735 , 2.246563 , 1.2701501 ,-2.8927476 ,-0.71078295,-3.6037376 ,-4.5916877 , 2.0044398 , 3.4437728 ,-1.0695096 , 4.3483944 ,-3.3387017 ,-0.9384242 , 1.4229002 ,-0.6568144 , 1.1164346 , 1.7145283 ,-2.596518 , 4.6728883 , 3.4737296 , 1.7935314 , 3.1263895 , 1.3614839 ,-3.824968 ,-3.0405738 , 3.1729462 ,-4.1985774 ,-2.9489865 ,-4.2080064 , 2.0368521 ,-2.858539 ,-0.03206728,-1.1123812 , 0.2994737 , 1.6906137 ,-0.8665008
diff --git a/compiler/pota-quantization-value-test/test_inputs/PRelu_001_config/channel/int16/3.txt b/compiler/pota-quantization-value-test/test_inputs/PRelu_001_config/channel/int16/3.txt
new file mode 100644
index 000000000..afeb2c0e6
--- /dev/null
+++ b/compiler/pota-quantization-value-test/test_inputs/PRelu_001_config/channel/int16/3.txt
@@ -0,0 +1 @@
+-4.5279946 ,-3.4497826 ,-2.058617 ,-0.39549035,-0.26672208, 3.0173857 , 3.2430282 , 1.9996022 , 1.3895315 , 1.7620904 ,-4.9040093 ,-3.2858686 ,-2.2823575 ,-1.4176623 ,-0.537347 , 0.68219584,-3.193989 ,-3.1675165 , 0.47214374,-4.390378 ,-1.8730192 , 1.4416525 ,-3.0460286 ,-0.73547626, 1.8686327 ,-0.8146671 ,-2.0906649 , 0.01226121,-0.06992937, 0.9302521 ,-2.1858516 , 4.8370657 ,-4.1847024 , 4.4963436 ,-1.3834711 ,-1.1244944 , 0.4290957 ,-4.2681174 , 1.2978764 , 3.4149706 ,-2.7011304 ,-3.1285405 ,-3.8857136 ,-0.18625297,-0.13618916, 2.427405 ,-1.7979074 ,-1.4174187
diff --git a/compiler/pota-quantization-value-test/test_inputs/PRelu_001_config/channel/int16/4.txt b/compiler/pota-quantization-value-test/test_inputs/PRelu_001_config/channel/int16/4.txt
new file mode 100644
index 000000000..99c6284d6
--- /dev/null
+++ b/compiler/pota-quantization-value-test/test_inputs/PRelu_001_config/channel/int16/4.txt
@@ -0,0 +1 @@
+-0.40635094,-2.485209 ,-2.9641154 , 4.09174 ,-1.9137962 ,-2.0860991 , 1.6594787 , 0.53744185, 1.7737653 ,-1.7054961 , 2.5611186 ,-1.1456238 , 2.741241 ,-2.283051 ,-4.2111306 ,-0.8722772 , 1.6465468 ,-0.61518955, 0.08495517, 3.6847656 , 3.7826371 , 2.0023444 ,-3.5326133 , 2.3723035 , 3.7383325 ,-3.3514297 , 2.031452 ,-0.7364658 ,-4.3347225 ,-2.8146286 ,-1.37377 ,-3.518721 ,-0.19657679,-1.6831368 , 1.2457223 , 0.25099897,-4.4722757 ,-4.135197 ,-0.6378818 , 3.8833187 , 1.9291897 , 2.5969315 , 2.146067 ,-2.846719 ,-2.2562532 ,-2.6856182 , 2.824374 , 2.3662992
diff --git a/compiler/pota-quantization-value-test/test_inputs/PRelu_001_config/layer/uint8/0.txt b/compiler/pota-quantization-value-test/test_inputs/PRelu_001_config/layer/uint8/0.txt
new file mode 100644
index 000000000..081a1e6ee
--- /dev/null
+++ b/compiler/pota-quantization-value-test/test_inputs/PRelu_001_config/layer/uint8/0.txt
@@ -0,0 +1 @@
+-1.9927613e+00,-1.7386111e+00, 4.0895696e+00, 3.7818990e+00, 1.9420158e+00, 2.8482721e+00, 1.9165717e+00, 3.0059583e+00, 1.8346788e+00,-1.9055414e-03, 4.9277787e+00,-2.2794118e+00, 4.4005270e+00, 4.9703922e+00,-4.5275192e+00,-4.0446317e-01,-4.9363256e+00, 4.9506269e+00, 5.5874938e-01, 3.9949589e+00,-3.8152415e-01,-4.1024357e-01,-3.8472393e+00, 4.2956004e+00, 4.8097472e+00, 1.7960385e+00, 1.6767026e+00,-2.2773645e+00, 2.6808765e+00,-3.7214172e+00, 4.0978761e+00, 3.6202488e+00,-3.3211513e+00, 3.6200387e+00,-3.6106458e+00,-3.9778764e+00, 3.8779631e+00,-4.8502750e+00,-2.1901150e+00, 3.1800017e+00, 4.6261444e+00, 3.5151103e+00, 2.8659137e-02, 4.5340648e+00, 1.9836371e+00,-2.1751235e+00,-4.6762753e+00,-3.6951694e+00
diff --git a/compiler/pota-quantization-value-test/test_inputs/PRelu_001_config/layer/uint8/1.txt b/compiler/pota-quantization-value-test/test_inputs/PRelu_001_config/layer/uint8/1.txt
new file mode 100644
index 000000000..f6b31db38
--- /dev/null
+++ b/compiler/pota-quantization-value-test/test_inputs/PRelu_001_config/layer/uint8/1.txt
@@ -0,0 +1 @@
+-4.7488093 , 4.805902 ,-0.29828382, 0.57486725,-4.864297 , 1.1832287 ,-1.7611881 ,-2.7058024 , 2.707353 ,-3.9832466 , 3.1243927 ,-4.795229 , 1.9835415 , 3.2291937 , 2.4303932 ,-3.556881 , 4.316894 ,-0.6444627 ,-3.8289468 , 4.012964 , 0.7878584 ,-1.8921386 , 2.779619 ,-3.762597 , 3.4239094 ,-0.9103423 ,-3.9791772 ,-2.5613685 ,-4.4910364 , 0.19411987, 4.6296096 ,-0.6827259 , 3.7645729 , 1.5309091 , 3.5163064 , 3.4726381 , 3.5372822 , 1.7671971 , 1.4374614 , 3.5783768 ,-2.4927518 , 3.9427729 , 2.431568 , 2.6959393 , 3.8100271 ,-2.099064 , 3.3663592 ,-2.0818436
diff --git a/compiler/pota-quantization-value-test/test_inputs/PRelu_001_config/layer/uint8/2.txt b/compiler/pota-quantization-value-test/test_inputs/PRelu_001_config/layer/uint8/2.txt
new file mode 100644
index 000000000..acc01cb55
--- /dev/null
+++ b/compiler/pota-quantization-value-test/test_inputs/PRelu_001_config/layer/uint8/2.txt
@@ -0,0 +1 @@
+ 4.279912 ,-2.2746763 , 4.0609813 , 4.5353827 , 3.624241 ,-3.9593613 , 4.189409 ,-3.9370356 ,-2.7063863 ,-1.9987059 , 4.172294 ,-4.5454354 , 4.362368 , 2.2204642 ,-4.9866576 , 3.31571 , 0.12623785, 4.7834573 ,-1.3521448 ,-1.5408021 ,-4.6578984 ,-2.93307 ,-1.5684534 ,-1.6875995 ,-0.4278419 , 1.1314197 ,-2.9655704 ,-0.48032767,-1.9200082 , 1.3321692 , 0.87586147,-0.1761448 , 3.939337 ,-1.0270193 ,-4.807054 , 2.8373904 ,-1.1184337 ,-0.8979197 , 2.1442132 ,-2.8509672 ,-3.3741531 , 3.6592414 , 0.7632272 ,-4.11465 , 4.892313 , 4.715815 ,-4.6481915 , 0.24676175
diff --git a/compiler/pota-quantization-value-test/test_inputs/PRelu_001_config/layer/uint8/3.txt b/compiler/pota-quantization-value-test/test_inputs/PRelu_001_config/layer/uint8/3.txt
new file mode 100644
index 000000000..0f0b7a939
--- /dev/null
+++ b/compiler/pota-quantization-value-test/test_inputs/PRelu_001_config/layer/uint8/3.txt
@@ -0,0 +1 @@
+-2.0949495 ,-1.1370499 , 4.6457314 ,-2.243915 ,-1.7996464 , 1.2268789 ,-4.938172 ,-3.2802615 , 1.8788282 , 4.4162655 ,-4.8805113 , 3.1269526 , 3.2644348 , 0.89842725,-1.4484432 ,-0.28381723, 3.046261 ,-1.0718596 ,-3.996107 ,-4.9575796 ,-2.2279077 , 1.5326967 , 4.4588428 ,-2.042381 , 4.6604958 , 4.6422915 ,-1.097833 , 3.666126 , 0.4735639 ,-4.480704 ,-4.831033 ,-0.27288163, 4.588138 , 4.5297036 , 4.3675694 ,-1.6098841 ,-3.4147859 , 2.1168516 ,-1.9529305 ,-0.12548867, 3.4388335 ,-1.4071734 , 0.9507897 , 4.8206787 , 1.676873 ,-1.7102181 , 1.7746873 , 0.02711739
diff --git a/compiler/pota-quantization-value-test/test_inputs/PRelu_001_config/layer/uint8/4.txt b/compiler/pota-quantization-value-test/test_inputs/PRelu_001_config/layer/uint8/4.txt
new file mode 100644
index 000000000..d23450db6
--- /dev/null
+++ b/compiler/pota-quantization-value-test/test_inputs/PRelu_001_config/layer/uint8/4.txt
@@ -0,0 +1 @@
+-4.707647 ,-4.0921726 , 3.5813692 ,-4.71081 , 3.157816 ,-3.0034213 ,-0.21858999,-1.1736552 ,-1.6042249 ,-3.93102 ,-4.0407577 , 3.7350774 ,-4.9545655 ,-1.5413756 , 0.34996858, 2.0339615 , 0.99290746,-3.9916334 ,-4.149016 ,-3.2332835 , 3.6728513 , 2.4537466 ,-3.103485 ,-0.4829316 , 4.8046784 ,-1.753812 , 4.878712 ,-1.4039769 , 1.6640003 ,-1.2041731 , 0.8046477 , 0.9196048 ,-0.6475092 , 1.1409346 , 2.0324717 ,-0.04227797,-0.5379897 , 3.205104 , 3.3556423 , 4.8447986 ,-1.9695646 ,-2.6304977 ,-3.7261262 ,-4.725599 , 2.1162436 ,-0.5631174 ,-0.5820323 , 0.8398242
diff --git a/compiler/pota-quantization-value-test/test_inputs/ReLU_000_config/channel/int16/0.txt b/compiler/pota-quantization-value-test/test_inputs/ReLU_000_config/channel/int16/0.txt
new file mode 100644
index 000000000..eb058a1c3
--- /dev/null
+++ b/compiler/pota-quantization-value-test/test_inputs/ReLU_000_config/channel/int16/0.txt
@@ -0,0 +1 @@
+-0.55411166,-4.1992335 , 1.4317423 ,-3.7261302 , 1.151971 ,-2.117022 ,-0.7386241 , 4.654951 , 1.4869142 ,-4.6252975 ,-3.305923 , 3.632628 ,-2.6403873 ,-4.862389 , 3.477561 ,-4.9842925 ,-3.6267536 , 4.9950438
diff --git a/compiler/pota-quantization-value-test/test_inputs/ReLU_000_config/channel/int16/1.txt b/compiler/pota-quantization-value-test/test_inputs/ReLU_000_config/channel/int16/1.txt
new file mode 100644
index 000000000..ff15f032d
--- /dev/null
+++ b/compiler/pota-quantization-value-test/test_inputs/ReLU_000_config/channel/int16/1.txt
@@ -0,0 +1 @@
+ 0.18094282,-0.58095986, 1.2765085 ,-0.534363 , 4.5564513 ,-0.28305855, 0.80606604,-3.3217795 ,-0.08041744,-3.7558215 ,-0.5370528 , 1.8984528 ,-0.09462419,-0.28595117, 4.6817894 ,-4.6653147 ,-4.127137 ,-2.3407753
diff --git a/compiler/pota-quantization-value-test/test_inputs/ReLU_000_config/channel/int16/2.txt b/compiler/pota-quantization-value-test/test_inputs/ReLU_000_config/channel/int16/2.txt
new file mode 100644
index 000000000..e564168bf
--- /dev/null
+++ b/compiler/pota-quantization-value-test/test_inputs/ReLU_000_config/channel/int16/2.txt
@@ -0,0 +1 @@
+-0.62747055, 1.4133646 ,-0.9954612 ,-4.687624 ,-2.5390003 ,-4.534569 ,-1.1943612 ,-4.830596 , 4.3214984 ,-2.4795794 , 4.166298 ,-1.4772589 ,-4.074577 , 3.2332711 ,-1.5221404 ,-1.7308865 , 0.06814837, 2.944668
diff --git a/compiler/pota-quantization-value-test/test_inputs/ReLU_000_config/channel/int16/3.txt b/compiler/pota-quantization-value-test/test_inputs/ReLU_000_config/channel/int16/3.txt
new file mode 100644
index 000000000..c763b6311
--- /dev/null
+++ b/compiler/pota-quantization-value-test/test_inputs/ReLU_000_config/channel/int16/3.txt
@@ -0,0 +1 @@
+-3.2136867 , 0.6229863 , 0.02772082,-0.00820862,-2.4893622 ,-0.6757174 ,-2.2024722 ,-2.0893583 , 0.33953062,-3.5438979 , 0.7000838 , 1.3219849 ,-0.02302017, 2.3125873 ,-1.5376673 ,-4.0330076 , 4.755884 , 2.729685
diff --git a/compiler/pota-quantization-value-test/test_inputs/ReLU_000_config/channel/int16/4.txt b/compiler/pota-quantization-value-test/test_inputs/ReLU_000_config/channel/int16/4.txt
new file mode 100644
index 000000000..12e13272d
--- /dev/null
+++ b/compiler/pota-quantization-value-test/test_inputs/ReLU_000_config/channel/int16/4.txt
@@ -0,0 +1 @@
+ 0.82922786, 4.762074 ,-3.5043278 , 2.4521468 , 2.6450796 ,-2.8606322 , 0.8321993 ,-1.4020495 ,-0.25749585, 1.0287803 ,-3.911455 ,-1.8311876 , 2.763438 , 3.8604703 ,-3.5478592 ,-4.2335987 ,-3.6402035 ,-1.8485361
diff --git a/compiler/pota-quantization-value-test/test_inputs/ReLU_000_config/layer/uint8/0.txt b/compiler/pota-quantization-value-test/test_inputs/ReLU_000_config/layer/uint8/0.txt
new file mode 100644
index 000000000..42ce6be36
--- /dev/null
+++ b/compiler/pota-quantization-value-test/test_inputs/ReLU_000_config/layer/uint8/0.txt
@@ -0,0 +1 @@
+ 1.1826919 , 0.07451724, 3.48515 , 3.4905832 , 1.8009655 , 4.155749 , 3.3155255 , 2.6834202 ,-1.7111781 ,-2.2254407 ,-4.578932 ,-2.1239302 ,-0.1269101 ,-2.6022012 ,-4.8320093 , 0.2983099 ,-0.43314072,-0.66332716
diff --git a/compiler/pota-quantization-value-test/test_inputs/ReLU_000_config/layer/uint8/1.txt b/compiler/pota-quantization-value-test/test_inputs/ReLU_000_config/layer/uint8/1.txt
new file mode 100644
index 000000000..f677cc836
--- /dev/null
+++ b/compiler/pota-quantization-value-test/test_inputs/ReLU_000_config/layer/uint8/1.txt
@@ -0,0 +1 @@
+-1.2971772 ,-3.6082 ,-2.2253058 ,-4.4367466 ,-1.7221912 , 0.02547262,-3.641017 , 0.2953748 , 0.7217547 , 4.663728 , 4.262444 ,-3.196005 ,-1.6792587 ,-1.7463406 , 2.030074 , 0.67998594,-0.92862725,-1.7960806
diff --git a/compiler/pota-quantization-value-test/test_inputs/ReLU_000_config/layer/uint8/2.txt b/compiler/pota-quantization-value-test/test_inputs/ReLU_000_config/layer/uint8/2.txt
new file mode 100644
index 000000000..841ea9f8f
--- /dev/null
+++ b/compiler/pota-quantization-value-test/test_inputs/ReLU_000_config/layer/uint8/2.txt
@@ -0,0 +1 @@
+ 2.2390285 ,-1.9557759 ,-1.2331479 ,-2.4810686 ,-0.5112022 , 1.741153 , 0.13645513,-2.3543327 ,-3.2610211 , 2.5739572 ,-0.50510126, 2.3544457 , 1.884411 ,-3.7153857 ,-1.7037194 ,-0.36849263,-4.819704 , 3.047652
diff --git a/compiler/pota-quantization-value-test/test_inputs/ReLU_000_config/layer/uint8/3.txt b/compiler/pota-quantization-value-test/test_inputs/ReLU_000_config/layer/uint8/3.txt
new file mode 100644
index 000000000..08ec9fe8f
--- /dev/null
+++ b/compiler/pota-quantization-value-test/test_inputs/ReLU_000_config/layer/uint8/3.txt
@@ -0,0 +1 @@
+-0.9080747 ,-1.5609599 ,-0.40923035,-2.0569193 , 4.5904484 ,-0.02348744, 0.35939455, 2.2017193 , 2.2766497 ,-2.2080436 ,-2.6453862 ,-3.6456985 , 4.160244 , 1.7283534 , 4.5547447 ,-1.8674839 , 3.019465 , 1.1584582
diff --git a/compiler/pota-quantization-value-test/test_inputs/ReLU_000_config/layer/uint8/4.txt b/compiler/pota-quantization-value-test/test_inputs/ReLU_000_config/layer/uint8/4.txt
new file mode 100644
index 000000000..a4f2d97d1
--- /dev/null
+++ b/compiler/pota-quantization-value-test/test_inputs/ReLU_000_config/layer/uint8/4.txt
@@ -0,0 +1 @@
+ 4.5920744 , 3.827386 ,-2.1228654 , 3.7227573 ,-3.4464717 , 0.31313375, 0.5531476 ,-0.30391756,-0.21601346, 3.8968146 , 0.23224053,-0.6208954 ,-0.76323295,-1.1700501 ,-1.6203161 , 2.1780837 , 2.3581395 , 2.6519518
diff --git a/compiler/pota-quantization-value-test/test_inputs/Split_000_config/channel/int16/0.txt b/compiler/pota-quantization-value-test/test_inputs/Split_000_config/channel/int16/0.txt
new file mode 100644
index 000000000..0e8d687b1
--- /dev/null
+++ b/compiler/pota-quantization-value-test/test_inputs/Split_000_config/channel/int16/0.txt
@@ -0,0 +1 @@
+-2.327701 , 1.9312059 ,-2.0069487 ,-1.2584914 ,-0.08435626, 0.47685367,-2.7456024 , 2.1275337 ,-4.9685698 , 1.8143541 , 0.52829266,-2.770121
diff --git a/compiler/pota-quantization-value-test/test_inputs/Split_000_config/channel/int16/1.txt b/compiler/pota-quantization-value-test/test_inputs/Split_000_config/channel/int16/1.txt
new file mode 100644
index 000000000..67732e8f5
--- /dev/null
+++ b/compiler/pota-quantization-value-test/test_inputs/Split_000_config/channel/int16/1.txt
@@ -0,0 +1 @@
+ 0.01133719,-3.3741624 , 3.556686 ,-4.21059 , 0.49977505, 1.768375 , 3.867543 , 2.270572 ,-3.9507272 ,-4.595618 ,-4.7460327 , 0.5856542
diff --git a/compiler/pota-quantization-value-test/test_inputs/Split_000_config/channel/int16/2.txt b/compiler/pota-quantization-value-test/test_inputs/Split_000_config/channel/int16/2.txt
new file mode 100644
index 000000000..7bc7124d6
--- /dev/null
+++ b/compiler/pota-quantization-value-test/test_inputs/Split_000_config/channel/int16/2.txt
@@ -0,0 +1 @@
+-2.7181 , 4.6819983 , 2.9022477 ,-0.10716935, 3.6687856 ,-2.5403244 ,-4.477037 , 2.5499978 ,-3.9294813 , 0.08725335,-2.243345 ,-1.4018577
diff --git a/compiler/pota-quantization-value-test/test_inputs/Split_000_config/channel/int16/3.txt b/compiler/pota-quantization-value-test/test_inputs/Split_000_config/channel/int16/3.txt
new file mode 100644
index 000000000..0fac9fb70
--- /dev/null
+++ b/compiler/pota-quantization-value-test/test_inputs/Split_000_config/channel/int16/3.txt
@@ -0,0 +1 @@
+-3.920553 , 0.87464577,-1.0319884 , 2.1885726 , 2.755115 ,-1.6436632 ,-4.4507327 , 4.915525 , 2.9331517 , 4.7712016 , 4.676084 ,-1.7715888
diff --git a/compiler/pota-quantization-value-test/test_inputs/Split_000_config/channel/int16/4.txt b/compiler/pota-quantization-value-test/test_inputs/Split_000_config/channel/int16/4.txt
new file mode 100644
index 000000000..df79104c2
--- /dev/null
+++ b/compiler/pota-quantization-value-test/test_inputs/Split_000_config/channel/int16/4.txt
@@ -0,0 +1 @@
+-2.181168 ,-1.6011912 ,-4.359466 ,-1.3662407 ,-0.06876431,-2.9213328 ,-0.5463467 ,-3.7916536 ,-3.751455 ,-2.822578 , 0.8914152 ,-3.0267959
diff --git a/compiler/pota-quantization-value-test/test_inputs/Split_000_config/channel/uint8/0.txt b/compiler/pota-quantization-value-test/test_inputs/Split_000_config/channel/uint8/0.txt
new file mode 100644
index 000000000..4b999a028
--- /dev/null
+++ b/compiler/pota-quantization-value-test/test_inputs/Split_000_config/channel/uint8/0.txt
@@ -0,0 +1 @@
+ 3.241328 , 2.7033713 ,-2.5329788 ,-4.078369 ,-3.6711028 , 2.8912613 , 0.6188993 , 3.3729403 , 2.9906578 , 0.69040877, 0.6443222 , 1.1676162
diff --git a/compiler/pota-quantization-value-test/test_inputs/Split_000_config/channel/uint8/1.txt b/compiler/pota-quantization-value-test/test_inputs/Split_000_config/channel/uint8/1.txt
new file mode 100644
index 000000000..7061063b9
--- /dev/null
+++ b/compiler/pota-quantization-value-test/test_inputs/Split_000_config/channel/uint8/1.txt
@@ -0,0 +1 @@
+ 1.572614 , 3.6147017 , 1.4378501 ,-0.81497866, 1.5987366 , 3.7698908 ,-3.8637109 , 4.5728784 ,-0.8706349 , 0.7389268 , 4.64117 ,-0.96047217
diff --git a/compiler/pota-quantization-value-test/test_inputs/Split_000_config/channel/uint8/2.txt b/compiler/pota-quantization-value-test/test_inputs/Split_000_config/channel/uint8/2.txt
new file mode 100644
index 000000000..c048a8a9f
--- /dev/null
+++ b/compiler/pota-quantization-value-test/test_inputs/Split_000_config/channel/uint8/2.txt
@@ -0,0 +1 @@
+ 0.00864919,-3.1653113 ,-2.125551 , 2.9225516 ,-1.1439148 , 4.6509814 ,-2.097259 , 2.5843353 ,-2.067207 ,-2.5034845 ,-4.9441104 ,-3.9062042
diff --git a/compiler/pota-quantization-value-test/test_inputs/Split_000_config/channel/uint8/3.txt b/compiler/pota-quantization-value-test/test_inputs/Split_000_config/channel/uint8/3.txt
new file mode 100644
index 000000000..55be3b464
--- /dev/null
+++ b/compiler/pota-quantization-value-test/test_inputs/Split_000_config/channel/uint8/3.txt
@@ -0,0 +1 @@
+ 1.0920542 , 0.5510192 , 1.3465579 ,-2.3510268 , 4.016736 , 4.7848744 ,-0.42403316, 0.00571597, 1.6412207 , 1.7787368 , 2.4728034 ,-3.5900247
diff --git a/compiler/pota-quantization-value-test/test_inputs/Split_000_config/channel/uint8/4.txt b/compiler/pota-quantization-value-test/test_inputs/Split_000_config/channel/uint8/4.txt
new file mode 100644
index 000000000..04c7a1a8a
--- /dev/null
+++ b/compiler/pota-quantization-value-test/test_inputs/Split_000_config/channel/uint8/4.txt
@@ -0,0 +1 @@
+-2.9799085,-3.9477375, 0.6402844, 3.304766 , 3.8880465,-3.5069442,-2.3702915, 4.126247 ,-3.1614416, 2.9909244,-2.8755414, 0.2627986
diff --git a/compiler/pota-quantization-value-test/test_inputs/TransposeConv_001_config/channel/int16/0.txt b/compiler/pota-quantization-value-test/test_inputs/TransposeConv_001_config/channel/int16/0.txt
new file mode 100644
index 000000000..e9db48f9e
--- /dev/null
+++ b/compiler/pota-quantization-value-test/test_inputs/TransposeConv_001_config/channel/int16/0.txt
@@ -0,0 +1 @@
+-1.4124781 , 0.42694193, 1.1734594 ,-3.5111153 ,-2.9756174 , 1.3682148 ,-2.318465 , 2.198896 ,-4.5043235 , 3.1775594 ,-0.42802384,-1.4872279 , 1.3821319 ,-4.771963 ,-0.12837897, 4.132799 , 3.697655 , 2.0807178 ,-3.621293 , 2.121878 ,-0.25654107, 0.42100102,-1.4009671 ,-2.9733627 ,-0.7058871 ,-2.831215 , 3.5669627 , 2.1420689 ,-1.8789555 , 0.8104939 ,-2.0503597 , 1.7788508
diff --git a/compiler/pota-quantization-value-test/test_inputs/TransposeConv_001_config/channel/int16/1.txt b/compiler/pota-quantization-value-test/test_inputs/TransposeConv_001_config/channel/int16/1.txt
new file mode 100644
index 000000000..479d062f1
--- /dev/null
+++ b/compiler/pota-quantization-value-test/test_inputs/TransposeConv_001_config/channel/int16/1.txt
@@ -0,0 +1 @@
+ 3.4726453 , 3.0497985 ,-4.234619 ,-1.0526706 , 1.7278554 ,-3.341614 , 4.54768 , 3.0954597 ,-3.735109 , 2.8810751 ,-2.5381427 ,-3.2360535 ,-1.5378917 , 2.3052745 ,-3.170938 ,-3.327242 , 2.0654576 ,-2.2294598 ,-1.881382 , 0.13216451,-4.2825613 , 0.26616526, 4.6196365 ,-0.88623226, 1.7103885 ,-1.5865034 ,-3.9114466 ,-3.2227128 , 4.909618 , 2.3318915 , 0.84300846, 0.760918
diff --git a/compiler/pota-quantization-value-test/test_inputs/TransposeConv_001_config/channel/int16/2.txt b/compiler/pota-quantization-value-test/test_inputs/TransposeConv_001_config/channel/int16/2.txt
new file mode 100644
index 000000000..ae28234bd
--- /dev/null
+++ b/compiler/pota-quantization-value-test/test_inputs/TransposeConv_001_config/channel/int16/2.txt
@@ -0,0 +1 @@
+-4.6097918,-4.21991 ,-3.9955974, 3.6492047, 2.9191775, 2.8082933, 1.6189331, 0.2730309,-1.5029653,-1.9471445, 4.8758197, 3.3177438, 3.1338058,-2.1281245,-1.7526287,-2.5518703,-1.7746793, 4.0455256,-0.5839861,-4.408046 ,-4.0034447, 1.5858272,-4.5896654, 4.7211285,-4.677515 ,-2.6027086,-4.7896166,-3.5512326,-1.9068764,-2.9705904,-4.854087 ,-4.892111
diff --git a/compiler/pota-quantization-value-test/test_inputs/TransposeConv_001_config/channel/int16/3.txt b/compiler/pota-quantization-value-test/test_inputs/TransposeConv_001_config/channel/int16/3.txt
new file mode 100644
index 000000000..fd40f84f4
--- /dev/null
+++ b/compiler/pota-quantization-value-test/test_inputs/TransposeConv_001_config/channel/int16/3.txt
@@ -0,0 +1 @@
+ 2.1514777e-02, 2.6526773e+00,-3.0477784e+00, 1.3287724e+00,-4.1414630e-01,-1.7295350e-01, 7.6649576e-01,-1.8028022e+00,-7.0781744e-01,-2.5262204e-01,-3.0970418e+00,-1.3165286e+00,-4.6649928e+00, 2.0809033e+00,-1.5739973e+00,-4.0531826e-01,-2.1718202e+00, 2.0146034e+00, 2.5044403e+00,-1.1256610e+00, 1.3536702e+00, 1.0283234e-03,-1.8823910e+00, 4.7122188e+00, 9.4781297e-01, 3.2012525e+00,-5.5164534e-01,-2.6158772e+00,-1.8771547e+00,-3.1689723e+00, 4.9054880e+00,-3.4560370e+00
diff --git a/compiler/pota-quantization-value-test/test_inputs/TransposeConv_001_config/channel/int16/4.txt b/compiler/pota-quantization-value-test/test_inputs/TransposeConv_001_config/channel/int16/4.txt
new file mode 100644
index 000000000..e81c3b8e5
--- /dev/null
+++ b/compiler/pota-quantization-value-test/test_inputs/TransposeConv_001_config/channel/int16/4.txt
@@ -0,0 +1 @@
+-2.0927553 ,-2.107511 ,-1.6963564 , 1.7006218 , 1.4575784 , 0.06095728, 1.2659966 , 4.1905265 , 1.3035946 , 4.9793477 ,-4.3388166 ,-0.23496658, 1.9831208 , 2.6154642 ,-0.2790228 ,-3.1774354 ,-3.178935 ,-1.1564373 ,-0.8199472 ,-2.245698 ,-4.8605046 ,-3.569018 ,-1.4226891 ,-4.1067843 , 2.6078918 ,-3.5830674 , 1.9065963 , 2.435578 ,-3.3216476 , 4.5930347 , 2.9191844 , 1.7885648
diff --git a/compiler/pota-quantization-value-test/test_inputs/TransposeConv_001_config/layer/uint8/0.txt b/compiler/pota-quantization-value-test/test_inputs/TransposeConv_001_config/layer/uint8/0.txt
new file mode 100644
index 000000000..a8874bc5f
--- /dev/null
+++ b/compiler/pota-quantization-value-test/test_inputs/TransposeConv_001_config/layer/uint8/0.txt
@@ -0,0 +1 @@
+ 3.9384239 ,-3.7377489 , 0.97284186, 3.8309984 , 2.4125865 , 1.7141674 , 3.9459977 ,-0.304659 ,-3.4623327 , 4.4569106 , 4.209985 ,-0.6677348 , 3.4578135 , 1.6779743 , 2.502791 ,-1.324285 , 1.3139176 , 3.4334664 ,-2.2695086 ,-4.001059 ,-0.91164917, 4.4447775 ,-3.0275404 ,-2.0852396 , 3.6677403 ,-2.9595146 , 2.0921555 , 1.7570637 , 3.717391 ,-0.3216191 ,-0.8410847 , 2.662336
diff --git a/compiler/pota-quantization-value-test/test_inputs/TransposeConv_001_config/layer/uint8/1.txt b/compiler/pota-quantization-value-test/test_inputs/TransposeConv_001_config/layer/uint8/1.txt
new file mode 100644
index 000000000..715e680be
--- /dev/null
+++ b/compiler/pota-quantization-value-test/test_inputs/TransposeConv_001_config/layer/uint8/1.txt
@@ -0,0 +1 @@
+ 0.6663157 ,-0.04146723,-0.8193995 , 4.804576 ,-2.1357434 , 4.0829 ,-1.6380692 , 1.8043218 , 2.3431025 , 0.30111 , 1.2928191 ,-1.8559257 ,-0.68305963,-1.1502715 , 1.9492546 ,-2.7240746 , 2.9279857 ,-3.3329778 ,-4.8343406 ,-0.02708206, 1.1840513 , 3.6476028 , 4.75276 ,-4.9085226 ,-1.1922491 , 0.54225117, 3.17247 ,-2.7856457 ,-3.0866194 ,-2.2077718 , 1.6263398 , 3.7066603
diff --git a/compiler/pota-quantization-value-test/test_inputs/TransposeConv_001_config/layer/uint8/2.txt b/compiler/pota-quantization-value-test/test_inputs/TransposeConv_001_config/layer/uint8/2.txt
new file mode 100644
index 000000000..3ca893e61
--- /dev/null
+++ b/compiler/pota-quantization-value-test/test_inputs/TransposeConv_001_config/layer/uint8/2.txt
@@ -0,0 +1 @@
+-4.8507566 ,-1.267258 , 0.5099198 , 1.650726 , 3.4329638 ,-2.2652836 , 1.2157568 , 0.18305123, 3.6754217 ,-4.6185255 ,-1.0646905 ,-0.46092424, 2.046326 ,-2.8830478 , 4.156068 ,-2.0503244 , 0.0755459 ,-4.6472006 ,-0.50128895, 3.1129324 ,-4.4048553 , 0.47983927, 1.4510479 , 3.9226127 ,-4.767221 ,-2.795826 ,-4.816457 ,-3.6127663 ,-2.2712553 , 4.586938 , 1.1028811 , 1.5028698
diff --git a/compiler/pota-quantization-value-test/test_inputs/TransposeConv_001_config/layer/uint8/3.txt b/compiler/pota-quantization-value-test/test_inputs/TransposeConv_001_config/layer/uint8/3.txt
new file mode 100644
index 000000000..3fba8ecec
--- /dev/null
+++ b/compiler/pota-quantization-value-test/test_inputs/TransposeConv_001_config/layer/uint8/3.txt
@@ -0,0 +1 @@
+ 4.9431224 ,-3.4878132 ,-2.4831018 , 2.2395666 ,-2.3317611 ,-1.6786547 ,-2.4702384 , 3.2167027 , 1.7300137 , 2.8848834 ,-4.6395254 , 0.5527259 ,-2.915835 ,-1.0066313 ,-0.278253 , 4.6136203 ,-3.4183645 ,-1.5189631 ,-4.599058 , 3.3198457 ,-3.9464161 ,-0.6357558 , 0.32550323, 3.2147424 , 4.921844 ,-0.30067012, 3.9456701 , 0.5943688 ,-4.7229166 ,-3.6803844 ,-3.3813965 , 3.283583
diff --git a/compiler/pota-quantization-value-test/test_inputs/TransposeConv_001_config/layer/uint8/4.txt b/compiler/pota-quantization-value-test/test_inputs/TransposeConv_001_config/layer/uint8/4.txt
new file mode 100644
index 000000000..16cc23b79
--- /dev/null
+++ b/compiler/pota-quantization-value-test/test_inputs/TransposeConv_001_config/layer/uint8/4.txt
@@ -0,0 +1 @@
+ 2.232644 , 4.465217 , 1.926956 ,-4.007337 ,-2.7392106 ,-2.4579394 , 2.913538 ,-1.7261469 , 3.8706868 , 0.06259949,-2.018361 , 1.2728635 ,-3.133289 ,-4.943454 ,-1.5415367 ,-4.8183494 , 4.348317 ,-2.4929109 ,-0.9018388 ,-4.776565 , 4.634248 , 3.0753953 , 2.3412373 ,-2.7086196 , 3.4485948 , 0.3561932 , 0.03650501,-2.8704169 , 1.0514414 , 3.3964615 , 1.2783849 , 4.974951
diff --git a/compiler/pota-quantization-value-test/test_quantization_with_config.sh b/compiler/pota-quantization-value-test/test_quantization_with_config.sh
new file mode 100755
index 000000000..1364dfb90
--- /dev/null
+++ b/compiler/pota-quantization-value-test/test_quantization_with_config.sh
@@ -0,0 +1,109 @@
+#!/bin/bash
+
+# This script tests the quantize_with_minmax option of circle-quantizer with a config file
+#
+# HOW TO USE
+#
+# ./test_quantization_with_config.sh <path/to/test.config> <path/to/work_dir> <TEST 1> <TEST 2> ...
+# test.config : set ${RECORD_MINMAX_PATH}, ${CIRCLE_QUANTIZER_PATH}, and ${CIRCLE_TENSORDUMP_PATH}
+# work_dir : build directory of pota-quantization-value-test (ex: build/compiler/pota-quantization-value-test)
+
+SOURCE_PATH="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
+COMPARE_SCRIPT_PATH="${SOURCE_PATH}/compare_tensors.py"
+CONFIG_PATH="$1"; shift
+BIN_PATH=$(dirname "${CONFIG_PATH}")
+TEST_INPUT_PATH="${SOURCE_PATH}/test_inputs"
+GEN_SCRIPT_PATH="${BIN_PATH}/gen_h5_explicit_inputs.py"
+WORKDIR="$1"; shift
+
+source "${CONFIG_PATH}"
+
+echo "-- Found CIRCLE_QUANTIZER: ${CIRCLE_QUANTIZER_PATH}"
+echo "-- Found CIRCLE_TENSORDUMP: ${CIRCLE_TENSORDUMP_PATH}"
+echo "-- Found workdir: ${WORKDIR}"
+
+TESTED=()
+PASSED=()
+FAILED=()
+
+pushd "${WORKDIR}"
+while [ "$1" != "" ]; do
+ MODELNAME=$1; shift
+ GRANULARITY=$1; shift
+ DTYPE=$1; shift
+ TESTCASE="${MODELNAME}.${GRANULARITY}.${DTYPE}"
+
+ TESTED+=("${TESTCASE}")
+
+ TESTCASE_FILE="${WORKDIR}/${TESTCASE}"
+ TEST_RESULT_FILE="${BIN_PATH}/${TESTCASE}"
+
+ PASSED_TAG="${TEST_RESULT_FILE}.quantization.mixed.passed"
+ rm -f "${PASSED_TAG}"
+
+ cat > "${TEST_RESULT_FILE}_quantization_with_config.log" <(
+ exec 2>&1
+ set -ex
+
+ # Generate h5 input data
+ source "${VIRTUALENV}/bin/activate"
+ "${VIRTUALENV}/bin/python" "${GEN_SCRIPT_PATH}" \
+ --model "${WORKDIR}/${MODELNAME}.circle" \
+ --input "${TEST_INPUT_PATH}/${MODELNAME}_config/${GRANULARITY}/${DTYPE}" \
+ --output "${TESTCASE_FILE}.mixed.input.h5"
+
+ if [[ $? -ne 0 ]]; then
+ echo "FAILED TO GENERATE INPUT"
+ continue
+ fi
+
+ # Run record-minmax
+ # NOTE There is no '_with_config' test for record-minmax, because it does not
+  # use a quantization config file.
+ "${RECORD_MINMAX_PATH}" \
+ --input_model "${TEST_RESULT_FILE}.fake_quantized.mixed.circle" \
+ --input_data "${TESTCASE_FILE}.mixed.input.h5" \
+ --output_model "${TEST_RESULT_FILE}.minmax_recorded.mixed.circle"
+
+ # Run circle-quantizer with --quantize_with_minmax
+ "${CIRCLE_QUANTIZER_PATH}" \
+ --quantize_with_minmax float32 "${DTYPE}" "${GRANULARITY}" \
+ --config "${SOURCE_PATH}/config_files/${MODELNAME}/${GRANULARITY}/${DTYPE}/qconf.json" \
+ "${TEST_RESULT_FILE}.minmax_recorded.mixed.circle" \
+ "${TEST_RESULT_FILE}.quantized.mixed.circle"
+
+ # Dump scale, zp, weights values (circle-tensordump)
+ "${CIRCLE_TENSORDUMP_PATH}" \
+ "${TEST_RESULT_FILE}.quantized.mixed.circle" \
+ --tensors_to_hdf5 "${TEST_RESULT_FILE}.quantized.mixed.circle.h5"
+
+ # Compare result
+ "${VIRTUALENV}/bin/python" "${COMPARE_SCRIPT_PATH}" \
+ --input_h5 "${TEST_RESULT_FILE}.quantized.mixed.circle.h5" \
+ --expect_dir "${SOURCE_PATH}/expected_outputs/${MODELNAME}_config/${GRANULARITY}/${DTYPE}/quantization" \
+ --mode quantization
+
+ if [[ $? -eq 0 ]]; then
+ touch "${PASSED_TAG}"
+ fi
+ )
+
+ if [[ -f "${PASSED_TAG}" ]]; then
+ PASSED+=("$TESTCASE")
+ else
+ FAILED+=("$TESTCASE")
+ fi
+done
+popd
+
+if [[ ${#TESTED[@]} -ne ${#PASSED[@]} ]]; then
+ echo "FAILED"
+ for TEST in "${FAILED[@]}"
+ do
+ echo "- ${TEST}"
+ done
+ exit 255
+fi
+
+echo "PASSED"
+exit 0
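
The --config option above points circle-quantizer at a per-test qconf.json under config_files/<MODEL>/<GRANULARITY>/<DTYPE>/. As a rough sketch only (the authoritative schema is whatever circle-quantizer's config parser accepts; the field and layer names below are assumptions for illustration), such a mixed-precision config lists layers whose dtype/granularity override the defaults given on the command line:

    {
      "layers": [
        {
          "name": "ofm_conv",
          "dtype": "int16",
          "granularity": "channel"
        }
      ]
    }
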
diff --git a/compiler/pp/CMakeLists.txt b/compiler/pp/CMakeLists.txt
index 6d58458ca..1db09cb88 100644
--- a/compiler/pp/CMakeLists.txt
+++ b/compiler/pp/CMakeLists.txt
@@ -3,7 +3,9 @@ file(GLOB_RECURSE TESTS "src/*.test.cpp")
list(REMOVE_ITEM SOURCES ${TESTS})
add_library(pp STATIC ${SOURCES})
-set_target_properties(pp PROPERTIES POSITION_INDEPENDENT_CODE ON)
+if (NOT NNCC_LIBRARY_NO_PIC)
+ set_target_properties(pp PROPERTIES POSITION_INDEPENDENT_CODE ON)
+endif(NOT NNCC_LIBRARY_NO_PIC)
target_include_directories(pp PUBLIC include)
target_link_libraries(pp PRIVATE nncc_common)
target_link_libraries(pp PUBLIC nncc_coverage)
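
The new guard lets a build opt out of position-independent code for static-only targets. Assuming NNCC_LIBRARY_NO_PIC is meant to be set as an ordinary CMake cache variable (an assumption; this hunk only shows the consumer side), a configure-time invocation would look like:

    cmake -DNNCC_LIBRARY_NO_PIC=ON <path/to/source>
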
diff --git a/compiler/record-minmax-conversion-test/CMakeLists.txt b/compiler/record-minmax-conversion-test/CMakeLists.txt
index 2221e1702..31b906142 100644
--- a/compiler/record-minmax-conversion-test/CMakeLists.txt
+++ b/compiler/record-minmax-conversion-test/CMakeLists.txt
@@ -37,6 +37,6 @@ add_test(
COMMAND "${CMAKE_CURRENT_SOURCE_DIR}/testall.sh"
"${TEST_CONFIG}"
"${ARTIFACTS_BIN_PATH}"
- "${NNCC_OVERLAY_DIR}/venv_1_13_2"
+ "${NNCC_OVERLAY_DIR}/venv_2_8_0"
${RECORD_MINMAX_CONVERSION_TEST}
)
diff --git a/compiler/record-minmax/CMakeLists.txt b/compiler/record-minmax/CMakeLists.txt
index da63bbf5f..b9c08f472 100644
--- a/compiler/record-minmax/CMakeLists.txt
+++ b/compiler/record-minmax/CMakeLists.txt
@@ -1,25 +1,17 @@
-nnas_find_package(HDF5 COMPONENTS STATIC QUIET)
-
-if(NOT HDF5_FOUND)
- message(STATUS "Build record-minmax: FAILED (missing HDF5)")
- return()
-endif(NOT HDF5_FOUND)
-
set(DRIVER "driver/Driver.cpp")
file(GLOB_RECURSE SOURCES "src/*.cpp")
add_executable(record-minmax ${DRIVER} ${SOURCES})
target_include_directories(record-minmax PRIVATE include)
-target_include_directories(record-minmax PRIVATE ${HDF5_INCLUDE_DIRS})
-target_link_libraries(record-minmax ${HDF5_CXX_LIBRARIES})
target_link_libraries(record-minmax arser)
target_link_libraries(record-minmax safemain)
target_link_libraries(record-minmax luci_import)
target_link_libraries(record-minmax luci_env)
target_link_libraries(record-minmax luci_export)
target_link_libraries(record-minmax luci_interpreter)
+target_link_libraries(record-minmax dio_hdf5)
target_link_libraries(record-minmax vconone)
target_link_libraries(record-minmax nncc_coverage)
diff --git a/compiler/record-minmax/requires.cmake b/compiler/record-minmax/requires.cmake
index 9cf12591e..69373e76f 100644
--- a/compiler/record-minmax/requires.cmake
+++ b/compiler/record-minmax/requires.cmake
@@ -2,4 +2,5 @@ require("luci")
require("luci-interpreter")
require("safemain")
require("arser")
+require("dio-hdf5")
require("vconone")
diff --git a/compiler/record-minmax/src/HDF5Importer.h b/compiler/record-minmax/src/HDF5Importer.h
deleted file mode 100644
index 9e98c7752..000000000
--- a/compiler/record-minmax/src/HDF5Importer.h
+++ /dev/null
@@ -1,87 +0,0 @@
-/*
- * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#ifndef __RECORD_MINMAX_HDF5IMPORTER_H__
-#define __RECORD_MINMAX_HDF5IMPORTER_H__
-
-#include <luci_interpreter/core/Tensor.h>
-
-#include <H5Cpp.h>
-
-#include <stdexcept>
-
-using Shape = luci_interpreter::Shape;
-using DataType = luci_interpreter::DataType;
-
-namespace record_minmax
-{
-
-// HDF5Importer reads an input data saved in the hdf5 file in the given path
-// The hierarchy of the hdf5 file is as follows.
-// Group "/"
-// > Group "value"
-// > Group <record_idx>
-// > Dataset <input_idx>
-// record_idx : index of the record (dataset file can contain multiple records)
-// input_idx : index of the input (DNN model can have multiple inputs)
-// Ex: the j'th input of the i'th record can be accessed by "/value/i/j"
-class HDF5Importer
-{
-public:
- explicit HDF5Importer(const std::string &path)
- {
- if (_file.isHdf5(path) == false)
- throw std::runtime_error("Given data file is not HDF5");
-
- _file = H5::H5File(path, H5F_ACC_RDONLY);
- }
-
-public:
- /**
- * @brief importGroup has to be called before readTensor is called
- * Otherwise, readTensor will throw an exception
- */
- void importGroup() { _value_grp = _file.openGroup("value"); }
-
- /**
- * @brief Read tensor data from file and store it into buffer
- * @details A tensor in the file can be retrieved with (record_idx, input_idx)
- * @param record_idx : index of the record
- * @param input_idx : index of the input
- * @param dtype : pointer to write the tensor's data type
- * @param shape : pointer to write the tensor's shape
- * @param buffer : pointer to write the tensor's data
- */
- void readTensor(int32_t record_idx, int32_t input_idx, DataType *dtype, Shape *shape,
- void *buffer);
-
- // Read a raw tensor (no type/shape is specified)
- void readTensor(int32_t record_idx, int32_t input_idx, void *buffer);
-
- bool isRawData() { return _value_grp.attrExists("rawData"); }
-
- int32_t numRecords() { return _value_grp.getNumObjs(); }
-
- int32_t numInputs(int32_t record_idx);
-
-private:
- H5::H5File _file;
- H5::Group _value_grp;
-};
-
-} // namespace record_minmax
-
-#endif // __RECORD_MINMAX_HDF5IMPORTER_H__
diff --git a/compiler/record-minmax/src/MinMaxObserver.cpp b/compiler/record-minmax/src/MinMaxObserver.cpp
index 28ae2b33b..8288d3e5e 100644
--- a/compiler/record-minmax/src/MinMaxObserver.cpp
+++ b/compiler/record-minmax/src/MinMaxObserver.cpp
@@ -51,6 +51,16 @@ void MinMaxObserver::postTensorWrite(const luci::CircleNode *node,
// Bool type tensor is not quantized
return;
}
+ if (node->dtype() == DataType::S32)
+ {
+ // Integer type tensor is not quantized
+ return;
+ }
+ if (node->dtype() == DataType::S64)
+ {
+ // Integer type tensor is not quantized
+ return;
+ }
// Only support recording of float32 values
if (tensor->element_type() != DataType::FLOAT32)
@@ -58,9 +68,6 @@ void MinMaxObserver::postTensorWrite(const luci::CircleNode *node,
// Exceptions that should be processed in backends
switch (node->opcode())
{
- case luci::CircleOpcode::ARG_MAX:
- // Output of arg_max is the index of the largest value across axes of a tensor.
- // It always has integer type.
case luci::CircleOpcode::CAST:
// Cast is quantized only if it converts <type> -> float.
// Other cases should be processed in backends.
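
The two early returns added above mirror the existing BOOL skip. A condensed sketch of the same check (the diff keeps separate branches, matching the style of the surrounding code):

    // S32/S64 tensors carry indices or counts, not activations,
    // so min/max recording skips them, just like BOOL.
    const auto dt = node->dtype();
    if (dt == loco::DataType::S32 || dt == loco::DataType::S64)
      return;
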
diff --git a/compiler/record-minmax/src/RecordMinMax.cpp b/compiler/record-minmax/src/RecordMinMax.cpp
index c249960f8..10a14516f 100644
--- a/compiler/record-minmax/src/RecordMinMax.cpp
+++ b/compiler/record-minmax/src/RecordMinMax.cpp
@@ -17,12 +17,12 @@
#include "RecordMinMax.h"
#include "RecordFunction.h"
#include "MinMaxObserver.h"
-#include "HDF5Importer.h"
#include <luci/Importer.h>
#include <luci/CircleExporter.h>
#include <luci/CircleFileExpContract.h>
#include <luci/IR/CircleQuantParam.h>
+#include <dio_hdf5/HDF5Importer.h>
#include <dirent.h>
#include <algorithm>
@@ -33,12 +33,34 @@
#include <iostream>
#include <random>
-using Shape = luci_interpreter::Shape;
-using DataType = luci_interpreter::DataType;
+using Shape = std::vector<loco::Dimension>;
+using DataType = loco::DataType;
namespace
{
+uint32_t numElements(const luci::CircleNode *node)
+{
+ uint32_t num_elements = 1;
+ for (uint32_t i = 0; i < node->rank(); i++)
+ num_elements *= node->dim(i).value();
+
+ return num_elements;
+}
+
+// Throw an exception if the input meets one of the following conditions:
+// 1. It has an unknown dimension
+// 2. Its number of elements is 0
+void checkInputDimension(const luci::CircleInput *input)
+{
+ for (uint32_t i = 0; i < input->rank(); i++)
+ if (!input->dim(i).known())
+ throw std::runtime_error(input->name() + " has unknown dimension");
+
+ if (numElements(input) == 0)
+ throw std::runtime_error(input->name() + " is a zero-sized input");
+}
+
void readDataFromFile(const std::string &filename, std::vector<char> &data, size_t data_size)
{
assert(data.size() == data_size); // FIX_CALLER_UNLESS
@@ -62,6 +84,21 @@ std::vector<uint8_t> genRandomBoolData(std::mt19937 &gen, uint32_t num_elements)
return input_data;
}
+template <typename T>
+std::vector<T> genRandomIntData(std::mt19937 &gen, uint32_t num_elements, T min, T max)
+{
+ std::uniform_int_distribution<T> dist(min, max);
+ std::vector<T> input_data(num_elements);
+
+ // Write random data
+ {
+ auto const generator = [&gen, &dist]() { return dist(gen); };
+ std::generate(begin(input_data), end(input_data), generator);
+ }
+
+ return input_data;
+}
+
/**
* @brief getTensorSize will return size in bytes
*/
@@ -83,12 +120,12 @@ void verifyTypeShape(const luci::CircleInput *input_node, const DataType &dtype,
if (dtype != input_node->dtype())
throw std::runtime_error("Wrong input type.");
- if (shape.num_dims() != input_node->rank())
+ if (shape.size() != input_node->rank())
throw std::runtime_error("Input rank mismatch.");
- for (uint32_t i = 0; i < shape.num_dims(); i++)
+ for (uint32_t i = 0; i < shape.size(); i++)
{
- if (shape.dim(i) != input_node->dim(i).value())
+ if (not(shape.at(i) == input_node->dim(i)))
throw std::runtime_error("Input shape mismatch.");
}
}
@@ -188,6 +225,7 @@ void RecordMinMax::profileRawDataDirectory(const std::string &mode,
for (auto input : input_nodes)
{
const auto *input_node = loco::must_cast<const luci::CircleInput *>(input);
+ checkInputDimension(input_node);
total_input_size += getTensorSize(input_node);
}
@@ -254,6 +292,7 @@ void RecordMinMax::profileRawData(const std::string &mode, const std::string &in
for (auto input : input_nodes)
{
const auto *input_node = loco::must_cast<const luci::CircleInput *>(input);
+ checkInputDimension(input_node);
total_input_size += getTensorSize(input_node);
}
@@ -296,12 +335,12 @@ void RecordMinMax::profileData(const std::string &mode, const std::string &input
{
try
{
- HDF5Importer importer(input_data_path);
- importer.importGroup();
+ dio::hdf5::HDF5Importer importer(input_data_path);
+ importer.importGroup("value");
bool is_raw_data = importer.isRawData();
- const auto num_records = importer.numRecords();
+ const auto num_records = importer.numData();
if (num_records == 0)
throw std::runtime_error("The input data file does not contain any record.");
@@ -319,12 +358,13 @@ void RecordMinMax::profileData(const std::string &mode, const std::string &input
{
const auto *input_node = loco::must_cast<const luci::CircleInput *>(input_nodes[input_idx]);
assert(input_node->index() == input_idx);
+ checkInputDimension(input_node);
std::vector<char> input_data(getTensorSize(input_node));
if (!is_raw_data)
{
DataType dtype;
- Shape shape(input_node->rank());
+ Shape shape;
importer.readTensor(record_idx, input_idx, &dtype, &shape, input_data.data());
// Check the type and the shape of the input data is valid
@@ -376,43 +416,47 @@ void RecordMinMax::profileDataWithRandomInputs(const std::string &mode, float mi
{
const auto *input_node = loco::must_cast<const luci::CircleInput *>(input_nodes[input_idx]);
assert(input_node->index() == input_idx);
- uint32_t num_elements = 1;
- for (uint32_t i = 0; i < input_node->rank(); i++)
- {
- if (!input_node->dim(i).known())
- throw std::runtime_error("Input dimension must be known");
+ checkInputDimension(input_node);
- num_elements *= input_node->dim(i).value();
- }
-
- if (num_elements == 0)
- throw std::runtime_error("Only support non-zero sized inputs");
+ const auto num_elements = numElements(input_node);
// TODO Support more input data types
assert(input_node->dtype() == loco::DataType::FLOAT32 ||
- input_node->dtype() == loco::DataType::BOOL);
+ input_node->dtype() == loco::DataType::BOOL ||
+ input_node->dtype() == loco::DataType::S32 ||
+ input_node->dtype() == loco::DataType::S64);
if (input_node->dtype() == DataType::FLOAT32)
- // clang-format off
{
- std::vector<float> input_data(num_elements);
+ std::vector<float> input_data(num_elements);
- // Write random data
- for (auto &iter : input_data)
- iter = static_cast<float>(dist(gen));
+ // Write random data
+ for (auto &iter : input_data)
+ iter = static_cast<float>(dist(gen));
- // TODO: Input data is copied twice (file -> buffer (input_data) -> interpreter inputs)
- // We can redcue the copy by directly writing data from file to interpreter inputs
- _interpreter->writeInputTensor(input_node, input_data.data(),
- input_data.size() * sizeof(float));
+ // TODO: Input data is copied twice (file -> buffer (input_data) -> interpreter inputs)
+      // We can reduce the copy by directly writing data from file to interpreter inputs
+ _interpreter->writeInputTensor(input_node, input_data.data(),
+ input_data.size() * sizeof(float));
}
- // clang-format on
else if (input_node->dtype() == DataType::BOOL)
{
auto input_data = genRandomBoolData(gen, num_elements);
_interpreter->writeInputTensor(input_node, input_data.data(),
input_data.size() * sizeof(uint8_t));
}
+ else if (input_node->dtype() == DataType::S32)
+ {
+ auto input_data = genRandomIntData<int32_t>(gen, num_elements, 0, 100);
+ _interpreter->writeInputTensor(input_node, input_data.data(),
+ input_data.size() * sizeof(int32_t));
+ }
+ else if (input_node->dtype() == DataType::S64)
+ {
+ auto input_data = genRandomIntData<int64_t>(gen, num_elements, 0, 100);
+ _interpreter->writeInputTensor(input_node, input_data.data(),
+ input_data.size() * sizeof(int64_t));
+ }
}
_interpreter->interpret();
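
Pulling the pieces of this hunk together, the migrated input-loading flow looks roughly like the sketch below. It is assembled only from the calls visible above (importGroup("value"), numData(), readTensor with a std::vector<loco::Dimension> shape) and is not verified against the dio-hdf5 headers:

    #include <dio_hdf5/HDF5Importer.h>
    #include <loco.h>

    #include <stdexcept>
    #include <string>
    #include <vector>

    // Read the first input of the first record into 'buffer' (sized by the caller).
    void loadFirstRecord(const std::string &path, void *buffer)
    {
      dio::hdf5::HDF5Importer importer(path);
      importer.importGroup("value"); // the group name is now passed explicitly

      if (importer.numData() == 0) // numRecords() was renamed to numData()
        throw std::runtime_error("The input data file does not contain any record.");

      loco::DataType dtype;
      std::vector<loco::Dimension> shape; // the new Shape alias in this file
      importer.readTensor(0, 0, &dtype, &shape, buffer);
    }
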
diff --git a/compiler/souschef/CMakeLists.txt b/compiler/souschef/CMakeLists.txt
index ca7eddc6f..f57102f1f 100644
--- a/compiler/souschef/CMakeLists.txt
+++ b/compiler/souschef/CMakeLists.txt
@@ -1,7 +1,7 @@
nnas_find_package(Protobuf QUIET)
if(NOT Protobuf_FOUND)
- message(STATUS "Build souschef: FAILED (missing Protobuf")
+ message(STATUS "Build souschef: FAILED (missing Protobuf)")
return()
endif(NOT Protobuf_FOUND)
diff --git a/compiler/tf2tfliteV2-conversion-test/CMakeLists.txt b/compiler/tf2tfliteV2-conversion-test/CMakeLists.txt
index 3e7e57747..0b4739374 100644
--- a/compiler/tf2tfliteV2-conversion-test/CMakeLists.txt
+++ b/compiler/tf2tfliteV2-conversion-test/CMakeLists.txt
@@ -72,7 +72,7 @@ list(APPEND TEST_DEPS "${TEST_RUNNER}")
get_target_property(ARTIFACTS_BIN_PATH testDataGenerator BINARY_DIR)
-set(VIRTUALENV "${NNCC_OVERLAY_DIR}/venv_1_13_2")
+set(VIRTUALENV "${NNCC_OVERLAY_DIR}/venv_2_8_0")
###
### Generate test.config
diff --git a/compiler/tfl-inspect/CMakeLists.txt b/compiler/tfl-inspect/CMakeLists.txt
index 6ba55c357..9e1cb720f 100644
--- a/compiler/tfl-inspect/CMakeLists.txt
+++ b/compiler/tfl-inspect/CMakeLists.txt
@@ -10,5 +10,6 @@ add_executable(tfl-inspect ${DRIVER} ${SOURCES})
target_include_directories(tfl-inspect PRIVATE src)
target_link_libraries(tfl-inspect arser)
target_link_libraries(tfl-inspect foder)
-target_link_libraries(tfl-inspect mio_tflite260)
+target_link_libraries(tfl-inspect mio_tflite280)
+target_link_libraries(tfl-inspect mio_tflite280_helper)
target_link_libraries(tfl-inspect safemain)
diff --git a/compiler/tfl-inspect/requires.cmake b/compiler/tfl-inspect/requires.cmake
index 9a7477b81..a11f6b200 100644
--- a/compiler/tfl-inspect/requires.cmake
+++ b/compiler/tfl-inspect/requires.cmake
@@ -1,4 +1,4 @@
require("arser")
require("foder")
-require("mio-tflite260")
+require("mio-tflite280")
require("safemain")
diff --git a/compiler/tfl-inspect/src/Reader.cpp b/compiler/tfl-inspect/src/Reader.cpp
index 41a8396bb..6c4529516 100644
--- a/compiler/tfl-inspect/src/Reader.cpp
+++ b/compiler/tfl-inspect/src/Reader.cpp
@@ -16,6 +16,8 @@
#include "Reader.h"
+#include <mio_tflite280/Helper.h>
+
#include <cassert>
#include <sstream>
#include <string>
@@ -23,72 +25,6 @@
namespace tflinspect
{
-// This will provide v3/v3a format neutral BuiltinOperator
-tflite::BuiltinOperator builtin_code_neutral(const tflite::OperatorCode *opcode)
-{
- assert(opcode != nullptr);
- int8_t dp_code = opcode->deprecated_builtin_code();
- // 127 is max of int8_t which is upper bound of v3 builtin_code
- // NOTE TensorFlow uses 'BuiltinOperator_PLACEHOLDER_FOR_GREATER_OP_CODES' for 127
- if (dp_code < 127 && dp_code >= 0)
- return tflite::BuiltinOperator(dp_code);
- return opcode->builtin_code();
-}
-
-bool is_valid(const tflite::OperatorCode *opcode)
-{
- tflite::BuiltinOperator code = builtin_code_neutral(opcode);
- return (tflite::BuiltinOperator_MIN <= code && code <= tflite::BuiltinOperator_MAX);
-}
-
-bool is_custom(const tflite::OperatorCode *opcode)
-{
- tflite::BuiltinOperator code = builtin_code_neutral(opcode);
- return (code == tflite::BuiltinOperator_CUSTOM);
-}
-
-std::string opcode_name(const tflite::OperatorCode *opcode)
-{
- assert(opcode);
-
- if (!is_valid(opcode))
- {
- std::ostringstream oss;
- oss << "(invalid)";
- return oss.str();
- }
-
- if (is_custom(opcode))
- {
- if (!opcode->custom_code())
- return "(invalid custom)";
-
- std::string custom_op = "CUSTOM(";
- custom_op += opcode->custom_code()->c_str();
- custom_op += ")";
- return custom_op;
- }
-
- tflite::BuiltinOperator code = builtin_code_neutral(opcode);
- return tflite::EnumNameBuiltinOperator(code);
-}
-
-const char *tensor_type(const tflite::Tensor *tensor)
-{
- return tflite::EnumNameTensorType(tensor->type());
-}
-
-const char *tensor_name(const tflite::Tensor *tensor)
-{
- static const char *kEmptyTensorName = "(noname)";
-
- auto name = tensor->name();
- if (name)
- return name->c_str();
-
- return kEmptyTensorName;
-}
-
Reader::Reader(const tflite::Model *model)
{
_subgraphs = model->subgraphs();
@@ -135,7 +71,7 @@ tflite::BuiltinOperator Reader::builtin_code(const tflite::Operator *op) const
assert(index < _op_codes.size());
const tflite::OperatorCode *opcode = _op_codes.at(index);
- return tflinspect::builtin_code_neutral(opcode);
+ return mio::tflite::builtin_code_neutral(opcode);
}
std::string Reader::opcode_name(const tflite::Operator *op) const
@@ -144,14 +80,14 @@ std::string Reader::opcode_name(const tflite::Operator *op) const
assert(index < _op_codes.size());
const tflite::OperatorCode *opcode = _op_codes.at(index);
- if (!is_valid(opcode))
+ if (!mio::tflite::is_valid(opcode))
{
std::ostringstream oss;
oss << "(invalid: " << index << ")";
return oss.str();
}
- return tflinspect::opcode_name(opcode);
+ return mio::tflite::opcode_name(opcode);
}
bool Reader::select_subgraph(uint32_t sgindex)
diff --git a/compiler/tfl-inspect/src/Reader.h b/compiler/tfl-inspect/src/Reader.h
index 91b7bb940..98554cf85 100644
--- a/compiler/tfl-inspect/src/Reader.h
+++ b/compiler/tfl-inspect/src/Reader.h
@@ -36,13 +36,6 @@ template <typename T> std::vector<T> as_index_vector(const flatbuffers::Vector<T
return ret;
}
-tflite::BuiltinOperator builtin_code_neutral(const tflite::OperatorCode *opcode);
-bool is_valid(const tflite::OperatorCode *opcode);
-bool is_custom(const tflite::OperatorCode *opcode);
-std::string opcode_name(const tflite::OperatorCode *opcode);
-const char *tensor_type(const tflite::Tensor *tensor);
-const char *tensor_name(const tflite::Tensor *tensor);
-
/**
* @brief Loads TF lite file and provides helpers to access attributes
*/
diff --git a/compiler/tfl-verify/CMakeLists.txt b/compiler/tfl-verify/CMakeLists.txt
index a87d30c5e..2fba335ea 100644
--- a/compiler/tfl-verify/CMakeLists.txt
+++ b/compiler/tfl-verify/CMakeLists.txt
@@ -8,6 +8,6 @@ add_executable(tfl-verify ${SOURCES})
target_include_directories(tfl-verify PRIVATE src)
target_link_libraries(tfl-verify arser)
target_link_libraries(tfl-verify foder)
-target_link_libraries(tfl-verify mio_tflite260)
+target_link_libraries(tfl-verify mio_tflite280)
target_link_libraries(tfl-verify safemain)
target_link_libraries(tfl-verify cwrap)
diff --git a/compiler/tfl-verify/requires.cmake b/compiler/tfl-verify/requires.cmake
index 72803d890..b107bdfe7 100644
--- a/compiler/tfl-verify/requires.cmake
+++ b/compiler/tfl-verify/requires.cmake
@@ -1,5 +1,5 @@
require("arser")
require("foder")
-require("mio-tflite260")
+require("mio-tflite280")
require("safemain")
require("cwrap")
diff --git a/compiler/tflchef/CMakeLists.txt b/compiler/tflchef/CMakeLists.txt
index ac7fe4b7c..948b1cecd 100644
--- a/compiler/tflchef/CMakeLists.txt
+++ b/compiler/tflchef/CMakeLists.txt
@@ -5,10 +5,10 @@ if(NOT Protobuf_FOUND)
return()
endif(NOT Protobuf_FOUND)
-if(NOT TARGET mio_tflite260)
- message(STATUS "Build tflchef: FAILED (missing mio_tflite260)")
+if(NOT TARGET mio_tflite280)
+ message(STATUS "Build tflchef: FAILED (missing mio_tflite280)")
return()
-endif(NOT TARGET mio_tflite260)
+endif(NOT TARGET mio_tflite280)
# Recipe Parser
add_subdirectory(proto)
diff --git a/compiler/tflchef/core/CMakeLists.txt b/compiler/tflchef/core/CMakeLists.txt
index 413b78b15..6b6fed57b 100644
--- a/compiler/tflchef/core/CMakeLists.txt
+++ b/compiler/tflchef/core/CMakeLists.txt
@@ -5,5 +5,5 @@ target_include_directories(tflchef_core PUBLIC include)
target_include_directories(tflchef_core PRIVATE src)
target_link_libraries(tflchef_core tflchef_proto)
target_link_libraries(tflchef_core tflchef_log)
-target_link_libraries(tflchef_core mio_tflite260)
+target_link_libraries(tflchef_core mio_tflite280)
target_link_libraries(tflchef_core souschef)
diff --git a/compiler/tflchef/core/src/ModelChef.cpp b/compiler/tflchef/core/src/ModelChef.cpp
index ada5ff5d1..93b9334a6 100644
--- a/compiler/tflchef/core/src/ModelChef.cpp
+++ b/compiler/tflchef/core/src/ModelChef.cpp
@@ -722,15 +722,13 @@ GeneratedModel cook(const ::tflchef::ModelRecipe &model_recipe)
auto inputs = flatbuffer_builder->CreateVector(tensormap_inputs);
auto outputs = flatbuffer_builder->CreateVector(tensormap_outputs);
- auto method_name = flatbuffer_builder->CreateString(rec_signature_def.method_name());
- auto key = flatbuffer_builder->CreateString(rec_signature_def.key());
- // TODO add validation for method_name and key
+ auto signature_key = flatbuffer_builder->CreateString(rec_signature_def.signature_key());
+ // TODO add validation for signature_key
::tflite::SignatureDefBuilder signature_def_builder{*flatbuffer_builder};
signature_def_builder.add_inputs(inputs);
signature_def_builder.add_outputs(outputs);
- signature_def_builder.add_method_name(method_name);
- signature_def_builder.add_key(key);
+ signature_def_builder.add_signature_key(signature_key);
signature_def_builder.add_subgraph_index(rec_signature_def.subgraph_index());
signdef_vec.emplace_back(signature_def_builder.Finish());
diff --git a/compiler/tflchef/core/src/Op/FullyConnected.cpp b/compiler/tflchef/core/src/Op/FullyConnected.cpp
index 45269916c..7173a67ba 100644
--- a/compiler/tflchef/core/src/Op/FullyConnected.cpp
+++ b/compiler/tflchef/core/src/Op/FullyConnected.cpp
@@ -29,6 +29,7 @@ flatbuffers::Offset<void> FullyConnectedChef::value(flatbuffers::FlatBufferBuild
tflite::FullyConnectedOptionsBuilder fc_options_builder{fbb};
fc_options_builder.add_fused_activation_function(tflite_activation);
+ fc_options_builder.add_keep_num_dims(operation.fullyconnected_options().keep_num_dims());
return fc_options_builder.Finish().Union();
}
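
With keep_num_dims now round-tripped through tflchef, a recipe can pin the option explicitly. A hypothetical operation fragment (operand declarations omitted; tensor names are illustrative, in the recipe syntax used elsewhere in this patch):

    operation {
      type: "FullyConnected"
      fullyconnected_options {
        activation: NONE
        keep_num_dims: true
      }
      input: "ifm"
      input: "weights"
      input: "bias"
      output: "ofm"
    }
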
diff --git a/compiler/tflchef/core/src/Op/SVDF.cpp b/compiler/tflchef/core/src/Op/SVDF.cpp
new file mode 100644
index 000000000..690896cf1
--- /dev/null
+++ b/compiler/tflchef/core/src/Op/SVDF.cpp
@@ -0,0 +1,41 @@
+/*
+ * Copyright (c) 2022 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "SVDF.h"
+#include "Convert.h"
+
+#include <cassert>
+
+flatbuffers::Offset<void> SVDFChef::value(flatbuffers::FlatBufferBuilder &fbb) const
+{
+ assert(_operation->has_svdf_options());
+
+ const auto &svdf_options = _operation->svdf_options();
+
+ const auto tflite_activation = as_tflite_activation(svdf_options.activation());
+
+ tflite::SVDFOptionsBuilder svdf_options_builder{fbb};
+ svdf_options_builder.add_fused_activation_function(tflite_activation);
+ svdf_options_builder.add_asymmetric_quantize_inputs(svdf_options.asymmetric_quantize_inputs());
+ svdf_options_builder.add_rank(svdf_options.rank());
+
+ return svdf_options_builder.Finish().Union();
+}
+
+std::unique_ptr<OpChef> SVDFChefFactory::create(const tflchef::Operation *operation) const
+{
+ return std::unique_ptr<OpChef>{new SVDFChef{operation}};
+}
diff --git a/compiler/tflchef/core/src/Op/SVDF.h b/compiler/tflchef/core/src/Op/SVDF.h
new file mode 100644
index 000000000..9bf0b6efb
--- /dev/null
+++ b/compiler/tflchef/core/src/Op/SVDF.h
@@ -0,0 +1,46 @@
+/*
+ * Copyright (c) 2022 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __OP_SVDF_H__
+#define __OP_SVDF_H__
+
+#include "OpChef.h"
+
+class SVDFChef final : public OpChef
+{
+public:
+ explicit SVDFChef(const tflchef::Operation *operation) : _operation{operation}
+ {
+ // DO NOTHING
+ }
+
+public:
+ tflite::BuiltinOperator code(void) const override { return tflite::BuiltinOperator_SVDF; }
+
+ tflite::BuiltinOptions type(void) const override { return tflite::BuiltinOptions_SVDFOptions; }
+
+ flatbuffers::Offset<void> value(flatbuffers::FlatBufferBuilder &fbb) const override;
+
+private:
+ const tflchef::Operation *_operation;
+};
+
+struct SVDFChefFactory final : public OpChefFactory
+{
+ std::unique_ptr<OpChef> create(const tflchef::Operation *operation) const override;
+};
+
+#endif // __OP_SVDF_H__
diff --git a/compiler/tflchef/core/src/OpChef.def b/compiler/tflchef/core/src/OpChef.def
index b1e8a3829..beebd359f 100644
--- a/compiler/tflchef/core/src/OpChef.def
+++ b/compiler/tflchef/core/src/OpChef.def
@@ -104,6 +104,7 @@ OP_CHEF(Squeeze, SqueezeChefFactory)
OP_CHEF(StridedSlice, StridedSliceChefFactory)
OP_CHEF(Sub, SubChefFactory)
OP_CHEF(Sum, SumChefFactory)
+OP_CHEF(SVDF, SVDFChefFactory)
OP_CHEF(Tanh, TanhChefFactory)
OP_CHEF(Tile, TileChefFactory)
OP_CHEF(TopKV2, TopKV2ChefFactory)
diff --git a/compiler/tflchef/core/src/OpChefs.h b/compiler/tflchef/core/src/OpChefs.h
index 35688ba95..159019abf 100644
--- a/compiler/tflchef/core/src/OpChefs.h
+++ b/compiler/tflchef/core/src/OpChefs.h
@@ -117,6 +117,7 @@
#include "Op/StridedSlice.h"
#include "Op/Sub.h"
#include "Op/Sum.h"
+#include "Op/SVDF.h"
#include "Op/Tanh.h"
#include "Op/Tile.h"
#include "Op/TopKV2.h"
diff --git a/compiler/tflchef/proto/tflchef.proto b/compiler/tflchef/proto/tflchef.proto
index 4162cb123..1abefafe1 100644
--- a/compiler/tflchef/proto/tflchef.proto
+++ b/compiler/tflchef/proto/tflchef.proto
@@ -182,6 +182,7 @@ message FloorModOptions {
message FullyConnectedOptions {
optional Activation activation = 1 [default = NONE];
+ optional bool keep_num_dims = 2 [ default = false ];
}
message AddOptions {
@@ -366,6 +367,12 @@ message SquaredDifferenceOptions {
// None
}
+message SVDFOptions {
+ optional int32 rank = 1 [default = 0];
+ optional Activation activation = 2 [default = NONE];
+ optional bool asymmetric_quantize_inputs = 3 [default = false];
+}
+
message FillOptions {
// None
}
@@ -589,7 +596,7 @@ message Operation {
optional ZerosLikeOptions zeros_like_options = 153;
// ConcatEmbeddingsOptions 154
// LSHProjectionOptions 155
- // SVDFOptions 156
+ optional SVDFOptions svdf_options = 156;
// RNNOptions 157
optional L2NormOptions l2norm_options = 158;
optional LocalResponseNormalizationOptions local_response_normalization_options = 159;
@@ -658,8 +665,8 @@ message TensorMap {
message SignatureDef {
repeated TensorMap inputs = 4;
repeated TensorMap outputs = 5;
- optional string method_name = 6;
- optional string key = 10;
+ optional string signature_key = 6;
+  // optional string key = 10; // obsolete as of TF 2.8.0
optional uint32 subgraph_index = 12;
}
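
A recipe exercising the new SVDFOptions might carry a fragment like the following (illustrative only; SVDF expects five inputs per the filler added later in this patch, with the bias optional and the last input a variable state tensor, and the operand declarations are omitted here):

    operation {
      type: "SVDF"
      svdf_options {
        rank: 1
        activation: RELU
        asymmetric_quantize_inputs: false
      }
      input: "ifm"
      input: "weight_feature"
      input: "weight_time"
      input: "bias"
      input: "activation_state"
      output: "ofm"
    }
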
diff --git a/compiler/tflchef/requires.cmake b/compiler/tflchef/requires.cmake
index 78bfa2d07..a01da4258 100644
--- a/compiler/tflchef/requires.cmake
+++ b/compiler/tflchef/requires.cmake
@@ -1,7 +1,7 @@
require("arser")
require("nnkit")
require("cwrap")
-require("mio-tflite260")
+require("mio-tflite280")
require("safemain")
require("hermes")
require("hermes-std")
diff --git a/compiler/tflchef/tests/CMakeLists.txt b/compiler/tflchef/tests/CMakeLists.txt
index 5c4dff012..26cf67f4f 100644
--- a/compiler/tflchef/tests/CMakeLists.txt
+++ b/compiler/tflchef/tests/CMakeLists.txt
@@ -1,10 +1,11 @@
-if(NOT TARGET nnkit-run)
- return()
-endif(NOT TARGET nnkit-run)
-
-if(NOT TARGET nnkit_tflite_backend)
- return()
-endif(NOT TARGET nnkit_tflite_backend)
+set(TFLCHEF_FILE_PATH $<TARGET_FILE:tflchef-file>)
+set(TFLCHEF_REVERSE_PATH $<TARGET_FILE:tflchef-reverse>)
+if(DEFINED ENV{BUILD_HOST_EXEC})
+  # TODO use a better way to represent the path to host executables
+ set(TFLCHEF_FILE_PATH $ENV{BUILD_HOST_EXEC}/compiler/tflchef/tools/file/tflchef-file)
+ set(TFLCHEF_REVERSE_PATH $ENV{BUILD_HOST_EXEC}/compiler/tflchef/tools/reverse/tflchef-reverse)
+ message(STATUS "TFLCHEF_FILE_PATH = ${TFLCHEF_FILE_PATH}")
+endif(DEFINED ENV{BUILD_HOST_EXEC})
nncc_find_resource(TensorFlowLiteRecipes)
set(TENSORFLOWLITERECIPES_DIR "${TensorFlowLiteRecipes_DIR}")
@@ -26,8 +27,8 @@ foreach(RECIPE IN ITEMS ${RECIPES})
# Generate .tflite
add_custom_command(OUTPUT ${RECIPE_OUTPUT_FILE}
- COMMAND tflchef-file ${RECIPE_SOURCE_FILE} ${RECIPE_OUTPUT_FILE}
- DEPENDS tflchef-file ${RECIPE_SOURCE_FILE}
+ COMMAND ${TFLCHEF_FILE_PATH} ${RECIPE_SOURCE_FILE} ${RECIPE_OUTPUT_FILE}
+ DEPENDS ${TFLCHEF_FILE_PATH} ${RECIPE_SOURCE_FILE}
COMMENT "Generating ${RECIPE_OUTPUT_FILE}")
list(APPEND TESTS ${RECIPE_PREFIX})
@@ -52,8 +53,8 @@ foreach(RECIPE IN ITEMS ${RECIPES})
# Generate .tflite
add_custom_command(OUTPUT ${RECIPE_OUTPUT_FILE}
- COMMAND tflchef-file ${RECIPE_SOURCE_FILE} ${RECIPE_OUTPUT_FILE}
- DEPENDS tflchef-file ${RECIPE_SOURCE_FILE}
+ COMMAND ${TFLCHEF_FILE_PATH} ${RECIPE_SOURCE_FILE} ${RECIPE_OUTPUT_FILE}
+ DEPENDS ${TFLCHEF_FILE_PATH} ${RECIPE_SOURCE_FILE}
COMMENT "Generating ${RECIPE_OUTPUT_FILE}")
list(APPEND TESTS ${RECIPE_PREFIX})
@@ -76,16 +77,16 @@ foreach(TFLITEFILE IN ITEMS ${GEN_TFLITEFILES})
# Generate .gen.recipe from generated .tflite
add_custom_command(OUTPUT ${RECIPE_GEN_OUTPUT_FILE}
- COMMAND tflchef-reverse ${RECIPE_OUTPUT_FILE} ${RECIPE_GEN_OUTPUT_FILE}
- DEPENDS tflchef-reverse ${RECIPE_OUTPUT_FILE}
+ COMMAND ${TFLCHEF_REVERSE_PATH} ${RECIPE_OUTPUT_FILE} ${RECIPE_GEN_OUTPUT_FILE}
+ DEPENDS ${TFLCHEF_REVERSE_PATH} ${RECIPE_OUTPUT_FILE}
COMMENT "Generating ${RECIPE_GEN_OUTPUT_FILE}")
# now we are going to generate .gen.tflite from .gen.recipe
# to check generated .gen.recipe file is correct by using it.
# as weight values may be different, binary comparison is not acceptable.
add_custom_command(OUTPUT ${RECIPE_GEN_OUTPUT_FILE2}
- COMMAND tflchef-file ${RECIPE_GEN_OUTPUT_FILE} ${RECIPE_GEN_OUTPUT_FILE2}
- DEPENDS tflchef-file ${RECIPE_GEN_OUTPUT_FILE}
+ COMMAND ${TFLCHEF_FILE_PATH} ${RECIPE_GEN_OUTPUT_FILE} ${RECIPE_GEN_OUTPUT_FILE2}
+ DEPENDS ${TFLCHEF_FILE_PATH} ${RECIPE_GEN_OUTPUT_FILE}
COMMENT "Generating ${RECIPE_GEN_OUTPUT_FILE2}")
list(APPEND TESTS ${TFLITE_PREFIX}.gen)
@@ -104,13 +105,13 @@ foreach(TFLITEFILE IN ITEMS ${GEN_TFLITEFILES})
# Generate .gen.recipe from generated .tflite
add_custom_command(OUTPUT ${RECIPE_GEN_OUTPUT_FILE}
- COMMAND tflchef-reverse ${RECIPE_OUTPUT_FILE} ${RECIPE_GEN_OUTPUT_FILE}
- DEPENDS tflchef-reverse ${RECIPE_OUTPUT_FILE}
+ COMMAND ${TFLCHEF_REVERSE_PATH} ${RECIPE_OUTPUT_FILE} ${RECIPE_GEN_OUTPUT_FILE}
+ DEPENDS ${TFLCHEF_REVERSE_PATH} ${RECIPE_OUTPUT_FILE}
COMMENT "Generating ${RECIPE_GEN_OUTPUT_FILE}")
add_custom_command(OUTPUT ${RECIPE_GEN_OUTPUT_FILE2}
- COMMAND tflchef-file ${RECIPE_GEN_OUTPUT_FILE} ${RECIPE_GEN_OUTPUT_FILE2}
- DEPENDS tflchef-file ${RECIPE_GEN_OUTPUT_FILE}
+ COMMAND ${TFLCHEF_FILE_PATH} ${RECIPE_GEN_OUTPUT_FILE} ${RECIPE_GEN_OUTPUT_FILE2}
+ DEPENDS ${TFLCHEF_FILE_PATH} ${RECIPE_GEN_OUTPUT_FILE}
COMMENT "Generating ${RECIPE_GEN_OUTPUT_FILE2}")
list(APPEND TESTS ${TFLITE_PREFIX}.gen)
@@ -123,7 +124,9 @@ add_custom_target(tflchef_testfiles ALL DEPENDS ${TESTFILES})
# Using mio_tflite_validate temporarily as it only calls flatbuffer validate
# TODO do testing with running the model with runtime/interpreter
+# NOTE for ARM32 cross build, $<TARGET_FILE:mio_tflite280_validate> is used as-is
+# as the test should run on an ARM32 device
add_test(NAME tflchef_test
COMMAND "${CMAKE_CURRENT_SOURCE_DIR}/runvalidate.sh"
- $<TARGET_FILE:mio_tflite_validate>
+ $<TARGET_FILE:mio_tflite280_validate>
${TESTS})
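
The BUILD_HOST_EXEC escape hatch replaces the old nnkit gate: when cross compiling (e.g. for ARM32), test file generation can reuse tflchef binaries from a completed host build instead of the target binaries. A hypothetical configure-time invocation (paths and toolchain flag are illustrative, not from this patch):

    # point test generation at an existing host build before configuring
    BUILD_HOST_EXEC=$HOME/one/build_host cmake -S . -B build_arm32 -DCMAKE_TOOLCHAIN_FILE=arm32.cmake
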
diff --git a/compiler/tflchef/tests/signature_def_index/test.recipe b/compiler/tflchef/tests/signature_def_index/test.recipe
index 4481752ef..9e95edf00 100644
--- a/compiler/tflchef/tests/signature_def_index/test.recipe
+++ b/compiler/tflchef/tests/signature_def_index/test.recipe
@@ -50,8 +50,7 @@ signature_def {
name: "ofm1"
tensor_index: 1
}
- method_name: "serving_default"
- key: "serv"
+ signature_key: "serving_default"
subgraph_index: 0
}
input: "ifm"
diff --git a/compiler/tflchef/tests/signature_def_name/test.recipe b/compiler/tflchef/tests/signature_def_name/test.recipe
index 79be25138..4847f7dd8 100644
--- a/compiler/tflchef/tests/signature_def_name/test.recipe
+++ b/compiler/tflchef/tests/signature_def_name/test.recipe
@@ -50,8 +50,7 @@ signature_def {
name: "out1"
tensor: "ofm1"
}
- method_name: "serving_default"
- key: "serv"
+ signature_key: "serving_default"
subgraph_index: 0
}
input: "ifm"
diff --git a/compiler/tflchef/tflite/CMakeLists.txt b/compiler/tflchef/tflite/CMakeLists.txt
index 3c4c3fff6..3c3352b0a 100644
--- a/compiler/tflchef/tflite/CMakeLists.txt
+++ b/compiler/tflchef/tflite/CMakeLists.txt
@@ -4,6 +4,7 @@ add_library(tflchef_tflite STATIC ${SOURCES})
target_include_directories(tflchef_tflite PUBLIC include)
target_include_directories(tflchef_tflite PRIVATE src)
target_link_libraries(tflchef_tflite tflchef_proto)
-target_link_libraries(tflchef_tflite mio_tflite260)
+target_link_libraries(tflchef_tflite mio_tflite280)
+target_link_libraries(tflchef_tflite mio_tflite280_helper)
target_link_libraries(tflchef_tflite cwrap)
target_link_libraries(tflchef_tflite souschef)
diff --git a/compiler/tflchef/tflite/src/Op/FullyConnected.cpp b/compiler/tflchef/tflite/src/Op/FullyConnected.cpp
index 1f6e73aa6..bbc749fe4 100644
--- a/compiler/tflchef/tflite/src/Op/FullyConnected.cpp
+++ b/compiler/tflchef/tflite/src/Op/FullyConnected.cpp
@@ -48,6 +48,7 @@ tflchef::Operation *TFliteOpFullyConnected::build(const tflite::Operator *op, TF
auto op_options = operation->mutable_fullyconnected_options();
op_options->set_activation(as_tflchef_activation(op_params->fused_activation_function()));
+ op_options->set_keep_num_dims(op_params->keep_num_dims());
return operation;
}
diff --git a/compiler/tflchef/tflite/src/Op/SVDF.cpp b/compiler/tflchef/tflite/src/Op/SVDF.cpp
new file mode 100644
index 000000000..015f968a8
--- /dev/null
+++ b/compiler/tflchef/tflite/src/Op/SVDF.cpp
@@ -0,0 +1,59 @@
+/*
+ * Copyright (c) 2022 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "SVDF.h"
+
+#include "Convert.h"
+
+namespace tflchef
+{
+
+void TFliteOpSVDF::filler(const tflite::Operator *op, TFliteImport *import,
+ tflchef::ModelRecipe *model_recipe) const
+{
+ const std::vector<int32_t> &inputs = as_index_vector(op->inputs());
+ assert(inputs.size() == 5);
+
+  // An optional input tensor has a negative index.
+ const bool hasBias = (inputs.at(3) >= 0);
+
+  // Note: the last input is a variable tensor without data
+ import->set_tensor_filler(inputs.at(1));
+ import->set_tensor_filler(inputs.at(2));
+ if (hasBias)
+ import->set_tensor_filler(inputs.at(3));
+}
+
+tflchef::Operation *TFliteOpSVDF::build(const tflite::Operator *op, TFliteImport *import,
+ tflchef::ModelRecipe *model_recipe) const
+{
+ const auto op_params = op->builtin_options_as_SVDFOptions();
+ assert(op_params != nullptr);
+
+ auto operation = model_recipe->add_operation();
+
+ operation->set_type("SVDF");
+
+ auto op_options = operation->mutable_svdf_options();
+
+ op_options->set_activation(as_tflchef_activation(op_params->fused_activation_function()));
+ op_options->set_asymmetric_quantize_inputs(op_params->asymmetric_quantize_inputs());
+ op_options->set_rank(op_params->rank());
+
+ return operation;
+}
+
+} // namespace tflchef
diff --git a/compiler/tflchef/tflite/src/Op/SVDF.h b/compiler/tflchef/tflite/src/Op/SVDF.h
new file mode 100644
index 000000000..a59ca54a2
--- /dev/null
+++ b/compiler/tflchef/tflite/src/Op/SVDF.h
@@ -0,0 +1,39 @@
+/*
+ * Copyright (c) 2022 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __TFLITE_OP_SVDF_H__
+#define __TFLITE_OP_SVDF_H__
+
+#include "TFliteOpChef.h"
+
+namespace tflchef
+{
+
+/**
+ * @brief tflchef operator builder for SVDF
+ */
+class TFliteOpSVDF : public TFliteOpChef
+{
+public:
+ void filler(const tflite::Operator *op, TFliteImport *import,
+ tflchef::ModelRecipe *model_recipe) const override;
+ tflchef::Operation *build(const tflite::Operator *op, TFliteImport *import,
+ tflchef::ModelRecipe *model_recipe) const override;
+};
+
+} // namespace tflchef
+
+#endif // __TFLITE_OP_SVDF_H__
diff --git a/compiler/tflchef/tflite/src/RecipeChef.cpp b/compiler/tflchef/tflite/src/RecipeChef.cpp
index d9215a4c4..0701707c1 100644
--- a/compiler/tflchef/tflite/src/RecipeChef.cpp
+++ b/compiler/tflchef/tflite/src/RecipeChef.cpp
@@ -15,6 +15,7 @@
*/
#include <tflchef/RecipeChef.h>
+#include <mio_tflite280/Helper.h>
#include "Convert.h"
#include "TFliteImport.h"
@@ -42,7 +43,7 @@ void set_inputs(TFliteImport *import, tflchef::Operation *operation, const tflit
else
{
auto tensor = tensors->Get(input);
- std::string name = tensor_name(tensor);
+ std::string name = mio::tflite::tensor_name(tensor);
operation->add_input(name);
}
}
@@ -56,7 +57,7 @@ void set_outputs(TFliteImport *import, tflchef::Operation *operation, const tfli
for (auto output : outputs)
{
auto tensor = tensors->Get(output);
- std::string name = tensor_name(tensor);
+ std::string name = mio::tflite::tensor_name(tensor);
operation->add_output(name);
}
}
@@ -108,7 +109,7 @@ std::unique_ptr<ModelRecipe> generate_recipe(const tflite::Model *model)
::tflchef::Operand *operand = model_recipe->add_operand();
- operand->set_name(tensor_name(tensor));
+ operand->set_name(mio::tflite::tensor_name(tensor));
operand->set_type(as_tflchef_type(tensor->type()));
operand->set_is_variable(tensor->is_variable());
@@ -311,14 +312,14 @@ std::unique_ptr<ModelRecipe> generate_recipe(const tflite::Model *model)
for (const auto input : inputs)
{
auto tensor = tensors->Get(input);
- std::string name = tensor_name(tensor);
+ std::string name = mio::tflite::tensor_name(tensor);
model_recipe->add_input(name);
}
for (const auto output : outputs)
{
auto tensor = tensors->Get(output);
- std::string name = tensor_name(tensor);
+ std::string name = mio::tflite::tensor_name(tensor);
model_recipe->add_output(name);
}
diff --git a/compiler/tflchef/tflite/src/TFliteImport.cpp b/compiler/tflchef/tflite/src/TFliteImport.cpp
index 1462ee7f4..7114ab019 100644
--- a/compiler/tflchef/tflite/src/TFliteImport.cpp
+++ b/compiler/tflchef/tflite/src/TFliteImport.cpp
@@ -18,50 +18,13 @@
#include "Convert.h"
+#include <mio_tflite280/Helper.h>
+
#include <sstream>
namespace tflchef
{
-const char *kEmptyTensorName = "(noname)";
-
-const char *tensor_type(const tflite::Tensor *tensor)
-{
- return tflite::EnumNameTensorType(tensor->type());
-}
-
-const char *tensor_name(const tflite::Tensor *tensor)
-{
- auto name = tensor->name();
- if (name)
- return name->c_str();
- return kEmptyTensorName;
-}
-
-// This will provide v3/v3a format neutral BuiltinOperator
-tflite::BuiltinOperator builtin_code_neutral(const tflite::OperatorCode *opcode)
-{
- assert(opcode != nullptr);
- int8_t dp_code = opcode->deprecated_builtin_code();
- // 127 is max of int8_t which is upper bound of v3 builtin_code
- // NOTE TensorFlow uses 'BuiltinOperator_PLACEHOLDER_FOR_GREATER_OP_CODES' for 127
- if (dp_code < 127 && dp_code >= 0)
- return tflite::BuiltinOperator(dp_code);
- return opcode->builtin_code();
-}
-
-bool is_valid(const tflite::OperatorCode *opcode)
-{
- tflite::BuiltinOperator code = builtin_code_neutral(opcode);
- return (tflite::BuiltinOperator_MIN <= code && code <= tflite::BuiltinOperator_MAX);
-}
-
-bool is_custom(const tflite::OperatorCode *opcode)
-{
- tflite::BuiltinOperator code = builtin_code_neutral(opcode);
- return (code == tflite::BuiltinOperator_CUSTOM);
-}
-
TFliteImport::TFliteImport(const tflite::Model *model)
{
_subgraphs = model->subgraphs();
@@ -104,7 +67,7 @@ tflite::BuiltinOperator TFliteImport::builtin_code(const tflite::Operator *op) c
assert(index < _op_codes.size());
const tflite::OperatorCode *opcode = _op_codes.at(index);
- return builtin_code_neutral(opcode);
+ return mio::tflite::builtin_code_neutral(opcode);
}
std::string TFliteImport::opcode_name(const tflite::Operator *op) const
@@ -113,14 +76,14 @@ std::string TFliteImport::opcode_name(const tflite::Operator *op) const
assert(index < _op_codes.size());
const tflite::OperatorCode *opcode = _op_codes.at(index);
- if (!is_valid(opcode))
+ if (!mio::tflite::is_valid(opcode))
{
std::ostringstream oss;
oss << "(invalid: " << index << ")";
return oss.str();
}
- if (is_custom(opcode))
+ if (mio::tflite::is_custom(opcode))
{
if (!opcode->custom_code())
return "(invalid custom)";
@@ -128,7 +91,7 @@ std::string TFliteImport::opcode_name(const tflite::Operator *op) const
return opcode->custom_code()->c_str();
}
- tflite::BuiltinOperator code = builtin_code_neutral(opcode);
+ tflite::BuiltinOperator code = mio::tflite::builtin_code_neutral(opcode);
return EnumNameBuiltinOperator(code);
}
diff --git a/compiler/tflchef/tflite/src/TFliteImport.h b/compiler/tflchef/tflite/src/TFliteImport.h
index 43b5bbaff..e6722e455 100644
--- a/compiler/tflchef/tflite/src/TFliteImport.h
+++ b/compiler/tflchef/tflite/src/TFliteImport.h
@@ -34,12 +34,6 @@ using TFliteTensors_t = flatbuffers::Vector<flatbuffers::Offset<tflite::Tensor>>
using TFliteBuffers_t = flatbuffers::Vector<flatbuffers::Offset<tflite::Buffer>>;
using TFliteOperators_t = flatbuffers::Vector<flatbuffers::Offset<tflite::Operator>>;
-const char *tensor_type(const tflite::Tensor *tensor);
-const char *tensor_name(const tflite::Tensor *tensor);
-tflite::BuiltinOperator builtin_code_neutral(const tflite::OperatorCode *opcode);
-bool is_valid(const tflite::OperatorCode *opcode);
-bool is_custom(const tflite::OperatorCode *opcode);
-
/**
* @brief Loads TF lite file and provides helpers to access attributes
*/
diff --git a/compiler/tflchef/tflite/src/TFliteOpChefs.h b/compiler/tflchef/tflite/src/TFliteOpChefs.h
index 26ada7d0a..b38b35a61 100644
--- a/compiler/tflchef/tflite/src/TFliteOpChefs.h
+++ b/compiler/tflchef/tflite/src/TFliteOpChefs.h
@@ -117,6 +117,7 @@
#include "Op/StridedSlice.h"
#include "Op/Sub.h"
#include "Op/Sum.h"
+#include "Op/SVDF.h"
#include "Op/Tanh.h"
#include "Op/Tile.h"
#include "Op/TopKV2.h"
diff --git a/compiler/tflchef/tflite/src/TFliteOpRegistry.h b/compiler/tflchef/tflite/src/TFliteOpRegistry.h
index 06394ddfa..4cbe7cfcb 100644
--- a/compiler/tflchef/tflite/src/TFliteOpRegistry.h
+++ b/compiler/tflchef/tflite/src/TFliteOpRegistry.h
@@ -154,6 +154,7 @@ private:
REG_TFL_OP(STRIDED_SLICE, TFliteOpStridedSlice);
REG_TFL_OP(SUB, TFliteOpSub);
REG_TFL_OP(SUM, TFliteOpSum);
+ REG_TFL_OP(SVDF, TFliteOpSVDF);
REG_TFL_OP(TANH, TFliteOpTanh);
REG_TFL_OP(TILE, TFliteOpTile);
REG_TFL_OP(TOPK_V2, TFliteOpTopKV2);
diff --git a/compiler/tfldump/CMakeLists.txt b/compiler/tfldump/CMakeLists.txt
index 83f7febad..fac0be6bf 100644
--- a/compiler/tfldump/CMakeLists.txt
+++ b/compiler/tfldump/CMakeLists.txt
@@ -1,7 +1,7 @@
-if(NOT TARGET mio_tflite260)
- message(STATUS "Build tfldump: FAILED (missing mio_tflite260)")
+if(NOT TARGET mio_tflite280)
+ message(STATUS "Build tfldump: FAILED (missing mio_tflite280)")
return()
-endif(NOT TARGET mio_tflite260)
+endif(NOT TARGET mio_tflite280)
set(DRIVER "driver/Driver.cpp")
@@ -10,6 +10,6 @@ file(GLOB_RECURSE SOURCES "src/*.cpp")
add_executable(tfldump ${DRIVER} ${SOURCES})
target_include_directories(tfldump PRIVATE include)
target_link_libraries(tfldump arser)
-target_link_libraries(tfldump mio_tflite260)
+target_link_libraries(tfldump mio_tflite280)
+target_link_libraries(tfldump mio_tflite280_helper)
target_link_libraries(tfldump safemain)
-target_link_libraries(tfldump flatbuffers-1.12)
diff --git a/compiler/tfldump/requires.cmake b/compiler/tfldump/requires.cmake
index d0f9cccba..b1abf9486 100644
--- a/compiler/tfldump/requires.cmake
+++ b/compiler/tfldump/requires.cmake
@@ -1,3 +1,3 @@
require("arser")
-require("mio-tflite260")
+require("mio-tflite280")
require("safemain")
diff --git a/compiler/tfldump/src/Dump.cpp b/compiler/tfldump/src/Dump.cpp
index 2351e4c3d..2a87e47d7 100644
--- a/compiler/tfldump/src/Dump.cpp
+++ b/compiler/tfldump/src/Dump.cpp
@@ -15,6 +15,7 @@
*/
#include <tfldump/Dump.h>
+#include <mio_tflite280/Helper.h>
#include "Read.h"
#include "OpPrinter.h"
@@ -127,7 +128,7 @@ void dump_sub_graph(std::ostream &os, tflread::Reader &reader)
// dump operands(tensors)
os << "Operands: T(subgraph index : tensor index) TYPE (shape) (shape_signature) "
- << "B(buffer index) OperandName" << std::endl;
+ << "B(buffer index) (variable) OperandName" << std::endl;
for (uint32_t i = 0; i < tensors->Length(); ++i)
{
// TODO refactor to some better structure
@@ -137,7 +138,7 @@ void dump_sub_graph(std::ostream &os, tflread::Reader &reader)
if (tensor->shape())
dims = tflread::as_index_vector(tensor->shape());
- os << "T(" << reader.subgraph_index() << ":" << i << ") " << tflread::tensor_type(tensor)
+ os << "T(" << reader.subgraph_index() << ":" << i << ") " << mio::tflite::tensor_type(tensor)
<< " ";
os << "(" << dims << ") ";
if (tensor->shape_signature())
@@ -146,7 +147,11 @@ void dump_sub_graph(std::ostream &os, tflread::Reader &reader)
os << "(" << dims_sig << ") ";
}
os << "B(" << tensor->buffer() << ") ";
- os << tflread::tensor_name(tensor) << std::endl;
+ if (tensor->is_variable())
+ {
+ os << "(variable) ";
+ }
+ os << mio::tflite::tensor_name(tensor) << std::endl;
if (auto q_params = tensor->quantization())
{
@@ -298,7 +303,7 @@ void dump_sub_graph(std::ostream &os, tflread::Reader &reader)
if (input >= 0)
{
auto tensor = tensors->Get(input);
- os << tflread::tensor_name(tensor);
+ os << mio::tflite::tensor_name(tensor);
}
os << std::endl;
}
@@ -308,7 +313,7 @@ void dump_sub_graph(std::ostream &os, tflread::Reader &reader)
if (output >= 0)
{
auto tensor = tensors->Get(output);
- os << tflread::tensor_name(tensor);
+ os << mio::tflite::tensor_name(tensor);
}
os << std::endl;
}
@@ -321,14 +326,14 @@ void dump_sub_graph(std::ostream &os, tflread::Reader &reader)
for (const auto input : reader.inputs())
{
auto tensor = tensors->Get(input);
- std::string name = tflread::tensor_name(tensor);
+ std::string name = mio::tflite::tensor_name(tensor);
os << "I T(" << reader.subgraph_index() << ":" << input << ") " << name << std::endl;
}
for (const auto output : reader.outputs())
{
auto tensor = tensors->Get(output);
- std::string name = tflread::tensor_name(tensor);
+ std::string name = mio::tflite::tensor_name(tensor);
os << "O T(" << reader.subgraph_index() << ":" << output << ") " << name << std::endl;
}
@@ -360,7 +365,7 @@ void dump_model(std::ostream &os, const tflite::Model *model)
tflite::BuiltinOperator op_code = opcode->builtin_code();
tflite::BuiltinOperator dp_code = tflite::BuiltinOperator(opcode->deprecated_builtin_code());
- auto op_name = tflread::opcode_name(opcode);
+ auto op_name = mio::tflite::opcode_name(opcode);
auto op_version = opcode->version();
os << "[" << opcode_index << "] " << op_name << " (code: " << op_code
@@ -405,9 +410,8 @@ void dump_model(std::ostream &os, const tflite::Model *model)
for (uint32_t i = 0; i < signaturedefs->Length(); ++i)
{
auto sign_i = signaturedefs->Get(i);
- os << "S(" << i << ") method_name(" << sign_i->method_name()->c_str() << "), key("
- << sign_i->key()->c_str() << "), sub_graph(" << sign_i->subgraph_index() << ")"
- << std::endl;
+ os << "S(" << i << ") signature_key(" << sign_i->signature_key()->c_str() << "), sub_graph("
+ << sign_i->subgraph_index() << ")" << std::endl;
auto inputs_i = sign_i->inputs();
for (uint32_t t = 0; t < inputs_i->Length(); ++t)
diff --git a/compiler/tfldump/src/Load.cpp b/compiler/tfldump/src/Load.cpp
index fe04a5dd6..d2f6e06f1 100644
--- a/compiler/tfldump/src/Load.cpp
+++ b/compiler/tfldump/src/Load.cpp
@@ -76,7 +76,7 @@ public:
{
if (_value != -1)
{
- // Close on descturction
+      // Close on destruction
close(_value);
}
}
diff --git a/compiler/tfldump/src/OpPrinter.cpp b/compiler/tfldump/src/OpPrinter.cpp
index 90cba7173..47edcb086 100644
--- a/compiler/tfldump/src/OpPrinter.cpp
+++ b/compiler/tfldump/src/OpPrinter.cpp
@@ -602,6 +602,23 @@ public:
}
};
+class SVDFPrinter : public OpPrinter
+{
+public:
+ void options(const tflite::Operator *op, std::ostream &os) const override
+ {
+ if (auto *params = op->builtin_options_as_SVDFOptions())
+ {
+ os << " ";
+ os << "rank(" << params->rank() << ") ";
+ os << "activation(" << EnumNameActivationFunctionType(params->fused_activation_function())
+ << ") ";
+ os << "asymmetric_quantize_inputs(" << params->asymmetric_quantize_inputs() << ") ";
+ os << std::endl;
+ }
+ }
+};
+
class TransposeConvPrinter : public OpPrinter
{
public:
@@ -776,6 +793,7 @@ OpPrinterRegistry::OpPrinterRegistry()
_op_map[tflite::BuiltinOperator_STRIDED_SLICE] = make_unique<StridedSlicePrinter>();
_op_map[tflite::BuiltinOperator_SUB] = make_unique<SubPrinter>();
_op_map[tflite::BuiltinOperator_SUM] = make_unique<ReducerPrinter>();
+ _op_map[tflite::BuiltinOperator_SVDF] = make_unique<SVDFPrinter>();
_op_map[tflite::BuiltinOperator_TRANSPOSE_CONV] = make_unique<TransposeConvPrinter>();
// There is no Option for TOPK_V2
_op_map[tflite::BuiltinOperator_UNIDIRECTIONAL_SEQUENCE_LSTM] =
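
For reference, the SVDFPrinter added above would render an SVDF operator's options as a single trailing line in the dump, roughly (values illustrative):

     rank(1) activation(RELU) asymmetric_quantize_inputs(0)
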
diff --git a/compiler/tfldump/src/Read.cpp b/compiler/tfldump/src/Read.cpp
index 8b3a96e83..454e3a8a1 100644
--- a/compiler/tfldump/src/Read.cpp
+++ b/compiler/tfldump/src/Read.cpp
@@ -16,76 +16,14 @@
#include "Read.h"
+#include <mio_tflite280/Helper.h>
+
#include <sstream>
#include <string>
namespace tflread
{
-// This will provide v3/v3a format neutral BuiltinOperator
-tflite::BuiltinOperator builtin_code_neutral(const tflite::OperatorCode *opcode)
-{
- assert(opcode != nullptr);
- int8_t dp_code = opcode->deprecated_builtin_code();
- if (dp_code < 127 && dp_code >= 0)
- return tflite::BuiltinOperator(dp_code);
- return opcode->builtin_code();
-}
-
-bool is_valid(const tflite::OperatorCode *opcode)
-{
- tflite::BuiltinOperator code = builtin_code_neutral(opcode);
- return (tflite::BuiltinOperator_MIN <= code && code <= tflite::BuiltinOperator_MAX);
-}
-
-bool is_custom(const tflite::OperatorCode *opcode)
-{
- tflite::BuiltinOperator code = builtin_code_neutral(opcode);
- return (code == tflite::BuiltinOperator_CUSTOM);
-}
-
-std::string opcode_name(const tflite::OperatorCode *opcode)
-{
- assert(opcode);
-
- if (!is_valid(opcode))
- {
- std::ostringstream oss;
- oss << "(invalid)";
- return oss.str();
- }
-
- if (is_custom(opcode))
- {
- if (!opcode->custom_code())
- return "(invalid custom)";
-
- std::string custom_op = "CUSTOM(";
- custom_op += opcode->custom_code()->c_str();
- custom_op += ")";
- return custom_op;
- }
-
- tflite::BuiltinOperator code = builtin_code_neutral(opcode);
- return tflite::EnumNameBuiltinOperator(code);
-}
-
-const char *tensor_type(const tflite::Tensor *tensor)
-{
- return tflite::EnumNameTensorType(tensor->type());
-}
-
-const char *tensor_name(const tflite::Tensor *tensor)
-{
- static const char *kEmptyTensorName = "(noname)";
-
- auto name = tensor->name();
- if (name)
- return name->c_str();
-
- return kEmptyTensorName;
-}
-
Reader::Reader(const tflite::Model *model)
{
_version = model->version();
@@ -129,7 +67,7 @@ tflite::BuiltinOperator Reader::builtin_code(const tflite::Operator *op) const
assert(index < _op_codes.size());
const tflite::OperatorCode *opcode = _op_codes.at(index);
- return tflread::builtin_code_neutral(opcode);
+ return mio::tflite::builtin_code_neutral(opcode);
}
std::string Reader::opcode_name(const tflite::Operator *op) const
@@ -138,14 +76,14 @@ std::string Reader::opcode_name(const tflite::Operator *op) const
assert(index < _op_codes.size());
const tflite::OperatorCode *opcode = _op_codes.at(index);
- if (!is_valid(opcode))
+ if (!mio::tflite::is_valid(opcode))
{
std::ostringstream oss;
oss << "(invalid: " << index << ")";
return oss.str();
}
- return tflread::opcode_name(opcode);
+ return mio::tflite::opcode_name(opcode);
}
bool Reader::select_subgraph(uint32_t sgindex)
diff --git a/compiler/tfldump/src/Read.h b/compiler/tfldump/src/Read.h
index 80f317d0b..1ae63877f 100644
--- a/compiler/tfldump/src/Read.h
+++ b/compiler/tfldump/src/Read.h
@@ -36,13 +36,6 @@ template <typename T> std::vector<T> as_index_vector(const flatbuffers::Vector<T
return ret;
}
-tflite::BuiltinOperator builtin_code_neutral(const tflite::OperatorCode *opcode);
-bool is_valid(const tflite::OperatorCode *opcode);
-bool is_custom(const tflite::OperatorCode *opcode);
-std::string opcode_name(const tflite::OperatorCode *opcode);
-const char *tensor_type(const tflite::Tensor *tensor);
-const char *tensor_name(const tflite::Tensor *tensor);
-
/**
* @brief Loads TF lite file and provides helpers to access attributes
*/
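
The helpers removed here now come from mio_tflite280's Helper.h, included above. For reference, here is a self-contained sketch of the v3/v3a-neutral opcode dispatch that the relocated builtin_code_neutral() performs, reconstructed from the deleted body; the enum and struct are illustrative stand-ins for the generated tflite schema.

```
#include <cassert>
#include <cstdint>

// Illustrative stand-ins for the generated tflite schema types.
enum BuiltinOperator : int32_t
{
  BuiltinOperator_ADD = 0,
  BuiltinOperator_SVDF = 27,
};

struct OperatorCode
{
  int8_t deprecated_builtin_code; // v3 field, upper bound 127 (INT8_MAX)
  BuiltinOperator builtin_code;   // v3a field for codes beyond 127
};

// v3 models store the opcode in deprecated_builtin_code (int8_t); v3a models
// use builtin_code for values that no longer fit. TensorFlow reserves 127 as
// BuiltinOperator_PLACEHOLDER_FOR_GREATER_OP_CODES.
BuiltinOperator builtin_code_neutral(const OperatorCode *opcode)
{
  assert(opcode != nullptr);
  int8_t dp_code = opcode->deprecated_builtin_code;
  if (dp_code < 127 && dp_code >= 0)
    return BuiltinOperator(dp_code);
  return opcode->builtin_code;
}

int main()
{
  OperatorCode oc{27, BuiltinOperator_ADD}; // v3-style entry: SVDF via deprecated code
  return builtin_code_neutral(&oc) == BuiltinOperator_SVDF ? 0 : 1;
}
```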
diff --git a/compiler/tflite2circle/CMakeLists.txt b/compiler/tflite2circle/CMakeLists.txt
index 4ea01ad31..a317a6305 100644
--- a/compiler/tflite2circle/CMakeLists.txt
+++ b/compiler/tflite2circle/CMakeLists.txt
@@ -1,8 +1,8 @@
nnas_include(TargetRequire)
unset(REQUIRED_TARGETS)
-list(APPEND REQUIRED_TARGETS mio_tflite260)
-list(APPEND REQUIRED_TARGETS mio_circle)
+list(APPEND REQUIRED_TARGETS mio_tflite280)
+list(APPEND REQUIRED_TARGETS mio_circle04)
TargetRequire_Return(${REQUIRED_TARGETS})
set(DRIVER "driver/Driver.cpp")
@@ -13,8 +13,9 @@ target_include_directories(tflite2circle PRIVATE src)
target_link_libraries(tflite2circle arser)
target_link_libraries(tflite2circle foder)
target_link_libraries(tflite2circle safemain)
-target_link_libraries(tflite2circle mio_tflite260)
-target_link_libraries(tflite2circle mio_circle)
+target_link_libraries(tflite2circle mio_tflite280)
+target_link_libraries(tflite2circle mio_tflite280_helper)
+target_link_libraries(tflite2circle mio_circle04)
target_link_libraries(tflite2circle vconone)
target_link_libraries(tflite2circle nncc_coverage)
diff --git a/compiler/tflite2circle/requires.cmake b/compiler/tflite2circle/requires.cmake
index e39f9eeaf..3db9a2f2a 100644
--- a/compiler/tflite2circle/requires.cmake
+++ b/compiler/tflite2circle/requires.cmake
@@ -1,6 +1,6 @@
require("arser")
require("foder")
-require("mio-tflite260")
-require("mio-circle")
+require("mio-tflite280")
+require("mio-circle04")
require("safemain")
require("vconone")
diff --git a/compiler/tflite2circle/src/BuildBuiltinOptions.h b/compiler/tflite2circle/src/BuildBuiltinOptions.h
index dc6ff086c..88a4f71df 100644
--- a/compiler/tflite2circle/src/BuildBuiltinOptions.h
+++ b/compiler/tflite2circle/src/BuildBuiltinOptions.h
@@ -102,6 +102,7 @@
#include "BuildBuiltinOptions/SqueezeOptions.h"
#include "BuildBuiltinOptions/StridedSliceOptions.h"
#include "BuildBuiltinOptions/SubOptions.h"
+#include "BuildBuiltinOptions/SVDFOptions.h"
#include "BuildBuiltinOptions/TileOptions.h"
#include "BuildBuiltinOptions/TopKV2Options.h"
#include "BuildBuiltinOptions/TransposeOptions.h"
diff --git a/compiler/tflite2circle/src/BuildBuiltinOptions/FullyConnectedOptions.cpp b/compiler/tflite2circle/src/BuildBuiltinOptions/FullyConnectedOptions.cpp
index 2619b73eb..27410012d 100644
--- a/compiler/tflite2circle/src/BuildBuiltinOptions/FullyConnectedOptions.cpp
+++ b/compiler/tflite2circle/src/BuildBuiltinOptions/FullyConnectedOptions.cpp
@@ -37,6 +37,7 @@ build_circle_FullyConnectedOptions(flatbuffers::FlatBufferBuilder &fb, const tfl
else if (tflite_weight_format == tflite::FullyConnectedOptionsWeightsFormat_SHUFFLED4x16INT8)
builtin_options_builder.add_weights_format(
circle::FullyConnectedOptionsWeightsFormat_SHUFFLED4x16INT8);
+ builtin_options_builder.add_keep_num_dims(tflite_builtin_options->keep_num_dims());
return builtin_options_builder.Finish();
}
diff --git a/compiler/tflite2circle/src/BuildBuiltinOptions/SVDFOptions.cpp b/compiler/tflite2circle/src/BuildBuiltinOptions/SVDFOptions.cpp
new file mode 100644
index 000000000..e23738a69
--- /dev/null
+++ b/compiler/tflite2circle/src/BuildBuiltinOptions/SVDFOptions.cpp
@@ -0,0 +1,41 @@
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "SVDFOptions.h"
+#include "DataLookup.h"
+
+#include <cassert>
+
+namespace tflite2circle
+{
+
+flatbuffers::Offset<circle::SVDFOptions>
+build_circle_SVDFOptions(flatbuffers::FlatBufferBuilder &fb, const tflite::Operator *op)
+{
+ auto *tflite_builtin_options = op->builtin_options_as_SVDFOptions();
+ assert(tflite_builtin_options);
+
+ circle::SVDFOptionsBuilder builtin_options_builder{fb};
+ builtin_options_builder.add_rank(tflite_builtin_options->rank());
+ builtin_options_builder.add_asymmetric_quantize_inputs(
+ tflite_builtin_options->asymmetric_quantize_inputs());
+ builtin_options_builder.add_fused_activation_function(
+ get_circle_activation_function_type(tflite_builtin_options->fused_activation_function()));
+
+ return builtin_options_builder.Finish();
+}
+
+} // namespace tflite2circle
diff --git a/runtime/onert/backend/gpu_cl/open_cl/AccessType.h b/compiler/tflite2circle/src/BuildBuiltinOptions/SVDFOptions.h
index 81efd666f..2ddbd3911 100644
--- a/runtime/onert/backend/gpu_cl/open_cl/AccessType.h
+++ b/compiler/tflite2circle/src/BuildBuiltinOptions/SVDFOptions.h
@@ -1,6 +1,5 @@
/*
* Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
- * Copyright 2019 The TensorFlow Authors. All Rights Reserved.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
@@ -15,25 +14,18 @@
* limitations under the License.
*/
-#ifndef __ONERT_BACKEND_GPU_CL_OPENCL_ACCESS_TYPE_H__
-#define __ONERT_BACKEND_GPU_CL_OPENCL_ACCESS_TYPE_H__
+#ifndef __BBO_SVDF_OPTIONS_H__
+#define __BBO_SVDF_OPTIONS_H__
-namespace onert
-{
-namespace backend
-{
-namespace gpu_cl
-{
-enum class AccessType
+#include <mio/tflite/schema_generated.h>
+#include <mio/circle/schema_generated.h>
+
+namespace tflite2circle
{
- UNKNOWN,
- READ,
- WRITE,
- READ_WRITE,
-};
-} // namespace gpu_cl
-} // namespace backend
-} // namespace onert
+flatbuffers::Offset<circle::SVDFOptions>
+build_circle_SVDFOptions(flatbuffers::FlatBufferBuilder &fb, const tflite::Operator *op);
+
+} // namespace tflite2circle
-#endif // __ONERT_BACKEND_GPU_CL_OPENCL_ACCESS_TYPE_H__
+#endif // __BBO_SVDF_OPTIONS_H__
diff --git a/compiler/tflite2circle/src/CircleModel.cpp b/compiler/tflite2circle/src/CircleModel.cpp
index 90cc415ff..d483b288f 100644
--- a/compiler/tflite2circle/src/CircleModel.cpp
+++ b/compiler/tflite2circle/src/CircleModel.cpp
@@ -16,11 +16,14 @@
#include <cassert>
#include <iostream>
+#include <map>
#include <memory>
#include "CircleModel.h"
#include "DataLookup.h"
+#include <mio_tflite280/Helper.h>
+
namespace tflite2circle
{
@@ -206,7 +209,8 @@ template <> void Offset<SubGraphLink>::build(const TFLFlatBufVec *tflite_flatbuf
auto tflite_inputs = it_sg->inputs();
std::vector<int32_t> input_vec{tflite_inputs->begin(), tflite_inputs->end()};
- // apply signature_def to input tensor index so that input orders are correct
+ // apply signature_def to input tensor index so that the input order follows TensorFlow
+ // Lite's interpreter._get_full_signature_list() method, which sorts entries by name
// NOTE we do not need this when circle format supports signature_def
if (_tfl_signature_def_offsets != nullptr)
{
@@ -216,10 +220,16 @@ template <> void Offset<SubGraphLink>::build(const TFLFlatBufVec *tflite_flatbuf
{
auto inputs = it_signdef->inputs();
assert(inputs->size() == input_vec.size());
- uint32_t input_vec_idx = 0;
+
+ std::map<std::string, uint32_t> map_name_index;
for (auto it_tm : *inputs)
{
- input_vec[input_vec_idx++] = static_cast<int32_t>(it_tm->tensor_index());
+ map_name_index[it_tm->name()->str()] = it_tm->tensor_index();
+ }
+ uint32_t input_vec_idx = 0;
+ for (auto &item : map_name_index)
+ {
+ input_vec[input_vec_idx++] = item.second;
}
}
}
@@ -240,10 +250,16 @@ template <> void Offset<SubGraphLink>::build(const TFLFlatBufVec *tflite_flatbuf
{
auto outputs = it_signdef->outputs();
assert(outputs->size() == output_vec.size());
- uint32_t output_vec_idx = 0;
+
+ std::map<std::string, uint32_t> map_name_index;
for (auto it_tm : *outputs)
{
- output_vec[output_vec_idx++] = static_cast<int32_t>(it_tm->tensor_index());
+ map_name_index[it_tm->name()->str()] = it_tm->tensor_index();
+ }
+ uint32_t output_vec_idx = 0;
+ for (auto &item : map_name_index)
+ {
+ output_vec[output_vec_idx++] = item.second;
}
}
}
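
The rewrite above relies on std::map's ordered iteration: keys are visited in ascending lexicographic order, which is what makes the resulting input/output order match TensorFlow Lite's name-sorted _get_full_signature_list(). A minimal sketch of that reordering step, with illustrative tensor names and indices:

```
#include <cstdint>
#include <iostream>
#include <map>
#include <string>
#include <vector>

int main()
{
  // Suppose signature_def entries arrive in file order as (name, tensor_index).
  std::map<std::string, uint32_t> map_name_index;
  map_name_index["y"] = 2;
  map_name_index["x"] = 0;
  map_name_index["bias"] = 1;

  // Iterating a std::map visits keys in sorted order: bias, x, y.
  std::vector<int32_t> input_vec(map_name_index.size());
  uint32_t input_vec_idx = 0;
  for (auto &item : map_name_index)
    input_vec[input_vec_idx++] = static_cast<int32_t>(item.second);

  for (auto idx : input_vec)
    std::cout << idx << " "; // prints: 1 0 2
  std::cout << std::endl;
  return 0;
}
```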
@@ -318,17 +334,6 @@ template <> void Offset<SubGraphLink>::build(const TFLFlatBufVec *tflite_flatbuf
_circle_flatbuffer_vec_offset = _fb->CreateVector(subgprahs_vec);
}
-tflite::BuiltinOperator builtin_code_neutral(const tflite::OperatorCode *opcode)
-{
- assert(opcode != nullptr);
- int8_t dp_code = opcode->deprecated_builtin_code();
- // 127 is max of int8_t which is upper bound of v3 builtin_code
- // NOTE TensorFlow uses 'BuiltinOperator_PLACEHOLDER_FOR_GREATER_OP_CODES' for 127
- if (dp_code < 127 && dp_code >= 0)
- return tflite::BuiltinOperator(dp_code);
- return opcode->builtin_code();
-}
-
template <> void Offset<OperatorCodeLink>::build(const TFLFlatBufVec *tflite_flatbuffer_vec)
{
std::vector<flatbuffers::Offset<circle::OperatorCode>> operator_code_vec;
@@ -337,8 +342,9 @@ template <> void Offset<OperatorCodeLink>::build(const TFLFlatBufVec *tflite_fla
{
auto custom_code = _fb->CreateString(it->custom_code());
circle::OperatorCodeBuilder operator_code_builder{*_fb};
- // TODO support circle deprecated_builtin_code
- auto bt_code = builtin_code_neutral(it);
+ auto de_code = it->deprecated_builtin_code();
+ auto bt_code = it->builtin_code();
+ operator_code_builder.add_deprecated_builtin_code(get_circle_builtin_code(de_code));
operator_code_builder.add_builtin_code(get_circle_builtin_code(bt_code));
operator_code_builder.add_custom_code(custom_code);
operator_code_builder.add_version(it->version());
diff --git a/compiler/tflite2circle/src/DataLookup.cpp b/compiler/tflite2circle/src/DataLookup.cpp
index c5ed62e31..7c3aab089 100644
--- a/compiler/tflite2circle/src/DataLookup.cpp
+++ b/compiler/tflite2circle/src/DataLookup.cpp
@@ -34,6 +34,22 @@ circle::BuiltinOperator get_circle_builtin_code(tflite::BuiltinOperator tfl_bop)
}
}
+int8_t get_circle_builtin_code(int8_t tfl_bop_i8)
+{
+ tflite::BuiltinOperator tfl_bop = static_cast<tflite::BuiltinOperator>(tfl_bop_i8);
+
+ switch (tfl_bop)
+ {
+#define TFL_OPERATOR(OP) \
+ case tflite::BuiltinOperator_##OP: \
+ return static_cast<int8_t>(circle::BuiltinOperator_##OP);
+#include "TFLOperator.lst"
+#undef TFL_OPERATOR
+ default:
+ throw std::runtime_error("tflite2circle: wrong op");
+ }
+}
+
circle::TensorType get_circle_tensortype(tflite::TensorType tfl_tt)
{
switch (tfl_tt)
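
The new int8_t overload above maps opcodes through the project's X-macro list (TFLOperator.lst). Here is a self-contained sketch of that technique, with the list inlined instead of pulled in via #include, and two illustrative enum entries standing in for the generated tflite/circle schemas:

```
#include <cstdint>
#include <stdexcept>

// Illustrative stand-ins for the generated tflite/circle enums.
namespace tflite { enum BuiltinOperator : int32_t { BuiltinOperator_ADD = 0, BuiltinOperator_SVDF = 27 }; }
namespace circle { enum BuiltinOperator : int32_t { BuiltinOperator_ADD = 0, BuiltinOperator_SVDF = 27 }; }

int8_t get_circle_builtin_code(int8_t tfl_bop_i8)
{
  auto tfl_bop = static_cast<tflite::BuiltinOperator>(tfl_bop_i8);
  switch (tfl_bop)
  {
// In the real code these expansions come from #include "TFLOperator.lst".
#define TFL_OPERATOR(OP) \
  case tflite::BuiltinOperator_##OP: \
    return static_cast<int8_t>(circle::BuiltinOperator_##OP);
    TFL_OPERATOR(ADD)
    TFL_OPERATOR(SVDF)
#undef TFL_OPERATOR
    default:
      throw std::runtime_error("tflite2circle: wrong op");
  }
}

int main() { return get_circle_builtin_code(27) == 27 ? 0 : 1; }
```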
diff --git a/compiler/tflite2circle/src/DataLookup.h b/compiler/tflite2circle/src/DataLookup.h
index 601d014dd..5aeeb6eca 100644
--- a/compiler/tflite2circle/src/DataLookup.h
+++ b/compiler/tflite2circle/src/DataLookup.h
@@ -30,6 +30,8 @@ namespace tflite2circle
*/
circle::BuiltinOperator get_circle_builtin_code(tflite::BuiltinOperator tfl_bop);
+int8_t get_circle_builtin_code(int8_t tfl_bop_i8);
+
/**
* @brief Returns circle TensorType according to tflite.
*
diff --git a/compiler/tflite2circle/src/TFLBuiltinOptions.lst b/compiler/tflite2circle/src/TFLBuiltinOptions.lst
index f2de7e046..d55ba464a 100644
--- a/compiler/tflite2circle/src/TFLBuiltinOptions.lst
+++ b/compiler/tflite2circle/src/TFLBuiltinOptions.lst
@@ -9,7 +9,7 @@ TFL_BUILTIN_OPTIONS(DepthwiseConv2DOptions)
//TFL_BUILTIN_OPTIONS(ConcatEmbeddingsOptions)
//TFL_BUILTIN_OPTIONS(LSHProjectionOptions)
TFL_BUILTIN_OPTIONS(Pool2DOptions)
-//TFL_BUILTIN_OPTIONS(SVDFOptions)
+TFL_BUILTIN_OPTIONS(SVDFOptions)
//TFL_BUILTIN_OPTIONS(RNNOptions)
TFL_BUILTIN_OPTIONS(FullyConnectedOptions)
TFL_BUILTIN_OPTIONS(SoftmaxOptions)
diff --git a/compiler/vconone/CMakeLists.txt b/compiler/vconone/CMakeLists.txt
index 2241c9ec9..3841a1b78 100644
--- a/compiler/vconone/CMakeLists.txt
+++ b/compiler/vconone/CMakeLists.txt
@@ -1,5 +1,5 @@
if (NOT VCONONE_VERSION)
- set(VCONONE_VERSION 0x0000000000130001)
+ set(VCONONE_VERSION 0x0000000000140001)
# NOTE order is [build patch minor major]
# if VCONONE_VERSION is set with -D option, it will be cached
# you may have to remove cache file if you remove -D option
diff --git a/docs/conf.py b/docs/conf.py
index ff4070f98..84197e6d6 100644
--- a/docs/conf.py
+++ b/docs/conf.py
@@ -21,7 +21,7 @@ copyright = '2020, Samsung Research & contributors'
author = 'Samsung Research & contributors'
# The full version, including alpha/beta/rc tags
-release = '1.19.0'
+release = '1.20.0'
# -- General configuration ---------------------------------------------------
diff --git a/docs/howto/how-to-build-compiler.md b/docs/howto/how-to-build-compiler.md
index 75699890a..29e300bdd 100644
--- a/docs/howto/how-to-build-compiler.md
+++ b/docs/howto/how-to-build-compiler.md
@@ -27,7 +27,7 @@ Here is a summary of it
```
$ sudo apt-get install \
build-essential \
-clang-format-3.9 \
+clang-format-8 \
cmake \
doxygen \
git \
@@ -122,3 +122,73 @@ $ NNAS_BUILD_PREFIX=build ./nnas create-package --preset 20200731_windows --pref
- `NNAS_BUILD_PREFIX` is the path to directory where compiler-build-artifacts will be stored.
- `--preset` is the one that specifies a version you will install. You can see `infra/packaging/preset/` directory for more details and getting latest version.
- `--prefix` is the install directory.
+
+## Cross build for Ubuntu/ARM32 (experimental)
+
+Some modules are available to run on Ubuntu/ARM32 through cross building.
+
+While configuring the build, some modules need to execute tools that generate
+test materials, and these tools must run on the host (x86-64). So the tools
+have to be built for the host before cross building.
+
+The overall cross build steps are: (1) configure for the host,
+(2) build the tools for the host, (3) configure for the ARM32 target,
+and (4) build for the ARM32 target.
+
+Unit tests can also run on the target device.
+But the value tests need to run TensorFlow Lite to get the expected results,
+and doing this on the target is nontrivial, so the data files produced by
+host execution are used instead.
+
+Thus, to run the unit tests on the target, a prior run on the host is needed.
+
+### Prepare root file system
+
+You should prepare Ubuntu/ARM32 root file system for cross compilation.
+Please refer
+[how-to-cross-build-runtime-for-arm.md](how-to-cross-build-runtime-for-arm.md)
+for preparation.
+
+You can set the `ROOTFS_ARM` environment variable if the root file system
+is in an alternative folder.
+
+### Clean existing external source for patches
+
+Some external projects built from source are not ready for cross compilation
+with CMake, so this experimental setup provides patches for them.
+Just remove the sources and stamp files as below, and `make` will prepare
+the patched source trees.
+```
+rm -rf externals/HDF5
+rm -rf externals/PROTOBUF
+rm externals/HDF5.stamp
+rm externals/PROTOBUF.stamp
+```
+
+### Build
+
+For cross building, the `infra/nncc/Makefile.arm32` file is provided as an
+example to use with the `make` command.
+```
+make -f infra/nncc/Makefile.arm32 cfg
+make -f infra/nncc/Makefile.arm32 debug
+```
+The first `make` runs steps (1), (2) and (3) above; the second runs step (4).
+
+### Test
+
+You can also run the unit tests on an ARM32 Ubuntu device with the cross
+build results. First run the tests on the host to prepare files that are
+currently complicated to generate on the target device.
+```
+# run this in x86-64 host
+make -f infra/nncc/Makefile.arm32 test_prep
+
+# run this in ARM32 target device
+make -f infra/nncc/Makefile.arm32 test
+```
+
+NOTE: this assumes
+- the host and target have the same directory structure
+- you copy the `build` folder to the target, or
+- simply mount the `ONE` folder on the target with NFS
diff --git a/docs/howto/how-to-build-runtime.md b/docs/howto/how-to-build-runtime.md
index 02ab47537..bf524d766 100644
--- a/docs/howto/how-to-build-runtime.md
+++ b/docs/howto/how-to-build-runtime.md
@@ -15,14 +15,14 @@ In the Ubuntu, you can easily install it with the following command.
$ sudo apt-get install cmake libboost-all-dev
```
-If your linux system does not have the basic development configuration, you will need to install more packages. A list of all packages needed to configure the development environment can be found in the https://github.com/Samsung/ONE/blob/master/infra/docker/Dockerfile.1804 file.
+If your linux system does not have the basic development configuration, you will need to install more packages. A list of all packages needed to configure the development environment can be found in https://github.com/Samsung/ONE/blob/master/infra/docker/bionic/Dockerfile.
Here is a summary of it
```
$ sudo apt install \
build-essential \
-clang-format-3.9 \
+clang-format-8 \
cmake \
doxygen \
git \
diff --git a/infra/cmake/modules/ExternalBuildTools.cmake b/infra/cmake/modules/ExternalBuildTools.cmake
index 4f2027b4b..557e6f47d 100644
--- a/infra/cmake/modules/ExternalBuildTools.cmake
+++ b/infra/cmake/modules/ExternalBuildTools.cmake
@@ -14,7 +14,6 @@ function(ExternalBuild_CMake)
${ARGN}
)
- set(BUILD_STAMP_PATH "${ARG_BUILD_DIR}/${ARG_PKG_NAME}.stamp")
set(BUILD_LOG_PATH "${ARG_BUILD_DIR}/${ARG_PKG_NAME}.log")
set(INSTALL_STAMP_PATH "${ARG_INSTALL_DIR}/${ARG_PKG_NAME}.stamp")
set(INSTALL_LOG_PATH "${ARG_INSTALL_DIR}/${ARG_PKG_NAME}.log")
@@ -24,14 +23,6 @@ function(ExternalBuild_CMake)
set(PKG_IDENTIFIER "${ARG_IDENTIFIER}")
endif(DEFINED ARG_IDENTIFIER)
- # NOTE Do NOT retry build once it fails
- if(EXISTS ${BUILD_STAMP_PATH})
- file(READ ${BUILD_STAMP_PATH} READ_IDENTIFIER)
- if("${READ_IDENTIFIER}" STREQUAL "${PKG_IDENTIFIER}")
- return()
- endif("${READ_IDENTIFIER}" STREQUAL "${PKG_IDENTIFIER}")
- endif(EXISTS ${BUILD_STAMP_PATH})
-
# NOTE Do NOT build pre-installed exists
if(EXISTS ${INSTALL_STAMP_PATH})
file(READ ${INSTALL_STAMP_PATH} READ_IDENTIFIER)
@@ -42,11 +33,23 @@ function(ExternalBuild_CMake)
message(STATUS "Build ${ARG_PKG_NAME} from ${ARG_CMAKE_DIR}")
+ # if we're doing cross compilation, external projects also need the toolchain file
+ if(CMAKE_TOOLCHAIN_FILE)
+ set(TOOLCHAIN_FILE ${CMAKE_TOOLCHAIN_FILE})
+ # NOTE CMAKE_TOOLCHAIN_FILE may be a relative path -> convert to an absolute one
+ if(NOT EXISTS ${TOOLCHAIN_FILE})
+ set(TOOLCHAIN_FILE ${CMAKE_SOURCE_DIR}/${CMAKE_TOOLCHAIN_FILE})
+ if(NOT EXISTS ${TOOLCHAIN_FILE})
+ message(FATAL_ERROR "Failed to find ${CMAKE_TOOLCHAIN_FILE}")
+ endif()
+ endif()
+ message(STATUS "ExternalBuild_CMake TOOLCHAIN_FILE=${TOOLCHAIN_FILE}")
+ list(APPEND ARG_EXTRA_OPTS -DCMAKE_TOOLCHAIN_FILE=${TOOLCHAIN_FILE})
+ endif(CMAKE_TOOLCHAIN_FILE)
+
file(MAKE_DIRECTORY ${ARG_BUILD_DIR})
file(MAKE_DIRECTORY ${ARG_INSTALL_DIR})
- file(WRITE "${BUILD_STAMP_PATH}" "${PKG_IDENTIFIER}")
-
execute_process(COMMAND ${CMAKE_COMMAND}
-G "${CMAKE_GENERATOR}"
-DCMAKE_INSTALL_PREFIX=${ARG_INSTALL_DIR}
diff --git a/infra/cmake/modules/ExternalSourceTools.cmake b/infra/cmake/modules/ExternalSourceTools.cmake
index c8ca57520..f71eb5d11 100644
--- a/infra/cmake/modules/ExternalSourceTools.cmake
+++ b/infra/cmake/modules/ExternalSourceTools.cmake
@@ -5,7 +5,7 @@ function(ExternalSource_Download PREFIX)
include(CMakeParseArguments)
nnas_include(StampTools)
- cmake_parse_arguments(ARG "" "DIRNAME;URL;CHECKSUM" "" ${ARGN})
+ cmake_parse_arguments(ARG "" "DIRNAME;URL;CHECKSUM;PATCH" "" ${ARGN})
# Configure URL
if(ARG_URL)
@@ -104,11 +104,12 @@ function(ExternalSource_Download PREFIX)
message(STATUS "Extract ${PREFIX}")
execute_process(COMMAND ${CMAKE_COMMAND} -E tar xfz "${DOWNLOAD_PATH}"
WORKING_DIRECTORY "${TMP_DIR}"
+ RESULT_VARIABLE EXTRACTION_RESULT
ERROR_VARIABLE EXTRACTION_ERROR)
- if(EXTRACTION_ERROR)
- message(FATAL_ERROR "Extract ${PREFIX} - failed")
- endif(EXTRACTION_ERROR)
+ if(EXTRACTION_RESULT AND NOT EXTRACTION_RESULT EQUAL 0)
+ message(FATAL_ERROR "Extract ${PREFIX} - failed: ${EXTRACTION_ERROR}")
+ endif()
file(REMOVE "${DOWNLOAD_PATH}")
message(STATUS "Extract ${PREFIX} - done")
@@ -123,6 +124,19 @@ function(ExternalSource_Download PREFIX)
get_filename_component(contents ${contents} ABSOLUTE)
file(RENAME ${contents} "${OUT_DIR}")
+ if(ARG_PATCH)
+ message(STATUS "Patch with ${ARG_PATCH}")
+ execute_process(COMMAND patch -p1 -i ${ARG_PATCH}
+ WORKING_DIRECTORY ${OUT_DIR}
+ RESULT_VARIABLE EXEC_RESULT
+ ERROR_VARIABLE EXEC_ERROR)
+ if(NOT EXEC_RESULT EQUAL 0)
+ message(FATAL_ERROR "${PREFIX} failed patch ${ARG_PATCH}")
+ endif(NOT EXEC_RESULT EQUAL 0)
+
+ message(STATUS "patch ${PATCH_FILE}: ${EXEC_RESULT}, ${EXEC_ERROR}")
+ endif(ARG_PATCH)
+
file(REMOVE_RECURSE "${TMP_DIR}")
file(WRITE "${STAMP_PATH}" "${URL}")
message(STATUS "Cleanup ${PREFIX} - done")
diff --git a/infra/cmake/modules/IdentifyPlatform.cmake b/infra/cmake/modules/IdentifyPlatform.cmake
index cf56dd086..6616283fb 100644
--- a/infra/cmake/modules/IdentifyPlatform.cmake
+++ b/infra/cmake/modules/IdentifyPlatform.cmake
@@ -37,6 +37,8 @@ if("${HOST_ARCH}" STREQUAL "x86_64")
set(HOST_ARCH_BASE ${HOST_ARCH})
elseif("${HOST_ARCH}" STREQUAL "armv7l")
set(HOST_ARCH_BASE "arm")
+elseif("${HOST_ARCH}" STREQUAL "armv7hl")
+ set(HOST_ARCH_BASE "arm")
elseif("${HOST_ARCH}" STREQUAL "aarch64")
set(HOST_ARCH_BASE "aarch64")
elseif("${HOST_ARCH}" STREQUAL "i686")
@@ -49,6 +51,8 @@ if("${TARGET_ARCH}" STREQUAL "x86_64")
set(TARGET_ARCH_BASE ${TARGET_ARCH})
elseif("${TARGET_ARCH}" STREQUAL "armv7l")
set(TARGET_ARCH_BASE "arm")
+elseif("${TARGET_ARCH}" STREQUAL "armv7hl")
+ set(TARGET_ARCH_BASE "arm")
elseif("${TARGET_ARCH}" STREQUAL "aarch64")
set(TARGET_ARCH_BASE "aarch64")
elseif("${TARGET_ARCH}" STREQUAL "i686")
diff --git a/infra/cmake/packages/FlatBuffers-1.10/FlatBuffersConfig.cmake b/infra/cmake/packages/FlatBuffers-1.10/FlatBuffersConfig.cmake
deleted file mode 100644
index 8b0a602cb..000000000
--- a/infra/cmake/packages/FlatBuffers-1.10/FlatBuffersConfig.cmake
+++ /dev/null
@@ -1,119 +0,0 @@
-function(_FlatBuffers_import)
- find_package(Flatbuffers QUIET)
- set(FlatBuffers_FOUND ${Flatbuffers_FOUND} PARENT_SCOPE)
-endfunction(_FlatBuffers_import)
-
-function(_FlatBuffers_build)
- if(NOT BUILD_FLATBUFFERS)
- message(STATUS "FlatBuffersConfig skip: BUILD_FLATBUFFERS OFF")
- return()
- endif(NOT BUILD_FLATBUFFERS)
-
- nnas_find_package(FlatBuffersSource EXACT 1.10 QUIET)
-
- if(NOT FlatBuffersSource_FOUND)
- # Source is not available
- message(STATUS "FlatBuffersConfig skip: FlatBuffersSource not found")
- return()
- endif(NOT FlatBuffersSource_FOUND)
-
- set(ADDITIONAL_CXX_FLAGS "")
- if(CMAKE_CXX_COMPILER_ID STREQUAL "GNU" AND CMAKE_CXX_COMPILER_VERSION VERSION_GREATER 8.0)
- set(ADDITIONAL_CXX_FLAGS "-Wno-error=class-memaccess")
- endif()
-
- nnas_include(ExternalBuildTools)
- ExternalBuild_CMake(CMAKE_DIR ${FlatBuffersSource_DIR}
- BUILD_DIR ${CMAKE_BINARY_DIR}/externals/FLATBUFFERS-1.10/build
- INSTALL_DIR ${EXT_OVERLAY_DIR}/FLATBUFFERS-1.10
- BUILD_FLAGS ${ADDITIONAL_CXX_FLAGS}
- IDENTIFIER "1.10-fix6"
- EXTRA_OPTS "-DFLATBUFFERS_BUILD_TESTS:BOOL=OFF"
- "-DCMAKE_POSITION_INDEPENDENT_CODE:BOOL=ON"
- PKG_NAME "FLATBUFFERS-1.10")
-
-endfunction(_FlatBuffers_build)
-
-_FlatBuffers_build()
-_FlatBuffers_import()
-
-if(FlatBuffers_FOUND)
- if(NOT TARGET flatbuffers-1.10)
- add_library(flatbuffers-1.10 INTERFACE)
- target_link_libraries(flatbuffers-1.10 INTERFACE flatbuffers::flatbuffers)
- message(STATUS "Found FlatBuffers-1.10: TRUE")
- endif(NOT TARGET flatbuffers-1.10)
-
- function(FlatBuffers_Generate PREFIX OUTPUT_DIR SCHEMA_DIR)
- get_filename_component(abs_output_dir ${OUTPUT_DIR} ABSOLUTE)
- get_filename_component(abs_schema_dir ${SCHEMA_DIR} ABSOLUTE)
-
- foreach(schema ${ARGN})
- get_filename_component(schema_fn "${schema}" NAME)
- get_filename_component(dir "${schema}" DIRECTORY)
-
- get_filename_component(schema_fn_we "${schema_fn}" NAME_WE)
-
- list(APPEND SCHEMA_FILES "${abs_schema_dir}/${schema}")
- list(APPEND OUTPUT_FILES "${abs_output_dir}/${schema_fn_we}_generated.h")
- endforeach()
-
- add_custom_command(OUTPUT ${OUTPUT_FILES}
- COMMAND ${CMAKE_COMMAND} -E make_directory "${abs_output_dir}"
- COMMAND "$<TARGET_FILE:flatbuffers::flatc>" -c --no-includes
- --no-union-value-namespacing
- --gen-object-api -o "${abs_output_dir}"
- ${SCHEMA_FILES}
- DEPENDS flatbuffers::flatc)
-
- set(${PREFIX}_SOURCES ${OUTPUT_FILES} PARENT_SCOPE)
- set(${PREFIX}_INCLUDE_DIRS ${abs_output_dir} PARENT_SCOPE)
- endfunction(FlatBuffers_Generate)
-
- function(FlatBuffers_Target TGT)
- set(oneValueArgs OUTPUT_DIR SCHEMA_DIR INCLUDE_DIR)
- set(multiValueArgs SCHEMA_FILES)
- cmake_parse_arguments(ARG "" "${oneValueArgs}" "${multiValueArgs}" ${ARGN})
-
- # Use OUTPUT_DIR as INCLUDE_DIR if INCLUDE_DIR is not specified
- if(NOT ARG_INCLUDE_DIR)
- set(ARG_INCLUDE_DIR ${ARG_OUTPUT_DIR})
- endif(NOT ARG_INCLUDE_DIR)
-
- get_filename_component(abs_output_dir ${ARG_OUTPUT_DIR} ABSOLUTE)
- get_filename_component(abs_include_dir ${ARG_INCLUDE_DIR} ABSOLUTE)
- get_filename_component(abs_schema_dir ${ARG_SCHEMA_DIR} ABSOLUTE)
-
- # Let's reset list variables before using them
- # NOTE THIS DOES NOT AFFECT parent scope
- unset(SCHEMA_FILES)
- unset(OUTPUT_FILES)
-
- foreach(schema ${ARG_SCHEMA_FILES})
- get_filename_component(schema_fn "${schema}" NAME)
- get_filename_component(dir "${schema}" DIRECTORY)
-
- get_filename_component(schema_fn_we "${schema_fn}" NAME_WE)
-
- list(APPEND SCHEMA_FILES "${abs_schema_dir}/${schema}")
- list(APPEND OUTPUT_FILES "${abs_output_dir}/${schema_fn_we}_generated.h")
- endforeach()
-
- # Generate headers
- add_custom_command(OUTPUT ${OUTPUT_FILES}
- COMMAND ${CMAKE_COMMAND} -E make_directory "${abs_output_dir}"
- COMMAND "$<TARGET_FILE:flatbuffers::flatc>" -c --no-includes
- --no-union-value-namespacing
- --gen-object-api -o "${abs_output_dir}"
- ${SCHEMA_FILES}
- DEPENDS ${SCHEMA_FILES}
- COMMENT "Generate '${TGT}' headers")
-
- # NOTE This header-only library is deliberately declared as STATIC library
- # to avoid possible scope issues related with generated files
- add_library(${TGT} STATIC ${OUTPUT_FILES})
- set_target_properties(${TGT} PROPERTIES LINKER_LANGUAGE CXX)
- target_include_directories(${TGT} PUBLIC "${ARG_INCLUDE_DIR}")
- target_link_libraries(${TGT} PUBLIC flatbuffers-1.10)
- endfunction(FlatBuffers_Target)
-endif(FlatBuffers_FOUND)
diff --git a/infra/cmake/packages/FlatBuffers-1.12/FlatBuffersConfig.cmake b/infra/cmake/packages/FlatBuffers-2.0/FlatBuffersConfig.cmake
index 06366db63..b7ae666b8 100644
--- a/infra/cmake/packages/FlatBuffers-1.12/FlatBuffersConfig.cmake
+++ b/infra/cmake/packages/FlatBuffers-2.0/FlatBuffersConfig.cmake
@@ -1,5 +1,6 @@
+# TODO Remove other Flatbuffers versions
function(_FlatBuffers_import)
- find_package(Flatbuffers QUIET)
+ find_package(Flatbuffers 2.0 QUIET)
set(FlatBuffers_FOUND ${Flatbuffers_FOUND} PARENT_SCOPE)
endfunction(_FlatBuffers_import)
@@ -9,7 +10,7 @@ function(_FlatBuffers_build)
return()
endif(NOT BUILD_FLATBUFFERS)
- nnas_find_package(FlatBuffersSource EXACT 1.12 QUIET)
+ nnas_find_package(FlatBuffersSource EXACT 2.0 QUIET)
if(NOT FlatBuffersSource_FOUND)
# Source is not available
@@ -24,25 +25,37 @@ function(_FlatBuffers_build)
nnas_include(ExternalBuildTools)
ExternalBuild_CMake(CMAKE_DIR ${FlatBuffersSource_DIR}
- BUILD_DIR ${CMAKE_BINARY_DIR}/externals/FLATBUFFERS-1.12/build
- INSTALL_DIR ${EXT_OVERLAY_DIR}/FLATBUFFERS-1.12
+ BUILD_DIR ${CMAKE_BINARY_DIR}/externals/FLATBUFFERS-2.0/build
+ INSTALL_DIR ${EXT_OVERLAY_DIR}
BUILD_FLAGS ${ADDITIONAL_CXX_FLAGS}
- IDENTIFIER "1.12-fix3"
+ IDENTIFIER "2.0"
EXTRA_OPTS "-DFLATBUFFERS_BUILD_TESTS:BOOL=OFF"
"-DCMAKE_POSITION_INDEPENDENT_CODE:BOOL=ON"
- PKG_NAME "FLATBUFFERS-1.12")
+ PKG_NAME "FLATBUFFERS-2.0")
endfunction(_FlatBuffers_build)
_FlatBuffers_build()
_FlatBuffers_import()
+# For cross compilation, BUILD_HOST_EXEC should be set so that the host flatc
+# executable exists as ${BUILD_HOST_EXEC}/overlay/bin/flatc.
+# If EXTERNAL_FLATC is set, the ${EXTERNAL_FLATC} file is used instead.
+set(FLATC_PATH "$<TARGET_FILE:flatbuffers::flatc>")
+
+if(DEFINED ENV{BUILD_HOST_EXEC})
+ set(FLATC_PATH $ENV{BUILD_HOST_EXEC}/overlay/bin/flatc)
+endif(DEFINED ENV{BUILD_HOST_EXEC})
+if(DEFINED ENV{EXTERNAL_FLATC})
+ set(FLATC_PATH $ENV{EXTERNAL_FLATC})
+endif(DEFINED ENV{EXTERNAL_FLATC})
+
if(FlatBuffers_FOUND)
- if(NOT TARGET flatbuffers-1.12)
- add_library(flatbuffers-1.12 INTERFACE)
- target_link_libraries(flatbuffers-1.12 INTERFACE flatbuffers::flatbuffers)
- message(STATUS "Found FlatBuffers-1.12: TRUE")
- endif(NOT TARGET flatbuffers-1.12)
+ if(NOT TARGET flatbuffers-2.0)
+ add_library(flatbuffers-2.0 INTERFACE)
+ target_link_libraries(flatbuffers-2.0 INTERFACE flatbuffers::flatbuffers)
+ message(STATUS "Found flatbuffers-2.0: TRUE")
+ endif(NOT TARGET flatbuffers-2.0)
function(FlatBuffers_Generate PREFIX OUTPUT_DIR SCHEMA_DIR)
get_filename_component(abs_output_dir ${OUTPUT_DIR} ABSOLUTE)
@@ -60,7 +73,7 @@ if(FlatBuffers_FOUND)
add_custom_command(OUTPUT ${OUTPUT_FILES}
COMMAND ${CMAKE_COMMAND} -E make_directory "${abs_output_dir}"
- COMMAND "$<TARGET_FILE:flatbuffers::flatc>" -c --no-includes
+ COMMAND "${FLATC_PATH}" -c --no-includes
--no-union-value-namespacing
--gen-object-api -o "${abs_output_dir}"
${SCHEMA_FILES}
@@ -102,7 +115,7 @@ if(FlatBuffers_FOUND)
# Generate headers
add_custom_command(OUTPUT ${OUTPUT_FILES}
COMMAND ${CMAKE_COMMAND} -E make_directory "${abs_output_dir}"
- COMMAND "$<TARGET_FILE:flatbuffers::flatc>" -c --no-includes
+ COMMAND "${FLATC_PATH}" -c --no-includes
--no-union-value-namespacing
--gen-object-api -o "${abs_output_dir}"
${SCHEMA_FILES}
@@ -114,6 +127,6 @@ if(FlatBuffers_FOUND)
add_library(${TGT} STATIC ${OUTPUT_FILES})
set_target_properties(${TGT} PROPERTIES LINKER_LANGUAGE CXX)
target_include_directories(${TGT} PUBLIC "${ARG_INCLUDE_DIR}")
- target_link_libraries(${TGT} PUBLIC flatbuffers-1.12)
+ target_link_libraries(${TGT} PUBLIC flatbuffers-2.0)
endfunction(FlatBuffers_Target)
endif(FlatBuffers_FOUND)
diff --git a/infra/cmake/packages/FlatBuffers-1.10/FlatBuffersConfigVersion.cmake b/infra/cmake/packages/FlatBuffers-2.0/FlatBuffersConfigVersion.cmake
index 6585f21d5..e4a87a7d5 100644
--- a/infra/cmake/packages/FlatBuffers-1.10/FlatBuffersConfigVersion.cmake
+++ b/infra/cmake/packages/FlatBuffers-2.0/FlatBuffersConfigVersion.cmake
@@ -1,4 +1,4 @@
-set(PACKAGE_VERSION "1.10")
+set(PACKAGE_VERSION "2.0")
set(PACKAGE_VERSION_EXACT FALSE)
set(PACKAGE_VERSION_COMPATIBLE FALSE)
set(PACKAGE_VERSION_UNSUITABLE TRUE)
diff --git a/infra/cmake/packages/FlatBuffersConfig.cmake b/infra/cmake/packages/FlatBuffersConfig.cmake
deleted file mode 100644
index 8b0a602cb..000000000
--- a/infra/cmake/packages/FlatBuffersConfig.cmake
+++ /dev/null
@@ -1,119 +0,0 @@
-function(_FlatBuffers_import)
- find_package(Flatbuffers QUIET)
- set(FlatBuffers_FOUND ${Flatbuffers_FOUND} PARENT_SCOPE)
-endfunction(_FlatBuffers_import)
-
-function(_FlatBuffers_build)
- if(NOT BUILD_FLATBUFFERS)
- message(STATUS "FlatBuffersConfig skip: BUILD_FLATBUFFERS OFF")
- return()
- endif(NOT BUILD_FLATBUFFERS)
-
- nnas_find_package(FlatBuffersSource EXACT 1.10 QUIET)
-
- if(NOT FlatBuffersSource_FOUND)
- # Source is not available
- message(STATUS "FlatBuffersConfig skip: FlatBuffersSource not found")
- return()
- endif(NOT FlatBuffersSource_FOUND)
-
- set(ADDITIONAL_CXX_FLAGS "")
- if(CMAKE_CXX_COMPILER_ID STREQUAL "GNU" AND CMAKE_CXX_COMPILER_VERSION VERSION_GREATER 8.0)
- set(ADDITIONAL_CXX_FLAGS "-Wno-error=class-memaccess")
- endif()
-
- nnas_include(ExternalBuildTools)
- ExternalBuild_CMake(CMAKE_DIR ${FlatBuffersSource_DIR}
- BUILD_DIR ${CMAKE_BINARY_DIR}/externals/FLATBUFFERS-1.10/build
- INSTALL_DIR ${EXT_OVERLAY_DIR}/FLATBUFFERS-1.10
- BUILD_FLAGS ${ADDITIONAL_CXX_FLAGS}
- IDENTIFIER "1.10-fix6"
- EXTRA_OPTS "-DFLATBUFFERS_BUILD_TESTS:BOOL=OFF"
- "-DCMAKE_POSITION_INDEPENDENT_CODE:BOOL=ON"
- PKG_NAME "FLATBUFFERS-1.10")
-
-endfunction(_FlatBuffers_build)
-
-_FlatBuffers_build()
-_FlatBuffers_import()
-
-if(FlatBuffers_FOUND)
- if(NOT TARGET flatbuffers-1.10)
- add_library(flatbuffers-1.10 INTERFACE)
- target_link_libraries(flatbuffers-1.10 INTERFACE flatbuffers::flatbuffers)
- message(STATUS "Found FlatBuffers-1.10: TRUE")
- endif(NOT TARGET flatbuffers-1.10)
-
- function(FlatBuffers_Generate PREFIX OUTPUT_DIR SCHEMA_DIR)
- get_filename_component(abs_output_dir ${OUTPUT_DIR} ABSOLUTE)
- get_filename_component(abs_schema_dir ${SCHEMA_DIR} ABSOLUTE)
-
- foreach(schema ${ARGN})
- get_filename_component(schema_fn "${schema}" NAME)
- get_filename_component(dir "${schema}" DIRECTORY)
-
- get_filename_component(schema_fn_we "${schema_fn}" NAME_WE)
-
- list(APPEND SCHEMA_FILES "${abs_schema_dir}/${schema}")
- list(APPEND OUTPUT_FILES "${abs_output_dir}/${schema_fn_we}_generated.h")
- endforeach()
-
- add_custom_command(OUTPUT ${OUTPUT_FILES}
- COMMAND ${CMAKE_COMMAND} -E make_directory "${abs_output_dir}"
- COMMAND "$<TARGET_FILE:flatbuffers::flatc>" -c --no-includes
- --no-union-value-namespacing
- --gen-object-api -o "${abs_output_dir}"
- ${SCHEMA_FILES}
- DEPENDS flatbuffers::flatc)
-
- set(${PREFIX}_SOURCES ${OUTPUT_FILES} PARENT_SCOPE)
- set(${PREFIX}_INCLUDE_DIRS ${abs_output_dir} PARENT_SCOPE)
- endfunction(FlatBuffers_Generate)
-
- function(FlatBuffers_Target TGT)
- set(oneValueArgs OUTPUT_DIR SCHEMA_DIR INCLUDE_DIR)
- set(multiValueArgs SCHEMA_FILES)
- cmake_parse_arguments(ARG "" "${oneValueArgs}" "${multiValueArgs}" ${ARGN})
-
- # Use OUTPUT_DIR as INCLUDE_DIR if INCLUDE_DIR is not specified
- if(NOT ARG_INCLUDE_DIR)
- set(ARG_INCLUDE_DIR ${ARG_OUTPUT_DIR})
- endif(NOT ARG_INCLUDE_DIR)
-
- get_filename_component(abs_output_dir ${ARG_OUTPUT_DIR} ABSOLUTE)
- get_filename_component(abs_include_dir ${ARG_INCLUDE_DIR} ABSOLUTE)
- get_filename_component(abs_schema_dir ${ARG_SCHEMA_DIR} ABSOLUTE)
-
- # Let's reset list variables before using them
- # NOTE THIS DOES NOT AFFECT parent scope
- unset(SCHEMA_FILES)
- unset(OUTPUT_FILES)
-
- foreach(schema ${ARG_SCHEMA_FILES})
- get_filename_component(schema_fn "${schema}" NAME)
- get_filename_component(dir "${schema}" DIRECTORY)
-
- get_filename_component(schema_fn_we "${schema_fn}" NAME_WE)
-
- list(APPEND SCHEMA_FILES "${abs_schema_dir}/${schema}")
- list(APPEND OUTPUT_FILES "${abs_output_dir}/${schema_fn_we}_generated.h")
- endforeach()
-
- # Generate headers
- add_custom_command(OUTPUT ${OUTPUT_FILES}
- COMMAND ${CMAKE_COMMAND} -E make_directory "${abs_output_dir}"
- COMMAND "$<TARGET_FILE:flatbuffers::flatc>" -c --no-includes
- --no-union-value-namespacing
- --gen-object-api -o "${abs_output_dir}"
- ${SCHEMA_FILES}
- DEPENDS ${SCHEMA_FILES}
- COMMENT "Generate '${TGT}' headers")
-
- # NOTE This header-only library is deliberately declared as STATIC library
- # to avoid possible scope issues related with generated files
- add_library(${TGT} STATIC ${OUTPUT_FILES})
- set_target_properties(${TGT} PROPERTIES LINKER_LANGUAGE CXX)
- target_include_directories(${TGT} PUBLIC "${ARG_INCLUDE_DIR}")
- target_link_libraries(${TGT} PUBLIC flatbuffers-1.10)
- endfunction(FlatBuffers_Target)
-endif(FlatBuffers_FOUND)
diff --git a/infra/cmake/packages/FlatBuffersSource-1.12/FlatBuffersSourceConfig.cmake b/infra/cmake/packages/FlatBuffersSource-1.12/FlatBuffersSourceConfig.cmake
deleted file mode 100644
index 9ee2c49f7..000000000
--- a/infra/cmake/packages/FlatBuffersSource-1.12/FlatBuffersSourceConfig.cmake
+++ /dev/null
@@ -1,21 +0,0 @@
-function(_FlatBuffersSource_import)
- if(NOT DOWNLOAD_FLATBUFFERS)
- set(FlatBuffersSource_FOUND FALSE PARENT_SCOPE)
- return()
- endif(NOT DOWNLOAD_FLATBUFFERS)
-
- nnas_include(ExternalSourceTools)
- nnas_include(OptionTools)
-
- envoption(FLATBUFFERS_1_12_URL https://github.com/google/flatbuffers/archive/v1.12.0.tar.gz)
- ExternalSource_Download(FLATBUFFERS
- DIRNAME FLATBUFFERS-1.12
- CHECKSUM MD5=c62ffefb3d4548b127cca14ce047f16c
- URL ${FLATBUFFERS_1_12_URL}
- )
-
- set(FlatBuffersSource_DIR ${FLATBUFFERS_SOURCE_DIR} PARENT_SCOPE)
- set(FlatBuffersSource_FOUND TRUE PARENT_SCOPE)
-endfunction(_FlatBuffersSource_import)
-
-_FlatBuffersSource_import()
diff --git a/infra/cmake/packages/FlatBuffersSource-1.10/FlatBuffersSourceConfig.cmake b/infra/cmake/packages/FlatBuffersSource-2.0/FlatBuffersSourceConfig.cmake
index 09a922b67..a0a32aa9e 100644
--- a/infra/cmake/packages/FlatBuffersSource-1.10/FlatBuffersSourceConfig.cmake
+++ b/infra/cmake/packages/FlatBuffersSource-2.0/FlatBuffersSourceConfig.cmake
@@ -7,11 +7,11 @@ function(_FlatBuffersSource_import)
nnas_include(ExternalSourceTools)
nnas_include(OptionTools)
- envoption(FLATBUFFERS_1_10_URL https://github.com/google/flatbuffers/archive/v1.10.0.tar.gz)
+ envoption(FLATBUFFERS_2_0_URL https://github.com/google/flatbuffers/archive/v2.0.0.tar.gz)
ExternalSource_Download(FLATBUFFERS
- DIRNAME FLATBUFFERS-1.10
- CHECKSUM MD5=f7d19a3f021d93422b0bc287d7148cd2
- URL ${FLATBUFFERS_1_10_URL}
+ DIRNAME FLATBUFFERS-2.0
+ CHECKSUM MD5=a27992324c3cbf86dd888268a23d17bd
+ URL ${FLATBUFFERS_2_0_URL}
)
set(FlatBuffersSource_DIR ${FLATBUFFERS_SOURCE_DIR} PARENT_SCOPE)
diff --git a/infra/cmake/packages/FlatBuffersSource-1.12/FlatBuffersSourceConfigVersion.cmake b/infra/cmake/packages/FlatBuffersSource-2.0/FlatBuffersSourceConfigVersion.cmake
index 8cfdbf8e5..e4a87a7d5 100644
--- a/infra/cmake/packages/FlatBuffersSource-1.12/FlatBuffersSourceConfigVersion.cmake
+++ b/infra/cmake/packages/FlatBuffersSource-2.0/FlatBuffersSourceConfigVersion.cmake
@@ -1,4 +1,4 @@
-set(PACKAGE_VERSION "1.12")
+set(PACKAGE_VERSION "2.0")
set(PACKAGE_VERSION_EXACT FALSE)
set(PACKAGE_VERSION_COMPATIBLE FALSE)
set(PACKAGE_VERSION_UNSUITABLE TRUE)
diff --git a/infra/cmake/packages/FlatBuffersSourceConfig.cmake b/infra/cmake/packages/FlatBuffersSourceConfig.cmake
deleted file mode 100644
index 52bce6de0..000000000
--- a/infra/cmake/packages/FlatBuffersSourceConfig.cmake
+++ /dev/null
@@ -1,28 +0,0 @@
-function(_FlatBuffersSource_import)
- if(NOT DOWNLOAD_FLATBUFFERS)
- set(FlatBuffersSource_FOUND FALSE PARENT_SCOPE)
- return()
- endif(NOT DOWNLOAD_FLATBUFFERS)
-
- nnas_include(ExternalSourceTools)
- nnas_include(OptionTools)
-
- # Each TensorFlow needs a specific version of Flatbuffers
- # - TensorFlow 1.7 downloads it from https://github.com/google/flatbuffers/archive/971a68110e4.tar.gz
- # - TensorFlow 1.12 downloads it from https://github.com/google/flatbuffers/archive/1f5eae5d6a1.tar.gz
- #
- # Let's use 1.10 released in 2018.10 (compatible with 1f5eae5d6a1).
- #
- # TODO Manage multiple versions
- envoption(FLATBUFFERS_URL https://github.com/google/flatbuffers/archive/v1.10.0.tar.gz)
- ExternalSource_Download(FLATBUFFERS
- DIRNAME FLATBUFFERS
- CHECKSUM MD5=f7d19a3f021d93422b0bc287d7148cd2
- URL ${FLATBUFFERS_URL}
- )
-
- set(FlatBuffersSource_DIR ${FLATBUFFERS_SOURCE_DIR} PARENT_SCOPE)
- set(FlatBuffersSource_FOUND TRUE PARENT_SCOPE)
-endfunction(_FlatBuffersSource_import)
-
-_FlatBuffersSource_import()
diff --git a/infra/cmake/packages/FlatBuffersSourceConfigVersion.cmake b/infra/cmake/packages/FlatBuffersSourceConfigVersion.cmake
deleted file mode 100644
index ac9e22e51..000000000
--- a/infra/cmake/packages/FlatBuffersSourceConfigVersion.cmake
+++ /dev/null
@@ -1,9 +0,0 @@
-set(PACKAGE_VERSION_EXACT FALSE)
-set(PACKAGE_VERSION_COMPATIBLE FALSE)
-set(PACKAGE_VERSION_UNSUITABLE TRUE)
-
-if(NOT PACKAGE_FIND_VERSION)
- # This package works only when find_package(...) call has no EXACT option
- set(PACKAGE_VERSION_COMPATIBLE TRUE)
- set(PACKAGE_VERSION_UNSUITABLE FALSE)
-endif(NOT PACKAGE_FIND_VERSION)
diff --git a/infra/cmake/packages/GTestConfig.cmake b/infra/cmake/packages/GTestConfig.cmake
index 62a15e0cc..872ff7276 100644
--- a/infra/cmake/packages/GTestConfig.cmake
+++ b/infra/cmake/packages/GTestConfig.cmake
@@ -6,6 +6,7 @@ function(_GTest_build)
nnas_find_package(GTestSource QUIET)
if(NOT GTestSource_FOUND)
+ message(STATUS "GTest_build skip: NOT GTestSource_FOUND")
return()
endif(NOT GTestSource_FOUND)
@@ -13,9 +14,14 @@ function(_GTest_build)
ExternalBuild_CMake(CMAKE_DIR ${GTestSource_DIR}
BUILD_DIR ${CMAKE_BINARY_DIR}/externals/GTEST/build
INSTALL_DIR ${EXT_OVERLAY_DIR}
- IDENTIFIER "1.8.0-fix1"
+ IDENTIFIER "1.11.0"
PKG_NAME "GTEST")
+ set(GTEST_FOUND TRUE PARENT_SCOPE)
+ set(GTEST_INCLUDE_DIRS ${EXT_OVERLAY_DIR}/include PARENT_SCOPE)
+ set(GTEST_LIBRARIES ${EXT_OVERLAY_DIR}/lib/libgtest.a PARENT_SCOPE)
+ set(GTEST_MAIN_LIBRARIES ${EXT_OVERLAY_DIR}/lib/libgtest_main.a PARENT_SCOPE)
+
endfunction(_GTest_build)
_GTest_build()
@@ -24,7 +30,14 @@ _GTest_build()
# Note: cmake supports GTest and does not find GTestConfig.cmake or GTest-config.cmake.
# Refer to "https://cmake.org/cmake/help/v3.5/module/FindGTest.html"
# find_package(GTest) creates options like GTEST_FOUND, not GTest_FOUND.
-find_package(GTest)
+if(GTEST_FOUND)
+ message(STATUS "Found GTest: true")
+else(GTEST_FOUND)
+ message(STATUS "GTEST_FOUND false: call find_package(GTest)")
+ # Reset package config directory cache to prevent recursive find
+ unset(GTest_DIR CACHE)
+ find_package(GTest)
+endif(GTEST_FOUND)
find_package(Threads)
if(${GTEST_FOUND} AND TARGET Threads::Threads)
diff --git a/infra/cmake/packages/GTestSourceConfig.cmake b/infra/cmake/packages/GTestSourceConfig.cmake
index 8b7495fbc..e57d0965a 100644
--- a/infra/cmake/packages/GTestSourceConfig.cmake
+++ b/infra/cmake/packages/GTestSourceConfig.cmake
@@ -7,7 +7,7 @@ function(_GTestSource_import)
nnas_include(ExternalSourceTools)
nnas_include(OptionTools)
- envoption(GTEST_URL https://github.com/google/googletest/archive/release-1.8.0.tar.gz)
+ envoption(GTEST_URL https://github.com/google/googletest/archive/release-1.11.0.tar.gz)
ExternalSource_Download(GTEST ${GTEST_URL})
diff --git a/infra/cmake/packages/H5Tinit.c.linux-armv7l b/infra/cmake/packages/H5Tinit.c.linux-armv7l
new file mode 100644
index 000000000..b0f6a470d
--- /dev/null
+++ b/infra/cmake/packages/H5Tinit.c.linux-armv7l
@@ -0,0 +1,977 @@
+/* Generated automatically by H5detect -- do not edit */
+
+
+
+/* * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * *
+ * Copyright by The HDF Group. *
+ * Copyright by the Board of Trustees of the University of Illinois. *
+ * All rights reserved. *
+ * *
+ * This file is part of HDF5. The full HDF5 copyright notice, including *
+ * terms governing use, modification, and redistribution, is contained in *
+ * the files COPYING and Copyright.html. COPYING can be found at the root *
+ * of the source code distribution tree; Copyright.html can be found at the *
+ * root level of an installed copy of the electronic HDF5 document set and *
+ * is linked from the top-level documents page. It can also be found at *
+ * http://hdfgroup.org/HDF5/doc/Copyright.html. If you do not have *
+ * access to either file, you may request a copy from help@hdfgroup.org. *
+ * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * *
+ *
+ * Created: Mar 31, 2022
+ * Ubuntu <ubuntu@rpi4>
+ *
+ * Purpose: This machine-generated source code contains
+ * information about the various integer and
+ * floating point numeric formats found on this
+ * architecture. The parameters below should be
+ * checked carefully and errors reported to the
+ * HDF5 maintainer.
+ *
+ * Each of the numeric formats listed below are
+ * printed from most significant bit to least
+ * significant bit even though the actual bytes
+ * might be stored in a different order in
+ * memory. The integers above each binary byte
+ * indicate the relative order of the bytes in
+ * memory; little-endian machines have
+ * decreasing numbers while big-endian machines
+ * have increasing numbers.
+ *
+ * The fields of the numbers are printed as
+ * letters with `S' for the mantissa sign bit,
+ * `M' for the mantissa magnitude, and `E' for
+ * the exponent. The exponent has an associated
+ * bias which can be subtracted to find the
+ * true exponent. The radix point is assumed
+ * to be before the first `M' bit. Any bit
+ * of a floating-point value not falling into one
+ * of these categories is printed as a question
+ * mark. Bits of integer types are printed as
+ * `I' for 2's complement and `U' for magnitude.
+ *
+ * If the most significant bit of the normalized
+ * mantissa (always a `1' except for `0.0') is
+ * not stored then an `implicit=yes' appears
+ * under the field description. In this case,
+ * the radix point is still assumed to be
+ * before the first `M' but after the implicit
+ * bit.
+ *
+ * Modifications:
+ *
+ * DO NOT MAKE MODIFICATIONS TO THIS FILE!
+ * It was generated by code in `H5detect.c'.
+ *
+ *-------------------------------------------------------------------------
+ */
+
+/****************/
+/* Module Setup */
+/****************/
+
+#define H5T_PACKAGE /*suppress error about including H5Tpkg.h*/
+
+
+/***********/
+/* Headers */
+/***********/
+#include "H5private.h" /* Generic Functions */
+#include "H5Eprivate.h" /* Error handling */
+#include "H5FLprivate.h" /* Free Lists */
+#include "H5Iprivate.h" /* IDs */
+#include "H5Tpkg.h" /* Datatypes */
+
+
+/****************/
+/* Local Macros */
+/****************/
+
+
+/******************/
+/* Local Typedefs */
+/******************/
+
+
+/********************/
+/* Package Typedefs */
+/********************/
+
+
+/********************/
+/* Local Prototypes */
+/********************/
+
+
+/********************/
+/* Public Variables */
+/********************/
+
+
+/*****************************/
+/* Library Private Variables */
+/*****************************/
+
+
+/*********************/
+/* Package Variables */
+/*********************/
+
+
+
+/*******************/
+/* Local Variables */
+/*******************/
+
+
+
+/*-------------------------------------------------------------------------
+ * Function: H5TN_init_interface
+ *
+ * Purpose: Initialize pre-defined native datatypes from code generated
+ * during the library configuration by H5detect.
+ *
+ * Return: Success: non-negative
+ * Failure: negative
+ *
+ * Programmer: Robb Matzke
+ * Wednesday, December 16, 1998
+ *
+ *-------------------------------------------------------------------------
+ */
+herr_t
+H5TN_init_interface(void)
+{
+ H5T_t *dt = NULL;
+ herr_t ret_value = SUCCEED;
+
+ FUNC_ENTER_NOAPI(FAIL)
+
+ /*
+ * 0
+ * IIIIIIII
+ * Alignment: none
+ */
+ if(NULL == (dt = H5T__alloc()))
+ HGOTO_ERROR(H5E_DATATYPE, H5E_NOSPACE, FAIL, "datatype allocation failed")
+ dt->shared->state = H5T_STATE_IMMUTABLE;
+ dt->shared->type = H5T_INTEGER;
+ dt->shared->size = 1;
+ dt->shared->u.atomic.order = H5T_ORDER_LE;
+ dt->shared->u.atomic.offset = 0;
+ dt->shared->u.atomic.prec = 8;
+ dt->shared->u.atomic.lsb_pad = H5T_PAD_ZERO;
+ dt->shared->u.atomic.msb_pad = H5T_PAD_ZERO;
+ dt->shared->u.atomic.u.i.sign = H5T_SGN_2;
+ if((H5T_NATIVE_SCHAR_g = H5I_register(H5I_DATATYPE, dt, FALSE)) < 0)
+ HGOTO_ERROR(H5E_DATATYPE, H5E_CANTINIT, FAIL, "can't register ID for built-in datatype")
+ H5T_NATIVE_SCHAR_ALIGN_g = 1;
+ H5T_NATIVE_SCHAR_COMP_ALIGN_g = 1;
+
+ /*
+ * 0
+ * UUUUUUUU
+ * Alignment: none
+ */
+ if(NULL == (dt = H5T__alloc()))
+ HGOTO_ERROR(H5E_DATATYPE, H5E_NOSPACE, FAIL, "datatype allocation failed")
+ dt->shared->state = H5T_STATE_IMMUTABLE;
+ dt->shared->type = H5T_INTEGER;
+ dt->shared->size = 1;
+ dt->shared->u.atomic.order = H5T_ORDER_LE;
+ dt->shared->u.atomic.offset = 0;
+ dt->shared->u.atomic.prec = 8;
+ dt->shared->u.atomic.lsb_pad = H5T_PAD_ZERO;
+ dt->shared->u.atomic.msb_pad = H5T_PAD_ZERO;
+ dt->shared->u.atomic.u.i.sign = H5T_SGN_NONE;
+ if((H5T_NATIVE_UCHAR_g = H5I_register(H5I_DATATYPE, dt, FALSE)) < 0)
+ HGOTO_ERROR(H5E_DATATYPE, H5E_CANTINIT, FAIL, "can't register ID for built-in datatype")
+ H5T_NATIVE_UCHAR_ALIGN_g = 1;
+
+ /*
+ * 1 0
+ * IIIIIIII IIIIIIII
+ * Alignment: none
+ */
+ if(NULL == (dt = H5T__alloc()))
+ HGOTO_ERROR(H5E_DATATYPE, H5E_NOSPACE, FAIL, "datatype allocation failed")
+ dt->shared->state = H5T_STATE_IMMUTABLE;
+ dt->shared->type = H5T_INTEGER;
+ dt->shared->size = 2;
+ dt->shared->u.atomic.order = H5T_ORDER_LE;
+ dt->shared->u.atomic.offset = 0;
+ dt->shared->u.atomic.prec = 16;
+ dt->shared->u.atomic.lsb_pad = H5T_PAD_ZERO;
+ dt->shared->u.atomic.msb_pad = H5T_PAD_ZERO;
+ dt->shared->u.atomic.u.i.sign = H5T_SGN_2;
+ if((H5T_NATIVE_SHORT_g = H5I_register(H5I_DATATYPE, dt, FALSE)) < 0)
+ HGOTO_ERROR(H5E_DATATYPE, H5E_CANTINIT, FAIL, "can't register ID for built-in datatype")
+ H5T_NATIVE_SHORT_ALIGN_g = 1;
+ H5T_NATIVE_SHORT_COMP_ALIGN_g = 2;
+
+ /*
+ * 1 0
+ * UUUUUUUU UUUUUUUU
+ * Alignment: none
+ */
+ if(NULL == (dt = H5T__alloc()))
+ HGOTO_ERROR(H5E_DATATYPE, H5E_NOSPACE, FAIL, "datatype allocation failed")
+ dt->shared->state = H5T_STATE_IMMUTABLE;
+ dt->shared->type = H5T_INTEGER;
+ dt->shared->size = 2;
+ dt->shared->u.atomic.order = H5T_ORDER_LE;
+ dt->shared->u.atomic.offset = 0;
+ dt->shared->u.atomic.prec = 16;
+ dt->shared->u.atomic.lsb_pad = H5T_PAD_ZERO;
+ dt->shared->u.atomic.msb_pad = H5T_PAD_ZERO;
+ dt->shared->u.atomic.u.i.sign = H5T_SGN_NONE;
+ if((H5T_NATIVE_USHORT_g = H5I_register(H5I_DATATYPE, dt, FALSE)) < 0)
+ HGOTO_ERROR(H5E_DATATYPE, H5E_CANTINIT, FAIL, "can't register ID for built-in datatype")
+ H5T_NATIVE_USHORT_ALIGN_g = 1;
+
+ /*
+ * 3 2 1 0
+ * IIIIIIII IIIIIIII IIIIIIII IIIIIIII
+ * Alignment: none
+ */
+ if(NULL == (dt = H5T__alloc()))
+ HGOTO_ERROR(H5E_DATATYPE, H5E_NOSPACE, FAIL, "datatype allocation failed")
+ dt->shared->state = H5T_STATE_IMMUTABLE;
+ dt->shared->type = H5T_INTEGER;
+ dt->shared->size = 4;
+ dt->shared->u.atomic.order = H5T_ORDER_LE;
+ dt->shared->u.atomic.offset = 0;
+ dt->shared->u.atomic.prec = 32;
+ dt->shared->u.atomic.lsb_pad = H5T_PAD_ZERO;
+ dt->shared->u.atomic.msb_pad = H5T_PAD_ZERO;
+ dt->shared->u.atomic.u.i.sign = H5T_SGN_2;
+ if((H5T_NATIVE_INT_g = H5I_register(H5I_DATATYPE, dt, FALSE)) < 0)
+ HGOTO_ERROR(H5E_DATATYPE, H5E_CANTINIT, FAIL, "can't register ID for built-in datatype")
+ H5T_NATIVE_INT_ALIGN_g = 1;
+ H5T_NATIVE_INT_COMP_ALIGN_g = 4;
+
+ /*
+ * 3 2 1 0
+ * UUUUUUUU UUUUUUUU UUUUUUUU UUUUUUUU
+ * Alignment: none
+ */
+ if(NULL == (dt = H5T__alloc()))
+ HGOTO_ERROR(H5E_DATATYPE, H5E_NOSPACE, FAIL, "datatype allocation failed")
+ dt->shared->state = H5T_STATE_IMMUTABLE;
+ dt->shared->type = H5T_INTEGER;
+ dt->shared->size = 4;
+ dt->shared->u.atomic.order = H5T_ORDER_LE;
+ dt->shared->u.atomic.offset = 0;
+ dt->shared->u.atomic.prec = 32;
+ dt->shared->u.atomic.lsb_pad = H5T_PAD_ZERO;
+ dt->shared->u.atomic.msb_pad = H5T_PAD_ZERO;
+ dt->shared->u.atomic.u.i.sign = H5T_SGN_NONE;
+ if((H5T_NATIVE_UINT_g = H5I_register(H5I_DATATYPE, dt, FALSE)) < 0)
+ HGOTO_ERROR(H5E_DATATYPE, H5E_CANTINIT, FAIL, "can't register ID for built-in datatype")
+ H5T_NATIVE_UINT_ALIGN_g = 1;
+
+ /*
+ * 3 2 1 0
+ * IIIIIIII IIIIIIII IIIIIIII IIIIIIII
+ * Alignment: none
+ */
+ if(NULL == (dt = H5T__alloc()))
+ HGOTO_ERROR(H5E_DATATYPE, H5E_NOSPACE, FAIL, "datatype allocation failed")
+ dt->shared->state = H5T_STATE_IMMUTABLE;
+ dt->shared->type = H5T_INTEGER;
+ dt->shared->size = 4;
+ dt->shared->u.atomic.order = H5T_ORDER_LE;
+ dt->shared->u.atomic.offset = 0;
+ dt->shared->u.atomic.prec = 32;
+ dt->shared->u.atomic.lsb_pad = H5T_PAD_ZERO;
+ dt->shared->u.atomic.msb_pad = H5T_PAD_ZERO;
+ dt->shared->u.atomic.u.i.sign = H5T_SGN_2;
+ if((H5T_NATIVE_LONG_g = H5I_register(H5I_DATATYPE, dt, FALSE)) < 0)
+ HGOTO_ERROR(H5E_DATATYPE, H5E_CANTINIT, FAIL, "can't register ID for built-in datatype")
+ H5T_NATIVE_LONG_ALIGN_g = 1;
+ H5T_NATIVE_LONG_COMP_ALIGN_g = 4;
+
+ /*
+ * 3 2 1 0
+ * UUUUUUUU UUUUUUUU UUUUUUUU UUUUUUUU
+ * Alignment: none
+ */
+ if(NULL == (dt = H5T__alloc()))
+ HGOTO_ERROR(H5E_DATATYPE, H5E_NOSPACE, FAIL, "datatype allocation failed")
+ dt->shared->state = H5T_STATE_IMMUTABLE;
+ dt->shared->type = H5T_INTEGER;
+ dt->shared->size = 4;
+ dt->shared->u.atomic.order = H5T_ORDER_LE;
+ dt->shared->u.atomic.offset = 0;
+ dt->shared->u.atomic.prec = 32;
+ dt->shared->u.atomic.lsb_pad = H5T_PAD_ZERO;
+ dt->shared->u.atomic.msb_pad = H5T_PAD_ZERO;
+ dt->shared->u.atomic.u.i.sign = H5T_SGN_NONE;
+ if((H5T_NATIVE_ULONG_g = H5I_register(H5I_DATATYPE, dt, FALSE)) < 0)
+ HGOTO_ERROR(H5E_DATATYPE, H5E_CANTINIT, FAIL, "can't register ID for built-in datatype")
+ H5T_NATIVE_ULONG_ALIGN_g = 1;
+
+ /*
+ * 0
+ * IIIIIIII
+ * Alignment: none
+ */
+ if(NULL == (dt = H5T__alloc()))
+ HGOTO_ERROR(H5E_DATATYPE, H5E_NOSPACE, FAIL, "datatype allocation failed")
+ dt->shared->state = H5T_STATE_IMMUTABLE;
+ dt->shared->type = H5T_INTEGER;
+ dt->shared->size = 1;
+ dt->shared->u.atomic.order = H5T_ORDER_LE;
+ dt->shared->u.atomic.offset = 0;
+ dt->shared->u.atomic.prec = 8;
+ dt->shared->u.atomic.lsb_pad = H5T_PAD_ZERO;
+ dt->shared->u.atomic.msb_pad = H5T_PAD_ZERO;
+ dt->shared->u.atomic.u.i.sign = H5T_SGN_2;
+ if((H5T_NATIVE_INT8_g = H5I_register(H5I_DATATYPE, dt, FALSE)) < 0)
+ HGOTO_ERROR(H5E_DATATYPE, H5E_CANTINIT, FAIL, "can't register ID for built-in datatype")
+ H5T_NATIVE_INT8_ALIGN_g = 1;
+
+ /*
+ * 0
+ * UUUUUUUU
+ * Alignment: none
+ */
+ if(NULL == (dt = H5T__alloc()))
+ HGOTO_ERROR(H5E_DATATYPE, H5E_NOSPACE, FAIL, "datatype allocation failed")
+ dt->shared->state = H5T_STATE_IMMUTABLE;
+ dt->shared->type = H5T_INTEGER;
+ dt->shared->size = 1;
+ dt->shared->u.atomic.order = H5T_ORDER_LE;
+ dt->shared->u.atomic.offset = 0;
+ dt->shared->u.atomic.prec = 8;
+ dt->shared->u.atomic.lsb_pad = H5T_PAD_ZERO;
+ dt->shared->u.atomic.msb_pad = H5T_PAD_ZERO;
+ dt->shared->u.atomic.u.i.sign = H5T_SGN_NONE;
+ if((H5T_NATIVE_UINT8_g = H5I_register(H5I_DATATYPE, dt, FALSE)) < 0)
+ HGOTO_ERROR(H5E_DATATYPE, H5E_CANTINIT, FAIL, "can't register ID for built-in datatype")
+ H5T_NATIVE_UINT8_ALIGN_g = 1;
+
+ /*
+ * 0
+ * IIIIIIII
+ * Alignment: none
+ */
+ if(NULL == (dt = H5T__alloc()))
+ HGOTO_ERROR(H5E_DATATYPE, H5E_NOSPACE, FAIL, "datatype allocation failed")
+ dt->shared->state = H5T_STATE_IMMUTABLE;
+ dt->shared->type = H5T_INTEGER;
+ dt->shared->size = 1;
+ dt->shared->u.atomic.order = H5T_ORDER_LE;
+ dt->shared->u.atomic.offset = 0;
+ dt->shared->u.atomic.prec = 8;
+ dt->shared->u.atomic.lsb_pad = H5T_PAD_ZERO;
+ dt->shared->u.atomic.msb_pad = H5T_PAD_ZERO;
+ dt->shared->u.atomic.u.i.sign = H5T_SGN_2;
+ if((H5T_NATIVE_INT_LEAST8_g = H5I_register(H5I_DATATYPE, dt, FALSE)) < 0)
+ HGOTO_ERROR(H5E_DATATYPE, H5E_CANTINIT, FAIL, "can't register ID for built-in datatype")
+ H5T_NATIVE_INT_LEAST8_ALIGN_g = 1;
+
+ /*
+ * 0
+ * UUUUUUUU
+ * Alignment: none
+ */
+ if(NULL == (dt = H5T__alloc()))
+ HGOTO_ERROR(H5E_DATATYPE, H5E_NOSPACE, FAIL, "datatype allocation failed")
+ dt->shared->state = H5T_STATE_IMMUTABLE;
+ dt->shared->type = H5T_INTEGER;
+ dt->shared->size = 1;
+ dt->shared->u.atomic.order = H5T_ORDER_LE;
+ dt->shared->u.atomic.offset = 0;
+ dt->shared->u.atomic.prec = 8;
+ dt->shared->u.atomic.lsb_pad = H5T_PAD_ZERO;
+ dt->shared->u.atomic.msb_pad = H5T_PAD_ZERO;
+ dt->shared->u.atomic.u.i.sign = H5T_SGN_NONE;
+ if((H5T_NATIVE_UINT_LEAST8_g = H5I_register(H5I_DATATYPE, dt, FALSE)) < 0)
+ HGOTO_ERROR(H5E_DATATYPE, H5E_CANTINIT, FAIL, "can't register ID for built-in datatype")
+ H5T_NATIVE_UINT_LEAST8_ALIGN_g = 1;
+
+ /*
+ * 0
+ * IIIIIIII
+ * Alignment: none
+ */
+ if(NULL == (dt = H5T__alloc()))
+ HGOTO_ERROR(H5E_DATATYPE, H5E_NOSPACE, FAIL, "datatype allocation failed")
+ dt->shared->state = H5T_STATE_IMMUTABLE;
+ dt->shared->type = H5T_INTEGER;
+ dt->shared->size = 1;
+ dt->shared->u.atomic.order = H5T_ORDER_LE;
+ dt->shared->u.atomic.offset = 0;
+ dt->shared->u.atomic.prec = 8;
+ dt->shared->u.atomic.lsb_pad = H5T_PAD_ZERO;
+ dt->shared->u.atomic.msb_pad = H5T_PAD_ZERO;
+ dt->shared->u.atomic.u.i.sign = H5T_SGN_2;
+ if((H5T_NATIVE_INT_FAST8_g = H5I_register(H5I_DATATYPE, dt, FALSE)) < 0)
+ HGOTO_ERROR(H5E_DATATYPE, H5E_CANTINIT, FAIL, "can't register ID for built-in datatype")
+ H5T_NATIVE_INT_FAST8_ALIGN_g = 1;
+
+ /*
+ * 0
+ * UUUUUUUU
+ * Alignment: none
+ */
+ if(NULL == (dt = H5T__alloc()))
+ HGOTO_ERROR(H5E_DATATYPE, H5E_NOSPACE, FAIL, "datatype allocation failed")
+ dt->shared->state = H5T_STATE_IMMUTABLE;
+ dt->shared->type = H5T_INTEGER;
+ dt->shared->size = 1;
+ dt->shared->u.atomic.order = H5T_ORDER_LE;
+ dt->shared->u.atomic.offset = 0;
+ dt->shared->u.atomic.prec = 8;
+ dt->shared->u.atomic.lsb_pad = H5T_PAD_ZERO;
+ dt->shared->u.atomic.msb_pad = H5T_PAD_ZERO;
+ dt->shared->u.atomic.u.i.sign = H5T_SGN_NONE;
+ if((H5T_NATIVE_UINT_FAST8_g = H5I_register(H5I_DATATYPE, dt, FALSE)) < 0)
+ HGOTO_ERROR(H5E_DATATYPE, H5E_CANTINIT, FAIL, "can't register ID for built-in datatype")
+ H5T_NATIVE_UINT_FAST8_ALIGN_g = 1;
+
+ /*
+ * 1 0
+ * IIIIIIII IIIIIIII
+ * Alignment: none
+ */
+ if(NULL == (dt = H5T__alloc()))
+ HGOTO_ERROR(H5E_DATATYPE, H5E_NOSPACE, FAIL, "datatype allocation failed")
+ dt->shared->state = H5T_STATE_IMMUTABLE;
+ dt->shared->type = H5T_INTEGER;
+ dt->shared->size = 2;
+ dt->shared->u.atomic.order = H5T_ORDER_LE;
+ dt->shared->u.atomic.offset = 0;
+ dt->shared->u.atomic.prec = 16;
+ dt->shared->u.atomic.lsb_pad = H5T_PAD_ZERO;
+ dt->shared->u.atomic.msb_pad = H5T_PAD_ZERO;
+ dt->shared->u.atomic.u.i.sign = H5T_SGN_2;
+ if((H5T_NATIVE_INT16_g = H5I_register(H5I_DATATYPE, dt, FALSE)) < 0)
+ HGOTO_ERROR(H5E_DATATYPE, H5E_CANTINIT, FAIL, "can't register ID for built-in datatype")
+ H5T_NATIVE_INT16_ALIGN_g = 1;
+
+ /*
+ * 1 0
+ * UUUUUUUU UUUUUUUU
+ * Alignment: none
+ */
+ if(NULL == (dt = H5T__alloc()))
+ HGOTO_ERROR(H5E_DATATYPE, H5E_NOSPACE, FAIL, "datatype allocation failed")
+ dt->shared->state = H5T_STATE_IMMUTABLE;
+ dt->shared->type = H5T_INTEGER;
+ dt->shared->size = 2;
+ dt->shared->u.atomic.order = H5T_ORDER_LE;
+ dt->shared->u.atomic.offset = 0;
+ dt->shared->u.atomic.prec = 16;
+ dt->shared->u.atomic.lsb_pad = H5T_PAD_ZERO;
+ dt->shared->u.atomic.msb_pad = H5T_PAD_ZERO;
+ dt->shared->u.atomic.u.i.sign = H5T_SGN_NONE;
+ if((H5T_NATIVE_UINT16_g = H5I_register(H5I_DATATYPE, dt, FALSE)) < 0)
+ HGOTO_ERROR(H5E_DATATYPE, H5E_CANTINIT, FAIL, "can't register ID for built-in datatype")
+ H5T_NATIVE_UINT16_ALIGN_g = 1;
+
+ /*
+ * 1 0
+ * IIIIIIII IIIIIIII
+ * Alignment: none
+ */
+ if(NULL == (dt = H5T__alloc()))
+ HGOTO_ERROR(H5E_DATATYPE, H5E_NOSPACE, FAIL, "datatype allocation failed")
+ dt->shared->state = H5T_STATE_IMMUTABLE;
+ dt->shared->type = H5T_INTEGER;
+ dt->shared->size = 2;
+ dt->shared->u.atomic.order = H5T_ORDER_LE;
+ dt->shared->u.atomic.offset = 0;
+ dt->shared->u.atomic.prec = 16;
+ dt->shared->u.atomic.lsb_pad = H5T_PAD_ZERO;
+ dt->shared->u.atomic.msb_pad = H5T_PAD_ZERO;
+ dt->shared->u.atomic.u.i.sign = H5T_SGN_2;
+ if((H5T_NATIVE_INT_LEAST16_g = H5I_register(H5I_DATATYPE, dt, FALSE)) < 0)
+ HGOTO_ERROR(H5E_DATATYPE, H5E_CANTINIT, FAIL, "can't register ID for built-in datatype")
+ H5T_NATIVE_INT_LEAST16_ALIGN_g = 1;
+
+ /*
+ * 1 0
+ * UUUUUUUU UUUUUUUU
+ * Alignment: none
+ */
+ if(NULL == (dt = H5T__alloc()))
+ HGOTO_ERROR(H5E_DATATYPE, H5E_NOSPACE, FAIL, "datatype allocation failed")
+ dt->shared->state = H5T_STATE_IMMUTABLE;
+ dt->shared->type = H5T_INTEGER;
+ dt->shared->size = 2;
+ dt->shared->u.atomic.order = H5T_ORDER_LE;
+ dt->shared->u.atomic.offset = 0;
+ dt->shared->u.atomic.prec = 16;
+ dt->shared->u.atomic.lsb_pad = H5T_PAD_ZERO;
+ dt->shared->u.atomic.msb_pad = H5T_PAD_ZERO;
+ dt->shared->u.atomic.u.i.sign = H5T_SGN_NONE;
+ if((H5T_NATIVE_UINT_LEAST16_g = H5I_register(H5I_DATATYPE, dt, FALSE)) < 0)
+ HGOTO_ERROR(H5E_DATATYPE, H5E_CANTINIT, FAIL, "can't register ID for built-in datatype")
+ H5T_NATIVE_UINT_LEAST16_ALIGN_g = 1;
+
+ /*
+ * 3 2 1 0
+ * IIIIIIII IIIIIIII IIIIIIII IIIIIIII
+ * Alignment: none
+ */
+ if(NULL == (dt = H5T__alloc()))
+ HGOTO_ERROR(H5E_DATATYPE, H5E_NOSPACE, FAIL, "datatype allocation failed")
+ dt->shared->state = H5T_STATE_IMMUTABLE;
+ dt->shared->type = H5T_INTEGER;
+ dt->shared->size = 4;
+ dt->shared->u.atomic.order = H5T_ORDER_LE;
+ dt->shared->u.atomic.offset = 0;
+ dt->shared->u.atomic.prec = 32;
+ dt->shared->u.atomic.lsb_pad = H5T_PAD_ZERO;
+ dt->shared->u.atomic.msb_pad = H5T_PAD_ZERO;
+ dt->shared->u.atomic.u.i.sign = H5T_SGN_2;
+ if((H5T_NATIVE_INT_FAST16_g = H5I_register(H5I_DATATYPE, dt, FALSE)) < 0)
+ HGOTO_ERROR(H5E_DATATYPE, H5E_CANTINIT, FAIL, "can't register ID for built-in datatype")
+ H5T_NATIVE_INT_FAST16_ALIGN_g = 1;
+
+ /*
+ * 3 2 1 0
+ * UUUUUUUU UUUUUUUU UUUUUUUU UUUUUUUU
+ * Alignment: none
+ */
+ if(NULL == (dt = H5T__alloc()))
+ HGOTO_ERROR(H5E_DATATYPE, H5E_NOSPACE, FAIL, "datatype allocation failed")
+ dt->shared->state = H5T_STATE_IMMUTABLE;
+ dt->shared->type = H5T_INTEGER;
+ dt->shared->size = 4;
+ dt->shared->u.atomic.order = H5T_ORDER_LE;
+ dt->shared->u.atomic.offset = 0;
+ dt->shared->u.atomic.prec = 32;
+ dt->shared->u.atomic.lsb_pad = H5T_PAD_ZERO;
+ dt->shared->u.atomic.msb_pad = H5T_PAD_ZERO;
+ dt->shared->u.atomic.u.i.sign = H5T_SGN_NONE;
+ if((H5T_NATIVE_UINT_FAST16_g = H5I_register(H5I_DATATYPE, dt, FALSE)) < 0)
+ HGOTO_ERROR(H5E_DATATYPE, H5E_CANTINIT, FAIL, "can't register ID for built-in datatype")
+ H5T_NATIVE_UINT_FAST16_ALIGN_g = 1;
+
+ /*
+ * 3 2 1 0
+ * IIIIIIII IIIIIIII IIIIIIII IIIIIIII
+ * Alignment: none
+ */
+ if(NULL == (dt = H5T__alloc()))
+ HGOTO_ERROR(H5E_DATATYPE, H5E_NOSPACE, FAIL, "datatype allocation failed")
+ dt->shared->state = H5T_STATE_IMMUTABLE;
+ dt->shared->type = H5T_INTEGER;
+ dt->shared->size = 4;
+ dt->shared->u.atomic.order = H5T_ORDER_LE;
+ dt->shared->u.atomic.offset = 0;
+ dt->shared->u.atomic.prec = 32;
+ dt->shared->u.atomic.lsb_pad = H5T_PAD_ZERO;
+ dt->shared->u.atomic.msb_pad = H5T_PAD_ZERO;
+ dt->shared->u.atomic.u.i.sign = H5T_SGN_2;
+ if((H5T_NATIVE_INT32_g = H5I_register(H5I_DATATYPE, dt, FALSE)) < 0)
+ HGOTO_ERROR(H5E_DATATYPE, H5E_CANTINIT, FAIL, "can't register ID for built-in datatype")
+ H5T_NATIVE_INT32_ALIGN_g = 1;
+
+ /*
+ * 3 2 1 0
+ * UUUUUUUU UUUUUUUU UUUUUUUU UUUUUUUU
+ * Alignment: none
+ */
+ if(NULL == (dt = H5T__alloc()))
+ HGOTO_ERROR(H5E_DATATYPE, H5E_NOSPACE, FAIL, "datatype allocation failed")
+ dt->shared->state = H5T_STATE_IMMUTABLE;
+ dt->shared->type = H5T_INTEGER;
+ dt->shared->size = 4;
+ dt->shared->u.atomic.order = H5T_ORDER_LE;
+ dt->shared->u.atomic.offset = 0;
+ dt->shared->u.atomic.prec = 32;
+ dt->shared->u.atomic.lsb_pad = H5T_PAD_ZERO;
+ dt->shared->u.atomic.msb_pad = H5T_PAD_ZERO;
+ dt->shared->u.atomic.u.i.sign = H5T_SGN_NONE;
+ if((H5T_NATIVE_UINT32_g = H5I_register(H5I_DATATYPE, dt, FALSE)) < 0)
+ HGOTO_ERROR(H5E_DATATYPE, H5E_CANTINIT, FAIL, "can't register ID for built-in datatype")
+ H5T_NATIVE_UINT32_ALIGN_g = 1;
+
+ /*
+ * 3 2 1 0
+ * IIIIIIII IIIIIIII IIIIIIII IIIIIIII
+ * Alignment: none
+ */
+ if(NULL == (dt = H5T__alloc()))
+ HGOTO_ERROR(H5E_DATATYPE, H5E_NOSPACE, FAIL, "datatype allocation failed")
+ dt->shared->state = H5T_STATE_IMMUTABLE;
+ dt->shared->type = H5T_INTEGER;
+ dt->shared->size = 4;
+ dt->shared->u.atomic.order = H5T_ORDER_LE;
+ dt->shared->u.atomic.offset = 0;
+ dt->shared->u.atomic.prec = 32;
+ dt->shared->u.atomic.lsb_pad = H5T_PAD_ZERO;
+ dt->shared->u.atomic.msb_pad = H5T_PAD_ZERO;
+ dt->shared->u.atomic.u.i.sign = H5T_SGN_2;
+ if((H5T_NATIVE_INT_LEAST32_g = H5I_register(H5I_DATATYPE, dt, FALSE)) < 0)
+ HGOTO_ERROR(H5E_DATATYPE, H5E_CANTINIT, FAIL, "can't register ID for built-in datatype")
+ H5T_NATIVE_INT_LEAST32_ALIGN_g = 1;
+
+ /*
+ * 3 2 1 0
+ * UUUUUUUU UUUUUUUU UUUUUUUU UUUUUUUU
+ * Alignment: none
+ */
+ if(NULL == (dt = H5T__alloc()))
+ HGOTO_ERROR(H5E_DATATYPE, H5E_NOSPACE, FAIL, "datatype allocation failed")
+ dt->shared->state = H5T_STATE_IMMUTABLE;
+ dt->shared->type = H5T_INTEGER;
+ dt->shared->size = 4;
+ dt->shared->u.atomic.order = H5T_ORDER_LE;
+ dt->shared->u.atomic.offset = 0;
+ dt->shared->u.atomic.prec = 32;
+ dt->shared->u.atomic.lsb_pad = H5T_PAD_ZERO;
+ dt->shared->u.atomic.msb_pad = H5T_PAD_ZERO;
+ dt->shared->u.atomic.u.i.sign = H5T_SGN_NONE;
+ if((H5T_NATIVE_UINT_LEAST32_g = H5I_register(H5I_DATATYPE, dt, FALSE)) < 0)
+ HGOTO_ERROR(H5E_DATATYPE, H5E_CANTINIT, FAIL, "can't register ID for built-in datatype")
+ H5T_NATIVE_UINT_LEAST32_ALIGN_g = 1;
+
+ /*
+ * 3 2 1 0
+ * IIIIIIII IIIIIIII IIIIIIII IIIIIIII
+ * Alignment: none
+ */
+ if(NULL == (dt = H5T__alloc()))
+ HGOTO_ERROR(H5E_DATATYPE, H5E_NOSPACE, FAIL, "datatype allocation failed")
+ dt->shared->state = H5T_STATE_IMMUTABLE;
+ dt->shared->type = H5T_INTEGER;
+ dt->shared->size = 4;
+ dt->shared->u.atomic.order = H5T_ORDER_LE;
+ dt->shared->u.atomic.offset = 0;
+ dt->shared->u.atomic.prec = 32;
+ dt->shared->u.atomic.lsb_pad = H5T_PAD_ZERO;
+ dt->shared->u.atomic.msb_pad = H5T_PAD_ZERO;
+ dt->shared->u.atomic.u.i.sign = H5T_SGN_2;
+ if((H5T_NATIVE_INT_FAST32_g = H5I_register(H5I_DATATYPE, dt, FALSE)) < 0)
+ HGOTO_ERROR(H5E_DATATYPE, H5E_CANTINIT, FAIL, "can't register ID for built-in datatype")
+ H5T_NATIVE_INT_FAST32_ALIGN_g = 1;
+
+ /*
+ * 3 2 1 0
+ * UUUUUUUU UUUUUUUU UUUUUUUU UUUUUUUU
+ * Alignment: none
+ */
+ if(NULL == (dt = H5T__alloc()))
+ HGOTO_ERROR(H5E_DATATYPE, H5E_NOSPACE, FAIL, "datatype allocation failed")
+ dt->shared->state = H5T_STATE_IMMUTABLE;
+ dt->shared->type = H5T_INTEGER;
+ dt->shared->size = 4;
+ dt->shared->u.atomic.order = H5T_ORDER_LE;
+ dt->shared->u.atomic.offset = 0;
+ dt->shared->u.atomic.prec = 32;
+ dt->shared->u.atomic.lsb_pad = H5T_PAD_ZERO;
+ dt->shared->u.atomic.msb_pad = H5T_PAD_ZERO;
+ dt->shared->u.atomic.u.i.sign = H5T_SGN_NONE;
+ if((H5T_NATIVE_UINT_FAST32_g = H5I_register(H5I_DATATYPE, dt, FALSE)) < 0)
+ HGOTO_ERROR(H5E_DATATYPE, H5E_CANTINIT, FAIL, "can't register ID for built-in datatype")
+ H5T_NATIVE_UINT_FAST32_ALIGN_g = 1;
+
+ /*
+ * 7 6 5 4
+ * IIIIIIII IIIIIIII IIIIIIII IIIIIIII
+ * 3 2 1 0
+ * IIIIIIII IIIIIIII IIIIIIII IIIIIIII
+ * Alignment: none
+ */
+ if(NULL == (dt = H5T__alloc()))
+ HGOTO_ERROR(H5E_DATATYPE, H5E_NOSPACE, FAIL, "datatype allocation failed")
+ dt->shared->state = H5T_STATE_IMMUTABLE;
+ dt->shared->type = H5T_INTEGER;
+ dt->shared->size = 8;
+ dt->shared->u.atomic.order = H5T_ORDER_LE;
+ dt->shared->u.atomic.offset = 0;
+ dt->shared->u.atomic.prec = 64;
+ dt->shared->u.atomic.lsb_pad = H5T_PAD_ZERO;
+ dt->shared->u.atomic.msb_pad = H5T_PAD_ZERO;
+ dt->shared->u.atomic.u.i.sign = H5T_SGN_2;
+ if((H5T_NATIVE_INT64_g = H5I_register(H5I_DATATYPE, dt, FALSE)) < 0)
+ HGOTO_ERROR(H5E_DATATYPE, H5E_CANTINIT, FAIL, "can't register ID for built-in datatype")
+ H5T_NATIVE_INT64_ALIGN_g = 1;
+
+ /*
+ * 7 6 5 4
+ * UUUUUUUU UUUUUUUU UUUUUUUU UUUUUUUU
+ * 3 2 1 0
+ * UUUUUUUU UUUUUUUU UUUUUUUU UUUUUUUU
+ * Alignment: none
+ */
+ if(NULL == (dt = H5T__alloc()))
+ HGOTO_ERROR(H5E_DATATYPE, H5E_NOSPACE, FAIL, "datatype allocation failed")
+ dt->shared->state = H5T_STATE_IMMUTABLE;
+ dt->shared->type = H5T_INTEGER;
+ dt->shared->size = 8;
+ dt->shared->u.atomic.order = H5T_ORDER_LE;
+ dt->shared->u.atomic.offset = 0;
+ dt->shared->u.atomic.prec = 64;
+ dt->shared->u.atomic.lsb_pad = H5T_PAD_ZERO;
+ dt->shared->u.atomic.msb_pad = H5T_PAD_ZERO;
+ dt->shared->u.atomic.u.i.sign = H5T_SGN_NONE;
+ if((H5T_NATIVE_UINT64_g = H5I_register(H5I_DATATYPE, dt, FALSE)) < 0)
+ HGOTO_ERROR(H5E_DATATYPE, H5E_CANTINIT, FAIL, "can't register ID for built-in datatype")
+ H5T_NATIVE_UINT64_ALIGN_g = 1;
+
+ /*
+ * 7 6 5 4
+ * IIIIIIII IIIIIIII IIIIIIII IIIIIIII
+ * 3 2 1 0
+ * IIIIIIII IIIIIIII IIIIIIII IIIIIIII
+ * Alignment: none
+ */
+ if(NULL == (dt = H5T__alloc()))
+ HGOTO_ERROR(H5E_DATATYPE, H5E_NOSPACE, FAIL, "datatype allocation failed")
+ dt->shared->state = H5T_STATE_IMMUTABLE;
+ dt->shared->type = H5T_INTEGER;
+ dt->shared->size = 8;
+ dt->shared->u.atomic.order = H5T_ORDER_LE;
+ dt->shared->u.atomic.offset = 0;
+ dt->shared->u.atomic.prec = 64;
+ dt->shared->u.atomic.lsb_pad = H5T_PAD_ZERO;
+ dt->shared->u.atomic.msb_pad = H5T_PAD_ZERO;
+ dt->shared->u.atomic.u.i.sign = H5T_SGN_2;
+ if((H5T_NATIVE_INT_LEAST64_g = H5I_register(H5I_DATATYPE, dt, FALSE)) < 0)
+ HGOTO_ERROR(H5E_DATATYPE, H5E_CANTINIT, FAIL, "can't register ID for built-in datatype")
+ H5T_NATIVE_INT_LEAST64_ALIGN_g = 1;
+
+ /*
+ * 7 6 5 4
+ * UUUUUUUU UUUUUUUU UUUUUUUU UUUUUUUU
+ * 3 2 1 0
+ * UUUUUUUU UUUUUUUU UUUUUUUU UUUUUUUU
+ * Alignment: none
+ */
+ if(NULL == (dt = H5T__alloc()))
+ HGOTO_ERROR(H5E_DATATYPE, H5E_NOSPACE, FAIL, "datatype allocation failed")
+ dt->shared->state = H5T_STATE_IMMUTABLE;
+ dt->shared->type = H5T_INTEGER;
+ dt->shared->size = 8;
+ dt->shared->u.atomic.order = H5T_ORDER_LE;
+ dt->shared->u.atomic.offset = 0;
+ dt->shared->u.atomic.prec = 64;
+ dt->shared->u.atomic.lsb_pad = H5T_PAD_ZERO;
+ dt->shared->u.atomic.msb_pad = H5T_PAD_ZERO;
+ dt->shared->u.atomic.u.i.sign = H5T_SGN_NONE;
+ if((H5T_NATIVE_UINT_LEAST64_g = H5I_register(H5I_DATATYPE, dt, FALSE)) < 0)
+ HGOTO_ERROR(H5E_DATATYPE, H5E_CANTINIT, FAIL, "can't register ID for built-in datatype")
+ H5T_NATIVE_UINT_LEAST64_ALIGN_g = 1;
+
+ /*
+ * 7 6 5 4
+ * IIIIIIII IIIIIIII IIIIIIII IIIIIIII
+ * 3 2 1 0
+ * IIIIIIII IIIIIIII IIIIIIII IIIIIIII
+ * Alignment: none
+ */
+ if(NULL == (dt = H5T__alloc()))
+ HGOTO_ERROR(H5E_DATATYPE, H5E_NOSPACE, FAIL, "datatype allocation failed")
+ dt->shared->state = H5T_STATE_IMMUTABLE;
+ dt->shared->type = H5T_INTEGER;
+ dt->shared->size = 8;
+ dt->shared->u.atomic.order = H5T_ORDER_LE;
+ dt->shared->u.atomic.offset = 0;
+ dt->shared->u.atomic.prec = 64;
+ dt->shared->u.atomic.lsb_pad = H5T_PAD_ZERO;
+ dt->shared->u.atomic.msb_pad = H5T_PAD_ZERO;
+ dt->shared->u.atomic.u.i.sign = H5T_SGN_2;
+ if((H5T_NATIVE_INT_FAST64_g = H5I_register(H5I_DATATYPE, dt, FALSE)) < 0)
+ HGOTO_ERROR(H5E_DATATYPE, H5E_CANTINIT, FAIL, "can't register ID for built-in datatype")
+ H5T_NATIVE_INT_FAST64_ALIGN_g = 1;
+
+ /*
+ * 7 6 5 4
+ * UUUUUUUU UUUUUUUU UUUUUUUU UUUUUUUU
+ * 3 2 1 0
+ * UUUUUUUU UUUUUUUU UUUUUUUU UUUUUUUU
+ * Alignment: none
+ */
+ if(NULL == (dt = H5T__alloc()))
+ HGOTO_ERROR(H5E_DATATYPE, H5E_NOSPACE, FAIL, "datatype allocation failed")
+ dt->shared->state = H5T_STATE_IMMUTABLE;
+ dt->shared->type = H5T_INTEGER;
+ dt->shared->size = 8;
+ dt->shared->u.atomic.order = H5T_ORDER_LE;
+ dt->shared->u.atomic.offset = 0;
+ dt->shared->u.atomic.prec = 64;
+ dt->shared->u.atomic.lsb_pad = H5T_PAD_ZERO;
+ dt->shared->u.atomic.msb_pad = H5T_PAD_ZERO;
+ dt->shared->u.atomic.u.i.sign = H5T_SGN_NONE;
+ if((H5T_NATIVE_UINT_FAST64_g = H5I_register(H5I_DATATYPE, dt, FALSE)) < 0)
+ HGOTO_ERROR(H5E_DATATYPE, H5E_CANTINIT, FAIL, "can't register ID for built-in datatype")
+ H5T_NATIVE_UINT_FAST64_ALIGN_g = 1;
+
+ /*
+ * 7 6 5 4
+ * IIIIIIII IIIIIIII IIIIIIII IIIIIIII
+ * 3 2 1 0
+ * IIIIIIII IIIIIIII IIIIIIII IIIIIIII
+ * Alignment: none
+ */
+ if(NULL == (dt = H5T__alloc()))
+ HGOTO_ERROR(H5E_DATATYPE, H5E_NOSPACE, FAIL, "datatype allocation failed")
+ dt->shared->state = H5T_STATE_IMMUTABLE;
+ dt->shared->type = H5T_INTEGER;
+ dt->shared->size = 8;
+ dt->shared->u.atomic.order = H5T_ORDER_LE;
+ dt->shared->u.atomic.offset = 0;
+ dt->shared->u.atomic.prec = 64;
+ dt->shared->u.atomic.lsb_pad = H5T_PAD_ZERO;
+ dt->shared->u.atomic.msb_pad = H5T_PAD_ZERO;
+ dt->shared->u.atomic.u.i.sign = H5T_SGN_2;
+ if((H5T_NATIVE_LLONG_g = H5I_register(H5I_DATATYPE, dt, FALSE)) < 0)
+ HGOTO_ERROR(H5E_DATATYPE, H5E_CANTINIT, FAIL, "can't register ID for built-in datatype")
+ H5T_NATIVE_LLONG_ALIGN_g = 1;
+ H5T_NATIVE_LLONG_COMP_ALIGN_g = 8;
+
+ /*
+ * 7 6 5 4
+ * UUUUUUUU UUUUUUUU UUUUUUUU UUUUUUUU
+ * 3 2 1 0
+ * UUUUUUUU UUUUUUUU UUUUUUUU UUUUUUUU
+ * Alignment: none
+ */
+ if(NULL == (dt = H5T__alloc()))
+ HGOTO_ERROR(H5E_DATATYPE, H5E_NOSPACE, FAIL, "datatype allocation failed")
+ dt->shared->state = H5T_STATE_IMMUTABLE;
+ dt->shared->type = H5T_INTEGER;
+ dt->shared->size = 8;
+ dt->shared->u.atomic.order = H5T_ORDER_LE;
+ dt->shared->u.atomic.offset = 0;
+ dt->shared->u.atomic.prec = 64;
+ dt->shared->u.atomic.lsb_pad = H5T_PAD_ZERO;
+ dt->shared->u.atomic.msb_pad = H5T_PAD_ZERO;
+ dt->shared->u.atomic.u.i.sign = H5T_SGN_NONE;
+ if((H5T_NATIVE_ULLONG_g = H5I_register(H5I_DATATYPE, dt, FALSE)) < 0)
+ HGOTO_ERROR(H5E_DATATYPE, H5E_CANTINIT, FAIL, "can't register ID for built-in datatype")
+ H5T_NATIVE_ULLONG_ALIGN_g = 1;
+
+ /*
+ * 3 2 1 0
+ * SEEEEEEE EMMMMMMM MMMMMMMM MMMMMMMM
+ * Implicit bit? yes
+ * Alignment: none
+ */
+ if(NULL == (dt = H5T__alloc()))
+ HGOTO_ERROR(H5E_DATATYPE, H5E_NOSPACE, FAIL, "datatype allocation failed")
+ dt->shared->state = H5T_STATE_IMMUTABLE;
+ dt->shared->type = H5T_FLOAT;
+ dt->shared->size = 4;
+ dt->shared->u.atomic.order = H5T_ORDER_LE;
+ dt->shared->u.atomic.offset = 0;
+ dt->shared->u.atomic.prec = 32;
+ dt->shared->u.atomic.lsb_pad = H5T_PAD_ZERO;
+ dt->shared->u.atomic.msb_pad = H5T_PAD_ZERO;
+ dt->shared->u.atomic.u.f.sign = 31;
+ dt->shared->u.atomic.u.f.epos = 23;
+ dt->shared->u.atomic.u.f.esize = 8;
+ dt->shared->u.atomic.u.f.ebias = 0x0000007f;
+ dt->shared->u.atomic.u.f.mpos = 0;
+ dt->shared->u.atomic.u.f.msize = 23;
+ dt->shared->u.atomic.u.f.norm = H5T_NORM_IMPLIED;
+ dt->shared->u.atomic.u.f.pad = H5T_PAD_ZERO;
+ if((H5T_NATIVE_FLOAT_g = H5I_register(H5I_DATATYPE, dt, FALSE)) < 0)
+ HGOTO_ERROR(H5E_DATATYPE, H5E_CANTINIT, FAIL, "can't register ID for built-in datatype")
+ H5T_NATIVE_FLOAT_ALIGN_g = 1;
+ H5T_NATIVE_FLOAT_COMP_ALIGN_g = 4;
+
+ /*
+ * 7 6 5 4
+ * SEEEEEEE EEEEMMMM MMMMMMMM MMMMMMMM
+ * 3 2 1 0
+ * MMMMMMMM MMMMMMMM MMMMMMMM MMMMMMMM
+ * Implicit bit? yes
+ * Alignment: none
+ */
+ if(NULL == (dt = H5T__alloc()))
+ HGOTO_ERROR(H5E_DATATYPE, H5E_NOSPACE, FAIL, "datatype allocation failed")
+ dt->shared->state = H5T_STATE_IMMUTABLE;
+ dt->shared->type = H5T_FLOAT;
+ dt->shared->size = 8;
+ dt->shared->u.atomic.order = H5T_ORDER_LE;
+ dt->shared->u.atomic.offset = 0;
+ dt->shared->u.atomic.prec = 64;
+ dt->shared->u.atomic.lsb_pad = H5T_PAD_ZERO;
+ dt->shared->u.atomic.msb_pad = H5T_PAD_ZERO;
+ dt->shared->u.atomic.u.f.sign = 63;
+ dt->shared->u.atomic.u.f.epos = 52;
+ dt->shared->u.atomic.u.f.esize = 11;
+ dt->shared->u.atomic.u.f.ebias = 0x000003ff;
+ dt->shared->u.atomic.u.f.mpos = 0;
+ dt->shared->u.atomic.u.f.msize = 52;
+ dt->shared->u.atomic.u.f.norm = H5T_NORM_IMPLIED;
+ dt->shared->u.atomic.u.f.pad = H5T_PAD_ZERO;
+ if((H5T_NATIVE_DOUBLE_g = H5I_register(H5I_DATATYPE, dt, FALSE)) < 0)
+ HGOTO_ERROR(H5E_DATATYPE, H5E_CANTINIT, FAIL, "can't register ID for built-in datatype")
+ H5T_NATIVE_DOUBLE_ALIGN_g = 1;
+ H5T_NATIVE_DOUBLE_COMP_ALIGN_g = 8;
+
+ /*
+ * 7 6 5 4
+ * SEEEEEEE EEEEMMMM MMMMMMMM MMMMMMMM
+ * 3 2 1 0
+ * MMMMMMMM MMMMMMMM MMMMMMMM MMMMMMMM
+ * Implicit bit? yes
+ * Alignment: none
+ */
+ if(NULL == (dt = H5T__alloc()))
+ HGOTO_ERROR(H5E_DATATYPE, H5E_NOSPACE, FAIL, "datatype allocation failed")
+ dt->shared->state = H5T_STATE_IMMUTABLE;
+ dt->shared->type = H5T_FLOAT;
+ dt->shared->size = 8;
+ dt->shared->u.atomic.order = H5T_ORDER_LE;
+ dt->shared->u.atomic.offset = 0;
+ dt->shared->u.atomic.prec = 64;
+ dt->shared->u.atomic.lsb_pad = H5T_PAD_ZERO;
+ dt->shared->u.atomic.msb_pad = H5T_PAD_ZERO;
+ dt->shared->u.atomic.u.f.sign = 63;
+ dt->shared->u.atomic.u.f.epos = 52;
+ dt->shared->u.atomic.u.f.esize = 11;
+ dt->shared->u.atomic.u.f.ebias = 0x000003ff;
+ dt->shared->u.atomic.u.f.mpos = 0;
+ dt->shared->u.atomic.u.f.msize = 52;
+ dt->shared->u.atomic.u.f.norm = H5T_NORM_IMPLIED;
+ dt->shared->u.atomic.u.f.pad = H5T_PAD_ZERO;
+ if((H5T_NATIVE_LDOUBLE_g = H5I_register(H5I_DATATYPE, dt, FALSE)) < 0)
+ HGOTO_ERROR(H5E_DATATYPE, H5E_CANTINIT, FAIL, "can't register ID for built-in datatype")
+ H5T_NATIVE_LDOUBLE_ALIGN_g = 1;
+ H5T_NATIVE_LDOUBLE_COMP_ALIGN_g = 8;
+
+ /* Set the native order for this machine */
+ H5T_native_order_g = H5T_ORDER_LE;
+
+ /* Structure alignment for pointers, hvl_t, hobj_ref_t, hdset_reg_ref_t */
+ H5T_POINTER_COMP_ALIGN_g = 4;
+ H5T_HVL_COMP_ALIGN_g = 4;
+ H5T_HOBJREF_COMP_ALIGN_g = 8;
+ H5T_HDSETREGREF_COMP_ALIGN_g = 1;
+
+done:
+ if(ret_value < 0) {
+ if(dt != NULL) {
+ dt->shared = H5FL_FREE(H5T_shared_t, dt->shared);
+ dt = H5FL_FREE(H5T_t, dt);
+ } /* end if */
+ } /* end if */
+
+ FUNC_LEAVE_NOAPI(ret_value);
+} /* end H5TN_init_interface() */
+
+/****************************************/
+/* ALIGNMENT and signal-handling status */
+/****************************************/
+/* Signal() support: yes */
+/* setjmp() support: yes */
+/* longjmp() support: yes */
+/* sigsetjmp() support: yes */
+/* siglongjmp() support: yes */
+/* sigprocmask() support: yes */
+
+/******************************/
+/* signal handlers statistics */
+/******************************/
+/* signal_handlers tested: 15 times */
+/* sigbus_handler called: 5 times */
+/* sigsegv_handler called: 5 times */
+/* sigill_handler called: 5 times */
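The block above is the tail of a pre-generated H5Tinit.c for linux-armv7l. HDF5 normally emits this file at build time by compiling and running its H5detect probe on the build machine, which is impossible when cross-compiling, so a copy generated natively on the target is checked in instead. A minimal sketch of the native generation rule this file replaces, following the pattern visible in the src/CMakeLists.txt hunk of HDF5Source.patch further below (paths are illustrative):

    # Native-build sketch: H5detect probes the machine's types and prints
    # the C source for all of the H5T_NATIVE_* registrations shown above.
    add_executable(H5detect ${HDF5_SRC_DIR}/H5detect.c)
    add_custom_command(
      OUTPUT ${HDF5_BINARY_DIR}/H5Tinit.c
      COMMAND $<TARGET_FILE:H5detect>
      ARGS > ${HDF5_BINARY_DIR}/H5Tinit.c
      DEPENDS H5detect)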
diff --git a/infra/cmake/packages/HDF5Config.cmake b/infra/cmake/packages/HDF5Config.cmake
index 19803f1ea..4ab338144 100644
--- a/infra/cmake/packages/HDF5Config.cmake
+++ b/infra/cmake/packages/HDF5Config.cmake
@@ -6,9 +6,24 @@ function(_HDF5_build)
nnas_find_package(HDF5Source QUIET)
if(NOT HDF5Source_FOUND)
+    message(STATUS "HDF5Config skip: HDF5Source NOT FOUND")
return()
endif(NOT HDF5Source_FOUND)
+ if(DEFINED ENV{BUILD_HOST_EXEC})
+ set(EXTERNAL_H5MAKE_LIBSETTINGS $ENV{BUILD_HOST_EXEC}/externals/HDF5/build/bin/H5make_libsettings)
+ set(ENV{EXTERNAL_H5MAKE_LIBSETTINGS} ${EXTERNAL_H5MAKE_LIBSETTINGS})
+
+ # NOTE https://github.com/Samsung/ONE/issues/8762
+ # TODO generalize to select 'linux-armv7l'
+ set(H5TINIT_C_FROM_NATIVE ${CMAKE_CURRENT_LIST_DIR}/H5Tinit.c.linux-armv7l)
+ set(H5TINIT_C_COPY ${CMAKE_BINARY_DIR}/externals/HDF5/build/H5Tinit.c)
+ message(STATUS "Copy H5Tinit.c generated from target native build")
+ execute_process(
+ COMMAND ${CMAKE_COMMAND} -E copy "${H5TINIT_C_FROM_NATIVE}" "${H5TINIT_C_COPY}"
+ )
+ endif(DEFINED ENV{BUILD_HOST_EXEC})
+
nnas_include(ExternalBuildTools)
ExternalBuild_CMake(CMAKE_DIR ${HDF5Source_DIR}
BUILD_DIR ${CMAKE_BINARY_DIR}/externals/HDF5/build
@@ -26,6 +41,7 @@ _HDF5_build()
find_path(HDF5_CONFIG_DIR "hdf5-config.cmake"
PATHS ${EXT_OVERLAY_DIR}
+ NO_CMAKE_FIND_ROOT_PATH
PATH_SUFFIXES
cmake
share/cmake
diff --git a/infra/cmake/packages/HDF5Source.patch b/infra/cmake/packages/HDF5Source.patch
new file mode 100644
index 000000000..b8602a08a
--- /dev/null
+++ b/infra/cmake/packages/HDF5Source.patch
@@ -0,0 +1,195 @@
+Only in HDF5: build
+diff -r -u a/config/cmake/ConfigureChecks.cmake b/config/cmake/ConfigureChecks.cmake
+--- a/config/cmake/ConfigureChecks.cmake
++++ b/config/cmake/ConfigureChecks.cmake
+@@ -109,15 +109,15 @@
+ if (NOT WINDOWS)
+ CHECK_FUNCTION_EXISTS(clock_gettime CLOCK_GETTIME_IN_LIBC)
+ CHECK_LIBRARY_EXISTS(rt clock_gettime "" CLOCK_GETTIME_IN_LIBRT)
+- CHECK_LIBRARY_EXISTS(posix4 clock_gettime "" CLOCK_GETTIME_IN_LIBPOSIX4)
++ #CHECK_LIBRARY_EXISTS(posix4 clock_gettime "" CLOCK_GETTIME_IN_LIBPOSIX4)
+ if (CLOCK_GETTIME_IN_LIBC)
+ set (H5_HAVE_CLOCK_GETTIME 1)
+ elseif (CLOCK_GETTIME_IN_LIBRT)
+ set (H5_HAVE_CLOCK_GETTIME 1)
+ list (APPEND LINK_LIBS rt)
+- elseif (CLOCK_GETTIME_IN_LIBPOSIX4)
+- set (H5_HAVE_CLOCK_GETTIME 1)
+- list (APPEND LINK_LIBS posix4)
++ #elseif (CLOCK_GETTIME_IN_LIBPOSIX4)
++ # set (H5_HAVE_CLOCK_GETTIME 1)
++ # list (APPEND LINK_LIBS posix4)
+ endif (CLOCK_GETTIME_IN_LIBC)
+ endif (NOT WINDOWS)
+ #-----------------------------------------------------------------------------
+@@ -130,12 +130,17 @@
+ if (HDF5_ENABLE_DIRECT_VFD)
+ set (msg "Performing TEST_DIRECT_VFD_WORKS")
+ set (MACRO_CHECK_FUNCTION_DEFINITIONS "-DTEST_DIRECT_VFD_WORKS -D_GNU_SOURCE ${CMAKE_REQUIRED_FLAGS}")
++ if(NOT CMAKE_CROSSCOMPILING)
+ TRY_RUN (TEST_DIRECT_VFD_WORKS_RUN TEST_DIRECT_VFD_WORKS_COMPILE
+ ${CMAKE_BINARY_DIR}
+ ${HDF_RESOURCES_EXT_DIR}/HDFTests.c
+ CMAKE_FLAGS -DCOMPILE_DEFINITIONS:STRING=${MACRO_CHECK_FUNCTION_DEFINITIONS}
+ OUTPUT_VARIABLE OUTPUT
+ )
++ else(NOT CMAKE_CROSSCOMPILING)
++ set(TEST_DIRECT_VFD_WORKS_RUN 0)
++ set(TEST_DIRECT_VFD_WORKS_COMPILE TRUE)
++ endif(NOT CMAKE_CROSSCOMPILING)
+ if (TEST_DIRECT_VFD_WORKS_COMPILE)
+ if (TEST_DIRECT_VFD_WORKS_RUN MATCHES 0)
+ HDF_FUNCTION_TEST (HAVE_DIRECT)
+@@ -221,7 +226,12 @@
+ # The machine's conversion gets the correct value. We define the macro and disable
+ # this kind of test until we figure out what algorithm they use.
+ #
++if(NOT CMAKE_CROSSCOMPILING)
+ H5ConversionTests (H5_LDOUBLE_TO_LONG_SPECIAL "Checking IF your system converts long double to (unsigned) long values with special algorithm")
++else(NOT CMAKE_CROSSCOMPILING)
++ set(H5_LDOUBLE_TO_LONG_SPECIAL_RUN 1)
++ set(H5_LDOUBLE_TO_LONG_SPECIAL_COMPILE TRUE)
++endif(NOT CMAKE_CROSSCOMPILING)
+ # ----------------------------------------------------------------------
+ # Set the flag to indicate that the machine is using a special algorithm
+ # to convert some values of '(unsigned) long' to 'long double' values.
+@@ -230,7 +240,12 @@
+ # ..., 7fffff..., the compiler uses a unknown algorithm. We define a
+ # macro and skip the test for now until we know about the algorithm.
+ #
++if(NOT CMAKE_CROSSCOMPILING)
+ H5ConversionTests (H5_LONG_TO_LDOUBLE_SPECIAL "Checking IF your system can convert (unsigned) long to long double values with special algorithm")
++else(NOT CMAKE_CROSSCOMPILING)
++ set(H5_LONG_TO_LDOUBLE_SPECIAL_RUN 1)
++ set(H5_LONG_TO_LDOUBLE_SPECIAL_COMPILE TRUE)
++endif(NOT CMAKE_CROSSCOMPILING)
+ # ----------------------------------------------------------------------
+ # Set the flag to indicate that the machine can accurately convert
+ # 'long double' to '(unsigned) long long' values. (This flag should be set for
+@@ -240,7 +255,12 @@
+ # 0x4351ccf385ebc8a0dfcc... or 0x4351ccf385ebc8a0ffcc... will make the converted
+ # values wildly wrong. This test detects this wrong behavior and disable the test.
+ #
++if(NOT CMAKE_CROSSCOMPILING)
+ H5ConversionTests (H5_LDOUBLE_TO_LLONG_ACCURATE "Checking IF correctly converting long double to (unsigned) long long values")
++else(NOT CMAKE_CROSSCOMPILING)
++ set(H5_LDOUBLE_TO_LLONG_ACCURATE_RUN 0)
++ set(H5_LDOUBLE_TO_LLONG_ACCURATE_COMPILE TRUE)
++endif(NOT CMAKE_CROSSCOMPILING)
+ # ----------------------------------------------------------------------
+ # Set the flag to indicate that the machine can accurately convert
+ # '(unsigned) long long' to 'long double' values. (This flag should be set for
+@@ -248,11 +268,21 @@
+ # 007fff..., 00ffff..., 01ffff..., ..., 7fffff..., the converted values are twice
+ # as big as they should be.
+ #
++if(NOT CMAKE_CROSSCOMPILING)
+ H5ConversionTests (H5_LLONG_TO_LDOUBLE_CORRECT "Checking IF correctly converting (unsigned) long long to long double values")
++else(NOT CMAKE_CROSSCOMPILING)
++ set(H5_LLONG_TO_LDOUBLE_CORRECT_RUN 0)
++ set(H5_LLONG_TO_LDOUBLE_CORRECT_COMPILE TRUE)
++endif(NOT CMAKE_CROSSCOMPILING)
+ # ----------------------------------------------------------------------
+ # Check if pointer alignments are enforced
+ #
++if(NOT CMAKE_CROSSCOMPILING)
+ H5ConversionTests (H5_NO_ALIGNMENT_RESTRICTIONS "Checking IF alignment restrictions are strictly enforced")
++else(NOT CMAKE_CROSSCOMPILING)
++ set(H5_NO_ALIGNMENT_RESTRICTIONS_RUN 0)
++ set(H5_NO_ALIGNMENT_RESTRICTIONS_COMPILE TRUE)
++endif(NOT CMAKE_CROSSCOMPILING)
+
+ # -----------------------------------------------------------------------
+ # wrapper script variables
+diff -r -u a/config/cmake_ext_mod/ConfigureChecks.cmake b/config/cmake_ext_mod/ConfigureChecks.cmake
+--- a/config/cmake_ext_mod/ConfigureChecks.cmake
++++ b/config/cmake_ext_mod/ConfigureChecks.cmake
+@@ -272,12 +272,17 @@
+ # http://www.gnu.org/s/libc/manual/html_node/Feature-Test-Macros.html
+ set (HDF_EXTRA_C_FLAGS -D_POSIX_C_SOURCE=199506L)
+ # _BSD_SOURCE deprecated in GLIBC >= 2.20
++ if(NOT CMAKE_CROSSCOMPILING)
+ TRY_RUN (HAVE_DEFAULT_SOURCE_RUN HAVE_DEFAULT_SOURCE_COMPILE
+ ${CMAKE_BINARY_DIR}
+ ${HDF_RESOURCES_EXT_DIR}/HDFTests.c
+ CMAKE_FLAGS -DCOMPILE_DEFINITIONS:STRING=-DHAVE_DEFAULT_SOURCE
+ OUTPUT_VARIABLE OUTPUT
+ )
++ else(NOT CMAKE_CROSSCOMPILING)
++ set(HAVE_DEFAULT_SOURCE_RUN 1)
++ set(HAVE_DEFAULT_SOURCE_COMPILE TRUE)
++ endif(NOT CMAKE_CROSSCOMPILING)
+ if (HAVE_DEFAULT_SOURCE_COMPILE AND HAVE_DEFAULT_SOURCE_RUN)
+ set (HDF_EXTRA_FLAGS -D_DEFAULT_SOURCE)
+ else (HAVE_DEFAULT_SOURCE_COMPILE AND HAVE_DEFAULT_SOURCE_RUN)
+@@ -287,12 +292,17 @@
+ option (HDF_ENABLE_LARGE_FILE "Enable support for large (64-bit) files on Linux." ON)
+ if (HDF_ENABLE_LARGE_FILE)
+ set (msg "Performing TEST_LFS_WORKS")
++ if(NOT CMAKE_CROSSCOMPILING)
+ TRY_RUN (TEST_LFS_WORKS_RUN TEST_LFS_WORKS_COMPILE
+ ${CMAKE_BINARY_DIR}
+ ${HDF_RESOURCES_EXT_DIR}/HDFTests.c
+ CMAKE_FLAGS -DCOMPILE_DEFINITIONS:STRING=-DTEST_LFS_WORKS
+ OUTPUT_VARIABLE OUTPUT
+ )
++ else(NOT CMAKE_CROSSCOMPILING)
++ set(TEST_LFS_WORKS_RUN 0)
++ set(TEST_LFS_WORKS_COMPILE TRUE)
++ endif(NOT CMAKE_CROSSCOMPILING)
+ if (TEST_LFS_WORKS_COMPILE)
+ if (TEST_LFS_WORKS_RUN MATCHES 0)
+ set (TEST_LFS_WORKS 1 CACHE INTERNAL ${msg})
+@@ -702,7 +712,8 @@
+ set (CURRENT_TEST_DEFINITIONS "-DPRINTF_LL_WIDTH")
+ if (${HDF_PREFIX}_SIZEOF_LONG_LONG)
+ set (CURRENT_TEST_DEFINITIONS "${CURRENT_TEST_DEFINITIONS} -DHAVE_LONG_LONG")
+ endif (${HDF_PREFIX}_SIZEOF_LONG_LONG)
++ if(NOT CMAKE_CROSSCOMPILING)
+ TRY_RUN (${HDF_PREFIX}_PRINTF_LL_TEST_RUN ${HDF_PREFIX}_PRINTF_LL_TEST_COMPILE
+ ${CMAKE_BINARY_DIR}
+ ${HDF_RESOURCES_EXT_DIR}/HDFTests.c
+@@ -722,6 +733,13 @@
+ "Test ${HDF_PREFIX}_PRINTF_LL_WIDTH failed with the following output:\n ${OUTPUT}\n"
+ )
+ endif (${HDF_PREFIX}_PRINTF_LL_TEST_COMPILE)
++ else(NOT CMAKE_CROSSCOMPILING)
++ set (${HDF_PREFIX}_PRINTF_LL_TEST_RUN 1)
++ set (${HDF_PREFIX}_PRINTF_LL_TEST_COMPILE 1)
++ set (${HDF_PREFIX}_PRINTF_LL_WIDTH "\"L\"")
++ set (${HDF_PREFIX}_PRINTF_LL "L")
++ set (PRINT_LL_FOUND 1)
++ endif(NOT CMAKE_CROSSCOMPILING)
+
+ if (PRINT_LL_FOUND)
+ message (STATUS "Checking for appropriate format for 64 bit long: found ${${HDF_PREFIX}_PRINTF_LL_WIDTH}")
+diff -r -u a/src/CMakeLists.txt b/src/CMakeLists.txt
+--- a/src/CMakeLists.txt
++++ b/src/CMakeLists.txt
+@@ -616,6 +616,7 @@
+ target_link_libraries (H5detect "ws2_32.lib")
+ endif (MSVC OR MINGW)
+
++if (NOT CMAKE_CROSSCOMPILING)
+ set (CMD $<TARGET_FILE:H5detect>)
+ add_custom_command (
+ OUTPUT ${HDF5_BINARY_DIR}/H5Tinit.c
+@@ -623,6 +624,7 @@
+ ARGS > ${HDF5_BINARY_DIR}/H5Tinit.c
+ DEPENDS H5detect
+ )
++endif (NOT CMAKE_CROSSCOMPILING)
+
+ add_executable (H5make_libsettings ${HDF5_SRC_DIR}/H5make_libsettings.c)
+ TARGET_C_PROPERTIES (H5make_libsettings STATIC " " " ")
+@@ -631,6 +633,10 @@
+ endif (MSVC OR MINGW)
+
+ set (CMD $<TARGET_FILE:H5make_libsettings>)
++# for cross compile
++if (DEFINED ENV{EXTERNAL_H5MAKE_LIBSETTINGS})
++ set(CMD $ENV{EXTERNAL_H5MAKE_LIBSETTINGS})
++endif (DEFINED ENV{EXTERNAL_H5MAKE_LIBSETTINGS})
+ add_custom_command (
+ OUTPUT ${HDF5_BINARY_DIR}/H5lib_settings.c
+ COMMAND ${CMD}
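Every hunk in this patch applies one guard: TRY_RUN executes a freshly built test binary, which cannot work when that binary targets another architecture, so under CMAKE_CROSSCOMPILING the run/compile result variables are preset to the answers expected for the target. A generic sketch of the pattern (the FEATURE_* names and test file are illustrative):

    if(NOT CMAKE_CROSSCOMPILING)
      try_run(FEATURE_RUN FEATURE_COMPILE
              ${CMAKE_BINARY_DIR}
              ${CMAKE_CURRENT_SOURCE_DIR}/feature_test.c)
    else()
      # cannot execute a target-architecture binary on the build host;
      # preset the probe's known result for the target (0 = success here)
      set(FEATURE_RUN 0)
      set(FEATURE_COMPILE TRUE)
    endif()
    if(FEATURE_COMPILE AND FEATURE_RUN MATCHES 0)
      set(HAVE_FEATURE 1)
    endif()

Note that the preset values differ per check -- TEST_LFS_WORKS_RUN is set to 0 while H5_LDOUBLE_TO_LONG_SPECIAL_RUN is set to 1 -- encoding the known answers for the armv7l target.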
diff --git a/infra/cmake/packages/HDF5SourceConfig.cmake b/infra/cmake/packages/HDF5SourceConfig.cmake
index 134efa6f4..9db048c86 100644
--- a/infra/cmake/packages/HDF5SourceConfig.cmake
+++ b/infra/cmake/packages/HDF5SourceConfig.cmake
@@ -9,7 +9,8 @@ function(_HDF5Source_import)
envoption(HDF5_URL https://github.com/HDFGroup/hdf5/archive/hdf5-1_8_16.tar.gz)
- ExternalSource_Download(HDF5 ${HDF5_URL})
+ ExternalSource_Download(HDF5 ${HDF5_URL}
+ PATCH ${CMAKE_CURRENT_LIST_DIR}/HDF5Source.patch)
set(HDF5Source_DIR ${HDF5_SOURCE_DIR} PARENT_SCOPE)
set(HDF5Source_FOUND TRUE PARENT_SCOPE)
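The only change here is threading a PATCH argument through ExternalSource_Download. The helper's internals are not shown in this diff; under the assumption that it applies the patch right after extraction, the effect is roughly the following (a sketch, not the helper's actual code; HDF5_ARCHIVE is illustrative):

    # Assumed effect of ExternalSource_Download(... PATCH <file>):
    # extract the archive, then apply the patch inside the source tree.
    execute_process(COMMAND ${CMAKE_COMMAND} -E tar xzf ${HDF5_ARCHIVE}
                    WORKING_DIRECTORY ${NNAS_EXTERNALS_DIR})
    execute_process(COMMAND patch -p1 -i ${CMAKE_CURRENT_LIST_DIR}/HDF5Source.patch
                    WORKING_DIRECTORY ${HDF5_SOURCE_DIR})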
diff --git a/infra/cmake/packages/JsoncppConfig.cmake b/infra/cmake/packages/JsoncppConfig.cmake
new file mode 100644
index 000000000..3c5c3e78a
--- /dev/null
+++ b/infra/cmake/packages/JsoncppConfig.cmake
@@ -0,0 +1,34 @@
+function(_Jsoncpp_import)
+ nnas_find_package(JsoncppSource QUIET)
+
+ if(NOT JsoncppSource_FOUND)
+ set(Jsoncpp_FOUND FALSE PARENT_SCOPE)
+ return()
+ endif(NOT JsoncppSource_FOUND)
+
+ nnas_include(ExternalBuildTools)
+ ExternalBuild_CMake(CMAKE_DIR ${JsoncppSource_DIR}
+ BUILD_DIR ${CMAKE_BINARY_DIR}/externals/JSONCPP/build
+ INSTALL_DIR ${EXT_OVERLAY_DIR}
+ IDENTIFIER "1.9.5"
+ PKG_NAME "JSONCPP"
+ EXTRA_OPTS "-DBUILD_STATIC_LIBS=ON"
+ "-DBUILD_SHARED_LIBS=OFF"
+ "-DJSONCPP_WITH_TESTS=OFF"
+ "-DJSONCPP_WITH_POST_BUILD_UNITTEST=OFF")
+
+ find_path(Jsoncpp_INCLUDE_DIRS
+ NAMES json.h
+ PATHS ${EXT_OVERLAY_DIR}
+ NO_CMAKE_FIND_ROOT_PATH
+ PATH_SUFFIXES include/json)
+ find_file(Jsoncpp_STATIC_LIB
+ NAMES libjsoncpp.a
+ PATHS ${EXT_OVERLAY_DIR}
+ NO_CMAKE_FIND_ROOT_PATH
+ PATH_SUFFIXES lib)
+
+ set(Jsoncpp_FOUND TRUE PARENT_SCOPE)
+endfunction(_Jsoncpp_import)
+
+_Jsoncpp_import()
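JsoncppConfig.cmake builds jsoncpp statically into the overlay and then publishes plain variables rather than an imported target; both find calls pass NO_CMAKE_FIND_ROOT_PATH for the same cross-compiling reason as in HDF5Config.cmake above. A hedged usage sketch for a consumer (the dump_json target is hypothetical):

    nnas_find_package(Jsoncpp QUIET)
    if(Jsoncpp_FOUND)
      add_executable(dump_json main.cpp)               # hypothetical target
      target_include_directories(dump_json PRIVATE ${Jsoncpp_INCLUDE_DIRS})
      target_link_libraries(dump_json PRIVATE ${Jsoncpp_STATIC_LIB})
    endif()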
diff --git a/infra/cmake/packages/JsoncppSourceConfig.cmake b/infra/cmake/packages/JsoncppSourceConfig.cmake
new file mode 100644
index 000000000..3195ea479
--- /dev/null
+++ b/infra/cmake/packages/JsoncppSourceConfig.cmake
@@ -0,0 +1,18 @@
+function(_JsoncppSource_import)
+ if(NOT DOWNLOAD_JSONCPP)
+ set(JsoncppSource_FOUND FALSE PARENT_SCOPE)
+ return()
+ endif(NOT DOWNLOAD_JSONCPP)
+
+ nnas_include(ExternalSourceTools)
+ nnas_include(OptionTools)
+
+ envoption(JSONCPP_URL https://github.com/open-source-parsers/jsoncpp/archive/refs/tags/1.9.5.tar.gz)
+
+ ExternalSource_Download(JSONCPP ${JSONCPP_URL})
+
+ set(JsoncppSource_DIR ${JSONCPP_SOURCE_DIR} PARENT_SCOPE)
+ set(JsoncppSource_FOUND TRUE PARENT_SCOPE)
+endfunction(_JsoncppSource_import)
+
+_JsoncppSource_import()
diff --git a/infra/cmake/packages/Opencl_HeadersConfig.cmake b/infra/cmake/packages/Opencl_HeadersConfig.cmake
index 8d65fe76e..ec7c65a73 100644
--- a/infra/cmake/packages/Opencl_HeadersConfig.cmake
+++ b/infra/cmake/packages/Opencl_HeadersConfig.cmake
@@ -2,11 +2,11 @@ function(_Opencl_Headers_import)
nnas_find_package(Opencl_HeadersSource QUIET)
# NOTE This line prevents multiple definitions of target
- if(TARGET Headers)
+ if(TARGET OpenCL_Headers)
set(Opencl_HeadersSource_DIR ${Opencl_HeadersSource_DIR} PARENT_SCOPE)
set(Opencl_Headers_FOUND TRUE PARENT_SCOPE)
return()
- endif(TARGET Headers)
+ endif(TARGET OpenCL_Headers)
if(NOT Opencl_HeadersSource_FOUND)
message(STATUS "Opencl_Headers: Source not found")
@@ -14,7 +14,12 @@ function(_Opencl_Headers_import)
return()
endif(NOT Opencl_HeadersSource_FOUND)
- add_extdirectory("${Opencl_HeadersSource_DIR}" OPENCL_HEADERS EXCLUDE_FROM_ALL)
+  # We don't need test builds and installs; we only need the headers.
+ # add_extdirectory("${Opencl_HeadersSource_DIR}" OPENCL_HEADERS EXCLUDE_FROM_ALL)
+
+ add_library(OpenCL_Headers INTERFACE)
+ target_include_directories(OpenCL_Headers INTERFACE ${Opencl_HeadersSource_DIR})
+
set(Opencl_Headers_DIR ${Opencl_HeadersSource_DIR} PARENT_SCOPE)
set(Opencl_Headers_FOUND TRUE PARENT_SCOPE)
endfunction(_Opencl_Headers_import)
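Instead of adding OpenCL-Headers as a subdirectory (which drags in its test and install rules), the config now models the dependency as a header-only INTERFACE target, which carries only usage requirements -- here, an include directory. Consumers inherit the include path by linking the target; a sketch (gpu_cl_backend is a hypothetical consumer):

    nnas_find_package(Opencl_Headers QUIET)
    if(Opencl_Headers_FOUND)
      add_library(gpu_cl_backend STATIC backend.cc)    # hypothetical target
      # linking an INTERFACE target adds its include dirs to the consumer
      target_link_libraries(gpu_cl_backend PRIVATE OpenCL_Headers)
    endif()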
diff --git a/infra/cmake/packages/ProtobufConfig.cmake b/infra/cmake/packages/ProtobufConfig.cmake
index 3c8d2320f..f8e9ff1f9 100644
--- a/infra/cmake/packages/ProtobufConfig.cmake
+++ b/infra/cmake/packages/ProtobufConfig.cmake
@@ -51,17 +51,34 @@ function(_Protobuf_build)
return()
endif(NOT ProtobufSource_FOUND)
+ # set 'EXTERNAL_JS_EMBED' environment variable
+ if(NOT DEFINED ENV{EXTERNAL_JS_EMBED})
+ if(DEFINED ENV{BUILD_HOST_EXEC})
+ set(EXTERNAL_JS_EMBED $ENV{BUILD_HOST_EXEC}/externals/PROTOBUF/build/js_embed)
+ set(ENV{EXTERNAL_JS_EMBED} ${EXTERNAL_JS_EMBED})
+ endif(DEFINED ENV{BUILD_HOST_EXEC})
+ endif(NOT DEFINED ENV{EXTERNAL_JS_EMBED})
+
nnas_include(ExternalBuildTools)
ExternalBuild_CMake(CMAKE_DIR ${ProtobufSource_DIR}/cmake
BUILD_DIR ${CMAKE_BINARY_DIR}/externals/PROTOBUF/build
INSTALL_DIR ${EXT_OVERLAY_DIR}
BUILD_FLAGS -fPIC
EXTRA_OPTS -Dprotobuf_BUILD_TESTS=OFF -Dprotobuf_WITH_ZLIB=OFF
- IDENTIFIER "3.5.2-fix1"
+ IDENTIFIER "3.5.2-fix2"
PKG_NAME "PROTOBUF")
endfunction(_Protobuf_build)
+set(PROTOC_PATH $<TARGET_FILE:protobuf::protoc>)
+
+if(DEFINED ENV{BUILD_HOST_EXEC})
+ set(PROTOC_PATH $ENV{BUILD_HOST_EXEC}/overlay/bin/protoc)
+endif(DEFINED ENV{BUILD_HOST_EXEC})
+if(DEFINED ENV{EXTERNAL_PROTOC})
+ set(PROTOC_PATH $ENV{EXTERNAL_PROTOC})
+endif(DEFINED ENV{EXTERNAL_PROTOC})
+
_Protobuf_build()
if(USE_PROTOBUF_LEGACY_IMPORT)
@@ -96,7 +113,7 @@ if(Protobuf_FOUND)
add_custom_command(OUTPUT ${OUTPUT_FILES}
COMMAND ${CMAKE_COMMAND} -E make_directory "${abs_output_dir}"
- COMMAND "$<TARGET_FILE:protobuf::protoc>" --cpp_out "${abs_output_dir}" -I "${abs_proto_dir}" ${PROTO_FILES}
+ COMMAND "${PROTOC_PATH}" --cpp_out "${abs_output_dir}" -I "${abs_proto_dir}" ${PROTO_FILES}
DEPENDS ${PROTO_FILES})
set(${PREFIX}_SOURCES ${OUTPUT_FILES} PARENT_SCOPE)
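The generated-code rule must execute protoc on the build machine, so the hard-coded $<TARGET_FILE:protobuf::protoc> is replaced by a PROTOC_PATH variable that environment variables can redirect to a host binary, with EXTERNAL_PROTOC taking precedence over BUILD_HOST_EXEC. A condensed sketch of the resulting rule (foo.proto is illustrative):

    set(PROTOC_PATH $<TARGET_FILE:protobuf::protoc>)    # native default
    if(DEFINED ENV{EXTERNAL_PROTOC})
      set(PROTOC_PATH $ENV{EXTERNAL_PROTOC})            # host protoc wins
    endif()
    add_custom_command(OUTPUT foo.pb.cc foo.pb.h
      COMMAND "${PROTOC_PATH}" --cpp_out "${CMAKE_CURRENT_BINARY_DIR}"
              -I "${CMAKE_CURRENT_SOURCE_DIR}" foo.proto
      DEPENDS foo.proto)

The IDENTIFIER bump from 3.5.2-fix1 to 3.5.2-fix2 presumably invalidates the cached external build now that ProtobufSource.patch changes the sources.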
diff --git a/infra/cmake/packages/ProtobufSource.patch b/infra/cmake/packages/ProtobufSource.patch
new file mode 100644
index 000000000..9a83a80e4
--- /dev/null
+++ b/infra/cmake/packages/ProtobufSource.patch
@@ -0,0 +1,18 @@
+--- a/cmake/libprotoc.cmake
++++ b/cmake/libprotoc.cmake
+@@ -209,10 +209,14 @@
+ ${protobuf_source_dir}/src/google/protobuf/compiler/js/well_known_types/timestamp.js
+ )
+ add_executable(js_embed ${protobuf_source_dir}/src/google/protobuf/compiler/js/embed.cc)
++set(JS_EMBED_EXEC "js_embed")
++if(DEFINED ENV{EXTERNAL_JS_EMBED})
++ set(JS_EMBED_EXEC "$ENV{EXTERNAL_JS_EMBED}")
++endif()
+ add_custom_command(
+ OUTPUT ${protobuf_source_dir}/src/google/protobuf/compiler/js/well_known_types_embed.cc
+ DEPENDS js_embed ${js_well_known_types_sources}
+- COMMAND js_embed ${js_well_known_types_sources} > ${protobuf_source_dir}/src/google/protobuf/compiler/js/well_known_types_embed.cc
++ COMMAND ${JS_EMBED_EXEC} ${js_well_known_types_sources} > ${protobuf_source_dir}/src/google/protobuf/compiler/js/well_known_types_embed.cc
+ )
+
+ add_library(libprotoc ${protobuf_SHARED_OR_STATIC}
diff --git a/infra/cmake/packages/ProtobufSourceConfig.cmake b/infra/cmake/packages/ProtobufSourceConfig.cmake
index 6b35ae7dc..baa49eeb0 100644
--- a/infra/cmake/packages/ProtobufSourceConfig.cmake
+++ b/infra/cmake/packages/ProtobufSourceConfig.cmake
@@ -9,7 +9,8 @@ function(_ProtobufSource_import)
envoption(PROTOBUF_URL https://github.com/protocolbuffers/protobuf/archive/v3.5.2.tar.gz)
- ExternalSource_Download(PROTOBUF ${PROTOBUF_URL})
+ ExternalSource_Download(PROTOBUF ${PROTOBUF_URL}
+ PATCH ${CMAKE_CURRENT_LIST_DIR}/ProtobufSource.patch)
set(ProtobufSource_DIR ${PROTOBUF_SOURCE_DIR} PARENT_SCOPE)
set(ProtobufSource_FOUND TRUE PARENT_SCOPE)
diff --git a/infra/cmake/packages/TensorFlowGEMMLowpSource-2.8.0/TensorFlowGEMMLowpSourceConfig.cmake b/infra/cmake/packages/TensorFlowGEMMLowpSource-2.8.0/TensorFlowGEMMLowpSourceConfig.cmake
new file mode 100644
index 000000000..f3663cc78
--- /dev/null
+++ b/infra/cmake/packages/TensorFlowGEMMLowpSource-2.8.0/TensorFlowGEMMLowpSourceConfig.cmake
@@ -0,0 +1,20 @@
+function(_TensorFlowGEMMLowpSource_import)
+ if(NOT DOWNLOAD_GEMMLOWP)
+ set(TensorFlowGEMMLowpSource_FOUND FALSE PARENT_SCOPE)
+ return()
+ endif(NOT DOWNLOAD_GEMMLOWP)
+
+ nnas_include(ExternalSourceTools)
+ nnas_include(OptionTools)
+
+ # Exact version used by TensorFlow v2.8.0.
+ # See tensorflow/third_party/gemmlowp/workspace.bzl.
+ envoption(TENSORFLOW_2_8_0_GEMMLOWP_URL https://github.com/google/gemmlowp/archive/fda83bdc38b118cc6b56753bd540caa49e570745.zip)
+
+ ExternalSource_Download(GEMMLOWP DIRNAME TENSORFLOW-2.8.0-GEMMLOWP ${TENSORFLOW_2_8_0_GEMMLOWP_URL})
+
+ set(TensorFlowGEMMLowpSource_DIR ${GEMMLOWP_SOURCE_DIR} PARENT_SCOPE)
+ set(TensorFlowGEMMLowpSource_FOUND TRUE PARENT_SCOPE)
+endfunction(_TensorFlowGEMMLowpSource_import)
+
+_TensorFlowGEMMLowpSource_import()
diff --git a/infra/cmake/packages/FlatBuffers-1.12/FlatBuffersConfigVersion.cmake b/infra/cmake/packages/TensorFlowGEMMLowpSource-2.8.0/TensorFlowGEMMLowpSourceConfigVersion.cmake
index 8cfdbf8e5..2ad2e241e 100644
--- a/infra/cmake/packages/FlatBuffers-1.12/FlatBuffersConfigVersion.cmake
+++ b/infra/cmake/packages/TensorFlowGEMMLowpSource-2.8.0/TensorFlowGEMMLowpSourceConfigVersion.cmake
@@ -1,4 +1,4 @@
-set(PACKAGE_VERSION "1.12")
+set(PACKAGE_VERSION "2.8.0")
set(PACKAGE_VERSION_EXACT FALSE)
set(PACKAGE_VERSION_COMPATIBLE FALSE)
set(PACKAGE_VERSION_UNSUITABLE TRUE)
diff --git a/infra/cmake/packages/TensorFlowGpuConfig.cmake b/infra/cmake/packages/TensorFlowGpuConfig.cmake
new file mode 100644
index 000000000..7a7f78641
--- /dev/null
+++ b/infra/cmake/packages/TensorFlowGpuConfig.cmake
@@ -0,0 +1,22 @@
+# TensorFlowGpuConfig.cmake
+
+function(_Build_TfliteGpuDelegate_)
+ nnas_find_package(TensorFlowGpuSource REQUIRED)
+ nnas_find_package(AbseilSource REQUIRED)
+ nnas_find_package(Farmhash REQUIRED)
+ nnas_find_package(Fp16Source REQUIRED)
+
+ if(NOT TARGET TensorFlowGpu)
+ nnas_include(ExternalProjectTools)
+ add_extdirectory("${CMAKE_CURRENT_LIST_DIR}/TensorFlowLiteGpu" TensorFlowLiteGpu)
+ endif()
+ set(TENSORFLOWGPU_SOURCE_DIR ${TENSORFLOWGPU_SOURCE_DIR} PARENT_SCOPE)
+ set(TensorFlowGpu_DIR ${TensorFlowGpu_DIR} PARENT_SCOPE)
+endfunction(_Build_TfliteGpuDelegate_)
+
+if(BUILD_TENSORFLOW_LITE_GPU)
+  _Build_TfliteGpuDelegate_()
+ set(TensorFlowGpu_FOUND TRUE PARENT_SCOPE)
+else(BUILD_TENSORFLOW_LITE_GPU)
+ set(TensorFlowGpu_FOUND FALSE PARENT_SCOPE)
+endif(BUILD_TENSORFLOW_LITE_GPU)
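_Build_TfliteGpuDelegate_ only wires up the delegate's dependencies; the TensorFlowGpu target itself is defined by the TensorFlowLiteGpu subdirectory pulled in via add_extdirectory. Assumed usage from a backend's CMakeLists.txt (the backend target name is hypothetical):

    nnas_find_package(TensorFlowGpu QUIET)
    if(TensorFlowGpu_FOUND)
      target_link_libraries(onert_backend_gpu_cl PRIVATE TensorFlowGpu)
    endif()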
diff --git a/infra/cmake/packages/TensorFlowGpuSource/patch_for_gpu_cl_build.patch b/infra/cmake/packages/TensorFlowGpuSource/patch_for_gpu_cl_build.patch
new file mode 100644
index 000000000..bf423dc80
--- /dev/null
+++ b/infra/cmake/packages/TensorFlowGpuSource/patch_for_gpu_cl_build.patch
@@ -0,0 +1,292 @@
+diff --git a/tensorflow/lite/delegates/gpu/api.h b/tensorflow/lite/delegates/gpu/api.h
+index 7892d0ce..fae4fb69 100644
+--- a/tensorflow/lite/delegates/gpu/api.h
++++ b/tensorflow/lite/delegates/gpu/api.h
+@@ -43,11 +43,18 @@ limitations under the License.
+ #include "tensorflow/lite/delegates/gpu/common/data_type.h"
+ #include "tensorflow/lite/delegates/gpu/common/status.h"
+ #include "tensorflow/lite/delegates/gpu/common/util.h"
++
++#ifdef TFLITE_GPU_LIB_FIX
+ #include <vulkan/vulkan.h>
++#endif
+
+ #define GL_NO_PROTOTYPES
+ #define EGL_NO_PROTOTYPES
++
++#ifdef TFLITE_GPU_LIB_FIX
+ #include "tensorflow/lite/delegates/gpu/gl/portable_gl31.h"
++#endif
++
+ #undef GL_NO_PROTOTYPES
+ #undef EGL_NO_PROTOTYPES
+
+@@ -80,6 +87,7 @@ enum class ObjectType {
+ VULKAN_TEXTURE
+ };
+
++#ifdef TFLITE_GPU_LIB_FIX
+ struct OpenGlBuffer {
+ OpenGlBuffer() = default;
+ explicit OpenGlBuffer(GLuint new_id) : id(new_id) {}
+@@ -95,6 +103,7 @@ struct OpenGlTexture {
+ GLuint id = GL_INVALID_INDEX;
+ GLenum format = GL_INVALID_ENUM;
+ };
++#endif
+
+ struct OpenClBuffer {
+ OpenClBuffer() = default;
+@@ -111,6 +120,7 @@ struct OpenClTexture {
+ // TODO(akulik): should it specify texture format?
+ };
+
++#ifdef TFLITE_GPU_LIB_FIX
+ struct VulkanBuffer {
+ VulkanBuffer() = default;
+ explicit VulkanBuffer(VkBuffer buffer_, VkDeviceSize size_,
+@@ -143,6 +153,7 @@ struct VulkanMemory {
+ VkDeviceSize size;
+ VkDeviceSize offset;
+ };
++#endif
+
+ struct CpuMemory {
+ CpuMemory() = default;
+@@ -228,10 +239,15 @@ bool IsValid(const TensorObjectDef& def);
+ // @return the number of elements in a tensor object.
+ uint32_t NumElements(const TensorObjectDef& def);
+
++#ifdef TFLITE_GPU_LIB_FIX
+ using TensorObject =
+ absl::variant<absl::monostate, OpenGlBuffer, OpenGlTexture, CpuMemory,
+ OpenClBuffer, OpenClTexture, VulkanBuffer, VulkanTexture>;
+-
++#else
++using TensorObject =
++ absl::variant<absl::monostate, CpuMemory,
++ OpenClBuffer, OpenClTexture>;
++#endif
+ // @return true if object is set and corresponding values are defined.
+ bool IsValid(const TensorObjectDef& def, const TensorObject& object);
+
+diff --git a/tensorflow/lite/delegates/gpu/cl/api.h b/tensorflow/lite/delegates/gpu/cl/api.h
+index 65671117..c339f3f0 100644
+--- a/tensorflow/lite/delegates/gpu/cl/api.h
++++ b/tensorflow/lite/delegates/gpu/cl/api.h
+@@ -20,7 +20,9 @@ limitations under the License.
+ #define EGL_NO_PROTOTYPES
+ #endif
+
++#ifdef TFLITE_GPU_LIB_FIX
+ #include <EGL/egl.h>
++#endif
+
+ #include <cstdint>
+ #include <memory>
+@@ -115,9 +117,10 @@ struct InferenceEnvironmentOptions {
+ // It is the error to set egl_display, egl_context AND context at the same
+ // time. If egl_display and egl_context are set, they will be used to create
+ // GL-aware CL context.
++#ifdef TFLITE_GPU_LIB_FIX
+ EGLDisplay egl_display = EGL_NO_DISPLAY;
+ EGLContext egl_context = EGL_NO_CONTEXT;
+-
++#endif //TFLITE_GPU_LIB_FIX
+ // Should contain data returned from
+ // InferenceEnvironment::GetSerializedBinaryCache method.
+ // Invalid or incompatible data will be discarded. Compiled binary may become
+@@ -125,7 +128,11 @@ struct InferenceEnvironmentOptions {
+ absl::Span<const uint8_t> serialized_binary_cache;
+
+ bool IsGlAware() const {
++#ifdef TFLITE_GPU_LIB_FIX
+ return egl_context != EGL_NO_CONTEXT && egl_display != EGL_NO_DISPLAY;
++#else //TFLITE_GPU_LIB_FIX
++ return false;
++#endif //TFLITE_GPU_LIB_FIX
+ }
+ };
+
+diff --git a/tensorflow/lite/delegates/gpu/cl/arguments.h b/tensorflow/lite/delegates/gpu/cl/arguments.h
+index a5435c4f..e088355b 100644
+--- a/tensorflow/lite/delegates/gpu/cl/arguments.h
++++ b/tensorflow/lite/delegates/gpu/cl/arguments.h
+@@ -23,7 +23,9 @@ limitations under the License.
+ #include "tensorflow/lite/delegates/gpu/cl/cl_device.h"
+ #include "tensorflow/lite/delegates/gpu/cl/gpu_object.h"
+ #include "tensorflow/lite/delegates/gpu/cl/opencl_wrapper.h"
++#ifdef TFLITE_GPU_LIB_FIX
+ #include "tensorflow/lite/delegates/gpu/cl/serialization_generated.h"
++#endif
+ #include "tensorflow/lite/delegates/gpu/cl/util.h"
+ #include "tensorflow/lite/delegates/gpu/common/access_type.h"
+ #include "tensorflow/lite/delegates/gpu/common/status.h"
+@@ -78,11 +80,12 @@ class Arguments : public ArgumentsBinder {
+ ~Arguments() override = default;
+
+ private:
++#ifdef TFLITE_GPU_LIB_FIX
+ friend flatbuffers::Offset<data::Arguments> Encode(
+ const Arguments& args, flatbuffers::FlatBufferBuilder* builder);
+ friend absl::Status Decode(CLContext* context, const data::Arguments* fb_args,
+ Arguments* args);
+-
++#endif
+ void AddBuffer(const std::string& name, const GPUBufferDescriptor& desc);
+ void AddImage2D(const std::string& name, const GPUImage2DDescriptor& desc);
+ void AddImage2DArray(const std::string& name,
+diff --git a/tensorflow/lite/delegates/gpu/cl/gpu_object.h b/tensorflow/lite/delegates/gpu/cl/gpu_object.h
+index abd77a44..ac1b7f00 100644
+--- a/tensorflow/lite/delegates/gpu/cl/gpu_object.h
++++ b/tensorflow/lite/delegates/gpu/cl/gpu_object.h
+@@ -23,7 +23,9 @@ limitations under the License.
+
+ #include "tensorflow/lite/delegates/gpu/cl/cl_context.h"
+ #include "tensorflow/lite/delegates/gpu/cl/opencl_wrapper.h"
++#ifdef TFLITE_GPU_LIB_FIX
+ #include "tensorflow/lite/delegates/gpu/cl/serialization_generated.h"
++#endif
+ #include "tensorflow/lite/delegates/gpu/common/access_type.h"
+ #include "tensorflow/lite/delegates/gpu/common/data_type.h"
+ #include "tensorflow/lite/delegates/gpu/common/status.h"
+@@ -165,10 +167,12 @@ class GPUObjectDescriptor {
+ AccessType GetAccess() const { return access_type_; }
+
+ protected:
++#ifdef TFLITE_GPU_LIB_FIX
+ friend flatbuffers::Offset<data::GPUObjectDescriptor> Encode(
+ const GPUObjectDescriptor& desc, flatbuffers::FlatBufferBuilder* builder);
+ friend void Decode(const data::GPUObjectDescriptor* fb_obj,
+ GPUObjectDescriptor* obj);
++#endif
+ mutable std::map<std::string, std::string> state_vars_;
+ AccessType access_type_;
+ };
+diff --git a/tensorflow/lite/delegates/gpu/cl/inference_context.cc b/tensorflow/lite/delegates/gpu/cl/inference_context.cc
+index ca0c0319..f3cbc863 100644
+--- a/tensorflow/lite/delegates/gpu/cl/inference_context.cc
++++ b/tensorflow/lite/delegates/gpu/cl/inference_context.cc
+@@ -151,6 +151,7 @@ CLNode& CLNode::operator=(CLNode&& node) {
+ return *this;
+ }
+
++#ifdef TFLITE_GPU_LIB_FIX
+ absl::Status InferenceContext::InitFromGraph(
+ const CreateInferenceInfo& create_info, const GraphFloat32& graph,
+ Environment* env, std::vector<uint8_t>* serialized_model) {
+@@ -239,6 +240,7 @@ absl::Status InferenceContext::RestoreDeserialized(
+ }
+ return absl::OkStatus();
+ }
++#endif
+
+ absl::Status InferenceContext::InitFromGraphWithTransforms(
+ const CreateInferenceInfo& create_info, GraphFloat32* graph,
+diff --git a/tensorflow/lite/delegates/gpu/cl/inference_context.h b/tensorflow/lite/delegates/gpu/cl/inference_context.h
+index ec8055eb..871af9dd 100644
+--- a/tensorflow/lite/delegates/gpu/cl/inference_context.h
++++ b/tensorflow/lite/delegates/gpu/cl/inference_context.h
+@@ -31,7 +31,9 @@ limitations under the License.
+ #include "tensorflow/lite/delegates/gpu/cl/model_hints.h"
+ #include "tensorflow/lite/delegates/gpu/cl/opencl_wrapper.h"
+ #include "tensorflow/lite/delegates/gpu/cl/precision.h"
++#ifdef TFLITE_GPU_LIB_FIX
+ #include "tensorflow/lite/delegates/gpu/cl/serialization_generated.h"
++#endif
+ #include "tensorflow/lite/delegates/gpu/cl/tensor_type.h"
+ #include "tensorflow/lite/delegates/gpu/common/model.h"
+ #include "tensorflow/lite/delegates/gpu/common/status.h"
+@@ -100,12 +102,14 @@ class InferenceContext {
+ private:
+ enum TensorMemoryType { STRONG_SHAPE = 0, BUFFER = 1, VARIABLE = 2 };
+
++#ifdef TFLITE_GPU_LIB_FIX
+ friend flatbuffers::Offset<data::InferenceContext> Encode(
+ const InferenceContext& inference,
+ flatbuffers::FlatBufferBuilder* builder);
+ friend absl::Status Decode(CLContext* context,
+ const data::InferenceContext* fb_inference,
+ InferenceContext* inference);
++#endif
+
+ void CopyInAndOutIds(const GraphFloat32& graph);
+ absl::Status ConvertOperations(const DeviceInfo& device_info,
+diff --git a/tensorflow/lite/delegates/gpu/cl/kernels/gpu_operation.h b/tensorflow/lite/delegates/gpu/cl/kernels/gpu_operation.h
+index 57d8690c..8178e2de 100644
+--- a/tensorflow/lite/delegates/gpu/cl/kernels/gpu_operation.h
++++ b/tensorflow/lite/delegates/gpu/cl/kernels/gpu_operation.h
+@@ -30,7 +30,9 @@ limitations under the License.
+ #include "tensorflow/lite/delegates/gpu/cl/kernels/tuning_parameters.h"
+ #include "tensorflow/lite/delegates/gpu/cl/precision.h"
+ #include "tensorflow/lite/delegates/gpu/cl/program_cache.h"
++#ifdef TFLITE_GPU_LIB_FIX
+ #include "tensorflow/lite/delegates/gpu/cl/serialization_generated.h"
++#endif
+ #include "tensorflow/lite/delegates/gpu/cl/tensor.h"
+ #include "tensorflow/lite/delegates/gpu/cl/tensor_type.h"
+ #include "tensorflow/lite/delegates/gpu/common/data_type.h"
+@@ -169,11 +171,12 @@ class GPUOperation {
+ bool check_src_channels_size_ = false;
+
+ protected:
++#ifdef TFLITE_GPU_LIB_FIX
+ friend flatbuffers::Offset<data::GPUOperation> Encode(
+ const GPUOperation& op, flatbuffers::FlatBufferBuilder* builder);
+ friend absl::Status Decode(CLContext* context,
+ const data::GPUOperation* fb_op, GPUOperation* op);
+-
++#endif
+ virtual absl::Status BindArguments(ArgumentsBinder* args) {
+ return absl::OkStatus();
+ }
+diff --git a/tensorflow/lite/delegates/gpu/cl/program_cache.cc b/tensorflow/lite/delegates/gpu/cl/program_cache.cc
+index 285aa06d..f636a909 100644
+--- a/tensorflow/lite/delegates/gpu/cl/program_cache.cc
++++ b/tensorflow/lite/delegates/gpu/cl/program_cache.cc
+@@ -18,9 +18,13 @@ limitations under the License.
+ #include <cstdint>
+ #include <string>
+
++#ifdef TFLITE_GPU_LIB_FIX
+ #include "flatbuffers/flatbuffers.h" // from @flatbuffers
++#endif
+ #include "tensorflow/lite/delegates/gpu/cl/cl_program.h"
++#ifdef TFLITE_GPU_LIB_FIX
+ #include "tensorflow/lite/delegates/gpu/cl/compiled_program_cache_generated.h"
++#endif
+ #include "tensorflow/lite/delegates/gpu/cl/util.h"
+ #include "tensorflow/lite/delegates/gpu/common/status.h"
+ #include <farmhash.h>
+@@ -82,6 +86,7 @@ absl::Status ProgramCache::GetOrCreateCLKernel(const std::string& code,
+ return GetOrCreateCLKernel(code, function_name, {}, context, device, result);
+ }
+
++#ifdef TFLITE_GPU_LIB_FIX
+ absl::Status ProgramCache::AddSerializedCache(
+ const CLContext& context, const CLDevice& device,
+ absl::Span<const uint8_t> serialized_cache) {
+@@ -143,6 +148,7 @@ absl::Status ProgramCache::GetSerializedCache(
+ builder.GetSize());
+ return absl::OkStatus();
+ }
++#endif
+
+ } // namespace cl
+ } // namespace gpu
+diff --git a/tensorflow/lite/delegates/gpu/common/types.h b/tensorflow/lite/delegates/gpu/common/types.h
+index 4ddb46f3..2b692f0b 100644
+--- a/tensorflow/lite/delegates/gpu/common/types.h
++++ b/tensorflow/lite/delegates/gpu/common/types.h
+@@ -34,9 +34,9 @@ class alignas(2) half {
+ HalfBits bits;
+
+ half() = default;
+-
++#ifdef TFLITE_GPU_LIB_FIX
+ half(const half& f) : bits(f.bits) {}
+-
++#endif
+ explicit half(float other) { bits = fp16_ieee_from_fp32_value(other); }
+
+ void operator=(float f) { *this = half(f); }
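The whole patch is one technique applied repeatedly: the GL, EGL, Vulkan, and flatbuffers-serialization code paths are fenced behind an intentionally undefined TFLITE_GPU_LIB_FIX macro, shrinking the delegate to its OpenCL core. How that fence would be driven from the build, as a sketch (the target and source-list names mirror the TensorFlowLiteGpu CMakeLists.txt below):

    add_library(TensorFlowGpu STATIC ${GPU_CL_SRC_LIST})
    # TFLITE_GPU_LIB_FIX stays undefined for the OpenCL-only build;
    # defining it would re-enable the upstream GL/Vulkan/serialization paths:
    # target_compile_definitions(TensorFlowGpu PRIVATE TFLITE_GPU_LIB_FIX)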
diff --git a/infra/cmake/packages/TensorFlowGpuSourceConfig.cmake b/infra/cmake/packages/TensorFlowGpuSourceConfig.cmake
new file mode 100644
index 000000000..f1debe775
--- /dev/null
+++ b/infra/cmake/packages/TensorFlowGpuSourceConfig.cmake
@@ -0,0 +1,74 @@
+#
+# Download TensorFlow 2.4.1 and use only the GPU delegate code
+#
+
+function(_TensorFlowGpuSource_Import)
+  set(PATCH_FILE_CHECK "20211014")
+  set(DATE_STAMP_PATH "${NNAS_EXTERNALS_DIR}/TENSORFLOW_GPU.stamp")
+
+ set(PATCH_DONE FALSE)
+ if(EXISTS ${DATE_STAMP_PATH})
+ file(STRINGS ${DATE_STAMP_PATH} OBTAINED_CONTENT)
+ if(${OBTAINED_CONTENT} STREQUAL "${PATCH_FILE_CHECK}")
+ set(PATCH_DONE "TRUE")
+ endif()
+ endif()
+
+ if(${PATCH_DONE} STREQUAL "TRUE")
+ message(STATUS "Skip downloading TensorFlowGpuSource")
+ set(TENSORFLOWGPU_SOURCE_DIR "${NNAS_EXTERNALS_DIR}/TENSORFLOW_GPU" PARENT_SCOPE)
+ set(TensorFlowGpuSource_DIR "${TensorFlowGpuSource_DIR}" PARENT_SCOPE)
+ set(TensorFlowGpuSource_FOUND TRUE PARENT_SCOPE)
+ return()
+ else(${PATCH_DONE} STREQUAL "TRUE")
+ # PATCH_DONE FALSE
+    message(STATUS "TensorFlowGpuSource patch not applied yet; downloading")
+ endif(${PATCH_DONE} STREQUAL "TRUE")
+
+ # Download TFLite Source Code
+ nnas_include(ExternalSourceTools)
+ nnas_include(OptionTools)
+ envoption(TENSORFLOW_2_4_1_URL https://github.com/tensorflow/tensorflow/archive/v2.4.1.tar.gz)
+ ExternalSource_Download(TFLITE_GPU_DELEGATE DIRNAME TENSORFLOW-2.4.1 ${TENSORFLOW_2_4_1_URL})
+
+  # Patch out code that is unused by the onert backend/gpu_cl
+  # TODO: simplify this
+ set(TENSORFLOWGPU_SOURCE_DIR "${NNAS_EXTERNALS_DIR}/TENSORFLOW_GPU")
+
+  # remove any previous copy, then copy only the GPU delegate sources
+ if(EXISTS ${TENSORFLOWGPU_SOURCE_DIR})
+ file(REMOVE_RECURSE "${TENSORFLOWGPU_SOURCE_DIR}")
+ endif()
+
+ file(MAKE_DIRECTORY "${TENSORFLOWGPU_SOURCE_DIR}")
+ execute_process(
+ WORKING_DIRECTORY "${TFLITE_GPU_DELEGATE_SOURCE_DIR}"
+ COMMAND bash -c "cp -r --parents ./tensorflow/lite/delegates/gpu ../TENSORFLOW_GPU"
+ )
+
+ # Create Stamp
+ set(_remove_path "${TENSORFLOWGPU_SOURCE_DIR}.stamp")
+ if(EXISTS ${_remove_path})
+ file(REMOVE ${_remove_path})
+ endif()
+ execute_process(
+ WORKING_DIRECTORY "${NNAS_EXTERNALS_DIR}/TENSORFLOW_GPU"
+ COMMAND bash -c "patch -p1 < ${CMAKE_CURRENT_LIST_DIR}/TensorFlowGpuSource/patch_for_gpu_cl_build.patch"
+ )
+ file(WRITE ${DATE_STAMP_PATH} "${PATCH_FILE_CHECK}")
+ set(TENSORFLOWGPU_SOURCE_DIR "${TENSORFLOWGPU_SOURCE_DIR}" PARENT_SCOPE)
+ set(TensorFlowGpuSource_DIR "${TensorFlowGpuSource_DIR}" PARENT_SCOPE)
+ set(TensorFlowGpuSource_FOUND TRUE PARENT_SCOPE)
+
+ execute_process(
+ WORKING_DIRECTORY "${NNAS_EXTERNALS_DIR}"
+ COMMAND bash -c "rm -rf ${TFLITE_GPU_DELEGATE_SOURCE_DIR}.stamp"
+ COMMAND bash -c "rm -rf ${TFLITE_GPU_DELEGATE_SOURCE_DIR}"
+ )
+endfunction(_TensorFlowGpuSource_Import)
+
+if(NOT TensorFlowGpuSource_FOUND)
+ _TensorFlowGpuSource_Import()
+else()
+ set(TensorFlowGpuSource_FOUND FALSE PARENT_SCOPE)
+endif(NOT TensorFlowGpuSource_FOUND)
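Download-and-patch here is made idempotent with a stamp file: the stamp stores a patch-revision string, and a matching stamp short-circuits the whole function on reconfigure. A generic sketch of the guard (SOMEPKG and the literal date are illustrative):

    set(STAMP "${NNAS_EXTERNALS_DIR}/SOMEPKG.stamp")
    set(WANT "20211014")              # bump to force re-download and re-patch
    set(DONE FALSE)
    if(EXISTS ${STAMP})
      file(STRINGS ${STAMP} GOT)
      if("${GOT}" STREQUAL "${WANT}")
        set(DONE TRUE)
      endif()
    endif()
    if(NOT DONE)
      # ... download sources, apply patch ...
      file(WRITE ${STAMP} "${WANT}")  # record success last
    endif()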
diff --git a/infra/cmake/packages/TensorFlowLite-1.13.1/Lite/CMakeLists.txt b/infra/cmake/packages/TensorFlowLite-1.13.1/Lite/CMakeLists.txt
index c35617497..a57d7f4cb 100644
--- a/infra/cmake/packages/TensorFlowLite-1.13.1/Lite/CMakeLists.txt
+++ b/infra/cmake/packages/TensorFlowLite-1.13.1/Lite/CMakeLists.txt
@@ -1,7 +1,9 @@
# NOTE The followings SHOULD be defined before using this CMakeLists.txt
+# NOTE TensorFlow 1.13.1 uses flatbuffers-1.10
+# but we use flatbuffers-2.0 to match the flatbuffers version used by all other modules.
#
# 'TensorFlowSource_DIR' variable
-# 'FlatBuffersSource_DIR' variable
+# 'flatbuffers-2.0' target
# 'eigen' target
# 'gemmlowp' target
# 'neon2sse' target
@@ -37,10 +39,9 @@ CHECK_CXX_COMPILER_FLAG(-Wno-extern-c-compat COMPILER_SUPPORT_EXTERN_C_COMPAT_WA
add_library(tensorflowlite-1.13.1 ${SRCS})
set_target_properties(tensorflowlite-1.13.1 PROPERTIES POSITION_INDEPENDENT_CODE ON)
target_include_directories(tensorflowlite-1.13.1 PUBLIC ${TensorFlowSource_DIR})
-target_include_directories(tensorflowlite-1.13.1 PUBLIC ${FlatBuffersSource_DIR}/include)
target_compile_options(tensorflowlite-1.13.1 PUBLIC -Wno-ignored-attributes)
if(COMPILER_SUPPORT_EXTERN_C_COMPAT_WARNING)
target_compile_options(tensorflowlite-1.13.1 PUBLIC -Wno-extern-c-compat)
endif(COMPILER_SUPPORT_EXTERN_C_COMPAT_WARNING)
target_compile_definitions(tensorflowlite-1.13.1 PUBLIC "GEMMLOWP_ALLOW_SLOW_SCALAR_FALLBACK")
-target_link_libraries(tensorflowlite-1.13.1 eigen gemmlowp neon2sse farmhash abseil dl)
+target_link_libraries(tensorflowlite-1.13.1 flatbuffers-2.0 eigen gemmlowp neon2sse farmhash abseil dl)
diff --git a/infra/cmake/packages/TensorFlowLite-1.13.1/TensorFlowLiteConfig.cmake b/infra/cmake/packages/TensorFlowLite-1.13.1/TensorFlowLiteConfig.cmake
index 2c6bd9f7a..ea2065850 100644
--- a/infra/cmake/packages/TensorFlowLite-1.13.1/TensorFlowLiteConfig.cmake
+++ b/infra/cmake/packages/TensorFlowLite-1.13.1/TensorFlowLiteConfig.cmake
@@ -6,12 +6,12 @@ function(_TensorFlowLite_import)
return()
endif(NOT TensorFlowSource_FOUND)
- nnas_find_package(FlatBuffersSource EXACT 1.10 QUIET)
+ nnas_find_package(FlatBuffers EXACT 2.0 QUIET)
- if(NOT FlatBuffersSource_FOUND)
+ if(NOT FlatBuffers_FOUND)
set(TensorFlowLite_FOUND FALSE PARENT_SCOPE)
return()
- endif(NOT FlatBuffersSource_FOUND)
+ endif(NOT FlatBuffers_FOUND)
nnas_find_package(Farmhash QUIET)
diff --git a/infra/cmake/packages/TensorFlowLiteGpu/CMakeLists.txt b/infra/cmake/packages/TensorFlowLiteGpu/CMakeLists.txt
new file mode 100644
index 000000000..c69e0bb85
--- /dev/null
+++ b/infra/cmake/packages/TensorFlowLiteGpu/CMakeLists.txt
@@ -0,0 +1,72 @@
+#
+# Tensorflow Lite GPU delegate library 2.4.1
+#
+
+set(LIB_TENSORFLOW_GPU_DELEGATE "TensorFlowGpu")
+
+#TENSORFLOWGPU_SOURCE_DIR
+set(REF_TENSORFLOW_SRC_BASE ${TENSORFLOWGPU_SOURCE_DIR})
+set(REF_TENSORFLOW_LITE_SRC_BASE ${REF_TENSORFLOW_SRC_BASE}/tensorflow/lite)
+set(REF_TENSORFLOW_LITE_GPU_DELEGATE_SRC_BASE "${REF_TENSORFLOW_LITE_SRC_BASE}/delegates/gpu")
+
+set(SRC_BASE "${REF_TENSORFLOW_LITE_GPU_DELEGATE_SRC_BASE}")
+file(GLOB GPU_CL_SRC_LIST "${SRC_BASE}/cl/*.cc"
+ "${SRC_BASE}/cl/kernels/*.cc"
+ "${SRC_BASE}/cl/kernels/special/*.cc"
+ "${SRC_BASE}/cl/kernels/selectors/*.cc"
+ "${SRC_BASE}/cl/selectors/*.cc"
+ "${SRC_BASE}/common/*.cc"
+# Available, but not needed yet
+# "${SRC_BASE}/common/default/*.cc"
+# "${SRC_BASE}/common/memory_managements/*.cc"
+# "${SRC_BASE}/common/transformations/*.cc"
+ )
+
+file(GLOB GPU_CL_HDRS_GLOB "${SRC_BASE}/cl/*.h"
+ "${SRC_BASE}/cl/kernels/*.h"
+ "${SRC_BASE}/cl/kernels/special/*.h"
+ "${SRC_BASE}/cl/kernels/selectors/*.h"
+ "${SRC_BASE}/cl/selectors/*.h"
+ "${SRC_BASE}/common/*.h"
+ "${SRC_BASE}/common/default/*.h"
+ "${SRC_BASE}/common/memory_managements/*.h"
+ "${SRC_BASE}/common/transformations/*.h"
+ )
+list(APPEND GPU_CL_SRC_LIST "${GPU_CL_HDRS_GLOB}")
+
+file(GLOB REMOVE_TEST_SRCS "${SRC_BASE}/cl/*_test*.cc"
+ "${SRC_BASE}/cl/testing/*.cc"
+ "${SRC_BASE}/cl/kernels/*_test*.cc"
+ "${SRC_BASE}/common/*_test*.cc"
+ "${SRC_BASE}/common/transformations/*_test*.cc"
+ )
+# Not available
+file(GLOB REMOVE_SRCS "${SRC_BASE}/cl/*gl*.cc"
+ "${SRC_BASE}/cl/gpu_api_delegate.cc"
+ "${SRC_BASE}/cl/serialization.cc"
+ "${SRC_BASE}/common/lstm_parser.cc"
+ "${SRC_BASE}/common/model_builder.cc"
+ "${SRC_BASE}/common/model_builder_helper.cc"
+ "${SRC_BASE}/common/object_reader.cc"
+ "${SRC_BASE}/common/quantization_util.cc"
+ "${SRC_BASE}/common/memory_management/*_test.cc"
+ )
+
+list(REMOVE_ITEM GPU_CL_SRC_LIST ${REMOVE_TEST_SRCS})
+list(REMOVE_ITEM GPU_CL_SRC_LIST ${REMOVE_SRCS})
+list(APPEND TFLITE_GPU_SRCS ${GPU_CL_SRC_LIST})
+
+add_library(${LIB_TENSORFLOW_GPU_DELEGATE} STATIC ${TFLITE_GPU_SRCS})
+target_include_directories(${LIB_TENSORFLOW_GPU_DELEGATE} PRIVATE "${Fp16Source_DIR}/include")
+target_include_directories(${LIB_TENSORFLOW_GPU_DELEGATE} PRIVATE "${TENSORFLOWGPU_SOURCE_DIR}")
+target_link_libraries(${LIB_TENSORFLOW_GPU_DELEGATE} PRIVATE abseil farmhash fp16)
+
+add_library(tflitegpu_ignore_warnings INTERFACE)
+target_compile_options(tflitegpu_ignore_warnings INTERFACE -Wno-unused-parameter -Wno-sign-compare)
+target_link_libraries(${LIB_TENSORFLOW_GPU_DELEGATE} INTERFACE tflitegpu_ignore_warnings)
+
+# GL codes are not used on gpu_cl
+target_compile_options(${LIB_TENSORFLOW_GPU_DELEGATE} PRIVATE "-DCL_DELEGATE_NO_GL")
+
+# Applying PIC first, currently used on gpu_cl only
+set_target_properties(${LIB_TENSORFLOW_GPU_DELEGATE} PROPERTIES POSITION_INDEPENDENT_CODE ON)
diff --git a/infra/cmake/packages/TensorFlowSource-2.8.0/TensorFlowSourceConfig.cmake b/infra/cmake/packages/TensorFlowSource-2.8.0/TensorFlowSourceConfig.cmake
new file mode 100644
index 000000000..4abe2eae6
--- /dev/null
+++ b/infra/cmake/packages/TensorFlowSource-2.8.0/TensorFlowSourceConfig.cmake
@@ -0,0 +1,18 @@
+function(_TensorFlowSource_import)
+ if(NOT DOWNLOAD_TENSORFLOW)
+ set(TensorFlowSource_FOUND FALSE PARENT_SCOPE)
+ return()
+ endif(NOT DOWNLOAD_TENSORFLOW)
+
+ nnas_include(ExternalSourceTools)
+ nnas_include(OptionTools)
+
+ envoption(TENSORFLOW_2_8_0_URL https://github.com/tensorflow/tensorflow/archive/v2.8.0.tar.gz)
+
+ ExternalSource_Download(TENSORFLOW DIRNAME TENSORFLOW-2.8.0 ${TENSORFLOW_2_8_0_URL})
+
+ set(TensorFlowSource_DIR ${TENSORFLOW_SOURCE_DIR} PARENT_SCOPE)
+ set(TensorFlowSource_FOUND TRUE PARENT_SCOPE)
+endfunction(_TensorFlowSource_import)
+
+_TensorFlowSource_import()
diff --git a/infra/cmake/packages/FlatBuffersSource-1.10/FlatBuffersSourceConfigVersion.cmake b/infra/cmake/packages/TensorFlowSource-2.8.0/TensorFlowSourceConfigVersion.cmake
index 6585f21d5..2ad2e241e 100644
--- a/infra/cmake/packages/FlatBuffersSource-1.10/FlatBuffersSourceConfigVersion.cmake
+++ b/infra/cmake/packages/TensorFlowSource-2.8.0/TensorFlowSourceConfigVersion.cmake
@@ -1,4 +1,4 @@
-set(PACKAGE_VERSION "1.10")
+set(PACKAGE_VERSION "2.8.0")
set(PACKAGE_VERSION_EXACT FALSE)
set(PACKAGE_VERSION_COMPATIBLE FALSE)
set(PACKAGE_VERSION_UNSUITABLE TRUE)
diff --git a/infra/command/format b/infra/command/format
index 75e6184d3..5cf9606fa 100644
--- a/infra/command/format
+++ b/infra/command/format
@@ -4,6 +4,7 @@ INVALID_EXIT=0
FILES_TO_CHECK=()
DIRECTORIES_TO_BE_TESTED=()
DIRECTORIES_NOT_TO_BE_TESTED=()
+DEFAULT_CLANG_FORMAT="clang-format-8"
CLANG_FORMAT_CANDIDATES=()
PATCH_FILE=format.patch
CHECK_DIFF_ONLY="0"
@@ -16,7 +17,7 @@ function Usage()
echo "If <file>s are given, it reformats the files"
echo ""
echo "Options:"
- echo " --clang-format <TOOL> clang format bin (default: clang-format-3.9, clang-format)"
+ echo " --clang-format <TOOL> clang format bin (default: $DEFAULT_CLANG_FORMAT)"
echo " --diff-only check diff files with master"
echo " --staged-only check git staged files"
}
@@ -65,39 +66,14 @@ function command_exists() {
command -v $1 > /dev/null 2>&1
}
-function exclude_symbolic_links() {
- # Check all files (CMakeLists.txt, *.cl, ... not only for C++, Python)
- if [[ ${#FILES_TO_CHECK} -ne 0 ]]; then
- FILES_EXCLUDE_SYMLINKS=$(file ${FILES_TO_CHECK} | grep -v "symbolic link" | cut -d':' -f1)
- FILES_TO_CHECK=${FILES_EXCLUDE_SYMLINKS}
- fi
-}
-
function check_newline() {
- FILES_TO_CHECK_CR=()
- for f in ${FILES_TO_CHECK[@]}; do
- # Manually ignore style checking
- if [[ ${f} == !(*.svg|*.pdf|*.png) ]]; then
- FILES_TO_CHECK_CR+=("${f}")
- fi
- done
+ # Exclude binary files (refer to the .gitattributes file)
+ # TODO Remove the svg exclusion
+ # .svg: xml-based markup for vector graphics
+ FILES_TO_CHECK_EOF=`echo "$FILES_TO_CHECK" | tr ' ' '\n' | egrep -v '((\.caffemodel)|(\.png)|(\.pdf)|(\.h5)|(\.tar\.gz)|(\.tflite)|(\.bmp)|(\.svg))$'`
- # Check all files (CMakeLists.txt, *.cl, ... not only for C++, Python)
- if [[ ${#FILES_TO_CHECK_CR} -ne 0 ]]; then
- CRCHECK=$(file ${FILES_TO_CHECK_CR} | grep 'with CR')
- else
- return
- fi
- FILES_TO_FIX=($(echo "$CRCHECK" | grep "with CRLF line" | cut -d':' -f1))
- for f in ${FILES_TO_FIX[@]}; do
- tr -d '\r' < $f > $f.fixed && cat $f.fixed > $f && rm $f.fixed
- done
- FILES_TO_FIX=($(echo "${CRCHECK}" | grep "with CR line" | cut -d':' -f1))
- for f in ${FILES_TO_FIX[@]}; do
- tr '\r' '\n' < $f > $f.fixed && cat $f.fixed > $f && rm $f.fixed
- done
- # Check no new line at end of file
- for f in ${FILES_TO_CHECK_CR[@]}; do
+ for f in ${FILES_TO_CHECK_EOF[@]}; do
+ # Check no new line at end of file
if diff /dev/null "$f" | tail -1 | grep '^\\ No newline' > /dev/null; then
echo >> "$f"
fi
@@ -106,23 +82,19 @@ function check_newline() {
function check_permission() {
# Check all files except script
- FILES_TO_CHECK_PERMISSION=()
- for f in ${FILES_TO_CHECK[@]}; do
- # Manually ignore permission checking
- if [[ ${f} == !(nnas|nnfw|nncc|*.sh|*.py|*/gradlew|infra/debian/compiler/rules|infra/debian/runtime/rules) ]] \
- || [[ ${f} == tests/nnapi/specs/**/*.py ]]; then
- FILES_TO_CHECK_PERMISSION+=("${f}")
- fi
- done
+ # Manually ignore permission checking
+ FILES_TO_CHECK_PERMISSION=$(git ls-files -c -s --exclude-standard ${FILES_TO_CHECK[@]} | egrep '^100755' | cut -f2)
+ FILES_TO_CHECK_PERMISSION=`echo "$FILES_TO_CHECK_PERMISSION" | tr ' ' '\n' | egrep -v '((^nnas)|(^nnfw)|(^nncc)|(\.sh)|(\.py)|(/gradlew))$'`
+ FILES_TO_CHECK_PERMISSION=`echo "$FILES_TO_CHECK_PERMISSION" | egrep -v '((^infra/debian/compiler/rules)|(^infra/debian/runtime/rules))$'`
+ FILES_TO_CHECK_PERMISSION+=`echo && echo "$FILES_TO_CHECK" | egrep '^tests/nnapi/specs/.*.py$'`
+ # Transform to array
+ FILES_TO_CHECK_PERMISSION=($FILES_TO_CHECK_PERMISSION)
if [[ ${#FILES_TO_CHECK_PERMISSION} -eq 0 ]]; then
return
fi
- for FILE_TO_CHECK in ${FILES_TO_CHECK_PERMISSION[@]}; do
- RESULT=$(stat -c '%A' ${FILE_TO_CHECK} | grep 'x')
- if [ "${RESULT}" != "" ]; then
- chmod a-x ${FILE_TO_CHECK}
- fi
+ for f in ${FILES_TO_CHECK_PERMISSION[@]}; do
+ chmod a-x $f
done
}
@@ -132,7 +104,7 @@ function check_cpp_files() {
return
fi
- CLANG_FORMAT_CANDIDATES+=("clang-format-8")
+ CLANG_FORMAT_CANDIDATES+=($DEFAULT_CLANG_FORMAT)
for CLANG_FORMAT_CANDIDATE in ${CLANG_FORMAT_CANDIDATES[@]}; do
if command_exists ${CLANG_FORMAT_CANDIDATE} ; then
CLANG_FORMAT="${CLANG_FORMAT_CANDIDATE}"
@@ -141,25 +113,18 @@ function check_cpp_files() {
done
if [[ -z ${CLANG_FORMAT} ]]; then
- echo "[ERROR] clang-format-8 is unavailable"
+ echo "[ERROR] $CLANG_FORMAT is unavailable"
echo
- echo " Please install clang-format-8 before running format check"
+ echo " Please install $DEFAULT_CLANG_FORMAT before running format check"
exit 1
fi
- # Check c++ files
- FILES_TO_CHECK_CPP=()
- for f in ${FILES_TO_CHECK[@]}; do
- # Manually ignore style checking
- if [[ ${f} == +(*/NeuralNetworks.h|*/NeuralNetworksExtensions.h) ]]; then
- continue
- fi
-
- # File extension to check
- if [[ ${f} == +(*.h|*.hpp|*.cpp|*.cc|*.c|*.cl) ]]; then
- FILES_TO_CHECK_CPP+=("${f}")
- fi
- done
+ # Check C++ files: replace ' ' with newline, filter with egrep
+ FILES_TO_CHECK_CPP=`echo "$FILES_TO_CHECK" | tr ' ' '\n' | egrep '((\.c[cl]?)|(\.cpp)|(\.h(pp)?))$'`
+ # Manually ignore style checking
+ FILES_TO_CHECK_CPP=`echo "$FILES_TO_CHECK_CPP" | egrep -v '((/NeuralNetworks\.h)|(/NeuralNetworksExtensions\.h))$'`
+ # Transform to array
+ FILES_TO_CHECK_CPP=($FILES_TO_CHECK_CPP)
# Skip by '.FORMATDENY' file
for s in ${DIRECTORIES_NOT_TO_BE_TESTED[@]}; do
@@ -189,21 +154,12 @@ function check_python_files() {
fi
# Check python files
- FILES_TO_CHECK_PYTHON=()
- for f in ${FILES_TO_CHECK[@]}; do
- # File extension to check
- if [[ ${f} == *.py ]]; then
- FILES_TO_CHECK_PYTHON+=("${f}")
- fi
- # Exceptional case: one-cmds don't have '.py' extension
- if [[ ${f} == compiler/one-cmds/* ]]; then
- # Ignore non-python source (cmake, etc)
- # Ignore shell script: one-prepare-venv
- if [[ ${f} != compiler/one-cmds/*.* ]] && [[ ${f} != compiler/one-cmds/one-prepare-venv ]]; then
- FILES_TO_CHECK_PYTHON+=("${f}")
- fi
- fi
- done
+ FILES_TO_CHECK_PYTHON=`echo "$FILES_TO_CHECK" | tr ' ' '\n' | egrep '\.py$'`
+ # Exceptional case: one-cmds tools have no '.py' extension; ignore non-Python sources (cmake, etc.) and the shell script one-prepare-venv
+ FILES_TO_CHECK_PYTHON=`echo "$FILES_TO_CHECK_PYTHON" | egrep -v '^compiler/one-cmds/.*\..*$' | egrep -v '^compiler/one-cmds/one-prepare-venv$'`
+ # Transform to array
+ FILES_TO_CHECK_PYTHON=($FILES_TO_CHECK_PYTHON)
+
for s in ${DIRECTORIES_NOT_TO_BE_TESTED[@]}; do
skip=${s#'.'/}/
FILES_TO_CHECK_PYTHON=(${FILES_TO_CHECK_PYTHON[*]/$skip*/})
@@ -228,7 +184,13 @@ fi
__Check_CPP=${CHECK_CPP:-"1"}
__Check_PYTHON=${CHECK_PYTHON:-"1"}
-FILES_TO_CHECK=$(git ls-files -c --exclude-standard ${DIRECTORIES_TO_BE_TESTED[@]})
+# Git file mode
+# 120000: symbolic link
+# 160000: gitlink (submodule)
+# 100755: regular executable file
+# 100644: regular non-executable file
+# Reference: https://github.com/git/git/blob/cd42415/Documentation/technical/index-format.txt#L72-L81
+FILES_TO_CHECK=$(git ls-files -c -s --exclude-standard ${DIRECTORIES_TO_BE_TESTED[@]} | egrep -v '^1[26]0000' | cut -f2)
if [[ "${CHECK_DIFF_ONLY}" = "1" ]]; then
MASTER_EXIST=$(git rev-parse --verify master)
CURRENT_BRANCH=$(git branch | grep \* | cut -d ' ' -f2-)
@@ -243,6 +205,7 @@ if [[ "${CHECK_DIFF_ONLY}" = "1" ]]; then
else
FILES_TO_CHECK=$(git diff --name-only --diff-filter=d HEAD~${DIFF_COMMITS})
fi
+ FILES_TO_CHECK=$(git ls-files -c -s --exclude-standard ${FILES_TO_CHECK[@]} | egrep -v '^1[26]0000' | cut -f2)
fi
fi
@@ -250,7 +213,6 @@ for DIR_NOT_TO_BE_TESTED in $(git ls-files -co --exclude-standard '*/.FORMATDENY
DIRECTORIES_NOT_TO_BE_TESTED+=($(dirname "${DIR_NOT_TO_BE_TESTED}"))
done
-exclude_symbolic_links
check_newline
check_permission
check_cpp_files
diff --git a/infra/debian/compiler/control b/infra/debian/compiler/control
index 99fa479f2..b3a3c1bf7 100644
--- a/infra/debian/compiler/control
+++ b/infra/debian/compiler/control
@@ -2,14 +2,14 @@ Source: one
Section: devel
Priority: extra
Maintainer: Neural Network Acceleration Solution Developers <nnfw@samsung.com>
-Build-Depends: cmake, debhelper (>=9), dh-python, python3-all
+Build-Depends: cmake, debhelper (>=9), dh-python, python3-all, python3.8, python3.8-venv
Standards-Version: 3.9.8
Homepage: https://github.com/Samsung/ONE
Package: one-compiler
Architecture: amd64
Multi-Arch: foreign
-Depends: ${misc:Depends}, ${shlibs:Depends}, python3-venv, python3-pip
+Depends: ${misc:Depends}, ${shlibs:Depends}, python3-venv, python3-pip, python3.8, python3.8-venv
Description: On-device Neural Engine compiler package
Package: one-compiler-dev
diff --git a/infra/debian/compiler/one-compiler.install b/infra/debian/compiler/one-compiler.install
index cbca47802..805ba8677 100644
--- a/infra/debian/compiler/one-compiler.install
+++ b/infra/debian/compiler/one-compiler.install
@@ -1,7 +1,7 @@
# {FILES_TO_INSTALL} {DEST_DIR}
# bin
usr/bin/circle2circle usr/share/one/bin/
-usr/bin/circle_partitioner usr/share/one/bin/
+usr/bin/circle-partitioner usr/share/one/bin/
usr/bin/circle-quantizer usr/share/one/bin/
usr/bin/generate_bcq_metadata.py usr/share/one/bin/
usr/bin/generate_bcq_output_arrays.py usr/share/one/bin/
@@ -22,6 +22,8 @@ usr/bin/one-prepare-venv usr/share/one/bin/
usr/bin/one-profile usr/share/one/bin/
usr/bin/one-quantize usr/share/one/bin/
usr/bin/one-version usr/share/one/bin/
+usr/bin/onelib/constant.py usr/share/one/bin/onelib/
+usr/bin/onelib/make_cmd.py usr/share/one/bin/onelib/
usr/bin/rawdata2hdf5 usr/share/one/bin/
usr/bin/record-minmax usr/share/one/bin/
usr/bin/tf2nnpkg usr/share/one/bin/
diff --git a/infra/debian/compiler/postinst b/infra/debian/compiler/postinst
index a339d06c1..d84e8e042 100644
--- a/infra/debian/compiler/postinst
+++ b/infra/debian/compiler/postinst
@@ -9,4 +9,4 @@ set -e
# which causes invalid permission problem.
# e.g. When `pip` installs user packages, it proceeds based on $HOME.
# To proper installation, $HOME should be root.
-su - $(whoami) -c '/usr/share/one/bin/one-prepare-venv' # $(whoami) = root
+su - $(whoami) -p -c '/usr/share/one/bin/one-prepare-venv' # $(whoami) = root
diff --git a/infra/debian/compiler/rules b/infra/debian/compiler/rules
index e42faae09..145634d03 100755
--- a/infra/debian/compiler/rules
+++ b/infra/debian/compiler/rules
@@ -1,7 +1,7 @@
#!/usr/bin/make -f
export DH_VERBOSE = 1
export NNAS_BUILD_PREFIX = build
-export PRESET = 20210910
+export PRESET = 20220323
export _DESTDIR = debian/tmp/usr
%:
diff --git a/infra/docker/bionic/Dockerfile b/infra/docker/bionic/Dockerfile
index c3d5b3e95..dbc22a6e8 100644
--- a/infra/docker/bionic/Dockerfile
+++ b/infra/docker/bionic/Dockerfile
@@ -41,9 +41,12 @@ RUN apt-get update && apt-get -qqy install libprotobuf-dev protobuf-compiler
# Additonal tools
RUN apt-get update && \
DEBIAN_FRONTEND=noninteractive \
- apt-get -qqy install doxygen graphviz wget zip unzip clang-format-3.9 clang-format-8 python3 python3-pip python3-venv hdf5-tools pylint curl
-RUN pip3 install --upgrade pip
-RUN pip3 install yapf==0.22.0 numpy
+ apt-get -qqy install doxygen graphviz wget zip unzip clang-format-8 python3 python3-pip python3-venv hdf5-tools pylint curl
+RUN apt-get update && apt-get -qqy install python3.8 python3.8-venv
+RUN python3 -m pip install --upgrade pip
+RUN python3 -m pip install yapf==0.22.0 numpy
+RUN python3.8 -m pip install --upgrade pip
+RUN python3.8 -m pip install numpy
# Install google test (source)
RUN apt-get update && apt-get -qqy install libgtest-dev
diff --git a/infra/docker/focal/Dockerfile b/infra/docker/focal/Dockerfile
index 45faa6975..6f3cd9b60 100644
--- a/infra/docker/focal/Dockerfile
+++ b/infra/docker/focal/Dockerfile
@@ -31,12 +31,12 @@ RUN apt-get update && apt-get -qqy install libboost-all-dev libgflags-dev libgoo
# Install protocol buffer
RUN apt-get update && apt-get -qqy install libprotobuf-dev protobuf-compiler
-# Additonal tools (except clang-format-3.9)
+# Additional tools
RUN apt-get update && \
DEBIAN_FRONTEND=noninteractive \
apt-get -qqy install doxygen graphviz wget zip unzip clang-format-8 python3 python3-pip python3-venv hdf5-tools pylint curl
-RUN pip3 install --upgrade pip
-RUN pip3 install yapf==0.22.0 numpy
+RUN python3 -m pip install --upgrade pip
+RUN python3 -m pip install yapf==0.22.0 numpy
# Install google test (source)
RUN apt-get update && apt-get -qqy install libgtest-dev
diff --git a/infra/nncc/CMakeLists.txt b/infra/nncc/CMakeLists.txt
index bde684938..2ff5a5f6a 100644
--- a/infra/nncc/CMakeLists.txt
+++ b/infra/nncc/CMakeLists.txt
@@ -40,12 +40,19 @@ macro(nnas_include PREFIX)
endmacro(nnas_include)
macro(nnas_find_package PREFIX)
- find_package(${PREFIX} CONFIG NO_DEFAULT_PATH
- PATHS ${NNAS_PROJECT_SOURCE_DIR}/infra/cmake/packages
- ${ARGN}
- )
+ find_package(${PREFIX}
+ CONFIG NO_DEFAULT_PATH
+ PATHS ${NNAS_PROJECT_SOURCE_DIR}/infra/cmake/packages
+ ${ARGN})
endmacro(nnas_find_package)
+macro(nnas_find_package_folder PREFIX FIND_FOLDER)
+ find_package(${PREFIX}
+ CONFIG NO_DEFAULT_PATH
+ PATHS ${NNAS_PROJECT_SOURCE_DIR}/infra/cmake/packages ${FIND_FOLDER}
+ ${ARGN})
+endmacro(nnas_find_package_folder)
+
# nncc_find_resource(NAME) will update the following variables
#
# NAME_FOUND
@@ -81,31 +88,12 @@ message(STATUS "Use '${CMAKE_BUILD_TYPE}' configuration")
#
set(THREADS_PREFER_PTHREAD_FLAG TRUE)
-###
-### Configuration
-###
-option(DOWNLOAD_PROTOBUF "Download Protocol Buffer source" ON)
-option(BUILD_PROTOBUF "Locally build Protocol Buffer from the downloaded source" ON)
-option(DOWNLOAD_EIGEN "Download Eigen source" ON)
-option(DOWNLOAD_FARMHASH "Download farmhash source" ON)
-option(DOWNLOAD_GEMMLOWP "Download GEMM low precesion library source" ON)
-option(DOWNLOAD_RUY "Download ruy source" ON)
-option(DOWNLOAD_NEON2SSE "Download NEON2SSE library source" ON)
-option(DOWNLOAD_GFLAGS "Download GFlags source" OFF)
-option(DOWNLOAD_FLATBUFFERS "Download FlatBuffers source" ON)
-option(BUILD_FLATBUFFERS "Locally build Flatbuffers from the downloaded source" ON)
-option(DOWNLOAD_TENSORFLOW "Download TensorFlow source" ON)
-option(DOWNLOAD_CAFFE "Download Caffe source" ON)
-option(DOWNLOAD_PYTORCH "Download Pytorch source" ON)
-option(DOWNLOAD_ONNX "Download ONNX source" ON)
-option(DOWNLOAD_ABSEIL "Download Abseil-cpp source" ON)
-option(DOWNLOAD_OPENCL_HEADERS "Download OpenCl Header source" ON)
-option(DOWNLOAD_PYBIND11 "Download Pybind11 source" ON)
-
-option(DOWNLOAD_GTEST "Download Google Test source" ON)
-option(BUILD_GTEST "Build Google Test from the downloaded source" ON)
-option(DOWNLOAD_HDF5 "Download HDF5 source" ON)
-option(BUILD_HDF5 "Build HDF5 from the downloaded source" ON)
+# identify platform: HOST_PLATFORM, TARGET_PLATFORM and related
+# note: this should be placed before flags and options setting
+nnas_include(IdentifyPlatform)
+
+# Configuration flags
+include("cmake/CfgOptionFlags.cmake")
nnas_find_package(GTest QUIET)
@@ -124,16 +112,9 @@ if(${ENABLE_TEST})
include(CTest)
endif(${ENABLE_TEST})
-option(ENABLE_STRICT_BUILD "Treat warning as error" OFF)
-
-# This option might be turned ON for Windows native build.
-# Check our ProtobufConfig.cmake for its usage.
-option(USE_PROTOBUF_LEGACY_IMPORT "Use legacy MODULE mode import rather than CONFIG mode" OFF)
-
-# This option might be turned ON for MCU builds of luci related components.
-# It specify which library type to use for build:
-# if set ON - luci libraries are static, otherwise - shared.
-option(STATIC_LUCI "Build luci as a static libraries" OFF)
+# apply compilation flags
+# NOTE this should be after all option
+include("cmake/ApplyCompileFlags.cmake")
###
### Target
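The new nnas_find_package_folder macro extends nnas_find_package with one additional search folder. A usage sketch; the package name Foo and the extra path are illustrative:

    # Sketch: also search an extra folder for FooConfig.cmake (Foo and the path are illustrative)
    nnas_find_package_folder(Foo "${CMAKE_SOURCE_DIR}/extra/cmake/packages" QUIET)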
diff --git a/infra/nncc/Makefile.arm32 b/infra/nncc/Makefile.arm32
new file mode 100644
index 000000000..22d96e71d
--- /dev/null
+++ b/infra/nncc/Makefile.arm32
@@ -0,0 +1,146 @@
+#
+# NOTE this is provided as an experimental Makefile for ARM32 cross-building
+# of some compiler modules.
+#
+
+BUILD_TYPE?=Debug
+
+CURRENT_DIR=$(shell pwd)
+BUILDFOLDER=build
+ARM32_FOLDER=arm32
+ROOTFS_ARM?=$(CURRENT_DIR)/tools/cross/rootfs/arm
+NNCC_CFG_OPTION_EXTRA?=
+
+TYPE_FOLDER=$(shell echo $(BUILD_TYPE) | tr A-Z a-z)
+
+BUILD_ARM32_FOLDER=$(BUILDFOLDER)/$(ARM32_FOLDER).$(TYPE_FOLDER)
+BUILD_ARM32_HOST=$(BUILDFOLDER)/$(ARM32_FOLDER).$(TYPE_FOLDER).host
+
+ARM32_INSTALL_FOLDER=$(CURRENT_DIR)/$(BUILDFOLDER)/$(ARM32_FOLDER).$(TYPE_FOLDER).install
+ARM32_INSTALL_HOST=$(CURRENT_DIR)/$(BUILDFOLDER)/$(ARM32_FOLDER).$(TYPE_FOLDER).host.install
+
+# ARM32 build
+ARM32_BUILD_ITEMS:=angkor;cwrap;pepper-str;pepper-strcast;pp
+ARM32_BUILD_ITEMS+=;pepper-csv2vec;crew
+ARM32_BUILD_ITEMS+=;oops;pepper-assert
+ARM32_BUILD_ITEMS+=;hermes;hermes-std
+ARM32_BUILD_ITEMS+=;loco;locop;logo-core;logo
+ARM32_BUILD_ITEMS+=;safemain;mio-circle04;mio-tflite280
+ARM32_BUILD_ITEMS+=;dio-hdf5
+ARM32_BUILD_ITEMS+=;foder;circle-verify;souschef;arser;vconone
+ARM32_BUILD_ITEMS+=;luci
+ARM32_BUILD_ITEMS+=;luci-interpreter
+ARM32_BUILD_ITEMS+=;tflite2circle
+ARM32_BUILD_ITEMS+=;tflchef;circlechef
+ARM32_BUILD_ITEMS+=;circle2circle;record-minmax;circle-quantizer
+ARM32_BUILD_ITEMS+=;luci-eval-driver;luci-value-test
+
+ARM32_TOOLCHAIN_FILE=cmake/buildtool/cross/toolchain_armv7l-linux.cmake
+
+ARM32_HOST_ITEMS:=angkor;cwrap;pepper-str;pepper-strcast;pp
+ARM32_HOST_ITEMS+=;pepper-csv2vec
+ARM32_HOST_ITEMS+=;oops
+ARM32_HOST_ITEMS+=;hermes;hermes-std
+ARM32_HOST_ITEMS+=;loco;locop;logo-core;logo
+ARM32_HOST_ITEMS+=;safemain;mio-circle04;mio-tflite280
+ARM32_HOST_ITEMS+=;foder;circle-verify;souschef;arser;vconone
+ARM32_HOST_ITEMS+=;luci
+ARM32_HOST_ITEMS+=;luci-interpreter
+ARM32_HOST_ITEMS+=;tflite2circle
+ARM32_HOST_ITEMS+=;tflchef;circlechef
+ARM32_HOST_ITEMS+=;circle-tensordump
+ARM32_HOST_ITEMS+=;circle2circle
+ARM32_HOST_ITEMS+=;common-artifacts
+ARM32_HOST_ITEMS+=;luci-eval-driver;luci-value-test
+
+
+_SPACE_:=
+_SPACE_+=
+ARM32_BUILD_WHITELIST=$(subst $(_SPACE_),,$(ARM32_BUILD_ITEMS))
+ARM32_HOST_WHITELIST=$(subst $(_SPACE_),,$(ARM32_HOST_ITEMS))
+
+NNCC_CFG_OPTION+= -DCMAKE_BUILD_TYPE=$(BUILD_TYPE) -DENABLE_COVERAGE=OFF
+
+NNCC_CFG_STRICT= -DENABLE_STRICT_BUILD=ON
+
+INT_TARGETS:=int_configure_arm32 int_configure_arm32_host \
+ int_build_arm32 int_build_arm32_host int_test_arm32_host int_test
+
+NNCC_ARM32_DEBUG= -DBUILD_WHITELIST="$(ARM32_BUILD_WHITELIST)"
+NNCC_ARM32_DEBUG_HOST= -DBUILD_WHITELIST="$(ARM32_HOST_WHITELIST)"
+
+DEF_TARGETS:=all
+
+VAL_TARGETS:=cfg debug test_prep test
+
+.PHONY: $(INT_TARGETS) $(DEF_TARGETS) $(VAL_TARGETS)
+
+.DEFAULT_GOAL := help
+
+help:
+ @echo "cfg : debug configure"
+ @echo "debug : debug build"
+ @echo "test_prep: debug test preparation"
+ @echo "test : debug test in target"
+
+###############################################################################
+# do not call int_xxxx targets directly, as they depend on environment variables
+
+#
+# configures
+#
+
+int_configure_arm32_host:
+ NNCC_WORKSPACE=$(BUILD_ARM32_HOST) ./nncc configure \
+ $(NNCC_CFG_OPTION) \
+ $(NNCC_ARM32_DEBUG_HOST) $(NNCC_CFG_STRICT) \
+ -DCMAKE_INSTALL_PREFIX="$(ARM32_INSTALL_HOST)" \
+ -DENABLE_TEST=ON
+
+int_configure_arm32:
+ ROOTFS_DIR=$(ROOTFS_ARM) TARGET_ARCH=armv7l \
+ BUILD_HOST_EXEC=$(CURRENT_DIR)/$(BUILD_ARM32_HOST) \
+ NNCC_WORKSPACE=$(BUILD_ARM32_FOLDER) ./nncc configure \
+ $(NNCC_CFG_OPTION) $(NNCC_CFG_OPTION_EXTRA) \
+ $(NNCC_ARM32_DEBUG) $(NNCC_CFG_STRICT) \
+ -DCMAKE_TOOLCHAIN_FILE=$(ARM32_TOOLCHAIN_FILE) \
+ -DCMAKE_INSTALL_PREFIX="$(ARM32_INSTALL_FOLDER)" \
+ -DENABLE_TEST=ON
+
+
+#
+# builds
+#
+int_build_arm32_host:
+ NNCC_WORKSPACE=$(BUILD_ARM32_HOST) ./nncc build -j1
+
+int_build_arm32:
+ ROOTFS_DIR=$(ROOTFS_ARM) TARGET_ARCH=armv7l \
+ BUILD_HOST_EXEC=$(CURRENT_DIR)/$(BUILD_ARM32_HOST) \
+ NNCC_WORKSPACE=$(BUILD_ARM32_FOLDER) ./nncc build -j1
+
+#
+# host test; run tests on the host to generate random inputs and expected outputs
+#
+int_test_arm32_host:
+ NNCC_WORKSPACE=$(BUILD_ARM32_HOST) ./nncc test
+
+#
+# tests: run on an ARM32 Ubuntu 18.04 device
+#
+int_test:
+ NNCC_WORKSPACE=$(BUILD_ARM32_FOLDER) ./nncc test
+
+################################################################################
+
+all: int_configure_arm32_host int_build_arm32_host int_configure_arm32 int_build_arm32
+
+cfg: int_configure_arm32_host int_build_arm32_host int_configure_arm32
+
+debug: int_build_arm32
+
+# NOTE before running tests on ARM32, run the host tests first to prepare the test data
+test_prep: int_test_arm32_host
+
+# NOTE run tests on an ARM32 Ubuntu 18.04 device
+test: int_test
diff --git a/infra/nncc/cmake/ApplyCompileFlags.cmake b/infra/nncc/cmake/ApplyCompileFlags.cmake
new file mode 100644
index 000000000..0cc5f9cd1
--- /dev/null
+++ b/infra/nncc/cmake/ApplyCompileFlags.cmake
@@ -0,0 +1,35 @@
+#
+# Platform independent compile flag setting
+#
+# flags for build type: debug, release
+set(CMAKE_C_FLAGS_DEBUG "-O0 -g -DDEBUG")
+set(CMAKE_CXX_FLAGS_DEBUG "-O0 -g -DDEBUG")
+set(CMAKE_C_FLAGS_RELEASE "-O3 -DNDEBUG")
+set(CMAKE_CXX_FLAGS_RELEASE "-O3 -DNDEBUG")
+
+#
+# Platform specific compile flag setting
+#
+if(EXISTS "${CMAKE_CURRENT_LIST_DIR}/buildtool/config/config_${TARGET_PLATFORM}.cmake")
+ include("${CMAKE_CURRENT_LIST_DIR}/buildtool/config/config_${TARGET_PLATFORM}.cmake")
+endif()
+
+#
+# Apply compile flags
+# note: this should be placed after cmake/buildtool/config/config_xxx.cmake files
+#
+# add common flags
+foreach(FLAG ${FLAGS_COMMON})
+ set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} ${FLAG}")
+ set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} ${FLAG}")
+endforeach()
+
+# add c flags
+foreach(FLAG ${FLAGS_CONLY})
+ set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} ${FLAG}")
+endforeach()
+
+# add cxx flags
+foreach(FLAG ${FLAGS_CXXONLY})
+ set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} ${FLAG}")
+endforeach()
diff --git a/infra/nncc/cmake/CfgOptionFlags.cmake b/infra/nncc/cmake/CfgOptionFlags.cmake
new file mode 100644
index 000000000..773a1f7d0
--- /dev/null
+++ b/infra/nncc/cmake/CfgOptionFlags.cmake
@@ -0,0 +1,58 @@
+#
+# Platform specific configuration
+# note: this should be placed before the default settings for option priority
+# (platform-specific settings have higher priority)
+#
+include("cmake/options/options_${TARGET_PLATFORM}.cmake")
+
+###
+### Configuration
+###
+option(DOWNLOAD_PROTOBUF "Download Protocol Buffer source" ON)
+option(BUILD_PROTOBUF "Locally build Protocol Buffer from the downloaded source" ON)
+option(DOWNLOAD_EIGEN "Download Eigen source" ON)
+option(DOWNLOAD_FARMHASH "Download farmhash source" ON)
+option(DOWNLOAD_GEMMLOWP "Download GEMM low precision library source" ON)
+option(DOWNLOAD_RUY "Download ruy source" ON)
+option(DOWNLOAD_NEON2SSE "Download NEON2SSE library source" ON)
+option(DOWNLOAD_GFLAGS "Download GFlags source" OFF)
+option(DOWNLOAD_FLATBUFFERS "Download FlatBuffers source" ON)
+option(BUILD_FLATBUFFERS "Locally build Flatbuffers from the downloaded source" ON)
+option(DOWNLOAD_TENSORFLOW "Download TensorFlow source" ON)
+option(DOWNLOAD_CAFFE "Download Caffe source" ON)
+option(DOWNLOAD_PYTORCH "Download Pytorch source" ON)
+option(DOWNLOAD_ONNX "Download ONNX source" ON)
+option(DOWNLOAD_ABSEIL "Download Abseil-cpp source" ON)
+option(DOWNLOAD_OPENCL_HEADERS "Download OpenCl Header source" ON)
+option(DOWNLOAD_PYBIND11 "Download Pybind11 source" ON)
+option(DOWNLOAD_JSONCPP "Download Jsoncpp source" ON)
+
+option(DOWNLOAD_GTEST "Download Google Test source" ON)
+option(BUILD_GTEST "Build Google Test from the downloaded source" ON)
+option(DOWNLOAD_HDF5 "Download HDF5 source" ON)
+option(BUILD_HDF5 "Build HDF5 from the downloaded source" ON)
+
+option(ENABLE_STRICT_BUILD "Treat warning as error" OFF)
+
+# This option might be turned ON for Windows native build.
+# Check our ProtobufConfig.cmake for its usage.
+option(USE_PROTOBUF_LEGACY_IMPORT "Use legacy MODULE mode import rather than CONFIG mode" OFF)
+
+# This option might be turned ON for MCU builds of luci-related components.
+# It specifies which library type to use for the build:
+# if set ON, luci libraries are static; otherwise, shared.
+option(STATIC_LUCI "Build luci as static libraries" OFF)
+
+# Disable the PIC (Position-Independent Code) option for luci-interpreter related components.
+# This option might be turned ON for MCU builds.
+#
+# Enabling PIC requires additional effort for correct linkage, such as
+# implementation of trampoline functions and support of various address tables.
+# PIC is needed for dynamic libraries; MCU builds of the interpreter
+# do not benefit from it, so we prefer to disable PIC.
+option(NNCC_LIBRARY_NO_PIC "Disable PIC option for libraries" OFF)
+
+# The one-cmds PyTorch importer is an experimental feature; it is not used in the default configuration.
+# This option enables installation of the one-import-pytorch utility and
+# generation of the related testsuite.
+option(ENABLE_ONE_IMPORT_PYTORCH "Enable deployment of the one-cmds PyTorch importer and related tests" OFF)
diff --git a/infra/nncc/cmake/buildtool/config/config_armv7l-linux.cmake b/infra/nncc/cmake/buildtool/config/config_armv7l-linux.cmake
new file mode 100644
index 000000000..c800f335e
--- /dev/null
+++ b/infra/nncc/cmake/buildtool/config/config_armv7l-linux.cmake
@@ -0,0 +1,24 @@
+#
+# armv7l linux compile options
+#
+
+message(STATUS "Building for ARMv7l Linux")
+
+# include linux common
+include("${CMAKE_CURRENT_LIST_DIR}/config_linux.cmake")
+
+# addition for arm-linux
+set(FLAGS_COMMON ${FLAGS_COMMON}
+ "-mcpu=cortex-a7"
+ "-mfloat-abi=hard"
+ "-ftree-vectorize"
+ "-mfp16-format=ieee"
+ )
+
+if(BUILD_ARM32_NEON)
+ set(FLAGS_COMMON ${FLAGS_COMMON}
+ "-mfpu=neon-vfpv4"
+ )
+else(BUILD_ARM32_NEON)
+ message(STATUS "ARMv7l: NEON is disabled")
+endif(BUILD_ARM32_NEON)
diff --git a/infra/nncc/cmake/buildtool/config/config_linux.cmake b/infra/nncc/cmake/buildtool/config/config_linux.cmake
new file mode 100644
index 000000000..d7b17cfef
--- /dev/null
+++ b/infra/nncc/cmake/buildtool/config/config_linux.cmake
@@ -0,0 +1,11 @@
+#
+# linux common compile options
+#
+
+# Disable annoying ABI compatibility warning.
+if(CMAKE_CXX_COMPILER_ID STREQUAL "GNU" AND CMAKE_CXX_COMPILER_VERSION VERSION_GREATER 7.0)
+ list(APPEND FLAGS_CXXONLY "-Wno-psabi")
+endif()
+
+# lib pthread as a variable (pthread must be disabled on Android)
+set(LIB_PTHREAD pthread)
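Keeping pthread behind a variable means an Android config can simply leave LIB_PTHREAD empty while consumers link through it unchanged. A minimal sketch; sample_tool is a hypothetical target:

    # Sketch: link pthread via the platform variable (sample_tool is hypothetical)
    add_executable(sample_tool main.cpp)
    target_link_libraries(sample_tool PRIVATE ${LIB_PTHREAD})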
diff --git a/infra/nncc/cmake/buildtool/cross/toolchain_armv7l-linux.cmake b/infra/nncc/cmake/buildtool/cross/toolchain_armv7l-linux.cmake
new file mode 100644
index 000000000..4956d91f9
--- /dev/null
+++ b/infra/nncc/cmake/buildtool/cross/toolchain_armv7l-linux.cmake
@@ -0,0 +1,38 @@
+#
+# config for arm-linux
+#
+include(CMakeForceCompiler)
+
+set(CMAKE_SYSTEM_NAME Linux)
+set(CMAKE_SYSTEM_PROCESSOR armv7l)
+
+set(CMAKE_C_COMPILER arm-linux-gnueabihf-gcc)
+set(CMAKE_CXX_COMPILER arm-linux-gnueabihf-g++)
+
+# where is the target environment
+set(NNAS_PROJECT_SOURCE_DIR "${CMAKE_CURRENT_LIST_DIR}/../../../../..")
+set(ROOTFS_ARM "${NNAS_PROJECT_SOURCE_DIR}/tools/cross/rootfs/arm")
+include("${NNAS_PROJECT_SOURCE_DIR}/infra/cmake/modules/OptionTools.cmake")
+
+envoption(ROOTFS_DIR ${ROOTFS_ARM})
+if(NOT EXISTS "${ROOTFS_DIR}/lib/arm-linux-gnueabihf")
+ message(FATAL_ERROR "Please prepare RootFS for ARM")
+endif()
+
+set(CMAKE_SYSROOT ${ROOTFS_DIR})
+set(CMAKE_SHARED_LINKER_FLAGS
+ "${CMAKE_SHARED_LINKER_FLAGS} --sysroot=${ROOTFS_DIR}"
+ CACHE INTERNAL "" FORCE)
+set(CMAKE_EXE_LINKER_FLAGS
+ "${CMAKE_EXE_LINKER_FLAGS} --sysroot=${ROOTFS_DIR}"
+ CACHE INTERNAL "" FORCE)
+
+# search for programs in the build host directories
+set(CMAKE_FIND_ROOT_PATH_MODE_PROGRAM NEVER)
+
+# for libraries and headers in the target directories
+set(CMAKE_FIND_ROOT_PATH_MODE_LIBRARY ONLY)
+set(CMAKE_FIND_ROOT_PATH_MODE_INCLUDE ONLY)
+
+# Set cache variable to ignore try-run error by find_package(Threads REQUIRED) on cross build
+set(THREADS_PTHREAD_ARG "2" CACHE STRING "Result from TRY_RUN" FORCE)
diff --git a/infra/nncc/cmake/options/options_armv7l-linux.cmake b/infra/nncc/cmake/options/options_armv7l-linux.cmake
new file mode 100644
index 000000000..d1cc367ee
--- /dev/null
+++ b/infra/nncc/cmake/options/options_armv7l-linux.cmake
@@ -0,0 +1,5 @@
+#
+# armv7l linux cmake options
+#
+
+option(BUILD_ARM32_NEON "Use NEON for ARM32 cross build" ON)
diff --git a/infra/nncc/cmake/options/options_x86_64-linux.cmake b/infra/nncc/cmake/options/options_x86_64-linux.cmake
new file mode 100644
index 000000000..0fb72f18b
--- /dev/null
+++ b/infra/nncc/cmake/options/options_x86_64-linux.cmake
@@ -0,0 +1,3 @@
+#
+# x86_64 linux cmake options
+#
diff --git a/infra/nnfw/cmake/CfgOptionFlags.cmake b/infra/nnfw/cmake/CfgOptionFlags.cmake
index 438d6b275..5371120ad 100644
--- a/infra/nnfw/cmake/CfgOptionFlags.cmake
+++ b/infra/nnfw/cmake/CfgOptionFlags.cmake
@@ -24,12 +24,13 @@ option(BUILD_NNAPI_TEST "Build nnapi_test" ON)
option(BUILD_NNPACKAGE_RUN "Build nnpackge_run" ON)
option(BUILD_TFLITE_LOADER "Build TensorFlow Lite loader" ON)
option(BUILD_CIRCLE_LOADER "Build circle loader" ON)
+option(BUILD_TRIX_LOADER "Build trix loader" ON)
option(BUILD_TFLITE_COMPARATOR_TEST_TOOL "Build tflite loader testing tool" ON)
option(BUILD_WITH_HDF5 "Build test tool with HDF5 library" ON)
option(GENERATE_RUNTIME_NNAPI_TESTS "Generate NNAPI operation gtest" ON)
option(ENVVAR_ONERT_CONFIG "Use environment variable for onert configuration" ON)
option(INSTALL_TEST_SCRIPTS "Install test scripts" ON)
-option(BUILD_GPU_CL "Build gpu_cl backend" ON)
+option(BUILD_GPU_CL "Build gpu_cl backend" OFF)
#
# Default build configuration for contrib
#
@@ -54,8 +55,9 @@ option(BUILD_TFLITE_ACCURACY "Build tflite accuracy tool" OFF)
# Default external libraries source download and build configuration
#
option(DOWNLOAD_TENSORFLOW "Download Tensorflow source" ON)
+option(DOWNLOAD_TENSORFLOW_GPU "Download Tensorflow GPU delegate source" OFF)
option(DOWNLOAD_ABSEIL "Download Abseil source" ON)
-option(DOWNLOAD_OPENCL_HEADERS "Download Opencl_headers source" ON)
+option(DOWNLOAD_OPENCL_HEADERS "Download Opencl_headers source" OFF)
option(DOWNLOAD_EIGEN "Download Eigen source" ON)
option(DOWNLOAD_FARMHASH "Download farmhash source" ON)
option(DOWNLOAD_GEMMLOWP "Download GEMM low precesion library source" ON)
@@ -71,6 +73,7 @@ option(DOWNLOAD_GTEST "Download Google Test source and build Google Test" ON)
option(BUILD_BOOST "Build boost source" OFF)
option(BUILD_TENSORFLOW_LITE "Build TensorFlow Lite from the downloaded source" ON)
option(BUILD_TENSORFLOW_LITE_2_3_0 "Build TensorFlow Lite 2.3.0 from the downloaded source" OFF)
+option(BUILD_TENSORFLOW_LITE_GPU "Build TensorFlow Lite GPU delegate from the downloaded source" OFF)
option(BUILD_ARMCOMPUTE "Build ARM Compute from the downloaded source" ON)
option(BUILD_RUY "Build ruy library from the downloaded source" ON)
option(BUILD_CPUINFO "Build cpuinfo library from the downloaded source" ON)
diff --git a/infra/nnfw/cmake/buildtool/config/config_armv7hl-tizen.cmake b/infra/nnfw/cmake/buildtool/config/config_armv7hl-tizen.cmake
new file mode 100644
index 000000000..dec1b4afb
--- /dev/null
+++ b/infra/nnfw/cmake/buildtool/config/config_armv7hl-tizen.cmake
@@ -0,0 +1,22 @@
+#
+# armv7hl tizen compile options
+#
+
+message(STATUS "Building for ARMv7hl(hardfp) Tizen")
+
+# Build flag for tizen
+set(CMAKE_C_FLAGS_DEBUG "-O -g -DDEBUG")
+set(CMAKE_CXX_FLAGS_DEBUG "-O -g -DDEBUG")
+
+# TODO: add and use option_tizen if something uncommon comes up
+# include linux common
+include("cmake/buildtool/config/config_linux.cmake")
+
+# addition for arm-linux
+set(FLAGS_COMMON ${FLAGS_COMMON}
+ "-mtune=cortex-a8"
+ "-mfloat-abi=hard"
+ "-mfpu=neon-vfpv4"
+ "-funsafe-math-optimizations"
+ "-ftree-vectorize"
+ )
diff --git a/infra/nnfw/cmake/options/options_aarch64-tizen.cmake b/infra/nnfw/cmake/options/options_aarch64-tizen.cmake
index 57d4c1061..ed6e35ebb 100644
--- a/infra/nnfw/cmake/options/options_aarch64-tizen.cmake
+++ b/infra/nnfw/cmake/options/options_aarch64-tizen.cmake
@@ -4,6 +4,7 @@
option(BUILD_ARMCOMPUTE "Build ARM Compute from the downloaded source" OFF)
option(BUILD_TENSORFLOW_LITE "Build TensorFlow Lite from the downloaded source" OFF)
option(DOWNLOAD_NEON2SSE "Download NEON2SSE library source" OFF)
+option(DOWNLOAD_GTEST "Download Google Test source and build Google Test" OFF)
option(BUILD_LOGGING "Build logging runtime" OFF)
option(GENERATE_RUNTIME_NNAPI_TESTS "Generate NNAPI operation gtest" OFF)
diff --git a/infra/nnfw/cmake/options/options_armv7hl-tizen.cmake b/infra/nnfw/cmake/options/options_armv7hl-tizen.cmake
new file mode 100644
index 000000000..aa2d2f821
--- /dev/null
+++ b/infra/nnfw/cmake/options/options_armv7hl-tizen.cmake
@@ -0,0 +1,16 @@
+#
+# armv7hl tizen cmake options
+#
+option(BUILD_ARMCOMPUTE "Build ARM Compute from the downloaded source" OFF)
+option(BUILD_TENSORFLOW_LITE "Build TensorFlow Lite from the downloaded source" OFF)
+option(DOWNLOAD_NEON2SSE "Download NEON2SSE library source" OFF)
+option(DOWNLOAD_GTEST "Download Google Test source and build Google Test" OFF)
+
+option(BUILD_LOGGING "Build logging runtime" OFF)
+option(GENERATE_RUNTIME_NNAPI_TESTS "Generate NNAPI operation gtest" OFF)
+option(ENVVAR_ONERT_CONFIG "Use environment variable for onert configuration" OFF)
+
+option(DOWNLOAD_OPENCL_HEADERS "Download Opencl_headers source" ON)
+option(DOWNLOAD_TENSORFLOW_GPU "Download Tensorflow GPU delegate source" ON)
+option(BUILD_GPU_CL "Build gpu_cl backend" ON)
+option(BUILD_TENSORFLOW_LITE_GPU "Build TensorFlow Lite GPU delegate from the downloaded source" ON)
diff --git a/infra/nnfw/cmake/options/options_armv7l-linux.cmake b/infra/nnfw/cmake/options/options_armv7l-linux.cmake
index e10e573c4..325e7cc58 100644
--- a/infra/nnfw/cmake/options/options_armv7l-linux.cmake
+++ b/infra/nnfw/cmake/options/options_armv7l-linux.cmake
@@ -3,3 +3,8 @@
#
option(DOWNLOAD_NEON2SSE "Download NEON2SSE library source" OFF)
option(BUILD_OPENCL_TOOL "Build OpenCL tool" ON)
+
+option(DOWNLOAD_OPENCL_HEADERS "Download Opencl_headers source" ON)
+option(DOWNLOAD_TENSORFLOW_GPU "Download Tensorflow GPU delegate source" ON)
+option(BUILD_GPU_CL "Build gpu_cl backend" ON)
+option(BUILD_TENSORFLOW_LITE_GPU "Build TensorFlow Lite GPU delegate from the downloaded source" ON)
diff --git a/infra/nnfw/cmake/options/options_armv7l-tizen.cmake b/infra/nnfw/cmake/options/options_armv7l-tizen.cmake
index c27a7ad01..eab3b0a92 100644
--- a/infra/nnfw/cmake/options/options_armv7l-tizen.cmake
+++ b/infra/nnfw/cmake/options/options_armv7l-tizen.cmake
@@ -4,7 +4,13 @@
option(BUILD_ARMCOMPUTE "Build ARM Compute from the downloaded source" OFF)
option(BUILD_TENSORFLOW_LITE "Build TensorFlow Lite from the downloaded source" OFF)
option(DOWNLOAD_NEON2SSE "Download NEON2SSE library source" OFF)
+option(DOWNLOAD_GTEST "Download Google Test source and build Google Test" OFF)
option(BUILD_LOGGING "Build logging runtime" OFF)
option(GENERATE_RUNTIME_NNAPI_TESTS "Generate NNAPI operation gtest" OFF)
option(ENVVAR_ONERT_CONFIG "Use environment variable for onert configuration" OFF)
+
+option(DOWNLOAD_OPENCL_HEADERS "Download Opencl_headers source" ON)
+option(DOWNLOAD_TENSORFLOW_GPU "Download Tensorflow GPU delegate source" ON)
+option(BUILD_GPU_CL "Build gpu_cl backend" ON)
+option(BUILD_TENSORFLOW_LITE_GPU "Build TensorFlow Lite GPU delegate from the downloaded source" ON)
diff --git a/infra/nnfw/cmake/options/options_i686-tizen.cmake b/infra/nnfw/cmake/options/options_i686-tizen.cmake
index 7a425f068..14a3d555b 100644
--- a/infra/nnfw/cmake/options/options_i686-tizen.cmake
+++ b/infra/nnfw/cmake/options/options_i686-tizen.cmake
@@ -4,6 +4,7 @@
option(BUILD_ARMCOMPUTE "Build ARM Compute from the downloaded source" OFF)
option(BUILD_TENSORFLOW_LITE "Build TensorFlow Lite from the downloaded source" OFF)
option(DOWNLOAD_ARMCOMPUTE "Download ARM Compute source" OFF)
+option(DOWNLOAD_GTEST "Download Google Test source and build Google Test" OFF)
option(BUILD_LOGGING "Build logging runtime" OFF)
option(GENERATE_RUNTIME_NNAPI_TESTS "Generate NNAPI operation gtest" OFF)
diff --git a/infra/nnfw/cmake/options/options_x86_64-darwin.cmake b/infra/nnfw/cmake/options/options_x86_64-darwin.cmake
index 5dbcf7e08..135cfbf6e 100644
--- a/infra/nnfw/cmake/options/options_x86_64-darwin.cmake
+++ b/infra/nnfw/cmake/options/options_x86_64-darwin.cmake
@@ -4,5 +4,3 @@
option(BUILD_ARMCOMPUTE "Build ARM Compute from the downloaded source" OFF)
option(DOWNLOAD_ARMCOMPUTE "Download ARM Compute source" OFF)
option(BUILD_XNNPACK "Build XNNPACK" OFF)
-option(DOWNLOAD_OPENCL_HEADERS "Download opencl headers" OFF)
-option(BUILD_GPU_CL "Build gpu_cl backend" OFF)
diff --git a/infra/nnfw/cmake/options/options_x86_64-linux.cmake b/infra/nnfw/cmake/options/options_x86_64-linux.cmake
index 5dbcf7e08..1cb72d593 100644
--- a/infra/nnfw/cmake/options/options_x86_64-linux.cmake
+++ b/infra/nnfw/cmake/options/options_x86_64-linux.cmake
@@ -2,7 +2,5 @@
# x86_64 linux cmake options
#
option(BUILD_ARMCOMPUTE "Build ARM Compute from the downloaded source" OFF)
-option(DOWNLOAD_ARMCOMPUTE "Download ARM Compute source" OFF)
option(BUILD_XNNPACK "Build XNNPACK" OFF)
-option(DOWNLOAD_OPENCL_HEADERS "Download opencl headers" OFF)
-option(BUILD_GPU_CL "Build gpu_cl backend" OFF)
+option(DOWNLOAD_ARMCOMPUTE "Download ARM Compute source" OFF)
diff --git a/infra/nnfw/cmake/options/options_x86_64-tizen.cmake b/infra/nnfw/cmake/options/options_x86_64-tizen.cmake
index 0f1c86d23..31b7fd6fb 100644
--- a/infra/nnfw/cmake/options/options_x86_64-tizen.cmake
+++ b/infra/nnfw/cmake/options/options_x86_64-tizen.cmake
@@ -2,8 +2,8 @@
# x86_64 linux cmake options
#
option(BUILD_ARMCOMPUTE "Build ARM Compute from the downloaded source" OFF)
-option(BUILD_TENSORFLOW_LITE "Build TensorFlow Lite from the downloaded source" OFF)
option(DOWNLOAD_ARMCOMPUTE "Download ARM Compute source" OFF)
+option(DOWNLOAD_GTEST "Download Google Test source and build Google Test" OFF)
option(BUILD_LOGGING "Build logging runtime" OFF)
option(GENERATE_RUNTIME_NNAPI_TESTS "Generate NNAPI operation gtest" OFF)
@@ -11,4 +11,3 @@ option(ENVVAR_ONERT_CONFIG "Use environment variable for onert configuration" OF
option(BUILD_XNNPACK "Build XNNPACK" OFF)
option(DOWNLOAD_OPENCL_HEADERS "Download opencl headers" OFF)
-option(BUILD_GPU_CL "Build gpu_cl backend" OFF)
diff --git a/infra/nnfw/cmake/packages/CpuInfoConfig.cmake b/infra/nnfw/cmake/packages/CpuInfoConfig.cmake
index 99ee795c1..878026d9a 100644
--- a/infra/nnfw/cmake/packages/CpuInfoConfig.cmake
+++ b/infra/nnfw/cmake/packages/CpuInfoConfig.cmake
@@ -14,6 +14,8 @@ function(_CpuInfo_Build)
return()
endif(NOT CpuInfoSource_FOUND)
+ nnas_include(ExternalProjectTools)
+
set(CPUINFO_BUILD_TOOLS OFF CACHE BOOL "Build command-line tools")
set(CPUINFO_BUILD_BENCHMARKS OFF CACHE BOOL "Build cpuinfo unit tests")
set(CPUINFO_BUILD_UNIT_TESTS OFF CACHE BOOL "Build cpuinfo mock tests")
diff --git a/infra/nnfw/cmake/packages/FlatBuffersConfig.cmake b/infra/nnfw/cmake/packages/FlatBuffersConfig.cmake
index d27ac1435..032724ae2 100644
--- a/infra/nnfw/cmake/packages/FlatBuffersConfig.cmake
+++ b/infra/nnfw/cmake/packages/FlatBuffersConfig.cmake
@@ -6,8 +6,8 @@ function(_FlatBuffers_import)
return()
endif(Flatbuffers_FOUND)
- # NOTE Tizen uses 1.12
- nnas_find_package(FlatBuffersSource EXACT 1.12 QUIET)
+ # NOTE Tizen uses 2.0
+ nnas_find_package(FlatBuffersSource EXACT 2.0 QUIET)
if(NOT FlatBuffersSource_FOUND)
set(FlatBuffers_FOUND FALSE PARENT_SCOPE)
diff --git a/infra/nnfw/cmake/packages/GTestConfig.cmake b/infra/nnfw/cmake/packages/GTestConfig.cmake
index 54695531e..ab2b39e00 100644
--- a/infra/nnfw/cmake/packages/GTestConfig.cmake
+++ b/infra/nnfw/cmake/packages/GTestConfig.cmake
@@ -16,7 +16,11 @@ if(${DOWNLOAD_GTEST})
endif(${DOWNLOAD_GTEST})
### Find and use pre-installed Google Test
-find_package(GTest)
+if(NOT GTest_FOUND)
+ # Reset package config directory cache to prevent recursive find
+ unset(GTest_DIR CACHE)
+ find_package(GTest)
+endif(NOT GTest_FOUND)
find_package(Threads)
if(${GTEST_FOUND} AND TARGET Threads::Threads)
diff --git a/infra/nnfw/cmake/packages/TRIXEngineConfig.cmake b/infra/nnfw/cmake/packages/TRIXEngineConfig.cmake
new file mode 100644
index 000000000..dfc10ebf2
--- /dev/null
+++ b/infra/nnfw/cmake/packages/TRIXEngineConfig.cmake
@@ -0,0 +1,42 @@
+# Looking for pre-installed TRIX engine package
+set(TRIX_ENGINE_PREFIX "/usr" CACHE PATH "Where to find TRIX engine header and library")
+
+function(_TRIXEngine_import)
+ # Find the header & lib
+ find_library(TRIXEngine_LIB
+ NAMES npu-engine
+ PATHS "${TRIX_ENGINE_PREFIX}/lib"
+ )
+
+ find_path(TRIXEngine_INCLUDE_DIR
+ NAMES libnpuhost.h
+ PATHS "${TRIX_ENGINE_PREFIX}/include/npu-engine"
+ )
+
+ set(TRIXEngine_FOUND TRUE)
+
+ if(NOT TRIXEngine_LIB)
+ set(TRIXEngine_FOUND FALSE)
+ endif(NOT TRIXEngine_LIB)
+
+ if(NOT TRIXEngine_INCLUDE_DIR)
+ set(TRIXEngine_FOUND FALSE)
+ endif(NOT TRIXEngine_INCLUDE_DIR)
+
+ if(NOT TRIXEngine_FOUND)
+ message(STATUS "Failed to find TRIX Engine")
+ else(NOT TRIXEngine_FOUND)
+
+ # Add target
+ if(NOT TARGET trix_engine)
+ add_library(trix_engine INTERFACE)
+ target_link_libraries(trix_engine INTERFACE ${TRIXEngine_LIB})
+ target_include_directories(trix_engine INTERFACE ${TRIXEngine_INCLUDE_DIR})
+ endif(NOT TARGET trix_engine)
+ endif(NOT TRIXEngine_FOUND)
+
+ set(TRIXEngine_FOUND ${TRIXEngine_FOUND} PARENT_SCOPE)
+ set(TRIXEngine_INCLUDE_DIRS ${TRIXEngine_INCLUDE_DIR} PARENT_SCOPE)
+endfunction(_TRIXEngine_import)
+
+_TRIXEngine_import()
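Since trix_engine is an INTERFACE target carrying both the library and the include path, a consumer only needs to link against it. A sketch, assuming the package was found; my_backend is a hypothetical target:

    # Sketch: consuming the imported TRIX engine (my_backend is hypothetical)
    if(TRIXEngine_FOUND)
      target_link_libraries(my_backend PRIVATE trix_engine)
    endif()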
diff --git a/infra/nnfw/cmake/packages/TRIXEngineConfigVersion.cmake b/infra/nnfw/cmake/packages/TRIXEngineConfigVersion.cmake
new file mode 100644
index 000000000..0e0a0436e
--- /dev/null
+++ b/infra/nnfw/cmake/packages/TRIXEngineConfigVersion.cmake
@@ -0,0 +1,104 @@
+# This script needs to set:
+#
+# VARIABLE | description
+# --- | ---
+# PACKAGE_VERSION | full provided version string
+# PACKAGE_VERSION_EXACT | true if version is exact match
+# PACKAGE_VERSION_COMPATIBLE | true if version is compatible
+# PACKAGE_VERSION_UNSUITABLE | true if unsuitable as any version
+#
+# Reference: https://cmake.org/cmake/help/v3.10/command/find_package.html
+
+set(TRIX_ENGINE_PREFIX "/usr" CACHE PATH "Where to find TRIX engine header and library")
+
+if(NOT PACKAGE_FIND_VERSION)
+ message(FATAL_ERROR "Please pass version requirement to use TRIX Engine dependency")
+endif()
+
+# Find the header & lib from TRIX_ENGINE_PREFIX
+find_library(TRIXEngine_LIB
+ NAMES npu-engine
+ HINTS "${TRIX_ENGINE_PREFIX}/lib"
+)
+find_path(TRIXEngine_INCLUDE_DIR
+ NAMES libnpuhost.h
+ HINTS "${TRIX_ENGINE_PREFIX}/include/npu-engine"
+)
+
+if(NOT TRIXEngine_INCLUDE_DIR OR NOT TRIXEngine_LIB)
+ set(PACKAGE_VERSION_EXACT FALSE)
+ set(PACKAGE_VERSION_COMPATIBLE FALSE)
+ set(PACKAGE_VERSION_UNSUITABLE TRUE)
+ return()
+endif(NOT TRIXEngine_INCLUDE_DIR OR NOT TRIXEngine_LIB)
+
+# TODO Assert that TRIX_ENGINE_PREFIX is a directory
+
+# TODO Can we run this only once per configure?
+try_run(MAJOR_VER MAJOR_COMPILABLE "${CMAKE_BINARY_DIR}/TRIXEngineConfigVersion.major"
+ SOURCES "${CMAKE_CURRENT_LIST_DIR}/TRIXEngineConfigVersion.major.cpp"
+ CMAKE_FLAGS
+ "-DINCLUDE_DIRECTORIES=${TRIXEngine_INCLUDE_DIR}"
+ "-DLINK_LIBRARIES=${TRIXEngine_LIB}"
+)
+
+if(NOT MAJOR_COMPILABLE)
+ # This means VERSION < 2.2.7
+ # the `getVersion` API was introduced in TRIX Engine 2.2.7
+ if(PACKAGE_FIND_VERSION VERSION_GREATER_EQUAL 2.2.7)
+ set(PACKAGE_VERSION_EXACT FALSE)
+ set(PACKAGE_VERSION_COMPATIBLE FALSE)
+ set(PACKAGE_VERSION_UNSUITABLE TRUE)
+ return()
+ else()
+ # TODO How to support this case?
+ message(FATAL_ERROR "TRIX Engine version is too low (< 2.2.7)")
+ endif()
+endif(NOT MAJOR_COMPILABLE)
+
+try_run(MINOR_VER MINOR_COMPILABLE "${CMAKE_BINARY_DIR}/TRIXEngineConfigVersion.minor"
+ SOURCES "${CMAKE_CURRENT_LIST_DIR}/TRIXEngineConfigVersion.minor.cpp"
+ CMAKE_FLAGS
+ "-DINCLUDE_DIRECTORIES=${TRIXEngine_INCLUDE_DIR}"
+ "-DLINK_LIBRARIES=${TRIXEngine_LIB}"
+)
+
+try_run(EXTRA_VER EXTRA_COMPILABLE "${CMAKE_BINARY_DIR}/TRIXEngineConfigVersion.extra"
+ SOURCES "${CMAKE_CURRENT_LIST_DIR}/TRIXEngineConfigVersion.extra.cpp"
+ CMAKE_FLAGS
+ "-DINCLUDE_DIRECTORIES=${TRIXEngine_INCLUDE_DIR}"
+ "-DLINK_LIBRARIES=${TRIXEngine_LIB}"
+)
+
+macro(assert)
+ # if(NOT ${ARGV}) raises an error when ARGV starts with 'NOT'
+ if(${ARGV})
+ # Do nothing
+ else(${ARGV})
+ message(FATAL_ERROR "Internal error ${ARGV}")
+ endif(${ARGV})
+endmacro(assert)
+
+assert(MAJOR_COMPILABLE)
+assert(MINOR_COMPILABLE)
+assert(EXTRA_COMPILABLE)
+assert(NOT MAJOR_VER STREQUAL FAILED_TO_RUN)
+assert(NOT MINOR_VER STREQUAL FAILED_TO_RUN)
+assert(NOT EXTRA_VER STREQUAL FAILED_TO_RUN)
+
+set(PACKAGE_VERSION ${MAJOR_VER}.${MINOR_VER}.${EXTRA_VER})
+
+if(PACKAGE_VERSION VERSION_EQUAL PACKAGE_FIND_VERSION)
+ set(PACKAGE_VERSION_EXACT TRUE)
+else()
+ set(PACKAGE_VERSION_EXACT FALSE)
+endif()
+
+# Assume TRIX Engine is backward compatible
+if(PACKAGE_VERSION VERSION_GREATER_EQUAL PACKAGE_FIND_VERSION)
+ set(PACKAGE_VERSION_COMPATIBLE TRUE)
+else()
+ set(PACKAGE_VERSION_COMPATIBLE FALSE)
+endif()
+
+set(PACKAGE_VERSION_UNSUITABLE FALSE)
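With this version file in place, a caller must pass a version requirement (see the FATAL_ERROR above) and CMake runs the try_run probes to compute PACKAGE_VERSION before accepting the package. A minimal lookup sketch, assuming this packages directory is on the search path:

    # Sketch: version-checked lookup; 2.2.7 is the first release exposing getVersion()
    find_package(TRIXEngine 2.2.7 QUIET)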
diff --git a/infra/nnfw/cmake/packages/TRIXEngineConfigVersion.extra.cpp b/infra/nnfw/cmake/packages/TRIXEngineConfigVersion.extra.cpp
new file mode 100644
index 000000000..05fe70ddb
--- /dev/null
+++ b/infra/nnfw/cmake/packages/TRIXEngineConfigVersion.extra.cpp
@@ -0,0 +1,24 @@
+/*
+ * Copyright (c) 2022 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include <libnpuhost.h>
+
+int main(void)
+{
+ uint32_t ret = 0;
+ getVersion(nullptr, nullptr, &ret);
+ return ret;
+}
diff --git a/infra/nnfw/cmake/packages/TRIXEngineConfigVersion.major.cpp b/infra/nnfw/cmake/packages/TRIXEngineConfigVersion.major.cpp
new file mode 100644
index 000000000..a3de06d65
--- /dev/null
+++ b/infra/nnfw/cmake/packages/TRIXEngineConfigVersion.major.cpp
@@ -0,0 +1,24 @@
+/*
+ * Copyright (c) 2022 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include <libnpuhost.h>
+
+int main(void)
+{
+ uint32_t ret = 0;
+ getVersion(&ret, nullptr, nullptr);
+ return ret;
+}
diff --git a/infra/nnfw/cmake/packages/TRIXEngineConfigVersion.minor.cpp b/infra/nnfw/cmake/packages/TRIXEngineConfigVersion.minor.cpp
new file mode 100644
index 000000000..1193a5c18
--- /dev/null
+++ b/infra/nnfw/cmake/packages/TRIXEngineConfigVersion.minor.cpp
@@ -0,0 +1,24 @@
+/*
+ * Copyright (c) 2022 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include <libnpuhost.h>
+
+int main(void)
+{
+ uint32_t ret = 0;
+ getVersion(nullptr, &ret, nullptr);
+ return ret;
+}
diff --git a/infra/nnfw/config/gbs.conf b/infra/nnfw/config/gbs.conf
index bad9eb204..1150a5fc8 100644
--- a/infra/nnfw/config/gbs.conf
+++ b/infra/nnfw/config/gbs.conf
@@ -18,5 +18,5 @@ url = http://download.tizen.org/snapshots/tizen/unified/latest/repos/standard/pa
url = http://download.tizen.org/snapshots/tizen/base/latest/repos/standard/packages/
[repo.tizen_one]
-url = http://nnfw.mooo.com/archive/tizen/
+url = http://13.125.34.93/archive/tizen/
diff --git a/infra/packaging/build b/infra/packaging/build
index 53d63713b..5d6bdd999 100644
--- a/infra/packaging/build
+++ b/infra/packaging/build
@@ -8,7 +8,10 @@ if [[ -z "${NNAS_PROJECT_PATH}" ]]; then
fi
# The default preset
-PRESET="20210910"
+PRESET="20220323"
+
+# Tests are enabled by default
+DISABLE_TEST=false
EXTRA_OPTIONS=()
while [ "$#" -ne 0 ]; do
@@ -23,6 +26,10 @@ while [ "$#" -ne 0 ]; do
PRESET="$2"
shift 2
;;
+ '--notest')
+ DISABLE_TEST=true
+ shift
+ ;;
'--')
shift
while [ "$#" -ne 0 ]; do
@@ -44,6 +51,10 @@ if [[ -z "${NNAS_INSTALL_PREFIX}" ]]; then
exit 255
fi
+if [[ "${DISABLE_TEST}" == "true" ]]; then
+ EXTRA_OPTIONS+=("-DENABLE_TEST=OFF")
+fi
+
PRESET_PATH="${SCRIPT_PATH}/preset/${PRESET}"
if [[ ! -f "${PRESET_PATH}" ]]; then
diff --git a/infra/packaging/preset/20220323 b/infra/packaging/preset/20220323
new file mode 100644
index 000000000..421106c35
--- /dev/null
+++ b/infra/packaging/preset/20220323
@@ -0,0 +1,57 @@
+#!/bin/bash
+
+# NOTE the purpose of this file is static analysis only;
+# a new official preset will be added when new programs are ready
+
+PRESET="20220323"
+
+function preset_configure()
+{
+ REQUIRED_UNITS=()
+ # Common Libraries
+ REQUIRED_UNITS+=("angkor" "cwrap" "pepper-str" "pepper-strcast" "pp")
+ REQUIRED_UNITS+=("oops" "pepper-assert" "pepper-csv2vec" "foder" "crew")
+ REQUIRED_UNITS+=("souschef")
+ REQUIRED_UNITS+=("safemain")
+ REQUIRED_UNITS+=("arser")
+ REQUIRED_UNITS+=("vconone")
+ # Hermes Logging Framework
+ REQUIRED_UNITS+=("hermes" "hermes-std")
+ # loco IR and related utilities
+ REQUIRED_UNITS+=("loco" "locop" "locomotiv" "logo-core" "logo")
+ # Flatbuffer I/O
+ REQUIRED_UNITS+=("mio-tflite" "mio-tflite260" "mio-tflite280" "mio-circle04")
+ # Data I/O
+ REQUIRED_UNITS+=("dio-hdf5")
+ # Circle compiler library (.circle -> .circle)
+ REQUIRED_UNITS+=("luci")
+ # Tools
+ REQUIRED_UNITS+=("tflite2circle" "circle2circle" "tflchef" "circlechef")
+ REQUIRED_UNITS+=("circle-tensordump" "circledump")
+ REQUIRED_UNITS+=("tf2tfliteV2" "luci-interpreter" "circle-verify")
+ REQUIRED_UNITS+=("luci-eval-driver")
+ REQUIRED_UNITS+=("record-minmax" "circle-quantizer" "rawdata2hdf5")
+ REQUIRED_UNITS+=("circle-partitioner")
+ REQUIRED_UNITS+=("one-cmds")
+ REQUIRED_UNITS+=("bcq-tools")
+
+ NPROC=${NPROC:-$(grep -c processor /proc/cpuinfo)}
+
+ # TODO Use "nncc configure" and "nncc build"
+ cmake \
+ -DCMAKE_INSTALL_PREFIX="${NNCC_INSTALL_PREFIX}" \
+ -DCMAKE_BUILD_TYPE=release \
+ -DBUILD_WHITELIST=$(join_by ";" "${REQUIRED_UNITS[@]}") \
+ -DEXTERNALS_BUILD_THREADS=$((NPROC/2)) \
+ ${EXTRA_OPTIONS[@]} \
+ "${NNAS_PROJECT_PATH}/infra/nncc"
+}
+
+function preset_install()
+{
+ install -t "${NNPKG_INSTALL_PREFIX}/bin" -D \
+ "${NNAS_PROJECT_PATH}/tools/nnpackage_tool/model2nnpkg/model2nnpkg.sh"
+
+ # Install tf2nnpkg
+ install -T -m 755 -D "${SCRIPT_PATH}/res/tf2nnpkg.${PRESET}" "${NNAS_INSTALL_PREFIX}/bin/tf2nnpkg"
+}
diff --git a/infra/packaging/preset/20220323_windows b/infra/packaging/preset/20220323_windows
new file mode 100644
index 000000000..60500b1e0
--- /dev/null
+++ b/infra/packaging/preset/20220323_windows
@@ -0,0 +1,69 @@
+#!/bin/bash
+
+function preset_configure()
+{
+ REQUIRED_UNITS=()
+ # Common Libraries
+ REQUIRED_UNITS+=("angkor" "cwrap" "pepper-str" "pepper-strcast" "pp")
+ REQUIRED_UNITS+=("oops" "pepper-assert" "pepper-csv2vec" "foder" "crew")
+ REQUIRED_UNITS+=("souschef")
+ REQUIRED_UNITS+=("safemain")
+ REQUIRED_UNITS+=("arser")
+ REQUIRED_UNITS+=("vconone")
+ # Hermes Logging Framework
+ REQUIRED_UNITS+=("hermes" "hermes-std")
+ # loco IR and related utilities
+ REQUIRED_UNITS+=("loco" "locop" "locomotiv" "logo-core" "logo")
+ # Flatbuffer I/O
+ REQUIRED_UNITS+=("mio-tflite" "mio-tflite260" "mio-tflite280" "mio-circle04")
+ # Data I/O
+ REQUIRED_UNITS+=("dio-hdf5")
+ # Circle compiler library (.circle -> .circle)
+ REQUIRED_UNITS+=("luci")
+ # Tools
+ REQUIRED_UNITS+=("tflite2circle" "circle2circle" "tflchef" "circlechef")
+ REQUIRED_UNITS+=("tf2tfliteV2" "luci-interpreter" "circle-verify")
+ REQUIRED_UNITS+=("luci-eval-driver")
+ REQUIRED_UNITS+=("record-minmax" "circle-quantizer" "rawdata2hdf5")
+ REQUIRED_UNITS+=("circle-partitioner")
+ REQUIRED_UNITS+=("one-cmds")
+ REQUIRED_UNITS+=("bcq-tools")
+
+ NPROC=$(grep -c processor /proc/cpuinfo)
+
+ # TODO Use "nncc configure" and "nncc build"
+ cmake \
+ -G "MSYS Makefiles" \
+ -DUSE_PROTOBUF_LEGACY_IMPORT=ON \
+ -DCMAKE_EXE_LINKER_FLAGS="-Wl,--allow-multiple-definition" \
+ -DCMAKE_SHARED_LINKER_FLAGS="-Wl,--allow-multiple-definition" \
+ -DENABLE_TEST=OFF \
+ -DDOWNLOAD_GTEST=OFF \
+ -DBUILD_GTEST=OFF \
+ -DCMAKE_C_COMPILER=gcc \
+ -DCMAKE_CXX_COMPILER=g++ \
+ -DCMAKE_INSTALL_PREFIX="${NNCC_INSTALL_PREFIX}" \
+ -DCMAKE_BUILD_TYPE=release \
+ -DBUILD_WHITELIST=$(join_by ";" "${REQUIRED_UNITS[@]}") \
+ -DEXTERNALS_BUILD_THREADS=$((NPROC/2)) \
+ ${EXTRA_OPTIONS[@]} \
+ "${NNAS_PROJECT_PATH}/infra/nncc"
+}
+
+function preset_install()
+{
+ # Install libraries to bin/ for Windows release
+ mv ${NNCC_INSTALL_PREFIX}/lib/*.dll ${NNCC_INSTALL_PREFIX}/bin
+ rm -rf ${NNCC_INSTALL_PREFIX}/lib
+
+ install -t "${NNPKG_INSTALL_PREFIX}/bin" -D \
+ "${NNAS_PROJECT_PATH}/tools/nnpackage_tool/model2nnpkg/model2nnpkg.sh"
+
+ # Install tf2nnpkg
+ install -T -m 755 -D "${SCRIPT_PATH}/res/tf2nnpkg.20220323" "${NNAS_INSTALL_PREFIX}/bin/tf2nnpkg"
+
+ # 'tf2tfliteV2' requires tensorflow, but tensorflow cannot be installed
+ # under mingw. Instead, install tensorflow into a python virtual environment
+ # from a native Windows CMD (run as administrator), then copy that
+ # environment to "${NNAS_INSTALL_PREFIX}/bin/venv"
+}
diff --git a/infra/packaging/res/tf2nnpkg.20220323 b/infra/packaging/res/tf2nnpkg.20220323
new file mode 100644
index 000000000..0d44818a1
--- /dev/null
+++ b/infra/packaging/res/tf2nnpkg.20220323
@@ -0,0 +1,109 @@
+#!/bin/bash
+
+set -e
+
+ROOT="$(cd "$(dirname "${BASH_SOURCE[0]}")/.." && pwd)"
+
+command_exists() {
+ if [ "$#" -le 0 ]; then
+ return 1
+ fi
+ command -v "$@" > /dev/null 2>&1
+}
+
+usage()
+{
+ echo "Convert TensorFlow model to nnpackage."
+ echo "Usage: tf2nnpkg"
+ echo " --info <path/to/info>"
+ echo " --graphdef <path/to/pb>"
+ echo " -o <path/to/nnpkg/directory>"
+ echo " --v2 (optional) Use TF 2.x interface"
+ exit 255
+}
+
+TF_INTERFACE="--v1"
+
+# Parse command-line arguments
+#
+while [ "$#" -ne 0 ]; do
+ CUR="$1"
+
+ case $CUR in
+ '--help')
+ usage
+ ;;
+ '--info')
+ export INFO_FILE="$2"
+ shift 2
+ ;;
+ '--graphdef')
+ export GRAPHDEF_FILE="$2"
+ shift 2
+ ;;
+ '-o')
+ export OUTPUT_DIR="$2"
+ shift 2
+ ;;
+ '--v2')
+ TF_INTERFACE="--v2"
+ shift
+ ;;
+ *)
+ echo "${CUR}"
+ shift
+ ;;
+ esac
+done
+
+if [ -z ${GRAPHDEF_FILE} ] || [ ! -e ${GRAPHDEF_FILE} ]; then
+ echo "pb is not found. Please check --graphdef is correct."
+ exit 2
+fi
+
+if [ -z ${INFO_FILE} ] || [ ! -e ${INFO_FILE} ]; then
+ echo "info is not found. Please check --info is correct."
+ exit 2
+fi
+
+if [ -z ${OUTPUT_DIR} ]; then
+ echo "output directory is not specifed. Please check -o is correct.."
+ exit 2
+fi
+
+FILE_BASE=$(basename ${GRAPHDEF_FILE})
+MODEL_NAME="${FILE_BASE%.*}"
+TMPDIR=$(mktemp -d)
+trap "{ rm -rf $TMPDIR; }" EXIT
+
+# activate python virtual environment
+VIRTUALENV_LINUX="${ROOT}/bin/venv/bin/activate"
+VIRTUALENV_WINDOWS="${ROOT}/bin/venv/Scripts/activate"
+
+if [ -e ${VIRTUALENV_LINUX} ]; then
+ source ${VIRTUALENV_LINUX}
+elif [ -e ${VIRTUALENV_WINDOWS} ]; then
+ source ${VIRTUALENV_WINDOWS}
+fi
+
+# parse inputs, outputs from info file
+INPUT=$(awk -F, '/^input/ { print $2 }' ${INFO_FILE} | cut -d: -f1 | tr -d ' ' | paste -d, -s)
+OUTPUT=$(awk -F, '/^output/ { print $2 }' ${INFO_FILE} | cut -d: -f1 | tr -d ' ' | paste -d, -s)
+
+INPUT_SHAPES=$(grep ^input ${INFO_FILE} | cut -d "[" -f2 | cut -d "]" -f1 | tr -d ' ' | xargs | tr ' ' ':')
+
+ONE_IMPORT_BCQ_SCRIPT="${ROOT}/bin/one-import-bcq ${TF_INTERFACE} "
+ONE_IMPORT_BCQ_SCRIPT+="-i ${GRAPHDEF_FILE} "
+ONE_IMPORT_BCQ_SCRIPT+="-o ${TMPDIR}/${MODEL_NAME}.tmp.circle "
+ONE_IMPORT_BCQ_SCRIPT+="-I ${INPUT} "
+ONE_IMPORT_BCQ_SCRIPT+="-O ${OUTPUT} "
+if [ ! -z ${INPUT_SHAPES} ]; then
+ ONE_IMPORT_BCQ_SCRIPT+="-s ${INPUT_SHAPES} "
+fi
+
+${ONE_IMPORT_BCQ_SCRIPT}
+
+# optimize
+"${ROOT}/bin/circle2circle" --O1 "${TMPDIR}/${MODEL_NAME}.tmp.circle" "${TMPDIR}/${MODEL_NAME}.circle"
+
+"${ROOT}/bin/model2nnpkg.sh" -o "${OUTPUT_DIR}" "${TMPDIR}/${MODEL_NAME}.circle"
diff --git a/infra/scripts/compiler_modules.sh b/infra/scripts/compiler_modules.sh
index e520dd381..6a857d2c8 100644
--- a/infra/scripts/compiler_modules.sh
+++ b/infra/scripts/compiler_modules.sh
@@ -8,7 +8,7 @@ DEBUG_BUILD_ITEMS+=";oops;pepper-assert;pepper-csv2vec"
DEBUG_BUILD_ITEMS+=";hermes;hermes-std"
DEBUG_BUILD_ITEMS+=";loco;locop;locomotiv;logo-core;logo"
DEBUG_BUILD_ITEMS+=";foder;crew;souschef;arser;vconone"
-DEBUG_BUILD_ITEMS+=";safemain;mio-circle;mio-tflite;mio-tflite260"
+DEBUG_BUILD_ITEMS+=";safemain;mio-circle04;mio-tflite;mio-tflite260;mio-tflite280"
DEBUG_BUILD_ITEMS+=";tflite2circle"
DEBUG_BUILD_ITEMS+=";luci"
DEBUG_BUILD_ITEMS+=";luci-interpreter"
diff --git a/infra/scripts/docker_build_test_x64.sh b/infra/scripts/docker_build_test_x64.sh
index 9f3966af7..26d8de4a9 100755
--- a/infra/scripts/docker_build_test_x64.sh
+++ b/infra/scripts/docker_build_test_x64.sh
@@ -31,8 +31,9 @@ pushd $ROOT_PATH > /dev/null
export DOCKER_ENV_VARS
export DOCKER_VOLUMES
+export BUILD_OPTIONS
# Disable nnpackage_run build: mismatch between buildtool for CI and installed hdf5
-CMD="export OPTIONS='-DBUILD_NNPACKAGE_RUN=OFF' && \
+CMD="export OPTIONS='-DBUILD_NNPACKAGE_RUN=OFF $BUILD_OPTIONS' && \
export BUILD_TYPE=Release && \
cp -nv Makefile.template Makefile && \
make all install build_test_suite"
diff --git a/infra/scripts/docker_collect_nnpkg_resources.sh b/infra/scripts/docker_collect_nnpkg_resources.sh
index 475da6d06..06cf8809a 100755
--- a/infra/scripts/docker_collect_nnpkg_resources.sh
+++ b/infra/scripts/docker_collect_nnpkg_resources.sh
@@ -71,7 +71,7 @@ REQUIRED_UNITS+=("loco" "locop" "locomotiv" "logo-core" "logo")
# Circle compiler library (.circle -> .circle)
REQUIRED_UNITS+=("luci")
# Flatbuffer I/O
-REQUIRED_UNITS+=("mio-tflite" "mio-tflite260" "mio-circle")
+REQUIRED_UNITS+=("mio-tflite" "mio-tflite260" "mio-tflite280" "mio-circle04")
# Tools
REQUIRED_UNITS+=("tflite2circle" "circle2circle" "luci-interpreter")
REQUIRED_UNITS+=("souschef" "tflchef" "circlechef" "circle-verify")
diff --git a/nnpackage/spec/10_packaging_and_manifest.md b/nnpackage/spec/10_packaging_and_manifest.md
index 4dc3de874..6aa0db74e 100644
--- a/nnpackage/spec/10_packaging_and_manifest.md
+++ b/nnpackage/spec/10_packaging_and_manifest.md
@@ -83,6 +83,7 @@ It can have the values (case-sensitive) in following table.
|--------|------------------------|
| tflite | tensorflow lite schema |
| circle | nnpackage schema |
+| tvn | trix-engine binary |
### Example
@@ -91,11 +92,11 @@ Here is an example of `MANIFEST`.
```
{
"major-version" : "1",
- "minor-version" : "1",
+ "minor-version" : "2",
"patch-version" : "0",
"configs" : [ "model.cfg" ],
- "models" : [ "mymodel.model", "yourmodel.model" ],
- "model-types" : [ "tflite", "circle" ]
+ "models" : [ "mymodel.model", "yourmodel.model", "binmodel.tvn" ],
+ "model-types" : [ "tflite", "circle", "tvn" ]
}
```
diff --git a/packaging/cpuinfo.tar.gz b/packaging/CPUINFO.tar.gz
index ced5debc3..ced5debc3 100644
--- a/packaging/cpuinfo.tar.gz
+++ b/packaging/CPUINFO.tar.gz
Binary files differ
diff --git a/packaging/gemmlowp.tar.gz b/packaging/GEMMLOWP.tar.gz
index 198dc1414..198dc1414 100644
--- a/packaging/gemmlowp.tar.gz
+++ b/packaging/GEMMLOWP.tar.gz
Binary files differ
diff --git a/packaging/oourafft.tar.gz b/packaging/OOURAFFT.tar.gz
index 85cf7fd6b..85cf7fd6b 100644
--- a/packaging/oourafft.tar.gz
+++ b/packaging/OOURAFFT.tar.gz
Binary files differ
diff --git a/packaging/ruy.tar.gz b/packaging/RUY.tar.gz
index 9ad14fe6c..9ad14fe6c 100644
--- a/packaging/ruy.tar.gz
+++ b/packaging/RUY.tar.gz
Binary files differ
diff --git a/packaging/eigen.tar.gz b/packaging/TENSORFLOW-2.3.0-EIGEN.tar.gz
index 396d12fa7..396d12fa7 100644
--- a/packaging/eigen.tar.gz
+++ b/packaging/TENSORFLOW-2.3.0-EIGEN.tar.gz
Binary files differ
diff --git a/packaging/TENSORFLOW_GPU.tar.gz b/packaging/TENSORFLOW_GPU.tar.gz
new file mode 100644
index 000000000..5133fee54
--- /dev/null
+++ b/packaging/TENSORFLOW_GPU.tar.gz
Binary files differ
diff --git a/packaging/gtest.tar.gz b/packaging/gtest.tar.gz
deleted file mode 100644
index b8c31918e..000000000
--- a/packaging/gtest.tar.gz
+++ /dev/null
Binary files differ
diff --git a/packaging/nnapi_test_generated.tar.gz b/packaging/nnapi_test_generated.tar.gz
index 504dbf9a6..446bd22c3 100644
--- a/packaging/nnapi_test_generated.tar.gz
+++ b/packaging/nnapi_test_generated.tar.gz
Binary files differ
diff --git a/packaging/nnfw.spec b/packaging/nnfw.spec
index 547d46a0d..324fe1d11 100644
--- a/packaging/nnfw.spec
+++ b/packaging/nnfw.spec
@@ -1,18 +1,19 @@
Name: nnfw
Summary: nnfw
-Version: 1.19.0
+Version: 1.20.0
Release: 1
Group: Development
License: Apache-2.0 and MIT and BSD-2-Clause and MPL-2.0
Source0: %{name}-%{version}.tar.gz
Source1: %{name}.manifest
+# TODO Update source number
Source1001: nnapi_test_generated.tar.gz
-Source1002: gtest.tar.gz
-Source1003: eigen.tar.gz
-Source1004: gemmlowp.tar.gz
-Source1005: ruy.tar.gz
-Source1006: cpuinfo.tar.gz
+#Source1002: GTEST.tar.gz
+Source1003: TENSORFLOW-2.3.0-EIGEN.tar.gz
+Source1004: GEMMLOWP.tar.gz
+Source1005: RUY.tar.gz
+Source1006: CPUINFO.tar.gz
Source1007: XNNPACK.tar.gz
Source1008: FXDIV.tar.gz
Source1009: PTHREADPOOL.tar.gz
@@ -21,11 +22,13 @@ Source1011: FP16.tar.gz
Source1012: OPENCL_HEADERS.tar.gz
Source1013: FARMHASH.tar.gz
Source1014: ABSEIL.tar.gz
-Source1015: oourafft.tar.gz
+Source1015: OOURAFFT.tar.gz
+Source1016: TENSORFLOW_GPU.tar.gz
Source2001: nnfw.pc.in
Source2002: nnfw-plugin.pc.in
%{!?build_type: %define build_type Release}
+%{!?trix_support: %define trix_support 1}
%{!?coverage_build: %define coverage_build 0}
%{!?test_build: %define test_build 0}
%{!?extra_option: %define extra_option %{nil}}
@@ -53,6 +56,11 @@ BuildRequires: hdf5-devel
BuildRequires: libaec-devel
BuildRequires: zlib-devel
BuildRequires: libjpeg-devel
+BuildRequires: gtest-devel
+%endif
+
+%if %{trix_support} == 1
+BuildRequires: npu-engine-devel
%endif
%description
@@ -86,9 +94,12 @@ Summary: NNFW Test
NNFW test rpm. It does not depend on nnfw rpm since it contains nnfw runtime.
%endif
-%ifarch %{arm}
+%ifarch armv7l
%define target_arch armv7l
%endif
+%ifarch armv7hl
+%define target_arch armv7hl
+%endif
%ifarch x86_64
%define target_arch x86_64
%endif
@@ -123,7 +134,7 @@ NNFW test rpm. It does not depends on nnfw rpm since it contains nnfw runtime.
cp %{SOURCE1} .
mkdir ./externals
tar -xf %{SOURCE1001} -C ./tests/nnapi/src/
-tar -xf %{SOURCE1002} -C ./externals
+#tar -xf %{SOURCE1002} -C ./externals
tar -xf %{SOURCE1003} -C ./externals
tar -xf %{SOURCE1004} -C ./externals
tar -xf %{SOURCE1005} -C ./externals
@@ -137,9 +148,10 @@ tar -xf %{SOURCE1012} -C ./externals
tar -xf %{SOURCE1013} -C ./externals
tar -xf %{SOURCE1014} -C ./externals
tar -xf %{SOURCE1015} -C ./externals
+tar -xf %{SOURCE1016} -C ./externals
%build
-%ifarch arm armv7l aarch64 x86_64 %ix86
+%ifarch arm armv7l armv7hl aarch64 x86_64 %ix86
# runtime build
%{build_env} ./nnfw configure %{build_options} %{extra_option}
%{build_env} ./nnfw build -j4
@@ -157,10 +169,10 @@ pwd > tests/scripts/build_path.txt
%endif # coverage_build
tar -zcf test-suite.tar.gz infra/scripts
%endif # test_build
-%endif # arm armv7l aarch64
+%endif # arm armv7l armv7hl aarch64
%install
-%ifarch arm armv7l aarch64 x86_64 %ix86
+%ifarch arm armv7l armv7hl aarch64 x86_64 %ix86
mkdir -p %{buildroot}%{_libdir}
mkdir -p %{buildroot}%{_bindir}
@@ -204,7 +216,7 @@ install -m 0644 ./tests/scripts/build_path.txt %{buildroot}%{test_install_dir}/t
%files
%manifest %{name}.manifest
%defattr(-,root,root,-)
-%ifarch arm armv7l aarch64 x86_64 %ix86
+%ifarch arm armv7l armv7hl aarch64 x86_64 %ix86
%{_libdir}/*.so
%exclude %{_includedir}/CL/*
%endif
@@ -212,7 +224,7 @@ install -m 0644 ./tests/scripts/build_path.txt %{buildroot}%{test_install_dir}/t
%files devel
%manifest %{name}.manifest
%defattr(-,root,root,-)
-%ifarch arm armv7l aarch64 x86_64 %ix86
+%ifarch arm armv7l armv7hl aarch64 x86_64 %ix86
%dir %{_includedir}/nnfw
%{_includedir}/nnfw/*
%{_libdir}/pkgconfig/nnfw.pc
@@ -221,13 +233,13 @@ install -m 0644 ./tests/scripts/build_path.txt %{buildroot}%{test_install_dir}/t
%files plugin-devel
%manifest %{name}.manifest
%defattr(-,root,root,-)
-%ifarch arm armv7l aarch64 x86_64 %ix86
+%ifarch arm armv7l armv7hl aarch64 x86_64 %ix86
%dir %{_includedir}/onert
%{_includedir}/onert/*
%{_libdir}/pkgconfig/nnfw-plugin.pc
%endif
-%ifarch arm armv7l aarch64 x86_64 %ix86
+%ifarch arm armv7l armv7hl aarch64 x86_64 %ix86
%files minimal-app
%manifest %{name}.manifest
%defattr(-,root,root,-)
@@ -238,10 +250,10 @@ install -m 0644 ./tests/scripts/build_path.txt %{buildroot}%{test_install_dir}/t
%files test
%manifest %{name}.manifest
%defattr(-,root,root,-)
-%ifarch arm armv7l aarch64 x86_64
+%ifarch arm armv7l armv7hl aarch64 x86_64
%dir %{test_install_home}
%{test_install_home}/*
-%endif # arm armv7l aarch64
+%endif # arm armv7l armv7hl aarch64
%endif # test_build
%changelog
diff --git a/res/CircleSchema/0.3/circle_schema.fbs b/res/CircleSchema/0.3/circle_schema.fbs
new file mode 100644
index 000000000..3972056f9
--- /dev/null
+++ b/res/CircleSchema/0.3/circle_schema.fbs
@@ -0,0 +1,1137 @@
+// Copyright (c) 2019~2020 Samsung Electronics Co., Ltd. All Rights Reserved
+// Copyright 2017 The TensorFlow Authors. All Rights Reserved.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+// Revision History
+//
+// Version Major.Minor
+//
+// Major version is schema version.
+// We keep the schema version while changes remain compatible.
+// Minor version is for human communication only;
+// it is not stored in the circle model.
+//
+// Version 0.0: Initial version. Based on TensorFlow Lite v1.13.1 schema.
+// Version 0.1: Based on TF v2.2-rc2 + more (from TensorFlow `56d281c`)
+// `BATCH_MATMUL` operator, `FLOAT64` tensor type,
+// `asymmetric_quantize_inputs` for several operator options
+// Version 0.2: BCQ_GATHER and BCQ_FULLY_CONNECTED are added.
+// Version 0.3: SHUFFLED16x1FLOAT32 is added.
+
+namespace circle;
+
+// This corresponds to the version.
+file_identifier "CIR0";
+// File extension of any written files.
+file_extension "circle";
+
+// IMPORTANT: All new members of tables, enums and unions must be added at the
+// end to ensure backwards compatibility.
+
+// The type of data stored in a tensor.
+enum TensorType : byte {
+ FLOAT32 = 0,
+ FLOAT16 = 1,
+ INT32 = 2,
+ UINT8 = 3,
+ INT64 = 4,
+ STRING = 5,
+ BOOL = 6,
+ INT16 = 7,
+ COMPLEX64 = 8,
+ INT8 = 9,
+ FLOAT64 = 10,
+}
+
+// Custom quantization parameters for experimenting with new quantization
+// techniques.
+table CustomQuantization {
+ custom:[ubyte] (force_align: 16);
+}
+
+// Represents a specific quantization technique's parameters.
+union QuantizationDetails {
+ CustomQuantization,
+}
+
+// Parameters for converting a quantized tensor back to float.
+table QuantizationParameters {
+ // These four parameters are the asymmetric linear quantization parameters.
+ // Given a quantized value q, the corresponding float value f should be:
+ // f = scale * (q - zero_point)
+ // For other quantization types, the QuantizationDetails below is used.
+ min:[float]; // For importing back into tensorflow.
+ max:[float]; // For importing back into tensorflow.
+ scale:[float]; // For dequantizing the tensor's values.
+ zero_point:[long];
+
+ // If this is not none, the other quantization parameters (i.e. min, max,
+ // scale, zero_point fields above) are ignored and the value of the
+ // QuantizationDetails union should be used.
+ details:QuantizationDetails;
+
+ // Specifies the dimension of the Tensor's shape that the scales and
+ // zero_points correspond to. For example, a tensor t, with dims=[4, 3, 2, 1]
+ // with quantization params:
+ // scale=[1.0, 2.0, 3.0], zero_point=[1, 2, 3], quantization_dimension=1
+ // will be quantized across the second dimension of t.
+ // t[:, 0, :, :] will have scale[0]=1.0, zero_point[0]=1
+ // t[:, 1, :, :] will have scale[1]=2.0, zero_point[0]=2
+ // t[:, 2, :, :] will have scale[2]=3.0, zero_point[0]=3
+ quantized_dimension:int;
+}
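
To make the affine scheme above concrete, a minimal sketch (not part of the
schema) of per-channel dequantization, where c is an element's index along
quantized_dimension:

```
// Sketch: f = scale[c] * (q - zero_point[c]) for per-channel quantization.
#include <cstddef>
#include <cstdint>
#include <vector>

float dequantize(std::int8_t q, const std::vector<float> &scale,
                 const std::vector<std::int64_t> &zero_point, std::size_t c)
{
  return scale[c] * (static_cast<float>(q) - static_cast<float>(zero_point[c]));
}
```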
+
+// Sparse tensors.
+// We use a modification of the TACO format.
+// Reference: http://tensor-compiler.org/kjolstad-oopsla17-tensor-compiler.pdf
+//
+// To encode a conceptual n-dimensional dense tensor with dims (d0, ..., dn-1),
+// potentially with a k-dimensional block (0 <= k <= n) with dims
+// (dn, ..., dn+k-1), the format needs to specify:
+// 1. In what order to traverse these dimensions. For example, to store a 2-D
+// matrix in row major order, the traversal order would be (d0, d1),
+// whereas to store it in column major order, the traversal order would be
+// (d1, d0). If the 2-D matrix has a 2-D inner block, the traversal order
+// could be (d0, d1, d2, d3).
+// 2. How each block dimension in (dn, ..., dn+k-1) maps to the original
+// tensor dimension in (d0, ..., dn-1).
+// 3. In the traversal order defined above, the format (dense vs. sparse) and
+// index metadata for each dimension. For a dense dimension, this is just
+// the size of that dimension. For a sparse dimension, it's the same as
+// the compressed index defined in the Compressed Sparse Row (CSR) format.
+// (http://scipy-lectures.org/advanced/scipy_sparse/csr_matrix.html)
+
+// The storage type for a dimension. Currently we support:
+// 1. DENSE: each coordinate in this dimension is stored implicitly.
+// 2. SPARSE_CSR: only the coordinates with non-zero elements are stored. The
+// compression technique is the same as the one CSR uses.
+// More types like a sparse dimension with a different compression technique
+// could be added to the list in the future.
+enum DimensionType : byte {
+ DENSE = 0,
+ SPARSE_CSR = 1,
+}
+
+table Int32Vector {
+ values:[int];
+}
+
+table Uint16Vector {
+ values:[ushort] (force_align: 4);
+}
+
+table Uint8Vector {
+ values:[ubyte] (force_align: 4);
+}
+
+// Variable-typed buffer to store the index metadata for a sparse dimension.
+// The widest type is Int32 instead of UInt32 because a tensor's shape is an int32
+// vector. We don't want the per-dimensional index to overflow that range.
+union SparseIndexVector {
+ Int32Vector,
+ Uint16Vector,
+ Uint8Vector
+}
+
+table DimensionMetadata {
+ // Whether a dimension is dense or sparse.
+ format:DimensionType;
+ // Index metadata used for a dimension.
+ // - If format is DimensionType.DENSE then we use the dense_size field to
+ // store the size of that dimension. Each index in that dimension is
+ // stored implicitly.
+ // - If format is DimensionType.SPARSE_CSR then we use array_segments and
+ // array_indices to encode that dimension. array_segments represents how
+ // to segment the indices array, each segment corresponds to one element
+ // in the previous dimension. array_indices represents the index of the
+ // non-zero elements within this dimension (as those in the CSR matrix
+ // format, where the first array is row pointers and the second array is
+ // column indices).
+ dense_size:int;
+ array_segments:SparseIndexVector;
+ array_indices:SparseIndexVector;
+}
+
+// Parameters to encode a sparse TfLite tensor.
+table SparsityParameters {
+ // The traversal order of the dimensions defined in the `shape` field of the
+// conceptual dense tensor. For an n-dimensional tensor with dims (d0, d1,
+ // ..., dn-1),
+ // - if not block sparse, the traversal_order is just a permutation of (d0,
+ // ..., dn-1). For example, a 2-D matrix stored in row-major order would
+ // have traversal_order = (d0, d1).
+ // - if block sparse with a k-dimensional block (0 <= k <= n), the
+ // traversal_order has n + k elements. The first n elements are still a
+// permutation of (d0, ..., dn-1). The last k elements are a permutation
+ // of (dn, ..., dn+k-1), defining how to traverse a block internally. For
+ // example, a 2-D matrix with 2-D blocks, both stored in row-major order
+ // would have traversal_order = (d0, d1, d2, d3).
+ traversal_order:[int];
+ // For an n-dimensional tensor with a k-dimensional block (0 <= k <= n),
+ // stores how a block dimension in (dn, ..., dn+k-1) maps to the original
+ // tensor dimension in (d0, ..., dn).
+ // It's stored in the order of (dn, ..., dn+k-1).
+ // If not block-sparse, this field is NULL.
+ block_map:[int];
+ // In the traversal order defined above, the metadata needed for
+ // each dimension to locate the non-zero values in the original dense tensor.
+ // The size of the dim_metadata array = the size of the traversal_order array
+ // = n + k.
+ dim_metadata:[DimensionMetadata];
+}
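
As a concrete (hypothetical) instance of the encoding above, a 3x4 matrix with
a dense row dimension and a CSR-compressed column dimension would carry:

```
// Sketch: CSR-style encoding of the 3x4 matrix
//   [ 1 0 0 0 ]
//   [ 0 2 3 0 ]
//   [ 0 0 0 4 ]
// traversal_order = {0, 1}; block_map is empty (no block sparsity).
// d0 is DENSE with dense_size = 3; d1 is SPARSE_CSR with:
int array_segments[] = {0, 1, 3, 4}; // row i owns indices [seg[i], seg[i+1])
int array_indices[] = {0, 1, 2, 3};  // column index of each non-zero element
float values[] = {1, 2, 3, 4};       // non-zero data, in traversal order
```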
+
+table Tensor {
+ // The tensor shape. The meaning of each entry is operator-specific but
+ // builtin ops use: [batch size, height, width, number of channels] (That's
+ // Tensorflow's NHWC).
+ shape:[int];
+ type:TensorType;
+ // An index that refers to the buffers table at the root of the model. Or,
+ // if there is no data buffer associated (i.e. intermediate results), then
+ // this is 0 (which refers to an always existent empty buffer).
+ //
+ // The data_buffer itself is an opaque container, with the assumption that the
+ // target device is little-endian. In addition, all builtin operators assume
+ // the memory is ordered such that if `shape` is [4, 3, 2], then index
+ // [i, j, k] maps to data_buffer[i*3*2 + j*2 + k].
+ buffer:uint;
+ name:string; // For debugging and importing back into tensorflow.
+ quantization:QuantizationParameters; // Optional.
+
+ is_variable:bool = false;
+
+ // Parameters to encode a sparse tensor. See the example in
+ // tensorflow/lite/testdata/sparse_tensor.json.
+ sparsity:SparsityParameters; // Optional.
+
+ // Encodes `shape` with unknown dimensions. Unknown dimensions are
+ // represented with -1.
+ shape_signature:[int]; // Optional.
+}
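
The row-major rule in the buffer comment generalizes to any rank; a short
sketch of the flattening it implies:

```
// Sketch: row-major flattening. For shape {4, 3, 2} this reproduces the
// rule above: [i, j, k] -> i*3*2 + j*2 + k.
#include <cstddef>
#include <vector>

std::size_t flat_index(const std::vector<std::size_t> &shape,
                       const std::vector<std::size_t> &index)
{
  std::size_t offset = 0;
  for (std::size_t d = 0; d < shape.size(); ++d)
    offset = offset * shape[d] + index[d]; // Horner-style accumulation
  return offset;
}
```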
+
+// A list of builtin operators. Builtin operators are slightly faster than custom
+// ones, but not by much. Moreover, while custom operators accept an opaque
+// object containing configuration parameters, builtins have a predetermined
+// set of acceptable options.
+
+enum BuiltinOperator : ubyte {
+ ADD = 0,
+ AVERAGE_POOL_2D = 1,
+ CONCATENATION = 2,
+ CONV_2D = 3,
+ DEPTHWISE_CONV_2D = 4,
+ DEPTH_TO_SPACE = 5,
+ DEQUANTIZE = 6,
+ EMBEDDING_LOOKUP = 7,
+ FLOOR = 8,
+ FULLY_CONNECTED = 9,
+ HASHTABLE_LOOKUP = 10,
+ L2_NORMALIZATION = 11,
+ L2_POOL_2D = 12,
+ LOCAL_RESPONSE_NORMALIZATION = 13,
+ LOGISTIC = 14,
+ LSH_PROJECTION = 15,
+ LSTM = 16,
+ MAX_POOL_2D = 17,
+ MUL = 18,
+ RELU = 19,
+ // NOTE(aselle): RELU_N1_TO_1 used to be called RELU1, but it was renamed
+ // since different model developers use RELU1 in different ways. Never
+ // create another op called RELU1.
+ RELU_N1_TO_1 = 20,
+ RELU6 = 21,
+ RESHAPE = 22,
+ RESIZE_BILINEAR = 23,
+ RNN = 24,
+ SOFTMAX = 25,
+ SPACE_TO_DEPTH = 26,
+ SVDF = 27,
+ TANH = 28,
+ // TODO(aselle): Consider rename to CONCATENATE_EMBEDDINGS
+ CONCAT_EMBEDDINGS = 29,
+ SKIP_GRAM = 30,
+ CALL = 31,
+ CUSTOM = 32,
+ EMBEDDING_LOOKUP_SPARSE = 33,
+ PAD = 34,
+ UNIDIRECTIONAL_SEQUENCE_RNN = 35,
+ GATHER = 36,
+ BATCH_TO_SPACE_ND = 37,
+ SPACE_TO_BATCH_ND = 38,
+ TRANSPOSE = 39,
+ MEAN = 40,
+ SUB = 41,
+ DIV = 42,
+ SQUEEZE = 43,
+ UNIDIRECTIONAL_SEQUENCE_LSTM = 44,
+ STRIDED_SLICE = 45,
+ BIDIRECTIONAL_SEQUENCE_RNN = 46,
+ EXP = 47,
+ TOPK_V2 = 48,
+ SPLIT = 49,
+ LOG_SOFTMAX = 50,
+ // DELEGATE is a special op type for the operations which are delegated to
+ // other backends.
+ // WARNING: Experimental interface, subject to change
+ DELEGATE = 51,
+ BIDIRECTIONAL_SEQUENCE_LSTM = 52,
+ CAST = 53,
+ PRELU = 54,
+ MAXIMUM = 55,
+ ARG_MAX = 56,
+ MINIMUM = 57,
+ LESS = 58,
+ NEG = 59,
+ PADV2 = 60,
+ GREATER = 61,
+ GREATER_EQUAL = 62,
+ LESS_EQUAL = 63,
+ SELECT = 64,
+ SLICE = 65,
+ SIN = 66,
+ TRANSPOSE_CONV = 67,
+ SPARSE_TO_DENSE = 68,
+ TILE = 69,
+ EXPAND_DIMS = 70,
+ EQUAL = 71,
+ NOT_EQUAL = 72,
+ LOG = 73,
+ SUM = 74,
+ SQRT = 75,
+ RSQRT = 76,
+ SHAPE = 77,
+ POW = 78,
+ ARG_MIN = 79,
+ FAKE_QUANT = 80,
+ REDUCE_PROD = 81,
+ REDUCE_MAX = 82,
+ PACK = 83,
+ LOGICAL_OR = 84,
+ ONE_HOT = 85,
+ LOGICAL_AND = 86,
+ LOGICAL_NOT = 87,
+ UNPACK = 88,
+ REDUCE_MIN = 89,
+ FLOOR_DIV = 90,
+ REDUCE_ANY = 91,
+ SQUARE = 92,
+ ZEROS_LIKE = 93,
+ FILL = 94,
+ FLOOR_MOD = 95,
+ RANGE = 96,
+ RESIZE_NEAREST_NEIGHBOR = 97,
+ LEAKY_RELU = 98,
+ SQUARED_DIFFERENCE = 99,
+ MIRROR_PAD = 100,
+ ABS = 101,
+ SPLIT_V = 102,
+ UNIQUE = 103,
+ CEIL = 104,
+ REVERSE_V2 = 105,
+ ADD_N = 106,
+ GATHER_ND = 107,
+ COS = 108,
+ WHERE = 109,
+ RANK = 110,
+ ELU = 111,
+ REVERSE_SEQUENCE = 112,
+ MATRIX_DIAG = 113,
+ QUANTIZE = 114,
+ MATRIX_SET_DIAG = 115,
+ ROUND = 116,
+ HARD_SWISH = 117,
+ IF = 118,
+ WHILE = 119,
+ NON_MAX_SUPPRESSION_V4 = 120,
+ NON_MAX_SUPPRESSION_V5 = 121,
+ SCATTER_ND = 122,
+ SELECT_V2 = 123,
+ DENSIFY = 124,
+ SEGMENT_SUM = 125,
+ BATCH_MATMUL = 126,
+ BCQ_GATHER = 252,
+ BCQ_FULLY_CONNECTED = 253,
+ INSTANCE_NORM = 254,
+}
+
+// Options for the builtin operators.
+union BuiltinOptions {
+ Conv2DOptions,
+ DepthwiseConv2DOptions,
+ ConcatEmbeddingsOptions,
+ LSHProjectionOptions,
+ Pool2DOptions,
+ SVDFOptions,
+ RNNOptions,
+ FullyConnectedOptions,
+ SoftmaxOptions,
+ ConcatenationOptions,
+ AddOptions,
+ L2NormOptions,
+ LocalResponseNormalizationOptions,
+ LSTMOptions,
+ ResizeBilinearOptions,
+ CallOptions,
+ ReshapeOptions,
+ SkipGramOptions,
+ SpaceToDepthOptions,
+ EmbeddingLookupSparseOptions,
+ MulOptions,
+ PadOptions,
+ GatherOptions,
+ BatchToSpaceNDOptions,
+ SpaceToBatchNDOptions,
+ TransposeOptions,
+ ReducerOptions,
+ SubOptions,
+ DivOptions,
+ SqueezeOptions,
+ SequenceRNNOptions,
+ StridedSliceOptions,
+ ExpOptions,
+ TopKV2Options,
+ SplitOptions,
+ LogSoftmaxOptions,
+ CastOptions,
+ DequantizeOptions,
+ MaximumMinimumOptions,
+ ArgMaxOptions,
+ LessOptions,
+ NegOptions,
+ PadV2Options,
+ GreaterOptions,
+ GreaterEqualOptions,
+ LessEqualOptions,
+ SelectOptions,
+ SliceOptions,
+ TransposeConvOptions,
+ SparseToDenseOptions,
+ TileOptions,
+ ExpandDimsOptions,
+ EqualOptions,
+ NotEqualOptions,
+ ShapeOptions,
+ PowOptions,
+ ArgMinOptions,
+ FakeQuantOptions,
+ PackOptions,
+ LogicalOrOptions,
+ OneHotOptions,
+ LogicalAndOptions,
+ LogicalNotOptions,
+ UnpackOptions,
+ FloorDivOptions,
+ SquareOptions,
+ ZerosLikeOptions,
+ FillOptions,
+ BidirectionalSequenceLSTMOptions,
+ BidirectionalSequenceRNNOptions,
+ UnidirectionalSequenceLSTMOptions,
+ FloorModOptions,
+ RangeOptions,
+ ResizeNearestNeighborOptions,
+ LeakyReluOptions,
+ SquaredDifferenceOptions,
+ MirrorPadOptions,
+ AbsOptions,
+ SplitVOptions,
+ UniqueOptions,
+ ReverseV2Options,
+ AddNOptions,
+ GatherNdOptions,
+ CosOptions,
+ WhereOptions,
+ RankOptions,
+ ReverseSequenceOptions,
+ MatrixDiagOptions,
+ QuantizeOptions,
+ MatrixSetDiagOptions,
+ HardSwishOptions,
+ IfOptions,
+ WhileOptions,
+ DepthToSpaceOptions,
+ NonMaxSuppressionV4Options,
+ NonMaxSuppressionV5Options,
+ ScatterNdOptions,
+ SelectV2Options,
+ DensifyOptions,
+ SegmentSumOptions,
+ BatchMatMulOptions,
+ BCQGatherOptions = 252,
+ BCQFullyConnectedOptions = 253,
+ InstanceNormOptions = 254,
+}
+
+enum Padding : byte { SAME, VALID }
+
+enum ActivationFunctionType : byte {
+ NONE = 0,
+ RELU = 1,
+ RELU_N1_TO_1 = 2,
+ RELU6 = 3,
+ TANH = 4,
+ SIGN_BIT = 5,
+}
+
+table Conv2DOptions {
+ padding:Padding;
+ stride_w:int;
+ stride_h:int;
+ fused_activation_function:ActivationFunctionType;
+ dilation_w_factor:int = 1;
+ dilation_h_factor:int = 1;
+}
+
+table Pool2DOptions {
+ padding:Padding;
+ stride_w:int;
+ stride_h:int;
+ filter_width:int;
+ filter_height:int;
+ fused_activation_function:ActivationFunctionType;
+}
+
+table DepthwiseConv2DOptions {
+ // Parameters for DepthwiseConv version 1 or above.
+ padding:Padding;
+ stride_w:int;
+ stride_h:int;
+ // `depth_multiplier` is redundant. It's used by CPU kernels in
+ // TensorFlow 2.0 or below, but ignored in versions above.
+ // See comments in lite/c/builtin_op_data.h for more details.
+ depth_multiplier:int;
+ fused_activation_function:ActivationFunctionType;
+ // Parameters for DepthwiseConv version 2 or above.
+ dilation_w_factor:int = 1;
+ dilation_h_factor:int = 1;
+}
+
+table ConcatEmbeddingsOptions {
+ num_channels:int;
+ num_columns_per_channel:[int];
+ embedding_dim_per_channel:[int]; // This could be inferred from parameters.
+}
+
+enum LSHProjectionType: byte {
+ UNKNOWN = 0,
+ SPARSE = 1,
+ DENSE = 2,
+}
+
+table LSHProjectionOptions {
+ type: LSHProjectionType;
+}
+
+table SVDFOptions {
+ rank:int;
+ fused_activation_function:ActivationFunctionType;
+ // For weights-only quantization, use asymmetric quantization for non
+ // constant inputs at evaluation time.
+ asymmetric_quantize_inputs:bool;
+}
+
+// An implementation of TensorFlow RNNCell.
+table RNNOptions {
+ fused_activation_function:ActivationFunctionType;
+ asymmetric_quantize_inputs:bool;
+}
+
+// An implementation of TensorFlow dynamic_rnn with RNNCell.
+table SequenceRNNOptions {
+ time_major:bool;
+ fused_activation_function:ActivationFunctionType;
+ asymmetric_quantize_inputs:bool;
+}
+
+// An implementation of TensorFlow bidirectional_dynamic_rnn with RNNCell.
+table BidirectionalSequenceRNNOptions {
+ time_major:bool;
+ fused_activation_function:ActivationFunctionType;
+ merge_outputs: bool;
+ asymmetric_quantize_inputs:bool;
+}
+
+enum FullyConnectedOptionsWeightsFormat: byte {
+ DEFAULT = 0,
+ SHUFFLED4x16INT8 = 1,
+ SHUFFLED16x1FLOAT32 = 127
+}
+
+// An implementation of TensorFlow fully_connected (a.k.a Dense) layer.
+table FullyConnectedOptions {
+ // Parameters for FullyConnected version 1 or above.
+ fused_activation_function:ActivationFunctionType;
+
+ // Parameters for FullyConnected version 2 or above.
+ weights_format:FullyConnectedOptionsWeightsFormat = DEFAULT;
+
+ // Parameters for FullyConnected version 5 or above.
+ // If set to true, then the number of dimensions is preserved. Furthermore,
+ // all but the last dimension of the input and output shapes will be equal.
+ keep_num_dims: bool;
+
+ // Parameters for FullyConnected version 7 or above.
+ // If set to true, then weights-only op will use asymmetric quantization for
+ // inputs.
+ asymmetric_quantize_inputs: bool;
+}
+
+table SoftmaxOptions {
+ beta: float;
+}
+
+// An implementation of TensorFlow concat.
+table ConcatenationOptions {
+ axis:int;
+ fused_activation_function:ActivationFunctionType;
+}
+
+table AddOptions {
+ fused_activation_function:ActivationFunctionType;
+}
+
+table MulOptions {
+ fused_activation_function:ActivationFunctionType;
+}
+
+table L2NormOptions {
+ fused_activation_function:ActivationFunctionType;
+}
+
+table LocalResponseNormalizationOptions {
+ radius:int;
+ bias:float;
+ alpha:float;
+ beta:float;
+}
+
+enum LSTMKernelType : byte {
+ // Full LSTM kernel which supports peephole and projection.
+ FULL = 0,
+ // Basic LSTM kernels. Equivalent to TensorFlow BasicLSTMCell.
+ BASIC = 1,
+}
+
+// An implementation of TensorFlow LSTMCell and CoupledInputForgetGateLSTMCell
+table LSTMOptions {
+ // Parameters for LSTM version 1 or above.
+ fused_activation_function:ActivationFunctionType;
+ cell_clip: float; // Optional, 0.0 means no clipping
+ proj_clip: float; // Optional, 0.0 means no clipping
+
+ // Parameters for LSTM version 2 or above.
+ // Basic kernel is only supported in version 2 or above.
+ kernel_type: LSTMKernelType = FULL;
+
+ // Parameters for LSTM version 4 or above.
+ asymmetric_quantize_inputs: bool;
+}
+
+// An implementation of TensorFlow dynamic_rnn with LSTMCell.
+table UnidirectionalSequenceLSTMOptions {
+ fused_activation_function:ActivationFunctionType;
+ cell_clip: float; // Optional, 0.0 means no clipping
+ proj_clip: float; // Optional, 0.0 means no clipping
+
+ // If true then first dimension is sequence, otherwise batch.
+ time_major:bool;
+
+ // Parameter for Unidirectional Sequence LSTM version 4.
+ asymmetric_quantize_inputs:bool;
+}
+
+table BidirectionalSequenceLSTMOptions {
+ // Parameters supported by version 1:
+ fused_activation_function:ActivationFunctionType;
+ cell_clip: float; // Optional, 0.0 means no clipping
+ proj_clip: float; // Optional, 0.0 means no clipping
+
+ // If true, store the outputs of both directions into the first output.
+ merge_outputs: bool;
+
+ // Parameters supported by version 2:
+ // If true then first dimension is sequence, otherwise batch.
+ // Version 1 implementations assumed time_major to be true, so this default
+ // value should never change.
+ time_major: bool = true;
+
+ // Parameters for version 3 or above.
+ asymmetric_quantize_inputs:bool;
+}
+
+table ResizeBilinearOptions {
+ new_height: int (deprecated);
+ new_width: int (deprecated);
+ align_corners: bool;
+ half_pixel_centers: bool;
+}
+
+table ResizeNearestNeighborOptions {
+ align_corners: bool;
+}
+
+// Options for a call operation.
+table CallOptions {
+ // The subgraph index that needs to be called.
+ subgraph:uint;
+}
+
+table PadOptions {
+}
+
+table PadV2Options {
+}
+
+table ReshapeOptions {
+ new_shape:[int];
+}
+
+table SpaceToBatchNDOptions {
+}
+
+table BatchToSpaceNDOptions {
+}
+
+table SkipGramOptions {
+ ngram_size: int;
+ max_skip_size: int;
+ include_all_ngrams: bool;
+}
+
+table SpaceToDepthOptions {
+ block_size: int;
+}
+
+table DepthToSpaceOptions {
+ block_size: int;
+}
+
+table SubOptions {
+ fused_activation_function:ActivationFunctionType;
+}
+
+table DivOptions {
+ fused_activation_function:ActivationFunctionType;
+}
+
+table TopKV2Options {
+}
+
+enum CombinerType : byte {
+ SUM = 0,
+ MEAN = 1,
+ SQRTN = 2,
+}
+
+table EmbeddingLookupSparseOptions {
+ combiner:CombinerType;
+}
+
+table GatherOptions {
+ axis: int;
+}
+
+table TransposeOptions {
+}
+
+table ExpOptions {
+}
+
+table CosOptions {
+}
+
+table ReducerOptions {
+ keep_dims: bool;
+}
+
+table SqueezeOptions {
+ squeeze_dims:[int];
+}
+
+table SplitOptions {
+ num_splits: int;
+}
+
+table SplitVOptions {
+ num_splits: int;
+}
+
+table StridedSliceOptions {
+ begin_mask: int;
+ end_mask: int;
+ ellipsis_mask: int;
+ new_axis_mask: int;
+ shrink_axis_mask: int;
+}
+
+table LogSoftmaxOptions {
+}
+
+table CastOptions {
+ in_data_type: TensorType;
+ out_data_type: TensorType;
+}
+
+table DequantizeOptions {
+}
+
+table MaximumMinimumOptions {
+}
+
+table TileOptions {
+}
+
+table ArgMaxOptions {
+ output_type : TensorType;
+}
+
+table ArgMinOptions {
+ output_type : TensorType;
+}
+
+table GreaterOptions {
+}
+
+table GreaterEqualOptions {
+}
+
+table LessOptions {
+}
+
+table LessEqualOptions {
+}
+
+table NegOptions {
+}
+
+table SelectOptions {
+}
+
+table SliceOptions {
+}
+
+table TransposeConvOptions {
+ padding:Padding;
+ stride_w:int;
+ stride_h:int;
+}
+
+table ExpandDimsOptions {
+}
+
+table SparseToDenseOptions {
+ validate_indices:bool;
+}
+
+table EqualOptions {
+}
+
+table NotEqualOptions {
+}
+
+table ShapeOptions {
+ // Optional output type of the operation (int32 or int64). Defaults to int32.
+ out_type : TensorType;
+}
+
+table RankOptions {
+}
+
+table PowOptions {
+}
+
+table FakeQuantOptions {
+ // Parameters supported by version 1:
+ min:float;
+ max:float;
+ num_bits:int;
+
+ // Parameters supported by version 2:
+ narrow_range:bool;
+}
+
+table PackOptions {
+ values_count:int;
+ axis:int;
+}
+
+table LogicalOrOptions {
+}
+
+table OneHotOptions {
+ axis:int;
+}
+
+table AbsOptions {
+}
+
+table HardSwishOptions {
+}
+
+table LogicalAndOptions {
+}
+
+table LogicalNotOptions {
+}
+
+table UnpackOptions {
+ num:int;
+ axis:int;
+}
+
+table FloorDivOptions {
+}
+
+table SquareOptions {
+}
+
+table ZerosLikeOptions {
+}
+
+table FillOptions {
+}
+
+table FloorModOptions {
+}
+
+table RangeOptions {
+}
+
+table LeakyReluOptions {
+ alpha:float;
+}
+
+table SquaredDifferenceOptions {
+}
+
+enum MirrorPadMode : byte {
+ // Doesn't include borders.
+ REFLECT = 0,
+ // Includes borders.
+ SYMMETRIC = 1,
+}
+
+table MirrorPadOptions {
+ mode:MirrorPadMode;
+}
+
+table UniqueOptions {
+ idx_out_type:TensorType = INT32;
+}
+
+table ReverseV2Options {
+}
+
+table AddNOptions {
+}
+
+table GatherNdOptions {
+}
+
+table WhereOptions {
+}
+
+table ReverseSequenceOptions {
+ seq_dim:int;
+ batch_dim:int = 0;
+}
+
+table MatrixDiagOptions {
+}
+
+table QuantizeOptions {
+}
+
+table MatrixSetDiagOptions {
+}
+
+table IfOptions {
+ then_subgraph_index:int;
+ else_subgraph_index:int;
+}
+
+table WhileOptions {
+ cond_subgraph_index:int;
+ body_subgraph_index:int;
+}
+
+table NonMaxSuppressionV4Options {
+}
+
+table NonMaxSuppressionV5Options {
+}
+
+table ScatterNdOptions {
+}
+
+table SelectV2Options {
+}
+
+table DensifyOptions {
+}
+
+table SegmentSumOptions {
+}
+
+table BatchMatMulOptions {
+ adjoint_lhs:bool;
+ adjoint_rhs:bool;
+}
+
+table BCQGatherOptions {
+ input_hidden_size: int;
+ axis: int;
+}
+
+table BCQFullyConnectedOptions {
+ weights_hidden_size: int;
+ fused_activation_function:ActivationFunctionType;
+}
+
+table InstanceNormOptions {
+ epsilon:float;
+ fused_activation_function:ActivationFunctionType;
+}
+
+// An OperatorCode can be an enum value (BuiltinOperator) if the operator is a
+// builtin, or a string if the operator is custom.
+table OperatorCode {
+ builtin_code:BuiltinOperator;
+ custom_code:string;
+
+ // The version of the operator. The version needs to be bumped whenever new
+ // parameters are introduced into an op.
+ version:int = 1;
+}
+
+enum CustomOptionsFormat : byte {
+ FLEXBUFFERS = 0,
+}
+
+enum DataFormat : byte {
+ // For 2D data, NHWC(batch, height, width, channels)
+ // For 3D data, NDHWC(batch, depth, height, width, channels)
+ CHANNELS_LAST = 0,
+ // For 2D data, NCHW(batch, channels, height, width)
+ // For 3D data, NCDHW(batch, channels, depth, height, width)
+ CHANNELS_FIRST = 1,
+}
+
+// An operator takes tensors as inputs and outputs. The type of operation being
+// performed is determined by an index into the list of valid OperatorCodes,
+// while the specifics of each operations is configured using builtin_options
+// or custom_options.
+table Operator {
+ // Index into the operator_codes array. Using an integer here avoids
+ // complicated map lookups.
+ opcode_index:uint;
+
+ // Optional input are indicated by -1.
+ inputs:[int];
+ outputs:[int];
+
+ builtin_options:BuiltinOptions;
+ custom_options:[ubyte];
+ custom_options_format:CustomOptionsFormat;
+
+ // A list of booleans indicating the input tensors which are being mutated by
+ // this operator (e.g. used by RNN and LSTM).
+ // For example, if the "inputs" array refers to 5 tensors and the second and
+ // fifth are mutable variables, then this list will contain
+ // [false, true, false, false, true].
+ //
+ // If the list is empty, no variable is mutated in this operator.
+ // The list either has the same length as `inputs`, or is empty.
+ mutating_variable_inputs:[bool];
+
+ // A list of indices to the subgraph's "tensors" that are internal to an Op.
+ // Internal tensors are those that do not flow in or out of the operation,
+ // but instead are part of internal computation. As such, the operation's
+ // implementation may manage its memory more efficiently. They are needed
+ // however (i.e. not just an implementation detail) since they are part of the
+ // computation, which may require relevant metadata such as quantization
+ // parameters.
+ intermediates:[int];
+}
+
+// The root type, defining a subgraph, which typically represents an entire
+// model.
+table SubGraph {
+ // A list of all tensors used in this subgraph.
+ tensors:[Tensor];
+
+ // Indices of the tensors that are inputs into this subgraph. Note this is
+ // the list of non-static tensors that feed into the subgraph for inference.
+ inputs:[int];
+
+ // Indices of the tensors that are outputs out of this subgraph. Note this is
+ // the list of output tensors that are considered the product of the
+ // subgraph's inference.
+ outputs:[int];
+
+ // All operators, in execution order.
+ operators:[Operator];
+
+ // Name of this subgraph (used for debugging).
+ name:string;
+
+ // Data format for input/output of SubGraph
+ data_format: DataFormat;
+}
+
+// Table of raw data buffers (used for constant tensors). Referenced by tensors
+// by index. The generous alignment accommodates mmap-friendly data structures.
+table Buffer {
+ data:[ubyte] (force_align: 16);
+}
+
+table Metadata {
+ // A human readable string to uniquely identify a Metadata.
+ name:string;
+ // An index to the buffers table.
+ buffer:uint;
+}
+
+table Model {
+ // Version of the schema.
+ version:uint;
+
+ // A list of all operator codes used in this model. This is
+ // kept in order because operators carry an index into this
+ // vector.
+ operator_codes:[OperatorCode];
+
+ // All the subgraphs of the model. The 0th is assumed to be the main
+ // model.
+ subgraphs:[SubGraph];
+
+ // A description of the model.
+ description:string;
+
+ // Buffers of the model.
+ // Note the 0th entry of this array must be an empty buffer (sentinel).
+ // This is a convention so that tensors without a buffer can provide 0 as
+ // their buffer.
+ buffers:[Buffer];
+
+ // Metadata about the model, indirecting into the existing buffers list.
+ // Deprecated, prefer to use metadata field.
+ metadata_buffer:[int];
+
+ // Metadata about the model.
+ metadata:[Metadata];
+}
+
+root_type Model;
diff --git a/res/CircleSchema/0.4/circle_schema.fbs b/res/CircleSchema/0.4/circle_schema.fbs
new file mode 100644
index 000000000..8ad444d95
--- /dev/null
+++ b/res/CircleSchema/0.4/circle_schema.fbs
@@ -0,0 +1,1292 @@
+// Copyright (c) 2019~2022 Samsung Electronics Co., Ltd. All Rights Reserved
+// Copyright 2017 The TensorFlow Authors. All Rights Reserved.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+// Revision History
+//
+// Version Major.Minor
+//
+// Major version is schema version.
+// We keep the schema version while changes remain compatible.
+// Minor version is for human communication only;
+// it is not stored in the circle model.
+//
+// Version 0.0: Initial version. Based on TensorFlow Lite v1.13.1 schema.
+// Version 0.1: Based on TF v2.2-rc2 + more (from TensorFlow `56d281c`)
+// `BATCH_MATMUL` operator, `FLOAT64` tensor type,
+// `asymmetric_quantize_inputs` for several operator options
+// Version 0.2: BCQ_GATHER and BCQ_FULLY_CONNECTED are added.
+// Version 0.3: SHUFFLED16x1FLOAT32 is added.
+// Version 0.4: Base up to TensorFlow Lite v2.7.0 schema.
+
+namespace circle;
+
+// This corresponds to the version.
+file_identifier "CIR0";
+// File extension of any written files.
+file_extension "circle";
+
+// IMPORTANT: All new members of tables, enums and unions must be added at the
+// end to ensure backwards compatibility.
+
+// The type of data stored in a tensor.
+enum TensorType : byte {
+ FLOAT32 = 0,
+ FLOAT16 = 1,
+ INT32 = 2,
+ UINT8 = 3,
+ INT64 = 4,
+ STRING = 5,
+ BOOL = 6,
+ INT16 = 7,
+ COMPLEX64 = 8,
+ INT8 = 9,
+ FLOAT64 = 10,
+ COMPLEX128 = 11,
+ UINT64 = 12,
+ // Experimental: Resource and variant types are experimental and subject
+ // to change. Do not implement custom kernels using resource & variant types
+ // yet.
+ RESOURCE = 13,
+ VARIANT = 14,
+ UINT32 = 15,
+}
+
+// Custom quantization parameters for experimenting with new quantization
+// techniques.
+table CustomQuantization {
+ custom:[ubyte] (force_align: 16);
+}
+
+// Represents a specific quantization technique's parameters.
+union QuantizationDetails {
+ CustomQuantization,
+}
+
+// Parameters for converting a quantized tensor back to float.
+table QuantizationParameters {
+ // These four parameters are the asymmetric linear quantization parameters.
+ // Given a quantized value q, the corresponding float value f should be:
+ // f = scale * (q - zero_point)
+ // For other quantization types, the QuantizationDetails below is used.
+ min:[float]; // For importing back into tensorflow.
+ max:[float]; // For importing back into tensorflow.
+ scale:[float]; // For dequantizing the tensor's values.
+ zero_point:[long];
+
+ // If this is not none, the other quantization parameters (i.e. min, max,
+ // scale, zero_point fields above) are ignored and the value of the
+ // QuantizationDetails union should be used.
+ details:QuantizationDetails;
+
+ // Specifies the dimension of the Tensor's shape that the scales and
+ // zero_points correspond to. For example, a tensor t, with dims=[4, 3, 2, 1]
+ // with quantization params:
+ // scale=[1.0, 2.0, 3.0], zero_point=[1, 2, 3], quantization_dimension=1
+ // will be quantized across the second dimension of t.
+ // t[:, 0, :, :] will have scale[0]=1.0, zero_point[0]=1
+ // t[:, 1, :, :] will have scale[1]=2.0, zero_point[0]=2
+ // t[:, 2, :, :] will have scale[2]=3.0, zero_point[0]=3
+ quantized_dimension:int;
+}
+
+// Sparse tensors.
+// We use a modification of the TACO format.
+// Reference: http://tensor-compiler.org/kjolstad-oopsla17-tensor-compiler.pdf
+//
+// To encode a conceptual n-dimensional dense tensor with dims (d0, ..., dn-1),
+// potentially with a k-dimensional block (0 <= k <= n) with dims
+// (dn, ..., dn+k-1), the format needs to specify:
+// 1. In what order to traverse these dimensions. For example, to store a 2-D
+// matrix in row major order, the traversal order would be (d0, d1),
+// whereas to store it in column major order, the traversal order would be
+// (d1, d0). If the 2-D matrix has a 2-D inner block, the traversal order
+// could be (d0, d1, d2, d3).
+// 2. How each block dimension in (dn, ..., dn+k-1) maps to the original
+// tensor dimension in (d0, ..., dn-1).
+// 3. In the traversal order defined above, the format (dense vs. sparse) and
+// index metadata for each dimension. For a dense dimension, this is just
+// the size of that dimension. For a sparse dimension, it's the same as
+// the compressed index defined in the Compressed Sparse Row (CSR) format.
+// (http://scipy-lectures.org/advanced/scipy_sparse/csr_matrix.html)
+
+// The storage type for a dimension. Currently we support:
+// 1. DENSE: each coordinate in this dimension is stored implicitly.
+// 2. SPARSE_CSR: only the coordinates with non-zero elements are stored. The
+// compression technique is the same as the one CSR uses.
+// More types like a sparse dimension with a different compression technique
+// could be added to the list in the future.
+enum DimensionType : byte {
+ DENSE = 0,
+ SPARSE_CSR = 1,
+}
+
+table Int32Vector {
+ values:[int];
+}
+
+table Uint16Vector {
+ values:[ushort] (force_align: 4);
+}
+
+table Uint8Vector {
+ values:[ubyte] (force_align: 4);
+}
+
+// Variable-typed buffer to store the index metadata for a sparse dimension.
+// The widest type is Int32 instead of UInt32 because a tensor's shape is an int32
+// vector. We don't want the per-dimensional index to overflow that range.
+union SparseIndexVector {
+ Int32Vector,
+ Uint16Vector,
+ Uint8Vector
+}
+
+table DimensionMetadata {
+ // Whether a dimension is dense or sparse.
+ format:DimensionType;
+ // Index metadata used for a dimension.
+ // - If format is DimensionType.DENSE then we use the dense_size field to
+ // store the size of that dimension. Each index in that dimension is
+ // stored implicitly.
+ // - If format is DimensionType.SPARSE_CSR then we use array_segments and
+ // array_indices to encode that dimension. array_segments represents how
+ // to segment the indices array, each segment corresponds to one element
+ // in the previous dimension. array_indices represents the index of the
+ // non-zero elements within this dimension (as those in the CSR matrix
+ // format, where the first array is row pointers and the second array is
+ // column indices).
+ dense_size:int;
+ array_segments:SparseIndexVector;
+ array_indices:SparseIndexVector;
+}
+
+// Parameters to encode a sparse TfLite tensor.
+table SparsityParameters {
+ // The traversal order of the dimensions defined in the `shape` field of the
+// conceptual dense tensor. For an n-dimensional tensor with dims (d0, d1,
+ // ..., dn-1),
+ // - if not block sparse, the traversal_order is just a permutation of (d0,
+ // ..., dn-1). For example, a 2-D matrix stored in row-major order would
+ // have traversal_order = (d0, d1).
+ // - if block sparse with a k-dimensional block (0 <= k <= n), the
+ // traversal_order has n + k elements. The first n elements are still a
+// permutation of (d0, ..., dn-1). The last k elements are a permutation
+ // of (dn, ..., dn+k-1), defining how to traverse a block internally. For
+ // example, a 2-D matrix with 2-D blocks, both stored in row-major order
+ // would have traversal_order = (d0, d1, d2, d3).
+ traversal_order:[int];
+ // For an n-dimensional tensor with a k-dimensional block (0 <= k <= n),
+ // stores how a block dimension in (dn, ..., dn+k-1) maps to the original
+ // tensor dimension in (d0, ..., dn).
+ // It's stored in the order of (dn, ..., dn+k-1).
+ // If not block-sparse, this field is NULL.
+ block_map:[int];
+ // In the traversal order defined above, the metadata needed for
+ // each dimension to locate the non-zero values in the original dense tensor.
+ // The size of the dim_metadata array = the size of the traversal_order array
+ // = n + k.
+ dim_metadata:[DimensionMetadata];
+}
+
+table Tensor {
+ // The tensor shape. The meaning of each entry is operator-specific but
+ // builtin ops use: [batch size, height, width, number of channels] (That's
+ // Tensorflow's NHWC).
+ shape:[int];
+ type:TensorType;
+ // An index that refers to the buffers table at the root of the model. Or,
+ // if there is no data buffer associated (i.e. intermediate results), then
+ // this is 0 (which refers to an always existent empty buffer).
+ //
+ // The data_buffer itself is an opaque container, with the assumption that the
+ // target device is little-endian. In addition, all builtin operators assume
+ // the memory is ordered such that if `shape` is [4, 3, 2], then index
+ // [i, j, k] maps to data_buffer[i*3*2 + j*2 + k].
+ buffer:uint;
+ name:string; // For debugging and importing back into tensorflow.
+ quantization:QuantizationParameters; // Optional.
+
+ is_variable:bool = false;
+
+ // Parameters to encode a sparse tensor. See the example in
+ // tensorflow/lite/testdata/sparse_tensor.json.
+ sparsity:SparsityParameters; // Optional.
+
+ // Encodes `shape` with unknown dimensions. Unknown dimensions are
+ // represented with -1.
+ shape_signature:[int]; // Optional.
+}
+
+// A list of builtin operators. Builtin operators are slightly faster than custom
+// ones, but not by much. Moreover, while custom operators accept an opaque
+// object containing configuration parameters, builtins have a predetermined
+// set of acceptable options.
+// LINT.IfChange
+enum BuiltinOperator : int32 {
+ BCQ_GATHER = -4,
+ BCQ_FULLY_CONNECTED = -3,
+ INSTANCE_NORM = -2,
+ ADD = 0,
+ AVERAGE_POOL_2D = 1,
+ CONCATENATION = 2,
+ CONV_2D = 3,
+ DEPTHWISE_CONV_2D = 4,
+ DEPTH_TO_SPACE = 5,
+ DEQUANTIZE = 6,
+ EMBEDDING_LOOKUP = 7,
+ FLOOR = 8,
+ FULLY_CONNECTED = 9,
+ HASHTABLE_LOOKUP = 10,
+ L2_NORMALIZATION = 11,
+ L2_POOL_2D = 12,
+ LOCAL_RESPONSE_NORMALIZATION = 13,
+ LOGISTIC = 14,
+ LSH_PROJECTION = 15,
+ LSTM = 16,
+ MAX_POOL_2D = 17,
+ MUL = 18,
+ RELU = 19,
+ // NOTE(aselle): RELU_N1_TO_1 used to be called RELU1, but it was renamed
+ // since different model developers use RELU1 in different ways. Never
+ // create another op called RELU1.
+ RELU_N1_TO_1 = 20,
+ RELU6 = 21,
+ RESHAPE = 22,
+ RESIZE_BILINEAR = 23,
+ RNN = 24,
+ SOFTMAX = 25,
+ SPACE_TO_DEPTH = 26,
+ SVDF = 27,
+ TANH = 28,
+ CONCAT_EMBEDDINGS = 29,
+ SKIP_GRAM = 30,
+ CALL = 31,
+ CUSTOM = 32,
+ EMBEDDING_LOOKUP_SPARSE = 33,
+ PAD = 34,
+ UNIDIRECTIONAL_SEQUENCE_RNN = 35,
+ GATHER = 36,
+ BATCH_TO_SPACE_ND = 37,
+ SPACE_TO_BATCH_ND = 38,
+ TRANSPOSE = 39,
+ MEAN = 40,
+ SUB = 41,
+ DIV = 42,
+ SQUEEZE = 43,
+ UNIDIRECTIONAL_SEQUENCE_LSTM = 44,
+ STRIDED_SLICE = 45,
+ BIDIRECTIONAL_SEQUENCE_RNN = 46,
+ EXP = 47,
+ TOPK_V2 = 48,
+ SPLIT = 49,
+ LOG_SOFTMAX = 50,
+ // DELEGATE is a special op type for the operations which are delegated to
+ // other backends.
+ // WARNING: Experimental interface, subject to change
+ DELEGATE = 51,
+ BIDIRECTIONAL_SEQUENCE_LSTM = 52,
+ CAST = 53,
+ PRELU = 54,
+ MAXIMUM = 55,
+ ARG_MAX = 56,
+ MINIMUM = 57,
+ LESS = 58,
+ NEG = 59,
+ PADV2 = 60,
+ GREATER = 61,
+ GREATER_EQUAL = 62,
+ LESS_EQUAL = 63,
+ SELECT = 64,
+ SLICE = 65,
+ SIN = 66,
+ TRANSPOSE_CONV = 67,
+ SPARSE_TO_DENSE = 68,
+ TILE = 69,
+ EXPAND_DIMS = 70,
+ EQUAL = 71,
+ NOT_EQUAL = 72,
+ LOG = 73,
+ SUM = 74,
+ SQRT = 75,
+ RSQRT = 76,
+ SHAPE = 77,
+ POW = 78,
+ ARG_MIN = 79,
+ FAKE_QUANT = 80,
+ REDUCE_PROD = 81,
+ REDUCE_MAX = 82,
+ PACK = 83,
+ LOGICAL_OR = 84,
+ ONE_HOT = 85,
+ LOGICAL_AND = 86,
+ LOGICAL_NOT = 87,
+ UNPACK = 88,
+ REDUCE_MIN = 89,
+ FLOOR_DIV = 90,
+ REDUCE_ANY = 91,
+ SQUARE = 92,
+ ZEROS_LIKE = 93,
+ FILL = 94,
+ FLOOR_MOD = 95,
+ RANGE = 96,
+ RESIZE_NEAREST_NEIGHBOR = 97,
+ LEAKY_RELU = 98,
+ SQUARED_DIFFERENCE = 99,
+ MIRROR_PAD = 100,
+ ABS = 101,
+ SPLIT_V = 102,
+ UNIQUE = 103,
+ CEIL = 104,
+ REVERSE_V2 = 105,
+ ADD_N = 106,
+ GATHER_ND = 107,
+ COS = 108,
+ WHERE = 109,
+ RANK = 110,
+ ELU = 111,
+ REVERSE_SEQUENCE = 112,
+ MATRIX_DIAG = 113,
+ QUANTIZE = 114,
+ MATRIX_SET_DIAG = 115,
+ ROUND = 116,
+ HARD_SWISH = 117,
+ IF = 118,
+ WHILE = 119,
+ NON_MAX_SUPPRESSION_V4 = 120,
+ NON_MAX_SUPPRESSION_V5 = 121,
+ SCATTER_ND = 122,
+ SELECT_V2 = 123,
+ DENSIFY = 124,
+ SEGMENT_SUM = 125,
+ BATCH_MATMUL = 126,
+ PLACEHOLDER_FOR_GREATER_OP_CODES = 127,
+ CUMSUM = 128,
+ CALL_ONCE = 129,
+ BROADCAST_TO = 130,
+ RFFT2D = 131,
+ CONV_3D = 132,
+ IMAG = 133,
+ REAL = 134,
+ COMPLEX_ABS = 135,
+ HASHTABLE = 136,
+ HASHTABLE_FIND = 137,
+ HASHTABLE_IMPORT = 138,
+ HASHTABLE_SIZE = 139,
+ REDUCE_ALL = 140,
+ CONV_3D_TRANSPOSE = 141,
+ VAR_HANDLE = 142,
+ READ_VARIABLE = 143,
+ ASSIGN_VARIABLE = 144,
+ BROADCAST_ARGS = 145,
+ RANDOM_STANDARD_NORMAL = 146,
+}
+// LINT.ThenChange(nnapi_linter/linter.proto)
+
+// Options for the builtin operators.
+union BuiltinOptions {
+ Conv2DOptions,
+ DepthwiseConv2DOptions,
+ ConcatEmbeddingsOptions,
+ LSHProjectionOptions,
+ Pool2DOptions,
+ SVDFOptions,
+ RNNOptions,
+ FullyConnectedOptions,
+ SoftmaxOptions,
+ ConcatenationOptions,
+ AddOptions,
+ L2NormOptions,
+ LocalResponseNormalizationOptions,
+ LSTMOptions,
+ ResizeBilinearOptions,
+ CallOptions,
+ ReshapeOptions,
+ SkipGramOptions,
+ SpaceToDepthOptions,
+ EmbeddingLookupSparseOptions,
+ MulOptions,
+ PadOptions,
+ GatherOptions,
+ BatchToSpaceNDOptions,
+ SpaceToBatchNDOptions,
+ TransposeOptions,
+ ReducerOptions,
+ SubOptions,
+ DivOptions,
+ SqueezeOptions,
+ SequenceRNNOptions,
+ StridedSliceOptions,
+ ExpOptions,
+ TopKV2Options,
+ SplitOptions,
+ LogSoftmaxOptions,
+ CastOptions,
+ DequantizeOptions,
+ MaximumMinimumOptions,
+ ArgMaxOptions,
+ LessOptions,
+ NegOptions,
+ PadV2Options,
+ GreaterOptions,
+ GreaterEqualOptions,
+ LessEqualOptions,
+ SelectOptions,
+ SliceOptions,
+ TransposeConvOptions,
+ SparseToDenseOptions,
+ TileOptions,
+ ExpandDimsOptions,
+ EqualOptions,
+ NotEqualOptions,
+ ShapeOptions,
+ PowOptions,
+ ArgMinOptions,
+ FakeQuantOptions,
+ PackOptions,
+ LogicalOrOptions,
+ OneHotOptions,
+ LogicalAndOptions,
+ LogicalNotOptions,
+ UnpackOptions,
+ FloorDivOptions,
+ SquareOptions,
+ ZerosLikeOptions,
+ FillOptions,
+ BidirectionalSequenceLSTMOptions,
+ BidirectionalSequenceRNNOptions,
+ UnidirectionalSequenceLSTMOptions,
+ FloorModOptions,
+ RangeOptions,
+ ResizeNearestNeighborOptions,
+ LeakyReluOptions,
+ SquaredDifferenceOptions,
+ MirrorPadOptions,
+ AbsOptions,
+ SplitVOptions,
+ UniqueOptions,
+ ReverseV2Options,
+ AddNOptions,
+ GatherNdOptions,
+ CosOptions,
+ WhereOptions,
+ RankOptions,
+ ReverseSequenceOptions,
+ MatrixDiagOptions,
+ QuantizeOptions,
+ MatrixSetDiagOptions,
+ HardSwishOptions,
+ IfOptions,
+ WhileOptions,
+ DepthToSpaceOptions,
+ NonMaxSuppressionV4Options,
+ NonMaxSuppressionV5Options,
+ ScatterNdOptions,
+ SelectV2Options,
+ DensifyOptions,
+ SegmentSumOptions,
+ BatchMatMulOptions,
+ CumsumOptions,
+ CallOnceOptions,
+ BroadcastToOptions,
+ Rfft2dOptions,
+ Conv3DOptions,
+ HashtableOptions,
+ HashtableFindOptions,
+ HashtableImportOptions,
+ HashtableSizeOptions,
+ VarHandleOptions,
+ ReadVariableOptions,
+ AssignVariableOptions,
+ RandomOptions,
+ BCQGatherOptions = 252,
+ BCQFullyConnectedOptions = 253,
+ InstanceNormOptions = 254,
+}
+
+enum Padding : byte { SAME, VALID }
+
+enum ActivationFunctionType : byte {
+ NONE = 0,
+ RELU = 1,
+ RELU_N1_TO_1 = 2,
+ RELU6 = 3,
+ TANH = 4,
+ SIGN_BIT = 5,
+}
+
+table Conv2DOptions {
+ padding:Padding;
+ stride_w:int;
+ stride_h:int;
+ fused_activation_function:ActivationFunctionType;
+ dilation_w_factor:int = 1;
+ dilation_h_factor:int = 1;
+}
+
+// Options for both Conv3D and Conv3DTranspose.
+table Conv3DOptions {
+ padding:Padding;
+ stride_d:int;
+ stride_w:int;
+ stride_h:int;
+ fused_activation_function:ActivationFunctionType;
+ dilation_d_factor:int = 1;
+ dilation_w_factor:int = 1;
+ dilation_h_factor:int = 1;
+}
+
+table Pool2DOptions {
+ padding:Padding;
+ stride_w:int;
+ stride_h:int;
+ filter_width:int;
+ filter_height:int;
+ fused_activation_function:ActivationFunctionType;
+}
+
+table DepthwiseConv2DOptions {
+ // Parameters for DepthwiseConv version 1 or above.
+ padding:Padding;
+ stride_w:int;
+ stride_h:int;
+ // `depth_multiplier` is redundant. It's used by CPU kernels in
+ // TensorFlow 2.0 or below, but ignored in versions above.
+ // See comments in lite/c/builtin_op_data.h for more details.
+ depth_multiplier:int;
+ fused_activation_function:ActivationFunctionType;
+ // Parameters for DepthwiseConv version 2 or above.
+ dilation_w_factor:int = 1;
+ dilation_h_factor:int = 1;
+}
+
+table ConcatEmbeddingsOptions {
+ num_channels:int;
+ num_columns_per_channel:[int];
+ embedding_dim_per_channel:[int]; // This could be inferred from parameters.
+}
+
+enum LSHProjectionType: byte {
+ UNKNOWN = 0,
+ SPARSE = 1,
+ DENSE = 2,
+}
+
+table LSHProjectionOptions {
+ type: LSHProjectionType;
+}
+
+table SVDFOptions {
+ rank:int;
+ fused_activation_function:ActivationFunctionType;
+ // For weights-only quantization, use asymmetric quantization for
+ // non-constant inputs at evaluation time.
+ asymmetric_quantize_inputs:bool;
+}
+
+// An implementation of TensorFlow RNNCell.
+table RNNOptions {
+ fused_activation_function:ActivationFunctionType;
+ asymmetric_quantize_inputs:bool;
+}
+
+// An implementation of TensorFlow dynamic_rnn with RNNCell.
+table SequenceRNNOptions {
+ time_major:bool;
+ fused_activation_function:ActivationFunctionType;
+ asymmetric_quantize_inputs:bool;
+}
+
+// An implementation of TensorFlow bidirectional_dynamic_rnn with RNNCell.
+table BidirectionalSequenceRNNOptions {
+ time_major:bool;
+ fused_activation_function:ActivationFunctionType;
+ merge_outputs: bool;
+ asymmetric_quantize_inputs:bool;
+}
+
+enum FullyConnectedOptionsWeightsFormat: byte {
+ DEFAULT = 0,
+ SHUFFLED4x16INT8 = 1,
+ SHUFFLED16x1FLOAT32 = 127
+}
+
+// An implementation of TensorFlow fully_connected (a.k.a. Dense) layer.
+table FullyConnectedOptions {
+ // Parameters for FullyConnected version 1 or above.
+ fused_activation_function:ActivationFunctionType;
+
+ // Parameters for FullyConnected version 2 or above.
+ weights_format:FullyConnectedOptionsWeightsFormat = DEFAULT;
+
+ // Parameters for FullyConnected version 5 or above.
+ // If set to true, then the number of dimensions is preserved. Furthermore,
+ // all but the last dimension of the input and output shapes will be equal.
+ keep_num_dims: bool;
+
+ // Parameters for FullyConnected version 7 or above.
+ // If set to true, then weights-only op will use asymmetric quantization for
+ // inputs.
+ asymmetric_quantize_inputs: bool;
+}
+
+table SoftmaxOptions {
+ beta: float;
+}
+
+// An implementation of TensorFlow concat.
+table ConcatenationOptions {
+ axis:int;
+ fused_activation_function:ActivationFunctionType;
+}
+
+table AddOptions {
+ fused_activation_function:ActivationFunctionType;
+ // Parameters supported by version 3.
+ pot_scale_int16:bool = true;
+}
+
+table MulOptions {
+ fused_activation_function:ActivationFunctionType;
+}
+
+table L2NormOptions {
+ // This field is currently ignored in the L2 Norm Op.
+ fused_activation_function:ActivationFunctionType;
+}
+
+table LocalResponseNormalizationOptions {
+ radius:int;
+ bias:float;
+ alpha:float;
+ beta:float;
+}
+
+enum LSTMKernelType : byte {
+ // Full LSTM kernel which supports peephole and projection.
+ FULL = 0,
+ // Basic LSTM kernels. Equivalent to TensorFlow BasicLSTMCell.
+ BASIC = 1,
+}
+
+// An implementation of TensorFlow LSTMCell and CoupledInputForgetGateLSTMCell
+table LSTMOptions {
+ // Parameters for LSTM version 1 or above.
+ fused_activation_function:ActivationFunctionType;
+ cell_clip: float; // Optional, 0.0 means no clipping
+ proj_clip: float; // Optional, 0.0 means no clipping
+
+ // Parameters for LSTM version 2 or above.
+ // Basic kernel is only supported in version 2 or above.
+ kernel_type: LSTMKernelType = FULL;
+
+ // Parameters for LSTM version 4 or above.
+ asymmetric_quantize_inputs: bool;
+}
+
+// An implementation of TensorFlow dynamic_rnn with LSTMCell.
+table UnidirectionalSequenceLSTMOptions {
+ fused_activation_function:ActivationFunctionType;
+ cell_clip: float; // Optional, 0.0 means no clipping
+ proj_clip: float; // Optional, 0.0 means no clipping
+
+ // If true then first dimension is sequence, otherwise batch.
+ time_major:bool;
+
+ // Parameter for Unidirectional Sequence LSTM version 4.
+ asymmetric_quantize_inputs:bool;
+}
+
+table BidirectionalSequenceLSTMOptions {
+ // Parameters supported by version 1:
+ fused_activation_function:ActivationFunctionType;
+ cell_clip: float; // Optional, 0.0 means no clipping
+ proj_clip: float; // Optional, 0.0 means no clipping
+
+ // If true, store the outputs of both directions into the first output.
+ merge_outputs: bool;
+
+ // Parameters supported by version 2:
+ // If true then first dimension is sequence, otherwise batch.
+ // Version 1 implementations assumed time_major to be true, so this default
+ // value should never change.
+ time_major: bool = true;
+
+ // Parameters for version 3 or above.
+ asymmetric_quantize_inputs:bool;
+}
+
+table ResizeBilinearOptions {
+ new_height: int (deprecated);
+ new_width: int (deprecated);
+ align_corners: bool;
+ half_pixel_centers: bool;
+}
+
+table ResizeNearestNeighborOptions {
+ align_corners: bool;
+ half_pixel_centers: bool;
+}
+
+// Options for the call operation.
+table CallOptions {
+ // The subgraph index that needs to be called.
+ subgraph:uint;
+}
+
+table PadOptions {
+}
+
+table PadV2Options {
+}
+
+table ReshapeOptions {
+ new_shape:[int];
+}
+
+table SpaceToBatchNDOptions {
+}
+
+table BatchToSpaceNDOptions {
+}
+
+table SkipGramOptions {
+ ngram_size: int;
+ max_skip_size: int;
+ include_all_ngrams: bool;
+}
+
+table SpaceToDepthOptions {
+ block_size: int;
+}
+
+table DepthToSpaceOptions {
+ block_size: int;
+}
+
+table SubOptions {
+ fused_activation_function:ActivationFunctionType;
+ // Parameters supported by version 5
+ pot_scale_int16:bool = true;
+}
+
+table DivOptions {
+ fused_activation_function:ActivationFunctionType;
+}
+
+table TopKV2Options {
+}
+
+enum CombinerType : byte {
+ SUM = 0,
+ MEAN = 1,
+ SQRTN = 2,
+}
+
+table EmbeddingLookupSparseOptions {
+ combiner:CombinerType;
+}
+
+table GatherOptions {
+ axis: int;
+ // Parameters for Gather version 5 or above.
+ batch_dims: int = 0;
+}
+
+table TransposeOptions {
+}
+
+table ExpOptions {
+}
+
+table CosOptions {
+}
+
+table ReducerOptions {
+ keep_dims: bool;
+}
+
+table SqueezeOptions {
+ squeeze_dims:[int];
+}
+
+table SplitOptions {
+ num_splits: int;
+}
+
+table SplitVOptions {
+ num_splits: int;
+}
+
+table StridedSliceOptions {
+ begin_mask: int;
+ end_mask: int;
+ ellipsis_mask: int;
+ new_axis_mask: int;
+ shrink_axis_mask: int;
+}
+
+table LogSoftmaxOptions {
+}
+
+table CastOptions {
+ in_data_type: TensorType;
+ out_data_type: TensorType;
+}
+
+table DequantizeOptions {
+}
+
+table MaximumMinimumOptions {
+}
+
+table TileOptions {
+}
+
+table ArgMaxOptions {
+ output_type : TensorType;
+}
+
+table ArgMinOptions {
+ output_type : TensorType;
+}
+
+table GreaterOptions {
+}
+
+table GreaterEqualOptions {
+}
+
+table LessOptions {
+}
+
+table LessEqualOptions {
+}
+
+table NegOptions {
+}
+
+table SelectOptions {
+}
+
+table SliceOptions {
+}
+
+table TransposeConvOptions {
+ padding:Padding;
+ stride_w:int;
+ stride_h:int;
+}
+
+table ExpandDimsOptions {
+}
+
+table SparseToDenseOptions {
+ validate_indices:bool;
+}
+
+table EqualOptions {
+}
+
+table NotEqualOptions {
+}
+
+table ShapeOptions {
+ // Optional output type of the operation (int32 or int64). Defaults to int32.
+ out_type : TensorType;
+}
+
+table RankOptions {
+}
+
+table PowOptions {
+}
+
+table FakeQuantOptions {
+ // Parameters supported by version 1:
+ min:float;
+ max:float;
+ num_bits:int;
+
+ // Parameters supported by version 2:
+ narrow_range:bool;
+}
+
+table PackOptions {
+ values_count:int;
+ axis:int;
+}
+
+table LogicalOrOptions {
+}
+
+table OneHotOptions {
+ axis:int;
+}
+
+table AbsOptions {
+}
+
+table HardSwishOptions {
+}
+
+table LogicalAndOptions {
+}
+
+table LogicalNotOptions {
+}
+
+table UnpackOptions {
+ num:int;
+ axis:int;
+}
+
+table FloorDivOptions {
+}
+
+table SquareOptions {
+}
+
+table ZerosLikeOptions {
+}
+
+table FillOptions {
+}
+
+table FloorModOptions {
+}
+
+table RangeOptions {
+}
+
+table LeakyReluOptions {
+ alpha:float;
+}
+
+table SquaredDifferenceOptions {
+}
+
+enum MirrorPadMode : byte {
+ // Doesn't include borders.
+ REFLECT = 0,
+ // Includes borders.
+ SYMMETRIC = 1,
+}
+
+table MirrorPadOptions {
+ mode:MirrorPadMode;
+}
+
+table UniqueOptions {
+ idx_out_type:TensorType = INT32;
+}
+
+table ReverseV2Options {
+}
+
+table AddNOptions {
+}
+
+table GatherNdOptions {
+}
+
+table WhereOptions {
+}
+
+table ReverseSequenceOptions {
+ seq_dim:int;
+ batch_dim:int = 0;
+}
+
+table MatrixDiagOptions {
+}
+
+table QuantizeOptions {
+}
+
+table MatrixSetDiagOptions {
+}
+
+table IfOptions {
+ then_subgraph_index:int;
+ else_subgraph_index:int;
+}
+
+table CallOnceOptions {
+ init_subgraph_index:int;
+}
+
+table WhileOptions {
+ cond_subgraph_index:int;
+ body_subgraph_index:int;
+}
+
+table NonMaxSuppressionV4Options {
+}
+
+table NonMaxSuppressionV5Options {
+}
+
+table ScatterNdOptions {
+}
+
+table SelectV2Options {
+}
+
+table DensifyOptions {
+}
+
+table SegmentSumOptions {
+}
+
+table BatchMatMulOptions {
+ adjoint_lhs:bool;
+ adjoint_rhs:bool;
+ // Parameters for BatchMatMul version 4 or above.
+ // If set to true, then weights-only op will use asymmetric quantization for
+ // inputs.
+ asymmetric_quantize_inputs: bool;
+}
+
+table CumsumOptions {
+ exclusive:bool;
+ reverse:bool;
+}
+
+table BroadcastToOptions {
+}
+
+table Rfft2dOptions {
+}
+
+table HashtableOptions {
+ // The identity of hash tables. This identity will be used across different
+ // subgraphs in the same interpreter instance.
+ table_id:int;
+ key_dtype:TensorType;
+ value_dtype:TensorType;
+}
+
+table HashtableFindOptions {
+}
+
+table HashtableImportOptions {
+}
+
+table HashtableSizeOptions {
+}
+
+table VarHandleOptions {
+ container:string;
+ shared_name:string;
+}
+
+table ReadVariableOptions {
+}
+
+table AssignVariableOptions {
+}
+
+table RandomOptions {
+ seed: int;
+ seed2: int;
+}
+
+table BCQGatherOptions {
+ input_hidden_size: int;
+ axis: int;
+}
+
+table BCQFullyConnectedOptions {
+ weights_hidden_size: int;
+ fused_activation_function:ActivationFunctionType;
+}
+
+table InstanceNormOptions {
+ epsilon:float;
+ fused_activation_function:ActivationFunctionType;
+}
+
+// An OperatorCode can be an enum value (BuiltinOperator) if the operator is a
+// builtin, or a string if the operator is custom.
+table OperatorCode {
+ // This field is for backward compatibility. This field will be used when
+ // the value of the extended builtin_code field is less than
+ // BuiltinOperator_PLACEHOLDER_FOR_GREATER_OP_CODES.
+ deprecated_builtin_code:byte;
+ custom_code:string;
+
+ // The version of the operator. The version needs to be bumped whenever new
+ // parameters are introduced into an op.
+ version:int = 1;
+
+ // This field is introduced for resolving the op builtin code shortage
+ // problem (the original BuiltinOperator enum field was represented as a
+ // byte). This field will be used when the value of the extended
+ // builtin_code field is greater than BuiltinOperator_PLACEHOLDER_FOR_GREATER_OP_CODES.
+ builtin_code:BuiltinOperator;
+}
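A minimal sketch of how a consumer might pick the effective builtin code from the two fields above (hypothetical helper that mirrors the comments; not a verbatim runtime API):

    PLACEHOLDER_FOR_GREATER_OP_CODES = 127

    def effective_builtin_code(deprecated_builtin_code, builtin_code):
        # Ops with small (or negative, for the Circle extensions) codes fit
        # in the deprecated byte-sized field; newer ops pin it at the
        # placeholder and carry the real value in the extended int32 field.
        if deprecated_builtin_code < PLACEHOLDER_FOR_GREATER_OP_CODES:
            return deprecated_builtin_code
        return builtin_code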
+
+enum CustomOptionsFormat : byte {
+ FLEXBUFFERS = 0,
+}
+
+enum DataFormat : byte {
+ // For 2D data, NHWC(batch, height, width, channels)
+ // For 3D data, NDHWC(batch, depth, height, width, channels)
+ CHANNELS_LAST = 0,
+ // For 2D data, NCHW(batch, channels, height, width)
+ // For 3D data, NCDHW(batch, channels, depth, height, width)
+ CHANNELS_FIRST = 1,
+}
+
+// An operator takes tensors as inputs and outputs. The type of operation being
+// performed is determined by an index into the list of valid OperatorCodes,
+// while the specifics of each operation are configured using builtin_options
+// or custom_options.
+table Operator {
+ // Index into the operator_codes array. Using an integer here avoids
+ // complicated map lookups.
+ opcode_index:uint;
+
+ // Optional inputs are indicated by -1.
+ inputs:[int];
+ outputs:[int];
+
+ builtin_options:BuiltinOptions;
+ custom_options:[ubyte];
+ custom_options_format:CustomOptionsFormat;
+
+ // A list of booleans indicating the input tensors which are being mutated by
+ // this operator (e.g. used by RNN and LSTM).
+ // For example, if the "inputs" array refers to 5 tensors and the second and
+ // fifth are mutable variables, then this list will contain
+ // [false, true, false, false, true].
+ //
+ // If the list is empty, no variable is mutated in this operator.
+ // The list either has the same length as `inputs`, or is empty.
+ mutating_variable_inputs:[bool];
+
+ // A list of indices to the subgraph's "tensors" that are internal to an Op.
+ // Internal tensors are those that do not flow in or out of the operation,
+ // but instead are part of internal computation. As such, the operation's
+ // implementation may manage its memory more efficiently. They are needed
+ // however (i.e. not just an implementation detail) since they are part of the
+ // computation, which may require relevant metadata such as quantization
+ // parameters.
+ intermediates:[int];
+}
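A quick consistency check for the two invariants described above (optional inputs marked with -1, and mutating_variable_inputs either empty or input-aligned); field access is a hypothetical sketch:

    def check_operator(inputs, mutating_variable_inputs):
        # -1 marks an omitted optional input; everything else is a tensor index.
        assert all(i >= -1 for i in inputs)
        # The mutation mask is either empty or exactly as long as `inputs`.
        assert (mutating_variable_inputs == []
                or len(mutating_variable_inputs) == len(inputs))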
+
+// The root type, defining a subgraph, which typically represents an entire
+// model.
+table SubGraph {
+ // A list of all tensors used in this subgraph.
+ tensors:[Tensor];
+
+ // Indices of the tensors that are inputs into this subgraph. Note this is
+ // the list of non-static tensors that feed into the subgraph for inference.
+ inputs:[int];
+
+ // Indices of the tensors that are outputs out of this subgraph. Note this is
+ // the list of output tensors that are considered the product of the
+ // subgraph's inference.
+ outputs:[int];
+
+ // All operators, in execution order.
+ operators:[Operator];
+
+ // Name of this subgraph (used for debugging).
+ name:string;
+
+ // Data format for input/output of SubGraph
+ data_format: DataFormat;
+}
+
+// Table of raw data buffers (used for constant tensors). Referenced by tensors
+// by index. The generous alignment accommodates mmap-friendly data structures.
+table Buffer {
+ data:[ubyte] (force_align: 16);
+}
+
+table Metadata {
+ // A human readable string to uniquely identify a Metadata.
+ name:string;
+ // An index to the buffers table.
+ buffer:uint;
+}
+
+// Map from an alias name of tensor to tensor index in the graph.
+// This is used in Signature def.
+table TensorMap {
+ // Represents the alias to use for this tensor.
+ name:string;
+
+ // The actual tensor index in the primary graph that 'name' corresponds to.
+ tensor_index:uint;
+}
+
+// This corresponds to SignatureDef in TensorFlow SavedModel.
+// The SignatureDef will be part of the SavedModel provided for conversion.
+table SignatureDef {
+ // Named inputs for this signature.
+ inputs:[TensorMap];
+
+ // Named outputs for this signature.
+ outputs:[TensorMap];
+
+ // Key value which was in the TensorFlow SavedModel SignatureDef map.
+ signature_key:string;
+
+ // Model tag, deprecated.
+ deprecated_tag:string (deprecated);
+
+ // Index of the subgraph that corresponds to the exported method.
+ subgraph_index:uint;
+}
+
+table Model {
+ // Version of the schema.
+ version:uint;
+
+ // A list of all operator codes used in this model. This is
+ // kept in order because operators carry an index into this
+ // vector.
+ operator_codes:[OperatorCode];
+
+ // All the subgraphs of the model. The 0th is assumed to be the main
+ // model.
+ subgraphs:[SubGraph];
+
+ // A description of the model.
+ description:string;
+
+ // Buffers of the model.
+ // Note the 0th entry of this array must be an empty buffer (sentinel).
+ // This is a convention so that tensors without a buffer can provide 0 as
+ // their buffer.
+ buffers:[Buffer];
+
+ // Metadata about the model. Indirects into the existing buffers list.
+ // Deprecated, prefer to use metadata field.
+ metadata_buffer:[int];
+
+ // Metadata about the model.
+ metadata:[Metadata];
+
+ // Optional SignatureDefs for the model.
+ signature_defs:[SignatureDef];
+}
+
+root_type Model;
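A small validation sketch for the buffer conventions noted above (buffers[0] is an empty sentinel, and every tensor's buffer index must resolve); attribute names follow the schema, the object API is assumed:

    def check_model_buffers(model):
        # Convention: the 0th buffer is empty, so tensors without data
        # can simply point at buffer 0.
        assert len(model.buffers) > 0 and not model.buffers[0].data
        for subgraph in model.subgraphs:
            for tensor in subgraph.tensors:
                assert 0 <= tensor.buffer < len(model.buffers)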
diff --git a/res/PyTorchExamples/examples/BatchToSpaceND/__init__.py b/res/PyTorchExamples/examples/BatchToSpaceND/__init__.py
new file mode 100644
index 000000000..cecc9c88a
--- /dev/null
+++ b/res/PyTorchExamples/examples/BatchToSpaceND/__init__.py
@@ -0,0 +1,49 @@
+import torch
+import torch.nn as nn
+import numpy as np
+
+
+# model equivalent to tensorflow batch_to_space, but with channels first layout
+class net_BatchToSpaceND(nn.Module):
+ def __init__(self, block_shape, crop):
+ super().__init__()
+ self.block_shape = block_shape
+ self.crop = crop
+
+ def forward(self, input):
+ # Prepare attributes
+ input_shape = list(map(int, list(input.shape)))
+ block_shape = self.block_shape
+ crop = self.crop
+
+ # number of spatial dimensions
+ m = len(block_shape)
+ # rest of dimensions
+ n = len(input.shape) - m
+ # output batch size
+ batch_size = input_shape[0] // np.prod(block_shape)
+
+ unfolded_shape = list(block_shape) + [batch_size] + input_shape[1:]
+ fold_shape = [batch_size] + input_shape[1:n] + [
+ input_shape[i + n] * block_shape[i] for i in range(m)
+ ]
+ permute_dims = list(range(
+ m, m + n)) + [i + mod for i in range(m) for mod in [n + m, 0]]
+
+ # Actual model starts here
+ unfolded_input = input.reshape(unfolded_shape)
+ permuted = torch.permute(unfolded_input, permute_dims)
+ full_output = permuted.reshape(fold_shape)
+ # crop output tensor
+ crop_output = full_output
+ for i in range(m):
+ crop_size = sum(crop[i])
+ crop_output = crop_output.narrow(i + n, crop[i][0],
+ fold_shape[i + n] - crop_size)
+ return crop_output
+
+
+_model_ = net_BatchToSpaceND([2, 2], [[1, 0], [0, 1]])
+
+# dummy input for onnx generation
+_dummy_ = torch.randn(8, 4, 3, 3)
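For this configuration (block_shape [2, 2], crop [[1, 0], [0, 1]]) the batch of 8 folds down to 2 and each 3x3 spatial map unfolds to 6x6 before cropping to 5x5; a minimal shape check:

    out = _model_(_dummy_)
    assert tuple(out.shape) == (2, 4, 5, 5)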
diff --git a/res/PyTorchExamples/examples/Conv2d-pad/__init__.py b/res/PyTorchExamples/examples/Conv2d-pad/__init__.py
new file mode 100644
index 000000000..4c2b45e18
--- /dev/null
+++ b/res/PyTorchExamples/examples/Conv2d-pad/__init__.py
@@ -0,0 +1,18 @@
+import torch
+import torch.nn as nn
+
+
+# model
+class net_Conv2d(nn.Module):
+ def __init__(self):
+ super().__init__()
+ self.op = nn.Conv2d(1, 1, 1, padding=(1, 0))
+
+ def forward(self, input):
+ return self.op(input)
+
+
+_model_ = net_Conv2d()
+
+# dummy input for onnx generation
+_dummy_ = torch.randn(1, 1, 5, 17)
diff --git a/res/PyTorchExamples/examples/Conv2d-yuv2rgb/__init__.py b/res/PyTorchExamples/examples/Conv2d-yuv2rgb/__init__.py
new file mode 100644
index 000000000..235015ce2
--- /dev/null
+++ b/res/PyTorchExamples/examples/Conv2d-yuv2rgb/__init__.py
@@ -0,0 +1,24 @@
+import torch
+import torch.nn as nn
+
+
+# model representing YUVtoRGB conversion
+# for details see https://en.wikipedia.org/wiki/YUV#Conversion_to.2Ffrom_RGB
+class net_Conv2dYUVtoRGB(nn.Module):
+ def __init__(self):
+ super().__init__()
+ self.op = nn.Conv2d(3, 3, 1, bias=False)
+ raw_weights = [[1.0, 0.0, 1.13983], \
+ [1.0, -0.39465, -0.58060], \
+ [1.0, 2.03211, 0.0]]
+ weights = torch.Tensor(raw_weights).reshape(3, 3, 1, 1)
+ self.op.weight = torch.nn.Parameter(weights, requires_grad=False)
+
+ def forward(self, input):
+ return torch.clamp(self.op(input), 0.0, 1.0)
+
+
+_model_ = net_Conv2dYUVtoRGB()
+
+# dummy input for onnx generation
+_dummy_ = torch.randn(1, 3, 4, 4)
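A quick spot check of the fixed conversion matrix: YUV white (Y=1, U=V=0) should map to RGB (1, 1, 1):

    yuv_white = torch.zeros(1, 3, 1, 1)
    yuv_white[0, 0, 0, 0] = 1.0  # Y = 1, U = V = 0
    assert torch.allclose(_model_(yuv_white), torch.ones(1, 3, 1, 1))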
diff --git a/res/PyTorchExamples/examples/LSTM-bi/__init__.py b/res/PyTorchExamples/examples/LSTM-bi/__init__.py
new file mode 100644
index 000000000..6f5cea469
--- /dev/null
+++ b/res/PyTorchExamples/examples/LSTM-bi/__init__.py
@@ -0,0 +1,28 @@
+import torch
+import torch.nn as nn
+
+_seq_length = 5
+_batch_size = 3
+_input_size = 10
+_hidden_size = 20
+_number_layers = 1
+
+
+# model
+class net_LSTM(nn.Module):
+ def __init__(self):
+ super().__init__()
+ self.op = nn.LSTM(_input_size, _hidden_size, _number_layers, bidirectional=True)
+
+ def forward(self, inputs):
+ return self.op(inputs[0], (inputs[1], inputs[2]))
+
+
+_model_ = net_LSTM()
+
+# dummy input for onnx generation
+_dummy_ = [
+ torch.randn(_seq_length, _batch_size, _input_size),
+ torch.randn(_number_layers * 2, _batch_size, _hidden_size),
+ torch.randn(_number_layers * 2, _batch_size, _hidden_size)
+]
diff --git a/res/PyTorchExamples/examples/LSTM-nobias/__init__.py b/res/PyTorchExamples/examples/LSTM-nobias/__init__.py
new file mode 100644
index 000000000..d64704ae4
--- /dev/null
+++ b/res/PyTorchExamples/examples/LSTM-nobias/__init__.py
@@ -0,0 +1,28 @@
+import torch
+import torch.nn as nn
+
+_seq_length = 2
+_batch_size = 5
+_input_size = 15
+_hidden_size = 10
+_number_layers = 1
+
+
+# model
+class net_LSTM(nn.Module):
+ def __init__(self):
+ super().__init__()
+ self.op = nn.LSTM(_input_size, _hidden_size, _number_layers, bias=False)
+
+ def forward(self, inputs):
+ return self.op(inputs[0], (inputs[1], inputs[2]))
+
+
+_model_ = net_LSTM()
+
+# dummy input for onnx generation
+_dummy_ = [
+ torch.randn(_seq_length, _batch_size, _input_size),
+ torch.randn(_number_layers, _batch_size, _hidden_size),
+ torch.randn(_number_layers, _batch_size, _hidden_size)
+]
diff --git a/res/PyTorchExamples/examples/LSTM-noinit/__init__.py b/res/PyTorchExamples/examples/LSTM-noinit/__init__.py
new file mode 100644
index 000000000..7aa79d6d6
--- /dev/null
+++ b/res/PyTorchExamples/examples/LSTM-noinit/__init__.py
@@ -0,0 +1,24 @@
+import torch
+import torch.nn as nn
+
+_seq_length = 1
+_batch_size = 5
+_input_size = 8
+_hidden_size = 10
+_number_layers = 1
+
+
+# model
+class net_LSTM(nn.Module):
+ def __init__(self):
+ super().__init__()
+ self.op = nn.LSTM(_input_size, _hidden_size, _number_layers)
+
+ def forward(self, input):
+ return self.op(input)
+
+
+_model_ = net_LSTM()
+
+# dummy input for onnx generation
+_dummy_ = torch.randn(_seq_length, _batch_size, _input_size)
diff --git a/res/PyTorchExamples/examples/MaxPool2d-am/__init__.py b/res/PyTorchExamples/examples/MaxPool2d-am/__init__.py
new file mode 100644
index 000000000..4225cd6c1
--- /dev/null
+++ b/res/PyTorchExamples/examples/MaxPool2d-am/__init__.py
@@ -0,0 +1,18 @@
+import torch
+import torch.nn as nn
+
+
+# model
+class net_MaxPool2d(nn.Module):
+ def __init__(self):
+ super().__init__()
+ self.op = nn.MaxPool2d(3, stride=1, return_indices=True)
+
+ def forward(self, input):
+ return self.op(input)
+
+
+_model_ = net_MaxPool2d()
+
+# dummy input for onnx generation
+_dummy_ = torch.randn(1, 2, 4, 4)
diff --git a/res/PyTorchExamples/examples/PixelShuffle/__init__.py b/res/PyTorchExamples/examples/PixelShuffle/__init__.py
new file mode 100644
index 000000000..14374ce11
--- /dev/null
+++ b/res/PyTorchExamples/examples/PixelShuffle/__init__.py
@@ -0,0 +1,18 @@
+import torch
+import torch.nn as nn
+
+
+# model
+class net_PixelShuffle(nn.Module):
+ def __init__(self, upscale_factor):
+ super().__init__()
+ self.op = torch.nn.PixelShuffle(upscale_factor)
+
+ def forward(self, input):
+ return self.op(input)
+
+
+_model_ = net_PixelShuffle(2)
+
+# dummy input for onnx generation
+_dummy_ = torch.randn(1, 8, 3, 3)
diff --git a/res/PyTorchExamples/examples/RNN-bi/__init__.py b/res/PyTorchExamples/examples/RNN-bi/__init__.py
new file mode 100644
index 000000000..86f6e4fc0
--- /dev/null
+++ b/res/PyTorchExamples/examples/RNN-bi/__init__.py
@@ -0,0 +1,27 @@
+import torch
+import torch.nn as nn
+
+_input_size = 3
+_seq_len = 2
+_batch = 2
+_hidden_size = 5
+_num_layers = 2
+
+
+# model
+class net_RNN(nn.Module):
+ def __init__(self):
+ super().__init__()
+ self.op = nn.RNN(_input_size, _hidden_size, _num_layers, bidirectional=True)
+
+ def forward(self, inputs):
+ return self.op(inputs[0], inputs[1])
+
+
+_model_ = net_RNN()
+
+# dummy input for onnx generation
+_dummy_ = [
+ torch.randn(_seq_len, _batch, _input_size),
+ torch.randn(2 * _num_layers, _batch, _hidden_size)
+]
diff --git a/res/PyTorchExamples/examples/RNN-nobias/__init__.py b/res/PyTorchExamples/examples/RNN-nobias/__init__.py
new file mode 100644
index 000000000..a6a314877
--- /dev/null
+++ b/res/PyTorchExamples/examples/RNN-nobias/__init__.py
@@ -0,0 +1,26 @@
+import torch
+import torch.nn as nn
+
+_input_size = 4
+_seq_len = 2
+_batch = 3
+_hidden_size = 3
+
+
+# model
+class net_RNN(nn.Module):
+ def __init__(self):
+ super().__init__()
+ self.op = nn.RNN(_input_size, _hidden_size, 1, bias=False)
+
+ def forward(self, inputs):
+ return self.op(inputs[0], inputs[1])
+
+
+_model_ = net_RNN()
+
+# dummy input for onnx generation
+_dummy_ = [
+ torch.randn(_seq_len, _batch, _input_size),
+ torch.randn(1, _batch, _hidden_size)
+]
diff --git a/res/PyTorchExamples/examples/RNN-noinit/__init__.py b/res/PyTorchExamples/examples/RNN-noinit/__init__.py
new file mode 100644
index 000000000..492c2d0ed
--- /dev/null
+++ b/res/PyTorchExamples/examples/RNN-noinit/__init__.py
@@ -0,0 +1,23 @@
+import torch
+import torch.nn as nn
+
+_input_size = 4
+_seq_len = 2
+_batch = 3
+_hidden_size = 3
+
+
+# model
+class net_RNN(nn.Module):
+ def __init__(self):
+ super().__init__()
+ self.op = nn.RNN(_input_size, _hidden_size, 1)
+
+ def forward(self, input):
+ return self.op(input)
+
+
+_model_ = net_RNN()
+
+# dummy input for onnx generation
+_dummy_ = torch.randn(_seq_len, _batch, _input_size)
diff --git a/res/PyTorchExamples/examples/RNN-relu/__init__.py b/res/PyTorchExamples/examples/RNN-relu/__init__.py
new file mode 100644
index 000000000..c59c42192
--- /dev/null
+++ b/res/PyTorchExamples/examples/RNN-relu/__init__.py
@@ -0,0 +1,26 @@
+import torch
+import torch.nn as nn
+
+_input_size = 4
+_seq_len = 2
+_batch = 3
+_hidden_size = 3
+
+
+# model
+class net_RNN(nn.Module):
+ def __init__(self):
+ super().__init__()
+ self.op = nn.RNN(_input_size, _hidden_size, 1, nonlinearity='relu')
+
+ def forward(self, inputs):
+ return self.op(inputs[0], inputs[1])
+
+
+_model_ = net_RNN()
+
+# dummy input for onnx generation
+_dummy_ = [
+ torch.randn(_seq_len, _batch, _input_size),
+ torch.randn(1, _batch, _hidden_size)
+]
diff --git a/res/PyTorchExamples/examples/SpaceToBatchND/__init__.py b/res/PyTorchExamples/examples/SpaceToBatchND/__init__.py
new file mode 100644
index 000000000..78d57fd66
--- /dev/null
+++ b/res/PyTorchExamples/examples/SpaceToBatchND/__init__.py
@@ -0,0 +1,49 @@
+import torch
+import torch.nn as nn
+import numpy as np
+
+
+# model equivalent to tensorflow space_to_batch, but with channels first layout
+class net_SpaceToBatchND(nn.Module):
+ def __init__(self, block_shape, pad):
+ super().__init__()
+ self.block_shape = block_shape
+ self.pad = pad
+
+ def forward(self, input):
+ # Prepare attributes
+ input_shape = list(map(int, list(input.shape)))
+ block_shape = self.block_shape
+ pad = self.pad
+
+ # number of spatial dimensions
+ m = len(block_shape)
+ # rest of dimensions
+ n = len(input.shape) - m
+ # output batch size
+ batch_size = input_shape[0]
+
+ out_spatial_dim = [
+ (input_shape[i + n] + pad[i * 2] + pad[i * 2 + 1]) // block_shape[i]
+ for i in range(m)
+ ]
+ unfolded_shape = [batch_size] + input_shape[1:n] + [
+ dim for i in range(m) for dim in [out_spatial_dim[i], block_shape[i]]
+ ]
+ fold_shape = [batch_size * np.prod(block_shape)
+ ] + input_shape[1:n] + out_spatial_dim
+ permute_dims = list(range(n + 1, n + 2 * m, 2)) + list(range(n)) + list(
+ range(n, n + 2 * m, 2))
+
+ # Actual model starts here
+ padded_input = torch.nn.functional.pad(input, pad)
+ unfolded_input = padded_input.reshape(unfolded_shape)
+ permuted = torch.permute(unfolded_input, permute_dims)
+ output = permuted.reshape(fold_shape)
+ return output
+
+
+_model_ = net_SpaceToBatchND([2, 2], [1, 0, 0, 1])
+
+# dummy input for onnx generation
+_dummy_ = torch.randn(2, 4, 5, 5)
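For this configuration (block_shape [2, 2], pad [1, 0, 0, 1]) the batch of 2 grows to 8 and each padded 6x6 spatial map folds to 3x3; a minimal shape check:

    out = _model_(_dummy_)
    assert tuple(out.shape) == (8, 4, 3, 3)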
diff --git a/res/PyTorchExamples/examples/SpaceToDepth/__init__.py b/res/PyTorchExamples/examples/SpaceToDepth/__init__.py
new file mode 100644
index 000000000..62b225ddb
--- /dev/null
+++ b/res/PyTorchExamples/examples/SpaceToDepth/__init__.py
@@ -0,0 +1,30 @@
+import torch
+import torch.nn as nn
+import numpy as np
+
+
+# model, equivalent to torch.pixel_unshuffle from torch 1.9+
+class net_SpaceToDepth(nn.Module):
+ def __init__(self, block_size):
+ super().__init__()
+ self.block_size = block_size
+
+ def forward(self, input):
+ # Prepare attributes
+ b_size = self.block_size
+ batch, input_c, input_h, input_w = list(map(int, list(input.shape)))
+ out_c = input_c * b_size * b_size
+ out_h = input_h // b_size
+ out_w = input_w // b_size
+
+ # Actual model starts here
+ x = input.reshape(batch, input_c, out_h, b_size, out_w, b_size)
+ x = x.permute([0, 1, 3, 5, 2, 4])
+ x = x.reshape([batch, out_c, out_h, out_w])
+ return x
+
+
+_model_ = net_SpaceToDepth(2)
+
+# dummy input for onnx generation
+_dummy_ = torch.randn(1, 2, 6, 6)
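Since the model mirrors pixel_unshuffle, on torch 1.9+ the two should agree exactly (a sanity check, assuming that torch version is available):

    expected = torch.nn.functional.pixel_unshuffle(_dummy_, 2)
    assert torch.equal(_model_(_dummy_), expected)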
diff --git a/res/PyTorchExamples/examples/clamp/__init__.py b/res/PyTorchExamples/examples/clamp/__init__.py
new file mode 100644
index 000000000..92b72864f
--- /dev/null
+++ b/res/PyTorchExamples/examples/clamp/__init__.py
@@ -0,0 +1,17 @@
+import torch
+import torch.nn as nn
+
+
+# model
+class net_clamp(nn.Module):
+ def __init__(self):
+ super().__init__()
+
+ def forward(self, input):
+ return torch.clamp(input, 0, 10)
+
+
+_model_ = net_clamp()
+
+# dummy input for onnx generation
+_dummy_ = torch.randn(1, 2, 3, 3)
diff --git a/res/PyTorchExamples/examples/interpolate/__init__.py b/res/PyTorchExamples/examples/interpolate/__init__.py
new file mode 100644
index 000000000..ba0da42f4
--- /dev/null
+++ b/res/PyTorchExamples/examples/interpolate/__init__.py
@@ -0,0 +1,30 @@
+import torch
+import torch.nn as nn
+
+
+# model
+#
+# Notes:
+# - This operation requires ONNX opset version 11 or higher
+# - tf_onnx 1.9 fails to convert this model with opset version 13+, because the unsqueeze operation is not supported yet
+class net_interpolate(nn.Module):
+ def __init__(self, scale_factor):
+ super().__init__()
+ self.scale_factor = scale_factor
+
+ def forward(self, input):
+ return torch.nn.functional.interpolate(
+ input,
+ scale_factor=self.scale_factor,
+ mode='bilinear',
+ align_corners=True,
+ recompute_scale_factor=True)
+
+ def onnx_opset_version(self):
+ return 11
+
+
+_model_ = net_interpolate([2, 2])
+
+# dummy input for onnx generation
+_dummy_ = torch.randn(1, 2, 3, 3)
diff --git a/res/PyTorchExamples/examples/normalize/__init__.py b/res/PyTorchExamples/examples/normalize/__init__.py
new file mode 100644
index 000000000..288353ab4
--- /dev/null
+++ b/res/PyTorchExamples/examples/normalize/__init__.py
@@ -0,0 +1,17 @@
+import torch
+import torch.nn as nn
+
+
+# model
+class net_normalize(nn.Module):
+ def __init__(self):
+ super().__init__()
+
+ def forward(self, input):
+ return torch.nn.functional.normalize(input, p=2.0, dim=3, eps=1e-12)
+
+
+_model_ = net_normalize()
+
+# dummy input for onnx generation
+_dummy_ = torch.randn(1, 2, 3, 3)
diff --git a/res/PyTorchExamples/examples/strided_slice/__init__.py b/res/PyTorchExamples/examples/strided_slice/__init__.py
new file mode 100644
index 000000000..7277da873
--- /dev/null
+++ b/res/PyTorchExamples/examples/strided_slice/__init__.py
@@ -0,0 +1,25 @@
+import torch
+import torch.nn as nn
+
+
+# model
+#
+# Notes:
+# - This model requires opset version 10+. Earlier versions do not support strides.
+class net_strided_slice(nn.Module):
+ def __init__(self, begin, end, stride):
+ super().__init__()
+ self.key = [slice(begin[i], end[i], stride[i]) for i in range(len(begin))]
+
+ def forward(self, input):
+ # this is a general way to do input[:, :, 1:5:2, 0:5:2]
+ return input[self.key]
+
+ def onnx_opset_version(self):
+ return 10
+
+
+_model_ = net_strided_slice([0, 0, 1, 0], [1, 3, 5, 5], [1, 1, 2, 2])
+
+# dummy input for onnx generation
+_dummy_ = torch.randn(1, 3, 5, 5)
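The key built in __init__ reproduces plain slice syntax; for the parameters above:

    out = _model_(_dummy_)
    assert torch.equal(out, _dummy_[0:1, 0:3, 1:5:2, 0:5:2])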
diff --git a/res/PyTorchExamples/ptem.py b/res/PyTorchExamples/ptem.py
index f205bac76..b6fb43887 100755
--- a/res/PyTorchExamples/ptem.py
+++ b/res/PyTorchExamples/ptem.py
@@ -48,8 +48,16 @@ for example in args.examples:
torch.save(module._model_, output_folder + example + ".pth")
print("Generate '" + example + ".pth' - Done")
+ opset_version = 9
+ if hasattr(module._model_, 'onnx_opset_version'):
+ opset_version = module._model_.onnx_opset_version()
+
torch.onnx.export(
- module._model_, module._dummy_, output_folder + example + ".onnx", verbose=True)
+ module._model_,
+ module._dummy_,
+ output_folder + example + ".onnx",
+ verbose=True,
+ opset_version=opset_version)
print("Generate '" + example + ".onnx' - Done")
onnx_model = onnx.load(output_folder + example + ".onnx")
diff --git a/res/TensorFlowLiteRecipes/FullyConnected_006/test.recipe b/res/TensorFlowLiteRecipes/FullyConnected_006/test.recipe
new file mode 100644
index 000000000..b5f329b57
--- /dev/null
+++ b/res/TensorFlowLiteRecipes/FullyConnected_006/test.recipe
@@ -0,0 +1,29 @@
+operand {
+ name: "in"
+ type: FLOAT32
+ shape { dim: 1 dim: 1 dim: 4 }
+}
+operand {
+ name: "weight"
+ type: FLOAT32
+ shape { dim: 2 dim: 4 }
+}
+operand {
+ name: "out"
+ type: FLOAT32
+ shape { dim: 1 dim: 1 dim: 2 }
+}
+operation {
+ type: "FullyConnected"
+ fullyconnected_options {
+ activation: NONE
+ keep_num_dims: true
+ }
+ input: "in"
+ input: "weight"
+ input: ""
+ output: "out"
+}
+input: "in"
+input: "weight"
+output: "out"
diff --git a/res/TensorFlowLiteRecipes/FullyConnected_006/test.reverse b/res/TensorFlowLiteRecipes/FullyConnected_006/test.reverse
new file mode 100644
index 000000000..e69de29bb
--- /dev/null
+++ b/res/TensorFlowLiteRecipes/FullyConnected_006/test.reverse
diff --git a/res/TensorFlowLiteRecipes/Gather_000/test.recipe b/res/TensorFlowLiteRecipes/Gather_000/test.recipe
index 4c6c99da6..b9b2412cf 100644
--- a/res/TensorFlowLiteRecipes/Gather_000/test.recipe
+++ b/res/TensorFlowLiteRecipes/Gather_000/test.recipe
@@ -24,5 +24,4 @@ operation {
output: "ofm"
}
input: "param"
-input: "indices"
output: "ofm"
diff --git a/res/TensorFlowLiteRecipes/Gather_001/test.recipe b/res/TensorFlowLiteRecipes/Gather_001/test.recipe
new file mode 100644
index 000000000..cc23cf11d
--- /dev/null
+++ b/res/TensorFlowLiteRecipes/Gather_001/test.recipe
@@ -0,0 +1,27 @@
+operand {
+ name: "param"
+ type: FLOAT32
+ shape { dim: 1 dim: 2 dim: 3 dim: 4 }
+}
+operand {
+ name: "indices"
+ type: INT32
+ shape { dim: 4 }
+}
+operand {
+ name: "ofm"
+ type: FLOAT32
+ shape { dim: 1 dim: 2 dim: 3 dim: 4 }
+}
+operation {
+ type: "Gather"
+ gather_options {
+ axis: 3
+ }
+ input: "param"
+ input: "indices"
+ output: "ofm"
+}
+input: "param"
+input: "indices"
+output: "ofm"
diff --git a/res/TensorFlowLiteRecipes/Gather_001/test.reverse b/res/TensorFlowLiteRecipes/Gather_001/test.reverse
new file mode 100644
index 000000000..e69de29bb
--- /dev/null
+++ b/res/TensorFlowLiteRecipes/Gather_001/test.reverse
diff --git a/res/TensorFlowLiteRecipes/Net_Conv_QuantDequant_000/test.recipe b/res/TensorFlowLiteRecipes/Net_Conv_QuantDequant_000/test.recipe
index fa7fa7df7..c5d387293 100644
--- a/res/TensorFlowLiteRecipes/Net_Conv_QuantDequant_000/test.recipe
+++ b/res/TensorFlowLiteRecipes/Net_Conv_QuantDequant_000/test.recipe
@@ -32,6 +32,7 @@ operand {
name: "quantize"
type: UINT8
shape { dim: 1 dim: 16 dim: 16 dim: 8 }
+ quant { scale: 1 zero_point: 128 }
}
operand {
name: "ofm"
diff --git a/res/TensorFlowLiteRecipes/Net_Gather_SparseToDense_AddV2_000/test.recipe b/res/TensorFlowLiteRecipes/Net_Gather_SparseToDense_AddV2_000/test.recipe
new file mode 100644
index 000000000..804d293fc
--- /dev/null
+++ b/res/TensorFlowLiteRecipes/Net_Gather_SparseToDense_AddV2_000/test.recipe
@@ -0,0 +1,131 @@
+operand {
+ name: "param_gather"
+ type: INT64
+ shape { dim: 3 }
+ filler { tag: "explicit" arg: "1" arg: "2" }
+}
+operand {
+ name: "indices_gather"
+ type: INT64
+ shape { dim: 1 }
+ filler { tag: "explicit" arg: "1" arg: "2" }
+}
+operand {
+ name: "ofm_gather"
+ type: INT64
+ shape { dim: 1 }
+}
+operand {
+ name: "shape_sparse"
+ type: INT64
+ shape { dim: 1 dim: 1 }
+ filler {
+ tag: "explicit"
+ arg: "3" arg: "5"
+ }
+}
+operand {
+ name: "values_sparse"
+ type: INT64
+ shape { dim: 1 }
+ filler { tag: "explicit" arg: "1" arg: "2" }
+}
+operand {
+ name: "defalut_value_sparse"
+ type: INT64
+ shape { }
+ filler { tag: "explicit" arg: "1" arg: "2" }
+}
+operand {
+ name: "ofm_sparse"
+ type: INT64
+ shape { dim: 3 }
+}
+operand {
+ name: "add_v2_2"
+ type: INT64
+ shape { dim: 3 }
+ filler { tag: "explicit" arg: "1" arg: "2" }
+}
+operand {
+ name: "ofm_add_v2"
+ type: INT64
+ shape { dim: 3 }
+}
+operand {
+ name: "ofm_cast"
+ type: INT32
+ shape { dim: 3 }
+}
+operand {
+ name: "ifm"
+ type: FLOAT32
+ shape { dim: 1 dim: 2 dim: 5 }
+}
+operand {
+ name: "perm"
+ type: INT32
+ shape { dim: 3 }
+ filler { tag: "explicit" arg: "0" arg: "2" arg: "1" }
+}
+operand {
+ name: "ofm_trans"
+ type: FLOAT32
+ shape { dim: 1 dim: 5 dim: 2 }
+}
+operand {
+ name: "ofm"
+ type: FLOAT32
+ shape { dim: 1 dim: 10 }
+}
+operation {
+ type: "Gather"
+ gather_options {
+ axis: 0
+ }
+ input: "param_gather"
+ input: "indices_gather"
+ output: "ofm_gather"
+}
+operation {
+ type: "SparseToDense"
+ sparse_to_dense_options {
+ validate_indices: false
+ }
+ input: "shape_sparse"
+ input: "values_sparse"
+ input: "ofm_gather"
+ input: "defalut_value_sparse"
+ output: "ofm_sparse"
+}
+operation {
+ type: "AddV2"
+ input: "ofm_sparse"
+ input: "add_v2_2"
+ output: "ofm_add_v2"
+}
+operation {
+ type: "Cast"
+ cast_options {
+ in_data_type: INT64
+ out_data_type: INT32
+ }
+ input: "ofm_add_v2"
+ output: "ofm_cast"
+}
+operation {
+ type: "Transpose"
+ transpose_options {
+ }
+ input: "ifm"
+ input: "perm"
+ output: "ofm_trans"
+}
+operation {
+ type: "Reshape"
+ input: "ofm_trans"
+ input: "ofm_cast"
+ output: "ofm"
+}
+input: "ifm"
+output: "ofm"
diff --git a/res/TensorFlowLiteRecipes/Part_Add_SVDF_000/test.recipe b/res/TensorFlowLiteRecipes/Part_Add_SVDF_000/test.recipe
new file mode 100644
index 000000000..d357a059f
--- /dev/null
+++ b/res/TensorFlowLiteRecipes/Part_Add_SVDF_000/test.recipe
@@ -0,0 +1,82 @@
+operand {
+ name: "ifm1"
+ type: FLOAT32
+ shape { dim: 1 dim: 16 }
+}
+operand {
+ name: "ifm2"
+ type: FLOAT32
+ shape { dim: 1 dim: 64 }
+}
+operand {
+ name: "weight_feature"
+ type: FLOAT32
+ shape { dim: 64 dim: 16 }
+ filler {
+ tag: "gaussian"
+ arg: "0.0"
+ arg: "1.0"
+ }
+}
+operand {
+ name: "weight_time"
+ type: FLOAT32
+ shape { dim: 64 dim: 8 }
+ filler {
+ tag: "gaussian"
+ arg: "0.0"
+ arg: "1.0"
+ }
+}
+operand {
+ name: "bias"
+ type: FLOAT32
+ shape { dim: 64 }
+ filler {
+ tag: "gaussian"
+ arg: "0.0"
+ arg: "1.0"
+ }
+}
+operand {
+ name: "input_activation_state"
+ type: FLOAT32
+ is_variable: true
+ shape { dim: 1 dim: 512 }
+}
+operand {
+ name: "svdf"
+ type: FLOAT32
+ shape { dim: 1 dim: 64 }
+}
+operand {
+ name: "ofm"
+ type: FLOAT32
+ shape { dim: 1 dim: 64 }
+}
+operation {
+ type: "SVDF"
+ svdf_options {
+ rank: 1
+ activation: RELU
+ asymmetric_quantize_inputs: false
+ }
+ input: "ifm1"
+ input: "weight_feature"
+ input: "weight_time"
+ input: "bias"
+ input: "input_activation_state"
+ output: "svdf"
+}
+operation {
+ type: "Add"
+ add_options {
+ activation: NONE
+ }
+ input: "svdf"
+ input: "ifm2"
+ output: "ofm"
+}
+input: "ifm1"
+input: "ifm2"
+output: "ofm"
diff --git a/res/TensorFlowLiteRecipes/Part_Mul_Sqrt_FC_nobias_000/test.recipe b/res/TensorFlowLiteRecipes/Part_Mul_Sqrt_FC_nobias_000/test.recipe
new file mode 100644
index 000000000..a712d2ac3
--- /dev/null
+++ b/res/TensorFlowLiteRecipes/Part_Mul_Sqrt_FC_nobias_000/test.recipe
@@ -0,0 +1,63 @@
+operand {
+ name: "in1"
+ type: FLOAT32
+ shape { dim: 2 dim: 4 }
+}
+operand {
+ name: "in2"
+ type: FLOAT32
+ shape { dim: 2 dim: 4 }
+}
+operand {
+ name: "mul"
+ type: FLOAT32
+ shape { dim: 2 dim: 4 }
+}
+operand {
+ name: "weight"
+ type: FLOAT32
+ shape { dim: 4 dim: 4 }
+ filler {
+ tag: "gaussian"
+ arg: "0.0"
+ arg: "1.0"
+ }
+}
+operand {
+ name: "sqrtout"
+ type: FLOAT32
+ shape { dim: 2 dim: 4 }
+}
+operand {
+ name: "fcout"
+ type: FLOAT32
+ shape { dim: 2 dim: 4 }
+}
+operation {
+ type: "Mul"
+ input: "in1"
+ input: "in2"
+ output: "mul"
+ mul_options {
+ activation: NONE
+ }
+}
+operation {
+ type: "Sqrt"
+ input: "mul"
+ output: "sqrtout"
+}
+operation {
+ type: "FullyConnected"
+ fullyconnected_options {
+ activation: NONE
+ }
+ input: "mul"
+ input: "weight"
+ input: ""
+ output: "fcout"
+}
+input: "in1"
+input: "in2"
+output: "fcout"
+output: "sqrtout"
diff --git a/res/TensorFlowLiteRecipes/Part_Split_Add_000/test.recipe b/res/TensorFlowLiteRecipes/Part_Split_Add_000/test.recipe
new file mode 100644
index 000000000..1d20443c8
--- /dev/null
+++ b/res/TensorFlowLiteRecipes/Part_Split_Add_000/test.recipe
@@ -0,0 +1,47 @@
+operand {
+ name: "ifm"
+ type: FLOAT32
+ shape { dim: 6 dim: 1 dim: 2 }
+}
+operand {
+ name: "split_dim"
+ type: INT32
+ shape { }
+ filler { tag: "explicit" arg: "0" }
+}
+operand {
+ name: "split1"
+ type: FLOAT32
+ shape { dim: 3 dim: 1 dim: 2 }
+}
+operand {
+ name: "split2"
+ type: FLOAT32
+ shape { dim: 3 dim: 1 dim: 2 }
+}
+operand {
+ name: "ofm"
+ type: FLOAT32
+ shape { dim: 3 dim: 1 dim: 2 }
+}
+operation {
+ type: "Split"
+ split_options {
+ num_splits: 2
+ }
+ input: "split_dim"
+ input: "ifm"
+ output: "split1"
+ output: "split2"
+}
+operation {
+ type: "Add"
+ input: "split1"
+ input: "split2"
+ output: "ofm"
+ add_options {
+ activation: NONE
+ }
+}
+input: "ifm"
+output: "ofm"
diff --git a/res/TensorFlowLiteRecipes/Quant_Add_000/test.recipe b/res/TensorFlowLiteRecipes/Quant_Add_000/test.recipe
new file mode 100644
index 000000000..5c150922e
--- /dev/null
+++ b/res/TensorFlowLiteRecipes/Quant_Add_000/test.recipe
@@ -0,0 +1,36 @@
+operand {
+ name: "ifm"
+ type: UINT8
+ shape { dim: 1 dim: 4 dim: 4 dim: 4 }
+ quant { scale: 1.0 zero_point: 0 }
+}
+operand {
+ name: "add_const"
+ type: UINT8
+ shape { dim: 1 dim: 1 dim: 1 dim: 4 }
+ quant { scale: 1.0 zero_point: 0 }
+ filler {
+ tag: "explicit"
+ arg: "0"
+ arg: "1"
+ arg: "2"
+ arg: "3"
+ }
+}
+operand {
+ name: "ofm"
+ type: UINT8
+ shape { dim: 1 dim: 4 dim: 4 dim: 4 }
+ quant { scale: 1.0 zero_point: 0 }
+}
+operation {
+ type: "Add"
+ input: "ifm"
+ input: "add_const"
+ output: "ofm"
+ add_options {
+ activation: NONE
+ }
+}
+input: "ifm"
+output: "ofm"
diff --git a/res/TensorFlowLiteRecipes/Quant_Add_000/test.rule b/res/TensorFlowLiteRecipes/Quant_Add_000/test.rule
new file mode 100644
index 000000000..7bde66240
--- /dev/null
+++ b/res/TensorFlowLiteRecipes/Quant_Add_000/test.rule
@@ -0,0 +1,10 @@
+# To check fake quantization.
+# All Ops are float32. Quantize/Dequantize Ops are inserted at the beginning/end of the model.
+
+RULE "VERIFY_FILE_FORMAT" $(verify_file_format) '=' 1
+
+RULE "IFM_FP32" $(tensor_dtype ifm) '=' FLOAT32
+RULE "ADD_CONST_FP32" $(tensor_dtype add_const_DQ) '=' FLOAT32
+RULE "ADD_FP32" $(tensor_dtype ofm) '=' FLOAT32
+RULE "QUANTIZE_OP" $(op_count QUANTIZE) '=' 2
+RULE "DEQUANTIZE_OP" $(op_count DEQUANTIZE) '=' 2
diff --git a/res/TensorFlowLiteRecipes/Quant_Conv_Mul_Add_000/test.qconf.json b/res/TensorFlowLiteRecipes/Quant_Conv_Mul_Add_000/test.qconf.json
new file mode 100644
index 000000000..536fef232
--- /dev/null
+++ b/res/TensorFlowLiteRecipes/Quant_Conv_Mul_Add_000/test.qconf.json
@@ -0,0 +1,11 @@
+{
+ "default_quantization_dtype" : "uint8",
+ "default_granularity" : "channel",
+ "layers" : [
+ {
+ "name" : "ofm_conv",
+ "dtype" : "int16",
+ "granularity" : "channel"
+ }
+ ]
+}
diff --git a/res/TensorFlowLiteRecipes/Quant_Conv_Mul_Add_000/test.recipe b/res/TensorFlowLiteRecipes/Quant_Conv_Mul_Add_000/test.recipe
new file mode 100644
index 000000000..3a3dba47f
--- /dev/null
+++ b/res/TensorFlowLiteRecipes/Quant_Conv_Mul_Add_000/test.recipe
@@ -0,0 +1,92 @@
+operand {
+ name: "ifm_conv"
+ type: FLOAT32
+ shape { dim: 1 dim: 64 dim: 64 dim: 32 }
+}
+operand {
+ name: "filter"
+ type: FLOAT32
+ shape { dim: 64 dim: 1 dim: 1 dim: 32 }
+ filler {
+ tag: "gaussian"
+ arg: "0.0"
+ arg: "1.0"
+ }
+}
+operand {
+ name: "bias"
+ type: FLOAT32
+ shape { dim: 64 }
+ filler {
+ tag: "gaussian"
+ arg: "0.0"
+ arg: "1.0"
+ }
+}
+operand {
+ name: "ofm_conv"
+ type: FLOAT32
+ shape { dim: 1 dim: 32 dim: 32 dim: 64 }
+}
+operand {
+ name: "mul_const"
+ type: FLOAT32
+ shape { dim: 1 dim: 1 dim: 1 dim: 64 }
+ filler {
+ tag: "gaussian"
+ arg: "0.0"
+ arg: "1.0"
+ }
+}
+operand {
+ name: "add_const"
+ type: FLOAT32
+ shape { dim: 1 dim: 1 dim: 1 dim: 64 }
+ filler {
+ tag: "gaussian"
+ arg: "0.0"
+ arg: "1.0"
+ }
+}
+operand {
+ name: "ofm_mul"
+ type: FLOAT32
+ shape { dim: 1 dim: 32 dim: 32 dim: 64 }
+}
+operand {
+ name: "ofm_add"
+ type: FLOAT32
+ shape { dim: 1 dim: 32 dim: 32 dim: 64 }
+}
+operation {
+ type: "Conv2D"
+ conv2d_options {
+ padding: VALID
+ stride_w: 2
+ stride_h: 2
+ }
+ input: "ifm_conv"
+ input: "filter"
+ input: "bias"
+ output: "ofm_conv"
+}
+operation {
+ type: "Mul"
+ input: "ofm_conv"
+ input: "mul_const"
+ output: "ofm_mul"
+ mul_options {
+ activation: NONE
+ }
+}
+operation {
+ type: "Add"
+ input: "ofm_mul"
+ input: "add_const"
+ output: "ofm_add"
+ add_options {
+ activation: NONE
+ }
+}
+input: "ifm_conv"
+output: "ofm_add"
diff --git a/res/TensorFlowLiteRecipes/Quant_Conv_Mul_Add_000/test.rule b/res/TensorFlowLiteRecipes/Quant_Conv_Mul_Add_000/test.rule
new file mode 100644
index 000000000..912405507
--- /dev/null
+++ b/res/TensorFlowLiteRecipes/Quant_Conv_Mul_Add_000/test.rule
@@ -0,0 +1,11 @@
+# To check mixed-precision quantization.
+# Conv is int16, and others u8. Quantize Ops are inserted before/after Conv.
+
+RULE "VERIFY_FILE_FORMAT" $(verify_file_format) '=' 1
+
+RULE "CONV_INT16" $(tensor_dtype ofm_conv) '=' INT16
+RULE "WEIGHTS_INT16" $(tensor_dtype filter) '=' INT16
+RULE "BIAS_INT32" $(tensor_dtype bias) '=' INT64
+RULE "MUL_U8" $(tensor_dtype ofm_mul) '=' UINT8
+RULE "ADD_U8" $(tensor_dtype ofm_add) '=' UINT8
+RULE "QUANTIZE_OP" $(op_count QUANTIZE) '=' 2
diff --git a/res/TensorFlowLiteRecipes/Quant_Conv_Mul_Add_001/test.qconf.json b/res/TensorFlowLiteRecipes/Quant_Conv_Mul_Add_001/test.qconf.json
new file mode 100644
index 000000000..824f0791d
--- /dev/null
+++ b/res/TensorFlowLiteRecipes/Quant_Conv_Mul_Add_001/test.qconf.json
@@ -0,0 +1,16 @@
+{
+ "default_quantization_dtype" : "uint8",
+ "default_granularity" : "channel",
+ "layers" : [
+ {
+ "name" : "ofm_conv",
+ "dtype" : "int16",
+ "granularity" : "channel"
+ },
+ {
+ "name" : "ofm_mul",
+ "dtype" : "int16",
+ "granularity" : "channel"
+ }
+ ]
+}
diff --git a/res/TensorFlowLiteRecipes/Quant_Conv_Mul_Add_001/test.recipe b/res/TensorFlowLiteRecipes/Quant_Conv_Mul_Add_001/test.recipe
new file mode 100644
index 000000000..3a3dba47f
--- /dev/null
+++ b/res/TensorFlowLiteRecipes/Quant_Conv_Mul_Add_001/test.recipe
@@ -0,0 +1,92 @@
+operand {
+ name: "ifm_conv"
+ type: FLOAT32
+ shape { dim: 1 dim: 64 dim: 64 dim: 32 }
+}
+operand {
+ name: "filter"
+ type: FLOAT32
+ shape { dim: 64 dim: 1 dim: 1 dim: 32 }
+ filler {
+ tag: "gaussian"
+ arg: "0.0"
+ arg: "1.0"
+ }
+}
+operand {
+ name: "bias"
+ type: FLOAT32
+ shape { dim: 64 }
+ filler {
+ tag: "gaussian"
+ arg: "0.0"
+ arg: "1.0"
+ }
+}
+operand {
+ name: "ofm_conv"
+ type: FLOAT32
+ shape { dim: 1 dim: 32 dim: 32 dim: 64 }
+}
+operand {
+ name: "mul_const"
+ type: FLOAT32
+ shape { dim: 1 dim: 1 dim: 1 dim: 64 }
+ filler {
+ tag: "gaussian"
+ arg: "0.0"
+ arg: "1.0"
+ }
+}
+operand {
+ name: "add_const"
+ type: FLOAT32
+ shape { dim: 1 dim: 1 dim: 1 dim: 64 }
+ filler {
+ tag: "gaussian"
+ arg: "0.0"
+ arg: "1.0"
+ }
+}
+operand {
+ name: "ofm_mul"
+ type: FLOAT32
+ shape { dim: 1 dim: 32 dim: 32 dim: 64 }
+}
+operand {
+ name: "ofm_add"
+ type: FLOAT32
+ shape { dim: 1 dim: 32 dim: 32 dim: 64 }
+}
+operation {
+ type: "Conv2D"
+ conv2d_options {
+ padding: VALID
+ stride_w: 2
+ stride_h: 2
+ }
+ input: "ifm_conv"
+ input: "filter"
+ input: "bias"
+ output: "ofm_conv"
+}
+operation {
+ type: "Mul"
+ input: "ofm_conv"
+ input: "mul_const"
+ output: "ofm_mul"
+ mul_options {
+ activation: NONE
+ }
+}
+operation {
+ type: "Add"
+ input: "ofm_mul"
+ input: "add_const"
+ output: "ofm_add"
+ add_options {
+ activation: NONE
+ }
+}
+input: "ifm_conv"
+output: "ofm_add"
diff --git a/res/TensorFlowLiteRecipes/Quant_Conv_Mul_Add_001/test.rule b/res/TensorFlowLiteRecipes/Quant_Conv_Mul_Add_001/test.rule
new file mode 100644
index 000000000..7df910a40
--- /dev/null
+++ b/res/TensorFlowLiteRecipes/Quant_Conv_Mul_Add_001/test.rule
@@ -0,0 +1,14 @@
+# To check mixed-precision quantization.
+# Conv, Mul: int16, Add: u8
+# Quantize Ops are inserted before Conv and after Mul.
+
+RULE "VERIFY_FILE_FORMAT" $(verify_file_format) '=' 1
+
+RULE "CONV_INT16" $(tensor_dtype ofm_conv) '=' INT16
+RULE "WEIGHTS_INT16" $(tensor_dtype filter) '=' INT16
+RULE "BIAS_INT64" $(tensor_dtype bias) '=' INT64
+RULE "MUL_INT16" $(tensor_dtype ofm_mul) '=' INT16
+RULE "MUL_CONST_INT16" $(tensor_dtype mul_const) '=' INT16
+RULE "ADD_UINT8" $(tensor_dtype ofm_add) '=' UINT8
+RULE "ADD_CONST_UINT8" $(tensor_dtype add_const) '=' UINT8
+RULE "QUANTIZE_OP" $(op_count QUANTIZE) '=' 2
diff --git a/res/TensorFlowLiteRecipes/Quant_Conv_Mul_Add_002/test.qconf.json b/res/TensorFlowLiteRecipes/Quant_Conv_Mul_Add_002/test.qconf.json
new file mode 100644
index 000000000..824f0791d
--- /dev/null
+++ b/res/TensorFlowLiteRecipes/Quant_Conv_Mul_Add_002/test.qconf.json
@@ -0,0 +1,16 @@
+{
+ "default_quantization_dtype" : "uint8",
+ "default_granularity" : "channel",
+ "layers" : [
+ {
+ "name" : "ofm_conv",
+ "dtype" : "int16",
+ "granularity" : "channel"
+ },
+ {
+ "name" : "ofm_mul",
+ "dtype" : "int16",
+ "granularity" : "channel"
+ }
+ ]
+}
diff --git a/res/TensorFlowLiteRecipes/Quant_Conv_Mul_Add_002/test.recipe b/res/TensorFlowLiteRecipes/Quant_Conv_Mul_Add_002/test.recipe
new file mode 100644
index 000000000..9e114b33a
--- /dev/null
+++ b/res/TensorFlowLiteRecipes/Quant_Conv_Mul_Add_002/test.recipe
@@ -0,0 +1,88 @@
+operand {
+ name: "ifm_conv"
+ type: FLOAT32
+ shape { dim: 1 dim: 64 dim: 64 dim: 32 }
+}
+operand {
+ name: "filter"
+ type: FLOAT32
+ shape { dim: 64 dim: 1 dim: 1 dim: 32 }
+ filler {
+ tag: "gaussian"
+ arg: "0.0"
+ arg: "1.0"
+ }
+}
+operand {
+ name: "bias"
+ type: FLOAT32
+ shape { dim: 64 }
+ filler {
+ tag: "gaussian"
+ arg: "0.0"
+ arg: "1.0"
+ }
+}
+operand {
+ name: "ofm_conv"
+ type: FLOAT32
+ shape { dim: 1 dim: 32 dim: 32 dim: 64 }
+}
+operand {
+ name: "mul_non_const"
+ type: FLOAT32
+ shape { dim: 1 dim: 1 dim: 1 dim: 64 }
+}
+operand {
+ name: "add_const"
+ type: FLOAT32
+ shape { dim: 1 dim: 1 dim: 1 dim: 64 }
+ filler {
+ tag: "gaussian"
+ arg: "0.0"
+ arg: "1.0"
+ }
+}
+operand {
+ name: "ofm_mul"
+ type: FLOAT32
+ shape { dim: 1 dim: 32 dim: 32 dim: 64 }
+}
+operand {
+ name: "ofm_add"
+ type: FLOAT32
+ shape { dim: 1 dim: 32 dim: 32 dim: 64 }
+}
+operation {
+ type: "Conv2D"
+ conv2d_options {
+ padding: VALID
+ stride_w: 2
+ stride_h: 2
+ }
+ input: "ifm_conv"
+ input: "filter"
+ input: "bias"
+ output: "ofm_conv"
+}
+operation {
+ type: "Mul"
+ input: "ofm_conv"
+ input: "mul_non_const"
+ output: "ofm_mul"
+ mul_options {
+ activation: NONE
+ }
+}
+operation {
+ type: "Add"
+ input: "ofm_mul"
+ input: "add_const"
+ output: "ofm_add"
+ add_options {
+ activation: NONE
+ }
+}
+input: "ifm_conv"
+input: "mul_non_const"
+output: "ofm_add"
diff --git a/res/TensorFlowLiteRecipes/Quant_Conv_Mul_Add_002/test.rule b/res/TensorFlowLiteRecipes/Quant_Conv_Mul_Add_002/test.rule
new file mode 100644
index 000000000..b539872fc
--- /dev/null
+++ b/res/TensorFlowLiteRecipes/Quant_Conv_Mul_Add_002/test.rule
@@ -0,0 +1,14 @@
+# To check mixed-precision quantization.
+# Conv, Mul: int16, Add: u8
+# Quantize Ops are inserted before Conv, after Mul, and before Mul's non-const input.
+
+RULE "VERIFY_FILE_FORMAT" $(verify_file_format) '=' 1
+
+RULE "CONV_INT16" $(tensor_dtype ofm_conv) '=' INT16
+RULE "WEIGHTS_INT16" $(tensor_dtype filter) '=' INT16
+RULE "BIAS_INT64" $(tensor_dtype bias) '=' INT64
+RULE "MUL_INT16" $(tensor_dtype ofm_mul) '=' INT16
+RULE "MUL_NON_CONST_UINT8" $(tensor_dtype mul_non_const) '=' UINT8
+RULE "ADD_UINT8" $(tensor_dtype ofm_add) '=' UINT8
+RULE "ADD_CONST_UINT8" $(tensor_dtype add_const) '=' UINT8
+RULE "QUANTIZE_OP" $(op_count QUANTIZE) '=' 3
diff --git a/res/TensorFlowLiteRecipes/Quant_Split_Add_000/test.qconf.json b/res/TensorFlowLiteRecipes/Quant_Split_Add_000/test.qconf.json
new file mode 100644
index 000000000..102e05fc7
--- /dev/null
+++ b/res/TensorFlowLiteRecipes/Quant_Split_Add_000/test.qconf.json
@@ -0,0 +1,11 @@
+{
+ "default_quantization_dtype" : "uint8",
+ "default_granularity" : "channel",
+ "layers" : [
+ {
+ "name" : "ofm1",
+ "dtype" : "int16",
+ "granularity" : "channel"
+ }
+ ]
+}
diff --git a/res/TensorFlowLiteRecipes/Quant_Split_Add_000/test.recipe b/res/TensorFlowLiteRecipes/Quant_Split_Add_000/test.recipe
new file mode 100644
index 000000000..ef7908979
--- /dev/null
+++ b/res/TensorFlowLiteRecipes/Quant_Split_Add_000/test.recipe
@@ -0,0 +1,47 @@
+operand {
+ name: "ifm"
+ type: FLOAT32
+ shape { dim: 6 dim: 1 dim: 2 }
+}
+operand {
+ name: "split_dim"
+ type: INT32
+ shape { }
+ filler { tag: "explicit" arg: "0" }
+}
+operand {
+ name: "ofm1"
+ type: FLOAT32
+ shape { dim: 3 dim: 1 dim: 2 }
+}
+operand {
+ name: "ofm2"
+ type: FLOAT32
+ shape { dim: 3 dim: 1 dim: 2 }
+}
+operation {
+ type: "Split"
+ split_options {
+ num_splits: 2
+ }
+ input: "split_dim"
+ input: "ifm"
+ output: "ofm1"
+ output: "ofm2"
+}
+operand {
+ name: "ofm"
+ type: FLOAT32
+ shape { dim: 3 dim: 1 dim: 2 }
+}
+operation {
+ type: "Add"
+ input: "ofm1"
+ input: "ofm2"
+ output: "ofm"
+ add_options {
+ activation: NONE
+ }
+}
+input: "ifm"
+output: "ofm"
diff --git a/res/TensorFlowLiteRecipes/Quant_Split_Add_000/test.rule b/res/TensorFlowLiteRecipes/Quant_Split_Add_000/test.rule
new file mode 100644
index 000000000..dc1ed874e
--- /dev/null
+++ b/res/TensorFlowLiteRecipes/Quant_Split_Add_000/test.rule
@@ -0,0 +1,11 @@
+# To check mixed-precision quantization for a node with multiple outputs.
+# Split: int16, Add: u8
+# Quantize Ops are inserted before Split and after all Split output nodes.
+
+RULE "VERIFY_FILE_FORMAT" $(verify_file_format) '=' 1
+
+RULE "INPUT_UINT8" $(tensor_dtype ifm) '=' UINT8
+RULE "SPLIT_OUT_1_INT16" $(tensor_dtype ofm1) '=' INT16
+RULE "SPLIT_OUT_2_INT16" $(tensor_dtype ofm2) '=' INT16
+RULE "ADD_UINT8" $(tensor_dtype ofm) '=' UINT8
+RULE "QUANTIZE_OP" $(op_count QUANTIZE) '=' 3
diff --git a/res/TensorFlowLiteRecipes/Quant_Split_Add_001/test.qconf.json b/res/TensorFlowLiteRecipes/Quant_Split_Add_001/test.qconf.json
new file mode 100644
index 000000000..272081b27
--- /dev/null
+++ b/res/TensorFlowLiteRecipes/Quant_Split_Add_001/test.qconf.json
@@ -0,0 +1,11 @@
+{
+ "default_quantization_dtype" : "uint8",
+ "default_granularity" : "channel",
+ "layers" : [
+ {
+ "name" : "ofm2",
+ "dtype" : "int16",
+ "granularity" : "channel"
+ }
+ ]
+}
diff --git a/res/TensorFlowLiteRecipes/Quant_Split_Add_001/test.recipe b/res/TensorFlowLiteRecipes/Quant_Split_Add_001/test.recipe
new file mode 100644
index 000000000..ef7908979
--- /dev/null
+++ b/res/TensorFlowLiteRecipes/Quant_Split_Add_001/test.recipe
@@ -0,0 +1,47 @@
+operand {
+ name: "ifm"
+ type: FLOAT32
+ shape { dim: 6 dim: 1 dim: 2 }
+}
+operand {
+ name: "split_dim"
+ type: INT32
+ shape { }
+ filler { tag: "explicit" arg: "0" }
+}
+operand {
+ name: "ofm1"
+ type: FLOAT32
+ shape { dim: 3 dim: 1 dim: 2 }
+}
+operand {
+ name: "ofm2"
+ type: FLOAT32
+ shape { dim: 3 dim: 1 dim: 2 }
+}
+operation {
+ type: "Split"
+ split_options {
+ num_splits: 2
+ }
+ input: "split_dim"
+ input: "ifm"
+ output: "ofm1"
+ output: "ofm2"
+}
+operand {
+ name: "ofm"
+ type: FLOAT32
+ shape { dim: 3 dim: 1 dim: 2 }
+}
+operation {
+ type: "Add"
+ input: "ofm1"
+ input: "ofm2"
+ output: "ofm"
+ add_options {
+ activation: NONE
+ }
+}
+input: "ifm"
+output: "ofm"
diff --git a/res/TensorFlowLiteRecipes/Quant_Split_Add_001/test.rule b/res/TensorFlowLiteRecipes/Quant_Split_Add_001/test.rule
new file mode 100644
index 000000000..dc1ed874e
--- /dev/null
+++ b/res/TensorFlowLiteRecipes/Quant_Split_Add_001/test.rule
@@ -0,0 +1,11 @@
+# To check mixed-precision quantization for a node with multiple outputs.
+# Split: int16, Add: u8
+# Quantize Ops are inserted before Split and after all Split output nodes.
+
+RULE "VERIFY_FILE_FORMAT" $(verify_file_format) '=' 1
+
+RULE "INPUT_UINT8" $(tensor_dtype ifm) '=' UINT8
+RULE "SPLIT_OUT_1_INT16" $(tensor_dtype ofm1) '=' INT16
+RULE "SPLIT_OUT_2_INT16" $(tensor_dtype ofm2) '=' INT16
+RULE "ADD_UINT8" $(tensor_dtype ofm) '=' UINT8
+RULE "QUANTIZE_OP" $(op_count QUANTIZE) '=' 3
diff --git a/res/TensorFlowLiteRecipes/Quantize_001/test.recipe b/res/TensorFlowLiteRecipes/Quantize_001/test.recipe
new file mode 100644
index 000000000..943341be1
--- /dev/null
+++ b/res/TensorFlowLiteRecipes/Quantize_001/test.recipe
@@ -0,0 +1,66 @@
+operand {
+ name: "ifm"
+ type: FLOAT32
+ shape { dim: 1 dim: 3 dim: 3 dim: 2 }
+}
+operand {
+ name: "ker"
+ type: FLOAT32
+ shape { dim: 1 dim: 1 dim: 1 dim: 2 }
+ filler {
+ tag: "gaussian"
+ arg: "0.0"
+ arg: "1.0"
+ }
+}
+operand {
+ name: "bias"
+ type: FLOAT32
+ shape { dim: 1 }
+ filler {
+ tag: "gaussian"
+ arg: "0.0"
+ arg: "1.0"
+ }
+}
+operand {
+ name: "ofm_c"
+ type: FLOAT32
+ shape { dim: 1 dim: 3 dim: 3 dim: 1 }
+}
+operation {
+ type: "Conv2D"
+ conv2d_options {
+ padding: VALID
+ stride_w: 1
+ stride_h: 1
+ }
+ input: "ifm"
+ input: "ker"
+ input: "bias"
+ output: "ofm_c"
+}
+operand {
+ name: "ofm_q"
+ type: UINT8
+ shape { dim: 1 dim: 3 dim: 3 dim: 1 }
+ quant { min: 0 max: 255 scale: 1.0 zero_point: 0 }
+}
+operation {
+ type: "Quantize"
+ input: "ofm_c"
+ output: "ofm_q"
+}
+operand {
+ name: "ofm"
+ type: INT16
+ shape { dim: 1 dim: 3 dim: 3 dim: 1 }
+ quant { min: -255 max: 255 scale: 1.0 zero_point: 0 }
+}
+operation {
+ type: "Quantize"
+ input: "ofm_q"
+ output: "ofm"
+}
+input: "ifm"
+output: "ofm"
diff --git a/res/TensorFlowLiteRecipes/Quantize_001/test.reverse b/res/TensorFlowLiteRecipes/Quantize_001/test.reverse
new file mode 100644
index 000000000..e69de29bb
--- /dev/null
+++ b/res/TensorFlowLiteRecipes/Quantize_001/test.reverse
diff --git a/res/TensorFlowLiteRecipes/SVDF_000/test.recipe b/res/TensorFlowLiteRecipes/SVDF_000/test.recipe
new file mode 100644
index 000000000..cd45f1b56
--- /dev/null
+++ b/res/TensorFlowLiteRecipes/SVDF_000/test.recipe
@@ -0,0 +1,62 @@
+operand {
+ name: "ifm"
+ type: FLOAT32
+ shape { dim: 1 dim: 16 }
+}
+operand {
+ name: "weight_feature"
+ type: FLOAT32
+ shape { dim: 64 dim: 16 }
+ filler {
+ tag: "gaussian"
+ arg: "0.0"
+ arg: "1.0"
+ }
+}
+operand {
+ name: "weight_time"
+ type: FLOAT32
+ shape { dim: 64 dim: 8 }
+ filler {
+ tag: "gaussian"
+ arg: "0.0"
+ arg: "1.0"
+ }
+}
+operand {
+ name: "bias"
+ type: FLOAT32
+ shape { dim: 64 }
+ filler {
+ tag: "gaussian"
+ arg: "0.0"
+ arg: "1.0"
+ }
+}
+operand {
+ name: "input_activation_state"
+ type: FLOAT32
+ is_variable: true
+ shape { dim: 1 dim: 512 }
+}
+operand {
+ name: "ofm"
+ type: FLOAT32
+ shape { dim: 1 dim: 64 }
+}
+operation {
+ type: "SVDF"
+ svdf_options {
+ rank: 1
+ activation: RELU
+ asymmetric_quantize_inputs: false
+ }
+ input: "ifm"
+ input: "weight_feature"
+ input: "weight_time"
+ input: "bias"
+ input: "input_activation_state"
+ output: "ofm"
+}
+input: "ifm"
+output: "ofm"
diff --git a/res/TensorFlowLiteRecipes/SVDF_000/test.reverse b/res/TensorFlowLiteRecipes/SVDF_000/test.reverse
new file mode 100644
index 000000000..e69de29bb
--- /dev/null
+++ b/res/TensorFlowLiteRecipes/SVDF_000/test.reverse
diff --git a/res/TensorFlowLiteRecipes/SVDF_001/test.recipe b/res/TensorFlowLiteRecipes/SVDF_001/test.recipe
new file mode 100644
index 000000000..38b76c2a4
--- /dev/null
+++ b/res/TensorFlowLiteRecipes/SVDF_001/test.recipe
@@ -0,0 +1,52 @@
+operand {
+ name: "ifm"
+ type: FLOAT32
+ shape { dim: 1 dim: 16 }
+}
+operand {
+ name: "weight_feature"
+ type: FLOAT32
+ shape { dim: 64 dim: 16 }
+ filler {
+ tag: "gaussian"
+ arg: "0.0"
+ arg: "1.0"
+ }
+}
+operand {
+ name: "weight_time"
+ type: FLOAT32
+ shape { dim: 64 dim: 8 }
+ filler {
+ tag: "gaussian"
+ arg: "0.0"
+ arg: "1.0"
+ }
+}
+operand {
+ name: "input_activation_state"
+ type: FLOAT32
+ is_variable: true
+ shape { dim: 1 dim: 512 }
+}
+operand {
+ name: "ofm"
+ type: FLOAT32
+ shape { dim: 1 dim: 64 }
+}
+operation {
+ type: "SVDF"
+ svdf_options {
+ rank: 1
+ activation: RELU
+ asymmetric_quantize_inputs: false
+ }
+ input: "ifm"
+ input: "weight_feature"
+ input: "weight_time"
+ input: ""
+ input: "input_activation_state"
+ output: "ofm"
+}
+input: "ifm"
+output: "ofm"
diff --git a/res/TensorFlowLiteRecipes/SVDF_001/test.reverse b/res/TensorFlowLiteRecipes/SVDF_001/test.reverse
new file mode 100644
index 000000000..e69de29bb
--- /dev/null
+++ b/res/TensorFlowLiteRecipes/SVDF_001/test.reverse
diff --git a/res/TensorFlowLiteRecipes/SignatureDef_MultiOut_000/test.recipe b/res/TensorFlowLiteRecipes/SignatureDef_MultiOut_000/test.recipe
index ae993e6d8..81e1e56e8 100644
--- a/res/TensorFlowLiteRecipes/SignatureDef_MultiOut_000/test.recipe
+++ b/res/TensorFlowLiteRecipes/SignatureDef_MultiOut_000/test.recipe
@@ -71,8 +71,7 @@ signature_def {
name: "ofm1"
tensor_index: 2
}
- method_name: "serving_default"
- key: "serv"
+ signature_key: "serving_default"
subgraph_index: 0
}
input: "ifm1"
diff --git a/res/TensorFlowLiteRecipes/SignatureDef_MultiOut_001/test.recipe b/res/TensorFlowLiteRecipes/SignatureDef_MultiOut_001/test.recipe
new file mode 100644
index 000000000..a1731f99e
--- /dev/null
+++ b/res/TensorFlowLiteRecipes/SignatureDef_MultiOut_001/test.recipe
@@ -0,0 +1,81 @@
+operand {
+ name: "ifm1"
+ type: FLOAT32
+ shape { dim: 1 dim: 3 dim: 3 dim: 2 }
+}
+operand {
+ name: "ifm2"
+ type: FLOAT32
+ shape { dim: 1 dim: 3 dim: 3 dim: 2 }
+}
+operand {
+ name: "ofm1"
+ type: FLOAT32
+ shape { dim: 1 dim: 3 dim: 3 dim: 2 }
+}
+operand {
+ name: "ofm2"
+ type: FLOAT32
+ shape { dim: 1 dim: 3 dim: 3 dim: 2 }
+}
+operand {
+ name: "ofm3"
+ type: FLOAT32
+ shape { dim: 1 dim: 3 dim: 3 dim: 2 }
+}
+operation {
+ type: "Add"
+ input: "ifm1"
+ input: "ifm2"
+ output: "ofm1"
+ add_options {
+ activation: NONE
+ }
+}
+operation {
+ type: "Mul"
+ input: "ifm1"
+ input: "ifm2"
+ output: "ofm2"
+ mul_options {
+    activation: NONE
+ }
+}
+operation {
+ type: "Sub"
+ input: "ifm1"
+ input: "ifm2"
+ output: "ofm3"
+ sub_options {
+    activation: NONE
+ }
+}
+signature_def {
+ inputs: {
+ name: "ifm1"
+ tensor_index: 0
+ }
+ inputs: {
+ name: "ifm2"
+ tensor_index: 1
+ }
+ outputs {
+ name: "out3"
+ tensor_index: 3
+ }
+ outputs {
+ name: "out2"
+ tensor_index: 4
+ }
+ outputs {
+ name: "out1"
+ tensor_index: 2
+ }
+ signature_key: "serving_default"
+ subgraph_index: 0
+}
+input: "ifm1"
+input: "ifm2"
+output: "ofm3"
+output: "ofm1"
+output: "ofm2"
diff --git a/res/TensorFlowLiteRecipes/Sqrt_000/test.recipe b/res/TensorFlowLiteRecipes/Sqrt_000/test.recipe
index 1754f9a58..6d258e73f 100644
--- a/res/TensorFlowLiteRecipes/Sqrt_000/test.recipe
+++ b/res/TensorFlowLiteRecipes/Sqrt_000/test.recipe
@@ -2,7 +2,6 @@ operand {
name: "ifm"
type: FLOAT32
shape { dim: 1 dim: 3 dim: 3 dim: 2 }
- filler { tag: "constant" arg: "3.5" }
}
operand {
name: "ofm"
diff --git a/res/TensorFlowLiteSchema/2.7.0/schema.fbs b/res/TensorFlowLiteSchema/2.7.0/schema.fbs
new file mode 100644
index 000000000..3e0b999f5
--- /dev/null
+++ b/res/TensorFlowLiteSchema/2.7.0/schema.fbs
@@ -0,0 +1,1250 @@
+// Copyright 2017 The TensorFlow Authors. All Rights Reserved.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+// Revision History
+// Version 0: Initial version.
+// Version 1: Add subgraphs to schema.
+// Version 2: Rename operators to conform to NN API.
+// Version 3: Move buffer data from Model.Subgraph.Tensors to Model.Buffers.
+// Version 3a: Add new builtin op code field. Has backward compatibility with
+// version 3.
+// Version 3b: Rename fields in SignatureDef. Has backward compatibility with
+// version 3 and 3a.
+
+namespace tflite;
+
+// This corresponds to the version.
+file_identifier "TFL3";
+// File extension of any written files.
+file_extension "tflite";
+
+// IMPORTANT: All new members of tables, enums and unions must be added at the
+// end to ensure backwards compatibility.
+
+// The type of data stored in a tensor.
+enum TensorType : byte {
+ FLOAT32 = 0,
+ FLOAT16 = 1,
+ INT32 = 2,
+ UINT8 = 3,
+ INT64 = 4,
+ STRING = 5,
+ BOOL = 6,
+ INT16 = 7,
+ COMPLEX64 = 8,
+ INT8 = 9,
+ FLOAT64 = 10,
+ COMPLEX128 = 11,
+ UINT64 = 12,
+  // Experimental: Resource and variant types are experimental and subject
+ // to change. Do not implement custom kernels using resource & variant types
+ // now.
+ RESOURCE = 13,
+ VARIANT = 14,
+ UINT32 = 15,
+}
+
+// Custom quantization parameters for experimenting with new quantization
+// techniques.
+table CustomQuantization {
+ custom:[ubyte] (force_align: 16);
+}
+
+// Represents a specific quantization technique's parameters.
+union QuantizationDetails {
+ CustomQuantization,
+}
+
+// Parameters for converting a quantized tensor back to float.
+table QuantizationParameters {
+ // These four parameters are the asymmetric linear quantization parameters.
+ // Given a quantized value q, the corresponding float value f should be:
+ // f = scale * (q - zero_point)
+ // For other quantization types, the QuantizationDetails below is used.
+ min:[float]; // For importing back into tensorflow.
+ max:[float]; // For importing back into tensorflow.
+ scale:[float]; // For dequantizing the tensor's values.
+ zero_point:[long];
+
+ // If this is not none, the other quantization parameters (i.e. min, max,
+ // scale, zero_point fields above) are ignored and the value of the
+ // QuantizationDetails union should be used.
+ details:QuantizationDetails;
+
+ // Specifies the dimension of the Tensor's shape that the scales and
+ // zero_points correspond to. For example, a tensor t, with dims=[4, 3, 2, 1]
+ // with quantization params:
+ // scale=[1.0, 2.0, 3.0], zero_point=[1, 2, 3], quantization_dimension=1
+ // will be quantized across the second dimension of t.
+ // t[:, 0, :, :] will have scale[0]=1.0, zero_point[0]=1
+ // t[:, 1, :, :] will have scale[1]=2.0, zero_point[0]=2
+ // t[:, 2, :, :] will have scale[2]=3.0, zero_point[0]=3
+ quantized_dimension:int;
+}
+
+// Sparse tensors.
+// We use a modification of the TACO format.
+// Reference: http://tensor-compiler.org/kjolstad-oopsla17-tensor-compiler.pdf
+//
+// To encode a conceptual n-dimensional dense tensor with dims (d0, ..., dn-1),
+// potentially with a k-dimensional block (0 <= k <= n) with dims
+// (dn, ..., dn+k-1), the format needs to specify:
+// 1. In what order to traverse these dimensions. For example, to store a 2-D
+// matrix in row major order, the traversal order would be (d0, d1),
+// whereas to store it in column major order, the traversal order would be
+// (d1, d0). If the 2-D matrix has a 2-D inner block, the traversal order
+// could be (d0, d1, d2, d3).
+// 2. How each block dimension in (dn, ..., dn+k-1) maps to the original
+// tensor dimension in (d0, ..., dn-1).
+// 3. In the traversal order defined above, the format (dense vs. sparse) and
+// index metadata for each dimension. For a dense dimension, this is just
+// the size of that dimension. For a sparse dimension, it's the same as
+// the compressed index defined in the Compressed Sparse Row (CSR) format.
+// (http://scipy-lectures.org/advanced/scipy_sparse/csr_matrix.html)
+
+// The storage type for a dimension. Currently we support:
+// 1. DENSE: each coordinate in this dimension is stored implicitly.
+// 2. SPARSE_CSR: only the coordinates with non-zero elements are stored. The
+//    compression technique is the same as what CSR uses.
+// More types like a sparse dimension with a different compression technique
+// could be added to the list in the future.
+enum DimensionType : byte {
+ DENSE = 0,
+ SPARSE_CSR = 1,
+}
+
+table Int32Vector {
+ values:[int];
+}
+
+table Uint16Vector {
+ values:[ushort] (force_align: 4);
+}
+
+table Uint8Vector {
+ values:[ubyte] (force_align: 4);
+}
+
+// Variable-typed buffer to store the index metadata for a sparse dimension.
+// The widest type is Int32 instead of UInt32 because a tensor's shape is an int32
+// vector. We don't want the per-dimensional index to overflow that range.
+union SparseIndexVector {
+ Int32Vector,
+ Uint16Vector,
+ Uint8Vector
+}
+
+table DimensionMetadata {
+ // Whether a dimension is dense or sparse.
+ format:DimensionType;
+ // Index metadata used for a dimension.
+ // - If format is DimensionType.DENSE then we use the dense_size field to
+ // store the size of that dimension. Each index in that dimension is
+ // stored implicitly.
+ // - If format is DimensionType.SPARSE_CSR then we use array_segments and
+ // array_indices to encode that dimension. array_segments represents how
+ // to segment the indices array, each segment corresponds to one element
+ // in the previous dimension. array_indices represents the index of the
+ // non-zero elements within this dimension (as those in the CSR matrix
+ // format, where the first array is row pointers and the second array is
+ // column indices).
+ dense_size:int;
+ array_segments:SparseIndexVector;
+ array_indices:SparseIndexVector;
+}
+
+// Parameters to encode a sparse TfLite tensor.
+table SparsityParameters {
+ // The traversal order of the dimensions defined in the `shape` field of the
+  // conceptual dense tensor. For an n-dimensional tensor with dims (d0, d1,
+ // ..., dn-1),
+ // - if not block sparse, the traversal_order is just a permutation of (d0,
+ // ..., dn-1). For example, a 2-D matrix stored in row-major order would
+ // have traversal_order = (d0, d1).
+ // - if block sparse with a k-dimensional block (0 <= k <= n), the
+ // traversal_order has n + k elements. The first n elements are still a
+  //   permutation of (d0, ..., dn-1). The last k elements are a permutation
+ // of (dn, ..., dn+k-1), defining how to traverse a block internally. For
+ // example, a 2-D matrix with 2-D blocks, both stored in row-major order
+ // would have traversal_order = (d0, d1, d2, d3).
+ traversal_order:[int];
+ // For an n-dimensional tensor with a k-dimensional block (0 <= k <= n),
+ // stores how a block dimension in (dn, ..., dn+k-1) maps to the original
+  // tensor dimension in (d0, ..., dn-1).
+ // It's stored in the order of (dn, ..., dn+k-1).
+ // If not block-sparse, this field is NULL.
+ block_map:[int];
+ // In the traversal order defined above, the metadata needed for
+ // each dimension to locate the non-zero values in the original dense tensor.
+ // The size of the dim_metadata array = the size of the traversal_order array
+ // = n + k.
+ dim_metadata:[DimensionMetadata];
+}
+
+table Tensor {
+ // The tensor shape. The meaning of each entry is operator-specific but
+ // builtin ops use: [batch size, height, width, number of channels] (That's
+ // Tensorflow's NHWC).
+ shape:[int];
+ type:TensorType;
+ // An index that refers to the buffers table at the root of the model. Or,
+ // if there is no data buffer associated (i.e. intermediate results), then
+  // this is 0 (which refers to an always-present empty buffer).
+ //
+ // The data_buffer itself is an opaque container, with the assumption that the
+ // target device is little-endian. In addition, all builtin operators assume
+ // the memory is ordered such that if `shape` is [4, 3, 2], then index
+ // [i, j, k] maps to data_buffer[i*3*2 + j*2 + k].
+ buffer:uint;
+ name:string; // For debugging and importing back into tensorflow.
+ quantization:QuantizationParameters; // Optional.
+
+ is_variable:bool = false;
+
+ // Parameters to encode a sparse tensor. See the example in
+ // tensorflow/lite/testdata/sparse_tensor.json.
+ sparsity:SparsityParameters; // Optional.
+
+ // Encodes `shape` with unknown dimensions. Unknown dimensions are
+ // represented with -1.
+ shape_signature:[int]; // Optional.
+}
+
+// A list of builtin operators. Builtin operators are slightly faster than custom
+// ones, but not by much. Moreover, while custom operators accept an opaque
+// object containing configuration parameters, builtins have a predetermined
+// set of acceptable options.
+// LINT.IfChange
+enum BuiltinOperator : int32 {
+ ADD = 0,
+ AVERAGE_POOL_2D = 1,
+ CONCATENATION = 2,
+ CONV_2D = 3,
+ DEPTHWISE_CONV_2D = 4,
+ DEPTH_TO_SPACE = 5,
+ DEQUANTIZE = 6,
+ EMBEDDING_LOOKUP = 7,
+ FLOOR = 8,
+ FULLY_CONNECTED = 9,
+ HASHTABLE_LOOKUP = 10,
+ L2_NORMALIZATION = 11,
+ L2_POOL_2D = 12,
+ LOCAL_RESPONSE_NORMALIZATION = 13,
+ LOGISTIC = 14,
+ LSH_PROJECTION = 15,
+ LSTM = 16,
+ MAX_POOL_2D = 17,
+ MUL = 18,
+ RELU = 19,
+ // NOTE(aselle): RELU_N1_TO_1 used to be called RELU1, but it was renamed
+ // since different model developers use RELU1 in different ways. Never
+ // create another op called RELU1.
+ RELU_N1_TO_1 = 20,
+ RELU6 = 21,
+ RESHAPE = 22,
+ RESIZE_BILINEAR = 23,
+ RNN = 24,
+ SOFTMAX = 25,
+ SPACE_TO_DEPTH = 26,
+ SVDF = 27,
+ TANH = 28,
+ CONCAT_EMBEDDINGS = 29,
+ SKIP_GRAM = 30,
+ CALL = 31,
+ CUSTOM = 32,
+ EMBEDDING_LOOKUP_SPARSE = 33,
+ PAD = 34,
+ UNIDIRECTIONAL_SEQUENCE_RNN = 35,
+ GATHER = 36,
+ BATCH_TO_SPACE_ND = 37,
+ SPACE_TO_BATCH_ND = 38,
+ TRANSPOSE = 39,
+ MEAN = 40,
+ SUB = 41,
+ DIV = 42,
+ SQUEEZE = 43,
+ UNIDIRECTIONAL_SEQUENCE_LSTM = 44,
+ STRIDED_SLICE = 45,
+ BIDIRECTIONAL_SEQUENCE_RNN = 46,
+ EXP = 47,
+ TOPK_V2 = 48,
+ SPLIT = 49,
+ LOG_SOFTMAX = 50,
+ // DELEGATE is a special op type for the operations which are delegated to
+ // other backends.
+ // WARNING: Experimental interface, subject to change
+ DELEGATE = 51,
+ BIDIRECTIONAL_SEQUENCE_LSTM = 52,
+ CAST = 53,
+ PRELU = 54,
+ MAXIMUM = 55,
+ ARG_MAX = 56,
+ MINIMUM = 57,
+ LESS = 58,
+ NEG = 59,
+ PADV2 = 60,
+ GREATER = 61,
+ GREATER_EQUAL = 62,
+ LESS_EQUAL = 63,
+ SELECT = 64,
+ SLICE = 65,
+ SIN = 66,
+ TRANSPOSE_CONV = 67,
+ SPARSE_TO_DENSE = 68,
+ TILE = 69,
+ EXPAND_DIMS = 70,
+ EQUAL = 71,
+ NOT_EQUAL = 72,
+ LOG = 73,
+ SUM = 74,
+ SQRT = 75,
+ RSQRT = 76,
+ SHAPE = 77,
+ POW = 78,
+ ARG_MIN = 79,
+ FAKE_QUANT = 80,
+ REDUCE_PROD = 81,
+ REDUCE_MAX = 82,
+ PACK = 83,
+ LOGICAL_OR = 84,
+ ONE_HOT = 85,
+ LOGICAL_AND = 86,
+ LOGICAL_NOT = 87,
+ UNPACK = 88,
+ REDUCE_MIN = 89,
+ FLOOR_DIV = 90,
+ REDUCE_ANY = 91,
+ SQUARE = 92,
+ ZEROS_LIKE = 93,
+ FILL = 94,
+ FLOOR_MOD = 95,
+ RANGE = 96,
+ RESIZE_NEAREST_NEIGHBOR = 97,
+ LEAKY_RELU = 98,
+ SQUARED_DIFFERENCE = 99,
+ MIRROR_PAD = 100,
+ ABS = 101,
+ SPLIT_V = 102,
+ UNIQUE = 103,
+ CEIL = 104,
+ REVERSE_V2 = 105,
+ ADD_N = 106,
+ GATHER_ND = 107,
+ COS = 108,
+ WHERE = 109,
+ RANK = 110,
+ ELU = 111,
+ REVERSE_SEQUENCE = 112,
+ MATRIX_DIAG = 113,
+ QUANTIZE = 114,
+ MATRIX_SET_DIAG = 115,
+ ROUND = 116,
+ HARD_SWISH = 117,
+ IF = 118,
+ WHILE = 119,
+ NON_MAX_SUPPRESSION_V4 = 120,
+ NON_MAX_SUPPRESSION_V5 = 121,
+ SCATTER_ND = 122,
+ SELECT_V2 = 123,
+ DENSIFY = 124,
+ SEGMENT_SUM = 125,
+ BATCH_MATMUL = 126,
+ PLACEHOLDER_FOR_GREATER_OP_CODES = 127,
+ CUMSUM = 128,
+ CALL_ONCE = 129,
+ BROADCAST_TO = 130,
+ RFFT2D = 131,
+ CONV_3D = 132,
+ IMAG=133,
+ REAL=134,
+ COMPLEX_ABS=135,
+ HASHTABLE = 136,
+ HASHTABLE_FIND = 137,
+ HASHTABLE_IMPORT = 138,
+ HASHTABLE_SIZE = 139,
+ REDUCE_ALL = 140,
+ CONV_3D_TRANSPOSE = 141,
+ VAR_HANDLE = 142,
+ READ_VARIABLE = 143,
+ ASSIGN_VARIABLE = 144,
+ BROADCAST_ARGS = 145,
+ RANDOM_STANDARD_NORMAL = 146,
+}
+// LINT.ThenChange(nnapi_linter/linter.proto)
+
+// Options for the builtin operators.
+union BuiltinOptions {
+ Conv2DOptions,
+ DepthwiseConv2DOptions,
+ ConcatEmbeddingsOptions,
+ LSHProjectionOptions,
+ Pool2DOptions,
+ SVDFOptions,
+ RNNOptions,
+ FullyConnectedOptions,
+ SoftmaxOptions,
+ ConcatenationOptions,
+ AddOptions,
+ L2NormOptions,
+ LocalResponseNormalizationOptions,
+ LSTMOptions,
+ ResizeBilinearOptions,
+ CallOptions,
+ ReshapeOptions,
+ SkipGramOptions,
+ SpaceToDepthOptions,
+ EmbeddingLookupSparseOptions,
+ MulOptions,
+ PadOptions,
+ GatherOptions,
+ BatchToSpaceNDOptions,
+ SpaceToBatchNDOptions,
+ TransposeOptions,
+ ReducerOptions,
+ SubOptions,
+ DivOptions,
+ SqueezeOptions,
+ SequenceRNNOptions,
+ StridedSliceOptions,
+ ExpOptions,
+ TopKV2Options,
+ SplitOptions,
+ LogSoftmaxOptions,
+ CastOptions,
+ DequantizeOptions,
+ MaximumMinimumOptions,
+ ArgMaxOptions,
+ LessOptions,
+ NegOptions,
+ PadV2Options,
+ GreaterOptions,
+ GreaterEqualOptions,
+ LessEqualOptions,
+ SelectOptions,
+ SliceOptions,
+ TransposeConvOptions,
+ SparseToDenseOptions,
+ TileOptions,
+ ExpandDimsOptions,
+ EqualOptions,
+ NotEqualOptions,
+ ShapeOptions,
+ PowOptions,
+ ArgMinOptions,
+ FakeQuantOptions,
+ PackOptions,
+ LogicalOrOptions,
+ OneHotOptions,
+ LogicalAndOptions,
+ LogicalNotOptions,
+ UnpackOptions,
+ FloorDivOptions,
+ SquareOptions,
+ ZerosLikeOptions,
+ FillOptions,
+ BidirectionalSequenceLSTMOptions,
+ BidirectionalSequenceRNNOptions,
+ UnidirectionalSequenceLSTMOptions,
+ FloorModOptions,
+ RangeOptions,
+ ResizeNearestNeighborOptions,
+ LeakyReluOptions,
+ SquaredDifferenceOptions,
+ MirrorPadOptions,
+ AbsOptions,
+ SplitVOptions,
+ UniqueOptions,
+ ReverseV2Options,
+ AddNOptions,
+ GatherNdOptions,
+ CosOptions,
+ WhereOptions,
+ RankOptions,
+ ReverseSequenceOptions,
+ MatrixDiagOptions,
+ QuantizeOptions,
+ MatrixSetDiagOptions,
+ HardSwishOptions,
+ IfOptions,
+ WhileOptions,
+ DepthToSpaceOptions,
+ NonMaxSuppressionV4Options,
+ NonMaxSuppressionV5Options,
+ ScatterNdOptions,
+ SelectV2Options,
+ DensifyOptions,
+ SegmentSumOptions,
+ BatchMatMulOptions,
+ CumsumOptions,
+ CallOnceOptions,
+ BroadcastToOptions,
+ Rfft2dOptions,
+ Conv3DOptions,
+ HashtableOptions,
+ HashtableFindOptions,
+ HashtableImportOptions,
+ HashtableSizeOptions,
+ VarHandleOptions,
+ ReadVariableOptions,
+ AssignVariableOptions,
+ RandomOptions,
+}
+
+enum Padding : byte { SAME, VALID }
+
+enum ActivationFunctionType : byte {
+ NONE = 0,
+ RELU = 1,
+ RELU_N1_TO_1 = 2,
+ RELU6 = 3,
+ TANH = 4,
+ SIGN_BIT = 5,
+}
+
+table Conv2DOptions {
+ padding:Padding;
+ stride_w:int;
+ stride_h:int;
+ fused_activation_function:ActivationFunctionType;
+ dilation_w_factor:int = 1;
+ dilation_h_factor:int = 1;
+}
+
+// Options for both Conv3D and Conv3DTranspose.
+table Conv3DOptions {
+ padding:Padding;
+ stride_d:int;
+ stride_w:int;
+ stride_h:int;
+ fused_activation_function:ActivationFunctionType;
+ dilation_d_factor:int = 1;
+ dilation_w_factor:int = 1;
+ dilation_h_factor:int = 1;
+}
+
+table Pool2DOptions {
+ padding:Padding;
+ stride_w:int;
+ stride_h:int;
+ filter_width:int;
+ filter_height:int;
+ fused_activation_function:ActivationFunctionType;
+}
+
+table DepthwiseConv2DOptions {
+ // Parameters for DepthwiseConv version 1 or above.
+ padding:Padding;
+ stride_w:int;
+ stride_h:int;
+ // `depth_multiplier` is redundant. It's used by CPU kernels in
+ // TensorFlow 2.0 or below, but ignored in versions above.
+ // See comments in lite/c/builtin_op_data.h for more details.
+ depth_multiplier:int;
+ fused_activation_function:ActivationFunctionType;
+ // Parameters for DepthwiseConv version 2 or above.
+ dilation_w_factor:int = 1;
+ dilation_h_factor:int = 1;
+}
+
+table ConcatEmbeddingsOptions {
+ num_channels:int;
+ num_columns_per_channel:[int];
+ embedding_dim_per_channel:[int]; // This could be inferred from parameters.
+}
+
+enum LSHProjectionType: byte {
+ UNKNOWN = 0,
+ SPARSE = 1,
+ DENSE = 2,
+}
+
+table LSHProjectionOptions {
+ type: LSHProjectionType;
+}
+
+table SVDFOptions {
+ rank:int;
+ fused_activation_function:ActivationFunctionType;
+ // For weights-only quantization, use asymmetric quantization for non
+ // constant inputs at evaluation time.
+ asymmetric_quantize_inputs:bool;
+}
+
+// An implementation of TensorFlow RNNCell.
+table RNNOptions {
+ fused_activation_function:ActivationFunctionType;
+ asymmetric_quantize_inputs:bool;
+}
+
+// An implementation of TensorFlow dynamic_rnn with RNNCell.
+table SequenceRNNOptions {
+ time_major:bool;
+ fused_activation_function:ActivationFunctionType;
+ asymmetric_quantize_inputs:bool;
+}
+
+// An implementation of TensorFlow bidirectional_dynamic_rnn with RNNCell.
+table BidirectionalSequenceRNNOptions {
+ time_major:bool;
+ fused_activation_function:ActivationFunctionType;
+ merge_outputs: bool;
+ asymmetric_quantize_inputs:bool;
+}
+
+enum FullyConnectedOptionsWeightsFormat: byte {
+ DEFAULT = 0,
+ SHUFFLED4x16INT8 = 1,
+}
+
+// An implementation of TensorFlow fully_connected (a.k.a Dense) layer.
+table FullyConnectedOptions {
+ // Parameters for FullyConnected version 1 or above.
+ fused_activation_function:ActivationFunctionType;
+
+ // Parameters for FullyConnected version 2 or above.
+ weights_format:FullyConnectedOptionsWeightsFormat = DEFAULT;
+
+ // Parameters for FullyConnected version 5 or above.
+  // If set to true, then the number of dimensions is preserved. Furthermore,
+ // all but the last dimension of the input and output shapes will be equal.
+ keep_num_dims: bool;
+
+ // Parameters for FullyConnected version 7 or above.
+ // If set to true, then weights-only op will use asymmetric quantization for
+ // inputs.
+ asymmetric_quantize_inputs: bool;
+}
+
+table SoftmaxOptions {
+ beta: float;
+}
+
+// An implementation of TensorFlow concat.
+table ConcatenationOptions {
+ axis:int;
+ fused_activation_function:ActivationFunctionType;
+}
+
+table AddOptions {
+ fused_activation_function:ActivationFunctionType;
+ // Parameters supported by version 3.
+ pot_scale_int16:bool = true;
+}
+
+table MulOptions {
+ fused_activation_function:ActivationFunctionType;
+}
+
+table L2NormOptions {
+ // This field is currently ignored in the L2 Norm Op.
+ fused_activation_function:ActivationFunctionType;
+}
+
+table LocalResponseNormalizationOptions {
+ radius:int;
+ bias:float;
+ alpha:float;
+ beta:float;
+}
+
+enum LSTMKernelType : byte {
+ // Full LSTM kernel which supports peephole and projection.
+ FULL = 0,
+ // Basic LSTM kernels. Equivalent to TensorFlow BasicLSTMCell.
+ BASIC = 1,
+}
+
+// An implementation of TensorFlow LSTMCell and CoupledInputForgetGateLSTMCell
+table LSTMOptions {
+ // Parameters for LSTM version 1 or above.
+ fused_activation_function:ActivationFunctionType;
+ cell_clip: float; // Optional, 0.0 means no clipping
+ proj_clip: float; // Optional, 0.0 means no clipping
+
+ // Parameters for LSTM version 2 or above.
+ // Basic kernel is only supported in version 2 or above.
+ kernel_type: LSTMKernelType = FULL;
+
+ // Parameters for LSTM version 4 or above.
+ asymmetric_quantize_inputs: bool;
+}
+
+// An implementation of TensorFlow dynamic_rnn with LSTMCell.
+table UnidirectionalSequenceLSTMOptions {
+ fused_activation_function:ActivationFunctionType;
+ cell_clip: float; // Optional, 0.0 means no clipping
+ proj_clip: float; // Optional, 0.0 means no clipping
+
+ // If true then first dimension is sequence, otherwise batch.
+ time_major:bool;
+
+ // Parameter for Unidirectional Sequence LSTM version 4.
+ asymmetric_quantize_inputs:bool;
+}
+
+table BidirectionalSequenceLSTMOptions {
+ // Parameters supported by version 1:
+ fused_activation_function:ActivationFunctionType;
+ cell_clip: float; // Optional, 0.0 means no clipping
+ proj_clip: float; // Optional, 0.0 means no clipping
+
+ // If true, store the outputs of both directions into the first output.
+ merge_outputs: bool;
+
+ // Parameters supported by version 2:
+ // If true then first dimension is sequence, otherwise batch.
+ // Version 1 implementations assumed time_major to be true, so this default
+ // value should never change.
+ time_major: bool = true;
+
+ // Parameters for version 3 or above.
+ asymmetric_quantize_inputs:bool;
+}
+
+table ResizeBilinearOptions {
+ new_height: int (deprecated);
+ new_width: int (deprecated);
+ align_corners: bool;
+ half_pixel_centers: bool;
+}
+
+table ResizeNearestNeighborOptions {
+ align_corners: bool;
+ half_pixel_centers: bool;
+}
+
+// A call operation options
+table CallOptions {
+ // The subgraph index that needs to be called.
+ subgraph:uint;
+}
+
+table PadOptions {
+}
+
+table PadV2Options {
+}
+
+table ReshapeOptions {
+ new_shape:[int];
+}
+
+table SpaceToBatchNDOptions {
+}
+
+table BatchToSpaceNDOptions {
+}
+
+table SkipGramOptions {
+ ngram_size: int;
+ max_skip_size: int;
+ include_all_ngrams: bool;
+}
+
+table SpaceToDepthOptions {
+ block_size: int;
+}
+
+table DepthToSpaceOptions {
+ block_size: int;
+}
+
+table SubOptions {
+ fused_activation_function:ActivationFunctionType;
+ // Parameters supported by version 5
+ pot_scale_int16:bool = true;
+}
+
+table DivOptions {
+ fused_activation_function:ActivationFunctionType;
+}
+
+table TopKV2Options {
+}
+
+enum CombinerType : byte {
+ SUM = 0,
+ MEAN = 1,
+ SQRTN = 2,
+}
+
+table EmbeddingLookupSparseOptions {
+ combiner:CombinerType;
+}
+
+table GatherOptions {
+ axis: int;
+ // Parameters for Gather version 5 or above.
+ batch_dims: int = 0;
+}
+
+table TransposeOptions {
+}
+
+table ExpOptions {
+}
+
+table CosOptions {
+}
+
+table ReducerOptions {
+ keep_dims: bool;
+}
+
+table SqueezeOptions {
+ squeeze_dims:[int];
+}
+
+table SplitOptions {
+ num_splits: int;
+}
+
+table SplitVOptions {
+ num_splits: int;
+}
+
+table StridedSliceOptions {
+ begin_mask: int;
+ end_mask: int;
+ ellipsis_mask: int;
+ new_axis_mask: int;
+ shrink_axis_mask: int;
+}
+
+table LogSoftmaxOptions {
+}
+
+table CastOptions {
+ in_data_type: TensorType;
+ out_data_type: TensorType;
+}
+
+table DequantizeOptions {
+}
+
+table MaximumMinimumOptions {
+}
+
+table TileOptions {
+}
+
+table ArgMaxOptions {
+ output_type : TensorType;
+}
+
+table ArgMinOptions {
+ output_type : TensorType;
+}
+
+table GreaterOptions {
+}
+
+table GreaterEqualOptions {
+}
+
+table LessOptions {
+}
+
+table LessEqualOptions {
+}
+
+table NegOptions {
+}
+
+table SelectOptions {
+}
+
+table SliceOptions {
+}
+
+table TransposeConvOptions {
+ padding:Padding;
+ stride_w:int;
+ stride_h:int;
+}
+
+table ExpandDimsOptions {
+}
+
+table SparseToDenseOptions {
+ validate_indices:bool;
+}
+
+table EqualOptions {
+}
+
+table NotEqualOptions {
+}
+
+table ShapeOptions {
+ // Optional output type of the operation (int32 or int64). Defaults to int32.
+ out_type : TensorType;
+}
+
+table RankOptions {
+}
+
+table PowOptions {
+}
+
+table FakeQuantOptions {
+ // Parameters supported by version 1:
+ min:float;
+ max:float;
+ num_bits:int;
+
+ // Parameters supported by version 2:
+ narrow_range:bool;
+}
+
+table PackOptions {
+ values_count:int;
+ axis:int;
+}
+
+table LogicalOrOptions {
+}
+
+table OneHotOptions {
+ axis:int;
+}
+
+table AbsOptions {
+}
+
+
+table HardSwishOptions {
+}
+
+table LogicalAndOptions {
+}
+
+table LogicalNotOptions {
+}
+
+table UnpackOptions {
+ num:int;
+ axis:int;
+}
+
+table FloorDivOptions {
+}
+
+table SquareOptions {
+}
+
+table ZerosLikeOptions {
+}
+
+table FillOptions {
+}
+
+table FloorModOptions {
+}
+
+table RangeOptions {
+}
+
+table LeakyReluOptions {
+ alpha:float;
+}
+
+table SquaredDifferenceOptions {
+}
+
+enum MirrorPadMode : byte {
+ // Doesn't include borders.
+ REFLECT = 0,
+ // Includes borders.
+ SYMMETRIC = 1,
+}
+
+table MirrorPadOptions {
+ mode:MirrorPadMode;
+}
+
+table UniqueOptions {
+ idx_out_type:TensorType = INT32;
+}
+
+table ReverseV2Options {
+}
+
+table AddNOptions {
+}
+
+table GatherNdOptions {
+}
+
+table WhereOptions {
+}
+
+table ReverseSequenceOptions {
+ seq_dim:int;
+ batch_dim:int = 0;
+}
+
+table MatrixDiagOptions {
+}
+
+table QuantizeOptions {
+}
+
+table MatrixSetDiagOptions {
+}
+
+table IfOptions {
+ then_subgraph_index:int;
+ else_subgraph_index:int;
+}
+
+table CallOnceOptions {
+ init_subgraph_index:int;
+}
+
+table WhileOptions {
+ cond_subgraph_index:int;
+ body_subgraph_index:int;
+}
+
+table NonMaxSuppressionV4Options {
+}
+
+table NonMaxSuppressionV5Options {
+}
+
+table ScatterNdOptions {
+}
+
+table SelectV2Options {
+}
+
+table DensifyOptions {
+}
+
+table SegmentSumOptions {
+}
+
+table BatchMatMulOptions {
+ adj_x:bool;
+ adj_y:bool;
+ // Parameters for BatchMatMul version 4 or above.
+ // If set to true, then weights-only op will use asymmetric quantization for
+ // inputs.
+ asymmetric_quantize_inputs: bool;
+}
+
+table CumsumOptions {
+ exclusive:bool;
+ reverse:bool;
+}
+
+table BroadcastToOptions {
+}
+
+table Rfft2dOptions {
+}
+
+table HashtableOptions {
+ // The identity of hash tables. This identity will be used across different
+ // subgraphs in the same interpreter instance.
+ table_id:int;
+ key_dtype:TensorType;
+ value_dtype:TensorType;
+}
+
+table HashtableFindOptions {
+}
+
+table HashtableImportOptions {
+}
+
+table HashtableSizeOptions {
+}
+
+table VarHandleOptions {
+ container:string;
+ shared_name:string;
+}
+
+table ReadVariableOptions {
+}
+
+table AssignVariableOptions {
+}
+
+table RandomOptions {
+ seed: int;
+ seed2: int;
+}
+
+// An OperatorCode can be an enum value (BuiltinOperator) if the operator is a
+// builtin, or a string if the operator is custom.
+table OperatorCode {
+ // This field is for backward compatibility. This field will be used when
+  // the value of the extended builtin_code field is less than
+  // BuiltinOperator_PLACEHOLDER_FOR_GREATER_OP_CODES.
+ deprecated_builtin_code:byte;
+ custom_code:string;
+
+  // The version of the operator. The version needs to be bumped whenever new
+ // parameters are introduced into an op.
+ version:int = 1;
+
+ // This field is introduced for resolving op builtin code shortage problem
+ // (the original BuiltinOperator enum field was represented as a byte).
+ // This field will be used when the value of the extended builtin_code field
+  // is greater than BuiltinOperator_PLACEHOLDER_FOR_GREATER_OP_CODES.
+ builtin_code:BuiltinOperator;
+}
+
+enum CustomOptionsFormat : byte {
+ FLEXBUFFERS = 0,
+}
+
+// An operator takes tensors as inputs and outputs. The type of operation being
+// performed is determined by an index into the list of valid OperatorCodes,
+// while the specifics of each operation are configured using builtin_options
+// or custom_options.
+table Operator {
+ // Index into the operator_codes array. Using an integer here avoids
+  // complicated map lookups.
+ opcode_index:uint;
+
+  // Optional inputs are indicated by -1.
+ inputs:[int];
+ outputs:[int];
+
+ builtin_options:BuiltinOptions;
+ custom_options:[ubyte];
+ custom_options_format:CustomOptionsFormat;
+
+ // A list of booleans indicating the input tensors which are being mutated by
+ // this operator.(e.g. used by RNN and LSTM).
+ // For example, if the "inputs" array refers to 5 tensors and the second and
+ // fifth are mutable variables, then this list will contain
+ // [false, true, false, false, true].
+ //
+ // If the list is empty, no variable is mutated in this operator.
+ // The list either has the same length as `inputs`, or is empty.
+ mutating_variable_inputs:[bool];
+
+ // A list of indices to the subgraph's "tensors" that are internal to an Op.
+ // Internal tensors are those that do not flow in or out of the operation,
+ // but instead are part of internal computation. As such, the operation's
+ // implementation may manage its memory more efficiently. They are needed
+ // however (i.e. not just an implementation detail) since they are part of the
+ // computation, which may require relevant metadata such as quantization
+ // parameters.
+ intermediates:[int];
+}
+
+// The root type, defining a subgraph, which typically represents an entire
+// model.
+table SubGraph {
+ // A list of all tensors used in this subgraph.
+ tensors:[Tensor];
+
+ // Indices of the tensors that are inputs into this subgraph. Note this is
+ // the list of non-static tensors that feed into the subgraph for inference.
+ inputs:[int];
+
+ // Indices of the tensors that are outputs out of this subgraph. Note this is
+ // the list of output tensors that are considered the product of the
+ // subgraph's inference.
+ outputs:[int];
+
+ // All operators, in execution order.
+ operators:[Operator];
+
+ // Name of this subgraph (used for debugging).
+ name:string;
+}
+
+// Table of raw data buffers (used for constant tensors). Referenced by tensors
+// by index. The generous alignment accommodates mmap-friendly data structures.
+table Buffer {
+ data:[ubyte] (force_align: 16);
+}
+
+table Metadata {
+ // A human readable string to uniquely identify a Metadata.
+ name:string;
+ // An index to the buffers table.
+ buffer:uint;
+}
+
+// Map from an alias name of tensor to tensor index in the graph.
+// This is used in Signature def.
+table TensorMap {
+ // Represents the alias to use for this tensor.
+ name:string;
+
+  // The actual tensor index in the primary graph that 'name' corresponds to.
+ tensor_index:uint;
+}
+
+// This corresponds to SignatureDef in Tensorflow SavedModel.
+// The SignatureDef will be part of the SavedModel provided for conversion.
+table SignatureDef {
+ // Named inputs for this signature.
+ inputs:[TensorMap];
+
+ // Named outputs for this signature.
+ outputs:[TensorMap];
+
+ // Key value which was in the Tensorflow SavedModel SignatureDef map.
+ signature_key:string;
+
+ // Model tag, deprecated.
+ deprecated_tag:string (deprecated);
+
+  // Index of the subgraph that corresponds to the exported method.
+ subgraph_index:uint;
+}
+
+table Model {
+ // Version of the schema.
+ version:uint;
+
+ // A list of all operator codes used in this model. This is
+ // kept in order because operators carry an index into this
+ // vector.
+ operator_codes:[OperatorCode];
+
+ // All the subgraphs of the model. The 0th is assumed to be the main
+ // model.
+ subgraphs:[SubGraph];
+
+ // A description of the model.
+ description:string;
+
+ // Buffers of the model.
+ // Note the 0th entry of this array must be an empty buffer (sentinel).
+ // This is a convention so that tensors without a buffer can provide 0 as
+ // their buffer.
+ buffers:[Buffer];
+
+  // Metadata about the model. Indirects into the existing buffers list.
+ // Deprecated, prefer to use metadata field.
+ metadata_buffer:[int];
+
+ // Metadata about the model.
+ metadata:[Metadata];
+
+ // Optional SignatureDefs for the model.
+ signature_defs:[SignatureDef];
+}
+
+root_type Model;
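A schema file like this is normally consumed through flatc-generated bindings. A minimal sketch of walking a model with Python bindings generated by `flatc --python schema.fbs` (the `tflite` package name follows the namespace declaration above; the model path is illustrative):

    from tflite.Model import Model

    with open("model.tflite", "rb") as f:
        buf = f.read()

    model = Model.GetRootAsModel(buf, 0)
    print("schema version:", model.Version())

    subgraph = model.Subgraphs(0)  # the 0th subgraph is the main model
    for i in range(subgraph.OperatorsLength()):
        op = subgraph.Operators(i)
        code = model.OperatorCodes(op.OpcodeIndex())
        print("op", i, "builtin_code:", code.BuiltinCode())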
diff --git a/res/TensorFlowLiteSchema/2.8.0/schema.fbs b/res/TensorFlowLiteSchema/2.8.0/schema.fbs
new file mode 100644
index 000000000..af55a262f
--- /dev/null
+++ b/res/TensorFlowLiteSchema/2.8.0/schema.fbs
@@ -0,0 +1,1264 @@
+// Copyright 2017 The TensorFlow Authors. All Rights Reserved.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+// Revision History
+// Version 0: Initial version.
+// Version 1: Add subgraphs to schema.
+// Version 2: Rename operators to conform to NN API.
+// Version 3: Move buffer data from Model.Subgraph.Tensors to Model.Buffers.
+// Version 3a: Add new builtin op code field. Has backward compatibility with
+// version 3.
+// Version 3b: Rename fields in SignatureDef. Has backward compatibility with
+// version 3 and 3a.
+
+namespace tflite;
+
+// This corresponds to the version.
+file_identifier "TFL3";
+// File extension of any written files.
+file_extension "tflite";
+
+// IMPORTANT: All new members of tables, enums and unions must be added at the
+// end to ensure backwards compatibility.
+
+// The type of data stored in a tensor.
+enum TensorType : byte {
+ FLOAT32 = 0,
+ FLOAT16 = 1,
+ INT32 = 2,
+ UINT8 = 3,
+ INT64 = 4,
+ STRING = 5,
+ BOOL = 6,
+ INT16 = 7,
+ COMPLEX64 = 8,
+ INT8 = 9,
+ FLOAT64 = 10,
+ COMPLEX128 = 11,
+ UINT64 = 12,
+  // Experimental: Resource and variant types are experimental and subject
+ // to change. Do not implement custom kernels using resource & variant types
+ // now.
+ RESOURCE = 13,
+ VARIANT = 14,
+ UINT32 = 15,
+}
+
+// Custom quantization parameters for experimenting with new quantization
+// techniques.
+table CustomQuantization {
+ custom:[ubyte] (force_align: 16);
+}
+
+// Represents a specific quantization technique's parameters.
+union QuantizationDetails {
+ CustomQuantization,
+}
+
+// Parameters for converting a quantized tensor back to float.
+table QuantizationParameters {
+ // These four parameters are the asymmetric linear quantization parameters.
+ // Given a quantized value q, the corresponding float value f should be:
+ // f = scale * (q - zero_point)
+ // For other quantization types, the QuantizationDetails below is used.
+ min:[float]; // For importing back into tensorflow.
+ max:[float]; // For importing back into tensorflow.
+ scale:[float]; // For dequantizing the tensor's values.
+ zero_point:[long];
+
+ // If this is not none, the other quantization parameters (i.e. min, max,
+ // scale, zero_point fields above) are ignored and the value of the
+ // QuantizationDetails union should be used.
+ details:QuantizationDetails;
+
+ // Specifies the dimension of the Tensor's shape that the scales and
+ // zero_points correspond to. For example, a tensor t, with dims=[4, 3, 2, 1]
+ // with quantization params:
+ // scale=[1.0, 2.0, 3.0], zero_point=[1, 2, 3], quantization_dimension=1
+ // will be quantized across the second dimension of t.
+ // t[:, 0, :, :] will have scale[0]=1.0, zero_point[0]=1
+ // t[:, 1, :, :] will have scale[1]=2.0, zero_point[0]=2
+ // t[:, 2, :, :] will have scale[2]=3.0, zero_point[0]=3
+ quantized_dimension:int;
+}
+
+// Sparse tensors.
+// We use a modification of the TACO format.
+// Reference: http://tensor-compiler.org/kjolstad-oopsla17-tensor-compiler.pdf
+//
+// To encode a conceptual n-dimensional dense tensor with dims (d0, ..., dn-1),
+// potentially with a k-dimensional block (0 <= k <= n) with dims
+// (dn, ..., dn+k-1), the format needs to specify:
+// 1. In what order to traverse these dimensions. For example, to store a 2-D
+// matrix in row major order, the traversal order would be (d0, d1),
+// whereas to store it in column major order, the traversal order would be
+// (d1, d0). If the 2-D matrix has a 2-D inner block, the traversal order
+// could be (d0, d1, d2, d3).
+// 2. How each block dimension in (dn, ..., dn+k-1) maps to the original
+// tensor dimension in (d0, ..., dn-1).
+// 3. In the traversal order defined above, the format (dense vs. sparse) and
+// index metadata for each dimension. For a dense dimension, this is just
+// the size of that dimension. For a sparse dimension, it's the same as
+// the compressed index defined in the Compressed Sparse Row (CSR) format.
+// (http://scipy-lectures.org/advanced/scipy_sparse/csr_matrix.html)
+
+// The storage type for a dimension. Currently we support:
+// 1. DENSE: each coordinate in this dimension is stored implicitly.
+// 2. SPARSE_CSR: only the coordinates with non-zero elements are stored. The
+//    compression technique is the same as what CSR uses.
+// More types like a sparse dimension with a different compression technique
+// could be added to the list in the future.
+enum DimensionType : byte {
+ DENSE = 0,
+ SPARSE_CSR = 1,
+}
+
+table Int32Vector {
+ values:[int];
+}
+
+table Uint16Vector {
+ values:[ushort] (force_align: 4);
+}
+
+table Uint8Vector {
+ values:[ubyte] (force_align: 4);
+}
+
+// Variable-typed buffer to store the index metadata for a sparse dimension.
+// The widest type is Int32 instead of UInt32 because a tensor's shape is an int32
+// vector. We don't want the per-dimensional index to overflow that range.
+union SparseIndexVector {
+ Int32Vector,
+ Uint16Vector,
+ Uint8Vector
+}
+
+table DimensionMetadata {
+ // Whether a dimension is dense or sparse.
+ format:DimensionType;
+ // Index metadata used for a dimension.
+ // - If format is DimensionType.DENSE then we use the dense_size field to
+ // store the size of that dimension. Each index in that dimension is
+ // stored implicitly.
+ // - If format is DimensionType.SPARSE_CSR then we use array_segments and
+ // array_indices to encode that dimension. array_segments represents how
+ // to segment the indices array, each segment corresponds to one element
+ // in the previous dimension. array_indices represents the index of the
+ // non-zero elements within this dimension (as those in the CSR matrix
+ // format, where the first array is row pointers and the second array is
+ // column indices).
+ dense_size:int;
+ array_segments:SparseIndexVector;
+ array_indices:SparseIndexVector;
+}
+
+// Parameters to encode a sparse TfLite tensor.
+table SparsityParameters {
+ // The traversal order of the dimensions defined in the `shape` field of the
+  // conceptual dense tensor. For an n-dimensional tensor with dims (d0, d1,
+ // ..., dn-1),
+ // - if not block sparse, the traversal_order is just a permutation of (d0,
+ // ..., dn-1). For example, a 2-D matrix stored in row-major order would
+ // have traversal_order = (d0, d1).
+ // - if block sparse with a k-dimensional block (0 <= k <= n), the
+ // traversal_order has n + k elements. The first n elements are still a
+  //   permutation of (d0, ..., dn-1). The last k elements are a permutation
+ // of (dn, ..., dn+k-1), defining how to traverse a block internally. For
+ // example, a 2-D matrix with 2-D blocks, both stored in row-major order
+ // would have traversal_order = (d0, d1, d2, d3).
+ traversal_order:[int];
+ // For an n-dimensional tensor with a k-dimensional block (0 <= k <= n),
+ // stores how a block dimension in (dn, ..., dn+k-1) maps to the original
+  // tensor dimension in (d0, ..., dn-1).
+ // It's stored in the order of (dn, ..., dn+k-1).
+ // If not block-sparse, this field is NULL.
+ block_map:[int];
+ // In the traversal order defined above, the metadata needed for
+ // each dimension to locate the non-zero values in the original dense tensor.
+ // The size of the dim_metadata array = the size of the traversal_order array
+ // = n + k.
+ dim_metadata:[DimensionMetadata];
+}
+
+table Tensor {
+ // The tensor shape. The meaning of each entry is operator-specific but
+ // builtin ops use: [batch size, height, width, number of channels] (That's
+ // Tensorflow's NHWC).
+ shape:[int];
+ type:TensorType;
+ // An index that refers to the buffers table at the root of the model. Or,
+ // if there is no data buffer associated (i.e. intermediate results), then
+  // this is 0 (which refers to an always-present empty buffer).
+ //
+ // The data_buffer itself is an opaque container, with the assumption that the
+ // target device is little-endian. In addition, all builtin operators assume
+ // the memory is ordered such that if `shape` is [4, 3, 2], then index
+ // [i, j, k] maps to data_buffer[i*3*2 + j*2 + k].
+ buffer:uint;
+ name:string; // For debugging and importing back into tensorflow.
+ quantization:QuantizationParameters; // Optional.
+
+ is_variable:bool = false;
+
+ // Parameters to encode a sparse tensor. See the example in
+ // tensorflow/lite/testdata/sparse_tensor.json.
+ sparsity:SparsityParameters; // Optional.
+
+ // Encodes `shape` with unknown dimensions. Unknown dimensions are
+ // represented with -1.
+ shape_signature:[int]; // Optional.
+}
+
+// A list of builtin operators. Builtin operators are slightly faster than custom
+// ones, but not by much. Moreover, while custom operators accept an opaque
+// object containing configuration parameters, builtins have a predetermined
+// set of acceptable options.
+// LINT.IfChange
+enum BuiltinOperator : int32 {
+ ADD = 0,
+ AVERAGE_POOL_2D = 1,
+ CONCATENATION = 2,
+ CONV_2D = 3,
+ DEPTHWISE_CONV_2D = 4,
+ DEPTH_TO_SPACE = 5,
+ DEQUANTIZE = 6,
+ EMBEDDING_LOOKUP = 7,
+ FLOOR = 8,
+ FULLY_CONNECTED = 9,
+ HASHTABLE_LOOKUP = 10,
+ L2_NORMALIZATION = 11,
+ L2_POOL_2D = 12,
+ LOCAL_RESPONSE_NORMALIZATION = 13,
+ LOGISTIC = 14,
+ LSH_PROJECTION = 15,
+ LSTM = 16,
+ MAX_POOL_2D = 17,
+ MUL = 18,
+ RELU = 19,
+ // NOTE(aselle): RELU_N1_TO_1 used to be called RELU1, but it was renamed
+ // since different model developers use RELU1 in different ways. Never
+ // create another op called RELU1.
+ RELU_N1_TO_1 = 20,
+ RELU6 = 21,
+ RESHAPE = 22,
+ RESIZE_BILINEAR = 23,
+ RNN = 24,
+ SOFTMAX = 25,
+ SPACE_TO_DEPTH = 26,
+ SVDF = 27,
+ TANH = 28,
+ CONCAT_EMBEDDINGS = 29,
+ SKIP_GRAM = 30,
+ CALL = 31,
+ CUSTOM = 32,
+ EMBEDDING_LOOKUP_SPARSE = 33,
+ PAD = 34,
+ UNIDIRECTIONAL_SEQUENCE_RNN = 35,
+ GATHER = 36,
+ BATCH_TO_SPACE_ND = 37,
+ SPACE_TO_BATCH_ND = 38,
+ TRANSPOSE = 39,
+ MEAN = 40,
+ SUB = 41,
+ DIV = 42,
+ SQUEEZE = 43,
+ UNIDIRECTIONAL_SEQUENCE_LSTM = 44,
+ STRIDED_SLICE = 45,
+ BIDIRECTIONAL_SEQUENCE_RNN = 46,
+ EXP = 47,
+ TOPK_V2 = 48,
+ SPLIT = 49,
+ LOG_SOFTMAX = 50,
+ // DELEGATE is a special op type for the operations which are delegated to
+ // other backends.
+ // WARNING: Experimental interface, subject to change
+ DELEGATE = 51,
+ BIDIRECTIONAL_SEQUENCE_LSTM = 52,
+ CAST = 53,
+ PRELU = 54,
+ MAXIMUM = 55,
+ ARG_MAX = 56,
+ MINIMUM = 57,
+ LESS = 58,
+ NEG = 59,
+ PADV2 = 60,
+ GREATER = 61,
+ GREATER_EQUAL = 62,
+ LESS_EQUAL = 63,
+ SELECT = 64,
+ SLICE = 65,
+ SIN = 66,
+ TRANSPOSE_CONV = 67,
+ SPARSE_TO_DENSE = 68,
+ TILE = 69,
+ EXPAND_DIMS = 70,
+ EQUAL = 71,
+ NOT_EQUAL = 72,
+ LOG = 73,
+ SUM = 74,
+ SQRT = 75,
+ RSQRT = 76,
+ SHAPE = 77,
+ POW = 78,
+ ARG_MIN = 79,
+ FAKE_QUANT = 80,
+ REDUCE_PROD = 81,
+ REDUCE_MAX = 82,
+ PACK = 83,
+ LOGICAL_OR = 84,
+ ONE_HOT = 85,
+ LOGICAL_AND = 86,
+ LOGICAL_NOT = 87,
+ UNPACK = 88,
+ REDUCE_MIN = 89,
+ FLOOR_DIV = 90,
+ REDUCE_ANY = 91,
+ SQUARE = 92,
+ ZEROS_LIKE = 93,
+ FILL = 94,
+ FLOOR_MOD = 95,
+ RANGE = 96,
+ RESIZE_NEAREST_NEIGHBOR = 97,
+ LEAKY_RELU = 98,
+ SQUARED_DIFFERENCE = 99,
+ MIRROR_PAD = 100,
+ ABS = 101,
+ SPLIT_V = 102,
+ UNIQUE = 103,
+ CEIL = 104,
+ REVERSE_V2 = 105,
+ ADD_N = 106,
+ GATHER_ND = 107,
+ COS = 108,
+ WHERE = 109,
+ RANK = 110,
+ ELU = 111,
+ REVERSE_SEQUENCE = 112,
+ MATRIX_DIAG = 113,
+ QUANTIZE = 114,
+ MATRIX_SET_DIAG = 115,
+ ROUND = 116,
+ HARD_SWISH = 117,
+ IF = 118,
+ WHILE = 119,
+ NON_MAX_SUPPRESSION_V4 = 120,
+ NON_MAX_SUPPRESSION_V5 = 121,
+ SCATTER_ND = 122,
+ SELECT_V2 = 123,
+ DENSIFY = 124,
+ SEGMENT_SUM = 125,
+ BATCH_MATMUL = 126,
+ PLACEHOLDER_FOR_GREATER_OP_CODES = 127,
+ CUMSUM = 128,
+ CALL_ONCE = 129,
+ BROADCAST_TO = 130,
+ RFFT2D = 131,
+ CONV_3D = 132,
+ IMAG=133,
+ REAL=134,
+ COMPLEX_ABS=135,
+ HASHTABLE = 136,
+ HASHTABLE_FIND = 137,
+ HASHTABLE_IMPORT = 138,
+ HASHTABLE_SIZE = 139,
+ REDUCE_ALL = 140,
+ CONV_3D_TRANSPOSE = 141,
+ VAR_HANDLE = 142,
+ READ_VARIABLE = 143,
+ ASSIGN_VARIABLE = 144,
+ BROADCAST_ARGS = 145,
+ RANDOM_STANDARD_NORMAL = 146,
+ BUCKETIZE = 147,
+ RANDOM_UNIFORM = 148,
+ MULTINOMIAL = 149,
+ GELU = 150,
+}
+// LINT.ThenChange(nnapi_linter/linter.proto)
+
+// Options for the builtin operators.
+union BuiltinOptions {
+ Conv2DOptions,
+ DepthwiseConv2DOptions,
+ ConcatEmbeddingsOptions,
+ LSHProjectionOptions,
+ Pool2DOptions,
+ SVDFOptions,
+ RNNOptions,
+ FullyConnectedOptions,
+ SoftmaxOptions,
+ ConcatenationOptions,
+ AddOptions,
+ L2NormOptions,
+ LocalResponseNormalizationOptions,
+ LSTMOptions,
+ ResizeBilinearOptions,
+ CallOptions,
+ ReshapeOptions,
+ SkipGramOptions,
+ SpaceToDepthOptions,
+ EmbeddingLookupSparseOptions,
+ MulOptions,
+ PadOptions,
+ GatherOptions,
+ BatchToSpaceNDOptions,
+ SpaceToBatchNDOptions,
+ TransposeOptions,
+ ReducerOptions,
+ SubOptions,
+ DivOptions,
+ SqueezeOptions,
+ SequenceRNNOptions,
+ StridedSliceOptions,
+ ExpOptions,
+ TopKV2Options,
+ SplitOptions,
+ LogSoftmaxOptions,
+ CastOptions,
+ DequantizeOptions,
+ MaximumMinimumOptions,
+ ArgMaxOptions,
+ LessOptions,
+ NegOptions,
+ PadV2Options,
+ GreaterOptions,
+ GreaterEqualOptions,
+ LessEqualOptions,
+ SelectOptions,
+ SliceOptions,
+ TransposeConvOptions,
+ SparseToDenseOptions,
+ TileOptions,
+ ExpandDimsOptions,
+ EqualOptions,
+ NotEqualOptions,
+ ShapeOptions,
+ PowOptions,
+ ArgMinOptions,
+ FakeQuantOptions,
+ PackOptions,
+ LogicalOrOptions,
+ OneHotOptions,
+ LogicalAndOptions,
+ LogicalNotOptions,
+ UnpackOptions,
+ FloorDivOptions,
+ SquareOptions,
+ ZerosLikeOptions,
+ FillOptions,
+ BidirectionalSequenceLSTMOptions,
+ BidirectionalSequenceRNNOptions,
+ UnidirectionalSequenceLSTMOptions,
+ FloorModOptions,
+ RangeOptions,
+ ResizeNearestNeighborOptions,
+ LeakyReluOptions,
+ SquaredDifferenceOptions,
+ MirrorPadOptions,
+ AbsOptions,
+ SplitVOptions,
+ UniqueOptions,
+ ReverseV2Options,
+ AddNOptions,
+ GatherNdOptions,
+ CosOptions,
+ WhereOptions,
+ RankOptions,
+ ReverseSequenceOptions,
+ MatrixDiagOptions,
+ QuantizeOptions,
+ MatrixSetDiagOptions,
+ HardSwishOptions,
+ IfOptions,
+ WhileOptions,
+ DepthToSpaceOptions,
+ NonMaxSuppressionV4Options,
+ NonMaxSuppressionV5Options,
+ ScatterNdOptions,
+ SelectV2Options,
+ DensifyOptions,
+ SegmentSumOptions,
+ BatchMatMulOptions,
+ CumsumOptions,
+ CallOnceOptions,
+ BroadcastToOptions,
+ Rfft2dOptions,
+ Conv3DOptions,
+ HashtableOptions,
+ HashtableFindOptions,
+ HashtableImportOptions,
+ HashtableSizeOptions,
+ VarHandleOptions,
+ ReadVariableOptions,
+ AssignVariableOptions,
+ RandomOptions,
+ BucketizeOptions,
+ GeluOptions,
+}
+
+enum Padding : byte { SAME, VALID }
+
+enum ActivationFunctionType : byte {
+ NONE = 0,
+ RELU = 1,
+ RELU_N1_TO_1 = 2,
+ RELU6 = 3,
+ TANH = 4,
+ SIGN_BIT = 5,
+}
+
+table Conv2DOptions {
+ padding:Padding;
+ stride_w:int;
+ stride_h:int;
+ fused_activation_function:ActivationFunctionType;
+ dilation_w_factor:int = 1;
+ dilation_h_factor:int = 1;
+}
+
+// Options for both Conv3D and Conv3DTranspose.
+table Conv3DOptions {
+ padding:Padding;
+ stride_d:int;
+ stride_w:int;
+ stride_h:int;
+ fused_activation_function:ActivationFunctionType;
+ dilation_d_factor:int = 1;
+ dilation_w_factor:int = 1;
+ dilation_h_factor:int = 1;
+}
+
+table Pool2DOptions {
+ padding:Padding;
+ stride_w:int;
+ stride_h:int;
+ filter_width:int;
+ filter_height:int;
+ fused_activation_function:ActivationFunctionType;
+}
+
+table DepthwiseConv2DOptions {
+ // Parameters for DepthwiseConv version 1 or above.
+ padding:Padding;
+ stride_w:int;
+ stride_h:int;
+ // `depth_multiplier` is redundant. It's used by CPU kernels in
+ // TensorFlow 2.0 or below, but ignored in versions above.
+ // See comments in lite/c/builtin_op_data.h for more details.
+ depth_multiplier:int;
+ fused_activation_function:ActivationFunctionType;
+ // Parameters for DepthwiseConv version 2 or above.
+ dilation_w_factor:int = 1;
+ dilation_h_factor:int = 1;
+}
+
+table ConcatEmbeddingsOptions {
+ num_channels:int;
+ num_columns_per_channel:[int];
+ embedding_dim_per_channel:[int]; // This could be inferred from parameters.
+}
+
+enum LSHProjectionType: byte {
+ UNKNOWN = 0,
+ SPARSE = 1,
+ DENSE = 2,
+}
+
+table LSHProjectionOptions {
+ type: LSHProjectionType;
+}
+
+table SVDFOptions {
+ rank:int;
+ fused_activation_function:ActivationFunctionType;
+ // For weights-only quantization, use asymmetric quantization for
+ // non-constant inputs at evaluation time.
+ asymmetric_quantize_inputs:bool;
+}
+
+// An implementation of TensorFlow RNNCell.
+table RNNOptions {
+ fused_activation_function:ActivationFunctionType;
+ asymmetric_quantize_inputs:bool;
+}
+
+// An implementation of TensorFlow dynamic_rnn with RNNCell.
+table SequenceRNNOptions {
+ time_major:bool;
+ fused_activation_function:ActivationFunctionType;
+ asymmetric_quantize_inputs:bool;
+}
+
+// An implementation of TensorFlow bidirectional_dynamic_rnn with RNNCell.
+table BidirectionalSequenceRNNOptions {
+ time_major:bool;
+ fused_activation_function:ActivationFunctionType;
+ merge_outputs: bool;
+ asymmetric_quantize_inputs:bool;
+}
+
+enum FullyConnectedOptionsWeightsFormat: byte {
+ DEFAULT = 0,
+ SHUFFLED4x16INT8 = 1,
+}
+
+// An implementation of TensorFlow fully_connected (a.k.a. Dense) layer.
+table FullyConnectedOptions {
+ // Parameters for FullyConnected version 1 or above.
+ fused_activation_function:ActivationFunctionType;
+
+ // Parameters for FullyConnected version 2 or above.
+ weights_format:FullyConnectedOptionsWeightsFormat = DEFAULT;
+
+ // Parameters for FullyConnected version 5 or above.
+ // If set to true, then the number of dimensions is preserved. Furthermore,
+ // all but the last dimension of the input and output shapes will be equal.
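+ // For example, a [batch, d1, d2] input yields a [batch, d1, units] output.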
+ keep_num_dims: bool;
+
+ // Parameters for FullyConnected version 7 or above.
+ // If set to true, then weights-only op will use asymmetric quantization for
+ // inputs.
+ asymmetric_quantize_inputs: bool;
+}
+
+table SoftmaxOptions {
+ beta: float;
+}
+
+// An implementation of TensorFlow concat.
+table ConcatenationOptions {
+ axis:int;
+ fused_activation_function:ActivationFunctionType;
+}
+
+table AddOptions {
+ fused_activation_function:ActivationFunctionType;
+ // Parameters supported by version 3.
+ pot_scale_int16:bool = true;
+}
+
+table MulOptions {
+ fused_activation_function:ActivationFunctionType;
+}
+
+table L2NormOptions {
+ // This field is currently ignored in the L2 Norm Op.
+ fused_activation_function:ActivationFunctionType;
+}
+
+table LocalResponseNormalizationOptions {
+ radius:int;
+ bias:float;
+ alpha:float;
+ beta:float;
+}
+
+enum LSTMKernelType : byte {
+ // Full LSTM kernel which supports peephole and projection.
+ FULL = 0,
+ // Basic LSTM kernels. Equivalent to TensorFlow BasicLSTMCell.
+ BASIC = 1,
+}
+
+// An implementation of TensorFlow LSTMCell and CoupledInputForgetGateLSTMCell
+table LSTMOptions {
+ // Parameters for LSTM version 1 or above.
+ fused_activation_function:ActivationFunctionType;
+ cell_clip: float; // Optional, 0.0 means no clipping
+ proj_clip: float; // Optional, 0.0 means no clipping
+
+ // Parameters for LSTM version 2 or above.
+ // Basic kernel is only supported in version 2 or above.
+ kernel_type: LSTMKernelType = FULL;
+
+ // Parameters for LSTM version 4 or above.
+ asymmetric_quantize_inputs: bool;
+}
+
+// An implementation of TensorFlow dynamic_rnn with LSTMCell.
+table UnidirectionalSequenceLSTMOptions {
+ fused_activation_function:ActivationFunctionType;
+ cell_clip: float; // Optional, 0.0 means no clipping
+ proj_clip: float; // Optional, 0.0 means no clipping
+
+ // If true then first dimension is sequence, otherwise batch.
+ time_major:bool;
+
+ // Parameter for Unidirectional Sequence LSTM version 4.
+ asymmetric_quantize_inputs:bool;
+}
+
+table BidirectionalSequenceLSTMOptions {
+ // Parameters supported by version 1:
+ fused_activation_function:ActivationFunctionType;
+ cell_clip: float; // Optional, 0.0 means no clipping
+ proj_clip: float; // Optional, 0.0 means no clipping
+
+ // If true, store the outputs of both directions into the first output.
+ merge_outputs: bool;
+
+ // Parameters supported by version 2:
+ // If true then first dimension is sequence, otherwise batch.
+ // Version 1 implementations assumed time_major to be true, so this default
+ // value should never change.
+ time_major: bool = true;
+
+ // Parameters for version 3 or above.
+ asymmetric_quantize_inputs:bool;
+}
+
+table ResizeBilinearOptions {
+ new_height: int (deprecated);
+ new_width: int (deprecated);
+ align_corners: bool;
+ half_pixel_centers: bool;
+}
+
+table ResizeNearestNeighborOptions {
+ align_corners: bool;
+ half_pixel_centers: bool;
+}
+
+// Options for a call operation.
+table CallOptions {
+ // The subgraph index that needs to be called.
+ subgraph:uint;
+}
+
+table PadOptions {
+}
+
+table PadV2Options {
+}
+
+table ReshapeOptions {
+ new_shape:[int];
+}
+
+table SpaceToBatchNDOptions {
+}
+
+table BatchToSpaceNDOptions {
+}
+
+table SkipGramOptions {
+ ngram_size: int;
+ max_skip_size: int;
+ include_all_ngrams: bool;
+}
+
+table SpaceToDepthOptions {
+ block_size: int;
+}
+
+table DepthToSpaceOptions {
+ block_size: int;
+}
+
+table SubOptions {
+ fused_activation_function:ActivationFunctionType;
+ // Parameters supported by version 5
+ pot_scale_int16:bool = true;
+}
+
+table DivOptions {
+ fused_activation_function:ActivationFunctionType;
+}
+
+table TopKV2Options {
+}
+
+enum CombinerType : byte {
+ SUM = 0,
+ MEAN = 1,
+ SQRTN = 2,
+}
+
+table EmbeddingLookupSparseOptions {
+ combiner:CombinerType;
+}
+
+table GatherOptions {
+ axis: int;
+ // Parameters for Gather version 5 or above.
+ batch_dims: int = 0;
+}
+
+table TransposeOptions {
+}
+
+table ExpOptions {
+}
+
+table CosOptions {
+}
+
+table ReducerOptions {
+ keep_dims: bool;
+}
+
+table SqueezeOptions {
+ squeeze_dims:[int];
+}
+
+table SplitOptions {
+ num_splits: int;
+}
+
+table SplitVOptions {
+ num_splits: int;
+}
+
+table StridedSliceOptions {
+ begin_mask: int;
+ end_mask: int;
+ ellipsis_mask: int;
+ new_axis_mask: int;
+ shrink_axis_mask: int;
+}
+
+table LogSoftmaxOptions {
+}
+
+table CastOptions {
+ in_data_type: TensorType;
+ out_data_type: TensorType;
+}
+
+table DequantizeOptions {
+}
+
+table MaximumMinimumOptions {
+}
+
+table TileOptions {
+}
+
+table ArgMaxOptions {
+ output_type : TensorType;
+}
+
+table ArgMinOptions {
+ output_type : TensorType;
+}
+
+table GreaterOptions {
+}
+
+table GreaterEqualOptions {
+}
+
+table LessOptions {
+}
+
+table LessEqualOptions {
+}
+
+table NegOptions {
+}
+
+table SelectOptions {
+}
+
+table SliceOptions {
+}
+
+table TransposeConvOptions {
+ padding:Padding;
+ stride_w:int;
+ stride_h:int;
+}
+
+table ExpandDimsOptions {
+}
+
+table SparseToDenseOptions {
+ validate_indices:bool;
+}
+
+table EqualOptions {
+}
+
+table NotEqualOptions {
+}
+
+table ShapeOptions {
+ // Optional output type of the operation (int32 or int64). Defaults to int32.
+ out_type : TensorType;
+}
+
+table RankOptions {
+}
+
+table PowOptions {
+}
+
+table FakeQuantOptions {
+ // Parameters supported by version 1:
+ min:float;
+ max:float;
+ num_bits:int;
+
+ // Parameters supported by version 2:
+ narrow_range:bool;
+}
+
+table PackOptions {
+ values_count:int;
+ axis:int;
+}
+
+table LogicalOrOptions {
+}
+
+table OneHotOptions {
+ axis:int;
+}
+
+table AbsOptions {
+}
+
+table HardSwishOptions {
+}
+
+table LogicalAndOptions {
+}
+
+table LogicalNotOptions {
+}
+
+table UnpackOptions {
+ num:int;
+ axis:int;
+}
+
+table FloorDivOptions {
+}
+
+table SquareOptions {
+}
+
+table ZerosLikeOptions {
+}
+
+table FillOptions {
+}
+
+table FloorModOptions {
+}
+
+table RangeOptions {
+}
+
+table LeakyReluOptions {
+ alpha:float;
+}
+
+table SquaredDifferenceOptions {
+}
+
+enum MirrorPadMode : byte {
+ // Doesn't include borders.
+ REFLECT = 0,
+ // Includes borders.
+ SYMMETRIC = 1,
+}
+
+table MirrorPadOptions {
+ mode:MirrorPadMode;
+}
+
+table UniqueOptions {
+ idx_out_type:TensorType = INT32;
+}
+
+table ReverseV2Options {
+}
+
+table AddNOptions {
+}
+
+table GatherNdOptions {
+}
+
+table WhereOptions {
+}
+
+table ReverseSequenceOptions {
+ seq_dim:int;
+ batch_dim:int = 0;
+}
+
+table MatrixDiagOptions {
+}
+
+table QuantizeOptions {
+}
+
+table MatrixSetDiagOptions {
+}
+
+table IfOptions {
+ then_subgraph_index:int;
+ else_subgraph_index:int;
+}
+
+table CallOnceOptions {
+ init_subgraph_index:int;
+}
+
+table WhileOptions {
+ cond_subgraph_index:int;
+ body_subgraph_index:int;
+}
+
+table NonMaxSuppressionV4Options {
+}
+
+table NonMaxSuppressionV5Options {
+}
+
+table ScatterNdOptions {
+}
+
+table SelectV2Options {
+}
+
+table DensifyOptions {
+}
+
+table SegmentSumOptions {
+}
+
+table BatchMatMulOptions {
+ adj_x:bool;
+ adj_y:bool;
+ // Parameters for BatchMatMul version 4 or above.
+ // If set to true, then weights-only op will use asymmetric quantization for
+ // inputs.
+ asymmetric_quantize_inputs: bool;
+}
+
+table CumsumOptions {
+ exclusive:bool;
+ reverse:bool;
+}
+
+table BroadcastToOptions {
+}
+
+table Rfft2dOptions {
+}
+
+table HashtableOptions {
+ // The identity of hash tables. This identity will be used across different
+ // subgraphs in the same interpreter instance.
+ table_id:int;
+ key_dtype:TensorType;
+ value_dtype:TensorType;
+}
+
+table HashtableFindOptions {
+}
+
+table HashtableImportOptions {
+}
+
+table HashtableSizeOptions {
+}
+
+table VarHandleOptions {
+ container:string;
+ shared_name:string;
+}
+
+table ReadVariableOptions {
+}
+
+table AssignVariableOptions {
+}
+
+table RandomOptions {
+ seed: long;
+ seed2: long;
+}
+
+table BucketizeOptions {
+ boundaries: [float]; // The bucket boundaries.
+}
+
+table GeluOptions {
+ approximate: bool;
+}
+
+// An OperatorCode can be an enum value (BuiltinOperator) if the operator is a
+// builtin, or a string if the operator is custom.
+table OperatorCode {
+ // This field is for backward compatibility. This field will be used when
+ // the value of the extended builtin_code field is less than
+ // BuiltinOperator_PLACEHOLDER_FOR_GREATER_OP_CODES.
+ deprecated_builtin_code:byte;
+ custom_code:string;
+
+ // The version of the operator. The version needs to be bumped whenever new
+ // parameters are introduced into an op.
+ version:int = 1;
+
+ // This field is introduced to resolve the op builtin-code shortage problem
+ // (the original BuiltinOperator enum field was represented as a byte).
+ // This field will be used when the value of the extended builtin_code field
+ // is greater than BuiltinOperator_PLACEHOLDER_FOR_GREATER_OP_CODES.
+ builtin_code:BuiltinOperator;
+}
+
+enum CustomOptionsFormat : byte {
+ FLEXBUFFERS = 0,
+}
+
+// An operator takes tensors as inputs and outputs. The type of operation being
+// performed is determined by an index into the list of valid OperatorCodes,
+// while the specifics of each operation are configured using builtin_options
+// or custom_options.
+table Operator {
+ // Index into the operator_codes array. Using an integer here avoids
+ // complicated map lookups.
+ opcode_index:uint;
+
+ // Optional input are indicated by -1.
+ inputs:[int];
+ outputs:[int];
+
+ builtin_options:BuiltinOptions;
+ custom_options:[ubyte];
+ custom_options_format:CustomOptionsFormat;
+
+ // A list of booleans indicating the input tensors which are being mutated by
+ // this operator (e.g. used by RNN and LSTM).
+ // For example, if the "inputs" array refers to 5 tensors and the second and
+ // fifth are mutable variables, then this list will contain
+ // [false, true, false, false, true].
+ //
+ // If the list is empty, no variable is mutated in this operator.
+ // The list either has the same length as `inputs`, or is empty.
+ mutating_variable_inputs:[bool];
+
+ // A list of indices to the subgraph's "tensors" that are internal to an Op.
+ // Internal tensors are those that do not flow in or out of the operation,
+ // but instead are part of internal computation. As such, the operation's
+ // implementation may manage its memory more efficiently. They are needed
+ // however (i.e. not just an implementation detail) since they are part of the
+ // computation, which may require relevant metadata such as quantization
+ // parameters.
+ intermediates:[int];
+}
+
+// The root type, defining a subgraph, which typically represents an entire
+// model.
+table SubGraph {
+ // A list of all tensors used in this subgraph.
+ tensors:[Tensor];
+
+ // Indices of the tensors that are inputs into this subgraph. Note this is
+ // the list of non-static tensors that feed into the subgraph for inference.
+ inputs:[int];
+
+ // Indices of the tensors that are outputs out of this subgraph. Note this is
+ // the list of output tensors that are considered the product of the
+ // subgraph's inference.
+ outputs:[int];
+
+ // All operators, in execution order.
+ operators:[Operator];
+
+ // Name of this subgraph (used for debugging).
+ name:string;
+}
+
+// Table of raw data buffers (used for constant tensors). Referenced by tensors
+// by index. The generous alignment accommodates mmap-friendly data structures.
+table Buffer {
+ data:[ubyte] (force_align: 16);
+}
+
+table Metadata {
+ // A human-readable string to uniquely identify a Metadata.
+ name:string;
+ // An index to the buffers table.
+ buffer:uint;
+}
+
+// Map from an alias name of tensor to tensor index in the graph.
+// This is used in Signature def.
+table TensorMap {
+ // Represents the alias to use for this tensor.
+ name:string;
+
+ // The actual tensor index in the primary graph, that 'name' corresponds to.
+ tensor_index:uint;
+}
+
+// This corresponds to SignatureDef in TensorFlow SavedModel.
+// The SignatureDef will be part of the SavedModel provided for conversion.
+table SignatureDef {
+ // Named inputs for this signature.
+ inputs:[TensorMap];
+
+ // Named outputs for this signature.
+ outputs:[TensorMap];
+
+ // Key value which was in the TensorFlow SavedModel SignatureDef map.
+ signature_key:string;
+
+ // Model tag, deprecated.
+ deprecated_tag:string (deprecated);
+
+ // Index of the subgraph that corresponds to the exported method.
+ subgraph_index:uint;
+}
+
+table Model {
+ // Version of the schema.
+ version:uint;
+
+ // A list of all operator codes used in this model. This is
+ // kept in order because operators carry an index into this
+ // vector.
+ operator_codes:[OperatorCode];
+
+ // All the subgraphs of the model. The 0th is assumed to be the main
+ // model.
+ subgraphs:[SubGraph];
+
+ // A description of the model.
+ description:string;
+
+ // Buffers of the model.
+ // Note the 0th entry of this array must be an empty buffer (sentinel).
+ // This is a convention so that tensors without a buffer can provide 0 as
+ // their buffer.
+ buffers:[Buffer];
+
+ // Metadata about the model. Indirects into the existing buffers list.
+ // Deprecated, prefer to use metadata field.
+ metadata_buffer:[int];
+
+ // Metadata about the model.
+ metadata:[Metadata];
+
+ // Optional SignatureDefs for the model.
+ signature_defs:[SignatureDef];
+}
+
+root_type Model;
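
The tables above compose as follows: Operator.opcode_index points into Model.operator_codes, and each OperatorCode resolves to a BuiltinOperator through the two-field scheme documented in its comments. A minimal sketch with the flatc-generated C++ API (accessor names such as GetModel and opcode_index() follow the generator's convention and are assumptions here; adjust for the schema's namespace):

    #include <algorithm>
    #include "schema_generated.h" // hypothetical flatc output for this schema

    // Older writers only fill deprecated_builtin_code (always < 127); newer ones
    // use the extended builtin_code field, so the larger value is the real opcode.
    BuiltinOperator effective_builtin_code(const OperatorCode *code)
    {
      return std::max(code->builtin_code(),
                      static_cast<BuiltinOperator>(code->deprecated_builtin_code()));
    }

    void walk_main_graph(const void *data) // data: serialized model buffer
    {
      const Model *model = GetModel(data);
      const SubGraph *graph = model->subgraphs()->Get(0); // 0th subgraph is the main model
      for (const Operator *op : *graph->operators())
      {
        const OperatorCode *code = model->operator_codes()->Get(op->opcode_index());
        (void)effective_builtin_code(code);
        // Tensors whose buffer index is 0 reference the empty sentinel buffer,
        // i.e. they carry no constant data.
      }
    }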
diff --git a/runtime/contrib/android/api/build.gradle b/runtime/contrib/android/api/build.gradle
index 9b8840fc2..9a9465072 100644
--- a/runtime/contrib/android/api/build.gradle
+++ b/runtime/contrib/android/api/build.gradle
@@ -8,7 +8,7 @@ android {
minSdkVersion 26
targetSdkVersion 29
versionCode 1
- versionName "1.19.0"
+ versionName "1.20.0"
externalNativeBuild {
ndkBuild {
diff --git a/runtime/contrib/android_benchmark_app/CMakeLists.txt b/runtime/contrib/android_benchmark_app/CMakeLists.txt
index beb279cb9..63e4fc545 100644
--- a/runtime/contrib/android_benchmark_app/CMakeLists.txt
+++ b/runtime/contrib/android_benchmark_app/CMakeLists.txt
@@ -55,7 +55,7 @@ target_link_libraries(android_benchmark_native nnfw_lib_tflite)
target_link_libraries(android_benchmark_native nnfw_lib_misc)
target_link_libraries(android_benchmark_native log)
-nnas_find_package(FlatBuffersSource EXACT 1.12 REQUIRED)
+nnas_find_package(FlatBuffersSource EXACT 2.0 REQUIRED)
target_include_directories(android_benchmark_native PUBLIC ${FlatBuffersSource_DIR}/include .)
add_custom_target(android-benchmark-apk ALL
diff --git a/runtime/onert/api/CMakeLists.txt b/runtime/onert/api/CMakeLists.txt
index b238b1f89..beb243a4d 100644
--- a/runtime/onert/api/CMakeLists.txt
+++ b/runtime/onert/api/CMakeLists.txt
@@ -11,6 +11,7 @@ set(NNFW_API_HEADERS include/nnfw.h include/nnfw_experimental.h)
target_link_libraries(${ONERT_DEV} PUBLIC nnfw-nnapi-header)
target_link_libraries(${ONERT_DEV} PRIVATE onert_core)
target_link_libraries(${ONERT_DEV} PRIVATE jsoncpp tflite_loader circle_loader ${LIB_PTHREAD})
+target_link_libraries(${ONERT_DEV} PRIVATE trix_loader)
target_link_libraries(${ONERT_DEV} PRIVATE nnfw_common)
target_link_libraries(${ONERT_DEV} PRIVATE nnfw_coverage)
# NOTE Below line is added to remove warning for android build
diff --git a/runtime/onert/api/include/nnfw.h b/runtime/onert/api/include/nnfw.h
index 6eb7e6ba9..6f296a931 100644
--- a/runtime/onert/api/include/nnfw.h
+++ b/runtime/onert/api/include/nnfw.h
@@ -92,6 +92,15 @@ typedef enum
*/
NNFW_TYPE_TENSOR_QUANT8_ASYMM_SIGNED = 6,
+ /**
+ * A tensor of 16 bit signed integers that represent real numbers.
+ *
+ * real_value = (integer_value - zeroPoint) * scale.
+ *
+ * Forced to have zeroPoint equal to 0.
+ */
+ NNFW_TYPE_TENSOR_QUANT16_SYMM_SIGNED = 7,
+
} NNFW_TYPE;
/**
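
To make the comment above concrete (a sketch, not code from this patch): because zeroPoint is forced to 0 for this type, dequantization collapses to a single multiply.

    #include <cstdint>

    // real_value = (integer_value - zeroPoint) * scale, with zeroPoint fixed at 0
    inline float dequantize_q16_symm(int16_t value, float scale)
    {
      return static_cast<float>(value) * scale;
    }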
diff --git a/runtime/onert/api/include/nnfw_version.h b/runtime/onert/api/include/nnfw_version.h
index b885a6b90..45b34716a 100644
--- a/runtime/onert/api/include/nnfw_version.h
+++ b/runtime/onert/api/include/nnfw_version.h
@@ -21,6 +21,6 @@
* NNFW_VERSION is a uint32 value representing nnfw runtime version
* in 0xMMmmmmPP, where MM = major, mmmm = minor, PP = patch
*/
-#define NNFW_VERSION 0x01001300
+#define NNFW_VERSION 0x01001400
#endif // __NNFW_VERSION_H__
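
As a sanity check on the 0xMMmmmmPP layout described in the header comment, a small sketch of decoding the new value; it unpacks to 1.20.0, matching the versionName bump in build.gradle above.

    #include <cstdint>

    constexpr uint32_t kVersion = 0x01001400; // NNFW_VERSION after this change
    constexpr uint32_t kMajor = (kVersion >> 24) & 0xFF;  // MM   -> 1
    constexpr uint32_t kMinor = (kVersion >> 8) & 0xFFFF; // mmmm -> 20
    constexpr uint32_t kPatch = kVersion & 0xFF;          // PP   -> 0
    static_assert(kMajor == 1 && kMinor == 20 && kPatch == 0, "decodes to 1.20.0");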
diff --git a/runtime/onert/api/src/nnfw_api.cc b/runtime/onert/api/src/nnfw_api.cc
index b69dd83e4..0ebd385e9 100644
--- a/runtime/onert/api/src/nnfw_api.cc
+++ b/runtime/onert/api/src/nnfw_api.cc
@@ -28,6 +28,7 @@ STATIC_ASSERT_ENUM_CHECK(NNFW_TYPE_TENSOR_BOOL, 3);
STATIC_ASSERT_ENUM_CHECK(NNFW_TYPE_TENSOR_UINT8, 4);
STATIC_ASSERT_ENUM_CHECK(NNFW_TYPE_TENSOR_INT64, 5);
STATIC_ASSERT_ENUM_CHECK(NNFW_TYPE_TENSOR_QUANT8_ASYMM_SIGNED, 6);
+STATIC_ASSERT_ENUM_CHECK(NNFW_TYPE_TENSOR_QUANT16_SYMM_SIGNED, 7);
STATIC_ASSERT_ENUM_CHECK(NNFW_STATUS_NO_ERROR, 0);
STATIC_ASSERT_ENUM_CHECK(NNFW_STATUS_ERROR, 1);
diff --git a/runtime/onert/api/src/nnfw_api_internal.cc b/runtime/onert/api/src/nnfw_api_internal.cc
index 1a3aaf9e9..62a043921 100644
--- a/runtime/onert/api/src/nnfw_api_internal.cc
+++ b/runtime/onert/api/src/nnfw_api_internal.cc
@@ -23,6 +23,7 @@
#include "exec/Execution.h"
#include "circle_loader.h"
#include "tflite_loader.h"
+#include "trix_loader.h"
#include "json/json.h"
#include "ir/OpCode.h"
#include "util/TracingCtx.h"
@@ -155,6 +156,45 @@ void setConfigKeyValues(const CfgKeyValues &keyValues)
onert::util::config_source_ext(std::move(configsrc));
}
+NNFW_TYPE datatype_to_nnfw_dtype(onert::ir::DataType dt)
+{
+ using onert::ir::DataType;
+ switch (dt)
+ {
+ case DataType::FLOAT32:
+ return NNFW_TYPE_TENSOR_FLOAT32;
+ case DataType::INT32:
+ return NNFW_TYPE_TENSOR_INT32;
+ case DataType::QUANT_UINT8_ASYMM:
+ return NNFW_TYPE_TENSOR_QUANT8_ASYMM;
+ case DataType::BOOL8:
+ return NNFW_TYPE_TENSOR_BOOL;
+ case DataType::UINT8:
+ return NNFW_TYPE_TENSOR_UINT8;
+ case DataType::INT64:
+ return NNFW_TYPE_TENSOR_INT64;
+ case DataType::QUANT_INT8_ASYMM:
+ return NNFW_TYPE_TENSOR_QUANT8_ASYMM_SIGNED;
+ case DataType::QUANT_INT16_SYMM:
+ return NNFW_TYPE_TENSOR_QUANT16_SYMM_SIGNED;
+ case DataType::UINT32:
+ case DataType::QUANT_INT8_SYMM:
+ default:
+ throw std::runtime_error("Error: Model has type that runtime API does not support.");
+ }
+}
+
+void fillTensorInfo(nnfw_tensorinfo *ti, const onert::ir::Shape &shape,
+ const onert::ir::DataType &dtype)
+{
+ ti->rank = shape.rank();
+ for (int j = 0; j < ti->rank; ++j)
+ {
+ ti->dims[j] = shape.dim(j);
+ }
+ ti->dtype = datatype_to_nnfw_dtype(dtype);
+}
+
} // namespace
nnfw_session::nnfw_session()
@@ -225,6 +265,10 @@ NNFW_STATUS nnfw_session::load_model_from_modelfile(const char *model_file_path)
{
_subgraphs = onert::circle_loader::loadModel(filename.c_str());
}
+ else if (model_type == ".tvn")
+ {
+ _subgraphs = onert::trix_loader::loadModel(filename.c_str());
+ }
else
{
std::cerr << "Unsupported model type" << std::endl;
@@ -307,6 +351,10 @@ NNFW_STATUS nnfw_session::load_model_from_nnpackage(const char *package_dir)
{
_subgraphs = onert::circle_loader::loadModel(model_file_path);
}
+ else if (model_type == "tvn")
+ {
+ _subgraphs = onert::trix_loader::loadModel(model_file_path);
+ }
else
{
std::cerr << "Unsupported model type in MANIFEST" << std::endl;
@@ -657,32 +705,6 @@ NNFW_STATUS nnfw_session::set_output_layout(uint32_t index, NNFW_LAYOUT layout)
return NNFW_STATUS_NO_ERROR;
}
-static NNFW_TYPE datatype_to_nnfw_dtype(onert::ir::DataType dt)
-{
- using onert::ir::DataType;
- switch (dt)
- {
- case DataType::FLOAT32:
- return NNFW_TYPE_TENSOR_FLOAT32;
- case DataType::INT32:
- return NNFW_TYPE_TENSOR_INT32;
- case DataType::QUANT_UINT8_ASYMM:
- return NNFW_TYPE_TENSOR_QUANT8_ASYMM;
- case DataType::BOOL8:
- return NNFW_TYPE_TENSOR_BOOL;
- case DataType::UINT8:
- return NNFW_TYPE_TENSOR_UINT8;
- case DataType::INT64:
- return NNFW_TYPE_TENSOR_INT64;
- case DataType::QUANT_INT8_ASYMM:
- return NNFW_TYPE_TENSOR_QUANT8_ASYMM_SIGNED;
- case DataType::UINT32:
- case DataType::QUANT_INT8_SYMM:
- default:
- throw std::runtime_error("Error: Model has type that runtime API does not support.");
- }
-}
-
NNFW_STATUS nnfw_session::apply_tensorinfo(uint32_t index, nnfw_tensorinfo ti)
{
// sanity check
@@ -769,22 +791,11 @@ NNFW_STATUS nnfw_session::input_tensorinfo(uint32_t index, nnfw_tensorinfo *ti)
auto shape = primary_subgraph()->operands().at(opidx).shape();
if (isStatePreparedOrFinishedRun())
{
- if (_execution)
- {
- shape = _execution->getInputShape(onert::ir::IOIndex{index});
- }
- else
- {
- shape = _executions.at(0)->getInputShape(onert::ir::IOIndex{index});
- }
- }
-
- ti->rank = shape.rank();
- for (int j = 0; j < ti->rank; ++j)
- {
- ti->dims[j] = shape.dim(j);
+ shape = _execution ? _execution->getInputShape(onert::ir::IOIndex{index})
+ : _executions.at(0)->getInputShape(onert::ir::IOIndex{index});
}
- ti->dtype = datatype_to_nnfw_dtype(primary_subgraph()->operands().at(opidx).typeInfo().type());
+ auto dtype = primary_subgraph()->operands().at(opidx).typeInfo().type();
+ fillTensorInfo(ti, shape, dtype);
}
catch (const std::exception &e)
{
@@ -820,21 +831,12 @@ NNFW_STATUS nnfw_session::output_tensorinfo(uint32_t index, nnfw_tensorinfo *ti)
// If it is called after `nnfw_run` then get the shape from Execution, not from the graph
if (isStateFinishedRun())
{
- if (_execution)
- {
- shape = _execution->getOutputShape(onert::ir::IOIndex{index});
- }
- else
- {
- shape = _executions.at(_executions.size() - 1)->getOutputShape(onert::ir::IOIndex{index});
- }
- }
- ti->rank = shape.rank();
- for (int j = 0; j < ti->rank; ++j)
- {
- ti->dims[j] = shape.dim(j);
+ shape = _execution
+ ? _execution->getOutputShape(onert::ir::IOIndex{index})
+ : _executions.at(_executions.size() - 1)->getOutputShape(onert::ir::IOIndex{index});
}
- ti->dtype = datatype_to_nnfw_dtype(primary_subgraph()->operands().at(opidx).typeInfo().type());
+ auto dtype = primary_subgraph()->operands().at(opidx).typeInfo().type();
+ fillTensorInfo(ti, shape, dtype);
}
catch (const std::exception &e)
{
diff --git a/runtime/onert/backend/CMakeLists.txt b/runtime/onert/backend/CMakeLists.txt
index 4b21e0ace..c43160ba7 100644
--- a/runtime/onert/backend/CMakeLists.txt
+++ b/runtime/onert/backend/CMakeLists.txt
@@ -1,9 +1,14 @@
+# Backend common libs
set(LIB_ONERT_BACKEND_ACL_COMMON onert_backend_acl_common)
+set(LIB_ONERT_BACKEND_CL_COMMON onert_backend_cl_common)
+add_subdirectory(cl_common)
+add_subdirectory(acl_common)
+# Backends
add_subdirectory(cpu)
add_subdirectory(acl_cl)
add_subdirectory(acl_neon)
-add_subdirectory(acl_common)
add_subdirectory(ruy)
add_subdirectory(gpu_cl)
add_subdirectory(xnnpack)
+add_subdirectory(trix)
diff --git a/runtime/onert/backend/acl_cl/BackendContext.cc b/runtime/onert/backend/acl_cl/BackendContext.cc
deleted file mode 100644
index 5595043ca..000000000
--- a/runtime/onert/backend/acl_cl/BackendContext.cc
+++ /dev/null
@@ -1,242 +0,0 @@
-/*
- * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#include "BackendContext.h"
-
-#include "TensorBuilder.h"
-#include "KernelGenerator.h"
-#include "Optimizer.h"
-#include "util/logging.h"
-#include "ir/Index.h"
-#include "ir/OperandIndexMap.h"
-#include "ir/OperandIndexSequence.h"
-
-namespace onert
-{
-namespace backend
-{
-namespace acl_cl
-{
-
-void BackendContext::initConsts()
-{
- _data.graph->operations().iterate([&](const ir::OperationIndex &, const ir::Operation &op) {
- constant_initializer->setLayout(graph()->layout());
- op.accept(*constant_initializer);
- });
-
- _data.graph->operands().iterate([&](const ir::OperandIndex &ind, const ir::Operand &operand) {
- if (_data.external_operands.contains(ind) || !operand.isConstant())
- return;
- const auto &obj = graph()->operands().at(ind);
- if (obj.isConstant() && !constant_initializer->exist(ind))
- {
- constant_initializer->registerDefaultInitializer(ind, obj);
- }
- });
-
- constant_initializer->run();
-}
-
-void BackendContext::planTensors()
-{
- ir::OperandIndexMap<uint32_t> uses_map;
- ir::OperandIndexMap<uint32_t> def_map;
- ir::OperandIndexSequence constants;
-
- // Prepare scanning
- _data.graph->operands().iterate([&](const ir::OperandIndex &ind, const ir::Operand &obj) {
- if (_data.external_operands.contains(ind))
- return;
-
- uses_map[ind] = obj.getUses().size();
- def_map[ind] = obj.getDef().valid() ? 1 : 0;
-
- if (obj.isConstant())
- constants.append(ind);
-
- if (!tensor_builder->isRegistered(ind))
- {
- // These tensors do not exist in any operation (No use and def)
- const auto info = obj.info();
- const auto layout = _data.operand_layouts.at(ind);
- // TODO Change tensor info to have permuted shape
- tensor_builder->registerTensorInfo(ind, info, layout);
- }
- });
-
- // Start scanning to do notify{First|Last}Use for each tensor
-
- // If a tensor is a constant, increase the use of the tensor and allocate it first.
- // Increasing use count here makes the tensor never be deallocated, i.e it they will be
- // deallocated last.
- VERBOSE(planTensors) << "TENSORS as CONSTANT" << std::endl;
- for (const auto &ind : constants)
- {
- uses_map[ind]++;
- tensor_builder->notifyFirstUse(ind);
- }
-
- // At each operation,
- // 1. Scan DEF of outputs. If the DEF, allocate it
- // 2. Scan DEF of inputs. If variable tensor, allocate it
- // 3. Scan USE of inputs. Decrease the USE and deallocate if the USE is 0
- for (const auto op_ind : _data.op_order)
- {
- const auto &op = graph()->operations().at(op_ind);
- auto op_inputs = op.getInputs() | ir::Remove::DUPLICATED | ir::Remove::UNDEFINED;
- auto op_outputs = op.getOutputs() | ir::Remove::DUPLICATED | ir::Remove::UNDEFINED;
-
- // Define outputs
- for (const auto &ind : op_outputs)
- {
- if (!tensor_builder->isRegistered(ind))
- continue;
- assert(def_map.find(ind) != def_map.end());
- if (def_map[ind])
- {
- def_map[ind] = 0;
- tensor_builder->notifyFirstUse(ind);
- }
- }
-
- // Scan variable tensors
- // This tensor has features like constant. But OperandInfo and LowerInfo treat them as
- // non-constant because of less memory usage by memory planning in here
- for (const auto &ind : op_inputs)
- {
- if (!tensor_builder->isRegistered(ind))
- continue;
- const auto &operand = graph()->operands().at(ind);
- if (operand.info().isVariable())
- {
- // The variable tensor with buffer is not supported yet
- assert(operand.data() == nullptr);
- assert(operand.getUses().size() == 1 && !operand.getDef().valid());
- assert(uses_map[ind] == 1 && def_map[ind] == 0);
- tensor_builder->notifyFirstUse(ind);
- }
- }
-
- for (const auto &ind : op_inputs)
- {
- if (!tensor_builder->isRegistered(ind))
- continue;
- assert(uses_map.find(ind) != uses_map.end());
- assert(uses_map[ind] > 0);
- uses_map[ind]--;
- if (uses_map[ind] == 0)
- {
- // plan for deallocation of static tensornode
- tensor_builder->notifyLastUse(ind);
- }
- }
- }
-
- _data.graph->operands().iterate([&](const ir::OperandIndex &ind, const ir::Operand &) {
- if (uses_map[ind] == 0)
- {
- tensor_builder->notifyLastUse(ind);
- }
- });
-
- // Dispose and validate
- for (const auto &ind : constants)
- {
- --uses_map[ind];
- if (uses_map[ind] == 0) // To prevent notifyLastUse from being called twice
- {
- tensor_builder->notifyLastUse(ind);
- }
- }
-
- assert(
- std::all_of(uses_map.begin(), uses_map.end(),
- [](std::pair<const ir::OperandIndex, uint32_t> it) { return it.second == 0; }));
-
- assert(
- std::all_of(def_map.begin(), def_map.end(),
- [](std::pair<const ir::OperandIndex, uint32_t> it) { return it.second == 0; }));
-}
-
-ITensorRegistry *BackendContext::genTensors()
-{
- optimizer->optimize();
-
- graph()->operands().iterate([&](const ir::OperandIndex &ind, const ir::Operand &obj) {
- if (external_operands().contains(ind))
- return;
-
- const auto frontend_layout = graph()->layout();
- const auto backend_layout = operand_layouts().at(ind);
- ir::OperandInfo backend_info{permuteShape(obj.shape(), frontend_layout, backend_layout),
- obj.typeInfo(), obj.info().memAllocType(), obj.isConstant()};
- tensor_builder->registerTensorInfo(ind, backend_info, backend_layout);
- });
-
- // TODO Get compiler options from compiler, and use it rather than getting it from Env
- if (util::getConfigString(util::config::EXECUTOR) == "Linear")
- {
- planTensors();
- }
- else
- {
- // For the executors that does not have fixed linear execution order:
- // To make tensors never be deallocated, this is a workaround to use static memory planner
- graph()->operands().iterate([&](const ir::OperandIndex &ind, const ir::Operand &) {
- if (tensor_builder->isRegistered(ind))
- tensor_builder->notifyFirstUse(ind);
- });
- }
-
- tensor_builder->prepare();
-
- return tensor_registry.get();
-}
-
-FunctionMap BackendContext::genKernels()
-{
- FunctionMap ret;
-
- for (auto op_ind : _data.op_order)
- {
- auto fn_seq = kernel_gen->generate(op_ind);
- ret.emplace_back(op_ind, std::move(fn_seq));
- }
-
- tensor_builder->allocate();
- initConsts();
-
- // NOTE For memory optimization, we want to free some operand data
- const_cast<ir::Graph &>(*_data.graph)
- .operands()
- .iterate([&](const ir::OperandIndex &, ir::Operand &obj) { obj.releaseData(); });
-
- for (auto &it : ret)
- {
- auto &fn_seq = it.second;
- fn_seq->iterate([&](exec::IFunction &ifunc) {
- ifunc.prepare();
- tensor_builder->postFunctionPrepare();
- });
- }
-
- return ret;
-}
-
-} // namespace acl_cl
-} // namespace backend
-} // namespace onert
diff --git a/runtime/onert/backend/acl_cl/BackendContext.h b/runtime/onert/backend/acl_cl/BackendContext.h
index 2638046ca..5da915825 100644
--- a/runtime/onert/backend/acl_cl/BackendContext.h
+++ b/runtime/onert/backend/acl_cl/BackendContext.h
@@ -17,10 +17,11 @@
#ifndef __ONERT_BACKEND_ACL_CL_BACKEND_CONTEXT_H__
#define __ONERT_BACKEND_ACL_CL_BACKEND_CONTEXT_H__
-#include <backend/BackendContext.h>
-#include "TensorBuilder.h"
+#include <AclBackendContext.h>
+
#include "ConstantInitializer.h"
#include "KernelGenerator.h"
+#include "TensorBuilder.h"
namespace onert
{
@@ -31,33 +32,8 @@ namespace acl_cl
class Optimizer;
-class BackendContext : public onert::backend::BackendContext
-{
-public:
- BackendContext(const Backend *backend, ContextData &&data,
- std::shared_ptr<ITensorRegistry> tensor_registry = nullptr,
- std::shared_ptr<TensorBuilder> tensor_builder = nullptr,
- std::shared_ptr<ConstantInitializer> constant_initializer = nullptr,
- std::shared_ptr<KernelGenerator> kernel_gen = nullptr)
- : onert::backend::BackendContext(backend, std::move(data), tensor_registry),
- tensor_builder{tensor_builder}, constant_initializer{constant_initializer}, kernel_gen{
- kernel_gen}
- {
- }
-
- ITensorRegistry *genTensors() override;
- FunctionMap genKernels() override;
-
-private:
- void initConsts();
- void planTensors();
-
-public:
- std::shared_ptr<TensorBuilder> tensor_builder;
- std::shared_ptr<ConstantInitializer> constant_initializer;
- std::shared_ptr<KernelGenerator> kernel_gen;
- std::shared_ptr<Optimizer> optimizer;
-};
+using BackendContext =
+ acl_common::AclBackendContext<TensorBuilder, ConstantInitializer, KernelGenerator, Optimizer>;
} // namespace acl_cl
} // namespace backend
diff --git a/runtime/onert/backend/acl_cl/ConstantInitializer.cc b/runtime/onert/backend/acl_cl/ConstantInitializer.cc
index 54b2a7a08..0431bb198 100644
--- a/runtime/onert/backend/acl_cl/ConstantInitializer.cc
+++ b/runtime/onert/backend/acl_cl/ConstantInitializer.cc
@@ -58,21 +58,7 @@ void ConstantInitializer::visit(const ir::operation::SpaceToBatchND &node)
if (block_size_obj.isConstant())
{
- _init_map[block_size_index] = [](const ir::Operand &model_obj, backend::ITensor &obj) {
- assert(model_obj.data());
- const auto &shape = model_obj.shape();
- const auto base = reinterpret_cast<const int32_t *>(model_obj.data()->base());
- assert(model_obj.shape().rank() == 1);
- obj.access([&](ITensor &tensor) {
- for (size_t i = 0; i < shape.num_elements(); ++i)
- {
- const int32_t value = base[shape.num_elements() - i - 1];
- int32_t *into = reinterpret_cast<int32_t *>(tensor.buffer() +
- tensor.calcOffset({static_cast<int32_t>(i)}));
- *into = value;
- }
- });
- };
+ _init_map[block_size_index] = acl_common::initReverseOrder<int32_t>;
}
const auto &paddings_index = node.getInputs().at(ir::operation::SpaceToBatchND::PADDINGS);
diff --git a/runtime/onert/backend/acl_cl/Optimizer.cc b/runtime/onert/backend/acl_cl/Optimizer.cc
index 12e805ee5..a9ce888ee 100644
--- a/runtime/onert/backend/acl_cl/Optimizer.cc
+++ b/runtime/onert/backend/acl_cl/Optimizer.cc
@@ -16,12 +16,12 @@
#include "Optimizer.h"
-#include "ParentInfo.h"
+#include <AclSubTensorAnalyzer.h>
-#include <cassert>
#include <compiler/LoweredGraph.h>
#include <util/logging.h>
-#include "AclSubTensorAnalyzer.h"
+
+#include <cassert>
namespace onert
{
diff --git a/runtime/onert/backend/acl_common/AclBackendContext.h b/runtime/onert/backend/acl_common/AclBackendContext.h
new file mode 100644
index 000000000..b8d027476
--- /dev/null
+++ b/runtime/onert/backend/acl_common/AclBackendContext.h
@@ -0,0 +1,106 @@
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __ONERT_BACKEND_ACL_COMMON_ACLBACKEND_CONTEXT_H__
+#define __ONERT_BACKEND_ACL_COMMON_ACLBACKEND_CONTEXT_H__
+
+#include <backend/BackendContext.h>
+#include <ir/Index.h>
+#include <ir/OperandIndexMap.h>
+#include <ir/OperandIndexSequence.h>
+#include <util/logging.h>
+
+#include <cl_common/BackendContext.h>
+
+namespace onert
+{
+namespace backend
+{
+namespace acl_common
+{
+
+// TODO Find better way to handle common code (reduce template)
+template <typename T_TensorBuilder, typename T_ConstantInitializer, typename T_KernelGenerator,
+ typename T_Optimizer>
+class AclBackendContext
+ : public onert::backend::cl_common::BackendContext<T_TensorBuilder, T_ConstantInitializer,
+ T_KernelGenerator>
+{
+public:
+ AclBackendContext(const Backend *backend, ContextData &&data,
+ std::shared_ptr<ITensorRegistry> tensor_registry = nullptr,
+ std::shared_ptr<T_TensorBuilder> tensor_builder = nullptr,
+ std::shared_ptr<T_ConstantInitializer> constant_initializer = nullptr,
+ std::shared_ptr<T_KernelGenerator> kernel_gen = nullptr)
+ : onert::backend::cl_common::BackendContext<T_TensorBuilder, T_ConstantInitializer,
+ T_KernelGenerator>(
+ backend, std::move(data), tensor_registry, tensor_builder, constant_initializer, kernel_gen)
+ {
+ // DO NOTHING
+ }
+
+ ITensorRegistry *genTensors() override
+ {
+ optimizer->optimize();
+
+ this->graph()->operands().iterate([&](const ir::OperandIndex &ind, const ir::Operand &obj) {
+ if (this->external_operands().contains(ind))
+ return;
+
+ const auto frontend_layout = this->graph()->layout();
+ const auto backend_layout = this->operand_layouts().at(ind);
+ ir::OperandInfo backend_info{permuteShape(obj.shape(), frontend_layout, backend_layout),
+ obj.typeInfo(), obj.info().memAllocType(), obj.isConstant()};
+ this->tensor_builder->registerTensorInfo(ind, backend_info, backend_layout);
+ });
+
+ // TODO Get compiler options from compiler, and use them rather than getting them from Env
+ if (util::getConfigString(util::config::EXECUTOR) == "Linear")
+ {
+ this->planTensors();
+ }
+ else
+ {
+ // For executors that do not have a fixed linear execution order:
+ // as a workaround, use the static memory planner so that tensors are never deallocated
+ this->graph()->operands().iterate([&](const ir::OperandIndex &ind, const ir::Operand &) {
+ if (this->tensor_builder->isRegistered(ind))
+ this->tensor_builder->notifyFirstUse(ind);
+ });
+ }
+
+ this->tensor_builder->prepare();
+
+ return this->tensor_registry.get();
+ }
+
+protected:
+ void registerTensorInfo(const ir::OperandIndex &ind, const ir::OperandInfo &info,
+ ir::Layout backend_layout) override
+ {
+ this->tensor_builder->registerTensorInfo(ind, info, backend_layout);
+ }
+
+public:
+ // TODO Make it private
+ std::shared_ptr<T_Optimizer> optimizer;
+};
+
+} // namespace acl_common
+} // namespace backend
+} // namespace onert
+
+#endif // __ONERT_BACKEND_ACL_COMMON_ACLBACKEND_CONTEXT_H__
diff --git a/runtime/onert/backend/acl_common/AclConstantInitializer.h b/runtime/onert/backend/acl_common/AclConstantInitializer.h
index b7f66b50e..65659ad50 100644
--- a/runtime/onert/backend/acl_common/AclConstantInitializer.h
+++ b/runtime/onert/backend/acl_common/AclConstantInitializer.h
@@ -153,6 +153,23 @@ void permuteInit(const onert::ir::Operand &model_obj, onert::backend::ITensor &o
Init<T>(model_obj, obj, copy, frontend_layout);
}
+// Pre-defined initializer - fill reverse order
+template <typename T> void initReverseOrder(const ir::Operand &model_obj, backend::ITensor &obj)
+{
+ assert(model_obj.data());
+ const auto &shape = model_obj.shape();
+ const auto base = reinterpret_cast<const T *>(model_obj.data()->base());
+ assert(model_obj.shape().rank() == 1);
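+ // e.g. a rank-1 tensor [a, b, c] is written into the backend tensor as [c, b, a]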
+ obj.access([&](ITensor &tensor) {
+ for (size_t i = 0; i < shape.num_elements(); ++i)
+ {
+ const T value = base[shape.num_elements() - i - 1];
+ T *into = reinterpret_cast<T *>(tensor.buffer() + tensor.calcOffset({static_cast<int32_t>(i)}));
+ *into = value;
+ }
+ });
+}
+
class AclConstantInitializer : public ir::OperationVisitor
{
public:
diff --git a/runtime/onert/backend/acl_common/AclSubTensorAnalyzer.h b/runtime/onert/backend/acl_common/AclSubTensorAnalyzer.h
index 60f4ebf7e..a0bbe7c3c 100644
--- a/runtime/onert/backend/acl_common/AclSubTensorAnalyzer.h
+++ b/runtime/onert/backend/acl_common/AclSubTensorAnalyzer.h
@@ -17,9 +17,10 @@
#ifndef __ONERT_BACKEND_ACL_COMMON_ACL_SUB_TENSOR_ANALYZER_H__
#define __ONERT_BACKEND_ACL_COMMON_ACL_SUB_TENSOR_ANALYZER_H__
+#include <cl_common/ParentInfo.h>
+
#include <ir/OperationVisitor.h>
#include <ir/Graph.h>
-#include "ParentInfo.h"
namespace onert
{
@@ -94,21 +95,21 @@ public:
}
coordinate_info.set(axis, axis_point);
- _parent_map.emplace(
- input_index, acl_common::ParentInfo{output_index, _current_op_layout, coordinate_info});
+ _parent_map.emplace(input_index,
+ cl_common::ParentInfo{output_index, _current_op_layout, coordinate_info});
axis_point += input_shape.dim(axis);
}
}
- std::unordered_map<ir::OperandIndex, ParentInfo> &&releaseParentMap()
+ std::unordered_map<ir::OperandIndex, cl_common::ParentInfo> &&releaseParentMap()
{
return std::move(_parent_map);
}
private:
const ir::Graph &_graph;
- std::unordered_map<ir::OperandIndex, ParentInfo> _parent_map;
+ std::unordered_map<ir::OperandIndex, cl_common::ParentInfo> _parent_map;
ir::Layout _current_op_layout{ir::Layout::UNKNOWN};
bool usePadding{false};
};
diff --git a/runtime/onert/backend/acl_common/AclTensorBuilder.h b/runtime/onert/backend/acl_common/AclTensorBuilder.h
index 7c1c5dd9a..e008fd6f5 100644
--- a/runtime/onert/backend/acl_common/AclTensorBuilder.h
+++ b/runtime/onert/backend/acl_common/AclTensorBuilder.h
@@ -17,18 +17,21 @@
#ifndef __ONERT_BACKEND_ACL_COMMON_TEMPL_TENSOR_BUILDER_H__
#define __ONERT_BACKEND_ACL_COMMON_TEMPL_TENSOR_BUILDER_H__
-#include <memory>
-#include <queue>
-
-#include <arm_compute/core/Types.h>
-#include "ir/OperandIndexMap.h"
-#include <ir/Operands.h>
#include "AclTensorManager.h"
#include "AclTensorRegistry.h"
-#include <memory>
-#include "ParentInfo.h"
+
+#include <cl_common/LifetimeMap.h>
+#include <cl_common/ParentInfo.h>
+
+#include <ir/OperandIndexMap.h>
+#include <ir/Operands.h>
#include <util/Utils.h>
+#include <arm_compute/core/Types.h>
+
+#include <memory>
+#include <queue>
+
namespace onert
{
namespace backend
@@ -36,16 +39,12 @@ namespace backend
namespace acl_common
{
-enum class UsesType
-{
- FIRST,
- LAST
-};
-
template <typename T_ITensor, typename T_Tensor, typename T_SubTensor> class AclTensorBuilder
{
public:
using T_AclTensorManager = AclTensorManager<T_ITensor, T_Tensor, T_SubTensor>;
+ // TODO Remove this alias and direct usage of this type
+ using UsesType = cl_common::UsesType;
AclTensorBuilder(const ir::Operands &operands, T_AclTensorManager *tensor_mgr);
@@ -76,7 +75,7 @@ public:
_uses_count_map[index] = num_uses;
}
- void parent_map(std::unordered_map<ir::OperandIndex, ParentInfo> &&parent_map)
+ void parent_map(std::unordered_map<ir::OperandIndex, cl_common::ParentInfo> &&parent_map)
{
_parent_map = std::move(parent_map);
}
@@ -104,10 +103,10 @@ private:
std::unique_ptr<T_AclTensorManager> _tensor_mgr;
// for linear executor
- std::vector<std::pair<UsesType, ir::OperandIndex>> _lifetime_seq;
+ cl_common::LifetimeSeq _lifetime_seq;
// Extra info for concat elimination
- ir::OperandIndexMap<ParentInfo> _parent_map;
+ ir::OperandIndexMap<cl_common::ParentInfo> _parent_map;
};
} // namespace acl_common
@@ -217,55 +216,7 @@ void AclTensorBuilder<T_ITensor, T_Tensor, T_SubTensor>::prepare(void)
template <typename T_ITensor, typename T_Tensor, typename T_SubTensor>
void AclTensorBuilder<T_ITensor, T_Tensor, T_SubTensor>::allocate(void)
{
- // Update lifetime sequence to apply subtensor optimization
-
- std::unordered_map<ir::OperandIndex, ir::OperandIndex> root_map;
- std::function<ir::OperandIndex &(ir::OperandIndex)> find_root =
- [&](ir::OperandIndex ind) -> ir::OperandIndex & {
- ir::OperandIndex &ret = root_map[ind];
-
- // We know the root parent value already
- if (ret.valid())
- return ret;
-
- auto itr = _parent_map.find(ind);
- if (itr == _parent_map.end())
- {
- // If there is no parent, let's store the value of itself
- return ret = ind;
- }
- else
- {
- return ret = find_root(itr->second.parent);
- }
- };
-
- ir::OperandIndexMap<bool> first_use_check;
- ir::OperandIndexMap<bool> last_use_check;
- std::map<size_t, std::pair<UsesType, ir::OperandIndex>> lifetime_map;
- for (size_t i = 0; i < _lifetime_seq.size(); i++)
- {
- auto &entry = _lifetime_seq[i];
- if (entry.first != UsesType::FIRST)
- continue;
- auto root_ind = find_root(entry.second);
- if (first_use_check[root_ind])
- continue;
- first_use_check[root_ind] = true;
- lifetime_map[i] = {UsesType::FIRST, root_ind};
- }
-
- for (int i = _lifetime_seq.size() - 1; i >= 0; i--)
- {
- auto &entry = _lifetime_seq[i];
- if (entry.first != UsesType::LAST)
- continue;
- auto root_ind = find_root(entry.second);
- if (last_use_check[root_ind])
- continue;
- last_use_check[root_ind] = true;
- lifetime_map[i] = {UsesType::LAST, root_ind};
- }
+ auto lifetime_map = cl_common::createLifetimeMap(_lifetime_seq, _parent_map);
for (auto &entry : lifetime_map)
{
diff --git a/runtime/onert/backend/acl_common/CMakeLists.txt b/runtime/onert/backend/acl_common/CMakeLists.txt
index d3ae5acf7..8d409a47c 100644
--- a/runtime/onert/backend/acl_common/CMakeLists.txt
+++ b/runtime/onert/backend/acl_common/CMakeLists.txt
@@ -12,6 +12,7 @@ target_include_directories(${LIB_ONERT_BACKEND_ACL_COMMON} PUBLIC ${CMAKE_CURREN
target_link_libraries(${LIB_ONERT_BACKEND_ACL_COMMON} PUBLIC onert_core)
target_link_libraries(${LIB_ONERT_BACKEND_ACL_COMMON} PUBLIC arm_compute arm_compute_ex)
target_link_libraries(${LIB_ONERT_BACKEND_ACL_COMMON} PUBLIC nnfw_lib_misc)
+target_link_libraries(${LIB_ONERT_BACKEND_ACL_COMMON} PUBLIC ${LIB_ONERT_BACKEND_CL_COMMON})
target_link_libraries(${LIB_ONERT_BACKEND_ACL_COMMON} PRIVATE nnfw_common)
target_link_libraries(${LIB_ONERT_BACKEND_ACL_COMMON} PRIVATE nnfw_coverage)
diff --git a/runtime/onert/backend/acl_neon/BackendContext.cc b/runtime/onert/backend/acl_neon/BackendContext.cc
deleted file mode 100644
index 4de3de02d..000000000
--- a/runtime/onert/backend/acl_neon/BackendContext.cc
+++ /dev/null
@@ -1,243 +0,0 @@
-/*
- * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#include "BackendContext.h"
-
-#include "TensorBuilder.h"
-#include "KernelGenerator.h"
-#include "Optimizer.h"
-#include "util/logging.h"
-#include "ir/Index.h"
-#include "ir/OperandIndexMap.h"
-#include "ir/OperandIndexSequence.h"
-
-namespace onert
-{
-namespace backend
-{
-namespace acl_neon
-{
-
-void BackendContext::initConsts()
-{
- _data.graph->operations().iterate([&](const ir::OperationIndex &, const ir::Operation &op) {
- constant_initializer->setLayout(graph()->layout());
- op.accept(*constant_initializer);
- });
-
- _data.graph->operands().iterate([&](const ir::OperandIndex &ind, const ir::Operand &operand) {
- if (_data.external_operands.contains(ind) || !operand.isConstant())
- return;
- const auto &obj = graph()->operands().at(ind);
- if (obj.isConstant() && !constant_initializer->exist(ind))
- {
- constant_initializer->registerDefaultInitializer(ind, obj);
- }
- });
-
- constant_initializer->run();
-}
-
-void BackendContext::planTensors()
-{
- ir::OperandIndexMap<uint32_t> uses_map;
- ir::OperandIndexMap<uint32_t> def_map;
- ir::OperandIndexSequence constants;
-
- // Prepare scanning
- _data.graph->operands().iterate([&](const ir::OperandIndex &ind, const ir::Operand &obj) {
- if (_data.external_operands.contains(ind))
- return;
-
- uses_map[ind] = obj.getUses().size();
- def_map[ind] = obj.getDef().valid() ? 1 : 0;
-
- if (obj.isConstant())
- constants.append(ind);
-
- if (!tensor_builder->isRegistered(ind))
- {
- // These tensors do not exist in any operation (No use and def)
- const auto info = obj.info();
- const auto layout = _data.operand_layouts.at(ind);
- // TODO Change tensor info to have permuted shape
- tensor_builder->registerTensorInfo(ind, info, layout);
- }
- });
-
- // Start scanning to do notify{First|Last}Use for each tensor
-
- // If a tensor is a constant, increase the use of the tensor and allocate it first.
- // Increasing use count here makes the tensor never be deallocated, i.e it they will be
- // deallocated last.
- VERBOSE(planTensors) << "TENSORS as CONSTANT" << std::endl;
- for (const auto &ind : constants)
- {
- uses_map[ind]++;
- tensor_builder->notifyFirstUse(ind);
- }
-
- // At each operation,
- // 1. Scan DEF of outputs. If the DEF, allocate it
- // 2. Scan DEF of inputs. If variable tensor, allocate it
- // 3. Scan USE of inputs. Decrease the USE and deallocate if the USE is 0
- for (const auto op_ind : _data.op_order)
- {
- auto op_inputs =
- graph()->operations().at(op_ind).getInputs() | ir::Remove::DUPLICATED | ir::Remove::UNDEFINED;
- auto op_outputs = graph()->operations().at(op_ind).getOutputs() | ir::Remove::DUPLICATED |
- ir::Remove::UNDEFINED;
-
- // Define outputs
- for (const auto &ind : op_outputs)
- {
- if (!tensor_builder->isRegistered(ind))
- continue;
- assert(def_map.find(ind) != def_map.end());
- if (def_map[ind])
- {
- def_map[ind] = 0;
- tensor_builder->notifyFirstUse(ind);
- }
- }
-
- // Scan variable tensors
- // This tensor has features like constant. But OperandInfo and LowerInfo treat them as
- // non-constant because of less memory usage by memory planning in here
- for (const auto &ind : op_inputs)
- {
- if (!tensor_builder->isRegistered(ind))
- continue;
- const auto &operand = graph()->operands().at(ind);
- if (operand.info().isVariable())
- {
- // The variable tensor with buffer is not supported yet
- assert(operand.data() == nullptr);
- assert(operand.getUses().size() == 1 && !operand.getDef().valid());
- assert(uses_map[ind] == 1 && def_map[ind] == 0);
- tensor_builder->notifyFirstUse(ind);
- }
- }
-
- for (const auto &ind : op_inputs)
- {
- if (!tensor_builder->isRegistered(ind))
- continue;
- assert(uses_map.find(ind) != uses_map.end());
- assert(uses_map[ind] > 0);
- uses_map[ind]--;
- if (uses_map[ind] == 0)
- {
- // plan for deallocation of static tensornode
- tensor_builder->notifyLastUse(ind);
- }
- }
- }
-
- _data.graph->operands().iterate([&](const ir::OperandIndex &ind, const ir::Operand &) {
- if (uses_map[ind] == 0)
- {
- tensor_builder->notifyLastUse(ind);
- }
- });
-
- // Dispose and validate
- for (const auto &ind : constants)
- {
- --uses_map[ind];
- if (uses_map[ind] == 0) // To prevent notifyLastUse from being called twice
- {
- tensor_builder->notifyLastUse(ind);
- }
- }
-
- assert(
- std::all_of(uses_map.begin(), uses_map.end(),
- [](std::pair<const ir::OperandIndex, uint32_t> it) { return it.second == 0; }));
-
- assert(
- std::all_of(def_map.begin(), def_map.end(),
- [](std::pair<const ir::OperandIndex, uint32_t> it) { return it.second == 0; }));
-}
-
-ITensorRegistry *BackendContext::genTensors()
-{
- optimizer->optimize();
-
- graph()->operands().iterate([&](const ir::OperandIndex &ind, const ir::Operand &obj) {
- if (external_operands().contains(ind))
- return;
-
- const auto frontend_layout = graph()->layout();
- const auto backend_layout = operand_layouts().at(ind);
- ir::OperandInfo backend_info{permuteShape(obj.shape(), frontend_layout, backend_layout),
- obj.typeInfo(), obj.info().memAllocType(), obj.isConstant()};
- tensor_builder->registerTensorInfo(ind, backend_info, backend_layout);
- });
-
- // TODO Get compiler options from compiler, and use it rather than getting it from Env
- if (util::getConfigString(util::config::EXECUTOR) == "Linear")
- {
- planTensors();
- }
- else
- {
- // For the executors that does not have fixed linear execution order:
- // To make tensors never be deallocated, this is a workaround to use static memory planner
- graph()->operands().iterate([&](const ir::OperandIndex &ind, const ir::Operand &) {
- if (tensor_builder->isRegistered(ind))
- tensor_builder->notifyFirstUse(ind);
- });
- }
-
- tensor_builder->prepare();
-
- return tensor_registry.get();
-}
-
-FunctionMap BackendContext::genKernels()
-{
- FunctionMap ret;
-
- for (auto op_ind : _data.op_order)
- {
- auto fn_seq = kernel_gen->generate(op_ind);
- ret.emplace_back(op_ind, std::move(fn_seq));
- }
-
- tensor_builder->allocate();
- initConsts();
-
- // NOTE For memory optimization, we want to free some operand data
- const_cast<ir::Graph &>(*_data.graph)
- .operands()
- .iterate([&](const ir::OperandIndex &, ir::Operand &obj) { obj.releaseData(); });
-
- for (auto &it : ret)
- {
- auto &fn_seq = it.second;
- fn_seq->iterate([&](exec::IFunction &ifunc) {
- ifunc.prepare();
- tensor_builder->postFunctionPrepare();
- });
- }
-
- return ret;
-}
-
-} // namespace acl_neon
-} // namespace backend
-} // namespace onert
diff --git a/runtime/onert/backend/acl_neon/BackendContext.h b/runtime/onert/backend/acl_neon/BackendContext.h
index 35d777f7b..b73dd188e 100644
--- a/runtime/onert/backend/acl_neon/BackendContext.h
+++ b/runtime/onert/backend/acl_neon/BackendContext.h
@@ -17,10 +17,11 @@
#ifndef __ONERT_BACKEND_ACL_NEON_BACKEND_CONTEXT_H__
#define __ONERT_BACKEND_ACL_NEON_BACKEND_CONTEXT_H__
-#include <backend/BackendContext.h>
-#include "TensorBuilder.h"
+#include <AclBackendContext.h>
+
#include "ConstantInitializer.h"
#include "KernelGenerator.h"
+#include "TensorBuilder.h"
namespace onert
{
@@ -31,34 +32,8 @@ namespace acl_neon
class Optimizer;
-class BackendContext : public onert::backend::BackendContext
-{
-public:
- BackendContext(const Backend *backend, ContextData &&data,
- std::shared_ptr<ITensorRegistry> tensor_registry = nullptr,
- std::shared_ptr<TensorBuilder> tensor_builder = nullptr,
- std::shared_ptr<ConstantInitializer> constant_initializer = nullptr,
- std::shared_ptr<KernelGenerator> kernel_gen = nullptr)
- : onert::backend::BackendContext(backend, std::move(data), tensor_registry),
- tensor_builder{tensor_builder}, constant_initializer{constant_initializer}, kernel_gen{
- kernel_gen}
- {
- }
-
- ITensorRegistry *genTensors() override;
- FunctionMap genKernels() override;
-
-private:
- void initConsts();
- void planTensors();
-
-public:
- // TODO Make it private
- std::shared_ptr<TensorBuilder> tensor_builder;
- std::shared_ptr<ConstantInitializer> constant_initializer;
- std::shared_ptr<KernelGenerator> kernel_gen;
- std::shared_ptr<Optimizer> optimizer;
-};
+using BackendContext =
+ acl_common::AclBackendContext<TensorBuilder, ConstantInitializer, KernelGenerator, Optimizer>;
} // namespace acl_neon
} // namespace backend
diff --git a/runtime/onert/backend/acl_neon/ConstantInitializer.cc b/runtime/onert/backend/acl_neon/ConstantInitializer.cc
index 35da7c952..1bd702756 100644
--- a/runtime/onert/backend/acl_neon/ConstantInitializer.cc
+++ b/runtime/onert/backend/acl_neon/ConstantInitializer.cc
@@ -37,21 +37,7 @@ void ConstantInitializer::visit(const ir::operation::SpaceToBatchND &node)
if (block_size_obj.isConstant())
{
- _init_map[block_size_index] = [](const ir::Operand &model_obj, backend::ITensor &obj) {
- assert(model_obj.data());
- const auto &shape = model_obj.shape();
- const auto base = reinterpret_cast<const int32_t *>(model_obj.data()->base());
- assert(model_obj.shape().rank() == 1);
- obj.access([&](ITensor &tensor) {
- for (size_t i = 0; i < shape.num_elements(); ++i)
- {
- const int32_t value = base[shape.num_elements() - i - 1];
- int32_t *into = reinterpret_cast<int32_t *>(tensor.buffer() +
- tensor.calcOffset({static_cast<int32_t>(i)}));
- *into = value;
- }
- });
- };
+ _init_map[block_size_index] = acl_common::initReverseOrder<int32_t>;
}
const auto &paddings_index = node.getInputs().at(ir::operation::SpaceToBatchND::PADDINGS);
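Judging from the deleted lambda, acl_common::initReverseOrder<int32_t> copies a rank-1 constant into the backend tensor in reverse element order: destination element i receives source element (n - 1 - i). A standalone restatement with plain pointers in place of the ITensor access API:

    #include <cassert>
    #include <cstddef>
    #include <cstdint>

    // Reverse-order copy of a rank-1 operand, mirroring the removed lambda body.
    void initReverseOrderSketch(const int32_t *src, int32_t *dst, std::size_t num_elements)
    {
      assert(src != nullptr && dst != nullptr);
      for (std::size_t i = 0; i < num_elements; ++i)
      {
        dst[i] = src[num_elements - 1 - i];
      }
    }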
diff --git a/runtime/onert/backend/acl_neon/Optimizer.cc b/runtime/onert/backend/acl_neon/Optimizer.cc
index 781103f9c..283edd174 100644
--- a/runtime/onert/backend/acl_neon/Optimizer.cc
+++ b/runtime/onert/backend/acl_neon/Optimizer.cc
@@ -16,12 +16,12 @@
#include "Optimizer.h"
-#include "ParentInfo.h"
+#include <AclSubTensorAnalyzer.h>
-#include <cassert>
#include <compiler/LoweredGraph.h>
#include <util/logging.h>
-#include "AclSubTensorAnalyzer.h"
+
+#include <cassert>
namespace onert
{
diff --git a/runtime/onert/backend/cl_common/CMakeLists.txt b/runtime/onert/backend/cl_common/CMakeLists.txt
new file mode 100644
index 000000000..c75129696
--- /dev/null
+++ b/runtime/onert/backend/cl_common/CMakeLists.txt
@@ -0,0 +1,7 @@
+file(GLOB_RECURSE SOURCES "src/*.cc")
+
+add_library(${LIB_ONERT_BACKEND_CL_COMMON} STATIC ${SOURCES})
+
+target_include_directories(${LIB_ONERT_BACKEND_CL_COMMON} PUBLIC ${CMAKE_CURRENT_SOURCE_DIR}/include)
+set_target_properties(${LIB_ONERT_BACKEND_CL_COMMON} PROPERTIES POSITION_INDEPENDENT_CODE ON)
+target_link_libraries(${LIB_ONERT_BACKEND_CL_COMMON} PUBLIC onert_core)
diff --git a/runtime/onert/backend/cl_common/include/cl_common/BackendContext.h b/runtime/onert/backend/cl_common/include/cl_common/BackendContext.h
new file mode 100644
index 000000000..7bb72d74e
--- /dev/null
+++ b/runtime/onert/backend/cl_common/include/cl_common/BackendContext.h
@@ -0,0 +1,236 @@
+/*
+ * Copyright (c) 2022 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __ONERT_BACKEND_CL_COMMON_BACKEND_CONTEXT_H__
+#define __ONERT_BACKEND_CL_COMMON_BACKEND_CONTEXT_H__
+
+#include <backend/BackendContext.h>
+#include <ir/Index.h>
+#include <ir/OperandIndexMap.h>
+#include <ir/OperandIndexSequence.h>
+#include <util/logging.h>
+
+namespace onert
+{
+namespace backend
+{
+namespace cl_common
+{
+
+// TODO Find better way to handle common code (reduce template)
+template <typename T_TensorBuilder, typename T_ConstantInitializer, typename T_KernelGenerator>
+class BackendContext : public onert::backend::BackendContext
+{
+public:
+ BackendContext(const Backend *backend, ContextData &&data,
+ std::shared_ptr<ITensorRegistry> tensor_registry = nullptr,
+ std::shared_ptr<T_TensorBuilder> tensor_builder = nullptr,
+ std::shared_ptr<T_ConstantInitializer> constant_initializer = nullptr,
+ std::shared_ptr<T_KernelGenerator> kernel_gen = nullptr)
+ : onert::backend::BackendContext(backend, std::move(data), tensor_registry),
+ tensor_builder{tensor_builder}, constant_initializer{constant_initializer}, kernel_gen{
+ kernel_gen}
+ {
+ }
+
+ FunctionMap genKernels() override
+ {
+ FunctionMap ret;
+
+ // kernel_gen
+ for (auto op_ind : _data.op_order)
+ {
+ auto fn_seq = kernel_gen->generate(op_ind);
+ ret.emplace_back(op_ind, std::move(fn_seq));
+ }
+
+ tensor_builder->allocate();
+ initConsts();
+
+ // NOTE For memory optimization, we want to free some operand data
+ const_cast<ir::Graph &>(*_data.graph)
+ .operands()
+ .iterate([&](const ir::OperandIndex &, ir::Operand &obj) { obj.releaseData(); });
+
+ for (auto &it : ret)
+ {
+ auto &fn_seq = it.second;
+ fn_seq->iterate([&](exec::IFunction &ifunc) {
+ ifunc.prepare();
+ tensor_builder->postFunctionPrepare();
+ });
+ }
+
+ return ret;
+ }
+
+protected:
+ void initConsts()
+ {
+ _data.graph->operations().iterate([&](const ir::OperationIndex &, const ir::Operation &op) {
+ constant_initializer->setLayout(graph()->layout());
+ op.accept(*constant_initializer);
+ });
+
+ _data.graph->operands().iterate([&](const ir::OperandIndex &ind, const ir::Operand &operand) {
+ if (_data.external_operands.contains(ind) || !operand.isConstant())
+ return;
+ const auto &obj = graph()->operands().at(ind);
+ if (obj.isConstant() && !constant_initializer->exist(ind))
+ {
+ constant_initializer->registerDefaultInitializer(ind, obj);
+ }
+ });
+
+ constant_initializer->run();
+ }
+
+ virtual void registerTensorInfo(const ir::OperandIndex &ind, const ir::OperandInfo &info,
+ ir::Layout backend_layout) = 0;
+
+ void planTensors()
+ {
+ ir::OperandIndexMap<uint32_t> uses_map;
+ ir::OperandIndexMap<uint32_t> def_map;
+ ir::OperandIndexSequence constants;
+
+ // Prepare scanning
+ _data.graph->operands().iterate([&](const ir::OperandIndex &ind, const ir::Operand &obj) {
+ if (_data.external_operands.contains(ind))
+ return;
+
+ uses_map[ind] = obj.getUses().size();
+ def_map[ind] = obj.getDef().valid() ? 1 : 0;
+
+ if (obj.isConstant())
+ constants.append(ind);
+
+ if (!tensor_builder->isRegistered(ind))
+ {
+ // These tensors do not exist in any operation (No use and def)
+ const auto info = obj.info();
+ const auto layout = _data.operand_layouts.at(ind);
+ // TODO Change tensor info to have permuted shape
+ registerTensorInfo(ind, info, layout);
+ }
+ });
+
+ // Start scanning to do notify{First|Last}Use for each tensor
+
+ // If a tensor is a constant, increase the use of the tensor and allocate it first.
+ // Increasing the use count here means the tensor is never deallocated during planning,
+ // i.e. it will be deallocated last.
+ VERBOSE(planTensors) << "TENSORS as CONSTANT" << std::endl;
+ for (const auto &ind : constants)
+ {
+ uses_map[ind]++;
+ tensor_builder->notifyFirstUse(ind);
+ }
+
+ // At each operation:
+ // 1. Scan DEF of outputs. If this op is the tensor's DEF, allocate it
+ // 2. Scan DEF of inputs. If an input is a variable tensor, allocate it
+ // 3. Scan USE of inputs. Decrease the USE count and deallocate once it reaches 0
+ for (const auto op_ind : _data.op_order)
+ {
+ const auto &op = graph()->operations().at(op_ind);
+ auto op_inputs = op.getInputs() | ir::Remove::DUPLICATED | ir::Remove::UNDEFINED;
+ auto op_outputs = op.getOutputs() | ir::Remove::DUPLICATED | ir::Remove::UNDEFINED;
+
+ // Define outputs
+ for (const auto &ind : op_outputs)
+ {
+ if (!tensor_builder->isRegistered(ind))
+ continue;
+ assert(def_map.find(ind) != def_map.end());
+ if (def_map[ind])
+ {
+ def_map[ind] = 0;
+ tensor_builder->notifyFirstUse(ind);
+ }
+ }
+
+ // Scan variable tensors
+ // These tensors have constant-like characteristics, but OperandInfo and LowerInfo treat
+ // them as non-constant so that memory planning here can use less memory
+ for (const auto &ind : op_inputs)
+ {
+ if (!tensor_builder->isRegistered(ind))
+ continue;
+ const auto &operand = graph()->operands().at(ind);
+ if (operand.info().isVariable())
+ {
+ // The variable tensor with buffer is not supported yet
+ assert(operand.data() == nullptr);
+ assert(operand.getUses().size() == 1 && !operand.getDef().valid());
+ assert(uses_map[ind] == 1 && def_map[ind] == 0);
+ tensor_builder->notifyFirstUse(ind);
+ }
+ }
+
+ for (const auto &ind : op_inputs)
+ {
+ if (!tensor_builder->isRegistered(ind))
+ continue;
+ assert(uses_map.find(ind) != uses_map.end());
+ assert(uses_map[ind] > 0);
+ uses_map[ind]--;
+ if (uses_map[ind] == 0)
+ {
+ // plan for deallocation of static tensor node
+ tensor_builder->notifyLastUse(ind);
+ }
+ }
+ }
+
+ _data.graph->operands().iterate([&](const ir::OperandIndex &ind, const ir::Operand &) {
+ if (uses_map[ind] == 0)
+ {
+ tensor_builder->notifyLastUse(ind);
+ }
+ });
+
+ // Dispose and validate
+ for (const auto &ind : constants)
+ {
+ --uses_map[ind];
+ if (uses_map[ind] == 0) // To prevent notifyLastUse from being called twice
+ {
+ tensor_builder->notifyLastUse(ind);
+ }
+ }
+
+ assert(
+ std::all_of(uses_map.begin(), uses_map.end(),
+ [](std::pair<const ir::OperandIndex, uint32_t> it) { return it.second == 0; }));
+
+ assert(
+ std::all_of(def_map.begin(), def_map.end(),
+ [](std::pair<const ir::OperandIndex, uint32_t> it) { return it.second == 0; }));
+ }
+
+public:
+ // TODO Make it protected
+ std::shared_ptr<T_TensorBuilder> tensor_builder;
+ std::shared_ptr<T_ConstantInitializer> constant_initializer;
+ std::shared_ptr<T_KernelGenerator> kernel_gen;
+};
+
+} // namespace cl_common
+} // namespace backend
+} // namespace onert
+
+#endif // __ONERT_BACKEND_CL_COMMON_BACKEND_CONTEXT_H__
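planTensors above is a reference-counting liveness pass: each operand starts with its use count, constants get one extra use so they are deallocated last, and a count reaching zero triggers notifyLastUse. A toy standalone version of the same counting scheme (integer ids instead of OperandIndex, prints instead of the notify calls):

    #include <cstdio>
    #include <map>
    #include <vector>

    struct Op { std::vector<int> inputs, outputs; };

    void planTensorsSketch(const std::vector<Op> &ops, std::map<int, int> uses)
    {
      for (const auto &op : ops)
      {
        for (int out : op.outputs)
          std::printf("first use (def): t%d\n", out); // notifyFirstUse
        for (int in : op.inputs)
          if (--uses[in] == 0)
            std::printf("last use: t%d\n", in); // notifyLastUse
      }
    }

    int main()
    {
      // t0 feeds both ops, t1 only the second: use counts are t0 -> 2, t1 -> 1.
      std::vector<Op> ops = {{{0}, {1}}, {{0, 1}, {2}}};
      planTensorsSketch(ops, {{0, 2}, {1, 1}});
    }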
diff --git a/runtime/onert/backend/cl_common/include/cl_common/LifetimeMap.h b/runtime/onert/backend/cl_common/include/cl_common/LifetimeMap.h
new file mode 100644
index 000000000..5fe5eec79
--- /dev/null
+++ b/runtime/onert/backend/cl_common/include/cl_common/LifetimeMap.h
@@ -0,0 +1,51 @@
+/*
+ * Copyright (c) 2022 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __ONERT_BACKEND_CL_COMMON_LIFETIME_MAP_H__
+#define __ONERT_BACKEND_CL_COMMON_LIFETIME_MAP_H__
+
+#include "cl_common/ParentInfo.h"
+
+#include <ir/OperandIndexMap.h>
+
+#include <map>
+#include <vector>
+
+namespace onert
+{
+namespace backend
+{
+namespace cl_common
+{
+
+// TODO Abstract UsesType into LifetimeMap and LifetimeSeq
+enum class UsesType
+{
+ FIRST,
+ LAST
+};
+
+// TODO Define class or struct for LifetimeMap and LifetimeSeq
+using LifetimeMap = std::map<size_t, std::pair<UsesType, ir::OperandIndex>>;
+using LifetimeSeq = std::vector<std::pair<UsesType, ir::OperandIndex>>;
+
+LifetimeMap createLifetimeMap(LifetimeSeq &seq, ir::OperandIndexMap<ParentInfo> &parent_map);
+
+} // namespace cl_common
+} // namespace backend
+} // namespace onert
+
+#endif // __ONERT_BACKEND_CL_COMMON_LIFETIME_MAP_H__
diff --git a/runtime/onert/backend/gpu_cl/ParentInfo.h b/runtime/onert/backend/cl_common/include/cl_common/ParentInfo.h
index d7cb2d4fb..510211cb7 100644
--- a/runtime/onert/backend/gpu_cl/ParentInfo.h
+++ b/runtime/onert/backend/cl_common/include/cl_common/ParentInfo.h
@@ -14,8 +14,8 @@
* limitations under the License.
*/
-#ifndef __ONERT_BACKEND_PARENT_INFO_H__
-#define __ONERT_BACKEND_PARENT_INFO_H__
+#ifndef __ONERT_BACKEND_CL_COMMON_PARENT_INFO_H__
+#define __ONERT_BACKEND_CL_COMMON_PARENT_INFO_H__
#include <ir/Index.h>
#include <ir/Coordinates.h>
@@ -24,7 +24,7 @@ namespace onert
{
namespace backend
{
-namespace gpu_cl
+namespace cl_common
{
/**
@@ -37,8 +37,8 @@ struct ParentInfo
ir::Coordinates coordinates;
};
-} // namespace gpu_cl
+} // namespace cl_common
} // namespace backend
} // namespace onert
-#endif // __ONERT_BACKEND_ACL_COMMON_PARENT_INFO_H__
+#endif // __ONERT_BACKEND_CL_COMMON_PARENT_INFO_H__
diff --git a/runtime/onert/backend/cl_common/src/LifetimeMap.cc b/runtime/onert/backend/cl_common/src/LifetimeMap.cc
new file mode 100644
index 000000000..0b17c58fb
--- /dev/null
+++ b/runtime/onert/backend/cl_common/src/LifetimeMap.cc
@@ -0,0 +1,85 @@
+/*
+ * Copyright (c) 2022 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "cl_common/LifetimeMap.h"
+
+#include <unordered_map>
+
+namespace onert
+{
+namespace backend
+{
+namespace cl_common
+{
+
+LifetimeMap createLifetimeMap(LifetimeSeq &lifetime_seq,
+ ir::OperandIndexMap<ParentInfo> &parent_map)
+{
+ // Update lifetime sequence to apply subtensor optimization
+ std::unordered_map<ir::OperandIndex, ir::OperandIndex> root_map;
+ std::function<ir::OperandIndex &(ir::OperandIndex)> find_root =
+ [&](ir::OperandIndex ind) -> ir::OperandIndex & {
+ ir::OperandIndex &ret = root_map[ind];
+
+ // We know the root parent value already
+ if (ret.valid())
+ return ret;
+
+ auto itr = parent_map.find(ind);
+ if (itr == parent_map.end())
+ {
+ // If there is no parent, the operand is its own root
+ return ret = ind;
+ }
+ else
+ {
+ return ret = find_root(itr->second.parent);
+ }
+ };
+
+ ir::OperandIndexMap<bool> first_use_check;
+ ir::OperandIndexMap<bool> last_use_check;
+ LifetimeMap lifetime_map;
+ for (size_t i = 0; i < lifetime_seq.size(); i++)
+ {
+ auto &entry = lifetime_seq[i];
+ if (entry.first != UsesType::FIRST)
+ continue;
+ auto root_ind = find_root(entry.second);
+ if (first_use_check[root_ind])
+ continue;
+ first_use_check[root_ind] = true;
+ lifetime_map[i] = {UsesType::FIRST, root_ind};
+ }
+
+ for (int i = lifetime_seq.size() - 1; i >= 0; i--)
+ {
+ auto &entry = lifetime_seq[i];
+ if (entry.first != UsesType::LAST)
+ continue;
+ auto root_ind = find_root(entry.second);
+ if (last_use_check[root_ind])
+ continue;
+ last_use_check[root_ind] = true;
+ lifetime_map[i] = {UsesType::LAST, root_ind};
+ }
+
+ return lifetime_map;
+}
+
+} // namespace cl_common
+} // namespace backend
+} // namespace onert
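find_root above memoizes the transitive parent lookup in root_map, so each operand's root is resolved once even for deep subtensor chains; the forward and backward passes then keep only the earliest FIRST and the latest LAST event per root. The memoized lookup in isolation, on hypothetical integer ids:

    #include <cassert>
    #include <functional>
    #include <map>

    int main()
    {
      // parent chain: 3 -> 2 -> 1; operand 1 has no parent, so 1 is the root
      std::map<int, int> parent = {{3, 2}, {2, 1}};
      std::map<int, int> root;

      std::function<int(int)> find_root = [&](int ind) -> int {
        auto memo = root.find(ind);
        if (memo != root.end())
          return memo->second; // already resolved
        auto it = parent.find(ind);
        int r = (it == parent.end()) ? ind : find_root(it->second);
        root[ind] = r;
        return r;
      };

      assert(find_root(3) == 1);
      assert(find_root(2) == 1); // served from the memo filled by the first call
      assert(root.size() == 3);
    }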
diff --git a/runtime/onert/backend/cpu/ops/OperationUtils.cc b/runtime/onert/backend/cpu/ops/OperationUtils.cc
index 8ac875842..aa4ef352e 100644
--- a/runtime/onert/backend/cpu/ops/OperationUtils.cc
+++ b/runtime/onert/backend/cpu/ops/OperationUtils.cc
@@ -194,7 +194,7 @@ void CalculateActivationRangeQuantized(ir::Activation activation, const IPortabl
}
else
{
- std::cout << "Unsupported fused activation function." << std::endl;
+ throw std::runtime_error{"Unsupported fused activation function."};
}
}
diff --git a/runtime/onert/backend/cpu/ops/OperationUtils.h b/runtime/onert/backend/cpu/ops/OperationUtils.h
index ac2fbb84f..1fefc3228 100644
--- a/runtime/onert/backend/cpu/ops/OperationUtils.h
+++ b/runtime/onert/backend/cpu/ops/OperationUtils.h
@@ -18,19 +18,19 @@
#define __NNFW_SUPPORT_NNAPI_OPERATION_UTILS_H__
#include <backend/IPortableTensor.h>
-
-#include <cker/Shape.h>
-#include <cker/Types.h>
-#include <iostream>
#include <ir/DataType.h>
-#include <ir/InternalType.h>
#include <ir/Operand.h>
#include <ir/Padding.h>
+#include <util/CalculateActivationRange.h>
+
+#include <cker/Shape.h>
+#include <cker/Types.h>
#include <limits>
#include <vector>
using OperandType = onert::ir::DataType;
+using namespace onert::util;
namespace onert
{
@@ -166,40 +166,6 @@ void GetQuantizedConvolutionMultipliersAndShifts(
int num_channels, std::vector<int32_t> &per_channel_output_multiplier,
std::vector<int> &per_channel_output_shift);
-template <typename T>
-void CalculateActivationRange(ir::Activation activation, T *activation_min, T *activation_max)
-{
- if (activation == ir::Activation::RELU)
- {
- *activation_min = 0;
- *activation_max = std::numeric_limits<T>::max();
- }
- else if (activation == ir::Activation::RELU6)
- {
- *activation_min = 0;
- *activation_max = 6;
- }
- else if (activation == ir::Activation::RELU1)
- {
- *activation_min = -1;
- *activation_max = 1;
- }
- else if (activation == ir::Activation::SIGMOID)
- {
- *activation_min = 0;
- *activation_max = 1;
- }
- else if (activation == ir::Activation::NONE)
- {
- *activation_min = std::numeric_limits<T>::lowest();
- *activation_max = std::numeric_limits<T>::max();
- }
- else
- {
- std::cout << "Unsupported fused activation function." << std::endl;
- }
-}
-
void CalculateActivationRangeQuantized(ir::Activation activation, const IPortableTensor *output,
int32_t *act_min, int32_t *act_max);
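The CalculateActivationRange template deleted above now lives in util/CalculateActivationRange.h (included at the top of this header) so other backends can share it. Its contract, as the deleted body shows, is mapping a fused activation onto a clamping interval. A self-contained restatement with a standalone enum in place of ir::Activation (SIGMOID omitted for brevity):

    #include <cassert>
    #include <limits>

    enum class Activation { NONE, RELU, RELU1, RELU6 };

    template <typename T> void calcRange(Activation act, T *lo, T *hi)
    {
      switch (act)
      {
        case Activation::RELU:  *lo = 0;  *hi = std::numeric_limits<T>::max(); break;
        case Activation::RELU1: *lo = -1; *hi = 1;                             break;
        case Activation::RELU6: *lo = 0;  *hi = 6;                             break;
        default:                *lo = std::numeric_limits<T>::lowest();
                                *hi = std::numeric_limits<T>::max();           break;
      }
    }

    int main()
    {
      float lo = 0.f, hi = 0.f;
      calcRange(Activation::RELU6, &lo, &hi);
      assert(lo == 0.f && hi == 6.f); // the kernel clamps outputs into [0, 6]
    }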
diff --git a/runtime/onert/backend/gpu_cl/Backend.h b/runtime/onert/backend/gpu_cl/Backend.h
index dc0b8596c..d67ba1602 100644
--- a/runtime/onert/backend/gpu_cl/Backend.h
+++ b/runtime/onert/backend/gpu_cl/Backend.h
@@ -22,13 +22,13 @@
#include "BackendContext.h"
#include "Config.h"
-#include "ClTensorRegistry.h"
+#include "TensorRegistry.h"
#include "KernelGenerator.h"
#include "TensorManager.h"
#include "TensorBuilder.h"
-#include "open_cl/Environment.h"
-#include "open_cl/Status.h"
+#include "tensorflow/lite/delegates/gpu/cl/environment.h"
+#include "tensorflow/lite/delegates/gpu/common/status.h"
namespace onert
{
@@ -50,22 +50,22 @@ public:
const auto &operands = data.graph->operands();
auto context = std::make_unique<gpu_cl::BackendContext>(this, std::move(data));
- auto environment = std::make_shared<Environment>();
+ auto environment = std::make_shared<tflite::gpu::cl::Environment>();
if (!CreateEnvironment(environment.get()).ok())
{
return nullptr;
}
auto tm = createTensorManager(&environment->context());
- auto tr = std::make_shared<ClTensorRegistry<TensorManager>>(tm);
+ auto tr = std::make_shared<TensorRegistry>(tm);
- InferenceContext::CreateInferenceInfo create_info;
- create_info.precision = CalculationsPrecision::F32;
+ tflite::gpu::cl::InferenceContext::CreateInferenceInfo create_info;
+ create_info.precision = tflite::gpu::cl::CalculationsPrecision::F32;
create_info.storage_type =
- GetStorageTypeWithMinimalMemoryConsumption(environment->device().GetInfo());
- create_info.hints.Add(ModelHints::kFastestInference);
+ tflite::gpu::cl::GetStorageTypeWithMinimalMemoryConsumption(environment->device().GetInfo());
+ create_info.hints.Add(tflite::gpu::cl::ModelHints::kFastestInference);
- auto cc = std::make_shared<CreationContext>();
+ auto cc = std::make_shared<tflite::gpu::cl::CreationContext>();
cc->device = environment->GetDevicePtr();
cc->context = &environment->context();
cc->queue = environment->queue();
diff --git a/runtime/onert/backend/gpu_cl/BackendContext.cc b/runtime/onert/backend/gpu_cl/BackendContext.cc
index 6c3ac81a2..ec9442155 100644
--- a/runtime/onert/backend/gpu_cl/BackendContext.cc
+++ b/runtime/onert/backend/gpu_cl/BackendContext.cc
@@ -33,147 +33,26 @@ namespace backend
namespace gpu_cl
{
-void BackendContext::initConsts()
+void BackendContext::registerTensorInfo(const ir::OperandIndex &ind, const ir::OperandInfo &info,
+ ir::Layout backend_layout)
{
- _data.graph->operations().iterate([&](const ir::OperationIndex &, const ir::Operation &op) {
- constant_initializer->setLayout(graph()->layout());
- op.accept(*constant_initializer);
- });
- _data.graph->operands().iterate([&](const ir::OperandIndex &ind, const ir::Operand &operand) {
- if (_data.external_operands.contains(ind) || !operand.isConstant())
- return;
- const auto &obj = graph()->operands().at(ind);
- if (obj.isConstant() && !constant_initializer->exist(ind))
- {
- constant_initializer->registerDefaultInitializer(ind, obj);
- }
- });
-
- constant_initializer->run();
+ TensorType type = TensorType::TENSOR_TYPE_VALID;
+ tensor_builder->registerTensorInfo(ind, info, backend_layout, type);
}
-void BackendContext::planTensors()
+ITensorRegistry *BackendContext::genTensors()
{
- ir::OperandIndexMap<uint32_t> uses_map;
- ir::OperandIndexMap<uint32_t> def_map;
- ir::OperandIndexSequence constants;
-
- // Prepare scanning
- _data.graph->operands().iterate([&](const ir::OperandIndex &ind, const ir::Operand &obj) {
- if (_data.external_operands.contains(ind))
- return;
- uses_map[ind] = obj.getUses().size();
- def_map[ind] = obj.getDef().valid() ? 1 : 0;
-
- if (obj.isConstant())
- constants.append(ind);
-
- if (!tensor_builder->isRegistered(ind))
- {
- // These tensors do not exist in any operation (No use and def)
- const auto info = obj.info();
- const auto layout = _data.operand_layouts.at(ind);
- // TODO Change tensor info to have permuted shape
- tensor_builder->registerTensorInfo(ind, info, layout);
- }
- });
-
- // Start scanning to do notify{First|Last}Use for each tensor
+ ir::OperandIndexMap<TensorType> type_map;
- // If a tensor is a constant, increase the use of the tensor and allocate it first.
- // Increasing use count here makes the tensor never be deallocated, i.e it they will be
- // deallocated last.
- VERBOSE(planTensors) << "TENSORS as CONSTANT" << std::endl;
- for (const auto &ind : constants)
+ for (const auto &ind : graph()->getInputs())
{
- uses_map[ind]++;
- tensor_builder->notifyFirstUse(ind);
+ type_map[ind] = TensorType::TENSOR_TYPE_INPUT;
}
- // At each operation,
- // 1. Scan DEF of outputs. If the DEF, allocate it
- // 2. Scan DEF of inputs. If variable tensor, allocate it
- // 3. Scan USE of inputs. Decrease the USE and deallocate if the USE is 0
- for (const auto op_ind : _data.op_order)
+ for (const auto &ind : graph()->getOutputs())
{
- const auto &op = graph()->operations().at(op_ind);
- auto op_inputs = op.getInputs() | ir::Remove::DUPLICATED | ir::Remove::UNDEFINED;
- auto op_outputs = op.getOutputs() | ir::Remove::DUPLICATED | ir::Remove::UNDEFINED;
-
- // Define outputs
- for (const auto &ind : op_outputs)
- {
- if (!tensor_builder->isRegistered(ind))
- continue;
- assert(def_map.find(ind) != def_map.end());
- if (def_map[ind])
- {
- def_map[ind] = 0;
- tensor_builder->notifyFirstUse(ind);
- }
- }
-
- // Scan variable tensors
- // This tensor has features like constant. But OperandInfo and LowerInfo treat them as
- // non-constant because of less memory usage by memory planning in here
- for (const auto &ind : op_inputs)
- {
- if (!tensor_builder->isRegistered(ind))
- continue;
- const auto &operand = graph()->operands().at(ind);
- if (operand.info().isVariable())
- {
- // The variable tensor with buffer is not supported yet
- assert(operand.data() == nullptr);
- assert(operand.getUses().size() == 1 && !operand.getDef().valid());
- assert(uses_map[ind] == 1 && def_map[ind] == 0);
- tensor_builder->notifyFirstUse(ind);
- }
- }
-
- for (const auto &ind : op_inputs)
- {
- if (!tensor_builder->isRegistered(ind))
- continue;
- assert(uses_map.find(ind) != uses_map.end());
- assert(uses_map[ind] > 0);
- uses_map[ind]--;
- if (uses_map[ind] == 0)
- {
- // plan for deallocation of static tensornode
- tensor_builder->notifyLastUse(ind);
- }
- }
+ type_map[ind] = TensorType::TENSOR_TYPE_OUTPUT;
}
-
- _data.graph->operands().iterate([&](const ir::OperandIndex &ind, const ir::Operand &) {
- if (uses_map[ind] == 0)
- {
- tensor_builder->notifyLastUse(ind);
- }
- });
-
- // Dispose and validate
- for (const auto &ind : constants)
- {
- --uses_map[ind];
- if (uses_map[ind] == 0) // To prevent notifyLastUse from being called twice
- {
- tensor_builder->notifyLastUse(ind);
- }
- }
-
- assert(
- std::all_of(uses_map.begin(), uses_map.end(),
- [](std::pair<const ir::OperandIndex, uint32_t> it) { return it.second == 0; }));
-
- assert(
- std::all_of(def_map.begin(), def_map.end(),
- [](std::pair<const ir::OperandIndex, uint32_t> it) { return it.second == 0; }));
-}
-
-ITensorRegistry *BackendContext::genTensors()
-{
graph()->operands().iterate([&](const ir::OperandIndex &ind, const ir::Operand &obj) {
if (external_operands().contains(ind))
return;
@@ -182,7 +61,11 @@ ITensorRegistry *BackendContext::genTensors()
const auto backend_layout = operand_layouts().at(ind);
ir::OperandInfo backend_info{permuteShape(obj.shape(), frontend_layout, backend_layout),
obj.typeInfo(), obj.info().memAllocType(), obj.isConstant()};
- tensor_builder->registerTensorInfo(ind, backend_info, backend_layout);
+ if (obj.isConstant())
+ {
+ type_map[ind] = TensorType::TENSOR_TYPE_INPUT;
+ }
+ tensor_builder->registerTensorInfo(ind, backend_info, backend_layout, type_map[ind]);
});
// TODO Get compiler options from compiler, and use it rather than getting it from Env
@@ -199,44 +82,10 @@ ITensorRegistry *BackendContext::genTensors()
tensor_builder->notifyFirstUse(ind);
});
}
-
tensor_builder->prepare();
-
return tensor_registry.get();
}
-FunctionMap BackendContext::genKernels()
-{
- FunctionMap ret;
-
- // kernel_gen
- for (auto op_ind : _data.op_order)
- {
- auto fn_seq = kernel_gen->generate(op_ind);
- ret.emplace_back(op_ind, std::move(fn_seq));
- }
-
- tensor_builder->allocate();
-
- initConsts();
-
- // NOTE For memory optimization, we want to free some operand data
- const_cast<ir::Graph &>(*_data.graph)
- .operands()
- .iterate([&](const ir::OperandIndex &, ir::Operand &obj) { obj.releaseData(); });
-
- for (auto &it : ret)
- {
- auto &fn_seq = it.second;
- fn_seq->iterate([&](exec::IFunction &ifunc) {
- ifunc.prepare();
- tensor_builder->postFunctionPrepare();
- });
- }
-
- return ret;
-}
-
} // namespace gpu_cl
} // namespace backend
} // namespace onert
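genTensors now classifies operands before registering them: graph inputs, then graph outputs, then constants (which the hunk above also maps to TENSOR_TYPE_INPUT); operands matching none of these take the map's default-constructed value. A toy version of that precedence (plain ints and a hypothetical enum mirroring the usage above, not the real TensorType):

    #include <cassert>
    #include <map>
    #include <set>

    enum class Kind { VALID = 0, INPUT, OUTPUT }; // hypothetical values

    int main()
    {
      std::set<int> inputs = {0}, outputs = {3}, constants = {1};
      std::map<int, Kind> kind;

      for (int i : inputs) kind[i] = Kind::INPUT;
      for (int i : outputs) kind[i] = Kind::OUTPUT;
      for (int i : constants) kind[i] = Kind::INPUT; // constants register as inputs above

      assert(kind[2] == Kind::VALID); // untouched ids value-initialize to the first enumerator
      assert(kind[1] == Kind::INPUT);
    }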
diff --git a/runtime/onert/backend/gpu_cl/BackendContext.h b/runtime/onert/backend/gpu_cl/BackendContext.h
index f17489e7a..7412d2bce 100644
--- a/runtime/onert/backend/gpu_cl/BackendContext.h
+++ b/runtime/onert/backend/gpu_cl/BackendContext.h
@@ -20,10 +20,12 @@
#include <backend/BackendContext.h>
#include <util/ConfigSource.h>
+#include <cl_common/BackendContext.h>
+
#include "ConstantInitializer.h"
#include "KernelGenerator.h"
#include "TensorBuilder.h"
-#include "open_cl/InferenceContext.h"
+#include "tensorflow/lite/delegates/gpu/cl/inference_context.h"
namespace onert
{
@@ -32,31 +34,28 @@ namespace backend
namespace gpu_cl
{
-class BackendContext : public onert::backend::BackendContext
+class BackendContext
+ : public onert::backend::cl_common::BackendContext<TensorBuilder, ConstantInitializer,
+ KernelGenerator>
{
public:
BackendContext(const Backend *backend, ContextData &&data,
- std::shared_ptr<ITensorRegistry> tensor_registry = nullptr,
+ std::shared_ptr<TensorRegistry> tensor_registry = nullptr,
std::shared_ptr<TensorBuilder> tensor_builder = nullptr,
std::shared_ptr<ConstantInitializer> constant_initializer = nullptr,
std::shared_ptr<KernelGenerator> kernel_gen = nullptr)
- : onert::backend::BackendContext(backend, std::move(data), tensor_registry),
- tensor_builder{tensor_builder}, constant_initializer{constant_initializer}, kernel_gen{
- kernel_gen}
+ : onert::backend::cl_common::BackendContext<TensorBuilder, ConstantInitializer,
+ KernelGenerator>(
+ backend, std::move(data), tensor_registry, tensor_builder, constant_initializer, kernel_gen)
{
+ // DO NOTHING
}
ITensorRegistry *genTensors() override;
- FunctionMap genKernels() override;
-
-private:
- void initConsts();
- void planTensors();
-public:
- std::shared_ptr<TensorBuilder> tensor_builder;
- std::shared_ptr<ConstantInitializer> constant_initializer;
- std::shared_ptr<KernelGenerator> kernel_gen;
+protected:
+ void registerTensorInfo(const ir::OperandIndex &ind, const ir::OperandInfo &info,
+ ir::Layout backend_layout) override;
};
} // namespace gpu_cl
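The gpu_cl context now inherits the shared cl_common template and supplies only the registerTensorInfo hook: the base owns planTensors and genKernels, while the derived class decides how a tensor is registered. A minimal sketch of this template-method split:

    #include <cstdio>

    class BaseContext
    {
    public:
      virtual ~BaseContext() = default;
      void planTensors() { registerTensorInfo(42); } // shared planning calls the hook

    protected:
      virtual void registerTensorInfo(int ind) = 0;
    };

    class GpuClContext : public BaseContext
    {
    protected:
      void registerTensorInfo(int ind) override
      {
        std::printf("register tensor %d with a backend-specific TensorType\n", ind);
      }
    };

    int main()
    {
      GpuClContext ctx;
      ctx.planTensors();
    }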
diff --git a/runtime/onert/backend/gpu_cl/CMakeLists.txt b/runtime/onert/backend/gpu_cl/CMakeLists.txt
index 49bae37f8..eb1964214 100644
--- a/runtime/onert/backend/gpu_cl/CMakeLists.txt
+++ b/runtime/onert/backend/gpu_cl/CMakeLists.txt
@@ -1,14 +1,14 @@
set(LIB_ONERT_BACKEND_GPU_CL onert_backend_gpu_cl)
+if(NOT BUILD_GPU_CL)
+ return()
+endif(NOT BUILD_GPU_CL)
+
nnas_find_package(Opencl_Headers QUIET)
if(NOT Opencl_Headers_FOUND)
return()
endif(NOT Opencl_Headers_FOUND)
-if(NOT BUILD_GPU_CL)
- return()
-endif(NOT BUILD_GPU_CL)
-
nnas_find_package(Farmhash QUIET)
if(NOT Farmhash_FOUND)
return()
@@ -19,18 +19,32 @@ if(NOT Abseil_FOUND)
return()
endif(NOT Abseil_FOUND)
-file(GLOB_RECURSE SOURCES "*.cc")
+nnfw_find_package(Fp16 QUIET)
+if(NOT Fp16_FOUND)
+ return()
+endif(NOT Fp16_FOUND)
+nnas_find_package(TensorFlowGpu QUIET)
+if(NOT TensorFlowGpu_FOUND)
+ message(FATAL_ERROR "TensorFlowGpu lib not found")
+ return()
+endif(NOT TensorFlowGpu_FOUND)
+
+file(GLOB_RECURSE SOURCES "*.cc")
add_library(${LIB_ONERT_BACKEND_GPU_CL} SHARED ${SOURCES})
target_include_directories(${LIB_ONERT_BACKEND_GPU_CL} PRIVATE ${CMAKE_CURRENT_SOURCE_DIR})
+target_include_directories(${LIB_ONERT_BACKEND_GPU_CL} PRIVATE ${TENSORFLOWGPU_SOURCE_DIR})
target_link_libraries(${LIB_ONERT_BACKEND_GPU_CL} PRIVATE abseil)
target_link_libraries(${LIB_ONERT_BACKEND_GPU_CL} PRIVATE dl)
target_link_libraries(${LIB_ONERT_BACKEND_GPU_CL} PRIVATE farmhash)
-target_link_libraries(${LIB_ONERT_BACKEND_GPU_CL} PRIVATE Headers)
+target_link_libraries(${LIB_ONERT_BACKEND_GPU_CL} INTERFACE Open_CL_Headers)
+target_link_libraries(${LIB_ONERT_BACKEND_GPU_CL} PRIVATE fp16)
+target_link_libraries(${LIB_ONERT_BACKEND_GPU_CL} PRIVATE TensorFlowGpu)
target_link_libraries(${LIB_ONERT_BACKEND_GPU_CL} PRIVATE onert_core)
+target_link_libraries(${LIB_ONERT_BACKEND_GPU_CL} PRIVATE ${LIB_ONERT_BACKEND_CL_COMMON})
target_link_libraries(${LIB_ONERT_BACKEND_GPU_CL} PRIVATE nnfw_common)
target_link_libraries(${LIB_ONERT_BACKEND_GPU_CL} PRIVATE nnfw_coverage)
diff --git a/runtime/onert/backend/gpu_cl/ClConstantInitializer.cc b/runtime/onert/backend/gpu_cl/ClConstantInitializer.cc
index b3ef2f560..05dd8e2a3 100644
--- a/runtime/onert/backend/gpu_cl/ClConstantInitializer.cc
+++ b/runtime/onert/backend/gpu_cl/ClConstantInitializer.cc
@@ -93,6 +93,9 @@ void ClConstantInitializer::registerPermuteInitializer(const ir::OperandIndex &i
case DataType::FLOAT32:
_init_map[index] = std::bind(permuteInit<float>, _1, _2, _current_layout);
break;
+ case DataType::INT32:
+ _init_map[index] = std::bind(permuteInit<int32_t>, _1, _2, _current_layout);
+ break;
default:
throw std::runtime_error("Not supported, yet");
break;
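The new INT32 case reuses the std::bind idiom already used for FLOAT32: permuteInit takes three arguments, and pre-binding the current layout as the third leaves a two-argument callable matching the signature stored in _init_map. The adaptation in isolation (permuteInitSketch is a stand-in, not the real permuteInit):

    #include <cstdio>
    #include <functional>

    enum class Layout { NHWC, NCHW };

    // Stand-in for permuteInit<int32_t>(operand, tensor, layout).
    void permuteInitSketch(int operand, int tensor, Layout layout)
    {
      std::printf("init tensor %d from operand %d (layout %d)\n", tensor, operand,
                  static_cast<int>(layout));
    }

    int main()
    {
      using namespace std::placeholders;
      // Two-argument callable with the layout pre-bound, as _init_map expects.
      std::function<void(int, int)> init = std::bind(permuteInitSketch, _1, _2, Layout::NCHW);
      init(/*operand=*/7, /*tensor=*/7);
    }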
diff --git a/runtime/onert/backend/gpu_cl/ClConstantInitializer.h b/runtime/onert/backend/gpu_cl/ClConstantInitializer.h
index d7d21e847..95e228acd 100644
--- a/runtime/onert/backend/gpu_cl/ClConstantInitializer.h
+++ b/runtime/onert/backend/gpu_cl/ClConstantInitializer.h
@@ -17,8 +17,6 @@
#ifndef __ONERT_COMPILER_GPU_CL_CLCONSTANT_INITIALIZER_H__
#define __ONERT_COMPILER_GPU_CL_CLCONSTANT_INITIALIZER_H__
-#include "ClTensorRegistry.h"
-
#include <unordered_map>
#include <functional>
diff --git a/runtime/onert/backend/gpu_cl/ClFunction.h b/runtime/onert/backend/gpu_cl/ClFunction.h
index 9d3d69092..5e8a11a84 100644
--- a/runtime/onert/backend/gpu_cl/ClFunction.h
+++ b/runtime/onert/backend/gpu_cl/ClFunction.h
@@ -22,9 +22,9 @@
#include <vector>
#include <memory>
-#include "open_cl/kernels/GpuOperation.h"
-#include "open_cl/ClCommandQueue.h"
-#include "open_cl/Status.h"
+#include "tensorflow/lite/delegates/gpu/cl/kernels/gpu_operation.h"
+#include "tensorflow/lite/delegates/gpu/cl/cl_command_queue.h"
+#include "tensorflow/lite/delegates/gpu/common/status.h"
namespace onert
{
@@ -32,19 +32,18 @@ namespace backend
{
namespace gpu_cl
{
-
class ClFunction : public ::onert::exec::IFunction
{
public:
ClFunction() : _gpu_operations(), _creation_context() {}
public:
- void configure(std::shared_ptr<CreationContext> creation_context)
+ void configure(std::shared_ptr<tflite::gpu::cl::CreationContext> creation_context)
{
_creation_context = creation_context;
}
- void add_operation(std::unique_ptr<GPUOperation> gpu_operation)
+ void add_operation(std::unique_ptr<tflite::gpu::cl::GPUOperation> gpu_operation)
{
_gpu_operations.push_back(std::move(gpu_operation));
}
@@ -57,6 +56,10 @@ public:
{
throw std::runtime_error("Failed to AddToQueue.");
}
+ if (!_creation_context->queue->WaitForCompletion().ok())
+ {
+ throw std::runtime_error("Failed to WaitForCompletion.");
+ }
}
}
@@ -77,8 +80,8 @@ public:
}
private:
- std::vector<std::unique_ptr<GPUOperation>> _gpu_operations;
- std::shared_ptr<CreationContext> _creation_context;
+ std::vector<std::unique_ptr<tflite::gpu::cl::GPUOperation>> _gpu_operations;
+ std::shared_ptr<tflite::gpu::cl::CreationContext> _creation_context;
};
} // namespace gpu_cl
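run() now blocks on WaitForCompletion after each AddToQueue, trading queue pipelining for deterministic completion and surfacing errors at the operation that caused them. The control flow reduced to its shape (FakeQueue and Status are stand-ins, not the tflite::gpu types):

    #include <cstddef>
    #include <stdexcept>

    struct Status { bool ok_ = true; bool ok() const { return ok_; } };

    struct FakeQueue
    {
      Status AddToQueue() { return {}; }        // enqueue one GPU operation
      Status WaitForCompletion() { return {}; } // block until the queue drains
    };

    void runSketch(FakeQueue &queue, std::size_t num_ops)
    {
      for (std::size_t i = 0; i < num_ops; ++i)
      {
        if (!queue.AddToQueue().ok())
          throw std::runtime_error("Failed to AddToQueue.");
        if (!queue.WaitForCompletion().ok()) // the new per-operation wait from this diff
          throw std::runtime_error("Failed to WaitForCompletion.");
      }
    }

    int main()
    {
      FakeQueue q;
      runSketch(q, 3);
    }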
diff --git a/runtime/onert/backend/gpu_cl/ClMemoryManager.h b/runtime/onert/backend/gpu_cl/ClMemoryManager.h
deleted file mode 100644
index 3bac0d51d..000000000
--- a/runtime/onert/backend/gpu_cl/ClMemoryManager.h
+++ /dev/null
@@ -1,135 +0,0 @@
-/*
- * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#ifndef __ONERT_BACKEND_ACL_COMMON_MEMORY_MANAGER_H__
-#define __ONERT_BACKEND_ACL_COMMON_MEMORY_MANAGER_H__
-
-#include <cassert>
-
-#include "ir/OperandIndexMap.h"
-#include "ir/Shape.h"
-#include "open_cl/ClContext.h"
-#include "open_cl/InferenceContext.h"
-#include "open_cl/Status.h"
-#include "open_cl/StorageTypeUtil.h"
-#include "open_cl/TensorType.h"
-#include "util/logging.h"
-
-namespace onert
-{
-namespace backend
-{
-namespace gpu_cl
-{
-
-template <typename T_ITensor, typename T_Tensor> class ClMemoryManager
-{
-public:
- ClMemoryManager(CLContext *context) : _context{context} {}
-
- virtual ~ClMemoryManager() = default;
-
- virtual void allocate(void)
- {
- for (const auto &tensor_entry : _tensors)
- {
- auto tensor = tensor_entry.second;
- const auto &t = tensor_reserver_.Get(tensor_entry.first.value());
- const auto &shape = t->shape;
- const auto &descriptor = t->descriptor;
- if (!CreateTensor(*_context, shape, descriptor, tensor->handle()).ok())
- {
- return;
- }
- }
- }
-
- virtual void deallocate(void)
- {
- // NYI
- }
-
- virtual void startLifetime(const ir::OperandIndex &)
- { /* DO NOTHING */
- }
- virtual void finishLifetime(const ir::OperandIndex &)
- { /* DO NOTHING */
- }
-
- void buildTensor(const ir::OperandIndex &ind, const ir::OperandInfo &info,
- InferenceContext::CreateInferenceInfo create_info,
- std::shared_ptr<Environment> environment, DeviceInfo &device_info)
- {
- ValueId max_id = 0;
- auto data_type = DeduceDataTypeFromPrecision(create_info.precision);
- const auto shape = info.shape();
-
- auto tensor = std::make_shared<T_Tensor>(shape.rank(), shape, environment);
- _tensors[ind] = tensor;
-
- BHWC t_shape;
- switch (shape.rank())
- {
- case 1:
- // B layout
- t_shape = BHWC(shape.dim(0), 1, 1, 1);
- break;
- case 2:
- // BC layout
- t_shape = BHWC(shape.dim(0), 1, 1, shape.dim(1));
- break;
- case 3:
- // BWC layout
- t_shape = BHWC(shape.dim(0), 1, shape.dim(1), shape.dim(2));
- break;
- case 4:
- // BHWC layout
- t_shape = BHWC(shape.dim(0), shape.dim(1), shape.dim(2), shape.dim(3));
- break;
- default:
- break;
- }
-
- TensorStorageType storage_type = create_info.storage_type;
- Layout layout = t_shape.b == 1 ? Layout::HWC : Layout::BHWC;
-
- ValueId id = ind.value();
- storage_type = SelectBestStorageType(device_info, t_shape, storage_type, data_type, layout);
- auto dummy = std::make_shared<InferenceContext::DummyTensor>();
- dummy->shape = t_shape;
- dummy->descriptor = TensorDescriptor{data_type, storage_type, layout};
- tensor_reserver_.Add(id, dummy);
-
- max_id = std::max(max_id, id);
-
- tensor_reserver_.SetNext(max_id + 1);
- }
-
- ir::OperandIndexMap<std::shared_ptr<T_Tensor>> &tensors(void) { return _tensors; }
-
- InferenceContext::TensorReserver &tensorReservers(void) { return tensor_reserver_; }
-
-private:
- ir::OperandIndexMap<std::shared_ptr<T_Tensor>> _tensors;
- InferenceContext::TensorReserver tensor_reserver_;
- CLContext *_context;
-};
-
-} // namespace gpu_cl
-} // namespace backend
-} // namespace onert
-
-#endif // __ONERT_BACKEND_ACL_COMMON_MEMORY_MANAGER_H__
diff --git a/runtime/onert/backend/gpu_cl/ClTensorBuilder.h b/runtime/onert/backend/gpu_cl/ClTensorBuilder.h
deleted file mode 100644
index 951bbd844..000000000
--- a/runtime/onert/backend/gpu_cl/ClTensorBuilder.h
+++ /dev/null
@@ -1,289 +0,0 @@
-/*
- * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#ifndef __ONERT_BACKEND_CL_TENSOR_BUILDER_H__
-#define __ONERT_BACKEND_CL_TENSOR_BUILDER_H__
-
-#include <memory>
-#include <queue>
-
-#include "ClTensorManager.h"
-#include "ClTensorRegistry.h"
-#include "ParentInfo.h"
-
-#include "open_cl/TensorType.h"
-#include "open_cl/TensorTypeUtil.h"
-#include "open_cl/ClDevice.h"
-#include "open_cl/InferenceContext.h"
-
-#include "ir/OperandIndexMap.h"
-#include "ir/OperandIndexSequence.h"
-#include <ir/Operands.h>
-#include <util/Utils.h>
-
-namespace onert
-{
-namespace backend
-{
-namespace gpu_cl
-{
-
-enum class UsesType
-{
- FIRST,
- LAST
-};
-
-template <typename T_ITensor, typename T_Tensor> class ClTensorBuilder
-{
-public:
- using T_ClTensorManager = ClTensorManager<T_ITensor, T_Tensor>;
-
- ClTensorBuilder(const ir::Operands &operands, T_ClTensorManager *tensor_mgr,
- InferenceContext::CreateInferenceInfo create_info,
- const std::shared_ptr<Environment> &environment);
-
- /**
- * @brief Register tensor information to allocate on ACL-CL backend
- * @param[in] ind Operand index
- * @param[in] info Tensor information
- * @param[in] layout Tensor data layout
- */
- void registerTensorInfo(const ir::OperandIndex &ind, const ir::OperandInfo &info,
- ir::Layout backend_layout);
-
- void notifyFirstUse(const ir::OperandIndex &);
- void notifyLastUse(const ir::OperandIndex &);
-
- bool isRegistered(const ir::OperandIndex &) const;
-
- void prepare();
- void allocate();
- void postFunctionPrepare();
-
- T_ClTensorManager *cl_tensor_manager(void) { return _tensor_mgr.get(); }
-
- void setUsesCount(const ir::OperandIndex &index, size_t num_uses)
- {
- assert(_uses_count_map.find(index) != _uses_count_map.end() ? _uses_count_map[index] == num_uses
- : true);
- _uses_count_map[index] = num_uses;
- }
-
- void parent_map(std::unordered_map<ir::OperandIndex, ParentInfo> &&parent_map)
- {
- _parent_map = std::move(parent_map);
- }
-
- bool areSubTensorsOf(const ir::OperandIndex &parent, const ir::OperandIndexSequence &seq);
-
- /**
- * @brief Check child tensor is allocated as subtensor of parent tensor
- * @param[in] parent Index of parent
- * @param[in] child Index of child
- * @return @c true if child is allocated as subtensor of parent, otherwise @c false
- */
- bool isSubTensorOf(const ir::OperandIndex &parent, const ir::OperandIndex &child);
-
-private:
- void buildTensors(void);
- ir::OperandIndex findRootParent(ir::OperandIndex index);
-
-private:
- const ir::Operands &_operands;
- ir::OperandIndexMap<ir::OperandInfo> _tensor_info_map;
- ir::OperandIndexMap<ir::Layout> _tensor_layout_map;
- ir::OperandIndexMap<size_t> _uses_count_map;
-
- std::unique_ptr<T_ClTensorManager> _tensor_mgr;
- InferenceContext::CreateInferenceInfo _create_info;
- std::shared_ptr<Environment> _environment;
-
- // for linear executor
- std::vector<std::pair<UsesType, ir::OperandIndex>> _lifetime_seq;
-
- // Extra info for concat elimination
- ir::OperandIndexMap<ParentInfo> _parent_map;
-};
-
-} // namespace gpu_cl
-} // namespace backend
-} // namespace onert
-
-#include <cassert>
-#include <stack>
-
-#include "util/logging.h"
-
-namespace onert
-{
-namespace backend
-{
-namespace gpu_cl
-{
-
-template <typename T_ITensor, typename T_Tensor>
-ClTensorBuilder<T_ITensor, T_Tensor>::ClTensorBuilder(
- const ir::Operands &operands, T_ClTensorManager *tensor_mgr,
- InferenceContext::CreateInferenceInfo create_info,
- const std::shared_ptr<Environment> &environment)
- : _operands{operands}, _tensor_mgr{tensor_mgr}, _create_info{create_info}, _environment{
- environment}
-{
- assert(_tensor_mgr);
-}
-
-template <typename T_ITensor, typename T_Tensor>
-void ClTensorBuilder<T_ITensor, T_Tensor>::registerTensorInfo(const ir::OperandIndex &ind,
- const ir::OperandInfo &info,
- ir::Layout backend_layout)
-{
- assert(_tensor_mgr->constTensors().size() == 0);
- assert(_tensor_mgr->nonconstTensors().size() == 0);
-
- _uses_count_map[ind] = _operands.at(ind).getUses().size();
-
- _tensor_info_map.emplace(ind, info);
- _tensor_layout_map.insert({ind, backend_layout});
-}
-
-template <typename T_ITensor, typename T_Tensor>
-void ClTensorBuilder<T_ITensor, T_Tensor>::notifyFirstUse(const ir::OperandIndex &ind)
-{
- _lifetime_seq.emplace_back(UsesType::FIRST, ind);
-}
-
-template <typename T_ITensor, typename T_Tensor>
-void ClTensorBuilder<T_ITensor, T_Tensor>::notifyLastUse(const ir::OperandIndex &ind)
-{
- _lifetime_seq.emplace_back(UsesType::LAST, ind);
-}
-
-template <typename T_ITensor, typename T_Tensor>
-bool ClTensorBuilder<T_ITensor, T_Tensor>::isRegistered(const ir::OperandIndex &ind) const
-{
- return _tensor_info_map.find(ind) != _tensor_info_map.end();
-}
-
-template <typename T_ITensor, typename T_Tensor>
-void ClTensorBuilder<T_ITensor, T_Tensor>::prepare(void)
-{
- buildTensors();
-}
-
-template <typename T_ITensor, typename T_Tensor>
-void ClTensorBuilder<T_ITensor, T_Tensor>::allocate(void)
-{
- // Update lifetime sequence to apply subtensor optimization
-
- std::unordered_map<ir::OperandIndex, ir::OperandIndex> root_map;
- std::function<ir::OperandIndex &(ir::OperandIndex)> find_root =
- [&](ir::OperandIndex ind) -> ir::OperandIndex & {
- ir::OperandIndex &ret = root_map[ind];
-
- // We know the root parent value already
- if (ret.valid())
- return ret;
-
- auto itr = _parent_map.find(ind);
- if (itr == _parent_map.end())
- {
- // If there is no parent, let's store the value of itself
- return ret = ind;
- }
- else
- {
- return ret = find_root(itr->second.parent);
- }
- };
-
- ir::OperandIndexMap<bool> first_use_check;
- ir::OperandIndexMap<bool> last_use_check;
- std::map<size_t, std::pair<UsesType, ir::OperandIndex>> lifetime_map;
- for (size_t i = 0; i < _lifetime_seq.size(); i++)
- {
- auto &entry = _lifetime_seq[i];
- if (entry.first != UsesType::FIRST)
- continue;
- auto root_ind = find_root(entry.second);
- if (first_use_check[root_ind])
- continue;
- first_use_check[root_ind] = true;
- lifetime_map[i] = {UsesType::FIRST, root_ind};
- }
-
- for (int i = _lifetime_seq.size() - 1; i >= 0; i--)
- {
- auto &entry = _lifetime_seq[i];
- if (entry.first != UsesType::LAST)
- continue;
- auto root_ind = find_root(entry.second);
- if (last_use_check[root_ind])
- continue;
- last_use_check[root_ind] = true;
- lifetime_map[i] = {UsesType::LAST, root_ind};
- }
-
- for (auto &entry : lifetime_map)
- {
- auto &use = entry.second;
- auto use_type = use.first;
- auto use_index = use.second;
- assert(use_index.valid());
- if (use_type == UsesType::FIRST)
- _tensor_mgr->startLifetime(use_index);
- else
- _tensor_mgr->finishLifetime(use_index);
- }
-
- _tensor_mgr->allocateConsts();
-
- // TODO Since `_parent_map` is filled for all Concat nodes even if the node this backend uses
- // After refactoring BackendContext we can uncomment this
- // assert(_tensor_info_map.size() ==
- // _tensor_mgr->nonconstTensors().size() + num of constants of _tensor_info_map +
- // _parent_map.size());
- _tensor_mgr->allocateNonconsts();
-}
-
-template <typename T_ITensor, typename T_Tensor>
-void ClTensorBuilder<T_ITensor, T_Tensor>::postFunctionPrepare(void)
-{
- _tensor_mgr->tryDeallocConstants();
-}
-
-template <typename T_ITensor, typename T_Tensor>
-void ClTensorBuilder<T_ITensor, T_Tensor>::buildTensors(void)
-{
- assert(_tensor_mgr->constTensors().size() == 0);
- assert(_tensor_mgr->nonconstTensors().size() == 0);
- // Normal tensors
- for (auto &entry : _tensor_info_map)
- {
- auto ind = entry.first;
- if (_parent_map.count(ind) > 0)
- continue;
-
- const auto &info = entry.second;
- _tensor_mgr->buildTensor(ind, info, _create_info, _environment, _environment->device().info_);
- }
-}
-
-} // namespace gpu_cl
-} // namespace backend
-} // namespace onert
-
-#endif // __ONERT_BACKEND_ACL_COMMON_TEMPL_TENSOR_BUILDER_H__
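The allocate() body deleted here is the code that became cl_common::createLifetimeMap earlier in this diff: a FIRST/LAST event sequence is collapsed so each root tensor keeps only its earliest FIRST and its latest LAST. A tiny worked example of that collapse with integer ids (single-level parent lookup here; the real code resolves roots recursively):

    #include <cassert>
    #include <cstddef>
    #include <map>
    #include <utility>
    #include <vector>

    enum class Uses { FIRST, LAST };
    using Seq = std::vector<std::pair<Uses, int>>;

    int main()
    {
      // Subtensor 1 has parent 0, so events on 1 collapse onto root 0.
      std::map<int, int> parent = {{1, 0}};
      auto root = [&](int i) {
        auto it = parent.find(i);
        return it == parent.end() ? i : it->second;
      };

      Seq seq = {{Uses::FIRST, 0}, {Uses::FIRST, 1}, {Uses::LAST, 0}, {Uses::LAST, 1}};

      std::map<std::size_t, std::pair<Uses, int>> lifetime;
      std::map<int, bool> seen_first, seen_last;
      for (std::size_t i = 0; i < seq.size(); ++i) // forward: earliest FIRST per root
      {
        int r = root(seq[i].second);
        if (seq[i].first == Uses::FIRST && !seen_first[r])
        {
          seen_first[r] = true;
          lifetime[i] = {Uses::FIRST, r};
        }
      }
      for (std::size_t j = seq.size(); j-- > 0;) // backward: latest LAST per root
      {
        int r = root(seq[j].second);
        if (seq[j].first == Uses::LAST && !seen_last[r])
        {
          seen_last[r] = true;
          lifetime[j] = {Uses::LAST, r};
        }
      }

      assert(lifetime.size() == 2); // one FIRST (at index 0) and one LAST (at index 3)
      assert(lifetime.at(3).first == Uses::LAST);
    }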
diff --git a/runtime/onert/backend/gpu_cl/ClTensorManager.h b/runtime/onert/backend/gpu_cl/ClTensorManager.h
deleted file mode 100644
index 49a11730f..000000000
--- a/runtime/onert/backend/gpu_cl/ClTensorManager.h
+++ /dev/null
@@ -1,235 +0,0 @@
-/*
- * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#ifndef __ONERT_BACKEND_ACL_COMMON_TENSOR_MANAGER_H__
-#define __ONERT_BACKEND_ACL_COMMON_TENSOR_MANAGER_H__
-
-#include "ClMemoryManager.h"
-
-#include "open_cl/InferenceContext.h"
-#include "open_cl/TensorType.h"
-
-#include "ir/OperandInfo.h"
-#include "ir/OperandIndexMap.h"
-
-namespace onert
-{
-namespace backend
-{
-namespace gpu_cl
-{
-
-template <typename T_ITensor, typename T_Tensor> class ClTensorManager
-{
-public:
- using T_ClMemoryManager = ClMemoryManager<T_ITensor, T_Tensor>;
-
- ClTensorManager(T_ClMemoryManager *const_mgr, T_ClMemoryManager *nonconst_mgr);
-
- virtual ~ClTensorManager() = default;
-
- void allocateConsts(void);
- void allocateNonconsts(void);
- void deallocateConsts(void);
- void deallocateNonconsts(void);
-
- void buildTensor(const ir::OperandIndex &ind, const ir::OperandInfo &info,
- InferenceContext::CreateInferenceInfo create_info,
- std::shared_ptr<Environment> environment, DeviceInfo &device_info);
-
- std::shared_ptr<T_ITensor> findTensorAsParent(const ir::OperandIndex &ind);
-
- void startLifetime(const ir::OperandIndex &ind);
- void finishLifetime(const ir::OperandIndex &ind);
-
- std::shared_ptr<T_ITensor> at(const ir::OperandIndex &ind);
- std::shared_ptr<InferenceContext::DummyTensor> atR(const ir::OperandIndex &ind);
-
- InferenceContext::TensorReserver &constTensorReservers(void);
- InferenceContext::TensorReserver &nonconstTensorReservers(void);
-
- ir::OperandIndexMap<std::shared_ptr<T_Tensor>> &constTensors(void);
- ir::OperandIndexMap<std::shared_ptr<T_Tensor>> &nonconstTensors(void);
-
- void iterate(const std::function<void(const ir::OperandIndex &)> &fn);
-
- void tryDeallocConstants(void);
-
-private:
- std::unique_ptr<T_ClMemoryManager> _const_mgr;
- std::unique_ptr<T_ClMemoryManager> _nonconst_mgr;
- ir::OperandIndexMap<T_ClMemoryManager &> _ind_to_mgr;
-};
-
-} // namespace gpu_cl
-} // namespace backend
-} // namespace onert
-
-#include <cassert>
-#include "util/logging.h"
-
-namespace onert
-{
-namespace backend
-{
-namespace gpu_cl
-{
-
-template <typename T_ITensor, typename T_Tensor>
-ClTensorManager<T_ITensor, T_Tensor>::ClTensorManager(T_ClMemoryManager *const_mgr,
- T_ClMemoryManager *nonconst_mgr)
- : _const_mgr{const_mgr}, _nonconst_mgr{nonconst_mgr}
-{
- // DO NOTHING
-}
-
-template <typename T_ITensor, typename T_Tensor>
-void ClTensorManager<T_ITensor, T_Tensor>::allocateConsts(void)
-{
- _const_mgr->allocate();
-}
-
-template <typename T_ITensor, typename T_Tensor>
-void ClTensorManager<T_ITensor, T_Tensor>::allocateNonconsts(void)
-{
- _nonconst_mgr->allocate();
-}
-
-template <typename T_ITensor, typename T_Tensor>
-void ClTensorManager<T_ITensor, T_Tensor>::deallocateConsts(void)
-{
- _const_mgr->deallocate();
-}
-
-template <typename T_ITensor, typename T_Tensor>
-void ClTensorManager<T_ITensor, T_Tensor>::deallocateNonconsts(void)
-{
- _nonconst_mgr->deallocate();
-}
-
-template <typename T_ITensor, typename T_Tensor>
-void ClTensorManager<T_ITensor, T_Tensor>::buildTensor(
- const ir::OperandIndex &ind, const ir::OperandInfo &info,
- InferenceContext::CreateInferenceInfo create_info, std::shared_ptr<Environment> environment,
- DeviceInfo &device_info)
-{
- assert(_ind_to_mgr.find(ind) == _ind_to_mgr.end());
-
- if (info.isConstant())
- {
- _const_mgr->buildTensor(ind, info, create_info, environment, device_info);
- _ind_to_mgr.insert({ind, *_const_mgr});
- }
- else
- {
- _nonconst_mgr->buildTensor(ind, info, create_info, environment, device_info);
- _ind_to_mgr.insert({ind, *_nonconst_mgr});
- }
-}
-
-template <typename T_ITensor, typename T_Tensor>
-void ClTensorManager<T_ITensor, T_Tensor>::startLifetime(const ir::OperandIndex &ind)
-{
- assert(_ind_to_mgr.find(ind) != _ind_to_mgr.end());
- _ind_to_mgr.at(ind).startLifetime(ind);
-}
-
-template <typename T_ITensor, typename T_Tensor>
-void ClTensorManager<T_ITensor, T_Tensor>::finishLifetime(const ir::OperandIndex &ind)
-{
- assert(_ind_to_mgr.find(ind) != _ind_to_mgr.end());
- _ind_to_mgr.at(ind).finishLifetime(ind);
-}
-
-template <typename T_ITensor, typename T_Tensor>
-std::shared_ptr<T_ITensor> ClTensorManager<T_ITensor, T_Tensor>::at(const ir::OperandIndex &ind)
-{
- if (_ind_to_mgr.find(ind) == _ind_to_mgr.end())
- return nullptr;
-
- auto &tensors = _ind_to_mgr.at(ind).tensors();
- if (tensors.find(ind) != tensors.end())
- {
- return tensors.at(ind);
- }
-
- return nullptr;
-}
-
-template <typename T_ITensor, typename T_Tensor>
-ir::OperandIndexMap<std::shared_ptr<T_Tensor>> &
-ClTensorManager<T_ITensor, T_Tensor>::constTensors(void)
-{
- return _const_mgr->tensors();
-}
-
-template <typename T_ITensor, typename T_Tensor>
-ir::OperandIndexMap<std::shared_ptr<T_Tensor>> &
-ClTensorManager<T_ITensor, T_Tensor>::nonconstTensors(void)
-{
- return _nonconst_mgr->tensors();
-}
-
-template <typename T_ITensor, typename T_Tensor>
-std::shared_ptr<InferenceContext::DummyTensor>
-ClTensorManager<T_ITensor, T_Tensor>::atR(const ir::OperandIndex &ind)
-{
- if (_nonconst_mgr->tensorReservers().HaveTensor(ind.value()))
- {
- return _nonconst_mgr->tensorReservers().Get(ind.value());
- }
- else if (_const_mgr->tensorReservers().HaveTensor(ind.value()))
- {
- return _const_mgr->tensorReservers().Get(ind.value());
- }
- return nullptr;
-}
-
-template <typename T_ITensor, typename T_Tensor>
-InferenceContext::TensorReserver &ClTensorManager<T_ITensor, T_Tensor>::constTensorReservers(void)
-{
- return _const_mgr->tensorReservers();
-}
-
-template <typename T_ITensor, typename T_Tensor>
-InferenceContext::TensorReserver &
-ClTensorManager<T_ITensor, T_Tensor>::nonconstTensorReservers(void)
-{
- return _nonconst_mgr->tensorReservers();
-}
-
-template <typename T_ITensor, typename T_Tensor>
-void ClTensorManager<T_ITensor, T_Tensor>::iterate(
- const std::function<void(const ir::OperandIndex &)> &fn)
-{
- for (auto it : _nonconst_mgr->tensors())
- fn(it.first);
-
- for (auto it : _const_mgr->tensors())
- fn(it.first);
-}
-
-template <typename T_ITensor, typename T_Tensor>
-void ClTensorManager<T_ITensor, T_Tensor>::tryDeallocConstants(void)
-{
- // NYI
-}
-
-} // namespace gpu_cl
-} // namespace backend
-} // namespace onert
-
-#endif // __ONERT_BACKEND_ACL_COMMON_TENSOR_MANAGER_H__
diff --git a/runtime/onert/backend/gpu_cl/Config.cc b/runtime/onert/backend/gpu_cl/Config.cc
index 067a2070f..9959a471b 100644
--- a/runtime/onert/backend/gpu_cl/Config.cc
+++ b/runtime/onert/backend/gpu_cl/Config.cc
@@ -17,8 +17,11 @@
#include "Config.h"
#include <dlfcn.h>
-#include "open_cl/OpenclWrapper.h"
-#include "open_cl/Status.h"
+
+#include "tensorflow/lite/delegates/gpu/common/status.h"
+#include "tensorflow/lite/delegates/gpu/cl/opencl_wrapper.h"
+
+using namespace tflite::gpu::cl;
namespace onert
{
@@ -26,12 +29,9 @@ namespace backend
{
namespace gpu_cl
{
-
-Config::~Config() { UnloadOpenCL(_handle); }
-
bool Config::initialize()
{
- if (LoadOpenCL(&_handle).ok())
+ if (LoadOpenCL().ok())
{
return true;
}
diff --git a/runtime/onert/backend/gpu_cl/Config.h b/runtime/onert/backend/gpu_cl/Config.h
index aa5a51a15..6a455bbb5 100644
--- a/runtime/onert/backend/gpu_cl/Config.h
+++ b/runtime/onert/backend/gpu_cl/Config.h
@@ -31,7 +31,7 @@ namespace gpu_cl
class Config : public IConfig
{
public:
- virtual ~Config();
+ virtual ~Config() {}
public:
std::string id() override { return "gpu_cl"; }
diff --git a/runtime/onert/backend/gpu_cl/KernelGenerator.cc b/runtime/onert/backend/gpu_cl/KernelGenerator.cc
index a84867f8c..04edc3928 100644
--- a/runtime/onert/backend/gpu_cl/KernelGenerator.cc
+++ b/runtime/onert/backend/gpu_cl/KernelGenerator.cc
@@ -19,13 +19,14 @@
#include "KernelGenerator.h"
-#include "ClTensorRegistry.h"
#include "ClFunction.h"
#include "TensorManager.h"
-#include "open_cl/selectors/ConvolutionSelector.h"
-#include "open_cl/selectors/DwConvolutionSelector.h"
-#include "open_cl/selectors/SimpleSelectors.h"
+#include "tensorflow/lite/delegates/gpu/common/shape.h"
+#include "tensorflow/lite/delegates/gpu/cl/tensor.h"
+#include "tensorflow/lite/delegates/gpu/cl/selectors/convolution_selector.h"
+#include "tensorflow/lite/delegates/gpu/cl/selectors/dw_convolution_selector.h"
+#include "tensorflow/lite/delegates/gpu/cl/selectors/simple_selectors.h"
#include "ir/Operations.h"
#include "ir/Operations.Include.h"
@@ -37,6 +38,9 @@
#include "util/logging.h"
#include "util/Utils.h"
+using namespace tflite::gpu;
+using namespace tflite::gpu::cl;
+
namespace onert
{
namespace backend
@@ -60,14 +64,14 @@ void UpdatePadding(const ir::PaddingType type, const BHWC &input_shape, AttrT *a
}
}
-gpu_cl::PoolingType convertPoolType(ir::operation::Pool2D::PoolType type_ir)
+PoolingType convertPoolType(ir::operation::Pool2D::PoolType type_ir)
{
switch (type_ir)
{
case ir::operation::Pool2D::PoolType::AVG:
- return gpu_cl::PoolingType::AVERAGE;
+ return PoolingType::AVERAGE;
case ir::operation::Pool2D::PoolType::MAX:
- return gpu_cl::PoolingType::MAX;
+ return PoolingType::MAX;
default:
throw std::runtime_error("gpu_Cl KernelGenerator : Not supported operation yet");
}
@@ -75,7 +79,7 @@ gpu_cl::PoolingType convertPoolType(ir::operation::Pool2D::PoolType type_ir)
KernelGenerator::KernelGenerator(const ir::Graph &graph,
const std::shared_ptr<TensorBuilder> &tensor_builder,
- const std::shared_ptr<ClTensorRegistry<TensorManager>> &tensor_reg,
+ const std::shared_ptr<TensorRegistry> &tensor_reg,
const std::shared_ptr<CreationContext> &creation_context)
: basic::KernelGeneratorBase{graph}, _ctx(graph.operands()),
_operations_ctx(graph.operations()), _current_layout{graph.layout()},
@@ -190,7 +194,7 @@ void KernelGenerator::visit(const ir::operation::Conv2D &node)
auto bias_tensor = _tensor_reg->getClTensor(bias);
auto output_tensor = _tensor_reg->getClTensor(output);
- gpu_cl::Convolution2DAttributes attr;
+ Convolution2DAttributes attr;
attr.strides = ToHW(param.stride.vertical, param.stride.horizontal);
attr.dilations = HW(std::max(static_cast<u_int32_t>(1), param.dilation.height_factor),
std::max(static_cast<u_int32_t>(1), param.dilation.width_factor));
@@ -237,7 +241,7 @@ void KernelGenerator::visit(const ir::operation::Conv2D &node)
{
std::unique_ptr<GPUOperation> gpu_op_1;
OperationDef op_def_1;
- std::shared_ptr<Tensor> new_tensor = std::make_shared<Tensor>();
+ std::shared_ptr<cl::Tensor> new_tensor = std::make_shared<cl::Tensor>();
_new_tensors[output] = new_tensor;
if (!CreateTensor(*_creation_context->context, output_shape,
@@ -334,9 +338,9 @@ void KernelGenerator::visit(const ir::operation::DepthwiseConv2D &node)
const int filter_width = ker_shape.w;
const int output_depth = out_shape.c;
- InternalTensor<OHWI, DataType::FLOAT32> weights;
+ tflite::gpu::Tensor<OHWI, DataType::FLOAT32> weights;
weights.id = attr.weights.id;
- weights.shape = OHWI(output_depth, filter_height, filter_width, input_depth);
+ weights.shape = tflite::gpu::OHWI(output_depth, filter_height, filter_width, input_depth);
weights.data.resize(weights.shape.DimensionsProduct());
float *dst = &weights.data[0];
for (int j = 0; j < output_depth; ++j)
@@ -387,7 +391,7 @@ void KernelGenerator::visit(const ir::operation::DepthwiseConv2D &node)
{
std::unique_ptr<GPUOperation> gpu_op_1;
OperationDef op_def_1;
- std::shared_ptr<Tensor> new_tensor = std::make_shared<Tensor>();
+ std::shared_ptr<cl::Tensor> new_tensor = std::make_shared<cl::Tensor>();
_new_tensors[ofm_index] = new_tensor;
if (!CreateTensor(*_creation_context->context, out_shape,
diff --git a/runtime/onert/backend/gpu_cl/KernelGenerator.h b/runtime/onert/backend/gpu_cl/KernelGenerator.h
index 3e341b111..91fd3cd9d 100644
--- a/runtime/onert/backend/gpu_cl/KernelGenerator.h
+++ b/runtime/onert/backend/gpu_cl/KernelGenerator.h
@@ -17,11 +17,13 @@
#ifndef __ONERT_BACKEND_GPU_CL_KERNEL_GENERATOR_H__
#define __ONERT_BACKEND_GPU_CL_KERNEL_GENERATOR_H__
-#include "ClTensorRegistry.h"
+#include "TensorRegistry.h"
#include "backend/basic/TensorRegistry.h"
#include "TensorBuilder.h"
#include "TensorManager.h"
+#include "tensorflow/lite/delegates/gpu/api.h"
+
#include <backend/CustomKernelBuilder.h>
#include <backend/basic/KernelGeneratorBase.h>
#include <ir/Operands.h>
@@ -39,8 +41,8 @@ class KernelGenerator : public basic::KernelGeneratorBase
{
public:
KernelGenerator(const ir::Graph &graph, const std::shared_ptr<TensorBuilder> &tensor_builder,
- const std::shared_ptr<ClTensorRegistry<TensorManager>> &tensor_reg,
- const std::shared_ptr<CreationContext> &creation_context);
+ const std::shared_ptr<TensorRegistry> &tensor_reg,
+ const std::shared_ptr<tflite::gpu::cl::CreationContext> &creation_context);
std::unique_ptr<exec::FunctionSequence> generate(ir::OperationIndex ind) override;
@@ -58,9 +60,9 @@ private:
const ir::Operations &_operations_ctx;
ir::Layout _current_layout;
std::shared_ptr<TensorBuilder> _tensor_builder;
- std::shared_ptr<ClTensorRegistry<TensorManager>> _tensor_reg;
- std::shared_ptr<CreationContext> _creation_context;
- ir::OperandIndexMap<std::shared_ptr<Tensor>> _new_tensors;
+ std::shared_ptr<TensorRegistry> _tensor_reg;
+ std::shared_ptr<tflite::gpu::cl::CreationContext> _creation_context;
+ ir::OperandIndexMap<std::shared_ptr<tflite::gpu::cl::Tensor>> _new_tensors;
};
} // namespace gpu_cl
diff --git a/runtime/onert/backend/gpu_cl/MemoryManager.h b/runtime/onert/backend/gpu_cl/MemoryManager.h
new file mode 100644
index 000000000..a3b9b39de
--- /dev/null
+++ b/runtime/onert/backend/gpu_cl/MemoryManager.h
@@ -0,0 +1,157 @@
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __ONERT_BACKEND_GPU_CL_MEMORY_MANAGER_H__
+#define __ONERT_BACKEND_GPU_CL_MEMORY_MANAGER_H__
+
+#include "ex/InferenceContextEx.h"
+#include "operand/CLTensor.h"
+
+#include "ir/OperandIndexMap.h"
+#include "ir/OperandInfo.h"
+#include "util/logging.h"
+
+#include "tensorflow/lite/delegates/gpu/cl/cl_context.h"
+#include "tensorflow/lite/delegates/gpu/common/status.h"
+#include "tensorflow/lite/delegates/gpu/cl/storage_type_util.h"
+#include "tensorflow/lite/delegates/gpu/cl/tensor_type.h"
+
+#include <cassert>
+
+namespace onert
+{
+namespace backend
+{
+namespace gpu_cl
+{
+
+class MemoryManager
+{
+public:
+ MemoryManager(tflite::gpu::cl::CLContext *context) : _context{context} {}
+
+ ~MemoryManager() = default;
+
+ void allocate(void)
+ {
+ for (const auto &tensor_entry : _tensors)
+ {
+ auto tensor = tensor_entry.second;
+ auto type = tensor->get_type();
+
+      // TODO Skip tensors of type TENSOR_TYPE_DELETE here once deletion is supported:
+      // if (type == TensorType::TENSOR_TYPE_DELETE) continue;
+
+ const auto &t = tensor_reserver_.Get(tensor_entry.first.value());
+ const auto &shape = t->shape;
+ const auto &descriptor = t->descriptor;
+ if (!CreateTensor(*_context, shape, descriptor, tensor->handle()).ok())
+ {
+        throw std::runtime_error("Failed to CreateTensor");
+ }
+ switch (type)
+ {
+ case TensorType::TENSOR_TYPE_INPUT:
+ tensor->writeConvertInit();
+ break;
+ case TensorType::TENSOR_TYPE_OUTPUT:
+ tensor->readConvertInit();
+ break;
+ default:
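+        // no host<->device converter is set up for other tensor types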
+ break;
+ }
+ }
+ }
+
+ void deallocate(void)
+ {
+ // NYI
+ }
+
+ void startLifetime(const ir::OperandIndex &)
+ { /* DO NOTHING */
+ }
+ void finishLifetime(const ir::OperandIndex &)
+ { /* DO NOTHING */
+ }
+
+ void buildTensor(const ir::OperandIndex &ind, const ir::OperandInfo &info,
+ tflite::gpu::cl::InferenceContext::CreateInferenceInfo create_info,
+ std::shared_ptr<tflite::gpu::cl::Environment> environment,
+ tflite::gpu::cl::DeviceInfo &device_info, TensorType type)
+ {
+ tflite::gpu::ValueId max_id = 0;
+ auto data_type = DeduceDataTypeFromPrecision(create_info.precision);
+ const auto shape = info.shape();
+
+ auto tensor = std::make_shared<operand::CLTensor>(shape.rank(), shape, environment, type);
+ _tensors[ind] = tensor;
+ tflite::gpu::BHWC t_shape;
+ switch (shape.rank())
+ {
+ case 1:
+ // B layout
+ t_shape = tflite::gpu::BHWC(shape.dim(0), 1, 1, 1);
+ break;
+ case 2:
+ // BC layout
+ t_shape = tflite::gpu::BHWC(shape.dim(0), 1, 1, shape.dim(1));
+ break;
+ case 3:
+ // BWC layout
+ t_shape = tflite::gpu::BHWC(shape.dim(0), 1, shape.dim(1), shape.dim(2));
+ break;
+ case 4:
+ // BHWC layout
+ t_shape = tflite::gpu::BHWC(shape.dim(0), shape.dim(1), shape.dim(2), shape.dim(3));
+ break;
+ default:
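+      // rank > 4 is not supported; t_shape stays default-constructed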
+ break;
+ }
+
+ tflite::gpu::cl::TensorStorageType storage_type = create_info.storage_type;
+ tflite::gpu::Layout layout =
+ t_shape.b == 1 ? tflite::gpu::Layout::HWC : tflite::gpu::Layout::BHWC;
+
+ tflite::gpu::ValueId id = ind.value();
+ storage_type =
+ tflite::gpu::cl::SelectBestStorageType(device_info, t_shape, storage_type, data_type, layout);
+ auto dummy = std::make_shared<InferenceContextEx::DummyTensor>();
+ dummy->shape = t_shape;
+ dummy->descriptor = tflite::gpu::cl::TensorDescriptor{data_type, storage_type, layout};
+ tensor_reserver_.Add(id, dummy);
+
+ max_id = std::max(max_id, id);
+
+ tensor_reserver_.SetNext(max_id + 1);
+ }
+
+ ir::OperandIndexMap<std::shared_ptr<operand::CLTensor>> &tensors(void) { return _tensors; }
+
+ InferenceContextEx::TensorReserverEx &tensorReservers(void) { return tensor_reserver_; }
+
+private:
+ ir::OperandIndexMap<std::shared_ptr<operand::CLTensor>> _tensors;
+ InferenceContextEx::TensorReserverEx tensor_reserver_;
+ tflite::gpu::cl::CLContext *_context;
+};
+
+} // namespace gpu_cl
+} // namespace backend
+} // namespace onert
+
+#endif // __ONERT_BACKEND_GPU_CL_MEMORY_MANAGER_H__
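
buildTensor() above folds ONE's rank-1..4 operand shapes into TF Lite's BHWC layout (B, BC, BWC, BHWC). A self-contained sketch of that mapping, with a stand-in struct in place of tflite::gpu::BHWC; unlike the backend, which leaves t_shape default-constructed for unsupported ranks, the sketch rejects them:

    #include <cstdint>
    #include <stdexcept>
    #include <vector>

    // Stand-in for tflite::gpu::BHWC, for illustration only.
    struct BHWC
    {
      int32_t b = 1, h = 1, w = 1, c = 1;
    };

    // Mirrors the rank switch in MemoryManager::buildTensor above.
    BHWC toBHWC(const std::vector<int32_t> &dims)
    {
      switch (dims.size())
      {
      case 1: return {dims[0], 1, 1, 1};                   // B
      case 2: return {dims[0], 1, 1, dims[1]};             // BC
      case 3: return {dims[0], 1, dims[1], dims[2]};       // BWC
      case 4: return {dims[0], dims[1], dims[2], dims[3]}; // BHWC
      default: throw std::invalid_argument("rank > 4 not supported");
      }
    }

For example, toBHWC({2, 128}) yields {b=2, h=1, w=1, c=128}; the backend then picks tflite::gpu::Layout::HWC when b == 1 and Layout::BHWC otherwise.
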
diff --git a/runtime/onert/backend/gpu_cl/TensorBuilder.cc b/runtime/onert/backend/gpu_cl/TensorBuilder.cc
new file mode 100644
index 000000000..e71733427
--- /dev/null
+++ b/runtime/onert/backend/gpu_cl/TensorBuilder.cc
@@ -0,0 +1,135 @@
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include <memory>
+#include <queue>
+
+#include "TensorBuilder.h"
+
+#include "TensorManager.h"
+
+#include "tensorflow/lite/delegates/gpu/cl/tensor_type.h"
+#include "tensorflow/lite/delegates/gpu/cl/tensor_type_util.h"
+#include "tensorflow/lite/delegates/gpu/cl/cl_device.h"
+#include "tensorflow/lite/delegates/gpu/cl/inference_context.h"
+
+#include "ir/OperandIndexMap.h"
+#include "ir/OperandIndexSequence.h"
+#include <ir/Operands.h>
+#include <util/Utils.h>
+
+#include <cassert>
+#include <stack>
+
+#include "util/logging.h"
+
+namespace onert
+{
+namespace backend
+{
+namespace gpu_cl
+{
+
+using UsesType = cl_common::UsesType;
+
+TensorBuilder::TensorBuilder(const ir::Operands &operands, TensorManager *tensor_mgr,
+ tflite::gpu::cl::InferenceContext::CreateInferenceInfo create_info,
+ const std::shared_ptr<tflite::gpu::cl::Environment> &environment)
+ : _operands{operands}, _tensor_mgr{tensor_mgr}, _create_info{create_info}, _environment{
+ environment}
+{
+ assert(_tensor_mgr);
+}
+
+void TensorBuilder::registerTensorInfo(const ir::OperandIndex &ind, const ir::OperandInfo &info,
+ ir::Layout backend_layout, TensorType type)
+{
+ assert(_tensor_mgr->constTensors().size() == 0);
+ assert(_tensor_mgr->nonconstTensors().size() == 0);
+
+ _uses_count_map[ind] = _operands.at(ind).getUses().size();
+
+ _tensor_info_map.emplace(ind, info);
+ _tensor_type_map.emplace(ind, type);
+
+ _tensor_layout_map.insert({ind, backend_layout});
+}
+
+void TensorBuilder::notifyFirstUse(const ir::OperandIndex &ind)
+{
+ _lifetime_seq.emplace_back(UsesType::FIRST, ind);
+}
+
+void TensorBuilder::notifyLastUse(const ir::OperandIndex &ind)
+{
+ _lifetime_seq.emplace_back(UsesType::LAST, ind);
+}
+
+bool TensorBuilder::isRegistered(const ir::OperandIndex &ind) const
+{
+ return _tensor_info_map.find(ind) != _tensor_info_map.end();
+}
+
+void TensorBuilder::prepare(void) { buildTensors(); }
+
+void TensorBuilder::allocate(void)
+{
+ auto lifetime_map = cl_common::createLifetimeMap(_lifetime_seq, _parent_map);
+
+ for (auto &entry : lifetime_map)
+ {
+ auto &use = entry.second;
+ auto use_type = use.first;
+ auto use_index = use.second;
+ assert(use_index.valid());
+ if (use_type == UsesType::FIRST)
+ _tensor_mgr->startLifetime(use_index);
+ else
+ _tensor_mgr->finishLifetime(use_index);
+ }
+
+ _tensor_mgr->allocateConsts();
+
+  // TODO `_parent_map` is filled for all Concat nodes, even those this backend does not use.
+  // After refactoring BackendContext we can uncomment this assert:
+ // assert(_tensor_info_map.size() ==
+ // _tensor_mgr->nonconstTensors().size() + num of constants of _tensor_info_map +
+ // _parent_map.size());
+ _tensor_mgr->allocateNonconsts();
+}
+
+void TensorBuilder::postFunctionPrepare(void) { _tensor_mgr->tryDeallocConstants(); }
+
+void TensorBuilder::buildTensors(void)
+{
+ assert(_tensor_mgr->constTensors().size() == 0);
+ assert(_tensor_mgr->nonconstTensors().size() == 0);
+ // Normal tensors
+ for (auto &entry : _tensor_info_map)
+ {
+ auto ind = entry.first;
+ if (_parent_map.count(ind) > 0)
+ continue;
+ auto type = _tensor_type_map.at(ind);
+ const auto &info = entry.second;
+ _tensor_mgr->buildTensor(ind, info, _create_info, _environment, _environment->device().info_,
+ type);
+ }
+}
+
+} // namespace gpu_cl
+} // namespace backend
+} // namespace onert
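
notifyFirstUse()/notifyLastUse() above only append to an ordered (UsesType, OperandIndex) sequence; allocate() later folds that sequence through cl_common::createLifetimeMap and replays it against the TensorManager. A minimal sketch of the record-then-replay shape, with plain stand-ins for the cl_common and ir types and the parent/child folding omitted:

    #include <cstdint>
    #include <iostream>
    #include <utility>
    #include <vector>

    enum class UsesType { FIRST, LAST }; // stand-in for cl_common::UsesType
    using OperandIndex = uint32_t;       // stand-in for ir::OperandIndex
    using LifetimeSeq = std::vector<std::pair<UsesType, OperandIndex>>;

    int main()
    {
      // What the notify*Use() calls record while the graph is scheduled.
      LifetimeSeq seq = {{UsesType::FIRST, 0}, {UsesType::FIRST, 1},
                         {UsesType::LAST, 0},  {UsesType::LAST, 1}};

      // What allocate() then forwards to the tensor manager.
      for (const auto &use : seq)
      {
        if (use.first == UsesType::FIRST)
          std::cout << "startLifetime(" << use.second << ")\n";
        else
          std::cout << "finishLifetime(" << use.second << ")\n";
      }
      return 0;
    }
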
diff --git a/runtime/onert/backend/gpu_cl/TensorBuilder.h b/runtime/onert/backend/gpu_cl/TensorBuilder.h
index d55358191..2a5cb8b5e 100644
--- a/runtime/onert/backend/gpu_cl/TensorBuilder.h
+++ b/runtime/onert/backend/gpu_cl/TensorBuilder.h
@@ -17,10 +17,13 @@
#ifndef __ONERT_BACKEND_GPU_CL_TENSOR_BUILDER_H__
#define __ONERT_BACKEND_GPU_CL_TENSOR_BUILDER_H__
-#include <backend/basic/TensorBuilder.h>
-#include "operand/ICLTensor.h"
-#include "operand/CLTensor.h"
-#include "ClTensorBuilder.h"
+#include "TensorManager.h"
+
+#include <cl_common/LifetimeMap.h>
+#include <cl_common/ParentInfo.h>
+
+#include <ir/Operands.h>
+#include <ir/OperandIndexSequence.h>
namespace onert
{
@@ -28,8 +31,76 @@ namespace backend
{
namespace gpu_cl
{
+class TensorBuilder
+{
+public:
+ TensorBuilder(const ir::Operands &operands, TensorManager *tensor_mgr,
+ tflite::gpu::cl::InferenceContext::CreateInferenceInfo create_info,
+ const std::shared_ptr<tflite::gpu::cl::Environment> &environment);
+
+ /**
+   * @brief Register tensor information to allocate on the gpu-cl backend
+   * @param[in] ind            Operand index
+   * @param[in] info           Tensor information
+   * @param[in] backend_layout Tensor data layout
+   * @param[in] type           Tensor type
+ */
+ void registerTensorInfo(const ir::OperandIndex &ind, const ir::OperandInfo &info,
+ ir::Layout backend_layout, TensorType type);
+
+ void notifyFirstUse(const ir::OperandIndex &);
+ void notifyLastUse(const ir::OperandIndex &);
+
+ bool isRegistered(const ir::OperandIndex &) const;
+
+ void prepare();
+ void allocate();
+ void postFunctionPrepare();
+
+ TensorManager *cl_tensor_manager(void) { return _tensor_mgr.get(); }
+
+ void setUsesCount(const ir::OperandIndex &index, size_t num_uses)
+ {
+ assert(_uses_count_map.find(index) != _uses_count_map.end() ? _uses_count_map[index] == num_uses
+ : true);
+ _uses_count_map[index] = num_uses;
+ }
+
+ void parent_map(std::unordered_map<ir::OperandIndex, cl_common::ParentInfo> &&parent_map)
+ {
+ _parent_map = std::move(parent_map);
+ }
+
+ bool areSubTensorsOf(const ir::OperandIndex &parent, const ir::OperandIndexSequence &seq);
+
+ /**
+ * @brief Check child tensor is allocated as subtensor of parent tensor
+ * @param[in] parent Index of parent
+ * @param[in] child Index of child
+ * @return @c true if child is allocated as subtensor of parent, otherwise @c false
+ */
+ bool isSubTensorOf(const ir::OperandIndex &parent, const ir::OperandIndex &child);
+
+private:
+ void buildTensors(void);
+ ir::OperandIndex findRootParent(ir::OperandIndex index);
+
+private:
+ const ir::Operands &_operands;
+ ir::OperandIndexMap<ir::OperandInfo> _tensor_info_map;
+ ir::OperandIndexMap<ir::Layout> _tensor_layout_map;
+ ir::OperandIndexMap<TensorType> _tensor_type_map;
+ ir::OperandIndexMap<size_t> _uses_count_map;
+
+ std::unique_ptr<TensorManager> _tensor_mgr;
+ tflite::gpu::cl::InferenceContext::CreateInferenceInfo _create_info;
+ std::shared_ptr<tflite::gpu::cl::Environment> _environment;
+
+ // for linear executor
+ cl_common::LifetimeSeq _lifetime_seq;
-using TensorBuilder = ClTensorBuilder<operand::ICLTensor, operand::CLTensor>;
+ // Extra info for concat elimination
+ ir::OperandIndexMap<cl_common::ParentInfo> _parent_map;
+};
} // namespace gpu_cl
} // namespace backend
diff --git a/runtime/onert/backend/gpu_cl/open_cl/kernels/ConvCommon.h b/runtime/onert/backend/gpu_cl/TensorBuilderHelper.h
index 4700381dc..7290ff5da 100644
--- a/runtime/onert/backend/gpu_cl/open_cl/kernels/ConvCommon.h
+++ b/runtime/onert/backend/gpu_cl/TensorBuilderHelper.h
@@ -1,12 +1,11 @@
/*
* Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
- * Copyright 2019 The TensorFlow Authors. All Rights Reserved.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
- * http://www.apache.org/licenses/LICENSE-2.0
+ * http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
@@ -15,8 +14,11 @@
* limitations under the License.
*/
-#ifndef __ONERT_BACKEND_GPU_CL_OPENCL_KERNELS_CONV_COMMON_H__
-#define __ONERT_BACKEND_GPU_CL_OPENCL_KERNELS_CONV_COMMON_H__
+#ifndef __ONERT_BACKEND_GPU_CL_TENSOR_BUILDER_HELPER_H__
+#define __ONERT_BACKEND_GPU_CL_TENSOR_BUILDER_HELPER_H__
+
+#include "absl/status/status.h"
+#include "tensorflow/lite/delegates/gpu/common/shape.h"
namespace onert
{
@@ -25,20 +27,18 @@ namespace backend
namespace gpu_cl
{
-enum class ConvWeightsLayout
+enum TensorType
{
- kUnknown,
- kOHWIOGroupI4O4,
+ TENSOR_TYPE_VALID = 0,
+ TENSOR_TYPE_INPUT = 1,
+ TENSOR_TYPE_OUTPUT = 2,
+ TENSOR_TYPE_DELETE = 3
};
-struct ConvWeightsDescription
-{
- ConvWeightsLayout layout;
- int output_group_size;
-};
+absl::Status ExtractAxisFromIndex(int dims, int index, tflite::gpu::Axis *axis);
} // namespace gpu_cl
} // namespace backend
} // namespace onert
-#endif // __ONERT_BACKEND_GPU_CL_OPENCL_KERNELS_CONV_COMMON_H__
+#endif // __ONERT_BACKEND_GPU_CL_TENSOR_BUILDER_HELPER_H__
diff --git a/runtime/onert/backend/gpu_cl/TensorManager.cc b/runtime/onert/backend/gpu_cl/TensorManager.cc
new file mode 100644
index 000000000..9fe0605ac
--- /dev/null
+++ b/runtime/onert/backend/gpu_cl/TensorManager.cc
@@ -0,0 +1,138 @@
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "TensorManager.h"
+
+#include <util/logging.h>
+
+#include <cassert>
+
+namespace onert
+{
+namespace backend
+{
+namespace gpu_cl
+{
+
+TensorManager::TensorManager(MemoryManager *const_mgr, MemoryManager *nonconst_mgr)
+ : _const_mgr{const_mgr}, _nonconst_mgr{nonconst_mgr}
+{
+ // DO NOTHING
+}
+
+void TensorManager::allocateConsts(void) { _const_mgr->allocate(); }
+
+void TensorManager::allocateNonconsts(void) { _nonconst_mgr->allocate(); }
+
+void TensorManager::deallocateConsts(void) { _const_mgr->deallocate(); }
+
+void TensorManager::deallocateNonconsts(void) { _nonconst_mgr->deallocate(); }
+
+void TensorManager::buildTensor(const ir::OperandIndex &ind, const ir::OperandInfo &info,
+ tflite::gpu::cl::InferenceContext::CreateInferenceInfo create_info,
+ std::shared_ptr<tflite::gpu::cl::Environment> environment,
+ tflite::gpu::cl::DeviceInfo &device_info, TensorType type)
+{
+ assert(_ind_to_mgr.find(ind) == _ind_to_mgr.end());
+
+ if (info.isConstant())
+ {
+ _const_mgr->buildTensor(ind, info, create_info, environment, device_info, type);
+ _ind_to_mgr.insert({ind, *_const_mgr});
+ }
+ else
+ {
+ _nonconst_mgr->buildTensor(ind, info, create_info, environment, device_info, type);
+ _ind_to_mgr.insert({ind, *_nonconst_mgr});
+ }
+}
+
+void TensorManager::startLifetime(const ir::OperandIndex &ind)
+{
+ assert(_ind_to_mgr.find(ind) != _ind_to_mgr.end());
+ _ind_to_mgr.at(ind).startLifetime(ind);
+}
+
+void TensorManager::finishLifetime(const ir::OperandIndex &ind)
+{
+ assert(_ind_to_mgr.find(ind) != _ind_to_mgr.end());
+ _ind_to_mgr.at(ind).finishLifetime(ind);
+}
+
+std::shared_ptr<operand::ICLTensor> TensorManager::at(const ir::OperandIndex &ind)
+{
+ if (_ind_to_mgr.find(ind) == _ind_to_mgr.end())
+ return nullptr;
+
+ auto &tensors = _ind_to_mgr.at(ind).tensors();
+ if (tensors.find(ind) != tensors.end())
+ {
+ return tensors.at(ind);
+ }
+
+ return nullptr;
+}
+
+ir::OperandIndexMap<std::shared_ptr<operand::CLTensor>> &TensorManager::constTensors(void)
+{
+ return _const_mgr->tensors();
+}
+
+ir::OperandIndexMap<std::shared_ptr<operand::CLTensor>> &TensorManager::nonconstTensors(void)
+{
+ return _nonconst_mgr->tensors();
+}
+
+std::shared_ptr<InferenceContextEx::DummyTensor> TensorManager::atR(const ir::OperandIndex &ind)
+{
+ if (_nonconst_mgr->tensorReservers().HaveTensor(ind.value()))
+ {
+ return _nonconst_mgr->tensorReservers().Get(ind.value());
+ }
+ else if (_const_mgr->tensorReservers().HaveTensor(ind.value()))
+ {
+ return _const_mgr->tensorReservers().Get(ind.value());
+ }
+ return nullptr;
+}
+
+InferenceContextEx::TensorReserverEx &TensorManager::constTensorReservers(void)
+{
+ return _const_mgr->tensorReservers();
+}
+
+InferenceContextEx::TensorReserverEx &TensorManager::nonconstTensorReservers(void)
+{
+ return _nonconst_mgr->tensorReservers();
+}
+
+void TensorManager::iterate(const std::function<void(const ir::OperandIndex &)> &fn)
+{
+ for (auto it : _nonconst_mgr->tensors())
+ fn(it.first);
+
+ for (auto it : _const_mgr->tensors())
+ fn(it.first);
+}
+
+void TensorManager::tryDeallocConstants(void)
+{
+ // NYI
+}
+
+} // namespace gpu_cl
+} // namespace backend
+} // namespace onert
diff --git a/runtime/onert/backend/gpu_cl/TensorManager.h b/runtime/onert/backend/gpu_cl/TensorManager.h
index 111b5f8a7..52abc579a 100644
--- a/runtime/onert/backend/gpu_cl/TensorManager.h
+++ b/runtime/onert/backend/gpu_cl/TensorManager.h
@@ -14,15 +14,16 @@
* limitations under the License.
*/
-#ifndef __ONERT_BACKEND_CL_TENSOR_MANAGER_H__
-#define __ONERT_BACKEND_CL_TENSOR_MANAGER_H__
+#ifndef __ONERT_BACKEND_GPU_CL_TENSOR_MANAGER_H__
+#define __ONERT_BACKEND_GPU_CL_TENSOR_MANAGER_H__
-#include "ClMemoryManager.h"
-#include "ClTensorManager.h"
-#include "open_cl/ClContext.h"
-#include "operand/CLTensor.h"
-#include "operand/ICLTensor.h"
-#include "util/logging.h"
+#include "MemoryManager.h"
+
+#include "tensorflow/lite/delegates/gpu/cl/inference_context.h"
+#include "tensorflow/lite/delegates/gpu/cl/tensor_type.h"
+
+#include "ir/OperandInfo.h"
+#include "ir/OperandIndexMap.h"
namespace onert
{
@@ -31,13 +32,50 @@ namespace backend
namespace gpu_cl
{
-using MemoryManager = ClMemoryManager<operand::ICLTensor, operand::CLTensor>;
+class TensorManager
+{
+public:
+ TensorManager(MemoryManager *const_mgr, MemoryManager *nonconst_mgr);
+
+ virtual ~TensorManager() = default;
+
+ void allocateConsts(void);
+ void allocateNonconsts(void);
+ void deallocateConsts(void);
+ void deallocateNonconsts(void);
+
+ void buildTensor(const ir::OperandIndex &ind, const ir::OperandInfo &info,
+ tflite::gpu::cl::InferenceContext::CreateInferenceInfo create_info,
+ std::shared_ptr<tflite::gpu::cl::Environment> environment,
+ tflite::gpu::cl::DeviceInfo &device_info, TensorType type);
+
+ std::shared_ptr<operand::ICLTensor> findTensorAsParent(const ir::OperandIndex &ind);
+
+ void startLifetime(const ir::OperandIndex &ind);
+ void finishLifetime(const ir::OperandIndex &ind);
+
+ std::shared_ptr<operand::ICLTensor> at(const ir::OperandIndex &ind);
+ std::shared_ptr<InferenceContextEx::DummyTensor> atR(const ir::OperandIndex &ind);
+
+ InferenceContextEx::TensorReserverEx &constTensorReservers(void);
+ InferenceContextEx::TensorReserverEx &nonconstTensorReservers(void);
+
+ ir::OperandIndexMap<std::shared_ptr<operand::CLTensor>> &constTensors(void);
+ ir::OperandIndexMap<std::shared_ptr<operand::CLTensor>> &nonconstTensors(void);
+
+ void iterate(const std::function<void(const ir::OperandIndex &)> &fn);
+
+ void tryDeallocConstants(void);
-using TensorManager = ClTensorManager<operand::ICLTensor, operand::CLTensor>;
+private:
+ std::unique_ptr<MemoryManager> _const_mgr;
+ std::unique_ptr<MemoryManager> _nonconst_mgr;
+ ir::OperandIndexMap<MemoryManager &> _ind_to_mgr;
+};
-inline TensorManager *createTensorManager(CLContext *context)
+inline TensorManager *createTensorManager(tflite::gpu::cl::CLContext *context)
{
- VERBOSE(createTensorManager) << "ClTensorManager" << std::endl;
+ VERBOSE(createTensorManager) << "GPU-CL TensorManager" << std::endl;
return new TensorManager(new MemoryManager(context), new MemoryManager(context));
}
@@ -45,4 +83,4 @@ inline TensorManager *createTensorManager(CLContext *context)
} // namespace backend
} // namespace onert
-#endif // __ONERT_BACKEND_ACL_CL_TENSOR_MANAGER_H__
+#endif // __ONERT_BACKEND_GPU_CL_TENSOR_MANAGER_H__
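
TensorManager is deliberately thin: buildTensor() binds each operand index to exactly one of the two MemoryManagers (constant vs non-constant), and every later lifetime call is forwarded through that binding. The same pattern in miniature, with std::reference_wrapper standing in for ir::OperandIndexMap<MemoryManager &> and a trivial stand-in manager:

    #include <cstdint>
    #include <functional>
    #include <iostream>
    #include <unordered_map>

    // Stand-in: the real MemoryManager owns the CL-side tensors.
    struct MemoryManager
    {
      const char *name;
      void startLifetime(uint32_t ind) { std::cout << name << ": start " << ind << "\n"; }
    };

    int main()
    {
      MemoryManager const_mgr{"const"}, nonconst_mgr{"nonconst"};
      std::unordered_map<uint32_t, std::reference_wrapper<MemoryManager>> ind_to_mgr;

      // buildTensor(): bind each index once, by operand constness.
      auto build = [&](uint32_t ind, bool is_constant) {
        ind_to_mgr.emplace(ind, is_constant ? const_mgr : nonconst_mgr);
      };
      build(0, true);
      build(1, false);

      // startLifetime()/finishLifetime(): forward through the binding.
      ind_to_mgr.at(1).get().startLifetime(1); // prints "nonconst: start 1"
      return 0;
    }
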
diff --git a/runtime/onert/backend/gpu_cl/ClTensorRegistry.h b/runtime/onert/backend/gpu_cl/TensorRegistry.h
index 1f0018bd1..6f17aff54 100644
--- a/runtime/onert/backend/gpu_cl/ClTensorRegistry.h
+++ b/runtime/onert/backend/gpu_cl/TensorRegistry.h
@@ -17,6 +17,8 @@
#ifndef __ONERT_BACKEND_GPU_CL_TENSOR_REGISTRY_H__
#define __ONERT_BACKEND_GPU_CL_TENSOR_REGISTRY_H__
+#include "TensorManager.h"
+
#include "backend/ITensorRegistry.h"
namespace onert
@@ -27,14 +29,14 @@ namespace gpu_cl
{
/**
- * @brief Tensor registry class for acl backends
+ * @brief Tensor registry class for gpu-cl backends
*
- * This is implemented as a wrapper of AclTensorManager.
+ * This is implemented as a wrapper of TensorManager.
*/
-template <typename T_ClTensorManager> class ClTensorRegistry : public ITensorRegistry
+class TensorRegistry : public ITensorRegistry
{
public:
- ClTensorRegistry(T_ClTensorManager *tensor_mgr) : _tensor_mgr{tensor_mgr} {}
+ TensorRegistry(TensorManager *tensor_mgr) : _tensor_mgr{tensor_mgr} {}
ITensor *getITensor(const ir::OperandIndex &ind) override { return _tensor_mgr->at(ind).get(); }
@@ -45,7 +47,7 @@ public:
auto getClTensorReserver(const ir::OperandIndex &ind) { return _tensor_mgr->atR(ind); }
private:
- T_ClTensorManager *_tensor_mgr;
+ TensorManager *_tensor_mgr;
};
} // namespace gpu_cl
diff --git a/runtime/onert/backend/gpu_cl/ex/InferenceContextEx.h b/runtime/onert/backend/gpu_cl/ex/InferenceContextEx.h
new file mode 100644
index 000000000..f67387904
--- /dev/null
+++ b/runtime/onert/backend/gpu_cl/ex/InferenceContextEx.h
@@ -0,0 +1,108 @@
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ * Copyright 2019 The TensorFlow Authors. All Rights Reserved.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __ONERT_BACKEND_GPU_CL_INFERENCE_CONTEXT_EX_H__
+#define __ONERT_BACKEND_GPU_CL_INFERENCE_CONTEXT_EX_H__
+
+#include "tensorflow/lite/delegates/gpu/cl/inference_context.h"
+#include "tensorflow/lite/delegates/gpu/common/model.h"
+#include "absl/strings/str_cat.h"
+
+namespace onert
+{
+namespace backend
+{
+namespace gpu_cl
+{
+
+class InferenceContextEx : public tflite::gpu::cl::InferenceContext
+{
+public:
+ struct DummyTensor
+ {
+ tflite::gpu::BHWC shape;
+ tflite::gpu::cl::TensorDescriptor descriptor;
+
+ bool operator==(const DummyTensor &b) const
+ {
+ return shape == b.shape && descriptor == b.descriptor;
+ }
+ };
+
+ class TensorReserverEx
+ {
+ public:
+ tflite::gpu::ValueId Add(const std::shared_ptr<DummyTensor> &dummy)
+ {
+ reservations_[next_] = dummy;
+ return next_++;
+ }
+ void Add(tflite::gpu::ValueId id, const std::shared_ptr<DummyTensor> &dummy)
+ {
+ reservations_[id] = dummy;
+ }
+ void SetNext(tflite::gpu::ValueId id) { next_ = id; }
+ bool HaveTensor(tflite::gpu::ValueId id)
+ {
+ return reservations_.find(id) != reservations_.end();
+ }
+ std::shared_ptr<DummyTensor> Get(tflite::gpu::ValueId id) { return reservations_[id]; }
+
+ std::vector<std::pair<tflite::gpu::ValueId, tflite::gpu::cl::TensorDescriptor>>
+ GetTensorDescs() const
+ {
+ std::vector<std::pair<tflite::gpu::ValueId, tflite::gpu::cl::TensorDescriptor>> result;
+ for (auto &v : reservations_)
+ {
+ tflite::gpu::cl::TensorDescriptor desc = v.second->descriptor;
+ desc.shape.b = v.second->shape.b;
+ desc.shape.h = v.second->shape.h;
+ desc.shape.w = v.second->shape.w;
+ desc.shape.d = 1;
+ desc.shape.c = v.second->shape.c;
+ result.push_back({v.first, desc});
+ }
+ return result;
+ }
+
+ void Add(const std::vector<std::pair<tflite::gpu::ValueId, tflite::gpu::cl::TensorDescriptor>>
+ &tensors)
+ {
+ for (auto &v : tensors)
+ {
+ auto dummy = std::make_shared<DummyTensor>();
+ dummy->descriptor = v.second;
+ dummy->shape.b = v.second.shape.b;
+ dummy->shape.h = v.second.shape.h;
+ dummy->shape.w = v.second.shape.w;
+ dummy->shape.c = v.second.shape.c;
+ Add(v.first, dummy);
+ }
+ }
+
+ private:
+    // std::unordered_map is used here in place of absl::flat_hash_map<ValueId, DummyTensor>
+ std::unordered_map<tflite::gpu::ValueId, std::shared_ptr<DummyTensor>> reservations_;
+ tflite::gpu::ValueId next_ = 0;
+ };
+};
+
+} // namespace gpu_cl
+} // namespace backend
+} // namespace onert
+
+#endif // __ONERT_BACKEND_GPU_CL_INFERENCE_CONTEXT_EX_H__
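
TensorReserverEx supports both auto-assigned ids (next_++) and caller-chosen ids; MemoryManager::buildTensor above uses the explicit form and then calls SetNext(max_id + 1) so later auto ids cannot collide. A stripped-down sketch of that counter contract, keeping only the id bookkeeping:

    #include <cstdint>
    #include <iostream>
    #include <unordered_map>

    using ValueId = uint32_t; // stand-in for tflite::gpu::ValueId

    class Reserver // stripped-down TensorReserverEx, no descriptors
    {
    public:
      ValueId Add() { reserved_[next_] = true; return next_++; } // auto id
      void Add(ValueId id) { reserved_[id] = true; }             // explicit id
      void SetNext(ValueId id) { next_ = id; }
      bool HaveTensor(ValueId id) const { return reserved_.count(id) != 0; }

    private:
      std::unordered_map<ValueId, bool> reserved_;
      ValueId next_ = 0;
    };

    int main()
    {
      Reserver r;
      r.Add(7);             // explicit id, e.g. an operand index
      r.SetNext(7 + 1);     // as buildTensor does: SetNext(max_id + 1)
      ValueId id = r.Add(); // auto id: 8, no collision with 7
      std::cout << id << " " << r.HaveTensor(7) << "\n"; // "8 1"
      return 0;
    }
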
diff --git a/runtime/onert/backend/gpu_cl/open_cl/Api.cc b/runtime/onert/backend/gpu_cl/open_cl/Api.cc
deleted file mode 100644
index 10bf87c38..000000000
--- a/runtime/onert/backend/gpu_cl/open_cl/Api.cc
+++ /dev/null
@@ -1,202 +0,0 @@
-/*
- * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
- * Copyright 2020 The TensorFlow Authors. All Rights Reserved.
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#include "Api.h"
-
-namespace onert
-{
-namespace backend
-{
-namespace gpu_cl
-{
-namespace
-{
-
-struct ObjectTypeGetter
-{
- ObjectType operator()(absl::monostate) const { return ObjectType::UNKNOWN; }
- ObjectType operator()(OpenClBuffer) const { return ObjectType::OPENCL_BUFFER; }
- ObjectType operator()(OpenClTexture) const { return ObjectType::OPENCL_TEXTURE; }
- ObjectType operator()(CpuMemory) const { return ObjectType::CPU_MEMORY; }
-};
-
-struct ObjectValidityChecker
-{
- bool operator()(absl::monostate) const { return false; }
- bool operator()(OpenClBuffer obj) const { return obj.memobj; }
- bool operator()(OpenClTexture obj) const { return obj.memobj; }
- bool operator()(CpuMemory obj) const
- {
- return obj.data != nullptr && obj.size_bytes > 0 &&
- (data_type == DataType::UNKNOWN || obj.size_bytes % SizeOf(data_type) == 0);
- }
- DataType data_type;
-};
-
-} // namespace
-
-bool IsValid(const ObjectDef &def)
-{
- return def.data_type != DataType::UNKNOWN && def.data_layout != DataLayout::UNKNOWN &&
- def.object_type != ObjectType::UNKNOWN;
-}
-
-ObjectType GetType(const TensorObject &object) { return absl::visit(ObjectTypeGetter{}, object); }
-
-bool IsValid(const TensorObjectDef &def) { return IsValid(def.object_def); }
-
-bool IsValid(const TensorObjectDef &def, const TensorObject &object)
-{
- return GetType(object) == def.object_def.object_type &&
- absl::visit(ObjectValidityChecker{def.object_def.data_type}, object);
-}
-
-bool IsObjectPresent(ObjectType type, const TensorObject &obj)
-{
- switch (type)
- {
- case ObjectType::CPU_MEMORY:
- return absl::holds_alternative<CpuMemory>(obj);
- case ObjectType::OPENCL_BUFFER:
- return absl::holds_alternative<OpenClBuffer>(obj);
- case ObjectType::OPENCL_TEXTURE:
- return absl::holds_alternative<OpenClTexture>(obj);
- case ObjectType::UNKNOWN:
- return false;
- }
- return false;
-}
-
-uint32_t NumElements(const TensorObjectDef &def)
-{
- const auto &d = def.dimensions;
- switch (def.object_def.data_layout)
- {
- case DataLayout::BHWC:
- return d.product();
- case DataLayout::HWDC4:
- case DataLayout::HDWC4:
- case DataLayout::DHWC4:
- return d.b * d.h * d.w * AlignByN(d.c, 4);
- case DataLayout::UNKNOWN:
- return 0;
- }
- return 0;
-}
-
-int GetPosition(const InferenceOptions &options, InferencePriority p)
-{
- if (options.priority1 == p)
- return 1;
- if (options.priority2 == p)
- return 2;
- if (options.priority3 == p)
- return 3;
- return 4; // least important
-}
-
-PriorityImportance GetRelativeImportance(const InferenceOptions &options, InferencePriority p1,
- InferencePriority p2)
-{
- int p1_position = GetPosition(options, p1);
- int p2_position = GetPosition(options, p2);
- if (p1_position == p2_position)
- return PriorityImportance::UNKNOWN;
- return p1_position < p2_position ? PriorityImportance::HIGHER : PriorityImportance::LOWER;
-}
-
-bool IsValid(const InferenceOptions &options)
-{
- if (options.usage == InferenceUsage::UNKNOWN)
- {
- return false;
- }
- if (options.priority1 == InferencePriority::UNKNOWN ||
- options.priority2 == InferencePriority::UNKNOWN ||
- options.priority3 == InferencePriority::UNKNOWN)
- {
- return false;
- }
- if (options.priority1 == InferencePriority::AUTO)
- {
- return false;
- }
- if (options.priority2 == InferencePriority::AUTO && options.priority3 != InferencePriority::AUTO)
- {
- return false;
- }
- if (options.priority1 == options.priority2 || options.priority1 == options.priority3)
- {
- return false;
- }
- if (options.priority2 == options.priority3 && options.priority2 != InferencePriority::AUTO)
- {
- return false;
- }
- return true;
-}
-
-// Implementation note: this resolution logic is shared between GL and CL
-// backends, but each might have its own logic. Thus, the function is defined
-// here just for code re-use purposes.
-void ResolveAutoPriority(InferenceOptions *options)
-{
- // priority1 can not be AUTO as it would make options invalid.
- if (options->priority2 == InferencePriority::AUTO)
- {
- switch (options->priority1)
- {
- case InferencePriority::MIN_LATENCY:
- options->priority2 = InferencePriority::MIN_MEMORY_USAGE;
- options->priority3 = InferencePriority::MAX_PRECISION;
- return;
- case InferencePriority::MIN_MEMORY_USAGE:
- options->priority2 = InferencePriority::MAX_PRECISION;
- options->priority3 = InferencePriority::MIN_LATENCY;
- return;
- case InferencePriority::MAX_PRECISION:
- options->priority2 = InferencePriority::MIN_LATENCY;
- options->priority3 = InferencePriority::MIN_MEMORY_USAGE;
- return;
- case InferencePriority::UNKNOWN:
- case InferencePriority::AUTO:
- // Invalid and unreachable option.
- return;
- }
- }
-
- if (options->priority3 == InferencePriority::AUTO)
- {
- // Simply add missing priority
- if (GetPosition(*options, InferencePriority::MIN_LATENCY) == 4)
- {
- options->priority3 = InferencePriority::MIN_LATENCY;
- }
- else if (GetPosition(*options, InferencePriority::MAX_PRECISION) == 4)
- {
- options->priority3 = InferencePriority::MAX_PRECISION;
- }
- else if (GetPosition(*options, InferencePriority::MIN_MEMORY_USAGE) == 4)
- {
- options->priority3 = InferencePriority::MIN_MEMORY_USAGE;
- }
- }
-}
-
-} // namespace gpu_cl
-} // namespace backend
-} // namespace onert
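
The deleted priority machinery (the backend now uses the upstream tensorflow/lite/delegates/gpu implementation) is easiest to see with a worked example; this standalone sketch reproduces GetPosition from the file above and traces one AUTO resolution:

    #include <iostream>

    enum class InferencePriority { UNKNOWN, AUTO, MIN_LATENCY, MAX_PRECISION, MIN_MEMORY_USAGE };

    struct InferenceOptions
    {
      InferencePriority priority1 = InferencePriority::MAX_PRECISION;
      InferencePriority priority2 = InferencePriority::AUTO;
      InferencePriority priority3 = InferencePriority::AUTO;
    };

    // As in the deleted Api.cc: 1..3 for a set priority, 4 when it is missing.
    int GetPosition(const InferenceOptions &options, InferencePriority p)
    {
      if (options.priority1 == p) return 1;
      if (options.priority2 == p) return 2;
      if (options.priority3 == p) return 3;
      return 4; // least important
    }

    int main()
    {
      InferenceOptions options;
      options.priority1 = InferencePriority::MIN_LATENCY;
      // ResolveAutoPriority would now fill priority2 = MIN_MEMORY_USAGE and
      // priority3 = MAX_PRECISION (see the MIN_LATENCY case above).
      std::cout << GetPosition(options, InferencePriority::MIN_LATENCY) << "\n";   // 1
      std::cout << GetPosition(options, InferencePriority::MAX_PRECISION) << "\n"; // 4
      return 0;
    }
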
diff --git a/runtime/onert/backend/gpu_cl/open_cl/Api.h b/runtime/onert/backend/gpu_cl/open_cl/Api.h
deleted file mode 100644
index 35be3d99c..000000000
--- a/runtime/onert/backend/gpu_cl/open_cl/Api.h
+++ /dev/null
@@ -1,359 +0,0 @@
-/*
- * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
- * Copyright 2020 The TensorFlow Authors. All Rights Reserved.
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#ifndef __ONERT_BACKEND_GPU_CL_OPENCL_API_H__
-#define __ONERT_BACKEND_GPU_CL_OPENCL_API_H__
-
-// Usage example:
-//
-// // Builder is created from a model using GPU-specific parameters.
-// std::unique_ptr<InferenceBuilder> builder = ...;
-//
-// // input data is coming from a texture
-// // output data goes to CPU
-// builder->SetInputObjectDef(0, {DataType::FLOAT16, DataLayout::DHWC4,
-//                                ObjectType::OPENCL_TEXTURE, true});
-// builder->SetOutputObjectDef(0, {DataType::FLOAT32, DataLayout::BHWC,
-// ObjectType::CPU_MEMORY, false});
-// std::unique_ptr<InferenceRunner> runner;
-// RETURN_IF_ERROR(builder->Build(&runner)); // may take significant time.
-// RETURN_IF_ERROR(
-//     runner->SetInputObject(0, OpenClTexture{texture_memobj}));
-// RETURN_IF_ERROR(runner->Run());
-
-#include <cstdint>
-#include <memory>
-#include <vector>
-
-#include "absl/types/span.h"
-#include "absl/types/variant.h"
-#include "DataType.h"
-#include "Status.h"
-#include "Util.h"
-
-namespace onert
-{
-namespace backend
-{
-namespace gpu_cl
-{
-
-// Common abbreviations:
-// B - batch
-// H - height
-// W - width
-// C - channels
-// D - depth := DivideRoundUp(C, 4)
-// C4 - the constant 4.
-enum class DataLayout
-{
- UNKNOWN,
- BHWC,
- DHWC4,
- HWDC4,
- HDWC4,
-};
-
-enum class ObjectType
-{
- UNKNOWN,
- CPU_MEMORY,
- OPENCL_TEXTURE,
- OPENCL_BUFFER,
-};
-
-struct OpenClBuffer
-{
- OpenClBuffer() = default;
- explicit OpenClBuffer(cl_mem new_memobj) : memobj(new_memobj) {}
-
- cl_mem memobj = nullptr;
-};
-
-struct OpenClTexture
-{
- OpenClTexture() = default;
- explicit OpenClTexture(cl_mem new_memobj) : memobj(new_memobj) {}
-
- cl_mem memobj = nullptr;
- // TODO(akulik): should it specify texture format?
-};
-
-struct CpuMemory
-{
- CpuMemory() = default;
- CpuMemory(void *new_data, size_t new_size_bytes) : data(new_data), size_bytes(new_size_bytes) {}
-
- void *data = nullptr;
- size_t size_bytes = 0;
-};
-
-template <typename T> inline CpuMemory MakeCpuMemory(absl::Span<T> t)
-{
- CpuMemory m;
- m.data = t.data();
- m.size_bytes = t.size() * sizeof(T);
- return m;
-}
-
-template <typename T> inline CpuMemory MakeReadableCpuMemory(absl::Span<const T> t)
-{
- CpuMemory m;
- m.data = const_cast<T *>(t.data());
- m.size_bytes = t.size() * sizeof(T);
- return m;
-}
-
-// Defines object representation.
-struct ObjectDef
-{
- DataType data_type = DataType::UNKNOWN;
- DataLayout data_layout = DataLayout::UNKNOWN;
- ObjectType object_type = ObjectType::UNKNOWN;
-
- // If true, then object is managed externally and needs to be provided to
- // InferenceRunner by a user before running inference.
- //
- // User-provided objects will not be re-used internally for any purpose to
- // lower overall memory usage.
- bool user_provided = false;
-
- bool operator==(const ObjectDef &other) const
- {
- return data_type == other.data_type && data_layout == other.data_layout &&
- object_type == other.object_type && user_provided == other.user_provided;
- }
-};
-
-bool IsValid(const ObjectDef &def);
-
-struct Dimensions
-{
- Dimensions() : b(1), h(1), w(1), c(1) {}
-
- Dimensions(int32_t batch, int32_t height, int32_t width, int32_t channels)
- : b(batch), h(height), w(width), c(channels)
- {
- }
-
- int32_t d() const { return DivideRoundUp(c, 4); }
-
- int32_t product() const { return b * h * w * c; }
-
- bool operator==(const Dimensions &other) const
- {
- return b == other.b && h == other.h && w == other.w && c == other.c;
- }
-
- int32_t b;
- int32_t h;
- int32_t w;
- int32_t c;
-};
-
-// Connects tensor shape with corresponding object definition.
-struct TensorObjectDef
-{
- // Dimensions semantic is defined by corresponding DataLayout.
- Dimensions dimensions;
- ObjectDef object_def;
-
- bool operator==(const TensorObjectDef &other) const
- {
- return dimensions == other.dimensions && object_def == other.object_def;
- }
-};
-
-// @return true if tensor object def is defined.
-bool IsValid(const TensorObjectDef &def);
-
-// @return the number of elements in a tensor object.
-uint32_t NumElements(const TensorObjectDef &def);
-
-using TensorObject = absl::variant<absl::monostate, CpuMemory, OpenClBuffer, OpenClTexture>;
-
-// @return true if object is set and corresponding values are defined.
-bool IsValid(const TensorObjectDef &def, const TensorObject &object);
-
-ObjectType GetType(const TensorObject &object);
-
-// @return true if corresponding object is set for the given type
-bool IsObjectPresent(ObjectType type, const TensorObject &obj);
-
-class InferenceRunner;
-
-// Allows inspecting and changing input and output definitions before a graph
-// is prepared for inference.
-class InferenceBuilder
-{
-public:
- virtual ~InferenceBuilder() {}
-
- // Returns inference graph inputs and outputs definitions.
- virtual std::vector<TensorObjectDef> inputs() const = 0;
- virtual std::vector<TensorObjectDef> outputs() const = 0;
-
- // Sets new shape for the input if underlying implementation and graph
-  // structure allow dynamic tensors.
- virtual absl::Status SetInputShape(int index, const Dimensions &dimensions) = 0;
-
-  // Updates object definitions for the given index. Implementations may allow
-  // different layouts and/or data type conversions between objects
- // defined in a graph and given objects, for example:
- // input '0' is DataType::FLOAT32, DataLayout::BHWC.
- // A user, however, has an input in DataType::FLOAT16, DataLayout::PHWC4.
- // An implementation may allow this transformation to happen automatically
- // under the hood.
- virtual absl::Status SetInputObjectDef(int index, ObjectDef def) = 0;
- virtual absl::Status SetOutputObjectDef(int index, ObjectDef def) = 0;
- virtual absl::Status SetAllInputObjectDefsTo(ObjectDef def)
- {
- auto input_defs = inputs();
- for (size_t i = 0; i < input_defs.size(); ++i)
- {
- RETURN_IF_ERROR(SetInputObjectDef(i, def));
- }
- return absl::OkStatus();
- }
- virtual absl::Status SetAllOutputObjectDefsTo(ObjectDef def)
- {
- auto output_defs = outputs();
- for (size_t i = 0; i < output_defs.size(); ++i)
- {
- RETURN_IF_ERROR(SetOutputObjectDef(i, def));
- }
- return absl::OkStatus();
- }
-
-  // Creates a new instance of the inference runner. InferenceBuilder stays valid
-  // and can be used to create another inference runner if needed.
-  //
-  // This method may take significant time to prepare a new inference runner. For
-  // example, it may require compiling OpenCL kernels.
- virtual absl::Status Build(std::unique_ptr<InferenceRunner> *runner) = 0;
-};
-
-// Runs prepared inference. Every object marked as external needs to be set
-// prior to calling the Run method.
-class InferenceRunner
-{
-public:
- virtual ~InferenceRunner() {}
-
- // Returns inference graph inputs and outputs definitions.
- virtual std::vector<TensorObjectDef> inputs() const = 0;
- virtual std::vector<TensorObjectDef> outputs() const = 0;
-
- // Getters provide access to underlying objects for the given index.
-  // Setters allow setting or changing an external object for the given index.
-  // Note: the object needs to match the object definition set earlier in InferenceBuilder.
-
- virtual absl::Status GetInputObject(int index, TensorObject *object) = 0;
- virtual absl::Status GetOutputObject(int index, TensorObject *object) = 0;
- virtual absl::Status SetInputObject(int index, TensorObject object) = 0;
- virtual absl::Status SetOutputObject(int index, TensorObject object) = 0;
-
- virtual absl::Status Run() = 0;
-};
-
-// Encapsulated compilation/runtime tradeoffs.
-enum class InferenceUsage
-{
- UNKNOWN,
-
- // InferenceRunner will be used only once. Therefore, it is important to
- // minimize bootstrap time as well.
- FAST_SINGLE_ANSWER,
-
- // Prefer maximizing the throughput. Same inference runner will be used
- // repeatedly on different inputs.
- SUSTAINED_SPEED,
-};
-
-// Defines aspects to control while instantiating a runner.
-enum class InferencePriority
-{
- UNKNOWN,
-
- AUTO,
-
- MIN_LATENCY,
-
- MAX_PRECISION,
-
- MIN_MEMORY_USAGE,
-};
-
-struct InferenceOptions
-{
- InferenceUsage usage = InferenceUsage::SUSTAINED_SPEED;
-
- // Ordered priorities provide better understanding of desired semantics,
- // where priority(n) is more important than priority(n+1).
- // AUTO priority is needed when a single priority is the most important
- // factor. For example, priority1 = InferencePriority::MIN_LATENCY and leaving
-  // everything else to AUTO would result in a configuration that achieves maximum
- // performance.
- //
- // AUTO priority can only be used when higher priorities are fully specified.
- // For example:
- // VALID: priority1 = MIN_LATENCY, priority2 = AUTO, priority3 = AUTO
- // VALID: priority1 = MIN_LATENCY, priority2 = MAX_PRECISION,
- // priority3 = AUTO
- // INVALID: priority1 = AUTO, priority2 = MIN_LATENCY, priority3 = AUTO
- // INVALID: priority1 = MIN_LATENCY, priority2 = AUTO,
- // priority3 = MAX_PRECISION
- // Invalid priorities will result in error.
- InferencePriority priority1 = InferencePriority::MAX_PRECISION;
-
- InferencePriority priority2 = InferencePriority::AUTO;
-
- InferencePriority priority3 = InferencePriority::AUTO;
-};
-
-// Returns a position number for the priority. If priority is missing,
-// then it would return 'max num priorities + 1'.
-int GetPosition(const InferenceOptions &options, InferencePriority p);
-
-// Return true if options are valid.
-bool IsValid(const InferenceOptions &options);
-
-// Resolves AUTO priorities and specifies them explicitly.
-// Note, no-one should assume that these mappings will not change.
-// Technically this function is declared here for code re-use purposes and
-// by no means should it be treated as the canonical way to resolve AUTO.
-void ResolveAutoPriority(InferenceOptions *options);
-
-enum class PriorityImportance
-{
- UNKNOWN,
- HIGHER,
- LOWER,
-};
-
-// If both p1 and p2 are not present in options, return UNKNOWN
-// If p1 is present, but p2 is not, return HIGHER
-// If p2 is present, but p1 is not, return LOWER
-// If both are present, and p1 is more important, return HIGHER, otherwise,
-// LOWER.
-PriorityImportance GetRelativeImportance(const InferenceOptions &options, InferencePriority p1,
- InferencePriority p2);
-} // namespace gpu_cl
-} // namespace backend
-} // namespace onert
-
-#endif // __ONERT_BACKEND_GPU_CL_OPENCL_API_H__
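
Api.h modelled a TensorObject as a tagged union (absl::variant) whose ObjectType is recovered by a visitor. The identical pattern with std::variant, using empty stand-ins for the cl_mem-backed types:

    #include <cstddef>
    #include <iostream>
    #include <variant>

    enum class ObjectType { UNKNOWN, CPU_MEMORY, OPENCL_TEXTURE, OPENCL_BUFFER };

    struct CpuMemory { void *data = nullptr; std::size_t size_bytes = 0; };
    struct OpenClBuffer {};  // stand-in; the real one wraps a cl_mem
    struct OpenClTexture {}; // stand-in; the real one wraps a cl_mem

    using TensorObject = std::variant<std::monostate, CpuMemory, OpenClBuffer, OpenClTexture>;

    // Same shape as the deleted ObjectTypeGetter: one overload per alternative.
    struct ObjectTypeGetter
    {
      ObjectType operator()(std::monostate) const { return ObjectType::UNKNOWN; }
      ObjectType operator()(const CpuMemory &) const { return ObjectType::CPU_MEMORY; }
      ObjectType operator()(const OpenClBuffer &) const { return ObjectType::OPENCL_BUFFER; }
      ObjectType operator()(const OpenClTexture &) const { return ObjectType::OPENCL_TEXTURE; }
    };

    ObjectType GetType(const TensorObject &object) { return std::visit(ObjectTypeGetter{}, object); }

    int main()
    {
      TensorObject obj = CpuMemory{};
      std::cout << (GetType(obj) == ObjectType::CPU_MEMORY) << "\n"; // 1
      return 0;
    }
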
diff --git a/runtime/onert/backend/gpu_cl/open_cl/Arguments.cc b/runtime/onert/backend/gpu_cl/open_cl/Arguments.cc
deleted file mode 100644
index a7f86bffc..000000000
--- a/runtime/onert/backend/gpu_cl/open_cl/Arguments.cc
+++ /dev/null
@@ -1,926 +0,0 @@
-/*
- * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
- * Copyright 2020 The TensorFlow Authors. All Rights Reserved.
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#include "Arguments.h"
-
-#include "absl/strings/ascii.h"
-#include "absl/strings/str_cat.h"
-#include "absl/strings/str_replace.h"
-#include "absl/strings/str_split.h"
-#include "absl/strings/substitute.h"
-
-#include "AccessType.h"
-#include "TensorType.h"
-#include "DataType.h"
-
-namespace onert
-{
-namespace backend
-{
-namespace gpu_cl
-{
-
-namespace
-{
-
-bool IsWordSymbol(char symbol) { return absl::ascii_isalnum(symbol) || symbol == '_'; }
-
-std::string GetNextWord(const std::string &code, size_t first_position)
-{
- size_t pos = first_position;
- char t = code[pos];
- while (IsWordSymbol(t))
- {
- pos++;
- t = code[pos];
- }
- return code.substr(first_position, pos - first_position);
-}
-
-size_t FindEnclosingBracket(const std::string &text, size_t first_pos, char bracket)
-{
- const std::map<char, char> brackets = {
- {'(', ')'},
- {'{', '}'},
- {'[', ']'},
- {'<', '>'},
- };
- char b_open = bracket;
- auto it = brackets.find(b_open);
- if (it == brackets.end())
- {
- return -1;
- }
- char b_close = it->second;
- size_t pos = first_pos;
- int opened = 1;
- int closed = 0;
- while (opened != closed && pos < text.size())
- {
- if (text[pos] == b_open)
- {
- opened++;
- }
- else if (text[pos] == b_close)
- {
- closed++;
- }
- pos++;
- }
- if (opened == closed)
- {
- return pos;
- }
- else
- {
- return -1;
- }
-}
-
-absl::Status ParseArgsInsideBrackets(const std::string &text, size_t open_bracket_pos,
- size_t *close_bracket_pos, std::vector<std::string> *args)
-{
- *close_bracket_pos = FindEnclosingBracket(text, open_bracket_pos + 1, text[open_bracket_pos]);
- if (*close_bracket_pos == static_cast<size_t>(-1))
- {
-    return absl::NotFoundError("Enclosing bracket not found");
- }
- std::string str_args =
- text.substr(open_bracket_pos + 1, *close_bracket_pos - open_bracket_pos - 2);
- std::vector<absl::string_view> words = absl::StrSplit(str_args, ',');
- args->reserve(words.size());
- for (const auto &word : words)
- {
- absl::string_view arg = absl::StripAsciiWhitespace(word);
- if (!arg.empty())
- {
- args->push_back(std::string(arg));
- }
- }
- return absl::OkStatus();
-}
-
-void ReplaceAllWords(const std::string &old_word, const std::string &new_word, std::string *str)
-{
- size_t position = str->find(old_word);
- while (position != std::string::npos)
- {
- char prev = position == 0 ? '.' : (*str)[position - 1];
- char next = position + old_word.size() < str->size() ? (*str)[position + old_word.size()] : '.';
- if (IsWordSymbol(prev) || IsWordSymbol(next))
- {
- position = str->find(old_word, position + 1);
- continue;
- }
- str->replace(position, old_word.size(), new_word);
- position = str->find(old_word, position + new_word.size());
- }
-}
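
ReplaceAllWords above substitutes whole identifiers only: a match is skipped whenever either neighbour is a word symbol. A standalone check of that boundary rule, with std::isalnum swapped in for absl::ascii_isalnum and the helper copied verbatim:

    #include <cctype>
    #include <cstddef>
    #include <iostream>
    #include <string>

    bool IsWordSymbol(char symbol)
    {
      return std::isalnum(static_cast<unsigned char>(symbol)) || symbol == '_';
    }

    // Copy of the helper above, kept standalone for the demo.
    void ReplaceAllWords(const std::string &old_word, const std::string &new_word, std::string *str)
    {
      size_t position = str->find(old_word);
      while (position != std::string::npos)
      {
        char prev = position == 0 ? '.' : (*str)[position - 1];
        char next = position + old_word.size() < str->size() ? (*str)[position + old_word.size()] : '.';
        if (IsWordSymbol(prev) || IsWordSymbol(next))
        {
          position = str->find(old_word, position + 1);
          continue;
        }
        str->replace(position, old_word.size(), new_word);
        position = str->find(old_word, position + new_word.size());
      }
    }

    int main()
    {
      std::string code = "src = src_tensor + src_size;";
      ReplaceAllWords("src", "dst", &code);
      std::cout << code << "\n"; // "dst = src_tensor + src_size;" - whole words only
      return 0;
    }
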
-
-std::string RenameArg(const std::vector<std::string> &object_names, const std::string &postfix,
- const std::string &arg_name)
-{
- for (const auto &object_name : object_names)
- {
- if (absl::StartsWith(arg_name, object_name) && arg_name.size() > object_name.size() &&
- arg_name[object_name.size()] == '_')
- {
- return object_name + postfix +
- arg_name.substr(object_name.size(), arg_name.size() - object_name.size());
- }
- }
- return arg_name + postfix;
-}
-
-void AppendArgument(const std::string &arg, std::string *args)
-{
- if (!args->empty())
- {
- absl::StrAppend(args, ",\n ");
- }
- absl::StrAppend(args, arg);
-}
-
-std::string GetImageModifier(AccessType access)
-{
- switch (access)
- {
- case AccessType::READ:
- return "__read_only";
- case AccessType::WRITE:
- return "__write_only";
- case AccessType::READ_WRITE:
- return "__read_write";
- default:
- throw std::runtime_error("Invalid AccessType");
- }
-}
-
-std::string GetDefaultSamplers(const DeviceInfo &device_info)
-{
- std::string result;
- result += "__constant sampler_t smp_none = CLK_NORMALIZED_COORDS_FALSE | "
- "CLK_ADDRESS_NONE | CLK_FILTER_NEAREST;\n";
- if (device_info.IsAdreno3xx())
- {
- // Unfortunately, CLK_ADDRESS_CLAMP is very slow on Adreno3xx and
- // we can observe huge register overhead when compared to other modes.
-
- // While using CLK_ADDRESS_NONE with out-of-range image coordinates is
- // undefined in the OpenCL specification, we have observed that
- // CLK_ADDRESS_NONE works like CLK_ADDRESS_CLAMP for out-of-range image
- // coordinates for RGBA F16/F32 textures on Adreno3xx devices. Using
- // CLK_ADDRESS_NONE is significantly faster than CLK_ADDRESS_CLAMP on Adreno
- // 3xx.
- result += "__constant sampler_t smp_zero = CLK_NORMALIZED_COORDS_FALSE | "
- "CLK_ADDRESS_NONE | CLK_FILTER_NEAREST;\n";
- }
- else
- {
- result += "__constant sampler_t smp_zero = CLK_NORMALIZED_COORDS_FALSE | "
- "CLK_ADDRESS_CLAMP | CLK_FILTER_NEAREST;\n";
- }
-
- return result;
-}
-
-} // namespace
-
-// Static
-constexpr char Arguments::kArgsPrefix[];
-
-Arguments::Arguments(Arguments &&args)
- : int_values_(std::move(args.int_values_)),
- shared_int4s_data_(std::move(args.shared_int4s_data_)),
- float_values_(std::move(args.float_values_)),
- shared_float4s_data_(std::move(args.shared_float4s_data_)), buffers_(std::move(args.buffers_)),
- images2d_(std::move(args.images2d_)), image2d_arrays_(std::move(args.image2d_arrays_)),
- images3d_(std::move(args.images3d_)), image_buffers_(std::move(args.image_buffers_)),
- custom_memories_(std::move(args.custom_memories_)), object_refs_(std::move(args.object_refs_)),
- objects_(std::move(args.objects_))
-{
-}
-Arguments &Arguments::operator=(Arguments &&args)
-{
- if (this != &args)
- {
- int_values_ = std::move(args.int_values_);
- shared_int4s_data_ = std::move(args.shared_int4s_data_);
- float_values_ = std::move(args.float_values_);
- shared_float4s_data_ = std::move(args.shared_float4s_data_);
- buffers_ = std::move(args.buffers_);
- images2d_ = std::move(args.images2d_);
- image2d_arrays_ = std::move(args.image2d_arrays_);
- images3d_ = std::move(args.images3d_);
- image_buffers_ = std::move(args.image_buffers_);
- custom_memories_ = std::move(args.custom_memories_);
- object_refs_ = std::move(args.object_refs_);
- objects_ = std::move(args.objects_);
- }
- return *this;
-}
-
-void Arguments::AddFloat(const std::string &name, float value)
-{
- float_values_[name].value = value;
-}
-void Arguments::AddInt(const std::string &name, int value) { int_values_[name].value = value; }
-void Arguments::AddBuffer(const std::string &name, const GPUBufferDescriptor &desc)
-{
- buffers_[name] = desc;
-}
-void Arguments::AddImage2D(const std::string &name, const GPUImage2DDescriptor &desc)
-{
- images2d_[name] = desc;
-}
-
-void Arguments::AddImage2DArray(const std::string &name, const GPUImage2DArrayDescriptor &desc)
-{
- image2d_arrays_[name] = desc;
-}
-
-void Arguments::AddImage3D(const std::string &name, const GPUImage3DDescriptor &desc)
-{
- images3d_[name] = desc;
-}
-
-void Arguments::AddImageBuffer(const std::string &name, const GPUImageBufferDescriptor &desc)
-{
- image_buffers_[name] = desc;
-}
-
-void Arguments::AddCustomMemory(const std::string &name, const GPUCustomMemoryDescriptor &desc)
-{
- custom_memories_[name] = desc;
-}
-
-void Arguments::AddObjectRef(const std::string &name, AccessType access_type,
- GPUObjectDescriptorPtr &&descriptor_ptr)
-{
- descriptor_ptr->SetAccess(access_type);
- object_refs_[name] = {std::move(descriptor_ptr)};
-}
-
-void Arguments::AddObject(const std::string &name, GPUObjectDescriptorPtr &&descriptor_ptr)
-{
- descriptor_ptr->SetAccess(AccessType::READ);
- objects_[name] = {nullptr, std::move(descriptor_ptr)};
-}
-
-void Arguments::AddGPUResources(const std::string &name, const GPUResources &resources)
-{
- for (const auto &r : resources.ints)
- {
- AddInt(absl::StrCat(name, "_", r));
- }
- for (const auto &r : resources.floats)
- {
- AddFloat(absl::StrCat(name, "_", r));
- }
- for (const auto &r : resources.buffers)
- {
- AddBuffer(absl::StrCat(name, "_", r.first), r.second);
- }
- for (const auto &r : resources.images2d)
- {
- AddImage2D(absl::StrCat(name, "_", r.first), r.second);
- }
- for (const auto &r : resources.image2d_arrays)
- {
- AddImage2DArray(absl::StrCat(name, "_", r.first), r.second);
- }
- for (const auto &r : resources.images3d)
- {
- AddImage3D(absl::StrCat(name, "_", r.first), r.second);
- }
- for (const auto &r : resources.image_buffers)
- {
- AddImageBuffer(absl::StrCat(name, "_", r.first), r.second);
- }
- for (const auto &r : resources.custom_memories)
- {
- AddCustomMemory(absl::StrCat(name, "_", r.first), r.second);
- }
-}
-
-absl::Status Arguments::SetInt(const std::string &name, int value)
-{
- auto it = int_values_.find(name);
- if (it == int_values_.end())
- {
- return absl::NotFoundError(absl::StrCat("No int argument with name - ", name));
- }
- it->second.value = value;
- if (it->second.active)
- {
- shared_int4s_data_[it->second.offset] = value;
- }
- return absl::OkStatus();
-}
-
-absl::Status Arguments::SetFloat(const std::string &name, float value)
-{
- auto it = float_values_.find(name);
- if (it == float_values_.end())
- {
- return absl::NotFoundError(absl::StrCat("No float argument with name - ", name));
- }
- it->second.value = value;
- if (it->second.active)
- {
- shared_float4s_data_[it->second.offset] = value;
- }
- return absl::OkStatus();
-}
-
-absl::Status Arguments::SetImage2D(const std::string &name, cl_mem memory)
-{
- auto it = images2d_.find(name);
- if (it == images2d_.end())
- {
- return absl::NotFoundError(absl::StrCat("No image2D argument with name - ", name));
- }
- it->second.memory = memory;
- return absl::OkStatus();
-}
-
-absl::Status Arguments::SetBuffer(const std::string &name, cl_mem memory)
-{
- auto it = buffers_.find(name);
- if (it == buffers_.end())
- {
- return absl::NotFoundError(absl::StrCat("No buffer argument with name - ", name));
- }
- it->second.memory = memory;
- return absl::OkStatus();
-}
-
-absl::Status Arguments::SetImage2DArray(const std::string &name, cl_mem memory)
-{
- auto it = image2d_arrays_.find(name);
- if (it == image2d_arrays_.end())
- {
- return absl::NotFoundError(absl::StrCat("No image2D array argument with name - ", name));
- }
- it->second.memory = memory;
- return absl::OkStatus();
-}
-
-absl::Status Arguments::SetImage3D(const std::string &name, cl_mem memory)
-{
- auto it = images3d_.find(name);
- if (it == images3d_.end())
- {
- return absl::NotFoundError(absl::StrCat("No image3D argument with name - ", name));
- }
- it->second.memory = memory;
- return absl::OkStatus();
-}
-
-absl::Status Arguments::SetImageBuffer(const std::string &name, cl_mem memory)
-{
- auto it = image_buffers_.find(name);
- if (it == image_buffers_.end())
- {
- return absl::NotFoundError(absl::StrCat("No image buffer argument with name - ", name));
- }
- it->second.memory = memory;
- return absl::OkStatus();
-}
-
-absl::Status Arguments::SetCustomMemory(const std::string &name, cl_mem memory)
-{
- auto it = custom_memories_.find(name);
- if (it == custom_memories_.end())
- {
- return absl::NotFoundError(absl::StrCat("No custom memory argument with name - ", name));
- }
- it->second.memory = memory;
- return absl::OkStatus();
-}
-
-absl::Status Arguments::SetObjectRef(const std::string &name, const GPUObject *object)
-{
- auto it = object_refs_.find(name);
- if (it == object_refs_.end())
- {
- return absl::NotFoundError(absl::StrCat("No object ref with name - ", name));
- }
- GPUResourcesWithValue resources;
- RETURN_IF_ERROR(object->GetGPUResources(it->second.descriptor.get(), &resources));
- return SetGPUResources(name, resources);
-}
-
-absl::Status Arguments::SetGPUResources(const std::string &name,
- const GPUResourcesWithValue &resources)
-{
- for (const auto &r : resources.ints)
- {
- RETURN_IF_ERROR(SetInt(absl::StrCat(name, "_", r.first), r.second));
- }
- for (const auto &r : resources.floats)
- {
- RETURN_IF_ERROR(SetFloat(absl::StrCat(name, "_", r.first), r.second));
- }
- for (const auto &r : resources.buffers)
- {
- RETURN_IF_ERROR(SetBuffer(absl::StrCat(name, "_", r.first), r.second));
- }
- for (const auto &r : resources.images2d)
- {
- RETURN_IF_ERROR(SetImage2D(absl::StrCat(name, "_", r.first), r.second));
- }
- for (const auto &r : resources.image2d_arrays)
- {
- RETURN_IF_ERROR(SetImage2DArray(absl::StrCat(name, "_", r.first), r.second));
- }
- for (const auto &r : resources.images3d)
- {
- RETURN_IF_ERROR(SetImage3D(absl::StrCat(name, "_", r.first), r.second));
- }
- for (const auto &r : resources.image_buffers)
- {
- RETURN_IF_ERROR(SetImageBuffer(absl::StrCat(name, "_", r.first), r.second));
- }
- for (const auto &r : resources.custom_memories)
- {
- RETURN_IF_ERROR(SetCustomMemory(absl::StrCat(name, "_", r.first), r.second));
- }
- return absl::OkStatus();
-}
-
-void Arguments::RenameArgs(const std::string &postfix, std::string *code) const
-{
- size_t next_position = code->find(kArgsPrefix);
- while (next_position != std::string::npos)
- {
- size_t arg_pos = next_position + strlen(kArgsPrefix);
- std::string arg_name = GetNextWord(*code, arg_pos);
- code->replace(arg_pos, arg_name.size(), arg_name + postfix);
- next_position = code->find(kArgsPrefix, arg_pos + arg_name.size());
- }
-}
-
-absl::Status Arguments::Merge(Arguments &&args, const std::string &postfix)
-{
- std::vector<std::string> object_names;
- object_names.reserve(args.object_refs_.size() + args.objects_.size());
- for (auto &v : args.object_refs_)
- {
- object_names.push_back(v.first);
- const std::string name = v.first + postfix;
- if (object_refs_.find(name) != object_refs_.end())
- {
- return absl::InvalidArgumentError(
- absl::StrCat("Object reference name collision. Name - ", name));
- }
- object_refs_[name] = {std::move(v.second.descriptor)};
- }
- for (auto &v : args.objects_)
- {
- object_names.push_back(v.first);
- const std::string name = v.first + postfix;
- if (objects_.find(name) != objects_.end())
- {
- return absl::InvalidArgumentError(absl::StrCat("Object name collision. Name - ", name));
- }
- objects_[name] = {std::move(v.second.obj_ptr), std::move(v.second.descriptor)};
- }
- for (const auto &v : args.int_values_)
- {
- AddInt(RenameArg(object_names, postfix, v.first), v.second.value);
- }
- for (const auto &v : args.float_values_)
- {
- AddFloat(RenameArg(object_names, postfix, v.first), v.second.value);
- }
- for (const auto &v : args.buffers_)
- {
- AddBuffer(RenameArg(object_names, postfix, v.first), v.second);
- }
- for (const auto &v : args.images2d_)
- {
- AddImage2D(RenameArg(object_names, postfix, v.first), v.second);
- }
- for (const auto &v : args.image2d_arrays_)
- {
- AddImage2DArray(RenameArg(object_names, postfix, v.first), v.second);
- }
- for (const auto &v : args.images3d_)
- {
- AddImage3D(RenameArg(object_names, postfix, v.first), v.second);
- }
- for (const auto &v : args.image_buffers_)
- {
- AddImageBuffer(RenameArg(object_names, postfix, v.first), v.second);
- }
- for (const auto &v : args.custom_memories_)
- {
- AddCustomMemory(RenameArg(object_names, postfix, v.first), v.second);
- }
- return absl::OkStatus();
-}
-
-absl::Status Arguments::TransformToCLCode(const DeviceInfo &device_info,
- const std::map<std::string, std::string> &linkables,
- std::string *code)
-{
- RETURN_IF_ERROR(AddObjectArgs());
- RETURN_IF_ERROR(ResolveSelectorsPass(linkables, code));
- ResolveArgsPass(device_info, code);
- *code = absl::Substitute(*code, GetListOfArgs());
- *code = GetDefaultSamplers(device_info) + *code;
- return absl::OkStatus();
-}
-
-std::string Arguments::GetListOfArgs()
-{
- std::string result;
- for (auto &t : buffers_)
- {
- const std::string type_name = t.second.data_type == DataType::FLOAT32 ? "float" : "half";
- std::string attributes;
- for (const auto &attr : t.second.attributes)
- {
- attributes += absl::StrCat(" __attribute__((", attr, "))");
- }
- AppendArgument(absl::StrCat(MemoryTypeToCLType(t.second.memory_type), " ",
- ToCLDataType(t.second.data_type, t.second.element_size), "* ",
- t.first, attributes),
- &result);
- }
- for (auto &t : image_buffers_)
- {
- AppendArgument(
- absl::StrCat(GetImageModifier(t.second.access_type), " image1d_buffer_t ", t.first), &result);
- }
- for (auto &t : images2d_)
- {
- AppendArgument(absl::StrCat(GetImageModifier(t.second.access_type), " image2d_t ", t.first),
- &result);
- }
- for (auto &t : image2d_arrays_)
- {
- AppendArgument(
- absl::StrCat(GetImageModifier(t.second.access_type), " image2d_array_t ", t.first), &result);
- }
- for (auto &t : images3d_)
- {
- AppendArgument(absl::StrCat(GetImageModifier(t.second.access_type), " image3d_t ", t.first),
- &result);
- }
- for (auto &t : custom_memories_)
- {
- AppendArgument(absl::StrCat(t.second.type_name, " ", t.first), &result);
- }
- for (uint32_t i = 0; i < shared_int4s_data_.size() / 4; ++i)
- {
- AppendArgument(absl::StrCat("int4 shared_int4_", i), &result);
- }
- for (uint32_t i = 0; i < shared_float4s_data_.size() / 4; ++i)
- {
- AppendArgument(absl::StrCat("float4 shared_float4_", i), &result);
- }
- return result;
-}
-
-absl::Status Arguments::Bind(cl_kernel kernel, int offset)
-{
- for (auto &t : buffers_)
- {
- const int error_code = clSetKernelArg(kernel, offset, sizeof(cl_mem), &t.second.memory);
- if (error_code != CL_SUCCESS)
- {
- return absl::UnknownError(absl::StrCat("Failed to set kernel arguments - ",
- CLErrorCodeToString(error_code), "(at index - ",
- offset, ")"));
- }
- offset++;
- }
- for (auto &t : image_buffers_)
- {
- const int error_code = clSetKernelArg(kernel, offset, sizeof(cl_mem), &t.second.memory);
- if (error_code != CL_SUCCESS)
- {
- return absl::UnknownError(absl::StrCat("Failed to set kernel arguments - ",
- CLErrorCodeToString(error_code), "(at index - ",
- offset, ")"));
- }
- offset++;
- }
- for (auto &t : images2d_)
- {
- const int error_code = clSetKernelArg(kernel, offset, sizeof(cl_mem), &t.second.memory);
- if (error_code != CL_SUCCESS)
- {
- return absl::UnknownError(absl::StrCat("Failed to set kernel arguments - ",
- CLErrorCodeToString(error_code), "(at index - ",
- offset, ")"));
- }
- offset++;
- }
- for (auto &t : image2d_arrays_)
- {
- const int error_code = clSetKernelArg(kernel, offset, sizeof(cl_mem), &t.second.memory);
- if (error_code != CL_SUCCESS)
- {
- return absl::UnknownError(absl::StrCat("Failed to set kernel arguments - ",
- CLErrorCodeToString(error_code), "(at index - ",
- offset, ")"));
- }
- offset++;
- }
- for (auto &t : images3d_)
- {
- const int error_code = clSetKernelArg(kernel, offset, sizeof(cl_mem), &t.second.memory);
- if (error_code != CL_SUCCESS)
- {
- return absl::UnknownError(absl::StrCat("Failed to set kernel arguments - ",
- CLErrorCodeToString(error_code), "(at index - ",
- offset, ")"));
- }
- offset++;
- }
- for (auto &t : custom_memories_)
- {
- const int error_code = clSetKernelArg(kernel, offset, sizeof(cl_mem), &t.second.memory);
- if (error_code != CL_SUCCESS)
- {
- return absl::UnknownError(absl::StrCat("Failed to set kernel arguments - ",
- CLErrorCodeToString(error_code), "(at index - ",
- offset, ")"));
- }
- offset++;
- }
- for (size_t i = 0; i < shared_int4s_data_.size() / 4; ++i)
- {
- const int error_code =
- clSetKernelArg(kernel, offset, sizeof(int32_t) * 4, &shared_int4s_data_[i * 4]);
- if (error_code != CL_SUCCESS)
- {
- return absl::UnknownError(absl::StrCat("Failed to set kernel arguments - ",
- CLErrorCodeToString(error_code), "(at index - ",
- offset, ")"));
- }
- offset++;
- }
- for (size_t i = 0; i < shared_float4s_data_.size() / 4; ++i)
- {
- const int error_code =
- clSetKernelArg(kernel, offset, sizeof(float) * 4, &shared_float4s_data_[i * 4]);
- if (error_code != CL_SUCCESS)
- {
- return absl::UnknownError(absl::StrCat("Failed to set kernel arguments - ",
- CLErrorCodeToString(error_code), "(at index - ",
- offset, ")"));
- }
- offset++;
- }
- return absl::OkStatus();
-}
-
-std::string Arguments::AddActiveArgument(const std::string &arg_name, bool /*use_f32_for_halfs*/)
-{
- {
- auto it = int_values_.find(arg_name);
- if (it != int_values_.end())
- {
- int int_index;
- if (it->second.active)
- {
- int_index = it->second.offset;
- }
- else
- {
- it->second.active = true;
- it->second.offset = shared_int4s_data_.size();
- int_index = it->second.offset;
- shared_int4s_data_.push_back(it->second.value);
- }
- std::string index = std::to_string(int_index / 4);
- std::string postfixes[4] = {"x", "y", "z", "w"};
- return "shared_int4_" + index + "." + postfixes[int_index % 4];
- }
- }
- {
- auto it = float_values_.find(arg_name);
- if (it != float_values_.end())
- {
- int float_index;
- if (it->second.active)
- {
- float_index = it->second.offset;
- }
- else
- {
- it->second.active = true;
- it->second.offset = shared_float4s_data_.size();
- float_index = it->second.offset;
- shared_float4s_data_.push_back(it->second.value);
- }
- std::string index = std::to_string(float_index / 4);
- std::string postfixes[4] = {"x", "y", "z", "w"};
- return "shared_float4_" + index + "." + postfixes[float_index % 4];
- }
- }
- return arg_name;
-}
-
-void Arguments::ResolveArgsPass(const DeviceInfo &device_info, std::string *code)
-{
- bool use_f32_for_half_arguments = device_info.IsPowerVR();
- size_t position = 0;
- size_t next_position = code->find(kArgsPrefix);
- while (next_position != std::string::npos)
- {
- size_t arg_pos = next_position;
- next_position += strlen(kArgsPrefix);
- std::string object_name = GetNextWord(*code, next_position);
- std::string new_name = AddActiveArgument(object_name, use_f32_for_half_arguments);
- code->replace(arg_pos, object_name.size() + strlen(kArgsPrefix), new_name);
- position = arg_pos + new_name.size();
- next_position = code->find(kArgsPrefix, position);
- }
-
- int shared_int4s_aligned_size = AlignByN(shared_int4s_data_.size(), 4);
- shared_int4s_data_.resize(shared_int4s_aligned_size);
- int shared_float4s_aligned_size = AlignByN(shared_float4s_data_.size(), 4);
- shared_float4s_data_.resize(shared_float4s_aligned_size);
-}
-
-void Arguments::ResolveObjectNames(const std::string &object_name,
- const std::vector<std::string> &member_names, std::string *code)
-{
- for (const auto &member_name : member_names)
- {
- const std::string new_name = kArgsPrefix + object_name + "_" + member_name;
- ReplaceAllWords(member_name, new_name, code);
- }
-}
-
-GPUObjectDescriptor *Arguments::GetObjectDescriptor(const std::string &object_name) const
-{
- {
- auto it = object_refs_.find(object_name);
- if (it != object_refs_.end())
- {
- return it->second.descriptor.get();
- }
- }
- {
- auto it = objects_.find(object_name);
- if (it != objects_.end())
- {
- return it->second.descriptor.get();
- }
- }
- return nullptr;
-}
-
-absl::Status Arguments::ResolveSelector(const std::map<std::string, std::string> &linkables,
- const std::string &object_name, const std::string &selector,
- const std::vector<std::string> &args,
- const std::vector<std::string> &template_args,
- std::string *result)
-{
- const GPUObjectDescriptor *desc_ptr = GetObjectDescriptor(object_name);
- if (!desc_ptr)
- {
- return absl::NotFoundError(absl::StrCat("No object with name - ", object_name));
- }
- auto names = desc_ptr->GetGPUResources().GetNames();
- const auto *tensor_desc = dynamic_cast<const TensorDescriptor *>(desc_ptr);
- if (tensor_desc && selector == "Write")
- {
- auto it = linkables.find(object_name);
- if (it != linkables.end())
- {
- if (desc_ptr->GetAccess() != AccessType::WRITE &&
- desc_ptr->GetAccess() != AccessType::READ_WRITE)
- {
- return absl::FailedPreconditionError(
- absl::StrCat("Object with name - ", object_name, " should have Write access."));
- }
- std::string value_name, x_coord, y_coord, s_coord;
- RETURN_IF_ERROR(tensor_desc->GetLinkingContextFromWriteSelector(args, &value_name, &x_coord,
- &y_coord, &s_coord));
- // x_coord may carry the batch-size property of the linked object
- ResolveObjectNames(object_name, names, &x_coord);
- *result = it->second;
- ReplaceAllWords("in_out_value", value_name, result);
- ReplaceAllWords("X_COORD", x_coord, result);
- ReplaceAllWords("Y_COORD", y_coord, result);
- ReplaceAllWords("S_COORD", s_coord, result);
- RETURN_IF_ERROR(ResolveSelectorsPass({}, result));
- }
- }
- std::string patch;
- RETURN_IF_ERROR(desc_ptr->PerformSelector(selector, args, template_args, &patch));
- ResolveObjectNames(object_name, names, &patch);
- *result += patch;
- return absl::OkStatus();
-}
-
-absl::Status Arguments::ResolveSelectorsPass(const std::map<std::string, std::string> &linkables,
- std::string *code)
-{
- std::string result;
- size_t position = 0;
- size_t next_position = code->find(kArgsPrefix);
- while (next_position != std::string::npos)
- {
- size_t arg_pos = next_position;
- next_position += strlen(kArgsPrefix);
- std::string object_name = GetNextWord(*code, next_position);
- char next = (*code)[next_position + object_name.size()];
- if (next == '.')
- {
- next_position += object_name.size() + 1;
- std::string selector_name = GetNextWord(*code, next_position);
- next_position += selector_name.size();
- next = (*code)[next_position];
- std::vector<std::string> template_args;
- if (next == '<')
- {
- size_t close_bracket_pos;
- RETURN_IF_ERROR(
- ParseArgsInsideBrackets(*code, next_position, &close_bracket_pos, &template_args));
- next_position = close_bracket_pos;
- next = (*code)[next_position];
- }
- if (next != '(')
- {
- return absl::NotFoundError(
- absl::StrCat("Expected ( after ", object_name, ".", selector_name, " call"));
- }
- std::vector<std::string> args;
- size_t close_bracket_pos;
- RETURN_IF_ERROR(ParseArgsInsideBrackets(*code, next_position, &close_bracket_pos, &args));
- for (auto &arg : args)
- {
- RETURN_IF_ERROR(ResolveSelectorsPass({}, &arg));
- }
- std::string patch;
- RETURN_IF_ERROR(
- ResolveSelector(linkables, object_name, selector_name, args, template_args, &patch));
- code->replace(arg_pos, close_bracket_pos - arg_pos, patch);
- position = arg_pos + patch.size();
- }
- else
- {
- position = arg_pos + strlen(kArgsPrefix);
- }
- next_position = code->find(kArgsPrefix, position);
- }
- return absl::OkStatus();
-}
-
-absl::Status Arguments::AllocateObjects(CLContext *context)
-{
- for (auto &t : objects_)
- {
- RETURN_IF_ERROR(t.second.descriptor->CreateGPUObject(context, &t.second.obj_ptr));
- }
- return absl::OkStatus();
-}
-
-void Arguments::ReleaseCPURepresentation()
-{
- for (auto &t : objects_)
- {
- t.second.descriptor->Release();
- }
-}
-
-absl::Status Arguments::AddObjectArgs()
-{
- for (auto &t : objects_)
- {
- AddGPUResources(t.first, t.second.descriptor->GetGPUResources());
- GPUResourcesWithValue resources;
- RETURN_IF_ERROR(t.second.obj_ptr->GetGPUResources(t.second.descriptor.get(), &resources));
- RETURN_IF_ERROR(SetGPUResources(t.first, resources));
- }
- for (auto &t : object_refs_)
- {
- AddGPUResources(t.first, t.second.descriptor->GetGPUResources());
- }
- return absl::OkStatus();
-}
-
-} // namespace gpu_cl
-} // namespace backend
-} // namespace onert
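For orientation on the deleted Arguments machinery above: kernel code refers to scalar
arguments as "args.<name>", and the resolve passes rewrite each reference to a packed
uniform slot. A minimal, hypothetical sketch (the names are illustrative, not taken
from the deleted sources or their tests):

  Arguments args;
  args.AddInt("width", 128);    // stays inactive until referenced from kernel code
  args.AddFloat("scale", 0.5f);

  std::string code = "dst[gid] = args.scale * convert_float(src[gid % args.width]);";
  // TransformToCLCode() (which runs ResolveArgsPass internally) would activate both
  // scalars and rewrite the references, e.g. "args.width" -> "shared_int4_0.x" and
  // "args.scale" -> "shared_float4_0.x". Bind() then uploads the packed int4/float4
  // slots via clSetKernelArg.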
diff --git a/runtime/onert/backend/gpu_cl/open_cl/Arguments.h b/runtime/onert/backend/gpu_cl/open_cl/Arguments.h
deleted file mode 100644
index 0c6ce1edf..000000000
--- a/runtime/onert/backend/gpu_cl/open_cl/Arguments.h
+++ /dev/null
@@ -1,175 +0,0 @@
-/*
- * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
- * Copyright 2020 The TensorFlow Authors. All Rights Reserved.
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#ifndef __ONERT_BACKEND_GPU_CL_OPENCL_ARGUMENTS_H__
-#define __ONERT_BACKEND_GPU_CL_OPENCL_ARGUMENTS_H__
-
-#include <map>
-#include <string>
-#include <vector>
-
-#include "ClDevice.h"
-#include "GpuObject.h"
-#include "OpenclWrapper.h"
-
-#include "AccessType.h"
-#include "Types.h"
-#include "Util.h"
-#include "Status.h"
-
-namespace onert
-{
-namespace backend
-{
-namespace gpu_cl
-{
-
-class ArgumentsBinder
-{
-public:
- virtual absl::Status SetInt(const std::string &name, int value) = 0;
- virtual absl::Status SetFloat(const std::string &name, float value) = 0;
- virtual ~ArgumentsBinder() = default;
-};
-
-class Arguments : public ArgumentsBinder
-{
-public:
- Arguments() = default;
- void AddFloat(const std::string &name, float value = 0.0f);
- void AddInt(const std::string &name, int value = 0);
- void AddObjectRef(const std::string &name, AccessType access_type,
- GPUObjectDescriptorPtr &&descriptor_ptr);
- void AddObject(const std::string &name, GPUObjectDescriptorPtr &&descriptor_ptr);
-
- absl::Status SetInt(const std::string &name, int value) override;
- absl::Status SetFloat(const std::string &name, float value) override;
- absl::Status SetObjectRef(const std::string &name, const GPUObject *object);
-
- absl::Status Bind(cl_kernel kernel, int offset = 0);
-
- void RenameArgs(const std::string &postfix, std::string *code) const;
- absl::Status Merge(Arguments &&args, const std::string &postfix);
-
- absl::Status AllocateObjects(CLContext *context);
- void ReleaseCPURepresentation();
- absl::Status TransformToCLCode(const DeviceInfo &device_info,
- const std::map<std::string, std::string> &linkables,
- std::string *code);
-
- // Move only
- Arguments(Arguments &&args);
- Arguments &operator=(Arguments &&args);
- Arguments(const Arguments &) = delete;
- Arguments &operator=(const Arguments &) = delete;
-
- ~Arguments() override = default;
-
-private:
- void AddBuffer(const std::string &name, const GPUBufferDescriptor &desc);
- void AddImage2D(const std::string &name, const GPUImage2DDescriptor &desc);
- void AddImage2DArray(const std::string &name, const GPUImage2DArrayDescriptor &desc);
- void AddImage3D(const std::string &name, const GPUImage3DDescriptor &desc);
- void AddImageBuffer(const std::string &name, const GPUImageBufferDescriptor &desc);
- void AddCustomMemory(const std::string &name, const GPUCustomMemoryDescriptor &desc);
-
- absl::Status SetImage2D(const std::string &name, cl_mem memory);
- absl::Status SetBuffer(const std::string &name, cl_mem memory);
- absl::Status SetImage2DArray(const std::string &name, cl_mem memory);
- absl::Status SetImage3D(const std::string &name, cl_mem memory);
- absl::Status SetImageBuffer(const std::string &name, cl_mem memory);
- absl::Status SetCustomMemory(const std::string &name, cl_mem memory);
-
- std::string GetListOfArgs();
-
- std::string AddActiveArgument(const std::string &arg_name, bool use_f32_for_halfs);
- void AddGPUResources(const std::string &name, const GPUResources &resources);
-
- absl::Status SetGPUResources(const std::string &name, const GPUResourcesWithValue &resources);
-
- absl::Status AddObjectArgs();
-
- void ResolveArgsPass(const DeviceInfo &device_info, std::string *code);
- absl::Status ResolveSelectorsPass(const std::map<std::string, std::string> &linkables,
- std::string *code);
-
- absl::Status ResolveSelector(const std::map<std::string, std::string> &linkables,
- const std::string &object_name, const std::string &selector,
- const std::vector<std::string> &args,
- const std::vector<std::string> &template_args, std::string *result);
-
- void ResolveObjectNames(const std::string &object_name,
- const std::vector<std::string> &member_names, std::string *code);
-
- GPUObjectDescriptor *GetObjectDescriptor(const std::string &object_name) const;
-
- static constexpr char kArgsPrefix[] = "args.";
-
- struct IntValue
- {
- int value;
-
- // Many uniforms are generated automatically but never used;
- // this flag marks active ones so only referenced values are transferred.
- bool active = false;
-
- // offset to shared uniform storage.
- uint32_t offset = -1;
- };
- std::map<std::string, IntValue> int_values_;
- std::vector<int32_t> shared_int4s_data_;
-
- struct FloatValue
- {
- float value;
-
- // Many uniforms are generated automatically but never used;
- // this flag marks active ones so only referenced values are transferred.
- bool active = false;
-
- // offset to shared uniform storage.
- uint32_t offset = -1;
- };
- std::map<std::string, FloatValue> float_values_;
- std::vector<float> shared_float4s_data_;
-
- std::map<std::string, GPUBufferDescriptor> buffers_;
- std::map<std::string, GPUImage2DDescriptor> images2d_;
- std::map<std::string, GPUImage2DArrayDescriptor> image2d_arrays_;
- std::map<std::string, GPUImage3DDescriptor> images3d_;
- std::map<std::string, GPUImageBufferDescriptor> image_buffers_;
- std::map<std::string, GPUCustomMemoryDescriptor> custom_memories_;
-
- struct ObjectRefArg
- {
- GPUObjectDescriptorPtr descriptor;
- };
- std::map<std::string, ObjectRefArg> object_refs_;
-
- struct ObjectArg
- {
- GPUObjectPtr obj_ptr;
- GPUObjectDescriptorPtr descriptor;
- };
- std::map<std::string, ObjectArg> objects_;
-};
-
-} // namespace gpu_cl
-} // namespace backend
-} // namespace onert
-
-#endif // __ONERT_BACKEND_GPU_CL_OPENCL_ARGUMENTS_H__
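The active/offset fields above implement lazy uniform packing: only scalars actually
referenced from kernel code receive a slot, and four consecutive slots share one
int4/float4. A standalone sketch of the mapping that AddActiveArgument applies:

  // offset -> "shared_int4_<offset / 4>.<one of x|y|z|w, chosen by offset % 4>"
  int offset = 5; // e.g. the sixth active int
  const char *postfixes[4] = {"x", "y", "z", "w"};
  std::string ref = "shared_int4_" + std::to_string(offset / 4) + "." + postfixes[offset % 4];
  // ref == "shared_int4_1.y"; ResolveArgsPass later pads both shared arrays to a
  // multiple of 4 (AlignByN) so that Bind() can upload whole vectors.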
diff --git a/runtime/onert/backend/gpu_cl/open_cl/Buffer.cc b/runtime/onert/backend/gpu_cl/open_cl/Buffer.cc
deleted file mode 100644
index 64c071921..000000000
--- a/runtime/onert/backend/gpu_cl/open_cl/Buffer.cc
+++ /dev/null
@@ -1,234 +0,0 @@
-/*
- * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
- * Copyright 2019 The TensorFlow Authors. All Rights Reserved.
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#include "Buffer.h"
-
-#include <string>
-
-#include "ClContext.h"
-#include "DataType.h"
-#include "GpuObject.h"
-#include "Util.h"
-#include "Status.h"
-
-namespace onert
-{
-namespace backend
-{
-namespace gpu_cl
-{
-namespace
-{
-
-absl::Status CreateBuffer(size_t size_in_bytes, bool gpu_read_only, const void *data,
- CLContext *context, Buffer *result)
-{
- cl_mem buffer;
- RETURN_IF_ERROR(CreateCLBuffer(context->context(), size_in_bytes, gpu_read_only,
- const_cast<void *>(data), &buffer));
- *result = Buffer(buffer, size_in_bytes);
-
- return absl::OkStatus();
-}
-
-} // namespace
-
-BufferDescriptor::BufferDescriptor(BufferDescriptor &&desc)
- : GPUObjectDescriptor(std::move(desc)), element_type(desc.element_type),
- element_size(desc.element_size), memory_type(desc.memory_type),
- attributes(std::move(desc.attributes)), size(desc.size), data(std::move(desc.data))
-{
-}
-
-BufferDescriptor &BufferDescriptor::operator=(BufferDescriptor &&desc)
-{
- if (this != &desc)
- {
- std::swap(element_type, desc.element_type);
- std::swap(element_size, desc.element_size);
- std::swap(memory_type, desc.memory_type);
- attributes = std::move(desc.attributes);
- std::swap(size, desc.size);
- data = std::move(desc.data);
- GPUObjectDescriptor::operator=(std::move(desc));
- }
- return *this;
-}
-
-void BufferDescriptor::Release() { data.clear(); }
-
-GPUResources BufferDescriptor::GetGPUResources() const
-{
- GPUResources resources;
- GPUBufferDescriptor desc;
- desc.data_type = element_type;
- desc.access_type = access_type_;
- desc.element_size = element_size;
- desc.memory_type = memory_type;
- desc.attributes = attributes;
- resources.buffers.push_back({"buffer", desc});
- return resources;
-}
-
-absl::Status BufferDescriptor::PerformSelector(const std::string &selector,
- const std::vector<std::string> &args,
- const std::vector<std::string> &template_args,
- std::string *result) const
-{
- if (selector == "Read")
- {
- return PerformReadSelector(args, result);
- }
- else if (selector == "GetPtr")
- {
- return PerformGetPtrSelector(args, template_args, result);
- }
- else
- {
- return absl::NotFoundError(
- absl::StrCat("BufferDescriptor don't have selector with name - ", selector));
- }
-}
-
-absl::Status BufferDescriptor::PerformReadSelector(const std::vector<std::string> &args,
- std::string *result) const
-{
- if (args.size() != 1)
- {
- return absl::NotFoundError(
- absl::StrCat("BufferDescriptor Read require one argument, but ", args.size(), " was passed"));
- }
- *result = absl::StrCat("buffer[", args[0], "]");
- return absl::OkStatus();
-}
-
-absl::Status BufferDescriptor::PerformGetPtrSelector(const std::vector<std::string> &args,
- const std::vector<std::string> &template_args,
- std::string *result) const
-{
- if (args.size() > 1)
- {
- return absl::NotFoundError(absl::StrCat(
- "BufferDescriptor GetPtr require one or zero arguments, but ", args.size(), " was passed"));
- }
- if (template_args.size() > 1)
- {
- return absl::NotFoundError(absl::StrCat("BufferDescriptor GetPtr require one or zero teemplate "
- "arguments, but ",
- template_args.size(), " was passed"));
- }
- std::string conversion;
- if (template_args.size() == 1)
- {
- const std::string type_name = ToCLDataType(element_type, element_size);
- if (type_name != template_args[0])
- {
- conversion = absl::StrCat("(", MemoryTypeToCLType(memory_type), " ", template_args[0], "*)&");
- }
- }
- if (args.empty())
- {
- *result = absl::StrCat(conversion, "buffer");
- }
- else if (conversion.empty())
- {
- *result = absl::StrCat("(buffer + ", args[0], ")");
- }
- else
- {
- *result = absl::StrCat(conversion, "buffer[", args[0], "]");
- }
- return absl::OkStatus();
-}
-
-absl::Status BufferDescriptor::CreateGPUObject(CLContext *context, GPUObjectPtr *result) const
-{
- Buffer gpu_buffer;
- RETURN_IF_ERROR(gpu_buffer.CreateFromBufferDescriptor(*this, context));
- *result = absl::make_unique<Buffer>(std::move(gpu_buffer));
- return absl::OkStatus();
-}
-
-Buffer::Buffer(cl_mem buffer, size_t size_in_bytes) : buffer_(buffer), size_(size_in_bytes) {}
-
-Buffer::Buffer(Buffer &&buffer) : buffer_(buffer.buffer_), size_(buffer.size_)
-{
- buffer.buffer_ = nullptr;
- buffer.size_ = 0;
-}
-
-Buffer &Buffer::operator=(Buffer &&buffer)
-{
- if (this != &buffer)
- {
- Release();
- std::swap(size_, buffer.size_);
- std::swap(buffer_, buffer.buffer_);
- }
- return *this;
-}
-
-void Buffer::Release()
-{
- if (buffer_)
- {
- clReleaseMemObject(buffer_);
- buffer_ = nullptr;
- size_ = 0;
- }
-}
-
-absl::Status Buffer::GetGPUResources(const GPUObjectDescriptor *obj_ptr,
- GPUResourcesWithValue *resources) const
-{
- const auto *buffer_desc = dynamic_cast<const BufferDescriptor *>(obj_ptr);
- if (!buffer_desc)
- {
- return absl::InvalidArgumentError("Expected BufferDescriptor on input.");
- }
-
- resources->buffers.push_back({"buffer", buffer_});
- return absl::OkStatus();
-}
-
-absl::Status Buffer::CreateFromBufferDescriptor(const BufferDescriptor &desc, CLContext *context)
-{
- bool read_only = desc.memory_type == MemoryType::CONSTANT;
- uint8_t *data_ptr = desc.data.empty() ? nullptr : const_cast<unsigned char *>(desc.data.data());
- size_ = desc.size;
- return CreateCLBuffer(context->context(), desc.size, read_only, data_ptr, &buffer_);
-}
-
-absl::Status CreateReadOnlyBuffer(size_t size_in_bytes, CLContext *context, Buffer *result)
-{
- return CreateBuffer(size_in_bytes, true, nullptr, context, result);
-}
-
-absl::Status CreateReadOnlyBuffer(size_t size_in_bytes, const void *data, CLContext *context,
- Buffer *result)
-{
- return CreateBuffer(size_in_bytes, true, data, context, result);
-}
-
-absl::Status CreateReadWriteBuffer(size_t size_in_bytes, CLContext *context, Buffer *result)
-{
- return CreateBuffer(size_in_bytes, false, nullptr, context, result);
-}
-
-} // namespace gpu_cl
-} // namespace backend
-} // namespace onert
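PerformGetPtrSelector above emits one of three expression shapes, depending on whether
an offset argument and a template type are supplied. Summarized from the code (the
"args.buf" spelling is illustrative only):

  // args.buf.GetPtr()           -> "buffer"
  // args.buf.GetPtr(off)        -> "(buffer + off)"
  // args.buf.GetPtr<float>(off) -> "(__global float*)&buffer[off]"
  // The cast is emitted only when the template type differs from the buffer's own
  // CL type; MemoryTypeToCLType(memory_type) picks __global, __constant, etc.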
diff --git a/runtime/onert/backend/gpu_cl/open_cl/Buffer.h b/runtime/onert/backend/gpu_cl/open_cl/Buffer.h
deleted file mode 100644
index 39e97be6d..000000000
--- a/runtime/onert/backend/gpu_cl/open_cl/Buffer.h
+++ /dev/null
@@ -1,121 +0,0 @@
-/*
- * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
- * Copyright 2019 The TensorFlow Authors. All Rights Reserved.
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#ifndef __ONERT_BACKEND_GPU_CL_OPENCL_BUFFER_H__
-#define __ONERT_BACKEND_GPU_CL_OPENCL_BUFFER_H__
-
-#include "absl/strings/str_cat.h"
-#include "absl/types/span.h"
-
-#include "ClCommandQueue.h"
-#include "ClContext.h"
-#include "GpuObject.h"
-#include "OpenclWrapper.h"
-#include "DataType.h"
-#include "Util.h"
-#include "Status.h"
-
-namespace onert
-{
-namespace backend
-{
-namespace gpu_cl
-{
-
-struct BufferDescriptor : public GPUObjectDescriptor
-{
- DataType element_type;
- int element_size;
- MemoryType memory_type = MemoryType::GLOBAL;
- std::vector<std::string> attributes;
-
- // optional
- int size = 0;
- std::vector<uint8_t> data;
-
- BufferDescriptor() = default;
- BufferDescriptor(const BufferDescriptor &) = default;
- BufferDescriptor &operator=(const BufferDescriptor &) = default;
- BufferDescriptor(BufferDescriptor &&desc);
- BufferDescriptor &operator=(BufferDescriptor &&desc);
-
- absl::Status PerformSelector(const std::string &selector, const std::vector<std::string> &args,
- const std::vector<std::string> &template_args,
- std::string *result) const override;
-
- GPUResources GetGPUResources() const override;
- absl::Status PerformReadSelector(const std::vector<std::string> &args, std::string *result) const;
- absl::Status PerformGetPtrSelector(const std::vector<std::string> &args,
- const std::vector<std::string> &template_args,
- std::string *result) const;
-
- absl::Status CreateGPUObject(CLContext *context, GPUObjectPtr *result) const override;
- void Release() override;
-};
-
-// Buffer represents linear GPU data storage with an arbitrary data format.
-// Buffer is movable but not copyable.
-class Buffer : public GPUObject
-{
-public:
- Buffer() {} // default-constructible so Buffer can be used as a class member
- Buffer(cl_mem buffer, size_t size_in_bytes);
-
- // Move only
- Buffer(Buffer &&buffer);
- Buffer &operator=(Buffer &&buffer);
- Buffer(const Buffer &) = delete;
- Buffer &operator=(const Buffer &) = delete;
-
- virtual ~Buffer() { Release(); }
-
- // for profiling and memory statistics
- uint64_t GetMemorySizeInBytes() const { return size_; }
-
- cl_mem GetMemoryPtr() const { return buffer_; }
-
- // Writes data to the buffer. Data must point to a region whose size in
- // bytes exactly matches size_in_bytes (the constructor parameter).
- template <typename T> absl::Status WriteData(CLCommandQueue *queue, const std::vector<T> *data);
-
- // Reads data from Buffer into CPU memory.
- template <typename T> absl::Status ReadData(CLCommandQueue *queue, std::vector<T> *result) const;
-
- absl::Status GetGPUResources(const GPUObjectDescriptor *obj_ptr,
- GPUResourcesWithValue *resources) const override;
-
- absl::Status CreateFromBufferDescriptor(const BufferDescriptor &desc, CLContext *context);
-
-private:
- void Release();
-
- cl_mem buffer_ = nullptr;
- size_t size_ = 0;
-};
-
-absl::Status CreateReadOnlyBuffer(size_t size_in_bytes, CLContext *context, Buffer *result);
-
-absl::Status CreateReadOnlyBuffer(size_t size_in_bytes, const void *data, CLContext *context,
- Buffer *result);
-
-absl::Status CreateReadWriteBuffer(size_t size_in_bytes, CLContext *context, Buffer *result);
-
-} // namespace gpu_cl
-} // namespace backend
-} // namespace onert
-
-#endif // __ONERT_BACKEND_GPU_CL_OPENCL_BUFFER_H__
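A hedged usage sketch for the helpers declared above, assuming a valid CLContext
"context" and CLCommandQueue "queue" from this backend were created elsewhere:

  std::vector<float> host = {1.0f, 2.0f, 3.0f, 4.0f};
  Buffer buf;
  absl::Status status =
    CreateReadOnlyBuffer(host.size() * sizeof(float), host.data(), &context, &buf);
  if (status.ok())
  {
    std::vector<float> out(host.size());
    // EnqueueReadBuffer passes CL_TRUE internally, so this read is blocking.
    status = queue.EnqueueReadBuffer(buf.GetMemoryPtr(), out.size() * sizeof(float), out.data());
  }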
diff --git a/runtime/onert/backend/gpu_cl/open_cl/ClCommandQueue.cc b/runtime/onert/backend/gpu_cl/open_cl/ClCommandQueue.cc
deleted file mode 100644
index d147b7b13..000000000
--- a/runtime/onert/backend/gpu_cl/open_cl/ClCommandQueue.cc
+++ /dev/null
@@ -1,359 +0,0 @@
-/*
- * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
- * Copyright 2019 The TensorFlow Authors. All Rights Reserved.
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#include "ClCommandQueue.h"
-
-#include <algorithm>
-#include <map>
-#include <string>
-#include <vector>
-#include <limits>
-
-#include "absl/strings/str_cat.h"
-#include "ClDevice.h"
-#include "ClEvent.h"
-#include "Util.h"
-#include "Types.h"
-#include "Status.h"
-
-namespace onert
-{
-namespace backend
-{
-namespace gpu_cl
-{
-
-using namespace std;
-
-CLCommandQueue::CLCommandQueue(cl_command_queue queue, bool has_ownership)
- : queue_(queue), has_ownership_(has_ownership)
-{
-}
-
-CLCommandQueue::CLCommandQueue(CLCommandQueue &&queue)
- : queue_(queue.queue_), has_ownership_(queue.has_ownership_)
-{
- queue.queue_ = nullptr;
-}
-
-CLCommandQueue &CLCommandQueue::operator=(CLCommandQueue &&queue)
-{
- if (this != &queue)
- {
- Release();
- std::swap(queue_, queue.queue_);
- has_ownership_ = queue.has_ownership_;
- }
- return *this;
-}
-
-CLCommandQueue::~CLCommandQueue() { Release(); }
-
-void CLCommandQueue::Release()
-{
- if (has_ownership_ && queue_)
- {
- clReleaseCommandQueue(queue_);
- queue_ = nullptr;
- }
-}
-
-absl::Status CLCommandQueue::Dispatch(const CLKernel &kernel, const int3 &work_groups_count,
- const int3 &work_group_size, CLEvent *event)
-{
- std::vector<size_t> local(3);
- std::vector<size_t> global(3);
- for (int i = 0; i < 3; ++i)
- {
- local[i] = work_group_size[i];
- global[i] = work_groups_count[i] * work_group_size[i];
- }
- cl_event resulting_event;
- const int error_code =
- clEnqueueNDRangeKernel(queue_, kernel.kernel(), 3, nullptr, global.data(), local.data(), 0,
- nullptr, event ? &resulting_event : nullptr);
- if (event)
- {
- *event = CLEvent(resulting_event);
- }
- if (error_code != CL_SUCCESS)
- {
- return absl::UnknownError(
- absl::StrCat("Failed to clEnqueueNDRangeKernel - ", CLErrorCodeToString(error_code)));
- }
- return absl::OkStatus();
-}
-
-absl::Status CLCommandQueue::Dispatch(const CLKernel &kernel, const int3 &work_groups_count,
- const int3 &work_group_size)
-{
- return Dispatch(kernel, work_groups_count, work_group_size, nullptr);
-}
-
-absl::Status CLCommandQueue::EnqueueEvent(CLEvent *event)
-{
- cl_event resulting_event;
- const int error_code = clEnqueueMarker(queue_, &resulting_event);
- *event = CLEvent(resulting_event);
- if (error_code != CL_SUCCESS)
- {
- return absl::UnknownError(
- absl::StrCat("Failed to clEnqueueMarker - ", CLErrorCodeToString(error_code)));
- }
- return absl::OkStatus();
-}
-
-absl::Status CLCommandQueue::EnqueueWriteImage(cl_mem memory, int3 region, const void *data)
-{
- const size_t origin[] = {0, 0, 0};
- const size_t r[] = {static_cast<size_t>(region.x), static_cast<size_t>(region.y),
- static_cast<size_t>(region.z)};
- auto error_code =
- clEnqueueWriteImage(queue_, memory, CL_TRUE, origin, r, 0, 0, data, 0, nullptr, nullptr);
- if (error_code != CL_SUCCESS)
- {
- return absl::UnknownError(absl::StrCat("Failed to upload data to GPU (clEnqueueWriteImage) - ",
- CLErrorCodeToString(error_code)));
- }
-
- return absl::OkStatus();
-}
-
-absl::Status CLCommandQueue::EnqueueReadImage(cl_mem memory, int3 region, void *data)
-{
- const size_t origin[] = {0, 0, 0};
- const size_t r[] = {static_cast<size_t>(region.x), static_cast<size_t>(region.y),
- static_cast<size_t>(region.z)};
- auto error_code =
- clEnqueueReadImage(queue_, memory, CL_TRUE, origin, r, 0, 0, data, 0, nullptr, nullptr);
- if (error_code != CL_SUCCESS)
- {
- return absl::UnknownError(absl::StrCat("Failed to read data from GPU (clEnqueueReadImage) - ",
- CLErrorCodeToString(error_code)));
- }
-
- return absl::OkStatus();
-}
-
-absl::Status CLCommandQueue::EnqueueWriteBuffer(cl_mem memory, size_t size_in_bytes,
- const void *data)
-{
- auto error_code =
- clEnqueueWriteBuffer(queue_, memory, CL_TRUE, 0, size_in_bytes, data, 0, nullptr, nullptr);
- if (error_code != CL_SUCCESS)
- {
- return absl::UnknownError(absl::StrCat("Failed to upload data to GPU (clEnqueueWriteBuffer) - ",
- CLErrorCodeToString(error_code)));
- }
- return absl::OkStatus();
-}
-
-absl::Status CLCommandQueue::EnqueueReadBuffer(cl_mem memory, size_t size_in_bytes, void *data)
-{
- auto error_code =
- clEnqueueReadBuffer(queue_, memory, CL_TRUE, 0, size_in_bytes, data, 0, nullptr, nullptr);
- if (error_code != CL_SUCCESS)
- {
- return absl::UnknownError(absl::StrCat("Failed to read data from GPU (clEnqueueReadBuffer) - ",
- CLErrorCodeToString(error_code)));
- }
- return absl::OkStatus();
-}
-
-absl::Status CLCommandQueue::WaitForCompletion()
-{
- auto error_code = clFinish(queue_);
- if (error_code != CL_SUCCESS)
- {
- return absl::UnknownError(
- absl::StrCat("Failed to clFinish - ", CLErrorCodeToString(error_code)));
- }
- return absl::OkStatus();
-}
-
-ProfilingCommandQueue::ProfilingCommandQueue(cl_command_queue queue) : CLCommandQueue(queue, true)
-{
- events_.reserve(128);
-}
-
-ProfilingCommandQueue::ProfilingCommandQueue(ProfilingCommandQueue &&queue)
- : CLCommandQueue(std::move(queue)), events_(std::move(queue.events_)),
- current_label_(std::move(queue.current_label_))
-{
-}
-
-ProfilingCommandQueue &ProfilingCommandQueue::operator=(ProfilingCommandQueue &&queue)
-{
- if (this != &queue)
- {
- events_ = std::move(queue.events_);
- current_label_ = std::move(queue.current_label_);
- CLCommandQueue::operator=(std::move(queue));
- }
- return *this;
-}
-
-void ProfilingCommandQueue::SetEventsLabel(const std::string &name) { current_label_ = name; }
-
-void ProfilingCommandQueue::ResetMeasurements() { events_.clear(); }
-
-absl::Status ProfilingCommandQueue::Dispatch(const CLKernel &kernel, const int3 &work_groups_count,
- const int3 &work_group_size)
-{
- events_.push_back(CLEvent());
- RETURN_IF_ERROR(CLCommandQueue::Dispatch(kernel, work_groups_count, work_group_size,
- &events_[events_.size() - 1]));
- events_.back().SetName(current_label_);
- return absl::OkStatus();
-}
-
-absl::Status
-ProfilingCommandQueue::GetBestWorkGroupIndex(const CLKernel &kernel, const DeviceInfo &device_info,
- const std::vector<int3> &work_groups_count,
- const std::vector<int3> &work_group_sizes, int *index)
-{
- // Some Adreno 3xx GPUs can report wrong timing numbers for some events
- const bool possible_bug_with_events = device_info.IsAdreno3xx();
- events_.resize(work_group_sizes.size());
- for (size_t i = 0; i < work_group_sizes.size(); ++i)
- {
- RETURN_IF_ERROR(
- CLCommandQueue::Dispatch(kernel, work_groups_count[i], work_group_sizes[i], &events_[i]));
-
- // Reduces the rate of a memory leak on Mali for some kernels
- if (device_info.IsMali() && i % 8 == 7)
- {
- events_[i - 7].Wait();
- }
- if (possible_bug_with_events)
- {
- // Waiting here increases the probability of a correct result.
- RETURN_IF_ERROR(WaitForCompletion());
- }
- }
-
- RETURN_IF_ERROR(WaitForCompletion());
-
- // Releases memory of the kernel pool on Mali.
- if (device_info.IsMali())
- {
- RETURN_IF_ERROR(kernel.ReInit());
- }
-
- int minimum_index = 0;
- double minimum_time = std::numeric_limits<double>::max();
- if (possible_bug_with_events)
- { // try to cut out suspicious results
- double average_time = 0.0;
- int average_samples_count = 0;
- for (size_t i = 0; i < work_group_sizes.size(); ++i)
- {
- if (events_[i].GetEventTimeMs() < 100 * 1000)
- { // 100 sec
- average_time += events_[i].GetEventTimeMs();
- average_samples_count++;
- }
- }
- if (average_samples_count == 0)
- {
- throw std::runtime_error("It cannot be divided by zero");
- }
- else
- {
- average_time /= average_samples_count;
- }
-
- for (size_t i = 0; i < work_group_sizes.size(); ++i)
- {
- double time = events_[i].GetEventTimeMs();
- if (time < minimum_time && time >= 0.1 * average_time)
- {
- minimum_index = i;
- minimum_time = time;
- }
- }
- }
- else
- {
- for (size_t i = 0; i < work_group_sizes.size(); ++i)
- {
- double time = events_[i].GetEventTimeMs();
- if (time < minimum_time)
- {
- minimum_index = i;
- minimum_time = time;
- }
- }
- }
-
- *index = minimum_index;
-
- return absl::OkStatus();
-}
-
-absl::Status CreateCLCommandQueue(const CLDevice &device, const CLContext &context,
- CLCommandQueue *result)
-{
- int error_code;
- cl_command_queue queue = clCreateCommandQueue(context.context(), device.id(), 0, &error_code);
- if (!queue)
- {
- return absl::UnknownError(
- absl::StrCat("Failed to create a command queue - ", CLErrorCodeToString(error_code)));
- }
- *result = CLCommandQueue(queue, true);
- return absl::OkStatus();
-}
-
-double ProfilingCommandQueue::GetQueueExecutionTimeMs() const
-{
- const uint64_t start = events_.front().GetStartedTimeNs();
- const uint64_t end = events_.back().GetFinishedTimeNs();
- const uint64_t time_ns = (end - start);
-
- return static_cast<double>(time_ns) / 1000000.0;
-}
-
-double ProfilingCommandQueue::GetSumOfEventsTimeMs() const
-{
- double sum = 0.0;
- for (uint32_t i = 0; i < events_.size(); ++i)
- {
- sum += events_[i].GetEventTimeMs();
- }
- return sum;
-}
-
-absl::Status CreateProfilingCommandQueue(const CLDevice &device, const CLContext &context,
- ProfilingCommandQueue *result)
-{
- int error_code;
- cl_command_queue queue =
- clCreateCommandQueue(context.context(), device.id(), CL_QUEUE_PROFILING_ENABLE, &error_code);
- if (!queue)
- {
- return absl::UnknownError(
- absl::StrCat("Failed to create a command queue - ", CLErrorCodeToString(error_code)));
- }
-
- *result = ProfilingCommandQueue(queue);
- return absl::OkStatus();
-}
-
-} // namespace gpu_cl
-} // namespace backend
-} // namespace onert
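A hedged sketch of the autotuning path above. It assumes "kernel", "device_info", and a
"profiling_queue" built with CreateProfilingCommandQueue already exist, and that int3
(from Types.h) is constructible from three ints:

  std::vector<int3> work_groups_count = {int3(32, 32, 1), int3(16, 16, 1)};
  std::vector<int3> work_group_sizes = {int3(8, 8, 1), int3(16, 16, 1)};
  int best_index = 0;
  absl::Status status = profiling_queue.GetBestWorkGroupIndex(
    kernel, device_info, work_groups_count, work_group_sizes, &best_index);
  // On success, work_group_sizes[best_index] had the smallest measured event time,
  // with suspicious samples filtered out on Adreno 3xx.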
diff --git a/runtime/onert/backend/gpu_cl/open_cl/ClCommandQueue.h b/runtime/onert/backend/gpu_cl/open_cl/ClCommandQueue.h
deleted file mode 100644
index 81f93fd23..000000000
--- a/runtime/onert/backend/gpu_cl/open_cl/ClCommandQueue.h
+++ /dev/null
@@ -1,157 +0,0 @@
-/*
- * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
- * Copyright 2019 The TensorFlow Authors. All Rights Reserved.
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#ifndef __ONERT_BACKEND_GPU_CL_OPENCL_CL_COMMAND_QUEUE_H__
-#define __ONERT_BACKEND_GPU_CL_OPENCL_CL_COMMAND_QUEUE_H__
-
-#include <cstdint>
-#include <string>
-#include <vector>
-
-#include "absl/time/time.h"
-#include "ClContext.h"
-#include "ClDevice.h"
-#include "ClEvent.h"
-#include "ClKernel.h"
-#include "OpenclWrapper.h"
-#include "Types.h"
-#include "Status.h"
-
-namespace onert
-{
-namespace backend
-{
-namespace gpu_cl
-{
-
-struct ProfilingInfo
-{
- struct DispatchInfo
- {
- std::string label;
- absl::Duration duration;
- };
-
- std::vector<DispatchInfo> dispatches;
-
- absl::Duration GetTotalTime() const;
-
- // Returns a report (string of lines delimited by \n).
- // This method uses GPU counters and measures GPU time only.
- // The report has the following structure:
- // Per kernel timing(K kernels):
- // conv2d 3.2ms
- // ...
- // --------------------
- // Accumulated time per operation type:
- // conv2d - 14.5ms
- // ....
- // --------------------
- // Ideal total time: 23.4ms // Total time for all kernels
- std::string GetDetailedReport() const;
-};
-
-// A wrapper around an OpenCL command queue
-class CLCommandQueue
-{
-public:
- CLCommandQueue() {}
- CLCommandQueue(cl_command_queue queue, bool has_ownership);
-
- // Move only
- CLCommandQueue(CLCommandQueue &&queue);
- CLCommandQueue &operator=(CLCommandQueue &&queue);
- CLCommandQueue(const CLCommandQueue &) = delete;
- CLCommandQueue &operator=(const CLCommandQueue &) = delete;
-
- virtual ~CLCommandQueue();
-
- cl_command_queue queue() const { return queue_; }
-
- virtual absl::Status Dispatch(const CLKernel &kernel, const int3 &work_groups_count,
- const int3 &work_group_size);
-
- absl::Status Dispatch(const CLKernel &kernel, const int3 &work_groups_count,
- const int3 &work_group_size, CLEvent *event);
-
- absl::Status EnqueueEvent(CLEvent *event);
-
- absl::Status EnqueueWriteImage(cl_mem memory, int3 region, const void *data);
- absl::Status EnqueueReadImage(cl_mem memory, int3 region, void *data);
-
- absl::Status EnqueueWriteBuffer(cl_mem memory, size_t size_in_bytes, const void *data);
- absl::Status EnqueueReadBuffer(cl_mem memory, size_t size_in_bytes, void *data);
-
- absl::Status WaitForCompletion();
-
-protected:
- void Release();
-
- cl_command_queue queue_ = nullptr;
- bool has_ownership_ = false;
-};
-
-class ProfilingCommandQueue : public CLCommandQueue
-{
-public:
- ProfilingCommandQueue() {}
- explicit ProfilingCommandQueue(cl_command_queue queue);
-
- // Move only
- ProfilingCommandQueue(ProfilingCommandQueue &&queue);
- ProfilingCommandQueue &operator=(ProfilingCommandQueue &&queue);
- ProfilingCommandQueue(const ProfilingCommandQueue &) = delete;
- ProfilingCommandQueue &operator=(const ProfilingCommandQueue &) = delete;
-
- absl::Status Dispatch(const CLKernel &kernel, const int3 &work_groups_count,
- const int3 &work_group_size) override;
-
- // Writes the index of the fastest work group among work_group_sizes
- absl::Status GetBestWorkGroupIndex(const CLKernel &kernel, const DeviceInfo &device_info,
- const std::vector<int3> &work_groups_count,
- const std::vector<int3> &work_group_sizes, int *index);
-
- // Call ResetMeasurements() to start a new series of measurements
- void ResetMeasurements();
-
- double GetQueueExecutionTimeMs() const;
-
- // Unlike GetQueueExecutionTimeMs, this number does not include the time
- // between kernels (launches or preparation) on the GPU. It is usually
- // 5-10% lower than GetQueueExecutionTimeMs, since that share of the
- // queue time is spent on launches and preparation rather than kernels.
- double GetSumOfEventsTimeMs() const;
-
- // This label will be used for all subsequent dispatches.
- void SetEventsLabel(const std::string &name);
-
-private:
- std::vector<CLEvent> events_;
- std::string current_label_;
-};
-
-absl::Status CreateCLCommandQueue(const CLDevice &device, const CLContext &context,
- CLCommandQueue *result);
-
-absl::Status CreateProfilingCommandQueue(const CLDevice &device, const CLContext &context,
- ProfilingCommandQueue *result);
-
-} // namespace gpu_cl
-} // namespace backend
-} // namespace onert
-
-#endif // __ONERT_BACKEND_GPU_CL_OPENCL_CL_COMMAND_QUEUE_H__
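Note that Dispatch above takes the work-group count, not the OpenCL global size; the
implementation multiplies count by size per dimension. A minimal hedged sketch (the
int3 constructor and the surrounding "queue"/"kernel" objects are assumed):

  int3 work_groups_count(16, 16, 1); // grid of work groups
  int3 work_group_size(8, 8, 1);     // => global size 128 x 128 x 1
  RETURN_IF_ERROR(queue.Dispatch(kernel, work_groups_count, work_group_size));
  RETURN_IF_ERROR(queue.WaitForCompletion()); // clFinish under the hood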
diff --git a/runtime/onert/backend/gpu_cl/open_cl/ClContext.cc b/runtime/onert/backend/gpu_cl/open_cl/ClContext.cc
deleted file mode 100644
index 3289ff914..000000000
--- a/runtime/onert/backend/gpu_cl/open_cl/ClContext.cc
+++ /dev/null
@@ -1,177 +0,0 @@
-/*
- * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
- * Copyright 2019 The TensorFlow Authors. All Rights Reserved.
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#include "ClContext.h"
-
-#include "absl/strings/str_cat.h"
-#include "ClImageFormat.h"
-#include "Util.h"
-#include "Status.h"
-
-namespace onert
-{
-namespace backend
-{
-namespace gpu_cl
-{
-namespace
-{
-
-std::vector<cl_image_format> GetSupportedImage2DFormats(cl_context context, cl_mem_flags flags)
-{
- cl_uint num_image_formats;
- cl_int error = clGetSupportedImageFormats(context, flags, CL_MEM_OBJECT_IMAGE2D, 0, nullptr,
- &num_image_formats);
- if (error != CL_SUCCESS)
- {
- return {};
- }
-
- std::vector<cl_image_format> result(num_image_formats);
- error = clGetSupportedImageFormats(context, flags, CL_MEM_OBJECT_IMAGE2D, num_image_formats,
- &result[0], nullptr);
- if (error != CL_SUCCESS)
- {
- return {};
- }
- return result;
-}
-
-bool IsEqualToImageFormat(cl_image_format image_format, DataType data_type, int num_channels)
-{
- return image_format.image_channel_data_type == ToImageChannelType(data_type) &&
- image_format.image_channel_order == ToChannelOrder(num_channels);
-}
-
-void AddSupportedImageFormats(cl_context context, DeviceInfo *info)
-{
- auto supported_formats = GetSupportedImage2DFormats(context, CL_MEM_READ_WRITE);
- for (auto format : supported_formats)
- {
- info->supports_r_f16_tex2d =
- info->supports_r_f16_tex2d || IsEqualToImageFormat(format, DataType::FLOAT16, 1);
- info->supports_rg_f16_tex2d =
- info->supports_rg_f16_tex2d || IsEqualToImageFormat(format, DataType::FLOAT16, 2);
- info->supports_rgb_f16_tex2d =
- info->supports_rgb_f16_tex2d || IsEqualToImageFormat(format, DataType::FLOAT16, 3);
- info->supports_rgba_f16_tex2d =
- info->supports_rgba_f16_tex2d || IsEqualToImageFormat(format, DataType::FLOAT16, 4);
- info->supports_r_f32_tex2d =
- info->supports_r_f32_tex2d || IsEqualToImageFormat(format, DataType::FLOAT32, 1);
- info->supports_rg_f32_tex2d =
- info->supports_rg_f32_tex2d || IsEqualToImageFormat(format, DataType::FLOAT32, 2);
- info->supports_rgb_f32_tex2d =
- info->supports_rgb_f32_tex2d || IsEqualToImageFormat(format, DataType::FLOAT32, 3);
- info->supports_rgba_f32_tex2d =
- info->supports_rgba_f32_tex2d || IsEqualToImageFormat(format, DataType::FLOAT32, 4);
- }
-}
-
-absl::Status CreateCLContext(const CLDevice &device, cl_context_properties *properties,
- CLContext *result)
-{
- int error_code;
- cl_device_id device_id = device.id();
- cl_context context = clCreateContext(properties, 1, &device_id, nullptr, nullptr, &error_code);
- if (!context)
- {
- return absl::UnknownError(
- absl::StrCat("Failed to create a compute context - ", CLErrorCodeToString(error_code)));
- }
- AddSupportedImageFormats(context, &device.info_);
-
- *result = CLContext(context, true);
- return absl::OkStatus();
-}
-
-} // namespace
-
-CLContext::CLContext(cl_context context, bool has_ownership)
- : context_(context), has_ownership_(has_ownership)
-{
-}
-
-CLContext::CLContext(CLContext &&context)
- : context_(context.context_), has_ownership_(context.has_ownership_)
-{
- context.context_ = nullptr;
-}
-
-CLContext &CLContext::operator=(CLContext &&context)
-{
- if (this != &context)
- {
- Release();
- std::swap(context_, context.context_);
- has_ownership_ = context.has_ownership_;
- }
- return *this;
-}
-
-CLContext::~CLContext() { Release(); }
-
-void CLContext::Release()
-{
- if (has_ownership_ && context_)
- {
- clReleaseContext(context_);
- context_ = nullptr;
- }
-}
-
-bool CLContext::IsFloatTexture2DSupported(int num_channels, DataType data_type,
- cl_mem_flags flags) const
-{
- auto supported_formats = GetSupportedImage2DFormats(context_, flags);
- for (auto format : supported_formats)
- {
- if (format.image_channel_data_type == ToImageChannelType(data_type) &&
- format.image_channel_order == ToChannelOrder(num_channels))
- {
- return true;
- }
- }
-
- return false;
-}
-
-absl::Status CreateCLContext(const CLDevice &device, CLContext *result)
-{
- return CreateCLContext(device, nullptr, result);
-}
-
-absl::Status CreateCLGLContext(const CLDevice &device, cl_context_properties egl_context,
- cl_context_properties egl_display, CLContext *result)
-{
- if (!device.SupportsExtension("cl_khr_gl_sharing"))
- {
- return absl::UnavailableError("Device doesn't support CL-GL sharing.");
- }
- cl_context_properties platform = reinterpret_cast<cl_context_properties>(device.platform());
- cl_context_properties props[] = {CL_GL_CONTEXT_KHR,
- egl_context,
- CL_EGL_DISPLAY_KHR,
- egl_display,
- CL_CONTEXT_PLATFORM,
- platform,
- 0};
- return CreateCLContext(device, props, result);
-}
-
-} // namespace gpu_cl
-} // namespace backend
-} // namespace onert
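A hedged bring-up sketch for the helpers above; the device-acquisition step is assumed,
since this file only covers context creation:

  CLDevice device; // assumed obtained from the backend's device enumeration
  CLContext context;
  absl::Status status = CreateCLContext(device, &context);
  if (status.ok())
  {
    // Supported image formats were recorded into device.info_ during creation;
    // float-texture support can now be queried directly.
    bool rgba_f16 = context.IsFloatTexture2DSupported(4, DataType::FLOAT16);
    (void)rgba_f16;
  }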
diff --git a/runtime/onert/backend/gpu_cl/open_cl/ClContext.h b/runtime/onert/backend/gpu_cl/open_cl/ClContext.h
deleted file mode 100644
index cf1d0d2d2..000000000
--- a/runtime/onert/backend/gpu_cl/open_cl/ClContext.h
+++ /dev/null
@@ -1,68 +0,0 @@
-/*
- * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
- * Copyright 2019 The TensorFlow Authors. All Rights Reserved.
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#ifndef __ONERT_BACKEND_GPU_CL_OPENCL_CL_CONTEXT_H__
-#define __ONERT_BACKEND_GPU_CL_OPENCL_CL_CONTEXT_H__
-
-#include "ClDevice.h"
-#include "OpenclWrapper.h"
-#include "DataType.h"
-#include "Status.h"
-
-namespace onert
-{
-namespace backend
-{
-namespace gpu_cl
-{
-
-// An RAII wrapper around an OpenCL context
-class CLContext
-{
-public:
- CLContext() {}
- CLContext(cl_context context, bool has_ownership);
-
- // Move only
- CLContext(CLContext &&context);
- CLContext &operator=(CLContext &&context);
- CLContext(const CLContext &) = delete;
- CLContext &operator=(const CLContext &) = delete;
-
- ~CLContext();
-
- cl_context context() const { return context_; }
-
- bool IsFloatTexture2DSupported(int num_channels, DataType data_type,
- cl_mem_flags flags = CL_MEM_READ_WRITE) const;
-
-private:
- void Release();
-
- cl_context context_ = nullptr;
- bool has_ownership_ = false;
-};
-
-absl::Status CreateCLContext(const CLDevice &device, CLContext *result);
-absl::Status CreateCLGLContext(const CLDevice &device, cl_context_properties egl_context,
- cl_context_properties egl_display, CLContext *result);
-
-} // namespace gpu_cl
-} // namespace backend
-} // namespace onert
-
-#endif // __ONERT_BACKEND_GPU_CL_OPENCL_CL_CONTEXT_H__
diff --git a/runtime/onert/backend/gpu_cl/open_cl/ClDevice.cc b/runtime/onert/backend/gpu_cl/open_cl/ClDevice.cc
deleted file mode 100644
index 8dede139c..000000000
--- a/runtime/onert/backend/gpu_cl/open_cl/ClDevice.cc
+++ /dev/null
@@ -1,448 +0,0 @@
-/*
- * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
- * Copyright 2019 The TensorFlow Authors. All Rights Reserved.
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#include "ClDevice.h"
-
-#include <algorithm>
-#include <string>
-#include <vector>
-
-#include "Util.h"
-#include "Status.h"
-
-#include "absl/strings/numbers.h"
-#include "absl/strings/str_split.h"
-
-namespace onert
-{
-namespace backend
-{
-namespace gpu_cl
-{
-
-template <> std::string GetDeviceInfo<std::string>(cl_device_id id, cl_device_info info)
-{
- size_t size;
- cl_int error = clGetDeviceInfo(id, info, 0, nullptr, &size);
- if (error != CL_SUCCESS)
- {
- return "";
- }
-
- std::string result(size - 1, 0);
- error = clGetDeviceInfo(id, info, size, &result[0], nullptr);
- if (error != CL_SUCCESS)
- {
- return "";
- }
- return result;
-}
-
-namespace
-{
-template <typename T> T GetPlatformInfo(cl_platform_id id, cl_platform_info info)
-{
- T result;
- cl_int error = clGetPlatformInfo(id, info, sizeof(T), &result, nullptr);
- if (error != CL_SUCCESS)
- {
- return -1;
- }
- return result;
-}
-
-std::string GetPlatformInfo(cl_platform_id id, cl_platform_info info)
-{
- size_t size;
- cl_int error = clGetPlatformInfo(id, info, 0, nullptr, &size);
- if (error != CL_SUCCESS)
- {
- return "";
- }
-
- std::string result(size - 1, 0);
- error = clGetPlatformInfo(id, info, size, &result[0], nullptr);
- if (error != CL_SUCCESS)
- {
- return "";
- }
- return result;
-}
-
-void GetDeviceWorkDimsSizes(cl_device_id id, int3 *result)
-{
- int dims_count = GetDeviceInfo<cl_uint>(id, CL_DEVICE_MAX_WORK_ITEM_DIMENSIONS);
- if (dims_count < 3)
- {
- return;
- }
- std::vector<size_t> limits(dims_count);
- cl_int error = clGetDeviceInfo(id, CL_DEVICE_MAX_WORK_ITEM_SIZES, sizeof(size_t) * dims_count,
- limits.data(), nullptr);
- if (error != CL_SUCCESS)
- {
- return;
- }
- // dims_count must be at least 3 according to spec
- result->x = limits[0];
- result->y = limits[1];
- result->z = limits[2];
-}
-
-OpenCLVersion ParseCLVersion(const std::string &version)
-{
- const auto first_dot_pos = version.find_first_of('.');
- if (first_dot_pos == std::string::npos)
- {
- return OpenCLVersion::CL_1_0;
- }
- const int major = version[first_dot_pos - 1] - '0';
- const int minor = version[first_dot_pos + 1] - '0';
-
- if (major == 1)
- {
- if (minor == 2)
- {
- return OpenCLVersion::CL_1_2;
- }
- else if (minor == 1)
- {
- return OpenCLVersion::CL_1_1;
- }
- else
- {
- return OpenCLVersion::CL_1_0;
- }
- }
- else if (major == 2)
- {
- if (minor == 2)
- {
- return OpenCLVersion::CL_2_2;
- }
- else if (minor == 1)
- {
- return OpenCLVersion::CL_2_1;
- }
- else
- {
- return OpenCLVersion::CL_2_0;
- }
- }
- else if (major == 3)
- {
- return OpenCLVersion::CL_3_0;
- }
- else
- {
- return OpenCLVersion::CL_1_0;
- }
-}
-
-Vendor ParseVendor(const std::string &device_name, const std::string &vendor_name)
-{
- std::string d_name = device_name;
- std::string v_name = vendor_name;
- std::transform(d_name.begin(), d_name.end(), d_name.begin(), ::tolower);
- std::transform(v_name.begin(), v_name.end(), v_name.begin(), ::tolower);
- if (d_name.find("qualcomm") != std::string::npos || v_name.find("qualcomm") != std::string::npos)
- {
- return Vendor::kQualcomm;
- }
- else if (d_name.find("mali") != std::string::npos || v_name.find("mali") != std::string::npos)
- {
- return Vendor::kMali;
- }
- else if (d_name.find("power") != std::string::npos || v_name.find("power") != std::string::npos)
- {
- return Vendor::kPowerVR;
- }
- else if (d_name.find("nvidia") != std::string::npos || v_name.find("nvidia") != std::string::npos)
- {
- return Vendor::kNvidia;
- }
- else if (d_name.find("advanced micro devices") != std::string::npos ||
- v_name.find("advanced micro devices") != std::string::npos)
- {
- return Vendor::kAMD;
- }
- else if (d_name.find("intel") != std::string::npos || v_name.find("intel") != std::string::npos)
- {
- return Vendor::kIntel;
- }
- else
- {
- return Vendor::kUnknown;
- }
-}
-
-// Checks that gpu_version belongs to the range [min_version, max_version):
-// min_version is included and max_version is excluded.
-bool IsGPUVersionInRange(int gpu_version, int min_version, int max_version)
-{
- return gpu_version >= min_version && gpu_version < max_version;
-}
-} // namespace
-
-DeviceInfo DeviceInfoFromDeviceID(cl_device_id id)
-{
- DeviceInfo info;
- const auto device_name = GetDeviceInfo<std::string>(id, CL_DEVICE_NAME);
- const auto vendor_name = GetDeviceInfo<std::string>(id, CL_DEVICE_VENDOR);
- const auto opencl_c_version = GetDeviceInfo<std::string>(id, CL_DEVICE_OPENCL_C_VERSION);
- info.vendor = ParseVendor(device_name, vendor_name);
- if (info.vendor == Vendor::kQualcomm)
- {
- info.adreno_info = AdrenoInfo(opencl_c_version);
- }
- else if (info.vendor == Vendor::kMali)
- {
- info.mali_info = MaliInfo(device_name);
- }
- info.cl_version = ParseCLVersion(opencl_c_version);
- info.extensions = absl::StrSplit(GetDeviceInfo<std::string>(id, CL_DEVICE_EXTENSIONS), ' ');
-
- info.supports_fp16 = false;
- info.supports_image3d_writes = false;
- for (const auto &ext : info.extensions)
- {
- if (ext == "cl_khr_fp16")
- {
- info.supports_fp16 = true;
- }
- if (ext == "cl_khr_3d_image_writes")
- {
- info.supports_image3d_writes = true;
- }
- }
-
- cl_device_fp_config f32_config =
- GetDeviceInfo<cl_device_fp_config>(id, CL_DEVICE_SINGLE_FP_CONFIG);
- info.supports_fp32_rtn = f32_config & CL_FP_ROUND_TO_NEAREST;
-
- if (info.supports_fp16)
- {
- cl_device_fp_config f16_config;
- auto status = GetDeviceInfo<cl_device_fp_config>(id, CL_DEVICE_HALF_FP_CONFIG, &f16_config);
- // AMD supports cl_khr_fp16 but CL_DEVICE_HALF_FP_CONFIG is empty.
- if (status.ok() && info.vendor != Vendor::kAMD)
- {
- info.supports_fp16_rtn = f16_config & CL_FP_ROUND_TO_NEAREST;
- }
- else
- { // happens on PowerVR
- f16_config = f32_config;
- info.supports_fp16_rtn = info.supports_fp32_rtn;
- }
- }
- else
- {
- info.supports_fp16_rtn = false;
- }
-
- if (info.vendor == Vendor::kPowerVR && !info.supports_fp16)
- {
-    // PowerVR doesn't fully support fp16 and so doesn't list this extension.
-    // But it can use fp16 in MADs and as buffer/texture types,
-    // so we use it anyway.
- info.supports_fp16 = true;
- info.supports_fp16_rtn = info.supports_fp32_rtn;
- }
-
- if (!info.supports_image3d_writes &&
- ((info.vendor == Vendor::kQualcomm &&
- IsGPUVersionInRange(info.adreno_info.gpu_version, 400, 500)) ||
- info.vendor == Vendor::kNvidia))
- {
-    // In local tests, Adreno 430 can write to image3d, at least for small
-    // sizes, but it doesn't list cl_khr_3d_image_writes among its available
-    // extensions.
-    // The same holds for NVIDIA.
- info.supports_image3d_writes = true;
- }
- info.compute_units_count = GetDeviceInfo<cl_uint>(id, CL_DEVICE_MAX_COMPUTE_UNITS);
- info.image2d_max_width = GetDeviceInfo<size_t>(id, CL_DEVICE_IMAGE2D_MAX_WIDTH);
- info.image2d_max_height = GetDeviceInfo<size_t>(id, CL_DEVICE_IMAGE2D_MAX_HEIGHT);
- info.buffer_max_size = GetDeviceInfo<cl_ulong>(id, CL_DEVICE_MAX_MEM_ALLOC_SIZE);
- if (info.cl_version >= OpenCLVersion::CL_1_2)
- {
- info.image_buffer_max_size = GetDeviceInfo<size_t>(id, CL_DEVICE_IMAGE_MAX_BUFFER_SIZE);
- info.image_array_max_layers = GetDeviceInfo<size_t>(id, CL_DEVICE_IMAGE_MAX_ARRAY_SIZE);
- }
- info.image3d_max_width = GetDeviceInfo<size_t>(id, CL_DEVICE_IMAGE3D_MAX_WIDTH);
-  info.image3d_max_height = GetDeviceInfo<size_t>(id, CL_DEVICE_IMAGE3D_MAX_HEIGHT);
- info.image3d_max_depth = GetDeviceInfo<size_t>(id, CL_DEVICE_IMAGE3D_MAX_DEPTH);
- int3 max_work_group_sizes;
- GetDeviceWorkDimsSizes(id, &max_work_group_sizes);
- info.max_work_group_size_x = max_work_group_sizes.x;
- info.max_work_group_size_y = max_work_group_sizes.y;
- info.max_work_group_size_z = max_work_group_sizes.z;
-
- if (info.IsIntel())
- {
- if (info.SupportsExtension("cl_intel_required_subgroup_size"))
- {
- size_t sub_groups_count;
- cl_int status = clGetDeviceInfo(id, 0x4108 /*CL_DEVICE_SUB_GROUP_SIZES_INTEL*/, 0, nullptr,
- &sub_groups_count);
- if (status == CL_SUCCESS)
- {
- std::vector<size_t> sub_group_sizes(sub_groups_count);
- status =
- clGetDeviceInfo(id, 0x4108 /*CL_DEVICE_SUB_GROUP_SIZES_INTEL*/,
- sizeof(size_t) * sub_groups_count, sub_group_sizes.data(), nullptr);
- if (status == CL_SUCCESS)
- {
- for (size_t i = 0; i < sub_groups_count; ++i)
- {
- info.supported_subgroup_sizes.push_back(sub_group_sizes[i]);
- }
- }
- }
- }
- }
- return info;
-}
-
-CLDevice::CLDevice(cl_device_id id, cl_platform_id platform_id)
- : info_(DeviceInfoFromDeviceID(id)), id_(id), platform_id_(platform_id)
-{
-}
-
-CLDevice::CLDevice(const CLDevice &device)
- : info_(device.info_), id_(device.id_), platform_id_(device.platform_id_)
-{
-}
-
-CLDevice &CLDevice::operator=(const CLDevice &device)
-{
- if (this != &device)
- {
- info_ = device.info_;
- id_ = device.id_;
- platform_id_ = device.platform_id_;
- }
- return *this;
-}
-
-CLDevice::CLDevice(CLDevice &&device)
- : info_(std::move(device.info_)), id_(device.id_), platform_id_(device.platform_id_)
-{
- device.id_ = nullptr;
- device.platform_id_ = nullptr;
-}
-
-CLDevice &CLDevice::operator=(CLDevice &&device)
-{
- if (this != &device)
- {
- id_ = nullptr;
- platform_id_ = nullptr;
- info_ = std::move(device.info_);
- std::swap(id_, device.id_);
- std::swap(platform_id_, device.platform_id_);
- }
- return *this;
-}
-
-bool CLDevice::SupportsFP16() const { return info_.supports_fp16; }
-
-bool CLDevice::SupportsExtension(const std::string &extension) const
-{
- return info_.SupportsExtension(extension);
-}
-
-bool CLDevice::SupportsTextureArray() const { return info_.SupportsTextureArray(); }
-
-bool CLDevice::SupportsImageBuffer() const { return info_.SupportsImageBuffer(); }
-
-bool CLDevice::SupportsImage3D() const { return info_.SupportsImage3D(); }
-
-bool CLDevice::SupportsFP32RTN() const { return info_.supports_fp32_rtn; }
-
-bool CLDevice::SupportsFP16RTN() const { return info_.supports_fp16_rtn; }
-
-std::string CLDevice::GetPlatformVersion() const
-{
- return GetPlatformInfo(platform_id_, CL_PLATFORM_VERSION);
-}
-
-bool CLDevice::IsCL20OrHigher() const { return info_.IsCL20OrHigher(); }
-
-bool CLDevice::SupportsSubGroupWithSize(int sub_group_size) const
-{
- return info_.SupportsSubGroupWithSize(sub_group_size);
-}
-
-bool CLDevice::IsAdreno() const { return info_.IsAdreno(); }
-
-bool CLDevice::IsAdreno3xx() const { return info_.IsAdreno3xx(); }
-
-bool CLDevice::IsAdreno4xx() const { return info_.IsAdreno4xx(); }
-
-bool CLDevice::IsAdreno5xx() const { return info_.IsAdreno5xx(); }
-
-bool CLDevice::IsAdreno6xx() const { return info_.IsAdreno6xx(); }
-
-bool CLDevice::IsAdreno6xxOrHigher() const { return info_.IsAdreno6xxOrHigher(); }
-
-bool CLDevice::IsPowerVR() const { return info_.IsPowerVR(); }
-
-bool CLDevice::IsNvidia() const { return info_.IsNvidia(); }
-
-bool CLDevice::IsMali() const { return info_.IsMali(); }
-
-bool CLDevice::IsAMD() const { return info_.IsAMD(); }
-
-bool CLDevice::IsIntel() const { return info_.IsIntel(); }
-
-bool CLDevice::SupportsOneLayerTextureArray() const { return info_.SupportsOneLayerTextureArray(); }
-
-void CLDevice::DisableOneLayerTextureArray()
-{
- info_.adreno_info.support_one_layer_texture_array = false;
-}
-
-absl::Status CreateDefaultGPUDevice(CLDevice *result)
-{
- cl_uint num_platforms;
- clGetPlatformIDs(0, nullptr, &num_platforms);
- if (num_platforms == 0)
- {
- return absl::UnknownError("No supported OpenCL platform.");
- }
- std::vector<cl_platform_id> platforms(num_platforms);
- clGetPlatformIDs(num_platforms, platforms.data(), nullptr);
-
- cl_platform_id platform_id = platforms[0];
- cl_uint num_devices;
- clGetDeviceIDs(platform_id, CL_DEVICE_TYPE_GPU, 0, nullptr, &num_devices);
- if (num_devices == 0)
- {
- return absl::UnknownError("No GPU on current platform.");
- }
-
- std::vector<cl_device_id> devices(num_devices);
- clGetDeviceIDs(platform_id, CL_DEVICE_TYPE_GPU, num_devices, devices.data(), nullptr);
-
- *result = CLDevice(devices[0], platform_id);
- return absl::OkStatus();
-}
-
-} // namespace gpu_cl
-} // namespace backend
-} // namespace onert
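
A short sketch of how the DeviceInfo parsed above is typically consumed; the branch is illustrative, not taken from the original sources.

#include "ClDevice.h"

using namespace onert::backend::gpu_cl;

// Sketch: DeviceInfoFromDeviceID already ran inside the CLDevice constructor,
// so feature checks are plain member calls.
void PickKernelVariant()
{
  CLDevice device;
  if (!CreateDefaultGPUDevice(&device).ok())
    return;
  if (device.IsAdreno6xxOrHigher() && device.SupportsFP16())
  {
    // e.g. prefer fp16 kernels sized for the Adreno 6xx full wave of 128.
  }
}
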
diff --git a/runtime/onert/backend/gpu_cl/open_cl/ClDevice.h b/runtime/onert/backend/gpu_cl/open_cl/ClDevice.h
deleted file mode 100644
index 6e740fe97..000000000
--- a/runtime/onert/backend/gpu_cl/open_cl/ClDevice.h
+++ /dev/null
@@ -1,119 +0,0 @@
-/*
- * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
- * Copyright 2019 The TensorFlow Authors. All Rights Reserved.
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#ifndef __ONERT_BACKEND_GPU_CL_OPENCL_CL_DEVICE_H__
-#define __ONERT_BACKEND_GPU_CL_OPENCL_CL_DEVICE_H__
-
-#include <string>
-#include <vector>
-
-#include "DeviceInfo.h"
-#include "OpenclWrapper.h"
-#include "Util.h"
-#include "Types.h"
-#include "Status.h"
-
-namespace onert
-{
-namespace backend
-{
-namespace gpu_cl
-{
-
-// A wrapper around an OpenCL device id
-class CLDevice
-{
-public:
- CLDevice() = default;
- CLDevice(cl_device_id id, cl_platform_id platform_id);
-
- CLDevice(CLDevice &&device);
- CLDevice &operator=(CLDevice &&device);
- CLDevice(const CLDevice &);
- CLDevice &operator=(const CLDevice &);
-
- ~CLDevice() {}
-
- cl_device_id id() const { return id_; }
- cl_platform_id platform() const { return platform_id_; }
- std::string GetPlatformVersion() const;
-
- Vendor vendor() const { return info_.vendor; }
- OpenCLVersion cl_version() const { return info_.cl_version; }
- bool SupportsFP16() const;
- bool SupportsTextureArray() const;
- bool SupportsImageBuffer() const;
- bool SupportsImage3D() const;
- bool SupportsExtension(const std::string &extension) const;
- bool SupportsFP32RTN() const;
- bool SupportsFP16RTN() const;
- bool IsCL20OrHigher() const;
- bool SupportsSubGroupWithSize(int sub_group_size) const;
- bool IsAdreno() const;
- bool IsAdreno3xx() const;
- bool IsAdreno4xx() const;
- bool IsAdreno5xx() const;
- bool IsAdreno6xx() const;
- bool IsAdreno6xxOrHigher() const;
- bool IsPowerVR() const;
- bool IsNvidia() const;
- bool IsMali() const;
- bool IsAMD() const;
- bool IsIntel() const;
-
- // To track bug on some Adreno. b/131099086
- bool SupportsOneLayerTextureArray() const;
- void DisableOneLayerTextureArray();
-
- const DeviceInfo &GetInfo() const { return info_; }
-  // We update device info during context creation, since supported texture
-  // formats can only be requested from a context.
- mutable DeviceInfo info_;
-
-private:
- cl_device_id id_ = nullptr;
- cl_platform_id platform_id_ = nullptr;
-};
-
-absl::Status CreateDefaultGPUDevice(CLDevice *result);
-
-template <typename T> T GetDeviceInfo(cl_device_id id, cl_device_info info)
-{
- T result;
- cl_int error = clGetDeviceInfo(id, info, sizeof(T), &result, nullptr);
- if (error != CL_SUCCESS)
- {
- return -1;
- }
- return result;
-}
-
-template <typename T> absl::Status GetDeviceInfo(cl_device_id id, cl_device_info info, T *result)
-{
- cl_int error = clGetDeviceInfo(id, info, sizeof(T), result, nullptr);
- if (error != CL_SUCCESS)
- {
- return absl::InvalidArgumentError(CLErrorCodeToString(error));
- }
- return absl::OkStatus();
-}
-
-} // namespace gpu_cl
-} // namespace backend
-} // namespace onert
-
-#endif // __ONERT_BACKEND_GPU_CL_OPENCL_CL_DEVICE_H__
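
The two GetDeviceInfo flavors declared above differ only in error reporting; a minimal sketch (illustrative function name):

#include "ClDevice.h"

using namespace onert::backend::gpu_cl;

void QueryLimits(cl_device_id id)
{
  // Value-returning flavor: yields -1 on failure.
  cl_uint compute_units = GetDeviceInfo<cl_uint>(id, CL_DEVICE_MAX_COMPUTE_UNITS);

  // Status-returning flavor: surfaces the CL error string instead.
  cl_ulong global_mem = 0;
  absl::Status status = GetDeviceInfo<cl_ulong>(id, CL_DEVICE_GLOBAL_MEM_SIZE, &global_mem);
  (void)compute_units;
  (void)status;
}
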
diff --git a/runtime/onert/backend/gpu_cl/open_cl/ClErrors.h b/runtime/onert/backend/gpu_cl/open_cl/ClErrors.h
deleted file mode 100644
index 48cd2fb00..000000000
--- a/runtime/onert/backend/gpu_cl/open_cl/ClErrors.h
+++ /dev/null
@@ -1,48 +0,0 @@
-/*
- * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
- * Copyright 2019 The TensorFlow Authors. All Rights Reserved.
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#ifndef __ONERT_BACKEND_GPU_CL_OPENCL_CL_ERRORS_H__
-#define __ONERT_BACKEND_GPU_CL_OPENCL_CL_ERRORS_H__
-
-#include <string>
-
-#include "Util.h"
-#include "Status.h"
-
-namespace onert
-{
-namespace backend
-{
-namespace gpu_cl
-{
-
-// @return OK status if error_code is CL_SUCCESS; otherwise translates the
-// error code into an error message.
-inline absl::Status GetOpenCLError(cl_int error_code)
-{
- if (error_code == CL_SUCCESS)
- {
- return absl::OkStatus();
- }
- return absl::InternalError("OpenCL error: " + CLErrorCodeToString(error_code));
-}
-
-} // namespace gpu_cl
-} // namespace backend
-} // namespace onert
-
-#endif // __ONERT_BACKEND_GPU_CL_OPENCL_CL_ERRORS_H__
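
A minimal sketch of the intended use: wrap any raw OpenCL return code at the call site.

#include "ClErrors.h"

// Sketch: translate clFinish's error code into an absl::Status.
absl::Status FinishQueue(cl_command_queue queue)
{
  return onert::backend::gpu_cl::GetOpenCLError(clFinish(queue));
}
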
diff --git a/runtime/onert/backend/gpu_cl/open_cl/ClEvent.cc b/runtime/onert/backend/gpu_cl/open_cl/ClEvent.cc
deleted file mode 100644
index beb64a9a8..000000000
--- a/runtime/onert/backend/gpu_cl/open_cl/ClEvent.cc
+++ /dev/null
@@ -1,88 +0,0 @@
-/*
- * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
- * Copyright 2019 The TensorFlow Authors. All Rights Reserved.
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#include "ClEvent.h"
-
-#include "OpenclWrapper.h"
-
-namespace onert
-{
-namespace backend
-{
-namespace gpu_cl
-{
-CLEvent::CLEvent(cl_event event) : event_(event) {}
-
-CLEvent::CLEvent(CLEvent &&event) : event_(event.event_), name_(std::move(event.name_))
-{
- event.event_ = nullptr;
-}
-
-CLEvent &CLEvent::operator=(CLEvent &&event)
-{
- if (this != &event)
- {
- Release();
- std::swap(event_, event.event_);
- name_ = std::move(event.name_);
- }
- return *this;
-}
-
-uint64_t CLEvent::GetStartedTimeNs() const
-{
- cl_ulong time_ns;
- clGetEventProfilingInfo(event_, CL_PROFILING_COMMAND_START, sizeof(cl_ulong), &time_ns, nullptr);
- return time_ns;
-}
-
-uint64_t CLEvent::GetFinishedTimeNs() const
-{
- cl_ulong time_ns;
- clGetEventProfilingInfo(event_, CL_PROFILING_COMMAND_END, sizeof(cl_ulong), &time_ns, nullptr);
- return time_ns;
-}
-
-double CLEvent::GetEventTimeMs() const
-{
- const uint64_t start = GetStartedTimeNs();
- const uint64_t end = GetFinishedTimeNs();
- const uint64_t time_ns = (end - start);
-
- return static_cast<double>(time_ns) * 1e-6;
-}
-
-uint64_t CLEvent::GetEventTimeNs() const { return GetFinishedTimeNs() - GetStartedTimeNs(); }
-
-void CLEvent::SetName(const std::string &name) { name_ = name; }
-
-void CLEvent::Wait() const { clWaitForEvents(1, &event_); }
-
-CLEvent::~CLEvent() { Release(); }
-
-void CLEvent::Release()
-{
- if (event_)
- {
- clReleaseEvent(event_);
- event_ = nullptr;
- }
-}
-
-} // namespace gpu_cl
-} // namespace backend
-} // namespace onert
diff --git a/runtime/onert/backend/gpu_cl/open_cl/ClEvent.h b/runtime/onert/backend/gpu_cl/open_cl/ClEvent.h
deleted file mode 100644
index 265409ffe..000000000
--- a/runtime/onert/backend/gpu_cl/open_cl/ClEvent.h
+++ /dev/null
@@ -1,75 +0,0 @@
-/*
- * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
- * Copyright 2019 The TensorFlow Authors. All Rights Reserved.
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#ifndef __ONERT_BACKEND_GPU_CL_OPENCL_CL_EVENT_H__
-#define __ONERT_BACKEND_GPU_CL_OPENCL_CL_EVENT_H__
-
-#include <cstdint>
-#include <string>
-
-#include "OpenclWrapper.h"
-
-namespace onert
-{
-namespace backend
-{
-namespace gpu_cl
-{
-
-// A RAII wrapper around an OpenCL event
-class CLEvent
-{
-public:
- CLEvent() {}
- explicit CLEvent(cl_event event);
-
- // Move only
- CLEvent(CLEvent &&event);
- CLEvent &operator=(CLEvent &&event);
- CLEvent(const CLEvent &) = delete;
- CLEvent &operator=(const CLEvent &) = delete;
-
- ~CLEvent();
-
- uint64_t GetStartedTimeNs() const;
- uint64_t GetFinishedTimeNs() const;
-
- double GetEventTimeMs() const;
- uint64_t GetEventTimeNs() const;
-
- void Wait() const;
-
- cl_event event() const { return event_; }
-
- bool is_valid() const { return event_ != nullptr; }
-
- void SetName(const std::string &name);
- std::string GetName() const { return name_; }
-
-private:
- void Release();
-
- cl_event event_ = nullptr;
-
- std::string name_; // optional, for profiling mostly
-};
-
-} // namespace gpu_cl
-} // namespace backend
-} // namespace onert
-
-#endif // __ONERT_BACKEND_GPU_CL_OPENCL_CL_EVENT_H__
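
A sketch of event-based profiling with this wrapper. It assumes the command queue was created with CL_QUEUE_PROFILING_ENABLE (otherwise the profiling queries fail); the kernel and work sizes are placeholders.

#include "ClEvent.h"

using namespace onert::backend::gpu_cl;

double TimeKernelMs(cl_command_queue queue, cl_kernel kernel,
                    const size_t *global, const size_t *local)
{
  cl_event raw = nullptr;
  if (clEnqueueNDRangeKernel(queue, kernel, 3, nullptr, global, local,
                             0, nullptr, &raw) != CL_SUCCESS)
  {
    return -1.0;
  }
  CLEvent event(raw); // RAII: clReleaseEvent runs in the destructor
  event.SetName("profiled_kernel"); // optional label, hypothetical name
  event.Wait();
  return event.GetEventTimeMs();
}
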
diff --git a/runtime/onert/backend/gpu_cl/open_cl/ClImageFormat.cc b/runtime/onert/backend/gpu_cl/open_cl/ClImageFormat.cc
deleted file mode 100644
index 247a63d39..000000000
--- a/runtime/onert/backend/gpu_cl/open_cl/ClImageFormat.cc
+++ /dev/null
@@ -1,59 +0,0 @@
-/*
- * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
- * Copyright 2019 The TensorFlow Authors. All Rights Reserved.
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#include "ClImageFormat.h"
-
-namespace onert
-{
-namespace backend
-{
-namespace gpu_cl
-{
-
-cl_channel_order ToChannelOrder(int num_channels)
-{
- switch (num_channels)
- {
- case 1:
- return CL_R;
- case 2:
- return CL_RG;
- case 3:
- return CL_RGB;
- case 4:
- return CL_RGBA;
- default:
- return -1;
- }
-}
-
-cl_channel_type ToImageChannelType(DataType data_type)
-{
- switch (data_type)
- {
- case DataType::FLOAT32:
- return CL_FLOAT;
- case DataType::FLOAT16:
- return CL_HALF_FLOAT;
- default:
- return -1;
- }
-}
-
-} // namespace gpu_cl
-} // namespace backend
-} // namespace onert
diff --git a/runtime/onert/backend/gpu_cl/open_cl/ClImageFormat.h b/runtime/onert/backend/gpu_cl/open_cl/ClImageFormat.h
deleted file mode 100644
index a763746bd..000000000
--- a/runtime/onert/backend/gpu_cl/open_cl/ClImageFormat.h
+++ /dev/null
@@ -1,39 +0,0 @@
-/*
- * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
- * Copyright 2019 The TensorFlow Authors. All Rights Reserved.
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#ifndef __ONERT_BACKEND_GPU_CL_OPENCL_CL_IMAGE_FORMAT_H__
-#define __ONERT_BACKEND_GPU_CL_OPENCL_CL_IMAGE_FORMAT_H__
-
-#include "OpenclWrapper.h"
-#include "DataType.h"
-
-namespace onert
-{
-namespace backend
-{
-namespace gpu_cl
-{
-
-cl_channel_order ToChannelOrder(int num_channels);
-
-cl_channel_type ToImageChannelType(DataType data_type);
-
-} // namespace gpu_cl
-} // namespace backend
-} // namespace onert
-
-#endif // __ONERT_BACKEND_GPU_CL_OPENCL_CL_IMAGE_FORMAT_H__
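
A sketch combining both helpers into a cl_image_format (illustrative function):

#include "ClImageFormat.h"

cl_image_format MakeImageFormat(onert::backend::gpu_cl::DataType data_type, int num_channels)
{
  cl_image_format format;
  format.image_channel_order = onert::backend::gpu_cl::ToChannelOrder(num_channels);
  format.image_channel_data_type = onert::backend::gpu_cl::ToImageChannelType(data_type);
  // Both helpers return -1 for unsupported inputs, so callers should validate.
  return format;
}
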
diff --git a/runtime/onert/backend/gpu_cl/open_cl/ClKernel.cc b/runtime/onert/backend/gpu_cl/open_cl/ClKernel.cc
deleted file mode 100644
index f7745b9ac..000000000
--- a/runtime/onert/backend/gpu_cl/open_cl/ClKernel.cc
+++ /dev/null
@@ -1,171 +0,0 @@
-/*
- * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
- * Copyright 2019 The TensorFlow Authors. All Rights Reserved.
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#include "ClKernel.h"
-
-#include "absl/strings/str_cat.h"
-#include "ClProgram.h"
-#include "Util.h"
-#include "Status.h"
-
-namespace onert
-{
-namespace backend
-{
-namespace gpu_cl
-{
-namespace
-{
-
-absl::Status GetKernelMaxWorkGroupSize(cl_kernel kernel, cl_device_id device_id, int *result)
-{
- size_t max_work_group_size;
- cl_int error_code = clGetKernelWorkGroupInfo(kernel, device_id, CL_KERNEL_WORK_GROUP_SIZE,
- sizeof(size_t), &max_work_group_size, nullptr);
- if (error_code != CL_SUCCESS)
- {
- return absl::UnknownError(absl::StrCat("Failed to get info CL_KERNEL_WORK_GROUP_SIZE ",
- CLErrorCodeToString(error_code)));
- }
- *result = static_cast<int>(max_work_group_size);
- return absl::OkStatus();
-}
-
-absl::Status GetKernelPrivateMemorySize(cl_kernel kernel, cl_device_id device_id, int *result)
-{
- cl_ulong private_mem_size;
- cl_int error_code = clGetKernelWorkGroupInfo(kernel, device_id, CL_KERNEL_PRIVATE_MEM_SIZE,
- sizeof(cl_ulong), &private_mem_size, nullptr);
- if (error_code != CL_SUCCESS)
- {
- return absl::UnknownError(absl::StrCat("Failed to get info CL_KERNEL_PRIVATE_MEM_SIZE ",
- CLErrorCodeToString(error_code)));
- }
- *result = static_cast<int>(private_mem_size);
- return absl::OkStatus();
-}
-
-} // namespace
-
-CLKernel::CLKernel(CLKernel &&kernel)
- : info_(kernel.info_), binding_counter_(kernel.binding_counter_),
- function_name_(std::move(kernel.function_name_)), program_(kernel.program_),
- kernel_(kernel.kernel_)
-{
- kernel.kernel_ = nullptr;
-}
-
-CLKernel &CLKernel::operator=(CLKernel &&kernel)
-{
- if (this != &kernel)
- {
- Release();
- std::swap(info_, kernel.info_);
- std::swap(binding_counter_, kernel.binding_counter_);
- function_name_ = std::move(kernel.function_name_);
- std::swap(program_, kernel.program_);
- std::swap(kernel_, kernel.kernel_);
- }
- return *this;
-}
-
-CLKernel::~CLKernel() { Release(); }
-
-absl::Status CLKernel::ReInit() const
-{
- clReleaseKernel(kernel_);
- cl_kernel *kern_ptr = const_cast<cl_kernel *>(&kernel_);
- int error_code;
- *kern_ptr = clCreateKernel(program_, function_name_.c_str(), &error_code);
- if (!kernel_ || error_code != CL_SUCCESS)
- {
- *kern_ptr = nullptr;
- return absl::UnknownError(
- absl::StrCat("Failed to create ", function_name_, CLErrorCodeToString(error_code)));
- }
- return absl::OkStatus();
-}
-
-void CLKernel::Release()
-{
- if (kernel_)
- {
- clReleaseKernel(kernel_);
- clReleaseProgram(program_);
- kernel_ = nullptr;
- }
-}
-
-absl::Status CLKernel::CreateFromProgram(const CLProgram &program, const std::string &function_name)
-{
- int error_code;
- function_name_ = function_name;
- kernel_ = clCreateKernel(program.program(), function_name.c_str(), &error_code);
- if (!kernel_ || error_code != CL_SUCCESS)
- {
- kernel_ = nullptr;
- return absl::UnknownError(
- absl::StrCat("Failed to create ", function_name, CLErrorCodeToString(error_code)));
- }
-
- program_ = program.program();
- clRetainProgram(program_);
-
- RETURN_IF_ERROR(
- GetKernelPrivateMemorySize(kernel_, program.GetDeviceId(), &info_.private_memory_size));
- RETURN_IF_ERROR(
- GetKernelMaxWorkGroupSize(kernel_, program.GetDeviceId(), &info_.max_work_group_size));
- return absl::OkStatus();
-}
-
-absl::Status CLKernel::SetMemory(int index, cl_mem memory)
-{
- return SetBytes(index, &memory, sizeof(cl_mem));
-}
-
-absl::Status CLKernel::SetMemoryAuto(cl_mem memory)
-{
- return SetBytesAuto(&memory, sizeof(cl_mem));
-}
-
-absl::Status CLKernel::SetBytes(int index, const void *ptr, int length) const
-{
- const int error_code = clSetKernelArg(kernel_, index, length, ptr);
- if (error_code != CL_SUCCESS)
- {
- return absl::UnknownError(
- absl::StrCat("Failed to set kernel arguments - ", CLErrorCodeToString(error_code)));
- }
- return absl::OkStatus();
-}
-
-absl::Status CLKernel::SetBytesAuto(const void *ptr, int length)
-{
- const int error_code = clSetKernelArg(kernel_, binding_counter_, length, ptr);
- if (error_code != CL_SUCCESS)
- {
- return absl::UnknownError(absl::StrCat("Failed to set kernel arguments - ",
- CLErrorCodeToString(error_code), "(at index - ",
- binding_counter_, ")"));
- }
- binding_counter_++;
- return absl::OkStatus();
-}
-
-} // namespace gpu_cl
-} // namespace backend
-} // namespace onert
diff --git a/runtime/onert/backend/gpu_cl/open_cl/ClKernel.h b/runtime/onert/backend/gpu_cl/open_cl/ClKernel.h
deleted file mode 100644
index 9575b7946..000000000
--- a/runtime/onert/backend/gpu_cl/open_cl/ClKernel.h
+++ /dev/null
@@ -1,101 +0,0 @@
-/*
- * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
- * Copyright 2019 The TensorFlow Authors. All Rights Reserved.
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#ifndef __ONERT_BACKEND_GPU_CL_OPENCL_CL_KERNEL_H__
-#define __ONERT_BACKEND_GPU_CL_OPENCL_CL_KERNEL_H__
-
-#include <string>
-
-#include "ClContext.h"
-#include "ClDevice.h"
-#include "ClProgram.h"
-#include "OpenclWrapper.h"
-#include "Status.h"
-
-namespace onert
-{
-namespace backend
-{
-namespace gpu_cl
-{
-
-struct KernelInfo
-{
- int private_memory_size = 0;
- int max_work_group_size = 0;
-};
-
-// Argument binding to CLKernel can be manual or automatic.
-// In manual mode you specify the binding index explicitly.
-// In automatic mode the index is auto-incremented with every binding call.
-// If you use automatic mode, you must call ResetBindingCounter
-// before binding parameters.
-class CLKernel
-{
-public:
- CLKernel() {}
-
- // Move only
- CLKernel(CLKernel &&kernel);
- CLKernel &operator=(CLKernel &&kernel);
- CLKernel(const CLKernel &) = delete;
- CLKernel &operator=(const CLKernel &) = delete;
-
- ~CLKernel();
-
- cl_kernel kernel() const { return kernel_; }
-
- absl::Status CreateFromProgram(const CLProgram &program, const std::string &function_name);
-
- absl::Status SetMemory(int index, cl_mem memory);
- absl::Status SetMemoryAuto(cl_mem memory);
- template <typename T> absl::Status SetBytes(int index, const T &value) const
- {
- return SetBytes(index, static_cast<const void *>(&value), sizeof(T));
- }
- template <typename T> absl::Status SetBytesAuto(const T &value)
- {
- return SetBytesAuto(static_cast<const void *>(&value), sizeof(T));
- }
-
- int GetBindingCounter() const { return binding_counter_; }
- void ResetBindingCounter() { binding_counter_ = 0; }
-
-  // Do not use this function:
-  // it is a workaround for a Mali memory leak
- absl::Status ReInit() const;
-
- KernelInfo info_;
-
-private:
- void Release();
- absl::Status SetBytes(int index, const void *ptr, int length) const;
- absl::Status SetBytesAuto(const void *ptr, int length);
-
- int binding_counter_ = -1;
-
- std::string function_name_ = "";
- // reference to program from which kernel was created
- cl_program program_ = nullptr;
- cl_kernel kernel_ = nullptr;
-};
-
-} // namespace gpu_cl
-} // namespace backend
-} // namespace onert
-
-#endif // __ONERT_BACKEND_GPU_CL_OPENCL_CL_KERNEL_H__
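
A sketch of the automatic binding mode described in the class comment; the kernel is assumed to be created already, and RETURN_IF_ERROR comes from the deleted Status.h.

#include "ClKernel.h"

using namespace onert::backend::gpu_cl;

absl::Status BindKernelArgs(CLKernel &kernel, cl_mem input, cl_mem output, float scale)
{
  kernel.ResetBindingCounter();                  // mandatory before auto binding
  RETURN_IF_ERROR(kernel.SetMemoryAuto(input));  // arg 0
  RETURN_IF_ERROR(kernel.SetMemoryAuto(output)); // arg 1
  RETURN_IF_ERROR(kernel.SetBytesAuto(scale));   // arg 2
  return absl::OkStatus();
}
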
diff --git a/runtime/onert/backend/gpu_cl/open_cl/ClMemory.h b/runtime/onert/backend/gpu_cl/open_cl/ClMemory.h
deleted file mode 100644
index c704ec71f..000000000
--- a/runtime/onert/backend/gpu_cl/open_cl/ClMemory.h
+++ /dev/null
@@ -1,100 +0,0 @@
-/*
- * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
- * Copyright 2019 The TensorFlow Authors. All Rights Reserved.
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#ifndef __ONERT_BACKEND_GPU_CL_OPENCL_CL_MEMORY_H__
-#define __ONERT_BACKEND_GPU_CL_OPENCL_CL_MEMORY_H__
-
-#include <algorithm>
-
-#include "OpenclWrapper.h"
-#include "AccessType.h"
-
-namespace onert
-{
-namespace backend
-{
-namespace gpu_cl
-{
-
-// RAII wrapper for an OpenCL memory object.
-//
-// The wrapper is movable but not copyable.
-class CLMemory
-{
-public:
- // Creates invalid object.
- CLMemory() : CLMemory(nullptr, false) {}
-
- CLMemory(cl_mem memory, bool has_ownership) : memory_(memory), has_ownership_(has_ownership) {}
-
- // Move-only
- CLMemory(const CLMemory &) = delete;
- CLMemory &operator=(const CLMemory &) = delete;
- CLMemory(CLMemory &&image) : memory_(image.memory_), has_ownership_(image.has_ownership_)
- {
- image.memory_ = nullptr;
- }
-
- ~CLMemory() { Invalidate(); }
-
- CLMemory &operator=(CLMemory &&image)
- {
- if (this != &image)
- {
- Invalidate();
- std::swap(memory_, image.memory_);
- has_ownership_ = image.has_ownership_;
- }
- return *this;
- }
-
- cl_mem memory() const { return memory_; }
-
- bool is_valid() const { return memory_ != nullptr; }
-
-  // @return true if this object actually owns the corresponding CL memory
-  // and manages its lifetime.
- bool has_ownership() const { return has_ownership_; }
-
- cl_mem Release()
- {
- cl_mem to_return = memory_;
- memory_ = nullptr;
- return to_return;
- }
-
-private:
- void Invalidate()
- {
- if (memory_ && has_ownership_)
- {
- clReleaseMemObject(memory_);
- }
- memory_ = nullptr;
- }
-
- cl_mem memory_ = nullptr;
- bool has_ownership_ = false;
-};
-
-cl_mem_flags ToClMemFlags(AccessType access_type);
-
-} // namespace gpu_cl
-} // namespace backend
-} // namespace onert
-
-#endif // __ONERT_BACKEND_GPU_CL_OPENCL_CL_MEMORY_H__
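
A sketch of handing a freshly created buffer to the wrapper (illustrative function):

#include "ClMemory.h"

onert::backend::gpu_cl::CLMemory AllocateBuffer(cl_context context, size_t bytes)
{
  cl_int error = CL_SUCCESS;
  cl_mem raw = clCreateBuffer(context, CL_MEM_READ_WRITE, bytes, nullptr, &error);
  if (error != CL_SUCCESS)
  {
    return onert::backend::gpu_cl::CLMemory(); // invalid object
  }
  // has_ownership = true: clReleaseMemObject runs when the wrapper is destroyed.
  return onert::backend::gpu_cl::CLMemory(raw, /*has_ownership=*/true);
}
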
diff --git a/runtime/onert/backend/gpu_cl/open_cl/ClProgram.cc b/runtime/onert/backend/gpu_cl/open_cl/ClProgram.cc
deleted file mode 100644
index c72b01a73..000000000
--- a/runtime/onert/backend/gpu_cl/open_cl/ClProgram.cc
+++ /dev/null
@@ -1,224 +0,0 @@
-/*
- * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
- * Copyright 2019 The TensorFlow Authors. All Rights Reserved.
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#include "ClProgram.h"
-
-#include <cstdint>
-#include <cstring>
-#include <vector>
-
-#include "absl/strings/str_cat.h"
-#include "absl/types/span.h"
-#include "Util.h"
-#include "Status.h"
-
-namespace onert
-{
-namespace backend
-{
-namespace gpu_cl
-{
-namespace
-{
-
-std::string GetProgramBuildInfo(cl_program program, cl_device_id id, cl_program_build_info info)
-{
- size_t size;
- cl_int error_code = clGetProgramBuildInfo(program, id, info, 0, nullptr, &size);
- if (error_code != CL_SUCCESS)
- {
- return absl::StrCat("Failed to GetProgramBuildInfo - ", CLErrorCodeToString(error_code));
- }
-
- std::string result(size - 1, 0);
- error_code = clGetProgramBuildInfo(program, id, info, size, &result[0], nullptr);
- if (error_code != CL_SUCCESS)
- {
- return absl::StrCat("Failed to GetProgramBuildInfo - ", CLErrorCodeToString(error_code));
- }
- return result;
-}
-
-absl::Status GetBinarySize(cl_program program, size_t *binary_size)
-{
- cl_int error_code =
- clGetProgramInfo(program, CL_PROGRAM_BINARY_SIZES, sizeof(size_t), binary_size, nullptr);
- if (error_code != CL_SUCCESS)
- {
- return absl::UnknownError(
- absl::StrCat("Failed to get program binary size - ", CLErrorCodeToString(error_code)));
- }
- return absl::OkStatus();
-}
-
-absl::Status BuildProgram(cl_program program, const CLDevice &device,
- const std::string &compiler_options)
-{
- const int error_code =
- clBuildProgram(program, 0, nullptr, compiler_options.c_str(), nullptr, nullptr);
- if (error_code != CL_SUCCESS)
- {
- return absl::UnknownError(
- absl::StrCat("Failed to build program executable - ", CLErrorCodeToString(error_code),
- GetProgramBuildInfo(program, device.id(), CL_PROGRAM_BUILD_LOG)));
- }
-
- return absl::OkStatus();
-}
-
-std::string CompilerOptionToString(const CLDevice &device, CompilerOptions option)
-{
- switch (option)
- {
- case CompilerOptions::ADRENO_FULL_SIMD_LINE:
- if (device.info_.adreno_info.gpu_version < 500)
- {
- return "-qcom-accelerate-16-bit";
- }
- else
- {
- return "-qcom-accelerate-16-bit=true";
- }
- case CompilerOptions::ADRENO_MORE_WAVES:
- if (device.info_.adreno_info.gpu_version >= 500)
- {
- return "-qcom-accelerate-16-bit=false";
- }
- else
- {
- return "";
- }
- case CompilerOptions::POWERVR_FP16:
- return "-cl-fast-relaxed-math";
- case CompilerOptions::CL_OPT_DISABLE:
- return "-cl-opt-disable";
- case CompilerOptions::CL_2_0:
- return "-cl-std=CL2.0";
- case CompilerOptions::CL_3_0:
- return "-cl-std=CL3.0";
- }
- return "";
-}
-
-} // namespace
-
-std::string CompilerOptionsToString(const CLDevice &device,
- const std::vector<CompilerOptions> &compiler_options)
-{
- std::string result;
- for (auto option : compiler_options)
- {
- absl::StrAppend(&result, CompilerOptionToString(device, option), " ");
- }
- return result;
-}
-
-CLProgram::CLProgram(cl_program program, cl_device_id device_id)
- : program_(program), device_id_(device_id)
-{
-}
-
-CLProgram::CLProgram(CLProgram &&program)
- : program_(program.program_), device_id_(program.device_id_)
-{
- program.program_ = nullptr;
-}
-
-CLProgram &CLProgram::operator=(CLProgram &&program)
-{
- if (this != &program)
- {
- Release();
- std::swap(program_, program.program_);
- std::swap(device_id_, program.device_id_);
- }
- return *this;
-}
-
-CLProgram::~CLProgram() { Release(); }
-
-void CLProgram::Release()
-{
- if (program_)
- {
- clReleaseProgram(program_);
- program_ = nullptr;
- }
-}
-
-absl::Status CLProgram::GetBinary(std::vector<uint8_t> *result) const
-{
- size_t binary_size;
- RETURN_IF_ERROR(GetBinarySize(program_, &binary_size));
- result->resize(result->size() + binary_size);
- uint8_t *binary_ptr = result->data() + result->size() - binary_size;
- cl_int error_code =
- clGetProgramInfo(program_, CL_PROGRAM_BINARIES, binary_size, &binary_ptr, nullptr);
- if (error_code != CL_SUCCESS)
- {
- return absl::UnknownError(
- absl::StrCat("Failed to get program binary - ", CLErrorCodeToString(error_code)));
- }
- return absl::OkStatus();
-}
-
-absl::Status CreateCLProgram(const std::string &code, const std::string &compiler_options,
- const CLContext &context, const CLDevice &device, CLProgram *result)
-{
- int error_code;
- const char *source = code.c_str();
-
- cl_program program =
- clCreateProgramWithSource(context.context(), 1, &source, nullptr, &error_code);
- if (!program || error_code != CL_SUCCESS)
- {
- return absl::UnknownError(
- absl::StrCat("Failed to create compute program - ", CLErrorCodeToString(error_code)));
- }
-
- *result = CLProgram(program, device.id());
- RETURN_IF_ERROR(BuildProgram(program, device, compiler_options));
- return absl::OkStatus();
-}
-
-absl::Status CreateCLProgramFromBinary(const CLContext &context, const CLDevice &device,
- absl::Span<const uint8_t> binary, CLProgram *result)
-{
- cl_int binary_status;
- cl_int error_code;
- cl_device_id devices_list[] = {device.id()};
- size_t binary_size = binary.size();
- const uint8_t *binary_pointer = binary.data();
- cl_program program = clCreateProgramWithBinary(context.context(), 1, devices_list, &binary_size,
- &binary_pointer, &binary_status, &error_code);
- if (binary_status != CL_SUCCESS)
- {
- return absl::UnknownError(absl::StrCat(
- "Something wrong with binary after clCreateProgramWithBinary - ", binary_status));
- }
- if (error_code != CL_SUCCESS)
- {
- return absl::UnknownError(
- absl::StrCat("Failed to create program - ", CLErrorCodeToString(error_code)));
- }
- *result = CLProgram(program, device.id());
- return BuildProgram(program, device, "");
-}
-
-} // namespace gpu_cl
-} // namespace backend
-} // namespace onert
diff --git a/runtime/onert/backend/gpu_cl/open_cl/ClProgram.h b/runtime/onert/backend/gpu_cl/open_cl/ClProgram.h
deleted file mode 100644
index d039ff698..000000000
--- a/runtime/onert/backend/gpu_cl/open_cl/ClProgram.h
+++ /dev/null
@@ -1,98 +0,0 @@
-/*
- * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
- * Copyright 2019 The TensorFlow Authors. All Rights Reserved.
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#ifndef __ONERT_BACKEND_GPU_CL_OPENCL_CL_PROGRAM_H__
-#define __ONERT_BACKEND_GPU_CL_OPENCL_CL_PROGRAM_H__
-
-#include <cstdint>
-#include <vector>
-
-#include "ClContext.h"
-#include "ClDevice.h"
-#include "OpenclWrapper.h"
-
-namespace onert
-{
-namespace backend
-{
-namespace gpu_cl
-{
-
-enum class CompilerOptions
-{
-  // ADRENO_FULL_SIMD_LINE:
-  // Adreno can have 2 sizes for SIMD size.
-  // On Adreno 4xx/5xx it is 32/64, on Adreno 6xx it is 64/128.
-  // Some of our algorithms actually rely on the exact size, for example on
-  // the full SIMD size, so we need this define.
-  // This define is actually -qcom-accelerate-16-bit, but it controls SIMD
-  // size.
- ADRENO_FULL_SIMD_LINE,
- ADRENO_MORE_WAVES,
- POWERVR_FP16,
- CL_OPT_DISABLE,
- CL_2_0,
- CL_3_0,
-};
-
-std::string CompilerOptionsToString(const CLDevice &device,
- const std::vector<CompilerOptions> &compiler_options);
-
-class CLProgram
-{
-public:
- CLProgram() {}
- CLProgram(cl_program program, cl_device_id device_id);
-
- // Move only
- CLProgram(CLProgram &&program);
- CLProgram &operator=(CLProgram &&program);
- CLProgram(const CLProgram &) = delete;
- CLProgram &operator=(const CLProgram &) = delete;
-
- ~CLProgram();
-
- cl_program program() const { return program_; }
-
-  // Returns the cl_device_id associated with the program object.
-  // This can be the device associated with the context on which the program
-  // object was created, or the device that was specified when the program
-  // object was created using clCreateProgramWithBinary.
- cl_device_id GetDeviceId() const { return device_id_; }
-
- absl::Status GetBinary(std::vector<uint8_t> *result) const;
-
-private:
- void Release();
-
- cl_program program_ = nullptr;
-
- // reference
- cl_device_id device_id_ = nullptr;
-};
-
-absl::Status CreateCLProgram(const std::string &code, const std::string &compiler_options,
- const CLContext &context, const CLDevice &device, CLProgram *result);
-
-absl::Status CreateCLProgramFromBinary(const CLContext &context, const CLDevice &device,
- absl::Span<const uint8_t> binary, CLProgram *result);
-
-} // namespace gpu_cl
-} // namespace backend
-} // namespace onert
-
-#endif // __ONERT_BACKEND_GPU_CL_OPENCL_CL_PROGRAM_H__
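
A sketch of the compile-then-cache round trip these entry points enable; the function name is illustrative and RETURN_IF_ERROR comes from the deleted Status.h.

#include "ClProgram.h"

#include <string>
#include <vector>

using namespace onert::backend::gpu_cl;

absl::Status CompileAndReload(const CLContext &context, const CLDevice &device,
                              const std::string &code)
{
  CLProgram program;
  RETURN_IF_ERROR(CreateCLProgram(code, /*compiler_options=*/"", context, device, &program));

  std::vector<uint8_t> binary;
  RETURN_IF_ERROR(program.GetBinary(&binary)); // persist this for later runs

  CLProgram restored;
  return CreateCLProgramFromBinary(context, device, absl::MakeConstSpan(binary), &restored);
}
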
diff --git a/runtime/onert/backend/gpu_cl/open_cl/DataType.cc b/runtime/onert/backend/gpu_cl/open_cl/DataType.cc
deleted file mode 100644
index ce2aa8298..000000000
--- a/runtime/onert/backend/gpu_cl/open_cl/DataType.cc
+++ /dev/null
@@ -1,122 +0,0 @@
-/*
- * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
- * Copyright 2019 The TensorFlow Authors. All Rights Reserved.
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#include "DataType.h"
-
-#include <stddef.h>
-#include <string>
-
-namespace onert
-{
-namespace backend
-{
-namespace gpu_cl
-{
-
-size_t SizeOf(DataType data_type)
-{
- switch (data_type)
- {
- case DataType::UINT8:
- case DataType::INT8:
- return 1;
- case DataType::FLOAT16:
- case DataType::INT16:
- case DataType::UINT16:
- return 2;
- case DataType::FLOAT32:
- case DataType::INT32:
- case DataType::UINT32:
- return 4;
- case DataType::FLOAT64:
- case DataType::INT64:
- case DataType::UINT64:
- return 8;
- case DataType::UNKNOWN:
- return 0;
- }
- return 0;
-}
-
-std::string ToString(DataType data_type)
-{
- switch (data_type)
- {
- case DataType::FLOAT16:
- return "float16";
- case DataType::FLOAT32:
- return "float32";
- case DataType::FLOAT64:
- return "float64";
- case DataType::INT16:
- return "int16";
- case DataType::INT32:
- return "int32";
- case DataType::INT64:
- return "int64";
- case DataType::INT8:
- return "int8";
- case DataType::UINT16:
- return "uint16";
- case DataType::UINT32:
- return "uint32";
- case DataType::UINT64:
- return "uint64";
- case DataType::UINT8:
- return "uint8";
- case DataType::UNKNOWN:
- return "unknown";
- }
- return "undefined";
-}
-
-std::string ToCLDataType(DataType data_type, int vec_size)
-{
- const std::string postfix = vec_size == 1 ? "" : std::to_string(vec_size);
- switch (data_type)
- {
- case DataType::FLOAT16:
- return "half" + postfix;
- case DataType::FLOAT32:
- return "float" + postfix;
- case DataType::FLOAT64:
- return "double" + postfix;
- case DataType::INT16:
- return "short" + postfix;
- case DataType::INT32:
- return "int" + postfix;
- case DataType::INT64:
- return "long" + postfix;
- case DataType::INT8:
- return "char" + postfix;
- case DataType::UINT16:
- return "ushort" + postfix;
- case DataType::UINT32:
- return "uint" + postfix;
- case DataType::UINT64:
- return "ulong" + postfix;
- case DataType::UINT8:
- return "uchar" + postfix;
- case DataType::UNKNOWN:
- return "unknown";
- }
- return "undefined";
-}
-
-} // namespace gpu_cl
-} // namespace backend
-} // namespace onert
diff --git a/runtime/onert/backend/gpu_cl/open_cl/DataType.h b/runtime/onert/backend/gpu_cl/open_cl/DataType.h
deleted file mode 100644
index 2a5afd551..000000000
--- a/runtime/onert/backend/gpu_cl/open_cl/DataType.h
+++ /dev/null
@@ -1,57 +0,0 @@
-/*
- * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
- * Copyright 2019 The TensorFlow Authors. All Rights Reserved.
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#ifndef __ONERT_BACKEND_GPU_CL_OPENCL_DATA_TYPE_H__
-#define __ONERT_BACKEND_GPU_CL_OPENCL_DATA_TYPE_H__
-
-#include <stddef.h>
-#include <string>
-
-namespace onert
-{
-namespace backend
-{
-namespace gpu_cl
-{
-
-enum class DataType
-{
- UNKNOWN = 0,
- FLOAT16 = 1,
- FLOAT32 = 2,
- FLOAT64 = 3,
- UINT8 = 4,
- INT8 = 5,
- UINT16 = 6,
- INT16 = 7,
- UINT32 = 8,
- INT32 = 9,
- UINT64 = 10,
- INT64 = 11,
-};
-
-size_t SizeOf(DataType type);
-
-std::string ToString(DataType t);
-
-std::string ToCLDataType(DataType data_type, int vec_size = 1);
-
-} // namespace gpu_cl
-} // namespace backend
-} // namespace onert
-
-#endif // __ONERT_BACKEND_GPU_CL_OPENCL_DATA_TYPE_H__
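
A sketch of ToCLDataType feeding generated kernel source (illustrative):

#include "DataType.h"

#include <string>

std::string DeclareGlobalPointer()
{
  using onert::backend::gpu_cl::DataType;
  using onert::backend::gpu_cl::ToCLDataType;
  // ToCLDataType(DataType::FLOAT16, 4) yields "half4".
  return "__global " + ToCLDataType(DataType::FLOAT16, 4) + " *src;";
}
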
diff --git a/runtime/onert/backend/gpu_cl/open_cl/DeviceInfo.cc b/runtime/onert/backend/gpu_cl/open_cl/DeviceInfo.cc
deleted file mode 100644
index 2966fad75..000000000
--- a/runtime/onert/backend/gpu_cl/open_cl/DeviceInfo.cc
+++ /dev/null
@@ -1,383 +0,0 @@
-/*
- * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
- * Copyright 2020 The TensorFlow Authors. All Rights Reserved.
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#include "DeviceInfo.h"
-
-#include <algorithm>
-#include <map>
-#include <string>
-#include <vector>
-
-#include "absl/strings/numbers.h"
-#include "absl/strings/str_split.h"
-
-namespace onert
-{
-namespace backend
-{
-namespace gpu_cl
-{
-
-namespace
-{
-// Checks that gpu_version belongs to the range [min_version, max_version):
-// min_version is included and max_version is excluded.
-bool IsGPUVersionInRange(int gpu_version, int min_version, int max_version)
-{
- return gpu_version >= min_version && gpu_version < max_version;
-}
-
-MaliGPU GetMaliGPUVersion(const std::string &device_name)
-{
- const std::map<std::string, MaliGPU> kMapping = {
- {"T604", MaliGPU::T604}, {"T622", MaliGPU::T622}, {"T624", MaliGPU::T624},
- {"T628", MaliGPU::T628}, {"T658", MaliGPU::T658}, {"T678", MaliGPU::T678},
- {"T720", MaliGPU::T720}, {"T760", MaliGPU::T760}, {"T820", MaliGPU::T820},
- {"T830", MaliGPU::T830}, {"T860", MaliGPU::T860}, {"T880", MaliGPU::T880},
- {"G31", MaliGPU::G31}, {"G51", MaliGPU::G51}, {"G71", MaliGPU::G71},
- {"G52", MaliGPU::G52}, {"G72", MaliGPU::G72}, {"G76", MaliGPU::G76},
- {"G57", MaliGPU::G57}, {"G77", MaliGPU::G77}, {"G68", MaliGPU::G68},
- {"G78", MaliGPU::G78},
- };
- for (const auto &v : kMapping)
- {
- if (device_name.find(v.first) != std::string::npos)
- {
- return v.second;
- }
- }
- return MaliGPU::UNKNOWN;
-}
-
-} // namespace
-
-// There is no rule for gpu version encoding, but we found these samples:
-// Version: OpenCL C 2.0 Adreno(TM) 540 // Pixel 2
-// Version: OpenCL C 2.0 Adreno(TM) 630 // Sony Compact XZ2
-// Version: OpenCL C 2.0 Adreno(TM) 630 // Pixel 3
-// Version: OpenCL C 2.0 Adreno(TM) 540 // Samsung S8
-// Version: OpenCL C 1.2 Adreno(TM) 430 // HTC One M9
-// Version: OpenCL C 2.0 Adreno(TM) 530 // Samsung S7 Edge
-// Version: OpenCL C 1.2 Adreno(TM) 405 // Motorola Moto G(4)
-// After the version number, the string ends.
-// It is assumed that the <vendor-specific information> for Adreno GPUs has
-// the following format:
-// <text?><space?>Adreno(TM)<space><text?><version>
-// Returns -1 if vendor-specific information cannot be parsed
-int GetAdrenoGPUVersion(const std::string &gpu_version)
-{
- const std::string gpu = absl::AsciiStrToLower(gpu_version);
- const std::vector<absl::string_view> words = absl::StrSplit(gpu, ' ');
- size_t i = 0;
- for (; i < words.size(); ++i)
- {
- if (words[i].find("adreno") != words[i].npos)
- {
- break;
- }
- }
- i += 1;
- for (; i < words.size(); ++i)
- {
- int number;
- bool is_number = absl::SimpleAtoi(words[i], &number);
-    // Adreno GPUs start from 2xx, but OpenCL support exists only from 3xx
- if (is_number && number >= 300)
- {
- return number;
- }
- }
- return -1;
-}
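// Illustrative inputs (not from the original sources):
//   GetAdrenoGPUVersion("OpenCL C 2.0 Adreno(TM) 640") -> 640
//   GetAdrenoGPUVersion("Mali-G76") -> -1 (no "adreno" token to anchor parsing)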
-
-std::string VendorToString(Vendor v)
-{
- switch (v)
- {
- case Vendor::kQualcomm:
- return "Qualcomm";
- case Vendor::kMali:
- return "Mali";
- case Vendor::kPowerVR:
- return "PowerVR";
- case Vendor::kNvidia:
- return "NVIDIA";
- case Vendor::kAMD:
- return "AMD";
- case Vendor::kIntel:
- return "Intel";
- case Vendor::kUnknown:
- return "unknown vendor";
- default:
- return "Error";
- }
-}
-
-std::string OpenCLVersionToString(OpenCLVersion version)
-{
- switch (version)
- {
- case OpenCLVersion::CL_1_0:
- return "1.0";
- case OpenCLVersion::CL_1_1:
- return "1.1";
- case OpenCLVersion::CL_1_2:
- return "1.2";
- case OpenCLVersion::CL_2_0:
- return "2.0";
- case OpenCLVersion::CL_2_1:
- return "2.1";
- case OpenCLVersion::CL_2_2:
- return "2.2";
- case OpenCLVersion::CL_3_0:
- return "3.0";
- default:
- return "Error";
- }
-}
-
-AdrenoInfo::AdrenoInfo(const std::string &device_version)
- : gpu_version(GetAdrenoGPUVersion(device_version))
-{
-}
-
-int AdrenoInfo::GetMaximumWavesCount() const
-{
- if (gpu_version < 400)
- {
- return -1; // Adreno 3xx does not support it currently
- }
- else if (gpu_version >= 400 && gpu_version < 500)
- {
- return -1; // Adreno 4xx does not support it currently
- }
- else if (gpu_version >= 500 && gpu_version < 600)
- {
- return -1; // Adreno 5xx does not support it currently
- }
- else if (gpu_version >= 600 && gpu_version < 700)
- {
- return gpu_version == 640 ? 30 : 16;
- }
- else
- {
-    return -1; // Adreno 7xx and higher do not exist yet
- }
-}
-
-int AdrenoInfo::GetRegisterMemorySizePerComputeUnit() const
-{
- if (gpu_version < 400)
- {
- return -1; // Adreno 3xx does not support it currently
- }
- else if (gpu_version >= 400 && gpu_version < 500)
- {
- return -1; // Adreno 4xx does not support it currently
- }
- else if (gpu_version >= 500 && gpu_version < 600)
- {
- return -1; // Adreno 5xx does not support it currently
- }
- else if (gpu_version >= 600 && gpu_version < 700)
- {
- return gpu_version == 640 ? 128 * 144 * 16 : 128 * 96 * 16;
- }
- else
- {
- return -1; // Adreno 7xx and higher do not exist yet
- }
-}
-
-int AdrenoInfo::GetMaximumWavesCount(int register_footprint_per_thread, bool full_wave) const
-{
- const int register_usage_per_wave = GetWaveSize(full_wave) * register_footprint_per_thread;
- const int possible_waves_count = GetRegisterMemorySizePerComputeUnit() / register_usage_per_wave;
- return std::min(possible_waves_count, GetMaximumWavesCount());
-}
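-
-// Worked example (using the Adreno 640 constants above, full wave):
-// register memory per CU = 128 * 144 * 16 bytes and wave size = 128, so with
-// a register footprint of 96 per thread:
-//   possible_waves_count = (128 * 144 * 16) / (128 * 96) = 24
-// and the result is min(24, 30) = 24.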
-
-int AdrenoInfo::GetWaveSize(bool full_wave) const
-{
- if (gpu_version < 400)
- {
- return -1; // Adreno 3xx does not support it currently
- }
- else if (gpu_version < 600)
- {
- return full_wave ? 64 : 32;
- }
- else
- {
- return full_wave ? 128 : 64;
- }
-}
-
-MaliInfo::MaliInfo(const std::string &device_name) : gpu_version(GetMaliGPUVersion(device_name)) {}
-
-bool MaliInfo::IsMaliT6xx() const
-{
- return gpu_version == MaliGPU::T604 || gpu_version == MaliGPU::T622 ||
- gpu_version == MaliGPU::T624 || gpu_version == MaliGPU::T628 ||
- gpu_version == MaliGPU::T658 || gpu_version == MaliGPU::T678;
-}
-
-bool MaliInfo::IsMaliT7xx() const
-{
- return gpu_version == MaliGPU::T720 || gpu_version == MaliGPU::T760;
-}
-
-bool MaliInfo::IsMaliT8xx() const
-{
- return gpu_version == MaliGPU::T820 || gpu_version == MaliGPU::T830 ||
- gpu_version == MaliGPU::T860 || gpu_version == MaliGPU::T880;
-}
-
-bool MaliInfo::IsMidgard() const { return IsMaliT6xx() || IsMaliT7xx() || IsMaliT8xx(); }
-
-bool MaliInfo::IsBifrostGen1() const
-{
- return gpu_version == MaliGPU::G31 || gpu_version == MaliGPU::G51 || gpu_version == MaliGPU::G71;
-}
-
-bool MaliInfo::IsBifrostGen2() const
-{
- return gpu_version == MaliGPU::G52 || gpu_version == MaliGPU::G72;
-}
-
-bool MaliInfo::IsBifrostGen3() const { return gpu_version == MaliGPU::G76; }
-
-bool MaliInfo::IsBifrost() const { return IsBifrostGen1() || IsBifrostGen2() || IsBifrostGen3(); }
-
-bool MaliInfo::IsValhall() const
-{
- return gpu_version == MaliGPU::G57 || gpu_version == MaliGPU::G77 ||
- gpu_version == MaliGPU::G68 || gpu_version == MaliGPU::G78;
-}
-
-bool DeviceInfo::SupportsTextureArray() const { return cl_version >= OpenCLVersion::CL_1_2; }
-
-bool DeviceInfo::SupportsImageBuffer() const { return cl_version >= OpenCLVersion::CL_1_2; }
-
-bool DeviceInfo::SupportsImage3D() const
-{
- if (vendor == Vendor::kMali)
- {
- // On Mali T880 read_imageh doesn't compile with image3d_t
- return false;
- }
- return supports_image3d_writes;
-}
-
-bool DeviceInfo::SupportsFloatImage2D(DataType data_type, int channels) const
-{
- if (channels == 1)
- {
- return data_type == DataType::FLOAT32 ? supports_r_f32_tex2d : supports_r_f16_tex2d;
- }
- else if (channels == 2)
- {
- return data_type == DataType::FLOAT32 ? supports_rg_f32_tex2d : supports_rg_f16_tex2d;
- }
- else if (channels == 3)
- {
- return data_type == DataType::FLOAT32 ? supports_rgb_f32_tex2d : supports_rgb_f16_tex2d;
- }
- else if (channels == 4)
- {
- return data_type == DataType::FLOAT32 ? supports_rgba_f32_tex2d : supports_rgba_f16_tex2d;
- }
- else
- {
- return false;
- }
-}
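-
-// For example, SupportsFloatImage2D(DataType::FLOAT16, 4) reports whether
-// RGBA FP16 2D textures are usable (i.e. supports_rgba_f16_tex2d).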
-
-bool DeviceInfo::SupportsOneLayerTextureArray() const
-{
- return !IsAdreno() || adreno_info.support_one_layer_texture_array;
-}
-
-bool DeviceInfo::SupportsExtension(const std::string &extension) const
-{
- for (const auto &ext : extensions)
- {
- if (ext == extension)
- {
- return true;
- }
- }
- return false;
-}
-
-bool DeviceInfo::IsCL20OrHigher() const
-{
- return cl_version != OpenCLVersion::CL_1_0 && cl_version != OpenCLVersion::CL_1_1 &&
- cl_version != OpenCLVersion::CL_1_2;
-}
-
-bool DeviceInfo::SupportsSubGroupWithSize(int sub_group_size) const
-{
- for (auto subgroup_size : supported_subgroup_sizes)
- {
- if (sub_group_size == subgroup_size)
- {
- return true;
- }
- }
- return false;
-}
-
-bool DeviceInfo::IsAdreno() const { return vendor == Vendor::kQualcomm; }
-
-bool DeviceInfo::IsAdreno3xx() const
-{
- return IsAdreno() && IsGPUVersionInRange(adreno_info.gpu_version, 300, 400);
-}
-
-bool DeviceInfo::IsAdreno4xx() const
-{
- return IsAdreno() && IsGPUVersionInRange(adreno_info.gpu_version, 400, 500);
-}
-
-bool DeviceInfo::IsAdreno5xx() const
-{
- return IsAdreno() && IsGPUVersionInRange(adreno_info.gpu_version, 500, 600);
-}
-
-bool DeviceInfo::IsAdreno6xx() const
-{
- return IsAdreno() && IsGPUVersionInRange(adreno_info.gpu_version, 600, 700);
-}
-
-bool DeviceInfo::IsAdreno6xxOrHigher() const
-{
- return IsAdreno() && adreno_info.gpu_version >= 600;
-}
-
-bool DeviceInfo::IsPowerVR() const { return vendor == Vendor::kPowerVR; }
-
-bool DeviceInfo::IsNvidia() const { return vendor == Vendor::kNvidia; }
-
-bool DeviceInfo::IsMali() const { return vendor == Vendor::kMali; }
-
-bool DeviceInfo::IsAMD() const { return vendor == Vendor::kAMD; }
-
-bool DeviceInfo::IsIntel() const { return vendor == Vendor::kIntel; }
-
-} // namespace gpu_cl
-} // namespace backend
-} // namespace onert
diff --git a/runtime/onert/backend/gpu_cl/open_cl/DeviceInfo.h b/runtime/onert/backend/gpu_cl/open_cl/DeviceInfo.h
deleted file mode 100644
index 85d7d4c80..000000000
--- a/runtime/onert/backend/gpu_cl/open_cl/DeviceInfo.h
+++ /dev/null
@@ -1,203 +0,0 @@
-/*
- * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
- * Copyright 2020 The TensorFlow Authors. All Rights Reserved.
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#ifndef __ONERT_BACKEND_GPU_CL_OPENCL_DEVICE_INFO_H__
-#define __ONERT_BACKEND_GPU_CL_OPENCL_DEVICE_INFO_H__
-
-#include <string>
-#include <vector>
-
-#include "DataType.h"
-
-// Used only in DeviceInfo.cc, but declared here so it can be unit-tested.
-int GetAdrenoGPUVersion(const std::string &gpu_version);
-
-namespace onert
-{
-namespace backend
-{
-namespace gpu_cl
-{
-
-enum class Vendor
-{
- kQualcomm,
- kMali,
- kPowerVR,
- kNvidia,
- kAMD,
- kIntel,
- kUnknown
-};
-std::string VendorToString(Vendor v);
-
-enum class OpenCLVersion
-{
- UNKNOWN,
- CL_1_0,
- CL_1_1,
- CL_1_2,
- CL_2_0,
- CL_2_1,
- CL_2_2,
- CL_3_0
-};
-std::string OpenCLVersionToString(OpenCLVersion version);
-
-struct AdrenoInfo
-{
- AdrenoInfo() = default;
- explicit AdrenoInfo(const std::string &device_version);
- int gpu_version = -1; // e.g. 405/430/530/540/630
-
- // Returns a largely undocumented physical parameter of Adreno 6xx GPUs,
- // obtained empirically with the Snapdragon Profiler.
- int GetMaximumWavesCount() const;
-
- // Returns the amount of register memory per CU (Compute Unit) in bytes.
- int GetRegisterMemorySizePerComputeUnit() const;
-
- // Returns the maximum possible number of waves based on register usage.
- int GetMaximumWavesCount(int register_footprint_per_thread, bool full_wave = true) const;
-
- int GetWaveSize(bool full_wave) const;
-
- // Not supported on some Adreno devices with specific driver version.
- // b/131099086
- bool support_one_layer_texture_array = true;
-};
-
-enum class MaliGPU
-{
- T604,
- T622,
- T624,
- T628,
- T658,
- T678,
- T720,
- T760,
- T820,
- T830,
- T860,
- T880,
- G31,
- G51,
- G71,
- G52,
- G72,
- G76,
- G57,
- G77,
- G68,
- G78,
- UNKNOWN
-};
-
-struct MaliInfo
-{
- MaliInfo() = default;
- explicit MaliInfo(const std::string &device_name);
- MaliGPU gpu_version = MaliGPU::UNKNOWN;
-
- bool IsMaliT6xx() const;
- bool IsMaliT7xx() const;
- bool IsMaliT8xx() const;
- bool IsMidgard() const;
- bool IsBifrostGen1() const;
- bool IsBifrostGen2() const;
- bool IsBifrostGen3() const;
- bool IsBifrost() const;
- bool IsValhall() const;
-};
-
-struct DeviceInfo
-{
- DeviceInfo() = default;
-
- bool IsAdreno() const;
- bool IsAdreno3xx() const;
- bool IsAdreno4xx() const;
- bool IsAdreno5xx() const;
- bool IsAdreno6xx() const;
- bool IsAdreno6xxOrHigher() const;
- bool IsPowerVR() const;
- bool IsNvidia() const;
- bool IsMali() const;
- bool IsAMD() const;
- bool IsIntel() const;
-
- bool SupportsTextureArray() const;
- bool SupportsImageBuffer() const;
- bool SupportsImage3D() const;
-
- bool SupportsFloatImage2D(DataType data_type, int channels) const;
-
- // To track bug on some Adreno. b/131099086
- bool SupportsOneLayerTextureArray() const;
-
- bool SupportsExtension(const std::string &extension) const;
- bool IsCL20OrHigher() const;
- bool SupportsSubGroupWithSize(int sub_group_size) const;
-
- std::vector<std::string> extensions;
- bool supports_fp16 = false;
- bool supports_image3d_writes = false;
- Vendor vendor = Vendor::kUnknown;
- OpenCLVersion cl_version = OpenCLVersion::UNKNOWN;
- int compute_units_count = 0;
- uint64_t buffer_max_size = 0;
- uint64_t image2d_max_width = 0;
- uint64_t image2d_max_height = 0;
- uint64_t image_buffer_max_size = 0;
- uint64_t image_array_max_layers = 0;
- uint64_t image3d_max_width = 0;
- uint64_t image3d_max_height = 0;
- uint64_t image3d_max_depth = 0;
- int max_work_group_size_x = 0;
- int max_work_group_size_y = 0;
- int max_work_group_size_z = 0;
- std::vector<int> supported_subgroup_sizes;
-
- // rtn is ROUND_TO_NEAREST; precision is much better with rtn than with
- // rtz (ROUND_TO_ZERO).
- // Adreno 3xx supports only rtz; Adreno 4xx and later support rtn.
- // Mali supports rtn from T6xx onward; PowerVR supports only rtz.
- bool supports_fp32_rtn = false;
- bool supports_fp16_rtn = false;
-
- bool supports_r_f16_tex2d = false;
- bool supports_rg_f16_tex2d = false;
- bool supports_rgb_f16_tex2d = false;
- bool supports_rgba_f16_tex2d = false;
-
- bool supports_r_f32_tex2d = false;
- bool supports_rg_f32_tex2d = false;
- bool supports_rgb_f32_tex2d = false;
- bool supports_rgba_f32_tex2d = false;
-
- AdrenoInfo adreno_info;
- MaliInfo mali_info;
-};
-
-} // namespace gpu_cl
-} // namespace backend
-} // namespace onert
-
-#endif // __ONERT_BACKEND_GPU_CL_OPENCL_DEVICE_INFO_H__
diff --git a/runtime/onert/backend/gpu_cl/open_cl/Environment.cc b/runtime/onert/backend/gpu_cl/open_cl/Environment.cc
deleted file mode 100644
index b558f0377..000000000
--- a/runtime/onert/backend/gpu_cl/open_cl/Environment.cc
+++ /dev/null
@@ -1,276 +0,0 @@
-/*
- * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
- * Copyright 2019 The TensorFlow Authors. All Rights Reserved.
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#include "Environment.h"
-
-#include <string>
-#include <vector>
-
-#include "Util.h"
-#include "Shape.h"
-#include "Status.h"
-
-namespace onert
-{
-namespace backend
-{
-namespace gpu_cl
-{
-
-Environment::Environment(CLDevice &&device, CLContext &&context, CLCommandQueue &&queue,
- ProfilingCommandQueue &&profiling_queue)
- : device_(std::move(device)), context_(std::move(context)), queue_(std::move(queue)),
- profiling_queue_(std::move(profiling_queue))
-{
-}
-
-Environment::Environment(Environment &&environment)
- : device_(std::move(environment.device_)), context_(std::move(environment.context_)),
- queue_(std::move(environment.queue_)),
- profiling_queue_(std::move(environment.profiling_queue_)),
- program_cache_(std::move(environment.program_cache_))
-{
-}
-
-Environment &Environment::operator=(Environment &&environment)
-{
- if (this != &environment)
- {
- device_ = std::move(environment.device_);
- context_ = std::move(environment.context_);
- queue_ = std::move(environment.queue_);
- profiling_queue_ = std::move(environment.profiling_queue_);
- program_cache_ = std::move(environment.program_cache_);
- }
- return *this;
-}
-
-absl::Status Environment::Init()
-{
- if (device().IsAdreno() && device().SupportsTextureArray())
- {
- // Some Adreno < 600 devices have a bug with one-layer texture arrays. b/131099086
- // If a kernel writes to a one-layer texture array, reads return zeroes
- // instead of the actual values. The same kernel works correctly when the
- // texture array has more than one layer.
- if (device().info_.adreno_info.gpu_version < 600)
- {
- GetDevicePtr()->DisableOneLayerTextureArray();
- }
- }
- return absl::OkStatus();
-}
-
-void Environment::SetHighPerformance() const
-{
- // TODO(sorokin) use cl_perf_hint if available
-}
-
-void Environment::SetDefaultPerformance() const
-{
- // TODO(sorokin) use cl_perf_hint if available
-}
-
-void Environment::SetLowPerformance() const
-{
- // TODO(sorokin) use cl_perf_hint if available
-}
-
-std::vector<CalculationsPrecision> Environment::GetSupportedPrecisions() const
-{
- std::vector<CalculationsPrecision> precisions;
- for (CalculationsPrecision precision :
- {CalculationsPrecision::F32, CalculationsPrecision::F32_F16, CalculationsPrecision::F16})
- {
- if (IsSupported(precision))
- {
- precisions.push_back(precision);
- }
- }
- return precisions;
-}
-
-bool Environment::IsSupported(CalculationsPrecision precision) const
-{
- switch (precision)
- {
- case CalculationsPrecision::F32_F16:
- case CalculationsPrecision::F16:
- return device_.SupportsFP16();
- case CalculationsPrecision::F32:
- return true;
- }
- return false;
-}
-
-std::vector<TensorStorageType> Environment::GetSupportedStorages() const
-{
- std::vector<TensorStorageType> storage_types;
- for (auto storage_type :
- {TensorStorageType::TEXTURE_2D, TensorStorageType::BUFFER, TensorStorageType::TEXTURE_ARRAY,
- TensorStorageType::IMAGE_BUFFER, TensorStorageType::TEXTURE_3D})
- {
- if (IsSupported(storage_type))
- {
- storage_types.push_back(storage_type);
- }
- }
- return storage_types;
-}
-
-std::vector<TensorStorageType> Environment::GetSupportedStoragesWithHWZeroClampSupport() const
-{
- std::vector<TensorStorageType> storage_types;
- for (auto storage_type : {TensorStorageType::TEXTURE_2D, TensorStorageType::TEXTURE_ARRAY,
- TensorStorageType::TEXTURE_3D})
- {
- if (IsSupported(storage_type))
- {
- storage_types.push_back(storage_type);
- }
- }
- return storage_types;
-}
-
-bool Environment::IsSupported(TensorStorageType storage_type) const
-{
- switch (storage_type)
- {
- case TensorStorageType::TEXTURE_2D:
- return !device_.IsAMD();
- case TensorStorageType::BUFFER:
- return true;
- case TensorStorageType::TEXTURE_ARRAY:
- return !device_.IsAMD() && device_.SupportsTextureArray();
- case TensorStorageType::IMAGE_BUFFER:
- return (device_.IsAdreno() || device_.IsAMD() || device_.IsNvidia()) &&
- device_.SupportsImageBuffer();
- case TensorStorageType::TEXTURE_3D:
- return !device_.IsAMD() && device_.SupportsImage3D();
- case TensorStorageType::SINGLE_TEXTURE_2D:
- return false;
- case TensorStorageType::UNKNOWN:
- return false;
- }
- return false;
-}
-
-TensorStorageType GetFastestStorageType(const DeviceInfo &gpu_info)
-{
- if (gpu_info.IsAdreno())
- {
- if (gpu_info.IsAdreno6xxOrHigher())
- {
- return TensorStorageType::TEXTURE_ARRAY;
- }
- else
- {
- return TensorStorageType::TEXTURE_2D;
- }
- }
- else if (gpu_info.IsPowerVR())
- {
- return TensorStorageType::TEXTURE_2D;
- }
- else if (gpu_info.IsMali())
- {
- const MaliInfo mali_info = gpu_info.mali_info;
- if (mali_info.IsMaliT8xx() || mali_info.IsBifrostGen3() || mali_info.IsValhall())
- {
- return TensorStorageType::TEXTURE_2D;
- }
- else
- {
- return TensorStorageType::BUFFER;
- }
- }
- else if (gpu_info.IsNvidia())
- {
- return gpu_info.SupportsImageBuffer() ? TensorStorageType::IMAGE_BUFFER
- : TensorStorageType::BUFFER;
- }
- else if (gpu_info.IsAMD())
- {
- return gpu_info.SupportsImageBuffer() ? TensorStorageType::IMAGE_BUFFER
- : TensorStorageType::BUFFER;
- }
- else if (gpu_info.IsIntel())
- {
- return TensorStorageType::BUFFER;
- }
- return TensorStorageType::BUFFER;
-}
-
-TensorStorageType GetStorageTypeWithMinimalMemoryConsumption(const DeviceInfo &gpu_info)
-{
- if (gpu_info.IsAdreno())
- {
- if (gpu_info.IsAdreno3xx() || gpu_info.IsAdreno4xx())
- {
- return TensorStorageType::BUFFER;
- }
- else
- {
- return TensorStorageType::IMAGE_BUFFER;
- }
- }
- else if (gpu_info.IsPowerVR())
- {
- return TensorStorageType::BUFFER;
- }
- else if (gpu_info.IsMali())
- {
- return TensorStorageType::BUFFER;
- }
- else if (gpu_info.IsNvidia())
- {
- return gpu_info.SupportsImageBuffer() ? TensorStorageType::IMAGE_BUFFER
- : TensorStorageType::BUFFER;
- }
- else if (gpu_info.IsAMD())
- {
- return gpu_info.SupportsImageBuffer() ? TensorStorageType::IMAGE_BUFFER
- : TensorStorageType::BUFFER;
- }
- else if (gpu_info.IsIntel())
- {
- return TensorStorageType::BUFFER;
- }
- return TensorStorageType::BUFFER;
-}
-
-absl::Status CreateEnvironment(Environment *result)
-{
- CLDevice gpu;
- RETURN_IF_ERROR(CreateDefaultGPUDevice(&gpu));
-
- CLContext context;
- RETURN_IF_ERROR(CreateCLContext(gpu, &context));
- CLCommandQueue queue;
- RETURN_IF_ERROR(CreateCLCommandQueue(gpu, context, &queue));
- ProfilingCommandQueue profiling_queue;
- RETURN_IF_ERROR(CreateProfilingCommandQueue(gpu, context, &profiling_queue));
-
- *result =
- Environment(std::move(gpu), std::move(context), std::move(queue), std::move(profiling_queue));
- return result->Init();
-}
-
-} // namespace gpu_cl
-} // namespace backend
-} // namespace onert
diff --git a/runtime/onert/backend/gpu_cl/open_cl/Environment.h b/runtime/onert/backend/gpu_cl/open_cl/Environment.h
deleted file mode 100644
index 47866b563..000000000
--- a/runtime/onert/backend/gpu_cl/open_cl/Environment.h
+++ /dev/null
@@ -1,90 +0,0 @@
-/*
- * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
- * Copyright 2019 The TensorFlow Authors. All Rights Reserved.
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#ifndef __ONERT_BACKEND_GPU_CL_OPENCL_ENVIRONMENT_H__
-#define __ONERT_BACKEND_GPU_CL_OPENCL_ENVIRONMENT_H__
-
-#include "ClCommandQueue.h"
-#include "ClContext.h"
-#include "ClDevice.h"
-#include "DeviceInfo.h"
-#include "Precision.h"
-#include "TensorType.h"
-#include "DataType.h"
-#include "ProgramCache.h"
-#include "Status.h"
-
-namespace onert
-{
-namespace backend
-{
-namespace gpu_cl
-{
-
-class Environment
-{
-public:
- Environment() = default;
- explicit Environment(CLDevice &&device, CLContext &&context, CLCommandQueue &&queue,
- ProfilingCommandQueue &&profiling_queue);
- // Move only
- Environment(Environment &&environment);
- Environment &operator=(Environment &&environment);
- Environment(const Environment &) = delete;
- Environment &operator=(const Environment &) = delete;
-
- const CLDevice &device() const { return device_; }
- CLDevice *GetDevicePtr() { return &device_; }
- const CLDevice *GetDevicePtr() const { return &device_; }
- CLContext &context() { return context_; }
- CLCommandQueue *queue() { return &queue_; }
- ProfilingCommandQueue *profiling_queue() { return &profiling_queue_; }
- ProgramCache *program_cache() { return &program_cache_; }
- const ProgramCache *program_cache() const { return &program_cache_; }
-
- std::vector<CalculationsPrecision> GetSupportedPrecisions() const;
- bool IsSupported(CalculationsPrecision precision) const;
- std::vector<TensorStorageType> GetSupportedStorages() const;
- // Returns storage types that support zero clamping when reading out of
- // bounds in the HW (Height/Width) dimensions.
- std::vector<TensorStorageType> GetSupportedStoragesWithHWZeroClampSupport() const;
- bool IsSupported(TensorStorageType storage_type) const;
-
- absl::Status Init();
-
- void SetHighPerformance() const;
- void SetDefaultPerformance() const;
- void SetLowPerformance() const; // for energy saving
-
-private:
- CLDevice device_;
- CLContext context_;
- CLCommandQueue queue_;
- ProfilingCommandQueue profiling_queue_;
- ProgramCache program_cache_;
-};
-
-TensorStorageType GetFastestStorageType(const DeviceInfo &gpu_info);
-TensorStorageType GetStorageTypeWithMinimalMemoryConsumption(const DeviceInfo &gpu_info);
-
-absl::Status CreateEnvironment(Environment *result);
-
-} // namespace gpu_cl
-} // namespace backend
-} // namespace onert
-
-#endif // __ONERT_BACKEND_GPU_CL_OPENCL_ENVIRONMENT_H__
diff --git a/runtime/onert/backend/gpu_cl/open_cl/GpuObject.h b/runtime/onert/backend/gpu_cl/open_cl/GpuObject.h
deleted file mode 100644
index a31630235..000000000
--- a/runtime/onert/backend/gpu_cl/open_cl/GpuObject.h
+++ /dev/null
@@ -1,222 +0,0 @@
-/*
- * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
- * Copyright 2020 The TensorFlow Authors. All Rights Reserved.
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#ifndef __ONERT_BACKEND_GPU_CL_OPENCL_GPU_OBJECT_H__
-#define __ONERT_BACKEND_GPU_CL_OPENCL_GPU_OBJECT_H__
-
-#include <map>
-#include <memory>
-#include <string>
-#include <vector>
-
-#include "ClContext.h"
-#include "OpenclWrapper.h"
-#include "AccessType.h"
-#include "DataType.h"
-
-namespace onert
-{
-namespace backend
-{
-namespace gpu_cl
-{
-
-struct GPUImage2DDescriptor
-{
- DataType data_type = DataType::UNKNOWN;
- AccessType access_type = AccessType::UNKNOWN;
- cl_mem memory = nullptr;
-};
-
-struct GPUImage3DDescriptor
-{
- DataType data_type = DataType::UNKNOWN;
- AccessType access_type = AccessType::UNKNOWN;
- cl_mem memory = nullptr;
-};
-
-struct GPUImage2DArrayDescriptor
-{
- DataType data_type = DataType::UNKNOWN;
- AccessType access_type = AccessType::UNKNOWN;
- cl_mem memory = nullptr;
-};
-
-struct GPUImageBufferDescriptor
-{
- DataType data_type = DataType::UNKNOWN;
- AccessType access_type = AccessType::UNKNOWN;
- cl_mem memory = nullptr;
-};
-
-struct GPUCustomMemoryDescriptor
-{
- std::string type_name = "";
- cl_mem memory = nullptr;
-};
-
-enum class MemoryType
-{
- GLOBAL,
- CONSTANT,
- LOCAL
-};
-
-std::string MemoryTypeToCLType(MemoryType type);
-
-struct GPUBufferDescriptor
-{
- DataType data_type = DataType::UNKNOWN;
- AccessType access_type = AccessType::UNKNOWN;
- int element_size = 0;
- MemoryType memory_type = MemoryType::GLOBAL;
- std::vector<std::string> attributes;
- cl_mem memory = nullptr;
-};
-
-struct GPUResources
-{
- std::vector<std::string> ints;
- std::vector<std::string> floats;
- std::vector<std::pair<std::string, GPUBufferDescriptor>> buffers;
- std::vector<std::pair<std::string, GPUImage2DDescriptor>> images2d;
- std::vector<std::pair<std::string, GPUImage2DArrayDescriptor>> image2d_arrays;
- std::vector<std::pair<std::string, GPUImage3DDescriptor>> images3d;
- std::vector<std::pair<std::string, GPUImageBufferDescriptor>> image_buffers;
- std::vector<std::pair<std::string, GPUCustomMemoryDescriptor>> custom_memories;
-
- std::vector<std::string> GetNames() const
- {
- std::vector<std::string> names = ints;
- names.insert(names.end(), floats.begin(), floats.end());
- for (const auto &obj : buffers)
- {
- names.push_back(obj.first);
- }
- for (const auto &obj : images2d)
- {
- names.push_back(obj.first);
- }
- for (const auto &obj : image2d_arrays)
- {
- names.push_back(obj.first);
- }
- for (const auto &obj : images3d)
- {
- names.push_back(obj.first);
- }
- for (const auto &obj : image_buffers)
- {
- names.push_back(obj.first);
- }
- for (const auto &obj : custom_memories)
- {
- names.push_back(obj.first);
- }
- return names;
- }
-};
-
-struct GPUResourcesWithValue
-{
- std::vector<std::pair<std::string, int>> ints;
- std::vector<std::pair<std::string, float>> floats;
- std::vector<std::pair<std::string, cl_mem>> buffers;
- std::vector<std::pair<std::string, cl_mem>> images2d;
- std::vector<std::pair<std::string, cl_mem>> image2d_arrays;
- std::vector<std::pair<std::string, cl_mem>> images3d;
- std::vector<std::pair<std::string, cl_mem>> image_buffers;
- std::vector<std::pair<std::string, cl_mem>> custom_memories;
-};
-
-class GPUObject;
-
-class GPUObjectDescriptor
-{
-public:
- GPUObjectDescriptor() = default;
- GPUObjectDescriptor(const GPUObjectDescriptor &) = default;
- GPUObjectDescriptor &operator=(const GPUObjectDescriptor &) = default;
- GPUObjectDescriptor(GPUObjectDescriptor &&obj_desc) : state_vars_(std::move(obj_desc.state_vars_))
- {
- }
- GPUObjectDescriptor &operator=(GPUObjectDescriptor &&obj_desc)
- {
- if (this != &obj_desc)
- {
- state_vars_ = std::move(obj_desc.state_vars_);
- }
- return *this;
- }
- virtual ~GPUObjectDescriptor() = default;
-
- void SetStateVar(const std::string &key, const std::string &value) const
- {
- state_vars_[key] = value;
- }
-
- virtual std::string PerformConstExpr(const std::string &) const { return ""; }
-
- virtual absl::Status PerformSelector(const std::string &, const std::vector<std::string> &,
- const std::vector<std::string> &, std::string *result) const
- {
- *result = "";
- return absl::OkStatus();
- }
- virtual GPUResources GetGPUResources() const { return GPUResources(); }
-
- virtual absl::Status CreateGPUObject(CLContext *, std::unique_ptr<GPUObject> *) const
- {
- return absl::OkStatus();
- }
- virtual void Release() {}
-
- void SetAccess(AccessType access_type) { access_type_ = access_type; }
- AccessType GetAccess() const { return access_type_; }
-
-protected:
- // friend flatbuffers::Offset<data::GPUObjectDescriptor> Encode(
- // const GPUObjectDescriptor& desc, flatbuffers::FlatBufferBuilder* builder);
- // friend void Decode(const data::GPUObjectDescriptor* fb_obj,
- // GPUObjectDescriptor* obj);
- mutable std::map<std::string, std::string> state_vars_;
- AccessType access_type_ = AccessType::UNKNOWN;
-};
-
-using GPUObjectDescriptorPtr = std::unique_ptr<GPUObjectDescriptor>;
-
-class GPUObject
-{
-public:
- GPUObject() = default;
- // Move only
- GPUObject(GPUObject &&obj_desc) = default;
- GPUObject &operator=(GPUObject &&obj_desc) = default;
- GPUObject(const GPUObject &) = delete;
- GPUObject &operator=(const GPUObject &) = delete;
- virtual ~GPUObject() = default;
- virtual absl::Status GetGPUResources(const GPUObjectDescriptor *obj_ptr,
- GPUResourcesWithValue *resources) const = 0;
-};
-
-using GPUObjectPtr = std::unique_ptr<GPUObject>;
-
-} // namespace gpu_cl
-} // namespace backend
-} // namespace onert
-
-#endif // __ONERT_BACKEND_GPU_CL_OPENCL_GPU_OBJECT_H__
diff --git a/runtime/onert/backend/gpu_cl/open_cl/InferenceContext.cc b/runtime/onert/backend/gpu_cl/open_cl/InferenceContext.cc
deleted file mode 100644
index afb7e2950..000000000
--- a/runtime/onert/backend/gpu_cl/open_cl/InferenceContext.cc
+++ /dev/null
@@ -1,71 +0,0 @@
-/*
- * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
- * Copyright 2019 The TensorFlow Authors. All Rights Reserved.
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#include "InferenceContext.h"
-
-#include <algorithm>
-#include <cmath>
-#include <cstdint>
-#include <map>
-#include <memory>
-#include <string>
-#include <vector>
-#include <unordered_map>
-
-#include "Buffer.h"
-#include "ClDevice.h"
-
-#include "kernels/GpuOperation.h"
-#include "ModelHints.h"
-#include "Precision.h"
-#include "StorageTypeUtil.h"
-#include "TensorType.h"
-#include "DataType.h"
-#include "Model.h"
-#include "Operations.h"
-#include "Shape.h"
-#include "Types.h"
-#include "Util.h"
-
-namespace onert
-{
-namespace backend
-{
-namespace gpu_cl
-{
-
-CLNode::CLNode(CLNode &&node)
- : operation(std::move(node.operation)), inputs(std::move(node.inputs)),
- outputs(std::move(node.outputs)), name(std::move(node.name))
-{
-}
-
-CLNode &CLNode::operator=(CLNode &&node)
-{
- if (this != &node)
- {
- operation = std::move(node.operation);
- inputs = std::move(node.inputs);
- outputs = std::move(node.outputs);
- name = std::move(node.name);
- }
- return *this;
-}
-
-} // namespace gpu_cl
-} // namespace backend
-} // namespace onert
diff --git a/runtime/onert/backend/gpu_cl/open_cl/InferenceContext.h b/runtime/onert/backend/gpu_cl/open_cl/InferenceContext.h
deleted file mode 100644
index ebe2c5313..000000000
--- a/runtime/onert/backend/gpu_cl/open_cl/InferenceContext.h
+++ /dev/null
@@ -1,143 +0,0 @@
-/*
- * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
- * Copyright 2019 The TensorFlow Authors. All Rights Reserved.
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#ifndef __ONERT_BACKEND_GPU_CL_OPENCL_INFERENCE_CONTEXT_H__
-#define __ONERT_BACKEND_GPU_CL_OPENCL_INFERENCE_CONTEXT_H__
-
-#include <cstdint>
-#include <functional>
-#include <map>
-#include <memory>
-#include <vector>
-#include <unordered_map>
-
-#include "Buffer.h"
-#include "ClCommandQueue.h"
-#include "Environment.h"
-#include "GpuObject.h"
-#include "kernels/GpuOperation.h"
-#include "ModelHints.h"
-#include "OpenclWrapper.h"
-#include "Precision.h"
-#include "TensorType.h"
-#include "Model.h"
-#include "InternalTensor.h"
-
-namespace onert
-{
-namespace backend
-{
-namespace gpu_cl
-{
-
-struct CLNode
-{
- std::unique_ptr<GPUOperation> operation;
- std::vector<ValueId> inputs;
- std::vector<ValueId> outputs;
-
- // Mostly for debug purposes.
- std::string name;
-
- CLNode() = default;
-
- CLNode(CLNode &&node);
- CLNode &operator=(CLNode &&node);
- CLNode(const CLNode &) = delete;
- CLNode &operator=(const CLNode &) = delete;
-};
-
-class InferenceContext
-{
-public:
- struct CreateInferenceInfo
- {
- CalculationsPrecision precision;
- TensorStorageType storage_type;
- ModelHints hints;
- };
-
- struct DummyTensor
- {
- BHWC shape;
- TensorDescriptor descriptor;
-
- bool operator==(const DummyTensor &b) const
- {
- return shape == b.shape && descriptor == b.descriptor;
- }
- };
-
- class TensorReserver
- {
- public:
- // Take the shared_ptr by value (non-const) so the std::move below
- // actually moves instead of copying.
- ValueId Add(std::shared_ptr<DummyTensor> dummy)
- {
- reservations_[next_] = std::move(dummy);
- return next_++;
- }
- void Add(ValueId id, std::shared_ptr<DummyTensor> dummy)
- {
- reservations_[id] = std::move(dummy);
- }
- void SetNext(ValueId id) { next_ = id; }
- bool HaveTensor(ValueId id) { return reservations_.find(id) != reservations_.end(); }
- std::shared_ptr<DummyTensor> Get(ValueId id) { return reservations_[id]; }
-
- std::vector<std::pair<ValueId, TensorDescriptor>> GetTensorDescs() const
- {
- std::vector<std::pair<ValueId, TensorDescriptor>> result;
- for (auto &v : reservations_)
- {
- TensorDescriptor desc = v.second->descriptor;
- desc.shape.b = v.second->shape.b;
- desc.shape.h = v.second->shape.h;
- desc.shape.w = v.second->shape.w;
- desc.shape.d = 1;
- desc.shape.c = v.second->shape.c;
- result.push_back({v.first, desc});
- }
- return result;
- }
-
- void Add(const std::vector<std::pair<ValueId, TensorDescriptor>> &tensors)
- {
- for (auto &v : tensors)
- {
- auto dummy = std::make_shared<DummyTensor>();
- dummy->descriptor = v.second;
- dummy->shape.b = v.second.shape.b;
- dummy->shape.h = v.second.shape.h;
- dummy->shape.w = v.second.shape.w;
- dummy->shape.c = v.second.shape.c;
- Add(v.first, dummy);
- }
- }
-
- private:
- std::unordered_map<ValueId, std::shared_ptr<DummyTensor>> reservations_;
- ValueId next_ = 0;
- };
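-
- // Usage sketch for TensorReserver (ids and shapes are hypothetical):
- //   TensorReserver reserver;
- //   auto dummy = std::make_shared<DummyTensor>();
- //   dummy->shape = BHWC(1, 16, 16, 8);
- //   ValueId id = reserver.Add(dummy);   // sequential id, starting at 0
- //   reserver.Add(42, dummy);            // or reserve an explicit id
- //   bool ok = reserver.HaveTensor(42);  // true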
-
-private:
-};
-
-} // namespace gpu_cl
-} // namespace backend
-} // namespace onert
-
-#endif // __ONERT_BACKEND_GPU_CL_OPENCL_INFERENCE_CONTEXT_H__
diff --git a/runtime/onert/backend/gpu_cl/open_cl/InternalTensor.h b/runtime/onert/backend/gpu_cl/open_cl/InternalTensor.h
deleted file mode 100644
index f0423db86..000000000
--- a/runtime/onert/backend/gpu_cl/open_cl/InternalTensor.h
+++ /dev/null
@@ -1,106 +0,0 @@
-/*
- * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
- * Copyright 2019 The TensorFlow Authors. All Rights Reserved.
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#ifndef __ONERT_BACKEND_GPU_CL_OPENCL_INTERNAL_TENSOR_H__
-#define __ONERT_BACKEND_GPU_CL_OPENCL_INTERNAL_TENSOR_H__
-
-#include <stdint.h>
-
-#include <vector>
-
-#include "DataType.h"
-#include "Shape.h"
-
-namespace onert
-{
-namespace backend
-{
-namespace gpu_cl
-{
-namespace internal_tensor
-{
-
-// Metafunction that maps an element type to the tensor data container type.
-template <DataType Type> struct StorageType;
-
-template <> struct StorageType<DataType::FLOAT32>
-{
- using value = std::vector<float>;
-};
-
-template <> struct StorageType<DataType::INT32>
-{
- using value = std::vector<int32_t>;
-};
-
-} // namespace internal_tensor
-
-template <typename ShapeT, DataType Type> struct InternalTensor
-{
- using ShapeType = ShapeT;
-
- constexpr static DataType kType = Type;
-
- using TensorStorageType = typename internal_tensor::StorageType<Type>::value;
-
- // Opaque id of a tensor.
- int64_t id = -1;
-
- ShapeType shape;
-
- TensorStorageType data;
-};
-
-// TensorRef is a reference to another tensor. If an object should never hold
-// tensor data, then TensorRef should be used instead.
-template <typename ShapeT> struct TensorRef
-{
- using ShapeType = ShapeT;
-
- DataType type = DataType::UNKNOWN;
-
- ShapeT shape;
-
- // Opaque reference to a tensor. Upstream component is responsible for
- // resolving this reference into an actual tensor.
- int64_t ref = -1;
-
- // Specifies whether the tensor should be a variable input tensor, i.e. an
- // output as well as an input to the graph.
- bool is_variable_input = false;
-};
-
-template <typename ShapeT, DataType Type> constexpr DataType InternalTensor<ShapeT, Type>::kType;
-
-template <typename ShapeT, DataType Type>
-InternalTensor<ShapeT, Type> MakeZeroTensor(const ShapeT &shape)
-{
- InternalTensor<ShapeT, Type> tensor;
- tensor.shape = shape;
- tensor.data =
- typename InternalTensor<ShapeT, Type>::TensorStorageType(shape.DimensionsProduct(), 0);
- return tensor;
-}
-
-using TensorFloat32 = InternalTensor<BHWC, DataType::FLOAT32>;
-using Tensor5DFloat32 = InternalTensor<BHWDC, DataType::FLOAT32>;
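-
-// Example (illustrative): a zero-initialized 1x8x8x4 float tensor.
-//   TensorFloat32 t = MakeZeroTensor<BHWC, DataType::FLOAT32>(BHWC(1, 8, 8, 4));
-//   // t.data.size() == 256 and every element is 0.0f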
-
-} // namespace gpu_cl
-} // namespace backend
-} // namespace onert
-
-#endif // __ONERT_BACKEND_GPU_CL_OPENCL_INTERNAL_TENSOR_H__
diff --git a/runtime/onert/backend/gpu_cl/open_cl/LinearStorage.cc b/runtime/onert/backend/gpu_cl/open_cl/LinearStorage.cc
deleted file mode 100644
index 3889d4369..000000000
--- a/runtime/onert/backend/gpu_cl/open_cl/LinearStorage.cc
+++ /dev/null
@@ -1,265 +0,0 @@
-/*
- * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
- * Copyright 2019 The TensorFlow Authors. All Rights Reserved.
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#include "LinearStorage.h"
-
-#include "absl/strings/str_cat.h"
-#include "DataType.h"
-#include "Status.h"
-
-namespace onert
-{
-namespace backend
-{
-namespace gpu_cl
-{
-
-TensorLinearDescriptor::TensorLinearDescriptor(TensorLinearDescriptor &&desc)
- : GPUObjectDescriptor(std::move(desc)), storage_type(desc.storage_type),
- element_type(desc.element_type), memory_type(desc.memory_type), size(desc.size),
- data(std::move(desc.data))
-{
-}
-
-TensorLinearDescriptor &TensorLinearDescriptor::operator=(TensorLinearDescriptor &&desc)
-{
- if (this != &desc)
- {
- std::swap(storage_type, desc.storage_type);
- std::swap(element_type, desc.element_type);
- std::swap(memory_type, desc.memory_type);
- std::swap(size, desc.size);
- data = std::move(desc.data);
- GPUObjectDescriptor::operator=(std::move(desc));
- }
- return *this;
-}
-
-void TensorLinearDescriptor::Release() { data.clear(); }
-
-GPUResources TensorLinearDescriptor::GetGPUResources() const
-{
- GPUResources resources;
- resources.ints.push_back("length");
- if (storage_type == LinearStorageType::BUFFER)
- {
- GPUBufferDescriptor desc;
- desc.data_type = element_type;
- desc.access_type = access_type_;
- desc.element_size = 4;
- desc.memory_type = memory_type;
- resources.buffers.push_back({"buffer", desc});
- }
- else
- {
- GPUImage2DDescriptor desc;
- desc.data_type = element_type;
- desc.access_type = access_type_;
- resources.images2d.push_back({"tex2d", desc});
- }
- return resources;
-}
-
-absl::Status TensorLinearDescriptor::PerformSelector(const std::string &selector,
- const std::vector<std::string> &args,
- const std::vector<std::string> &,
- std::string *result) const
-{
- if (selector == "Length")
- {
- *result = "length";
- return absl::OkStatus();
- }
- else if (selector == "Read")
- {
- return PerformReadSelector(args, result);
- }
- else if (selector == "GetPtr")
- {
- if (storage_type != LinearStorageType::BUFFER)
- {
- return absl::InvalidArgumentError(
- "GetPtr selector supported for LinearStorageType::BUFFER only.");
- }
- *result = "buffer";
- return absl::OkStatus();
- }
- else
- {
- return absl::NotFoundError(
- absl::StrCat("TensorLinearDescriptor has no selector named ", selector));
- }
-}
-
-absl::Status TensorLinearDescriptor::PerformReadSelector(const std::vector<std::string> &args,
- std::string *result) const
-{
- if (args.size() != 1)
- {
- return absl::NotFoundError(absl::StrCat(
- "TensorLinearDescriptor Read requires one argument, but ", args.size(), " were passed"));
- }
- if (storage_type == LinearStorageType::BUFFER)
- {
- *result = absl::StrCat("buffer[", args[0], "]");
- return absl::OkStatus();
- }
- else
- {
- const std::string read = element_type == DataType::FLOAT16 ? "read_imageh" : "read_imagef";
- *result = absl::StrCat(read, "(tex2d, smp_none, (int2)(", args[0], ", 0))");
- return absl::OkStatus();
- }
-}
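-
-// Illustrative generated code for Read("idx"):
-//   BUFFER storage:     buffer[idx]
-//   texture2d, FLOAT16: read_imageh(tex2d, smp_none, (int2)(idx, 0))
-//   texture2d, FLOAT32: read_imagef(tex2d, smp_none, (int2)(idx, 0))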
-
-absl::Status TensorLinearDescriptor::CreateGPUObject(CLContext *context, GPUObjectPtr *result) const
-{
- LinearStorage gpu_storage;
- RETURN_IF_ERROR(gpu_storage.CreateFromTensorLinearDescriptor(*this, context));
- *result = absl::make_unique<LinearStorage>(std::move(gpu_storage));
- return absl::OkStatus();
-}
-
-void TensorLinearDescriptor::UploadLinearData(const InternalTensor<Linear, DataType::FLOAT32> &src,
- int aligned_size)
-{
- size = aligned_size == 0 ? DivideRoundUp(src.shape.v, 4) : aligned_size;
- if (element_type == DataType::FLOAT32)
- {
- data.resize(size * sizeof(float) * 4);
- float *gpu_data = reinterpret_cast<float *>(data.data());
- for (int i = 0; i < size * 4; ++i)
- {
- if (i < src.shape.v)
- {
- gpu_data[i] = src.data[i];
- }
- else
- {
- gpu_data[i] = 0.0f;
- }
- }
- }
- // TODO: F16 is not supported yet; add support later.
- //
- // else {
- // data.resize(size * sizeof(half) * 4);
- // half* gpu_data = reinterpret_cast<half*>(data.data());
- // for (int i = 0; i < size * 4; ++i) {
- // if (i < src.shape.v) {
- // gpu_data[i] = src.data[i];
- // } else {
- // gpu_data[i] = 0.0f;
- // }
- // }
- // }
-}
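-
-// Worked example: for a FLOAT32 source with shape.v == 6 and aligned_size == 0,
-// size = DivideRoundUp(6, 4) = 2, so data holds 2 * 4 floats (32 bytes);
-// elements 0..5 are copied from src and elements 6..7 are zero-padded.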
-
-void LinearStorage::Release()
-{
- if (memory_)
- {
- clReleaseMemObject(memory_);
- memory_ = nullptr;
- }
-}
-
-LinearStorage::LinearStorage(LinearStorage &&storage)
- : GPUObject(std::move(storage)), memory_(storage.memory_), depth_(storage.depth_),
- storage_type_(storage.storage_type_)
-{
- storage.memory_ = nullptr;
-}
-
-LinearStorage &LinearStorage::operator=(LinearStorage &&storage)
-{
- if (this != &storage)
- {
- Release();
- std::swap(memory_, storage.memory_);
- std::swap(depth_, storage.depth_);
- std::swap(storage_type_, storage.storage_type_);
- GPUObject::operator=(std::move(storage));
- }
- return *this;
-}
-
-absl::Status LinearStorage::GetGPUResources(const GPUObjectDescriptor *obj_ptr,
- GPUResourcesWithValue *resources) const
-{
- const auto *linear_desc = dynamic_cast<const TensorLinearDescriptor *>(obj_ptr);
- if (!linear_desc)
- {
- return absl::InvalidArgumentError("Expected TensorLinearDescriptor on input.");
- }
-
- resources->ints.push_back({"length", depth_});
-
- if (storage_type_ == LinearStorageType::BUFFER)
- {
- resources->buffers.push_back({"buffer", memory_});
- }
- else
- {
- resources->images2d.push_back({"tex2d", memory_});
- }
-
- return absl::OkStatus();
-}
-
-absl::Status LinearStorage::CreateFromTensorLinearDescriptor(const TensorLinearDescriptor &desc,
- CLContext *context)
-{
- storage_type_ = desc.storage_type;
- depth_ = desc.size;
- uint8_t *data_ptr = desc.data.empty() ? nullptr : const_cast<unsigned char *>(desc.data.data());
- if (storage_type_ == LinearStorageType::BUFFER)
- {
- bool read_only = desc.memory_type == MemoryType::CONSTANT;
- // TODO: F16 is not supported yet; add support later.
- //
- // const int float4_size = desc.element_type == DataType::FLOAT32
- // ? sizeof(float) * 4
- // : sizeof(half) * 4;
- const int float4_size = sizeof(float) * 4;
- return CreateCLBuffer(context->context(), depth_ * float4_size, read_only, data_ptr, &memory_);
- }
- else
- {
- return CreateRGBAImage2D(context->context(), depth_, 1,
- DataTypeToChannelType(desc.element_type), data_ptr, &memory_);
- }
-}
-
-LinearStorageType DeduceLinearStorageType(TensorStorageType tensor_storage_type)
-{
- if (tensor_storage_type == TensorStorageType::BUFFER)
- {
- return LinearStorageType::BUFFER;
- }
- else
- {
- return LinearStorageType::TEXTURE_2D;
- }
-}
-
-} // namespace gpu_cl
-} // namespace backend
-} // namespace onert
diff --git a/runtime/onert/backend/gpu_cl/open_cl/LinearStorage.h b/runtime/onert/backend/gpu_cl/open_cl/LinearStorage.h
deleted file mode 100644
index f6c3ac82f..000000000
--- a/runtime/onert/backend/gpu_cl/open_cl/LinearStorage.h
+++ /dev/null
@@ -1,110 +0,0 @@
-/*
- * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
- * Copyright 2019 The TensorFlow Authors. All Rights Reserved.
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#ifndef __ONERT_BACKEND_GPU_CL_OPENCL_LINEAR_STORAGE_H__
-#define __ONERT_BACKEND_GPU_CL_OPENCL_LINEAR_STORAGE_H__
-
-#include <string>
-#include <utility>
-
-#include "absl/strings/str_cat.h"
-#include "absl/types/span.h"
-#include "GpuObject.h"
-#include "OpenclWrapper.h"
-#include "TensorType.h"
-#include "Util.h"
-#include "DataType.h"
-#include "Status.h"
-#include "Types.h"
-
-namespace onert
-{
-namespace backend
-{
-namespace gpu_cl
-{
-
-enum class LinearStorageType
-{
- BUFFER,
- TEXTURE_2D
-};
-
-struct TensorLinearDescriptor : public GPUObjectDescriptor
-{
- LinearStorageType storage_type;
- DataType element_type; // FLOAT32 or FLOAT16
- MemoryType memory_type = MemoryType::GLOBAL; // applicable for BUFFER
-
- // optional
- int size = 0;
- std::vector<uint8_t> data;
-
- TensorLinearDescriptor() = default;
- TensorLinearDescriptor(const TensorLinearDescriptor &) = default;
- TensorLinearDescriptor &operator=(const TensorLinearDescriptor &) = default;
- TensorLinearDescriptor(TensorLinearDescriptor &&desc);
- TensorLinearDescriptor &operator=(TensorLinearDescriptor &&desc);
-
- void UploadLinearData(const InternalTensor<Linear, DataType::FLOAT32> &src, int aligned_size = 0);
-
- absl::Status PerformSelector(const std::string &selector, const std::vector<std::string> &args,
- const std::vector<std::string> &template_args,
- std::string *result) const override;
-
- GPUResources GetGPUResources() const override;
- absl::Status PerformReadSelector(const std::vector<std::string> &args, std::string *result) const;
-
- absl::Status CreateGPUObject(CLContext *context, GPUObjectPtr *result) const override;
- void Release() override;
-};
-
-LinearStorageType DeduceLinearStorageType(TensorStorageType tensor_storage_type);
-
-// Represents a GPU 1D array of FLT4 (float4/half4) values.
-// Can be backed by either a texture2d or a buffer.
-class LinearStorage : public GPUObject
-{
-public:
- LinearStorage() {}
- ~LinearStorage() override { Release(); }
-
- // Move only
- LinearStorage(LinearStorage &&storage);
- LinearStorage &operator=(LinearStorage &&storage);
- LinearStorage(const LinearStorage &) = delete;
- LinearStorage &operator=(const LinearStorage &) = delete;
-
- absl::Status GetGPUResources(const GPUObjectDescriptor *obj_ptr,
- GPUResourcesWithValue *resources) const override;
-
- absl::Status CreateFromTensorLinearDescriptor(const TensorLinearDescriptor &desc,
- CLContext *context);
-
-private:
- void Release();
-
- cl_mem memory_ = nullptr;
- int depth_;
- LinearStorageType storage_type_;
-};
-
-} // namespace gpu_cl
-} // namespace backend
-} // namespace onert
-
-#endif // __ONERT_BACKEND_GPU_CL_OPENCL_LINEAR_STORAGE_H__
diff --git a/runtime/onert/backend/gpu_cl/open_cl/Model.h b/runtime/onert/backend/gpu_cl/open_cl/Model.h
deleted file mode 100644
index f434bb22f..000000000
--- a/runtime/onert/backend/gpu_cl/open_cl/Model.h
+++ /dev/null
@@ -1,56 +0,0 @@
-/*
- * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
- * Copyright 2019 The TensorFlow Authors. All Rights Reserved.
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#ifndef __ONERT_BACKEND_GPU_CL_OPENCL_MODEL_H__
-#define __ONERT_BACKEND_GPU_CL_OPENCL_MODEL_H__
-
-#include <string>
-
-#include "absl/types/any.h"
-#include "InternalTensor.h"
-
-namespace onert
-{
-namespace backend
-{
-namespace gpu_cl
-{
-
-// Yet another representation of a CNN graph. Its primary purpose is to
-// simplify graph manipulation.
-
-using ValueId = uint32_t;
-
-// Used to emulate quantized behavior.
-struct QuantizationParams
-{
- float min = 0;
- float max = 0;
- float scale = 0;
-};
-
-struct Operation
-{
- std::string type;
- absl::any attributes;
-};
-
-} // namespace gpu_cl
-} // namespace backend
-} // namespace onert
-
-#endif // __ONERT_BACKEND_GPU_CL_OPENCL_MODEL_H__
diff --git a/runtime/onert/backend/gpu_cl/open_cl/ModelHints.h b/runtime/onert/backend/gpu_cl/open_cl/ModelHints.h
deleted file mode 100644
index 474c56b2a..000000000
--- a/runtime/onert/backend/gpu_cl/open_cl/ModelHints.h
+++ /dev/null
@@ -1,67 +0,0 @@
-/*
- * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
- * Copyright 2019 The TensorFlow Authors. All Rights Reserved.
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#ifndef __ONERT_BACKEND_GPU_CL_OPENCL_MODEL_HINTS_H__
-#define __ONERT_BACKEND_GPU_CL_OPENCL_MODEL_HINTS_H__
-
-#include <cstdint>
-
-namespace onert
-{
-namespace backend
-{
-namespace gpu_cl
-{
-
-struct ModelHints
-{
- using ModelHint = uint64_t;
-
- // By default we want the fastest inference.
- static constexpr ModelHint kFastestInference = 0x00000000;
- // Can improve compilation time, but inference can be slower.
- static constexpr ModelHint kReduceKernelsCount = 0x00000001;
- // Can improve tuning time, but inference can be slower.
- static constexpr ModelHint kFastTuning = 0x00000002;
-
- // Experimental.
- // Can improve performance and memory consumption, but slow down
- // initialization a lot and create more kernels.
- static constexpr ModelHint kAllowSpecialKernels = 0x00000004;
-
- void Add(ModelHint hint)
- {
- if (hint == kFastestInference)
- {
- hints = kFastestInference;
- }
- else
- {
- hints |= hint;
- }
- }
-
- bool Check(ModelHint hint) const { return hints & hint; }
-
- uint64_t hints = kFastestInference;
-};
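-
-// Usage sketch: hints combine as a bitmask, and adding kFastestInference
-// clears everything back to the default.
-//   ModelHints hints;
-//   hints.Add(ModelHints::kReduceKernelsCount);
-//   hints.Check(ModelHints::kReduceKernelsCount);  // true
-//   hints.Add(ModelHints::kFastestInference);      // resets to 0
-//   hints.Check(ModelHints::kReduceKernelsCount);  // false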
-
-} // namespace gpu_cl
-} // namespace backend
-} // namespace onert
-
-#endif // __ONERT_BACKEND_GPU_CL_OPENCL_MODEL_HINTS_H__
diff --git a/runtime/onert/backend/gpu_cl/open_cl/OpenclWrapper.cc b/runtime/onert/backend/gpu_cl/open_cl/OpenclWrapper.cc
deleted file mode 100644
index dbaf6faf6..000000000
--- a/runtime/onert/backend/gpu_cl/open_cl/OpenclWrapper.cc
+++ /dev/null
@@ -1,407 +0,0 @@
-/*
- * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
- * Copyright 2019 The TensorFlow Authors. All Rights Reserved.
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#if defined(_WIN32)
-#define __WINDOWS__
-#endif
-
-#include "OpenclWrapper.h"
-
-#ifdef __WINDOWS__
-#include <windows.h>
-#else
-#include <dlfcn.h>
-#endif
-
-#include <string>
-
-#include "absl/strings/str_cat.h"
-#include "Status.h"
-
-namespace onert
-{
-namespace backend
-{
-namespace gpu_cl
-{
-
-#ifdef __ANDROID__
-#define LoadFunction(function) \
- if (use_wrapper) \
- { \
- function = reinterpret_cast<PFN_##function>(loadOpenCLPointer(#function)); \
- } \
- else \
- { \
- function = reinterpret_cast<PFN_##function>(dlsym(*libopencl, #function)); \
- }
-#elif defined(__WINDOWS__)
-#define LoadFunction(function) \
- function = reinterpret_cast<PFN_##function>(GetProcAddress(libopencl, #function));
-#else
-#define LoadFunction(function) \
- function = reinterpret_cast<PFN_##function>(dlsym(*libopencl, #function));
-#endif
-
-#ifdef __WINDOWS__
-void LoadOpenCLFunctions(HMODULE libopencl);
-#else
-void LoadOpenCLFunctions(void **libopencl, bool use_wrapper);
-#endif
-
-absl::Status LoadOpenCL(void **libopencl)
-{
-#ifdef __WINDOWS__
- HMODULE libopencl = LoadLibraryA("OpenCL.dll");
- if (libopencl)
- {
- LoadOpenCLFunctions(libopencl);
- return absl::OkStatus();
- }
- else
- {
- DWORD error_code = GetLastError();
- return absl::UnknownError(
- absl::StrCat("Can not open OpenCL library on this device, error code - ", error_code));
- }
-#else
- *libopencl = dlopen("libOpenCL.so", RTLD_NOW | RTLD_LOCAL);
- if (*libopencl)
- {
- LoadOpenCLFunctions(libopencl, false);
- return absl::OkStatus();
- }
- // record error
- std::string error(dlerror());
-#ifdef __ANDROID__
- // Pixel phone or Android Automotive device?
- *libopencl = dlopen("libOpenCL-pixel.so", RTLD_NOW | RTLD_LOCAL);
- if (!*libopencl)
- {
- *libopencl = dlopen("libOpenCL-car.so", RTLD_NOW | RTLD_LOCAL);
- }
- if (*libopencl)
- {
- typedef void (*enableOpenCL_t)();
- enableOpenCL_t enableOpenCL =
- reinterpret_cast<enableOpenCL_t>(dlsym(*libopencl, "enableOpenCL"));
- enableOpenCL();
- LoadOpenCLFunctions(libopencl, true);
- return absl::OkStatus();
- }
-#endif
- return absl::UnknownError(absl::StrCat("Can not open OpenCL library on this device - ", error));
-#endif
-}
-
-void UnloadOpenCL(void *libopencl)
-{
- if (libopencl)
- {
- dlclose(libopencl);
- }
-}
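-
-// Typical call-site sketch (assumed; not part of this file):
-//   void *libopencl = nullptr;
-//   absl::Status status = LoadOpenCL(&libopencl);
-//   if (!status.ok()) { /* OpenCL unavailable; fall back to another backend */ }
-//   ...
-//   UnloadOpenCL(libopencl);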
-
-#ifdef __WINDOWS__
-void LoadOpenCLFunctions(HMODULE libopencl)
-{
-#else
-#ifdef __ANDROID__
-void LoadOpenCLFunctions(void **libopencl, bool use_wrapper)
-{
- typedef void *(*loadOpenCLPointer_t)(const char *name);
- loadOpenCLPointer_t loadOpenCLPointer;
- if (use_wrapper)
- {
- loadOpenCLPointer =
- reinterpret_cast<loadOpenCLPointer_t>(dlsym(*libopencl, "loadOpenCLPointer"));
- }
-#else
-void LoadOpenCLFunctions(void **libopencl, bool)
-{
-#endif // __ANDROID__
-#endif // __WINDOWS__
-
- LoadFunction(clGetPlatformIDs);
- LoadFunction(clGetPlatformInfo);
- LoadFunction(clGetDeviceIDs);
- LoadFunction(clGetDeviceInfo);
- LoadFunction(clCreateSubDevices);
- LoadFunction(clRetainDevice);
- LoadFunction(clReleaseDevice);
- LoadFunction(clCreateContext);
- LoadFunction(clCreateContextFromType);
- LoadFunction(clRetainContext);
- LoadFunction(clReleaseContext);
- LoadFunction(clGetContextInfo);
- LoadFunction(clCreateCommandQueueWithProperties);
- LoadFunction(clRetainCommandQueue);
- LoadFunction(clReleaseCommandQueue);
- LoadFunction(clGetCommandQueueInfo);
- LoadFunction(clCreateBuffer);
- LoadFunction(clCreateSubBuffer);
- LoadFunction(clCreateImage);
- LoadFunction(clCreatePipe);
- LoadFunction(clRetainMemObject);
- LoadFunction(clReleaseMemObject);
- LoadFunction(clGetSupportedImageFormats);
- LoadFunction(clGetMemObjectInfo);
- LoadFunction(clGetImageInfo);
- LoadFunction(clGetPipeInfo);
- LoadFunction(clSetMemObjectDestructorCallback);
- LoadFunction(clSVMAlloc);
- LoadFunction(clSVMFree);
- LoadFunction(clCreateSamplerWithProperties);
- LoadFunction(clRetainSampler);
- LoadFunction(clReleaseSampler);
- LoadFunction(clGetSamplerInfo);
- LoadFunction(clCreateProgramWithSource);
- LoadFunction(clCreateProgramWithBinary);
- LoadFunction(clCreateProgramWithBuiltInKernels);
- LoadFunction(clRetainProgram);
- LoadFunction(clReleaseProgram);
- LoadFunction(clBuildProgram);
- LoadFunction(clCompileProgram);
- LoadFunction(clLinkProgram);
- LoadFunction(clUnloadPlatformCompiler);
- LoadFunction(clGetProgramInfo);
- LoadFunction(clGetProgramBuildInfo);
- LoadFunction(clCreateKernel);
- LoadFunction(clCreateKernelsInProgram);
- LoadFunction(clRetainKernel);
- LoadFunction(clReleaseKernel);
- LoadFunction(clSetKernelArg);
- LoadFunction(clSetKernelArgSVMPointer);
- LoadFunction(clSetKernelExecInfo);
- LoadFunction(clGetKernelInfo);
- LoadFunction(clGetKernelArgInfo);
- LoadFunction(clGetKernelWorkGroupInfo);
- LoadFunction(clWaitForEvents);
- LoadFunction(clGetEventInfo);
- LoadFunction(clCreateUserEvent);
- LoadFunction(clRetainEvent);
- LoadFunction(clReleaseEvent);
- LoadFunction(clSetUserEventStatus);
- LoadFunction(clSetEventCallback);
- LoadFunction(clGetEventProfilingInfo);
- LoadFunction(clFlush);
- LoadFunction(clFinish);
- LoadFunction(clEnqueueReadBuffer);
- LoadFunction(clEnqueueReadBufferRect);
- LoadFunction(clEnqueueWriteBuffer);
- LoadFunction(clEnqueueWriteBufferRect);
- LoadFunction(clEnqueueFillBuffer);
- LoadFunction(clEnqueueCopyBuffer);
- LoadFunction(clEnqueueCopyBufferRect);
- LoadFunction(clEnqueueReadImage);
- LoadFunction(clEnqueueWriteImage);
- LoadFunction(clEnqueueFillImage);
- LoadFunction(clEnqueueCopyImage);
- LoadFunction(clEnqueueCopyImageToBuffer);
- LoadFunction(clEnqueueCopyBufferToImage);
- LoadFunction(clEnqueueMapBuffer);
- LoadFunction(clEnqueueMapImage);
- LoadFunction(clEnqueueUnmapMemObject);
- LoadFunction(clEnqueueMigrateMemObjects);
- LoadFunction(clEnqueueNDRangeKernel);
- LoadFunction(clEnqueueNativeKernel);
- LoadFunction(clEnqueueMarkerWithWaitList);
- LoadFunction(clEnqueueBarrierWithWaitList);
- LoadFunction(clEnqueueSVMFree);
- LoadFunction(clEnqueueSVMMemcpy);
- LoadFunction(clEnqueueSVMMemFill);
- LoadFunction(clEnqueueSVMMap);
- LoadFunction(clEnqueueSVMUnmap);
- LoadFunction(clGetExtensionFunctionAddressForPlatform);
- LoadFunction(clCreateImage2D);
- LoadFunction(clCreateImage3D);
- LoadFunction(clEnqueueMarker);
- LoadFunction(clEnqueueWaitForEvents);
- LoadFunction(clEnqueueBarrier);
- LoadFunction(clUnloadCompiler);
- LoadFunction(clGetExtensionFunctionAddress);
- LoadFunction(clCreateCommandQueue);
- LoadFunction(clCreateSampler);
- LoadFunction(clEnqueueTask);
-
- // OpenGL sharing
- LoadFunction(clCreateFromGLBuffer);
- LoadFunction(clCreateFromGLTexture);
- LoadFunction(clEnqueueAcquireGLObjects);
- LoadFunction(clEnqueueReleaseGLObjects);
-
- // cl_khr_egl_event extension
- LoadFunction(clCreateEventFromEGLSyncKHR);
-
- // EGL sharing
- LoadFunction(clCreateFromEGLImageKHR);
- LoadFunction(clEnqueueAcquireEGLObjectsKHR);
- LoadFunction(clEnqueueReleaseEGLObjectsKHR);
-} // LoadOpenCLFunctions
-
-// Definitions of the global function pointers declared in OpenclWrapper.h;
-// they are assigned at runtime by LoadOpenCLFunctions above.
-PFN_clGetPlatformIDs clGetPlatformIDs;
-PFN_clGetPlatformInfo clGetPlatformInfo;
-PFN_clGetDeviceIDs clGetDeviceIDs;
-PFN_clGetDeviceInfo clGetDeviceInfo;
-PFN_clCreateSubDevices clCreateSubDevices;
-PFN_clRetainDevice clRetainDevice;
-PFN_clReleaseDevice clReleaseDevice;
-PFN_clCreateContext clCreateContext;
-PFN_clCreateContextFromType clCreateContextFromType;
-PFN_clRetainContext clRetainContext;
-PFN_clReleaseContext clReleaseContext;
-PFN_clGetContextInfo clGetContextInfo;
-PFN_clCreateCommandQueueWithProperties clCreateCommandQueueWithProperties;
-PFN_clRetainCommandQueue clRetainCommandQueue;
-PFN_clReleaseCommandQueue clReleaseCommandQueue;
-PFN_clGetCommandQueueInfo clGetCommandQueueInfo;
-PFN_clCreateBuffer clCreateBuffer;
-PFN_clCreateSubBuffer clCreateSubBuffer;
-PFN_clCreateImage clCreateImage;
-PFN_clCreatePipe clCreatePipe;
-PFN_clRetainMemObject clRetainMemObject;
-PFN_clReleaseMemObject clReleaseMemObject;
-PFN_clGetSupportedImageFormats clGetSupportedImageFormats;
-PFN_clGetMemObjectInfo clGetMemObjectInfo;
-PFN_clGetImageInfo clGetImageInfo;
-PFN_clGetPipeInfo clGetPipeInfo;
-PFN_clSetMemObjectDestructorCallback clSetMemObjectDestructorCallback;
-PFN_clSVMAlloc clSVMAlloc;
-PFN_clSVMFree clSVMFree;
-PFN_clCreateSamplerWithProperties clCreateSamplerWithProperties;
-PFN_clRetainSampler clRetainSampler;
-PFN_clReleaseSampler clReleaseSampler;
-PFN_clGetSamplerInfo clGetSamplerInfo;
-PFN_clCreateProgramWithSource clCreateProgramWithSource;
-PFN_clCreateProgramWithBinary clCreateProgramWithBinary;
-PFN_clCreateProgramWithBuiltInKernels clCreateProgramWithBuiltInKernels;
-PFN_clRetainProgram clRetainProgram;
-PFN_clReleaseProgram clReleaseProgram;
-PFN_clBuildProgram clBuildProgram;
-PFN_clCompileProgram clCompileProgram;
-PFN_clLinkProgram clLinkProgram;
-PFN_clUnloadPlatformCompiler clUnloadPlatformCompiler;
-PFN_clGetProgramInfo clGetProgramInfo;
-PFN_clGetProgramBuildInfo clGetProgramBuildInfo;
-PFN_clCreateKernel clCreateKernel;
-PFN_clCreateKernelsInProgram clCreateKernelsInProgram;
-PFN_clRetainKernel clRetainKernel;
-PFN_clReleaseKernel clReleaseKernel;
-PFN_clSetKernelArg clSetKernelArg;
-PFN_clSetKernelArgSVMPointer clSetKernelArgSVMPointer;
-PFN_clSetKernelExecInfo clSetKernelExecInfo;
-PFN_clGetKernelInfo clGetKernelInfo;
-PFN_clGetKernelArgInfo clGetKernelArgInfo;
-PFN_clGetKernelWorkGroupInfo clGetKernelWorkGroupInfo;
-PFN_clWaitForEvents clWaitForEvents;
-PFN_clGetEventInfo clGetEventInfo;
-PFN_clCreateUserEvent clCreateUserEvent;
-PFN_clRetainEvent clRetainEvent;
-PFN_clReleaseEvent clReleaseEvent;
-PFN_clSetUserEventStatus clSetUserEventStatus;
-PFN_clSetEventCallback clSetEventCallback;
-PFN_clGetEventProfilingInfo clGetEventProfilingInfo;
-PFN_clFlush clFlush;
-PFN_clFinish clFinish;
-PFN_clEnqueueReadBuffer clEnqueueReadBuffer;
-PFN_clEnqueueReadBufferRect clEnqueueReadBufferRect;
-PFN_clEnqueueWriteBuffer clEnqueueWriteBuffer;
-PFN_clEnqueueWriteBufferRect clEnqueueWriteBufferRect;
-PFN_clEnqueueFillBuffer clEnqueueFillBuffer;
-PFN_clEnqueueCopyBuffer clEnqueueCopyBuffer;
-PFN_clEnqueueCopyBufferRect clEnqueueCopyBufferRect;
-PFN_clEnqueueReadImage clEnqueueReadImage;
-PFN_clEnqueueWriteImage clEnqueueWriteImage;
-PFN_clEnqueueFillImage clEnqueueFillImage;
-PFN_clEnqueueCopyImage clEnqueueCopyImage;
-PFN_clEnqueueCopyImageToBuffer clEnqueueCopyImageToBuffer;
-PFN_clEnqueueCopyBufferToImage clEnqueueCopyBufferToImage;
-PFN_clEnqueueMapBuffer clEnqueueMapBuffer;
-PFN_clEnqueueMapImage clEnqueueMapImage;
-PFN_clEnqueueUnmapMemObject clEnqueueUnmapMemObject;
-PFN_clEnqueueMigrateMemObjects clEnqueueMigrateMemObjects;
-PFN_clEnqueueNDRangeKernel clEnqueueNDRangeKernel;
-PFN_clEnqueueNativeKernel clEnqueueNativeKernel;
-PFN_clEnqueueMarkerWithWaitList clEnqueueMarkerWithWaitList;
-PFN_clEnqueueBarrierWithWaitList clEnqueueBarrierWithWaitList;
-PFN_clEnqueueSVMFree clEnqueueSVMFree;
-PFN_clEnqueueSVMMemcpy clEnqueueSVMMemcpy;
-PFN_clEnqueueSVMMemFill clEnqueueSVMMemFill;
-PFN_clEnqueueSVMMap clEnqueueSVMMap;
-PFN_clEnqueueSVMUnmap clEnqueueSVMUnmap;
-PFN_clGetExtensionFunctionAddressForPlatform clGetExtensionFunctionAddressForPlatform;
-PFN_clCreateImage2D clCreateImage2D;
-PFN_clCreateImage3D clCreateImage3D;
-PFN_clEnqueueMarker clEnqueueMarker;
-PFN_clEnqueueWaitForEvents clEnqueueWaitForEvents;
-PFN_clEnqueueBarrier clEnqueueBarrier;
-PFN_clUnloadCompiler clUnloadCompiler;
-PFN_clGetExtensionFunctionAddress clGetExtensionFunctionAddress;
-PFN_clCreateCommandQueue clCreateCommandQueue;
-PFN_clCreateSampler clCreateSampler;
-PFN_clEnqueueTask clEnqueueTask;
-
-// OpenGL sharing
-PFN_clCreateFromGLBuffer clCreateFromGLBuffer;
-PFN_clCreateFromGLTexture clCreateFromGLTexture;
-PFN_clEnqueueAcquireGLObjects clEnqueueAcquireGLObjects;
-PFN_clEnqueueReleaseGLObjects clEnqueueReleaseGLObjects;
-
-// cl_khr_egl_event extension
-PFN_clCreateEventFromEGLSyncKHR clCreateEventFromEGLSyncKHR;
-
-// EGL sharing
-PFN_clCreateFromEGLImageKHR clCreateFromEGLImageKHR;
-PFN_clEnqueueAcquireEGLObjectsKHR clEnqueueAcquireEGLObjectsKHR;
-PFN_clEnqueueReleaseEGLObjectsKHR clEnqueueReleaseEGLObjectsKHR;
-
-cl_mem CreateImage2DLegacy(cl_context context, cl_mem_flags flags,
- const cl_image_format *image_format, const cl_image_desc *image_desc,
- void *host_ptr, cl_int *errcode_ret)
-{
- if (clCreateImage)
- { // clCreateImage available since OpenCL 1.2
- return clCreateImage(context, flags, image_format, image_desc, host_ptr, errcode_ret);
- }
- else
- {
- return clCreateImage2D(context, flags, image_format, image_desc->image_width,
- image_desc->image_height, image_desc->image_row_pitch, host_ptr,
- errcode_ret);
- }
-}
-
-cl_mem CreateImage3DLegacy(cl_context context, cl_mem_flags flags,
- const cl_image_format *image_format, const cl_image_desc *image_desc,
- void *host_ptr, cl_int *errcode_ret)
-{
- if (clCreateImage)
- { // clCreateImage available since OpenCL 1.2
- return clCreateImage(context, flags, image_format, image_desc, host_ptr, errcode_ret);
- }
- else
- {
- return clCreateImage3D(context, flags, image_format, image_desc->image_width,
- image_desc->image_height, image_desc->image_depth,
- image_desc->image_row_pitch, image_desc->image_slice_pitch, host_ptr,
- errcode_ret);
- }
-}
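
A hedged usage sketch for the two helpers above: the caller fills one cl_image_desc and the helper dispatches to clCreateImage or the legacy entry point at runtime (the context variable and the dimensions below are illustrative):

cl_image_format format{CL_RGBA, CL_FLOAT};
cl_image_desc desc{};
desc.image_type = CL_MEM_OBJECT_IMAGE2D;
desc.image_width = 64;
desc.image_height = 64;
desc.image_row_pitch = 0; // let the implementation pick the pitch
cl_int err = CL_SUCCESS;
cl_mem image = CreateImage2DLegacy(context, CL_MEM_READ_WRITE, &format, &desc,
                                   /*host_ptr=*/nullptr, &err);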
-} // namespace gpu_cl
-} // namespace backend
-} // namespace onert
diff --git a/runtime/onert/backend/gpu_cl/open_cl/OpenclWrapper.h b/runtime/onert/backend/gpu_cl/open_cl/OpenclWrapper.h
deleted file mode 100644
index 021f8735a..000000000
--- a/runtime/onert/backend/gpu_cl/open_cl/OpenclWrapper.h
+++ /dev/null
@@ -1,560 +0,0 @@
-/*
- * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
- * Copyright 2019 The TensorFlow Authors. All Rights Reserved.
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#ifndef __ONERT_BACKEND_GPU_CL_OPENCL_WRAPPER_H__
-#define __ONERT_BACKEND_GPU_CL_OPENCL_WRAPPER_H__
-
-#include "CL/cl.h"
-#include "CL/cl_egl.h"
-#include "CL/cl_ext.h"
-#include "CL/cl_gl.h"
-#include "CL/cl_platform.h"
-#include "Status.h"
-
-namespace onert
-{
-namespace backend
-{
-namespace gpu_cl
-{
-
-absl::Status LoadOpenCL(void **libopencl);
-void UnloadOpenCL(void *libopencl);
-
-typedef cl_int(CL_API_CALL *PFN_clGetPlatformIDs)(
- cl_uint /* num_entries */, cl_platform_id * /* platforms */,
- cl_uint * /* num_platforms */) CL_API_SUFFIX__VERSION_1_0;
-typedef cl_int(CL_API_CALL *PFN_clGetPlatformInfo)(
- cl_platform_id /* platform */, cl_platform_info /* param_name */, size_t /* param_value_size */,
- void * /* param_value */, size_t * /* param_value_size_ret */) CL_API_SUFFIX__VERSION_1_0;
-typedef cl_int(CL_API_CALL *PFN_clGetDeviceIDs)(
- cl_platform_id /* platform */, cl_device_type /* device_type */, cl_uint /* num_entries */,
- cl_device_id * /* devices */, cl_uint * /* num_devices */) CL_API_SUFFIX__VERSION_1_0;
-typedef cl_int(CL_API_CALL *PFN_clGetDeviceInfo)(
- cl_device_id /* device */, cl_device_info /* param_name */, size_t /* param_value_size */,
- void * /* param_value */, size_t * /* param_value_size_ret */) CL_API_SUFFIX__VERSION_1_0;
-typedef cl_int(CL_API_CALL *PFN_clCreateSubDevices)(
- cl_device_id /* in_device */, const cl_device_partition_property * /* properties */,
- cl_uint /* num_devices */, cl_device_id * /* out_devices */,
- cl_uint * /* num_devices_ret */) CL_API_SUFFIX__VERSION_1_2;
-typedef cl_int(CL_API_CALL *PFN_clRetainDevice)(cl_device_id /* device */)
- CL_API_SUFFIX__VERSION_1_2;
-typedef cl_int(CL_API_CALL *PFN_clReleaseDevice)(cl_device_id /* device */)
- CL_API_SUFFIX__VERSION_1_2;
-typedef cl_context(CL_API_CALL *PFN_clCreateContext)(
- const cl_context_properties * /* properties */, cl_uint /* num_devices */,
- const cl_device_id * /* devices */,
- void(CL_CALLBACK * /* pfn_notify */)(const char *, const void *, size_t, void *),
- void * /* user_data */, cl_int * /* errcode_ret */) CL_API_SUFFIX__VERSION_1_0;
-typedef cl_context(CL_API_CALL *PFN_clCreateContextFromType)(
- const cl_context_properties * /* properties */, cl_device_type /* device_type */,
- void(CL_CALLBACK * /* pfn_notify*/)(const char *, const void *, size_t, void *),
- void * /* user_data */, cl_int * /* errcode_ret */) CL_API_SUFFIX__VERSION_1_0;
-typedef cl_int(CL_API_CALL *PFN_clRetainContext)(cl_context /* context */)
- CL_API_SUFFIX__VERSION_1_0;
-typedef cl_int(CL_API_CALL *PFN_clReleaseContext)(cl_context /* context */)
- CL_API_SUFFIX__VERSION_1_0;
-typedef cl_int(CL_API_CALL *PFN_clGetContextInfo)(
- cl_context /* context */, cl_context_info /* param_name */, size_t /* param_value_size */,
- void * /* param_value */, size_t * /* param_value_size_ret */) CL_API_SUFFIX__VERSION_1_0;
-typedef cl_command_queue(CL_API_CALL *PFN_clCreateCommandQueueWithProperties)(
- cl_context /* context */, cl_device_id /* device */, const cl_queue_properties * /* properties */,
- cl_int * /* errcode_ret */) CL_API_SUFFIX__VERSION_2_0;
-typedef cl_int(CL_API_CALL *PFN_clRetainCommandQueue)(cl_command_queue /* command_queue */)
- CL_API_SUFFIX__VERSION_1_0;
-typedef cl_int(CL_API_CALL *PFN_clReleaseCommandQueue)(cl_command_queue /* command_queue */)
- CL_API_SUFFIX__VERSION_1_0;
-typedef cl_int(CL_API_CALL *PFN_clGetCommandQueueInfo)(
- cl_command_queue /* command_queue */, cl_command_queue_info /* param_name */,
- size_t /* param_value_size */, void * /* param_value */,
- size_t * /* param_value_size_ret */) CL_API_SUFFIX__VERSION_1_0;
-typedef cl_mem(CL_API_CALL *PFN_clCreateBuffer)(
- cl_context /* context */, cl_mem_flags /* flags */, size_t /* size */, void * /* host_ptr */,
- cl_int * /* errcode_ret */) CL_API_SUFFIX__VERSION_1_0;
-typedef cl_mem(CL_API_CALL *PFN_clCreateSubBuffer)(
- cl_mem /* buffer */, cl_mem_flags /* flags */, cl_buffer_create_type /* buffer_create_type */,
- const void * /* buffer_create_info */, cl_int * /* errcode_ret */) CL_API_SUFFIX__VERSION_1_1;
-typedef cl_mem(CL_API_CALL *PFN_clCreateImage)(
- cl_context /* context */, cl_mem_flags /* flags */, const cl_image_format * /* image_format */,
- const cl_image_desc * /* image_desc */, void * /* host_ptr */,
- cl_int * /* errcode_ret */) CL_API_SUFFIX__VERSION_1_2;
-typedef cl_mem(CL_API_CALL *PFN_clCreatePipe)(
- cl_context /* context */, cl_mem_flags /* flags */, cl_uint /* pipe_packet_size */,
- cl_uint /* pipe_max_packets */, const cl_pipe_properties * /* properties */,
- cl_int * /* errcode_ret */) CL_API_SUFFIX__VERSION_2_0;
-typedef cl_int(CL_API_CALL *PFN_clRetainMemObject)(cl_mem /* memobj */) CL_API_SUFFIX__VERSION_1_0;
-typedef cl_int(CL_API_CALL *PFN_clReleaseMemObject)(cl_mem /* memobj */) CL_API_SUFFIX__VERSION_1_0;
-typedef cl_int(CL_API_CALL *PFN_clGetSupportedImageFormats)(
- cl_context /* context */, cl_mem_flags /* flags */, cl_mem_object_type /* image_type */,
- cl_uint /* num_entries */, cl_image_format * /* image_formats */,
- cl_uint * /* num_image_formats */) CL_API_SUFFIX__VERSION_1_0;
-typedef cl_int(CL_API_CALL *PFN_clGetMemObjectInfo)(
- cl_mem /* memobj */, cl_mem_info /* param_name */, size_t /* param_value_size */,
- void * /* param_value */, size_t * /* param_value_size_ret */) CL_API_SUFFIX__VERSION_1_0;
-typedef cl_int(CL_API_CALL *PFN_clGetImageInfo)(
- cl_mem /* image */, cl_image_info /* param_name */, size_t /* param_value_size */,
- void * /* param_value */, size_t * /* param_value_size_ret */) CL_API_SUFFIX__VERSION_1_0;
-typedef cl_int(CL_API_CALL *PFN_clGetPipeInfo)(
- cl_mem /* pipe */, cl_pipe_info /* param_name */, size_t /* param_value_size */,
- void * /* param_value */, size_t * /* param_value_size_ret */) CL_API_SUFFIX__VERSION_2_0;
-typedef cl_int(CL_API_CALL *PFN_clSetMemObjectDestructorCallback)(
- cl_mem /* memobj */,
- void(CL_CALLBACK * /*pfn_notify*/)(cl_mem /* memobj */, void * /*user_data*/),
- void * /*user_data */) CL_API_SUFFIX__VERSION_1_1;
-typedef void *(CL_API_CALL *PFN_clSVMAlloc)(cl_context /* context */, cl_svm_mem_flags /* flags */,
- size_t /* size */,
- cl_uint /* alignment */)CL_API_SUFFIX__VERSION_2_0;
-typedef void(CL_API_CALL *PFN_clSVMFree)(cl_context /* context */,
- void * /* svm_pointer */) CL_API_SUFFIX__VERSION_2_0;
-typedef cl_sampler(CL_API_CALL *PFN_clCreateSamplerWithProperties)(
- cl_context /* context */, const cl_sampler_properties * /* normalized_coords */,
- cl_int * /* errcode_ret */) CL_API_SUFFIX__VERSION_2_0;
-typedef cl_int(CL_API_CALL *PFN_clRetainSampler)(cl_sampler /* sampler */)
- CL_API_SUFFIX__VERSION_1_0;
-typedef cl_int(CL_API_CALL *PFN_clReleaseSampler)(cl_sampler /* sampler */)
- CL_API_SUFFIX__VERSION_1_0;
-typedef cl_int(CL_API_CALL *PFN_clGetSamplerInfo)(
- cl_sampler /* sampler */, cl_sampler_info /* param_name */, size_t /* param_value_size */,
- void * /* param_value */, size_t * /* param_value_size_ret */) CL_API_SUFFIX__VERSION_1_0;
-typedef cl_program(CL_API_CALL *PFN_clCreateProgramWithSource)(
- cl_context /* context */, cl_uint /* count */, const char ** /* strings */,
- const size_t * /* lengths */, cl_int * /* errcode_ret */) CL_API_SUFFIX__VERSION_1_0;
-typedef cl_program(CL_API_CALL *PFN_clCreateProgramWithBinary)(
- cl_context /* context */, cl_uint /* num_devices */, const cl_device_id * /* device_list */,
- const size_t * /* lengths */, const unsigned char ** /* binaries */, cl_int * /* binary_status */,
- cl_int * /* errcode_ret */) CL_API_SUFFIX__VERSION_1_0;
-typedef cl_program(CL_API_CALL *PFN_clCreateProgramWithBuiltInKernels)(
- cl_context /* context */, cl_uint /* num_devices */, const cl_device_id * /* device_list */,
- const char * /* kernel_names */, cl_int * /* errcode_ret */) CL_API_SUFFIX__VERSION_1_2;
-typedef cl_int(CL_API_CALL *PFN_clRetainProgram)(cl_program /* program */)
- CL_API_SUFFIX__VERSION_1_0;
-typedef cl_int(CL_API_CALL *PFN_clReleaseProgram)(cl_program /* program */)
- CL_API_SUFFIX__VERSION_1_0;
-typedef cl_int(CL_API_CALL *PFN_clBuildProgram)(
- cl_program /* program */, cl_uint /* num_devices */, const cl_device_id * /* device_list */,
- const char * /* options */,
- void(CL_CALLBACK * /* pfn_notify */)(cl_program /* program */, void * /* user_data */),
- void * /* user_data */) CL_API_SUFFIX__VERSION_1_0;
-typedef cl_int(CL_API_CALL *PFN_clCompileProgram)(
- cl_program /* program */, cl_uint /* num_devices */, const cl_device_id * /* device_list */,
- const char * /* options */, cl_uint /* num_input_headers */,
- const cl_program * /* input_headers */, const char ** /* header_include_names */,
- void(CL_CALLBACK * /* pfn_notify */)(cl_program /* program */, void * /* user_data */),
- void * /* user_data */) CL_API_SUFFIX__VERSION_1_2;
-typedef cl_program(CL_API_CALL *PFN_clLinkProgram)(
- cl_context /* context */, cl_uint /* num_devices */, const cl_device_id * /* device_list */,
- const char * /* options */, cl_uint /* num_input_programs */,
- const cl_program * /* input_programs */,
- void(CL_CALLBACK * /* pfn_notify */)(cl_program /* program */, void * /* user_data */),
- void * /* user_data */, cl_int * /* errcode_ret */) CL_API_SUFFIX__VERSION_1_2;
-typedef cl_int(CL_API_CALL *PFN_clUnloadPlatformCompiler)(cl_platform_id /* platform */)
- CL_API_SUFFIX__VERSION_1_2;
-typedef cl_int(CL_API_CALL *PFN_clGetProgramInfo)(
- cl_program /* program */, cl_program_info /* param_name */, size_t /* param_value_size */,
- void * /* param_value */, size_t * /* param_value_size_ret */) CL_API_SUFFIX__VERSION_1_0;
-typedef cl_int(CL_API_CALL *PFN_clGetProgramBuildInfo)(
- cl_program /* program */, cl_device_id /* device */, cl_program_build_info /* param_name */,
- size_t /* param_value_size */, void * /* param_value */,
- size_t * /* param_value_size_ret */) CL_API_SUFFIX__VERSION_1_0;
-typedef cl_kernel(CL_API_CALL *PFN_clCreateKernel)(
- cl_program /* program */, const char * /* kernel_name */,
- cl_int * /* errcode_ret */) CL_API_SUFFIX__VERSION_1_0;
-typedef cl_int(CL_API_CALL *PFN_clCreateKernelsInProgram)(
- cl_program /* program */, cl_uint /* num_kernels */, cl_kernel * /* kernels */,
- cl_uint * /* num_kernels_ret */) CL_API_SUFFIX__VERSION_1_0;
-typedef cl_int(CL_API_CALL *PFN_clRetainKernel)(cl_kernel /* kernel */) CL_API_SUFFIX__VERSION_1_0;
-typedef cl_int(CL_API_CALL *PFN_clReleaseKernel)(cl_kernel /* kernel */) CL_API_SUFFIX__VERSION_1_0;
-typedef cl_int(CL_API_CALL *PFN_clSetKernelArg)(cl_kernel /* kernel */, cl_uint /* arg_index */,
- size_t /* arg_size */, const void * /* arg_value */)
- CL_API_SUFFIX__VERSION_1_0;
-typedef cl_int(CL_API_CALL *PFN_clSetKernelArgSVMPointer)(
- cl_kernel /* kernel */, cl_uint /* arg_index */,
- const void * /* arg_value */) CL_API_SUFFIX__VERSION_2_0;
-typedef cl_int(CL_API_CALL *PFN_clSetKernelExecInfo)(
- cl_kernel /* kernel */, cl_kernel_exec_info /* param_name */, size_t /* param_value_size */,
- const void * /* param_value */) CL_API_SUFFIX__VERSION_2_0;
-typedef cl_int(CL_API_CALL *PFN_clGetKernelInfo)(
- cl_kernel /* kernel */, cl_kernel_info /* param_name */, size_t /* param_value_size */,
- void * /* param_value */, size_t * /* param_value_size_ret */) CL_API_SUFFIX__VERSION_1_0;
-typedef cl_int(CL_API_CALL *PFN_clGetKernelArgInfo)(
- cl_kernel /* kernel */, cl_uint /* arg_indx */, cl_kernel_arg_info /* param_name */,
- size_t /* param_value_size */, void * /* param_value */,
- size_t * /* param_value_size_ret */) CL_API_SUFFIX__VERSION_1_2;
-typedef cl_int(CL_API_CALL *PFN_clGetKernelWorkGroupInfo)(
- cl_kernel /* kernel */, cl_device_id /* device */, cl_kernel_work_group_info /* param_name */,
- size_t /* param_value_size */, void * /* param_value */,
- size_t * /* param_value_size_ret */) CL_API_SUFFIX__VERSION_1_0;
-typedef cl_int(CL_API_CALL *PFN_clWaitForEvents)(
- cl_uint /* num_events */, const cl_event * /* event_list */) CL_API_SUFFIX__VERSION_1_0;
-typedef cl_int(CL_API_CALL *PFN_clGetEventInfo)(
- cl_event /* event */, cl_event_info /* param_name */, size_t /* param_value_size */,
- void * /* param_value */, size_t * /* param_value_size_ret */) CL_API_SUFFIX__VERSION_1_0;
-typedef cl_event(CL_API_CALL *PFN_clCreateUserEvent)(
- cl_context /* context */, cl_int * /* errcode_ret */) CL_API_SUFFIX__VERSION_1_1;
-typedef cl_int(CL_API_CALL *PFN_clRetainEvent)(cl_event /* event */) CL_API_SUFFIX__VERSION_1_0;
-typedef cl_int(CL_API_CALL *PFN_clReleaseEvent)(cl_event /* event */) CL_API_SUFFIX__VERSION_1_0;
-typedef cl_int(CL_API_CALL *PFN_clSetUserEventStatus)(
- cl_event /* event */, cl_int /* execution_status */) CL_API_SUFFIX__VERSION_1_1;
-typedef cl_int(CL_API_CALL *PFN_clSetEventCallback)(
- cl_event /* event */, cl_int /* command_exec_callback_type */,
- void(CL_CALLBACK * /* pfn_notify */)(cl_event, cl_int, void *),
- void * /* user_data */) CL_API_SUFFIX__VERSION_1_1;
-typedef cl_int(CL_API_CALL *PFN_clGetEventProfilingInfo)(
- cl_event /* event */, cl_profiling_info /* param_name */, size_t /* param_value_size */,
- void * /* param_value */, size_t * /* param_value_size_ret */) CL_API_SUFFIX__VERSION_1_0;
-typedef cl_int(CL_API_CALL *PFN_clFlush)(cl_command_queue /* command_queue */)
- CL_API_SUFFIX__VERSION_1_0;
-typedef cl_int(CL_API_CALL *PFN_clFinish)(cl_command_queue /* command_queue */)
- CL_API_SUFFIX__VERSION_1_0;
-typedef cl_int(CL_API_CALL *PFN_clEnqueueReadBuffer)(
- cl_command_queue /* command_queue */, cl_mem /* buffer */, cl_bool /* blocking_read */,
- size_t /* offset */, size_t /* size */, void * /* ptr */, cl_uint /* num_events_in_wait_list */,
- const cl_event * /* event_wait_list */, cl_event * /* event */) CL_API_SUFFIX__VERSION_1_0;
-typedef cl_int(CL_API_CALL *PFN_clEnqueueReadBufferRect)(
- cl_command_queue /* command_queue */, cl_mem /* buffer */, cl_bool /* blocking_read */,
- const size_t * /* buffer_offset */, const size_t * /* host_offset */, const size_t * /* region */,
- size_t /* buffer_row_pitch */, size_t /* buffer_slice_pitch */, size_t /* host_row_pitch */,
- size_t /* host_slice_pitch */, void * /* ptr */, cl_uint /* num_events_in_wait_list */,
- const cl_event * /* event_wait_list */, cl_event * /* event */) CL_API_SUFFIX__VERSION_1_1;
-typedef cl_int(CL_API_CALL *PFN_clEnqueueWriteBuffer)(
- cl_command_queue /* command_queue */, cl_mem /* buffer */, cl_bool /* blocking_write */,
- size_t /* offset */, size_t /* size */, const void * /* ptr */,
- cl_uint /* num_events_in_wait_list */, const cl_event * /* event_wait_list */,
- cl_event * /* event */) CL_API_SUFFIX__VERSION_1_0;
-typedef cl_int(CL_API_CALL *PFN_clEnqueueWriteBufferRect)(
- cl_command_queue /* command_queue */, cl_mem /* buffer */, cl_bool /* blocking_write */,
- const size_t * /* buffer_offset */, const size_t * /* host_offset */, const size_t * /* region */,
- size_t /* buffer_row_pitch */, size_t /* buffer_slice_pitch */, size_t /* host_row_pitch */,
- size_t /* host_slice_pitch */, const void * /* ptr */, cl_uint /* num_events_in_wait_list */,
- const cl_event * /* event_wait_list */, cl_event * /* event */) CL_API_SUFFIX__VERSION_1_1;
-typedef cl_int(CL_API_CALL *PFN_clEnqueueFillBuffer)(
- cl_command_queue /* command_queue */, cl_mem /* buffer */, const void * /* pattern */,
- size_t /* pattern_size */, size_t /* offset */, size_t /* size */,
- cl_uint /* num_events_in_wait_list */, const cl_event * /* event_wait_list */,
- cl_event * /* event */) CL_API_SUFFIX__VERSION_1_2;
-typedef cl_int(CL_API_CALL *PFN_clEnqueueCopyBuffer)(
- cl_command_queue /* command_queue */, cl_mem /* src_buffer */, cl_mem /* dst_buffer */,
- size_t /* src_offset */, size_t /* dst_offset */, size_t /* size */,
- cl_uint /* num_events_in_wait_list */, const cl_event * /* event_wait_list */,
- cl_event * /* event */) CL_API_SUFFIX__VERSION_1_0;
-typedef cl_int(CL_API_CALL *PFN_clEnqueueCopyBufferRect)(
- cl_command_queue /* command_queue */, cl_mem /* src_buffer */, cl_mem /* dst_buffer */,
- const size_t * /* src_origin */, const size_t * /* dst_origin */, const size_t * /* region */,
- size_t /* src_row_pitch */, size_t /* src_slice_pitch */, size_t /* dst_row_pitch */,
- size_t /* dst_slice_pitch */, cl_uint /* num_events_in_wait_list */,
- const cl_event * /* event_wait_list */, cl_event * /* event */) CL_API_SUFFIX__VERSION_1_1;
-typedef cl_int(CL_API_CALL *PFN_clEnqueueReadImage)(
- cl_command_queue /* command_queue */, cl_mem /* image */, cl_bool /* blocking_read */,
- const size_t * /* origin[3] */, const size_t * /* region[3] */, size_t /* row_pitch */,
- size_t /* slice_pitch */, void * /* ptr */, cl_uint /* num_events_in_wait_list */,
- const cl_event * /* event_wait_list */, cl_event * /* event */) CL_API_SUFFIX__VERSION_1_0;
-typedef cl_int(CL_API_CALL *PFN_clEnqueueWriteImage)(
- cl_command_queue /* command_queue */, cl_mem /* image */, cl_bool /* blocking_write */,
- const size_t * /* origin[3] */, const size_t * /* region[3] */, size_t /* input_row_pitch */,
- size_t /* input_slice_pitch */, const void * /* ptr */, cl_uint /* num_events_in_wait_list */,
- const cl_event * /* event_wait_list */, cl_event * /* event */) CL_API_SUFFIX__VERSION_1_0;
-typedef cl_int(CL_API_CALL *PFN_clEnqueueFillImage)(
- cl_command_queue /* command_queue */, cl_mem /* image */, const void * /* fill_color */,
- const size_t * /* origin[3] */, const size_t * /* region[3] */,
- cl_uint /* num_events_in_wait_list */, const cl_event * /* event_wait_list */,
- cl_event * /* event */) CL_API_SUFFIX__VERSION_1_2;
-typedef cl_int(CL_API_CALL *PFN_clEnqueueCopyImage)(
- cl_command_queue /* command_queue */, cl_mem /* src_image */, cl_mem /* dst_image */,
- const size_t * /* src_origin[3] */, const size_t * /* dst_origin[3] */,
- const size_t * /* region[3] */, cl_uint /* num_events_in_wait_list */,
- const cl_event * /* event_wait_list */, cl_event * /* event */) CL_API_SUFFIX__VERSION_1_0;
-typedef cl_int(CL_API_CALL *PFN_clEnqueueCopyImageToBuffer)(
- cl_command_queue /* command_queue */, cl_mem /* src_image */, cl_mem /* dst_buffer */,
- const size_t * /* src_origin[3] */, const size_t * /* region[3] */, size_t /* dst_offset */,
- cl_uint /* num_events_in_wait_list */, const cl_event * /* event_wait_list */,
- cl_event * /* event */) CL_API_SUFFIX__VERSION_1_0;
-typedef cl_int(CL_API_CALL *PFN_clEnqueueCopyBufferToImage)(
- cl_command_queue /* command_queue */, cl_mem /* src_buffer */, cl_mem /* dst_image */,
- size_t /* src_offset */, const size_t * /* dst_origin[3] */, const size_t * /* region[3] */,
- cl_uint /* num_events_in_wait_list */, const cl_event * /* event_wait_list */,
- cl_event * /* event */) CL_API_SUFFIX__VERSION_1_0;
-typedef void *(CL_API_CALL *PFN_clEnqueueMapBuffer)(
- cl_command_queue /* command_queue */, cl_mem /* buffer */, cl_bool /* blocking_map */,
- cl_map_flags /* map_flags */, size_t /* offset */, size_t /* size */,
- cl_uint /* num_events_in_wait_list */, const cl_event * /* event_wait_list */,
- cl_event * /* event */, cl_int * /* errcode_ret */)CL_API_SUFFIX__VERSION_1_0;
-typedef void *(CL_API_CALL *PFN_clEnqueueMapImage)(
- cl_command_queue /* command_queue */, cl_mem /* image */, cl_bool /* blocking_map */,
- cl_map_flags /* map_flags */, const size_t * /* origin[3] */, const size_t * /* region[3] */,
- size_t * /* image_row_pitch */, size_t * /* image_slice_pitch */,
- cl_uint /* num_events_in_wait_list */, const cl_event * /* event_wait_list */,
- cl_event * /* event */, cl_int * /* errcode_ret */)CL_API_SUFFIX__VERSION_1_0;
-typedef cl_int(CL_API_CALL *PFN_clEnqueueUnmapMemObject)(
- cl_command_queue /* command_queue */, cl_mem /* memobj */, void * /* mapped_ptr */,
- cl_uint /* num_events_in_wait_list */, const cl_event * /* event_wait_list */,
- cl_event * /* event */) CL_API_SUFFIX__VERSION_1_0;
-typedef cl_int(CL_API_CALL *PFN_clEnqueueMigrateMemObjects)(
- cl_command_queue /* command_queue */, cl_uint /* num_mem_objects */,
- const cl_mem * /* mem_objects */, cl_mem_migration_flags /* flags */,
- cl_uint /* num_events_in_wait_list */, const cl_event * /* event_wait_list */,
- cl_event * /* event */) CL_API_SUFFIX__VERSION_1_2;
-typedef cl_int(CL_API_CALL *PFN_clEnqueueNDRangeKernel)(
- cl_command_queue /* command_queue */, cl_kernel /* kernel */, cl_uint /* work_dim */,
- const size_t * /* global_work_offset */, const size_t * /* global_work_size */,
- const size_t * /* local_work_size */, cl_uint /* num_events_in_wait_list */,
- const cl_event * /* event_wait_list */, cl_event * /* event */) CL_API_SUFFIX__VERSION_1_0;
-typedef cl_int(CL_API_CALL *PFN_clEnqueueNativeKernel)(
- cl_command_queue /* command_queue */, void(CL_CALLBACK * /*user_func*/)(void *),
- void * /* args */, size_t /* cb_args */, cl_uint /* num_mem_objects */,
- const cl_mem * /* mem_list */, const void ** /* args_mem_loc */,
- cl_uint /* num_events_in_wait_list */, const cl_event * /* event_wait_list */,
- cl_event * /* event */) CL_API_SUFFIX__VERSION_1_0;
-typedef cl_int(CL_API_CALL *PFN_clEnqueueMarkerWithWaitList)(
- cl_command_queue /* command_queue */, cl_uint /* num_events_in_wait_list */,
- const cl_event * /* event_wait_list */, cl_event * /* event */) CL_API_SUFFIX__VERSION_1_2;
-typedef cl_int(CL_API_CALL *PFN_clEnqueueBarrierWithWaitList)(
- cl_command_queue /* command_queue */, cl_uint /* num_events_in_wait_list */,
- const cl_event * /* event_wait_list */, cl_event * /* event */) CL_API_SUFFIX__VERSION_1_2;
-typedef cl_int(CL_API_CALL *PFN_clEnqueueSVMFree)(
- cl_command_queue /* command_queue */, cl_uint /* num_svm_pointers */,
- void *[] /* svm_pointers[] */,
- void(CL_CALLBACK * /*pfn_free_func*/)(cl_command_queue /* queue */,
- cl_uint /* num_svm_pointers */,
- void *[] /* svm_pointers[] */, void * /* user_data */),
- void * /* user_data */, cl_uint /* num_events_in_wait_list */,
- const cl_event * /* event_wait_list */, cl_event * /* event */) CL_API_SUFFIX__VERSION_2_0;
-typedef cl_int(CL_API_CALL *PFN_clEnqueueSVMMemcpy)(
- cl_command_queue /* command_queue */, cl_bool /* blocking_copy */, void * /* dst_ptr */,
- const void * /* src_ptr */, size_t /* size */, cl_uint /* num_events_in_wait_list */,
- const cl_event * /* event_wait_list */, cl_event * /* event */) CL_API_SUFFIX__VERSION_2_0;
-typedef cl_int(CL_API_CALL *PFN_clEnqueueSVMMemFill)(
- cl_command_queue /* command_queue */, void * /* svm_ptr */, const void * /* pattern */,
- size_t /* pattern_size */, size_t /* size */, cl_uint /* num_events_in_wait_list */,
- const cl_event * /* event_wait_list */, cl_event * /* event */) CL_API_SUFFIX__VERSION_2_0;
-typedef cl_int(CL_API_CALL *PFN_clEnqueueSVMMap)(
- cl_command_queue /* command_queue */, cl_bool /* blocking_map */, cl_map_flags /* flags */,
- void * /* svm_ptr */, size_t /* size */, cl_uint /* num_events_in_wait_list */,
- const cl_event * /* event_wait_list */, cl_event * /* event */) CL_API_SUFFIX__VERSION_2_0;
-typedef cl_int(CL_API_CALL *PFN_clEnqueueSVMUnmap)(
- cl_command_queue /* command_queue */, void * /* svm_ptr */, cl_uint /* num_events_in_wait_list */,
- const cl_event * /* event_wait_list */, cl_event * /* event */) CL_API_SUFFIX__VERSION_2_0;
-typedef void *(CL_API_CALL *PFN_clGetExtensionFunctionAddressForPlatform)(
- cl_platform_id /* platform */, const char * /* func_name */)CL_API_SUFFIX__VERSION_1_2;
-typedef cl_mem(CL_API_CALL *PFN_clCreateImage2D)(cl_context /* context */, cl_mem_flags /* flags */,
- const cl_image_format * /* image_format */,
- size_t /* image_width */,
- size_t /* image_height */,
- size_t /* image_row_pitch */,
- void * /* host_ptr */, cl_int * /* errcode_ret */);
-typedef cl_mem(CL_API_CALL *PFN_clCreateImage3D)(
- cl_context /* context */, cl_mem_flags /* flags */, const cl_image_format * /* image_format */,
- size_t /* image_width */, size_t /* image_height */, size_t /* image_depth */,
- size_t /* image_row_pitch */, size_t /* image_slice_pitch */, void * /* host_ptr */,
- cl_int * /* errcode_ret */);
-typedef cl_int(CL_API_CALL *PFN_clEnqueueMarker)(cl_command_queue /* command_queue */,
- cl_event * /* event */);
-typedef cl_int(CL_API_CALL *PFN_clEnqueueWaitForEvents)(cl_command_queue /* command_queue */,
- cl_uint /* num_events */,
- const cl_event * /* event_list */);
-typedef cl_int(CL_API_CALL *PFN_clEnqueueBarrier)(cl_command_queue /* command_queue */);
-typedef cl_int(CL_API_CALL *PFN_clUnloadCompiler)();
-typedef void *(CL_API_CALL *PFN_clGetExtensionFunctionAddress)(const char * /* func_name */);
-typedef cl_command_queue(CL_API_CALL *PFN_clCreateCommandQueue)(
- cl_context /* context */, cl_device_id /* device */, cl_command_queue_properties /* properties */,
- cl_int * /* errcode_ret */);
-typedef cl_sampler(CL_API_CALL *PFN_clCreateSampler)(cl_context /* context */,
- cl_bool /* normalized_coords */,
- cl_addressing_mode /* addressing_mode */,
- cl_filter_mode /* filter_mode */,
- cl_int * /* errcode_ret */);
-typedef cl_int(CL_API_CALL *PFN_clEnqueueTask)(cl_command_queue /* command_queue */,
- cl_kernel /* kernel */,
- cl_uint /* num_events_in_wait_list */,
- const cl_event * /* event_wait_list */,
- cl_event * /* event */);
-
-// OpenGL sharing
-typedef cl_mem(CL_API_CALL *PFN_clCreateFromGLBuffer)(cl_context, cl_mem_flags, cl_GLuint, int *);
-typedef cl_mem(CL_API_CALL *PFN_clCreateFromGLTexture)(
- cl_context /* context */, cl_mem_flags /* flags */, cl_GLenum /* target */,
- cl_GLint /* miplevel */, cl_GLuint /* texture */,
- cl_int * /* errcode_ret */) CL_API_SUFFIX__VERSION_1_2;
-typedef cl_int(CL_API_CALL *PFN_clEnqueueAcquireGLObjects)(cl_command_queue /* command_queue */,
- cl_uint /* num_objects */,
- const cl_mem * /* mem_objects */,
- cl_uint /* num_events_in_wait_list */,
- const cl_event * /* event_wait_list */,
- cl_event * /* event */);
-typedef cl_int(CL_API_CALL *PFN_clEnqueueReleaseGLObjects)(
- cl_command_queue /* command_queue */, cl_uint /* num_objects */, const cl_mem * /* mem_objects */,
- cl_uint /* num_events_in_wait_list */, const cl_event * /* event_wait_list */,
- cl_event * /* event */) CL_API_SUFFIX__VERSION_1_0;
-
-// cl_khr_egl_event extension
-
-// CLeglDisplayKHR is an opaque handle to an EGLDisplay
-typedef void *CLeglDisplayKHR;
-
-// CLeglSyncKHR is an opaque handle to an EGLSync object
-typedef void *CLeglSyncKHR;
-
-typedef cl_event(CL_API_CALL *PFN_clCreateEventFromEGLSyncKHR)(cl_context /* context */,
- CLeglSyncKHR /* sync */,
- CLeglDisplayKHR /* display */,
- cl_int * /* errcode_ret */);
-
-// EGL sharing
-typedef cl_mem(CL_API_CALL *PFN_clCreateFromEGLImageKHR)(
- cl_context /*context*/, CLeglDisplayKHR /*display*/, CLeglImageKHR /*image*/,
- cl_mem_flags /*flags*/, const cl_egl_image_properties_khr * /*properties*/,
- cl_int * /*errcode_ret*/);
-typedef cl_int(CL_API_CALL *PFN_clEnqueueAcquireEGLObjectsKHR)(
- cl_command_queue /*command_queue*/, cl_uint /*num_objects*/, const cl_mem * /*mem_objects*/,
- cl_uint /*num_events_in_wait_list*/, const cl_event * /*event_wait_list*/, cl_event * /*event*/);
-typedef cl_int(CL_API_CALL *PFN_clEnqueueReleaseEGLObjectsKHR)(
- cl_command_queue /*command_queue*/, cl_uint /*num_objects*/, const cl_mem * /*mem_objects*/,
- cl_uint /*num_events_in_wait_list*/, const cl_event * /*event_wait_list*/, cl_event * /*event*/);
-
-extern PFN_clGetPlatformIDs clGetPlatformIDs;
-extern PFN_clGetPlatformInfo clGetPlatformInfo;
-extern PFN_clGetDeviceIDs clGetDeviceIDs;
-extern PFN_clGetDeviceInfo clGetDeviceInfo;
-extern PFN_clCreateSubDevices clCreateSubDevices;
-extern PFN_clRetainDevice clRetainDevice;
-extern PFN_clReleaseDevice clReleaseDevice;
-extern PFN_clCreateContext clCreateContext;
-extern PFN_clCreateContextFromType clCreateContextFromType;
-extern PFN_clRetainContext clRetainContext;
-extern PFN_clReleaseContext clReleaseContext;
-extern PFN_clGetContextInfo clGetContextInfo;
-extern PFN_clCreateCommandQueueWithProperties clCreateCommandQueueWithProperties;
-extern PFN_clRetainCommandQueue clRetainCommandQueue;
-extern PFN_clReleaseCommandQueue clReleaseCommandQueue;
-extern PFN_clGetCommandQueueInfo clGetCommandQueueInfo;
-extern PFN_clCreateBuffer clCreateBuffer;
-extern PFN_clCreateSubBuffer clCreateSubBuffer;
-extern PFN_clCreateImage clCreateImage;
-extern PFN_clCreatePipe clCreatePipe;
-extern PFN_clRetainMemObject clRetainMemObject;
-extern PFN_clReleaseMemObject clReleaseMemObject;
-extern PFN_clGetSupportedImageFormats clGetSupportedImageFormats;
-extern PFN_clGetMemObjectInfo clGetMemObjectInfo;
-extern PFN_clGetImageInfo clGetImageInfo;
-extern PFN_clGetPipeInfo clGetPipeInfo;
-extern PFN_clSetMemObjectDestructorCallback clSetMemObjectDestructorCallback;
-extern PFN_clSVMAlloc clSVMAlloc;
-extern PFN_clSVMFree clSVMFree;
-extern PFN_clCreateSamplerWithProperties clCreateSamplerWithProperties;
-extern PFN_clRetainSampler clRetainSampler;
-extern PFN_clReleaseSampler clReleaseSampler;
-extern PFN_clGetSamplerInfo clGetSamplerInfo;
-extern PFN_clCreateProgramWithSource clCreateProgramWithSource;
-extern PFN_clCreateProgramWithBinary clCreateProgramWithBinary;
-extern PFN_clCreateProgramWithBuiltInKernels clCreateProgramWithBuiltInKernels;
-extern PFN_clRetainProgram clRetainProgram;
-extern PFN_clReleaseProgram clReleaseProgram;
-extern PFN_clBuildProgram clBuildProgram;
-extern PFN_clCompileProgram clCompileProgram;
-extern PFN_clLinkProgram clLinkProgram;
-extern PFN_clUnloadPlatformCompiler clUnloadPlatformCompiler;
-extern PFN_clGetProgramInfo clGetProgramInfo;
-extern PFN_clGetProgramBuildInfo clGetProgramBuildInfo;
-extern PFN_clCreateKernel clCreateKernel;
-extern PFN_clCreateKernelsInProgram clCreateKernelsInProgram;
-extern PFN_clRetainKernel clRetainKernel;
-extern PFN_clReleaseKernel clReleaseKernel;
-extern PFN_clSetKernelArg clSetKernelArg;
-extern PFN_clSetKernelArgSVMPointer clSetKernelArgSVMPointer;
-extern PFN_clSetKernelExecInfo clSetKernelExecInfo;
-extern PFN_clGetKernelInfo clGetKernelInfo;
-extern PFN_clGetKernelArgInfo clGetKernelArgInfo;
-extern PFN_clGetKernelWorkGroupInfo clGetKernelWorkGroupInfo;
-extern PFN_clWaitForEvents clWaitForEvents;
-extern PFN_clGetEventInfo clGetEventInfo;
-extern PFN_clCreateUserEvent clCreateUserEvent;
-extern PFN_clRetainEvent clRetainEvent;
-extern PFN_clReleaseEvent clReleaseEvent;
-extern PFN_clSetUserEventStatus clSetUserEventStatus;
-extern PFN_clSetEventCallback clSetEventCallback;
-extern PFN_clGetEventProfilingInfo clGetEventProfilingInfo;
-extern PFN_clFlush clFlush;
-extern PFN_clFinish clFinish;
-extern PFN_clEnqueueReadBuffer clEnqueueReadBuffer;
-extern PFN_clEnqueueReadBufferRect clEnqueueReadBufferRect;
-extern PFN_clEnqueueWriteBuffer clEnqueueWriteBuffer;
-extern PFN_clEnqueueWriteBufferRect clEnqueueWriteBufferRect;
-extern PFN_clEnqueueFillBuffer clEnqueueFillBuffer;
-extern PFN_clEnqueueCopyBuffer clEnqueueCopyBuffer;
-extern PFN_clEnqueueCopyBufferRect clEnqueueCopyBufferRect;
-extern PFN_clEnqueueReadImage clEnqueueReadImage;
-extern PFN_clEnqueueWriteImage clEnqueueWriteImage;
-extern PFN_clEnqueueFillImage clEnqueueFillImage;
-extern PFN_clEnqueueCopyImage clEnqueueCopyImage;
-extern PFN_clEnqueueCopyImageToBuffer clEnqueueCopyImageToBuffer;
-extern PFN_clEnqueueCopyBufferToImage clEnqueueCopyBufferToImage;
-extern PFN_clEnqueueMapBuffer clEnqueueMapBuffer;
-extern PFN_clEnqueueMapImage clEnqueueMapImage;
-extern PFN_clEnqueueUnmapMemObject clEnqueueUnmapMemObject;
-extern PFN_clEnqueueMigrateMemObjects clEnqueueMigrateMemObjects;
-extern PFN_clEnqueueNDRangeKernel clEnqueueNDRangeKernel;
-extern PFN_clEnqueueNativeKernel clEnqueueNativeKernel;
-extern PFN_clEnqueueMarkerWithWaitList clEnqueueMarkerWithWaitList;
-extern PFN_clEnqueueBarrierWithWaitList clEnqueueBarrierWithWaitList;
-extern PFN_clEnqueueSVMFree clEnqueueSVMFree;
-extern PFN_clEnqueueSVMMemcpy clEnqueueSVMMemcpy;
-extern PFN_clEnqueueSVMMemFill clEnqueueSVMMemFill;
-extern PFN_clEnqueueSVMMap clEnqueueSVMMap;
-extern PFN_clEnqueueSVMUnmap clEnqueueSVMUnmap;
-extern PFN_clGetExtensionFunctionAddressForPlatform clGetExtensionFunctionAddressForPlatform;
-extern PFN_clCreateImage2D clCreateImage2D;
-extern PFN_clCreateImage3D clCreateImage3D;
-extern PFN_clEnqueueMarker clEnqueueMarker;
-extern PFN_clEnqueueWaitForEvents clEnqueueWaitForEvents;
-extern PFN_clEnqueueBarrier clEnqueueBarrier;
-extern PFN_clUnloadCompiler clUnloadCompiler;
-extern PFN_clGetExtensionFunctionAddress clGetExtensionFunctionAddress;
-extern PFN_clCreateCommandQueue clCreateCommandQueue;
-extern PFN_clCreateSampler clCreateSampler;
-extern PFN_clEnqueueTask clEnqueueTask;
-
-// OpenGL sharing
-extern PFN_clCreateFromGLBuffer clCreateFromGLBuffer;
-extern PFN_clCreateFromGLTexture clCreateFromGLTexture;
-extern PFN_clEnqueueAcquireGLObjects clEnqueueAcquireGLObjects;
-extern PFN_clEnqueueReleaseGLObjects clEnqueueReleaseGLObjects;
-
-// cl_khr_egl_event extension
-extern PFN_clCreateEventFromEGLSyncKHR clCreateEventFromEGLSyncKHR;
-
-// EGL sharing
-extern PFN_clCreateFromEGLImageKHR clCreateFromEGLImageKHR;
-extern PFN_clEnqueueAcquireEGLObjectsKHR clEnqueueAcquireEGLObjectsKHR;
-extern PFN_clEnqueueReleaseEGLObjectsKHR clEnqueueReleaseEGLObjectsKHR;
-
-// For convenient image creation.
-// Uses clCreateImage when it is available (clCreateImage exists since OpenCL 1.2);
-// otherwise it falls back to the legacy clCreateImage2D.
-cl_mem CreateImage2DLegacy(cl_context context, cl_mem_flags flags,
- const cl_image_format *image_format, const cl_image_desc *image_desc,
- void *host_ptr, cl_int *errcode_ret);
-
-// Uses clCreateImage when it is available (clCreateImage exists since OpenCL 1.2);
-// otherwise it falls back to the legacy clCreateImage3D.
-cl_mem CreateImage3DLegacy(cl_context context, cl_mem_flags flags,
- const cl_image_format *image_format, const cl_image_desc *image_desc,
- void *host_ptr, cl_int *errcode_ret);
-
-} // namespace gpu_cl
-} // namespace backend
-} // namespace onert
-
-#endif // __ONERT_BACKEND_GPU_CL_OPENCL_WRAPPER_H__
diff --git a/runtime/onert/backend/gpu_cl/open_cl/Operations.cc b/runtime/onert/backend/gpu_cl/open_cl/Operations.cc
deleted file mode 100644
index 2608b5364..000000000
--- a/runtime/onert/backend/gpu_cl/open_cl/Operations.cc
+++ /dev/null
@@ -1,704 +0,0 @@
-/*
- * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
- * Copyright 2020 The TensorFlow Authors. All Rights Reserved.
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#include "Operations.h"
-#include "open_cl/Operations.h"
-
-#include <algorithm>
-#include <cstdint>
-#include <set>
-#include <string>
-#include <utility>
-#include <vector>
-#include <unordered_map>
-
-#include "absl/container/flat_hash_map.h"
-
-#include "Shape.h"
-#include "Status.h"
-#include "InternalTensor.h"
-
-namespace onert
-{
-namespace backend
-{
-namespace gpu_cl
-{
-
-Padding2D &Padding2D::operator=(const Padding2D &value)
-{
- prepended = value.prepended;
- appended = value.appended;
- return *this;
-}
-
-bool Padding2D::operator==(const Padding2D &value)
-{
- return this->prepended == value.prepended && this->appended == value.appended;
-}
-
-bool Padding2D::operator!=(const Padding2D &value) { return !(*this == value); }
-
-Padding2D &Padding2D::operator-(const Padding2D &value)
-{
- prepended.h -= value.prepended.h;
- prepended.w -= value.prepended.w;
- appended.h -= value.appended.h;
- appended.w -= value.appended.w;
- return *this;
-}
-
-Padding3D &Padding3D::operator=(const Padding3D &value)
-{
- prepended = value.prepended;
- appended = value.appended;
- return *this;
-}
-
-bool Padding3D::operator==(const Padding3D &value)
-{
- return this->prepended == value.prepended && this->appended == value.appended;
-}
-
-bool Padding3D::operator!=(const Padding3D &value) { return !(*this == value); }
-
-Padding3D &Padding3D::operator-(const Padding3D &value)
-{
- prepended.h -= value.prepended.h;
- prepended.w -= value.prepended.w;
- prepended.d -= value.prepended.d;
- appended.h -= value.appended.h;
- appended.w -= value.appended.w;
- appended.d -= value.appended.d;
- return *this;
-}
-
-std::string ToString(enum OperationType op)
-{
- switch (op)
- {
- // case OperationType::ABS:
- // return "abs";
- case OperationType::ADD:
- return "add";
- // case OperationType::CONCAT:
- // return "concat";
- // case OperationType::COS:
- // return "cos";
- // case OperationType::EXP:
- // return "exp";
- // case OperationType::LOG:
- // return "log";
- // case OperationType::NEG:
- // return "neg";
- // case OperationType::POOLING_2D:
- // return "pooling_2d";
- // case OperationType::REDUCE_MAXIMUM:
- // return "reduce_maximum";
- // case OperationType::REDUCE_MINIMUM:
- // return "reduce_minimum";
- // case OperationType::REDUCE_PRODUCT:
- // return "reduce_product";
- // case OperationType::REDUCE_SUM:
- // return "reduce_sum";
- // case OperationType::RESIZE:
- // return "resize";
- // case OperationType::RELU:
- // return "relu";
- // case OperationType::RSQRT:
- // return "rsqrt";
- // case OperationType::SQRT:
- // return "sqrt";
- // case OperationType::SQUARE:
- // return "square";
- case OperationType::UNKNOWN:
- return "unknown_operation";
- }
- return "";
-}
-
-OperationType OperationTypeFromString(const std::string &name)
-{
- static const auto operations = new std::unordered_map<std::string, OperationType>({
- // {"abs", OperationType::ABS},
- {"add", OperationType::ADD},
- // {"concat", OperationType::CONCAT},
- // {"cos", OperationType::COS},
- // {"exp", OperationType::EXP},
- // {"log", OperationType::LOG},
- // {"neg", OperationType::NEG},
- // {"pooling_2d", OperationType::POOLING_2D},
- // {"reduce_maximum", OperationType::REDUCE_MAXIMUM},
- // {"reduce_minimum", OperationType::REDUCE_MINIMUM},
- // {"reduce_product", OperationType::REDUCE_PRODUCT},
- // {"reduce_sum", OperationType::REDUCE_SUM},
- // {"relu", OperationType::RELU},
- // {"resize", OperationType::RESIZE},
- // {"rsqrt", OperationType::RSQRT},
- // {"sqrt", OperationType::SQRT},
- // {"square", OperationType::SQUARE},
- });
- auto op = operations->find(name);
- return op == operations->end() ? OperationType::UNKNOWN : op->second;
-}
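
An illustrative round trip between the two name helpers; strings whose operations are commented out above fall through to OperationType::UNKNOWN:

#include <cassert>

void CheckOperationNames()
{
  using namespace onert::backend::gpu_cl;
  assert(OperationTypeFromString("add") == OperationType::ADD);
  assert(ToString(OperationType::ADD) == "add");
  assert(OperationTypeFromString("sqrt") == OperationType::UNKNOWN);
}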
-
-namespace
-{
-
-template <typename T> T DivideRoundUp(T n, T divisor) { return (n - 1) / divisor + 1; }
-
-int32_t CalculateOutputSizeBeforeStrides(int32_t input, int32_t kernel, int32_t padding,
- int32_t dilation)
-{
- const int32_t dilated_kernel = (kernel - 1) * dilation + 1;
- return input + padding - dilated_kernel + 1;
-}
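
A worked check of the formula above, written as a constexpr mirror so the numbers can be verified at compile time (the mirror's name is illustrative):

constexpr int32_t OutBeforeStrides(int32_t input, int32_t kernel, int32_t padding,
                                   int32_t dilation)
{
  return input + padding - ((kernel - 1) * dilation + 1) + 1;
}
// 3x3 kernel, 1+1 padding, no dilation: the spatial size is preserved.
static_assert(OutBeforeStrides(224, 3, 2, 1) == 224, "same-size case");
// Dilation 2 grows a 3x3 kernel to an effective 5x5, shrinking the output.
static_assert(OutBeforeStrides(224, 3, 0, 2) == 220, "dilated, unpadded case");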
-
-template <Axis T>
-int32_t CalculateOutputWithoutStrides(const BHWC &input, const Convolution2DAttributes &attr)
-{
- return CalculateOutputSizeBeforeStrides(
- input.get<T>(), attr.weights.shape.get<T>(),
- attr.padding.prepended.get<T>() + attr.padding.appended.get<T>(), attr.dilations.get<T>());
-}
-
-template <Axis T>
-int32_t CalculateOutputWithoutStrides(const BHWDC &input, const Convolution3DAttributes &attr)
-{
- return CalculateOutputSizeBeforeStrides(
- input.get<T>(), attr.weights.shape.get<T>(),
- attr.padding.prepended.get<T>() + attr.padding.appended.get<T>(), attr.dilations.get<T>());
-}
-
-template <Axis T>
-int32_t CalculateOutputWithoutStrides(const BHWC &input, const Pooling2DAttributes &attr)
-{
- return CalculateOutputSizeBeforeStrides(input.get<T>(), attr.kernel.get<T>(),
- attr.padding.prepended.get<T>() +
- attr.padding.appended.get<T>(),
- /*dilation=*/1);
-}
-
-template <Axis T>
-int32_t CalculateOutputWithoutStrides(const BHWDC &input, const Pooling3DAttributes &attr)
-{
- return CalculateOutputSizeBeforeStrides(input.get<T>(), attr.kernel.get<T>(),
- attr.padding.prepended.get<T>() +
- attr.padding.appended.get<T>(),
- /*dilation=*/1);
-}
-
-template <Axis T>
-int32_t CalculateOutput(const BHWC &input, const ConvolutionTransposedAttributes &attr)
-{
- return (input.get<T>() - 1) * attr.stride.get<T>() -
- (attr.padding.prepended.get<T>() + attr.padding.appended.get<T>()) +
- attr.weights.shape.get<T>() + attr.adjacent.get<T>();
-}
-
-template <Axis T>
-int32_t CalculateOutput(const BHWDC &input, const ConvolutionTransposed3DAttributes &attr)
-{
- return (input.get<T>() - 1) * attr.stride.get<T>() -
- (attr.padding.prepended.get<T>() + attr.padding.appended.get<T>()) +
- attr.weights.shape.get<T>();
-}
-
-inline int32_t StridedSize(int32_t size, int32_t stride)
-{
- return stride == 0 ? -1 : DivideRoundUp(size, stride);
-}
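
Two compile-time examples of the ceiling division performed by DivideRoundUp and StridedSize above (the constexpr mirror exists only so static_assert can check the arithmetic):

constexpr int32_t CeilDiv(int32_t n, int32_t d) { return (n - 1) / d + 1; }
static_assert(CeilDiv(10, 3) == 4, "10 elements at stride 3 -> 4 samples");
static_assert(CeilDiv(9, 3) == 3, "exact multiples are unchanged");
// StridedSize additionally maps stride == 0 to the sentinel value -1.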
-
-template <Axis AxisT, typename AttrT> int32_t CalculateOutput(const BHWC &input, const AttrT &attr)
-{
- return StridedSize(CalculateOutputWithoutStrides<AxisT>(input, attr),
- attr.strides.template get<AxisT>());
-}
-
-template <Axis AxisT, typename AttrT> int32_t CalculateOutput(const BHWDC &input, const AttrT &attr)
-{
- return StridedSize(CalculateOutputWithoutStrides<AxisT>(input, attr),
- attr.strides.template get<AxisT>());
-}
-
-int32_t CalculateSamePadding(int32_t input, int32_t kernel, int32_t dilation, int32_t stride)
-{
- const int32_t dilated_kernel = (kernel - 1) * dilation + 1;
- return std::max(0, dilated_kernel - (input - 1) % stride - 1);
-}
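
A worked example of the SAME-padding formula above, again via an illustrative constexpr mirror:

constexpr int32_t SamePad(int32_t input, int32_t kernel, int32_t dilation, int32_t stride)
{
  const int32_t pad = (kernel - 1) * dilation + 1 - (input - 1) % stride - 1;
  return pad > 0 ? pad : 0;
}
// 3x3 kernel, stride 1: two padding rows are needed in total (one per side).
static_assert(SamePad(224, 3, 1, 1) == 2, "stride 1");
// Stride 2 on an even input needs only a single padding row in total.
static_assert(SamePad(224, 3, 1, 2) == 1, "stride 2");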
-
-// Returns a padding that should be present to make sure image size stays
-// the same.
-template <Axis AxisT>
-int32_t CalculateSamePadding(const BHWC &input, const Convolution2DAttributes &attr)
-{
- return CalculateSamePadding(input.get<AxisT>(), attr.weights.shape.get<AxisT>(),
- attr.dilations.get<AxisT>(), attr.strides.get<AxisT>());
-}
-
-// Returns a padding that should be present to make sure image size stays
-// the same.
-template <Axis AxisT>
-int32_t CalculateSamePadding(const BHWDC &input, const Convolution3DAttributes &attr)
-{
- return CalculateSamePadding(input.get<AxisT>(), attr.weights.shape.get<AxisT>(),
- attr.dilations.get<AxisT>(), attr.strides.get<AxisT>());
-}
-
-template <Axis AxisT>
-int32_t CalculateSamePadding(const BHWC &input, const ConvolutionTransposedAttributes &attr)
-{
- return CalculateSamePadding(input.get<AxisT>(), attr.weights.shape.get<AxisT>(),
- /*dilation=*/1, attr.stride.get<AxisT>());
-}
-
-template <Axis AxisT>
-int32_t CalculateSamePadding(const BHWDC &input, const ConvolutionTransposed3DAttributes &attr)
-{
- return CalculateSamePadding(input.get<AxisT>(), attr.weights.shape.get<AxisT>(),
- /*dilation=*/1, attr.stride.get<AxisT>());
-}
-
-template <Axis AxisT>
-int32_t CalculateSamePadding(const BHWC &input, const Pooling2DAttributes &attr)
-{
- return CalculateSamePadding(input.get<AxisT>(), attr.kernel.get<AxisT>(),
- /*dilation=*/1, attr.strides.get<AxisT>());
-}
-
-template <Axis AxisT>
-int32_t CalculateSamePadding(const BHWDC &input, const Pooling3DAttributes &attr)
-{
- return CalculateSamePadding(input.get<AxisT>(), attr.kernel.get<AxisT>(),
- /*dilation=*/1, attr.strides.get<AxisT>());
-}
-
-template <Axis AxisT>
-int32_t CalculateSamePadding(const BHWC &input, const MaxUnpooling2DAttributes &attr)
-{
- return CalculateSamePadding(input.get<AxisT>(), attr.kernel.get<AxisT>(),
- /*dilation=*/1, attr.strides.get<AxisT>());
-}
-
-template <Axis AxisT>
-int32_t CalculateSamePadding(const BHWDC &input, const MaxUnpooling3DAttributes &attr)
-{
- return CalculateSamePadding(input.get<AxisT>(), attr.kernel.get<AxisT>(),
- /*dilation=*/1, attr.strides.get<AxisT>());
-}
-
-Padding2D MakeSamePadding(const BHWC &input, const ConvolutionTransposedAttributes &attr)
-{
- int32_t padding_height = CalculateSamePadding<Axis::HEIGHT>(input, attr);
- int32_t padding_width = CalculateSamePadding<Axis::WIDTH>(input, attr);
- Padding2D padding;
- padding.prepended = HW(padding_height / 2, padding_width / 2);
- padding.appended = HW(padding_height - padding_height / 2, padding_width - padding_width / 2);
- return padding;
-}
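
When the total from CalculateSamePadding is odd, MakeSamePadding puts the smaller half in prepended and the remainder in appended; for a total of 3 that is 1 before and 2 after:

// total = 3: prepended = 3 / 2 = 1, appended = 3 - 3 / 2 = 2
static_assert(3 / 2 == 1 && 3 - 3 / 2 == 2, "odd totals lean on the appended side");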
-
-Padding3D MakeSamePadding(const BHWDC &input, const ConvolutionTransposed3DAttributes &attr)
-{
- int32_t padding_height = CalculateSamePadding<Axis::HEIGHT>(input, attr);
- int32_t padding_width = CalculateSamePadding<Axis::WIDTH>(input, attr);
- int32_t padding_depth = CalculateSamePadding<Axis::DEPTH>(input, attr);
- Padding3D padding;
- padding.prepended = HWD(padding_height / 2, padding_width / 2, padding_depth / 2);
- padding.appended = HWD(padding_height - padding_height / 2, padding_width - padding_width / 2,
- padding_depth - padding_depth / 2);
- return padding;
-}
-
-// If padding depends on input, convert it into fixed padding.
-template <class AttrT> Padding2D MakeSamePadding(const BHWC &input, const AttrT &attr)
-{
- int32_t padding_height = CalculateSamePadding<Axis::HEIGHT>(input, attr);
- int32_t padding_width = CalculateSamePadding<Axis::WIDTH>(input, attr);
- Padding2D padding;
- padding.prepended = HW(padding_height / 2, padding_width / 2);
- padding.appended = HW(padding_height - padding_height / 2, padding_width - padding_width / 2);
- return padding;
-}
-
-// If padding depends on input, convert it into fixed padding.
-template <class AttrT> Padding3D MakeSamePadding(const BHWDC &input, const AttrT &attr)
-{
- int32_t padding_height = CalculateSamePadding<Axis::HEIGHT>(input, attr);
- int32_t padding_width = CalculateSamePadding<Axis::WIDTH>(input, attr);
- int32_t padding_depth = CalculateSamePadding<Axis::DEPTH>(input, attr);
- Padding3D padding;
- padding.prepended = HWD(padding_height / 2, padding_width / 2, padding_depth / 2);
- padding.appended = HWD(padding_height - padding_height / 2, padding_width - padding_width / 2,
- padding_depth - padding_depth / 2);
- return padding;
-}
-
-} // namespace
-
-BHWC CalculateOutputShape(const BHWC &input, const MaxUnpooling2DAttributes &attr)
-{
- return BHWC(
- input.b, input.h * attr.strides.h - attr.padding.prepended.h - attr.padding.appended.h,
- input.w * attr.strides.w - attr.padding.prepended.w - attr.padding.appended.w, input.c);
-}
-
-BHWDC CalculateOutputShape(const BHWDC &input, const MaxUnpooling3DAttributes &attr)
-{
- return BHWDC(
- input.b, input.h * attr.strides.h - attr.padding.prepended.h - attr.padding.appended.h,
- input.w * attr.strides.w - attr.padding.prepended.w - attr.padding.appended.w,
- input.d * attr.strides.d - attr.padding.prepended.d - attr.padding.appended.d, input.c);
-}
-
-BHWC CalculateOutputShape(const BHWC &input, const Pooling2DAttributes &attr)
-{
- return BHWC(input.b, CalculateOutput<Axis::HEIGHT>(input, attr),
- CalculateOutput<Axis::WIDTH>(input, attr), input.c);
-}
-
-BHWDC CalculateOutputShape(const BHWDC &input, const Pooling3DAttributes &attr)
-{
- return BHWDC(input.b, CalculateOutput<Axis::HEIGHT>(input, attr),
- CalculateOutput<Axis::WIDTH>(input, attr), CalculateOutput<Axis::DEPTH>(input, attr),
- input.c);
-}
-
-BHWC CalculateOutputShape(const BHWC &input, const Convolution2DAttributes &attr)
-{
- return BHWC(input.b, CalculateOutput<Axis::HEIGHT>(input, attr),
- CalculateOutput<Axis::WIDTH>(input, attr),
- attr.weights.shape.get<Axis::OUTPUT_CHANNELS>());
-}
-
-BHWDC CalculateOutputShape(const BHWDC &input, const Convolution3DAttributes &attr)
-{
- return BHWDC(input.b, CalculateOutput<Axis::HEIGHT>(input, attr),
- CalculateOutput<Axis::WIDTH>(input, attr), CalculateOutput<Axis::DEPTH>(input, attr),
- attr.weights.shape.get<Axis::OUTPUT_CHANNELS>());
-}
-
-BHWC CalculateOutputShape(const BHWC &input, const ConvolutionTransposedAttributes &attr)
-{
- return BHWC(input.b, CalculateOutput<Axis::HEIGHT>(input, attr),
- CalculateOutput<Axis::WIDTH>(input, attr),
- attr.weights.shape.get<Axis::OUTPUT_CHANNELS>());
-}
-
-BHWDC CalculateOutputShape(const BHWDC &input, const ConvolutionTransposed3DAttributes &attr)
-{
- return BHWDC(input.b, CalculateOutput<Axis::HEIGHT>(input, attr),
- CalculateOutput<Axis::WIDTH>(input, attr), CalculateOutput<Axis::DEPTH>(input, attr),
- attr.weights.shape.get<Axis::OUTPUT_CHANNELS>());
-}
-
-BHWC CalculateOutputShape(const BHWC &input, const DepthwiseConvolution2DAttributes &attr)
-{
- return BHWC(input.b, CalculateOutput<Axis::HEIGHT>(input, attr),
- CalculateOutput<Axis::WIDTH>(input, attr),
- attr.weights.shape.get<Axis::OUTPUT_CHANNELS>() *
- attr.weights.shape.get<Axis::INPUT_CHANNELS>());
-}
-
-BHWDC CalculateOutputShape(const BHWDC &input, const DepthwiseConvolution3DAttributes &attr)
-{
- return BHWDC(input.b, CalculateOutput<Axis::HEIGHT>(input, attr),
- CalculateOutput<Axis::WIDTH>(input, attr), CalculateOutput<Axis::DEPTH>(input, attr),
- attr.weights.shape.get<Axis::OUTPUT_CHANNELS>() *
- attr.weights.shape.get<Axis::INPUT_CHANNELS>());
-}
-
-BHWC CalculateOutputShape(const BHWC &input, const SliceAttributes &attr)
-{
- (void)input;
- return BHWC(StridedSize(attr.ends.b - attr.starts.b, attr.strides.b),
- StridedSize(attr.ends.h - attr.starts.h, attr.strides.h),
- StridedSize(attr.ends.w - attr.starts.w, attr.strides.w),
- StridedSize(attr.ends.c - attr.starts.c, attr.strides.c));
-}
-
-BHWDC CalculateOutputShape(const BHWDC &input, const Slice3DAttributes &attr)
-{
- (void)input;
- return BHWDC(StridedSize(attr.ends.b - attr.starts.b, attr.strides.b),
- StridedSize(attr.ends.h - attr.starts.h, attr.strides.h),
- StridedSize(attr.ends.w - attr.starts.w, attr.strides.w),
- StridedSize(attr.ends.d - attr.starts.d, attr.strides.d),
- StridedSize(attr.ends.c - attr.starts.c, attr.strides.c));
-}
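StridedSize, used just above, is defined earlier in this file and not visible in this hunk. A standalone sketch under the assumption that it performs a ceiling division, i.e. counts the elements visited when stepping by the stride:

#include <cstdint>
#include <iostream>

// Assumed semantics of StridedSize: the number of elements visited when
// stepping through `size` elements with the given stride (ceil division).
int32_t StridedSize(int32_t size, int32_t stride)
{
  return stride == 0 ? -1 : (size + stride - 1) / stride;
}

int main()
{
  // Slicing [2, 9) with stride 3 visits indices 2, 5 and 8:
  std::cout << StridedSize(9 - 2, 3) << "\n"; // 3
  return 0;
}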
-
-BHWC CalculateOutputShape(const BHWC &input, const PadAttributes &attr)
-{
- return BHWC(
- attr.appended.b + attr.prepended.b + input.b, attr.appended.h + attr.prepended.h + input.h,
- attr.appended.w + attr.prepended.w + input.w, attr.appended.c + attr.prepended.c + input.c);
-}
-
-BHWDC CalculateOutputShape(const BHWDC &input, const Pad3DAttributes &attr)
-{
- return BHWDC(
- attr.appended.b + attr.prepended.b + input.b, attr.appended.h + attr.prepended.h + input.h,
- attr.appended.w + attr.prepended.w + input.w, attr.appended.d + attr.prepended.d + input.d,
- attr.appended.c + attr.prepended.c + input.c);
-}
-
-BHWC CalculateOutputShape(const BHWC &input, const FullyConnectedAttributes &attr)
-{
- return BHWC(input.b, 1, 1, attr.weights.shape.o);
-}
-
-BHWC CalculateOutputShape(const BHWC &input, const MeanAttributes &attr)
-{
- const int b = attr.dims.find(Axis::BATCH) == attr.dims.end() ? input.b : 1;
- const int h = attr.dims.find(Axis::HEIGHT) == attr.dims.end() ? input.h : 1;
- const int w = attr.dims.find(Axis::WIDTH) == attr.dims.end() ? input.w : 1;
- const int c = attr.dims.find(Axis::CHANNELS) == attr.dims.end() ? input.c : 1;
- return BHWC(b, h, w, c);
-}
-
-absl::Status CalculateOutputShape(const std::vector<BHWC> &input, const ConcatAttributes &attr,
- BHWC *output_shape)
-{
- BHWC new_shape = input[0];
- switch (attr.axis)
- {
- case Axis::CHANNELS:
- for (size_t i = 1; i < input.size(); i++)
- {
- if (input[i].h != new_shape.h || input[i].w != new_shape.w || input[i].b != new_shape.b)
- {
- return absl::InvalidArgumentError(
- "Height, Width and Batch must be the same when concatenating "
- "by channels axis");
- }
- new_shape.c += input[i].c;
- }
- break;
- case Axis::HEIGHT:
- for (size_t i = 1; i < input.size(); i++)
- {
- if (input[i].w != new_shape.w || input[i].c != new_shape.c || input[i].b != new_shape.b)
- {
- return absl::InvalidArgumentError(
- "Channels, Width and Batch must be the same when concatenating "
- "by height axis");
- }
- new_shape.h += input[i].h;
- }
- break;
- case Axis::WIDTH:
- for (size_t i = 1; i < input.size(); i++)
- {
- if (input[i].h != new_shape.h || input[i].c != new_shape.c || input[i].b != new_shape.b)
- {
- return absl::InvalidArgumentError(
- "Height, Channels and Batch must be the same when concatenating "
- "by width axis");
- }
- new_shape.w += input[i].w;
- }
- break;
- case Axis::BATCH:
- for (size_t i = 1; i < input.size(); i++)
- {
- if (input[i].h != new_shape.h || input[i].c != new_shape.c || input[i].w != new_shape.w)
- {
- return absl::InvalidArgumentError(
- "Width, Height and Channels must be the same when concatenating "
- "by batch axis");
- }
- new_shape.b += input[i].b;
- }
- break;
- default:
- return absl::InvalidArgumentError("Invalid axis");
- break;
- }
- *output_shape = new_shape;
- return absl::OkStatus();
-}
-
-absl::Status CalculateOutputShape(const std::vector<BHWDC> &input, const ConcatAttributes &attr,
- BHWDC *output_shape)
-{
- BHWDC new_shape = input[0];
- switch (attr.axis)
- {
- case Axis::CHANNELS:
- for (size_t i = 1; i < input.size(); ++i)
- {
- if (input[i].h != new_shape.h || input[i].w != new_shape.w || input[i].d != new_shape.d ||
- input[i].b != new_shape.b)
- {
- return absl::InvalidArgumentError("Height, Width, Batch and Depth must be the same when "
- "concatenating "
- "by channels axis");
- }
- new_shape.c += input[i].c;
- }
- break;
- case Axis::HEIGHT:
- for (size_t i = 1; i < input.size(); ++i)
- {
- if (input[i].w != new_shape.w || input[i].c != new_shape.c || input[i].d != new_shape.d ||
- input[i].b != new_shape.b)
- {
- return absl::InvalidArgumentError(
- "Width, Depth, Batch and Channels must be the same when "
- "concatenating "
- "by height axis");
- }
- new_shape.h += input[i].h;
- }
- break;
- case Axis::WIDTH:
- for (size_t i = 1; i < input.size(); ++i)
- {
- if (input[i].h != new_shape.h || input[i].c != new_shape.c || input[i].d != new_shape.d ||
- input[i].b != new_shape.b)
- {
- return absl::InvalidArgumentError(
- "Height, Depth, Batch and Channels must be the same when "
- "concatenating "
- "by width axis");
- }
- new_shape.w += input[i].w;
- }
- break;
- case Axis::DEPTH:
- for (size_t i = 1; i < input.size(); ++i)
- {
- if (input[i].w != new_shape.w || input[i].h != new_shape.h || input[i].c != new_shape.c ||
- input[i].b != new_shape.b)
- {
- return absl::InvalidArgumentError(
- "Width, Height, Batch and Channels must be the same when "
- "concatenating "
- "by depth axis");
- }
- new_shape.d += input[i].d;
- }
- break;
- case Axis::BATCH:
- for (size_t i = 1; i < input.size(); ++i)
- {
- if (input[i].w != new_shape.w || input[i].h != new_shape.h || input[i].c != new_shape.c ||
- input[i].d != new_shape.d)
- {
- return absl::InvalidArgumentError(
- "Width, Height, Depth and Channels must be the same when "
- "concatenating "
- "by batch axis");
- }
- new_shape.b += input[i].b;
- }
- break;
- default:
- return absl::InvalidArgumentError("Invalid axis");
- }
- *output_shape = new_shape;
- return absl::OkStatus();
-}
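A worked example of the channels-axis branch above, reduced to a standalone sketch with BHWC as a plain aggregate:

#include <iostream>
#include <vector>

struct BHWC { int b, h, w, c; }; // stand-in for the real StrongShape

// Concat along channels: B, H and W must match; C accumulates.
bool ConcatChannels(const std::vector<BHWC> &in, BHWC *out)
{
  BHWC shape = in[0];
  for (size_t i = 1; i < in.size(); ++i)
  {
    if (in[i].b != shape.b || in[i].h != shape.h || in[i].w != shape.w)
      return false; // mismatch on a non-concat axis
    shape.c += in[i].c;
  }
  *out = shape;
  return true;
}

int main()
{
  BHWC out;
  // {1,8,8,3} concatenated with {1,8,8,5} along channels -> {1,8,8,8}
  if (ConcatChannels({{1, 8, 8, 3}, {1, 8, 8, 5}}, &out))
    std::cout << out.b << "x" << out.h << "x" << out.w << "x" << out.c << "\n";
  return 0;
}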
-
-Padding2D CalculateSamePadding(const BHWC &input, const Convolution2DAttributes &attr)
-{
- return MakeSamePadding(input, attr);
-}
-
-Padding3D CalculateSamePadding(const BHWDC &input, const Convolution3DAttributes &attr)
-{
- return MakeSamePadding(input, attr);
-}
-
-Padding2D CalculateSamePadding(const BHWC &input, const ConvolutionTransposedAttributes &attr)
-{
- return MakeSamePadding(input, attr);
-}
-
-Padding3D CalculateSamePadding(const BHWDC &input, const ConvolutionTransposed3DAttributes &attr)
-{
- return MakeSamePadding(input, attr);
-}
-
-Padding2D CalculateSamePadding(const BHWC &input, const DepthwiseConvolution2DAttributes &attr)
-{
- return MakeSamePadding(input, attr);
-}
-
-Padding3D CalculateSamePadding(const BHWDC &input, const DepthwiseConvolution3DAttributes &attr)
-{
- return MakeSamePadding(input, attr);
-}
-
-Padding2D CalculateSamePadding(const BHWC &input, const Pooling2DAttributes &attr)
-{
- return MakeSamePadding(input, attr);
-}
-
-Padding3D CalculateSamePadding(const BHWDC &input, const Pooling3DAttributes &attr)
-{
- return MakeSamePadding(input, attr);
-}
-
-Padding2D CalculateSamePadding(const BHWC &input, const MaxUnpooling2DAttributes &attr)
-{
- return MakeSamePadding(input, attr);
-}
-
-Padding3D CalculateSamePadding(const BHWDC &input, const MaxUnpooling3DAttributes &attr)
-{
- return MakeSamePadding(input, attr);
-}
-
-float CalculateResizeScale(int32_t input_size, int32_t output_size, const Resize2DAttributes &attr)
-{
- return attr.align_corners && input_size > 1 && output_size > 1
- ? static_cast<float>(input_size - 1) / (output_size - 1)
- : static_cast<float>(input_size) / output_size;
-}
-
-float CalculateResizeScale(int32_t input_size, int32_t output_size, const Resize3DAttributes &attr)
-{
- return attr.align_corners && input_size > 1 && output_size > 1
- ? static_cast<float>(input_size - 1) / (output_size - 1)
- : static_cast<float>(input_size) / output_size;
-}
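A small demonstration of how the align_corners branch above changes the effective scale, with the attribute reduced to a bool:

#include <cstdint>
#include <iostream>

float ResizeScale(int32_t input_size, int32_t output_size, bool align_corners)
{
  return align_corners && input_size > 1 && output_size > 1
           ? static_cast<float>(input_size - 1) / (output_size - 1)
           : static_cast<float>(input_size) / output_size;
}

int main()
{
  // Upsampling 4 -> 8:
  std::cout << ResizeScale(4, 8, false) << "\n"; // 0.5
  std::cout << ResizeScale(4, 8, true) << "\n";  // 3/7 ~= 0.428571
  return 0;
}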
-
-BHWC CalculateOutputShape(const BHWC &input, const Resize2DAttributes &attr)
-{
- return BHWC(input.b, attr.new_shape.h, attr.new_shape.w, input.c);
-}
-
-BHWDC CalculateOutputShape(const BHWDC &input, const Resize3DAttributes &attr)
-{
- return BHWDC(input.b, attr.new_shape.h, attr.new_shape.w, attr.new_shape.d, input.c);
-}
-
-BHWC CalculateOutputShape(const BHWC &input, const TransposeAttributes &attr)
-{
- return BHWC(input.get(attr.perm.b), input.get(attr.perm.h), input.get(attr.perm.w),
- input.get(attr.perm.c));
-}
-
-BHWDC CalculateOutputShape(const BHWDC &input, const Transpose3DAttributes &attr)
-{
- return BHWDC(input.get(attr.perm.b), input.get(attr.perm.h), input.get(attr.perm.w),
- input.get(attr.perm.d), input.get(attr.perm.c));
-}
-
-} // namespace gpu_cl
-} // namespace backend
-} // namespace onert
diff --git a/runtime/onert/backend/gpu_cl/open_cl/Operations.h b/runtime/onert/backend/gpu_cl/open_cl/Operations.h
deleted file mode 100644
index 825eb90a4..000000000
--- a/runtime/onert/backend/gpu_cl/open_cl/Operations.h
+++ /dev/null
@@ -1,586 +0,0 @@
-/*
- * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
- * Copyright 2020 The TensorFlow Authors. All Rights Reserved.
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#ifndef __ONERT_BACKEND_GPU_CL_OPENCL_OPERATIONS_H__
-#define __ONERT_BACKEND_GPU_CL_OPENCL_OPERATIONS_H__
-
-#include <cstdint>
-#include <set>
-#include <string>
-#include <vector>
-
-#include "absl/types/variant.h"
-
-#include "DataType.h"
-#include "Shape.h"
-#include "Status.h"
-#include "InternalTensor.h"
-
-namespace onert
-{
-namespace backend
-{
-namespace gpu_cl
-{
-
-enum class OperationType
-{
- UNKNOWN = 0,
- // ABS,
- ADD,
- // BATCH_TO_SPACE,
- // BATCH_NORMALIZATION,
- // BATCHED_MATMUL,
- // CONCAT,
- // CONST,
- // CONVOLUTION_2D,
- // CONVOLUTION_TRANSPOSED,
- // COPY,
- // COS,
- // DEPTHWISE_CONVOLUTION,
- // DIV,
- // ELU,
- // EQUAL,
- // EXP,
- // FULLY_CONNECTED,
- // GREATER,
- // GREATER_EQUAL,
- // HARD_SWISH,
- // LESS,
- // LESS_EQUAL,
- // LOG,
- // LSTM,
- // MAXIMUM,
- // MAX_UNPOOLING_2D,
- // MEAN,
- // MEAN_STDDEV_NORMALIZATION,
- // MINIMUM,
- // MUL,
- // NEG,
- // NOT_EQUAL,
- // PAD,
- // POOLING_2D,
- // POW,
- // PRELU,
- // Used to accurately run inference on quantized models.
- // QUANTIZE_AND_DEQUANTIZE,
- // REDUCE_MAXIMUM,
- // REDUCE_MINIMUM,
- // REDUCE_PRODUCT,
- // REDUCE_SUM,
- // RELU,
- // RESHAPE,
- // RESIZE,
- // RSQRT,
- // SIGMOID,
- // SIN,
- // SLICE,
- // SOFTMAX,
- // SPACE_TO_BATCH,
- // SPACE_TO_DEPTH,
- // SQRT,
- // SQUARE,
- // SQUARED_DIFF,
- // SUB,
- // TANH,
- // TRANSPOSE,
-};
-
-std::string ToString(enum OperationType op);
-
-OperationType OperationTypeFromString(const std::string &name);
-
-typedef absl::variant<absl::monostate, InternalTensor<HWC, DataType::FLOAT32>,
- InternalTensor<Linear, DataType::FLOAT32>, float>
- TensorOrScalar;
-
-struct Padding2D
-{
- Padding2D() = default;
- Padding2D(const Padding2D &);
- Padding2D &operator=(const Padding2D &value);
- bool operator==(const Padding2D &value);
- bool operator!=(const Padding2D &value);
- Padding2D &operator-(const Padding2D &value);
-
- // Padding values for every axis (if needed), where 'prepended' defines
- // padding for the beginning of each axis and 'appended' represents end part
- // of the corresponding axis.
- HW prepended = HW(-1, -1);
- HW appended = HW(-1, -1);
-};
-
-struct Padding3D
-{
- Padding3D() = default;
- Padding3D(const Padding3D &);
- Padding3D &operator=(const Padding3D &value);
- bool operator==(const Padding3D &value);
- bool operator!=(const Padding3D &value);
- Padding3D &operator-(const Padding3D &value);
- // Padding values for every axis (if needed), where 'prepended' defines
- // padding for the beginning of each axis and 'appended' represents end part
- // of the corresponding axis.
- HWD prepended = HWD(0, 0, 0);
- HWD appended = HWD(0, 0, 0);
-};
-
-struct Crop2D : public Padding2D
-{
-};
-
-struct SpaceToBatchAttributes
-{
- HW block;
- Padding2D padding;
-};
-
-struct BatchToSpaceAttributes
-{
- HW block;
- Crop2D crop;
-};
-
-enum class PoolingType
-{
- UNDEFINED = 0,
-
- // average pooling
- AVERAGE = 1,
-
- // max pooling
- MAX = 2,
-};
-
-struct Pooling2DAttributes
-{
- PoolingType type = PoolingType::UNDEFINED;
- // Strides for every axis.
- HW strides = HW(-1, -1);
- HW kernel = HW(-1, -1);
- Padding2D padding;
- // NOTE(akulik): technically the number of outputs from Pooling node indicates
- // whether indices are needed or not, but I decided to keep it inside
- // attributes to simplify processing.
- bool output_indices = false;
-};
-
-struct Pooling3DAttributes
-{
- PoolingType type = PoolingType::UNDEFINED;
- // Strides for every axis.
- HWD strides = HWD(0, 0, 0);
- HWD kernel = HWD(0, 0, 0);
- Padding3D padding;
- // NOTE(akulik): technically the number of outputs from Pooling node indicates
- // whether indices are needed or not, but I decided to keep it inside
- // attributes to simplify processing.
- bool output_indices = false;
-};
-
-struct MaxUnpooling2DAttributes
-{
- // Strides for every axis.
- HW strides = HW(-1, -1);
- HW kernel = HW(-1, -1);
- Padding2D padding;
-};
-
-struct MaxUnpooling3DAttributes
-{
- // Strides for every axis.
- HWD strides = HWD(0, 0, 0);
- HWD kernel = HWD(0, 0, 0);
- Padding3D padding;
-};
-
-struct MeanAttributes
-{
- // The set of dimensions to calculate the mean along.
- std::set<Axis> dims;
-};
-
-struct ConcatAttributes
-{
- // Defines the axis to concatenate along.
- Axis axis = Axis::UNKNOWN;
-};
-
-// @return shape of a tensor after MaxUnpooling2D operation is applied to
-// the given input.
-BHWC CalculateOutputShape(const BHWC &input, const MaxUnpooling2DAttributes &attr);
-
-// @return shape of a tensor after MaxUnpooling3D operation is applied to
-// the given input.
-BHWDC CalculateOutputShape(const BHWDC &input, const MaxUnpooling3DAttributes &attr);
-
-// @return shape of a tensor after Pooling2D operation is applied to the given
-// input.
-BHWC CalculateOutputShape(const BHWC &input, const Pooling2DAttributes &attr);
-
-// @return shape of a tensor after Pooling3D operation is applied to the given
-// input.
-BHWDC CalculateOutputShape(const BHWDC &input, const Pooling3DAttributes &attr);
-
-// @return shape of a tensor after Concat operation is applied to the given
-// input.
-absl::Status CalculateOutputShape(const std::vector<BHWC> &input, const ConcatAttributes &attr,
- BHWC *output_shape);
-
-// @return shape of a tensor after Concat operation is applied to the given
-// input.
-absl::Status CalculateOutputShape(const std::vector<BHWDC> &input, const ConcatAttributes &attr,
- BHWDC *output_shape);
-
-// @return padding for pooling operation to make sure the output keeps the same
-// shape as the given input.
-Padding2D CalculateSamePadding(const BHWC &input, const Pooling2DAttributes &attr);
-
-// @return padding for pooling operation to make sure the output keeps the same
-// shape as the given input.
-Padding3D CalculateSamePadding(const BHWDC &input, const Pooling3DAttributes &attr);
-
-// @return padding for max unpooling operation to make sure the output keeps
-// the same shape as the given input.
-Padding2D CalculateSamePadding(const BHWC &input, const MaxUnpooling2DAttributes &attr);
-
-// @return padding for max unpooling operation to make sure the output keeps
-// the same shape as the given input.
-Padding3D CalculateSamePadding(const BHWDC &input, const MaxUnpooling3DAttributes &attr);
-
-struct Convolution2DAttributes
-{
- HW strides = HW(1, 1); // Along each axis.
- HW dilations = HW(1, 1); // Along each axis.
- Padding2D padding;
-
- InternalTensor<OHWI, DataType::FLOAT32> weights;
- InternalTensor<Linear, DataType::FLOAT32> bias; // optional
-};
-
-struct Convolution3DAttributes
-{
- HWD strides = HWD(0, 0, 0); // Along each axis.
- HWD dilations = HWD(0, 0, 0); // Along each axis.
- Padding3D padding;
-
- InternalTensor<OHWDI, DataType::FLOAT32> weights;
- InternalTensor<Linear, DataType::FLOAT32> bias; // optional
-};
-
-// @return shape of a tensor after Convolution2D operation is applied to
-// the given input.
-BHWC CalculateOutputShape(const BHWC &input, const Convolution2DAttributes &attr);
-
-// @return shape of a tensor after Convolution3D operation is applied to
-// the given input.
-BHWDC CalculateOutputShape(const BHWDC &input, const Convolution3DAttributes &attr);
-
-// @return padding for convolution operation to make sure the output keeps the
-// same shape as the given input.
-Padding2D CalculateSamePadding(const BHWC &input, const Convolution2DAttributes &attr);
-
-// @return padding for convolution operation to make sure the output keeps the
-// same shape as the given input.
-Padding3D CalculateSamePadding(const BHWDC &input, const Convolution3DAttributes &attr);
-
-struct ConvolutionTransposedAttributes
-{
- HW stride = HW(1, 1); // Along each axis.
- HW adjacent; // TODO(sorokin): No op on Flow.
- Padding2D padding;
-
- InternalTensor<OHWI, DataType::FLOAT32> weights;
- InternalTensor<Linear, DataType::FLOAT32> bias; // optional
-};
-
-struct ConvolutionTransposed3DAttributes
-{
- HWD stride = HWD(0, 0, 0); // Along each axis.
- Padding3D padding;
-
- InternalTensor<OHWDI, DataType::FLOAT32> weights;
- InternalTensor<Linear, DataType::FLOAT32> bias; // optional
-};
-
-Padding2D CalculateSamePadding(const BHWC &input, const ConvolutionTransposedAttributes &attr);
-
-Padding3D CalculateSamePadding(const BHWDC &input, const ConvolutionTransposed3DAttributes &attr);
-
-// @return shape of a tensor after ConvolutionTransposed operation is applied to
-// the given input.
-BHWC CalculateOutputShape(const BHWC &input, const ConvolutionTransposedAttributes &attr);
-
-// @return shape of a tensor after ConvolutionTransposed3D operation is applied
-// to the given input.
-BHWDC CalculateOutputShape(const BHWDC &input, const ConvolutionTransposed3DAttributes &attr);
-
-struct DepthwiseConvolution2DAttributes : public Convolution2DAttributes
-{
-};
-struct DepthwiseConvolution3DAttributes : public Convolution3DAttributes
-{
-};
-
-// @return shape of a tensor after DepthwiseConvolution2D operation is applied
-// to the given input.
-BHWC CalculateOutputShape(const BHWC &input, const DepthwiseConvolution2DAttributes &attr);
-
-// @return shape of a tensor after DepthwiseConvolution3D operation is applied
-// to the given input.
-BHWDC CalculateOutputShape(const BHWDC &input, const DepthwiseConvolution3DAttributes &attr);
-
-// @return padding for depthwise convolution operation to make sure the output
-// keeps the same shape as the given input.
-Padding2D CalculateSamePadding(const BHWC &input, const DepthwiseConvolution2DAttributes &attr);
-
-// @return padding for depthwise convolution operation to make sure the output
-// keeps the same shape as the given input.
-Padding3D CalculateSamePadding(const BHWDC &input, const DepthwiseConvolution3DAttributes &attr);
-
-// f(x):= {
-// if x < 0 : x -> alpha * x
-// if x >= 0 : x -> min(clip, x)
-// }
-//
-// Examples:
-// - ReLU: clip = 0, alpha = 0
-// - ReLU6: clip = 6, alpha = 0
-// - Leaky ReLU: clip = 0, alpha = a
-struct ReLUAttributes
-{
- // clip <= 0 means it is not set.
- float clip = 0;
-
- float alpha = 0;
-};
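A minimal sketch of the f(x) documented above, honoring the clip <= 0 "not set" convention:

#include <algorithm>
#include <iostream>

// f(x) from the comment above; clip <= 0 means no upper clipping.
float ApplyReLU(float x, float clip, float alpha)
{
  if (x < 0.0f)
    return alpha * x;
  return clip > 0.0f ? std::min(clip, x) : x;
}

int main()
{
  std::cout << ApplyReLU(8.0f, 0.0f, 0.0f) << "\n";  // ReLU: 8
  std::cout << ApplyReLU(8.0f, 6.0f, 0.0f) << "\n";  // ReLU6: 6
  std::cout << ApplyReLU(-2.0f, 0.0f, 0.1f) << "\n"; // Leaky ReLU: -0.2
  return 0;
}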
-
-struct PReLUAttributes
-{
- // clip <= 0 means it is not set.
- float clip = 0;
-
- // If alpha is linear, then it is sharded across CHANNELS axis, otherwise
- // full shape alpha is required.
- absl::variant<InternalTensor<Linear, DataType::FLOAT32>, InternalTensor<HWC, DataType::FLOAT32>>
- alpha;
-};
-
-struct ReduceAttributes
-{
- Axis axis = Axis::UNKNOWN;
-};
-
-struct SoftmaxAttributes
-{
- Axis axis = Axis::UNKNOWN;
-};
-
-enum LstmKernelType
-{
- FULL = 0,
- BASIC = 1, // Currently, only basic is supported.
-};
-
-struct LstmAttributes
-{
- LstmKernelType kernel_type = LstmKernelType::BASIC;
-};
-
-enum class SamplingType
-{
- UNKNOWN = 0,
- NEAREST = 1,
- BILINEAR = 2,
-};
-
-struct Resize2DAttributes
-{
- HW new_shape;
-
- SamplingType type = SamplingType::UNKNOWN;
-
- // If true, the centers of the 4 corner pixels of the input and output tensors
- // are aligned, preserving the values at the corner pixels. Defaults to false.
- bool align_corners = false;
-
- bool half_pixel_centers = false;
-};
-
-// TODO(b/147771327): rename to Resize3D
-struct Resize3DAttributes
-{
- HWD new_shape;
-
- SamplingType type = SamplingType::NEAREST;
-
- // If true, the centers of the 8 corner pixels of the input and output tensors
- // are aligned, preserving the values at the corner pixels. Defaults to false.
- bool align_corners = false;
-
- bool half_pixel_centers = false;
-};
-
-float CalculateResizeScale(int32_t input_size, int32_t output_size, const Resize2DAttributes &attr);
-
-float CalculateResizeScale(int32_t input_size, int32_t output_size, const Resize3DAttributes &attr);
-
-// @return shape of a tensor after scale operation is applied to the given
-// input.
-BHWC CalculateOutputShape(const BHWC &input, const Resize2DAttributes &attr);
-
-// @return shape of a tensor after scale operation is applied to the given
-// input.
-BHWDC CalculateOutputShape(const BHWDC &input, const Resize3DAttributes &attr);
-
-enum class PaddingContentType
-{
- ZEROS = 0,
- REFLECT = 1,
- EDGE = 2,
-};
-
-struct PadAttributes
-{
- PaddingContentType type = PaddingContentType::ZEROS;
-
- BHWC prepended;
- BHWC appended;
-};
-
-// @return shape of a tensor after Pad operation is applied to the given input.
-BHWC CalculateOutputShape(const BHWC &input, const PadAttributes &attr);
-
-struct Pad3DAttributes
-{
- PaddingContentType type = PaddingContentType::ZEROS;
-
- BHWDC prepended;
- BHWDC appended;
-};
-
-// @return shape of a tensor after Pad3D operation is applied to the given
-// input.
-BHWDC CalculateOutputShape(const BHWDC &input, const Pad3DAttributes &attr);
-
-struct ConstTensorAttributes
-{
- InternalTensor<BHWC, DataType::FLOAT32> tensor;
-};
-
-// Simple slicing without advanced support for shrinking, reverse slicing etc.
-struct SliceAttributes
-{
- // Specifies start and end dimensions for slicing.
- BHWC starts;
- BHWC ends;
-
- // Stride should be >= 1.
- BHWC strides;
-};
-
-// @return shape of a tensor after Slice2D operation is applied to the given
-// input.
-BHWC CalculateOutputShape(const BHWC &input, const SliceAttributes &attr);
-
-// Simple slicing without advanced support for shrinking, reverse slicing etc.
-struct Slice3DAttributes
-{
- // Specifies start and end dimensions for slicing.
- BHWDC starts;
- BHWDC ends;
-
- // Stride should be >= 1.
- BHWDC strides;
-};
-
-// @return shape of a tensor after Slice3D operation is applied to the given
-// input.
-BHWDC CalculateOutputShape(const BHWDC &input, const Slice3DAttributes &attr);
-
-struct FullyConnectedAttributes
-{
- InternalTensor<OHWI, DataType::FLOAT32> weights;
- InternalTensor<Linear, DataType::FLOAT32> bias;
-};
-
-// @return shape of a tensor after FullyConnected operation is applied to
-// the given input.
-BHWC CalculateOutputShape(const BHWC &input, const FullyConnectedAttributes &attr);
-
-// @return shape of a tensor after Mean operation is applied to the given input.
-BHWC CalculateOutputShape(const BHWC &input, const MeanAttributes &attr);
-
-struct ElementwiseAttributes
-{
- TensorOrScalar param;
- // For an elementwise operation with 2 inputs op(A, B), runtime_tensor_is_second
- // is true when the runtime tensor is B (in the second position). This is
- // important for ops that are non-commutative, for example subtract.
- bool runtime_tensor_is_second = false;
-};
-
-struct ReshapeAttributes
-{
- BHWC new_shape;
-};
-
-struct Reshape3DAttributes
-{
- BHWDC new_shape;
-};
-
-struct TransposeAttributes
-{
- // A permutation of the dimensions of input tensor
- BHWC perm;
-};
-
-// @return shape of a tensor after Transpose operation is applied to
-// the given input.
-BHWC CalculateOutputShape(const BHWC &input, const TransposeAttributes &attr);
-
-struct Transpose3DAttributes
-{
- // A permutation of the dimensions of input tensor
- BHWDC perm;
-};
-
-// @return shape of a tensor after Transpose3D operation is applied to
-// the given input.
-BHWDC CalculateOutputShape(const BHWDC &input, const Transpose3DAttributes &attr);
-
-struct SpaceToDepthAttributes
-{
- int block_size;
-};
-
-// These help perform a combination of Quantize & Dequantize to adjust float
-// values like quantized inference would.
-struct QuantizeAndDequantizeAttributes
-{
- float min = 0;
- float max = 0;
- float scale = 0;
-};
-
-} // namespace gpu_cl
-} // namespace backend
-} // namespace onert
-
-#endif // __ONERT_BACKEND_GPU_CL_OPENCL_OPERATIONS_H__
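The CalculateOutputShape declarations above reduce to a per-axis size formula implemented in Operations.cc, outside this hunk; a standalone sketch assuming the standard convolution output-size rule:

#include <cstdint>
#include <iostream>

// Per-axis convolution output size: the per-tensor overloads apply this
// to HEIGHT, WIDTH and (for 3D) DEPTH.
int32_t ConvOutputSize(int32_t input, int32_t pad_total, int32_t kernel,
                       int32_t dilation, int32_t stride)
{
  const int32_t dilated_kernel = (kernel - 1) * dilation + 1;
  return (input + pad_total - dilated_kernel) / stride + 1;
}

int main()
{
  // 224-wide input, 3x3 kernel, stride 2, one pixel of total padding:
  std::cout << ConvOutputSize(224, 1, 3, 1, 2) << "\n"; // 112
  return 0;
}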
diff --git a/runtime/onert/backend/gpu_cl/open_cl/Precision.cc b/runtime/onert/backend/gpu_cl/open_cl/Precision.cc
deleted file mode 100644
index bd908bd43..000000000
--- a/runtime/onert/backend/gpu_cl/open_cl/Precision.cc
+++ /dev/null
@@ -1,56 +0,0 @@
-/*
- * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
- * Copyright 2019 The TensorFlow Authors. All Rights Reserved.
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#include "Precision.h"
-
-namespace onert
-{
-namespace backend
-{
-namespace gpu_cl
-{
-
-std::string ToString(CalculationsPrecision precision)
-{
- switch (precision)
- {
- case CalculationsPrecision::F32_F16:
- return "CalculationsPrecision::F32_F16";
- case CalculationsPrecision::F32:
- return "CalculationsPrecision::F32";
- case CalculationsPrecision::F16:
- return "CalculationsPrecision::F16";
- }
- return " ";
-}
-
-DataType DeduceDataTypeFromPrecision(CalculationsPrecision precision)
-{
- if (precision == CalculationsPrecision::F32)
- {
- return DataType::FLOAT32;
- }
- return DataType::FLOAT16;
-}
-
-} // namespace gpu_cl
-} // namespace backend
-} // namespace onert
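A compact illustration of the mapping implemented above: only pure F32 deduces FLOAT32 storage, while F16 and the mixed F32_F16 mode both deduce FLOAT16 (the enums here are local stand-ins):

#include <iostream>

enum class CalculationsPrecision { F32, F32_F16, F16 };
enum class DataType { FLOAT16, FLOAT32 };

// Mirrors DeduceDataTypeFromPrecision above.
DataType Deduce(CalculationsPrecision p)
{
  return p == CalculationsPrecision::F32 ? DataType::FLOAT32 : DataType::FLOAT16;
}

int main()
{
  std::cout << (Deduce(CalculationsPrecision::F32) == DataType::FLOAT32) << "\n";     // 1
  std::cout << (Deduce(CalculationsPrecision::F32_F16) == DataType::FLOAT16) << "\n"; // 1
  return 0;
}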
diff --git a/runtime/onert/backend/gpu_cl/open_cl/Precision.h b/runtime/onert/backend/gpu_cl/open_cl/Precision.h
deleted file mode 100644
index cb910c783..000000000
--- a/runtime/onert/backend/gpu_cl/open_cl/Precision.h
+++ /dev/null
@@ -1,53 +0,0 @@
-/*
- * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
- * Copyright 2019 The TensorFlow Authors. All Rights Reserved.
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#ifndef __ONERT_BACKEND_GPU_CL_OPENCL_PRECISION_H__
-#define __ONERT_BACKEND_GPU_CL_OPENCL_PRECISION_H__
-
-#include <string>
-
-#include "DataType.h"
-
-namespace onert
-{
-namespace backend
-{
-namespace gpu_cl
-{
-
-enum class CalculationsPrecision
-{
- F32,
- F32_F16,
- F16
-};
-// F32 - all data and all math ops in F32
-// F16 - all data and all math ops in F16
-// F32_F16 - as F16, but some operations (Convolution,
-// DepthwiseConvolution, FullyConnected, ConvolutionTransposed)
-// keep their accumulator in F32: usually they calculate 4 mads in F16, sum them,
-// then convert this partial sum to F32 and add it to the accumulator.
-
-DataType DeduceDataTypeFromPrecision(CalculationsPrecision precision);
-
-std::string ToString(CalculationsPrecision precision);
-
-} // namespace gpu_cl
-} // namespace backend
-} // namespace onert
-
-#endif // __ONERT_BACKEND_GPU_CL_OPENCL_PRECISION_H__
diff --git a/runtime/onert/backend/gpu_cl/open_cl/ProgramCache.cc b/runtime/onert/backend/gpu_cl/open_cl/ProgramCache.cc
deleted file mode 100644
index 350d7a1c5..000000000
--- a/runtime/onert/backend/gpu_cl/open_cl/ProgramCache.cc
+++ /dev/null
@@ -1,97 +0,0 @@
-/*
- * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
- * Copyright 2019 The TensorFlow Authors. All Rights Reserved.
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#include "ProgramCache.h"
-
-#include <cstdint>
-#include <string>
-
-#include "ClProgram.h"
-#include "Status.h"
-#include "Util.h"
-#include "farmhash.h"
-
-namespace onert
-{
-namespace backend
-{
-namespace gpu_cl
-{
-
-ProgramCache::ProgramDescriptor::ProgramDescriptor(const std::string &code_text,
- const std::string &options,
- bool use_fingerprints)
- : code(code_text), compiler_options(options), use_fingerprint(use_fingerprints)
-{
- const uint64_t code_fingerprint = ::util::Fingerprint64(code);
- const uint64_t options_fingerprint = ::util::Fingerprint64(compiler_options);
- fingerprint = code_fingerprint + options_fingerprint;
-}
-
-ProgramCache::ProgramDescriptor::ProgramDescriptor(uint64_t fingerprints)
- : fingerprint(fingerprints), use_fingerprint(true)
-{
-}
-
-ProgramCache::ProgramCache(ProgramCache &&program_cache)
- : use_fingerprints_(program_cache.use_fingerprints_),
- programs_(std::move(program_cache.programs_))
-{
-}
-
-ProgramCache &ProgramCache::operator=(ProgramCache &&program_cache)
-{
- if (this != &program_cache)
- {
- use_fingerprints_ = program_cache.use_fingerprints_;
- programs_ = std::move(program_cache.programs_);
- }
- return *this;
-}
-
-absl::Status ProgramCache::GetOrCreateCLKernel(const std::string &code,
- const std::string &function_name,
- const std::vector<CompilerOptions> &compiler_options,
- const CLContext &context, const CLDevice &device,
- CLKernel *result)
-{
- const std::string options = CompilerOptionsToString(device, compiler_options);
- ProgramDescriptor desc{code, options, use_fingerprints_};
- auto it = programs_.find(desc);
- if (it != programs_.end())
- {
- return result->CreateFromProgram(it->second, function_name);
- }
-
- CLProgram program;
- RETURN_IF_ERROR(CreateCLProgram(code, options, context, device, &program));
- RETURN_IF_ERROR(result->CreateFromProgram(program, function_name));
- programs_.insert(std::make_pair(std::move(desc), std::move(program)));
- return absl::OkStatus();
-}
-
-absl::Status ProgramCache::GetOrCreateCLKernel(const std::string &code,
- const std::string &function_name,
- const CLContext &context, const CLDevice &device,
- CLKernel *result)
-{
- return GetOrCreateCLKernel(code, function_name, {}, context, device, result);
-}
-
-} // namespace gpu_cl
-} // namespace backend
-} // namespace onert
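At its core, GetOrCreateCLKernel is hash-keyed get-or-compile memoization. A minimal sketch of that pattern, with the CL compilation stubbed out and std::hash standing in for the farmhash fingerprints (the fingerprint sum mirrors the descriptor constructor above):

#include <cstdint>
#include <functional>
#include <iostream>
#include <string>
#include <unordered_map>

struct Cache
{
  std::unordered_map<uint64_t, std::string> programs;

  const std::string &GetOrCreate(const std::string &code, const std::string &options)
  {
    // Sum of the two hashes, as in ProgramDescriptor's fingerprint.
    const uint64_t key = std::hash<std::string>()(code) + std::hash<std::string>()(options);
    auto it = programs.find(key);
    if (it != programs.end())
      return it->second; // cache hit: reuse the compiled program
    std::cout << "compiling...\n"; // the expensive step happens once per key
    return programs.emplace(key, "binary-for:" + code).first->second;
  }
};

int main()
{
  Cache cache;
  cache.GetOrCreate("kernel_a", "-cl-fast-math"); // compiles
  cache.GetOrCreate("kernel_a", "-cl-fast-math"); // cache hit
  return 0;
}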
diff --git a/runtime/onert/backend/gpu_cl/open_cl/ProgramCache.h b/runtime/onert/backend/gpu_cl/open_cl/ProgramCache.h
deleted file mode 100644
index 3f5ee0215..000000000
--- a/runtime/onert/backend/gpu_cl/open_cl/ProgramCache.h
+++ /dev/null
@@ -1,112 +0,0 @@
-/*
- * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
- * Copyright 2019 The TensorFlow Authors. All Rights Reserved.
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#ifndef __ONERT_BACKEND_GPU_CL_OPENCL_PROGRAM_CACHE_H__
-#define __ONERT_BACKEND_GPU_CL_OPENCL_PROGRAM_CACHE_H__
-
-#include <cstdint>
-#include <string>
-#include <vector>
-
-#include "absl/container/flat_hash_map.h"
-#include "absl/types/span.h"
-#include "ClContext.h"
-#include "ClDevice.h"
-#include "ClKernel.h"
-#include "ClProgram.h"
-#include "Status.h"
-
-namespace onert
-{
-namespace backend
-{
-namespace gpu_cl
-{
-
-class ProgramCache
-{
-public:
- ProgramCache() = default;
-
- ProgramCache(ProgramCache &&program_cache);
- ProgramCache &operator=(ProgramCache &&program_cache);
- ProgramCache(const ProgramCache &) = delete;
- ProgramCache &operator=(const ProgramCache &) = delete;
-
- absl::Status GetOrCreateCLKernel(const std::string &code, const std::string &function_name,
- const std::vector<CompilerOptions> &compiler_options,
- const CLContext &context, const CLDevice &device,
- CLKernel *result);
-
- absl::Status GetOrCreateCLKernel(const std::string &code, const std::string &function_name,
- const CLContext &context, const CLDevice &device,
- CLKernel *result);
-
-private:
- struct ProgramDescriptor
- {
- ProgramDescriptor() = default;
- ProgramDescriptor(const std::string &code_text, const std::string &options,
- bool use_fingerprint);
- explicit ProgramDescriptor(uint64_t fingerprint);
-
- std::string code;
- std::string compiler_options;
- uint64_t fingerprint;
- bool use_fingerprint;
- };
- struct ProgramDescriptorHasher
- {
- std::size_t operator()(const ProgramDescriptor &k) const
- {
- if (k.use_fingerprint)
- {
- return std::hash<uint64_t>()(k.fingerprint);
- }
- else
- {
- return std::hash<std::string>()(k.code) + std::hash<std::string>()(k.compiler_options);
- }
- }
- };
- struct ProgramDescriptorEqual
- {
- bool operator()(const ProgramDescriptor &a, const ProgramDescriptor &b) const
- {
- if (a.use_fingerprint && b.use_fingerprint)
- {
- return a.fingerprint == b.fingerprint;
- }
- else
- {
- return a.compiler_options == b.compiler_options && a.code == b.code;
- }
- }
- };
-
- // There is a low probability of a hash collision when cache is deserialized
- // because only fingerprints are serialized instead of full source code.
- bool use_fingerprints_ = false;
- absl::flat_hash_map<ProgramDescriptor, CLProgram, ProgramDescriptorHasher, ProgramDescriptorEqual>
- programs_;
-};
-
-} // namespace gpu_cl
-} // namespace backend
-} // namespace onert
-
-#endif // __ONERT_BACKEND_GPU_CL_OPENCL_PROGRAM_CACHE_H__
diff --git a/runtime/onert/backend/gpu_cl/open_cl/Shape.cc b/runtime/onert/backend/gpu_cl/open_cl/Shape.cc
deleted file mode 100644
index 5a2374516..000000000
--- a/runtime/onert/backend/gpu_cl/open_cl/Shape.cc
+++ /dev/null
@@ -1,141 +0,0 @@
-/*
- * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
- * Copyright 2019 The TensorFlow Authors. All Rights Reserved.
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#include "Shape.h"
-
-#include <stdint.h>
-
-#include <string>
-#include <vector>
-
-#include "absl/strings/str_cat.h"
-#include "absl/strings/str_join.h"
-
-namespace onert
-{
-namespace backend
-{
-namespace gpu_cl
-{
-namespace
-{
-
-struct GetAxisByIndexFunc
-{
- template <Layout T> Axis operator()() const { return GetAxis<T>(index); }
- int32_t index;
-};
-
-struct GetIndexByAxisFunc
-{
- template <Layout T> int operator()() const { return GetAxisIndex<T>(axis); }
- Axis axis;
-};
-
-struct NumAxisFunc
-{
- template <Layout T> int operator()() const { return Size<T>(); }
-};
-
-} // namespace
-
-std::string ToString(Axis axis)
-{
- switch (axis)
- {
- case Axis::BATCH:
- return "batch";
- case Axis::CHANNELS:
- return "channels";
- case Axis::INPUT_CHANNELS:
- return "input_channels";
- case Axis::OUTPUT_CHANNELS:
- return "output_channels";
- case Axis::HEIGHT:
- return "height";
- case Axis::WIDTH:
- return "width";
- case Axis::VALUE:
- return "value";
- case Axis::DEPTH:
- return "depth";
- case Axis::UNKNOWN:
- return "unknown";
- }
- return "undefined";
-}
-
-std::string ToString(Layout layout)
-{
- switch (layout)
- {
- case Layout::SCALAR:
- return "scalar";
- case Layout::LINEAR:
- return "linear";
- case Layout::HW:
- return "hw";
- case Layout::HWD:
- return "hwd";
- case Layout::CHW:
- return "chw";
- case Layout::HWC:
- return "hwc";
- case Layout::HWDC:
- return "hwdc";
- case Layout::OHWI:
- return "ohwi";
- case Layout::IHWO:
- return "ihwo";
- case Layout::OIHW:
- return "oihw";
- case Layout::IOHW:
- return "iohw";
- case Layout::BHWC:
- return "bhwc";
- case Layout::BHWDC:
- return "bhwdc";
- case Layout::OHWDI:
- return "ohwi";
- case Layout::UNKNOWN:
- return "unknown";
- }
- return "undefined";
-}
-
-Axis GetAxis(Layout layout, int32_t index)
-{
- return DispatchByLayout(layout, GetAxisByIndexFunc{index});
-}
-
-int GetAxisIndex(Layout layout, Axis axis)
-{
- return DispatchByLayout(layout, GetIndexByAxisFunc{axis});
-}
-
-bool HasAxis(Layout layout, Axis axis) { return GetAxisIndex(layout, axis) >= 0; }
-
-int Size(Layout layout) { return DispatchByLayout(layout, NumAxisFunc()); }
-
-std::string ToString(const Shape &s)
-{
- return absl::StrCat("{", ToString(s.layout), ", {", absl::StrJoin(s.dimensions, ", "), "}}");
-}
-
-} // namespace gpu_cl
-} // namespace backend
-} // namespace onert
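DispatchByLayout maps a runtime Layout value onto a compile-time template instantiation of the functor's operator(). A two-layout sketch of the same idea:

#include <iostream>

enum class Layout { HW, HWC };

// Runtime-to-compile-time dispatch, as in DispatchByLayout above.
template <class F> auto Dispatch(Layout type, F f)
{
  switch (type)
  {
    case Layout::HW:
      return f.template operator()<Layout::HW>();
    case Layout::HWC:
      return f.template operator()<Layout::HWC>();
  }
  return f.template operator()<Layout::HW>();
}

struct NumAxisFunc
{
  template <Layout T> int operator()() const { return T == Layout::HW ? 2 : 3; }
};

int main()
{
  std::cout << Dispatch(Layout::HWC, NumAxisFunc()) << "\n"; // 3
  return 0;
}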
diff --git a/runtime/onert/backend/gpu_cl/open_cl/Shape.h b/runtime/onert/backend/gpu_cl/open_cl/Shape.h
deleted file mode 100644
index 3767e106f..000000000
--- a/runtime/onert/backend/gpu_cl/open_cl/Shape.h
+++ /dev/null
@@ -1,668 +0,0 @@
-/*
- * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
- * Copyright 2019 The TensorFlow Authors. All Rights Reserved.
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#ifndef __ONERT_BACKEND_GPU_CL_OPENCL_SHAPE_H__
-#define __ONERT_BACKEND_GPU_CL_OPENCL_SHAPE_H__
-
-#include <stddef.h>
-#include <stdint.h>
-
-#include <array>
-#include <functional>
-#include <numeric>
-#include <string>
-#include <utility>
-#include <vector>
-
-namespace onert
-{
-namespace backend
-{
-namespace gpu_cl
-{
-
-enum class Axis
-{
- UNKNOWN = 0,
- CHANNELS = 1,
- INPUT_CHANNELS = 2,
- OUTPUT_CHANNELS = 3,
- HEIGHT = 4,
- WIDTH = 5,
- BATCH = 6,
- VALUE = 7,
- DEPTH = 8,
-};
-
-std::string ToString(Axis t);
-
-// Layout represents axis order.
-enum class Layout
-{
- UNKNOWN = 0,
- SCALAR = 1,
- LINEAR = 2,
- HW = 3,
- CHW = 4,
- HWC = 5,
- OIHW = 6,
- OHWI = 7,
- IHWO = 8,
- IOHW = 9,
- BHWC = 10,
- HWDC = 11,
- BHWDC = 12,
- HWD = 13,
- OHWDI = 14,
-};
-
-std::string ToString(Layout l);
-
-// Returns the number of axes for the fixed layout.
-template <Layout T> constexpr int Size();
-
-// Returns the number of axes for the given layout.
-int Size(Layout layout);
-
-// Returns Axis for the given index and fixed layout.
-template <Layout T> constexpr Axis GetAxis(int index);
-
-// Returns axis for the given layout and index.
-Axis GetAxis(Layout layout, int32_t index);
-
-// Returns axis index for the given axis and fixed layout.
-template <Layout T> constexpr int GetAxisIndex(Axis axis);
-
-// Returns axis index for the given layout and axis.
-int GetAxisIndex(Layout layout, Axis axis);
-
-// Checks if fixed layout has given axis
-template <Layout T> constexpr bool HasAxis(Axis axis);
-
-// Checks if given layout has given axis
-bool HasAxis(Layout layout, Axis axis);
-
-// Stores Layout(axis set and order) and value for dimensions.
-struct Shape
-{
- Shape() : layout(Layout::UNKNOWN), dimensions() {}
-
- explicit Shape(Layout t) : layout(t), dimensions(Size(t)) {}
-
- Shape(Layout t, std::vector<int32_t> d) : layout(t), dimensions(std::move(d)) {}
-
- bool operator==(const Shape &other) const
- {
- return (layout == other.layout) && (dimensions == other.dimensions);
- }
-
- bool operator!=(const Shape &other) const { return !operator==(other); }
-
- // All methods below are matching same methods defined in StrongShape to
- // make sure generic algorithms work both ways.
-
- // Returns back a dimension or -1 if it is not found.
- template <Axis D> int32_t get() const;
- int32_t get(Axis axis) const;
-
- template <Axis D> bool set(int32_t t);
- bool set(Axis axis, int32_t t);
-
- Axis axis(int index) const { return GetAxis(layout, index); }
-
- int index(Axis axis) const { return GetAxisIndex(layout, axis); }
-
- bool has(Axis axis) const { return HasAxis(layout, axis); }
-
- int64_t DimensionsProduct() const
- {
- return std::accumulate(dimensions.begin(), dimensions.end(), 1ll, std::multiplies<int64_t>());
- }
-
- Layout layout = Layout::UNKNOWN;
-
- std::vector<int32_t> dimensions;
-};
-
-std::string ToString(const Shape &s);
-
-// StrongShape provides convenient explicit access to dimensions stored in
-// shape, e.g. StrongShape<Layout::HW> s; provides s.h and s.w accessors.
-//
-// There is a conversion possible both ways between Shape and StrongShape.
-//
-// OIHW oihw; // specific shape
-// Shape l = oihw.ToShape();
-//
-// OHWI other; // notice not the same but compatible shape.
-// if (!other.Adopt(l)) {
-// // error handling
-// }
-//
-// StrongShape supports the following set of operations:
-//
-// // Returns the number of axes in the shape class.
-// static constexpr int size();
-//
-// // Returns Axis for the given index or Axis::UNKNOWN if index
-// // falls outside of the defined range in this shape.
-// static constexpr Axis axis(int index);
-//
-// // Returns index for the given axis or -1 if axis is not defined in this
-// // shape.
-// static constexpr int index(Axis axis);
-//
-// // Getters
-// int32_t get(int index) const;
-// int32_t get(Axis axis) const;
-// int32_t get<Axis>() const;
-//
-// // Setters that return false if set was not successful.
-// bool set(int index, int32_t v);
-// bool set(Axis axis, int32_t v);
-// bool set<Axis>(int32_t v);
-//
-// // Returns shape's layout.
-// static const Layout layout;
-//
-// // Turns specific shape into generic shape.
-// Shape ToShape() const;
-//
-// // Copies all dimensions from the given shape.
-// bool Adopt(const Shape&);
-//
-template <Layout L> struct StrongShape;
-
-using Scalar = StrongShape<Layout::SCALAR>;
-using Linear = StrongShape<Layout::LINEAR>;
-using HW = StrongShape<Layout::HW>;
-using HWD = StrongShape<Layout::HWD>;
-
-// Common tensor shape for CNN models working with images.
-using CHW = StrongShape<Layout::CHW>;
-using HWC = StrongShape<Layout::HWC>;
-using HWDC = StrongShape<Layout::HWDC>;
-using BHWC = StrongShape<Layout::BHWC>;
-using BHWDC = StrongShape<Layout::BHWDC>;
-
-// Tensor shape used in convolution_2d weights.
-using OIHW = StrongShape<Layout::OIHW>;
-using OHWI = StrongShape<Layout::OHWI>;
-using IHWO = StrongShape<Layout::IHWO>;
-using IOHW = StrongShape<Layout::IOHW>;
-
-// Tensor shape used in convolution_3d weights.
-using OHWDI = StrongShape<Layout::OHWDI>;
-
-// -----------------------------------------------------------------------------
-// Everything below is internal implementation detail.
-// -----------------------------------------------------------------------------
-
-namespace internal_shape
-{
-
-template <Axis T> struct AxisTraits;
-
-#define TFLITE_GPU_AXIS_TRAITS(AxisName, HolderName) \
- template <> struct AxisTraits<Axis::AxisName> \
- { \
- struct Holder \
- { \
- int32_t HolderName; \
- \
- protected: \
- int32_t operator()() const { return HolderName; } \
- void operator()(int32_t v) { HolderName = v; } \
- }; \
- \
- using dimension_holder_type = Holder; \
- }
-
-TFLITE_GPU_AXIS_TRAITS(CHANNELS, c);
-TFLITE_GPU_AXIS_TRAITS(HEIGHT, h);
-TFLITE_GPU_AXIS_TRAITS(WIDTH, w);
-TFLITE_GPU_AXIS_TRAITS(INPUT_CHANNELS, i);
-TFLITE_GPU_AXIS_TRAITS(OUTPUT_CHANNELS, o);
-TFLITE_GPU_AXIS_TRAITS(BATCH, b);
-TFLITE_GPU_AXIS_TRAITS(VALUE, v);
-TFLITE_GPU_AXIS_TRAITS(DEPTH, d);
-
-#undef TFLITE_GPU_AXIS_TRAITS
-
-template <int N, Axis... As> struct StrongShapeImpl;
-
-template <int N> struct StrongShapeImpl<N>
-{
- static constexpr int size() { return N; }
-
- static constexpr Axis axis(int) { return Axis::UNKNOWN; }
-
- static constexpr int index(Axis) { return -1; }
-
- static constexpr bool has(Axis) { return false; }
-
- int32_t get(Axis) const { return -1; }
-
- int32_t get(int) const { return -1; }
-
- template <Axis B> int32_t get() const { return -1; }
-
- bool set(Axis, int32_t) { return false; }
-
- bool set(int, int32_t) { return false; }
-
- template <Axis B> bool set(int32_t) { return false; }
-};
-
-// Used to deduce the number of axes, and to be a child of the proper holder to
-// provide access to the dimension by name.
-template <int N, Axis A, Axis... As>
-struct StrongShapeImpl<N, A, As...> : public AxisTraits<A>::dimension_holder_type,
- public StrongShapeImpl<N + 1, As...>
-{
- using dimension_holder_type = typename AxisTraits<A>::dimension_holder_type;
-
- using rest_type = StrongShapeImpl<N + 1, As...>;
-
- StrongShapeImpl() : dimension_holder_type{0}, rest_type() {}
-
- template <typename... Ts>
- explicit StrongShapeImpl(int32_t t, Ts... ts) : dimension_holder_type{t}, rest_type(ts...)
- {
- }
-
- static constexpr Axis axis(int index) { return index == N ? A : rest_type::axis(index); }
-
- static constexpr int index(Axis axis) { return axis == A ? N : rest_type::index(axis); }
-
- static constexpr bool has(Axis axis) { return axis == A ? true : rest_type::has(axis); }
-
- int32_t get(Axis axis) const
- {
- return axis == A ? dimension_holder_type::operator()() : rest_type::get(axis);
- }
-
- template <Axis B> int32_t get() const
- {
- return B == A ? dimension_holder_type::operator()() : rest_type::template get<B>();
- }
-
- int32_t get(int index) const
- {
- return index == N ? dimension_holder_type::operator()() : rest_type::get(index);
- }
-
- bool set(Axis axis, int32_t t)
- {
- if (axis == A)
- {
- dimension_holder_type::operator()(t);
- return true;
- }
- return rest_type::set(axis, t);
- }
-
- bool set(int index, int32_t t)
- {
- if (index == N)
- {
- dimension_holder_type::operator()(t);
- return true;
- }
- return rest_type::set(index, t);
- }
-
- template <Axis B> bool set(int32_t t)
- {
- if (A == B)
- {
- dimension_holder_type::operator()(t);
- return true;
- }
- return rest_type::template set<B>(t);
- }
-};
-
-template <Layout T> struct LayoutTraits;
-
-#define TFLITE_GPU_LAYOUT_TRAITS(LayoutName, ...) \
- template <> struct LayoutTraits<Layout::LayoutName> \
- { \
- using strong_shape_type = StrongShapeImpl<0, __VA_ARGS__>; \
- }
-
-TFLITE_GPU_LAYOUT_TRAITS(HW, Axis::HEIGHT, Axis::WIDTH);
-TFLITE_GPU_LAYOUT_TRAITS(HWD, Axis::HEIGHT, Axis::WIDTH, Axis::DEPTH);
-TFLITE_GPU_LAYOUT_TRAITS(OHWI, Axis::OUTPUT_CHANNELS, Axis::HEIGHT, Axis::WIDTH,
- Axis::INPUT_CHANNELS);
-TFLITE_GPU_LAYOUT_TRAITS(OIHW, Axis::OUTPUT_CHANNELS, Axis::INPUT_CHANNELS, Axis::HEIGHT,
- Axis::WIDTH);
-TFLITE_GPU_LAYOUT_TRAITS(IOHW, Axis::INPUT_CHANNELS, Axis::OUTPUT_CHANNELS, Axis::HEIGHT,
- Axis::WIDTH);
-TFLITE_GPU_LAYOUT_TRAITS(IHWO, Axis::INPUT_CHANNELS, Axis::HEIGHT, Axis::WIDTH,
- Axis::OUTPUT_CHANNELS);
-TFLITE_GPU_LAYOUT_TRAITS(CHW, Axis::CHANNELS, Axis::HEIGHT, Axis::WIDTH);
-TFLITE_GPU_LAYOUT_TRAITS(HWC, Axis::HEIGHT, Axis::WIDTH, Axis::CHANNELS);
-TFLITE_GPU_LAYOUT_TRAITS(HWDC, Axis::HEIGHT, Axis::WIDTH, Axis::DEPTH, Axis::CHANNELS);
-TFLITE_GPU_LAYOUT_TRAITS(LINEAR, Axis::VALUE);
-TFLITE_GPU_LAYOUT_TRAITS(SCALAR, Axis::VALUE);
-TFLITE_GPU_LAYOUT_TRAITS(BHWC, Axis::BATCH, Axis::HEIGHT, Axis::WIDTH, Axis::CHANNELS);
-TFLITE_GPU_LAYOUT_TRAITS(BHWDC, Axis::BATCH, Axis::HEIGHT, Axis::WIDTH, Axis::DEPTH,
- Axis::CHANNELS);
-TFLITE_GPU_LAYOUT_TRAITS(OHWDI, Axis::OUTPUT_CHANNELS, Axis::HEIGHT, Axis::WIDTH, Axis::DEPTH,
- Axis::INPUT_CHANNELS);
-
-#undef TFLITE_GPU_LAYOUT_TRAITS
-
-template <> struct LayoutTraits<Layout::UNKNOWN>
-{
- using strong_shape_type = StrongShapeImpl<0>;
-};
-
-template <Axis A> struct DimensionGetterFixedAxisFunc
-{
- template <Layout T> int32_t operator()() const
- {
- constexpr int i = GetAxisIndex<T>(A);
- return i >= 0 && i < l->dimensions.size() ? l->dimensions[i] : -1;
- }
- const Shape *l;
-};
-
-struct DimensionGetterFunc
-{
- template <Layout T> int32_t operator()() const
- {
- uint32_t i = GetAxisIndex<T>(axis);
- return i < l->dimensions.size() ? l->dimensions[i] : -1;
- }
- Axis axis;
- const Shape *l;
-};
-
-template <Axis A> struct DimensionSetterFixedAxisFunc
-{
- template <Layout T> bool operator()() const
- {
- constexpr uint32_t i = GetAxisIndex<T>(A);
- if (i < l->dimensions.size())
- {
- l->dimensions[i] = v;
- return true;
- }
- return false;
- }
- Shape *l;
- int32_t v;
-};
-
-struct DimensionSetterFunc
-{
- template <Layout T> bool operator()() const
- {
- uint32_t i = GetAxisIndex<T>(axis);
- if (i < l->dimensions.size())
- {
- l->dimensions[i] = v;
- return true;
- }
- return false;
- }
- Axis axis;
- Shape *l;
- int32_t v;
-};
-
-template <Layout L> struct ToShapeFunc
-{
- template <Layout T> bool operator()() const
- {
- for (int i = 0; i < StrongShape<L>::size(); ++i)
- {
- int index = GetAxisIndex<T>(StrongShape<L>::axis(i));
- if (index < 0)
- return false;
- shape->set(i, l.dimensions[index]);
- }
- return true;
- }
-
- StrongShape<L> *shape;
- const Shape &l;
-};
-
-} // namespace internal_shape
-
-// template <Axis... As>
-template <Layout L> struct StrongShape : public internal_shape::LayoutTraits<L>::strong_shape_type
-{
- using strong_shape_type = typename internal_shape::LayoutTraits<L>::strong_shape_type;
- StrongShape() = default;
-
- template <typename... Ts> explicit StrongShape(Ts... t) : strong_shape_type(t...) {}
-
- constexpr static Layout layout = L;
-
- bool operator==(const StrongShape<L> &shape) const
- {
- // TODO(akulik): implement better alternative.
- return this->ToShape() == shape.ToShape();
- }
-
- bool operator!=(const StrongShape<L> &shape) const
- {
- // TODO(akulik): implement better alternative.
- return this->ToShape() != shape.ToShape();
- }
- bool empty() const { return DimensionsProduct() == 0; }
-
- // Turns StrongShape into generic shape.
- Shape ToShape() const
- {
- std::vector<int32_t> dimensions(StrongShape::size());
- for (int i = 0; i < StrongShape::size(); ++i)
- {
- dimensions[i] = StrongShape::get(i);
- }
- return Shape(L, std::move(dimensions));
- }
-
- // @return all dimensions multiplied
- int64_t DimensionsProduct() const
- {
- int64_t product = 1;
- for (int i = 0; i < StrongShape::size(); ++i)
- {
- product *= StrongShape::get(i);
- }
- return product;
- }
-
- // Translates given coordinates of the layout into a linear index, assuming
- // dimensions are sorted in tensor access order, e.g. if you access
- // foobar[i][j][k], the order of coordinates should be i, j, k.
- int64_t LinearIndex(const std::array<int32_t, StrongShape::size()> &coordinates) const
- {
- int64_t index = coordinates[0];
- for (int i = 1; i < StrongShape::size(); ++i)
- {
- index = index * StrongShape::get(i) + coordinates[i];
- }
- return index;
- }
-
- // Copies all dimensions from the given generic shape into specific shape.
- // It requires shape to have all axis defined in the given
- // StrongShape. For example:
- // - If this shape is OHWI but given shape is OIHW, Adopt will copy all
- // dimensions and return true.
- // - If this shape is OIHW but input shape is HW, Adopt will copy H and W
- // dimensions and return true, but if this shape is HW and given shape
- // OIHW, then Adopt will return false because not all axis are present in
- // the input shape.
- //
- // @return false if generic shape is not compatible.
- bool Adopt(const Shape &shape)
- {
- return DispatchByLayout(shape.layout, internal_shape::ToShapeFunc<L>{this, shape});
- }
-
- // For all axis defined in a given shape copies values to this shape.
- // Therefore, it is possible to copy dimensions from CHW to BCHW, but not
- // the other way around.
- //
- // BCHW bchw;
- // CHW chw;
- // bchw.CopyAllGivenAxis(chw); --> true
- // chw.CopyAllGivenAxis(bchw); --> false
- //
- // @return false if an axis in the source shape is not defined here, and thus
- // its value was not copied.
- template <Layout B> bool CopyAllGivenAxis(const StrongShape<B> &source)
- {
- for (int i = 0; i < source.size(); ++i)
- {
- if (!StrongShape::set(source.axis(i), source.get(i)))
- {
- return false;
- }
- }
- return true;
- }
-
- // For all axis defined in this shape copies values from the given shape.
- //
- // BCHW bchw;
- // CHW chw;
- // bchw.CopyAllDefinedAxis(chw); --> false
- // chw.CopyAllDefinedAxis(bchw); --> true
- //
- // @return false if the given shape does not have an axis defined here,
- // therefore a value was not copied.
- template <Layout B> bool CopyAllDefinedAxis(const StrongShape<B> &source)
- {
- for (int i = 0; i < StrongShape::size(); ++i)
- {
- int source_index = source.index(StrongShape::axis(i));
- if (source_index < 0)
- {
- return false;
- }
- StrongShape::set(i, source.get(source_index)); // always true
- }
- return true;
- }
-
- // Copies values only for matching axis.
- template <Layout B> void CopyMatchingAxis(const StrongShape<B> &source)
- {
- for (int i = 0; i < StrongShape::size(); ++i)
- {
- StrongShape::set(source.axis(i), source.get(i));
- }
- }
-
- // AbslHash function for using in flat hash containers.
- template <typename H> friend H AbslHashValue(H hash_state, const StrongShape &strong_shape)
- {
- for (size_t i = 0; i < strong_shape.size(); ++i)
- {
- hash_state = H::combine(std::move(hash_state), strong_shape.get(i));
- }
- return hash_state;
- }
-};
-
-template <Layout T> inline std::string ToString(const StrongShape<T> &s)
-{
- return ToString(s.ToShape());
-}
-
-template <Layout L> constexpr Layout StrongShape<L>::layout;
-
-template <class F>
-auto DispatchByLayout(Layout type, F f) -> decltype(f.template operator()<Layout::UNKNOWN>())
-{
- switch (type)
- {
- case Layout::HW:
- return f.template operator()<Layout::HW>();
- case Layout::HWD:
- return f.template operator()<Layout::HWD>();
- case Layout::HWC:
- return f.template operator()<Layout::HWC>();
- case Layout::HWDC:
- return f.template operator()<Layout::HWDC>();
- case Layout::CHW:
- return f.template operator()<Layout::CHW>();
- case Layout::OIHW:
- return f.template operator()<Layout::OIHW>();
- case Layout::IOHW:
- return f.template operator()<Layout::IOHW>();
- case Layout::OHWI:
- return f.template operator()<Layout::OHWI>();
- case Layout::IHWO:
- return f.template operator()<Layout::IHWO>();
- case Layout::LINEAR:
- return f.template operator()<Layout::LINEAR>();
- case Layout::SCALAR:
- return f.template operator()<Layout::SCALAR>();
- case Layout::BHWC:
- return f.template operator()<Layout::BHWC>();
- case Layout::BHWDC:
- return f.template operator()<Layout::BHWDC>();
- case Layout::OHWDI:
- return f.template operator()<Layout::OHWDI>();
- case Layout::UNKNOWN:
- return f.template operator()<Layout::UNKNOWN>();
- }
- return f.template operator()<Layout::UNKNOWN>();
-}
-
-template <Layout T> constexpr int Size() { return StrongShape<T>::size(); }
-
-template <Layout T> constexpr Axis GetAxis(int index) { return StrongShape<T>::axis(index); }
-
-template <Layout T> constexpr int GetAxisIndex(Axis axis) { return StrongShape<T>::index(axis); }
-
-template <Layout T> constexpr bool HasAxis(Axis axis) { return StrongShape<T>::has(axis); }
-
-template <Axis D> inline int32_t Shape::get() const
-{
- return DispatchByLayout(layout, internal_shape::DimensionGetterFixedAxisFunc<D>{this});
-}
-
-inline int32_t Shape::get(Axis axis) const
-{
- return DispatchByLayout(layout, internal_shape::DimensionGetterFunc{axis, this});
-}
-
-template <Axis D> inline bool Shape::set(int32_t t)
-{
- return DispatchByLayout(layout, internal_shape::DimensionSetterFixedAxisFunc<D>{this, t});
-}
-
-inline bool Shape::set(Axis axis, int32_t t)
-{
- return DispatchByLayout(layout, internal_shape::DimensionSetterFunc{axis, this, t});
-}
-
-} // namespace gpu_cl
-} // namespace backend
-} // namespace onert
-
-#endif // __ONERT_BACKEND_GPU_CL_OPENCL_SHAPE_H__
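
For reference, the indexing scheme that StrongShape::LinearIndex implemented above is plain row-major flattening. A minimal self-contained sketch of the same loop, decoupled from the deleted StrongShape template (names here are illustrative only):

    #include <array>
    #include <cstdint>
    #include <iostream>

    // Row-major flattening: index = ((c0 * d1 + c1) * d2 + c2) * ... ,
    // exactly the loop LinearIndex ran over StrongShape's dimensions.
    template <std::size_t N>
    int64_t LinearIndex(const std::array<int32_t, N> &dims, const std::array<int32_t, N> &coords)
    {
      int64_t index = coords[0];
      for (std::size_t i = 1; i < N; ++i)
      {
        index = index * dims[i] + coords[i];
      }
      return index;
    }

    int main()
    {
      // For a 2x3x4 shape, element (1, 2, 3) lands at (1 * 3 + 2) * 4 + 3 = 23.
      std::cout << LinearIndex<3>({2, 3, 4}, {1, 2, 3}) << "\n"; // prints 23
    }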
diff --git a/runtime/onert/backend/gpu_cl/open_cl/Spi.h b/runtime/onert/backend/gpu_cl/open_cl/Spi.h
deleted file mode 100644
index c1d65b67e..000000000
--- a/runtime/onert/backend/gpu_cl/open_cl/Spi.h
+++ /dev/null
@@ -1,94 +0,0 @@
-/*
- * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
- * Copyright 2019 The TensorFlow Authors. All Rights Reserved.
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#ifndef __ONERT_BACKEND_GPU_CL_OPEN_CL_SPI_H__
-#define __ONERT_BACKEND_GPU_CL_OPEN_CL_SPI_H__
-
-#include <cstdint>
-
-#include "Api.h"
-#include "AccessType.h"
-#include "Status.h"
-
-// Contains only service provider-related interfaces. Users should not use them
-// directly.
-
-namespace onert
-{
-namespace backend
-{
-namespace gpu_cl
-{
-
-// Converts a tensor object into another one.
-class TensorObjectConverter
-{
-public:
- virtual ~TensorObjectConverter() = default;
-
- virtual absl::Status Convert(const TensorObject &input, const TensorObject &output) = 0;
-};
-
-class TensorObjectConverterBuilder
-{
-public:
- virtual ~TensorObjectConverterBuilder() = default;
-
- virtual bool IsSupported(const TensorObjectDef &input, const TensorObjectDef &output) const = 0;
-
- virtual absl::Status MakeConverter(const TensorObjectDef &input, const TensorObjectDef &output,
- std::unique_ptr<TensorObjectConverter> *converter) = 0;
-};
-
-// Connects tensor definition provided by a user (external) with tensor
-// definition used by the inference engine (internal).
-struct TensorTieDef
-{
- uint32_t id;
- AccessType access_type;
- TensorObjectDef internal_def;
- TensorObjectDef external_def;
-};
-
-// Connects external tensor object to internal tensor object and provides
-// functionality to copy data to/from external object to internal.
-class TensorTie
-{
-public:
- explicit TensorTie(const TensorTieDef &def) : def_(def) {}
-
- virtual ~TensorTie() = default;
-
- virtual absl::Status SetExternalObject(TensorObject obj) = 0;
-
- virtual TensorObject GetExternalObject() = 0;
-
- virtual absl::Status CopyToExternalObject() = 0;
-
- virtual absl::Status CopyFromExternalObject() = 0;
-
- const TensorTieDef &def() const { return def_; }
-
-private:
- const TensorTieDef def_;
-};
-
-} // namespace gpu_cl
-} // namespace backend
-} // namespace onert
-
-#endif // __ONERT_BACKEND_GPU_CL_OPEN_CL_SPI_H__
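
The TensorTie interface deleted above couples a user-facing tensor object with the engine's internal one. A minimal sketch of the idea, using a hypothetical stand-in type since Api.h is not shown here: when both sides alias the same allocation, the copy hooks degenerate to no-ops.

    // Hypothetical stand-in for the TensorObject variant declared in Api.h.
    struct TensorObject
    {
      void *data = nullptr;
    };

    // When the external and internal definitions already match, a tie can let
    // both sides alias one allocation, so copying in either direction is a no-op.
    class NoopTensorTie
    {
    public:
      explicit NoopTensorTie(TensorObject obj) : external_(obj) {}

      void SetExternalObject(TensorObject obj) { external_ = obj; }
      TensorObject GetExternalObject() const { return external_; }

      void CopyToExternalObject() { /* same memory on both sides */ }
      void CopyFromExternalObject() { /* same memory on both sides */ }

    private:
      TensorObject external_;
    };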
diff --git a/runtime/onert/backend/gpu_cl/open_cl/StorageTypeUtil.cc b/runtime/onert/backend/gpu_cl/open_cl/StorageTypeUtil.cc
deleted file mode 100644
index eada697ac..000000000
--- a/runtime/onert/backend/gpu_cl/open_cl/StorageTypeUtil.cc
+++ /dev/null
@@ -1,149 +0,0 @@
-/*
- * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
- * Copyright 2020 The TensorFlow Authors. All Rights Reserved.
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#include "StorageTypeUtil.h"
-
-#include "TensorType.h"
-#include "DataType.h"
-#include "Shape.h"
-#include "Util.h"
-
-namespace onert
-{
-namespace backend
-{
-namespace gpu_cl
-{
-
-bool CanCreateTensorWithShape(const DeviceInfo &device_info, const BHWDC &shape,
- const TensorDescriptor &descriptor)
-{
- const int slices = DivideRoundUp(shape.c, 4);
- switch (descriptor.storage_type)
- {
- case TensorStorageType::BUFFER:
- {
- const uint64_t flt4_size = 4 * (descriptor.data_type == DataType::FLOAT32 ? 4 : 2);
- const uint64_t buffer_size = shape.b * shape.w * shape.h * shape.d * slices * flt4_size;
- return buffer_size <= device_info.buffer_max_size;
- }
- case TensorStorageType::IMAGE_BUFFER:
- return (uint64_t)shape.b * shape.w * shape.h * shape.d * slices <=
- device_info.image_buffer_max_size;
- case TensorStorageType::TEXTURE_3D:
- if (device_info.cl_version < OpenCLVersion::CL_1_2 && slices == 1)
- {
- // clCreateImage3D (used in CL 1.0/1.1) cannot create an image with
- // depth = 1 per the specification.
- return false;
- }
- return (uint64_t)shape.w * shape.b <= device_info.image3d_max_width &&
- (uint64_t)shape.h <= device_info.image3d_max_height &&
- (uint64_t)slices * shape.d <= device_info.image3d_max_depth;
- case TensorStorageType::TEXTURE_ARRAY:
- // Bug on some Adreno. b/131099086
- if (slices == 1 && !device_info.SupportsOneLayerTextureArray())
- {
- return false;
- }
- return (uint64_t)shape.w * shape.b <= device_info.image2d_max_width &&
- (uint64_t)shape.h <= device_info.image2d_max_height &&
- (uint64_t)slices * shape.d <= device_info.image_array_max_layers;
- case TensorStorageType::TEXTURE_2D:
- return (uint64_t)shape.w * shape.b * shape.d <= device_info.image2d_max_width &&
- (uint64_t)shape.h * slices <= device_info.image2d_max_height;
- case TensorStorageType::SINGLE_TEXTURE_2D:
- return (uint64_t)shape.c <= 4 &&
- device_info.SupportsFloatImage2D(descriptor.data_type, shape.c) &&
- (uint64_t)shape.w * shape.b * shape.d <= device_info.image2d_max_width &&
- (uint64_t)shape.h <= device_info.image2d_max_height;
- default:
- return false;
- }
-}
-
-bool CanCreateTensorWithShape(const DeviceInfo &device_info, const BHWC &shape,
- const TensorDescriptor &descriptor)
-{
- const BHWDC shape5D(shape.b, shape.h, shape.w, 1, shape.c);
- return CanCreateTensorWithShape(device_info, shape5D, descriptor);
-}
-
-TensorStorageType SelectBestStorageType(const DeviceInfo &device_info, const BHWC &shape,
- const TensorStorageType &desired, const DataType &data_type,
- const Layout &layout)
-{
- if (CanCreateTensorWithShape(device_info, shape, TensorDescriptor{data_type, desired, layout}))
- {
- return desired;
- }
- auto GetBestTypeAfterTextureArray = [&]() {
- if (device_info.SupportsImageBuffer() &&
- CanCreateTensorWithShape(
- device_info, shape, TensorDescriptor{data_type, TensorStorageType::IMAGE_BUFFER, layout}))
- {
- return TensorStorageType::IMAGE_BUFFER;
- }
- else
- {
- return TensorStorageType::BUFFER;
- }
- };
- auto GetBestTypeAfterTexture2D = [&]() {
- if (device_info.SupportsTextureArray() &&
- CanCreateTensorWithShape(
- device_info, shape,
- TensorDescriptor{data_type, TensorStorageType::TEXTURE_ARRAY, layout}))
- {
- return TensorStorageType::TEXTURE_ARRAY;
- }
- else
- {
- return GetBestTypeAfterTextureArray();
- }
- };
- auto GetBestTypeAfterTexture3D = [&]() {
- if (CanCreateTensorWithShape(
- device_info, shape, TensorDescriptor{data_type, TensorStorageType::TEXTURE_2D, layout}))
- {
- return TensorStorageType::TEXTURE_2D;
- }
- else
- {
- return GetBestTypeAfterTexture2D();
- }
- };
- switch (desired)
- {
- case TensorStorageType::TEXTURE_2D:
- case TensorStorageType::SINGLE_TEXTURE_2D:
- return GetBestTypeAfterTexture2D();
- case TensorStorageType::TEXTURE_ARRAY:
- return GetBestTypeAfterTextureArray();
- case TensorStorageType::TEXTURE_3D:
- return GetBestTypeAfterTexture3D();
- case TensorStorageType::IMAGE_BUFFER:
- case TensorStorageType::BUFFER:
- return TensorStorageType::BUFFER;
- default:
- return TensorStorageType::BUFFER;
- }
-}
-
-} // namespace gpu_cl
-} // namespace backend
-} // namespace onert
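
SelectBestStorageType above degrades from the desired storage type toward a plain buffer: TEXTURE_3D falls back to TEXTURE_2D, then TEXTURE_ARRAY, then IMAGE_BUFFER, and finally BUFFER. A compact sketch of that fallback order, where a fits() predicate stands in for CanCreateTensorWithShape plus the device capability checks (an assumption for illustration):

    #include <functional>

    enum class Storage { TEXTURE_2D, TEXTURE_ARRAY, TEXTURE_3D, IMAGE_BUFFER, BUFFER };

    // Mirrors the fallback chain implemented above. `fits` stands in for
    // CanCreateTensorWithShape combined with SupportsTextureArray() /
    // SupportsImageBuffer() style capability checks.
    Storage SelectStorage(Storage desired, const std::function<bool(Storage)> &fits)
    {
      if (fits(desired))
        return desired;
      switch (desired)
      {
        case Storage::TEXTURE_3D:
          if (fits(Storage::TEXTURE_2D))
            return Storage::TEXTURE_2D;
          [[fallthrough]];
        case Storage::TEXTURE_2D:
          if (fits(Storage::TEXTURE_ARRAY))
            return Storage::TEXTURE_ARRAY;
          [[fallthrough]];
        case Storage::TEXTURE_ARRAY:
          if (fits(Storage::IMAGE_BUFFER))
            return Storage::IMAGE_BUFFER;
          [[fallthrough]];
        default:
          return Storage::BUFFER; // the unconditional last resort
      }
    }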
diff --git a/runtime/onert/backend/gpu_cl/open_cl/StorageTypeUtil.h b/runtime/onert/backend/gpu_cl/open_cl/StorageTypeUtil.h
deleted file mode 100644
index a84c3865f..000000000
--- a/runtime/onert/backend/gpu_cl/open_cl/StorageTypeUtil.h
+++ /dev/null
@@ -1,47 +0,0 @@
-/*
- * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
- * Copyright 2020 The TensorFlow Authors. All Rights Reserved.
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#ifndef __ONERT_BACKEND_GPU_CL_OPENCL_STORAGE_TYPE_UTIL_H__
-#define __ONERT_BACKEND_GPU_CL_OPENCL_STORAGE_TYPE_UTIL_H__
-
-#include "DeviceInfo.h"
-#include "TensorType.h"
-#include "DataType.h"
-#include "Shape.h"
-
-namespace onert
-{
-namespace backend
-{
-namespace gpu_cl
-{
-
-bool CanCreateTensorWithShape(const DeviceInfo &device_info, const BHWDC &shape,
- const TensorDescriptor &descriptor);
-
-bool CanCreateTensorWithShape(const DeviceInfo &device_info, const BHWC &shape,
- const TensorDescriptor &descriptor);
-
-TensorStorageType SelectBestStorageType(const DeviceInfo &device_info, const BHWC &shape,
- const TensorStorageType &desired, const DataType &data_type,
- const Layout &layout);
-
-} // namespace gpu_cl
-} // namespace backend
-} // namespace onert
-
-#endif // __ONERT_BACKEND_GPU_CL_OPENCL_STORAGE_TYPE_UTIL_H__
diff --git a/runtime/onert/backend/gpu_cl/open_cl/Tensor.cc b/runtime/onert/backend/gpu_cl/open_cl/Tensor.cc
deleted file mode 100644
index 983e0d29d..000000000
--- a/runtime/onert/backend/gpu_cl/open_cl/Tensor.cc
+++ /dev/null
@@ -1,690 +0,0 @@
-/*
- * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
- * Copyright 2019 The TensorFlow Authors. All Rights Reserved.
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#include "Tensor.h"
-
-#include <cstring>
-#include <vector>
-
-#include "absl/strings/str_cat.h"
-
-#include "Buffer.h"
-#include "ClImageFormat.h"
-#include "ClMemory.h"
-#include "GpuObject.h"
-#include "TensorType.h"
-#include "InternalTensor.h"
-#include "DataType.h"
-#include "Status.h"
-
-namespace onert
-{
-namespace backend
-{
-namespace gpu_cl
-{
-namespace
-{
-
-absl::Status AllocateTensorMemory(const CLContext &context, const BHWDC &shape,
- const TensorDescriptor &descriptor, const void *data_ptr,
- CLMemory *result)
-{
- const int slices = DivideRoundUp(shape.c, 4);
- cl_mem_flags mem_flags = CL_MEM_READ_WRITE;
- if (data_ptr)
- {
- mem_flags |= CL_MEM_COPY_HOST_PTR;
- }
- switch (descriptor.storage_type)
- {
- case TensorStorageType::BUFFER:
- case TensorStorageType::IMAGE_BUFFER:
- {
- const size_t data_size =
- shape.b * shape.w * shape.h * shape.d * slices * 4 * SizeOf(descriptor.data_type);
- cl_int error_code;
- cl_mem memory = clCreateBuffer(context.context(), mem_flags, data_size,
- const_cast<void *>(data_ptr), &error_code);
- if (!memory)
- {
- return absl::UnknownError(absl::StrCat(
- "Failed to allocate device memory (clCreateBuffer): ", CLErrorCodeToString(error_code)));
- }
- *result = CLMemory(memory, true);
- return absl::OkStatus();
- }
- case TensorStorageType::TEXTURE_2D:
- {
- cl_image_desc desc;
- desc.image_type = CL_MEM_OBJECT_IMAGE2D;
- desc.image_width = shape.w * shape.b * shape.d;
- desc.image_height = shape.h * slices;
- desc.image_depth = 0;
- desc.image_row_pitch = 0;
- desc.image_slice_pitch = 0;
- desc.num_mip_levels = 0;
- desc.num_samples = 0;
- desc.buffer = nullptr;
-
- cl_image_format format;
- format.image_channel_order = CL_RGBA;
- format.image_channel_data_type = ToImageChannelType(descriptor.data_type);
-
- cl_int error_code;
- cl_mem memory = CreateImage2DLegacy(context.context(), mem_flags, &format, &desc,
- const_cast<void *>(data_ptr), &error_code);
- if (error_code != CL_SUCCESS)
- {
- return absl::UnknownError(absl::StrCat("Failed to create 2D texture (clCreateImage): ",
- CLErrorCodeToString(error_code)));
- }
-
- *result = CLMemory(memory, true);
- return absl::OkStatus();
- }
- case TensorStorageType::TEXTURE_3D:
- {
- cl_image_desc desc;
- desc.image_type = CL_MEM_OBJECT_IMAGE3D;
- desc.image_width = shape.w * shape.b;
- desc.image_height = shape.h;
- desc.image_depth = slices * shape.d;
- desc.image_row_pitch = 0;
- desc.image_slice_pitch = 0;
- desc.num_mip_levels = 0;
- desc.num_samples = 0;
- desc.buffer = nullptr;
-
- cl_image_format format;
- format.image_channel_order = CL_RGBA;
- format.image_channel_data_type = ToImageChannelType(descriptor.data_type);
-
- cl_int error_code;
- cl_mem memory = CreateImage3DLegacy(context.context(), mem_flags, &format, &desc,
- const_cast<void *>(data_ptr), &error_code);
- if (error_code != CL_SUCCESS)
- {
- return absl::UnknownError(absl::StrCat("Failed to create 3D texture (clCreateImage): ",
- CLErrorCodeToString(error_code)));
- }
-
- *result = CLMemory(memory, true);
- return absl::OkStatus();
- }
- case TensorStorageType::TEXTURE_ARRAY:
- {
- cl_image_desc desc;
- desc.image_type = CL_MEM_OBJECT_IMAGE2D_ARRAY;
- desc.image_width = shape.w * shape.b;
- desc.image_height = shape.h;
- desc.image_depth = 0;
- desc.image_array_size = slices * shape.d;
- desc.image_row_pitch = 0;
- desc.image_slice_pitch = 0;
- desc.num_mip_levels = 0;
- desc.num_samples = 0;
- desc.buffer = nullptr;
-
- cl_image_format format;
- format.image_channel_order = CL_RGBA;
- format.image_channel_data_type = ToImageChannelType(descriptor.data_type);
-
- cl_int error_code;
- cl_mem memory = clCreateImage(context.context(), mem_flags, &format, &desc,
- const_cast<void *>(data_ptr), &error_code);
- if (error_code != CL_SUCCESS)
- {
- return absl::UnknownError(absl::StrCat(
- "Failed to create 2D texture array (clCreateImage): ", CLErrorCodeToString(error_code)));
- }
-
- *result = CLMemory(memory, true);
- return absl::OkStatus();
- }
-
- case TensorStorageType::SINGLE_TEXTURE_2D:
- {
- if (slices != 1)
- {
-        return absl::InvalidArgumentError(absl::StrCat(
-          "SINGLE_TEXTURE_2D supports only channels in range [1-4], but ", shape.c, " was provided"));
- }
- cl_image_desc desc;
- desc.image_type = CL_MEM_OBJECT_IMAGE2D;
- desc.image_width = shape.w * shape.b * shape.d;
- desc.image_height = shape.h;
- desc.image_depth = 0;
- desc.image_row_pitch = 0;
- desc.image_slice_pitch = 0;
- desc.num_mip_levels = 0;
- desc.num_samples = 0;
- desc.buffer = nullptr;
-
- cl_image_format format;
- if (context.IsFloatTexture2DSupported(shape.c, descriptor.data_type))
- {
- format.image_channel_order = ToChannelOrder(shape.c);
- format.image_channel_data_type = ToImageChannelType(descriptor.data_type);
- }
- else
- {
- return absl::InvalidArgumentError(
- absl::StrCat("This device doesn't support ", shape.c, "-channel textures."));
- }
-
- cl_int error_code;
- cl_mem memory = CreateImage2DLegacy(context.context(), mem_flags, &format, &desc,
- const_cast<void *>(data_ptr), &error_code);
- if (error_code != CL_SUCCESS)
- {
- return absl::UnknownError(absl::StrCat(
- "Failed to create single 2D texture (clCreateImage): ", CLErrorCodeToString(error_code)));
- }
-
- *result = CLMemory(memory, true);
- return absl::OkStatus();
- }
-
- default:
- return absl::InternalError("Unsupported tensor storage type");
- }
-}
-
-absl::Status CreateImageBufferFromBuffer(const CLContext &context, cl_mem memory,
- DataType data_type, int width, cl_mem *result)
-{
- cl_image_format format;
- cl_image_desc desc;
- std::memset(&desc, 0, sizeof(desc));
- desc.image_type = CL_MEM_OBJECT_IMAGE1D_BUFFER;
- desc.image_width = width;
- desc.mem_object = memory;
-
- format.image_channel_data_type = ToImageChannelType(data_type);
- format.image_channel_order = CL_RGBA;
-
- cl_int error_code;
- *result =
- clCreateImage(context.context(), CL_MEM_READ_WRITE, &format, &desc, nullptr, &error_code);
- if (error_code != CL_SUCCESS)
- {
- return absl::UnknownError(absl::StrCat("Failed to create Image from Buffer (clCreateImage): ",
- CLErrorCodeToString(error_code)));
- }
- return absl::OkStatus();
-}
-
-absl::Status CreateTensor(const CLContext &context, const BHWDC &shape,
- const TensorDescriptor &descriptor, cl_mem memory, Tensor *result)
-{
- const bool memory_owner = memory == nullptr;
- if (memory_owner)
- {
- CLMemory mem;
- RETURN_IF_ERROR(AllocateTensorMemory(context, shape, descriptor, nullptr, &mem));
- memory = mem.Release();
- }
- if (descriptor.storage_type == TensorStorageType::IMAGE_BUFFER)
- {
- cl_mem image_memory;
- RETURN_IF_ERROR(CreateImageBufferFromBuffer(
- context, memory, descriptor.data_type,
- shape.b * shape.w * shape.h * shape.d * DivideRoundUp(shape.c, 4), &image_memory));
- *result = Tensor(memory, memory_owner, image_memory, shape, descriptor);
- }
- else
- {
- *result = Tensor(memory, memory_owner, shape, descriptor);
- }
- return absl::OkStatus();
-}
-
-absl::Status CreateTensorShared(const CLContext &context, const BHWDC &shape,
- const TensorDescriptor &descriptor, cl_mem memory, Tensor *result)
-{
- const bool memory_owner = false;
- if (descriptor.storage_type == TensorStorageType::IMAGE_BUFFER)
- {
- cl_mem image_memory;
- RETURN_IF_ERROR(CreateImageBufferFromBuffer(
- context, memory, descriptor.data_type,
- shape.b * shape.w * shape.h * shape.d * DivideRoundUp(shape.c, 4), &image_memory));
- *result = Tensor(memory, memory_owner, image_memory, shape, descriptor);
- }
- else
- {
- *result = Tensor(memory, memory_owner, shape, descriptor);
- }
- return absl::OkStatus();
-}
-
-} // namespace
-
-absl::Status TensorDescriptor::CreateGPUObject(CLContext *context, GPUObjectPtr *result) const
-{
- Tensor gpu_tensor;
- RETURN_IF_ERROR(gpu_tensor.CreateFromDescriptor(*this, context));
- *result = absl::make_unique<Tensor>(std::move(gpu_tensor));
- return absl::OkStatus();
-}
-
-Tensor::Tensor(cl_mem memory, bool memory_owner, const BHWC &shape,
- const TensorDescriptor &descriptor)
- : memory_(memory), image_buffer_memory_(nullptr), memory_owner_(memory_owner),
- shape_(shape.b, shape.h, shape.w, 1, shape.c), descriptor_(descriptor)
-{
-}
-
-Tensor::Tensor(cl_mem memory, bool memory_owner, const BHWDC &shape,
- const TensorDescriptor &descriptor)
- : memory_(memory), image_buffer_memory_(nullptr), memory_owner_(memory_owner), shape_(shape),
- descriptor_(descriptor)
-{
-}
-
-Tensor::Tensor(cl_mem memory, bool memory_owner, cl_mem image_buffer_memory, const BHWC &shape,
- const TensorDescriptor &descriptor)
- : memory_(memory), image_buffer_memory_(image_buffer_memory), memory_owner_(memory_owner),
- shape_(shape.b, shape.h, shape.w, 1, shape.c), descriptor_(descriptor)
-{
-}
-
-Tensor::Tensor(cl_mem memory, bool memory_owner, cl_mem image_buffer_memory, const BHWDC &shape,
- const TensorDescriptor &descriptor)
- : memory_(memory), image_buffer_memory_(image_buffer_memory), memory_owner_(memory_owner),
- shape_(shape), descriptor_(descriptor)
-{
-}
-
-Tensor::Tensor(Tensor &&tensor)
- : memory_(tensor.memory_), image_buffer_memory_(tensor.image_buffer_memory_),
- memory_owner_(tensor.memory_owner_), shape_(tensor.shape_), descriptor_(tensor.descriptor_)
-{
- tensor.memory_ = nullptr;
- tensor.image_buffer_memory_ = nullptr;
-}
-
-Tensor &Tensor::operator=(Tensor &&tensor)
-{
- if (this != &tensor)
- {
- Release();
- std::swap(memory_, tensor.memory_);
- std::swap(image_buffer_memory_, tensor.image_buffer_memory_);
- std::swap(memory_owner_, tensor.memory_owner_);
- std::swap(shape_, tensor.shape_);
- std::swap(descriptor_, tensor.descriptor_);
- }
- return *this;
-}
-
-void Tensor::Release()
-{
- // image_buffer_memory_ is always owned by this object
- if (image_buffer_memory_)
- {
- clReleaseMemObject(image_buffer_memory_);
- image_buffer_memory_ = nullptr;
- }
- if (memory_owner_ && memory_)
- {
- clReleaseMemObject(memory_);
- memory_ = nullptr;
- }
-}
-
-absl::Status Tensor::GetGPUResources(const GPUObjectDescriptor *obj_ptr,
- GPUResourcesWithValue *resources) const
-{
- const auto *buffer_desc = dynamic_cast<const BufferDescriptor *>(obj_ptr);
- if (buffer_desc)
- {
- if (descriptor_.storage_type != TensorStorageType::BUFFER)
- {
- return absl::InvalidArgumentError("Tensor can be used with BufferDescriptor only wtih "
- "TensorStorageType::BUFFER.");
- }
- resources->buffers.push_back({"buffer", memory_});
- return absl::OkStatus();
- }
- const auto *tensor_desc = dynamic_cast<const TensorDescriptor *>(obj_ptr);
- if (!tensor_desc)
- {
- return absl::InvalidArgumentError("Expected TensorDescriptor on input.");
- }
- if (descriptor_.HasAxis(Axis::WIDTH))
- {
- resources->ints.push_back({"width", Width()});
- resources->ints.push_back({"width_div2", Width() / 2});
- resources->ints.push_back({"width_div4", Width() / 4});
- resources->ints.push_back({"width_batched", Width() * Batch()});
- resources->ints.push_back({"width_batched_div2", Width() * Batch() / 2});
- resources->ints.push_back({"width_batched_div4", Width() * Batch() / 4});
- }
- if (descriptor_.HasAxis(Axis::HEIGHT))
- {
- resources->ints.push_back({"height", Height()});
- }
- if (descriptor_.HasAxis(Axis::CHANNELS))
- {
- resources->ints.push_back({"slices", Slices()});
- resources->ints.push_back({"channels", Channels()});
- }
- if (descriptor_.HasAxis(Axis::BATCH))
- {
- resources->ints.push_back({"batch", Batch()});
- }
- if (descriptor_.HasAxis(Axis::DEPTH))
- {
- resources->ints.push_back({"depth", Depth()});
- }
-
- if (descriptor_.storage_type == TensorStorageType::BUFFER)
- {
- resources->buffers.push_back({"buffer", memory_});
- }
- else if (descriptor_.storage_type == TensorStorageType::TEXTURE_2D ||
- descriptor_.storage_type == TensorStorageType::SINGLE_TEXTURE_2D)
- {
- resources->images2d.push_back({"image2d", memory_});
- }
- else if (descriptor_.storage_type == TensorStorageType::TEXTURE_ARRAY)
- {
- resources->image2d_arrays.push_back({"image2d_array", memory_});
- }
- else if (descriptor_.storage_type == TensorStorageType::TEXTURE_3D)
- {
- resources->images3d.push_back({"image3d", memory_});
- }
- else if (descriptor_.storage_type == TensorStorageType::IMAGE_BUFFER)
- {
- if (obj_ptr->GetAccess() == AccessType::READ)
- {
- resources->image_buffers.push_back({"image_buffer", image_buffer_memory_});
- }
- else
- {
- resources->buffers.push_back({"buffer", memory_});
- }
- }
-
- return absl::OkStatus();
-}
-
-int3 Tensor::GetFullTensorRegion() const
-{
- switch (descriptor_.storage_type)
- {
- case TensorStorageType::BUFFER:
- case TensorStorageType::TEXTURE_ARRAY:
- case TensorStorageType::TEXTURE_3D:
- case TensorStorageType::IMAGE_BUFFER:
- return {shape_.w * shape_.b, shape_.h, shape_.d * Slices()};
- case TensorStorageType::TEXTURE_2D:
- return {shape_.w * shape_.b * shape_.d, shape_.h * Slices(), 1};
- case TensorStorageType::SINGLE_TEXTURE_2D:
- return {shape_.w * shape_.b * shape_.d, shape_.h, 1};
- case TensorStorageType::UNKNOWN:
- return {-1, -1, -1};
- }
- return {-1, -1, -1};
-}
-
-absl::Status Tensor::IsValid(const BHWC &shape) const
-{
- if (shape.b != shape_.b)
- {
- return absl::InvalidArgumentError("Shape batch does not match tensor batch");
- }
- if (shape.w != shape_.w)
- {
- return absl::InvalidArgumentError("Shape width does not match tensor width");
- }
- if (shape.h != shape_.h)
- {
- return absl::InvalidArgumentError("Shape height does not match tensor height");
- }
- if (shape.c != shape_.c)
- {
- return absl::InvalidArgumentError("Shape channels does not match tensor channels");
- }
- return absl::OkStatus();
-}
-
-absl::Status Tensor::IsValid(const BHWDC &shape) const
-{
- if (shape.b != shape_.b)
- {
- return absl::InvalidArgumentError("Shape batch does not match tensor batch");
- }
- if (shape.w != shape_.w)
- {
- return absl::InvalidArgumentError("Shape width does not match tensor width");
- }
- if (shape.h != shape_.h)
- {
- return absl::InvalidArgumentError("Shape height does not match tensor height");
- }
- if (shape.d != shape_.d)
- {
- return absl::InvalidArgumentError("Shape depth does not match tensor depth");
- }
- if (shape.c != shape_.c)
- {
- return absl::InvalidArgumentError("Shape channels does not match tensor channels");
- }
- return absl::OkStatus();
-}
-
-int Tensor::GetAlignedChannels() const
-{
- return descriptor_.storage_type == TensorStorageType::SINGLE_TEXTURE_2D ? shape_.c
- : AlignByN(shape_.c, 4);
-}
-
-uint64_t Tensor::GetMemorySizeInBytes() const
-{
- const uint64_t flt_size = static_cast<uint64_t>(SizeOf(descriptor_.data_type));
- const uint64_t flt4_size = 4 * flt_size;
- switch (descriptor_.storage_type)
- {
- case TensorStorageType::BUFFER:
- case TensorStorageType::IMAGE_BUFFER:
- case TensorStorageType::TEXTURE_ARRAY:
- case TensorStorageType::TEXTURE_2D:
- case TensorStorageType::TEXTURE_3D:
- return flt4_size * shape_.b * shape_.w * shape_.h * shape_.d * Slices();
- case TensorStorageType::SINGLE_TEXTURE_2D:
- return flt_size * shape_.w * shape_.h * shape_.c * shape_.b * shape_.d;
- default:
- return 0;
- }
-}
-
-cl_mem Tensor::GetMemoryPtr() const
-{
- return descriptor_.storage_type == TensorStorageType::IMAGE_BUFFER ? image_buffer_memory_
- : memory_;
-}
-
-cl_mem Tensor::GetMemoryPtrForWriting() const { return memory_; }
-
-absl::Status Tensor::WriteDataBHWDC(absl::Span<const float> in, CLCommandQueue *queue)
-{
- void *data_ptr = nullptr;
- const int aligned_channels = GetAlignedChannels();
- const int elements_count = shape_.b * shape_.w * shape_.h * shape_.d * aligned_channels;
-
- const size_t data_size = elements_count * SizeOf(descriptor_.data_type);
- std::vector<float> data_f;
- data_f.resize(elements_count);
- data_ptr = data_f.data();
- DataFromBHWDC(in, shape_, descriptor_, absl::MakeSpan(data_f.data(), data_f.size()));
-
- switch (descriptor_.storage_type)
- {
- case TensorStorageType::BUFFER:
- case TensorStorageType::IMAGE_BUFFER:
- RETURN_IF_ERROR(queue->EnqueueWriteBuffer(memory_, data_size, data_ptr));
- break;
- case TensorStorageType::TEXTURE_ARRAY:
- case TensorStorageType::TEXTURE_2D:
- case TensorStorageType::TEXTURE_3D:
- case TensorStorageType::SINGLE_TEXTURE_2D:
- RETURN_IF_ERROR(queue->EnqueueWriteImage(memory_, GetFullTensorRegion(), data_ptr));
- break;
- default:
- return absl::InternalError("Unsupported tensor storage type");
- }
-
- return absl::OkStatus();
-}
-
-absl::Status Tensor::WriteData(CLCommandQueue *queue, const TensorFloat32 &src)
-{
- RETURN_IF_ERROR(IsValid(src.shape));
- return WriteDataBHWDC(absl::MakeConstSpan(src.data), queue);
-}
-
-absl::Status Tensor::WriteData(CLCommandQueue *queue,
- const InternalTensor<Linear, DataType::FLOAT32> &src)
-{
- return WriteDataBHWDC(absl::MakeConstSpan(src.data), queue);
-}
-
-absl::Status Tensor::WriteData(CLCommandQueue *queue,
- const InternalTensor<HWC, DataType::FLOAT32> &src)
-{
- return WriteDataBHWDC(absl::MakeConstSpan(src.data), queue);
-}
-
-absl::Status Tensor::WriteData(CLCommandQueue *queue, const Tensor5DFloat32 &src)
-{
- RETURN_IF_ERROR(IsValid(src.shape));
- return WriteDataBHWDC(absl::MakeConstSpan(src.data), queue);
-}
-
-absl::Status Tensor::ReadDataBHWDC(absl::Span<float> out, CLCommandQueue *queue) const
-{
- void *data_ptr = nullptr;
- const int aligned_channels = GetAlignedChannels();
- const int elements_count = shape_.b * shape_.w * shape_.h * shape_.d * aligned_channels;
- const size_t data_size = elements_count * SizeOf(descriptor_.data_type);
-
- std::vector<float> data_f;
- data_f.resize(elements_count);
- data_ptr = data_f.data();
- switch (descriptor_.storage_type)
- {
- case TensorStorageType::BUFFER:
- case TensorStorageType::IMAGE_BUFFER:
- RETURN_IF_ERROR(queue->EnqueueReadBuffer(memory_, data_size, data_ptr));
- break;
- case TensorStorageType::TEXTURE_ARRAY:
- case TensorStorageType::TEXTURE_2D:
- case TensorStorageType::TEXTURE_3D:
- case TensorStorageType::SINGLE_TEXTURE_2D:
- RETURN_IF_ERROR(queue->EnqueueReadImage(memory_, GetFullTensorRegion(), data_ptr));
- break;
- default:
- return absl::InternalError("Unsupported tensor storage type");
- }
-
- if (descriptor_.data_type == DataType::FLOAT32)
- {
- DataToBHWDC(absl::MakeConstSpan(data_f.data(), data_f.size()), shape_, descriptor_, out);
- }
-
- return absl::OkStatus();
-}
-
-absl::Status Tensor::ReadData(CLCommandQueue *queue, TensorFloat32 *dst) const
-{
- RETURN_IF_ERROR(IsValid(dst->shape));
- return ReadDataBHWDC(absl::MakeSpan(dst->data), queue);
-}
-
-absl::Status Tensor::ReadData(CLCommandQueue *queue, Tensor5DFloat32 *dst) const
-{
- RETURN_IF_ERROR(IsValid(dst->shape));
- return ReadDataBHWDC(absl::MakeSpan(dst->data), queue);
-}
-
-absl::Status Tensor::CreateFromDescriptor(const TensorDescriptor &desc, CLContext *context)
-{
- shape_ = desc.shape;
- descriptor_.data_type = desc.data_type;
- descriptor_.storage_type = desc.storage_type;
- descriptor_.layout = desc.layout;
- memory_owner_ = true;
- CLMemory memory;
- uint8_t *data_ptr = desc.data.empty() ? nullptr : const_cast<unsigned char *>(desc.data.data());
- RETURN_IF_ERROR(AllocateTensorMemory(*context, shape_, descriptor_, data_ptr, &memory));
- memory_ = memory.Release();
- if (desc.storage_type == TensorStorageType::IMAGE_BUFFER)
- {
- RETURN_IF_ERROR(CreateImageBufferFromBuffer(*context, memory_, desc.data_type,
- shape_.b * shape_.w * shape_.h * shape_.d *
- DivideRoundUp(shape_.c, 4),
- &image_buffer_memory_));
- }
- return absl::OkStatus();
-}
-
-absl::Status CreateTensor(const CLContext &context, const BHWC &shape,
- const TensorDescriptor &descriptor, Tensor *result)
-{
- const BHWDC shape5D(shape.b, shape.h, shape.w, 1, shape.c);
- return CreateTensor(context, shape5D, descriptor, nullptr, result);
-}
-
-absl::Status CreateTensor(const CLContext &context, const BHWDC &shape,
- const TensorDescriptor &descriptor, Tensor *result)
-{
- return CreateTensor(context, shape, descriptor, nullptr, result);
-}
-
-absl::Status CreateSharedTensor(const CLContext &context, cl_mem memory, const BHWC &shape,
- const TensorDescriptor &descriptor, Tensor *result)
-{
- const BHWDC shape5D(shape.b, shape.h, shape.w, 1, shape.c);
- return CreateTensorShared(context, shape5D, descriptor, memory, result);
-}
-
-absl::Status CreateSharedTensor(const CLContext &context, cl_mem memory, const BHWDC &shape,
- const TensorDescriptor &descriptor, Tensor *result)
-{
- return CreateTensorShared(context, shape, descriptor, memory, result);
-}
-
-absl::Status AllocateTensorMemory(const CLContext &context, const BHWC &shape,
- const TensorDescriptor &descriptor, CLMemory *result)
-{
- const BHWDC shape5D(shape.b, shape.h, shape.w, 1, shape.c);
- return AllocateTensorMemory(context, shape5D, descriptor, nullptr, result);
-}
-
-absl::Status AllocateTensorMemory(const CLContext &context, const BHWDC &shape,
- const TensorDescriptor &descriptor, CLMemory *result)
-{
- return AllocateTensorMemory(context, shape, descriptor, nullptr, result);
-}
-
-} // namespace gpu_cl
-} // namespace backend
-} // namespace onert
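
Throughout Tensor.cc, channels are packed four to a slice (one float4/half4 texel), so the slice-packed storage types allocate b * w * h * d * ceil(c / 4) texels. A small sketch of that arithmetic, mirroring GetMemorySizeInBytes for the non-SINGLE_TEXTURE_2D storage types:

    #include <cstdint>
    #include <iostream>

    // ceil(x / n): channels are packed four to a slice.
    int DivideRoundUp(int x, int n) { return (x + n - 1) / n; }

    // Memory footprint of a BHWDC tensor for slice-packed storage,
    // matching the flt4_size * b * w * h * d * Slices() expression above.
    uint64_t MemorySizeInBytes(int b, int h, int w, int d, int c, int bytes_per_element)
    {
      const uint64_t flt4_size = 4ull * bytes_per_element;
      return flt4_size * b * w * h * d * DivideRoundUp(c, 4);
    }

    int main()
    {
      // A 1x8x8x1 tensor with 6 channels in FP16: 2 slices -> 8 * 8 * 2 * 8 = 1024 bytes.
      std::cout << MemorySizeInBytes(1, 8, 8, 1, 6, 2) << "\n"; // prints 1024
    }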
diff --git a/runtime/onert/backend/gpu_cl/open_cl/Tensor.h b/runtime/onert/backend/gpu_cl/open_cl/Tensor.h
deleted file mode 100644
index b1930a423..000000000
--- a/runtime/onert/backend/gpu_cl/open_cl/Tensor.h
+++ /dev/null
@@ -1,142 +0,0 @@
-/*
- * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
- * Copyright 2019 The TensorFlow Authors. All Rights Reserved.
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#ifndef __ONERT_BACKEND_GPU_CL_OPENCL_TENSOR_H__
-#define __ONERT_BACKEND_GPU_CL_OPENCL_TENSOR_H__
-
-#include <cstdint>
-#include <memory>
-
-#include "absl/types/span.h"
-#include "ClCommandQueue.h"
-#include "OpenclWrapper.h"
-#include "ClContext.h"
-#include "ClDevice.h"
-#include "ClMemory.h"
-#include "GpuObject.h"
-#include "TensorType.h"
-#include "Util.h"
-#include "DataType.h"
-#include "Shape.h"
-#include "Status.h"
-#include "InternalTensor.h"
-#include "Types.h"
-
-namespace onert
-{
-namespace backend
-{
-namespace gpu_cl
-{
-
-class Tensor : public GPUObject
-{
-public:
- Tensor() : memory_(nullptr), image_buffer_memory_(nullptr), memory_owner_(true) {}
- Tensor(cl_mem memory, bool memory_owner, const BHWC &shape, const TensorDescriptor &descriptor);
- Tensor(cl_mem memory, bool memory_owner, const BHWDC &shape, const TensorDescriptor &descriptor);
- Tensor(cl_mem memory, bool memory_owner, cl_mem image_buffer_memory, const BHWC &shape,
- const TensorDescriptor &descriptor);
- Tensor(cl_mem memory, bool memory_owner, cl_mem image_buffer_memory, const BHWDC &shape,
- const TensorDescriptor &descriptor);
-
- // Move only
- Tensor(Tensor &&tensor);
- Tensor &operator=(Tensor &&tensor);
- Tensor(const Tensor &) = delete;
- Tensor &operator=(const Tensor &) = delete;
-
- virtual ~Tensor() { Release(); }
-
- absl::Status GetGPUResources(const GPUObjectDescriptor *obj_ptr,
- GPUResourcesWithValue *resources) const override;
-
- int Width() const { return shape_.w; }
- int Height() const { return shape_.h; }
- int Depth() const { return shape_.d; }
- int Channels() const { return shape_.c; }
- int Slices() const { return DivideRoundUp(shape_.c, 4); }
- int Batch() const { return shape_.b; }
- TensorDescriptor GetDescriptor() const { return descriptor_; }
- DataType GetDataType() const { return descriptor_.data_type; }
- TensorStorageType GetStorageType() const { return descriptor_.storage_type; }
-
- // for profiling and memory statistics
- uint64_t GetMemorySizeInBytes() const;
-
- cl_mem GetMemoryPtr() const;
-
- // This function returns the buffer memory ptr for IMAGE_BUFFER instead of
- // the image memory ptr.
- cl_mem GetMemoryPtrForWriting() const;
-
- absl::Status WriteData(CLCommandQueue *queue, const TensorFloat32 &src);
- absl::Status WriteData(CLCommandQueue *queue,
- const InternalTensor<Linear, DataType::FLOAT32> &src);
- absl::Status WriteData(CLCommandQueue *queue, const InternalTensor<HWC, DataType::FLOAT32> &src);
-
- absl::Status WriteData(CLCommandQueue *queue, const Tensor5DFloat32 &src);
- absl::Status ReadData(CLCommandQueue *queue, TensorFloat32 *dst) const;
- absl::Status ReadData(CLCommandQueue *queue, Tensor5DFloat32 *dst) const;
-
- absl::Status CreateFromDescriptor(const TensorDescriptor &desc, CLContext *context);
-
-private:
- absl::Status IsValid(const BHWC &shape) const;
- absl::Status IsValid(const BHWDC &shape) const;
-
- int GetChannelsAlignment() const;
- int GetAlignedChannels() const;
-
- absl::Status WriteDataBHWDC(absl::Span<const float> in, CLCommandQueue *queue);
- absl::Status ReadDataBHWDC(absl::Span<float> out, CLCommandQueue *queue) const;
-
- int3 GetFullTensorRegion() const;
- void Release();
-
- cl_mem memory_;
- cl_mem image_buffer_memory_; // for TensorStorageType::IMAGE_BUFFER only
- bool memory_owner_;
- BHWDC shape_;
- TensorDescriptor descriptor_;
-};
-
-using TensorPtr = std::shared_ptr<Tensor>;
-
-absl::Status AllocateTensorMemory(const CLContext &context, const BHWC &shape,
- const TensorDescriptor &descriptor, CLMemory *result);
-
-absl::Status AllocateTensorMemory(const CLContext &context, const BHWDC &shape,
- const TensorDescriptor &descriptor, CLMemory *result);
-
-absl::Status CreateTensor(const CLContext &context, const BHWC &shape,
- const TensorDescriptor &descriptor, Tensor *result);
-
-absl::Status CreateTensor(const CLContext &context, const BHWDC &shape,
- const TensorDescriptor &descriptor, Tensor *result);
-
-absl::Status CreateSharedTensor(const CLContext &context, cl_mem memory, const BHWC &shape,
- const TensorDescriptor &descriptor, Tensor *result);
-
-absl::Status CreateSharedTensor(const CLContext &context, cl_mem memory, const BHWDC &shape,
- const TensorDescriptor &descriptor, Tensor *result);
-
-} // namespace gpu_cl
-} // namespace backend
-} // namespace onert
-
-#endif // __ONERT_BACKEND_GPU_CL_OPENCL_TENSOR_H__
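
All BHWC entry points in this header forward to their BHWDC counterparts by inserting a unit depth. A tiny sketch of that promotion and of how a TEXTURE_2D image region follows from it (the structs are stand-ins; the real types lived in the deleted Shape.h):

    #include <iostream>

    // Stand-ins for the deleted Shape.h types (assumptions for this sketch).
    struct BHWC { int b, h, w, c; };
    struct BHWDC { int b, h, w, d, c; };

    int DivideRoundUp(int x, int n) { return (x + n - 1) / n; }

    // Every BHWC overload above forwards to the BHWDC one with depth = 1.
    BHWDC To5D(const BHWC &s) { return {s.b, s.h, s.w, 1, s.c}; }

    int main()
    {
      BHWDC s = To5D({1, 224, 224, 3});
      // For TEXTURE_2D storage, GetFullTensorRegion maps this tensor to a
      // (w*b*d) x (h*slices) image: 224 x 224 here, since 3 channels fit in 1 slice.
      std::cout << s.w * s.b * s.d << " x " << s.h * DivideRoundUp(s.c, 4) << "\n";
    }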
diff --git a/runtime/onert/backend/gpu_cl/open_cl/TensorType.cc b/runtime/onert/backend/gpu_cl/open_cl/TensorType.cc
deleted file mode 100644
index 7ede38795..000000000
--- a/runtime/onert/backend/gpu_cl/open_cl/TensorType.cc
+++ /dev/null
@@ -1,1116 +0,0 @@
-/*
- * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
- * Copyright 2019 The TensorFlow Authors. All Rights Reserved.
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#include "TensorType.h"
-
-#include "absl/strings/str_cat.h"
-#include "absl/strings/substitute.h"
-#include "Shape.h"
-#include "DataType.h"
-
-namespace onert
-{
-namespace backend
-{
-namespace gpu_cl
-{
-namespace
-{
-
-std::string GetWriteImageFromDataType(DataType data_type)
-{
- if (data_type == DataType::FLOAT32)
- {
- return "write_imagef";
- }
- else if (data_type == DataType::FLOAT16)
- {
- return "write_imageh";
- }
- else
- {
- throw std::runtime_error("Not supported data type");
- }
-}
-
-} // namespace
-
-std::string TextureAddressModeToString(TextureAddressMode address_mode)
-{
- switch (address_mode)
- {
- case TextureAddressMode::DONT_CARE:
- return "smp_none";
- case TextureAddressMode::ZERO:
- return "smp_zero";
- }
- return "";
-}
-
-std::string ToString(TensorStorageType type)
-{
- switch (type)
- {
- case TensorStorageType::UNKNOWN:
- return "TensorStorageType::UNKNOWN";
- case TensorStorageType::BUFFER:
- return "TensorStorageType::BUFFER";
- case TensorStorageType::TEXTURE_ARRAY:
- return "TensorStorageType::TEXTURE_ARRAY";
- case TensorStorageType::TEXTURE_2D:
- return "TensorStorageType::TEXTURE_2D";
- case TensorStorageType::TEXTURE_3D:
- return "TensorStorageType::TEXTURE_3D";
- case TensorStorageType::SINGLE_TEXTURE_2D:
- return "TensorStorageType::SINGLE_TEXTURE_2D";
- case TensorStorageType::IMAGE_BUFFER:
- return "TensorStorageType::IMAGE_BUFFER";
- }
- return "";
-}
-
-TensorDescriptor::TensorDescriptor(TensorDescriptor &&desc)
- : GPUObjectDescriptor(std::move(desc)), data_type(desc.data_type),
- storage_type(desc.storage_type), layout(desc.layout), shape(desc.shape),
- data(std::move(desc.data))
-{
-}
-TensorDescriptor &TensorDescriptor::operator=(TensorDescriptor &&desc)
-{
- if (this != &desc)
- {
- std::swap(data_type, desc.data_type);
- std::swap(storage_type, desc.storage_type);
- std::swap(layout, desc.layout);
- std::swap(shape, desc.shape);
- data = std::move(desc.data);
- GPUObjectDescriptor::operator=(std::move(desc));
- }
- return *this;
-}
-
-GPUResources TensorDescriptor::GetGPUResources() const
-{
- GPUResources resources;
- if (HasAxis(Axis::WIDTH))
- {
- resources.ints.push_back("width");
- resources.ints.push_back("width_div2");
- resources.ints.push_back("width_div4");
- resources.ints.push_back("width_batched");
- resources.ints.push_back("width_batched_div2");
- resources.ints.push_back("width_batched_div4");
- }
- if (HasAxis(Axis::HEIGHT))
- {
- resources.ints.push_back("height");
- }
- if (HasAxis(Axis::CHANNELS))
- {
- resources.ints.push_back("slices");
- resources.ints.push_back("channels");
- }
- if (HasAxis(Axis::BATCH))
- {
- resources.ints.push_back("batch");
- }
- if (HasAxis(Axis::DEPTH))
- {
- resources.ints.push_back("depth");
- }
- if (storage_type == TensorStorageType::BUFFER)
- {
- GPUBufferDescriptor desc;
- desc.data_type = data_type;
- desc.access_type = access_type_;
- desc.element_size = 4;
- auto it1 = state_vars_.find("ElementsX2");
- if (it1 != state_vars_.end() && it1->second == "true")
- {
- desc.element_size = 8;
- }
- auto it2 = state_vars_.find("ElementsX4");
- if (it2 != state_vars_.end() && it2->second == "true")
- {
- desc.element_size = 16;
- }
- resources.buffers.push_back({"buffer", desc});
- }
- else if (storage_type == TensorStorageType::SINGLE_TEXTURE_2D ||
- storage_type == TensorStorageType::TEXTURE_2D)
- {
- GPUImage2DDescriptor desc;
- desc.data_type = data_type;
- desc.access_type = access_type_;
- resources.images2d.push_back({"image2d", desc});
- }
- else if (storage_type == TensorStorageType::TEXTURE_ARRAY)
- {
- GPUImage2DArrayDescriptor desc;
- desc.data_type = data_type;
- desc.access_type = access_type_;
- resources.image2d_arrays.push_back({"image2d_array", desc});
- }
- else if (storage_type == TensorStorageType::TEXTURE_3D)
- {
- GPUImage3DDescriptor desc;
- desc.data_type = data_type;
- desc.access_type = access_type_;
- resources.images3d.push_back({"image3d", desc});
- }
- else if (storage_type == TensorStorageType::IMAGE_BUFFER)
- {
- if (access_type_ == AccessType::READ)
- {
- GPUImageBufferDescriptor desc;
- desc.data_type = data_type;
- desc.access_type = access_type_;
- resources.image_buffers.push_back({"image_buffer", desc});
- }
- else
- {
- GPUBufferDescriptor desc;
- desc.data_type = data_type;
- desc.access_type = access_type_;
- desc.element_size = 4;
- resources.buffers.push_back({"buffer", desc});
- }
- }
- return resources;
-}
-
-absl::Status TensorDescriptor::PerformSelector(const std::string &selector,
- const std::vector<std::string> &args,
- const std::vector<std::string> &template_args,
- std::string *result) const
-{
- if (selector == "Width")
- {
- *result = GetWidth();
- return absl::OkStatus();
- }
- else if (selector == "Height")
- {
- *result = "height";
- return absl::OkStatus();
- }
- else if (selector == "Slices")
- {
- *result = "slices";
- return absl::OkStatus();
- }
- else if (selector == "SliceStride")
- {
- *result = GetSliceStride();
- return absl::OkStatus();
- }
- else if (selector == "Channels")
- {
- *result = "channels";
- return absl::OkStatus();
- }
- else if (selector == "Batch")
- {
- if (HasAxis(Axis::BATCH))
- {
- *result = "batch";
- }
- else
- {
- *result = "1";
- }
- return absl::OkStatus();
- }
- else if (selector == "Depth")
- {
- *result = "depth";
- return absl::OkStatus();
- }
- else if (selector == "SetBatchRef")
- {
- if (args.size() != 1)
- {
- return absl::InvalidArgumentError("Unsupported arguments in SetBatchRef selector");
- }
- state_vars_["batch_id"] = args[0];
- *result = "";
- return absl::OkStatus();
- }
- else if (selector == "Read")
- {
- return PerformReadSelector(args, template_args, result);
- }
- else if (selector == "Write")
- {
- return PerformWriteSelector(args, result);
- }
- else if (selector == "WriteLinear")
- {
- return PerformWriteLinearSelector(args, result);
- }
- else if (selector == "GetAddress")
- {
- return PerformGetAddressSelector(args, result);
- }
- else if (selector == "GetPtrWithSliceOffset")
- {
- return PerformGetPtrWithSliceOffsetSelector(args, result);
- }
- else if (selector == "GetWHOffset")
- {
- return PerformGetWHOffsetSelector(args, result);
- }
- else if (selector == "GetHandle")
- {
- return PerformGetHandleSelector(args, result);
- }
- else
- {
-    return absl::NotFoundError(
-      absl::StrCat("TensorDescriptor doesn't have a selector with name - ", selector));
- }
-}
-
-absl::Status TensorDescriptor::PerformReadSelector(const std::vector<std::string> &args,
- const std::vector<std::string> &template_args,
- std::string *result) const
-{
- DataType read_as_type = data_type;
- if (!template_args.empty())
- {
- if (template_args.size() != 1)
- {
- return absl::NotFoundError("Unrecognized Read selector template arguments.");
- }
- else
- {
- RETURN_IF_ERROR(GetDataTypeFromTemplateArgs(template_args[0], &read_as_type));
- }
- }
- if (args.size() == 1)
- { // function overload for 1D linear types.
- if (storage_type == TensorStorageType::BUFFER ||
- storage_type == TensorStorageType::IMAGE_BUFFER)
- {
- *result = Read(read_as_type, args[0]);
- return absl::OkStatus();
- }
- else
- {
-      return absl::InvalidArgumentError(
-        "Read selector with a single argument can be used only with linear "
-        "storage types (BUFFER or IMAGE_BUFFER)");
- }
- }
- std::string xc;
- std::string yc;
- std::string zc;
- std::string sc;
- std::string bc;
- bool parsed = ParseCoordsFromArgs(args, 0, &xc, &yc, &zc, &sc, &bc);
- if (args.size() < 2 || !parsed)
- {
- return absl::NotFoundError("Unrecognized Read selector");
- }
-
- *result = Read(read_as_type, GetGlobalAddressNoDeclaration(xc, yc, zc, sc, bc));
- return absl::OkStatus();
-}
-
-absl::Status TensorDescriptor::GetLinkingContextFromWriteSelector(
- const std::vector<std::string> &args, std::string *value_name, std::string *x_coord,
- std::string *y_coord, std::string *s_coord) const
-{
- std::string xc;
- std::string yc;
- std::string zc;
- std::string sc;
- std::string bc;
- bool parsed = ParseCoordsFromArgs(args, 1, &xc, &yc, &zc, &sc, &bc);
- if (args.size() < 2 || !parsed)
- {
- return absl::NotFoundError("Unrecognized Write selector");
- }
- *value_name = args[0];
- if (HasAxis(Axis::BATCH) && !IsBatchedWidth())
- {
- *x_coord = absl::StrCat("((", xc, ") * batch + (", bc, "))");
- }
- else
- {
- *x_coord = absl::StrCat("(", xc, ")");
- }
- *y_coord = absl::StrCat("(", yc, ")");
- *s_coord = absl::StrCat("(", sc, ")");
- return absl::OkStatus();
-}
-
-absl::Status TensorDescriptor::PerformWriteSelector(const std::vector<std::string> &args,
- std::string *result) const
-{
- std::string xc;
- std::string yc;
- std::string zc;
- std::string sc;
- std::string bc;
- bool parsed = ParseCoordsFromArgs(args, 1, &xc, &yc, &zc, &sc, &bc);
- if (args.size() < 2 || !parsed)
- {
- return absl::NotFoundError("Unrecognized Write selector");
- }
- *result = Write(args[0], GetGlobalAddressNoDeclaration(xc, yc, zc, sc, bc));
- return absl::OkStatus();
-}
-
-absl::Status TensorDescriptor::PerformWriteLinearSelector(const std::vector<std::string> &args,
- std::string *result) const
-{
- if (storage_type != TensorStorageType::BUFFER && storage_type != TensorStorageType::IMAGE_BUFFER)
- {
- return absl::InvalidArgumentError("WriteLinear selector can be used only with linear "
- "storages(BUFFER/IMAGE_BUFFER)");
- }
- if (args.size() != 2)
- {
- return absl::NotFoundError("Unrecognized WriteLinear selector");
- }
- *result = Write(args[0], "(" + args[1] + ")");
- return absl::OkStatus();
-}
-
-std::string TensorDescriptor::Read(DataType read_as_type, const std::string &global_address) const
-{
- const std::string read_as = read_as_type == DataType::FLOAT16 ? "read_imageh" : "read_imagef";
- std::string image_type;
- if (storage_type == TensorStorageType::TEXTURE_2D ||
- storage_type == TensorStorageType::SINGLE_TEXTURE_2D)
- {
- image_type = "image2d";
- }
- else if (storage_type == TensorStorageType::TEXTURE_3D)
- {
- image_type = "image3d";
- }
- else if (storage_type == TensorStorageType::TEXTURE_ARRAY)
- {
- image_type = "image2d_array";
- }
- switch (storage_type)
- {
- case TensorStorageType::BUFFER:
- if (read_as_type == data_type)
- {
- return absl::StrCat("buffer[", global_address, "]");
- }
- else
- {
- const std::string conversion =
- read_as_type == DataType::FLOAT16 ? "convert_half4" : "convert_float4";
- return absl::StrCat(conversion, "(buffer[", global_address, "])");
- }
- case TensorStorageType::TEXTURE_2D:
- case TensorStorageType::TEXTURE_3D:
- case TensorStorageType::SINGLE_TEXTURE_2D:
- case TensorStorageType::TEXTURE_ARRAY:
- return absl::StrCat(read_as, "(", image_type,
- ", " + TextureAddressModeToString(ModeFromState()) + ", ", global_address,
- ")");
- case TensorStorageType::IMAGE_BUFFER:
- return absl::StrCat(read_as, "(image_buffer, ", global_address, ")");
- case TensorStorageType::UNKNOWN:
- return "";
- }
- return "";
-}
-
-std::string TensorDescriptor::Write(const std::string &var_name,
- const std::string &global_address) const
-{
- std::string image_type;
- if (storage_type == TensorStorageType::TEXTURE_2D ||
- storage_type == TensorStorageType::SINGLE_TEXTURE_2D)
- {
- image_type = "image2d";
- }
- else if (storage_type == TensorStorageType::TEXTURE_3D)
- {
- image_type = "image3d";
- }
- else if (storage_type == TensorStorageType::TEXTURE_ARRAY)
- {
- image_type = "image2d_array";
- }
- switch (storage_type)
- {
- case TensorStorageType::BUFFER:
- case TensorStorageType::IMAGE_BUFFER:
- return absl::StrCat("buffer[", global_address, "] = ", var_name, ";\n");
- case TensorStorageType::TEXTURE_2D:
- case TensorStorageType::TEXTURE_3D:
- case TensorStorageType::SINGLE_TEXTURE_2D:
- case TensorStorageType::TEXTURE_ARRAY:
- return absl::StrCat(GetWriteImageFromDataType(data_type), "(", image_type, ", ",
- global_address, ", ", var_name, ");\n");
- case TensorStorageType::UNKNOWN:
- return "";
- }
- return "";
-}
-
-absl::Status TensorDescriptor::PerformGetAddressSelector(const std::vector<std::string> &args,
- std::string *result) const
-{
- std::string xc;
- std::string yc;
- std::string zc;
- std::string sc;
- std::string bc;
- bool parsed = ParseCoordsFromArgs(args, 1, &xc, &yc, &zc, &sc, &bc);
- if (args.size() < 3 || !parsed)
- {
- return absl::NotFoundError("Unrecognized GetAddress selector");
- }
-
- *result = DeclareAddress(args[0], GetGlobalAddressNoDeclaration(xc, yc, zc, sc, bc));
- return absl::OkStatus();
-}
-
-absl::Status
-TensorDescriptor::PerformGetPtrWithSliceOffsetSelector(const std::vector<std::string> &args,
- std::string *result) const
-{
- if (storage_type != TensorStorageType::BUFFER)
- {
- return absl::InvalidArgumentError(
- "GetPtrWithSliceOffset selector can be used only with BUFFER");
- }
- if (args.size() != 1)
- {
-    return absl::NotFoundError(
-      absl::StrCat("GetPtrWithSliceOffset requires one argument (slice coordinate), but ",
-                   args.size(), " were passed"));
- }
- *result = absl::StrCat("buffer + ", args[0], " * ", GetSliceStride());
- return absl::OkStatus();
-}
-
-absl::Status TensorDescriptor::PerformGetWHOffsetSelector(const std::vector<std::string> &args,
- std::string *result) const
-{
- if (storage_type != TensorStorageType::BUFFER && storage_type != TensorStorageType::IMAGE_BUFFER)
- {
- return absl::InvalidArgumentError(
- "GetWHOffset selector can be used only with BUFFER/IMAGE_BUFFER");
- }
- if (args.size() != 2)
- {
-    return absl::NotFoundError(absl::StrCat(
-      "GetWHOffset requires two arguments (X and Y coordinates), but ", args.size(), " were passed"));
- }
- if (HasAxis(Axis::BATCH) && !IsBatchedWidth())
- {
- auto it = state_vars_.find("batch_id");
- std::string batch_id;
- if (it == state_vars_.end())
- {
-      return absl::NotFoundError(
-        "batch_id not found. It should be set up via the SetBatchRef() method.");
- }
- else
- {
- batch_id = it->second;
- }
- *result = absl::StrCat("((", args[1], ") * ", GetWidth(), " + (", args[0], ")) * batch + (",
- batch_id, ")");
- }
- else
- {
- *result = absl::StrCat("(", args[1], ") * ", GetWidth(), " + (", args[0], ")");
- }
- return absl::OkStatus();
-}
-
-absl::Status TensorDescriptor::PerformGetHandleSelector(const std::vector<std::string> &args,
- std::string *result) const
-{
- if (!args.empty())
- {
-    return absl::NotFoundError(
-      absl::StrCat("GetHandle does not take arguments, but ", args.size(), " were passed"));
- }
- switch (storage_type)
- {
- case TensorStorageType::BUFFER:
- *result = "buffer";
- return absl::OkStatus();
- case TensorStorageType::IMAGE_BUFFER:
- if (access_type_ == AccessType::READ)
- {
- *result = "image_buffer";
- }
- else
- {
- *result = "buffer";
- }
- return absl::OkStatus();
- case TensorStorageType::TEXTURE_2D:
- case TensorStorageType::SINGLE_TEXTURE_2D:
- *result = "image2d";
- return absl::OkStatus();
- case TensorStorageType::TEXTURE_ARRAY:
- *result = "image2d_array";
- return absl::OkStatus();
- case TensorStorageType::TEXTURE_3D:
- *result = "image3d";
- return absl::OkStatus();
- case TensorStorageType::UNKNOWN:
- return absl::UnavailableError("Unknown type");
- }
- return absl::UnavailableError("Unknown type");
-}
-
-std::string TensorDescriptor::DeclareAddress(const std::string &var_name,
- const std::string &address) const
-{
- return absl::StrCat(StorageTypeToAddressType(), " ", var_name, " = ", address, ";");
-}
-
-std::string TensorDescriptor::StorageTypeToAddressType() const
-{
- switch (storage_type)
- {
- case TensorStorageType::BUFFER:
- case TensorStorageType::IMAGE_BUFFER:
- return "int";
- case TensorStorageType::TEXTURE_2D:
- case TensorStorageType::SINGLE_TEXTURE_2D:
- return "int2";
- case TensorStorageType::TEXTURE_ARRAY:
- case TensorStorageType::TEXTURE_3D:
- return "int4";
- case TensorStorageType::UNKNOWN:
- return "";
- }
- return "";
-}
-
-std::string TensorDescriptor::GetGlobalAddressNoDeclarationWHS(const std::string &x,
- const std::string &y,
- const std::string &s) const
-{
- switch (storage_type)
- {
- case TensorStorageType::BUFFER:
- case TensorStorageType::IMAGE_BUFFER:
- {
- return absl::Substitute("((($2) * height + ($1)) * $3 + ($0))", x, y, s, GetWidth());
- }
- case TensorStorageType::TEXTURE_2D:
- return absl::Substitute("(int2)(($0), ($1) * slices + ($2))", x, y, s);
- case TensorStorageType::SINGLE_TEXTURE_2D:
- return absl::StrCat("(int2)(", x, ", ", y, ")");
- case TensorStorageType::TEXTURE_ARRAY:
- case TensorStorageType::TEXTURE_3D:
- return absl::StrCat("(int4)(", x, ", ", y, ", ", s, ", 0)");
- case TensorStorageType::UNKNOWN:
- return "error";
- }
- return "error";
-}
-
-std::string TensorDescriptor::GetGlobalAddressNoDeclarationWHSB(const std::string &x,
- const std::string &y,
- const std::string &s,
- const std::string &b) const
-{
- switch (storage_type)
- {
- case TensorStorageType::BUFFER:
- case TensorStorageType::IMAGE_BUFFER:
- return absl::Substitute("(((($3) * height + ($2)) * width + ($1)) * batch + ($0))", b, x, y, s);
- case TensorStorageType::TEXTURE_2D:
- return absl::Substitute("(int2)(($0) * batch + ($1), ($2) * slices + ($3))", x, b, y, s);
- case TensorStorageType::SINGLE_TEXTURE_2D:
- return absl::Substitute("(int2)(($0) * batch + ($1), ($2))", x, b, y);
- case TensorStorageType::TEXTURE_ARRAY:
- case TensorStorageType::TEXTURE_3D:
- return absl::Substitute("(int4)(($0) * batch + ($1), ($2), ($3), 0)", x, b, y, s);
- default:
- throw std::runtime_error("Unknown storage type");
- }
-}
-
-std::string TensorDescriptor::GetGlobalAddressNoDeclarationWHDS(const std::string &x,
- const std::string &y,
- const std::string &z,
- const std::string &s) const
-{
- switch (storage_type)
- {
- case TensorStorageType::BUFFER:
- case TensorStorageType::IMAGE_BUFFER:
- {
- return absl::Substitute("(((($3) * slices + ($2)) * height + ($1)) * $4 + ($0))", x, y, s, z,
- GetWidth());
- }
- case TensorStorageType::TEXTURE_2D:
- return absl::Substitute("(int2)(($0) * depth + ($1), ($2) * slices + ($3))", x, z, y, s);
- case TensorStorageType::SINGLE_TEXTURE_2D:
- return absl::Substitute("(int2)(($0) * depth + ($1), ($2))", x, z, y);
- case TensorStorageType::TEXTURE_ARRAY:
- case TensorStorageType::TEXTURE_3D:
- return absl::Substitute("(int4)(($0), ($1), ($2) * slices + ($3), 0)", x, y, z, s);
- case TensorStorageType::UNKNOWN:
- return "error";
- }
- return "error";
-}
-
-std::string TensorDescriptor::GetGlobalAddressNoDeclarationWHDSB(const std::string &x,
- const std::string &y,
- const std::string &z,
- const std::string &s,
- const std::string &b) const
-{
- switch (storage_type)
- {
- case TensorStorageType::BUFFER:
- case TensorStorageType::IMAGE_BUFFER:
- return absl::Substitute("((((($4) * slices + ($3)) * height + ($2)) * width + ($1)) * batch + "
- "($0))",
- b, x, y, s, z);
- case TensorStorageType::TEXTURE_2D:
- return absl::Substitute("(int2)((($0) * batch + ($1)) * depth + ($2), ($3) * slices + ($4))",
- x, b, z, y, s);
- case TensorStorageType::SINGLE_TEXTURE_2D:
- return absl::Substitute("(int2)((($0) * batch + ($1)) * depth + ($2), ($3))", x, b, z, y);
- case TensorStorageType::TEXTURE_ARRAY:
- case TensorStorageType::TEXTURE_3D:
- return absl::Substitute("(int4)(($0) * batch + ($1), ($2), ($3) * slices + ($4), 0)", x, b, y,
- z, s);
- default:
- throw std::runtime_error("Unknown storage type");
- }
-}
-
-std::string TensorDescriptor::GetGlobalAddressNoDeclaration(const std::string &xc,
- const std::string &yc,
- const std::string &zc,
- const std::string &sc,
- const std::string &bc) const
-{
- if (layout == Layout::HWC || (IsBatchedWidth() && layout == Layout::BHWC))
- {
- return GetGlobalAddressNoDeclarationWHS(xc, yc, sc);
- }
- else if (layout == Layout::BHWC)
- {
- return GetGlobalAddressNoDeclarationWHSB(xc, yc, sc, bc);
- }
- else if (layout == Layout::HWDC || (IsBatchedWidth() && layout == Layout::BHWDC))
- {
- return GetGlobalAddressNoDeclarationWHDS(xc, yc, zc, sc);
- }
- else if (layout == Layout::BHWDC)
- {
- return GetGlobalAddressNoDeclarationWHDSB(xc, yc, zc, sc, bc);
- }
- else
- {
- throw std::runtime_error("Unsupported layout");
- }
-}
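-
-// Example (illustrative): for a BUFFER tensor with HWC layout,
-// GetGlobalAddressNoDeclaration("X", "Y", "", "S", "") dispatches to the WHS
-// variant and yields "(((S) * height + (Y)) * width + (X))"; for a TEXTURE_2D
-// tensor it yields "(int2)((X), (Y) * slices + (S))".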
-
-absl::Status TensorDescriptor::GetDataTypeFromTemplateArgs(const std::string &template_arg,
- DataType *result) const
-{
- std::string read_type = template_arg;
- if (read_type == "FLT" || read_type == "ACCUM_FLT")
- {
- auto it = state_vars_.find(read_type);
- if (it == state_vars_.end())
- {
- return absl::UnavailableError(
- absl::StrCat("Read selector template argument ", read_type, " is uninitialized."));
- }
- else
- {
- read_type = it->second;
- }
- }
-
- if (read_type == "half")
- {
- *result = DataType::FLOAT16;
- }
- else if (read_type == "float")
- {
- *result = DataType::FLOAT32;
- }
- else
- {
- return absl::NotFoundError(
- absl::StrCat("Unrecognized Read selector template argument - ", read_type));
- }
- return absl::OkStatus();
-}
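-
-// Example (illustrative): a Read<half> template argument resolves to
-// DataType::FLOAT16 and Read<float> to DataType::FLOAT32; the aliases "FLT"
-// and "ACCUM_FLT" are first resolved through the descriptor's state variables.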
-
-bool TensorDescriptor::HasAxis(Axis axis) const
-{
- if (axis == Axis::WIDTH || axis == Axis::HEIGHT || axis == Axis::CHANNELS)
- {
- return true;
- }
- if (axis == Axis::BATCH && (layout == Layout::BHWC || layout == Layout::BHWDC))
- {
- return true;
- }
- if (axis == Axis::DEPTH && (layout == Layout::HWDC || layout == Layout::BHWDC))
- {
- return true;
- }
- return false;
-}
-
-void TensorDescriptor::SetTextureAddressMode(TextureAddressMode mode)
-{
- if (mode == TextureAddressMode::ZERO)
- {
- state_vars_["TextureMode"] = "ZERO";
- }
- else
- {
- state_vars_["TextureMode"] = "DONT_CARE";
- }
-}
-
-bool TensorDescriptor::ParseCoordsFromArgs(const std::vector<std::string> &args, int offset,
- std::string *xc, std::string *yc, std::string *zc,
- std::string *sc, std::string *bc) const
-{
- if (HasAxis(Axis::WIDTH))
- {
- if ((size_t)offset >= args.size())
- return false;
- *xc = args[offset++];
- }
- if (HasAxis(Axis::HEIGHT))
- {
- if ((size_t)offset >= args.size())
- return false;
- *yc = args[offset++];
- }
- if (HasAxis(Axis::DEPTH))
- {
- if ((size_t)offset >= args.size())
- return false;
- *zc = args[offset++];
- }
- if (HasAxis(Axis::CHANNELS))
- {
- if ((size_t)offset >= args.size())
- {
- auto it = state_vars_.find("slice_id");
- if (it == state_vars_.end())
- {
- return false;
- }
- else
- {
- *sc = it->second;
- }
- }
- else
- {
- *sc = args[offset++];
- }
- }
- if (HasAxis(Axis::BATCH) && !IsBatchedWidth())
- {
- if ((size_t)offset >= args.size())
- {
- auto it = state_vars_.find("batch_id");
- if (it == state_vars_.end())
- {
- return false;
- }
- else
- {
- *bc = it->second;
- }
- }
- else
- {
- *bc = args[offset++];
- }
- }
- return true;
-}
-
-bool TensorDescriptor::IsBatchedWidth() const
-{
- auto it = state_vars_.find("BatchedWidth");
- return it != state_vars_.end() && it->second == "true";
-}
-
-std::string TensorDescriptor::GetWidth() const
-{
- std::string div;
- auto it1 = state_vars_.find("ElementsX2");
- if (it1 != state_vars_.end() && it1->second == "true")
- {
- div = "_div2";
- }
- auto it2 = state_vars_.find("ElementsX4");
- if (it2 != state_vars_.end() && it2->second == "true")
- {
- div = "_div4";
- }
- auto it = state_vars_.find("BatchedWidth");
- if (it != state_vars_.end() && it->second == "true")
- {
- return "width_batched" + div;
- }
- else
- {
- return "width" + div;
- }
-}
-
-std::string TensorDescriptor::GetSliceStride() const
-{
- if (IsBatchedWidth())
- {
- return GetWidth() + " * height";
- }
- else
- {
- if (HasAxis(Axis::BATCH))
- {
- return GetWidth() + " * height * batch";
- }
- else
- {
- return GetWidth() + " * height";
- }
- }
-}
-
-TextureAddressMode TensorDescriptor::ModeFromState() const
-{
- auto it = state_vars_.find("TextureMode");
- if (it != state_vars_.end())
- {
- if (it->second == "ZERO")
- {
- return TextureAddressMode::ZERO;
- }
- else
- {
- return TextureAddressMode::DONT_CARE;
- }
- }
- else
- {
- return TextureAddressMode::DONT_CARE;
- }
-}
-
-void TensorDescriptor::UploadData(const InternalTensor<HWC, DataType::FLOAT32> &src)
-{
- shape = BHWDC(1, src.shape.h, src.shape.w, 1, src.shape.c);
- UploadData(absl::MakeConstSpan(src.data));
-}
-
-void TensorDescriptor::UploadData(const InternalTensor<Linear, DataType::FLOAT32> &src)
-{
- shape = BHWDC(1, 1, 1, 1, src.shape.v);
- UploadData(absl::MakeConstSpan(src.data));
-}
-
-void TensorDescriptor::UploadData(absl::Span<const float> src)
-{
- int aligned_channels =
- storage_type == TensorStorageType::SINGLE_TEXTURE_2D ? shape.c : AlignByN(shape.c, 4);
- int elements_count = shape.b * shape.w * shape.h * shape.d * aligned_channels;
- data.resize(elements_count * SizeOf(data_type));
- if (data_type == DataType::FLOAT32)
- {
- float *gpu_data = reinterpret_cast<float *>(data.data());
- DataFromBHWDC(src, shape, *this, absl::MakeSpan(gpu_data, elements_count));
- }
-}
-
-bool TensorDescriptor::SupportsZeroClamp(const Axis &axis) const
-{
- switch (storage_type)
- {
- case TensorStorageType::UNKNOWN:
- return false;
- case TensorStorageType::BUFFER:
- case TensorStorageType::IMAGE_BUFFER:
- return false;
- case TensorStorageType::TEXTURE_ARRAY:
- case TensorStorageType::TEXTURE_2D:
- case TensorStorageType::SINGLE_TEXTURE_2D:
- return axis == Axis::WIDTH || axis == Axis::HEIGHT;
- case TensorStorageType::TEXTURE_3D:
- return axis == Axis::WIDTH || axis == Axis::HEIGHT || axis == Axis::DEPTH;
- }
- return false;
-}
-
-bool TensorDescriptor::CanReadOutOfBorder(const Axis &) const
-{
- switch (storage_type)
- {
- case TensorStorageType::UNKNOWN:
- return false;
- case TensorStorageType::BUFFER:
- return false;
- case TensorStorageType::IMAGE_BUFFER:
- case TensorStorageType::TEXTURE_2D:
- case TensorStorageType::TEXTURE_3D:
- case TensorStorageType::SINGLE_TEXTURE_2D:
- case TensorStorageType::TEXTURE_ARRAY:
- return true;
- }
- return false;
-}
-
-bool TensorDescriptor::IsLinear() const
-{
- return storage_type == TensorStorageType::BUFFER ||
- storage_type == TensorStorageType::IMAGE_BUFFER;
-}
-
-bool TensorDescriptor::ReturnsZeroForNegOneRead() const
-{
- return storage_type == TensorStorageType::IMAGE_BUFFER;
-}
-
-namespace
-{
-int GetLinearIndex(const TensorDescriptor &desc, const BHWDC &shape, int b, int x, int y, int d,
- int s, int sub_c)
-{
- const int slices = DivideRoundUp(shape.c, 4);
- switch (desc.storage_type)
- {
- case TensorStorageType::BUFFER:
- case TensorStorageType::IMAGE_BUFFER:
- case TensorStorageType::TEXTURE_ARRAY:
- case TensorStorageType::TEXTURE_3D:
- return ((((d * slices + s) * shape.h + y) * shape.w + x) * shape.b + b) * 4 +
- sub_c; // DSHWBC4
- case TensorStorageType::TEXTURE_2D:
- return ((((y * slices + s) * shape.w + x) * shape.b + b) * shape.d + d) * 4 +
- sub_c; // HSWBDC4
- case TensorStorageType::SINGLE_TEXTURE_2D:
- return (((y * shape.w + x) * shape.b + b) * shape.d + d) * shape.c + sub_c; // HWBDC
- default:
- return -1;
- }
- return -1;
-}
-
-int GetChannelsAlignment(const TensorDescriptor &desc, const BHWDC &shape)
-{
- return desc.storage_type == TensorStorageType::SINGLE_TEXTURE_2D ? shape.c : 4;
-}
-} // namespace
-
-template <typename T>
-void DataFromBHWDC(absl::Span<const float> src, const BHWDC &shape, const TensorDescriptor &desc,
- absl::Span<T> dst)
-{
- const int channels_alignment = GetChannelsAlignment(desc, shape);
- const int slices = DivideRoundUp(shape.c, 4);
- for (int b = 0; b < shape.b; ++b)
- {
- for (int s = 0; s < slices; ++s)
- {
- for (int y = 0; y < shape.h; ++y)
- {
- for (int x = 0; x < shape.w; ++x)
- {
- for (int d = 0; d < shape.d; ++d)
- {
- for (int c = 0; c < channels_alignment; ++c)
- {
- float value;
- if (s * 4 + c < shape.c)
- {
- const int cpu_index = shape.LinearIndex({b, y, x, d, s * 4 + c});
- value = src[cpu_index];
- }
- else
- {
- value = 0.0f;
- }
- int gpu_index = GetLinearIndex(desc, shape, b, x, y, d, s, c);
- dst[gpu_index] = value;
- }
- }
- }
- }
- }
- }
-}
-
-template void DataFromBHWDC<float>(absl::Span<const float> src, const BHWDC &shape,
- const TensorDescriptor &desc, absl::Span<float> dst);
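-
-// Usage sketch (illustrative; shapes chosen arbitrarily): packing an HWC float
-// tensor into the GPU layout selected by a descriptor. Channels are padded to
-// a multiple of 4 unless the storage type is SINGLE_TEXTURE_2D.
-//
-// TensorDescriptor desc(DataType::FLOAT32, TensorStorageType::BUFFER, Layout::HWC);
-// BHWDC shape(1, 8, 8, 1, 3); // b, h, w, d, c
-// std::vector<float> cpu(shape.b * shape.h * shape.w * shape.d * shape.c);
-// std::vector<float> gpu(shape.b * shape.h * shape.w * shape.d *
-// DivideRoundUp(shape.c, 4) * 4);
-// DataFromBHWDC<float>(absl::MakeConstSpan(cpu), shape, desc, absl::MakeSpan(gpu));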
-
-template <typename T>
-void DataToBHWDC(absl::Span<const T> src, const BHWDC &shape, const TensorDescriptor &desc,
- absl::Span<float> dst)
-{
- const int channels_alignment = GetChannelsAlignment(desc, shape);
- const int slices = DivideRoundUp(shape.c, 4);
- for (int b = 0; b < shape.b; ++b)
- {
- for (int s = 0; s < slices; ++s)
- {
- for (int y = 0; y < shape.h; ++y)
- {
- for (int x = 0; x < shape.w; ++x)
- {
- for (int d = 0; d < shape.d; ++d)
- {
- for (int c = 0; c < channels_alignment; ++c)
- {
- if (s * 4 + c >= shape.c)
- {
- continue;
- }
- int cpu_index = shape.LinearIndex({b, y, x, d, s * 4 + c});
- int gpu_index = GetLinearIndex(desc, shape, b, x, y, d, s, c);
- dst[cpu_index] = src[gpu_index];
- }
- }
- }
- }
- }
- }
-}
-
-template void DataToBHWDC<float>(absl::Span<const float> src, const BHWDC &shape,
- const TensorDescriptor &desc, absl::Span<float> dst);
-
-} // namespace gpu_cl
-} // namespace backend
-} // namespace onert
diff --git a/runtime/onert/backend/gpu_cl/open_cl/TensorType.h b/runtime/onert/backend/gpu_cl/open_cl/TensorType.h
deleted file mode 100644
index 45523783f..000000000
--- a/runtime/onert/backend/gpu_cl/open_cl/TensorType.h
+++ /dev/null
@@ -1,188 +0,0 @@
-/*
- * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
- * Copyright 2019 The TensorFlow Authors. All Rights Reserved.
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#ifndef __ONERT_BACKEND_GPU_CL_OPENCL_TENSOR_TYPE_H__
-#define __ONERT_BACKEND_GPU_CL_OPENCL_TENSOR_TYPE_H__
-
-#include <cstddef>
-#include <string>
-
-#include "absl/types/span.h"
-#include "GpuObject.h"
-#include "DataType.h"
-#include "InternalTensor.h"
-#include "Shape.h"
-
-namespace onert
-{
-namespace backend
-{
-namespace gpu_cl
-{
-
-enum class TextureAddressMode
-{
- DONT_CARE, // translated to CLK_ADDRESS_NONE
- ZERO, // translated to CLK_ADDRESS_CLAMP
-};
-
-std::string TextureAddressModeToString(TextureAddressMode address_mode);
-
-enum class TensorStorageType
-{
- UNKNOWN,
- BUFFER,
- IMAGE_BUFFER,
- TEXTURE_2D,
- TEXTURE_3D,
- TEXTURE_ARRAY,
- SINGLE_TEXTURE_2D
-};
-
-struct TensorDescriptor : public GPUObjectDescriptor
-{
- TensorDescriptor() = default;
- TensorDescriptor(DataType dt, TensorStorageType st, Layout l)
- : data_type(dt), storage_type(st), layout(l)
- {
- }
-
- TensorDescriptor(const TensorDescriptor &) = default;
- TensorDescriptor &operator=(const TensorDescriptor &) = default;
- TensorDescriptor(TensorDescriptor &&desc);
- TensorDescriptor &operator=(TensorDescriptor &&desc);
-
- bool operator==(const TensorDescriptor &d) const
- {
- return data_type == d.data_type && storage_type == d.storage_type && layout == d.layout;
- }
-
- bool operator!=(const TensorDescriptor &d) const { return !(*this == d); }
-
- absl::Status PerformSelector(const std::string &selector, const std::vector<std::string> &args,
- const std::vector<std::string> &template_args,
- std::string *result) const override;
-
- GPUResources GetGPUResources() const override;
-
- absl::Status CreateGPUObject(CLContext *context, GPUObjectPtr *result) const override;
- void Release() override { data.clear(); }
-
- bool HasAxis(Axis axis) const;
- void SetTextureAddressMode(TextureAddressMode mode);
-
- absl::Status GetLinkingContextFromWriteSelector(const std::vector<std::string> &args,
- std::string *value_name, std::string *x_coord,
- std::string *y_coord, std::string *s_coord) const;
-
- void UploadData(const InternalTensor<HWC, DataType::FLOAT32> &src);
- void UploadData(const InternalTensor<Linear, DataType::FLOAT32> &src);
-
- bool SupportsZeroClamp(const Axis &axis) const;
- bool CanReadOutOfBorder(const Axis &axis) const;
- bool IsLinear() const;
-
- // Applicable only to storage types for which IsLinear() returns true.
- // Such types address data with a single 1-D component: addr (int).
- // Returns true if a read at addr == -1 yields FLT4(0.0) for this linear
- // storage type, and false otherwise.
- bool ReturnsZeroForNegOneRead() const;
-
- DataType data_type = DataType::UNKNOWN;
- TensorStorageType storage_type = TensorStorageType::UNKNOWN;
- // This field describes the logical layout; the actual (physical) GPU layout
- // can be entirely different.
- Layout layout = Layout::UNKNOWN; // Supported layouts are HWC, BHWC, HWDC, BHWDC
-
- // optional
- BHWDC shape;
- std::vector<uint8_t> data;
-
-private:
- absl::Status PerformReadSelector(const std::vector<std::string> &args,
- const std::vector<std::string> &template_args,
- std::string *result) const;
-
- absl::Status PerformGetAddressSelector(const std::vector<std::string> &args,
- std::string *result) const;
-
- absl::Status PerformGetPtrWithSliceOffsetSelector(const std::vector<std::string> &args,
- std::string *result) const;
-
- absl::Status PerformGetWHOffsetSelector(const std::vector<std::string> &args,
- std::string *result) const;
-
- absl::Status PerformGetHandleSelector(const std::vector<std::string> &args,
- std::string *result) const;
-
- std::string DeclareAddress(const std::string &var_name, const std::string &address) const;
-
- std::string StorageTypeToAddressType() const;
-
- absl::Status PerformWriteSelector(const std::vector<std::string> &args,
- std::string *result) const;
-
- absl::Status PerformWriteLinearSelector(const std::vector<std::string> &args,
- std::string *result) const;
-
- std::string Read(DataType read_as_type, const std::string &global_address) const;
- std::string Write(const std::string &var_name, const std::string &global_address) const;
-
- bool IsBatchedWidth() const;
-
- std::string GetWidth() const;
- std::string GetSliceStride() const;
-
- TextureAddressMode ModeFromState() const;
-
- absl::Status GetDataTypeFromTemplateArgs(const std::string &template_arg, DataType *result) const;
-
- std::string GetGlobalAddressNoDeclarationWHS(const std::string &x, const std::string &y,
- const std::string &s) const;
- std::string GetGlobalAddressNoDeclarationWHSB(const std::string &x, const std::string &y,
- const std::string &s, const std::string &b) const;
- std::string GetGlobalAddressNoDeclarationWHDS(const std::string &x, const std::string &y,
- const std::string &z, const std::string &s) const;
- std::string GetGlobalAddressNoDeclarationWHDSB(const std::string &x, const std::string &y,
- const std::string &z, const std::string &s,
- const std::string &b) const;
- std::string GetGlobalAddressNoDeclaration(const std::string &xc, const std::string &yc,
- const std::string &zc, const std::string &sc,
- const std::string &bc) const;
-
- bool ParseCoordsFromArgs(const std::vector<std::string> &args, int offset, std::string *xc,
- std::string *yc, std::string *zc, std::string *sc,
- std::string *bc) const;
-
- void UploadData(absl::Span<const float> src);
-};
-
-template <typename T>
-void DataFromBHWDC(absl::Span<const float> src, const BHWDC &shape, const TensorDescriptor &desc,
- absl::Span<T> dst);
-
-template <typename T>
-void DataToBHWDC(absl::Span<const T> src, const BHWDC &shape, const TensorDescriptor &desc,
- absl::Span<float> dst);
-
-std::string ToString(TensorStorageType type);
-
-} // namespace gpu_cl
-} // namespace backend
-} // namespace onert
-
-#endif // __ONERT_BACKEND_GPU_CL_OPENCL_TENSOR_TYPE_H__
diff --git a/runtime/onert/backend/gpu_cl/open_cl/TensorTypeUtil.cc b/runtime/onert/backend/gpu_cl/open_cl/TensorTypeUtil.cc
deleted file mode 100644
index b1f8309e4..000000000
--- a/runtime/onert/backend/gpu_cl/open_cl/TensorTypeUtil.cc
+++ /dev/null
@@ -1,90 +0,0 @@
-/*
- * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
- * Copyright 2019 The TensorFlow Authors. All Rights Reserved.
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#include "TensorTypeUtil.h"
-
-namespace onert
-{
-namespace backend
-{
-namespace gpu_cl
-{
-
-ObjectType ToObjectType(TensorStorageType type)
-{
- switch (type)
- {
- case TensorStorageType::IMAGE_BUFFER:
- case TensorStorageType::BUFFER:
- return ObjectType::OPENCL_BUFFER;
- case TensorStorageType::SINGLE_TEXTURE_2D:
- case TensorStorageType::TEXTURE_2D:
- case TensorStorageType::TEXTURE_ARRAY:
- case TensorStorageType::TEXTURE_3D:
- return ObjectType::OPENCL_TEXTURE;
- default:
- return ObjectType::UNKNOWN;
- }
-}
-
-DataLayout ToDataLayout(TensorStorageType type)
-{
- switch (type)
- {
- case TensorStorageType::BUFFER:
- return DataLayout::DHWC4;
- case TensorStorageType::IMAGE_BUFFER:
- return DataLayout::DHWC4;
- case TensorStorageType::SINGLE_TEXTURE_2D:
- return DataLayout::BHWC;
- case TensorStorageType::TEXTURE_2D:
- return DataLayout::HDWC4;
- case TensorStorageType::TEXTURE_ARRAY:
- return DataLayout::DHWC4;
- case TensorStorageType::TEXTURE_3D:
- return DataLayout::DHWC4;
- default:
- return DataLayout::UNKNOWN;
- }
-}
-
-TensorStorageType ToTensorStorageType(ObjectType object_type, DataLayout data_layout)
-{
- switch (object_type)
- {
- case ObjectType::OPENCL_BUFFER:
- return TensorStorageType::BUFFER;
- case ObjectType::OPENCL_TEXTURE:
- switch (data_layout)
- {
- case DataLayout::BHWC:
- return TensorStorageType::SINGLE_TEXTURE_2D;
- case DataLayout::DHWC4:
- return TensorStorageType::TEXTURE_ARRAY;
- case DataLayout::HDWC4:
- return TensorStorageType::TEXTURE_2D;
- default:
- return TensorStorageType::UNKNOWN;
- }
- default:
- return TensorStorageType::UNKNOWN;
- }
-}
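-
-// Example (illustrative): the mappings round-trip for TEXTURE_2D:
-// ToTensorStorageType(ToObjectType(TensorStorageType::TEXTURE_2D),
-// ToDataLayout(TensorStorageType::TEXTURE_2D))
-// == TensorStorageType::TEXTURE_2D.
-// They are lossy elsewhere: IMAGE_BUFFER maps back to BUFFER, and TEXTURE_3D
-// maps back to TEXTURE_ARRAY, since both pairs share an ObjectType/DataLayout.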
-
-} // namespace gpu_cl
-} // namespace backend
-} // namespace onert
diff --git a/runtime/onert/backend/gpu_cl/open_cl/Texture2d.cc b/runtime/onert/backend/gpu_cl/open_cl/Texture2d.cc
deleted file mode 100644
index ae25e85d0..000000000
--- a/runtime/onert/backend/gpu_cl/open_cl/Texture2d.cc
+++ /dev/null
@@ -1,237 +0,0 @@
-/*
- * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
- * Copyright 2019 The TensorFlow Authors. All Rights Reserved.
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#include "Texture2d.h"
-
-namespace onert
-{
-namespace backend
-{
-namespace gpu_cl
-{
-namespace
-{
-
-// Creates new 4-channel 2D texture with cl_channel_type elements
-absl::Status CreateTexture2D(int width, int height, DataType type, void *data, CLContext *context,
- Texture2D *result)
-{
- cl_mem texture;
- cl_channel_type channel_type = DataTypeToChannelType(type);
- RETURN_IF_ERROR(
- CreateRGBAImage2D(context->context(), width, height, channel_type, data, &texture));
- *result = Texture2D(texture, width, height, channel_type);
-
- return absl::OkStatus();
-}
-} // namespace
-
-Texture2DDescriptor::Texture2DDescriptor(Texture2DDescriptor &&desc)
- : GPUObjectDescriptor(std::move(desc)), element_type(desc.element_type),
- normalized(desc.normalized), normalized_type(desc.normalized_type), size(desc.size),
- data(std::move(desc.data))
-{
-}
-
-Texture2DDescriptor &Texture2DDescriptor::operator=(Texture2DDescriptor &&desc)
-{
- if (this != &desc)
- {
- std::swap(element_type, desc.element_type);
- std::swap(normalized, desc.normalized);
- std::swap(normalized_type, desc.normalized_type);
- std::swap(size, desc.size);
- data = std::move(desc.data);
- GPUObjectDescriptor::operator=(std::move(desc));
- }
- return *this;
-}
-
-void Texture2DDescriptor::Release() { data.clear(); }
-
-GPUResources Texture2DDescriptor::GetGPUResources() const
-{
- GPUResources resources;
- GPUImage2DDescriptor desc;
- desc.data_type = element_type;
- desc.access_type = access_type_;
- resources.images2d.push_back({"tex2d", desc});
- return resources;
-}
-
-absl::Status Texture2DDescriptor::PerformSelector(const std::string &selector,
- const std::vector<std::string> &args,
- const std::vector<std::string> &,
- std::string *result) const
-{
- if (selector == "Read")
- {
- return PerformReadSelector(args, result);
- }
- else
- {
- return absl::NotFoundError(
- absl::StrCat("Texture2DDescriptor has no selector named - ", selector));
- }
-}
-
-absl::Status Texture2DDescriptor::PerformReadSelector(const std::vector<std::string> &args,
- std::string *result) const
-{
- if (args.size() != 2)
- {
- return absl::NotFoundError(absl::StrCat("Texture2DDescriptor Read requires two arguments, but ",
- args.size(), " were passed"));
- }
- std::string read;
- switch (element_type)
- {
- case DataType::FLOAT32:
- read = "read_imagef";
- break;
- case DataType::FLOAT16:
- read = "read_imageh";
- break;
- case DataType::INT8:
- case DataType::INT16:
- case DataType::INT32:
- if (normalized)
- {
- read = normalized_type == DataType::FLOAT16 ? "read_imageh" : "read_imagef";
- }
- else
- {
- read = "read_imagei";
- }
- break;
- case DataType::UINT8:
- case DataType::UINT16:
- case DataType::UINT32:
- if (normalized)
- {
- read = normalized_type == DataType::FLOAT16 ? "read_imageh" : "read_imagef";
- }
- else
- {
- read = "read_imageui";
- }
- break;
- default:
- read = "unknown_type";
- break;
- }
- *result = absl::StrCat(read, "(tex2d, smp_none, (int2)(", args[0], ", ", args[1], "))");
- return absl::OkStatus();
-}
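-
-// Example (illustrative): with element_type == DataType::FLOAT32 and
-// args == {"X", "Y"}, the selector above emits
-// "read_imagef(tex2d, smp_none, (int2)(X, Y))".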
-
-absl::Status Texture2DDescriptor::CreateGPUObject(CLContext *context, GPUObjectPtr *result) const
-{
- Texture2D gpu_texture;
- RETURN_IF_ERROR(gpu_texture.CreateFromTexture2DDescriptor(*this, context));
- *result = absl::make_unique<Texture2D>(std::move(gpu_texture));
- return absl::OkStatus();
-}
-
-Texture2D::Texture2D(cl_mem texture, int width, int height, cl_channel_type type)
- : texture_(texture), width_(width), height_(height), channel_type_(type)
-{
-}
-
-Texture2D::Texture2D(Texture2D &&texture)
- : texture_(texture.texture_), width_(texture.width_), height_(texture.height_),
- channel_type_(texture.channel_type_)
-{
- texture.texture_ = nullptr;
- texture.width_ = 0;
- texture.height_ = 0;
-}
-
-Texture2D &Texture2D::operator=(Texture2D &&texture)
-{
- if (this != &texture)
- {
- Release();
- std::swap(channel_type_, texture.channel_type_);
- std::swap(width_, texture.width_);
- std::swap(height_, texture.height_);
- std::swap(texture_, texture.texture_);
- }
- return *this;
-}
-
-void Texture2D::Release()
-{
- if (texture_)
- {
- clReleaseMemObject(texture_);
- texture_ = nullptr;
- width_ = 0;
- height_ = 0;
- }
-}
-
-absl::Status Texture2D::GetGPUResources(const GPUObjectDescriptor *obj_ptr,
- GPUResourcesWithValue *resources) const
-{
- const auto *texture_desc = dynamic_cast<const Texture2DDescriptor *>(obj_ptr);
- if (!texture_desc)
- {
- return absl::InvalidArgumentError("Expected Texture2DDescriptor on input.");
- }
-
- resources->images2d.push_back({"tex2d", texture_});
- return absl::OkStatus();
-}
-
-absl::Status Texture2D::CreateFromTexture2DDescriptor(const Texture2DDescriptor &desc,
- CLContext *context)
-{
- width_ = desc.size.x;
- height_ = desc.size.y;
- channel_type_ = DataTypeToChannelType(desc.element_type, desc.normalized);
- uint8_t *data_ptr = desc.data.empty() ? nullptr : const_cast<unsigned char *>(desc.data.data());
- return CreateRGBAImage2D(context->context(), desc.size.x, desc.size.y, channel_type_, data_ptr,
- &texture_);
-}
-
-// Creates new 4-channel 2D texture with f32 elements
-absl::Status CreateTexture2DRGBA32F(int width, int height, CLContext *context, Texture2D *result)
-{
- return CreateTexture2D(width, height, DataType::FLOAT32, nullptr, context, result);
-}
-
-// Creates new 4-channel 2D texture with f16 elements
-absl::Status CreateTexture2DRGBA16F(int width, int height, CLContext *context, Texture2D *result)
-{
- return CreateTexture2D(width, height, DataType::FLOAT16, nullptr, context, result);
-}
-
-absl::Status CreateTexture2DRGBA(DataType type, int width, int height, CLContext *context,
- Texture2D *result)
-{
- return CreateTexture2D(width, height, type, nullptr, context, result);
-}
-
-absl::Status CreateTexture2DRGBA(DataType type, int width, int height, void *data,
- CLContext *context, Texture2D *result)
-{
- return CreateTexture2D(width, height, type, data, context, result);
-}
-
-} // namespace gpu_cl
-} // namespace backend
-} // namespace onert
diff --git a/runtime/onert/backend/gpu_cl/open_cl/Texture2d.h b/runtime/onert/backend/gpu_cl/open_cl/Texture2d.h
deleted file mode 100644
index 264507079..000000000
--- a/runtime/onert/backend/gpu_cl/open_cl/Texture2d.h
+++ /dev/null
@@ -1,160 +0,0 @@
-/*
- * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
- * Copyright 2019 The TensorFlow Authors. All Rights Reserved.
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#ifndef __ONERT_BACKEND_GPU_CL_OPENCL_TEXTURE2D_H__
-#define __ONERT_BACKEND_GPU_CL_OPENCL_TEXTURE2D_H__
-
-#include "absl/strings/str_cat.h"
-#include "absl/types/span.h"
-#include "ClCommandQueue.h"
-#include "ClContext.h"
-#include "GpuObject.h"
-#include "OpenclWrapper.h"
-#include "TensorType.h"
-#include "Util.h"
-#include "DataType.h"
-#include "Status.h"
-
-namespace onert
-{
-namespace backend
-{
-namespace gpu_cl
-{
-
-struct Texture2DDescriptor : public GPUObjectDescriptor
-{
- DataType element_type;
- bool normalized = false; // Used with integer data types; if normalized,
- // the kernel reads float data.
- DataType normalized_type; // FLOAT32 or FLOAT16; used when normalized == true.
-
- // optional
- int2 size = int2(0, 0);
- std::vector<uint8_t> data;
-
- Texture2DDescriptor() = default;
- Texture2DDescriptor(const Texture2DDescriptor &) = default;
- Texture2DDescriptor &operator=(const Texture2DDescriptor &) = default;
- Texture2DDescriptor(Texture2DDescriptor &&desc);
- Texture2DDescriptor &operator=(Texture2DDescriptor &&desc);
-
- absl::Status PerformSelector(const std::string &selector, const std::vector<std::string> &args,
- const std::vector<std::string> &template_args,
- std::string *result) const override;
-
- GPUResources GetGPUResources() const override;
- absl::Status PerformReadSelector(const std::vector<std::string> &args, std::string *result) const;
-
- absl::Status CreateGPUObject(CLContext *context, GPUObjectPtr *result) const override;
- void Release() override;
-};
-
-// Texture2D represents formatted GPU data storage.
-// Texture2D is movable but not copyable.
-class Texture2D : public GPUObject
-{
-public:
- Texture2D() {} // allows Texture2D to be used as a class member
- Texture2D(cl_mem texture, int width, int height, cl_channel_type type);
-
- // Move only
- Texture2D(Texture2D &&texture);
- Texture2D &operator=(Texture2D &&texture);
- Texture2D(const Texture2D &) = delete;
- Texture2D &operator=(const Texture2D &) = delete;
-
- virtual ~Texture2D() { Release(); }
-
- cl_mem GetMemoryPtr() const { return texture_; }
-
- // Writes data to the texture. Data should point to a region of exactly
- // width * height * sizeof(pixel) bytes, where each pixel holds four channels.
- template <typename T> absl::Status WriteData(CLCommandQueue *queue, const absl::Span<T> data);
-
- // Reads data from Texture2D into CPU memory.
- template <typename T> absl::Status ReadData(CLCommandQueue *queue, std::vector<T> *result) const;
-
- absl::Status GetGPUResources(const GPUObjectDescriptor *obj_ptr,
- GPUResourcesWithValue *resources) const override;
-
- absl::Status CreateFromTexture2DDescriptor(const Texture2DDescriptor &desc, CLContext *context);
-
-private:
- void Release();
-
- cl_mem texture_ = nullptr;
- int width_;
- int height_;
- cl_channel_type channel_type_;
-};
-
-using Texture2DPtr = std::shared_ptr<Texture2D>;
-
-// Creates new 4-channel 2D texture with f32 elements
-absl::Status CreateTexture2DRGBA32F(int width, int height, CLContext *context, Texture2D *result);
-
-// Creates new 4-channel 2D texture with f16 elements
-absl::Status CreateTexture2DRGBA16F(int width, int height, CLContext *context, Texture2D *result);
-
-absl::Status CreateTexture2DRGBA(DataType type, int width, int height, CLContext *context,
- Texture2D *result);
-
-absl::Status CreateTexture2DRGBA(DataType type, int width, int height, void *data,
- CLContext *context, Texture2D *result);
-
-template <typename T>
-absl::Status Texture2D::WriteData(CLCommandQueue *queue, const absl::Span<T> data)
-{
- const int element_size = ChannelTypeToSizeInBytes(channel_type_);
- if (sizeof(T) % element_size != 0)
- {
- return absl::InvalidArgumentError(
- "Template type T does not have a suitable element type for the created texture.");
- }
- if (4 * width_ * height_ * element_size != data.size() * sizeof(T))
- {
- return absl::InvalidArgumentError(
- "absl::Span<T> data size differs from the allocated texture size.");
- }
-
- RETURN_IF_ERROR(queue->EnqueueWriteImage(texture_, int3(width_, height_, 1), data.data()));
-
- return absl::OkStatus();
-}
-
-template <typename T>
-absl::Status Texture2D::ReadData(CLCommandQueue *queue, std::vector<T> *result) const
-{
- const int element_size = ChannelTypeToSizeInBytes(channel_type_);
- if (sizeof(T) != element_size)
- {
- return absl::InvalidArgumentError("Pixel format is different.");
- }
-
- const int elements_count = width_ * height_ * 4;
- result->resize(elements_count);
-
- return queue->EnqueueReadImage(texture_, int3(width_, height_, 1), result->data());
-}
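-
-// Usage sketch (illustrative; assumes a valid CLContext `context` and
-// CLCommandQueue `queue`):
-//
-// Texture2D tex;
-// if (CreateTexture2DRGBA32F(16, 16, &context, &tex).ok())
-// {
-// std::vector<float> pixels(16 * 16 * 4, 0.5f); // 4 channels per pixel
-// absl::Status s = tex.WriteData(&queue, absl::MakeSpan(pixels));
-// std::vector<float> readback;
-// if (s.ok()) s = tex.ReadData(&queue, &readback);
-// }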
-
-} // namespace gpu_cl
-} // namespace backend
-} // namespace onert
-
-#endif // __ONERT_BACKEND_GPU_CL_OPENCL_TEXTURE2D_H__
diff --git a/runtime/onert/backend/gpu_cl/open_cl/Types.h b/runtime/onert/backend/gpu_cl/open_cl/Types.h
deleted file mode 100644
index f3cf33450..000000000
--- a/runtime/onert/backend/gpu_cl/open_cl/Types.h
+++ /dev/null
@@ -1,183 +0,0 @@
-/*
- * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
- * Copyright 2019 The TensorFlow Authors. All Rights Reserved.
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#ifndef __ONERT_BACKEND_GPU_CL_OPENCL_TYPES_H__
-#define __ONERT_BACKEND_GPU_CL_OPENCL_TYPES_H__
-
-#include <array>
-#include <cstddef>
-#include <cstdint>
-
-namespace onert
-{
-namespace backend
-{
-namespace gpu_cl
-{
-
-// TODO(akulik): make these types Google-style compliant.
-
-template <typename T> struct alignas(sizeof(T)) Vec4
-{
- union {
- struct
- {
- T x, y, z, w;
- };
- std::array<T, 4> data_;
- };
-
- Vec4() : Vec4(T(0.0f)) {}
-
- template <typename S> Vec4(S x_, S y_, S z_, S w_) : x(x_), y(y_), z(z_), w(w_) {}
- explicit Vec4(T v) : x(v), y(v), z(v), w(v) {}
-
- template <typename S> explicit Vec4(S v) : x(v), y(v), z(v), w(v) {}
-
- Vec4(const Vec4 &f) : x(f.x), y(f.y), z(f.z), w(f.w) {}
-
- template <typename S> Vec4(const Vec4<S> &f) : x(f.x), y(f.y), z(f.z), w(f.w) {}
-
- Vec4 &operator=(const Vec4 &other)
- {
- x = other.x;
- y = other.y;
- z = other.z;
- w = other.w;
- return *this;
- }
-
- static constexpr int size() { return 4; }
-
- T &operator[](size_t n) { return data_[n]; }
- T operator[](size_t n) const { return data_[n]; }
-
- bool operator==(const Vec4 &value) const
- {
- return data_[0] == value[0] && data_[1] == value[1] && data_[2] == value[2] &&
- data_[3] == value[3];
- }
- bool operator!=(const Vec4 &value) const { return !(this->operator==(value)); }
-};
-
-template <typename T> struct alignas(sizeof(T)) Vec3
-{
- union {
- struct
- {
- T x, y, z;
- };
- std::array<T, 3> data_;
- };
-
- Vec3() : Vec3(T(0.0f)) {}
-
- template <typename S> constexpr Vec3(S x_, S y_, S z_) : x(x_), y(y_), z(z_) {}
- explicit Vec3(T v) : x(v), y(v), z(v) {}
-
- template <typename S> explicit Vec3(S v) : x(v), y(v), z(v) {}
-
- Vec3(const Vec3 &f) : x(f.x), y(f.y), z(f.z) {}
-
- template <typename S> Vec3(const Vec3<S> &f) : x(f.x), y(f.y), z(f.z) {}
-
- Vec3 &operator=(const Vec3 &other)
- {
- x = other.x;
- y = other.y;
- z = other.z;
- return *this;
- }
-
- static constexpr int size() { return 3; }
-
- T &operator[](size_t n) { return data_[n]; }
- T operator[](size_t n) const { return data_[n]; }
- bool operator==(const Vec3 &value) const
- {
- return data_[0] == value[0] && data_[1] == value[1] && data_[2] == value[2];
- }
- bool operator!=(const Vec3 &value) const { return !(this->operator==(value)); }
-};
-
-template <typename T> struct alignas(sizeof(T)) Vec2
-{
- union {
- struct
- {
- T x, y;
- };
- std::array<T, 2> data_;
- };
-
- Vec2() : Vec2(T(0.0f)) {}
-
- template <typename S> Vec2(S x_, S y_) : x(x_), y(y_) {}
- explicit Vec2(T v) : x(v), y(v) {}
-
- template <typename S> explicit Vec2(S v) : x(v), y(v) {}
-
- Vec2(const Vec2 &f) : x(f.x), y(f.y) {}
-
- template <typename S> Vec2(const Vec2<S> &f) : x(f.x), y(f.y) {}
-
- Vec2 &operator=(const Vec2 &other)
- {
- x = other.x;
- y = other.y;
- return *this;
- }
-
- bool operator==(const Vec2 &value) const { return data_[0] == value[0] && data_[1] == value[1]; }
-
- bool operator!=(const Vec2 &value) const { return !(this->operator==(value)); }
-
- static constexpr int size() { return 2; }
-
- T &operator[](size_t n) { return data_[n]; }
- T operator[](size_t n) const { return data_[n]; }
-};
-
-using float2 = Vec2<float>;
-using byte2 = Vec2<int8_t>;
-using ubyte2 = Vec2<uint8_t>;
-using short2 = Vec2<int16_t>;
-using ushort2 = Vec2<uint16_t>;
-using int2 = Vec2<int32_t>;
-using uint2 = Vec2<uint32_t>;
-
-using float3 = Vec3<float>;
-using byte3 = Vec3<int8_t>;
-using ubyte3 = Vec3<uint8_t>;
-using short3 = Vec3<int16_t>;
-using ushort3 = Vec3<uint16_t>;
-using int3 = Vec3<int32_t>;
-using uint3 = Vec3<uint32_t>;
-
-using float4 = Vec4<float>;
-using byte4 = Vec4<int8_t>;
-using ubyte4 = Vec4<uint8_t>;
-using short4 = Vec4<int16_t>;
-using ushort4 = Vec4<uint16_t>;
-using int4 = Vec4<int32_t>;
-using uint4 = Vec4<uint32_t>;
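-
-// Usage sketch (illustrative):
-// int2 size(640, 480); // size.x == 640, size.y == 480
-// float4 rgba(0.1f, 0.2f, 0.3f, 1.0f);
-// rgba[3] = 0.5f; // components are also addressable by index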
-
-} // namespace gpu_cl
-} // namespace backend
-} // namespace onert
-
-#endif // __ONERT_BACKEND_GPU_CL_OPENCL_TYPES_H__
diff --git a/runtime/onert/backend/gpu_cl/open_cl/Util.cc b/runtime/onert/backend/gpu_cl/open_cl/Util.cc
deleted file mode 100644
index 9f5a8388b..000000000
--- a/runtime/onert/backend/gpu_cl/open_cl/Util.cc
+++ /dev/null
@@ -1,264 +0,0 @@
-/*
- * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
- * Copyright 2019 The TensorFlow Authors. All Rights Reserved.
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#include "Util.h"
-
-#include "absl/strings/str_cat.h"
-#include "absl/strings/substitute.h"
-#include "Status.h"
-namespace onert
-{
-namespace backend
-{
-namespace gpu_cl
-{
-
-std::string CLErrorCodeToString(cl_int error_code)
-{
- switch (error_code)
- {
- case CL_SUCCESS:
- return "Success";
- case CL_DEVICE_NOT_FOUND:
- return "Device not found";
- case CL_DEVICE_NOT_AVAILABLE:
- return "Device not available";
- case CL_COMPILER_NOT_AVAILABLE:
- return "Compiler not available";
- case CL_MEM_OBJECT_ALLOCATION_FAILURE:
- return "Memory object allocation failure";
- case CL_OUT_OF_RESOURCES:
- return "Out of resources";
- case CL_OUT_OF_HOST_MEMORY:
- return "Out of host memory";
- case CL_PROFILING_INFO_NOT_AVAILABLE:
- return "Profiling information not available";
- case CL_MEM_COPY_OVERLAP:
- return "Memory copy overlap";
- case CL_IMAGE_FORMAT_MISMATCH:
- return "Image format mismatch";
- case CL_IMAGE_FORMAT_NOT_SUPPORTED:
- return "Image format not supported";
- case CL_BUILD_PROGRAM_FAILURE:
- return "Build program failure";
- case CL_MAP_FAILURE:
- return "Mapping failure";
- case CL_MISALIGNED_SUB_BUFFER_OFFSET:
- return "Misaligned sub-buffer offset";
- case CL_EXEC_STATUS_ERROR_FOR_EVENTS_IN_WAIT_LIST:
- return "Execution status error for events in wait list";
- case CL_COMPILE_PROGRAM_FAILURE:
- return "Compile program failure";
- case CL_LINKER_NOT_AVAILABLE:
- return "Linker not available";
- case CL_LINK_PROGRAM_FAILURE:
- return "Link program failure";
- case CL_DEVICE_PARTITION_FAILED:
- return "Device partition failed";
- case CL_KERNEL_ARG_INFO_NOT_AVAILABLE:
- return "Kernel argument information not available";
-
- case CL_INVALID_VALUE:
- return "Invalid value";
- case CL_INVALID_DEVICE_TYPE:
- return "Invalid device type";
- case CL_INVALID_PLATFORM:
- return "Invalid platform";
- case CL_INVALID_DEVICE:
- return "Invalid device";
- case CL_INVALID_CONTEXT:
- return "Invalid context";
- case CL_INVALID_QUEUE_PROPERTIES:
- return "Invalid queue properties";
- case CL_INVALID_COMMAND_QUEUE:
- return "Invalid command queue";
- case CL_INVALID_HOST_PTR:
- return "Invalid host pointer";
- case CL_INVALID_MEM_OBJECT:
- return "Invalid memory object";
- case CL_INVALID_IMAGE_FORMAT_DESCRIPTOR:
- return "Invalid image format descriptor";
- case CL_INVALID_IMAGE_SIZE:
- return "Invalid image size";
- case CL_INVALID_SAMPLER:
- return "Invalid sampler";
- case CL_INVALID_BINARY:
- return "Invalid binary";
- case CL_INVALID_BUILD_OPTIONS:
- return "Invalid build options";
- case CL_INVALID_PROGRAM:
- return "Invalid program";
- case CL_INVALID_PROGRAM_EXECUTABLE:
- return "Invalid program executable";
- case CL_INVALID_KERNEL_NAME:
- return "Invalid kernel name";
- case CL_INVALID_KERNEL_DEFINITION:
- return "Invalid kernel definition";
- case CL_INVALID_KERNEL:
- return "Invalid kernel";
- case CL_INVALID_ARG_INDEX:
- return "Invalid argument index";
- case CL_INVALID_ARG_VALUE:
- return "Invalid argument value";
- case CL_INVALID_ARG_SIZE:
- return "Invalid argument size";
- case CL_INVALID_KERNEL_ARGS:
- return "Invalid kernel arguments";
- case CL_INVALID_WORK_DIMENSION:
- return "Invalid work dimension";
- case CL_INVALID_WORK_GROUP_SIZE:
- return "Invalid work group size";
- case CL_INVALID_WORK_ITEM_SIZE:
- return "Invalid work item size";
- case CL_INVALID_GLOBAL_OFFSET:
- return "Invalid global offset";
- case CL_INVALID_EVENT_WAIT_LIST:
- return "Invalid event wait list";
- case CL_INVALID_EVENT:
- return "Invalid event";
- case CL_INVALID_OPERATION:
- return "Invalid operation";
- case CL_INVALID_GL_OBJECT:
- return "Invalid GL object";
- case CL_INVALID_BUFFER_SIZE:
- return "Invalid buffer size";
- case CL_INVALID_MIP_LEVEL:
- return "Invalid mip-level";
- case CL_INVALID_GLOBAL_WORK_SIZE:
- return "Invalid global work size";
- case CL_INVALID_PROPERTY:
- return "Invalid property";
- case CL_INVALID_IMAGE_DESCRIPTOR:
- return "Invalid image descriptor";
- case CL_INVALID_COMPILER_OPTIONS:
- return "Invalid compiler options";
- case CL_INVALID_LINKER_OPTIONS:
- return "Invalid linker options";
- case CL_INVALID_DEVICE_PARTITION_COUNT:
- return "Invalid device partition count";
- case CL_INVALID_PIPE_SIZE:
- return "Invalid pipe size";
- case CL_INVALID_DEVICE_QUEUE:
- return "Invalid device queue";
- case CL_INVALID_GL_SHAREGROUP_REFERENCE_KHR:
- return "Invalid GL sharegroup reference KHR";
-
- default:
- return "Unknown OpenCL error";
- }
-}
-
-int ChannelTypeToSizeInBytes(cl_channel_type type)
-{
- switch (type)
- {
- case CL_FLOAT:
- return 4;
- default:
- return 0;
- }
-}
-
-absl::Status CreateCLBuffer(cl_context context, int size_in_bytes, bool read_only, void *data,
- cl_mem *result)
-{
- cl_mem_flags flags = read_only ? CL_MEM_READ_ONLY : CL_MEM_READ_WRITE;
- if (data)
- {
- flags |= CL_MEM_COPY_HOST_PTR;
- }
- cl_int error_code;
- *result = clCreateBuffer(context, flags, size_in_bytes, data, &error_code);
- if (!*result)
- {
- return absl::UnknownError(absl::StrCat("Failed to allocate device memory (clCreateBuffer): ",
- CLErrorCodeToString(error_code)));
- }
- return absl::OkStatus();
-}
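-
-// Usage sketch (illustrative; assumes a valid cl_context `context`):
-//
-// std::vector<float> host(256, 0.0f);
-// cl_mem buf = nullptr;
-// absl::Status s = CreateCLBuffer(context, host.size() * sizeof(float),
-// /*read_only=*/true, host.data(), &buf);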
-
-cl_channel_type DataTypeToChannelType(DataType type, bool normalized)
-{
- switch (type)
- {
- case DataType::FLOAT32:
- return CL_FLOAT;
- case DataType::INT8:
- return normalized ? CL_SNORM_INT8 : CL_SIGNED_INT8;
- case DataType::UINT8:
- return normalized ? CL_UNORM_INT8 : CL_UNSIGNED_INT8;
- case DataType::INT16:
- return normalized ? CL_SNORM_INT16 : CL_SIGNED_INT16;
- case DataType::UINT16:
- return normalized ? CL_UNORM_INT16 : CL_UNSIGNED_INT16;
- case DataType::INT32:
- return CL_SIGNED_INT32;
- case DataType::UINT32:
- return CL_UNSIGNED_INT32;
- default:
- return CL_FLOAT;
- }
-}
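-
-// Example (illustrative): DataTypeToChannelType(DataType::UINT8, true) yields
-// CL_UNORM_INT8, so a kernel-side read_imagef returns values normalized to
-// [0, 1]; without normalization it yields CL_UNSIGNED_INT8 for read_imageui.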
-
-absl::Status CreateRGBAImage2D(cl_context context, int width, int height,
- cl_channel_type channel_type, void *data, cl_mem *result)
-{
- cl_image_desc desc;
- desc.image_type = CL_MEM_OBJECT_IMAGE2D;
- desc.image_width = width;
- desc.image_height = height;
- desc.image_depth = 0;
- desc.image_row_pitch = 0;
- desc.image_slice_pitch = 0;
- desc.num_mip_levels = 0;
- desc.num_samples = 0;
- desc.buffer = nullptr;
-
- cl_image_format format;
- format.image_channel_order = CL_RGBA;
- format.image_channel_data_type = channel_type;
-
- cl_mem_flags flags = CL_MEM_READ_WRITE;
- if (data)
- {
- flags |= CL_MEM_COPY_HOST_PTR;
- }
-
- cl_int error_code;
- *result = CreateImage2DLegacy(context, flags, &format, &desc, data, &error_code);
- if (error_code != CL_SUCCESS)
- {
- return absl::UnknownError(absl::StrCat("Failed to create 2D texture (clCreateImage): ",
- CLErrorCodeToString(error_code)));
- }
- return absl::OkStatus();
-}
-
-std::string GetXStrideCorrected(const std::string &src_x, const std::string &batch_size,
- const std::string &stride_x, const std::string &padding_x)
-{
- // TODO(sorokin) check perf and optimize with floor() if needed
- // int p0 = src_x / batch_size;
- // int b0 = src_x % batch_size;
- // return p0 * stride_x * batch_size + b0 + padding_x;
- return absl::Substitute("((($0) / $1) * $2 * $1 + (($0) % $1) + $3)", src_x, batch_size, stride_x,
- padding_x);
-}
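-
-// Worked example (illustrative): with src_x == "X", batch_size == "B",
-// stride_x == "S", and padding_x == "P", the substitution above produces
-// "(((X) / B) * S * B + ((X) % B) + P)",
-// i.e. the batch component of X is preserved while the spatial component is
-// scaled by the stride and shifted by the padding.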
-
-} // namespace gpu_cl
-} // namespace backend
-} // namespace onert
diff --git a/runtime/onert/backend/gpu_cl/open_cl/Util.h b/runtime/onert/backend/gpu_cl/open_cl/Util.h
deleted file mode 100644
index 996c564f4..000000000
--- a/runtime/onert/backend/gpu_cl/open_cl/Util.h
+++ /dev/null
@@ -1,278 +0,0 @@
-/*
- * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
- * Copyright 2019 The TensorFlow Authors. All Rights Reserved.
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#ifndef __ONERT_BACKEND_GPU_CL_OPENCL_UTIL_H__
-#define __ONERT_BACKEND_GPU_CL_OPENCL_UTIL_H__
-
-#include <string>
-
-#include "absl/types/span.h"
-#include "OpenclWrapper.h"
-#include "DataType.h"
-#include "InternalTensor.h"
-#include "Status.h"
-#include "Types.h"
-
-namespace onert
-{
-namespace backend
-{
-namespace gpu_cl
-{
-// Calculates the correct X coordinate when stride != 1 and batch != 1 for
-// layouts with B after W (for example, HWBC4), where W and B are stored in a
-// single axis of the GPU resource.
-std::string GetXStrideCorrected(const std::string &src_x, const std::string &batch_size,
- const std::string &stride_x, const std::string &padding_x);
-
-// @param n must be non-negative
-// @param divisor must be greater than zero
-template <typename T, typename N> T DivideRoundUp(T n, N divisor)
-{
- const T div = static_cast<T>(divisor);
- const T q = n / div;
- return n % div == 0 ? q : q + 1;
-}
-
-template <> inline uint3 DivideRoundUp(uint3 n, uint3 divisor)
-{
- return uint3(DivideRoundUp(n.x, divisor.x), DivideRoundUp(n.y, divisor.y),
- DivideRoundUp(n.z, divisor.z));
-}
-
-// @param number or its components must be greater than zero
-// @param n must be greater than zero
-template <typename T, typename N> T AlignByN(T number, N n) { return DivideRoundUp(number, n) * n; }
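-
-// Example (illustrative): DivideRoundUp(10, 4) == 3 and AlignByN(10, 4) == 12;
-// this is how channel counts are padded to whole 4-element slices.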
-
-std::string CLErrorCodeToString(cl_int error_code);
-
-int ChannelTypeToSizeInBytes(cl_channel_type type);
-
-template <DataType S, typename T>
-void CopyLinearFLT4(const InternalTensor<Linear, S> &src, absl::Span<T> dst)
-{
- const int dst_depth = dst.size();
- for (int d = 0; d < dst_depth; ++d)
- {
- T val;
- for (int i = 0; i < 4; ++i)
- {
- const int dst_ch = d * 4 + i;
- val[i] = dst_ch >= src.shape.v ? 0.0f : src.data[dst_ch];
- }
- dst[d] = val;
- }
-}
-
-absl::Status CreateCLBuffer(cl_context context, int size_in_bytes, bool read_only, void *data,
- cl_mem *result);
-
-cl_channel_type DataTypeToChannelType(DataType type, bool normalized = false);
-absl::Status CreateRGBAImage2D(cl_context context, int width, int height,
- cl_channel_type channel_type, void *data, cl_mem *result);
-
-template <DataType S, typename T>
-void RearrangeWeightsToOHWIOGroupI4O4(const InternalTensor<OHWI, S> &weights, int out_group_size,
- absl::Span<T> dst)
-{
- const int dst_slices = DivideRoundUp(weights.shape.o, 4);
- const int src_slices = DivideRoundUp(weights.shape.i, 4);
- const int dst_groups = DivideRoundUp(dst_slices, out_group_size);
-
- int counter = 0;
- for (int d = 0; d < dst_groups; ++d)
- {
- for (int y = 0; y < weights.shape.h; ++y)
- {
- for (int x = 0; x < weights.shape.w; ++x)
- {
- for (int s = 0; s < src_slices; ++s)
- {
- for (int d_group = 0; d_group < out_group_size; ++d_group)
- {
- for (int j = 0; j < 4; ++j)
- {
- T filter;
- for (int i = 0; i < 4; ++i)
- {
- const int s_ch = s * 4 + j;
- const int d_ch = (d * out_group_size + d_group) * 4 + i;
- if (s_ch < weights.shape.i && d_ch < weights.shape.o)
- {
- const int f_index = weights.shape.LinearIndex({d_ch, y, x, s_ch});
- filter[i] = weights.data[f_index];
- }
- else
- {
- filter[i] = 0.0f;
- }
- }
- dst[counter++] = filter;
- }
- }
- }
- }
- }
- }
-}
-
-template <DataType S, typename T>
-void RearrangeWeightsToODHWIOGroupI4O4(const InternalTensor<OHWDI, S> &weights, int out_group_size,
- absl::Span<T> dst)
-{
- const int dst_slices = DivideRoundUp(weights.shape.o, 4);
- const int src_slices = DivideRoundUp(weights.shape.i, 4);
- const int dst_groups = DivideRoundUp(dst_slices, out_group_size);
-
- int counter = 0;
- for (int d = 0; d < dst_groups; ++d)
- {
- for (int z = 0; z < weights.shape.d; ++z)
- {
- for (int y = 0; y < weights.shape.h; ++y)
- {
- for (int x = 0; x < weights.shape.w; ++x)
- {
- for (int s = 0; s < src_slices; ++s)
- {
- for (int d_group = 0; d_group < out_group_size; ++d_group)
- {
- for (int j = 0; j < 4; ++j)
- {
- T filter;
- for (int i = 0; i < 4; ++i)
- {
- const int s_ch = s * 4 + j;
- const int d_ch = (d * out_group_size + d_group) * 4 + i;
- if (s_ch < weights.shape.i && d_ch < weights.shape.o)
- {
- const int f_index = weights.shape.LinearIndex({d_ch, y, x, z, s_ch});
- filter[i] = weights.data[f_index];
- }
- else
- {
- filter[i] = 0.0f;
- }
- }
- dst[counter++] = filter;
- }
- }
- }
- }
- }
- }
- }
-}
-
-template <DataType S, typename T>
-void RearrangeWeightsToI4HWIOOGroupO4(const InternalTensor<OHWI, S> &weights, int out_group_size,
- absl::Span<T> dst)
-{
- const int dst_slices = DivideRoundUp(weights.shape.o, 4);
- const int src_slices = DivideRoundUp(weights.shape.i, 4);
- const int dst_groups = DivideRoundUp(dst_slices, out_group_size);
-
- int counter = 0;
- for (int j = 0; j < 4; ++j)
- {
- for (int y = 0; y < weights.shape.h; ++y)
- {
- for (int x = 0; x < weights.shape.w; ++x)
- {
- for (int s = 0; s < src_slices; ++s)
- {
- for (int d = 0; d < dst_groups; ++d)
- {
- for (int d_group = 0; d_group < out_group_size; ++d_group)
- {
- T filter;
- for (int i = 0; i < 4; ++i)
- {
- const int s_ch = s * 4 + j;
- const int d_ch = (d * out_group_size + d_group) * 4 + i;
- if (s_ch < weights.shape.i && d_ch < weights.shape.o)
- {
- const int f_index = weights.shape.LinearIndex({d_ch, y, x, s_ch});
- filter[i] = weights.data[f_index];
- }
- else
- {
- filter[i] = 0.0f;
- }
- }
- dst[counter++] = filter;
- }
- }
- }
- }
- }
- }
-}
-
-template <DataType S, typename T>
-void RearrangeWeightsToI4DHWIOOGroupO4(const InternalTensor<OHWDI, S> &weights, int out_group_size,
- absl::Span<T> dst)
-{
- const int dst_slices = DivideRoundUp(weights.shape.o, 4);
- const int src_slices = DivideRoundUp(weights.shape.i, 4);
- const int dst_groups = DivideRoundUp(dst_slices, out_group_size);
-
- int counter = 0;
- for (int j = 0; j < 4; ++j)
- {
- for (int z = 0; z < weights.shape.d; ++z)
- {
- for (int y = 0; y < weights.shape.h; ++y)
- {
- for (int x = 0; x < weights.shape.w; ++x)
- {
- for (int s = 0; s < src_slices; ++s)
- {
- for (int d = 0; d < dst_groups; ++d)
- {
- for (int d_group = 0; d_group < out_group_size; ++d_group)
- {
- T filter;
- for (int i = 0; i < 4; ++i)
- {
- const int s_ch = s * 4 + j;
- const int d_ch = (d * out_group_size + d_group) * 4 + i;
- if (s_ch < weights.shape.i && d_ch < weights.shape.o)
- {
- const int f_index = weights.shape.LinearIndex({d_ch, y, x, z, s_ch});
- filter[i] = weights.data[f_index];
- }
- else
- {
- filter[i] = 0.0f;
- }
- }
- dst[counter++] = filter;
- }
- }
- }
- }
- }
- }
- }
-}
-
-} // namespace gpu_cl
-} // namespace backend
-} // namespace onert
-
-#endif // __ONERT_BACKEND_GPU_CL_OPENCL_UTIL_H__
diff --git a/runtime/onert/backend/gpu_cl/open_cl/WinogradUtil.cc b/runtime/onert/backend/gpu_cl/open_cl/WinogradUtil.cc
deleted file mode 100644
index 5f1103ad9..000000000
--- a/runtime/onert/backend/gpu_cl/open_cl/WinogradUtil.cc
+++ /dev/null
@@ -1,178 +0,0 @@
-/*
- * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
- * Copyright 2019 The TensorFlow Authors. All Rights Reserved.
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#include "open_cl/WinogradUtil.h"
-
-#include <cmath>
-#include <vector>
-
-#include "open_cl/DataType.h"
-#include "open_cl/Shape.h"
-#include "open_cl/Tensor.h"
-
-namespace onert
-{
-namespace backend
-{
-namespace
-{
-// Matrices for Winograd transformations were computed with the method
-// described here: https://openreview.net/pdf?id=H1ZaRZVKg
-std::vector<float> GetTransposedMatrixForWinograd(int width, int height)
-{
- const float kDelta = std::sqrt(2.0f) / 2.0f;
- std::vector<float> px(width);
-
- px[0] = 0.0f;
- const int points_count = (width - 1) / 2;
- for (int i = 0; i < points_count; ++i)
- {
- px[i * 2 + 1] = kDelta * (i + 1.0f);
- px[i * 2 + 2] = -kDelta * (i + 1.0f);
- }
- px[width - 1] = 1.0f;
-
- std::vector<float> py(width, 1.0f);
- py[width - 1] = 0.0f;
-
- std::vector<float> result(height * width);
- for (int y = 0; y < width; ++y)
- {
- for (int x = 0; x < height; ++x)
- {
- result[x * width + y] = std::pow(px[y], 1.0f * x) * std::pow(py[y], (height - 1.0f) - x);
- }
- }
- return result;
-}
-
-std::vector<float> GetInversedMatrixForWinograd(int rank)
-{
- auto matrix = GetTransposedMatrixForWinograd(rank, rank);
- std::vector<float> inverted(rank * rank, 0.0f);
- for (int i = 0; i < rank; ++i)
- {
- inverted[i * rank + i] = 1.0f;
- }
-
- for (int i = 1; i < rank - 1; ++i)
- {
- float inv_t = 1.0f / matrix[i * rank + i];
- for (int x = i; x < rank; ++x)
- {
- matrix[i * rank + x] *= inv_t;
- }
- for (int x = 0; x < rank; ++x)
- {
- inverted[i * rank + x] *= inv_t;
- }
-
- for (int y = 0; y < rank; ++y)
- {
- if (y == i)
- continue;
- float t = matrix[y * rank + i];
- for (int x = i; x < rank; ++x)
- {
- matrix[y * rank + x] -= t * matrix[i * rank + x];
- }
- for (int x = 0; x < rank; ++x)
- {
- inverted[y * rank + x] -= t * inverted[i * rank + x];
- }
- }
- }
-
- return inverted;
-}
-
-std::vector<float> Multiply(const std::vector<float> &a_mat, const std::vector<float> &b_mat, int m,
- int n, int k)
-{
- std::vector<float> result(m * k);
- for (int y = 0; y < m; ++y)
- {
- for (int x = 0; x < k; ++x)
- {
- float sum = 0.0f;
- for (int i = 0; i < n; ++i)
- {
- sum += a_mat[y * n + i] * b_mat[i * k + x];
- }
- result[y * k + x] = sum;
- }
- }
- return result;
-}
-} // namespace
-
-std::vector<float> AtMatrixForWinograd4x4To6x6() { return GetTransposedMatrixForWinograd(6, 4); }
-
-std::vector<float> BtMatrixForWinograd4x4To6x6() { return GetInversedMatrixForWinograd(6); }
-
-void RearrangeWeightsToWinograd4x4To6x6Weights(
- const gpu_cl::InternalTensor<gpu_cl::OHWI, gpu_cl::DataType::FLOAT32> &src_weights,
- gpu_cl::InternalTensor<gpu_cl::OHWI, gpu_cl::DataType::FLOAT32> *dst_weights)
-{
- gpu_cl::OHWI dst_shape;
- dst_shape.o = src_weights.shape.o;
- dst_shape.h = 6;
- dst_shape.w = 6;
- dst_shape.i = src_weights.shape.i;
- dst_weights->shape = dst_shape;
- dst_weights->data.resize(dst_shape.DimensionsProduct());
-
- auto gt_mat = GetTransposedMatrixForWinograd(6, 3);
- std::vector<float> g_mat(gt_mat.size());
- for (int y = 0; y < 3; ++y)
- {
- for (int x = 0; x < 6; ++x)
- {
- g_mat[x * 3 + y] = gt_mat[y * 6 + x];
- }
- }
-
- for (int d = 0; d < src_weights.shape.o; ++d)
- {
- for (int s = 0; s < src_weights.shape.i; ++s)
- {
- std::vector<float> in_vals(9);
- for (int y = 0; y < 3; ++y)
- {
- for (int x = 0; x < 3; ++x)
- {
- const int f_index = src_weights.shape.LinearIndex({d, y, x, s});
- in_vals[y * 3 + x] = src_weights.data[f_index];
- }
- }
-
- auto temp_vals = Multiply(g_mat, in_vals, 6, 3, 3);
- auto out_vals = Multiply(temp_vals, gt_mat, 6, 3, 6);
- for (int y = 0; y < 6; ++y)
- {
- for (int x = 0; x < 6; ++x)
- {
- const int f_index = dst_shape.LinearIndex({d, y, x, s});
- dst_weights->data[f_index] = out_vals[y * 6 + x];
- }
- }
- }
- }
-}
-
-} // namespace backend
-} // namespace onert
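
The Winograd weight transform above computes, for every (o, i) pair, the 6x6 tile G * g * G^T from the 3x3 filter g, where G is the 6x3 matrix built from gt_mat. A minimal sketch of just that shape bookkeeping (stand-in matrix values, not the real G):

    #include <cstdio>
    #include <vector>

    // Row-major (m x n) * (n x k) -> (m x k), same contract as Multiply() above.
    static std::vector<float> MatMul(const std::vector<float> &a,
                                     const std::vector<float> &b, int m, int n, int k)
    {
      std::vector<float> r(m * k, 0.0f);
      for (int y = 0; y < m; ++y)
        for (int x = 0; x < k; ++x)
          for (int i = 0; i < n; ++i)
            r[y * k + x] += a[y * n + i] * b[i * k + x];
      return r;
    }

    int main()
    {
      std::vector<float> g(3 * 3, 1.0f);   // hypothetical 3x3 filter
      std::vector<float> G(6 * 3, 0.5f);   // stand-in for the transposed gt_mat
      std::vector<float> Gt(3 * 6, 0.5f);  // stand-in for gt_mat itself
      auto tmp = MatMul(G, g, 6, 3, 3);    // 6x3, the temp_vals step above
      auto out = MatMul(tmp, Gt, 6, 3, 6); // 6x6, the out_vals step above
      std::printf("transformed tile: %zu values (6x6)\n", out.size());
      return 0;
    }
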
diff --git a/runtime/onert/backend/gpu_cl/open_cl/WinogradUtil.h b/runtime/onert/backend/gpu_cl/open_cl/WinogradUtil.h
deleted file mode 100644
index 32e21760d..000000000
--- a/runtime/onert/backend/gpu_cl/open_cl/WinogradUtil.h
+++ /dev/null
@@ -1,48 +0,0 @@
-/*
- * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
- * Copyright 2019 The TensorFlow Authors. All Rights Reserved.
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#ifndef __ONERT_BACKEND_GPU_CL_OPENCL_WINOGRAD_UTIL_H__
-#define __ONERT_BACKEND_GPU_CL_OPENCL_WINOGRAD_UTIL_H__
-
-#include <vector>
-
-#include "open_cl/DataType.h"
-#include "open_cl/Shape.h"
-#include "open_cl/InternalTensor.h"
-
-namespace onert
-{
-namespace backend
-{
-
-// Matrices for Winograd transformations obtained with the method described here
-// https://openreview.net/pdf?id=H1ZaRZVKg
-
-// Returns the transposed A matrix (6 x 4) as an array (24 values) for Winograd4x4To6x6
-std::vector<float> AtMatrixForWinograd4x4To6x6();
-
-// Returns the transposed B matrix (6 x 6) as an array (36 values) for Winograd4x4To6x6
-std::vector<float> BtMatrixForWinograd4x4To6x6();
-
-void RearrangeWeightsToWinograd4x4To6x6Weights(
- const gpu_cl::InternalTensor<gpu_cl::OHWI, gpu_cl::DataType::FLOAT32> &src_weights,
- gpu_cl::InternalTensor<gpu_cl::OHWI, gpu_cl::DataType::FLOAT32> *dst_weights);
-
-} // namespace backend
-} // namespace onert
-
-#endif // __ONERT_BACKEND_GPU_CL_OPENCL_WINOGRAD_UTIL_H__
diff --git a/runtime/onert/backend/gpu_cl/open_cl/WorkgroupSelection.cc b/runtime/onert/backend/gpu_cl/open_cl/WorkgroupSelection.cc
deleted file mode 100644
index 847c2a2aa..000000000
--- a/runtime/onert/backend/gpu_cl/open_cl/WorkgroupSelection.cc
+++ /dev/null
@@ -1,258 +0,0 @@
-/*
- * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
- * Copyright 2019 The TensorFlow Authors. All Rights Reserved.
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#include "WorkgroupSelection.h"
-
-#include <math.h>
-
-#include <set>
-#include <vector>
-
-#include "Util.h"
-
-namespace onert
-{
-namespace backend
-{
-namespace gpu_cl
-{
-
-namespace
-{
-
-template <typename T>
-void AddCornerCases(const T &grid, int max_work_group_total_size, const T &max_work_group_sizes,
- WorkGroupSizeAlignment x_alignment, WorkGroupSizeAlignment y_alignment,
- WorkGroupSizeAlignment z_alignment, std::vector<T> *work_groups)
-{
- for (int x = 1; x <= 4; ++x)
- {
- for (int y = 1; y <= 4; ++y)
- {
- for (int z = 1; z <= 4; ++z)
- {
- u_int32_t wg_x = DivideRoundUp(grid.x, x);
- u_int32_t wg_y = DivideRoundUp(grid.y, y);
- u_int32_t wg_z = DivideRoundUp(grid.z, z);
- if (wg_x > static_cast<u_int32_t>(max_work_group_sizes.x) ||
- wg_y > static_cast<u_int32_t>(max_work_group_sizes.y) ||
- wg_z > static_cast<u_int32_t>(max_work_group_sizes.z) ||
- wg_x * wg_y * wg_z > static_cast<u_int32_t>(max_work_group_total_size))
- {
- continue;
- }
- if (x_alignment == WorkGroupSizeAlignment::PRECISE && grid.x % wg_x != 0)
- {
- continue;
- }
- if (y_alignment == WorkGroupSizeAlignment::PRECISE && grid.y % wg_y != 0)
- {
- continue;
- }
- if (z_alignment == WorkGroupSizeAlignment::PRECISE && grid.z % wg_z != 0)
- {
- continue;
- }
- work_groups->push_back({wg_x, wg_y, wg_z});
- }
- }
- }
-
-  // This always adds at least {1, 1, 1}.
- for (u_int32_t x = 1; x <= 4; ++x)
- {
- for (u_int32_t y = 1; y <= 4; ++y)
- {
- for (u_int32_t z = 1; z <= 4; ++z)
- {
- if (x > static_cast<u_int32_t>(max_work_group_sizes.x) ||
- y > static_cast<u_int32_t>(max_work_group_sizes.y) ||
- z > static_cast<u_int32_t>(max_work_group_sizes.z) ||
- x * y * z > static_cast<u_int32_t>(max_work_group_total_size))
- {
- continue;
- }
- if (x_alignment == WorkGroupSizeAlignment::PRECISE && grid.x % x != 0)
- {
- continue;
- }
- if (y_alignment == WorkGroupSizeAlignment::PRECISE && grid.y % y != 0)
- {
- continue;
- }
- if (z_alignment == WorkGroupSizeAlignment::PRECISE && grid.z % z != 0)
- {
- continue;
- }
- work_groups->push_back({x, y, z});
- }
- }
- }
-}
-
-std::vector<int> GetDivisors(int number)
-{
- const int max_divisor = static_cast<int>(sqrt(number));
- std::vector<int> divisors;
-  // We don't know the number of divisors in advance, so the reserve is just a heuristic.
- divisors.reserve(max_divisor / 3 + 1);
- for (int i = 1; i <= max_divisor; ++i)
- {
- const int d = number / i;
- if (i * d == number)
- {
- divisors.push_back(i);
- if (d != i)
- {
- divisors.push_back(d);
- }
- }
- }
- return divisors;
-}
-
-std::vector<int> GetDivisorsForRange(int number, int range)
-{
- const int last_number = number + range;
- const int max_divisor = static_cast<int>(sqrt(last_number));
- std::set<int> divisors;
- for (int i = 1; i <= max_divisor; ++i)
- {
-    const int remainder = number % i;
-    // iterate through the numbers in our range that are divisible by i
-    const int first_number = number + (i - remainder) % i;
- if (first_number <= last_number)
- {
- divisors.insert(i);
- }
- for (int j = first_number; j <= last_number; j += i)
- {
- const int d = j / i;
- if (d != i)
- {
- divisors.insert(d);
- }
- }
- }
- return std::vector<int>(divisors.begin(), divisors.end());
-}
-
-} // namespace
-
-std::vector<int> GetPossibleSizes(int number, WorkGroupSizeAlignment z_alignment)
-{
- if (z_alignment == WorkGroupSizeAlignment::PRECISE)
- {
-    // For potential sizes we use only sizes that cover the grid precisely:
-    // work group size * k (k is an integer) == grid_size
- return GetDivisors(number);
- }
- else
- {
-    // When choosing a work group size we can also use sizes for which
-    // work group size * k (k is an integer) != grid_size (slightly bigger),
-    // so this heuristic looks for potential sizes that satisfy both
-    // work group size * k (k is an integer) <= grid_size + 5
-    // and work group size * k (k is an integer) >= grid_size
- return GetDivisorsForRange(number, 5);
- }
-}
-
-template <typename T>
-std::vector<T>
-GenerateWorkGroupSizes(const T &grid, int min_work_group_total_size, int max_work_group_total_size,
- const T &max_work_group_sizes, WorkGroupSizeAlignment x_alignment,
- WorkGroupSizeAlignment y_alignment, WorkGroupSizeAlignment z_alignment)
-{
- std::vector<T> work_groups;
- work_groups.reserve(64);
-
- std::vector<int> sizes_x = GetPossibleSizes(grid.x, x_alignment);
- std::vector<int> sizes_y = GetPossibleSizes(grid.y, y_alignment);
- std::vector<int> sizes_z = GetPossibleSizes(grid.z, z_alignment);
-
- for (auto x : sizes_x)
- {
- if (static_cast<int>(x) > static_cast<int>(max_work_group_sizes.x))
- continue;
- for (auto y : sizes_y)
- {
- if (static_cast<int>(y) > static_cast<int>(max_work_group_sizes.y))
- continue;
- for (auto z : sizes_z)
- {
- if (static_cast<int>(z) > static_cast<int>(max_work_group_sizes.z))
- continue;
- const int work_group_size = x * y * z;
- if (work_group_size < min_work_group_total_size ||
- work_group_size > max_work_group_total_size)
- continue;
- work_groups.push_back({x, y, z});
- }
- }
- }
-
- return work_groups;
-}
-
-// Specializations of GenerateWorkGroupSizes for int3 and uint3
-
-template std::vector<int3> GenerateWorkGroupSizes(const int3 &grid, int min_work_group_total_size,
- int max_work_group_total_size,
- const int3 &max_work_group_sizes,
- WorkGroupSizeAlignment x_alignment,
- WorkGroupSizeAlignment y_alignment,
- WorkGroupSizeAlignment z_alignment);
-
-template std::vector<uint3> GenerateWorkGroupSizes(const uint3 &grid, int min_work_group_total_size,
- int max_work_group_total_size,
- const uint3 &max_work_group_sizes,
- WorkGroupSizeAlignment x_alignment,
- WorkGroupSizeAlignment y_alignment,
- WorkGroupSizeAlignment z_alignment);
-
-template <typename T>
-void GenerateWorkGroupSizesAlignedToGrid(const T &grid, const T &max_work_group_size,
- const int max_work_group_invocations,
- std::vector<T> *work_groups)
-{
- auto alignment = WorkGroupSizeAlignment::PRECISE;
- *work_groups =
- GenerateWorkGroupSizes<T>(grid, /*min_work_group_total_size = */ 32, max_work_group_invocations,
- max_work_group_size, alignment, alignment, alignment);
-  // If the grid is too small, the call above cannot generate any work groups.
- if (work_groups->empty())
- {
- AddCornerCases(grid, max_work_group_invocations, max_work_group_size, alignment, alignment,
- alignment, work_groups);
- }
-}
-
-// Specializations of GenerateWorkGroupSizesAlignedToGrid for int3 and uint3
-
-template void GenerateWorkGroupSizesAlignedToGrid(const int3 &grid, const int3 &max_work_group_size,
- const int max_work_group_invocations,
- std::vector<int3> *work_groups);
-
-template void GenerateWorkGroupSizesAlignedToGrid(const uint3 &grid,
- const uint3 &max_work_group_size,
- const int max_work_group_invocations,
- std::vector<uint3> *work_groups);
-
-} // namespace gpu_cl
-} // namespace backend
-} // namespace onert
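
The divisor helpers above drive the PRECISE / NO_ALIGNMENT split in GetPossibleSizes: PRECISE admits only exact divisors of the grid extent, while the range variant also admits divisors of slightly larger extents. A brute-force sketch that reproduces both candidate sets for a grid extent of 12 (range 5, as in GetPossibleSizes above):

    #include <algorithm>
    #include <cstdio>
    #include <set>
    #include <vector>

    // Same result set as GetDivisors() above, via trial division.
    static std::vector<int> Divisors(int number)
    {
      std::vector<int> out;
      for (int i = 1; i * i <= number; ++i)
        if (number % i == 0)
        {
          out.push_back(i);
          if (i != number / i)
            out.push_back(number / i);
        }
      std::sort(out.begin(), out.end());
      return out;
    }

    int main()
    {
      for (int d : Divisors(12)) // PRECISE: 1 2 3 4 6 12
        std::printf("%d ", d);
      std::printf("\n");
      std::set<int> relaxed; // NO_ALIGNMENT: divisors of any of 12..17
      for (int n = 12; n <= 17; ++n)
        for (int d : Divisors(n))
          relaxed.insert(d);
      for (int d : relaxed) // now also contains e.g. 5 (divides 15) and 7 (divides 14)
        std::printf("%d ", d);
      std::printf("\n");
      return 0;
    }
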
diff --git a/runtime/onert/backend/gpu_cl/open_cl/WorkgroupSelection.h b/runtime/onert/backend/gpu_cl/open_cl/WorkgroupSelection.h
deleted file mode 100644
index b0702ac7c..000000000
--- a/runtime/onert/backend/gpu_cl/open_cl/WorkgroupSelection.h
+++ /dev/null
@@ -1,59 +0,0 @@
-/*
- * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
- * Copyright 2019 The TensorFlow Authors. All Rights Reserved.
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#ifndef __ONERT_BACKEND_GPU_CL_OPENCL_WORK_GROUP_SELECTION_H__
-#define __ONERT_BACKEND_GPU_CL_OPENCL_WORK_GROUP_SELECTION_H__
-
-#include <vector>
-
-namespace onert
-{
-namespace backend
-{
-namespace gpu_cl
-{
-
-// PRECISE assumes that WorkGroupSize * k == GridSize;
-// NO_ALIGNMENT imposes no restrictions.
-// We need PRECISE when the kernel has no boundary checks;
-// if it has the checks, either PRECISE or NO_ALIGNMENT can be used.
-enum class WorkGroupSizeAlignment
-{
- PRECISE,
- NO_ALIGNMENT
-};
-
-std::vector<int> GetPossibleSizes(int number, WorkGroupSizeAlignment z_alignment);
-
-// Specializations exist for int3 and uint3 in the .cc file
-
-template <typename T>
-std::vector<T>
-GenerateWorkGroupSizes(const T &grid, int min_work_group_total_size, int max_work_group_total_size,
- const T &max_work_group_sizes, WorkGroupSizeAlignment x_alignment,
- WorkGroupSizeAlignment y_alignment, WorkGroupSizeAlignment z_alignment);
-
-template <typename T>
-void GenerateWorkGroupSizesAlignedToGrid(const T &grid, const T &max_work_group_size,
- const int max_work_group_invocations,
- std::vector<T> *work_groups);
-
-} // namespace gpu_cl
-} // namespace backend
-} // namespace onert
-
-#endif // __ONERT_BACKEND_GPU_CL_OPENCL_WORK_GROUP_SELECTION_H__
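
A brute-force equivalent of the PRECISE filtering that GenerateWorkGroupSizes performs, with a toy int3 so the sketch stands alone (illustrative grid and limits, not tuned values):

    #include <cstdio>
    #include <vector>

    struct Int3 { int x, y, z; }; // stand-in for the int3 used above

    int main()
    {
      const Int3 grid{16, 8, 4};
      const Int3 max_sizes{8, 8, 8};
      const int min_total = 32, max_total = 256;
      std::vector<Int3> work_groups;
      for (int x = 1; x <= max_sizes.x; ++x)
        for (int y = 1; y <= max_sizes.y; ++y)
          for (int z = 1; z <= max_sizes.z; ++z)
          {
            const int total = x * y * z;
            if (total < min_total || total > max_total)
              continue;
            if (grid.x % x || grid.y % y || grid.z % z) // PRECISE on every axis
              continue;
            work_groups.push_back({x, y, z});
          }
      std::printf("%zu PRECISE candidates for a 16x8x4 grid\n", work_groups.size());
      return 0;
    }
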
diff --git a/runtime/onert/backend/gpu_cl/open_cl/kernels/Add.cc b/runtime/onert/backend/gpu_cl/open_cl/kernels/Add.cc
deleted file mode 100644
index 09100fe1f..000000000
--- a/runtime/onert/backend/gpu_cl/open_cl/kernels/Add.cc
+++ /dev/null
@@ -1,64 +0,0 @@
-/*
- * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
- * Copyright 2019 The TensorFlow Authors. All Rights Reserved.
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#include "Add.h"
-
-#include <cstring>
-#include <string>
-
-#include "absl/strings/str_cat.h"
-#include "Util.h"
-#include "open_cl/Util.h"
-
-namespace onert
-{
-namespace backend
-{
-namespace gpu_cl
-{
-
-GPUOperation CreateAdd(const OperationDef &definition, const std::vector<int> &channels,
- int dst_channels)
-{
- GPUOperation add(definition);
- int dst_depth = DivideRoundUp(dst_channels, 4);
- int src0_depth = DivideRoundUp(channels[0], 4);
- add.elementwise_ = true;
- add.linkable_ = dst_depth == src0_depth;
- if (src0_depth < dst_depth)
- {
- add.check_src_channels_size_ = true;
- }
- for (uint32_t i = 1; i < definition.src_tensors.size(); ++i)
- {
- const std::string tensor_name = absl::StrCat("src_data_", i);
- auto src_desc = definition.src_tensors[i];
- if (definition.IsBatchSupported())
- {
- src_desc.SetStateVar("BatchedWidth", "true");
- }
- add.AddSrcTensor(tensor_name, src_desc);
- add.code_ += "if (S_COORD < args." + tensor_name + ".Slices()) {\n";
- add.code_ += " in_out_value += args." + tensor_name + ".Read(X_COORD, Y_COORD, S_COORD);\n";
- add.code_ += "}\n";
- }
- return add;
-}
-
-} // namespace gpu_cl
-} // namespace backend
-} // namespace onert
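
CreateAdd above sizes everything in 4-channel slices: the op is linkable only when the first input already has as many slices as the output, and each extra input is guarded by its own Slices() check. A small sketch of that bookkeeping with assumed channel counts (3, 8, 5 in; 8 out):

    #include <cstdio>
    #include <vector>

    static int DivideRoundUp(int n, int d) { return (n + d - 1) / d; }

    int main()
    {
      const std::vector<int> channels = {3, 8, 5};
      const int dst_channels = 8;
      const int dst_depth = DivideRoundUp(dst_channels, 4); // 2 slices
      const int src0_depth = DivideRoundUp(channels[0], 4); // 1 slice
      std::printf("linkable=%d check_src_channels_size=%d\n",
                  dst_depth == src0_depth, src0_depth < dst_depth);
      // Each extra input contributes only where it has slices, which is what the
      // generated "if (S_COORD < args.src_data_i.Slices())" guard enforces.
      for (size_t i = 1; i < channels.size(); ++i)
        std::printf("src_data_%zu: %d slice(s)\n", i, DivideRoundUp(channels[i], 4));
      return 0;
    }
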
diff --git a/runtime/onert/backend/gpu_cl/open_cl/kernels/Add.h b/runtime/onert/backend/gpu_cl/open_cl/kernels/Add.h
deleted file mode 100644
index 2335a901c..000000000
--- a/runtime/onert/backend/gpu_cl/open_cl/kernels/Add.h
+++ /dev/null
@@ -1,43 +0,0 @@
-/*
- * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
- * Copyright 2019 The TensorFlow Authors. All Rights Reserved.
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#ifndef __ONERT_BACKEND_GPU_CL_OPENCL_KERNELS_ADD_H__
-#define __ONERT_BACKEND_GPU_CL_OPENCL_KERNELS_ADD_H__
-
-#include <string>
-#include <vector>
-
-#include "GpuOperation.h"
-#include "open_cl/Operations.h"
-
-namespace onert
-{
-namespace backend
-{
-namespace gpu_cl
-{
-
-// The Add operation supports inputs with unequal channel counts (which makes it
-// possible to remove a Padding operation that zero-pads the channels dimension)
-GPUOperation CreateAdd(const OperationDef &definition, const std::vector<int> &channels,
- int dst_channels);
-
-} // namespace gpu_cl
-} // namespace backend
-} // namespace onert
-
-#endif // __ONERT_BACKEND_GPU_CL_OPENCL_KERNELS_ADD_H__
diff --git a/runtime/onert/backend/gpu_cl/open_cl/kernels/ConvBuffer1x1.cc b/runtime/onert/backend/gpu_cl/open_cl/kernels/ConvBuffer1x1.cc
deleted file mode 100644
index 1b9014fdf..000000000
--- a/runtime/onert/backend/gpu_cl/open_cl/kernels/ConvBuffer1x1.cc
+++ /dev/null
@@ -1,480 +0,0 @@
-/*
- * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
- * Copyright 2019 The TensorFlow Authors. All Rights Reserved.
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#include "open_cl/kernels/ConvBuffer1x1.h"
-
-#include <array>
-#include <string>
-#include <utility>
-
-#include "open_cl/ClDevice.h"
-#include "open_cl/kernels/Util.h"
-#include "open_cl/kernels/WorkGroupPicking.h"
-#include "open_cl/Precision.h"
-#include "open_cl/TensorType.h"
-#include "open_cl/Status.h"
-
-namespace onert
-{
-namespace backend
-{
-namespace gpu_cl
-{
-namespace
-{
-
-// element_size must be 1, 2 or 4:
-// 1 - FLT4
-// 2 - FLT8
-// 4 - FLT16
-// This function generates code for the arithmetic part of the convolution
-std::string GetComputationPart(const int3 &block_size, int element_size,
- CalculationsPrecision precision)
-{
- const std::string hexes[16] = {"0", "1", "2", "3", "4", "5", "6", "7",
- "8", "9", "a", "b", "c", "d", "e", "f"};
- std::string c;
- for (int z = 0; z < block_size.z; ++z)
- {
- const std::string z_s = std::to_string(z);
- c += " FLT16 W" + z_s + " = weights_cache[" + z_s + "];\n";
- for (int y = 0; y < block_size.y; ++y)
- {
- for (int x = 0; x < block_size.x; ++x)
- {
- std::string s_index = std::to_string(y * block_size.x + x);
- for (int e = 0; e < element_size; ++e)
- {
- std::string r_index = z_s + std::to_string(y) + std::to_string(x * element_size + e);
- const std::string f0 = "W" + z_s + ".s0123";
- const std::string f1 = "W" + z_s + ".s4567";
- const std::string f2 = "W" + z_s + ".s89ab";
- const std::string f3 = "W" + z_s + ".scdef";
- switch (precision)
- {
- case CalculationsPrecision::F32:
- case CalculationsPrecision::F16:
- c += " r" + r_index + " += " + f0 + " * s" + s_index + ".s" + hexes[e * 4 + 0] +
- ";\n";
- c += " r" + r_index + " += " + f1 + " * s" + s_index + ".s" + hexes[e * 4 + 1] +
- ";\n";
- c += " r" + r_index + " += " + f2 + " * s" + s_index + ".s" + hexes[e * 4 + 2] +
- ";\n";
- c += " r" + r_index + " += " + f3 + " * s" + s_index + ".s" + hexes[e * 4 + 3] +
- ";\n";
- break;
- case CalculationsPrecision::F32_F16:
- c += " r" + r_index + " += convert_float4(" + f0 + " * s" + s_index + ".s" +
- hexes[e * 4 + 0] + " + " + f1 + " * s" + s_index + ".s" + hexes[e * 4 + 1] +
- " + " + f2 + " * s" + s_index + ".s" + hexes[e * 4 + 2] + " + " + f3 + " * s" +
- s_index + ".s" + hexes[e * 4 + 3] + ");\n";
- break;
- }
- }
- }
- }
- }
- return c;
-}
-
-ConvBuffer1x1::ConvParams GetBestParams(const DeviceInfo &device_info,
- const OperationDef &definition, const BHWC &shape, int,
- int dst_depth)
-{
- ConvBuffer1x1::ConvParams conv_params;
- conv_params.element_size = 4;
- conv_params.block_size = int3(1, 1, 1);
- if (!device_info.IsMali())
- {
- return conv_params;
- }
- bool can_use_flt8 =
- (shape.w * shape.b) % 2 == 0 && definition.precision != CalculationsPrecision::F32;
- bool is_midgard = device_info.IsMali() && device_info.mali_info.IsMidgard();
- if (is_midgard)
- {
- if (can_use_flt8)
- {
- conv_params.element_size = 8;
- }
- if (definition.precision == CalculationsPrecision::F16 || !can_use_flt8)
- {
- conv_params.block_size.x = 2;
- }
- return conv_params;
- }
-
- int task_size = shape.w * shape.b * shape.h * dst_depth;
- int block_size = GetRecommendedBlockSizeForConv(device_info, definition.precision, task_size);
-
- if (!can_use_flt8 && block_size > 4)
- {
- block_size = 4;
- }
-
- if (can_use_flt8 && block_size >= 2)
- {
- conv_params.element_size = 8;
- block_size /= 2;
- }
- if (block_size == 4)
- {
- conv_params.block_size.x = 2;
- if (definition.precision == CalculationsPrecision::F32 && dst_depth < 32)
- {
- conv_params.block_size.y = 2;
- }
- else
- {
- conv_params.block_size.z = 2;
- }
- }
- else if (block_size == 2)
- {
- if (dst_depth >= 32)
- {
- conv_params.block_size.z = 2;
- }
- else
- {
- conv_params.block_size.x = 2;
- }
- }
-
- return conv_params;
-}
-
-ConvBuffer1x1::ConvParams GetBestParams(const DeviceInfo &device_info,
- const OperationDef &definition, int, int)
-{
- ConvBuffer1x1::ConvParams conv_params;
- conv_params.element_size = 4;
- conv_params.block_size = int3(1, 1, 1);
- if (device_info.IsMali() && definition.precision == CalculationsPrecision::F16 &&
- device_info.compute_units_count <= 4)
- {
- conv_params.block_size.x *= 2;
- }
- return conv_params;
-}
-
-} // namespace
-
-ConvBuffer1x1::ConvBuffer1x1(const OperationDef &definition, const ConvParams &conv_params)
- : GPUOperation(definition), conv_params_(conv_params)
-{
- code_ = GenerateConvBuffer1x1(definition_, conv_params_, &args_);
- work_group_size_ = int3(2, 4, 1);
-}
-
-ConvBuffer1x1::ConvBuffer1x1(ConvBuffer1x1 &&operation)
- : GPUOperation(std::move(operation)), conv_params_(std::move(operation.conv_params_))
-{
-}
-
-ConvBuffer1x1 &ConvBuffer1x1::operator=(ConvBuffer1x1 &&operation)
-{
- if (this != &operation)
- {
- std::swap(conv_params_, operation.conv_params_);
- GPUOperation::operator=(std::move(operation));
- }
- return *this;
-}
-
-std::string ConvBuffer1x1::GenerateConvBuffer1x1(const OperationDef &op_def,
- const ConvBuffer1x1::ConvParams &conv_params,
- Arguments *)
-{
- auto src_desc = op_def.src_tensors[0];
- if (op_def.IsBatchSupported())
- {
- src_desc.SetStateVar("BatchedWidth", "true");
- }
- if (conv_params_.element_size == 8)
- {
- src_desc.SetStateVar("ElementsX2", "true");
- }
- else if (conv_params_.element_size == 16)
- {
- src_desc.SetStateVar("ElementsX4", "true");
- }
- AddSrcTensor("src_tensor", src_desc);
- if (op_def.src_tensors.size() == 2)
- {
- // dynamic weights
- BufferDescriptor desc;
- desc.element_type = op_def.src_tensors[1].data_type;
- desc.element_size = 16;
- desc.memory_type = MemoryType::GLOBAL;
- AddSrcBuffer("weights", desc);
- }
-
- auto dst_desc = op_def.dst_tensors[0];
- if (op_def.IsBatchSupported())
- {
- dst_desc.SetStateVar("BatchedWidth", "true");
- }
- AddDstTensor("dst_tensor", dst_desc);
-
- std::string c = GetCommonDefines(op_def.precision);
- switch (op_def.precision)
- {
- case CalculationsPrecision::F32:
- c += "#define FLT8 float8\n";
- c += "#define FLT16 float16\n";
- break;
- case CalculationsPrecision::F32_F16:
- case CalculationsPrecision::F16:
- c += "#define FLT8 half8\n";
- c += "#define FLT16 half16\n";
- break;
- }
-
- const int3 block_size = conv_params.block_size;
- const int element_size = conv_params.element_size / 4;
-
- c += "__kernel void main_function(\n";
- c += "$0) {\n";
- c += " int X = get_global_id(0) * " + std::to_string(block_size.x * element_size) + ";\n";
- c += " int X_SRC = get_global_id(0) * " + std::to_string(block_size.x) + ";\n";
- c += " int Y = get_global_id(1) * " + std::to_string(block_size.y) + ";\n";
- c += " int Z = get_global_id(2) * " + std::to_string(block_size.z) + ";\n";
- c += " if (X >= args.dst_tensor.Width() || Y >= args.dst_tensor.Height() || "
- "Z >= args.dst_tensor.Slices()) return;\n";
- if (conv_params.different_weights_for_height)
- {
- c += " __global FLT16* weights_cache = args.weights.GetPtr() + (Z * "
- "args.src_tensor.Height() + "
- "Y * " +
- std::to_string(block_size.z) +
- ") * "
- "args.src_tensor.Slices();\n";
- }
- else
- {
- c += " __global FLT16* weights_cache = args.weights.GetPtr() + Z * "
- "args.src_tensor.Slices();\n";
- }
- for (int z = 0; z < block_size.z; ++z)
- {
- const std::string z_s = std::to_string(z);
- c += " ACCUM_FLT4 bias_val_" + z_s + " = TO_ACCUM_TYPE(args.biases.Read(Z + " + z_s + "));\n";
- for (int y = 0; y < block_size.y; ++y)
- {
- for (int x = 0; x < block_size.x * element_size; ++x)
- {
- c += " ACCUM_FLT4 r" + z_s + std::to_string(y) + std::to_string(x) + " = bias_val_" + z_s +
- ";\n";
- }
- }
- }
- for (int x = 0; x < block_size.x; ++x)
- {
- std::string x_s = std::to_string(x);
- c += " int xc" + x_s + " = min(X_SRC + " + std::to_string(x) +
- ", args.src_tensor.Width() - 1);\n";
- }
- for (int y = 0; y < block_size.y; ++y)
- {
- std::string y_s = std::to_string(y);
- c += " int yc" + y_s + " = min(Y + " + y_s + ", args.src_tensor.Height() - 1);\n";
- }
- for (int y = 0; y < block_size.y; ++y)
- {
- std::string y_s = std::to_string(y);
- for (int x = 0; x < block_size.x; ++x)
- {
- std::string x_s = std::to_string(x);
- std::string i_s = std::to_string(y * block_size.x + x);
- c += " int src_addr_" + i_s + " = (yc" + y_s + ") * args.src_tensor.Width() + (xc" + x_s +
- ");\n";
- }
- }
- c += " for (int s = 0; s < args.src_tensor.Slices(); ++s) {\n";
- for (int y = 0; y < block_size.y; ++y)
- {
- std::string y_s = std::to_string(y);
- for (int x = 0; x < block_size.x; ++x)
- {
- std::string x_s = std::to_string(x);
- std::string i_s = std::to_string(y * block_size.x + x);
- c += " FLT" + std::to_string(element_size * 4) + " s" + i_s +
- " = args.src_tensor.Read(src_addr_" + i_s + ");\n";
- }
- }
- c += GetComputationPart(block_size, element_size, op_def.precision);
- for (int i = 0; i < block_size.x * block_size.y; ++i)
- {
- std::string i_s = std::to_string(i);
- c += " src_addr_" + i_s + " += args.src_tensor.SliceStride();\n";
- }
- c += " weights_cache += " + std::to_string(block_size.z) + ";\n";
- c += " }\n"; // SRC_SLICES
-
- for (int z = 0; z < block_size.z; ++z)
- {
- const std::string z_s = std::to_string(z);
- if (z != 0)
- {
- c += " if (Z + " + z_s + " >= args.dst_tensor.Slices()) return;\n";
- }
- for (int y = 0; y < block_size.y; ++y)
- {
- const std::string y_s = std::to_string(y);
- for (int x = 0; x < block_size.x * element_size; ++x)
- {
- const std::string x_s = std::to_string(x);
- c += " if (X + " + x_s + " < args.dst_tensor.Width() && Y + " + y_s +
- " < args.dst_tensor.Height()) {\n";
- c += " FLT4 res = TO_FLT4(r" + z_s + y_s + x_s + ");\n";
- c += " args.dst_tensor.Write(res, X + " + x_s + ", Y + " + y_s + ", Z + " + z_s + ");\n";
- c += " }\n";
- }
- }
- }
- c += "}\n";
- return c;
-}
-
-int3 ConvBuffer1x1::GetGridSize() const
-{
- const int dst_width_elements =
- DivideRoundUp(dst_[0]->Width() * dst_[0]->Batch(), (conv_params_.element_size / 4));
- const int grid_x = DivideRoundUp(dst_width_elements, conv_params_.block_size.x);
- const int grid_y = DivideRoundUp(dst_[0]->Height(), conv_params_.block_size.y);
- const int grid_z = DivideRoundUp(dst_[0]->Slices(), conv_params_.block_size.z);
- return int3(grid_x, grid_y, grid_z);
-}
-
-void ConvBuffer1x1::GetPossibleKernelWorkGroups(TuningType tuning_type,
- const DeviceInfo &device_info,
- const KernelInfo &kernel_info,
- std::vector<int3> *work_groups) const
-{
- GetPossibleWorkGroupsConv(tuning_type, device_info, kernel_info, grid_size_, work_groups);
-}
-
-bool IsConvBuffer1x1Supported(const OperationDef &definition, const Convolution2DAttributes &attr)
-{
- auto src_storage_type = definition.src_tensors[0].storage_type;
- return src_storage_type == TensorStorageType::BUFFER && attr.weights.shape.w == 1 &&
- attr.weights.shape.h == 1 && attr.dilations.w == 1 && attr.dilations.h == 1 &&
- attr.strides.w == 1 && attr.strides.h == 1 && attr.padding.prepended.w == 0 &&
- attr.padding.prepended.h == 0 && attr.padding.appended.w == 0 &&
- attr.padding.appended.h == 0;
-}
-
-bool IsConvBuffer1x1Supported(const OperationDef &definition, const BHWC &weights_shape,
- const Convolution2DAttributes &attr)
-{
- auto src_storage_type = definition.src_tensors[0].storage_type;
- return src_storage_type == TensorStorageType::BUFFER && weights_shape.w == 1 &&
- weights_shape.h == 1 && attr.dilations.w == 1 && attr.dilations.h == 1 &&
- attr.strides.w == 1 && attr.strides.h == 1 && attr.padding.prepended.w == 0 &&
- attr.padding.prepended.h == 0 && attr.padding.appended.w == 0 &&
- attr.padding.appended.h == 0;
-}
-
-ConvBuffer1x1 CreateConvBuffer1x1(const DeviceInfo &device_info, const OperationDef &definition,
- const Convolution2DAttributes &attr, const BHWC *shape)
-{
- const int dst_depth = DivideRoundUp(attr.weights.shape.o, 4);
- const int src_depth = DivideRoundUp(attr.weights.shape.i, 4);
- ConvBuffer1x1::ConvParams conv_params;
- if (shape)
- {
- conv_params = GetBestParams(device_info, definition, *shape, src_depth, dst_depth);
- }
- else
- {
- conv_params = GetBestParams(device_info, definition, src_depth, dst_depth);
- }
- ConvBuffer1x1 result(definition, conv_params);
- result.UploadData(attr.weights, attr.bias);
- return result;
-}
-
-ConvBuffer1x1 CreateConvBuffer1x1(const DeviceInfo &device_info, const OperationDef &definition,
- const FullyConnectedAttributes &attr, const BHWC *shape)
-{
- const int dst_depth = DivideRoundUp(attr.weights.shape.o, 4);
- const int src_depth = DivideRoundUp(attr.weights.shape.i, 4);
- ConvBuffer1x1::ConvParams conv_params;
- if (shape)
- {
- conv_params = GetBestParams(device_info, definition, *shape, src_depth, dst_depth);
- }
- else
- {
- conv_params = GetBestParams(device_info, definition, src_depth, dst_depth);
- }
- conv_params.block_size.x *= conv_params.block_size.y;
- conv_params.block_size.y = 1;
- ConvBuffer1x1 result(definition, conv_params);
- result.UploadData(attr.weights, attr.bias);
- return result;
-}
-
-ConvBuffer1x1 CreateConvBuffer1x1Wino4x4To6x6(const DeviceInfo &device_info,
- const OperationDef &definition,
- const Convolution2DAttributes &attr,
- const BHWC *shape)
-{
- const int dst_depth = DivideRoundUp(attr.weights.shape.o, 4);
- const int src_depth = DivideRoundUp(attr.weights.shape.i, 4);
- ConvBuffer1x1::ConvParams conv_params;
- if (shape)
- {
- conv_params = GetBestParams(device_info, definition, *shape, src_depth, dst_depth);
- }
- else
- {
- conv_params = GetBestParams(device_info, definition, src_depth, dst_depth);
- }
- conv_params.block_size.x *= conv_params.block_size.y;
- conv_params.block_size.y = 1;
- conv_params.different_weights_for_height = true;
- ConvBuffer1x1 result(definition, conv_params);
- result.UploadDataForWinograd4x4To6x6(attr.weights);
- return result;
-}
-
-ConvBuffer1x1 CreateConvBuffer1x1DynamicWeights(const DeviceInfo &device_info,
- const OperationDef &definition,
- const Convolution2DAttributes &attr,
- const BHWC &weights_shape, const BHWC *dst_shape)
-{
- const int dst_depth = DivideRoundUp(weights_shape.b, 4);
- const int src_depth = DivideRoundUp(weights_shape.c, 4);
- ConvBuffer1x1::ConvParams conv_params;
- if (dst_shape)
- {
- conv_params = GetBestParams(device_info, definition, *dst_shape, src_depth, dst_depth);
- }
- else
- {
- conv_params = GetBestParams(device_info, definition, src_depth, dst_depth);
- }
- ConvBuffer1x1 result(definition, conv_params);
- result.UploadBiases(attr.bias);
- return result;
-}
-
-} // namespace gpu_cl
-} // namespace backend
-} // namespace onert
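
GetGridSize above divides the destination into blocks along width (in element_size/4 units), height and slices. Worked numbers for an assumed 64x32 destination with batch 1, 40 channels, FLT8 elements and block_size (2, 1, 2):

    #include <cstdio>

    static int DivideRoundUp(int n, int d) { return (n + d - 1) / d; }

    int main()
    {
      const int width = 64, batch = 1, height = 32, channels = 40;
      const int element_size = 8;       // FLT8, i.e. two float4 per element
      const int bx = 2, by = 1, bz = 2; // conv_params_.block_size
      const int slices = DivideRoundUp(channels, 4);                          // 10
      const int width_elems = DivideRoundUp(width * batch, element_size / 4); // 32
      std::printf("grid = (%d, %d, %d)\n",
                  DivideRoundUp(width_elems, bx), // 16
                  DivideRoundUp(height, by),      // 32
                  DivideRoundUp(slices, bz));     // 5
      return 0;
    }
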
diff --git a/runtime/onert/backend/gpu_cl/open_cl/kernels/ConvBuffer1x1.h b/runtime/onert/backend/gpu_cl/open_cl/kernels/ConvBuffer1x1.h
deleted file mode 100644
index 0abd6051f..000000000
--- a/runtime/onert/backend/gpu_cl/open_cl/kernels/ConvBuffer1x1.h
+++ /dev/null
@@ -1,205 +0,0 @@
-/*
- * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
- * Copyright 2019 The TensorFlow Authors. All Rights Reserved.
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#ifndef __ONERT_BACKEND_GPU_CL_OPENCL_KERNELS_CONV_BUFFER_1X1_H__
-#define __ONERT_BACKEND_GPU_CL_OPENCL_KERNELS_CONV_BUFFER_1X1_H__
-
-#include "open_cl/Buffer.h"
-#include "open_cl/ClKernel.h"
-#include "open_cl/kernels/ConvCommon.h"
-#include "open_cl/kernels/GpuOperation.h"
-#include "open_cl/kernels/Util.h"
-#include "open_cl/LinearStorage.h"
-#include "open_cl/Precision.h"
-#include "open_cl/InternalTensor.h"
-#include "open_cl/Util.h"
-#include "open_cl/DataType.h"
-#include "open_cl/Operations.h"
-#include "open_cl/Shape.h"
-#include "open_cl/Status.h"
-#include "open_cl/Tensor.h"
-#include "open_cl/Types.h"
-#include "open_cl/WinogradUtil.h"
-
-namespace onert
-{
-namespace backend
-{
-namespace gpu_cl
-{
-
-class ConvBuffer1x1 : public GPUOperation
-{
-public:
- ConvBuffer1x1() = default;
-
- // Move only
- ConvBuffer1x1(ConvBuffer1x1 &&operation);
- ConvBuffer1x1 &operator=(ConvBuffer1x1 &&operation);
- ConvBuffer1x1(const ConvBuffer1x1 &) = delete;
- ConvBuffer1x1 &operator=(const ConvBuffer1x1 &) = delete;
-
- void GetPossibleKernelWorkGroups(TuningType tuning_type, const DeviceInfo &device_info,
- const KernelInfo &kernel_info,
- std::vector<int3> *work_groups) const override;
- int3 GetGridSize() const override;
-
- ConvWeightsDescription GetConvWeightsDescription() const
- {
- ConvWeightsDescription desc;
- desc.layout = ConvWeightsLayout::kOHWIOGroupI4O4;
- desc.output_group_size = conv_params_.block_size.z;
- return desc;
- }
-
- struct ConvParams
- {
- int3 block_size = int3(1, 1, 1);
- int element_size = 4; // can be 4, 8 or 16
-
-    // By default a 2D convolution uses the same weights for the W and H dims,
-    // but in some cases we need separate weights for the H dimension; the
-    // convolution kernel requires only very small modifications to support it.
- bool different_weights_for_height = false;
- };
-
-private:
- ConvBuffer1x1(const OperationDef &definition, const ConvParams &conv_params);
- friend ConvBuffer1x1 CreateConvBuffer1x1(const DeviceInfo &device_info,
- const OperationDef &definition,
- const Convolution2DAttributes &attr, const BHWC *shape);
- friend ConvBuffer1x1 CreateConvBuffer1x1(const DeviceInfo &device_info,
- const OperationDef &definition,
- const FullyConnectedAttributes &attr, const BHWC *shape);
- friend ConvBuffer1x1 CreateConvBuffer1x1Wino4x4To6x6(const DeviceInfo &device_info,
- const OperationDef &definition,
- const Convolution2DAttributes &attr,
- const BHWC *shape);
- friend ConvBuffer1x1 CreateConvBuffer1x1DynamicWeights(const DeviceInfo &device_info,
- const OperationDef &definition,
- const Convolution2DAttributes &attr,
- const BHWC &weights_shape,
- const BHWC *dst_shape);
-
- template <DataType T>
- void UploadData(const InternalTensor<OHWI, T> &weights, const InternalTensor<Linear, T> &biases);
- template <DataType T> void UploadDataForWinograd4x4To6x6(const InternalTensor<OHWI, T> &weights);
-
- template <DataType T> void UploadWeights(const InternalTensor<OHWI, T> &weights);
-
- template <DataType T> void UploadBiases(const InternalTensor<Linear, T> &biases);
-
- std::string GenerateConvBuffer1x1(const OperationDef &op_def,
- const ConvBuffer1x1::ConvParams &conv_params, Arguments *args);
-
- ConvParams conv_params_;
-};
-
-template <DataType T>
-void ConvBuffer1x1::UploadData(const InternalTensor<OHWI, T> &weights,
- const InternalTensor<Linear, T> &biases)
-{
- UploadWeights(weights);
- UploadBiases(biases);
-}
-
-template <DataType T>
-void ConvBuffer1x1::UploadDataForWinograd4x4To6x6(const InternalTensor<OHWI, T> &weights)
-{
- InternalTensor<OHWI, T> wino_weights;
- RearrangeWeightsToWinograd4x4To6x6Weights(weights, &wino_weights);
- UploadWeights(wino_weights);
- InternalTensor<Linear, DataType::FLOAT32> bias;
- bias.shape = Linear(weights.shape.o);
- bias.data.resize(weights.shape.o, 0.0f);
- UploadBiases(bias);
-}
-
-template <DataType T> void ConvBuffer1x1::UploadWeights(const InternalTensor<OHWI, T> &weights)
-{
- const int dst_depth = DivideRoundUp(weights.shape.o, 4);
- const int src_depth = DivideRoundUp(weights.shape.i, 4);
-
- const bool f32_weights = definition_.precision == CalculationsPrecision::F32;
- const int float4_size = sizeof(float4);
- // TODO
- // f32_weights ? sizeof(float4) : sizeof(half4);
-
- const int dst_depth_aligned = AlignByN(dst_depth, conv_params_.block_size.z);
- const int elements_count = weights.shape.h * weights.shape.w * src_depth * dst_depth_aligned * 4;
-
- BufferDescriptor desc;
- desc.element_type = f32_weights ? DataType::FLOAT32 : DataType::FLOAT16;
- desc.element_size = 16;
- desc.memory_type = MemoryType::GLOBAL;
- desc.size = float4_size * elements_count;
- desc.data.resize(desc.size);
-
- if (f32_weights)
- {
- float4 *ptr = reinterpret_cast<float4 *>(desc.data.data());
- RearrangeWeightsToOHWIOGroupI4O4(weights, conv_params_.block_size.z,
- absl::MakeSpan(ptr, elements_count));
- }
- // else
- // {
- // half4 *ptr = reinterpret_cast<half4 *>(desc.data.data());
- // RearrangeWeightsToOHWIOGroupI4O4(weights, conv_params_.block_size.z,
- // absl::MakeSpan(ptr, elements_count));
- // }
-
- args_.AddObject("weights", absl::make_unique<BufferDescriptor>(std::move(desc)));
-}
-
-template <DataType T> void ConvBuffer1x1::UploadBiases(const InternalTensor<Linear, T> &biases)
-{
- TensorLinearDescriptor desc;
- desc.storage_type = LinearStorageType::BUFFER;
- desc.element_type = definition_.GetDataType();
- int depth = AlignByN(biases.shape.v, 4 * conv_params_.block_size.z) / 4;
- desc.UploadLinearData(biases, depth);
- args_.AddObject("biases", absl::make_unique<TensorLinearDescriptor>(std::move(desc)));
-}
-
-bool IsConvBuffer1x1Supported(const OperationDef &definition, const Convolution2DAttributes &attr);
-
-bool IsConvBuffer1x1Supported(const OperationDef &definition, const BHWC &weights_shape,
- const Convolution2DAttributes &attr);
-
-ConvBuffer1x1 CreateConvBuffer1x1(const DeviceInfo &device_info, const OperationDef &definition,
- const Convolution2DAttributes &attr, const BHWC *shape = nullptr);
-
-ConvBuffer1x1 CreateConvBuffer1x1(const DeviceInfo &device_info, const OperationDef &definition,
- const FullyConnectedAttributes &attr,
- const BHWC *shape = nullptr);
-
-ConvBuffer1x1 CreateConvBuffer1x1DynamicWeights(const DeviceInfo &device_info,
- const OperationDef &definition,
- const Convolution2DAttributes &attr,
- const BHWC &weights_shape,
- const BHWC *dst_shape = nullptr);
-
-ConvBuffer1x1 CreateConvBuffer1x1Wino4x4To6x6(const DeviceInfo &device_info,
- const OperationDef &definition,
- const Convolution2DAttributes &attr,
- const BHWC *shape = nullptr);
-
-} // namespace gpu_cl
-} // namespace backend
-} // namespace onert
-
-#endif // __ONERT_BACKEND_GPU_CL_OPENCL_KERNELS_CONV_BUFFER_1X1_H__
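
UploadWeights above aligns the output slice count to block_size.z before sizing the buffer, so the kernel can read whole weight groups without bounds checks. The sizing math for assumed 1x1 weights with O=40, I=24, block_size.z=2 and F32 precision:

    #include <cstdio>

    static int DivideRoundUp(int n, int d) { return (n + d - 1) / d; }
    static int AlignByN(int n, int a) { return DivideRoundUp(n, a) * a; }

    int main()
    {
      const int o = 40, i = 24, h = 1, w = 1, block_z = 2;
      const int dst_depth = DivideRoundUp(o, 4);            // 10
      const int src_depth = DivideRoundUp(i, 4);            // 6
      const int dst_aligned = AlignByN(dst_depth, block_z); // 10, already aligned
      const int elements = h * w * src_depth * dst_aligned * 4; // 240 float4
      std::printf("weights buffer: %d float4 = %d bytes\n",
                  elements, elements * 16); // float4_size = 16, as above
      return 0;
    }
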
diff --git a/runtime/onert/backend/gpu_cl/open_cl/kernels/ConvConstants.cc b/runtime/onert/backend/gpu_cl/open_cl/kernels/ConvConstants.cc
deleted file mode 100644
index 0a51bab5c..000000000
--- a/runtime/onert/backend/gpu_cl/open_cl/kernels/ConvConstants.cc
+++ /dev/null
@@ -1,282 +0,0 @@
-/*
- * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
- * Copyright 2019 The TensorFlow Authors. All Rights Reserved.
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#include "open_cl/kernels/ConvConstants.h"
-
-#include <string>
-#include <utility>
-
-#include "open_cl/kernels/Util.h"
-#include "open_cl/kernels/WorkGroupPicking.h"
-#include "open_cl/Precision.h"
-
-namespace onert
-{
-namespace backend
-{
-namespace gpu_cl
-{
-namespace
-{
-// Adreno can provide up to ~3-4KB of constant memory, but in some cases even
-// 3KB can perform very badly.
-int GetAdrenoOptimalMaxConstantSize(int gpu_version)
-{
- if (gpu_version < 600)
- {
- return 256 * 10; // 2.5KB
- }
- else
- {
- return 256 * 14; // 3.5KB
- }
-}
-
-int GetOptimalMaxConstantSize(const DeviceInfo &info)
-{
- if (!info.IsAdreno())
- {
-    // In general we do not expect this kernel to be used on non-Adreno GPUs,
-    // since it is tuned for __constant memory, which mainly pays off on Adreno
- return 1024; // 1KB
- }
- else
- {
- return GetAdrenoOptimalMaxConstantSize(info.adreno_info.gpu_version);
- }
-}
-
-std::string GenerateConvolutionConstantCode(const OperationDef &op_def, const OHWI &weights_shape,
- bool stride_correction, GPUOperation *op)
-{
- auto src_desc = op_def.src_tensors[0];
- src_desc.SetTextureAddressMode(TextureAddressMode::ZERO);
- if (op_def.IsBatchSupported())
- {
- src_desc.SetStateVar("BatchedWidth", "true");
- }
- op->AddSrcTensor("src_tensor", src_desc);
-
- auto dst_desc = op_def.dst_tensors[0];
- if (op_def.IsBatchSupported())
- {
- dst_desc.SetStateVar("BatchedWidth", "true");
- }
- op->AddDstTensor("dst_tensor", dst_desc);
-
- std::string c = GetCommonDefines(op_def.precision);
-
- const int out_z = DivideRoundUp(weights_shape.o, 4);
- const std::string kOutZ = std::to_string(out_z);
- const int src_depth = DivideRoundUp(weights_shape.i, 4);
-
- const auto src_tensor_type = op_def.src_tensors[0].storage_type;
- const bool manual_clamp = src_tensor_type == TensorStorageType::BUFFER ||
- src_tensor_type == TensorStorageType::IMAGE_BUFFER;
-
- switch (op_def.precision)
- {
- case CalculationsPrecision::F32:
- case CalculationsPrecision::F16:
- c += "#define CONV4(R, SRC, F, i) \\\n";
- c += " R += SRC.x * F[i + 0]; \\\n";
- c += " R += SRC.y * F[i + 1]; \\\n";
- c += " R += SRC.z * F[i + 2]; \\\n";
- c += " R += SRC.w * F[i + 3]; \n";
-
- c += "#define CONV3(R, SRC, F, i) \\\n";
- c += " R += SRC.x * F[i + 0]; \\\n";
- c += " R += SRC.y * F[i + 1]; \\\n";
- c += " R += SRC.z * F[i + 2]; \n";
-
- c += "#define CONV2(R, SRC, F, i) \\\n";
- c += " R += SRC.x * F[i + 0]; \\\n";
- c += " R += SRC.y * F[i + 1]; \n";
-
- c += "#define CONV1(R, SRC, F, i) \\\n";
- c += " R += SRC * F[i + 0]; \n";
- break;
- case CalculationsPrecision::F32_F16:
- c += "#define CONV4(R, SRC, F, i) \\\n";
- c += " R += convert_float4(SRC.x * F[i + 0] + SRC.y * F[i + 1]";
- c += " + SRC.z * F[i + 2] + SRC.w * F[i + 3]);\n";
-
- c += "#define CONV3(R, SRC, F, i) \\\n";
- c += " R += convert_float4(SRC.x * F[i + 0] + SRC.y * F[i + 1]";
- c += " + SRC.z * F[i + 2]);\n";
-
- c += "#define CONV2(R, SRC, F, i) \\\n";
- c += " R += convert_float4(SRC.x * F[i + 0] + SRC.y * F[i + 1]);\n";
-
- c += "#define CONV1(R, SRC, F, i) \\\n";
- c += " R += convert_float4(SRC * F[i + 0]);\n";
- break;
- }
-
- const std::string postfixes[] = {".x", ".xy", ".xyz", ""};
-
- c += "__kernel void main_function(\n";
- c += "$0) {\n";
- c += " int X = get_global_id(0);\n";
- c += " int Y = get_global_id(1);\n";
- c += " if (X >= args.dst_tensor.Width() || Y >= args.dst_tensor.Height()) "
- "return;\n";
- if (stride_correction)
- {
- c += " int start_x = " +
- GetXStrideCorrectedV2("X", "args.src_tensor.Batch()", "args.stride_x", "args.padding_x") +
- ";\n";
- }
- else
- {
- if (op_def.IsBatchSupported())
- {
- c += " int start_x = X * args.stride_x + args.padding_x * "
- "args.src_tensor.Batch();\n";
- }
- else
- {
- c += " int start_x = X * args.stride_x + args.padding_x;\n";
- }
- }
- c += " int start_y = Y * args.stride_y + args.padding_y;\n";
- c += " ACCUM_FLT4 r[" + kOutZ + "];\n";
- c += " for (int i = 0; i < " + kOutZ + "; ++i) {\n";
- c += " r[i] = (ACCUM_FLT4)(0.0f, 0.0f, 0.0f, 0.0f);\n";
- c += " }\n";
- int filters_counter = 0;
- for (int s = 0; s < src_depth; ++s)
- {
- const int ch_count = std::min(4, weights_shape.i - s * 4);
- const std::string s_conv = "CONV" + std::to_string(ch_count);
- const std::string s_count = ch_count == 1 ? "" : std::to_string(ch_count);
- const std::string s_type = absl::StrCat("FLT", s_count);
- const std::string s_postfix = postfixes[ch_count - 1];
- const std::string dilation_x =
- op_def.IsBatchSupported() ? "args.dilation_x * args.src_tensor.Batch()" : "args.dilation_x";
- for (int ky = 0; ky < weights_shape.h; ++ky)
- {
- std::string s_y = absl::StrCat("(start_y + ", ky, " * args.dilation_y)");
- if (manual_clamp)
- {
- c += " {\n";
- c += " bool y_out = " + s_y + " < 0 || " + s_y + " >= args.src_tensor.Height();\n";
- }
- for (int kx = 0; kx < weights_shape.w; ++kx)
- {
- c += " {\n";
- std::string s_x = absl::StrCat("(start_x + ", kx, " * " + dilation_x + ")");
- if (manual_clamp)
- {
- c += " bool x_out = " + s_x + "< 0 || " + s_x + ">= args.src_tensor.Width();\n";
- c += " " + s_type + " src = x_out || y_out ?";
- c += "(" + s_type + ")(0.0) : args.src_tensor.Read(" + s_x + ", " + s_y + ", " +
- std::to_string(s) + ")" + s_postfix + ";\n";
- }
- else
- {
- c += " " + s_type + " src = args.src_tensor.Read(" + s_x + ", " + s_y + ", " +
- std::to_string(s) + ")" + s_postfix + ";\n";
- }
- for (int d = 0; d < out_z; ++d)
- {
- c += " " + s_conv + "(r[" + std::to_string(d) + "], src, args.weigths.GetPtr(),";
- c += " " + std::to_string(filters_counter) + ");\n";
- filters_counter += ch_count;
- }
- c += " }\n";
- }
- if (manual_clamp)
- {
- c += " }\n";
- }
- }
- }
- for (int i = 0; i < out_z; ++i)
- {
- std::string s_i = std::to_string(i);
- c += " {\n";
- c += " FLT4 res = TO_FLT4(r[" + s_i + "]) + args.biases.Read(" + s_i + ");\n";
- c += " args.dst_tensor.Write(res, X, Y, " + s_i + ");\n";
- c += " }\n";
- }
- c += "}\n";
- return c;
-}
-
-} // namespace
-
-bool IsConvConstantsSupported(const DeviceInfo &device_info, const OperationDef &definition,
- const Convolution2DAttributes &attr)
-{
- if (device_info.IsAMD() && definition.precision != CalculationsPrecision::F32 &&
- definition.src_tensors[0].storage_type != TensorStorageType::BUFFER)
- {
-    // BUG: some AMD GPUs crash without this
- return false;
- }
-
- const auto &w_shape = attr.weights.shape;
- const int dst_channels = AlignByN(w_shape.o, 4);
- const int filters_count = w_shape.i * dst_channels * w_shape.h * w_shape.w;
- const int float_size = sizeof(float);
- // TODO F32 and F16
- // definition.precision == CalculationsPrecision::F32 ? sizeof(float) : sizeof(half);
- const int filters_buffer_size = filters_count * float_size;
- const int kConstantMaxSize = GetOptimalMaxConstantSize(device_info);
- const int flt4_registers = DivideRoundUp(w_shape.o, 4);
- return filters_buffer_size <= kConstantMaxSize && flt4_registers <= 8;
-}
-
-GPUOperation CreateConvConstants(const DeviceInfo &device_info, const OperationDef &definition,
- const Convolution2DAttributes &attr)
-{
- GPUOperation op(definition);
- UploadWeightsForConvConstants(attr.weights, definition.precision, &op);
- op.args_.AddInt("stride_x", attr.strides.w);
- op.args_.AddInt("stride_y", attr.strides.h);
- op.args_.AddInt("padding_x", -attr.padding.prepended.w);
- op.args_.AddInt("padding_y", -attr.padding.prepended.h);
- op.args_.AddInt("dilation_x", attr.dilations.w);
- op.args_.AddInt("dilation_y", attr.dilations.h);
- op.tensor_to_grid_ = TensorToGrid::kWBToX_HDToY_ZIs1;
-
- const bool stride_correction = definition.IsBatchSupported() && attr.strides.w != 1;
- op.code_ =
- GenerateConvolutionConstantCode(definition, attr.weights.shape, stride_correction, &op);
- if (definition.precision == CalculationsPrecision::F16 && device_info.IsAdreno3xx())
- {
- op.compiler_options_.push_back(CompilerOptions::ADRENO_FULL_SIMD_LINE);
- }
- if (definition.precision != CalculationsPrecision::F32 && device_info.IsPowerVR())
- {
-    // BUG: some PowerVR GPUs (GE8320) produce incorrect results without this
- op.compiler_options_.push_back(CompilerOptions::CL_OPT_DISABLE);
- }
-
- TensorLinearDescriptor desc;
- desc.storage_type = LinearStorageType::BUFFER;
- desc.element_type = definition.GetDataType();
- desc.memory_type = MemoryType::CONSTANT;
- desc.UploadLinearData(attr.bias);
- op.args_.AddObject("biases", absl::make_unique<TensorLinearDescriptor>(std::move(desc)));
- return op;
-}
-
-} // namespace gpu_cl
-} // namespace backend
-} // namespace onert
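
IsConvConstantsSupported above boils down to two budgets: the packed weights must fit the per-GPU __constant limit, and the accumulators must fit in at most 8 FLT4 registers. Worked numbers for an assumed 3x3 convolution with I=8, O=8 and F32 weights:

    #include <cstdio>

    static int DivideRoundUp(int n, int d) { return (n + d - 1) / d; }
    static int AlignByN(int n, int a) { return DivideRoundUp(n, a) * a; }

    int main()
    {
      const int kBudgetOldAdreno = 256 * 10; // ~2.5KB, gpu_version < 600
      const int kBudgetNewAdreno = 256 * 14; // ~3.5KB, gpu_version >= 600
      const int i = 8, o = 8, kh = 3, kw = 3;
      const int filters_count = i * AlignByN(o, 4) * kh * kw; // 576
      const int buffer_size = filters_count * 4;              // 2304 bytes, F32
      const int flt4_registers = DivideRoundUp(o, 4);         // 2
      std::printf("fits <600: %d, fits >=600: %d, registers ok: %d\n",
                  buffer_size <= kBudgetOldAdreno, buffer_size <= kBudgetNewAdreno,
                  flt4_registers <= 8);
      return 0;
    }
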
diff --git a/runtime/onert/backend/gpu_cl/open_cl/kernels/ConvConstants.h b/runtime/onert/backend/gpu_cl/open_cl/kernels/ConvConstants.h
deleted file mode 100644
index be6670c53..000000000
--- a/runtime/onert/backend/gpu_cl/open_cl/kernels/ConvConstants.h
+++ /dev/null
@@ -1,137 +0,0 @@
-/*
- * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
- * Copyright 2019 The TensorFlow Authors. All Rights Reserved.
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#ifndef __ONERT_BACKEND_GPU_CL_OPENCL_KERNELS_CONV_CONSTANTS_H__
-#define __ONERT_BACKEND_GPU_CL_OPENCL_KERNELS_CONV_CONSTANTS_H__
-
-#include "open_cl/Buffer.h"
-#include "open_cl/kernels/GpuOperation.h"
-#include "open_cl/LinearStorage.h"
-#include "open_cl/Tensor.h"
-#include "open_cl/Util.h"
-#include "open_cl/DataType.h"
-#include "open_cl/Operations.h"
-#include "open_cl/Shape.h"
-#include "open_cl/Status.h"
-#include "open_cl/Tensor.h"
-#include "open_cl/Types.h"
-
-namespace onert
-{
-namespace backend
-{
-namespace gpu_cl
-{
-
-template <DataType S, typename T>
-void RearrangeWeightsForConvConstants(const InternalTensor<OHWI, S> &weights, absl::Span<T> dst)
-{
- const int dst_depth = DivideRoundUp(weights.shape.o, 4);
- const int src_depth = DivideRoundUp(weights.shape.i, 4);
- const int kernel_x = weights.shape.w;
- const int kernel_y = weights.shape.h;
-
- int counter = 0;
- for (int s = 0; s < src_depth; ++s)
- {
- for (int y = 0; y < kernel_y; ++y)
- {
- for (int x = 0; x < kernel_x; ++x)
- {
- for (int d = 0; d < dst_depth; ++d)
- {
- const int channels_count = std::min(4, weights.shape.i - s * 4);
- T filters[4];
- for (int i = 0; i < 4; ++i)
- {
- for (int j = 0; j < channels_count; ++j)
- {
- const int s_ch = s * 4 + j;
- const int d_ch = d * 4 + i;
- if (s_ch < weights.shape.i && d_ch < weights.shape.o)
- {
- const int f_index = weights.shape.LinearIndex({d_ch, y, x, s_ch});
- filters[i][j] = weights.data[f_index];
- }
- else
- {
- filters[i][j] = 0.0f;
- }
- }
- }
- T filters_new[4];
- for (int i = 0; i < 4; ++i)
- {
- for (int j = 0; j < 4; ++j)
- {
- filters_new[i][j] = filters[j][i];
- }
- }
- for (int i = 0; i < channels_count; ++i)
- {
- dst[counter++] = filters_new[i];
- }
- }
- }
- }
- }
-}
-
-template <DataType T>
-void UploadWeightsForConvConstants(const InternalTensor<OHWI, T> &weights,
- CalculationsPrecision precision, GPUOperation *op)
-{
- const int dst_depth = DivideRoundUp(weights.shape.o, 4);
- const int kernel_x = weights.shape.w;
- const int kernel_y = weights.shape.h;
-
- const bool f32_weights = precision == CalculationsPrecision::F32;
- const int float_size = f32_weights ? 4 : 2;
- const int float_count = weights.shape.i * dst_depth * 4 * kernel_x * kernel_y;
-
- BufferDescriptor desc;
- desc.element_type = f32_weights ? DataType::FLOAT32 : DataType::FLOAT16;
- desc.element_size = 4;
- desc.memory_type = MemoryType::CONSTANT;
- desc.size = float_size * float_count;
- desc.data.resize(desc.size);
-
- if (f32_weights)
- {
- float4 *ptr = reinterpret_cast<float4 *>(desc.data.data());
- RearrangeWeightsForConvConstants(weights, absl::MakeSpan(ptr, float_count / 4));
- }
- // else
- // {
- // half4 *ptr = reinterpret_cast<half4 *>(desc.data.data());
- // RearrangeWeightsForConvConstants(weights, absl::MakeSpan(ptr, float_count / 4));
- // }
-
- op->args_.AddObject("weigths", absl::make_unique<BufferDescriptor>(std::move(desc)));
-}
-
-bool IsConvConstantsSupported(const DeviceInfo &device_info, const OperationDef &definition,
- const Convolution2DAttributes &attr);
-
-GPUOperation CreateConvConstants(const DeviceInfo &device_info, const OperationDef &definition,
- const Convolution2DAttributes &attr);
-
-} // namespace gpu_cl
-} // namespace backend
-} // namespace onert
-
-#endif // __ONERT_BACKEND_GPU_CL_OPENCL_KERNELS_CONV_CONSTANTS_H__
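
RearrangeWeightsForConvConstants above gathers a 4x4 block of filter values, transposes it, and then emits only channels_count rows, so partially filled input slices do not waste constant memory. The transpose step in isolation (toy 4-vectors standing in for the T template parameter):

    #include <array>
    #include <cstdio>

    using Vec4 = std::array<float, 4>;

    int main()
    {
      // filters[i][j]: output channel i, input channel j, as gathered above.
      std::array<Vec4, 4> filters = {{{1, 2, 3, 4}, {5, 6, 7, 8},
                                      {9, 10, 11, 12}, {13, 14, 15, 16}}};
      std::array<Vec4, 4> filters_new{};
      for (int i = 0; i < 4; ++i)
        for (int j = 0; j < 4; ++j)
          filters_new[i][j] = filters[j][i]; // same swap as above
      const int channels_count = 3; // e.g. only 3 input channels left in this slice
      for (int i = 0; i < channels_count; ++i) // emit rows only for real channels
        std::printf("%g %g %g %g\n", filters_new[i][0], filters_new[i][1],
                    filters_new[i][2], filters_new[i][3]);
      return 0;
    }
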
diff --git a/runtime/onert/backend/gpu_cl/open_cl/kernels/ConvPowervr.cc b/runtime/onert/backend/gpu_cl/open_cl/kernels/ConvPowervr.cc
deleted file mode 100644
index 5cb0c2719..000000000
--- a/runtime/onert/backend/gpu_cl/open_cl/kernels/ConvPowervr.cc
+++ /dev/null
@@ -1,1653 +0,0 @@
-/*
- * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
- * Copyright 2019 The TensorFlow Authors. All Rights Reserved.
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#include "open_cl/kernels/ConvPowervr.h"
-
-#include <algorithm>
-#include <string>
-#include <utility>
-
-#include "absl/strings/substitute.h"
-#include "open_cl/kernels/Util.h"
-#include "open_cl/kernels/WorkGroupPicking.h"
-#include "open_cl/Precision.h"
-#include "open_cl/TensorType.h"
-#include "open_cl/DataType.h"
-#include "open_cl/Shape.h"
-#include "open_cl/Status.h"
-
-namespace onert
-{
-namespace backend
-{
-namespace gpu_cl
-{
-namespace
-{
-std::string GenerateUploadByThreads(const std::string &local_ptr_name,
- const std::string &global_ptr_name,
- const std::string &global_offset_name,
- const std::string &lid_name, int total_work_items,
- int elements_to_upload)
-{
- std::string c;
- std::string offset = global_offset_name.empty() ? "" : global_offset_name + " + ";
- const int groups = elements_to_upload / total_work_items;
- const int remainder = elements_to_upload % total_work_items;
- for (int i = 0; i < groups; ++i)
- {
- c += " " + local_ptr_name + "[" + lid_name + " + " + std::to_string(total_work_items * i) +
- "] = " + global_ptr_name + "[" + offset + lid_name + " + " +
- std::to_string(total_work_items * i) + "];\n";
- }
- if (remainder != 0)
- {
- c += " if (" + lid_name + " < " + std::to_string(reminder) + ") {\n";
- c += " " + local_ptr_name + "[" + lid_name + " + " +
- std::to_string(total_work_items * groups) + "] = " + global_ptr_name + "[" + offset +
- lid_name + " + " + std::to_string(total_work_items * groups) + "];\n";
- c += " }\n";
- }
- return c;
-}
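
As a sketch of what GenerateUploadByThreads emits: with local_ptr_name "weights_cache", global_ptr_name "filters_loc", no global offset, lid_name "lid", total_work_items = 32 and elements_to_upload = 100 (hypothetical values), the generated OpenCL is, up to indentation:

    weights_cache[lid + 0] = filters_loc[lid + 0];
    weights_cache[lid + 32] = filters_loc[lid + 32];
    weights_cache[lid + 64] = filters_loc[lid + 64];
    if (lid < 4) {
      weights_cache[lid + 96] = filters_loc[lid + 96];
    }

Each of the 32 work-items copies three strided elements, and the first four pick up the remainder.
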
-
-std::string GenerateAsyncUpload(const std::string &local_ptr_name,
- const std::string &global_ptr_name,
- const std::string &global_offset_name, int elements_to_upload)
-{
- std::string c;
- std::string offset = global_offset_name.empty() ? "" : " + " + global_offset_name;
- c += " async_work_group_copy(" + local_ptr_name + ", " + global_ptr_name + offset + ", " +
- std::to_string(elements_to_upload) + ", 0);\n";
- return c;
-}
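
GenerateAsyncUpload maps to the async_work_group_copy builtin, which every work-item in the group must reach; for the same hypothetical names and 128 elements it emits a single line:

    async_work_group_copy(weights_cache, filters_loc, 128, 0);
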
-
-std::string GenerateBlockCoords(const int4 &block_size, const int3 &work_group_launch_order,
- bool linear_spatial, bool need_depth)
-{
- std::string c;
- int3 launch_remap;
- launch_remap[work_group_launch_order.x] = 0;
- launch_remap[work_group_launch_order.y] = 1;
- launch_remap[work_group_launch_order.z] = 2;
- if (linear_spatial)
- {
- if (work_group_launch_order[0] == 0)
- {
- c += " int linear_spatial = get_global_id(0);\n";
- }
- else
- {
- c += " int linear_spatial = get_group_id(" + std::to_string(launch_remap[0]) +
- ") * get_local_size(0) + get_local_id(0);\n";
- }
- if (need_depth)
- {
- c += " int DST_X = (linear_spatial % args.task_size_x) * " + std::to_string(block_size.x) +
- ";\n";
- c += " linear_spatial = linear_spatial / args.task_size_x;\n";
- c += " int DST_Y = (linear_spatial % args.task_size_y) * " + std::to_string(block_size.y) +
- ";\n";
- c += " int DST_Z = (linear_spatial / args.task_size_y) * " + std::to_string(block_size.z) +
- ";\n";
- }
- else
- {
- c += " int DST_Y = (linear_spatial / args.task_size_x) * " + std::to_string(block_size.y) +
- ";\n";
- c += " int DST_X = (linear_spatial % args.task_size_x) * " + std::to_string(block_size.x) +
- ";\n";
- }
- if (work_group_launch_order[1] == 1)
- {
- c += " int DST_S = get_global_id(1) * " + std::to_string(block_size.w) + ";\n";
- }
- else
- {
- c += " int DST_S = (get_group_id(" + std::to_string(launch_remap[1]) +
- ") * get_local_size(1) + get_local_id(1)) * " + std::to_string(block_size.w) + ";\n";
- }
- }
- else
- {
- if (work_group_launch_order[0] == 0)
- {
- c += " int DST_X = get_global_id(0) * " + std::to_string(block_size.x) + ";\n";
- }
- else
- {
- c += " int DST_X = (get_group_id(" + std::to_string(launch_remap[0]) +
- ") * get_local_size(0) + get_local_id(0)) * " + std::to_string(block_size.x) + ";\n";
- }
- std::string global_id_1;
- if (work_group_launch_order[1] == 1)
- {
- global_id_1 = "get_global_id(1)";
- }
- else
- {
- global_id_1 = "(get_group_id(" + std::to_string(launch_remap[1]) +
- ") * get_local_size(1) + get_local_id(1))";
- }
- if (need_depth)
- {
- c += " int linear_id_1 = " + global_id_1 + ";\n";
- c +=
- " int DST_Z = (linear_id_1 / args.task_size_y) * " + std::to_string(block_size.z) + ";\n";
- c +=
- " int DST_Y = (linear_id_1 % args.task_size_y) * " + std::to_string(block_size.y) + ";\n";
- }
- else
- {
- c += " int DST_Y = " + global_id_1 + " * " + std::to_string(block_size.y) + ";\n";
- }
- if (work_group_launch_order[2] == 2)
- {
- c += " int DST_S = get_global_id(2) * " + std::to_string(block_size.w) + ";\n";
- }
- else
- {
- c += " int DST_S = (get_group_id(" + std::to_string(launch_remap[2]) +
- ") * get_local_size(2) + get_local_id(2)) * " + std::to_string(block_size.w) + ";\n";
- }
- }
-
- return c;
-}
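
For example, with linear_spatial = false, need_depth = false, work_group_launch_order = (0, 1, 2) and block_size = (2, 1, 1, 4) (hypothetical values), the emitted coordinate computation reduces to:

    int DST_X = get_global_id(0) * 2;
    int DST_Y = get_global_id(1) * 1;
    int DST_S = get_global_id(2) * 4;

The launch_remap indirection only matters when the launch order is permuted, in which case group id and local id are recombined by hand instead of using get_global_id.
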
-} // namespace
-
-ConvPowerVR::ConvPowerVR(const OperationDef &definition, const Convolution2DAttributes &attr,
- const DeviceInfo &device_info, const BHWC *dst_shape)
- : GPUOperation(definition), stride_(attr.strides.w, attr.strides.h, 1, 1),
- padding_(-attr.padding.prepended.w, -attr.padding.prepended.h, 0, 0),
- kernel_size_(attr.weights.shape.w, attr.weights.shape.h, 1, 1),
- dilation_(attr.dilations.w, attr.dilations.h, 1, 1),
- conv_params_(GuessBestParams(device_info, definition, attr, dst_shape))
-{
-}
-
-ConvPowerVR::ConvPowerVR(const OperationDef &definition, const Convolution2DAttributes &attr,
- const BHWC &weights_shape, const DeviceInfo &device_info,
- const BHWC *dst_shape)
- : GPUOperation(definition), stride_(attr.strides.w, attr.strides.h, 1, 1),
- padding_(-attr.padding.prepended.w, -attr.padding.prepended.h, 0, 0),
- kernel_size_(weights_shape.w, weights_shape.h, 1, 1),
- dilation_(attr.dilations.w, attr.dilations.h, 1, 1),
- conv_params_(GuessBestParams(device_info, definition, attr, weights_shape, dst_shape))
-{
-}
-
-ConvPowerVR::ConvPowerVR(const OperationDef &definition, const FullyConnectedAttributes &attr,
- const DeviceInfo &device_info, const BHWC *dst_shape)
- : GPUOperation(definition), stride_(1, 1, 1, 1), padding_(0, 0, 0, 0), kernel_size_(1, 1, 1, 1),
- dilation_(1, 1, 1, 1), conv_params_(GuessBestParams(device_info, definition, attr, dst_shape))
-{
-}
-
-ConvPowerVR::ConvPowerVR(const OperationDef &definition)
- : GPUOperation(definition), stride_(1, 1, 1, 1), padding_(0, 0, 0, 0), kernel_size_(1, 1, 1, 1),
- dilation_(1, 1, 1, 1)
-{
-}
-
-ConvPowerVR::ConvPowerVR(ConvPowerVR &&operation)
- : GPUOperation(std::move(operation)), stride_(operation.stride_), padding_(operation.padding_),
- kernel_size_(operation.kernel_size_), dilation_(operation.dilation_),
- conv_params_(operation.conv_params_)
-{
-}
-
-ConvPowerVR::ConvPowerVR(const OperationDef &definition, const Convolution3DAttributes &attr,
- const DeviceInfo &device_info, const BHWDC *dst_shape)
- : GPUOperation(definition), stride_(attr.strides.w, attr.strides.h, attr.strides.d, 1),
- padding_(-attr.padding.prepended.w, -attr.padding.prepended.h, -attr.padding.prepended.d, 0),
- kernel_size_(attr.weights.shape.w, attr.weights.shape.h, attr.weights.shape.d, 1),
- dilation_(attr.dilations.w, attr.dilations.h, attr.dilations.d, 1),
- conv_params_(GuessBestParams(device_info, definition, attr, dst_shape))
-{
-}
-
-ConvPowerVR &ConvPowerVR::operator=(ConvPowerVR &&operation)
-{
- if (this != &operation)
- {
- std::swap(stride_, operation.stride_);
- std::swap(padding_, operation.padding_);
- std::swap(kernel_size_, operation.kernel_size_);
- std::swap(dilation_, operation.dilation_);
- std::swap(conv_params_, operation.conv_params_);
- GPUOperation::operator=(std::move(operation));
- }
- return *this;
-}
-
-void ConvPowerVR::GenerateCode(const DeviceInfo &device_info)
-{
- if (conv_params_.linear_spatial)
- {
- grid_dimension_ = 2;
- }
- const bool stride_correction = definition_.IsBatchSupported() && stride_.x != 1;
- code_ = GenerateConv(device_info, definition_, stride_correction, conv_params_);
- if (definition_.precision == CalculationsPrecision::F16 && device_info.IsPowerVR())
- {
- compiler_options_.push_back(CompilerOptions::POWERVR_FP16);
- }
- if (conv_params_.IsPrivateMemBroadcast() && device_info.IsCL20OrHigher())
- {
- compiler_options_.push_back(CompilerOptions::CL_2_0);
- }
- bool kernel_is_trivial = conv_params_.x_kernel_is_1 && conv_params_.y_kernel_is_1;
- if (definition_.src_tensors[0].HasAxis(Axis::DEPTH))
- {
- kernel_is_trivial = kernel_is_trivial && conv_params_.z_kernel_is_1;
- }
- if (device_info.IsAdreno3xx() && definition_.precision == CalculationsPrecision::F16 &&
- kernel_is_trivial)
- {
- compiler_options_.push_back(CompilerOptions::ADRENO_FULL_SIMD_LINE);
- }
-}
-
-absl::Status ConvPowerVR::BindArguments(ArgumentsBinder *args)
-{
- if (!conv_params_.x_kernel_is_1)
- {
- RETURN_IF_ERROR(args->SetInt("stride_x", stride_.x));
- RETURN_IF_ERROR(args->SetInt("padding_x", padding_.x * src_[0]->Batch()));
- RETURN_IF_ERROR(args->SetInt("kernel_size_x", kernel_size_.x));
- RETURN_IF_ERROR(args->SetInt("dilation_x", dilation_.x * src_[0]->Batch()));
- }
- if (!conv_params_.y_kernel_is_1)
- {
- RETURN_IF_ERROR(args->SetInt("stride_y", stride_.y));
- RETURN_IF_ERROR(args->SetInt("padding_y", padding_.y));
- RETURN_IF_ERROR(args->SetInt("kernel_size_y", kernel_size_.y));
- RETURN_IF_ERROR(args->SetInt("dilation_y", dilation_.y));
- }
- if (definition_.src_tensors[0].HasAxis(Axis::DEPTH) && !conv_params_.z_kernel_is_1)
- {
- RETURN_IF_ERROR(args->SetInt("stride_z", stride_.z));
- RETURN_IF_ERROR(args->SetInt("padding_z", padding_.z));
- RETURN_IF_ERROR(args->SetInt("kernel_size_z", kernel_size_.z));
- RETURN_IF_ERROR(args->SetInt("dilation_z", dilation_.z));
- }
- if (conv_params_.linear_spatial)
- {
- const int grid_x =
- DivideRoundUp(dst_[0]->Width() * dst_[0]->Batch(), conv_params_.block_size.x);
- RETURN_IF_ERROR(args->SetInt("task_size_x", grid_x));
- }
- if (definition_.src_tensors[0].HasAxis(Axis::DEPTH))
- {
- const int task_size_y = DivideRoundUp(dst_[0]->Height(), conv_params_.block_size.y);
- RETURN_IF_ERROR(args->SetInt("task_size_y", task_size_y));
- }
- return absl::OkStatus();
-}
-
-int3 ConvPowerVR::GetGridSize() const
-{
- const int task_size_x =
- DivideRoundUp(dst_[0]->Width() * dst_[0]->Batch(), conv_params_.block_size.x);
- const int task_size_y = DivideRoundUp(dst_[0]->Height(), conv_params_.block_size.y);
- const int task_size_z = DivideRoundUp(dst_[0]->Depth(), conv_params_.block_size.z);
- const int task_size_s = DivideRoundUp(dst_[0]->Slices(), conv_params_.block_size.w);
- int3 wg;
-
- if (conv_params_.linear_spatial)
- {
- int grid_x = task_size_x * task_size_y;
- if (definition_.src_tensors[0].HasAxis(Axis::DEPTH))
- {
- grid_x *= task_size_z;
- }
- return int3(grid_x, task_size_s, 1);
- }
- else
- {
- int grid_y = task_size_y;
- if (definition_.src_tensors[0].HasAxis(Axis::DEPTH))
- {
- grid_y *= task_size_z;
- }
- return int3(task_size_x, grid_y, task_size_s);
- }
-}
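
A worked example of the grid arithmetic, for a hypothetical destination of width 17, height 13, batch 1 and 8 slices, with block_size = (2, 2, 1, 4) and no DEPTH axis:

    // task_size_x = DivideRoundUp(17 * 1, 2) = 9
    // task_size_y = DivideRoundUp(13, 2)     = 7
    // task_size_s = DivideRoundUp(8, 4)      = 2
    // linear_spatial: grid = (9 * 7, 2, 1) = (63, 2, 1)
    // otherwise:      grid = (9, 7, 2)
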
-
-void ConvPowerVR::GetPossibleKernelWorkGroups(TuningType tuning_type, const DeviceInfo &device_info,
- const KernelInfo &kernel_info,
- std::vector<int3> *work_groups) const
-{
- if (conv_params_.weights_upload_type == WeightsUploadType::LOCAL_MEM_ASYNC_SUBGROUP ||
- conv_params_.weights_upload_type == WeightsUploadType::LOCAL_MEM_BY_THREADS ||
- conv_params_.fixed_work_group_size)
- {
- work_groups->push_back(work_group_size_);
- return;
- }
- GetPossibleWorkGroupsConv(tuning_type, device_info, kernel_info, grid_size_, work_groups);
-}
-
-std::string ConvPowerVR::GenerateConv(const DeviceInfo &device_info, const OperationDef &op_def,
- bool stride_correction, const ConvParams &conv_params)
-{
- auto src_desc = op_def.src_tensors[0];
- src_desc.SetTextureAddressMode(TextureAddressMode::ZERO);
- if (op_def.IsBatchSupported())
- {
- src_desc.SetStateVar("BatchedWidth", "true");
- }
- AddSrcTensor("src_tensor", src_desc);
- if (op_def.src_tensors.size() == 2)
- {
- // dynamic weights
- BufferDescriptor desc;
- desc.element_type = op_def.src_tensors[1].data_type;
- desc.element_size = 4;
- desc.memory_type =
- conv_params.weights_upload_type == ConvPowerVR::WeightsUploadType::CONSTANT_MEM
- ? MemoryType::CONSTANT
- : MemoryType::GLOBAL;
-
- AddSrcBuffer("weights", desc);
- }
-
- const auto &src_def = op_def.src_tensors[0];
-
- auto generate_id = [&](const std::string &x, const std::string &y, const std::string &z) {
- std::string id;
- if (src_def.HasAxis(Axis::WIDTH))
- {
- id += "_w" + x;
- }
- if (src_def.HasAxis(Axis::HEIGHT))
- {
- id += "_h" + y;
- }
- if (src_def.HasAxis(Axis::DEPTH))
- {
- id += "_d" + z;
- }
- return id;
- };
-
- auto generate_id_full = [&](const std::string &x, const std::string &y, const std::string &z,
- const std::string &s) { return generate_id(x, y, z) + "_s" + s; };
-
- auto generate_check = [&](const std::string &x, const std::string &y, const std::string &z) {
- std::string check;
- const std::vector<Axis> axes{Axis::WIDTH, Axis::HEIGHT, Axis::DEPTH};
- const std::vector<std::string> names{"in_x", "in_y", "in_z"};
- const std::vector<bool> is_1{conv_params_.x_kernel_is_1, conv_params_.y_kernel_is_1,
- conv_params_.z_kernel_is_1};
- const std::vector<std::string> coords{x, y, z};
- for (size_t i = 0; i < axes.size(); ++i)
- {
- const auto &axis = axes[i];
- if (src_def.HasAxis(axis) && !src_def.SupportsZeroClamp(axis) && !is_1[i])
- {
- if (!check.empty())
- {
- check += " && ";
- }
- check += names[i] + coords[i];
- }
- }
- return check;
- };
-
- auto dst_desc = op_def.dst_tensors[0];
- if (op_def.IsBatchSupported())
- {
- dst_desc.SetStateVar("BatchedWidth", "true");
- }
- AddDstTensor("dst_tensor", dst_desc);
-
- if (!conv_params_.x_kernel_is_1)
- {
- args_.AddInt("stride_x");
- args_.AddInt("padding_x");
- args_.AddInt("kernel_size_x");
- args_.AddInt("dilation_x");
- }
- if (!conv_params_.y_kernel_is_1)
- {
- args_.AddInt("stride_y");
- args_.AddInt("padding_y");
- args_.AddInt("kernel_size_y");
- args_.AddInt("dilation_y");
- }
- if (src_def.HasAxis(Axis::DEPTH) && !conv_params_.z_kernel_is_1)
- {
- args_.AddInt("stride_z");
- args_.AddInt("padding_z");
- args_.AddInt("kernel_size_z");
- args_.AddInt("dilation_z");
- }
- if (conv_params_.linear_spatial)
- {
- args_.AddInt("task_size_x");
- }
- if (src_def.HasAxis(Axis::DEPTH))
- {
- args_.AddInt("task_size_y");
- }
-
- const bool need_local_mem =
- conv_params.weights_upload_type == ConvPowerVR::WeightsUploadType::LOCAL_MEM_BY_THREADS ||
- conv_params.weights_upload_type == ConvPowerVR::WeightsUploadType::LOCAL_MEM_ASYNC_SUBGROUP;
-
- const int local_mem_size = conv_params.block_size.w * 4 * conv_params.src_depth_loop_size;
-
- const bool use_simd_broadcast = conv_params.IsPrivateMemBroadcast();
- const int simd_size = conv_params.simd_size;
-
- const bool late_oob_check = need_local_mem || use_simd_broadcast;
-
- const std::string weights_space =
- conv_params.weights_upload_type == ConvPowerVR::WeightsUploadType::CONSTANT_MEM ? "__constant"
- : "__global";
-
- const std::string weights_data_type =
- conv_params.weights_data_type == DataType::FLOAT32 ? "float4" : "half4";
-
- const std::string weights_global_ptr = weights_space + " " + weights_data_type + "*";
-
- std::string c = GetCommonDefines(op_def.precision);
- if (use_simd_broadcast)
- {
- if (device_info.cl_version == OpenCLVersion::CL_2_0)
- {
- c += "#pragma OPENCL EXTENSION cl_khr_subgroups : enable\n";
- }
- else if (device_info.SupportsExtension("cl_intel_subgroups"))
- {
- c += "#pragma OPENCL EXTENSION cl_intel_subgroups : enable\n";
- }
- }
- const int4 block_size = conv_params.block_size;
- if (conv_params.fixed_work_group_size)
- {
- c += "__attribute__((reqd_work_group_size(" + std::to_string(work_group_size_.x) + ", " +
- std::to_string(work_group_size_.y) + ", " + std::to_string(work_group_size_.z) + ")))\n";
- }
- if (use_simd_broadcast && device_info.IsIntel())
- {
- c += "__attribute__((intel_reqd_sub_group_size(" + std::to_string(simd_size) + ")))\n";
- }
- std::string dst_oob_check;
- if (src_def.HasAxis(Axis::DEPTH))
- {
- if (conv_params.linear_spatial)
- {
- dst_oob_check = "DST_Z >= args.dst_tensor.Depth() || DST_S >= "
- "args.dst_tensor.Slices()";
- }
- else
- {
- dst_oob_check = "DST_X >= args.dst_tensor.Width() || DST_Z >= "
- "args.dst_tensor.Depth() || DST_S >= args.dst_tensor.Slices()";
- }
- }
- else
- {
- if (conv_params.linear_spatial)
- {
- dst_oob_check = "DST_Y >= args.dst_tensor.Height() || DST_S >= "
- "args.dst_tensor.Slices()";
- }
- else
- {
- dst_oob_check = "DST_X >= args.dst_tensor.Width() || DST_Y >= "
- "args.dst_tensor.Height() || DST_S >= args.dst_tensor.Slices()";
- }
- }
- c += "__kernel void main_function(\n";
- c += "$0) {\n";
- c += GenerateBlockCoords(conv_params.block_size, work_group_launch_order_,
- conv_params.linear_spatial, src_def.HasAxis(Axis::DEPTH));
- if (!late_oob_check)
- {
- c += " if (" + dst_oob_check + ") {\n";
- c += " return;\n";
- c += " }\n";
- }
- if (conv_params.weights_upload_type == ConvPowerVR::WeightsUploadType::LOCAL_MEM_BY_THREADS)
- {
- if (conv_params.linear_spatial)
- {
- c += " int lid = get_local_id(0);\n";
- }
- else
- {
- c += " int lid = get_local_id(1) * " + std::to_string(work_group_size_.x) +
- " + get_local_id(0);\n";
- }
- }
- if (use_simd_broadcast)
- {
- c += " int simd_id = get_sub_group_local_id();\n";
- }
- for (int s = 0; s < block_size.w; ++s)
- {
- const std::string sind = std::to_string(s);
- for (int z = 0; z < block_size.z; ++z)
- {
- const std::string zind = std::to_string(z);
- for (int y = 0; y < block_size.y; ++y)
- {
- const std::string yind = std::to_string(y);
- for (int x = 0; x < block_size.x; ++x)
- {
- const std::string xind = std::to_string(x);
- c += " ACCUM_FLT4 r" + generate_id_full(xind, yind, zind, sind) +
- " = (ACCUM_FLT4)(0.0f, 0.0f, 0.0f, 0.0f);\n";
- }
- }
- }
- }
- if (!conv_params_.x_kernel_is_1)
- {
- for (int x = 0; x < block_size.x; ++x)
- {
- const std::string xind = std::to_string(x);
- const std::string xc = "(DST_X + " + xind + ")";
- if (stride_correction)
- {
- c += " int xc" + xind + " = " +
- GetXStrideCorrected(xc, "args.src_tensor.Batch()", "args.stride_x", "args.padding_x") +
- ";\n";
- }
- else
- {
- c += " int xc" + xind + " = " + xc + " * args.stride_x + args.padding_x;\n";
- }
- }
- }
- else
- {
- for (int x = 0; x < block_size.x; ++x)
- {
- const std::string xind = std::to_string(x);
- c += " int xc" + xind + " = DST_X + " + xind + ";\n";
- if (!src_def.CanReadOutOfBorder(Axis::WIDTH))
- {
- c += " xc" + xind + " = clamp(xc" + xind + ", 0, args.src_tensor.Width() - 1);\n";
- }
- }
- }
- if (!conv_params_.y_kernel_is_1)
- {
- for (int y = 0; y < block_size.y; ++y)
- {
- const std::string yind = std::to_string(y);
- const std::string yc = "(DST_Y + " + yind + ")";
- c += " int yc" + yind + " = " + yc + " * args.stride_y + args.padding_y;\n";
- }
- }
- else
- {
- for (int y = 0; y < block_size.y; ++y)
- {
- const std::string yind = std::to_string(y);
- c += " int yc" + yind + " = DST_Y + " + yind + ";\n";
- if (!src_def.CanReadOutOfBorder(Axis::HEIGHT))
- {
- c += " yc" + yind + " = clamp(yc" + yind + ", 0, args.src_tensor.Height() - 1);\n";
- }
- }
- }
- if (src_def.HasAxis(Axis::DEPTH))
- {
- if (!conv_params_.z_kernel_is_1)
- {
- for (int z = 0; z < block_size.z; ++z)
- {
- const std::string zind = std::to_string(z);
- const std::string zc = "(DST_Z + " + zind + ")";
- c += " int zc" + zind + " = " + zc + " * args.stride_z + args.padding_z;\n";
- }
- }
- else
- {
- for (int z = 0; z < block_size.z; ++z)
- {
- const std::string zind = std::to_string(z);
- c += " int zc" + zind + " = DST_Z + " + zind + ";\n";
- if (!src_def.CanReadOutOfBorder(Axis::DEPTH))
- {
- c += " zc" + zind + " = clamp(zc" + zind + ", 0, args.src_tensor.Depth() - 1);\n";
- }
- }
- }
- }
- bool trivial_kernel_size = conv_params_.x_kernel_is_1 && conv_params_.y_kernel_is_1;
- if (src_def.HasAxis(Axis::DEPTH))
- {
- trivial_kernel_size = trivial_kernel_size && conv_params_.z_kernel_is_1;
- }
- if (need_local_mem)
- {
- c += " __local " + weights_data_type + " weights_cache[" + std::to_string(local_mem_size) +
- "];\n";
- }
- else if (conv_params.AreWeightsBuffer())
- {
- c += " " + weights_global_ptr + " weights_cache;\n";
- }
- else if (!trivial_kernel_size)
- {
- c += " int filter_offset = 0;\n";
- }
- if (conv_params.AreWeightsBuffer())
- {
- if (conv_params.different_weights_for_height)
- {
- c += " " + weights_global_ptr +
- " filters_loc = args.weights.GetPtr() + (DST_S * "
- "args.src_tensor.Height() + DST_Y * " +
- std::to_string(block_size.w) + ") * 4 * args.src_tensor.Slices();\n";
- }
- else
- {
- std::string kernel_spatial_offset = "";
- if (!conv_params_.x_kernel_is_1)
- {
- kernel_spatial_offset += " * args.kernel_size_x";
- }
- if (!conv_params_.y_kernel_is_1)
- {
- kernel_spatial_offset += " * args.kernel_size_y";
- }
- if (src_def.HasAxis(Axis::DEPTH) && !conv_params_.z_kernel_is_1)
- {
- kernel_spatial_offset += " * args.kernel_size_z";
- }
- c += " " + weights_global_ptr +
- " filters_loc = args.weights.GetPtr() + DST_S * 4 * "
- "args.src_tensor.Slices()" +
- kernel_spatial_offset + ";\n";
- }
- }
- if (src_def.HasAxis(Axis::DEPTH) && !conv_params_.z_kernel_is_1)
- {
- c += " for (int kz = 0; kz < args.kernel_size_z; ++kz) {\n";
- for (int z = 0; z < block_size.z; ++z)
- {
- const std::string zck = "zck" + std::to_string(z);
- c += " int zck" + std::to_string(z) + " = kz * args.dilation_z + zc" + std::to_string(z) +
- ";\n";
- if (!src_def.SupportsZeroClamp(Axis::DEPTH))
- {
- c += " bool in_z" + std::to_string(z) + " = " + zck + " >= 0 && " + zck +
- " < args.src_tensor.Depth();\n";
- if (!src_def.CanReadOutOfBorder(Axis::DEPTH))
- {
- c += " " + zck + " = clamp(" + zck + ", 0, args.src_tensor.Depth() - 1);\n";
- }
- }
- }
- }
- if (!conv_params_.y_kernel_is_1)
- {
- c += " for (int ky = 0; ky < args.kernel_size_y; ++ky) {\n";
- for (int y = 0; y < block_size.y; ++y)
- {
- const std::string yck = "yck" + std::to_string(y);
- c += " int " + yck + " = ky * args.dilation_y + yc" + std::to_string(y) + ";\n";
- if (!src_def.SupportsZeroClamp(Axis::HEIGHT))
- {
- c += " bool in_y" + std::to_string(y) + " = " + yck + " >= 0 && " + yck +
- " < args.src_tensor.Height();\n";
- if (!src_def.CanReadOutOfBorder(Axis::HEIGHT))
- {
- c += " " + yck + " = clamp(" + yck + ", 0, args.src_tensor.Height() - 1);\n";
- }
- }
- }
- }
- if (!conv_params_.x_kernel_is_1)
- {
- c += " for (int kx = 0; kx < args.kernel_size_x; ++kx) {\n";
- for (int x = 0; x < block_size.x; ++x)
- {
- const std::string xck = "xck" + std::to_string(x);
- c += " int xck" + std::to_string(x) + " = kx * args.dilation_x + xc" + std::to_string(x) +
- ";\n";
- if (!src_def.SupportsZeroClamp(Axis::WIDTH))
- {
- c += " bool in_x" + std::to_string(x) + " = " + xck + " >= 0 && " + xck +
- " < args.src_tensor.Width();\n";
- if (!src_def.CanReadOutOfBorder(Axis::WIDTH))
- {
- c += " " + xck + " = clamp(" + xck + ", 0, args.src_tensor.Width() - 1);\n";
- }
- }
- }
- }
- const bool need_multiple_slice_strides =
- src_def.ReturnsZeroForNegOneRead() && !trivial_kernel_size;
- for (int z = 0; z < block_size.z; ++z)
- {
- const std::string zind = std::to_string(z);
- for (int y = 0; y < block_size.y; ++y)
- {
- const std::string yind = std::to_string(y);
- for (int x = 0; x < block_size.x; ++x)
- {
- const std::string xind = std::to_string(x);
- std::string xc = conv_params.x_kernel_is_1 ? "xc" + xind : "xck" + xind;
- std::string yc = conv_params.y_kernel_is_1 ? "yc" + yind : "yck" + yind;
- const std::string id = generate_id(xind, yind, zind);
- std::string coords = "" + xc + ", " + yc;
- if (src_def.HasAxis(Axis::DEPTH))
- {
- std::string zc = conv_params.z_kernel_is_1 ? "zc" + zind : "zck" + zind;
- coords += ", " + zc;
- }
- if (src_def.IsLinear())
- {
- c += " args.src_tensor.GetAddress(addr" + id + ", " + coords + ", 0);\n";
- if (need_multiple_slice_strides)
- {
- const std::string check = generate_check(xind, yind, zind);
- c += " addr" + id + " = select(-1, addr" + id + ", (" + check + "));\n";
- c +=
- " int ds" + id + " = select(0, args.src_tensor.SliceStride(), (" + check + "));\n";
- }
- }
- }
- }
- }
- if (src_def.IsLinear() && !need_multiple_slice_strides)
- {
- c += " int ds = args.src_tensor.SliceStride();\n";
- }
-
- auto declare_src = [&]() {
- for (int z = 0; z < block_size.z; ++z)
- {
- const std::string zind = std::to_string(z);
- for (int y = 0; y < block_size.y; ++y)
- {
- const std::string yind = std::to_string(y);
- for (int x = 0; x < block_size.x; ++x)
- {
- const std::string xind = std::to_string(x);
- const std::string id = generate_id(xind, yind, zind);
- c += " " + weights_data_type + " src" + id + ";\n";
- }
- }
- }
- };
- const bool conditional_read = device_info.IsMali();
- auto read_src = [&]() {
- const std::string cl_type = ToCLDataType(conv_params.weights_data_type);
- for (int z = 0; z < block_size.z; ++z)
- {
- const std::string zind = std::to_string(z);
- for (int y = 0; y < block_size.y; ++y)
- {
- const std::string yind = std::to_string(y);
- for (int x = 0; x < block_size.x; ++x)
- {
- const std::string xind = std::to_string(x);
- std::string id = generate_id(xind, yind, zind);
- const std::string check = generate_check(xind, yind, zind);
- std::string address;
- if (src_def.IsLinear())
- {
- address = "addr" + id;
- }
- else
- {
- std::string xc = conv_params.x_kernel_is_1 ? "xc" + xind : "xck" + xind;
- std::string yc = conv_params.y_kernel_is_1 ? "yc" + yind : "yck" + yind;
- address = "" + xc + ", " + yc;
- if (src_def.HasAxis(Axis::DEPTH))
- {
- std::string zc = conv_params.z_kernel_is_1 ? "zc" + zind : "zck" + zind;
- address += ", " + zc;
- }
- address += ", s";
- }
- if (src_def.ReturnsZeroForNegOneRead())
- {
- c += " src" + id + " = args.src_tensor.Read<" + cl_type + ">(" + address + ");\n";
- const std::string ds = trivial_kernel_size ? "ds" : "ds" + id;
- c += " " + address + " += " + ds + ";\n";
- }
- else
- {
- if (!check.empty())
- {
- if (conditional_read)
- {
- c += " src" + id + " = " + check + " ? args.src_tensor.Read<" + cl_type + ">(" +
- address + ") : (FLT4)(0.0f);\n";
- }
- else
- {
- c += " src" + id + " = args.src_tensor.Read<" + cl_type + ">(" + address +
- ") * (FLT)(" + check + ");\n";
- }
- }
- else
- {
- c += " src" + id + " = args.src_tensor.Read<" + cl_type + ">(" + address + ");\n";
- }
- if (src_def.IsLinear())
- {
- c += " " + address + " += ds;\n";
- }
- }
- }
- }
- }
- };
- const bool weights_type_as_accum_type = !(op_def.precision == CalculationsPrecision::F32_F16 &&
- conv_params.weights_data_type == DataType::FLOAT16);
- auto conv_core = [&](int shared_offset) {
- const std::string channels[] = {"x", "y", "z", "w"};
- for (int s = 0; s < block_size.w; ++s)
- {
- const std::string sind = std::to_string(s);
- if (weights_type_as_accum_type)
- {
- for (int ch = 0; ch < 4; ++ch)
- {
- for (int z = 0; z < block_size.z; ++z)
- {
- const std::string zind = std::to_string(z);
- for (int y = 0; y < block_size.y; ++y)
- {
- const std::string yind = std::to_string(y);
- for (int x = 0; x < block_size.x; ++x)
- {
- const std::string xind = std::to_string(x);
- std::string R = "r" + generate_id_full(xind, yind, zind, sind);
- std::string S = "src" + generate_id(xind, yind, zind);
- if (use_simd_broadcast)
- {
- int simd_id = (s * 4 + ch + shared_offset) / simd_size;
- int thread_id = (s * 4 + ch + shared_offset) % simd_size;
- std::string w_val_x = "sub_group_broadcast(simd_w" + std::to_string(simd_id) +
- ".x, " + std::to_string(thread_id) + "u)";
- std::string w_val_y = "sub_group_broadcast(simd_w" + std::to_string(simd_id) +
- ".y, " + std::to_string(thread_id) + "u)";
- std::string w_val_z = "sub_group_broadcast(simd_w" + std::to_string(simd_id) +
- ".z, " + std::to_string(thread_id) + "u)";
- std::string w_val_w = "sub_group_broadcast(simd_w" + std::to_string(simd_id) +
- ".w, " + std::to_string(thread_id) + "u)";
- c += " " + R + ".x += " + w_val_x + " * " + S + "." + channels[ch] + ";\n";
- c += " " + R + ".y += " + w_val_y + " * " + S + "." + channels[ch] + ";\n";
- c += " " + R + ".z += " + w_val_z + " * " + S + "." + channels[ch] + ";\n";
- c += " " + R + ".w += " + w_val_w + " * " + S + "." + channels[ch] + ";\n";
- }
- else
- {
- const std::string weight_id = std::to_string(s * 4 + ch + shared_offset);
- std::string w_val;
- if (conv_params.AreWeightsBuffer())
- {
- w_val = "weights_cache[" + weight_id + "]";
- }
- else
- {
- w_val = "f" + weight_id;
- }
- c += " " + R + " += " + w_val + " * " + S + "." + channels[ch] + ";\n";
- }
- }
- }
- }
- }
- }
- else
- { // F32_F16 precision and weights type is float16
- for (int z = 0; z < block_size.z; ++z)
- {
- const std::string zind = std::to_string(z);
- for (int y = 0; y < block_size.y; ++y)
- {
- const std::string yind = std::to_string(y);
- for (int x = 0; x < block_size.x; ++x)
- {
- const std::string xind = std::to_string(x);
- std::string R = "r" + generate_id_full(xind, yind, zind, sind);
- std::string S = "src" + generate_id(xind, yind, zind);
- std::vector<std::string> F(4);
- for (int i = 0; i < 4; ++i)
- {
- std::string weight_id = std::to_string(s * 4 + i + shared_offset);
- if (conv_params.AreWeightsBuffer())
- {
- F[i] = "weights_cache[" + weight_id + "]";
- }
- else
- {
- F[i] = "f" + weight_id;
- }
- }
- c += " " + R + " += convert_float4(" + S + ".x * " + F[0] + " + " + S + ".y * " +
- F[1] + " + " + S + ".z * " + F[2] + " + " + S + ".w * " + F[3] + ");\n";
- }
- }
- }
- }
- }
- };
-
- c += " int s = 0;\n";
- c += " do {\n";
- declare_src();
- const int total_work_items = work_group_size_.x * work_group_size_.y * work_group_size_.z;
- if (conv_params.weights_upload_type == ConvPowerVR::WeightsUploadType::LOCAL_MEM_ASYNC_SUBGROUP)
- {
- c += GenerateAsyncUpload("weights_cache", "filters_loc",
- /*global_offset_name*/ "", local_mem_size);
- }
- else if (conv_params.weights_upload_type == ConvPowerVR::WeightsUploadType::LOCAL_MEM_BY_THREADS)
- {
- c += " barrier(CLK_LOCAL_MEM_FENCE);\n";
- c +=
- GenerateUploadByThreads("weights_cache", "filters_loc",
- /*global_offset_name*/ "", "lid", total_work_items, local_mem_size);
- }
- else if (use_simd_broadcast)
- {
- int parts = local_mem_size / simd_size;
- int remainder = local_mem_size % simd_size;
- for (int i = 0; i < parts; ++i)
- {
- c += " FLT4 simd_w" + std::to_string(i) + " = filters_loc[simd_id + " +
- std::to_string(i * simd_size) + "];\n";
- }
- if (remainder)
- {
- c += " FLT4 simd_w" + std::to_string(parts) + ";\n";
- c += " if (simd_id < " + std::to_string(reminder) + ") {\n";
- c += " simd_w" + std::to_string(parts) + " = filters_loc[simd_id + " +
- std::to_string(parts * simd_size) + "];\n";
- c += " }\n";
- }
- }
- else if (conv_params.AreWeightsBuffer())
- { // GLOBAL_MEM/CONSTANT_MEM
- c += " weights_cache = filters_loc;\n";
- }
- else
- { // TEXTURES_MEM
- for (int dst_s = 0; dst_s < block_size.w; ++dst_s)
- {
- std::string f_y = trivial_kernel_size ? "s" : "filter_offset";
- if (conv_params.different_weights_for_height)
- {
- f_y = "DST_Y * args.src_tensor.Slices() + s";
- }
- c += absl::Substitute(
- R"( FLT4 f$2 = args.weights0.Read(DST_S + $0, $1);
- FLT4 f$3 = args.weights1.Read(DST_S + $0, $1);
- FLT4 f$4 = args.weights2.Read(DST_S + $0, $1);
- FLT4 f$5 = args.weights3.Read(DST_S + $0, $1);
-)",
- dst_s, f_y, dst_s * 4 + 0, dst_s * 4 + 1, dst_s * 4 + 2, dst_s * 4 + 3);
- }
- if (!trivial_kernel_size)
- {
- c += " filter_offset++;\n";
- }
- }
- read_src();
- c += " s += 1;\n";
- if (conv_params.weights_upload_type == ConvPowerVR::WeightsUploadType::LOCAL_MEM_BY_THREADS)
- {
- c += " barrier(CLK_LOCAL_MEM_FENCE);\n";
- }
- conv_core(0);
- for (int i = 1; i < conv_params.src_depth_loop_size; ++i)
- {
- read_src();
- conv_core(i * block_size.w * 4);
- c += " s += 1;\n";
- }
- if (conv_params.AreWeightsBuffer())
- {
- c += " filters_loc += " + std::to_string(local_mem_size) + ";\n";
- }
- c += " } while (s < args.src_tensor.Slices());\n";
- if (!conv_params.x_kernel_is_1)
- {
- c += " };\n";
- }
- if (!conv_params.y_kernel_is_1)
- {
- c += " };\n";
- }
- if (src_def.HasAxis(Axis::DEPTH) && !conv_params_.z_kernel_is_1)
- {
- c += " };\n";
- }
- if (conv_params.AreWeightsBuffer())
- {
- if (conv_params.weights_upload_type == ConvPowerVR::WeightsUploadType::LOCAL_MEM_ASYNC_SUBGROUP)
- {
- c += GenerateAsyncUpload("weights_cache", "args.biases.GetPtr()", "DST_S", block_size.w);
- }
- else if (conv_params.weights_upload_type ==
- ConvPowerVR::WeightsUploadType::LOCAL_MEM_BY_THREADS)
- {
- c += " barrier(CLK_LOCAL_MEM_FENCE);\n";
- c += GenerateUploadByThreads("weights_cache", "args.biases.GetPtr()", "DST_S", "lid",
- total_work_items, block_size.w);
- c += " barrier(CLK_LOCAL_MEM_FENCE);\n";
- }
- else
- {
- c += " weights_cache = args.biases.GetPtr() + DST_S;\n";
- }
- }
- if (late_oob_check)
- {
- c += " if (" + dst_oob_check + ") {\n";
- c += " return;\n";
- c += " }\n";
- }
-
- auto generate_dst_check = [&](int x, int y, int z) {
- std::string check;
- const std::vector<Axis> axes{Axis::WIDTH, Axis::HEIGHT, Axis::DEPTH};
- const std::vector<std::string> names{"Width()", "Height()", "Depth()"};
- std::vector<std::string> coords(3);
- coords[0] = "DST_X + " + std::to_string(x);
- coords[1] = "DST_Y + " + std::to_string(y);
- coords[2] = "DST_Z + " + std::to_string(z);
- const std::vector<int> ids{x, y, z};
- for (size_t i = 0; i < axes.size(); ++i)
- {
- const auto &axis = axes[i];
- if (src_def.HasAxis(axis) && ids[i] != 0)
- {
- if (!check.empty())
- {
- check += " && ";
- }
- check += coords[i] + " < args.dst_tensor." + names[i];
- }
- }
- return check;
- };
-
- for (int s = 0; s < block_size.w; ++s)
- {
- const std::string sind = std::to_string(s);
- c += " if (DST_S + " + sind + " >= args.dst_tensor.Slices()) return;\n";
- c += " {\n";
- if (conv_params.AreWeightsBuffer())
- {
- c += " FLT4 bias_val = TO_FLT4(weights_cache[" + sind + "]);\n";
- }
- else
- {
- c += " FLT4 bias_val = args.biases.Read(DST_S + " + sind + ");\n";
- }
- for (int z = 0; z < block_size.z; ++z)
- {
- const std::string zind = std::to_string(z);
- for (int y = 0; y < block_size.y; ++y)
- {
- const std::string yind = std::to_string(y);
- for (int x = 0; x < block_size.x; ++x)
- {
- const std::string xind = std::to_string(x);
- const std::string id = generate_id_full(xind, yind, zind, sind);
- const std::string check = generate_dst_check(x, y, z);
- std::string coords = "DST_X + " + xind + ", DST_Y + " + yind;
- if (src_def.HasAxis(Axis::DEPTH))
- {
- coords += ", DST_Z + " + zind;
- }
- coords += ", DST_S + " + sind;
- if (!check.empty())
- {
- c += " if (" + check + ") {\n";
- }
- else
- {
- c += " {\n";
- }
- c += " FLT4 res = TO_FLT4(r" + id + ") + bias_val;\n";
- c += " args.dst_tensor.Write(res, " + coords + ");\n";
- c += " }\n";
- }
- }
- }
- c += " }\n";
- }
- c += "}\n";
- return c;
-}
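
Condensed, every kernel produced by GenerateConv follows the same skeleton (an outline, not verbatim output):

    //  1. compute DST_X / DST_Y [/ DST_Z] / DST_S block coordinates
    //  2. early out-of-bounds return, unless local memory or simd broadcast
    //     requires all work-items to stay alive (late_oob_check)
    //  3. zero one ACCUM_FLT4 accumulator per (x, y, z, s) block element
    //  4. precompute source coordinates with stride/padding/dilation, or clamp
    //  5. open kz / ky / kx loops for each non-trivial kernel dimension
    //  6. do { upload weights (async copy, by threads, simd broadcast,
    //          pointer bump, or texture reads); read sources; run conv_core
    //          multiply-accumulates; } while (s < source slices)
    //  7. close the kernel loops, load biases, late out-of-bounds return
    //  8. add bias and write each block element under per-coordinate checks
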
-
-ConvPowerVR::ConvParams
-ConvPowerVR::GuessBestParams(const DeviceInfo &device_info, const OperationDef &definition,
- int src_depth, int dst_depth, bool x_kernel_is_1, bool y_kernel_is_1,
- bool different_weights_for_height, const BHWC *dst_shape)
-{
- ConvParams conv_params;
- conv_params.linear_spatial = false;
- conv_params.weights_data_type = DeduceDataTypeFromPrecision(definition.precision);
- conv_params.x_kernel_is_1 = x_kernel_is_1;
- conv_params.y_kernel_is_1 = y_kernel_is_1;
- conv_params.different_weights_for_height = different_weights_for_height;
- if (device_info.IsNvidia())
- {
- if (different_weights_for_height)
- {
- work_group_size_ = int3(32, 1, 1);
- work_group_launch_order_ = int3(2, 0, 1);
- conv_params.fixed_work_group_size = true;
- }
- else
- {
- conv_params.linear_spatial = true;
- work_group_size_ = int3(32, 1, 1);
- work_group_launch_order_ = int3(1, 0, 2);
- conv_params.fixed_work_group_size = true;
- }
- conv_params.block_size = int4(2, 1, 1, 4);
- conv_params.src_depth_loop_size = 1;
- conv_params.weights_upload_type = WeightsUploadType::LOCAL_MEM_BY_THREADS;
- if (dst_depth % 4 == 0 || dst_depth >= 8)
- {
- conv_params.block_size.w = 4;
- }
- else if (dst_depth % 2 == 0 || dst_depth >= 4)
- {
- conv_params.block_size.w = 2;
- }
- else
- {
- conv_params.block_size.w = dst_depth;
- }
- if (dst_shape)
- {
- int task_size = dst_shape->w * dst_shape->b * dst_shape->h * dst_depth;
- float task_size_per_cu = static_cast<float>(task_size) / device_info.compute_units_count;
- int block_size =
- conv_params.block_size.x * conv_params.block_size.y * conv_params.block_size.w;
- float threads_per_cu = task_size_per_cu / block_size;
- float warps_per_cu = threads_per_cu / 32 /*warp_size*/;
- if (warps_per_cu < 8.0f)
- {
- conv_params.block_size.x = 1;
- }
- if (warps_per_cu < 4.0f && conv_params.block_size.w >= 4)
- {
- conv_params.block_size.w /= 2;
- }
- if (warps_per_cu < 2.0f && conv_params.block_size.w >= 2)
- {
- conv_params.block_size.w /= 2;
- }
- }
- if (src_depth % 2 == 0)
- {
- conv_params.src_depth_loop_size = 2;
- }
- if (src_depth % 4 == 0 && conv_params.block_size.w <= 2)
- {
- conv_params.src_depth_loop_size = 4;
- }
- }
- else if (device_info.IsPowerVR())
- {
- if (different_weights_for_height)
- {
- work_group_size_ = int3(32, 1, 1);
- work_group_launch_order_ = int3(2, 0, 1);
- conv_params.fixed_work_group_size = true;
- }
- else
- {
- conv_params.linear_spatial = true;
- work_group_size_ = int3(32, 1, 1);
- work_group_launch_order_ = int3(1, 0, 2);
- conv_params.fixed_work_group_size = true;
- }
- conv_params.weights_data_type =
- definition.precision == CalculationsPrecision::F16 ? DataType::FLOAT16 : DataType::FLOAT32;
- conv_params.block_size = int4(1, 1, 1, 4);
- conv_params.src_depth_loop_size = 1;
- conv_params.weights_upload_type = WeightsUploadType::LOCAL_MEM_ASYNC_SUBGROUP;
- if (dst_depth % 8 == 0 || dst_depth >= 32)
- {
- conv_params.block_size.w = 8;
- }
- else if (dst_depth % 4 == 0 || dst_depth >= 8)
- {
- conv_params.block_size.w = 4;
- }
- else if (dst_depth % 2 == 0 || dst_depth >= 4)
- {
- conv_params.block_size.w = 2;
- }
- else
- {
- conv_params.block_size.w = dst_depth;
- }
- if (definition.precision == CalculationsPrecision::F16)
- {
- conv_params.block_size.w = std::min(4, conv_params.block_size.w);
- if (src_depth % 2 == 0)
- {
- conv_params.src_depth_loop_size = 2;
- }
- if (src_depth % 4 == 0 && conv_params.block_size.w <= 2)
- {
- conv_params.src_depth_loop_size = 4;
- }
- if (conv_params.block_size.w == 1)
- {
- if (src_depth % 2 == 0)
- {
- conv_params.src_depth_loop_size = 2;
- }
- if (src_depth % 4 == 0)
- {
- conv_params.src_depth_loop_size = 4;
- }
- if (src_depth <= 8)
- {
- conv_params.src_depth_loop_size = src_depth;
- }
- }
- conv_params.block_size.x = 2;
- }
- }
- else if (device_info.IsAMD())
- {
- if (different_weights_for_height)
- {
- work_group_size_ = int3(32, 1, 1);
- work_group_launch_order_ = int3(2, 0, 1);
- conv_params.fixed_work_group_size = true;
- }
- else
- {
- work_group_size_ = int3(8, 4, 1);
- work_group_launch_order_ = int3(2, 0, 1);
- conv_params.fixed_work_group_size = true;
- }
-
- conv_params.block_size = int4(2, 1, 1, 1);
- if (x_kernel_is_1 && y_kernel_is_1)
- {
- conv_params.block_size.y = 2;
- }
- conv_params.src_depth_loop_size = 1;
- conv_params.weights_upload_type = WeightsUploadType::CONSTANT_MEM;
- if (dst_depth % 8 == 0 || dst_depth >= 32)
- {
- conv_params.block_size.w = 8;
- }
- else if (dst_depth % 4 == 0 || dst_depth >= 8)
- {
- conv_params.block_size.w = 4;
- }
- else if (dst_depth % 2 == 0 || dst_depth >= 4)
- {
- conv_params.block_size.w = 2;
- }
- else
- {
- conv_params.block_size.w = 1;
- }
- if (src_depth % 2 == 0 && src_depth >= 16)
- {
- conv_params.src_depth_loop_size = 2;
- }
- }
- else if (device_info.IsMali())
- {
- int block_size = 2;
- if (dst_shape)
- {
- int task_size = dst_shape->w * dst_shape->b * dst_shape->h * dst_depth;
- block_size = GetRecommendedBlockSizeForConv(device_info, definition.precision, task_size);
- }
- if (!x_kernel_is_1 || !y_kernel_is_1)
- {
- block_size = std::min(block_size, 4);
- }
- if (block_size == 8)
- {
- if (dst_depth == 1 || dst_depth == 3)
- {
- conv_params.block_size = int4(2, 2, 1, 1);
- }
- else
- {
- conv_params.block_size = int4(2, 2, 1, 2);
- }
- }
- else if (block_size == 4)
- {
- if (dst_depth == 1 || dst_depth == 3)
- {
- conv_params.block_size = int4(2, 2, 1, 1);
- }
- else
- {
- conv_params.block_size = int4(2, 1, 1, 2);
- }
- }
- else if (block_size == 2)
- {
- conv_params.block_size = int4(2, 1, 1, 1);
- }
- else
- {
- conv_params.block_size = int4(1, 1, 1, 1);
- }
- conv_params.src_depth_loop_size = 1;
- MaliInfo mali_info = device_info.mali_info;
- if (src_depth % 2 == 0 && block_size <= 2 && !mali_info.IsMidgard())
- {
- conv_params.src_depth_loop_size = 2;
- }
- if (src_depth % 4 == 0 && block_size == 1 && !mali_info.IsMidgard() &&
- definition.precision == CalculationsPrecision::F16)
- {
- conv_params.src_depth_loop_size = 4;
- }
- work_group_size_ = int3(4, 4, 1);
- work_group_launch_order_ = int3(0, 1, 2);
- conv_params.fixed_work_group_size = false;
- conv_params.weights_upload_type = WeightsUploadType::GLOBAL_MEM;
- }
- else if (device_info.IsAdreno())
- {
- conv_params.block_size = int4(2, 2, 1, 2);
- if (device_info.IsAdreno3xx())
- {
- if (definition.precision == CalculationsPrecision::F16)
- {
- conv_params.block_size = int4(2, 2, 1, 2);
- }
- else if (definition.precision == CalculationsPrecision::F32_F16)
- {
- conv_params.block_size = int4(2, 1, 1, 2);
- }
- else
- { // F32
- conv_params.block_size = int4(2, 2, 1, 1);
- }
- }
- work_group_size_ = int3(8, 2, 1);
- work_group_launch_order_ = int3(0, 1, 2);
- conv_params.fixed_work_group_size = false;
- conv_params.src_depth_loop_size = 1;
- if (definition.src_tensors.size() == 2)
- {
- // dynamic weights supported only with buffers.
- conv_params.weights_upload_type = WeightsUploadType::GLOBAL_MEM;
- }
- else
- {
- conv_params.weights_upload_type = WeightsUploadType::TEXTURES_MEM_X4;
- }
- }
- else if (device_info.IsIntel())
- {
- if (different_weights_for_height)
- {
- work_group_size_ = int3(16, 1, 1);
- work_group_launch_order_ = int3(0, 1, 2);
- conv_params.fixed_work_group_size = true;
- }
- else
- {
- conv_params.linear_spatial = true;
- work_group_size_ = int3(16, 1, 1);
- work_group_launch_order_ = int3(0, 1, 2);
- conv_params.fixed_work_group_size = true;
- }
- conv_params.block_size = int4(1, 1, 1, 4);
- conv_params.src_depth_loop_size = 1;
- int sub_group_size = 16;
- const bool supports_subgroups = device_info.SupportsExtension("cl_khr_subgroups") ||
- device_info.SupportsExtension("cl_intel_subgroups");
- if (definition.precision != CalculationsPrecision::F32_F16 && supports_subgroups &&
- device_info.SupportsExtension("cl_intel_required_subgroup_size") &&
- device_info.SupportsSubGroupWithSize(sub_group_size))
- {
- conv_params.weights_upload_type = WeightsUploadType::PRIVATE_MEM_SIMD_BROADCAST;
- conv_params.simd_size = sub_group_size;
- }
- else
- {
- conv_params.weights_upload_type = WeightsUploadType::LOCAL_MEM_BY_THREADS;
- }
- if (dst_depth % 4 == 0 || dst_depth >= 8)
- {
- conv_params.block_size.w = 4;
- }
- else if (dst_depth % 2 == 0 || dst_depth >= 4)
- {
- conv_params.block_size.w = 2;
- }
- else
- {
- conv_params.block_size.w = dst_depth;
- }
- if (src_depth % 2 == 0)
- {
- conv_params.src_depth_loop_size = 2;
- }
- if (src_depth % 4 == 0 && conv_params.block_size.w <= 2)
- {
- conv_params.src_depth_loop_size = 4;
- }
- }
- else
- {
- conv_params.block_size = int4(1, 1, 1, 4);
- work_group_size_ = int3(8, 2, 1);
- work_group_launch_order_ = int3(0, 1, 2);
- conv_params.fixed_work_group_size = false;
- conv_params.src_depth_loop_size = 1;
- conv_params.weights_upload_type = WeightsUploadType::GLOBAL_MEM;
- if (dst_depth % 4 == 0 || dst_depth >= 8)
- {
- conv_params.block_size.w = 4;
- }
- else if (dst_depth % 2 == 0 || dst_depth >= 4)
- {
- conv_params.block_size.w = 2;
- }
- else
- {
- conv_params.block_size.w = dst_depth;
- }
- if (src_depth % 2 == 0)
- {
- conv_params.src_depth_loop_size = 2;
- }
- if (src_depth % 4 == 0 && conv_params.block_size.w <= 2)
- {
- conv_params.src_depth_loop_size = 4;
- }
- }
-
- return conv_params;
-}
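
To make the Nvidia branch concrete, a worked example with invented numbers (a device with 30 compute units, src_depth = 16, dst_depth = 16, dst_shape = 1x56x56x64):

    // initial block_size = (2, 1, 1, 4)         (dst_depth % 4 == 0)
    // task_size          = 56 * 1 * 56 * 16     = 50176
    // task_size_per_cu   = 50176 / 30           ~ 1672.5
    // threads_per_cu     = 1672.5 / (2 * 1 * 4) ~ 209.1
    // warps_per_cu       = 209.1 / 32           ~ 6.5   (< 8, but >= 4)
    // => block_size.x = 1, final block_size = (1, 1, 1, 4)
    // src_depth % 2 == 0 => src_depth_loop_size = 2
    //   (a loop size of 4 is skipped because block_size.w > 2)
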
-
-ConvPowerVR::ConvParams ConvPowerVR::GuessBestParams(const DeviceInfo &device_info,
- const OperationDef &definition,
- const Convolution2DAttributes &attr,
- const BHWC *dst_shape)
-{
- const int dst_depth = DivideRoundUp(attr.weights.shape.o, 4);
- const int src_depth = DivideRoundUp(attr.weights.shape.i, 4);
- const bool x_kernel_is_1 = attr.weights.shape.w == 1 && attr.strides.w == 1 &&
- attr.dilations.w == 1 && attr.padding.prepended.w == 0 &&
- attr.padding.appended.w == 0;
- const bool y_kernel_is_1 = attr.weights.shape.h == 1 && attr.strides.h == 1 &&
- attr.dilations.h == 1 && attr.padding.prepended.h == 0 &&
- attr.padding.appended.h == 0;
- return GuessBestParams(device_info, definition, src_depth, dst_depth, x_kernel_is_1,
- y_kernel_is_1, false, dst_shape);
-}
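
The two triviality flags gate the whole stride/padding machinery in GenerateConv; for instance (hypothetical attributes):

    // 1x1 kernel, stride 1, dilation 1, no padding:
    //   x_kernel_is_1 = y_kernel_is_1 = true  -> no stride/padding/dilation args
    // 3x3 kernel, stride 1, dilation 1, pad 1:
    //   x_kernel_is_1 = y_kernel_is_1 = false -> kx/ky loops are emitted
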
-
-ConvPowerVR::ConvParams ConvPowerVR::GuessBestParams(const DeviceInfo &device_info,
- const OperationDef &definition,
- const Convolution3DAttributes &attr,
- const BHWDC *dst_shape)
-{
- const int dst_depth = DivideRoundUp(attr.weights.shape.o, 4);
- const int src_depth = DivideRoundUp(attr.weights.shape.i, 4);
- const bool x_kernel_is_1 = attr.weights.shape.w == 1 && attr.strides.w == 1 &&
- attr.dilations.w == 1 && attr.padding.prepended.w == 0 &&
- attr.padding.appended.w == 0;
- const bool y_kernel_is_1 = attr.weights.shape.h == 1 && attr.strides.h == 1 &&
- attr.dilations.h == 1 && attr.padding.prepended.h == 0 &&
- attr.padding.appended.h == 0;
- const bool z_kernel_is_1 = attr.weights.shape.d == 1 && attr.strides.d == 1 &&
- attr.dilations.d == 1 && attr.padding.prepended.d == 0 &&
- attr.padding.appended.d == 0;
-
- ConvPowerVR::ConvParams result;
- BHWC shape;
- if (dst_shape)
- {
- shape.b = dst_shape->b;
- shape.h = dst_shape->h * dst_shape->d;
- shape.w = dst_shape->w;
- shape.c = dst_shape->c;
- result = GuessBestParams(device_info, definition, src_depth, dst_depth, x_kernel_is_1,
- y_kernel_is_1, false, &shape);
- }
- else
- {
- result = GuessBestParams(device_info, definition, src_depth, dst_depth, x_kernel_is_1,
- y_kernel_is_1, false, nullptr);
- }
- result.z_kernel_is_1 = z_kernel_is_1;
- return result;
-}
-
-ConvPowerVR::ConvParams ConvPowerVR::GuessBestParams(const DeviceInfo &device_info,
- const OperationDef &definition,
- const Convolution2DAttributes &attr,
- const BHWC &weights_shape,
- const BHWC *dst_shape)
-{
- const int dst_depth = DivideRoundUp(weights_shape.b, 4);
- const int src_depth = DivideRoundUp(weights_shape.c, 4);
- const bool x_kernel_is_1 = weights_shape.w == 1 && attr.strides.w == 1 && attr.dilations.w == 1 &&
- attr.padding.prepended.w == 0 && attr.padding.appended.w == 0;
- const bool y_kernel_is_1 = weights_shape.h == 1 && attr.strides.h == 1 && attr.dilations.h == 1 &&
- attr.padding.prepended.h == 0 && attr.padding.appended.h == 0;
- return GuessBestParams(device_info, definition, src_depth, dst_depth, x_kernel_is_1,
- y_kernel_is_1, false, dst_shape);
-}
-
-ConvPowerVR::ConvParams ConvPowerVR::GuessBestParams(const DeviceInfo &device_info,
- const OperationDef &definition,
- const FullyConnectedAttributes &attr,
- const BHWC *dst_shape)
-{
- const int dst_depth = DivideRoundUp(attr.weights.shape.o, 4);
- const int src_depth = DivideRoundUp(attr.weights.shape.i, 4);
- ConvPowerVR::ConvParams params =
- GuessBestParams(device_info, definition, src_depth, dst_depth, true, true, false, dst_shape);
- work_group_size_.x *= work_group_size_.y;
- work_group_size_.y = 1;
- params.block_size.x *= params.block_size.y;
- params.block_size.y = 1;
- return params;
-}
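
A fully connected layer is treated as a 1x1 convolution over a 1x1 spatial extent, so the Y dimension is folded into X (illustrative values):

    // work_group_size (8, 4, 1)    -> (32, 1, 1)
    // block_size      (2, 2, 1, 4) -> (4, 1, 1, 4)
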
-
-ConvPowerVR::ConvParams ConvPowerVR::GuessBestParamsWinograd(const DeviceInfo &device_info,
- const OperationDef &definition,
- const Convolution2DAttributes &attr,
- const BHWC *dst_shape)
-{
- const int dst_depth = DivideRoundUp(attr.weights.shape.o, 4);
- const int src_depth = DivideRoundUp(attr.weights.shape.i, 4);
- ConvPowerVR::ConvParams params =
- GuessBestParams(device_info, definition, src_depth, dst_depth, true, true, true, dst_shape);
- params.block_size.x *= params.block_size.y;
- params.block_size.y = 1;
- return params;
-}
-
-ConvPowerVR CreateConvPowerVR(const DeviceInfo &device_info, const OperationDef &definition,
- const Convolution2DAttributes &attr, const BHWC *dst_shape)
-{
- ConvPowerVR result(definition, attr, device_info, dst_shape);
- result.GenerateCode(device_info);
- result.UploadData(attr.weights, attr.bias);
- return result;
-}
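
A minimal host-side usage sketch; the call site is hypothetical, and device_info, definition and the attribute fields are assumed to be populated elsewhere:

    Convolution2DAttributes attr;   // weights, bias, strides, padding filled in
    BHWC dst_shape(1, 56, 56, 64);  // a known output shape improves tuning
    ConvPowerVR conv = CreateConvPowerVR(device_info, definition, attr, &dst_shape);
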
-
-ConvPowerVR CreateConvPowerVR(const DeviceInfo &device_info, const OperationDef &definition,
- const FullyConnectedAttributes &attr, const BHWC *dst_shape)
-{
- ConvPowerVR result(definition, attr, device_info, dst_shape);
- result.GenerateCode(device_info);
- result.UploadData(attr.weights, attr.bias);
- return result;
-}
-
-ConvPowerVR CreateConvPowerVRDynamicWeights(const DeviceInfo &device_info,
- const OperationDef &definition,
- const Convolution2DAttributes &attr,
- const BHWC &weights_shape, const BHWC *dst_shape)
-{
- ConvPowerVR result(definition, attr, weights_shape, device_info, dst_shape);
- result.GenerateCode(device_info);
- result.UploadBias(attr.bias);
- return result;
-}
-
-ConvPowerVR CreateConvPowerVRWino4x4To6x6(const DeviceInfo &device_info,
- const OperationDef &definition,
- const Convolution2DAttributes &attr,
- const BHWC *dst_shape)
-{
- ConvPowerVR result(definition);
- result.conv_params_ = result.GuessBestParamsWinograd(device_info, definition, attr, dst_shape);
- result.GenerateCode(device_info);
- result.UploadDataForWinograd4x4To6x6(attr.weights);
- return result;
-}
-
-ConvPowerVR CreateConvPowerVR3D(const DeviceInfo &device_info, const OperationDef &definition,
- const Convolution3DAttributes &attr, const BHWDC *dst_shape)
-{
- ConvPowerVR result(definition, attr, device_info, dst_shape);
- result.GenerateCode(device_info);
- result.UploadWeights(attr.weights);
- result.UploadBias(attr.bias);
- return result;
-}
-
-} // namespace gpu_cl
-} // namespace backend
-} // namespace onert
diff --git a/runtime/onert/backend/gpu_cl/open_cl/kernels/ConvPowervr.h b/runtime/onert/backend/gpu_cl/open_cl/kernels/ConvPowervr.h
deleted file mode 100644
index f83f05730..000000000
--- a/runtime/onert/backend/gpu_cl/open_cl/kernels/ConvPowervr.h
+++ /dev/null
@@ -1,413 +0,0 @@
-/*
- * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
- * Copyright 2019 The TensorFlow Authors. All Rights Reserved.
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#ifndef __ONERT_BACKEND_GPU_CL_OPENCL_KERNELS_CONV_POWERVR_H__
-#define __ONERT_BACKEND_GPU_CL_OPENCL_KERNELS_CONV_POWERVR_H__
-
-#include <cstring>
-#include <vector>
-
-#include "open_cl/Buffer.h"
-#include "open_cl/ClDevice.h"
-#include "open_cl/kernels/ConvCommon.h"
-#include "open_cl/kernels/GpuOperation.h"
-#include "open_cl/kernels/Util.h"
-#include "open_cl/LinearStorage.h"
-#include "open_cl/Tensor.h"
-#include "open_cl/Texture2d.h"
-#include "open_cl/Util.h"
-#include "open_cl/DataType.h"
-#include "open_cl/Operations.h"
-#include "open_cl/Shape.h"
-#include "open_cl/Status.h"
-#include "open_cl/Tensor.h"
-#include "open_cl/Types.h"
-#include "open_cl/WinogradUtil.h"
-
-namespace onert
-{
-namespace backend
-{
-namespace gpu_cl
-{
-
-class ConvPowerVR : public GPUOperation
-{
-public:
- ConvPowerVR() = default;
- void GetPossibleKernelWorkGroups(TuningType tuning_type, const DeviceInfo &device_info,
- const KernelInfo &kernel_info,
- std::vector<int3> *work_groups) const override;
- absl::Status BindArguments(ArgumentsBinder *args) override;
- int3 GetGridSize() const override;
-
- ConvWeightsDescription GetConvWeightsDescription() const
- {
- ConvWeightsDescription desc;
- desc.layout = ConvWeightsLayout::kOHWIOGroupI4O4;
- desc.output_group_size = conv_params_.block_size.w;
- return desc;
- }
-
- // Move only
- ConvPowerVR(ConvPowerVR &&operation);
- ConvPowerVR &operator=(ConvPowerVR &&operation);
- ConvPowerVR(const ConvPowerVR &) = delete;
- ConvPowerVR &operator=(const ConvPowerVR &) = delete;
-
-private:
- enum class WeightsUploadType
- {
- LOCAL_MEM_ASYNC_SUBGROUP, // we use it for PowerVR with workgroup size = 32
- LOCAL_MEM_BY_THREADS,
- GLOBAL_MEM,
- CONSTANT_MEM,
- PRIVATE_MEM_SIMD_BROADCAST,
- TEXTURES_MEM_X4, // 4 textures for weights
- };
-
- struct ConvParams
- {
- // Usually we use these combinations for CalculationsPrecision:
- // F32: everything in F32
- // F16: everything in F16
- // F32_F16: everything except the accumulator in F16, including weights
- // But on PowerVR we can achieve better performance in F32_F16 with F32
- // weights, so for PowerVR this kernel uses F32 weights in
- // F32_F16 precision mode
- DataType weights_data_type; // used for weights and biases
- int4 block_size; // WHDS
- bool fixed_work_group_size;
- bool linear_spatial; // Width/Height/Depth are collapsed into one linear dimension
- bool different_weights_for_height;
- int src_depth_loop_size;
- WeightsUploadType weights_upload_type;
- bool x_kernel_is_1;
- bool y_kernel_is_1;
- bool z_kernel_is_1;
-
- // used only with PRIVATE_MEM_SIMD_BROADCAST
- int simd_size = 1;
-
- bool AreWeightsBuffer() const
- {
- return weights_upload_type != WeightsUploadType::TEXTURES_MEM_X4;
- }
-
- bool IsPrivateMemBroadcast() const
- {
- return weights_upload_type == WeightsUploadType::PRIVATE_MEM_SIMD_BROADCAST;
- }
- };
-
- ConvPowerVR(const OperationDef &definition, const Convolution2DAttributes &attr,
- const DeviceInfo &device_info, const BHWC *dst_shape = nullptr);
- ConvPowerVR(const OperationDef &definition, const Convolution2DAttributes &attr,
- const BHWC &weights_shape, const DeviceInfo &device_info,
- const BHWC *dst_shape = nullptr);
- ConvPowerVR(const OperationDef &definition, const FullyConnectedAttributes &attr,
- const DeviceInfo &device_info, const BHWC *dst_shape = nullptr);
- explicit ConvPowerVR(const OperationDef &definition);
- ConvPowerVR(const OperationDef &definition, const Convolution3DAttributes &attr,
- const DeviceInfo &device_info, const BHWDC *dst_shape = nullptr);
-
- void GenerateCode(const DeviceInfo &device_info);
-
- template <DataType T>
- void UploadData(const InternalTensor<OHWI, T> &weights, const InternalTensor<Linear, T> &biases);
- template <DataType T> void UploadDataForWinograd4x4To6x6(const InternalTensor<OHWI, T> &weights);
-
- template <DataType T> void UploadWeights(const InternalTensor<OHWI, T> &weights);
-
- template <DataType T> void UploadWeights(const InternalTensor<OHWDI, T> &weights);
-
- template <DataType T> void UploadBias(const InternalTensor<Linear, T> &bias);
-
- friend ConvPowerVR CreateConvPowerVR(const DeviceInfo &device_info,
- const OperationDef &definition,
- const Convolution2DAttributes &attr, const BHWC *dst_shape);
-
- friend ConvPowerVR CreateConvPowerVR(const DeviceInfo &device_info,
- const OperationDef &definition,
- const FullyConnectedAttributes &attr, const BHWC *dst_shape);
-
- friend ConvPowerVR CreateConvPowerVRDynamicWeights(const DeviceInfo &device_info,
- const OperationDef &definition,
- const Convolution2DAttributes &attr,
- const BHWC &weights_shape,
- const BHWC *dst_shape);
-
- friend ConvPowerVR CreateConvPowerVRWino4x4To6x6(const DeviceInfo &device_info,
- const OperationDef &definition,
- const Convolution2DAttributes &attr,
- const BHWC *dst_shape);
-
- friend ConvPowerVR CreateConvPowerVR3D(const DeviceInfo &device_info,
- const OperationDef &definition,
- const Convolution3DAttributes &attr,
- const BHWDC *dst_shape);
-
- ConvParams GuessBestParams(const DeviceInfo &device_info, const OperationDef &definition,
- const Convolution2DAttributes &attr, const BHWC *dst_shape = nullptr);
- ConvParams GuessBestParams(const DeviceInfo &device_info, const OperationDef &definition,
- const Convolution2DAttributes &attr, const BHWC &weights_shape,
- const BHWC *dst_shape = nullptr);
- ConvParams GuessBestParams(const DeviceInfo &device_info, const OperationDef &definition,
- const FullyConnectedAttributes &attr, const BHWC *dst_shape = nullptr);
- ConvParams GuessBestParamsWinograd(const DeviceInfo &device_info, const OperationDef &definition,
- const Convolution2DAttributes &attr,
- const BHWC *dst_shape = nullptr);
- ConvParams GuessBestParams(const DeviceInfo &device_info, const OperationDef &definition,
- const Convolution3DAttributes &attr, const BHWDC *dst_shape = nullptr);
- ConvParams GuessBestParams(const DeviceInfo &device_info, const OperationDef &definition,
- int src_depth, int dst_depth, bool x_kernel_is_1, bool y_kernel_is_1,
- bool different_weights_for_height, const BHWC *dst_shape = nullptr);
-
- std::string GenerateConv(const DeviceInfo &device_info, const OperationDef &op_def,
- bool stride_correction, const ConvParams &conv_params);
-
- int4 stride_;
- int4 padding_;
- int4 kernel_size_;
- int4 dilation_;
- ConvParams conv_params_;
-};
-
-template <DataType T>
-void ConvPowerVR::UploadData(const InternalTensor<OHWI, T> &weights,
- const InternalTensor<Linear, T> &biases)
-{
- UploadWeights(weights);
- UploadBias(biases);
-}
-
-template <DataType T>
-void ConvPowerVR::UploadDataForWinograd4x4To6x6(const InternalTensor<OHWI, T> &weights)
-{
- InternalTensor<OHWI, T> wino_weights;
- RearrangeWeightsToWinograd4x4To6x6Weights(weights, &wino_weights);
- UploadWeights(wino_weights);
- InternalTensor<Linear, DataType::FLOAT32> biases;
- biases.shape = Linear(weights.shape.o);
- biases.data.resize(weights.shape.o, 0.0f);
- UploadBias(biases);
-}
-
-template <DataType T> void ConvPowerVR::UploadBias(const InternalTensor<Linear, T> &bias)
-{
- BufferDescriptor desc;
- desc.element_type = conv_params_.weights_data_type;
- desc.element_size = 4;
- desc.memory_type =
- conv_params_.weights_upload_type == ConvPowerVR::WeightsUploadType::CONSTANT_MEM
- ? MemoryType::CONSTANT
- : MemoryType::GLOBAL;
- const int float_size = sizeof(float);
-  // TODO: once FP16 is supported, this should become
-  // conv_params_.weights_data_type == DataType::FLOAT32 ? sizeof(float) : sizeof(half);
- int aligned_channels = AlignByN(bias.shape.v, 4 * conv_params_.block_size.w);
- desc.size = float_size * aligned_channels;
- desc.data.resize(desc.size);
- if (conv_params_.weights_data_type == DataType::FLOAT32)
- {
- float *gpu_data = reinterpret_cast<float *>(desc.data.data());
- for (int i = 0; i < aligned_channels; ++i)
- {
- gpu_data[i] = i < bias.shape.v ? bias.data[i] : 0.0f;
- }
- }
- // else
- // {
- // half *gpu_data = reinterpret_cast<half *>(desc.data.data());
- // for (int i = 0; i < aligned_channels; ++i)
- // {
- // gpu_data[i] = i < bias.shape.v ? bias.data[i] : 0.0f;
- // }
- // }
- args_.AddObject("biases", absl::make_unique<BufferDescriptor>(std::move(desc)));
-}
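-
-// Worked example for the alignment above (illustrative numbers, not taken
-// from the code): with block_size.w == 2, a bias of 10 channels is padded to
-// AlignByN(10, 4 * 2) == 16 floats, and elements [10..15] are zero-filled so
-// the kernel can read whole float4 groups per output slice without bounds
-// checks.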
-
-template <DataType T> void ConvPowerVR::UploadWeights(const InternalTensor<OHWI, T> &weights)
-{
- const int dst_slices = AlignByN(DivideRoundUp(weights.shape.o, 4), conv_params_.block_size.w);
- const int src_slices = DivideRoundUp(weights.shape.i, 4);
-
- const bool f32_weights = conv_params_.weights_data_type == DataType::FLOAT32;
- const int float4_size = sizeof(float4);
-  // TODO: once FP16 is supported, this should become
-  // f32_weights ? sizeof(float4) : sizeof(half4);
-
- const int elements_count = weights.shape.h * weights.shape.w * src_slices * dst_slices * 4;
-
- std::vector<uint8_t> data(float4_size * elements_count);
-
- if (f32_weights)
- {
- float4 *ptr = reinterpret_cast<float4 *>(data.data());
- if (conv_params_.AreWeightsBuffer())
- {
- RearrangeWeightsToOHWIOGroupI4O4(weights, conv_params_.block_size.w,
- absl::MakeSpan(ptr, elements_count));
- }
- else
- {
- RearrangeWeightsToI4HWIOOGroupO4(weights, conv_params_.block_size.w,
- absl::MakeSpan(ptr, elements_count));
- }
- }
- // else
- // {
- // half4 *ptr = reinterpret_cast<half4 *>(data.data());
- // if (conv_params_.AreWeightsBuffer())
- // {
- // RearrangeWeightsToOHWIOGroupI4O4(weights, conv_params_.block_size.w,
- // absl::MakeSpan(ptr, elements_count));
- // }
- // else
- // {
- // RearrangeWeightsToI4HWIOOGroupO4(weights, conv_params_.block_size.w,
- // absl::MakeSpan(ptr, elements_count));
- // }
- // }
- if (conv_params_.AreWeightsBuffer())
- {
- BufferDescriptor desc;
- desc.element_type = conv_params_.weights_data_type;
- desc.element_size = 4;
- desc.memory_type =
- conv_params_.weights_upload_type == ConvPowerVR::WeightsUploadType::CONSTANT_MEM
- ? MemoryType::CONSTANT
- : MemoryType::GLOBAL;
- desc.size = float4_size * elements_count;
- desc.data = std::move(data);
- args_.AddObject("weights", absl::make_unique<BufferDescriptor>(std::move(desc)));
- }
- else
- {
- const int texture_width = dst_slices;
- const int texture_height = src_slices * weights.shape.h * weights.shape.w;
- const int sub_size = float4_size * texture_width * texture_height;
- for (int i = 0; i < 4; ++i)
- {
- Texture2DDescriptor desc;
- desc.element_type = conv_params_.weights_data_type;
- desc.size = int2(texture_width, texture_height);
- desc.data.resize(sub_size);
- std::memcpy(desc.data.data(), data.data() + sub_size * i, sub_size);
- const std::string name = "weights" + std::to_string(i);
- args_.AddObject(name, absl::make_unique<Texture2DDescriptor>(std::move(desc)));
- }
- }
-}
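-
-// Worked size example (illustrative values): weights of shape
-// OHWI(o=8, h=3, w=3, i=16) with block_size.w == 2 give
-// dst_slices = AlignByN(DivideRoundUp(8, 4), 2) = 2,
-// src_slices = DivideRoundUp(16, 4) = 4, and
-// elements_count = 3 * 3 * 4 * 2 * 4 = 288 float4 values; the texture path
-// splits these into 4 textures of 2 x 36 texels each.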
-
-template <DataType T> void ConvPowerVR::UploadWeights(const InternalTensor<OHWDI, T> &weights)
-{
- const int block_size = conv_params_.block_size.w;
- const int dst_slices = AlignByN(DivideRoundUp(weights.shape.o, 4), block_size);
- const int src_slices = DivideRoundUp(weights.shape.i, 4);
-
- const int elements_count =
- weights.shape.d * weights.shape.h * weights.shape.w * src_slices * dst_slices * 4;
- const bool f32_weights = definition_.precision == CalculationsPrecision::F32;
-
-  const int float4_size = f32_weights ? 16 : 8; // sizeof(float4) : sizeof(half4)
-
- std::vector<uint8_t> data(float4_size * elements_count);
-
- if (f32_weights)
- {
- float4 *ptr = reinterpret_cast<float4 *>(data.data());
- if (conv_params_.AreWeightsBuffer())
- {
- RearrangeWeightsToODHWIOGroupI4O4(weights, conv_params_.block_size.w,
- absl::MakeSpan(ptr, elements_count));
- }
- else
- {
- RearrangeWeightsToI4DHWIOOGroupO4(weights, conv_params_.block_size.w,
- absl::MakeSpan(ptr, elements_count));
- }
- }
- // else
- // {
- // half4 *ptr = reinterpret_cast<half4 *>(data.data());
- // if (conv_params_.AreWeightsBuffer())
- // {
- // RearrangeWeightsToODHWIOGroupI4O4(weights, conv_params_.block_size.w,
- // absl::MakeSpan(ptr, elements_count));
- // }
- // else
- // {
- // RearrangeWeightsToI4DHWIOOGroupO4(weights, conv_params_.block_size.w,
- // absl::MakeSpan(ptr, elements_count));
- // }
- // }
-
- if (conv_params_.AreWeightsBuffer())
- {
- BufferDescriptor desc;
- desc.element_type = f32_weights ? DataType::FLOAT32 : DataType::FLOAT16;
- desc.element_size = 4;
- desc.size = float4_size * elements_count;
- desc.data = std::move(data);
- args_.AddObject("weights", absl::make_unique<BufferDescriptor>(std::move(desc)));
- }
- else
- {
- const int texture_width = dst_slices;
- const int texture_height = src_slices * weights.shape.d * weights.shape.h * weights.shape.w;
- int sub_size = float4_size * texture_width * texture_height;
- for (int i = 0; i < 4; ++i)
- {
- Texture2DDescriptor desc;
- desc.element_type = f32_weights ? DataType::FLOAT32 : DataType::FLOAT16;
- desc.size = int2(texture_width, texture_height);
- desc.data.resize(sub_size);
- memcpy(desc.data.data(), data.data() + sub_size * i, sub_size);
- const std::string name = "weights" + std::to_string(i);
- args_.AddObject(name, absl::make_unique<Texture2DDescriptor>(std::move(desc)));
- }
- }
-}
-
-ConvPowerVR CreateConvPowerVR(const DeviceInfo &device_info, const OperationDef &definition,
- const Convolution2DAttributes &attr, const BHWC *dst_shape = nullptr);
-
-ConvPowerVR CreateConvPowerVR(const DeviceInfo &device_info, const OperationDef &definition,
- const FullyConnectedAttributes &attr,
- const BHWC *dst_shape = nullptr);
-
-ConvPowerVR CreateConvPowerVRDynamicWeights(const DeviceInfo &device_info,
- const OperationDef &definition,
- const Convolution2DAttributes &attr,
- const BHWC &weights_shape,
- const BHWC *dst_shape = nullptr);
-
-ConvPowerVR CreateConvPowerVRWino4x4To6x6(const DeviceInfo &device_info,
- const OperationDef &definition,
- const Convolution2DAttributes &attr,
- const BHWC *dst_shape = nullptr);
-
-ConvPowerVR CreateConvPowerVR3D(const DeviceInfo &device_info, const OperationDef &definition,
- const Convolution3DAttributes &attr,
- const BHWDC *dst_shape = nullptr);
-
-} // namespace gpu_cl
-} // namespace backend
-} // namespace onert
-
-#endif // __ONERT_BACKEND_GPU_CL_OPENCL_KERNELS_CONV_POWERVR_H__
diff --git a/runtime/onert/backend/gpu_cl/open_cl/kernels/ConvWeightsConverter.cc b/runtime/onert/backend/gpu_cl/open_cl/kernels/ConvWeightsConverter.cc
deleted file mode 100644
index 95172bd05..000000000
--- a/runtime/onert/backend/gpu_cl/open_cl/kernels/ConvWeightsConverter.cc
+++ /dev/null
@@ -1,143 +0,0 @@
-/*
- * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
- * Copyright 2019 The TensorFlow Authors. All Rights Reserved.
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-#include "open_cl/kernels/ConvWeightsConverter.h"
-
-#include <string>
-
-#include "open_cl/kernels/Util.h"
-#include "open_cl/kernels/WorkGroupPicking.h"
-
-namespace onert
-{
-namespace backend
-{
-namespace gpu_cl
-{
-
-ConverterToConvWeights::ConverterToConvWeights(const OperationDef &definition,
- const ConvWeightsDescription &conv_weights_desc)
- : GPUOperation(definition), conv_weights_desc_(conv_weights_desc)
-{
- code_ = GetConverterToConvWeightsCode(definition_, conv_weights_desc_);
-}
-
-ConverterToConvWeights::ConverterToConvWeights(ConverterToConvWeights &&operation)
- : GPUOperation(std::move(operation)), conv_weights_desc_(operation.conv_weights_desc_)
-{
-}
-
-ConverterToConvWeights &ConverterToConvWeights::operator=(ConverterToConvWeights &&operation)
-{
- if (this != &operation)
- {
- conv_weights_desc_ = operation.conv_weights_desc_;
- GPUOperation::operator=(std::move(operation));
- }
- return *this;
-}
-
-std::string ConverterToConvWeights::GetConverterToConvWeightsCode(
- const OperationDef &op_def, const ConvWeightsDescription &conv_weights_desc)
-{
- AddSrcTensor("src_tensor", op_def.src_tensors[0]);
- AddDstTensor("dst_tensor", op_def.dst_tensors[0]);
- args_.AddFloat("mask_x");
- args_.AddFloat("mask_y");
- args_.AddFloat("mask_z");
- args_.AddFloat("mask_w");
-
- std::string c = GetCommonDefines(op_def.precision);
- c += "__kernel void main_function(\n";
- c += "$0) {\n";
- c += " int GROUP_SIZE = " + std::to_string(conv_weights_desc.output_group_size) + ";\n";
- c += " int O = get_global_id(0) * 4;\n";
- c += " int I = get_global_id(1);\n";
- c += " int Z = get_global_id(2);\n";
- c += " int W = Z % args.src_tensor.Width();\n";
- c += " int H = Z / args.src_tensor.Width();\n";
- c += " if (O >= args.src_tensor.Batch() || I >= args.src_tensor.Slices() || "
- "H >= args.src_tensor.Height()) return;\n";
- c += " FLT4 v0 = args.src_tensor.Read(W, H, I, O + 0);\n";
- c += " FLT4 v1 = (FLT4)(0.0f, 0.0f, 0.0f, 0.0f);\n";
- c += " FLT4 v2 = (FLT4)(0.0f, 0.0f, 0.0f, 0.0f);\n";
- c += " FLT4 v3 = (FLT4)(0.0f, 0.0f, 0.0f, 0.0f);\n";
- c += " if (O + 1 < args.src_tensor.Batch()) {\n";
- c += " v1 = args.src_tensor.Read(W, H, I, O + 1);\n";
- c += " }\n";
- c += " if (O + 2 < args.src_tensor.Batch()) {\n";
- c += " v2 = args.src_tensor.Read(W, H, I, O + 2);\n";
- c += " }\n";
- c += " if (O + 3 < args.src_tensor.Batch()) {\n";
- c += " v3 = args.src_tensor.Read(W, H, I, O + 3);\n";
- c += " }\n";
- c += " if (I == args.src_tensor.Slices() - 1) {\n";
- c += " FLT4 mask = (FLT4)(args.mask_x, args.mask_y, args.mask_z, "
- "args.mask_w);\n";
- c += " v0 *= mask;\n";
- c += " v1 *= mask;\n";
- c += " v2 *= mask;\n";
- c += " v3 *= mask;\n";
- c += " }\n";
- c += " FLT4 r0 = (FLT4)(v0.x, v1.x, v2.x, v3.x);\n";
- c += " FLT4 r1 = (FLT4)(v0.y, v1.y, v2.y, v3.y);\n";
- c += " FLT4 r2 = (FLT4)(v0.z, v1.z, v2.z, v3.z);\n";
- c += " FLT4 r3 = (FLT4)(v0.w, v1.w, v2.w, v3.w);\n";
- c += " int d_index = O / (GROUP_SIZE * 4);\n";
- c += " int k_index = (O % (GROUP_SIZE * 4)) / 4;\n";
- c += " int dst_offset = (((d_index * args.src_tensor.Height() + H) * "
- "args.src_tensor.Width() + W) * "
- "args.src_tensor.Slices() + I) * GROUP_SIZE + "
- "k_index;\n";
- c += " int address0 = dst_offset * 4 + 0;\n";
- c += " int address1 = dst_offset * 4 + 1;\n";
- c += " int address2 = dst_offset * 4 + 2;\n";
- c += " int address3 = dst_offset * 4 + 3;\n";
- c += " args.dst_tensor.WriteLinear(r0, dst_offset * 4 + 0)\n;";
- c += " args.dst_tensor.WriteLinear(r1, dst_offset * 4 + 1)\n;";
- c += " args.dst_tensor.WriteLinear(r2, dst_offset * 4 + 2)\n;";
- c += " args.dst_tensor.WriteLinear(r3, dst_offset * 4 + 3)\n;";
- c += "}\n";
- return c;
-}
-
-absl::Status ConverterToConvWeights::BindArguments(ArgumentsBinder *args)
-{
- float4 mask = GetMaskForLastPlane(src_[0]->Channels());
- RETURN_IF_ERROR(args->SetFloat("mask_x", mask.x));
- RETURN_IF_ERROR(args->SetFloat("mask_y", mask.y));
- RETURN_IF_ERROR(args->SetFloat("mask_z", mask.z));
- return args->SetFloat("mask_w", mask.w);
-}
-
-int3 ConverterToConvWeights::GetGridSize() const
-{
- const int grid_x =
- DivideRoundUp(AlignByN(src_[0]->Batch(), 4 * conv_weights_desc_.output_group_size), 4);
- const int grid_y = src_[0]->Slices();
- const int grid_z = src_[0]->Width() * src_[0]->Height();
- return int3(grid_x, grid_y, grid_z);
-}
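-
-// Grid example (illustrative): with src Batch() == 10 (the O dimension) and
-// output_group_size == 2, grid_x = DivideRoundUp(AlignByN(10, 8), 4) = 4,
-// since each work-item along x packs four consecutive output channels.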
-
-ConverterToConvWeights CreateConverterToConvWeights(const OperationDef &definition,
- const ConvWeightsDescription &conv_weights_desc)
-{
- return ConverterToConvWeights(definition, conv_weights_desc);
-}
-
-} // namespace gpu_cl
-} // namespace backend
-} // namespace onert
diff --git a/runtime/onert/backend/gpu_cl/open_cl/kernels/ConvWeightsConverter.h b/runtime/onert/backend/gpu_cl/open_cl/kernels/ConvWeightsConverter.h
deleted file mode 100644
index bb68977eb..000000000
--- a/runtime/onert/backend/gpu_cl/open_cl/kernels/ConvWeightsConverter.h
+++ /dev/null
@@ -1,68 +0,0 @@
-/*
- * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
- * Copyright 2019 The TensorFlow Authors. All Rights Reserved.
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#ifndef __ONERT_BACKEND_GPU_CL_OPENCL_KERNELS_CONV_WEIGHTS_CONVERTER_H__
-#define __ONERT_BACKEND_GPU_CL_OPENCL_KERNELS_CONV_WEIGHTS_CONVERTER_H__
-
-#include "open_cl/ClCommandQueue.h"
-#include "open_cl/ClKernel.h"
-#include "open_cl/kernels/ConvCommon.h"
-#include "open_cl/kernels/GpuOperation.h"
-#include "open_cl/Status.h"
-#include "open_cl/Types.h"
-
-namespace onert
-{
-namespace backend
-{
-namespace gpu_cl
-{
-
-class ConverterToConvWeights : public GPUOperation
-{
-public:
- ConverterToConvWeights(const OperationDef &definition,
- const ConvWeightsDescription &conv_weights_desc);
- absl::Status BindArguments(ArgumentsBinder *args) override;
- int3 GetGridSize() const override;
-
- // Move only
- ConverterToConvWeights(ConverterToConvWeights &&operation);
- ConverterToConvWeights &operator=(ConverterToConvWeights &&operation);
- ConverterToConvWeights(const ConverterToConvWeights &) = delete;
- ConverterToConvWeights &operator=(const ConverterToConvWeights &) = delete;
-
-private:
- std::string GetConverterToConvWeightsCode(const OperationDef &op_def,
- const ConvWeightsDescription &conv_weights_desc);
-
- ConvWeightsDescription conv_weights_desc_;
-};
-
-// We expect a src tensor in BHWC layout, where B is interpreted as O, H as H,
-// W as W and C as I. As dst we expect a Tensor with storage type BUFFER such
-// that dst.b * dst.h * dst.w * dst.c =
-// AlignByN(src.b, 4) * src.h * src.w * AlignByN(src.c, 4)
-ConverterToConvWeights
-CreateConverterToConvWeights(const OperationDef &definition,
- const ConvWeightsDescription &conv_weights_desc);
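-
-// Shape example for the contract above (illustrative): a src tensor of
-// BHWC(10, 3, 3, 6), i.e. O=10 and I=6, maps to a BUFFER dst holding
-// AlignByN(10, 4) * 3 * 3 * AlignByN(6, 4) = 12 * 3 * 3 * 8 = 864 elements.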
-
-} // namespace gpu_cl
-} // namespace backend
-} // namespace onert
-
-#endif // __ONERT_BACKEND_GPU_CL_OPENCL_KERNELS_CONV_WEIGHTS_CONVERTER_H__
diff --git a/runtime/onert/backend/gpu_cl/open_cl/kernels/Converter.cc b/runtime/onert/backend/gpu_cl/open_cl/kernels/Converter.cc
deleted file mode 100644
index cc2bc41d4..000000000
--- a/runtime/onert/backend/gpu_cl/open_cl/kernels/Converter.cc
+++ /dev/null
@@ -1,592 +0,0 @@
-/*
- * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
- * Copyright 2019 The TensorFlow Authors. All Rights Reserved.
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#include "Converter.h"
-
-#include <algorithm>
-#include <array>
-#include <string>
-
-#include "open_cl/Arguments.h"
-#include "open_cl/ClCommandQueue.h"
-#include "open_cl/ClErrors.h"
-#include "open_cl/kernels/Util.h"
-#include "open_cl/Precision.h"
-#include "open_cl/InternalTensor.h"
-#include "open_cl/TensorType.h"
-#include "open_cl/TensorTypeUtil.h"
-#include "open_cl/Util.h"
-
-namespace onert
-{
-namespace backend
-{
-namespace gpu_cl
-{
-namespace
-{
-
-class OpenClConverterImpl : public TensorObjectConverter
-{
-public:
- virtual absl::Status Init(const TensorObjectDef &input_def, const TensorObjectDef &output_def,
- Environment *environment) = 0;
-
-protected:
- absl::Status DispatchKernel(cl_mem buffer_mem, Tensor *tensor)
- {
- kernel_.ResetBindingCounter();
- RETURN_IF_ERROR(kernel_.SetMemoryAuto(buffer_mem));
- RETURN_IF_ERROR(args_.SetObjectRef("tensor", tensor));
- RETURN_IF_ERROR(args_.Bind(kernel_.kernel(), kernel_.GetBindingCounter()));
- const int3 grid = int3(tensor->Width() * tensor->Batch(), tensor->Height(), tensor->Slices());
- const int3 work_group_size = {16, 8, 1};
- const int3 work_groups_count = GetWorkGroupsCount(grid, work_group_size);
- return queue_->Dispatch(kernel_, work_groups_count, work_group_size);
- }
-
- Arguments args_;
- BHWC shape_;
- CLKernel kernel_;
- TensorDescriptor tensor_descriptor_;
- CLCommandQueue *queue_ = nullptr;
- const CLContext *context_ = nullptr;
-};
-
-bool IsSupportedDataType(DataType type)
-{
- return type == DataType::FLOAT16 || type == DataType::FLOAT32;
-}
-
-bool IsBHWCOpenCLBuffer(const ObjectDef &def)
-{
- return IsSupportedDataType(def.data_type) && def.object_type == ObjectType::OPENCL_BUFFER &&
- def.data_layout == DataLayout::BHWC;
-}
-
-bool IsOpenCLTensor(const ObjectDef &def)
-{
- const bool is_buffer_tensor =
- def.object_type == ObjectType::OPENCL_BUFFER && def.data_layout == DataLayout::DHWC4;
- const bool is_image2d_tensor =
- def.object_type == ObjectType::OPENCL_TEXTURE && def.data_layout == DataLayout::HDWC4;
- const bool is_image2d_array_tensor =
- def.object_type == ObjectType::OPENCL_TEXTURE && def.data_layout == DataLayout::DHWC4;
- const bool is_single_image_tensor =
- def.object_type == ObjectType::OPENCL_TEXTURE && def.data_layout == DataLayout::BHWC;
- return IsSupportedDataType(def.data_type) && (is_buffer_tensor || is_image2d_tensor ||
- is_image2d_array_tensor || is_single_image_tensor);
-}
-
-absl::Status GetOpenCLMemory(const TensorObject &obj, cl_mem *memory)
-{
- auto texture = absl::get_if<OpenClTexture>(&obj);
- auto buffer = absl::get_if<OpenClBuffer>(&obj);
- if (texture && texture->memobj)
- {
- *memory = texture->memobj;
- }
- else if (buffer && buffer->memobj)
- {
- *memory = buffer->memobj;
- }
- else
- {
- return absl::InvalidArgumentError("Missing OpenCL object.");
- }
- return absl::OkStatus();
-}
-
-// Implements conversion from OpenCL tensor to another OpenCL tensor.
-class TensorToTensorConverter : public OpenClConverterImpl
-{
-public:
- static bool IsSupported(const ObjectDef &input, const ObjectDef &output)
- {
- return IsOpenCLTensor(input) && IsOpenCLTensor(output);
- }
-
- absl::Status Init(const TensorObjectDef &input_def, const TensorObjectDef &output_def,
- Environment *environment) final
- {
- src_tensor_descriptor_.layout = Layout::BHWC;
- src_tensor_descriptor_.storage_type =
- ToTensorStorageType(input_def.object_def.object_type, input_def.object_def.data_layout);
- src_tensor_descriptor_.data_type = input_def.object_def.data_type;
- args_.AddObjectRef("src_tensor", AccessType::READ,
- absl::make_unique<TensorDescriptor>(src_tensor_descriptor_));
-
- dst_tensor_descriptor_.layout = Layout::BHWC;
- dst_tensor_descriptor_.storage_type =
- ToTensorStorageType(output_def.object_def.object_type, output_def.object_def.data_layout);
- dst_tensor_descriptor_.data_type = output_def.object_def.data_type;
- args_.AddObjectRef("dst_tensor", AccessType::WRITE,
- absl::make_unique<TensorDescriptor>(dst_tensor_descriptor_));
-
- const bool need_fp16_support = input_def.object_def.data_type == DataType::FLOAT16 ||
- output_def.object_def.data_type == DataType::FLOAT16;
- const std::string out_data_type = ToCLDataType(output_def.object_def.data_type);
- std::string shader_src;
- if (need_fp16_support)
- {
- shader_src += "#pragma OPENCL EXTENSION cl_khr_fp16 : enable\n";
- }
- shader_src +=
- R"(__kernel void tensor_to_tensor($0) {
- int linear_id = get_global_id(0);
- int x = linear_id / args.dst_tensor.Batch();
- int b = linear_id % args.dst_tensor.Batch();
- int y = get_global_id(1);
- int d = get_global_id(2);
- if (x >= args.dst_tensor.Width() || y >= args.dst_tensor.Height() || d >= args.dst_tensor.Slices()) return;
-)";
- shader_src +=
- " " + out_data_type + "4 input = args.src_tensor.Read<" + out_data_type + ">(x, y, d, b);\n";
- shader_src += " args.dst_tensor.Write(input, x, y, d, b);\n}";
- queue_ = environment->queue();
- context_ = &environment->context();
- shape_ = BHWC(input_def.dimensions.b, input_def.dimensions.h, input_def.dimensions.w,
- input_def.dimensions.c);
- RETURN_IF_ERROR(args_.TransformToCLCode(environment->device().info_, {}, &shader_src));
- return environment->program_cache()->GetOrCreateCLKernel(
- shader_src, "tensor_to_tensor", environment->context(), environment->device(), &kernel_);
- }
-
- absl::Status Convert(const TensorObject &input_obj, const TensorObject &output_obj) override
- {
- cl_mem in_memory = nullptr;
- RETURN_IF_ERROR(GetOpenCLMemory(input_obj, &in_memory));
- cl_mem out_memory = nullptr;
- RETURN_IF_ERROR(GetOpenCLMemory(output_obj, &out_memory));
-
- Tensor src_tensor;
- RETURN_IF_ERROR(
- CreateSharedTensor(*context_, in_memory, shape_, src_tensor_descriptor_, &src_tensor));
- Tensor dst_tensor;
- RETURN_IF_ERROR(
- CreateSharedTensor(*context_, out_memory, shape_, dst_tensor_descriptor_, &dst_tensor));
-
- RETURN_IF_ERROR(args_.SetObjectRef("src_tensor", &src_tensor));
- RETURN_IF_ERROR(args_.SetObjectRef("dst_tensor", &dst_tensor));
-
- RETURN_IF_ERROR(args_.Bind(kernel_.kernel()));
- const int3 grid =
- int3(dst_tensor.Width() * dst_tensor.Batch(), dst_tensor.Height(), dst_tensor.Slices());
- const int3 work_group_size = {16, 8, 1};
- const int3 work_groups_count = GetWorkGroupsCount(grid, work_group_size);
- return queue_->Dispatch(kernel_, work_groups_count, work_group_size);
- }
-
-private:
- TensorDescriptor src_tensor_descriptor_;
- TensorDescriptor dst_tensor_descriptor_;
-};
-
-// Implements conversion from OpenCL-specific tensor layout to BHWC OpenCL
-// buffer.
-class TensorToBHWCBufferConverter : public OpenClConverterImpl
-{
-public:
- static bool IsSupported(const ObjectDef &input, const ObjectDef &output)
- {
- return IsOpenCLTensor(input) && IsBHWCOpenCLBuffer(output);
- }
-
- absl::Status Init(const TensorObjectDef &input_def, const TensorObjectDef &output_def,
- Environment *environment) final
- {
- TensorStorageType src_tensor_type =
- ToTensorStorageType(input_def.object_def.object_type, input_def.object_def.data_layout);
- tensor_descriptor_.layout = Layout::BHWC;
- tensor_descriptor_.storage_type = src_tensor_type;
- tensor_descriptor_.data_type = input_def.object_def.data_type;
- args_.AddObjectRef("tensor", AccessType::READ,
- absl::make_unique<TensorDescriptor>(tensor_descriptor_));
-
- const bool need_fp16_support = input_def.object_def.data_type == DataType::FLOAT16 ||
- output_def.object_def.data_type == DataType::FLOAT16;
- std::string shader_src;
- if (need_fp16_support)
- {
- shader_src += "#pragma OPENCL EXTENSION cl_khr_fp16 : enable\n";
- }
- const std::string out_data_type = ToCLDataType(output_def.object_def.data_type);
- shader_src += "__kernel void tensor_to_bhwc(";
- shader_src += "__global " + out_data_type + "* dst, $0) {\n";
- shader_src += R"( int linear_id = get_global_id(0);
- int x = linear_id / args.tensor.Batch();
- int b = linear_id % args.tensor.Batch();
- int y = get_global_id(1);
- int d = get_global_id(2);
- if (x >= args.tensor.Width() || y >= args.tensor.Height() || d >= args.tensor.Slices()) return;
-)";
- shader_src +=
- " " + out_data_type + "4 input = args.tensor.Read<" + out_data_type + ">(x, y, d, b);\n";
- shader_src += R"( int c = d * 4;
- int index = ((b * args.tensor.Height() + y) * args.tensor.Width() + x) * args.tensor.Channels() + c;
-
- dst[index] = input.x;
- if (c + 1 < args.tensor.Channels()) {
- dst[index + 1] = input.y;
- }
- if (c + 2 < args.tensor.Channels()) {
- dst[index + 2] = input.z;
- }
- if (c + 3 < args.tensor.Channels()) {
- dst[index + 3] = input.w;
- }
-})";
- queue_ = environment->queue();
- context_ = &environment->context();
- shape_ = BHWC(input_def.dimensions.b, input_def.dimensions.h, input_def.dimensions.w,
- input_def.dimensions.c);
- RETURN_IF_ERROR(args_.TransformToCLCode(environment->device().info_, {}, &shader_src));
- return environment->program_cache()->GetOrCreateCLKernel(
- shader_src, "tensor_to_bhwc", environment->context(), environment->device(), &kernel_);
- }
-
- absl::Status Convert(const TensorObject &input_obj, const TensorObject &output_obj) override
- {
- auto output = absl::get_if<OpenClBuffer>(&output_obj);
- if (!output || !output->memobj)
- {
- return absl::InvalidArgumentError("Missing output in tensor_to_bhwc converter");
- }
-
- cl_mem in_memory = nullptr;
- RETURN_IF_ERROR(GetOpenCLMemory(input_obj, &in_memory));
- Tensor tensor;
- RETURN_IF_ERROR(CreateSharedTensor(*context_, in_memory, shape_, tensor_descriptor_, &tensor));
- return DispatchKernel(output->memobj, &tensor);
- }
-};
-
-// Implements conversion from BHWC OpenCL buffer to OpenCL-specific tensor
-// layout.
-class BHWCBufferToTensorConverter : public OpenClConverterImpl
-{
-public:
- static bool IsSupported(const ObjectDef &input, const ObjectDef &output)
- {
- return IsBHWCOpenCLBuffer(input) && IsOpenCLTensor(output);
- }
-
- std::pair<std::string, std::string> GetFromBhwcKernel(const TensorObjectDef &input_def,
- const TensorObjectDef &) const
- {
- return std::make_pair("__global " + ToCLDataType(input_def.object_def.data_type) + "* src",
- R"(int c = d * 4;
- int index = ((b * args.tensor.Height() + y) * args.tensor.Width() + x) * args.tensor.Channels() + c;
- result.x = src[index];
- result.y = c + 1 < args.tensor.Channels() ? src[index + 1] : 1;
- result.z = c + 2 < args.tensor.Channels() ? src[index + 2] : 2;
- result.w = c + 3 < args.tensor.Channels() ? src[index + 3] : 3;
-)");
- }
-
- absl::Status Init(const TensorObjectDef &input_def, const TensorObjectDef &output_def,
- Environment *environment) final
- {
- auto params_kernel = GetFromBhwcKernel(input_def, output_def);
-
- TensorStorageType dst_tensor_type =
- ToTensorStorageType(output_def.object_def.object_type, output_def.object_def.data_layout);
- tensor_descriptor_.layout = Layout::BHWC;
- tensor_descriptor_.storage_type = dst_tensor_type;
- tensor_descriptor_.data_type = output_def.object_def.data_type;
- args_.AddObjectRef("tensor", AccessType::WRITE,
- absl::make_unique<TensorDescriptor>(tensor_descriptor_));
-
- const bool need_fp16_support = input_def.object_def.data_type == DataType::FLOAT16 ||
- output_def.object_def.data_type == DataType::FLOAT16;
- std::string shader_src;
- if (need_fp16_support)
- {
- shader_src += "#pragma OPENCL EXTENSION cl_khr_fp16 : enable\n";
- }
- const std::string in_data_type = ToCLDataType(input_def.object_def.data_type);
- const std::string out_data_type = ToCLDataType(output_def.object_def.data_type);
- shader_src += "__kernel void bhwc_to_tensor(";
- shader_src += "__global " + in_data_type + "* src, $0) {\n";
-
- shader_src += R"( int linear_id = get_global_id(0);
- int x = linear_id / args.tensor.Batch();
- int b = linear_id % args.tensor.Batch();
- int y = get_global_id(1);
- int d = get_global_id(2);
-
- if (x >= args.tensor.Width() || y >= args.tensor.Height() || d >= args.tensor.Slices()) return;
-)";
- shader_src += " " + out_data_type + "4 result;\n";
- shader_src += R"( int c = d * 4;
- int index = ((b * args.tensor.Height() + y) * args.tensor.Width() + x) * args.tensor.Channels() + c;
- result.x = src[index];
- result.y = c + 1 < args.tensor.Channels() ? src[index + 1] : 1;
- result.z = c + 2 < args.tensor.Channels() ? src[index + 2] : 2;
- result.w = c + 3 < args.tensor.Channels() ? src[index + 3] : 3;
-)";
- shader_src += " args.tensor.Write(result, x, y, d, b);\n}";
- queue_ = environment->queue();
- context_ = &environment->context();
- shape_ = BHWC(output_def.dimensions.b, output_def.dimensions.h, output_def.dimensions.w,
- output_def.dimensions.c);
- RETURN_IF_ERROR(args_.TransformToCLCode(environment->device().info_, {}, &shader_src));
- return environment->program_cache()->GetOrCreateCLKernel(
- shader_src, "bhwc_to_tensor", environment->context(), environment->device(), &kernel_);
- }
-
- absl::Status Convert(const TensorObject &input_obj, const TensorObject &output_obj) override
- {
- auto input = absl::get_if<OpenClBuffer>(&input_obj);
- if (!input || !input->memobj)
- {
- return absl::InvalidArgumentError("Missing input in bhwc_to_tensor converter");
- }
- cl_mem out_memory = nullptr;
- RETURN_IF_ERROR(GetOpenCLMemory(output_obj, &out_memory));
- Tensor tensor;
- RETURN_IF_ERROR(CreateSharedTensor(*context_, out_memory, shape_, tensor_descriptor_, &tensor));
- return DispatchKernel(input->memobj, &tensor);
- }
-};
-
-std::array<size_t, 3> CalculateTextureRegion(const TensorObjectDef &def)
-{
- const auto &dims = def.dimensions;
- std::array<size_t, 3> region = {0, 0, 1};
- switch (ToTensorStorageType(def.object_def.object_type, def.object_def.data_layout))
- {
- case TensorStorageType::SINGLE_TEXTURE_2D:
- region[0] = static_cast<size_t>(dims.w * dims.b);
- region[1] = static_cast<size_t>(dims.h);
- break;
- case TensorStorageType::TEXTURE_2D:
- region[0] = static_cast<size_t>(dims.w * dims.b);
- region[1] = static_cast<size_t>(dims.h * dims.d());
- break;
- case TensorStorageType::TEXTURE_ARRAY:
- region[0] = static_cast<size_t>(dims.w * dims.b);
- region[1] = static_cast<size_t>(dims.h);
- region[2] = static_cast<size_t>(dims.d());
- break;
- default:
- break;
- }
- return region;
-}
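-
-// Region example (illustrative dims): with w=4, b=2, h=5 and d() == 3 channel
-// slices, TEXTURE_2D yields {4 * 2, 5 * 3, 1} = {8, 15, 1}, while
-// TEXTURE_ARRAY yields {8, 5, 3}.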
-
-bool IsOpenClTextureOrBuffer(ObjectType type)
-{
- return type == ObjectType::OPENCL_BUFFER || type == ObjectType::OPENCL_TEXTURE;
-}
-
-// Copies data from one object of the same type and layout to another object.
-class TrivialCopier : public OpenClConverterImpl
-{
-public:
- static bool IsSupported(const ObjectDef &input, const ObjectDef &output)
- {
- return IsOpenClTextureOrBuffer(input.object_type) && input.data_type == output.data_type &&
- input.object_type == output.object_type && input.data_layout == output.data_layout;
- }
-
- absl::Status Init(const TensorObjectDef &input_def, const TensorObjectDef &output_def,
- Environment *environment) final
- {
- shape_ = BHWC(input_def.dimensions.b, input_def.dimensions.h, input_def.dimensions.w,
- input_def.dimensions.c);
- data_type_ = input_def.object_def.data_type;
- queue_ = environment->queue();
- region_ = CalculateTextureRegion(output_def);
- return absl::OkStatus();
- }
-
- absl::Status Convert(const TensorObject &input_obj, const TensorObject &output_obj) override
- {
- auto texture_input = absl::get_if<OpenClTexture>(&input_obj);
- auto texture_output = absl::get_if<OpenClTexture>(&output_obj);
- if (texture_input && texture_output)
- {
- return Copy(*texture_input, *texture_output);
- }
- auto buffer_input = absl::get_if<OpenClBuffer>(&input_obj);
- auto buffer_output = absl::get_if<OpenClBuffer>(&output_obj);
- if (buffer_input && buffer_output)
- {
- return Copy(*buffer_input, *buffer_output);
- }
- return absl::InternalError("Unexpected object");
- }
-
- absl::Status Copy(const OpenClBuffer &input, const OpenClBuffer &output)
- {
- if (input.memobj == output.memobj)
- {
- return absl::OkStatus();
- }
- return GetOpenCLError(clEnqueueCopyBuffer(queue_->queue(), input.memobj, output.memobj, 0, 0,
- SizeOf(data_type_) * shape_.w * shape_.h *
- AlignByN(shape_.c, 4) * shape_.b,
- 0, nullptr, nullptr));
- }
-
- absl::Status Copy(const OpenClTexture &input, const OpenClTexture &output)
- {
- if (input.memobj == output.memobj)
- {
- return absl::OkStatus();
- }
- size_t origin[3] = {0, 0, 0};
- return GetOpenCLError(clEnqueueCopyImage(queue_->queue(), input.memobj, output.memobj, origin,
- origin, region_.data(), 0, nullptr, nullptr));
- }
-
-private:
- DataType data_type_ = DataType::UNKNOWN;
- std::array<size_t, 3> region_;
-};
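-
-// Copy-size example for TrivialCopier (illustrative): a FLOAT32 buffer of
-// shape BHWC(1, 8, 8, 6) copies SizeOf(FLOAT32) * 8 * 8 * AlignByN(6, 4) * 1
-// = 4 * 8 * 8 * 8 = 2048 bytes, because channels are stored in padded
-// 4-element slices.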
-
-// Copies data between CPU memory and an OpenCL texture or buffer.
-class CpuCopier : public OpenClConverterImpl
-{
-public:
- static bool IsSupported(const ObjectDef &input, const ObjectDef &output)
- {
- return input.data_type == output.data_type && input.data_layout == output.data_layout &&
- ((input.object_type == ObjectType::CPU_MEMORY &&
- IsOpenClTextureOrBuffer(output.object_type)) ||
- (output.object_type == ObjectType::CPU_MEMORY &&
- IsOpenClTextureOrBuffer(input.object_type)));
- }
-
- absl::Status Init(const TensorObjectDef &input_def, const TensorObjectDef &output_def,
- Environment *environment) final
- {
- region_ = CalculateTextureRegion(
- input_def.object_def.object_type == ObjectType::CPU_MEMORY ? output_def : input_def);
- queue_ = environment->queue();
- return absl::OkStatus();
- }
-
- absl::Status Convert(const TensorObject &input_obj, const TensorObject &output_obj) override
- {
- auto cpu_input = absl::get_if<CpuMemory>(&input_obj);
- auto cpu_output = absl::get_if<CpuMemory>(&output_obj);
-
- if (cpu_input)
- {
- auto texture_output = absl::get_if<OpenClTexture>(&output_obj);
- if (texture_output)
- {
- return queue_->EnqueueWriteImage(texture_output->memobj,
- int3(region_[0], region_[1], region_[2]), cpu_input->data);
- }
- auto buffer_output = absl::get_if<OpenClBuffer>(&output_obj);
- if (buffer_output)
- {
- return queue_->EnqueueWriteBuffer(buffer_output->memobj, cpu_input->size_bytes,
- cpu_input->data);
- }
- }
- else if (cpu_output)
- {
- auto texture_input = absl::get_if<OpenClTexture>(&input_obj);
- if (texture_input)
- {
- return queue_->EnqueueReadImage(texture_input->memobj,
- int3(region_[0], region_[1], region_[2]), cpu_output->data);
- }
- auto buffer_input = absl::get_if<OpenClBuffer>(&input_obj);
- if (buffer_input)
- {
- return queue_->EnqueueReadBuffer(buffer_input->memobj, cpu_output->size_bytes,
- cpu_output->data);
- }
- }
- return absl::InternalError("Unexpected object");
- }
-
-private:
- std::array<size_t, 3> region_;
-};
-
-class OpenClTensorConverterBuilder : public TensorObjectConverterBuilder
-{
-public:
- explicit OpenClTensorConverterBuilder(Environment *environment) : environment_(environment) {}
-
- bool IsSupported(const TensorObjectDef &input, const TensorObjectDef &output) const final
- {
- const auto &input_def = input.object_def;
- const auto &output_def = output.object_def;
- return input.dimensions == output.dimensions &&
- (TrivialCopier::IsSupported(input_def, output_def) ||
- TensorToTensorConverter::IsSupported(input_def, output_def) ||
- CpuCopier::IsSupported(input_def, output_def) ||
- TensorToBHWCBufferConverter::IsSupported(input_def, output_def) ||
- BHWCBufferToTensorConverter::IsSupported(input_def, output_def));
- }
-
- absl::Status MakeConverter(const TensorObjectDef &input, const TensorObjectDef &output,
- std::unique_ptr<TensorObjectConverter> *converter) final
- {
- std::unique_ptr<OpenClConverterImpl> impl;
- const auto &input_def = input.object_def;
- const auto &output_def = output.object_def;
- if (TrivialCopier::IsSupported(input_def, output_def))
- {
- impl = absl::make_unique<TrivialCopier>();
- }
- else if (TensorToTensorConverter::IsSupported(input_def, output_def))
- {
- impl = absl::make_unique<TensorToTensorConverter>();
- }
- else if (CpuCopier::IsSupported(input_def, output_def))
- {
- impl = absl::make_unique<CpuCopier>();
- }
- else if (TensorToBHWCBufferConverter::IsSupported(input_def, output_def))
- {
- impl = absl::make_unique<TensorToBHWCBufferConverter>();
- }
- else if (BHWCBufferToTensorConverter::IsSupported(input_def, output_def))
- {
- impl = absl::make_unique<BHWCBufferToTensorConverter>();
- }
- else
- {
- return absl::UnimplementedError("Unsupported conversion");
- }
- RETURN_IF_ERROR(impl->Init(input, output, environment_));
- *converter = std::move(impl);
- return absl::OkStatus();
- }
-
- Environment *environment_;
-};
-
-} // namespace
-
-std::unique_ptr<TensorObjectConverterBuilder> NewConverterBuilder(Environment *environment)
-{
- return absl::make_unique<OpenClTensorConverterBuilder>(environment);
-}
-
-} // namespace gpu_cl
-} // namespace backend
-} // namespace onert
diff --git a/runtime/onert/backend/gpu_cl/open_cl/kernels/Converter.h b/runtime/onert/backend/gpu_cl/open_cl/kernels/Converter.h
deleted file mode 100644
index d69ec85bb..000000000
--- a/runtime/onert/backend/gpu_cl/open_cl/kernels/Converter.h
+++ /dev/null
@@ -1,40 +0,0 @@
-/*
- * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
- * Copyright 2019 The TensorFlow Authors. All Rights Reserved.
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#ifndef __ONERT_BACKEND_GPU_CL_OPENCL_KERNELS_CONVERTER_H__
-#define __ONERT_BACKEND_GPU_CL_OPENCL_KERNELS_CONVERTER_H__
-
-#include <memory>
-
-#include "open_cl/Environment.h"
-#include "open_cl/Spi.h"
-
-namespace onert
-{
-namespace backend
-{
-namespace gpu_cl
-{
-// Supports conversions from BHWC to internal OpenCL tensor representation and
-// back. Also supports F16/F32.
-std::unique_ptr<TensorObjectConverterBuilder> NewConverterBuilder(Environment *environment);
-
-} // namespace gpu_cl
-} // namespace backend
-} // namespace onert
-
-#endif // __ONERT_BACKEND_GPU_CL_OPENCL_KERNELS_CONVERTER_H__
diff --git a/runtime/onert/backend/gpu_cl/open_cl/kernels/DepthwiseConv.cc b/runtime/onert/backend/gpu_cl/open_cl/kernels/DepthwiseConv.cc
deleted file mode 100644
index e409fef47..000000000
--- a/runtime/onert/backend/gpu_cl/open_cl/kernels/DepthwiseConv.cc
+++ /dev/null
@@ -1,382 +0,0 @@
-/*
- * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
- * Copyright 2019 The TensorFlow Authors. All Rights Reserved.
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#include "DepthwiseConv.h"
-
-#include <string>
-#include <utility>
-#include <vector>
-
-#include "open_cl/ClDevice.h"
-#include "open_cl/kernels/Util.h"
-#include "open_cl/kernels/WorkGroupPicking.h"
-#include "open_cl/LinearStorage.h"
-
-namespace onert
-{
-namespace backend
-{
-namespace gpu_cl
-{
-namespace
-{
-
-bool IsSpecializedCase(int channel_multiplier)
-{
- return channel_multiplier == 1 || channel_multiplier == 2 || channel_multiplier == 4;
-}
-
-std::string GetSrcValue(int channel_multiplier, const std::string &coords)
-{
- std::string c;
- if (channel_multiplier == 1)
- {
- c += " FLT4 src_final = args.src_tensor.Read(" + coords + ", S);\n";
- }
- else if (channel_multiplier == 2)
- {
- c += " int s_layer = S / 2;\n";
- c += " FLT4 src = args.src_tensor.Read(" + coords + ", s_layer);\n";
- c += " FLT2 t0 = S % 2 == 0 ? src.xy : src.zw;\n";
- c += " FLT4 src_final = (FLT4)(t0.x, t0.x, t0.y, t0.y);\n";
- }
- else if (channel_multiplier == 4)
- {
- c += " int s_layer = S / 4;\n";
- c += " FLT4 src = args.src_tensor.Read(" + coords + ", s_layer);\n";
- c += " FLT t0 = src.x;\n";
- c += " int reminder = S % 4;\n";
- c += " if (reminder == 1) t0 = src.y;\n";
- c += " if (reminder == 2) t0 = src.z;\n";
- c += " if (reminder == 3) t0 = src.w;\n";
- c += " FLT4 src_final = (FLT4)(t0, t0, t0, t0);\n";
- }
- else
- {
- c += " int s_layer = S / args.ch_multiplier;\n";
- c += " FLT4 src = args.src_tensor.Read(" + coords + ", s_layer);\n";
- c += " int s_offset = (S % args.ch_multiplier) * 4;\n";
- c += " FLT4 src_final;\n";
- c += " FLT temp_arr[4] = {src.x, src.y, src.z, src.w};\n";
- c += " src_final.x = temp_arr[(s_offset + 0) / args.ch_multiplier];\n";
- c += " src_final.y = temp_arr[(s_offset + 1) / args.ch_multiplier];\n";
- c += " src_final.z = temp_arr[(s_offset + 2) / args.ch_multiplier];\n";
- c += " src_final.w = temp_arr[(s_offset + 3) / args.ch_multiplier];\n";
- }
-
- return c;
-}
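-
-// Example of the channel_multiplier == 2 case above: output slice S reads
-// source slice S / 2; even S takes the xy half of the source float4, odd S
-// takes the zw half, and each value is duplicated as (t0.x, t0.x, t0.y, t0.y)
-// so that every source channel feeds two output channels.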
-
-std::string GenerateDepthwiseConvolutionCode(const OperationDef &op_def, bool stride_correction,
- int channel_multiplier, bool weights_are_buffer,
- bool dynamic_weights, GPUOperation *op)
-{
- auto src_desc = op_def.src_tensors[0];
- src_desc.SetTextureAddressMode(TextureAddressMode::ZERO);
- if (op_def.IsBatchSupported())
- {
- src_desc.SetStateVar("BatchedWidth", "true");
- }
- op->AddSrcTensor("src_tensor", src_desc);
- if (dynamic_weights)
- {
- op->AddSrcTensor("weights", op_def.src_tensors[1]);
- }
-
- auto dst_desc = op_def.dst_tensors[0];
- if (op_def.IsBatchSupported())
- {
- dst_desc.SetStateVar("BatchedWidth", "true");
- }
- op->AddDstTensor("dst_tensor", dst_desc);
-
- const auto src_tensor_type = op_def.src_tensors[0].storage_type;
-
- std::string c = GetCommonDefines(op_def.precision);
-
- const bool manual_clamp = src_tensor_type == TensorStorageType::BUFFER ||
- src_tensor_type == TensorStorageType::IMAGE_BUFFER;
-
- c += "__kernel void main_function(\n";
- c += "$0) {\n";
- c += " int X = get_global_id(0);\n";
- if (op_def.dst_tensors[0].HasAxis(Axis::DEPTH))
- {
- c += " int linear_id_1 = get_global_id(1);\n";
- c += " int Y = linear_id_1 / args.dst_tensor.Depth();\n";
- c += " int Z = linear_id_1 % args.dst_tensor.Depth();\n";
- }
- else
- {
- c += " int Y = get_global_id(1);\n";
- }
- c += " int S = get_global_id(2);\n";
- c += " if (X >= args.dst_tensor.Width() || Y >= args.dst_tensor.Height() || "
- "S >= args.dst_tensor.Slices()) { \n";
- c += " return; \n";
- c += " } \n";
- c += " ACCUM_FLT4 r = (ACCUM_FLT4)(0.0f, 0.0f, 0.0f, 0.0f);\n";
- if (stride_correction)
- {
- c += " int x_offseted = " +
- GetXStrideCorrectedV2("X", "args.src_tensor.Batch()", "args.stride_x", "args.padding_x") +
- ";\n";
- }
- else
- {
- if (op_def.IsBatchSupported())
- {
- c += " int x_offseted = X * args.stride_x + args.padding_x * "
- "args.src_tensor.Batch();\n";
- }
- else
- {
- c += " int x_offseted = X * args.stride_x + args.padding_x;\n";
- }
- }
- c += " int y_offseted = Y * args.stride_y + args.padding_y;\n";
- if (!dynamic_weights)
- {
- std::string weights_offset = "args.kernel_size_x * args.kernel_size_y";
- if (op_def.dst_tensors[0].HasAxis(Axis::DEPTH))
- {
- c += " int z_offseted = Z * args.stride_z + args.padding_z;\n";
- weights_offset += " * args.kernel_size_z";
- }
- if (weights_are_buffer)
- {
- c += " int fx_c = S * " + weights_offset + ";\n";
- }
- else
- {
- c += " int fx_c = 0;\n";
- }
- }
- std::string kernel_size_x = dynamic_weights ? "args.weights.Width()" : "args.kernel_size_x";
- std::string kernel_size_y = dynamic_weights ? "args.weights.Height()" : "args.kernel_size_y";
- std::string kernel_size_z = dynamic_weights ? "args.weights.Depth()" : "args.kernel_size_z";
-
- std::string flat_coords = "x_c, y_c";
- if (manual_clamp)
- {
- std::string check = "!outside_x && !outside_y";
- if (op_def.dst_tensors[0].HasAxis(Axis::DEPTH))
- {
- check += " && !outside_z";
- flat_coords += ", z_c";
- c += " for (int kz = 0; kz < " + kernel_size_z + "; ++kz) {\n";
- c += " int z_c = z_offseted + kz * args.dilation_z;\n";
- c += " bool outside_z = z_c < 0 || z_c >= args.src_tensor.Depth();\n";
- }
- c += " for (int ky = 0; ky < " + kernel_size_y + "; ++ky) {\n";
- c += " int y_c = y_offseted + ky * args.dilation_y;\n";
- c += " bool outside_y = y_c < 0 || y_c >= args.src_tensor.Height();\n";
- c += " for (int kx = 0; kx < " + kernel_size_x + "; ++kx) {\n";
- const std::string dilation_x =
- op_def.IsBatchSupported() ? "args.dilation_x * args.src_tensor.Batch()" : "args.dilation_x";
- c += " int x_c = x_offseted + kx * " + dilation_x + ";\n";
- c += " bool outside_x = x_c < 0 || x_c >= args.src_tensor.Width();\n";
- c += " if (" + check + ") {\n";
- if (dynamic_weights)
- {
- c += " FLT4 f = args.weights.Read(kx, ky, S);\n";
- }
- else
- {
- if (weights_are_buffer)
- {
- c += " FLT4 f = args.weights.Read(fx_c);\n";
- }
- else
- {
- c += " FLT4 f = args.weights.Read(fx_c, S);\n";
- }
- }
- c += GetSrcValue(channel_multiplier, flat_coords);
- c += " r += TO_ACCUM_TYPE(src_final * f);\n";
- c += " };\n";
- if (!dynamic_weights)
- {
- c += " fx_c++;\n";
- }
- c += " }\n";
- c += " }\n";
- if (op_def.dst_tensors[0].HasAxis(Axis::DEPTH))
- {
- c += " }\n";
- }
- }
- else
- { // Texture types with ZERO clamping
- if (op_def.dst_tensors[0].HasAxis(Axis::DEPTH))
- {
- flat_coords += ", z_c";
- c += " for (int kz = 0; kz < " + kernel_size_z + "; ++kz) {\n";
- c += " int z_c = z_offseted + kz * args.dilation_z;\n";
- if (src_tensor_type != TensorStorageType::TEXTURE_3D)
- { // Only TEXTURE_3D supports clamping
- // in DEPTH dimension
- c += " if (z_c < 0 || z_c >= args.src_tensor.Depth()) {\n";
- c += " fx_c += args.kernel_size_y * args.kernel_size_x;\n";
- c += " continue;\n";
- c += " }\n";
- }
- }
- c += " for (int ky = 0; ky < " + kernel_size_y + "; ++ky) {\n";
- c += " int y_c = y_offseted + ky * args.dilation_y;\n";
- c += " for (int kx = 0; kx < " + kernel_size_x + "; ++kx) {\n";
- const std::string dilation_x =
- op_def.IsBatchSupported() ? "args.dilation_x * args.src_tensor.Batch()" : "args.dilation_x";
- c += " int x_c = x_offseted + kx * " + dilation_x + ";\n";
- c += GetSrcValue(channel_multiplier, flat_coords);
- if (dynamic_weights)
- {
- c += " FLT4 f = args.weights.Read(kx, ky, S);\n";
- }
- else
- {
- if (weights_are_buffer)
- {
- c += " FLT4 f = args.weights.Read(fx_c);\n";
- }
- else
- {
- c += " FLT4 f = args.weights.Read(fx_c, S);\n";
- }
- c += " fx_c++;\n";
- }
- c += " r += TO_ACCUM_TYPE(src_final * f);\n";
- c += " }\n";
- c += " }\n";
- if (op_def.dst_tensors[0].HasAxis(Axis::DEPTH))
- {
- c += " }\n";
- }
- }
- c += " FLT4 res0 = TO_FLT4(r) + args.biases.Read(S);\n";
- if (op_def.dst_tensors[0].HasAxis(Axis::DEPTH))
- {
- c += " args.dst_tensor.Write(res0, X, Y, Z, S);\n";
- }
- else
- {
- c += " args.dst_tensor.Write(res0, X, Y, S);\n";
- }
- c += "}\n";
-
- return c;
-}
-} // namespace
-
-GPUOperation CreateDepthwiseConvolution2D(const DeviceInfo &device_info,
- const OperationDef &definition,
- const DepthwiseConvolution2DAttributes &attr)
-{
- bool weights_are_buffer = device_info.IsMali();
- GPUOperation op(definition);
- op.args_.AddInt("kernel_size_x", attr.weights.shape.w);
- op.args_.AddInt("stride_x", attr.strides.w);
- op.args_.AddInt("padding_x", -attr.padding.prepended.w);
- op.args_.AddInt("dilation_x", attr.dilations.w);
- op.args_.AddInt("kernel_size_y", attr.weights.shape.h);
- op.args_.AddInt("stride_y", attr.strides.h);
- op.args_.AddInt("padding_y", -attr.padding.prepended.h);
- op.args_.AddInt("dilation_y", attr.dilations.h);
- if (!IsSpecializedCase(attr.weights.shape.o))
- {
- op.args_.AddInt("ch_multiplier", attr.weights.shape.o);
- }
- const bool stride_correction = definition.IsBatchSupported() && attr.strides.w != 1;
- op.code_ = GenerateDepthwiseConvolutionCode(definition, stride_correction, attr.weights.shape.o,
- weights_are_buffer, false, &op);
- UploadWeightsForDWConv2D(attr.weights, weights_are_buffer, definition.precision, &op);
- op.tensor_to_grid_ = TensorToGrid::kWBToX_HDToY_SToZ;
-
- TensorLinearDescriptor desc;
- desc.storage_type =
- weights_are_buffer ? LinearStorageType::BUFFER : LinearStorageType::TEXTURE_2D;
- desc.element_type = definition.GetDataType();
- desc.UploadLinearData(attr.bias);
- op.args_.AddObject("biases", absl::make_unique<TensorLinearDescriptor>(std::move(desc)));
- return op;
-}
-
-GPUOperation
-CreateDepthwiseConvolution2DDynamicWeights(const DeviceInfo &device_info,
- const OperationDef &definition,
- const DepthwiseConvolution2DAttributes &attr)
-{
- GPUOperation op(definition);
- op.args_.AddInt("stride_x", attr.strides.w);
- op.args_.AddInt("padding_x", -attr.padding.prepended.w);
- op.args_.AddInt("dilation_x", attr.dilations.w);
- op.args_.AddInt("stride_y", attr.strides.h);
- op.args_.AddInt("padding_y", -attr.padding.prepended.h);
- op.args_.AddInt("dilation_y", attr.dilations.h);
- const bool stride_correction = definition.IsBatchSupported() && attr.strides.w != 1;
- op.code_ = GenerateDepthwiseConvolutionCode(definition, stride_correction, 1, false, true, &op);
- op.tensor_to_grid_ = TensorToGrid::kWBToX_HDToY_SToZ;
-
- TensorLinearDescriptor desc;
- desc.storage_type =
- device_info.IsMali() ? LinearStorageType::BUFFER : LinearStorageType::TEXTURE_2D;
- desc.element_type = definition.GetDataType();
- desc.UploadLinearData(attr.bias);
- op.args_.AddObject("biases", absl::make_unique<TensorLinearDescriptor>(std::move(desc)));
- return op;
-}
-
-GPUOperation CreateDepthwiseConvolution3D(const DeviceInfo &device_info,
- const OperationDef &definition,
- const DepthwiseConvolution3DAttributes &attr)
-{
- bool weights_are_buffer = device_info.IsMali();
- GPUOperation op(definition);
- op.args_.AddInt("kernel_size_x", attr.weights.shape.w);
- op.args_.AddInt("stride_x", attr.strides.w);
- op.args_.AddInt("padding_x", -attr.padding.prepended.w);
- op.args_.AddInt("dilation_x", attr.dilations.w);
- op.args_.AddInt("kernel_size_y", attr.weights.shape.h);
- op.args_.AddInt("stride_y", attr.strides.h);
- op.args_.AddInt("padding_y", -attr.padding.prepended.h);
- op.args_.AddInt("dilation_y", attr.dilations.h);
- op.args_.AddInt("kernel_size_z", attr.weights.shape.d);
- op.args_.AddInt("stride_z", attr.strides.d);
- op.args_.AddInt("padding_z", -attr.padding.prepended.d);
- op.args_.AddInt("dilation_z", attr.dilations.d);
- if (!IsSpecializedCase(attr.weights.shape.o))
- {
- op.args_.AddInt("ch_multiplier", attr.weights.shape.o);
- }
- const bool stride_correction = definition.IsBatchSupported() && attr.strides.w != 1;
- op.code_ = GenerateDepthwiseConvolutionCode(definition, stride_correction, attr.weights.shape.o,
- weights_are_buffer, false, &op);
- UploadWeightsForDWConv3D(attr.weights, weights_are_buffer, definition.precision, &op);
- op.tensor_to_grid_ = TensorToGrid::kWBToX_HDToY_SToZ;
-
- TensorLinearDescriptor desc;
- desc.storage_type =
- weights_are_buffer ? LinearStorageType::BUFFER : LinearStorageType::TEXTURE_2D;
- desc.element_type = definition.GetDataType();
- desc.UploadLinearData(attr.bias);
- op.args_.AddObject("biases", absl::make_unique<TensorLinearDescriptor>(std::move(desc)));
- return op;
-}
-
-} // namespace gpu_cl
-} // namespace backend
-} // namespace onert
diff --git a/runtime/onert/backend/gpu_cl/open_cl/kernels/DepthwiseConv.h b/runtime/onert/backend/gpu_cl/open_cl/kernels/DepthwiseConv.h
deleted file mode 100644
index cbadd9fde..000000000
--- a/runtime/onert/backend/gpu_cl/open_cl/kernels/DepthwiseConv.h
+++ /dev/null
@@ -1,233 +0,0 @@
-/*
- * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
- * Copyright 2019 The TensorFlow Authors. All Rights Reserved.
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#ifndef __ONERT_BACKEND_GPU_CL_OPENCL_KERNELS_DEPTHWISE_CONV_H__
-#define __ONERT_BACKEND_GPU_CL_OPENCL_KERNELS_DEPTHWISE_CONV_H__
-
-#include <vector>
-
-#include "open_cl/Buffer.h"
-#include "open_cl/kernels/GpuOperation.h"
-#include "open_cl/LinearStorage.h"
-#include "open_cl/Tensor.h"
-#include "open_cl/Texture2d.h"
-#include "open_cl/Util.h"
-#include "open_cl/DataType.h"
-#include "open_cl/Operations.h"
-#include "open_cl/Shape.h"
-#include "open_cl/Status.h"
-#include "open_cl/Tensor.h"
-#include "open_cl/Types.h"
-
-namespace onert
-{
-namespace backend
-{
-namespace gpu_cl
-{
-
-template <DataType S, typename T>
-void RearrangeWeightsForDWConv2D(const InternalTensor<OHWI, S> &weights, absl::Span<T> dst)
-{
- const int dst_channels = weights.shape.i * weights.shape.o;
- const int dst_depth = DivideRoundUp(dst_channels, 4);
- const int kernel_x = weights.shape.w;
- const int kernel_y = weights.shape.h;
-
- int counter = 0;
- for (int d = 0; d < dst_depth; ++d)
- {
- for (int y = 0; y < kernel_y; ++y)
- {
- for (int x = 0; x < kernel_x; ++x)
- {
- T filter_val;
- for (int i = 0; i < 4; ++i)
- {
- const int d_ch = d * 4 + i;
- if (d_ch < dst_channels)
- {
- const int f_index =
- weights.shape.LinearIndex({d_ch % weights.shape.o, y, x, d_ch / weights.shape.o});
- filter_val[i] = weights.data[f_index];
- }
- else
- {
- filter_val[i] = 0.0f;
- }
- }
- dst[counter++] = filter_val;
- }
- }
- }
-}
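-
-// Index example for the rearrangement above (illustrative): with
-// OHWI(o=2, i=3) there are 6 destination channels in dst_depth = 2 slices;
-// destination channel d_ch = 4 reads the weight at
-// {4 % 2, y, x, 4 / 2} = {0, y, x, 2}, i.e. multiplier 0 of input channel 2.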
-
-template <DataType T>
-void UploadWeightsForDWConv2D(const InternalTensor<OHWI, T> &weights, bool weights_are_buffer,
- CalculationsPrecision precision, GPUOperation *op)
-{
- const int dst_channels = weights.shape.i * weights.shape.o;
- const int dst_slices = DivideRoundUp(dst_channels, 4);
- const int kernel_x = weights.shape.w;
- const int kernel_y = weights.shape.h;
-
- const int elements_count = kernel_x * kernel_y * dst_slices;
-
- const bool fp32_weights = precision == CalculationsPrecision::F32;
-  const int float4_size = fp32_weights ? 16 : 8; // sizeof(float4) : sizeof(half4)
-
- std::vector<uint8_t> data(float4_size * elements_count);
-
- if (fp32_weights)
- {
- float4 *ptr = reinterpret_cast<float4 *>(data.data());
- RearrangeWeightsForDWConv2D(weights, absl::MakeSpan(ptr, elements_count));
- }
-  // TODO: F16 weights are not supported yet. Once they are, the commented-out
-  // branch below should handle the half-precision path.
- //
- // else {
- // half4* ptr = reinterpret_cast<half4*>(data.data());
- // RearrangeWeightsForDWConv2D(weights, absl::MakeSpan(ptr, elements_count));
- // }
-
- if (weights_are_buffer)
- {
- BufferDescriptor desc;
- desc.element_type = fp32_weights ? DataType::FLOAT32 : DataType::FLOAT16;
- desc.element_size = 4;
- desc.size = float4_size * elements_count;
- desc.data = std::move(data);
- op->args_.AddObject("weights", absl::make_unique<BufferDescriptor>(desc));
- }
- else
- {
- Texture2DDescriptor desc;
- desc.element_type = fp32_weights ? DataType::FLOAT32 : DataType::FLOAT16;
- desc.size = int2(kernel_x * kernel_y, dst_slices);
- desc.data = std::move(data);
- op->args_.AddObject("weights", absl::make_unique<Texture2DDescriptor>(desc));
- }
-}
-
-template <DataType S, typename T>
-void RearrangeWeightsForDWConv3D(const InternalTensor<OHWDI, S> &weights, absl::Span<T> dst)
-{
- const int dst_channels = weights.shape.i * weights.shape.o;
- const int dst_slices = DivideRoundUp(dst_channels, 4);
- const int kernel_x = weights.shape.w;
- const int kernel_y = weights.shape.h;
- const int kernel_z = weights.shape.d;
-
- int counter = 0;
- for (int d = 0; d < dst_slices; ++d)
- {
- for (int z = 0; z < kernel_z; ++z)
- {
- for (int y = 0; y < kernel_y; ++y)
- {
- for (int x = 0; x < kernel_x; ++x)
- {
- T filter_val;
- for (int i = 0; i < 4; ++i)
- {
- const int d_ch = d * 4 + i;
- if (d_ch < dst_channels)
- {
- const int f_index = weights.shape.LinearIndex(
- {d_ch % weights.shape.o, y, x, z, d_ch / weights.shape.o});
- filter_val[i] = weights.data[f_index];
- }
- else
- {
- filter_val[i] = 0.0f;
- }
- }
- dst[counter++] = filter_val;
- }
- }
- }
- }
-}
-
-template <DataType T>
-void UploadWeightsForDWConv3D(const InternalTensor<OHWDI, T> &weights, bool weights_are_buffer,
- CalculationsPrecision precision, GPUOperation *op)
-{
- const int dst_channels = weights.shape.i * weights.shape.o;
- const int dst_slices = DivideRoundUp(dst_channels, 4);
- const int kernel_x = weights.shape.w;
- const int kernel_y = weights.shape.h;
- const int kernel_z = weights.shape.d;
-
- const int elements_count = kernel_x * kernel_y * kernel_z * dst_slices;
-
- const bool fp32_weights = precision == CalculationsPrecision::F32;
- const int float4_size = fp32_weights ? 16 : 8;
-
- std::vector<uint8_t> data(float4_size * elements_count);
-
- if (fp32_weights)
- {
- float4 *ptr = reinterpret_cast<float4 *>(data.data());
- RearrangeWeightsForDWConv3D(weights, absl::MakeSpan(ptr, elements_count));
- }
- // TODO: F16 weights are not supported yet; the commented-out branch below
- // sketches the intended half-precision path.
- //
- // else {
- // half4* ptr = reinterpret_cast<half4*>(data.data());
- // RearrangeWeightsForDWConv3D(weights, absl::MakeSpan(ptr, elements_count));
- // }
-
- if (weights_are_buffer)
- {
- BufferDescriptor desc;
- desc.element_type = fp32_weights ? DataType::FLOAT32 : DataType::FLOAT16;
- desc.element_size = 4;
- desc.size = float4_size * elements_count;
- desc.data = std::move(data);
- op->args_.AddObject("weights", absl::make_unique<BufferDescriptor>(std::move(desc)));
- }
- else
- {
- Texture2DDescriptor desc;
- desc.element_type = fp32_weights ? DataType::FLOAT32 : DataType::FLOAT16;
- desc.size = int2(kernel_x * kernel_y * kernel_z, dst_slices);
- desc.data = std::move(data);
- op->args_.AddObject("weights", absl::make_unique<Texture2DDescriptor>(std::move(desc)));
- }
-}
-
-GPUOperation CreateDepthwiseConvolution2D(const DeviceInfo &device_info,
- const OperationDef &definition,
- const DepthwiseConvolution2DAttributes &attr);
-
-GPUOperation
-CreateDepthwiseConvolution2DDynamicWeights(const DeviceInfo &device_info,
- const OperationDef &definition,
- const DepthwiseConvolution2DAttributes &attr);
-
-GPUOperation CreateDepthwiseConvolution3D(const DeviceInfo &device_info,
- const OperationDef &definition,
- const DepthwiseConvolution3DAttributes &attr);
-
-} // namespace gpu_cl
-} // namespace backend
-} // namespace onert
-
-#endif // __ONERT_BACKEND_GPU_CL_OPENCL_KERNELS_DEPTHWISE_CONV_H__
diff --git a/runtime/onert/backend/gpu_cl/open_cl/kernels/DepthwiseConv3x3.cc b/runtime/onert/backend/gpu_cl/open_cl/kernels/DepthwiseConv3x3.cc
deleted file mode 100644
index 89a14f14d..000000000
--- a/runtime/onert/backend/gpu_cl/open_cl/kernels/DepthwiseConv3x3.cc
+++ /dev/null
@@ -1,358 +0,0 @@
-/*
- * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
- * Copyright 2019 The TensorFlow Authors. All Rights Reserved.
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#include "DepthwiseConv3x3.h"
-
-#include <string>
-#include <utility>
-
-#include "open_cl/kernels/Util.h"
-#include "open_cl/kernels/WorkGroupPicking.h"
-#include "open_cl/Precision.h"
-#include "open_cl/Status.h"
-
-namespace onert
-{
-namespace backend
-{
-namespace gpu_cl
-{
-
-DepthwiseConv3x3::DepthwiseConv3x3(const OperationDef &definition, bool weights_are_buffer,
- bool local_mem_uploads, const DeviceInfo &device_info)
- : GPUOperation(definition), local_mem_uploads_(local_mem_uploads)
-{
- work_group_size_ = int3(8, 4, 1);
- code_ = GenerateDepthwiseConvCode(definition_, weights_are_buffer, local_mem_uploads_);
-
- if (definition_.precision == CalculationsPrecision::F16 && device_info.IsPowerVR())
- {
- compiler_options_.push_back(CompilerOptions::POWERVR_FP16);
- }
-}
-
-DepthwiseConv3x3::DepthwiseConv3x3(DepthwiseConv3x3 &&operation)
- : GPUOperation(std::move(operation)), local_mem_uploads_(operation.local_mem_uploads_)
-{
-}
-
-DepthwiseConv3x3 &DepthwiseConv3x3::operator=(DepthwiseConv3x3 &&operation)
-{
- if (this != &operation)
- {
- std::swap(local_mem_uploads_, operation.local_mem_uploads_);
- GPUOperation::operator=(std::move(operation));
- }
- return *this;
-}
-
-std::string DepthwiseConv3x3::GenerateDepthwiseConvCode(const OperationDef &op_def,
- bool weights_are_buffer,
- bool local_mem_uploads)
-{
- auto src_desc = op_def.src_tensors[0];
- src_desc.SetTextureAddressMode(TextureAddressMode::ZERO);
- AddSrcTensor("src_tensor", src_desc);
- AddDstTensor("dst_tensor", op_def.dst_tensors[0]);
-
- const auto src_tensor_type = op_def.src_tensors[0].storage_type;
-
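- // BUFFER / IMAGE_BUFFER storage has no hardware texture address clamping,
- // so out-of-bounds reads are masked manually below: coordinates are clamped
- // and loaded values are zeroed via the x*_in / y*_in flags.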
- const bool manual_clamp = src_tensor_type == TensorStorageType::BUFFER ||
- src_tensor_type == TensorStorageType::IMAGE_BUFFER;
-
- std::string c = GetCommonDefines(op_def.precision);
- if (local_mem_uploads)
- {
- c += "__attribute__((reqd_work_group_size(8, 4, 1)))\n";
- }
- c += "__kernel void main_function(\n";
- c += "$0) {\n";
- if (op_def.dst_tensors[0].HasAxis(Axis::BATCH))
- {
- c += " int linear_id = get_global_id(0);\n";
- c += " int X = (linear_id / args.dst_tensor.Batch()) * 2;\n";
- c += " int B = linear_id % args.dst_tensor.Batch();\n";
- c += " args.dst_tensor.SetBatchRef(B);\n";
- c += " args.src_tensor.SetBatchRef(B);\n";
- }
- else
- {
- c += " int X = get_global_id(0) * 2;\n";
- }
- c += " int Y = get_global_id(1) * 2;\n";
- c += " int S = get_global_id(2);\n";
- c += " ACCUM_FLT4 r0 = (ACCUM_FLT4)(0.0f);\n";
- c += " ACCUM_FLT4 r1 = (ACCUM_FLT4)(0.0f);\n";
- c += " ACCUM_FLT4 r2 = (ACCUM_FLT4)(0.0f);\n";
- c += " ACCUM_FLT4 r3 = (ACCUM_FLT4)(0.0f);\n";
- if (!local_mem_uploads)
- {
- c += " if (X >= args.dst_tensor.Width() || Y >= args.dst_tensor.Height() "
- "|| S >= args.dst_tensor.Slices()) { \n";
- c += " return; \n";
- c += " } \n";
- }
- if (local_mem_uploads)
- {
- c += " __local FLT4 f[10];\n";
- c += " event_t e = async_work_group_copy(f, args.weights.GetPtr() + S * "
- "10, 10, 0);\n";
- c += " wait_group_events(1, &e);\n";
- }
- else if (weights_are_buffer)
- {
- c += " __global FLT4* f = args.weights.GetPtr() + S * 10;\n";
- }
- c += " FLT4 s0;\n";
- c += " FLT4 s1;\n";
- c += " FLT4 s2;\n";
- c += " FLT4 s3;\n";
- std::string W[9] = {"f0", "f1", "f2", "f3", "f4", "f5", "f6", "f7", "f8"};
- std::string bias = "bias";
- std::string xc[4] = {"X - 1", "X", "X + 1", "X + 2"};
- std::string yc[4] = {"Y - 1", "Y", "Y + 1", "Y + 2"};
- if (!weights_are_buffer)
- {
- c += " FLT4 f0 = args.weights.Read(0, S);\n";
- c += " FLT4 f1 = args.weights.Read(1, S);\n";
- c += " FLT4 f2 = args.weights.Read(2, S);\n";
- c += " FLT4 f3 = args.weights.Read(3, S);\n";
- c += " FLT4 f4 = args.weights.Read(4, S);\n";
- c += " FLT4 f5 = args.weights.Read(5, S);\n";
- c += " FLT4 f6 = args.weights.Read(6, S);\n";
- c += " FLT4 f7 = args.weights.Read(7, S);\n";
- c += " FLT4 f8 = args.weights.Read(8, S);\n";
- }
- if (manual_clamp)
- {
- c += " int x0 = X - 1;\n";
- c += " int x1 = X;\n";
- c += " int x2 = X + 1;\n";
- c += " int x3 = X + 2;\n";
- c += " int y0 = Y - 1;\n";
- c += " int y1 = Y;\n";
- c += " int y2 = Y + 1;\n";
- c += " int y3 = Y + 2;\n";
- c += " bool x0_in = x0 >= 0 && x0 < args.dst_tensor.Width();\n";
- c += " bool x1_in = x1 >= 0 && x1 < args.dst_tensor.Width();\n";
- c += " bool x2_in = x2 >= 0 && x2 < args.dst_tensor.Width();\n";
- c += " bool x3_in = x3 >= 0 && x3 < args.dst_tensor.Width();\n";
- c += " bool y0_in = y0 >= 0 && y0 < args.dst_tensor.Height();\n";
- c += " bool y1_in = y1 >= 0 && y1 < args.dst_tensor.Height();\n";
- c += " bool y2_in = y2 >= 0 && y2 < args.dst_tensor.Height();\n";
- c += " bool y3_in = y3 >= 0 && y3 < args.dst_tensor.Height();\n";
- c += " x0 = clamp(x0, 0, args.dst_tensor.Width() - 1);\n";
- c += " x1 = clamp(x1, 0, args.dst_tensor.Width() - 1);\n";
- c += " x2 = clamp(x2, 0, args.dst_tensor.Width() - 1);\n";
- c += " x3 = clamp(x3, 0, args.dst_tensor.Width() - 1);\n";
- c += " y0 = clamp(y0, 0, args.dst_tensor.Height() - 1);\n";
- c += " y1 = clamp(y1, 0, args.dst_tensor.Height() - 1);\n";
- c += " y2 = clamp(y2, 0, args.dst_tensor.Height() - 1);\n";
- c += " y3 = clamp(y3, 0, args.dst_tensor.Height() - 1);\n";
- if (src_tensor_type == TensorStorageType::BUFFER)
- {
- c += " __global FLT4* src_loc = "
- "args.src_tensor.GetPtrWithSliceOffset(S);\n";
- }
- xc[0] = "x0";
- xc[1] = "x1";
- xc[2] = "x2";
- xc[3] = "x3";
- yc[0] = "y0";
- yc[1] = "y1";
- yc[2] = "y2";
- yc[3] = "y3";
- }
- if (local_mem_uploads || weights_are_buffer)
- {
- W[0] = "f[0]";
- W[1] = "f[1]";
- W[2] = "f[2]";
- W[3] = "f[3]";
- W[4] = "f[4]";
- W[5] = "f[5]";
- W[6] = "f[6]";
- W[7] = "f[7]";
- W[8] = "f[8]";
- bias = "f[9]";
- }
- auto read_4x_line = [&](int y) {
- if (src_tensor_type == TensorStorageType::BUFFER)
- {
- const std::string y_in = "y" + std::to_string(y) + "_in";
- c += " s0 = src_loc[args.src_tensor.GetWHOffset(" + xc[0] + ", " + yc[y] +
- ")] * (FLT)(x0_in && " + y_in + ");\n";
- c += " s1 = src_loc[args.src_tensor.GetWHOffset(" + xc[1] + ", " + yc[y] +
- ")] * (FLT)(x1_in && " + y_in + ");\n";
- c += " s2 = src_loc[args.src_tensor.GetWHOffset(" + xc[2] + ", " + yc[y] +
- ")] * (FLT)(x2_in && " + y_in + ");\n";
- c += " s3 = src_loc[args.src_tensor.GetWHOffset(" + xc[3] + ", " + yc[y] +
- ")] * (FLT)(x3_in && " + y_in + ");\n";
- }
- else if (src_tensor_type == TensorStorageType::IMAGE_BUFFER)
- {
- const std::string y_in = "y" + std::to_string(y) + "_in";
- c += " s0 = args.src_tensor.Read(" + xc[0] + ", " + yc[y] + ", S) * (FLT)(x0_in && " +
- y_in + ");\n";
- c += " s1 = args.src_tensor.Read(" + xc[1] + ", " + yc[y] + ", S) * (FLT)(x1_in && " +
- y_in + ");\n";
- c += " s2 = args.src_tensor.Read(" + xc[2] + ", " + yc[y] + ", S) * (FLT)(x2_in && " +
- y_in + ");\n";
- c += " s3 = args.src_tensor.Read(" + xc[3] + ", " + yc[y] + ", S) * (FLT)(x3_in && " +
- y_in + ");\n";
- }
- else
- {
- c += " s0 = args.src_tensor.Read(" + xc[0] + ", " + yc[y] + ", S);\n";
- c += " s1 = args.src_tensor.Read(" + xc[1] + ", " + yc[y] + ", S);\n";
- c += " s2 = args.src_tensor.Read(" + xc[2] + ", " + yc[y] + ", S);\n";
- c += " s3 = args.src_tensor.Read(" + xc[3] + ", " + yc[y] + ", S);\n";
- }
- };
- c += " {\n";
- read_4x_line(0);
- c += " r0 += TO_ACCUM_TYPE(" + W[0] + " * s0);\n";
- c += " r0 += TO_ACCUM_TYPE(" + W[1] + " * s1);\n";
- c += " r1 += TO_ACCUM_TYPE(" + W[0] + " * s1);\n";
- c += " r0 += TO_ACCUM_TYPE(" + W[2] + " * s2);\n";
- c += " r1 += TO_ACCUM_TYPE(" + W[1] + " * s2);\n";
- c += " r1 += TO_ACCUM_TYPE(" + W[2] + " * s3);\n";
- c += " }\n";
- c += " {\n";
- read_4x_line(1);
- c += " r0 += TO_ACCUM_TYPE(" + W[3] + " * s0);\n";
- c += " r2 += TO_ACCUM_TYPE(" + W[0] + " * s0);\n";
- c += " r0 += TO_ACCUM_TYPE(" + W[4] + " * s1);\n";
- c += " r1 += TO_ACCUM_TYPE(" + W[3] + " * s1);\n";
- c += " r2 += TO_ACCUM_TYPE(" + W[1] + " * s1);\n";
- c += " r3 += TO_ACCUM_TYPE(" + W[0] + " * s1);\n";
- c += " r0 += TO_ACCUM_TYPE(" + W[5] + " * s2);\n";
- c += " r1 += TO_ACCUM_TYPE(" + W[4] + " * s2);\n";
- c += " r2 += TO_ACCUM_TYPE(" + W[2] + " * s2);\n";
- c += " r3 += TO_ACCUM_TYPE(" + W[1] + " * s2);\n";
- c += " r1 += TO_ACCUM_TYPE(" + W[5] + " * s3);\n";
- c += " r3 += TO_ACCUM_TYPE(" + W[2] + " * s3);\n";
- c += " }\n";
- c += " {\n";
- read_4x_line(2);
- c += " r0 += TO_ACCUM_TYPE(" + W[6] + " * s0);\n";
- c += " r2 += TO_ACCUM_TYPE(" + W[3] + " * s0);\n";
- c += " r0 += TO_ACCUM_TYPE(" + W[7] + " * s1);\n";
- c += " r1 += TO_ACCUM_TYPE(" + W[6] + " * s1);\n";
- c += " r2 += TO_ACCUM_TYPE(" + W[4] + " * s1);\n";
- c += " r3 += TO_ACCUM_TYPE(" + W[3] + " * s1);\n";
- c += " r0 += TO_ACCUM_TYPE(" + W[8] + " * s2);\n";
- c += " r1 += TO_ACCUM_TYPE(" + W[7] + " * s2);\n";
- c += " r2 += TO_ACCUM_TYPE(" + W[5] + " * s2);\n";
- c += " r3 += TO_ACCUM_TYPE(" + W[4] + " * s2);\n";
- c += " r1 += TO_ACCUM_TYPE(" + W[8] + " * s3);\n";
- c += " r3 += TO_ACCUM_TYPE(" + W[5] + " * s3);\n";
- c += " }\n";
- c += " {\n";
- read_4x_line(3);
- c += " r2 += TO_ACCUM_TYPE(" + W[6] + " * s0);\n";
- c += " r2 += TO_ACCUM_TYPE(" + W[7] + " * s1);\n";
- c += " r3 += TO_ACCUM_TYPE(" + W[6] + " * s1);\n";
- c += " r2 += TO_ACCUM_TYPE(" + W[8] + " * s2);\n";
- c += " r3 += TO_ACCUM_TYPE(" + W[7] + " * s2);\n";
- c += " r3 += TO_ACCUM_TYPE(" + W[8] + " * s3);\n";
- c += " }\n";
- if (!weights_are_buffer)
- {
- c += " FLT4 bias = args.weights.Read(9, S);\n";
- }
- c += " r0 += TO_ACCUM_TYPE(" + bias + ");\n";
- c += " r1 += TO_ACCUM_TYPE(" + bias + ");\n";
- c += " r2 += TO_ACCUM_TYPE(" + bias + ");\n";
- c += " r3 += TO_ACCUM_TYPE(" + bias + ");\n";
- if (local_mem_uploads)
- {
- c += " if (X >= args.dst_tensor.Width() || Y >= args.dst_tensor.Height() "
- "|| S >= args.dst_tensor.Slices()) { \n";
- c += " return; \n";
- c += " } \n";
- }
- c += " if(X + 0 < args.dst_tensor.Width() && Y + 0 < "
- "args.dst_tensor.Height()) {\n";
- c += " FLT4 result = TO_FLT4(r0);\n";
- c += " args.dst_tensor.Write(result, X + 0, Y + 0, S)\n";
- c += " }\n";
- c += " if(X + 1 < args.dst_tensor.Width() && Y + 0 < "
- "args.dst_tensor.Height()) {\n";
- c += " FLT4 result = TO_FLT4(r1);\n";
- c += " args.dst_tensor.Write(result, X + 1, Y + 0, S)\n";
- c += " }\n";
- c += " if(X + 0 < args.dst_tensor.Width() && Y + 1 < "
- "args.dst_tensor.Height()) {\n";
- c += " FLT4 result = TO_FLT4(r2);\n";
- c += " args.dst_tensor.Write(result, X + 0, Y + 1, S)\n";
- c += " }\n";
- c += " if(X + 1 < args.dst_tensor.Width() && Y + 1 < "
- "args.dst_tensor.Height()) {\n";
- c += " FLT4 result = TO_FLT4(r3);\n";
- c += " args.dst_tensor.Write(result, X + 1, Y + 1, S)\n";
- c += " }\n";
- c += "}\n";
-
- return c;
-}
-
-int3 DepthwiseConv3x3::GetGridSize() const
-{
- const int grid_x = DivideRoundUp(dst_[0]->Width(), 2) * dst_[0]->Batch();
- const int grid_y = DivideRoundUp(dst_[0]->Height(), 2);
- const int grid_z = dst_[0]->Slices();
- return int3(grid_x, grid_y, grid_z);
-}
-
-void DepthwiseConv3x3::GetPossibleKernelWorkGroups(TuningType tuning_type,
- const DeviceInfo &device_info,
- const KernelInfo &kernel_info,
- std::vector<int3> *work_groups) const
-{
- if (local_mem_uploads_)
- {
- work_groups->push_back(work_group_size_);
- }
- else
- {
- GetPossibleWorkGroups(tuning_type, device_info, kernel_info, grid_size_, work_groups);
- }
-}
-
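-// The specialized 3x3 kernel applies only for depth multiplier 1 (o == 1),
-// stride 1, dilation 1, and symmetric one-pixel padding (i.e. SAME padding
-// for a 3x3 window).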
-bool IsDepthwiseConv3x3Supported(const DepthwiseConvolution2DAttributes &attr)
-{
- return attr.weights.shape.o == 1 && attr.dilations.w == 1 && attr.dilations.h == 1 &&
- attr.weights.shape.w == 3 && attr.weights.shape.h == 3 && attr.strides.w == 1 &&
- attr.strides.h == 1 && attr.padding.prepended.w == 1 && attr.padding.prepended.h == 1 &&
- attr.padding.appended.w == 1 && attr.padding.appended.h == 1;
-}
-
-DepthwiseConv3x3 CreateDepthwiseConv3x3(const DeviceInfo &device_info,
- const OperationDef &definition,
- const DepthwiseConvolution2DAttributes &attr)
-{
- bool weights_are_buffer = device_info.IsPowerVR() || device_info.IsMali();
- bool local_mem_uploads = weights_are_buffer && device_info.IsPowerVR();
- DepthwiseConv3x3 result(definition, weights_are_buffer, local_mem_uploads, device_info);
- result.UploadWeightsAndBiases(attr.weights, attr.bias, weights_are_buffer);
- return result;
-}
-
-} // namespace gpu_cl
-} // namespace backend
-} // namespace onert
diff --git a/runtime/onert/backend/gpu_cl/open_cl/kernels/DepthwiseConv3x3.h b/runtime/onert/backend/gpu_cl/open_cl/kernels/DepthwiseConv3x3.h
deleted file mode 100644
index 8c571105a..000000000
--- a/runtime/onert/backend/gpu_cl/open_cl/kernels/DepthwiseConv3x3.h
+++ /dev/null
@@ -1,177 +0,0 @@
-/*
- * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
- * Copyright 2019 The TensorFlow Authors. All Rights Reserved.
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#ifndef __ONERT_BACKEND_GPU_CL_OPENCL_KERNELS_DEPTHWISE_CONV_3X3_H__
-#define __ONERT_BACKEND_GPU_CL_OPENCL_KERNELS_DEPTHWISE_CONV_3X3_H__
-
-#include <memory>
-#include <vector>
-
-#include "open_cl/Buffer.h"
-#include "open_cl/kernels/GpuOperation.h"
-#include "open_cl/Tensor.h"
-#include "open_cl/Texture2d.h"
-#include "open_cl/Util.h"
-#include "open_cl/DataType.h"
-#include "open_cl/Operations.h"
-#include "open_cl/Shape.h"
-#include "open_cl/Status.h"
-#include "open_cl/Tensor.h"
-#include "open_cl/Types.h"
-
-namespace onert
-{
-namespace backend
-{
-namespace gpu_cl
-{
-
-class DepthwiseConv3x3 : public GPUOperation
-{
-public:
- DepthwiseConv3x3() = default;
- void GetPossibleKernelWorkGroups(TuningType tuning_type, const DeviceInfo &device_info,
- const KernelInfo &kernel_info,
- std::vector<int3> *work_groups) const override;
- int3 GetGridSize() const override;
-
- // Move only
- DepthwiseConv3x3(DepthwiseConv3x3 &&operation);
- DepthwiseConv3x3 &operator=(DepthwiseConv3x3 &&operation);
- DepthwiseConv3x3(const DepthwiseConv3x3 &) = delete;
- DepthwiseConv3x3 &operator=(const DepthwiseConv3x3 &) = delete;
-
-private:
- explicit DepthwiseConv3x3(const OperationDef &definition, bool weights_are_buffer,
- bool local_mem_uploads, const DeviceInfo &device_info);
- template <DataType T>
- void UploadWeightsAndBiases(const InternalTensor<OHWI, T> &weights,
- const InternalTensor<Linear, T> &biases, bool weights_are_buffer);
-
- friend DepthwiseConv3x3 CreateDepthwiseConv3x3(const DeviceInfo &device_info,
- const OperationDef &definition,
- const DepthwiseConvolution2DAttributes &attr);
-
- template <DataType S, typename T>
- void RearrangeWeightsAndBiasesData(const InternalTensor<OHWI, S> &weights,
- const InternalTensor<Linear, S> &biases, absl::Span<T> dst);
-
- std::string GenerateDepthwiseConvCode(const OperationDef &op_def, bool weights_are_buffer,
- bool local_mem_uploads);
-
- bool local_mem_uploads_;
-};
-
-template <DataType T>
-void DepthwiseConv3x3::UploadWeightsAndBiases(const InternalTensor<OHWI, T> &weights,
- const InternalTensor<Linear, T> &biases,
- bool weights_are_buffer)
-{
- const int src_depth = DivideRoundUp(weights.shape.i, 4);
- int texture_width = 10; // 3x3 kernel + 1 bias
- int texture_height = src_depth;
- const int elements_count = texture_width * texture_height;
- const bool fp32_weights = definition_.precision == CalculationsPrecision::F32;
- const int float4_size = fp32_weights ? 16 : 8;
-
- std::vector<uint8_t> data(float4_size * elements_count);
- if (fp32_weights)
- {
- float4 *ptr = reinterpret_cast<float4 *>(data.data());
- RearrangeWeightsAndBiasesData(weights, biases, absl::MakeSpan(ptr, elements_count));
- }
- // TODO: F16 weights are not supported yet; the commented-out branch below
- // sketches the intended half-precision path.
- //
- // else {
- // half4* ptr = reinterpret_cast<half4*>(data.data());
- // RearrangeWeightsAndBiasesData(weights, biases,
- // absl::MakeSpan(ptr, elements_count));
- // }
-
- if (weights_are_buffer)
- {
- BufferDescriptor desc;
- desc.element_type = fp32_weights ? DataType::FLOAT32 : DataType::FLOAT16;
- desc.element_size = 4;
- desc.size = float4_size * elements_count;
- desc.data = std::move(data);
- args_.AddObject("weights", absl::make_unique<BufferDescriptor>(std::move(desc)));
- }
- else
- {
- Texture2DDescriptor desc;
- desc.element_type = fp32_weights ? DataType::FLOAT32 : DataType::FLOAT16;
- desc.size = int2(texture_width, texture_height);
- desc.data = std::move(data);
- args_.AddObject("weights", absl::make_unique<Texture2DDescriptor>(std::move(desc)));
- }
-}
-
-template <DataType S, typename T>
-void DepthwiseConv3x3::RearrangeWeightsAndBiasesData(const InternalTensor<OHWI, S> &weights,
- const InternalTensor<Linear, S> &biases,
- absl::Span<T> dst)
-{
- const int src_depth = DivideRoundUp(weights.shape.i, 4);
-
- int counter = 0;
- for (int s = 0; s < src_depth; ++s)
- {
- for (int y = 0; y < 3; ++y)
- {
- for (int x = 0; x < 3; ++x)
- {
- T filter_val;
- for (int i = 0; i < 4; ++i)
- {
- const int s_ch = s * 4 + i;
- if (s_ch < weights.shape.i)
- {
- const int f_index = weights.shape.LinearIndex({0, y, x, s_ch});
- filter_val[i] = weights.data[f_index];
- }
- else
- {
- filter_val[i] = 0.0f;
- }
- }
- dst[counter++] = filter_val;
- }
- }
-
- T bias_val;
- for (int i = 0; i < 4; ++i)
- {
- const int dst_ch = s * 4 + i;
- bias_val[i] = dst_ch >= biases.shape.v ? 0.0f : biases.data[dst_ch];
- }
- dst[counter++] = bias_val;
- }
-}
-
-bool IsDepthwiseConv3x3Supported(const DepthwiseConvolution2DAttributes &attr);
-
-DepthwiseConv3x3 CreateDepthwiseConv3x3(const DeviceInfo &device_info,
- const OperationDef &definition,
- const DepthwiseConvolution2DAttributes &attr);
-
-} // namespace gpu_cl
-} // namespace backend
-} // namespace onert
-
-#endif // __ONERT_BACKEND_GPU_CL_OPENCL_KERNELS_DEPTHWISE_CONV_3X3_H__
diff --git a/runtime/onert/backend/gpu_cl/open_cl/kernels/GpuOperation.cc b/runtime/onert/backend/gpu_cl/open_cl/kernels/GpuOperation.cc
deleted file mode 100644
index 8839d9687..000000000
--- a/runtime/onert/backend/gpu_cl/open_cl/kernels/GpuOperation.cc
+++ /dev/null
@@ -1,385 +0,0 @@
-/*
- * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
- * Copyright 2019 The TensorFlow Authors. All Rights Reserved.
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#include "GpuOperation.h"
-
-#include "Util.h"
-#include "WorkGroupPicking.h"
-#include "open_cl/AccessType.h"
-
-namespace onert
-{
-namespace backend
-{
-namespace gpu_cl
-{
-namespace
-{
-
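-// Builds a pass-through kernel that reads src and writes dst unchanged; the
-// linked elementwise code is spliced into it later, when AssembleCode() hands
-// elementwise_code_ to args_.TransformToCLCode().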
-std::string GetElementWiseCode(const OperationDef &op_def, bool check_src_slices)
-{
- std::string c = GetCommonDefines(op_def.precision);
-
- c += "__kernel void main_function(\n";
- c += "$0) {\n";
- c += " int X = get_global_id(0);\n";
- c += " int Y = get_global_id(1);\n";
- c += " int Z = get_global_id(2);\n";
- c += " if (X >= args.dst_tensor.Width() || Y >= args.dst_tensor.Height() || "
- "Z >= args.dst_tensor.Slices()) return; \n";
- if (check_src_slices)
- {
- c += " FLT4 src = (FLT4)(0.0f);\n";
- c += " if (Z < args.src_tensor.Slices()) {\n";
- c += " src = args.src_tensor.Read(X, Y, Z);\n";
- c += " }\n";
- }
- else
- {
- c += " FLT4 src = args.src_tensor.Read(X, Y, Z);\n";
- }
- c += " args.dst_tensor.Write(src, X, Y, Z);\n";
- c += "} \n";
- return c;
-}
-
-int3 GetWorkGroupsCount(int grid_dimension, const int3 &grid_size, const int3 &work_group_size,
- const int3 &work_group_launch_order)
-{
- int3 work_groups_count;
- if (grid_dimension == 1)
- {
- work_groups_count.x = DivideRoundUp(grid_size.x, work_group_size.x);
- work_groups_count.y = 1;
- work_groups_count.z = 1;
- }
- else if (grid_dimension == 2)
- {
- int3 wgs;
- wgs.x = DivideRoundUp(grid_size.x, work_group_size.x);
- wgs.y = DivideRoundUp(grid_size.y, work_group_size.y);
- work_groups_count.x = wgs[work_group_launch_order[0]];
- work_groups_count.y = wgs[work_group_launch_order[1]];
- work_groups_count.z = 1;
- }
- else
- { // grid_dimension == 3
- int3 wgs;
- wgs.x = DivideRoundUp(grid_size.x, work_group_size.x);
- wgs.y = DivideRoundUp(grid_size.y, work_group_size.y);
- wgs.z = DivideRoundUp(grid_size.z, work_group_size.z);
- work_groups_count.x = wgs[work_group_launch_order[0]];
- work_groups_count.y = wgs[work_group_launch_order[1]];
- work_groups_count.z = wgs[work_group_launch_order[2]];
- }
- return work_groups_count;
-}
-
-} // namespace
-
-DataType OperationDef::GetDataType() const { return DeduceDataTypeFromPrecision(precision); }
-
-DataType OperationDef::GetPrimaryDataType() const { return src_tensors[0].data_type; }
-TensorStorageType OperationDef::GetPrimaryStorageType() const
-{
- return src_tensors[0].storage_type;
-}
-
-bool OperationDef::IsBatchSupported() const
-{
- for (const auto &src : src_tensors)
- {
- if (HasAxis(src.layout, Axis::BATCH))
- {
- return true;
- }
- }
- for (const auto &dst : dst_tensors)
- {
- if (HasAxis(dst.layout, Axis::BATCH))
- {
- return true;
- }
- }
- return false;
-}
-
-GPUOperation::GPUOperation(const OperationDef &definition) : definition_(definition) {}
-
-void GPUOperation::SetSrc(Tensor *ptr, int index)
-{
- if (index >= (int)src_.size())
- {
- src_.resize(index + 1, nullptr);
- }
- src_[index] = ptr;
-}
-
-void GPUOperation::SetDst(Tensor *ptr, int index)
-{
- if (index >= (int)dst_.size())
- {
- dst_.resize(index + 1, nullptr);
- }
- dst_[index] = ptr;
-}
-
-GPUOperation::GPUOperation(GPUOperation &&operation)
- : args_(std::move(operation.args_)), code_(std::move(operation.code_)),
- work_group_size_(operation.work_group_size_),
- compiler_options_(std::move(operation.compiler_options_)),
- tensor_to_grid_(operation.tensor_to_grid_), elementwise_(operation.elementwise_),
- linkable_(operation.linkable_), check_src_channels_size_(operation.check_src_channels_size_),
- definition_(std::move(operation.definition_)), src_(std::move(operation.src_)),
- dst_(std::move(operation.dst_)), kernel_(std::move(operation.kernel_)),
- grid_dimension_(operation.grid_dimension_),
- work_group_launch_order_(operation.work_group_launch_order_), grid_size_(operation.grid_size_),
- src_tensors_names_(std::move(operation.src_tensors_names_)),
- dst_tensors_names_(std::move(operation.dst_tensors_names_)),
- work_groups_count_(operation.work_groups_count_), linkable_count_(operation.linkable_count_),
- elementwise_code_(std::move(operation.elementwise_code_))
-{
-}
-
-GPUOperation &GPUOperation::operator=(GPUOperation &&operation)
-{
- if (this != &operation)
- {
- args_ = std::move(operation.args_);
- code_ = std::move(operation.code_);
- std::swap(work_group_size_, operation.work_group_size_);
- compiler_options_ = std::move(operation.compiler_options_);
- tensor_to_grid_ = operation.tensor_to_grid_;
- elementwise_ = operation.elementwise_;
- linkable_ = operation.linkable_;
- check_src_channels_size_ = operation.check_src_channels_size_;
- definition_ = std::move(operation.definition_);
- src_ = std::move(operation.src_);
- dst_ = std::move(operation.dst_);
- kernel_ = std::move(operation.kernel_);
- std::swap(grid_dimension_, operation.grid_dimension_);
- std::swap(work_group_launch_order_, operation.work_group_launch_order_);
- std::swap(grid_size_, operation.grid_size_);
- src_tensors_names_ = std::move(operation.src_tensors_names_);
- dst_tensors_names_ = std::move(operation.dst_tensors_names_);
- std::swap(work_groups_count_, operation.work_groups_count_);
- std::swap(linkable_count_, operation.linkable_count_);
- elementwise_code_ = std::move(operation.elementwise_code_);
- }
- return *this;
-}
-
-absl::Status GPUOperation::AddOperation(GPUOperation *operation)
-{
- linkable_count_ += 1;
- std::string code = operation->code_;
- std::string unique_postfix = absl::StrCat("_link", linkable_count_);
- operation->args_.RenameArgs(unique_postfix, &code);
- elementwise_code_ += "{\n" + code + "\n}\n";
- RETURN_IF_ERROR(args_.Merge(std::move(operation->args_), unique_postfix));
- for (size_t i = 0; i < operation->src_tensors_names_.size(); ++i)
- {
- definition_.src_tensors.push_back(operation->definition_.src_tensors[i + 1]);
- src_tensors_names_.push_back(operation->src_tensors_names_[i] + unique_postfix);
- }
- for (size_t i = 0; i < operation->dst_tensors_names_.size(); ++i)
- {
- dst_tensors_names_.push_back(operation->dst_tensors_names_[i] + unique_postfix);
- }
- return absl::OkStatus();
-}
-
-void GPUOperation::AddSrcTensor(const std::string &tensor_name, const TensorDescriptor &desc)
-{
- src_tensors_names_.push_back(tensor_name);
- auto desc_new = std::make_unique<TensorDescriptor>(desc);
- args_.AddObjectRef(tensor_name, AccessType::READ, std::move(desc_new));
-}
-
-void GPUOperation::AddSrcBuffer(const std::string &buffer_name, const BufferDescriptor &desc)
-{
- src_tensors_names_.push_back(buffer_name);
- auto desc_new = std::make_unique<BufferDescriptor>(desc);
- args_.AddObjectRef(buffer_name, AccessType::READ, std::move(desc_new));
-}
-
-void GPUOperation::AddDstTensor(const std::string &tensor_name, const TensorDescriptor &desc)
-{
- dst_tensors_names_.push_back(tensor_name);
- auto desc_new = std::make_unique<TensorDescriptor>(desc);
- args_.AddObjectRef(tensor_name, AccessType::WRITE, std::move(desc_new));
-}
-
-absl::Status GPUOperation::UpdateParams()
-{
- for (size_t i = 0; i < src_tensors_names_.size(); ++i)
- {
- RETURN_IF_ERROR(args_.SetObjectRef(src_tensors_names_[i], src_[i]));
- }
- for (size_t i = 0; i < dst_tensors_names_.size(); ++i)
- {
- RETURN_IF_ERROR(args_.SetObjectRef(dst_tensors_names_[i], dst_[i]));
- }
- RETURN_IF_ERROR(BindArguments(&args_));
- grid_size_ = GetGridSize();
- work_groups_count_ =
- GetWorkGroupsCount(grid_dimension_, grid_size_, work_group_size_, work_group_launch_order_);
- return absl::OkStatus();
-}
-
-absl::Status GPUOperation::AssembleCode(const DeviceInfo &device_info, CLContext *context)
-{
- if (elementwise_)
- {
- auto src_desc = absl::make_unique<TensorDescriptor>(definition_.src_tensors[0]);
- if (definition_.IsBatchSupported())
- {
- src_desc->SetStateVar("BatchedWidth", "true");
- }
- src_tensors_names_.insert(src_tensors_names_.begin(), "src_tensor");
- args_.AddObjectRef("src_tensor", AccessType::READ, std::move(src_desc));
-
- auto dst_desc = absl::make_unique<TensorDescriptor>(definition_.dst_tensors[0]);
- if (definition_.IsBatchSupported())
- {
- dst_desc->SetStateVar("BatchedWidth", "true");
- }
- dst_tensors_names_.insert(dst_tensors_names_.begin(), "dst_tensor");
- args_.AddObjectRef("dst_tensor", AccessType::WRITE, std::move(dst_desc));
-
- elementwise_code_ = "{\n" + code_ + "\n}\n" + elementwise_code_;
- code_ = GetElementWiseCode(definition_, check_src_channels_size_);
- RETURN_IF_ERROR(args_.AllocateObjects(context));
- RETURN_IF_ERROR(
- args_.TransformToCLCode(device_info, {{dst_tensors_names_[0], elementwise_code_}}, &code_));
- }
- else
- {
- RETURN_IF_ERROR(args_.AllocateObjects(context));
- RETURN_IF_ERROR(
- args_.TransformToCLCode(device_info, {{dst_tensors_names_[0], elementwise_code_}}, &code_));
- }
- return absl::OkStatus();
-}
-
-absl::Status GPUOperation::Compile(const CreationContext &creation_context)
-{
- RETURN_IF_ERROR(AssembleCode(creation_context.GetDeviceInfo(), creation_context.context));
- RETURN_IF_ERROR(creation_context.cache->GetOrCreateCLKernel(
- code_, "main_function", compiler_options_, *creation_context.context, *creation_context.device,
- &kernel_));
- return PostCompileCheck(creation_context.device->info_, kernel_.info_);
-}
-
-absl::Status GPUOperation::CompileDeserialized(const CreationContext &creation_context)
-{
- return creation_context.cache->GetOrCreateCLKernel(code_, "main_function", compiler_options_,
- *creation_context.context,
- *creation_context.device, &kernel_);
-}
-
-void GPUOperation::GetPossibleKernelWorkGroups(TuningType tuning_type,
- const DeviceInfo &device_info,
- const KernelInfo &kernel_info,
- std::vector<int3> *work_groups) const
-{
- GetPossibleWorkGroups(tuning_type, device_info, kernel_info, grid_size_, work_groups);
-}
-
-absl::Status GPUOperation::Tune(const TuningParameters &params)
-{
- std::vector<int3> possible_work_groups;
- GetPossibleKernelWorkGroups(params.tuning_type, *params.info, kernel_.info_,
- &possible_work_groups);
- if (possible_work_groups.empty())
- {
- return absl::NotFoundError("Can not found work_group size to launch kernel");
- }
- if (possible_work_groups.size() == 1)
- {
- work_group_size_ = possible_work_groups[0];
- work_groups_count_ =
- GetWorkGroupsCount(grid_dimension_, grid_size_, work_group_size_, work_group_launch_order_);
- return absl::OkStatus();
- }
- else
- {
- std::vector<int3> work_groups_count(possible_work_groups.size());
- for (size_t i = 0; i < work_groups_count.size(); ++i)
- {
- work_groups_count[i] = GetWorkGroupsCount(grid_dimension_, grid_size_,
- possible_work_groups[i], work_group_launch_order_);
- }
- RETURN_IF_ERROR(args_.Bind(kernel_.kernel()));
- int best_work_group_index;
- RETURN_IF_ERROR(params.queue->GetBestWorkGroupIndex(
- kernel_, *params.info, work_groups_count, possible_work_groups, &best_work_group_index));
- work_group_size_ = possible_work_groups[best_work_group_index];
- work_groups_count_ =
- GetWorkGroupsCount(grid_dimension_, grid_size_, work_group_size_, work_group_launch_order_);
- return absl::OkStatus();
- }
-}
-
-int3 GPUOperation::GetGridSize() const
-{
- if (elementwise_ || tensor_to_grid_ == TensorToGrid::kWBToX_HDToY_SToZ)
- {
- const int grid_x = dst_[0]->Width() * dst_[0]->Batch();
- const int grid_y = dst_[0]->Height() * dst_[0]->Depth();
- const int grid_z = dst_[0]->Slices();
- return int3(grid_x, grid_y, grid_z);
- }
- if (tensor_to_grid_ == TensorToGrid::kWBToX_HDToY_ZIs1)
- {
- const int grid_x = dst_[0]->Width() * dst_[0]->Batch();
- const int grid_y = dst_[0]->Height() * dst_[0]->Depth();
- const int grid_z = 1;
- return int3(grid_x, grid_y, grid_z);
- }
- if (tensor_to_grid_ == TensorToGrid::kWBToX_HToY_DToZ)
- {
- const int grid_x = dst_[0]->Width() * dst_[0]->Batch();
- const int grid_y = dst_[0]->Height();
- const int grid_z = dst_[0]->Depth();
- return int3(grid_x, grid_y, grid_z);
- }
- if (tensor_to_grid_ == TensorToGrid::kBToX_YIs1_ZIs1)
- {
- const int grid_x = dst_[0]->Batch();
- const int grid_y = 1;
- const int grid_z = 1;
- return int3(grid_x, grid_y, grid_z);
- }
- return grid_size_;
-}
-
-void GPUOperation::AddUniquePostfix(const std::string &unique_postfix)
-{
- for (uint32_t i = 0; i < src_tensors_names_.size(); ++i)
- {
- src_tensors_names_[i] += unique_postfix;
- }
- for (uint32_t i = 0; i < dst_tensors_names_.size(); ++i)
- {
- dst_tensors_names_[i] += unique_postfix;
- }
-}
-
-} // namespace gpu_cl
-} // namespace backend
-} // namespace onert
diff --git a/runtime/onert/backend/gpu_cl/open_cl/kernels/GpuOperation.h b/runtime/onert/backend/gpu_cl/open_cl/kernels/GpuOperation.h
deleted file mode 100644
index 4f531c629..000000000
--- a/runtime/onert/backend/gpu_cl/open_cl/kernels/GpuOperation.h
+++ /dev/null
@@ -1,203 +0,0 @@
-/*
- * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
- * Copyright 2019 The TensorFlow Authors. All Rights Reserved.
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#ifndef __ONERT_BACKEND_GPU_CL_OPENCL_KERNELS_GPU_OPERATION_H__
-#define __ONERT_BACKEND_GPU_CL_OPENCL_KERNELS_GPU_OPERATION_H__
-
-#include <string>
-#include <vector>
-
-#include "TuningParameters.h"
-
-#include "open_cl/Arguments.h"
-#include "open_cl/Buffer.h"
-#include "open_cl/ClCommandQueue.h"
-#include "open_cl/ClContext.h"
-#include "open_cl/ClDevice.h"
-#include "open_cl/ClKernel.h"
-#include "open_cl/ClProgram.h"
-#include "open_cl/DataType.h"
-#include "open_cl/DeviceInfo.h"
-#include "open_cl/Precision.h"
-#include "open_cl/ProgramCache.h"
-#include "open_cl/Tensor.h"
-#include "open_cl/TensorType.h"
-#include "open_cl/Types.h"
-#include "open_cl/Status.h"
-
-namespace onert
-{
-namespace backend
-{
-namespace gpu_cl
-{
-
-// kCustom: default value
-// GPUOperation::GetGridSize must be overloaded
-// kWBToX_HDToY_SToZ:
-// grid_x = dst_[0]->Width() * dst_[0]->Batch();
-// grid_y = dst_[0]->Height() * dst_[0]->Depth();
-// grid_z = dst_[0]->Slices();
-// kWBToX_HDToY_ZIs1:
-// grid_x = dst_[0]->Width() * dst_[0]->Batch();
-// grid_y = dst_[0]->Height() * dst_[0]->Depth();
-// grid_z = 1;
-// kWBToX_HToY_DToZ:
-// grid_x = dst_[0]->Width() * dst_[0]->Batch();
-// grid_y = dst_[0]->Height();
-// grid_z = dst_[0]->Depth();
-// kBToX_YIs1_ZIs1:
-// grid_x = dst_[0]->Batch();
-// grid_y = 1;
-// grid_z = 1;
-enum class TensorToGrid
-{
- kCustom,
- kWBToX_HDToY_SToZ,
- kWBToX_HDToY_ZIs1,
- kWBToX_HToY_DToZ,
- kBToX_YIs1_ZIs1
-};
-
-struct CreationContext
-{
- const CLDevice *device;
- CLContext *context;
- CLCommandQueue *queue;
- ProgramCache *cache;
-
- const DeviceInfo &GetDeviceInfo() const { return device->info_; }
-};
-
-struct OperationDef
-{
- CalculationsPrecision precision;
- std::vector<TensorDescriptor> src_tensors;
- std::vector<TensorDescriptor> dst_tensors;
-
- // returns FLOAT32 for F32 precision and FLOAT16 for F16 precision
- DataType GetDataType() const;
- // Primary means the first src tensor: the first tensor usually defines the
- // structure of the kernel and the types of all other resources (biases, etc.).
- DataType GetPrimaryDataType() const;
- TensorStorageType GetPrimaryStorageType() const;
- bool IsBatchSupported() const;
-};
-
-// GPUOperation represents a GPU implementation of a neural network
-// operation. A GPUOperation can absorb other GPU operations that have the
-// elementwise_ flag set; the combined operation then replaces the whole
-// sequence Op + op0 + op1 + ...
-// The intended usage is therefore:
-//   Create the root GPUOperation.
-//   Create every GPUOperation that will (probably) be attached to it, and
-//     attach them with AddOperation(). Call Compile() on the root operation
-//     only; calling Compile() on an attached operation is useless and may
-//     be an error. A sketch of this flow follows below.
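-//
-// A minimal sketch of that flow (illustrative only: the Create* helpers,
-// attribute/definition/tensor/context names below are assumed, and status
-// handling is omitted):
-//
-//   GPUOperation op = CreatePooling(pool_def, pool_attr);  // root op
-//   GPUOperation relu = CreateReLU(relu_def, relu_attr);   // elementwise op
-//   op.AddOperation(&relu);        // link; do not Compile() `relu` itself
-//   op.Compile(creation_context);  // compile the fused kernel once
-//   op.SetSrc(src_tensor_ptr);
-//   op.SetDst(dst_tensor_ptr);
-//   op.UpdateParams();             // rebind after src/dst changes
-//   op.AddToQueue(queue);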
-class GPUOperation
-{
-public:
- GPUOperation() = default;
- explicit GPUOperation(const OperationDef &definition);
- virtual ~GPUOperation() = default;
- // Move only
- GPUOperation(GPUOperation &&operation);
- GPUOperation &operator=(GPUOperation &&operation);
- GPUOperation(const GPUOperation &) = delete;
- GPUOperation &operator=(const GPUOperation &) = delete;
-
- absl::Status AddOperation(GPUOperation *operation);
-
- void SetSrc(Tensor *ptr, int index = 0);
- void SetDst(Tensor *ptr, int index = 0);
-
- // Should be called after inputs/outputs change.
- absl::Status UpdateParams();
-
- absl::Status AddToQueue(CLCommandQueue *queue)
- {
- RETURN_IF_ERROR(args_.Bind(kernel_.kernel()));
- return queue->Dispatch(kernel_, work_groups_count_, work_group_size_);
- }
-
- virtual void GetPossibleKernelWorkGroups(TuningType tuning_type, const DeviceInfo &device_info,
- const KernelInfo &kernel_info,
- std::vector<int3> *work_groups) const;
-
- absl::Status Tune(const TuningParameters &params);
-
- absl::Status AssembleCode(const DeviceInfo &device_info, CLContext *context);
-
- absl::Status Compile(const CreationContext &creation_context);
-
- absl::Status CompileDeserialized(const CreationContext &creation_context);
-
- virtual absl::Status PostCompileCheck(const DeviceInfo &, const KernelInfo &)
- {
- return absl::OkStatus();
- }
-
- const OperationDef &GetDefinition() const { return definition_; }
-
- void AddSrcTensor(const std::string &tensor_name, const TensorDescriptor &desc);
- void AddSrcBuffer(const std::string &buffer_name, const BufferDescriptor &desc);
- void AddDstTensor(const std::string &tensor_name, const TensorDescriptor &desc);
-
- bool IsLinkable() const { return elementwise_ && linkable_; }
-
- // for linking
- void AddUniquePostfix(const std::string &unique_postfix);
-
- Arguments args_;
- std::string code_;
- int3 work_group_size_ = int3(8, 4, 1);
- std::vector<CompilerOptions> compiler_options_;
- // not applicable to elementwise
- TensorToGrid tensor_to_grid_ = TensorToGrid::kCustom;
-
- bool elementwise_ = false;
- // applicable only with elementwise_ = true;
- bool linkable_ = true; // by default every elementwise is linkable
- // applicable only with elementwise_ = true;
- bool check_src_channels_size_ = false;
-
-protected:
- virtual absl::Status BindArguments(ArgumentsBinder *) { return absl::OkStatus(); }
- virtual int3 GetGridSize() const;
-
- // Defines operation calculation precision and format of src/dst tensors.
- OperationDef definition_;
- std::vector<Tensor *> src_;
- std::vector<Tensor *> dst_;
- CLKernel kernel_;
- int grid_dimension_ = 3; // can be 1, 2 or 3
- int3 work_group_launch_order_ = int3(0, 1, 2);
- int3 grid_size_ = int3(0, 0, 0);
- std::vector<std::string> src_tensors_names_;
- std::vector<std::string> dst_tensors_names_;
-
-private:
- int3 work_groups_count_ = int3(0, 0, 0);
- int linkable_count_ = 0;
- std::string elementwise_code_; // temporary, used during op construction
-};
-
-} // namespace gpu_cl
-} // namespace backend
-} // namespace onert
-
-#endif // __ONERT_BACKEND_GPU_CL_OPENCL_KERNELS_GPU_OPERATION_H__
diff --git a/runtime/onert/backend/gpu_cl/open_cl/kernels/Pooling.cc b/runtime/onert/backend/gpu_cl/open_cl/kernels/Pooling.cc
deleted file mode 100644
index ceeab2f39..000000000
--- a/runtime/onert/backend/gpu_cl/open_cl/kernels/Pooling.cc
+++ /dev/null
@@ -1,400 +0,0 @@
-/*
- * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
- * Copyright 2019 The TensorFlow Authors. All Rights Reserved.
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#include "Pooling.h"
-
-#include <string>
-
-#include "Util.h"
-#include "open_cl/Status.h"
-
-namespace onert
-{
-namespace backend
-{
-namespace gpu_cl
-{
-namespace
-{
-
-std::string GetAveragePoolingKernelCode(const OperationDef &op_def, bool stride_correction,
- GPUOperation *op)
-{
- auto src_desc = op_def.src_tensors[0];
-
- src_desc.SetTextureAddressMode(TextureAddressMode::ZERO);
-
- if (op_def.IsBatchSupported())
- {
- src_desc.SetStateVar("BatchedWidth", "true");
- }
- op->AddSrcTensor("src_tensor", src_desc);
- auto dst_desc = op_def.dst_tensors[0];
- if (op_def.IsBatchSupported())
- {
- dst_desc.SetStateVar("BatchedWidth", "true");
- }
- op->AddDstTensor("dst_tensor", dst_desc);
-
- std::map<Axis, std::string> axis_to_src_coord = {
- {Axis::WIDTH, "x_c"}, {Axis::HEIGHT, "y_c"}, {Axis::DEPTH, "d_c"},
- {Axis::CHANNELS, "Z"}, {Axis::BATCH, "B"},
- };
-
- std::map<Axis, std::string> axis_to_dst_coord = {
- {Axis::WIDTH, "X"}, {Axis::HEIGHT, "Y"}, {Axis::DEPTH, "D"},
- {Axis::CHANNELS, "Z"}, {Axis::BATCH, "B"},
- };
-
- std::vector<std::string> src_coords;
- std::vector<std::string> dst_coords;
- for (auto axis : {Axis::WIDTH, Axis::HEIGHT, Axis::DEPTH, Axis::CHANNELS})
- {
- if (op_def.dst_tensors[0].HasAxis(axis))
- {
- dst_coords.push_back(axis_to_dst_coord[axis]);
- }
- if (op_def.src_tensors[0].HasAxis(axis))
- {
- src_coords.push_back(axis_to_src_coord[axis]);
- }
- }
- std::string src_coord = src_coords[0];
- for (size_t i = 1; i < src_coords.size(); ++i)
- {
- src_coord += ", " + src_coords[i];
- }
- std::string dst_coord = dst_coords[0];
- for (size_t i = 1; i < dst_coords.size(); ++i)
- {
- dst_coord += ", " + dst_coords[i];
- }
-
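- // Buffer-backed tensors get no hardware address clamping, so reads outside
- // the input must be zeroed explicitly via the `outside` flag below.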
- const bool manual_clamp = op_def.src_tensors[0].storage_type == TensorStorageType::BUFFER ||
- op_def.src_tensors[0].storage_type == TensorStorageType::IMAGE_BUFFER;
-
- std::string c = GetCommonDefines(op_def.precision);
- c += "__kernel void main_function(\n";
- c += "$0) {\n";
- c += " int X = get_global_id(0);\n";
- if (op_def.dst_tensors[0].HasAxis(Axis::DEPTH))
- {
- c += " int linear_id_1 = get_global_id(1);\n";
- c += " int Y = linear_id_1 / args.dst_tensor.Depth();\n";
- c += " int D = linear_id_1 % args.dst_tensor.Depth();\n";
- }
- else
- {
- c += " int Y = get_global_id(1);\n";
- }
- c += " int Z = get_global_id(2);\n";
- c += " if (X >= args.dst_tensor.Width() || Y >= args.dst_tensor.Height() || "
- "Z >= args.dst_tensor.Slices()) { \n";
- c += " return; \n";
- c += " } \n";
- c += " float4 r = (float4)(0.0f);\n";
- c += " float window_size = 0.0;\n";
- if (stride_correction)
- {
- c += " int xs = " +
- GetXStrideCorrectedV2("X", "args.src_tensor.Batch()", "args.stride_x", "args.padding_x") +
- ";\n";
- }
- else
- {
- if (op_def.IsBatchSupported())
- {
- c += " int xs = X * args.stride_x + args.padding_x * "
- "args.src_tensor.Batch();\n";
- }
- else
- {
- c += " int xs = X * args.stride_x + args.padding_x;\n";
- }
- }
- c += " int ys = Y * args.stride_y + args.padding_y;\n";
- if (op_def.dst_tensors[0].HasAxis(Axis::DEPTH))
- {
- c += " int ds = D * args.stride_z + args.padding_z;\n";
- c += " for (int kz = 0; kz < args.kernel_size_z; ++kz) {\n";
- c += " int d_c = ds + kz;\n";
- c += " if (d_c < 0 || d_c >= args.src_tensor.Depth()) continue;\n";
- }
- c += " for (int ky = 0; ky < args.kernel_size_y; ++ky) {\n";
- c += " int y_c = ys + ky;\n";
- c += " bool outside_y = y_c < 0 || y_c >= args.src_tensor.Height();\n";
- c += " for (int kx = 0; kx < args.kernel_size_x; ++kx) {\n";
- if (op_def.IsBatchSupported())
- {
- c += " int x_c = xs + kx * args.src_tensor.Batch();\n";
- }
- else
- {
- c += " int x_c = xs + kx;\n";
- }
- c += " bool outside = outside_y || x_c < 0 || x_c >= "
- "args.src_tensor.Width();\n";
- if (manual_clamp)
- {
- c += " r += !outside ? args.src_tensor.Read<float>(" + src_coord +
- ") : "
- "(float4)(0.0f);\n";
- }
- else
- {
- c += " r += args.src_tensor.Read<float>(" + src_coord + ");\n";
- }
- c += " window_size += !outside ? 1.0 : 0.0;\n";
- c += " }\n";
- c += " }\n";
- if (op_def.dst_tensors[0].HasAxis(Axis::DEPTH))
- {
- c += " } // Depth\n";
- }
- // If window_size == 0, the window covered nothing. That indicates an
- // incorrectly constructed operation; NaNs are the expected output.
- c += " FLT4 result = TO_FLT4(r / window_size);\n";
- c += " args.dst_tensor.Write(result, " + dst_coord + ");\n";
- c += "}\n";
-
- return c;
-}
-
-std::string GetMaxPoolingKernelCode(const OperationDef &op_def, bool stride_correction,
- bool output_indices, GPUOperation *op)
-{
- auto src_desc = op_def.src_tensors[0];
- if (op_def.IsBatchSupported())
- {
- src_desc.SetStateVar("BatchedWidth", "true");
- }
- op->AddSrcTensor("src_tensor", src_desc);
- auto dst_desc = op_def.dst_tensors[0];
- if (op_def.IsBatchSupported())
- {
- dst_desc.SetStateVar("BatchedWidth", "true");
- }
- op->AddDstTensor("dst_tensor", dst_desc);
- if (output_indices)
- {
- auto dst_ind_desc = op_def.dst_tensors[1];
- if (op_def.IsBatchSupported())
- {
- dst_ind_desc.SetStateVar("BatchedWidth", "true");
- }
- op->AddDstTensor("dst_indices", dst_ind_desc);
- }
-
- std::map<Axis, std::string> axis_to_src_coord = {
- {Axis::WIDTH, "x_c"}, {Axis::HEIGHT, "y_c"}, {Axis::DEPTH, "d_c"},
- {Axis::CHANNELS, "Z"}, {Axis::BATCH, "B"},
- };
-
- std::map<Axis, std::string> axis_to_dst_coord = {
- {Axis::WIDTH, "X"}, {Axis::HEIGHT, "Y"}, {Axis::DEPTH, "D"},
- {Axis::CHANNELS, "Z"}, {Axis::BATCH, "B"},
- };
-
- std::vector<std::string> src_coords;
- std::vector<std::string> dst_coords;
- for (auto axis : {Axis::WIDTH, Axis::HEIGHT, Axis::DEPTH, Axis::CHANNELS})
- {
- if (op_def.dst_tensors[0].HasAxis(axis))
- {
- dst_coords.push_back(axis_to_dst_coord[axis]);
- }
- if (op_def.src_tensors[0].HasAxis(axis))
- {
- src_coords.push_back(axis_to_src_coord[axis]);
- }
- }
- std::string src_coord = src_coords[0];
- for (size_t i = 1; i < src_coords.size(); ++i)
- {
- src_coord += ", " + src_coords[i];
- }
- std::string dst_coord = dst_coords[0];
- for (size_t i = 1; i < dst_coords.size(); ++i)
- {
- dst_coord += ", " + dst_coords[i];
- }
-
- std::string c = GetCommonDefines(op_def.precision);
- c += "__kernel void main_function(\n";
- c += "$0) {\n";
- c += " int X = get_global_id(0);\n";
- if (op_def.dst_tensors[0].HasAxis(Axis::DEPTH))
- {
- c += " int linear_id_1 = get_global_id(1);\n";
- c += " int Y = linear_id_1 / args.dst_tensor.Depth();\n";
- c += " int D = linear_id_1 % args.dst_tensor.Depth();\n";
- }
- else
- {
- c += " int Y = get_global_id(1);\n";
- }
- c += " int Z = get_global_id(2);\n";
- c += " if (X >= args.dst_tensor.Width() || Y >= args.dst_tensor.Height() || "
- "Z >= args.dst_tensor.Slices()) { \n";
- c += " return; \n";
- c += " } \n";
- c += " FLT4 maximum = (FLT4)(-10000.0f);\n";
- if (output_indices)
- {
- c += " FLT4 indexes = (FLT4)(0.0f);\n";
- }
- if (stride_correction)
- {
- c += " int xs = " +
- GetXStrideCorrectedV2("X", "args.src_tensor.Batch()", "args.stride_x", "args.padding_x") +
- ";\n";
- }
- else
- {
- if (op_def.IsBatchSupported())
- {
- c += " int xs = X * args.stride_x + args.padding_x * "
- "args.src_tensor.Batch();\n";
- }
- else
- {
- c += " int xs = X * args.stride_x + args.padding_x;\n";
- }
- }
- c += " int ys = Y * args.stride_y + args.padding_y;\n";
- c += " for (int ky = 0; ky < args.kernel_size_y; ++ky) {\n";
- c += " int y_c = ys + ky;\n";
- c += " if (y_c < 0 || y_c >= args.src_tensor.Height()) continue;\n";
- c += " for (int kx = 0; kx < args.kernel_size_x; ++kx) {\n";
- if (op_def.IsBatchSupported())
- {
- c += " int x_c = xs + kx * args.src_tensor.Batch();\n";
- }
- else
- {
- c += " int x_c = xs + kx;\n";
- }
- c += " if (x_c < 0 || x_c >= args.src_tensor.Width()) continue;\n";
- if (op_def.dst_tensors[0].HasAxis(Axis::DEPTH))
- {
- c += " int ds = D * args.stride_z + args.padding_z;\n";
- c += " for (int kz = 0; kz < args.kernel_size_z; ++kz) {\n";
- c += " int d_c = ds + kz;\n";
- c += " if (d_c < 0 || d_c >= args.src_tensor.Depth()) continue;\n";
- }
- c += " FLT4 src = args.src_tensor.Read(" + src_coord + ");\n";
- if (output_indices)
- {
- if (op_def.dst_tensors[0].HasAxis(Axis::DEPTH))
- {
- c += " FLT index_counter = (FLT)((ky * args.kernel_size_x + kx) * "
- "args.kernel_size_z + kz) + (FLT)(0.1f);\n";
- }
- else
- {
- c += " FLT index_counter = (FLT)(ky * args.kernel_size_x + kx) + "
- "(FLT)(0.1f);\n";
- }
- c += " if (src.x > maximum.x) {\n";
- c += " indexes.x = index_counter;\n";
- c += " maximum.x = src.x;\n";
- c += " }\n";
- c += " if (src.y > maximum.y) {\n";
- c += " indexes.y = index_counter;\n";
- c += " maximum.y = src.y;\n";
- c += " }\n";
- c += " if (src.z > maximum.z) {\n";
- c += " indexes.z = index_counter;\n";
- c += " maximum.z = src.z;\n";
- c += " }\n";
- c += " if (src.w > maximum.w) {\n";
- c += " indexes.w = index_counter;\n";
- c += " maximum.w = src.w;\n";
- c += " }\n";
- }
- else
- {
- c += " maximum = max(src, maximum);\n";
- }
- if (op_def.dst_tensors[0].HasAxis(Axis::DEPTH))
- {
- c += " } // Depth\n";
- }
- c += " }\n";
- c += " }\n";
- c += " args.dst_tensor.Write(maximum, " + dst_coord + ");\n";
- if (output_indices)
- {
- c += " args.dst_indices.Write(indexes, " + dst_coord + ");\n";
- }
- c += "}\n";
-
- return c;
-}
-} // namespace
-
-GPUOperation CreatePooling(const OperationDef &definition, const Pooling2DAttributes &attr)
-{
- GPUOperation op(definition);
- op.args_.AddInt("kernel_size_x", attr.kernel.w);
- op.args_.AddInt("padding_x", -attr.padding.prepended.w);
- op.args_.AddInt("stride_x", attr.strides.w);
- op.args_.AddInt("kernel_size_y", attr.kernel.h);
- op.args_.AddInt("padding_y", -attr.padding.prepended.h);
- op.args_.AddInt("stride_y", attr.strides.h);
-
- const bool stride_correction = definition.IsBatchSupported() && attr.strides.w != 1;
- if (attr.type == PoolingType::AVERAGE)
- {
- op.code_ = GetAveragePoolingKernelCode(definition, stride_correction, &op);
- }
- else if (attr.type == PoolingType::MAX)
- {
- op.code_ = GetMaxPoolingKernelCode(definition, stride_correction, attr.output_indices, &op);
- }
- op.tensor_to_grid_ = TensorToGrid::kWBToX_HDToY_SToZ;
- return op;
-}
-
-GPUOperation CreatePooling(const OperationDef &definition, const Pooling3DAttributes &attr)
-{
- GPUOperation op(definition);
- op.args_.AddInt("kernel_size_x", attr.kernel.w);
- op.args_.AddInt("padding_x", -attr.padding.prepended.w);
- op.args_.AddInt("stride_x", attr.strides.w);
- op.args_.AddInt("kernel_size_y", attr.kernel.h);
- op.args_.AddInt("padding_y", -attr.padding.prepended.h);
- op.args_.AddInt("stride_y", attr.strides.h);
- op.args_.AddInt("kernel_size_z", attr.kernel.d);
- op.args_.AddInt("padding_z", -attr.padding.prepended.d);
- op.args_.AddInt("stride_z", attr.strides.d);
- const bool stride_correction = definition.IsBatchSupported() && attr.strides.w != 1;
- if (attr.type == PoolingType::AVERAGE)
- {
- op.code_ = GetAveragePoolingKernelCode(definition, stride_correction, &op);
- }
- else if (attr.type == PoolingType::MAX)
- {
- op.code_ = GetMaxPoolingKernelCode(definition, stride_correction, attr.output_indices, &op);
- }
- op.tensor_to_grid_ = TensorToGrid::kWBToX_HDToY_SToZ;
- return op;
-}
-
-} // namespace gpu_cl
-} // namespace backend
-} // namespace onert
diff --git a/runtime/onert/backend/gpu_cl/open_cl/kernels/Pooling.h b/runtime/onert/backend/gpu_cl/open_cl/kernels/Pooling.h
deleted file mode 100644
index 166d81591..000000000
--- a/runtime/onert/backend/gpu_cl/open_cl/kernels/Pooling.h
+++ /dev/null
@@ -1,43 +0,0 @@
-/*
- * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
- * Copyright 2019 The TensorFlow Authors. All Rights Reserved.
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#ifndef __ONERT_BACKEND_GPU_CL_OPENCL_KERNELS_POOLING_H__
-#define __ONERT_BACKEND_GPU_CL_OPENCL_KERNELS_POOLING_H__
-
-#include "GpuOperation.h"
-
-#include "open_cl/Operations.h"
-#include "open_cl/Precision.h"
-#include "open_cl/ClKernel.h"
-#include "open_cl/Tensor.h"
-
-namespace onert
-{
-namespace backend
-{
-namespace gpu_cl
-{
-
-GPUOperation CreatePooling(const OperationDef &definition, const Pooling2DAttributes &attr);
-
-GPUOperation CreatePooling(const OperationDef &definition, const Pooling3DAttributes &attr);
-
-} // namespace gpu_cl
-} // namespace backend
-} // namespace onert
-
-#endif // __ONERT_BACKEND_GPU_CL_OPENCL_KERNELS_POOLING_H__
diff --git a/runtime/onert/backend/gpu_cl/open_cl/kernels/Relu.cc b/runtime/onert/backend/gpu_cl/open_cl/kernels/Relu.cc
deleted file mode 100644
index 37f87e599..000000000
--- a/runtime/onert/backend/gpu_cl/open_cl/kernels/Relu.cc
+++ /dev/null
@@ -1,80 +0,0 @@
-/*
- * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
- * Copyright 2019 The TensorFlow Authors. All Rights Reserved.
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#include "Relu.h"
-
-#include <string>
-#include "Util.h"
-#include "GpuOperation.h"
-#include "absl/strings/str_cat.h"
-#include "open_cl/Precision.h"
-
-namespace onert
-{
-namespace backend
-{
-namespace gpu_cl
-{
-
-GPUOperation CreateReLU(const OperationDef &definition, const ReLUAttributes &attr)
-{
- GPUOperation op(definition);
- op.elementwise_ = true;
-
- std::string min_func;
- if (attr.alpha != 0.0f)
- {
- min_func = "min(in_out_value * args.alpha, (FLT)(0.0f))";
- if (definition.precision == CalculationsPrecision::F32)
- {
- op.args_.AddFloat("alpha", attr.alpha);
- }
- else
- {
-#ifdef FIXME_PORTING_HALF_REQUIRED
- op.args_.AddHalf("alpha", half(attr.alpha));
-#endif
- }
- }
- else
- {
- min_func = "(FLT)(0.0f)";
- }
- if (attr.clip != 0.0f)
- {
- if (definition.precision == CalculationsPrecision::F32)
- {
- op.args_.AddFloat("clip", attr.clip);
- }
- else
- {
-#ifdef FIXME_PORTING_HALF_REQUIRED
- op.args_.AddHalf("clip", half(attr.clip));
-#endif
- }
- op.code_ = absl::StrCat("in_out_value = clamp(in_out_value, ", min_func, ", args.clip);");
- }
- else
- {
- op.code_ = absl::StrCat("in_out_value = max(in_out_value, ", min_func, ");");
- }
- return op;
-}
-
-} // namespace gpu_cl
-} // namespace backend
-} // namespace onert
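
For reference, the expression CreateReLU assembles covers plain, leaky, and clipped ReLU in a single elementwise statement. A small sketch that reproduces just the string assembly; `ReluExpr` is a hypothetical helper, but the literals match the deleted code:

```cpp
#include <iostream>
#include <string>

// Sketch reproducing the expression CreateReLU above emits. ReluExpr is a
// hypothetical helper; alpha != 0 selects the leaky negative slope and
// clip != 0 adds the upper bound.
std::string ReluExpr(float alpha, float clip)
{
  const std::string min_func =
    alpha != 0.0f ? "min(in_out_value * args.alpha, (FLT)(0.0f))" : "(FLT)(0.0f)";
  if (clip != 0.0f)
    return "in_out_value = clamp(in_out_value, " + min_func + ", args.clip);";
  return "in_out_value = max(in_out_value, " + min_func + ");";
}

int main()
{
  std::cout << ReluExpr(0.0f, 0.0f) << "\n"; // plain ReLU
  std::cout << ReluExpr(0.1f, 6.0f) << "\n"; // leaky ReLU clipped at args.clip
}
```
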
diff --git a/runtime/onert/backend/gpu_cl/open_cl/kernels/Relu.h b/runtime/onert/backend/gpu_cl/open_cl/kernels/Relu.h
deleted file mode 100644
index eb6b1ad1d..000000000
--- a/runtime/onert/backend/gpu_cl/open_cl/kernels/Relu.h
+++ /dev/null
@@ -1,40 +0,0 @@
-/*
- * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
- * Copyright 2020 The TensorFlow Authors. All Rights Reserved.
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#ifndef __ONERT_BACKEND_GPU_CL_OPEN_CL_KERNELS_RELU_H__
-#define __ONERT_BACKEND_GPU_CL_OPEN_CL_KERNELS_RELU_H__
-
-#include "open_cl/ClKernel.h"
-#include "GpuOperation.h"
-#include "open_cl/Precision.h"
-#include "open_cl/Tensor.h"
-#include "open_cl/Types.h"
-#include "open_cl/Operations.h"
-namespace onert
-{
-namespace backend
-{
-namespace gpu_cl
-{
-
-GPUOperation CreateReLU(const OperationDef &definition, const ReLUAttributes &attr);
-
-} // namespace gpu_cl
-} // namespace backend
-} // namespace onert
-
-#endif // __ONERT_BACKEND_GPU_CL_OPEN_CL_KERNELS_RELU_H__
diff --git a/runtime/onert/backend/gpu_cl/open_cl/kernels/Reshape.cc b/runtime/onert/backend/gpu_cl/open_cl/kernels/Reshape.cc
deleted file mode 100644
index cdd3e8364..000000000
--- a/runtime/onert/backend/gpu_cl/open_cl/kernels/Reshape.cc
+++ /dev/null
@@ -1,111 +0,0 @@
-/*
- * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
- * Copyright 2019 The TensorFlow Authors. All Rights Reserved.
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#include "Reshape.h"
-
-#include <string>
-
-#include "Util.h"
-#include "open_cl/Status.h"
-
-namespace onert
-{
-namespace backend
-{
-namespace gpu_cl
-{
-namespace
-{
-std::string GetReshapeCode(const OperationDef &op_def)
-{
- std::string c = GetCommonDefines(op_def.precision);
- c += "__kernel void main_function(\n";
- c += "$0) {\n";
- if (op_def.dst_tensors[0].HasAxis(Axis::BATCH))
- {
- c += " int linear_id = get_global_id(0);\n";
- c += " int X = linear_id / args.dst_tensor.Batch();\n";
- c += " int B = linear_id % args.dst_tensor.Batch();\n";
- c += " args.dst_tensor.SetBatchRef(B);\n";
- }
- else
- {
- c += " int X = get_global_id(0);\n";
- }
- c += " int Y = get_global_id(1);\n";
- c += " int Z = get_global_id(2);\n";
- c += " if (X >= args.dst_tensor.Width() || Y >= args.dst_tensor.Height() || "
- "Z >= args.dst_tensor.Slices()) { \n";
- c += " return; \n";
- c += " } \n";
- c += " FLT temps[4];\n";
- c += " temps[0] = (FLT)(0.0f);\n";
- c += " temps[1] = (FLT)(0.0f);\n";
- c += " temps[2] = (FLT)(0.0f);\n";
- c += " temps[3] = (FLT)(0.0f);\n";
- if (op_def.dst_tensors[0].HasAxis(Axis::BATCH))
- {
- c += " int base = B;\n";
- }
- else
- {
- c += " int base = 0;\n";
- }
- c += " base = ((base * args.dst_tensor.Height() + Y) * "
- "args.dst_tensor.Width() + X) * args.dst_tensor.Channels() + Z * 4;\n";
- c += " for (int i = 0; i < 4; ++i) {\n";
- c += " int dst_channel = Z * 4 + i;\n";
- c += " if (dst_channel < args.dst_tensor.Channels()) {;\n";
- c += " int p = base + i;\n";
- c += " int src_c = p % args.src_tensor.Channels();\n";
- c += " p = p / args.src_tensor.Channels();\n";
- c += " int src_x = p % args.src_tensor.Width();\n";
- c += " p = p / args.src_tensor.Width();\n";
- c += " int src_y = p % args.src_tensor.Height();\n";
- if (op_def.src_tensors[0].HasAxis(Axis::BATCH))
- {
- c += " int src_b = p / args.src_tensor.Height();\n";
- c += " args.src_tensor.SetBatchRef(src_b);\n";
- }
- c += " int src_z = src_c / 4;\n";
- c += " int src_sub_ch = src_c % 4;\n";
- c += " FLT4 t = args.src_tensor.Read(src_x, src_y, src_z);\n";
- c += " FLT t_ar[4] = {t.x, t.y, t.z, t.w};\n";
- c += " temps[i] = t_ar[src_sub_ch];\n";
- c += " }\n";
- c += " }\n";
- c += " FLT4 result = (FLT4)(temps[0], temps[1], temps[2], temps[3]);\n";
- c += " args.dst_tensor.Write(result, X, Y, Z);\n";
- c += "}\n";
- return c;
-}
-
-} // namespace
-
-GPUOperation CreateReshape(const OperationDef &definition)
-{
- GPUOperation op(definition);
- op.AddSrcTensor("src_tensor", definition.src_tensors[0]);
- op.AddDstTensor("dst_tensor", definition.dst_tensors[0]);
- op.code_ = GetReshapeCode(definition);
- op.tensor_to_grid_ = TensorToGrid::kWBToX_HDToY_SToZ;
- return op;
-}
-
-} // namespace gpu_cl
-} // namespace backend
-} // namespace onert
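
The heart of GetReshapeCode is pure index arithmetic: flatten the destination coordinate in BHWC order, then peel off source channels, width, and height in turn. A host-side sketch with hypothetical shapes (a reshape only requires that the element counts match):

```cpp
#include <cassert>

// Host-side sketch of the index math in GetReshapeCode above. A destination
// element (b, y, x, c) is flattened in BHWC order and decomposed back into
// source coordinates. The shapes here are made up.
struct Shape { int b, h, w, c; };

void ReshapeIndex(const Shape &src, const Shape &dst,
                  int b, int y, int x, int c,
                  int &src_b, int &src_y, int &src_x, int &src_c)
{
  int p = ((b * dst.h + y) * dst.w + x) * dst.c + c; // linear BHWC index
  src_c = p % src.c; p /= src.c;
  src_x = p % src.w; p /= src.w;
  src_y = p % src.h;
  src_b = p / src.h;
}

int main()
{
  Shape src{1, 2, 3, 4}, dst{1, 4, 3, 2}; // both hold 24 elements
  int sb, sy, sx, sc;
  ReshapeIndex(src, dst, 0, 3, 2, 1, sb, sy, sx, sc);
  assert(sb == 0 && sy == 1 && sx == 2 && sc == 3); // element 23 round-trips
}
```
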
diff --git a/runtime/onert/backend/gpu_cl/open_cl/kernels/Reshape.h b/runtime/onert/backend/gpu_cl/open_cl/kernels/Reshape.h
deleted file mode 100644
index 4f7c5ea38..000000000
--- a/runtime/onert/backend/gpu_cl/open_cl/kernels/Reshape.h
+++ /dev/null
@@ -1,41 +0,0 @@
-/*
- * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
- * Copyright 2019 The TensorFlow Authors. All Rights Reserved.
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#ifndef __ONERT_BACKEND_GPU_CL_OPENCL_KERNELS_RESHAPE_H__
-#define __ONERT_BACKEND_GPU_CL_OPENCL_KERNELS_RESHAPE_H__
-
-#include "GpuOperation.h"
-
-#include "open_cl/Operations.h"
-#include "open_cl/Precision.h"
-#include "open_cl/ClKernel.h"
-#include "open_cl/Tensor.h"
-
-namespace onert
-{
-namespace backend
-{
-namespace gpu_cl
-{
-
-GPUOperation CreateReshape(const OperationDef &definition);
-
-} // namespace gpu_cl
-} // namespace backend
-} // namespace onert
-
-#endif // __ONERT_BACKEND_GPU_CL_OPENCL_KERNELS_RESHAPE_H__
diff --git a/runtime/onert/backend/gpu_cl/open_cl/kernels/Reshapex4.cc b/runtime/onert/backend/gpu_cl/open_cl/kernels/Reshapex4.cc
deleted file mode 100644
index 13010e791..000000000
--- a/runtime/onert/backend/gpu_cl/open_cl/kernels/Reshapex4.cc
+++ /dev/null
@@ -1,96 +0,0 @@
-/*
- * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
- * Copyright 2019 The TensorFlow Authors. All Rights Reserved.
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#include "Reshape.h"
-
-#include <string>
-
-#include "Util.h"
-#include "open_cl/Status.h"
-
-namespace onert
-{
-namespace backend
-{
-namespace gpu_cl
-{
-namespace
-{
-
-std::string GetReshapeCode(const OperationDef &op_def)
-{
- std::string c = GetCommonDefines(op_def.precision);
- c += "__kernel void main_function(\n";
- c += "$0) {\n";
- if (op_def.dst_tensors[0].HasAxis(Axis::BATCH))
- {
- c += " int linear_id = get_global_id(0);\n";
- c += " int X = linear_id / args.dst_tensor.Batch();\n";
- c += " int B = linear_id % args.dst_tensor.Batch();\n";
- c += " args.dst_tensor.SetBatchRef(B);\n";
- }
- else
- {
- c += " int X = get_global_id(0);\n";
- }
- c += " int Y = get_global_id(1);\n";
- c += " int Z = get_global_id(2);\n";
- c += " if (X >= args.dst_tensor.Width() || Y >= args.dst_tensor.Height() || "
- "Z >= args.dst_tensor.Slices()) { \n";
- c += " return; \n";
- c += " } \n";
- if (op_def.dst_tensors[0].HasAxis(Axis::BATCH))
- {
- c += " int dst_bhwc4 = B;\n";
- }
- else
- {
- c += " int dst_bhwc4 = 0;\n";
- }
- c += " dst_bhwc4 = ((dst_bhwc4 * args.dst_tensor.Height() + Y) * "
- "args.dst_tensor.Width() + X) * args.dst_tensor.Slices() + Z;\n";
- c += " int src_z = dst_bhwc4 % args.src_tensor.Slices();\n";
- c += " dst_bhwc4 = dst_bhwc4 / args.src_tensor.Slices();\n";
- c += " int src_x = dst_bhwc4 % args.src_tensor.Width();\n";
- c += " dst_bhwc4 = dst_bhwc4 / args.src_tensor.Width();\n";
- c += " int src_y = dst_bhwc4 % args.src_tensor.Height();\n";
- if (op_def.src_tensors[0].HasAxis(Axis::BATCH))
- {
- c += " int src_b = dst_bhwc4 / args.src_tensor.Height();\n";
- c += " args.src_tensor.SetBatchRef(src_b);\n";
- }
- c += " FLT4 result = args.src_tensor.Read(src_x, src_y, src_z);\n";
- c += " args.dst_tensor.Write(result, X, Y, Z);\n";
- c += "}\n";
- return c;
-}
-
-} // namespace
-
-GPUOperation CreateReshapex4(const OperationDef &definition)
-{
- GPUOperation op(definition);
- op.AddSrcTensor("src_tensor", definition.src_tensors[0]);
- op.AddDstTensor("dst_tensor", definition.dst_tensors[0]);
- op.code_ = GetReshapeCode(definition);
- op.tensor_to_grid_ = TensorToGrid::kWBToX_HDToY_SToZ;
- return op;
-}
-
-} // namespace gpu_cl
-} // namespace backend
-} // namespace onert
diff --git a/runtime/onert/backend/gpu_cl/open_cl/kernels/Reshapex4.h b/runtime/onert/backend/gpu_cl/open_cl/kernels/Reshapex4.h
deleted file mode 100644
index 8988e8bd4..000000000
--- a/runtime/onert/backend/gpu_cl/open_cl/kernels/Reshapex4.h
+++ /dev/null
@@ -1,42 +0,0 @@
-/*
- * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
- * Copyright 2019 The TensorFlow Authors. All Rights Reserved.
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#ifndef __ONERT_BACKEND_GPU_CL_OPEN_CL_KERNELS_RESHAPEX4_H__
-#define __ONERT_BACKEND_GPU_CL_OPEN_CL_KERNELS_RESHAPEX4_H__
-
-#include "GpuOperation.h"
-
-#include "open_cl/Operations.h"
-#include "open_cl/Precision.h"
-#include "open_cl/ClKernel.h"
-#include "open_cl/Tensor.h"
-
-namespace onert
-{
-namespace backend
-{
-namespace gpu_cl
-{
-
-// More optimized, but requires src_channels % 4 == 0 and dst_channels % 4 == 0
-GPUOperation CreateReshapex4(const OperationDef &definition);
-
-} // namespace gpu_cl
-} // namespace backend
-} // namespace onert
-
-#endif // __ONERT_BACKEND_GPU_CL_OPEN_CL_KERNELS_RESHAPEX4_H__
diff --git a/runtime/onert/backend/gpu_cl/open_cl/kernels/Softmax.cc b/runtime/onert/backend/gpu_cl/open_cl/kernels/Softmax.cc
deleted file mode 100644
index 4ee164d82..000000000
--- a/runtime/onert/backend/gpu_cl/open_cl/kernels/Softmax.cc
+++ /dev/null
@@ -1,86 +0,0 @@
-/*
- * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
- * Copyright 2019 The TensorFlow Authors. All Rights Reserved.
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#include "Softmax.h"
-
-#include <string>
-
-#include "Util.h"
-#include "WorkGroupPicking.h"
-#include "GpuOperation.h"
-#include "open_cl/Status.h"
-
-namespace onert
-{
-namespace backend
-{
-namespace gpu_cl
-{
-
-namespace
-{
-std::string GetSoftmaxKernelCode(const OperationDef &op_def)
-{
- std::string c = GetCommonDefines(op_def.precision);
- c += "__kernel void main_function(\n";
- c += "$0) {\n";
- c += " int X = get_global_id(0);\n";
- c += " int Y = get_global_id(1);\n";
- c += " if (X >= args.dst_tensor.Width() || Y >= args.dst_tensor.Height()) "
- "return; \n";
- c += " float sum = 0.0f;\n";
- c += " for (int d = 0; d < args.dst_tensor.Slices(); ++d) {\n";
- c += " float4 t = args.src_tensor.Read<float>(X, Y, d);\n";
- c += " sum += exp(t.x);\n";
- c += " if (d * 4 + 1 < args.dst_tensor.Channels()) sum += exp(t.y);\n";
- c += " if (d * 4 + 2 < args.dst_tensor.Channels()) sum += exp(t.z);\n";
- c += " if (d * 4 + 3 < args.dst_tensor.Channels()) sum += exp(t.w);\n";
- c += " }\n";
- c += " for (int d = 0; d < args.dst_tensor.Slices(); ++d) {\n";
- c += " float4 t = args.src_tensor.Read<float>(X, Y, d);\n";
- c += " t = exp(t) / sum;\n";
- c += " FLT4 result = TO_FLT4(t);\n";
- c += " args.dst_tensor.Write(result, X, Y, d);\n";
- c += " }\n";
- c += "}\n";
- return c;
-}
-} // namespace
-
-GPUOperation CreateSoftmax(const OperationDef &definition)
-{
- GPUOperation op(definition);
- auto src_desc = definition.src_tensors[0];
- if (definition.IsBatchSupported())
- {
- src_desc.SetStateVar("BatchedWidth", "true");
- }
- op.AddSrcTensor("src_tensor", src_desc);
- auto dst_desc = definition.dst_tensors[0];
- if (definition.IsBatchSupported())
- {
- dst_desc.SetStateVar("BatchedWidth", "true");
- }
- op.AddDstTensor("dst_tensor", dst_desc);
- op.code_ = GetSoftmaxKernelCode(definition);
- op.tensor_to_grid_ = TensorToGrid::kWBToX_HDToY_ZIs1;
- return op;
-}
-
-} // namespace gpu_cl
-} // namespace backend
-} // namespace onert
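
The guards `d * 4 + i < Channels()` above keep the padding lanes of the last slice out of the normalizer. The same two-pass computation on the host, for one (X, Y) column with made-up values:

```cpp
#include <cmath>
#include <cstdio>
#include <vector>

// Host-side sketch of the two-pass softmax in GetSoftmaxKernelCode above, for
// a single (X, Y) column. Channels are packed four per slice; lanes past the
// real channel count are masked out of the sum, mirroring the
// `d * 4 + i < Channels()` guards. The values are hypothetical.
int main()
{
  const int channels = 6;                          // 2 slices, second half-full
  std::vector<float> v = {1, 2, 3, 4, 5, 6, 0, 0}; // padded to 2 * 4 lanes

  float sum = 0.0f;
  for (int d = 0; d * 4 < channels; ++d)
    for (int i = 0; i < 4; ++i)
      if (d * 4 + i < channels) // skip the padding lanes
        sum += std::exp(v[d * 4 + i]);

  for (int c = 0; c < channels; ++c)
    std::printf("softmax[%d] = %f\n", c, std::exp(v[c]) / sum);
}
```
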
diff --git a/runtime/onert/backend/gpu_cl/open_cl/kernels/Softmax.h b/runtime/onert/backend/gpu_cl/open_cl/kernels/Softmax.h
deleted file mode 100644
index 594bab042..000000000
--- a/runtime/onert/backend/gpu_cl/open_cl/kernels/Softmax.h
+++ /dev/null
@@ -1,40 +0,0 @@
-/*
- * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
- * Copyright 2019 The TensorFlow Authors. All Rights Reserved.
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#ifndef __ONERT_BACKEND_GPU_CL_OPEN_CL_KERNELS_SOFTMAX_H__
-#define __ONERT_BACKEND_GPU_CL_OPEN_CL_KERNELS_SOFTMAX_H__
-
-#include "open_cl/ClKernel.h"
-#include "GpuOperation.h"
-#include "open_cl/Precision.h"
-#include "open_cl/Tensor.h"
-#include "open_cl/Types.h"
-
-namespace onert
-{
-namespace backend
-{
-namespace gpu_cl
-{
-
-GPUOperation CreateSoftmax(const OperationDef &definition);
-
-} // namespace gpu_cl
-} // namespace backend
-} // namespace onert
-
-#endif // __ONERT_BACKEND_GPU_CL_OPEN_CL_KERNELS_SOFTMAX_H__
diff --git a/runtime/onert/backend/gpu_cl/open_cl/kernels/Softmax1x1.cc b/runtime/onert/backend/gpu_cl/open_cl/kernels/Softmax1x1.cc
deleted file mode 100644
index 590952dca..000000000
--- a/runtime/onert/backend/gpu_cl/open_cl/kernels/Softmax1x1.cc
+++ /dev/null
@@ -1,138 +0,0 @@
-/*
- * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
- * Copyright 2019 The TensorFlow Authors. All Rights Reserved.
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#include "Softmax1x1.h"
-
-#include <string>
-
-#include "Util.h"
-#include "open_cl/Status.h"
-
-namespace onert
-{
-namespace backend
-{
-namespace gpu_cl
-{
-
-Softmax1x1::Softmax1x1(const OperationDef &definition) : GPUOperation(definition)
-{
- work_group_size_ = int3(32, 1, 1);
- code_ = GetSoftmaxKernelCode(definition_);
-}
-
-Softmax1x1::Softmax1x1(Softmax1x1 &&kernel) : GPUOperation(std::move(kernel)) {}
-
-Softmax1x1 &Softmax1x1::operator=(Softmax1x1 &&kernel)
-{
- if (this != &kernel)
- {
- GPUOperation::operator=(std::move(kernel));
- }
- return *this;
-}
-
-std::string Softmax1x1::GetSoftmaxKernelCode(const OperationDef &op_def)
-{
- AddSrcTensor("src_tensor", op_def.src_tensors[0]);
- AddDstTensor("dst_tensor", op_def.dst_tensors[0]);
- args_.AddFloat("mask_x");
- args_.AddFloat("mask_y");
- args_.AddFloat("mask_z");
- args_.AddFloat("mask_w");
- args_.AddInt("slices_x32");
-
- std::string c = GetCommonDefines(op_def.precision);
- c += "__kernel void main_function(\n";
- c += "$0) {\n";
- if (op_def.IsBatchSupported())
- {
- c += " int batch_id = get_global_id(1);\n";
- c += " if (batch_id >= args.dst_tensor.Batch()) return;\n";
- c += " args.dst_tensor.SetBatchRef(batch_id);\n";
- c += " args.src_tensor.SetBatchRef(batch_id);\n";
- }
- c += " float4 mask = (float4)(args.mask_x, args.mask_y, args.mask_z, "
- "args.mask_w);\n";
- c += " int offset = 0;\n";
- c += " float sum = 0.0f;\n";
- c += " int s = 0;\n";
- c += " int tid = get_local_id(0);\n";
- c += " do {\n";
- c += " int z = offset + tid;\n";
- c += " if (z < args.dst_tensor.Slices()) {\n";
- c += " float4 mask_temp = z == args.dst_tensor.Slices() - 1 ? mask : "
- "(float4)(1.0f);\n";
- c += " float4 src = args.src_tensor.Read<float>(0, 0, z);\n";
- c += " sum += dot(mask_temp, exp(src));\n";
- c += " offset += 32;\n";
- c += " }\n";
- c += " s++;\n";
- c += " } while (s < args.slices_x32);\n";
- c += "\n";
- c += " __local float4 tmp[8];\n";
- c += " __local float* tmpx1 = (__local float*)tmp;\n";
- c += " tmpx1[tid] = sum;\n";
- c += " barrier(CLK_LOCAL_MEM_FENCE);\n";
- c += " if (tid == 0) {\n";
- c += " sum = dot((float4)(1.0f), tmp[0]);\n";
- c += " sum += dot((float4)(1.0f), tmp[1]);\n";
- c += " sum += dot((float4)(1.0f), tmp[2]);\n";
- c += " sum += dot((float4)(1.0f), tmp[3]);\n";
- c += " sum += dot((float4)(1.0f), tmp[4]);\n";
- c += " sum += dot((float4)(1.0f), tmp[5]);\n";
- c += " sum += dot((float4)(1.0f), tmp[6]);\n";
- c += " sum += dot((float4)(1.0f), tmp[7]);\n";
- c += " tmpx1[0] = 1.0f / sum;\n";
- c += " }\n";
- c += " barrier(CLK_LOCAL_MEM_FENCE);\n";
- c += " sum = tmpx1[0];\n";
- c += "\n";
- c += " offset = 0;\n";
- c += " s = 0;\n";
- c += " do {\n";
- c += " int z = offset + tid;\n";
- c += " if (z < args.dst_tensor.Slices()) {\n";
- c += " FLT4 res = TO_FLT4(exp(args.src_tensor.Read<float>(0, 0, "
- "z))*sum);\n";
- c += " args.dst_tensor.Write(res, 0, 0, z);\n";
- c += " offset += 32;\n";
- c += " }\n";
- c += " s++;\n";
- c += " } while (s < args.slices_x32);\n";
- c += "}\n";
- return c;
-}
-
-absl::Status Softmax1x1::BindArguments(ArgumentsBinder *args)
-{
- float4 mask = GetMaskForLastPlane(src_[0]->Channels());
- RETURN_IF_ERROR(args->SetFloat("mask_x", mask.x));
- RETURN_IF_ERROR(args->SetFloat("mask_y", mask.y));
- RETURN_IF_ERROR(args->SetFloat("mask_z", mask.z));
- RETURN_IF_ERROR(args->SetFloat("mask_w", mask.w));
- RETURN_IF_ERROR(args->SetInt("slices_x32", DivideRoundUp(src_[0]->Slices(), 32)));
- return absl::OkStatus();
-}
-
-int3 Softmax1x1::GetGridSize() const { return int3(32, dst_[0]->Batch(), 1); }
-
-Softmax1x1 CreateSoftmax1x1(const OperationDef &definition) { return Softmax1x1(definition); }
-
-} // namespace gpu_cl
-} // namespace backend
-} // namespace onert
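
Softmax1x1 spreads the slice loop across 32 work-items and folds their partial sums through 8 float4 slots of local memory; the chain of `dot((float4)(1.0f), tmp[k])` calls is simply a sum over all 32 lanes. A host model of that reduction, with assumed partial sums and no CL runtime:

```cpp
#include <cstdio>
#include <numeric>
#include <vector>

// Host model of the Softmax1x1 reduction above: each of the 32 work-items
// accumulates a partial sum over every 32nd slice; work-item 0 then folds the
// 8 float4 slots of local memory (a plain sum of the 32 lanes) and broadcasts
// 1/sum back through tmpx1[0]. The partial sums here are made up.
int main()
{
  std::vector<float> partial(32, 0.5f); // pretend each work-item summed 0.5
  const float sum = std::accumulate(partial.begin(), partial.end(), 0.0f);
  std::printf("broadcast value (1/sum) = %f\n", 1.0f / sum);
}
```
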
diff --git a/runtime/onert/backend/gpu_cl/open_cl/kernels/Softmax1x1.h b/runtime/onert/backend/gpu_cl/open_cl/kernels/Softmax1x1.h
deleted file mode 100644
index da375d457..000000000
--- a/runtime/onert/backend/gpu_cl/open_cl/kernels/Softmax1x1.h
+++ /dev/null
@@ -1,61 +0,0 @@
-/*
- * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
- * Copyright 2019 The TensorFlow Authors. All Rights Reserved.
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#ifndef __ONERT_BACKEND_GPU_CL_OPEN_CL_KERNELS_SOFTMAX1X1_H__
-#define __ONERT_BACKEND_GPU_CL_OPEN_CL_KERNELS_SOFTMAX1X1_H__
-
-#include "GpuOperation.h"
-
-#include "open_cl/Precision.h"
-#include "open_cl/ClKernel.h"
-#include "open_cl/Tensor.h"
-
-namespace onert
-{
-namespace backend
-{
-namespace gpu_cl
-{
-
-class Softmax1x1 : public GPUOperation
-{
-public:
- Softmax1x1() = default;
- explicit Softmax1x1(const OperationDef &definition);
-
- absl::Status BindArguments(ArgumentsBinder *args) override;
- int3 GetGridSize() const override;
-
- // Move only
- Softmax1x1(Softmax1x1 &&kernel);
- Softmax1x1 &operator=(Softmax1x1 &&kernel);
- Softmax1x1(const Softmax1x1 &) = delete;
- Softmax1x1 &operator=(const Softmax1x1 &) = delete;
-
- friend Softmax1x1 CreateSoftmax1x1(const OperationDef &definition);
-
-private:
- std::string GetSoftmaxKernelCode(const OperationDef &op_def);
-};
-
-Softmax1x1 CreateSoftmax1x1(const OperationDef &definition);
-
-} // namespace gpu_cl
-} // namespace backend
-} // namespace onert
-
-#endif // __ONERT_BACKEND_GPU_CL_OPEN_CL_KERNELS_SOFTMAX1X1_H__
diff --git a/runtime/onert/backend/gpu_cl/open_cl/kernels/TuningParameters.h b/runtime/onert/backend/gpu_cl/open_cl/kernels/TuningParameters.h
deleted file mode 100644
index 3d99b4fda..000000000
--- a/runtime/onert/backend/gpu_cl/open_cl/kernels/TuningParameters.h
+++ /dev/null
@@ -1,48 +0,0 @@
-/*
- * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
- * Copyright 2019 The TensorFlow Authors. All Rights Reserved.
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#ifndef __ONERT_BACKEND_GPU_CL_KERNELS_TUNING_PARAMETERS_H__
-#define __ONERT_BACKEND_GPU_CL_KERNELS_TUNING_PARAMETERS_H__
-
-#include "open_cl/ClCommandQueue.h"
-#include "open_cl/DeviceInfo.h"
-
-namespace onert
-{
-namespace backend
-{
-namespace gpu_cl
-{
-
-enum class TuningType
-{
- EXHAUSTIVE,
- FAST
-};
-
-struct TuningParameters
-{
- ProfilingCommandQueue *queue;
- const DeviceInfo *info;
- TuningType tuning_type = TuningType::EXHAUSTIVE;
-};
-
-} // namespace gpu_cl
-} // namespace backend
-} // namespace onert
-
-#endif // __ONERT_BACKEND_GPU_CL_KERNELS_TUNING_PARAMETERS_H__
diff --git a/runtime/onert/backend/gpu_cl/open_cl/kernels/Util.cc b/runtime/onert/backend/gpu_cl/open_cl/kernels/Util.cc
deleted file mode 100644
index df42c66e8..000000000
--- a/runtime/onert/backend/gpu_cl/open_cl/kernels/Util.cc
+++ /dev/null
@@ -1,230 +0,0 @@
-/*
- * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
- * Copyright 2019 The TensorFlow Authors. All Rights Reserved.
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#include "Util.h"
-
-#include <cfloat>
-#include <cmath>
-#include <string>
-#include <vector>
-
-#include "absl/strings/str_cat.h"
-#include "absl/strings/substitute.h"
-#include "open_cl/Precision.h"
-#include "open_cl/DataType.h"
-
-namespace onert
-{
-namespace backend
-{
-namespace gpu_cl
-{
-
-std::string GetCommonDefines(CalculationsPrecision precision)
-{
- std::string result;
-
- switch (precision)
- {
- case CalculationsPrecision::F32:
- result += "#pragma OPENCL EXTENSION cl_khr_3d_image_writes : enable\n";
- result += "#define ACCUM_FLT4 float4\n";
- result += "#define FLT float\n";
- result += "#define FLT2 float2\n";
- result += "#define FLT3 float3\n";
- result += "#define FLT4 float4\n";
- result += "#define TO_FLT4 convert_float4\n";
- result += "#define TO_ACCUM_TYPE convert_float4\n";
- result += "#define TO_ACCUM_FLT convert_float\n";
- break;
- case CalculationsPrecision::F16:
- result += "#pragma OPENCL EXTENSION cl_khr_3d_image_writes : enable\n";
- result += "#pragma OPENCL EXTENSION cl_khr_fp16 : enable\n";
- result += "#define ACCUM_FLT4 half4\n";
- result += "#define FLT half\n";
- result += "#define FLT2 half2\n";
- result += "#define FLT3 half3\n";
- result += "#define FLT4 half4\n";
- result += "#define TO_FLT4 convert_half4\n";
- result += "#define TO_ACCUM_TYPE convert_half4\n";
- result += "#define TO_ACCUM_FLT convert_half\n";
- break;
- case CalculationsPrecision::F32_F16:
- result += "#pragma OPENCL EXTENSION cl_khr_3d_image_writes : enable\n";
- result += "#pragma OPENCL EXTENSION cl_khr_fp16 : enable\n";
- result += "#define ACCUM_FLT4 float4\n";
- result += "#define FLT half\n";
- result += "#define FLT2 half2\n";
- result += "#define FLT3 half3\n";
- result += "#define FLT4 half4\n";
- result += "#define TO_FLT4 convert_half4\n";
- result += "#define TO_ACCUM_TYPE convert_float4\n";
- result += "#define TO_ACCUM_FLT convert_float\n";
- break;
- }
- return result;
-}
-
-std::string GetXStrideCorrectedV2(const std::string &src_x, const std::string &batch_size,
- const std::string &stride_x, const std::string &padding_x)
-{
- // int p0 = src_x / batch_size;
- // int b0 = src_x % batch_size;
- // return (p0 * stride_x + padding_x) * batch_size + b0;
- return absl::Substitute("(((($0) / $1) * $2 + $3) * $1 + ($0) % $1)", src_x, batch_size, stride_x,
- padding_x);
-}
-
-float4 GetMaskForLastPlane(int channels)
-{
- float4 mask = float4(0.0f);
- const int remainder = channels % 4 == 0 ? 4 : channels % 4;
- for (int i = 0; i < remainder; ++i)
- {
- mask[i] = 1.0f;
- }
- return mask;
-}
-
-int3 GetFirstSuitableWorkGroup(const std::vector<int3> &wgs, int max_wg_size)
-{
- for (const auto &wg : wgs)
- {
- const int wg_size = wg.x * wg.y * wg.z;
- if (wg_size <= max_wg_size)
- {
- return wg;
- }
- }
- return {1, 1, 1};
-}
-
-int GetRecommendedBlockSizeForConv(const DeviceInfo &device_info, CalculationsPrecision precision,
- int task_size)
-{
- const float task_size_per_cu = task_size / static_cast<float>(device_info.compute_units_count);
- int block_size = 1;
- float threshold_1 = FLT_MAX;
- float threshold_2 = FLT_MAX;
- float threshold_4 = FLT_MAX;
- if (!device_info.IsMali())
- {
- return 1;
- }
- MaliInfo mali_info = device_info.mali_info;
- switch (precision)
- {
- case CalculationsPrecision::F16:
- if (mali_info.IsBifrostGen1())
- {
- threshold_1 = 256.0f;
- threshold_2 = 256.0f * 4.0f;
- threshold_4 = 256.0f * 8.0f;
- }
- else if (mali_info.IsBifrostGen2())
- {
- threshold_1 = 256.0f * 2.0f;
- threshold_2 = 256.0f * 8.0f;
- threshold_4 = 256.0f * 16.0f;
- }
- else if (mali_info.IsBifrostGen3() || mali_info.IsValhall())
- {
- threshold_1 = 256.0f;
- threshold_2 = 256.0f * 6.0f;
- threshold_4 = 256.0f * 16.0f;
- }
- else if (mali_info.IsMidgard())
- {
- threshold_1 = 256.0f * 4.0f;
- threshold_2 = 256.0f * 16.0f;
- }
- break;
- case CalculationsPrecision::F32_F16:
- if (mali_info.IsBifrostGen1())
- {
- threshold_1 = 256.0f;
- threshold_2 = 256.0f * 3.0f;
- threshold_4 = 256.0f * 32.0f;
- }
- else if (mali_info.IsBifrostGen2())
- {
- threshold_1 = 256.0f * 2.0f;
- threshold_2 = 256.0f * 8.0f;
- }
- else if (mali_info.IsBifrostGen3() || mali_info.IsValhall())
- {
- threshold_1 = 256.0f;
- threshold_2 = 256.0f * 8.0f;
- }
- else if (mali_info.IsMidgard())
- {
- threshold_1 = 256.0f * 4.0f;
- }
- break;
- case CalculationsPrecision::F32:
- if (mali_info.IsBifrostGen1())
- {
- threshold_1 = 256.0f;
- threshold_2 = 256.0f * 4.0f;
- }
- else if (mali_info.IsBifrostGen2())
- {
- threshold_1 = 128.0f;
- threshold_2 = 256.0f * 4.0f;
- }
- else if (mali_info.IsBifrostGen3() || mali_info.IsValhall())
- {
- threshold_1 = 256.0f;
- threshold_2 = 256.0f * 12.0f;
- }
- else if (mali_info.IsMidgard())
- {
- threshold_1 = 256.0f * 16.0f;
- }
- break;
- }
- if (task_size_per_cu <= threshold_1)
- {
- block_size = 1;
- }
- else if (task_size_per_cu <= threshold_2)
- {
- block_size = 2;
- }
- else if (task_size_per_cu <= threshold_4)
- {
- block_size = 4;
- }
- else
- {
- block_size = 8;
- }
- return block_size;
-}
-
-int3 GetWorkGroupsCount(const int3 &grid_size, const int3 &work_group_size)
-{
- int3 work_groups_count;
- work_groups_count.x = DivideRoundUp(grid_size.x, work_group_size.x);
- work_groups_count.y = DivideRoundUp(grid_size.y, work_group_size.y);
- work_groups_count.z = DivideRoundUp(grid_size.z, work_group_size.z);
- return work_groups_count;
-}
-
-} // namespace gpu_cl
-} // namespace backend
-} // namespace onert
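
Two of the helpers above are easiest to check with concrete numbers: GetXStrideCorrectedV2 rewrites a fused WB coordinate, and GetMaskForLastPlane keeps only the live lanes of the final slice. A host restatement with made-up inputs:

```cpp
#include <cassert>

// Host restatement of two helpers from Util.cc, with hypothetical inputs.
// GetXStrideCorrectedV2 emits (((src_x / B) * stride + pad) * B + src_x % B):
// split the fused WB coordinate, apply the stride in W, then re-fuse with B.
int XStrideCorrected(int src_x, int batch, int stride, int pad)
{
  const int p = src_x / batch; // spatial (W) part
  const int b = src_x % batch; // batch part
  return (p * stride + pad) * batch + b;
}

int main()
{
  // src_x = 7 with batch 2 is (w = 3, b = 1); stride 2 maps w to 6 -> fused 13.
  assert(XStrideCorrected(7, 2, 2, 0) == 13);
  // GetMaskForLastPlane(7): 7 % 4 == 3 live lanes, so the mask is (1, 1, 1, 0).
}
```
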
diff --git a/runtime/onert/backend/gpu_cl/open_cl/kernels/Util.h b/runtime/onert/backend/gpu_cl/open_cl/kernels/Util.h
deleted file mode 100644
index 8363862c1..000000000
--- a/runtime/onert/backend/gpu_cl/open_cl/kernels/Util.h
+++ /dev/null
@@ -1,73 +0,0 @@
-/*
- * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
- * Copyright 2019 The TensorFlow Authors. All Rights Reserved.
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#ifndef __ONERT_BACKEND_GPU_CL_OPENCL_KERNELS_UTIL_H__
-#define __ONERT_BACKEND_GPU_CL_OPENCL_KERNELS_UTIL_H__
-
-#include <string>
-#include <vector>
-
-#include "open_cl/DeviceInfo.h"
-#include "open_cl/Precision.h"
-#include "open_cl/DataType.h"
-#include "open_cl/Shape.h"
-#include "open_cl/Tensor.h"
-#include "open_cl/Types.h"
-#include "open_cl/Util.h"
-
-namespace onert
-{
-namespace backend
-{
-namespace gpu_cl
-{
-
-std::string GetCommonDefines(CalculationsPrecision precision);
-
-// Calculates correct X coordinate when stride != 1 and batch != 1 for layouts
-// with B after W (for example HWBC4) and WB stored in one axis of GPU
-// resources.
-std::string GetXStrideCorrected(const std::string &src_x, const std::string &batch_size,
- const std::string &stride_x, const std::string &padding_x);
-
-// Calculates correct X coordinate when stride != 1 and batch != 1 for layouts
-// with B after W (for example HWBC4) and WB stored in one axis of GPU
-// resources.
-std::string GetXStrideCorrectedV2(const std::string &src_x, const std::string &batch_size,
- const std::string &stride_x, const std::string &padding_x);
-
-// Returns a float4 mask for the last plane (group of 4 channels).
-// Assumes the plane size is 4.
-// For example, with 7 channels the data is aligned to 8 in our data
-// structures, but the 8th channel is empty, so the last plane (group of 4
-// channels) gets the mask (1, 1, 1, 0).
-float4 GetMaskForLastPlane(int channels);
-
-// Returns the first work group from wgs whose size is not bigger than
-// max_wg_size; if no group in wgs is suitable, returns {1, 1, 1}.
-int3 GetFirstSuitableWorkGroup(const std::vector<int3> &wgs, int max_wg_size);
-
-// task_size is the number of FLT4 elements to process.
-int GetRecommendedBlockSizeForConv(const DeviceInfo &device, CalculationsPrecision precision,
- int task_size);
-
-int3 GetWorkGroupsCount(const int3 &grid_size, const int3 &work_group_size);
-} // namespace gpu_cl
-} // namespace backend
-} // namespace onert
-
-#endif // __ONERT_BACKEND_GPU_CL_OPENCL_KERNELS_UTIL_H__
diff --git a/runtime/onert/backend/gpu_cl/open_cl/kernels/WorkGroupPicking.cc b/runtime/onert/backend/gpu_cl/open_cl/kernels/WorkGroupPicking.cc
deleted file mode 100644
index 214fec271..000000000
--- a/runtime/onert/backend/gpu_cl/open_cl/kernels/WorkGroupPicking.cc
+++ /dev/null
@@ -1,348 +0,0 @@
-/*
- * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
- * Copyright 2019 The TensorFlow Authors. All Rights Reserved.
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#include "WorkGroupPicking.h"
-
-#include <algorithm>
-#include <limits>
-#include <set>
-#include <vector>
-
-#include "open_cl/Util.h"
-#include "open_cl/Types.h"
-
-namespace onert
-{
-namespace backend
-{
-namespace gpu_cl
-{
-
-namespace
-{
-
-std::vector<int2> Get2DWorkgroupsEqualTo128()
-{
- return {{128, 1}, {64, 2}, {32, 4}, {16, 8}, {8, 16}, {4, 32}, {2, 64}, {1, 128}};
-}
-
-std::vector<int3> GenerateWorkGroupSizesXYMultipleOf(int multiplier, int3 grid,
- const KernelInfo &kernel_info,
- const DeviceInfo &device_info,
- WorkGroupSizeAlignment z_alignment)
-{
- std::vector<int3> work_groups;
- work_groups.reserve(32);
-
- std::vector<int> possible_z_sizes = GetPossibleSizes(grid.z, z_alignment);
-
- for (int x = 1; x <= kernel_info.max_work_group_size; x *= 2)
- {
- for (int y = 1; y <= kernel_info.max_work_group_size; y *= 2)
- {
- int work_group_size_xy = x * y;
- if (work_group_size_xy % multiplier != 0 ||
- work_group_size_xy > kernel_info.max_work_group_size)
- {
- continue;
- }
- for (auto z : possible_z_sizes)
- {
- if (work_group_size_xy * z > kernel_info.max_work_group_size)
- {
- continue;
- }
- if (x <= device_info.max_work_group_size_x && y <= device_info.max_work_group_size_y &&
- z <= device_info.max_work_group_size_z)
- {
- work_groups.push_back({x, y, z});
- }
- }
- }
- }
- return work_groups;
-}
-
-std::vector<int3> GenerateWorkGroupSizesXMultipleOf(int multiplier, int3 grid,
- const KernelInfo &kernel_info,
- const DeviceInfo &device_info,
- WorkGroupSizeAlignment z_alignment)
-{
- std::vector<int3> work_groups;
- work_groups.reserve(32);
-
- std::vector<int> possible_z_sizes = GetPossibleSizes(grid.z, z_alignment);
- std::vector<int> possible_y_sizes = GetPossibleSizes(grid.y, WorkGroupSizeAlignment::PRECISE);
-
- for (int x = multiplier; x <= kernel_info.max_work_group_size && x < grid.x + multiplier;
- x += multiplier)
- {
- for (auto y : possible_y_sizes)
- {
- for (auto z : possible_z_sizes)
- {
- if (x <= device_info.max_work_group_size_x && y <= device_info.max_work_group_size_y &&
- z <= device_info.max_work_group_size_z && x * y * z <= kernel_info.max_work_group_size)
- {
- work_groups.push_back({x, y, z});
- }
- }
- }
- }
- return work_groups;
-}
-
-void GetWorkGroupsAlignedToGrid(const DeviceInfo &device_info, const KernelInfo &kernel_info,
- const int3 &grid, std::vector<int3> *work_groups)
-{
- int3 max_wg_size;
- max_wg_size.x = device_info.max_work_group_size_x;
- max_wg_size.y = device_info.max_work_group_size_y;
- max_wg_size.z = device_info.max_work_group_size_z;
- GenerateWorkGroupSizesAlignedToGrid(grid, max_wg_size, kernel_info.max_work_group_size,
- work_groups);
-}
-
-int GetPenalty(int grid_size, int group_size)
-{
- const int remainder = grid_size % group_size;
- return remainder == 0 ? 0 : group_size - remainder;
-}
-
-int GetPenalty(int2 grid_size, int2 group_size)
-{
- const int p_x = GetPenalty(grid_size.x, group_size.x);
- const int p_y = GetPenalty(grid_size.y, group_size.y);
- return p_x * grid_size.y + p_y * grid_size.x + p_x * p_y;
-}
-
-int GetMaxSizeWithMinPenalty(int size, int max_size)
-{
- int best_size = 128;
- int min_penalty = GetPenalty(size, best_size);
- for (int i = 2; i * 128 <= max_size; ++i)
- {
- if (GetPenalty(size, i * 128) == min_penalty)
- {
- best_size = i * 128;
- }
- }
- return best_size;
-}
-
-int2 GetMaxSizeWithMinPenalty(int2 size, int max_size)
-{
- std::vector<int2> base_groups = Get2DWorkgroupsEqualTo128();
- int min_penalty = std::numeric_limits<int>::max();
- for (const auto &group : base_groups)
- {
- min_penalty = std::min(GetPenalty(size, group), min_penalty);
- }
- for (const auto &group : base_groups)
- {
- for (int y = 1; y * group.y <= max_size; ++y)
- {
- int new_group_y = y * group.y;
- for (int x = 1; x * group.x <= max_size; ++x)
- {
- int new_group_x = x * group.x;
- if (new_group_x * new_group_y > max_size)
- {
- break;
- }
- if (GetPenalty(size, int2(new_group_x, new_group_y)) == min_penalty)
- {
- return int2(new_group_x, new_group_y);
- }
- }
- }
- }
- return int2(0, 0);
-}
-
-int GetBiggestDividerWithPriority(int number, int max_divider)
-{
- if (number % 8 == 0 && 8 <= max_divider)
- {
- return 8;
- }
- if (number % 4 == 0 && 4 <= max_divider)
- {
- return 4;
- }
- if (number % 2 == 0 && 2 <= max_divider)
- {
- return 2;
- }
- for (int i = max_divider; i != 0; i--)
- {
- if (number % i == 0)
- {
- return i;
- }
- }
- return 1;
-}
-
-int GetBiggestDivider(int number, int max_divider)
-{
- for (int i = max_divider; i != 0; i--)
- {
- if (number % i == 0)
- {
- return i;
- }
- }
- return 1;
-}
-
-} // namespace
-
-int3 GetWorkGroupXY128ConvLinear(const int3 &grid)
-{
- int grid_z = GetBiggestDividerWithPriority(grid.z, 4);
- if (grid.x <= 128)
- {
- return int3(128, 1, grid_z);
- }
- int grid_x = GetMaxSizeWithMinPenalty(grid.x, 512 / grid_z);
- return {grid_x, 1, grid_z};
-}
-
-int3 GetWorkGroupXY128Conv(const int3 &grid)
-{
- int grid_z = GetBiggestDividerWithPriority(grid.z, 4);
- if (grid.x <= 16 && grid.y <= 8)
- {
- return int3(16, 8, grid_z);
- }
- int2 grid_xy = GetMaxSizeWithMinPenalty(int2(grid.x, grid.y), 512 / grid_z);
- return int3(grid_xy.x, grid_xy.y, grid_z);
-}
-
-// int3 GetWorkGroupXY128Simple(const int3& grid) { return int3(16, 8, 1); }
-
-int3 GetWorkGroup(const int3 &grid, int max_size)
-{
- int wg_z = GetBiggestDividerWithPriority(grid.z, 8);
- int wg_xy_size = max_size / wg_z;
- int wg_x = std::min(DivideRoundUp(grid.x, 2), wg_xy_size);
- int wg_y = std::min(wg_xy_size / wg_x, grid.y);
- return int3(wg_x, wg_y, wg_z);
-}
-
-int3 GetWorkGroupConv(const int3 &grid, int max_size, int max_z_size)
-{
- int wg_z = GetBiggestDivider(grid.z, max_z_size);
- int wg_xy_size = std::min(256, max_size) / wg_z;
- int wg_x = std::min(grid.x, wg_xy_size);
- int wg_y = std::min(wg_xy_size / wg_x, grid.y);
- if (wg_y == grid.y && grid.y % 2 == 0)
- {
- wg_y = grid.y / 2;
- }
- return int3(wg_x, wg_y, wg_z);
-}
-
-void GetPossibleWorkGroupsXYMultipleOf(int multiplier, const DeviceInfo &device_info,
- const KernelInfo &kernel_info, const int3 &grid,
- WorkGroupSizeAlignment z_alignment,
- std::vector<int3> *work_groups)
-{
- *work_groups =
- GenerateWorkGroupSizesXYMultipleOf(multiplier, grid, kernel_info, device_info, z_alignment);
-}
-
-void GetPossibleWorkGroupsXMultipleOf(int multiplier, const DeviceInfo &device_info,
- const KernelInfo &kernel_info, const int3 &grid,
- WorkGroupSizeAlignment z_alignment,
- std::vector<int3> *work_groups)
-{
- *work_groups =
- GenerateWorkGroupSizesXMultipleOf(multiplier, grid, kernel_info, device_info, z_alignment);
-}
-
-bool XY128RequiresMoreWorkGroupsThenXY128Linear(int width, int height)
-{
- int planar_work_groups = DivideRoundUp(width * height, 128);
- auto base_work_groups = Get2DWorkgroupsEqualTo128();
- bool have_equal_work_groups = false;
- for (auto &work_group : base_work_groups)
- {
- int x_groups = DivideRoundUp(width, work_group.x);
- int y_groups = DivideRoundUp(height, work_group.y);
- int xy_groups = x_groups * y_groups;
- if (xy_groups == planar_work_groups)
- {
- have_equal_work_groups = true;
- break;
- }
- }
- return !have_equal_work_groups;
-}
-
-void GetPossibleWorkGroups(TuningType tuning_type, const DeviceInfo &device_info,
- const KernelInfo &kernel_info, const int3 &grid,
- std::vector<int3> *work_groups)
-{
- switch (tuning_type)
- {
- case TuningType::FAST:
- work_groups->push_back(GetWorkGroup(grid, kernel_info.max_work_group_size));
- return;
- case TuningType::EXHAUSTIVE:
- {
- GetWorkGroupsAlignedToGrid(device_info, kernel_info, grid, work_groups);
- return;
- }
- default:
- work_groups->push_back({8, 4, 1});
- return;
- }
-}
-
-void GetPossibleWorkGroupsConv(TuningType tuning_type, const DeviceInfo &device_info,
- const KernelInfo &kernel_info, const int3 &grid,
- std::vector<int3> *work_groups)
-{
- switch (tuning_type)
- {
- case TuningType::FAST:
- {
- int max_z_size = 16;
- if (device_info.IsAdreno())
- {
- max_z_size = device_info.IsAdreno3xx() ? 16 : 64;
- }
- max_z_size = std::min(max_z_size, device_info.max_work_group_size_z);
- work_groups->push_back(GetWorkGroupConv(grid, kernel_info.max_work_group_size, max_z_size));
- return;
- }
- case TuningType::EXHAUSTIVE:
- {
- GetWorkGroupsAlignedToGrid(device_info, kernel_info, grid, work_groups);
- return;
- }
- default:
- work_groups->push_back({8, 4, 1});
- return;
- }
-}
-
-} // namespace gpu_cl
-} // namespace backend
-} // namespace onert
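
GetMaxSizeWithMinPenalty and its 2-D variant above both minimize the padding the launch grid pays for a candidate work-group size. The 1-D penalty is small enough to restate with numbers:

```cpp
#include <cstdio>

// The 1-D penalty from WorkGroupPicking.cc: how many padded-out invocations a
// work-group size costs along one grid axis (0 when the size divides evenly).
int GetPenalty(int grid_size, int group_size)
{
  const int remainder = grid_size % group_size;
  return remainder == 0 ? 0 : group_size - remainder;
}

int main()
{
  // A grid of 100 with groups of 32 launches ceil(100/32) * 32 = 128
  // invocations, wasting 28; groups of 25 divide evenly and waste none.
  std::printf("penalty(100, 32) = %d\n", GetPenalty(100, 32)); // 28
  std::printf("penalty(100, 25) = %d\n", GetPenalty(100, 25)); // 0
}
```
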
diff --git a/runtime/onert/backend/gpu_cl/open_cl/kernels/WorkGroupPicking.h b/runtime/onert/backend/gpu_cl/open_cl/kernels/WorkGroupPicking.h
deleted file mode 100644
index c19890de1..000000000
--- a/runtime/onert/backend/gpu_cl/open_cl/kernels/WorkGroupPicking.h
+++ /dev/null
@@ -1,67 +0,0 @@
-/*
- * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
- * Copyright 2019 The TensorFlow Authors. All Rights Reserved.
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#ifndef __ONERT_BACKEND_GPU_CL_OPENCL_KERNELS_WORK_GROUP_PICKING_H__
-#define __ONERT_BACKEND_GPU_CL_OPENCL_KERNELS_WORK_GROUP_PICKING_H__
-
-#include <vector>
-
-#include "TuningParameters.h"
-
-#include "open_cl/ClKernel.h"
-#include "open_cl/DeviceInfo.h"
-#include "open_cl/Types.h"
-#include "open_cl/WorkgroupSelection.h"
-
-namespace onert
-{
-namespace backend
-{
-namespace gpu_cl
-{
-
-// multiplier must be a power of two
-void GetPossibleWorkGroupsXYMultipleOf(int multiplier, const DeviceInfo &device_info,
- const KernelInfo &kernel_info, const int3 &grid,
- WorkGroupSizeAlignment z_alignment,
- std::vector<int3> *work_groups);
-
-void GetPossibleWorkGroupsXMultipleOf(int multiplier, const DeviceInfo &device_info,
- const KernelInfo &kernel_info, const int3 &grid,
- WorkGroupSizeAlignment z_alignment,
- std::vector<int3> *work_groups);
-
-int3 GetWorkGroupXY128ConvLinear(const int3 &grid);
-
-int3 GetWorkGroupXY128Simple(const int3 &grid);
-int3 GetWorkGroupXY128Conv(const int3 &grid);
-
-bool XY128RequiresMoreWorkGroupsThenXY128Linear(int width, int height);
-
-void GetPossibleWorkGroups(TuningType tuning_type, const DeviceInfo &device_info,
- const KernelInfo &kernel_info, const int3 &grid,
- std::vector<int3> *work_groups);
-
-void GetPossibleWorkGroupsConv(TuningType tuning_type, const DeviceInfo &device_info,
- const KernelInfo &kernel_info, const int3 &grid,
- std::vector<int3> *work_groups);
-
-} // namespace gpu_cl
-} // namespace backend
-} // namespace onert
-
-#endif // __ONERT_BACKEND_GPU_CL_OPENCL_KERNELS_WORK_GROUP_PICKING_H__
diff --git a/runtime/onert/backend/gpu_cl/open_cl/selectors/ConvolutionSelector.cc b/runtime/onert/backend/gpu_cl/open_cl/selectors/ConvolutionSelector.cc
deleted file mode 100644
index eac6f3270..000000000
--- a/runtime/onert/backend/gpu_cl/open_cl/selectors/ConvolutionSelector.cc
+++ /dev/null
@@ -1,249 +0,0 @@
-/*
- * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
- * Copyright 2019 The TensorFlow Authors. All Rights Reserved.
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-#include "ConvolutionSelector.h"
-
-#include "absl/memory/memory.h"
-#include "open_cl/kernels/ConvBuffer1x1.h"
-#include "open_cl/kernels/ConvConstants.h"
-#include "open_cl/kernels/ConvPowervr.h"
-#include "open_cl/kernels/ConvWeightsConverter.h"
-#include "open_cl/kernels/WorkGroupPicking.h"
-#include "open_cl/TensorType.h"
-#include "open_cl/Util.h"
-
-namespace onert
-{
-namespace backend
-{
-namespace gpu_cl
-{
-namespace
-{
-
-std::unique_ptr<GPUOperation> SelectConvolutionAdreno(const Convolution2DAttributes &attr,
- const BHWC &dst_shape,
- const DeviceInfo &device_info,
- const OperationDef &op_def, ModelHints)
-{
- if (IsConvConstantsSupported(device_info, op_def, attr))
- {
- GPUOperation conv = CreateConvConstants(device_info, op_def, attr);
- return absl::make_unique<GPUOperation>(std::move(conv));
- }
- else
- {
- ConvPowerVR conv = CreateConvPowerVR(device_info, op_def, attr, &dst_shape);
- return absl::make_unique<ConvPowerVR>(std::move(conv));
- }
-}
-
-std::unique_ptr<GPUOperation> SelectConvolutionWinogradAdreno(const Convolution2DAttributes &attr,
- const BHWC &dst_shape,
- const DeviceInfo &device_info,
- const OperationDef &op_def,
- ModelHints)
-{
- ConvPowerVR conv = CreateConvPowerVRWino4x4To6x6(device_info, op_def, attr, &dst_shape);
- return absl::make_unique<ConvPowerVR>(std::move(conv));
-}
-
-std::unique_ptr<GPUOperation>
-SelectConvolutionDynamicWeightsAdreno(const Convolution2DAttributes &attr,
- const BHWC &weights_shape, const BHWC &dst_shape,
- const DeviceInfo &device_info, const OperationDef &op_def,
- ModelHints, ConvWeightsDescription *weights_desc)
-{
- ConvPowerVR conv =
- CreateConvPowerVRDynamicWeights(device_info, op_def, attr, weights_shape, &dst_shape);
- *weights_desc = conv.GetConvWeightsDescription();
- return absl::make_unique<ConvPowerVR>(std::move(conv));
-}
-
-std::unique_ptr<GPUOperation> SelectConvolutionNVidia(const Convolution2DAttributes &attr,
- const BHWC &dst_shape,
- const DeviceInfo &device_info,
- const OperationDef &op_def)
-{
- if (IsConvConstantsSupported(device_info, op_def, attr))
- {
- GPUOperation conv = CreateConvConstants(device_info, op_def, attr);
- return absl::make_unique<GPUOperation>(std::move(conv));
- }
- else
- {
- ConvPowerVR conv = CreateConvPowerVR(device_info, op_def, attr, &dst_shape);
- return absl::make_unique<ConvPowerVR>(std::move(conv));
- }
-}
-
-std::unique_ptr<GPUOperation> SelectConvolutionPowerVR(const Convolution2DAttributes &attr,
- const DeviceInfo &device_info,
- const OperationDef &op_def)
-{
- ConvPowerVR conv = CreateConvPowerVR(device_info, op_def, attr);
- return absl::make_unique<ConvPowerVR>(std::move(conv));
-}
-
-std::unique_ptr<GPUOperation> SelectConvolutionMali(const Convolution2DAttributes &attr,
- const BHWC &dst_shape,
- const DeviceInfo &device_info,
- const OperationDef &op_def)
-{
- if (op_def.src_tensors[0].storage_type == TensorStorageType::BUFFER &&
- IsConvBuffer1x1Supported(op_def, attr))
- {
- ConvBuffer1x1 conv = CreateConvBuffer1x1(device_info, op_def, attr, &dst_shape);
- return absl::make_unique<ConvBuffer1x1>(std::move(conv));
- }
- else
- {
- ConvPowerVR conv = CreateConvPowerVR(device_info, op_def, attr, &dst_shape);
- return absl::make_unique<ConvPowerVR>(std::move(conv));
- }
-}
-
-std::unique_ptr<GPUOperation> SelectConvolutionWinogradMali(const Convolution2DAttributes &attr,
- const BHWC &dst_shape,
- const DeviceInfo &device_info,
- const OperationDef &op_def)
-{
- if (op_def.src_tensors[0].storage_type == TensorStorageType::BUFFER)
- {
- ConvBuffer1x1 conv = CreateConvBuffer1x1Wino4x4To6x6(device_info, op_def, attr, &dst_shape);
- return absl::make_unique<ConvBuffer1x1>(std::move(conv));
- }
- else
- {
- ConvPowerVR conv = CreateConvPowerVRWino4x4To6x6(device_info, op_def, attr, &dst_shape);
- return absl::make_unique<ConvPowerVR>(std::move(conv));
- }
-}
-
-std::unique_ptr<GPUOperation>
-SelectConvolutionDynamicWeightsMali(const Convolution2DAttributes &attr, const BHWC &weights_shape,
- const BHWC &dst_shape, const DeviceInfo &device_info,
- const OperationDef &op_def, ModelHints,
- ConvWeightsDescription *weights_desc)
-{
- if (op_def.src_tensors[0].storage_type == TensorStorageType::BUFFER &&
- IsConvBuffer1x1Supported(op_def, weights_shape, attr))
- {
- ConvBuffer1x1 conv =
- CreateConvBuffer1x1DynamicWeights(device_info, op_def, attr, weights_shape, &dst_shape);
- *weights_desc = conv.GetConvWeightsDescription();
- return absl::make_unique<ConvBuffer1x1>(std::move(conv));
- }
- else
- {
- ConvPowerVR conv =
- CreateConvPowerVRDynamicWeights(device_info, op_def, attr, weights_shape, &dst_shape);
- *weights_desc = conv.GetConvWeightsDescription();
- return absl::make_unique<ConvPowerVR>(std::move(conv));
- }
-}
-
-} // namespace
-
-std::unique_ptr<GPUOperation> SelectConvolution(const Convolution2DAttributes &attr,
- const BHWC &dst_shape,
- const DeviceInfo &device_info,
- const OperationDef &op_def, ModelHints hints)
-{
- if (device_info.IsAdreno())
- {
- return SelectConvolutionAdreno(attr, dst_shape, device_info, op_def, hints);
- }
- else if (device_info.IsPowerVR() || device_info.IsAMD() || device_info.IsIntel())
- {
- return SelectConvolutionPowerVR(attr, device_info, op_def);
- }
- else if (device_info.IsNvidia())
- {
- return SelectConvolutionNVidia(attr, dst_shape, device_info, op_def);
- }
- else if (device_info.IsMali())
- {
- return SelectConvolutionMali(attr, dst_shape, device_info, op_def);
- }
- else
- {
- return SelectConvolutionAdreno(attr, dst_shape, device_info, op_def, hints);
- }
-}
-
-std::unique_ptr<GPUOperation> SelectConvolutionForWinograd(const Convolution2DAttributes &attr,
- const BHWC &dst_shape,
- const DeviceInfo &device_info,
- const OperationDef &op_def,
- ModelHints hints)
-{
- if (device_info.IsAdreno())
- {
- return SelectConvolutionWinogradAdreno(attr, dst_shape, device_info, op_def, hints);
- }
- else if (device_info.IsPowerVR() || device_info.IsAMD() || device_info.IsNvidia() ||
- device_info.IsIntel())
- {
- ConvPowerVR conv = CreateConvPowerVRWino4x4To6x6(device_info, op_def, attr, &dst_shape);
- return absl::make_unique<ConvPowerVR>(std::move(conv));
- }
- else if (device_info.IsMali())
- {
- return SelectConvolutionWinogradMali(attr, dst_shape, device_info, op_def);
- }
- else
- {
- return SelectConvolutionWinogradAdreno(attr, dst_shape, device_info, op_def, hints);
- }
-}
-
-std::unique_ptr<GPUOperation>
-SelectConvolutionWithDynamicWeights(const Convolution2DAttributes &attr, const BHWC &weights_shape,
- const BHWC &dst_shape, const DeviceInfo &device_info,
- const OperationDef &op_def, ModelHints hints,
- ConvWeightsDescription *weights_desc)
-{
- if (device_info.IsAdreno())
- {
- return SelectConvolutionDynamicWeightsAdreno(attr, weights_shape, dst_shape, device_info,
- op_def, hints, weights_desc);
- }
- else if (device_info.IsMali())
- {
- return SelectConvolutionDynamicWeightsMali(attr, weights_shape, dst_shape, device_info, op_def,
- hints, weights_desc);
- }
- else
- {
- ConvPowerVR conv =
- CreateConvPowerVRDynamicWeights(device_info, op_def, attr, weights_shape, &dst_shape);
- *weights_desc = conv.GetConvWeightsDescription();
- return absl::make_unique<ConvPowerVR>(std::move(conv));
- }
-}
-
-std::unique_ptr<GPUOperation>
-SelectConverterToConvWeights(const ConvWeightsDescription &weights_desc, const OperationDef &op_def,
- ModelHints)
-{
- ConverterToConvWeights converter = ConverterToConvWeights(op_def, weights_desc);
- return absl::make_unique<ConverterToConvWeights>(std::move(converter));
-}
-
-} // namespace gpu_cl
-} // namespace backend
-} // namespace onert
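
SelectConvolution above is a vendor dispatch with two capability escape hatches: ConvConstants support, and Mali's buffer-backed 1x1 path. A condensed host sketch of that policy; `Vendor` and the returned names are illustrative, not onert types:

```cpp
#include <iostream>
#include <string>

// Minimal host-side sketch of the vendor dispatch in SelectConvolution above.
// Vendor and the returned kernel names are illustrative, not the onert types;
// the two bools stand in for IsConvConstantsSupported and the Mali
// BUFFER-storage + IsConvBuffer1x1Supported check.
enum class Vendor { Adreno, PowerVR, AMD, Intel, Nvidia, Mali, Other };

std::string SelectConvolutionName(Vendor v, bool conv_constants_ok, bool buffer_1x1_ok)
{
  switch (v)
  {
    case Vendor::Adreno:
    case Vendor::Other: // unknown devices fall back to the Adreno path
      return conv_constants_ok ? "ConvConstants" : "ConvPowerVR";
    case Vendor::PowerVR:
    case Vendor::AMD:
    case Vendor::Intel:
      return "ConvPowerVR";
    case Vendor::Nvidia:
      return conv_constants_ok ? "ConvConstants" : "ConvPowerVR";
    case Vendor::Mali:
      return buffer_1x1_ok ? "ConvBuffer1x1" : "ConvPowerVR";
  }
  return "ConvPowerVR";
}

int main() { std::cout << SelectConvolutionName(Vendor::Mali, false, true) << "\n"; }
```
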
diff --git a/runtime/onert/backend/gpu_cl/open_cl/selectors/ConvolutionSelector.h b/runtime/onert/backend/gpu_cl/open_cl/selectors/ConvolutionSelector.h
deleted file mode 100644
index d45eea8bd..000000000
--- a/runtime/onert/backend/gpu_cl/open_cl/selectors/ConvolutionSelector.h
+++ /dev/null
@@ -1,62 +0,0 @@
-/*
- * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
- * Copyright 2019 The TensorFlow Authors. All Rights Reserved.
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#ifndef __ONERT_BACKEND_GPU_CL_OPENCL_SELECTORS_CONVOLUTION_SELECTOR_H__
-#define __ONERT_BACKEND_GPU_CL_OPENCL_SELECTORS_CONVOLUTION_SELECTOR_H__
-
-#include <memory>
-
-#include "open_cl/kernels/ConvCommon.h"
-#include "open_cl/kernels/GpuOperation.h"
-#include "open_cl/ModelHints.h"
-#include "open_cl/Operations.h"
-#include "open_cl/Shape.h"
-#include "open_cl/Status.h"
-
-namespace onert
-{
-namespace backend
-{
-namespace gpu_cl
-{
-
-std::unique_ptr<GPUOperation> SelectConvolution(const Convolution2DAttributes &attr,
- const BHWC &dst_shape,
- const DeviceInfo &device_info,
- const OperationDef &op_def, ModelHints hints);
-
-std::unique_ptr<GPUOperation> SelectConvolutionForWinograd(const Convolution2DAttributes &attr,
- const BHWC &dst_shape,
- const DeviceInfo &device_info,
- const OperationDef &op_def,
- ModelHints hints);
-
-std::unique_ptr<GPUOperation>
-SelectConvolutionWithDynamicWeights(const Convolution2DAttributes &attr, const BHWC &weights_shape,
- const BHWC &dst_shape, const DeviceInfo &device_info,
- const OperationDef &op_def, ModelHints hints,
- ConvWeightsDescription *weights_desc);
-
-std::unique_ptr<GPUOperation>
-SelectConverterToConvWeights(const ConvWeightsDescription &weights_desc, const OperationDef &op_def,
- ModelHints hints);
-
-} // namespace gpu_cl
-} // namespace backend
-} // namespace onert
-
-#endif // __ONERT_BACKEND_GPU_CL_OPENCL_SELECTORS_CONVOLUTION_SELECTOR_H__
diff --git a/runtime/onert/backend/gpu_cl/open_cl/selectors/DwConvolutionSelector.cc b/runtime/onert/backend/gpu_cl/open_cl/selectors/DwConvolutionSelector.cc
deleted file mode 100644
index f07eef689..000000000
--- a/runtime/onert/backend/gpu_cl/open_cl/selectors/DwConvolutionSelector.cc
+++ /dev/null
@@ -1,107 +0,0 @@
-/*
- * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
- * Copyright 2020 The TensorFlow Authors. All Rights Reserved.
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#include "DwConvolutionSelector.h"
-
-#include "absl/memory/memory.h"
-#include "open_cl/ClDevice.h"
-#include "open_cl/kernels/DepthwiseConv.h"
-#include "open_cl/kernels/DepthwiseConv3x3.h"
-#include "open_cl/Precision.h"
-
-namespace onert
-{
-namespace backend
-{
-namespace gpu_cl
-{
-namespace
-{
-
-std::unique_ptr<GPUOperation>
-SelectDWConvolutionAdreno(const DepthwiseConvolution2DAttributes &attr,
- const DeviceInfo &device_info, const OperationDef &op_def)
-{
- if (IsDepthwiseConv3x3Supported(attr))
- {
- return absl::make_unique<DepthwiseConv3x3>(CreateDepthwiseConv3x3(device_info, op_def, attr));
- }
- else
- {
- return absl::make_unique<GPUOperation>(CreateDepthwiseConvolution2D(device_info, op_def, attr));
- }
-}
-
-std::unique_ptr<GPUOperation>
-SelectDWConvolutionPowerVR(const DepthwiseConvolution2DAttributes &attr,
- const DeviceInfo &device_info, const OperationDef &op_def)
-{
- if (IsDepthwiseConv3x3Supported(attr))
- {
- return absl::make_unique<DepthwiseConv3x3>(CreateDepthwiseConv3x3(device_info, op_def, attr));
- }
- else
- {
- return absl::make_unique<GPUOperation>(CreateDepthwiseConvolution2D(device_info, op_def, attr));
- }
-}
-
-std::unique_ptr<GPUOperation> SelectDWConvolutionMali(const DepthwiseConvolution2DAttributes &attr,
- const DeviceInfo &device_info,
- const OperationDef &op_def)
-{
- const auto storage_type = op_def.src_tensors[0].storage_type;
- bool buffer_type =
- storage_type == TensorStorageType::BUFFER || storage_type == TensorStorageType::IMAGE_BUFFER;
- const MaliInfo mali_info = device_info.mali_info;
- if (IsDepthwiseConv3x3Supported(attr) && !mali_info.IsMidgard() && !buffer_type &&
- op_def.precision != CalculationsPrecision::F32)
- {
- return absl::make_unique<DepthwiseConv3x3>(CreateDepthwiseConv3x3(device_info, op_def, attr));
- }
- else
- {
- return absl::make_unique<GPUOperation>(CreateDepthwiseConvolution2D(device_info, op_def, attr));
- }
-}
-} // namespace
-
-std::unique_ptr<GPUOperation> SelectDWConvolution(const DepthwiseConvolution2DAttributes &attr,
- const DeviceInfo &device_info,
- const OperationDef &op_def)
-{
- if (device_info.IsAdreno())
- {
- return SelectDWConvolutionAdreno(attr, device_info, op_def);
- }
- else if (device_info.IsPowerVR())
- {
- return SelectDWConvolutionPowerVR(attr, device_info, op_def);
- }
- else if (device_info.IsMali())
- {
- return SelectDWConvolutionMali(attr, device_info, op_def);
- }
- else
- {
- return SelectDWConvolutionAdreno(attr, device_info, op_def);
- }
-}
-
-} // namespace gpu_cl
-} // namespace backend
-} // namespace onert
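
SelectDWConvolutionMali above is the only depthwise selector with extra gating: the specialized 3x3 kernel is skipped on Midgard GPUs, for buffer-backed tensor storage, and at F32 precision. A minimal stand-in predicate capturing that condition:

    // Hypothetical condensed form of the Mali fast-path test from the deleted file.
    bool useDepthwiseConv3x3OnMali(bool conv3x3_supported, bool is_midgard,
                                   bool buffer_storage, bool fp32_precision)
    {
      // All fast-path conditions must hold; otherwise the generic
      // DepthwiseConvolution2D kernel is selected.
      return conv3x3_supported && !is_midgard && !buffer_storage && !fp32_precision;
    }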
diff --git a/runtime/onert/backend/gpu_cl/open_cl/selectors/DwConvolutionSelector.h b/runtime/onert/backend/gpu_cl/open_cl/selectors/DwConvolutionSelector.h
deleted file mode 100644
index 2fa40c5c3..000000000
--- a/runtime/onert/backend/gpu_cl/open_cl/selectors/DwConvolutionSelector.h
+++ /dev/null
@@ -1,42 +0,0 @@
-/*
- * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
- * Copyright 2020 The TensorFlow Authors. All Rights Reserved.
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#ifndef __ONERT_BACKEND_GPU_CL_OPENCL_SELECTORS_DW_CONVOLUTION_SELECTOR_H__
-#define __ONERT_BACKEND_GPU_CL_OPENCL_SELECTORS_DW_CONVOLUTION_SELECTOR_H__
-
-#include <memory>
-
-#include "open_cl/kernels/GpuOperation.h"
-#include "open_cl/Operations.h"
-#include "open_cl/Status.h"
-
-namespace onert
-{
-namespace backend
-{
-namespace gpu_cl
-{
-
-std::unique_ptr<GPUOperation> SelectDWConvolution(const DepthwiseConvolution2DAttributes &attr,
- const DeviceInfo &device_info,
- const OperationDef &op_def);
-
-} // namespace gpu_cl
-} // namespace backend
-} // namespace onert
-
-#endif // __ONERT_BACKEND_GPU_CL_OPENCL_SELECTORS_DW_CONVOLUTION_SELECTOR_H__
diff --git a/runtime/onert/backend/gpu_cl/open_cl/selectors/SimpleSelectors.cc b/runtime/onert/backend/gpu_cl/open_cl/selectors/SimpleSelectors.cc
deleted file mode 100644
index ac514b26c..000000000
--- a/runtime/onert/backend/gpu_cl/open_cl/selectors/SimpleSelectors.cc
+++ /dev/null
@@ -1,98 +0,0 @@
-/*
- * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
- * Copyright 2020 The TensorFlow Authors. All Rights Reserved.
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#include "SimpleSelectors.h"
-
-#include <memory>
-#include <set>
-
-#include "open_cl/kernels/Add.h"
-#include "open_cl/kernels/DepthwiseConv.h"
-#include "open_cl/kernels/Pooling.h"
-#include "open_cl/kernels/Relu.h"
-#include "open_cl/kernels/Reshape.h"
-#include "open_cl/kernels/Reshapex4.h"
-#include "open_cl/kernels/Softmax.h"
-#include "open_cl/kernels/Softmax1x1.h"
-
-namespace onert
-{
-namespace backend
-{
-namespace gpu_cl
-{
-
-void SelectAdd(const OperationDef &op_def, const std::vector<int> &channels, int dst_channels,
- std::unique_ptr<GPUOperation> *ptr)
-{
- GPUOperation operation = CreateAdd(op_def, channels, dst_channels);
- *ptr = std::make_unique<GPUOperation>(std::move(operation));
-}
-
-std::unique_ptr<GPUOperation>
-SelectDWConvolutionDynamicWeights(const DepthwiseConvolution2DAttributes &attr,
- const DeviceInfo &device_info, const OperationDef &op_def)
-{
- return absl::make_unique<GPUOperation>(
- CreateDepthwiseConvolution2DDynamicWeights(device_info, op_def, attr));
-}
-
-std::unique_ptr<GPUOperation> SelectPooling(const Pooling2DAttributes &attr,
- const OperationDef &op_def)
-{
- GPUOperation operation = CreatePooling(op_def, attr);
- return absl::make_unique<GPUOperation>(std::move(operation));
-}
-
-std::unique_ptr<GPUOperation> SelectReLU(const ReLUAttributes &attr, const OperationDef &op_def)
-{
- return absl::make_unique<GPUOperation>(CreateReLU(op_def, attr));
-}
-
-void SelectReshape(int src_channels, int dst_channels, const OperationDef &op_def,
- std::unique_ptr<GPUOperation> *ptr)
-{
- if (src_channels % 4 == 0 && dst_channels % 4 == 0)
- {
- GPUOperation operation = CreateReshapex4(op_def);
- *ptr = std::make_unique<GPUOperation>(std::move(operation));
- }
- else
- {
- GPUOperation operation = CreateReshape(op_def);
- *ptr = std::make_unique<GPUOperation>(std::move(operation));
- }
-}
-
-void SelectSoftmax(const BHWC &shape, const OperationDef &op_def,
- std::unique_ptr<GPUOperation> *ptr)
-{
- if (shape.w == 1 && shape.h == 1)
- {
- Softmax1x1 operation = CreateSoftmax1x1(op_def);
- *ptr = absl::make_unique<Softmax1x1>(std::move(operation));
- }
- else
- {
- GPUOperation operation = CreateSoftmax(op_def);
- *ptr = absl::make_unique<GPUOperation>(std::move(operation));
- }
-}
-
-} // namespace gpu_cl
-} // namespace backend
-} // namespace onert
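
Two of the deleted simple selectors encode cheap shape-based specializations: Reshapex4 requires both channel counts to be multiples of 4 so it can move whole float4 slices, and Softmax1x1 applies only when the spatial extent is 1x1. A self-contained sketch of both decisions (Op is an illustrative stand-in):

    #include <memory>
    #include <string>

    struct Op { std::string kernel; };

    std::unique_ptr<Op> selectReshape(int src_channels, int dst_channels)
    {
      // Reshapex4 copies whole float4 slices, so both channel counts must be
      // divisible by 4; otherwise the generic element-wise Reshape is used.
      if (src_channels % 4 == 0 && dst_channels % 4 == 0)
        return std::make_unique<Op>(Op{"Reshapex4"});
      return std::make_unique<Op>(Op{"Reshape"});
    }

    std::unique_ptr<Op> selectSoftmax(int w, int h)
    {
      // A 1x1 spatial shape reduces softmax to one per-pixel vector, which
      // the Softmax1x1 kernel handles without a spatial loop.
      if (w == 1 && h == 1)
        return std::make_unique<Op>(Op{"Softmax1x1"});
      return std::make_unique<Op>(Op{"Softmax"});
    }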
diff --git a/runtime/onert/backend/gpu_cl/open_cl/selectors/SimpleSelectors.h b/runtime/onert/backend/gpu_cl/open_cl/selectors/SimpleSelectors.h
deleted file mode 100644
index 2c5837a1d..000000000
--- a/runtime/onert/backend/gpu_cl/open_cl/selectors/SimpleSelectors.h
+++ /dev/null
@@ -1,57 +0,0 @@
-/*
- * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
- * Copyright 2020 The TensorFlow Authors. All Rights Reserved.
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#ifndef __ONERT_BACKEND_GPU_CL_OPENCL_SELECTORS_SIMPLE_SELECTORS_H__
-#define __ONERT_BACKEND_GPU_CL_OPENCL_SELECTORS_SIMPLE_SELECTORS_H__
-
-#include <memory>
-
-#include "open_cl/ClDevice.h"
-#include "open_cl/kernels/GpuOperation.h"
-#include "open_cl/Operations.h"
-#include "open_cl/Shape.h"
-
-namespace onert
-{
-namespace backend
-{
-namespace gpu_cl
-{
-
-void SelectAdd(const OperationDef &op_def, const std::vector<int> &channels, int dst_channels,
- std::unique_ptr<GPUOperation> *ptr);
-
-std::unique_ptr<GPUOperation>
-SelectDWConvolutionDynamicWeights(const DepthwiseConvolution2DAttributes &attr,
- const DeviceInfo &device_info, const OperationDef &op_def);
-
-std::unique_ptr<GPUOperation> SelectPooling(const Pooling2DAttributes &attr,
- const OperationDef &op_def);
-
-std::unique_ptr<GPUOperation> SelectReLU(const ReLUAttributes &attr, const OperationDef &op_def);
-
-void SelectReshape(int src_channels, int dst_channels, const OperationDef &op_def,
- std::unique_ptr<GPUOperation> *ptr);
-
-void SelectSoftmax(const BHWC &shape, const OperationDef &op_def,
- std::unique_ptr<GPUOperation> *ptr);
-
-} // namespace gpu_cl
-} // namespace backend
-} // namespace onert
-
-#endif // __ONERT_BACKEND_GPU_CL_OPENCL_SELECTORS_SIMPLE_SELECTORS_H__
diff --git a/runtime/onert/backend/gpu_cl/operand/CLTensor.cc b/runtime/onert/backend/gpu_cl/operand/CLTensor.cc
index 6dd9bd252..d3ed102a1 100644
--- a/runtime/onert/backend/gpu_cl/operand/CLTensor.cc
+++ b/runtime/onert/backend/gpu_cl/operand/CLTensor.cc
@@ -16,10 +16,12 @@
#include "CLTensor.h"
-#include "open_cl/Buffer.h"
-#include "open_cl/ClContext.h"
-#include "open_cl/Tensor.h"
-#include "open_cl/TensorType.h"
+#include "tensorflow/lite/delegates/gpu/cl/buffer.h"
+#include "tensorflow/lite/delegates/gpu/cl/cl_context.h"
+#include "tensorflow/lite/delegates/gpu/cl/tensor.h"
+#include "tensorflow/lite/delegates/gpu/cl/tensor_type.h"
+
+using namespace tflite::gpu::cl;
namespace onert
{
@@ -30,16 +32,15 @@ namespace gpu_cl
namespace operand
{
-CLTensor::CLTensor(size_t rank, ir::Shape shape, std::shared_ptr<Environment> environment)
- : ICLTensor{rank, shape, environment}, _tensor(std::make_shared<Tensor>())
+CLTensor::CLTensor(size_t rank, ir::Shape shape,
+ std::shared_ptr<tflite::gpu::cl::Environment> environment, TensorType type)
+ : ICLTensor{rank, shape, environment, type}, _tensor(std::make_shared<Tensor>())
{
}
-const Tensor *CLTensor::handle() const { return _tensor.get(); }
-
-Tensor *CLTensor::handle() { return _tensor.get(); }
+const tflite::gpu::cl::Tensor *CLTensor::handle() const { return _tensor.get(); }
-void CLTensor::setBuffer(void *host_ptr) { (void)host_ptr; }
+tflite::gpu::cl::Tensor *CLTensor::handle() { return _tensor.get(); }
} // namespace operand
} // namespace gpu_cl
diff --git a/runtime/onert/backend/gpu_cl/operand/CLTensor.h b/runtime/onert/backend/gpu_cl/operand/CLTensor.h
index 7d2e70a99..f2153f430 100644
--- a/runtime/onert/backend/gpu_cl/operand/CLTensor.h
+++ b/runtime/onert/backend/gpu_cl/operand/CLTensor.h
@@ -19,9 +19,9 @@
#include "ICLTensor.h"
-#include "open_cl/Buffer.h"
-#include "open_cl/ClContext.h"
-#include "open_cl/Tensor.h"
+#include "tensorflow/lite/delegates/gpu/cl/buffer.h"
+#include "tensorflow/lite/delegates/gpu/cl/cl_context.h"
+#include "tensorflow/lite/delegates/gpu/cl/tensor.h"
namespace onert
{
@@ -38,11 +38,12 @@ public:
CLTensor() = delete;
public:
- CLTensor(size_t rank, ir::Shape shape, std::shared_ptr<Environment> environment);
+ CLTensor(size_t rank, ir::Shape shape, std::shared_ptr<tflite::gpu::cl::Environment> environment,
+ TensorType type);
public:
- const Tensor *handle() const override;
- Tensor *handle() override;
+ const tflite::gpu::cl::Tensor *handle() const override;
+ tflite::gpu::cl::Tensor *handle() override;
public:
/** Set given buffer as the buffer of the tensor
@@ -55,7 +56,7 @@ public:
void setBuffer(void *host_ptr);
private:
- std::shared_ptr<Tensor> _tensor;
+ std::shared_ptr<tflite::gpu::cl::Tensor> _tensor;
};
} // namespace operand
diff --git a/runtime/onert/backend/gpu_cl/operand/ICLTensor.cc b/runtime/onert/backend/gpu_cl/operand/ICLTensor.cc
index 3f070be0c..a95f78056 100644
--- a/runtime/onert/backend/gpu_cl/operand/ICLTensor.cc
+++ b/runtime/onert/backend/gpu_cl/operand/ICLTensor.cc
@@ -16,11 +16,11 @@
#include "ICLTensor.h"
-#include "open_cl/Api.h"
-#include "open_cl/Spi.h"
-#include "open_cl/OpenclWrapper.h"
-#include "open_cl/TensorTypeUtil.h"
-#include "open_cl/kernels/Converter.h"
+#include "tensorflow/lite/delegates/gpu/api.h"
+#include "tensorflow/lite/delegates/gpu/spi.h"
+#include "tensorflow/lite/delegates/gpu/cl/opencl_wrapper.h"
+#include "tensorflow/lite/delegates/gpu/cl/tensor_type_util.h"
+#include "tensorflow/lite/delegates/gpu/cl/kernels/converter.h"
namespace onert
{
@@ -31,6 +31,10 @@ namespace gpu_cl
namespace operand
{
+using namespace tflite::gpu;
+using namespace tflite::gpu::cl;
+using namespace tflite::gpu::internal_tensor;
+
void ICLTensor::access(const std::function<void(ITensor &tensor)> &fn)
{
if (total_size() == 0)
@@ -39,100 +43,133 @@ void ICLTensor::access(const std::function<void(ITensor &tensor)> &fn)
fn(*this);
}
-void ICLTensor::enqueueWriteBuffer(const void *ptr, bool)
+void ICLTensor::writeConvertInit()
{
- const float *arr = (float *)ptr;
- TensorObject input_obj = MakeReadableCpuMemory(absl::MakeSpan(arr, total_size() / 4));
+ TensorObjectDef input_def;
+ input_def.dimensions.b = handle()->Batch();
+ input_def.dimensions.h = handle()->Height();
+ input_def.dimensions.w = handle()->Width();
+ input_def.dimensions.c = handle()->Channels();
+ input_def.object_def.data_layout = DataLayout::BHWC;
+ input_def.object_def.data_type = DataType::FLOAT32;
+ input_def.object_def.object_type = ObjectType::CPU_MEMORY;
+ input_def.object_def.user_provided = true;
- TensorObject output_obj;
+ TensorObjectDef permute_def = input_def;
+ permute_def.object_def.object_type = ToObjectType(handle()->GetStorageType());
- if (handle()->GetStorageType() == TensorStorageType::BUFFER)
+ auto dims = permute_def.dimensions;
+ const BHWC shape(dims.b, dims.h, dims.w, dims.c);
+ const TensorDescriptor desc{
+ permute_def.object_def.data_type,
+ ToTensorStorageType(permute_def.object_def.object_type, permute_def.object_def.data_layout),
+ Layout::BHWC};
+ if (!AllocateTensorMemory(_environment->context(), shape, desc, &_cl_memory).ok())
{
- output_obj = OpenClBuffer{handle()->GetMemoryPtr()};
+ throw std::runtime_error("Failed to AllocateTensorMemory");
}
- else if (handle()->GetStorageType() == TensorStorageType::IMAGE_BUFFER)
+
+ TensorObjectDef output_def = permute_def;
+ output_def.object_def.data_layout = ToDataLayout(handle()->GetStorageType());
+ output_def.object_def.data_type = handle()->GetDataType();
+  output_def.object_def.user_provided = false;
+
+ _converter_builder = NewConverterBuilder(_environment.get());
+ if (!_converter_builder->MakeConverter(input_def, permute_def, &_converter_to).ok())
{
- output_obj = OpenClBuffer{handle()->GetMemoryPtrForWriting()};
+ throw std::runtime_error("Failed to make converter_to");
}
- else
+ if (!_converter_builder->MakeConverter(permute_def, output_def, &_converter_from).ok())
{
- output_obj = OpenClTexture{handle()->GetMemoryPtr()};
+ throw std::runtime_error("Failed to make converter_from");
}
+}
+
+void ICLTensor::readConvertInit()
+{
+ _converter_builder = NewConverterBuilder(_environment.get());
TensorObjectDef input_def;
input_def.dimensions.b = handle()->Batch();
input_def.dimensions.h = handle()->Height();
input_def.dimensions.w = handle()->Width();
input_def.dimensions.c = handle()->Channels();
- input_def.object_def.data_layout = DataLayout::BHWC;
- input_def.object_def.data_type = DataType::FLOAT32;
- input_def.object_def.object_type = ObjectType::CPU_MEMORY;
- input_def.object_def.user_provided = true;
+ input_def.object_def.data_layout = ToDataLayout(handle()->GetStorageType());
+ input_def.object_def.data_type = handle()->GetDataType();
+ input_def.object_def.object_type = ToObjectType(handle()->GetStorageType());
+ input_def.object_def.user_provided = false;
- TensorObjectDef tmp_def;
- tmp_def.dimensions.b = handle()->Batch();
- tmp_def.dimensions.h = handle()->Height();
- tmp_def.dimensions.w = handle()->Width();
- tmp_def.dimensions.c = handle()->Channels();
- tmp_def.object_def.data_layout = DataLayout::BHWC;
- tmp_def.object_def.data_type = DataType::FLOAT32;
- tmp_def.object_def.object_type = ToObjectType(handle()->GetStorageType());
- tmp_def.object_def.user_provided = true;
-
- auto dims = tmp_def.dimensions;
+ TensorObjectDef permute_def = input_def;
+ permute_def.object_def.data_layout = DataLayout::BHWC;
+ permute_def.object_def.data_type = DataType::FLOAT32;
+ permute_def.object_def.user_provided = true;
+
+ auto dims = permute_def.dimensions;
const BHWC shape(dims.b, dims.h, dims.w, dims.c);
const TensorDescriptor desc{
- tmp_def.object_def.data_type,
- ToTensorStorageType(tmp_def.object_def.object_type, tmp_def.object_def.data_layout),
+ permute_def.object_def.data_type,
+ ToTensorStorageType(permute_def.object_def.object_type, permute_def.object_def.data_layout),
Layout::BHWC};
if (!AllocateTensorMemory(_environment->context(), shape, desc, &_cl_memory).ok())
{
- throw std::runtime_error("AllocateTensorMemory error.");
+ throw std::runtime_error("Failed to AllocateTensorMemory");
}
- TensorObject tmp_obj;
- if (tmp_def.object_def.object_type == ObjectType::OPENCL_TEXTURE)
+
+ TensorObjectDef output_def = permute_def;
+ output_def.object_def.object_type = ObjectType::CPU_MEMORY;
+
+ if (!_converter_builder->MakeConverter(input_def, permute_def, &_converter_from).ok())
{
- tmp_obj = OpenClTexture{_cl_memory.memory()};
+ throw std::runtime_error("Failed to make converter_from");
}
- else
+ if (!_converter_builder->MakeConverter(permute_def, output_def, &_converter_to).ok())
{
- tmp_obj = OpenClBuffer{_cl_memory.memory()};
+ throw std::runtime_error("Failed to make converter_to");
}
+}
- TensorObjectDef output_def = input_def;
- output_def.dimensions.b = handle()->Batch();
- output_def.dimensions.h = handle()->Height();
- output_def.dimensions.w = handle()->Width();
- output_def.dimensions.c = handle()->Channels();
- output_def.object_def.data_layout = ToDataLayout(handle()->GetStorageType());
- output_def.object_def.data_type = handle()->GetDataType();
- output_def.object_def.object_type = ToObjectType(handle()->GetStorageType());
+void ICLTensor::enqueueWriteBuffer(const void *ptr, bool)
+{
+ TensorObject input_obj =
+ MakeReadableCpuMemory(absl::MakeSpan(static_cast<const float *>(ptr), _shape.num_elements()));
- _converter_builder = NewConverterBuilder(_environment.get());
- if (!_converter_builder->MakeConverter(input_def, tmp_def, &_converter_cpu).ok())
+ TensorObject output_obj;
+
+ TensorObject permute_obj;
+ if (ToObjectType(handle()->GetStorageType()) == ObjectType::OPENCL_TEXTURE)
{
- throw std::runtime_error("MakeConverter<_converter_cpu> error.");
+ permute_obj = OpenClTexture{_cl_memory.memory()};
}
- if (!_converter_builder->MakeConverter(tmp_def, output_def, &_converter_bhwc).ok())
+ else
{
- throw std::runtime_error("MakeConverter<_converter_bhwc> error.");
+ permute_obj = OpenClBuffer{_cl_memory.memory()};
}
- if (!_converter_cpu->Convert(input_obj, tmp_obj).ok())
+ if (handle()->GetStorageType() == TensorStorageType::BUFFER)
{
- throw std::runtime_error("[w] _converter_cpu Convert error.");
+ output_obj = OpenClBuffer{handle()->GetMemoryPtr()};
}
- if (!_converter_bhwc->Convert(tmp_obj, output_obj).ok())
+ else if (handle()->GetStorageType() == TensorStorageType::IMAGE_BUFFER)
{
- throw std::runtime_error("[w] _converter_bhwc Convert error.");
+ output_obj = OpenClBuffer{handle()->GetMemoryPtrForWriting()};
+ }
+ else
+ {
+ output_obj = OpenClTexture{handle()->GetMemoryPtr()};
+ }
+
+ if (!_converter_to->Convert(input_obj, permute_obj).ok())
+ {
+ throw std::runtime_error("Failed to write cl buffer from cpu memory");
+ }
+ if (!_converter_from->Convert(permute_obj, output_obj).ok())
+ {
+ throw std::runtime_error("Failed to change layout");
}
}
void ICLTensor::enqueueReadBuffer(void *ptr, bool)
{
- float *arr = (float *)ptr;
- TensorObject output_obj = MakeCpuMemory(absl::MakeSpan(arr, total_size() / 4));
-
TensorObject input_obj;
if (handle()->GetStorageType() == TensorStorageType::BUFFER)
@@ -148,72 +185,26 @@ void ICLTensor::enqueueReadBuffer(void *ptr, bool)
input_obj = OpenClTexture{handle()->GetMemoryPtr()};
}
- TensorObjectDef input_def;
- input_def.dimensions.b = handle()->Batch();
- input_def.dimensions.h = handle()->Height();
- input_def.dimensions.w = handle()->Width();
- input_def.dimensions.c = handle()->Channels();
- input_def.object_def.data_layout = ToDataLayout(handle()->GetStorageType());
- input_def.object_def.data_type = handle()->GetDataType();
- input_def.object_def.object_type = ToObjectType(handle()->GetStorageType());
- input_def.object_def.user_provided = false;
-
- TensorObjectDef tmp_def;
- tmp_def.dimensions.b = handle()->Batch();
- tmp_def.dimensions.h = handle()->Height();
- tmp_def.dimensions.w = handle()->Width();
- tmp_def.dimensions.c = handle()->Channels();
- tmp_def.object_def.data_layout = DataLayout::BHWC;
- tmp_def.object_def.data_type = DataType::FLOAT32;
- tmp_def.object_def.object_type = ToObjectType(handle()->GetStorageType());
- tmp_def.object_def.user_provided = true;
-
- auto dims = tmp_def.dimensions;
- const BHWC shape(dims.b, dims.h, dims.w, dims.c);
- const TensorDescriptor desc{
- tmp_def.object_def.data_type,
- ToTensorStorageType(tmp_def.object_def.object_type, tmp_def.object_def.data_layout),
- Layout::BHWC};
- if (!AllocateTensorMemory(_environment->context(), shape, desc, &_cl_memory).ok())
+ TensorObject permute_obj;
+ if (ToObjectType(handle()->GetStorageType()) == ObjectType::OPENCL_TEXTURE)
{
- throw std::runtime_error("AllocateTensorMemory error.");
- }
- TensorObject tmp_obj;
- if (tmp_def.object_def.object_type == ObjectType::OPENCL_TEXTURE)
- {
- tmp_obj = OpenClTexture{_cl_memory.memory()};
+ permute_obj = OpenClTexture{_cl_memory.memory()};
}
else
{
- tmp_obj = OpenClBuffer{_cl_memory.memory()};
+ permute_obj = OpenClBuffer{_cl_memory.memory()};
}
- TensorObjectDef output_def = input_def;
- output_def.dimensions.b = handle()->Batch();
- output_def.dimensions.h = handle()->Height();
- output_def.dimensions.w = handle()->Width();
- output_def.dimensions.c = handle()->Channels();
- output_def.object_def.data_layout = DataLayout::BHWC;
- output_def.object_def.data_type = DataType::FLOAT32;
- output_def.object_def.object_type = ObjectType::CPU_MEMORY;
- output_def.object_def.user_provided = true;
- _converter_builder = NewConverterBuilder(_environment.get());
- if (!_converter_builder->MakeConverter(input_def, tmp_def, &_converter_bhwc).ok())
- {
- throw std::runtime_error("MakeConverter<_converter_bhwc> error.");
- }
- if (!_converter_builder->MakeConverter(tmp_def, output_def, &_converter_cpu).ok())
- {
- throw std::runtime_error("MakeConverter<_converter_cpu> error.");
- }
+ TensorObject output_obj =
+ MakeCpuMemory(absl::MakeSpan(static_cast<float *>(ptr), _shape.num_elements()));
- if (!_converter_bhwc->Convert(input_obj, tmp_obj).ok())
+ if (!_converter_from->Convert(input_obj, permute_obj).ok())
{
- throw std::runtime_error("[r] _converter_bhwc Convert error.");
+ throw std::runtime_error("Failed to change layout");
}
- if (!_converter_cpu->Convert(tmp_obj, output_obj).ok())
+ if (!_converter_to->Convert(permute_obj, output_obj).ok())
{
- throw std::runtime_error("[r] _converter_cpu Convert error.");
+ throw std::runtime_error("Failed to read cl buffer");
}
}
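
After this rewrite, enqueueWriteBuffer and enqueueReadBuffer no longer build converters inline; they reuse the _converter_to/_converter_from pair prepared once by writeConvertInit/readConvertInit, staging every transfer through the pre-allocated _cl_memory permute buffer. A minimal stand-in showing the staged write path (Converter here is a placeholder interface, not the TFLite delegate type):

    #include <stdexcept>

    struct Converter
    {
      virtual bool convert(const void *src, void *dst) = 0;
      virtual ~Converter() = default;
    };

    void writeStaged(Converter &to_permute, Converter &from_permute,
                     const void *host, void *permute_buf, void *device)
    {
      // Stage 1: host BHWC floats -> intermediate buffer, same layout.
      if (!to_permute.convert(host, permute_buf))
        throw std::runtime_error("Failed to write cl buffer from cpu memory");
      // Stage 2: intermediate buffer -> device tensor in its storage layout.
      if (!from_permute.convert(permute_buf, device))
        throw std::runtime_error("Failed to change layout");
    }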
diff --git a/runtime/onert/backend/gpu_cl/operand/ICLTensor.h b/runtime/onert/backend/gpu_cl/operand/ICLTensor.h
index 28e905d48..b8ad4469f 100644
--- a/runtime/onert/backend/gpu_cl/operand/ICLTensor.h
+++ b/runtime/onert/backend/gpu_cl/operand/ICLTensor.h
@@ -19,11 +19,14 @@
#include <backend/ITensor.h>
-#include "open_cl/Api.h"
-#include "open_cl/Spi.h"
-#include "open_cl/ClCommandQueue.h"
-#include "open_cl/kernels/Converter.h"
-#include "open_cl/Tensor.h"
+#include "tensorflow/lite/delegates/gpu/api.h"
+#include "tensorflow/lite/delegates/gpu/spi.h"
+#include "tensorflow/lite/delegates/gpu/cl/cl_command_queue.h"
+#include "tensorflow/lite/delegates/gpu/cl/kernels/converter.h"
+#include "tensorflow/lite/delegates/gpu/cl/tensor.h"
+#include "tensorflow/lite/delegates/gpu/cl/environment.h"
+
+#include "TensorBuilderHelper.h"
namespace onert
{
@@ -43,19 +46,18 @@ public:
ICLTensor(ICLTensor &&) = default;
ICLTensor &operator=(ICLTensor &&) = default;
- ICLTensor(size_t rank, ir::Shape shape, std::shared_ptr<Environment> environment)
- : _rank{rank}, _shape{shape}, _environment(environment)
+ ICLTensor(size_t rank, ir::Shape shape, std::shared_ptr<tflite::gpu::cl::Environment> environment,
+ TensorType type)
+ : _rank{rank}, _shape{shape}, _environment(environment), _type(type)
{
}
public:
uint8_t *buffer() const final { return reinterpret_cast<uint8_t *>(handle()->GetMemoryPtr()); }
size_t total_size() const final { return _shape.num_elements() * sizeof(float); }
- size_t calcOffset(const ir::Coordinates &coords) const final
+ size_t calcOffset(const ir::Coordinates &) const final
{
- // NYI
- (void)coords;
- return 0;
+ throw std::runtime_error("ICLTensor::calcOffset() is not supported.");
}
ir::Layout layout() const final { return ir::Layout::NHWC; }
ir::DataType data_type() const final { return ir::DataType::FLOAT32; }
@@ -83,19 +85,24 @@ public:
void enqueueWriteBuffer(const void *ptr, bool blocking = true) final;
void enqueueReadBuffer(void *ptr, bool blocking = true) final;
+ void writeConvertInit();
+ void readConvertInit();
+ TensorType get_type() { return _type; }
+
public:
- virtual const Tensor *handle() const = 0;
- virtual Tensor *handle() = 0;
+ virtual const tflite::gpu::cl::Tensor *handle() const = 0;
+ virtual tflite::gpu::cl::Tensor *handle() = 0;
private:
protected:
size_t _rank; // Actual rank (reflects extended rank)
ir::Shape _shape;
- std::shared_ptr<Environment> _environment;
- std::unique_ptr<TensorObjectConverterBuilder> _converter_builder;
- CLMemory _cl_memory;
- std::unique_ptr<TensorObjectConverter> _converter_cpu;
- std::unique_ptr<TensorObjectConverter> _converter_bhwc;
+ std::shared_ptr<tflite::gpu::cl::Environment> _environment;
+ TensorType _type;
+ std::unique_ptr<tflite::gpu::TensorObjectConverterBuilder> _converter_builder;
+ tflite::gpu::cl::CLMemory _cl_memory;
+ std::unique_ptr<tflite::gpu::TensorObjectConverter> _converter_to;
+ std::unique_ptr<tflite::gpu::TensorObjectConverter> _converter_from;
};
} // namespace operand
diff --git a/runtime/onert/backend/ruy/ops/OperationUtils.h b/runtime/onert/backend/ruy/ops/OperationUtils.h
index 5dfdc7ec5..716400c1f 100644
--- a/runtime/onert/backend/ruy/ops/OperationUtils.h
+++ b/runtime/onert/backend/ruy/ops/OperationUtils.h
@@ -18,17 +18,17 @@
#define __ONERT_BACKEND_RUY_OPS_OPERATION_UTILS_H__
#include <backend/IPortableTensor.h>
+#include <ir/DataType.h>
+#include <ir/Padding.h>
+#include <util/CalculateActivationRange.h>
#include <ruy/Shape.h>
#include <ruy/Types.h>
-#include <iostream>
-#include <ir/DataType.h>
-#include <ir/InternalType.h>
-#include <ir/Padding.h>
#include <limits>
using OperandType = onert::ir::DataType;
+using namespace onert::util;
namespace onert
{
@@ -79,40 +79,6 @@ inline nnfw::ruy::FusedActivationFunctionType convertActivationType(const ir::Ac
}
}
-template <typename T>
-void CalculateActivationRange(ir::Activation activation, T *activation_min, T *activation_max)
-{
- if (activation == ir::Activation::RELU)
- {
- *activation_min = 0;
- *activation_max = std::numeric_limits<T>::max();
- }
- else if (activation == ir::Activation::RELU6)
- {
- *activation_min = 0;
- *activation_max = 6;
- }
- else if (activation == ir::Activation::RELU1)
- {
- *activation_min = -1;
- *activation_max = 1;
- }
- else if (activation == ir::Activation::SIGMOID)
- {
- *activation_min = 0;
- *activation_max = 1;
- }
- else if (activation == ir::Activation::NONE)
- {
- *activation_min = std::numeric_limits<T>::lowest();
- *activation_max = std::numeric_limits<T>::max();
- }
- else
- {
- std::cout << "Unsupported fused activation function." << std::endl;
- }
-}
-
nnfw::ruy::PaddingType getPaddingType(ir::PaddingType ir_padding_type);
} // namespace ops
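
This hunk deletes the ruy backend's private copy of CalculateActivationRange in favor of the shared util/CalculateActivationRange.h pulled in above; the xnnpack hunk further down does the same. Reconstructed from the two removed copies, the shared template behaves like this sketch (the Activation enum stands in for ir::Activation; the xnnpack copy threw on unsupported activations while the ruy copy only logged, and the throwing behavior is assumed here):

    #include <limits>
    #include <stdexcept>

    enum class Activation { NONE, RELU, RELU1, RELU6, SIGMOID };

    template <typename T>
    void CalculateActivationRange(Activation activation, T *activation_min, T *activation_max)
    {
      switch (activation)
      {
        case Activation::RELU:
          *activation_min = 0; *activation_max = std::numeric_limits<T>::max(); break;
        case Activation::RELU6:
          *activation_min = 0; *activation_max = 6; break;
        case Activation::RELU1:
          *activation_min = -1; *activation_max = 1; break;
        case Activation::SIGMOID:
          *activation_min = 0; *activation_max = 1; break;
        case Activation::NONE:
          *activation_min = std::numeric_limits<T>::lowest();
          *activation_max = std::numeric_limits<T>::max(); break;
        default:
          throw std::runtime_error{"Unsupported fused activation function"};
      }
    }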
diff --git a/runtime/onert/backend/trix/Backend.h b/runtime/onert/backend/trix/Backend.h
new file mode 100644
index 000000000..a63839720
--- /dev/null
+++ b/runtime/onert/backend/trix/Backend.h
@@ -0,0 +1,62 @@
+/*
+ * Copyright (c) 2022 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __ONERT_BACKEND_TRIX_BACKEND_H__
+#define __ONERT_BACKEND_TRIX_BACKEND_H__
+
+#include "BackendContext.h"
+#include "Config.h"
+#include "KernelGenerator.h"
+
+#include <backend/Backend.h>
+
+#include <memory>
+
+namespace onert
+{
+namespace backend
+{
+namespace trix
+{
+
+class Backend : public ::onert::backend::Backend
+{
+public:
+ Backend() : _config{std::make_shared<Config>()} {}
+
+ std::shared_ptr<IConfig> config() const override { return _config; }
+
+ std::unique_ptr<onert::backend::BackendContext> newContext(ContextData &&data) const override
+ {
+ auto &graph = *data.graph;
+ auto context = std::make_unique<BackendContext>(this, std::move(data));
+ auto tr = std::make_shared<basic::TensorRegistry>();
+ auto tb = std::make_shared<TensorBuilder>(tr);
+ context->tensor_registry = tr;
+ context->tensor_builder = tb;
+ context->kernel_gen = std::make_shared<KernelGenerator>(graph, tb, tr, context->dev_context());
+ return context;
+ }
+
+private:
+ std::shared_ptr<IConfig> _config;
+};
+
+} // namespace trix
+} // namespace backend
+} // namespace onert
+
+#endif // __ONERT_BACKEND_TRIX_BACKEND_H__
diff --git a/runtime/onert/backend/trix/BackendContext.cc b/runtime/onert/backend/trix/BackendContext.cc
new file mode 100644
index 000000000..e46b11d20
--- /dev/null
+++ b/runtime/onert/backend/trix/BackendContext.cc
@@ -0,0 +1,64 @@
+/*
+ * Copyright (c) 2022 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "BackendContext.h"
+
+#include "TensorBuilder.h"
+#include "KernelGenerator.h"
+#include "util/logging.h"
+#include "ir/Index.h"
+#include "ir/OperandIndexMap.h"
+#include "ir/OperandIndexSequence.h"
+#include "backend/basic/BackendContextHelpers.h"
+
+namespace onert
+{
+namespace backend
+{
+namespace trix
+{
+
+ITensorRegistry *BackendContext::genTensors() { return basic::genTensors(*this); }
+
+FunctionMap BackendContext::genKernels()
+{
+ FunctionMap ret;
+
+ for (auto op_ind : _data.op_order)
+ {
+ auto fn_seq = kernel_gen->generate(op_ind);
+ ret.emplace_back(op_ind, std::move(fn_seq));
+ }
+
+ basic::initConsts(*this);
+
+ // NOTE For memory optimization, we want to free some operand data
+ const_cast<ir::Graph &>(*_data.graph)
+ .operands()
+ .iterate([&](const ir::OperandIndex &, ir::Operand &obj) { obj.releaseData(); });
+
+ for (auto &it : ret)
+ {
+ auto &fn_seq = it.second;
+ fn_seq->iterate([&](exec::IFunction &ifunc) { ifunc.prepare(); });
+ }
+
+ return ret;
+}
+
+} // namespace trix
+} // namespace backend
+} // namespace onert
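
genKernels() lowers each operation in scheduled order, bakes constants and releases operand data it no longer needs, then runs prepare() on every generated function sequence. The control flow, condensed with stand-in types:

    #include <functional>
    #include <memory>
    #include <utility>
    #include <vector>

    struct Function { std::function<void()> prepare; };
    using FunctionMap = std::vector<std::pair<int, std::unique_ptr<Function>>>;

    FunctionMap genKernels(const std::vector<int> &op_order,
                           const std::function<std::unique_ptr<Function>(int)> &generate)
    {
      FunctionMap ret;
      for (int op : op_order)   // 1. lower each operation in topological order
        ret.emplace_back(op, generate(op));
      // 2. constants are initialized and graph operand data is freed here
      for (auto &it : ret)      // 3. let every kernel pre-allocate its resources
        it.second->prepare();
      return ret;
    }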
diff --git a/runtime/onert/backend/trix/BackendContext.h b/runtime/onert/backend/trix/BackendContext.h
new file mode 100644
index 000000000..c0734c46d
--- /dev/null
+++ b/runtime/onert/backend/trix/BackendContext.h
@@ -0,0 +1,62 @@
+/*
+ * Copyright (c) 2022 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __ONERT_BACKEND_TRIX_BACKEND_CONTEXT_H__
+#define __ONERT_BACKEND_TRIX_BACKEND_CONTEXT_H__
+
+#include <backend/BackendContext.h>
+#include "TensorBuilder.h"
+#include "KernelGenerator.h"
+#include "DevContext.h"
+
+namespace onert
+{
+namespace backend
+{
+namespace trix
+{
+
+class BackendContext : public onert::backend::BackendContext
+{
+public:
+ BackendContext(const Backend *backend, ContextData &&data,
+ std::shared_ptr<ITensorRegistry> tensor_registry = nullptr,
+ std::shared_ptr<TensorBuilder> tensor_builder = nullptr,
+ std::shared_ptr<KernelGenerator> kernel_gen = nullptr)
+ : onert::backend::BackendContext(backend, std::move(data), tensor_registry),
+ tensor_builder{tensor_builder}, kernel_gen{kernel_gen}, _dev_context(new DevContext)
+ {
+ }
+
+ ITensorRegistry *genTensors() override;
+ FunctionMap genKernels() override;
+
+ std::shared_ptr<DevContext> dev_context() { return _dev_context; }
+
+public:
+ // TODO Make it private
+ std::shared_ptr<TensorBuilder> tensor_builder;
+ std::shared_ptr<KernelGenerator> kernel_gen;
+
+private:
+ std::shared_ptr<DevContext> _dev_context;
+};
+
+} // namespace trix
+} // namespace backend
+} // namespace onert
+
+#endif // __ONERT_BACKEND_TRIX_BACKEND_CONTEXT_H__
diff --git a/runtime/onert/backend/trix/CMakeLists.txt b/runtime/onert/backend/trix/CMakeLists.txt
new file mode 100644
index 000000000..5455757ca
--- /dev/null
+++ b/runtime/onert/backend/trix/CMakeLists.txt
@@ -0,0 +1,24 @@
+set(LIB_ONERT_BACKEND_TRIX onert_backend_trix)
+
+nnfw_find_package(TRIXEngine EXACT 2.5.0 QUIET)
+if(NOT TRIXEngine_FOUND)
+ return()
+endif(NOT TRIXEngine_FOUND)
+
+file(GLOB_RECURSE SOURCES "*.cc")
+
+add_library(${LIB_ONERT_BACKEND_TRIX} SHARED ${SOURCES})
+
+target_link_libraries(${LIB_ONERT_BACKEND_TRIX} PRIVATE onert_core)
+target_link_libraries(${LIB_ONERT_BACKEND_TRIX} PRIVATE trix_engine)
+target_link_libraries(${LIB_ONERT_BACKEND_TRIX} PRIVATE nnfw_common)
+target_link_libraries(${LIB_ONERT_BACKEND_TRIX} PRIVATE nnfw_coverage)
+
+set_target_properties(${LIB_ONERT_BACKEND_TRIX} PROPERTIES OUTPUT_NAME backend_trix)
+
+if(CMAKE_BUILD_TYPE_LC STREQUAL "release")
+ add_custom_command(TARGET ${LIB_ONERT_BACKEND_TRIX} POST_BUILD
+ COMMAND ${CMAKE_STRIP} "--strip-unneeded" $<TARGET_FILE_NAME:${LIB_ONERT_BACKEND_TRIX}>)
+endif()
+
+install(TARGETS ${LIB_ONERT_BACKEND_TRIX} DESTINATION lib)
diff --git a/runtime/onert/backend/trix/Config.cc b/runtime/onert/backend/trix/Config.cc
new file mode 100644
index 000000000..c23326423
--- /dev/null
+++ b/runtime/onert/backend/trix/Config.cc
@@ -0,0 +1,32 @@
+/*
+ * Copyright (c) 2022 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "Config.h"
+
+namespace onert
+{
+namespace backend
+{
+namespace trix
+{
+
+bool Config::initialize() { return true; }
+
+ir::Layout Config::supportLayout(const ir::Operation &, ir::Layout) { return ir::Layout::NHWC; }
+
+} // namespace trix
+} // namespace backend
+} // namespace onert
diff --git a/runtime/onert/backend/trix/Config.h b/runtime/onert/backend/trix/Config.h
new file mode 100644
index 000000000..799047d6f
--- /dev/null
+++ b/runtime/onert/backend/trix/Config.h
@@ -0,0 +1,48 @@
+/*
+ * Copyright (c) 2022 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __ONERT_BACKEND_TRIX_CONFIG_H__
+#define __ONERT_BACKEND_TRIX_CONFIG_H__
+
+#include <backend/IConfig.h>
+#include <memory>
+#include <util/ITimer.h>
+
+namespace onert
+{
+namespace backend
+{
+namespace trix
+{
+
+class Config : public IConfig
+{
+public:
+ std::string id() override { return "trix"; }
+ bool initialize() override;
+ ir::Layout supportLayout(const ir::Operation &node, ir::Layout frontend_layout) override;
+ bool supportPermutation() override { return true; }
+ bool supportDynamicTensor() override { return false; }
+ bool supportFP16() override { return false; }
+
+ std::unique_ptr<util::ITimer> timer() override { return std::make_unique<util::CPUTimer>(); }
+};
+
+} // namespace trix
+} // namespace backend
+} // namespace onert
+
+#endif // __ONERT_BACKEND_TRIX_CONFIG_H__
diff --git a/runtime/onert/backend/trix/DevContext.h b/runtime/onert/backend/trix/DevContext.h
new file mode 100644
index 000000000..482932fd4
--- /dev/null
+++ b/runtime/onert/backend/trix/DevContext.h
@@ -0,0 +1,118 @@
+/*
+ * Copyright (c) 2022 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __ONERT_BACKEND_TRIX_DEV_CONTEXT_H__
+#define __ONERT_BACKEND_TRIX_DEV_CONTEXT_H__
+
+#include <libnpuhost.h>
+
+namespace onert
+{
+namespace backend
+{
+namespace trix
+{
+
+class DevContext
+{
+public:
+ DevContext()
+ {
+ auto device_count = getnumNPUdeviceByType(NPUCOND_TRIV2_CONN_SOCIP);
+ if (device_count <= 0)
+ {
+ throw std::runtime_error("Unable to find TRIV2 NPU device");
+ }
+
+ // Use NPU 0 device
+ if (getNPUdeviceByType(&_dev_handle, NPUCOND_TRIV2_CONN_SOCIP, 0) < 0)
+ {
+ throw std::runtime_error("Failed to get TRIV2 NPU device handle");
+ }
+ }
+
+ ~DevContext()
+ {
+ if (_dev_handle != nullptr)
+ {
+ unregisterNPUmodel_all(_dev_handle);
+ putNPUdevice(_dev_handle);
+ }
+ }
+
+ npudev_h getDev() { return _dev_handle; }
+
+ template <typename T> void setDataInfo(tensors_data_info *info, std::vector<T *> &tensors)
+ {
+ info->num_info = static_cast<uint32_t>(tensors.size());
+
+ for (uint32_t idx = 0; idx < info->num_info; ++idx)
+ {
+ info->info[idx].layout = convertDataLayout(tensors[idx]->layout());
+ info->info[idx].type = convertDataType(tensors[idx]->data_type());
+ }
+ }
+
+ template <typename T> void setBuffer(generic_buffers *buf, std::vector<T *> &tensors)
+ {
+ buf->num_buffers = static_cast<uint32_t>(tensors.size());
+
+ for (uint32_t idx = 0; idx < buf->num_buffers; ++idx)
+ {
+ buf->bufs[idx].addr = tensors[idx]->buffer();
+ buf->bufs[idx].size = static_cast<uint64_t>(tensors[idx]->total_size());
+ buf->bufs[idx].type = BUFFER_MAPPED;
+ }
+ }
+
+private:
+ data_layout convertDataLayout(const ir::Layout layout)
+ {
+ switch (layout)
+ {
+ case ir::Layout::NCHW:
+ return DATA_LAYOUT_NCHW;
+ case ir::Layout::NHWC:
+ return DATA_LAYOUT_NHWC;
+ default:
+ throw std::runtime_error("Unknown Layout");
+ }
+ }
+
+ data_type convertDataType(const ir::DataType type)
+ {
+ switch (type)
+ {
+ case ir::DataType::QUANT_UINT8_ASYMM:
+ return DATA_TYPE_QASYMM8;
+ case ir::DataType::QUANT_INT16_SYMM:
+ return DATA_TYPE_QSYMM16;
+ default:
+ throw std::runtime_error("Unsupported data type");
+ }
+ }
+
+private:
+ // NPU device handle
+ // TODO Support multicore npu device
+ npudev_h _dev_handle;
+};
+
+} // namespace trix
+} // namespace backend
+} // namespace onert
+
+#endif // __ONERT_BACKEND_TRIX_DEV_CONTEXT_H__
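
DevContext owns the NPU device handle and marshals onert tensors into the fixed-capacity structs libnpuhost consumes; note that setBuffer marks every buffer BUFFER_MAPPED, i.e. the NPU maps the tensor memory directly rather than copying it. The filling pattern, shown self-contained with stand-in types (the real tensors_data_info and generic_buffers come from libnpuhost.h):

    #include <cstddef>
    #include <cstdint>
    #include <vector>

    struct Buf { void *addr; uint64_t size; };
    struct Buffers { uint32_t num_buffers; Buf bufs[16]; };

    struct FakeTensor // stand-in for IPortableTensor
    {
      std::vector<uint8_t> data;
      uint8_t *buffer() { return data.data(); }
      size_t total_size() const { return data.size(); }
    };

    void setBuffer(Buffers *buf, std::vector<FakeTensor *> &tensors)
    {
      buf->num_buffers = static_cast<uint32_t>(tensors.size());
      for (uint32_t i = 0; i < buf->num_buffers; ++i)
      {
        buf->bufs[i].addr = tensors[i]->buffer(); // zero-copy: device maps this
        buf->bufs[i].size = static_cast<uint64_t>(tensors[i]->total_size());
      }
    }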
diff --git a/runtime/onert/backend/trix/KernelGenerator.cc b/runtime/onert/backend/trix/KernelGenerator.cc
new file mode 100644
index 000000000..68e6840dd
--- /dev/null
+++ b/runtime/onert/backend/trix/KernelGenerator.cc
@@ -0,0 +1,83 @@
+/*
+ * Copyright (c) 2022 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "KernelGenerator.h"
+
+#include "ops/BulkLayer.h"
+
+#include <backend/Backend.h>
+#include <backend/IConfig.h>
+#include <memory>
+#include <util/Utils.h>
+#include <util/logging.h>
+#include <exec/DynamicShapeInferer.h>
+
+#include <stdexcept>
+
+namespace onert
+{
+namespace backend
+{
+namespace trix
+{
+
+KernelGenerator::KernelGenerator(const ir::Graph &graph,
+ const std::shared_ptr<TensorBuilder> &tensor_builder,
+ const std::shared_ptr<basic::TensorRegistry> &tensor_reg,
+ const std::shared_ptr<DevContext> &dev_context)
+ : basic::KernelGeneratorBase{graph},
+ _ctx(graph.operands()), _operations_ctx{graph.operations()}, _current_layout{graph.layout()},
+ _tensor_builder(tensor_builder), _tensor_reg{tensor_reg}, _dev_context{dev_context}
+{
+ // DO NOTHING
+}
+
+std::unique_ptr<exec::FunctionSequence> KernelGenerator::generate(ir::OperationIndex ind)
+{
+ auto ret = std::make_unique<exec::FunctionSequence>();
+ ret->enableDynamicShapeInferer(false);
+
+ const auto &op = _graph.operations().at(ind);
+ op.accept(*this);
+ ret->append(releaseFunction());
+ return ret;
+}
+
+void KernelGenerator::visit(const ir::operation::Bulk &node)
+{
+ using ir::operation::Bulk;
+
+ std::vector<IPortableTensor *> output_tensors;
+ for (auto &ofm_idx : node.getOutputs())
+ output_tensors.emplace_back(_tensor_reg->getPortableTensor(ofm_idx));
+
+ std::vector<const IPortableTensor *> input_tensors;
+ for (auto &ifm_idx : node.getInputs())
+ input_tensors.emplace_back(_tensor_reg->getPortableTensor(ifm_idx));
+
+ // parameters
+ const auto binary_path = node.param().binary_path;
+
+ auto fn = std::make_unique<ops::BulkLayer>();
+
+ fn->configure(input_tensors, output_tensors, binary_path, _dev_context);
+
+ _return_fn = std::move(fn);
+}
+
+} // namespace trix
+} // namespace backend
+} // namespace onert
diff --git a/runtime/onert/backend/trix/KernelGenerator.h b/runtime/onert/backend/trix/KernelGenerator.h
new file mode 100644
index 000000000..d87dc6952
--- /dev/null
+++ b/runtime/onert/backend/trix/KernelGenerator.h
@@ -0,0 +1,61 @@
+/*
+ * Copyright (c) 2022 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __ONERT_BACKEND_TRIX_KERNEL_GENERATOR_H__
+#define __ONERT_BACKEND_TRIX_KERNEL_GENERATOR_H__
+
+#include "TensorBuilder.h"
+#include "backend/basic/TensorRegistry.h"
+#include "Tensor.h"
+#include "DevContext.h"
+
+#include <backend/basic/KernelGeneratorBase.h>
+#include <ir/Operands.h>
+#include <ir/Operations.h>
+
+namespace onert
+{
+namespace backend
+{
+namespace trix
+{
+
+class KernelGenerator : public basic::KernelGeneratorBase
+{
+public:
+ KernelGenerator(const ir::Graph &graph, const std::shared_ptr<TensorBuilder> &tensor_builder,
+ const std::shared_ptr<basic::TensorRegistry> &tensor_reg,
+ const std::shared_ptr<DevContext> &dev_context);
+
+ std::unique_ptr<exec::FunctionSequence> generate(ir::OperationIndex op_ind) override;
+
+private:
+ void visit(const ir::operation::Bulk &node) override;
+
+private:
+ const ir::Operands &_ctx;
+ const ir::Operations &_operations_ctx;
+ ir::Layout _current_layout;
+ std::shared_ptr<TensorBuilder> _tensor_builder;
+ std::shared_ptr<basic::TensorRegistry> _tensor_reg;
+ const std::shared_ptr<DevContext> _dev_context;
+};
+
+} // namespace trix
+} // namespace backend
+} // namespace onert
+
+#endif // __ONERT_BACKEND_TRIX_KERNEL_GENERATOR_H__
diff --git a/runtime/onert/backend/acl_common/ParentInfo.h b/runtime/onert/backend/trix/Tensor.h
index 708436327..5138cee71 100644
--- a/runtime/onert/backend/acl_common/ParentInfo.h
+++ b/runtime/onert/backend/trix/Tensor.h
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ * Copyright (c) 2022 Samsung Electronics Co., Ltd. All Rights Reserved
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
@@ -14,31 +14,24 @@
* limitations under the License.
*/
-#ifndef __ONERT_BACKEND_ACL_COMMON_PARENT_INFO_H__
-#define __ONERT_BACKEND_ACL_COMMON_PARENT_INFO_H__
+#ifndef __ONERT_BACKEND_TRIX_TENSOR_H__
+#define __ONERT_BACKEND_TRIX_TENSOR_H__
-#include <ir/Index.h>
-#include <ir/Coordinates.h>
+#include <backend/basic/Tensor.h>
+#include <ir/Data.h>
namespace onert
{
namespace backend
{
-namespace acl_common
+namespace trix
{
-/**
- * @brief Struct to represent parent operand in child operand
- */
-struct ParentInfo
-{
- ir::OperandIndex parent;
- ir::Layout frontend_layout;
- ir::Coordinates coordinates;
-};
+using Tensor = basic::Tensor;
+using ExternalTensor = basic::ExternalTensor;
-} // namespace acl_common
+} // namespace trix
} // namespace backend
} // namespace onert
-#endif // __ONERT_BACKEND_ACL_COMMON_PARENT_INFO_H__
+#endif // __ONERT_BACKEND_TRIX_TENSOR_H__
diff --git a/runtime/onert/backend/trix/TensorBuilder.h b/runtime/onert/backend/trix/TensorBuilder.h
new file mode 100644
index 000000000..ac6ca0f9a
--- /dev/null
+++ b/runtime/onert/backend/trix/TensorBuilder.h
@@ -0,0 +1,35 @@
+/*
+ * Copyright (c) 2022 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __ONERT_BACKEND_TRIX_TENSOR_BUILDER_H__
+#define __ONERT_BACKEND_TRIX_TENSOR_BUILDER_H__
+
+#include <backend/basic/TensorBuilder.h>
+
+namespace onert
+{
+namespace backend
+{
+namespace trix
+{
+
+using TensorBuilder = basic::TensorBuilder;
+
+} // namespace trix
+} // namespace backend
+} // namespace onert
+
+#endif // __ONERT_BACKEND_TRIX_TENSOR_BUILDER_H__
diff --git a/runtime/onert/backend/trix/ops/BulkLayer.cc b/runtime/onert/backend/trix/ops/BulkLayer.cc
new file mode 100644
index 000000000..71fdf3f0d
--- /dev/null
+++ b/runtime/onert/backend/trix/ops/BulkLayer.cc
@@ -0,0 +1,120 @@
+/*
+ * Copyright (c) 2022 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "BulkLayer.h"
+#include <util/logging.h>
+
+#include <libnpuhost.h>
+
+namespace onert
+{
+namespace backend
+{
+namespace trix
+{
+namespace ops
+{
+
+BulkLayer::BulkLayer() : _inputs(), _outputs(), _model_id(0), _meta(nullptr), _dev_context(nullptr)
+{
+ // DO NOTHING
+}
+
+BulkLayer::~BulkLayer() { free(_meta); }
+
+void BulkLayer::configure(const std::vector<const IPortableTensor *> &inputs,
+ std::vector<IPortableTensor *> &outputs, std::string binary_path,
+ const std::shared_ptr<DevContext> &dev_context)
+{
+ _inputs = inputs;
+ _outputs = outputs;
+ _dev_context = dev_context;
+
+ _meta = getNPUmodel_metadata(binary_path.c_str(), false);
+ if (_meta == nullptr)
+ {
+ throw std::runtime_error("Unable to extract the model metadata");
+ }
+
+ generic_buffer model_file;
+ model_file.type = BUFFER_FILE;
+ model_file.filepath = binary_path.c_str();
+ model_file.size = _meta->size;
+
+ if (registerNPUmodel(dev_context->getDev(), &model_file, &_model_id) < 0)
+ {
+ throw std::runtime_error("Failed to register npu model");
+ }
+}
+
+void BulkLayer::run()
+{
+ int req_id;
+ if (createNPU_request(_dev_context->getDev(), _model_id, &req_id))
+ {
+ throw std::runtime_error("Unable to create NPU request with model id (" +
+ std::to_string(_model_id) + ")");
+ }
+
+ if (_meta->input_seg_num != _inputs.size())
+ {
+ throw std::runtime_error("input size does not match to model input seg num");
+ }
+
+ if (_meta->output_seg_num != _outputs.size())
+ {
+ throw std::runtime_error("output size does not match to model output seg num");
+ }
+
+ tensors_data_info in_info;
+ tensors_data_info out_info;
+ _dev_context->setDataInfo<const IPortableTensor>(&in_info, _inputs);
+ _dev_context->setDataInfo<IPortableTensor>(&out_info, _outputs);
+
+ input_buffers input_buf;
+ output_buffers output_buf;
+ _dev_context->setBuffer<const IPortableTensor>(&input_buf, _inputs);
+ _dev_context->setBuffer<IPortableTensor>(&output_buf, _outputs);
+
+ if (setNPU_requestData(_dev_context->getDev(), req_id, &input_buf, &in_info, &output_buf,
+ &out_info))
+ {
+ throw std::runtime_error("Unable to create NPU request for model id (" +
+ std::to_string(_model_id) + ")");
+ }
+
+ if (submitNPU_request(_dev_context->getDev(), req_id))
+ {
+ throw std::runtime_error("Unable to submit NPU request with req id (" + std::to_string(req_id) +
+ ")");
+ }
+
+ if (removeNPU_request(_dev_context->getDev(), req_id))
+ {
+ throw std::runtime_error("Unable to remove NPU request with req id (" + std::to_string(req_id) +
+ ")");
+ }
+}
+
+void BulkLayer::prepare()
+{
+ // DO NOTHING
+}
+
+} // namespace ops
+} // namespace trix
+} // namespace backend
+} // namespace onert
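
run() drives one synchronous inference through libnpuhost's request lifecycle. Condensed below with the error handling elided; the device handle, model id, and the info/buffer structs are assumed to be prepared in configure() and by the DevContext helpers above:

    #include <libnpuhost.h>

    void runOnce(npudev_h dev, uint32_t model_id,
                 input_buffers *in_buf, tensors_data_info *in_info,
                 output_buffers *out_buf, tensors_data_info *out_info)
    {
      int req_id;
      createNPU_request(dev, model_id, &req_id);        // 1. allocate a request slot
      setNPU_requestData(dev, req_id, in_buf, in_info,  // 2. attach I/O buffers
                         out_buf, out_info);
      submitNPU_request(dev, req_id);                   // 3. synchronous inference
      removeNPU_request(dev, req_id);                   // 4. release the slot
    }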
diff --git a/runtime/onert/backend/trix/ops/BulkLayer.h b/runtime/onert/backend/trix/ops/BulkLayer.h
new file mode 100644
index 000000000..f7080ccad
--- /dev/null
+++ b/runtime/onert/backend/trix/ops/BulkLayer.h
@@ -0,0 +1,63 @@
+/*
+ * Copyright (c) 2022 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __ONERT_BACKEND_TRIX_OPS_BULKLAYER_H__
+#define __ONERT_BACKEND_TRIX_OPS_BULKLAYER_H__
+
+#include <backend/IPortableTensor.h>
+#include "../DevContext.h"
+
+#include <exec/IFunction.h>
+
+namespace onert
+{
+namespace backend
+{
+namespace trix
+{
+namespace ops
+{
+
+class BulkLayer : public ::onert::exec::IFunction
+{
+public:
+ BulkLayer();
+ ~BulkLayer();
+
+public:
+ void configure(const std::vector<const IPortableTensor *> &inputs,
+ std::vector<IPortableTensor *> &outputs, std::string binary_path,
+ const std::shared_ptr<DevContext> &dev_context);
+
+ void run() override;
+
+ void prepare() override;
+
+private:
+ std::vector<const IPortableTensor *> _inputs;
+ std::vector<IPortableTensor *> _outputs;
+
+ uint32_t _model_id;
+ npubin_meta *_meta;
+ std::shared_ptr<DevContext> _dev_context;
+};
+
+} // namespace ops
+} // namespace trix
+} // namespace backend
+} // namespace onert
+
+#endif // __ONERT_BACKEND_TRIX_OPS_BULKLAYER_H__
diff --git a/runtime/onert/backend/trix/trix.cc b/runtime/onert/backend/trix/trix.cc
new file mode 100644
index 000000000..816fb4406
--- /dev/null
+++ b/runtime/onert/backend/trix/trix.cc
@@ -0,0 +1,24 @@
+/*
+ * Copyright (c) 2022 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "Backend.h"
+
+extern "C" {
+
+onert::backend::Backend *onert_backend_create() { return new onert::backend::trix::Backend; }
+
+void onert_backend_destroy(onert::backend::Backend *backend) { delete backend; }
+}
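These extern "C" functions are the plugin entry points that the runtime's backend loader resolves when it opens the backend's shared library. A generic sketch of that resolution, assuming a dlopen-based loader and an assumed library name (this is not onert's actual loader code):

    #include <dlfcn.h>

    void *handle = dlopen("libbackend_trix.so", RTLD_LAZY | RTLD_LOCAL);
    auto create = reinterpret_cast<onert::backend::Backend *(*)()>(
      dlsym(handle, "onert_backend_create"));
    auto destroy = reinterpret_cast<void (*)(onert::backend::Backend *)>(
      dlsym(handle, "onert_backend_destroy"));
    onert::backend::Backend *backend = create();
    // ... use the backend ...
    destroy(backend);
    dlclose(handle);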
diff --git a/runtime/onert/backend/xnnpack/ops/OperationUtils.h b/runtime/onert/backend/xnnpack/ops/OperationUtils.h
index 5102e32dd..fe93fccc0 100644
--- a/runtime/onert/backend/xnnpack/ops/OperationUtils.h
+++ b/runtime/onert/backend/xnnpack/ops/OperationUtils.h
@@ -17,10 +17,10 @@
#ifndef __ONERT_BACKEND_XNNPACK_OPS_OPERATION_UTILS_H__
#define __ONERT_BACKEND_XNNPACK_OPS_OPERATION_UTILS_H__
-// duplicated from cpu/ops/OperationUtils.h
+#include <ir/DataType.h>
#include <ir/InternalType.h>
#include <ir/Padding.h>
-#include <ir/DataType.h>
+#include <util/CalculateActivationRange.h>
namespace onert
{
@@ -32,40 +32,7 @@ namespace ops
{
using OperandType = ir::DataType;
-
-template <typename T>
-void CalculateActivationRange(ir::Activation activation, T *activation_min, T *activation_max)
-{
- if (activation == ir::Activation::RELU)
- {
- *activation_min = 0;
- *activation_max = std::numeric_limits<T>::max();
- }
- else if (activation == ir::Activation::RELU6)
- {
- *activation_min = 0;
- *activation_max = 6;
- }
- else if (activation == ir::Activation::RELU1)
- {
- *activation_min = -1;
- *activation_max = 1;
- }
- else if (activation == ir::Activation::SIGMOID)
- {
- *activation_min = 0;
- *activation_max = 1;
- }
- else if (activation == ir::Activation::NONE)
- {
- *activation_min = std::numeric_limits<T>::lowest();
- *activation_max = std::numeric_limits<T>::max();
- }
- else
- {
- throw std::runtime_error{"Unsupported fused activation function"};
- }
-}
+using namespace onert::util; // CalculateActivationRange
} // namespace ops
} // namespace xnnpack
diff --git a/runtime/onert/core/include/compiler/LoweredGraph.h b/runtime/onert/core/include/compiler/LoweredGraph.h
index d3ef6d4af..10ca8e9fc 100644
--- a/runtime/onert/core/include/compiler/LoweredGraph.h
+++ b/runtime/onert/core/include/compiler/LoweredGraph.h
@@ -60,6 +60,7 @@ public:
private:
void makeLowerInfo(const compiler::BackendResolver &backend_resolver);
void dumpLowerInfo();
+ void lowerGraph(const ir::Graph &graph, const compiler::CompilerOptions &options);
private:
ir::Graph _graph;
diff --git a/runtime/onert/core/include/ir/DataType.h b/runtime/onert/core/include/ir/DataType.h
index e77c308ea..0ec0e0711 100644
--- a/runtime/onert/core/include/ir/DataType.h
+++ b/runtime/onert/core/include/ir/DataType.h
@@ -38,6 +38,7 @@ enum class DataType
QUANT_INT8_ASYMM = 9,
QUANT_INT16_ASYMM = 10,
QUANT_INT8_SYMM_PER_CHANNEL = 11,
+ QUANT_INT16_SYMM = 12,
};
size_t sizeOfDataType(DataType data_type);
diff --git a/runtime/onert/core/include/ir/Operations.Include.h b/runtime/onert/core/include/ir/Operations.Include.h
index 0eb45e1ee..4602fafec 100644
--- a/runtime/onert/core/include/ir/Operations.Include.h
+++ b/runtime/onert/core/include/ir/Operations.Include.h
@@ -24,6 +24,7 @@
#include "ir/operation/BCQGather.h"
#include "ir/operation/BinaryArithmetic.h"
#include "ir/operation/BroadcastTo.h"
+#include "ir/operation/Bulk.h"
#include "ir/operation/Comparison.h"
#include "ir/operation/Concat.h"
#include "ir/operation/Conv2D.h"
diff --git a/runtime/onert/core/include/ir/Operations.lst b/runtime/onert/core/include/ir/Operations.lst
index f17fdfdd7..f37d89505 100644
--- a/runtime/onert/core/include/ir/Operations.lst
+++ b/runtime/onert/core/include/ir/Operations.lst
@@ -27,6 +27,7 @@ OP(BCQFullyConnected)
OP(BCQGather)
OP(BinaryArithmetic)
OP(BroadcastTo)
+OP(Bulk)
OP(Comparison)
OP(Concat)
OP(Conv2D)
diff --git a/runtime/onert/core/include/ir/operation/Bulk.h b/runtime/onert/core/include/ir/operation/Bulk.h
new file mode 100644
index 000000000..1825f7fad
--- /dev/null
+++ b/runtime/onert/core/include/ir/operation/Bulk.h
@@ -0,0 +1,53 @@
+/*
+ * Copyright (c) 2022 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __ONERT_IR_OPERATION_BULK_H__
+#define __ONERT_IR_OPERATION_BULK_H__
+
+#include "ir/Operation.h"
+
+namespace onert
+{
+namespace ir
+{
+namespace operation
+{
+
+class Bulk : public Operation
+{
+public:
+ struct Param
+ {
+ std::string binary_path;
+ };
+
+public:
+ Bulk(const OperandIndexSequence &inputs, const OperandIndexSequence &outputs, const Param &param);
+
+public:
+ void accept(OperationVisitor &v) const override;
+ OpCode opcode() const final { return OpCode::Bulk; }
+ const Param &param() const { return _param; }
+
+private:
+ Param _param;
+};
+
+} // namespace operation
+} // namespace ir
+} // namespace onert
+
+#endif // __ONERT_IR_OPERATION_BULK_H__
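For illustration, constructing a Bulk node requires only the operand sequences plus the NPU binary path carried in Param; the path and the addOperation call below are hypothetical:

    ir::operation::Bulk::Param param{"/path/to/model.tvn"};
    auto bulk = std::make_unique<ir::operation::Bulk>(inputs, outputs, param);
    graph.addOperation(std::move(bulk)); // assuming an ir::Graph named graph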
diff --git a/runtime/onert/core/include/util/CalculateActivationRange.h b/runtime/onert/core/include/util/CalculateActivationRange.h
new file mode 100644
index 000000000..db76f9dde
--- /dev/null
+++ b/runtime/onert/core/include/util/CalculateActivationRange.h
@@ -0,0 +1,64 @@
+/*
+ * Copyright (c) 2022 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __ONERT_UTIL_CALCULATE_ACTIVATION_RANGE_H__
+#define __ONERT_UTIL_CALCULATE_ACTIVATION_RANGE_H__
+
+#include "ir/InternalType.h"
+
+namespace onert
+{
+namespace util
+{
+
+template <typename T>
+void CalculateActivationRange(ir::Activation activation, T *activation_min, T *activation_max)
+{
+ if (activation == ir::Activation::RELU)
+ {
+ *activation_min = 0;
+ *activation_max = std::numeric_limits<T>::max();
+ }
+ else if (activation == ir::Activation::RELU6)
+ {
+ *activation_min = 0;
+ *activation_max = 6;
+ }
+ else if (activation == ir::Activation::RELU1)
+ {
+ *activation_min = -1;
+ *activation_max = 1;
+ }
+ else if (activation == ir::Activation::SIGMOID)
+ {
+ *activation_min = 0;
+ *activation_max = 1;
+ }
+ else if (activation == ir::Activation::NONE)
+ {
+ *activation_min = std::numeric_limits<T>::lowest();
+ *activation_max = std::numeric_limits<T>::max();
+ }
+ else
+ {
+ throw std::runtime_error{"Unsupported fused activation function."};
+ }
+}
+
+} // namespace util
+} // namespace onert
+
+#endif // __ONERT_UTIL_CALCULATE_ACTIVATION_RANGE_H__
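Callers are unchanged apart from the namespace move; for example, RELU6 clamps to [0, 6] while NONE yields the full numeric range of T:

    float act_min = 0.0f, act_max = 0.0f;
    onert::util::CalculateActivationRange(onert::ir::Activation::RELU6, &act_min, &act_max);
    // act_min == 0.0f, act_max == 6.0f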
diff --git a/runtime/onert/core/src/compiler/Compiler.cc b/runtime/onert/core/src/compiler/Compiler.cc
index 93792dd1c..6a1d8fcec 100644
--- a/runtime/onert/core/src/compiler/Compiler.cc
+++ b/runtime/onert/core/src/compiler/Compiler.cc
@@ -64,6 +64,52 @@ std::string getOpBackends(std::unordered_map<ir::OpCode, std::string> &opcode_to
return opbackends;
}
+void verboseOptions(compiler::CompilerOptions &options)
+{
+ VERBOSE(Compiler) << std::boolalpha << "==== Compiler Options ====" << std::endl;
+ VERBOSE(Compiler) << "backend_list : "
+ << nnfw::misc::join(options.backend_list.begin(), options.backend_list.end(),
+ "/")
+ << std::endl;
+ VERBOSE(Compiler) << "trace_filepath : " << options.trace_filepath << std::endl;
+ VERBOSE(Compiler) << "graph_dump_level : " << options.graph_dump_level << std::endl;
+ VERBOSE(Compiler) << "executor : " << options.executor << std::endl;
+ VERBOSE(Compiler) << "manual backend_for_all : "
+ << options.manual_scheduler_options.backend_for_all << std::endl;
+ VERBOSE(Compiler) << "manual_scheduler_options : "
+ << getOpBackends(options.manual_scheduler_options.opcode_to_backend)
+ << std::endl;
+ VERBOSE(Compiler) << "he_scheduler : " << options.he_scheduler << std::endl;
+ VERBOSE(Compiler) << "he_profiling_mode : " << options.he_profiling_mode << std::endl;
+ VERBOSE(Compiler) << "disable_compile : " << options.disable_compile << std::endl;
+ VERBOSE(Compiler) << "fp16_enable : " << options.fp16_enable << std::endl
+ << std::noboolalpha;
+}
+
+void setBackendMap(compiler::ManualSchedulerOptions &ms_options, const ir::Subgraphs &subgs,
+ const std::string &str)
+{
+ // TODO Support multiple subgraphs for manual scheduling
+ auto key_val_list = nnfw::misc::split(str, ';');
+ for (const auto &key_val_str : key_val_list)
+ {
+ if (key_val_str.empty())
+ {
+ continue;
+ }
+
+ auto key_val = nnfw::misc::split(key_val_str, '=');
+ const auto &key_str = key_val.at(0);
+ const auto &val = key_val.at(1);
+ auto key = static_cast<uint32_t>(std::stoi(key_str));
+
+ subgs.at(ir::SubgraphIndex{0})
+ ->operations()
+    .at(ir::OperationIndex{key}); // Check that the operation exists; if not, this will throw
+ ms_options.index_to_backend.emplace(ir::OperationIndex{key}, val);
+ }
+}
+
} // namespace
namespace onert
@@ -104,26 +150,8 @@ CompilerOptions fetchCompilerOptionsFromGlobalConfig(const ir::Subgraphs &subgs)
#undef OP
// Index to Backend
- // TODO Support multiple subgraphs for manual scheduling
auto map_str = util::getConfigString(util::config::OP_BACKEND_MAP);
- auto key_val_list = nnfw::misc::split(map_str, ';');
- for (const auto &key_val_str : key_val_list)
- {
- if (key_val_str.empty())
- {
- continue;
- }
-
- auto key_val = nnfw::misc::split(key_val_str, '=');
- const auto &key_str = key_val.at(0);
- const auto &val = key_val.at(1);
- auto key = static_cast<uint32_t>(std::stoi(key_str));
-
- subgs.at(ir::SubgraphIndex{0})
- ->operations()
- .at(ir::OperationIndex{key}); // Check if exist, or this wil throw
- ms_options.index_to_backend.emplace(ir::OperationIndex{key}, val);
- }
+ setBackendMap(ms_options, subgs, map_str);
}
return options;
}
@@ -143,22 +171,10 @@ void Compiler::enableToFp16() { _options.fp16_enable = true; }
void Compiler::set_backend_from_str(const char *backend_settings)
{
+ assert(_subgraphs != nullptr);
// Backend for all
auto &ms_options = _options.manual_scheduler_options;
- auto key_val_list = nnfw::misc::split(backend_settings, ';');
- for (const auto &key_val_str : key_val_list)
- {
- if (key_val_str.empty())
- {
- continue;
- }
-
- auto key_val = nnfw::misc::split(key_val_str, '=');
- const auto &key_str = key_val.at(0);
- const auto &val = key_val.at(1);
- auto key = static_cast<uint32_t>(std::stoi(key_str));
- ms_options.index_to_backend.emplace(ir::OperationIndex{key}, val);
- }
+ setBackendMap(ms_options, *_subgraphs, std::string{backend_settings});
}
void Compiler::checkProfilerConditions()
@@ -344,26 +360,7 @@ std::shared_ptr<exec::ExecutorMap> Compiler::compile(void)
_options.manual_scheduler_options.opcode_to_backend[ir::OpCode::BCQGather] = "bcq";
}
- {
- VERBOSE(Compiler) << std::boolalpha << "==== Compiler Options ====" << std::endl;
- VERBOSE(Compiler) << "backend_list : "
- << nnfw::misc::join(_options.backend_list.begin(),
- _options.backend_list.end(), "/")
- << std::endl;
- VERBOSE(Compiler) << "trace_filepath : " << _options.trace_filepath << std::endl;
- VERBOSE(Compiler) << "graph_dump_level : " << _options.graph_dump_level << std::endl;
- VERBOSE(Compiler) << "executor : " << _options.executor << std::endl;
- VERBOSE(Compiler) << "manual backend_for_all : "
- << _options.manual_scheduler_options.backend_for_all << std::endl;
- VERBOSE(Compiler) << "manual_scheduler_options : "
- << getOpBackends(_options.manual_scheduler_options.opcode_to_backend)
- << std::endl;
- VERBOSE(Compiler) << "he_scheduler : " << _options.he_scheduler << std::endl;
- VERBOSE(Compiler) << "he_profiling_mode : " << _options.he_profiling_mode << std::endl;
- VERBOSE(Compiler) << "disable_compile : " << _options.disable_compile << std::endl;
- VERBOSE(Compiler) << "fp16_enable : " << _options.fp16_enable << std::endl
- << std::noboolalpha;
- }
+ verboseOptions(_options);
_subgraphs->iterate([&](const ir::SubgraphIndex &, ir::Graph &subg) {
// Mandatory passes
@@ -544,26 +541,7 @@ std::vector<std::shared_ptr<exec::ExecutorMap>> Compiler::compile(const char *pa
_options.tracing_ctx = nullptr;
}
- {
- VERBOSE(Compiler) << std::boolalpha << "==== Compiler Options ====" << std::endl;
- VERBOSE(Compiler) << "backend_list : "
- << nnfw::misc::join(_options.backend_list.begin(),
- _options.backend_list.end(), "/")
- << std::endl;
- VERBOSE(Compiler) << "trace_filepath : " << _options.trace_filepath << std::endl;
- VERBOSE(Compiler) << "graph_dump_level : " << _options.graph_dump_level << std::endl;
- VERBOSE(Compiler) << "executor : " << _options.executor << std::endl;
- VERBOSE(Compiler) << "manual backend_for_all : "
- << _options.manual_scheduler_options.backend_for_all << std::endl;
- VERBOSE(Compiler) << "manual_scheduler_options : "
- << getOpBackends(_options.manual_scheduler_options.opcode_to_backend)
- << std::endl;
- VERBOSE(Compiler) << "he_scheduler : " << _options.he_scheduler << std::endl;
- VERBOSE(Compiler) << "he_profiling_mode : " << _options.he_profiling_mode << std::endl;
- VERBOSE(Compiler) << "disable_compile : " << _options.disable_compile << std::endl;
- VERBOSE(Compiler) << "fp16_enable : " << _options.fp16_enable << std::endl
- << std::noboolalpha;
- }
+ verboseOptions(_options);
_subgraphs->iterate([&](const ir::SubgraphIndex &, ir::Graph &subg) {
// Mandatory passes
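The string format accepted by setBackendMap (and therefore by Compiler::set_backend_from_str and the OP_BACKEND_MAP config) is a semicolon-separated list of operation-index=backend pairs; empty entries are skipped and an index missing from subgraph 0 throws. A sketch with hypothetical indices and backend ids:

    // Assign operation 0 to cpu and operation 3 to acl_cl:
    compiler.set_backend_from_str("0=cpu;3=acl_cl");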
diff --git a/runtime/onert/core/src/compiler/ExecutorFactory.cc b/runtime/onert/core/src/compiler/ExecutorFactory.cc
index ba038e935..f9db1ca89 100644
--- a/runtime/onert/core/src/compiler/ExecutorFactory.cc
+++ b/runtime/onert/core/src/compiler/ExecutorFactory.cc
@@ -16,26 +16,25 @@
#include "ExecutorFactory.h"
-#include <deque>
-#include <functional>
-#include "ir/OperationCloner.h"
-#include "exec/ExecutionObservers.h"
-#include "exec/LinearExecutor.h"
-#include "exec/DataflowExecutor.h"
-#include "exec/ParallelExecutor.h"
-#include "compiler/BackendManager.h"
-#include "compiler/ExecutionBuilder.h"
-#include "exec/ExecTime.h"
-#include "compiler/Linear.h"
-#include "compiler/BackendManager.h"
-#include "backend/IPortableTensor.h"
#include "backend/builtin/Config.h"
#include "backend/builtin/KernelGenerator.h"
-#include "backend/builtin/UserTensor.h"
#include "backend/builtin/TensorBuilder.h"
-#include "util/TracingCtx.h"
+#include "backend/builtin/UserTensor.h"
+#include "backend/IPortableTensor.h"
+#include "compiler/BackendManager.h"
+#include "compiler/BackendManager.h"
+#include "compiler/ExecutionBuilder.h"
+#include "compiler/Linear.h"
#include "dumper/text/GraphDumper.h"
+#include "exec/DataflowExecutor.h"
+#include "exec/ExecTime.h"
+#include "exec/ExecutionObservers.h"
+#include "exec/LinearExecutor.h"
+#include "exec/ParallelExecutor.h"
+#include "ir/OperationCloner.h"
+#include "util/TracingCtx.h"
+#include <functional>
#include <memory>
namespace onert
@@ -282,6 +281,42 @@ void ExecutorFactory::prepareMigrantTensors(compiler::LoweredGraph &lowered_grap
});
}
+void ExecutorFactory::prepareBuiltinBackend(const TensorRegistries &tensor_regs,
+ const std::shared_ptr<exec::ExecutorMap> &executor_map,
+ const backend::BackendContexts &backend_contexts)
+{
+ for (auto &pair : backend_contexts)
+ {
+ auto builtin_context = dynamic_cast<backend::builtin::BackendContext *>(pair.second.get());
+ if (builtin_context != nullptr)
+ {
+ auto builtin_kernel_gen = builtin_context->kernel_gen;
+ builtin_kernel_gen->setTensorRegistries(tensor_regs);
+ builtin_kernel_gen->setExecutorMap(executor_map);
+ }
+ }
+}
+
+std::deque<std::pair<const backend::Backend *, backend::BackendContext *>>
+ExecutorFactory::orderBackendContext(const backend::BackendContexts &backend_contexts)
+{
+ std::deque<std::pair<const backend::Backend *, backend::BackendContext *>> ordered_contexts;
+
+ for (auto &pair : backend_contexts)
+ {
+    // NOTE The builtin backend must be processed last.
+    // This is because the Permute layer is the only operation that may have different
+    // ITensor objects for its input and output, and it requires that all other backends'
+    // tensors be ready to use.
+ if (pair.first->config()->id() == "builtin")
+ ordered_contexts.emplace_back(pair.first, pair.second.get());
+ else
+ ordered_contexts.emplace_front(pair.first, pair.second.get());
+ }
+
+ return ordered_contexts;
+}
+
exec::IExecutor *
ExecutorFactory::createLinearExecutor(std::unique_ptr<compiler::LoweredGraph> lowered_graph,
const compiler::CompilerOptions &options,
@@ -311,32 +346,12 @@ ExecutorFactory::createLinearExecutor(std::unique_ptr<compiler::LoweredGraph> lo
prepareMigrantTensors(*lowered_graph, backend_contexts);
// Give some runtime objects to builtin KernelGenerator
- for (auto &pair : backend_contexts)
- {
- auto builtin_context = dynamic_cast<backend::builtin::BackendContext *>(pair.second.get());
- if (builtin_context != nullptr)
- {
- auto builtin_kernel_gen = builtin_context->kernel_gen;
- builtin_kernel_gen->setTensorRegistries(tensor_regs);
- builtin_kernel_gen->setExecutorMap(executor_map);
- }
- }
+ prepareBuiltinBackend(tensor_regs, executor_map, backend_contexts);
ExecutionBuilder builder;
// Adjust the order of backends for the upcoming iteration
- std::deque<std::pair<const backend::Backend *, backend::BackendContext *>> ordered_contexts;
- for (auto &pair : backend_contexts)
- {
- // NOTE builtin backend must be processed lastly.
- // This is because of Permute layer's specialty which is the only operation that could have
- // different ITensor objects for the input and the output. And it requires all other backends'
- // tensors are ready to use.
- if (pair.first->config()->id() == "builtin")
- ordered_contexts.emplace_back(pair.first, pair.second.get());
- else
- ordered_contexts.emplace_front(pair.first, pair.second.get());
- }
+ auto ordered_contexts = orderBackendContext(backend_contexts);
// Simulate the execution for deallocation of tensors
std::unordered_map<ir::OperationIndex, DeallocList> dealloc_list_map;
@@ -447,32 +462,12 @@ exec::IExecutor *ExecutorFactory::createDataflowExecutor(
prepareMigrantTensors(*lowered_graph, backend_contexts);
// Give some runtime objects to builtin KernelGenerator
- for (auto &pair : backend_contexts)
- {
- auto builtin_context = dynamic_cast<backend::builtin::BackendContext *>(pair.second.get());
- if (builtin_context != nullptr)
- {
- auto builtin_kernel_gen = builtin_context->kernel_gen;
- builtin_kernel_gen->setTensorRegistries(tensor_regs);
- builtin_kernel_gen->setExecutorMap(executor_map);
- }
- }
+ prepareBuiltinBackend(tensor_regs, executor_map, backend_contexts);
ExecutionBuilder builder;
// Adjust the order of backends for the upcoming iteration
- std::deque<std::pair<const backend::Backend *, backend::BackendContext *>> ordered_contexts;
- for (auto &pair : backend_contexts)
- {
- // NOTE builtin backend must be processed lastly.
- // This is because of Permute layer's specialty which is the only operation that could have
- // different ITensor objects for the input and the output. And it requires all other backends'
- // tensors are ready to use.
- if (pair.first->config()->id() == "builtin")
- ordered_contexts.emplace_back(pair.first, pair.second.get());
- else
- ordered_contexts.emplace_front(pair.first, pair.second.get());
- }
+ auto ordered_contexts = orderBackendContext(backend_contexts);
// Generate kernels
for (auto &pair : ordered_contexts)
diff --git a/runtime/onert/core/src/compiler/ExecutorFactory.h b/runtime/onert/core/src/compiler/ExecutorFactory.h
index 5fe1617a6..2ee05fae3 100644
--- a/runtime/onert/core/src/compiler/ExecutorFactory.h
+++ b/runtime/onert/core/src/compiler/ExecutorFactory.h
@@ -17,12 +17,14 @@
#ifndef __ONERT_COMPILER_EXECUTOR_FACTORY_H__
#define __ONERT_COMPILER_EXECUTOR_FACTORY_H__
-#include <unordered_map>
+#include "TensorRegistries.h"
#include "backend/ITensor.h"
-#include "exec/IExecutor.h"
#include "compiler/LoweredGraph.h"
-#include "TensorRegistries.h"
+#include "exec/IExecutor.h"
+
+#include <deque>
+#include <unordered_map>
namespace onert
{
@@ -45,6 +47,12 @@ private:
private:
static void prepareMigrantTensors(compiler::LoweredGraph &lowered_graph,
const backend::BackendContexts &backend_contexts);
+ static void prepareBuiltinBackend(const TensorRegistries &tensor_regs,
+ const std::shared_ptr<exec::ExecutorMap> &executor_map,
+ const backend::BackendContexts &backend_contexts);
+ static std::deque<std::pair<const backend::Backend *, backend::BackendContext *>>
+ orderBackendContext(const backend::BackendContexts &backend_contexts);
+
static exec::IExecutor *
createLinearExecutor(std::unique_ptr<compiler::LoweredGraph> lowered_graph,
const compiler::CompilerOptions &options,
diff --git a/runtime/onert/core/src/compiler/LoweredGraph.cc b/runtime/onert/core/src/compiler/LoweredGraph.cc
index 3b84d02de..999bffa7c 100644
--- a/runtime/onert/core/src/compiler/LoweredGraph.cc
+++ b/runtime/onert/core/src/compiler/LoweredGraph.cc
@@ -42,85 +42,19 @@ namespace compiler
LoweredGraph::LoweredGraph(const ir::Graph &graph, const CompilerOptions &options) : _graph{graph}
{
- // set tracing_ctx for copied graph
- if (options.tracing_ctx)
- {
- auto subgraph_index = options.tracing_ctx->getSubgraphIndex(&graph);
- options.tracing_ctx->setSubgraphIndex(&_graph, subgraph_index.value());
- }
-
- // Build backend contexts
- auto &backend_manager = BackendManager::get();
- // Create contexts for other backends
- for (auto backend_str : options.backend_list)
- {
- backend_manager.loadBackend(backend_str);
- auto backend = backend_manager.get(backend_str);
-
- // TODO As the default value of backend list contains "cpu", "acl_cl" and "acl_neon", and some
- // are not available on x64 or some other platforms. So this may be a workaround for x64 and
- // we should change it back(throw if backend is not loaded) later.
- if (!backend)
- {
- VERBOSE(LoweredGraph) << "Cannot load backend - " << backend_str << std::endl;
- continue;
- }
- }
- if (backend_manager.num_backends() == 0)
- throw std::runtime_error{"No available backends loaded."};
-
- // TODO Move "schedule" phase out of here
- // Schedule
- std::unique_ptr<BackendResolver> backend_resolver;
- auto all_backends = backend_manager.getAll();
- if (options.he_scheduler)
- {
- auto scheduler = HEScheduler(all_backends, options);
- backend_resolver = scheduler.schedule(_graph);
- _indexed_ranks = scheduler.getIndexedRanks();
- }
- else
- {
- auto scheduler = ManualScheduler(all_backends, options);
- backend_resolver = scheduler.schedule(_graph);
- }
-
- makeLowerInfo(*backend_resolver);
- VERBOSE(LoweredGraph) << "dump before mandatory passes" << std::endl;
- dumper::text::dumpLoweredGraph(*this);
-
- // Mandatory passes - kind of legalization(?)
- pass::PassRunner{}
- .append(std::make_unique<pass::ConstantInsertionPass>(*this))
- .append(std::make_unique<pass::ConstantLoweringPass>(*this))
- .append(std::make_unique<pass::PermutationOperationPass>(*this))
- .append(std::make_unique<pass::PermutationInsertionPass>(*this))
- .run();
-
- dumpLowerInfo();
-
- // Optimization passes (optional)
- pass::PassRunner{}.append(std::make_unique<pass::PermutationEliminationPass>(*this)).run();
-
- VERBOSE(LoweredGraph) << "Dump after all the passes" << std::endl;
- for (auto operand : _graph.getInputs())
- VERBOSE(LoweredGraph) << "Graph Input : " << operand << std::endl;
- for (auto operand : _graph.getOutputs())
- VERBOSE(LoweredGraph) << "Graph Output : " << operand << std::endl;
- dumper::text::dumpLoweredGraph(*this);
-
- // Graph verifications
- {
- assert(ir::verifier::InputOutputChecker().verify(_graph));
- assert(ir::verifier::DAGChecker().verify(_graph));
- assert(ir::verifier::EdgeChecker().verify(_graph));
- }
+ lowerGraph(graph, options);
}
+// TODO Design a better class and constructor to represent parent_graph
LoweredGraph::LoweredGraph(const ir::Graph &parent_graph, const ir::Graph &graph,
const CompilerOptions &options)
: _graph{graph}, _parent_graph{parent_graph}
{
+ lowerGraph(graph, options);
+}
+
+void LoweredGraph::lowerGraph(const ir::Graph &graph, const CompilerOptions &options)
+{
// set tracing_ctx for copied graph
if (options.tracing_ctx)
{
diff --git a/runtime/onert/core/src/exec/IPermuteFunction.h b/runtime/onert/core/src/exec/IPermuteFunction.h
index 8e343cffa..eb54b67ae 100644
--- a/runtime/onert/core/src/exec/IPermuteFunction.h
+++ b/runtime/onert/core/src/exec/IPermuteFunction.h
@@ -145,6 +145,9 @@ protected:
case ir::DataType::INT64:
permute<int64_t>(src_tensor, dst_tensor, rank, src_offsets, dst_offsets);
break;
+ case ir::DataType::QUANT_INT16_SYMM:
+ permute<int16_t>(src_tensor, dst_tensor, rank, src_offsets, dst_offsets);
+ break;
default:
throw std::runtime_error("IPermuteFunction: Not supported data type");
break;
@@ -338,6 +341,8 @@ protected:
case ir::DataType::QUANT_INT8_ASYMM:
case ir::DataType::QUANT_INT8_SYMM:
return typeid(int8_t);
+ case ir::DataType::QUANT_INT16_SYMM:
+ return typeid(int16_t);
default:
throw std::runtime_error("IPermuteFunction: Not supported data type");
}
diff --git a/runtime/onert/core/src/ir/DataType.cc b/runtime/onert/core/src/ir/DataType.cc
index 8e75c4f53..07670c720 100644
--- a/runtime/onert/core/src/ir/DataType.cc
+++ b/runtime/onert/core/src/ir/DataType.cc
@@ -50,6 +50,8 @@ size_t sizeOfDataType(DataType data_type)
return sizeof(int64_t);
case DataType::QUANT_INT16_ASYMM:
return sizeof(int16_t);
+ case DataType::QUANT_INT16_SYMM:
+ return sizeof(int16_t);
default:
throw std::runtime_error{"Unsupported type size"};
}
diff --git a/runtime/onert/core/src/ir/OperationDumper.cc b/runtime/onert/core/src/ir/OperationDumper.cc
index 80e2a3f7a..0b596ff13 100644
--- a/runtime/onert/core/src/ir/OperationDumper.cc
+++ b/runtime/onert/core/src/ir/OperationDumper.cc
@@ -29,19 +29,21 @@ using namespace operation;
namespace
{
-void dumpUnaryInputOp(const Operation &node, const std::string &adding_input = "")
+
+// Dump all input and output.
+// Use this function when there is no special input or(and) output.
+void dumpOpGeneric(const Operation &node, const std::string &adding_input = "")
{
VERBOSE(LIR) << "* " << node.name() << std::endl;
- VERBOSE(LIR) << " - Inputs : Input(" << node.getInputs().at(0) << ") " << adding_input
- << std::endl;
- VERBOSE(LIR) << " - Output : Output(" << node.getOutputs().at(0) << ")" << std::endl;
+ VERBOSE(LIR) << " - Inputs : Input(" << node.getInputs() << ") " << adding_input << std::endl;
+ VERBOSE(LIR) << " - Output : Output(" << node.getOutputs() << ")" << std::endl;
}
-void dumpBinaryInputOp(const Operation &node, const std::string &adding_input = "")
+void dumpUnaryInputOp(const Operation &node, const std::string &adding_input = "")
{
VERBOSE(LIR) << "* " << node.name() << std::endl;
- VERBOSE(LIR) << " - Inputs : Input(" << node.getInputs().at(0) << ", " << node.getInputs().at(1)
- << ") " << adding_input << std::endl;
+ VERBOSE(LIR) << " - Inputs : Input(" << node.getInputs().at(0) << ") " << adding_input
+ << std::endl;
VERBOSE(LIR) << " - Output : Output(" << node.getOutputs().at(0) << ")" << std::endl;
}
@@ -53,18 +55,6 @@ void dumpConvOp(const Operation &node, const std::string &padding_type)
<< node.getInputs().at(Conv2D::Input::BIAS) << ")" << std::endl;
VERBOSE(LIR) << " - Output : OFM(" << node.getOutputs().at(0) << ")" << std::endl;
}
-
-void dumpPackingOp(const Operation &node)
-{
- VERBOSE(LIR) << "* " << node.name() << std::endl;
- std::string inputs;
- for (auto i : node.getInputs())
- {
- inputs += std::to_string(i.value()) + ",";
- }
- VERBOSE(LIR) << " - Inputs : Inputs(" << inputs << ")" << std::endl;
- VERBOSE(LIR) << " - Output : Output(" << node.getOutputs().at(0) << ")" << std::endl;
-}
} // namespace
OperationDumper::OperationDumper(const std::string &start_msg)
@@ -86,7 +76,7 @@ void OperationDumper::visit(const BatchToSpaceND &node)
std::string block_size =
"BlockSize(" + std::to_string(node.getInputs().at(BatchToSpaceND::Input::BLOCK_SIZE).value()) +
")";
- dumpUnaryInputOp(node, block_size);
+ dumpOpGeneric(node, block_size);
}
void OperationDumper::visit(const BCQFullyConnected &node)
@@ -103,13 +93,13 @@ void OperationDumper::visit(const BCQFullyConnected &node)
VERBOSE(LIR) << " - Output : OFM(" << node.getOutputs().at(0) << ")" << std::endl;
}
-void OperationDumper::visit(const BinaryArithmetic &node) { dumpBinaryInputOp(node); }
+void OperationDumper::visit(const BinaryArithmetic &node) { dumpOpGeneric(node); }
-void OperationDumper::visit(const operation::BroadcastTo &node) { dumpBinaryInputOp(node); }
+void OperationDumper::visit(const operation::BroadcastTo &node) { dumpOpGeneric(node); }
-void OperationDumper::visit(const Comparison &node) { dumpBinaryInputOp(node); }
+void OperationDumper::visit(const Comparison &node) { dumpOpGeneric(node); }
-void OperationDumper::visit(const Concat &node) { dumpPackingOp(node); }
+void OperationDumper::visit(const Concat &node) { dumpOpGeneric(node); }
void OperationDumper::visit(const Conv2D &node)
{
@@ -118,11 +108,11 @@ void OperationDumper::visit(const Conv2D &node)
dumpConvOp(node, padding_type);
}
-void OperationDumper::visit(const ConvertFp16ToFp32 &node) { dumpUnaryInputOp(node); }
+void OperationDumper::visit(const ConvertFp16ToFp32 &node) { dumpOpGeneric(node); }
-void OperationDumper::visit(const ConvertFp32ToFp16 &node) { dumpUnaryInputOp(node); }
+void OperationDumper::visit(const ConvertFp32ToFp16 &node) { dumpOpGeneric(node); }
-void OperationDumper::visit(const DepthToSpace &node) { dumpUnaryInputOp(node); }
+void OperationDumper::visit(const DepthToSpace &node) { dumpOpGeneric(node); }
void OperationDumper::visit(const DepthwiseConv2D &node)
{
@@ -143,12 +133,12 @@ void OperationDumper::visit(const ElementwiseActivation &node)
{
params = " alpha value(" + std::to_string(node.param().alpha) + ")";
}
- dumpUnaryInputOp(node, params);
+ dumpOpGeneric(node, params);
}
-void OperationDumper::visit(const ElementwiseBinary &node) { dumpBinaryInputOp(node); }
+void OperationDumper::visit(const ElementwiseBinary &node) { dumpOpGeneric(node); }
-void OperationDumper::visit(const ElementwiseUnary &node) { dumpUnaryInputOp(node); }
+void OperationDumper::visit(const ElementwiseUnary &node) { dumpOpGeneric(node); }
void OperationDumper::visit(const EmbeddingLookup &node)
{
@@ -208,9 +198,9 @@ void OperationDumper::visit(const InstanceNorm &node)
dumpUnaryInputOp(node, inputs);
}
-void OperationDumper::visit(const L2Normalization &node) { dumpUnaryInputOp(node); }
+void OperationDumper::visit(const L2Normalization &node) { dumpOpGeneric(node); }
-void OperationDumper::visit(const LocalResponseNormalization &node) { dumpUnaryInputOp(node); }
+void OperationDumper::visit(const LocalResponseNormalization &node) { dumpOpGeneric(node); }
void OperationDumper::visit(const LSTM &node)
{
@@ -258,7 +248,7 @@ void OperationDumper::visit(const LSTM &node)
<< node.getOutputs().at(LSTM::Output::OUTPUT) << ")" << std::endl;
}
-void OperationDumper::visit(const Pack &node) { dumpPackingOp(node); }
+void OperationDumper::visit(const Pack &node) { dumpOpGeneric(node); }
void OperationDumper::visit(const Pad &node)
{
@@ -297,16 +287,16 @@ void OperationDumper::visit(const Pool2D &node)
VERBOSE(LIR) << " - Output : OFM(" << node.getOutputs().at(0) << ")" << std::endl;
}
-void OperationDumper::visit(const Pow &node) { dumpBinaryInputOp(node); }
+void OperationDumper::visit(const Pow &node) { dumpOpGeneric(node); }
void OperationDumper::visit(const PReLU &node)
{
std::string alpha =
"Alpha(" + std::to_string(node.getInputs().at(PReLU::Input::ALPHA).value()) + ")";
- dumpUnaryInputOp(node, alpha);
+ dumpOpGeneric(node, alpha);
}
-void OperationDumper::visit(const Rank &node) { dumpUnaryInputOp(node); }
+void OperationDumper::visit(const Rank &node) { dumpOpGeneric(node); }
void OperationDumper::visit(const Reduce &node) { dumpUnaryInputOp(node); }
@@ -320,37 +310,9 @@ void OperationDumper::visit(const Reshape &node)
dumpUnaryInputOp(node, shape);
}
-void OperationDumper::visit(const ResizeBilinear &node)
-{
- if (node.getInputs().size() == 1)
- {
- dumpUnaryInputOp(node);
- }
- else if (node.getInputs().size() == 2)
- {
- dumpBinaryInputOp(node);
- }
- else
- {
- VERBOSE(LIR) << "* " << node.name() << " is set wrong" << std::endl;
- }
-}
+void OperationDumper::visit(const ResizeBilinear &node) { dumpOpGeneric(node); }
-void OperationDumper::visit(const ResizeNearestNeighbor &node)
-{
- if (node.getInputs().size() == 1)
- {
- dumpUnaryInputOp(node);
- }
- else if (node.getInputs().size() == 2)
- {
- dumpBinaryInputOp(node);
- }
- else
- {
- VERBOSE(LIR) << "* " << node.name() << " is set wrong" << std::endl;
- }
-}
+void OperationDumper::visit(const ResizeNearestNeighbor &node) { dumpOpGeneric(node); }
void OperationDumper::visit(const Reverse &node)
{
@@ -391,9 +353,9 @@ void OperationDumper::visit(const Select &node)
VERBOSE(LIR) << " - Output : Output(" << node.getOutputs().at(0) << ")" << std::endl;
}
-void OperationDumper::visit(const ir::operation::Shape &node) { dumpUnaryInputOp(node); }
+void OperationDumper::visit(const ir::operation::Shape &node) { dumpOpGeneric(node); }
-void OperationDumper::visit(const Softmax &node) { dumpUnaryInputOp(node); }
+void OperationDumper::visit(const Softmax &node) { dumpOpGeneric(node); }
void OperationDumper::visit(const SpaceToBatchND &node)
{
@@ -404,11 +366,11 @@ void OperationDumper::visit(const SpaceToBatchND &node)
dumpUnaryInputOp(node, inputs);
}
-void OperationDumper::visit(const SpaceToDepth &node) { dumpUnaryInputOp(node); }
+void OperationDumper::visit(const SpaceToDepth &node) { dumpOpGeneric(node); }
-void OperationDumper::visit(const Split &node) { dumpBinaryInputOp(node); }
+void OperationDumper::visit(const Split &node) { dumpOpGeneric(node); }
-void OperationDumper::visit(const SquaredDifference &node) { dumpBinaryInputOp(node); }
+void OperationDumper::visit(const SquaredDifference &node) { dumpOpGeneric(node); }
void OperationDumper::visit(const StatelessRandomUniform &node)
{
@@ -419,7 +381,7 @@ void OperationDumper::visit(const StatelessRandomUniform &node)
VERBOSE(LIR) << " - Output : Output(" << node.getOutputs().at(0) << ")" << std::endl;
}
-void OperationDumper::visit(const Squeeze &node) { dumpUnaryInputOp(node); }
+void OperationDumper::visit(const Squeeze &node) { dumpOpGeneric(node); }
void OperationDumper::visit(const Slice &node) { dumpUnaryInputOp(node); }
@@ -454,22 +416,14 @@ void OperationDumper::visit(const TransposeConv &node)
VERBOSE(LIR) << " - Output : OFM(" << node.getOutputs().at(0) << ")" << std::endl;
}
-void OperationDumper::visit(const Transpose &node) { dumpBinaryInputOp(node); }
+void OperationDumper::visit(const Transpose &node) { dumpOpGeneric(node); }
void OperationDumper::visit(const Unpack &node)
{
VERBOSE(LIR) << "* " << node.name() << std::endl;
VERBOSE(LIR) << " - Inputs : Input(" << node.getInputs().at(Unpack::Input::INPUT) << ")"
<< std::endl;
- std::string outputs;
- const auto &output_indices = node.getOutputs();
- for (auto it = std::begin(output_indices); it != std::end(output_indices); ++it)
- {
- outputs += std::to_string(it->value());
- if (std::next(it) != std::end(output_indices))
- outputs += ", ";
- }
- VERBOSE(LIR) << " - Outputs : Outputs(" << outputs << ")" << std::endl;
+ VERBOSE(LIR) << " - Output : Outputs(" << node.getOutputs() << ")" << std::endl;
}
void OperationDumper::visit(const OneHot &node)
@@ -483,51 +437,21 @@ void OperationDumper::visit(const OneHot &node)
void OperationDumper::visit(const If &node)
{
VERBOSE(LIR) << "* " << node.name() << std::endl;
- std::string inputs;
- const auto &input_indices = node.getInputs();
- for (auto it = std::begin(input_indices); it != std::end(input_indices); ++it)
- {
- inputs += std::to_string(it->value());
- if (std::next(it) != std::end(input_indices))
- inputs += ", ";
- }
VERBOSE(LIR) << " - Inputs : "
<< "Then subgraph (" << node.param().then_subg_index << ") Else subgraph ("
- << node.param().else_subg_index << ") Inputs(" << inputs << ")" << std::endl;
- std::string outputs;
- const auto &output_indices = node.getOutputs();
- for (auto it = std::begin(output_indices); it != std::end(output_indices); ++it)
- {
- outputs += std::to_string(it->value());
- if (std::next(it) != std::end(output_indices))
- outputs += ", ";
- }
- VERBOSE(LIR) << " - Output : Outputs(" << outputs << ")" << std::endl;
+ << node.param().else_subg_index << ") Inputs(" << node.getInputs() << ")"
+ << std::endl;
+ VERBOSE(LIR) << " - Output : Outputs(" << node.getOutputs() << ")" << std::endl;
}
void OperationDumper::visit(const While &node)
{
VERBOSE(LIR) << "* " << node.name() << std::endl;
- std::string inputs;
- const auto &input_indices = node.getInputs();
- for (auto it = std::begin(input_indices); it != std::end(input_indices); ++it)
- {
- inputs += std::to_string(it->value());
- if (std::next(it) != std::end(input_indices))
- inputs += ", ";
- }
VERBOSE(LIR) << " - Inputs : "
<< "Cond subgraph (" << node.param().cond_subg_index << ") Body subgraph ("
- << node.param().body_subg_index << ") Inputs(" << inputs << ")" << std::endl;
- std::string outputs;
- const auto &output_indices = node.getOutputs();
- for (auto it = std::begin(output_indices); it != std::end(output_indices); ++it)
- {
- outputs += std::to_string(it->value());
- if (std::next(it) != std::end(output_indices))
- outputs += ", ";
- }
- VERBOSE(LIR) << " - Output : Outputs(" << outputs << ")" << std::endl;
+ << node.param().body_subg_index << ") Inputs(" << node.getInputs() << ")"
+ << std::endl;
+ VERBOSE(LIR) << " - Output : Outputs(" << node.getOutputs() << ")" << std::endl;
}
} // namespace ir
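The simplified dumpers above stream an OperandIndexSequence straight into VERBOSE, which relies on an operator<< overload provided elsewhere in the IR headers. A minimal sketch of what such an overload could look like, assuming the sequence is iterable over OperandIndex values:

    std::ostream &operator<<(std::ostream &os, const OperandIndexSequence &seq)
    {
      for (auto it = seq.begin(); it != seq.end(); ++it)
      {
        if (it != seq.begin())
          os << ",";
        os << it->value();
      }
      return os;
    }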
diff --git a/runtime/onert/core/src/ir/operation/AddN.cc b/runtime/onert/core/src/ir/operation/AddN.cc
index 110aeebe7..a51e12dff 100644
--- a/runtime/onert/core/src/ir/operation/AddN.cc
+++ b/runtime/onert/core/src/ir/operation/AddN.cc
@@ -15,9 +15,6 @@
*/
#include "ir/operation/AddN.h"
-
-#include <cassert>
-
#include "ir/OperationVisitor.h"
namespace onert
diff --git a/runtime/onert/core/src/ir/operation/BCQFullyConnected.cc b/runtime/onert/core/src/ir/operation/BCQFullyConnected.cc
index e918d27ae..ccda674ad 100644
--- a/runtime/onert/core/src/ir/operation/BCQFullyConnected.cc
+++ b/runtime/onert/core/src/ir/operation/BCQFullyConnected.cc
@@ -15,9 +15,6 @@
*/
#include "ir/operation/BCQFullyConnected.h"
-
-#include <cassert>
-
#include "ir/OperationVisitor.h"
namespace onert
diff --git a/runtime/onert/core/src/ir/operation/BCQGather.cc b/runtime/onert/core/src/ir/operation/BCQGather.cc
index f9dfaa3f6..1ca5b0c9f 100644
--- a/runtime/onert/core/src/ir/operation/BCQGather.cc
+++ b/runtime/onert/core/src/ir/operation/BCQGather.cc
@@ -15,9 +15,6 @@
*/
#include "ir/operation/BCQGather.h"
-
-#include <cassert>
-
#include "ir/OperationVisitor.h"
namespace onert
diff --git a/runtime/onert/core/src/ir/operation/BatchToSpaceND.cc b/runtime/onert/core/src/ir/operation/BatchToSpaceND.cc
index e58e0f486..3c5578ac4 100644
--- a/runtime/onert/core/src/ir/operation/BatchToSpaceND.cc
+++ b/runtime/onert/core/src/ir/operation/BatchToSpaceND.cc
@@ -15,9 +15,6 @@
*/
#include "ir/operation/BatchToSpaceND.h"
-
-#include <cassert>
-
#include "ir/OperationVisitor.h"
namespace onert
diff --git a/runtime/onert/core/src/ir/operation/BinaryArithmetic.cc b/runtime/onert/core/src/ir/operation/BinaryArithmetic.cc
index 2d439194f..5eb3fc3d7 100644
--- a/runtime/onert/core/src/ir/operation/BinaryArithmetic.cc
+++ b/runtime/onert/core/src/ir/operation/BinaryArithmetic.cc
@@ -15,12 +15,10 @@
*/
#include "ir/operation/BinaryArithmetic.h"
+#include "ir/OperationVisitor.h"
-#include <cassert>
#include <unordered_map>
-#include "ir/OperationVisitor.h"
-
namespace onert
{
namespace ir
diff --git a/runtime/onert/core/src/ir/operation/BroadcastTo.cc b/runtime/onert/core/src/ir/operation/BroadcastTo.cc
index 5da7b5abc..eab6c0611 100644
--- a/runtime/onert/core/src/ir/operation/BroadcastTo.cc
+++ b/runtime/onert/core/src/ir/operation/BroadcastTo.cc
@@ -15,9 +15,6 @@
*/
#include "ir/operation/BroadcastTo.h"
-
-#include <cassert>
-
#include "ir/OperationVisitor.h"
namespace onert
diff --git a/runtime/onert/core/src/ir/operation/Bulk.cc b/runtime/onert/core/src/ir/operation/Bulk.cc
new file mode 100644
index 000000000..4b96c9d94
--- /dev/null
+++ b/runtime/onert/core/src/ir/operation/Bulk.cc
@@ -0,0 +1,36 @@
+/*
+ * Copyright (c) 2022 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "ir/operation/Bulk.h"
+#include "ir/OperationVisitor.h"
+
+namespace onert
+{
+namespace ir
+{
+namespace operation
+{
+void Bulk::accept(OperationVisitor &v) const { v.visit(*this); }
+
+Bulk::Bulk(const OperandIndexSequence &inputs, const OperandIndexSequence &outputs,
+ const Bulk::Param &param)
+ : Operation{OperandConstraint::createAny(), inputs, outputs}, _param{param}
+{
+}
+
+} // namespace operation
+} // namespace ir
+} // namespace onert
diff --git a/runtime/onert/core/src/ir/operation/Comparison.cc b/runtime/onert/core/src/ir/operation/Comparison.cc
index 94c96ff69..33365657c 100644
--- a/runtime/onert/core/src/ir/operation/Comparison.cc
+++ b/runtime/onert/core/src/ir/operation/Comparison.cc
@@ -15,9 +15,6 @@
*/
#include "ir/operation/Comparison.h"
-
-#include <cassert>
-
#include "ir/OperationVisitor.h"
namespace onert
diff --git a/runtime/onert/core/src/ir/operation/Concat.cc b/runtime/onert/core/src/ir/operation/Concat.cc
index 5d99debb7..3a21e36f2 100644
--- a/runtime/onert/core/src/ir/operation/Concat.cc
+++ b/runtime/onert/core/src/ir/operation/Concat.cc
@@ -15,9 +15,6 @@
*/
#include "ir/operation/Concat.h"
-
-#include <cassert>
-
#include "ir/OperationVisitor.h"
namespace onert
diff --git a/runtime/onert/core/src/ir/operation/Conv2D.cc b/runtime/onert/core/src/ir/operation/Conv2D.cc
index 725f3e70b..d615ae416 100644
--- a/runtime/onert/core/src/ir/operation/Conv2D.cc
+++ b/runtime/onert/core/src/ir/operation/Conv2D.cc
@@ -15,9 +15,6 @@
*/
#include "ir/operation/Conv2D.h"
-
-#include <cassert>
-
#include "ir/OperationVisitor.h"
namespace onert
diff --git a/runtime/onert/core/src/ir/operation/ConvertFp16ToFp32.cc b/runtime/onert/core/src/ir/operation/ConvertFp16ToFp32.cc
index 822eb30a9..365745ea8 100644
--- a/runtime/onert/core/src/ir/operation/ConvertFp16ToFp32.cc
+++ b/runtime/onert/core/src/ir/operation/ConvertFp16ToFp32.cc
@@ -15,9 +15,6 @@
*/
#include "ir/operation/ConvertFp16ToFp32.h"
-
-#include <cassert>
-
#include "ir/OperationVisitor.h"
namespace onert
diff --git a/runtime/onert/core/src/ir/operation/ConvertFp32ToFp16.cc b/runtime/onert/core/src/ir/operation/ConvertFp32ToFp16.cc
index 5e5b42f3b..d4fc7031c 100644
--- a/runtime/onert/core/src/ir/operation/ConvertFp32ToFp16.cc
+++ b/runtime/onert/core/src/ir/operation/ConvertFp32ToFp16.cc
@@ -15,9 +15,6 @@
*/
#include "ir/operation/ConvertFp32ToFp16.h"
-
-#include <cassert>
-
#include "ir/OperationVisitor.h"
namespace onert
diff --git a/runtime/onert/core/src/ir/operation/DepthToSpace.cc b/runtime/onert/core/src/ir/operation/DepthToSpace.cc
index 197c7ee48..e3edea777 100644
--- a/runtime/onert/core/src/ir/operation/DepthToSpace.cc
+++ b/runtime/onert/core/src/ir/operation/DepthToSpace.cc
@@ -15,9 +15,6 @@
*/
#include "ir/operation/DepthToSpace.h"
-
-#include <cassert>
-
#include "ir/OperationVisitor.h"
namespace onert
diff --git a/runtime/onert/core/src/ir/operation/DepthwiseConv2D.cc b/runtime/onert/core/src/ir/operation/DepthwiseConv2D.cc
index bef75c5cf..0e7137306 100644
--- a/runtime/onert/core/src/ir/operation/DepthwiseConv2D.cc
+++ b/runtime/onert/core/src/ir/operation/DepthwiseConv2D.cc
@@ -15,9 +15,6 @@
*/
#include "ir/operation/DepthwiseConv2D.h"
-
-#include <cassert>
-
#include "ir/OperationVisitor.h"
namespace onert
diff --git a/runtime/onert/core/src/ir/operation/ElementwiseActivation.cc b/runtime/onert/core/src/ir/operation/ElementwiseActivation.cc
index f3e942f7d..e83c26e28 100644
--- a/runtime/onert/core/src/ir/operation/ElementwiseActivation.cc
+++ b/runtime/onert/core/src/ir/operation/ElementwiseActivation.cc
@@ -15,12 +15,10 @@
*/
#include "ir/operation/ElementwiseActivation.h"
+#include "ir/OperationVisitor.h"
-#include <cassert>
#include <unordered_map>
-#include "ir/OperationVisitor.h"
-
namespace onert
{
namespace ir
diff --git a/runtime/onert/core/src/ir/operation/ElementwiseBinary.cc b/runtime/onert/core/src/ir/operation/ElementwiseBinary.cc
index 155b660dc..b22bed7bc 100644
--- a/runtime/onert/core/src/ir/operation/ElementwiseBinary.cc
+++ b/runtime/onert/core/src/ir/operation/ElementwiseBinary.cc
@@ -15,12 +15,10 @@
*/
#include "ir/operation/ElementwiseBinary.h"
+#include "ir/OperationVisitor.h"
-#include <cassert>
#include <unordered_map>
-#include "ir/OperationVisitor.h"
-
namespace onert
{
namespace ir
diff --git a/runtime/onert/core/src/ir/operation/ElementwiseUnary.cc b/runtime/onert/core/src/ir/operation/ElementwiseUnary.cc
index c21c51c05..fd463e0fe 100644
--- a/runtime/onert/core/src/ir/operation/ElementwiseUnary.cc
+++ b/runtime/onert/core/src/ir/operation/ElementwiseUnary.cc
@@ -15,12 +15,10 @@
*/
#include "ir/operation/ElementwiseUnary.h"
+#include "ir/OperationVisitor.h"
-#include <cassert>
#include <unordered_map>
-#include "ir/OperationVisitor.h"
-
namespace onert
{
namespace ir
diff --git a/runtime/onert/core/src/ir/operation/EmbeddingLookup.cc b/runtime/onert/core/src/ir/operation/EmbeddingLookup.cc
index e23674706..66b80b2c5 100644
--- a/runtime/onert/core/src/ir/operation/EmbeddingLookup.cc
+++ b/runtime/onert/core/src/ir/operation/EmbeddingLookup.cc
@@ -15,9 +15,6 @@
*/
#include "ir/operation/EmbeddingLookup.h"
-
-#include <cassert>
-
#include "ir/OperationVisitor.h"
namespace onert
diff --git a/runtime/onert/core/src/ir/operation/ExpandDims.cc b/runtime/onert/core/src/ir/operation/ExpandDims.cc
index 50e3636f3..e421bc383 100644
--- a/runtime/onert/core/src/ir/operation/ExpandDims.cc
+++ b/runtime/onert/core/src/ir/operation/ExpandDims.cc
@@ -15,9 +15,6 @@
*/
#include "ir/operation/ExpandDims.h"
-
-#include <cassert>
-
#include "ir/OperationVisitor.h"
namespace onert
diff --git a/runtime/onert/core/src/ir/operation/Fill.cc b/runtime/onert/core/src/ir/operation/Fill.cc
index 4a13737d4..60355c609 100644
--- a/runtime/onert/core/src/ir/operation/Fill.cc
+++ b/runtime/onert/core/src/ir/operation/Fill.cc
@@ -15,9 +15,6 @@
*/
#include "ir/operation/Fill.h"
-
-#include <cassert>
-
#include "ir/OperationVisitor.h"
namespace onert
diff --git a/runtime/onert/core/src/ir/operation/FullyConnected.cc b/runtime/onert/core/src/ir/operation/FullyConnected.cc
index 335b7b209..3533df097 100644
--- a/runtime/onert/core/src/ir/operation/FullyConnected.cc
+++ b/runtime/onert/core/src/ir/operation/FullyConnected.cc
@@ -15,9 +15,6 @@
*/
#include "ir/operation/FullyConnected.h"
-
-#include <cassert>
-
#include "ir/OperationVisitor.h"
namespace onert
diff --git a/runtime/onert/core/src/ir/operation/Gather.cc b/runtime/onert/core/src/ir/operation/Gather.cc
index 96a39b3f2..e0c4630a0 100644
--- a/runtime/onert/core/src/ir/operation/Gather.cc
+++ b/runtime/onert/core/src/ir/operation/Gather.cc
@@ -15,9 +15,6 @@
*/
#include "ir/operation/Gather.h"
-
-#include <cassert>
-
#include "ir/OperationVisitor.h"
namespace onert
diff --git a/runtime/onert/core/src/ir/operation/HashtableLookup.cc b/runtime/onert/core/src/ir/operation/HashtableLookup.cc
index 2974679d4..5d1589cd1 100644
--- a/runtime/onert/core/src/ir/operation/HashtableLookup.cc
+++ b/runtime/onert/core/src/ir/operation/HashtableLookup.cc
@@ -15,9 +15,6 @@
*/
#include "ir/operation/HashtableLookup.h"
-
-#include <cassert>
-
#include "ir/OperationVisitor.h"
namespace onert
diff --git a/runtime/onert/core/src/ir/operation/InstanceNorm.cc b/runtime/onert/core/src/ir/operation/InstanceNorm.cc
index d9af9d0b7..9fb55383e 100644
--- a/runtime/onert/core/src/ir/operation/InstanceNorm.cc
+++ b/runtime/onert/core/src/ir/operation/InstanceNorm.cc
@@ -15,9 +15,6 @@
*/
#include "ir/operation/InstanceNorm.h"
-
-#include <cassert>
-
#include "ir/OperationVisitor.h"
namespace onert
diff --git a/runtime/onert/core/src/ir/operation/L2Normalization.cc b/runtime/onert/core/src/ir/operation/L2Normalization.cc
index 0184ef628..6725df596 100644
--- a/runtime/onert/core/src/ir/operation/L2Normalization.cc
+++ b/runtime/onert/core/src/ir/operation/L2Normalization.cc
@@ -15,9 +15,6 @@
*/
#include "ir/operation/L2Normalization.h"
-
-#include <cassert>
-
#include "ir/OperationVisitor.h"
namespace onert
diff --git a/runtime/onert/core/src/ir/operation/LSTM.cc b/runtime/onert/core/src/ir/operation/LSTM.cc
index 45a1fd120..06e66158b 100644
--- a/runtime/onert/core/src/ir/operation/LSTM.cc
+++ b/runtime/onert/core/src/ir/operation/LSTM.cc
@@ -15,9 +15,6 @@
*/
#include "ir/operation/LSTM.h"
-
-#include <cassert>
-
#include "ir/OperationVisitor.h"
namespace onert
diff --git a/runtime/onert/core/src/ir/operation/LocalResponseNormalization.cc b/runtime/onert/core/src/ir/operation/LocalResponseNormalization.cc
index 52037cc72..73fca9938 100644
--- a/runtime/onert/core/src/ir/operation/LocalResponseNormalization.cc
+++ b/runtime/onert/core/src/ir/operation/LocalResponseNormalization.cc
@@ -15,9 +15,6 @@
*/
#include "ir/operation/LocalResponseNormalization.h"
-
-#include <cassert>
-
#include "ir/OperationVisitor.h"
namespace onert
diff --git a/runtime/onert/core/src/ir/operation/LogSoftmax.cc b/runtime/onert/core/src/ir/operation/LogSoftmax.cc
index 51f6a6c5d..d580e63e1 100644
--- a/runtime/onert/core/src/ir/operation/LogSoftmax.cc
+++ b/runtime/onert/core/src/ir/operation/LogSoftmax.cc
@@ -15,9 +15,6 @@
*/
#include "ir/operation/LogSoftmax.h"
-
-#include <cassert>
-
#include "ir/OperationVisitor.h"
namespace onert
diff --git a/runtime/onert/core/src/ir/operation/MatrixBandPart.cc b/runtime/onert/core/src/ir/operation/MatrixBandPart.cc
index 6046e36fe..e52bddc1f 100644
--- a/runtime/onert/core/src/ir/operation/MatrixBandPart.cc
+++ b/runtime/onert/core/src/ir/operation/MatrixBandPart.cc
@@ -15,9 +15,6 @@
*/
#include "ir/operation/MatrixBandPart.h"
-
-#include <cassert>
-
#include "ir/OperationVisitor.h"
namespace onert
diff --git a/runtime/onert/core/src/ir/operation/PReLU.cc b/runtime/onert/core/src/ir/operation/PReLU.cc
index 5ed31c2b9..87bd12e60 100644
--- a/runtime/onert/core/src/ir/operation/PReLU.cc
+++ b/runtime/onert/core/src/ir/operation/PReLU.cc
@@ -15,9 +15,6 @@
*/
#include "ir/operation/PReLU.h"
-
-#include <cassert>
-
#include "ir/OperationVisitor.h"
namespace onert
diff --git a/runtime/onert/core/src/ir/operation/Permute.cc b/runtime/onert/core/src/ir/operation/Permute.cc
index 571965de8..813fbaf30 100644
--- a/runtime/onert/core/src/ir/operation/Permute.cc
+++ b/runtime/onert/core/src/ir/operation/Permute.cc
@@ -15,9 +15,6 @@
*/
#include "ir/operation/Permute.h"
-
-#include <cassert>
-
#include "ir/OperationVisitor.h"
namespace onert
diff --git a/runtime/onert/core/src/ir/operation/Pool2D.cc b/runtime/onert/core/src/ir/operation/Pool2D.cc
index cbb42a80a..e32b876e6 100644
--- a/runtime/onert/core/src/ir/operation/Pool2D.cc
+++ b/runtime/onert/core/src/ir/operation/Pool2D.cc
@@ -15,12 +15,10 @@
*/
#include "ir/operation/Pool2D.h"
+#include "ir/OperationVisitor.h"
-#include <cassert>
#include <unordered_map>
-#include "ir/OperationVisitor.h"
-
namespace onert
{
namespace ir
diff --git a/runtime/onert/core/src/ir/operation/Pow.cc b/runtime/onert/core/src/ir/operation/Pow.cc
index f1df54c60..f7c159a12 100644
--- a/runtime/onert/core/src/ir/operation/Pow.cc
+++ b/runtime/onert/core/src/ir/operation/Pow.cc
@@ -15,9 +15,6 @@
*/
#include "ir/operation/Pow.h"
-
-#include <cassert>
-
#include "ir/OperationVisitor.h"
namespace onert
diff --git a/runtime/onert/core/src/ir/operation/RNN.cc b/runtime/onert/core/src/ir/operation/RNN.cc
index a40e5bdc9..988a50669 100644
--- a/runtime/onert/core/src/ir/operation/RNN.cc
+++ b/runtime/onert/core/src/ir/operation/RNN.cc
@@ -15,9 +15,6 @@
*/
#include "ir/operation/RNN.h"
-
-#include <cassert>
-
#include "ir/OperationVisitor.h"
namespace onert
diff --git a/runtime/onert/core/src/ir/operation/Range.cc b/runtime/onert/core/src/ir/operation/Range.cc
index f85d52cb0..8ced92a0b 100644
--- a/runtime/onert/core/src/ir/operation/Range.cc
+++ b/runtime/onert/core/src/ir/operation/Range.cc
@@ -15,9 +15,6 @@
*/
#include "ir/operation/Range.h"
-
-#include <cassert>
-
#include "ir/OperationVisitor.h"
namespace onert
diff --git a/runtime/onert/core/src/ir/operation/Rank.cc b/runtime/onert/core/src/ir/operation/Rank.cc
index c33ed0a80..40797bf29 100644
--- a/runtime/onert/core/src/ir/operation/Rank.cc
+++ b/runtime/onert/core/src/ir/operation/Rank.cc
@@ -15,9 +15,6 @@
*/
#include "ir/operation/Rank.h"
-
-#include <cassert>
-
#include "ir/OperationVisitor.h"
namespace onert
diff --git a/runtime/onert/core/src/ir/operation/Reduce.cc b/runtime/onert/core/src/ir/operation/Reduce.cc
index 0811f1c37..8da1940fa 100644
--- a/runtime/onert/core/src/ir/operation/Reduce.cc
+++ b/runtime/onert/core/src/ir/operation/Reduce.cc
@@ -15,12 +15,10 @@
*/
#include "ir/operation/Reduce.h"
+#include "ir/OperationVisitor.h"
-#include <cassert>
#include <unordered_map>
-#include "ir/OperationVisitor.h"
-
namespace onert
{
namespace ir
diff --git a/runtime/onert/core/src/ir/operation/Reshape.cc b/runtime/onert/core/src/ir/operation/Reshape.cc
index 54c12574a..0ed4affa1 100644
--- a/runtime/onert/core/src/ir/operation/Reshape.cc
+++ b/runtime/onert/core/src/ir/operation/Reshape.cc
@@ -15,9 +15,6 @@
*/
#include "ir/operation/Reshape.h"
-
-#include <cassert>
-
#include "ir/OperationVisitor.h"
namespace onert
diff --git a/runtime/onert/core/src/ir/operation/ResizeBilinear.cc b/runtime/onert/core/src/ir/operation/ResizeBilinear.cc
index 7c9f5e104..7d256f447 100644
--- a/runtime/onert/core/src/ir/operation/ResizeBilinear.cc
+++ b/runtime/onert/core/src/ir/operation/ResizeBilinear.cc
@@ -15,9 +15,6 @@
*/
#include "ir/operation/ResizeBilinear.h"
-
-#include <cassert>
-
#include "ir/OperationVisitor.h"
namespace onert
diff --git a/runtime/onert/core/src/ir/operation/ResizeNearestNeighbor.cc b/runtime/onert/core/src/ir/operation/ResizeNearestNeighbor.cc
index 9792b292d..58be87b95 100644
--- a/runtime/onert/core/src/ir/operation/ResizeNearestNeighbor.cc
+++ b/runtime/onert/core/src/ir/operation/ResizeNearestNeighbor.cc
@@ -15,9 +15,6 @@
*/
#include "ir/operation/ResizeNearestNeighbor.h"
-
-#include <cassert>
-
#include "ir/OperationVisitor.h"
namespace onert
diff --git a/runtime/onert/core/src/ir/operation/Reverse.cc b/runtime/onert/core/src/ir/operation/Reverse.cc
index 471457739..6c3746426 100644
--- a/runtime/onert/core/src/ir/operation/Reverse.cc
+++ b/runtime/onert/core/src/ir/operation/Reverse.cc
@@ -15,9 +15,6 @@
*/
#include "ir/operation/Reverse.h"
-
-#include <cassert>
-
#include "ir/OperationVisitor.h"
namespace onert
diff --git a/runtime/onert/core/src/ir/operation/Shape.cc b/runtime/onert/core/src/ir/operation/Shape.cc
index 1b2cd6241..f90924488 100644
--- a/runtime/onert/core/src/ir/operation/Shape.cc
+++ b/runtime/onert/core/src/ir/operation/Shape.cc
@@ -15,9 +15,6 @@
*/
#include "ir/operation/Shape.h"
-
-#include <cassert>
-
#include "ir/OperationVisitor.h"
namespace onert
diff --git a/runtime/onert/core/src/ir/operation/Softmax.cc b/runtime/onert/core/src/ir/operation/Softmax.cc
index 91850fa33..c06c85309 100644
--- a/runtime/onert/core/src/ir/operation/Softmax.cc
+++ b/runtime/onert/core/src/ir/operation/Softmax.cc
@@ -15,9 +15,6 @@
*/
#include "ir/operation/Softmax.h"
-
-#include <cassert>
-
#include "ir/OperationVisitor.h"
namespace onert
diff --git a/runtime/onert/core/src/ir/operation/SpaceToBatchND.cc b/runtime/onert/core/src/ir/operation/SpaceToBatchND.cc
index 97c630888..94acccb0c 100644
--- a/runtime/onert/core/src/ir/operation/SpaceToBatchND.cc
+++ b/runtime/onert/core/src/ir/operation/SpaceToBatchND.cc
@@ -15,9 +15,6 @@
*/
#include "ir/operation/SpaceToBatchND.h"
-
-#include <cassert>
-
#include "ir/OperationVisitor.h"
namespace onert
diff --git a/runtime/onert/core/src/ir/operation/SpaceToDepth.cc b/runtime/onert/core/src/ir/operation/SpaceToDepth.cc
index e1fd27a55..08e7e5190 100644
--- a/runtime/onert/core/src/ir/operation/SpaceToDepth.cc
+++ b/runtime/onert/core/src/ir/operation/SpaceToDepth.cc
@@ -15,9 +15,6 @@
*/
#include "ir/operation/SpaceToDepth.h"
-
-#include <cassert>
-
#include "ir/OperationVisitor.h"
namespace onert
diff --git a/runtime/onert/core/src/ir/operation/Split.cc b/runtime/onert/core/src/ir/operation/Split.cc
index 96822822b..3e371188d 100644
--- a/runtime/onert/core/src/ir/operation/Split.cc
+++ b/runtime/onert/core/src/ir/operation/Split.cc
@@ -13,9 +13,10 @@
* See the License for the specific language governing permissions and
* limitations under the License.
*/
+
#include "ir/operation/Split.h"
-#include <cassert>
#include "ir/OperationVisitor.h"
+
namespace onert
{
namespace ir
diff --git a/runtime/onert/core/src/ir/operation/SplitV.cc b/runtime/onert/core/src/ir/operation/SplitV.cc
index 38918cd81..be13f167e 100644
--- a/runtime/onert/core/src/ir/operation/SplitV.cc
+++ b/runtime/onert/core/src/ir/operation/SplitV.cc
@@ -13,9 +13,10 @@
* See the License for the specific language governing permissions and
* limitations under the License.
*/
+
#include "ir/operation/SplitV.h"
-#include <cassert>
#include "ir/OperationVisitor.h"
+
namespace onert
{
namespace ir
diff --git a/runtime/onert/core/src/ir/operation/SquaredDifference.cc b/runtime/onert/core/src/ir/operation/SquaredDifference.cc
index 705b60abc..db93903c7 100644
--- a/runtime/onert/core/src/ir/operation/SquaredDifference.cc
+++ b/runtime/onert/core/src/ir/operation/SquaredDifference.cc
@@ -15,9 +15,6 @@
*/
#include "ir/operation/SquaredDifference.h"
-
-#include <cassert>
-
#include "ir/OperationVisitor.h"
namespace onert
diff --git a/runtime/onert/core/src/ir/operation/StatelessRandomUniform.cc b/runtime/onert/core/src/ir/operation/StatelessRandomUniform.cc
index 18f1cf5a6..94be0be86 100644
--- a/runtime/onert/core/src/ir/operation/StatelessRandomUniform.cc
+++ b/runtime/onert/core/src/ir/operation/StatelessRandomUniform.cc
@@ -15,9 +15,6 @@
*/
#include "ir/operation/StatelessRandomUniform.h"
-
-#include <cassert>
-
#include "ir/OperationVisitor.h"
namespace onert
diff --git a/runtime/onert/core/src/ir/operation/StridedSlice.cc b/runtime/onert/core/src/ir/operation/StridedSlice.cc
index e8278b456..a38282c93 100644
--- a/runtime/onert/core/src/ir/operation/StridedSlice.cc
+++ b/runtime/onert/core/src/ir/operation/StridedSlice.cc
@@ -15,9 +15,6 @@
*/
#include "ir/operation/StridedSlice.h"
-
-#include <cassert>
-
#include "ir/OperationVisitor.h"
namespace onert
diff --git a/runtime/onert/core/src/ir/operation/Tile.cc b/runtime/onert/core/src/ir/operation/Tile.cc
index 0ec785579..51c1ff1dc 100644
--- a/runtime/onert/core/src/ir/operation/Tile.cc
+++ b/runtime/onert/core/src/ir/operation/Tile.cc
@@ -15,9 +15,6 @@
*/
#include "ir/operation/Tile.h"
-
-#include <cassert>
-
#include "ir/OperationVisitor.h"
namespace onert
diff --git a/runtime/onert/core/src/ir/operation/TopKV2.cc b/runtime/onert/core/src/ir/operation/TopKV2.cc
index a1f39202d..e1723d180 100644
--- a/runtime/onert/core/src/ir/operation/TopKV2.cc
+++ b/runtime/onert/core/src/ir/operation/TopKV2.cc
@@ -15,9 +15,6 @@
*/
#include "ir/operation/TopKV2.h"
-
-#include <cassert>
-
#include "ir/OperationVisitor.h"
namespace onert
diff --git a/runtime/onert/core/src/ir/operation/Transpose.cc b/runtime/onert/core/src/ir/operation/Transpose.cc
index f2ee52f0e..dbc5ef2aa 100644
--- a/runtime/onert/core/src/ir/operation/Transpose.cc
+++ b/runtime/onert/core/src/ir/operation/Transpose.cc
@@ -15,9 +15,6 @@
*/
#include "ir/operation/Transpose.h"
-
-#include <cassert>
-
#include "ir/OperationVisitor.h"
namespace onert
diff --git a/runtime/onert/core/src/ir/operation/TransposeConv.cc b/runtime/onert/core/src/ir/operation/TransposeConv.cc
index 1f405dc6b..944cc365d 100644
--- a/runtime/onert/core/src/ir/operation/TransposeConv.cc
+++ b/runtime/onert/core/src/ir/operation/TransposeConv.cc
@@ -15,9 +15,6 @@
*/
#include "ir/operation/TransposeConv.h"
-
-#include <cassert>
-
#include "ir/OperationVisitor.h"
namespace onert
diff --git a/runtime/onert/core/src/ir/operation/Unpack.cc b/runtime/onert/core/src/ir/operation/Unpack.cc
index 90d3c0c07..185eddce3 100644
--- a/runtime/onert/core/src/ir/operation/Unpack.cc
+++ b/runtime/onert/core/src/ir/operation/Unpack.cc
@@ -13,6 +13,7 @@
* See the License for the specific language governing permissions and
* limitations under the License.
*/
+
#include "ir/operation/Unpack.h"
#include "ir/OperationVisitor.h"
diff --git a/runtime/onert/core/src/ir/operation/While.cc b/runtime/onert/core/src/ir/operation/While.cc
index 8a6f5c01e..f35996b07 100644
--- a/runtime/onert/core/src/ir/operation/While.cc
+++ b/runtime/onert/core/src/ir/operation/While.cc
@@ -13,6 +13,7 @@
* See the License for the specific language governing permissions and
* limitations under the License.
*/
+
#include "ir/operation/While.h"
#include "ir/OperationVisitor.h"
diff --git a/runtime/onert/frontend/base_loader/include/base_loader.h b/runtime/onert/frontend/base_loader/include/base_loader.h
index 6ba7ee922..5649f286d 100644
--- a/runtime/onert/frontend/base_loader/include/base_loader.h
+++ b/runtime/onert/frontend/base_loader/include/base_loader.h
@@ -96,6 +96,7 @@ protected:
ir::Activation convertActivation(ActivationFunctionType type);
ir::DataType tensorTypeToDataType(TensorType type);
ir::OperandIndex tensorIdxToOperandIdx(int32_t tensorIdx);
+ flexbuffers::Map getCustomOpAttrMap(const Operator *op);
// Create operands from tflite::Tensor
ir::OperandIndex loadOperand(const Tensor *tensor, ir::Graph &subg);
@@ -110,6 +111,16 @@ protected:
void loadStridesAndPaddings(Param &param, const OptionsType *options);
// Load Pool2D param
template <typename Param> void loadPool2DOptions(Param &param, const Pool2DOptions *options);
+ // Get BuiltinOperator; for codes below PLACEHOLDER_FOR_GREATER_OP_CODES the
+ // legacy deprecated_builtin_code field is authoritative
+ BuiltinOperator getBuiltinOperator(const Operator *op)
+ {
+ auto const builtin_opcode = _model->operator_codes()->Get(op->opcode_index());
+ auto builtin_op = builtin_opcode->builtin_code();
+ if (builtin_op < BuiltinOperator::BuiltinOperator_PLACEHOLDER_FOR_GREATER_OP_CODES)
+ builtin_op = static_cast<BuiltinOperator>(builtin_opcode->deprecated_builtin_code());
+
+ return builtin_op;
+ }
private:
virtual std::unique_ptr<ir::Graph> loadSubgraph(const SubGraph *subg) = 0;
@@ -291,6 +302,15 @@ ir::OperandIndex BaseLoader<LoaderDomain>::BaseLoader::tensorIdxToOperandIdx(int
return isOptionalInputTensor(tensorIdx) ? ir::OperandIndex() : _tensor_to_operand[tensorIdx];
}
+template <typename LoaderDomain>
+flexbuffers::Map BaseLoader<LoaderDomain>::BaseLoader::getCustomOpAttrMap(const Operator *op)
+{
+ size_t custom_op_data_size = op->custom_options()->size();
+ auto custom_op_data = op->custom_options()->Data();
+ auto data_root = flexbuffers::GetRoot(custom_op_data, custom_op_data_size);
+ return data_root.AsMap();
+}
+
/* The following Copy helpers are copied from TensorFlow Lite */
template <typename T> bool Copy(const T *data_ptr, std::vector<uint16_t> &arr)
{
@@ -545,7 +565,7 @@ void BaseLoader<LoaderDomain>::loadOperationIO(const Operator *op, ir::OperandIn
{
// Optional tensors are not supported yet except for FULLY_CONNECTED and BCQ_FULLY_CONNECTED
auto check_optional_input = [&]() {
- auto builtin_code = _model->operator_codes()->Get(op->opcode_index())->builtin_code();
+ auto builtin_code = getBuiltinOperator(op);
if (isOptionalInputTensor(idx) && !allowOptionalInputTensor(builtin_code))
throw std::runtime_error(
std::string("loader doesn't support optional input tensor yet for ")
@@ -748,10 +768,7 @@ void BaseLoader<LoaderDomain>::loadAddV2(const Operator *op, ir::Graph &subg)
}
else
{
- size_t custom_op_data_size = op->custom_options()->size();
- auto custom_op_data = op->custom_options()->Data();
- auto data_root = flexbuffers::GetRoot(custom_op_data, custom_op_data_size);
- auto attr_map = data_root.AsMap();
+ const auto attr_map = getCustomOpAttrMap(op);
const auto fused_activation_func = static_cast<typename LoaderDomain::ActivationFunctionType>(
attr_map["fused_activation_function"].AsInt8());
param.activation = convertActivation(fused_activation_func);
@@ -876,10 +893,7 @@ void BaseLoader<LoaderDomain>::loadReduceAll(const Operator *op, ir::Graph &subg
}
else
{
- size_t custom_op_data_size = op->custom_options()->size();
- auto custom_op_data = op->custom_options()->Data();
- auto data_root = flexbuffers::GetRoot(custom_op_data, custom_op_data_size);
- auto attr_map = data_root.AsMap();
+ const auto attr_map = getCustomOpAttrMap(op);
param.keep_dims = attr_map["keep_dims"].AsBool();
}
@@ -931,8 +945,7 @@ void BaseLoader<LoaderDomain>::loadGather(const Operator *op, ir::Graph &subg)
template <typename LoaderDomain>
void BaseLoader<LoaderDomain>::loadDetectionPostProcess(const Operator *op, ir::Graph &subg)
{
- const flexbuffers::Map &m =
- flexbuffers::GetRoot(op->custom_options()->data(), op->custom_options()->size()).AsMap();
+ const auto &m = getCustomOpAttrMap(op);
ir::operation::DetectionPostProcess::Param param;
@@ -972,14 +985,17 @@ void BaseLoader<LoaderDomain>::loadBatchMatMul(const Operator *op, ir::Graph &su
{
ir::operation::BatchMatMul::Param param;
- const auto builtin_op = _model->operator_codes()->Get(op->opcode_index())->builtin_code();
+ const auto builtin_op = getBuiltinOperator(op);
switch (builtin_op)
{
case BuiltinOperator::BuiltinOperator_BATCH_MATMUL:
- param.adj_x = op->builtin_options_as_BatchMatMulOptions()->adjoint_lhs();
- param.adj_y = op->builtin_options_as_BatchMatMulOptions()->adjoint_rhs();
- break;
+ // Handled in each loader because the option names differ:
+ // Circle: adjoint_lhs, adjoint_rhs
+ // TFLite: adj_x, adj_y
+ throw std::runtime_error(
+ std::string("Cannot handle here: ").append(EnumNameBuiltinOperator(builtin_op)) + " as " +
+ EnumNameBuiltinOperator(BuiltinOperator::BuiltinOperator_BATCH_MATMUL));
case BuiltinOperator::BuiltinOperator_CUSTOM:
if (op->custom_options() == nullptr)
{
@@ -988,10 +1004,7 @@ void BaseLoader<LoaderDomain>::loadBatchMatMul(const Operator *op, ir::Graph &su
}
else
{
- size_t custom_op_data_size = op->custom_options()->size();
- auto custom_op_data = op->custom_options()->Data();
- auto data_root = flexbuffers::GetRoot(custom_op_data, custom_op_data_size);
- auto attr_map = data_root.AsMap();
+ const auto attr_map = getCustomOpAttrMap(op);
param.adj_x = attr_map["adj_x"].AsBool();
param.adj_y = attr_map["adj_y"].AsBool();
}
@@ -1184,7 +1197,7 @@ template <typename LoaderDomain>
void BaseLoader<LoaderDomain>::loadComparison(const Operator *op, ir::Graph &subg)
{
ir::operation::Comparison::Param param;
- const auto builtin_op = _model->operator_codes()->Get(op->opcode_index())->builtin_code();
+ const auto builtin_op = getBuiltinOperator(op);
switch (builtin_op)
{
@@ -1224,10 +1237,7 @@ void BaseLoader<LoaderDomain>::loadEinsum(const Operator *op, ir::Graph &subg)
}
else
{
- size_t custom_op_data_size = op->custom_options()->size();
- auto custom_op_data = op->custom_options()->Data();
- auto data_root = flexbuffers::GetRoot(custom_op_data, custom_op_data_size);
- auto attr_map = data_root.AsMap();
+ const auto attr_map = getCustomOpAttrMap(op);
param.equation = attr_map["equation"].ToString();
}
@@ -1247,10 +1257,7 @@ void BaseLoader<LoaderDomain>::loadFusedBatchNorm(const Operator *op, ir::Graph
}
else
{
- size_t custom_op_data_size = op->custom_options()->size();
- auto custom_op_data = op->custom_options()->Data();
- auto data_root = flexbuffers::GetRoot(custom_op_data, custom_op_data_size);
- auto attr_map = data_root.AsMap();
+ const auto attr_map = getCustomOpAttrMap(op);
param.is_training = attr_map["is_training"].AsBool();
param.epsilon = attr_map["epsilon"].AsFloat();
param.data_format = attr_map["data_format"].ToString();
@@ -1363,7 +1370,7 @@ void BaseLoader<LoaderDomain>::loadUnidirectionalSequenceLSTM(const Operator *op
// loader doesn't support optional output tensor yet
if (op->outputs()->size() != 1)
{
- auto builtin_code = _model->operator_codes()->Get(op->opcode_index())->builtin_code();
+ auto builtin_code = getBuiltinOperator(op);
throw std::runtime_error(std::string("loader doesn't support optional output tensor yet for ")
.append(EnumNameBuiltinOperator(builtin_code)));
}
@@ -1381,7 +1388,7 @@ void BaseLoader<LoaderDomain>::loadUnidirectionalSequenceLSTM(const Operator *op
template <typename LoaderDomain>
void BaseLoader<LoaderDomain>::loadOperation(const Operator *op, ir::Graph &subg)
{
- const auto builtin_op = _model->operator_codes()->Get(op->opcode_index())->builtin_code();
+ auto const builtin_op = getBuiltinOperator(op);
switch (builtin_op)
{
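
getBuiltinOperator exists because the opcode outgrew its original field: the schema keeps the legacy int8 deprecated_builtin_code for operators numbered below 127 (old files carry the real code only there, leaving the newer 32-bit builtin_code at its flatbuffers default of 0, i.e. ADD), while operators from 127 up fit only in builtin_code. Reading builtin_code alone would therefore mis-identify every operator in a legacy file. A hedged restatement of the rule outside the loader, with a plain struct standing in for the generated OperatorCode table:

    #include <cstdint>

    // Stand-in for the generated flatbuffers table; field names as in the hunk above.
    struct OperatorCode
    {
      int8_t deprecated_builtin_code; // legacy field, saturates at 127
      int32_t builtin_code;           // authoritative for codes >= 127
    };

    constexpr int32_t kPlaceholder = 127; // BuiltinOperator_PLACEHOLDER_FOR_GREATER_OP_CODES

    int32_t resolveBuiltinCode(const OperatorCode &oc)
    {
      // Codes below the placeholder are still owned by the legacy field, which
      // both old and new files populate; codes at or above it exist only in
      // builtin_code.
      return oc.builtin_code < kPlaceholder ? oc.deprecated_builtin_code : oc.builtin_code;
    }
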
diff --git a/runtime/onert/frontend/circle/src/circle_loader.cc b/runtime/onert/frontend/circle/src/circle_loader.cc
index 4fb0e71d6..aae831d61 100644
--- a/runtime/onert/frontend/circle/src/circle_loader.cc
+++ b/runtime/onert/frontend/circle/src/circle_loader.cc
@@ -72,6 +72,12 @@ struct LoaderDomain
class CircleLoader final : public base_loader::BaseLoader<LoaderDomain>
{
protected:
+ // Handled here because the option names differ:
+ // Circle: adjoint_lhs, adjoint_rhs
+ // TFLite: adj_x, adj_y
+ void loadBatchMatMul(const Operator *op, ir::Graph &subg);
+
+ // Circle-only operations
void loadInstanceNorm(const Operator *op, ir::Graph &subg);
void loadBCQFullyConnected(const Operator *op, ir::Graph &subg);
void loadBCQGather(const Operator *op, ir::Graph &subg);
@@ -129,10 +135,13 @@ private:
void loadOperation(const circle::Operator *op, ir::Graph &subg)
{
- const auto builtin_op = _model->operator_codes()->Get(op->opcode_index())->builtin_code();
+ auto const builtin_op = getBuiltinOperator(op);
switch (builtin_op)
{
+ case circle::BuiltinOperator::BuiltinOperator_BATCH_MATMUL:
+ loadBatchMatMul(op, subg);
+ return;
case circle::BuiltinOperator::BuiltinOperator_INSTANCE_NORM:
loadInstanceNorm(op, subg);
return;
@@ -149,6 +158,23 @@ private:
}
};
+void CircleLoader::loadBatchMatMul(const Operator *op, ir::Graph &subg)
+{
+ ir::OperandIndexSequence inputs;
+ ir::OperandIndexSequence outputs;
+
+ loadOperationIO(op, inputs, outputs);
+
+ ir::operation::BatchMatMul::Param param;
+ const auto *options = op->builtin_options_as_BatchMatMulOptions();
+
+ param.adj_x = options->adjoint_lhs();
+ param.adj_y = options->adjoint_rhs();
+
+ std::unique_ptr<ir::Operation> new_op(new ir::operation::BatchMatMul(inputs, outputs, param));
+ subg.addOperation(std::move(new_op));
+}
+
void CircleLoader::loadInstanceNorm(const Operator *op, ir::Graph &subg)
{
ir::OperandIndexSequence inputs;
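
BatchMatMul is the one builtin whose option field names diverge between the two schemas (Circle: adjoint_lhs/adjoint_rhs, TFLite: adj_x/adj_y), which is why the shared base loader now throws for the builtin case and each frontend supplies its own loadBatchMatMul; the Circle version appears above. A hypothetical TFLite-side sibling (not part of this diff; TFLiteLoader is an assumed name) would differ only in the two accessor calls:

    // Hypothetical sketch, mirroring CircleLoader::loadBatchMatMul above.
    void TFLiteLoader::loadBatchMatMul(const Operator *op, ir::Graph &subg)
    {
      ir::OperandIndexSequence inputs;
      ir::OperandIndexSequence outputs;

      loadOperationIO(op, inputs, outputs);

      ir::operation::BatchMatMul::Param param;
      const auto *options = op->builtin_options_as_BatchMatMulOptions();

      param.adj_x = options->adj_x(); // TFLite spelling of Circle's adjoint_lhs
      param.adj_y = options->adj_y(); // TFLite spelling of Circle's adjoint_rhs

      std::unique_ptr<ir::Operation> new_op(new ir::operation::BatchMatMul(inputs, outputs, param));
      subg.addOperation(std::move(new_op));
    }
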
diff --git a/runtime/onert/frontend/circle_schema/include/circle_schema_generated.h b/runtime/onert/frontend/circle_schema/include/circle_schema_generated.h
index eb1775297..e3c92eae0 100644
--- a/runtime/onert/frontend/circle_schema/include/circle_schema_generated.h
+++ b/runtime/onert/frontend/circle_schema/include/circle_schema_generated.h
@@ -51,6 +51,9 @@ struct TensorBuilder;
struct Conv2DOptions;
struct Conv2DOptionsBuilder;
+struct Conv3DOptions;
+struct Conv3DOptionsBuilder;
+
struct Pool2DOptions;
struct Pool2DOptionsBuilder;
@@ -327,6 +330,9 @@ struct MatrixSetDiagOptionsBuilder;
struct IfOptions;
struct IfOptionsBuilder;
+struct CallOnceOptions;
+struct CallOnceOptionsBuilder;
+
struct WhileOptions;
struct WhileOptionsBuilder;
@@ -351,6 +357,39 @@ struct SegmentSumOptionsBuilder;
struct BatchMatMulOptions;
struct BatchMatMulOptionsBuilder;
+struct CumsumOptions;
+struct CumsumOptionsBuilder;
+
+struct BroadcastToOptions;
+struct BroadcastToOptionsBuilder;
+
+struct Rfft2dOptions;
+struct Rfft2dOptionsBuilder;
+
+struct HashtableOptions;
+struct HashtableOptionsBuilder;
+
+struct HashtableFindOptions;
+struct HashtableFindOptionsBuilder;
+
+struct HashtableImportOptions;
+struct HashtableImportOptionsBuilder;
+
+struct HashtableSizeOptions;
+struct HashtableSizeOptionsBuilder;
+
+struct VarHandleOptions;
+struct VarHandleOptionsBuilder;
+
+struct ReadVariableOptions;
+struct ReadVariableOptionsBuilder;
+
+struct AssignVariableOptions;
+struct AssignVariableOptionsBuilder;
+
+struct RandomOptions;
+struct RandomOptionsBuilder;
+
struct BCQGatherOptions;
struct BCQGatherOptionsBuilder;
@@ -375,10 +414,16 @@ struct BufferBuilder;
struct Metadata;
struct MetadataBuilder;
+struct TensorMap;
+struct TensorMapBuilder;
+
+struct SignatureDef;
+struct SignatureDefBuilder;
+
struct Model;
struct ModelBuilder;
-enum TensorType
+enum TensorType : int8_t
{
TensorType_FLOAT32 = 0,
TensorType_FLOAT16 = 1,
@@ -391,36 +436,43 @@ enum TensorType
TensorType_COMPLEX64 = 8,
TensorType_INT8 = 9,
TensorType_FLOAT64 = 10,
+ TensorType_COMPLEX128 = 11,
+ TensorType_UINT64 = 12,
+ TensorType_RESOURCE = 13,
+ TensorType_VARIANT = 14,
+ TensorType_UINT32 = 15,
TensorType_MIN = TensorType_FLOAT32,
- TensorType_MAX = TensorType_FLOAT64
+ TensorType_MAX = TensorType_UINT32
};
-inline const TensorType (&EnumValuesTensorType())[11]
+inline const TensorType (&EnumValuesTensorType())[16]
{
- static const TensorType values[] = {TensorType_FLOAT32, TensorType_FLOAT16, TensorType_INT32,
- TensorType_UINT8, TensorType_INT64, TensorType_STRING,
- TensorType_BOOL, TensorType_INT16, TensorType_COMPLEX64,
- TensorType_INT8, TensorType_FLOAT64};
+ static const TensorType values[] = {
+ TensorType_FLOAT32, TensorType_FLOAT16, TensorType_INT32, TensorType_UINT8,
+ TensorType_INT64, TensorType_STRING, TensorType_BOOL, TensorType_INT16,
+ TensorType_COMPLEX64, TensorType_INT8, TensorType_FLOAT64, TensorType_COMPLEX128,
+ TensorType_UINT64, TensorType_RESOURCE, TensorType_VARIANT, TensorType_UINT32};
return values;
}
inline const char *const *EnumNamesTensorType()
{
- static const char *const names[12] = {"FLOAT32", "FLOAT16", "INT32", "UINT8",
- "INT64", "STRING", "BOOL", "INT16",
- "COMPLEX64", "INT8", "FLOAT64", nullptr};
+ static const char *const names[17] = {"FLOAT32", "FLOAT16", "INT32", "UINT8", "INT64",
+ "STRING", "BOOL", "INT16", "COMPLEX64", "INT8",
+ "FLOAT64", "COMPLEX128", "UINT64", "RESOURCE", "VARIANT",
+ "UINT32", nullptr};
return names;
}
inline const char *EnumNameTensorType(TensorType e)
{
- if (flatbuffers::IsOutRange(e, TensorType_FLOAT32, TensorType_FLOAT64))
+ if (flatbuffers::IsOutRange(e, TensorType_FLOAT32, TensorType_UINT32))
return "";
const size_t index = static_cast<size_t>(e);
return EnumNamesTensorType()[index];
}
-enum QuantizationDetails
+enum QuantizationDetails : uint8_t
{
QuantizationDetails_NONE = 0,
QuantizationDetails_CustomQuantization = 1,
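
Besides the five new tensor types (COMPLEX128, UINT64, RESOURCE, VARIANT, UINT32), note that every regenerated enum now carries an explicit underlying type, fixing its serialized width independent of compiler defaults. Two compile-time checks that hold under the declarations above, as a sketch:

    #include <cstdint>
    #include <type_traits>

    static_assert(std::is_same<std::underlying_type<circle::TensorType>::type, int8_t>::value,
                  "TensorType is one byte on the wire");
    static_assert(circle::TensorType_MAX == circle::TensorType_UINT32,
                  "UINT32 is the new upper bound");
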
@@ -465,7 +517,7 @@ bool VerifyQuantizationDetailsVector(flatbuffers::Verifier &verifier,
const flatbuffers::Vector<flatbuffers::Offset<void>> *values,
const flatbuffers::Vector<uint8_t> *types);
-enum DimensionType
+enum DimensionType : int8_t
{
DimensionType_DENSE = 0,
DimensionType_SPARSE_CSR = 1,
@@ -493,7 +545,7 @@ inline const char *EnumNameDimensionType(DimensionType e)
return EnumNamesDimensionType()[index];
}
-enum SparseIndexVector
+enum SparseIndexVector : uint8_t
{
SparseIndexVector_NONE = 0,
SparseIndexVector_Int32Vector = 1,
@@ -552,8 +604,11 @@ bool VerifySparseIndexVectorVector(flatbuffers::Verifier &verifier,
const flatbuffers::Vector<flatbuffers::Offset<void>> *values,
const flatbuffers::Vector<uint8_t> *types);
-enum BuiltinOperator
+enum BuiltinOperator : int32_t
{
+ BuiltinOperator_BCQ_GATHER = -4,
+ BuiltinOperator_BCQ_FULLY_CONNECTED = -3,
+ BuiltinOperator_INSTANCE_NORM = -2,
BuiltinOperator_ADD = 0,
BuiltinOperator_AVERAGE_POOL_2D = 1,
BuiltinOperator_CONCATENATION = 2,
@@ -681,16 +736,36 @@ enum BuiltinOperator
BuiltinOperator_DENSIFY = 124,
BuiltinOperator_SEGMENT_SUM = 125,
BuiltinOperator_BATCH_MATMUL = 126,
- BuiltinOperator_BCQ_GATHER = 252,
- BuiltinOperator_BCQ_FULLY_CONNECTED = 253,
- BuiltinOperator_INSTANCE_NORM = 254,
- BuiltinOperator_MIN = BuiltinOperator_ADD,
- BuiltinOperator_MAX = BuiltinOperator_INSTANCE_NORM
-};
-
-inline const BuiltinOperator (&EnumValuesBuiltinOperator())[130]
-{
- static const BuiltinOperator values[] = {BuiltinOperator_ADD,
+ BuiltinOperator_PLACEHOLDER_FOR_GREATER_OP_CODES = 127,
+ BuiltinOperator_CUMSUM = 128,
+ BuiltinOperator_CALL_ONCE = 129,
+ BuiltinOperator_BROADCAST_TO = 130,
+ BuiltinOperator_RFFT2D = 131,
+ BuiltinOperator_CONV_3D = 132,
+ BuiltinOperator_IMAG = 133,
+ BuiltinOperator_REAL = 134,
+ BuiltinOperator_COMPLEX_ABS = 135,
+ BuiltinOperator_HASHTABLE = 136,
+ BuiltinOperator_HASHTABLE_FIND = 137,
+ BuiltinOperator_HASHTABLE_IMPORT = 138,
+ BuiltinOperator_HASHTABLE_SIZE = 139,
+ BuiltinOperator_REDUCE_ALL = 140,
+ BuiltinOperator_CONV_3D_TRANSPOSE = 141,
+ BuiltinOperator_VAR_HANDLE = 142,
+ BuiltinOperator_READ_VARIABLE = 143,
+ BuiltinOperator_ASSIGN_VARIABLE = 144,
+ BuiltinOperator_BROADCAST_ARGS = 145,
+ BuiltinOperator_RANDOM_STANDARD_NORMAL = 146,
+ BuiltinOperator_MIN = BuiltinOperator_BCQ_GATHER,
+ BuiltinOperator_MAX = BuiltinOperator_RANDOM_STANDARD_NORMAL
+};
+
+inline const BuiltinOperator (&EnumValuesBuiltinOperator())[150]
+{
+ static const BuiltinOperator values[] = {BuiltinOperator_BCQ_GATHER,
+ BuiltinOperator_BCQ_FULLY_CONNECTED,
+ BuiltinOperator_INSTANCE_NORM,
+ BuiltinOperator_ADD,
BuiltinOperator_AVERAGE_POOL_2D,
BuiltinOperator_CONCATENATION,
BuiltinOperator_CONV_2D,
@@ -817,15 +892,36 @@ inline const BuiltinOperator (&EnumValuesBuiltinOperator())[130]
BuiltinOperator_DENSIFY,
BuiltinOperator_SEGMENT_SUM,
BuiltinOperator_BATCH_MATMUL,
- BuiltinOperator_BCQ_GATHER,
- BuiltinOperator_BCQ_FULLY_CONNECTED,
- BuiltinOperator_INSTANCE_NORM};
+ BuiltinOperator_PLACEHOLDER_FOR_GREATER_OP_CODES,
+ BuiltinOperator_CUMSUM,
+ BuiltinOperator_CALL_ONCE,
+ BuiltinOperator_BROADCAST_TO,
+ BuiltinOperator_RFFT2D,
+ BuiltinOperator_CONV_3D,
+ BuiltinOperator_IMAG,
+ BuiltinOperator_REAL,
+ BuiltinOperator_COMPLEX_ABS,
+ BuiltinOperator_HASHTABLE,
+ BuiltinOperator_HASHTABLE_FIND,
+ BuiltinOperator_HASHTABLE_IMPORT,
+ BuiltinOperator_HASHTABLE_SIZE,
+ BuiltinOperator_REDUCE_ALL,
+ BuiltinOperator_CONV_3D_TRANSPOSE,
+ BuiltinOperator_VAR_HANDLE,
+ BuiltinOperator_READ_VARIABLE,
+ BuiltinOperator_ASSIGN_VARIABLE,
+ BuiltinOperator_BROADCAST_ARGS,
+ BuiltinOperator_RANDOM_STANDARD_NORMAL};
return values;
}
inline const char *const *EnumNamesBuiltinOperator()
{
- static const char *const names[256] = {"ADD",
+ static const char *const names[152] = {"BCQ_GATHER",
+ "BCQ_FULLY_CONNECTED",
+ "INSTANCE_NORM",
+ "",
+ "ADD",
"AVERAGE_POOL_2D",
"CONCATENATION",
"CONV_2D",
@@ -952,147 +1048,40 @@ inline const char *const *EnumNamesBuiltinOperator()
"DENSIFY",
"SEGMENT_SUM",
"BATCH_MATMUL",
- "",
- "",
- "",
- "",
- "",
- "",
- "",
- "",
- "",
- "",
- "",
- "",
- "",
- "",
- "",
- "",
- "",
- "",
- "",
- "",
- "",
- "",
- "",
- "",
- "",
- "",
- "",
- "",
- "",
- "",
- "",
- "",
- "",
- "",
- "",
- "",
- "",
- "",
- "",
- "",
- "",
- "",
- "",
- "",
- "",
- "",
- "",
- "",
- "",
- "",
- "",
- "",
- "",
- "",
- "",
- "",
- "",
- "",
- "",
- "",
- "",
- "",
- "",
- "",
- "",
- "",
- "",
- "",
- "",
- "",
- "",
- "",
- "",
- "",
- "",
- "",
- "",
- "",
- "",
- "",
- "",
- "",
- "",
- "",
- "",
- "",
- "",
- "",
- "",
- "",
- "",
- "",
- "",
- "",
- "",
- "",
- "",
- "",
- "",
- "",
- "",
- "",
- "",
- "",
- "",
- "",
- "",
- "",
- "",
- "",
- "",
- "",
- "",
- "",
- "",
- "",
- "",
- "",
- "",
- "",
- "",
- "",
- "",
- "",
- "",
- "BCQ_GATHER",
- "BCQ_FULLY_CONNECTED",
- "INSTANCE_NORM",
+ "PLACEHOLDER_FOR_GREATER_OP_CODES",
+ "CUMSUM",
+ "CALL_ONCE",
+ "BROADCAST_TO",
+ "RFFT2D",
+ "CONV_3D",
+ "IMAG",
+ "REAL",
+ "COMPLEX_ABS",
+ "HASHTABLE",
+ "HASHTABLE_FIND",
+ "HASHTABLE_IMPORT",
+ "HASHTABLE_SIZE",
+ "REDUCE_ALL",
+ "CONV_3D_TRANSPOSE",
+ "VAR_HANDLE",
+ "READ_VARIABLE",
+ "ASSIGN_VARIABLE",
+ "BROADCAST_ARGS",
+ "RANDOM_STANDARD_NORMAL",
nullptr};
return names;
}
inline const char *EnumNameBuiltinOperator(BuiltinOperator e)
{
- if (flatbuffers::IsOutRange(e, BuiltinOperator_ADD, BuiltinOperator_INSTANCE_NORM))
+ if (flatbuffers::IsOutRange(e, BuiltinOperator_BCQ_GATHER,
+ BuiltinOperator_RANDOM_STANDARD_NORMAL))
return "";
- const size_t index = static_cast<size_t>(e);
+ const size_t index = static_cast<size_t>(e) - static_cast<size_t>(BuiltinOperator_BCQ_GATHER);
return EnumNamesBuiltinOperator()[index];
}
-enum BuiltinOptions
+enum BuiltinOptions : uint8_t
{
BuiltinOptions_NONE = 0,
BuiltinOptions_Conv2DOptions = 1,
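
Moving the Circle-specific operators from 252..254 down to -4..-2 shrinks the name table from 256 padded slots to a dense 152, but the table no longer starts at zero, so EnumNameBuiltinOperator rebases the index by BuiltinOperator_BCQ_GATHER. The arithmetic, worked through (a sketch of the lookup, not library code):

    // names[] covers values -4..146: 151 slots (one "" gap at the unused -1)
    // plus a trailing nullptr, i.e. the 152 entries declared above.
    size_t name_index(circle::BuiltinOperator e)
    {
      // Unsigned wraparound on both casts cancels out for negative values.
      return static_cast<size_t>(e) - static_cast<size_t>(circle::BuiltinOperator_BCQ_GATHER);
    }
    // name_index(BuiltinOperator_BCQ_GATHER)             == 0   -> "BCQ_GATHER"
    // name_index(BuiltinOperator_ADD)                    == 4   -> "ADD"
    // name_index(BuiltinOperator_RANDOM_STANDARD_NORMAL) == 150 -> last named entry
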
@@ -1196,6 +1185,19 @@ enum BuiltinOptions
BuiltinOptions_DensifyOptions = 99,
BuiltinOptions_SegmentSumOptions = 100,
BuiltinOptions_BatchMatMulOptions = 101,
+ BuiltinOptions_CumsumOptions = 102,
+ BuiltinOptions_CallOnceOptions = 103,
+ BuiltinOptions_BroadcastToOptions = 104,
+ BuiltinOptions_Rfft2dOptions = 105,
+ BuiltinOptions_Conv3DOptions = 106,
+ BuiltinOptions_HashtableOptions = 107,
+ BuiltinOptions_HashtableFindOptions = 108,
+ BuiltinOptions_HashtableImportOptions = 109,
+ BuiltinOptions_HashtableSizeOptions = 110,
+ BuiltinOptions_VarHandleOptions = 111,
+ BuiltinOptions_ReadVariableOptions = 112,
+ BuiltinOptions_AssignVariableOptions = 113,
+ BuiltinOptions_RandomOptions = 114,
BuiltinOptions_BCQGatherOptions = 252,
BuiltinOptions_BCQFullyConnectedOptions = 253,
BuiltinOptions_InstanceNormOptions = 254,
@@ -1203,7 +1205,7 @@ enum BuiltinOptions
BuiltinOptions_MAX = BuiltinOptions_InstanceNormOptions
};
-inline const BuiltinOptions (&EnumValuesBuiltinOptions())[105]
+inline const BuiltinOptions (&EnumValuesBuiltinOptions())[118]
{
static const BuiltinOptions values[] = {BuiltinOptions_NONE,
BuiltinOptions_Conv2DOptions,
@@ -1307,6 +1309,19 @@ inline const BuiltinOptions (&EnumValuesBuiltinOptions())[105]
BuiltinOptions_DensifyOptions,
BuiltinOptions_SegmentSumOptions,
BuiltinOptions_BatchMatMulOptions,
+ BuiltinOptions_CumsumOptions,
+ BuiltinOptions_CallOnceOptions,
+ BuiltinOptions_BroadcastToOptions,
+ BuiltinOptions_Rfft2dOptions,
+ BuiltinOptions_Conv3DOptions,
+ BuiltinOptions_HashtableOptions,
+ BuiltinOptions_HashtableFindOptions,
+ BuiltinOptions_HashtableImportOptions,
+ BuiltinOptions_HashtableSizeOptions,
+ BuiltinOptions_VarHandleOptions,
+ BuiltinOptions_ReadVariableOptions,
+ BuiltinOptions_AssignVariableOptions,
+ BuiltinOptions_RandomOptions,
BuiltinOptions_BCQGatherOptions,
BuiltinOptions_BCQFullyConnectedOptions,
BuiltinOptions_InstanceNormOptions};
@@ -1417,19 +1432,19 @@ inline const char *const *EnumNamesBuiltinOptions()
"DensifyOptions",
"SegmentSumOptions",
"BatchMatMulOptions",
- "",
- "",
- "",
- "",
- "",
- "",
- "",
- "",
- "",
- "",
- "",
- "",
- "",
+ "CumsumOptions",
+ "CallOnceOptions",
+ "BroadcastToOptions",
+ "Rfft2dOptions",
+ "Conv3DOptions",
+ "HashtableOptions",
+ "HashtableFindOptions",
+ "HashtableImportOptions",
+ "HashtableSizeOptions",
+ "VarHandleOptions",
+ "ReadVariableOptions",
+ "AssignVariableOptions",
+ "RandomOptions",
"",
"",
"",
@@ -2092,6 +2107,71 @@ template <> struct BuiltinOptionsTraits<circle::BatchMatMulOptions>
static const BuiltinOptions enum_value = BuiltinOptions_BatchMatMulOptions;
};
+template <> struct BuiltinOptionsTraits<circle::CumsumOptions>
+{
+ static const BuiltinOptions enum_value = BuiltinOptions_CumsumOptions;
+};
+
+template <> struct BuiltinOptionsTraits<circle::CallOnceOptions>
+{
+ static const BuiltinOptions enum_value = BuiltinOptions_CallOnceOptions;
+};
+
+template <> struct BuiltinOptionsTraits<circle::BroadcastToOptions>
+{
+ static const BuiltinOptions enum_value = BuiltinOptions_BroadcastToOptions;
+};
+
+template <> struct BuiltinOptionsTraits<circle::Rfft2dOptions>
+{
+ static const BuiltinOptions enum_value = BuiltinOptions_Rfft2dOptions;
+};
+
+template <> struct BuiltinOptionsTraits<circle::Conv3DOptions>
+{
+ static const BuiltinOptions enum_value = BuiltinOptions_Conv3DOptions;
+};
+
+template <> struct BuiltinOptionsTraits<circle::HashtableOptions>
+{
+ static const BuiltinOptions enum_value = BuiltinOptions_HashtableOptions;
+};
+
+template <> struct BuiltinOptionsTraits<circle::HashtableFindOptions>
+{
+ static const BuiltinOptions enum_value = BuiltinOptions_HashtableFindOptions;
+};
+
+template <> struct BuiltinOptionsTraits<circle::HashtableImportOptions>
+{
+ static const BuiltinOptions enum_value = BuiltinOptions_HashtableImportOptions;
+};
+
+template <> struct BuiltinOptionsTraits<circle::HashtableSizeOptions>
+{
+ static const BuiltinOptions enum_value = BuiltinOptions_HashtableSizeOptions;
+};
+
+template <> struct BuiltinOptionsTraits<circle::VarHandleOptions>
+{
+ static const BuiltinOptions enum_value = BuiltinOptions_VarHandleOptions;
+};
+
+template <> struct BuiltinOptionsTraits<circle::ReadVariableOptions>
+{
+ static const BuiltinOptions enum_value = BuiltinOptions_ReadVariableOptions;
+};
+
+template <> struct BuiltinOptionsTraits<circle::AssignVariableOptions>
+{
+ static const BuiltinOptions enum_value = BuiltinOptions_AssignVariableOptions;
+};
+
+template <> struct BuiltinOptionsTraits<circle::RandomOptions>
+{
+ static const BuiltinOptions enum_value = BuiltinOptions_RandomOptions;
+};
+
template <> struct BuiltinOptionsTraits<circle::BCQGatherOptions>
{
static const BuiltinOptions enum_value = BuiltinOptions_BCQGatherOptions;
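
Each new options table also gets a BuiltinOptionsTraits specialization, the compile-time map from a table type to its discriminant in the BuiltinOptions union; generic writer code uses it to tag builtin_options without hand-maintaining the correspondence. A one-line sanity check under the header above, as a sketch:

    static_assert(circle::BuiltinOptionsTraits<circle::Conv3DOptions>::enum_value ==
                    circle::BuiltinOptions_Conv3DOptions,
                  "traits tie each options table to its union tag");
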
@@ -2112,7 +2192,7 @@ bool VerifyBuiltinOptionsVector(flatbuffers::Verifier &verifier,
const flatbuffers::Vector<flatbuffers::Offset<void>> *values,
const flatbuffers::Vector<uint8_t> *types);
-enum Padding
+enum Padding : int8_t
{
Padding_SAME = 0,
Padding_VALID = 1,
@@ -2140,7 +2220,7 @@ inline const char *EnumNamePadding(Padding e)
return EnumNamesPadding()[index];
}
-enum ActivationFunctionType
+enum ActivationFunctionType : int8_t
{
ActivationFunctionType_NONE = 0,
ActivationFunctionType_RELU = 1,
@@ -2175,7 +2255,7 @@ inline const char *EnumNameActivationFunctionType(ActivationFunctionType e)
return EnumNamesActivationFunctionType()[index];
}
-enum LSHProjectionType
+enum LSHProjectionType : int8_t
{
LSHProjectionType_UNKNOWN = 0,
LSHProjectionType_SPARSE = 1,
@@ -2205,7 +2285,7 @@ inline const char *EnumNameLSHProjectionType(LSHProjectionType e)
return EnumNamesLSHProjectionType()[index];
}
-enum FullyConnectedOptionsWeightsFormat
+enum FullyConnectedOptionsWeightsFormat : int8_t
{
FullyConnectedOptionsWeightsFormat_DEFAULT = 0,
FullyConnectedOptionsWeightsFormat_SHUFFLED4x16INT8 = 1,
@@ -2237,7 +2317,7 @@ inline const char *EnumNameFullyConnectedOptionsWeightsFormat(FullyConnectedOpti
}
}
-enum LSTMKernelType
+enum LSTMKernelType : int8_t
{
LSTMKernelType_FULL = 0,
LSTMKernelType_BASIC = 1,
@@ -2265,7 +2345,7 @@ inline const char *EnumNameLSTMKernelType(LSTMKernelType e)
return EnumNamesLSTMKernelType()[index];
}
-enum CombinerType
+enum CombinerType : int8_t
{
CombinerType_SUM = 0,
CombinerType_MEAN = 1,
@@ -2294,7 +2374,7 @@ inline const char *EnumNameCombinerType(CombinerType e)
return EnumNamesCombinerType()[index];
}
-enum MirrorPadMode
+enum MirrorPadMode : int8_t
{
MirrorPadMode_REFLECT = 0,
MirrorPadMode_SYMMETRIC = 1,
@@ -2322,7 +2402,7 @@ inline const char *EnumNameMirrorPadMode(MirrorPadMode e)
return EnumNamesMirrorPadMode()[index];
}
-enum CustomOptionsFormat
+enum CustomOptionsFormat : int8_t
{
CustomOptionsFormat_FLEXBUFFERS = 0,
CustomOptionsFormat_MIN = CustomOptionsFormat_FLEXBUFFERS,
@@ -2349,7 +2429,7 @@ inline const char *EnumNameCustomOptionsFormat(CustomOptionsFormat e)
return EnumNamesCustomOptionsFormat()[index];
}
-enum DataFormat
+enum DataFormat : int8_t
{
DataFormat_CHANNELS_LAST = 0,
DataFormat_CHANNELS_FIRST = 1,
@@ -2408,7 +2488,6 @@ struct CustomQuantizationBuilder
{
start_ = fbb_.StartTable();
}
- CustomQuantizationBuilder &operator=(const CustomQuantizationBuilder &);
flatbuffers::Offset<CustomQuantization> Finish()
{
const auto end = fbb_.EndTable(start_);
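
The many deleted `XOptionsBuilder &operator=(const XOptionsBuilder &);` lines in the hunks that follow are the old declare-but-never-define idiom for suppressing copy assignment. The declaration was redundant: every builder holds a `flatbuffers::FlatBufferBuilder &fbb_` member, and a class with a reference member has its copy-assignment operator implicitly deleted anyway. A minimal illustration of that language rule, assuming nothing beyond the standard library:

    #include <type_traits>

    struct Dep {};
    struct Builder
    {
      Dep &dep_; // reference member, like fbb_ in the generated builders
      explicit Builder(Dep &d) : dep_(d) {}
    };

    // Holds with no user-declared operator=: the reference member deletes it.
    static_assert(!std::is_copy_assignable<Builder>::value,
                  "a reference member implies deleted copy assignment");
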
@@ -2539,7 +2618,6 @@ struct QuantizationParametersBuilder
{
start_ = fbb_.StartTable();
}
- QuantizationParametersBuilder &operator=(const QuantizationParametersBuilder &);
flatbuffers::Offset<QuantizationParameters> Finish()
{
const auto end = fbb_.EndTable(start_);
@@ -2613,7 +2691,6 @@ struct Int32VectorBuilder
{
start_ = fbb_.StartTable();
}
- Int32VectorBuilder &operator=(const Int32VectorBuilder &);
flatbuffers::Offset<Int32Vector> Finish()
{
const auto end = fbb_.EndTable(start_);
@@ -2670,7 +2747,6 @@ struct Uint16VectorBuilder
{
start_ = fbb_.StartTable();
}
- Uint16VectorBuilder &operator=(const Uint16VectorBuilder &);
flatbuffers::Offset<Uint16Vector> Finish()
{
const auto end = fbb_.EndTable(start_);
@@ -2731,7 +2807,6 @@ struct Uint8VectorBuilder
{
start_ = fbb_.StartTable();
}
- Uint8VectorBuilder &operator=(const Uint8VectorBuilder &);
flatbuffers::Offset<Uint8Vector> Finish()
{
const auto end = fbb_.EndTable(start_);
@@ -2912,7 +2987,6 @@ struct DimensionMetadataBuilder
{
start_ = fbb_.StartTable();
}
- DimensionMetadataBuilder &operator=(const DimensionMetadataBuilder &);
flatbuffers::Offset<DimensionMetadata> Finish()
{
const auto end = fbb_.EndTable(start_);
@@ -2994,7 +3068,6 @@ struct SparsityParametersBuilder
{
start_ = fbb_.StartTable();
}
- SparsityParametersBuilder &operator=(const SparsityParametersBuilder &);
flatbuffers::Offset<SparsityParameters> Finish()
{
const auto end = fbb_.EndTable(start_);
@@ -3121,7 +3194,6 @@ struct TensorBuilder
{
start_ = fbb_.StartTable();
}
- TensorBuilder &operator=(const TensorBuilder &);
flatbuffers::Offset<Tensor> Finish()
{
const auto end = fbb_.EndTable(start_);
@@ -3235,7 +3307,6 @@ struct Conv2DOptionsBuilder
{
start_ = fbb_.StartTable();
}
- Conv2DOptionsBuilder &operator=(const Conv2DOptionsBuilder &);
flatbuffers::Offset<Conv2DOptions> Finish()
{
const auto end = fbb_.EndTable(start_);
@@ -3260,6 +3331,116 @@ inline flatbuffers::Offset<Conv2DOptions> CreateConv2DOptions(
return builder_.Finish();
}
+struct Conv3DOptions FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table
+{
+ typedef Conv3DOptionsBuilder Builder;
+ enum FlatBuffersVTableOffset FLATBUFFERS_VTABLE_UNDERLYING_TYPE
+ {
+ VT_PADDING = 4,
+ VT_STRIDE_D = 6,
+ VT_STRIDE_W = 8,
+ VT_STRIDE_H = 10,
+ VT_FUSED_ACTIVATION_FUNCTION = 12,
+ VT_DILATION_D_FACTOR = 14,
+ VT_DILATION_W_FACTOR = 16,
+ VT_DILATION_H_FACTOR = 18
+ };
+ circle::Padding padding() const
+ {
+ return static_cast<circle::Padding>(GetField<int8_t>(VT_PADDING, 0));
+ }
+ int32_t stride_d() const { return GetField<int32_t>(VT_STRIDE_D, 0); }
+ int32_t stride_w() const { return GetField<int32_t>(VT_STRIDE_W, 0); }
+ int32_t stride_h() const { return GetField<int32_t>(VT_STRIDE_H, 0); }
+ circle::ActivationFunctionType fused_activation_function() const
+ {
+ return static_cast<circle::ActivationFunctionType>(
+ GetField<int8_t>(VT_FUSED_ACTIVATION_FUNCTION, 0));
+ }
+ int32_t dilation_d_factor() const { return GetField<int32_t>(VT_DILATION_D_FACTOR, 1); }
+ int32_t dilation_w_factor() const { return GetField<int32_t>(VT_DILATION_W_FACTOR, 1); }
+ int32_t dilation_h_factor() const { return GetField<int32_t>(VT_DILATION_H_FACTOR, 1); }
+ bool Verify(flatbuffers::Verifier &verifier) const
+ {
+ return VerifyTableStart(verifier) && VerifyField<int8_t>(verifier, VT_PADDING) &&
+ VerifyField<int32_t>(verifier, VT_STRIDE_D) &&
+ VerifyField<int32_t>(verifier, VT_STRIDE_W) &&
+ VerifyField<int32_t>(verifier, VT_STRIDE_H) &&
+ VerifyField<int8_t>(verifier, VT_FUSED_ACTIVATION_FUNCTION) &&
+ VerifyField<int32_t>(verifier, VT_DILATION_D_FACTOR) &&
+ VerifyField<int32_t>(verifier, VT_DILATION_W_FACTOR) &&
+ VerifyField<int32_t>(verifier, VT_DILATION_H_FACTOR) && verifier.EndTable();
+ }
+};
+
+struct Conv3DOptionsBuilder
+{
+ typedef Conv3DOptions Table;
+ flatbuffers::FlatBufferBuilder &fbb_;
+ flatbuffers::uoffset_t start_;
+ void add_padding(circle::Padding padding)
+ {
+ fbb_.AddElement<int8_t>(Conv3DOptions::VT_PADDING, static_cast<int8_t>(padding), 0);
+ }
+ void add_stride_d(int32_t stride_d)
+ {
+ fbb_.AddElement<int32_t>(Conv3DOptions::VT_STRIDE_D, stride_d, 0);
+ }
+ void add_stride_w(int32_t stride_w)
+ {
+ fbb_.AddElement<int32_t>(Conv3DOptions::VT_STRIDE_W, stride_w, 0);
+ }
+ void add_stride_h(int32_t stride_h)
+ {
+ fbb_.AddElement<int32_t>(Conv3DOptions::VT_STRIDE_H, stride_h, 0);
+ }
+ void add_fused_activation_function(circle::ActivationFunctionType fused_activation_function)
+ {
+ fbb_.AddElement<int8_t>(Conv3DOptions::VT_FUSED_ACTIVATION_FUNCTION,
+ static_cast<int8_t>(fused_activation_function), 0);
+ }
+ void add_dilation_d_factor(int32_t dilation_d_factor)
+ {
+ fbb_.AddElement<int32_t>(Conv3DOptions::VT_DILATION_D_FACTOR, dilation_d_factor, 1);
+ }
+ void add_dilation_w_factor(int32_t dilation_w_factor)
+ {
+ fbb_.AddElement<int32_t>(Conv3DOptions::VT_DILATION_W_FACTOR, dilation_w_factor, 1);
+ }
+ void add_dilation_h_factor(int32_t dilation_h_factor)
+ {
+ fbb_.AddElement<int32_t>(Conv3DOptions::VT_DILATION_H_FACTOR, dilation_h_factor, 1);
+ }
+ explicit Conv3DOptionsBuilder(flatbuffers::FlatBufferBuilder &_fbb) : fbb_(_fbb)
+ {
+ start_ = fbb_.StartTable();
+ }
+ flatbuffers::Offset<Conv3DOptions> Finish()
+ {
+ const auto end = fbb_.EndTable(start_);
+ auto o = flatbuffers::Offset<Conv3DOptions>(end);
+ return o;
+ }
+};
+
+inline flatbuffers::Offset<Conv3DOptions> CreateConv3DOptions(
+ flatbuffers::FlatBufferBuilder &_fbb, circle::Padding padding = circle::Padding_SAME,
+ int32_t stride_d = 0, int32_t stride_w = 0, int32_t stride_h = 0,
+ circle::ActivationFunctionType fused_activation_function = circle::ActivationFunctionType_NONE,
+ int32_t dilation_d_factor = 1, int32_t dilation_w_factor = 1, int32_t dilation_h_factor = 1)
+{
+ Conv3DOptionsBuilder builder_(_fbb);
+ builder_.add_dilation_h_factor(dilation_h_factor);
+ builder_.add_dilation_w_factor(dilation_w_factor);
+ builder_.add_dilation_d_factor(dilation_d_factor);
+ builder_.add_stride_h(stride_h);
+ builder_.add_stride_w(stride_w);
+ builder_.add_stride_d(stride_d);
+ builder_.add_fused_activation_function(fused_activation_function);
+ builder_.add_padding(padding);
+ return builder_.Finish();
+}
+
struct Pool2DOptions FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table
{
typedef Pool2DOptionsBuilder Builder;
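
Conv3DOptions is the Conv2D layout plus a depth axis: each stride and dilation gains a _d component, strides default to 0 and dilation factors to 1 (the second argument of GetField/AddElement is the flatbuffers default, so default-valued fields are never stored). A usage sketch with the CreateConv3DOptions helper defined above; the parameter values are illustrative only:

    flatbuffers::FlatBufferBuilder fbb;
    // Omitted trailing arguments keep their schema defaults (dilation_* = 1).
    auto opts = circle::CreateConv3DOptions(
      fbb, circle::Padding_VALID,
      /*stride_d=*/1, /*stride_w=*/2, /*stride_h=*/2,
      circle::ActivationFunctionType_RELU);
    // `opts` is then stored as the operator's builtin_options with union tag
    // BuiltinOptions_Conv3DOptions.
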
@@ -3330,7 +3511,6 @@ struct Pool2DOptionsBuilder
{
start_ = fbb_.StartTable();
}
- Pool2DOptionsBuilder &operator=(const Pool2DOptionsBuilder &);
flatbuffers::Offset<Pool2DOptions> Finish()
{
const auto end = fbb_.EndTable(start_);
@@ -3431,7 +3611,6 @@ struct DepthwiseConv2DOptionsBuilder
{
start_ = fbb_.StartTable();
}
- DepthwiseConv2DOptionsBuilder &operator=(const DepthwiseConv2DOptionsBuilder &);
flatbuffers::Offset<DepthwiseConv2DOptions> Finish()
{
const auto end = fbb_.EndTable(start_);
@@ -3509,7 +3688,6 @@ struct ConcatEmbeddingsOptionsBuilder
{
start_ = fbb_.StartTable();
}
- ConcatEmbeddingsOptionsBuilder &operator=(const ConcatEmbeddingsOptionsBuilder &);
flatbuffers::Offset<ConcatEmbeddingsOptions> Finish()
{
const auto end = fbb_.EndTable(start_);
@@ -3574,7 +3752,6 @@ struct LSHProjectionOptionsBuilder
{
start_ = fbb_.StartTable();
}
- LSHProjectionOptionsBuilder &operator=(const LSHProjectionOptionsBuilder &);
flatbuffers::Offset<LSHProjectionOptions> Finish()
{
const auto end = fbb_.EndTable(start_);
@@ -3639,7 +3816,6 @@ struct SVDFOptionsBuilder
{
start_ = fbb_.StartTable();
}
- SVDFOptionsBuilder &operator=(const SVDFOptionsBuilder &);
flatbuffers::Offset<SVDFOptions> Finish()
{
const auto end = fbb_.EndTable(start_);
@@ -3704,7 +3880,6 @@ struct RNNOptionsBuilder
{
start_ = fbb_.StartTable();
}
- RNNOptionsBuilder &operator=(const RNNOptionsBuilder &);
flatbuffers::Offset<RNNOptions> Finish()
{
const auto end = fbb_.EndTable(start_);
@@ -3775,7 +3950,6 @@ struct SequenceRNNOptionsBuilder
{
start_ = fbb_.StartTable();
}
- SequenceRNNOptionsBuilder &operator=(const SequenceRNNOptionsBuilder &);
flatbuffers::Offset<SequenceRNNOptions> Finish()
{
const auto end = fbb_.EndTable(start_);
@@ -3855,7 +4029,6 @@ struct BidirectionalSequenceRNNOptionsBuilder
{
start_ = fbb_.StartTable();
}
- BidirectionalSequenceRNNOptionsBuilder &operator=(const BidirectionalSequenceRNNOptionsBuilder &);
flatbuffers::Offset<BidirectionalSequenceRNNOptions> Finish()
{
const auto end = fbb_.EndTable(start_);
@@ -3941,7 +4114,6 @@ struct FullyConnectedOptionsBuilder
{
start_ = fbb_.StartTable();
}
- FullyConnectedOptionsBuilder &operator=(const FullyConnectedOptionsBuilder &);
flatbuffers::Offset<FullyConnectedOptions> Finish()
{
const auto end = fbb_.EndTable(start_);
@@ -3990,7 +4162,6 @@ struct SoftmaxOptionsBuilder
{
start_ = fbb_.StartTable();
}
- SoftmaxOptionsBuilder &operator=(const SoftmaxOptionsBuilder &);
flatbuffers::Offset<SoftmaxOptions> Finish()
{
const auto end = fbb_.EndTable(start_);
@@ -4043,7 +4214,6 @@ struct ConcatenationOptionsBuilder
{
start_ = fbb_.StartTable();
}
- ConcatenationOptionsBuilder &operator=(const ConcatenationOptionsBuilder &);
flatbuffers::Offset<ConcatenationOptions> Finish()
{
const auto end = fbb_.EndTable(start_);
@@ -4067,17 +4237,20 @@ struct AddOptions FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table
typedef AddOptionsBuilder Builder;
enum FlatBuffersVTableOffset FLATBUFFERS_VTABLE_UNDERLYING_TYPE
{
- VT_FUSED_ACTIVATION_FUNCTION = 4
+ VT_FUSED_ACTIVATION_FUNCTION = 4,
+ VT_POT_SCALE_INT16 = 6
};
circle::ActivationFunctionType fused_activation_function() const
{
return static_cast<circle::ActivationFunctionType>(
GetField<int8_t>(VT_FUSED_ACTIVATION_FUNCTION, 0));
}
+ bool pot_scale_int16() const { return GetField<uint8_t>(VT_POT_SCALE_INT16, 1) != 0; }
bool Verify(flatbuffers::Verifier &verifier) const
{
return VerifyTableStart(verifier) &&
- VerifyField<int8_t>(verifier, VT_FUSED_ACTIVATION_FUNCTION) && verifier.EndTable();
+ VerifyField<int8_t>(verifier, VT_FUSED_ACTIVATION_FUNCTION) &&
+ VerifyField<uint8_t>(verifier, VT_POT_SCALE_INT16) && verifier.EndTable();
}
};
@@ -4091,11 +4264,15 @@ struct AddOptionsBuilder
fbb_.AddElement<int8_t>(AddOptions::VT_FUSED_ACTIVATION_FUNCTION,
static_cast<int8_t>(fused_activation_function), 0);
}
+ void add_pot_scale_int16(bool pot_scale_int16)
+ {
+ fbb_.AddElement<uint8_t>(AddOptions::VT_POT_SCALE_INT16, static_cast<uint8_t>(pot_scale_int16),
+ 1);
+ }
explicit AddOptionsBuilder(flatbuffers::FlatBufferBuilder &_fbb) : fbb_(_fbb)
{
start_ = fbb_.StartTable();
}
- AddOptionsBuilder &operator=(const AddOptionsBuilder &);
flatbuffers::Offset<AddOptions> Finish()
{
const auto end = fbb_.EndTable(start_);
@@ -4106,9 +4283,11 @@ struct AddOptionsBuilder
inline flatbuffers::Offset<AddOptions> CreateAddOptions(
flatbuffers::FlatBufferBuilder &_fbb,
- circle::ActivationFunctionType fused_activation_function = circle::ActivationFunctionType_NONE)
+ circle::ActivationFunctionType fused_activation_function = circle::ActivationFunctionType_NONE,
+ bool pot_scale_int16 = true)
{
AddOptionsBuilder builder_(_fbb);
+ builder_.add_pot_scale_int16(pot_scale_int16);
builder_.add_fused_activation_function(fused_activation_function);
return builder_.Finish();
}
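
AddOptions gains pot_scale_int16 with a default of true (note the trailing 1 passed to GetField/AddElement), matching the upstream TFLite schema; as I understand the upstream semantics, it marks whether a 16-bit quantized ADD may assume power-of-two rescaling. SubOptions below acquires the identical field. Because the default is non-zero, only an explicit false actually writes a byte into the table:

    flatbuffers::FlatBufferBuilder fbb;
    // Default: pot_scale_int16 == true, nothing extra stored in the table.
    auto add_pot = circle::CreateAddOptions(fbb, circle::ActivationFunctionType_NONE);
    // Opting out of power-of-two scaling stores the flag explicitly.
    auto add_general = circle::CreateAddOptions(fbb, circle::ActivationFunctionType_NONE,
                                                /*pot_scale_int16=*/false);
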
@@ -4146,7 +4325,6 @@ struct MulOptionsBuilder
{
start_ = fbb_.StartTable();
}
- MulOptionsBuilder &operator=(const MulOptionsBuilder &);
flatbuffers::Offset<MulOptions> Finish()
{
const auto end = fbb_.EndTable(start_);
@@ -4197,7 +4375,6 @@ struct L2NormOptionsBuilder
{
start_ = fbb_.StartTable();
}
- L2NormOptionsBuilder &operator=(const L2NormOptionsBuilder &);
flatbuffers::Offset<L2NormOptions> Finish()
{
const auto end = fbb_.EndTable(start_);
@@ -4263,8 +4440,6 @@ struct LocalResponseNormalizationOptionsBuilder
{
start_ = fbb_.StartTable();
}
- LocalResponseNormalizationOptionsBuilder &
- operator=(const LocalResponseNormalizationOptionsBuilder &);
flatbuffers::Offset<LocalResponseNormalizationOptions> Finish()
{
const auto end = fbb_.EndTable(start_);
@@ -4353,7 +4528,6 @@ struct LSTMOptionsBuilder
{
start_ = fbb_.StartTable();
}
- LSTMOptionsBuilder &operator=(const LSTMOptionsBuilder &);
flatbuffers::Offset<LSTMOptions> Finish()
{
const auto end = fbb_.EndTable(start_);
@@ -4445,8 +4619,6 @@ struct UnidirectionalSequenceLSTMOptionsBuilder
{
start_ = fbb_.StartTable();
}
- UnidirectionalSequenceLSTMOptionsBuilder &
- operator=(const UnidirectionalSequenceLSTMOptionsBuilder &);
flatbuffers::Offset<UnidirectionalSequenceLSTMOptions> Finish()
{
const auto end = fbb_.EndTable(start_);
@@ -4546,8 +4718,6 @@ struct BidirectionalSequenceLSTMOptionsBuilder
{
start_ = fbb_.StartTable();
}
- BidirectionalSequenceLSTMOptionsBuilder &
- operator=(const BidirectionalSequenceLSTMOptionsBuilder &);
flatbuffers::Offset<BidirectionalSequenceLSTMOptions> Finish()
{
const auto end = fbb_.EndTable(start_);
@@ -4608,7 +4778,6 @@ struct ResizeBilinearOptionsBuilder
{
start_ = fbb_.StartTable();
}
- ResizeBilinearOptionsBuilder &operator=(const ResizeBilinearOptionsBuilder &);
flatbuffers::Offset<ResizeBilinearOptions> Finish()
{
const auto end = fbb_.EndTable(start_);
@@ -4632,13 +4801,15 @@ struct ResizeNearestNeighborOptions FLATBUFFERS_FINAL_CLASS : private flatbuffer
typedef ResizeNearestNeighborOptionsBuilder Builder;
enum FlatBuffersVTableOffset FLATBUFFERS_VTABLE_UNDERLYING_TYPE
{
- VT_ALIGN_CORNERS = 4
+ VT_ALIGN_CORNERS = 4,
+ VT_HALF_PIXEL_CENTERS = 6
};
bool align_corners() const { return GetField<uint8_t>(VT_ALIGN_CORNERS, 0) != 0; }
+ bool half_pixel_centers() const { return GetField<uint8_t>(VT_HALF_PIXEL_CENTERS, 0) != 0; }
bool Verify(flatbuffers::Verifier &verifier) const
{
return VerifyTableStart(verifier) && VerifyField<uint8_t>(verifier, VT_ALIGN_CORNERS) &&
- verifier.EndTable();
+ VerifyField<uint8_t>(verifier, VT_HALF_PIXEL_CENTERS) && verifier.EndTable();
}
};
@@ -4652,11 +4823,15 @@ struct ResizeNearestNeighborOptionsBuilder
fbb_.AddElement<uint8_t>(ResizeNearestNeighborOptions::VT_ALIGN_CORNERS,
static_cast<uint8_t>(align_corners), 0);
}
+ void add_half_pixel_centers(bool half_pixel_centers)
+ {
+ fbb_.AddElement<uint8_t>(ResizeNearestNeighborOptions::VT_HALF_PIXEL_CENTERS,
+ static_cast<uint8_t>(half_pixel_centers), 0);
+ }
explicit ResizeNearestNeighborOptionsBuilder(flatbuffers::FlatBufferBuilder &_fbb) : fbb_(_fbb)
{
start_ = fbb_.StartTable();
}
- ResizeNearestNeighborOptionsBuilder &operator=(const ResizeNearestNeighborOptionsBuilder &);
flatbuffers::Offset<ResizeNearestNeighborOptions> Finish()
{
const auto end = fbb_.EndTable(start_);
@@ -4666,9 +4841,11 @@ struct ResizeNearestNeighborOptionsBuilder
};
inline flatbuffers::Offset<ResizeNearestNeighborOptions>
-CreateResizeNearestNeighborOptions(flatbuffers::FlatBufferBuilder &_fbb, bool align_corners = false)
+CreateResizeNearestNeighborOptions(flatbuffers::FlatBufferBuilder &_fbb, bool align_corners = false,
+ bool half_pixel_centers = false)
{
ResizeNearestNeighborOptionsBuilder builder_(_fbb);
+ builder_.add_half_pixel_centers(half_pixel_centers);
builder_.add_align_corners(align_corners);
return builder_.Finish();
}
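
half_pixel_centers selects the resize sampling convention: when set, destination pixels are sampled at their centers, which removes the half-pixel shift the legacy corner-aligned mapping introduces at non-integer scales. A sketch of the two mappings as conventionally defined in TensorFlow (background, not code from this diff):

    // Source coordinate for destination index `dst` at a given scale factor.
    float source_coord(int dst, float scale, bool half_pixel_centers)
    {
      return half_pixel_centers ? (static_cast<float>(dst) + 0.5f) * scale - 0.5f
                                : static_cast<float>(dst) * scale;
    }
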
@@ -4701,7 +4878,6 @@ struct CallOptionsBuilder
{
start_ = fbb_.StartTable();
}
- CallOptionsBuilder &operator=(const CallOptionsBuilder &);
flatbuffers::Offset<CallOptions> Finish()
{
const auto end = fbb_.EndTable(start_);
@@ -4736,7 +4912,6 @@ struct PadOptionsBuilder
{
start_ = fbb_.StartTable();
}
- PadOptionsBuilder &operator=(const PadOptionsBuilder &);
flatbuffers::Offset<PadOptions> Finish()
{
const auto end = fbb_.EndTable(start_);
@@ -4769,7 +4944,6 @@ struct PadV2OptionsBuilder
{
start_ = fbb_.StartTable();
}
- PadV2OptionsBuilder &operator=(const PadV2OptionsBuilder &);
flatbuffers::Offset<PadV2Options> Finish()
{
const auto end = fbb_.EndTable(start_);
@@ -4815,7 +4989,6 @@ struct ReshapeOptionsBuilder
{
start_ = fbb_.StartTable();
}
- ReshapeOptionsBuilder &operator=(const ReshapeOptionsBuilder &);
flatbuffers::Offset<ReshapeOptions> Finish()
{
const auto end = fbb_.EndTable(start_);
@@ -4859,7 +5032,6 @@ struct SpaceToBatchNDOptionsBuilder
{
start_ = fbb_.StartTable();
}
- SpaceToBatchNDOptionsBuilder &operator=(const SpaceToBatchNDOptionsBuilder &);
flatbuffers::Offset<SpaceToBatchNDOptions> Finish()
{
const auto end = fbb_.EndTable(start_);
@@ -4893,7 +5065,6 @@ struct BatchToSpaceNDOptionsBuilder
{
start_ = fbb_.StartTable();
}
- BatchToSpaceNDOptionsBuilder &operator=(const BatchToSpaceNDOptionsBuilder &);
flatbuffers::Offset<BatchToSpaceNDOptions> Finish()
{
const auto end = fbb_.EndTable(start_);
@@ -4951,7 +5122,6 @@ struct SkipGramOptionsBuilder
{
start_ = fbb_.StartTable();
}
- SkipGramOptionsBuilder &operator=(const SkipGramOptionsBuilder &);
flatbuffers::Offset<SkipGramOptions> Finish()
{
const auto end = fbb_.EndTable(start_);
@@ -4999,7 +5169,6 @@ struct SpaceToDepthOptionsBuilder
{
start_ = fbb_.StartTable();
}
- SpaceToDepthOptionsBuilder &operator=(const SpaceToDepthOptionsBuilder &);
flatbuffers::Offset<SpaceToDepthOptions> Finish()
{
const auto end = fbb_.EndTable(start_);
@@ -5044,7 +5213,6 @@ struct DepthToSpaceOptionsBuilder
{
start_ = fbb_.StartTable();
}
- DepthToSpaceOptionsBuilder &operator=(const DepthToSpaceOptionsBuilder &);
flatbuffers::Offset<DepthToSpaceOptions> Finish()
{
const auto end = fbb_.EndTable(start_);
@@ -5066,17 +5234,20 @@ struct SubOptions FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table
typedef SubOptionsBuilder Builder;
enum FlatBuffersVTableOffset FLATBUFFERS_VTABLE_UNDERLYING_TYPE
{
- VT_FUSED_ACTIVATION_FUNCTION = 4
+ VT_FUSED_ACTIVATION_FUNCTION = 4,
+ VT_POT_SCALE_INT16 = 6
};
circle::ActivationFunctionType fused_activation_function() const
{
return static_cast<circle::ActivationFunctionType>(
GetField<int8_t>(VT_FUSED_ACTIVATION_FUNCTION, 0));
}
+ bool pot_scale_int16() const { return GetField<uint8_t>(VT_POT_SCALE_INT16, 1) != 0; }
bool Verify(flatbuffers::Verifier &verifier) const
{
return VerifyTableStart(verifier) &&
- VerifyField<int8_t>(verifier, VT_FUSED_ACTIVATION_FUNCTION) && verifier.EndTable();
+ VerifyField<int8_t>(verifier, VT_FUSED_ACTIVATION_FUNCTION) &&
+ VerifyField<uint8_t>(verifier, VT_POT_SCALE_INT16) && verifier.EndTable();
}
};
@@ -5090,11 +5261,15 @@ struct SubOptionsBuilder
fbb_.AddElement<int8_t>(SubOptions::VT_FUSED_ACTIVATION_FUNCTION,
static_cast<int8_t>(fused_activation_function), 0);
}
+ void add_pot_scale_int16(bool pot_scale_int16)
+ {
+ fbb_.AddElement<uint8_t>(SubOptions::VT_POT_SCALE_INT16, static_cast<uint8_t>(pot_scale_int16),
+ 1);
+ }
explicit SubOptionsBuilder(flatbuffers::FlatBufferBuilder &_fbb) : fbb_(_fbb)
{
start_ = fbb_.StartTable();
}
- SubOptionsBuilder &operator=(const SubOptionsBuilder &);
flatbuffers::Offset<SubOptions> Finish()
{
const auto end = fbb_.EndTable(start_);
@@ -5105,9 +5280,11 @@ struct SubOptionsBuilder
inline flatbuffers::Offset<SubOptions> CreateSubOptions(
flatbuffers::FlatBufferBuilder &_fbb,
- circle::ActivationFunctionType fused_activation_function = circle::ActivationFunctionType_NONE)
+ circle::ActivationFunctionType fused_activation_function = circle::ActivationFunctionType_NONE,
+ bool pot_scale_int16 = true)
{
SubOptionsBuilder builder_(_fbb);
+ builder_.add_pot_scale_int16(pot_scale_int16);
builder_.add_fused_activation_function(fused_activation_function);
return builder_.Finish();
}
@@ -5145,7 +5322,6 @@ struct DivOptionsBuilder
{
start_ = fbb_.StartTable();
}
- DivOptionsBuilder &operator=(const DivOptionsBuilder &);
flatbuffers::Offset<DivOptions> Finish()
{
const auto end = fbb_.EndTable(start_);
@@ -5181,7 +5357,6 @@ struct TopKV2OptionsBuilder
{
start_ = fbb_.StartTable();
}
- TopKV2OptionsBuilder &operator=(const TopKV2OptionsBuilder &);
flatbuffers::Offset<TopKV2Options> Finish()
{
const auto end = fbb_.EndTable(start_);
@@ -5228,7 +5403,6 @@ struct EmbeddingLookupSparseOptionsBuilder
{
start_ = fbb_.StartTable();
}
- EmbeddingLookupSparseOptionsBuilder &operator=(const EmbeddingLookupSparseOptionsBuilder &);
flatbuffers::Offset<EmbeddingLookupSparseOptions> Finish()
{
const auto end = fbb_.EndTable(start_);
@@ -5251,13 +5425,15 @@ struct GatherOptions FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table
typedef GatherOptionsBuilder Builder;
enum FlatBuffersVTableOffset FLATBUFFERS_VTABLE_UNDERLYING_TYPE
{
- VT_AXIS = 4
+ VT_AXIS = 4,
+ VT_BATCH_DIMS = 6
};
int32_t axis() const { return GetField<int32_t>(VT_AXIS, 0); }
+ int32_t batch_dims() const { return GetField<int32_t>(VT_BATCH_DIMS, 0); }
bool Verify(flatbuffers::Verifier &verifier) const
{
return VerifyTableStart(verifier) && VerifyField<int32_t>(verifier, VT_AXIS) &&
- verifier.EndTable();
+ VerifyField<int32_t>(verifier, VT_BATCH_DIMS) && verifier.EndTable();
}
};
@@ -5267,11 +5443,14 @@ struct GatherOptionsBuilder
flatbuffers::FlatBufferBuilder &fbb_;
flatbuffers::uoffset_t start_;
void add_axis(int32_t axis) { fbb_.AddElement<int32_t>(GatherOptions::VT_AXIS, axis, 0); }
+ void add_batch_dims(int32_t batch_dims)
+ {
+ fbb_.AddElement<int32_t>(GatherOptions::VT_BATCH_DIMS, batch_dims, 0);
+ }
explicit GatherOptionsBuilder(flatbuffers::FlatBufferBuilder &_fbb) : fbb_(_fbb)
{
start_ = fbb_.StartTable();
}
- GatherOptionsBuilder &operator=(const GatherOptionsBuilder &);
flatbuffers::Offset<GatherOptions> Finish()
{
const auto end = fbb_.EndTable(start_);
@@ -5280,10 +5459,11 @@ struct GatherOptionsBuilder
}
};
-inline flatbuffers::Offset<GatherOptions> CreateGatherOptions(flatbuffers::FlatBufferBuilder &_fbb,
- int32_t axis = 0)
+inline flatbuffers::Offset<GatherOptions>
+CreateGatherOptions(flatbuffers::FlatBufferBuilder &_fbb, int32_t axis = 0, int32_t batch_dims = 0)
{
GatherOptionsBuilder builder_(_fbb);
+ builder_.add_batch_dims(batch_dims);
builder_.add_axis(axis);
return builder_.Finish();
}
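
batch_dims extends Gather to batched lookups: the leading batch_dims axes of params and indices are treated as matching batch dimensions rather than gathered over, following the semantics of TensorFlow's tf.gather. Keeping axis as the first parameter means existing CreateGatherOptions callers compile unchanged; a sketch:

    flatbuffers::FlatBufferBuilder fbb;
    // Classic gather over axis 1, no batching (defaults written out for clarity).
    auto plain   = circle::CreateGatherOptions(fbb, /*axis=*/1, /*batch_dims=*/0);
    // Batched gather: the leading axis of params/indices is a shared batch axis.
    auto batched = circle::CreateGatherOptions(fbb, /*axis=*/1, /*batch_dims=*/1);
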
@@ -5306,7 +5486,6 @@ struct TransposeOptionsBuilder
{
start_ = fbb_.StartTable();
}
- TransposeOptionsBuilder &operator=(const TransposeOptionsBuilder &);
flatbuffers::Offset<TransposeOptions> Finish()
{
const auto end = fbb_.EndTable(start_);
@@ -5340,7 +5519,6 @@ struct ExpOptionsBuilder
{
start_ = fbb_.StartTable();
}
- ExpOptionsBuilder &operator=(const ExpOptionsBuilder &);
flatbuffers::Offset<ExpOptions> Finish()
{
const auto end = fbb_.EndTable(start_);
@@ -5373,7 +5551,6 @@ struct CosOptionsBuilder
{
start_ = fbb_.StartTable();
}
- CosOptionsBuilder &operator=(const CosOptionsBuilder &);
flatbuffers::Offset<CosOptions> Finish()
{
const auto end = fbb_.EndTable(start_);
@@ -5416,7 +5593,6 @@ struct ReducerOptionsBuilder
{
start_ = fbb_.StartTable();
}
- ReducerOptionsBuilder &operator=(const ReducerOptionsBuilder &);
flatbuffers::Offset<ReducerOptions> Finish()
{
const auto end = fbb_.EndTable(start_);
@@ -5464,7 +5640,6 @@ struct SqueezeOptionsBuilder
{
start_ = fbb_.StartTable();
}
- SqueezeOptionsBuilder &operator=(const SqueezeOptionsBuilder &);
flatbuffers::Offset<SqueezeOptions> Finish()
{
const auto end = fbb_.EndTable(start_);
@@ -5518,7 +5693,6 @@ struct SplitOptionsBuilder
{
start_ = fbb_.StartTable();
}
- SplitOptionsBuilder &operator=(const SplitOptionsBuilder &);
flatbuffers::Offset<SplitOptions> Finish()
{
const auto end = fbb_.EndTable(start_);
@@ -5563,7 +5737,6 @@ struct SplitVOptionsBuilder
{
start_ = fbb_.StartTable();
}
- SplitVOptionsBuilder &operator=(const SplitVOptionsBuilder &);
flatbuffers::Offset<SplitVOptions> Finish()
{
const auto end = fbb_.EndTable(start_);
@@ -5635,7 +5808,6 @@ struct StridedSliceOptionsBuilder
{
start_ = fbb_.StartTable();
}
- StridedSliceOptionsBuilder &operator=(const StridedSliceOptionsBuilder &);
flatbuffers::Offset<StridedSliceOptions> Finish()
{
const auto end = fbb_.EndTable(start_);
@@ -5676,7 +5848,6 @@ struct LogSoftmaxOptionsBuilder
{
start_ = fbb_.StartTable();
}
- LogSoftmaxOptionsBuilder &operator=(const LogSoftmaxOptionsBuilder &);
flatbuffers::Offset<LogSoftmaxOptions> Finish()
{
const auto end = fbb_.EndTable(start_);
@@ -5732,7 +5903,6 @@ struct CastOptionsBuilder
{
start_ = fbb_.StartTable();
}
- CastOptionsBuilder &operator=(const CastOptionsBuilder &);
flatbuffers::Offset<CastOptions> Finish()
{
const auto end = fbb_.EndTable(start_);
@@ -5770,7 +5940,6 @@ struct DequantizeOptionsBuilder
{
start_ = fbb_.StartTable();
}
- DequantizeOptionsBuilder &operator=(const DequantizeOptionsBuilder &);
flatbuffers::Offset<DequantizeOptions> Finish()
{
const auto end = fbb_.EndTable(start_);
@@ -5804,7 +5973,6 @@ struct MaximumMinimumOptionsBuilder
{
start_ = fbb_.StartTable();
}
- MaximumMinimumOptionsBuilder &operator=(const MaximumMinimumOptionsBuilder &);
flatbuffers::Offset<MaximumMinimumOptions> Finish()
{
const auto end = fbb_.EndTable(start_);
@@ -5838,7 +6006,6 @@ struct TileOptionsBuilder
{
start_ = fbb_.StartTable();
}
- TileOptionsBuilder &operator=(const TileOptionsBuilder &);
flatbuffers::Offset<TileOptions> Finish()
{
const auto end = fbb_.EndTable(start_);
@@ -5884,7 +6051,6 @@ struct ArgMaxOptionsBuilder
{
start_ = fbb_.StartTable();
}
- ArgMaxOptionsBuilder &operator=(const ArgMaxOptionsBuilder &);
flatbuffers::Offset<ArgMaxOptions> Finish()
{
const auto end = fbb_.EndTable(start_);
@@ -5933,7 +6099,6 @@ struct ArgMinOptionsBuilder
{
start_ = fbb_.StartTable();
}
- ArgMinOptionsBuilder &operator=(const ArgMinOptionsBuilder &);
flatbuffers::Offset<ArgMinOptions> Finish()
{
const auto end = fbb_.EndTable(start_);
@@ -5969,7 +6134,6 @@ struct GreaterOptionsBuilder
{
start_ = fbb_.StartTable();
}
- GreaterOptionsBuilder &operator=(const GreaterOptionsBuilder &);
flatbuffers::Offset<GreaterOptions> Finish()
{
const auto end = fbb_.EndTable(start_);
@@ -6003,7 +6167,6 @@ struct GreaterEqualOptionsBuilder
{
start_ = fbb_.StartTable();
}
- GreaterEqualOptionsBuilder &operator=(const GreaterEqualOptionsBuilder &);
flatbuffers::Offset<GreaterEqualOptions> Finish()
{
const auto end = fbb_.EndTable(start_);
@@ -6037,7 +6200,6 @@ struct LessOptionsBuilder
{
start_ = fbb_.StartTable();
}
- LessOptionsBuilder &operator=(const LessOptionsBuilder &);
flatbuffers::Offset<LessOptions> Finish()
{
const auto end = fbb_.EndTable(start_);
@@ -6070,7 +6232,6 @@ struct LessEqualOptionsBuilder
{
start_ = fbb_.StartTable();
}
- LessEqualOptionsBuilder &operator=(const LessEqualOptionsBuilder &);
flatbuffers::Offset<LessEqualOptions> Finish()
{
const auto end = fbb_.EndTable(start_);
@@ -6104,7 +6265,6 @@ struct NegOptionsBuilder
{
start_ = fbb_.StartTable();
}
- NegOptionsBuilder &operator=(const NegOptionsBuilder &);
flatbuffers::Offset<NegOptions> Finish()
{
const auto end = fbb_.EndTable(start_);
@@ -6137,7 +6297,6 @@ struct SelectOptionsBuilder
{
start_ = fbb_.StartTable();
}
- SelectOptionsBuilder &operator=(const SelectOptionsBuilder &);
flatbuffers::Offset<SelectOptions> Finish()
{
const auto end = fbb_.EndTable(start_);
@@ -6170,7 +6329,6 @@ struct SliceOptionsBuilder
{
start_ = fbb_.StartTable();
}
- SliceOptionsBuilder &operator=(const SliceOptionsBuilder &);
flatbuffers::Offset<SliceOptions> Finish()
{
const auto end = fbb_.EndTable(start_);
@@ -6229,7 +6387,6 @@ struct TransposeConvOptionsBuilder
{
start_ = fbb_.StartTable();
}
- TransposeConvOptionsBuilder &operator=(const TransposeConvOptionsBuilder &);
flatbuffers::Offset<TransposeConvOptions> Finish()
{
const auto end = fbb_.EndTable(start_);
@@ -6268,7 +6425,6 @@ struct ExpandDimsOptionsBuilder
{
start_ = fbb_.StartTable();
}
- ExpandDimsOptionsBuilder &operator=(const ExpandDimsOptionsBuilder &);
flatbuffers::Offset<ExpandDimsOptions> Finish()
{
const auto end = fbb_.EndTable(start_);
@@ -6313,7 +6469,6 @@ struct SparseToDenseOptionsBuilder
{
start_ = fbb_.StartTable();
}
- SparseToDenseOptionsBuilder &operator=(const SparseToDenseOptionsBuilder &);
flatbuffers::Offset<SparseToDenseOptions> Finish()
{
const auto end = fbb_.EndTable(start_);
@@ -6348,7 +6503,6 @@ struct EqualOptionsBuilder
{
start_ = fbb_.StartTable();
}
- EqualOptionsBuilder &operator=(const EqualOptionsBuilder &);
flatbuffers::Offset<EqualOptions> Finish()
{
const auto end = fbb_.EndTable(start_);
@@ -6381,7 +6535,6 @@ struct NotEqualOptionsBuilder
{
start_ = fbb_.StartTable();
}
- NotEqualOptionsBuilder &operator=(const NotEqualOptionsBuilder &);
flatbuffers::Offset<NotEqualOptions> Finish()
{
const auto end = fbb_.EndTable(start_);
@@ -6428,7 +6581,6 @@ struct ShapeOptionsBuilder
{
start_ = fbb_.StartTable();
}
- ShapeOptionsBuilder &operator=(const ShapeOptionsBuilder &);
flatbuffers::Offset<ShapeOptions> Finish()
{
const auto end = fbb_.EndTable(start_);
@@ -6464,7 +6616,6 @@ struct RankOptionsBuilder
{
start_ = fbb_.StartTable();
}
- RankOptionsBuilder &operator=(const RankOptionsBuilder &);
flatbuffers::Offset<RankOptions> Finish()
{
const auto end = fbb_.EndTable(start_);
@@ -6497,7 +6648,6 @@ struct PowOptionsBuilder
{
start_ = fbb_.StartTable();
}
- PowOptionsBuilder &operator=(const PowOptionsBuilder &);
flatbuffers::Offset<PowOptions> Finish()
{
const auto end = fbb_.EndTable(start_);
@@ -6554,7 +6704,6 @@ struct FakeQuantOptionsBuilder
{
start_ = fbb_.StartTable();
}
- FakeQuantOptionsBuilder &operator=(const FakeQuantOptionsBuilder &);
flatbuffers::Offset<FakeQuantOptions> Finish()
{
const auto end = fbb_.EndTable(start_);
@@ -6606,7 +6755,6 @@ struct PackOptionsBuilder
{
start_ = fbb_.StartTable();
}
- PackOptionsBuilder &operator=(const PackOptionsBuilder &);
flatbuffers::Offset<PackOptions> Finish()
{
const auto end = fbb_.EndTable(start_);
@@ -6642,7 +6790,6 @@ struct LogicalOrOptionsBuilder
{
start_ = fbb_.StartTable();
}
- LogicalOrOptionsBuilder &operator=(const LogicalOrOptionsBuilder &);
flatbuffers::Offset<LogicalOrOptions> Finish()
{
const auto end = fbb_.EndTable(start_);
@@ -6683,7 +6830,6 @@ struct OneHotOptionsBuilder
{
start_ = fbb_.StartTable();
}
- OneHotOptionsBuilder &operator=(const OneHotOptionsBuilder &);
flatbuffers::Offset<OneHotOptions> Finish()
{
const auto end = fbb_.EndTable(start_);
@@ -6718,7 +6864,6 @@ struct AbsOptionsBuilder
{
start_ = fbb_.StartTable();
}
- AbsOptionsBuilder &operator=(const AbsOptionsBuilder &);
flatbuffers::Offset<AbsOptions> Finish()
{
const auto end = fbb_.EndTable(start_);
@@ -6751,7 +6896,6 @@ struct HardSwishOptionsBuilder
{
start_ = fbb_.StartTable();
}
- HardSwishOptionsBuilder &operator=(const HardSwishOptionsBuilder &);
flatbuffers::Offset<HardSwishOptions> Finish()
{
const auto end = fbb_.EndTable(start_);
@@ -6785,7 +6929,6 @@ struct LogicalAndOptionsBuilder
{
start_ = fbb_.StartTable();
}
- LogicalAndOptionsBuilder &operator=(const LogicalAndOptionsBuilder &);
flatbuffers::Offset<LogicalAndOptions> Finish()
{
const auto end = fbb_.EndTable(start_);
@@ -6819,7 +6962,6 @@ struct LogicalNotOptionsBuilder
{
start_ = fbb_.StartTable();
}
- LogicalNotOptionsBuilder &operator=(const LogicalNotOptionsBuilder &);
flatbuffers::Offset<LogicalNotOptions> Finish()
{
const auto end = fbb_.EndTable(start_);
@@ -6863,7 +7005,6 @@ struct UnpackOptionsBuilder
{
start_ = fbb_.StartTable();
}
- UnpackOptionsBuilder &operator=(const UnpackOptionsBuilder &);
flatbuffers::Offset<UnpackOptions> Finish()
{
const auto end = fbb_.EndTable(start_);
@@ -6899,7 +7040,6 @@ struct FloorDivOptionsBuilder
{
start_ = fbb_.StartTable();
}
- FloorDivOptionsBuilder &operator=(const FloorDivOptionsBuilder &);
flatbuffers::Offset<FloorDivOptions> Finish()
{
const auto end = fbb_.EndTable(start_);
@@ -6933,7 +7073,6 @@ struct SquareOptionsBuilder
{
start_ = fbb_.StartTable();
}
- SquareOptionsBuilder &operator=(const SquareOptionsBuilder &);
flatbuffers::Offset<SquareOptions> Finish()
{
const auto end = fbb_.EndTable(start_);
@@ -6966,7 +7105,6 @@ struct ZerosLikeOptionsBuilder
{
start_ = fbb_.StartTable();
}
- ZerosLikeOptionsBuilder &operator=(const ZerosLikeOptionsBuilder &);
flatbuffers::Offset<ZerosLikeOptions> Finish()
{
const auto end = fbb_.EndTable(start_);
@@ -7000,7 +7138,6 @@ struct FillOptionsBuilder
{
start_ = fbb_.StartTable();
}
- FillOptionsBuilder &operator=(const FillOptionsBuilder &);
flatbuffers::Offset<FillOptions> Finish()
{
const auto end = fbb_.EndTable(start_);
@@ -7033,7 +7170,6 @@ struct FloorModOptionsBuilder
{
start_ = fbb_.StartTable();
}
- FloorModOptionsBuilder &operator=(const FloorModOptionsBuilder &);
flatbuffers::Offset<FloorModOptions> Finish()
{
const auto end = fbb_.EndTable(start_);
@@ -7067,7 +7203,6 @@ struct RangeOptionsBuilder
{
start_ = fbb_.StartTable();
}
- RangeOptionsBuilder &operator=(const RangeOptionsBuilder &);
flatbuffers::Offset<RangeOptions> Finish()
{
const auto end = fbb_.EndTable(start_);
@@ -7107,7 +7242,6 @@ struct LeakyReluOptionsBuilder
{
start_ = fbb_.StartTable();
}
- LeakyReluOptionsBuilder &operator=(const LeakyReluOptionsBuilder &);
flatbuffers::Offset<LeakyReluOptions> Finish()
{
const auto end = fbb_.EndTable(start_);
@@ -7142,7 +7276,6 @@ struct SquaredDifferenceOptionsBuilder
{
start_ = fbb_.StartTable();
}
- SquaredDifferenceOptionsBuilder &operator=(const SquaredDifferenceOptionsBuilder &);
flatbuffers::Offset<SquaredDifferenceOptions> Finish()
{
const auto end = fbb_.EndTable(start_);
@@ -7189,7 +7322,6 @@ struct MirrorPadOptionsBuilder
{
start_ = fbb_.StartTable();
}
- MirrorPadOptionsBuilder &operator=(const MirrorPadOptionsBuilder &);
flatbuffers::Offset<MirrorPadOptions> Finish()
{
const auto end = fbb_.EndTable(start_);
@@ -7238,7 +7370,6 @@ struct UniqueOptionsBuilder
{
start_ = fbb_.StartTable();
}
- UniqueOptionsBuilder &operator=(const UniqueOptionsBuilder &);
flatbuffers::Offset<UniqueOptions> Finish()
{
const auto end = fbb_.EndTable(start_);
@@ -7274,7 +7405,6 @@ struct ReverseV2OptionsBuilder
{
start_ = fbb_.StartTable();
}
- ReverseV2OptionsBuilder &operator=(const ReverseV2OptionsBuilder &);
flatbuffers::Offset<ReverseV2Options> Finish()
{
const auto end = fbb_.EndTable(start_);
@@ -7308,7 +7438,6 @@ struct AddNOptionsBuilder
{
start_ = fbb_.StartTable();
}
- AddNOptionsBuilder &operator=(const AddNOptionsBuilder &);
flatbuffers::Offset<AddNOptions> Finish()
{
const auto end = fbb_.EndTable(start_);
@@ -7341,7 +7470,6 @@ struct GatherNdOptionsBuilder
{
start_ = fbb_.StartTable();
}
- GatherNdOptionsBuilder &operator=(const GatherNdOptionsBuilder &);
flatbuffers::Offset<GatherNdOptions> Finish()
{
const auto end = fbb_.EndTable(start_);
@@ -7375,7 +7503,6 @@ struct WhereOptionsBuilder
{
start_ = fbb_.StartTable();
}
- WhereOptionsBuilder &operator=(const WhereOptionsBuilder &);
flatbuffers::Offset<WhereOptions> Finish()
{
const auto end = fbb_.EndTable(start_);
@@ -7424,7 +7551,6 @@ struct ReverseSequenceOptionsBuilder
{
start_ = fbb_.StartTable();
}
- ReverseSequenceOptionsBuilder &operator=(const ReverseSequenceOptionsBuilder &);
flatbuffers::Offset<ReverseSequenceOptions> Finish()
{
const auto end = fbb_.EndTable(start_);
@@ -7461,7 +7587,6 @@ struct MatrixDiagOptionsBuilder
{
start_ = fbb_.StartTable();
}
- MatrixDiagOptionsBuilder &operator=(const MatrixDiagOptionsBuilder &);
flatbuffers::Offset<MatrixDiagOptions> Finish()
{
const auto end = fbb_.EndTable(start_);
@@ -7495,7 +7620,6 @@ struct QuantizeOptionsBuilder
{
start_ = fbb_.StartTable();
}
- QuantizeOptionsBuilder &operator=(const QuantizeOptionsBuilder &);
flatbuffers::Offset<QuantizeOptions> Finish()
{
const auto end = fbb_.EndTable(start_);
@@ -7529,7 +7653,6 @@ struct MatrixSetDiagOptionsBuilder
{
start_ = fbb_.StartTable();
}
- MatrixSetDiagOptionsBuilder &operator=(const MatrixSetDiagOptionsBuilder &);
flatbuffers::Offset<MatrixSetDiagOptions> Finish()
{
const auto end = fbb_.EndTable(start_);
@@ -7579,7 +7702,6 @@ struct IfOptionsBuilder
{
start_ = fbb_.StartTable();
}
- IfOptionsBuilder &operator=(const IfOptionsBuilder &);
flatbuffers::Offset<IfOptions> Finish()
{
const auto end = fbb_.EndTable(start_);
@@ -7598,6 +7720,50 @@ inline flatbuffers::Offset<IfOptions> CreateIfOptions(flatbuffers::FlatBufferBui
return builder_.Finish();
}
+struct CallOnceOptions FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table
+{
+ typedef CallOnceOptionsBuilder Builder;
+ enum FlatBuffersVTableOffset FLATBUFFERS_VTABLE_UNDERLYING_TYPE
+ {
+ VT_INIT_SUBGRAPH_INDEX = 4
+ };
+ int32_t init_subgraph_index() const { return GetField<int32_t>(VT_INIT_SUBGRAPH_INDEX, 0); }
+ bool Verify(flatbuffers::Verifier &verifier) const
+ {
+ return VerifyTableStart(verifier) && VerifyField<int32_t>(verifier, VT_INIT_SUBGRAPH_INDEX) &&
+ verifier.EndTable();
+ }
+};
+
+struct CallOnceOptionsBuilder
+{
+ typedef CallOnceOptions Table;
+ flatbuffers::FlatBufferBuilder &fbb_;
+ flatbuffers::uoffset_t start_;
+ void add_init_subgraph_index(int32_t init_subgraph_index)
+ {
+ fbb_.AddElement<int32_t>(CallOnceOptions::VT_INIT_SUBGRAPH_INDEX, init_subgraph_index, 0);
+ }
+ explicit CallOnceOptionsBuilder(flatbuffers::FlatBufferBuilder &_fbb) : fbb_(_fbb)
+ {
+ start_ = fbb_.StartTable();
+ }
+ flatbuffers::Offset<CallOnceOptions> Finish()
+ {
+ const auto end = fbb_.EndTable(start_);
+ auto o = flatbuffers::Offset<CallOnceOptions>(end);
+ return o;
+ }
+};
+
+inline flatbuffers::Offset<CallOnceOptions>
+CreateCallOnceOptions(flatbuffers::FlatBufferBuilder &_fbb, int32_t init_subgraph_index = 0)
+{
+ CallOnceOptionsBuilder builder_(_fbb);
+ builder_.add_init_subgraph_index(init_subgraph_index);
+ return builder_.Finish();
+}
+
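// --- Illustrative aside (not part of the patch): CALL_ONCE carries only the
// --- index of an initialization subgraph. The wrapper name is hypothetical.
inline flatbuffers::Offset<circle::CallOnceOptions>
MakeCallOnceOptions(flatbuffers::FlatBufferBuilder &fbb, int32_t init_subgraph)
{
  // The runtime is expected to execute the referenced subgraph exactly once,
  // typically to populate resource variables, before the main graph runs.
  return circle::CreateCallOnceOptions(fbb, init_subgraph);
}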
struct WhileOptions FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table
{
typedef WhileOptionsBuilder Builder;
@@ -7632,7 +7798,6 @@ struct WhileOptionsBuilder
{
start_ = fbb_.StartTable();
}
- WhileOptionsBuilder &operator=(const WhileOptionsBuilder &);
flatbuffers::Offset<WhileOptions> Finish()
{
const auto end = fbb_.EndTable(start_);
@@ -7669,7 +7834,6 @@ struct NonMaxSuppressionV4OptionsBuilder
{
start_ = fbb_.StartTable();
}
- NonMaxSuppressionV4OptionsBuilder &operator=(const NonMaxSuppressionV4OptionsBuilder &);
flatbuffers::Offset<NonMaxSuppressionV4Options> Finish()
{
const auto end = fbb_.EndTable(start_);
@@ -7703,7 +7867,6 @@ struct NonMaxSuppressionV5OptionsBuilder
{
start_ = fbb_.StartTable();
}
- NonMaxSuppressionV5OptionsBuilder &operator=(const NonMaxSuppressionV5OptionsBuilder &);
flatbuffers::Offset<NonMaxSuppressionV5Options> Finish()
{
const auto end = fbb_.EndTable(start_);
@@ -7737,7 +7900,6 @@ struct ScatterNdOptionsBuilder
{
start_ = fbb_.StartTable();
}
- ScatterNdOptionsBuilder &operator=(const ScatterNdOptionsBuilder &);
flatbuffers::Offset<ScatterNdOptions> Finish()
{
const auto end = fbb_.EndTable(start_);
@@ -7771,7 +7933,6 @@ struct SelectV2OptionsBuilder
{
start_ = fbb_.StartTable();
}
- SelectV2OptionsBuilder &operator=(const SelectV2OptionsBuilder &);
flatbuffers::Offset<SelectV2Options> Finish()
{
const auto end = fbb_.EndTable(start_);
@@ -7805,7 +7966,6 @@ struct DensifyOptionsBuilder
{
start_ = fbb_.StartTable();
}
- DensifyOptionsBuilder &operator=(const DensifyOptionsBuilder &);
flatbuffers::Offset<DensifyOptions> Finish()
{
const auto end = fbb_.EndTable(start_);
@@ -7839,7 +7999,6 @@ struct SegmentSumOptionsBuilder
{
start_ = fbb_.StartTable();
}
- SegmentSumOptionsBuilder &operator=(const SegmentSumOptionsBuilder &);
flatbuffers::Offset<SegmentSumOptions> Finish()
{
const auto end = fbb_.EndTable(start_);
@@ -7861,14 +8020,20 @@ struct BatchMatMulOptions FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table
enum FlatBuffersVTableOffset FLATBUFFERS_VTABLE_UNDERLYING_TYPE
{
VT_ADJOINT_LHS = 4,
- VT_ADJOINT_RHS = 6
+ VT_ADJOINT_RHS = 6,
+ VT_ASYMMETRIC_QUANTIZE_INPUTS = 8
};
bool adjoint_lhs() const { return GetField<uint8_t>(VT_ADJOINT_LHS, 0) != 0; }
bool adjoint_rhs() const { return GetField<uint8_t>(VT_ADJOINT_RHS, 0) != 0; }
+ bool asymmetric_quantize_inputs() const
+ {
+ return GetField<uint8_t>(VT_ASYMMETRIC_QUANTIZE_INPUTS, 0) != 0;
+ }
bool Verify(flatbuffers::Verifier &verifier) const
{
return VerifyTableStart(verifier) && VerifyField<uint8_t>(verifier, VT_ADJOINT_LHS) &&
- VerifyField<uint8_t>(verifier, VT_ADJOINT_RHS) && verifier.EndTable();
+ VerifyField<uint8_t>(verifier, VT_ADJOINT_RHS) &&
+ VerifyField<uint8_t>(verifier, VT_ASYMMETRIC_QUANTIZE_INPUTS) && verifier.EndTable();
}
};
@@ -7887,11 +8052,15 @@ struct BatchMatMulOptionsBuilder
fbb_.AddElement<uint8_t>(BatchMatMulOptions::VT_ADJOINT_RHS, static_cast<uint8_t>(adjoint_rhs),
0);
}
+ void add_asymmetric_quantize_inputs(bool asymmetric_quantize_inputs)
+ {
+ fbb_.AddElement<uint8_t>(BatchMatMulOptions::VT_ASYMMETRIC_QUANTIZE_INPUTS,
+ static_cast<uint8_t>(asymmetric_quantize_inputs), 0);
+ }
explicit BatchMatMulOptionsBuilder(flatbuffers::FlatBufferBuilder &_fbb) : fbb_(_fbb)
{
start_ = fbb_.StartTable();
}
- BatchMatMulOptionsBuilder &operator=(const BatchMatMulOptionsBuilder &);
flatbuffers::Offset<BatchMatMulOptions> Finish()
{
const auto end = fbb_.EndTable(start_);
@@ -7902,14 +8071,478 @@ struct BatchMatMulOptionsBuilder
inline flatbuffers::Offset<BatchMatMulOptions>
CreateBatchMatMulOptions(flatbuffers::FlatBufferBuilder &_fbb, bool adjoint_lhs = false,
- bool adjoint_rhs = false)
+ bool adjoint_rhs = false, bool asymmetric_quantize_inputs = false)
{
BatchMatMulOptionsBuilder builder_(_fbb);
+ builder_.add_asymmetric_quantize_inputs(asymmetric_quantize_inputs);
builder_.add_adjoint_rhs(adjoint_rhs);
builder_.add_adjoint_lhs(adjoint_lhs);
return builder_.Finish();
}
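// --- Illustrative aside (not part of the patch): the new flag rides alongside
// --- the existing adjoint flags; older buffers default it to false. The
// --- wrapper name is hypothetical.
inline flatbuffers::Offset<circle::BatchMatMulOptions>
MakeHybridBatchMatMulOptions(flatbuffers::FlatBufferBuilder &fbb)
{
  // asymmetric_quantize_inputs = true requests on-the-fly asymmetric input
  // quantization for hybrid (float activation / quantized weight) kernels.
  return circle::CreateBatchMatMulOptions(fbb, /*adjoint_lhs=*/false,
                                          /*adjoint_rhs=*/false,
                                          /*asymmetric_quantize_inputs=*/true);
}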
+struct CumsumOptions FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table
+{
+ typedef CumsumOptionsBuilder Builder;
+ enum FlatBuffersVTableOffset FLATBUFFERS_VTABLE_UNDERLYING_TYPE
+ {
+ VT_EXCLUSIVE = 4,
+ VT_REVERSE = 6
+ };
+ bool exclusive() const { return GetField<uint8_t>(VT_EXCLUSIVE, 0) != 0; }
+ bool reverse() const { return GetField<uint8_t>(VT_REVERSE, 0) != 0; }
+ bool Verify(flatbuffers::Verifier &verifier) const
+ {
+ return VerifyTableStart(verifier) && VerifyField<uint8_t>(verifier, VT_EXCLUSIVE) &&
+ VerifyField<uint8_t>(verifier, VT_REVERSE) && verifier.EndTable();
+ }
+};
+
+struct CumsumOptionsBuilder
+{
+ typedef CumsumOptions Table;
+ flatbuffers::FlatBufferBuilder &fbb_;
+ flatbuffers::uoffset_t start_;
+ void add_exclusive(bool exclusive)
+ {
+ fbb_.AddElement<uint8_t>(CumsumOptions::VT_EXCLUSIVE, static_cast<uint8_t>(exclusive), 0);
+ }
+ void add_reverse(bool reverse)
+ {
+ fbb_.AddElement<uint8_t>(CumsumOptions::VT_REVERSE, static_cast<uint8_t>(reverse), 0);
+ }
+ explicit CumsumOptionsBuilder(flatbuffers::FlatBufferBuilder &_fbb) : fbb_(_fbb)
+ {
+ start_ = fbb_.StartTable();
+ }
+ flatbuffers::Offset<CumsumOptions> Finish()
+ {
+ const auto end = fbb_.EndTable(start_);
+ auto o = flatbuffers::Offset<CumsumOptions>(end);
+ return o;
+ }
+};
+
+inline flatbuffers::Offset<CumsumOptions> CreateCumsumOptions(flatbuffers::FlatBufferBuilder &_fbb,
+ bool exclusive = false,
+ bool reverse = false)
+{
+ CumsumOptionsBuilder builder_(_fbb);
+ builder_.add_reverse(reverse);
+ builder_.add_exclusive(exclusive);
+ return builder_.Finish();
+}
+
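// --- Illustrative aside (not part of the patch): exclusive and reverse select
// --- among the four cumulative-sum variants. The wrapper name is hypothetical.
inline flatbuffers::Offset<circle::CumsumOptions>
MakeExclusiveCumsumOptions(flatbuffers::FlatBufferBuilder &fbb)
{
  // exclusive = true shifts the sum so element i excludes input[i];
  // reverse = true would accumulate from the end of the axis instead.
  return circle::CreateCumsumOptions(fbb, /*exclusive=*/true, /*reverse=*/false);
}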
+struct BroadcastToOptions FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table
+{
+ typedef BroadcastToOptionsBuilder Builder;
+ bool Verify(flatbuffers::Verifier &verifier) const
+ {
+ return VerifyTableStart(verifier) && verifier.EndTable();
+ }
+};
+
+struct BroadcastToOptionsBuilder
+{
+ typedef BroadcastToOptions Table;
+ flatbuffers::FlatBufferBuilder &fbb_;
+ flatbuffers::uoffset_t start_;
+ explicit BroadcastToOptionsBuilder(flatbuffers::FlatBufferBuilder &_fbb) : fbb_(_fbb)
+ {
+ start_ = fbb_.StartTable();
+ }
+ flatbuffers::Offset<BroadcastToOptions> Finish()
+ {
+ const auto end = fbb_.EndTable(start_);
+ auto o = flatbuffers::Offset<BroadcastToOptions>(end);
+ return o;
+ }
+};
+
+inline flatbuffers::Offset<BroadcastToOptions>
+CreateBroadcastToOptions(flatbuffers::FlatBufferBuilder &_fbb)
+{
+ BroadcastToOptionsBuilder builder_(_fbb);
+ return builder_.Finish();
+}
+
+struct Rfft2dOptions FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table
+{
+ typedef Rfft2dOptionsBuilder Builder;
+ bool Verify(flatbuffers::Verifier &verifier) const
+ {
+ return VerifyTableStart(verifier) && verifier.EndTable();
+ }
+};
+
+struct Rfft2dOptionsBuilder
+{
+ typedef Rfft2dOptions Table;
+ flatbuffers::FlatBufferBuilder &fbb_;
+ flatbuffers::uoffset_t start_;
+ explicit Rfft2dOptionsBuilder(flatbuffers::FlatBufferBuilder &_fbb) : fbb_(_fbb)
+ {
+ start_ = fbb_.StartTable();
+ }
+ flatbuffers::Offset<Rfft2dOptions> Finish()
+ {
+ const auto end = fbb_.EndTable(start_);
+ auto o = flatbuffers::Offset<Rfft2dOptions>(end);
+ return o;
+ }
+};
+
+inline flatbuffers::Offset<Rfft2dOptions> CreateRfft2dOptions(flatbuffers::FlatBufferBuilder &_fbb)
+{
+ Rfft2dOptionsBuilder builder_(_fbb);
+ return builder_.Finish();
+}
+
+struct HashtableOptions FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table
+{
+ typedef HashtableOptionsBuilder Builder;
+ enum FlatBuffersVTableOffset FLATBUFFERS_VTABLE_UNDERLYING_TYPE
+ {
+ VT_TABLE_ID = 4,
+ VT_KEY_DTYPE = 6,
+ VT_VALUE_DTYPE = 8
+ };
+ int32_t table_id() const { return GetField<int32_t>(VT_TABLE_ID, 0); }
+ circle::TensorType key_dtype() const
+ {
+ return static_cast<circle::TensorType>(GetField<int8_t>(VT_KEY_DTYPE, 0));
+ }
+ circle::TensorType value_dtype() const
+ {
+ return static_cast<circle::TensorType>(GetField<int8_t>(VT_VALUE_DTYPE, 0));
+ }
+ bool Verify(flatbuffers::Verifier &verifier) const
+ {
+ return VerifyTableStart(verifier) && VerifyField<int32_t>(verifier, VT_TABLE_ID) &&
+ VerifyField<int8_t>(verifier, VT_KEY_DTYPE) &&
+ VerifyField<int8_t>(verifier, VT_VALUE_DTYPE) && verifier.EndTable();
+ }
+};
+
+struct HashtableOptionsBuilder
+{
+ typedef HashtableOptions Table;
+ flatbuffers::FlatBufferBuilder &fbb_;
+ flatbuffers::uoffset_t start_;
+ void add_table_id(int32_t table_id)
+ {
+ fbb_.AddElement<int32_t>(HashtableOptions::VT_TABLE_ID, table_id, 0);
+ }
+ void add_key_dtype(circle::TensorType key_dtype)
+ {
+ fbb_.AddElement<int8_t>(HashtableOptions::VT_KEY_DTYPE, static_cast<int8_t>(key_dtype), 0);
+ }
+ void add_value_dtype(circle::TensorType value_dtype)
+ {
+ fbb_.AddElement<int8_t>(HashtableOptions::VT_VALUE_DTYPE, static_cast<int8_t>(value_dtype), 0);
+ }
+ explicit HashtableOptionsBuilder(flatbuffers::FlatBufferBuilder &_fbb) : fbb_(_fbb)
+ {
+ start_ = fbb_.StartTable();
+ }
+ flatbuffers::Offset<HashtableOptions> Finish()
+ {
+ const auto end = fbb_.EndTable(start_);
+ auto o = flatbuffers::Offset<HashtableOptions>(end);
+ return o;
+ }
+};
+
+inline flatbuffers::Offset<HashtableOptions>
+CreateHashtableOptions(flatbuffers::FlatBufferBuilder &_fbb, int32_t table_id = 0,
+ circle::TensorType key_dtype = circle::TensorType_FLOAT32,
+ circle::TensorType value_dtype = circle::TensorType_FLOAT32)
+{
+ HashtableOptionsBuilder builder_(_fbb);
+ builder_.add_table_id(table_id);
+ builder_.add_value_dtype(value_dtype);
+ builder_.add_key_dtype(key_dtype);
+ return builder_.Finish();
+}
+
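// --- Illustrative aside (not part of the patch): describing a string -> int64
// --- lookup table. The wrapper name and table_id value are hypothetical.
inline flatbuffers::Offset<circle::HashtableOptions>
MakeStringToInt64HashtableOptions(flatbuffers::FlatBufferBuilder &fbb)
{
  // table_id ties the HASHTABLE_FIND / HASHTABLE_IMPORT / HASHTABLE_SIZE
  // operators back to the table declared here; the dtypes fix key/value types.
  return circle::CreateHashtableOptions(fbb, /*table_id=*/1,
                                        circle::TensorType_STRING,
                                        circle::TensorType_INT64);
}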
+struct HashtableFindOptions FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table
+{
+ typedef HashtableFindOptionsBuilder Builder;
+ bool Verify(flatbuffers::Verifier &verifier) const
+ {
+ return VerifyTableStart(verifier) && verifier.EndTable();
+ }
+};
+
+struct HashtableFindOptionsBuilder
+{
+ typedef HashtableFindOptions Table;
+ flatbuffers::FlatBufferBuilder &fbb_;
+ flatbuffers::uoffset_t start_;
+ explicit HashtableFindOptionsBuilder(flatbuffers::FlatBufferBuilder &_fbb) : fbb_(_fbb)
+ {
+ start_ = fbb_.StartTable();
+ }
+ flatbuffers::Offset<HashtableFindOptions> Finish()
+ {
+ const auto end = fbb_.EndTable(start_);
+ auto o = flatbuffers::Offset<HashtableFindOptions>(end);
+ return o;
+ }
+};
+
+inline flatbuffers::Offset<HashtableFindOptions>
+CreateHashtableFindOptions(flatbuffers::FlatBufferBuilder &_fbb)
+{
+ HashtableFindOptionsBuilder builder_(_fbb);
+ return builder_.Finish();
+}
+
+struct HashtableImportOptions FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table
+{
+ typedef HashtableImportOptionsBuilder Builder;
+ bool Verify(flatbuffers::Verifier &verifier) const
+ {
+ return VerifyTableStart(verifier) && verifier.EndTable();
+ }
+};
+
+struct HashtableImportOptionsBuilder
+{
+ typedef HashtableImportOptions Table;
+ flatbuffers::FlatBufferBuilder &fbb_;
+ flatbuffers::uoffset_t start_;
+ explicit HashtableImportOptionsBuilder(flatbuffers::FlatBufferBuilder &_fbb) : fbb_(_fbb)
+ {
+ start_ = fbb_.StartTable();
+ }
+ flatbuffers::Offset<HashtableImportOptions> Finish()
+ {
+ const auto end = fbb_.EndTable(start_);
+ auto o = flatbuffers::Offset<HashtableImportOptions>(end);
+ return o;
+ }
+};
+
+inline flatbuffers::Offset<HashtableImportOptions>
+CreateHashtableImportOptions(flatbuffers::FlatBufferBuilder &_fbb)
+{
+ HashtableImportOptionsBuilder builder_(_fbb);
+ return builder_.Finish();
+}
+
+struct HashtableSizeOptions FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table
+{
+ typedef HashtableSizeOptionsBuilder Builder;
+ bool Verify(flatbuffers::Verifier &verifier) const
+ {
+ return VerifyTableStart(verifier) && verifier.EndTable();
+ }
+};
+
+struct HashtableSizeOptionsBuilder
+{
+ typedef HashtableSizeOptions Table;
+ flatbuffers::FlatBufferBuilder &fbb_;
+ flatbuffers::uoffset_t start_;
+ explicit HashtableSizeOptionsBuilder(flatbuffers::FlatBufferBuilder &_fbb) : fbb_(_fbb)
+ {
+ start_ = fbb_.StartTable();
+ }
+ flatbuffers::Offset<HashtableSizeOptions> Finish()
+ {
+ const auto end = fbb_.EndTable(start_);
+ auto o = flatbuffers::Offset<HashtableSizeOptions>(end);
+ return o;
+ }
+};
+
+inline flatbuffers::Offset<HashtableSizeOptions>
+CreateHashtableSizeOptions(flatbuffers::FlatBufferBuilder &_fbb)
+{
+ HashtableSizeOptionsBuilder builder_(_fbb);
+ return builder_.Finish();
+}
+
+struct VarHandleOptions FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table
+{
+ typedef VarHandleOptionsBuilder Builder;
+ enum FlatBuffersVTableOffset FLATBUFFERS_VTABLE_UNDERLYING_TYPE
+ {
+ VT_CONTAINER = 4,
+ VT_SHARED_NAME = 6
+ };
+ const flatbuffers::String *container() const
+ {
+ return GetPointer<const flatbuffers::String *>(VT_CONTAINER);
+ }
+ const flatbuffers::String *shared_name() const
+ {
+ return GetPointer<const flatbuffers::String *>(VT_SHARED_NAME);
+ }
+ bool Verify(flatbuffers::Verifier &verifier) const
+ {
+ return VerifyTableStart(verifier) && VerifyOffset(verifier, VT_CONTAINER) &&
+ verifier.VerifyString(container()) && VerifyOffset(verifier, VT_SHARED_NAME) &&
+ verifier.VerifyString(shared_name()) && verifier.EndTable();
+ }
+};
+
+struct VarHandleOptionsBuilder
+{
+ typedef VarHandleOptions Table;
+ flatbuffers::FlatBufferBuilder &fbb_;
+ flatbuffers::uoffset_t start_;
+ void add_container(flatbuffers::Offset<flatbuffers::String> container)
+ {
+ fbb_.AddOffset(VarHandleOptions::VT_CONTAINER, container);
+ }
+ void add_shared_name(flatbuffers::Offset<flatbuffers::String> shared_name)
+ {
+ fbb_.AddOffset(VarHandleOptions::VT_SHARED_NAME, shared_name);
+ }
+ explicit VarHandleOptionsBuilder(flatbuffers::FlatBufferBuilder &_fbb) : fbb_(_fbb)
+ {
+ start_ = fbb_.StartTable();
+ }
+ flatbuffers::Offset<VarHandleOptions> Finish()
+ {
+ const auto end = fbb_.EndTable(start_);
+ auto o = flatbuffers::Offset<VarHandleOptions>(end);
+ return o;
+ }
+};
+
+inline flatbuffers::Offset<VarHandleOptions>
+CreateVarHandleOptions(flatbuffers::FlatBufferBuilder &_fbb,
+ flatbuffers::Offset<flatbuffers::String> container = 0,
+ flatbuffers::Offset<flatbuffers::String> shared_name = 0)
+{
+ VarHandleOptionsBuilder builder_(_fbb);
+ builder_.add_shared_name(shared_name);
+ builder_.add_container(container);
+ return builder_.Finish();
+}
+
+inline flatbuffers::Offset<VarHandleOptions>
+CreateVarHandleOptionsDirect(flatbuffers::FlatBufferBuilder &_fbb, const char *container = nullptr,
+ const char *shared_name = nullptr)
+{
+ auto container__ = container ? _fbb.CreateString(container) : 0;
+ auto shared_name__ = shared_name ? _fbb.CreateString(shared_name) : 0;
+ return circle::CreateVarHandleOptions(_fbb, container__, shared_name__);
+}
+
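// --- Illustrative aside (not part of the patch): VAR_HANDLE names a resource
// --- variable that READ_VARIABLE / ASSIGN_VARIABLE then address through the
// --- produced handle. The wrapper name and string values are hypothetical.
inline flatbuffers::Offset<circle::VarHandleOptions>
MakeCounterVarHandleOptions(flatbuffers::FlatBufferBuilder &fbb)
{
  // container scopes the variable; shared_name identifies it within that
  // scope, mirroring the TensorFlow resource-variable naming convention.
  return circle::CreateVarHandleOptionsDirect(fbb, /*container=*/"",
                                              /*shared_name=*/"counter");
}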
+struct ReadVariableOptions FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table
+{
+ typedef ReadVariableOptionsBuilder Builder;
+ bool Verify(flatbuffers::Verifier &verifier) const
+ {
+ return VerifyTableStart(verifier) && verifier.EndTable();
+ }
+};
+
+struct ReadVariableOptionsBuilder
+{
+ typedef ReadVariableOptions Table;
+ flatbuffers::FlatBufferBuilder &fbb_;
+ flatbuffers::uoffset_t start_;
+ explicit ReadVariableOptionsBuilder(flatbuffers::FlatBufferBuilder &_fbb) : fbb_(_fbb)
+ {
+ start_ = fbb_.StartTable();
+ }
+ flatbuffers::Offset<ReadVariableOptions> Finish()
+ {
+ const auto end = fbb_.EndTable(start_);
+ auto o = flatbuffers::Offset<ReadVariableOptions>(end);
+ return o;
+ }
+};
+
+inline flatbuffers::Offset<ReadVariableOptions>
+CreateReadVariableOptions(flatbuffers::FlatBufferBuilder &_fbb)
+{
+ ReadVariableOptionsBuilder builder_(_fbb);
+ return builder_.Finish();
+}
+
+struct AssignVariableOptions FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table
+{
+ typedef AssignVariableOptionsBuilder Builder;
+ bool Verify(flatbuffers::Verifier &verifier) const
+ {
+ return VerifyTableStart(verifier) && verifier.EndTable();
+ }
+};
+
+struct AssignVariableOptionsBuilder
+{
+ typedef AssignVariableOptions Table;
+ flatbuffers::FlatBufferBuilder &fbb_;
+ flatbuffers::uoffset_t start_;
+ explicit AssignVariableOptionsBuilder(flatbuffers::FlatBufferBuilder &_fbb) : fbb_(_fbb)
+ {
+ start_ = fbb_.StartTable();
+ }
+ flatbuffers::Offset<AssignVariableOptions> Finish()
+ {
+ const auto end = fbb_.EndTable(start_);
+ auto o = flatbuffers::Offset<AssignVariableOptions>(end);
+ return o;
+ }
+};
+
+inline flatbuffers::Offset<AssignVariableOptions>
+CreateAssignVariableOptions(flatbuffers::FlatBufferBuilder &_fbb)
+{
+ AssignVariableOptionsBuilder builder_(_fbb);
+ return builder_.Finish();
+}
+
+struct RandomOptions FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table
+{
+ typedef RandomOptionsBuilder Builder;
+ enum FlatBuffersVTableOffset FLATBUFFERS_VTABLE_UNDERLYING_TYPE
+ {
+ VT_SEED = 4,
+ VT_SEED2 = 6
+ };
+ int32_t seed() const { return GetField<int32_t>(VT_SEED, 0); }
+ int32_t seed2() const { return GetField<int32_t>(VT_SEED2, 0); }
+ bool Verify(flatbuffers::Verifier &verifier) const
+ {
+ return VerifyTableStart(verifier) && VerifyField<int32_t>(verifier, VT_SEED) &&
+ VerifyField<int32_t>(verifier, VT_SEED2) && verifier.EndTable();
+ }
+};
+
+struct RandomOptionsBuilder
+{
+ typedef RandomOptions Table;
+ flatbuffers::FlatBufferBuilder &fbb_;
+ flatbuffers::uoffset_t start_;
+ void add_seed(int32_t seed) { fbb_.AddElement<int32_t>(RandomOptions::VT_SEED, seed, 0); }
+ void add_seed2(int32_t seed2) { fbb_.AddElement<int32_t>(RandomOptions::VT_SEED2, seed2, 0); }
+ explicit RandomOptionsBuilder(flatbuffers::FlatBufferBuilder &_fbb) : fbb_(_fbb)
+ {
+ start_ = fbb_.StartTable();
+ }
+ flatbuffers::Offset<RandomOptions> Finish()
+ {
+ const auto end = fbb_.EndTable(start_);
+ auto o = flatbuffers::Offset<RandomOptions>(end);
+ return o;
+ }
+};
+
+inline flatbuffers::Offset<RandomOptions> CreateRandomOptions(flatbuffers::FlatBufferBuilder &_fbb,
+ int32_t seed = 0, int32_t seed2 = 0)
+{
+ RandomOptionsBuilder builder_(_fbb);
+ builder_.add_seed2(seed2);
+ builder_.add_seed(seed);
+ return builder_.Finish();
+}
+
struct BCQGatherOptions FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table
{
typedef BCQGatherOptionsBuilder Builder;
@@ -7941,7 +8574,6 @@ struct BCQGatherOptionsBuilder
{
start_ = fbb_.StartTable();
}
- BCQGatherOptionsBuilder &operator=(const BCQGatherOptionsBuilder &);
flatbuffers::Offset<BCQGatherOptions> Finish()
{
const auto end = fbb_.EndTable(start_);
@@ -8000,7 +8632,6 @@ struct BCQFullyConnectedOptionsBuilder
{
start_ = fbb_.StartTable();
}
- BCQFullyConnectedOptionsBuilder &operator=(const BCQFullyConnectedOptionsBuilder &);
flatbuffers::Offset<BCQFullyConnectedOptions> Finish()
{
const auto end = fbb_.EndTable(start_);
@@ -8058,7 +8689,6 @@ struct InstanceNormOptionsBuilder
{
start_ = fbb_.StartTable();
}
- InstanceNormOptionsBuilder &operator=(const InstanceNormOptionsBuilder &);
flatbuffers::Offset<InstanceNormOptions> Finish()
{
const auto end = fbb_.EndTable(start_);
@@ -8082,24 +8712,28 @@ struct OperatorCode FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table
typedef OperatorCodeBuilder Builder;
enum FlatBuffersVTableOffset FLATBUFFERS_VTABLE_UNDERLYING_TYPE
{
- VT_BUILTIN_CODE = 4,
+ VT_DEPRECATED_BUILTIN_CODE = 4,
VT_CUSTOM_CODE = 6,
- VT_VERSION = 8
+ VT_VERSION = 8,
+ VT_BUILTIN_CODE = 10
};
- circle::BuiltinOperator builtin_code() const
- {
- return static_cast<circle::BuiltinOperator>(GetField<uint8_t>(VT_BUILTIN_CODE, 0));
- }
+ int8_t deprecated_builtin_code() const { return GetField<int8_t>(VT_DEPRECATED_BUILTIN_CODE, 0); }
const flatbuffers::String *custom_code() const
{
return GetPointer<const flatbuffers::String *>(VT_CUSTOM_CODE);
}
int32_t version() const { return GetField<int32_t>(VT_VERSION, 1); }
+ circle::BuiltinOperator builtin_code() const
+ {
+ return static_cast<circle::BuiltinOperator>(GetField<int32_t>(VT_BUILTIN_CODE, 0));
+ }
bool Verify(flatbuffers::Verifier &verifier) const
{
- return VerifyTableStart(verifier) && VerifyField<uint8_t>(verifier, VT_BUILTIN_CODE) &&
+ return VerifyTableStart(verifier) &&
+ VerifyField<int8_t>(verifier, VT_DEPRECATED_BUILTIN_CODE) &&
VerifyOffset(verifier, VT_CUSTOM_CODE) && verifier.VerifyString(custom_code()) &&
- VerifyField<int32_t>(verifier, VT_VERSION) && verifier.EndTable();
+ VerifyField<int32_t>(verifier, VT_VERSION) &&
+ VerifyField<int32_t>(verifier, VT_BUILTIN_CODE) && verifier.EndTable();
}
};
@@ -8108,9 +8742,9 @@ struct OperatorCodeBuilder
typedef OperatorCode Table;
flatbuffers::FlatBufferBuilder &fbb_;
flatbuffers::uoffset_t start_;
- void add_builtin_code(circle::BuiltinOperator builtin_code)
+ void add_deprecated_builtin_code(int8_t deprecated_builtin_code)
{
- fbb_.AddElement<uint8_t>(OperatorCode::VT_BUILTIN_CODE, static_cast<uint8_t>(builtin_code), 0);
+ fbb_.AddElement<int8_t>(OperatorCode::VT_DEPRECATED_BUILTIN_CODE, deprecated_builtin_code, 0);
}
void add_custom_code(flatbuffers::Offset<flatbuffers::String> custom_code)
{
@@ -8120,11 +8754,14 @@ struct OperatorCodeBuilder
{
fbb_.AddElement<int32_t>(OperatorCode::VT_VERSION, version, 1);
}
+ void add_builtin_code(circle::BuiltinOperator builtin_code)
+ {
+ fbb_.AddElement<int32_t>(OperatorCode::VT_BUILTIN_CODE, static_cast<int32_t>(builtin_code), 0);
+ }
explicit OperatorCodeBuilder(flatbuffers::FlatBufferBuilder &_fbb) : fbb_(_fbb)
{
start_ = fbb_.StartTable();
}
- OperatorCodeBuilder &operator=(const OperatorCodeBuilder &);
flatbuffers::Offset<OperatorCode> Finish()
{
const auto end = fbb_.EndTable(start_);
@@ -8134,24 +8771,26 @@ struct OperatorCodeBuilder
};
inline flatbuffers::Offset<OperatorCode>
-CreateOperatorCode(flatbuffers::FlatBufferBuilder &_fbb,
- circle::BuiltinOperator builtin_code = circle::BuiltinOperator_ADD,
- flatbuffers::Offset<flatbuffers::String> custom_code = 0, int32_t version = 1)
+CreateOperatorCode(flatbuffers::FlatBufferBuilder &_fbb, int8_t deprecated_builtin_code = 0,
+ flatbuffers::Offset<flatbuffers::String> custom_code = 0, int32_t version = 1,
+ circle::BuiltinOperator builtin_code = circle::BuiltinOperator_ADD)
{
OperatorCodeBuilder builder_(_fbb);
+ builder_.add_builtin_code(builtin_code);
builder_.add_version(version);
builder_.add_custom_code(custom_code);
- builder_.add_builtin_code(builtin_code);
+ builder_.add_deprecated_builtin_code(deprecated_builtin_code);
return builder_.Finish();
}
inline flatbuffers::Offset<OperatorCode>
-CreateOperatorCodeDirect(flatbuffers::FlatBufferBuilder &_fbb,
- circle::BuiltinOperator builtin_code = circle::BuiltinOperator_ADD,
- const char *custom_code = nullptr, int32_t version = 1)
+CreateOperatorCodeDirect(flatbuffers::FlatBufferBuilder &_fbb, int8_t deprecated_builtin_code = 0,
+ const char *custom_code = nullptr, int32_t version = 1,
+ circle::BuiltinOperator builtin_code = circle::BuiltinOperator_ADD)
{
auto custom_code__ = custom_code ? _fbb.CreateString(custom_code) : 0;
- return circle::CreateOperatorCode(_fbb, builtin_code, custom_code__, version);
+ return circle::CreateOperatorCode(_fbb, deprecated_builtin_code, custom_code__, version,
+ builtin_code);
}
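// --- Illustrative aside (not part of the patch): after the split above,
// --- operator codes that fit in 8 bits may live only in the old int8 field,
// --- while newer codes live only in the int32 field. A common way to resolve
// --- the effective code (mirroring TensorFlow Lite's helper, assumed here) is
// --- to take the larger of the two. The function name is hypothetical.
inline circle::BuiltinOperator GetEffectiveBuiltinCode(const circle::OperatorCode *op_code)
{
  // Newer operators (codes above 127) only fit in the int32 field; older
  // writers only set the int8 field. Taking the larger value covers both.
  auto dep = static_cast<circle::BuiltinOperator>(op_code->deprecated_builtin_code());
  return op_code->builtin_code() > dep ? op_code->builtin_code() : dep;
}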
struct Operator FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table
@@ -8796,6 +9435,84 @@ struct Operator FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table
? static_cast<const circle::BatchMatMulOptions *>(builtin_options())
: nullptr;
}
+ const circle::CumsumOptions *builtin_options_as_CumsumOptions() const
+ {
+ return builtin_options_type() == circle::BuiltinOptions_CumsumOptions
+ ? static_cast<const circle::CumsumOptions *>(builtin_options())
+ : nullptr;
+ }
+ const circle::CallOnceOptions *builtin_options_as_CallOnceOptions() const
+ {
+ return builtin_options_type() == circle::BuiltinOptions_CallOnceOptions
+ ? static_cast<const circle::CallOnceOptions *>(builtin_options())
+ : nullptr;
+ }
+ const circle::BroadcastToOptions *builtin_options_as_BroadcastToOptions() const
+ {
+ return builtin_options_type() == circle::BuiltinOptions_BroadcastToOptions
+ ? static_cast<const circle::BroadcastToOptions *>(builtin_options())
+ : nullptr;
+ }
+ const circle::Rfft2dOptions *builtin_options_as_Rfft2dOptions() const
+ {
+ return builtin_options_type() == circle::BuiltinOptions_Rfft2dOptions
+ ? static_cast<const circle::Rfft2dOptions *>(builtin_options())
+ : nullptr;
+ }
+ const circle::Conv3DOptions *builtin_options_as_Conv3DOptions() const
+ {
+ return builtin_options_type() == circle::BuiltinOptions_Conv3DOptions
+ ? static_cast<const circle::Conv3DOptions *>(builtin_options())
+ : nullptr;
+ }
+ const circle::HashtableOptions *builtin_options_as_HashtableOptions() const
+ {
+ return builtin_options_type() == circle::BuiltinOptions_HashtableOptions
+ ? static_cast<const circle::HashtableOptions *>(builtin_options())
+ : nullptr;
+ }
+ const circle::HashtableFindOptions *builtin_options_as_HashtableFindOptions() const
+ {
+ return builtin_options_type() == circle::BuiltinOptions_HashtableFindOptions
+ ? static_cast<const circle::HashtableFindOptions *>(builtin_options())
+ : nullptr;
+ }
+ const circle::HashtableImportOptions *builtin_options_as_HashtableImportOptions() const
+ {
+ return builtin_options_type() == circle::BuiltinOptions_HashtableImportOptions
+ ? static_cast<const circle::HashtableImportOptions *>(builtin_options())
+ : nullptr;
+ }
+ const circle::HashtableSizeOptions *builtin_options_as_HashtableSizeOptions() const
+ {
+ return builtin_options_type() == circle::BuiltinOptions_HashtableSizeOptions
+ ? static_cast<const circle::HashtableSizeOptions *>(builtin_options())
+ : nullptr;
+ }
+ const circle::VarHandleOptions *builtin_options_as_VarHandleOptions() const
+ {
+ return builtin_options_type() == circle::BuiltinOptions_VarHandleOptions
+ ? static_cast<const circle::VarHandleOptions *>(builtin_options())
+ : nullptr;
+ }
+ const circle::ReadVariableOptions *builtin_options_as_ReadVariableOptions() const
+ {
+ return builtin_options_type() == circle::BuiltinOptions_ReadVariableOptions
+ ? static_cast<const circle::ReadVariableOptions *>(builtin_options())
+ : nullptr;
+ }
+ const circle::AssignVariableOptions *builtin_options_as_AssignVariableOptions() const
+ {
+ return builtin_options_type() == circle::BuiltinOptions_AssignVariableOptions
+ ? static_cast<const circle::AssignVariableOptions *>(builtin_options())
+ : nullptr;
+ }
+ const circle::RandomOptions *builtin_options_as_RandomOptions() const
+ {
+ return builtin_options_type() == circle::BuiltinOptions_RandomOptions
+ ? static_cast<const circle::RandomOptions *>(builtin_options())
+ : nullptr;
+ }
const circle::BCQGatherOptions *builtin_options_as_BCQGatherOptions() const
{
return builtin_options_type() == circle::BuiltinOptions_BCQGatherOptions
@@ -9498,6 +10215,92 @@ Operator::builtin_options_as<circle::BatchMatMulOptions>() const
}
template <>
+inline const circle::CumsumOptions *Operator::builtin_options_as<circle::CumsumOptions>() const
+{
+ return builtin_options_as_CumsumOptions();
+}
+
+template <>
+inline const circle::CallOnceOptions *Operator::builtin_options_as<circle::CallOnceOptions>() const
+{
+ return builtin_options_as_CallOnceOptions();
+}
+
+template <>
+inline const circle::BroadcastToOptions *
+Operator::builtin_options_as<circle::BroadcastToOptions>() const
+{
+ return builtin_options_as_BroadcastToOptions();
+}
+
+template <>
+inline const circle::Rfft2dOptions *Operator::builtin_options_as<circle::Rfft2dOptions>() const
+{
+ return builtin_options_as_Rfft2dOptions();
+}
+
+template <>
+inline const circle::Conv3DOptions *Operator::builtin_options_as<circle::Conv3DOptions>() const
+{
+ return builtin_options_as_Conv3DOptions();
+}
+
+template <>
+inline const circle::HashtableOptions *
+Operator::builtin_options_as<circle::HashtableOptions>() const
+{
+ return builtin_options_as_HashtableOptions();
+}
+
+template <>
+inline const circle::HashtableFindOptions *
+Operator::builtin_options_as<circle::HashtableFindOptions>() const
+{
+ return builtin_options_as_HashtableFindOptions();
+}
+
+template <>
+inline const circle::HashtableImportOptions *
+Operator::builtin_options_as<circle::HashtableImportOptions>() const
+{
+ return builtin_options_as_HashtableImportOptions();
+}
+
+template <>
+inline const circle::HashtableSizeOptions *
+Operator::builtin_options_as<circle::HashtableSizeOptions>() const
+{
+ return builtin_options_as_HashtableSizeOptions();
+}
+
+template <>
+inline const circle::VarHandleOptions *
+Operator::builtin_options_as<circle::VarHandleOptions>() const
+{
+ return builtin_options_as_VarHandleOptions();
+}
+
+template <>
+inline const circle::ReadVariableOptions *
+Operator::builtin_options_as<circle::ReadVariableOptions>() const
+{
+ return builtin_options_as_ReadVariableOptions();
+}
+
+template <>
+inline const circle::AssignVariableOptions *
+Operator::builtin_options_as<circle::AssignVariableOptions>() const
+{
+ return builtin_options_as_AssignVariableOptions();
+}
+
+template <>
+inline const circle::RandomOptions *Operator::builtin_options_as<circle::RandomOptions>() const
+{
+ return builtin_options_as_RandomOptions();
+}
+
+template <>
inline const circle::BCQGatherOptions *
Operator::builtin_options_as<circle::BCQGatherOptions>() const
{
@@ -9566,7 +10369,6 @@ struct OperatorBuilder
{
start_ = fbb_.StartTable();
}
- OperatorBuilder &operator=(const OperatorBuilder &);
flatbuffers::Offset<Operator> Finish()
{
const auto end = fbb_.EndTable(start_);
@@ -9705,7 +10507,6 @@ struct SubGraphBuilder
{
start_ = fbb_.StartTable();
}
- SubGraphBuilder &operator=(const SubGraphBuilder &);
flatbuffers::Offset<SubGraph> Finish()
{
const auto end = fbb_.EndTable(start_);
@@ -9781,7 +10582,6 @@ struct BufferBuilder
{
start_ = fbb_.StartTable();
}
- BufferBuilder &operator=(const BufferBuilder &);
flatbuffers::Offset<Buffer> Finish()
{
const auto end = fbb_.EndTable(start_);
@@ -9845,7 +10645,6 @@ struct MetadataBuilder
{
start_ = fbb_.StartTable();
}
- MetadataBuilder &operator=(const MetadataBuilder &);
flatbuffers::Offset<Metadata> Finish()
{
const auto end = fbb_.EndTable(start_);
@@ -9872,6 +10671,168 @@ inline flatbuffers::Offset<Metadata> CreateMetadataDirect(flatbuffers::FlatBuffe
return circle::CreateMetadata(_fbb, name__, buffer);
}
+struct TensorMap FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table
+{
+ typedef TensorMapBuilder Builder;
+ enum FlatBuffersVTableOffset FLATBUFFERS_VTABLE_UNDERLYING_TYPE
+ {
+ VT_NAME = 4,
+ VT_TENSOR_INDEX = 6
+ };
+ const flatbuffers::String *name() const
+ {
+ return GetPointer<const flatbuffers::String *>(VT_NAME);
+ }
+ uint32_t tensor_index() const { return GetField<uint32_t>(VT_TENSOR_INDEX, 0); }
+ bool Verify(flatbuffers::Verifier &verifier) const
+ {
+ return VerifyTableStart(verifier) && VerifyOffset(verifier, VT_NAME) &&
+ verifier.VerifyString(name()) && VerifyField<uint32_t>(verifier, VT_TENSOR_INDEX) &&
+ verifier.EndTable();
+ }
+};
+
+struct TensorMapBuilder
+{
+ typedef TensorMap Table;
+ flatbuffers::FlatBufferBuilder &fbb_;
+ flatbuffers::uoffset_t start_;
+ void add_name(flatbuffers::Offset<flatbuffers::String> name)
+ {
+ fbb_.AddOffset(TensorMap::VT_NAME, name);
+ }
+ void add_tensor_index(uint32_t tensor_index)
+ {
+ fbb_.AddElement<uint32_t>(TensorMap::VT_TENSOR_INDEX, tensor_index, 0);
+ }
+ explicit TensorMapBuilder(flatbuffers::FlatBufferBuilder &_fbb) : fbb_(_fbb)
+ {
+ start_ = fbb_.StartTable();
+ }
+ flatbuffers::Offset<TensorMap> Finish()
+ {
+ const auto end = fbb_.EndTable(start_);
+ auto o = flatbuffers::Offset<TensorMap>(end);
+ return o;
+ }
+};
+
+inline flatbuffers::Offset<TensorMap>
+CreateTensorMap(flatbuffers::FlatBufferBuilder &_fbb,
+ flatbuffers::Offset<flatbuffers::String> name = 0, uint32_t tensor_index = 0)
+{
+ TensorMapBuilder builder_(_fbb);
+ builder_.add_tensor_index(tensor_index);
+ builder_.add_name(name);
+ return builder_.Finish();
+}
+
+inline flatbuffers::Offset<TensorMap> CreateTensorMapDirect(flatbuffers::FlatBufferBuilder &_fbb,
+ const char *name = nullptr,
+ uint32_t tensor_index = 0)
+{
+ auto name__ = name ? _fbb.CreateString(name) : 0;
+ return circle::CreateTensorMap(_fbb, name__, tensor_index);
+}
+
+struct SignatureDef FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table
+{
+ typedef SignatureDefBuilder Builder;
+ enum FlatBuffersVTableOffset FLATBUFFERS_VTABLE_UNDERLYING_TYPE
+ {
+ VT_INPUTS = 4,
+ VT_OUTPUTS = 6,
+ VT_SIGNATURE_KEY = 8,
+ VT_SUBGRAPH_INDEX = 12
+ };
+ const flatbuffers::Vector<flatbuffers::Offset<circle::TensorMap>> *inputs() const
+ {
+ return GetPointer<const flatbuffers::Vector<flatbuffers::Offset<circle::TensorMap>> *>(
+ VT_INPUTS);
+ }
+ const flatbuffers::Vector<flatbuffers::Offset<circle::TensorMap>> *outputs() const
+ {
+ return GetPointer<const flatbuffers::Vector<flatbuffers::Offset<circle::TensorMap>> *>(
+ VT_OUTPUTS);
+ }
+ const flatbuffers::String *signature_key() const
+ {
+ return GetPointer<const flatbuffers::String *>(VT_SIGNATURE_KEY);
+ }
+ uint32_t subgraph_index() const { return GetField<uint32_t>(VT_SUBGRAPH_INDEX, 0); }
+ bool Verify(flatbuffers::Verifier &verifier) const
+ {
+ return VerifyTableStart(verifier) && VerifyOffset(verifier, VT_INPUTS) &&
+ verifier.VerifyVector(inputs()) && verifier.VerifyVectorOfTables(inputs()) &&
+ VerifyOffset(verifier, VT_OUTPUTS) && verifier.VerifyVector(outputs()) &&
+ verifier.VerifyVectorOfTables(outputs()) && VerifyOffset(verifier, VT_SIGNATURE_KEY) &&
+ verifier.VerifyString(signature_key()) &&
+ VerifyField<uint32_t>(verifier, VT_SUBGRAPH_INDEX) && verifier.EndTable();
+ }
+};
+
+struct SignatureDefBuilder
+{
+ typedef SignatureDef Table;
+ flatbuffers::FlatBufferBuilder &fbb_;
+ flatbuffers::uoffset_t start_;
+ void add_inputs(
+ flatbuffers::Offset<flatbuffers::Vector<flatbuffers::Offset<circle::TensorMap>>> inputs)
+ {
+ fbb_.AddOffset(SignatureDef::VT_INPUTS, inputs);
+ }
+ void add_outputs(
+ flatbuffers::Offset<flatbuffers::Vector<flatbuffers::Offset<circle::TensorMap>>> outputs)
+ {
+ fbb_.AddOffset(SignatureDef::VT_OUTPUTS, outputs);
+ }
+ void add_signature_key(flatbuffers::Offset<flatbuffers::String> signature_key)
+ {
+ fbb_.AddOffset(SignatureDef::VT_SIGNATURE_KEY, signature_key);
+ }
+ void add_subgraph_index(uint32_t subgraph_index)
+ {
+ fbb_.AddElement<uint32_t>(SignatureDef::VT_SUBGRAPH_INDEX, subgraph_index, 0);
+ }
+ explicit SignatureDefBuilder(flatbuffers::FlatBufferBuilder &_fbb) : fbb_(_fbb)
+ {
+ start_ = fbb_.StartTable();
+ }
+ flatbuffers::Offset<SignatureDef> Finish()
+ {
+ const auto end = fbb_.EndTable(start_);
+ auto o = flatbuffers::Offset<SignatureDef>(end);
+ return o;
+ }
+};
+
+inline flatbuffers::Offset<SignatureDef> CreateSignatureDef(
+ flatbuffers::FlatBufferBuilder &_fbb,
+ flatbuffers::Offset<flatbuffers::Vector<flatbuffers::Offset<circle::TensorMap>>> inputs = 0,
+ flatbuffers::Offset<flatbuffers::Vector<flatbuffers::Offset<circle::TensorMap>>> outputs = 0,
+ flatbuffers::Offset<flatbuffers::String> signature_key = 0, uint32_t subgraph_index = 0)
+{
+ SignatureDefBuilder builder_(_fbb);
+ builder_.add_subgraph_index(subgraph_index);
+ builder_.add_signature_key(signature_key);
+ builder_.add_outputs(outputs);
+ builder_.add_inputs(inputs);
+ return builder_.Finish();
+}
+
+inline flatbuffers::Offset<SignatureDef> CreateSignatureDefDirect(
+ flatbuffers::FlatBufferBuilder &_fbb,
+ const std::vector<flatbuffers::Offset<circle::TensorMap>> *inputs = nullptr,
+ const std::vector<flatbuffers::Offset<circle::TensorMap>> *outputs = nullptr,
+ const char *signature_key = nullptr, uint32_t subgraph_index = 0)
+{
+ auto inputs__ = inputs ? _fbb.CreateVector<flatbuffers::Offset<circle::TensorMap>>(*inputs) : 0;
+ auto outputs__ =
+ outputs ? _fbb.CreateVector<flatbuffers::Offset<circle::TensorMap>>(*outputs) : 0;
+ auto signature_key__ = signature_key ? _fbb.CreateString(signature_key) : 0;
+ return circle::CreateSignatureDef(_fbb, inputs__, outputs__, signature_key__, subgraph_index);
+}
+
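// --- Illustrative aside (not part of the patch): building a one-input,
// --- one-output signature with the Direct helpers. All names, tensor indices,
// --- and the wrapper itself are hypothetical.
inline flatbuffers::Offset<circle::SignatureDef>
MakeServingSignature(flatbuffers::FlatBufferBuilder &fbb)
{
  // Each TensorMap binds a user-facing name to a tensor index in the subgraph
  // selected by subgraph_index.
  std::vector<flatbuffers::Offset<circle::TensorMap>> ins{
    circle::CreateTensorMapDirect(fbb, "x", /*tensor_index=*/0)};
  std::vector<flatbuffers::Offset<circle::TensorMap>> outs{
    circle::CreateTensorMapDirect(fbb, "y", /*tensor_index=*/1)};
  return circle::CreateSignatureDefDirect(fbb, &ins, &outs, "serving_default",
                                          /*subgraph_index=*/0);
}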
struct Model FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table
{
typedef ModelBuilder Builder;
@@ -9883,7 +10844,8 @@ struct Model FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table
VT_DESCRIPTION = 10,
VT_BUFFERS = 12,
VT_METADATA_BUFFER = 14,
- VT_METADATA = 16
+ VT_METADATA = 16,
+ VT_SIGNATURE_DEFS = 18
};
uint32_t version() const { return GetField<uint32_t>(VT_VERSION, 0); }
const flatbuffers::Vector<flatbuffers::Offset<circle::OperatorCode>> *operator_codes() const
@@ -9913,6 +10875,11 @@ struct Model FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table
return GetPointer<const flatbuffers::Vector<flatbuffers::Offset<circle::Metadata>> *>(
VT_METADATA);
}
+ const flatbuffers::Vector<flatbuffers::Offset<circle::SignatureDef>> *signature_defs() const
+ {
+ return GetPointer<const flatbuffers::Vector<flatbuffers::Offset<circle::SignatureDef>> *>(
+ VT_SIGNATURE_DEFS);
+ }
bool Verify(flatbuffers::Verifier &verifier) const
{
return VerifyTableStart(verifier) && VerifyField<uint32_t>(verifier, VT_VERSION) &&
@@ -9924,7 +10891,9 @@ struct Model FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table
verifier.VerifyVector(buffers()) && verifier.VerifyVectorOfTables(buffers()) &&
VerifyOffset(verifier, VT_METADATA_BUFFER) && verifier.VerifyVector(metadata_buffer()) &&
VerifyOffset(verifier, VT_METADATA) && verifier.VerifyVector(metadata()) &&
- verifier.VerifyVectorOfTables(metadata()) && verifier.EndTable();
+ verifier.VerifyVectorOfTables(metadata()) && VerifyOffset(verifier, VT_SIGNATURE_DEFS) &&
+ verifier.VerifyVector(signature_defs()) &&
+ verifier.VerifyVectorOfTables(signature_defs()) && verifier.EndTable();
}
};
@@ -9963,11 +10932,16 @@ struct ModelBuilder
{
fbb_.AddOffset(Model::VT_METADATA, metadata);
}
+ void add_signature_defs(
+ flatbuffers::Offset<flatbuffers::Vector<flatbuffers::Offset<circle::SignatureDef>>>
+ signature_defs)
+ {
+ fbb_.AddOffset(Model::VT_SIGNATURE_DEFS, signature_defs);
+ }
explicit ModelBuilder(flatbuffers::FlatBufferBuilder &_fbb) : fbb_(_fbb)
{
start_ = fbb_.StartTable();
}
- ModelBuilder &operator=(const ModelBuilder &);
flatbuffers::Offset<Model> Finish()
{
const auto end = fbb_.EndTable(start_);
@@ -9984,9 +10958,12 @@ inline flatbuffers::Offset<Model> CreateModel(
flatbuffers::Offset<flatbuffers::String> description = 0,
flatbuffers::Offset<flatbuffers::Vector<flatbuffers::Offset<circle::Buffer>>> buffers = 0,
flatbuffers::Offset<flatbuffers::Vector<int32_t>> metadata_buffer = 0,
- flatbuffers::Offset<flatbuffers::Vector<flatbuffers::Offset<circle::Metadata>>> metadata = 0)
+ flatbuffers::Offset<flatbuffers::Vector<flatbuffers::Offset<circle::Metadata>>> metadata = 0,
+ flatbuffers::Offset<flatbuffers::Vector<flatbuffers::Offset<circle::SignatureDef>>>
+ signature_defs = 0)
{
ModelBuilder builder_(_fbb);
+ builder_.add_signature_defs(signature_defs);
builder_.add_metadata(metadata);
builder_.add_metadata_buffer(metadata_buffer);
builder_.add_buffers(buffers);
@@ -10004,7 +10981,8 @@ inline flatbuffers::Offset<Model> CreateModelDirect(
const char *description = nullptr,
const std::vector<flatbuffers::Offset<circle::Buffer>> *buffers = nullptr,
const std::vector<int32_t> *metadata_buffer = nullptr,
- const std::vector<flatbuffers::Offset<circle::Metadata>> *metadata = nullptr)
+ const std::vector<flatbuffers::Offset<circle::Metadata>> *metadata = nullptr,
+ const std::vector<flatbuffers::Offset<circle::SignatureDef>> *signature_defs = nullptr)
{
auto operator_codes__ =
operator_codes ? _fbb.CreateVector<flatbuffers::Offset<circle::OperatorCode>>(*operator_codes)
@@ -10016,8 +10994,11 @@ inline flatbuffers::Offset<Model> CreateModelDirect(
auto metadata_buffer__ = metadata_buffer ? _fbb.CreateVector<int32_t>(*metadata_buffer) : 0;
auto metadata__ =
metadata ? _fbb.CreateVector<flatbuffers::Offset<circle::Metadata>>(*metadata) : 0;
+ auto signature_defs__ =
+ signature_defs ? _fbb.CreateVector<flatbuffers::Offset<circle::SignatureDef>>(*signature_defs)
+ : 0;
return circle::CreateModel(_fbb, version, operator_codes__, subgraphs__, description__, buffers__,
- metadata_buffer__, metadata__);
+ metadata_buffer__, metadata__, signature_defs__);
}
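// --- Illustrative aside (not part of the patch): signature_defs is appended
// --- as the last Model field, so pre-existing buffers verify unchanged and
// --- simply return nullptr from the accessor. The helper name is hypothetical.
inline bool HasSignatures(const circle::Model *model)
{
  // VerifyOffset() treats a missing optional vector as valid, so nullptr is
  // the expected result when reading a model serialized before this change.
  return model->signature_defs() != nullptr && model->signature_defs()->size() > 0;
}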
inline bool VerifyQuantizationDetails(flatbuffers::Verifier &verifier, const void *obj,
@@ -10621,6 +11602,71 @@ inline bool VerifyBuiltinOptions(flatbuffers::Verifier &verifier, const void *ob
auto ptr = reinterpret_cast<const circle::BatchMatMulOptions *>(obj);
return verifier.VerifyTable(ptr);
}
+ case BuiltinOptions_CumsumOptions:
+ {
+ auto ptr = reinterpret_cast<const circle::CumsumOptions *>(obj);
+ return verifier.VerifyTable(ptr);
+ }
+ case BuiltinOptions_CallOnceOptions:
+ {
+ auto ptr = reinterpret_cast<const circle::CallOnceOptions *>(obj);
+ return verifier.VerifyTable(ptr);
+ }
+ case BuiltinOptions_BroadcastToOptions:
+ {
+ auto ptr = reinterpret_cast<const circle::BroadcastToOptions *>(obj);
+ return verifier.VerifyTable(ptr);
+ }
+ case BuiltinOptions_Rfft2dOptions:
+ {
+ auto ptr = reinterpret_cast<const circle::Rfft2dOptions *>(obj);
+ return verifier.VerifyTable(ptr);
+ }
+ case BuiltinOptions_Conv3DOptions:
+ {
+ auto ptr = reinterpret_cast<const circle::Conv3DOptions *>(obj);
+ return verifier.VerifyTable(ptr);
+ }
+ case BuiltinOptions_HashtableOptions:
+ {
+ auto ptr = reinterpret_cast<const circle::HashtableOptions *>(obj);
+ return verifier.VerifyTable(ptr);
+ }
+ case BuiltinOptions_HashtableFindOptions:
+ {
+ auto ptr = reinterpret_cast<const circle::HashtableFindOptions *>(obj);
+ return verifier.VerifyTable(ptr);
+ }
+ case BuiltinOptions_HashtableImportOptions:
+ {
+ auto ptr = reinterpret_cast<const circle::HashtableImportOptions *>(obj);
+ return verifier.VerifyTable(ptr);
+ }
+ case BuiltinOptions_HashtableSizeOptions:
+ {
+ auto ptr = reinterpret_cast<const circle::HashtableSizeOptions *>(obj);
+ return verifier.VerifyTable(ptr);
+ }
+ case BuiltinOptions_VarHandleOptions:
+ {
+ auto ptr = reinterpret_cast<const circle::VarHandleOptions *>(obj);
+ return verifier.VerifyTable(ptr);
+ }
+ case BuiltinOptions_ReadVariableOptions:
+ {
+ auto ptr = reinterpret_cast<const circle::ReadVariableOptions *>(obj);
+ return verifier.VerifyTable(ptr);
+ }
+ case BuiltinOptions_AssignVariableOptions:
+ {
+ auto ptr = reinterpret_cast<const circle::AssignVariableOptions *>(obj);
+ return verifier.VerifyTable(ptr);
+ }
+ case BuiltinOptions_RandomOptions:
+ {
+ auto ptr = reinterpret_cast<const circle::RandomOptions *>(obj);
+ return verifier.VerifyTable(ptr);
+ }
case BuiltinOptions_BCQGatherOptions:
{
auto ptr = reinterpret_cast<const circle::BCQGatherOptions *>(obj);
diff --git a/runtime/onert/frontend/nnapi/wrapper/OperationFactory.cc b/runtime/onert/frontend/nnapi/wrapper/OperationFactory.cc
index 9ecb7d190..ba739f618 100644
--- a/runtime/onert/frontend/nnapi/wrapper/OperationFactory.cc
+++ b/runtime/onert/frontend/nnapi/wrapper/OperationFactory.cc
@@ -82,6 +82,27 @@ uint32_t getUint32Scalar(Operands &operands, const OperandIndex index)
return static_cast<uint32_t>(int32_value);
}
+Activation getActivation(Operands &operands, const OperandIndex index)
+{
+ switch (operands.at(index).asScalar<int32_t>())
+ {
+ case 0:
+ return Activation::NONE;
+ case 1:
+ return Activation::RELU;
+ case 2:
+ return Activation::RELU1;
+ case 3:
+ return Activation::RELU6;
+ case 4:
+ return Activation::TANH;
+ case 6:
+ return Activation::SIGMOID;
+ default:
+ throw std::runtime_error("Unsupported activation type");
+ }
+}
+
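// --- Illustrative aside (not part of the patch): the integer cases above
// --- follow the NNAPI FuseCode values (0..3), with 4 (TANH) and 6 (SIGMOID)
// --- appearing to be extensions and 5 deliberately left unmapped. A generator
// --- would typically consume a trailing scalar operand along these lines;
// --- 'init_param' and the operand position are assumptions:
//
//   Activation act = getActivation(operands, OperandIndex{init_param.inputs[3]});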
OperationFactory::Generator
getElementwiseActivationGenerator(const onert::ir::operation::ElementwiseActivation::Type op_type,
float alpha = 0.f, float beta = 0.f)
@@ -519,10 +540,6 @@ OperationFactory::OperationFactory()
_map[ANEURALNETWORKS_CAST] =
getElementwiseUnaryGenerator(operation::ElementwiseUnary::Type::CAST);
- // ANEURALNETWORKS_CAST_EX is deprecated
- // TODO Remove ANEURALNETWORKS_CAST_EX
- _map[ANEURALNETWORKS_CAST_EX] = _map[ANEURALNETWORKS_CAST];
-
_map[ANEURALNETWORKS_CONV_2D] = [](const OperationFactory::Param &init_param,
Operands &operands) {
using operation::Conv2D;
@@ -651,10 +668,6 @@ OperationFactory::OperationFactory()
_map[ANEURALNETWORKS_REDUCE_SUM] =
getReduceGenerator(onert::ir::operation::Reduce::ReduceType::SUM);
- // ANEURALNETWORKS_REDUCE_SUM_EX is deprecated
- // TODO Remove ANEURALNETWORKS_REDUCE_SUM_EX
- _map[ANEURALNETWORKS_REDUCE_SUM_EX] = _map[ANEURALNETWORKS_REDUCE_SUM];
-
_map[ANEURALNETWORKS_SUB] =
getBinaryArithmeticGenerator(onert::ir::operation::BinaryArithmetic::ArithmeticType::SUB);
@@ -770,10 +783,6 @@ OperationFactory::OperationFactory()
_map[ANEURALNETWORKS_EXP] = getElementwiseUnaryGenerator(operation::ElementwiseUnary::Type::EXP);
- // ANEURALNETWORKS_EXP_EX is deprecated
- // TODO Remove ANEURALNETWORKS_EXP_EX
- _map[ANEURALNETWORKS_EXP_EX] = _map[ANEURALNETWORKS_EXP];
-
// Each input should be interpreted as follows:
// 0 -> Input Tensor Index
// 1 -> Axis Tensor Index
@@ -791,52 +800,6 @@ OperationFactory::OperationFactory()
_map[ANEURALNETWORKS_EQUAL] =
getComparisonGenerator(operation::Comparison::ComparisonType::Equal);
- // ANEURALNETWORKS_GREATER_EQUAL_EX is deprecated
- // TODO Remove ANEURALNETWORKS_GREATER_EQUAL_EX
- _map[ANEURALNETWORKS_GREATER_EQUAL_EX] = [](const OperationFactory::Param &init_param,
- Operands &operands) {
- assert(init_param.input_count == 2 && init_param.output_count == 1);
-
- OperandIndexSequence outputs{init_param.outputs[0]};
-
- // Each input should be interpreted as follows:
- //
- // 0 -> input0 Tensor Index
- // 1 -> input1 Tensor Index
- OperandIndexSequence inputs{init_param.inputs[0], init_param.inputs[1]};
-
- operation::Comparison::Param param;
- param.comparison_type = operation::Comparison::ComparisonType::GreaterEqual;
-
- // Output operand type must be boolean
- replaceDataType(operands, outputs.at(0), DataType::BOOL8);
-
- return new operation::Comparison{inputs, outputs, param};
- };
-
- // ANEURALNETWORKS_LESS_EX is deprecated
- // TODO Remove ANEURALNETWORKS_LESS_EX
- _map[ANEURALNETWORKS_LESS_EX] = [](const OperationFactory::Param &init_param,
- Operands &operands) {
- assert(init_param.input_count == 2 && init_param.output_count == 1);
-
- OperandIndexSequence outputs{init_param.outputs[0]};
-
- // Each input should be interpreted as follows:
- //
- // 0 -> input0 Tensor Index
- // 1 -> input1 Tensor Index
- OperandIndexSequence inputs{init_param.inputs[0], init_param.inputs[1]};
-
- operation::Comparison::Param param;
- param.comparison_type = operation::Comparison::ComparisonType::Less;
-
- // Output operand type must be boolean
- replaceDataType(operands, outputs.at(0), DataType::BOOL8);
-
- return new operation::Comparison{inputs, outputs, param};
- };
-
_map[ANEURALNETWORKS_REDUCE_ALL] =
getReduceGenerator(onert::ir::operation::Reduce::ReduceType::ALL);
@@ -846,61 +809,9 @@ OperationFactory::OperationFactory()
_map[ANEURALNETWORKS_REDUCE_MAX] =
getReduceGenerator(onert::ir::operation::Reduce::ReduceType::MAX);
- // ANEURALNETWORKS_REDUCE_MAX_EX is deprecated
- // TODO Remove ANEURALNETWORKS_REDUCE_MAX_EX
- _map[ANEURALNETWORKS_REDUCE_MAX_EX] = _map[ANEURALNETWORKS_REDUCE_MAX];
-
- // ANEURALNETWORKS_NOT_EQUAL_EX is deprecated
- // TODO Remove ANEURALNETWORKS_NOT_EQUAL_EX
- _map[ANEURALNETWORKS_NOT_EQUAL_EX] = [](const OperationFactory::Param &init_param,
- Operands &operands) {
- assert(init_param.input_count == 2 && init_param.output_count == 1);
-
- OperandIndexSequence outputs{init_param.outputs[0]};
-
- // Each input should be interpreted as follows:
- //
- // 0 -> input1 Tensor Index
- // 1 -> input2 Tensor Index
- OperandIndexSequence inputs{init_param.inputs[0], init_param.inputs[1]};
-
- operation::Comparison::Param param;
- param.comparison_type = operation::Comparison::ComparisonType::NotEqual;
-
- // Output operand type must be boolean
- replaceDataType(operands, outputs.at(0), DataType::BOOL8);
-
- return new operation::Comparison{inputs, outputs, param};
- };
-
_map[ANEURALNETWORKS_LOGICAL_AND] =
getElementwiseBinaryGenerator(operation::ElementwiseBinary::ElementwiseBinaryType::LOGICAL_AND);
- // ANEURALNETWORKS_LOGICAL_AND_EX is deprecated
- // TODO Remove ANEURALNETWORKS_LOGICAL_AND_EX
- _map[ANEURALNETWORKS_LOGICAL_AND_EX] = [](const OperationFactory::Param &init_param,
- Operands &operands) {
- assert(init_param.input_count == 2 && init_param.output_count == 1);
-
- OperandIndexSequence outputs{init_param.outputs[0]};
-
- // Each input should be interpreted as follows:
- //
- // 0 -> input0 Tensor Index
- // 1 -> input1 Tensor Index
- OperandIndexSequence inputs{init_param.inputs[0], init_param.inputs[1]};
-
- // This operation's operands must be boolean type.
- replaceDataType(operands, inputs.at(0), DataType::BOOL8);
- replaceDataType(operands, inputs.at(1), DataType::BOOL8);
- replaceDataType(operands, outputs.at(0), DataType::BOOL8);
-
- operation::ElementwiseBinary::Param param;
- param.op_type = operation::ElementwiseBinary::ElementwiseBinaryType::LOGICAL_AND;
-
- return new operation::ElementwiseBinary{inputs, outputs, param};
- };
-
_map[ANEURALNETWORKS_RSQRT] =
getElementwiseUnaryGenerator(operation::ElementwiseUnary::Type::RSQRT);
@@ -919,24 +830,7 @@ OperationFactory::OperationFactory()
return new operation::Select{inputs, outputs};
};
- _map[ANEURALNETWORKS_SELECT_V2_EX] = [](const OperationFactory::Param &init_param, Operands &) {
- assert(init_param.input_count == 3 && init_param.output_count == 1);
-
- OperandIndexSequence outputs{init_param.outputs[0]};
-
- // Each input should be interpreted as follows:
- //
- // 0 -> Condition Tensor Index
- // 1 -> Input X(true) Tensor Index
- // 2 -> Input Y(false) Tensor Index
- OperandIndexSequence inputs{init_param.inputs[0], init_param.inputs[1], init_param.inputs[2]};
-
- return new operation::Select{inputs, outputs};
- };
-
- // ANEURALNETWORKS_RSQRT_EX is deprecated
- // TODO Remove ANEURALNETWORKS_RSQRT_EX
- _map[ANEURALNETWORKS_RSQRT_EX] = _map[ANEURALNETWORKS_RSQRT];
+ _map[ANEURALNETWORKS_SELECT_V2_EX] = _map[ANEURALNETWORKS_SELECT];
_map[ANEURALNETWORKS_RELU] =
getElementwiseActivationGenerator(onert::ir::operation::ElementwiseActivation::Type::RELU,
@@ -1141,10 +1035,6 @@ OperationFactory::OperationFactory()
return new operation::PReLU{inputs, outputs};
};
- // ANEURALNETWORKS_PRELU_EX is deprecated
- // TODO Remove ANEURALNETWORKS_PRELU_EX
- _map[ANEURALNETWORKS_PRELU_EX] = _map[ANEURALNETWORKS_PRELU];
-
_map[ANEURALNETWORKS_TRANSPOSE_CONV_EX] = [](const OperationFactory::Param &init_param,
Operands &operands) {
assert(init_param.input_count == 6 && init_param.output_count == 1);
@@ -1178,64 +1068,12 @@ OperationFactory::OperationFactory()
_map[ANEURALNETWORKS_SQRT] =
getElementwiseUnaryGenerator(operation::ElementwiseUnary::Type::SQRT);
- // ANEURALNETWORKS_SQRT_EX is deprecated
- // TODO Remove ANEURALNETWORKS_SQRT_EX
- _map[ANEURALNETWORKS_SQRT_EX] = _map[ANEURALNETWORKS_SQRT];
-
_map[ANEURALNETWORKS_LOGICAL_OR] =
getElementwiseBinaryGenerator(operation::ElementwiseBinary::ElementwiseBinaryType::LOGICAL_OR);
- // ANEURALNETWORKS_LOGICAL_OR_EX is deprecated
- // TODO Remove ANEURALNETWORKS_LOGICAL_OR_EX
- _map[ANEURALNETWORKS_LOGICAL_OR_EX] = [](const OperationFactory::Param &init_param,
- Operands &operands) {
- assert(init_param.input_count == 2 && init_param.output_count == 1);
-
- OperandIndexSequence outputs{init_param.outputs[0]};
-
- // Each input should be interpreted as follows:
- //
- // 0 -> input0 Tensor Index
- // 1 -> input1 Tensor Index
- OperandIndexSequence inputs{init_param.inputs[0], init_param.inputs[1]};
-
- // This operation's operands must be boolean type.
- replaceDataType(operands, inputs.at(0), DataType::BOOL8);
- replaceDataType(operands, inputs.at(1), DataType::BOOL8);
- replaceDataType(operands, outputs.at(0), DataType::BOOL8);
-
- operation::ElementwiseBinary::Param param;
- param.op_type = operation::ElementwiseBinary::ElementwiseBinaryType::LOGICAL_OR;
-
- return new operation::ElementwiseBinary{inputs, outputs, param};
- };
-
_map[ANEURALNETWORKS_LOGICAL_NOT] =
getElementwiseUnaryGenerator(operation::ElementwiseUnary::Type::LOGICAL_NOT);
- // ANEURALNETWORKS_LOGICAL_NOT_EX is deprecated
- // TODO Remove ANEURALNETWORKS_LOGICAL_NOT_EX
- _map[ANEURALNETWORKS_LOGICAL_NOT_EX] = [](const OperationFactory::Param &init_param,
- Operands &operands) {
- assert(init_param.input_count == 1 && init_param.output_count == 1);
-
- OperandIndexSequence outputs{init_param.outputs[0]};
-
- // Each input should be interpreted as follows:
- //
- // 0 -> input Tensor Index
- OperandIndexSequence inputs{init_param.inputs[0]};
-
- // This operation's operands must be boolean type.
- replaceDataType(operands, inputs.at(0), DataType::BOOL8);
- replaceDataType(operands, outputs.at(0), DataType::BOOL8);
-
- operation::ElementwiseUnary::Param param;
- param.op_type = operation::ElementwiseUnary::Type::LOGICAL_NOT;
-
- return new operation::ElementwiseUnary{inputs, outputs, param};
- };
-
_map[ANEURALNETWORKS_LSTM] = [](const OperationFactory::Param &init_param, Operands &operands) {
assert(init_param.input_count == 23 && init_param.output_count == 4);
@@ -1280,31 +1118,7 @@ OperationFactory::OperationFactory()
}
operation::LSTM::Param param;
- const auto activation_index = OperandIndex{init_param.inputs[20]};
- switch (operands.at(activation_index).asScalar<int32_t>())
- {
- case 0:
- param.activation = Activation::NONE;
- break;
- case 1:
- param.activation = Activation::RELU;
- break;
- case 2:
- param.activation = Activation::RELU1;
- break;
- case 3:
- param.activation = Activation::RELU6;
- break;
- case 4:
- param.activation = Activation::TANH;
- break;
- case 6:
- param.activation = Activation::SIGMOID;
- break;
- default:
- throw std::runtime_error("Unsupported activation type");
- break;
- }
+ param.activation = getActivation(operands, OperandIndex{init_param.inputs[20]});
param.cell_threshold = operands.at(OperandIndex{init_param.inputs[21]}).asScalar<float>();
param.projection_threshold = operands.at(OperandIndex{init_param.inputs[22]}).asScalar<float>();
// This is initialization to prevent warning or error by static code analyzer. LSTM operation
@@ -1378,31 +1192,7 @@ OperationFactory::OperationFactory()
output_index};
operation::LSTM::Param param;
- const auto activation_index = OperandIndex{init_param.inputs[20]};
- switch (operands.at(activation_index).asScalar<int32_t>())
- {
- case 0:
- param.activation = Activation::NONE;
- break;
- case 1:
- param.activation = Activation::RELU;
- break;
- case 2:
- param.activation = Activation::RELU1;
- break;
- case 3:
- param.activation = Activation::RELU6;
- break;
- case 4:
- param.activation = Activation::TANH;
- break;
- case 6:
- param.activation = Activation::SIGMOID;
- break;
- default:
- throw std::runtime_error("Unsupported activation type");
- break;
- }
+ param.activation = getActivation(operands, OperandIndex{init_param.inputs[20]});
param.cell_threshold = operands.at(OperandIndex{init_param.inputs[21]}).asScalar<float>();
param.projection_threshold = operands.at(OperandIndex{init_param.inputs[22]}).asScalar<float>();
param.time_major = operands.at(OperandIndex{init_param.inputs[23]}).asScalar<bool>();
@@ -1410,29 +1200,6 @@ OperationFactory::OperationFactory()
return new operation::LSTM{inputs, outputs, param};
};
- // ANEURALNETWORKS_EQUAL_EX is deprecated
- // TODO Remove ANEURALNETWORKS_EQUAL_EX
- _map[ANEURALNETWORKS_EQUAL_EX] = [](const OperationFactory::Param &init_param,
- Operands &operands) {
- assert(init_param.input_count == 2 && init_param.output_count == 1);
-
- OperandIndexSequence outputs{init_param.outputs[0]};
-
- // Each input should be interpreted as follows:
- //
- // 0 -> input0 Tensor Index
- // 1 -> input1 Tensor Index
- OperandIndexSequence inputs{init_param.inputs[0], init_param.inputs[1]};
-
- operation::Comparison::Param param;
- param.comparison_type = operation::Comparison::ComparisonType::Equal;
-
- // Output operand type must be boolean
- replaceDataType(operands, outputs.at(0), DataType::BOOL8);
-
- return new operation::Comparison{inputs, outputs, param};
- };
-
_map[ANEURALNETWORKS_SQUARED_DIFFERENCE_EX] = [](const OperationFactory::Param &init_param,
Operands &) {
assert(init_param.input_count == 2 && init_param.output_count == 1);
@@ -1470,10 +1237,6 @@ OperationFactory::OperationFactory()
return new operation::TopKV2{inputs, outputs, param};
};
- // ANEURALNETWORKS_CAST_EX is deprecated
- // TODO Remove ANEURALNETWORKS_CAST_EX
- _map[ANEURALNETWORKS_TOPK_V2_EX] = _map[ANEURALNETWORKS_TOPK_V2];
-
_map[ANEURALNETWORKS_GATHER] = [](const OperationFactory::Param &init_param, Operands &operands) {
assert(init_param.input_count == 3 && init_param.output_count == 1);
@@ -1492,22 +1255,10 @@ OperationFactory::OperationFactory()
return new operation::Gather{inputs, outputs, param};
};
- // ANEURALNETWORKS_GATHER_EX is deprecated
- // TODO Remove ANEURALNETWORKS_GATHER_EX
- _map[ANEURALNETWORKS_GATHER_EX] = _map[ANEURALNETWORKS_GATHER];
-
_map[ANEURALNETWORKS_NEG] = getElementwiseUnaryGenerator(operation::ElementwiseUnary::Type::NEG);
- // ANEURALNETWORKS_NEG_EX is deprecated
- // TODO Remove ANEURALNETWORKS_NEG_EX
- _map[ANEURALNETWORKS_NEG_EX] = _map[ANEURALNETWORKS_NEG];
-
_map[ANEURALNETWORKS_ABS] = getElementwiseUnaryGenerator(operation::ElementwiseUnary::Type::ABS);
- // ANEURALNETWORKS_ABS_EX is deprecated
- // TODO Remove ANEURALNETWORKS_ABS_EX
- _map[ANEURALNETWORKS_ABS_EX] = _map[ANEURALNETWORKS_ABS];
-
_map[ANEURALNETWORKS_ARGMAX] = [](const OperationFactory::Param &init_param, Operands &) {
assert(init_param.input_count == 2 && init_param.output_count == 1);
@@ -1527,10 +1278,6 @@ OperationFactory::OperationFactory()
return new operation::ArgMinMax{inputs, outputs, param};
};
- // ANEURALNETWORKS_ARGMAX_EX is deprecated
- // TODO Remove ANEURALNETWORKS_ARGMAX_EX
- _map[ANEURALNETWORKS_ARGMAX_EX] = _map[ANEURALNETWORKS_ARGMAX];
-
_map[ANEURALNETWORKS_ARGMIN] = [](const OperationFactory::Param &init_param, Operands &) {
assert(init_param.input_count == 2 && init_param.output_count == 1);
@@ -1630,10 +1377,6 @@ OperationFactory::OperationFactory()
_map[ANEURALNETWORKS_REDUCE_MIN] =
getReduceGenerator(onert::ir::operation::Reduce::ReduceType::MIN);
- // ANEURALNETWORKS_REDUCE_MIN_EX is deprecated
- // TODO Remove ANEURALNETWORKS_REDUCE_MIN_EX
- _map[ANEURALNETWORKS_REDUCE_MIN_EX] = _map[ANEURALNETWORKS_REDUCE_MIN];
-
_map[ANEURALNETWORKS_SPLIT] = [](const OperationFactory::Param &init_param, Operands &operands) {
assert(init_param.input_count == 3);
assert(init_param.output_count >= 1); // At least one output tensor and axis
@@ -1668,10 +1411,6 @@ OperationFactory::OperationFactory()
return new operation::SplitV{inputs, outputs, param};
};
- // ANEURALNETWORKS_SPLIT_EX is deprecated
- // TODO Remove ANEURALNETWORKS_SPLIT_EX
- _map[ANEURALNETWORKS_SPLIT_EX] = _map[ANEURALNETWORKS_SPLIT];
-
_map[ANEURALNETWORKS_UNPACK_EX] = [](const OperationFactory::Param &init_param,
Operands &operands) {
assert(init_param.input_count == 3 && init_param.output_count >= 1);
diff --git a/runtime/onert/frontend/tflite/src/tflite_loader.cc b/runtime/onert/frontend/tflite/src/tflite_loader.cc
index a3038b718..3b160473d 100644
--- a/runtime/onert/frontend/tflite/src/tflite_loader.cc
+++ b/runtime/onert/frontend/tflite/src/tflite_loader.cc
@@ -64,6 +64,12 @@ struct LoaderDomain
class TFLiteLoader final : public base_loader::BaseLoader<LoaderDomain>
{
+protected:
+ // BatchMatMul option names differ between the two schemas:
+ // Circle: adjoint_lhs, adjoint_rhs
+ // TFLite: adj_x, adj_y
+ void loadBatchMatMul(const Operator *op, ir::Graph &subg);
+
public:
using BaseLoader::BaseLoader;
@@ -112,8 +118,40 @@ private:
return subg;
}
+
+ void loadOperation(const onert_tflite::Operator *op, ir::Graph &subg)
+ {
+ auto const builtin_op = getBuiltinOperator(op);
+
+ switch (builtin_op)
+ {
+ case onert_tflite::BuiltinOperator::BuiltinOperator_BATCH_MATMUL:
+ loadBatchMatMul(op, subg);
+ return;
+ default:
+ BaseLoader::loadOperation(op, subg);
+ return;
+ }
+ }
};
+void TFLiteLoader::loadBatchMatMul(const Operator *op, ir::Graph &subg)
+{
+ ir::OperandIndexSequence inputs;
+ ir::OperandIndexSequence outputs;
+
+ loadOperationIO(op, inputs, outputs);
+
+ ir::operation::BatchMatMul::Param param;
+ const auto *options = op->builtin_options_as_BatchMatMulOptions();
+
+ param.adj_x = options->adj_x();
+ param.adj_y = options->adj_y();
+
+ std::unique_ptr<ir::Operation> new_op(new ir::operation::BatchMatMul(inputs, outputs, param));
+ subg.addOperation(std::move(new_op));
+}
+
} // namespace
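
// [Editor's sketch] The override above in miniature: a derived loader
// intercepts the one builtin operator whose options are spelled differently in
// the TFLite schema (adj_x/adj_y) than in Circle (adjoint_lhs/adjoint_rhs) and
// delegates every other opcode to the shared base loader. Names here are
// illustrative stand-ins, not the onert API:

#include <iostream>

struct BaseLoaderSketch
{
  void loadOperation(int builtin_op)
  {
    std::cout << "generic path for op " << builtin_op << "\n";
  }
};

struct TfLiteLoaderSketch : BaseLoaderSketch
{
  static constexpr int kBatchMatMul = 126; // BuiltinOperator_BATCH_MATMUL below

  void loadOperation(int builtin_op)
  {
    if (builtin_op == kBatchMatMul)
      loadBatchMatMul(); // reads adj_x()/adj_y() into the same BatchMatMul::Param
    else
      BaseLoaderSketch::loadOperation(builtin_op); // everything else is shared
  }

  void loadBatchMatMul() { std::cout << "TFLite-specific BatchMatMul path\n"; }
};

int main()
{
  TfLiteLoaderSketch loader;
  loader.loadOperation(TfLiteLoaderSketch::kBatchMatMul); // TFLite-specific path
  loader.loadOperation(0);                                // shared path
}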
std::unique_ptr<ir::Subgraphs> loadModel(const std::string &filename)
diff --git a/runtime/onert/frontend/tflite/src/tflite_schema_generated.h b/runtime/onert/frontend/tflite/src/tflite_schema_generated.h
index 8e1b84e29..cec5bce74 100644
--- a/runtime/onert/frontend/tflite/src/tflite_schema_generated.h
+++ b/runtime/onert/frontend/tflite/src/tflite_schema_generated.h
@@ -26,236 +26,396 @@ namespace onert_tflite
{
struct CustomQuantization;
+struct CustomQuantizationBuilder;
struct QuantizationParameters;
+struct QuantizationParametersBuilder;
struct Int32Vector;
+struct Int32VectorBuilder;
struct Uint16Vector;
+struct Uint16VectorBuilder;
struct Uint8Vector;
+struct Uint8VectorBuilder;
struct DimensionMetadata;
+struct DimensionMetadataBuilder;
struct SparsityParameters;
+struct SparsityParametersBuilder;
struct Tensor;
+struct TensorBuilder;
struct Conv2DOptions;
+struct Conv2DOptionsBuilder;
+
+struct Conv3DOptions;
+struct Conv3DOptionsBuilder;
struct Pool2DOptions;
+struct Pool2DOptionsBuilder;
struct DepthwiseConv2DOptions;
+struct DepthwiseConv2DOptionsBuilder;
struct ConcatEmbeddingsOptions;
+struct ConcatEmbeddingsOptionsBuilder;
struct LSHProjectionOptions;
+struct LSHProjectionOptionsBuilder;
struct SVDFOptions;
+struct SVDFOptionsBuilder;
struct RNNOptions;
+struct RNNOptionsBuilder;
struct SequenceRNNOptions;
+struct SequenceRNNOptionsBuilder;
struct BidirectionalSequenceRNNOptions;
+struct BidirectionalSequenceRNNOptionsBuilder;
struct FullyConnectedOptions;
+struct FullyConnectedOptionsBuilder;
struct SoftmaxOptions;
+struct SoftmaxOptionsBuilder;
struct ConcatenationOptions;
+struct ConcatenationOptionsBuilder;
struct AddOptions;
+struct AddOptionsBuilder;
struct MulOptions;
+struct MulOptionsBuilder;
struct L2NormOptions;
+struct L2NormOptionsBuilder;
struct LocalResponseNormalizationOptions;
+struct LocalResponseNormalizationOptionsBuilder;
struct LSTMOptions;
+struct LSTMOptionsBuilder;
struct UnidirectionalSequenceLSTMOptions;
+struct UnidirectionalSequenceLSTMOptionsBuilder;
struct BidirectionalSequenceLSTMOptions;
+struct BidirectionalSequenceLSTMOptionsBuilder;
struct ResizeBilinearOptions;
+struct ResizeBilinearOptionsBuilder;
struct ResizeNearestNeighborOptions;
+struct ResizeNearestNeighborOptionsBuilder;
struct CallOptions;
+struct CallOptionsBuilder;
struct PadOptions;
+struct PadOptionsBuilder;
struct PadV2Options;
+struct PadV2OptionsBuilder;
struct ReshapeOptions;
+struct ReshapeOptionsBuilder;
struct SpaceToBatchNDOptions;
+struct SpaceToBatchNDOptionsBuilder;
struct BatchToSpaceNDOptions;
+struct BatchToSpaceNDOptionsBuilder;
struct SkipGramOptions;
+struct SkipGramOptionsBuilder;
struct SpaceToDepthOptions;
+struct SpaceToDepthOptionsBuilder;
struct DepthToSpaceOptions;
+struct DepthToSpaceOptionsBuilder;
struct SubOptions;
+struct SubOptionsBuilder;
struct DivOptions;
+struct DivOptionsBuilder;
struct TopKV2Options;
+struct TopKV2OptionsBuilder;
struct EmbeddingLookupSparseOptions;
+struct EmbeddingLookupSparseOptionsBuilder;
struct GatherOptions;
+struct GatherOptionsBuilder;
struct TransposeOptions;
+struct TransposeOptionsBuilder;
struct ExpOptions;
+struct ExpOptionsBuilder;
struct CosOptions;
+struct CosOptionsBuilder;
struct ReducerOptions;
+struct ReducerOptionsBuilder;
struct SqueezeOptions;
+struct SqueezeOptionsBuilder;
struct SplitOptions;
+struct SplitOptionsBuilder;
struct SplitVOptions;
+struct SplitVOptionsBuilder;
struct StridedSliceOptions;
+struct StridedSliceOptionsBuilder;
struct LogSoftmaxOptions;
+struct LogSoftmaxOptionsBuilder;
struct CastOptions;
+struct CastOptionsBuilder;
struct DequantizeOptions;
+struct DequantizeOptionsBuilder;
struct MaximumMinimumOptions;
+struct MaximumMinimumOptionsBuilder;
struct TileOptions;
+struct TileOptionsBuilder;
struct ArgMaxOptions;
+struct ArgMaxOptionsBuilder;
struct ArgMinOptions;
+struct ArgMinOptionsBuilder;
struct GreaterOptions;
+struct GreaterOptionsBuilder;
struct GreaterEqualOptions;
+struct GreaterEqualOptionsBuilder;
struct LessOptions;
+struct LessOptionsBuilder;
struct LessEqualOptions;
+struct LessEqualOptionsBuilder;
struct NegOptions;
+struct NegOptionsBuilder;
struct SelectOptions;
+struct SelectOptionsBuilder;
struct SliceOptions;
+struct SliceOptionsBuilder;
struct TransposeConvOptions;
+struct TransposeConvOptionsBuilder;
struct ExpandDimsOptions;
+struct ExpandDimsOptionsBuilder;
struct SparseToDenseOptions;
+struct SparseToDenseOptionsBuilder;
struct EqualOptions;
+struct EqualOptionsBuilder;
struct NotEqualOptions;
+struct NotEqualOptionsBuilder;
struct ShapeOptions;
+struct ShapeOptionsBuilder;
struct RankOptions;
+struct RankOptionsBuilder;
struct PowOptions;
+struct PowOptionsBuilder;
struct FakeQuantOptions;
+struct FakeQuantOptionsBuilder;
struct PackOptions;
+struct PackOptionsBuilder;
struct LogicalOrOptions;
+struct LogicalOrOptionsBuilder;
struct OneHotOptions;
+struct OneHotOptionsBuilder;
struct AbsOptions;
+struct AbsOptionsBuilder;
struct HardSwishOptions;
+struct HardSwishOptionsBuilder;
struct LogicalAndOptions;
+struct LogicalAndOptionsBuilder;
struct LogicalNotOptions;
+struct LogicalNotOptionsBuilder;
struct UnpackOptions;
+struct UnpackOptionsBuilder;
struct FloorDivOptions;
+struct FloorDivOptionsBuilder;
struct SquareOptions;
+struct SquareOptionsBuilder;
struct ZerosLikeOptions;
+struct ZerosLikeOptionsBuilder;
struct FillOptions;
+struct FillOptionsBuilder;
struct FloorModOptions;
+struct FloorModOptionsBuilder;
struct RangeOptions;
+struct RangeOptionsBuilder;
struct LeakyReluOptions;
+struct LeakyReluOptionsBuilder;
struct SquaredDifferenceOptions;
+struct SquaredDifferenceOptionsBuilder;
struct MirrorPadOptions;
+struct MirrorPadOptionsBuilder;
struct UniqueOptions;
+struct UniqueOptionsBuilder;
struct ReverseV2Options;
+struct ReverseV2OptionsBuilder;
struct AddNOptions;
+struct AddNOptionsBuilder;
struct GatherNdOptions;
+struct GatherNdOptionsBuilder;
struct WhereOptions;
+struct WhereOptionsBuilder;
struct ReverseSequenceOptions;
+struct ReverseSequenceOptionsBuilder;
struct MatrixDiagOptions;
+struct MatrixDiagOptionsBuilder;
struct QuantizeOptions;
+struct QuantizeOptionsBuilder;
struct MatrixSetDiagOptions;
+struct MatrixSetDiagOptionsBuilder;
struct IfOptions;
+struct IfOptionsBuilder;
+
+struct CallOnceOptions;
+struct CallOnceOptionsBuilder;
struct WhileOptions;
+struct WhileOptionsBuilder;
struct NonMaxSuppressionV4Options;
+struct NonMaxSuppressionV4OptionsBuilder;
struct NonMaxSuppressionV5Options;
+struct NonMaxSuppressionV5OptionsBuilder;
struct ScatterNdOptions;
+struct ScatterNdOptionsBuilder;
struct SelectV2Options;
+struct SelectV2OptionsBuilder;
struct DensifyOptions;
+struct DensifyOptionsBuilder;
struct SegmentSumOptions;
+struct SegmentSumOptionsBuilder;
struct BatchMatMulOptions;
+struct BatchMatMulOptionsBuilder;
+
+struct CumsumOptions;
+struct CumsumOptionsBuilder;
+
+struct BroadcastToOptions;
+struct BroadcastToOptionsBuilder;
+
+struct Rfft2dOptions;
+struct Rfft2dOptionsBuilder;
+
+struct HashtableOptions;
+struct HashtableOptionsBuilder;
+
+struct HashtableFindOptions;
+struct HashtableFindOptionsBuilder;
+
+struct HashtableImportOptions;
+struct HashtableImportOptionsBuilder;
+
+struct HashtableSizeOptions;
+struct HashtableSizeOptionsBuilder;
+
+struct VarHandleOptions;
+struct VarHandleOptionsBuilder;
+
+struct ReadVariableOptions;
+struct ReadVariableOptionsBuilder;
+
+struct AssignVariableOptions;
+struct AssignVariableOptionsBuilder;
+
+struct RandomOptions;
+struct RandomOptionsBuilder;
struct OperatorCode;
+struct OperatorCodeBuilder;
struct Operator;
+struct OperatorBuilder;
struct SubGraph;
+struct SubGraphBuilder;
struct Buffer;
+struct BufferBuilder;
struct Metadata;
+struct MetadataBuilder;
+
+struct TensorMap;
+struct TensorMapBuilder;
+
+struct SignatureDef;
+struct SignatureDefBuilder;
struct Model;
+struct ModelBuilder;
-enum TensorType
+enum TensorType : int8_t
{
TensorType_FLOAT32 = 0,
TensorType_FLOAT16 = 1,
@@ -268,34 +428,43 @@ enum TensorType
TensorType_COMPLEX64 = 8,
TensorType_INT8 = 9,
TensorType_FLOAT64 = 10,
+ TensorType_COMPLEX128 = 11,
+ TensorType_UINT64 = 12,
+ TensorType_RESOURCE = 13,
+ TensorType_VARIANT = 14,
+ TensorType_UINT32 = 15,
TensorType_MIN = TensorType_FLOAT32,
- TensorType_MAX = TensorType_FLOAT64
+ TensorType_MAX = TensorType_UINT32
};
-inline const TensorType (&EnumValuesTensorType())[11]
+inline const TensorType (&EnumValuesTensorType())[16]
{
- static const TensorType values[] = {TensorType_FLOAT32, TensorType_FLOAT16, TensorType_INT32,
- TensorType_UINT8, TensorType_INT64, TensorType_STRING,
- TensorType_BOOL, TensorType_INT16, TensorType_COMPLEX64,
- TensorType_INT8, TensorType_FLOAT64};
+ static const TensorType values[] = {
+ TensorType_FLOAT32, TensorType_FLOAT16, TensorType_INT32, TensorType_UINT8,
+ TensorType_INT64, TensorType_STRING, TensorType_BOOL, TensorType_INT16,
+ TensorType_COMPLEX64, TensorType_INT8, TensorType_FLOAT64, TensorType_COMPLEX128,
+ TensorType_UINT64, TensorType_RESOURCE, TensorType_VARIANT, TensorType_UINT32};
return values;
}
inline const char *const *EnumNamesTensorType()
{
- static const char *const names[] = {"FLOAT32", "FLOAT16", "INT32", "UINT8",
- "INT64", "STRING", "BOOL", "INT16",
- "COMPLEX64", "INT8", "FLOAT64", nullptr};
+ static const char *const names[17] = {"FLOAT32", "FLOAT16", "INT32", "UINT8", "INT64",
+ "STRING", "BOOL", "INT16", "COMPLEX64", "INT8",
+ "FLOAT64", "COMPLEX128", "UINT64", "RESOURCE", "VARIANT",
+ "UINT32", nullptr};
return names;
}
inline const char *EnumNameTensorType(TensorType e)
{
- const size_t index = static_cast<int>(e);
+ if (flatbuffers::IsOutRange(e, TensorType_FLOAT32, TensorType_UINT32))
+ return "";
+ const size_t index = static_cast<size_t>(e);
return EnumNamesTensorType()[index];
}
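
// [Editor's note] All the regenerated EnumName* helpers in this header gain
// the same guard: an out-of-range value used to index straight past the names
// table (undefined behavior); it now yields "". A free-standing equivalent of
// the pattern, with the flatbuffers::IsOutRange check written out inline:

#include <cstddef>
#include <cstdint>

enum DimTypeSketch : int8_t
{
  DENSE = 0,
  SPARSE_CSR = 1
};

const char *EnumNameDimTypeSketch(DimTypeSketch e)
{
  static const char *const names[3] = {"DENSE", "SPARSE_CSR", nullptr};
  if (e < DENSE || e > SPARSE_CSR) // what flatbuffers::IsOutRange(e, lo, hi) checks
    return "";
  return names[static_cast<size_t>(e)];
}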
-enum QuantizationDetails
+enum QuantizationDetails : uint8_t
{
QuantizationDetails_NONE = 0,
QuantizationDetails_CustomQuantization = 1,
@@ -312,13 +481,15 @@ inline const QuantizationDetails (&EnumValuesQuantizationDetails())[2]
inline const char *const *EnumNamesQuantizationDetails()
{
- static const char *const names[] = {"NONE", "CustomQuantization", nullptr};
+ static const char *const names[3] = {"NONE", "CustomQuantization", nullptr};
return names;
}
inline const char *EnumNameQuantizationDetails(QuantizationDetails e)
{
- const size_t index = static_cast<int>(e);
+ if (flatbuffers::IsOutRange(e, QuantizationDetails_NONE, QuantizationDetails_CustomQuantization))
+ return "";
+ const size_t index = static_cast<size_t>(e);
return EnumNamesQuantizationDetails()[index];
}
@@ -327,7 +498,7 @@ template <typename T> struct QuantizationDetailsTraits
static const QuantizationDetails enum_value = QuantizationDetails_NONE;
};
-template <> struct QuantizationDetailsTraits<CustomQuantization>
+template <> struct QuantizationDetailsTraits<onert_tflite::CustomQuantization>
{
static const QuantizationDetails enum_value = QuantizationDetails_CustomQuantization;
};
@@ -338,7 +509,7 @@ bool VerifyQuantizationDetailsVector(flatbuffers::Verifier &verifier,
const flatbuffers::Vector<flatbuffers::Offset<void>> *values,
const flatbuffers::Vector<uint8_t> *types);
-enum DimensionType
+enum DimensionType : int8_t
{
DimensionType_DENSE = 0,
DimensionType_SPARSE_CSR = 1,
@@ -354,17 +525,19 @@ inline const DimensionType (&EnumValuesDimensionType())[2]
inline const char *const *EnumNamesDimensionType()
{
- static const char *const names[] = {"DENSE", "SPARSE_CSR", nullptr};
+ static const char *const names[3] = {"DENSE", "SPARSE_CSR", nullptr};
return names;
}
inline const char *EnumNameDimensionType(DimensionType e)
{
- const size_t index = static_cast<int>(e);
+ if (flatbuffers::IsOutRange(e, DimensionType_DENSE, DimensionType_SPARSE_CSR))
+ return "";
+ const size_t index = static_cast<size_t>(e);
return EnumNamesDimensionType()[index];
}
-enum SparseIndexVector
+enum SparseIndexVector : uint8_t
{
SparseIndexVector_NONE = 0,
SparseIndexVector_Int32Vector = 1,
@@ -384,14 +557,16 @@ inline const SparseIndexVector (&EnumValuesSparseIndexVector())[4]
inline const char *const *EnumNamesSparseIndexVector()
{
- static const char *const names[] = {"NONE", "Int32Vector", "Uint16Vector", "Uint8Vector",
- nullptr};
+ static const char *const names[5] = {"NONE", "Int32Vector", "Uint16Vector", "Uint8Vector",
+ nullptr};
return names;
}
inline const char *EnumNameSparseIndexVector(SparseIndexVector e)
{
- const size_t index = static_cast<int>(e);
+ if (flatbuffers::IsOutRange(e, SparseIndexVector_NONE, SparseIndexVector_Uint8Vector))
+ return "";
+ const size_t index = static_cast<size_t>(e);
return EnumNamesSparseIndexVector()[index];
}
@@ -400,17 +575,17 @@ template <typename T> struct SparseIndexVectorTraits
static const SparseIndexVector enum_value = SparseIndexVector_NONE;
};
-template <> struct SparseIndexVectorTraits<Int32Vector>
+template <> struct SparseIndexVectorTraits<onert_tflite::Int32Vector>
{
static const SparseIndexVector enum_value = SparseIndexVector_Int32Vector;
};
-template <> struct SparseIndexVectorTraits<Uint16Vector>
+template <> struct SparseIndexVectorTraits<onert_tflite::Uint16Vector>
{
static const SparseIndexVector enum_value = SparseIndexVector_Uint16Vector;
};
-template <> struct SparseIndexVectorTraits<Uint8Vector>
+template <> struct SparseIndexVectorTraits<onert_tflite::Uint8Vector>
{
static const SparseIndexVector enum_value = SparseIndexVector_Uint8Vector;
};
@@ -421,7 +596,7 @@ bool VerifySparseIndexVectorVector(flatbuffers::Verifier &verifier,
const flatbuffers::Vector<flatbuffers::Offset<void>> *values,
const flatbuffers::Vector<uint8_t> *types);
-enum BuiltinOperator
+enum BuiltinOperator : int32_t
{
BuiltinOperator_ADD = 0,
BuiltinOperator_AVERAGE_POOL_2D = 1,
@@ -550,11 +725,31 @@ enum BuiltinOperator
BuiltinOperator_DENSIFY = 124,
BuiltinOperator_SEGMENT_SUM = 125,
BuiltinOperator_BATCH_MATMUL = 126,
+ BuiltinOperator_PLACEHOLDER_FOR_GREATER_OP_CODES = 127,
+ BuiltinOperator_CUMSUM = 128,
+ BuiltinOperator_CALL_ONCE = 129,
+ BuiltinOperator_BROADCAST_TO = 130,
+ BuiltinOperator_RFFT2D = 131,
+ BuiltinOperator_CONV_3D = 132,
+ BuiltinOperator_IMAG = 133,
+ BuiltinOperator_REAL = 134,
+ BuiltinOperator_COMPLEX_ABS = 135,
+ BuiltinOperator_HASHTABLE = 136,
+ BuiltinOperator_HASHTABLE_FIND = 137,
+ BuiltinOperator_HASHTABLE_IMPORT = 138,
+ BuiltinOperator_HASHTABLE_SIZE = 139,
+ BuiltinOperator_REDUCE_ALL = 140,
+ BuiltinOperator_CONV_3D_TRANSPOSE = 141,
+ BuiltinOperator_VAR_HANDLE = 142,
+ BuiltinOperator_READ_VARIABLE = 143,
+ BuiltinOperator_ASSIGN_VARIABLE = 144,
+ BuiltinOperator_BROADCAST_ARGS = 145,
+ BuiltinOperator_RANDOM_STANDARD_NORMAL = 146,
BuiltinOperator_MIN = BuiltinOperator_ADD,
- BuiltinOperator_MAX = BuiltinOperator_BATCH_MATMUL
+ BuiltinOperator_MAX = BuiltinOperator_RANDOM_STANDARD_NORMAL
};
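
// [Editor's note] BuiltinOperator_PLACEHOLDER_FOR_GREATER_OP_CODES = 127 marks
// where the legacy single-byte opcode field of OperatorCode saturates; newer
// operators carry their real value only in the wider int32 field. A hedged
// sketch of the usual resolution rule (field names follow the upstream TFLite
// schema and should be checked against the generated OperatorCode accessors):

#include <algorithm>
#include <cstdint>

int32_t resolveBuiltinCode(int8_t deprecated_builtin_code, int32_t builtin_code)
{
  // Old ops: both fields agree. New ops: the byte field is pinned at 127,
  // so the max of the two is always the real opcode.
  return std::max(builtin_code, static_cast<int32_t>(deprecated_builtin_code));
}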
-inline const BuiltinOperator (&EnumValuesBuiltinOperator())[127]
+inline const BuiltinOperator (&EnumValuesBuiltinOperator())[147]
{
static const BuiltinOperator values[] = {BuiltinOperator_ADD,
BuiltinOperator_AVERAGE_POOL_2D,
@@ -682,150 +877,192 @@ inline const BuiltinOperator (&EnumValuesBuiltinOperator())[127]
BuiltinOperator_SELECT_V2,
BuiltinOperator_DENSIFY,
BuiltinOperator_SEGMENT_SUM,
- BuiltinOperator_BATCH_MATMUL};
+ BuiltinOperator_BATCH_MATMUL,
+ BuiltinOperator_PLACEHOLDER_FOR_GREATER_OP_CODES,
+ BuiltinOperator_CUMSUM,
+ BuiltinOperator_CALL_ONCE,
+ BuiltinOperator_BROADCAST_TO,
+ BuiltinOperator_RFFT2D,
+ BuiltinOperator_CONV_3D,
+ BuiltinOperator_IMAG,
+ BuiltinOperator_REAL,
+ BuiltinOperator_COMPLEX_ABS,
+ BuiltinOperator_HASHTABLE,
+ BuiltinOperator_HASHTABLE_FIND,
+ BuiltinOperator_HASHTABLE_IMPORT,
+ BuiltinOperator_HASHTABLE_SIZE,
+ BuiltinOperator_REDUCE_ALL,
+ BuiltinOperator_CONV_3D_TRANSPOSE,
+ BuiltinOperator_VAR_HANDLE,
+ BuiltinOperator_READ_VARIABLE,
+ BuiltinOperator_ASSIGN_VARIABLE,
+ BuiltinOperator_BROADCAST_ARGS,
+ BuiltinOperator_RANDOM_STANDARD_NORMAL};
return values;
}
inline const char *const *EnumNamesBuiltinOperator()
{
- static const char *const names[] = {"ADD",
- "AVERAGE_POOL_2D",
- "CONCATENATION",
- "CONV_2D",
- "DEPTHWISE_CONV_2D",
- "DEPTH_TO_SPACE",
- "DEQUANTIZE",
- "EMBEDDING_LOOKUP",
- "FLOOR",
- "FULLY_CONNECTED",
- "HASHTABLE_LOOKUP",
- "L2_NORMALIZATION",
- "L2_POOL_2D",
- "LOCAL_RESPONSE_NORMALIZATION",
- "LOGISTIC",
- "LSH_PROJECTION",
- "LSTM",
- "MAX_POOL_2D",
- "MUL",
- "RELU",
- "RELU_N1_TO_1",
- "RELU6",
- "RESHAPE",
- "RESIZE_BILINEAR",
- "RNN",
- "SOFTMAX",
- "SPACE_TO_DEPTH",
- "SVDF",
- "TANH",
- "CONCAT_EMBEDDINGS",
- "SKIP_GRAM",
- "CALL",
- "CUSTOM",
- "EMBEDDING_LOOKUP_SPARSE",
- "PAD",
- "UNIDIRECTIONAL_SEQUENCE_RNN",
- "GATHER",
- "BATCH_TO_SPACE_ND",
- "SPACE_TO_BATCH_ND",
- "TRANSPOSE",
- "MEAN",
- "SUB",
- "DIV",
- "SQUEEZE",
- "UNIDIRECTIONAL_SEQUENCE_LSTM",
- "STRIDED_SLICE",
- "BIDIRECTIONAL_SEQUENCE_RNN",
- "EXP",
- "TOPK_V2",
- "SPLIT",
- "LOG_SOFTMAX",
- "DELEGATE",
- "BIDIRECTIONAL_SEQUENCE_LSTM",
- "CAST",
- "PRELU",
- "MAXIMUM",
- "ARG_MAX",
- "MINIMUM",
- "LESS",
- "NEG",
- "PADV2",
- "GREATER",
- "GREATER_EQUAL",
- "LESS_EQUAL",
- "SELECT",
- "SLICE",
- "SIN",
- "TRANSPOSE_CONV",
- "SPARSE_TO_DENSE",
- "TILE",
- "EXPAND_DIMS",
- "EQUAL",
- "NOT_EQUAL",
- "LOG",
- "SUM",
- "SQRT",
- "RSQRT",
- "SHAPE",
- "POW",
- "ARG_MIN",
- "FAKE_QUANT",
- "REDUCE_PROD",
- "REDUCE_MAX",
- "PACK",
- "LOGICAL_OR",
- "ONE_HOT",
- "LOGICAL_AND",
- "LOGICAL_NOT",
- "UNPACK",
- "REDUCE_MIN",
- "FLOOR_DIV",
- "REDUCE_ANY",
- "SQUARE",
- "ZEROS_LIKE",
- "FILL",
- "FLOOR_MOD",
- "RANGE",
- "RESIZE_NEAREST_NEIGHBOR",
- "LEAKY_RELU",
- "SQUARED_DIFFERENCE",
- "MIRROR_PAD",
- "ABS",
- "SPLIT_V",
- "UNIQUE",
- "CEIL",
- "REVERSE_V2",
- "ADD_N",
- "GATHER_ND",
- "COS",
- "WHERE",
- "RANK",
- "ELU",
- "REVERSE_SEQUENCE",
- "MATRIX_DIAG",
- "QUANTIZE",
- "MATRIX_SET_DIAG",
- "ROUND",
- "HARD_SWISH",
- "IF",
- "WHILE",
- "NON_MAX_SUPPRESSION_V4",
- "NON_MAX_SUPPRESSION_V5",
- "SCATTER_ND",
- "SELECT_V2",
- "DENSIFY",
- "SEGMENT_SUM",
- "BATCH_MATMUL",
- nullptr};
+ static const char *const names[148] = {"ADD",
+ "AVERAGE_POOL_2D",
+ "CONCATENATION",
+ "CONV_2D",
+ "DEPTHWISE_CONV_2D",
+ "DEPTH_TO_SPACE",
+ "DEQUANTIZE",
+ "EMBEDDING_LOOKUP",
+ "FLOOR",
+ "FULLY_CONNECTED",
+ "HASHTABLE_LOOKUP",
+ "L2_NORMALIZATION",
+ "L2_POOL_2D",
+ "LOCAL_RESPONSE_NORMALIZATION",
+ "LOGISTIC",
+ "LSH_PROJECTION",
+ "LSTM",
+ "MAX_POOL_2D",
+ "MUL",
+ "RELU",
+ "RELU_N1_TO_1",
+ "RELU6",
+ "RESHAPE",
+ "RESIZE_BILINEAR",
+ "RNN",
+ "SOFTMAX",
+ "SPACE_TO_DEPTH",
+ "SVDF",
+ "TANH",
+ "CONCAT_EMBEDDINGS",
+ "SKIP_GRAM",
+ "CALL",
+ "CUSTOM",
+ "EMBEDDING_LOOKUP_SPARSE",
+ "PAD",
+ "UNIDIRECTIONAL_SEQUENCE_RNN",
+ "GATHER",
+ "BATCH_TO_SPACE_ND",
+ "SPACE_TO_BATCH_ND",
+ "TRANSPOSE",
+ "MEAN",
+ "SUB",
+ "DIV",
+ "SQUEEZE",
+ "UNIDIRECTIONAL_SEQUENCE_LSTM",
+ "STRIDED_SLICE",
+ "BIDIRECTIONAL_SEQUENCE_RNN",
+ "EXP",
+ "TOPK_V2",
+ "SPLIT",
+ "LOG_SOFTMAX",
+ "DELEGATE",
+ "BIDIRECTIONAL_SEQUENCE_LSTM",
+ "CAST",
+ "PRELU",
+ "MAXIMUM",
+ "ARG_MAX",
+ "MINIMUM",
+ "LESS",
+ "NEG",
+ "PADV2",
+ "GREATER",
+ "GREATER_EQUAL",
+ "LESS_EQUAL",
+ "SELECT",
+ "SLICE",
+ "SIN",
+ "TRANSPOSE_CONV",
+ "SPARSE_TO_DENSE",
+ "TILE",
+ "EXPAND_DIMS",
+ "EQUAL",
+ "NOT_EQUAL",
+ "LOG",
+ "SUM",
+ "SQRT",
+ "RSQRT",
+ "SHAPE",
+ "POW",
+ "ARG_MIN",
+ "FAKE_QUANT",
+ "REDUCE_PROD",
+ "REDUCE_MAX",
+ "PACK",
+ "LOGICAL_OR",
+ "ONE_HOT",
+ "LOGICAL_AND",
+ "LOGICAL_NOT",
+ "UNPACK",
+ "REDUCE_MIN",
+ "FLOOR_DIV",
+ "REDUCE_ANY",
+ "SQUARE",
+ "ZEROS_LIKE",
+ "FILL",
+ "FLOOR_MOD",
+ "RANGE",
+ "RESIZE_NEAREST_NEIGHBOR",
+ "LEAKY_RELU",
+ "SQUARED_DIFFERENCE",
+ "MIRROR_PAD",
+ "ABS",
+ "SPLIT_V",
+ "UNIQUE",
+ "CEIL",
+ "REVERSE_V2",
+ "ADD_N",
+ "GATHER_ND",
+ "COS",
+ "WHERE",
+ "RANK",
+ "ELU",
+ "REVERSE_SEQUENCE",
+ "MATRIX_DIAG",
+ "QUANTIZE",
+ "MATRIX_SET_DIAG",
+ "ROUND",
+ "HARD_SWISH",
+ "IF",
+ "WHILE",
+ "NON_MAX_SUPPRESSION_V4",
+ "NON_MAX_SUPPRESSION_V5",
+ "SCATTER_ND",
+ "SELECT_V2",
+ "DENSIFY",
+ "SEGMENT_SUM",
+ "BATCH_MATMUL",
+ "PLACEHOLDER_FOR_GREATER_OP_CODES",
+ "CUMSUM",
+ "CALL_ONCE",
+ "BROADCAST_TO",
+ "RFFT2D",
+ "CONV_3D",
+ "IMAG",
+ "REAL",
+ "COMPLEX_ABS",
+ "HASHTABLE",
+ "HASHTABLE_FIND",
+ "HASHTABLE_IMPORT",
+ "HASHTABLE_SIZE",
+ "REDUCE_ALL",
+ "CONV_3D_TRANSPOSE",
+ "VAR_HANDLE",
+ "READ_VARIABLE",
+ "ASSIGN_VARIABLE",
+ "BROADCAST_ARGS",
+ "RANDOM_STANDARD_NORMAL",
+ nullptr};
return names;
}
inline const char *EnumNameBuiltinOperator(BuiltinOperator e)
{
- const size_t index = static_cast<int>(e);
+ if (flatbuffers::IsOutRange(e, BuiltinOperator_ADD, BuiltinOperator_RANDOM_STANDARD_NORMAL))
+ return "";
+ const size_t index = static_cast<size_t>(e);
return EnumNamesBuiltinOperator()[index];
}
-enum BuiltinOptions
+enum BuiltinOptions : uint8_t
{
BuiltinOptions_NONE = 0,
BuiltinOptions_Conv2DOptions = 1,
@@ -929,11 +1166,24 @@ enum BuiltinOptions
BuiltinOptions_DensifyOptions = 99,
BuiltinOptions_SegmentSumOptions = 100,
BuiltinOptions_BatchMatMulOptions = 101,
+ BuiltinOptions_CumsumOptions = 102,
+ BuiltinOptions_CallOnceOptions = 103,
+ BuiltinOptions_BroadcastToOptions = 104,
+ BuiltinOptions_Rfft2dOptions = 105,
+ BuiltinOptions_Conv3DOptions = 106,
+ BuiltinOptions_HashtableOptions = 107,
+ BuiltinOptions_HashtableFindOptions = 108,
+ BuiltinOptions_HashtableImportOptions = 109,
+ BuiltinOptions_HashtableSizeOptions = 110,
+ BuiltinOptions_VarHandleOptions = 111,
+ BuiltinOptions_ReadVariableOptions = 112,
+ BuiltinOptions_AssignVariableOptions = 113,
+ BuiltinOptions_RandomOptions = 114,
BuiltinOptions_MIN = BuiltinOptions_NONE,
- BuiltinOptions_MAX = BuiltinOptions_BatchMatMulOptions
+ BuiltinOptions_MAX = BuiltinOptions_RandomOptions
};
-inline const BuiltinOptions (&EnumValuesBuiltinOptions())[102]
+inline const BuiltinOptions (&EnumValuesBuiltinOptions())[115]
{
static const BuiltinOptions values[] = {BuiltinOptions_NONE,
BuiltinOptions_Conv2DOptions,
@@ -1036,121 +1286,149 @@ inline const BuiltinOptions (&EnumValuesBuiltinOptions())[102]
BuiltinOptions_SelectV2Options,
BuiltinOptions_DensifyOptions,
BuiltinOptions_SegmentSumOptions,
- BuiltinOptions_BatchMatMulOptions};
+ BuiltinOptions_BatchMatMulOptions,
+ BuiltinOptions_CumsumOptions,
+ BuiltinOptions_CallOnceOptions,
+ BuiltinOptions_BroadcastToOptions,
+ BuiltinOptions_Rfft2dOptions,
+ BuiltinOptions_Conv3DOptions,
+ BuiltinOptions_HashtableOptions,
+ BuiltinOptions_HashtableFindOptions,
+ BuiltinOptions_HashtableImportOptions,
+ BuiltinOptions_HashtableSizeOptions,
+ BuiltinOptions_VarHandleOptions,
+ BuiltinOptions_ReadVariableOptions,
+ BuiltinOptions_AssignVariableOptions,
+ BuiltinOptions_RandomOptions};
return values;
}
inline const char *const *EnumNamesBuiltinOptions()
{
- static const char *const names[] = {"NONE",
- "Conv2DOptions",
- "DepthwiseConv2DOptions",
- "ConcatEmbeddingsOptions",
- "LSHProjectionOptions",
- "Pool2DOptions",
- "SVDFOptions",
- "RNNOptions",
- "FullyConnectedOptions",
- "SoftmaxOptions",
- "ConcatenationOptions",
- "AddOptions",
- "L2NormOptions",
- "LocalResponseNormalizationOptions",
- "LSTMOptions",
- "ResizeBilinearOptions",
- "CallOptions",
- "ReshapeOptions",
- "SkipGramOptions",
- "SpaceToDepthOptions",
- "EmbeddingLookupSparseOptions",
- "MulOptions",
- "PadOptions",
- "GatherOptions",
- "BatchToSpaceNDOptions",
- "SpaceToBatchNDOptions",
- "TransposeOptions",
- "ReducerOptions",
- "SubOptions",
- "DivOptions",
- "SqueezeOptions",
- "SequenceRNNOptions",
- "StridedSliceOptions",
- "ExpOptions",
- "TopKV2Options",
- "SplitOptions",
- "LogSoftmaxOptions",
- "CastOptions",
- "DequantizeOptions",
- "MaximumMinimumOptions",
- "ArgMaxOptions",
- "LessOptions",
- "NegOptions",
- "PadV2Options",
- "GreaterOptions",
- "GreaterEqualOptions",
- "LessEqualOptions",
- "SelectOptions",
- "SliceOptions",
- "TransposeConvOptions",
- "SparseToDenseOptions",
- "TileOptions",
- "ExpandDimsOptions",
- "EqualOptions",
- "NotEqualOptions",
- "ShapeOptions",
- "PowOptions",
- "ArgMinOptions",
- "FakeQuantOptions",
- "PackOptions",
- "LogicalOrOptions",
- "OneHotOptions",
- "LogicalAndOptions",
- "LogicalNotOptions",
- "UnpackOptions",
- "FloorDivOptions",
- "SquareOptions",
- "ZerosLikeOptions",
- "FillOptions",
- "BidirectionalSequenceLSTMOptions",
- "BidirectionalSequenceRNNOptions",
- "UnidirectionalSequenceLSTMOptions",
- "FloorModOptions",
- "RangeOptions",
- "ResizeNearestNeighborOptions",
- "LeakyReluOptions",
- "SquaredDifferenceOptions",
- "MirrorPadOptions",
- "AbsOptions",
- "SplitVOptions",
- "UniqueOptions",
- "ReverseV2Options",
- "AddNOptions",
- "GatherNdOptions",
- "CosOptions",
- "WhereOptions",
- "RankOptions",
- "ReverseSequenceOptions",
- "MatrixDiagOptions",
- "QuantizeOptions",
- "MatrixSetDiagOptions",
- "HardSwishOptions",
- "IfOptions",
- "WhileOptions",
- "DepthToSpaceOptions",
- "NonMaxSuppressionV4Options",
- "NonMaxSuppressionV5Options",
- "ScatterNdOptions",
- "SelectV2Options",
- "DensifyOptions",
- "SegmentSumOptions",
- "BatchMatMulOptions",
- nullptr};
+ static const char *const names[116] = {"NONE",
+ "Conv2DOptions",
+ "DepthwiseConv2DOptions",
+ "ConcatEmbeddingsOptions",
+ "LSHProjectionOptions",
+ "Pool2DOptions",
+ "SVDFOptions",
+ "RNNOptions",
+ "FullyConnectedOptions",
+ "SoftmaxOptions",
+ "ConcatenationOptions",
+ "AddOptions",
+ "L2NormOptions",
+ "LocalResponseNormalizationOptions",
+ "LSTMOptions",
+ "ResizeBilinearOptions",
+ "CallOptions",
+ "ReshapeOptions",
+ "SkipGramOptions",
+ "SpaceToDepthOptions",
+ "EmbeddingLookupSparseOptions",
+ "MulOptions",
+ "PadOptions",
+ "GatherOptions",
+ "BatchToSpaceNDOptions",
+ "SpaceToBatchNDOptions",
+ "TransposeOptions",
+ "ReducerOptions",
+ "SubOptions",
+ "DivOptions",
+ "SqueezeOptions",
+ "SequenceRNNOptions",
+ "StridedSliceOptions",
+ "ExpOptions",
+ "TopKV2Options",
+ "SplitOptions",
+ "LogSoftmaxOptions",
+ "CastOptions",
+ "DequantizeOptions",
+ "MaximumMinimumOptions",
+ "ArgMaxOptions",
+ "LessOptions",
+ "NegOptions",
+ "PadV2Options",
+ "GreaterOptions",
+ "GreaterEqualOptions",
+ "LessEqualOptions",
+ "SelectOptions",
+ "SliceOptions",
+ "TransposeConvOptions",
+ "SparseToDenseOptions",
+ "TileOptions",
+ "ExpandDimsOptions",
+ "EqualOptions",
+ "NotEqualOptions",
+ "ShapeOptions",
+ "PowOptions",
+ "ArgMinOptions",
+ "FakeQuantOptions",
+ "PackOptions",
+ "LogicalOrOptions",
+ "OneHotOptions",
+ "LogicalAndOptions",
+ "LogicalNotOptions",
+ "UnpackOptions",
+ "FloorDivOptions",
+ "SquareOptions",
+ "ZerosLikeOptions",
+ "FillOptions",
+ "BidirectionalSequenceLSTMOptions",
+ "BidirectionalSequenceRNNOptions",
+ "UnidirectionalSequenceLSTMOptions",
+ "FloorModOptions",
+ "RangeOptions",
+ "ResizeNearestNeighborOptions",
+ "LeakyReluOptions",
+ "SquaredDifferenceOptions",
+ "MirrorPadOptions",
+ "AbsOptions",
+ "SplitVOptions",
+ "UniqueOptions",
+ "ReverseV2Options",
+ "AddNOptions",
+ "GatherNdOptions",
+ "CosOptions",
+ "WhereOptions",
+ "RankOptions",
+ "ReverseSequenceOptions",
+ "MatrixDiagOptions",
+ "QuantizeOptions",
+ "MatrixSetDiagOptions",
+ "HardSwishOptions",
+ "IfOptions",
+ "WhileOptions",
+ "DepthToSpaceOptions",
+ "NonMaxSuppressionV4Options",
+ "NonMaxSuppressionV5Options",
+ "ScatterNdOptions",
+ "SelectV2Options",
+ "DensifyOptions",
+ "SegmentSumOptions",
+ "BatchMatMulOptions",
+ "CumsumOptions",
+ "CallOnceOptions",
+ "BroadcastToOptions",
+ "Rfft2dOptions",
+ "Conv3DOptions",
+ "HashtableOptions",
+ "HashtableFindOptions",
+ "HashtableImportOptions",
+ "HashtableSizeOptions",
+ "VarHandleOptions",
+ "ReadVariableOptions",
+ "AssignVariableOptions",
+ "RandomOptions",
+ nullptr};
return names;
}
inline const char *EnumNameBuiltinOptions(BuiltinOptions e)
{
- const size_t index = static_cast<int>(e);
+ if (flatbuffers::IsOutRange(e, BuiltinOptions_NONE, BuiltinOptions_RandomOptions))
+ return "";
+ const size_t index = static_cast<size_t>(e);
return EnumNamesBuiltinOptions()[index];
}
@@ -1159,517 +1437,582 @@ template <typename T> struct BuiltinOptionsTraits
static const BuiltinOptions enum_value = BuiltinOptions_NONE;
};
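
// [Editor's note] Each Traits specialization below is regenerated with a fully
// qualified argument type (onert_tflite::Conv2DOptions rather than
// Conv2DOptions). Behavior is unchanged; the qualification only keeps the
// specializations unambiguous if similarly named types exist in an enclosing
// scope. Minimal illustration with hypothetical names:

namespace demo
{
struct Foo
{
};
} // namespace demo

template <typename T> struct TraitsSketch
{
  static const int enum_value = 0;
};

template <> struct TraitsSketch<demo::Foo> // qualified form, as regenerated
{
  static const int enum_value = 1;
};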
-template <> struct BuiltinOptionsTraits<Conv2DOptions>
+template <> struct BuiltinOptionsTraits<onert_tflite::Conv2DOptions>
{
static const BuiltinOptions enum_value = BuiltinOptions_Conv2DOptions;
};
-template <> struct BuiltinOptionsTraits<DepthwiseConv2DOptions>
+template <> struct BuiltinOptionsTraits<onert_tflite::DepthwiseConv2DOptions>
{
static const BuiltinOptions enum_value = BuiltinOptions_DepthwiseConv2DOptions;
};
-template <> struct BuiltinOptionsTraits<ConcatEmbeddingsOptions>
+template <> struct BuiltinOptionsTraits<onert_tflite::ConcatEmbeddingsOptions>
{
static const BuiltinOptions enum_value = BuiltinOptions_ConcatEmbeddingsOptions;
};
-template <> struct BuiltinOptionsTraits<LSHProjectionOptions>
+template <> struct BuiltinOptionsTraits<onert_tflite::LSHProjectionOptions>
{
static const BuiltinOptions enum_value = BuiltinOptions_LSHProjectionOptions;
};
-template <> struct BuiltinOptionsTraits<Pool2DOptions>
+template <> struct BuiltinOptionsTraits<onert_tflite::Pool2DOptions>
{
static const BuiltinOptions enum_value = BuiltinOptions_Pool2DOptions;
};
-template <> struct BuiltinOptionsTraits<SVDFOptions>
+template <> struct BuiltinOptionsTraits<onert_tflite::SVDFOptions>
{
static const BuiltinOptions enum_value = BuiltinOptions_SVDFOptions;
};
-template <> struct BuiltinOptionsTraits<RNNOptions>
+template <> struct BuiltinOptionsTraits<onert_tflite::RNNOptions>
{
static const BuiltinOptions enum_value = BuiltinOptions_RNNOptions;
};
-template <> struct BuiltinOptionsTraits<FullyConnectedOptions>
+template <> struct BuiltinOptionsTraits<onert_tflite::FullyConnectedOptions>
{
static const BuiltinOptions enum_value = BuiltinOptions_FullyConnectedOptions;
};
-template <> struct BuiltinOptionsTraits<SoftmaxOptions>
+template <> struct BuiltinOptionsTraits<onert_tflite::SoftmaxOptions>
{
static const BuiltinOptions enum_value = BuiltinOptions_SoftmaxOptions;
};
-template <> struct BuiltinOptionsTraits<ConcatenationOptions>
+template <> struct BuiltinOptionsTraits<onert_tflite::ConcatenationOptions>
{
static const BuiltinOptions enum_value = BuiltinOptions_ConcatenationOptions;
};
-template <> struct BuiltinOptionsTraits<AddOptions>
+template <> struct BuiltinOptionsTraits<onert_tflite::AddOptions>
{
static const BuiltinOptions enum_value = BuiltinOptions_AddOptions;
};
-template <> struct BuiltinOptionsTraits<L2NormOptions>
+template <> struct BuiltinOptionsTraits<onert_tflite::L2NormOptions>
{
static const BuiltinOptions enum_value = BuiltinOptions_L2NormOptions;
};
-template <> struct BuiltinOptionsTraits<LocalResponseNormalizationOptions>
+template <> struct BuiltinOptionsTraits<onert_tflite::LocalResponseNormalizationOptions>
{
static const BuiltinOptions enum_value = BuiltinOptions_LocalResponseNormalizationOptions;
};
-template <> struct BuiltinOptionsTraits<LSTMOptions>
+template <> struct BuiltinOptionsTraits<onert_tflite::LSTMOptions>
{
static const BuiltinOptions enum_value = BuiltinOptions_LSTMOptions;
};
-template <> struct BuiltinOptionsTraits<ResizeBilinearOptions>
+template <> struct BuiltinOptionsTraits<onert_tflite::ResizeBilinearOptions>
{
static const BuiltinOptions enum_value = BuiltinOptions_ResizeBilinearOptions;
};
-template <> struct BuiltinOptionsTraits<CallOptions>
+template <> struct BuiltinOptionsTraits<onert_tflite::CallOptions>
{
static const BuiltinOptions enum_value = BuiltinOptions_CallOptions;
};
-template <> struct BuiltinOptionsTraits<ReshapeOptions>
+template <> struct BuiltinOptionsTraits<onert_tflite::ReshapeOptions>
{
static const BuiltinOptions enum_value = BuiltinOptions_ReshapeOptions;
};
-template <> struct BuiltinOptionsTraits<SkipGramOptions>
+template <> struct BuiltinOptionsTraits<onert_tflite::SkipGramOptions>
{
static const BuiltinOptions enum_value = BuiltinOptions_SkipGramOptions;
};
-template <> struct BuiltinOptionsTraits<SpaceToDepthOptions>
+template <> struct BuiltinOptionsTraits<onert_tflite::SpaceToDepthOptions>
{
static const BuiltinOptions enum_value = BuiltinOptions_SpaceToDepthOptions;
};
-template <> struct BuiltinOptionsTraits<EmbeddingLookupSparseOptions>
+template <> struct BuiltinOptionsTraits<onert_tflite::EmbeddingLookupSparseOptions>
{
static const BuiltinOptions enum_value = BuiltinOptions_EmbeddingLookupSparseOptions;
};
-template <> struct BuiltinOptionsTraits<MulOptions>
+template <> struct BuiltinOptionsTraits<onert_tflite::MulOptions>
{
static const BuiltinOptions enum_value = BuiltinOptions_MulOptions;
};
-template <> struct BuiltinOptionsTraits<PadOptions>
+template <> struct BuiltinOptionsTraits<onert_tflite::PadOptions>
{
static const BuiltinOptions enum_value = BuiltinOptions_PadOptions;
};
-template <> struct BuiltinOptionsTraits<GatherOptions>
+template <> struct BuiltinOptionsTraits<onert_tflite::GatherOptions>
{
static const BuiltinOptions enum_value = BuiltinOptions_GatherOptions;
};
-template <> struct BuiltinOptionsTraits<BatchToSpaceNDOptions>
+template <> struct BuiltinOptionsTraits<onert_tflite::BatchToSpaceNDOptions>
{
static const BuiltinOptions enum_value = BuiltinOptions_BatchToSpaceNDOptions;
};
-template <> struct BuiltinOptionsTraits<SpaceToBatchNDOptions>
+template <> struct BuiltinOptionsTraits<onert_tflite::SpaceToBatchNDOptions>
{
static const BuiltinOptions enum_value = BuiltinOptions_SpaceToBatchNDOptions;
};
-template <> struct BuiltinOptionsTraits<TransposeOptions>
+template <> struct BuiltinOptionsTraits<onert_tflite::TransposeOptions>
{
static const BuiltinOptions enum_value = BuiltinOptions_TransposeOptions;
};
-template <> struct BuiltinOptionsTraits<ReducerOptions>
+template <> struct BuiltinOptionsTraits<onert_tflite::ReducerOptions>
{
static const BuiltinOptions enum_value = BuiltinOptions_ReducerOptions;
};
-template <> struct BuiltinOptionsTraits<SubOptions>
+template <> struct BuiltinOptionsTraits<onert_tflite::SubOptions>
{
static const BuiltinOptions enum_value = BuiltinOptions_SubOptions;
};
-template <> struct BuiltinOptionsTraits<DivOptions>
+template <> struct BuiltinOptionsTraits<onert_tflite::DivOptions>
{
static const BuiltinOptions enum_value = BuiltinOptions_DivOptions;
};
-template <> struct BuiltinOptionsTraits<SqueezeOptions>
+template <> struct BuiltinOptionsTraits<onert_tflite::SqueezeOptions>
{
static const BuiltinOptions enum_value = BuiltinOptions_SqueezeOptions;
};
-template <> struct BuiltinOptionsTraits<SequenceRNNOptions>
+template <> struct BuiltinOptionsTraits<onert_tflite::SequenceRNNOptions>
{
static const BuiltinOptions enum_value = BuiltinOptions_SequenceRNNOptions;
};
-template <> struct BuiltinOptionsTraits<StridedSliceOptions>
+template <> struct BuiltinOptionsTraits<onert_tflite::StridedSliceOptions>
{
static const BuiltinOptions enum_value = BuiltinOptions_StridedSliceOptions;
};
-template <> struct BuiltinOptionsTraits<ExpOptions>
+template <> struct BuiltinOptionsTraits<onert_tflite::ExpOptions>
{
static const BuiltinOptions enum_value = BuiltinOptions_ExpOptions;
};
-template <> struct BuiltinOptionsTraits<TopKV2Options>
+template <> struct BuiltinOptionsTraits<onert_tflite::TopKV2Options>
{
static const BuiltinOptions enum_value = BuiltinOptions_TopKV2Options;
};
-template <> struct BuiltinOptionsTraits<SplitOptions>
+template <> struct BuiltinOptionsTraits<onert_tflite::SplitOptions>
{
static const BuiltinOptions enum_value = BuiltinOptions_SplitOptions;
};
-template <> struct BuiltinOptionsTraits<LogSoftmaxOptions>
+template <> struct BuiltinOptionsTraits<onert_tflite::LogSoftmaxOptions>
{
static const BuiltinOptions enum_value = BuiltinOptions_LogSoftmaxOptions;
};
-template <> struct BuiltinOptionsTraits<CastOptions>
+template <> struct BuiltinOptionsTraits<onert_tflite::CastOptions>
{
static const BuiltinOptions enum_value = BuiltinOptions_CastOptions;
};
-template <> struct BuiltinOptionsTraits<DequantizeOptions>
+template <> struct BuiltinOptionsTraits<onert_tflite::DequantizeOptions>
{
static const BuiltinOptions enum_value = BuiltinOptions_DequantizeOptions;
};
-template <> struct BuiltinOptionsTraits<MaximumMinimumOptions>
+template <> struct BuiltinOptionsTraits<onert_tflite::MaximumMinimumOptions>
{
static const BuiltinOptions enum_value = BuiltinOptions_MaximumMinimumOptions;
};
-template <> struct BuiltinOptionsTraits<ArgMaxOptions>
+template <> struct BuiltinOptionsTraits<onert_tflite::ArgMaxOptions>
{
static const BuiltinOptions enum_value = BuiltinOptions_ArgMaxOptions;
};
-template <> struct BuiltinOptionsTraits<LessOptions>
+template <> struct BuiltinOptionsTraits<onert_tflite::LessOptions>
{
static const BuiltinOptions enum_value = BuiltinOptions_LessOptions;
};
-template <> struct BuiltinOptionsTraits<NegOptions>
+template <> struct BuiltinOptionsTraits<onert_tflite::NegOptions>
{
static const BuiltinOptions enum_value = BuiltinOptions_NegOptions;
};
-template <> struct BuiltinOptionsTraits<PadV2Options>
+template <> struct BuiltinOptionsTraits<onert_tflite::PadV2Options>
{
static const BuiltinOptions enum_value = BuiltinOptions_PadV2Options;
};
-template <> struct BuiltinOptionsTraits<GreaterOptions>
+template <> struct BuiltinOptionsTraits<onert_tflite::GreaterOptions>
{
static const BuiltinOptions enum_value = BuiltinOptions_GreaterOptions;
};
-template <> struct BuiltinOptionsTraits<GreaterEqualOptions>
+template <> struct BuiltinOptionsTraits<onert_tflite::GreaterEqualOptions>
{
static const BuiltinOptions enum_value = BuiltinOptions_GreaterEqualOptions;
};
-template <> struct BuiltinOptionsTraits<LessEqualOptions>
+template <> struct BuiltinOptionsTraits<onert_tflite::LessEqualOptions>
{
static const BuiltinOptions enum_value = BuiltinOptions_LessEqualOptions;
};
-template <> struct BuiltinOptionsTraits<SelectOptions>
+template <> struct BuiltinOptionsTraits<onert_tflite::SelectOptions>
{
static const BuiltinOptions enum_value = BuiltinOptions_SelectOptions;
};
-template <> struct BuiltinOptionsTraits<SliceOptions>
+template <> struct BuiltinOptionsTraits<onert_tflite::SliceOptions>
{
static const BuiltinOptions enum_value = BuiltinOptions_SliceOptions;
};
-template <> struct BuiltinOptionsTraits<TransposeConvOptions>
+template <> struct BuiltinOptionsTraits<onert_tflite::TransposeConvOptions>
{
static const BuiltinOptions enum_value = BuiltinOptions_TransposeConvOptions;
};
-template <> struct BuiltinOptionsTraits<SparseToDenseOptions>
+template <> struct BuiltinOptionsTraits<onert_tflite::SparseToDenseOptions>
{
static const BuiltinOptions enum_value = BuiltinOptions_SparseToDenseOptions;
};
-template <> struct BuiltinOptionsTraits<TileOptions>
+template <> struct BuiltinOptionsTraits<onert_tflite::TileOptions>
{
static const BuiltinOptions enum_value = BuiltinOptions_TileOptions;
};
-template <> struct BuiltinOptionsTraits<ExpandDimsOptions>
+template <> struct BuiltinOptionsTraits<onert_tflite::ExpandDimsOptions>
{
static const BuiltinOptions enum_value = BuiltinOptions_ExpandDimsOptions;
};
-template <> struct BuiltinOptionsTraits<EqualOptions>
+template <> struct BuiltinOptionsTraits<onert_tflite::EqualOptions>
{
static const BuiltinOptions enum_value = BuiltinOptions_EqualOptions;
};
-template <> struct BuiltinOptionsTraits<NotEqualOptions>
+template <> struct BuiltinOptionsTraits<onert_tflite::NotEqualOptions>
{
static const BuiltinOptions enum_value = BuiltinOptions_NotEqualOptions;
};
-template <> struct BuiltinOptionsTraits<ShapeOptions>
+template <> struct BuiltinOptionsTraits<onert_tflite::ShapeOptions>
{
static const BuiltinOptions enum_value = BuiltinOptions_ShapeOptions;
};
-template <> struct BuiltinOptionsTraits<PowOptions>
+template <> struct BuiltinOptionsTraits<onert_tflite::PowOptions>
{
static const BuiltinOptions enum_value = BuiltinOptions_PowOptions;
};
-template <> struct BuiltinOptionsTraits<ArgMinOptions>
+template <> struct BuiltinOptionsTraits<onert_tflite::ArgMinOptions>
{
static const BuiltinOptions enum_value = BuiltinOptions_ArgMinOptions;
};
-template <> struct BuiltinOptionsTraits<FakeQuantOptions>
+template <> struct BuiltinOptionsTraits<onert_tflite::FakeQuantOptions>
{
static const BuiltinOptions enum_value = BuiltinOptions_FakeQuantOptions;
};
-template <> struct BuiltinOptionsTraits<PackOptions>
+template <> struct BuiltinOptionsTraits<onert_tflite::PackOptions>
{
static const BuiltinOptions enum_value = BuiltinOptions_PackOptions;
};
-template <> struct BuiltinOptionsTraits<LogicalOrOptions>
+template <> struct BuiltinOptionsTraits<onert_tflite::LogicalOrOptions>
{
static const BuiltinOptions enum_value = BuiltinOptions_LogicalOrOptions;
};
-template <> struct BuiltinOptionsTraits<OneHotOptions>
+template <> struct BuiltinOptionsTraits<onert_tflite::OneHotOptions>
{
static const BuiltinOptions enum_value = BuiltinOptions_OneHotOptions;
};
-template <> struct BuiltinOptionsTraits<LogicalAndOptions>
+template <> struct BuiltinOptionsTraits<onert_tflite::LogicalAndOptions>
{
static const BuiltinOptions enum_value = BuiltinOptions_LogicalAndOptions;
};
-template <> struct BuiltinOptionsTraits<LogicalNotOptions>
+template <> struct BuiltinOptionsTraits<onert_tflite::LogicalNotOptions>
{
static const BuiltinOptions enum_value = BuiltinOptions_LogicalNotOptions;
};
-template <> struct BuiltinOptionsTraits<UnpackOptions>
+template <> struct BuiltinOptionsTraits<onert_tflite::UnpackOptions>
{
static const BuiltinOptions enum_value = BuiltinOptions_UnpackOptions;
};
-template <> struct BuiltinOptionsTraits<FloorDivOptions>
+template <> struct BuiltinOptionsTraits<onert_tflite::FloorDivOptions>
{
static const BuiltinOptions enum_value = BuiltinOptions_FloorDivOptions;
};
-template <> struct BuiltinOptionsTraits<SquareOptions>
+template <> struct BuiltinOptionsTraits<onert_tflite::SquareOptions>
{
static const BuiltinOptions enum_value = BuiltinOptions_SquareOptions;
};
-template <> struct BuiltinOptionsTraits<ZerosLikeOptions>
+template <> struct BuiltinOptionsTraits<onert_tflite::ZerosLikeOptions>
{
static const BuiltinOptions enum_value = BuiltinOptions_ZerosLikeOptions;
};
-template <> struct BuiltinOptionsTraits<FillOptions>
+template <> struct BuiltinOptionsTraits<onert_tflite::FillOptions>
{
static const BuiltinOptions enum_value = BuiltinOptions_FillOptions;
};
-template <> struct BuiltinOptionsTraits<BidirectionalSequenceLSTMOptions>
+template <> struct BuiltinOptionsTraits<onert_tflite::BidirectionalSequenceLSTMOptions>
{
static const BuiltinOptions enum_value = BuiltinOptions_BidirectionalSequenceLSTMOptions;
};
-template <> struct BuiltinOptionsTraits<BidirectionalSequenceRNNOptions>
+template <> struct BuiltinOptionsTraits<onert_tflite::BidirectionalSequenceRNNOptions>
{
static const BuiltinOptions enum_value = BuiltinOptions_BidirectionalSequenceRNNOptions;
};
-template <> struct BuiltinOptionsTraits<UnidirectionalSequenceLSTMOptions>
+template <> struct BuiltinOptionsTraits<onert_tflite::UnidirectionalSequenceLSTMOptions>
{
static const BuiltinOptions enum_value = BuiltinOptions_UnidirectionalSequenceLSTMOptions;
};
-template <> struct BuiltinOptionsTraits<FloorModOptions>
+template <> struct BuiltinOptionsTraits<onert_tflite::FloorModOptions>
{
static const BuiltinOptions enum_value = BuiltinOptions_FloorModOptions;
};
-template <> struct BuiltinOptionsTraits<RangeOptions>
+template <> struct BuiltinOptionsTraits<onert_tflite::RangeOptions>
{
static const BuiltinOptions enum_value = BuiltinOptions_RangeOptions;
};
-template <> struct BuiltinOptionsTraits<ResizeNearestNeighborOptions>
+template <> struct BuiltinOptionsTraits<onert_tflite::ResizeNearestNeighborOptions>
{
static const BuiltinOptions enum_value = BuiltinOptions_ResizeNearestNeighborOptions;
};
-template <> struct BuiltinOptionsTraits<LeakyReluOptions>
+template <> struct BuiltinOptionsTraits<onert_tflite::LeakyReluOptions>
{
static const BuiltinOptions enum_value = BuiltinOptions_LeakyReluOptions;
};
-template <> struct BuiltinOptionsTraits<SquaredDifferenceOptions>
+template <> struct BuiltinOptionsTraits<onert_tflite::SquaredDifferenceOptions>
{
static const BuiltinOptions enum_value = BuiltinOptions_SquaredDifferenceOptions;
};
-template <> struct BuiltinOptionsTraits<MirrorPadOptions>
+template <> struct BuiltinOptionsTraits<onert_tflite::MirrorPadOptions>
{
static const BuiltinOptions enum_value = BuiltinOptions_MirrorPadOptions;
};
-template <> struct BuiltinOptionsTraits<AbsOptions>
+template <> struct BuiltinOptionsTraits<onert_tflite::AbsOptions>
{
static const BuiltinOptions enum_value = BuiltinOptions_AbsOptions;
};
-template <> struct BuiltinOptionsTraits<SplitVOptions>
+template <> struct BuiltinOptionsTraits<onert_tflite::SplitVOptions>
{
static const BuiltinOptions enum_value = BuiltinOptions_SplitVOptions;
};
-template <> struct BuiltinOptionsTraits<UniqueOptions>
+template <> struct BuiltinOptionsTraits<onert_tflite::UniqueOptions>
{
static const BuiltinOptions enum_value = BuiltinOptions_UniqueOptions;
};
-template <> struct BuiltinOptionsTraits<ReverseV2Options>
+template <> struct BuiltinOptionsTraits<onert_tflite::ReverseV2Options>
{
static const BuiltinOptions enum_value = BuiltinOptions_ReverseV2Options;
};
-template <> struct BuiltinOptionsTraits<AddNOptions>
+template <> struct BuiltinOptionsTraits<onert_tflite::AddNOptions>
{
static const BuiltinOptions enum_value = BuiltinOptions_AddNOptions;
};
-template <> struct BuiltinOptionsTraits<GatherNdOptions>
+template <> struct BuiltinOptionsTraits<onert_tflite::GatherNdOptions>
{
static const BuiltinOptions enum_value = BuiltinOptions_GatherNdOptions;
};
-template <> struct BuiltinOptionsTraits<CosOptions>
+template <> struct BuiltinOptionsTraits<onert_tflite::CosOptions>
{
static const BuiltinOptions enum_value = BuiltinOptions_CosOptions;
};
-template <> struct BuiltinOptionsTraits<WhereOptions>
+template <> struct BuiltinOptionsTraits<onert_tflite::WhereOptions>
{
static const BuiltinOptions enum_value = BuiltinOptions_WhereOptions;
};
-template <> struct BuiltinOptionsTraits<RankOptions>
+template <> struct BuiltinOptionsTraits<onert_tflite::RankOptions>
{
static const BuiltinOptions enum_value = BuiltinOptions_RankOptions;
};
-template <> struct BuiltinOptionsTraits<ReverseSequenceOptions>
+template <> struct BuiltinOptionsTraits<onert_tflite::ReverseSequenceOptions>
{
static const BuiltinOptions enum_value = BuiltinOptions_ReverseSequenceOptions;
};
-template <> struct BuiltinOptionsTraits<MatrixDiagOptions>
+template <> struct BuiltinOptionsTraits<onert_tflite::MatrixDiagOptions>
{
static const BuiltinOptions enum_value = BuiltinOptions_MatrixDiagOptions;
};
-template <> struct BuiltinOptionsTraits<QuantizeOptions>
+template <> struct BuiltinOptionsTraits<onert_tflite::QuantizeOptions>
{
static const BuiltinOptions enum_value = BuiltinOptions_QuantizeOptions;
};
-template <> struct BuiltinOptionsTraits<MatrixSetDiagOptions>
+template <> struct BuiltinOptionsTraits<onert_tflite::MatrixSetDiagOptions>
{
static const BuiltinOptions enum_value = BuiltinOptions_MatrixSetDiagOptions;
};
-template <> struct BuiltinOptionsTraits<HardSwishOptions>
+template <> struct BuiltinOptionsTraits<onert_tflite::HardSwishOptions>
{
static const BuiltinOptions enum_value = BuiltinOptions_HardSwishOptions;
};
-template <> struct BuiltinOptionsTraits<IfOptions>
+template <> struct BuiltinOptionsTraits<onert_tflite::IfOptions>
{
static const BuiltinOptions enum_value = BuiltinOptions_IfOptions;
};
-template <> struct BuiltinOptionsTraits<WhileOptions>
+template <> struct BuiltinOptionsTraits<onert_tflite::WhileOptions>
{
static const BuiltinOptions enum_value = BuiltinOptions_WhileOptions;
};
-template <> struct BuiltinOptionsTraits<DepthToSpaceOptions>
+template <> struct BuiltinOptionsTraits<onert_tflite::DepthToSpaceOptions>
{
static const BuiltinOptions enum_value = BuiltinOptions_DepthToSpaceOptions;
};
-template <> struct BuiltinOptionsTraits<NonMaxSuppressionV4Options>
+template <> struct BuiltinOptionsTraits<onert_tflite::NonMaxSuppressionV4Options>
{
static const BuiltinOptions enum_value = BuiltinOptions_NonMaxSuppressionV4Options;
};
-template <> struct BuiltinOptionsTraits<NonMaxSuppressionV5Options>
+template <> struct BuiltinOptionsTraits<onert_tflite::NonMaxSuppressionV5Options>
{
static const BuiltinOptions enum_value = BuiltinOptions_NonMaxSuppressionV5Options;
};
-template <> struct BuiltinOptionsTraits<ScatterNdOptions>
+template <> struct BuiltinOptionsTraits<onert_tflite::ScatterNdOptions>
{
static const BuiltinOptions enum_value = BuiltinOptions_ScatterNdOptions;
};
-template <> struct BuiltinOptionsTraits<SelectV2Options>
+template <> struct BuiltinOptionsTraits<onert_tflite::SelectV2Options>
{
static const BuiltinOptions enum_value = BuiltinOptions_SelectV2Options;
};
-template <> struct BuiltinOptionsTraits<DensifyOptions>
+template <> struct BuiltinOptionsTraits<onert_tflite::DensifyOptions>
{
static const BuiltinOptions enum_value = BuiltinOptions_DensifyOptions;
};
-template <> struct BuiltinOptionsTraits<SegmentSumOptions>
+template <> struct BuiltinOptionsTraits<onert_tflite::SegmentSumOptions>
{
static const BuiltinOptions enum_value = BuiltinOptions_SegmentSumOptions;
};
-template <> struct BuiltinOptionsTraits<BatchMatMulOptions>
+template <> struct BuiltinOptionsTraits<onert_tflite::BatchMatMulOptions>
{
static const BuiltinOptions enum_value = BuiltinOptions_BatchMatMulOptions;
};
+template <> struct BuiltinOptionsTraits<onert_tflite::CumsumOptions>
+{
+ static const BuiltinOptions enum_value = BuiltinOptions_CumsumOptions;
+};
+
+template <> struct BuiltinOptionsTraits<onert_tflite::CallOnceOptions>
+{
+ static const BuiltinOptions enum_value = BuiltinOptions_CallOnceOptions;
+};
+
+template <> struct BuiltinOptionsTraits<onert_tflite::BroadcastToOptions>
+{
+ static const BuiltinOptions enum_value = BuiltinOptions_BroadcastToOptions;
+};
+
+template <> struct BuiltinOptionsTraits<onert_tflite::Rfft2dOptions>
+{
+ static const BuiltinOptions enum_value = BuiltinOptions_Rfft2dOptions;
+};
+
+template <> struct BuiltinOptionsTraits<onert_tflite::Conv3DOptions>
+{
+ static const BuiltinOptions enum_value = BuiltinOptions_Conv3DOptions;
+};
+
+template <> struct BuiltinOptionsTraits<onert_tflite::HashtableOptions>
+{
+ static const BuiltinOptions enum_value = BuiltinOptions_HashtableOptions;
+};
+
+template <> struct BuiltinOptionsTraits<onert_tflite::HashtableFindOptions>
+{
+ static const BuiltinOptions enum_value = BuiltinOptions_HashtableFindOptions;
+};
+
+template <> struct BuiltinOptionsTraits<onert_tflite::HashtableImportOptions>
+{
+ static const BuiltinOptions enum_value = BuiltinOptions_HashtableImportOptions;
+};
+
+template <> struct BuiltinOptionsTraits<onert_tflite::HashtableSizeOptions>
+{
+ static const BuiltinOptions enum_value = BuiltinOptions_HashtableSizeOptions;
+};
+
+template <> struct BuiltinOptionsTraits<onert_tflite::VarHandleOptions>
+{
+ static const BuiltinOptions enum_value = BuiltinOptions_VarHandleOptions;
+};
+
+template <> struct BuiltinOptionsTraits<onert_tflite::ReadVariableOptions>
+{
+ static const BuiltinOptions enum_value = BuiltinOptions_ReadVariableOptions;
+};
+
+template <> struct BuiltinOptionsTraits<onert_tflite::AssignVariableOptions>
+{
+ static const BuiltinOptions enum_value = BuiltinOptions_AssignVariableOptions;
+};
+
+template <> struct BuiltinOptionsTraits<onert_tflite::RandomOptions>
+{
+ static const BuiltinOptions enum_value = BuiltinOptions_RandomOptions;
+};
+
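// Illustrative sketch, not part of the flatc output: the BuiltinOptionsTraits
// specializations above give generic reader code a compile-time map from an
// options table to its BuiltinOptions union tag, now spelled with the explicit
// onert_tflite:: qualifier. A minimal check of the mapping, assuming the
// surrounding onert_tflite namespace is in scope:
//
//   static_assert(BuiltinOptionsTraits<onert_tflite::Conv3DOptions>::enum_value ==
//                   BuiltinOptions_Conv3DOptions,
//                 "each options table resolves to its union tag");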
bool VerifyBuiltinOptions(flatbuffers::Verifier &verifier, const void *obj, BuiltinOptions type);
bool VerifyBuiltinOptionsVector(flatbuffers::Verifier &verifier,
const flatbuffers::Vector<flatbuffers::Offset<void>> *values,
const flatbuffers::Vector<uint8_t> *types);
-enum Padding
+enum Padding : int8_t
{
Padding_SAME = 0,
Padding_VALID = 1,
@@ -1685,17 +2028,19 @@ inline const Padding (&EnumValuesPadding())[2]
inline const char *const *EnumNamesPadding()
{
- static const char *const names[] = {"SAME", "VALID", nullptr};
+ static const char *const names[3] = {"SAME", "VALID", nullptr};
return names;
}
inline const char *EnumNamePadding(Padding e)
{
- const size_t index = static_cast<int>(e);
+ if (flatbuffers::IsOutRange(e, Padding_SAME, Padding_VALID))
+ return "";
+ const size_t index = static_cast<size_t>(e);
return EnumNamesPadding()[index];
}
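// Illustrative sketch, assuming a flatbuffers version that provides
// flatbuffers::IsOutRange: the regenerated EnumName* helpers now bounds-check
// their argument and return "" instead of indexing past the name table, e.g.
//
//   EnumNamePadding(Padding_VALID);            // "VALID"
//   EnumNamePadding(static_cast<Padding>(42)); // "" rather than an out-of-bounds read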
-enum ActivationFunctionType
+enum ActivationFunctionType : int8_t
{
ActivationFunctionType_NONE = 0,
ActivationFunctionType_RELU = 1,
@@ -1717,18 +2062,20 @@ inline const ActivationFunctionType (&EnumValuesActivationFunctionType())[6]
inline const char *const *EnumNamesActivationFunctionType()
{
- static const char *const names[] = {"NONE", "RELU", "RELU_N1_TO_1", "RELU6",
- "TANH", "SIGN_BIT", nullptr};
+ static const char *const names[7] = {"NONE", "RELU", "RELU_N1_TO_1", "RELU6",
+ "TANH", "SIGN_BIT", nullptr};
return names;
}
inline const char *EnumNameActivationFunctionType(ActivationFunctionType e)
{
- const size_t index = static_cast<int>(e);
+ if (flatbuffers::IsOutRange(e, ActivationFunctionType_NONE, ActivationFunctionType_SIGN_BIT))
+ return "";
+ const size_t index = static_cast<size_t>(e);
return EnumNamesActivationFunctionType()[index];
}
-enum LSHProjectionType
+enum LSHProjectionType : int8_t
{
LSHProjectionType_UNKNOWN = 0,
LSHProjectionType_SPARSE = 1,
@@ -1746,17 +2093,19 @@ inline const LSHProjectionType (&EnumValuesLSHProjectionType())[3]
inline const char *const *EnumNamesLSHProjectionType()
{
- static const char *const names[] = {"UNKNOWN", "SPARSE", "DENSE", nullptr};
+ static const char *const names[4] = {"UNKNOWN", "SPARSE", "DENSE", nullptr};
return names;
}
inline const char *EnumNameLSHProjectionType(LSHProjectionType e)
{
- const size_t index = static_cast<int>(e);
+ if (flatbuffers::IsOutRange(e, LSHProjectionType_UNKNOWN, LSHProjectionType_DENSE))
+ return "";
+ const size_t index = static_cast<size_t>(e);
return EnumNamesLSHProjectionType()[index];
}
-enum FullyConnectedOptionsWeightsFormat
+enum FullyConnectedOptionsWeightsFormat : int8_t
{
FullyConnectedOptionsWeightsFormat_DEFAULT = 0,
FullyConnectedOptionsWeightsFormat_SHUFFLED4x16INT8 = 1,
@@ -1774,17 +2123,20 @@ inline const FullyConnectedOptionsWeightsFormat (&EnumValuesFullyConnectedOption
inline const char *const *EnumNamesFullyConnectedOptionsWeightsFormat()
{
- static const char *const names[] = {"DEFAULT", "SHUFFLED4x16INT8", nullptr};
+ static const char *const names[3] = {"DEFAULT", "SHUFFLED4x16INT8", nullptr};
return names;
}
inline const char *EnumNameFullyConnectedOptionsWeightsFormat(FullyConnectedOptionsWeightsFormat e)
{
- const size_t index = static_cast<int>(e);
+ if (flatbuffers::IsOutRange(e, FullyConnectedOptionsWeightsFormat_DEFAULT,
+ FullyConnectedOptionsWeightsFormat_SHUFFLED4x16INT8))
+ return "";
+ const size_t index = static_cast<size_t>(e);
return EnumNamesFullyConnectedOptionsWeightsFormat()[index];
}
-enum LSTMKernelType
+enum LSTMKernelType : int8_t
{
LSTMKernelType_FULL = 0,
LSTMKernelType_BASIC = 1,
@@ -1800,17 +2152,19 @@ inline const LSTMKernelType (&EnumValuesLSTMKernelType())[2]
inline const char *const *EnumNamesLSTMKernelType()
{
- static const char *const names[] = {"FULL", "BASIC", nullptr};
+ static const char *const names[3] = {"FULL", "BASIC", nullptr};
return names;
}
inline const char *EnumNameLSTMKernelType(LSTMKernelType e)
{
- const size_t index = static_cast<int>(e);
+ if (flatbuffers::IsOutRange(e, LSTMKernelType_FULL, LSTMKernelType_BASIC))
+ return "";
+ const size_t index = static_cast<size_t>(e);
return EnumNamesLSTMKernelType()[index];
}
-enum CombinerType
+enum CombinerType : int8_t
{
CombinerType_SUM = 0,
CombinerType_MEAN = 1,
@@ -1827,17 +2181,19 @@ inline const CombinerType (&EnumValuesCombinerType())[3]
inline const char *const *EnumNamesCombinerType()
{
- static const char *const names[] = {"SUM", "MEAN", "SQRTN", nullptr};
+ static const char *const names[4] = {"SUM", "MEAN", "SQRTN", nullptr};
return names;
}
inline const char *EnumNameCombinerType(CombinerType e)
{
- const size_t index = static_cast<int>(e);
+ if (flatbuffers::IsOutRange(e, CombinerType_SUM, CombinerType_SQRTN))
+ return "";
+ const size_t index = static_cast<size_t>(e);
return EnumNamesCombinerType()[index];
}
-enum MirrorPadMode
+enum MirrorPadMode : int8_t
{
MirrorPadMode_REFLECT = 0,
MirrorPadMode_SYMMETRIC = 1,
@@ -1853,17 +2209,19 @@ inline const MirrorPadMode (&EnumValuesMirrorPadMode())[2]
inline const char *const *EnumNamesMirrorPadMode()
{
- static const char *const names[] = {"REFLECT", "SYMMETRIC", nullptr};
+ static const char *const names[3] = {"REFLECT", "SYMMETRIC", nullptr};
return names;
}
inline const char *EnumNameMirrorPadMode(MirrorPadMode e)
{
- const size_t index = static_cast<int>(e);
+ if (flatbuffers::IsOutRange(e, MirrorPadMode_REFLECT, MirrorPadMode_SYMMETRIC))
+ return "";
+ const size_t index = static_cast<size_t>(e);
return EnumNamesMirrorPadMode()[index];
}
-enum CustomOptionsFormat
+enum CustomOptionsFormat : int8_t
{
CustomOptionsFormat_FLEXBUFFERS = 0,
CustomOptionsFormat_MIN = CustomOptionsFormat_FLEXBUFFERS,
@@ -1878,19 +2236,22 @@ inline const CustomOptionsFormat (&EnumValuesCustomOptionsFormat())[1]
inline const char *const *EnumNamesCustomOptionsFormat()
{
- static const char *const names[] = {"FLEXBUFFERS", nullptr};
+ static const char *const names[2] = {"FLEXBUFFERS", nullptr};
return names;
}
inline const char *EnumNameCustomOptionsFormat(CustomOptionsFormat e)
{
- const size_t index = static_cast<int>(e);
+ if (flatbuffers::IsOutRange(e, CustomOptionsFormat_FLEXBUFFERS, CustomOptionsFormat_FLEXBUFFERS))
+ return "";
+ const size_t index = static_cast<size_t>(e);
return EnumNamesCustomOptionsFormat()[index];
}
struct CustomQuantization FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table
{
- enum
+ typedef CustomQuantizationBuilder Builder;
+ enum FlatBuffersVTableOffset FLATBUFFERS_VTABLE_UNDERLYING_TYPE
{
VT_CUSTOM = 4
};
@@ -1907,6 +2268,7 @@ struct CustomQuantization FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table
struct CustomQuantizationBuilder
{
+ typedef CustomQuantization Table;
flatbuffers::FlatBufferBuilder &fbb_;
flatbuffers::uoffset_t start_;
void add_custom(flatbuffers::Offset<flatbuffers::Vector<uint8_t>> custom)
@@ -1917,7 +2279,6 @@ struct CustomQuantizationBuilder
{
start_ = fbb_.StartTable();
}
- CustomQuantizationBuilder &operator=(const CustomQuantizationBuilder &);
flatbuffers::Offset<CustomQuantization> Finish()
{
const auto end = fbb_.EndTable(start_);
@@ -1939,13 +2300,18 @@ inline flatbuffers::Offset<CustomQuantization>
CreateCustomQuantizationDirect(flatbuffers::FlatBufferBuilder &_fbb,
const std::vector<uint8_t> *custom = nullptr)
{
- return onert_tflite::CreateCustomQuantization(_fbb,
- custom ? _fbb.CreateVector<uint8_t>(*custom) : 0);
+ if (custom)
+ {
+ _fbb.ForceVectorAlignment(custom->size(), sizeof(uint8_t), 16);
+ }
+ auto custom__ = custom ? _fbb.CreateVector<uint8_t>(*custom) : 0;
+ return onert_tflite::CreateCustomQuantization(_fbb, custom__);
}
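// Illustrative sketch, not part of the flatc output: the Direct helper now
// calls ForceVectorAlignment so the custom payload starts on a 16-byte
// boundary before the vector is serialized. A hedged usage example:
//
//   flatbuffers::FlatBufferBuilder fbb;
//   std::vector<uint8_t> payload = {1, 2, 3, 4};
//   auto cq = onert_tflite::CreateCustomQuantizationDirect(fbb, &payload);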
struct QuantizationParameters FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table
{
- enum
+ typedef QuantizationParametersBuilder Builder;
+ enum FlatBuffersVTableOffset FLATBUFFERS_VTABLE_UNDERLYING_TYPE
{
VT_MIN = 4,
VT_MAX = 6,
@@ -1971,16 +2337,16 @@ struct QuantizationParameters FLATBUFFERS_FINAL_CLASS : private flatbuffers::Tab
{
return GetPointer<const flatbuffers::Vector<int64_t> *>(VT_ZERO_POINT);
}
- QuantizationDetails details_type() const
+ onert_tflite::QuantizationDetails details_type() const
{
- return static_cast<QuantizationDetails>(GetField<uint8_t>(VT_DETAILS_TYPE, 0));
+ return static_cast<onert_tflite::QuantizationDetails>(GetField<uint8_t>(VT_DETAILS_TYPE, 0));
}
const void *details() const { return GetPointer<const void *>(VT_DETAILS); }
template <typename T> const T *details_as() const;
- const CustomQuantization *details_as_CustomQuantization() const
+ const onert_tflite::CustomQuantization *details_as_CustomQuantization() const
{
- return details_type() == QuantizationDetails_CustomQuantization
- ? static_cast<const CustomQuantization *>(details())
+ return details_type() == onert_tflite::QuantizationDetails_CustomQuantization
+ ? static_cast<const onert_tflite::CustomQuantization *>(details())
: nullptr;
}
int32_t quantized_dimension() const { return GetField<int32_t>(VT_QUANTIZED_DIMENSION, 0); }
@@ -1998,13 +2364,15 @@ struct QuantizationParameters FLATBUFFERS_FINAL_CLASS : private flatbuffers::Tab
};
template <>
-inline const CustomQuantization *QuantizationParameters::details_as<CustomQuantization>() const
+inline const onert_tflite::CustomQuantization *
+QuantizationParameters::details_as<onert_tflite::CustomQuantization>() const
{
return details_as_CustomQuantization();
}
struct QuantizationParametersBuilder
{
+ typedef QuantizationParameters Table;
flatbuffers::FlatBufferBuilder &fbb_;
flatbuffers::uoffset_t start_;
void add_min(flatbuffers::Offset<flatbuffers::Vector<float>> min)
@@ -2023,7 +2391,7 @@ struct QuantizationParametersBuilder
{
fbb_.AddOffset(QuantizationParameters::VT_ZERO_POINT, zero_point);
}
- void add_details_type(QuantizationDetails details_type)
+ void add_details_type(onert_tflite::QuantizationDetails details_type)
{
fbb_.AddElement<uint8_t>(QuantizationParameters::VT_DETAILS_TYPE,
static_cast<uint8_t>(details_type), 0);
@@ -2041,7 +2409,6 @@ struct QuantizationParametersBuilder
{
start_ = fbb_.StartTable();
}
- QuantizationParametersBuilder &operator=(const QuantizationParametersBuilder &);
flatbuffers::Offset<QuantizationParameters> Finish()
{
const auto end = fbb_.EndTable(start_);
@@ -2050,14 +2417,13 @@ struct QuantizationParametersBuilder
}
};
-inline flatbuffers::Offset<QuantizationParameters>
-CreateQuantizationParameters(flatbuffers::FlatBufferBuilder &_fbb,
- flatbuffers::Offset<flatbuffers::Vector<float>> min = 0,
- flatbuffers::Offset<flatbuffers::Vector<float>> max = 0,
- flatbuffers::Offset<flatbuffers::Vector<float>> scale = 0,
- flatbuffers::Offset<flatbuffers::Vector<int64_t>> zero_point = 0,
- QuantizationDetails details_type = QuantizationDetails_NONE,
- flatbuffers::Offset<void> details = 0, int32_t quantized_dimension = 0)
+inline flatbuffers::Offset<QuantizationParameters> CreateQuantizationParameters(
+ flatbuffers::FlatBufferBuilder &_fbb, flatbuffers::Offset<flatbuffers::Vector<float>> min = 0,
+ flatbuffers::Offset<flatbuffers::Vector<float>> max = 0,
+ flatbuffers::Offset<flatbuffers::Vector<float>> scale = 0,
+ flatbuffers::Offset<flatbuffers::Vector<int64_t>> zero_point = 0,
+ onert_tflite::QuantizationDetails details_type = onert_tflite::QuantizationDetails_NONE,
+ flatbuffers::Offset<void> details = 0, int32_t quantized_dimension = 0)
{
QuantizationParametersBuilder builder_(_fbb);
builder_.add_quantized_dimension(quantized_dimension);
@@ -2074,19 +2440,21 @@ inline flatbuffers::Offset<QuantizationParameters> CreateQuantizationParametersD
flatbuffers::FlatBufferBuilder &_fbb, const std::vector<float> *min = nullptr,
const std::vector<float> *max = nullptr, const std::vector<float> *scale = nullptr,
const std::vector<int64_t> *zero_point = nullptr,
- QuantizationDetails details_type = QuantizationDetails_NONE,
+ onert_tflite::QuantizationDetails details_type = onert_tflite::QuantizationDetails_NONE,
flatbuffers::Offset<void> details = 0, int32_t quantized_dimension = 0)
{
- return onert_tflite::CreateQuantizationParameters(
- _fbb, min ? _fbb.CreateVector<float>(*min) : 0, max ? _fbb.CreateVector<float>(*max) : 0,
- scale ? _fbb.CreateVector<float>(*scale) : 0,
- zero_point ? _fbb.CreateVector<int64_t>(*zero_point) : 0, details_type, details,
- quantized_dimension);
+ auto min__ = min ? _fbb.CreateVector<float>(*min) : 0;
+ auto max__ = max ? _fbb.CreateVector<float>(*max) : 0;
+ auto scale__ = scale ? _fbb.CreateVector<float>(*scale) : 0;
+ auto zero_point__ = zero_point ? _fbb.CreateVector<int64_t>(*zero_point) : 0;
+ return onert_tflite::CreateQuantizationParameters(_fbb, min__, max__, scale__, zero_point__,
+ details_type, details, quantized_dimension);
}
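// Illustrative sketch: the *Direct overloads accept std::vector / const char*
// arguments, serialize them into the builder, and then delegate to the
// positional Create function. A minimal per-tensor quantization record,
// assuming `fbb` is a live FlatBufferBuilder:
//
//   std::vector<float> scale = {0.5f};
//   std::vector<int64_t> zero_point = {0};
//   auto q = onert_tflite::CreateQuantizationParametersDirect(
//     fbb, /*min=*/nullptr, /*max=*/nullptr, &scale, &zero_point);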
struct Int32Vector FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table
{
- enum
+ typedef Int32VectorBuilder Builder;
+ enum FlatBuffersVTableOffset FLATBUFFERS_VTABLE_UNDERLYING_TYPE
{
VT_VALUES = 4
};
@@ -2103,6 +2471,7 @@ struct Int32Vector FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table
struct Int32VectorBuilder
{
+ typedef Int32Vector Table;
flatbuffers::FlatBufferBuilder &fbb_;
flatbuffers::uoffset_t start_;
void add_values(flatbuffers::Offset<flatbuffers::Vector<int32_t>> values)
@@ -2113,7 +2482,6 @@ struct Int32VectorBuilder
{
start_ = fbb_.StartTable();
}
- Int32VectorBuilder &operator=(const Int32VectorBuilder &);
flatbuffers::Offset<Int32Vector> Finish()
{
const auto end = fbb_.EndTable(start_);
@@ -2135,12 +2503,14 @@ inline flatbuffers::Offset<Int32Vector>
CreateInt32VectorDirect(flatbuffers::FlatBufferBuilder &_fbb,
const std::vector<int32_t> *values = nullptr)
{
- return onert_tflite::CreateInt32Vector(_fbb, values ? _fbb.CreateVector<int32_t>(*values) : 0);
+ auto values__ = values ? _fbb.CreateVector<int32_t>(*values) : 0;
+ return onert_tflite::CreateInt32Vector(_fbb, values__);
}
struct Uint16Vector FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table
{
- enum
+ typedef Uint16VectorBuilder Builder;
+ enum FlatBuffersVTableOffset FLATBUFFERS_VTABLE_UNDERLYING_TYPE
{
VT_VALUES = 4
};
@@ -2157,6 +2527,7 @@ struct Uint16Vector FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table
struct Uint16VectorBuilder
{
+ typedef Uint16Vector Table;
flatbuffers::FlatBufferBuilder &fbb_;
flatbuffers::uoffset_t start_;
void add_values(flatbuffers::Offset<flatbuffers::Vector<uint16_t>> values)
@@ -2167,7 +2538,6 @@ struct Uint16VectorBuilder
{
start_ = fbb_.StartTable();
}
- Uint16VectorBuilder &operator=(const Uint16VectorBuilder &);
flatbuffers::Offset<Uint16Vector> Finish()
{
const auto end = fbb_.EndTable(start_);
@@ -2189,12 +2559,18 @@ inline flatbuffers::Offset<Uint16Vector>
CreateUint16VectorDirect(flatbuffers::FlatBufferBuilder &_fbb,
const std::vector<uint16_t> *values = nullptr)
{
- return onert_tflite::CreateUint16Vector(_fbb, values ? _fbb.CreateVector<uint16_t>(*values) : 0);
+ if (values)
+ {
+ _fbb.ForceVectorAlignment(values->size(), sizeof(uint16_t), 4);
+ }
+ auto values__ = values ? _fbb.CreateVector<uint16_t>(*values) : 0;
+ return onert_tflite::CreateUint16Vector(_fbb, values__);
}
struct Uint8Vector FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table
{
- enum
+ typedef Uint8VectorBuilder Builder;
+ enum FlatBuffersVTableOffset FLATBUFFERS_VTABLE_UNDERLYING_TYPE
{
VT_VALUES = 4
};
@@ -2211,6 +2587,7 @@ struct Uint8Vector FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table
struct Uint8VectorBuilder
{
+ typedef Uint8Vector Table;
flatbuffers::FlatBufferBuilder &fbb_;
flatbuffers::uoffset_t start_;
void add_values(flatbuffers::Offset<flatbuffers::Vector<uint8_t>> values)
@@ -2221,7 +2598,6 @@ struct Uint8VectorBuilder
{
start_ = fbb_.StartTable();
}
- Uint8VectorBuilder &operator=(const Uint8VectorBuilder &);
flatbuffers::Offset<Uint8Vector> Finish()
{
const auto end = fbb_.EndTable(start_);
@@ -2243,12 +2619,18 @@ inline flatbuffers::Offset<Uint8Vector>
CreateUint8VectorDirect(flatbuffers::FlatBufferBuilder &_fbb,
const std::vector<uint8_t> *values = nullptr)
{
- return onert_tflite::CreateUint8Vector(_fbb, values ? _fbb.CreateVector<uint8_t>(*values) : 0);
+ if (values)
+ {
+ _fbb.ForceVectorAlignment(values->size(), sizeof(uint8_t), 4);
+ }
+ auto values__ = values ? _fbb.CreateVector<uint8_t>(*values) : 0;
+ return onert_tflite::CreateUint8Vector(_fbb, values__);
}
struct DimensionMetadata FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table
{
- enum
+ typedef DimensionMetadataBuilder Builder;
+ enum FlatBuffersVTableOffset FLATBUFFERS_VTABLE_UNDERLYING_TYPE
{
VT_FORMAT = 4,
VT_DENSE_SIZE = 6,
@@ -2257,57 +2639,59 @@ struct DimensionMetadata FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table
VT_ARRAY_INDICES_TYPE = 12,
VT_ARRAY_INDICES = 14
};
- DimensionType format() const
+ onert_tflite::DimensionType format() const
{
- return static_cast<DimensionType>(GetField<int8_t>(VT_FORMAT, 0));
+ return static_cast<onert_tflite::DimensionType>(GetField<int8_t>(VT_FORMAT, 0));
}
int32_t dense_size() const { return GetField<int32_t>(VT_DENSE_SIZE, 0); }
- SparseIndexVector array_segments_type() const
+ onert_tflite::SparseIndexVector array_segments_type() const
{
- return static_cast<SparseIndexVector>(GetField<uint8_t>(VT_ARRAY_SEGMENTS_TYPE, 0));
+ return static_cast<onert_tflite::SparseIndexVector>(
+ GetField<uint8_t>(VT_ARRAY_SEGMENTS_TYPE, 0));
}
const void *array_segments() const { return GetPointer<const void *>(VT_ARRAY_SEGMENTS); }
template <typename T> const T *array_segments_as() const;
- const Int32Vector *array_segments_as_Int32Vector() const
+ const onert_tflite::Int32Vector *array_segments_as_Int32Vector() const
{
- return array_segments_type() == SparseIndexVector_Int32Vector
- ? static_cast<const Int32Vector *>(array_segments())
+ return array_segments_type() == onert_tflite::SparseIndexVector_Int32Vector
+ ? static_cast<const onert_tflite::Int32Vector *>(array_segments())
: nullptr;
}
- const Uint16Vector *array_segments_as_Uint16Vector() const
+ const onert_tflite::Uint16Vector *array_segments_as_Uint16Vector() const
{
- return array_segments_type() == SparseIndexVector_Uint16Vector
- ? static_cast<const Uint16Vector *>(array_segments())
+ return array_segments_type() == onert_tflite::SparseIndexVector_Uint16Vector
+ ? static_cast<const onert_tflite::Uint16Vector *>(array_segments())
: nullptr;
}
- const Uint8Vector *array_segments_as_Uint8Vector() const
+ const onert_tflite::Uint8Vector *array_segments_as_Uint8Vector() const
{
- return array_segments_type() == SparseIndexVector_Uint8Vector
- ? static_cast<const Uint8Vector *>(array_segments())
+ return array_segments_type() == onert_tflite::SparseIndexVector_Uint8Vector
+ ? static_cast<const onert_tflite::Uint8Vector *>(array_segments())
: nullptr;
}
- SparseIndexVector array_indices_type() const
+ onert_tflite::SparseIndexVector array_indices_type() const
{
- return static_cast<SparseIndexVector>(GetField<uint8_t>(VT_ARRAY_INDICES_TYPE, 0));
+ return static_cast<onert_tflite::SparseIndexVector>(
+ GetField<uint8_t>(VT_ARRAY_INDICES_TYPE, 0));
}
const void *array_indices() const { return GetPointer<const void *>(VT_ARRAY_INDICES); }
template <typename T> const T *array_indices_as() const;
- const Int32Vector *array_indices_as_Int32Vector() const
+ const onert_tflite::Int32Vector *array_indices_as_Int32Vector() const
{
- return array_indices_type() == SparseIndexVector_Int32Vector
- ? static_cast<const Int32Vector *>(array_indices())
+ return array_indices_type() == onert_tflite::SparseIndexVector_Int32Vector
+ ? static_cast<const onert_tflite::Int32Vector *>(array_indices())
: nullptr;
}
- const Uint16Vector *array_indices_as_Uint16Vector() const
+ const onert_tflite::Uint16Vector *array_indices_as_Uint16Vector() const
{
- return array_indices_type() == SparseIndexVector_Uint16Vector
- ? static_cast<const Uint16Vector *>(array_indices())
+ return array_indices_type() == onert_tflite::SparseIndexVector_Uint16Vector
+ ? static_cast<const onert_tflite::Uint16Vector *>(array_indices())
: nullptr;
}
- const Uint8Vector *array_indices_as_Uint8Vector() const
+ const onert_tflite::Uint8Vector *array_indices_as_Uint8Vector() const
{
- return array_indices_type() == SparseIndexVector_Uint8Vector
- ? static_cast<const Uint8Vector *>(array_indices())
+ return array_indices_type() == onert_tflite::SparseIndexVector_Uint8Vector
+ ? static_cast<const onert_tflite::Uint8Vector *>(array_indices())
: nullptr;
}
bool Verify(flatbuffers::Verifier &verifier) const
@@ -2324,41 +2708,54 @@ struct DimensionMetadata FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table
}
};
-template <> inline const Int32Vector *DimensionMetadata::array_segments_as<Int32Vector>() const
+template <>
+inline const onert_tflite::Int32Vector *
+DimensionMetadata::array_segments_as<onert_tflite::Int32Vector>() const
{
return array_segments_as_Int32Vector();
}
-template <> inline const Uint16Vector *DimensionMetadata::array_segments_as<Uint16Vector>() const
+template <>
+inline const onert_tflite::Uint16Vector *
+DimensionMetadata::array_segments_as<onert_tflite::Uint16Vector>() const
{
return array_segments_as_Uint16Vector();
}
-template <> inline const Uint8Vector *DimensionMetadata::array_segments_as<Uint8Vector>() const
+template <>
+inline const onert_tflite::Uint8Vector *
+DimensionMetadata::array_segments_as<onert_tflite::Uint8Vector>() const
{
return array_segments_as_Uint8Vector();
}
-template <> inline const Int32Vector *DimensionMetadata::array_indices_as<Int32Vector>() const
+template <>
+inline const onert_tflite::Int32Vector *
+DimensionMetadata::array_indices_as<onert_tflite::Int32Vector>() const
{
return array_indices_as_Int32Vector();
}
-template <> inline const Uint16Vector *DimensionMetadata::array_indices_as<Uint16Vector>() const
+template <>
+inline const onert_tflite::Uint16Vector *
+DimensionMetadata::array_indices_as<onert_tflite::Uint16Vector>() const
{
return array_indices_as_Uint16Vector();
}
-template <> inline const Uint8Vector *DimensionMetadata::array_indices_as<Uint8Vector>() const
+template <>
+inline const onert_tflite::Uint8Vector *
+DimensionMetadata::array_indices_as<onert_tflite::Uint8Vector>() const
{
return array_indices_as_Uint8Vector();
}
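// Illustrative sketch: the array_segments_as_* / array_indices_as_* accessors
// pair the union-type check with the cast, so callers can probe whichever
// sparse index representation was stored. Assuming a hypothetical `dm`
// pointing at a DimensionMetadata table:
//
//   if (const auto *seg = dm->array_segments_as_Int32Vector())
//   {
//     const flatbuffers::Vector<int32_t> *values = seg->values();
//     (void)values; // inspect segment offsets here
//   }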
struct DimensionMetadataBuilder
{
+ typedef DimensionMetadata Table;
flatbuffers::FlatBufferBuilder &fbb_;
flatbuffers::uoffset_t start_;
- void add_format(DimensionType format)
+ void add_format(onert_tflite::DimensionType format)
{
fbb_.AddElement<int8_t>(DimensionMetadata::VT_FORMAT, static_cast<int8_t>(format), 0);
}
@@ -2366,7 +2763,7 @@ struct DimensionMetadataBuilder
{
fbb_.AddElement<int32_t>(DimensionMetadata::VT_DENSE_SIZE, dense_size, 0);
}
- void add_array_segments_type(SparseIndexVector array_segments_type)
+ void add_array_segments_type(onert_tflite::SparseIndexVector array_segments_type)
{
fbb_.AddElement<uint8_t>(DimensionMetadata::VT_ARRAY_SEGMENTS_TYPE,
static_cast<uint8_t>(array_segments_type), 0);
@@ -2375,7 +2772,7 @@ struct DimensionMetadataBuilder
{
fbb_.AddOffset(DimensionMetadata::VT_ARRAY_SEGMENTS, array_segments);
}
- void add_array_indices_type(SparseIndexVector array_indices_type)
+ void add_array_indices_type(onert_tflite::SparseIndexVector array_indices_type)
{
fbb_.AddElement<uint8_t>(DimensionMetadata::VT_ARRAY_INDICES_TYPE,
static_cast<uint8_t>(array_indices_type), 0);
@@ -2388,7 +2785,6 @@ struct DimensionMetadataBuilder
{
start_ = fbb_.StartTable();
}
- DimensionMetadataBuilder &operator=(const DimensionMetadataBuilder &);
flatbuffers::Offset<DimensionMetadata> Finish()
{
const auto end = fbb_.EndTable(start_);
@@ -2397,13 +2793,13 @@ struct DimensionMetadataBuilder
}
};
-inline flatbuffers::Offset<DimensionMetadata>
-CreateDimensionMetadata(flatbuffers::FlatBufferBuilder &_fbb,
- DimensionType format = DimensionType_DENSE, int32_t dense_size = 0,
- SparseIndexVector array_segments_type = SparseIndexVector_NONE,
- flatbuffers::Offset<void> array_segments = 0,
- SparseIndexVector array_indices_type = SparseIndexVector_NONE,
- flatbuffers::Offset<void> array_indices = 0)
+inline flatbuffers::Offset<DimensionMetadata> CreateDimensionMetadata(
+ flatbuffers::FlatBufferBuilder &_fbb,
+ onert_tflite::DimensionType format = onert_tflite::DimensionType_DENSE, int32_t dense_size = 0,
+ onert_tflite::SparseIndexVector array_segments_type = onert_tflite::SparseIndexVector_NONE,
+ flatbuffers::Offset<void> array_segments = 0,
+ onert_tflite::SparseIndexVector array_indices_type = onert_tflite::SparseIndexVector_NONE,
+ flatbuffers::Offset<void> array_indices = 0)
{
DimensionMetadataBuilder builder_(_fbb);
builder_.add_array_indices(array_indices);
@@ -2417,7 +2813,8 @@ CreateDimensionMetadata(flatbuffers::FlatBufferBuilder &_fbb,
struct SparsityParameters FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table
{
- enum
+ typedef SparsityParametersBuilder Builder;
+ enum FlatBuffersVTableOffset FLATBUFFERS_VTABLE_UNDERLYING_TYPE
{
VT_TRAVERSAL_ORDER = 4,
VT_BLOCK_MAP = 6,
@@ -2431,9 +2828,11 @@ struct SparsityParameters FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table
{
return GetPointer<const flatbuffers::Vector<int32_t> *>(VT_BLOCK_MAP);
}
- const flatbuffers::Vector<flatbuffers::Offset<DimensionMetadata>> *dim_metadata() const
+ const flatbuffers::Vector<flatbuffers::Offset<onert_tflite::DimensionMetadata>> *
+ dim_metadata() const
{
- return GetPointer<const flatbuffers::Vector<flatbuffers::Offset<DimensionMetadata>> *>(
+ return GetPointer<
+ const flatbuffers::Vector<flatbuffers::Offset<onert_tflite::DimensionMetadata>> *>(
VT_DIM_METADATA);
}
bool Verify(flatbuffers::Verifier &verifier) const
@@ -2448,6 +2847,7 @@ struct SparsityParameters FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table
struct SparsityParametersBuilder
{
+ typedef SparsityParameters Table;
flatbuffers::FlatBufferBuilder &fbb_;
flatbuffers::uoffset_t start_;
void add_traversal_order(flatbuffers::Offset<flatbuffers::Vector<int32_t>> traversal_order)
@@ -2459,7 +2859,8 @@ struct SparsityParametersBuilder
fbb_.AddOffset(SparsityParameters::VT_BLOCK_MAP, block_map);
}
void add_dim_metadata(
- flatbuffers::Offset<flatbuffers::Vector<flatbuffers::Offset<DimensionMetadata>>> dim_metadata)
+ flatbuffers::Offset<flatbuffers::Vector<flatbuffers::Offset<onert_tflite::DimensionMetadata>>>
+ dim_metadata)
{
fbb_.AddOffset(SparsityParameters::VT_DIM_METADATA, dim_metadata);
}
@@ -2467,7 +2868,6 @@ struct SparsityParametersBuilder
{
start_ = fbb_.StartTable();
}
- SparsityParametersBuilder &operator=(const SparsityParametersBuilder &);
flatbuffers::Offset<SparsityParameters> Finish()
{
const auto end = fbb_.EndTable(start_);
@@ -2480,7 +2880,8 @@ inline flatbuffers::Offset<SparsityParameters> CreateSparsityParameters(
flatbuffers::FlatBufferBuilder &_fbb,
flatbuffers::Offset<flatbuffers::Vector<int32_t>> traversal_order = 0,
flatbuffers::Offset<flatbuffers::Vector<int32_t>> block_map = 0,
- flatbuffers::Offset<flatbuffers::Vector<flatbuffers::Offset<DimensionMetadata>>> dim_metadata = 0)
+ flatbuffers::Offset<flatbuffers::Vector<flatbuffers::Offset<onert_tflite::DimensionMetadata>>>
+ dim_metadata = 0)
{
SparsityParametersBuilder builder_(_fbb);
builder_.add_dim_metadata(dim_metadata);
@@ -2492,17 +2893,22 @@ inline flatbuffers::Offset<SparsityParameters> CreateSparsityParameters(
inline flatbuffers::Offset<SparsityParameters> CreateSparsityParametersDirect(
flatbuffers::FlatBufferBuilder &_fbb, const std::vector<int32_t> *traversal_order = nullptr,
const std::vector<int32_t> *block_map = nullptr,
- const std::vector<flatbuffers::Offset<DimensionMetadata>> *dim_metadata = nullptr)
+ const std::vector<flatbuffers::Offset<onert_tflite::DimensionMetadata>> *dim_metadata = nullptr)
{
- return onert_tflite::CreateSparsityParameters(
- _fbb, traversal_order ? _fbb.CreateVector<int32_t>(*traversal_order) : 0,
- block_map ? _fbb.CreateVector<int32_t>(*block_map) : 0,
- dim_metadata ? _fbb.CreateVector<flatbuffers::Offset<DimensionMetadata>>(*dim_metadata) : 0);
+ auto traversal_order__ = traversal_order ? _fbb.CreateVector<int32_t>(*traversal_order) : 0;
+ auto block_map__ = block_map ? _fbb.CreateVector<int32_t>(*block_map) : 0;
+ auto dim_metadata__ =
+ dim_metadata
+ ? _fbb.CreateVector<flatbuffers::Offset<onert_tflite::DimensionMetadata>>(*dim_metadata)
+ : 0;
+ return onert_tflite::CreateSparsityParameters(_fbb, traversal_order__, block_map__,
+ dim_metadata__);
}
struct Tensor FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table
{
- enum
+ typedef TensorBuilder Builder;
+ enum FlatBuffersVTableOffset FLATBUFFERS_VTABLE_UNDERLYING_TYPE
{
VT_SHAPE = 4,
VT_TYPE = 6,
@@ -2517,20 +2923,23 @@ struct Tensor FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table
{
return GetPointer<const flatbuffers::Vector<int32_t> *>(VT_SHAPE);
}
- TensorType type() const { return static_cast<TensorType>(GetField<int8_t>(VT_TYPE, 0)); }
+ onert_tflite::TensorType type() const
+ {
+ return static_cast<onert_tflite::TensorType>(GetField<int8_t>(VT_TYPE, 0));
+ }
uint32_t buffer() const { return GetField<uint32_t>(VT_BUFFER, 0); }
const flatbuffers::String *name() const
{
return GetPointer<const flatbuffers::String *>(VT_NAME);
}
- const QuantizationParameters *quantization() const
+ const onert_tflite::QuantizationParameters *quantization() const
{
- return GetPointer<const QuantizationParameters *>(VT_QUANTIZATION);
+ return GetPointer<const onert_tflite::QuantizationParameters *>(VT_QUANTIZATION);
}
bool is_variable() const { return GetField<uint8_t>(VT_IS_VARIABLE, 0) != 0; }
- const SparsityParameters *sparsity() const
+ const onert_tflite::SparsityParameters *sparsity() const
{
- return GetPointer<const SparsityParameters *>(VT_SPARSITY);
+ return GetPointer<const onert_tflite::SparsityParameters *>(VT_SPARSITY);
}
const flatbuffers::Vector<int32_t> *shape_signature() const
{
@@ -2551,13 +2960,14 @@ struct Tensor FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table
struct TensorBuilder
{
+ typedef Tensor Table;
flatbuffers::FlatBufferBuilder &fbb_;
flatbuffers::uoffset_t start_;
void add_shape(flatbuffers::Offset<flatbuffers::Vector<int32_t>> shape)
{
fbb_.AddOffset(Tensor::VT_SHAPE, shape);
}
- void add_type(TensorType type)
+ void add_type(onert_tflite::TensorType type)
{
fbb_.AddElement<int8_t>(Tensor::VT_TYPE, static_cast<int8_t>(type), 0);
}
@@ -2566,7 +2976,7 @@ struct TensorBuilder
{
fbb_.AddOffset(Tensor::VT_NAME, name);
}
- void add_quantization(flatbuffers::Offset<QuantizationParameters> quantization)
+ void add_quantization(flatbuffers::Offset<onert_tflite::QuantizationParameters> quantization)
{
fbb_.AddOffset(Tensor::VT_QUANTIZATION, quantization);
}
@@ -2574,7 +2984,7 @@ struct TensorBuilder
{
fbb_.AddElement<uint8_t>(Tensor::VT_IS_VARIABLE, static_cast<uint8_t>(is_variable), 0);
}
- void add_sparsity(flatbuffers::Offset<SparsityParameters> sparsity)
+ void add_sparsity(flatbuffers::Offset<onert_tflite::SparsityParameters> sparsity)
{
fbb_.AddOffset(Tensor::VT_SPARSITY, sparsity);
}
@@ -2586,7 +2996,6 @@ struct TensorBuilder
{
start_ = fbb_.StartTable();
}
- TensorBuilder &operator=(const TensorBuilder &);
flatbuffers::Offset<Tensor> Finish()
{
const auto end = fbb_.EndTable(start_);
@@ -2595,14 +3004,13 @@ struct TensorBuilder
}
};
-inline flatbuffers::Offset<Tensor>
-CreateTensor(flatbuffers::FlatBufferBuilder &_fbb,
- flatbuffers::Offset<flatbuffers::Vector<int32_t>> shape = 0,
- TensorType type = TensorType_FLOAT32, uint32_t buffer = 0,
- flatbuffers::Offset<flatbuffers::String> name = 0,
- flatbuffers::Offset<QuantizationParameters> quantization = 0, bool is_variable = false,
- flatbuffers::Offset<SparsityParameters> sparsity = 0,
- flatbuffers::Offset<flatbuffers::Vector<int32_t>> shape_signature = 0)
+inline flatbuffers::Offset<Tensor> CreateTensor(
+ flatbuffers::FlatBufferBuilder &_fbb, flatbuffers::Offset<flatbuffers::Vector<int32_t>> shape = 0,
+ onert_tflite::TensorType type = onert_tflite::TensorType_FLOAT32, uint32_t buffer = 0,
+ flatbuffers::Offset<flatbuffers::String> name = 0,
+ flatbuffers::Offset<onert_tflite::QuantizationParameters> quantization = 0,
+ bool is_variable = false, flatbuffers::Offset<onert_tflite::SparsityParameters> sparsity = 0,
+ flatbuffers::Offset<flatbuffers::Vector<int32_t>> shape_signature = 0)
{
TensorBuilder builder_(_fbb);
builder_.add_shape_signature(shape_signature);
@@ -2618,20 +3026,23 @@ CreateTensor(flatbuffers::FlatBufferBuilder &_fbb,
inline flatbuffers::Offset<Tensor> CreateTensorDirect(
flatbuffers::FlatBufferBuilder &_fbb, const std::vector<int32_t> *shape = nullptr,
- TensorType type = TensorType_FLOAT32, uint32_t buffer = 0, const char *name = nullptr,
- flatbuffers::Offset<QuantizationParameters> quantization = 0, bool is_variable = false,
- flatbuffers::Offset<SparsityParameters> sparsity = 0,
+ onert_tflite::TensorType type = onert_tflite::TensorType_FLOAT32, uint32_t buffer = 0,
+ const char *name = nullptr,
+ flatbuffers::Offset<onert_tflite::QuantizationParameters> quantization = 0,
+ bool is_variable = false, flatbuffers::Offset<onert_tflite::SparsityParameters> sparsity = 0,
const std::vector<int32_t> *shape_signature = nullptr)
{
- return onert_tflite::CreateTensor(
- _fbb, shape ? _fbb.CreateVector<int32_t>(*shape) : 0, type, buffer,
- name ? _fbb.CreateString(name) : 0, quantization, is_variable, sparsity,
- shape_signature ? _fbb.CreateVector<int32_t>(*shape_signature) : 0);
+ auto shape__ = shape ? _fbb.CreateVector<int32_t>(*shape) : 0;
+ auto name__ = name ? _fbb.CreateString(name) : 0;
+ auto shape_signature__ = shape_signature ? _fbb.CreateVector<int32_t>(*shape_signature) : 0;
+ return onert_tflite::CreateTensor(_fbb, shape__, type, buffer, name__, quantization, is_variable,
+ sparsity, shape_signature__);
}
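// Illustrative sketch: building a tensor record through the Direct helper,
// with a hypothetical shape and name and everything else defaulted:
//
//   std::vector<int32_t> shape = {1, 224, 224, 3};
//   auto t = onert_tflite::CreateTensorDirect(
//     fbb, &shape, onert_tflite::TensorType_FLOAT32, /*buffer=*/0, "input");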
struct Conv2DOptions FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table
{
- enum
+ typedef Conv2DOptionsBuilder Builder;
+ enum FlatBuffersVTableOffset FLATBUFFERS_VTABLE_UNDERLYING_TYPE
{
VT_PADDING = 4,
VT_STRIDE_W = 6,
@@ -2640,12 +3051,16 @@ struct Conv2DOptions FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table
VT_DILATION_W_FACTOR = 12,
VT_DILATION_H_FACTOR = 14
};
- Padding padding() const { return static_cast<Padding>(GetField<int8_t>(VT_PADDING, 0)); }
+ onert_tflite::Padding padding() const
+ {
+ return static_cast<onert_tflite::Padding>(GetField<int8_t>(VT_PADDING, 0));
+ }
int32_t stride_w() const { return GetField<int32_t>(VT_STRIDE_W, 0); }
int32_t stride_h() const { return GetField<int32_t>(VT_STRIDE_H, 0); }
- ActivationFunctionType fused_activation_function() const
+ onert_tflite::ActivationFunctionType fused_activation_function() const
{
- return static_cast<ActivationFunctionType>(GetField<int8_t>(VT_FUSED_ACTIVATION_FUNCTION, 0));
+ return static_cast<onert_tflite::ActivationFunctionType>(
+ GetField<int8_t>(VT_FUSED_ACTIVATION_FUNCTION, 0));
}
int32_t dilation_w_factor() const { return GetField<int32_t>(VT_DILATION_W_FACTOR, 1); }
int32_t dilation_h_factor() const { return GetField<int32_t>(VT_DILATION_H_FACTOR, 1); }
@@ -2662,9 +3077,10 @@ struct Conv2DOptions FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table
struct Conv2DOptionsBuilder
{
+ typedef Conv2DOptions Table;
flatbuffers::FlatBufferBuilder &fbb_;
flatbuffers::uoffset_t start_;
- void add_padding(Padding padding)
+ void add_padding(onert_tflite::Padding padding)
{
fbb_.AddElement<int8_t>(Conv2DOptions::VT_PADDING, static_cast<int8_t>(padding), 0);
}
@@ -2676,7 +3092,7 @@ struct Conv2DOptionsBuilder
{
fbb_.AddElement<int32_t>(Conv2DOptions::VT_STRIDE_H, stride_h, 0);
}
- void add_fused_activation_function(ActivationFunctionType fused_activation_function)
+ void add_fused_activation_function(onert_tflite::ActivationFunctionType fused_activation_function)
{
fbb_.AddElement<int8_t>(Conv2DOptions::VT_FUSED_ACTIVATION_FUNCTION,
static_cast<int8_t>(fused_activation_function), 0);
@@ -2693,7 +3109,6 @@ struct Conv2DOptionsBuilder
{
start_ = fbb_.StartTable();
}
- Conv2DOptionsBuilder &operator=(const Conv2DOptionsBuilder &);
flatbuffers::Offset<Conv2DOptions> Finish()
{
const auto end = fbb_.EndTable(start_);
@@ -2703,9 +3118,11 @@ struct Conv2DOptionsBuilder
};
inline flatbuffers::Offset<Conv2DOptions>
-CreateConv2DOptions(flatbuffers::FlatBufferBuilder &_fbb, Padding padding = Padding_SAME,
+CreateConv2DOptions(flatbuffers::FlatBufferBuilder &_fbb,
+ onert_tflite::Padding padding = onert_tflite::Padding_SAME,
int32_t stride_w = 0, int32_t stride_h = 0,
- ActivationFunctionType fused_activation_function = ActivationFunctionType_NONE,
+ onert_tflite::ActivationFunctionType fused_activation_function =
+ onert_tflite::ActivationFunctionType_NONE,
int32_t dilation_w_factor = 1, int32_t dilation_h_factor = 1)
{
Conv2DOptionsBuilder builder_(_fbb);
@@ -2718,9 +3135,121 @@ CreateConv2DOptions(flatbuffers::FlatBufferBuilder &_fbb, Padding padding = Padd
return builder_.Finish();
}
+struct Conv3DOptions FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table
+{
+ typedef Conv3DOptionsBuilder Builder;
+ enum FlatBuffersVTableOffset FLATBUFFERS_VTABLE_UNDERLYING_TYPE
+ {
+ VT_PADDING = 4,
+ VT_STRIDE_D = 6,
+ VT_STRIDE_W = 8,
+ VT_STRIDE_H = 10,
+ VT_FUSED_ACTIVATION_FUNCTION = 12,
+ VT_DILATION_D_FACTOR = 14,
+ VT_DILATION_W_FACTOR = 16,
+ VT_DILATION_H_FACTOR = 18
+ };
+ onert_tflite::Padding padding() const
+ {
+ return static_cast<onert_tflite::Padding>(GetField<int8_t>(VT_PADDING, 0));
+ }
+ int32_t stride_d() const { return GetField<int32_t>(VT_STRIDE_D, 0); }
+ int32_t stride_w() const { return GetField<int32_t>(VT_STRIDE_W, 0); }
+ int32_t stride_h() const { return GetField<int32_t>(VT_STRIDE_H, 0); }
+ onert_tflite::ActivationFunctionType fused_activation_function() const
+ {
+ return static_cast<onert_tflite::ActivationFunctionType>(
+ GetField<int8_t>(VT_FUSED_ACTIVATION_FUNCTION, 0));
+ }
+ int32_t dilation_d_factor() const { return GetField<int32_t>(VT_DILATION_D_FACTOR, 1); }
+ int32_t dilation_w_factor() const { return GetField<int32_t>(VT_DILATION_W_FACTOR, 1); }
+ int32_t dilation_h_factor() const { return GetField<int32_t>(VT_DILATION_H_FACTOR, 1); }
+ bool Verify(flatbuffers::Verifier &verifier) const
+ {
+ return VerifyTableStart(verifier) && VerifyField<int8_t>(verifier, VT_PADDING) &&
+ VerifyField<int32_t>(verifier, VT_STRIDE_D) &&
+ VerifyField<int32_t>(verifier, VT_STRIDE_W) &&
+ VerifyField<int32_t>(verifier, VT_STRIDE_H) &&
+ VerifyField<int8_t>(verifier, VT_FUSED_ACTIVATION_FUNCTION) &&
+ VerifyField<int32_t>(verifier, VT_DILATION_D_FACTOR) &&
+ VerifyField<int32_t>(verifier, VT_DILATION_W_FACTOR) &&
+ VerifyField<int32_t>(verifier, VT_DILATION_H_FACTOR) && verifier.EndTable();
+ }
+};
+
+struct Conv3DOptionsBuilder
+{
+ typedef Conv3DOptions Table;
+ flatbuffers::FlatBufferBuilder &fbb_;
+ flatbuffers::uoffset_t start_;
+ void add_padding(onert_tflite::Padding padding)
+ {
+ fbb_.AddElement<int8_t>(Conv3DOptions::VT_PADDING, static_cast<int8_t>(padding), 0);
+ }
+ void add_stride_d(int32_t stride_d)
+ {
+ fbb_.AddElement<int32_t>(Conv3DOptions::VT_STRIDE_D, stride_d, 0);
+ }
+ void add_stride_w(int32_t stride_w)
+ {
+ fbb_.AddElement<int32_t>(Conv3DOptions::VT_STRIDE_W, stride_w, 0);
+ }
+ void add_stride_h(int32_t stride_h)
+ {
+ fbb_.AddElement<int32_t>(Conv3DOptions::VT_STRIDE_H, stride_h, 0);
+ }
+ void add_fused_activation_function(onert_tflite::ActivationFunctionType fused_activation_function)
+ {
+ fbb_.AddElement<int8_t>(Conv3DOptions::VT_FUSED_ACTIVATION_FUNCTION,
+ static_cast<int8_t>(fused_activation_function), 0);
+ }
+ void add_dilation_d_factor(int32_t dilation_d_factor)
+ {
+ fbb_.AddElement<int32_t>(Conv3DOptions::VT_DILATION_D_FACTOR, dilation_d_factor, 1);
+ }
+ void add_dilation_w_factor(int32_t dilation_w_factor)
+ {
+ fbb_.AddElement<int32_t>(Conv3DOptions::VT_DILATION_W_FACTOR, dilation_w_factor, 1);
+ }
+ void add_dilation_h_factor(int32_t dilation_h_factor)
+ {
+ fbb_.AddElement<int32_t>(Conv3DOptions::VT_DILATION_H_FACTOR, dilation_h_factor, 1);
+ }
+ explicit Conv3DOptionsBuilder(flatbuffers::FlatBufferBuilder &_fbb) : fbb_(_fbb)
+ {
+ start_ = fbb_.StartTable();
+ }
+ flatbuffers::Offset<Conv3DOptions> Finish()
+ {
+ const auto end = fbb_.EndTable(start_);
+ auto o = flatbuffers::Offset<Conv3DOptions>(end);
+ return o;
+ }
+};
+
+inline flatbuffers::Offset<Conv3DOptions> CreateConv3DOptions(
+ flatbuffers::FlatBufferBuilder &_fbb, onert_tflite::Padding padding = onert_tflite::Padding_SAME,
+ int32_t stride_d = 0, int32_t stride_w = 0, int32_t stride_h = 0,
+ onert_tflite::ActivationFunctionType fused_activation_function =
+ onert_tflite::ActivationFunctionType_NONE,
+ int32_t dilation_d_factor = 1, int32_t dilation_w_factor = 1, int32_t dilation_h_factor = 1)
+{
+ Conv3DOptionsBuilder builder_(_fbb);
+ builder_.add_dilation_h_factor(dilation_h_factor);
+ builder_.add_dilation_w_factor(dilation_w_factor);
+ builder_.add_dilation_d_factor(dilation_d_factor);
+ builder_.add_stride_h(stride_h);
+ builder_.add_stride_w(stride_w);
+ builder_.add_stride_d(stride_d);
+ builder_.add_fused_activation_function(fused_activation_function);
+ builder_.add_padding(padding);
+ return builder_.Finish();
+}
+
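// Illustrative sketch: Conv3DOptions is new in this regeneration and follows
// the same Builder/Create pattern as Conv2DOptions, with stride and dilation
// fields for the extra depth dimension, e.g.
//
//   auto c3d = onert_tflite::CreateConv3DOptions(
//     fbb, onert_tflite::Padding_SAME,
//     /*stride_d=*/1, /*stride_w=*/1, /*stride_h=*/1,
//     onert_tflite::ActivationFunctionType_RELU);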
struct Pool2DOptions FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table
{
- enum
+ typedef Pool2DOptionsBuilder Builder;
+ enum FlatBuffersVTableOffset FLATBUFFERS_VTABLE_UNDERLYING_TYPE
{
VT_PADDING = 4,
VT_STRIDE_W = 6,
@@ -2729,14 +3258,18 @@ struct Pool2DOptions FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table
VT_FILTER_HEIGHT = 12,
VT_FUSED_ACTIVATION_FUNCTION = 14
};
- Padding padding() const { return static_cast<Padding>(GetField<int8_t>(VT_PADDING, 0)); }
+ onert_tflite::Padding padding() const
+ {
+ return static_cast<onert_tflite::Padding>(GetField<int8_t>(VT_PADDING, 0));
+ }
int32_t stride_w() const { return GetField<int32_t>(VT_STRIDE_W, 0); }
int32_t stride_h() const { return GetField<int32_t>(VT_STRIDE_H, 0); }
int32_t filter_width() const { return GetField<int32_t>(VT_FILTER_WIDTH, 0); }
int32_t filter_height() const { return GetField<int32_t>(VT_FILTER_HEIGHT, 0); }
- ActivationFunctionType fused_activation_function() const
+ onert_tflite::ActivationFunctionType fused_activation_function() const
{
- return static_cast<ActivationFunctionType>(GetField<int8_t>(VT_FUSED_ACTIVATION_FUNCTION, 0));
+ return static_cast<onert_tflite::ActivationFunctionType>(
+ GetField<int8_t>(VT_FUSED_ACTIVATION_FUNCTION, 0));
}
bool Verify(flatbuffers::Verifier &verifier) const
{
@@ -2751,9 +3284,10 @@ struct Pool2DOptions FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table
struct Pool2DOptionsBuilder
{
+ typedef Pool2DOptions Table;
flatbuffers::FlatBufferBuilder &fbb_;
flatbuffers::uoffset_t start_;
- void add_padding(Padding padding)
+ void add_padding(onert_tflite::Padding padding)
{
fbb_.AddElement<int8_t>(Pool2DOptions::VT_PADDING, static_cast<int8_t>(padding), 0);
}
@@ -2773,7 +3307,7 @@ struct Pool2DOptionsBuilder
{
fbb_.AddElement<int32_t>(Pool2DOptions::VT_FILTER_HEIGHT, filter_height, 0);
}
- void add_fused_activation_function(ActivationFunctionType fused_activation_function)
+ void add_fused_activation_function(onert_tflite::ActivationFunctionType fused_activation_function)
{
fbb_.AddElement<int8_t>(Pool2DOptions::VT_FUSED_ACTIVATION_FUNCTION,
static_cast<int8_t>(fused_activation_function), 0);
@@ -2782,7 +3316,6 @@ struct Pool2DOptionsBuilder
{
start_ = fbb_.StartTable();
}
- Pool2DOptionsBuilder &operator=(const Pool2DOptionsBuilder &);
flatbuffers::Offset<Pool2DOptions> Finish()
{
const auto end = fbb_.EndTable(start_);
@@ -2791,11 +3324,11 @@ struct Pool2DOptionsBuilder
}
};
-inline flatbuffers::Offset<Pool2DOptions>
-CreatePool2DOptions(flatbuffers::FlatBufferBuilder &_fbb, Padding padding = Padding_SAME,
- int32_t stride_w = 0, int32_t stride_h = 0, int32_t filter_width = 0,
- int32_t filter_height = 0,
- ActivationFunctionType fused_activation_function = ActivationFunctionType_NONE)
+inline flatbuffers::Offset<Pool2DOptions> CreatePool2DOptions(
+ flatbuffers::FlatBufferBuilder &_fbb, onert_tflite::Padding padding = onert_tflite::Padding_SAME,
+ int32_t stride_w = 0, int32_t stride_h = 0, int32_t filter_width = 0, int32_t filter_height = 0,
+ onert_tflite::ActivationFunctionType fused_activation_function =
+ onert_tflite::ActivationFunctionType_NONE)
{
Pool2DOptionsBuilder builder_(_fbb);
builder_.add_filter_height(filter_height);
@@ -2809,7 +3342,8 @@ CreatePool2DOptions(flatbuffers::FlatBufferBuilder &_fbb, Padding padding = Padd
struct DepthwiseConv2DOptions FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table
{
- enum
+ typedef DepthwiseConv2DOptionsBuilder Builder;
+ enum FlatBuffersVTableOffset FLATBUFFERS_VTABLE_UNDERLYING_TYPE
{
VT_PADDING = 4,
VT_STRIDE_W = 6,
@@ -2819,13 +3353,17 @@ struct DepthwiseConv2DOptions FLATBUFFERS_FINAL_CLASS : private flatbuffers::Tab
VT_DILATION_W_FACTOR = 14,
VT_DILATION_H_FACTOR = 16
};
- Padding padding() const { return static_cast<Padding>(GetField<int8_t>(VT_PADDING, 0)); }
+ onert_tflite::Padding padding() const
+ {
+ return static_cast<onert_tflite::Padding>(GetField<int8_t>(VT_PADDING, 0));
+ }
int32_t stride_w() const { return GetField<int32_t>(VT_STRIDE_W, 0); }
int32_t stride_h() const { return GetField<int32_t>(VT_STRIDE_H, 0); }
int32_t depth_multiplier() const { return GetField<int32_t>(VT_DEPTH_MULTIPLIER, 0); }
- ActivationFunctionType fused_activation_function() const
+ onert_tflite::ActivationFunctionType fused_activation_function() const
{
- return static_cast<ActivationFunctionType>(GetField<int8_t>(VT_FUSED_ACTIVATION_FUNCTION, 0));
+ return static_cast<onert_tflite::ActivationFunctionType>(
+ GetField<int8_t>(VT_FUSED_ACTIVATION_FUNCTION, 0));
}
int32_t dilation_w_factor() const { return GetField<int32_t>(VT_DILATION_W_FACTOR, 1); }
int32_t dilation_h_factor() const { return GetField<int32_t>(VT_DILATION_H_FACTOR, 1); }
@@ -2843,9 +3381,10 @@ struct DepthwiseConv2DOptions FLATBUFFERS_FINAL_CLASS : private flatbuffers::Tab
struct DepthwiseConv2DOptionsBuilder
{
+ typedef DepthwiseConv2DOptions Table;
flatbuffers::FlatBufferBuilder &fbb_;
flatbuffers::uoffset_t start_;
- void add_padding(Padding padding)
+ void add_padding(onert_tflite::Padding padding)
{
fbb_.AddElement<int8_t>(DepthwiseConv2DOptions::VT_PADDING, static_cast<int8_t>(padding), 0);
}
@@ -2861,7 +3400,7 @@ struct DepthwiseConv2DOptionsBuilder
{
fbb_.AddElement<int32_t>(DepthwiseConv2DOptions::VT_DEPTH_MULTIPLIER, depth_multiplier, 0);
}
- void add_fused_activation_function(ActivationFunctionType fused_activation_function)
+ void add_fused_activation_function(onert_tflite::ActivationFunctionType fused_activation_function)
{
fbb_.AddElement<int8_t>(DepthwiseConv2DOptions::VT_FUSED_ACTIVATION_FUNCTION,
static_cast<int8_t>(fused_activation_function), 0);
@@ -2878,7 +3417,6 @@ struct DepthwiseConv2DOptionsBuilder
{
start_ = fbb_.StartTable();
}
- DepthwiseConv2DOptionsBuilder &operator=(const DepthwiseConv2DOptionsBuilder &);
flatbuffers::Offset<DepthwiseConv2DOptions> Finish()
{
const auto end = fbb_.EndTable(start_);
@@ -2888,9 +3426,10 @@ struct DepthwiseConv2DOptionsBuilder
};
inline flatbuffers::Offset<DepthwiseConv2DOptions> CreateDepthwiseConv2DOptions(
- flatbuffers::FlatBufferBuilder &_fbb, Padding padding = Padding_SAME, int32_t stride_w = 0,
- int32_t stride_h = 0, int32_t depth_multiplier = 0,
- ActivationFunctionType fused_activation_function = ActivationFunctionType_NONE,
+ flatbuffers::FlatBufferBuilder &_fbb, onert_tflite::Padding padding = onert_tflite::Padding_SAME,
+ int32_t stride_w = 0, int32_t stride_h = 0, int32_t depth_multiplier = 0,
+ onert_tflite::ActivationFunctionType fused_activation_function =
+ onert_tflite::ActivationFunctionType_NONE,
int32_t dilation_w_factor = 1, int32_t dilation_h_factor = 1)
{
DepthwiseConv2DOptionsBuilder builder_(_fbb);
@@ -2906,7 +3445,8 @@ inline flatbuffers::Offset<DepthwiseConv2DOptions> CreateDepthwiseConv2DOptions(
struct ConcatEmbeddingsOptions FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table
{
- enum
+ typedef ConcatEmbeddingsOptionsBuilder Builder;
+ enum FlatBuffersVTableOffset FLATBUFFERS_VTABLE_UNDERLYING_TYPE
{
VT_NUM_CHANNELS = 4,
VT_NUM_COLUMNS_PER_CHANNEL = 6,
@@ -2933,6 +3473,7 @@ struct ConcatEmbeddingsOptions FLATBUFFERS_FINAL_CLASS : private flatbuffers::Ta
struct ConcatEmbeddingsOptionsBuilder
{
+ typedef ConcatEmbeddingsOptions Table;
flatbuffers::FlatBufferBuilder &fbb_;
flatbuffers::uoffset_t start_;
void add_num_channels(int32_t num_channels)
@@ -2954,7 +3495,6 @@ struct ConcatEmbeddingsOptionsBuilder
{
start_ = fbb_.StartTable();
}
- ConcatEmbeddingsOptionsBuilder &operator=(const ConcatEmbeddingsOptionsBuilder &);
flatbuffers::Offset<ConcatEmbeddingsOptions> Finish()
{
const auto end = fbb_.EndTable(start_);
@@ -2980,21 +3520,24 @@ CreateConcatEmbeddingsOptionsDirect(flatbuffers::FlatBufferBuilder &_fbb, int32_
const std::vector<int32_t> *num_columns_per_channel = nullptr,
const std::vector<int32_t> *embedding_dim_per_channel = nullptr)
{
- return onert_tflite::CreateConcatEmbeddingsOptions(
- _fbb, num_channels,
- num_columns_per_channel ? _fbb.CreateVector<int32_t>(*num_columns_per_channel) : 0,
- embedding_dim_per_channel ? _fbb.CreateVector<int32_t>(*embedding_dim_per_channel) : 0);
+ auto num_columns_per_channel__ =
+ num_columns_per_channel ? _fbb.CreateVector<int32_t>(*num_columns_per_channel) : 0;
+ auto embedding_dim_per_channel__ =
+ embedding_dim_per_channel ? _fbb.CreateVector<int32_t>(*embedding_dim_per_channel) : 0;
+ return onert_tflite::CreateConcatEmbeddingsOptions(_fbb, num_channels, num_columns_per_channel__,
+ embedding_dim_per_channel__);
}
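
Note: the *Direct helpers now hoist each CreateVector call into a named local before invoking the corresponding Create function. C++ leaves function-argument evaluation order unspecified, so the old nested form could serialize the vectors in a compiler-dependent order; naming the temporaries pins the order and, plausibly, makes the produced buffers byte-identical across compilers. The same pattern, reduced to a hypothetical two-vector table:

// Hedged sketch; CreateSomeTable, a, and b are assumptions, not header API.
auto a__ = a ? fbb.CreateVector<int32_t>(*a) : 0; // always serialized first
auto b__ = b ? fbb.CreateVector<int32_t>(*b) : 0; // always serialized second
// return CreateSomeTable(fbb, a__, b__);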
struct LSHProjectionOptions FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table
{
- enum
+ typedef LSHProjectionOptionsBuilder Builder;
+ enum FlatBuffersVTableOffset FLATBUFFERS_VTABLE_UNDERLYING_TYPE
{
VT_TYPE = 4
};
- LSHProjectionType type() const
+ onert_tflite::LSHProjectionType type() const
{
- return static_cast<LSHProjectionType>(GetField<int8_t>(VT_TYPE, 0));
+ return static_cast<onert_tflite::LSHProjectionType>(GetField<int8_t>(VT_TYPE, 0));
}
bool Verify(flatbuffers::Verifier &verifier) const
{
@@ -3005,9 +3548,10 @@ struct LSHProjectionOptions FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table
struct LSHProjectionOptionsBuilder
{
+ typedef LSHProjectionOptions Table;
flatbuffers::FlatBufferBuilder &fbb_;
flatbuffers::uoffset_t start_;
- void add_type(LSHProjectionType type)
+ void add_type(onert_tflite::LSHProjectionType type)
{
fbb_.AddElement<int8_t>(LSHProjectionOptions::VT_TYPE, static_cast<int8_t>(type), 0);
}
@@ -3015,7 +3559,6 @@ struct LSHProjectionOptionsBuilder
{
start_ = fbb_.StartTable();
}
- LSHProjectionOptionsBuilder &operator=(const LSHProjectionOptionsBuilder &);
flatbuffers::Offset<LSHProjectionOptions> Finish()
{
const auto end = fbb_.EndTable(start_);
@@ -3024,9 +3567,9 @@ struct LSHProjectionOptionsBuilder
}
};
-inline flatbuffers::Offset<LSHProjectionOptions>
-CreateLSHProjectionOptions(flatbuffers::FlatBufferBuilder &_fbb,
- LSHProjectionType type = LSHProjectionType_UNKNOWN)
+inline flatbuffers::Offset<LSHProjectionOptions> CreateLSHProjectionOptions(
+ flatbuffers::FlatBufferBuilder &_fbb,
+ onert_tflite::LSHProjectionType type = onert_tflite::LSHProjectionType_UNKNOWN)
{
LSHProjectionOptionsBuilder builder_(_fbb);
builder_.add_type(type);
@@ -3035,16 +3578,18 @@ CreateLSHProjectionOptions(flatbuffers::FlatBufferBuilder &_fbb,
struct SVDFOptions FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table
{
- enum
+ typedef SVDFOptionsBuilder Builder;
+ enum FlatBuffersVTableOffset FLATBUFFERS_VTABLE_UNDERLYING_TYPE
{
VT_RANK = 4,
VT_FUSED_ACTIVATION_FUNCTION = 6,
VT_ASYMMETRIC_QUANTIZE_INPUTS = 8
};
int32_t rank() const { return GetField<int32_t>(VT_RANK, 0); }
- ActivationFunctionType fused_activation_function() const
+ onert_tflite::ActivationFunctionType fused_activation_function() const
{
- return static_cast<ActivationFunctionType>(GetField<int8_t>(VT_FUSED_ACTIVATION_FUNCTION, 0));
+ return static_cast<onert_tflite::ActivationFunctionType>(
+ GetField<int8_t>(VT_FUSED_ACTIVATION_FUNCTION, 0));
}
bool asymmetric_quantize_inputs() const
{
@@ -3060,10 +3605,11 @@ struct SVDFOptions FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table
struct SVDFOptionsBuilder
{
+ typedef SVDFOptions Table;
flatbuffers::FlatBufferBuilder &fbb_;
flatbuffers::uoffset_t start_;
void add_rank(int32_t rank) { fbb_.AddElement<int32_t>(SVDFOptions::VT_RANK, rank, 0); }
- void add_fused_activation_function(ActivationFunctionType fused_activation_function)
+ void add_fused_activation_function(onert_tflite::ActivationFunctionType fused_activation_function)
{
fbb_.AddElement<int8_t>(SVDFOptions::VT_FUSED_ACTIVATION_FUNCTION,
static_cast<int8_t>(fused_activation_function), 0);
@@ -3077,7 +3623,6 @@ struct SVDFOptionsBuilder
{
start_ = fbb_.StartTable();
}
- SVDFOptionsBuilder &operator=(const SVDFOptionsBuilder &);
flatbuffers::Offset<SVDFOptions> Finish()
{
const auto end = fbb_.EndTable(start_);
@@ -3088,7 +3633,8 @@ struct SVDFOptionsBuilder
inline flatbuffers::Offset<SVDFOptions>
CreateSVDFOptions(flatbuffers::FlatBufferBuilder &_fbb, int32_t rank = 0,
- ActivationFunctionType fused_activation_function = ActivationFunctionType_NONE,
+ onert_tflite::ActivationFunctionType fused_activation_function =
+ onert_tflite::ActivationFunctionType_NONE,
bool asymmetric_quantize_inputs = false)
{
SVDFOptionsBuilder builder_(_fbb);
@@ -3100,14 +3646,16 @@ CreateSVDFOptions(flatbuffers::FlatBufferBuilder &_fbb, int32_t rank = 0,
struct RNNOptions FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table
{
- enum
+ typedef RNNOptionsBuilder Builder;
+ enum FlatBuffersVTableOffset FLATBUFFERS_VTABLE_UNDERLYING_TYPE
{
VT_FUSED_ACTIVATION_FUNCTION = 4,
VT_ASYMMETRIC_QUANTIZE_INPUTS = 6
};
- ActivationFunctionType fused_activation_function() const
+ onert_tflite::ActivationFunctionType fused_activation_function() const
{
- return static_cast<ActivationFunctionType>(GetField<int8_t>(VT_FUSED_ACTIVATION_FUNCTION, 0));
+ return static_cast<onert_tflite::ActivationFunctionType>(
+ GetField<int8_t>(VT_FUSED_ACTIVATION_FUNCTION, 0));
}
bool asymmetric_quantize_inputs() const
{
@@ -3123,9 +3671,10 @@ struct RNNOptions FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table
struct RNNOptionsBuilder
{
+ typedef RNNOptions Table;
flatbuffers::FlatBufferBuilder &fbb_;
flatbuffers::uoffset_t start_;
- void add_fused_activation_function(ActivationFunctionType fused_activation_function)
+ void add_fused_activation_function(onert_tflite::ActivationFunctionType fused_activation_function)
{
fbb_.AddElement<int8_t>(RNNOptions::VT_FUSED_ACTIVATION_FUNCTION,
static_cast<int8_t>(fused_activation_function), 0);
@@ -3139,7 +3688,6 @@ struct RNNOptionsBuilder
{
start_ = fbb_.StartTable();
}
- RNNOptionsBuilder &operator=(const RNNOptionsBuilder &);
flatbuffers::Offset<RNNOptions> Finish()
{
const auto end = fbb_.EndTable(start_);
@@ -3150,7 +3698,8 @@ struct RNNOptionsBuilder
inline flatbuffers::Offset<RNNOptions>
CreateRNNOptions(flatbuffers::FlatBufferBuilder &_fbb,
- ActivationFunctionType fused_activation_function = ActivationFunctionType_NONE,
+ onert_tflite::ActivationFunctionType fused_activation_function =
+ onert_tflite::ActivationFunctionType_NONE,
bool asymmetric_quantize_inputs = false)
{
RNNOptionsBuilder builder_(_fbb);
@@ -3161,16 +3710,18 @@ CreateRNNOptions(flatbuffers::FlatBufferBuilder &_fbb,
struct SequenceRNNOptions FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table
{
- enum
+ typedef SequenceRNNOptionsBuilder Builder;
+ enum FlatBuffersVTableOffset FLATBUFFERS_VTABLE_UNDERLYING_TYPE
{
VT_TIME_MAJOR = 4,
VT_FUSED_ACTIVATION_FUNCTION = 6,
VT_ASYMMETRIC_QUANTIZE_INPUTS = 8
};
bool time_major() const { return GetField<uint8_t>(VT_TIME_MAJOR, 0) != 0; }
- ActivationFunctionType fused_activation_function() const
+ onert_tflite::ActivationFunctionType fused_activation_function() const
{
- return static_cast<ActivationFunctionType>(GetField<int8_t>(VT_FUSED_ACTIVATION_FUNCTION, 0));
+ return static_cast<onert_tflite::ActivationFunctionType>(
+ GetField<int8_t>(VT_FUSED_ACTIVATION_FUNCTION, 0));
}
bool asymmetric_quantize_inputs() const
{
@@ -3186,6 +3737,7 @@ struct SequenceRNNOptions FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table
struct SequenceRNNOptionsBuilder
{
+ typedef SequenceRNNOptions Table;
flatbuffers::FlatBufferBuilder &fbb_;
flatbuffers::uoffset_t start_;
void add_time_major(bool time_major)
@@ -3193,7 +3745,7 @@ struct SequenceRNNOptionsBuilder
fbb_.AddElement<uint8_t>(SequenceRNNOptions::VT_TIME_MAJOR, static_cast<uint8_t>(time_major),
0);
}
- void add_fused_activation_function(ActivationFunctionType fused_activation_function)
+ void add_fused_activation_function(onert_tflite::ActivationFunctionType fused_activation_function)
{
fbb_.AddElement<int8_t>(SequenceRNNOptions::VT_FUSED_ACTIVATION_FUNCTION,
static_cast<int8_t>(fused_activation_function), 0);
@@ -3207,7 +3759,6 @@ struct SequenceRNNOptionsBuilder
{
start_ = fbb_.StartTable();
}
- SequenceRNNOptionsBuilder &operator=(const SequenceRNNOptionsBuilder &);
flatbuffers::Offset<SequenceRNNOptions> Finish()
{
const auto end = fbb_.EndTable(start_);
@@ -3216,10 +3767,11 @@ struct SequenceRNNOptionsBuilder
}
};
-inline flatbuffers::Offset<SequenceRNNOptions> CreateSequenceRNNOptions(
- flatbuffers::FlatBufferBuilder &_fbb, bool time_major = false,
- ActivationFunctionType fused_activation_function = ActivationFunctionType_NONE,
- bool asymmetric_quantize_inputs = false)
+inline flatbuffers::Offset<SequenceRNNOptions>
+CreateSequenceRNNOptions(flatbuffers::FlatBufferBuilder &_fbb, bool time_major = false,
+ onert_tflite::ActivationFunctionType fused_activation_function =
+ onert_tflite::ActivationFunctionType_NONE,
+ bool asymmetric_quantize_inputs = false)
{
SequenceRNNOptionsBuilder builder_(_fbb);
builder_.add_asymmetric_quantize_inputs(asymmetric_quantize_inputs);
@@ -3230,7 +3782,8 @@ inline flatbuffers::Offset<SequenceRNNOptions> CreateSequenceRNNOptions(
struct BidirectionalSequenceRNNOptions FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table
{
- enum
+ typedef BidirectionalSequenceRNNOptionsBuilder Builder;
+ enum FlatBuffersVTableOffset FLATBUFFERS_VTABLE_UNDERLYING_TYPE
{
VT_TIME_MAJOR = 4,
VT_FUSED_ACTIVATION_FUNCTION = 6,
@@ -3238,9 +3791,10 @@ struct BidirectionalSequenceRNNOptions FLATBUFFERS_FINAL_CLASS : private flatbuf
VT_ASYMMETRIC_QUANTIZE_INPUTS = 10
};
bool time_major() const { return GetField<uint8_t>(VT_TIME_MAJOR, 0) != 0; }
- ActivationFunctionType fused_activation_function() const
+ onert_tflite::ActivationFunctionType fused_activation_function() const
{
- return static_cast<ActivationFunctionType>(GetField<int8_t>(VT_FUSED_ACTIVATION_FUNCTION, 0));
+ return static_cast<onert_tflite::ActivationFunctionType>(
+ GetField<int8_t>(VT_FUSED_ACTIVATION_FUNCTION, 0));
}
bool merge_outputs() const { return GetField<uint8_t>(VT_MERGE_OUTPUTS, 0) != 0; }
bool asymmetric_quantize_inputs() const
@@ -3258,6 +3812,7 @@ struct BidirectionalSequenceRNNOptions FLATBUFFERS_FINAL_CLASS : private flatbuf
struct BidirectionalSequenceRNNOptionsBuilder
{
+ typedef BidirectionalSequenceRNNOptions Table;
flatbuffers::FlatBufferBuilder &fbb_;
flatbuffers::uoffset_t start_;
void add_time_major(bool time_major)
@@ -3265,7 +3820,7 @@ struct BidirectionalSequenceRNNOptionsBuilder
fbb_.AddElement<uint8_t>(BidirectionalSequenceRNNOptions::VT_TIME_MAJOR,
static_cast<uint8_t>(time_major), 0);
}
- void add_fused_activation_function(ActivationFunctionType fused_activation_function)
+ void add_fused_activation_function(onert_tflite::ActivationFunctionType fused_activation_function)
{
fbb_.AddElement<int8_t>(BidirectionalSequenceRNNOptions::VT_FUSED_ACTIVATION_FUNCTION,
static_cast<int8_t>(fused_activation_function), 0);
@@ -3284,7 +3839,6 @@ struct BidirectionalSequenceRNNOptionsBuilder
{
start_ = fbb_.StartTable();
}
- BidirectionalSequenceRNNOptionsBuilder &operator=(const BidirectionalSequenceRNNOptionsBuilder &);
flatbuffers::Offset<BidirectionalSequenceRNNOptions> Finish()
{
const auto end = fbb_.EndTable(start_);
@@ -3295,7 +3849,8 @@ struct BidirectionalSequenceRNNOptionsBuilder
inline flatbuffers::Offset<BidirectionalSequenceRNNOptions> CreateBidirectionalSequenceRNNOptions(
flatbuffers::FlatBufferBuilder &_fbb, bool time_major = false,
- ActivationFunctionType fused_activation_function = ActivationFunctionType_NONE,
+ onert_tflite::ActivationFunctionType fused_activation_function =
+ onert_tflite::ActivationFunctionType_NONE,
bool merge_outputs = false, bool asymmetric_quantize_inputs = false)
{
BidirectionalSequenceRNNOptionsBuilder builder_(_fbb);
@@ -3308,20 +3863,23 @@ inline flatbuffers::Offset<BidirectionalSequenceRNNOptions> CreateBidirectionalS
struct FullyConnectedOptions FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table
{
- enum
+ typedef FullyConnectedOptionsBuilder Builder;
+ enum FlatBuffersVTableOffset FLATBUFFERS_VTABLE_UNDERLYING_TYPE
{
VT_FUSED_ACTIVATION_FUNCTION = 4,
VT_WEIGHTS_FORMAT = 6,
VT_KEEP_NUM_DIMS = 8,
VT_ASYMMETRIC_QUANTIZE_INPUTS = 10
};
- ActivationFunctionType fused_activation_function() const
+ onert_tflite::ActivationFunctionType fused_activation_function() const
{
- return static_cast<ActivationFunctionType>(GetField<int8_t>(VT_FUSED_ACTIVATION_FUNCTION, 0));
+ return static_cast<onert_tflite::ActivationFunctionType>(
+ GetField<int8_t>(VT_FUSED_ACTIVATION_FUNCTION, 0));
}
- FullyConnectedOptionsWeightsFormat weights_format() const
+ onert_tflite::FullyConnectedOptionsWeightsFormat weights_format() const
{
- return static_cast<FullyConnectedOptionsWeightsFormat>(GetField<int8_t>(VT_WEIGHTS_FORMAT, 0));
+ return static_cast<onert_tflite::FullyConnectedOptionsWeightsFormat>(
+ GetField<int8_t>(VT_WEIGHTS_FORMAT, 0));
}
bool keep_num_dims() const { return GetField<uint8_t>(VT_KEEP_NUM_DIMS, 0) != 0; }
bool asymmetric_quantize_inputs() const
@@ -3340,14 +3898,15 @@ struct FullyConnectedOptions FLATBUFFERS_FINAL_CLASS : private flatbuffers::Tabl
struct FullyConnectedOptionsBuilder
{
+ typedef FullyConnectedOptions Table;
flatbuffers::FlatBufferBuilder &fbb_;
flatbuffers::uoffset_t start_;
- void add_fused_activation_function(ActivationFunctionType fused_activation_function)
+ void add_fused_activation_function(onert_tflite::ActivationFunctionType fused_activation_function)
{
fbb_.AddElement<int8_t>(FullyConnectedOptions::VT_FUSED_ACTIVATION_FUNCTION,
static_cast<int8_t>(fused_activation_function), 0);
}
- void add_weights_format(FullyConnectedOptionsWeightsFormat weights_format)
+ void add_weights_format(onert_tflite::FullyConnectedOptionsWeightsFormat weights_format)
{
fbb_.AddElement<int8_t>(FullyConnectedOptions::VT_WEIGHTS_FORMAT,
static_cast<int8_t>(weights_format), 0);
@@ -3366,7 +3925,6 @@ struct FullyConnectedOptionsBuilder
{
start_ = fbb_.StartTable();
}
- FullyConnectedOptionsBuilder &operator=(const FullyConnectedOptionsBuilder &);
flatbuffers::Offset<FullyConnectedOptions> Finish()
{
const auto end = fbb_.EndTable(start_);
@@ -3375,11 +3933,13 @@ struct FullyConnectedOptionsBuilder
}
};
-inline flatbuffers::Offset<FullyConnectedOptions> CreateFullyConnectedOptions(
- flatbuffers::FlatBufferBuilder &_fbb,
- ActivationFunctionType fused_activation_function = ActivationFunctionType_NONE,
- FullyConnectedOptionsWeightsFormat weights_format = FullyConnectedOptionsWeightsFormat_DEFAULT,
- bool keep_num_dims = false, bool asymmetric_quantize_inputs = false)
+inline flatbuffers::Offset<FullyConnectedOptions>
+CreateFullyConnectedOptions(flatbuffers::FlatBufferBuilder &_fbb,
+ onert_tflite::ActivationFunctionType fused_activation_function =
+ onert_tflite::ActivationFunctionType_NONE,
+ onert_tflite::FullyConnectedOptionsWeightsFormat weights_format =
+ onert_tflite::FullyConnectedOptionsWeightsFormat_DEFAULT,
+ bool keep_num_dims = false, bool asymmetric_quantize_inputs = false)
{
FullyConnectedOptionsBuilder builder_(_fbb);
builder_.add_asymmetric_quantize_inputs(asymmetric_quantize_inputs);
@@ -3391,7 +3951,8 @@ inline flatbuffers::Offset<FullyConnectedOptions> CreateFullyConnectedOptions(
struct SoftmaxOptions FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table
{
- enum
+ typedef SoftmaxOptionsBuilder Builder;
+ enum FlatBuffersVTableOffset FLATBUFFERS_VTABLE_UNDERLYING_TYPE
{
VT_BETA = 4
};
@@ -3405,6 +3966,7 @@ struct SoftmaxOptions FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table
struct SoftmaxOptionsBuilder
{
+ typedef SoftmaxOptions Table;
flatbuffers::FlatBufferBuilder &fbb_;
flatbuffers::uoffset_t start_;
void add_beta(float beta) { fbb_.AddElement<float>(SoftmaxOptions::VT_BETA, beta, 0.0f); }
@@ -3412,7 +3974,6 @@ struct SoftmaxOptionsBuilder
{
start_ = fbb_.StartTable();
}
- SoftmaxOptionsBuilder &operator=(const SoftmaxOptionsBuilder &);
flatbuffers::Offset<SoftmaxOptions> Finish()
{
const auto end = fbb_.EndTable(start_);
@@ -3431,15 +3992,17 @@ CreateSoftmaxOptions(flatbuffers::FlatBufferBuilder &_fbb, float beta = 0.0f)
struct ConcatenationOptions FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table
{
- enum
+ typedef ConcatenationOptionsBuilder Builder;
+ enum FlatBuffersVTableOffset FLATBUFFERS_VTABLE_UNDERLYING_TYPE
{
VT_AXIS = 4,
VT_FUSED_ACTIVATION_FUNCTION = 6
};
int32_t axis() const { return GetField<int32_t>(VT_AXIS, 0); }
- ActivationFunctionType fused_activation_function() const
+ onert_tflite::ActivationFunctionType fused_activation_function() const
{
- return static_cast<ActivationFunctionType>(GetField<int8_t>(VT_FUSED_ACTIVATION_FUNCTION, 0));
+ return static_cast<onert_tflite::ActivationFunctionType>(
+ GetField<int8_t>(VT_FUSED_ACTIVATION_FUNCTION, 0));
}
bool Verify(flatbuffers::Verifier &verifier) const
{
@@ -3450,10 +4013,11 @@ struct ConcatenationOptions FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table
struct ConcatenationOptionsBuilder
{
+ typedef ConcatenationOptions Table;
flatbuffers::FlatBufferBuilder &fbb_;
flatbuffers::uoffset_t start_;
void add_axis(int32_t axis) { fbb_.AddElement<int32_t>(ConcatenationOptions::VT_AXIS, axis, 0); }
- void add_fused_activation_function(ActivationFunctionType fused_activation_function)
+ void add_fused_activation_function(onert_tflite::ActivationFunctionType fused_activation_function)
{
fbb_.AddElement<int8_t>(ConcatenationOptions::VT_FUSED_ACTIVATION_FUNCTION,
static_cast<int8_t>(fused_activation_function), 0);
@@ -3462,7 +4026,6 @@ struct ConcatenationOptionsBuilder
{
start_ = fbb_.StartTable();
}
- ConcatenationOptionsBuilder &operator=(const ConcatenationOptionsBuilder &);
flatbuffers::Offset<ConcatenationOptions> Finish()
{
const auto end = fbb_.EndTable(start_);
@@ -3471,9 +4034,10 @@ struct ConcatenationOptionsBuilder
}
};
-inline flatbuffers::Offset<ConcatenationOptions> CreateConcatenationOptions(
- flatbuffers::FlatBufferBuilder &_fbb, int32_t axis = 0,
- ActivationFunctionType fused_activation_function = ActivationFunctionType_NONE)
+inline flatbuffers::Offset<ConcatenationOptions>
+CreateConcatenationOptions(flatbuffers::FlatBufferBuilder &_fbb, int32_t axis = 0,
+ onert_tflite::ActivationFunctionType fused_activation_function =
+ onert_tflite::ActivationFunctionType_NONE)
{
ConcatenationOptionsBuilder builder_(_fbb);
builder_.add_axis(axis);
@@ -3483,35 +4047,45 @@ inline flatbuffers::Offset<ConcatenationOptions> CreateConcatenationOptions(
struct AddOptions FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table
{
- enum
+ typedef AddOptionsBuilder Builder;
+ enum FlatBuffersVTableOffset FLATBUFFERS_VTABLE_UNDERLYING_TYPE
{
- VT_FUSED_ACTIVATION_FUNCTION = 4
+ VT_FUSED_ACTIVATION_FUNCTION = 4,
+ VT_POT_SCALE_INT16 = 6
};
- ActivationFunctionType fused_activation_function() const
+ onert_tflite::ActivationFunctionType fused_activation_function() const
{
- return static_cast<ActivationFunctionType>(GetField<int8_t>(VT_FUSED_ACTIVATION_FUNCTION, 0));
+ return static_cast<onert_tflite::ActivationFunctionType>(
+ GetField<int8_t>(VT_FUSED_ACTIVATION_FUNCTION, 0));
}
+ bool pot_scale_int16() const { return GetField<uint8_t>(VT_POT_SCALE_INT16, 1) != 0; }
bool Verify(flatbuffers::Verifier &verifier) const
{
return VerifyTableStart(verifier) &&
- VerifyField<int8_t>(verifier, VT_FUSED_ACTIVATION_FUNCTION) && verifier.EndTable();
+ VerifyField<int8_t>(verifier, VT_FUSED_ACTIVATION_FUNCTION) &&
+ VerifyField<uint8_t>(verifier, VT_POT_SCALE_INT16) && verifier.EndTable();
}
};
struct AddOptionsBuilder
{
+ typedef AddOptions Table;
flatbuffers::FlatBufferBuilder &fbb_;
flatbuffers::uoffset_t start_;
- void add_fused_activation_function(ActivationFunctionType fused_activation_function)
+ void add_fused_activation_function(onert_tflite::ActivationFunctionType fused_activation_function)
{
fbb_.AddElement<int8_t>(AddOptions::VT_FUSED_ACTIVATION_FUNCTION,
static_cast<int8_t>(fused_activation_function), 0);
}
+ void add_pot_scale_int16(bool pot_scale_int16)
+ {
+ fbb_.AddElement<uint8_t>(AddOptions::VT_POT_SCALE_INT16, static_cast<uint8_t>(pot_scale_int16),
+ 1);
+ }
explicit AddOptionsBuilder(flatbuffers::FlatBufferBuilder &_fbb) : fbb_(_fbb)
{
start_ = fbb_.StartTable();
}
- AddOptionsBuilder &operator=(const AddOptionsBuilder &);
flatbuffers::Offset<AddOptions> Finish()
{
const auto end = fbb_.EndTable(start_);
@@ -3522,22 +4096,27 @@ struct AddOptionsBuilder
inline flatbuffers::Offset<AddOptions>
CreateAddOptions(flatbuffers::FlatBufferBuilder &_fbb,
- ActivationFunctionType fused_activation_function = ActivationFunctionType_NONE)
+ onert_tflite::ActivationFunctionType fused_activation_function =
+ onert_tflite::ActivationFunctionType_NONE,
+ bool pot_scale_int16 = true)
{
AddOptionsBuilder builder_(_fbb);
+ builder_.add_pot_scale_int16(pot_scale_int16);
builder_.add_fused_activation_function(fused_activation_function);
return builder_.Finish();
}
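
Note: AddOptions picks up a new pot_scale_int16 field (SubOptions receives the same change further down). It defaults to true on both read (GetField default 1) and write (AddElement default 1), so buffers that omit it keep the old behavior and the flag is only serialized when set to false. In the upstream TFLite schema this flag, as far as I can tell, marks whether int16 inputs/outputs are assumed to use power-of-two scales; that reading is an assumption, not something this header states. A hedged usage sketch:

// fbb is an existing flatbuffers::FlatBufferBuilder (assumption).
auto add_opts = onert_tflite::CreateAddOptions(
  fbb, onert_tflite::ActivationFunctionType_NONE, /*pot_scale_int16=*/false);
// After finishing and reading back a buffer, AddOptions::pot_scale_int16()
// returns true whenever the field is absent.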
struct MulOptions FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table
{
- enum
+ typedef MulOptionsBuilder Builder;
+ enum FlatBuffersVTableOffset FLATBUFFERS_VTABLE_UNDERLYING_TYPE
{
VT_FUSED_ACTIVATION_FUNCTION = 4
};
- ActivationFunctionType fused_activation_function() const
+ onert_tflite::ActivationFunctionType fused_activation_function() const
{
- return static_cast<ActivationFunctionType>(GetField<int8_t>(VT_FUSED_ACTIVATION_FUNCTION, 0));
+ return static_cast<onert_tflite::ActivationFunctionType>(
+ GetField<int8_t>(VT_FUSED_ACTIVATION_FUNCTION, 0));
}
bool Verify(flatbuffers::Verifier &verifier) const
{
@@ -3548,9 +4127,10 @@ struct MulOptions FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table
struct MulOptionsBuilder
{
+ typedef MulOptions Table;
flatbuffers::FlatBufferBuilder &fbb_;
flatbuffers::uoffset_t start_;
- void add_fused_activation_function(ActivationFunctionType fused_activation_function)
+ void add_fused_activation_function(onert_tflite::ActivationFunctionType fused_activation_function)
{
fbb_.AddElement<int8_t>(MulOptions::VT_FUSED_ACTIVATION_FUNCTION,
static_cast<int8_t>(fused_activation_function), 0);
@@ -3559,7 +4139,6 @@ struct MulOptionsBuilder
{
start_ = fbb_.StartTable();
}
- MulOptionsBuilder &operator=(const MulOptionsBuilder &);
flatbuffers::Offset<MulOptions> Finish()
{
const auto end = fbb_.EndTable(start_);
@@ -3570,7 +4149,8 @@ struct MulOptionsBuilder
inline flatbuffers::Offset<MulOptions>
CreateMulOptions(flatbuffers::FlatBufferBuilder &_fbb,
- ActivationFunctionType fused_activation_function = ActivationFunctionType_NONE)
+ onert_tflite::ActivationFunctionType fused_activation_function =
+ onert_tflite::ActivationFunctionType_NONE)
{
MulOptionsBuilder builder_(_fbb);
builder_.add_fused_activation_function(fused_activation_function);
@@ -3579,13 +4159,15 @@ CreateMulOptions(flatbuffers::FlatBufferBuilder &_fbb,
struct L2NormOptions FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table
{
- enum
+ typedef L2NormOptionsBuilder Builder;
+ enum FlatBuffersVTableOffset FLATBUFFERS_VTABLE_UNDERLYING_TYPE
{
VT_FUSED_ACTIVATION_FUNCTION = 4
};
- ActivationFunctionType fused_activation_function() const
+ onert_tflite::ActivationFunctionType fused_activation_function() const
{
- return static_cast<ActivationFunctionType>(GetField<int8_t>(VT_FUSED_ACTIVATION_FUNCTION, 0));
+ return static_cast<onert_tflite::ActivationFunctionType>(
+ GetField<int8_t>(VT_FUSED_ACTIVATION_FUNCTION, 0));
}
bool Verify(flatbuffers::Verifier &verifier) const
{
@@ -3596,9 +4178,10 @@ struct L2NormOptions FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table
struct L2NormOptionsBuilder
{
+ typedef L2NormOptions Table;
flatbuffers::FlatBufferBuilder &fbb_;
flatbuffers::uoffset_t start_;
- void add_fused_activation_function(ActivationFunctionType fused_activation_function)
+ void add_fused_activation_function(onert_tflite::ActivationFunctionType fused_activation_function)
{
fbb_.AddElement<int8_t>(L2NormOptions::VT_FUSED_ACTIVATION_FUNCTION,
static_cast<int8_t>(fused_activation_function), 0);
@@ -3607,7 +4190,6 @@ struct L2NormOptionsBuilder
{
start_ = fbb_.StartTable();
}
- L2NormOptionsBuilder &operator=(const L2NormOptionsBuilder &);
flatbuffers::Offset<L2NormOptions> Finish()
{
const auto end = fbb_.EndTable(start_);
@@ -3618,7 +4200,8 @@ struct L2NormOptionsBuilder
inline flatbuffers::Offset<L2NormOptions>
CreateL2NormOptions(flatbuffers::FlatBufferBuilder &_fbb,
- ActivationFunctionType fused_activation_function = ActivationFunctionType_NONE)
+ onert_tflite::ActivationFunctionType fused_activation_function =
+ onert_tflite::ActivationFunctionType_NONE)
{
L2NormOptionsBuilder builder_(_fbb);
builder_.add_fused_activation_function(fused_activation_function);
@@ -3627,7 +4210,8 @@ CreateL2NormOptions(flatbuffers::FlatBufferBuilder &_fbb,
struct LocalResponseNormalizationOptions FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table
{
- enum
+ typedef LocalResponseNormalizationOptionsBuilder Builder;
+ enum FlatBuffersVTableOffset FLATBUFFERS_VTABLE_UNDERLYING_TYPE
{
VT_RADIUS = 4,
VT_BIAS = 6,
@@ -3648,6 +4232,7 @@ struct LocalResponseNormalizationOptions FLATBUFFERS_FINAL_CLASS : private flatb
struct LocalResponseNormalizationOptionsBuilder
{
+ typedef LocalResponseNormalizationOptions Table;
flatbuffers::FlatBufferBuilder &fbb_;
flatbuffers::uoffset_t start_;
void add_radius(int32_t radius)
@@ -3671,8 +4256,6 @@ struct LocalResponseNormalizationOptionsBuilder
{
start_ = fbb_.StartTable();
}
- LocalResponseNormalizationOptionsBuilder &
- operator=(const LocalResponseNormalizationOptionsBuilder &);
flatbuffers::Offset<LocalResponseNormalizationOptions> Finish()
{
const auto end = fbb_.EndTable(start_);
@@ -3695,7 +4278,8 @@ CreateLocalResponseNormalizationOptions(flatbuffers::FlatBufferBuilder &_fbb, in
struct LSTMOptions FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table
{
- enum
+ typedef LSTMOptionsBuilder Builder;
+ enum FlatBuffersVTableOffset FLATBUFFERS_VTABLE_UNDERLYING_TYPE
{
VT_FUSED_ACTIVATION_FUNCTION = 4,
VT_CELL_CLIP = 6,
@@ -3703,15 +4287,16 @@ struct LSTMOptions FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table
VT_KERNEL_TYPE = 10,
VT_ASYMMETRIC_QUANTIZE_INPUTS = 12
};
- ActivationFunctionType fused_activation_function() const
+ onert_tflite::ActivationFunctionType fused_activation_function() const
{
- return static_cast<ActivationFunctionType>(GetField<int8_t>(VT_FUSED_ACTIVATION_FUNCTION, 0));
+ return static_cast<onert_tflite::ActivationFunctionType>(
+ GetField<int8_t>(VT_FUSED_ACTIVATION_FUNCTION, 0));
}
float cell_clip() const { return GetField<float>(VT_CELL_CLIP, 0.0f); }
float proj_clip() const { return GetField<float>(VT_PROJ_CLIP, 0.0f); }
- LSTMKernelType kernel_type() const
+ onert_tflite::LSTMKernelType kernel_type() const
{
- return static_cast<LSTMKernelType>(GetField<int8_t>(VT_KERNEL_TYPE, 0));
+ return static_cast<onert_tflite::LSTMKernelType>(GetField<int8_t>(VT_KERNEL_TYPE, 0));
}
bool asymmetric_quantize_inputs() const
{
@@ -3730,9 +4315,10 @@ struct LSTMOptions FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table
struct LSTMOptionsBuilder
{
+ typedef LSTMOptions Table;
flatbuffers::FlatBufferBuilder &fbb_;
flatbuffers::uoffset_t start_;
- void add_fused_activation_function(ActivationFunctionType fused_activation_function)
+ void add_fused_activation_function(onert_tflite::ActivationFunctionType fused_activation_function)
{
fbb_.AddElement<int8_t>(LSTMOptions::VT_FUSED_ACTIVATION_FUNCTION,
static_cast<int8_t>(fused_activation_function), 0);
@@ -3745,7 +4331,7 @@ struct LSTMOptionsBuilder
{
fbb_.AddElement<float>(LSTMOptions::VT_PROJ_CLIP, proj_clip, 0.0f);
}
- void add_kernel_type(LSTMKernelType kernel_type)
+ void add_kernel_type(onert_tflite::LSTMKernelType kernel_type)
{
fbb_.AddElement<int8_t>(LSTMOptions::VT_KERNEL_TYPE, static_cast<int8_t>(kernel_type), 0);
}
@@ -3758,7 +4344,6 @@ struct LSTMOptionsBuilder
{
start_ = fbb_.StartTable();
}
- LSTMOptionsBuilder &operator=(const LSTMOptionsBuilder &);
flatbuffers::Offset<LSTMOptions> Finish()
{
const auto end = fbb_.EndTable(start_);
@@ -3769,9 +4354,10 @@ struct LSTMOptionsBuilder
inline flatbuffers::Offset<LSTMOptions>
CreateLSTMOptions(flatbuffers::FlatBufferBuilder &_fbb,
- ActivationFunctionType fused_activation_function = ActivationFunctionType_NONE,
+ onert_tflite::ActivationFunctionType fused_activation_function =
+ onert_tflite::ActivationFunctionType_NONE,
float cell_clip = 0.0f, float proj_clip = 0.0f,
- LSTMKernelType kernel_type = LSTMKernelType_FULL,
+ onert_tflite::LSTMKernelType kernel_type = onert_tflite::LSTMKernelType_FULL,
bool asymmetric_quantize_inputs = false)
{
LSTMOptionsBuilder builder_(_fbb);
@@ -3785,7 +4371,8 @@ CreateLSTMOptions(flatbuffers::FlatBufferBuilder &_fbb,
struct UnidirectionalSequenceLSTMOptions FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table
{
- enum
+ typedef UnidirectionalSequenceLSTMOptionsBuilder Builder;
+ enum FlatBuffersVTableOffset FLATBUFFERS_VTABLE_UNDERLYING_TYPE
{
VT_FUSED_ACTIVATION_FUNCTION = 4,
VT_CELL_CLIP = 6,
@@ -3793,9 +4380,10 @@ struct UnidirectionalSequenceLSTMOptions FLATBUFFERS_FINAL_CLASS : private flatb
VT_TIME_MAJOR = 10,
VT_ASYMMETRIC_QUANTIZE_INPUTS = 12
};
- ActivationFunctionType fused_activation_function() const
+ onert_tflite::ActivationFunctionType fused_activation_function() const
{
- return static_cast<ActivationFunctionType>(GetField<int8_t>(VT_FUSED_ACTIVATION_FUNCTION, 0));
+ return static_cast<onert_tflite::ActivationFunctionType>(
+ GetField<int8_t>(VT_FUSED_ACTIVATION_FUNCTION, 0));
}
float cell_clip() const { return GetField<float>(VT_CELL_CLIP, 0.0f); }
float proj_clip() const { return GetField<float>(VT_PROJ_CLIP, 0.0f); }
@@ -3817,9 +4405,10 @@ struct UnidirectionalSequenceLSTMOptions FLATBUFFERS_FINAL_CLASS : private flatb
struct UnidirectionalSequenceLSTMOptionsBuilder
{
+ typedef UnidirectionalSequenceLSTMOptions Table;
flatbuffers::FlatBufferBuilder &fbb_;
flatbuffers::uoffset_t start_;
- void add_fused_activation_function(ActivationFunctionType fused_activation_function)
+ void add_fused_activation_function(onert_tflite::ActivationFunctionType fused_activation_function)
{
fbb_.AddElement<int8_t>(UnidirectionalSequenceLSTMOptions::VT_FUSED_ACTIVATION_FUNCTION,
static_cast<int8_t>(fused_activation_function), 0);
@@ -3847,8 +4436,6 @@ struct UnidirectionalSequenceLSTMOptionsBuilder
{
start_ = fbb_.StartTable();
}
- UnidirectionalSequenceLSTMOptionsBuilder &
- operator=(const UnidirectionalSequenceLSTMOptionsBuilder &);
flatbuffers::Offset<UnidirectionalSequenceLSTMOptions> Finish()
{
const auto end = fbb_.EndTable(start_);
@@ -3860,7 +4447,8 @@ struct UnidirectionalSequenceLSTMOptionsBuilder
inline flatbuffers::Offset<UnidirectionalSequenceLSTMOptions>
CreateUnidirectionalSequenceLSTMOptions(
flatbuffers::FlatBufferBuilder &_fbb,
- ActivationFunctionType fused_activation_function = ActivationFunctionType_NONE,
+ onert_tflite::ActivationFunctionType fused_activation_function =
+ onert_tflite::ActivationFunctionType_NONE,
float cell_clip = 0.0f, float proj_clip = 0.0f, bool time_major = false,
bool asymmetric_quantize_inputs = false)
{
@@ -3875,7 +4463,8 @@ CreateUnidirectionalSequenceLSTMOptions(
struct BidirectionalSequenceLSTMOptions FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table
{
- enum
+ typedef BidirectionalSequenceLSTMOptionsBuilder Builder;
+ enum FlatBuffersVTableOffset FLATBUFFERS_VTABLE_UNDERLYING_TYPE
{
VT_FUSED_ACTIVATION_FUNCTION = 4,
VT_CELL_CLIP = 6,
@@ -3884,9 +4473,10 @@ struct BidirectionalSequenceLSTMOptions FLATBUFFERS_FINAL_CLASS : private flatbu
VT_TIME_MAJOR = 12,
VT_ASYMMETRIC_QUANTIZE_INPUTS = 14
};
- ActivationFunctionType fused_activation_function() const
+ onert_tflite::ActivationFunctionType fused_activation_function() const
{
- return static_cast<ActivationFunctionType>(GetField<int8_t>(VT_FUSED_ACTIVATION_FUNCTION, 0));
+ return static_cast<onert_tflite::ActivationFunctionType>(
+ GetField<int8_t>(VT_FUSED_ACTIVATION_FUNCTION, 0));
}
float cell_clip() const { return GetField<float>(VT_CELL_CLIP, 0.0f); }
float proj_clip() const { return GetField<float>(VT_PROJ_CLIP, 0.0f); }
@@ -3910,9 +4500,10 @@ struct BidirectionalSequenceLSTMOptions FLATBUFFERS_FINAL_CLASS : private flatbu
struct BidirectionalSequenceLSTMOptionsBuilder
{
+ typedef BidirectionalSequenceLSTMOptions Table;
flatbuffers::FlatBufferBuilder &fbb_;
flatbuffers::uoffset_t start_;
- void add_fused_activation_function(ActivationFunctionType fused_activation_function)
+ void add_fused_activation_function(onert_tflite::ActivationFunctionType fused_activation_function)
{
fbb_.AddElement<int8_t>(BidirectionalSequenceLSTMOptions::VT_FUSED_ACTIVATION_FUNCTION,
static_cast<int8_t>(fused_activation_function), 0);
@@ -3945,8 +4536,6 @@ struct BidirectionalSequenceLSTMOptionsBuilder
{
start_ = fbb_.StartTable();
}
- BidirectionalSequenceLSTMOptionsBuilder &
- operator=(const BidirectionalSequenceLSTMOptionsBuilder &);
flatbuffers::Offset<BidirectionalSequenceLSTMOptions> Finish()
{
const auto end = fbb_.EndTable(start_);
@@ -3957,7 +4546,8 @@ struct BidirectionalSequenceLSTMOptionsBuilder
inline flatbuffers::Offset<BidirectionalSequenceLSTMOptions> CreateBidirectionalSequenceLSTMOptions(
flatbuffers::FlatBufferBuilder &_fbb,
- ActivationFunctionType fused_activation_function = ActivationFunctionType_NONE,
+ onert_tflite::ActivationFunctionType fused_activation_function =
+ onert_tflite::ActivationFunctionType_NONE,
float cell_clip = 0.0f, float proj_clip = 0.0f, bool merge_outputs = false,
bool time_major = true, bool asymmetric_quantize_inputs = false)
{
@@ -3973,7 +4563,8 @@ inline flatbuffers::Offset<BidirectionalSequenceLSTMOptions> CreateBidirectional
struct ResizeBilinearOptions FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table
{
- enum
+ typedef ResizeBilinearOptionsBuilder Builder;
+ enum FlatBuffersVTableOffset FLATBUFFERS_VTABLE_UNDERLYING_TYPE
{
VT_ALIGN_CORNERS = 8,
VT_HALF_PIXEL_CENTERS = 10
@@ -3989,6 +4580,7 @@ struct ResizeBilinearOptions FLATBUFFERS_FINAL_CLASS : private flatbuffers::Tabl
struct ResizeBilinearOptionsBuilder
{
+ typedef ResizeBilinearOptions Table;
flatbuffers::FlatBufferBuilder &fbb_;
flatbuffers::uoffset_t start_;
void add_align_corners(bool align_corners)
@@ -4005,7 +4597,6 @@ struct ResizeBilinearOptionsBuilder
{
start_ = fbb_.StartTable();
}
- ResizeBilinearOptionsBuilder &operator=(const ResizeBilinearOptionsBuilder &);
flatbuffers::Offset<ResizeBilinearOptions> Finish()
{
const auto end = fbb_.EndTable(start_);
@@ -4026,20 +4617,24 @@ CreateResizeBilinearOptions(flatbuffers::FlatBufferBuilder &_fbb, bool align_cor
struct ResizeNearestNeighborOptions FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table
{
- enum
+ typedef ResizeNearestNeighborOptionsBuilder Builder;
+ enum FlatBuffersVTableOffset FLATBUFFERS_VTABLE_UNDERLYING_TYPE
{
- VT_ALIGN_CORNERS = 4
+ VT_ALIGN_CORNERS = 4,
+ VT_HALF_PIXEL_CENTERS = 6
};
bool align_corners() const { return GetField<uint8_t>(VT_ALIGN_CORNERS, 0) != 0; }
+ bool half_pixel_centers() const { return GetField<uint8_t>(VT_HALF_PIXEL_CENTERS, 0) != 0; }
bool Verify(flatbuffers::Verifier &verifier) const
{
return VerifyTableStart(verifier) && VerifyField<uint8_t>(verifier, VT_ALIGN_CORNERS) &&
- verifier.EndTable();
+ VerifyField<uint8_t>(verifier, VT_HALF_PIXEL_CENTERS) && verifier.EndTable();
}
};
struct ResizeNearestNeighborOptionsBuilder
{
+ typedef ResizeNearestNeighborOptions Table;
flatbuffers::FlatBufferBuilder &fbb_;
flatbuffers::uoffset_t start_;
void add_align_corners(bool align_corners)
@@ -4047,11 +4642,15 @@ struct ResizeNearestNeighborOptionsBuilder
fbb_.AddElement<uint8_t>(ResizeNearestNeighborOptions::VT_ALIGN_CORNERS,
static_cast<uint8_t>(align_corners), 0);
}
+ void add_half_pixel_centers(bool half_pixel_centers)
+ {
+ fbb_.AddElement<uint8_t>(ResizeNearestNeighborOptions::VT_HALF_PIXEL_CENTERS,
+ static_cast<uint8_t>(half_pixel_centers), 0);
+ }
explicit ResizeNearestNeighborOptionsBuilder(flatbuffers::FlatBufferBuilder &_fbb) : fbb_(_fbb)
{
start_ = fbb_.StartTable();
}
- ResizeNearestNeighborOptionsBuilder &operator=(const ResizeNearestNeighborOptionsBuilder &);
flatbuffers::Offset<ResizeNearestNeighborOptions> Finish()
{
const auto end = fbb_.EndTable(start_);
@@ -4061,16 +4660,19 @@ struct ResizeNearestNeighborOptionsBuilder
};
inline flatbuffers::Offset<ResizeNearestNeighborOptions>
-CreateResizeNearestNeighborOptions(flatbuffers::FlatBufferBuilder &_fbb, bool align_corners = false)
+CreateResizeNearestNeighborOptions(flatbuffers::FlatBufferBuilder &_fbb, bool align_corners = false,
+ bool half_pixel_centers = false)
{
ResizeNearestNeighborOptionsBuilder builder_(_fbb);
+ builder_.add_half_pixel_centers(half_pixel_centers);
builder_.add_align_corners(align_corners);
return builder_.Finish();
}
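
Note: ResizeNearestNeighborOptions now carries half_pixel_centers next to align_corners, mirroring what ResizeBilinearOptions already exposes above. Both default to false, and false values are not serialized. Presumably this selects the TF2-style half-pixel coordinate mapping, matching the bilinear variant. Usage sketch:

// fbb is an existing flatbuffers::FlatBufferBuilder (assumption).
auto resize_opts = onert_tflite::CreateResizeNearestNeighborOptions(
  fbb, /*align_corners=*/false, /*half_pixel_centers=*/true);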
struct CallOptions FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table
{
- enum
+ typedef CallOptionsBuilder Builder;
+ enum FlatBuffersVTableOffset FLATBUFFERS_VTABLE_UNDERLYING_TYPE
{
VT_SUBGRAPH = 4
};
@@ -4084,6 +4686,7 @@ struct CallOptions FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table
struct CallOptionsBuilder
{
+ typedef CallOptions Table;
flatbuffers::FlatBufferBuilder &fbb_;
flatbuffers::uoffset_t start_;
void add_subgraph(uint32_t subgraph)
@@ -4094,7 +4697,6 @@ struct CallOptionsBuilder
{
start_ = fbb_.StartTable();
}
- CallOptionsBuilder &operator=(const CallOptionsBuilder &);
flatbuffers::Offset<CallOptions> Finish()
{
const auto end = fbb_.EndTable(start_);
@@ -4113,6 +4715,7 @@ inline flatbuffers::Offset<CallOptions> CreateCallOptions(flatbuffers::FlatBuffe
struct PadOptions FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table
{
+ typedef PadOptionsBuilder Builder;
bool Verify(flatbuffers::Verifier &verifier) const
{
return VerifyTableStart(verifier) && verifier.EndTable();
@@ -4121,13 +4724,13 @@ struct PadOptions FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table
struct PadOptionsBuilder
{
+ typedef PadOptions Table;
flatbuffers::FlatBufferBuilder &fbb_;
flatbuffers::uoffset_t start_;
explicit PadOptionsBuilder(flatbuffers::FlatBufferBuilder &_fbb) : fbb_(_fbb)
{
start_ = fbb_.StartTable();
}
- PadOptionsBuilder &operator=(const PadOptionsBuilder &);
flatbuffers::Offset<PadOptions> Finish()
{
const auto end = fbb_.EndTable(start_);
@@ -4144,6 +4747,7 @@ inline flatbuffers::Offset<PadOptions> CreatePadOptions(flatbuffers::FlatBufferB
struct PadV2Options FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table
{
+ typedef PadV2OptionsBuilder Builder;
bool Verify(flatbuffers::Verifier &verifier) const
{
return VerifyTableStart(verifier) && verifier.EndTable();
@@ -4152,13 +4756,13 @@ struct PadV2Options FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table
struct PadV2OptionsBuilder
{
+ typedef PadV2Options Table;
flatbuffers::FlatBufferBuilder &fbb_;
flatbuffers::uoffset_t start_;
explicit PadV2OptionsBuilder(flatbuffers::FlatBufferBuilder &_fbb) : fbb_(_fbb)
{
start_ = fbb_.StartTable();
}
- PadV2OptionsBuilder &operator=(const PadV2OptionsBuilder &);
flatbuffers::Offset<PadV2Options> Finish()
{
const auto end = fbb_.EndTable(start_);
@@ -4175,7 +4779,8 @@ inline flatbuffers::Offset<PadV2Options> CreatePadV2Options(flatbuffers::FlatBuf
struct ReshapeOptions FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table
{
- enum
+ typedef ReshapeOptionsBuilder Builder;
+ enum FlatBuffersVTableOffset FLATBUFFERS_VTABLE_UNDERLYING_TYPE
{
VT_NEW_SHAPE = 4
};
@@ -4192,6 +4797,7 @@ struct ReshapeOptions FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table
struct ReshapeOptionsBuilder
{
+ typedef ReshapeOptions Table;
flatbuffers::FlatBufferBuilder &fbb_;
flatbuffers::uoffset_t start_;
void add_new_shape(flatbuffers::Offset<flatbuffers::Vector<int32_t>> new_shape)
@@ -4202,7 +4808,6 @@ struct ReshapeOptionsBuilder
{
start_ = fbb_.StartTable();
}
- ReshapeOptionsBuilder &operator=(const ReshapeOptionsBuilder &);
flatbuffers::Offset<ReshapeOptions> Finish()
{
const auto end = fbb_.EndTable(start_);
@@ -4224,12 +4829,13 @@ inline flatbuffers::Offset<ReshapeOptions>
CreateReshapeOptionsDirect(flatbuffers::FlatBufferBuilder &_fbb,
const std::vector<int32_t> *new_shape = nullptr)
{
- return onert_tflite::CreateReshapeOptions(_fbb,
- new_shape ? _fbb.CreateVector<int32_t>(*new_shape) : 0);
+ auto new_shape__ = new_shape ? _fbb.CreateVector<int32_t>(*new_shape) : 0;
+ return onert_tflite::CreateReshapeOptions(_fbb, new_shape__);
}
struct SpaceToBatchNDOptions FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table
{
+ typedef SpaceToBatchNDOptionsBuilder Builder;
bool Verify(flatbuffers::Verifier &verifier) const
{
return VerifyTableStart(verifier) && verifier.EndTable();
@@ -4238,13 +4844,13 @@ struct SpaceToBatchNDOptions FLATBUFFERS_FINAL_CLASS : private flatbuffers::Tabl
struct SpaceToBatchNDOptionsBuilder
{
+ typedef SpaceToBatchNDOptions Table;
flatbuffers::FlatBufferBuilder &fbb_;
flatbuffers::uoffset_t start_;
explicit SpaceToBatchNDOptionsBuilder(flatbuffers::FlatBufferBuilder &_fbb) : fbb_(_fbb)
{
start_ = fbb_.StartTable();
}
- SpaceToBatchNDOptionsBuilder &operator=(const SpaceToBatchNDOptionsBuilder &);
flatbuffers::Offset<SpaceToBatchNDOptions> Finish()
{
const auto end = fbb_.EndTable(start_);
@@ -4262,6 +4868,7 @@ CreateSpaceToBatchNDOptions(flatbuffers::FlatBufferBuilder &_fbb)
struct BatchToSpaceNDOptions FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table
{
+ typedef BatchToSpaceNDOptionsBuilder Builder;
bool Verify(flatbuffers::Verifier &verifier) const
{
return VerifyTableStart(verifier) && verifier.EndTable();
@@ -4270,13 +4877,13 @@ struct BatchToSpaceNDOptions FLATBUFFERS_FINAL_CLASS : private flatbuffers::Tabl
struct BatchToSpaceNDOptionsBuilder
{
+ typedef BatchToSpaceNDOptions Table;
flatbuffers::FlatBufferBuilder &fbb_;
flatbuffers::uoffset_t start_;
explicit BatchToSpaceNDOptionsBuilder(flatbuffers::FlatBufferBuilder &_fbb) : fbb_(_fbb)
{
start_ = fbb_.StartTable();
}
- BatchToSpaceNDOptionsBuilder &operator=(const BatchToSpaceNDOptionsBuilder &);
flatbuffers::Offset<BatchToSpaceNDOptions> Finish()
{
const auto end = fbb_.EndTable(start_);
@@ -4294,7 +4901,8 @@ CreateBatchToSpaceNDOptions(flatbuffers::FlatBufferBuilder &_fbb)
struct SkipGramOptions FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table
{
- enum
+ typedef SkipGramOptionsBuilder Builder;
+ enum FlatBuffersVTableOffset FLATBUFFERS_VTABLE_UNDERLYING_TYPE
{
VT_NGRAM_SIZE = 4,
VT_MAX_SKIP_SIZE = 6,
@@ -4313,6 +4921,7 @@ struct SkipGramOptions FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table
struct SkipGramOptionsBuilder
{
+ typedef SkipGramOptions Table;
flatbuffers::FlatBufferBuilder &fbb_;
flatbuffers::uoffset_t start_;
void add_ngram_size(int32_t ngram_size)
@@ -4332,7 +4941,6 @@ struct SkipGramOptionsBuilder
{
start_ = fbb_.StartTable();
}
- SkipGramOptionsBuilder &operator=(const SkipGramOptionsBuilder &);
flatbuffers::Offset<SkipGramOptions> Finish()
{
const auto end = fbb_.EndTable(start_);
@@ -4354,7 +4962,8 @@ CreateSkipGramOptions(flatbuffers::FlatBufferBuilder &_fbb, int32_t ngram_size =
struct SpaceToDepthOptions FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table
{
- enum
+ typedef SpaceToDepthOptionsBuilder Builder;
+ enum FlatBuffersVTableOffset FLATBUFFERS_VTABLE_UNDERLYING_TYPE
{
VT_BLOCK_SIZE = 4
};
@@ -4368,6 +4977,7 @@ struct SpaceToDepthOptions FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table
struct SpaceToDepthOptionsBuilder
{
+ typedef SpaceToDepthOptions Table;
flatbuffers::FlatBufferBuilder &fbb_;
flatbuffers::uoffset_t start_;
void add_block_size(int32_t block_size)
@@ -4378,7 +4988,6 @@ struct SpaceToDepthOptionsBuilder
{
start_ = fbb_.StartTable();
}
- SpaceToDepthOptionsBuilder &operator=(const SpaceToDepthOptionsBuilder &);
flatbuffers::Offset<SpaceToDepthOptions> Finish()
{
const auto end = fbb_.EndTable(start_);
@@ -4397,7 +5006,8 @@ CreateSpaceToDepthOptions(flatbuffers::FlatBufferBuilder &_fbb, int32_t block_si
struct DepthToSpaceOptions FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table
{
- enum
+ typedef DepthToSpaceOptionsBuilder Builder;
+ enum FlatBuffersVTableOffset FLATBUFFERS_VTABLE_UNDERLYING_TYPE
{
VT_BLOCK_SIZE = 4
};
@@ -4411,6 +5021,7 @@ struct DepthToSpaceOptions FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table
struct DepthToSpaceOptionsBuilder
{
+ typedef DepthToSpaceOptions Table;
flatbuffers::FlatBufferBuilder &fbb_;
flatbuffers::uoffset_t start_;
void add_block_size(int32_t block_size)
@@ -4421,7 +5032,6 @@ struct DepthToSpaceOptionsBuilder
{
start_ = fbb_.StartTable();
}
- DepthToSpaceOptionsBuilder &operator=(const DepthToSpaceOptionsBuilder &);
flatbuffers::Offset<DepthToSpaceOptions> Finish()
{
const auto end = fbb_.EndTable(start_);
@@ -4440,35 +5050,45 @@ CreateDepthToSpaceOptions(flatbuffers::FlatBufferBuilder &_fbb, int32_t block_si
struct SubOptions FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table
{
- enum
+ typedef SubOptionsBuilder Builder;
+ enum FlatBuffersVTableOffset FLATBUFFERS_VTABLE_UNDERLYING_TYPE
{
- VT_FUSED_ACTIVATION_FUNCTION = 4
+ VT_FUSED_ACTIVATION_FUNCTION = 4,
+ VT_POT_SCALE_INT16 = 6
};
- ActivationFunctionType fused_activation_function() const
+ onert_tflite::ActivationFunctionType fused_activation_function() const
{
- return static_cast<ActivationFunctionType>(GetField<int8_t>(VT_FUSED_ACTIVATION_FUNCTION, 0));
+ return static_cast<onert_tflite::ActivationFunctionType>(
+ GetField<int8_t>(VT_FUSED_ACTIVATION_FUNCTION, 0));
}
+ bool pot_scale_int16() const { return GetField<uint8_t>(VT_POT_SCALE_INT16, 1) != 0; }
bool Verify(flatbuffers::Verifier &verifier) const
{
return VerifyTableStart(verifier) &&
- VerifyField<int8_t>(verifier, VT_FUSED_ACTIVATION_FUNCTION) && verifier.EndTable();
+ VerifyField<int8_t>(verifier, VT_FUSED_ACTIVATION_FUNCTION) &&
+ VerifyField<uint8_t>(verifier, VT_POT_SCALE_INT16) && verifier.EndTable();
}
};
struct SubOptionsBuilder
{
+ typedef SubOptions Table;
flatbuffers::FlatBufferBuilder &fbb_;
flatbuffers::uoffset_t start_;
- void add_fused_activation_function(ActivationFunctionType fused_activation_function)
+ void add_fused_activation_function(onert_tflite::ActivationFunctionType fused_activation_function)
{
fbb_.AddElement<int8_t>(SubOptions::VT_FUSED_ACTIVATION_FUNCTION,
static_cast<int8_t>(fused_activation_function), 0);
}
+ void add_pot_scale_int16(bool pot_scale_int16)
+ {
+ fbb_.AddElement<uint8_t>(SubOptions::VT_POT_SCALE_INT16, static_cast<uint8_t>(pot_scale_int16),
+ 1);
+ }
explicit SubOptionsBuilder(flatbuffers::FlatBufferBuilder &_fbb) : fbb_(_fbb)
{
start_ = fbb_.StartTable();
}
- SubOptionsBuilder &operator=(const SubOptionsBuilder &);
flatbuffers::Offset<SubOptions> Finish()
{
const auto end = fbb_.EndTable(start_);
@@ -4479,22 +5099,27 @@ struct SubOptionsBuilder
inline flatbuffers::Offset<SubOptions>
CreateSubOptions(flatbuffers::FlatBufferBuilder &_fbb,
- ActivationFunctionType fused_activation_function = ActivationFunctionType_NONE)
+ onert_tflite::ActivationFunctionType fused_activation_function =
+ onert_tflite::ActivationFunctionType_NONE,
+ bool pot_scale_int16 = true)
{
SubOptionsBuilder builder_(_fbb);
+ builder_.add_pot_scale_int16(pot_scale_int16);
builder_.add_fused_activation_function(fused_activation_function);
return builder_.Finish();
}
struct DivOptions FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table
{
- enum
+ typedef DivOptionsBuilder Builder;
+ enum FlatBuffersVTableOffset FLATBUFFERS_VTABLE_UNDERLYING_TYPE
{
VT_FUSED_ACTIVATION_FUNCTION = 4
};
- ActivationFunctionType fused_activation_function() const
+ onert_tflite::ActivationFunctionType fused_activation_function() const
{
- return static_cast<ActivationFunctionType>(GetField<int8_t>(VT_FUSED_ACTIVATION_FUNCTION, 0));
+ return static_cast<onert_tflite::ActivationFunctionType>(
+ GetField<int8_t>(VT_FUSED_ACTIVATION_FUNCTION, 0));
}
bool Verify(flatbuffers::Verifier &verifier) const
{
@@ -4505,9 +5130,10 @@ struct DivOptions FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table
struct DivOptionsBuilder
{
+ typedef DivOptions Table;
flatbuffers::FlatBufferBuilder &fbb_;
flatbuffers::uoffset_t start_;
- void add_fused_activation_function(ActivationFunctionType fused_activation_function)
+ void add_fused_activation_function(onert_tflite::ActivationFunctionType fused_activation_function)
{
fbb_.AddElement<int8_t>(DivOptions::VT_FUSED_ACTIVATION_FUNCTION,
static_cast<int8_t>(fused_activation_function), 0);
@@ -4516,7 +5142,6 @@ struct DivOptionsBuilder
{
start_ = fbb_.StartTable();
}
- DivOptionsBuilder &operator=(const DivOptionsBuilder &);
flatbuffers::Offset<DivOptions> Finish()
{
const auto end = fbb_.EndTable(start_);
@@ -4527,7 +5152,8 @@ struct DivOptionsBuilder
inline flatbuffers::Offset<DivOptions>
CreateDivOptions(flatbuffers::FlatBufferBuilder &_fbb,
- ActivationFunctionType fused_activation_function = ActivationFunctionType_NONE)
+ onert_tflite::ActivationFunctionType fused_activation_function =
+ onert_tflite::ActivationFunctionType_NONE)
{
DivOptionsBuilder builder_(_fbb);
builder_.add_fused_activation_function(fused_activation_function);
@@ -4536,6 +5162,7 @@ CreateDivOptions(flatbuffers::FlatBufferBuilder &_fbb,
struct TopKV2Options FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table
{
+ typedef TopKV2OptionsBuilder Builder;
bool Verify(flatbuffers::Verifier &verifier) const
{
return VerifyTableStart(verifier) && verifier.EndTable();
@@ -4544,13 +5171,13 @@ struct TopKV2Options FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table
struct TopKV2OptionsBuilder
{
+ typedef TopKV2Options Table;
flatbuffers::FlatBufferBuilder &fbb_;
flatbuffers::uoffset_t start_;
explicit TopKV2OptionsBuilder(flatbuffers::FlatBufferBuilder &_fbb) : fbb_(_fbb)
{
start_ = fbb_.StartTable();
}
- TopKV2OptionsBuilder &operator=(const TopKV2OptionsBuilder &);
flatbuffers::Offset<TopKV2Options> Finish()
{
const auto end = fbb_.EndTable(start_);
@@ -4567,13 +5194,14 @@ inline flatbuffers::Offset<TopKV2Options> CreateTopKV2Options(flatbuffers::FlatB
struct EmbeddingLookupSparseOptions FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table
{
- enum
+ typedef EmbeddingLookupSparseOptionsBuilder Builder;
+ enum FlatBuffersVTableOffset FLATBUFFERS_VTABLE_UNDERLYING_TYPE
{
VT_COMBINER = 4
};
- CombinerType combiner() const
+ onert_tflite::CombinerType combiner() const
{
- return static_cast<CombinerType>(GetField<int8_t>(VT_COMBINER, 0));
+ return static_cast<onert_tflite::CombinerType>(GetField<int8_t>(VT_COMBINER, 0));
}
bool Verify(flatbuffers::Verifier &verifier) const
{
@@ -4584,9 +5212,10 @@ struct EmbeddingLookupSparseOptions FLATBUFFERS_FINAL_CLASS : private flatbuffer
struct EmbeddingLookupSparseOptionsBuilder
{
+ typedef EmbeddingLookupSparseOptions Table;
flatbuffers::FlatBufferBuilder &fbb_;
flatbuffers::uoffset_t start_;
- void add_combiner(CombinerType combiner)
+ void add_combiner(onert_tflite::CombinerType combiner)
{
fbb_.AddElement<int8_t>(EmbeddingLookupSparseOptions::VT_COMBINER,
static_cast<int8_t>(combiner), 0);
@@ -4595,7 +5224,6 @@ struct EmbeddingLookupSparseOptionsBuilder
{
start_ = fbb_.StartTable();
}
- EmbeddingLookupSparseOptionsBuilder &operator=(const EmbeddingLookupSparseOptionsBuilder &);
flatbuffers::Offset<EmbeddingLookupSparseOptions> Finish()
{
const auto end = fbb_.EndTable(start_);
@@ -4604,9 +5232,9 @@ struct EmbeddingLookupSparseOptionsBuilder
}
};
-inline flatbuffers::Offset<EmbeddingLookupSparseOptions>
-CreateEmbeddingLookupSparseOptions(flatbuffers::FlatBufferBuilder &_fbb,
- CombinerType combiner = CombinerType_SUM)
+inline flatbuffers::Offset<EmbeddingLookupSparseOptions> CreateEmbeddingLookupSparseOptions(
+ flatbuffers::FlatBufferBuilder &_fbb,
+ onert_tflite::CombinerType combiner = onert_tflite::CombinerType_SUM)
{
EmbeddingLookupSparseOptionsBuilder builder_(_fbb);
builder_.add_combiner(combiner);
@@ -4615,28 +5243,35 @@ CreateEmbeddingLookupSparseOptions(flatbuffers::FlatBufferBuilder &_fbb,
struct GatherOptions FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table
{
- enum
+ typedef GatherOptionsBuilder Builder;
+ enum FlatBuffersVTableOffset FLATBUFFERS_VTABLE_UNDERLYING_TYPE
{
- VT_AXIS = 4
+ VT_AXIS = 4,
+ VT_BATCH_DIMS = 6
};
int32_t axis() const { return GetField<int32_t>(VT_AXIS, 0); }
+ int32_t batch_dims() const { return GetField<int32_t>(VT_BATCH_DIMS, 0); }
bool Verify(flatbuffers::Verifier &verifier) const
{
return VerifyTableStart(verifier) && VerifyField<int32_t>(verifier, VT_AXIS) &&
- verifier.EndTable();
+ VerifyField<int32_t>(verifier, VT_BATCH_DIMS) && verifier.EndTable();
}
};
struct GatherOptionsBuilder
{
+ typedef GatherOptions Table;
flatbuffers::FlatBufferBuilder &fbb_;
flatbuffers::uoffset_t start_;
void add_axis(int32_t axis) { fbb_.AddElement<int32_t>(GatherOptions::VT_AXIS, axis, 0); }
+ void add_batch_dims(int32_t batch_dims)
+ {
+ fbb_.AddElement<int32_t>(GatherOptions::VT_BATCH_DIMS, batch_dims, 0);
+ }
explicit GatherOptionsBuilder(flatbuffers::FlatBufferBuilder &_fbb) : fbb_(_fbb)
{
start_ = fbb_.StartTable();
}
- GatherOptionsBuilder &operator=(const GatherOptionsBuilder &);
flatbuffers::Offset<GatherOptions> Finish()
{
const auto end = fbb_.EndTable(start_);
@@ -4645,16 +5280,18 @@ struct GatherOptionsBuilder
}
};
-inline flatbuffers::Offset<GatherOptions> CreateGatherOptions(flatbuffers::FlatBufferBuilder &_fbb,
- int32_t axis = 0)
+inline flatbuffers::Offset<GatherOptions>
+CreateGatherOptions(flatbuffers::FlatBufferBuilder &_fbb, int32_t axis = 0, int32_t batch_dims = 0)
{
GatherOptionsBuilder builder_(_fbb);
+ builder_.add_batch_dims(batch_dims);
builder_.add_axis(axis);
return builder_.Finish();
}
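
Note: GatherOptions gains batch_dims alongside axis, defaulting to 0 so old buffers read back unchanged. In the upstream schema this appears to match tf.gather's batch_dims, i.e. the number of leading dimensions treated as batch dimensions; that semantic reading is my assumption, not something this header states. Usage sketch:

// fbb is an existing flatbuffers::FlatBufferBuilder (assumption).
auto gather_opts =
  onert_tflite::CreateGatherOptions(fbb, /*axis=*/1, /*batch_dims=*/1);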
struct TransposeOptions FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table
{
+ typedef TransposeOptionsBuilder Builder;
bool Verify(flatbuffers::Verifier &verifier) const
{
return VerifyTableStart(verifier) && verifier.EndTable();
@@ -4663,13 +5300,13 @@ struct TransposeOptions FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table
struct TransposeOptionsBuilder
{
+ typedef TransposeOptions Table;
flatbuffers::FlatBufferBuilder &fbb_;
flatbuffers::uoffset_t start_;
explicit TransposeOptionsBuilder(flatbuffers::FlatBufferBuilder &_fbb) : fbb_(_fbb)
{
start_ = fbb_.StartTable();
}
- TransposeOptionsBuilder &operator=(const TransposeOptionsBuilder &);
flatbuffers::Offset<TransposeOptions> Finish()
{
const auto end = fbb_.EndTable(start_);
@@ -4687,6 +5324,7 @@ CreateTransposeOptions(flatbuffers::FlatBufferBuilder &_fbb)
struct ExpOptions FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table
{
+ typedef ExpOptionsBuilder Builder;
bool Verify(flatbuffers::Verifier &verifier) const
{
return VerifyTableStart(verifier) && verifier.EndTable();
@@ -4695,13 +5333,13 @@ struct ExpOptions FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table
struct ExpOptionsBuilder
{
+ typedef ExpOptions Table;
flatbuffers::FlatBufferBuilder &fbb_;
flatbuffers::uoffset_t start_;
explicit ExpOptionsBuilder(flatbuffers::FlatBufferBuilder &_fbb) : fbb_(_fbb)
{
start_ = fbb_.StartTable();
}
- ExpOptionsBuilder &operator=(const ExpOptionsBuilder &);
flatbuffers::Offset<ExpOptions> Finish()
{
const auto end = fbb_.EndTable(start_);
@@ -4718,6 +5356,7 @@ inline flatbuffers::Offset<ExpOptions> CreateExpOptions(flatbuffers::FlatBufferB
struct CosOptions FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table
{
+ typedef CosOptionsBuilder Builder;
bool Verify(flatbuffers::Verifier &verifier) const
{
return VerifyTableStart(verifier) && verifier.EndTable();
@@ -4726,13 +5365,13 @@ struct CosOptions FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table
struct CosOptionsBuilder
{
+ typedef CosOptions Table;
flatbuffers::FlatBufferBuilder &fbb_;
flatbuffers::uoffset_t start_;
explicit CosOptionsBuilder(flatbuffers::FlatBufferBuilder &_fbb) : fbb_(_fbb)
{
start_ = fbb_.StartTable();
}
- CosOptionsBuilder &operator=(const CosOptionsBuilder &);
flatbuffers::Offset<CosOptions> Finish()
{
const auto end = fbb_.EndTable(start_);
@@ -4749,7 +5388,8 @@ inline flatbuffers::Offset<CosOptions> CreateCosOptions(flatbuffers::FlatBufferB
struct ReducerOptions FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table
{
- enum
+ typedef ReducerOptionsBuilder Builder;
+ enum FlatBuffersVTableOffset FLATBUFFERS_VTABLE_UNDERLYING_TYPE
{
VT_KEEP_DIMS = 4
};
@@ -4763,6 +5403,7 @@ struct ReducerOptions FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table
struct ReducerOptionsBuilder
{
+ typedef ReducerOptions Table;
flatbuffers::FlatBufferBuilder &fbb_;
flatbuffers::uoffset_t start_;
void add_keep_dims(bool keep_dims)
@@ -4773,7 +5414,6 @@ struct ReducerOptionsBuilder
{
start_ = fbb_.StartTable();
}
- ReducerOptionsBuilder &operator=(const ReducerOptionsBuilder &);
flatbuffers::Offset<ReducerOptions> Finish()
{
const auto end = fbb_.EndTable(start_);
@@ -4792,7 +5432,8 @@ CreateReducerOptions(flatbuffers::FlatBufferBuilder &_fbb, bool keep_dims = fals
struct SqueezeOptions FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table
{
- enum
+ typedef SqueezeOptionsBuilder Builder;
+ enum FlatBuffersVTableOffset FLATBUFFERS_VTABLE_UNDERLYING_TYPE
{
VT_SQUEEZE_DIMS = 4
};
@@ -4809,6 +5450,7 @@ struct SqueezeOptions FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table
struct SqueezeOptionsBuilder
{
+ typedef SqueezeOptions Table;
flatbuffers::FlatBufferBuilder &fbb_;
flatbuffers::uoffset_t start_;
void add_squeeze_dims(flatbuffers::Offset<flatbuffers::Vector<int32_t>> squeeze_dims)
@@ -4819,7 +5461,6 @@ struct SqueezeOptionsBuilder
{
start_ = fbb_.StartTable();
}
- SqueezeOptionsBuilder &operator=(const SqueezeOptionsBuilder &);
flatbuffers::Offset<SqueezeOptions> Finish()
{
const auto end = fbb_.EndTable(start_);
@@ -4841,13 +5482,14 @@ inline flatbuffers::Offset<SqueezeOptions>
CreateSqueezeOptionsDirect(flatbuffers::FlatBufferBuilder &_fbb,
const std::vector<int32_t> *squeeze_dims = nullptr)
{
- return onert_tflite::CreateSqueezeOptions(
- _fbb, squeeze_dims ? _fbb.CreateVector<int32_t>(*squeeze_dims) : 0);
+ auto squeeze_dims__ = squeeze_dims ? _fbb.CreateVector<int32_t>(*squeeze_dims) : 0;
+ return onert_tflite::CreateSqueezeOptions(_fbb, squeeze_dims__);
}
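
The `CreateSqueezeOptionsDirect` rewrite above is behavior-preserving: the `CreateVector` result is pinned to a local (`squeeze_dims__`) before the table call, matching current upstream flatc output rather than nesting the call in the argument list. Usage is unchanged; a sketch with an assumed include name:

```cpp
#include "tflite_schema_generated.h" // assumed include name

#include <vector>

// Caller still owns fbb and calls fbb.Finish(...) on the result.
flatbuffers::Offset<onert_tflite::SqueezeOptions>
BuildSqueeze(flatbuffers::FlatBufferBuilder &fbb)
{
  std::vector<int32_t> dims{0, 2}; // axes to squeeze
  return onert_tflite::CreateSqueezeOptionsDirect(fbb, &dims);
}
```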
struct SplitOptions FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table
{
- enum
+ typedef SplitOptionsBuilder Builder;
+ enum FlatBuffersVTableOffset FLATBUFFERS_VTABLE_UNDERLYING_TYPE
{
VT_NUM_SPLITS = 4
};
@@ -4861,6 +5503,7 @@ struct SplitOptions FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table
struct SplitOptionsBuilder
{
+ typedef SplitOptions Table;
flatbuffers::FlatBufferBuilder &fbb_;
flatbuffers::uoffset_t start_;
void add_num_splits(int32_t num_splits)
@@ -4871,7 +5514,6 @@ struct SplitOptionsBuilder
{
start_ = fbb_.StartTable();
}
- SplitOptionsBuilder &operator=(const SplitOptionsBuilder &);
flatbuffers::Offset<SplitOptions> Finish()
{
const auto end = fbb_.EndTable(start_);
@@ -4890,7 +5532,8 @@ inline flatbuffers::Offset<SplitOptions> CreateSplitOptions(flatbuffers::FlatBuf
struct SplitVOptions FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table
{
- enum
+ typedef SplitVOptionsBuilder Builder;
+ enum FlatBuffersVTableOffset FLATBUFFERS_VTABLE_UNDERLYING_TYPE
{
VT_NUM_SPLITS = 4
};
@@ -4904,6 +5547,7 @@ struct SplitVOptions FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table
struct SplitVOptionsBuilder
{
+ typedef SplitVOptions Table;
flatbuffers::FlatBufferBuilder &fbb_;
flatbuffers::uoffset_t start_;
void add_num_splits(int32_t num_splits)
@@ -4914,7 +5558,6 @@ struct SplitVOptionsBuilder
{
start_ = fbb_.StartTable();
}
- SplitVOptionsBuilder &operator=(const SplitVOptionsBuilder &);
flatbuffers::Offset<SplitVOptions> Finish()
{
const auto end = fbb_.EndTable(start_);
@@ -4933,7 +5576,8 @@ inline flatbuffers::Offset<SplitVOptions> CreateSplitVOptions(flatbuffers::FlatB
struct StridedSliceOptions FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table
{
- enum
+ typedef StridedSliceOptionsBuilder Builder;
+ enum FlatBuffersVTableOffset FLATBUFFERS_VTABLE_UNDERLYING_TYPE
{
VT_BEGIN_MASK = 4,
VT_END_MASK = 6,
@@ -4958,6 +5602,7 @@ struct StridedSliceOptions FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table
struct StridedSliceOptionsBuilder
{
+ typedef StridedSliceOptions Table;
flatbuffers::FlatBufferBuilder &fbb_;
flatbuffers::uoffset_t start_;
void add_begin_mask(int32_t begin_mask)
@@ -4984,7 +5629,6 @@ struct StridedSliceOptionsBuilder
{
start_ = fbb_.StartTable();
}
- StridedSliceOptionsBuilder &operator=(const StridedSliceOptionsBuilder &);
flatbuffers::Offset<StridedSliceOptions> Finish()
{
const auto end = fbb_.EndTable(start_);
@@ -5009,6 +5653,7 @@ CreateStridedSliceOptions(flatbuffers::FlatBufferBuilder &_fbb, int32_t begin_ma
struct LogSoftmaxOptions FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table
{
+ typedef LogSoftmaxOptionsBuilder Builder;
bool Verify(flatbuffers::Verifier &verifier) const
{
return VerifyTableStart(verifier) && verifier.EndTable();
@@ -5017,13 +5662,13 @@ struct LogSoftmaxOptions FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table
struct LogSoftmaxOptionsBuilder
{
+ typedef LogSoftmaxOptions Table;
flatbuffers::FlatBufferBuilder &fbb_;
flatbuffers::uoffset_t start_;
explicit LogSoftmaxOptionsBuilder(flatbuffers::FlatBufferBuilder &_fbb) : fbb_(_fbb)
{
start_ = fbb_.StartTable();
}
- LogSoftmaxOptionsBuilder &operator=(const LogSoftmaxOptionsBuilder &);
flatbuffers::Offset<LogSoftmaxOptions> Finish()
{
const auto end = fbb_.EndTable(start_);
@@ -5041,18 +5686,19 @@ CreateLogSoftmaxOptions(flatbuffers::FlatBufferBuilder &_fbb)
struct CastOptions FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table
{
- enum
+ typedef CastOptionsBuilder Builder;
+ enum FlatBuffersVTableOffset FLATBUFFERS_VTABLE_UNDERLYING_TYPE
{
VT_IN_DATA_TYPE = 4,
VT_OUT_DATA_TYPE = 6
};
- TensorType in_data_type() const
+ onert_tflite::TensorType in_data_type() const
{
- return static_cast<TensorType>(GetField<int8_t>(VT_IN_DATA_TYPE, 0));
+ return static_cast<onert_tflite::TensorType>(GetField<int8_t>(VT_IN_DATA_TYPE, 0));
}
- TensorType out_data_type() const
+ onert_tflite::TensorType out_data_type() const
{
- return static_cast<TensorType>(GetField<int8_t>(VT_OUT_DATA_TYPE, 0));
+ return static_cast<onert_tflite::TensorType>(GetField<int8_t>(VT_OUT_DATA_TYPE, 0));
}
bool Verify(flatbuffers::Verifier &verifier) const
{
@@ -5063,13 +5709,14 @@ struct CastOptions FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table
struct CastOptionsBuilder
{
+ typedef CastOptions Table;
flatbuffers::FlatBufferBuilder &fbb_;
flatbuffers::uoffset_t start_;
- void add_in_data_type(TensorType in_data_type)
+ void add_in_data_type(onert_tflite::TensorType in_data_type)
{
fbb_.AddElement<int8_t>(CastOptions::VT_IN_DATA_TYPE, static_cast<int8_t>(in_data_type), 0);
}
- void add_out_data_type(TensorType out_data_type)
+ void add_out_data_type(onert_tflite::TensorType out_data_type)
{
fbb_.AddElement<int8_t>(CastOptions::VT_OUT_DATA_TYPE, static_cast<int8_t>(out_data_type), 0);
}
@@ -5077,7 +5724,6 @@ struct CastOptionsBuilder
{
start_ = fbb_.StartTable();
}
- CastOptionsBuilder &operator=(const CastOptionsBuilder &);
flatbuffers::Offset<CastOptions> Finish()
{
const auto end = fbb_.EndTable(start_);
@@ -5088,8 +5734,8 @@ struct CastOptionsBuilder
inline flatbuffers::Offset<CastOptions>
CreateCastOptions(flatbuffers::FlatBufferBuilder &_fbb,
- TensorType in_data_type = TensorType_FLOAT32,
- TensorType out_data_type = TensorType_FLOAT32)
+ onert_tflite::TensorType in_data_type = onert_tflite::TensorType_FLOAT32,
+ onert_tflite::TensorType out_data_type = onert_tflite::TensorType_FLOAT32)
{
CastOptionsBuilder builder_(_fbb);
builder_.add_out_data_type(out_data_type);
@@ -5099,6 +5745,7 @@ CreateCastOptions(flatbuffers::FlatBufferBuilder &_fbb,
struct DequantizeOptions FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table
{
+ typedef DequantizeOptionsBuilder Builder;
bool Verify(flatbuffers::Verifier &verifier) const
{
return VerifyTableStart(verifier) && verifier.EndTable();
@@ -5107,13 +5754,13 @@ struct DequantizeOptions FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table
struct DequantizeOptionsBuilder
{
+ typedef DequantizeOptions Table;
flatbuffers::FlatBufferBuilder &fbb_;
flatbuffers::uoffset_t start_;
explicit DequantizeOptionsBuilder(flatbuffers::FlatBufferBuilder &_fbb) : fbb_(_fbb)
{
start_ = fbb_.StartTable();
}
- DequantizeOptionsBuilder &operator=(const DequantizeOptionsBuilder &);
flatbuffers::Offset<DequantizeOptions> Finish()
{
const auto end = fbb_.EndTable(start_);
@@ -5131,6 +5778,7 @@ CreateDequantizeOptions(flatbuffers::FlatBufferBuilder &_fbb)
struct MaximumMinimumOptions FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table
{
+ typedef MaximumMinimumOptionsBuilder Builder;
bool Verify(flatbuffers::Verifier &verifier) const
{
return VerifyTableStart(verifier) && verifier.EndTable();
@@ -5139,13 +5787,13 @@ struct MaximumMinimumOptions FLATBUFFERS_FINAL_CLASS : private flatbuffers::Tabl
struct MaximumMinimumOptionsBuilder
{
+ typedef MaximumMinimumOptions Table;
flatbuffers::FlatBufferBuilder &fbb_;
flatbuffers::uoffset_t start_;
explicit MaximumMinimumOptionsBuilder(flatbuffers::FlatBufferBuilder &_fbb) : fbb_(_fbb)
{
start_ = fbb_.StartTable();
}
- MaximumMinimumOptionsBuilder &operator=(const MaximumMinimumOptionsBuilder &);
flatbuffers::Offset<MaximumMinimumOptions> Finish()
{
const auto end = fbb_.EndTable(start_);
@@ -5163,6 +5811,7 @@ CreateMaximumMinimumOptions(flatbuffers::FlatBufferBuilder &_fbb)
struct TileOptions FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table
{
+ typedef TileOptionsBuilder Builder;
bool Verify(flatbuffers::Verifier &verifier) const
{
return VerifyTableStart(verifier) && verifier.EndTable();
@@ -5171,13 +5820,13 @@ struct TileOptions FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table
struct TileOptionsBuilder
{
+ typedef TileOptions Table;
flatbuffers::FlatBufferBuilder &fbb_;
flatbuffers::uoffset_t start_;
explicit TileOptionsBuilder(flatbuffers::FlatBufferBuilder &_fbb) : fbb_(_fbb)
{
start_ = fbb_.StartTable();
}
- TileOptionsBuilder &operator=(const TileOptionsBuilder &);
flatbuffers::Offset<TileOptions> Finish()
{
const auto end = fbb_.EndTable(start_);
@@ -5194,13 +5843,14 @@ inline flatbuffers::Offset<TileOptions> CreateTileOptions(flatbuffers::FlatBuffe
struct ArgMaxOptions FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table
{
- enum
+ typedef ArgMaxOptionsBuilder Builder;
+ enum FlatBuffersVTableOffset FLATBUFFERS_VTABLE_UNDERLYING_TYPE
{
VT_OUTPUT_TYPE = 4
};
- TensorType output_type() const
+ onert_tflite::TensorType output_type() const
{
- return static_cast<TensorType>(GetField<int8_t>(VT_OUTPUT_TYPE, 0));
+ return static_cast<onert_tflite::TensorType>(GetField<int8_t>(VT_OUTPUT_TYPE, 0));
}
bool Verify(flatbuffers::Verifier &verifier) const
{
@@ -5211,9 +5861,10 @@ struct ArgMaxOptions FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table
struct ArgMaxOptionsBuilder
{
+ typedef ArgMaxOptions Table;
flatbuffers::FlatBufferBuilder &fbb_;
flatbuffers::uoffset_t start_;
- void add_output_type(TensorType output_type)
+ void add_output_type(onert_tflite::TensorType output_type)
{
fbb_.AddElement<int8_t>(ArgMaxOptions::VT_OUTPUT_TYPE, static_cast<int8_t>(output_type), 0);
}
@@ -5221,7 +5872,6 @@ struct ArgMaxOptionsBuilder
{
start_ = fbb_.StartTable();
}
- ArgMaxOptionsBuilder &operator=(const ArgMaxOptionsBuilder &);
flatbuffers::Offset<ArgMaxOptions> Finish()
{
const auto end = fbb_.EndTable(start_);
@@ -5232,7 +5882,7 @@ struct ArgMaxOptionsBuilder
inline flatbuffers::Offset<ArgMaxOptions>
CreateArgMaxOptions(flatbuffers::FlatBufferBuilder &_fbb,
- TensorType output_type = TensorType_FLOAT32)
+ onert_tflite::TensorType output_type = onert_tflite::TensorType_FLOAT32)
{
ArgMaxOptionsBuilder builder_(_fbb);
builder_.add_output_type(output_type);
@@ -5241,13 +5891,14 @@ CreateArgMaxOptions(flatbuffers::FlatBufferBuilder &_fbb,
struct ArgMinOptions FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table
{
- enum
+ typedef ArgMinOptionsBuilder Builder;
+ enum FlatBuffersVTableOffset FLATBUFFERS_VTABLE_UNDERLYING_TYPE
{
VT_OUTPUT_TYPE = 4
};
- TensorType output_type() const
+ onert_tflite::TensorType output_type() const
{
- return static_cast<TensorType>(GetField<int8_t>(VT_OUTPUT_TYPE, 0));
+ return static_cast<onert_tflite::TensorType>(GetField<int8_t>(VT_OUTPUT_TYPE, 0));
}
bool Verify(flatbuffers::Verifier &verifier) const
{
@@ -5258,9 +5909,10 @@ struct ArgMinOptions FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table
struct ArgMinOptionsBuilder
{
+ typedef ArgMinOptions Table;
flatbuffers::FlatBufferBuilder &fbb_;
flatbuffers::uoffset_t start_;
- void add_output_type(TensorType output_type)
+ void add_output_type(onert_tflite::TensorType output_type)
{
fbb_.AddElement<int8_t>(ArgMinOptions::VT_OUTPUT_TYPE, static_cast<int8_t>(output_type), 0);
}
@@ -5268,7 +5920,6 @@ struct ArgMinOptionsBuilder
{
start_ = fbb_.StartTable();
}
- ArgMinOptionsBuilder &operator=(const ArgMinOptionsBuilder &);
flatbuffers::Offset<ArgMinOptions> Finish()
{
const auto end = fbb_.EndTable(start_);
@@ -5279,7 +5930,7 @@ struct ArgMinOptionsBuilder
inline flatbuffers::Offset<ArgMinOptions>
CreateArgMinOptions(flatbuffers::FlatBufferBuilder &_fbb,
- TensorType output_type = TensorType_FLOAT32)
+ onert_tflite::TensorType output_type = onert_tflite::TensorType_FLOAT32)
{
ArgMinOptionsBuilder builder_(_fbb);
builder_.add_output_type(output_type);
@@ -5288,6 +5939,7 @@ CreateArgMinOptions(flatbuffers::FlatBufferBuilder &_fbb,
struct GreaterOptions FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table
{
+ typedef GreaterOptionsBuilder Builder;
bool Verify(flatbuffers::Verifier &verifier) const
{
return VerifyTableStart(verifier) && verifier.EndTable();
@@ -5296,13 +5948,13 @@ struct GreaterOptions FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table
struct GreaterOptionsBuilder
{
+ typedef GreaterOptions Table;
flatbuffers::FlatBufferBuilder &fbb_;
flatbuffers::uoffset_t start_;
explicit GreaterOptionsBuilder(flatbuffers::FlatBufferBuilder &_fbb) : fbb_(_fbb)
{
start_ = fbb_.StartTable();
}
- GreaterOptionsBuilder &operator=(const GreaterOptionsBuilder &);
flatbuffers::Offset<GreaterOptions> Finish()
{
const auto end = fbb_.EndTable(start_);
@@ -5320,6 +5972,7 @@ CreateGreaterOptions(flatbuffers::FlatBufferBuilder &_fbb)
struct GreaterEqualOptions FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table
{
+ typedef GreaterEqualOptionsBuilder Builder;
bool Verify(flatbuffers::Verifier &verifier) const
{
return VerifyTableStart(verifier) && verifier.EndTable();
@@ -5328,13 +5981,13 @@ struct GreaterEqualOptions FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table
struct GreaterEqualOptionsBuilder
{
+ typedef GreaterEqualOptions Table;
flatbuffers::FlatBufferBuilder &fbb_;
flatbuffers::uoffset_t start_;
explicit GreaterEqualOptionsBuilder(flatbuffers::FlatBufferBuilder &_fbb) : fbb_(_fbb)
{
start_ = fbb_.StartTable();
}
- GreaterEqualOptionsBuilder &operator=(const GreaterEqualOptionsBuilder &);
flatbuffers::Offset<GreaterEqualOptions> Finish()
{
const auto end = fbb_.EndTable(start_);
@@ -5352,6 +6005,7 @@ CreateGreaterEqualOptions(flatbuffers::FlatBufferBuilder &_fbb)
struct LessOptions FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table
{
+ typedef LessOptionsBuilder Builder;
bool Verify(flatbuffers::Verifier &verifier) const
{
return VerifyTableStart(verifier) && verifier.EndTable();
@@ -5360,13 +6014,13 @@ struct LessOptions FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table
struct LessOptionsBuilder
{
+ typedef LessOptions Table;
flatbuffers::FlatBufferBuilder &fbb_;
flatbuffers::uoffset_t start_;
explicit LessOptionsBuilder(flatbuffers::FlatBufferBuilder &_fbb) : fbb_(_fbb)
{
start_ = fbb_.StartTable();
}
- LessOptionsBuilder &operator=(const LessOptionsBuilder &);
flatbuffers::Offset<LessOptions> Finish()
{
const auto end = fbb_.EndTable(start_);
@@ -5383,6 +6037,7 @@ inline flatbuffers::Offset<LessOptions> CreateLessOptions(flatbuffers::FlatBuffe
struct LessEqualOptions FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table
{
+ typedef LessEqualOptionsBuilder Builder;
bool Verify(flatbuffers::Verifier &verifier) const
{
return VerifyTableStart(verifier) && verifier.EndTable();
@@ -5391,13 +6046,13 @@ struct LessEqualOptions FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table
struct LessEqualOptionsBuilder
{
+ typedef LessEqualOptions Table;
flatbuffers::FlatBufferBuilder &fbb_;
flatbuffers::uoffset_t start_;
explicit LessEqualOptionsBuilder(flatbuffers::FlatBufferBuilder &_fbb) : fbb_(_fbb)
{
start_ = fbb_.StartTable();
}
- LessEqualOptionsBuilder &operator=(const LessEqualOptionsBuilder &);
flatbuffers::Offset<LessEqualOptions> Finish()
{
const auto end = fbb_.EndTable(start_);
@@ -5415,6 +6070,7 @@ CreateLessEqualOptions(flatbuffers::FlatBufferBuilder &_fbb)
struct NegOptions FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table
{
+ typedef NegOptionsBuilder Builder;
bool Verify(flatbuffers::Verifier &verifier) const
{
return VerifyTableStart(verifier) && verifier.EndTable();
@@ -5423,13 +6079,13 @@ struct NegOptions FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table
struct NegOptionsBuilder
{
+ typedef NegOptions Table;
flatbuffers::FlatBufferBuilder &fbb_;
flatbuffers::uoffset_t start_;
explicit NegOptionsBuilder(flatbuffers::FlatBufferBuilder &_fbb) : fbb_(_fbb)
{
start_ = fbb_.StartTable();
}
- NegOptionsBuilder &operator=(const NegOptionsBuilder &);
flatbuffers::Offset<NegOptions> Finish()
{
const auto end = fbb_.EndTable(start_);
@@ -5446,6 +6102,7 @@ inline flatbuffers::Offset<NegOptions> CreateNegOptions(flatbuffers::FlatBufferB
struct SelectOptions FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table
{
+ typedef SelectOptionsBuilder Builder;
bool Verify(flatbuffers::Verifier &verifier) const
{
return VerifyTableStart(verifier) && verifier.EndTable();
@@ -5454,13 +6111,13 @@ struct SelectOptions FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table
struct SelectOptionsBuilder
{
+ typedef SelectOptions Table;
flatbuffers::FlatBufferBuilder &fbb_;
flatbuffers::uoffset_t start_;
explicit SelectOptionsBuilder(flatbuffers::FlatBufferBuilder &_fbb) : fbb_(_fbb)
{
start_ = fbb_.StartTable();
}
- SelectOptionsBuilder &operator=(const SelectOptionsBuilder &);
flatbuffers::Offset<SelectOptions> Finish()
{
const auto end = fbb_.EndTable(start_);
@@ -5477,6 +6134,7 @@ inline flatbuffers::Offset<SelectOptions> CreateSelectOptions(flatbuffers::FlatB
struct SliceOptions FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table
{
+ typedef SliceOptionsBuilder Builder;
bool Verify(flatbuffers::Verifier &verifier) const
{
return VerifyTableStart(verifier) && verifier.EndTable();
@@ -5485,13 +6143,13 @@ struct SliceOptions FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table
struct SliceOptionsBuilder
{
+ typedef SliceOptions Table;
flatbuffers::FlatBufferBuilder &fbb_;
flatbuffers::uoffset_t start_;
explicit SliceOptionsBuilder(flatbuffers::FlatBufferBuilder &_fbb) : fbb_(_fbb)
{
start_ = fbb_.StartTable();
}
- SliceOptionsBuilder &operator=(const SliceOptionsBuilder &);
flatbuffers::Offset<SliceOptions> Finish()
{
const auto end = fbb_.EndTable(start_);
@@ -5508,13 +6166,17 @@ inline flatbuffers::Offset<SliceOptions> CreateSliceOptions(flatbuffers::FlatBuf
struct TransposeConvOptions FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table
{
- enum
+ typedef TransposeConvOptionsBuilder Builder;
+ enum FlatBuffersVTableOffset FLATBUFFERS_VTABLE_UNDERLYING_TYPE
{
VT_PADDING = 4,
VT_STRIDE_W = 6,
VT_STRIDE_H = 8
};
- Padding padding() const { return static_cast<Padding>(GetField<int8_t>(VT_PADDING, 0)); }
+ onert_tflite::Padding padding() const
+ {
+ return static_cast<onert_tflite::Padding>(GetField<int8_t>(VT_PADDING, 0));
+ }
int32_t stride_w() const { return GetField<int32_t>(VT_STRIDE_W, 0); }
int32_t stride_h() const { return GetField<int32_t>(VT_STRIDE_H, 0); }
bool Verify(flatbuffers::Verifier &verifier) const
@@ -5527,9 +6189,10 @@ struct TransposeConvOptions FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table
struct TransposeConvOptionsBuilder
{
+ typedef TransposeConvOptions Table;
flatbuffers::FlatBufferBuilder &fbb_;
flatbuffers::uoffset_t start_;
- void add_padding(Padding padding)
+ void add_padding(onert_tflite::Padding padding)
{
fbb_.AddElement<int8_t>(TransposeConvOptions::VT_PADDING, static_cast<int8_t>(padding), 0);
}
@@ -5545,7 +6208,6 @@ struct TransposeConvOptionsBuilder
{
start_ = fbb_.StartTable();
}
- TransposeConvOptionsBuilder &operator=(const TransposeConvOptionsBuilder &);
flatbuffers::Offset<TransposeConvOptions> Finish()
{
const auto end = fbb_.EndTable(start_);
@@ -5555,7 +6217,8 @@ struct TransposeConvOptionsBuilder
};
inline flatbuffers::Offset<TransposeConvOptions>
-CreateTransposeConvOptions(flatbuffers::FlatBufferBuilder &_fbb, Padding padding = Padding_SAME,
+CreateTransposeConvOptions(flatbuffers::FlatBufferBuilder &_fbb,
+ onert_tflite::Padding padding = onert_tflite::Padding_SAME,
int32_t stride_w = 0, int32_t stride_h = 0)
{
TransposeConvOptionsBuilder builder_(_fbb);
@@ -5567,6 +6230,7 @@ CreateTransposeConvOptions(flatbuffers::FlatBufferBuilder &_fbb, Padding padding
struct ExpandDimsOptions FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table
{
+ typedef ExpandDimsOptionsBuilder Builder;
bool Verify(flatbuffers::Verifier &verifier) const
{
return VerifyTableStart(verifier) && verifier.EndTable();
@@ -5575,13 +6239,13 @@ struct ExpandDimsOptions FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table
struct ExpandDimsOptionsBuilder
{
+ typedef ExpandDimsOptions Table;
flatbuffers::FlatBufferBuilder &fbb_;
flatbuffers::uoffset_t start_;
explicit ExpandDimsOptionsBuilder(flatbuffers::FlatBufferBuilder &_fbb) : fbb_(_fbb)
{
start_ = fbb_.StartTable();
}
- ExpandDimsOptionsBuilder &operator=(const ExpandDimsOptionsBuilder &);
flatbuffers::Offset<ExpandDimsOptions> Finish()
{
const auto end = fbb_.EndTable(start_);
@@ -5599,7 +6263,8 @@ CreateExpandDimsOptions(flatbuffers::FlatBufferBuilder &_fbb)
struct SparseToDenseOptions FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table
{
- enum
+ typedef SparseToDenseOptionsBuilder Builder;
+ enum FlatBuffersVTableOffset FLATBUFFERS_VTABLE_UNDERLYING_TYPE
{
VT_VALIDATE_INDICES = 4
};
@@ -5613,6 +6278,7 @@ struct SparseToDenseOptions FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table
struct SparseToDenseOptionsBuilder
{
+ typedef SparseToDenseOptions Table;
flatbuffers::FlatBufferBuilder &fbb_;
flatbuffers::uoffset_t start_;
void add_validate_indices(bool validate_indices)
@@ -5624,7 +6290,6 @@ struct SparseToDenseOptionsBuilder
{
start_ = fbb_.StartTable();
}
- SparseToDenseOptionsBuilder &operator=(const SparseToDenseOptionsBuilder &);
flatbuffers::Offset<SparseToDenseOptions> Finish()
{
const auto end = fbb_.EndTable(start_);
@@ -5643,6 +6308,7 @@ CreateSparseToDenseOptions(flatbuffers::FlatBufferBuilder &_fbb, bool validate_i
struct EqualOptions FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table
{
+ typedef EqualOptionsBuilder Builder;
bool Verify(flatbuffers::Verifier &verifier) const
{
return VerifyTableStart(verifier) && verifier.EndTable();
@@ -5651,13 +6317,13 @@ struct EqualOptions FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table
struct EqualOptionsBuilder
{
+ typedef EqualOptions Table;
flatbuffers::FlatBufferBuilder &fbb_;
flatbuffers::uoffset_t start_;
explicit EqualOptionsBuilder(flatbuffers::FlatBufferBuilder &_fbb) : fbb_(_fbb)
{
start_ = fbb_.StartTable();
}
- EqualOptionsBuilder &operator=(const EqualOptionsBuilder &);
flatbuffers::Offset<EqualOptions> Finish()
{
const auto end = fbb_.EndTable(start_);
@@ -5674,6 +6340,7 @@ inline flatbuffers::Offset<EqualOptions> CreateEqualOptions(flatbuffers::FlatBuf
struct NotEqualOptions FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table
{
+ typedef NotEqualOptionsBuilder Builder;
bool Verify(flatbuffers::Verifier &verifier) const
{
return VerifyTableStart(verifier) && verifier.EndTable();
@@ -5682,13 +6349,13 @@ struct NotEqualOptions FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table
struct NotEqualOptionsBuilder
{
+ typedef NotEqualOptions Table;
flatbuffers::FlatBufferBuilder &fbb_;
flatbuffers::uoffset_t start_;
explicit NotEqualOptionsBuilder(flatbuffers::FlatBufferBuilder &_fbb) : fbb_(_fbb)
{
start_ = fbb_.StartTable();
}
- NotEqualOptionsBuilder &operator=(const NotEqualOptionsBuilder &);
flatbuffers::Offset<NotEqualOptions> Finish()
{
const auto end = fbb_.EndTable(start_);
@@ -5706,11 +6373,15 @@ CreateNotEqualOptions(flatbuffers::FlatBufferBuilder &_fbb)
struct ShapeOptions FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table
{
- enum
+ typedef ShapeOptionsBuilder Builder;
+ enum FlatBuffersVTableOffset FLATBUFFERS_VTABLE_UNDERLYING_TYPE
{
VT_OUT_TYPE = 4
};
- TensorType out_type() const { return static_cast<TensorType>(GetField<int8_t>(VT_OUT_TYPE, 0)); }
+ onert_tflite::TensorType out_type() const
+ {
+ return static_cast<onert_tflite::TensorType>(GetField<int8_t>(VT_OUT_TYPE, 0));
+ }
bool Verify(flatbuffers::Verifier &verifier) const
{
return VerifyTableStart(verifier) && VerifyField<int8_t>(verifier, VT_OUT_TYPE) &&
@@ -5720,9 +6391,10 @@ struct ShapeOptions FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table
struct ShapeOptionsBuilder
{
+ typedef ShapeOptions Table;
flatbuffers::FlatBufferBuilder &fbb_;
flatbuffers::uoffset_t start_;
- void add_out_type(TensorType out_type)
+ void add_out_type(onert_tflite::TensorType out_type)
{
fbb_.AddElement<int8_t>(ShapeOptions::VT_OUT_TYPE, static_cast<int8_t>(out_type), 0);
}
@@ -5730,7 +6402,6 @@ struct ShapeOptionsBuilder
{
start_ = fbb_.StartTable();
}
- ShapeOptionsBuilder &operator=(const ShapeOptionsBuilder &);
flatbuffers::Offset<ShapeOptions> Finish()
{
const auto end = fbb_.EndTable(start_);
@@ -5740,7 +6411,8 @@ struct ShapeOptionsBuilder
};
inline flatbuffers::Offset<ShapeOptions>
-CreateShapeOptions(flatbuffers::FlatBufferBuilder &_fbb, TensorType out_type = TensorType_FLOAT32)
+CreateShapeOptions(flatbuffers::FlatBufferBuilder &_fbb,
+ onert_tflite::TensorType out_type = onert_tflite::TensorType_FLOAT32)
{
ShapeOptionsBuilder builder_(_fbb);
builder_.add_out_type(out_type);
@@ -5749,6 +6421,7 @@ CreateShapeOptions(flatbuffers::FlatBufferBuilder &_fbb, TensorType out_type = T
struct RankOptions FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table
{
+ typedef RankOptionsBuilder Builder;
bool Verify(flatbuffers::Verifier &verifier) const
{
return VerifyTableStart(verifier) && verifier.EndTable();
@@ -5757,13 +6430,13 @@ struct RankOptions FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table
struct RankOptionsBuilder
{
+ typedef RankOptions Table;
flatbuffers::FlatBufferBuilder &fbb_;
flatbuffers::uoffset_t start_;
explicit RankOptionsBuilder(flatbuffers::FlatBufferBuilder &_fbb) : fbb_(_fbb)
{
start_ = fbb_.StartTable();
}
- RankOptionsBuilder &operator=(const RankOptionsBuilder &);
flatbuffers::Offset<RankOptions> Finish()
{
const auto end = fbb_.EndTable(start_);
@@ -5780,6 +6453,7 @@ inline flatbuffers::Offset<RankOptions> CreateRankOptions(flatbuffers::FlatBuffe
struct PowOptions FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table
{
+ typedef PowOptionsBuilder Builder;
bool Verify(flatbuffers::Verifier &verifier) const
{
return VerifyTableStart(verifier) && verifier.EndTable();
@@ -5788,13 +6462,13 @@ struct PowOptions FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table
struct PowOptionsBuilder
{
+ typedef PowOptions Table;
flatbuffers::FlatBufferBuilder &fbb_;
flatbuffers::uoffset_t start_;
explicit PowOptionsBuilder(flatbuffers::FlatBufferBuilder &_fbb) : fbb_(_fbb)
{
start_ = fbb_.StartTable();
}
- PowOptionsBuilder &operator=(const PowOptionsBuilder &);
flatbuffers::Offset<PowOptions> Finish()
{
const auto end = fbb_.EndTable(start_);
@@ -5811,7 +6485,8 @@ inline flatbuffers::Offset<PowOptions> CreatePowOptions(flatbuffers::FlatBufferB
struct FakeQuantOptions FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table
{
- enum
+ typedef FakeQuantOptionsBuilder Builder;
+ enum FlatBuffersVTableOffset FLATBUFFERS_VTABLE_UNDERLYING_TYPE
{
VT_MIN = 4,
VT_MAX = 6,
@@ -5832,6 +6507,7 @@ struct FakeQuantOptions FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table
struct FakeQuantOptionsBuilder
{
+ typedef FakeQuantOptions Table;
flatbuffers::FlatBufferBuilder &fbb_;
flatbuffers::uoffset_t start_;
void add_min(float min) { fbb_.AddElement<float>(FakeQuantOptions::VT_MIN, min, 0.0f); }
@@ -5849,7 +6525,6 @@ struct FakeQuantOptionsBuilder
{
start_ = fbb_.StartTable();
}
- FakeQuantOptionsBuilder &operator=(const FakeQuantOptionsBuilder &);
flatbuffers::Offset<FakeQuantOptions> Finish()
{
const auto end = fbb_.EndTable(start_);
@@ -5872,7 +6547,8 @@ CreateFakeQuantOptions(flatbuffers::FlatBufferBuilder &_fbb, float min = 0.0f, f
struct PackOptions FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table
{
- enum
+ typedef PackOptionsBuilder Builder;
+ enum FlatBuffersVTableOffset FLATBUFFERS_VTABLE_UNDERLYING_TYPE
{
VT_VALUES_COUNT = 4,
VT_AXIS = 6
@@ -5888,6 +6564,7 @@ struct PackOptions FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table
struct PackOptionsBuilder
{
+ typedef PackOptions Table;
flatbuffers::FlatBufferBuilder &fbb_;
flatbuffers::uoffset_t start_;
void add_values_count(int32_t values_count)
@@ -5899,7 +6576,6 @@ struct PackOptionsBuilder
{
start_ = fbb_.StartTable();
}
- PackOptionsBuilder &operator=(const PackOptionsBuilder &);
flatbuffers::Offset<PackOptions> Finish()
{
const auto end = fbb_.EndTable(start_);
@@ -5919,6 +6595,7 @@ CreatePackOptions(flatbuffers::FlatBufferBuilder &_fbb, int32_t values_count = 0
struct LogicalOrOptions FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table
{
+ typedef LogicalOrOptionsBuilder Builder;
bool Verify(flatbuffers::Verifier &verifier) const
{
return VerifyTableStart(verifier) && verifier.EndTable();
@@ -5927,13 +6604,13 @@ struct LogicalOrOptions FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table
struct LogicalOrOptionsBuilder
{
+ typedef LogicalOrOptions Table;
flatbuffers::FlatBufferBuilder &fbb_;
flatbuffers::uoffset_t start_;
explicit LogicalOrOptionsBuilder(flatbuffers::FlatBufferBuilder &_fbb) : fbb_(_fbb)
{
start_ = fbb_.StartTable();
}
- LogicalOrOptionsBuilder &operator=(const LogicalOrOptionsBuilder &);
flatbuffers::Offset<LogicalOrOptions> Finish()
{
const auto end = fbb_.EndTable(start_);
@@ -5951,7 +6628,8 @@ CreateLogicalOrOptions(flatbuffers::FlatBufferBuilder &_fbb)
struct OneHotOptions FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table
{
- enum
+ typedef OneHotOptionsBuilder Builder;
+ enum FlatBuffersVTableOffset FLATBUFFERS_VTABLE_UNDERLYING_TYPE
{
VT_AXIS = 4
};
@@ -5965,6 +6643,7 @@ struct OneHotOptions FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table
struct OneHotOptionsBuilder
{
+ typedef OneHotOptions Table;
flatbuffers::FlatBufferBuilder &fbb_;
flatbuffers::uoffset_t start_;
void add_axis(int32_t axis) { fbb_.AddElement<int32_t>(OneHotOptions::VT_AXIS, axis, 0); }
@@ -5972,7 +6651,6 @@ struct OneHotOptionsBuilder
{
start_ = fbb_.StartTable();
}
- OneHotOptionsBuilder &operator=(const OneHotOptionsBuilder &);
flatbuffers::Offset<OneHotOptions> Finish()
{
const auto end = fbb_.EndTable(start_);
@@ -5991,6 +6669,7 @@ inline flatbuffers::Offset<OneHotOptions> CreateOneHotOptions(flatbuffers::FlatB
struct AbsOptions FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table
{
+ typedef AbsOptionsBuilder Builder;
bool Verify(flatbuffers::Verifier &verifier) const
{
return VerifyTableStart(verifier) && verifier.EndTable();
@@ -5999,13 +6678,13 @@ struct AbsOptions FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table
struct AbsOptionsBuilder
{
+ typedef AbsOptions Table;
flatbuffers::FlatBufferBuilder &fbb_;
flatbuffers::uoffset_t start_;
explicit AbsOptionsBuilder(flatbuffers::FlatBufferBuilder &_fbb) : fbb_(_fbb)
{
start_ = fbb_.StartTable();
}
- AbsOptionsBuilder &operator=(const AbsOptionsBuilder &);
flatbuffers::Offset<AbsOptions> Finish()
{
const auto end = fbb_.EndTable(start_);
@@ -6022,6 +6701,7 @@ inline flatbuffers::Offset<AbsOptions> CreateAbsOptions(flatbuffers::FlatBufferB
struct HardSwishOptions FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table
{
+ typedef HardSwishOptionsBuilder Builder;
bool Verify(flatbuffers::Verifier &verifier) const
{
return VerifyTableStart(verifier) && verifier.EndTable();
@@ -6030,13 +6710,13 @@ struct HardSwishOptions FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table
struct HardSwishOptionsBuilder
{
+ typedef HardSwishOptions Table;
flatbuffers::FlatBufferBuilder &fbb_;
flatbuffers::uoffset_t start_;
explicit HardSwishOptionsBuilder(flatbuffers::FlatBufferBuilder &_fbb) : fbb_(_fbb)
{
start_ = fbb_.StartTable();
}
- HardSwishOptionsBuilder &operator=(const HardSwishOptionsBuilder &);
flatbuffers::Offset<HardSwishOptions> Finish()
{
const auto end = fbb_.EndTable(start_);
@@ -6054,6 +6734,7 @@ CreateHardSwishOptions(flatbuffers::FlatBufferBuilder &_fbb)
struct LogicalAndOptions FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table
{
+ typedef LogicalAndOptionsBuilder Builder;
bool Verify(flatbuffers::Verifier &verifier) const
{
return VerifyTableStart(verifier) && verifier.EndTable();
@@ -6062,13 +6743,13 @@ struct LogicalAndOptions FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table
struct LogicalAndOptionsBuilder
{
+ typedef LogicalAndOptions Table;
flatbuffers::FlatBufferBuilder &fbb_;
flatbuffers::uoffset_t start_;
explicit LogicalAndOptionsBuilder(flatbuffers::FlatBufferBuilder &_fbb) : fbb_(_fbb)
{
start_ = fbb_.StartTable();
}
- LogicalAndOptionsBuilder &operator=(const LogicalAndOptionsBuilder &);
flatbuffers::Offset<LogicalAndOptions> Finish()
{
const auto end = fbb_.EndTable(start_);
@@ -6086,6 +6767,7 @@ CreateLogicalAndOptions(flatbuffers::FlatBufferBuilder &_fbb)
struct LogicalNotOptions FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table
{
+ typedef LogicalNotOptionsBuilder Builder;
bool Verify(flatbuffers::Verifier &verifier) const
{
return VerifyTableStart(verifier) && verifier.EndTable();
@@ -6094,13 +6776,13 @@ struct LogicalNotOptions FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table
struct LogicalNotOptionsBuilder
{
+ typedef LogicalNotOptions Table;
flatbuffers::FlatBufferBuilder &fbb_;
flatbuffers::uoffset_t start_;
explicit LogicalNotOptionsBuilder(flatbuffers::FlatBufferBuilder &_fbb) : fbb_(_fbb)
{
start_ = fbb_.StartTable();
}
- LogicalNotOptionsBuilder &operator=(const LogicalNotOptionsBuilder &);
flatbuffers::Offset<LogicalNotOptions> Finish()
{
const auto end = fbb_.EndTable(start_);
@@ -6118,7 +6800,8 @@ CreateLogicalNotOptions(flatbuffers::FlatBufferBuilder &_fbb)
struct UnpackOptions FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table
{
- enum
+ typedef UnpackOptionsBuilder Builder;
+ enum FlatBuffersVTableOffset FLATBUFFERS_VTABLE_UNDERLYING_TYPE
{
VT_NUM = 4,
VT_AXIS = 6
@@ -6134,6 +6817,7 @@ struct UnpackOptions FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table
struct UnpackOptionsBuilder
{
+ typedef UnpackOptions Table;
flatbuffers::FlatBufferBuilder &fbb_;
flatbuffers::uoffset_t start_;
void add_num(int32_t num) { fbb_.AddElement<int32_t>(UnpackOptions::VT_NUM, num, 0); }
@@ -6142,7 +6826,6 @@ struct UnpackOptionsBuilder
{
start_ = fbb_.StartTable();
}
- UnpackOptionsBuilder &operator=(const UnpackOptionsBuilder &);
flatbuffers::Offset<UnpackOptions> Finish()
{
const auto end = fbb_.EndTable(start_);
@@ -6162,6 +6845,7 @@ inline flatbuffers::Offset<UnpackOptions> CreateUnpackOptions(flatbuffers::FlatB
struct FloorDivOptions FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table
{
+ typedef FloorDivOptionsBuilder Builder;
bool Verify(flatbuffers::Verifier &verifier) const
{
return VerifyTableStart(verifier) && verifier.EndTable();
@@ -6170,13 +6854,13 @@ struct FloorDivOptions FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table
struct FloorDivOptionsBuilder
{
+ typedef FloorDivOptions Table;
flatbuffers::FlatBufferBuilder &fbb_;
flatbuffers::uoffset_t start_;
explicit FloorDivOptionsBuilder(flatbuffers::FlatBufferBuilder &_fbb) : fbb_(_fbb)
{
start_ = fbb_.StartTable();
}
- FloorDivOptionsBuilder &operator=(const FloorDivOptionsBuilder &);
flatbuffers::Offset<FloorDivOptions> Finish()
{
const auto end = fbb_.EndTable(start_);
@@ -6194,6 +6878,7 @@ CreateFloorDivOptions(flatbuffers::FlatBufferBuilder &_fbb)
struct SquareOptions FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table
{
+ typedef SquareOptionsBuilder Builder;
bool Verify(flatbuffers::Verifier &verifier) const
{
return VerifyTableStart(verifier) && verifier.EndTable();
@@ -6202,13 +6887,13 @@ struct SquareOptions FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table
struct SquareOptionsBuilder
{
+ typedef SquareOptions Table;
flatbuffers::FlatBufferBuilder &fbb_;
flatbuffers::uoffset_t start_;
explicit SquareOptionsBuilder(flatbuffers::FlatBufferBuilder &_fbb) : fbb_(_fbb)
{
start_ = fbb_.StartTable();
}
- SquareOptionsBuilder &operator=(const SquareOptionsBuilder &);
flatbuffers::Offset<SquareOptions> Finish()
{
const auto end = fbb_.EndTable(start_);
@@ -6225,6 +6910,7 @@ inline flatbuffers::Offset<SquareOptions> CreateSquareOptions(flatbuffers::FlatB
struct ZerosLikeOptions FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table
{
+ typedef ZerosLikeOptionsBuilder Builder;
bool Verify(flatbuffers::Verifier &verifier) const
{
return VerifyTableStart(verifier) && verifier.EndTable();
@@ -6233,13 +6919,13 @@ struct ZerosLikeOptions FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table
struct ZerosLikeOptionsBuilder
{
+ typedef ZerosLikeOptions Table;
flatbuffers::FlatBufferBuilder &fbb_;
flatbuffers::uoffset_t start_;
explicit ZerosLikeOptionsBuilder(flatbuffers::FlatBufferBuilder &_fbb) : fbb_(_fbb)
{
start_ = fbb_.StartTable();
}
- ZerosLikeOptionsBuilder &operator=(const ZerosLikeOptionsBuilder &);
flatbuffers::Offset<ZerosLikeOptions> Finish()
{
const auto end = fbb_.EndTable(start_);
@@ -6257,6 +6943,7 @@ CreateZerosLikeOptions(flatbuffers::FlatBufferBuilder &_fbb)
struct FillOptions FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table
{
+ typedef FillOptionsBuilder Builder;
bool Verify(flatbuffers::Verifier &verifier) const
{
return VerifyTableStart(verifier) && verifier.EndTable();
@@ -6265,13 +6952,13 @@ struct FillOptions FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table
struct FillOptionsBuilder
{
+ typedef FillOptions Table;
flatbuffers::FlatBufferBuilder &fbb_;
flatbuffers::uoffset_t start_;
explicit FillOptionsBuilder(flatbuffers::FlatBufferBuilder &_fbb) : fbb_(_fbb)
{
start_ = fbb_.StartTable();
}
- FillOptionsBuilder &operator=(const FillOptionsBuilder &);
flatbuffers::Offset<FillOptions> Finish()
{
const auto end = fbb_.EndTable(start_);
@@ -6288,6 +6975,7 @@ inline flatbuffers::Offset<FillOptions> CreateFillOptions(flatbuffers::FlatBuffe
struct FloorModOptions FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table
{
+ typedef FloorModOptionsBuilder Builder;
bool Verify(flatbuffers::Verifier &verifier) const
{
return VerifyTableStart(verifier) && verifier.EndTable();
@@ -6296,13 +6984,13 @@ struct FloorModOptions FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table
struct FloorModOptionsBuilder
{
+ typedef FloorModOptions Table;
flatbuffers::FlatBufferBuilder &fbb_;
flatbuffers::uoffset_t start_;
explicit FloorModOptionsBuilder(flatbuffers::FlatBufferBuilder &_fbb) : fbb_(_fbb)
{
start_ = fbb_.StartTable();
}
- FloorModOptionsBuilder &operator=(const FloorModOptionsBuilder &);
flatbuffers::Offset<FloorModOptions> Finish()
{
const auto end = fbb_.EndTable(start_);
@@ -6320,6 +7008,7 @@ CreateFloorModOptions(flatbuffers::FlatBufferBuilder &_fbb)
struct RangeOptions FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table
{
+ typedef RangeOptionsBuilder Builder;
bool Verify(flatbuffers::Verifier &verifier) const
{
return VerifyTableStart(verifier) && verifier.EndTable();
@@ -6328,13 +7017,13 @@ struct RangeOptions FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table
struct RangeOptionsBuilder
{
+ typedef RangeOptions Table;
flatbuffers::FlatBufferBuilder &fbb_;
flatbuffers::uoffset_t start_;
explicit RangeOptionsBuilder(flatbuffers::FlatBufferBuilder &_fbb) : fbb_(_fbb)
{
start_ = fbb_.StartTable();
}
- RangeOptionsBuilder &operator=(const RangeOptionsBuilder &);
flatbuffers::Offset<RangeOptions> Finish()
{
const auto end = fbb_.EndTable(start_);
@@ -6351,7 +7040,8 @@ inline flatbuffers::Offset<RangeOptions> CreateRangeOptions(flatbuffers::FlatBuf
struct LeakyReluOptions FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table
{
- enum
+ typedef LeakyReluOptionsBuilder Builder;
+ enum FlatBuffersVTableOffset FLATBUFFERS_VTABLE_UNDERLYING_TYPE
{
VT_ALPHA = 4
};
@@ -6365,6 +7055,7 @@ struct LeakyReluOptions FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table
struct LeakyReluOptionsBuilder
{
+ typedef LeakyReluOptions Table;
flatbuffers::FlatBufferBuilder &fbb_;
flatbuffers::uoffset_t start_;
void add_alpha(float alpha) { fbb_.AddElement<float>(LeakyReluOptions::VT_ALPHA, alpha, 0.0f); }
@@ -6372,7 +7063,6 @@ struct LeakyReluOptionsBuilder
{
start_ = fbb_.StartTable();
}
- LeakyReluOptionsBuilder &operator=(const LeakyReluOptionsBuilder &);
flatbuffers::Offset<LeakyReluOptions> Finish()
{
const auto end = fbb_.EndTable(start_);
@@ -6391,6 +7081,7 @@ CreateLeakyReluOptions(flatbuffers::FlatBufferBuilder &_fbb, float alpha = 0.0f)
struct SquaredDifferenceOptions FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table
{
+ typedef SquaredDifferenceOptionsBuilder Builder;
bool Verify(flatbuffers::Verifier &verifier) const
{
return VerifyTableStart(verifier) && verifier.EndTable();
@@ -6399,13 +7090,13 @@ struct SquaredDifferenceOptions FLATBUFFERS_FINAL_CLASS : private flatbuffers::T
struct SquaredDifferenceOptionsBuilder
{
+ typedef SquaredDifferenceOptions Table;
flatbuffers::FlatBufferBuilder &fbb_;
flatbuffers::uoffset_t start_;
explicit SquaredDifferenceOptionsBuilder(flatbuffers::FlatBufferBuilder &_fbb) : fbb_(_fbb)
{
start_ = fbb_.StartTable();
}
- SquaredDifferenceOptionsBuilder &operator=(const SquaredDifferenceOptionsBuilder &);
flatbuffers::Offset<SquaredDifferenceOptions> Finish()
{
const auto end = fbb_.EndTable(start_);
@@ -6423,11 +7114,15 @@ CreateSquaredDifferenceOptions(flatbuffers::FlatBufferBuilder &_fbb)
struct MirrorPadOptions FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table
{
- enum
+ typedef MirrorPadOptionsBuilder Builder;
+ enum FlatBuffersVTableOffset FLATBUFFERS_VTABLE_UNDERLYING_TYPE
{
VT_MODE = 4
};
- MirrorPadMode mode() const { return static_cast<MirrorPadMode>(GetField<int8_t>(VT_MODE, 0)); }
+ onert_tflite::MirrorPadMode mode() const
+ {
+ return static_cast<onert_tflite::MirrorPadMode>(GetField<int8_t>(VT_MODE, 0));
+ }
bool Verify(flatbuffers::Verifier &verifier) const
{
return VerifyTableStart(verifier) && VerifyField<int8_t>(verifier, VT_MODE) &&
@@ -6437,9 +7132,10 @@ struct MirrorPadOptions FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table
struct MirrorPadOptionsBuilder
{
+ typedef MirrorPadOptions Table;
flatbuffers::FlatBufferBuilder &fbb_;
flatbuffers::uoffset_t start_;
- void add_mode(MirrorPadMode mode)
+ void add_mode(onert_tflite::MirrorPadMode mode)
{
fbb_.AddElement<int8_t>(MirrorPadOptions::VT_MODE, static_cast<int8_t>(mode), 0);
}
@@ -6447,7 +7143,6 @@ struct MirrorPadOptionsBuilder
{
start_ = fbb_.StartTable();
}
- MirrorPadOptionsBuilder &operator=(const MirrorPadOptionsBuilder &);
flatbuffers::Offset<MirrorPadOptions> Finish()
{
const auto end = fbb_.EndTable(start_);
@@ -6458,7 +7153,7 @@ struct MirrorPadOptionsBuilder
inline flatbuffers::Offset<MirrorPadOptions>
CreateMirrorPadOptions(flatbuffers::FlatBufferBuilder &_fbb,
- MirrorPadMode mode = MirrorPadMode_REFLECT)
+ onert_tflite::MirrorPadMode mode = onert_tflite::MirrorPadMode_REFLECT)
{
MirrorPadOptionsBuilder builder_(_fbb);
builder_.add_mode(mode);
@@ -6467,13 +7162,14 @@ CreateMirrorPadOptions(flatbuffers::FlatBufferBuilder &_fbb,
struct UniqueOptions FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table
{
- enum
+ typedef UniqueOptionsBuilder Builder;
+ enum FlatBuffersVTableOffset FLATBUFFERS_VTABLE_UNDERLYING_TYPE
{
VT_IDX_OUT_TYPE = 4
};
- TensorType idx_out_type() const
+ onert_tflite::TensorType idx_out_type() const
{
- return static_cast<TensorType>(GetField<int8_t>(VT_IDX_OUT_TYPE, 2));
+ return static_cast<onert_tflite::TensorType>(GetField<int8_t>(VT_IDX_OUT_TYPE, 2));
}
bool Verify(flatbuffers::Verifier &verifier) const
{
@@ -6484,9 +7180,10 @@ struct UniqueOptions FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table
struct UniqueOptionsBuilder
{
+ typedef UniqueOptions Table;
flatbuffers::FlatBufferBuilder &fbb_;
flatbuffers::uoffset_t start_;
- void add_idx_out_type(TensorType idx_out_type)
+ void add_idx_out_type(onert_tflite::TensorType idx_out_type)
{
fbb_.AddElement<int8_t>(UniqueOptions::VT_IDX_OUT_TYPE, static_cast<int8_t>(idx_out_type), 2);
}
@@ -6494,7 +7191,6 @@ struct UniqueOptionsBuilder
{
start_ = fbb_.StartTable();
}
- UniqueOptionsBuilder &operator=(const UniqueOptionsBuilder &);
flatbuffers::Offset<UniqueOptions> Finish()
{
const auto end = fbb_.EndTable(start_);
@@ -6505,7 +7201,7 @@ struct UniqueOptionsBuilder
inline flatbuffers::Offset<UniqueOptions>
CreateUniqueOptions(flatbuffers::FlatBufferBuilder &_fbb,
- TensorType idx_out_type = TensorType_INT32)
+ onert_tflite::TensorType idx_out_type = onert_tflite::TensorType_INT32)
{
UniqueOptionsBuilder builder_(_fbb);
builder_.add_idx_out_type(idx_out_type);
@@ -6514,6 +7210,7 @@ CreateUniqueOptions(flatbuffers::FlatBufferBuilder &_fbb,
struct ReverseV2Options FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table
{
+ typedef ReverseV2OptionsBuilder Builder;
bool Verify(flatbuffers::Verifier &verifier) const
{
return VerifyTableStart(verifier) && verifier.EndTable();
@@ -6522,13 +7219,13 @@ struct ReverseV2Options FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table
struct ReverseV2OptionsBuilder
{
+ typedef ReverseV2Options Table;
flatbuffers::FlatBufferBuilder &fbb_;
flatbuffers::uoffset_t start_;
explicit ReverseV2OptionsBuilder(flatbuffers::FlatBufferBuilder &_fbb) : fbb_(_fbb)
{
start_ = fbb_.StartTable();
}
- ReverseV2OptionsBuilder &operator=(const ReverseV2OptionsBuilder &);
flatbuffers::Offset<ReverseV2Options> Finish()
{
const auto end = fbb_.EndTable(start_);
@@ -6546,6 +7243,7 @@ CreateReverseV2Options(flatbuffers::FlatBufferBuilder &_fbb)
struct AddNOptions FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table
{
+ typedef AddNOptionsBuilder Builder;
bool Verify(flatbuffers::Verifier &verifier) const
{
return VerifyTableStart(verifier) && verifier.EndTable();
@@ -6554,13 +7252,13 @@ struct AddNOptions FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table
struct AddNOptionsBuilder
{
+ typedef AddNOptions Table;
flatbuffers::FlatBufferBuilder &fbb_;
flatbuffers::uoffset_t start_;
explicit AddNOptionsBuilder(flatbuffers::FlatBufferBuilder &_fbb) : fbb_(_fbb)
{
start_ = fbb_.StartTable();
}
- AddNOptionsBuilder &operator=(const AddNOptionsBuilder &);
flatbuffers::Offset<AddNOptions> Finish()
{
const auto end = fbb_.EndTable(start_);
@@ -6577,6 +7275,7 @@ inline flatbuffers::Offset<AddNOptions> CreateAddNOptions(flatbuffers::FlatBuffe
struct GatherNdOptions FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table
{
+ typedef GatherNdOptionsBuilder Builder;
bool Verify(flatbuffers::Verifier &verifier) const
{
return VerifyTableStart(verifier) && verifier.EndTable();
@@ -6585,13 +7284,13 @@ struct GatherNdOptions FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table
struct GatherNdOptionsBuilder
{
+ typedef GatherNdOptions Table;
flatbuffers::FlatBufferBuilder &fbb_;
flatbuffers::uoffset_t start_;
explicit GatherNdOptionsBuilder(flatbuffers::FlatBufferBuilder &_fbb) : fbb_(_fbb)
{
start_ = fbb_.StartTable();
}
- GatherNdOptionsBuilder &operator=(const GatherNdOptionsBuilder &);
flatbuffers::Offset<GatherNdOptions> Finish()
{
const auto end = fbb_.EndTable(start_);
@@ -6609,6 +7308,7 @@ CreateGatherNdOptions(flatbuffers::FlatBufferBuilder &_fbb)
struct WhereOptions FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table
{
+ typedef WhereOptionsBuilder Builder;
bool Verify(flatbuffers::Verifier &verifier) const
{
return VerifyTableStart(verifier) && verifier.EndTable();
@@ -6617,13 +7317,13 @@ struct WhereOptions FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table
struct WhereOptionsBuilder
{
+ typedef WhereOptions Table;
flatbuffers::FlatBufferBuilder &fbb_;
flatbuffers::uoffset_t start_;
explicit WhereOptionsBuilder(flatbuffers::FlatBufferBuilder &_fbb) : fbb_(_fbb)
{
start_ = fbb_.StartTable();
}
- WhereOptionsBuilder &operator=(const WhereOptionsBuilder &);
flatbuffers::Offset<WhereOptions> Finish()
{
const auto end = fbb_.EndTable(start_);
@@ -6640,7 +7340,8 @@ inline flatbuffers::Offset<WhereOptions> CreateWhereOptions(flatbuffers::FlatBuf
struct ReverseSequenceOptions FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table
{
- enum
+ typedef ReverseSequenceOptionsBuilder Builder;
+ enum FlatBuffersVTableOffset FLATBUFFERS_VTABLE_UNDERLYING_TYPE
{
VT_SEQ_DIM = 4,
VT_BATCH_DIM = 6
@@ -6656,6 +7357,7 @@ struct ReverseSequenceOptions FLATBUFFERS_FINAL_CLASS : private flatbuffers::Tab
struct ReverseSequenceOptionsBuilder
{
+ typedef ReverseSequenceOptions Table;
flatbuffers::FlatBufferBuilder &fbb_;
flatbuffers::uoffset_t start_;
void add_seq_dim(int32_t seq_dim)
@@ -6670,7 +7372,6 @@ struct ReverseSequenceOptionsBuilder
{
start_ = fbb_.StartTable();
}
- ReverseSequenceOptionsBuilder &operator=(const ReverseSequenceOptionsBuilder &);
flatbuffers::Offset<ReverseSequenceOptions> Finish()
{
const auto end = fbb_.EndTable(start_);
@@ -6691,6 +7392,7 @@ CreateReverseSequenceOptions(flatbuffers::FlatBufferBuilder &_fbb, int32_t seq_d
struct MatrixDiagOptions FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table
{
+ typedef MatrixDiagOptionsBuilder Builder;
bool Verify(flatbuffers::Verifier &verifier) const
{
return VerifyTableStart(verifier) && verifier.EndTable();
@@ -6699,13 +7401,13 @@ struct MatrixDiagOptions FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table
struct MatrixDiagOptionsBuilder
{
+ typedef MatrixDiagOptions Table;
flatbuffers::FlatBufferBuilder &fbb_;
flatbuffers::uoffset_t start_;
explicit MatrixDiagOptionsBuilder(flatbuffers::FlatBufferBuilder &_fbb) : fbb_(_fbb)
{
start_ = fbb_.StartTable();
}
- MatrixDiagOptionsBuilder &operator=(const MatrixDiagOptionsBuilder &);
flatbuffers::Offset<MatrixDiagOptions> Finish()
{
const auto end = fbb_.EndTable(start_);
@@ -6723,6 +7425,7 @@ CreateMatrixDiagOptions(flatbuffers::FlatBufferBuilder &_fbb)
struct QuantizeOptions FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table
{
+ typedef QuantizeOptionsBuilder Builder;
bool Verify(flatbuffers::Verifier &verifier) const
{
return VerifyTableStart(verifier) && verifier.EndTable();
@@ -6731,13 +7434,13 @@ struct QuantizeOptions FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table
struct QuantizeOptionsBuilder
{
+ typedef QuantizeOptions Table;
flatbuffers::FlatBufferBuilder &fbb_;
flatbuffers::uoffset_t start_;
explicit QuantizeOptionsBuilder(flatbuffers::FlatBufferBuilder &_fbb) : fbb_(_fbb)
{
start_ = fbb_.StartTable();
}
- QuantizeOptionsBuilder &operator=(const QuantizeOptionsBuilder &);
flatbuffers::Offset<QuantizeOptions> Finish()
{
const auto end = fbb_.EndTable(start_);
@@ -6755,6 +7458,7 @@ CreateQuantizeOptions(flatbuffers::FlatBufferBuilder &_fbb)
struct MatrixSetDiagOptions FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table
{
+ typedef MatrixSetDiagOptionsBuilder Builder;
bool Verify(flatbuffers::Verifier &verifier) const
{
return VerifyTableStart(verifier) && verifier.EndTable();
@@ -6763,13 +7467,13 @@ struct MatrixSetDiagOptions FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table
struct MatrixSetDiagOptionsBuilder
{
+ typedef MatrixSetDiagOptions Table;
flatbuffers::FlatBufferBuilder &fbb_;
flatbuffers::uoffset_t start_;
explicit MatrixSetDiagOptionsBuilder(flatbuffers::FlatBufferBuilder &_fbb) : fbb_(_fbb)
{
start_ = fbb_.StartTable();
}
- MatrixSetDiagOptionsBuilder &operator=(const MatrixSetDiagOptionsBuilder &);
flatbuffers::Offset<MatrixSetDiagOptions> Finish()
{
const auto end = fbb_.EndTable(start_);
@@ -6787,7 +7491,8 @@ CreateMatrixSetDiagOptions(flatbuffers::FlatBufferBuilder &_fbb)
struct IfOptions FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table
{
- enum
+ typedef IfOptionsBuilder Builder;
+ enum FlatBuffersVTableOffset FLATBUFFERS_VTABLE_UNDERLYING_TYPE
{
VT_THEN_SUBGRAPH_INDEX = 4,
VT_ELSE_SUBGRAPH_INDEX = 6
@@ -6803,6 +7508,7 @@ struct IfOptions FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table
struct IfOptionsBuilder
{
+ typedef IfOptions Table;
flatbuffers::FlatBufferBuilder &fbb_;
flatbuffers::uoffset_t start_;
void add_then_subgraph_index(int32_t then_subgraph_index)
@@ -6817,7 +7523,6 @@ struct IfOptionsBuilder
{
start_ = fbb_.StartTable();
}
- IfOptionsBuilder &operator=(const IfOptionsBuilder &);
flatbuffers::Offset<IfOptions> Finish()
{
const auto end = fbb_.EndTable(start_);
@@ -6836,9 +7541,54 @@ inline flatbuffers::Offset<IfOptions> CreateIfOptions(flatbuffers::FlatBufferBui
return builder_.Finish();
}
+struct CallOnceOptions FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table
+{
+ typedef CallOnceOptionsBuilder Builder;
+ enum FlatBuffersVTableOffset FLATBUFFERS_VTABLE_UNDERLYING_TYPE
+ {
+ VT_INIT_SUBGRAPH_INDEX = 4
+ };
+ int32_t init_subgraph_index() const { return GetField<int32_t>(VT_INIT_SUBGRAPH_INDEX, 0); }
+ bool Verify(flatbuffers::Verifier &verifier) const
+ {
+ return VerifyTableStart(verifier) && VerifyField<int32_t>(verifier, VT_INIT_SUBGRAPH_INDEX) &&
+ verifier.EndTable();
+ }
+};
+
+struct CallOnceOptionsBuilder
+{
+ typedef CallOnceOptions Table;
+ flatbuffers::FlatBufferBuilder &fbb_;
+ flatbuffers::uoffset_t start_;
+ void add_init_subgraph_index(int32_t init_subgraph_index)
+ {
+ fbb_.AddElement<int32_t>(CallOnceOptions::VT_INIT_SUBGRAPH_INDEX, init_subgraph_index, 0);
+ }
+ explicit CallOnceOptionsBuilder(flatbuffers::FlatBufferBuilder &_fbb) : fbb_(_fbb)
+ {
+ start_ = fbb_.StartTable();
+ }
+ flatbuffers::Offset<CallOnceOptions> Finish()
+ {
+ const auto end = fbb_.EndTable(start_);
+ auto o = flatbuffers::Offset<CallOnceOptions>(end);
+ return o;
+ }
+};
+
+inline flatbuffers::Offset<CallOnceOptions>
+CreateCallOnceOptions(flatbuffers::FlatBufferBuilder &_fbb, int32_t init_subgraph_index = 0)
+{
+ CallOnceOptionsBuilder builder_(_fbb);
+ builder_.add_init_subgraph_index(init_subgraph_index);
+ return builder_.Finish();
+}
+
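+// The block above is the one genuinely new table in this stretch of the
+// diff: CallOnceOptions, carrying a single init_subgraph_index. A write /
+// verify / read sketch (include name assumed, as before):
+//
+//   #include "tflite_schema_generated.h"
+//   #include <cassert>
+//
+//   int main()
+//   {
+//     flatbuffers::FlatBufferBuilder fbb;
+//     fbb.Finish(onert_tflite::CreateCallOnceOptions(fbb, /*init_subgraph_index=*/3));
+//
+//     // Drives the generated CallOnceOptions::Verify() shown above.
+//     flatbuffers::Verifier verifier(fbb.GetBufferPointer(), fbb.GetSize());
+//     assert(verifier.VerifyBuffer<onert_tflite::CallOnceOptions>(nullptr));
+//
+//     auto *opts =
+//       flatbuffers::GetRoot<onert_tflite::CallOnceOptions>(fbb.GetBufferPointer());
+//     assert(opts->init_subgraph_index() == 3);
+//     return 0;
+//   }
+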
struct WhileOptions FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table
{
- enum
+ typedef WhileOptionsBuilder Builder;
+ enum FlatBuffersVTableOffset FLATBUFFERS_VTABLE_UNDERLYING_TYPE
{
VT_COND_SUBGRAPH_INDEX = 4,
VT_BODY_SUBGRAPH_INDEX = 6
@@ -6854,6 +7604,7 @@ struct WhileOptions FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table
struct WhileOptionsBuilder
{
+ typedef WhileOptions Table;
flatbuffers::FlatBufferBuilder &fbb_;
flatbuffers::uoffset_t start_;
void add_cond_subgraph_index(int32_t cond_subgraph_index)
@@ -6868,7 +7619,6 @@ struct WhileOptionsBuilder
{
start_ = fbb_.StartTable();
}
- WhileOptionsBuilder &operator=(const WhileOptionsBuilder &);
flatbuffers::Offset<WhileOptions> Finish()
{
const auto end = fbb_.EndTable(start_);
@@ -6889,6 +7639,7 @@ inline flatbuffers::Offset<WhileOptions> CreateWhileOptions(flatbuffers::FlatBuf
struct NonMaxSuppressionV4Options FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table
{
+ typedef NonMaxSuppressionV4OptionsBuilder Builder;
bool Verify(flatbuffers::Verifier &verifier) const
{
return VerifyTableStart(verifier) && verifier.EndTable();
@@ -6897,13 +7648,13 @@ struct NonMaxSuppressionV4Options FLATBUFFERS_FINAL_CLASS : private flatbuffers:
struct NonMaxSuppressionV4OptionsBuilder
{
+ typedef NonMaxSuppressionV4Options Table;
flatbuffers::FlatBufferBuilder &fbb_;
flatbuffers::uoffset_t start_;
explicit NonMaxSuppressionV4OptionsBuilder(flatbuffers::FlatBufferBuilder &_fbb) : fbb_(_fbb)
{
start_ = fbb_.StartTable();
}
- NonMaxSuppressionV4OptionsBuilder &operator=(const NonMaxSuppressionV4OptionsBuilder &);
flatbuffers::Offset<NonMaxSuppressionV4Options> Finish()
{
const auto end = fbb_.EndTable(start_);
@@ -6921,6 +7672,7 @@ CreateNonMaxSuppressionV4Options(flatbuffers::FlatBufferBuilder &_fbb)
struct NonMaxSuppressionV5Options FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table
{
+ typedef NonMaxSuppressionV5OptionsBuilder Builder;
bool Verify(flatbuffers::Verifier &verifier) const
{
return VerifyTableStart(verifier) && verifier.EndTable();
@@ -6929,13 +7681,13 @@ struct NonMaxSuppressionV5Options FLATBUFFERS_FINAL_CLASS : private flatbuffers:
struct NonMaxSuppressionV5OptionsBuilder
{
+ typedef NonMaxSuppressionV5Options Table;
flatbuffers::FlatBufferBuilder &fbb_;
flatbuffers::uoffset_t start_;
explicit NonMaxSuppressionV5OptionsBuilder(flatbuffers::FlatBufferBuilder &_fbb) : fbb_(_fbb)
{
start_ = fbb_.StartTable();
}
- NonMaxSuppressionV5OptionsBuilder &operator=(const NonMaxSuppressionV5OptionsBuilder &);
flatbuffers::Offset<NonMaxSuppressionV5Options> Finish()
{
const auto end = fbb_.EndTable(start_);
@@ -6953,6 +7705,7 @@ CreateNonMaxSuppressionV5Options(flatbuffers::FlatBufferBuilder &_fbb)
struct ScatterNdOptions FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table
{
+ typedef ScatterNdOptionsBuilder Builder;
bool Verify(flatbuffers::Verifier &verifier) const
{
return VerifyTableStart(verifier) && verifier.EndTable();
@@ -6961,13 +7714,13 @@ struct ScatterNdOptions FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table
struct ScatterNdOptionsBuilder
{
+ typedef ScatterNdOptions Table;
flatbuffers::FlatBufferBuilder &fbb_;
flatbuffers::uoffset_t start_;
explicit ScatterNdOptionsBuilder(flatbuffers::FlatBufferBuilder &_fbb) : fbb_(_fbb)
{
start_ = fbb_.StartTable();
}
- ScatterNdOptionsBuilder &operator=(const ScatterNdOptionsBuilder &);
flatbuffers::Offset<ScatterNdOptions> Finish()
{
const auto end = fbb_.EndTable(start_);
@@ -6985,6 +7738,7 @@ CreateScatterNdOptions(flatbuffers::FlatBufferBuilder &_fbb)
struct SelectV2Options FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table
{
+ typedef SelectV2OptionsBuilder Builder;
bool Verify(flatbuffers::Verifier &verifier) const
{
return VerifyTableStart(verifier) && verifier.EndTable();
@@ -6993,13 +7747,13 @@ struct SelectV2Options FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table
struct SelectV2OptionsBuilder
{
+ typedef SelectV2Options Table;
flatbuffers::FlatBufferBuilder &fbb_;
flatbuffers::uoffset_t start_;
explicit SelectV2OptionsBuilder(flatbuffers::FlatBufferBuilder &_fbb) : fbb_(_fbb)
{
start_ = fbb_.StartTable();
}
- SelectV2OptionsBuilder &operator=(const SelectV2OptionsBuilder &);
flatbuffers::Offset<SelectV2Options> Finish()
{
const auto end = fbb_.EndTable(start_);
@@ -7017,6 +7771,7 @@ CreateSelectV2Options(flatbuffers::FlatBufferBuilder &_fbb)
struct DensifyOptions FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table
{
+ typedef DensifyOptionsBuilder Builder;
bool Verify(flatbuffers::Verifier &verifier) const
{
return VerifyTableStart(verifier) && verifier.EndTable();
@@ -7025,13 +7780,13 @@ struct DensifyOptions FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table
struct DensifyOptionsBuilder
{
+ typedef DensifyOptions Table;
flatbuffers::FlatBufferBuilder &fbb_;
flatbuffers::uoffset_t start_;
explicit DensifyOptionsBuilder(flatbuffers::FlatBufferBuilder &_fbb) : fbb_(_fbb)
{
start_ = fbb_.StartTable();
}
- DensifyOptionsBuilder &operator=(const DensifyOptionsBuilder &);
flatbuffers::Offset<DensifyOptions> Finish()
{
const auto end = fbb_.EndTable(start_);
@@ -7049,6 +7804,7 @@ CreateDensifyOptions(flatbuffers::FlatBufferBuilder &_fbb)
struct SegmentSumOptions FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table
{
+ typedef SegmentSumOptionsBuilder Builder;
bool Verify(flatbuffers::Verifier &verifier) const
{
return VerifyTableStart(verifier) && verifier.EndTable();
@@ -7057,13 +7813,13 @@ struct SegmentSumOptions FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table
struct SegmentSumOptionsBuilder
{
+ typedef SegmentSumOptions Table;
flatbuffers::FlatBufferBuilder &fbb_;
flatbuffers::uoffset_t start_;
explicit SegmentSumOptionsBuilder(flatbuffers::FlatBufferBuilder &_fbb) : fbb_(_fbb)
{
start_ = fbb_.StartTable();
}
- SegmentSumOptionsBuilder &operator=(const SegmentSumOptionsBuilder &);
flatbuffers::Offset<SegmentSumOptions> Finish()
{
const auto end = fbb_.EndTable(start_);
@@ -7081,39 +7837,49 @@ CreateSegmentSumOptions(flatbuffers::FlatBufferBuilder &_fbb)
struct BatchMatMulOptions FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table
{
- enum
+ typedef BatchMatMulOptionsBuilder Builder;
+ enum FlatBuffersVTableOffset FLATBUFFERS_VTABLE_UNDERLYING_TYPE
{
- VT_ADJOINT_LHS = 4,
- VT_ADJOINT_RHS = 6
+ VT_ADJ_X = 4,
+ VT_ADJ_Y = 6,
+ VT_ASYMMETRIC_QUANTIZE_INPUTS = 8
};
- bool adjoint_lhs() const { return GetField<uint8_t>(VT_ADJOINT_LHS, 0) != 0; }
- bool adjoint_rhs() const { return GetField<uint8_t>(VT_ADJOINT_RHS, 0) != 0; }
+ bool adj_x() const { return GetField<uint8_t>(VT_ADJ_X, 0) != 0; }
+ bool adj_y() const { return GetField<uint8_t>(VT_ADJ_Y, 0) != 0; }
+ bool asymmetric_quantize_inputs() const
+ {
+ return GetField<uint8_t>(VT_ASYMMETRIC_QUANTIZE_INPUTS, 0) != 0;
+ }
bool Verify(flatbuffers::Verifier &verifier) const
{
- return VerifyTableStart(verifier) && VerifyField<uint8_t>(verifier, VT_ADJOINT_LHS) &&
- VerifyField<uint8_t>(verifier, VT_ADJOINT_RHS) && verifier.EndTable();
+ return VerifyTableStart(verifier) && VerifyField<uint8_t>(verifier, VT_ADJ_X) &&
+ VerifyField<uint8_t>(verifier, VT_ADJ_Y) &&
+ VerifyField<uint8_t>(verifier, VT_ASYMMETRIC_QUANTIZE_INPUTS) && verifier.EndTable();
}
};
struct BatchMatMulOptionsBuilder
{
+ typedef BatchMatMulOptions Table;
flatbuffers::FlatBufferBuilder &fbb_;
flatbuffers::uoffset_t start_;
- void add_adjoint_lhs(bool adjoint_lhs)
+ void add_adj_x(bool adj_x)
{
- fbb_.AddElement<uint8_t>(BatchMatMulOptions::VT_ADJOINT_LHS, static_cast<uint8_t>(adjoint_lhs),
- 0);
+ fbb_.AddElement<uint8_t>(BatchMatMulOptions::VT_ADJ_X, static_cast<uint8_t>(adj_x), 0);
}
- void add_adjoint_rhs(bool adjoint_rhs)
+ void add_adj_y(bool adj_y)
{
- fbb_.AddElement<uint8_t>(BatchMatMulOptions::VT_ADJOINT_RHS, static_cast<uint8_t>(adjoint_rhs),
- 0);
+ fbb_.AddElement<uint8_t>(BatchMatMulOptions::VT_ADJ_Y, static_cast<uint8_t>(adj_y), 0);
+ }
+ void add_asymmetric_quantize_inputs(bool asymmetric_quantize_inputs)
+ {
+ fbb_.AddElement<uint8_t>(BatchMatMulOptions::VT_ASYMMETRIC_QUANTIZE_INPUTS,
+ static_cast<uint8_t>(asymmetric_quantize_inputs), 0);
}
explicit BatchMatMulOptionsBuilder(flatbuffers::FlatBufferBuilder &_fbb) : fbb_(_fbb)
{
start_ = fbb_.StartTable();
}
- BatchMatMulOptionsBuilder &operator=(const BatchMatMulOptionsBuilder &);
flatbuffers::Offset<BatchMatMulOptions> Finish()
{
const auto end = fbb_.EndTable(start_);
@@ -7123,47 +7889,517 @@ struct BatchMatMulOptionsBuilder
};
inline flatbuffers::Offset<BatchMatMulOptions>
-CreateBatchMatMulOptions(flatbuffers::FlatBufferBuilder &_fbb, bool adjoint_lhs = false,
- bool adjoint_rhs = false)
+CreateBatchMatMulOptions(flatbuffers::FlatBufferBuilder &_fbb, bool adj_x = false,
+ bool adj_y = false, bool asymmetric_quantize_inputs = false)
{
BatchMatMulOptionsBuilder builder_(_fbb);
- builder_.add_adjoint_rhs(adjoint_rhs);
- builder_.add_adjoint_lhs(adjoint_lhs);
+ builder_.add_asymmetric_quantize_inputs(asymmetric_quantize_inputs);
+ builder_.add_adj_y(adj_y);
+ builder_.add_adj_x(adj_x);
return builder_.Finish();
}
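
// Usage sketch (assumed caller code): this diff renames adjoint_lhs/adjoint_rhs
// to the upstream names adj_x/adj_y -- each flag treats that operand as
// transposed in its two innermost dimensions -- and adds
// asymmetric_quantize_inputs for hybrid-quantized kernels.
inline flatbuffers::Offset<onert_tflite::BatchMatMulOptions>
MakeBatchMatMulXTimesYT(flatbuffers::FlatBufferBuilder &fbb)
{
  // With adj_y set, [B, M, K] x [B, N, K] multiplies as X * Y^T -> [B, M, N].
  return onert_tflite::CreateBatchMatMulOptions(fbb, /*adj_x=*/false, /*adj_y=*/true,
                                                /*asymmetric_quantize_inputs=*/false);
}
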
-struct OperatorCode FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table
+struct CumsumOptions FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table
{
- enum
+ typedef CumsumOptionsBuilder Builder;
+ enum FlatBuffersVTableOffset FLATBUFFERS_VTABLE_UNDERLYING_TYPE
{
- VT_BUILTIN_CODE = 4,
- VT_CUSTOM_CODE = 6,
- VT_VERSION = 8
+ VT_EXCLUSIVE = 4,
+ VT_REVERSE = 6
+ };
+ bool exclusive() const { return GetField<uint8_t>(VT_EXCLUSIVE, 0) != 0; }
+ bool reverse() const { return GetField<uint8_t>(VT_REVERSE, 0) != 0; }
+ bool Verify(flatbuffers::Verifier &verifier) const
+ {
+ return VerifyTableStart(verifier) && VerifyField<uint8_t>(verifier, VT_EXCLUSIVE) &&
+ VerifyField<uint8_t>(verifier, VT_REVERSE) && verifier.EndTable();
+ }
+};
+
+struct CumsumOptionsBuilder
+{
+ typedef CumsumOptions Table;
+ flatbuffers::FlatBufferBuilder &fbb_;
+ flatbuffers::uoffset_t start_;
+ void add_exclusive(bool exclusive)
+ {
+ fbb_.AddElement<uint8_t>(CumsumOptions::VT_EXCLUSIVE, static_cast<uint8_t>(exclusive), 0);
+ }
+ void add_reverse(bool reverse)
+ {
+ fbb_.AddElement<uint8_t>(CumsumOptions::VT_REVERSE, static_cast<uint8_t>(reverse), 0);
+ }
+ explicit CumsumOptionsBuilder(flatbuffers::FlatBufferBuilder &_fbb) : fbb_(_fbb)
+ {
+ start_ = fbb_.StartTable();
+ }
+ flatbuffers::Offset<CumsumOptions> Finish()
+ {
+ const auto end = fbb_.EndTable(start_);
+ auto o = flatbuffers::Offset<CumsumOptions>(end);
+ return o;
+ }
+};
+
+inline flatbuffers::Offset<CumsumOptions> CreateCumsumOptions(flatbuffers::FlatBufferBuilder &_fbb,
+ bool exclusive = false,
+ bool reverse = false)
+{
+ CumsumOptionsBuilder builder_(_fbb);
+ builder_.add_reverse(reverse);
+ builder_.add_exclusive(exclusive);
+ return builder_.Finish();
+}
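
// Usage sketch (assumed caller code): for input [1, 2, 3] a cumulative sum
// yields [1, 3, 6]; exclusive shifts it to [0, 1, 3]; reverse accumulates
// from the end, giving [6, 5, 3].
inline flatbuffers::Offset<onert_tflite::CumsumOptions>
MakeExclusiveCumsum(flatbuffers::FlatBufferBuilder &fbb)
{
  return onert_tflite::CreateCumsumOptions(fbb, /*exclusive=*/true, /*reverse=*/false);
}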
+
+struct BroadcastToOptions FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table
+{
+ typedef BroadcastToOptionsBuilder Builder;
+ bool Verify(flatbuffers::Verifier &verifier) const
+ {
+ return VerifyTableStart(verifier) && verifier.EndTable();
+ }
+};
+
+struct BroadcastToOptionsBuilder
+{
+ typedef BroadcastToOptions Table;
+ flatbuffers::FlatBufferBuilder &fbb_;
+ flatbuffers::uoffset_t start_;
+ explicit BroadcastToOptionsBuilder(flatbuffers::FlatBufferBuilder &_fbb) : fbb_(_fbb)
+ {
+ start_ = fbb_.StartTable();
+ }
+ flatbuffers::Offset<BroadcastToOptions> Finish()
+ {
+ const auto end = fbb_.EndTable(start_);
+ auto o = flatbuffers::Offset<BroadcastToOptions>(end);
+ return o;
+ }
+};
+
+inline flatbuffers::Offset<BroadcastToOptions>
+CreateBroadcastToOptions(flatbuffers::FlatBufferBuilder &_fbb)
+{
+ BroadcastToOptionsBuilder builder_(_fbb);
+ return builder_.Finish();
+}
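
// Note: BroadcastToOptions -- like Rfft2dOptions, HashtableFind/Import/
// SizeOptions, and Read/AssignVariableOptions below -- is an empty table that
// exists only to occupy the operator's BuiltinOptions slot, so creating it
// takes no arguments:
inline flatbuffers::Offset<onert_tflite::BroadcastToOptions>
MakeBroadcastTo(flatbuffers::FlatBufferBuilder &fbb)
{
  return onert_tflite::CreateBroadcastToOptions(fbb);
}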
+
+struct Rfft2dOptions FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table
+{
+ typedef Rfft2dOptionsBuilder Builder;
+ bool Verify(flatbuffers::Verifier &verifier) const
+ {
+ return VerifyTableStart(verifier) && verifier.EndTable();
+ }
+};
+
+struct Rfft2dOptionsBuilder
+{
+ typedef Rfft2dOptions Table;
+ flatbuffers::FlatBufferBuilder &fbb_;
+ flatbuffers::uoffset_t start_;
+ explicit Rfft2dOptionsBuilder(flatbuffers::FlatBufferBuilder &_fbb) : fbb_(_fbb)
+ {
+ start_ = fbb_.StartTable();
+ }
+ flatbuffers::Offset<Rfft2dOptions> Finish()
+ {
+ const auto end = fbb_.EndTable(start_);
+ auto o = flatbuffers::Offset<Rfft2dOptions>(end);
+ return o;
+ }
+};
+
+inline flatbuffers::Offset<Rfft2dOptions> CreateRfft2dOptions(flatbuffers::FlatBufferBuilder &_fbb)
+{
+ Rfft2dOptionsBuilder builder_(_fbb);
+ return builder_.Finish();
+}
+
+struct HashtableOptions FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table
+{
+ typedef HashtableOptionsBuilder Builder;
+ enum FlatBuffersVTableOffset FLATBUFFERS_VTABLE_UNDERLYING_TYPE
+ {
+ VT_TABLE_ID = 4,
+ VT_KEY_DTYPE = 6,
+ VT_VALUE_DTYPE = 8
+ };
+ int32_t table_id() const { return GetField<int32_t>(VT_TABLE_ID, 0); }
+ onert_tflite::TensorType key_dtype() const
+ {
+ return static_cast<onert_tflite::TensorType>(GetField<int8_t>(VT_KEY_DTYPE, 0));
+ }
+ onert_tflite::TensorType value_dtype() const
+ {
+ return static_cast<onert_tflite::TensorType>(GetField<int8_t>(VT_VALUE_DTYPE, 0));
+ }
+ bool Verify(flatbuffers::Verifier &verifier) const
+ {
+ return VerifyTableStart(verifier) && VerifyField<int32_t>(verifier, VT_TABLE_ID) &&
+ VerifyField<int8_t>(verifier, VT_KEY_DTYPE) &&
+ VerifyField<int8_t>(verifier, VT_VALUE_DTYPE) && verifier.EndTable();
+ }
+};
+
+struct HashtableOptionsBuilder
+{
+ typedef HashtableOptions Table;
+ flatbuffers::FlatBufferBuilder &fbb_;
+ flatbuffers::uoffset_t start_;
+ void add_table_id(int32_t table_id)
+ {
+ fbb_.AddElement<int32_t>(HashtableOptions::VT_TABLE_ID, table_id, 0);
+ }
+ void add_key_dtype(onert_tflite::TensorType key_dtype)
+ {
+ fbb_.AddElement<int8_t>(HashtableOptions::VT_KEY_DTYPE, static_cast<int8_t>(key_dtype), 0);
+ }
+ void add_value_dtype(onert_tflite::TensorType value_dtype)
+ {
+ fbb_.AddElement<int8_t>(HashtableOptions::VT_VALUE_DTYPE, static_cast<int8_t>(value_dtype), 0);
+ }
+ explicit HashtableOptionsBuilder(flatbuffers::FlatBufferBuilder &_fbb) : fbb_(_fbb)
+ {
+ start_ = fbb_.StartTable();
+ }
+ flatbuffers::Offset<HashtableOptions> Finish()
+ {
+ const auto end = fbb_.EndTable(start_);
+ auto o = flatbuffers::Offset<HashtableOptions>(end);
+ return o;
+ }
+};
+
+inline flatbuffers::Offset<HashtableOptions>
+CreateHashtableOptions(flatbuffers::FlatBufferBuilder &_fbb, int32_t table_id = 0,
+ onert_tflite::TensorType key_dtype = onert_tflite::TensorType_FLOAT32,
+ onert_tflite::TensorType value_dtype = onert_tflite::TensorType_FLOAT32)
+{
+ HashtableOptionsBuilder builder_(_fbb);
+ builder_.add_table_id(table_id);
+ builder_.add_value_dtype(value_dtype);
+ builder_.add_key_dtype(key_dtype);
+ return builder_.Finish();
+}
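
// Usage sketch (assumed caller code): an INT64 -> STRING lookup table.
// table_id names the shared hashtable resource; the dtypes are ordinary
// onert_tflite::TensorType values.
inline flatbuffers::Offset<onert_tflite::HashtableOptions>
MakeInt64ToStringHashtable(flatbuffers::FlatBufferBuilder &fbb)
{
  return onert_tflite::CreateHashtableOptions(fbb, /*table_id=*/7,
                                              onert_tflite::TensorType_INT64,
                                              onert_tflite::TensorType_STRING);
}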
+
+struct HashtableFindOptions FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table
+{
+ typedef HashtableFindOptionsBuilder Builder;
+ bool Verify(flatbuffers::Verifier &verifier) const
+ {
+ return VerifyTableStart(verifier) && verifier.EndTable();
+ }
+};
+
+struct HashtableFindOptionsBuilder
+{
+ typedef HashtableFindOptions Table;
+ flatbuffers::FlatBufferBuilder &fbb_;
+ flatbuffers::uoffset_t start_;
+ explicit HashtableFindOptionsBuilder(flatbuffers::FlatBufferBuilder &_fbb) : fbb_(_fbb)
+ {
+ start_ = fbb_.StartTable();
+ }
+ flatbuffers::Offset<HashtableFindOptions> Finish()
+ {
+ const auto end = fbb_.EndTable(start_);
+ auto o = flatbuffers::Offset<HashtableFindOptions>(end);
+ return o;
+ }
+};
+
+inline flatbuffers::Offset<HashtableFindOptions>
+CreateHashtableFindOptions(flatbuffers::FlatBufferBuilder &_fbb)
+{
+ HashtableFindOptionsBuilder builder_(_fbb);
+ return builder_.Finish();
+}
+
+struct HashtableImportOptions FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table
+{
+ typedef HashtableImportOptionsBuilder Builder;
+ bool Verify(flatbuffers::Verifier &verifier) const
+ {
+ return VerifyTableStart(verifier) && verifier.EndTable();
+ }
+};
+
+struct HashtableImportOptionsBuilder
+{
+ typedef HashtableImportOptions Table;
+ flatbuffers::FlatBufferBuilder &fbb_;
+ flatbuffers::uoffset_t start_;
+ explicit HashtableImportOptionsBuilder(flatbuffers::FlatBufferBuilder &_fbb) : fbb_(_fbb)
+ {
+ start_ = fbb_.StartTable();
+ }
+ flatbuffers::Offset<HashtableImportOptions> Finish()
+ {
+ const auto end = fbb_.EndTable(start_);
+ auto o = flatbuffers::Offset<HashtableImportOptions>(end);
+ return o;
+ }
+};
+
+inline flatbuffers::Offset<HashtableImportOptions>
+CreateHashtableImportOptions(flatbuffers::FlatBufferBuilder &_fbb)
+{
+ HashtableImportOptionsBuilder builder_(_fbb);
+ return builder_.Finish();
+}
+
+struct HashtableSizeOptions FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table
+{
+ typedef HashtableSizeOptionsBuilder Builder;
+ bool Verify(flatbuffers::Verifier &verifier) const
+ {
+ return VerifyTableStart(verifier) && verifier.EndTable();
+ }
+};
+
+struct HashtableSizeOptionsBuilder
+{
+ typedef HashtableSizeOptions Table;
+ flatbuffers::FlatBufferBuilder &fbb_;
+ flatbuffers::uoffset_t start_;
+ explicit HashtableSizeOptionsBuilder(flatbuffers::FlatBufferBuilder &_fbb) : fbb_(_fbb)
+ {
+ start_ = fbb_.StartTable();
+ }
+ flatbuffers::Offset<HashtableSizeOptions> Finish()
+ {
+ const auto end = fbb_.EndTable(start_);
+ auto o = flatbuffers::Offset<HashtableSizeOptions>(end);
+ return o;
+ }
+};
+
+inline flatbuffers::Offset<HashtableSizeOptions>
+CreateHashtableSizeOptions(flatbuffers::FlatBufferBuilder &_fbb)
+{
+ HashtableSizeOptionsBuilder builder_(_fbb);
+ return builder_.Finish();
+}
+
+struct VarHandleOptions FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table
+{
+ typedef VarHandleOptionsBuilder Builder;
+ enum FlatBuffersVTableOffset FLATBUFFERS_VTABLE_UNDERLYING_TYPE
+ {
+ VT_CONTAINER = 4,
+ VT_SHARED_NAME = 6
+ };
+ const flatbuffers::String *container() const
+ {
+ return GetPointer<const flatbuffers::String *>(VT_CONTAINER);
+ }
+ const flatbuffers::String *shared_name() const
+ {
+ return GetPointer<const flatbuffers::String *>(VT_SHARED_NAME);
+ }
+ bool Verify(flatbuffers::Verifier &verifier) const
+ {
+ return VerifyTableStart(verifier) && VerifyOffset(verifier, VT_CONTAINER) &&
+ verifier.VerifyString(container()) && VerifyOffset(verifier, VT_SHARED_NAME) &&
+ verifier.VerifyString(shared_name()) && verifier.EndTable();
+ }
+};
+
+struct VarHandleOptionsBuilder
+{
+ typedef VarHandleOptions Table;
+ flatbuffers::FlatBufferBuilder &fbb_;
+ flatbuffers::uoffset_t start_;
+ void add_container(flatbuffers::Offset<flatbuffers::String> container)
+ {
+ fbb_.AddOffset(VarHandleOptions::VT_CONTAINER, container);
+ }
+ void add_shared_name(flatbuffers::Offset<flatbuffers::String> shared_name)
+ {
+ fbb_.AddOffset(VarHandleOptions::VT_SHARED_NAME, shared_name);
+ }
+ explicit VarHandleOptionsBuilder(flatbuffers::FlatBufferBuilder &_fbb) : fbb_(_fbb)
+ {
+ start_ = fbb_.StartTable();
+ }
+ flatbuffers::Offset<VarHandleOptions> Finish()
+ {
+ const auto end = fbb_.EndTable(start_);
+ auto o = flatbuffers::Offset<VarHandleOptions>(end);
+ return o;
+ }
+};
+
+inline flatbuffers::Offset<VarHandleOptions>
+CreateVarHandleOptions(flatbuffers::FlatBufferBuilder &_fbb,
+ flatbuffers::Offset<flatbuffers::String> container = 0,
+ flatbuffers::Offset<flatbuffers::String> shared_name = 0)
+{
+ VarHandleOptionsBuilder builder_(_fbb);
+ builder_.add_shared_name(shared_name);
+ builder_.add_container(container);
+ return builder_.Finish();
+}
+
+inline flatbuffers::Offset<VarHandleOptions>
+CreateVarHandleOptionsDirect(flatbuffers::FlatBufferBuilder &_fbb, const char *container = nullptr,
+ const char *shared_name = nullptr)
+{
+ auto container__ = container ? _fbb.CreateString(container) : 0;
+ auto shared_name__ = shared_name ? _fbb.CreateString(shared_name) : 0;
+ return onert_tflite::CreateVarHandleOptions(_fbb, container__, shared_name__);
+}
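
// Note: the *Direct overload above is sugar -- it interns the two C strings
// via CreateString and forwards to CreateVarHandleOptions, e.g.:
inline flatbuffers::Offset<onert_tflite::VarHandleOptions>
MakeVarHandle(flatbuffers::FlatBufferBuilder &fbb)
{
  // "model"/"counter" are placeholder resource names for illustration only.
  return onert_tflite::CreateVarHandleOptionsDirect(fbb, "model", "counter");
}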
+
+struct ReadVariableOptions FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table
+{
+ typedef ReadVariableOptionsBuilder Builder;
+ bool Verify(flatbuffers::Verifier &verifier) const
+ {
+ return VerifyTableStart(verifier) && verifier.EndTable();
+ }
+};
+
+struct ReadVariableOptionsBuilder
+{
+ typedef ReadVariableOptions Table;
+ flatbuffers::FlatBufferBuilder &fbb_;
+ flatbuffers::uoffset_t start_;
+ explicit ReadVariableOptionsBuilder(flatbuffers::FlatBufferBuilder &_fbb) : fbb_(_fbb)
+ {
+ start_ = fbb_.StartTable();
+ }
+ flatbuffers::Offset<ReadVariableOptions> Finish()
+ {
+ const auto end = fbb_.EndTable(start_);
+ auto o = flatbuffers::Offset<ReadVariableOptions>(end);
+ return o;
+ }
+};
+
+inline flatbuffers::Offset<ReadVariableOptions>
+CreateReadVariableOptions(flatbuffers::FlatBufferBuilder &_fbb)
+{
+ ReadVariableOptionsBuilder builder_(_fbb);
+ return builder_.Finish();
+}
+
+struct AssignVariableOptions FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table
+{
+ typedef AssignVariableOptionsBuilder Builder;
+ bool Verify(flatbuffers::Verifier &verifier) const
+ {
+ return VerifyTableStart(verifier) && verifier.EndTable();
+ }
+};
+
+struct AssignVariableOptionsBuilder
+{
+ typedef AssignVariableOptions Table;
+ flatbuffers::FlatBufferBuilder &fbb_;
+ flatbuffers::uoffset_t start_;
+ explicit AssignVariableOptionsBuilder(flatbuffers::FlatBufferBuilder &_fbb) : fbb_(_fbb)
+ {
+ start_ = fbb_.StartTable();
+ }
+ flatbuffers::Offset<AssignVariableOptions> Finish()
+ {
+ const auto end = fbb_.EndTable(start_);
+ auto o = flatbuffers::Offset<AssignVariableOptions>(end);
+ return o;
+ }
+};
+
+inline flatbuffers::Offset<AssignVariableOptions>
+CreateAssignVariableOptions(flatbuffers::FlatBufferBuilder &_fbb)
+{
+ AssignVariableOptionsBuilder builder_(_fbb);
+ return builder_.Finish();
+}
+
+struct RandomOptions FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table
+{
+ typedef RandomOptionsBuilder Builder;
+ enum FlatBuffersVTableOffset FLATBUFFERS_VTABLE_UNDERLYING_TYPE
+ {
+ VT_SEED = 4,
+ VT_SEED2 = 6
};
- BuiltinOperator builtin_code() const
+ int32_t seed() const { return GetField<int32_t>(VT_SEED, 0); }
+ int32_t seed2() const { return GetField<int32_t>(VT_SEED2, 0); }
+ bool Verify(flatbuffers::Verifier &verifier) const
{
- return static_cast<BuiltinOperator>(GetField<int8_t>(VT_BUILTIN_CODE, 0));
+ return VerifyTableStart(verifier) && VerifyField<int32_t>(verifier, VT_SEED) &&
+ VerifyField<int32_t>(verifier, VT_SEED2) && verifier.EndTable();
}
+};
+
+struct RandomOptionsBuilder
+{
+ typedef RandomOptions Table;
+ flatbuffers::FlatBufferBuilder &fbb_;
+ flatbuffers::uoffset_t start_;
+ void add_seed(int32_t seed) { fbb_.AddElement<int32_t>(RandomOptions::VT_SEED, seed, 0); }
+ void add_seed2(int32_t seed2) { fbb_.AddElement<int32_t>(RandomOptions::VT_SEED2, seed2, 0); }
+ explicit RandomOptionsBuilder(flatbuffers::FlatBufferBuilder &_fbb) : fbb_(_fbb)
+ {
+ start_ = fbb_.StartTable();
+ }
+ flatbuffers::Offset<RandomOptions> Finish()
+ {
+ const auto end = fbb_.EndTable(start_);
+ auto o = flatbuffers::Offset<RandomOptions>(end);
+ return o;
+ }
+};
+
+inline flatbuffers::Offset<RandomOptions> CreateRandomOptions(flatbuffers::FlatBufferBuilder &_fbb,
+ int32_t seed = 0, int32_t seed2 = 0)
+{
+ RandomOptionsBuilder builder_(_fbb);
+ builder_.add_seed2(seed2);
+ builder_.add_seed(seed);
+ return builder_.Finish();
+}
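
// Usage sketch (assumed caller code): seed/seed2 follow the usual TensorFlow
// convention -- leaving both at 0 requests a nondeterministic stream, while
// fixing both pins it.
inline flatbuffers::Offset<onert_tflite::RandomOptions>
MakeSeededRandom(flatbuffers::FlatBufferBuilder &fbb)
{
  return onert_tflite::CreateRandomOptions(fbb, /*seed=*/42, /*seed2=*/7);
}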
+
+struct OperatorCode FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table
+{
+ typedef OperatorCodeBuilder Builder;
+ enum FlatBuffersVTableOffset FLATBUFFERS_VTABLE_UNDERLYING_TYPE
+ {
+ VT_DEPRECATED_BUILTIN_CODE = 4,
+ VT_CUSTOM_CODE = 6,
+ VT_VERSION = 8,
+ VT_BUILTIN_CODE = 10
+ };
+ int8_t deprecated_builtin_code() const { return GetField<int8_t>(VT_DEPRECATED_BUILTIN_CODE, 0); }
const flatbuffers::String *custom_code() const
{
return GetPointer<const flatbuffers::String *>(VT_CUSTOM_CODE);
}
int32_t version() const { return GetField<int32_t>(VT_VERSION, 1); }
+ onert_tflite::BuiltinOperator builtin_code() const
+ {
+ return static_cast<onert_tflite::BuiltinOperator>(GetField<int32_t>(VT_BUILTIN_CODE, 0));
+ }
bool Verify(flatbuffers::Verifier &verifier) const
{
- return VerifyTableStart(verifier) && VerifyField<int8_t>(verifier, VT_BUILTIN_CODE) &&
+ return VerifyTableStart(verifier) &&
+ VerifyField<int8_t>(verifier, VT_DEPRECATED_BUILTIN_CODE) &&
VerifyOffset(verifier, VT_CUSTOM_CODE) && verifier.VerifyString(custom_code()) &&
- VerifyField<int32_t>(verifier, VT_VERSION) && verifier.EndTable();
+ VerifyField<int32_t>(verifier, VT_VERSION) &&
+ VerifyField<int32_t>(verifier, VT_BUILTIN_CODE) && verifier.EndTable();
}
};
struct OperatorCodeBuilder
{
+ typedef OperatorCode Table;
flatbuffers::FlatBufferBuilder &fbb_;
flatbuffers::uoffset_t start_;
- void add_builtin_code(BuiltinOperator builtin_code)
+ void add_deprecated_builtin_code(int8_t deprecated_builtin_code)
{
- fbb_.AddElement<int8_t>(OperatorCode::VT_BUILTIN_CODE, static_cast<int8_t>(builtin_code), 0);
+ fbb_.AddElement<int8_t>(OperatorCode::VT_DEPRECATED_BUILTIN_CODE, deprecated_builtin_code, 0);
}
void add_custom_code(flatbuffers::Offset<flatbuffers::String> custom_code)
{
@@ -7173,11 +8409,14 @@ struct OperatorCodeBuilder
{
fbb_.AddElement<int32_t>(OperatorCode::VT_VERSION, version, 1);
}
+ void add_builtin_code(onert_tflite::BuiltinOperator builtin_code)
+ {
+ fbb_.AddElement<int32_t>(OperatorCode::VT_BUILTIN_CODE, static_cast<int32_t>(builtin_code), 0);
+ }
explicit OperatorCodeBuilder(flatbuffers::FlatBufferBuilder &_fbb) : fbb_(_fbb)
{
start_ = fbb_.StartTable();
}
- OperatorCodeBuilder &operator=(const OperatorCodeBuilder &);
flatbuffers::Offset<OperatorCode> Finish()
{
const auto end = fbb_.EndTable(start_);
@@ -7187,29 +8426,32 @@ struct OperatorCodeBuilder
};
inline flatbuffers::Offset<OperatorCode>
-CreateOperatorCode(flatbuffers::FlatBufferBuilder &_fbb,
- BuiltinOperator builtin_code = BuiltinOperator_ADD,
- flatbuffers::Offset<flatbuffers::String> custom_code = 0, int32_t version = 1)
+CreateOperatorCode(flatbuffers::FlatBufferBuilder &_fbb, int8_t deprecated_builtin_code = 0,
+ flatbuffers::Offset<flatbuffers::String> custom_code = 0, int32_t version = 1,
+ onert_tflite::BuiltinOperator builtin_code = onert_tflite::BuiltinOperator_ADD)
{
OperatorCodeBuilder builder_(_fbb);
+ builder_.add_builtin_code(builtin_code);
builder_.add_version(version);
builder_.add_custom_code(custom_code);
- builder_.add_builtin_code(builtin_code);
+ builder_.add_deprecated_builtin_code(deprecated_builtin_code);
return builder_.Finish();
}
-inline flatbuffers::Offset<OperatorCode>
-CreateOperatorCodeDirect(flatbuffers::FlatBufferBuilder &_fbb,
- BuiltinOperator builtin_code = BuiltinOperator_ADD,
- const char *custom_code = nullptr, int32_t version = 1)
+inline flatbuffers::Offset<OperatorCode> CreateOperatorCodeDirect(
+ flatbuffers::FlatBufferBuilder &_fbb, int8_t deprecated_builtin_code = 0,
+ const char *custom_code = nullptr, int32_t version = 1,
+ onert_tflite::BuiltinOperator builtin_code = onert_tflite::BuiltinOperator_ADD)
{
- return onert_tflite::CreateOperatorCode(
- _fbb, builtin_code, custom_code ? _fbb.CreateString(custom_code) : 0, version);
+ auto custom_code__ = custom_code ? _fbb.CreateString(custom_code) : 0;
+ return onert_tflite::CreateOperatorCode(_fbb, deprecated_builtin_code, custom_code__, version,
+ builtin_code);
}
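
// Note: the schema keeps the old int8 field as deprecated_builtin_code and
// adds an int32 builtin_code (vtable slot 10) so opcodes above 127 fit.
// A reader that must accept files written under either layout can take the
// maximum of the two, mirroring upstream TFLite's GetBuiltinCode helper
// (sketch below; requires <algorithm> for std::max):
inline onert_tflite::BuiltinOperator
ResolveBuiltinCode(const onert_tflite::OperatorCode *op_code)
{
  return std::max(
    static_cast<onert_tflite::BuiltinOperator>(op_code->deprecated_builtin_code()),
    op_code->builtin_code());
}
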
struct Operator FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table
{
- enum
+ typedef OperatorBuilder Builder;
+ enum FlatBuffersVTableOffset FLATBUFFERS_VTABLE_UNDERLYING_TYPE
{
VT_OPCODE_INDEX = 4,
VT_INPUTS = 6,
@@ -7230,628 +8472,715 @@ struct Operator FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table
{
return GetPointer<const flatbuffers::Vector<int32_t> *>(VT_OUTPUTS);
}
- BuiltinOptions builtin_options_type() const
+ onert_tflite::BuiltinOptions builtin_options_type() const
{
- return static_cast<BuiltinOptions>(GetField<uint8_t>(VT_BUILTIN_OPTIONS_TYPE, 0));
+ return static_cast<onert_tflite::BuiltinOptions>(GetField<uint8_t>(VT_BUILTIN_OPTIONS_TYPE, 0));
}
const void *builtin_options() const { return GetPointer<const void *>(VT_BUILTIN_OPTIONS); }
template <typename T> const T *builtin_options_as() const;
- const Conv2DOptions *builtin_options_as_Conv2DOptions() const
+ const onert_tflite::Conv2DOptions *builtin_options_as_Conv2DOptions() const
{
- return builtin_options_type() == BuiltinOptions_Conv2DOptions
- ? static_cast<const Conv2DOptions *>(builtin_options())
+ return builtin_options_type() == onert_tflite::BuiltinOptions_Conv2DOptions
+ ? static_cast<const onert_tflite::Conv2DOptions *>(builtin_options())
: nullptr;
}
- const DepthwiseConv2DOptions *builtin_options_as_DepthwiseConv2DOptions() const
+ const onert_tflite::DepthwiseConv2DOptions *builtin_options_as_DepthwiseConv2DOptions() const
{
- return builtin_options_type() == BuiltinOptions_DepthwiseConv2DOptions
- ? static_cast<const DepthwiseConv2DOptions *>(builtin_options())
+ return builtin_options_type() == onert_tflite::BuiltinOptions_DepthwiseConv2DOptions
+ ? static_cast<const onert_tflite::DepthwiseConv2DOptions *>(builtin_options())
: nullptr;
}
- const ConcatEmbeddingsOptions *builtin_options_as_ConcatEmbeddingsOptions() const
+ const onert_tflite::ConcatEmbeddingsOptions *builtin_options_as_ConcatEmbeddingsOptions() const
{
- return builtin_options_type() == BuiltinOptions_ConcatEmbeddingsOptions
- ? static_cast<const ConcatEmbeddingsOptions *>(builtin_options())
+ return builtin_options_type() == onert_tflite::BuiltinOptions_ConcatEmbeddingsOptions
+ ? static_cast<const onert_tflite::ConcatEmbeddingsOptions *>(builtin_options())
: nullptr;
}
- const LSHProjectionOptions *builtin_options_as_LSHProjectionOptions() const
+ const onert_tflite::LSHProjectionOptions *builtin_options_as_LSHProjectionOptions() const
{
- return builtin_options_type() == BuiltinOptions_LSHProjectionOptions
- ? static_cast<const LSHProjectionOptions *>(builtin_options())
+ return builtin_options_type() == onert_tflite::BuiltinOptions_LSHProjectionOptions
+ ? static_cast<const onert_tflite::LSHProjectionOptions *>(builtin_options())
: nullptr;
}
- const Pool2DOptions *builtin_options_as_Pool2DOptions() const
+ const onert_tflite::Pool2DOptions *builtin_options_as_Pool2DOptions() const
{
- return builtin_options_type() == BuiltinOptions_Pool2DOptions
- ? static_cast<const Pool2DOptions *>(builtin_options())
+ return builtin_options_type() == onert_tflite::BuiltinOptions_Pool2DOptions
+ ? static_cast<const onert_tflite::Pool2DOptions *>(builtin_options())
: nullptr;
}
- const SVDFOptions *builtin_options_as_SVDFOptions() const
+ const onert_tflite::SVDFOptions *builtin_options_as_SVDFOptions() const
{
- return builtin_options_type() == BuiltinOptions_SVDFOptions
- ? static_cast<const SVDFOptions *>(builtin_options())
+ return builtin_options_type() == onert_tflite::BuiltinOptions_SVDFOptions
+ ? static_cast<const onert_tflite::SVDFOptions *>(builtin_options())
: nullptr;
}
- const RNNOptions *builtin_options_as_RNNOptions() const
+ const onert_tflite::RNNOptions *builtin_options_as_RNNOptions() const
{
- return builtin_options_type() == BuiltinOptions_RNNOptions
- ? static_cast<const RNNOptions *>(builtin_options())
+ return builtin_options_type() == onert_tflite::BuiltinOptions_RNNOptions
+ ? static_cast<const onert_tflite::RNNOptions *>(builtin_options())
: nullptr;
}
- const FullyConnectedOptions *builtin_options_as_FullyConnectedOptions() const
+ const onert_tflite::FullyConnectedOptions *builtin_options_as_FullyConnectedOptions() const
{
- return builtin_options_type() == BuiltinOptions_FullyConnectedOptions
- ? static_cast<const FullyConnectedOptions *>(builtin_options())
+ return builtin_options_type() == onert_tflite::BuiltinOptions_FullyConnectedOptions
+ ? static_cast<const onert_tflite::FullyConnectedOptions *>(builtin_options())
: nullptr;
}
- const SoftmaxOptions *builtin_options_as_SoftmaxOptions() const
+ const onert_tflite::SoftmaxOptions *builtin_options_as_SoftmaxOptions() const
{
- return builtin_options_type() == BuiltinOptions_SoftmaxOptions
- ? static_cast<const SoftmaxOptions *>(builtin_options())
+ return builtin_options_type() == onert_tflite::BuiltinOptions_SoftmaxOptions
+ ? static_cast<const onert_tflite::SoftmaxOptions *>(builtin_options())
: nullptr;
}
- const ConcatenationOptions *builtin_options_as_ConcatenationOptions() const
+ const onert_tflite::ConcatenationOptions *builtin_options_as_ConcatenationOptions() const
{
- return builtin_options_type() == BuiltinOptions_ConcatenationOptions
- ? static_cast<const ConcatenationOptions *>(builtin_options())
+ return builtin_options_type() == onert_tflite::BuiltinOptions_ConcatenationOptions
+ ? static_cast<const onert_tflite::ConcatenationOptions *>(builtin_options())
: nullptr;
}
- const AddOptions *builtin_options_as_AddOptions() const
+ const onert_tflite::AddOptions *builtin_options_as_AddOptions() const
{
- return builtin_options_type() == BuiltinOptions_AddOptions
- ? static_cast<const AddOptions *>(builtin_options())
+ return builtin_options_type() == onert_tflite::BuiltinOptions_AddOptions
+ ? static_cast<const onert_tflite::AddOptions *>(builtin_options())
: nullptr;
}
- const L2NormOptions *builtin_options_as_L2NormOptions() const
+ const onert_tflite::L2NormOptions *builtin_options_as_L2NormOptions() const
{
- return builtin_options_type() == BuiltinOptions_L2NormOptions
- ? static_cast<const L2NormOptions *>(builtin_options())
+ return builtin_options_type() == onert_tflite::BuiltinOptions_L2NormOptions
+ ? static_cast<const onert_tflite::L2NormOptions *>(builtin_options())
: nullptr;
}
- const LocalResponseNormalizationOptions *
+ const onert_tflite::LocalResponseNormalizationOptions *
builtin_options_as_LocalResponseNormalizationOptions() const
{
- return builtin_options_type() == BuiltinOptions_LocalResponseNormalizationOptions
- ? static_cast<const LocalResponseNormalizationOptions *>(builtin_options())
+ return builtin_options_type() == onert_tflite::BuiltinOptions_LocalResponseNormalizationOptions
+ ? static_cast<const onert_tflite::LocalResponseNormalizationOptions *>(
+ builtin_options())
: nullptr;
}
- const LSTMOptions *builtin_options_as_LSTMOptions() const
+ const onert_tflite::LSTMOptions *builtin_options_as_LSTMOptions() const
{
- return builtin_options_type() == BuiltinOptions_LSTMOptions
- ? static_cast<const LSTMOptions *>(builtin_options())
+ return builtin_options_type() == onert_tflite::BuiltinOptions_LSTMOptions
+ ? static_cast<const onert_tflite::LSTMOptions *>(builtin_options())
: nullptr;
}
- const ResizeBilinearOptions *builtin_options_as_ResizeBilinearOptions() const
+ const onert_tflite::ResizeBilinearOptions *builtin_options_as_ResizeBilinearOptions() const
{
- return builtin_options_type() == BuiltinOptions_ResizeBilinearOptions
- ? static_cast<const ResizeBilinearOptions *>(builtin_options())
+ return builtin_options_type() == onert_tflite::BuiltinOptions_ResizeBilinearOptions
+ ? static_cast<const onert_tflite::ResizeBilinearOptions *>(builtin_options())
: nullptr;
}
- const CallOptions *builtin_options_as_CallOptions() const
+ const onert_tflite::CallOptions *builtin_options_as_CallOptions() const
{
- return builtin_options_type() == BuiltinOptions_CallOptions
- ? static_cast<const CallOptions *>(builtin_options())
+ return builtin_options_type() == onert_tflite::BuiltinOptions_CallOptions
+ ? static_cast<const onert_tflite::CallOptions *>(builtin_options())
: nullptr;
}
- const ReshapeOptions *builtin_options_as_ReshapeOptions() const
+ const onert_tflite::ReshapeOptions *builtin_options_as_ReshapeOptions() const
{
- return builtin_options_type() == BuiltinOptions_ReshapeOptions
- ? static_cast<const ReshapeOptions *>(builtin_options())
+ return builtin_options_type() == onert_tflite::BuiltinOptions_ReshapeOptions
+ ? static_cast<const onert_tflite::ReshapeOptions *>(builtin_options())
: nullptr;
}
- const SkipGramOptions *builtin_options_as_SkipGramOptions() const
+ const onert_tflite::SkipGramOptions *builtin_options_as_SkipGramOptions() const
{
- return builtin_options_type() == BuiltinOptions_SkipGramOptions
- ? static_cast<const SkipGramOptions *>(builtin_options())
+ return builtin_options_type() == onert_tflite::BuiltinOptions_SkipGramOptions
+ ? static_cast<const onert_tflite::SkipGramOptions *>(builtin_options())
: nullptr;
}
- const SpaceToDepthOptions *builtin_options_as_SpaceToDepthOptions() const
+ const onert_tflite::SpaceToDepthOptions *builtin_options_as_SpaceToDepthOptions() const
{
- return builtin_options_type() == BuiltinOptions_SpaceToDepthOptions
- ? static_cast<const SpaceToDepthOptions *>(builtin_options())
+ return builtin_options_type() == onert_tflite::BuiltinOptions_SpaceToDepthOptions
+ ? static_cast<const onert_tflite::SpaceToDepthOptions *>(builtin_options())
: nullptr;
}
- const EmbeddingLookupSparseOptions *builtin_options_as_EmbeddingLookupSparseOptions() const
+ const onert_tflite::EmbeddingLookupSparseOptions *
+ builtin_options_as_EmbeddingLookupSparseOptions() const
{
- return builtin_options_type() == BuiltinOptions_EmbeddingLookupSparseOptions
- ? static_cast<const EmbeddingLookupSparseOptions *>(builtin_options())
+ return builtin_options_type() == onert_tflite::BuiltinOptions_EmbeddingLookupSparseOptions
+ ? static_cast<const onert_tflite::EmbeddingLookupSparseOptions *>(builtin_options())
: nullptr;
}
- const MulOptions *builtin_options_as_MulOptions() const
+ const onert_tflite::MulOptions *builtin_options_as_MulOptions() const
{
- return builtin_options_type() == BuiltinOptions_MulOptions
- ? static_cast<const MulOptions *>(builtin_options())
+ return builtin_options_type() == onert_tflite::BuiltinOptions_MulOptions
+ ? static_cast<const onert_tflite::MulOptions *>(builtin_options())
: nullptr;
}
- const PadOptions *builtin_options_as_PadOptions() const
+ const onert_tflite::PadOptions *builtin_options_as_PadOptions() const
{
- return builtin_options_type() == BuiltinOptions_PadOptions
- ? static_cast<const PadOptions *>(builtin_options())
+ return builtin_options_type() == onert_tflite::BuiltinOptions_PadOptions
+ ? static_cast<const onert_tflite::PadOptions *>(builtin_options())
: nullptr;
}
- const GatherOptions *builtin_options_as_GatherOptions() const
+ const onert_tflite::GatherOptions *builtin_options_as_GatherOptions() const
{
- return builtin_options_type() == BuiltinOptions_GatherOptions
- ? static_cast<const GatherOptions *>(builtin_options())
+ return builtin_options_type() == onert_tflite::BuiltinOptions_GatherOptions
+ ? static_cast<const onert_tflite::GatherOptions *>(builtin_options())
: nullptr;
}
- const BatchToSpaceNDOptions *builtin_options_as_BatchToSpaceNDOptions() const
+ const onert_tflite::BatchToSpaceNDOptions *builtin_options_as_BatchToSpaceNDOptions() const
{
- return builtin_options_type() == BuiltinOptions_BatchToSpaceNDOptions
- ? static_cast<const BatchToSpaceNDOptions *>(builtin_options())
+ return builtin_options_type() == onert_tflite::BuiltinOptions_BatchToSpaceNDOptions
+ ? static_cast<const onert_tflite::BatchToSpaceNDOptions *>(builtin_options())
: nullptr;
}
- const SpaceToBatchNDOptions *builtin_options_as_SpaceToBatchNDOptions() const
+ const onert_tflite::SpaceToBatchNDOptions *builtin_options_as_SpaceToBatchNDOptions() const
{
- return builtin_options_type() == BuiltinOptions_SpaceToBatchNDOptions
- ? static_cast<const SpaceToBatchNDOptions *>(builtin_options())
+ return builtin_options_type() == onert_tflite::BuiltinOptions_SpaceToBatchNDOptions
+ ? static_cast<const onert_tflite::SpaceToBatchNDOptions *>(builtin_options())
: nullptr;
}
- const TransposeOptions *builtin_options_as_TransposeOptions() const
+ const onert_tflite::TransposeOptions *builtin_options_as_TransposeOptions() const
{
- return builtin_options_type() == BuiltinOptions_TransposeOptions
- ? static_cast<const TransposeOptions *>(builtin_options())
+ return builtin_options_type() == onert_tflite::BuiltinOptions_TransposeOptions
+ ? static_cast<const onert_tflite::TransposeOptions *>(builtin_options())
: nullptr;
}
- const ReducerOptions *builtin_options_as_ReducerOptions() const
+ const onert_tflite::ReducerOptions *builtin_options_as_ReducerOptions() const
{
- return builtin_options_type() == BuiltinOptions_ReducerOptions
- ? static_cast<const ReducerOptions *>(builtin_options())
+ return builtin_options_type() == onert_tflite::BuiltinOptions_ReducerOptions
+ ? static_cast<const onert_tflite::ReducerOptions *>(builtin_options())
: nullptr;
}
- const SubOptions *builtin_options_as_SubOptions() const
+ const onert_tflite::SubOptions *builtin_options_as_SubOptions() const
{
- return builtin_options_type() == BuiltinOptions_SubOptions
- ? static_cast<const SubOptions *>(builtin_options())
+ return builtin_options_type() == onert_tflite::BuiltinOptions_SubOptions
+ ? static_cast<const onert_tflite::SubOptions *>(builtin_options())
: nullptr;
}
- const DivOptions *builtin_options_as_DivOptions() const
+ const onert_tflite::DivOptions *builtin_options_as_DivOptions() const
{
- return builtin_options_type() == BuiltinOptions_DivOptions
- ? static_cast<const DivOptions *>(builtin_options())
+ return builtin_options_type() == onert_tflite::BuiltinOptions_DivOptions
+ ? static_cast<const onert_tflite::DivOptions *>(builtin_options())
: nullptr;
}
- const SqueezeOptions *builtin_options_as_SqueezeOptions() const
+ const onert_tflite::SqueezeOptions *builtin_options_as_SqueezeOptions() const
{
- return builtin_options_type() == BuiltinOptions_SqueezeOptions
- ? static_cast<const SqueezeOptions *>(builtin_options())
+ return builtin_options_type() == onert_tflite::BuiltinOptions_SqueezeOptions
+ ? static_cast<const onert_tflite::SqueezeOptions *>(builtin_options())
: nullptr;
}
- const SequenceRNNOptions *builtin_options_as_SequenceRNNOptions() const
+ const onert_tflite::SequenceRNNOptions *builtin_options_as_SequenceRNNOptions() const
{
- return builtin_options_type() == BuiltinOptions_SequenceRNNOptions
- ? static_cast<const SequenceRNNOptions *>(builtin_options())
+ return builtin_options_type() == onert_tflite::BuiltinOptions_SequenceRNNOptions
+ ? static_cast<const onert_tflite::SequenceRNNOptions *>(builtin_options())
: nullptr;
}
- const StridedSliceOptions *builtin_options_as_StridedSliceOptions() const
+ const onert_tflite::StridedSliceOptions *builtin_options_as_StridedSliceOptions() const
{
- return builtin_options_type() == BuiltinOptions_StridedSliceOptions
- ? static_cast<const StridedSliceOptions *>(builtin_options())
+ return builtin_options_type() == onert_tflite::BuiltinOptions_StridedSliceOptions
+ ? static_cast<const onert_tflite::StridedSliceOptions *>(builtin_options())
: nullptr;
}
- const ExpOptions *builtin_options_as_ExpOptions() const
+ const onert_tflite::ExpOptions *builtin_options_as_ExpOptions() const
{
- return builtin_options_type() == BuiltinOptions_ExpOptions
- ? static_cast<const ExpOptions *>(builtin_options())
+ return builtin_options_type() == onert_tflite::BuiltinOptions_ExpOptions
+ ? static_cast<const onert_tflite::ExpOptions *>(builtin_options())
: nullptr;
}
- const TopKV2Options *builtin_options_as_TopKV2Options() const
+ const onert_tflite::TopKV2Options *builtin_options_as_TopKV2Options() const
{
- return builtin_options_type() == BuiltinOptions_TopKV2Options
- ? static_cast<const TopKV2Options *>(builtin_options())
+ return builtin_options_type() == onert_tflite::BuiltinOptions_TopKV2Options
+ ? static_cast<const onert_tflite::TopKV2Options *>(builtin_options())
: nullptr;
}
- const SplitOptions *builtin_options_as_SplitOptions() const
+ const onert_tflite::SplitOptions *builtin_options_as_SplitOptions() const
{
- return builtin_options_type() == BuiltinOptions_SplitOptions
- ? static_cast<const SplitOptions *>(builtin_options())
+ return builtin_options_type() == onert_tflite::BuiltinOptions_SplitOptions
+ ? static_cast<const onert_tflite::SplitOptions *>(builtin_options())
: nullptr;
}
- const LogSoftmaxOptions *builtin_options_as_LogSoftmaxOptions() const
+ const onert_tflite::LogSoftmaxOptions *builtin_options_as_LogSoftmaxOptions() const
{
- return builtin_options_type() == BuiltinOptions_LogSoftmaxOptions
- ? static_cast<const LogSoftmaxOptions *>(builtin_options())
+ return builtin_options_type() == onert_tflite::BuiltinOptions_LogSoftmaxOptions
+ ? static_cast<const onert_tflite::LogSoftmaxOptions *>(builtin_options())
: nullptr;
}
- const CastOptions *builtin_options_as_CastOptions() const
+ const onert_tflite::CastOptions *builtin_options_as_CastOptions() const
{
- return builtin_options_type() == BuiltinOptions_CastOptions
- ? static_cast<const CastOptions *>(builtin_options())
+ return builtin_options_type() == onert_tflite::BuiltinOptions_CastOptions
+ ? static_cast<const onert_tflite::CastOptions *>(builtin_options())
: nullptr;
}
- const DequantizeOptions *builtin_options_as_DequantizeOptions() const
+ const onert_tflite::DequantizeOptions *builtin_options_as_DequantizeOptions() const
{
- return builtin_options_type() == BuiltinOptions_DequantizeOptions
- ? static_cast<const DequantizeOptions *>(builtin_options())
+ return builtin_options_type() == onert_tflite::BuiltinOptions_DequantizeOptions
+ ? static_cast<const onert_tflite::DequantizeOptions *>(builtin_options())
: nullptr;
}
- const MaximumMinimumOptions *builtin_options_as_MaximumMinimumOptions() const
+ const onert_tflite::MaximumMinimumOptions *builtin_options_as_MaximumMinimumOptions() const
{
- return builtin_options_type() == BuiltinOptions_MaximumMinimumOptions
- ? static_cast<const MaximumMinimumOptions *>(builtin_options())
+ return builtin_options_type() == onert_tflite::BuiltinOptions_MaximumMinimumOptions
+ ? static_cast<const onert_tflite::MaximumMinimumOptions *>(builtin_options())
: nullptr;
}
- const ArgMaxOptions *builtin_options_as_ArgMaxOptions() const
+ const onert_tflite::ArgMaxOptions *builtin_options_as_ArgMaxOptions() const
{
- return builtin_options_type() == BuiltinOptions_ArgMaxOptions
- ? static_cast<const ArgMaxOptions *>(builtin_options())
+ return builtin_options_type() == onert_tflite::BuiltinOptions_ArgMaxOptions
+ ? static_cast<const onert_tflite::ArgMaxOptions *>(builtin_options())
: nullptr;
}
- const LessOptions *builtin_options_as_LessOptions() const
+ const onert_tflite::LessOptions *builtin_options_as_LessOptions() const
{
- return builtin_options_type() == BuiltinOptions_LessOptions
- ? static_cast<const LessOptions *>(builtin_options())
+ return builtin_options_type() == onert_tflite::BuiltinOptions_LessOptions
+ ? static_cast<const onert_tflite::LessOptions *>(builtin_options())
: nullptr;
}
- const NegOptions *builtin_options_as_NegOptions() const
+ const onert_tflite::NegOptions *builtin_options_as_NegOptions() const
{
- return builtin_options_type() == BuiltinOptions_NegOptions
- ? static_cast<const NegOptions *>(builtin_options())
+ return builtin_options_type() == onert_tflite::BuiltinOptions_NegOptions
+ ? static_cast<const onert_tflite::NegOptions *>(builtin_options())
: nullptr;
}
- const PadV2Options *builtin_options_as_PadV2Options() const
+ const onert_tflite::PadV2Options *builtin_options_as_PadV2Options() const
{
- return builtin_options_type() == BuiltinOptions_PadV2Options
- ? static_cast<const PadV2Options *>(builtin_options())
+ return builtin_options_type() == onert_tflite::BuiltinOptions_PadV2Options
+ ? static_cast<const onert_tflite::PadV2Options *>(builtin_options())
: nullptr;
}
- const GreaterOptions *builtin_options_as_GreaterOptions() const
+ const onert_tflite::GreaterOptions *builtin_options_as_GreaterOptions() const
{
- return builtin_options_type() == BuiltinOptions_GreaterOptions
- ? static_cast<const GreaterOptions *>(builtin_options())
+ return builtin_options_type() == onert_tflite::BuiltinOptions_GreaterOptions
+ ? static_cast<const onert_tflite::GreaterOptions *>(builtin_options())
: nullptr;
}
- const GreaterEqualOptions *builtin_options_as_GreaterEqualOptions() const
+ const onert_tflite::GreaterEqualOptions *builtin_options_as_GreaterEqualOptions() const
{
- return builtin_options_type() == BuiltinOptions_GreaterEqualOptions
- ? static_cast<const GreaterEqualOptions *>(builtin_options())
+ return builtin_options_type() == onert_tflite::BuiltinOptions_GreaterEqualOptions
+ ? static_cast<const onert_tflite::GreaterEqualOptions *>(builtin_options())
: nullptr;
}
- const LessEqualOptions *builtin_options_as_LessEqualOptions() const
+ const onert_tflite::LessEqualOptions *builtin_options_as_LessEqualOptions() const
{
- return builtin_options_type() == BuiltinOptions_LessEqualOptions
- ? static_cast<const LessEqualOptions *>(builtin_options())
+ return builtin_options_type() == onert_tflite::BuiltinOptions_LessEqualOptions
+ ? static_cast<const onert_tflite::LessEqualOptions *>(builtin_options())
: nullptr;
}
- const SelectOptions *builtin_options_as_SelectOptions() const
+ const onert_tflite::SelectOptions *builtin_options_as_SelectOptions() const
{
- return builtin_options_type() == BuiltinOptions_SelectOptions
- ? static_cast<const SelectOptions *>(builtin_options())
+ return builtin_options_type() == onert_tflite::BuiltinOptions_SelectOptions
+ ? static_cast<const onert_tflite::SelectOptions *>(builtin_options())
: nullptr;
}
- const SliceOptions *builtin_options_as_SliceOptions() const
+ const onert_tflite::SliceOptions *builtin_options_as_SliceOptions() const
{
- return builtin_options_type() == BuiltinOptions_SliceOptions
- ? static_cast<const SliceOptions *>(builtin_options())
+ return builtin_options_type() == onert_tflite::BuiltinOptions_SliceOptions
+ ? static_cast<const onert_tflite::SliceOptions *>(builtin_options())
: nullptr;
}
- const TransposeConvOptions *builtin_options_as_TransposeConvOptions() const
+ const onert_tflite::TransposeConvOptions *builtin_options_as_TransposeConvOptions() const
{
- return builtin_options_type() == BuiltinOptions_TransposeConvOptions
- ? static_cast<const TransposeConvOptions *>(builtin_options())
+ return builtin_options_type() == onert_tflite::BuiltinOptions_TransposeConvOptions
+ ? static_cast<const onert_tflite::TransposeConvOptions *>(builtin_options())
: nullptr;
}
- const SparseToDenseOptions *builtin_options_as_SparseToDenseOptions() const
+ const onert_tflite::SparseToDenseOptions *builtin_options_as_SparseToDenseOptions() const
{
- return builtin_options_type() == BuiltinOptions_SparseToDenseOptions
- ? static_cast<const SparseToDenseOptions *>(builtin_options())
+ return builtin_options_type() == onert_tflite::BuiltinOptions_SparseToDenseOptions
+ ? static_cast<const onert_tflite::SparseToDenseOptions *>(builtin_options())
: nullptr;
}
- const TileOptions *builtin_options_as_TileOptions() const
+ const onert_tflite::TileOptions *builtin_options_as_TileOptions() const
{
- return builtin_options_type() == BuiltinOptions_TileOptions
- ? static_cast<const TileOptions *>(builtin_options())
+ return builtin_options_type() == onert_tflite::BuiltinOptions_TileOptions
+ ? static_cast<const onert_tflite::TileOptions *>(builtin_options())
: nullptr;
}
- const ExpandDimsOptions *builtin_options_as_ExpandDimsOptions() const
+ const onert_tflite::ExpandDimsOptions *builtin_options_as_ExpandDimsOptions() const
{
- return builtin_options_type() == BuiltinOptions_ExpandDimsOptions
- ? static_cast<const ExpandDimsOptions *>(builtin_options())
+ return builtin_options_type() == onert_tflite::BuiltinOptions_ExpandDimsOptions
+ ? static_cast<const onert_tflite::ExpandDimsOptions *>(builtin_options())
: nullptr;
}
- const EqualOptions *builtin_options_as_EqualOptions() const
+ const onert_tflite::EqualOptions *builtin_options_as_EqualOptions() const
{
- return builtin_options_type() == BuiltinOptions_EqualOptions
- ? static_cast<const EqualOptions *>(builtin_options())
+ return builtin_options_type() == onert_tflite::BuiltinOptions_EqualOptions
+ ? static_cast<const onert_tflite::EqualOptions *>(builtin_options())
: nullptr;
}
- const NotEqualOptions *builtin_options_as_NotEqualOptions() const
+ const onert_tflite::NotEqualOptions *builtin_options_as_NotEqualOptions() const
{
- return builtin_options_type() == BuiltinOptions_NotEqualOptions
- ? static_cast<const NotEqualOptions *>(builtin_options())
+ return builtin_options_type() == onert_tflite::BuiltinOptions_NotEqualOptions
+ ? static_cast<const onert_tflite::NotEqualOptions *>(builtin_options())
: nullptr;
}
- const ShapeOptions *builtin_options_as_ShapeOptions() const
+ const onert_tflite::ShapeOptions *builtin_options_as_ShapeOptions() const
{
- return builtin_options_type() == BuiltinOptions_ShapeOptions
- ? static_cast<const ShapeOptions *>(builtin_options())
+ return builtin_options_type() == onert_tflite::BuiltinOptions_ShapeOptions
+ ? static_cast<const onert_tflite::ShapeOptions *>(builtin_options())
: nullptr;
}
- const PowOptions *builtin_options_as_PowOptions() const
+ const onert_tflite::PowOptions *builtin_options_as_PowOptions() const
{
- return builtin_options_type() == BuiltinOptions_PowOptions
- ? static_cast<const PowOptions *>(builtin_options())
+ return builtin_options_type() == onert_tflite::BuiltinOptions_PowOptions
+ ? static_cast<const onert_tflite::PowOptions *>(builtin_options())
: nullptr;
}
- const ArgMinOptions *builtin_options_as_ArgMinOptions() const
+ const onert_tflite::ArgMinOptions *builtin_options_as_ArgMinOptions() const
{
- return builtin_options_type() == BuiltinOptions_ArgMinOptions
- ? static_cast<const ArgMinOptions *>(builtin_options())
+ return builtin_options_type() == onert_tflite::BuiltinOptions_ArgMinOptions
+ ? static_cast<const onert_tflite::ArgMinOptions *>(builtin_options())
: nullptr;
}
- const FakeQuantOptions *builtin_options_as_FakeQuantOptions() const
+ const onert_tflite::FakeQuantOptions *builtin_options_as_FakeQuantOptions() const
{
- return builtin_options_type() == BuiltinOptions_FakeQuantOptions
- ? static_cast<const FakeQuantOptions *>(builtin_options())
+ return builtin_options_type() == onert_tflite::BuiltinOptions_FakeQuantOptions
+ ? static_cast<const onert_tflite::FakeQuantOptions *>(builtin_options())
: nullptr;
}
- const PackOptions *builtin_options_as_PackOptions() const
+ const onert_tflite::PackOptions *builtin_options_as_PackOptions() const
{
- return builtin_options_type() == BuiltinOptions_PackOptions
- ? static_cast<const PackOptions *>(builtin_options())
+ return builtin_options_type() == onert_tflite::BuiltinOptions_PackOptions
+ ? static_cast<const onert_tflite::PackOptions *>(builtin_options())
: nullptr;
}
- const LogicalOrOptions *builtin_options_as_LogicalOrOptions() const
+ const onert_tflite::LogicalOrOptions *builtin_options_as_LogicalOrOptions() const
{
- return builtin_options_type() == BuiltinOptions_LogicalOrOptions
- ? static_cast<const LogicalOrOptions *>(builtin_options())
+ return builtin_options_type() == onert_tflite::BuiltinOptions_LogicalOrOptions
+ ? static_cast<const onert_tflite::LogicalOrOptions *>(builtin_options())
: nullptr;
}
- const OneHotOptions *builtin_options_as_OneHotOptions() const
+ const onert_tflite::OneHotOptions *builtin_options_as_OneHotOptions() const
{
- return builtin_options_type() == BuiltinOptions_OneHotOptions
- ? static_cast<const OneHotOptions *>(builtin_options())
+ return builtin_options_type() == onert_tflite::BuiltinOptions_OneHotOptions
+ ? static_cast<const onert_tflite::OneHotOptions *>(builtin_options())
: nullptr;
}
- const LogicalAndOptions *builtin_options_as_LogicalAndOptions() const
+ const onert_tflite::LogicalAndOptions *builtin_options_as_LogicalAndOptions() const
{
- return builtin_options_type() == BuiltinOptions_LogicalAndOptions
- ? static_cast<const LogicalAndOptions *>(builtin_options())
+ return builtin_options_type() == onert_tflite::BuiltinOptions_LogicalAndOptions
+ ? static_cast<const onert_tflite::LogicalAndOptions *>(builtin_options())
: nullptr;
}
- const LogicalNotOptions *builtin_options_as_LogicalNotOptions() const
+ const onert_tflite::LogicalNotOptions *builtin_options_as_LogicalNotOptions() const
{
- return builtin_options_type() == BuiltinOptions_LogicalNotOptions
- ? static_cast<const LogicalNotOptions *>(builtin_options())
+ return builtin_options_type() == onert_tflite::BuiltinOptions_LogicalNotOptions
+ ? static_cast<const onert_tflite::LogicalNotOptions *>(builtin_options())
: nullptr;
}
- const UnpackOptions *builtin_options_as_UnpackOptions() const
+ const onert_tflite::UnpackOptions *builtin_options_as_UnpackOptions() const
{
- return builtin_options_type() == BuiltinOptions_UnpackOptions
- ? static_cast<const UnpackOptions *>(builtin_options())
+ return builtin_options_type() == onert_tflite::BuiltinOptions_UnpackOptions
+ ? static_cast<const onert_tflite::UnpackOptions *>(builtin_options())
: nullptr;
}
- const FloorDivOptions *builtin_options_as_FloorDivOptions() const
+ const onert_tflite::FloorDivOptions *builtin_options_as_FloorDivOptions() const
{
- return builtin_options_type() == BuiltinOptions_FloorDivOptions
- ? static_cast<const FloorDivOptions *>(builtin_options())
+ return builtin_options_type() == onert_tflite::BuiltinOptions_FloorDivOptions
+ ? static_cast<const onert_tflite::FloorDivOptions *>(builtin_options())
: nullptr;
}
- const SquareOptions *builtin_options_as_SquareOptions() const
+ const onert_tflite::SquareOptions *builtin_options_as_SquareOptions() const
{
- return builtin_options_type() == BuiltinOptions_SquareOptions
- ? static_cast<const SquareOptions *>(builtin_options())
+ return builtin_options_type() == onert_tflite::BuiltinOptions_SquareOptions
+ ? static_cast<const onert_tflite::SquareOptions *>(builtin_options())
: nullptr;
}
- const ZerosLikeOptions *builtin_options_as_ZerosLikeOptions() const
+ const onert_tflite::ZerosLikeOptions *builtin_options_as_ZerosLikeOptions() const
{
- return builtin_options_type() == BuiltinOptions_ZerosLikeOptions
- ? static_cast<const ZerosLikeOptions *>(builtin_options())
+ return builtin_options_type() == onert_tflite::BuiltinOptions_ZerosLikeOptions
+ ? static_cast<const onert_tflite::ZerosLikeOptions *>(builtin_options())
: nullptr;
}
- const FillOptions *builtin_options_as_FillOptions() const
+ const onert_tflite::FillOptions *builtin_options_as_FillOptions() const
{
- return builtin_options_type() == BuiltinOptions_FillOptions
- ? static_cast<const FillOptions *>(builtin_options())
+ return builtin_options_type() == onert_tflite::BuiltinOptions_FillOptions
+ ? static_cast<const onert_tflite::FillOptions *>(builtin_options())
: nullptr;
}
- const BidirectionalSequenceLSTMOptions *
+ const onert_tflite::BidirectionalSequenceLSTMOptions *
builtin_options_as_BidirectionalSequenceLSTMOptions() const
{
- return builtin_options_type() == BuiltinOptions_BidirectionalSequenceLSTMOptions
- ? static_cast<const BidirectionalSequenceLSTMOptions *>(builtin_options())
+ return builtin_options_type() == onert_tflite::BuiltinOptions_BidirectionalSequenceLSTMOptions
+ ? static_cast<const onert_tflite::BidirectionalSequenceLSTMOptions *>(
+ builtin_options())
: nullptr;
}
- const BidirectionalSequenceRNNOptions *builtin_options_as_BidirectionalSequenceRNNOptions() const
+ const onert_tflite::BidirectionalSequenceRNNOptions *
+ builtin_options_as_BidirectionalSequenceRNNOptions() const
{
- return builtin_options_type() == BuiltinOptions_BidirectionalSequenceRNNOptions
- ? static_cast<const BidirectionalSequenceRNNOptions *>(builtin_options())
+ return builtin_options_type() == onert_tflite::BuiltinOptions_BidirectionalSequenceRNNOptions
+ ? static_cast<const onert_tflite::BidirectionalSequenceRNNOptions *>(builtin_options())
: nullptr;
}
- const UnidirectionalSequenceLSTMOptions *
+ const onert_tflite::UnidirectionalSequenceLSTMOptions *
builtin_options_as_UnidirectionalSequenceLSTMOptions() const
{
- return builtin_options_type() == BuiltinOptions_UnidirectionalSequenceLSTMOptions
- ? static_cast<const UnidirectionalSequenceLSTMOptions *>(builtin_options())
+ return builtin_options_type() == onert_tflite::BuiltinOptions_UnidirectionalSequenceLSTMOptions
+ ? static_cast<const onert_tflite::UnidirectionalSequenceLSTMOptions *>(
+ builtin_options())
: nullptr;
}
- const FloorModOptions *builtin_options_as_FloorModOptions() const
+ const onert_tflite::FloorModOptions *builtin_options_as_FloorModOptions() const
{
- return builtin_options_type() == BuiltinOptions_FloorModOptions
- ? static_cast<const FloorModOptions *>(builtin_options())
+ return builtin_options_type() == onert_tflite::BuiltinOptions_FloorModOptions
+ ? static_cast<const onert_tflite::FloorModOptions *>(builtin_options())
: nullptr;
}
- const RangeOptions *builtin_options_as_RangeOptions() const
+ const onert_tflite::RangeOptions *builtin_options_as_RangeOptions() const
{
- return builtin_options_type() == BuiltinOptions_RangeOptions
- ? static_cast<const RangeOptions *>(builtin_options())
+ return builtin_options_type() == onert_tflite::BuiltinOptions_RangeOptions
+ ? static_cast<const onert_tflite::RangeOptions *>(builtin_options())
: nullptr;
}
- const ResizeNearestNeighborOptions *builtin_options_as_ResizeNearestNeighborOptions() const
+ const onert_tflite::ResizeNearestNeighborOptions *
+ builtin_options_as_ResizeNearestNeighborOptions() const
{
- return builtin_options_type() == BuiltinOptions_ResizeNearestNeighborOptions
- ? static_cast<const ResizeNearestNeighborOptions *>(builtin_options())
+ return builtin_options_type() == onert_tflite::BuiltinOptions_ResizeNearestNeighborOptions
+ ? static_cast<const onert_tflite::ResizeNearestNeighborOptions *>(builtin_options())
: nullptr;
}
- const LeakyReluOptions *builtin_options_as_LeakyReluOptions() const
+ const onert_tflite::LeakyReluOptions *builtin_options_as_LeakyReluOptions() const
{
- return builtin_options_type() == BuiltinOptions_LeakyReluOptions
- ? static_cast<const LeakyReluOptions *>(builtin_options())
+ return builtin_options_type() == onert_tflite::BuiltinOptions_LeakyReluOptions
+ ? static_cast<const onert_tflite::LeakyReluOptions *>(builtin_options())
: nullptr;
}
- const SquaredDifferenceOptions *builtin_options_as_SquaredDifferenceOptions() const
+ const onert_tflite::SquaredDifferenceOptions *builtin_options_as_SquaredDifferenceOptions() const
{
- return builtin_options_type() == BuiltinOptions_SquaredDifferenceOptions
- ? static_cast<const SquaredDifferenceOptions *>(builtin_options())
+ return builtin_options_type() == onert_tflite::BuiltinOptions_SquaredDifferenceOptions
+ ? static_cast<const onert_tflite::SquaredDifferenceOptions *>(builtin_options())
: nullptr;
}
- const MirrorPadOptions *builtin_options_as_MirrorPadOptions() const
+ const onert_tflite::MirrorPadOptions *builtin_options_as_MirrorPadOptions() const
{
- return builtin_options_type() == BuiltinOptions_MirrorPadOptions
- ? static_cast<const MirrorPadOptions *>(builtin_options())
+ return builtin_options_type() == onert_tflite::BuiltinOptions_MirrorPadOptions
+ ? static_cast<const onert_tflite::MirrorPadOptions *>(builtin_options())
: nullptr;
}
- const AbsOptions *builtin_options_as_AbsOptions() const
+ const onert_tflite::AbsOptions *builtin_options_as_AbsOptions() const
{
- return builtin_options_type() == BuiltinOptions_AbsOptions
- ? static_cast<const AbsOptions *>(builtin_options())
+ return builtin_options_type() == onert_tflite::BuiltinOptions_AbsOptions
+ ? static_cast<const onert_tflite::AbsOptions *>(builtin_options())
: nullptr;
}
- const SplitVOptions *builtin_options_as_SplitVOptions() const
+ const onert_tflite::SplitVOptions *builtin_options_as_SplitVOptions() const
{
- return builtin_options_type() == BuiltinOptions_SplitVOptions
- ? static_cast<const SplitVOptions *>(builtin_options())
+ return builtin_options_type() == onert_tflite::BuiltinOptions_SplitVOptions
+ ? static_cast<const onert_tflite::SplitVOptions *>(builtin_options())
: nullptr;
}
- const UniqueOptions *builtin_options_as_UniqueOptions() const
+ const onert_tflite::UniqueOptions *builtin_options_as_UniqueOptions() const
{
- return builtin_options_type() == BuiltinOptions_UniqueOptions
- ? static_cast<const UniqueOptions *>(builtin_options())
+ return builtin_options_type() == onert_tflite::BuiltinOptions_UniqueOptions
+ ? static_cast<const onert_tflite::UniqueOptions *>(builtin_options())
: nullptr;
}
- const ReverseV2Options *builtin_options_as_ReverseV2Options() const
+ const onert_tflite::ReverseV2Options *builtin_options_as_ReverseV2Options() const
{
- return builtin_options_type() == BuiltinOptions_ReverseV2Options
- ? static_cast<const ReverseV2Options *>(builtin_options())
+ return builtin_options_type() == onert_tflite::BuiltinOptions_ReverseV2Options
+ ? static_cast<const onert_tflite::ReverseV2Options *>(builtin_options())
: nullptr;
}
- const AddNOptions *builtin_options_as_AddNOptions() const
+ const onert_tflite::AddNOptions *builtin_options_as_AddNOptions() const
{
- return builtin_options_type() == BuiltinOptions_AddNOptions
- ? static_cast<const AddNOptions *>(builtin_options())
+ return builtin_options_type() == onert_tflite::BuiltinOptions_AddNOptions
+ ? static_cast<const onert_tflite::AddNOptions *>(builtin_options())
: nullptr;
}
- const GatherNdOptions *builtin_options_as_GatherNdOptions() const
+ const onert_tflite::GatherNdOptions *builtin_options_as_GatherNdOptions() const
{
- return builtin_options_type() == BuiltinOptions_GatherNdOptions
- ? static_cast<const GatherNdOptions *>(builtin_options())
+ return builtin_options_type() == onert_tflite::BuiltinOptions_GatherNdOptions
+ ? static_cast<const onert_tflite::GatherNdOptions *>(builtin_options())
: nullptr;
}
- const CosOptions *builtin_options_as_CosOptions() const
+ const onert_tflite::CosOptions *builtin_options_as_CosOptions() const
{
- return builtin_options_type() == BuiltinOptions_CosOptions
- ? static_cast<const CosOptions *>(builtin_options())
+ return builtin_options_type() == onert_tflite::BuiltinOptions_CosOptions
+ ? static_cast<const onert_tflite::CosOptions *>(builtin_options())
: nullptr;
}
- const WhereOptions *builtin_options_as_WhereOptions() const
+ const onert_tflite::WhereOptions *builtin_options_as_WhereOptions() const
{
- return builtin_options_type() == BuiltinOptions_WhereOptions
- ? static_cast<const WhereOptions *>(builtin_options())
+ return builtin_options_type() == onert_tflite::BuiltinOptions_WhereOptions
+ ? static_cast<const onert_tflite::WhereOptions *>(builtin_options())
: nullptr;
}
- const RankOptions *builtin_options_as_RankOptions() const
+ const onert_tflite::RankOptions *builtin_options_as_RankOptions() const
{
- return builtin_options_type() == BuiltinOptions_RankOptions
- ? static_cast<const RankOptions *>(builtin_options())
+ return builtin_options_type() == onert_tflite::BuiltinOptions_RankOptions
+ ? static_cast<const onert_tflite::RankOptions *>(builtin_options())
: nullptr;
}
- const ReverseSequenceOptions *builtin_options_as_ReverseSequenceOptions() const
+ const onert_tflite::ReverseSequenceOptions *builtin_options_as_ReverseSequenceOptions() const
{
- return builtin_options_type() == BuiltinOptions_ReverseSequenceOptions
- ? static_cast<const ReverseSequenceOptions *>(builtin_options())
+ return builtin_options_type() == onert_tflite::BuiltinOptions_ReverseSequenceOptions
+ ? static_cast<const onert_tflite::ReverseSequenceOptions *>(builtin_options())
: nullptr;
}
- const MatrixDiagOptions *builtin_options_as_MatrixDiagOptions() const
+ const onert_tflite::MatrixDiagOptions *builtin_options_as_MatrixDiagOptions() const
{
- return builtin_options_type() == BuiltinOptions_MatrixDiagOptions
- ? static_cast<const MatrixDiagOptions *>(builtin_options())
+ return builtin_options_type() == onert_tflite::BuiltinOptions_MatrixDiagOptions
+ ? static_cast<const onert_tflite::MatrixDiagOptions *>(builtin_options())
: nullptr;
}
- const QuantizeOptions *builtin_options_as_QuantizeOptions() const
+ const onert_tflite::QuantizeOptions *builtin_options_as_QuantizeOptions() const
{
- return builtin_options_type() == BuiltinOptions_QuantizeOptions
- ? static_cast<const QuantizeOptions *>(builtin_options())
+ return builtin_options_type() == onert_tflite::BuiltinOptions_QuantizeOptions
+ ? static_cast<const onert_tflite::QuantizeOptions *>(builtin_options())
: nullptr;
}
- const MatrixSetDiagOptions *builtin_options_as_MatrixSetDiagOptions() const
+ const onert_tflite::MatrixSetDiagOptions *builtin_options_as_MatrixSetDiagOptions() const
{
- return builtin_options_type() == BuiltinOptions_MatrixSetDiagOptions
- ? static_cast<const MatrixSetDiagOptions *>(builtin_options())
+ return builtin_options_type() == onert_tflite::BuiltinOptions_MatrixSetDiagOptions
+ ? static_cast<const onert_tflite::MatrixSetDiagOptions *>(builtin_options())
: nullptr;
}
- const HardSwishOptions *builtin_options_as_HardSwishOptions() const
+ const onert_tflite::HardSwishOptions *builtin_options_as_HardSwishOptions() const
{
- return builtin_options_type() == BuiltinOptions_HardSwishOptions
- ? static_cast<const HardSwishOptions *>(builtin_options())
+ return builtin_options_type() == onert_tflite::BuiltinOptions_HardSwishOptions
+ ? static_cast<const onert_tflite::HardSwishOptions *>(builtin_options())
: nullptr;
}
- const IfOptions *builtin_options_as_IfOptions() const
+ const onert_tflite::IfOptions *builtin_options_as_IfOptions() const
{
- return builtin_options_type() == BuiltinOptions_IfOptions
- ? static_cast<const IfOptions *>(builtin_options())
+ return builtin_options_type() == onert_tflite::BuiltinOptions_IfOptions
+ ? static_cast<const onert_tflite::IfOptions *>(builtin_options())
: nullptr;
}
- const WhileOptions *builtin_options_as_WhileOptions() const
+ const onert_tflite::WhileOptions *builtin_options_as_WhileOptions() const
{
- return builtin_options_type() == BuiltinOptions_WhileOptions
- ? static_cast<const WhileOptions *>(builtin_options())
+ return builtin_options_type() == onert_tflite::BuiltinOptions_WhileOptions
+ ? static_cast<const onert_tflite::WhileOptions *>(builtin_options())
: nullptr;
}
- const DepthToSpaceOptions *builtin_options_as_DepthToSpaceOptions() const
+ const onert_tflite::DepthToSpaceOptions *builtin_options_as_DepthToSpaceOptions() const
{
- return builtin_options_type() == BuiltinOptions_DepthToSpaceOptions
- ? static_cast<const DepthToSpaceOptions *>(builtin_options())
+ return builtin_options_type() == onert_tflite::BuiltinOptions_DepthToSpaceOptions
+ ? static_cast<const onert_tflite::DepthToSpaceOptions *>(builtin_options())
: nullptr;
}
- const NonMaxSuppressionV4Options *builtin_options_as_NonMaxSuppressionV4Options() const
+ const onert_tflite::NonMaxSuppressionV4Options *
+ builtin_options_as_NonMaxSuppressionV4Options() const
{
- return builtin_options_type() == BuiltinOptions_NonMaxSuppressionV4Options
- ? static_cast<const NonMaxSuppressionV4Options *>(builtin_options())
+ return builtin_options_type() == onert_tflite::BuiltinOptions_NonMaxSuppressionV4Options
+ ? static_cast<const onert_tflite::NonMaxSuppressionV4Options *>(builtin_options())
: nullptr;
}
- const NonMaxSuppressionV5Options *builtin_options_as_NonMaxSuppressionV5Options() const
+ const onert_tflite::NonMaxSuppressionV5Options *
+ builtin_options_as_NonMaxSuppressionV5Options() const
{
- return builtin_options_type() == BuiltinOptions_NonMaxSuppressionV5Options
- ? static_cast<const NonMaxSuppressionV5Options *>(builtin_options())
+ return builtin_options_type() == onert_tflite::BuiltinOptions_NonMaxSuppressionV5Options
+ ? static_cast<const onert_tflite::NonMaxSuppressionV5Options *>(builtin_options())
: nullptr;
}
- const ScatterNdOptions *builtin_options_as_ScatterNdOptions() const
+ const onert_tflite::ScatterNdOptions *builtin_options_as_ScatterNdOptions() const
{
- return builtin_options_type() == BuiltinOptions_ScatterNdOptions
- ? static_cast<const ScatterNdOptions *>(builtin_options())
+ return builtin_options_type() == onert_tflite::BuiltinOptions_ScatterNdOptions
+ ? static_cast<const onert_tflite::ScatterNdOptions *>(builtin_options())
: nullptr;
}
- const SelectV2Options *builtin_options_as_SelectV2Options() const
+ const onert_tflite::SelectV2Options *builtin_options_as_SelectV2Options() const
{
- return builtin_options_type() == BuiltinOptions_SelectV2Options
- ? static_cast<const SelectV2Options *>(builtin_options())
+ return builtin_options_type() == onert_tflite::BuiltinOptions_SelectV2Options
+ ? static_cast<const onert_tflite::SelectV2Options *>(builtin_options())
: nullptr;
}
- const DensifyOptions *builtin_options_as_DensifyOptions() const
+ const onert_tflite::DensifyOptions *builtin_options_as_DensifyOptions() const
{
- return builtin_options_type() == BuiltinOptions_DensifyOptions
- ? static_cast<const DensifyOptions *>(builtin_options())
+ return builtin_options_type() == onert_tflite::BuiltinOptions_DensifyOptions
+ ? static_cast<const onert_tflite::DensifyOptions *>(builtin_options())
: nullptr;
}
- const SegmentSumOptions *builtin_options_as_SegmentSumOptions() const
+ const onert_tflite::SegmentSumOptions *builtin_options_as_SegmentSumOptions() const
{
- return builtin_options_type() == BuiltinOptions_SegmentSumOptions
- ? static_cast<const SegmentSumOptions *>(builtin_options())
+ return builtin_options_type() == onert_tflite::BuiltinOptions_SegmentSumOptions
+ ? static_cast<const onert_tflite::SegmentSumOptions *>(builtin_options())
: nullptr;
}
- const BatchMatMulOptions *builtin_options_as_BatchMatMulOptions() const
+ const onert_tflite::BatchMatMulOptions *builtin_options_as_BatchMatMulOptions() const
{
- return builtin_options_type() == BuiltinOptions_BatchMatMulOptions
- ? static_cast<const BatchMatMulOptions *>(builtin_options())
+ return builtin_options_type() == onert_tflite::BuiltinOptions_BatchMatMulOptions
+ ? static_cast<const onert_tflite::BatchMatMulOptions *>(builtin_options())
+ : nullptr;
+ }
+ const onert_tflite::CumsumOptions *builtin_options_as_CumsumOptions() const
+ {
+ return builtin_options_type() == onert_tflite::BuiltinOptions_CumsumOptions
+ ? static_cast<const onert_tflite::CumsumOptions *>(builtin_options())
+ : nullptr;
+ }
+ const onert_tflite::CallOnceOptions *builtin_options_as_CallOnceOptions() const
+ {
+ return builtin_options_type() == onert_tflite::BuiltinOptions_CallOnceOptions
+ ? static_cast<const onert_tflite::CallOnceOptions *>(builtin_options())
+ : nullptr;
+ }
+ const onert_tflite::BroadcastToOptions *builtin_options_as_BroadcastToOptions() const
+ {
+ return builtin_options_type() == onert_tflite::BuiltinOptions_BroadcastToOptions
+ ? static_cast<const onert_tflite::BroadcastToOptions *>(builtin_options())
+ : nullptr;
+ }
+ const onert_tflite::Rfft2dOptions *builtin_options_as_Rfft2dOptions() const
+ {
+ return builtin_options_type() == onert_tflite::BuiltinOptions_Rfft2dOptions
+ ? static_cast<const onert_tflite::Rfft2dOptions *>(builtin_options())
+ : nullptr;
+ }
+ const onert_tflite::Conv3DOptions *builtin_options_as_Conv3DOptions() const
+ {
+ return builtin_options_type() == onert_tflite::BuiltinOptions_Conv3DOptions
+ ? static_cast<const onert_tflite::Conv3DOptions *>(builtin_options())
+ : nullptr;
+ }
+ const onert_tflite::HashtableOptions *builtin_options_as_HashtableOptions() const
+ {
+ return builtin_options_type() == onert_tflite::BuiltinOptions_HashtableOptions
+ ? static_cast<const onert_tflite::HashtableOptions *>(builtin_options())
+ : nullptr;
+ }
+ const onert_tflite::HashtableFindOptions *builtin_options_as_HashtableFindOptions() const
+ {
+ return builtin_options_type() == onert_tflite::BuiltinOptions_HashtableFindOptions
+ ? static_cast<const onert_tflite::HashtableFindOptions *>(builtin_options())
+ : nullptr;
+ }
+ const onert_tflite::HashtableImportOptions *builtin_options_as_HashtableImportOptions() const
+ {
+ return builtin_options_type() == onert_tflite::BuiltinOptions_HashtableImportOptions
+ ? static_cast<const onert_tflite::HashtableImportOptions *>(builtin_options())
+ : nullptr;
+ }
+ const onert_tflite::HashtableSizeOptions *builtin_options_as_HashtableSizeOptions() const
+ {
+ return builtin_options_type() == onert_tflite::BuiltinOptions_HashtableSizeOptions
+ ? static_cast<const onert_tflite::HashtableSizeOptions *>(builtin_options())
+ : nullptr;
+ }
+ const onert_tflite::VarHandleOptions *builtin_options_as_VarHandleOptions() const
+ {
+ return builtin_options_type() == onert_tflite::BuiltinOptions_VarHandleOptions
+ ? static_cast<const onert_tflite::VarHandleOptions *>(builtin_options())
+ : nullptr;
+ }
+ const onert_tflite::ReadVariableOptions *builtin_options_as_ReadVariableOptions() const
+ {
+ return builtin_options_type() == onert_tflite::BuiltinOptions_ReadVariableOptions
+ ? static_cast<const onert_tflite::ReadVariableOptions *>(builtin_options())
+ : nullptr;
+ }
+ const onert_tflite::AssignVariableOptions *builtin_options_as_AssignVariableOptions() const
+ {
+ return builtin_options_type() == onert_tflite::BuiltinOptions_AssignVariableOptions
+ ? static_cast<const onert_tflite::AssignVariableOptions *>(builtin_options())
+ : nullptr;
+ }
+ const onert_tflite::RandomOptions *builtin_options_as_RandomOptions() const
+ {
+ return builtin_options_type() == onert_tflite::BuiltinOptions_RandomOptions
+ ? static_cast<const onert_tflite::RandomOptions *>(builtin_options())
: nullptr;
}
const flatbuffers::Vector<uint8_t> *custom_options() const
{
return GetPointer<const flatbuffers::Vector<uint8_t> *>(VT_CUSTOM_OPTIONS);
}
- CustomOptionsFormat custom_options_format() const
+ onert_tflite::CustomOptionsFormat custom_options_format() const
{
- return static_cast<CustomOptionsFormat>(GetField<int8_t>(VT_CUSTOM_OPTIONS_FORMAT, 0));
+ return static_cast<onert_tflite::CustomOptionsFormat>(
+ GetField<int8_t>(VT_CUSTOM_OPTIONS_FORMAT, 0));
}
const flatbuffers::Vector<uint8_t> *mutating_variable_inputs() const
{
@@ -7878,550 +9207,806 @@ struct Operator FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table
}
};
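The accessors in the hunk above all follow one checked-downcast pattern: compare builtin_options_type() against the matching onert_tflite::BuiltinOptions_* tag and return the typed payload, or nullptr on mismatch, so callers can probe the union without a prior type switch. A minimal usage sketch, not part of the generated header (the helper name and the printed output are assumptions; stride_w/stride_h are stock tflite Conv2DOptions fields):

    #include <cstdio>

    // Returns true iff `op` carries Conv2DOptions; the accessor itself
    // performs the type check and yields nullptr for any other union tag.
    inline bool dump_conv2d(const onert_tflite::Operator *op)
    {
      const auto *conv = op->builtin_options_as_Conv2DOptions();
      if (conv == nullptr)
        return false;
      std::printf("conv2d stride: %d x %d\n", (int)conv->stride_w(), (int)conv->stride_h());
      return true;
    }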
-template <> inline const Conv2DOptions *Operator::builtin_options_as<Conv2DOptions>() const
+template <>
+inline const onert_tflite::Conv2DOptions *
+Operator::builtin_options_as<onert_tflite::Conv2DOptions>() const
{
return builtin_options_as_Conv2DOptions();
}
template <>
-inline const DepthwiseConv2DOptions *Operator::builtin_options_as<DepthwiseConv2DOptions>() const
+inline const onert_tflite::DepthwiseConv2DOptions *
+Operator::builtin_options_as<onert_tflite::DepthwiseConv2DOptions>() const
{
return builtin_options_as_DepthwiseConv2DOptions();
}
template <>
-inline const ConcatEmbeddingsOptions *Operator::builtin_options_as<ConcatEmbeddingsOptions>() const
+inline const onert_tflite::ConcatEmbeddingsOptions *
+Operator::builtin_options_as<onert_tflite::ConcatEmbeddingsOptions>() const
{
return builtin_options_as_ConcatEmbeddingsOptions();
}
template <>
-inline const LSHProjectionOptions *Operator::builtin_options_as<LSHProjectionOptions>() const
+inline const onert_tflite::LSHProjectionOptions *
+Operator::builtin_options_as<onert_tflite::LSHProjectionOptions>() const
{
return builtin_options_as_LSHProjectionOptions();
}
-template <> inline const Pool2DOptions *Operator::builtin_options_as<Pool2DOptions>() const
+template <>
+inline const onert_tflite::Pool2DOptions *
+Operator::builtin_options_as<onert_tflite::Pool2DOptions>() const
{
return builtin_options_as_Pool2DOptions();
}
-template <> inline const SVDFOptions *Operator::builtin_options_as<SVDFOptions>() const
+template <>
+inline const onert_tflite::SVDFOptions *
+Operator::builtin_options_as<onert_tflite::SVDFOptions>() const
{
return builtin_options_as_SVDFOptions();
}
-template <> inline const RNNOptions *Operator::builtin_options_as<RNNOptions>() const
+template <>
+inline const onert_tflite::RNNOptions *
+Operator::builtin_options_as<onert_tflite::RNNOptions>() const
{
return builtin_options_as_RNNOptions();
}
template <>
-inline const FullyConnectedOptions *Operator::builtin_options_as<FullyConnectedOptions>() const
+inline const onert_tflite::FullyConnectedOptions *
+Operator::builtin_options_as<onert_tflite::FullyConnectedOptions>() const
{
return builtin_options_as_FullyConnectedOptions();
}
-template <> inline const SoftmaxOptions *Operator::builtin_options_as<SoftmaxOptions>() const
+template <>
+inline const onert_tflite::SoftmaxOptions *
+Operator::builtin_options_as<onert_tflite::SoftmaxOptions>() const
{
return builtin_options_as_SoftmaxOptions();
}
template <>
-inline const ConcatenationOptions *Operator::builtin_options_as<ConcatenationOptions>() const
+inline const onert_tflite::ConcatenationOptions *
+Operator::builtin_options_as<onert_tflite::ConcatenationOptions>() const
{
return builtin_options_as_ConcatenationOptions();
}
-template <> inline const AddOptions *Operator::builtin_options_as<AddOptions>() const
+template <>
+inline const onert_tflite::AddOptions *
+Operator::builtin_options_as<onert_tflite::AddOptions>() const
{
return builtin_options_as_AddOptions();
}
-template <> inline const L2NormOptions *Operator::builtin_options_as<L2NormOptions>() const
+template <>
+inline const onert_tflite::L2NormOptions *
+Operator::builtin_options_as<onert_tflite::L2NormOptions>() const
{
return builtin_options_as_L2NormOptions();
}
template <>
-inline const LocalResponseNormalizationOptions *
-Operator::builtin_options_as<LocalResponseNormalizationOptions>() const
+inline const onert_tflite::LocalResponseNormalizationOptions *
+Operator::builtin_options_as<onert_tflite::LocalResponseNormalizationOptions>() const
{
return builtin_options_as_LocalResponseNormalizationOptions();
}
-template <> inline const LSTMOptions *Operator::builtin_options_as<LSTMOptions>() const
+template <>
+inline const onert_tflite::LSTMOptions *
+Operator::builtin_options_as<onert_tflite::LSTMOptions>() const
{
return builtin_options_as_LSTMOptions();
}
template <>
-inline const ResizeBilinearOptions *Operator::builtin_options_as<ResizeBilinearOptions>() const
+inline const onert_tflite::ResizeBilinearOptions *
+Operator::builtin_options_as<onert_tflite::ResizeBilinearOptions>() const
{
return builtin_options_as_ResizeBilinearOptions();
}
-template <> inline const CallOptions *Operator::builtin_options_as<CallOptions>() const
+template <>
+inline const onert_tflite::CallOptions *
+Operator::builtin_options_as<onert_tflite::CallOptions>() const
{
return builtin_options_as_CallOptions();
}
-template <> inline const ReshapeOptions *Operator::builtin_options_as<ReshapeOptions>() const
+template <>
+inline const onert_tflite::ReshapeOptions *
+Operator::builtin_options_as<onert_tflite::ReshapeOptions>() const
{
return builtin_options_as_ReshapeOptions();
}
-template <> inline const SkipGramOptions *Operator::builtin_options_as<SkipGramOptions>() const
+template <>
+inline const onert_tflite::SkipGramOptions *
+Operator::builtin_options_as<onert_tflite::SkipGramOptions>() const
{
return builtin_options_as_SkipGramOptions();
}
template <>
-inline const SpaceToDepthOptions *Operator::builtin_options_as<SpaceToDepthOptions>() const
+inline const onert_tflite::SpaceToDepthOptions *
+Operator::builtin_options_as<onert_tflite::SpaceToDepthOptions>() const
{
return builtin_options_as_SpaceToDepthOptions();
}
template <>
-inline const EmbeddingLookupSparseOptions *
-Operator::builtin_options_as<EmbeddingLookupSparseOptions>() const
+inline const onert_tflite::EmbeddingLookupSparseOptions *
+Operator::builtin_options_as<onert_tflite::EmbeddingLookupSparseOptions>() const
{
return builtin_options_as_EmbeddingLookupSparseOptions();
}
-template <> inline const MulOptions *Operator::builtin_options_as<MulOptions>() const
+template <>
+inline const onert_tflite::MulOptions *
+Operator::builtin_options_as<onert_tflite::MulOptions>() const
{
return builtin_options_as_MulOptions();
}
-template <> inline const PadOptions *Operator::builtin_options_as<PadOptions>() const
+template <>
+inline const onert_tflite::PadOptions *
+Operator::builtin_options_as<onert_tflite::PadOptions>() const
{
return builtin_options_as_PadOptions();
}
-template <> inline const GatherOptions *Operator::builtin_options_as<GatherOptions>() const
+template <>
+inline const onert_tflite::GatherOptions *
+Operator::builtin_options_as<onert_tflite::GatherOptions>() const
{
return builtin_options_as_GatherOptions();
}
template <>
-inline const BatchToSpaceNDOptions *Operator::builtin_options_as<BatchToSpaceNDOptions>() const
+inline const onert_tflite::BatchToSpaceNDOptions *
+Operator::builtin_options_as<onert_tflite::BatchToSpaceNDOptions>() const
{
return builtin_options_as_BatchToSpaceNDOptions();
}
template <>
-inline const SpaceToBatchNDOptions *Operator::builtin_options_as<SpaceToBatchNDOptions>() const
+inline const onert_tflite::SpaceToBatchNDOptions *
+Operator::builtin_options_as<onert_tflite::SpaceToBatchNDOptions>() const
{
return builtin_options_as_SpaceToBatchNDOptions();
}
-template <> inline const TransposeOptions *Operator::builtin_options_as<TransposeOptions>() const
+template <>
+inline const onert_tflite::TransposeOptions *
+Operator::builtin_options_as<onert_tflite::TransposeOptions>() const
{
return builtin_options_as_TransposeOptions();
}
-template <> inline const ReducerOptions *Operator::builtin_options_as<ReducerOptions>() const
+template <>
+inline const onert_tflite::ReducerOptions *
+Operator::builtin_options_as<onert_tflite::ReducerOptions>() const
{
return builtin_options_as_ReducerOptions();
}
-template <> inline const SubOptions *Operator::builtin_options_as<SubOptions>() const
+template <>
+inline const onert_tflite::SubOptions *
+Operator::builtin_options_as<onert_tflite::SubOptions>() const
{
return builtin_options_as_SubOptions();
}
-template <> inline const DivOptions *Operator::builtin_options_as<DivOptions>() const
+template <>
+inline const onert_tflite::DivOptions *
+Operator::builtin_options_as<onert_tflite::DivOptions>() const
{
return builtin_options_as_DivOptions();
}
-template <> inline const SqueezeOptions *Operator::builtin_options_as<SqueezeOptions>() const
+template <>
+inline const onert_tflite::SqueezeOptions *
+Operator::builtin_options_as<onert_tflite::SqueezeOptions>() const
{
return builtin_options_as_SqueezeOptions();
}
template <>
-inline const SequenceRNNOptions *Operator::builtin_options_as<SequenceRNNOptions>() const
+inline const onert_tflite::SequenceRNNOptions *
+Operator::builtin_options_as<onert_tflite::SequenceRNNOptions>() const
{
return builtin_options_as_SequenceRNNOptions();
}
template <>
-inline const StridedSliceOptions *Operator::builtin_options_as<StridedSliceOptions>() const
+inline const onert_tflite::StridedSliceOptions *
+Operator::builtin_options_as<onert_tflite::StridedSliceOptions>() const
{
return builtin_options_as_StridedSliceOptions();
}
-template <> inline const ExpOptions *Operator::builtin_options_as<ExpOptions>() const
+template <>
+inline const onert_tflite::ExpOptions *
+Operator::builtin_options_as<onert_tflite::ExpOptions>() const
{
return builtin_options_as_ExpOptions();
}
-template <> inline const TopKV2Options *Operator::builtin_options_as<TopKV2Options>() const
+template <>
+inline const onert_tflite::TopKV2Options *
+Operator::builtin_options_as<onert_tflite::TopKV2Options>() const
{
return builtin_options_as_TopKV2Options();
}
-template <> inline const SplitOptions *Operator::builtin_options_as<SplitOptions>() const
+template <>
+inline const onert_tflite::SplitOptions *
+Operator::builtin_options_as<onert_tflite::SplitOptions>() const
{
return builtin_options_as_SplitOptions();
}
-template <> inline const LogSoftmaxOptions *Operator::builtin_options_as<LogSoftmaxOptions>() const
+template <>
+inline const onert_tflite::LogSoftmaxOptions *
+Operator::builtin_options_as<onert_tflite::LogSoftmaxOptions>() const
{
return builtin_options_as_LogSoftmaxOptions();
}
-template <> inline const CastOptions *Operator::builtin_options_as<CastOptions>() const
+template <>
+inline const onert_tflite::CastOptions *
+Operator::builtin_options_as<onert_tflite::CastOptions>() const
{
return builtin_options_as_CastOptions();
}
-template <> inline const DequantizeOptions *Operator::builtin_options_as<DequantizeOptions>() const
+template <>
+inline const onert_tflite::DequantizeOptions *
+Operator::builtin_options_as<onert_tflite::DequantizeOptions>() const
{
return builtin_options_as_DequantizeOptions();
}
template <>
-inline const MaximumMinimumOptions *Operator::builtin_options_as<MaximumMinimumOptions>() const
+inline const onert_tflite::MaximumMinimumOptions *
+Operator::builtin_options_as<onert_tflite::MaximumMinimumOptions>() const
{
return builtin_options_as_MaximumMinimumOptions();
}
-template <> inline const ArgMaxOptions *Operator::builtin_options_as<ArgMaxOptions>() const
+template <>
+inline const onert_tflite::ArgMaxOptions *
+Operator::builtin_options_as<onert_tflite::ArgMaxOptions>() const
{
return builtin_options_as_ArgMaxOptions();
}
-template <> inline const LessOptions *Operator::builtin_options_as<LessOptions>() const
+template <>
+inline const onert_tflite::LessOptions *
+Operator::builtin_options_as<onert_tflite::LessOptions>() const
{
return builtin_options_as_LessOptions();
}
-template <> inline const NegOptions *Operator::builtin_options_as<NegOptions>() const
+template <>
+inline const onert_tflite::NegOptions *
+Operator::builtin_options_as<onert_tflite::NegOptions>() const
{
return builtin_options_as_NegOptions();
}
-template <> inline const PadV2Options *Operator::builtin_options_as<PadV2Options>() const
+template <>
+inline const onert_tflite::PadV2Options *
+Operator::builtin_options_as<onert_tflite::PadV2Options>() const
{
return builtin_options_as_PadV2Options();
}
-template <> inline const GreaterOptions *Operator::builtin_options_as<GreaterOptions>() const
+template <>
+inline const onert_tflite::GreaterOptions *
+Operator::builtin_options_as<onert_tflite::GreaterOptions>() const
{
return builtin_options_as_GreaterOptions();
}
template <>
-inline const GreaterEqualOptions *Operator::builtin_options_as<GreaterEqualOptions>() const
+inline const onert_tflite::GreaterEqualOptions *
+Operator::builtin_options_as<onert_tflite::GreaterEqualOptions>() const
{
return builtin_options_as_GreaterEqualOptions();
}
-template <> inline const LessEqualOptions *Operator::builtin_options_as<LessEqualOptions>() const
+template <>
+inline const onert_tflite::LessEqualOptions *
+Operator::builtin_options_as<onert_tflite::LessEqualOptions>() const
{
return builtin_options_as_LessEqualOptions();
}
-template <> inline const SelectOptions *Operator::builtin_options_as<SelectOptions>() const
+template <>
+inline const onert_tflite::SelectOptions *
+Operator::builtin_options_as<onert_tflite::SelectOptions>() const
{
return builtin_options_as_SelectOptions();
}
-template <> inline const SliceOptions *Operator::builtin_options_as<SliceOptions>() const
+template <>
+inline const onert_tflite::SliceOptions *
+Operator::builtin_options_as<onert_tflite::SliceOptions>() const
{
return builtin_options_as_SliceOptions();
}
template <>
-inline const TransposeConvOptions *Operator::builtin_options_as<TransposeConvOptions>() const
+inline const onert_tflite::TransposeConvOptions *
+Operator::builtin_options_as<onert_tflite::TransposeConvOptions>() const
{
return builtin_options_as_TransposeConvOptions();
}
template <>
-inline const SparseToDenseOptions *Operator::builtin_options_as<SparseToDenseOptions>() const
+inline const onert_tflite::SparseToDenseOptions *
+Operator::builtin_options_as<onert_tflite::SparseToDenseOptions>() const
{
return builtin_options_as_SparseToDenseOptions();
}
-template <> inline const TileOptions *Operator::builtin_options_as<TileOptions>() const
+template <>
+inline const onert_tflite::TileOptions *
+Operator::builtin_options_as<onert_tflite::TileOptions>() const
{
return builtin_options_as_TileOptions();
}
-template <> inline const ExpandDimsOptions *Operator::builtin_options_as<ExpandDimsOptions>() const
+template <>
+inline const onert_tflite::ExpandDimsOptions *
+Operator::builtin_options_as<onert_tflite::ExpandDimsOptions>() const
{
return builtin_options_as_ExpandDimsOptions();
}
-template <> inline const EqualOptions *Operator::builtin_options_as<EqualOptions>() const
+template <>
+inline const onert_tflite::EqualOptions *
+Operator::builtin_options_as<onert_tflite::EqualOptions>() const
{
return builtin_options_as_EqualOptions();
}
-template <> inline const NotEqualOptions *Operator::builtin_options_as<NotEqualOptions>() const
+template <>
+inline const onert_tflite::NotEqualOptions *
+Operator::builtin_options_as<onert_tflite::NotEqualOptions>() const
{
return builtin_options_as_NotEqualOptions();
}
-template <> inline const ShapeOptions *Operator::builtin_options_as<ShapeOptions>() const
+template <>
+inline const onert_tflite::ShapeOptions *
+Operator::builtin_options_as<onert_tflite::ShapeOptions>() const
{
return builtin_options_as_ShapeOptions();
}
-template <> inline const PowOptions *Operator::builtin_options_as<PowOptions>() const
+template <>
+inline const onert_tflite::PowOptions *
+Operator::builtin_options_as<onert_tflite::PowOptions>() const
{
return builtin_options_as_PowOptions();
}
-template <> inline const ArgMinOptions *Operator::builtin_options_as<ArgMinOptions>() const
+template <>
+inline const onert_tflite::ArgMinOptions *
+Operator::builtin_options_as<onert_tflite::ArgMinOptions>() const
{
return builtin_options_as_ArgMinOptions();
}
-template <> inline const FakeQuantOptions *Operator::builtin_options_as<FakeQuantOptions>() const
+template <>
+inline const onert_tflite::FakeQuantOptions *
+Operator::builtin_options_as<onert_tflite::FakeQuantOptions>() const
{
return builtin_options_as_FakeQuantOptions();
}
-template <> inline const PackOptions *Operator::builtin_options_as<PackOptions>() const
+template <>
+inline const onert_tflite::PackOptions *
+Operator::builtin_options_as<onert_tflite::PackOptions>() const
{
return builtin_options_as_PackOptions();
}
-template <> inline const LogicalOrOptions *Operator::builtin_options_as<LogicalOrOptions>() const
+template <>
+inline const onert_tflite::LogicalOrOptions *
+Operator::builtin_options_as<onert_tflite::LogicalOrOptions>() const
{
return builtin_options_as_LogicalOrOptions();
}
-template <> inline const OneHotOptions *Operator::builtin_options_as<OneHotOptions>() const
+template <>
+inline const onert_tflite::OneHotOptions *
+Operator::builtin_options_as<onert_tflite::OneHotOptions>() const
{
return builtin_options_as_OneHotOptions();
}
-template <> inline const LogicalAndOptions *Operator::builtin_options_as<LogicalAndOptions>() const
+template <>
+inline const onert_tflite::LogicalAndOptions *
+Operator::builtin_options_as<onert_tflite::LogicalAndOptions>() const
{
return builtin_options_as_LogicalAndOptions();
}
-template <> inline const LogicalNotOptions *Operator::builtin_options_as<LogicalNotOptions>() const
+template <>
+inline const onert_tflite::LogicalNotOptions *
+Operator::builtin_options_as<onert_tflite::LogicalNotOptions>() const
{
return builtin_options_as_LogicalNotOptions();
}
-template <> inline const UnpackOptions *Operator::builtin_options_as<UnpackOptions>() const
+template <>
+inline const onert_tflite::UnpackOptions *
+Operator::builtin_options_as<onert_tflite::UnpackOptions>() const
{
return builtin_options_as_UnpackOptions();
}
-template <> inline const FloorDivOptions *Operator::builtin_options_as<FloorDivOptions>() const
+template <>
+inline const onert_tflite::FloorDivOptions *
+Operator::builtin_options_as<onert_tflite::FloorDivOptions>() const
{
return builtin_options_as_FloorDivOptions();
}
-template <> inline const SquareOptions *Operator::builtin_options_as<SquareOptions>() const
+template <>
+inline const onert_tflite::SquareOptions *
+Operator::builtin_options_as<onert_tflite::SquareOptions>() const
{
return builtin_options_as_SquareOptions();
}
-template <> inline const ZerosLikeOptions *Operator::builtin_options_as<ZerosLikeOptions>() const
+template <>
+inline const onert_tflite::ZerosLikeOptions *
+Operator::builtin_options_as<onert_tflite::ZerosLikeOptions>() const
{
return builtin_options_as_ZerosLikeOptions();
}
-template <> inline const FillOptions *Operator::builtin_options_as<FillOptions>() const
+template <>
+inline const onert_tflite::FillOptions *
+Operator::builtin_options_as<onert_tflite::FillOptions>() const
{
return builtin_options_as_FillOptions();
}
template <>
-inline const BidirectionalSequenceLSTMOptions *
-Operator::builtin_options_as<BidirectionalSequenceLSTMOptions>() const
+inline const onert_tflite::BidirectionalSequenceLSTMOptions *
+Operator::builtin_options_as<onert_tflite::BidirectionalSequenceLSTMOptions>() const
{
return builtin_options_as_BidirectionalSequenceLSTMOptions();
}
template <>
-inline const BidirectionalSequenceRNNOptions *
-Operator::builtin_options_as<BidirectionalSequenceRNNOptions>() const
+inline const onert_tflite::BidirectionalSequenceRNNOptions *
+Operator::builtin_options_as<onert_tflite::BidirectionalSequenceRNNOptions>() const
{
return builtin_options_as_BidirectionalSequenceRNNOptions();
}
template <>
-inline const UnidirectionalSequenceLSTMOptions *
-Operator::builtin_options_as<UnidirectionalSequenceLSTMOptions>() const
+inline const onert_tflite::UnidirectionalSequenceLSTMOptions *
+Operator::builtin_options_as<onert_tflite::UnidirectionalSequenceLSTMOptions>() const
{
return builtin_options_as_UnidirectionalSequenceLSTMOptions();
}
-template <> inline const FloorModOptions *Operator::builtin_options_as<FloorModOptions>() const
+template <>
+inline const onert_tflite::FloorModOptions *
+Operator::builtin_options_as<onert_tflite::FloorModOptions>() const
{
return builtin_options_as_FloorModOptions();
}
-template <> inline const RangeOptions *Operator::builtin_options_as<RangeOptions>() const
+template <>
+inline const onert_tflite::RangeOptions *
+Operator::builtin_options_as<onert_tflite::RangeOptions>() const
{
return builtin_options_as_RangeOptions();
}
template <>
-inline const ResizeNearestNeighborOptions *
-Operator::builtin_options_as<ResizeNearestNeighborOptions>() const
+inline const onert_tflite::ResizeNearestNeighborOptions *
+Operator::builtin_options_as<onert_tflite::ResizeNearestNeighborOptions>() const
{
return builtin_options_as_ResizeNearestNeighborOptions();
}
-template <> inline const LeakyReluOptions *Operator::builtin_options_as<LeakyReluOptions>() const
+template <>
+inline const onert_tflite::LeakyReluOptions *
+Operator::builtin_options_as<onert_tflite::LeakyReluOptions>() const
{
return builtin_options_as_LeakyReluOptions();
}
template <>
-inline const SquaredDifferenceOptions *
-Operator::builtin_options_as<SquaredDifferenceOptions>() const
+inline const onert_tflite::SquaredDifferenceOptions *
+Operator::builtin_options_as<onert_tflite::SquaredDifferenceOptions>() const
{
return builtin_options_as_SquaredDifferenceOptions();
}
-template <> inline const MirrorPadOptions *Operator::builtin_options_as<MirrorPadOptions>() const
+template <>
+inline const onert_tflite::MirrorPadOptions *
+Operator::builtin_options_as<onert_tflite::MirrorPadOptions>() const
{
return builtin_options_as_MirrorPadOptions();
}
-template <> inline const AbsOptions *Operator::builtin_options_as<AbsOptions>() const
+template <>
+inline const onert_tflite::AbsOptions *
+Operator::builtin_options_as<onert_tflite::AbsOptions>() const
{
return builtin_options_as_AbsOptions();
}
-template <> inline const SplitVOptions *Operator::builtin_options_as<SplitVOptions>() const
+template <>
+inline const onert_tflite::SplitVOptions *
+Operator::builtin_options_as<onert_tflite::SplitVOptions>() const
{
return builtin_options_as_SplitVOptions();
}
-template <> inline const UniqueOptions *Operator::builtin_options_as<UniqueOptions>() const
+template <>
+inline const onert_tflite::UniqueOptions *
+Operator::builtin_options_as<onert_tflite::UniqueOptions>() const
{
return builtin_options_as_UniqueOptions();
}
-template <> inline const ReverseV2Options *Operator::builtin_options_as<ReverseV2Options>() const
+template <>
+inline const onert_tflite::ReverseV2Options *
+Operator::builtin_options_as<onert_tflite::ReverseV2Options>() const
{
return builtin_options_as_ReverseV2Options();
}
-template <> inline const AddNOptions *Operator::builtin_options_as<AddNOptions>() const
+template <>
+inline const onert_tflite::AddNOptions *
+Operator::builtin_options_as<onert_tflite::AddNOptions>() const
{
return builtin_options_as_AddNOptions();
}
-template <> inline const GatherNdOptions *Operator::builtin_options_as<GatherNdOptions>() const
+template <>
+inline const onert_tflite::GatherNdOptions *
+Operator::builtin_options_as<onert_tflite::GatherNdOptions>() const
{
return builtin_options_as_GatherNdOptions();
}
-template <> inline const CosOptions *Operator::builtin_options_as<CosOptions>() const
+template <>
+inline const onert_tflite::CosOptions *
+Operator::builtin_options_as<onert_tflite::CosOptions>() const
{
return builtin_options_as_CosOptions();
}
-template <> inline const WhereOptions *Operator::builtin_options_as<WhereOptions>() const
+template <>
+inline const onert_tflite::WhereOptions *
+Operator::builtin_options_as<onert_tflite::WhereOptions>() const
{
return builtin_options_as_WhereOptions();
}
-template <> inline const RankOptions *Operator::builtin_options_as<RankOptions>() const
+template <>
+inline const onert_tflite::RankOptions *
+Operator::builtin_options_as<onert_tflite::RankOptions>() const
{
return builtin_options_as_RankOptions();
}
template <>
-inline const ReverseSequenceOptions *Operator::builtin_options_as<ReverseSequenceOptions>() const
+inline const onert_tflite::ReverseSequenceOptions *
+Operator::builtin_options_as<onert_tflite::ReverseSequenceOptions>() const
{
return builtin_options_as_ReverseSequenceOptions();
}
-template <> inline const MatrixDiagOptions *Operator::builtin_options_as<MatrixDiagOptions>() const
+template <>
+inline const onert_tflite::MatrixDiagOptions *
+Operator::builtin_options_as<onert_tflite::MatrixDiagOptions>() const
{
return builtin_options_as_MatrixDiagOptions();
}
-template <> inline const QuantizeOptions *Operator::builtin_options_as<QuantizeOptions>() const
+template <>
+inline const onert_tflite::QuantizeOptions *
+Operator::builtin_options_as<onert_tflite::QuantizeOptions>() const
{
return builtin_options_as_QuantizeOptions();
}
template <>
-inline const MatrixSetDiagOptions *Operator::builtin_options_as<MatrixSetDiagOptions>() const
+inline const onert_tflite::MatrixSetDiagOptions *
+Operator::builtin_options_as<onert_tflite::MatrixSetDiagOptions>() const
{
return builtin_options_as_MatrixSetDiagOptions();
}
-template <> inline const HardSwishOptions *Operator::builtin_options_as<HardSwishOptions>() const
+template <>
+inline const onert_tflite::HardSwishOptions *
+Operator::builtin_options_as<onert_tflite::HardSwishOptions>() const
{
return builtin_options_as_HardSwishOptions();
}
-template <> inline const IfOptions *Operator::builtin_options_as<IfOptions>() const
+template <>
+inline const onert_tflite::IfOptions *Operator::builtin_options_as<onert_tflite::IfOptions>() const
{
return builtin_options_as_IfOptions();
}
-template <> inline const WhileOptions *Operator::builtin_options_as<WhileOptions>() const
+template <>
+inline const onert_tflite::WhileOptions *
+Operator::builtin_options_as<onert_tflite::WhileOptions>() const
{
return builtin_options_as_WhileOptions();
}
template <>
-inline const DepthToSpaceOptions *Operator::builtin_options_as<DepthToSpaceOptions>() const
+inline const onert_tflite::DepthToSpaceOptions *
+Operator::builtin_options_as<onert_tflite::DepthToSpaceOptions>() const
{
return builtin_options_as_DepthToSpaceOptions();
}
template <>
-inline const NonMaxSuppressionV4Options *
-Operator::builtin_options_as<NonMaxSuppressionV4Options>() const
+inline const onert_tflite::NonMaxSuppressionV4Options *
+Operator::builtin_options_as<onert_tflite::NonMaxSuppressionV4Options>() const
{
return builtin_options_as_NonMaxSuppressionV4Options();
}
template <>
-inline const NonMaxSuppressionV5Options *
-Operator::builtin_options_as<NonMaxSuppressionV5Options>() const
+inline const onert_tflite::NonMaxSuppressionV5Options *
+Operator::builtin_options_as<onert_tflite::NonMaxSuppressionV5Options>() const
{
return builtin_options_as_NonMaxSuppressionV5Options();
}
-template <> inline const ScatterNdOptions *Operator::builtin_options_as<ScatterNdOptions>() const
+template <>
+inline const onert_tflite::ScatterNdOptions *
+Operator::builtin_options_as<onert_tflite::ScatterNdOptions>() const
{
return builtin_options_as_ScatterNdOptions();
}
-template <> inline const SelectV2Options *Operator::builtin_options_as<SelectV2Options>() const
+template <>
+inline const onert_tflite::SelectV2Options *
+Operator::builtin_options_as<onert_tflite::SelectV2Options>() const
{
return builtin_options_as_SelectV2Options();
}
-template <> inline const DensifyOptions *Operator::builtin_options_as<DensifyOptions>() const
+template <>
+inline const onert_tflite::DensifyOptions *
+Operator::builtin_options_as<onert_tflite::DensifyOptions>() const
{
return builtin_options_as_DensifyOptions();
}
-template <> inline const SegmentSumOptions *Operator::builtin_options_as<SegmentSumOptions>() const
+template <>
+inline const onert_tflite::SegmentSumOptions *
+Operator::builtin_options_as<onert_tflite::SegmentSumOptions>() const
{
return builtin_options_as_SegmentSumOptions();
}
template <>
-inline const BatchMatMulOptions *Operator::builtin_options_as<BatchMatMulOptions>() const
+inline const onert_tflite::BatchMatMulOptions *
+Operator::builtin_options_as<onert_tflite::BatchMatMulOptions>() const
{
return builtin_options_as_BatchMatMulOptions();
}
+template <>
+inline const onert_tflite::CumsumOptions *
+Operator::builtin_options_as<onert_tflite::CumsumOptions>() const
+{
+ return builtin_options_as_CumsumOptions();
+}
+
+template <>
+inline const onert_tflite::CallOnceOptions *
+Operator::builtin_options_as<onert_tflite::CallOnceOptions>() const
+{
+ return builtin_options_as_CallOnceOptions();
+}
+
+template <>
+inline const onert_tflite::BroadcastToOptions *
+Operator::builtin_options_as<onert_tflite::BroadcastToOptions>() const
+{
+ return builtin_options_as_BroadcastToOptions();
+}
+
+template <>
+inline const onert_tflite::Rfft2dOptions *
+Operator::builtin_options_as<onert_tflite::Rfft2dOptions>() const
+{
+ return builtin_options_as_Rfft2dOptions();
+}
+
+template <>
+inline const onert_tflite::Conv3DOptions *
+Operator::builtin_options_as<onert_tflite::Conv3DOptions>() const
+{
+ return builtin_options_as_Conv3DOptions();
+}
+
+template <>
+inline const onert_tflite::HashtableOptions *
+Operator::builtin_options_as<onert_tflite::HashtableOptions>() const
+{
+ return builtin_options_as_HashtableOptions();
+}
+
+template <>
+inline const onert_tflite::HashtableFindOptions *
+Operator::builtin_options_as<onert_tflite::HashtableFindOptions>() const
+{
+ return builtin_options_as_HashtableFindOptions();
+}
+
+template <>
+inline const onert_tflite::HashtableImportOptions *
+Operator::builtin_options_as<onert_tflite::HashtableImportOptions>() const
+{
+ return builtin_options_as_HashtableImportOptions();
+}
+
+template <>
+inline const onert_tflite::HashtableSizeOptions *
+Operator::builtin_options_as<onert_tflite::HashtableSizeOptions>() const
+{
+ return builtin_options_as_HashtableSizeOptions();
+}
+
+template <>
+inline const onert_tflite::VarHandleOptions *
+Operator::builtin_options_as<onert_tflite::VarHandleOptions>() const
+{
+ return builtin_options_as_VarHandleOptions();
+}
+
+template <>
+inline const onert_tflite::ReadVariableOptions *
+Operator::builtin_options_as<onert_tflite::ReadVariableOptions>() const
+{
+ return builtin_options_as_ReadVariableOptions();
+}
+
+template <>
+inline const onert_tflite::AssignVariableOptions *
+Operator::builtin_options_as<onert_tflite::AssignVariableOptions>() const
+{
+ return builtin_options_as_AssignVariableOptions();
+}
+
+template <>
+inline const onert_tflite::RandomOptions *
+Operator::builtin_options_as<onert_tflite::RandomOptions>() const
+{
+ return builtin_options_as_RandomOptions();
+}
+
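The template specializations above exist so generic code can spell the same checked access as builtin_options_as<T>() with T as a compile-time parameter; each specialization simply forwards to the named accessor. A sketch under that assumption (has_options is a hypothetical helper, not part of this header):

    // Resolves to the matching specialization above at compile time.
    template <typename OptionT>
    inline bool has_options(const onert_tflite::Operator *op)
    {
      return op->builtin_options_as<OptionT>() != nullptr;
    }
    // e.g. has_options<onert_tflite::CumsumOptions>(op) exercises one of the
    // specializations newly added in this hunk.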
struct OperatorBuilder
{
+ typedef Operator Table;
flatbuffers::FlatBufferBuilder &fbb_;
flatbuffers::uoffset_t start_;
void add_opcode_index(uint32_t opcode_index)
@@ -8436,7 +10021,7 @@ struct OperatorBuilder
{
fbb_.AddOffset(Operator::VT_OUTPUTS, outputs);
}
- void add_builtin_options_type(BuiltinOptions builtin_options_type)
+ void add_builtin_options_type(onert_tflite::BuiltinOptions builtin_options_type)
{
fbb_.AddElement<uint8_t>(Operator::VT_BUILTIN_OPTIONS_TYPE,
static_cast<uint8_t>(builtin_options_type), 0);
@@ -8449,7 +10034,7 @@ struct OperatorBuilder
{
fbb_.AddOffset(Operator::VT_CUSTOM_OPTIONS, custom_options);
}
- void add_custom_options_format(CustomOptionsFormat custom_options_format)
+ void add_custom_options_format(onert_tflite::CustomOptionsFormat custom_options_format)
{
fbb_.AddElement<int8_t>(Operator::VT_CUSTOM_OPTIONS_FORMAT,
static_cast<int8_t>(custom_options_format), 0);
@@ -8467,7 +10052,6 @@ struct OperatorBuilder
{
start_ = fbb_.StartTable();
}
- OperatorBuilder &operator=(const OperatorBuilder &);
flatbuffers::Offset<Operator> Finish()
{
const auto end = fbb_.EndTable(start_);
@@ -8476,16 +10060,17 @@ struct OperatorBuilder
}
};
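The dropped private declaration `OperatorBuilder &operator=(const OperatorBuilder &);` is redundant rather than a behavior change: the builder holds a reference member (fbb_), and a class with a reference member has its implicit copy-assignment operator deleted anyway. An illustrative compile-time check, assuming this generated header is included:

    #include <type_traits>

    // Holds because fbb_ is a FlatBufferBuilder&; no explicit deletion needed.
    static_assert(!std::is_copy_assignable<onert_tflite::OperatorBuilder>::value,
                  "builders with a reference member are not copy-assignable");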
-inline flatbuffers::Offset<Operator>
-CreateOperator(flatbuffers::FlatBufferBuilder &_fbb, uint32_t opcode_index = 0,
- flatbuffers::Offset<flatbuffers::Vector<int32_t>> inputs = 0,
- flatbuffers::Offset<flatbuffers::Vector<int32_t>> outputs = 0,
- BuiltinOptions builtin_options_type = BuiltinOptions_NONE,
- flatbuffers::Offset<void> builtin_options = 0,
- flatbuffers::Offset<flatbuffers::Vector<uint8_t>> custom_options = 0,
- CustomOptionsFormat custom_options_format = CustomOptionsFormat_FLEXBUFFERS,
- flatbuffers::Offset<flatbuffers::Vector<uint8_t>> mutating_variable_inputs = 0,
- flatbuffers::Offset<flatbuffers::Vector<int32_t>> intermediates = 0)
+inline flatbuffers::Offset<Operator> CreateOperator(
+ flatbuffers::FlatBufferBuilder &_fbb, uint32_t opcode_index = 0,
+ flatbuffers::Offset<flatbuffers::Vector<int32_t>> inputs = 0,
+ flatbuffers::Offset<flatbuffers::Vector<int32_t>> outputs = 0,
+ onert_tflite::BuiltinOptions builtin_options_type = onert_tflite::BuiltinOptions_NONE,
+ flatbuffers::Offset<void> builtin_options = 0,
+ flatbuffers::Offset<flatbuffers::Vector<uint8_t>> custom_options = 0,
+ onert_tflite::CustomOptionsFormat custom_options_format =
+ onert_tflite::CustomOptionsFormat_FLEXBUFFERS,
+ flatbuffers::Offset<flatbuffers::Vector<uint8_t>> mutating_variable_inputs = 0,
+ flatbuffers::Offset<flatbuffers::Vector<int32_t>> intermediates = 0)
{
OperatorBuilder builder_(_fbb);
builder_.add_intermediates(intermediates);
@@ -8500,28 +10085,32 @@ CreateOperator(flatbuffers::FlatBufferBuilder &_fbb, uint32_t opcode_index = 0,
return builder_.Finish();
}
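After this hunk, callers pass the namespace-qualified union tag (onert_tflite::BuiltinOptions_*) instead of the unqualified enumerator. A minimal build sketch, illustrative only (the AddOptions payload and the tensor indices are assumptions, not taken from this diff):

    flatbuffers::FlatBufferBuilder fbb;
    std::vector<int32_t> in{0, 1}, out{2};
    auto inputs = fbb.CreateVector(in);
    auto outputs = fbb.CreateVector(out);
    // The union payload must be finished before the Operator table that refers to it.
    auto add_opts = onert_tflite::CreateAddOptions(fbb).Union();
    auto op = onert_tflite::CreateOperator(fbb, /*opcode_index=*/0, inputs, outputs,
                                           onert_tflite::BuiltinOptions_AddOptions, add_opts);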
-inline flatbuffers::Offset<Operator>
-CreateOperatorDirect(flatbuffers::FlatBufferBuilder &_fbb, uint32_t opcode_index = 0,
- const std::vector<int32_t> *inputs = nullptr,
- const std::vector<int32_t> *outputs = nullptr,
- BuiltinOptions builtin_options_type = BuiltinOptions_NONE,
- flatbuffers::Offset<void> builtin_options = 0,
- const std::vector<uint8_t> *custom_options = nullptr,
- CustomOptionsFormat custom_options_format = CustomOptionsFormat_FLEXBUFFERS,
- const std::vector<uint8_t> *mutating_variable_inputs = nullptr,
- const std::vector<int32_t> *intermediates = nullptr)
-{
- return onert_tflite::CreateOperator(
- _fbb, opcode_index, inputs ? _fbb.CreateVector<int32_t>(*inputs) : 0,
- outputs ? _fbb.CreateVector<int32_t>(*outputs) : 0, builtin_options_type, builtin_options,
- custom_options ? _fbb.CreateVector<uint8_t>(*custom_options) : 0, custom_options_format,
- mutating_variable_inputs ? _fbb.CreateVector<uint8_t>(*mutating_variable_inputs) : 0,
- intermediates ? _fbb.CreateVector<int32_t>(*intermediates) : 0);
+inline flatbuffers::Offset<Operator> CreateOperatorDirect(
+ flatbuffers::FlatBufferBuilder &_fbb, uint32_t opcode_index = 0,
+ const std::vector<int32_t> *inputs = nullptr, const std::vector<int32_t> *outputs = nullptr,
+ onert_tflite::BuiltinOptions builtin_options_type = onert_tflite::BuiltinOptions_NONE,
+ flatbuffers::Offset<void> builtin_options = 0,
+ const std::vector<uint8_t> *custom_options = nullptr,
+ onert_tflite::CustomOptionsFormat custom_options_format =
+ onert_tflite::CustomOptionsFormat_FLEXBUFFERS,
+ const std::vector<uint8_t> *mutating_variable_inputs = nullptr,
+ const std::vector<int32_t> *intermediates = nullptr)
+{
+ auto inputs__ = inputs ? _fbb.CreateVector<int32_t>(*inputs) : 0;
+ auto outputs__ = outputs ? _fbb.CreateVector<int32_t>(*outputs) : 0;
+ auto custom_options__ = custom_options ? _fbb.CreateVector<uint8_t>(*custom_options) : 0;
+ auto mutating_variable_inputs__ =
+ mutating_variable_inputs ? _fbb.CreateVector<uint8_t>(*mutating_variable_inputs) : 0;
+ auto intermediates__ = intermediates ? _fbb.CreateVector<int32_t>(*intermediates) : 0;
+ return onert_tflite::CreateOperator(_fbb, opcode_index, inputs__, outputs__, builtin_options_type,
+ builtin_options, custom_options__, custom_options_format,
+ mutating_variable_inputs__, intermediates__);
}
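Beyond the namespace change, the rewritten Direct helper now binds every CreateVector result to a named local (inputs__, outputs__, ...) before the final call. The likely motive: C++ leaves argument evaluation order unspecified, so nesting several builder calls inside one argument list made the byte layout of the emitted buffer compiler-dependent; hoisting them into locals pins the serialization order. Sketch of the two forms (illustrative):

    // Order-dependent layout: the nested calls may run in any order.
    //   CreateOperator(fbb, 0, fbb.CreateVector(in), fbb.CreateVector(out), ...);
    // Deterministic (what the generated code now does):
    auto inputs__ = fbb.CreateVector(in);   // serialized first
    auto outputs__ = fbb.CreateVector(out); // then this
    // ...followed by the single CreateOperator call taking the locals.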
struct SubGraph FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table
{
- enum
+ typedef SubGraphBuilder Builder;
+ enum FlatBuffersVTableOffset FLATBUFFERS_VTABLE_UNDERLYING_TYPE
{
VT_TENSORS = 4,
VT_INPUTS = 6,
@@ -8529,9 +10118,10 @@ struct SubGraph FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table
VT_OPERATORS = 10,
VT_NAME = 12
};
- const flatbuffers::Vector<flatbuffers::Offset<Tensor>> *tensors() const
+ const flatbuffers::Vector<flatbuffers::Offset<onert_tflite::Tensor>> *tensors() const
{
- return GetPointer<const flatbuffers::Vector<flatbuffers::Offset<Tensor>> *>(VT_TENSORS);
+ return GetPointer<const flatbuffers::Vector<flatbuffers::Offset<onert_tflite::Tensor>> *>(
+ VT_TENSORS);
}
const flatbuffers::Vector<int32_t> *inputs() const
{
@@ -8541,9 +10131,10 @@ struct SubGraph FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table
{
return GetPointer<const flatbuffers::Vector<int32_t> *>(VT_OUTPUTS);
}
- const flatbuffers::Vector<flatbuffers::Offset<Operator>> *operators() const
+ const flatbuffers::Vector<flatbuffers::Offset<onert_tflite::Operator>> *operators() const
{
- return GetPointer<const flatbuffers::Vector<flatbuffers::Offset<Operator>> *>(VT_OPERATORS);
+ return GetPointer<const flatbuffers::Vector<flatbuffers::Offset<onert_tflite::Operator>> *>(
+ VT_OPERATORS);
}
const flatbuffers::String *name() const
{
@@ -8563,9 +10154,11 @@ struct SubGraph FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table
struct SubGraphBuilder
{
+ typedef SubGraph Table;
flatbuffers::FlatBufferBuilder &fbb_;
flatbuffers::uoffset_t start_;
- void add_tensors(flatbuffers::Offset<flatbuffers::Vector<flatbuffers::Offset<Tensor>>> tensors)
+ void add_tensors(
+ flatbuffers::Offset<flatbuffers::Vector<flatbuffers::Offset<onert_tflite::Tensor>>> tensors)
{
fbb_.AddOffset(SubGraph::VT_TENSORS, tensors);
}
@@ -8577,8 +10170,8 @@ struct SubGraphBuilder
{
fbb_.AddOffset(SubGraph::VT_OUTPUTS, outputs);
}
- void
- add_operators(flatbuffers::Offset<flatbuffers::Vector<flatbuffers::Offset<Operator>>> operators)
+ void add_operators(
+ flatbuffers::Offset<flatbuffers::Vector<flatbuffers::Offset<onert_tflite::Operator>>> operators)
{
fbb_.AddOffset(SubGraph::VT_OPERATORS, operators);
}
@@ -8590,7 +10183,6 @@ struct SubGraphBuilder
{
start_ = fbb_.StartTable();
}
- SubGraphBuilder &operator=(const SubGraphBuilder &);
flatbuffers::Offset<SubGraph> Finish()
{
const auto end = fbb_.EndTable(start_);
@@ -8601,10 +10193,11 @@ struct SubGraphBuilder
inline flatbuffers::Offset<SubGraph> CreateSubGraph(
flatbuffers::FlatBufferBuilder &_fbb,
- flatbuffers::Offset<flatbuffers::Vector<flatbuffers::Offset<Tensor>>> tensors = 0,
+ flatbuffers::Offset<flatbuffers::Vector<flatbuffers::Offset<onert_tflite::Tensor>>> tensors = 0,
flatbuffers::Offset<flatbuffers::Vector<int32_t>> inputs = 0,
flatbuffers::Offset<flatbuffers::Vector<int32_t>> outputs = 0,
- flatbuffers::Offset<flatbuffers::Vector<flatbuffers::Offset<Operator>>> operators = 0,
+ flatbuffers::Offset<flatbuffers::Vector<flatbuffers::Offset<onert_tflite::Operator>>> operators =
+ 0,
flatbuffers::Offset<flatbuffers::String> name = 0)
{
SubGraphBuilder builder_(_fbb);
@@ -8618,21 +10211,25 @@ inline flatbuffers::Offset<SubGraph> CreateSubGraph(
inline flatbuffers::Offset<SubGraph> CreateSubGraphDirect(
flatbuffers::FlatBufferBuilder &_fbb,
- const std::vector<flatbuffers::Offset<Tensor>> *tensors = nullptr,
+ const std::vector<flatbuffers::Offset<onert_tflite::Tensor>> *tensors = nullptr,
const std::vector<int32_t> *inputs = nullptr, const std::vector<int32_t> *outputs = nullptr,
- const std::vector<flatbuffers::Offset<Operator>> *operators = nullptr, const char *name = nullptr)
+ const std::vector<flatbuffers::Offset<onert_tflite::Operator>> *operators = nullptr,
+ const char *name = nullptr)
{
- return onert_tflite::CreateSubGraph(
- _fbb, tensors ? _fbb.CreateVector<flatbuffers::Offset<Tensor>>(*tensors) : 0,
- inputs ? _fbb.CreateVector<int32_t>(*inputs) : 0,
- outputs ? _fbb.CreateVector<int32_t>(*outputs) : 0,
- operators ? _fbb.CreateVector<flatbuffers::Offset<Operator>>(*operators) : 0,
- name ? _fbb.CreateString(name) : 0);
+ auto tensors__ =
+ tensors ? _fbb.CreateVector<flatbuffers::Offset<onert_tflite::Tensor>>(*tensors) : 0;
+ auto inputs__ = inputs ? _fbb.CreateVector<int32_t>(*inputs) : 0;
+ auto outputs__ = outputs ? _fbb.CreateVector<int32_t>(*outputs) : 0;
+ auto operators__ =
+ operators ? _fbb.CreateVector<flatbuffers::Offset<onert_tflite::Operator>>(*operators) : 0;
+ auto name__ = name ? _fbb.CreateString(name) : 0;
+ return onert_tflite::CreateSubGraph(_fbb, tensors__, inputs__, outputs__, operators__, name__);
}
struct Buffer FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table
{
- enum
+ typedef BufferBuilder Builder;
+ enum FlatBuffersVTableOffset FLATBUFFERS_VTABLE_UNDERLYING_TYPE
{
VT_DATA = 4
};
@@ -8649,6 +10246,7 @@ struct Buffer FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table
struct BufferBuilder
{
+ typedef Buffer Table;
flatbuffers::FlatBufferBuilder &fbb_;
flatbuffers::uoffset_t start_;
void add_data(flatbuffers::Offset<flatbuffers::Vector<uint8_t>> data)
@@ -8659,7 +10257,6 @@ struct BufferBuilder
{
start_ = fbb_.StartTable();
}
- BufferBuilder &operator=(const BufferBuilder &);
flatbuffers::Offset<Buffer> Finish()
{
const auto end = fbb_.EndTable(start_);
@@ -8680,12 +10277,18 @@ CreateBuffer(flatbuffers::FlatBufferBuilder &_fbb,
inline flatbuffers::Offset<Buffer> CreateBufferDirect(flatbuffers::FlatBufferBuilder &_fbb,
const std::vector<uint8_t> *data = nullptr)
{
- return onert_tflite::CreateBuffer(_fbb, data ? _fbb.CreateVector<uint8_t>(*data) : 0);
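+ // Force 16-byte alignment of buffer contents (schema force_align) so tensor data
+ // can be accessed in place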
+ if (data)
+ {
+ _fbb.ForceVectorAlignment(data->size(), sizeof(uint8_t), 16);
+ }
+ auto data__ = data ? _fbb.CreateVector<uint8_t>(*data) : 0;
+ return onert_tflite::CreateBuffer(_fbb, data__);
}
struct Metadata FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table
{
- enum
+ typedef MetadataBuilder Builder;
+ enum FlatBuffersVTableOffset FLATBUFFERS_VTABLE_UNDERLYING_TYPE
{
VT_NAME = 4,
VT_BUFFER = 6
@@ -8705,6 +10308,7 @@ struct Metadata FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table
struct MetadataBuilder
{
+ typedef Metadata Table;
flatbuffers::FlatBufferBuilder &fbb_;
flatbuffers::uoffset_t start_;
void add_name(flatbuffers::Offset<flatbuffers::String> name)
@@ -8716,7 +10320,6 @@ struct MetadataBuilder
{
start_ = fbb_.StartTable();
}
- MetadataBuilder &operator=(const MetadataBuilder &);
flatbuffers::Offset<Metadata> Finish()
{
const auto end = fbb_.EndTable(start_);
@@ -8739,12 +10342,179 @@ inline flatbuffers::Offset<Metadata> CreateMetadataDirect(flatbuffers::FlatBuffe
const char *name = nullptr,
uint32_t buffer = 0)
{
- return onert_tflite::CreateMetadata(_fbb, name ? _fbb.CreateString(name) : 0, buffer);
+ auto name__ = name ? _fbb.CreateString(name) : 0;
+ return onert_tflite::CreateMetadata(_fbb, name__, buffer);
+}
+
+struct TensorMap FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table
+{
+ typedef TensorMapBuilder Builder;
+ enum FlatBuffersVTableOffset FLATBUFFERS_VTABLE_UNDERLYING_TYPE
+ {
+ VT_NAME = 4,
+ VT_TENSOR_INDEX = 6
+ };
+ const flatbuffers::String *name() const
+ {
+ return GetPointer<const flatbuffers::String *>(VT_NAME);
+ }
+ uint32_t tensor_index() const { return GetField<uint32_t>(VT_TENSOR_INDEX, 0); }
+ bool Verify(flatbuffers::Verifier &verifier) const
+ {
+ return VerifyTableStart(verifier) && VerifyOffset(verifier, VT_NAME) &&
+ verifier.VerifyString(name()) && VerifyField<uint32_t>(verifier, VT_TENSOR_INDEX) &&
+ verifier.EndTable();
+ }
+};
+
+struct TensorMapBuilder
+{
+ typedef TensorMap Table;
+ flatbuffers::FlatBufferBuilder &fbb_;
+ flatbuffers::uoffset_t start_;
+ void add_name(flatbuffers::Offset<flatbuffers::String> name)
+ {
+ fbb_.AddOffset(TensorMap::VT_NAME, name);
+ }
+ void add_tensor_index(uint32_t tensor_index)
+ {
+ fbb_.AddElement<uint32_t>(TensorMap::VT_TENSOR_INDEX, tensor_index, 0);
+ }
+ explicit TensorMapBuilder(flatbuffers::FlatBufferBuilder &_fbb) : fbb_(_fbb)
+ {
+ start_ = fbb_.StartTable();
+ }
+ flatbuffers::Offset<TensorMap> Finish()
+ {
+ const auto end = fbb_.EndTable(start_);
+ auto o = flatbuffers::Offset<TensorMap>(end);
+ return o;
+ }
+};
+
+inline flatbuffers::Offset<TensorMap>
+CreateTensorMap(flatbuffers::FlatBufferBuilder &_fbb,
+ flatbuffers::Offset<flatbuffers::String> name = 0, uint32_t tensor_index = 0)
+{
+ TensorMapBuilder builder_(_fbb);
+ builder_.add_tensor_index(tensor_index);
+ builder_.add_name(name);
+ return builder_.Finish();
+}
+
+inline flatbuffers::Offset<TensorMap> CreateTensorMapDirect(flatbuffers::FlatBufferBuilder &_fbb,
+ const char *name = nullptr,
+ uint32_t tensor_index = 0)
+{
+ auto name__ = name ? _fbb.CreateString(name) : 0;
+ return onert_tflite::CreateTensorMap(_fbb, name__, tensor_index);
+}
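+
+// Usage sketch (illustrative, not generated code; "serving_input" is a hypothetical name):
+//   flatbuffers::FlatBufferBuilder fbb;
+//   auto tm = onert_tflite::CreateTensorMapDirect(fbb, "serving_input", /*tensor_index=*/0);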
+
+struct SignatureDef FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table
+{
+ typedef SignatureDefBuilder Builder;
+ enum FlatBuffersVTableOffset FLATBUFFERS_VTABLE_UNDERLYING_TYPE
+ {
+ VT_INPUTS = 4,
+ VT_OUTPUTS = 6,
+ VT_SIGNATURE_KEY = 8,
+ VT_SUBGRAPH_INDEX = 12
+ };
+ const flatbuffers::Vector<flatbuffers::Offset<onert_tflite::TensorMap>> *inputs() const
+ {
+ return GetPointer<const flatbuffers::Vector<flatbuffers::Offset<onert_tflite::TensorMap>> *>(
+ VT_INPUTS);
+ }
+ const flatbuffers::Vector<flatbuffers::Offset<onert_tflite::TensorMap>> *outputs() const
+ {
+ return GetPointer<const flatbuffers::Vector<flatbuffers::Offset<onert_tflite::TensorMap>> *>(
+ VT_OUTPUTS);
+ }
+ const flatbuffers::String *signature_key() const
+ {
+ return GetPointer<const flatbuffers::String *>(VT_SIGNATURE_KEY);
+ }
+ uint32_t subgraph_index() const { return GetField<uint32_t>(VT_SUBGRAPH_INDEX, 0); }
+ bool Verify(flatbuffers::Verifier &verifier) const
+ {
+ return VerifyTableStart(verifier) && VerifyOffset(verifier, VT_INPUTS) &&
+ verifier.VerifyVector(inputs()) && verifier.VerifyVectorOfTables(inputs()) &&
+ VerifyOffset(verifier, VT_OUTPUTS) && verifier.VerifyVector(outputs()) &&
+ verifier.VerifyVectorOfTables(outputs()) && VerifyOffset(verifier, VT_SIGNATURE_KEY) &&
+ verifier.VerifyString(signature_key()) &&
+ VerifyField<uint32_t>(verifier, VT_SUBGRAPH_INDEX) && verifier.EndTable();
+ }
+};
+
+struct SignatureDefBuilder
+{
+ typedef SignatureDef Table;
+ flatbuffers::FlatBufferBuilder &fbb_;
+ flatbuffers::uoffset_t start_;
+ void add_inputs(
+ flatbuffers::Offset<flatbuffers::Vector<flatbuffers::Offset<onert_tflite::TensorMap>>> inputs)
+ {
+ fbb_.AddOffset(SignatureDef::VT_INPUTS, inputs);
+ }
+ void add_outputs(
+ flatbuffers::Offset<flatbuffers::Vector<flatbuffers::Offset<onert_tflite::TensorMap>>> outputs)
+ {
+ fbb_.AddOffset(SignatureDef::VT_OUTPUTS, outputs);
+ }
+ void add_signature_key(flatbuffers::Offset<flatbuffers::String> signature_key)
+ {
+ fbb_.AddOffset(SignatureDef::VT_SIGNATURE_KEY, signature_key);
+ }
+ void add_subgraph_index(uint32_t subgraph_index)
+ {
+ fbb_.AddElement<uint32_t>(SignatureDef::VT_SUBGRAPH_INDEX, subgraph_index, 0);
+ }
+ explicit SignatureDefBuilder(flatbuffers::FlatBufferBuilder &_fbb) : fbb_(_fbb)
+ {
+ start_ = fbb_.StartTable();
+ }
+ flatbuffers::Offset<SignatureDef> Finish()
+ {
+ const auto end = fbb_.EndTable(start_);
+ auto o = flatbuffers::Offset<SignatureDef>(end);
+ return o;
+ }
+};
+
+inline flatbuffers::Offset<SignatureDef> CreateSignatureDef(
+ flatbuffers::FlatBufferBuilder &_fbb,
+ flatbuffers::Offset<flatbuffers::Vector<flatbuffers::Offset<onert_tflite::TensorMap>>> inputs = 0,
+ flatbuffers::Offset<flatbuffers::Vector<flatbuffers::Offset<onert_tflite::TensorMap>>> outputs =
+ 0,
+ flatbuffers::Offset<flatbuffers::String> signature_key = 0, uint32_t subgraph_index = 0)
+{
+ SignatureDefBuilder builder_(_fbb);
+ builder_.add_subgraph_index(subgraph_index);
+ builder_.add_signature_key(signature_key);
+ builder_.add_outputs(outputs);
+ builder_.add_inputs(inputs);
+ return builder_.Finish();
+}
+
+inline flatbuffers::Offset<SignatureDef> CreateSignatureDefDirect(
+ flatbuffers::FlatBufferBuilder &_fbb,
+ const std::vector<flatbuffers::Offset<onert_tflite::TensorMap>> *inputs = nullptr,
+ const std::vector<flatbuffers::Offset<onert_tflite::TensorMap>> *outputs = nullptr,
+ const char *signature_key = nullptr, uint32_t subgraph_index = 0)
+{
+ auto inputs__ =
+ inputs ? _fbb.CreateVector<flatbuffers::Offset<onert_tflite::TensorMap>>(*inputs) : 0;
+ auto outputs__ =
+ outputs ? _fbb.CreateVector<flatbuffers::Offset<onert_tflite::TensorMap>>(*outputs) : 0;
+ auto signature_key__ = signature_key ? _fbb.CreateString(signature_key) : 0;
+ return onert_tflite::CreateSignatureDef(_fbb, inputs__, outputs__, signature_key__,
+ subgraph_index);
}
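+
+// Usage sketch (illustrative, not generated code; "serving_default" is a hypothetical key):
+//   std::vector<flatbuffers::Offset<onert_tflite::TensorMap>> ins{tm};
+//   auto sig = onert_tflite::CreateSignatureDefDirect(fbb, &ins, /*outputs=*/nullptr,
+//                                                     "serving_default", /*subgraph_index=*/0);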
struct Model FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table
{
- enum
+ typedef ModelBuilder Builder;
+ enum FlatBuffersVTableOffset FLATBUFFERS_VTABLE_UNDERLYING_TYPE
{
VT_VERSION = 4,
VT_OPERATOR_CODES = 6,
@@ -8752,33 +10522,42 @@ struct Model FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table
VT_DESCRIPTION = 10,
VT_BUFFERS = 12,
VT_METADATA_BUFFER = 14,
- VT_METADATA = 16
+ VT_METADATA = 16,
+ VT_SIGNATURE_DEFS = 18
};
uint32_t version() const { return GetField<uint32_t>(VT_VERSION, 0); }
- const flatbuffers::Vector<flatbuffers::Offset<OperatorCode>> *operator_codes() const
+ const flatbuffers::Vector<flatbuffers::Offset<onert_tflite::OperatorCode>> *operator_codes() const
{
- return GetPointer<const flatbuffers::Vector<flatbuffers::Offset<OperatorCode>> *>(
+ return GetPointer<const flatbuffers::Vector<flatbuffers::Offset<onert_tflite::OperatorCode>> *>(
VT_OPERATOR_CODES);
}
- const flatbuffers::Vector<flatbuffers::Offset<SubGraph>> *subgraphs() const
+ const flatbuffers::Vector<flatbuffers::Offset<onert_tflite::SubGraph>> *subgraphs() const
{
- return GetPointer<const flatbuffers::Vector<flatbuffers::Offset<SubGraph>> *>(VT_SUBGRAPHS);
+ return GetPointer<const flatbuffers::Vector<flatbuffers::Offset<onert_tflite::SubGraph>> *>(
+ VT_SUBGRAPHS);
}
const flatbuffers::String *description() const
{
return GetPointer<const flatbuffers::String *>(VT_DESCRIPTION);
}
- const flatbuffers::Vector<flatbuffers::Offset<Buffer>> *buffers() const
+ const flatbuffers::Vector<flatbuffers::Offset<onert_tflite::Buffer>> *buffers() const
{
- return GetPointer<const flatbuffers::Vector<flatbuffers::Offset<Buffer>> *>(VT_BUFFERS);
+ return GetPointer<const flatbuffers::Vector<flatbuffers::Offset<onert_tflite::Buffer>> *>(
+ VT_BUFFERS);
}
const flatbuffers::Vector<int32_t> *metadata_buffer() const
{
return GetPointer<const flatbuffers::Vector<int32_t> *>(VT_METADATA_BUFFER);
}
- const flatbuffers::Vector<flatbuffers::Offset<Metadata>> *metadata() const
+ const flatbuffers::Vector<flatbuffers::Offset<onert_tflite::Metadata>> *metadata() const
{
- return GetPointer<const flatbuffers::Vector<flatbuffers::Offset<Metadata>> *>(VT_METADATA);
+ return GetPointer<const flatbuffers::Vector<flatbuffers::Offset<onert_tflite::Metadata>> *>(
+ VT_METADATA);
+ }
+ const flatbuffers::Vector<flatbuffers::Offset<onert_tflite::SignatureDef>> *signature_defs() const
+ {
+ return GetPointer<const flatbuffers::Vector<flatbuffers::Offset<onert_tflite::SignatureDef>> *>(
+ VT_SIGNATURE_DEFS);
}
bool Verify(flatbuffers::Verifier &verifier) const
{
@@ -8791,22 +10570,26 @@ struct Model FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table
verifier.VerifyVector(buffers()) && verifier.VerifyVectorOfTables(buffers()) &&
VerifyOffset(verifier, VT_METADATA_BUFFER) && verifier.VerifyVector(metadata_buffer()) &&
VerifyOffset(verifier, VT_METADATA) && verifier.VerifyVector(metadata()) &&
- verifier.VerifyVectorOfTables(metadata()) && verifier.EndTable();
+ verifier.VerifyVectorOfTables(metadata()) && VerifyOffset(verifier, VT_SIGNATURE_DEFS) &&
+ verifier.VerifyVector(signature_defs()) &&
+ verifier.VerifyVectorOfTables(signature_defs()) && verifier.EndTable();
}
};
struct ModelBuilder
{
+ typedef Model Table;
flatbuffers::FlatBufferBuilder &fbb_;
flatbuffers::uoffset_t start_;
void add_version(uint32_t version) { fbb_.AddElement<uint32_t>(Model::VT_VERSION, version, 0); }
void add_operator_codes(
- flatbuffers::Offset<flatbuffers::Vector<flatbuffers::Offset<OperatorCode>>> operator_codes)
+ flatbuffers::Offset<flatbuffers::Vector<flatbuffers::Offset<onert_tflite::OperatorCode>>>
+ operator_codes)
{
fbb_.AddOffset(Model::VT_OPERATOR_CODES, operator_codes);
}
- void
- add_subgraphs(flatbuffers::Offset<flatbuffers::Vector<flatbuffers::Offset<SubGraph>>> subgraphs)
+ void add_subgraphs(
+ flatbuffers::Offset<flatbuffers::Vector<flatbuffers::Offset<onert_tflite::SubGraph>>> subgraphs)
{
fbb_.AddOffset(Model::VT_SUBGRAPHS, subgraphs);
}
@@ -8814,7 +10597,8 @@ struct ModelBuilder
{
fbb_.AddOffset(Model::VT_DESCRIPTION, description);
}
- void add_buffers(flatbuffers::Offset<flatbuffers::Vector<flatbuffers::Offset<Buffer>>> buffers)
+ void add_buffers(
+ flatbuffers::Offset<flatbuffers::Vector<flatbuffers::Offset<onert_tflite::Buffer>>> buffers)
{
fbb_.AddOffset(Model::VT_BUFFERS, buffers);
}
@@ -8822,16 +10606,21 @@ struct ModelBuilder
{
fbb_.AddOffset(Model::VT_METADATA_BUFFER, metadata_buffer);
}
- void
- add_metadata(flatbuffers::Offset<flatbuffers::Vector<flatbuffers::Offset<Metadata>>> metadata)
+ void add_metadata(
+ flatbuffers::Offset<flatbuffers::Vector<flatbuffers::Offset<onert_tflite::Metadata>>> metadata)
{
fbb_.AddOffset(Model::VT_METADATA, metadata);
}
+ void add_signature_defs(
+ flatbuffers::Offset<flatbuffers::Vector<flatbuffers::Offset<onert_tflite::SignatureDef>>>
+ signature_defs)
+ {
+ fbb_.AddOffset(Model::VT_SIGNATURE_DEFS, signature_defs);
+ }
explicit ModelBuilder(flatbuffers::FlatBufferBuilder &_fbb) : fbb_(_fbb)
{
start_ = fbb_.StartTable();
}
- ModelBuilder &operator=(const ModelBuilder &);
flatbuffers::Offset<Model> Finish()
{
const auto end = fbb_.EndTable(start_);
@@ -8842,14 +10631,20 @@ struct ModelBuilder
inline flatbuffers::Offset<Model> CreateModel(
flatbuffers::FlatBufferBuilder &_fbb, uint32_t version = 0,
- flatbuffers::Offset<flatbuffers::Vector<flatbuffers::Offset<OperatorCode>>> operator_codes = 0,
- flatbuffers::Offset<flatbuffers::Vector<flatbuffers::Offset<SubGraph>>> subgraphs = 0,
+ flatbuffers::Offset<flatbuffers::Vector<flatbuffers::Offset<onert_tflite::OperatorCode>>>
+ operator_codes = 0,
+ flatbuffers::Offset<flatbuffers::Vector<flatbuffers::Offset<onert_tflite::SubGraph>>> subgraphs =
+ 0,
flatbuffers::Offset<flatbuffers::String> description = 0,
- flatbuffers::Offset<flatbuffers::Vector<flatbuffers::Offset<Buffer>>> buffers = 0,
+ flatbuffers::Offset<flatbuffers::Vector<flatbuffers::Offset<onert_tflite::Buffer>>> buffers = 0,
flatbuffers::Offset<flatbuffers::Vector<int32_t>> metadata_buffer = 0,
- flatbuffers::Offset<flatbuffers::Vector<flatbuffers::Offset<Metadata>>> metadata = 0)
+ flatbuffers::Offset<flatbuffers::Vector<flatbuffers::Offset<onert_tflite::Metadata>>> metadata =
+ 0,
+ flatbuffers::Offset<flatbuffers::Vector<flatbuffers::Offset<onert_tflite::SignatureDef>>>
+ signature_defs = 0)
{
ModelBuilder builder_(_fbb);
+ builder_.add_signature_defs(signature_defs);
builder_.add_metadata(metadata);
builder_.add_metadata_buffer(metadata_buffer);
builder_.add_buffers(buffers);
@@ -8860,23 +10655,34 @@ inline flatbuffers::Offset<Model> CreateModel(
return builder_.Finish();
}
-inline flatbuffers::Offset<Model>
-CreateModelDirect(flatbuffers::FlatBufferBuilder &_fbb, uint32_t version = 0,
- const std::vector<flatbuffers::Offset<OperatorCode>> *operator_codes = nullptr,
- const std::vector<flatbuffers::Offset<SubGraph>> *subgraphs = nullptr,
- const char *description = nullptr,
- const std::vector<flatbuffers::Offset<Buffer>> *buffers = nullptr,
- const std::vector<int32_t> *metadata_buffer = nullptr,
- const std::vector<flatbuffers::Offset<Metadata>> *metadata = nullptr)
-{
- return onert_tflite::CreateModel(
- _fbb, version,
- operator_codes ? _fbb.CreateVector<flatbuffers::Offset<OperatorCode>>(*operator_codes) : 0,
- subgraphs ? _fbb.CreateVector<flatbuffers::Offset<SubGraph>>(*subgraphs) : 0,
- description ? _fbb.CreateString(description) : 0,
- buffers ? _fbb.CreateVector<flatbuffers::Offset<Buffer>>(*buffers) : 0,
- metadata_buffer ? _fbb.CreateVector<int32_t>(*metadata_buffer) : 0,
- metadata ? _fbb.CreateVector<flatbuffers::Offset<Metadata>>(*metadata) : 0);
+inline flatbuffers::Offset<Model> CreateModelDirect(
+ flatbuffers::FlatBufferBuilder &_fbb, uint32_t version = 0,
+ const std::vector<flatbuffers::Offset<onert_tflite::OperatorCode>> *operator_codes = nullptr,
+ const std::vector<flatbuffers::Offset<onert_tflite::SubGraph>> *subgraphs = nullptr,
+ const char *description = nullptr,
+ const std::vector<flatbuffers::Offset<onert_tflite::Buffer>> *buffers = nullptr,
+ const std::vector<int32_t> *metadata_buffer = nullptr,
+ const std::vector<flatbuffers::Offset<onert_tflite::Metadata>> *metadata = nullptr,
+ const std::vector<flatbuffers::Offset<onert_tflite::SignatureDef>> *signature_defs = nullptr)
+{
+ auto operator_codes__ =
+ operator_codes
+ ? _fbb.CreateVector<flatbuffers::Offset<onert_tflite::OperatorCode>>(*operator_codes)
+ : 0;
+ auto subgraphs__ =
+ subgraphs ? _fbb.CreateVector<flatbuffers::Offset<onert_tflite::SubGraph>>(*subgraphs) : 0;
+ auto description__ = description ? _fbb.CreateString(description) : 0;
+ auto buffers__ =
+ buffers ? _fbb.CreateVector<flatbuffers::Offset<onert_tflite::Buffer>>(*buffers) : 0;
+ auto metadata_buffer__ = metadata_buffer ? _fbb.CreateVector<int32_t>(*metadata_buffer) : 0;
+ auto metadata__ =
+ metadata ? _fbb.CreateVector<flatbuffers::Offset<onert_tflite::Metadata>>(*metadata) : 0;
+ auto signature_defs__ =
+ signature_defs
+ ? _fbb.CreateVector<flatbuffers::Offset<onert_tflite::SignatureDef>>(*signature_defs)
+ : 0;
+ return onert_tflite::CreateModel(_fbb, version, operator_codes__, subgraphs__, description__,
+ buffers__, metadata_buffer__, metadata__, signature_defs__);
}
inline bool VerifyQuantizationDetails(flatbuffers::Verifier &verifier, const void *obj,
@@ -8890,11 +10696,11 @@ inline bool VerifyQuantizationDetails(flatbuffers::Verifier &verifier, const voi
}
case QuantizationDetails_CustomQuantization:
{
- auto ptr = reinterpret_cast<const CustomQuantization *>(obj);
+ auto ptr = reinterpret_cast<const onert_tflite::CustomQuantization *>(obj);
return verifier.VerifyTable(ptr);
}
default:
- return false;
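+ // Unknown union member: tolerated so buffers written by newer schema versions
+ // still verify (applies to the other Verify* unions below as well)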
+ return true;
}
}
@@ -8929,21 +10735,21 @@ inline bool VerifySparseIndexVector(flatbuffers::Verifier &verifier, const void
}
case SparseIndexVector_Int32Vector:
{
- auto ptr = reinterpret_cast<const Int32Vector *>(obj);
+ auto ptr = reinterpret_cast<const onert_tflite::Int32Vector *>(obj);
return verifier.VerifyTable(ptr);
}
case SparseIndexVector_Uint16Vector:
{
- auto ptr = reinterpret_cast<const Uint16Vector *>(obj);
+ auto ptr = reinterpret_cast<const onert_tflite::Uint16Vector *>(obj);
return verifier.VerifyTable(ptr);
}
case SparseIndexVector_Uint8Vector:
{
- auto ptr = reinterpret_cast<const Uint8Vector *>(obj);
+ auto ptr = reinterpret_cast<const onert_tflite::Uint8Vector *>(obj);
return verifier.VerifyTable(ptr);
}
default:
- return false;
+ return true;
}
}
@@ -8977,511 +10783,576 @@ inline bool VerifyBuiltinOptions(flatbuffers::Verifier &verifier, const void *ob
}
case BuiltinOptions_Conv2DOptions:
{
- auto ptr = reinterpret_cast<const Conv2DOptions *>(obj);
+ auto ptr = reinterpret_cast<const onert_tflite::Conv2DOptions *>(obj);
return verifier.VerifyTable(ptr);
}
case BuiltinOptions_DepthwiseConv2DOptions:
{
- auto ptr = reinterpret_cast<const DepthwiseConv2DOptions *>(obj);
+ auto ptr = reinterpret_cast<const onert_tflite::DepthwiseConv2DOptions *>(obj);
return verifier.VerifyTable(ptr);
}
case BuiltinOptions_ConcatEmbeddingsOptions:
{
- auto ptr = reinterpret_cast<const ConcatEmbeddingsOptions *>(obj);
+ auto ptr = reinterpret_cast<const onert_tflite::ConcatEmbeddingsOptions *>(obj);
return verifier.VerifyTable(ptr);
}
case BuiltinOptions_LSHProjectionOptions:
{
- auto ptr = reinterpret_cast<const LSHProjectionOptions *>(obj);
+ auto ptr = reinterpret_cast<const onert_tflite::LSHProjectionOptions *>(obj);
return verifier.VerifyTable(ptr);
}
case BuiltinOptions_Pool2DOptions:
{
- auto ptr = reinterpret_cast<const Pool2DOptions *>(obj);
+ auto ptr = reinterpret_cast<const onert_tflite::Pool2DOptions *>(obj);
return verifier.VerifyTable(ptr);
}
case BuiltinOptions_SVDFOptions:
{
- auto ptr = reinterpret_cast<const SVDFOptions *>(obj);
+ auto ptr = reinterpret_cast<const onert_tflite::SVDFOptions *>(obj);
return verifier.VerifyTable(ptr);
}
case BuiltinOptions_RNNOptions:
{
- auto ptr = reinterpret_cast<const RNNOptions *>(obj);
+ auto ptr = reinterpret_cast<const onert_tflite::RNNOptions *>(obj);
return verifier.VerifyTable(ptr);
}
case BuiltinOptions_FullyConnectedOptions:
{
- auto ptr = reinterpret_cast<const FullyConnectedOptions *>(obj);
+ auto ptr = reinterpret_cast<const onert_tflite::FullyConnectedOptions *>(obj);
return verifier.VerifyTable(ptr);
}
case BuiltinOptions_SoftmaxOptions:
{
- auto ptr = reinterpret_cast<const SoftmaxOptions *>(obj);
+ auto ptr = reinterpret_cast<const onert_tflite::SoftmaxOptions *>(obj);
return verifier.VerifyTable(ptr);
}
case BuiltinOptions_ConcatenationOptions:
{
- auto ptr = reinterpret_cast<const ConcatenationOptions *>(obj);
+ auto ptr = reinterpret_cast<const onert_tflite::ConcatenationOptions *>(obj);
return verifier.VerifyTable(ptr);
}
case BuiltinOptions_AddOptions:
{
- auto ptr = reinterpret_cast<const AddOptions *>(obj);
+ auto ptr = reinterpret_cast<const onert_tflite::AddOptions *>(obj);
return verifier.VerifyTable(ptr);
}
case BuiltinOptions_L2NormOptions:
{
- auto ptr = reinterpret_cast<const L2NormOptions *>(obj);
+ auto ptr = reinterpret_cast<const onert_tflite::L2NormOptions *>(obj);
return verifier.VerifyTable(ptr);
}
case BuiltinOptions_LocalResponseNormalizationOptions:
{
- auto ptr = reinterpret_cast<const LocalResponseNormalizationOptions *>(obj);
+ auto ptr = reinterpret_cast<const onert_tflite::LocalResponseNormalizationOptions *>(obj);
return verifier.VerifyTable(ptr);
}
case BuiltinOptions_LSTMOptions:
{
- auto ptr = reinterpret_cast<const LSTMOptions *>(obj);
+ auto ptr = reinterpret_cast<const onert_tflite::LSTMOptions *>(obj);
return verifier.VerifyTable(ptr);
}
case BuiltinOptions_ResizeBilinearOptions:
{
- auto ptr = reinterpret_cast<const ResizeBilinearOptions *>(obj);
+ auto ptr = reinterpret_cast<const onert_tflite::ResizeBilinearOptions *>(obj);
return verifier.VerifyTable(ptr);
}
case BuiltinOptions_CallOptions:
{
- auto ptr = reinterpret_cast<const CallOptions *>(obj);
+ auto ptr = reinterpret_cast<const onert_tflite::CallOptions *>(obj);
return verifier.VerifyTable(ptr);
}
case BuiltinOptions_ReshapeOptions:
{
- auto ptr = reinterpret_cast<const ReshapeOptions *>(obj);
+ auto ptr = reinterpret_cast<const onert_tflite::ReshapeOptions *>(obj);
return verifier.VerifyTable(ptr);
}
case BuiltinOptions_SkipGramOptions:
{
- auto ptr = reinterpret_cast<const SkipGramOptions *>(obj);
+ auto ptr = reinterpret_cast<const onert_tflite::SkipGramOptions *>(obj);
return verifier.VerifyTable(ptr);
}
case BuiltinOptions_SpaceToDepthOptions:
{
- auto ptr = reinterpret_cast<const SpaceToDepthOptions *>(obj);
+ auto ptr = reinterpret_cast<const onert_tflite::SpaceToDepthOptions *>(obj);
return verifier.VerifyTable(ptr);
}
case BuiltinOptions_EmbeddingLookupSparseOptions:
{
- auto ptr = reinterpret_cast<const EmbeddingLookupSparseOptions *>(obj);
+ auto ptr = reinterpret_cast<const onert_tflite::EmbeddingLookupSparseOptions *>(obj);
return verifier.VerifyTable(ptr);
}
case BuiltinOptions_MulOptions:
{
- auto ptr = reinterpret_cast<const MulOptions *>(obj);
+ auto ptr = reinterpret_cast<const onert_tflite::MulOptions *>(obj);
return verifier.VerifyTable(ptr);
}
case BuiltinOptions_PadOptions:
{
- auto ptr = reinterpret_cast<const PadOptions *>(obj);
+ auto ptr = reinterpret_cast<const onert_tflite::PadOptions *>(obj);
return verifier.VerifyTable(ptr);
}
case BuiltinOptions_GatherOptions:
{
- auto ptr = reinterpret_cast<const GatherOptions *>(obj);
+ auto ptr = reinterpret_cast<const onert_tflite::GatherOptions *>(obj);
return verifier.VerifyTable(ptr);
}
case BuiltinOptions_BatchToSpaceNDOptions:
{
- auto ptr = reinterpret_cast<const BatchToSpaceNDOptions *>(obj);
+ auto ptr = reinterpret_cast<const onert_tflite::BatchToSpaceNDOptions *>(obj);
return verifier.VerifyTable(ptr);
}
case BuiltinOptions_SpaceToBatchNDOptions:
{
- auto ptr = reinterpret_cast<const SpaceToBatchNDOptions *>(obj);
+ auto ptr = reinterpret_cast<const onert_tflite::SpaceToBatchNDOptions *>(obj);
return verifier.VerifyTable(ptr);
}
case BuiltinOptions_TransposeOptions:
{
- auto ptr = reinterpret_cast<const TransposeOptions *>(obj);
+ auto ptr = reinterpret_cast<const onert_tflite::TransposeOptions *>(obj);
return verifier.VerifyTable(ptr);
}
case BuiltinOptions_ReducerOptions:
{
- auto ptr = reinterpret_cast<const ReducerOptions *>(obj);
+ auto ptr = reinterpret_cast<const onert_tflite::ReducerOptions *>(obj);
return verifier.VerifyTable(ptr);
}
case BuiltinOptions_SubOptions:
{
- auto ptr = reinterpret_cast<const SubOptions *>(obj);
+ auto ptr = reinterpret_cast<const onert_tflite::SubOptions *>(obj);
return verifier.VerifyTable(ptr);
}
case BuiltinOptions_DivOptions:
{
- auto ptr = reinterpret_cast<const DivOptions *>(obj);
+ auto ptr = reinterpret_cast<const onert_tflite::DivOptions *>(obj);
return verifier.VerifyTable(ptr);
}
case BuiltinOptions_SqueezeOptions:
{
- auto ptr = reinterpret_cast<const SqueezeOptions *>(obj);
+ auto ptr = reinterpret_cast<const onert_tflite::SqueezeOptions *>(obj);
return verifier.VerifyTable(ptr);
}
case BuiltinOptions_SequenceRNNOptions:
{
- auto ptr = reinterpret_cast<const SequenceRNNOptions *>(obj);
+ auto ptr = reinterpret_cast<const onert_tflite::SequenceRNNOptions *>(obj);
return verifier.VerifyTable(ptr);
}
case BuiltinOptions_StridedSliceOptions:
{
- auto ptr = reinterpret_cast<const StridedSliceOptions *>(obj);
+ auto ptr = reinterpret_cast<const onert_tflite::StridedSliceOptions *>(obj);
return verifier.VerifyTable(ptr);
}
case BuiltinOptions_ExpOptions:
{
- auto ptr = reinterpret_cast<const ExpOptions *>(obj);
+ auto ptr = reinterpret_cast<const onert_tflite::ExpOptions *>(obj);
return verifier.VerifyTable(ptr);
}
case BuiltinOptions_TopKV2Options:
{
- auto ptr = reinterpret_cast<const TopKV2Options *>(obj);
+ auto ptr = reinterpret_cast<const onert_tflite::TopKV2Options *>(obj);
return verifier.VerifyTable(ptr);
}
case BuiltinOptions_SplitOptions:
{
- auto ptr = reinterpret_cast<const SplitOptions *>(obj);
+ auto ptr = reinterpret_cast<const onert_tflite::SplitOptions *>(obj);
return verifier.VerifyTable(ptr);
}
case BuiltinOptions_LogSoftmaxOptions:
{
- auto ptr = reinterpret_cast<const LogSoftmaxOptions *>(obj);
+ auto ptr = reinterpret_cast<const onert_tflite::LogSoftmaxOptions *>(obj);
return verifier.VerifyTable(ptr);
}
case BuiltinOptions_CastOptions:
{
- auto ptr = reinterpret_cast<const CastOptions *>(obj);
+ auto ptr = reinterpret_cast<const onert_tflite::CastOptions *>(obj);
return verifier.VerifyTable(ptr);
}
case BuiltinOptions_DequantizeOptions:
{
- auto ptr = reinterpret_cast<const DequantizeOptions *>(obj);
+ auto ptr = reinterpret_cast<const onert_tflite::DequantizeOptions *>(obj);
return verifier.VerifyTable(ptr);
}
case BuiltinOptions_MaximumMinimumOptions:
{
- auto ptr = reinterpret_cast<const MaximumMinimumOptions *>(obj);
+ auto ptr = reinterpret_cast<const onert_tflite::MaximumMinimumOptions *>(obj);
return verifier.VerifyTable(ptr);
}
case BuiltinOptions_ArgMaxOptions:
{
- auto ptr = reinterpret_cast<const ArgMaxOptions *>(obj);
+ auto ptr = reinterpret_cast<const onert_tflite::ArgMaxOptions *>(obj);
return verifier.VerifyTable(ptr);
}
case BuiltinOptions_LessOptions:
{
- auto ptr = reinterpret_cast<const LessOptions *>(obj);
+ auto ptr = reinterpret_cast<const onert_tflite::LessOptions *>(obj);
return verifier.VerifyTable(ptr);
}
case BuiltinOptions_NegOptions:
{
- auto ptr = reinterpret_cast<const NegOptions *>(obj);
+ auto ptr = reinterpret_cast<const onert_tflite::NegOptions *>(obj);
return verifier.VerifyTable(ptr);
}
case BuiltinOptions_PadV2Options:
{
- auto ptr = reinterpret_cast<const PadV2Options *>(obj);
+ auto ptr = reinterpret_cast<const onert_tflite::PadV2Options *>(obj);
return verifier.VerifyTable(ptr);
}
case BuiltinOptions_GreaterOptions:
{
- auto ptr = reinterpret_cast<const GreaterOptions *>(obj);
+ auto ptr = reinterpret_cast<const onert_tflite::GreaterOptions *>(obj);
return verifier.VerifyTable(ptr);
}
case BuiltinOptions_GreaterEqualOptions:
{
- auto ptr = reinterpret_cast<const GreaterEqualOptions *>(obj);
+ auto ptr = reinterpret_cast<const onert_tflite::GreaterEqualOptions *>(obj);
return verifier.VerifyTable(ptr);
}
case BuiltinOptions_LessEqualOptions:
{
- auto ptr = reinterpret_cast<const LessEqualOptions *>(obj);
+ auto ptr = reinterpret_cast<const onert_tflite::LessEqualOptions *>(obj);
return verifier.VerifyTable(ptr);
}
case BuiltinOptions_SelectOptions:
{
- auto ptr = reinterpret_cast<const SelectOptions *>(obj);
+ auto ptr = reinterpret_cast<const onert_tflite::SelectOptions *>(obj);
return verifier.VerifyTable(ptr);
}
case BuiltinOptions_SliceOptions:
{
- auto ptr = reinterpret_cast<const SliceOptions *>(obj);
+ auto ptr = reinterpret_cast<const onert_tflite::SliceOptions *>(obj);
return verifier.VerifyTable(ptr);
}
case BuiltinOptions_TransposeConvOptions:
{
- auto ptr = reinterpret_cast<const TransposeConvOptions *>(obj);
+ auto ptr = reinterpret_cast<const onert_tflite::TransposeConvOptions *>(obj);
return verifier.VerifyTable(ptr);
}
case BuiltinOptions_SparseToDenseOptions:
{
- auto ptr = reinterpret_cast<const SparseToDenseOptions *>(obj);
+ auto ptr = reinterpret_cast<const onert_tflite::SparseToDenseOptions *>(obj);
return verifier.VerifyTable(ptr);
}
case BuiltinOptions_TileOptions:
{
- auto ptr = reinterpret_cast<const TileOptions *>(obj);
+ auto ptr = reinterpret_cast<const onert_tflite::TileOptions *>(obj);
return verifier.VerifyTable(ptr);
}
case BuiltinOptions_ExpandDimsOptions:
{
- auto ptr = reinterpret_cast<const ExpandDimsOptions *>(obj);
+ auto ptr = reinterpret_cast<const onert_tflite::ExpandDimsOptions *>(obj);
return verifier.VerifyTable(ptr);
}
case BuiltinOptions_EqualOptions:
{
- auto ptr = reinterpret_cast<const EqualOptions *>(obj);
+ auto ptr = reinterpret_cast<const onert_tflite::EqualOptions *>(obj);
return verifier.VerifyTable(ptr);
}
case BuiltinOptions_NotEqualOptions:
{
- auto ptr = reinterpret_cast<const NotEqualOptions *>(obj);
+ auto ptr = reinterpret_cast<const onert_tflite::NotEqualOptions *>(obj);
return verifier.VerifyTable(ptr);
}
case BuiltinOptions_ShapeOptions:
{
- auto ptr = reinterpret_cast<const ShapeOptions *>(obj);
+ auto ptr = reinterpret_cast<const onert_tflite::ShapeOptions *>(obj);
return verifier.VerifyTable(ptr);
}
case BuiltinOptions_PowOptions:
{
- auto ptr = reinterpret_cast<const PowOptions *>(obj);
+ auto ptr = reinterpret_cast<const onert_tflite::PowOptions *>(obj);
return verifier.VerifyTable(ptr);
}
case BuiltinOptions_ArgMinOptions:
{
- auto ptr = reinterpret_cast<const ArgMinOptions *>(obj);
+ auto ptr = reinterpret_cast<const onert_tflite::ArgMinOptions *>(obj);
return verifier.VerifyTable(ptr);
}
case BuiltinOptions_FakeQuantOptions:
{
- auto ptr = reinterpret_cast<const FakeQuantOptions *>(obj);
+ auto ptr = reinterpret_cast<const onert_tflite::FakeQuantOptions *>(obj);
return verifier.VerifyTable(ptr);
}
case BuiltinOptions_PackOptions:
{
- auto ptr = reinterpret_cast<const PackOptions *>(obj);
+ auto ptr = reinterpret_cast<const onert_tflite::PackOptions *>(obj);
return verifier.VerifyTable(ptr);
}
case BuiltinOptions_LogicalOrOptions:
{
- auto ptr = reinterpret_cast<const LogicalOrOptions *>(obj);
+ auto ptr = reinterpret_cast<const onert_tflite::LogicalOrOptions *>(obj);
return verifier.VerifyTable(ptr);
}
case BuiltinOptions_OneHotOptions:
{
- auto ptr = reinterpret_cast<const OneHotOptions *>(obj);
+ auto ptr = reinterpret_cast<const onert_tflite::OneHotOptions *>(obj);
return verifier.VerifyTable(ptr);
}
case BuiltinOptions_LogicalAndOptions:
{
- auto ptr = reinterpret_cast<const LogicalAndOptions *>(obj);
+ auto ptr = reinterpret_cast<const onert_tflite::LogicalAndOptions *>(obj);
return verifier.VerifyTable(ptr);
}
case BuiltinOptions_LogicalNotOptions:
{
- auto ptr = reinterpret_cast<const LogicalNotOptions *>(obj);
+ auto ptr = reinterpret_cast<const onert_tflite::LogicalNotOptions *>(obj);
return verifier.VerifyTable(ptr);
}
case BuiltinOptions_UnpackOptions:
{
- auto ptr = reinterpret_cast<const UnpackOptions *>(obj);
+ auto ptr = reinterpret_cast<const onert_tflite::UnpackOptions *>(obj);
return verifier.VerifyTable(ptr);
}
case BuiltinOptions_FloorDivOptions:
{
- auto ptr = reinterpret_cast<const FloorDivOptions *>(obj);
+ auto ptr = reinterpret_cast<const onert_tflite::FloorDivOptions *>(obj);
return verifier.VerifyTable(ptr);
}
case BuiltinOptions_SquareOptions:
{
- auto ptr = reinterpret_cast<const SquareOptions *>(obj);
+ auto ptr = reinterpret_cast<const onert_tflite::SquareOptions *>(obj);
return verifier.VerifyTable(ptr);
}
case BuiltinOptions_ZerosLikeOptions:
{
- auto ptr = reinterpret_cast<const ZerosLikeOptions *>(obj);
+ auto ptr = reinterpret_cast<const onert_tflite::ZerosLikeOptions *>(obj);
return verifier.VerifyTable(ptr);
}
case BuiltinOptions_FillOptions:
{
- auto ptr = reinterpret_cast<const FillOptions *>(obj);
+ auto ptr = reinterpret_cast<const onert_tflite::FillOptions *>(obj);
return verifier.VerifyTable(ptr);
}
case BuiltinOptions_BidirectionalSequenceLSTMOptions:
{
- auto ptr = reinterpret_cast<const BidirectionalSequenceLSTMOptions *>(obj);
+ auto ptr = reinterpret_cast<const onert_tflite::BidirectionalSequenceLSTMOptions *>(obj);
return verifier.VerifyTable(ptr);
}
case BuiltinOptions_BidirectionalSequenceRNNOptions:
{
- auto ptr = reinterpret_cast<const BidirectionalSequenceRNNOptions *>(obj);
+ auto ptr = reinterpret_cast<const onert_tflite::BidirectionalSequenceRNNOptions *>(obj);
return verifier.VerifyTable(ptr);
}
case BuiltinOptions_UnidirectionalSequenceLSTMOptions:
{
- auto ptr = reinterpret_cast<const UnidirectionalSequenceLSTMOptions *>(obj);
+ auto ptr = reinterpret_cast<const onert_tflite::UnidirectionalSequenceLSTMOptions *>(obj);
return verifier.VerifyTable(ptr);
}
case BuiltinOptions_FloorModOptions:
{
- auto ptr = reinterpret_cast<const FloorModOptions *>(obj);
+ auto ptr = reinterpret_cast<const onert_tflite::FloorModOptions *>(obj);
return verifier.VerifyTable(ptr);
}
case BuiltinOptions_RangeOptions:
{
- auto ptr = reinterpret_cast<const RangeOptions *>(obj);
+ auto ptr = reinterpret_cast<const onert_tflite::RangeOptions *>(obj);
return verifier.VerifyTable(ptr);
}
case BuiltinOptions_ResizeNearestNeighborOptions:
{
- auto ptr = reinterpret_cast<const ResizeNearestNeighborOptions *>(obj);
+ auto ptr = reinterpret_cast<const onert_tflite::ResizeNearestNeighborOptions *>(obj);
return verifier.VerifyTable(ptr);
}
case BuiltinOptions_LeakyReluOptions:
{
- auto ptr = reinterpret_cast<const LeakyReluOptions *>(obj);
+ auto ptr = reinterpret_cast<const onert_tflite::LeakyReluOptions *>(obj);
return verifier.VerifyTable(ptr);
}
case BuiltinOptions_SquaredDifferenceOptions:
{
- auto ptr = reinterpret_cast<const SquaredDifferenceOptions *>(obj);
+ auto ptr = reinterpret_cast<const onert_tflite::SquaredDifferenceOptions *>(obj);
return verifier.VerifyTable(ptr);
}
case BuiltinOptions_MirrorPadOptions:
{
- auto ptr = reinterpret_cast<const MirrorPadOptions *>(obj);
+ auto ptr = reinterpret_cast<const onert_tflite::MirrorPadOptions *>(obj);
return verifier.VerifyTable(ptr);
}
case BuiltinOptions_AbsOptions:
{
- auto ptr = reinterpret_cast<const AbsOptions *>(obj);
+ auto ptr = reinterpret_cast<const onert_tflite::AbsOptions *>(obj);
return verifier.VerifyTable(ptr);
}
case BuiltinOptions_SplitVOptions:
{
- auto ptr = reinterpret_cast<const SplitVOptions *>(obj);
+ auto ptr = reinterpret_cast<const onert_tflite::SplitVOptions *>(obj);
return verifier.VerifyTable(ptr);
}
case BuiltinOptions_UniqueOptions:
{
- auto ptr = reinterpret_cast<const UniqueOptions *>(obj);
+ auto ptr = reinterpret_cast<const onert_tflite::UniqueOptions *>(obj);
return verifier.VerifyTable(ptr);
}
case BuiltinOptions_ReverseV2Options:
{
- auto ptr = reinterpret_cast<const ReverseV2Options *>(obj);
+ auto ptr = reinterpret_cast<const onert_tflite::ReverseV2Options *>(obj);
return verifier.VerifyTable(ptr);
}
case BuiltinOptions_AddNOptions:
{
- auto ptr = reinterpret_cast<const AddNOptions *>(obj);
+ auto ptr = reinterpret_cast<const onert_tflite::AddNOptions *>(obj);
return verifier.VerifyTable(ptr);
}
case BuiltinOptions_GatherNdOptions:
{
- auto ptr = reinterpret_cast<const GatherNdOptions *>(obj);
+ auto ptr = reinterpret_cast<const onert_tflite::GatherNdOptions *>(obj);
return verifier.VerifyTable(ptr);
}
case BuiltinOptions_CosOptions:
{
- auto ptr = reinterpret_cast<const CosOptions *>(obj);
+ auto ptr = reinterpret_cast<const onert_tflite::CosOptions *>(obj);
return verifier.VerifyTable(ptr);
}
case BuiltinOptions_WhereOptions:
{
- auto ptr = reinterpret_cast<const WhereOptions *>(obj);
+ auto ptr = reinterpret_cast<const onert_tflite::WhereOptions *>(obj);
return verifier.VerifyTable(ptr);
}
case BuiltinOptions_RankOptions:
{
- auto ptr = reinterpret_cast<const RankOptions *>(obj);
+ auto ptr = reinterpret_cast<const onert_tflite::RankOptions *>(obj);
return verifier.VerifyTable(ptr);
}
case BuiltinOptions_ReverseSequenceOptions:
{
- auto ptr = reinterpret_cast<const ReverseSequenceOptions *>(obj);
+ auto ptr = reinterpret_cast<const onert_tflite::ReverseSequenceOptions *>(obj);
return verifier.VerifyTable(ptr);
}
case BuiltinOptions_MatrixDiagOptions:
{
- auto ptr = reinterpret_cast<const MatrixDiagOptions *>(obj);
+ auto ptr = reinterpret_cast<const onert_tflite::MatrixDiagOptions *>(obj);
return verifier.VerifyTable(ptr);
}
case BuiltinOptions_QuantizeOptions:
{
- auto ptr = reinterpret_cast<const QuantizeOptions *>(obj);
+ auto ptr = reinterpret_cast<const onert_tflite::QuantizeOptions *>(obj);
return verifier.VerifyTable(ptr);
}
case BuiltinOptions_MatrixSetDiagOptions:
{
- auto ptr = reinterpret_cast<const MatrixSetDiagOptions *>(obj);
+ auto ptr = reinterpret_cast<const onert_tflite::MatrixSetDiagOptions *>(obj);
return verifier.VerifyTable(ptr);
}
case BuiltinOptions_HardSwishOptions:
{
- auto ptr = reinterpret_cast<const HardSwishOptions *>(obj);
+ auto ptr = reinterpret_cast<const onert_tflite::HardSwishOptions *>(obj);
return verifier.VerifyTable(ptr);
}
case BuiltinOptions_IfOptions:
{
- auto ptr = reinterpret_cast<const IfOptions *>(obj);
+ auto ptr = reinterpret_cast<const onert_tflite::IfOptions *>(obj);
return verifier.VerifyTable(ptr);
}
case BuiltinOptions_WhileOptions:
{
- auto ptr = reinterpret_cast<const WhileOptions *>(obj);
+ auto ptr = reinterpret_cast<const onert_tflite::WhileOptions *>(obj);
return verifier.VerifyTable(ptr);
}
case BuiltinOptions_DepthToSpaceOptions:
{
- auto ptr = reinterpret_cast<const DepthToSpaceOptions *>(obj);
+ auto ptr = reinterpret_cast<const onert_tflite::DepthToSpaceOptions *>(obj);
return verifier.VerifyTable(ptr);
}
case BuiltinOptions_NonMaxSuppressionV4Options:
{
- auto ptr = reinterpret_cast<const NonMaxSuppressionV4Options *>(obj);
+ auto ptr = reinterpret_cast<const onert_tflite::NonMaxSuppressionV4Options *>(obj);
return verifier.VerifyTable(ptr);
}
case BuiltinOptions_NonMaxSuppressionV5Options:
{
- auto ptr = reinterpret_cast<const NonMaxSuppressionV5Options *>(obj);
+ auto ptr = reinterpret_cast<const onert_tflite::NonMaxSuppressionV5Options *>(obj);
return verifier.VerifyTable(ptr);
}
case BuiltinOptions_ScatterNdOptions:
{
- auto ptr = reinterpret_cast<const ScatterNdOptions *>(obj);
+ auto ptr = reinterpret_cast<const onert_tflite::ScatterNdOptions *>(obj);
return verifier.VerifyTable(ptr);
}
case BuiltinOptions_SelectV2Options:
{
- auto ptr = reinterpret_cast<const SelectV2Options *>(obj);
+ auto ptr = reinterpret_cast<const onert_tflite::SelectV2Options *>(obj);
return verifier.VerifyTable(ptr);
}
case BuiltinOptions_DensifyOptions:
{
- auto ptr = reinterpret_cast<const DensifyOptions *>(obj);
+ auto ptr = reinterpret_cast<const onert_tflite::DensifyOptions *>(obj);
return verifier.VerifyTable(ptr);
}
case BuiltinOptions_SegmentSumOptions:
{
- auto ptr = reinterpret_cast<const SegmentSumOptions *>(obj);
+ auto ptr = reinterpret_cast<const onert_tflite::SegmentSumOptions *>(obj);
return verifier.VerifyTable(ptr);
}
case BuiltinOptions_BatchMatMulOptions:
{
- auto ptr = reinterpret_cast<const BatchMatMulOptions *>(obj);
+ auto ptr = reinterpret_cast<const onert_tflite::BatchMatMulOptions *>(obj);
+ return verifier.VerifyTable(ptr);
+ }
+ case BuiltinOptions_CumsumOptions:
+ {
+ auto ptr = reinterpret_cast<const onert_tflite::CumsumOptions *>(obj);
+ return verifier.VerifyTable(ptr);
+ }
+ case BuiltinOptions_CallOnceOptions:
+ {
+ auto ptr = reinterpret_cast<const onert_tflite::CallOnceOptions *>(obj);
+ return verifier.VerifyTable(ptr);
+ }
+ case BuiltinOptions_BroadcastToOptions:
+ {
+ auto ptr = reinterpret_cast<const onert_tflite::BroadcastToOptions *>(obj);
+ return verifier.VerifyTable(ptr);
+ }
+ case BuiltinOptions_Rfft2dOptions:
+ {
+ auto ptr = reinterpret_cast<const onert_tflite::Rfft2dOptions *>(obj);
+ return verifier.VerifyTable(ptr);
+ }
+ case BuiltinOptions_Conv3DOptions:
+ {
+ auto ptr = reinterpret_cast<const onert_tflite::Conv3DOptions *>(obj);
+ return verifier.VerifyTable(ptr);
+ }
+ case BuiltinOptions_HashtableOptions:
+ {
+ auto ptr = reinterpret_cast<const onert_tflite::HashtableOptions *>(obj);
+ return verifier.VerifyTable(ptr);
+ }
+ case BuiltinOptions_HashtableFindOptions:
+ {
+ auto ptr = reinterpret_cast<const onert_tflite::HashtableFindOptions *>(obj);
+ return verifier.VerifyTable(ptr);
+ }
+ case BuiltinOptions_HashtableImportOptions:
+ {
+ auto ptr = reinterpret_cast<const onert_tflite::HashtableImportOptions *>(obj);
+ return verifier.VerifyTable(ptr);
+ }
+ case BuiltinOptions_HashtableSizeOptions:
+ {
+ auto ptr = reinterpret_cast<const onert_tflite::HashtableSizeOptions *>(obj);
+ return verifier.VerifyTable(ptr);
+ }
+ case BuiltinOptions_VarHandleOptions:
+ {
+ auto ptr = reinterpret_cast<const onert_tflite::VarHandleOptions *>(obj);
+ return verifier.VerifyTable(ptr);
+ }
+ case BuiltinOptions_ReadVariableOptions:
+ {
+ auto ptr = reinterpret_cast<const onert_tflite::ReadVariableOptions *>(obj);
+ return verifier.VerifyTable(ptr);
+ }
+ case BuiltinOptions_AssignVariableOptions:
+ {
+ auto ptr = reinterpret_cast<const onert_tflite::AssignVariableOptions *>(obj);
+ return verifier.VerifyTable(ptr);
+ }
+ case BuiltinOptions_RandomOptions:
+ {
+ auto ptr = reinterpret_cast<const onert_tflite::RandomOptions *>(obj);
return verifier.VerifyTable(ptr);
}
default:
- return false;
+ return true;
}
}
diff --git a/runtime/onert/frontend/trix/CMakeLists.txt b/runtime/onert/frontend/trix/CMakeLists.txt
new file mode 100644
index 000000000..7a0df4eaa
--- /dev/null
+++ b/runtime/onert/frontend/trix/CMakeLists.txt
@@ -0,0 +1,21 @@
+if (NOT BUILD_TRIX_LOADER)
+ return()
+endif ()
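+# NOTE (illustrative): enable this loader at configure time with -DBUILD_TRIX_LOADER=ON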
+
+nnfw_find_package(TRIXEngine EXACT 2.5.0 QUIET)
+if(TRIXEngine_FOUND)
+ list(APPEND SOURCES src/trix_loader.cc)
+else()
+ list(APPEND SOURCES src/trix_loader_dummy.cc)
+endif(TRIXEngine_FOUND)
+
+add_library(trix_loader STATIC ${SOURCES})
+set_target_properties(trix_loader PROPERTIES POSITION_INDEPENDENT_CODE ON)
+target_include_directories(trix_loader PUBLIC ${CMAKE_CURRENT_SOURCE_DIR}/include)
+target_link_libraries(trix_loader PRIVATE onert_core)
+target_link_libraries(trix_loader PRIVATE nnfw_common nnfw_coverage)
+
+if(TRIXEngine_FOUND)
+ target_include_directories(trix_loader PUBLIC ${TRIXEngine_INCLUDE_DIR})
+ target_link_libraries(trix_loader PRIVATE trix_engine)
+endif(TRIXEngine_FOUND)
diff --git a/runtime/onert/frontend/trix/include/trix_loader.h b/runtime/onert/frontend/trix/include/trix_loader.h
new file mode 100644
index 000000000..297d5ec28
--- /dev/null
+++ b/runtime/onert/frontend/trix/include/trix_loader.h
@@ -0,0 +1,34 @@
+/*
+ * Copyright (c) 2022 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __TRIX_TRIX_LOADER_H__
+#define __TRIX_TRIX_LOADER_H__
+
+#include "ir/Graph.h"
+#include <memory>
+
+namespace onert
+{
+namespace trix_loader
+{
+/**
+ * @throw std::runtime_error when the tvn path is wrong or the tvn file is invalid
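+ *
+ * Usage sketch (illustrative; "model.tvn" is a hypothetical path):
+ * @code
+ * auto subgraphs = onert::trix_loader::loadModel("model.tvn");
+ * @endcode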
+ */
+std::unique_ptr<ir::Subgraphs> loadModel(const std::string &filename);
+} // namespace trix_loader
+} // namespace onert
+
+#endif // __TRIX_TRIX_LOADER_H__
diff --git a/runtime/onert/frontend/trix/src/trix_loader.cc b/runtime/onert/frontend/trix/src/trix_loader.cc
new file mode 100644
index 000000000..e2995bbd1
--- /dev/null
+++ b/runtime/onert/frontend/trix/src/trix_loader.cc
@@ -0,0 +1,266 @@
+/*
+ * Copyright (c) 2022 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "trix_loader.h"
+
+#include "ir/Graph.h"
+#include "ir/operation/Bulk.h"
+
+#include <libnpuhost.h>
+#include <npubinfmt.h>
+#include <typedef.h>
+
+namespace onert
+{
+namespace trix_loader
+{
+
+/**
+ * @brief A tvn metadata reader
+ */
+class TrixMetaReader
+{
+public:
+ TrixMetaReader() = default;
+ ~TrixMetaReader() { free(_meta); }
+
+ void init(const char *path);
+ data_layout input_seg_layout(uint32_t n) const { return _meta->input_seg_layout[n]; }
+ data_layout output_seg_layout(uint32_t n) const { return _meta->output_seg_layout[n]; }
+ data_type input_seg_quant_type(uint32_t n) const { return _meta->input_seg_quant_type[n]; }
+ data_type output_seg_quant_type(uint32_t n) const { return _meta->output_seg_quant_type[n]; }
+ float input_seg_quant_scale(uint32_t n) const { return _meta->input_seg_quant_s[n]; }
+ float output_seg_quant_scale(uint32_t n) const { return _meta->output_seg_quant_s[n]; }
+ int32_t input_seg_quant_zp(uint32_t n) const { return _meta->input_seg_quant_z[n]; }
+ int32_t output_seg_quant_zp(uint32_t n) const { return _meta->output_seg_quant_z[n]; }
+ uint32_t input_seg_num() const { return _meta->input_seg_num; }
+ uint32_t output_seg_num() const { return _meta->output_seg_num; }
+ uint32_t input_seg_dims(uint32_t n, uint32_t axis) const
+ {
+ return _meta->input_seg_dims[n][axis];
+ }
+ uint32_t output_seg_dims(uint32_t n, uint32_t axis) const
+ {
+ return _meta->output_seg_dims[n][axis];
+ }
+
+private:
+ npubin_meta *_meta = nullptr;
+};
+
+void TrixMetaReader::init(const char *path)
+{
+ assert(path);
+ _meta = getNPUmodel_metadata(path, false);
+ if (_meta == nullptr)
+ {
+ throw std::runtime_error("Failed to get TRIV2 model metadata");
+ }
+ if (NPUBIN_VERSION(_meta->magiccode) != 3)
+ {
+ throw std::runtime_error("TRIV2 model metadata version mismatched.");
+ }
+}
+
+class TrixLoader
+{
+public:
+ /**
+ * @brief Construct a new TrixLoader object
+ *
+ * @param subgs Reference to the container that loaded subgraphs will be pushed into
+ */
+ explicit TrixLoader(std::unique_ptr<ir::Subgraphs> &subgs) : _subgraphs(subgs) {}
+
+ /**
+ * @brief Load a model from file
+ * @param file_path Path to the model file (tvn)
+ */
+ void loadFromFile(const std::string &file_path);
+
+private:
+ /**
+ * @brief Do the actual loading
+ * @throw std::runtime_error when the tvn path is wrong or the tvn file is invalid
+ */
+ void loadModel();
+ void loadSubgraphs();
+ std::unique_ptr<ir::Graph> loadSubgraph();
+ void loadOperands(ir::Graph &subg);
+ ir::OperandIndex loadOperandFromInput(uint32_t i, ir::Graph &subg);
+ ir::OperandIndex loadOperandFromOutput(uint32_t i, ir::Graph &subg);
+ void loadBulk(ir::Graph &subg);
+ void loadOperationIO(ir::OperandIndexSequence &inputs, ir::OperandIndexSequence &outputs);
+ ir::OperandIndex inputIdxToOperandIdx(uint32_t i) const;
+ ir::OperandIndex outputIdxToOperandIdx(uint32_t i) const;
+ ir::DataType toDataType(const data_type type) const;
+
+protected:
+ /** path to model (e.g. tvn) */
+ std::string _model_path;
+ /** Reference on loadable subgraphs */
+ std::unique_ptr<ir::Subgraphs> &_subgraphs;
+ TrixMetaReader _meta;
+};
+
+ir::DataType TrixLoader::toDataType(const data_type type) const
+{
+ switch (type)
+ {
+ case DATA_TYPE_QASYMM8:
+ return ir::DataType::QUANT_UINT8_ASYMM;
+ case DATA_TYPE_QSYMM16:
+ return ir::DataType::QUANT_INT16_SYMM;
+ default:
+ throw std::runtime_error("Unsupported data type from trix model");
+ }
+}
+
+ir::OperandIndex TrixLoader::inputIdxToOperandIdx(uint32_t i) const { return ir::OperandIndex(i); }
+ir::OperandIndex TrixLoader::outputIdxToOperandIdx(uint32_t i) const
+{
+ return ir::OperandIndex(_meta.input_seg_num() + i);
+}
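+// NOTE Operands are laid out as [inputs..., outputs...], so an output index is offset
+// by input_seg_num()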
+
+void TrixLoader::loadOperationIO(ir::OperandIndexSequence &inputs,
+ ir::OperandIndexSequence &outputs)
+{
+ for (uint32_t i = 0; i < _meta.input_seg_num(); ++i)
+ {
+ inputs.append(inputIdxToOperandIdx(i));
+ }
+
+ for (uint32_t i = 0; i < _meta.output_seg_num(); ++i)
+ {
+ outputs.append(outputIdxToOperandIdx(i));
+ }
+}
+
+void TrixLoader::loadBulk(ir::Graph &subg)
+{
+ ir::operation::Bulk::Param param;
+ param.binary_path = _model_path;
+
+ ir::OperandIndexSequence inputs;
+ ir::OperandIndexSequence outputs;
+
+ loadOperationIO(inputs, outputs);
+
+ std::unique_ptr<ir::operation::Bulk> bulk(new ir::operation::Bulk(inputs, outputs, param));
+ subg.addOperation(std::move(bulk));
+}
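+// NOTE A single Bulk operation carries the tvn binary path; the whole model is expected
+// to be dispatched to the NPU as one unit (an assumption based on the param above)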
+
+ir::OperandIndex TrixLoader::loadOperandFromInput(uint32_t idx, ir::Graph &subg)
+{
+ // Shape
+ ir::Shape shape;
+ for (uint32_t d = 0; d < MAX_RANK; ++d)
+ shape.append(_meta.input_seg_dims(idx, d));
+
+ // TypeInfo
+ ir::TypeInfo type_info(toDataType(_meta.input_seg_quant_type(idx)),
+ _meta.input_seg_quant_scale(idx), _meta.input_seg_quant_zp(idx));
+
+ // Create operand
+ const auto operand_index = subg.addOperand(shape, type_info);
+ return operand_index;
+}
+
+ir::OperandIndex TrixLoader::loadOperandFromOutput(uint32_t idx, ir::Graph &subg)
+{
+ // Shape
+ ir::Shape shape;
+ for (uint32_t d = 0; d < MAX_RANK; ++d)
+ shape.append(_meta.output_seg_dims(idx, d));
+
+ // TypeInfo
+ ir::TypeInfo type_info(toDataType(_meta.output_seg_quant_type(idx)),
+ _meta.output_seg_quant_scale(idx), _meta.output_seg_quant_zp(idx));
+
+ // Create operand
+ const auto operand_index = subg.addOperand(shape, type_info);
+ return operand_index;
+}
+
+void TrixLoader::loadOperands(ir::Graph &subg)
+{
+ auto in_num = _meta.input_seg_num();
+ for (uint32_t i = 0; i < in_num; ++i)
+ {
+ loadOperandFromInput(i, subg);
+ }
+ auto out_num = _meta.output_seg_num();
+ for (uint32_t i = 0; i < out_num; ++i)
+ {
+ loadOperandFromOutput(i, subg);
+ }
+}
+
+std::unique_ptr<ir::Graph> TrixLoader::loadSubgraph()
+{
+ auto subg = std::make_unique<ir::Graph>();
+ _meta.init(_model_path.c_str());
+
+ // Load tensors
+ loadOperands(*subg);
+
+ // Set inputs
+ for (uint32_t i = 0; i < _meta.input_seg_num(); ++i)
+ {
+ subg->addInput(inputIdxToOperandIdx(i), "tvn_input" + std::to_string(i));
+ }
+ // Set outputs
+ for (uint32_t i = 0; i < _meta.output_seg_num(); ++i)
+ {
+ subg->addOutput(outputIdxToOperandIdx(i), "tvn_out" + std::to_string(i));
+ }
+ // Create operations
+ loadBulk(*subg);
+
+ // TODO Support layouts other than NHWC; only NHWC is supported at the moment
+ subg->setLayout(ir::Layout::NHWC);
+ subg->verify();
+ return subg;
+}
+
+void TrixLoader::loadSubgraphs()
+{
+ // one subgraph only
+ auto subg = loadSubgraph();
+ _subgraphs->push(ir::SubgraphIndex(0), std::move(subg));
+}
+
+void TrixLoader::loadModel() { loadSubgraphs(); }
+
+void TrixLoader::loadFromFile(const std::string &file_path)
+{
+ // model path will be used to set the Bulk param
+ _model_path = file_path;
+ // NOTE _meta is initialized in loadSubgraph(); initializing it here as well would
+ // leak the first npubin_meta allocation
+ loadModel();
+}
+
+std::unique_ptr<ir::Subgraphs> loadModel(const std::string &filename)
+{
+ auto subgraphs = std::make_unique<ir::Subgraphs>();
+ TrixLoader loader(subgraphs);
+ loader.loadFromFile(filename);
+ return subgraphs;
+}
+} // namespace trix_loader
+} // namespace onert
diff --git a/runtime/onert/frontend/trix/src/trix_loader_dummy.cc b/runtime/onert/frontend/trix/src/trix_loader_dummy.cc
new file mode 100644
index 000000000..9fc8e1ff2
--- /dev/null
+++ b/runtime/onert/frontend/trix/src/trix_loader_dummy.cc
@@ -0,0 +1,31 @@
+/*
+ * Copyright (c) 2022 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "trix_loader.h"
+
+// Dummy implementation to avoid build errors on targets that do not have trix_engine
+
+namespace onert
+{
+namespace trix_loader
+{
+std::unique_ptr<ir::Subgraphs> loadModel(const std::string &)
+{
+ auto subgraphs = std::make_unique<ir::Subgraphs>();
+ return subgraphs;
+}
+} // namespace trix_loader
+} // namespace onert
diff --git a/tests/nnapi/specs/V1_2/equal_broadcast_float_nnfw.mod.py b/tests/nnapi/specs/V1_2/equal_broadcast_float_nnfw.mod.py
index 279c000ba..6663b79c0 100644
--- a/tests/nnapi/specs/V1_2/equal_broadcast_float_nnfw.mod.py
+++ b/tests/nnapi/specs/V1_2/equal_broadcast_float_nnfw.mod.py
@@ -3,8 +3,8 @@ model = Model()
i1 = Input("op1", "TENSOR_FLOAT32", "{2, 2}")
i2 = Input("op2", "TENSOR_FLOAT32", "{1, 2}")
-i3 = Output("op3", "TENSOR_QUANT8_ASYMM", "{2, 2}, 1.f, 0")
-model = model.Operation("EQUAL_EX", i1, i2).To(i3)
+i3 = Output("op3", "TENSOR_BOOL8", "{2, 2}")
+model = model.Operation("EQUAL", i1, i2).To(i3)
# Example 1. Input in operand 0,
input0 = {i1: # input 0
diff --git a/tests/nnfw_api/src/CircleGen.cc b/tests/nnfw_api/src/CircleGen.cc
index 0ffc8fb44..e4e4ba1af 100644
--- a/tests/nnfw_api/src/CircleGen.cc
+++ b/tests/nnfw_api/src/CircleGen.cc
@@ -525,6 +525,13 @@ uint32_t CircleGen::addOperatorSquare(const OperatorParams &params)
circle::BuiltinOptions_SquareOptions, options);
}
+uint32_t CircleGen::addOperatorBatchToSpaceND(const OperatorParams &params)
+{
+ auto options = circle::CreateBatchToSpaceNDOptions(_fbb).Union();
+ return addOperatorWithOptions(params, circle::BuiltinOperator_BATCH_TO_SPACE_ND,
+ circle::BuiltinOptions_BatchToSpaceNDOptions, options);
+}
+
// NOTE Please add addOperator functions ABOVE this line
//
// % How to add a new addOperatorXXX function
diff --git a/tests/nnfw_api/src/CircleGen.h b/tests/nnfw_api/src/CircleGen.h
index f6f799668..062a8d35a 100644
--- a/tests/nnfw_api/src/CircleGen.h
+++ b/tests/nnfw_api/src/CircleGen.h
@@ -146,6 +146,7 @@ public:
uint32_t addOperatorAveragePool2D(const OperatorParams &params, circle::Padding padding,
int stride_w, int stride_h, int filter_w, int filter_h,
circle::ActivationFunctionType actfn);
+ uint32_t addOperatorBatchToSpaceND(const OperatorParams &params);
uint32_t addOperatorCast(const OperatorParams &params, circle::TensorType input_type,
circle::TensorType output_type);
uint32_t addOperatorConcatenation(const OperatorParams &params, int axis,
diff --git a/tests/nnfw_api/src/GenModelTests.cc b/tests/nnfw_api/src/GenModelTests.cc
index a17a7e8ba..53a3571db 100644
--- a/tests/nnfw_api/src/GenModelTests.cc
+++ b/tests/nnfw_api/src/GenModelTests.cc
@@ -308,7 +308,7 @@ TEST_F(GenModelTest, Reshape_without_shape_param)
CircleGen::Shape new_shape_val{2, 2};
_context = std::make_unique<GenModelTestContext>(cgen.finish());
_context->addTestCase(uniformTCD<int32_t>({{1, 2, 3, 4}, new_shape_val}, {{1, 2, 3, 4}}));
- _context->output_sizes(0, sizeof(i32) * 4);
+ _context->output_sizes(0, sizeof(int32_t) * 4);
_context->setBackends({"cpu" /* "acl_cl", "acl_neon" does not support dynamic tensor */});
SUCCEED();
diff --git a/tests/nnfw_api/src/one_op_tests/BatchToSpaceND.cc b/tests/nnfw_api/src/one_op_tests/BatchToSpaceND.cc
new file mode 100644
index 000000000..3f4554302
--- /dev/null
+++ b/tests/nnfw_api/src/one_op_tests/BatchToSpaceND.cc
@@ -0,0 +1,72 @@
+/*
+ * Copyright (c) 2022 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "GenModelTest.h"
+
+TEST_F(GenModelTest, OneOp_BatchToSpaceND_notCrop_1x1)
+{
+ CircleGen cgen;
+ int in = cgen.addTensor({{4, 1, 1, 1}, circle::TensorType::TensorType_FLOAT32});
+ int out = cgen.addTensor({{1, 2, 2, 1}, circle::TensorType::TensorType_FLOAT32});
+ int block = cgen.addTensor({{2}, circle::TensorType::TensorType_INT32});
+ cgen.addOperatorBatchToSpaceND({{in, block}, {out}});
+ cgen.setInputsAndOutputs({in, block}, {out});
+ _context = std::make_unique<GenModelTestContext>(cgen.finish());
+ _context->addTestCase(TestCaseData{}
+ .addInput<float>({1, 2, 3, 4})
+ .addInput<int32_t>({2, 2})
+ .addOutput<float>({1, 2, 3, 4}));
+ SUCCEED();
+}
+
+TEST_F(GenModelTest, OneOp_BatchToSpaceND_notCrop_2x2)
+{
+ CircleGen cgen;
+ int in = cgen.addTensor({{4, 2, 2, 1}, circle::TensorType::TensorType_FLOAT32});
+ int out = cgen.addTensor({{1, 4, 4, 1}, circle::TensorType::TensorType_FLOAT32});
+ int block = cgen.addTensor({{2}, circle::TensorType::TensorType_INT32});
+ cgen.addOperatorBatchToSpaceND({{in, block}, {out}});
+ cgen.setInputsAndOutputs({in, block}, {out});
+ _context = std::make_unique<GenModelTestContext>(cgen.finish());
+ _context->addTestCase(
+ TestCaseData{}
+ .addInput<float>({1, 3, 9, 11, 2, 4, 10, 12, 5, 7, 13, 15, 6, 8, 14, 16})
+ .addInput<int32_t>({2, 2})
+ .addOutput<float>({1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16}));
+ _context->setBackends({"cpu"});
+ SUCCEED();
+}
+
+TEST_F(GenModelTest, OneOp_BatchToSpaceND_Crop)
+{
+ CircleGen cgen;
+ int in = cgen.addTensor({{8, 1, 3, 1}, circle::TensorType::TensorType_FLOAT32});
+ int out = cgen.addTensor({{2, 2, 4, 1}, circle::TensorType::TensorType_FLOAT32});
+ int block = cgen.addTensor({{2}, circle::TensorType::TensorType_INT32});
+ int crop = cgen.addTensor({{2, 2}, circle::TensorType::TensorType_INT32});
+ cgen.addOperatorBatchToSpaceND({{in, block, crop}, {out}});
+ cgen.setInputsAndOutputs({in, block, crop}, {out});
+ _context = std::make_unique<GenModelTestContext>(cgen.finish());
+ _context->addTestCase(
+ TestCaseData{}
+ .addInput<float>(
+ {0, 1, 3, 0, 9, 11, 0, 2, 4, 0, 10, 12, 0, 5, 7, 0, 13, 15, 0, 6, 8, 0, 14, 16})
+ .addInput<int32_t>({2, 2})
+ .addInput<int32_t>({0, 0, 2, 0})
+ .addOutput<float>({1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16}));
+ _context->setBackends({"cpu"});
+ SUCCEED();
+}
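The three cases above pin down the expected data movement of BATCH_TO_SPACE_ND. As a rough NumPy sketch of the no-crop path (a hypothetical helper, not part of the test suite), the op folds the batch dimension back into spatial blocks:

```python
import numpy as np

def batch_to_space_nd(x, block):
    # x is NHWC; block is [block_h, block_w]; crop handling is omitted
    n, h, w, c = x.shape
    bh, bw = block
    y = x.reshape(bh, bw, n // (bh * bw), h, w, c)
    y = y.transpose(2, 3, 0, 4, 1, 5)  # -> [n', h, block_h, w, block_w, c]
    return y.reshape(n // (bh * bw), h * bh, w * bw, c)

x = np.arange(1, 5, dtype=np.float32).reshape(4, 1, 1, 1)
print(batch_to_space_nd(x, [2, 2]).flatten())  # [1. 2. 3. 4.], as in the 1x1 test
```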
diff --git a/tests/nnfw_api/src/one_op_tests/Equal.cc b/tests/nnfw_api/src/one_op_tests/Equal.cc
index 9f79575f2..da890978e 100644
--- a/tests/nnfw_api/src/one_op_tests/Equal.cc
+++ b/tests/nnfw_api/src/one_op_tests/Equal.cc
@@ -16,21 +16,111 @@
#include "GenModelTest.h"
-TEST_F(GenModelTest, OneOp_Equal)
+struct EqualVariationParam
{
+ TestCaseData tcd;
+ circle::TensorType input_type = circle::TensorType::TensorType_FLOAT32;
+ const std::vector<std::string> backends = {"acl_cl", "acl_neon", "cpu"};
+};
+
+class EqualVariation : public GenModelTest,
+ public ::testing::WithParamInterface<EqualVariationParam>
+{
+};
+
+// Input shape:
+// Base: {1, 2, 2, 1}
+// Broadcast: {1} on one of the two inputs
+// Output shape: {1, 2, 2, 1}
+// Input type: non-quantized types
+// Output type: BOOL
+// Tests with different input types and values
+INSTANTIATE_TEST_SUITE_P(GenModelTest, EqualVariation,
+ ::testing::Values(
+ // Float type
+ EqualVariationParam{TestCaseData{}
+ .addInput<float>({0.1, 0.3, 0.5, 0.7})
+ .addInput<float>({0.1, 0.2, 0.3, 0.4})
+ .addOutput<bool>({true, false, false, false})},
+ // Float type - broadcast
+ EqualVariationParam{TestCaseData{}
+ .addInput<float>({0.1, 0.3, 0.5, 0.7})
+ .addInput<float>({0.3})
+ .addOutput<bool>({false, true, false, false})},
+ // Int32 type
+ EqualVariationParam{TestCaseData{}
+ .addInput<int32_t>({1, 3, 5, 7})
+ .addInput<int32_t>({1, 2, 3, 4})
+ .addOutput<bool>({true, false, false, false}),
+ circle::TensorType::TensorType_INT32},
+ // Int32 type - broadcast
+ EqualVariationParam{TestCaseData{}
+ .addInput<int32_t>({1, 3, 5, 7})
+ .addInput<int32_t>({5})
+ .addOutput<bool>({false, false, true, false}),
+ circle::TensorType::TensorType_INT32},
+ // Int64 type
+ // NYI: acl backend
+ EqualVariationParam{TestCaseData{}
+ .addInput<int64_t>({1, 3, 5, 7})
+ .addInput<int64_t>({1, 2, 3, 4})
+ .addOutput<bool>({true, false, false, false}),
+ circle::TensorType::TensorType_INT64,
+ {"cpu"}},
+ // Int64 type - broadcast
+ // NYI: acl backend
+ EqualVariationParam{TestCaseData{}
+ .addInput<int64_t>({1, 3, 5, 7})
+ .addInput<int64_t>({1})
+ .addOutput<bool>({true, false, false, false}),
+ circle::TensorType::TensorType_INT64,
+ {"cpu"}},
+ // Bool type
+ EqualVariationParam{TestCaseData{}
+ .addInput<bool>({true, true, false, false})
+ .addInput<bool>({true, false, true, false})
+ .addOutput<bool>({true, false, false, true}),
+ circle::TensorType::TensorType_BOOL},
+ // Bool type - broadcast
+ EqualVariationParam{TestCaseData{}
+ .addInput<bool>({true, true, false, false})
+ .addInput<bool>({true})
+ .addOutput<bool>({true, true, false, false}),
+ circle::TensorType::TensorType_BOOL}
+
+ ));
+
+TEST_P(EqualVariation, Test)
+{
+ auto &param = GetParam();
+
+ auto lhs_data = param.tcd.inputs.at(0);
+ auto rhs_data = param.tcd.inputs.at(1);
+
+ bool broadcast_lhs = false;
+ bool broadcast_rhs = false;
+ if (lhs_data.size() != rhs_data.size())
+ {
+ if (lhs_data.size() < rhs_data.size())
+ broadcast_lhs = true;
+ else
+ broadcast_rhs = true;
+ }
+
CircleGen cgen;
- int lhs = cgen.addTensor({{1, 2, 2, 1}, circle::TensorType::TensorType_FLOAT32});
- int rhs = cgen.addTensor({{1, 2, 2, 1}, circle::TensorType::TensorType_FLOAT32});
- int out = cgen.addTensor({{1, 2, 2, 1}, circle::TensorType::TensorType_BOOL});
+ const auto output_type = circle::TensorType::TensorType_BOOL;
+
+ int lhs = broadcast_lhs ? cgen.addTensor({{1}, param.input_type})
+ : cgen.addTensor({{1, 2, 2, 1}, param.input_type});
+ int rhs = broadcast_rhs ? cgen.addTensor({{1}, param.input_type})
+ : cgen.addTensor({{1, 2, 2, 1}, param.input_type});
+ int out = cgen.addTensor({{1, 2, 2, 1}, output_type});
cgen.addOperatorEqual({{lhs, rhs}, {out}});
cgen.setInputsAndOutputs({lhs, rhs}, {out});
_context = std::make_unique<GenModelTestContext>(cgen.finish());
- _context->addTestCase(TestCaseData{}
- .addInput<float>({0.1, 0.3, 0.5, 0.7})
- .addInput<float>({0.1, 0.2, 0.3, 0.4})
- .addOutput<bool>({true, false, false, false}));
- _context->setBackends({"acl_cl", "acl_neon", "cpu"});
+ _context->addTestCase(param.tcd);
+ _context->setBackends(param.backends);
SUCCEED();
}
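For reference, the broadcast cases in the parameter list follow ordinary elementwise-compare broadcasting. A quick NumPy sketch of the float broadcast case above:

```python
import numpy as np

lhs = np.array([0.1, 0.3, 0.5, 0.7], dtype=np.float32).reshape(1, 2, 2, 1)
rhs = np.array([0.3], dtype=np.float32)  # rank-1 operand broadcast against lhs
print((lhs == rhs).flatten())            # [False  True False False]
```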
diff --git a/tests/tools/nnpackage_run/CMakeLists.txt b/tests/tools/nnpackage_run/CMakeLists.txt
index d13417847..49070d30d 100644
--- a/tests/tools/nnpackage_run/CMakeLists.txt
+++ b/tests/tools/nnpackage_run/CMakeLists.txt
@@ -10,6 +10,7 @@ list(APPEND NNPACKAGE_RUN_SRCS "src/nnpackage_run.cc")
list(APPEND NNPACKAGE_RUN_SRCS "src/args.cc")
list(APPEND NNPACKAGE_RUN_SRCS "src/nnfw_util.cc")
list(APPEND NNPACKAGE_RUN_SRCS "src/randomgen.cc")
+list(APPEND NNPACKAGE_RUN_SRCS "src/rawformatter.cc")
nnfw_find_package(Boost REQUIRED program_options)
nnfw_find_package(Ruy QUIET)
diff --git a/tests/tools/nnpackage_run/src/args.cc b/tests/tools/nnpackage_run/src/args.cc
index c0f937797..ba034cee3 100644
--- a/tests/tools/nnpackage_run/src/args.cc
+++ b/tests/tools/nnpackage_run/src/args.cc
@@ -201,13 +201,15 @@ void Args::Initialize(void)
("dump,d", po::value<std::string>()->default_value("")->notifier([&](const auto &v) { _dump_filename = v; }), "Output filename")
("load,l", po::value<std::string>()->default_value("")->notifier([&](const auto &v) { _load_filename = v; }), "Input filename")
#endif
+ ("dump:raw", po::value<std::string>()->default_value("")->notifier([&](const auto &v) { _dump_raw_filename = v; }), "Raw Output filename")
+ ("load:raw", po::value<std::string>()->default_value("")->notifier([&](const auto &v) { _load_raw_filename = v; }), "Raw Input filename")
("output_sizes", po::value<std::string>()->notifier(process_output_sizes),
"The output buffer size in JSON 1D array\n"
"If not given, the model's output sizes are used\n"
"e.g. '[0, 40, 2, 80]' to set 0th tensor to 40 and 2nd tensor to 80.\n")
("num_runs,r", po::value<int>()->default_value(1)->notifier([&](const auto &v) { _num_runs = v; }), "The number of runs")
("warmup_runs,w", po::value<int>()->default_value(0)->notifier([&](const auto &v) { _warmup_runs = v; }), "The number of warmup runs")
- ("run_delay,t", po::value<int>()->default_value(-1)->notifier([&](const auto &v) { _run_delay = v; }), "Delay time(ms) between runs (as default no delay")
+ ("run_delay,t", po::value<int>()->default_value(-1)->notifier([&](const auto &v) { _run_delay = v; }), "Delay time(us) between runs (default: no delay)")
("gpumem_poll,g", po::value<bool>()->default_value(false)->notifier([&](const auto &v) { _gpumem_poll = v; }), "Check gpu memory polling separately")
("mem_poll,m", po::value<bool>()->default_value(false)->notifier([&](const auto &v) { _mem_poll = v; }), "Check memory polling")
("write_report,p", po::value<bool>()->default_value(false)->notifier([&](const auto &v) { _write_report = v; }),
diff --git a/tests/tools/nnpackage_run/src/args.h b/tests/tools/nnpackage_run/src/args.h
index 11fd00023..82015afcc 100644
--- a/tests/tools/nnpackage_run/src/args.h
+++ b/tests/tools/nnpackage_run/src/args.h
@@ -52,6 +52,8 @@ public:
const std::string &getLoadFilename(void) const { return _load_filename; }
WhenToUseH5Shape getWhenToUseH5Shape(void) const { return _when_to_use_h5_shape; }
#endif
+ const std::string &getDumpRawFilename(void) const { return _dump_raw_filename; }
+ const std::string &getLoadRawFilename(void) const { return _load_raw_filename; }
const int getNumRuns(void) const { return _num_runs; }
const int getWarmupRuns(void) const { return _warmup_runs; }
const int getRunDelay(void) const { return _run_delay; }
@@ -80,6 +82,8 @@ private:
std::string _load_filename;
WhenToUseH5Shape _when_to_use_h5_shape = WhenToUseH5Shape::NOT_PROVIDED;
#endif
+ std::string _dump_raw_filename;
+ std::string _load_raw_filename;
TensorShapeMap _shape_prepare;
TensorShapeMap _shape_run;
int _num_runs;
diff --git a/tests/tools/nnpackage_run/src/formatter.h b/tests/tools/nnpackage_run/src/formatter.h
new file mode 100644
index 000000000..0dca3407a
--- /dev/null
+++ b/tests/tools/nnpackage_run/src/formatter.h
@@ -0,0 +1,47 @@
+/*
+ * Copyright (c) 2022 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __NNPACKAGE_RUN_FORMATTER_H__
+#define __NNPACKAGE_RUN_FORMATTER_H__
+
+#include <string>
+#include <vector>
+
+#include "types.h"
+#include "allocation.h"
+
+struct nnfw_session;
+
+namespace nnpkg_run
+{
+class Formatter
+{
+public:
+ virtual ~Formatter() = default;
+ Formatter(nnfw_session *sess) : session_(sess) {}
+ virtual void loadInputs(const std::string &filename, std::vector<Allocation> &inputs) = 0;
+ virtual void dumpOutputs(const std::string &filename, std::vector<Allocation> &outputs) = 0;
+ virtual std::vector<TensorShape> readTensorShapes(const std::string &filename)
+ {
+ return std::vector<TensorShape>();
+ };
+
+protected:
+ nnfw_session *session_;
+};
+} // namespace nnpkg_run
+
+#endif // __NNPACKAGE_RUN_FORMATTER_H__
diff --git a/tests/tools/nnpackage_run/src/h5formatter.cc b/tests/tools/nnpackage_run/src/h5formatter.cc
index e207465d4..c0e670b1e 100644
--- a/tests/tools/nnpackage_run/src/h5formatter.cc
+++ b/tests/tools/nnpackage_run/src/h5formatter.cc
@@ -143,6 +143,8 @@ void H5Formatter::loadInputs(const std::string &filename, std::vector<Allocation
else
throw std::runtime_error("model input type is int8. But h5 data type is different.");
break;
+ case NNFW_TYPE_TENSOR_QUANT16_SYMM_SIGNED:
+ throw std::runtime_error("NYI for NNFW_TYPE_TENSOR_QUANT16_SYMM_SIGNED type");
default:
throw std::runtime_error("nnpkg_run can load f32, i32, qasymm8, bool and uint8.");
}
@@ -234,6 +236,8 @@ void H5Formatter::dumpOutputs(const std::string &filename, std::vector<Allocatio
data_set.write(outputs[i].data(), H5::PredType::NATIVE_INT8);
break;
}
+ case NNFW_TYPE_TENSOR_QUANT16_SYMM_SIGNED:
+ throw std::runtime_error("NYI for NNFW_TYPE_TENSOR_QUANT16_SYMM_SIGNED type");
default:
throw std::runtime_error("nnpkg_run can dump f32, i32, qasymm8, bool and uint8.");
}
diff --git a/tests/tools/nnpackage_run/src/h5formatter.h b/tests/tools/nnpackage_run/src/h5formatter.h
index 5c831021b..ca2d5a576 100644
--- a/tests/tools/nnpackage_run/src/h5formatter.h
+++ b/tests/tools/nnpackage_run/src/h5formatter.h
@@ -17,26 +17,24 @@
#ifndef __NNPACKAGE_RUN_H5FORMATTER_H__
#define __NNPACKAGE_RUN_H5FORMATTER_H__
+#include "allocation.h"
+#include "formatter.h"
+#include "types.h"
+
#include <string>
#include <vector>
-#include "types.h"
-#include "allocation.h"
-
struct nnfw_session;
namespace nnpkg_run
{
-class H5Formatter
+class H5Formatter : public Formatter
{
public:
- H5Formatter(nnfw_session *sess) : session_(sess) {}
- std::vector<TensorShape> readTensorShapes(const std::string &filename);
- void loadInputs(const std::string &filename, std::vector<Allocation> &inputs);
- void dumpOutputs(const std::string &filename, std::vector<Allocation> &outputs);
-
-private:
- nnfw_session *session_;
+ H5Formatter(nnfw_session *sess) : Formatter(sess) {}
+ std::vector<TensorShape> readTensorShapes(const std::string &filename) override;
+ void loadInputs(const std::string &filename, std::vector<Allocation> &inputs) override;
+ void dumpOutputs(const std::string &filename, std::vector<Allocation> &outputs) override;
};
} // namespace nnpkg_run
diff --git a/tests/tools/nnpackage_run/src/nnfw_util.cc b/tests/tools/nnpackage_run/src/nnfw_util.cc
index a57069bd8..da98da5e4 100644
--- a/tests/tools/nnpackage_run/src/nnfw_util.cc
+++ b/tests/tools/nnpackage_run/src/nnfw_util.cc
@@ -41,6 +41,7 @@ uint64_t bufsize_for(const nnfw_tensorinfo *ti)
sizeof(uint8_t), /* NNFW_TYPE_TENSOR_UINT8 = 4 */
sizeof(int64_t), /* NNFW_TYPE_TENSOR_INT64 = 5 */
sizeof(int8_t), /* NNFW_TYPE_TENSOR_QUANT8_ASYMM_SIGNED = 6 */
+ sizeof(int16_t), /* NNFW_TYPE_TENSOR_QUANT16_SYMM_SIGNED = 7 */
};
return elmsize[ti->dtype] * num_elems(ti);
}
diff --git a/tests/tools/nnpackage_run/src/nnpackage_run.cc b/tests/tools/nnpackage_run/src/nnpackage_run.cc
index 1fcab512a..71d8b5977 100644
--- a/tests/tools/nnpackage_run/src/nnpackage_run.cc
+++ b/tests/tools/nnpackage_run/src/nnpackage_run.cc
@@ -24,6 +24,7 @@
#include "nnfw_util.h"
#include "nnfw_internal.h"
#include "randomgen.h"
+#include "rawformatter.h"
#ifdef RUY_PROFILER
#include "ruy/profiler/profiler.h"
#endif
@@ -97,7 +98,7 @@ int main(const int argc, char **argv)
nnfw_tensorinfo ti;
NNPR_ENSURE_STATUS(nnfw_input_tensorinfo(session, i, &ti));
- if (ti.dtype < NNFW_TYPE_TENSOR_FLOAT32 || ti.dtype > NNFW_TYPE_TENSOR_QUANT8_ASYMM_SIGNED)
+ if (ti.dtype < NNFW_TYPE_TENSOR_FLOAT32 || ti.dtype > NNFW_TYPE_TENSOR_QUANT16_SYMM_SIGNED)
{
std::cerr << "E: not supported input type" << std::endl;
exit(-1);
@@ -114,7 +115,7 @@ int main(const int argc, char **argv)
nnfw_tensorinfo ti;
NNPR_ENSURE_STATUS(nnfw_output_tensorinfo(session, i, &ti));
- if (ti.dtype < NNFW_TYPE_TENSOR_FLOAT32 || ti.dtype > NNFW_TYPE_TENSOR_QUANT8_ASYMM_SIGNED)
+ if (ti.dtype < NNFW_TYPE_TENSOR_FLOAT32 || ti.dtype > NNFW_TYPE_TENSOR_QUANT16_SYMM_SIGNED)
{
std::cerr << "E: not supported output type" << std::endl;
exit(-1);
@@ -194,10 +195,15 @@ int main(const int argc, char **argv)
#if defined(ONERT_HAVE_HDF5) && ONERT_HAVE_HDF5 == 1
if (!args.getLoadFilename().empty())
H5Formatter(session).loadInputs(args.getLoadFilename(), inputs);
+ else if (!args.getLoadRawFilename().empty())
+ RawFormatter(session).loadInputs(args.getLoadRawFilename(), inputs);
else
RandomGenerator(session).generate(inputs);
#else
- RandomGenerator(session).generate(inputs);
+ if (!args.getLoadRawFilename().empty())
+ RawFormatter(session).loadInputs(args.getLoadRawFilename(), inputs);
+ else
+ RandomGenerator(session).generate(inputs);
#endif
// prepare output
@@ -267,6 +273,8 @@ int main(const int argc, char **argv)
if (!args.getDumpFilename().empty())
H5Formatter(session).dumpOutputs(args.getDumpFilename(), outputs);
#endif
+ if (!args.getDumpRawFilename().empty())
+ RawFormatter(session).dumpOutputs(args.getDumpRawFilename(), outputs);
NNPR_ENSURE_STATUS(nnfw_close_session(session));
diff --git a/tests/tools/nnpackage_run/src/randomgen.cc b/tests/tools/nnpackage_run/src/randomgen.cc
index a1fcf82dc..4789b6b1a 100644
--- a/tests/tools/nnpackage_run/src/randomgen.cc
+++ b/tests/tools/nnpackage_run/src/randomgen.cc
@@ -61,6 +61,9 @@ void RandomGenerator::generate(std::vector<Allocation> &inputs)
case NNFW_TYPE_TENSOR_INT64:
randomData<int64_t>(randgen, inputs[i].data(), num_elems(&ti));
break;
+ case NNFW_TYPE_TENSOR_QUANT16_SYMM_SIGNED:
+ randomData<int16_t>(randgen, inputs[i].data(), num_elems(&ti));
+ break;
default:
std::cerr << "Not supported input type" << std::endl;
std::exit(-1);
diff --git a/tests/tools/nnpackage_run/src/rawformatter.cc b/tests/tools/nnpackage_run/src/rawformatter.cc
new file mode 100644
index 000000000..f90018e56
--- /dev/null
+++ b/tests/tools/nnpackage_run/src/rawformatter.cc
@@ -0,0 +1,103 @@
+/*
+ * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "rawformatter.h"
+#include "nnfw.h"
+#include "nnfw_util.h"
+
+#include <iostream>
+#include <fstream>
+#include <stdexcept>
+
+namespace nnpkg_run
+{
+void RawFormatter::loadInputs(const std::string &filename, std::vector<Allocation> &inputs)
+{
+ uint32_t num_inputs;
+ NNPR_ENSURE_STATUS(nnfw_input_size(session_, &num_inputs));
+
+ // TODO: Support multiple inputs
+ // Option 1. Get a comma-separated input file list like --load:raw in.0,in.1,in.2
+ // Option 2. Get a prefix like --load:raw out
+ // Internally access out.0, out.1, out.2, ... out.{N} where N is determined by the API.
+ if (num_inputs != 1)
+ {
+ throw std::runtime_error("Only 1 input is supported for raw input");
+ }
+ try
+ {
+ for (uint32_t i = 0; i < num_inputs; ++i)
+ {
+ nnfw_tensorinfo ti;
+ NNPR_ENSURE_STATUS(nnfw_input_tensorinfo(session_, i, &ti));
+
+ // allocate memory for data
+ auto bufsz = bufsize_for(&ti);
+ inputs[i].alloc(bufsz);
+
+ std::ifstream file(filename, std::ios::ate | std::ios::binary);
+ auto filesz = file.tellg();
+ if (bufsz != filesz)
+ {
+ throw std::runtime_error("Input size does not match: " + std::to_string(bufsz) +
+ " expected, but " + std::to_string(filesz) + " provided.");
+ }
+ file.seekg(0, std::ios::beg);
+ file.read(reinterpret_cast<char *>(inputs[i].data()), filesz);
+ file.close();
+
+ NNPR_ENSURE_STATUS(nnfw_set_input(session_, i, ti.dtype, inputs[i].data(), bufsz));
+ NNPR_ENSURE_STATUS(nnfw_set_input_layout(session_, i, NNFW_LAYOUT_CHANNELS_LAST));
+ }
+ }
+ catch (const std::exception &e)
+ {
+ std::cerr << e.what() << std::endl;
+ std::exit(-1);
+ }
+}
+
+void RawFormatter::dumpOutputs(const std::string &filename, std::vector<Allocation> &outputs)
+{
+ uint32_t num_outputs;
+ NNPR_ENSURE_STATUS(nnfw_output_size(session_, &num_outputs));
+ // TODO: Support multiple outputs
+ // The same options as for loadInputs apply.
+ if (num_outputs != 1)
+ {
+ throw std::runtime_error("Only 1 output is supported for raw output");
+ }
+ try
+ {
+ for (uint32_t i = 0; i < num_outputs; i++)
+ {
+ nnfw_tensorinfo ti;
+ NNPR_ENSURE_STATUS(nnfw_output_tensorinfo(session_, i, &ti));
+ auto bufsz = bufsize_for(&ti);
+
+ std::ofstream file(filename + "." + std::to_string(i), std::ios::out | std::ios::binary);
+ file.write(reinterpret_cast<const char *>(outputs[i].data()), bufsz);
+ file.close();
+ std::cerr << filename + "." + std::to_string(i) + " is generated.\n";
+ }
+ }
+ catch (const std::runtime_error &e)
+ {
+ std::cerr << "Error during dumpOutputs on nnpackage_run : " << e.what() << std::endl;
+ std::exit(-1);
+ }
+}
+} // namespace nnpkg_run
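Since `--load:raw` requires the file size to match the input buffer size exactly, a raw input file is just the tensor's bytes. A sketch of preparing one with NumPy (file and package paths here are hypothetical):

```python
import numpy as np

# A single float32 input of 4 elements -> a 16-byte file
np.arange(4, dtype=np.float32).tofile("in.bin")

# Then, roughly:
#   nnpackage_run --load:raw in.bin --dump:raw out path/to/nnpkg
# dumpOutputs writes one file per output: out.0, out.1, ...
```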
diff --git a/tests/tools/nnpackage_run/src/rawformatter.h b/tests/tools/nnpackage_run/src/rawformatter.h
new file mode 100644
index 000000000..8bfc354c5
--- /dev/null
+++ b/tests/tools/nnpackage_run/src/rawformatter.h
@@ -0,0 +1,40 @@
+/*
+ * Copyright (c) 2022 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __NNPACKAGE_RUN_RAWFORMATTER_H__
+#define __NNPACKAGE_RUN_RAWFORMATTER_H__
+
+#include "allocation.h"
+#include "formatter.h"
+#include "types.h"
+
+#include <string>
+#include <vector>
+
+struct nnfw_session;
+
+namespace nnpkg_run
+{
+class RawFormatter : public Formatter
+{
+public:
+ RawFormatter(nnfw_session *sess) : Formatter(sess) {}
+ void loadInputs(const std::string &filename, std::vector<Allocation> &inputs) override;
+ void dumpOutputs(const std::string &filename, std::vector<Allocation> &outputs) override;
+};
+} // namespace nnpkg_run
+
+#endif // __NNPACKAGE_RUN_RAWFORMATTER_H__
diff --git a/tests/tools/tflite_comparator/src/tflite_comparator.cc b/tests/tools/tflite_comparator/src/tflite_comparator.cc
index 65a40b493..b7422ed3f 100644
--- a/tests/tools/tflite_comparator/src/tflite_comparator.cc
+++ b/tests/tools/tflite_comparator/src/tflite_comparator.cc
@@ -106,6 +106,8 @@ inline size_t sizeOfNnfwType(NNFW_TYPE type)
case NNFW_TYPE_TENSOR_QUANT8_ASYMM:
case NNFW_TYPE_TENSOR_QUANT8_ASYMM_SIGNED:
return 1;
+ case NNFW_TYPE_TENSOR_QUANT16_SYMM_SIGNED:
+ return 2;
case NNFW_TYPE_TENSOR_FLOAT32:
case NNFW_TYPE_TENSOR_INT32:
return 4;
@@ -236,6 +238,8 @@ int main(const int argc, char **argv)
case NNFW_TYPE_TENSOR_QUANT8_ASYMM_SIGNED:
randomData<int8_t>(randgen, inputs[i]);
break;
+ case NNFW_TYPE_TENSOR_QUANT16_SYMM_SIGNED:
+ randomData<int16_t>(randgen, inputs[i]);
+ break;
case NNFW_TYPE_TENSOR_FLOAT32:
randomData<float>(randgen, inputs[i]);
break;
diff --git a/tools/cross/install_rootfs.sh b/tools/cross/install_rootfs.sh
index fa32c7350..f03d52371 100755
--- a/tools/cross/install_rootfs.sh
+++ b/tools/cross/install_rootfs.sh
@@ -1,6 +1,4 @@
#!/usr/bin/env bash
-set -x
-
usage()
{
echo "Usage: $0 [BuildArch] [LinuxCodeName] [--setproxy=IP] [--skipunmount]"
@@ -29,7 +27,7 @@ __UbuntuPackages="build-essential"
# other development supports
__UbuntuPackages+=" ocl-icd-opencl-dev"
__UbuntuPackages+=" libhdf5-dev"
-__UbuntuBoostPackages=" libboost-all-dev"
+__UbuntuPackages+=" libboost-all-dev"
# symlinks fixer
__UbuntuPackages+=" symlinks"
@@ -77,7 +75,6 @@ for i in "$@" ; do
;;
focal)
__LinuxCodeName=focal
- __UbuntuBoostPackages=" libboost1.67-all-dev"
;;
--setproxy*)
proxyip="${i#*=}"
@@ -95,8 +92,7 @@ for i in "$@" ; do
esac
done
-# Current runtime build system supports boost version under 1.70
-__UbuntuPackages+="$__UbuntuBoostPackages"
+set -x
__RootfsDir="$__CrossDir/rootfs/$__BuildArch"
diff --git a/tools/nnpackage_tool/model2nnpkg/README.md b/tools/nnpackage_tool/model2nnpkg/README.md
index 9d4676e23..34fd49d8b 100644
--- a/tools/nnpackage_tool/model2nnpkg/README.md
+++ b/tools/nnpackage_tool/model2nnpkg/README.md
@@ -1,6 +1,6 @@
# model2nnpkg
-`model2nnpkg` is a tool to convert model (either `tflite` or `circle`) to `nnpackage`.
+`model2nnpkg` is a tool to convert model (e.g. `tflite`, `circle` or `tvn`) to `nnpackage`.
It takes `modelfile` as input and generates `nnpackage`.
@@ -8,7 +8,7 @@ It takes `modelfile` as input and generates `nnpackage`.
```
Usage: model2nnpkg.sh [options] modelfile
-Convert modelfile (either tflite or circle) to nnpackage.
+Convert modelfile to nnpackage.
Options:
-h show this help
diff --git a/tools/nnpackage_tool/model2nnpkg/model2nnpkg.sh b/tools/nnpackage_tool/model2nnpkg/model2nnpkg.sh
index 2b26e4068..9374af737 100755
--- a/tools/nnpackage_tool/model2nnpkg/model2nnpkg.sh
+++ b/tools/nnpackage_tool/model2nnpkg/model2nnpkg.sh
@@ -10,7 +10,7 @@ config_src=""
usage() {
echo "Usage: $progname [options] modelfile"
- echo "Convert modelfile (either tflite or circle) to nnpackage."
+ echo "Convert modelfile (tflite, circle or tvn) to nnpackage."
echo ""
echo "Options:"
echo " -h show this help"
@@ -78,7 +78,7 @@ fi
cat > "$outdir"/"$name"/metadata/MANIFEST <<-EOF
{
"major-version" : "1",
- "minor-version" : "1",
+ "minor-version" : "2",
"patch-version" : "0",
"configs" : [ "$config" ],
"models" : [ "$modelfile" ],
diff --git a/tools/tflitefile_tool/ir/README.md b/tools/tflitefile_tool/ir/README.md
new file mode 100644
index 000000000..2625dfb91
--- /dev/null
+++ b/tools/tflitefile_tool/ir/README.md
@@ -0,0 +1,5 @@
+# IR
+
+A model has one or more subgraphs. A subgraph has operators and tensors.
+
+The parser uses these IR classes as its data representation.
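A minimal sketch of how these wrappers compose (assuming the `ir` package is importable from `tools/tflitefile_tool`):

```python
from ir.tensor import Tensor
from ir.operator import Operator
from ir.subgraph import Subgraph

t = Tensor()
t.index, t.tensor_name, t.shape = 0, "input", [1, 2, 2, 1]

op = Operator()
op.index, op.op_name, op.inputs = 0, "ADD", [t]

subg = Subgraph()
subg.index = 0
subg.tensors_map = {t.index: t}
subg.operators_map = {op.index: op}
subg.optypes_map[op.op_name] = op.index  # appends to the per-type index list
```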
diff --git a/tools/tflitefile_tool/ir/__init__.py b/tools/tflitefile_tool/ir/__init__.py
new file mode 100644
index 000000000..e69de29bb
--- /dev/null
+++ b/tools/tflitefile_tool/ir/__init__.py
diff --git a/tools/tflitefile_tool/graph_stats.py b/tools/tflitefile_tool/ir/graph_stats.py
index 85acaefa6..5aebdbeaa 100755
--- a/tools/tflitefile_tool/graph_stats.py
+++ b/tools/tflitefile_tool/ir/graph_stats.py
@@ -39,43 +39,18 @@ class GraphStats():
return self
-def PrintGraphStats(stats, verbose):
- print("Number of all operator types: {0}".format(len(stats.op_counts)))
-
- # Print op type stats
- for op_name in sorted(stats.op_counts.keys()):
- occur = stats.op_counts[op_name]
- optype_info_str = "\t{:38}: {:4}".format(op_name, occur)
-
- print(optype_info_str)
-
- summary_str = "{0:46}: {1:4}".format("Number of all operators",
- sum(stats.op_counts.values()))
- print(summary_str)
- print('')
-
- # Print memory stats
- from tensor_printer import ConvertBytesToHuman
- print("Expected TOTAL memory: {0}".format(ConvertBytesToHuman(stats.total_memory)))
- print("Expected FILLED memory: {0}".format(ConvertBytesToHuman(stats.filled_memory)))
- print('')
-
-
-def CalcGraphStats(op_parser):
+def CalcGraphStats(subg):
stats = GraphStats()
- for type_str, oper_list in op_parser.operators_per_type.items():
+ for type_str, oper_list in subg.optypes_map.items():
# number of occurrence of this operator type
occur = len(oper_list)
stats.accumulate_op_count(type_str, occur)
- # this operator type can be computed?
- can_compute = oper_list[0].operation.can_compute
-
total_memory = 0
filled_memory = 0 # only memory for constant
- for tensor in op_parser.GetAllTensors():
- if tensor.tf_buffer.DataLength() != 0:
+ for index, tensor in subg.tensors_map.items():
+ if tensor.buffer is not None:
filled_memory += tensor.memory_size
total_memory += tensor.memory_size
stats.accumulate_filled_memory(filled_memory)
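With the new signature, stats are accumulated per subgraph rather than per operator parser. A hedged sketch of the intended flow (`subg_list` as produced by the new parser):

```python
stats = GraphStats()
for subg in subg_list:  # e.g. from ModelParser(model_file).Parse()
    stats += CalcGraphStats(subg)
```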
diff --git a/tools/tflitefile_tool/ir/operator.py b/tools/tflitefile_tool/ir/operator.py
new file mode 100644
index 000000000..0601e6119
--- /dev/null
+++ b/tools/tflitefile_tool/ir/operator.py
@@ -0,0 +1,108 @@
+#!/usr/bin/python
+
+# Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+"""
+NOTE
+- This class is a wrapper around a native class.
+- Just use this class as an interface.
+"""
+
+
+class Operator(object):
+ def __init__(self):
+ self._index = -1
+ self._inputs = []
+ self._outputs = []
+ self._op_name = ""
+ self._activation = ""
+ self._options = ""
+
+ '''index'''
+
+ @property
+ def index(self):
+ '''operator's int type index'''
+ return self._index
+
+ @index.setter
+ def index(self, value):
+ if not isinstance(value, int):
+ raise TypeError("must be set to an integer")
+ self._index = value
+
+ '''inputs'''
+
+ @property
+ def inputs(self):
+ '''Operator's input tensors as a list of Tensors'''
+ return self._inputs
+
+ @inputs.setter
+ def inputs(self, value):
+ if not isinstance(value, list):
+ raise TypeError("must be set to a list")
+ self._inputs = value
+
+ '''outputs'''
+
+ @property
+ def outputs(self):
+ '''Operator's output tensors as a list of Tensors'''
+ return self._outputs
+
+ @outputs.setter
+ def outputs(self, value):
+ if not isinstance(value, list):
+ raise TypeError("must be set to a list")
+ self._outputs = value
+
+ '''op_name'''
+
+ @property
+ def op_name(self):
+ '''Operator's name str'''
+ return self._op_name
+
+ @op_name.setter
+ def op_name(self, value):
+ if not isinstance(value, str):
+ raise TypeError("must be set to a str")
+ self._op_name = value
+
+ '''activation'''
+
+ @property
+ def activation(self):
+ '''Operator's activation str'''
+ return self._activation
+
+ @activation.setter
+ def activation(self, value):
+ if not isinstance(value, str):
+ raise TypeError("must be set to a str")
+ self._activation = value
+
+ '''options'''
+
+ @property
+ def options(self):
+ '''Operator's options str'''
+ return self._options
+
+ @options.setter
+ def options(self, value):
+ if not isinstance(value, str):
+ raise TypeError("must be set to a str")
+ self._options = value
diff --git a/tools/tflitefile_tool/ir/subgraph.py b/tools/tflitefile_tool/ir/subgraph.py
new file mode 100644
index 000000000..e68713480
--- /dev/null
+++ b/tools/tflitefile_tool/ir/subgraph.py
@@ -0,0 +1,170 @@
+#!/usr/bin/python
+
+# Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+from collections.abc import MutableMapping
+'''optype -> Operator Index List'''
+
+
+class OpTypesMap(MutableMapping):
+ def __init__(self, *args, **kwargs):
+ self.store = dict()
+ self.update(dict(*args, **kwargs))
+
+ def __getitem__(self, key):
+ return self.store[self._keytransform(key)]
+
+ def __setitem__(self, key, value):
+ k = self._keytransform(key)
+ if k not in self.store:
+ self.store[k] = []
+ self.store[k].append(value)
+
+ def __delitem__(self, key):
+ del self.store[self._keytransform(key)]
+
+ def __iter__(self):
+ return iter(self.store)
+
+ def __len__(self):
+ return len(self.store)
+
+ def _keytransform(self, key):
+ if not isinstance(key, str):
+ raise TypeError("must be set to a str")
+ return key
+
+
+"""
+NOTE
+- This class is a wrapper around a native class.
+- Just use this class as an interface.
+"""
+
+
+class Subgraph(object):
+ def __init__(self):
+ self._index = -1
+ self._inputs = []
+ self._outputs = []
+ self._subg_name = ""
+ self._model_name = ""
+ self._tensors_map = {}
+ self._operators_map = {}
+ self._optypes_map = OpTypesMap()
+
+ '''index'''
+
+ @property
+ def index(self):
+ '''Subgraph's int type index'''
+ return self._index
+
+ @index.setter
+ def index(self, value):
+ if not isinstance(value, int):
+ raise TypeError("must be set to an integer")
+ self._index = value
+
+ '''inputs'''
+
+ @property
+ def inputs(self):
+ '''Subgraph's input tensors as a list of Tensors'''
+ return self._inputs
+
+ @inputs.setter
+ def inputs(self, value):
+ if not isinstance(value, list):
+ raise TypeError("must be set to a list")
+ self._inputs = value
+
+ '''outputs'''
+
+ @property
+ def outputs(self):
+ '''Subgraph's output tensors as a list of Tensors'''
+ return self._outputs
+
+ @outputs.setter
+ def outputs(self, value):
+ if not isinstance(value, list):
+ raise TypeError("must be set to a list")
+ self._outputs = value
+
+ '''subg_name'''
+
+ @property
+ def subg_name(self):
+ '''Subgraph's name str'''
+ return self._subg_name
+
+ @subg_name.setter
+ def subg_name(self, value):
+ if not isinstance(value, str):
+ raise TypeError("must be set to a str")
+ self._subg_name = value
+
+ '''model_name'''
+
+ @property
+ def model_name(self):
+ '''Model name str'''
+ return self._model_name
+
+ @model_name.setter
+ def model_name(self, value):
+ if not isinstance(value, str):
+ raise TypeError("must be set to a str")
+ self._model_name = value
+
+ '''tensors_map'''
+
+ @property
+ def tensors_map(self):
+ '''All tensors in the subgraph (key: index, value: Tensor)'''
+ return self._tensors_map
+
+ @tensors_map.setter
+ def tensors_map(self, value):
+ if not isinstance(value, dict):
+ raise TypeError("must be set to a dict")
+ self._tensors_map = value
+
+ '''operators_map'''
+
+ @property
+ def operators_map(self):
+ '''Subgraph's operators (key: index, value: Operator)'''
+ return self._operators_map
+
+ @operators_map.setter
+ def operators_map(self, value):
+ if not isinstance(value, dict):
+ raise TypeError("must be set to a dict")
+ self._operators_map = value
+
+ '''optypes_map'''
+
+ @property
+ def optypes_map(self):
+ '''Subgraph's operators per type (key: optype, value: [op_indices])'''
+ return self._optypes_map
+
+ @optypes_map.setter
+ def optypes_map(self, value):
+ if not isinstance(value, OpTypesMap):
+ raise TypeError("must be set to an OpTypesMap")
+ self._optypes_map = value
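Note that `OpTypesMap.__setitem__` appends rather than overwrites, so repeated assignment to one key builds the per-type index list. A small demonstration:

```python
from ir.subgraph import OpTypesMap

m = OpTypesMap()
m["CONV_2D"] = 0     # creates the list and appends
m["CONV_2D"] = 3     # appends again, does not overwrite
print(m["CONV_2D"])  # [0, 3]
print(len(m))        # 1 op type
```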
diff --git a/tools/tflitefile_tool/ir/tensor.py b/tools/tflitefile_tool/ir/tensor.py
new file mode 100644
index 000000000..f0f35a74b
--- /dev/null
+++ b/tools/tflitefile_tool/ir/tensor.py
@@ -0,0 +1,120 @@
+#!/usr/bin/python
+
+# Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+"""
+NOTE
+- This class is a wrapper around a native class.
+- Just use this class as an interface.
+"""
+
+
+class Tensor(object):
+ def __init__(self):
+ self._index = -1
+ self._tensor_name = ""
+ self._buffer = None
+ self._buffer_index = -1
+ self._type_name = ""
+ self._shape = []
+ self._memory_size = -1
+
+ '''index'''
+
+ @property
+ def index(self):
+ '''Tensor's int type index'''
+ return self._index
+
+ @index.setter
+ def index(self, value):
+ if not isinstance(value, int):
+ raise TypeError("must be set to an integer")
+ self._index = value
+
+ '''tensor_name'''
+
+ @property
+ def tensor_name(self):
+ '''Tensor's name str'''
+ return self._tensor_name
+
+ @tensor_name.setter
+ def tensor_name(self, value):
+ if not isinstance(value, str):
+ raise TypeError("must be set to a str")
+ self._tensor_name = value
+
+ '''buffer'''
+
+ @property
+ def buffer(self):
+ '''Tensor's buffer as a numpy array; None if the tensor has no data'''
+ return self._buffer
+
+ @buffer.setter
+ def buffer(self, value):
+ self._buffer = value
+
+ '''buffer_index'''
+
+ @property
+ def buffer_index(self):
+ '''Tensor's int type buffer index'''
+ return self._buffer_index
+
+ @buffer_index.setter
+ def buffer_index(self, value):
+ if not isinstance(value, int):
+ raise TypeError("must be set to an integer")
+ self._buffer_index = value
+
+ '''type_name'''
+
+ @property
+ def type_name(self):
+ '''Tensor's type name str'''
+ return self._type_name
+
+ @type_name.setter
+ def type_name(self, value):
+ if not isinstance(value, str):
+ raise TypeError("must be set to a str")
+ self._type_name = value
+
+ '''shape'''
+
+ @property
+ def shape(self):
+ '''Tensor's shape as a list'''
+ return self._shape
+
+ @shape.setter
+ def shape(self, value):
+ if not isinstance(value, list):
+ raise TypeError("must be set to a list")
+ self._shape = value
+
+ '''memory_size'''
+
+ @property
+ def memory_size(self):
+ '''Tensor's memory size as int type'''
+ return self._memory_size
+
+ @memory_size.setter
+ def memory_size(self, value):
+ if not isinstance(value, int):
+ raise TypeError("must be set to an integer")
+ self._memory_size = value
diff --git a/tools/tflitefile_tool/model_parser.py b/tools/tflitefile_tool/model_parser.py
index ed534c14c..76c43acfc 100755
--- a/tools/tflitefile_tool/model_parser.py
+++ b/tools/tflitefile_tool/model_parser.py
@@ -1,6 +1,6 @@
#!/usr/bin/env python
-# Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
+# Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
@@ -13,29 +13,26 @@
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
+'''
+Why is this file named `model_parser.py`, the same as `parser/model_parser.py`?
+- Until now, users have invoked it via the path `tools/tflitefile_tool/model_parser.py`.
+- Let's rename it to something proper like `main.py` once the revision task is done.
+'''
-import os
-import sys
-import numpy
-import flatbuffers
-import tflite.Model
-import tflite.SubGraph
import argparse
-import graph_stats
-from operator_parser import OperatorParser
-from subgraph_printer import SubgraphPrinter
-from model_saver import ModelSaver
+from parser.model_parser import ModelParser
+from printer.subgraph_printer import SubgraphPrinter
+from saver.model_saver import ModelSaver
-class TFLiteModelFileParser(object):
+class MainOption(object):
def __init__(self, args):
- # Read flatbuffer file descriptor using argument
- self.tflite_file = args.input_file
+ self.model_file = args.input_file
- # Set print level (0 ~ 1)
+ # Set print level (0 ~ 2)
self.print_level = args.verbose
- if (args.verbose > 1):
- self.print_level = 1
+ if (args.verbose > 2):
+ self.print_level = 2
if (args.verbose < 0):
self.print_level = 0
@@ -66,54 +63,24 @@ class TFLiteModelFileParser(object):
if self.save == True:
self.save_prefix = args.prefix
- def PrintModel(self, model_name, op_parser):
- printer = SubgraphPrinter(self.print_level, op_parser, model_name)
- if self.print_all_tensor == False:
- printer.SetPrintSpecificTensors(self.print_tensor_index)
+def PrintSubgraph(option, subg):
+ printer = SubgraphPrinter(option.print_level, subg)
- if self.print_all_operator == False:
- printer.SetPrintSpecificOperators(self.print_operator_index)
+ if option.print_all_tensor == False:
+ printer.SetPrintSpecificTensors(option.print_tensor_index)
- printer.PrintInfo()
+ if option.print_all_operator == False:
+ printer.SetPrintSpecificOperators(option.print_operator_index)
- def SaveModel(self, model_name, op_parser):
- saver = ModelSaver(model_name, op_parser)
+ printer.PrintInfo()
- if self.save_config == True:
- saver.SaveConfigInfo(self.save_prefix)
- def main(self):
- # Generate Model: top structure of tflite model file
- buf = self.tflite_file.read()
- buf = bytearray(buf)
- tf_model = tflite.Model.Model.GetRootAsModel(buf, 0)
+def SaveSubgraph(option, subg):
+ saver = ModelSaver(subg)
- stats = graph_stats.GraphStats()
- # Model file can have many models
- for subgraph_index in range(tf_model.SubgraphsLength()):
- tf_subgraph = tf_model.Subgraphs(subgraph_index)
- model_name = "#{0} {1}".format(subgraph_index, tf_subgraph.Name())
- # 0th subgraph is main subgraph
- if (subgraph_index == 0):
- model_name += " (MAIN)"
-
- # Parse Operators
- op_parser = OperatorParser(tf_model, tf_subgraph)
- op_parser.Parse()
-
- stats += graph_stats.CalcGraphStats(op_parser)
-
- if self.save == False:
- # print all of operators or requested objects
- self.PrintModel(model_name, op_parser)
- else:
- # save all of operators in this model
- self.SaveModel(model_name, op_parser)
-
- print('==== Model Stats ({} Subgraphs) ===='.format(tf_model.SubgraphsLength()))
- print('')
- graph_stats.PrintGraphStats(stats, self.print_level)
+ if option.save_config == True:
+ saver.SaveConfigInfo(option.save_prefix)
if __name__ == '__main__':
@@ -138,6 +105,14 @@ if __name__ == '__main__':
arg_parser.add_argument(
'-p', '--prefix', help="file prefix to be saved (with -c/--config option)")
args = arg_parser.parse_args()
+ option = MainOption(args)
+
+ subg_list = ModelParser(option.model_file).Parse()
- # Call main function
- TFLiteModelFileParser(args).main()
+ for subg in subg_list:
+ if option.save == False:
+ # print all operators or the requested objects
+ PrintSubgraph(option, subg)
+ else:
+ # save all operators in this model
+ SaveSubgraph(option, subg)
diff --git a/tools/tflitefile_tool/operation.py b/tools/tflitefile_tool/operation.py
deleted file mode 100755
index 6aa752772..000000000
--- a/tools/tflitefile_tool/operation.py
+++ /dev/null
@@ -1,209 +0,0 @@
-#!/usr/bin/python
-
-# Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-# http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-
-import tflite.Conv2DOptions
-import tflite.Pool2DOptions
-import tflite.BuiltinOptions
-import tflite.Tensor
-from tensor_wrapping import Tensor
-import math
-
-
-# NOTICE
-# - an internal class. do not import outside this file.
-# - REF: https://stackoverflow.com/questions/551038/private-implementation-class-in-python
-class _OperationComputeMethod(object):
- '''
- NOTE: How to count operations of convolution(and also pooling)?
-
- If we know operations of output's one element, we can calculate total output's operations.
- For example, consider output Shape[3,3]
- [ e11 e12 e13 ]
- [ e21 e22 e23 ]
- [ e31 e32 e33 ]
- If we know operations for calculation of e11, we can know total operations of output(e11, e12, ... e33)
- by operations of e11 * 9(total number of elements)
-
- So we only need to know how to calculate operations of e11.
- For this, just think how to conv operation to the output's element
- If input_channel is 1, we can only think of kernel_size(kernel_w and kernel_h).
- For example, consider input Shape[3,3] and kernel Shape[2,2]
- [ i11 i12 i13 ] [ k11 k12 ] [ o11 o12 o13 ]
- [ i21 i22 i23 ] * [ k21 k22 ] = [ o21 o22 o23 ]
- [ i31 i32 i33 ] [ o31 o32 o33 ]
-
- Conv operation: for o11, i11 * k11 + i21 * k21 + i12 * k12 + i22 * k22 = o11
- On above conv operation, mul operations are done at 4 times(== kernel_w * kernel_h)
- and add operations are dont at 3 times(== kernel_w * kernel_h - 1)
- and also, bias will be done and it will be counted on add operations.
-
- Anyway, we can calculate total operations on this way. This can apply to the way of pooling.
- '''
-
- def ComputeOperationForConv2D(self, tf_operator, inputs, outputs):
- assert (
- tf_operator.BuiltinOptionsType() == tflite.BuiltinOptions.BuiltinOptions()
- .Conv2DOptions)
-
- # NOTE: Assume that conv2d operator always take 3 tensors as inputs
- # and both width and height are the same.
- # operator_inputs[]: [input_tensor, weight_tensor, bias_tensor]
- # operator_outputs[]: [output_tensor]
- # tflite's tensor shape: [N,H,W,C]
- input_tensor = inputs[0].tf_tensor
- weight_tensor = inputs[1].tf_tensor
- output_tensor = outputs[0].tf_tensor
-
- # kernel_ops = (kernel_w * kernel_h * input_channel * 2(multiply and add))
- kernel_ops = (
- weight_tensor.Shape(2) * weight_tensor.Shape(1) * input_tensor.Shape(3))
-
- # total ops
- # = batch_size * output_channel * output_width * output_height * kernel_ops
- total_ops = (output_tensor.Shape(0) * output_tensor.Shape(3) *
- output_tensor.Shape(2) * output_tensor.Shape(1))
-
- add_instr_num = (total_ops * (kernel_ops + 1)) # bias
- mul_instr_num = (total_ops * (kernel_ops))
- nonlinear_instr_num = 0
- return (add_instr_num, mul_instr_num, nonlinear_instr_num)
-
- # NOTE: Reference the comment 'NOTE' of ComputeOperationForConv2D
-
- def ComputeOperationForPooling(self, tf_operator, inputs, outputs):
- assert (
- tf_operator.BuiltinOptionsType() == tflite.BuiltinOptions.BuiltinOptions()
- .Pool2DOptions)
-
- dummy_input_tensor = inputs[0].tf_tensor
- output_tensor = outputs[0].tf_tensor
-
- pool2d_options = tflite.Pool2DOptions.Pool2DOptions()
- pool2d_options.Init(tf_operator.BuiltinOptions().Bytes,
- tf_operator.BuiltinOptions().Pos)
-
- # kernel_ops = kernel_w * kernel_h
- kernel_ops = (pool2d_options.FilterWidth() * pool2d_options.FilterHeight())
-
- # total ops
- # = batch_size * output_channel * output_width * output_height *
- # kernel_ops(kernel_w * kernel_h)
- total_ops = (output_tensor.Shape(0) * output_tensor.Shape(3) *
- output_tensor.Shape(2) * output_tensor.Shape(1))
-
- add_instr_num = (total_ops * kernel_ops - 1)
- mul_instr_num = (total_ops * kernel_ops)
- nonlinear_instr_num = 0
- return (add_instr_num, mul_instr_num, nonlinear_instr_num)
-
- def ComputeOperationForSoftmax(self, tf_operator, inputs, outputs):
- assert (
- tf_operator.BuiltinOptionsType() == tflite.BuiltinOptions.BuiltinOptions()
- .SoftmaxOptions)
-
- input_tensor = inputs[0].tf_tensor
-
- dummy_batch_size = input_tensor.Shape(0)
- input_dim = input_tensor.Shape(1)
-
- # Softmax(x_i) = exp(x_i) / sum of exp(x)
- add_instr_num = input_dim - 1 # sum of exp(x)
- mul_instr_num = input_dim # /
- nonlinear_instr_num = input_dim + input_dim # sum of exp(x) and exp(x_i)
- return (add_instr_num, mul_instr_num, nonlinear_instr_num)
-
- def ComputeOperationForFullyConnected(self, tf_operator, inputs, outputs):
- assert (
- tf_operator.BuiltinOptionsType() == tflite.BuiltinOptions.BuiltinOptions()
- .FullyConnectedOptions)
-
- # NOTE: Assume that fully_connected operator always take 3 tensors as inputs
- # and its X tensor's shape is [1, 1, 1, input_dim] with
- # its output Y [1, output_dim]
- input_tensor = inputs[0].tf_tensor
- output_tensor = outputs[0].tf_tensor
-
- # ops_per_element
- # = input_dim(multiplication) + input_dim-1(addition) + 1(bias)
- # total_ops
- # = ops_per_elem * output_dim
-
- add_instr_num = (input_tensor.Shape(3) * output_tensor.Shape(1))
- mul_instr_num = (input_tensor.Shape(3) * output_tensor.Shape(1))
- nonlinear_instr_num = 0
- return (add_instr_num, mul_instr_num, nonlinear_instr_num)
-
- def ComputeOperationForNothing(self, tf_operator, inputs, outputs):
- add_instr_num = 0
- mul_instr_num = 0
- nonlinear_instr_num = 0
- return (add_instr_num, mul_instr_num, nonlinear_instr_num)
-
- def NYI_ComputeOperation(self, tf_operator, inputs, outputs):
- pass
-
- operation_to_method_map = {
- # Inceptionv3
- "CONV_2D": ComputeOperationForConv2D,
- "AVERAGE_POOL_2D": ComputeOperationForPooling,
- "MAX_POOL_2D": ComputeOperationForPooling,
- "SOFTMAX": ComputeOperationForSoftmax,
- "FULLY_CONNECTED": ComputeOperationForFullyConnected,
- "CONCATENATION": ComputeOperationForNothing,
- # Extension
- "TOPK_V2": NYI_ComputeOperation,
- "SUB": NYI_ComputeOperation,
- "STRIDED_SLICE": NYI_ComputeOperation,
- "RESHAPE": NYI_ComputeOperation,
- "GATHER": NYI_ComputeOperation,
- "RESIZE_BILINEAR": NYI_ComputeOperation,
- "CAST": NYI_ComputeOperation,
- "ADD": NYI_ComputeOperation,
- "MUL": NYI_ComputeOperation,
- "DIV": NYI_ComputeOperation,
- "CUSTOM(TensorFlowMax)": NYI_ComputeOperation,
- "CUSTOM": NYI_ComputeOperation,
- }
-
-
-class Operation(object):
- def __init__(self, tf_operator, operator_str, inputs, outputs):
- self.tf_operator = tf_operator
- self.operator_str = operator_str
- self.inputs = inputs
- self.outputs = outputs
- self.add_instr_num = 0
- self.mul_instr_num = 0
- self.nonlinear_instr_num = 0
- self.can_compute = True
- self.Compute()
-
- def Compute(self):
- comp_map = _OperationComputeMethod().operation_to_method_map
- if not self.operator_str in comp_map.keys():
- self.can_compute = False
- return
-
- method = comp_map[self.operator_str]
- if method.__name__ == _OperationComputeMethod().NYI_ComputeOperation.__name__:
- self.can_compute = False
- return
-
- self.add_instr_num, self.mul_instr_num, self.nonlinear_instr_num = method(
- _OperationComputeMethod(), self.tf_operator, self.inputs, self.outputs)
-
- def TotalInstrNum(self):
- return (self.add_instr_num + self.mul_instr_num + self.nonlinear_instr_num)
diff --git a/tools/tflitefile_tool/operator_parser.py b/tools/tflitefile_tool/operator_parser.py
deleted file mode 100755
index 2c230c275..000000000
--- a/tools/tflitefile_tool/operator_parser.py
+++ /dev/null
@@ -1,97 +0,0 @@
-#!/usr/bin/python
-
-# Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-# http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-
-import tflite.Model
-import tflite.SubGraph
-import tflite.Operator
-import tflite.OperatorCode
-import tflite.BuiltinOperator
-from operator_wrapping import Operator, EnumStrMaps
-from tensor_wrapping import Tensor, SetTensorTypeStr
-from operation import Operation
-
-
-class OperatorParser(object):
- def __init__(self, tf_model, tf_subgraph):
- self.tf_model = tf_model
- self.tf_subgraph = tf_subgraph
- self.operators_in_list = list()
- self.operators_per_type = dict()
- # Tensor type string table
- SetTensorTypeStr()
-
- def Parse(self):
- for operator_idx in range(self.tf_subgraph.OperatorsLength()):
- tf_operator = self.tf_subgraph.Operators(operator_idx)
- opcode_str = self.GetOpcodeStr(tf_operator)
- input_tensors = self.GetInputTensors(tf_operator)
- output_tensors = self.GetOutputTensors(tf_operator)
-
- op = Operator(operator_idx, tf_operator, input_tensors, output_tensors,
- opcode_str)
- self.AppendOperator(op)
-
- def GetOpcodeStr(self, tf_operator):
- opcode_list_idx = tf_operator.OpcodeIndex()
- opcode_id = self.tf_model.OperatorCodes(opcode_list_idx).BuiltinCode()
- opcode_str = EnumStrMaps.BuiltinOpcode[opcode_id]
- if opcode_id == 32:
- # Custom operator
- custom_operator = self.tf_model.OperatorCodes(tf_operator.OpcodeIndex())
- custom_op_name = custom_operator.CustomCode().decode('utf-8')
- opcode_str = opcode_str + "(" + custom_op_name + ")"
- return opcode_str
-
- def GetInputTensors(self, tf_operator):
- operator_inputs = tf_operator.InputsAsNumpy()
- return self.GetTensors(operator_inputs)
-
- def GetOutputTensors(self, tf_operator):
- operator_outputs = tf_operator.OutputsAsNumpy()
- return self.GetTensors(operator_outputs)
-
- def GetTensors(self, tf_tensors_index):
- return_list = list()
- for tensor_idx in tf_tensors_index:
- # in case of optional input, tensor_idx == -1
- if (tensor_idx < 0):
- return_list.append(Tensor(tensor_idx, None, None))
- continue
- tf_tensor = self.tf_subgraph.Tensors(tensor_idx)
- buffer_idx = tf_tensor.Buffer()
- tf_buffer = self.tf_model.Buffers(buffer_idx)
- return_list.append(Tensor(tensor_idx, tf_tensor, tf_buffer))
- return return_list
-
- def GetAllTensors(self):
- return_list = list()
- for tensor_idx in range(self.tf_subgraph.TensorsLength()):
- if (tensor_idx < 0):
- return_list.append(Tensor(tensor_idx, 0, 0))
- continue
- tf_tensor = self.tf_subgraph.Tensors(tensor_idx)
- buffer_idx = tf_tensor.Buffer()
- tf_buffer = self.tf_model.Buffers(buffer_idx)
- return_list.append(Tensor(tensor_idx, tf_tensor, tf_buffer))
- return return_list
-
- def AppendOperator(self, operator):
- self.operators_in_list.append(operator)
-
- opcode_str = operator.opcode_str
- if opcode_str not in self.operators_per_type:
- self.operators_per_type[opcode_str] = list()
- self.operators_per_type[opcode_str].append(operator)
diff --git a/tools/tflitefile_tool/operator_printer.py b/tools/tflitefile_tool/operator_printer.py
deleted file mode 100755
index e7c553394..000000000
--- a/tools/tflitefile_tool/operator_printer.py
+++ /dev/null
@@ -1,59 +0,0 @@
-#!/usr/bin/python
-
-# Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-# http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-
-from operator_wrapping import Operator
-from tensor_printer import TensorPrinter
-from option_printer import OptionPrinter
-
-
-def GetStrTensorIndex(tensors):
- return_string = "["
- for idx in range(len(tensors)):
- if idx != 0:
- return_string += ", "
- return_string += str(tensors[idx].tensor_idx)
- return_string += "]"
- return return_string
-
-
-class OperatorPrinter(object):
- def __init__(self, verbose, operator):
- self.verbose = verbose
- self.operator = operator
-
- def PrintInfo(self):
- if (self.verbose < 1):
- return
-
- op_str = "Operator {0}: {1}".format(self.operator.operator_idx,
- self.operator.opcode_str)
-
- print(op_str)
- print("\tFused Activation: " + self.operator.fused_activation)
- self.PrintTensors()
-
- def PrintTensors(self):
- print("\tInput Tensors" + GetStrTensorIndex(self.operator.inputs))
- for tensor in self.operator.inputs:
- TensorPrinter(self.verbose, tensor).PrintInfo("\t\t")
- print("\tOutput Tensors" + GetStrTensorIndex(self.operator.outputs))
- for tensor in self.operator.outputs:
- TensorPrinter(self.verbose, tensor).PrintInfo("\t\t")
-
- # operator option
-    # Some operators do not have options; in that case, nothing is printed
- OptionPrinter(self.verbose, self.operator.opcode_str,
- self.operator.options).PrintInfo("\t")
diff --git a/tools/tflitefile_tool/operator_wrapping.py b/tools/tflitefile_tool/operator_wrapping.py
deleted file mode 100755
index 64bad1f08..000000000
--- a/tools/tflitefile_tool/operator_wrapping.py
+++ /dev/null
@@ -1,115 +0,0 @@
-#!/usr/bin/python
-
-# Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-# http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-
-import tflite.Operator
-import tflite.OperatorCode
-import tflite.BuiltinOperator
-import tflite.ActivationFunctionType
-from operation import Operation
-
-
-# Map enum integer values to their name strings
-# Assumption 1: enum values are defined in the old style (compatible with Python 2)
-# Assumption 2: the class defines only constant values, no methods
-# Assumption 3: only integer values are assigned to the constants
-def BuildEnumClassStrMap(obj):
- ret = {}
- for fieldName in dir(obj):
- if (not fieldName.startswith('_')):
- fieldValue = getattr(obj, fieldName)
- if (isinstance(fieldValue, (int))):
- ret[fieldValue] = fieldName
- return ret
-
-
-class EnumStrMaps():
- BuiltinOpcode = BuildEnumClassStrMap(tflite.BuiltinOperator.BuiltinOperator())
- ActivationFunctionType = BuildEnumClassStrMap(
- tflite.ActivationFunctionType.ActivationFunctionType())
- BuiltinOptions = BuildEnumClassStrMap(tflite.BuiltinOptions.BuiltinOptions())
-
-
-def GetAttribute(o, *args):
- import functools
- return functools.reduce(getattr, args, o)
-
-
-def BuildBuiltinOptionGen():
- bo_gen = {}
- for val_enum in EnumStrMaps.BuiltinOptions:
- val_str = EnumStrMaps.BuiltinOptions[val_enum]
- try:
- # Dynamically import Builtin Option classes
-            # 0 (NONE) is the only exception that has no corresponding flatbuffer-generated class
- module = __import__("tflite." + val_str)
- bo_gen[val_enum] = GetAttribute(module, val_str, val_str)
- except ImportError as e:
- assert val_enum == 0 and val_str == "NONE"
- return bo_gen
-
-
-class OptionLoader:
- builtinOptionGen = BuildBuiltinOptionGen()
-
- @staticmethod
- def GetBuiltinOptions(options_type, options_table):
- if (options_table == None) and (options_type != 0):
- print(
- "Bad flatbuffer file: undefined builtin option table with defined option type"
- )
- exit(1)
- options = OptionLoader.builtinOptionGen[options_type]()
- options.Init(options_table.Bytes, options_table.Pos)
- return options
-
-
-class Operator(object):
- def __init__(self, operator_idx, tf_operator, input_tensors, output_tensors,
- opcode_str):
- self.operator_idx = operator_idx
- self.tf_operator = tf_operator
- self.inputs = input_tensors
- self.outputs = output_tensors
- self.opcode_str = opcode_str
- self.operation = Operation(self.tf_operator, self.opcode_str, self.inputs,
- self.outputs)
- self.fused_activation = "NONE"
- self.SetupBuiltinOption()
- self.SetupFusedActivation()
-
- def SetupBuiltinOption(self):
- try:
- self.options = OptionLoader.GetBuiltinOptions(
- self.tf_operator.BuiltinOptionsType(), self.tf_operator.BuiltinOptions())
- except KeyError:
- self.options = 0
- return
-
- def SetupFusedActivation(self):
-        # FIXME: workaround for ops without builtin options, such as custom ops
- try:
- options = OptionLoader.GetBuiltinOptions(
- self.tf_operator.BuiltinOptionsType(), self.tf_operator.BuiltinOptions())
- except KeyError:
- return
-
- # fused activation function
- try:
- activation_code = options.FusedActivationFunction()
- self.fused_activation = EnumStrMaps.ActivationFunctionType[activation_code]
- except AttributeError:
- # This operator does not support FusedActivationFunction
- pass
diff --git a/tools/tflitefile_tool/option_printer.py b/tools/tflitefile_tool/option_printer.py
deleted file mode 100755
index 15265adf2..000000000
--- a/tools/tflitefile_tool/option_printer.py
+++ /dev/null
@@ -1,67 +0,0 @@
-#!/usr/bin/python
-
-# Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-# http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-
-
-class OptionPrinter(object):
- def __init__(self, verbose, op_name, options):
- self.verbose = verbose
- self.op_name = op_name
- self.options = options
-
- def GetPadding(self):
- if self.options.Padding() == 0:
- return "SAME"
- elif self.options.Padding() == 1:
- return "VALID"
- else:
- return "** wrong padding value **"
-
- def PrintInfo(self, tab=""):
- if (self.verbose < 1):
- pass
- if (self.options == 0):
- return
-
- option_str = self.GetOptionString()
- if option_str:
- print("{}Options".format(tab))
- print("{}\t{}".format(tab, option_str))
-
- def GetOptionString(self):
- if (self.op_name == "AVERAGE_POOL_2D" or self.op_name == "MAX_POOL_2D"):
- return "{}, {}, {}".format(
- "Filter W:H = {}:{}".format(self.options.FilterWidth(),
- self.options.FilterHeight()),
- "Stride W:H = {}:{}".format(self.options.StrideW(),
- self.options.StrideH()),
- "Padding = {}".format(self.GetPadding()))
- elif (self.op_name == "CONV_2D"):
- return "{}, {}, {}".format(
- "Stride W:H = {}:{}".format(self.options.StrideW(),
- self.options.StrideH()),
- "Dilation W:H = {}:{}".format(self.options.DilationWFactor(),
- self.options.DilationHFactor()),
- "Padding = {}".format(self.GetPadding()))
- elif (self.op_name == "DEPTHWISE_CONV_2D"):
- # yapf: disable
- return "{}, {}, {}, {}".format(
- "Stride W:H = {}:{}".format(self.options.StrideW(),
- self.options.StrideH()),
- "Dilation W:H = {}:{}".format(self.options.DilationWFactor(),
- self.options.DilationHFactor()),
- "Padding = {}".format(self.GetPadding()),
- "DepthMultiplier = {}".format(self.options.DepthMultiplier()))
- # yapf: enable
diff --git a/tools/tflitefile_tool/parser/__init__.py b/tools/tflitefile_tool/parser/__init__.py
new file mode 100644
index 000000000..e69de29bb
--- /dev/null
+++ b/tools/tflitefile_tool/parser/__init__.py
diff --git a/tools/tflitefile_tool/parser/model_parser.py b/tools/tflitefile_tool/parser/model_parser.py
new file mode 100755
index 000000000..68cd31a23
--- /dev/null
+++ b/tools/tflitefile_tool/parser/model_parser.py
@@ -0,0 +1,31 @@
+#!/usr/bin/env python
+
+# Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+from parser.tflite.tflite_parser import TFLiteParser
+
+
+class ModelParser(object):
+ def __init__(self, model_file):
+ self.parser = None
+ # model_file: _io.BufferedReader
+ if model_file.name.endswith("tflite"):
+ self.parser = TFLiteParser(model_file)
+        # TODO: Add more parsers
+
+ def Parse(self):
+ if self.parser is None:
+ raise NotImplementedError
+ return self.parser.Parse()
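A minimal usage sketch for the new entry point, assuming the tool is run from `tools/tflitefile_tool/` (so the `parser` package resolves) and reusing the `add.tflite` example mentioned in `tests/README.md`; the path is illustrative:

```python
from parser.model_parser import ModelParser

# ModelParser dispatches on the file suffix; only ".tflite" is handled so far.
with open("add.tflite", "rb") as model_file:
    subg_list = ModelParser(model_file).Parse()

for subg in subg_list:
    print(subg.model_name)
```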
diff --git a/tools/tflitefile_tool/parser/tflite/tflite_enum_str_maps.py b/tools/tflitefile_tool/parser/tflite/tflite_enum_str_maps.py
new file mode 100644
index 000000000..6a3a2054f
--- /dev/null
+++ b/tools/tflitefile_tool/parser/tflite/tflite_enum_str_maps.py
@@ -0,0 +1,40 @@
+#!/usr/bin/python
+
+# Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+import tflite.BuiltinOperator
+import tflite.ActivationFunctionType
+import tflite.BuiltinOptions
+
+
+# Map enum integer values to their name strings
+# Assumption 1: enum values are defined in the old style (compatible with Python 2)
+# Assumption 2: the class defines only constant values, no methods
+# Assumption 3: only integer values are assigned to the constants
+def BuildEnumClassStrMap(obj):
+ ret = {}
+ for fieldName in dir(obj):
+ if (not fieldName.startswith('_')):
+ fieldValue = getattr(obj, fieldName)
+ if (isinstance(fieldValue, (int))):
+ ret[fieldValue] = fieldName
+ return ret
+
+
+class EnumStrMaps():
+ BuiltinOpcode = BuildEnumClassStrMap(tflite.BuiltinOperator.BuiltinOperator())
+ ActivationFunctionType = BuildEnumClassStrMap(
+ tflite.ActivationFunctionType.ActivationFunctionType())
+ BuiltinOptions = BuildEnumClassStrMap(tflite.BuiltinOptions.BuiltinOptions())
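To show what `BuildEnumClassStrMap` produces, here is a self-contained sketch using a stand-in for a flatbuffers-generated enum class; `FakeActivationFunctionType` and its values are invented for the example:

```python
class FakeActivationFunctionType(object):
    # old-style enum: constants only, no methods
    NONE = 0
    RELU = 1
    RELU6 = 3

def BuildEnumClassStrMap(obj):
    ret = {}
    for fieldName in dir(obj):
        if not fieldName.startswith('_'):
            fieldValue = getattr(obj, fieldName)
            if isinstance(fieldValue, int):
                ret[fieldValue] = fieldName
    return ret

print(BuildEnumClassStrMap(FakeActivationFunctionType()))
# {0: 'NONE', 1: 'RELU', 3: 'RELU6'}
```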
diff --git a/tools/tflitefile_tool/parser/tflite/tflite_operator.py b/tools/tflitefile_tool/parser/tflite/tflite_operator.py
new file mode 100755
index 000000000..211007e1c
--- /dev/null
+++ b/tools/tflitefile_tool/parser/tflite/tflite_operator.py
@@ -0,0 +1,63 @@
+#!/usr/bin/python
+
+# Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+from ir.operator import Operator
+from .tflite_enum_str_maps import EnumStrMaps
+from .tflite_option import OptionLoader, GetStringOptions
+
+
+class TFLiteOperator(Operator):
+ def __init__(self, operator_idx, tf_operator, input_tensors, output_tensors,
+ opcode_str):
+ super(TFLiteOperator, self).__init__()
+
+ self.index = operator_idx
+ self.inputs = input_tensors
+ self.outputs = output_tensors
+ self.op_name = opcode_str
+ self.activation = "NONE"
+ self.options = ""
+
+ self.tf_operator = tf_operator
+ self.tf_options = None
+ self.SetupBuiltinOption()
+ self.SetupFusedActivation()
+
+ def SetupBuiltinOption(self):
+        # FIXME: workaround for ops without builtin options, such as custom ops
+ try:
+ self.tf_options = OptionLoader.GetBuiltinOptions(
+ self.tf_operator.BuiltinOptionsType(), self.tf_operator.BuiltinOptions())
+            if self.tf_options is None:
+ return
+
+ option_str = GetStringOptions(self.op_name, self.tf_options)
+ if option_str is None:
+ return
+
+ self.options = option_str
+ except KeyError:
+ return
+
+ def SetupFusedActivation(self):
+        if self.tf_options is None:
+ return
+ try:
+ activation_code = self.tf_options.FusedActivationFunction()
+ self.activation = EnumStrMaps.ActivationFunctionType[activation_code]
+ except AttributeError:
+ # This operator does not support FusedActivationFunction
+ pass
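The try/except flow above works because `OptionLoader.GetBuiltinOptions` raises `KeyError` for option types that have no generated class (notably custom ops), so the operator keeps its defaults. A minimal stand-alone sketch of that pattern, with an invented option table:

```python
option_gen = {1: lambda: "Conv2DOptions"}  # stand-in for OptionLoader.builtinOptionGen

def load_options(options_type):
    try:
        return option_gen[options_type]()
    except KeyError:
        return None  # unknown or custom op: the caller keeps its defaults

print(load_options(1))   # Conv2DOptions
print(load_options(99))  # None
```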
diff --git a/tools/tflitefile_tool/parser/tflite/tflite_option.py b/tools/tflitefile_tool/parser/tflite/tflite_option.py
new file mode 100644
index 000000000..b85fbae90
--- /dev/null
+++ b/tools/tflitefile_tool/parser/tflite/tflite_option.py
@@ -0,0 +1,96 @@
+#!/usr/bin/python
+
+# Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+from .tflite_enum_str_maps import EnumStrMaps
+
+
+def GetAttribute(o, *args):
+ import functools
+ return functools.reduce(getattr, args, o)
+
+
+def BuildBuiltinOptionGen():
+ bo_gen = {}
+ for val_enum in EnumStrMaps.BuiltinOptions:
+ val_str = EnumStrMaps.BuiltinOptions[val_enum]
+ try:
+ # Dynamically import Builtin Option classes
+            # 0 (NONE) is the only exception that has no corresponding flatbuffer-generated class
+ module = __import__("tflite." + val_str)
+ bo_gen[val_enum] = GetAttribute(module, val_str, val_str)
+ except ImportError as e:
+ assert val_enum == 0 and val_str == "NONE"
+ return bo_gen
+
+
+class OptionLoader:
+ builtinOptionGen = BuildBuiltinOptionGen()
+
+ @staticmethod
+ def GetBuiltinOptions(options_type, options_table):
+        if (options_table is None) and (options_type != 0):
+ print(
+ "Bad flatbuffer file: undefined builtin option table with defined option type"
+ )
+ exit(1)
+ options = OptionLoader.builtinOptionGen[options_type]()
+ options.Init(options_table.Bytes, options_table.Pos)
+ return options
+
+
+def GetStringPadding(options):
+ if options.Padding() == 0:
+ return "SAME"
+ elif options.Padding() == 1:
+ return "VALID"
+ else:
+ return "** wrong padding value **"
+
+
+def GetStringOptions(op_name, options):
+ if (op_name == "AVERAGE_POOL_2D" or op_name == "MAX_POOL_2D"):
+ return "{}, {}, {}".format(
+ "Filter W:H = {}:{}".format(options.FilterWidth(), options.FilterHeight()),
+ "Stride W:H = {}:{}".format(options.StrideW(),
+ options.StrideH()), "Padding = {}".format(
+ GetStringPadding(options)))
+ elif (op_name == "CONV_2D"):
+ return "{}, {}, {}".format(
+ "Stride W:H = {}:{}".format(options.StrideW(), options.StrideH()),
+ "Dilation W:H = {}:{}".format(options.DilationWFactor(),
+ options.DilationHFactor()),
+ "Padding = {}".format(GetStringPadding(options)))
+ elif (op_name == "DEPTHWISE_CONV_2D"):
+ # yapf: disable
+ return "{}, {}, {}, {}".format(
+ "Stride W:H = {}:{}".format(options.StrideW(),
+ options.StrideH()),
+ "Dilation W:H = {}:{}".format(options.DilationWFactor(),
+ options.DilationHFactor()),
+ "Padding = {}".format(GetStringPadding(options)),
+ "DepthMultiplier = {}".format(options.DepthMultiplier()))
+ # yapf: enable
+ elif (op_name == "STRIDED_SLICE"):
+ # yapf: disable
+ return "{}, {}, {}, {}, {}".format(
+ "begin_mask({})".format(options.BeginMask()),
+ "end_mask({})".format(options.EndMask()),
+ "ellipsis_mask({})".format(options.EllipsisMask()),
+ "new_axis_mask({})".format(options.NewAxisMask()),
+ "shrink_axis_mask({})".format(options.ShrinkAxisMask()))
+ # yapf: enable
+ else:
+ return None
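`GetStringOptions` only calls a handful of getters on the options table, so a duck-typed stand-in is enough to show the output format. `FakeConv2DOptions` is invented for the example, and the import assumes the repository's `tflite/` generated package is importable:

```python
from parser.tflite.tflite_option import GetStringOptions

class FakeConv2DOptions(object):
    def StrideW(self): return 1
    def StrideH(self): return 2
    def DilationWFactor(self): return 1
    def DilationHFactor(self): return 1
    def Padding(self): return 0  # 0 maps to "SAME"

print(GetStringOptions("CONV_2D", FakeConv2DOptions()))
# Stride W:H = 1:2, Dilation W:H = 1:1, Padding = SAME
```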
diff --git a/tools/tflitefile_tool/parser/tflite/tflite_parser.py b/tools/tflitefile_tool/parser/tflite/tflite_parser.py
new file mode 100755
index 000000000..6a8f2b8ab
--- /dev/null
+++ b/tools/tflitefile_tool/parser/tflite/tflite_parser.py
@@ -0,0 +1,112 @@
+#!/usr/bin/env python
+
+# Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+import tflite.Model
+from .tflite_subgraph import TFLiteSubgraph
+from .tflite_operator import TFLiteOperator, EnumStrMaps
+from .tflite_tensor import TFLiteTensor, SetTensorTypeStr
+
+
+def HasOptionalTensor(tf_subgraph):
+ for operator_idx in range(tf_subgraph.OperatorsLength()):
+ tf_operator = tf_subgraph.Operators(operator_idx)
+ if -1 in tf_operator.InputsAsNumpy():
+ return True
+        output_tensors = tf_operator.OutputsAsNumpy()
+        if -1 in output_tensors:
+            return True
+
+ return False
+
+
+class TFLiteSubgraphParser(object):
+ def __init__(self, tf_model, subgraph_index):
+ self.tf_model = tf_model
+ self.tf_subgraph = tf_model.Subgraphs(subgraph_index)
+ self.subg = TFLiteSubgraph(subgraph_index, self.tf_subgraph)
+
+ # Tensor type string table
+ SetTensorTypeStr()
+
+ def Parse(self):
+ if HasOptionalTensor(self.tf_subgraph):
+            # Optional input/output tensors are indicated by index -1; add a placeholder for them
+ self.subg.tensors_map[-1] = TFLiteTensor(-1, None, None)
+
+ # tensors
+ for tensor_idx in range(self.tf_subgraph.TensorsLength()):
+ tf_tensor = self.tf_subgraph.Tensors(tensor_idx)
+ buffer_idx = tf_tensor.Buffer()
+ tf_buffer = self.tf_model.Buffers(buffer_idx)
+ t = TFLiteTensor(tensor_idx, tf_tensor, tf_buffer)
+ self.subg.tensors_map[tensor_idx] = t
+
+ # operators
+ for operator_idx in range(self.tf_subgraph.OperatorsLength()):
+ tf_operator = self.tf_subgraph.Operators(operator_idx)
+ op_name = self.GetOpcodeStr(tf_operator)
+ input_tensors = self.GetTensors(tf_operator.InputsAsNumpy())
+ output_tensors = self.GetTensors(tf_operator.OutputsAsNumpy())
+
+ op = TFLiteOperator(operator_idx, tf_operator, input_tensors, output_tensors,
+ op_name)
+ self.subg.operators_map[op.index] = op
+ self.subg.optypes_map[op.op_name] = op
+
+ self.subg.inputs = self.GetTensors(self.tf_subgraph.InputsAsNumpy())
+ self.subg.outputs = self.GetTensors(self.tf_subgraph.OutputsAsNumpy())
+
+ return self.subg
+
+ def GetOpcodeStr(self, tf_operator):
+ opcode_list_idx = tf_operator.OpcodeIndex()
+ opcode_id = self.tf_model.OperatorCodes(opcode_list_idx).BuiltinCode()
+ opcode_str = EnumStrMaps.BuiltinOpcode[opcode_id]
+ if opcode_id == 32:
+ # Custom operator
+ custom_operator = self.tf_model.OperatorCodes(tf_operator.OpcodeIndex())
+ custom_op_name = custom_operator.CustomCode().decode('utf-8')
+ opcode_str = opcode_str + "(" + custom_op_name + ")"
+ return opcode_str
+
+ def GetTensors(self, tf_tensors_index):
+ assert len(self.subg.tensors_map.keys()) > 0
+
+ return_list = []
+ for tensor_idx in tf_tensors_index:
+ return_list.append(self.subg.tensors_map[tensor_idx])
+ return return_list
+
+
+class TFLiteParser(object):
+ def __init__(self, model_file):
+ self.model_file = model_file
+
+ def Parse(self):
+ # Generate Model: top structure of tflite model file
+ buf = self.model_file.read()
+ buf = bytearray(buf)
+ tf_model = tflite.Model.Model.GetRootAsModel(buf, 0)
+
+        # A model file can contain multiple subgraphs
+ subg_list = []
+ for subgraph_index in range(tf_model.SubgraphsLength()):
+ # Parse Subgraphs
+ subg_parser = TFLiteSubgraphParser(tf_model, subgraph_index)
+ subg = subg_parser.Parse()
+ subg_list.append(subg)
+
+ return subg_list
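Continuing the `ModelParser` sketch above, the parsed subgraphs can be walked through their index-keyed maps:

```python
# subg_list as returned by TFLiteParser.Parse() / ModelParser.Parse()
for subg in subg_list:
    for index, op in subg.operators_map.items():
        in_indices = [t.index for t in op.inputs]
        out_indices = [t.index for t in op.outputs]
        print("#{} {}: {} -> {}".format(index, op.op_name, in_indices, out_indices))
```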
diff --git a/tools/tflitefile_tool/parser/tflite/tflite_subgraph.py b/tools/tflitefile_tool/parser/tflite/tflite_subgraph.py
new file mode 100755
index 000000000..0c6338ec6
--- /dev/null
+++ b/tools/tflitefile_tool/parser/tflite/tflite_subgraph.py
@@ -0,0 +1,30 @@
+#!/usr/bin/python
+
+# Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+from ir.subgraph import Subgraph
+
+
+class TFLiteSubgraph(Subgraph):
+ def __init__(self, subg_idx, tf_subgraph):
+ super(TFLiteSubgraph, self).__init__()
+ self.tf_subgraph = tf_subgraph
+
+ self.index = subg_idx
+ if tf_subgraph.Name() is not None:
+ self.subg_name = str(tf_subgraph.Name())
+ self.model_name = "#{0} {1}".format(subg_idx, self.subg_name)
+ if (subg_idx == 0): # 0th subgraph is main subgraph
+ self.model_name += " (MAIN)"
diff --git a/tools/tflitefile_tool/tensor_wrapping.py b/tools/tflitefile_tool/parser/tflite/tflite_tensor.py
index 2a6dcaceb..afd6a2728 100755
--- a/tools/tflitefile_tool/tensor_wrapping.py
+++ b/tools/tflitefile_tool/parser/tflite/tflite_tensor.py
@@ -14,8 +14,10 @@
# See the License for the specific language governing permissions and
# limitations under the License.
+import numpy as np
import tflite.Tensor
import tflite.TensorType
+from ir.tensor import Tensor
TensorTypeList = {}
@@ -30,7 +32,7 @@ def SetTensorTypeStr():
TensorTypeList[fieldValue] = fieldName
-TYPES = {
+TYPES_SIZE = {
'BOOL': 1,
'COMPLEX64': 8,
'FLOAT16': 2,
@@ -38,29 +40,69 @@ TYPES = {
'INT16': 2,
'INT32': 4,
'INT64': 8,
- 'UINT8': 1
+ 'UINT8': 1,
+ 'NONE': 0,
}
def GetTypeSize(type_name):
try:
- return TYPES[type_name]
+ return TYPES_SIZE[type_name]
except KeyError as error:
return 0
-class Tensor(object):
+TYPE_TO_NPTYPE = {
+    'BOOL': np.bool_,
+    'COMPLEX64': np.complex64,
+ 'FLOAT16': np.float16,
+ 'FLOAT32': np.float32,
+ 'INT16': np.int16,
+ 'INT32': np.int32,
+ 'INT64': np.int64,
+ 'UINT8': np.uint8,
+}
+
+
+def ConvertProperNPArrayType(np_arr, np_shape, type_name):
+ try:
+ return np_arr.view(TYPE_TO_NPTYPE[type_name]).reshape(np_shape)
+ except KeyError as error:
+ return np_arr.view().reshape(np_shape)
+
+
+class TFLiteTensor(Tensor):
def __init__(self, tensor_idx, tf_tensor, tf_buffer):
- self.tensor_idx = tensor_idx
+ super(TFLiteTensor, self).__init__()
self.tf_tensor = tf_tensor
self.tf_buffer = tf_buffer
+ self.index = int(tensor_idx)
+ self.tensor = tf_tensor
+
# optional input
- if (self.tf_tensor != None):
- self.type_name = TensorTypeList[self.tf_tensor.Type()]
+ if self.index == -1:
+ self.type_name = "NONE"
+ # general input
else:
- self.type_name = None
+ assert tf_tensor is not None
+ assert tf_buffer is not None
+ self.tensor_name = str(tf_tensor.Name())
+ self.type_name = TensorTypeList[tf_tensor.Type()]
+ self.buffer_index = tf_tensor.Buffer()
+ if (tf_buffer.DataLength() > 0):
+ self.buffer = ConvertProperNPArrayType(tf_buffer.DataAsNumpy(),
+ tf_tensor.ShapeAsNumpy(),
+ self.type_name)
+
+        # shape: an empty list ([]) means a scalar
+        for shape_idx in range(tf_tensor.ShapeLength()):
+            # a shape signature of -1 means an unknown dimension
+ if tf_tensor.ShapeSignature(shape_idx) != -1:
+ self.shape.append(int(tf_tensor.Shape(shape_idx)))
+ else:
+ self.shape.append(-1)
self.memory_size = self.GetMemorySize()
@@ -71,12 +113,12 @@ class Tensor(object):
# memory size in bytes
size = int(type_size)
- shape_length = self.tf_tensor.ShapeLength()
+ shape_length = len(self.shape)
if shape_length == 0:
return size
for shape_idx in range(shape_length):
- shape_size = int(self.tf_tensor.Shape(shape_idx))
+ shape_size = int(self.shape[shape_idx])
size *= shape_size
return size
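`ConvertProperNPArrayType` reinterprets the raw uint8 buffer bytes as the tensor's element type before applying the shape. The same `view`/`reshape` combination can be checked standalone; the byte values below are the little-endian encodings of 1.0 and 2.0:

```python
import numpy as np

raw = np.array([0, 0, 128, 63, 0, 0, 0, 64], dtype=np.uint8)  # float32 1.0 and 2.0
print(raw.view(np.float32).reshape([2]))  # [1. 2.]
```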
diff --git a/tools/tflitefile_tool/printer/__init__.py b/tools/tflitefile_tool/printer/__init__.py
new file mode 100644
index 000000000..e69de29bb
--- /dev/null
+++ b/tools/tflitefile_tool/printer/__init__.py
diff --git a/tools/tflitefile_tool/printer/string_builder.py b/tools/tflitefile_tool/printer/string_builder.py
new file mode 100644
index 000000000..d7654205a
--- /dev/null
+++ b/tools/tflitefile_tool/printer/string_builder.py
@@ -0,0 +1,175 @@
+#!/usr/bin/python
+
+# Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+import numpy as np
+
+UNIT_SYMBOLS = ['B', 'K', 'M', 'G', 'T']
+CHAR_SYMBOLS = {'operator': '#', 'tensor': '%', 'buffer': '&'}
+
+
+def ConvertBytesToHuman(n):
+ n = int(n)
+ if n < 0:
+ return 0
+
+ format_str = "%(val)3.1f%(symb)s"
+ prefix = {}
+ for i, s in enumerate(UNIT_SYMBOLS[1:]):
+ prefix[s] = 1 << (i + 1) * 10
+
+ for symbol in reversed(UNIT_SYMBOLS[1:]):
+ if n >= prefix[symbol]:
+ v = float(n) / prefix[symbol]
+ return format_str % dict(symb=symbol, val=v)
+
+ return format_str % dict(symb=UNIT_SYMBOLS[0], val=n)
+
+
+def GetStringTensorIndex(tensors):
+ return_string = []
+ return_string.append("[")
+ for idx in range(len(tensors)):
+ if idx != 0:
+ return_string.append(", ")
+ return_string.append(CHAR_SYMBOLS['tensor'] + str(tensors[idx].index))
+ return_string.append("]")
+ return "".join(return_string)
+
+
+def GetStringShape(tensor):
+ shape_len = len(tensor.shape)
+ if shape_len == 0:
+ return "Scalar"
+ return_string = []
+ return_string.append("[")
+ for shape_idx in range(shape_len):
+ if (shape_idx != 0):
+ return_string.append(", ")
+ return_string.append(str(tensor.shape[shape_idx]))
+ return_string.append("]")
+ return "".join(return_string)
+
+
+def GetStringTensor(tensor):
+ info = ""
+ if tensor.index < 0:
+ info = "{:5} : {}".format(CHAR_SYMBOLS['tensor'] + str(tensor.index),
+ "(OPTIONAL)")
+ else:
+ shape_str = GetStringShape(tensor)
+ type_name = tensor.type_name
+ shape_name = tensor.tensor_name
+ memory_size = ConvertBytesToHuman(tensor.memory_size)
+
+ buffer = ["("]
+ if tensor.buffer is not None:
+ buffer.append(
+ "{:5}: ".format(CHAR_SYMBOLS['buffer'] + str(tensor.buffer_index)))
+                # if the buffer is too big, elide its contents
+ if tensor.buffer.size > 4:
+ buffer.append("".join(['[' for _ in range(tensor.buffer.ndim)]))
+ buffer.append(" ... ")
+ buffer.append("".join([']' for _ in range(tensor.buffer.ndim)]))
+ else:
+ buffer.append(
+ np.array2string(
+ tensor.buffer,
+ precision=3,
+ separator=', ',
+ threshold=4,
+ edgeitems=2))
+ else:
+ buffer.append("Empty")
+ buffer.append(")")
+ buffer_str = "".join(buffer)
+
+ info = "{:5} : buffer {:25} | {:7} | Memory {:6} | Shape {} ({})".format(
+ CHAR_SYMBOLS['tensor'] + str(tensor.index), buffer_str, type_name,
+ memory_size, shape_str, shape_name)
+ return info
+
+
+def GetStringBuffer(tensor):
+ buffer = []
+ buffer.append("Buffer {:5}".format(CHAR_SYMBOLS['buffer'] + str(tensor.buffer_index)))
+ buffer.append("\n")
+ buffer.append(np.array2string(tensor.buffer, separator=', '))
+ return "".join(buffer)
+
+
+class StringBuilder(object):
+ def __init__(self, spacious_str=" "):
+ self.spacious_str = spacious_str
+
+ def GraphStats(self, stats):
+ results = []
+
+ results.append("{:38}: {:4}".format("Number of all operator types",
+ len(stats.op_counts)))
+
+ # op type stats
+ for op_name in sorted(stats.op_counts.keys()):
+ occur = stats.op_counts[op_name]
+ optype_info_str = "{:38}: {:4}".format(self.spacious_str + op_name, occur)
+ results.append(optype_info_str)
+
+ summary_str = "{0:38}: {1:4}".format("Number of all operators",
+ sum(stats.op_counts.values()))
+ results.append(summary_str)
+ results.append('')
+
+ # memory stats
+ results.append("Expected TOTAL memory: {}".format(
+ ConvertBytesToHuman(stats.total_memory)))
+ results.append("Expected FILLED memory: {}".format(
+ ConvertBytesToHuman(stats.filled_memory)))
+
+ return "\n".join(results)
+
+ def Operator(self, operator):
+ results = []
+ results.append("{} {}".format(CHAR_SYMBOLS['operator'] + str(operator.index),
+ operator.op_name))
+ results.append("{}Fused Activation: {}".format(self.spacious_str,
+ operator.activation))
+ results.append("{}Input Tensors{}".format(self.spacious_str,
+ GetStringTensorIndex(operator.inputs)))
+ for tensor in operator.inputs:
+ results.append(self.Tensor(tensor, self.spacious_str + self.spacious_str))
+ results.append("{}Output Tensors{}".format(self.spacious_str,
+ GetStringTensorIndex(
+ operator.outputs)))
+ for tensor in operator.outputs:
+ results.append(self.Tensor(tensor, self.spacious_str + self.spacious_str))
+        # operator option
+        # Some operators do not have options; in that case, nothing is printed
+        if operator.options is not None and operator.options != "":
+ results.append(self.Option(operator.options, self.spacious_str))
+ return "\n".join(results)
+
+ def Tensor(self, tensor, depth_str=""):
+ results = []
+ results.append("{}{}".format(depth_str, GetStringTensor(tensor)))
+ return "".join(results)
+
+ def Option(self, options_str, depth_str=""):
+ results = []
+ results.append("{}Options".format(depth_str))
+ results.append("{}{}{}".format(depth_str, self.spacious_str, options_str))
+ return "\n".join(results)
+
+ def Buffer(self, tensor, depth_str=""):
+ return "{}{}".format(depth_str, GetStringBuffer(tensor))
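`ConvertBytesToHuman` uses binary prefixes, so `1 << (i + 1) * 10` yields 1024 for `K`, 1048576 for `M`, and so on. A quick standalone check of the arithmetic and the format string:

```python
for i, s in enumerate(['K', 'M', 'G', 'T']):
    print(s, 1 << (i + 1) * 10)  # K 1024, M 1048576, G 1073741824, T 1099511627776

print("%(val)3.1f%(symb)s" % dict(val=40000 / 1024.0, symb='K'))  # 39.1K
```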
diff --git a/tools/tflitefile_tool/printer/subgraph_printer.py b/tools/tflitefile_tool/printer/subgraph_printer.py
new file mode 100755
index 000000000..51d8453ae
--- /dev/null
+++ b/tools/tflitefile_tool/printer/subgraph_printer.py
@@ -0,0 +1,106 @@
+#!/usr/bin/python
+
+# Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+from ir import graph_stats
+from .string_builder import StringBuilder
+
+
+class SubgraphPrinter(object):
+ def __init__(self, verbose, subg, spacious_str=" "):
+ self.verbose = verbose
+ self.subg = subg
+ self.spacious_str = spacious_str
+ self.print_all_tensor = True
+ self.print_tensor_index_list = None
+ self.print_all_operator = True
+ self.print_operator_index_list = None
+
+ def SetPrintSpecificTensors(self, tensor_indices):
+ if len(tensor_indices) != 0:
+ self.print_all_tensor = False
+ self.print_tensor_index_list = tensor_indices
+
+ def SetPrintSpecificOperators(self, operator_indices):
+ if len(operator_indices) != 0:
+ self.print_all_operator = False
+ self.print_operator_index_list = operator_indices
+
+ def PrintInfo(self):
+        if self.print_all_tensor and self.print_all_operator:
+ print("[" + self.subg.model_name + "]")
+ print('')
+ if self.verbose > 0:
+ self.PrintModelInfo()
+ print('')
+ self.PrintOperators()
+ if self.verbose == 2:
+ self.PrintBuffers()
+ self.PrintGraphStats()
+
+ if self.verbose == 0:
+ return
+
+        if not self.print_all_tensor:
+ print('')
+ self.PrintSpecificTensors(self.print_tensor_index_list)
+ print('')
+
+        if not self.print_all_operator:
+ print('')
+ self.PrintSpecificOperators(self.print_operator_index_list)
+ print('')
+
+ def PrintModelInfo(self):
+ model_inputs = []
+ for t in self.subg.inputs:
+ model_inputs.append(t.index)
+ model_outputs = []
+ for t in self.subg.outputs:
+ model_outputs.append(t.index)
+ print(self.subg.model_name + " input tensors: " + str(model_inputs))
+ self.PrintSpecificTensors(model_inputs, " ")
+ print(self.subg.model_name + " output tensors: " + str(model_outputs))
+ self.PrintSpecificTensors(model_outputs, " ")
+
+ def PrintOperators(self):
+ for index, operator in self.subg.operators_map.items():
+ info = StringBuilder(self.spacious_str).Operator(operator)
+ print(info)
+ print()
+
+ def PrintSpecificTensors(self, print_tensor_index_list, depth_str=""):
+ for index in print_tensor_index_list:
+ tensor = self.subg.tensors_map[index]
+ info = StringBuilder(self.spacious_str).Tensor(tensor, depth_str)
+ print(info)
+
+ def PrintSpecificOperators(self, print_operator_index_list):
+ for index in print_operator_index_list:
+ operator = self.subg.operators_map[index]
+ info = StringBuilder(self.spacious_str).Operator(operator)
+ print(info)
+
+ def PrintGraphStats(self):
+ stats = graph_stats.CalcGraphStats(self.subg)
+ info = StringBuilder(self.spacious_str).GraphStats(stats)
+ print(info)
+
+ def PrintBuffers(self):
+ for index, tensor in self.subg.tensors_map.items():
+ if tensor.buffer is not None:
+ info = StringBuilder(self.spacious_str).Buffer(tensor)
+ print(info)
+ print()
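A hypothetical usage sketch, assuming `subg` came from the parser as in the earlier examples. On the full print path, `verbose=1` adds model info and operators and `verbose=2` also dumps buffers; restricting output to specific indices works as below:

```python
from printer.subgraph_printer import SubgraphPrinter

printer = SubgraphPrinter(verbose=1, subg=subg)
printer.SetPrintSpecificOperators([0, 2])  # limit output to operators #0 and #2
printer.PrintInfo()
```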
diff --git a/tools/tflitefile_tool/saver/__init__.py b/tools/tflitefile_tool/saver/__init__.py
new file mode 100644
index 000000000..e69de29bb
--- /dev/null
+++ b/tools/tflitefile_tool/saver/__init__.py
diff --git a/tools/tflitefile_tool/config_saver.py b/tools/tflitefile_tool/saver/config_saver.py
index abf2c0ca2..fa359693f 100755
--- a/tools/tflitefile_tool/config_saver.py
+++ b/tools/tflitefile_tool/saver/config_saver.py
@@ -14,19 +14,19 @@
# See the License for the specific language governing permissions and
# limitations under the License.
-from operator_wrapping import Operator
-from tensor_printer import TensorPrinter
-from option_printer import OptionPrinter
+from printer.string_builder import GetStringShape
+# TODO: Reduce the number of `write` calls by using `StringBuilder`
class ConfigSaver(object):
def __init__(self, file_name, operator):
self.file_name = file_name
self.operator = operator
# Set self.verbose to 1 level to print more information
self.verbose = 1
- self.op_idx = operator.operator_idx
- self.op_name = operator.opcode_str
+ self.op_idx = operator.index
+ self.op_name = operator.op_name
+ self.options = operator.tf_options
self.f = open(file_name, 'at')
@@ -50,18 +50,15 @@ class ConfigSaver(object):
if (len(self.operator.inputs) != 3):
raise AssertionError('Conv2D input count should be 3')
- inputs = self.operator.inputs[0]
- weights = self.operator.inputs[1]
+ input = self.operator.inputs[0]
+ weight = self.operator.inputs[1]
bias = self.operator.inputs[2]
- self.f.write("input: {}\n".format(
- TensorPrinter(self.verbose, inputs).GetShapeString()))
- self.f.write("input_type: {}\n".format(inputs.type_name))
- self.f.write("weights: {}\n".format(
- TensorPrinter(self.verbose, weights).GetShapeString()))
- self.f.write("weights_type: {}\n".format(weights.type_name))
- self.f.write("bias: {}\n".format(
- TensorPrinter(self.verbose, bias).GetShapeString()))
+ self.f.write("input: {}\n".format(GetStringShape(input)))
+ self.f.write("input_type: {}\n".format(input.type_name))
+ self.f.write("weights: {}\n".format(GetStringShape(weight)))
+ self.f.write("weights_type: {}\n".format(weight.type_name))
+ self.f.write("bias: {}\n".format(GetStringShape(bias)))
self.f.write("bias_type: {}\n".format(bias.type_name))
def SaveInputs(self):
@@ -69,7 +66,7 @@ class ConfigSaver(object):
self.f.write("input_counts: {}\n".format(total))
for idx in range(total):
tensor = self.operator.inputs[idx]
- input_shape_str = TensorPrinter(self.verbose, tensor).GetShapeString()
+ input_shape_str = GetStringShape(tensor)
self.f.write("input{}: {}\n".format(idx, input_shape_str))
self.f.write("input{}_type: {}\n".format(idx, tensor.type_name))
@@ -78,37 +75,33 @@ class ConfigSaver(object):
self.f.write("output_counts: {}\n".format(total))
for idx in range(total):
tensor = self.operator.outputs[idx]
- output_shape_str = TensorPrinter(self.verbose, tensor).GetShapeString()
+ output_shape_str = GetStringShape(tensor)
self.f.write("output{}: {}\n".format(idx, output_shape_str))
self.f.write("output{}_type: {}\n".format(idx, tensor.type_name))
def SaveFilter(self):
- self.f.write("filter_w: {}\n".format(self.operator.options.FilterWidth()))
- self.f.write("filter_h: {}\n".format(self.operator.options.FilterHeight()))
+ self.f.write("filter_w: {}\n".format(self.options.FilterWidth()))
+ self.f.write("filter_h: {}\n".format(self.options.FilterHeight()))
def SaveStride(self):
- self.f.write("stride_w: {}\n".format(self.operator.options.StrideW()))
- self.f.write("stride_h: {}\n".format(self.operator.options.StrideH()))
+ self.f.write("stride_w: {}\n".format(self.options.StrideW()))
+ self.f.write("stride_h: {}\n".format(self.options.StrideH()))
def SaveDilation(self):
- self.f.write("dilation_w: {}\n".format(self.operator.options.DilationWFactor()))
- self.f.write("dilation_h: {}\n".format(self.operator.options.DilationHFactor()))
+ self.f.write("dilation_w: {}\n".format(self.options.DilationWFactor()))
+ self.f.write("dilation_h: {}\n".format(self.options.DilationHFactor()))
def SavePadding(self):
- if self.operator.options.Padding() == 0:
+ if self.options.Padding() == 0:
self.f.write("padding: SAME\n")
- elif self.operator.options.Padding() == 1:
+ elif self.options.Padding() == 1:
self.f.write("padding: VALID\n")
def SaveFusedAct(self):
- if self.operator.fused_activation is not "NONE":
- self.f.write("fused_act: {}\n".format(self.operator.fused_activation))
+        if self.operator.activation != "NONE":
+ self.f.write("fused_act: {}\n".format(self.operator.activation))
def SaveAttributes(self):
- # operator option
- # Some operations does not have option. In such case no option is printed
- option_str = OptionPrinter(self.verbose, self.op_name,
- self.operator.options).GetOptionString()
if self.op_name == 'AVERAGE_POOL_2D' or self.op_name == 'MAX_POOL_2D':
self.SaveFilter()
self.SaveStride()
@@ -124,7 +117,6 @@ class ConfigSaver(object):
self.SaveStride()
self.SaveDilation()
self.SavePadding()
- self.f.write("depthmultiplier: {}\n".format(
- self.operator.options.DepthMultiplier()))
+ self.f.write("depthmultiplier: {}\n".format(self.options.DepthMultiplier()))
self.SaveFusedAct()
diff --git a/tools/tflitefile_tool/model_saver.py b/tools/tflitefile_tool/saver/model_saver.py
index 15037a1fe..117ec76df 100755
--- a/tools/tflitefile_tool/model_saver.py
+++ b/tools/tflitefile_tool/saver/model_saver.py
@@ -14,17 +14,17 @@
# See the License for the specific language governing permissions and
# limitations under the License.
-from config_saver import ConfigSaver
+from .config_saver import ConfigSaver
class ModelSaver(object):
- def __init__(self, model_name, op_parser):
- self.model_name = model_name
- self.op_parser = op_parser
+ def __init__(self, subg):
+ self.model_name = subg.model_name
+        self.subg = subg
def SaveConfigInfo(self, prefix):
print("Save model configuration file")
- for type_str, oper_list in self.op_parser.operators_per_type.items():
+ for type_str, oper_list in self.subg.optypes_map.items():
if prefix:
file_name = "{}_{}_{}.config".format(prefix, self.model_name, type_str)
else:
diff --git a/tools/tflitefile_tool/select_operator.py b/tools/tflitefile_tool/select_operator.py
index dccb3454f..fdef38c79 100755
--- a/tools/tflitefile_tool/select_operator.py
+++ b/tools/tflitefile_tool/select_operator.py
@@ -22,6 +22,19 @@ import tflite.Model
import tflite.SubGraph
import tflite.BuiltinOptions
import argparse
+import pkg_resources
+
+
+# On flatbuffers 2.0, EndVector doesn't require length argument any more.
+# But flatbuffers under 2.0 (ex. 1.12) requires length argument.
+# We need this workaround until we abandon flatbuffers 1.12.
+# Reference: https://github.com/google/flatbuffers/issues/6858
+def EndVector(builder, len):
+ flat_version = pkg_resources.get_distribution('flatbuffers').version
+ if pkg_resources.parse_version(flat_version) < pkg_resources.parse_version("2.0"):
+ return builder.EndVector(len)
+ else:
+ return builder.EndVector()
# Assume we use only main model in model file
@@ -135,7 +148,7 @@ def GenerateOperatorCodes(new_builder, sample_model, used_opcodes_dic,
for operator_code_idx in reversed(range(new_operator_code_num)):
new_builder.PrependUOffsetTRelative(new_operator_code_list[operator_code_idx])
- return new_builder.EndVector(new_operator_code_num)
+ return EndVector(new_builder, new_operator_code_num)
def GenerateQuantization(new_builder, selected_quantization):
@@ -146,7 +159,7 @@ def GenerateQuantization(new_builder, selected_quantization):
new_builder, min_num)
for min_idx in reversed(range(min_num)):
new_builder.PrependFloat32(selected_quantization.Min(min_idx))
- new_min = new_builder.EndVector(min_num)
+ new_min = EndVector(new_builder, min_num)
# Create max vector
max_num = selected_quantization.MaxLength()
@@ -155,7 +168,7 @@ def GenerateQuantization(new_builder, selected_quantization):
new_builder, max_num)
for max_idx in reversed(range(max_num)):
new_builder.PrependFloat32(selected_quantization.Max(max_idx))
- new_max = new_builder.EndVector(max_num)
+ new_max = EndVector(new_builder, max_num)
# Create scale vector
scale_num = selected_quantization.ScaleLength()
@@ -164,7 +177,7 @@ def GenerateQuantization(new_builder, selected_quantization):
new_builder, scale_num)
for scale_idx in reversed(range(scale_num)):
new_builder.PrependFloat32(selected_quantization.Scale(scale_idx))
- new_scale = new_builder.EndVector(scale_num)
+ new_scale = EndVector(new_builder, scale_num)
# Create zero_point vector
zeropoint_num = selected_quantization.ZeroPointLength()
@@ -173,7 +186,7 @@ def GenerateQuantization(new_builder, selected_quantization):
new_builder, zeropoint_num)
for zeropoint_idx in reversed(range(zeropoint_num)):
new_builder.PrependInt64(selected_quantization.ZeroPoint(zeropoint_idx))
- new_zeropoint = new_builder.EndVector(zeropoint_num)
+ new_zeropoint = EndVector(new_builder, zeropoint_num)
# Create quantization
tflite.QuantizationParameters.QuantizationParametersStart(new_builder)
@@ -204,7 +217,7 @@ def GenerateTensor(new_builder, selected_tensor, used_buffers_dic):
if shape_num != 0:
for shape_idx in reversed(range(shape_num)):
new_builder.PrependInt32(selected_tensor.Shape(shape_idx))
- new_shape = new_builder.EndVector(shape_num)
+ new_shape = EndVector(new_builder, shape_num)
# Create tensor_type
tensor_type = selected_tensor.Type()
@@ -268,7 +281,7 @@ def GenerateTensors(new_builder, selected_subgraph, used_tensors_dic, used_buffe
for new_tensor in reversed(new_tensor_list):
new_builder.PrependUOffsetTRelative(new_tensor)
- return new_builder.EndVector(new_tensor_num)
+ return EndVector(new_builder, new_tensor_num)
def GenerateBuiltinOption(new_builder, selected_builtin_option, builtin_option_type,
@@ -474,7 +487,7 @@ def GenerateBuiltinOption(new_builder, selected_builtin_option, builtin_option_t
for new_shape_idx in reversed(range(shape_num)):
new_shape_val = reshape_option.NewShape(new_shape_idx)
new_builder.PrependInt32(new_shape_val)
- new_shape = new_builder.EndVector(shape_num)
+ new_shape = EndVector(new_builder, shape_num)
tflite.ReshapeOptions.ReshapeOptionsStart(new_builder)
if shape_num != 0:
@@ -613,7 +626,7 @@ def GenerateBuiltinOption(new_builder, selected_builtin_option, builtin_option_t
for squeeze_dims_idx in reversed(range(squeeze_dims_num)):
squeeze_dims_val = squeeze_option.SqueezeDims(squeeze_dims_idx)
new_builder.PrependInt32(squeeze_dims_val)
- new_squeeze_dims = new_builder.EndVector(squeeze_dims_num)
+ new_squeeze_dims = EndVector(new_builder, squeeze_dims_num)
tflite.SqueezeOptions.SqueezeOptionsStart(new_builder)
if squeeze_dims_num != 0:
@@ -997,7 +1010,7 @@ def GenerateOperator(new_builder, selected_operator, used_tensors_dic, used_opco
else:
new_input_tensor_idx = used_tensors_dic[input_tensor_idx]
new_builder.PrependInt32(new_input_tensor_idx)
- new_input = new_builder.EndVector(input_num)
+ new_input = EndVector(new_builder, input_num)
# create output_vector
output_num = selected_operator.OutputsLength()
@@ -1007,7 +1020,7 @@ def GenerateOperator(new_builder, selected_operator, used_tensors_dic, used_opco
output_tensor_idx = selected_operator.Outputs(output_idx)
new_output_tensor_idx = used_tensors_dic[output_tensor_idx]
new_builder.PrependInt32(new_output_tensor_idx)
- new_output = new_builder.EndVector(output_num)
+ new_output = EndVector(new_builder, output_num)
# Create builtin_option
builtin_option_type = selected_operator.BuiltinOptionsType()
@@ -1022,7 +1035,7 @@ def GenerateOperator(new_builder, selected_operator, used_tensors_dic, used_opco
tflite.Operator.OperatorStartCustomOptionsVector(new_builder, custom_option_num)
for custom_option_idx in reversed(range(custom_option_num)):
new_builder.PrependUint8(selected_operator.CustomOptions(custom_option_idx))
- new_custom_option = new_builder.EndVector(custom_option_num)
+ new_custom_option = EndVector(new_builder, custom_option_num)
# Create custum option type
custom_option_type = selected_operator.CustomOptionsFormat()
@@ -1067,7 +1080,7 @@ def GenerateOperators(new_builder, selected_subgraph, operator_list, used_tensor
for new_operator in reversed(new_operator_list):
new_builder.PrependUOffsetTRelative(new_operator)
- return new_builder.EndVector(new_operator_num)
+ return EndVector(new_builder, new_operator_num)
def GenerateSubgraph(new_builder, selected_subgraph, operator_list, new_input_tensor,
@@ -1085,7 +1098,7 @@ def GenerateSubgraph(new_builder, selected_subgraph, operator_list, new_input_te
for input_tensor_idx in reversed(new_input_tensor):
new_input_tensor_idx = used_tensors_dic[input_tensor_idx]
new_builder.PrependInt32(new_input_tensor_idx)
- new_inputs = new_builder.EndVector(new_input_tensor_num)
+ new_inputs = EndVector(new_builder, new_input_tensor_num)
# Create output vector for subgraph table
new_output_tensor_num = len(new_output_tensor)
@@ -1094,7 +1107,7 @@ def GenerateSubgraph(new_builder, selected_subgraph, operator_list, new_input_te
for output_tensor_idx in reversed(new_output_tensor):
new_output_tensor_idx = used_tensors_dic[output_tensor_idx]
new_builder.PrependInt32(new_output_tensor_idx)
- new_outputs = new_builder.EndVector(new_output_tensor_num)
+ new_outputs = EndVector(new_builder, new_output_tensor_num)
# Operators
operators = GenerateOperators(new_builder, selected_subgraph, operator_list,
@@ -1161,7 +1174,7 @@ def GenerateSubgraphs(args, new_builder, sample_model, operator_list, new_input_
for subgraph_idx in reversed(range(new_subgraph_num)):
new_builder.PrependUOffsetTRelative(new_subgraph_list[subgraph_idx])
- return new_builder.EndVector(new_subgraph_num)
+ return EndVector(new_builder, new_subgraph_num)
def GenerateBuffers(new_builder, sample_model, used_buffers_dic):
@@ -1181,7 +1194,7 @@ def GenerateBuffers(new_builder, sample_model, used_buffers_dic):
tflite.Buffer.BufferStartDataVector(new_builder, buffer_length)
for buffer_data_idx in reversed(range(buffer_length)):
new_builder.PrependUint8(buffer.Data(buffer_data_idx))
- new_buffer = new_builder.EndVector(buffer_length)
+ new_buffer = EndVector(new_builder, buffer_length)
new_buffer_data_list[buffer_idx] = new_buffer
# Create tables of buffer
@@ -1205,7 +1218,7 @@ def GenerateBuffers(new_builder, sample_model, used_buffers_dic):
for new_buffer_idx in reversed(range(new_buffer_num)):
new_builder.PrependUOffsetTRelative(new_buffer_list[new_buffer_idx])
- return new_builder.EndVector(new_buffer_num)
+ return EndVector(new_builder, new_buffer_num)
def GenerateModel(args, new_builder, sample_model, operator_list, new_input_tensors,
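A short illustration (not part of the patch) of the `EndVector` wrapper in use, following the same back-to-front build pattern as the generators above; the wrapper is restated so the sketch is self-contained:

```python
import flatbuffers
import pkg_resources

def EndVector(builder, length):  # same version workaround as in select_operator.py
    version = pkg_resources.get_distribution('flatbuffers').version
    if pkg_resources.parse_version(version) < pkg_resources.parse_version("2.0"):
        return builder.EndVector(length)
    return builder.EndVector()

builder = flatbuffers.Builder(0)
builder.StartVector(4, 3, 4)          # element size 4, 3 elements, alignment 4
for value in reversed([10, 20, 30]):  # flatbuffers vectors are built back to front
    builder.PrependInt32(value)
vector_offset = EndVector(builder, 3)
```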
diff --git a/tools/tflitefile_tool/subgraph_printer.py b/tools/tflitefile_tool/subgraph_printer.py
deleted file mode 100755
index cce7ff53b..000000000
--- a/tools/tflitefile_tool/subgraph_printer.py
+++ /dev/null
@@ -1,90 +0,0 @@
-#!/usr/bin/python
-
-# Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-# http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-
-from operator_printer import OperatorPrinter
-from tensor_printer import TensorPrinter
-import graph_stats
-
-
-class SubgraphPrinter(object):
- def __init__(self, verbose, op_parser, model_name):
- self.verbose = verbose
- self.op_parser = op_parser
- self.model_name = model_name
- self.print_all_tensor = True
- self.print_tensor_index_list = None
- self.print_all_operator = True
- self.print_operator_index_list = None
-
- def SetPrintSpecificTensors(self, tensor_indices):
- if len(tensor_indices) != 0:
- self.print_all_tensor = False
- self.print_tensor_index_list = tensor_indices
-
- def SetPrintSpecificOperators(self, operator_indices):
- if len(operator_indices) != 0:
- self.print_all_operator = False
- self.print_operator_index_list = operator_indices
-
- def PrintInfo(self):
- if self.print_all_tensor == True and self.print_all_operator == True:
- self.PrintModelInfo()
- self.PrintAllOperatorsInList()
- graph_stats.PrintGraphStats(
- graph_stats.CalcGraphStats(self.op_parser), self.verbose)
-
- if self.print_all_tensor == False:
- print('')
- self.PrintSpecificTensors(self.print_tensor_index_list)
- print('')
-
- if self.print_all_operator == False:
- print('')
- self.PrintSpecificOperators(self.print_operator_index_list)
- print('')
-
- def PrintModelInfo(self):
- print("[" + self.model_name + "]\n")
- if self.verbose > 0:
- model_inputs = self.op_parser.tf_subgraph.InputsAsNumpy()
- model_outputs = self.op_parser.tf_subgraph.OutputsAsNumpy()
- print(self.model_name + " input tensors: " + str(model_inputs))
- self.PrintSpecificTensors(model_inputs, "\t")
- print(self.model_name + " output tensors: " + str(model_outputs))
- self.PrintSpecificTensors(model_outputs, "\t")
- print('')
-
- def PrintAllOperatorsInList(self):
- if (self.verbose < 1):
- return
-
- for operator in self.op_parser.operators_in_list:
- printer = OperatorPrinter(self.verbose, operator)
- printer.PrintInfo()
- print('')
-
- print('')
-
- def PrintSpecificTensors(self, print_tensor_index_list, depth_str=""):
- for tensor in self.op_parser.GetTensors(print_tensor_index_list):
- printer = TensorPrinter(self.verbose, tensor)
- printer.PrintInfo(depth_str)
-
- def PrintSpecificOperators(self, print_operator_index_list):
- for operator in self.op_parser.operators_in_list:
- if operator.operator_idx in print_operator_index_list:
- printer = OperatorPrinter(self.verbose, operator)
- printer.PrintInfo()
diff --git a/tools/tflitefile_tool/tensor_printer.py b/tools/tflitefile_tool/tensor_printer.py
deleted file mode 100755
index 108a119d6..000000000
--- a/tools/tflitefile_tool/tensor_printer.py
+++ /dev/null
@@ -1,85 +0,0 @@
-#!/usr/bin/python
-
-# Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-# http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-
-from tensor_wrapping import Tensor
-
-SYMBOLS = ['B', 'K', 'M', 'G', 'T']
-
-
-def ConvertBytesToHuman(n):
- n = int(n)
- if n < 0:
- return 0
-
- format_str = "%(val)3.1f%(symb)s"
- prefix = {}
- for i, s in enumerate(SYMBOLS[1:]):
- prefix[s] = 1 << (i + 1) * 10
-
- for symbol in reversed(SYMBOLS[1:]):
- if n >= prefix[symbol]:
- v = float(n) / prefix[symbol]
- return format_str % dict(symb=symbol, val=v)
-
- return format_str % dict(symb=SYMBOLS[0], val=n)
-
-
-class TensorPrinter(object):
- def __init__(self, verbose, tensor):
- self.verbose = verbose
- self.tensor = tensor
-
- def PrintInfo(self, depth_str=""):
- if (self.verbose < 1):
- pass
-
- print_str = ""
- if self.tensor.tensor_idx < 0:
- print_str = "Tensor {0:4}".format(self.tensor.tensor_idx)
- else:
- buffer_idx = self.tensor.tf_tensor.Buffer()
- buffer_str = "Empty" if buffer_idx == 0 else str(buffer_idx)
- isEmpty = "Filled"
- if (self.tensor.tf_buffer.DataLength() == 0):
- isEmpty = " Empty"
- shape_str = self.GetShapeString()
- type_name = self.tensor.type_name
-
- shape_name = ""
- if self.tensor.tf_tensor.Name() != 0:
- shape_name = self.tensor.tf_tensor.Name()
-
- memory_size = ConvertBytesToHuman(self.tensor.memory_size)
-
- print_str = "Tensor {0:4} : buffer {1:5} | {2} | {3:7} | Memory {4:6} | Shape {5} ({6})".format(
- self.tensor.tensor_idx, buffer_str, isEmpty, type_name, memory_size,
- shape_str, shape_name)
- print(depth_str + print_str)
-
- def GetShapeString(self):
- if self.tensor.tf_tensor.ShapeLength() == 0:
- return "Scalar"
- return_string = "["
- for shape_idx in range(self.tensor.tf_tensor.ShapeLength()):
- if (shape_idx != 0):
- return_string += ", "
- # when shape signature is -1, that means unknown dim
- if self.tensor.tf_tensor.ShapeSignature(shape_idx) != -1:
- return_string += str(self.tensor.tf_tensor.Shape(shape_idx))
- else:
- return_string += "-1"
- return_string += "]"
- return return_string
diff --git a/tools/tflitefile_tool/tests/README.md b/tools/tflitefile_tool/tests/README.md
new file mode 100644
index 000000000..0d1d70786
--- /dev/null
+++ b/tools/tflitefile_tool/tests/README.md
@@ -0,0 +1,36 @@
+# How to test
+
+## Prepare
+
+There is `add.tflite` in `ONE/nnpackage/examples/v1.0.0/add`.
+
+```
+ONE$ find ./nnpackage -name "add.tflite"
+./nnpackage/examples/v1.0.0/add/add.tflite
+```
+
+## Test
+
+```
+ONE/tools/tflitefile_tool$ python -m unittest discover
+
+----------------------------------------------------------------------
+Ran 1 test in 0.000s
+
+OK
+```
+
+OR
+
+```
+ONE/tools/tflitefile_tool$ python ./tests/main.py
+
+----------------------------------------------------------------------
+Ran 1 test in 0.000s
+
+OK
+```
+
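+To run a single test module, for example the operator tests added under `tests/`:
+
+```
+ONE/tools/tflitefile_tool$ python -m unittest tests.test_operator -v
+```
+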
+## Reference
+
+https://docs.python.org/3.6/library/unittest.html
diff --git a/tools/tflitefile_tool/tests/__init__.py b/tools/tflitefile_tool/tests/__init__.py
new file mode 100644
index 000000000..e69de29bb
--- /dev/null
+++ b/tools/tflitefile_tool/tests/__init__.py
diff --git a/tools/tflitefile_tool/tests/main.py b/tools/tflitefile_tool/tests/main.py
new file mode 100644
index 000000000..b9c7104bb
--- /dev/null
+++ b/tools/tflitefile_tool/tests/main.py
@@ -0,0 +1,23 @@
+#!/usr/bin/env python
+
+# Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+import unittest
+
+if __name__ == '__main__':
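+ # Discover every test module under the current directory and run the whole suite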
+ loader = unittest.TestLoader()
+ tests = loader.discover('.')
+ runner = unittest.TextTestRunner()
+ runner.run(tests)
diff --git a/tools/tflitefile_tool/tests/test_operator.py b/tools/tflitefile_tool/tests/test_operator.py
new file mode 100644
index 000000000..7d6fbe859
--- /dev/null
+++ b/tools/tflitefile_tool/tests/test_operator.py
@@ -0,0 +1,74 @@
+#!/usr/bin/env python
+
+# Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+import unittest
+from ir.tensor import Tensor
+from ir.operator import Operator
+
+
+# Test only the getters/setters
+class OperatorTestCase(unittest.TestCase):
+ def setUp(self):
+ pass
+
+ def tearDown(self):
+ pass
+
+ def test_index(self):
+ op = Operator()
+ op.index = 1000
+ self.assertEqual(op.index, 1000)
+
+ def test_inputs(self):
+ op = Operator()
+ t0 = Tensor()
+ t0.index = 0
+ t1 = Tensor()
+ t1.index = 1
+ op.inputs = [t0, t1]
+ self.assertEqual(len(op.inputs), 2)
+ self.assertEqual(op.inputs[0], t0)
+ self.assertEqual(op.inputs[1], t1)
+
+ def test_outputs(self):
+ op = Operator()
+ t0 = Tensor()
+ t0.index = 0
+ t1 = Tensor()
+ t1.index = 1
+ op.outputs = [t0, t1]
+ self.assertEqual(len(op.outputs), 2)
+ self.assertEqual(op.outputs[0], t0)
+ self.assertEqual(op.outputs[1], t1)
+
+ def test_op_name(self):
+ op = Operator()
+ op.op_name = "ADD"
+ self.assertEqual(op.op_name, "ADD")
+
+ def test_activation(self):
+ op = Operator()
+ op.activation = "Tanh"
+ self.assertEqual(op.activation, "Tanh")
+
+ def test_options(self):
+ op = Operator()
+ op.options = "Options ..."
+ self.assertEqual(op.options, "Options ...")
+
+
+if __name__ == '__main__':
+ unittest.main()
diff --git a/tools/tflitefile_tool/tests/test_setup.py b/tools/tflitefile_tool/tests/test_setup.py
new file mode 100644
index 000000000..f38a2d66a
--- /dev/null
+++ b/tools/tflitefile_tool/tests/test_setup.py
@@ -0,0 +1,46 @@
+#!/usr/bin/env python
+
+# Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+import os.path
+import unittest
+
+# Python has no const variables, but treat these as constants:
+# DO NOT MODIFY them
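+# These paths are resolved from this file's location rather than the current working directory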
+THIS_FILE_DIR = os.path.dirname(os.path.abspath(__file__))
+TEST_MODEL_DIR = os.path.join(THIS_FILE_DIR, "../../../nnpackage/examples/v1.0.0/add")
+TEST_MODEL_PATH = os.path.join(TEST_MODEL_DIR, "add.tflite")
+
+
+def Exist_TEST_MODEL_DIR(path):
+ return os.path.exists(path) and os.path.isdir(path)
+
+
+def Exist_TEST_MODEL_FILE(path):
+ return os.path.exists(path) and os.path.isfile(path)
+
+
+class Setup(unittest.TestCase):
+ def test_Exist_TEST_MODEL_DIR(self):
+ model_dir = TEST_MODEL_DIR
+ self.assertTrue(Exist_TEST_MODEL_DIR(model_dir))
+
+ def test_Exist_TEST_MODEL_FILE(self):
+ model_file = TEST_MODEL_PATH
+ self.assertTrue(Exist_TEST_MODEL_FILE(model_file))
+
+
+if __name__ == '__main__':
+ unittest.main()
diff --git a/tools/tflitefile_tool/tests/test_string_builder.py b/tools/tflitefile_tool/tests/test_string_builder.py
new file mode 100644
index 000000000..97a580967
--- /dev/null
+++ b/tools/tflitefile_tool/tests/test_string_builder.py
@@ -0,0 +1,63 @@
+#!/usr/bin/env python
+
+# Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+import unittest
+from printer.string_builder import *
+
+
+class StringBuilderTestCase(unittest.TestCase):
+ def setUp(self):
+ pass
+
+ def tearDown(self):
+ pass
+
+ def test_ConvertBytesToHuman(self):
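+ # ConvertBytesToHuman is expected to use binary (1024-based) prefixes;
+ # the expected values below assume that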
+ SYMBOLS = ['B', 'K', 'M', 'G', 'T']
+ format_str = "%(val)3.1f%(symb)s"
+
+ n_bytes = -1
+ self.assertEqual(ConvertBytesToHuman(n_bytes), 0)
+
+ n_bytes = 1
+ self.assertEqual(
+ ConvertBytesToHuman(n_bytes), format_str % dict(symb=SYMBOLS[0], val=n_bytes))
+
+ n_bytes = 1024
+ self.assertEqual(
+ ConvertBytesToHuman(n_bytes),
+ format_str % dict(symb=SYMBOLS[1], val=(n_bytes / 1024)))
+
+ n_bytes = 1024**2
+ self.assertEqual(
+ ConvertBytesToHuman(n_bytes),
+ format_str % dict(symb=SYMBOLS[2], val=(n_bytes / (1024**2))))
+
+ n_bytes = 1024**3
+ self.assertEqual(
+ ConvertBytesToHuman(n_bytes),
+ format_str % dict(symb=SYMBOLS[3], val=(n_bytes / (1024**3))))
+
+ n_bytes = 1024**4
+ self.assertEqual(
+ ConvertBytesToHuman(n_bytes),
+ format_str % dict(symb=SYMBOLS[4], val=(n_bytes / (1024**4))))
+
+ # TODO: More tests
+
+
+if __name__ == '__main__':
+ unittest.main()
diff --git a/tools/tflitefile_tool/tests/test_subgraph.py b/tools/tflitefile_tool/tests/test_subgraph.py
new file mode 100644
index 000000000..7930ed03c
--- /dev/null
+++ b/tools/tflitefile_tool/tests/test_subgraph.py
@@ -0,0 +1,120 @@
+#!/usr/bin/env python
+
+# Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+import unittest
+from ir.subgraph import Subgraph
+from ir.operator import Operator
+from ir.tensor import Tensor
+
+
+# Test only the getters/setters
+class SubgraphTestCase(unittest.TestCase):
+ def setUp(self):
+ pass
+
+ def tearDown(self):
+ pass
+
+ def test_index(self):
+ subg = Subgraph()
+ subg.index = 1000
+ self.assertEqual(subg.index, 1000)
+
+ def test_inputs(self):
+ subg = Subgraph()
+ t0 = Tensor()
+ t0.index = 0
+ t1 = Tensor()
+ t1.index = 1
+ subg.inputs = [t0, t1]
+ self.assertEqual(len(subg.inputs), 2)
+ self.assertEqual(subg.inputs[0], t0)
+ self.assertEqual(subg.inputs[0].index, 0)
+ self.assertEqual(subg.inputs[1], t1)
+ self.assertEqual(subg.inputs[1].index, 1)
+
+ def test_outputs(self):
+ subg = Subgraph()
+ t0 = Tensor()
+ t0.index = 0
+ t1 = Tensor()
+ t1.index = 1
+ subg.outputs = [t0, t1]
+ self.assertEqual(len(subg.outputs), 2)
+ self.assertEqual(subg.outputs[0], t0)
+ self.assertEqual(subg.outputs[0].index, 0)
+ self.assertEqual(subg.outputs[1], t1)
+ self.assertEqual(subg.outputs[1].index, 1)
+
+ def test_subg_name(self):
+ subg = Subgraph()
+ subg.subg_name = "SUBGRAPH_0"
+ self.assertEqual(subg.subg_name, "SUBGRAPH_0")
+
+ def test_model_name(self):
+ subg = Subgraph()
+ subg.model_name = "MODEL_0"
+ self.assertEqual(subg.model_name, "MODEL_0")
+
+ def test_tensors_map(self):
+ subg = Subgraph()
+ t0 = Tensor()
+ t0.index = 0
+ t1 = Tensor()
+ t1.index = 1
+ subg.tensors_map[t0.index] = t0
+ subg.tensors_map[t1.index] = t1
+ self.assertEqual(len(subg.tensors_map.keys()), 2)
+ self.assertEqual(subg.tensors_map[t0.index], t0)
+ self.assertEqual(subg.tensors_map[t1.index], t1)
+
+ def test_operators_map(self):
+ subg = Subgraph()
+ op0 = Operator()
+ op0.index = 0
+ op0.op_name = "ADD"
+ op1 = Operator()
+ op1.index = 1
+ op1.op_name = "SUB"
+ subg.operators_map[op0.index] = op0
+ subg.operators_map[op1.index] = op1
+ self.assertEqual(len(subg.operators_map.keys()), 2)
+ self.assertEqual(subg.operators_map[op0.index], op0)
+ self.assertEqual(subg.operators_map[op1.index], op1)
+
+ def test_optypes_map(self):
+ subg = Subgraph()
+ op0 = Operator()
+ op0.index = 0
+ op0.op_name = "ADD"
+ op1 = Operator()
+ op1.index = 1
+ op1.op_name = "SUB"
+ op2 = Operator()
+ op2.index = 2
+ op2.op_name = "SUB"
+
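+ # optypes_map groups operators by op_name, so both "SUB" ops should end up under one key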
+ subg.optypes_map[op0.op_name] = op0
+ subg.optypes_map[op1.op_name] = op1
+ subg.optypes_map[op2.op_name] = op2
+
+ self.assertEqual(len(subg.optypes_map.keys()), 2)
+ self.assertEqual(len(subg.optypes_map[op0.op_name]), 1)
+ self.assertEqual(len(subg.optypes_map[op2.op_name]), 2)
+
+
+if __name__ == '__main__':
+ unittest.main()
diff --git a/tools/tflitefile_tool/tests/test_tensor.py b/tools/tflitefile_tool/tests/test_tensor.py
new file mode 100644
index 000000000..200f49557
--- /dev/null
+++ b/tools/tflitefile_tool/tests/test_tensor.py
@@ -0,0 +1,67 @@
+#!/usr/bin/env python
+
+# Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+import unittest
+from ir.tensor import Tensor
+
+
+# Test only the getters/setters
+class TensorTestCase(unittest.TestCase):
+ def setUp(self):
+ pass
+
+ def tearDown(self):
+ pass
+
+ def test_index(self):
+ t = Tensor()
+ t.index = 1000
+ self.assertEqual(t.index, 1000)
+
+ def test_tensor_name(self):
+ t = Tensor()
+ t.tensor_name = "input"
+ self.assertEqual(t.tensor_name, "input")
+
+ def test_buffer(self):
+ t = Tensor()
+ o = object()
+ t.buffer = o
+ self.assertEqual(t.buffer, o)
+
+ def test_buffer_index(self):
+ t = Tensor()
+ t.buffer_index = 1000
+ self.assertEqual(t.buffer_index, 1000)
+
+ def test_type_name(self):
+ t = Tensor()
+ t.type_name = "FLOAT32"
+ self.assertEqual(t.type_name, "FLOAT32")
+
+ def test_shape(self):
+ t = Tensor()
+ t.shape = [1, 2, 3, 4]
+ self.assertEqual(t.shape, [1, 2, 3, 4])
+
+ def test_memory_size(self):
+ t = Tensor()
+ t.memory_size = 1000
+ self.assertEqual(t.memory_size, 1000)
+
+
+if __name__ == '__main__':
+ unittest.main()
diff --git a/tools/tflitefile_tool/tests/test_tflite_parser.py b/tools/tflitefile_tool/tests/test_tflite_parser.py
new file mode 100644
index 000000000..dd1447a8a
--- /dev/null
+++ b/tools/tflitefile_tool/tests/test_tflite_parser.py
@@ -0,0 +1,63 @@
+#!/usr/bin/env python
+
+# Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+import unittest
+import tflite.Model
+from parser.tflite.tflite_parser import TFLiteParser, TFLiteSubgraphParser
+from .test_setup import TEST_MODEL_PATH
+
+
+class TFLiteSubgraphParserTestCase(unittest.TestCase):
+ def setUp(self):
+ self.model_file = open(TEST_MODEL_PATH, 'rb')
+
+ def tearDown(self):
+ self.model_file.close()
+
+ def test_Parse(self):
+ buf = bytearray(self.model_file.read())
+ tf_model = tflite.Model.Model.GetRootAsModel(buf, 0)
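+ # Compare each parsed subgraph against the raw FlatBuffer model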
+ for subgraph_index in range(tf_model.SubgraphsLength()):
+ tf_subgraph = tf_model.Subgraphs(subgraph_index)
+ subg_parser = TFLiteSubgraphParser(tf_model, subgraph_index)
+ subg = subg_parser.Parse()
+ self.assertEqual(subg.index, subgraph_index)
+ self.assertEqual(len(subg.inputs), tf_subgraph.InputsLength())
+ self.assertEqual(len(subg.outputs), tf_subgraph.OutputsLength())
+ # if there are optional tensors, this assertion could be wrong
+ self.assertEqual(len(subg.tensors_map.keys()), tf_subgraph.TensorsLength())
+ self.assertEqual(
+ len(subg.operators_map.keys()), tf_subgraph.OperatorsLength())
+ # this holds because the model at TEST_MODEL_PATH contains a single op (ADD)
+ self.assertEqual(len(subg.optypes_map.keys()), tf_subgraph.OperatorsLength())
+
+
+class TFLiteParserTestCase(unittest.TestCase):
+ def setUp(self):
+ self.model_file = open(TEST_MODEL_PATH, 'rb')
+ self.parser = TFLiteParser(self.model_file)
+
+ def tearDown(self):
+ self.model_file.close()
+
+ def test_Parse(self):
+ subg_list = self.parser.Parse()
+ self.assertIsNotNone(subg_list)
+ self.assertEqual(len(subg_list), 1)
+
+
+if __name__ == '__main__':
+ unittest.main()