-rw-r--r--Makefile.template6
-rw-r--r--compiler/circle-quantizer/CMakeLists.txt4
-rw-r--r--compiler/circle-quantizer/include/CircleExpContract.h49
-rw-r--r--compiler/circle-quantizer/src/CircleQuantizer.cpp60
-rw-r--r--compiler/circle2circle-dredd-recipe-test/test.lst1
-rw-r--r--compiler/circle2circle/include/CircleExpContract.h50
-rw-r--r--compiler/circle2circle/src/Circle2Circle.cpp32
-rw-r--r--compiler/circlechef/circle/CMakeLists.txt1
-rw-r--r--compiler/circlechef/circle/src/CircleImport.h65
-rw-r--r--compiler/circlechef/core/src/ModelChef.cpp45
-rw-r--r--compiler/circledump/src/OpPrinter.cpp1
-rw-r--r--compiler/common-artifacts/CMakeLists.txt16
-rw-r--r--compiler/common-artifacts/exclude.lst2
-rw-r--r--compiler/locomotiv/src/Node/BiasAdd.cpp25
-rw-r--r--compiler/locomotiv/src/Node/BiasEncode.cpp13
-rw-r--r--compiler/locomotiv/src/Node/ConstGen.cpp13
-rw-r--r--compiler/locomotiv/src/Node/Conv2D.cpp13
-rw-r--r--compiler/locomotiv/src/Node/DepthwiseConv2D.cpp13
-rw-r--r--compiler/locomotiv/src/Node/DepthwiseFilterEncode.cpp13
-rw-r--r--compiler/locomotiv/src/Node/FeatureDecode.cpp13
-rw-r--r--compiler/locomotiv/src/Node/FilterEncode.cpp13
-rw-r--r--compiler/locomotiv/src/Node/Forward.cpp13
-rw-r--r--compiler/locomotiv/src/Node/MatMul.cpp13
-rw-r--r--compiler/locomotiv/src/Node/MatrixDecode.cpp13
-rw-r--r--compiler/locomotiv/src/Node/MaxPool2D.cpp13
-rw-r--r--compiler/locomotiv/src/Node/Pull.cpp13
-rw-r--r--compiler/locomotiv/src/Node/Push.cpp13
-rw-r--r--compiler/locomotiv/src/Node/Reshape.cpp16
-rw-r--r--compiler/locomotiv/src/Node/Softmax.cpp13
-rw-r--r--compiler/locomotiv/src/Node/TensorBroadcast.cpp16
-rw-r--r--compiler/locomotiv/src/Node/TensorConcat.cpp13
-rw-r--r--compiler/locomotiv/src/Node/TensorConstantPad.cpp13
-rw-r--r--compiler/locomotiv/src/Node/TensorReduce.cpp13
-rw-r--r--compiler/locomotiv/src/Node/TransposedConv2D.cpp13
-rw-r--r--compiler/luci-interpreter/src/CMakeLists.txt12
-rw-r--r--compiler/luci-interpreter/src/kernels/ArgMax.test.cpp15
-rw-r--r--compiler/luci-interpreter/src/kernels/AveragePool2D.cpp17
-rw-r--r--compiler/luci-interpreter/src/kernels/AveragePool2D.test.cpp74
-rw-r--r--compiler/luci-interpreter/src/kernels/CMakeLists.txt12
-rw-r--r--compiler/luci-interpreter/src/kernels/Conv2D.cpp14
-rw-r--r--compiler/luci-interpreter/src/kernels/Conv2D.test.cpp140
-rw-r--r--compiler/luci-interpreter/src/kernels/LeakyRelu.cpp22
-rw-r--r--compiler/luci-interpreter/src/kernels/LeakyRelu.h7
-rw-r--r--compiler/luci-interpreter/src/kernels/Mul.cpp7
-rw-r--r--compiler/luci-interpreter/src/kernels/Rsqrt.cpp66
-rw-r--r--compiler/luci-interpreter/src/kernels/Rsqrt.h46
-rw-r--r--compiler/luci-interpreter/src/kernels/Rsqrt.test.cpp83
-rw-r--r--compiler/luci-interpreter/src/kernels/Sqrt.cpp66
-rw-r--r--compiler/luci-interpreter/src/kernels/Sqrt.h46
-rw-r--r--compiler/luci-interpreter/src/kernels/Sqrt.test.cpp83
-rw-r--r--compiler/luci-interpreter/src/kernels/Tanh.cpp93
-rw-r--r--compiler/luci-interpreter/src/kernels/Tanh.h52
-rw-r--r--compiler/luci-interpreter/src/kernels/Tanh.test.cpp108
-rw-r--r--compiler/luci-interpreter/src/kernels/TransposeConv.cpp14
-rw-r--r--compiler/luci-interpreter/src/kernels/TransposeConv.h3
-rw-r--r--compiler/luci-interpreter/src/kernels/TransposeConv.test.cpp58
-rw-r--r--compiler/luci-interpreter/src/kernels/Utils.h5
-rw-r--r--compiler/luci-interpreter/src/loader/KernelBuilder.cpp38
-rw-r--r--compiler/luci-interpreter/src/loader/KernelBuilder.h3
-rw-r--r--compiler/luci-interpreter/src/loader/KernelBuilder.test.cpp48
-rw-r--r--compiler/luci-value-test/tester/CMakeLists.txt2
-rw-r--r--compiler/luci-value-test/tester/src/CircleExpContract.h49
-rw-r--r--compiler/luci-value-test/tester/src/EvalTester.cpp8
-rw-r--r--compiler/luci/export/include/luci/CircleFileExpContract.h (renamed from compiler/record-minmax/src/CircleExpContract.h)32
-rw-r--r--compiler/luci/export/src/CircleOperationExporter.cpp1270
-rw-r--r--compiler/luci/import/include/luci/Import/Nodes.h2
-rw-r--r--compiler/luci/import/include/luci/Import/Nodes/CircleNonMaxSuppressionV5.h35
-rw-r--r--compiler/luci/import/include/luci/Import/Nodes/CirclePadV2.h37
-rw-r--r--compiler/luci/import/src/GraphBuilderRegistry.cpp4
-rw-r--r--compiler/luci/import/src/Nodes/CircleBatchToSpaceND.cpp39
-rw-r--r--compiler/luci/import/src/Nodes/CircleConst.cpp4
-rw-r--r--compiler/luci/import/src/Nodes/CircleMaximum.cpp34
-rw-r--r--compiler/luci/import/src/Nodes/CircleMinimum.cpp34
-rw-r--r--compiler/luci/import/src/Nodes/CircleNonMaxSuppressionV5.cpp126
-rw-r--r--compiler/luci/import/src/Nodes/CirclePadV2.cpp52
-rw-r--r--compiler/luci/import/src/Nodes/CircleReduceMax.cpp25
-rw-r--r--compiler/luci/import/src/Nodes/CircleReduceMin.cpp25
-rw-r--r--compiler/luci/import/src/Nodes/CircleSpaceToBatchND.cpp39
-rw-r--r--compiler/luci/import/src/Nodes/CircleSparseToDense.cpp3
-rw-r--r--compiler/luci/import/src/Nodes/CircleTransposeConv.cpp13
-rw-r--r--compiler/luci/import/src/ValidateHelpers.cpp121
-rw-r--r--compiler/luci/import/src/ValidateHelpers.h35
-rw-r--r--compiler/luci/lang/include/luci/IR/CircleNodes.h2
-rw-r--r--compiler/luci/lang/include/luci/IR/CircleNodes.lst2
-rw-r--r--compiler/luci/lang/include/luci/IR/Nodes/CircleNonMaxSuppressionV5.h56
-rw-r--r--compiler/luci/lang/include/luci/IR/Nodes/CircleNonMaxSuppressionV5Out.h51
-rw-r--r--compiler/luci/lang/include/luci/IR/Nodes/CircleSparseToDense.h2
-rw-r--r--compiler/luci/lang/include/luci/IR/Nodes/CircleTransposeConv.h18
-rw-r--r--compiler/luci/lang/src/Nodes/CircleConst.cpp1
-rw-r--r--compiler/luci/lang/src/Nodes/CircleNonMaxSuppressionV5.test.cpp101
-rw-r--r--compiler/luci/lang/src/Nodes/CircleNonMaxSuppressionV5Out.test.cpp32
-rw-r--r--compiler/luci/lang/src/Nodes/CircleSparseToDense.test.cpp2
-rw-r--r--compiler/luci/lang/src/Nodes/CircleTransposeConv.test.cpp4
-rw-r--r--compiler/luci/logex/src/FormattedGraph.cpp1248
-rw-r--r--compiler/luci/pass/include/luci/CircleOptimizer.h2
-rw-r--r--compiler/luci/pass/include/luci/Pass/FuseBatchNormWithTConv.h37
-rw-r--r--compiler/luci/pass/include/luci/Pass/RequantizePass.h (renamed from runtime/onert/backend/cpu/ops/ReLULayer.h)49
-rw-r--r--compiler/luci/pass/src/CircleOptimizer.cpp36
-rw-r--r--compiler/luci/pass/src/FuseBCQPass.cpp4
-rw-r--r--compiler/luci/pass/src/FuseBatchNormWithTConv.cpp159
-rw-r--r--compiler/luci/pass/src/QuantizeWithMinMaxPass.cpp7
-rw-r--r--compiler/luci/pass/src/RequantizePass.cpp241
-rw-r--r--compiler/luci/service/src/CircleShapeInferenceRule.cpp3210
-rw-r--r--compiler/luci/service/src/CircleTypeInferenceRule.cpp21
-rw-r--r--compiler/luci/tests/test.lst28
-rw-r--r--compiler/one-cmds/one-import-tf24
-rw-r--r--compiler/one-cmds/one-prepare-venv4
-rw-r--r--compiler/pota-quantization-value-test/expected_outputs/Conv2D_004/channel/uint8/fake_quantization/ker.json48
-rw-r--r--compiler/pota-quantization-value-test/expected_outputs/Conv2D_004/channel/uint8/quantization/bias.json10
-rw-r--r--compiler/pota-quantization-value-test/expected_outputs/Conv2D_004/channel/uint8/quantization/ifm.json4
-rw-r--r--compiler/pota-quantization-value-test/expected_outputs/Conv2D_004/channel/uint8/quantization/ker.json64
-rw-r--r--compiler/pota-quantization-value-test/expected_outputs/Conv2D_004/channel/uint8/quantization/ofm.json4
-rw-r--r--compiler/pota-quantization-value-test/expected_outputs/Conv2D_004/channel/uint8/record_minmax/ifm.json4
-rw-r--r--compiler/pota-quantization-value-test/expected_outputs/Conv2D_004/channel/uint8/record_minmax/ofm.json4
-rw-r--r--compiler/pota-quantization-value-test/expected_outputs/DepthwiseConv2D_002/channel/uint8/fake_quantization/ker.json34
-rw-r--r--compiler/pota-quantization-value-test/expected_outputs/DepthwiseConv2D_002/channel/uint8/quantization/bias.json14
-rw-r--r--compiler/pota-quantization-value-test/expected_outputs/DepthwiseConv2D_002/channel/uint8/quantization/ifm.json4
-rw-r--r--compiler/pota-quantization-value-test/expected_outputs/DepthwiseConv2D_002/channel/uint8/quantization/ker.json58
-rw-r--r--compiler/pota-quantization-value-test/expected_outputs/DepthwiseConv2D_002/channel/uint8/quantization/ofm.json4
-rw-r--r--compiler/pota-quantization-value-test/expected_outputs/DepthwiseConv2D_002/channel/uint8/record_minmax/ifm.json4
-rw-r--r--compiler/pota-quantization-value-test/expected_outputs/DepthwiseConv2D_002/channel/uint8/record_minmax/ofm.json4
-rw-r--r--compiler/pota-quantization-value-test/expected_outputs/FullyConnected_003/channel/uint8/fake_quantization/weight.json76
-rw-r--r--compiler/pota-quantization-value-test/expected_outputs/FullyConnected_003/channel/uint8/quantization/bias.json14
-rw-r--r--compiler/pota-quantization-value-test/expected_outputs/FullyConnected_003/channel/uint8/quantization/in.json4
-rw-r--r--compiler/pota-quantization-value-test/expected_outputs/FullyConnected_003/channel/uint8/quantization/out.json4
-rw-r--r--compiler/pota-quantization-value-test/expected_outputs/FullyConnected_003/channel/uint8/quantization/weight.json100
-rw-r--r--compiler/pota-quantization-value-test/expected_outputs/FullyConnected_003/channel/uint8/record_minmax/in.json4
-rw-r--r--compiler/pota-quantization-value-test/expected_outputs/FullyConnected_003/channel/uint8/record_minmax/out.json4
-rw-r--r--compiler/pota-quantization-value-test/expected_outputs/TransposeConv_001/channel/uint8/fake_quantization/ker.json48
-rw-r--r--compiler/pota-quantization-value-test/expected_outputs/TransposeConv_001/channel/uint8/quantization/ifm.json4
-rw-r--r--compiler/pota-quantization-value-test/expected_outputs/TransposeConv_001/channel/uint8/quantization/ker.json60
-rw-r--r--compiler/pota-quantization-value-test/expected_outputs/TransposeConv_001/channel/uint8/quantization/ofm.json4
-rw-r--r--compiler/pota-quantization-value-test/expected_outputs/TransposeConv_001/channel/uint8/record_minmax/ifm.json4
-rw-r--r--compiler/pota-quantization-value-test/expected_outputs/TransposeConv_001/channel/uint8/record_minmax/ofm.json4
-rw-r--r--compiler/pota-quantization-value-test/test.lst4
-rw-r--r--compiler/pota-quantization-value-test/test_inputs/Conv2D_004/channel/uint8/0.txt1
-rw-r--r--compiler/pota-quantization-value-test/test_inputs/Conv2D_004/channel/uint8/1.txt1
-rw-r--r--compiler/pota-quantization-value-test/test_inputs/Conv2D_004/channel/uint8/2.txt1
-rw-r--r--compiler/pota-quantization-value-test/test_inputs/Conv2D_004/channel/uint8/3.txt1
-rw-r--r--compiler/pota-quantization-value-test/test_inputs/Conv2D_004/channel/uint8/4.txt1
-rw-r--r--compiler/pota-quantization-value-test/test_inputs/DepthwiseConv2D_002/channel/uint8/0.txt1
-rw-r--r--compiler/pota-quantization-value-test/test_inputs/DepthwiseConv2D_002/channel/uint8/1.txt1
-rw-r--r--compiler/pota-quantization-value-test/test_inputs/DepthwiseConv2D_002/channel/uint8/2.txt1
-rw-r--r--compiler/pota-quantization-value-test/test_inputs/DepthwiseConv2D_002/channel/uint8/3.txt1
-rw-r--r--compiler/pota-quantization-value-test/test_inputs/DepthwiseConv2D_002/channel/uint8/4.txt1
-rw-r--r--compiler/pota-quantization-value-test/test_inputs/FullyConnected_003/channel/uint8/0.txt1
-rw-r--r--compiler/pota-quantization-value-test/test_inputs/FullyConnected_003/channel/uint8/1.txt1
-rw-r--r--compiler/pota-quantization-value-test/test_inputs/FullyConnected_003/channel/uint8/2.txt1
-rw-r--r--compiler/pota-quantization-value-test/test_inputs/FullyConnected_003/channel/uint8/3.txt1
-rw-r--r--compiler/pota-quantization-value-test/test_inputs/FullyConnected_003/channel/uint8/4.txt1
-rw-r--r--compiler/pota-quantization-value-test/test_inputs/TransposeConv_001/channel/uint8/0.txt1
-rw-r--r--compiler/pota-quantization-value-test/test_inputs/TransposeConv_001/channel/uint8/1.txt1
-rw-r--r--compiler/pota-quantization-value-test/test_inputs/TransposeConv_001/channel/uint8/2.txt1
-rw-r--r--compiler/pota-quantization-value-test/test_inputs/TransposeConv_001/channel/uint8/3.txt1
-rw-r--r--compiler/pota-quantization-value-test/test_inputs/TransposeConv_001/channel/uint8/4.txt1
-rw-r--r--compiler/record-minmax/src/RecordMinMax.cpp14
-rw-r--r--compiler/souschef/CMakeLists.txt8
-rw-r--r--compiler/souschef/include/souschef/Dataset.h17
-rw-r--r--compiler/souschef/include/souschef/Dims.h48
-rw-r--r--compiler/souschef/include/souschef/TensorFiller.h96
-rw-r--r--compiler/souschef/src/Dims.cpp (renamed from compiler/luci-value-test/tester/src/CircleExpContract.cpp)18
-rw-r--r--compiler/tflchef/core/src/ModelChef.cpp45
-rw-r--r--compiler/tflchef/core/src/Op/NonMaxSuppressionV5.cpp (renamed from compiler/record-minmax/src/CircleExpContract.cpp)26
-rw-r--r--compiler/tflchef/core/src/Op/NonMaxSuppressionV5.h52
-rw-r--r--compiler/tflchef/core/src/OpChef.def1
-rw-r--r--compiler/tflchef/core/src/OpChefs.h1
-rw-r--r--compiler/tflchef/proto/tflchef.proto6
-rw-r--r--compiler/tflchef/tflite/CMakeLists.txt1
-rw-r--r--compiler/tflchef/tflite/src/Op/NonMaxSuppressionV5.cpp59
-rw-r--r--compiler/tflchef/tflite/src/Op/NonMaxSuppressionV5.h39
-rw-r--r--compiler/tflchef/tflite/src/TFliteImport.h65
-rw-r--r--compiler/tflchef/tflite/src/TFliteOpChefs.h1
-rw-r--r--compiler/tflchef/tflite/src/TFliteOpRegistry.h1
-rw-r--r--compiler/tfldump/src/OpPrinter.cpp2
-rw-r--r--compiler/tflite2circle/src/BuildBuiltinOptions.h1
-rw-r--r--compiler/tflite2circle/src/BuildBuiltinOptions/NonMaxSuppressionV5Options.cpp (renamed from compiler/circle2circle/src/CircleExpContract.cpp)23
-rw-r--r--compiler/tflite2circle/src/BuildBuiltinOptions/NonMaxSuppressionV5Options.h32
-rw-r--r--compiler/tflite2circle/src/CircleModel.cpp79
-rw-r--r--compiler/tflite2circle/src/DataLookup.cpp75
-rw-r--r--compiler/tflite2circle/src/DataLookup.h19
-rw-r--r--compiler/tflite2circle/src/TFLBuiltinOptions.lst2
-rw-r--r--compiler/vconone/CMakeLists.txt2
-rw-r--r--compute/ARMComputeEx/src/runtime/CL/functions/CLReduceOperation.cpp2
-rw-r--r--compute/ARMComputeEx/src/runtime/NEON/functions/NETransposeConvLayer.cpp16
-rw-r--r--compute/cker/include/cker/Types.h3
-rw-r--r--compute/cker/include/cker/eigen/eigen_convolution_helpers.h30
-rw-r--r--compute/cker/include/cker/eigen/eigen_spatial_convolutions.h30
-rw-r--r--compute/cker/include/cker/neon/neon_check.h29
-rw-r--r--compute/cker/include/cker/operation/AveragePool.h17
-rw-r--r--compute/cker/include/cker/operation/Conv.h41
-rw-r--r--compute/cker/include/cker/operation/Erf.h (renamed from runtime/onert/core/src/ir/operation/Sin.cc)31
-rw-r--r--compute/cker/include/cker/operation/LogSoftMax.h56
-rw-r--r--compute/cker/include/cker/operation/MaxPool.h15
-rw-r--r--compute/cker/include/cker/operation/SoftMax.h38
-rw-r--r--compute/cker/include/cker/operation/optimized/BinaryArithmeticOps.h124
-rw-r--r--compute/cker/include/cker/operation/optimized/OptimizedUtils.h103
-rw-r--r--compute/cker/include/cker/operation/reference/BinaryArithmeticOps.h4
-rw-r--r--compute/test/CMakeLists.txt4
-rw-r--r--docs/conf.py2
-rw-r--r--docs/howto/how-to-introduce-a-new-operation-into-runtime.md57
-rw-r--r--docs/release/1.9/release-note-1.9.0.md38
-rw-r--r--docs/runtime/compute.md12
-rw-r--r--infra/3rdparty/Eigen/fd6845384b86/URL.default2
-rw-r--r--infra/cmake/packages/BoostSourceConfig.cmake2
-rw-r--r--infra/cmake/packages/EigenSourceConfig.cmake2
-rw-r--r--infra/cmake/packages/TensorFlowGEMMLowpSource-2.3.0/TensorFlowGEMMLowpSourceConfig.cmake20
-rw-r--r--infra/cmake/packages/TensorFlowGEMMLowpSource-2.3.0/TensorFlowGEMMLowpSourceConfigVersion.cmake10
-rw-r--r--infra/cmake/packages/TensorFlowRuySource-2.3.0/TensorFlowRuySourceConfig.cmake20
-rw-r--r--infra/cmake/packages/TensorFlowRuySource-2.3.0/TensorFlowRuySourceConfigVersion.cmake10
-rw-r--r--infra/nncc/CMakeLists.txt1
-rw-r--r--infra/nnfw/cmake/CfgOptionFlags.cmake1
-rw-r--r--infra/nnfw/cmake/options/options_aarch64-android.cmake6
-rw-r--r--infra/nnfw/cmake/options/options_aarch64-tizen.cmake1
-rw-r--r--infra/nnfw/cmake/options/options_armv7l-tizen.cmake1
-rw-r--r--infra/nnfw/cmake/packages/BoostConfig.cmake11
-rw-r--r--infra/nnfw/cmake/packages/HDF5Config.cmake5
-rw-r--r--infra/nnfw/command/copyright-check7
-rwxr-xr-xinfra/scripts/build-tcm.sh2
-rwxr-xr-xinfra/scripts/docker_build_nncc.sh4
-rwxr-xr-xinfra/scripts/tizen_xu4_test.sh13
-rw-r--r--packaging/nnfw.spec4
-rw-r--r--res/TensorFlowLiteRecipes/Net_TConv_BN_000/test.rule7
-rw-r--r--res/TensorFlowLiteRecipes/NonMaxSuppressionV4_000/test.recipe52
-rw-r--r--res/TensorFlowLiteRecipes/NonMaxSuppressionV4_000/test.reverse0
-rw-r--r--res/TensorFlowLiteRecipes/NonMaxSuppressionV4_001/test.recipe52
-rw-r--r--res/TensorFlowLiteRecipes/NonMaxSuppressionV4_001/test.reverse0
-rw-r--r--res/TensorFlowLiteRecipes/NonMaxSuppressionV5_000/test.recipe66
-rw-r--r--res/TensorFlowLiteRecipes/NonMaxSuppressionV5_000/test.reverse0
-rw-r--r--res/TensorFlowLiteRecipes/NonMaxSuppressionV5_001/test.recipe66
-rw-r--r--res/TensorFlowLiteRecipes/NonMaxSuppressionV5_001/test.reverse0
-rw-r--r--res/TensorFlowLiteRecipes/PadV2_000/test.recipe40
-rw-r--r--res/TensorFlowLiteRecipes/PadV2_000/test.reverse0
-rw-r--r--res/TensorFlowLiteRecipes/Tanh_U8_000/test.recipe19
-rw-r--r--res/TensorFlowLiteRecipes/Tanh_U8_000/test.reverse0
-rw-r--r--res/TensorFlowLiteRecipes/Unique_000/test.recipe2
-rw-r--r--res/TensorFlowLiteRecipes/Unique_001/test.recipe2
-rw-r--r--res/TensorFlowLiteRecipes/Unique_002/test.recipe2
-rw-r--r--res/TensorFlowLiteRecipes/Unique_003/test.recipe2
-rw-r--r--res/TensorFlowLiteRecipes/Unique_U8_000/test.recipe2
-rw-r--r--res/TensorFlowLiteRecipes/Unique_U8_001/test.recipe2
-rw-r--r--res/TensorFlowPythonExamples/examples/PadV2/__init__.py8
-rw-r--r--res/TensorFlowPythonExamples/examples/gelu/__init__.py7
-rw-r--r--res/TensorFlowPythonExamples/examples/gelu_2/__init__.py7
-rw-r--r--res/TensorFlowPythonExamples/examples/non_max_suppression_padded/__init__.py8
-rw-r--r--res/TensorFlowPythonExamples/examples/non_max_suppression_padded_2/__init__.py13
-rw-r--r--res/TensorFlowPythonExamples/examples/non_max_suppression_with_scores/__init__.py10
-rw-r--r--res/TensorFlowPythonExamples/examples/non_max_suppression_with_scores_2/__init__.py14
-rw-r--r--runtime/contrib/android/api/build.gradle2
-rw-r--r--runtime/contrib/android/api/src/main/native/onert-native-api.h16
-rw-r--r--runtime/libs/benchmark/include/benchmark/Phases.h4
-rw-r--r--runtime/libs/benchmark/src/MemoryPoller.cpp29
-rw-r--r--runtime/libs/benchmark/src/Phases.cpp20
-rw-r--r--runtime/onert/api/include/nnfw.h2
-rw-r--r--runtime/onert/api/include/nnfw_experimental.h34
-rw-r--r--runtime/onert/api/include/nnfw_version.h2
-rw-r--r--runtime/onert/api/src/nnfw_api.cc13
-rw-r--r--runtime/onert/api/src/nnfw_api_internal.cc65
-rw-r--r--runtime/onert/api/src/nnfw_api_internal.h11
-rw-r--r--runtime/onert/backend/acl_cl/Backend.h10
-rw-r--r--runtime/onert/backend/acl_cl/ConstantInitializer.cc99
-rw-r--r--runtime/onert/backend/acl_cl/ConstantInitializer.h32
-rw-r--r--runtime/onert/backend/acl_cl/KernelGenerator.cc1238
-rw-r--r--runtime/onert/backend/acl_cl/KernelGenerator.h37
-rw-r--r--runtime/onert/backend/acl_cl/Optimizer.cc2
-rw-r--r--runtime/onert/backend/acl_cl/TensorManager.h2
-rw-r--r--runtime/onert/backend/acl_common/AclConstantInitializer.cc128
-rw-r--r--runtime/onert/backend/acl_common/AclConstantInitializer.h61
-rw-r--r--runtime/onert/backend/acl_common/AclFunction.h6
-rw-r--r--runtime/onert/backend/acl_common/AclKernelGen.h149
-rw-r--r--runtime/onert/backend/acl_common/AclTensorBuilder.h42
-rw-r--r--runtime/onert/backend/acl_common/AclTensorRegistry.h59
-rw-r--r--runtime/onert/backend/acl_common/Convert.cc65
-rw-r--r--runtime/onert/backend/acl_common/Convert.h7
-rw-r--r--runtime/onert/backend/acl_neon/Backend.h9
-rw-r--r--runtime/onert/backend/acl_neon/ConstantInitializer.cc97
-rw-r--r--runtime/onert/backend/acl_neon/ConstantInitializer.h26
-rw-r--r--runtime/onert/backend/acl_neon/KernelGenerator.cc1249
-rw-r--r--runtime/onert/backend/acl_neon/KernelGenerator.h36
-rw-r--r--runtime/onert/backend/acl_neon/Optimizer.cc2
-rw-r--r--runtime/onert/backend/acl_neon/TensorManager.h2
-rw-r--r--runtime/onert/backend/cpu/Backend.h8
-rw-r--r--runtime/onert/backend/cpu/BackendContext.h6
-rw-r--r--runtime/onert/backend/cpu/ConstantInitializer.cc4
-rw-r--r--runtime/onert/backend/cpu/ConstantInitializer.h8
-rw-r--r--runtime/onert/backend/cpu/KernelGenerator.cc851
-rw-r--r--runtime/onert/backend/cpu/KernelGenerator.h34
-rw-r--r--runtime/onert/backend/cpu/TensorBuilder.cc31
-rw-r--r--runtime/onert/backend/cpu/TensorBuilder.h26
-rw-r--r--runtime/onert/backend/cpu/ops/AbsLayer.cc70
-rw-r--r--runtime/onert/backend/cpu/ops/AbsLayer.h57
-rw-r--r--runtime/onert/backend/cpu/ops/AddLayer.cc166
-rw-r--r--runtime/onert/backend/cpu/ops/AddLayer.h67
-rw-r--r--runtime/onert/backend/cpu/ops/AvgPoolLayer.cc118
-rw-r--r--runtime/onert/backend/cpu/ops/AvgPoolLayer.h75
-rw-r--r--runtime/onert/backend/cpu/ops/BinaryArithmeticLayer.cc225
-rw-r--r--runtime/onert/backend/cpu/ops/BinaryArithmeticLayer.h (renamed from runtime/onert/backend/cpu/ops/DivLayer.h)28
-rw-r--r--runtime/onert/backend/cpu/ops/CastLayer.cc112
-rw-r--r--runtime/onert/backend/cpu/ops/CastLayer.h57
-rw-r--r--runtime/onert/backend/cpu/ops/ConvolutionLayer.cc21
-rw-r--r--runtime/onert/backend/cpu/ops/ConvolutionLayer.h5
-rw-r--r--runtime/onert/backend/cpu/ops/CosLayer.cc68
-rw-r--r--runtime/onert/backend/cpu/ops/CosLayer.h54
-rw-r--r--runtime/onert/backend/cpu/ops/DivLayer.cc95
-rw-r--r--runtime/onert/backend/cpu/ops/ElementwiseActivationLayer.cc173
-rw-r--r--runtime/onert/backend/cpu/ops/ElementwiseActivationLayer.h (renamed from runtime/onert/backend/cpu/ops/TanhLayer.h)29
-rw-r--r--runtime/onert/backend/cpu/ops/ElementwiseBinaryLayer.cc151
-rw-r--r--runtime/onert/backend/cpu/ops/ElementwiseBinaryLayer.h (renamed from runtime/onert/backend/cpu/ops/MaxLayer.h)26
-rw-r--r--runtime/onert/backend/cpu/ops/ElementwiseUnaryLayer.cc336
-rw-r--r--runtime/onert/backend/cpu/ops/ElementwiseUnaryLayer.h (renamed from runtime/onert/backend/cpu/ops/ReLU6Layer.h)38
-rw-r--r--runtime/onert/backend/cpu/ops/ExpLayer.cc74
-rw-r--r--runtime/onert/backend/cpu/ops/LogLayer.cc70
-rw-r--r--runtime/onert/backend/cpu/ops/LogLayer.h57
-rw-r--r--runtime/onert/backend/cpu/ops/LogSoftMaxLayer.cc26
-rw-r--r--runtime/onert/backend/cpu/ops/LogSoftMaxLayer.h3
-rw-r--r--runtime/onert/backend/cpu/ops/LogicalNotLayer.cc64
-rw-r--r--runtime/onert/backend/cpu/ops/LogicalNotLayer.h56
-rw-r--r--runtime/onert/backend/cpu/ops/LogicalOrLayer.cc76
-rw-r--r--runtime/onert/backend/cpu/ops/LogicalOrLayer.h59
-rw-r--r--runtime/onert/backend/cpu/ops/LogisticLayer.cc108
-rw-r--r--runtime/onert/backend/cpu/ops/LogisticLayer.h60
-rw-r--r--runtime/onert/backend/cpu/ops/MaxLayer.cc85
-rw-r--r--runtime/onert/backend/cpu/ops/MaxPoolLayer.cc115
-rw-r--r--runtime/onert/backend/cpu/ops/MinLayer.cc89
-rw-r--r--runtime/onert/backend/cpu/ops/MinLayer.h61
-rw-r--r--runtime/onert/backend/cpu/ops/MulLayer.cc116
-rw-r--r--runtime/onert/backend/cpu/ops/MulLayer.h65
-rw-r--r--runtime/onert/backend/cpu/ops/NegLayer.cc70
-rw-r--r--runtime/onert/backend/cpu/ops/NegLayer.h57
-rw-r--r--runtime/onert/backend/cpu/ops/PoolLayer.cc132
-rw-r--r--runtime/onert/backend/cpu/ops/PoolLayer.h (renamed from runtime/onert/backend/cpu/ops/MaxPoolLayer.h)37
-rw-r--r--runtime/onert/backend/cpu/ops/QuantizeLayer.cc63
-rw-r--r--runtime/onert/backend/cpu/ops/QuantizeLayer.h56
-rw-r--r--runtime/onert/backend/cpu/ops/RankLayer.cc (renamed from runtime/onert/backend/cpu/ops/RoundLayer.cc)24
-rw-r--r--runtime/onert/backend/cpu/ops/RankLayer.h (renamed from runtime/onert/backend/cpu/ops/ZerosLikeLayer.h)12
-rw-r--r--runtime/onert/backend/cpu/ops/ReLU6Layer.cc74
-rw-r--r--runtime/onert/backend/cpu/ops/ReLULayer.cc74
-rw-r--r--runtime/onert/backend/cpu/ops/ReduceLayer.cc90
-rw-r--r--runtime/onert/backend/cpu/ops/ReduceLayer.h5
-rw-r--r--runtime/onert/backend/cpu/ops/RoundLayer.h54
-rw-r--r--runtime/onert/backend/cpu/ops/RsqrtLayer.cc69
-rw-r--r--runtime/onert/backend/cpu/ops/RsqrtLayer.h53
-rw-r--r--runtime/onert/backend/cpu/ops/SinLayer.cc68
-rw-r--r--runtime/onert/backend/cpu/ops/SinLayer.h54
-rw-r--r--runtime/onert/backend/cpu/ops/SoftMaxLayer.cc50
-rw-r--r--runtime/onert/backend/cpu/ops/SubLayer.cc162
-rw-r--r--runtime/onert/backend/cpu/ops/SubLayer.h67
-rw-r--r--runtime/onert/backend/cpu/ops/TanhLayer.cc103
-rw-r--r--runtime/onert/backend/cpu/ops/ZerosLikeLayer.cc63
-rw-r--r--runtime/onert/core/include/backend/BackendContext.h9
-rw-r--r--runtime/onert/core/include/backend/IConstantInitializer.h9
-rw-r--r--runtime/onert/core/include/backend/ITensorBuilder.h50
-rw-r--r--runtime/onert/core/include/backend/ITensorRegistry.h31
-rw-r--r--runtime/onert/core/include/backend/cpu_common/StaticTensorManager.h5
-rw-r--r--runtime/onert/core/include/compiler/LoweredGraph.h90
-rw-r--r--runtime/onert/core/include/compiler/StaticShapeInference.h52
-rw-r--r--runtime/onert/core/include/exec/DynamicShapeInference.h35
-rw-r--r--runtime/onert/core/include/exec/IExecutor.h2
-rw-r--r--runtime/onert/core/include/exec/IODescription.h4
-rw-r--r--runtime/onert/core/include/ir/Graph.h8
-rw-r--r--runtime/onert/core/include/ir/InternalType.h6
-rw-r--r--runtime/onert/core/include/ir/LoweredGraph.h87
-rw-r--r--runtime/onert/core/include/ir/OpSequences.h15
-rw-r--r--runtime/onert/core/include/ir/Operations.Include.h38
-rw-r--r--runtime/onert/core/include/ir/Operations.lst38
-rw-r--r--runtime/onert/core/include/ir/Padding.h3
-rw-r--r--runtime/onert/core/include/ir/operation/Abs.h49
-rw-r--r--runtime/onert/core/include/ir/operation/BinaryArithmetic.h (renamed from runtime/onert/core/include/ir/operation/Add.h)25
-rw-r--r--runtime/onert/core/include/ir/operation/BroadcastTo.h2
-rw-r--r--runtime/onert/core/include/ir/operation/Cast.h49
-rw-r--r--runtime/onert/core/include/ir/operation/Conv2D.h1
-rw-r--r--runtime/onert/core/include/ir/operation/Dequantize.h49
-rw-r--r--runtime/onert/core/include/ir/operation/Einsum.h2
-rw-r--r--runtime/onert/core/include/ir/operation/ElementwiseActivation.h (renamed from runtime/onert/core/include/ir/operation/Div.h)37
-rw-r--r--runtime/onert/core/include/ir/operation/ElementwiseBinary.h (renamed from runtime/onert/core/include/ir/operation/Mul.h)27
-rw-r--r--runtime/onert/core/include/ir/operation/ElementwiseUnary.h (renamed from runtime/onert/core/include/ir/operation/MaxPool2D.h)48
-rw-r--r--runtime/onert/core/include/ir/operation/Exp.h49
-rw-r--r--runtime/onert/core/include/ir/operation/Floor.h51
-rw-r--r--runtime/onert/core/include/ir/operation/Log.h49
-rw-r--r--runtime/onert/core/include/ir/operation/LogicalAnd.h50
-rw-r--r--runtime/onert/core/include/ir/operation/LogicalNot.h49
-rw-r--r--runtime/onert/core/include/ir/operation/LogicalOr.h50
-rw-r--r--runtime/onert/core/include/ir/operation/Logistic.h49
-rw-r--r--runtime/onert/core/include/ir/operation/Max.h50
-rw-r--r--runtime/onert/core/include/ir/operation/Mean.h61
-rw-r--r--runtime/onert/core/include/ir/operation/Min.h50
-rw-r--r--runtime/onert/core/include/ir/operation/Neg.h49
-rw-r--r--runtime/onert/core/include/ir/operation/Pool2D.h (renamed from runtime/onert/core/include/ir/operation/AvgPool2D.h)26
-rw-r--r--runtime/onert/core/include/ir/operation/Quantize.h49
-rw-r--r--runtime/onert/core/include/ir/operation/RSQRT.h49
-rw-r--r--runtime/onert/core/include/ir/operation/Rank.h (renamed from runtime/onert/core/include/ir/operation/Cos.h)14
-rw-r--r--runtime/onert/core/include/ir/operation/ReLU.h49
-rw-r--r--runtime/onert/core/include/ir/operation/ReLU1.h49
-rw-r--r--runtime/onert/core/include/ir/operation/ReLU6.h49
-rw-r--r--runtime/onert/core/include/ir/operation/ResizeNearestNeighbor.h (renamed from runtime/onert/core/include/ir/operation/L2Pool2D.h)26
-rw-r--r--runtime/onert/core/include/ir/operation/Round.h49
-rw-r--r--runtime/onert/core/include/ir/operation/SQRT.h49
-rw-r--r--runtime/onert/core/include/ir/operation/Select.h2
-rw-r--r--runtime/onert/core/include/ir/operation/Sin.h49
-rw-r--r--runtime/onert/core/include/ir/operation/Sub.h62
-rw-r--r--runtime/onert/core/include/ir/operation/Tanh.h49
-rw-r--r--runtime/onert/core/include/ir/operation/ZerosLike.h49
-rw-r--r--runtime/onert/core/include/util/Config.lst2
-rw-r--r--runtime/onert/core/include/util/Exceptions.h (renamed from runtime/onert/backend/cpu/ops/ExpLayer.h)45
-rw-r--r--runtime/onert/core/include/util/ShapeInference.h14
-rw-r--r--runtime/onert/core/src/backend/controlflow/Backend.h9
-rw-r--r--runtime/onert/core/src/backend/controlflow/ConstantInitializer.h10
-rw-r--r--runtime/onert/core/src/backend/controlflow/DynamicTensorManager.cc31
-rw-r--r--runtime/onert/core/src/backend/controlflow/DynamicTensorManager.h13
-rw-r--r--runtime/onert/core/src/backend/controlflow/KernelGenerator.cc74
-rw-r--r--runtime/onert/core/src/backend/controlflow/KernelGenerator.h18
-rw-r--r--runtime/onert/core/src/backend/controlflow/Tensor.h (renamed from runtime/onert/core/src/ir/operation/Log.cc)24
-rw-r--r--runtime/onert/core/src/backend/controlflow/TensorBuilder.cc52
-rw-r--r--runtime/onert/core/src/backend/controlflow/TensorBuilder.h24
-rw-r--r--runtime/onert/core/src/backend/controlflow/TensorRegistry.h134
-rw-r--r--runtime/onert/core/src/backend/controlflow/kernel/PermuteLayer.cc6
-rw-r--r--runtime/onert/core/src/backend/cpu_common/DynamicTensorManager.cc12
-rw-r--r--runtime/onert/core/src/backend/cpu_common/StaticTensorManager.cc8
-rw-r--r--runtime/onert/core/src/compiler/Compiler.cc10
-rw-r--r--runtime/onert/core/src/compiler/ExecutorFactory.cc98
-rw-r--r--runtime/onert/core/src/compiler/ExecutorFactory.h27
-rw-r--r--runtime/onert/core/src/compiler/Fp32ToFp16Converter.cc2
-rw-r--r--runtime/onert/core/src/compiler/Fp32ToFp16Converter.h6
-rw-r--r--runtime/onert/core/src/compiler/HEScheduler.cc38
-rw-r--r--runtime/onert/core/src/compiler/HEScheduler.h2
-rw-r--r--runtime/onert/core/src/compiler/Linear.cc14
-rw-r--r--runtime/onert/core/src/compiler/Linear.h8
-rw-r--r--runtime/onert/core/src/compiler/LoweredGraph.cc (renamed from runtime/onert/core/src/ir/LoweredGraph.cc)157
-rw-r--r--runtime/onert/core/src/compiler/ManualScheduler.cc7
-rw-r--r--runtime/onert/core/src/compiler/OperationValidator.cc166
-rw-r--r--runtime/onert/core/src/compiler/OperationValidator.h21
-rw-r--r--runtime/onert/core/src/compiler/StaticShapeInference.cc406
-rw-r--r--runtime/onert/core/src/compiler/TensorBuilders.h11
-rw-r--r--runtime/onert/core/src/compiler/TensorRegistries.h91
-rw-r--r--runtime/onert/core/src/compiler/pass/ConstantInsertionPass.cc (renamed from runtime/onert/core/src/ir/pass/ConstantInsertionPass.cc)12
-rw-r--r--runtime/onert/core/src/compiler/pass/ConstantInsertionPass.h (renamed from runtime/onert/core/src/ir/pass/ConstantInsertionPass.h)21
-rw-r--r--runtime/onert/core/src/compiler/pass/ConstantLoweringPass.cc (renamed from runtime/onert/core/src/ir/pass/ConstantLoweringPass.cc)12
-rw-r--r--runtime/onert/core/src/compiler/pass/ConstantLoweringPass.h (renamed from runtime/onert/core/src/ir/pass/ConstantLoweringPass.h)12
-rw-r--r--runtime/onert/core/src/compiler/pass/LoweredOperandPass.h (renamed from runtime/onert/core/src/ir/pass/LoweredOperandPass.h)12
-rw-r--r--runtime/onert/core/src/compiler/pass/LoweredOperationPass.h (renamed from runtime/onert/core/src/ir/pass/LoweredOperationPass.h)12
-rw-r--r--runtime/onert/core/src/compiler/pass/OperandPass.cc (renamed from runtime/onert/core/src/ir/pass/OperandPass.cc)6
-rw-r--r--runtime/onert/core/src/compiler/pass/OperandPass.h (renamed from runtime/onert/core/src/ir/pass/OperandPass.h)12
-rw-r--r--runtime/onert/core/src/compiler/pass/OperationPass.cc (renamed from runtime/onert/core/src/ir/pass/OperationPass.cc)6
-rw-r--r--runtime/onert/core/src/compiler/pass/OperationPass.h (renamed from runtime/onert/core/src/ir/pass/OperationPass.h)12
-rw-r--r--runtime/onert/core/src/compiler/pass/Pass.h (renamed from runtime/onert/core/src/ir/pass/Pass.h)16
-rw-r--r--runtime/onert/core/src/compiler/pass/PermutationEliminationPass.cc (renamed from runtime/onert/core/src/ir/pass/PermutationEliminationPass.cc)32
-rw-r--r--runtime/onert/core/src/compiler/pass/PermutationEliminationPass.h (renamed from runtime/onert/core/src/ir/pass/PermutationEliminationPass.h)16
-rw-r--r--runtime/onert/core/src/compiler/pass/PermutationInsertionPass.cc (renamed from runtime/onert/core/src/ir/pass/PermutationInsertionPass.cc)30
-rw-r--r--runtime/onert/core/src/compiler/pass/PermutationInsertionPass.h (renamed from runtime/onert/core/src/ir/pass/PermutationInsertionPass.h)20
-rw-r--r--runtime/onert/core/src/compiler/pass/PermutationOperationPass.cc (renamed from runtime/onert/core/src/ir/pass/PermutationOperationPass.cc)76
-rw-r--r--runtime/onert/core/src/compiler/pass/PermutationOperationPass.h64
-rw-r--r--runtime/onert/core/src/dumper/dot/DotDumper.h6
-rw-r--r--runtime/onert/core/src/exec/DataflowExecutor.cc8
-rw-r--r--runtime/onert/core/src/exec/DataflowExecutor.h5
-rw-r--r--runtime/onert/core/src/exec/DynamicShapeInference.cc113
-rw-r--r--runtime/onert/core/src/exec/Execution.cc13
-rw-r--r--runtime/onert/core/src/exec/ExecutorBase.cc81
-rw-r--r--runtime/onert/core/src/exec/ExecutorBase.h12
-rw-r--r--runtime/onert/core/src/exec/FunctionSequence.cc3
-rw-r--r--runtime/onert/core/src/exec/LinearExecutor.h8
-rw-r--r--runtime/onert/core/src/exec/ParallelExecutor.cc9
-rw-r--r--runtime/onert/core/src/exec/ParallelExecutor.h5
-rw-r--r--runtime/onert/core/src/exec/feature/nchw/Reader.h15
-rw-r--r--runtime/onert/core/src/exec/feature/nchw/View.h88
-rw-r--r--runtime/onert/core/src/exec/feature/nhwc/Reader.h15
-rw-r--r--runtime/onert/core/src/exec/feature/nhwc/View.h88
-rw-r--r--runtime/onert/core/src/interp/InterpOps.lst19
-rw-r--r--runtime/onert/core/src/interp/operations/BinaryArithmeticOps.cc75
-rw-r--r--runtime/onert/core/src/interp/operations/ElementwiseActivations.cc (renamed from runtime/onert/core/src/interp/operations/UnaryActivations.cc)82
-rw-r--r--runtime/onert/core/src/interp/operations/Logistic.cc99
-rw-r--r--runtime/onert/core/src/interp/operations/MaxPool2D.cc125
-rw-r--r--runtime/onert/core/src/interp/operations/Pool2D.cc (renamed from runtime/onert/core/src/interp/operations/AvgPool2D.cc)86
-rw-r--r--runtime/onert/core/src/interp/operations/Softmax.cc39
-rw-r--r--runtime/onert/core/src/ir/Graph.cc20
-rw-r--r--runtime/onert/core/src/ir/GraphIterator.cc2
-rw-r--r--runtime/onert/core/src/ir/GraphIterator.h12
-rw-r--r--runtime/onert/core/src/ir/OpSequences.cc16
-rw-r--r--runtime/onert/core/src/ir/OperationDumper.cc590
-rw-r--r--runtime/onert/core/src/ir/OperationDumper.h36
-rw-r--r--runtime/onert/core/src/ir/Padding.cc20
-rw-r--r--runtime/onert/core/src/ir/operation/Abs.cc39
-rw-r--r--runtime/onert/core/src/ir/operation/AvgPool2D.cc40
-rw-r--r--runtime/onert/core/src/ir/operation/BinaryArithmetic.cc (renamed from runtime/onert/core/src/ir/operation/Add.cc)22
-rw-r--r--runtime/onert/core/src/ir/operation/Cast.cc39
-rw-r--r--runtime/onert/core/src/ir/operation/Dequantize.cc39
-rw-r--r--runtime/onert/core/src/ir/operation/Div.cc40
-rw-r--r--runtime/onert/core/src/ir/operation/ElementwiseActivation.cc72
-rw-r--r--runtime/onert/core/src/ir/operation/ElementwiseBinary.cc52
-rw-r--r--runtime/onert/core/src/ir/operation/ElementwiseUnary.cc65
-rw-r--r--runtime/onert/core/src/ir/operation/Exp.cc39
-rw-r--r--runtime/onert/core/src/ir/operation/Floor.cc39
-rw-r--r--runtime/onert/core/src/ir/operation/L2Pool2D.cc40
-rw-r--r--runtime/onert/core/src/ir/operation/LogicalAnd.cc39
-rw-r--r--runtime/onert/core/src/ir/operation/LogicalNot.cc39
-rw-r--r--runtime/onert/core/src/ir/operation/LogicalOr.cc39
-rw-r--r--runtime/onert/core/src/ir/operation/Logistic.cc39
-rw-r--r--runtime/onert/core/src/ir/operation/Max.cc39
-rw-r--r--runtime/onert/core/src/ir/operation/MaxPool2D.cc40
-rw-r--r--runtime/onert/core/src/ir/operation/Min.cc39
-rw-r--r--runtime/onert/core/src/ir/operation/Mul.cc40
-rw-r--r--runtime/onert/core/src/ir/operation/Neg.cc39
-rw-r--r--runtime/onert/core/src/ir/operation/Pad.cc4
-rw-r--r--runtime/onert/core/src/ir/operation/Pool2D.cc51
-rw-r--r--runtime/onert/core/src/ir/operation/RSQRT.cc39
-rw-r--r--runtime/onert/core/src/ir/operation/Rank.cc (renamed from runtime/onert/core/src/ir/operation/Cos.cc)6
-rw-r--r--runtime/onert/core/src/ir/operation/ReLU.cc39
-rw-r--r--runtime/onert/core/src/ir/operation/ReLU1.cc39
-rw-r--r--runtime/onert/core/src/ir/operation/ReLU6.cc39
-rw-r--r--runtime/onert/core/src/ir/operation/ResizeNearestNeighbor.cc (renamed from runtime/onert/core/src/ir/operation/Round.cc)10
-rw-r--r--runtime/onert/core/src/ir/operation/SQRT.cc39
-rw-r--r--runtime/onert/core/src/ir/operation/Sub.cc40
-rw-r--r--runtime/onert/core/src/ir/operation/Tanh.cc39
-rw-r--r--runtime/onert/core/src/ir/operation/ZerosLike.cc39
-rw-r--r--runtime/onert/core/src/ir/pass/PermutationOperationPass.h70
-rw-r--r--runtime/onert/core/src/util/EventRecorder.cc297
-rw-r--r--runtime/onert/core/src/util/EventRecorder.h4
-rw-r--r--runtime/onert/core/src/util/ShapeInference.cc54
-rw-r--r--runtime/onert/frontend/base_loader/include/base_loader.h557
-rw-r--r--runtime/onert/frontend/circle/src/circle_loader.cc6
-rw-r--r--runtime/onert/frontend/nnapi/wrapper/OperationFactory.cc696
-rw-r--r--runtime/onert/frontend/tflite/src/tflite_loader.cc6
-rw-r--r--runtime/onert/test/core/compiler/Scheduler.cc38
-rw-r--r--runtime/onert/test/core/exec/ExecInstance.cc14
-rw-r--r--runtime/onert/test/core/interp/ExecManager.cc26
-rw-r--r--runtime/onert/test/util/ShapeInference.cc39
-rw-r--r--tests/nnapi/nnapi_gtest.skip.aarch64-linux.acl_cl10
-rw-r--r--tests/nnapi/nnapi_gtest.skip.aarch64-linux.acl_neon10
-rw-r--r--tests/nnapi/nnapi_gtest.skip.armv7l-linux.acl_cl10
-rw-r--r--tests/nnapi/nnapi_gtest.skip.armv7l-linux.acl_neon10
-rw-r--r--tests/nnapi/nnapi_gtest.skip.noarch.interp10
-rw-r--r--tests/nnapi/specs/V1_2/conv2d_dilation_nnfw.mod.py69
-rw-r--r--tests/nnapi/specs/V1_2/log_softmax_nnfw.mod.py23
-rw-r--r--tests/nnfw_api/CMakeLists.txt5
-rw-r--r--tests/nnfw_api/src/CircleGen.cc240
-rw-r--r--tests/nnfw_api/src/CircleGen.h142
-rw-r--r--tests/nnfw_api/src/GenModelTest.h220
-rw-r--r--tests/nnfw_api/src/GenModelTests.cc152
-rw-r--r--tests/nnfw_api/src/ModelTestDynamicTensor.cc141
-rw-r--r--tests/nnfw_api/src/NNPackages.cc3
-rw-r--r--tests/nnfw_api/src/NNPackages.h5
-rw-r--r--tests/nnfw_api/src/RegressionTests.cc26
-rw-r--r--tests/nnfw_api/src/ValidationTestAddModelLoaded.cc33
-rw-r--r--tests/nnfw_api/src/ValidationTestFourAddModelsSetInput.cc6
-rw-r--r--tests/nnfw_api/src/ValidationTestSessionCreated.cc2
-rw-r--r--tests/nnfw_api/src/ValidationTestSingleSession.cc4
-rw-r--r--tests/nnfw_api/src/fixtures.h2
-rw-r--r--tests/nnfw_api/src/one_op_tests/Add.cc103
-rw-r--r--tests/nnfw_api/src/one_op_tests/AveragePool2D.cc49
-rw-r--r--tests/nnfw_api/src/one_op_tests/Cos.cc50
-rw-r--r--tests/nnfw_api/src/one_op_tests/L2Normalization.cc35
-rw-r--r--tests/nnfw_api/src/one_op_tests/LeakyRelu.cc (renamed from runtime/onert/core/src/ir/operation/Quantize.cc)27
-rw-r--r--tests/nnfw_api/src/one_op_tests/Pad.cc92
-rw-r--r--tests/nnfw_api/src/one_op_tests/PadV2.cc113
-rw-r--r--tests/nnfw_api/src/one_op_tests/Rank.cc63
-rw-r--r--tests/nnfw_api/src/one_op_tests/ResizeNearestNeighbor.cc38
-rw-r--r--tests/nnfw_api/src/one_op_tests/While.cc75
-rw-r--r--tests/scripts/CMakeLists.txt10
-rw-r--r--tests/scripts/command/prepare-model2
-rwxr-xr-xtests/scripts/common.sh2
-rw-r--r--tests/scripts/list/nnpkg_test_list.armv7l-linux.acl_cl69
-rw-r--r--tests/scripts/list/nnpkg_test_list.armv7l-linux.acl_neon69
-rw-r--r--tests/scripts/list/nnpkg_test_list.armv7l-linux.cpu59
-rw-r--r--tests/scripts/list/nnpkg_test_list.armv7l-linux.srcn5
-rw-r--r--tests/scripts/list/nnpkg_test_list.noarch.interp61
-rw-r--r--tests/scripts/models/nnfw_api_gtest/add/config.sh (renamed from tests/scripts/nnfw_api_gtest/models/add/config.sh)0
-rw-r--r--tests/scripts/models/nnfw_api_gtest/add_invalid_manifest/config.sh (renamed from tests/scripts/nnfw_api_gtest/models/add_invalid_manifest/config.sh)0
-rw-r--r--tests/scripts/models/nnfw_api_gtest/add_no_manifest/config.sh (renamed from tests/scripts/nnfw_api_gtest/models/add_no_manifest/config.sh)0
-rw-r--r--tests/scripts/models/nnfw_api_gtest/add_unspecified_rank_inputs/config.sh (renamed from tests/scripts/nnfw_api_gtest/models/add_unspecified_rank_inputs/config.sh)0
-rw-r--r--tests/scripts/models/nnfw_api_gtest/dynamic_tensor_reshape/config.sh (renamed from tests/scripts/nnfw_api_gtest/models/dynamic_tensor_reshape/config.sh)0
-rw-r--r--tests/scripts/models/nnfw_api_gtest/if_dynamic/config.sh (renamed from tests/scripts/nnfw_api_gtest/models/if_dynamic/config.sh)0
-rw-r--r--tests/scripts/models/nnfw_api_gtest/input_reshaping_add/config.sh (renamed from tests/scripts/nnfw_api_gtest/models/input_reshaping_add/config.sh)0
-rw-r--r--tests/scripts/models/nnfw_api_gtest/neg/config.sh (renamed from tests/scripts/nnfw_api_gtest/models/neg/config.sh)0
-rw-r--r--tests/scripts/models/nnfw_api_gtest/unknown_dim_input_concat/config.sh (renamed from tests/scripts/nnfw_api_gtest/models/unknown_dim_input_concat/config.sh)0
-rw-r--r--tests/scripts/models/nnfw_api_gtest/while_dynamic/config.sh (renamed from tests/scripts/nnfw_api_gtest/models/while_dynamic/config.sh)0
-rwxr-xr-xtests/scripts/models/run_test.sh4
-rwxr-xr-xtests/scripts/models/tflite/MODELS/inception_module/config.sh (renamed from tests/scripts/models/config/MODELS/inception_module/config.sh)0
-rwxr-xr-xtests/scripts/models/tflite/MODELS/inception_nonslim/config.sh (renamed from tests/scripts/models/config/MODELS/inception_nonslim/config.sh)0
-rwxr-xr-xtests/scripts/models/tflite/MODELS/inception_slim/config.sh (renamed from tests/scripts/models/config/MODELS/inception_slim/config.sh)0
-rwxr-xr-xtests/scripts/models/tflite/MODELS/mobilenet/config.sh (renamed from tests/scripts/models/config/MODELS/mobilenet/config.sh)0
-rwxr-xr-xtests/scripts/models/tflite/MODELS/mobilenet_quant8/config.sh (renamed from tests/scripts/models/config/MODELS/mobilenet_quant8/config.sh)0
-rwxr-xr-xtests/scripts/models/tflite/abs/config.sh (renamed from tests/scripts/models/config/abs/config.sh)0
-rwxr-xr-xtests/scripts/models/tflite/add/1D/config.sh (renamed from tests/scripts/models/config/add/1D/config.sh)0
-rwxr-xr-xtests/scripts/models/tflite/add/4D/config.sh (renamed from tests/scripts/models/config/add/4D/config.sh)0
-rwxr-xr-xtests/scripts/models/tflite/average_pool_2d/aligned/config.sh (renamed from tests/scripts/models/config/average_pool_2d/aligned/config.sh)0
-rwxr-xr-xtests/scripts/models/tflite/average_pool_2d/avgpool1/config.sh (renamed from tests/scripts/models/config/average_pool_2d/avgpool1/config.sh)0
-rwxr-xr-xtests/scripts/models/tflite/average_pool_2d/avgpool2/config.sh (renamed from tests/scripts/models/config/average_pool_2d/avgpool2/config.sh)0
-rwxr-xr-xtests/scripts/models/tflite/batch_to_space_nd2/config.sh (renamed from tests/scripts/models/config/batch_to_space_nd2/config.sh)0
-rwxr-xr-xtests/scripts/models/tflite/cast/config.sh (renamed from tests/scripts/models/config/cast/config.sh)0
-rwxr-xr-xtests/scripts/models/tflite/concat/2D/config.sh (renamed from tests/scripts/models/config/concat/2D/config.sh)0
-rwxr-xr-xtests/scripts/models/tflite/concat/concat1/config.sh (renamed from tests/scripts/models/config/concat/concat1/config.sh)0
-rwxr-xr-xtests/scripts/models/tflite/concat/concat2/config.sh (renamed from tests/scripts/models/config/concat/concat2/config.sh)0
-rwxr-xr-xtests/scripts/models/tflite/conv_2d/convolution1/config.sh (renamed from tests/scripts/models/config/conv_2d/convolution1/config.sh)0
-rwxr-xr-xtests/scripts/models/tflite/conv_2d/convolution2/config.sh (renamed from tests/scripts/models/config/conv_2d/convolution2/config.sh)0
-rwxr-xr-xtests/scripts/models/tflite/custom/squared_difference/config.sh (renamed from tests/scripts/models/config/custom/squared_difference/config.sh)0
-rwxr-xr-xtests/scripts/models/tflite/depthwise_conv_2d/depthconv1/config.sh (renamed from tests/scripts/models/config/depthwise_conv_2d/depthconv1/config.sh)0
-rwxr-xr-xtests/scripts/models/tflite/depthwise_conv_2d/depthconv2/config.sh (renamed from tests/scripts/models/config/depthwise_conv_2d/depthconv2/config.sh)0
-rwxr-xr-xtests/scripts/models/tflite/depthwise_conv_2d_no_fuse/config.sh (renamed from tests/scripts/models/config/depthwise_conv_2d_no_fuse/config.sh)0
-rwxr-xr-xtests/scripts/models/tflite/div/broadcast/config.sh (renamed from tests/scripts/models/config/div/broadcast/config.sh)0
-rwxr-xr-xtests/scripts/models/tflite/embedding_lookup/config.sh (renamed from tests/scripts/models/config/embedding_lookup/config.sh)0
-rwxr-xr-xtests/scripts/models/tflite/equal/config.sh (renamed from tests/scripts/models/config/equal/config.sh)0
-rwxr-xr-xtests/scripts/models/tflite/exp/config.sh (renamed from tests/scripts/models/config/exp/config.sh)0
-rwxr-xr-xtests/scripts/models/tflite/floor/floor1/config.sh (renamed from tests/scripts/models/config/floor/floor1/config.sh)0
-rwxr-xr-xtests/scripts/models/tflite/floor/floor2/config.sh (renamed from tests/scripts/models/config/floor/floor2/config.sh)0
-rwxr-xr-xtests/scripts/models/tflite/fullyconnected/fc1/config.sh (renamed from tests/scripts/models/config/fullyconnected/fc1/config.sh)0
-rwxr-xr-xtests/scripts/models/tflite/fullyconnected/hybrid/config.sh (renamed from tests/scripts/models/config/fullyconnected/hybrid/config.sh)0
-rwxr-xr-xtests/scripts/models/tflite/fullyconnected/matmul2x2/config.sh (renamed from tests/scripts/models/config/fullyconnected/matmul2x2/config.sh)0
-rwxr-xr-xtests/scripts/models/tflite/fullyconnected/weights_as_input/config.sh (renamed from tests/scripts/models/config/fullyconnected/weights_as_input/config.sh)0
-rwxr-xr-xtests/scripts/models/tflite/gather/config.sh (renamed from tests/scripts/models/config/gather/config.sh)0
-rwxr-xr-xtests/scripts/models/tflite/greater/config.sh (renamed from tests/scripts/models/config/greater/config.sh)0
-rwxr-xr-xtests/scripts/models/tflite/greater_equal/config.sh (renamed from tests/scripts/models/config/greater_equal/config.sh)0
-rwxr-xr-xtests/scripts/models/tflite/hashtable_lookup/config.sh (renamed from tests/scripts/models/config/hashtable_lookup/config.sh)0
-rwxr-xr-xtests/scripts/models/tflite/l2_normalization/config.sh (renamed from tests/scripts/models/config/l2_normalization/config.sh)0
-rwxr-xr-xtests/scripts/models/tflite/l2_pool_2d/config.sh (renamed from tests/scripts/models/config/l2_pool_2d/config.sh)0
-rwxr-xr-xtests/scripts/models/tflite/less/config.sh (renamed from tests/scripts/models/config/less/config.sh)0
-rwxr-xr-xtests/scripts/models/tflite/less_equal/config.sh (renamed from tests/scripts/models/config/less_equal/config.sh)0
-rwxr-xr-xtests/scripts/models/tflite/logistic/config.sh (renamed from tests/scripts/models/config/logistic/config.sh)0
-rwxr-xr-xtests/scripts/models/tflite/max/config.sh (renamed from tests/scripts/models/config/max/config.sh)0
-rwxr-xr-xtests/scripts/models/tflite/max_pool_2d/maxpool1/config.sh (renamed from tests/scripts/models/config/max_pool_2d/maxpool1/config.sh)0
-rwxr-xr-xtests/scripts/models/tflite/max_pool_2d/maxpool2/config.sh (renamed from tests/scripts/models/config/max_pool_2d/maxpool2/config.sh)0
-rwxr-xr-xtests/scripts/models/tflite/mean/config.sh (renamed from tests/scripts/models/config/mean/config.sh)0
-rwxr-xr-xtests/scripts/models/tflite/min/config.sh (renamed from tests/scripts/models/config/min/config.sh)0
-rwxr-xr-xtests/scripts/models/tflite/mul/broadcast/config.sh (renamed from tests/scripts/models/config/mul/broadcast/config.sh)0
-rwxr-xr-xtests/scripts/models/tflite/neg/config.sh (renamed from tests/scripts/models/config/neg/config.sh)0
-rwxr-xr-xtests/scripts/models/tflite/not_equal/config.sh (renamed from tests/scripts/models/config/not_equal/config.sh)0
-rwxr-xr-xtests/scripts/models/tflite/one_hot/config.sh (renamed from tests/scripts/models/config/one_hot/config.sh)0
-rwxr-xr-xtests/scripts/models/tflite/pack/config.sh (renamed from tests/scripts/models/config/pack/config.sh)0
-rwxr-xr-xtests/scripts/models/tflite/pad/4D_2D/config.sh (renamed from tests/scripts/models/config/pad/4D_2D/config.sh)0
-rwxr-xr-xtests/scripts/models/tflite/pad/pad1/config.sh (renamed from tests/scripts/models/config/pad/pad1/config.sh)0
-rwxr-xr-xtests/scripts/models/tflite/pad/pad2/config.sh (renamed from tests/scripts/models/config/pad/pad2/config.sh)0
-rwxr-xr-xtests/scripts/models/tflite/reduce_max/config.sh (renamed from tests/scripts/models/config/reduce_max/config.sh)0
-rwxr-xr-xtests/scripts/models/tflite/reduce_mean/test1/config.sh (renamed from tests/scripts/models/config/reduce_mean/test1/config.sh)0
-rwxr-xr-xtests/scripts/models/tflite/reduce_mean/test2/config.sh (renamed from tests/scripts/models/config/reduce_mean/test2/config.sh)0
-rwxr-xr-xtests/scripts/models/tflite/reduce_sum/float/config.sh (renamed from tests/scripts/models/config/reduce_sum/float/config.sh)0
-rwxr-xr-xtests/scripts/models/tflite/reduce_sum/uint8/config.sh (renamed from tests/scripts/models/config/reduce_sum/uint8/config.sh)0
-rwxr-xr-xtests/scripts/models/tflite/relu/config.sh (renamed from tests/scripts/models/config/relu/config.sh)0
-rwxr-xr-xtests/scripts/models/tflite/relu6/config.sh (renamed from tests/scripts/models/config/relu6/config.sh)0
-rwxr-xr-xtests/scripts/models/tflite/reshape/3D/config.sh (renamed from tests/scripts/models/config/reshape/3D/config.sh)0
-rwxr-xr-xtests/scripts/models/tflite/reshape/reshape1/config.sh (renamed from tests/scripts/models/config/reshape/reshape1/config.sh)0
-rwxr-xr-xtests/scripts/models/tflite/reshape/reshape2/config.sh (renamed from tests/scripts/models/config/reshape/reshape2/config.sh)0
-rwxr-xr-xtests/scripts/models/tflite/resize_bilinear/config.sh (renamed from tests/scripts/models/config/resize_bilinear/config.sh)0
-rwxr-xr-xtests/scripts/models/tflite/rnn/config.sh (renamed from tests/scripts/models/config/rnn/config.sh)0
-rwxr-xr-xtests/scripts/models/tflite/rsqrt/config.sh (renamed from tests/scripts/models/config/rsqrt/config.sh)0
-rwxr-xr-xtests/scripts/models/tflite/select/config.sh (renamed from tests/scripts/models/config/select/config.sh)0
-rw-r--r--tests/scripts/models/tflite/shape/config.sh (renamed from tests/scripts/models/config/shape/config.sh)0
-rwxr-xr-xtests/scripts/models/tflite/sin/config.sh (renamed from tests/scripts/models/config/sin/config.sh)0
-rwxr-xr-xtests/scripts/models/tflite/slice/config.sh (renamed from tests/scripts/models/config/slice/config.sh)0
-rwxr-xr-xtests/scripts/models/tflite/softmax/config.sh (renamed from tests/scripts/models/config/softmax/config.sh)0
-rwxr-xr-xtests/scripts/models/tflite/space_to_batch_nd2/config.sh (renamed from tests/scripts/models/config/space_to_batch_nd2/config.sh)0
-rwxr-xr-xtests/scripts/models/tflite/space_to_depth/config.sh (renamed from tests/scripts/models/config/space_to_depth/config.sh)0
-rwxr-xr-xtests/scripts/models/tflite/sqrt/config.sh (renamed from tests/scripts/models/config/sqrt/config.sh)0
-rwxr-xr-xtests/scripts/models/tflite/squeeze/config.sh (renamed from tests/scripts/models/config/squeeze/config.sh)0
-rwxr-xr-xtests/scripts/models/tflite/strided_slice/config.sh (renamed from tests/scripts/models/config/strided_slice/config.sh)0
-rwxr-xr-xtests/scripts/models/tflite/sub/broadcast/config.sh (renamed from tests/scripts/models/config/sub/broadcast/config.sh)0
-rwxr-xr-xtests/scripts/models/tflite/tanh/config.sh (renamed from tests/scripts/models/config/tanh/config.sh)0
-rw-r--r--tests/scripts/models/tflite/tile/config.sh (renamed from tests/scripts/models/config/tile/config.sh)0
-rwxr-xr-xtests/scripts/models/tflite/topk_v2/config.sh (renamed from tests/scripts/models/config/topk_v2/config.sh)0
-rwxr-xr-xtests/scripts/models/tflite/transpose/config.sh (renamed from tests/scripts/models/config/transpose/config.sh)0
-rwxr-xr-xtests/scripts/models/tflite/transpose_conv/same/config.sh (renamed from tests/scripts/models/config/transpose_conv/same/config.sh)0
-rwxr-xr-xtests/scripts/models/tflite/transpose_conv/valid/config.sh (renamed from tests/scripts/models/config/transpose_conv/valid/config.sh)0
-rwxr-xr-xtests/scripts/models/tflite/zeros_like/config.sh (renamed from tests/scripts/models/config/zeros_like/config.sh)0
-rwxr-xr-xtests/scripts/nnfw_api_gtest/install_nnfw_api_gtest_nnpackages.sh148
-rwxr-xr-xtests/scripts/test_scheduler_with_profiling.sh10
-rw-r--r--tests/tools/nnpackage_run/src/args.cc32
-rw-r--r--tests/tools/nnpackage_run/src/args.h19
-rw-r--r--tests/tools/nnpackage_run/src/h5formatter.cc63
-rw-r--r--tests/tools/nnpackage_run/src/h5formatter.h2
-rw-r--r--tests/tools/nnpackage_run/src/nnpackage_run.cc44
-rw-r--r--tests/tools/nnpackage_run/src/types.h (renamed from compiler/circle-quantizer/src/CircleExpContract.cpp)20
-rw-r--r--tests/tools/tflite_run/src/tflite_run.cc9
-rw-r--r--tools/nnpackage_tool/model2nnpkg/README.md6
-rwxr-xr-xtools/nnpackage_tool/model2nnpkg/model2nnpkg.sh16
-rwxr-xr-xtools/nnpackage_tool/tflite2circle/tflite2circle.sh4
-rwxr-xr-xtools/nnpackage_tool/tflite2circle/tflitejson2circlejson.py2
-rw-r--r--tools/release_tool/README.md68
-rwxr-xr-xtools/release_tool/git_release.sh206
-rwxr-xr-xtools/release_tool/onert_version.sh54
-rwxr-xr-xtools/tflitefile_tool/model_parser.py6
-rw-r--r--tools/tflitefile_tool/requirements.txt2
-rwxr-xr-xtools/tflitefile_tool/select_operator.py38
-rw-r--r--tools/update_version/update-version51
679 files changed, 16237 insertions, 15934 deletions
diff --git a/Makefile.template b/Makefile.template
index a21937dea..1b2f564c0 100644
--- a/Makefile.template
+++ b/Makefile.template
@@ -154,14 +154,14 @@ runtime_tar_internal: $(TIMESTAMP_BUILD) install_internal
tar -zcf $(WORKSPACE)/nnfw-package.tar.gz -C $(INSTALL_PATH) lib
tar -zcf $(WORKSPACE)/nnfw-devel-package.tar.gz -C $(INSTALL_PATH) include/nnfw
tar -zcf $(WORKSPACE)/nnfw-plugin-devel-package.tar.gz -C $(INSTALL_PATH) include/onert
- tar -zcf $(WORKSPACE)/nnfw-test-package.tar.gz -C ${INSTALL_PATH} bin test unittest unittest_standalone
+ tar -zcf $(WORKSPACE)/nnfw-test-package.tar.gz -C $(INSTALL_PATH) $(shell ls $(INSTALL_PATH) -I lib -I include)
acl_tar_internal: $(BUILD_FOLDER)
- tar -zcf $(WORKSPACE)/nnfw-acl.tar.gz -C ${OVERLAY_FOLDER} lib
+ tar -zcf $(WORKSPACE)/nnfw-acl.tar.gz -C ${OVERLAY_FOLDER} lib/libarm_compute.so lib/libarm_compute_core.so lib/libarm_compute_graph.so
install_internal_acl:
# Workaround to install acl for test (ignore error when there is no file to copy)
- cp $(OVERLAY_FOLDER)/lib/* $(INSTALL_ALIAS)/lib || true
+ cp $(OVERLAY_FOLDER)/lib/libarm_compute* $(INSTALL_ALIAS)/lib || true
build_test_suite: install_internal install_internal_acl
@echo "packaging test suite"
diff --git a/compiler/circle-quantizer/CMakeLists.txt b/compiler/circle-quantizer/CMakeLists.txt
index 009bfabea..5075b13d5 100644
--- a/compiler/circle-quantizer/CMakeLists.txt
+++ b/compiler/circle-quantizer/CMakeLists.txt
@@ -1,8 +1,6 @@
-file(GLOB_RECURSE SOURCES "src/*.cpp")
+set (SOURCES src/CircleQuantizer.cpp)
add_executable(circle-quantizer "${SOURCES}")
-target_include_directories(circle-quantizer PRIVATE include)
-target_include_directories(circle-quantizer PRIVATE src)
target_link_libraries(circle-quantizer foder)
target_link_libraries(circle-quantizer safemain)
target_link_libraries(circle-quantizer oops)
diff --git a/compiler/circle-quantizer/include/CircleExpContract.h b/compiler/circle-quantizer/include/CircleExpContract.h
deleted file mode 100644
index e888e4a12..000000000
--- a/compiler/circle-quantizer/include/CircleExpContract.h
+++ /dev/null
@@ -1,49 +0,0 @@
-/*
- * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#ifndef __CIRCLEQUANTIZER_CIRCLEXPCONTRACT_H__
-#define __CIRCLEQUANTIZER_CIRCLEXPCONTRACT_H__
-
-#include <loco.h>
-#include <luci/CircleExporter.h>
-#include <luci/IR/Module.h>
-
-#include <memory>
-#include <string>
-
-struct CircleExpContract : public luci::CircleExporter::Contract
-{
-public:
- CircleExpContract(luci::Module *module, const std::string &filename)
- : _module(module), _filepath(filename)
- {
- // NOTHING TO DO
- }
- virtual ~CircleExpContract() = default;
-
-public:
- loco::Graph *graph(void) const final { return nullptr; }
- luci::Module *module(void) const final { return _module; };
-
-public:
- bool store(const char *ptr, const size_t size) const final;
-
-private:
- luci::Module *_module;
- const std::string _filepath;
-};
-
-#endif // __CIRCLEQUANTIZER_CIRCLEXPCONTRACT_H__
diff --git a/compiler/circle-quantizer/src/CircleQuantizer.cpp b/compiler/circle-quantizer/src/CircleQuantizer.cpp
index 8d3a80c91..54b38a170 100644
--- a/compiler/circle-quantizer/src/CircleQuantizer.cpp
+++ b/compiler/circle-quantizer/src/CircleQuantizer.cpp
@@ -14,14 +14,13 @@
* limitations under the License.
*/
-#include "CircleExpContract.h"
-
#include <foder/FileLoader.h>
#include <luci/Importer.h>
#include <luci/CircleOptimizer.h>
#include <luci/Service/Validate.h>
#include <luci/CircleExporter.h>
+#include <luci/CircleFileExpContract.h>
#include <oops/InternalExn.h>
#include <arser/arser.h>
@@ -37,6 +36,14 @@ using OptionHook = std::function<int(const char **)>;
using Algorithms = luci::CircleOptimizer::Options::Algorithm;
using AlgorithmParameters = luci::CircleOptimizer::Options::AlgorithmParameters;
+void print_exclusive_options(void)
+{
+ std::cout << "Use only one of the 3 options below." << std::endl;
+ std::cout << " --quantize_dequantize_weights" << std::endl;
+ std::cout << " --quantize_with_minmax" << std::endl;
+ std::cout << " --requantize" << std::endl;
+}
+
void print_version(void)
{
std::cout << "circle-quantizer version " << vconone::get_string() << std::endl;
@@ -53,6 +60,7 @@ int entry(int argc, char **argv)
const std::string qdqw = "--quantize_dequantize_weights";
const std::string qwmm = "--quantize_with_minmax";
+ const std::string rq = "--requantize";
arser::Arser arser("circle-quantizer provides circle model quantization");
@@ -79,6 +87,14 @@ int entry(int argc, char **argv)
"Three arguments required: input_dtype(float32) "
"output_dtype(uint8) granularity(layer, channel)");
+ arser.add_argument(rq)
+ .nargs(2)
+ .type(arser::DataType::STR_VEC)
+ .required(false)
+ .help("Requantize a quantized model. "
+ "Two arguments required: input_dtype(int8) "
+ "output_dtype(uint8)");
+
arser.add_argument("input").nargs(1).type(arser::DataType::STR).help("Input circle model");
arser.add_argument("output").nargs(1).type(arser::DataType::STR).help("Output circle model");
@@ -95,6 +111,11 @@ int entry(int argc, char **argv)
if (arser[qdqw])
{
+ if (arser[qwmm] || arser[rq])
+ {
+ print_exclusive_options();
+ return 255;
+ }
auto values = arser.get<std::vector<std::string>>(qdqw);
if (values.size() != 3)
{
@@ -110,6 +131,11 @@ int entry(int argc, char **argv)
if (arser[qwmm])
{
+ if (arser[qdqw] || arser[rq])
+ {
+ print_exclusive_options();
+ return 255;
+ }
auto values = arser.get<std::vector<std::string>>(qwmm);
if (values.size() != 3)
{
@@ -123,12 +149,40 @@ int entry(int argc, char **argv)
options->param(AlgorithmParameters::Quantize_granularity, values.at(2));
}
+ if (arser[rq])
+ {
+ if (arser[qwmm] || arser[qdqw])
+ {
+ print_exclusive_options();
+ return 255;
+ }
+ auto values = arser.get<std::vector<std::string>>(rq);
+ if (values.size() != 2)
+ {
+ std::cerr << arser;
+ return 255;
+ }
+ options->enable(Algorithms::Requantize);
+
+ options->param(AlgorithmParameters::Quantize_input_dtype, values.at(0));
+ options->param(AlgorithmParameters::Quantize_output_dtype, values.at(1));
+ }
+
std::string input_path = arser.get<std::string>("input");
std::string output_path = arser.get<std::string>("output");
// Load model from the file
foder::FileLoader file_loader{input_path};
std::vector<char> model_data = file_loader.load();
+
+ // Verify flatbuffers
+ flatbuffers::Verifier verifier{reinterpret_cast<uint8_t *>(model_data.data()), model_data.size()};
+ if (!circle::VerifyModelBuffer(verifier))
+ {
+ std::cerr << "ERROR: Invalid input file '" << input_path << "'" << std::endl;
+ return EXIT_FAILURE;
+ }
+
const circle::Model *circle_model = circle::GetModel(model_data.data());
if (circle_model == nullptr)
{
@@ -157,7 +211,7 @@ int entry(int argc, char **argv)
// Export to output Circle file
luci::CircleExporter exporter;
- CircleExpContract contract(module.get(), output_path);
+ luci::CircleFileExpContract contract(module.get(), output_path);
if (!exporter.invoke(&contract))
{
diff --git a/compiler/circle2circle-dredd-recipe-test/test.lst b/compiler/circle2circle-dredd-recipe-test/test.lst
index 6328a64db..302c3a796 100644
--- a/compiler/circle2circle-dredd-recipe-test/test.lst
+++ b/compiler/circle2circle-dredd-recipe-test/test.lst
@@ -10,6 +10,7 @@
## TFLITE RECIPE
+Add(Net_TConv_BN_000 PASS fuse_batchnorm_with_tconv)
Add(Net_InstanceNorm_001 PASS fuse_instnorm)
Add(Net_InstanceNorm_002 PASS fuse_instnorm)
Add(BatchMatMulV2_000 PASS resolve_customop_batchmatmul)
diff --git a/compiler/circle2circle/include/CircleExpContract.h b/compiler/circle2circle/include/CircleExpContract.h
deleted file mode 100644
index 313b16d22..000000000
--- a/compiler/circle2circle/include/CircleExpContract.h
+++ /dev/null
@@ -1,50 +0,0 @@
-/*
- * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#ifndef __CIRCLE2CIRCLE_CIRCLEXPCONTRACT_H__
-#define __CIRCLE2CIRCLE_CIRCLEXPCONTRACT_H__
-
-#include <loco.h>
-#include <luci/CircleExporter.h>
-#include <luci/IR/Module.h>
-#include <mio/circle/schema_generated.h>
-
-#include <memory>
-#include <string>
-
-struct CircleExpContract : public luci::CircleExporter::Contract
-{
-public:
- CircleExpContract(luci::Module *module, const std::string &filename)
- : _module(module), _filepath(filename)
- {
- // NOTHING TO DO
- }
- virtual ~CircleExpContract() = default;
-
-public:
- loco::Graph *graph(void) const final { return nullptr; }
- luci::Module *module(void) const final { return _module; };
-
-public:
- bool store(const char *ptr, const size_t size) const final;
-
-private:
- luci::Module *_module;
- const std::string _filepath;
-};
-
-#endif // __CIRCLE2CIRCLE_CIRCLEXPCONTRACT_H__
diff --git a/compiler/circle2circle/src/Circle2Circle.cpp b/compiler/circle2circle/src/Circle2Circle.cpp
index 849597b46..39ceade3a 100644
--- a/compiler/circle2circle/src/Circle2Circle.cpp
+++ b/compiler/circle2circle/src/Circle2Circle.cpp
@@ -14,14 +14,13 @@
* limitations under the License.
*/
-#include "CircleExpContract.h"
-
#include <foder/FileLoader.h>
#include <luci/Importer.h>
#include <luci/CircleOptimizer.h>
#include <luci/Service/Validate.h>
#include <luci/CircleExporter.h>
+#include <luci/CircleFileExpContract.h>
#include <luci/UserSettings.h>
#include <oops/InternalExn.h>
@@ -61,6 +60,12 @@ int entry(int argc, char **argv)
arser.add_argument("--all").nargs(0).required(false).default_value(false).help(
"Enable all optimize options");
+ arser.add_argument("--fuse_batchnorm_with_tconv")
+ .nargs(0)
+ .required(false)
+ .default_value(false)
+ .help("This will fuse BatchNorm operators to Transposed Convolution operator");
+
arser.add_argument("--fuse_bcq")
.nargs(0)
.required(false)
@@ -101,7 +106,7 @@ int entry(int argc, char **argv)
.nargs(0)
.required(false)
.default_value(false)
- .help("This will turn off operator vaidations. May help input model investigation.");
+ .help("This will turn off operator validations. May help input model investigation.");
arser.add_argument("input").nargs(1).type(arser::DataType::STR).help("Input circle model");
arser.add_argument("output").nargs(1).type(arser::DataType::STR).help("Output circle model");
@@ -125,6 +130,8 @@ int entry(int argc, char **argv)
options->enable(Algorithms::ResolveCustomOpBatchMatMul);
options->enable(Algorithms::ResolveCustomOpMatMul);
}
+ if (arser.get<bool>("--fuse_batchnorm_with_tconv"))
+ options->enable(Algorithms::FuseBatchNormWithTConv);
if (arser.get<bool>("--fuse_bcq"))
options->enable(Algorithms::FuseBCQ);
if (arser.get<bool>("--fuse_instnorm"))
@@ -157,6 +164,14 @@ int entry(int argc, char **argv)
std::cerr << err.what() << std::endl;
return EXIT_FAILURE;
}
+
+ flatbuffers::Verifier verifier{reinterpret_cast<uint8_t *>(model_data.data()), model_data.size()};
+ if (!circle::VerifyModelBuffer(verifier))
+ {
+ std::cerr << "ERROR: Invalid input file '" << input_path << "'" << std::endl;
+ return EXIT_FAILURE;
+ }
+
const circle::Model *circle_model = circle::GetModel(model_data.data());
if (circle_model == nullptr)
{
@@ -177,15 +192,20 @@ int entry(int argc, char **argv)
if (!luci::validate(graph))
{
- std::cerr << "ERROR: Optimized graph is invalid" << std::endl;
- return 255;
+ if (settings->get(luci::UserSettings::Key::DisableValidation))
+ std::cerr << "WARNING: Optimized graph is invalid" << std::endl;
+ else
+ {
+ std::cerr << "ERROR: Optimized graph is invalid" << std::endl;
+ return 255;
+ }
}
}
// Export to output Circle file
luci::CircleExporter exporter;
- CircleExpContract contract(module.get(), output_path);
+ luci::CircleFileExpContract contract(module.get(), output_path);
if (!exporter.invoke(&contract))
{
diff --git a/compiler/circlechef/circle/CMakeLists.txt b/compiler/circlechef/circle/CMakeLists.txt
index 75165ada3..2ca016b84 100644
--- a/compiler/circlechef/circle/CMakeLists.txt
+++ b/compiler/circlechef/circle/CMakeLists.txt
@@ -7,3 +7,4 @@ target_link_libraries(circlechef_circle circlechef_proto)
target_link_libraries(circlechef_circle mio_circle)
target_link_libraries(circlechef_circle stdex)
target_link_libraries(circlechef_circle cwrap)
+target_link_libraries(circlechef_circle souschef)
diff --git a/compiler/circlechef/circle/src/CircleImport.h b/compiler/circlechef/circle/src/CircleImport.h
index a8ef3ee44..23ca29beb 100644
--- a/compiler/circlechef/circle/src/CircleImport.h
+++ b/compiler/circlechef/circle/src/CircleImport.h
@@ -19,6 +19,8 @@
#include <mio/circle/schema_generated.h>
+#include <souschef/TensorFiller.h>
+
#include <circlechef.pb.h>
#include <map>
@@ -40,7 +42,7 @@ bool is_custom(const circle::OperatorCode *opcode);
/**
* @brief Loads TF lite file and provides helpers to access attributes
*/
-class CircleImport
+class CircleImport : public souschef::TensorFiller
{
public:
CircleImport(const circle::Model *model);
@@ -63,63 +65,6 @@ public:
std::string opcode_name(const circle::Operator *op) const;
size_t buffer_info(const circle::Tensor *tensor, const uint8_t **buff_data);
- /**
- * @brief This will record the tensor by index, if it needs filler option,
- * such as kernel, bias.
- */
- void set_tensor_filler(uint32_t tensor_index) { _tensor_filler[tensor_index] = true; }
-
- /**
- * @brief This will store int32 filler values such as reshape information for the tensor
- */
- void set_tensor_filler(uint32_t tensor_index, std::vector<int32_t> &expvalues)
- {
- _tensor_filler_vint32[tensor_index] = expvalues;
- }
-
- void set_tensor_filler(uint32_t tensor_index, std::vector<float> &expvalues)
- {
- _tensor_filler_vfloat[tensor_index] = expvalues;
- }
-
- /**
- * @brief This will return true if the tensor by index, needs a filler option.
- */
- bool get_tensor_filler(uint32_t tensor_index)
- {
- auto it = _tensor_filler.find(tensor_index);
- if (it != _tensor_filler.end())
- {
- return it->second;
- }
- return false;
- }
-
- /**
- * @brief This will return true if the tensor by index, needs a int array filler option.
- */
- bool get_tensor_filler(uint32_t tensor_index, std::vector<int32_t> &expvalues)
- {
- auto it = _tensor_filler_vint32.find(tensor_index);
- if (it != _tensor_filler_vint32.end())
- {
- expvalues = it->second;
- return true;
- }
- return false;
- }
-
- bool get_tensor_filler(uint32_t tensor_index, std::vector<float> &expvalues)
- {
- auto it = _tensor_filler_vfloat.find(tensor_index);
- if (it != _tensor_filler_vfloat.end())
- {
- expvalues = it->second;
- return true;
- }
- return false;
- }
-
private:
const CircleSubGraphs_t *_subgraphs{nullptr};
const CircleBuffers_t *_buffers{nullptr};
@@ -129,10 +74,6 @@ private:
std::vector<const circle::OperatorCode *> _op_codes{};
std::vector<int32_t> _inputs{};
std::vector<int32_t> _outputs{};
-
- std::map<uint32_t, bool> _tensor_filler{};
- std::map<uint32_t, std::vector<int32_t>> _tensor_filler_vint32{};
- std::map<uint32_t, std::vector<float>> _tensor_filler_vfloat{};
};
} // namespace circlechef
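
The filler bookkeeping removed above is expected to come from the souschef::TensorFiller base instead. Assuming that base keeps the same set_tensor_filler/get_tensor_filler interface as the deleted members, existing callers keep working unchanged; a minimal sketch:

    #include "CircleImport.h" // circlechef::CircleImport, now also a souschef::TensorFiller

    #include <cstdint>
    #include <vector>

    void record_and_query(circlechef::CircleImport &import)
    {
      // Mark tensor 3 as a plain filler candidate (e.g. a kernel or bias constant).
      import.set_tensor_filler(3);

      // Record explicit int32 values, e.g. the new shape fed to a Reshape.
      std::vector<int32_t> shape{1, 2, 3, 4};
      import.set_tensor_filler(4, shape);

      // Later, ask whether (and what) was recorded.
      std::vector<int32_t> recovered;
      if (import.get_tensor_filler(4, recovered))
      {
        // recovered == {1, 2, 3, 4}
      }
    }
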
diff --git a/compiler/circlechef/core/src/ModelChef.cpp b/compiler/circlechef/core/src/ModelChef.cpp
index d81467d68..aa54678ec 100644
--- a/compiler/circlechef/core/src/ModelChef.cpp
+++ b/compiler/circlechef/core/src/ModelChef.cpp
@@ -26,6 +26,7 @@
#include "OpChefs.h"
#include <souschef/Dataset.h>
+#include <souschef/Dims.h>
#include "Log.h"
@@ -41,52 +42,8 @@
#include <sstream>
#include <stdexcept>
-namespace
-{
-
using namespace souschef;
-template <typename T> std::vector<T> as_vector(const ::google::protobuf::RepeatedPtrField<T> &field)
-{
- std::vector<T> res;
- for (const auto &elem : field)
- {
- res.emplace_back(elem);
- }
- return res;
-}
-
-template <typename T> Dataset<T> as_dataset(const ::google::protobuf::RepeatedPtrField<T> &field)
-{
- return Dataset<T>(as_vector<T>(field));
-}
-
-} // namespace
-
-namespace
-{
-
-template <typename T> using Dims = std::vector<T>;
-
-Dims<int32_t> as_dims(const circlechef::TensorShape &shape)
-{
- std::vector<int32_t> res;
-
- for (auto &dim : shape.dim())
- {
- res.emplace_back(static_cast<int32_t>(dim));
- }
-
- return res;
-}
-
-int32_t element_count(const Dims<int32_t> &dims)
-{
- return std::accumulate(dims.begin(), dims.end(), 1, std::multiplies<int32_t>());
-}
-
-} // namespace
-
namespace
{
diff --git a/compiler/circledump/src/OpPrinter.cpp b/compiler/circledump/src/OpPrinter.cpp
index 3294bb23d..a0a063e79 100644
--- a/compiler/circledump/src/OpPrinter.cpp
+++ b/compiler/circledump/src/OpPrinter.cpp
@@ -725,6 +725,7 @@ OpPrinterRegistry::OpPrinterRegistry()
_op_map[circle::BuiltinOperator_MIRROR_PAD] = make_unique<MirrorPadPrinter>();
_op_map[circle::BuiltinOperator_MUL] = make_unique<MulPrinter>();
// There is no Option for NON_MAX_SUPPRESSION_V4
+ // There is no Option for NON_MAX_SUPPRESSION_V5
_op_map[circle::BuiltinOperator_ONE_HOT] = make_unique<OneHotPrinter>();
_op_map[circle::BuiltinOperator_PACK] = make_unique<PackPrinter>();
// There is no Option for PAD
diff --git a/compiler/common-artifacts/CMakeLists.txt b/compiler/common-artifacts/CMakeLists.txt
index ef50e8d43..ec9e3cf85 100644
--- a/compiler/common-artifacts/CMakeLists.txt
+++ b/compiler/common-artifacts/CMakeLists.txt
@@ -33,10 +33,12 @@ set(REQUIREMENTS_FILE "requirements.txt")
set(REQUIREMENTS_OVERLAY_PATH_TF_1_13_2 "${VIRTUALENV_OVERLAY_TF_1_13_2}/${REQUIREMENTS_FILE}")
set(REQUIREMENTS_OVERLAY_PATH_TF_2_3_0 "${VIRTUALENV_OVERLAY_TF_2_3_0}/${REQUIREMENTS_FILE}")
+# TODO remove the pinned versions in '--upgrade pip==20.2.1 setuptools==49.3.0'
+# NOTE the pins are a temporary hotfix for the setuptools 50.x.y release
add_custom_command(
OUTPUT ${REQUIREMENTS_OVERLAY_PATH_TF_1_13_2}
COMMAND ${CMAKE_COMMAND} -E echo "tensorflow==1.13.2" > ${REQUIREMENTS_OVERLAY_PATH_TF_1_13_2}
- COMMAND ${VIRTUALENV_OVERLAY_TF_1_13_2}/bin/python -m pip --default-timeout=1000 install --upgrade pip setuptools
+ COMMAND ${VIRTUALENV_OVERLAY_TF_1_13_2}/bin/python -m pip --default-timeout=1000 install --upgrade pip==20.2.1 setuptools==49.3.0
COMMAND ${VIRTUALENV_OVERLAY_TF_1_13_2}/bin/python -m pip --default-timeout=1000 install -r ${REQUIREMENTS_OVERLAY_PATH_TF_1_13_2} --upgrade
DEPENDS ${VIRTUALENV_OVERLAY_TF_1_13_2}
)
@@ -46,7 +48,7 @@ add_custom_command(
COMMAND ${CMAKE_COMMAND} -E remove -f ${REQUIREMENTS_OVERLAY_PATH_TF_2_3_0}
COMMAND ${CMAKE_COMMAND} -E echo "tensorflow-cpu==2.3.0" >> ${REQUIREMENTS_OVERLAY_PATH_TF_2_3_0}
COMMAND ${CMAKE_COMMAND} -E echo "flatbuffers==1.12" >> ${REQUIREMENTS_OVERLAY_PATH_TF_2_3_0}
- COMMAND ${VIRTUALENV_OVERLAY_TF_2_3_0}/bin/python -m pip --default-timeout=1000 install --upgrade pip setuptools
+ COMMAND ${VIRTUALENV_OVERLAY_TF_2_3_0}/bin/python -m pip --default-timeout=1000 install --upgrade pip==20.2.1 setuptools==49.3.0
COMMAND ${VIRTUALENV_OVERLAY_TF_2_3_0}/bin/python -m pip --default-timeout=1000 install -r ${REQUIREMENTS_OVERLAY_PATH_TF_2_3_0} --upgrade
DEPENDS ${VIRTUALENV_OVERLAY_TF_2_3_0}
)
@@ -233,10 +235,10 @@ foreach(RECIPE IN ITEMS ${RECIPES})
set(INPUT_HDF5_FILE "${RECIPE}${OPT_FORMAT}.input.h5")
set(INPUT_BIN_PATH "${CMAKE_CURRENT_BINARY_DIR}/${INPUT_HDF5_FILE}")
-
+
set(EXPECTED_HDF5_FILE "${RECIPE}${OPT_FORMAT}.expected.h5")
set(EXPECTED_BIN_PATH "${CMAKE_CURRENT_BINARY_DIR}/${EXPECTED_HDF5_FILE}")
-
+
if(NOT DEFINED NO_TCGEN_${RECIPE})
# Generate input.h5, expected.h5
add_custom_command(OUTPUT ${INPUT_BIN_PATH} ${EXPECTED_BIN_PATH}
@@ -244,7 +246,7 @@ foreach(RECIPE IN ITEMS ${RECIPES})
DEPENDS $<TARGET_FILE:testDataGenerator> ${MODEL_FILE}
COMMENT "Generate ${INPUT_BIN_PATH} and ${EXPECTED_BIN_PATH}"
)
-
+
# Generate test directory
set(TC_DIRECTORY "${NNPKG_PATH}/metadata/tc")
add_custom_command(OUTPUT ${TC_DIRECTORY}
@@ -252,7 +254,7 @@ foreach(RECIPE IN ITEMS ${RECIPES})
DEPENDS ${NNPKG_PATH}
COMMENT "Generate ${RECIPE} nnpackage test directory"
)
-
+
# Move input hdf5 file to test directory
set(INPUT_NNPKG_PATH "${TC_DIRECTORY}/input.h5")
add_custom_command(OUTPUT ${INPUT_NNPKG_PATH}
@@ -260,7 +262,7 @@ foreach(RECIPE IN ITEMS ${RECIPES})
DEPENDS ${INPUT_BIN_PATH} ${TC_DIRECTORY}
COMMENT "Move ${INPUT_HDF5_FILE} to nnpackage"
)
-
+
# Move expected hdf5 file to test directory
set(EXPECTED_NNPKG_PATH "${TC_DIRECTORY}/expected.h5")
add_custom_command(OUTPUT ${EXPECTED_NNPKG_PATH}
diff --git a/compiler/common-artifacts/exclude.lst b/compiler/common-artifacts/exclude.lst
index fe9933ae0..886f607cf 100644
--- a/compiler/common-artifacts/exclude.lst
+++ b/compiler/common-artifacts/exclude.lst
@@ -96,6 +96,8 @@ tcgenerate(Mean_U8_000)
tcgenerate(Minimum_000)
tcgenerate(NonMaxSuppressionV4_000)
tcgenerate(NonMaxSuppressionV4_001)
+tcgenerate(NonMaxSuppressionV5_000)
+tcgenerate(NonMaxSuppressionV5_001)
tcgenerate(MirrorPad_000)
tcgenerate(Mul_U8_000)
tcgenerate(Neg_000)
diff --git a/compiler/locomotiv/src/Node/BiasAdd.cpp b/compiler/locomotiv/src/Node/BiasAdd.cpp
index dfe32ca92..b84fa7e3c 100644
--- a/compiler/locomotiv/src/Node/BiasAdd.cpp
+++ b/compiler/locomotiv/src/Node/BiasAdd.cpp
@@ -41,10 +41,12 @@ std::unique_ptr<NodeData> calc(const NodeData *input_data, const NodeData *bias_
} // namespace
-namespace locomotiv
+namespace
{
-void NodeExecution::execute(loco::BiasAdd<loco::Domain::Tensor> *bias_add)
+using namespace locomotiv;
+
+void execute_node(loco::BiasAdd<loco::Domain::Tensor> *bias_add)
{
validate(bias_add, "BiasAdd is nullptr");
@@ -63,7 +65,7 @@ void NodeExecution::execute(loco::BiasAdd<loco::Domain::Tensor> *bias_add)
annot_domain(bias_add, annot_domain(bias_add->value()));
}
-void NodeExecution::execute(loco::BiasAdd<loco::Domain::Feature> *bias_add)
+void execute_node(loco::BiasAdd<loco::Domain::Feature> *bias_add)
{
validate(bias_add, "BiasAdd is nullptr");
@@ -82,7 +84,7 @@ void NodeExecution::execute(loco::BiasAdd<loco::Domain::Feature> *bias_add)
annot_domain(bias_add, loco::Domain::Feature);
}
-} // namespace locomotiv
+} // namespace
namespace
{
@@ -123,3 +125,18 @@ std::unique_ptr<NodeData> calc(const NodeData *input_data, const NodeData *bias_
}
} // namespace
+
+namespace locomotiv
+{
+
+void NodeExecution::execute(loco::BiasAdd<loco::Domain::Tensor> *bias_add)
+{
+ execute_node(bias_add);
+}
+
+void NodeExecution::execute(loco::BiasAdd<loco::Domain::Feature> *bias_add)
+{
+ execute_node(bias_add);
+}
+
+} // namespace locomotiv
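
The same mechanical change is applied to every locomotiv node below: the body of NodeExecution::execute() moves into a free execute_node() helper in an anonymous namespace, and the member function becomes a one-line forwarder. In sketch form (SomeNode is a placeholder, not a real loco node type):

    namespace
    {

    using namespace locomotiv;

    // SomeNode stands in for the concrete node type handled by each file.
    void execute_node(loco::SomeNode *node)
    {
      // read annot_data() of the inputs, compute, then attach the result to the
      // node itself via annot_data()/annot_domain()
    }

    } // namespace

    namespace locomotiv
    {

    void NodeExecution::execute(loco::SomeNode *node) { execute_node(node); }

    } // namespace locomotiv
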
diff --git a/compiler/locomotiv/src/Node/BiasEncode.cpp b/compiler/locomotiv/src/Node/BiasEncode.cpp
index c2f2b44c0..21f00a495 100644
--- a/compiler/locomotiv/src/Node/BiasEncode.cpp
+++ b/compiler/locomotiv/src/Node/BiasEncode.cpp
@@ -23,10 +23,12 @@
#include <stdexcept>
#include <cassert>
-namespace locomotiv
+namespace
{
-void NodeExecution::execute(loco::BiasEncode *bias_enc)
+using namespace locomotiv;
+
+void execute_node(loco::BiasEncode *bias_enc)
{
auto input_data = annot_data(bias_enc->input());
@@ -60,4 +62,11 @@ void NodeExecution::execute(loco::BiasEncode *bias_enc)
annot_domain(bias_enc, loco::Domain::Bias);
}
+} // namespace
+
+namespace locomotiv
+{
+
+void NodeExecution::execute(loco::BiasEncode *bias_enc) { execute_node(bias_enc); }
+
} // namespace locomotiv
diff --git a/compiler/locomotiv/src/Node/ConstGen.cpp b/compiler/locomotiv/src/Node/ConstGen.cpp
index 0360b9fef..96ffbc257 100644
--- a/compiler/locomotiv/src/Node/ConstGen.cpp
+++ b/compiler/locomotiv/src/Node/ConstGen.cpp
@@ -53,10 +53,12 @@ inline uint32_t offset_by_index(const Shape &shape, const Index &index)
} // namespace
-namespace locomotiv
+namespace
{
-void NodeExecution::execute(loco::ConstGen *constgen)
+using namespace locomotiv;
+
+void execute_node(loco::ConstGen *constgen)
{
uint32_t volume = 1;
@@ -113,4 +115,11 @@ void NodeExecution::execute(loco::ConstGen *constgen)
annot_domain(constgen, loco::Domain::Tensor);
}
+} // namespace
+
+namespace locomotiv
+{
+
+void NodeExecution::execute(loco::ConstGen *constgen) { execute_node(constgen); }
+
} // namespace locomotiv
diff --git a/compiler/locomotiv/src/Node/Conv2D.cpp b/compiler/locomotiv/src/Node/Conv2D.cpp
index 2e4185574..cdf0dfd56 100644
--- a/compiler/locomotiv/src/Node/Conv2D.cpp
+++ b/compiler/locomotiv/src/Node/Conv2D.cpp
@@ -139,10 +139,12 @@ Buffer<RET_T> calc_conv2D(const loco::Conv2D *conv2d, const Buffer<IFM_T> *input
} // namespace
-namespace locomotiv
+namespace
{
-void NodeExecution::execute(loco::Conv2D *conv2d)
+using namespace locomotiv;
+
+void execute_node(loco::Conv2D *conv2d)
{
auto ifm_data = annot_data(conv2d->ifm());
auto ker_data = annot_data(conv2d->ker());
@@ -176,4 +178,11 @@ void NodeExecution::execute(loco::Conv2D *conv2d)
annot_domain(conv2d, loco::Domain::Feature);
}
+} // namespace
+
+namespace locomotiv
+{
+
+void NodeExecution::execute(loco::Conv2D *conv2d) { execute_node(conv2d); }
+
} // namespace locomotiv
diff --git a/compiler/locomotiv/src/Node/DepthwiseConv2D.cpp b/compiler/locomotiv/src/Node/DepthwiseConv2D.cpp
index 92d5aa161..f39cd177e 100644
--- a/compiler/locomotiv/src/Node/DepthwiseConv2D.cpp
+++ b/compiler/locomotiv/src/Node/DepthwiseConv2D.cpp
@@ -143,10 +143,12 @@ Buffer<RET_T> calc_dw_conv2d(const loco::DepthwiseConv2D *dw_conv2d, const Buffe
} // namespace
-namespace locomotiv
+namespace
{
-void NodeExecution::execute(loco::DepthwiseConv2D *dw_conv2d)
+using namespace locomotiv;
+
+void execute_node(loco::DepthwiseConv2D *dw_conv2d)
{
auto ifm_data = annot_data(dw_conv2d->ifm());
auto ker_data = annot_data(dw_conv2d->ker());
@@ -182,4 +184,11 @@ void NodeExecution::execute(loco::DepthwiseConv2D *dw_conv2d)
annot_domain(dw_conv2d, loco::Domain::Feature);
}
+} // namespace
+
+namespace locomotiv
+{
+
+void NodeExecution::execute(loco::DepthwiseConv2D *dw_conv2d) { execute_node(dw_conv2d); }
+
} // namespace locomotiv
diff --git a/compiler/locomotiv/src/Node/DepthwiseFilterEncode.cpp b/compiler/locomotiv/src/Node/DepthwiseFilterEncode.cpp
index 17004901f..03f5bf833 100644
--- a/compiler/locomotiv/src/Node/DepthwiseFilterEncode.cpp
+++ b/compiler/locomotiv/src/Node/DepthwiseFilterEncode.cpp
@@ -79,10 +79,12 @@ std::unique_ptr<locomotiv::NodeData> dw_filter_encode(const loco::DepthwiseFilte
} // namespace
-namespace locomotiv
+namespace
{
-void NodeExecution::execute(loco::DepthwiseFilterEncode *enc)
+using namespace locomotiv;
+
+void execute_node(loco::DepthwiseFilterEncode *enc)
{
auto input_data = annot_data(enc->input());
@@ -110,4 +112,11 @@ void NodeExecution::execute(loco::DepthwiseFilterEncode *enc)
annot_domain(enc, loco::Domain::DepthwiseFilter);
}
+} // namespace
+
+namespace locomotiv
+{
+
+void NodeExecution::execute(loco::DepthwiseFilterEncode *enc) { execute_node(enc); }
+
} // namespace locomotiv
diff --git a/compiler/locomotiv/src/Node/FeatureDecode.cpp b/compiler/locomotiv/src/Node/FeatureDecode.cpp
index 8a56a56b2..8776e1b42 100644
--- a/compiler/locomotiv/src/Node/FeatureDecode.cpp
+++ b/compiler/locomotiv/src/Node/FeatureDecode.cpp
@@ -72,10 +72,12 @@ std::unique_ptr<locomotiv::NodeData> feature_decode(const loco::FeatureDecode *n
} // namespace
-namespace locomotiv
+namespace
{
-void NodeExecution::execute(loco::FeatureDecode *dec)
+using namespace locomotiv;
+
+void execute_node(loco::FeatureDecode *dec)
{
auto input_data = annot_data(dec->input());
@@ -109,4 +111,11 @@ void NodeExecution::execute(loco::FeatureDecode *dec)
annot_domain(dec, loco::Domain::Tensor);
}
+} // namespace
+
+namespace locomotiv
+{
+
+void NodeExecution::execute(loco::FeatureDecode *dec) { execute_node(dec); }
+
} // namespace locomotiv
diff --git a/compiler/locomotiv/src/Node/FilterEncode.cpp b/compiler/locomotiv/src/Node/FilterEncode.cpp
index cd9d708dc..0e2ac918f 100644
--- a/compiler/locomotiv/src/Node/FilterEncode.cpp
+++ b/compiler/locomotiv/src/Node/FilterEncode.cpp
@@ -74,10 +74,12 @@ std::unique_ptr<locomotiv::NodeData> filter_encode(const loco::FilterEncode *nod
} // namespace
-namespace locomotiv
+namespace
{
-void NodeExecution::execute(loco::FilterEncode *enc)
+using namespace locomotiv;
+
+void execute_node(loco::FilterEncode *enc)
{
auto input_data = annot_data(enc->input());
@@ -111,4 +113,11 @@ void NodeExecution::execute(loco::FilterEncode *enc)
annot_domain(enc, loco::Domain::Filter);
}
+} // namespace
+
+namespace locomotiv
+{
+
+void NodeExecution::execute(loco::FilterEncode *enc) { execute_node(enc); }
+
} // namespace locomotiv
diff --git a/compiler/locomotiv/src/Node/Forward.cpp b/compiler/locomotiv/src/Node/Forward.cpp
index eb7d44a59..9095ecf00 100644
--- a/compiler/locomotiv/src/Node/Forward.cpp
+++ b/compiler/locomotiv/src/Node/Forward.cpp
@@ -23,10 +23,12 @@
#include <stdexcept>
#include <cassert>
-namespace locomotiv
+namespace
{
-void NodeExecution::execute(loco::Forward *forward)
+using namespace locomotiv;
+
+void execute_node(loco::Forward *forward)
{
auto input_data = annot_data(forward->input());
@@ -59,4 +61,11 @@ void NodeExecution::execute(loco::Forward *forward)
annot_domain(forward, annot_domain(forward->input()));
}
+} // namespace
+
+namespace locomotiv
+{
+
+void NodeExecution::execute(loco::Forward *forward) { execute_node(forward); }
+
} // namespace locomotiv
diff --git a/compiler/locomotiv/src/Node/MatMul.cpp b/compiler/locomotiv/src/Node/MatMul.cpp
index 77b7315a9..e5d149ac5 100644
--- a/compiler/locomotiv/src/Node/MatMul.cpp
+++ b/compiler/locomotiv/src/Node/MatMul.cpp
@@ -82,10 +82,12 @@ template <typename T> Buffer<T> calc_mat_mul(const Buffer<T> *lhs_buf, const Buf
} // namespace
-namespace locomotiv
+namespace
{
-void NodeExecution::execute(loco::MatMul *mat_mul)
+using namespace locomotiv;
+
+void execute_node(loco::MatMul *mat_mul)
{
auto lhs_data = annot_data(mat_mul->lhs());
auto rhs_data = annot_data(mat_mul->rhs());
@@ -130,4 +132,11 @@ void NodeExecution::execute(loco::MatMul *mat_mul)
annot_domain(mat_mul, loco::Domain::Matrix);
}
+} // namespace
+
+namespace locomotiv
+{
+
+void NodeExecution::execute(loco::MatMul *mat_mul) { execute_node(mat_mul); }
+
} // namespace locomotiv
diff --git a/compiler/locomotiv/src/Node/MatrixDecode.cpp b/compiler/locomotiv/src/Node/MatrixDecode.cpp
index c591676ae..0310015f1 100644
--- a/compiler/locomotiv/src/Node/MatrixDecode.cpp
+++ b/compiler/locomotiv/src/Node/MatrixDecode.cpp
@@ -68,10 +68,12 @@ std::unique_ptr<locomotiv::NodeData> matrix_decode(const loco::MatrixDecode *nod
} // namespace
-namespace locomotiv
+namespace
{
-void NodeExecution::execute(loco::MatrixDecode *matrix_dec)
+using namespace locomotiv;
+
+void execute_node(loco::MatrixDecode *matrix_dec)
{
auto input_data = annot_data(matrix_dec->input());
@@ -106,4 +108,11 @@ void NodeExecution::execute(loco::MatrixDecode *matrix_dec)
annot_domain(matrix_dec, loco::Domain::Tensor);
}
+} // namespace
+
+namespace locomotiv
+{
+
+void NodeExecution::execute(loco::MatrixDecode *matrix_dec) { execute_node(matrix_dec); }
+
} // namespace locomotiv
diff --git a/compiler/locomotiv/src/Node/MaxPool2D.cpp b/compiler/locomotiv/src/Node/MaxPool2D.cpp
index 5d92f89f5..8dce1cb1e 100644
--- a/compiler/locomotiv/src/Node/MaxPool2D.cpp
+++ b/compiler/locomotiv/src/Node/MaxPool2D.cpp
@@ -129,10 +129,12 @@ nncc::core::ADT::tensor::Buffer<T> maxPool2D(const loco::MaxPool2D *maxpool2d,
} // namespace
-namespace locomotiv
+namespace
{
-void NodeExecution::execute(loco::MaxPool2D *maxpool2d)
+using namespace locomotiv;
+
+void execute_node(loco::MaxPool2D *maxpool2d)
{
auto ifm_data = annot_data(maxpool2d->ifm());
@@ -164,4 +166,11 @@ void NodeExecution::execute(loco::MaxPool2D *maxpool2d)
annot_domain(maxpool2d, loco::Domain::Feature);
}
+} // namespace
+
+namespace locomotiv
+{
+
+void NodeExecution::execute(loco::MaxPool2D *maxpool2d) { execute_node(maxpool2d); }
+
} // namespace locomotiv
diff --git a/compiler/locomotiv/src/Node/Pull.cpp b/compiler/locomotiv/src/Node/Pull.cpp
index c482d8b04..fe5d7c2e1 100644
--- a/compiler/locomotiv/src/Node/Pull.cpp
+++ b/compiler/locomotiv/src/Node/Pull.cpp
@@ -24,10 +24,12 @@
#include <cassert>
#include <stdexcept>
-namespace locomotiv
+namespace
{
-void NodeExecution::execute(loco::Pull *pull)
+using namespace locomotiv;
+
+void execute_node(loco::Pull *pull)
{
// TODO Remove deprecated code
#if 0
@@ -69,4 +71,11 @@ void NodeExecution::execute(loco::Pull *pull)
annot_domain(pull, loco::Domain::Tensor);
}
+} // namespace
+
+namespace locomotiv
+{
+
+void NodeExecution::execute(loco::Pull *pull) { execute_node(pull); }
+
} // namespace locomotiv
diff --git a/compiler/locomotiv/src/Node/Push.cpp b/compiler/locomotiv/src/Node/Push.cpp
index fc5808b15..4e1c6c3b8 100644
--- a/compiler/locomotiv/src/Node/Push.cpp
+++ b/compiler/locomotiv/src/Node/Push.cpp
@@ -23,10 +23,12 @@
#include <stdexcept>
#include <cassert>
-namespace locomotiv
+namespace
{
-void NodeExecution::execute(loco::Push *push)
+using namespace locomotiv;
+
+void execute_node(loco::Push *push)
{
auto from_data = annot_data(push->from());
@@ -58,4 +60,11 @@ void NodeExecution::execute(loco::Push *push)
annot_domain(push, loco::Domain::Tensor);
}
+} // namespace
+
+namespace locomotiv
+{
+
+void NodeExecution::execute(loco::Push *push) { execute_node(push); }
+
} // namespace locomotiv
diff --git a/compiler/locomotiv/src/Node/Reshape.cpp b/compiler/locomotiv/src/Node/Reshape.cpp
index ac1672024..a9c07bee1 100644
--- a/compiler/locomotiv/src/Node/Reshape.cpp
+++ b/compiler/locomotiv/src/Node/Reshape.cpp
@@ -36,10 +36,12 @@ using nncc::core::ADT::tensor::num_elements;
#include <cstring>
#include <vector>
-namespace locomotiv
+namespace
{
-void NodeExecution::execute(loco::Reshape<loco::ReshapeType::Fixed> *reshape)
+using namespace locomotiv;
+
+void execute_node(loco::Reshape<loco::ReshapeType::Fixed> *reshape)
{
auto input_data = annot_data(reshape->input());
@@ -87,4 +89,14 @@ void NodeExecution::execute(loco::Reshape<loco::ReshapeType::Fixed> *reshape)
annot_domain(reshape, annot_domain(reshape->input()));
}
+} // namespace
+
+namespace locomotiv
+{
+
+void NodeExecution::execute(loco::Reshape<loco::ReshapeType::Fixed> *reshape)
+{
+ execute_node(reshape);
+}
+
} // namespace locomotiv
diff --git a/compiler/locomotiv/src/Node/Softmax.cpp b/compiler/locomotiv/src/Node/Softmax.cpp
index 352598b27..0018eb66f 100644
--- a/compiler/locomotiv/src/Node/Softmax.cpp
+++ b/compiler/locomotiv/src/Node/Softmax.cpp
@@ -65,10 +65,12 @@ Shape reduce_shape(const Shape &shape, uint32_t axis)
} // namespace
-namespace locomotiv
+namespace
{
-void NodeExecution::execute(loco::TensorSoftmax *softmax)
+using namespace locomotiv;
+
+void execute_node(loco::TensorSoftmax *softmax)
{
auto input_data = annot_data(softmax->input());
@@ -119,4 +121,11 @@ void NodeExecution::execute(loco::TensorSoftmax *softmax)
annot_domain(softmax, annot_domain(softmax->input()));
}
+} // namespace
+
+namespace locomotiv
+{
+
+void NodeExecution::execute(loco::TensorSoftmax *softmax) { execute_node(softmax); }
+
} // namespace locomotiv
diff --git a/compiler/locomotiv/src/Node/TensorBroadcast.cpp b/compiler/locomotiv/src/Node/TensorBroadcast.cpp
index 010ca6821..38e5a7aa9 100644
--- a/compiler/locomotiv/src/Node/TensorBroadcast.cpp
+++ b/compiler/locomotiv/src/Node/TensorBroadcast.cpp
@@ -34,10 +34,12 @@ using nncc::core::ADT::tensor::Shape;
#include <cassert>
#include <stdexcept>
-namespace locomotiv
+namespace
{
-void NodeExecution::execute(loco::TensorBroadcast *tensor_broadcast)
+using namespace locomotiv;
+
+void execute_node(loco::TensorBroadcast *tensor_broadcast)
{
auto input_data = annot_data(tensor_broadcast->input());
@@ -103,4 +105,14 @@ void NodeExecution::execute(loco::TensorBroadcast *tensor_broadcast)
annot_domain(tensor_broadcast, loco::Domain::Tensor);
}
+} // namespace
+
+namespace locomotiv
+{
+
+void NodeExecution::execute(loco::TensorBroadcast *tensor_broadcast)
+{
+ execute_node(tensor_broadcast);
+}
+
} // namespace locomotiv
diff --git a/compiler/locomotiv/src/Node/TensorConcat.cpp b/compiler/locomotiv/src/Node/TensorConcat.cpp
index 3187a7f75..188bb635b 100644
--- a/compiler/locomotiv/src/Node/TensorConcat.cpp
+++ b/compiler/locomotiv/src/Node/TensorConcat.cpp
@@ -35,10 +35,12 @@ using nncc::core::ADT::tensor::Shape;
#include <cassert>
#include <stdexcept>
-namespace locomotiv
+namespace
{
-void NodeExecution::execute(loco::TensorConcat *tensor_concat)
+using namespace locomotiv;
+
+void execute_node(loco::TensorConcat *tensor_concat)
{
validate(tensor_concat, "TensorConcat is nullptr");
@@ -112,4 +114,11 @@ void NodeExecution::execute(loco::TensorConcat *tensor_concat)
annot_domain(tensor_concat, loco::Domain::Tensor);
}
+} // namespace
+
+namespace locomotiv
+{
+
+void NodeExecution::execute(loco::TensorConcat *tensor_concat) { execute_node(tensor_concat); }
+
} // namespace locomotiv
diff --git a/compiler/locomotiv/src/Node/TensorConstantPad.cpp b/compiler/locomotiv/src/Node/TensorConstantPad.cpp
index cd81a3a4d..5d4ad5d24 100644
--- a/compiler/locomotiv/src/Node/TensorConstantPad.cpp
+++ b/compiler/locomotiv/src/Node/TensorConstantPad.cpp
@@ -31,10 +31,12 @@ using nncc::core::ADT::tensor::IndexEnumerator;
using nncc::core::ADT::tensor::LexicalLayout;
using nncc::core::ADT::tensor::make_buffer;
-namespace locomotiv
+namespace
{
-void NodeExecution::execute(loco::TensorConstantPad *pad)
+using namespace locomotiv;
+
+void execute_node(loco::TensorConstantPad *pad)
{
validate(pad, "TensorConstantPad is nullptr");
@@ -112,4 +114,11 @@ void NodeExecution::execute(loco::TensorConstantPad *pad)
annot_domain(pad, annot_domain(pad->input()));
}
+} // namespace
+
+namespace locomotiv
+{
+
+void NodeExecution::execute(loco::TensorConstantPad *pad) { execute_node(pad); }
+
} // namespace locomotiv
diff --git a/compiler/locomotiv/src/Node/TensorReduce.cpp b/compiler/locomotiv/src/Node/TensorReduce.cpp
index a60ebd890..1f619a31a 100644
--- a/compiler/locomotiv/src/Node/TensorReduce.cpp
+++ b/compiler/locomotiv/src/Node/TensorReduce.cpp
@@ -115,10 +115,12 @@ void apply(Buffer<T> &lhs, const Buffer<T> &rhs, const loco::TensorReduce &node)
} // namespace
-namespace locomotiv
+namespace
{
-void NodeExecution::execute(loco::TensorReduce *node)
+using namespace locomotiv;
+
+void execute_node(loco::TensorReduce *node)
{
auto input_data = annot_data(node->input());
validate(input_data, "Input not ready");
@@ -149,4 +151,11 @@ void NodeExecution::execute(loco::TensorReduce *node)
annot_domain(node, annot_domain(node->input()));
}
+} // namespace
+
+namespace locomotiv
+{
+
+void NodeExecution::execute(loco::TensorReduce *node) { execute_node(node); }
+
} // namespace locomotiv
diff --git a/compiler/locomotiv/src/Node/TransposedConv2D.cpp b/compiler/locomotiv/src/Node/TransposedConv2D.cpp
index 3ea4f071d..bec15a5df 100644
--- a/compiler/locomotiv/src/Node/TransposedConv2D.cpp
+++ b/compiler/locomotiv/src/Node/TransposedConv2D.cpp
@@ -147,10 +147,12 @@ Buffer<RET_T> calc_tr_conv2D(const loco::TransposedConv2D *tr_conv2d,
} // namespace
-namespace locomotiv
+namespace
{
-void NodeExecution::execute(loco::TransposedConv2D *tr_conv2d)
+using namespace locomotiv;
+
+void execute_node(loco::TransposedConv2D *tr_conv2d)
{
auto ifm_data = annot_data(tr_conv2d->ifm());
auto ker_data = annot_data(tr_conv2d->ker());
@@ -186,4 +188,11 @@ void NodeExecution::execute(loco::TransposedConv2D *tr_conv2d)
annot_domain(tr_conv2d, loco::Domain::Feature);
}
+} // namespace
+
+namespace locomotiv
+{
+
+void NodeExecution::execute(loco::TransposedConv2D *tr_conv2d) { execute_node(tr_conv2d); }
+
} // namespace locomotiv
diff --git a/compiler/luci-interpreter/src/CMakeLists.txt b/compiler/luci-interpreter/src/CMakeLists.txt
index 6a66f1425..47b68fa40 100644
--- a/compiler/luci-interpreter/src/CMakeLists.txt
+++ b/compiler/luci-interpreter/src/CMakeLists.txt
@@ -1,6 +1,7 @@
-nnas_find_package(TensorFlowSource EXACT 2.1.0 QUIET)
-nnas_find_package(TensorFlowGEMMLowpSource EXACT 2.1.0 QUIET)
-nnas_find_package(TensorFlowEigenSource EXACT 2.1.0 QUIET)
+nnas_find_package(TensorFlowSource EXACT 2.3.0 QUIET)
+nnas_find_package(TensorFlowGEMMLowpSource EXACT 2.3.0 QUIET)
+nnas_find_package(TensorFlowEigenSource EXACT 2.3.0 QUIET)
+nnas_find_package(TensorFlowRuySource EXACT 2.3.0 QUIET)
if (NOT TensorFlowSource_FOUND)
message(STATUS "Skipping luci-interpreter: TensorFlow not found")
@@ -17,6 +18,11 @@ if (NOT TensorFlowEigenSource_FOUND)
return()
endif ()
+if (NOT TensorFlowRuySource_FOUND)
+ message(STATUS "Skipping luci-interpreter: Ruy not found")
+ return()
+endif ()
+
add_subdirectory(core)
add_subdirectory(kernels)
add_subdirectory(loader)
diff --git a/compiler/luci-interpreter/src/kernels/ArgMax.test.cpp b/compiler/luci-interpreter/src/kernels/ArgMax.test.cpp
index 5ac3b2f7a..2ab7ff0da 100644
--- a/compiler/luci-interpreter/src/kernels/ArgMax.test.cpp
+++ b/compiler/luci-interpreter/src/kernels/ArgMax.test.cpp
@@ -93,6 +93,21 @@ TYPED_TEST(ArgMaxTest, MultiDimensions)
/*dimension_data=*/{3}, /*output_data=*/{3, 1});
}
+TEST(ArgMaxTest, UnsupportedType_NEG)
+{
+ Tensor input_tensor = makeInputTensor<DataType::FLOAT32>({1, 1, 2, 4}, {
+ 1, 2, 7, 8, 1, 9, 7, 3,
+ });
+ Tensor dimension_tensor = makeInputTensor<DataType::S32>({}, {3});
+ Tensor output_tensor = makeOutputTensor(DataType::U8);
+
+ ArgMaxParams params{};
+ params.output_type = DataType::U8;
+ ArgMax kernel(&input_tensor, &dimension_tensor, &output_tensor, params);
+ kernel.configure();
+ EXPECT_ANY_THROW(kernel.execute());
+}
+
} // namespace
} // namespace kernels
} // namespace luci_interpreter
diff --git a/compiler/luci-interpreter/src/kernels/AveragePool2D.cpp b/compiler/luci-interpreter/src/kernels/AveragePool2D.cpp
index 6d1b8ead4..cdd81d7d6 100644
--- a/compiler/luci-interpreter/src/kernels/AveragePool2D.cpp
+++ b/compiler/luci-interpreter/src/kernels/AveragePool2D.cpp
@@ -35,6 +35,14 @@ AveragePool2D::AveragePool2D(const Tensor *input, Tensor *output, const Pool2DPa
void AveragePool2D::configure()
{
+ if (input()->element_type() != output()->element_type())
+ {
+ throw std::runtime_error("Input Tensor and Output Tensor Type must be same");
+ }
+ if (input()->shape().num_dims() != 4)
+ {
+ throw std::runtime_error("Input Tensor Shape must be 4-D");
+ }
const Shape &input_shape = input()->shape();
const int32_t batches = input_shape.dim(0);
@@ -51,7 +59,14 @@ void AveragePool2D::configure()
computePadding(_params.stride_height, 1, input_height, _params.filter_height, output_height);
_padding_width =
computePadding(_params.stride_width, 1, input_width, _params.filter_width, output_width);
-
+ if (input()->element_type() == DataType::U8)
+ {
+ if (input()->scale() != output()->scale() || input()->zero_point() != output()->zero_point())
+ {
+ throw std::runtime_error(
+ "Quantization param for Input and output must be same(scale or zero-point)");
+ }
+ }
output()->resize({batches, output_height, output_width, depth});
}
diff --git a/compiler/luci-interpreter/src/kernels/AveragePool2D.test.cpp b/compiler/luci-interpreter/src/kernels/AveragePool2D.test.cpp
index 7160e49e9..cc80e5e90 100644
--- a/compiler/luci-interpreter/src/kernels/AveragePool2D.test.cpp
+++ b/compiler/luci-interpreter/src/kernels/AveragePool2D.test.cpp
@@ -122,6 +122,80 @@ TEST(AveragePool2DTest, Uint8_1)
EXPECT_THAT(extractTensorShape(output_tensor), ::testing::ElementsAreArray({1, 1, 2, 1}));
}
+TEST(AveragePool2DTest, Invalid_Input_Shape_NEG)
+{
+ Shape input_shape{1, 3, 5};
+ std::vector<float> input_data{
+ -4, -3, -2, -1, 0, //
+ 1, 2, 3, 4, 5, //
+ 6, 7, 8, 9, 10, //
+ };
+ Tensor input_tensor = makeInputTensor<DataType::FLOAT32>(input_shape, input_data);
+ Tensor output_tensor = makeOutputTensor(DataType::FLOAT32);
+
+ Pool2DParams params{};
+ params.padding = Padding::VALID;
+ params.filter_height = 2;
+ params.filter_width = 3;
+ params.stride_height = 1;
+ params.stride_width = 2;
+ params.activation = Activation::RELU6;
+
+ AveragePool2D kernel(&input_tensor, &output_tensor, params);
+ EXPECT_ANY_THROW(kernel.configure());
+}
+
+TEST(AveragePool2DTest, In_Out_Type_NEG)
+{
+ Shape input_shape{1, 3, 5, 1};
+ std::vector<float> input_data{
+ -4, -3, -2, -1, 0, //
+ 1, 2, 3, 4, 5, //
+ 6, 7, 8, 9, 10, //
+ };
+ Tensor input_tensor = makeInputTensor<DataType::FLOAT32>(input_shape, input_data);
+ Tensor output_tensor = makeOutputTensor(DataType::U8);
+
+ Pool2DParams params{};
+ params.padding = Padding::VALID;
+ params.filter_height = 2;
+ params.filter_width = 3;
+ params.stride_height = 1;
+ params.stride_width = 2;
+ params.activation = Activation::RELU6;
+
+ AveragePool2D kernel(&input_tensor, &output_tensor, params);
+ EXPECT_ANY_THROW(kernel.configure());
+}
+
+TEST(AveragePool2DTest, Quant_Param_NEG)
+{
+ std::pair<float, int32_t> quant_param1 = quantizationParams<uint8_t>(-15.9375f, 15.9375f);
+ std::pair<float, int32_t> quant_param2 = quantizationParams<uint8_t>(-7.875f, 7.875f);
+ Tensor input_tensor{
+ DataType::U8, {1, 2, 4, 1}, {{quant_param1.first}, {quant_param1.second}}, ""};
+ Tensor output_tensor = makeOutputTensor(DataType::U8, quant_param2.first, quant_param2.second);
+
+ std::vector<uint8_t> quant_input = quantize<uint8_t>(
+ {
+ 0, -6, 12, 4, //
+ -3, -2, 10, 7, //
+ },
+ quant_param1.first, quant_param1.second);
+ input_tensor.writeData(quant_input.data(), quant_input.size() * sizeof(uint8_t));
+
+ Pool2DParams params{};
+ params.padding = Padding::VALID;
+ params.filter_height = 2;
+ params.filter_width = 2;
+ params.stride_height = 2;
+ params.stride_width = 2;
+ params.activation = Activation::RELU6;
+
+ AveragePool2D kernel(&input_tensor, &output_tensor, params);
+ EXPECT_ANY_THROW(kernel.configure());
+}
+
} // namespace
} // namespace kernels
} // namespace luci_interpreter
diff --git a/compiler/luci-interpreter/src/kernels/CMakeLists.txt b/compiler/luci-interpreter/src/kernels/CMakeLists.txt
index a1fd1deaf..040ac5911 100644
--- a/compiler/luci-interpreter/src/kernels/CMakeLists.txt
+++ b/compiler/luci-interpreter/src/kernels/CMakeLists.txt
@@ -44,6 +44,8 @@ set(SOURCES
Reshape.cpp
Reverse.h
Reverse.cpp
+ Rsqrt.h
+ Rsqrt.cpp
Slice.h
Slice.cpp
Softmax.h
@@ -54,8 +56,12 @@ set(SOURCES
Split.cpp
StridedSlice.h
StridedSlice.cpp
+ Sqrt.h
+ Sqrt.cpp
Squeeze.h
Squeeze.cpp
+ Tanh.h
+ Tanh.cpp
Transpose.h
Transpose.cpp
TransposeConv.h
@@ -63,12 +69,13 @@ set(SOURCES
Unpack.h
Unpack.cpp)
-list(APPEND SOURCES Utils.h Utils.cpp)
+list(APPEND SOURCES Utils.h Utils.cpp ${TensorFlowSource_DIR}/tensorflow/lite/kernels/internal/quantization_util.cc)
add_library(luci_interpreter_kernels STATIC ${SOURCES})
set_target_properties(luci_interpreter_kernels PROPERTIES POSITION_INDEPENDENT_CODE ON)
target_include_directories(luci_interpreter_kernels PUBLIC ${LUCI_INTERPRETER_SOURCE_DIR})
target_include_directories(luci_interpreter_kernels SYSTEM PRIVATE
+ "${TensorFlowRuySource_DIR}"
"${TensorFlowGEMMLowpSource_DIR}"
"${TensorFlowEigenSource_DIR}"
"${TensorFlowSource_DIR}")
@@ -99,12 +106,15 @@ set(TEST_SOURCES
Pad.test.cpp
Reshape.test.cpp
Reverse.test.cpp
+ Rsqrt.test.cpp
Slice.test.cpp
Softmax.test.cpp
SpaceToDepth.test.cpp
Split.test.cpp
StridedSlice.test.cpp
+ Sqrt.test.cpp
Squeeze.test.cpp
+ Tanh.test.cpp
Transpose.test.cpp
TransposeConv.test.cpp
Unpack.test.cpp)
diff --git a/compiler/luci-interpreter/src/kernels/Conv2D.cpp b/compiler/luci-interpreter/src/kernels/Conv2D.cpp
index 60e6134ab..a51fb4afc 100644
--- a/compiler/luci-interpreter/src/kernels/Conv2D.cpp
+++ b/compiler/luci-interpreter/src/kernels/Conv2D.cpp
@@ -47,21 +47,21 @@ void Conv2D::configure()
// We only support (1) and (3) for now.
if (input()->element_type() == DataType::FLOAT32 && filter()->element_type() == DataType::FLOAT32)
{
- assert(bias() == nullptr || bias()->element_type() == DataType::FLOAT32);
+ LUCI_INTERPRETER_CHECK(bias() == nullptr || bias()->element_type() == DataType::FLOAT32);
}
else if (input()->element_type() == DataType::U8 && filter()->element_type() == DataType::U8)
{
- assert(bias() == nullptr || bias()->element_type() == DataType::S32);
+ LUCI_INTERPRETER_CHECK(bias() == nullptr || bias()->element_type() == DataType::S32);
}
else
{
throw std::runtime_error("Unsupported type.");
}
- assert(output()->element_type() == input()->element_type());
+ LUCI_INTERPRETER_CHECK(output()->element_type() == input()->element_type());
const Shape &input_shape = input()->shape();
const Shape &filter_shape = filter()->shape();
- assert(input_shape.num_dims() == 4 && filter_shape.num_dims() == 4);
+ LUCI_INTERPRETER_CHECK(input_shape.num_dims() == 4 && filter_shape.num_dims() == 4);
const int32_t batches = input_shape.dim(0);
const int32_t input_height = input_shape.dim(1);
@@ -69,10 +69,10 @@ void Conv2D::configure()
const int32_t output_depth = filter_shape.dim(0);
const int32_t filter_height = filter_shape.dim(1);
const int32_t filter_width = filter_shape.dim(2);
- assert(filter_shape.dim(3) == input_shape.dim(3));
+ LUCI_INTERPRETER_CHECK(filter_shape.dim(3) == input_shape.dim(3));
- assert(bias() == nullptr ||
- (bias()->shape().num_dims() == 1 && bias()->shape().dim(0) == output_depth));
+ LUCI_INTERPRETER_CHECK(bias() == nullptr || (bias()->shape().num_dims() == 1 &&
+ bias()->shape().dim(0) == output_depth));
const int32_t output_height =
computeOutputSize(_params.padding, input_height, filter_height, _params.stride_height,
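
Replacing assert with LUCI_INTERPRETER_CHECK keeps the shape and type validation active in release builds, which the _NEG tests below rely on (they expect configure() to throw). Assuming the macro, declared in kernels/Utils.h, throws on failure, each call site amounts to roughly:

    #include <stdexcept>
    #include <string>

    // Sketch of what a throwing check boils down to; the real macro also records
    // the failing expression and source location.
    inline void check_or_throw(bool condition, const char *expr)
    {
      if (!condition)
        throw std::runtime_error(std::string("Check failed: ") + expr);
    }
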
diff --git a/compiler/luci-interpreter/src/kernels/Conv2D.test.cpp b/compiler/luci-interpreter/src/kernels/Conv2D.test.cpp
index ef9ace903..0446d9760 100644
--- a/compiler/luci-interpreter/src/kernels/Conv2D.test.cpp
+++ b/compiler/luci-interpreter/src/kernels/Conv2D.test.cpp
@@ -180,6 +180,146 @@ TEST(Conv2DTest, Uint8)
EXPECT_THAT(extractTensorShape(output_tensor), ::testing::ElementsAreArray(ref_output_shape));
}
+TEST(Conv2DTest, Unsupported_Type_Configure_NEG)
+{
+ Shape input_shape{1, 4, 3, 2};
+ Shape filter_shape{2, 2, 2, 2};
+ Shape bias_shape{2};
+ std::vector<int32_t> input_data{
+ 1, 2, 3, 4, 5, 6, // row = 0
+ 7, 8, 9, 10, 11, 12, // row = 1
+ 13, 14, 15, 16, 17, 18, // row = 2
+ 19, 20, 21, 22, 23, 24, // row = 3
+ };
+ std::vector<float> filter_data{
+ 1, 2, -3, -4, // out = 0, row = 0
+ -5, 6, -7, 8, // out = 1, row = 0
+ 4, -2, 3, -1, // out = 0, row = 1
+ -8, -6, 7, 5, // out = 1, row = 1
+ };
+ std::vector<float> bias_data{1, 2};
+ Tensor input_tensor = makeInputTensor<DataType::S32>(input_shape, input_data);
+ Tensor filter_tensor = makeInputTensor<DataType::FLOAT32>(filter_shape, filter_data);
+ Tensor bias_tensor = makeInputTensor<DataType::FLOAT32>(bias_shape, bias_data);
+ Tensor output_tensor = makeOutputTensor(DataType::FLOAT32);
+
+ Conv2DParams params{};
+ params.padding = Padding::VALID;
+ params.stride_height = 2;
+ params.stride_width = 1;
+ params.dilation_height_factor = 1;
+ params.dilation_width_factor = 1;
+ params.activation = Activation::RELU;
+
+ Conv2D kernel(&input_tensor, &filter_tensor, &bias_tensor, &output_tensor, params);
+ EXPECT_ANY_THROW(kernel.configure());
+}
+
+TEST(Conv2DTest, Invalid_Bias_Type_NEG)
+{
+ Shape input_shape{1, 4, 3, 2};
+ Shape filter_shape{2, 2, 2, 2};
+ Shape bias_shape{2};
+ std::vector<float> input_data{
+ 1, 2, 3, 4, 5, 6, // row = 0
+ 7, 8, 9, 10, 11, 12, // row = 1
+ 13, 14, 15, 16, 17, 18, // row = 2
+ 19, 20, 21, 22, 23, 24, // row = 3
+ };
+ std::vector<float> filter_data{
+ 1, 2, -3, -4, // out = 0, row = 0
+ -5, 6, -7, 8, // out = 1, row = 0
+ 4, -2, 3, -1, // out = 0, row = 1
+ -8, -6, 7, 5, // out = 1, row = 1
+ };
+ std::vector<uint8_t> bias_data{1, 2};
+ Tensor input_tensor = makeInputTensor<DataType::FLOAT32>(input_shape, input_data);
+ Tensor filter_tensor = makeInputTensor<DataType::FLOAT32>(filter_shape, filter_data);
+ Tensor bias_tensor = makeInputTensor<DataType::U8>(bias_shape, bias_data);
+ Tensor output_tensor = makeOutputTensor(DataType::FLOAT32);
+
+ Conv2DParams params{};
+ params.padding = Padding::VALID;
+ params.stride_height = 2;
+ params.stride_width = 1;
+ params.dilation_height_factor = 1;
+ params.dilation_width_factor = 1;
+ params.activation = Activation::RELU;
+
+ Conv2D kernel(&input_tensor, &filter_tensor, &bias_tensor, &output_tensor, params);
+ EXPECT_ANY_THROW(kernel.configure());
+}
+
+TEST(Conv2DTest, Invalid_Bias_Data_NEG)
+{
+ Shape input_shape{1, 4, 3, 2};
+ Shape filter_shape{2, 2, 2, 2};
+ Shape bias_shape{3};
+ std::vector<float> input_data{
+ 1, 2, 3, 4, 5, 6, // row = 0
+ 7, 8, 9, 10, 11, 12, // row = 1
+ 13, 14, 15, 16, 17, 18, // row = 2
+ 19, 20, 21, 22, 23, 24, // row = 3
+ };
+ std::vector<float> filter_data{
+ 1, 2, -3, -4, // out = 0, row = 0
+ -5, 6, -7, 8, // out = 1, row = 0
+ 4, -2, 3, -1, // out = 0, row = 1
+ -8, -6, 7, 5, // out = 1, row = 1
+ };
+ std::vector<float> bias_data{1, 2, 3};
+ Tensor input_tensor = makeInputTensor<DataType::FLOAT32>(input_shape, input_data);
+ Tensor filter_tensor = makeInputTensor<DataType::FLOAT32>(filter_shape, filter_data);
+ Tensor bias_tensor = makeInputTensor<DataType::FLOAT32>(bias_shape, bias_data);
+ Tensor output_tensor = makeOutputTensor(DataType::FLOAT32);
+
+ Conv2DParams params{};
+ params.padding = Padding::VALID;
+ params.stride_height = 2;
+ params.stride_width = 1;
+ params.dilation_height_factor = 1;
+ params.dilation_width_factor = 1;
+ params.activation = Activation::RELU;
+
+ Conv2D kernel(&input_tensor, &filter_tensor, &bias_tensor, &output_tensor, params);
+ EXPECT_ANY_THROW(kernel.configure());
+}
+
+TEST(Conv2DTest, Invalid_Input_Shape_NEG)
+{
+ Shape input_shape{1, 4, 6, 1};
+ Shape filter_shape{2, 2, 2, 2};
+ Shape bias_shape{2};
+ std::vector<float> input_data{
+ 1, 2, 3, 4, 5, 6, // row = 0
+ 7, 8, 9, 10, 11, 12, // row = 1
+ 13, 14, 15, 16, 17, 18, // row = 2
+ 19, 20, 21, 22, 23, 24, // row = 3
+ };
+ std::vector<float> filter_data{
+ 1, 2, -3, -4, // out = 0, row = 0
+ -5, 6, -7, 8, // out = 1, row = 0
+ 4, -2, 3, -1, // out = 0, row = 1
+ -8, -6, 7, 5, // out = 1, row = 1
+ };
+ std::vector<float> bias_data{1, 2};
+ Tensor input_tensor = makeInputTensor<DataType::FLOAT32>(input_shape, input_data);
+ Tensor filter_tensor = makeInputTensor<DataType::FLOAT32>(filter_shape, filter_data);
+ Tensor bias_tensor = makeInputTensor<DataType::FLOAT32>(bias_shape, bias_data);
+ Tensor output_tensor = makeOutputTensor(DataType::FLOAT32);
+
+ Conv2DParams params{};
+ params.padding = Padding::VALID;
+ params.stride_height = 2;
+ params.stride_width = 1;
+ params.dilation_height_factor = 1;
+ params.dilation_width_factor = 1;
+ params.activation = Activation::RELU;
+
+ Conv2D kernel(&input_tensor, &filter_tensor, &bias_tensor, &output_tensor, params);
+ EXPECT_ANY_THROW(kernel.configure());
+}
+
} // namespace
} // namespace kernels
} // namespace luci_interpreter
diff --git a/compiler/luci-interpreter/src/kernels/LeakyRelu.cpp b/compiler/luci-interpreter/src/kernels/LeakyRelu.cpp
index fce01a605..1a26debe0 100644
--- a/compiler/luci-interpreter/src/kernels/LeakyRelu.cpp
+++ b/compiler/luci-interpreter/src/kernels/LeakyRelu.cpp
@@ -39,12 +39,10 @@ void LeakyRelu::configure()
assert(input()->element_type() == output()->element_type());
if (input()->element_type() == DataType::U8)
{
- _q_alpha = static_cast<uint8_t>(std::max<float>(
- std::numeric_limits<uint8_t>::min(),
- std::min<float>(std::numeric_limits<uint8_t>::max(),
- std::round(input()->zero_point() + (params().alpha / input()->scale())))));
- double real_multiplier = input()->scale() * input()->scale() / output()->scale();
- quantizeMultiplierSmallerThanOneExp(real_multiplier, &_output_multiplier, &_output_shift);
+ double alpha_multiplier = input()->scale() * params().alpha / output()->scale();
+ quantizeMultiplier(alpha_multiplier, &_output_multiplier_alpha, &_output_shift_alpha);
+ double identity_multiplier = input()->scale() / output()->scale();
+ quantizeMultiplier(identity_multiplier, &_output_multiplier_identity, &_output_shift_identity);
}
output()->resize(input()->shape());
}
@@ -77,15 +75,15 @@ void LeakyRelu::evalQuantized() const
{
tflite::LeakyReluParams op_params{};
op_params.input_offset = input()->zero_point();
- op_params.alpha_offset = input()->zero_point();
op_params.output_offset = output()->zero_point();
-
- op_params.output_multiplier = _output_multiplier;
- op_params.output_shift = _output_shift;
+ op_params.output_multiplier_alpha = _output_multiplier_alpha;
+ op_params.output_shift_alpha = _output_shift_alpha;
+ op_params.output_multiplier_identity = _output_multiplier_identity;
+ op_params.output_shift_identity = _output_shift_identity;
tflite::reference_ops::QuantizeLeakyRelu(
- op_params, _q_alpha, getTensorShape(input()), getTensorData<uint8_t>(input()),
- getTensorShape(output()), getTensorData<uint8_t>(output()));
+ op_params, getTensorShape(input()), getTensorData<uint8_t>(input()), getTensorShape(output()),
+ getTensorData<uint8_t>(output()));
}
} // namespace kernels
diff --git a/compiler/luci-interpreter/src/kernels/LeakyRelu.h b/compiler/luci-interpreter/src/kernels/LeakyRelu.h
index dcc2be93f..e66f404df 100644
--- a/compiler/luci-interpreter/src/kernels/LeakyRelu.h
+++ b/compiler/luci-interpreter/src/kernels/LeakyRelu.h
@@ -41,9 +41,10 @@ private:
void evalQuantized() const;
private:
- uint8_t _q_alpha = 0;
- int32_t _output_multiplier = 0;
- int _output_shift = 0;
+ int32_t _output_multiplier_alpha = 0;
+ int _output_shift_alpha = 0;
+ int32_t _output_multiplier_identity = 0;
+ int _output_shift_identity = 0;
};
} // namespace kernels
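
Instead of folding alpha into a single requantized scalar, the quantized path now derives two (multiplier, shift) pairs, one for the identity branch and one for the alpha branch, matching the updated tflite::reference_ops::QuantizeLeakyRelu interface. Ignoring the fixed-point details of the real multiplier/shift rescaling, the per-element arithmetic those pairs encode is roughly the following sketch:

    #include <algorithm>
    #include <cmath>
    #include <cstdint>

    // identity_multiplier ~ input_scale / output_scale
    // alpha_multiplier    ~ input_scale * alpha / output_scale
    uint8_t quantized_leaky_relu_sketch(uint8_t q_in, int32_t input_offset, int32_t output_offset,
                                        double identity_multiplier, double alpha_multiplier)
    {
      const int32_t centered = static_cast<int32_t>(q_in) - input_offset;
      const double scaled =
          (centered >= 0) ? centered * identity_multiplier : centered * alpha_multiplier;
      const int32_t q_out = output_offset + static_cast<int32_t>(std::round(scaled));
      return static_cast<uint8_t>(std::min(255, std::max(0, q_out)));
    }
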
diff --git a/compiler/luci-interpreter/src/kernels/Mul.cpp b/compiler/luci-interpreter/src/kernels/Mul.cpp
index a6e721a09..dd31aa099 100644
--- a/compiler/luci-interpreter/src/kernels/Mul.cpp
+++ b/compiler/luci-interpreter/src/kernels/Mul.cpp
@@ -19,7 +19,8 @@
#include "kernels/Utils.h"
-#include <tensorflow/lite/kernels/internal/reference/reference_ops.h>
+#include <tensorflow/lite/kernels/internal/optimized/optimized_ops.h>
+#include <tensorflow/lite/kernels/internal/reference/process_broadcast_shapes.h>
#include <stdexcept>
@@ -66,13 +67,13 @@ void Mul::evalFloat() const
if (need_broadcast)
{
- tflite::reference_ops::BroadcastMul4DSlow(
+ tflite::optimized_ops::BroadcastMul4DSlow(
params, getTensorShape(input1()), getTensorData<float>(input1()), getTensorShape(input2()),
getTensorData<float>(input2()), getTensorShape(output()), getTensorData<float>(output()));
}
else
{
- tflite::reference_ops::Mul(params, getTensorShape(input1()), getTensorData<float>(input1()),
+ tflite::optimized_ops::Mul(params, getTensorShape(input1()), getTensorData<float>(input1()),
getTensorShape(input2()), getTensorData<float>(input2()),
getTensorShape(output()), getTensorData<float>(output()));
}
diff --git a/compiler/luci-interpreter/src/kernels/Rsqrt.cpp b/compiler/luci-interpreter/src/kernels/Rsqrt.cpp
new file mode 100644
index 000000000..6dd92dc98
--- /dev/null
+++ b/compiler/luci-interpreter/src/kernels/Rsqrt.cpp
@@ -0,0 +1,66 @@
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "kernels/Rsqrt.h"
+#include "kernels/Utils.h"
+
+#include <stdexcept>
+#include <cmath>
+
+namespace luci_interpreter
+{
+
+namespace kernels
+{
+
+Rsqrt::Rsqrt(const Tensor *input, Tensor *output) : Kernel({input}, {output}) {}
+
+void Rsqrt::configure()
+{
+ if (input()->element_type() != output()->element_type())
+ {
+ throw std::runtime_error("Input/output tensor data type mismatch.");
+ }
+ output()->resize(input()->shape());
+}
+
+void Rsqrt::execute() const
+{
+ switch (input()->element_type())
+ {
+ case DataType::FLOAT32:
+ evalFloat();
+ break;
+
+ default:
+ throw std::runtime_error("Unsupported type.");
+ }
+}
+
+void Rsqrt::evalFloat() const
+{
+ auto in = getTensorData<float>(input());
+ auto out = getTensorData<float>(output());
+ auto size = getTensorShape(input()).FlatSize();
+ for (auto i = in; i != in + size; ++i)
+ {
+ *out = 1.f / std::sqrt(*i);
+ ++out;
+ }
+}
+
+} // namespace kernels
+} // namespace luci_interpreter
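
[Editor's note, added for clarity] The kernel is a plain element-wise map; stripped of the Tensor plumbing, evalFloat() above amounts to this standalone sketch.

#include <cmath>
#include <cstddef>

// Reciprocal square root, element by element.
void rsqrt(const float *in, float *out, std::size_t size)
{
  for (std::size_t i = 0; i < size; ++i)
    out[i] = 1.0f / std::sqrt(in[i]);
}
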
diff --git a/compiler/luci-interpreter/src/kernels/Rsqrt.h b/compiler/luci-interpreter/src/kernels/Rsqrt.h
new file mode 100644
index 000000000..adc5bcfa2
--- /dev/null
+++ b/compiler/luci-interpreter/src/kernels/Rsqrt.h
@@ -0,0 +1,46 @@
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef LUCI_INTERPRETER_KERNELS_RSQRT_H
+#define LUCI_INTERPRETER_KERNELS_RSQRT_H
+
+#include "core/Kernel.h"
+#include "core/KernelParams.h"
+
+namespace luci_interpreter
+{
+namespace kernels
+{
+
+class Rsqrt : public Kernel
+{
+public:
+ Rsqrt(const Tensor *input, Tensor *output);
+
+ const Tensor *input() const { return _inputs[0]; }
+ Tensor *output() const { return _outputs[0]; }
+
+ void configure() override;
+ void execute() const override;
+
+private:
+ void evalFloat() const;
+};
+
+} // namespace kernels
+} // namespace luci_interpreter
+
+#endif // LUCI_INTERPRETER_KERNELS_RSQRT_H
diff --git a/compiler/luci-interpreter/src/kernels/Rsqrt.test.cpp b/compiler/luci-interpreter/src/kernels/Rsqrt.test.cpp
new file mode 100644
index 000000000..69b55d2f2
--- /dev/null
+++ b/compiler/luci-interpreter/src/kernels/Rsqrt.test.cpp
@@ -0,0 +1,83 @@
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "kernels/Rsqrt.h"
+#include "kernels/TestUtils.h"
+
+namespace luci_interpreter
+{
+namespace kernels
+{
+namespace
+{
+
+using namespace testing;
+
+void Check(std::initializer_list<int32_t> input_shape, std::initializer_list<int32_t> output_shape,
+ std::initializer_list<float> input_data, std::initializer_list<float> output_data)
+{
+ Tensor input_tensor{DataType::FLOAT32, input_shape, {}, ""};
+ input_tensor.writeData(input_data.begin(), input_data.size() * sizeof(float));
+
+ Tensor output_tensor = makeOutputTensor(DataType::FLOAT32);
+
+ Rsqrt kernel(&input_tensor, &output_tensor);
+ kernel.configure();
+ kernel.execute();
+
+ EXPECT_THAT(extractTensorData<float>(output_tensor),
+ ::testing::ElementsAreArray(ArrayFloatNear(output_data)));
+ EXPECT_THAT(extractTensorShape(output_tensor), ::testing::ElementsAreArray(output_shape));
+}
+
+TEST(RsqrtTest, SimpleRsqrt)
+{
+ Check(
+ /*input_shape=*/{1, 2, 4, 1}, /*output_shape=*/{1, 2, 4, 1},
+ /*input_data=*/
+ {
+ 5, 4, 8, 2, //
+ 6, 7.5, 9, 0.3, //
+ },
+ /*output_data=*/
+ {
+ 0.44721360, 0.5, 0.35355339, 0.70710678, //
+ 0.40824829, 0.36514837, 0.33333333, 1.8257419, //
+ });
+}
+
+TEST(RsqrtTest, Input_Output_Type_NEG)
+{
+ Tensor input_tensor = makeInputTensor<DataType::FLOAT32>({1}, {1.f});
+ Tensor output_tensor = makeOutputTensor(DataType::S32);
+
+ Rsqrt kernel(&input_tensor, &output_tensor);
+ EXPECT_ANY_THROW(kernel.configure());
+}
+
+TEST(RsqrtTest, Invalid_Input_Type_NEG)
+{
+ Tensor input_tensor = makeInputTensor<DataType::S64>({1}, {1});
+ Tensor output_tensor = makeOutputTensor(DataType::S64);
+
+ Rsqrt kernel(&input_tensor, &output_tensor);
+ kernel.configure();
+ EXPECT_ANY_THROW(kernel.execute());
+}
+
+} // namespace
+} // namespace kernels
+} // namespace luci_interpreter
diff --git a/compiler/luci-interpreter/src/kernels/Sqrt.cpp b/compiler/luci-interpreter/src/kernels/Sqrt.cpp
new file mode 100644
index 000000000..46e9fc9ad
--- /dev/null
+++ b/compiler/luci-interpreter/src/kernels/Sqrt.cpp
@@ -0,0 +1,66 @@
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "kernels/Sqrt.h"
+#include "kernels/Utils.h"
+
+#include <stdexcept>
+#include <cmath>
+
+namespace luci_interpreter
+{
+
+namespace kernels
+{
+
+Sqrt::Sqrt(const Tensor *input, Tensor *output) : Kernel({input}, {output}) {}
+
+void Sqrt::configure()
+{
+ if (input()->element_type() != output()->element_type())
+ {
+ throw std::runtime_error("Input/output tensor data type mismatch.");
+ }
+ output()->resize(input()->shape());
+}
+
+void Sqrt::execute() const
+{
+ switch (input()->element_type())
+ {
+ case DataType::FLOAT32:
+ evalFloat();
+ break;
+
+ default:
+ throw std::runtime_error("Unsupported type.");
+ }
+}
+
+void Sqrt::evalFloat() const
+{
+ auto in = getTensorData<float>(input());
+ auto out = getTensorData<float>(output());
+ auto size = getTensorShape(input()).FlatSize();
+ for (auto i = in; i != in + size; ++i)
+ {
+ *out = std::sqrt(*i);
+ ++out;
+ }
+}
+
+} // namespace kernels
+} // namespace luci_interpreter
diff --git a/compiler/luci-interpreter/src/kernels/Sqrt.h b/compiler/luci-interpreter/src/kernels/Sqrt.h
new file mode 100644
index 000000000..4034655ed
--- /dev/null
+++ b/compiler/luci-interpreter/src/kernels/Sqrt.h
@@ -0,0 +1,46 @@
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef LUCI_INTERPRETER_KERNELS_SQRT_H
+#define LUCI_INTERPRETER_KERNELS_SQRT_H
+
+#include "core/Kernel.h"
+#include "core/KernelParams.h"
+
+namespace luci_interpreter
+{
+namespace kernels
+{
+
+class Sqrt : public Kernel
+{
+public:
+ Sqrt(const Tensor *input, Tensor *output);
+
+ const Tensor *input() const { return _inputs[0]; }
+ Tensor *output() const { return _outputs[0]; }
+
+ void configure() override;
+ void execute() const override;
+
+private:
+ void evalFloat() const;
+};
+
+} // namespace kernels
+} // namespace luci_interpreter
+
+#endif // LUCI_INTERPRETER_KERNELS_SQRT_H
diff --git a/compiler/luci-interpreter/src/kernels/Sqrt.test.cpp b/compiler/luci-interpreter/src/kernels/Sqrt.test.cpp
new file mode 100644
index 000000000..cdd208280
--- /dev/null
+++ b/compiler/luci-interpreter/src/kernels/Sqrt.test.cpp
@@ -0,0 +1,83 @@
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "kernels/Sqrt.h"
+#include "kernels/TestUtils.h"
+
+namespace luci_interpreter
+{
+namespace kernels
+{
+namespace
+{
+
+using namespace testing;
+
+void Check(std::initializer_list<int32_t> input_shape, std::initializer_list<int32_t> output_shape,
+ std::initializer_list<float> input_data, std::initializer_list<float> output_data)
+{
+ Tensor input_tensor{DataType::FLOAT32, input_shape, {}, ""};
+ input_tensor.writeData(input_data.begin(), input_data.size() * sizeof(float));
+
+ Tensor output_tensor = makeOutputTensor(DataType::FLOAT32);
+
+ Sqrt kernel(&input_tensor, &output_tensor);
+ kernel.configure();
+ kernel.execute();
+
+ EXPECT_THAT(extractTensorData<float>(output_tensor),
+ ::testing::ElementsAreArray(ArrayFloatNear(output_data)));
+ EXPECT_THAT(extractTensorShape(output_tensor), ::testing::ElementsAreArray(output_shape));
+}
+
+TEST(SqrtTest, SimpleSqrt)
+{
+ Check(
+ /*input_shape=*/{1, 2, 4, 1}, /*output_shape=*/{1, 2, 4, 1},
+ /*input_data=*/
+ {
+ 0, 8, 2, 4, //
+ 3, 7, 10, 0.3, //
+ },
+ /*output_data=*/
+ {
+ 0.0, 2.8284271, 1.4142136, 2, //
+ 1.7320508, 2.6457513, 3.1622777, 0.54772256, //
+ });
+}
+
+TEST(SqrtTest, Input_Output_Type_NEG)
+{
+ Tensor input_tensor = makeInputTensor<DataType::FLOAT32>({1}, {1.f});
+ Tensor output_tensor = makeOutputTensor(DataType::S32);
+
+ Sqrt kernel(&input_tensor, &output_tensor);
+ EXPECT_ANY_THROW(kernel.configure());
+}
+
+TEST(SqrtTest, Invalid_Input_Type_NEG)
+{
+ Tensor input_tensor = makeInputTensor<DataType::S64>({1}, {1});
+ Tensor output_tensor = makeOutputTensor(DataType::S64);
+
+ Sqrt kernel(&input_tensor, &output_tensor);
+ kernel.configure();
+ EXPECT_ANY_THROW(kernel.execute());
+}
+
+} // namespace
+} // namespace kernels
+} // namespace luci_interpreter
diff --git a/compiler/luci-interpreter/src/kernels/Tanh.cpp b/compiler/luci-interpreter/src/kernels/Tanh.cpp
new file mode 100644
index 000000000..b649d5d2f
--- /dev/null
+++ b/compiler/luci-interpreter/src/kernels/Tanh.cpp
@@ -0,0 +1,93 @@
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "kernels/Tanh.h"
+
+#include "kernels/Utils.h"
+
+#include <tensorflow/lite/kernels/internal/reference/reference_ops.h>
+
+namespace luci_interpreter
+{
+namespace kernels
+{
+
+Tanh::Tanh(const Tensor *input, Tensor *output) : Kernel({input}, {output}) {}
+
+void Tanh::configure()
+{
+ assert(input()->element_type() == output()->element_type());
+ if (input()->element_type() == DataType::U8)
+ {
+ populateLookupTable();
+ }
+ output()->resize(input()->shape());
+}
+
+void Tanh::execute() const
+{
+ switch (input()->element_type())
+ {
+ case DataType::FLOAT32:
+ evalFloat();
+ break;
+ case DataType::U8:
+ evalQuantized();
+ break;
+ default:
+ throw std::runtime_error("Unsupported type.");
+ }
+}
+
+void Tanh::evalFloat() const
+{
+ tflite::reference_ops::Tanh(getTensorShape(input()), getTensorData<float>(input()),
+ getTensorShape(output()), getTensorData<float>(output()));
+}
+
+void Tanh::evalQuantized() const
+{
+ const int size = tflite::MatchingFlatSize(getTensorShape(input()), getTensorShape(output()));
+ uint8_t *output_data = getTensorData<uint8_t>(output());
+ const uint8_t *input_data = getTensorData<uint8_t>(input());
+ for (int i = 0; i < size; ++i)
+ {
+ output_data[i] = getTableValue(input_data[i]);
+ }
+}
+
+void Tanh::populateLookupTable()
+{
+ const auto input_scale = static_cast<double>(input()->scale());
+ const auto input_zero_point = static_cast<int32_t>(input()->zero_point());
+ const auto output_scale = static_cast<double>(output()->scale());
+ const auto output_zero_point = static_cast<int32_t>(output()->zero_point());
+ const float inverse_scale = 1 / output_scale;
+ int32_t maxval = std::numeric_limits<uint8_t>::max();
+ int32_t minval = std::numeric_limits<uint8_t>::min();
+ for (int32_t val = minval; val <= maxval; ++val)
+ {
+ const float dequantized = input_scale * (val - input_zero_point);
+ const float transformed = std::tanh(dequantized);
+ const float rescaled = std::round(transformed * inverse_scale);
+ const int32_t quantized = static_cast<int32_t>(rescaled + output_zero_point);
+ setTableValue(static_cast<uint8_t>(std::max(std::min(maxval, quantized), minval)),
+ static_cast<uint8_t>(val));
+ }
+}
+
+} // namespace kernels
+} // namespace luci_interpreter
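
[Editor's note, added for clarity] For uint8, every possible input value (0..255) is dequantized, passed through tanh, and requantized once at configure time, so execute() is a single table lookup per element. A self-contained sketch of the same idea; the quantization parameters passed in are example values, not taken from the kernel.

#include <algorithm>
#include <array>
#include <cmath>
#include <cstdint>

std::array<uint8_t, 256> buildTanhTable(float in_scale, int32_t in_zp, float out_scale,
                                        int32_t out_zp)
{
  std::array<uint8_t, 256> table{};
  for (int32_t v = 0; v <= 255; ++v)
  {
    const float x = in_scale * (v - in_zp);                                      // dequantize input code
    const float y = std::tanh(x);                                                // apply the activation
    const int32_t q = static_cast<int32_t>(std::round(y / out_scale)) + out_zp;  // requantize
    table[v] = static_cast<uint8_t>(std::min(255, std::max(0, q)));              // clamp to uint8 range
  }
  return table;
}
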
diff --git a/compiler/luci-interpreter/src/kernels/Tanh.h b/compiler/luci-interpreter/src/kernels/Tanh.h
new file mode 100644
index 000000000..8017c9638
--- /dev/null
+++ b/compiler/luci-interpreter/src/kernels/Tanh.h
@@ -0,0 +1,52 @@
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef LUCI_INTERPRETER_KERNELS_TANH_H
+#define LUCI_INTERPRETER_KERNELS_TANH_H
+
+#include "core/Kernel.h"
+
+namespace luci_interpreter
+{
+namespace kernels
+{
+
+class Tanh : public Kernel
+{
+public:
+ Tanh(const Tensor *input, Tensor *output);
+
+ const Tensor *input() const { return _inputs[0]; }
+ Tensor *output() const { return _outputs[0]; }
+
+ void configure() override;
+ void execute() const override;
+
+private:
+ void evalFloat() const;
+ void evalQuantized() const;
+ void populateLookupTable();
+ void setTableValue(uint8_t value, uint8_t idx) { _table[idx] = value; };
+ uint8_t getTableValue(uint8_t idx) const { return _table[idx]; };
+
+private:
+ uint8_t _table[256]{};
+};
+
+} // namespace kernels
+} // namespace luci_interpreter
+
+#endif // LUCI_INTERPRETER_KERNELS_TANH_H
diff --git a/compiler/luci-interpreter/src/kernels/Tanh.test.cpp b/compiler/luci-interpreter/src/kernels/Tanh.test.cpp
new file mode 100644
index 000000000..392b8672d
--- /dev/null
+++ b/compiler/luci-interpreter/src/kernels/Tanh.test.cpp
@@ -0,0 +1,108 @@
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ * Copyright 2017 The TensorFlow Authors. All Rights Reserved.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "kernels/Tanh.h"
+#include "kernels/TestUtils.h"
+
+namespace luci_interpreter
+{
+namespace kernels
+{
+namespace
+{
+
+using namespace testing;
+
+TEST(TanhTest, Float)
+{
+ Shape input_shape{1, 2, 4, 1};
+ std::vector<float> input_data{
+ 0, -6, 2, 4, //
+ 3, -2, 10, 1, //
+ };
+ Tensor input_tensor = makeInputTensor<DataType::FLOAT32>(input_shape, input_data);
+ Tensor output_tensor = makeOutputTensor(DataType::FLOAT32);
+
+ Tanh kernel(&input_tensor, &output_tensor);
+ kernel.configure();
+ kernel.execute();
+
+ std::vector<float> ref_output_data{
+ 0, -0.9999877, 0.9640275, 0.999329, //
+ 0.99505475, -0.9640275, 1, 0.7615941, //
+ };
+ EXPECT_THAT(extractTensorData<float>(output_tensor),
+ ElementsAreArray(ArrayFloatNear(ref_output_data)));
+}
+
+TEST(TanhTest, Uint8)
+{
+ float kMin = -1;
+ float kMax = 127.f / 128.f;
+ float kTanhTolerance = 2 * (1. / 256);
+ std::pair<float, int32_t> input_quant_param = quantizationParams<uint8_t>(8 * kMin, 8 * kMax);
+ std::pair<float, int32_t> output_quant_param = quantizationParams<uint8_t>(kMin, kMax);
+ std::vector<float> input_data{
+ 0, -6, 2, 4, //
+ -4, -2, 8, 1, //
+ 0, -6, 2, 4, //
+ -4, -2, 8, 1, //
+ 0, -6, 2, 4, //
+ -4, -2, 8, 1, //
+ 0, -6, 2, 4, //
+ -4, -2, 8, 1, //
+ 0, -6, 2, 4, //
+ -4, -2, 8, 1, //
+ 0, -6, 2, 4, //
+ -4, -2, 8, 1, //
+ };
+ Tensor input_tensor{
+ DataType::U8, {2, 6, 4, 1}, {{input_quant_param.first}, {input_quant_param.second}}, ""};
+ Tensor output_tensor =
+ makeOutputTensor(DataType::U8, output_quant_param.first, output_quant_param.second);
+ std::vector<uint8_t> quantize_input =
+ quantize<uint8_t>(input_data, input_quant_param.first, input_quant_param.second);
+ input_tensor.writeData(quantize_input.data(), quantize_input.size() * sizeof(uint8_t));
+
+ Tanh kernel(&input_tensor, &output_tensor);
+ kernel.configure();
+ kernel.execute();
+
+ std::vector<float> ref_output_data{
+ 0.0, -0.999987, 0.964027, 0.999329, //
+ -0.999329, -0.96402, 0.99999, 0.76159, //
+ 0.0, -0.999987, 0.964027, 0.999329, //
+ -0.999329, -0.96402, 0.99999, 0.76159, //
+ 0.0, -0.999987, 0.964027, 0.999329, //
+ -0.999329, -0.96402, 0.99999, 0.76159, //
+ 0.0, -0.999987, 0.964027, 0.999329, //
+ -0.999329, -0.96402, 0.99999, 0.76159, //
+ 0.0, -0.999987, 0.964027, 0.999329, //
+ -0.999329, -0.96402, 0.99999, 0.76159, //
+ 0.0, -0.999987, 0.964027, 0.999329, //
+ -0.999329, -0.96402, 0.99999, 0.76159, //
+ };
+ std::vector<int32_t> ref_output_shape{2, 6, 4, 1};
+ EXPECT_THAT(dequantize<uint8_t>(extractTensorData<uint8_t>(output_tensor), output_tensor.scale(),
+ output_tensor.zero_point()),
+ ElementsAreArray(ArrayFloatNear(ref_output_data, kTanhTolerance)));
+ EXPECT_THAT(extractTensorShape(output_tensor), ::testing::ElementsAreArray(ref_output_shape));
+}
+
+} // namespace
+} // namespace kernels
+} // namespace luci_interpreter
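
[Editor's note, added for clarity] In the Uint8 test above, the output range is [kMin, kMax] = [-1, 127/128]. Assuming the usual (max - min) / 255 scale, the output scale is (255/128) / 255 = 1/128, so kTanhTolerance = 2 * (1/256) = 1/128 allows exactly one output quantization step of error.
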
diff --git a/compiler/luci-interpreter/src/kernels/TransposeConv.cpp b/compiler/luci-interpreter/src/kernels/TransposeConv.cpp
index 46380e2fa..898bae3da 100644
--- a/compiler/luci-interpreter/src/kernels/TransposeConv.cpp
+++ b/compiler/luci-interpreter/src/kernels/TransposeConv.cpp
@@ -30,8 +30,8 @@ namespace kernels
{
TransposeConv::TransposeConv(const Tensor *output_shape, const Tensor *filter, const Tensor *input,
- Tensor *output, const TransposeConvParams &params)
- : KernelWithParams<TransposeConvParams>({output_shape, filter, input}, {output}, params)
+ const Tensor *bias, Tensor *output, const TransposeConvParams &params)
+ : KernelWithParams<TransposeConvParams>({output_shape, filter, input, bias}, {output}, params)
{
}
@@ -106,8 +106,9 @@ void TransposeConv::evalFloat() const
op_params.output_multiplier = _output_multiplier;
tflite::reference_ops::TransposeConv(
op_params, getTensorShape(input()), getTensorData<float>(input()), getTensorShape(filter()),
- getTensorData<float>(filter()), getTensorShape(output()), getTensorData<float>(output()),
- tflite::RuntimeShape(), (float *)nullptr);
+ getTensorData<float>(filter()), getTensorShape(bias()), getTensorData<float>(bias()),
+ getTensorShape(output()), getTensorData<float>(output()), tflite::RuntimeShape(),
+ (float *)nullptr);
}
void TransposeConv::evalQuantized() const
@@ -145,8 +146,9 @@ void TransposeConv::evalQuantized() const
tflite::reference_ops::TransposeConv(
op_params, getTensorShape(input()), getTensorData<uint8>(input()), getTensorShape(filter()),
- getTensorData<uint8>(filter()), getTensorShape(output()), getTensorData<uint8>(output()),
- tflite::RuntimeShape(), (uint8 *)nullptr, getTensorData<int32_t>(_scratch_tensor.get()));
+ getTensorData<uint8>(filter()), getTensorShape(bias()), getTensorData<int32_t>(bias()),
+ getTensorShape(output()), getTensorData<uint8>(output()), tflite::RuntimeShape(),
+ (uint8 *)nullptr, getTensorData<int32_t>(_scratch_tensor.get()));
}
} // namespace kernels
diff --git a/compiler/luci-interpreter/src/kernels/TransposeConv.h b/compiler/luci-interpreter/src/kernels/TransposeConv.h
index d73e939b7..3a0eae761 100644
--- a/compiler/luci-interpreter/src/kernels/TransposeConv.h
+++ b/compiler/luci-interpreter/src/kernels/TransposeConv.h
@@ -29,11 +29,12 @@ class TransposeConv : public KernelWithParams<TransposeConvParams>
{
public:
TransposeConv(const Tensor *output_shape, const Tensor *filter, const Tensor *input,
- Tensor *output, const TransposeConvParams &params);
+ const Tensor *bias, Tensor *output, const TransposeConvParams &params);
const Tensor *output_shape() const { return _inputs[0]; }
const Tensor *filter() const { return _inputs[1]; }
const Tensor *input() const { return _inputs[2]; }
+ const Tensor *bias() const { return _inputs[3]; }
Tensor *output() const { return _outputs[0]; }
void configure() override;
diff --git a/compiler/luci-interpreter/src/kernels/TransposeConv.test.cpp b/compiler/luci-interpreter/src/kernels/TransposeConv.test.cpp
index b8c0ac497..0fbe9328b 100644
--- a/compiler/luci-interpreter/src/kernels/TransposeConv.test.cpp
+++ b/compiler/luci-interpreter/src/kernels/TransposeConv.test.cpp
@@ -26,15 +26,15 @@ namespace
using namespace testing;
-template <typename T>
+template <typename T, typename B>
void Check(std::initializer_list<int32_t> output_shape_shape,
std::initializer_list<int32_t> weight_shape,
std::initializer_list<int32_t> input_data_shape,
- std::initializer_list<int32_t> output_shape,
+ std::initializer_list<int32_t> bias_shape, std::initializer_list<int32_t> output_shape,
std::initializer_list<int32_t> output_shape_data, std::initializer_list<T> weight_data,
- std::initializer_list<T> input_data_data, std::initializer_list<T> output_data,
- luci::Padding padding, int32_t stride_height, int32_t stride_width,
- DataType element_type)
+ std::initializer_list<T> input_data_data, std::initializer_list<B> bias_data,
+ std::initializer_list<T> output_data, luci::Padding padding, int32_t stride_height,
+ int32_t stride_width, DataType element_type)
{
Tensor output_shape_tensor{element_type, output_shape_shape, {}, ""};
output_shape_tensor.writeData(output_shape_data.begin(), output_shape_data.size() * sizeof(T));
@@ -50,21 +50,32 @@ void Check(std::initializer_list<int32_t> output_shape_shape,
params.stride_height = stride_height;
params.stride_width = stride_width;
- TransposeConv kernel(&output_shape_tensor, &weight_tensor, &input_data_tensor, &output_tensor,
- params);
- kernel.configure();
- kernel.execute();
-
+ if (bias_data.size() != 0)
+ {
+ Tensor bias_tensor = makeInputTensor<getElementType<B>()>(bias_shape, bias_data);
+ TransposeConv kernel(&output_shape_tensor, &weight_tensor, &input_data_tensor, &bias_tensor,
+ &output_tensor, params);
+ kernel.configure();
+ kernel.execute();
+ }
+ else
+ {
+ TransposeConv kernel(&output_shape_tensor, &weight_tensor, &input_data_tensor, nullptr,
+ &output_tensor, params);
+ kernel.configure();
+ kernel.execute();
+ }
EXPECT_THAT(extractTensorData<T>(output_tensor), ::testing::ElementsAreArray(output_data));
}
TEST(TransposeConvTest, FloatSimple)
{
- Check<float>(
+ Check<float, float>(
/*outputShape_shape=*/{4}, /*weight_shape=*/{1, 3, 3, 1}, /*input_shape=*/{1, 4, 4, 1},
- /*output_shape=*/{1, 4, 4, 1}, /*outputShape_data=*/{1, 4, 4, 1},
+ /*bias_shape=*/{}, /*output_shape=*/{1, 4, 4, 1}, /*outputShape_data=*/{1, 4, 4, 1},
/*weight_data=*/{1, 2, 3, 4, 5, 6, 7, 8, 9},
/*input_data=*/{1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16},
+ /*bias_data=*/{},
/*output_data=*/{29, 62, 83, 75, 99, 192, 237, 198, 207, 372, 417, 330, 263, 446, 485, 365},
/*params.padding=*/luci::Padding::SAME, /*stride_height=*/1, /*stride_width=*/1,
getElementType<float>());
@@ -74,12 +85,13 @@ TEST(TransposeConvTest, FloatSimple)
TEST(TransposeConvTest, FloatTwoFiltersTest)
{
- Check<float>(
+ Check<float, float>(
/*outputShape_shape=*/{4}, /*weight_shape=*/{1, 3, 3, 2}, /*input_shape=*/{1, 4, 4, 2},
- /*output_shape=*/{1, 4, 4, 1}, /*outputShape_data=*/{1, 4, 4, 1},
+ /*bias_shape=*/{}, /*output_shape=*/{1, 4, 4, 1}, /*outputShape_data=*/{1, 4, 4, 1},
/*weight_data=*/{1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18},
/*input_data=*/{1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16,
17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32},
+ /*bias_data=*/{},
/*output_data=*/{184, 412, 568, 528, 678, 1347, 1689, 1434, 1494, 2715, 3057, 2442, 1968,
3352, 3652, 2760},
/*params.padding=*/luci::Padding::SAME, /*stride_height=*/1, /*stride_width=*/1,
@@ -88,6 +100,24 @@ TEST(TransposeConvTest, FloatTwoFiltersTest)
SUCCEED();
}
+TEST(TransposeConvTest, SimpleBiasTest)
+{
+ Check<float, float>(
+ /*outputShape_shape=*/{4}, /*weight_shape=*/{2, 3, 3, 1},
+ /*input_shape=*/{1, 2, 2, 1},
+ /*bias_shape=*/{2}, /*output_shape=*/{1, 4, 4, 1}, /*outputShape_data=*/{1, 5, 5, 2},
+ /*weight_data=*/{1, 3, 5, 7, 9, 11, 13, 15, 17, 2, 4, 6, 8, 10, 12, 14, 16, 18},
+ /*input_data=*/{1, 2, 3, 4},
+ /*bias_data=*/{3, 4},
+ /*output_data=*/{4, 6, 6, 8, 10, 14, 9, 12, 13, 16, 10, 12, 12, 14, 28, 32, 21,
+ 24, 25, 28, 19, 24, 27, 32, 65, 76, 45, 52, 57, 64, 24, 28, 30, 34,
+ 64, 72, 39, 44, 47, 52, 42, 46, 48, 52, 106, 114, 63, 68, 71, 76},
+ /*params.padding=*/luci::Padding::VALID, /*stride_height=*/2, /*stride_width=*/2,
+ getElementType<float>());
+
+ SUCCEED();
+}
+
// TODO Uint8Simple
// Implement GetDequantizedOutput Function.
// Create Test for Uint8 Case
diff --git a/compiler/luci-interpreter/src/kernels/Utils.h b/compiler/luci-interpreter/src/kernels/Utils.h
index 3c2cc8450..7927151c6 100644
--- a/compiler/luci-interpreter/src/kernels/Utils.h
+++ b/compiler/luci-interpreter/src/kernels/Utils.h
@@ -31,6 +31,11 @@ namespace luci_interpreter
namespace kernels
{
+#define LUCI_INTERPRETER_CHECK(cond) \
+ if (!(cond)) \
+ throw std::runtime_error(std::string(__FILE__) + ":" + std::to_string(__LINE__) + "(" + \
+ std::string(#cond) + ") was not true.");
+
inline int32_t computePadding(int32_t stride, int32_t dilation_rate, int32_t in_size,
int32_t filter_size, int32_t out_size)
{
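
[Editor's note, added for clarity] A usage sketch for the macro added above (hypothetical kernel code, not part of this change); it assumes kernels/Utils.h is included so LUCI_INTERPRETER_CHECK is in scope. On failure it throws std::runtime_error carrying the file, line, and failed condition text.

void configure_demo(int input_rank, int output_rank, bool types_match)
{
  LUCI_INTERPRETER_CHECK(input_rank == output_rank); // throws with file:line and "(cond) was not true."
  LUCI_INTERPRETER_CHECK(types_match);
}
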
diff --git a/compiler/luci-interpreter/src/loader/KernelBuilder.cpp b/compiler/luci-interpreter/src/loader/KernelBuilder.cpp
index 12c7f4526..126a1cb5b 100644
--- a/compiler/luci-interpreter/src/loader/KernelBuilder.cpp
+++ b/compiler/luci-interpreter/src/loader/KernelBuilder.cpp
@@ -37,12 +37,15 @@
#include "kernels/Pad.h"
#include "kernels/Reshape.h"
#include "kernels/Reverse.h"
+#include "kernels/Rsqrt.h"
#include "kernels/Slice.h"
#include "kernels/Softmax.h"
#include "kernels/SpaceToDepth.h"
#include "kernels/Split.h"
#include "kernels/StridedSlice.h"
+#include "kernels/Sqrt.h"
#include "kernels/Squeeze.h"
+#include "kernels/Tanh.h"
#include "kernels/Unpack.h"
#include "kernels/Transpose.h"
#include "kernels/TransposeConv.h"
@@ -430,6 +433,16 @@ std::unique_ptr<Kernel> KernelBuilder::visit(const luci::CircleReverseV2 *node)
return std::make_unique<kernels::Reverse>(input, axes, output);
}
+std::unique_ptr<Kernel> KernelBuilder::visit(const luci::CircleRsqrt *node)
+{
+ assert(node->arity() == 1);
+
+ const Tensor *input = getInputTensor(node->x());
+ Tensor *output = getOutputTensor(node);
+
+ return std::make_unique<kernels::Rsqrt>(input, output);
+}
+
std::unique_ptr<Kernel> KernelBuilder::visit(const luci::CircleSlice *node)
{
assert(node->arity() == 3);
@@ -483,6 +496,16 @@ std::unique_ptr<Kernel> KernelBuilder::visit(const luci::CircleSplit *node)
return std::make_unique<kernels::Split>(axis, input, std::move(outputs));
}
+std::unique_ptr<Kernel> KernelBuilder::visit(const luci::CircleSqrt *node)
+{
+ assert(node->arity() == 1);
+
+ const Tensor *input = getInputTensor(node->x());
+ Tensor *output = getOutputTensor(node);
+
+ return std::make_unique<kernels::Sqrt>(input, output);
+}
+
std::unique_ptr<Kernel> KernelBuilder::visit(const luci::CircleSqueeze *node)
{
assert(node->arity() == 1);
@@ -517,6 +540,16 @@ std::unique_ptr<Kernel> KernelBuilder::visit(const luci::CircleStridedSlice *nod
return std::make_unique<kernels::StridedSlice>(input, begin, end, strides, output, params);
}
+std::unique_ptr<Kernel> KernelBuilder::visit(const luci::CircleTanh *node)
+{
+ assert(node->arity() == 1);
+
+ const Tensor *input = getInputTensor(node->x());
+ Tensor *output = getOutputTensor(node);
+
+ return std::make_unique<kernels::Tanh>(input, output);
+}
+
std::unique_ptr<Kernel> KernelBuilder::visit(const luci::CircleTranspose *node)
{
assert(node->arity() == 2);
@@ -530,11 +563,12 @@ std::unique_ptr<Kernel> KernelBuilder::visit(const luci::CircleTranspose *node)
std::unique_ptr<Kernel> KernelBuilder::visit(const luci::CircleTransposeConv *node)
{
- assert(node->arity() == 3);
+ assert(node->arity() == 4);
const Tensor *input_sizes = getInputTensor(node->inputSizes());
const Tensor *filter = getInputTensor(node->filter());
const Tensor *out_backprop = getInputTensor(node->outBackprop());
+ const Tensor *bias = getOptionalInputTensor(node->bias());
Tensor *output = getOutputTensor(node);
@@ -543,7 +577,7 @@ std::unique_ptr<Kernel> KernelBuilder::visit(const luci::CircleTransposeConv *no
params.stride_height = node->stride()->h();
params.stride_width = node->stride()->w();
- return std::make_unique<kernels::TransposeConv>(input_sizes, filter, out_backprop, output,
+ return std::make_unique<kernels::TransposeConv>(input_sizes, filter, out_backprop, bias, output,
params);
}
diff --git a/compiler/luci-interpreter/src/loader/KernelBuilder.h b/compiler/luci-interpreter/src/loader/KernelBuilder.h
index d5c5a4b56..31cb9d8fc 100644
--- a/compiler/luci-interpreter/src/loader/KernelBuilder.h
+++ b/compiler/luci-interpreter/src/loader/KernelBuilder.h
@@ -63,12 +63,15 @@ public:
std::unique_ptr<Kernel> visit(const luci::CirclePad *node) override;
std::unique_ptr<Kernel> visit(const luci::CircleReshape *node) override;
std::unique_ptr<Kernel> visit(const luci::CircleReverseV2 *node) override;
+ std::unique_ptr<Kernel> visit(const luci::CircleRsqrt *node) override;
std::unique_ptr<Kernel> visit(const luci::CircleSlice *node) override;
std::unique_ptr<Kernel> visit(const luci::CircleSoftmax *node) override;
std::unique_ptr<Kernel> visit(const luci::CircleSpaceToDepth *node) override;
std::unique_ptr<Kernel> visit(const luci::CircleSplit *node) override;
std::unique_ptr<Kernel> visit(const luci::CircleStridedSlice *node) override;
+ std::unique_ptr<Kernel> visit(const luci::CircleSqrt *node) override;
std::unique_ptr<Kernel> visit(const luci::CircleSqueeze *node) override;
+ std::unique_ptr<Kernel> visit(const luci::CircleTanh *node) override;
std::unique_ptr<Kernel> visit(const luci::CircleTranspose *node) override;
std::unique_ptr<Kernel> visit(const luci::CircleTransposeConv *node) override;
std::unique_ptr<Kernel> visit(const luci::CircleUnpack *node) override;
diff --git a/compiler/luci-interpreter/src/loader/KernelBuilder.test.cpp b/compiler/luci-interpreter/src/loader/KernelBuilder.test.cpp
index 33bc8ec9b..4e2bc3d0b 100644
--- a/compiler/luci-interpreter/src/loader/KernelBuilder.test.cpp
+++ b/compiler/luci-interpreter/src/loader/KernelBuilder.test.cpp
@@ -37,12 +37,15 @@
#include <kernels/Pad.h>
#include <kernels/Reshape.h>
#include <kernels/Reverse.h>
+#include <kernels/Rsqrt.h>
#include <kernels/Slice.h>
#include <kernels/Softmax.h>
#include <kernels/SpaceToDepth.h>
#include <kernels/Split.h>
+#include <kernels/Sqrt.h>
#include <kernels/Squeeze.h>
#include <kernels/StridedSlice.h>
+#include <kernels/Tanh.h>
#include <kernels/Transpose.h>
#include <kernels/TransposeConv.h>
#include <kernels/Unpack.h>
@@ -529,6 +532,20 @@ TEST_F(KernelBuilderTest, ReverseV2)
checkTensor(kernel->output(), op);
}
+TEST_F(KernelBuilderTest, Rsqrt)
+{
+ auto *input = createInputNode();
+
+ auto *op = createNode<luci::CircleRsqrt>();
+ op->x(input);
+
+ auto kernel = buildKernel<kernels::Rsqrt>(op);
+ ASSERT_THAT(kernel, NotNull());
+
+ checkTensor(kernel->input(), input);
+ checkTensor(kernel->output(), op);
+}
+
TEST_F(KernelBuilderTest, Slice)
{
auto *input = createInputNode();
@@ -605,6 +622,20 @@ TEST_F(KernelBuilderTest, Split)
checkTensor(kernel->output(1), output2);
}
+TEST_F(KernelBuilderTest, Sqrt)
+{
+ auto *input = createInputNode();
+
+ auto *op = createNode<luci::CircleSqrt>();
+ op->x(input);
+
+ auto kernel = buildKernel<kernels::Sqrt>(op);
+ ASSERT_THAT(kernel, NotNull());
+
+ checkTensor(kernel->input(), input);
+ checkTensor(kernel->output(), op);
+}
+
TEST_F(KernelBuilderTest, Squeeze)
{
auto *input = createInputNode();
@@ -656,6 +687,20 @@ TEST_F(KernelBuilderTest, StridedSlice)
EXPECT_THAT(kernel->params().shrink_axis_mask, Eq(op->shrink_axis_mask()));
}
+TEST_F(KernelBuilderTest, Tanh)
+{
+ auto *input = createInputNode();
+
+ auto *op = createNode<luci::CircleTanh>();
+ op->x(input);
+
+ auto kernel = buildKernel<kernels::Tanh>(op);
+ ASSERT_THAT(kernel, NotNull());
+
+ checkTensor(kernel->input(), input);
+ checkTensor(kernel->output(), op);
+}
+
TEST_F(KernelBuilderTest, Transpose)
{
auto *input = createInputNode();
@@ -678,11 +723,13 @@ TEST_F(KernelBuilderTest, TransposeConv)
auto *output_shape = createInputNode();
auto *filter = createInputNode();
auto *input = createInputNode();
+ auto *bias = createInputNode();
auto *op = createNode<luci::CircleTransposeConv>();
op->inputSizes(output_shape);
op->filter(filter);
op->outBackprop(input);
+ op->bias(bias);
op->padding(luci::Padding::SAME);
op->stride()->h(11);
@@ -695,6 +742,7 @@ TEST_F(KernelBuilderTest, TransposeConv)
checkTensor(kernel->filter(), filter);
checkTensor(kernel->input(), input);
checkTensor(kernel->output(), op);
+ checkTensor(kernel->bias(), bias);
EXPECT_THAT(kernel->params().padding, Eq(op->padding()));
EXPECT_THAT(kernel->params().stride_height, Eq(op->stride()->h()));
EXPECT_THAT(kernel->params().stride_width, Eq(op->stride()->w()));
diff --git a/compiler/luci-value-test/tester/CMakeLists.txt b/compiler/luci-value-test/tester/CMakeLists.txt
index f3b6dfcfe..f2a4ff4b6 100644
--- a/compiler/luci-value-test/tester/CMakeLists.txt
+++ b/compiler/luci-value-test/tester/CMakeLists.txt
@@ -1,8 +1,6 @@
set(SRCS_EVAL_TESTER
src/EvalTester.cpp
- src/CircleExpContract.h
- src/CircleExpContract.cpp
)
add_executable(luci_eval_tester ${SRCS_EVAL_TESTER})
diff --git a/compiler/luci-value-test/tester/src/CircleExpContract.h b/compiler/luci-value-test/tester/src/CircleExpContract.h
deleted file mode 100644
index 4d08fb89b..000000000
--- a/compiler/luci-value-test/tester/src/CircleExpContract.h
+++ /dev/null
@@ -1,49 +0,0 @@
-/*
- * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#ifndef __LUCI_VALUE_TEST_CIRCLEXPCONTRACT_H__
-#define __LUCI_VALUE_TEST_CIRCLEXPCONTRACT_H__
-
-#include <loco.h>
-#include <luci/CircleExporter.h>
-#include <luci/IR/Module.h>
-
-#include <memory>
-#include <string>
-
-struct CircleExpContract : public luci::CircleExporter::Contract
-{
-public:
- CircleExpContract(luci::Module *module, const std::string &filename)
- : _module(module), _filepath(filename)
- {
- // NOTHING TO DO
- }
- virtual ~CircleExpContract() = default;
-
-public:
- loco::Graph *graph(void) const final { return nullptr; }
- luci::Module *module(void) const final { return _module; };
-
-public:
- bool store(const char *ptr, const size_t size) const final;
-
-private:
- luci::Module *_module;
- const std::string _filepath;
-};
-
-#endif // __LUCI_VALUE_TEST_CIRCLEXPCONTRACT_H__
diff --git a/compiler/luci-value-test/tester/src/EvalTester.cpp b/compiler/luci-value-test/tester/src/EvalTester.cpp
index 09eef223a..b49602e5e 100644
--- a/compiler/luci-value-test/tester/src/EvalTester.cpp
+++ b/compiler/luci-value-test/tester/src/EvalTester.cpp
@@ -14,10 +14,10 @@
* limitations under the License.
*/
-#include "CircleExpContract.h"
-
#include <luci/Importer.h>
#include <luci_interpreter/Interpreter.h>
+#include <luci/CircleExporter.h>
+#include <luci/CircleFileExpContract.h>
#include <cstdlib>
#include <fstream>
@@ -104,7 +104,9 @@ int entry(int argc, char **argv)
// Export to a Circle file
luci::CircleExporter exporter;
- CircleExpContract contract(initial_module.get(), intermediate_filename);
+
+ luci::CircleFileExpContract contract(initial_module.get(), intermediate_filename);
+
if (!exporter.invoke(&contract))
{
std::cerr << "ERROR: Failed to export '" << intermediate_filename << "'" << std::endl;
diff --git a/compiler/record-minmax/src/CircleExpContract.h b/compiler/luci/export/include/luci/CircleFileExpContract.h
index ab00fa860..eeaf2d9bb 100644
--- a/compiler/record-minmax/src/CircleExpContract.h
+++ b/compiler/luci/export/include/luci/CircleFileExpContract.h
@@ -14,40 +14,52 @@
* limitations under the License.
*/
-#ifndef __RECORD_MINMAX_CIRCLEXPCONTRACT_H__
-#define __RECORD_MINMAX_CIRCLEXPCONTRACT_H__
+#ifndef __LUCI_CIRCLEFILEEXPCONTRACT_H__
+#define __LUCI_CIRCLEFILEEXPCONTRACT_H__
#include <loco.h>
#include <luci/CircleExporter.h>
#include <luci/IR/Module.h>
+#include <oops/InternalExn.h>
#include <string>
+#include <fstream>
+#include <iostream>
-namespace record_minmax
+namespace luci
{
-struct CircleExpContract : public luci::CircleExporter::Contract
+struct CircleFileExpContract : public luci::CircleExporter::Contract
{
public:
- CircleExpContract(luci::Module *module, const std::string &filename)
+ CircleFileExpContract(luci::Module *module, const std::string &filename)
: _module(module), _filepath(filename)
{
// NOTHING TO DO
}
- virtual ~CircleExpContract() = default;
+ virtual ~CircleFileExpContract() = default;
public:
loco::Graph *graph(void) const final { return nullptr; }
- luci::Module *module(void) const final { return _module; };
+ luci::Module *module(void) const final { return _module; }
public:
- bool store(const char *ptr, const size_t size) const final;
+ bool store(const char *ptr, const size_t size) const final
+ {
+ if (!ptr)
+ INTERNAL_EXN("Graph was not serialized by FlatBuffer for some reason");
+
+ std::ofstream fs(_filepath, std::ofstream::binary);
+ fs.write(ptr, size);
+
+ return fs.good();
+ }
private:
luci::Module *_module;
const std::string _filepath;
};
-} // namespace record_minmax
+} // namespace luci
-#endif // __RECORD_MINMAX_CIRCLEXPCONTRACT_H__
+#endif // __LUCI_CIRCLEFILEEXPCONTRACT_H__
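
[Editor's note, added for clarity] With the file contract moved into luci, callers no longer need their own Contract subclass. The sketch below mirrors the EvalTester change earlier in this patch; "model.circle" is an example path.

#include <luci/CircleExporter.h>
#include <luci/CircleFileExpContract.h>

bool export_module(luci::Module *module)
{
  luci::CircleExporter exporter;
  luci::CircleFileExpContract contract(module, "model.circle");
  return exporter.invoke(&contract); // false if serialization or the file write failed
}
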
diff --git a/compiler/luci/export/src/CircleOperationExporter.cpp b/compiler/luci/export/src/CircleOperationExporter.cpp
index bca122050..36d61f6c9 100644
--- a/compiler/luci/export/src/CircleOperationExporter.cpp
+++ b/compiler/luci/export/src/CircleOperationExporter.cpp
@@ -38,12 +38,578 @@ namespace
using namespace luci;
+struct ExportContext
+{
+ FlatBufferBuilder &builder;
+ SerializedModelData &md;
+ SerializedGraphData &gd;
+};
+
+/**
+ * @brief Exports CircleMaxPool2D or CircleAveragePool2D
+ *
+ * @note CirclePool2D should be one of CircleMaxPool2D or CircleAveragePool2D
+ */
+template <class CirclePool2D>
+void export_pool_2d(ExportContext &ctx, CirclePool2D *node, circle::BuiltinOperator builtin_op)
+{
+ LUCI_ASSERT(builtin_op == circle::BuiltinOperator_MAX_POOL_2D ||
+ builtin_op == circle::BuiltinOperator_L2_POOL_2D ||
+ builtin_op == circle::BuiltinOperator_AVERAGE_POOL_2D,
+ "Should be L2Pool, MaxPool or AvgPool");
+ LUCI_ASSERT(node->padding() != luci::Padding::UNDEFINED, "Padding is not set");
+
+ uint32_t op_idx = ctx.md.registerBuiltinOpcode(builtin_op, node->op_version());
+ std::vector<int32_t> inputs_vec{get_tensor_index(node->value())};
+ std::vector<int32_t> outputs_vec{get_tensor_index(static_cast<loco::Node *>(node))};
+ auto inputs = ctx.builder.CreateVector(inputs_vec);
+ auto outputs = ctx.builder.CreateVector(outputs_vec);
+
+ circle::Padding padding = getOpPadding(node->padding());
+
+ auto options = CreatePool2DOptions(ctx.builder, padding, node->stride()->w(), node->stride()->h(),
+ node->filter()->w(), node->filter()->h(),
+ to_circle_actfunc(node->fusedActivationFunction()));
+ auto op_offset = CreateOperator(ctx.builder, op_idx, inputs, outputs,
+ circle::BuiltinOptions_Pool2DOptions, options.Union());
+ ctx.gd._operators.push_back(op_offset);
+}
+
+/**
+ * @brief export simple nodes
+ */
+void export_node(ExportContext &ctx, loco::Node *node, circle::BuiltinOperator bop,
+ circle::BuiltinOptions bot, flatbuffers::Offset<void> options_offset)
+{
+ uint32_t op_idx =
+ ctx.md.registerBuiltinOpcode(bop, loco::must_cast<luci::CircleNode *>(node)->op_version());
+ std::vector<int32_t> inputs_vec;
+ std::vector<int32_t> outputs_vec{get_tensor_index(node)};
+ for (uint32_t i = 0; i < node->arity(); ++i)
+ inputs_vec.push_back(get_tensor_index(node->arg(i)));
+ auto inputs = ctx.builder.CreateVector(inputs_vec);
+ auto outputs = ctx.builder.CreateVector(outputs_vec);
+ auto op_offset = CreateOperator(ctx.builder, op_idx, inputs, outputs, bot, options_offset);
+ ctx.gd._operators.push_back(op_offset);
+}
+
+/**
+ * @brief export simple nodes having void options
+ */
+void export_node(ExportContext &ctx, loco::Node *node, circle::BuiltinOperator bop)
+{
+ uint32_t op_idx =
+ ctx.md.registerBuiltinOpcode(bop, loco::must_cast<luci::CircleNode *>(node)->op_version());
+ std::vector<int32_t> inputs_vec;
+ std::vector<int32_t> outputs_vec{get_tensor_index(static_cast<loco::Node *>(node))};
+ for (uint32_t i = 0; i < node->arity(); ++i)
+ inputs_vec.push_back(get_tensor_index(node->arg(i)));
+ auto inputs = ctx.builder.CreateVector(inputs_vec);
+ auto outputs = ctx.builder.CreateVector(outputs_vec);
+ auto op_offset = CreateOperator(ctx.builder, op_idx, inputs, outputs);
+ ctx.gd._operators.push_back(op_offset);
+}
+
+void export_node(ExportContext &ctx, luci::CircleAddN *node)
+{
+ uint32_t op_idx = ctx.md.registerBuiltinOpcode(circle::BuiltinOperator_ADD_N, node->op_version());
+ std::vector<int32_t> inputs_vec;
+ std::vector<int32_t> outputs_vec{get_tensor_index(static_cast<loco::Node *>(node))};
+
+ for (uint32_t i = 0; i < node->arity(); ++i)
+ inputs_vec.push_back(get_tensor_index(node->inputs(i)));
+
+ auto inputs = ctx.builder.CreateVector(inputs_vec);
+ auto outputs = ctx.builder.CreateVector(outputs_vec);
+ auto options = CreateAddNOptions(ctx.builder);
+ auto op_offset = CreateOperator(ctx.builder, op_idx, inputs, outputs,
+ circle::BuiltinOptions_AddNOptions, options.Union());
+ ctx.gd._operators.push_back(op_offset);
+}
+
+void export_node(ExportContext &ctx, luci::CircleCast *node)
+{
+ uint32_t op_idx = ctx.md.registerBuiltinOpcode(circle::BuiltinOperator_CAST, node->op_version());
+ std::vector<int32_t> inputs_vec{get_tensor_index(node->x())};
+ std::vector<int32_t> outputs_vec{get_tensor_index(static_cast<loco::Node *>(node))};
+ auto inputs = ctx.builder.CreateVector(inputs_vec);
+ auto outputs = ctx.builder.CreateVector(outputs_vec);
+
+ flatbuffers::Offset<Operator> op_offset;
+ if (node->out_data_type() != loco::DataType::Unknown)
+ {
+ auto options = CreateCastOptions(ctx.builder, to_circle_tensortype(node->in_data_type()),
+ to_circle_tensortype(node->out_data_type()));
+ op_offset = CreateOperator(ctx.builder, op_idx, inputs, outputs,
+ circle::BuiltinOptions_CastOptions, options.Union());
+ }
+ else
+ {
+ op_offset = CreateOperator(ctx.builder, op_idx, inputs, outputs);
+ }
+ ctx.gd._operators.push_back(op_offset);
+}
+
+void export_node(ExportContext &ctx, luci::CircleConcatenation *node)
+{
+ uint32_t op_idx =
+ ctx.md.registerBuiltinOpcode(circle::BuiltinOperator_CONCATENATION, node->op_version());
+ std::vector<int32_t> inputs_vec;
+ std::vector<int32_t> outputs_vec{get_tensor_index(static_cast<loco::Node *>(node))};
+
+ for (uint32_t i = 0; i < node->numValues(); ++i)
+ inputs_vec.push_back(get_tensor_index(node->values(i)));
+
+ auto inputs = ctx.builder.CreateVector(inputs_vec);
+ auto outputs = ctx.builder.CreateVector(outputs_vec);
+ auto options = CreateConcatenationOptions(ctx.builder, node->axis(),
+ to_circle_actfunc(node->fusedActivationFunction()));
+ auto op_offset = CreateOperator(ctx.builder, op_idx, inputs, outputs,
+ circle::BuiltinOptions_ConcatenationOptions, options.Union());
+ ctx.gd._operators.push_back(op_offset);
+}
+
+void export_node(ExportContext &ctx, luci::CircleCustom *node)
+{
+ auto custom_outputs = loco::succs(node);
+
+ uint32_t op_idx = ctx.md.registerCustomOpcode(node->custom_code());
+ std::vector<int32_t> inputs_vec;
+ std::vector<int32_t> outputs_vec;
+
+ for (uint32_t index = 0; index < node->numInputs(); index++)
+ {
+ inputs_vec.push_back(get_tensor_index(node->inputs(index)));
+ }
+ for (uint32_t index = 0; index < custom_outputs.size(); index++)
+ {
+ // store in order of index
+ bool found = false;
+ for (auto out : custom_outputs)
+ {
+ auto custom_out = loco::must_cast<luci::CircleCustomOut *>(out);
+ if (custom_out->index() == static_cast<int32_t>(index))
+ {
+ outputs_vec.push_back(get_tensor_index(custom_out));
+ found = true;
+ break;
+ }
+ }
+ if (!found)
+ {
+ INTERNAL_EXN("Invalid Custom output");
+ }
+ }
+
+ auto inputs = ctx.builder.CreateVector(inputs_vec);
+ auto outputs = ctx.builder.CreateVector(outputs_vec);
+ flatbuffers::Offset<flatbuffers::Vector<uint8_t>> circle_custom_options;
+ std::vector<uint8_t> custom_options_vec{node->custom_options().begin(),
+ node->custom_options().end()};
+ circle_custom_options = ctx.builder.CreateVector(custom_options_vec);
+ auto op_offset = CreateOperator(ctx.builder, op_idx, inputs, outputs, circle::BuiltinOptions_NONE,
+ flatbuffers::Offset<void>(), circle_custom_options);
+ ctx.gd._operators.push_back(op_offset);
+}
+
+void export_node(ExportContext &ctx, luci::CircleIf *node)
+{
+ auto if_outs = loco::succs(node);
+ assert(if_outs.size() == node->output_count());
+
+ uint32_t op_idx = ctx.md.registerBuiltinOpcode(circle::BuiltinOperator_IF, node->op_version());
+ std::vector<int32_t> inputs_vec;
+ std::vector<int32_t> outputs_vec;
+
+ inputs_vec.push_back(get_tensor_index(node->cond()));
+ for (uint32_t idx = 0; idx < node->input_count(); ++idx)
+ inputs_vec.push_back(get_tensor_index(node->input(idx)));
+
+ for (uint32_t idx = 0; idx < node->output_count(); ++idx)
+ {
+ // store in order of index
+ bool found = false;
+ for (auto out : if_outs)
+ {
+ auto if_out = loco::must_cast<luci::CircleIfOut *>(out);
+ if (if_out->index() == static_cast<int32_t>(idx))
+ {
+ outputs_vec.push_back(get_tensor_index(if_out));
+ found = true;
+ break;
+ }
+ }
+ if (!found)
+ {
+ INTERNAL_EXN("Invalid CircleIf output");
+ }
+ }
+
+ auto inputs = ctx.builder.CreateVector(inputs_vec);
+ auto outputs = ctx.builder.CreateVector(outputs_vec);
+ auto options = CreateIfOptions(ctx.builder, node->then_branch(), node->else_branch());
+ auto op_offset = CreateOperator(ctx.builder, op_idx, inputs, outputs,
+ circle::BuiltinOptions_IfOptions, options.Union());
+ ctx.gd._operators.push_back(op_offset);
+}
+
+void export_node(ExportContext &ctx, luci::CircleNonMaxSuppressionV4 *node)
+{
+ auto nms_outs = loco::succs(node);
+ assert(nms_outs.size() == 2);
+
+ uint32_t op_idx = ctx.md.registerBuiltinOpcode(circle::BuiltinOperator_NON_MAX_SUPPRESSION_V4,
+ node->op_version());
+ std::vector<int32_t> inputs_vec{
+ get_tensor_index(node->boxes()), get_tensor_index(node->scores()),
+ get_tensor_index(node->max_output_size()), get_tensor_index(node->iou_threshold()),
+ get_tensor_index(node->score_threshold()),
+ };
+ std::vector<int32_t> outputs_vec;
+
+ for (uint32_t idx = 0; idx < nms_outs.size(); ++idx)
+ {
+ // store in order of index
+ bool found = false;
+ for (auto out : nms_outs)
+ {
+ auto nms_out = loco::must_cast<luci::CircleNonMaxSuppressionV4Out *>(out);
+ if (nms_out->index() == static_cast<int32_t>(idx))
+ {
+ outputs_vec.push_back(get_tensor_index(nms_out));
+ found = true;
+ break;
+ }
+ }
+ if (!found)
+ {
+ INTERNAL_EXN("Invalid NonMaxSuppressionV4 output");
+ }
+ }
+
+ auto inputs = ctx.builder.CreateVector(inputs_vec);
+ auto outputs = ctx.builder.CreateVector(outputs_vec);
+ auto options = CreateNonMaxSuppressionV4Options(ctx.builder);
+ auto op_offset =
+ CreateOperator(ctx.builder, op_idx, inputs, outputs,
+ circle::BuiltinOptions_NonMaxSuppressionV4Options, options.Union());
+ ctx.gd._operators.push_back(op_offset);
+}
+
+void export_node(ExportContext &ctx, luci::CircleNonMaxSuppressionV5 *node)
+{
+ auto nms_outs = loco::succs(node);
+ assert(nms_outs.size() == 3);
+
+ uint32_t op_idx = ctx.md.registerBuiltinOpcode(circle::BuiltinOperator_NON_MAX_SUPPRESSION_V5,
+ node->op_version());
+ std::vector<int32_t> inputs_vec{
+ get_tensor_index(node->boxes()), get_tensor_index(node->scores()),
+ get_tensor_index(node->max_output_size()), get_tensor_index(node->iou_threshold()),
+ get_tensor_index(node->score_threshold()), get_tensor_index(node->soft_nms_sigma()),
+ };
+ std::vector<int32_t> outputs_vec;
+
+ for (uint32_t idx = 0; idx < nms_outs.size(); ++idx)
+ {
+ // store in order of index
+ bool found = false;
+ for (auto out : nms_outs)
+ {
+ auto nms_out = loco::must_cast<luci::CircleNonMaxSuppressionV5Out *>(out);
+ if (nms_out->index() == static_cast<int32_t>(idx))
+ {
+ outputs_vec.push_back(get_tensor_index(nms_out));
+ found = true;
+ break;
+ }
+ }
+ if (!found)
+ {
+ INTERNAL_EXN("Invalid NonMaxSuppressionV5 output");
+ }
+ }
+
+ auto inputs = ctx.builder.CreateVector(inputs_vec);
+ auto outputs = ctx.builder.CreateVector(outputs_vec);
+ auto options = CreateNonMaxSuppressionV5Options(ctx.builder);
+ auto op_offset =
+ CreateOperator(ctx.builder, op_idx, inputs, outputs,
+ circle::BuiltinOptions_NonMaxSuppressionV5Options, options.Union());
+ ctx.gd._operators.push_back(op_offset);
+}
+
+void export_node(ExportContext &ctx, luci::CircleReverseV2 *node)
+{
+ uint32_t op_idx =
+ ctx.md.registerBuiltinOpcode(circle::BuiltinOperator_REVERSE_V2, node->op_version());
+ std::vector<int32_t> inputs_vec{get_tensor_index(node->tensor()), get_tensor_index(node->axis())};
+ std::vector<int32_t> outputs_vec{get_tensor_index(static_cast<loco::Node *>(node))};
+ auto inputs = ctx.builder.CreateVector(inputs_vec);
+ auto outputs = ctx.builder.CreateVector(outputs_vec);
+ auto options = CreateReverseV2Options(ctx.builder);
+ auto op_offset = CreateOperator(ctx.builder, op_idx, inputs, outputs,
+ circle::BuiltinOptions_ReverseSequenceOptions, options.Union());
+ ctx.gd._operators.push_back(op_offset);
+}
+
+void export_node(ExportContext &ctx, luci::CircleSplit *node)
+{
+ auto split_outs = loco::succs(node);
+ assert(int32_t(split_outs.size()) == node->num_split());
+
+ uint32_t op_idx = ctx.md.registerBuiltinOpcode(circle::BuiltinOperator_SPLIT, node->op_version());
+ // NOTE BuiltinOperator_SPLIT input is placed at second position
+ std::vector<int32_t> inputs_vec{get_tensor_index(node->split_dim()),
+ get_tensor_index(node->input())};
+ std::vector<int32_t> outputs_vec;
+
+ for (int32_t index = 0; index < node->num_split(); index++)
+ {
+ // store in order of index
+ bool found = false;
+ for (auto out : split_outs)
+ {
+ auto split_out = loco::must_cast<luci::CircleSplitOut *>(out);
+ if (split_out->index() == index)
+ {
+ outputs_vec.push_back(get_tensor_index(split_out));
+ found = true;
+ break;
+ }
+ }
+ if (!found)
+ {
+ INTERNAL_EXN("Invalid Split output");
+ }
+ }
+
+ auto inputs = ctx.builder.CreateVector(inputs_vec);
+ auto outputs = ctx.builder.CreateVector(outputs_vec);
+ auto options = CreateSplitOptions(ctx.builder, node->num_split());
+ auto op_offset = CreateOperator(ctx.builder, op_idx, inputs, outputs,
+ circle::BuiltinOptions_SplitOptions, options.Union());
+ ctx.gd._operators.push_back(op_offset);
+}
+
+void export_node(ExportContext &ctx, luci::CircleSplitV *node)
+{
+ auto split_outs = loco::succs(node);
+ assert(int32_t(split_outs.size()) == node->num_split());
+
+ uint32_t op_idx =
+ ctx.md.registerBuiltinOpcode(circle::BuiltinOperator_SPLIT_V, node->op_version());
+ std::vector<int32_t> inputs_vec{get_tensor_index(node->input()),
+ get_tensor_index(node->size_splits()),
+ get_tensor_index(node->split_dim())};
+ std::vector<int32_t> outputs_vec;
+
+ for (int32_t index = 0; index < node->num_split(); index++)
+ {
+ // store in order of index
+ bool found = false;
+ for (auto out : split_outs)
+ {
+ auto split_out = loco::must_cast<luci::CircleSplitVOut *>(out);
+ if (split_out->index() == index)
+ {
+ outputs_vec.push_back(get_tensor_index(split_out));
+ found = true;
+ break;
+ }
+ }
+ if (!found)
+ {
+ INTERNAL_EXN("Invalid SplitV output");
+ }
+ }
+
+ auto inputs = ctx.builder.CreateVector(inputs_vec);
+ auto outputs = ctx.builder.CreateVector(outputs_vec);
+ auto options = CreateSplitVOptions(ctx.builder, node->num_split());
+ auto op_offset = CreateOperator(ctx.builder, op_idx, inputs, outputs,
+ circle::BuiltinOptions_SplitVOptions, options.Union());
+ ctx.gd._operators.push_back(op_offset);
+}
+
+void export_node(ExportContext &ctx, luci::CircleTopKV2 *node)
+{
+ auto topkv2_outs = loco::succs(node);
+ int outs_count = int32_t(topkv2_outs.size());
+ assert(outs_count == 2);
+
+ uint32_t op_idx =
+ ctx.md.registerBuiltinOpcode(circle::BuiltinOperator_TOPK_V2, node->op_version());
+ std::vector<int32_t> inputs_vec{get_tensor_index(node->input()), get_tensor_index(node->k())};
+ std::vector<int32_t> outputs_vec;
+
+ for (int32_t index = 0; index < outs_count; index++)
+ {
+ // store in order of index
+ bool found = false;
+ for (auto out : topkv2_outs)
+ {
+ auto topkv2_out = loco::must_cast<luci::CircleTopKV2Out *>(out);
+ if (topkv2_out->index() == index)
+ {
+ outputs_vec.push_back(get_tensor_index(topkv2_out));
+ found = true;
+ break;
+ }
+ }
+ if (!found)
+ {
+ INTERNAL_EXN("Invalid TopKV2 output");
+ }
+ }
+
+ auto inputs = ctx.builder.CreateVector(inputs_vec);
+ auto outputs = ctx.builder.CreateVector(outputs_vec);
+ auto options = CreateTopKV2Options(ctx.builder);
+ auto op_offset = CreateOperator(ctx.builder, op_idx, inputs, outputs,
+ circle::BuiltinOptions_TopKV2Options, options.Union());
+ ctx.gd._operators.push_back(op_offset);
+}
+
+void export_node(ExportContext &ctx, luci::CircleUnique *node)
+{
+ auto unique_outs = loco::succs(node);
+ assert(int32_t(unique_outs.size()) == 2);
+ uint32_t op_idx =
+ ctx.md.registerBuiltinOpcode(circle::BuiltinOperator_UNIQUE, node->op_version());
+
+ std::vector<int32_t> inputs_vec{get_tensor_index(node->input())};
+ std::vector<int32_t> outputs_vec;
+
+ for (int32_t index = 0; index < 2; index++)
+ {
+ // store in order of index
+ bool found = false;
+ for (auto out : unique_outs)
+ {
+ auto unique_out = loco::must_cast<luci::CircleUniqueOut *>(out);
+ if (unique_out->index() == index)
+ {
+ outputs_vec.push_back(get_tensor_index(unique_out));
+ found = true;
+ break;
+ }
+ }
+ if (!found)
+ {
+ INTERNAL_EXN("Invalid Unique output");
+ }
+ }
+
+ auto inputs = ctx.builder.CreateVector(inputs_vec);
+ auto outputs = ctx.builder.CreateVector(outputs_vec);
+ auto options = CreateUniqueOptions(ctx.builder, to_circle_tensortype(node->idx_out_type()));
+ auto op_offset = CreateOperator(ctx.builder, op_idx, inputs, outputs,
+ circle::BuiltinOptions_UniqueOptions, options.Union());
+ ctx.gd._operators.push_back(op_offset);
+}
+
+void export_node(ExportContext &ctx, luci::CircleUnpack *node)
+{
+ LOGGER(l);
+ auto settings = luci::UserSettings::settings();
+
+ auto unpack_outs = loco::succs(node);
+ // NOTE real models may not use all of the outputs
+ if (static_cast<int32_t>(unpack_outs.size()) != node->num())
+ {
+ if (settings->get(luci::UserSettings::Key::DisableValidation))
+ {
+ WARN(l) << "Warning: export Unpack(" << node->name() << ") 'num' not same as outputs";
+ }
+ else
+ assert(false);
+ }
+
+ uint32_t op_idx =
+ ctx.md.registerBuiltinOpcode(circle::BuiltinOperator_UNPACK, node->op_version());
+ std::vector<int32_t> inputs_vec{get_tensor_index(node->value())};
+ std::vector<int32_t> outputs_vec;
+
+ for (int32_t index = 0; index < node->num(); index++)
+ {
+ // store in order of index
+ bool found = false;
+ for (auto out : unpack_outs)
+ {
+ auto unpack_out = loco::must_cast<luci::CircleUnpackOut *>(out);
+ if (unpack_out->index() == index)
+ {
+ outputs_vec.push_back(get_tensor_index(unpack_out));
+ found = true;
+ break;
+ }
+ }
+ // NOTE real models may not use all of the outputs
+ if (!found)
+ {
+ if (settings->get(luci::UserSettings::Key::DisableValidation))
+ {
+ WARN(l) << "Warning: export Unpack(" << node->name() << ") output " << index << " not used";
+ }
+ else
+ assert(false);
+ }
+ }
+
+ auto inputs = ctx.builder.CreateVector(inputs_vec);
+ auto outputs = ctx.builder.CreateVector(outputs_vec);
+ auto options = CreateUnpackOptions(ctx.builder, node->num(), node->axis());
+ auto op_offset = CreateOperator(ctx.builder, op_idx, inputs, outputs,
+ circle::BuiltinOptions_UnpackOptions, options.Union());
+ ctx.gd._operators.push_back(op_offset);
+}
+
+void export_node(ExportContext &ctx, luci::CircleWhile *node)
+{
+ auto while_outs = loco::succs(node);
+ assert(while_outs.size() == node->output_count());
+
+ uint32_t op_idx = ctx.md.registerBuiltinOpcode(circle::BuiltinOperator_WHILE, node->op_version());
+ std::vector<int32_t> inputs_vec;
+ std::vector<int32_t> outputs_vec;
+
+ for (uint32_t idx = 0; idx < node->input_count(); ++idx)
+ inputs_vec.push_back(get_tensor_index(node->input(idx)));
+
+ for (uint32_t idx = 0; idx < node->output_count(); ++idx)
+ {
+ // store in order of index
+ bool found = false;
+ for (auto out : while_outs)
+ {
+ auto while_out = loco::must_cast<luci::CircleWhileOut *>(out);
+ if (while_out->index() == static_cast<int32_t>(idx))
+ {
+ outputs_vec.push_back(get_tensor_index(while_out));
+ found = true;
+ break;
+ }
+ }
+ if (!found)
+ {
+ INTERNAL_EXN("Invalid CircleWhile output");
+ }
+ }
+
+ auto inputs = ctx.builder.CreateVector(inputs_vec);
+ auto outputs = ctx.builder.CreateVector(outputs_vec);
+ auto options = CreateWhileOptions(ctx.builder, node->cond_branch(), node->body_branch());
+ auto op_offset = CreateOperator(ctx.builder, op_idx, inputs, outputs,
+ circle::BuiltinOptions_WhileOptions, options.Union());
+ ctx.gd._operators.push_back(op_offset);
+}
+
class OperationExporter final : public luci::CircleNodeMutableVisitor<void>,
public loco::CanonicalNodeMutableVisitor<void>
{
public:
- OperationExporter(FlatBufferBuilder &fbb, SerializedModelData &m, SerializedGraphData &g)
- : builder{fbb}, md{m}, gd{g}
+ OperationExporter(ExportContext &ctx) : _ctx{ctx}
{
// DO NOTHING
}
@@ -103,10 +669,12 @@ public:
void visit(luci::CircleMul *) final;
void visit(luci::CircleNeg *) final;
void visit(luci::CircleNonMaxSuppressionV4 *) final;
+ void visit(luci::CircleNonMaxSuppressionV5 *) final;
void visit(luci::CircleNotEqual *) final;
void visit(luci::CircleOneHot *) final;
void visit(luci::CirclePack *) final;
void visit(luci::CirclePad *) final;
+ void visit(luci::CirclePadV2 *) final;
void visit(luci::CirclePow *) final;
void visit(luci::CirclePRelu *) final;
void visit(luci::CircleRange *) final;
@@ -168,6 +736,7 @@ public:
void visit(luci::CircleCustomOut *) final {}
void visit(luci::CircleIfOut *) final {}
void visit(luci::CircleNonMaxSuppressionV4Out *) final {}
+ void visit(luci::CircleNonMaxSuppressionV5Out *) final {}
void visit(luci::CircleSplitOut *) final {}
void visit(luci::CircleSplitVOut *) final {}
void visit(luci::CircleTopKV2Out *) final {}
@@ -177,14 +746,6 @@ public:
private:
/**
- * @brief Exports CircleMaxPool2D or CircleAveragePool2D
- *
- * @note CirclePool2D should be one of CircleMaxPool2D or CircleAveragePool2D
- */
- template <class CirclePool2D>
- void export_pool_2d(CirclePool2D *node, circle::BuiltinOperator builtin_op);
-
- /**
* @brief export simple nodes
*/
void export_simple(loco::Node *node, circle::BuiltinOperator bop, circle::BuiltinOptions bot,
@@ -196,179 +757,83 @@ private:
void export_simple(loco::Node *node, circle::BuiltinOperator bop);
private:
- FlatBufferBuilder &builder;
- SerializedModelData &md;
- SerializedGraphData &gd;
+ ExportContext &_ctx;
};
-template <class CirclePool2D>
-void OperationExporter::export_pool_2d(CirclePool2D *node, circle::BuiltinOperator builtin_op)
-{
- LUCI_ASSERT(builtin_op == circle::BuiltinOperator_MAX_POOL_2D ||
- builtin_op == circle::BuiltinOperator_L2_POOL_2D ||
- builtin_op == circle::BuiltinOperator_AVERAGE_POOL_2D,
- "Should be L2Pool, MaxPool or AvgPool");
- LUCI_ASSERT(node->padding() != luci::Padding::UNDEFINED, "Padding is not set");
-
- uint32_t op_idx = md.registerBuiltinOpcode(builtin_op, node->op_version());
- std::vector<int32_t> inputs_vec{get_tensor_index(node->value())};
- std::vector<int32_t> outputs_vec{get_tensor_index(static_cast<loco::Node *>(node))};
- auto inputs = builder.CreateVector(inputs_vec);
- auto outputs = builder.CreateVector(outputs_vec);
-
- circle::Padding padding = getOpPadding(node->padding());
-
- auto options = CreatePool2DOptions(builder, padding, node->stride()->w(), node->stride()->h(),
- node->filter()->w(), node->filter()->h(),
- to_circle_actfunc(node->fusedActivationFunction()));
- auto op_offset = CreateOperator(builder, op_idx, inputs, outputs,
- circle::BuiltinOptions_Pool2DOptions, options.Union());
- gd._operators.push_back(op_offset);
-}
-
void OperationExporter::export_simple(loco::Node *node, circle::BuiltinOperator bop,
circle::BuiltinOptions bot,
flatbuffers::Offset<void> options_offset)
{
- uint32_t op_idx =
- md.registerBuiltinOpcode(bop, loco::must_cast<luci::CircleNode *>(node)->op_version());
- std::vector<int32_t> inputs_vec;
- std::vector<int32_t> outputs_vec{get_tensor_index(node)};
- for (uint32_t i = 0; i < node->arity(); ++i)
- inputs_vec.push_back(get_tensor_index(node->arg(i)));
- auto inputs = builder.CreateVector(inputs_vec);
- auto outputs = builder.CreateVector(outputs_vec);
- auto op_offset = CreateOperator(builder, op_idx, inputs, outputs, bot, options_offset);
- gd._operators.push_back(op_offset);
+ export_node(_ctx, node, bop, bot, options_offset);
}
void OperationExporter::export_simple(loco::Node *node, circle::BuiltinOperator bop)
{
- uint32_t op_idx =
- md.registerBuiltinOpcode(bop, loco::must_cast<luci::CircleNode *>(node)->op_version());
- std::vector<int32_t> inputs_vec;
- std::vector<int32_t> outputs_vec{get_tensor_index(static_cast<loco::Node *>(node))};
- for (uint32_t i = 0; i < node->arity(); ++i)
- inputs_vec.push_back(get_tensor_index(node->arg(i)));
- auto inputs = builder.CreateVector(inputs_vec);
- auto outputs = builder.CreateVector(outputs_vec);
- auto op_offset = CreateOperator(builder, op_idx, inputs, outputs);
- gd._operators.push_back(op_offset);
+ export_node(_ctx, node, bop);
}
void OperationExporter::visit(luci::CircleAbs *node)
{
export_simple(node, circle::BuiltinOperator_ABS, circle::BuiltinOptions_AbsOptions,
- CreateAbsOptions(builder).Union());
+ CreateAbsOptions(_ctx.builder).Union());
}
void OperationExporter::visit(luci::CircleAdd *node)
{
export_simple(
node, circle::BuiltinOperator_ADD, circle::BuiltinOptions_AddOptions,
- CreateAddOptions(builder, to_circle_actfunc(node->fusedActivationFunction())).Union());
+ CreateAddOptions(_ctx.builder, to_circle_actfunc(node->fusedActivationFunction())).Union());
}
-void OperationExporter::visit(luci::CircleAddN *node)
-{
- uint32_t op_idx = md.registerBuiltinOpcode(circle::BuiltinOperator_ADD_N, node->op_version());
- std::vector<int32_t> inputs_vec;
- std::vector<int32_t> outputs_vec{get_tensor_index(static_cast<loco::Node *>(node))};
-
- for (uint32_t i = 0; i < node->arity(); ++i)
- inputs_vec.push_back(get_tensor_index(node->inputs(i)));
-
- auto inputs = builder.CreateVector(inputs_vec);
- auto outputs = builder.CreateVector(outputs_vec);
- auto options = CreateAddNOptions(builder);
- auto op_offset = CreateOperator(builder, op_idx, inputs, outputs,
- circle::BuiltinOptions_AddNOptions, options.Union());
- gd._operators.push_back(op_offset);
-}
+void OperationExporter::visit(luci::CircleAddN *node) { export_node(_ctx, node); }
void OperationExporter::visit(luci::CircleArgMax *node)
{
- export_simple(node, circle::BuiltinOperator_ARG_MAX, circle::BuiltinOptions_ArgMaxOptions,
- CreateArgMaxOptions(builder, to_circle_tensortype(node->output_type())).Union());
+ export_simple(
+ node, circle::BuiltinOperator_ARG_MAX, circle::BuiltinOptions_ArgMaxOptions,
+ CreateArgMaxOptions(_ctx.builder, to_circle_tensortype(node->output_type())).Union());
}
void OperationExporter::visit(luci::CircleArgMin *node)
{
- export_simple(node, circle::BuiltinOperator_ARG_MIN, circle::BuiltinOptions_ArgMinOptions,
- CreateArgMinOptions(builder, to_circle_tensortype(node->output_type())).Union());
+ export_simple(
+ node, circle::BuiltinOperator_ARG_MIN, circle::BuiltinOptions_ArgMinOptions,
+ CreateArgMinOptions(_ctx.builder, to_circle_tensortype(node->output_type())).Union());
}
void OperationExporter::visit(luci::CircleAveragePool2D *node)
{
- export_pool_2d<luci::CircleAveragePool2D>(node, circle::BuiltinOperator_AVERAGE_POOL_2D);
+ export_pool_2d<luci::CircleAveragePool2D>(_ctx, node, circle::BuiltinOperator_AVERAGE_POOL_2D);
}
void OperationExporter::visit(luci::CircleBatchMatMul *node)
{
export_simple(node, circle::BuiltinOperator_BATCH_MATMUL,
circle::BuiltinOptions_BatchMatMulOptions,
- CreateBatchMatMulOptions(builder, node->adj_x(), node->adj_y()).Union());
+ CreateBatchMatMulOptions(_ctx.builder, node->adj_x(), node->adj_y()).Union());
}
-void OperationExporter::visit(luci::CircleCast *node)
-{
- uint32_t op_idx = md.registerBuiltinOpcode(circle::BuiltinOperator_CAST, node->op_version());
- std::vector<int32_t> inputs_vec{get_tensor_index(node->x())};
- std::vector<int32_t> outputs_vec{get_tensor_index(static_cast<loco::Node *>(node))};
- auto inputs = builder.CreateVector(inputs_vec);
- auto outputs = builder.CreateVector(outputs_vec);
-
- flatbuffers::Offset<Operator> op_offset;
- if (node->out_data_type() != loco::DataType::Unknown)
- {
- auto options = CreateCastOptions(builder, to_circle_tensortype(node->in_data_type()),
- to_circle_tensortype(node->out_data_type()));
- op_offset = CreateOperator(builder, op_idx, inputs, outputs, circle::BuiltinOptions_CastOptions,
- options.Union());
- }
- else
- {
- op_offset = CreateOperator(builder, op_idx, inputs, outputs);
- }
- gd._operators.push_back(op_offset);
-}
+void OperationExporter::visit(luci::CircleCast *node) { export_node(_ctx, node); }
void OperationExporter::visit(luci::CircleCeil *node)
{
export_simple(node, circle::BuiltinOperator_CEIL);
}
-void OperationExporter::visit(luci::CircleConcatenation *node)
-{
- uint32_t op_idx =
- md.registerBuiltinOpcode(circle::BuiltinOperator_CONCATENATION, node->op_version());
- std::vector<int32_t> inputs_vec;
- std::vector<int32_t> outputs_vec{get_tensor_index(static_cast<loco::Node *>(node))};
-
- for (uint32_t i = 0; i < node->numValues(); ++i)
- inputs_vec.push_back(get_tensor_index(node->values(i)));
-
- auto inputs = builder.CreateVector(inputs_vec);
- auto outputs = builder.CreateVector(outputs_vec);
- auto options = CreateConcatenationOptions(builder, node->axis(),
- to_circle_actfunc(node->fusedActivationFunction()));
- auto op_offset = CreateOperator(builder, op_idx, inputs, outputs,
- circle::BuiltinOptions_ConcatenationOptions, options.Union());
- gd._operators.push_back(op_offset);
-}
+void OperationExporter::visit(luci::CircleConcatenation *node) { export_node(_ctx, node); }
void OperationExporter::visit(luci::CircleBatchToSpaceND *node)
{
export_simple(node, circle::BuiltinOperator_BATCH_TO_SPACE_ND,
circle::BuiltinOptions_BatchToSpaceNDOptions,
- CreateBatchToSpaceNDOptions(builder).Union());
+ CreateBatchToSpaceNDOptions(_ctx.builder).Union());
}
void OperationExporter::visit(luci::CircleConv2D *node)
{
export_simple(node, circle::BuiltinOperator_CONV_2D, circle::BuiltinOptions_Conv2DOptions,
- CreateConv2DOptions(builder, getOpPadding(node->padding()), node->stride()->w(),
- node->stride()->h(),
+ CreateConv2DOptions(_ctx.builder, getOpPadding(node->padding()),
+ node->stride()->w(), node->stride()->h(),
to_circle_actfunc(node->fusedActivationFunction()),
node->dilation()->w(), node->dilation()->h())
.Union());
@@ -377,64 +842,23 @@ void OperationExporter::visit(luci::CircleConv2D *node)
void OperationExporter::visit(luci::CircleCos *node)
{
export_simple(node, circle::BuiltinOperator_COS, circle::BuiltinOptions_CosOptions,
- CreateCosOptions(builder).Union());
+ CreateCosOptions(_ctx.builder).Union());
}
-void OperationExporter::visit(luci::CircleCustom *node)
-{
- auto custom_outputs = loco::succs(node);
-
- uint32_t op_idx = md.registerCustomOpcode(node->custom_code());
- std::vector<int32_t> inputs_vec;
- std::vector<int32_t> outputs_vec;
-
- for (uint32_t index = 0; index < node->numInputs(); index++)
- {
- inputs_vec.push_back(get_tensor_index(node->inputs(index)));
- }
- for (uint32_t index = 0; index < custom_outputs.size(); index++)
- {
- // store in order of index
- bool found = false;
- for (auto out : custom_outputs)
- {
- auto custom_out = loco::must_cast<luci::CircleCustomOut *>(out);
- if (custom_out->index() == static_cast<int32_t>(index))
- {
- outputs_vec.push_back(get_tensor_index(custom_out));
- found = true;
- break;
- }
- }
- if (!found)
- {
- INTERNAL_EXN("Invalid Custom output");
- }
- }
-
- auto inputs = builder.CreateVector(inputs_vec);
- auto outputs = builder.CreateVector(outputs_vec);
- flatbuffers::Offset<flatbuffers::Vector<uint8_t>> circle_custom_options;
- std::vector<uint8_t> custom_options_vec{node->custom_options().begin(),
- node->custom_options().end()};
- circle_custom_options = builder.CreateVector(custom_options_vec);
- auto op_offset = CreateOperator(builder, op_idx, inputs, outputs, circle::BuiltinOptions_NONE,
- flatbuffers::Offset<void>(), circle_custom_options);
- gd._operators.push_back(op_offset);
-}
+void OperationExporter::visit(luci::CircleCustom *node) { export_node(_ctx, node); }
void OperationExporter::visit(luci::CircleDepthToSpace *node)
{
export_simple(node, circle::BuiltinOperator_DEPTH_TO_SPACE,
circle::BuiltinOptions_DepthToSpaceOptions,
- CreateDepthToSpaceOptions(builder, node->block_size()).Union());
+ CreateDepthToSpaceOptions(_ctx.builder, node->block_size()).Union());
}
void OperationExporter::visit(luci::CircleDepthwiseConv2D *node)
{
export_simple(node, circle::BuiltinOperator_DEPTHWISE_CONV_2D,
circle::BuiltinOptions_DepthwiseConv2DOptions,
- CreateDepthwiseConv2DOptions(builder, getOpPadding(node->padding()),
+ CreateDepthwiseConv2DOptions(_ctx.builder, getOpPadding(node->padding()),
node->stride()->w(), node->stride()->h(),
node->depthMultiplier(),
to_circle_actfunc(node->fusedActivationFunction()),
@@ -446,7 +870,7 @@ void OperationExporter::visit(luci::CircleDiv *node)
{
export_simple(
node, circle::BuiltinOperator_DIV, circle::BuiltinOptions_DivOptions,
- CreateDivOptions(builder, to_circle_actfunc(node->fusedActivationFunction())).Union());
+ CreateDivOptions(_ctx.builder, to_circle_actfunc(node->fusedActivationFunction())).Union());
}
void OperationExporter::visit(luci::CircleElu *node)
@@ -457,25 +881,25 @@ void OperationExporter::visit(luci::CircleElu *node)
void OperationExporter::visit(luci::CircleEqual *node)
{
export_simple(node, circle::BuiltinOperator_EQUAL, circle::BuiltinOptions_EqualOptions,
- CreateEqualOptions(builder).Union());
+ CreateEqualOptions(_ctx.builder).Union());
}
void OperationExporter::visit(luci::CircleExp *node)
{
export_simple(node, circle::BuiltinOperator_EXP, circle::BuiltinOptions_ExpOptions,
- CreateExpOptions(builder).Union());
+ CreateExpOptions(_ctx.builder).Union());
}
void OperationExporter::visit(luci::CircleExpandDims *node)
{
export_simple(node, circle::BuiltinOperator_EXPAND_DIMS, circle::BuiltinOptions_ExpandDimsOptions,
- CreateExpandDimsOptions(builder).Union());
+ CreateExpandDimsOptions(_ctx.builder).Union());
}
void OperationExporter::visit(luci::CircleFill *node)
{
export_simple(node, circle::BuiltinOperator_FILL, circle::BuiltinOptions_FillOptions,
- CreateFillOptions(builder).Union());
+ CreateFillOptions(_ctx.builder).Union());
}
void OperationExporter::visit(luci::CircleFloor *node)
@@ -486,124 +910,86 @@ void OperationExporter::visit(luci::CircleFloor *node)
void OperationExporter::visit(luci::CircleFloorDiv *node)
{
export_simple(node, circle::BuiltinOperator_FLOOR_DIV, circle::BuiltinOptions_FloorDivOptions,
- CreateFloorDivOptions(builder).Union());
+ CreateFloorDivOptions(_ctx.builder).Union());
}
void OperationExporter::visit(luci::CircleFloorMod *node)
{
export_simple(node, circle::BuiltinOperator_FLOOR_MOD, circle::BuiltinOptions_FloorModOptions,
- CreateFloorModOptions(builder).Union());
+ CreateFloorModOptions(_ctx.builder).Union());
}
void OperationExporter::visit(luci::CircleFullyConnected *node)
{
export_simple(
node, circle::BuiltinOperator_FULLY_CONNECTED, circle::BuiltinOptions_FullyConnectedOptions,
- CreateFullyConnectedOptions(builder, to_circle_actfunc(node->fusedActivationFunction()))
+ CreateFullyConnectedOptions(_ctx.builder, to_circle_actfunc(node->fusedActivationFunction()))
.Union());
}
void OperationExporter::visit(luci::CircleGather *node)
{
export_simple(node, circle::BuiltinOperator_GATHER, circle::BuiltinOptions_GatherOptions,
- CreateGatherOptions(builder, node->axis()).Union());
+ CreateGatherOptions(_ctx.builder, node->axis()).Union());
}
void OperationExporter::visit(luci::CircleGatherNd *node)
{
export_simple(node, circle::BuiltinOperator_GATHER_ND, circle::BuiltinOptions_GatherNdOptions,
- CreateGatherNdOptions(builder).Union());
+ CreateGatherNdOptions(_ctx.builder).Union());
}
void OperationExporter::visit(luci::CircleGreater *node)
{
export_simple(node, circle::BuiltinOperator_GREATER, circle::BuiltinOptions_GreaterOptions,
- CreateGreaterOptions(builder).Union());
+ CreateGreaterOptions(_ctx.builder).Union());
}
void OperationExporter::visit(luci::CircleGreaterEqual *node)
{
export_simple(node, circle::BuiltinOperator_GREATER_EQUAL,
circle::BuiltinOptions_GreaterEqualOptions,
- CreateGreaterEqualOptions(builder).Union());
+ CreateGreaterEqualOptions(_ctx.builder).Union());
}
-void OperationExporter::visit(luci::CircleIf *node)
-{
- auto if_outs = loco::succs(node);
- assert(if_outs.size() == node->output_count());
-
- uint32_t op_idx = md.registerBuiltinOpcode(circle::BuiltinOperator_IF, node->op_version());
- std::vector<int32_t> inputs_vec;
- std::vector<int32_t> outputs_vec;
-
- inputs_vec.push_back(get_tensor_index(node->cond()));
- for (uint32_t idx = 0; idx < node->input_count(); ++idx)
- inputs_vec.push_back(get_tensor_index(node->input(idx)));
-
- for (uint32_t idx = 0; idx < node->output_count(); ++idx)
- {
- // store in order of index
- bool found = false;
- for (auto out : if_outs)
- {
- auto if_out = loco::must_cast<luci::CircleIfOut *>(out);
- if (if_out->index() == static_cast<int32_t>(idx))
- {
- outputs_vec.push_back(get_tensor_index(if_out));
- found = true;
- break;
- }
- }
- if (!found)
- {
- INTERNAL_EXN("Invalid CircleIf output");
- }
- }
-
- auto inputs = builder.CreateVector(inputs_vec);
- auto outputs = builder.CreateVector(outputs_vec);
- auto options = CreateIfOptions(builder, node->then_branch(), node->else_branch());
- auto op_offset = CreateOperator(builder, op_idx, inputs, outputs,
- circle::BuiltinOptions_IfOptions, options.Union());
- gd._operators.push_back(op_offset);
-}
+void OperationExporter::visit(luci::CircleIf *node) { export_node(_ctx, node); }
void OperationExporter::visit(luci::CircleL2Normalize *node)
{
export_simple(
node, circle::BuiltinOperator_L2_NORMALIZATION, circle::BuiltinOptions_L2NormOptions,
- CreateL2NormOptions(builder, to_circle_actfunc(node->fusedActivationFunction())).Union());
+ CreateL2NormOptions(_ctx.builder, to_circle_actfunc(node->fusedActivationFunction()))
+ .Union());
}
void OperationExporter::visit(luci::CircleL2Pool2D *node)
{
- export_pool_2d<luci::CircleL2Pool2D>(node, circle::BuiltinOperator_L2_POOL_2D);
+ export_pool_2d<luci::CircleL2Pool2D>(_ctx, node, circle::BuiltinOperator_L2_POOL_2D);
}
void OperationExporter::visit(luci::CircleLeakyRelu *node)
{
export_simple(node, circle::BuiltinOperator_LEAKY_RELU, circle::BuiltinOptions_LeakyReluOptions,
- CreateLeakyReluOptions(builder, node->alpha()).Union());
+ CreateLeakyReluOptions(_ctx.builder, node->alpha()).Union());
}
void OperationExporter::visit(luci::CircleLess *node)
{
export_simple(node, circle::BuiltinOperator_LESS, circle::BuiltinOptions_LessOptions,
- CreateLessOptions(builder).Union());
+ CreateLessOptions(_ctx.builder).Union());
}
void OperationExporter::visit(luci::CircleLessEqual *node)
{
export_simple(node, circle::BuiltinOperator_LESS_EQUAL, circle::BuiltinOptions_LessEqualOptions,
- CreateLessEqualOptions(builder).Union());
+ CreateLessEqualOptions(_ctx.builder).Union());
}
void OperationExporter::visit(luci::CircleLocalResponseNormalization *node)
{
export_simple(node, circle::BuiltinOperator_LOCAL_RESPONSE_NORMALIZATION,
circle::BuiltinOptions_LocalResponseNormalizationOptions,
- CreateLocalResponseNormalizationOptions(builder, node->radius(), node->bias(),
+ CreateLocalResponseNormalizationOptions(_ctx.builder, node->radius(), node->bias(),
node->alpha(), node->beta())
.Union());
}
@@ -616,19 +1002,19 @@ void OperationExporter::visit(luci::CircleLog *node)
void OperationExporter::visit(luci::CircleLogicalAnd *node)
{
export_simple(node, circle::BuiltinOperator_LOGICAL_AND, circle::BuiltinOptions_LogicalAndOptions,
- CreateLogicalAndOptions(builder).Union());
+ CreateLogicalAndOptions(_ctx.builder).Union());
}
void OperationExporter::visit(luci::CircleLogicalNot *node)
{
export_simple(node, circle::BuiltinOperator_LOGICAL_NOT, circle::BuiltinOptions_LogicalNotOptions,
- CreateLogicalNotOptions(builder).Union());
+ CreateLogicalNotOptions(_ctx.builder).Union());
}
void OperationExporter::visit(luci::CircleLogicalOr *node)
{
export_simple(node, circle::BuiltinOperator_LOGICAL_OR, circle::BuiltinOptions_LogicalOrOptions,
- CreateLogicalOrOptions(builder).Union());
+ CreateLogicalOrOptions(_ctx.builder).Union());
}
void OperationExporter::visit(luci::CircleLogistic *node)
@@ -639,135 +1025,103 @@ void OperationExporter::visit(luci::CircleLogistic *node)
void OperationExporter::visit(luci::CircleLogSoftmax *node)
{
export_simple(node, circle::BuiltinOperator_LOG_SOFTMAX, circle::BuiltinOptions_LogSoftmaxOptions,
- CreateLogSoftmaxOptions(builder).Union());
+ CreateLogSoftmaxOptions(_ctx.builder).Union());
}
void OperationExporter::visit(luci::CircleMatrixDiag *node)
{
export_simple(node, circle::BuiltinOperator_MATRIX_DIAG, circle::BuiltinOptions_MatrixDiagOptions,
- CreateMatrixDiagOptions(builder).Union());
+ CreateMatrixDiagOptions(_ctx.builder).Union());
}
void OperationExporter::visit(luci::CircleMatrixSetDiag *node)
{
export_simple(node, circle::BuiltinOperator_MATRIX_SET_DIAG,
circle::BuiltinOptions_MatrixSetDiagOptions,
- CreateMatrixSetDiagOptions(builder).Union());
+ CreateMatrixSetDiagOptions(_ctx.builder).Union());
}
void OperationExporter::visit(luci::CircleMaximum *node)
{
export_simple(node, circle::BuiltinOperator_MAXIMUM, circle::BuiltinOptions_MaximumMinimumOptions,
- CreateMaximumMinimumOptions(builder).Union());
+ CreateMaximumMinimumOptions(_ctx.builder).Union());
}
void OperationExporter::visit(luci::CircleMaxPool2D *node)
{
- export_pool_2d<luci::CircleMaxPool2D>(node, circle::BuiltinOperator_MAX_POOL_2D);
+ export_pool_2d<luci::CircleMaxPool2D>(_ctx, node, circle::BuiltinOperator_MAX_POOL_2D);
}
void OperationExporter::visit(luci::CircleMean *node)
{
export_simple(node, circle::BuiltinOperator_MEAN, circle::BuiltinOptions_ReducerOptions,
- CreateReducerOptions(builder, node->keep_dims()).Union());
+ CreateReducerOptions(_ctx.builder, node->keep_dims()).Union());
}
void OperationExporter::visit(luci::CircleMinimum *node)
{
export_simple(node, circle::BuiltinOperator_MINIMUM, circle::BuiltinOptions_MaximumMinimumOptions,
- CreateMaximumMinimumOptions(builder).Union());
+ CreateMaximumMinimumOptions(_ctx.builder).Union());
}
void OperationExporter::visit(luci::CircleMirrorPad *node)
{
- export_simple(node, circle::BuiltinOperator_MIRROR_PAD, circle::BuiltinOptions_MirrorPadOptions,
- CreateMirrorPadOptions(builder, to_circle_mirrorpadmode(node->mode())).Union());
+ export_simple(
+ node, circle::BuiltinOperator_MIRROR_PAD, circle::BuiltinOptions_MirrorPadOptions,
+ CreateMirrorPadOptions(_ctx.builder, to_circle_mirrorpadmode(node->mode())).Union());
}
void OperationExporter::visit(luci::CircleMul *node)
{
export_simple(
node, circle::BuiltinOperator_MUL, circle::BuiltinOptions_MulOptions,
- CreateMulOptions(builder, to_circle_actfunc(node->fusedActivationFunction())).Union());
+ CreateMulOptions(_ctx.builder, to_circle_actfunc(node->fusedActivationFunction())).Union());
}
void OperationExporter::visit(luci::CircleNeg *node)
{
export_simple(node, circle::BuiltinOperator_NEG, circle::BuiltinOptions_NegOptions,
- CreateNegOptions(builder).Union());
+ CreateNegOptions(_ctx.builder).Union());
}
-void OperationExporter::visit(luci::CircleNonMaxSuppressionV4 *node)
-{
- auto nms_outs = loco::succs(node);
- assert(nms_outs.size() == 2);
+void OperationExporter::visit(luci::CircleNonMaxSuppressionV4 *node) { export_node(_ctx, node); }
- uint32_t op_idx =
- md.registerBuiltinOpcode(circle::BuiltinOperator_NON_MAX_SUPPRESSION_V4, node->op_version());
- std::vector<int32_t> inputs_vec{
- get_tensor_index(node->boxes()), get_tensor_index(node->scores()),
- get_tensor_index(node->max_output_size()), get_tensor_index(node->iou_threshold()),
- get_tensor_index(node->score_threshold()),
- };
- std::vector<int32_t> outputs_vec;
-
- for (uint32_t idx = 0; idx < nms_outs.size(); ++idx)
- {
- // store in order of index
- bool found = false;
- for (auto out : nms_outs)
- {
- auto nms_out = loco::must_cast<luci::CircleNonMaxSuppressionV4Out *>(out);
- if (nms_out->index() == static_cast<int32_t>(idx))
- {
- outputs_vec.push_back(get_tensor_index(nms_out));
- found = true;
- break;
- }
- }
- if (!found)
- {
- INTERNAL_EXN("Invalid NonMaxSuppressionV4 output");
- }
- }
-
- auto inputs = builder.CreateVector(inputs_vec);
- auto outputs = builder.CreateVector(outputs_vec);
- auto options = CreateNonMaxSuppressionV4Options(builder);
- auto op_offset =
- CreateOperator(builder, op_idx, inputs, outputs,
- circle::BuiltinOptions_NonMaxSuppressionV4Options, options.Union());
- gd._operators.push_back(op_offset);
-}
+void OperationExporter::visit(luci::CircleNonMaxSuppressionV5 *node) { export_node(_ctx, node); }
void OperationExporter::visit(luci::CircleNotEqual *node)
{
export_simple(node, circle::BuiltinOperator_NOT_EQUAL, circle::BuiltinOptions_NotEqualOptions,
- CreateNotEqualOptions(builder).Union());
+ CreateNotEqualOptions(_ctx.builder).Union());
}
void OperationExporter::visit(luci::CircleOneHot *node)
{
export_simple(node, circle::BuiltinOperator_ONE_HOT, circle::BuiltinOptions_OneHotOptions,
- CreateOneHotOptions(builder, node->axis()).Union());
+ CreateOneHotOptions(_ctx.builder, node->axis()).Union());
}
void OperationExporter::visit(luci::CirclePack *node)
{
export_simple(node, circle::BuiltinOperator_PACK, circle::BuiltinOptions_PackOptions,
- CreatePackOptions(builder, node->values_count(), node->axis()).Union());
+ CreatePackOptions(_ctx.builder, node->values_count(), node->axis()).Union());
}
void OperationExporter::visit(luci::CirclePad *node)
{
export_simple(node, circle::BuiltinOperator_PAD, circle::BuiltinOptions_PadOptions,
- CreatePadOptions(builder).Union());
+ CreatePadOptions(_ctx.builder).Union());
+}
+
+void OperationExporter::visit(luci::CirclePadV2 *node)
+{
+ export_simple(node, circle::BuiltinOperator_PADV2, circle::BuiltinOptions_PadV2Options,
+ CreatePadV2Options(_ctx.builder).Union());
}
void OperationExporter::visit(luci::CirclePow *node)
{
export_simple(node, circle::BuiltinOperator_POW, circle::BuiltinOptions_PowOptions,
- CreatePowOptions(builder).Union());
+ CreatePowOptions(_ctx.builder).Union());
}
void OperationExporter::visit(luci::CirclePRelu *node)
@@ -778,37 +1132,37 @@ void OperationExporter::visit(luci::CirclePRelu *node)
void OperationExporter::visit(luci::CircleRange *node)
{
export_simple(node, circle::BuiltinOperator_RANGE, circle::BuiltinOptions_RangeOptions,
- CreateRangeOptions(builder).Union());
+ CreateRangeOptions(_ctx.builder).Union());
}
void OperationExporter::visit(luci::CircleRank *node)
{
export_simple(node, circle::BuiltinOperator_RANK, circle::BuiltinOptions_RankOptions,
- CreateRankOptions(builder).Union());
+ CreateRankOptions(_ctx.builder).Union());
}
void OperationExporter::visit(luci::CircleReduceAny *node)
{
export_simple(node, circle::BuiltinOperator_REDUCE_ANY, circle::BuiltinOptions_ReducerOptions,
- CreateReducerOptions(builder, node->keep_dims()).Union());
+ CreateReducerOptions(_ctx.builder, node->keep_dims()).Union());
}
void OperationExporter::visit(luci::CircleReduceMax *node)
{
export_simple(node, circle::BuiltinOperator_REDUCE_MAX, circle::BuiltinOptions_ReducerOptions,
- CreateReducerOptions(builder, node->keep_dims()).Union());
+ CreateReducerOptions(_ctx.builder, node->keep_dims()).Union());
}
void OperationExporter::visit(luci::CircleReduceMin *node)
{
export_simple(node, circle::BuiltinOperator_REDUCE_MIN, circle::BuiltinOptions_ReducerOptions,
- CreateReducerOptions(builder, node->keep_dims()).Union());
+ CreateReducerOptions(_ctx.builder, node->keep_dims()).Union());
}
void OperationExporter::visit(luci::CircleReduceProd *node)
{
export_simple(node, circle::BuiltinOperator_REDUCE_PROD, circle::BuiltinOptions_ReducerOptions,
- CreateReducerOptions(builder, node->keep_dims()).Union());
+ CreateReducerOptions(_ctx.builder, node->keep_dims()).Union());
}
void OperationExporter::visit(luci::CircleRelu *node)
@@ -828,18 +1182,18 @@ void OperationExporter::visit(luci::CircleReluN1To1 *node)
void OperationExporter::visit(luci::CircleReshape *node)
{
- auto new_shape = builder.CreateVector<int32_t>(
+ auto new_shape = _ctx.builder.CreateVector<int32_t>(
node->newShape()->rank(), [node](size_t i) { return node->newShape()->dim(i); });
export_simple(node, circle::BuiltinOperator_RESHAPE, circle::BuiltinOptions_ReshapeOptions,
- CreateReshapeOptions(builder, new_shape).Union());
+ CreateReshapeOptions(_ctx.builder, new_shape).Union());
}
void OperationExporter::visit(luci::CircleResizeBilinear *node)
{
export_simple(
node, circle::BuiltinOperator_RESIZE_BILINEAR, circle::BuiltinOptions_ResizeBilinearOptions,
- CreateResizeBilinearOptions(builder, node->align_corners(), node->half_pixel_centers())
+ CreateResizeBilinearOptions(_ctx.builder, node->align_corners(), node->half_pixel_centers())
.Union());
}
@@ -847,29 +1201,17 @@ void OperationExporter::visit(luci::CircleResizeNearestNeighbor *node)
{
export_simple(node, circle::BuiltinOperator_RESIZE_NEAREST_NEIGHBOR,
circle::BuiltinOptions_ResizeNearestNeighborOptions,
- CreateResizeNearestNeighborOptions(builder, node->align_corners()).Union());
+ CreateResizeNearestNeighborOptions(_ctx.builder, node->align_corners()).Union());
}
void OperationExporter::visit(luci::CircleReverseSequence *node)
{
export_simple(
node, circle::BuiltinOperator_REVERSE_SEQUENCE, circle::BuiltinOptions_ReverseSequenceOptions,
- CreateReverseSequenceOptions(builder, node->seq_axis(), node->batch_axis()).Union());
+ CreateReverseSequenceOptions(_ctx.builder, node->seq_axis(), node->batch_axis()).Union());
}
-void OperationExporter::visit(luci::CircleReverseV2 *node)
-{
- uint32_t op_idx =
- md.registerBuiltinOpcode(circle::BuiltinOperator_REVERSE_V2, node->op_version());
- std::vector<int32_t> inputs_vec{get_tensor_index(node->tensor()), get_tensor_index(node->axis())};
- std::vector<int32_t> outputs_vec{get_tensor_index(static_cast<loco::Node *>(node))};
- auto inputs = builder.CreateVector(inputs_vec);
- auto outputs = builder.CreateVector(outputs_vec);
- auto options = CreateReverseV2Options(builder);
- auto op_offset = CreateOperator(builder, op_idx, inputs, outputs,
- circle::BuiltinOptions_ReverseSequenceOptions, options.Union());
- gd._operators.push_back(op_offset);
-}
+void OperationExporter::visit(luci::CircleReverseV2 *node) { export_node(_ctx, node); }
void OperationExporter::visit(luci::CircleRound *node)
{
@@ -884,31 +1226,31 @@ void OperationExporter::visit(luci::CircleRsqrt *node)
void OperationExporter::visit(luci::CircleScatterNd *node)
{
export_simple(node, circle::BuiltinOperator_SCATTER_ND, circle::BuiltinOptions_ScatterNdOptions,
- CreateScatterNdOptions(builder).Union());
+ CreateScatterNdOptions(_ctx.builder).Union());
}
void OperationExporter::visit(luci::CircleSegmentSum *node)
{
export_simple(node, circle::BuiltinOperator_SEGMENT_SUM, circle::BuiltinOptions_SegmentSumOptions,
- CreateSegmentSumOptions(builder).Union());
+ CreateSegmentSumOptions(_ctx.builder).Union());
}
void OperationExporter::visit(luci::CircleSelect *node)
{
export_simple(node, circle::BuiltinOperator_SELECT, circle::BuiltinOptions_SelectOptions,
- CreateSelectOptions(builder).Union());
+ CreateSelectOptions(_ctx.builder).Union());
}
void OperationExporter::visit(luci::CircleSelectV2 *node)
{
export_simple(node, circle::BuiltinOperator_SELECT_V2, circle::BuiltinOptions_SelectV2Options,
- CreateSelectV2Options(builder).Union());
+ CreateSelectV2Options(_ctx.builder).Union());
}
void OperationExporter::visit(luci::CircleShape *node)
{
export_simple(node, circle::BuiltinOperator_SHAPE, circle::BuiltinOptions_ShapeOptions,
- CreateShapeOptions(builder, to_circle_tensortype(node->out_type())).Union());
+ CreateShapeOptions(_ctx.builder, to_circle_tensortype(node->out_type())).Union());
}
void OperationExporter::visit(luci::CircleSin *node)
@@ -919,113 +1261,39 @@ void OperationExporter::visit(luci::CircleSin *node)
void OperationExporter::visit(luci::CircleSlice *node)
{
export_simple(node, circle::BuiltinOperator_SLICE, circle::BuiltinOptions_SliceOptions,
- CreateSliceOptions(builder).Union());
+ CreateSliceOptions(_ctx.builder).Union());
}
void OperationExporter::visit(luci::CircleSoftmax *node)
{
export_simple(node, circle::BuiltinOperator_SOFTMAX, circle::BuiltinOptions_SoftmaxOptions,
- CreateSoftmaxOptions(builder, node->beta()).Union());
+ CreateSoftmaxOptions(_ctx.builder, node->beta()).Union());
}
void OperationExporter::visit(luci::CircleSpaceToBatchND *node)
{
export_simple(node, circle::BuiltinOperator_SPACE_TO_BATCH_ND,
circle::BuiltinOptions_SpaceToBatchNDOptions,
- CreateSpaceToBatchNDOptions(builder).Union());
+ CreateSpaceToBatchNDOptions(_ctx.builder).Union());
}
void OperationExporter::visit(luci::CircleSpaceToDepth *node)
{
export_simple(node, circle::BuiltinOperator_SPACE_TO_DEPTH,
circle::BuiltinOptions_SpaceToDepthOptions,
- CreateSpaceToDepthOptions(builder, node->block_size()).Union());
+ CreateSpaceToDepthOptions(_ctx.builder, node->block_size()).Union());
}
void OperationExporter::visit(luci::CircleSparseToDense *node)
{
export_simple(node, circle::BuiltinOperator_SPARSE_TO_DENSE,
circle::BuiltinOptions_SparseToDenseOptions,
- CreateSparseToDenseOptions(builder, node->validate_indices()).Union());
+ CreateSparseToDenseOptions(_ctx.builder, node->validate_indices()).Union());
}
-void OperationExporter::visit(luci::CircleSplit *node)
-{
- auto split_outs = loco::succs(node);
- assert(int32_t(split_outs.size()) == node->num_split());
-
- uint32_t op_idx = md.registerBuiltinOpcode(circle::BuiltinOperator_SPLIT, node->op_version());
- // NOTE BuiltinOperator_SPLIT input is placed at second position
- std::vector<int32_t> inputs_vec{get_tensor_index(node->split_dim()),
- get_tensor_index(node->input())};
- std::vector<int32_t> outputs_vec;
-
- for (int32_t index = 0; index < node->num_split(); index++)
- {
- // store in order of index
- bool found = false;
- for (auto out : split_outs)
- {
- auto split_out = loco::must_cast<luci::CircleSplitOut *>(out);
- if (split_out->index() == index)
- {
- outputs_vec.push_back(get_tensor_index(split_out));
- found = true;
- break;
- }
- }
- if (!found)
- {
- INTERNAL_EXN("Invalid Split output");
- }
- }
-
- auto inputs = builder.CreateVector(inputs_vec);
- auto outputs = builder.CreateVector(outputs_vec);
- auto options = CreateSplitOptions(builder, node->num_split());
- auto op_offset = CreateOperator(builder, op_idx, inputs, outputs,
- circle::BuiltinOptions_SplitOptions, options.Union());
- gd._operators.push_back(op_offset);
-}
-
-void OperationExporter::visit(luci::CircleSplitV *node)
-{
- auto split_outs = loco::succs(node);
- assert(int32_t(split_outs.size()) == node->num_split());
-
- uint32_t op_idx = md.registerBuiltinOpcode(circle::BuiltinOperator_SPLIT_V, node->op_version());
- std::vector<int32_t> inputs_vec{get_tensor_index(node->input()),
- get_tensor_index(node->size_splits()),
- get_tensor_index(node->split_dim())};
- std::vector<int32_t> outputs_vec;
-
- for (int32_t index = 0; index < node->num_split(); index++)
- {
- // store in order of index
- bool found = false;
- for (auto out : split_outs)
- {
- auto split_out = loco::must_cast<luci::CircleSplitVOut *>(out);
- if (split_out->index() == index)
- {
- outputs_vec.push_back(get_tensor_index(split_out));
- found = true;
- break;
- }
- }
- if (!found)
- {
- INTERNAL_EXN("Invalid SplitV output");
- }
- }
+void OperationExporter::visit(luci::CircleSplit *node) { export_node(_ctx, node); }
- auto inputs = builder.CreateVector(inputs_vec);
- auto outputs = builder.CreateVector(outputs_vec);
- auto options = CreateSplitVOptions(builder, node->num_split());
- auto op_offset = CreateOperator(builder, op_idx, inputs, outputs,
- circle::BuiltinOptions_SplitVOptions, options.Union());
- gd._operators.push_back(op_offset);
-}
+void OperationExporter::visit(luci::CircleSplitV *node) { export_node(_ctx, node); }
void OperationExporter::visit(luci::CircleSqrt *node)
{
@@ -1035,28 +1303,28 @@ void OperationExporter::visit(luci::CircleSqrt *node)
void OperationExporter::visit(luci::CircleSquare *node)
{
export_simple(node, circle::BuiltinOperator_SQUARE, circle::BuiltinOptions_SquareOptions,
- CreateSquareOptions(builder).Union());
+ CreateSquareOptions(_ctx.builder).Union());
}
void OperationExporter::visit(luci::CircleSquaredDifference *node)
{
export_simple(node, circle::BuiltinOperator_SQUARED_DIFFERENCE,
circle::BuiltinOptions_SquaredDifferenceOptions,
- CreateSquaredDifferenceOptions(builder).Union());
+ CreateSquaredDifferenceOptions(_ctx.builder).Union());
}
void OperationExporter::visit(luci::CircleSqueeze *node)
{
- auto squeeze_dims = builder.CreateVector<int32_t>(node->squeeze_dims());
+ auto squeeze_dims = _ctx.builder.CreateVector<int32_t>(node->squeeze_dims());
export_simple(node, circle::BuiltinOperator_SQUEEZE, circle::BuiltinOptions_SqueezeOptions,
- CreateSqueezeOptions(builder, squeeze_dims).Union());
+ CreateSqueezeOptions(_ctx.builder, squeeze_dims).Union());
}
void OperationExporter::visit(luci::CircleStridedSlice *node)
{
export_simple(node, circle::BuiltinOperator_STRIDED_SLICE,
circle::BuiltinOptions_StridedSliceOptions,
- CreateStridedSliceOptions(builder, node->begin_mask(), node->end_mask(),
+ CreateStridedSliceOptions(_ctx.builder, node->begin_mask(), node->end_mask(),
node->ellipsis_mask(), node->new_axis_mask(),
node->shrink_axis_mask())
.Union());
@@ -1066,13 +1334,13 @@ void OperationExporter::visit(luci::CircleSub *node)
{
export_simple(
node, circle::BuiltinOperator_SUB, circle::BuiltinOptions_SubOptions,
- CreateSubOptions(builder, to_circle_actfunc(node->fusedActivationFunction())).Union());
+ CreateSubOptions(_ctx.builder, to_circle_actfunc(node->fusedActivationFunction())).Union());
}
void OperationExporter::visit(luci::CircleSum *node)
{
export_simple(node, circle::BuiltinOperator_SUM, circle::BuiltinOptions_ReducerOptions,
- CreateReducerOptions(builder, node->keep_dims()).Union());
+ CreateReducerOptions(_ctx.builder, node->keep_dims()).Union());
}
void OperationExporter::visit(luci::CircleTanh *node)
@@ -1083,226 +1351,65 @@ void OperationExporter::visit(luci::CircleTanh *node)
void OperationExporter::visit(luci::CircleTile *node)
{
export_simple(node, circle::BuiltinOperator_TILE, circle::BuiltinOptions_TileOptions,
- CreateTileOptions(builder).Union());
+ CreateTileOptions(_ctx.builder).Union());
}
-void OperationExporter::visit(luci::CircleTopKV2 *node)
-{
- auto topkv2_outs = loco::succs(node);
- int outs_count = int32_t(topkv2_outs.size());
- assert(outs_count == 2);
-
- uint32_t op_idx = md.registerBuiltinOpcode(circle::BuiltinOperator_TOPK_V2, node->op_version());
- std::vector<int32_t> inputs_vec{get_tensor_index(node->input()), get_tensor_index(node->k())};
- std::vector<int32_t> outputs_vec;
-
- for (int32_t index = 0; index < outs_count; index++)
- {
- // store in order of index
- bool found = false;
- for (auto out : topkv2_outs)
- {
- auto topkv2_out = loco::must_cast<luci::CircleTopKV2Out *>(out);
- if (topkv2_out->index() == index)
- {
- outputs_vec.push_back(get_tensor_index(topkv2_out));
- found = true;
- break;
- }
- }
- if (!found)
- {
- INTERNAL_EXN("Invalid TopKV2 output");
- }
- }
-
- auto inputs = builder.CreateVector(inputs_vec);
- auto outputs = builder.CreateVector(outputs_vec);
- auto options = CreateTopKV2Options(builder);
- auto op_offset = CreateOperator(builder, op_idx, inputs, outputs,
- circle::BuiltinOptions_TopKV2Options, options.Union());
- gd._operators.push_back(op_offset);
-}
+void OperationExporter::visit(luci::CircleTopKV2 *node) { export_node(_ctx, node); }
void OperationExporter::visit(luci::CircleTranspose *node)
{
export_simple(node, circle::BuiltinOperator_TRANSPOSE, circle::BuiltinOptions_TransposeOptions,
- CreateTransposeOptions(builder).Union());
+ CreateTransposeOptions(_ctx.builder).Union());
}
void OperationExporter::visit(luci::CircleTransposeConv *node)
{
export_simple(node, circle::BuiltinOperator_TRANSPOSE_CONV,
circle::BuiltinOptions_TransposeConvOptions,
- CreateTransposeConvOptions(builder, getOpPadding(node->padding()),
+ CreateTransposeConvOptions(_ctx.builder, getOpPadding(node->padding()),
node->stride()->w(), node->stride()->h())
.Union());
}
-void OperationExporter::visit(luci::CircleUnique *node)
-{
- auto unique_outs = loco::succs(node);
- assert(int32_t(unique_outs.size()) == 2);
- uint32_t op_idx = md.registerBuiltinOpcode(circle::BuiltinOperator_UNIQUE, node->op_version());
+void OperationExporter::visit(luci::CircleUnique *node) { export_node(_ctx, node); }
- std::vector<int32_t> inputs_vec{get_tensor_index(node->input())};
- std::vector<int32_t> outputs_vec;
-
- for (int32_t index = 0; index < 2; index++)
- {
- // store in order of index
- bool found = false;
- for (auto out : unique_outs)
- {
- auto unique_out = loco::must_cast<luci::CircleUniqueOut *>(out);
- if (unique_out->index() == index)
- {
- outputs_vec.push_back(get_tensor_index(unique_out));
- found = true;
- break;
- }
- }
- if (!found)
- {
- INTERNAL_EXN("Invalid Unique output");
- }
- }
-
- auto inputs = builder.CreateVector(inputs_vec);
- auto outputs = builder.CreateVector(outputs_vec);
- auto options = CreateUniqueOptions(builder, to_circle_tensortype(node->idx_out_type()));
- auto op_offset = CreateOperator(builder, op_idx, inputs, outputs,
- circle::BuiltinOptions_UniqueOptions, options.Union());
- gd._operators.push_back(op_offset);
-}
-
-void OperationExporter::visit(luci::CircleUnpack *node)
-{
- LOGGER(l);
- auto settings = luci::UserSettings::settings();
-
- auto unpack_outs = loco::succs(node);
- // NOTE real models may not use all of the outputs
- if (static_cast<int32_t>(unpack_outs.size()) != node->num())
- {
- if (settings->get(luci::UserSettings::Key::DisableValidation))
- {
- WARN(l) << "Warning: export Unpack(" << node->name() << ") 'num' not same as outputs";
- }
- else
- assert(false);
- }
-
- uint32_t op_idx = md.registerBuiltinOpcode(circle::BuiltinOperator_UNPACK, node->op_version());
- std::vector<int32_t> inputs_vec{get_tensor_index(node->value())};
- std::vector<int32_t> outputs_vec;
-
- for (int32_t index = 0; index < node->num(); index++)
- {
- // store in order of index
- bool found = false;
- for (auto out : unpack_outs)
- {
- auto unpack_out = loco::must_cast<luci::CircleUnpackOut *>(out);
- if (unpack_out->index() == index)
- {
- outputs_vec.push_back(get_tensor_index(unpack_out));
- found = true;
- break;
- }
- }
- // NOTE real models may not use all of the outputs
- if (!found)
- {
- if (settings->get(luci::UserSettings::Key::DisableValidation))
- {
- WARN(l) << "Warning: export Unpack(" << node->name() << ") output " << index << " not used";
- }
- else
- assert(false);
- }
- }
-
- auto inputs = builder.CreateVector(inputs_vec);
- auto outputs = builder.CreateVector(outputs_vec);
- auto options = CreateUnpackOptions(builder, node->num(), node->axis());
- auto op_offset = CreateOperator(builder, op_idx, inputs, outputs,
- circle::BuiltinOptions_UnpackOptions, options.Union());
- gd._operators.push_back(op_offset);
-}
+void OperationExporter::visit(luci::CircleUnpack *node) { export_node(_ctx, node); }
void OperationExporter::visit(luci::CircleWhere *node)
{
export_simple(node, circle::BuiltinOperator_WHERE, circle::BuiltinOptions_WhereOptions,
- CreateWhereOptions(builder).Union());
+ CreateWhereOptions(_ctx.builder).Union());
}
-void OperationExporter::visit(luci::CircleWhile *node)
-{
- auto while_outs = loco::succs(node);
- assert(while_outs.size() == node->output_count());
-
- uint32_t op_idx = md.registerBuiltinOpcode(circle::BuiltinOperator_WHILE, node->op_version());
- std::vector<int32_t> inputs_vec;
- std::vector<int32_t> outputs_vec;
-
- for (uint32_t idx = 0; idx < node->input_count(); ++idx)
- inputs_vec.push_back(get_tensor_index(node->input(idx)));
-
- for (uint32_t idx = 0; idx < node->output_count(); ++idx)
- {
- // store in order of index
- bool found = false;
- for (auto out : while_outs)
- {
- auto while_out = loco::must_cast<luci::CircleWhileOut *>(out);
- if (while_out->index() == static_cast<int32_t>(idx))
- {
- outputs_vec.push_back(get_tensor_index(while_out));
- found = true;
- break;
- }
- }
- if (!found)
- {
- INTERNAL_EXN("Invalid CircleWhile output");
- }
- }
-
- auto inputs = builder.CreateVector(inputs_vec);
- auto outputs = builder.CreateVector(outputs_vec);
- auto options = CreateWhileOptions(builder, node->cond_branch(), node->body_branch());
- auto op_offset = CreateOperator(builder, op_idx, inputs, outputs,
- circle::BuiltinOptions_WhileOptions, options.Union());
- gd._operators.push_back(op_offset);
-}
+void OperationExporter::visit(luci::CircleWhile *node) { export_node(_ctx, node); }
void OperationExporter::visit(luci::CircleZerosLike *node)
{
export_simple(node, circle::BuiltinOperator_ZEROS_LIKE, circle::BuiltinOptions_ZerosLikeOptions,
- CreateZerosLikeOptions(builder).Union());
+ CreateZerosLikeOptions(_ctx.builder).Union());
}
void OperationExporter::visit(luci::CircleBCQFullyConnected *node)
{
export_simple(node, circle::BuiltinOperator_BCQ_FULLY_CONNECTED,
circle::BuiltinOptions_BCQFullyConnectedOptions,
- CreateBCQFullyConnectedOptions(builder, node->weights_hidden_size(),
+ CreateBCQFullyConnectedOptions(_ctx.builder, node->weights_hidden_size(),
to_circle_actfunc(node->fusedActivationFunction()))
.Union());
}
void OperationExporter::visit(luci::CircleBCQGather *node)
{
- export_simple(node, circle::BuiltinOperator_BCQ_GATHER, circle::BuiltinOptions_BCQGatherOptions,
- CreateBCQGatherOptions(builder, node->input_hidden_size(), node->axis()).Union());
+ export_simple(
+ node, circle::BuiltinOperator_BCQ_GATHER, circle::BuiltinOptions_BCQGatherOptions,
+ CreateBCQGatherOptions(_ctx.builder, node->input_hidden_size(), node->axis()).Union());
}
void OperationExporter::visit(luci::CircleInstanceNorm *node)
{
export_simple(node, circle::BuiltinOperator_INSTANCE_NORM,
circle::BuiltinOptions_InstanceNormOptions,
- CreateInstanceNormOptions(builder, node->epsilon(),
+ CreateInstanceNormOptions(_ctx.builder, node->epsilon(),
to_circle_actfunc(node->fusedActivationFunction()))
.Union());
}
@@ -1312,7 +1419,8 @@ void exportNode(loco::Node *node, flatbuffers::FlatBufferBuilder &builder, Seria
{
if (auto circle_node = dynamic_cast<luci::CircleNode *>(node))
{
- OperationExporter exporter{builder, md, gd};
+ ExportContext ctx{builder, md, gd};
+ OperationExporter exporter{ctx};
circle_node->accept(&exporter);
}
else
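
The exporter hunks above replace the per-visitor FlatBuffer boilerplate with free export_node overloads that share a single ExportContext (builder plus serialized model/graph data), and every multi-output operator (SplitV, TopKV2, Unique, Unpack, While) emits its outputs with the same index-ordering loop over loco::succs. Below is a minimal standalone sketch of that pattern; Ctx, Out, ordered_outputs and the other names are simplified stand-ins for illustration, not the actual luci or FlatBuffers API.

// Simplified sketch of the exporter pattern above; all types are illustrative
// stand-ins, not the real luci/FlatBuffers classes.
#include <cassert>
#include <iostream>
#include <vector>

struct Ctx // plays the role of ExportContext: shared serialization state
{
  std::vector<int> operators; // stands in for gd._operators
};

struct Out // plays the role of CircleSplitVOut / CircleTopKV2Out / ...
{
  int index;     // which output slot of the parent op this node represents
  int tensor_id; // stands in for get_tensor_index(...)
};

// Shared "emit outputs in index order" loop used by every multi-output exporter.
std::vector<int> ordered_outputs(const std::vector<Out> &succs, int num_outputs)
{
  std::vector<int> outputs;
  for (int index = 0; index < num_outputs; ++index)
  {
    bool found = false;
    for (const auto &out : succs)
    {
      if (out.index == index)
      {
        outputs.push_back(out.tensor_id);
        found = true;
        break;
      }
    }
    assert(found && "missing output for this index"); // the real code raises INTERNAL_EXN
  }
  return outputs;
}

// Free export function taking the shared context, mirroring export_node(ExportContext &, ...).
void export_node(Ctx &ctx, const std::vector<Out> &succs, int num_outputs)
{
  auto outputs = ordered_outputs(succs, num_outputs);
  ctx.operators.push_back(static_cast<int>(outputs.size())); // stands in for CreateOperator(...)
}

int main()
{
  Ctx ctx;
  std::vector<Out> succs = {{1, 11}, {0, 10}}; // successors arrive in arbitrary order
  export_node(ctx, succs, 2);
  std::cout << "exported operators: " << ctx.operators.size() << "\n";
  return 0;
}

Keeping the serialization state in one context object lets the index-ordering loop live in shared helpers instead of being copied into each visit method, which is what allows the visitors here to collapse to one-line delegations.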
diff --git a/compiler/luci/import/include/luci/Import/Nodes.h b/compiler/luci/import/include/luci/Import/Nodes.h
index 825c2147d..0b21d380f 100644
--- a/compiler/luci/import/include/luci/Import/Nodes.h
+++ b/compiler/luci/import/include/luci/Import/Nodes.h
@@ -74,10 +74,12 @@
#include "Nodes/CircleMul.h"
#include "Nodes/CircleNeg.h"
#include "Nodes/CircleNonMaxSuppressionV4.h"
+#include "Nodes/CircleNonMaxSuppressionV5.h"
#include "Nodes/CircleNotEqual.h"
#include "Nodes/CircleOneHot.h"
#include "Nodes/CirclePack.h"
#include "Nodes/CirclePad.h"
+#include "Nodes/CirclePadV2.h"
#include "Nodes/CirclePow.h"
#include "Nodes/CirclePRelu.h"
#include "Nodes/CircleRange.h"
diff --git a/compiler/luci/import/include/luci/Import/Nodes/CircleNonMaxSuppressionV5.h b/compiler/luci/import/include/luci/Import/Nodes/CircleNonMaxSuppressionV5.h
new file mode 100644
index 000000000..62be0758e
--- /dev/null
+++ b/compiler/luci/import/include/luci/Import/Nodes/CircleNonMaxSuppressionV5.h
@@ -0,0 +1,35 @@
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __LUCI_IMPORT_OP_CIRCLE_NON_MAX_SUPPRESSION_V5_H__
+#define __LUCI_IMPORT_OP_CIRCLE_NON_MAX_SUPPRESSION_V5_H__
+
+#include "luci/Import/GraphBuilderBase.h"
+
+namespace luci
+{
+
+class CircleNonMaxSuppressionV5GraphBuilder : public GraphBuilderBase
+{
+public:
+ bool validate(const ValidateArgs &args) const final;
+
+ void build(const circle::OperatorT &op, GraphBuilderContext *context) const final;
+};
+
+} // namespace luci
+
+#endif // __LUCI_IMPORT_OP_CIRCLE_NON_MAX_SUPPRESSION_V5_H__
diff --git a/compiler/luci/import/include/luci/Import/Nodes/CirclePadV2.h b/compiler/luci/import/include/luci/Import/Nodes/CirclePadV2.h
new file mode 100644
index 000000000..089f52c81
--- /dev/null
+++ b/compiler/luci/import/include/luci/Import/Nodes/CirclePadV2.h
@@ -0,0 +1,37 @@
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __LUCI_IMPORT_OP_CIRCLE_PADV2_H__
+#define __LUCI_IMPORT_OP_CIRCLE_PADV2_H__
+
+#include "luci/Import/GraphBuilder.h"
+
+namespace luci
+{
+
+class CirclePadV2GraphBuilder : public GraphBuilder
+{
+public:
+ bool validate(const ValidateArgs &args) const final;
+
+private:
+ CircleNode *build_node(const circle::OperatorT &op, const std::vector<CircleNode *> &inputs,
+ loco::Graph *graph) const final;
+};
+
+} // namespace luci
+
+#endif // __LUCI_IMPORT_OP_CIRCLE_PADV2_H__
diff --git a/compiler/luci/import/src/GraphBuilderRegistry.cpp b/compiler/luci/import/src/GraphBuilderRegistry.cpp
index cc328cc16..c6bcacb54 100644
--- a/compiler/luci/import/src/GraphBuilderRegistry.cpp
+++ b/compiler/luci/import/src/GraphBuilderRegistry.cpp
@@ -83,10 +83,12 @@ GraphBuilderRegistry::GraphBuilderRegistry()
CIRCLE_NODE(MUL, CircleMulGraphBuilder); // 18
CIRCLE_NODE(NEG, CircleNegGraphBuilder); // 59
CIRCLE_NODE(NON_MAX_SUPPRESSION_V4, CircleNonMaxSuppressionV4GraphBuilder); // 120,
+ CIRCLE_NODE(NON_MAX_SUPPRESSION_V5, CircleNonMaxSuppressionV5GraphBuilder); // 121,
CIRCLE_NODE(NOT_EQUAL, CircleNotEqualGraphBuilder); // 72
CIRCLE_NODE(ONE_HOT, CircleOneHotGraphBuilder); // 85
CIRCLE_NODE(PACK, CirclePackGraphBuilder); // 83
CIRCLE_NODE(PAD, CirclePadGraphBuilder); // 34
+ CIRCLE_NODE(PADV2, CirclePadV2GraphBuilder); // 60
CIRCLE_NODE(POW, CirclePowGraphBuilder); // 78
CIRCLE_NODE(PRELU, CirclePReluGraphBuilder); // 54,
CIRCLE_NODE(RANGE, CircleRangeGraphBuilder); // 96
@@ -155,11 +157,9 @@ GraphBuilderRegistry::GraphBuilderRegistry()
// BuiltinOperator_DELEGATE = 51,
// BuiltinOperator_BIDIRECTIONAL_SEQUENCE_LSTM = 52,
// BuiltinOperator_ARG_MAX = 56,
- // BuiltinOperator_PADV2 = 60,
// BuiltinOperator_FAKE_QUANT = 80,
// BuiltinOperator_QUANTIZE = 114,
// BuiltinOperator_HARD_SWISH = 117,
- // BuiltinOperator_NON_MAX_SUPPRESSION_V5 = 121,
// BuiltinOperator_DENSIFY = 124,
}
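
This hunk wires the two new GraphBuilders to their builtin opcodes via CIRCLE_NODE and drops them from the not-yet-supported comment list. A conceptual sketch of an opcode-to-builder registry of this kind follows; Registry, add and lookup are hypothetical names used only for illustration, since the CIRCLE_NODE macro's expansion is not shown in this diff.

// Conceptual sketch of an opcode-to-builder registry like the one being extended
// above; all names here are hypothetical stand-ins, not the luci API.
#include <map>
#include <memory>

enum class BuiltinOperator { PAD, PADV2, NON_MAX_SUPPRESSION_V4, NON_MAX_SUPPRESSION_V5 };

struct GraphBuilderBase
{
  virtual ~GraphBuilderBase() = default;
};

struct PadV2Builder : GraphBuilderBase {};
struct NonMaxSuppressionV5Builder : GraphBuilderBase {};

class Registry
{
public:
  void add(BuiltinOperator op, std::unique_ptr<GraphBuilderBase> builder)
  {
    _builders[op] = std::move(builder); // one builder per builtin opcode
  }

  const GraphBuilderBase *lookup(BuiltinOperator op) const
  {
    auto it = _builders.find(op);
    return it == _builders.end() ? nullptr : it->second.get();
  }

private:
  std::map<BuiltinOperator, std::unique_ptr<GraphBuilderBase>> _builders;
};

int main()
{
  Registry registry;
  registry.add(BuiltinOperator::PADV2, std::make_unique<PadV2Builder>());
  registry.add(BuiltinOperator::NON_MAX_SUPPRESSION_V5,
               std::make_unique<NonMaxSuppressionV5Builder>());
  return registry.lookup(BuiltinOperator::PADV2) ? 0 : 1; // unregistered ops stay unsupported
}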
diff --git a/compiler/luci/import/src/Nodes/CircleBatchToSpaceND.cpp b/compiler/luci/import/src/Nodes/CircleBatchToSpaceND.cpp
index 8c2039fff..7faab141c 100644
--- a/compiler/luci/import/src/Nodes/CircleBatchToSpaceND.cpp
+++ b/compiler/luci/import/src/Nodes/CircleBatchToSpaceND.cpp
@@ -18,49 +18,16 @@
#include <luci/IR/Nodes/CircleBatchToSpaceND.h>
-#include <loco.h>
+#include "ValidateHelpers.h"
-#include <cassert>
+#include <loco.h>
namespace luci
{
bool CircleBatchToSpaceNDGraphBuilder::validate(const ValidateArgs &args) const
{
- const auto &inputs = args.op.inputs;
- if (inputs.size() != 3)
- return false;
-
- // input 1 and 2 should have INT32/INT64 type
- const auto &tensors = args.reader.tensors();
- const auto &tensor_1 = tensors.at(inputs.at(1));
- switch (tensor_1->type)
- {
- case circle::TensorType_INT32:
- case circle::TensorType_INT64:
- break;
- default:
- return false;
- }
- const auto &tensor_2 = tensors.at(inputs.at(2));
- switch (tensor_2->type)
- {
- case circle::TensorType_INT32:
- case circle::TensorType_INT64:
- break;
- default:
- return false;
- }
-
- // Only support input shape dimension 3 and 4 only
- const auto &tensor_0 = tensors.at(inputs.at(0));
- const auto t_0_s = tensor_0->shape.size();
- if (t_0_s != 3 && t_0_s != 4)
- return false;
-
- // TODO check input shape
-
- return true;
+ return validate_batch_space_nd(args);
}
CircleNode *CircleBatchToSpaceNDGraphBuilder::build_node(const circle::OperatorT &,
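
The inline BatchToSpaceND check moves into a shared validate_batch_space_nd helper (presumably reusable by SpaceToBatchND, judging by the name). Based on the deleted body, the rule is: exactly three inputs, the two index inputs typed INT32 or INT64, and a rank-3 or rank-4 data input. A standalone sketch of that rule, with simplified stand-in types rather than the circle schema:

// Standalone sketch of the rule that moved into validate_batch_space_nd;
// Tensor and TensorType are simplified stand-ins for the circle schema types.
#include <cstddef>
#include <vector>

enum class TensorType { FLOAT32, INT32, INT64 };

struct Tensor
{
  TensorType type;
  std::vector<int> shape;
};

bool validate_batch_space_nd(const std::vector<Tensor> &inputs)
{
  if (inputs.size() != 3)
    return false;

  // block_shape and paddings/crops inputs must be INT32 or INT64
  for (std::size_t i = 1; i <= 2; ++i)
  {
    if (inputs[i].type != TensorType::INT32 && inputs[i].type != TensorType::INT64)
      return false;
  }

  // only rank-3 and rank-4 data inputs are supported
  const auto rank = inputs[0].shape.size();
  return rank == 3 || rank == 4;
}

int main()
{
  std::vector<Tensor> ok = {{TensorType::FLOAT32, {1, 4, 4, 3}},
                            {TensorType::INT32, {2}},
                            {TensorType::INT32, {2, 2}}};
  return validate_batch_space_nd(ok) ? 0 : 1;
}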
diff --git a/compiler/luci/import/src/Nodes/CircleConst.cpp b/compiler/luci/import/src/Nodes/CircleConst.cpp
index 7131dc115..fad7a0757 100644
--- a/compiler/luci/import/src/Nodes/CircleConst.cpp
+++ b/compiler/luci/import/src/Nodes/CircleConst.cpp
@@ -118,6 +118,10 @@ CircleConst *create_circleconst(GraphBuilderContext *context, int32_t tensor_ind
copy_data<loco::DataType::U8>(buffer, num_elements, const_node);
break;
+ case loco::DataType::S8:
+ copy_data<loco::DataType::S8>(buffer, num_elements, const_node);
+ break;
+
case loco::DataType::S16:
copy_data<loco::DataType::S16>(buffer, num_elements, const_node);
break;
diff --git a/compiler/luci/import/src/Nodes/CircleMaximum.cpp b/compiler/luci/import/src/Nodes/CircleMaximum.cpp
index 4d1468f19..805d5bc89 100644
--- a/compiler/luci/import/src/Nodes/CircleMaximum.cpp
+++ b/compiler/luci/import/src/Nodes/CircleMaximum.cpp
@@ -18,6 +18,8 @@
#include <luci/IR/Nodes/CircleMaximum.h>
+#include "ValidateHelpers.h"
+
#include <loco.h>
namespace luci
@@ -25,37 +27,7 @@ namespace luci
bool CircleMaximumGraphBuilder::validate(const ValidateArgs &args) const
{
- const auto &inputs = args.op.inputs;
- const auto &outputs = args.op.outputs;
-
- if (inputs.size() != 2)
- return false;
-
- if (outputs.size() != 1)
- return false;
-
- const auto &tensors = args.reader.tensors();
- const auto &tensor = tensors.at(inputs.at(0));
-
- switch (tensor->type)
- {
- case circle::TensorType_FLOAT16:
- case circle::TensorType_FLOAT32:
- case circle::TensorType_FLOAT64:
- case circle::TensorType_INT32:
- case circle::TensorType_INT64:
- break;
- default:
- return false;
- }
-
- if (tensors[inputs.at(1)]->type != tensor->type)
- return false;
-
- if (tensors[outputs[0]]->type != tensor->type)
- return false;
-
- return true;
+ return validate_minmax(args);
}
CircleNode *CircleMaximumGraphBuilder::build_node(const circle::OperatorT &,
diff --git a/compiler/luci/import/src/Nodes/CircleMinimum.cpp b/compiler/luci/import/src/Nodes/CircleMinimum.cpp
index 8b4daf197..381039e88 100644
--- a/compiler/luci/import/src/Nodes/CircleMinimum.cpp
+++ b/compiler/luci/import/src/Nodes/CircleMinimum.cpp
@@ -18,6 +18,8 @@
#include <luci/IR/Nodes/CircleMinimum.h>
+#include "ValidateHelpers.h"
+
#include <loco.h>
namespace luci
@@ -25,37 +27,7 @@ namespace luci
bool CircleMinimumGraphBuilder::validate(const ValidateArgs &args) const
{
- const auto &inputs = args.op.inputs;
- const auto &outputs = args.op.outputs;
-
- if (inputs.size() != 2)
- return false;
-
- if (outputs.size() != 1)
- return false;
-
- const auto &tensors = args.reader.tensors();
- const auto &tensor = tensors.at(inputs.at(0));
-
- switch (tensor->type)
- {
- case circle::TensorType_FLOAT16:
- case circle::TensorType_FLOAT32:
- case circle::TensorType_FLOAT64:
- case circle::TensorType_INT32:
- case circle::TensorType_INT64:
- break;
- default:
- return false;
- }
-
- if (tensors[inputs.at(1)]->type != tensor->type)
- return false;
-
- if (tensors[outputs[0]]->type != tensor->type)
- return false;
-
- return true;
+ return validate_minmax(args);
}
CircleNode *CircleMinimumGraphBuilder::build_node(const circle::OperatorT &,
diff --git a/compiler/luci/import/src/Nodes/CircleNonMaxSuppressionV5.cpp b/compiler/luci/import/src/Nodes/CircleNonMaxSuppressionV5.cpp
new file mode 100644
index 000000000..241dbf5ff
--- /dev/null
+++ b/compiler/luci/import/src/Nodes/CircleNonMaxSuppressionV5.cpp
@@ -0,0 +1,126 @@
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "luci/Import/Nodes/CircleNonMaxSuppressionV5.h"
+
+#include <luci/IR/Nodes/CircleNonMaxSuppressionV5.h>
+#include <luci/IR/Nodes/CircleNonMaxSuppressionV5Out.h>
+
+#include <loco.h>
+#include <oops/UserExn.h>
+
+namespace luci
+{
+
+bool CircleNonMaxSuppressionV5GraphBuilder::validate(const ValidateArgs &args) const
+{
+ const auto &inputs = args.op.inputs;
+ const auto &outputs = args.op.outputs;
+
+ if (inputs.size() != 6)
+ return false;
+ if (outputs.size() != 3)
+ return false;
+
+ const auto &tensors = args.reader.tensors();
+ const auto &boxes_tensor = tensors.at(inputs[0]);
+ if (boxes_tensor->shape.size() != 2)
+ return false;
+ if (boxes_tensor->shape.at(1) != 4)
+ return false;
+ if (boxes_tensor->shape.at(0) != tensors.at(inputs[1])->shape.at(0))
+ return false;
+
+ if (tensors.at(inputs[2])->type != circle::TensorType_INT32)
+ return false;
+ if (tensors.at(inputs[3])->type != circle::TensorType_FLOAT32)
+ return false;
+ if (tensors.at(inputs[4])->type != circle::TensorType_FLOAT32)
+ return false;
+ if (tensors.at(inputs[5])->type != circle::TensorType_FLOAT32)
+ return false;
+
+ return true;
+}
+
+/**
+ * @brief NonMaxSuppressionV5 Node builder
+ *
+ * @note loco does not currently support multiple outputs,
+ * so multiple CircleNonMaxSuppressionV5Out nodes are created to emulate them
+ */
+
+void CircleNonMaxSuppressionV5GraphBuilder::build(const circle::OperatorT &op,
+ GraphBuilderContext *context) const
+{
+ assert(context != nullptr);
+
+ auto graph = context->graph();
+
+ const std::vector<int32_t> &inputs = op.inputs;
+ const std::vector<int32_t> &outputs = op.outputs;
+ const auto &tensors = context->reader()->tensors();
+ const auto &opcodes = context->reader()->opcodes();
+ auto tensors_ptr = context->reader()->tensors_ptr();
+ assert(tensors_ptr != nullptr);
+
+ std::vector<CircleNode *> input_nodes;
+ for (const int32_t input_tensor_index : inputs)
+ {
+ input_nodes.push_back(context->nodefinder()->node(input_tensor_index));
+ }
+
+ // Create CircleNonMaxSuppressionV5
+ auto node = graph->nodes()->create<CircleNonMaxSuppressionV5>();
+ node->boxes(input_nodes[0]);
+ node->scores(input_nodes[1]);
+ node->max_output_size(input_nodes[2]);
+ node->iou_threshold(input_nodes[3]);
+ node->score_threshold(input_nodes[4]);
+ node->soft_nms_sigma(input_nodes[5]);
+
+ assert(outputs.size() == 3);
+ {
+ // Use the name of output 0 as the NonMaxSuppressionV5 node name
+ const circle::TensorT &output_tensor = *tensors[outputs[0]];
+ node->name(tensor_name(output_tensor));
+ node->op_version(opcodes[op.opcode_index].get()->version);
+
+ // NOTE Quantization is set on the virtual outputs, not on NonMaxSuppressionV5 itself
+ }
+
+ // Create virtual outputs of NonMaxSuppressionV5
+ for (size_t n = 0; n < outputs.size(); ++n)
+ {
+ const circle::TensorT &output_tensor = *tensors[outputs[n]];
+
+ auto *nodeout = graph->nodes()->create<CircleNonMaxSuppressionV5Out>();
+ copy_tensor_attributes(output_tensor, nodeout);
+
+ // mark shape_status
+ if (tensors_ptr->Get(outputs[n])->shape() == nullptr)
+ nodeout->shape_status(ShapeStatus::NOSHAPE);
+ else
+ nodeout->shape_status(ShapeStatus::VALID);
+
+ nodeout->input(node);
+ nodeout->index(n);
+
+ context->nodefinder()->enroll(outputs[n], nodeout);
+ }
+}
+
+} // namespace luci
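Editor's note: since loco graphs are single-output, the builder above fans the three NMS results out through virtual CircleNonMaxSuppressionV5Out nodes. A minimal sketch (not part of the patch; the helper name is illustrative) of how a consumer could recover a particular output by its index:

#include <loco.h>
#include <luci/IR/CircleNodes.h>

// Illustrative helper only: walk the successors of the NMS node and pick the
// virtual output whose index() matches the requested result slot.
luci::CircleNonMaxSuppressionV5Out *nmsv5_output(luci::CircleNonMaxSuppressionV5 *nmsv5,
                                                 int32_t index)
{
  for (auto *succ : loco::succs(nmsv5))
  {
    auto *out = dynamic_cast<luci::CircleNonMaxSuppressionV5Out *>(succ);
    if (out != nullptr && out->index() == index)
      return out;
  }
  return nullptr; // no virtual output enrolled for that index
}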
diff --git a/compiler/luci/import/src/Nodes/CirclePadV2.cpp b/compiler/luci/import/src/Nodes/CirclePadV2.cpp
new file mode 100644
index 000000000..493876e68
--- /dev/null
+++ b/compiler/luci/import/src/Nodes/CirclePadV2.cpp
@@ -0,0 +1,52 @@
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "luci/Import/Nodes/CirclePadV2.h"
+
+#include <luci/IR/Nodes/CirclePadV2.h>
+
+#include <loco.h>
+
+namespace luci
+{
+
+bool CirclePadV2GraphBuilder::validate(const ValidateArgs &args) const
+{
+ if (args.op.inputs.size() != 3)
+ return false;
+
+ if (args.op.outputs.size() != 1)
+ return false;
+
+ return true;
+}
+
+CircleNode *CirclePadV2GraphBuilder::build_node(const circle::OperatorT &op,
+ const std::vector<CircleNode *> &inputs,
+ loco::Graph *graph) const
+{
+ auto *node = graph->nodes()->create<CirclePadV2>();
+ node->input(inputs[0]);
+ node->paddings(inputs[1]);
+ node->constant_values(inputs[2]);
+
+ const auto *options = op.builtin_options.AsPadV2Options();
+ (void)options; // There are no options.
+
+ return node;
+}
+
+} // namespace luci
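Editor's note: a hedged sketch of wiring the new CirclePadV2 node by hand (e.g. in a unit test), mirroring what build_node() above produces; the function name and the placeholder CircleConst inputs are assumptions, not part of the patch.

#include <loco.h>
#include <luci/IR/CircleNodes.h>

// Illustrative only: build a PADV2 node with placeholder constants as inputs.
void make_padv2_example(loco::Graph *g)
{
  auto *input = g->nodes()->create<luci::CircleConst>();
  auto *paddings = g->nodes()->create<luci::CircleConst>();
  auto *constant_values = g->nodes()->create<luci::CircleConst>();

  auto *pad = g->nodes()->create<luci::CirclePadV2>();
  pad->input(input);                     // 0: tensor to pad
  pad->paddings(paddings);               // 1: [rank, 2] paddings tensor
  pad->constant_values(constant_values); // 2: scalar fill value
}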
diff --git a/compiler/luci/import/src/Nodes/CircleReduceMax.cpp b/compiler/luci/import/src/Nodes/CircleReduceMax.cpp
index 05492dbc6..e633abf7d 100644
--- a/compiler/luci/import/src/Nodes/CircleReduceMax.cpp
+++ b/compiler/luci/import/src/Nodes/CircleReduceMax.cpp
@@ -18,33 +18,14 @@
#include <luci/IR/Nodes/CircleReduceMax.h>
+#include "ValidateHelpers.h"
+
namespace luci
{
bool CircleReduceMaxGraphBuilder::validate(const ValidateArgs &args) const
{
- const auto &inputs = args.op.inputs;
- const auto &outputs = args.op.outputs;
-
- if (inputs.size() != 2)
- return false;
-
- if (outputs.size() != 1)
- return false;
-
- const auto &tensors = args.reader.tensors();
- const auto &tensor_axis = tensors.at(inputs.at(1));
-
- switch (tensor_axis->type)
- {
- case circle::TensorType_INT32:
- case circle::TensorType_INT64:
- break;
- default:
- return false;
- }
-
- return true;
+ return validate_reduce_minmax(args);
}
CircleNode *CircleReduceMaxGraphBuilder::build_node(const circle::OperatorT &op,
diff --git a/compiler/luci/import/src/Nodes/CircleReduceMin.cpp b/compiler/luci/import/src/Nodes/CircleReduceMin.cpp
index 117d5295a..bfc3001f8 100644
--- a/compiler/luci/import/src/Nodes/CircleReduceMin.cpp
+++ b/compiler/luci/import/src/Nodes/CircleReduceMin.cpp
@@ -18,33 +18,14 @@
#include <luci/IR/Nodes/CircleReduceMin.h>
+#include "ValidateHelpers.h"
+
namespace luci
{
bool CircleReduceMinGraphBuilder::validate(const ValidateArgs &args) const
{
- const auto &inputs = args.op.inputs;
- const auto &outputs = args.op.outputs;
-
- if (inputs.size() != 2)
- return false;
-
- if (outputs.size() != 1)
- return false;
-
- const auto &tensors = args.reader.tensors();
- const auto &tensor_axis = tensors.at(inputs.at(1));
-
- switch (tensor_axis->type)
- {
- case circle::TensorType_INT32:
- case circle::TensorType_INT64:
- break;
- default:
- return false;
- }
-
- return true;
+ return validate_reduce_minmax(args);
}
CircleNode *CircleReduceMinGraphBuilder::build_node(const circle::OperatorT &op,
diff --git a/compiler/luci/import/src/Nodes/CircleSpaceToBatchND.cpp b/compiler/luci/import/src/Nodes/CircleSpaceToBatchND.cpp
index c1d508e3e..fbf9f6b12 100644
--- a/compiler/luci/import/src/Nodes/CircleSpaceToBatchND.cpp
+++ b/compiler/luci/import/src/Nodes/CircleSpaceToBatchND.cpp
@@ -18,49 +18,16 @@
#include <luci/IR/Nodes/CircleSpaceToBatchND.h>
-#include <loco.h>
+#include "ValidateHelpers.h"
-#include <cassert>
+#include <loco.h>
namespace luci
{
bool CircleSpaceToBatchNDGraphBuilder::validate(const ValidateArgs &args) const
{
- const auto &inputs = args.op.inputs;
- if (inputs.size() != 3)
- return false;
-
- // input 1 and 2 should have INT32/INT64 type
- const auto &tensors = args.reader.tensors();
- const auto &tensor_1 = tensors.at(inputs.at(1));
- switch (tensor_1->type)
- {
- case circle::TensorType_INT32:
- case circle::TensorType_INT64:
- break;
- default:
- return false;
- }
- const auto &tensor_2 = tensors.at(inputs.at(2));
- switch (tensor_2->type)
- {
- case circle::TensorType_INT32:
- case circle::TensorType_INT64:
- break;
- default:
- return false;
- }
-
- // Only support input shape dimension 3 and 4 only
- const auto &tensor_0 = tensors.at(inputs.at(0));
- const auto t_0_s = tensor_0->shape.size();
- if (t_0_s != 3 && t_0_s != 4)
- return false;
-
- // TODO check input shape
-
- return true;
+ return validate_batch_space_nd(args);
}
CircleNode *CircleSpaceToBatchNDGraphBuilder::build_node(const circle::OperatorT &,
diff --git a/compiler/luci/import/src/Nodes/CircleSparseToDense.cpp b/compiler/luci/import/src/Nodes/CircleSparseToDense.cpp
index 26d575e90..ac756b1f3 100644
--- a/compiler/luci/import/src/Nodes/CircleSparseToDense.cpp
+++ b/compiler/luci/import/src/Nodes/CircleSparseToDense.cpp
@@ -42,7 +42,8 @@ CircleNode *CircleSparseToDenseGraphBuilder::build_node(const circle::OperatorT
node->default_value(inputs.at(3));
const auto *options = op.builtin_options.AsSparseToDenseOptions();
- node->validate_indices(options->validate_indices);
+ if (options)
+ node->validate_indices(options->validate_indices);
return node;
}
diff --git a/compiler/luci/import/src/Nodes/CircleTransposeConv.cpp b/compiler/luci/import/src/Nodes/CircleTransposeConv.cpp
index ddb196657..c280faaf5 100644
--- a/compiler/luci/import/src/Nodes/CircleTransposeConv.cpp
+++ b/compiler/luci/import/src/Nodes/CircleTransposeConv.cpp
@@ -27,7 +27,7 @@ namespace luci
bool CircleTransposeConvGraphBuilder::validate(const ValidateArgs &args) const
{
- if (args.op.inputs.size() != 3)
+ if (args.op.inputs.size() != 3 && args.op.inputs.size() != 4)
return false;
const auto &inputs = args.op.inputs;
@@ -60,6 +60,17 @@ CircleNode *CircleTransposeConvGraphBuilder::build_node(const circle::OperatorT
node->inputSizes(inputs.at(0));
node->filter(inputs.at(1));
node->outBackprop(inputs.at(2));
+ if (inputs.size() == 3)
+ node->bias(graph->nodes()->create<CircleOutputExclude>());
+ else
+ node->bias(inputs.at(3));
+
+ if (auto bias = dynamic_cast<luci::CircleOutputExclude *>(node->bias()))
+ {
+ // CircleOutputExclude doesn't need a type, but since all nodes must have a type, a dummy type
+ // is inserted.
+ bias->dtype(loco::DataType::FLOAT32);
+ }
const auto *options = op.builtin_options.AsTransposeConvOptions();
node->padding(luci_padding(options->padding));
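Editor's note: because the importer now substitutes a CircleOutputExclude placeholder when the optional bias is absent, downstream code can tell the two cases apart with a check like this sketch (illustrative, not part of the patch):

#include <luci/IR/CircleNodes.h>

// Illustrative only: a TRANSPOSE_CONV has a real bias exactly when its 4th
// input is not the CircleOutputExclude placeholder created by the importer.
bool has_real_bias(const luci::CircleTransposeConv *node)
{
  return dynamic_cast<const luci::CircleOutputExclude *>(node->bias()) == nullptr;
}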
diff --git a/compiler/luci/import/src/ValidateHelpers.cpp b/compiler/luci/import/src/ValidateHelpers.cpp
new file mode 100644
index 000000000..12a6548d6
--- /dev/null
+++ b/compiler/luci/import/src/ValidateHelpers.cpp
@@ -0,0 +1,121 @@
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "ValidateHelpers.h"
+
+namespace luci
+{
+
+bool validate_batch_space_nd(const GraphBuilderBase::ValidateArgs &args)
+{
+ const auto &inputs = args.op.inputs;
+ if (inputs.size() != 3)
+ return false;
+
+ // input 1 and 2 should have INT32/INT64 type
+ const auto &tensors = args.reader.tensors();
+ const auto &tensor_1 = tensors.at(inputs.at(1));
+ switch (tensor_1->type)
+ {
+ case circle::TensorType_INT32:
+ case circle::TensorType_INT64:
+ break;
+ default:
+ return false;
+ }
+ const auto &tensor_2 = tensors.at(inputs.at(2));
+ switch (tensor_2->type)
+ {
+ case circle::TensorType_INT32:
+ case circle::TensorType_INT64:
+ break;
+ default:
+ return false;
+ }
+
+ // Only input shapes of rank 3 or 4 are supported
+ const auto &tensor_0 = tensors.at(inputs.at(0));
+ const auto t_0_s = tensor_0->shape.size();
+ if (t_0_s != 3 && t_0_s != 4)
+ return false;
+
+ // TODO check input shape
+
+ return true;
+}
+
+bool validate_minmax(const GraphBuilderBase::ValidateArgs &args)
+{
+ const auto &inputs = args.op.inputs;
+ const auto &outputs = args.op.outputs;
+
+ if (inputs.size() != 2)
+ return false;
+
+ if (outputs.size() != 1)
+ return false;
+
+ const auto &tensors = args.reader.tensors();
+ const auto &tensor = tensors.at(inputs.at(0));
+
+ switch (tensor->type)
+ {
+ case circle::TensorType_FLOAT16:
+ case circle::TensorType_FLOAT32:
+ case circle::TensorType_FLOAT64:
+ case circle::TensorType_INT32:
+ case circle::TensorType_INT64:
+ break;
+ default:
+ return false;
+ }
+
+ if (tensors[inputs.at(1)]->type != tensor->type)
+ return false;
+
+ if (tensors[outputs[0]]->type != tensor->type)
+ return false;
+
+ return true;
+}
+
+bool validate_reduce_minmax(const GraphBuilderBase::ValidateArgs &args)
+{
+ const auto &inputs = args.op.inputs;
+ const auto &outputs = args.op.outputs;
+
+ if (inputs.size() != 2)
+ return false;
+
+ if (outputs.size() != 1)
+ return false;
+
+ const auto &tensors = args.reader.tensors();
+ const auto &tensor_axis = tensors.at(inputs.at(1));
+
+ switch (tensor_axis->type)
+ {
+ case circle::TensorType_INT32:
+ case circle::TensorType_INT64:
+ break;
+ default:
+ return false;
+ }
+
+ return true;
+}
+
+} // namespace luci
diff --git a/compiler/luci/import/src/ValidateHelpers.h b/compiler/luci/import/src/ValidateHelpers.h
new file mode 100644
index 000000000..4047b2f08
--- /dev/null
+++ b/compiler/luci/import/src/ValidateHelpers.h
@@ -0,0 +1,35 @@
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __LUCI_VALIDATE_HELPERS_H__
+#define __LUCI_VALIDATE_HELPERS_H__
+
+#include "luci/Import/GraphBuilderBase.h"
+
+/**
+ * @note The functions in this file are helpers that reduce duplicated validation code
+ */
+
+namespace luci
+{
+
+bool validate_batch_space_nd(const GraphBuilderBase::ValidateArgs &args);
+bool validate_minmax(const GraphBuilderBase::ValidateArgs &args);
+bool validate_reduce_minmax(const GraphBuilderBase::ValidateArgs &args);
+
+} // namespace luci
+
+#endif // __LUCI_VALIDATE_HELPERS_H__
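Editor's note: with these helpers, a builder's validate() collapses to a single call, as the CircleMaximum/CircleMinimum and CircleReduceMax/CircleReduceMin changes above show. A hedged sketch of calling a helper directly (the function name is an assumption; real builders declare validate() in their headers under luci/Import/Nodes/):

#include "ValidateHelpers.h"

// Illustrative only: the helpers are plain functions over the ValidateArgs bundle,
// so they can be reused from any builder or test without further plumbing.
bool my_reduce_validate(const luci::GraphBuilderBase::ValidateArgs &args)
{
  return luci::validate_reduce_minmax(args);
}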
diff --git a/compiler/luci/lang/include/luci/IR/CircleNodes.h b/compiler/luci/lang/include/luci/IR/CircleNodes.h
index e57f5bb3e..25b86d2e9 100644
--- a/compiler/luci/lang/include/luci/IR/CircleNodes.h
+++ b/compiler/luci/lang/include/luci/IR/CircleNodes.h
@@ -71,6 +71,7 @@
#include "Nodes/CircleMul.h"
#include "Nodes/CircleNeg.h"
#include "Nodes/CircleNonMaxSuppressionV4.h"
+#include "Nodes/CircleNonMaxSuppressionV5.h"
#include "Nodes/CircleNotEqual.h"
#include "Nodes/CircleOneHot.h"
#include "Nodes/CirclePack.h"
@@ -134,6 +135,7 @@
#include "Nodes/CircleCustomOut.h"
#include "Nodes/CircleIfOut.h"
#include "Nodes/CircleNonMaxSuppressionV4Out.h"
+#include "Nodes/CircleNonMaxSuppressionV5Out.h"
#include "Nodes/CircleUnpackOut.h"
#include "Nodes/CircleUniqueOut.h"
#include "Nodes/CircleSplitOut.h"
diff --git a/compiler/luci/lang/include/luci/IR/CircleNodes.lst b/compiler/luci/lang/include/luci/IR/CircleNodes.lst
index 801051848..9f0a1b16e 100644
--- a/compiler/luci/lang/include/luci/IR/CircleNodes.lst
+++ b/compiler/luci/lang/include/luci/IR/CircleNodes.lst
@@ -64,6 +64,7 @@ CIRCLE_NODE(MIRROR_PAD, luci::CircleMirrorPad)
CIRCLE_NODE(MUL, luci::CircleMul)
CIRCLE_NODE(NEG, luci::CircleNeg)
CIRCLE_NODE(NON_MAX_SUPPRESSION_V4, luci::CircleNonMaxSuppressionV4)
+CIRCLE_NODE(NON_MAX_SUPPRESSION_V5, luci::CircleNonMaxSuppressionV5)
CIRCLE_NODE(NOT_EQUAL, luci::CircleNotEqual)
CIRCLE_NODE(ONE_HOT, luci::CircleOneHot)
CIRCLE_NODE(PACK, luci::CirclePack)
@@ -130,6 +131,7 @@ CIRCLE_NODE(CIRCLEOUTPUTEXCLUDE, luci::CircleOutputExclude)
CIRCLE_NODE(CIRCLECUSTOMOUT, luci::CircleCustomOut)
CIRCLE_NODE(CIRCLEIFOUT, luci::CircleIfOut)
CIRCLE_NODE(CIRCLENONMAXSUPPRESSIONV4OUT, luci::CircleNonMaxSuppressionV4Out)
+CIRCLE_NODE(CIRCLENONMAXSUPPRESSIONV5OUT, luci::CircleNonMaxSuppressionV5Out)
CIRCLE_NODE(CIRCLESPLITOUT, luci::CircleSplitOut)
CIRCLE_NODE(CIRCLESPLITVOUT, luci::CircleSplitVOut)
CIRCLE_NODE(CIRCLETOPKV2OUT, luci::CircleTopKV2Out)
diff --git a/compiler/luci/lang/include/luci/IR/Nodes/CircleNonMaxSuppressionV5.h b/compiler/luci/lang/include/luci/IR/Nodes/CircleNonMaxSuppressionV5.h
new file mode 100644
index 000000000..52d682147
--- /dev/null
+++ b/compiler/luci/lang/include/luci/IR/Nodes/CircleNonMaxSuppressionV5.h
@@ -0,0 +1,56 @@
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __LUCI_IR_CIRCLE_NON_MAX_SUPPRESSION_V5_H__
+#define __LUCI_IR_CIRCLE_NON_MAX_SUPPRESSION_V5_H__
+
+#include "luci/IR/CircleNodeDecl.h"
+#include "luci/IR/CircleOpcode.h"
+
+#include "luci/IR/LuciNodeMixins.h"
+
+namespace luci
+{
+
+/**
+ * @brief NON_MAX_SUPPRESSION_V5 in Circle
+ */
+class CircleNonMaxSuppressionV5 final
+ : public FixedArityNode<6, CircleNodeImpl<CircleOpcode::NON_MAX_SUPPRESSION_V5>>
+{
+public:
+ loco::Node *boxes(void) const { return at(0)->node(); }
+ void boxes(loco::Node *node) { at(0)->node(node); }
+
+ loco::Node *scores(void) const { return at(1)->node(); }
+ void scores(loco::Node *node) { at(1)->node(node); }
+
+ loco::Node *max_output_size(void) const { return at(2)->node(); }
+ void max_output_size(loco::Node *node) { at(2)->node(node); }
+
+ loco::Node *iou_threshold(void) const { return at(3)->node(); }
+ void iou_threshold(loco::Node *node) { at(3)->node(node); }
+
+ loco::Node *score_threshold(void) const { return at(4)->node(); }
+ void score_threshold(loco::Node *node) { at(4)->node(node); }
+
+ loco::Node *soft_nms_sigma(void) const { return at(5)->node(); }
+ void soft_nms_sigma(loco::Node *node) { at(5)->node(node); }
+};
+
+} // namespace luci
+
+#endif // __LUCI_IR_CIRCLE_NON_MAX_SUPPRESSION_V5_H__
diff --git a/compiler/luci/lang/include/luci/IR/Nodes/CircleNonMaxSuppressionV5Out.h b/compiler/luci/lang/include/luci/IR/Nodes/CircleNonMaxSuppressionV5Out.h
new file mode 100644
index 000000000..0c6989cc7
--- /dev/null
+++ b/compiler/luci/lang/include/luci/IR/Nodes/CircleNonMaxSuppressionV5Out.h
@@ -0,0 +1,51 @@
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __LUCI_IR_CIRCLE_NONMAXSUPPRESSIONV5OUT_H__
+#define __LUCI_IR_CIRCLE_NONMAXSUPPRESSIONV5OUT_H__
+
+#include "luci/IR/CircleNodeDecl.h"
+#include "luci/IR/CircleOpcode.h"
+
+#include "luci/IR/LuciNodeMixins.h"
+
+namespace luci
+{
+
+/**
+ * @brief Virtual NONMAXSUPPRESSIONV5OUT in Circle
+ */
+class CircleNonMaxSuppressionV5Out final
+ : public FixedArityNode<1, CircleNodeImpl<CircleOpcode::CIRCLENONMAXSUPPRESSIONV5OUT>>
+{
+public:
+ CircleNonMaxSuppressionV5Out() = default;
+
+public:
+ loco::Node *input(void) const { return at(0)->node(); }
+ void input(loco::Node *node) { at(0)->node(node); }
+
+public:
+ int32_t index(void) const { return _index; }
+ void index(int32_t index) { _index = index; }
+
+private:
+ int32_t _index{-1};
+};
+
+} // namespace luci
+
+#endif // __LUCI_IR_CIRCLE_NONMAXSUPPRESSIONV5OUT_H__
diff --git a/compiler/luci/lang/include/luci/IR/Nodes/CircleSparseToDense.h b/compiler/luci/lang/include/luci/IR/Nodes/CircleSparseToDense.h
index 9f5051317..7e80304b0 100644
--- a/compiler/luci/lang/include/luci/IR/Nodes/CircleSparseToDense.h
+++ b/compiler/luci/lang/include/luci/IR/Nodes/CircleSparseToDense.h
@@ -49,7 +49,7 @@ public:
void validate_indices(bool validate_indices) { _validate_indices = validate_indices; }
private:
- bool _validate_indices{true};
+ bool _validate_indices{false};
};
} // namespace luci
diff --git a/compiler/luci/lang/include/luci/IR/Nodes/CircleTransposeConv.h b/compiler/luci/lang/include/luci/IR/Nodes/CircleTransposeConv.h
index fc638d49f..e355102d6 100644
--- a/compiler/luci/lang/include/luci/IR/Nodes/CircleTransposeConv.h
+++ b/compiler/luci/lang/include/luci/IR/Nodes/CircleTransposeConv.h
@@ -34,7 +34,8 @@ namespace luci
* 'out' actually means 'out' and 'in' of this node.
*/
class CircleTransposeConv final
- : public FixedArityNode<3, CircleNodeImpl<CircleOpcode::TRANSPOSE_CONV>>
+ : public FixedArityNode<4, CircleNodeImpl<CircleOpcode::TRANSPOSE_CONV>>,
+ public LuciNodeMixin<LuciNodeTrait::Bias>
{
public:
loco::Node *inputSizes(void) const { return at(0)->node(); }
@@ -46,6 +47,21 @@ public:
loco::Node *outBackprop(void) const { return at(2)->node(); }
void outBackprop(Node *node) { at(2)->node(node); }
+ /**
+ * @note "bias" is optional. When this node has no conceptual bias, "bias()"
+ * expected to be `luci::CircleOutputExclude` type.
+ *
+ * <Comment on tflite TRANSPOSE_CONV>
+ *
+ * (The Circle node has no dependency on tflite; this is just background for converting)
+ * Before TF v2.3.0, tflite TRANSPOSE_CONV didn't support fused bias as argument.
+ * From TF v2.3.0, tflite TRANSPOSE_CONV supports bias as optional 4th argument.
+ *
+ * Ref: https://github.com/tensorflow/tensorflow/commit/43b8f6e710
+ */
+ loco::Node *bias(void) const override { return at(3)->node(); }
+ void bias(loco::Node *node) override { at(3)->node(node); }
+
public:
const Padding &padding(void) const { return _padding; }
void padding(const Padding &padding) { _padding = padding; }
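Editor's note: with the arity bumped to 4, code that constructs CircleTransposeConv directly must now fill the bias slot too. A hedged sketch of the no-bias case, mirroring the importer change earlier in this patch (the function name is an assumption):

#include <loco.h>
#include <luci/IR/CircleNodes.h>

// Illustrative only: when there is no conceptual bias, plug in a
// CircleOutputExclude with a dummy dtype so the node is still well-formed.
void set_empty_bias(loco::Graph *g, luci::CircleTransposeConv *tconv)
{
  auto *no_bias = g->nodes()->create<luci::CircleOutputExclude>();
  no_bias->dtype(loco::DataType::FLOAT32); // dummy type; the node carries no data
  tconv->bias(no_bias);
}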
diff --git a/compiler/luci/lang/src/Nodes/CircleConst.cpp b/compiler/luci/lang/src/Nodes/CircleConst.cpp
index 17ff853eb..0d02d32dc 100644
--- a/compiler/luci/lang/src/Nodes/CircleConst.cpp
+++ b/compiler/luci/lang/src/Nodes/CircleConst.cpp
@@ -73,6 +73,7 @@ template <loco::DataType DT> typename loco::DataTypeImpl<DT>::Type &CircleConst:
INSTANTIATE(loco::DataType::S64);
INSTANTIATE(loco::DataType::S32);
INSTANTIATE(loco::DataType::S16);
+INSTANTIATE(loco::DataType::S8);
INSTANTIATE(loco::DataType::FLOAT32);
INSTANTIATE(loco::DataType::U8);
INSTANTIATE(loco::DataType::BOOL);
diff --git a/compiler/luci/lang/src/Nodes/CircleNonMaxSuppressionV5.test.cpp b/compiler/luci/lang/src/Nodes/CircleNonMaxSuppressionV5.test.cpp
new file mode 100644
index 000000000..ceb74e3df
--- /dev/null
+++ b/compiler/luci/lang/src/Nodes/CircleNonMaxSuppressionV5.test.cpp
@@ -0,0 +1,101 @@
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "luci/IR/Nodes/CircleNonMaxSuppressionV5.h"
+
+#include "luci/IR/CircleDialect.h"
+#include "luci/IR/CircleNodeVisitor.h"
+
+#include <gtest/gtest.h>
+
+TEST(CircleNonMaxSuppressionV5Test, constructor)
+{
+ luci::CircleNonMaxSuppressionV5 nmsv5_node;
+
+ ASSERT_EQ(luci::CircleDialect::get(), nmsv5_node.dialect());
+ ASSERT_EQ(luci::CircleOpcode::NON_MAX_SUPPRESSION_V5, nmsv5_node.opcode());
+
+ ASSERT_EQ(nullptr, nmsv5_node.boxes());
+ ASSERT_EQ(nullptr, nmsv5_node.scores());
+ ASSERT_EQ(nullptr, nmsv5_node.max_output_size());
+ ASSERT_EQ(nullptr, nmsv5_node.iou_threshold());
+ ASSERT_EQ(nullptr, nmsv5_node.score_threshold());
+ ASSERT_EQ(nullptr, nmsv5_node.soft_nms_sigma());
+}
+
+TEST(CircleNonMaxSuppressionV5Test, input_NEG)
+{
+ luci::CircleNonMaxSuppressionV5 nmsv5_node;
+ luci::CircleNonMaxSuppressionV5 node;
+
+ nmsv5_node.boxes(&node);
+ nmsv5_node.scores(&node);
+ nmsv5_node.max_output_size(&node);
+ nmsv5_node.iou_threshold(&node);
+ nmsv5_node.score_threshold(&node);
+ nmsv5_node.soft_nms_sigma(&node);
+ ASSERT_NE(nullptr, nmsv5_node.boxes());
+ ASSERT_NE(nullptr, nmsv5_node.scores());
+ ASSERT_NE(nullptr, nmsv5_node.max_output_size());
+ ASSERT_NE(nullptr, nmsv5_node.iou_threshold());
+ ASSERT_NE(nullptr, nmsv5_node.score_threshold());
+ ASSERT_NE(nullptr, nmsv5_node.soft_nms_sigma());
+
+ nmsv5_node.boxes(nullptr);
+ nmsv5_node.scores(nullptr);
+ nmsv5_node.max_output_size(nullptr);
+ nmsv5_node.iou_threshold(nullptr);
+ nmsv5_node.score_threshold(nullptr);
+ nmsv5_node.soft_nms_sigma(nullptr);
+ ASSERT_EQ(nullptr, nmsv5_node.boxes());
+ ASSERT_EQ(nullptr, nmsv5_node.scores());
+ ASSERT_EQ(nullptr, nmsv5_node.max_output_size());
+ ASSERT_EQ(nullptr, nmsv5_node.iou_threshold());
+ ASSERT_EQ(nullptr, nmsv5_node.score_threshold());
+ ASSERT_EQ(nullptr, nmsv5_node.soft_nms_sigma());
+}
+
+TEST(CircleNonMaxSuppressionV5Test, arity_NEG)
+{
+ luci::CircleNonMaxSuppressionV5 nmsv5_node;
+
+ ASSERT_NO_THROW(nmsv5_node.arg(5));
+ ASSERT_THROW(nmsv5_node.arg(6), std::out_of_range);
+}
+
+TEST(CircleNonMaxSuppressionV5Test, visit_mutable_NEG)
+{
+ struct TestVisitor final : public luci::CircleNodeMutableVisitor<void>
+ {
+ };
+
+ luci::CircleNonMaxSuppressionV5 nmsv5_node;
+
+ TestVisitor tv;
+ ASSERT_THROW(nmsv5_node.accept(&tv), std::exception);
+}
+
+TEST(CircleNonMaxSuppressionV5Test, visit_NEG)
+{
+ struct TestVisitor final : public luci::CircleNodeVisitor<void>
+ {
+ };
+
+ luci::CircleNonMaxSuppressionV5 nmsv5_node;
+
+ TestVisitor tv;
+ ASSERT_THROW(nmsv5_node.accept(&tv), std::exception);
+}
diff --git a/compiler/luci/lang/src/Nodes/CircleNonMaxSuppressionV5Out.test.cpp b/compiler/luci/lang/src/Nodes/CircleNonMaxSuppressionV5Out.test.cpp
new file mode 100644
index 000000000..7b427ea03
--- /dev/null
+++ b/compiler/luci/lang/src/Nodes/CircleNonMaxSuppressionV5Out.test.cpp
@@ -0,0 +1,32 @@
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "luci/IR/Nodes/CircleNonMaxSuppressionV5Out.h"
+
+#include "luci/IR/CircleDialect.h"
+
+#include <gtest/gtest.h>
+
+TEST(CircleNonMaxSuppressionV5OutTest, constructor)
+{
+ luci::CircleNonMaxSuppressionV5Out vout_node;
+
+ ASSERT_EQ(luci::CircleDialect::get(), vout_node.dialect());
+ ASSERT_EQ(luci::CircleOpcode::CIRCLENONMAXSUPPRESSIONV5OUT, vout_node.opcode());
+
+ ASSERT_EQ(nullptr, vout_node.input());
+ ASSERT_EQ(-1, vout_node.index());
+}
diff --git a/compiler/luci/lang/src/Nodes/CircleSparseToDense.test.cpp b/compiler/luci/lang/src/Nodes/CircleSparseToDense.test.cpp
index de3cf6e9a..03f612ba7 100644
--- a/compiler/luci/lang/src/Nodes/CircleSparseToDense.test.cpp
+++ b/compiler/luci/lang/src/Nodes/CircleSparseToDense.test.cpp
@@ -33,7 +33,7 @@ TEST(CircleSparseToDenseTest, constructor)
ASSERT_EQ(nullptr, stb_node.values());
ASSERT_EQ(nullptr, stb_node.default_value());
- ASSERT_EQ(true, stb_node.validate_indices());
+ ASSERT_EQ(false, stb_node.validate_indices());
}
TEST(CircleSparseToDenseTest, input_NEG)
diff --git a/compiler/luci/lang/src/Nodes/CircleTransposeConv.test.cpp b/compiler/luci/lang/src/Nodes/CircleTransposeConv.test.cpp
index 429169744..3e0db803f 100644
--- a/compiler/luci/lang/src/Nodes/CircleTransposeConv.test.cpp
+++ b/compiler/luci/lang/src/Nodes/CircleTransposeConv.test.cpp
@@ -69,8 +69,8 @@ TEST(CircleTransposeConvTest, arity_NEG)
{
luci::CircleTransposeConv trc_node;
- ASSERT_NO_THROW(trc_node.arg(2));
- ASSERT_THROW(trc_node.arg(3), std::out_of_range);
+ ASSERT_NO_THROW(trc_node.arg(3));
+ ASSERT_THROW(trc_node.arg(4), std::out_of_range);
}
TEST(CircleTransposeConvTest, visit_mutable_NEG)
diff --git a/compiler/luci/logex/src/FormattedGraph.cpp b/compiler/luci/logex/src/FormattedGraph.cpp
index f04a418ef..bb7c73d5f 100644
--- a/compiler/luci/logex/src/FormattedGraph.cpp
+++ b/compiler/luci/logex/src/FormattedGraph.cpp
@@ -245,10 +245,12 @@ private:
IMPLEMENT(luci::CircleMul)
IMPLEMENT(luci::CircleNeg)
IMPLEMENT(luci::CircleNonMaxSuppressionV4)
+ IMPLEMENT(luci::CircleNonMaxSuppressionV5)
IMPLEMENT(luci::CircleNotEqual)
IMPLEMENT(luci::CircleOneHot)
IMPLEMENT(luci::CirclePack)
IMPLEMENT(luci::CirclePad)
+ IMPLEMENT(luci::CirclePadV2)
IMPLEMENT(luci::CirclePow)
IMPLEMENT(luci::CirclePRelu)
IMPLEMENT(luci::CircleRange)
@@ -306,6 +308,7 @@ private:
IMPLEMENT(luci::CircleOutput)
IMPLEMENT(luci::CircleIfOut)
IMPLEMENT(luci::CircleNonMaxSuppressionV4Out)
+ IMPLEMENT(luci::CircleNonMaxSuppressionV5Out)
IMPLEMENT(luci::CircleSplitOut)
IMPLEMENT(luci::CircleSplitVOut)
IMPLEMENT(luci::CircleTopKV2Out)
@@ -380,192 +383,848 @@ bool use_ido(const locop::SymbolTable *tbl, const CIRCLENODE *node, locop::NodeS
return true;
}
-bool CircleNodeSummaryBuilderBase::build(const loco::Node *node, locop::NodeSummary &s) const
+bool summary_node(const locop::SymbolTable *tbl, const luci::CircleAddN *node,
+ locop::NodeSummary &s)
{
- if (node->dialect() != luci::CircleDialect::get())
- return false;
+ for (uint32_t i = 0; i < node->arity(); ++i)
+ s.args().append("inputs", tbl->lookup(node->inputs(i)));
+ s.state(locop::NodeSummary::State::Complete);
+ return true;
+}
-#define CIRCLE_NODE(OPCODE, CLASS) \
- if (dynamic_cast<const CLASS *>(node)) \
- { \
- s.opname(circle_opname(node->opnum())); \
- return summary(dynamic_cast<const CLASS *>(node), s); \
- }
-#include <luci/IR/CircleNodes.lst>
-#undef CIRCLE_NODE
+bool summary_node(const locop::SymbolTable *tbl, const luci::CircleAveragePool2D *node,
+ locop::NodeSummary &s)
+{
+ assert(node->fusedActivationFunction() != luci::FusedActFunc::UNDEFINED);
- return false;
+ s.args().append("value", tbl->lookup(node->value()));
+ s.args().append("filter(h,w)", to_str(node->filter()));
+ s.args().append("stride(h,w)", to_str(node->stride()));
+ s.args().append("padding", to_str(node->padding()));
+ s.args().append("fused", to_str(node->fusedActivationFunction()));
+ s.state(locop::NodeSummary::State::Complete);
+ return true;
}
-bool CircleNodeSummaryBuilder::summary(const luci::CircleAbs *node, locop::NodeSummary &s) const
+bool summary_node(const locop::SymbolTable *tbl, const luci::CircleBatchMatMul *node,
+ locop::NodeSummary &s)
{
- return use_x(tbl(), node, s);
+ s.args().append("x", tbl->lookup(node->x()));
+ s.args().append("y", tbl->lookup(node->y()));
+ s.args().append("adj_x", to_str(node->adj_x()));
+ s.args().append("adj_y", to_str(node->adj_y()));
+ s.state(locop::NodeSummary::State::Complete);
+ return true;
}
-bool CircleNodeSummaryBuilder::summary(const luci::CircleAdd *node, locop::NodeSummary &s) const
+bool summary_node(const locop::SymbolTable *tbl, const luci::CircleBatchToSpaceND *node,
+ locop::NodeSummary &s)
{
- return use_xy_act(tbl(), node, s);
+ s.args().append("input", tbl->lookup(node->input()));
+ s.args().append("block_shape", tbl->lookup(node->block_shape()));
+ s.args().append("crops", tbl->lookup(node->crops()));
+ s.state(locop::NodeSummary::State::Complete);
+ return true;
}
-bool CircleNodeSummaryBuilder::summary(const luci::CircleAddN *node, locop::NodeSummary &s) const
+bool summary_node(const locop::SymbolTable *tbl, const luci::CircleCast *node,
+ locop::NodeSummary &s)
{
- for (uint32_t i = 0; i < node->arity(); ++i)
- s.args().append("inputs", tbl()->lookup(node->inputs(i)));
+ s.args().append("x", tbl->lookup(node->x()));
+ s.args().append("in_data_type", to_str(node->in_data_type()));
+ s.args().append("out_data_type", to_str(node->out_data_type()));
+ s.state(locop::NodeSummary::State::Complete);
+ return true;
+}
+bool summary_node(const locop::SymbolTable *tbl, const luci::CircleConcatenation *node,
+ locop::NodeSummary &s)
+{
+ assert(node->fusedActivationFunction() != luci::FusedActFunc::UNDEFINED);
+
+ for (uint32_t i = 0; i < node->numValues(); ++i)
+ s.args().append("values", tbl->lookup(node->values(i)));
+ s.args().append("axis", pepper::str(node->axis()));
+ s.args().append("fused", to_str(node->fusedActivationFunction()));
s.state(locop::NodeSummary::State::Complete);
return true;
}
-bool CircleNodeSummaryBuilder::summary(const luci::CircleArgMax *node, locop::NodeSummary &s) const
+bool summary_node(const locop::SymbolTable *tbl, const luci::CircleConv2D *node,
+ locop::NodeSummary &s)
{
- return use_ido(tbl(), node, s);
+ assert(node->fusedActivationFunction() != luci::FusedActFunc::UNDEFINED);
+ assert(node->padding() != luci::Padding::UNDEFINED);
+
+ s.args().append("input", tbl->lookup(node->input()));
+ s.args().append("filter", tbl->lookup(node->filter()));
+ s.args().append("bias", tbl->lookup(node->bias()));
+ s.args().append("stride(h,w)", to_str(node->stride()));
+ s.args().append("dilation(h,w)", to_str(node->dilation()));
+ s.args().append("padding", to_str(node->padding()));
+ s.args().append("fused", to_str(node->fusedActivationFunction()));
+ s.state(locop::NodeSummary::State::Complete);
+ return true;
}
-bool CircleNodeSummaryBuilder::summary(const luci::CircleArgMin *node, locop::NodeSummary &s) const
+bool summary_node(const locop::SymbolTable *tbl, const luci::CircleCustom *node,
+ locop::NodeSummary &s)
{
- return use_ido(tbl(), node, s);
+ for (uint32_t i = 0; i < node->numInputs(); i++)
+ {
+ s.args().append("input" + std::to_string(i), tbl->lookup(node->inputs(i)));
+ }
+ s.args().append("custom_code", node->custom_code());
+ s.state(locop::NodeSummary::State::Complete);
+ return true;
}
-bool CircleNodeSummaryBuilder::summary(const luci::CircleAveragePool2D *node,
- locop::NodeSummary &s) const
+bool summary_node(const locop::SymbolTable *tbl, const luci::CircleDepthToSpace *node,
+ locop::NodeSummary &s)
+{
+ s.args().append("input", tbl->lookup(node->input()));
+ s.args().append("block_size", std::to_string(node->block_size()));
+ s.state(locop::NodeSummary::State::Complete);
+ return true;
+}
+
+bool summary_node(const locop::SymbolTable *tbl, const luci::CircleDepthwiseConv2D *node,
+ locop::NodeSummary &s)
{
assert(node->fusedActivationFunction() != luci::FusedActFunc::UNDEFINED);
+ assert(node->padding() != luci::Padding::UNDEFINED);
- s.args().append("value", tbl()->lookup(node->value()));
- s.args().append("filter(h,w)", to_str(node->filter()));
+ s.args().append("input", tbl->lookup(node->input()));
+ s.args().append("filter", tbl->lookup(node->filter()));
+ s.args().append("bias", tbl->lookup(node->bias()));
s.args().append("stride(h,w)", to_str(node->stride()));
+ s.args().append("dilation(h,w)", to_str(node->dilation()));
s.args().append("padding", to_str(node->padding()));
+ s.args().append("depthMultiplier", std::to_string(node->depthMultiplier()));
s.args().append("fused", to_str(node->fusedActivationFunction()));
+ s.state(locop::NodeSummary::State::Complete);
+ return true;
+}
+bool summary_node(const locop::SymbolTable *tbl, const luci::CircleExpandDims *node,
+ locop::NodeSummary &s)
+{
+ s.args().append("input", tbl->lookup(node->input()));
+ s.args().append("axis", tbl->lookup(node->axis()));
s.state(locop::NodeSummary::State::Complete);
+ return true;
+}
+bool summary_node(const locop::SymbolTable *tbl, const luci::CircleFill *node,
+ locop::NodeSummary &s)
+{
+ s.args().append("dims", tbl->lookup(node->dims()));
+ s.args().append("value", tbl->lookup(node->value()));
+ s.state(locop::NodeSummary::State::Complete);
return true;
}
-bool CircleNodeSummaryBuilder::summary(const luci::CircleBatchMatMul *node,
- locop::NodeSummary &s) const
+bool summary_node(const locop::SymbolTable *tbl, const luci::CircleFullyConnected *node,
+ locop::NodeSummary &s)
{
- s.args().append("x", tbl()->lookup(node->x()));
- s.args().append("y", tbl()->lookup(node->y()));
- s.args().append("adj_x", to_str(node->adj_x()));
- s.args().append("adj_y", to_str(node->adj_y()));
+ assert(node->fusedActivationFunction() != luci::FusedActFunc::UNDEFINED);
+
+ s.args().append("input", tbl->lookup(node->input()));
+ s.args().append("weights", tbl->lookup(node->weights()));
+ s.args().append("bias", tbl->lookup(node->bias()));
+ s.args().append("fused", to_str(node->fusedActivationFunction()));
s.state(locop::NodeSummary::State::Complete);
return true;
}
-bool CircleNodeSummaryBuilder::summary(const luci::CircleBatchToSpaceND *node,
- locop::NodeSummary &s) const
+bool summary_node(const locop::SymbolTable *tbl, const luci::CircleGather *node,
+ locop::NodeSummary &s)
{
- s.args().append("input", tbl()->lookup(node->input()));
- s.args().append("block_shape", tbl()->lookup(node->block_shape()));
- s.args().append("crops", tbl()->lookup(node->crops()));
+ s.args().append("params", tbl->lookup(node->params()));
+ s.args().append("indices", tbl->lookup(node->indices()));
+ s.args().append("axis", pepper::str(node->axis()));
+ s.state(locop::NodeSummary::State::Complete);
+ return true;
+}
+bool summary_node(const locop::SymbolTable *tbl, const luci::CircleGatherNd *node,
+ locop::NodeSummary &s)
+{
+ s.args().append("params", tbl->lookup(node->params()));
+ s.args().append("indices", tbl->lookup(node->indices()));
s.state(locop::NodeSummary::State::Complete);
+ return true;
+}
+
+bool summary_node(const locop::SymbolTable *tbl, const luci::CircleIf *node, locop::NodeSummary &s)
+{
+ s.args().append("cond", tbl->lookup(node->cond()));
+ for (uint32_t i = 0; i < node->input_count(); ++i)
+ s.args().append("input", tbl->lookup(node->input(i)));
+ if (node->then_graph() != nullptr)
+ s.args().append("then_graph", node->then_graph()->name());
+ else
+ s.args().append("then_branch", pepper::str(node->then_branch()));
+
+ if (node->else_graph() != nullptr)
+ s.args().append("else_graph", node->else_graph()->name());
+ else
+ s.args().append("else_branch", pepper::str(node->else_branch()));
+
+ s.state(locop::NodeSummary::State::Complete);
return true;
}
-bool CircleNodeSummaryBuilder::summary(const luci::CircleCast *node, locop::NodeSummary &s) const
+bool summary_node(const locop::SymbolTable *tbl, const luci::CircleL2Normalize *node,
+ locop::NodeSummary &s)
{
- s.args().append("x", tbl()->lookup(node->x()));
- s.args().append("in_data_type", to_str(node->in_data_type()));
- s.args().append("out_data_type", to_str(node->out_data_type()));
+ s.args().append("x", tbl->lookup(node->x()));
+ s.args().append("fused_activation_function", to_str(node->fusedActivationFunction()));
s.state(locop::NodeSummary::State::Complete);
return true;
}
-bool CircleNodeSummaryBuilder::summary(const luci::CircleCeil *node, locop::NodeSummary &s) const
+bool summary_node(const locop::SymbolTable *tbl, const luci::CircleLeakyRelu *node,
+ locop::NodeSummary &s)
{
- return use_x(tbl(), node, s);
+ s.args().append("features", tbl->lookup(node->features()));
+ s.args().append("alpha", std::to_string(node->alpha()));
+ s.state(locop::NodeSummary::State::Complete);
+ return true;
}
-bool CircleNodeSummaryBuilder::summary(const luci::CircleConcatenation *node,
- locop::NodeSummary &s) const
+bool summary_node(const locop::SymbolTable *tbl, const luci::CircleLocalResponseNormalization *node,
+ locop::NodeSummary &s)
+{
+ s.args().append("input", tbl->lookup(node->input()));
+ s.args().append("radius", pepper::str(node->radius()));
+ s.args().append("bias", pepper::str(node->bias()));
+ s.args().append("alpha", pepper::str(node->alpha()));
+ s.args().append("beta", pepper::str(node->beta()));
+ s.state(locop::NodeSummary::State::Complete);
+ return true;
+}
+
+bool summary_node(const locop::SymbolTable *tbl, const luci::CircleLogSoftmax *node,
+ locop::NodeSummary &s)
+{
+ s.args().append("logits", tbl->lookup(node->logits()));
+ s.state(locop::NodeSummary::State::Complete);
+ return true;
+}
+
+bool summary_node(const locop::SymbolTable *tbl, const luci::CircleMatrixDiag *node,
+ locop::NodeSummary &s)
+{
+ s.args().append("diagonal", tbl->lookup(node->diagonal()));
+ s.state(locop::NodeSummary::State::Complete);
+ return true;
+}
+
+bool summary_node(const locop::SymbolTable *tbl, const luci::CircleMatrixSetDiag *node,
+ locop::NodeSummary &s)
+{
+ s.args().append("input", tbl->lookup(node->input()));
+ s.args().append("diagonal", tbl->lookup(node->diagonal()));
+ s.state(locop::NodeSummary::State::Complete);
+ return true;
+}
+
+bool summary_node(const locop::SymbolTable *tbl, const luci::CircleMaxPool2D *node,
+ locop::NodeSummary &s)
{
assert(node->fusedActivationFunction() != luci::FusedActFunc::UNDEFINED);
- for (uint32_t i = 0; i < node->numValues(); ++i)
- s.args().append("values", tbl()->lookup(node->values(i)));
- s.args().append("axis", pepper::str(node->axis()));
+ s.args().append("value", tbl->lookup(node->value()));
+ s.args().append("filter(h,w)", to_str(node->filter()));
+ s.args().append("stride(h,w)", to_str(node->stride()));
+ s.args().append("padding", to_str(node->padding()));
s.args().append("fused", to_str(node->fusedActivationFunction()));
s.state(locop::NodeSummary::State::Complete);
return true;
}
-bool CircleNodeSummaryBuilder::summary(const luci::CircleConst *, locop::NodeSummary &s) const
+bool summary_node(const locop::SymbolTable *tbl, const luci::CircleMirrorPad *node,
+ locop::NodeSummary &s)
+{
+ s.args().append("input", tbl->lookup(node->input()));
+ s.args().append("paddings", tbl->lookup(node->paddings()));
+ s.args().append("mode", to_str(node->mode()));
+ s.state(locop::NodeSummary::State::Complete);
+ return true;
+}
+
+bool summary_node(const locop::SymbolTable *tbl, const luci::CircleNonMaxSuppressionV4 *node,
+ locop::NodeSummary &s)
+{
+ s.args().append("boxes", tbl->lookup(node->boxes()));
+ s.args().append("scores", tbl->lookup(node->scores()));
+ s.args().append("max_output_size", tbl->lookup(node->max_output_size()));
+ s.args().append("iou_threshold", tbl->lookup(node->iou_threshold()));
+ s.args().append("score_threshold", tbl->lookup(node->score_threshold()));
+ s.state(locop::NodeSummary::State::Complete);
+ return true;
+}
+
+bool summary_node(const locop::SymbolTable *tbl, const luci::CircleNonMaxSuppressionV5 *node,
+ locop::NodeSummary &s)
+{
+ s.args().append("boxes", tbl->lookup(node->boxes()));
+ s.args().append("scores", tbl->lookup(node->scores()));
+ s.args().append("max_output_size", tbl->lookup(node->max_output_size()));
+ s.args().append("iou_threshold", tbl->lookup(node->iou_threshold()));
+ s.args().append("score_threshold", tbl->lookup(node->score_threshold()));
+ s.args().append("soft_nms_sigma", tbl->lookup(node->soft_nms_sigma()));
+ s.state(locop::NodeSummary::State::Complete);
+ return true;
+}
+
+bool summary_node(const locop::SymbolTable *tbl, const luci::CircleOneHot *node,
+ locop::NodeSummary &s)
+{
+ s.args().append("indices", tbl->lookup(node->indices()));
+ s.args().append("depth", tbl->lookup(node->depth()));
+ s.args().append("on_value", tbl->lookup(node->on_value()));
+ s.args().append("off_value", tbl->lookup(node->off_value()));
+ s.args().append("axis", pepper::str(node->axis()));
+
+ s.state(locop::NodeSummary::State::Complete);
+ return true;
+}
+
+bool summary_node(const locop::SymbolTable *tbl, const luci::CirclePack *node,
+ locop::NodeSummary &s)
+{
+ for (uint32_t i = 0; i < node->values_count(); ++i)
+ s.args().append("values", tbl->lookup(node->values(i)));
+ s.args().append("values_count", pepper::str(node->values_count()));
+ s.args().append("axis", pepper::str(node->axis()));
+ s.state(locop::NodeSummary::State::Complete);
+ return true;
+}
+
+bool summary_node(const locop::SymbolTable *tbl, const luci::CirclePad *node, locop::NodeSummary &s)
{
+ s.args().append("input", tbl->lookup(node->input()));
+ s.args().append("paddings", tbl->lookup(node->paddings()));
+ s.state(locop::NodeSummary::State::Complete);
+ return true;
+}
+
+bool summary_node(const locop::SymbolTable *tbl, const luci::CirclePadV2 *node,
+ locop::NodeSummary &s)
+{
+ s.args().append("input", tbl->lookup(node->input()));
+ s.args().append("paddings", tbl->lookup(node->paddings()));
+ s.args().append("constant_values", tbl->lookup(node->constant_values()));
+ s.state(locop::NodeSummary::State::Complete);
+ return true;
+}
+
+bool summary_node(const locop::SymbolTable *tbl, const luci::CirclePRelu *node,
+ locop::NodeSummary &s)
+{
+ s.args().append("input", tbl->lookup(node->input()));
+ s.args().append("alpha", tbl->lookup(node->alpha()));
+ s.state(locop::NodeSummary::State::Complete);
+ return true;
+}
+
+bool summary_node(const locop::SymbolTable *tbl, const luci::CircleRange *node,
+ locop::NodeSummary &s)
+{
+ s.args().append("start", tbl->lookup(node->start()));
+ s.args().append("limit", tbl->lookup(node->limit()));
+ s.args().append("delta", tbl->lookup(node->delta()));
+ s.state(locop::NodeSummary::State::Complete);
+ return true;
+}
+
+bool summary_node(const locop::SymbolTable *tbl, const luci::CircleReshape *node,
+ locop::NodeSummary &s)
+{
+ s.args().append("tensor", tbl->lookup(node->tensor()));
+ s.args().append("shape", tbl->lookup(node->shape()));
+ // TODO Show newShape info
s.state(locop::NodeSummary::State::PartiallyKnown);
return true;
}
-bool CircleNodeSummaryBuilder::summary(const luci::CircleConv2D *node, locop::NodeSummary &s) const
+bool summary_node(const locop::SymbolTable *tbl, const luci::CircleResizeBilinear *node,
+ locop::NodeSummary &s)
{
- assert(node->fusedActivationFunction() != luci::FusedActFunc::UNDEFINED);
- assert(node->padding() != luci::Padding::UNDEFINED);
+ s.args().append("input", tbl->lookup(node->input()));
+ s.args().append("size", tbl->lookup(node->size()));
+ s.args().append("align_corners", node->align_corners() ? "true" : "false");
+ s.args().append("half_pixel_centers", node->half_pixel_centers() ? "true" : "false");
+ s.state(locop::NodeSummary::State::Complete);
+ return true;
+}
- s.args().append("input", tbl()->lookup(node->input()));
- s.args().append("filter", tbl()->lookup(node->filter()));
- s.args().append("bias", tbl()->lookup(node->bias()));
+bool summary_node(const locop::SymbolTable *tbl, const luci::CircleResizeNearestNeighbor *node,
+ locop::NodeSummary &s)
+{
+ s.args().append("input", tbl->lookup(node->input()));
+ s.args().append("size", tbl->lookup(node->size()));
+ s.args().append("align_corners", node->align_corners() ? "true" : "false");
+ s.state(locop::NodeSummary::State::Complete);
+ return true;
+}
- s.args().append("stride(h,w)", to_str(node->stride()));
- s.args().append("dilation(h,w)", to_str(node->dilation()));
+bool summary_node(const locop::SymbolTable *tbl, const luci::CircleReverseSequence *node,
+ locop::NodeSummary &s)
+{
+ s.args().append("input", tbl->lookup(node->input()));
+ s.args().append("seq_lengths", tbl->lookup(node->seq_lengths()));
+ s.args().append("seq_axis", std::to_string(node->seq_axis()));
+ s.args().append("batch_axis", std::to_string(node->batch_axis()));
+ s.state(locop::NodeSummary::State::Complete);
+ return true;
+}
- s.args().append("padding", to_str(node->padding()));
- s.args().append("fused", to_str(node->fusedActivationFunction()));
+bool summary_node(const locop::SymbolTable *tbl, const luci::CircleReverseV2 *node,
+ locop::NodeSummary &s)
+{
+ s.args().append("tensor", tbl->lookup(node->tensor()));
+ s.args().append("axis", tbl->lookup(node->axis()));
+ s.state(locop::NodeSummary::State::Complete);
+ return true;
+}
+bool summary_node(const locop::SymbolTable *tbl, const luci::CircleScatterNd *node,
+ locop::NodeSummary &s)
+{
+ s.args().append("indices", tbl->lookup(node->indices()));
+ s.args().append("updates", tbl->lookup(node->updates()));
+ s.args().append("shape", tbl->lookup(node->shape()));
s.state(locop::NodeSummary::State::Complete);
+ return true;
+}
+bool summary_node(const locop::SymbolTable *tbl, const luci::CircleSegmentSum *node,
+ locop::NodeSummary &s)
+{
+ s.args().append("input", tbl->lookup(node->input()));
+ s.args().append("segment_ids", tbl->lookup(node->segment_ids()));
+ s.state(locop::NodeSummary::State::Complete);
return true;
}
-bool CircleNodeSummaryBuilder::summary(const luci::CircleCos *node, locop::NodeSummary &s) const
+bool summary_node(const locop::SymbolTable *tbl, const luci::CircleSelect *node,
+ locop::NodeSummary &s)
{
- return use_x(tbl(), node, s);
+ s.args().append("condition", tbl->lookup(node->condition()));
+ s.args().append("t", tbl->lookup(node->t()));
+ s.args().append("e", tbl->lookup(node->e()));
+ s.state(locop::NodeSummary::State::Complete);
+ return true;
}
-bool CircleNodeSummaryBuilder::summary(const luci::CircleCustom *node, locop::NodeSummary &s) const
+bool summary_node(const locop::SymbolTable *tbl, const luci::CircleSelectV2 *node,
+ locop::NodeSummary &s)
{
- for (uint32_t i = 0; i < node->numInputs(); i++)
+ s.args().append("condition", tbl->lookup(node->condition()));
+ s.args().append("t", tbl->lookup(node->t()));
+ s.args().append("e", tbl->lookup(node->e()));
+ s.state(locop::NodeSummary::State::Complete);
+ return true;
+}
+
+bool summary_node(const locop::SymbolTable *tbl, const luci::CircleShape *node,
+ locop::NodeSummary &s)
+{
+ s.args().append("input", tbl->lookup(node->input()));
+ s.args().append("out_type", to_str(node->out_type()));
+ s.state(locop::NodeSummary::State::Complete);
+ return true;
+}
+
+bool summary_node(const locop::SymbolTable *tbl, const luci::CircleSlice *node,
+ locop::NodeSummary &s)
+{
+ s.args().append("input", tbl->lookup(node->input()));
+ s.args().append("begin", tbl->lookup(node->begin()));
+ s.args().append("size", tbl->lookup(node->size()));
+ s.state(locop::NodeSummary::State::Complete);
+ return true;
+}
+
+bool summary_node(const locop::SymbolTable *tbl, const luci::CircleSoftmax *node,
+ locop::NodeSummary &s)
+{
+ s.args().append("logits", tbl->lookup(node->logits()));
+ s.args().append("beta", pepper::str(node->beta()));
+ s.state(locop::NodeSummary::State::Complete);
+ return true;
+}
+
+bool summary_node(const locop::SymbolTable *tbl, const luci::CircleSpaceToBatchND *node,
+ locop::NodeSummary &s)
+{
+ s.args().append("input", tbl->lookup(node->input()));
+ s.args().append("block_shape", tbl->lookup(node->block_shape()));
+ s.args().append("paddings", tbl->lookup(node->paddings()));
+ s.state(locop::NodeSummary::State::Complete);
+ return true;
+}
+
+bool summary_node(const locop::SymbolTable *tbl, const luci::CircleSpaceToDepth *node,
+ locop::NodeSummary &s)
+{
+ s.args().append("input", tbl->lookup(node->input()));
+ s.args().append("block_size", pepper::str(node->block_size()));
+ s.state(locop::NodeSummary::State::Complete);
+ return true;
+}
+
+bool summary_node(const locop::SymbolTable *tbl, const luci::CircleSparseToDense *node,
+ locop::NodeSummary &s)
+{
+ s.args().append("indices", tbl->lookup(node->indices()));
+ s.args().append("output_shape", tbl->lookup(node->output_shape()));
+ s.args().append("values", tbl->lookup(node->values()));
+ s.args().append("default_value", tbl->lookup(node->default_value()));
+ s.args().append("Validate_indices", pepper::str(node->validate_indices()));
+ s.state(locop::NodeSummary::State::Complete);
+ return true;
+}
+
+bool summary_node(const locop::SymbolTable *tbl, const luci::CircleSplit *node,
+ locop::NodeSummary &s)
+{
+ s.args().append("split_dim", tbl->lookup(node->split_dim()));
+ s.args().append("input", tbl->lookup(node->input()));
+ s.args().append("num_split", pepper::str(node->num_split()));
+ s.state(locop::NodeSummary::State::Complete);
+ return true;
+}
+
+bool summary_node(const locop::SymbolTable *tbl, const luci::CircleSplitV *node,
+ locop::NodeSummary &s)
+{
+ s.args().append("input", tbl->lookup(node->input()));
+ s.args().append("size_splits", tbl->lookup(node->size_splits()));
+ s.args().append("split_dim", tbl->lookup(node->split_dim()));
+ s.args().append("num_split", pepper::str(node->num_split()));
+ s.state(locop::NodeSummary::State::Complete);
+ return true;
+}
+
+bool summary_node(const locop::SymbolTable *tbl, const luci::CircleSqueeze *node,
+ locop::NodeSummary &s)
+{
+ s.args().append("input", tbl->lookup(node->input()));
+
+ std::stringstream ss{"("};
+ for (size_t i = 0; i < node->squeeze_dims().size(); ++i)
{
- s.args().append("input" + std::to_string(i), tbl()->lookup(node->inputs(i)));
+ if (i != 0)
+ ss << ", ";
+ ss << node->squeeze_dims()[i];
}
- s.args().append("custom_code", node->custom_code());
+ ss << ")";
+ s.args().append("squeeze_dims", ss.str());
s.state(locop::NodeSummary::State::Complete);
return true;
}
-bool CircleNodeSummaryBuilder::summary(const luci::CircleDepthToSpace *node,
- locop::NodeSummary &s) const
+bool summary_node(const locop::SymbolTable *tbl, const luci::CircleStridedSlice *node,
+ locop::NodeSummary &s)
{
- s.args().append("input", tbl()->lookup(node->input()));
- s.args().append("block_size", std::to_string(node->block_size()));
+ s.args().append("input", tbl->lookup(node->input()));
+ s.args().append("begin", tbl->lookup(node->begin()));
+ s.args().append("end", tbl->lookup(node->end()));
+ s.args().append("strides", tbl->lookup(node->strides()));
+ s.args().append("begin_mask", pepper::str(node->begin_mask()));
+ s.args().append("end_mask", pepper::str(node->end_mask()));
+ s.args().append("ellipsis_mask", pepper::str(node->ellipsis_mask()));
+ s.args().append("new_axis_mask", pepper::str(node->new_axis_mask()));
+ s.args().append("shrink_axis_mask", pepper::str(node->shrink_axis_mask()));
+ s.state(locop::NodeSummary::State::Complete);
+ return true;
+}
+
+bool summary_node(const locop::SymbolTable *tbl, const luci::CircleTile *node,
+ locop::NodeSummary &s)
+{
+ s.args().append("input", tbl->lookup(node->input()));
+ s.args().append("multiples", tbl->lookup(node->multiples()));
+ s.state(locop::NodeSummary::State::Complete);
+ return true;
+}
+bool summary_node(const locop::SymbolTable *tbl, const luci::CircleTopKV2 *node,
+ locop::NodeSummary &s)
+{
+ s.args().append("input", tbl->lookup(node->input()));
+ s.args().append("k", tbl->lookup(node->k()));
s.state(locop::NodeSummary::State::Complete);
+ return true;
+}
+bool summary_node(const locop::SymbolTable *tbl, const luci::CircleTranspose *node,
+ locop::NodeSummary &s)
+{
+ s.args().append("a", tbl->lookup(node->a()));
+ s.args().append("perm", tbl->lookup(node->perm()));
+ s.state(locop::NodeSummary::State::Complete);
return true;
}
-bool CircleNodeSummaryBuilder::summary(const luci::CircleDepthwiseConv2D *node,
- locop::NodeSummary &s) const
+bool summary_node(const locop::SymbolTable *tbl, const luci::CircleTransposeConv *node,
+ locop::NodeSummary &s)
{
- assert(node->fusedActivationFunction() != luci::FusedActFunc::UNDEFINED);
assert(node->padding() != luci::Padding::UNDEFINED);
- s.args().append("input", tbl()->lookup(node->input()));
- s.args().append("filter", tbl()->lookup(node->filter()));
- s.args().append("bias", tbl()->lookup(node->bias()));
-
+ s.args().append("inputSizes", tbl->lookup(node->inputSizes()));
+ s.args().append("filter", tbl->lookup(node->filter()));
+ s.args().append("outBackprop", tbl->lookup(node->outBackprop()));
s.args().append("stride(h,w)", to_str(node->stride()));
- s.args().append("dilation(h,w)", to_str(node->dilation()));
s.args().append("padding", to_str(node->padding()));
- s.args().append("depthMultiplier", std::to_string(node->depthMultiplier()));
+ s.state(locop::NodeSummary::State::Complete);
+ return true;
+}
+
+bool summary_node(const locop::SymbolTable *tbl, const luci::CircleUnique *node,
+ locop::NodeSummary &s)
+{
+ s.args().append("input", tbl->lookup(node->input()));
+ s.args().append("idx_out_type", to_str(node->idx_out_type()));
+ s.state(locop::NodeSummary::State::Complete);
+ return true;
+}
+
+bool summary_node(const locop::SymbolTable *tbl, const luci::CircleUnpack *node,
+ locop::NodeSummary &s)
+{
+ s.args().append("value", tbl->lookup(node->value()));
+ s.args().append("num", pepper::str(node->num()));
+ s.args().append("axis", pepper::str(node->axis()));
+ s.state(locop::NodeSummary::State::Complete);
+ return true;
+}
+
+bool summary_node(const locop::SymbolTable *tbl, const luci::CircleWhere *node,
+ locop::NodeSummary &s)
+{
+ s.args().append("condition", tbl->lookup(node->condition()));
+ s.state(locop::NodeSummary::State::Complete);
+ return true;
+}
+
+bool summary_node(const locop::SymbolTable *tbl, const luci::CircleWhile *node,
+ locop::NodeSummary &s)
+{
+ for (uint32_t i = 0; i < node->input_count(); ++i)
+ s.args().append("input", tbl->lookup(node->input(i)));
+
+ if (node->cond_graph() != nullptr)
+ s.args().append("cond_graph", node->cond_graph()->name());
+ else
+ s.args().append("cond_branch", pepper::str(node->cond_branch()));
+
+ if (node->body_graph() != nullptr)
+ s.args().append("body_graph", node->body_graph()->name());
+ else
+ s.args().append("body_branch", pepper::str(node->body_branch()));
+
+ s.state(locop::NodeSummary::State::Complete);
+ return true;
+}
+
+bool summary_node(const locop::SymbolTable *tbl, const luci::CircleTopKV2Out *node,
+ locop::NodeSummary &s)
+{
+ s.args().append("topkv2", tbl->lookup(node->input()));
+ s.state(locop::NodeSummary::State::Complete);
+ return true;
+}
+
+bool summary_node(const locop::SymbolTable *tbl, const luci::CircleUniqueOut *node,
+ locop::NodeSummary &s)
+{
+ s.args().append("unique", tbl->lookup(node->input()));
+ s.state(locop::NodeSummary::State::Complete);
+ return true;
+}
+
+bool summary_node(const locop::SymbolTable *tbl, const luci::CircleUnpackOut *node,
+ locop::NodeSummary &s)
+{
+ s.args().append("unpack", tbl->lookup(node->input()));
+ s.state(locop::NodeSummary::State::Complete);
+ return true;
+}
+
+bool summary_node(const locop::SymbolTable *tbl, const luci::CircleWhileOut *node,
+ locop::NodeSummary &s)
+{
+ s.args().append("while", tbl->lookup(node->input()));
+ s.state(locop::NodeSummary::State::Complete);
+ return true;
+}
+
+bool summary_node(const locop::SymbolTable *tbl, const luci::CircleOutput *node,
+ locop::NodeSummary &s)
+{
+ s.args().append("from", tbl->lookup(node->from()));
+ s.state(locop::NodeSummary::State::Complete);
+ return true;
+}
+
+bool summary_node(const locop::SymbolTable *tbl, const luci::CircleBCQFullyConnected *node,
+ locop::NodeSummary &s)
+{
+ assert(node->fusedActivationFunction() != luci::FusedActFunc::UNDEFINED);
+ s.args().append("input", tbl->lookup(node->input()));
+ s.args().append("weights_scales", tbl->lookup(node->weights_scales()));
+ s.args().append("weights_binary", tbl->lookup(node->weights_binary()));
+ s.args().append("bias", tbl->lookup(node->bias()));
+ s.args().append("weights_clusters", tbl->lookup(node->weights_clusters()));
s.args().append("fused", to_str(node->fusedActivationFunction()));
+ s.args().append("weights_hidden_size", pepper::str(node->weights_hidden_size()));
+ s.state(locop::NodeSummary::State::Complete);
+ return true;
+}
+bool summary_node(const locop::SymbolTable *tbl, const luci::CircleBCQGather *node,
+ locop::NodeSummary &s)
+{
+ s.args().append("input_scales", tbl->lookup(node->input_scales()));
+ s.args().append("input_binary", tbl->lookup(node->input_binary()));
+ s.args().append("indices", tbl->lookup(node->indices()));
+ s.args().append("input_clusters", tbl->lookup(node->input_clusters()));
+ s.args().append("axis", pepper::str(node->axis()));
+ s.args().append("input_hidden_size", pepper::str(node->input_hidden_size()));
s.state(locop::NodeSummary::State::Complete);
+ return true;
+}
+
+bool summary_node(const locop::SymbolTable *tbl, const luci::CircleInstanceNorm *node,
+ locop::NodeSummary &s)
+{
+ auto fused = node->fusedActivationFunction();
+ assert(fused != luci::FusedActFunc::UNDEFINED);
+
+ s.args().append("input", tbl->lookup(node->input()));
+ s.args().append("gamma", tbl->lookup(node->gamma()));
+ s.args().append("beta", tbl->lookup(node->beta()));
+ s.args().append("epsilon", pepper::str(node->epsilon()));
+ s.args().append("fused_activation_function", to_str(fused));
+ s.state(locop::NodeSummary::State::Complete);
+ return true;
+}
+
+bool CircleNodeSummaryBuilderBase::build(const loco::Node *node, locop::NodeSummary &s) const
+{
+ if (node->dialect() != luci::CircleDialect::get())
+ return false;
+
+#define CIRCLE_NODE(OPCODE, CLASS) \
+ if (dynamic_cast<const CLASS *>(node)) \
+ { \
+ s.opname(circle_opname(node->opnum())); \
+ return summary(dynamic_cast<const CLASS *>(node), s); \
+ }
+#include <luci/IR/CircleNodes.lst>
+#undef CIRCLE_NODE
+
+ return false;
+}
+
+bool CircleNodeSummaryBuilder::summary(const luci::CircleAbs *node, locop::NodeSummary &s) const
+{
+ return use_x(tbl(), node, s);
+}
+
+bool CircleNodeSummaryBuilder::summary(const luci::CircleAdd *node, locop::NodeSummary &s) const
+{
+ return use_xy_act(tbl(), node, s);
+}
+
+bool CircleNodeSummaryBuilder::summary(const luci::CircleAddN *node, locop::NodeSummary &s) const
+{
+ return summary_node(tbl(), node, s);
+}
+
+bool CircleNodeSummaryBuilder::summary(const luci::CircleArgMax *node, locop::NodeSummary &s) const
+{
+ return use_ido(tbl(), node, s);
+}
+
+bool CircleNodeSummaryBuilder::summary(const luci::CircleArgMin *node, locop::NodeSummary &s) const
+{
+ return use_ido(tbl(), node, s);
+}
+
+bool CircleNodeSummaryBuilder::summary(const luci::CircleAveragePool2D *node,
+ locop::NodeSummary &s) const
+{
+ return summary_node(tbl(), node, s);
+}
+
+bool CircleNodeSummaryBuilder::summary(const luci::CircleBatchMatMul *node,
+ locop::NodeSummary &s) const
+{
+ return summary_node(tbl(), node, s);
+}
+bool CircleNodeSummaryBuilder::summary(const luci::CircleBatchToSpaceND *node,
+ locop::NodeSummary &s) const
+{
+ return summary_node(tbl(), node, s);
+}
+
+bool CircleNodeSummaryBuilder::summary(const luci::CircleCast *node, locop::NodeSummary &s) const
+{
+ return summary_node(tbl(), node, s);
+}
+
+bool CircleNodeSummaryBuilder::summary(const luci::CircleCeil *node, locop::NodeSummary &s) const
+{
+ return use_x(tbl(), node, s);
+}
+
+bool CircleNodeSummaryBuilder::summary(const luci::CircleConcatenation *node,
+ locop::NodeSummary &s) const
+{
+ return summary_node(tbl(), node, s);
+}
+
+bool CircleNodeSummaryBuilder::summary(const luci::CircleConst *, locop::NodeSummary &s) const
+{
+ s.state(locop::NodeSummary::State::PartiallyKnown);
return true;
}
+bool CircleNodeSummaryBuilder::summary(const luci::CircleConv2D *node, locop::NodeSummary &s) const
+{
+ return summary_node(tbl(), node, s);
+}
+
+bool CircleNodeSummaryBuilder::summary(const luci::CircleCos *node, locop::NodeSummary &s) const
+{
+ return use_x(tbl(), node, s);
+}
+
+bool CircleNodeSummaryBuilder::summary(const luci::CircleCustom *node, locop::NodeSummary &s) const
+{
+ return summary_node(tbl(), node, s);
+}
+
+bool CircleNodeSummaryBuilder::summary(const luci::CircleDepthToSpace *node,
+ locop::NodeSummary &s) const
+{
+ return summary_node(tbl(), node, s);
+}
+
+bool CircleNodeSummaryBuilder::summary(const luci::CircleDepthwiseConv2D *node,
+ locop::NodeSummary &s) const
+{
+ return summary_node(tbl(), node, s);
+}
+
bool CircleNodeSummaryBuilder::summary(const luci::CircleDiv *node, locop::NodeSummary &s) const
{
return use_xy(tbl(), node, s);
@@ -584,10 +1243,7 @@ bool CircleNodeSummaryBuilder::summary(const luci::CircleExp *node, locop::NodeS
bool CircleNodeSummaryBuilder::summary(const luci::CircleExpandDims *node,
locop::NodeSummary &s) const
{
- s.args().append("input", tbl()->lookup(node->input()));
- s.args().append("axis", tbl()->lookup(node->axis()));
- s.state(locop::NodeSummary::State::Complete);
- return true;
+ return summary_node(tbl(), node, s);
}
bool CircleNodeSummaryBuilder::summary(const luci::CircleFloor *node, locop::NodeSummary &s) const
@@ -609,44 +1265,24 @@ bool CircleNodeSummaryBuilder::summary(const luci::CircleFloorMod *node,
bool CircleNodeSummaryBuilder::summary(const luci::CircleFill *node, locop::NodeSummary &s) const
{
- s.args().append("dims", tbl()->lookup(node->dims()));
- s.args().append("value", tbl()->lookup(node->value()));
- s.state(locop::NodeSummary::State::Complete);
- return true;
+ return summary_node(tbl(), node, s);
}
bool CircleNodeSummaryBuilder::summary(const luci::CircleFullyConnected *node,
locop::NodeSummary &s) const
{
- assert(node->fusedActivationFunction() != luci::FusedActFunc::UNDEFINED);
-
- s.args().append("input", tbl()->lookup(node->input()));
- s.args().append("weights", tbl()->lookup(node->weights()));
- s.args().append("bias", tbl()->lookup(node->bias()));
- s.args().append("fused", to_str(node->fusedActivationFunction()));
-
- s.state(locop::NodeSummary::State::Complete);
-
- return true;
+ return summary_node(tbl(), node, s);
}
bool CircleNodeSummaryBuilder::summary(const luci::CircleGather *node, locop::NodeSummary &s) const
{
- s.args().append("params", tbl()->lookup(node->params()));
- s.args().append("indices", tbl()->lookup(node->indices()));
- s.args().append("axis", pepper::str(node->axis()));
-
- s.state(locop::NodeSummary::State::Complete);
- return true;
+ return summary_node(tbl(), node, s);
}
bool CircleNodeSummaryBuilder::summary(const luci::CircleGatherNd *node,
locop::NodeSummary &s) const
{
- s.args().append("params", tbl()->lookup(node->params()));
- s.args().append("indices", tbl()->lookup(node->indices()));
- s.state(locop::NodeSummary::State::Complete);
- return true;
+ return summary_node(tbl(), node, s);
}
bool CircleNodeSummaryBuilder::summary(const luci::CircleGreater *node, locop::NodeSummary &s) const
@@ -662,32 +1298,13 @@ bool CircleNodeSummaryBuilder::summary(const luci::CircleGreaterEqual *node,
bool CircleNodeSummaryBuilder::summary(const luci::CircleIf *node, locop::NodeSummary &s) const
{
- s.args().append("cond", tbl()->lookup(node->cond()));
- for (uint32_t i = 0; i < node->input_count(); ++i)
- s.args().append("input", tbl()->lookup(node->input(i)));
-
- if (node->then_graph() != nullptr)
- s.args().append("then_graph", node->then_graph()->name());
- else
- s.args().append("then_branch", pepper::str(node->then_branch()));
-
- if (node->else_graph() != nullptr)
- s.args().append("else_graph", node->else_graph()->name());
- else
- s.args().append("else_branch", pepper::str(node->else_branch()));
-
- s.state(locop::NodeSummary::State::Complete);
-
- return true;
+ return summary_node(tbl(), node, s);
}
bool CircleNodeSummaryBuilder::summary(const luci::CircleL2Normalize *node,
locop::NodeSummary &s) const
{
- s.args().append("x", tbl()->lookup(node->x()));
- s.args().append("fused_activation_function", to_str(node->fusedActivationFunction()));
- s.state(locop::NodeSummary::State::Complete);
- return true;
+ return summary_node(tbl(), node, s);
}
bool CircleNodeSummaryBuilder::summary(const luci::CircleLess *node, locop::NodeSummary &s) const
@@ -704,22 +1321,13 @@ bool CircleNodeSummaryBuilder::summary(const luci::CircleLessEqual *node,
bool CircleNodeSummaryBuilder::summary(const luci::CircleLeakyRelu *node,
locop::NodeSummary &s) const
{
- s.args().append("features", tbl()->lookup(node->features()));
- s.args().append("alpha", std::to_string(node->alpha()));
- s.state(locop::NodeSummary::State::Complete);
- return true;
+ return summary_node(tbl(), node, s);
}
bool CircleNodeSummaryBuilder::summary(const luci::CircleLocalResponseNormalization *node,
locop::NodeSummary &s) const
{
- s.args().append("input", tbl()->lookup(node->input()));
- s.args().append("radius", pepper::str(node->radius()));
- s.args().append("bias", pepper::str(node->bias()));
- s.args().append("alpha", pepper::str(node->alpha()));
- s.args().append("beta", pepper::str(node->beta()));
- s.state(locop::NodeSummary::State::Complete);
- return true;
+ return summary_node(tbl(), node, s);
}
bool CircleNodeSummaryBuilder::summary(const luci::CircleLog *node, locop::NodeSummary &s) const
@@ -754,26 +1362,19 @@ bool CircleNodeSummaryBuilder::summary(const luci::CircleLogistic *node,
bool CircleNodeSummaryBuilder::summary(const luci::CircleLogSoftmax *node,
locop::NodeSummary &s) const
{
- s.args().append("logits", tbl()->lookup(node->logits()));
- s.state(locop::NodeSummary::State::Complete);
- return true;
+ return summary_node(tbl(), node, s);
}
bool CircleNodeSummaryBuilder::summary(const luci::CircleMatrixDiag *node,
locop::NodeSummary &s) const
{
- s.args().append("diagonal", tbl()->lookup(node->diagonal()));
- s.state(locop::NodeSummary::State::Complete);
- return true;
+ return summary_node(tbl(), node, s);
}
bool CircleNodeSummaryBuilder::summary(const luci::CircleMatrixSetDiag *node,
locop::NodeSummary &s) const
{
- s.args().append("input", tbl()->lookup(node->input()));
- s.args().append("diagonal", tbl()->lookup(node->diagonal()));
- s.state(locop::NodeSummary::State::Complete);
- return true;
+ return summary_node(tbl(), node, s);
}
bool CircleNodeSummaryBuilder::summary(const luci::CircleMaximum *node, locop::NodeSummary &s) const
@@ -784,17 +1385,7 @@ bool CircleNodeSummaryBuilder::summary(const luci::CircleMaximum *node, locop::N
bool CircleNodeSummaryBuilder::summary(const luci::CircleMaxPool2D *node,
locop::NodeSummary &s) const
{
- assert(node->fusedActivationFunction() != luci::FusedActFunc::UNDEFINED);
-
- s.args().append("value", tbl()->lookup(node->value()));
- s.args().append("filter(h,w)", to_str(node->filter()));
- s.args().append("stride(h,w)", to_str(node->stride()));
- s.args().append("padding", to_str(node->padding()));
- s.args().append("fused", to_str(node->fusedActivationFunction()));
-
- s.state(locop::NodeSummary::State::Complete);
-
- return true;
+ return summary_node(tbl(), node, s);
}
bool CircleNodeSummaryBuilder::summary(const luci::CircleMean *node, locop::NodeSummary &s) const
@@ -810,11 +1401,7 @@ bool CircleNodeSummaryBuilder::summary(const luci::CircleMinimum *node, locop::N
bool CircleNodeSummaryBuilder::summary(const luci::CircleMirrorPad *node,
locop::NodeSummary &s) const
{
- s.args().append("input", tbl()->lookup(node->input()));
- s.args().append("paddings", tbl()->lookup(node->paddings()));
- s.args().append("mode", to_str(node->mode()));
- s.state(locop::NodeSummary::State::Complete);
- return true;
+ return summary_node(tbl(), node, s);
}
bool CircleNodeSummaryBuilder::summary(const luci::CircleMul *node, locop::NodeSummary &s) const
@@ -830,14 +1417,13 @@ bool CircleNodeSummaryBuilder::summary(const luci::CircleNeg *node, locop::NodeS
bool CircleNodeSummaryBuilder::summary(const luci::CircleNonMaxSuppressionV4 *node,
locop::NodeSummary &s) const
{
- s.args().append("boxes", pepper::str(node->boxes()));
- s.args().append("scores", pepper::str(node->scores()));
- s.args().append("max_output_size", pepper::str(node->max_output_size()));
- s.args().append("iou_threshold", pepper::str(node->iou_threshold()));
- s.args().append("score_threshold", pepper::str(node->score_threshold()));
+ return summary_node(tbl(), node, s);
+}
- s.state(locop::NodeSummary::State::Complete);
- return true;
+bool CircleNodeSummaryBuilder::summary(const luci::CircleNonMaxSuppressionV5 *node,
+ locop::NodeSummary &s) const
+{
+ return summary_node(tbl(), node, s);
}
bool CircleNodeSummaryBuilder::summary(const luci::CircleNotEqual *node,
@@ -848,32 +1434,22 @@ bool CircleNodeSummaryBuilder::summary(const luci::CircleNotEqual *node,
bool CircleNodeSummaryBuilder::summary(const luci::CircleOneHot *node, locop::NodeSummary &s) const
{
- s.args().append("indices", tbl()->lookup(node->indices()));
- s.args().append("depth", tbl()->lookup(node->depth()));
- s.args().append("on_value", tbl()->lookup(node->on_value()));
- s.args().append("off_value", tbl()->lookup(node->off_value()));
- s.args().append("axis", pepper::str(node->axis()));
-
- s.state(locop::NodeSummary::State::Complete);
- return true;
+ return summary_node(tbl(), node, s);
}
bool CircleNodeSummaryBuilder::summary(const luci::CirclePack *node, locop::NodeSummary &s) const
{
- for (uint32_t i = 0; i < node->values_count(); ++i)
- s.args().append("values", tbl()->lookup(node->values(i)));
- s.args().append("values_count", pepper::str(node->values_count()));
- s.args().append("axis", pepper::str(node->axis()));
- s.state(locop::NodeSummary::State::Complete);
- return true;
+ return summary_node(tbl(), node, s);
}
bool CircleNodeSummaryBuilder::summary(const luci::CirclePad *node, locop::NodeSummary &s) const
{
- s.args().append("input", tbl()->lookup(node->input()));
- s.args().append("paddings", tbl()->lookup(node->paddings()));
- s.state(locop::NodeSummary::State::Complete);
- return true;
+ return summary_node(tbl(), node, s);
+}
+
+bool CircleNodeSummaryBuilder::summary(const luci::CirclePadV2 *node, locop::NodeSummary &s) const
+{
+ return summary_node(tbl(), node, s);
}
bool CircleNodeSummaryBuilder::summary(const luci::CirclePow *node, locop::NodeSummary &s) const
@@ -883,20 +1459,12 @@ bool CircleNodeSummaryBuilder::summary(const luci::CirclePow *node, locop::NodeS
bool CircleNodeSummaryBuilder::summary(const luci::CirclePRelu *node, locop::NodeSummary &s) const
{
- s.args().append("input", tbl()->lookup(node->input()));
- s.args().append("alpha", tbl()->lookup(node->alpha()));
- s.state(locop::NodeSummary::State::Complete);
- return true;
+ return summary_node(tbl(), node, s);
}
bool CircleNodeSummaryBuilder::summary(const luci::CircleRange *node, locop::NodeSummary &s) const
{
- s.args().append("start", tbl()->lookup(node->start()));
- s.args().append("limit", tbl()->lookup(node->limit()));
- s.args().append("delta", tbl()->lookup(node->delta()));
-
- s.state(locop::NodeSummary::State::Complete);
- return true;
+ return summary_node(tbl(), node, s);
}
bool CircleNodeSummaryBuilder::summary(const luci::CircleRank *node, locop::NodeSummary &s) const
@@ -946,52 +1514,31 @@ bool CircleNodeSummaryBuilder::summary(const luci::CircleReluN1To1 *node,
bool CircleNodeSummaryBuilder::summary(const luci::CircleReshape *node, locop::NodeSummary &s) const
{
- s.args().append("tensor", tbl()->lookup(node->tensor()));
- s.args().append("shape", tbl()->lookup(node->shape()));
- // TODO Show newShape info
- s.state(locop::NodeSummary::State::PartiallyKnown);
- return true;
+ return summary_node(tbl(), node, s);
}
bool CircleNodeSummaryBuilder::summary(const luci::CircleResizeBilinear *node,
locop::NodeSummary &s) const
{
- s.args().append("input", tbl()->lookup(node->input()));
- s.args().append("size", tbl()->lookup(node->size()));
- s.args().append("align_corners", node->align_corners() ? "true" : "false");
- s.args().append("half_pixel_centers", node->half_pixel_centers() ? "true" : "false");
- s.state(locop::NodeSummary::State::Complete);
- return true;
+ return summary_node(tbl(), node, s);
}
bool CircleNodeSummaryBuilder::summary(const luci::CircleResizeNearestNeighbor *node,
locop::NodeSummary &s) const
{
- s.args().append("input", tbl()->lookup(node->input()));
- s.args().append("size", tbl()->lookup(node->size()));
- s.args().append("align_corners", node->align_corners() ? "true" : "false");
- s.state(locop::NodeSummary::State::Complete);
- return true;
+ return summary_node(tbl(), node, s);
}
bool CircleNodeSummaryBuilder::summary(const luci::CircleReverseSequence *node,
locop::NodeSummary &s) const
{
- s.args().append("input", tbl()->lookup(node->input()));
- s.args().append("seq_lengths", tbl()->lookup(node->seq_lengths()));
- s.args().append("seq_axis", std::to_string(node->seq_axis()));
- s.args().append("batch_axis", std::to_string(node->batch_axis()));
- s.state(locop::NodeSummary::State::Complete);
- return true;
+ return summary_node(tbl(), node, s);
}
bool CircleNodeSummaryBuilder::summary(const luci::CircleReverseV2 *node,
locop::NodeSummary &s) const
{
- s.args().append("tensor", tbl()->lookup(node->tensor()));
- s.args().append("axis", tbl()->lookup(node->axis()));
- s.state(locop::NodeSummary::State::Complete);
- return true;
+ return summary_node(tbl(), node, s);
}
bool CircleNodeSummaryBuilder::summary(const luci::CircleRound *node, locop::NodeSummary &s) const
@@ -1007,47 +1554,29 @@ bool CircleNodeSummaryBuilder::summary(const luci::CircleRsqrt *node, locop::Nod
bool CircleNodeSummaryBuilder::summary(const luci::CircleScatterNd *node,
locop::NodeSummary &s) const
{
- s.args().append("indices", tbl()->lookup(node->indices()));
- s.args().append("updates", tbl()->lookup(node->updates()));
- s.args().append("shape", tbl()->lookup(node->shape()));
- s.state(locop::NodeSummary::State::Complete);
- return true;
+ return summary_node(tbl(), node, s);
}
bool CircleNodeSummaryBuilder::summary(const luci::CircleSegmentSum *node,
locop::NodeSummary &s) const
{
- s.args().append("input", tbl()->lookup(node->input()));
- s.args().append("segment_ids", tbl()->lookup(node->segment_ids()));
- s.state(locop::NodeSummary::State::Complete);
- return true;
+ return summary_node(tbl(), node, s);
}
bool CircleNodeSummaryBuilder::summary(const luci::CircleSelect *node, locop::NodeSummary &s) const
{
- s.args().append("condition", tbl()->lookup(node->condition()));
- s.args().append("t", tbl()->lookup(node->t()));
- s.args().append("e", tbl()->lookup(node->e()));
- s.state(locop::NodeSummary::State::Complete);
- return true;
+ return summary_node(tbl(), node, s);
}
bool CircleNodeSummaryBuilder::summary(const luci::CircleSelectV2 *node,
locop::NodeSummary &s) const
{
- s.args().append("condition", tbl()->lookup(node->condition()));
- s.args().append("t", tbl()->lookup(node->t()));
- s.args().append("e", tbl()->lookup(node->e()));
- s.state(locop::NodeSummary::State::Complete);
- return true;
+ return summary_node(tbl(), node, s);
}
bool CircleNodeSummaryBuilder::summary(const luci::CircleShape *node, locop::NodeSummary &s) const
{
- s.args().append("input", tbl()->lookup(node->input()));
- s.args().append("out_type", to_str(node->out_type()));
- s.state(locop::NodeSummary::State::Complete);
- return true;
+ return summary_node(tbl(), node, s);
}
bool CircleNodeSummaryBuilder::summary(const luci::CircleSin *node, locop::NodeSummary &s) const
@@ -1057,82 +1586,40 @@ bool CircleNodeSummaryBuilder::summary(const luci::CircleSin *node, locop::NodeS
bool CircleNodeSummaryBuilder::summary(const luci::CircleSlice *node, locop::NodeSummary &s) const
{
- s.args().append("input", tbl()->lookup(node->input()));
- s.args().append("begin", tbl()->lookup(node->begin()));
- s.args().append("size", tbl()->lookup(node->size()));
- s.state(locop::NodeSummary::State::Complete);
- return true;
+ return summary_node(tbl(), node, s);
}
bool CircleNodeSummaryBuilder::summary(const luci::CircleSoftmax *node, locop::NodeSummary &s) const
{
- s.args().append("logits", tbl()->lookup(node->logits()));
- s.args().append("beta", pepper::str(node->beta()));
- s.state(locop::NodeSummary::State::Complete);
- return true;
+ return summary_node(tbl(), node, s);
}
bool CircleNodeSummaryBuilder::summary(const luci::CircleSpaceToBatchND *node,
locop::NodeSummary &s) const
{
- s.args().append("input", tbl()->lookup(node->input()));
- s.args().append("block_shape", tbl()->lookup(node->block_shape()));
- s.args().append("paddings", tbl()->lookup(node->paddings()));
-
- s.state(locop::NodeSummary::State::Complete);
-
- return true;
+ return summary_node(tbl(), node, s);
}
bool CircleNodeSummaryBuilder::summary(const luci::CircleSpaceToDepth *node,
locop::NodeSummary &s) const
{
- s.args().append("input", tbl()->lookup(node->input()));
- s.args().append("block_size", pepper::str(node->block_size()));
-
- s.state(locop::NodeSummary::State::Complete);
-
- return true;
+ return summary_node(tbl(), node, s);
}
bool CircleNodeSummaryBuilder::summary(const luci::CircleSparseToDense *node,
locop::NodeSummary &s) const
{
- s.args().append("indices", tbl()->lookup(node->indices()));
- s.args().append("output_shape", tbl()->lookup(node->output_shape()));
- s.args().append("values", tbl()->lookup(node->values()));
- s.args().append("default_value", tbl()->lookup(node->default_value()));
-
- s.args().append("Validate_indices", pepper::str(node->validate_indices()));
-
- s.state(locop::NodeSummary::State::Complete);
-
- return true;
+ return summary_node(tbl(), node, s);
}
bool CircleNodeSummaryBuilder::summary(const luci::CircleSplit *node, locop::NodeSummary &s) const
{
- s.args().append("split_dim", tbl()->lookup(node->split_dim()));
- s.args().append("input", tbl()->lookup(node->input()));
-
- s.args().append("num_split", pepper::str(node->num_split()));
-
- s.state(locop::NodeSummary::State::Complete);
-
- return true;
+ return summary_node(tbl(), node, s);
}
bool CircleNodeSummaryBuilder::summary(const luci::CircleSplitV *node, locop::NodeSummary &s) const
{
- s.args().append("input", tbl()->lookup(node->input()));
- s.args().append("size_splits", tbl()->lookup(node->size_splits()));
- s.args().append("split_dim", tbl()->lookup(node->split_dim()));
-
- s.args().append("num_split", pepper::str(node->num_split()));
-
- s.state(locop::NodeSummary::State::Complete);
-
- return true;
+ return summary_node(tbl(), node, s);
}
bool CircleNodeSummaryBuilder::summary(const luci::CircleSqrt *node, locop::NodeSummary &s) const
@@ -1153,38 +1640,13 @@ bool CircleNodeSummaryBuilder::summary(const luci::CircleSquaredDifference *node
bool CircleNodeSummaryBuilder::summary(const luci::CircleSqueeze *node, locop::NodeSummary &s) const
{
- s.args().append("input", tbl()->lookup(node->input()));
-
- std::stringstream ss{"("};
- for (size_t i = 0; i < node->squeeze_dims().size(); ++i)
- {
- if (i != 0)
- ss << ", ";
- ss << node->squeeze_dims()[i];
- }
- ss << ")";
-
- s.args().append("squeeze_dims", ss.str());
- s.state(locop::NodeSummary::State::Complete);
- return true;
+ return summary_node(tbl(), node, s);
}
bool CircleNodeSummaryBuilder::summary(const luci::CircleStridedSlice *node,
locop::NodeSummary &s) const
{
- s.args().append("input", tbl()->lookup(node->input()));
- s.args().append("begin", tbl()->lookup(node->begin()));
- s.args().append("end", tbl()->lookup(node->end()));
- s.args().append("strides", tbl()->lookup(node->strides()));
-
- s.args().append("begin_mask", pepper::str(node->begin_mask()));
- s.args().append("end_mask", pepper::str(node->end_mask()));
- s.args().append("ellipsis_mask", pepper::str(node->ellipsis_mask()));
- s.args().append("new_axis_mask", pepper::str(node->new_axis_mask()));
- s.args().append("shrink_axis_mask", pepper::str(node->shrink_axis_mask()));
-
- s.state(locop::NodeSummary::State::Complete);
- return true;
+ return summary_node(tbl(), node, s);
}
bool CircleNodeSummaryBuilder::summary(const luci::CircleSub *node, locop::NodeSummary &s) const
@@ -1204,92 +1666,44 @@ bool CircleNodeSummaryBuilder::summary(const luci::CircleTanh *node, locop::Node
bool CircleNodeSummaryBuilder::summary(const luci::CircleTile *node, locop::NodeSummary &s) const
{
- s.args().append("input", tbl()->lookup(node->input()));
- s.args().append("multiples", tbl()->lookup(node->multiples()));
- s.state(locop::NodeSummary::State::Complete);
- return true;
+ return summary_node(tbl(), node, s);
}
bool CircleNodeSummaryBuilder::summary(const luci::CircleTopKV2 *node, locop::NodeSummary &s) const
{
- s.args().append("input", tbl()->lookup(node->input()));
- s.args().append("k", tbl()->lookup(node->k()));
- s.state(locop::NodeSummary::State::Complete);
- return true;
+ return summary_node(tbl(), node, s);
}
bool CircleNodeSummaryBuilder::summary(const luci::CircleTranspose *node,
locop::NodeSummary &s) const
{
- s.args().append("a", tbl()->lookup(node->a()));
- s.args().append("perm", tbl()->lookup(node->perm()));
- s.state(locop::NodeSummary::State::Complete);
- return true;
+ return summary_node(tbl(), node, s);
}
bool CircleNodeSummaryBuilder::summary(const luci::CircleTransposeConv *node,
locop::NodeSummary &s) const
{
- assert(node->padding() != luci::Padding::UNDEFINED);
-
- s.args().append("inputSizes", tbl()->lookup(node->inputSizes()));
- s.args().append("filter", tbl()->lookup(node->filter()));
- s.args().append("outBackprop", tbl()->lookup(node->outBackprop()));
-
- s.args().append("stride(h,w)", to_str(node->stride()));
- s.args().append("padding", to_str(node->padding()));
-
- s.state(locop::NodeSummary::State::Complete);
-
- return true;
+ return summary_node(tbl(), node, s);
}
bool CircleNodeSummaryBuilder::summary(const luci::CircleUnique *node, locop::NodeSummary &s) const
{
- s.args().append("input", tbl()->lookup(node->input()));
- s.args().append("idx_out_type", to_str(node->idx_out_type()));
- s.state(locop::NodeSummary::State::Complete);
- return true;
+ return summary_node(tbl(), node, s);
}
bool CircleNodeSummaryBuilder::summary(const luci::CircleUnpack *node, locop::NodeSummary &s) const
{
- s.args().append("value", tbl()->lookup(node->value()));
-
- s.args().append("num", pepper::str(node->num()));
- s.args().append("axis", pepper::str(node->axis()));
-
- s.state(locop::NodeSummary::State::Complete);
-
- return true;
+ return summary_node(tbl(), node, s);
}
bool CircleNodeSummaryBuilder::summary(const luci::CircleWhere *node, locop::NodeSummary &s) const
{
- s.args().append("condition", tbl()->lookup(node->condition()));
- s.state(locop::NodeSummary::State::Complete);
-
- return true;
+ return summary_node(tbl(), node, s);
}
bool CircleNodeSummaryBuilder::summary(const luci::CircleWhile *node, locop::NodeSummary &s) const
{
- for (uint32_t i = 0; i < node->input_count(); ++i)
- s.args().append("input", tbl()->lookup(node->input(i)));
-
- if (node->cond_graph() != nullptr)
- s.args().append("cond_graph", node->cond_graph()->name());
- else
- s.args().append("cond_branch", pepper::str(node->cond_branch()));
-
- if (node->body_graph() != nullptr)
- s.args().append("body_graph", node->body_graph()->name());
- else
- s.args().append("body_branch", pepper::str(node->body_branch()));
-
- s.state(locop::NodeSummary::State::Complete);
-
- return true;
+ return summary_node(tbl(), node, s);
}
bool CircleNodeSummaryBuilder::summary(const luci::CircleZerosLike *node,
@@ -1313,29 +1727,19 @@ bool CircleNodeSummaryBuilder::summary(const luci::CircleSplitVOut *node,
bool CircleNodeSummaryBuilder::summary(const luci::CircleTopKV2Out *node,
locop::NodeSummary &s) const
{
- s.args().append("topkv2", tbl()->lookup(node->input()));
- s.state(locop::NodeSummary::State::Complete);
- return true;
+ return summary_node(tbl(), node, s);
}
bool CircleNodeSummaryBuilder::summary(const luci::CircleUniqueOut *node,
locop::NodeSummary &s) const
{
- s.args().append("unique", tbl()->lookup(node->input()));
-
- s.state(locop::NodeSummary::State::Complete);
-
- return true;
+ return summary_node(tbl(), node, s);
}
bool CircleNodeSummaryBuilder::summary(const luci::CircleUnpackOut *node,
locop::NodeSummary &s) const
{
- s.args().append("unpack", tbl()->lookup(node->input()));
-
- s.state(locop::NodeSummary::State::Complete);
-
- return true;
+ return summary_node(tbl(), node, s);
}
bool CircleNodeSummaryBuilder::summary(const luci::CircleIfOut *node, locop::NodeSummary &s) const
@@ -1349,14 +1753,16 @@ bool CircleNodeSummaryBuilder::summary(const luci::CircleNonMaxSuppressionV4Out
return use_input(tbl(), node, s);
}
-bool CircleNodeSummaryBuilder::summary(const luci::CircleWhileOut *node,
+bool CircleNodeSummaryBuilder::summary(const luci::CircleNonMaxSuppressionV5Out *node,
locop::NodeSummary &s) const
{
- s.args().append("while", tbl()->lookup(node->input()));
-
- s.state(locop::NodeSummary::State::Complete);
+ return use_input(tbl(), node, s);
+}
- return true;
+bool CircleNodeSummaryBuilder::summary(const luci::CircleWhileOut *node,
+ locop::NodeSummary &s) const
+{
+ return summary_node(tbl(), node, s);
}
bool CircleNodeSummaryBuilder::summary(const luci::CircleInput *, locop::NodeSummary &s) const
@@ -1367,61 +1773,25 @@ bool CircleNodeSummaryBuilder::summary(const luci::CircleInput *, locop::NodeSum
bool CircleNodeSummaryBuilder::summary(const luci::CircleOutput *node, locop::NodeSummary &s) const
{
- s.args().append("from", tbl()->lookup(node->from()));
-
- s.state(locop::NodeSummary::State::Complete);
- return true;
+ return summary_node(tbl(), node, s);
}
bool CircleNodeSummaryBuilder::summary(const luci::CircleBCQFullyConnected *node,
locop::NodeSummary &s) const
{
- assert(node->fusedActivationFunction() != luci::FusedActFunc::UNDEFINED);
-
- s.args().append("input", tbl()->lookup(node->input()));
- s.args().append("weights_scales", tbl()->lookup(node->weights_scales()));
- s.args().append("weights_binary", tbl()->lookup(node->weights_binary()));
- s.args().append("bias", tbl()->lookup(node->bias()));
- s.args().append("weights_clusters", tbl()->lookup(node->weights_clusters()));
-
- s.args().append("fused", to_str(node->fusedActivationFunction()));
- s.args().append("weights_hidden_size", pepper::str(node->weights_hidden_size()));
-
- s.state(locop::NodeSummary::State::Complete);
-
- return true;
+ return summary_node(tbl(), node, s);
}
bool CircleNodeSummaryBuilder::summary(const luci::CircleBCQGather *node,
locop::NodeSummary &s) const
{
- s.args().append("input_scales", tbl()->lookup(node->input_scales()));
- s.args().append("input_binary", tbl()->lookup(node->input_binary()));
- s.args().append("indices", tbl()->lookup(node->indices()));
- s.args().append("input_clusters", tbl()->lookup(node->input_clusters()));
-
- s.args().append("axis", pepper::str(node->axis()));
- s.args().append("input_hidden_size", pepper::str(node->input_hidden_size()));
-
- s.state(locop::NodeSummary::State::Complete);
- return true;
+ return summary_node(tbl(), node, s);
}
bool CircleNodeSummaryBuilder::summary(const luci::CircleInstanceNorm *node,
locop::NodeSummary &s) const
{
- auto fused = node->fusedActivationFunction();
- assert(fused != luci::FusedActFunc::UNDEFINED);
-
- s.args().append("input", tbl()->lookup(node->input()));
- s.args().append("gamma", tbl()->lookup(node->gamma()));
- s.args().append("beta", tbl()->lookup(node->beta()));
- s.args().append("epsilon", pepper::str(node->epsilon()));
- s.args().append("fused_activation_function", to_str(fused));
-
- s.state(locop::NodeSummary::State::Complete);
-
- return true;
+ return summary_node(tbl(), node, s);
}
} // namespace
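For reference, the refactor above collapses most summary() overloads into thin wrappers around the free summary_node()/use_x()/use_xy() helpers, while dispatch stays in CircleNodeSummaryBuilderBase::build() via the CIRCLE_NODE X-macro. Roughly, one entry of luci/IR/CircleNodes.lst expands inside build() to the following (the ADD entry is used here only as an illustration):

    // CIRCLE_NODE(ADD, luci::CircleAdd) expands to roughly:
    if (dynamic_cast<const luci::CircleAdd *>(node))
    {
      s.opname(circle_opname(node->opnum()));
      return summary(dynamic_cast<const luci::CircleAdd *>(node), s);
    }

so adding a new operator only needs a CircleNodes.lst entry plus one summary() overload.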
diff --git a/compiler/luci/pass/include/luci/CircleOptimizer.h b/compiler/luci/pass/include/luci/CircleOptimizer.h
index 312749f83..a832844f8 100644
--- a/compiler/luci/pass/include/luci/CircleOptimizer.h
+++ b/compiler/luci/pass/include/luci/CircleOptimizer.h
@@ -32,6 +32,7 @@ public:
{
enum Algorithm
{
+ FuseBatchNormWithTConv,
FuseBCQ,
FuseInstanceNorm,
ResolveCustomOpAdd,
@@ -39,6 +40,7 @@ public:
ResolveCustomOpMatMul,
QuantizeDequantizeWeights,
QuantizeWithMinMax,
+ Requantize,
};
enum AlgorithmParameters
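With the new algorithm entries in place, a caller requests them through the existing Options interface. A minimal sketch, assuming the Options accessors enable()/param() declared elsewhere in this header and a loco::Graph* produced by the circle importer:

    luci::CircleOptimizer optimizer;
    auto options = optimizer.options();
    options->enable(luci::CircleOptimizer::Options::Algorithm::FuseBatchNormWithTConv);
    optimizer.optimize(graph);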
diff --git a/compiler/luci/pass/include/luci/Pass/FuseBatchNormWithTConv.h b/compiler/luci/pass/include/luci/Pass/FuseBatchNormWithTConv.h
new file mode 100644
index 000000000..d3e930a36
--- /dev/null
+++ b/compiler/luci/pass/include/luci/Pass/FuseBatchNormWithTConv.h
@@ -0,0 +1,37 @@
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __LUCI_FUSE_BATCH_NORM_WITH_TCONV_PASS_H__
+#define __LUCI_FUSE_BATCH_NORM_WITH_TCONV_PASS_H__
+
+#include <logo/Pass.h>
+
+namespace luci
+{
+
+/**
+ * @brief Class to fuse Batch Normalization into CircleTransposeConv
+ */
+struct FuseBatchNormWithTConvPass final : public logo::Pass
+{
+ const char *name(void) const final { return "luci::FuseBatchNormWithTConvPass"; }
+
+ bool run(loco::Graph *g) final;
+};
+
+} // namespace luci
+
+#endif // __LUCI_FUSE_BATCH_NORM_WITH_TCONV_PASS_H__
diff --git a/runtime/onert/backend/cpu/ops/ReLULayer.h b/compiler/luci/pass/include/luci/Pass/RequantizePass.h
index 4ba2be772..2442b24ea 100644
--- a/runtime/onert/backend/cpu/ops/ReLULayer.h
+++ b/compiler/luci/pass/include/luci/Pass/RequantizePass.h
@@ -14,44 +14,39 @@
* limitations under the License.
*/
-#ifndef __ONERT_BACKEND_CPU_OPS_RELULAYER_H__
-#define __ONERT_BACKEND_CPU_OPS_RELULAYER_H__
+#ifndef __LUCI_REQUANTIZE_PASS_H__
+#define __LUCI_REQUANTIZE_PASS_H__
-#include <backend/IPortableTensor.h>
+#include <loco.h>
-#include <exec/IFunction.h>
+#include <logo/Pass.h>
-namespace onert
-{
-namespace backend
-{
-namespace cpu
-{
-namespace ops
+#include <luci/Pass/QuantizationParameters.h>
+
+namespace luci
{
-class ReLULayer : public ::onert::exec::IFunction
+/**
+ * @brief Pass to requantize an already-quantized model (e.g. symmetric int8 to asymmetric uint8)
+ */
+class RequantizePass : public logo::Pass
{
public:
- ReLULayer();
+ RequantizePass(loco::DataType input_dtype, loco::DataType output_dtype)
+ : _input_dtype{input_dtype}, _output_dtype{output_dtype}
+ {
+ // DO NOTHING
+ }
+ virtual const char *name(void) const { return "luci::RequantizePass"; }
public:
- void reluFloat32();
-
- void reluQuant8();
-
- void configure(const IPortableTensor *input, IPortableTensor *output);
-
- void run() override;
+ bool run(loco::Graph *graph);
private:
- const IPortableTensor *_input;
- IPortableTensor *_output;
+ loco::DataType _input_dtype;
+ loco::DataType _output_dtype;
};
-} // namespace ops
-} // namespace cpu
-} // namespace backend
-} // namespace onert
+} // namespace luci
-#endif // __ONERT_BACKEND_CPU_OPS_RELULAYER_H__
+#endif //__LUCI_REQUANTIZE_PASS_H__
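RequantizePass only changes the integer encoding, not the represented real values: on the supported int8 to uint8 path, both the stored values and the zero points are shifted by +128 while the scale stays the same, so scale * (q - zp) is unchanged. A small numeric sketch of that invariant (illustrative values only):

    int32_t q_s8 = -127, zp_s8 = 0;   // symmetric int8 encoding
    float scale = 0.5f;
    int32_t q_u8 = q_s8 + 128;        // 1
    int32_t zp_u8 = zp_s8 + 128;      // 128
    // scale * (q_s8 - zp_s8) == scale * (q_u8 - zp_u8) == -63.5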
diff --git a/compiler/luci/pass/src/CircleOptimizer.cpp b/compiler/luci/pass/src/CircleOptimizer.cpp
index 2edf7a9c6..2ee759b4e 100644
--- a/compiler/luci/pass/src/CircleOptimizer.cpp
+++ b/compiler/luci/pass/src/CircleOptimizer.cpp
@@ -16,11 +16,13 @@
#include "luci/CircleOptimizer.h"
+#include "luci/Pass/FuseBatchNormWithTConv.h"
#include "luci/Pass/FuseBCQPass.h"
#include "luci/Pass/FuseInstanceNormPass.h"
#include "luci/Pass/ResolveCustomOpAddPass.h"
#include "luci/Pass/ResolveCustomOpBatchMatMulPass.h"
#include "luci/Pass/ResolveCustomOpMatMulPass.h"
+#include "luci/Pass/RequantizePass.h"
#include "luci/Pass/QuantizeWithMinMaxPass.h"
#include "luci/Pass/QuantizeDequantizeWeightsPass.h"
// TODO add more passes
@@ -34,6 +36,7 @@
#include "ProgressReporter.h"
#include "CircleOptimizerUtils.h"
+#include <luci/IR/CircleNodes.h>
#include <logo/Phase.h>
#include <memory>
@@ -125,6 +128,10 @@ void CircleOptimizer::optimize(loco::Graph *g) const
{
phase.emplace_back(std::make_unique<FuseBCQPass>());
}
+ if (_options->query(Options::Algorithm::FuseBatchNormWithTConv))
+ {
+ phase.emplace_back(std::make_unique<FuseBatchNormWithTConvPass>());
+ }
// Shape inference is needed for added nodes doing above transformations
phase.emplace_back(std::make_unique<luci::ShapeInferencePass>());
@@ -163,6 +170,14 @@ void CircleOptimizer::quantize(loco::Graph *g) const
throw std::runtime_error("Unsupported granularity. List of supported granularity: " +
to_string(fakeq_supported_granularity));
+ // Clear existing quantparams before doing fake quantization
+ for (auto node : loco::active_nodes(loco::output_nodes(g)))
+ {
+ auto circle_node = loco::must_cast<luci::CircleNode *>(node);
+ if (circle_node->quantparam() != nullptr)
+ circle_node->quantparam(nullptr);
+ }
+
luci::QuantizeDequantizeWeightsPass fake_quantizer(
str_to_dtype(input_dtype), str_to_dtype(output_dtype), str_to_granularity(granularity));
fake_quantizer.run(g);
@@ -196,6 +211,27 @@ void CircleOptimizer::quantize(loco::Graph *g) const
quantizer.run(g);
}
+ // Requantize
+ if (_options->query(Options::Algorithm::Requantize))
+ {
+ static const std::vector<std::string> rq_supported_input_dtype{"int8"};
+ static const std::vector<std::string> rq_supported_output_dtype{"uint8"};
+
+ auto input_dtype = _options->param(Options::AlgorithmParameters::Quantize_input_dtype);
+ auto output_dtype = _options->param(Options::AlgorithmParameters::Quantize_output_dtype);
+
+ if (!in_array(to_lower_case(input_dtype), rq_supported_input_dtype))
+ throw std::runtime_error("Unsupported input type. List of supported input types: " +
+ to_string(rq_supported_input_dtype));
+
+ if (!in_array(to_lower_case(output_dtype), rq_supported_output_dtype))
+ throw std::runtime_error("Unsupported output type. List of supported output types: " +
+ to_string(rq_supported_output_dtype));
+
+ luci::RequantizePass requantizer(str_to_dtype(input_dtype), str_to_dtype(output_dtype));
+ requantizer.run(g);
+ }
+
logo::Phase phase;
// Do Shape/Type inference
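Requantization is driven the same way as the other quantization steps; a minimal sketch, assuming the Options::enable()/param() accessors used by the driver code above:

    auto options = optimizer.options();
    options->enable(luci::CircleOptimizer::Options::Algorithm::Requantize);
    options->param(luci::CircleOptimizer::Options::AlgorithmParameters::Quantize_input_dtype, "int8");
    options->param(luci::CircleOptimizer::Options::AlgorithmParameters::Quantize_output_dtype, "uint8");
    optimizer.quantize(graph);   // throws std::runtime_error for unsupported dtypes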
diff --git a/compiler/luci/pass/src/FuseBCQPass.cpp b/compiler/luci/pass/src/FuseBCQPass.cpp
index 260de5b30..7aa2e3e80 100644
--- a/compiler/luci/pass/src/FuseBCQPass.cpp
+++ b/compiler/luci/pass/src/FuseBCQPass.cpp
@@ -38,9 +38,9 @@ const std::string node_name_prefix(luci::NodeName node_name)
{
std::string prefix = node_name;
- if (prefix.find("ReadVariableOp/resource/") != std::string::npos)
+ if (prefix.find("/ReadVariableOp/resource") != std::string::npos)
{
- const auto start_index = prefix.find("ReadVariableOp/resource/");
+ const auto start_index = prefix.find("/ReadVariableOp/resource");
const auto left_prefix = prefix.substr(0, start_index);
const auto right_prefix = prefix.substr(start_index + 24);
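The search string now includes the leading '/', so the split keeps the parent scope intact. For example (the node name is chosen only for illustration):

    std::string name = "seq/dense/kernel/ReadVariableOp/resource";
    auto start_index  = name.find("/ReadVariableOp/resource"); // index of the leading '/'
    auto left_prefix  = name.substr(0, start_index);           // "seq/dense/kernel"
    auto right_prefix = name.substr(start_index + 24);         // "" (24 == strlen("/ReadVariableOp/resource"))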
diff --git a/compiler/luci/pass/src/FuseBatchNormWithTConv.cpp b/compiler/luci/pass/src/FuseBatchNormWithTConv.cpp
new file mode 100644
index 000000000..e39455b1a
--- /dev/null
+++ b/compiler/luci/pass/src/FuseBatchNormWithTConv.cpp
@@ -0,0 +1,159 @@
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "luci/Pass/FuseBatchNormWithTConv.h"
+
+#include <luci/IR/CircleNodes.h>
+
+namespace
+{
+/**
+ * NOTE TF's fusedBatchNorm is converted to mul and add of Circle.
+ *
+ * BEFORE
+ *
+ * [CircleTransposeConv]
+ * |
+ * [mul]
+ * |
+ * [add]
+ *
+ * AFTER
+ *
+ * [CircleTransposeConv]
+ */
+bool fused_batch_norm_with_tconv(luci::CircleTransposeConv *tconv)
+{
+ // check whether it has bias or not. This optimization works only if it doesn't.
+ auto bias = dynamic_cast<luci::CircleOutputExclude *>(tconv->bias());
+ if (not bias)
+ return false;
+
+ // get weight of tconv
+ auto filter = dynamic_cast<luci::CircleConst *>(tconv->filter());
+ if (not filter)
+ return false;
+ if (filter->dtype() != loco::DataType::FLOAT32)
+ return false;
+
+ // get mul node
+ auto tconv_output = loco::succs(tconv);
+ assert(tconv_output.size() == 1);
+ auto mul = dynamic_cast<luci::CircleMul *>(*tconv_output.begin());
+ if (not mul)
+ return false;
+ if (mul->dtype() != loco::DataType::FLOAT32)
+ return false;
+
+ // get add node
+ auto mul_output = loco::succs(mul);
+ assert(mul_output.size() == 1);
+ auto add = dynamic_cast<luci::CircleAdd *>(*mul_output.begin());
+ if (not add)
+ return false;
+ if (add->dtype() != loco::DataType::FLOAT32)
+ return false;
+ if (add->fusedActivationFunction() != luci::FusedActFunc::NONE &&
+ add->fusedActivationFunction() != luci::FusedActFunc::RELU6)
+ return false;
+
+ // get scale of batchnorm
+ auto scale = dynamic_cast<luci::CircleConst *>(mul->y());
+ if (not scale)
+ return false;
+
+ // scale dim(0) == tconv filter channel dim
+ if (filter->rank() != 4)
+ return false;
+ auto filter_channel_dim = filter->dim(3).value();
+ if (scale->rank() != 1)
+ return false;
+ auto scale_dim = scale->dim(0).value();
+ if (filter_channel_dim != scale_dim)
+ return false;
+
+ // get shift of batchnorm
+ auto shift = dynamic_cast<luci::CircleConst *>(add->y());
+ if (not shift)
+ return false;
+
+ // shift dim(0) == tconv filter channel dim
+ if (shift->rank() != 1)
+ return false;
+ auto shift_dim = shift->dim(0).value();
+ if (filter_channel_dim != shift_dim)
+ return false;
+
+ // fold the batchnorm multiply into the filter: weight <- weight * scale (the shift becomes the bias below)
+ uint32_t filter_batch_dim = filter->dim(0).value();
+ uint32_t filter_height_dim = filter->dim(1).value();
+ uint32_t filter_width_dim = filter->dim(2).value();
+ for (uint32_t c = 0; c < filter_channel_dim; c++)
+ {
+ for (uint32_t n = 0; n < filter_batch_dim; n++)
+ {
+ for (uint32_t h = 0; h < filter_height_dim; h++)
+ {
+ for (uint32_t w = 0; w < filter_width_dim; w++)
+ {
+ uint32_t offset = n * filter_height_dim * filter_width_dim * filter_channel_dim +
+ h * filter_width_dim * filter_channel_dim + w * filter_channel_dim + c;
+ filter->at<loco::DataType::FLOAT32>(offset) *= scale->at<loco::DataType::FLOAT32>(c);
+ }
+ }
+ }
+ }
+
+ // fuse shift with transposed conv
+ tconv->bias(shift);
+
+ if (add->fusedActivationFunction() == luci::FusedActFunc::RELU6)
+ {
+ // separate relu op from add op
+ auto relu = add->graph()->nodes()->create<luci::CircleRelu6>();
+ relu->features(tconv);
+
+ // redirect uses of add to relu; the detached mul/add pair becomes dead
+ replace(add).with(relu);
+ }
+ else
+ {
+ replace(add).with(tconv);
+ }
+
+ return true;
+}
+
+} // namespace
+
+namespace luci
+{
+
+bool FuseBatchNormWithTConvPass::run(loco::Graph *g)
+{
+ bool changed = false;
+ for (auto node : loco::active_nodes(loco::output_nodes(g)))
+ {
+ auto tconv = dynamic_cast<luci::CircleTransposeConv *>(node);
+ if (not tconv)
+ continue;
+
+ changed |= fused_batch_norm_with_tconv(tconv);
+ }
+
+ return changed;
+}
+
+} // namespace luci
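The pass relies on the usual batch-norm folding identity: with per-channel scale and shift, scale[c] * tconv(x, W)[..., c] + shift[c] equals tconv(x, W')[..., c] + shift[c] where W'[..., c] = scale[c] * W[..., c], so the multiply is folded into the filter and the shift becomes the transposed convolution's bias. A toy scalar check of that identity:

    float x = 2.0f, w = 3.0f, scale = 0.5f, shift = 1.0f;
    float bn_after_tconv = scale * (w * x) + shift;  // 4.0
    float fused          = (w * scale) * x + shift;  // 4.0
    // both paths agree, which is why only the filter values and the bias input change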
diff --git a/compiler/luci/pass/src/QuantizeWithMinMaxPass.cpp b/compiler/luci/pass/src/QuantizeWithMinMaxPass.cpp
index b335a53b4..60c1cdd72 100644
--- a/compiler/luci/pass/src/QuantizeWithMinMaxPass.cpp
+++ b/compiler/luci/pass/src/QuantizeWithMinMaxPass.cpp
@@ -472,7 +472,12 @@ struct QuantizeWeights final : public luci::CircleNodeMutableVisitor<bool>
if (granularity == QuantizationGranularity::ChannelWise)
{
auto quantparam = circle_node->quantparam();
- assert(quantparam != nullptr);
+ if (quantparam == nullptr)
+ {
+ assert(false && "quantparam is nullptr");
+ return false;
+ }
+
auto min = quantparam->min;
auto scaling_factor = quantparam->scale;
int32_t channel_dim_index = 0;
diff --git a/compiler/luci/pass/src/RequantizePass.cpp b/compiler/luci/pass/src/RequantizePass.cpp
new file mode 100644
index 000000000..49fbf76ec
--- /dev/null
+++ b/compiler/luci/pass/src/RequantizePass.cpp
@@ -0,0 +1,241 @@
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ * Copyright 2019 The TensorFlow Authors. All Rights Reserved.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "luci/Pass/RequantizePass.h"
+#include "QuantizationUtils.h"
+
+#include <luci/IR/CircleNodes.h>
+#include <luci/IR/CircleNodeVisitor.h>
+#include <luci/Log.h>
+
+#include <oops/UserExn.h>
+
+#include <iostream>
+#include <cmath>
+
+namespace luci
+{
+
+namespace
+{
+
+// Check if the node is the bias of Conv2D, DepthwiseConv2D, or FullyConnected layer
+bool is_bias(CircleConst *node)
+{
+ if (node == nullptr)
+ return false;
+
+ auto succs = loco::succs(node);
+ if (succs.size() != 1) // assume bias is used by only one node
+ return false;
+
+ for (auto out : succs)
+ {
+ auto conv = dynamic_cast<CircleConv2D *>(out);
+ if (conv != nullptr && conv->bias() == node)
+ return true;
+
+ auto dw_conv = dynamic_cast<CircleDepthwiseConv2D *>(out);
+ if (dw_conv != nullptr && dw_conv->bias() == node)
+ return true;
+
+ auto fc = dynamic_cast<CircleFullyConnected *>(out);
+ if (fc != nullptr && fc->bias() == node)
+ return true;
+
+ // TODO: add TransposeConv when bias is supported in CircleTransposeConv
+ }
+ return false;
+}
+
+void requant_nonconst_int8_to_uint8(CircleNode *circle_node)
+{
+ assert(circle_node->dtype() == loco::DataType::S8);
+
+ auto quantparam = circle_node->quantparam();
+ assert(quantparam != nullptr);
+ for (size_t i = 0; i < quantparam->zerop.size(); ++i)
+ {
+ quantparam->zerop[i] += 128;
+ }
+ circle_node->dtype(loco::DataType::U8);
+}
+
+// Requantize CircleConst from symmetric int8 to asymmetric uint8
+// Original values: -127 ~ 127
+// After requantization: 1 ~ 255 (zp <- zp + 128)
+void requant_const_int8_to_uint8(CircleConst *node)
+{
+ assert(node->dtype() == loco::DataType::S8);
+
+ uint32_t size = node->size<loco::DataType::S8>();
+ std::vector<int32_t> requantized_values(size);
+ for (uint32_t i = 0; i < size; ++i)
+ {
+ int32_t data = node->at<loco::DataType::S8>(i);
+ requantized_values[i] = data + 128;
+ }
+
+ node->dtype(loco::DataType::U8); // change the type of tensor
+ node->size<loco::DataType::U8>(size);
+ for (uint32_t i = 0; i < size; ++i)
+ {
+ assert(1 <= requantized_values[i] && requantized_values[i] <= 255);
+ node->at<loco::DataType::U8>(i) = requantized_values[i];
+ }
+
+ auto quantparam = node->quantparam();
+ assert(quantparam != nullptr);
+ for (size_t i = 0; i < quantparam->zerop.size(); ++i)
+ {
+ quantparam->zerop[i] += 128;
+ }
+}
+
+/**
+ * @brief RequantizeNonConst requantizes tensors for activations
+ */
+struct RequantizeNonConst final : public luci::CircleNodeMutableVisitor<bool>
+{
+ RequantizeNonConst(loco::DataType input, loco::DataType output)
+ : _input_type(input), _output_type(output)
+ {
+ }
+
+ loco::DataType _input_type;
+ loco::DataType _output_type;
+
+ // Requantize input tensors of each node
+ bool visit(luci::CircleNode *node)
+ {
+ LOGGER(l);
+ INFO(l) << "RequantizeNonConst visit node: " << node->name() << std::endl;
+ auto arity = node->arity();
+ for (uint32_t i = 0; i < arity; i++)
+ {
+ auto input_node = node->arg(i);
+ auto circle_node = loco::must_cast<luci::CircleNode *>(input_node);
+
+ // Check if this was quantized (only quantized tensors are requantized)
+ if (circle_node->quantparam() == nullptr)
+ continue;
+
+ // Check if this is already requantized
+ if (circle_node->dtype() == _output_type)
+ continue;
+
+ // Check if this is not const (only non-const is requantized in this function)
+ auto circle_const = dynamic_cast<CircleConst *>(circle_node);
+ if (circle_const != nullptr)
+ continue;
+
+ if (_input_type == loco::DataType::S8 && _output_type == loco::DataType::U8)
+ requant_nonconst_int8_to_uint8(circle_node);
+ }
+ return false;
+ }
+};
+
+/**
+ * @brief RequantizeConst requantizes tensors for weights
+ */
+struct RequantizeConst final : public luci::CircleNodeMutableVisitor<bool>
+{
+ RequantizeConst(loco::DataType input, loco::DataType output)
+ : _input_type(input), _output_type(output)
+ {
+ }
+
+ loco::DataType _input_type;
+ loco::DataType _output_type;
+
+ // Requantize input tensors of each node
+ bool visit(luci::CircleNode *node)
+ {
+ LOGGER(l);
+ INFO(l) << "RequantizeConst visit node: " << node->name() << std::endl;
+ auto arity = node->arity();
+ for (uint32_t i = 0; i < arity; i++)
+ {
+ auto input_node = node->arg(i);
+ auto circle_node = loco::must_cast<luci::CircleNode *>(input_node);
+
+ // Check if this was quantized (only quantized tensors are requantized)
+ if (circle_node->quantparam() == nullptr)
+ continue;
+
+ // Check if this is already requantized
+ if (circle_node->dtype() == _output_type)
+ continue;
+
+ // Check if this is const (only const is requantized in this function)
+ auto circle_const = dynamic_cast<CircleConst *>(circle_node);
+ if (circle_const == nullptr)
+ continue;
+
+ // Check if this is not bias
+ // bias is not requantized when int8 -> uint8
+ if (is_bias(circle_const))
+ continue;
+
+ if (_input_type == loco::DataType::S8 && _output_type == loco::DataType::U8)
+ requant_const_int8_to_uint8(circle_const);
+ }
+ return false;
+ }
+};
+
+} // namespace
+
+bool RequantizePass::run(loco::Graph *g)
+{
+ LOGGER(l);
+ INFO(l) << "RequantizePass Start" << std::endl;
+
+ // Requantize non-const (activations)
+ for (auto node : loco::active_nodes(loco::output_nodes(g)))
+ {
+ RequantizeNonConst rqnc(_input_dtype, _output_dtype);
+ auto circle_node = loco::must_cast<luci::CircleNode *>(node);
+ circle_node->accept(&rqnc);
+ }
+
+ // Requantize const (including weights, constants)
+ for (auto node : loco::active_nodes(loco::output_nodes(g)))
+ {
+ RequantizeConst rqc(_input_dtype, _output_dtype);
+ auto circle_node = loco::must_cast<luci::CircleNode *>(node);
+ circle_node->accept(&rqc);
+ }
+
+ // Update output dtype
+ auto graph_outputs = g->outputs();
+ for (auto node : loco::output_nodes(g))
+ {
+ auto circle_node = loco::must_cast<luci::CircleOutput *>(node);
+ if (static_cast<luci::CircleNode *>(circle_node->from())->dtype() == _output_dtype)
+ {
+ circle_node->dtype(_output_dtype);
+ auto graph_output = graph_outputs->at(circle_node->index());
+ graph_output->dtype(_output_dtype);
+ }
+ }
+
+ INFO(l) << "RequantizePass End" << std::endl;
+ return false; // one time run
+}
+
+} // namespace luci
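
For reference, a minimal sketch of the int8-to-uint8 mapping that the requant_nonconst_int8_to_uint8 and requant_const_int8_to_uint8 helpers used above are assumed to implement: the scale is kept and the zero point (and, for constant tensors, every stored value) is shifted by +128, so the affine relation real_value = scale * (quantized - zero_point) is preserved. The names below are illustrative only, not the actual helpers.

    #include <cstdint>

    // Hedged sketch, assuming uint8 = int8 + 128 with the scale unchanged.
    inline uint8_t shift_value_s8_to_u8(int8_t v)
    {
      return static_cast<uint8_t>(static_cast<int32_t>(v) + 128);
    }

    inline int64_t shift_zero_point_s8_to_u8(int64_t zp) { return zp + 128; }
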
diff --git a/compiler/luci/service/src/CircleShapeInferenceRule.cpp b/compiler/luci/service/src/CircleShapeInferenceRule.cpp
index 6355ec546..db25186b1 100644
--- a/compiler/luci/service/src/CircleShapeInferenceRule.cpp
+++ b/compiler/luci/service/src/CircleShapeInferenceRule.cpp
@@ -47,49 +47,19 @@ std::ostream &operator<<(std::ostream &os, const loco::TensorShape &tensor_shape
return os;
}
-// Call this for CircleAvgPool2D and CircleMaxPool2D only
-template <class Pool2DType> loco::NodeShape infer_pool_2d_shape(const Pool2DType *node)
+loco::TensorShape own_shape(const luci::CircleNode *node)
{
- LUCI_ASSERT(loco::shape_known(node->value()), "Shape must be known");
-
- auto ifm_shape = loco::shape_get(node->value()).template as<loco::TensorShape>();
- assert(ifm_shape.rank() == 4);
-
- uint32_t input_height = ifm_shape.dim(1).value();
- uint32_t input_width = ifm_shape.dim(2).value();
- uint32_t stride_height = node->stride()->h();
- uint32_t stride_width = node->stride()->w();
- uint32_t window_height = node->filter()->h();
- uint32_t window_width = node->filter()->w();
- uint32_t dilation_height = 1; // dilation for CircleAvgPool2D and CircleMaxPool2D is 1
- uint32_t dilation_width = 1;
- uint32_t effective_window_height = dilation_height * (window_height - 1) + 1;
- uint32_t effective_window_width = dilation_width * (window_width - 1) + 1;
-
- uint32_t output_height = 0;
- uint32_t output_width = 0;
-
- if (node->padding() == luci::Padding::VALID)
- {
- output_height = (input_height + stride_height - effective_window_height) / stride_height;
- output_width = (input_width + stride_width - effective_window_width) / stride_width;
- }
- else if (node->padding() == luci::Padding::SAME)
- {
- output_height = (input_height + stride_height - 1) / stride_height;
- output_width = (input_width + stride_width - 1) / stride_width;
- }
- else
- LUCI_ASSERT(false, "Wrong padding type");
-
- loco::TensorShape ofm_shape;
- ofm_shape.rank(4);
- ofm_shape.dim(0) = ifm_shape.dim(0);
- ofm_shape.dim(1) = output_height;
- ofm_shape.dim(2) = output_width;
- ofm_shape.dim(3) = ifm_shape.dim(3);
+ loco::TensorShape shape;
+ shape.rank(node->rank());
+ for (uint32_t r = 0; r < node->rank(); ++r)
+ shape.dim(r) = loco::Dimension(node->dim(r).value());
+ return shape;
+}
- return loco::NodeShape{ofm_shape};
+loco::NodeShape use_own(const luci::CircleNode *node)
+{
+ loco::TensorShape shape = own_shape(node);
+ return loco::NodeShape{shape};
}
/**
@@ -192,6 +162,304 @@ loco::TensorShape broadcast_shape(const loco::TensorShape &x, const loco::Tensor
return output_shape;
}
+/**
+ * @brief vector_from_constant will return int64_t vector from CircleConst node
+ */
+template <loco::DataType T> std::vector<int64_t> vector_from_constant(luci::CircleConst *const_node)
+{
+ std::vector<int64_t> result;
+
+ for (uint32_t idx = 0; idx < const_node->size<T>(); ++idx)
+ result.push_back(const_node->at<T>(idx));
+
+ return result;
+}
+
+template <class CIRCLENODE> loco::NodeShape broadcast_xy(const CIRCLENODE *node)
+{
+ auto x_shape = loco::shape_get(node->x()).template as<loco::TensorShape>();
+ auto y_shape = loco::shape_get(node->y()).template as<loco::TensorShape>();
+
+ auto output_shape = broadcast_shape(x_shape, y_shape);
+
+ return loco::NodeShape{output_shape};
+}
+
+template <class CIRCLENODE> loco::NodeShape use_x(const CIRCLENODE *node)
+{
+ auto x_shape = loco::shape_get(node->x()).template as<loco::TensorShape>();
+ return loco::NodeShape{x_shape};
+}
+
+template <class CIRCLENODE> loco::NodeShape use_logits(const CIRCLENODE *node)
+{
+ auto shape = loco::shape_get(node->logits()).template as<loco::TensorShape>();
+ return loco::NodeShape{shape};
+}
+
+template <class CIRCLENODE>
+loco::NodeShape use_paddings(const CIRCLENODE *node, const luci::CircleConst *paddings)
+{
+ const loco::DataType S32 = loco::DataType::S32;
+
+ auto input_shape = loco::shape_get(node->input()).template as<loco::TensorShape>();
+
+ // TODO support other data type
+ LUCI_ASSERT(paddings->dtype() == S32, "Only support int32 for now");
+ LUCI_ASSERT(paddings->rank() == 2, "paddings should be rank 2")
+
+ int32_t n = paddings->dim(0).value();
+ int32_t v = paddings->dim(1).value();
+
+ LUCI_ASSERT(v == 2, "paddings should be [n, 2]");
+ LUCI_ASSERT(n == int32_t(input_shape.rank()),
+ "paddings [n, 2] should have same value of input rank");
+
+ loco::TensorShape output_shape;
+
+ output_shape.rank(input_shape.rank());
+ for (int32_t ni = 0; ni < n; ++ni)
+ {
+ int32_t idx = ni * 2;
+ int value = input_shape.dim(ni).value();
+ value += paddings->at<S32>(idx + 0); // left
+ value += paddings->at<S32>(idx + 1); // right
+ output_shape.dim(ni) = value;
+ }
+
+ return loco::NodeShape{output_shape};
+}
+
+loco::NodeShape infer_add_n(const luci::CircleAddN *node)
+{
+ auto shape = loco::shape_get(node->inputs(0)).as<loco::TensorShape>();
+
+ for (uint32_t idx = 1; idx < node->arity(); ++idx)
+ {
+ auto shape_idx = loco::shape_get(node->inputs(idx)).as<loco::TensorShape>();
+ if (!(shape == shape_idx))
+ {
+ INTERNAL_EXN_V("ADD_N shape not same as the first input: ", idx);
+ }
+ }
+ return loco::NodeShape{shape};
+}
+
+loco::NodeShape infer_arg_max(const luci::CircleArgMax *node)
+{
+ auto input_shape = loco::shape_get(node->input()).as<loco::TensorShape>();
+ auto dimension_shape = loco::shape_get(node->dimension()).as<loco::TensorShape>();
+
+ int64_t select_axis = 0;
+ {
+ LUCI_ASSERT(node->dimension(), "2nd input dimension() should not be nullptr");
+
+ // Only support when node's dimension() is CircleConst with S32/S64
+ // Support S32 for now.
+ auto const_shape_node = loco::must_cast<luci::CircleConst *>(node->dimension());
+ LUCI_ASSERT(const_shape_node->dtype() == loco::DataType::S32,
+ "Only support int32 CircleConst for CircleArgMax");
+
+ if (const_shape_node->rank() > 1)
+ INTERNAL_EXN_V("Only support rank 0/1 CircleConst",
+ oops::to_uint32(const_shape_node->rank()));
+
+ select_axis = const_shape_node->scalar<loco::DataType::S32>();
+ }
+ assert(select_axis < input_shape.rank());
+ assert(select_axis >= 0); // TODO support negative axis; this breaks for now
+
+ // NOTE select_axis is removed
+ loco::TensorShape shape_output;
+ uint32_t rank = input_shape.rank();
+ uint32_t shrink = static_cast<uint32_t>(select_axis);
+ assert(rank > 0);
+ shape_output.rank(rank - 1);
+ for (uint32_t r = 0, d = 0; r < rank; ++r)
+ {
+ if (r == shrink)
+ continue;
+ shape_output.dim(d++) = input_shape.dim(r);
+ }
+ return loco::NodeShape{shape_output};
+}
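Worked example for the rule above: ArgMax over an input of shape [2, 3, 4] with a constant axis of 1 drops that axis, so the inferred output shape is [2, 4].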
+
+loco::NodeShape infer_arg_min(const luci::CircleArgMin *node)
+{
+ auto input_shape = loco::shape_get(node->input()).as<loco::TensorShape>();
+ auto dimension_shape = loco::shape_get(node->dimension()).as<loco::TensorShape>();
+
+ int64_t select_axis = 0;
+ {
+ LUCI_ASSERT(node->dimension(), "2nd input dimension() should not be nullptr");
+
+ // Only support when node's dimension() is CircleConst with S32/S64
+ // Support S32 for now.
+ auto const_shape_node = loco::must_cast<luci::CircleConst *>(node->dimension());
+ LUCI_ASSERT(const_shape_node->dtype() == loco::DataType::S32,
+ "Only support int32 CircleConst for CircleArgMin");
+
+ if (const_shape_node->rank() > 1)
+ INTERNAL_EXN_V("Only support rank 0/1 CircleConst",
+ oops::to_uint32(const_shape_node->rank()));
+
+ select_axis = const_shape_node->scalar<loco::DataType::S32>();
+ }
+ assert(select_axis < input_shape.rank());
+ assert(select_axis >= 0); // TODO support negative axis; this breaks for now
+
+ // NOTE select_axis is removed
+ loco::TensorShape shape_output;
+ uint32_t rank = input_shape.rank();
+ uint32_t shrink = static_cast<uint32_t>(select_axis);
+ assert(rank > 0);
+ shape_output.rank(rank - 1);
+ for (uint32_t r = 0, d = 0; r < rank; ++r)
+ {
+ if (r == shrink)
+ continue;
+ shape_output.dim(d++) = input_shape.dim(r);
+ }
+ return loco::NodeShape{shape_output};
+}
+
+// Call this for CircleAvgPool2D and CircleMaxPool2D only
+template <class Pool2DType> loco::NodeShape infer_pool_2d_shape(const Pool2DType *node)
+{
+ LUCI_ASSERT(loco::shape_known(node->value()), "Shape must be known");
+
+ auto ifm_shape = loco::shape_get(node->value()).template as<loco::TensorShape>();
+ assert(ifm_shape.rank() == 4);
+
+ uint32_t input_height = ifm_shape.dim(1).value();
+ uint32_t input_width = ifm_shape.dim(2).value();
+ uint32_t stride_height = node->stride()->h();
+ uint32_t stride_width = node->stride()->w();
+ uint32_t window_height = node->filter()->h();
+ uint32_t window_width = node->filter()->w();
+ uint32_t dilation_height = 1; // dilation for CircleAvgPool2D and CircleMaxPool2D is 1
+ uint32_t dilation_width = 1;
+ uint32_t effective_window_height = dilation_height * (window_height - 1) + 1;
+ uint32_t effective_window_width = dilation_width * (window_width - 1) + 1;
+
+ uint32_t output_height = 0;
+ uint32_t output_width = 0;
+
+ if (node->padding() == luci::Padding::VALID)
+ {
+ output_height = (input_height + stride_height - effective_window_height) / stride_height;
+ output_width = (input_width + stride_width - effective_window_width) / stride_width;
+ }
+ else if (node->padding() == luci::Padding::SAME)
+ {
+ output_height = (input_height + stride_height - 1) / stride_height;
+ output_width = (input_width + stride_width - 1) / stride_width;
+ }
+ else
+ LUCI_ASSERT(false, "Wrong padding type");
+
+ loco::TensorShape ofm_shape;
+ ofm_shape.rank(4);
+ ofm_shape.dim(0) = ifm_shape.dim(0);
+ ofm_shape.dim(1) = output_height;
+ ofm_shape.dim(2) = output_width;
+ ofm_shape.dim(3) = ifm_shape.dim(3);
+
+ return loco::NodeShape{ofm_shape};
+}
+
+loco::NodeShape infer_batch_to_space_nd(const luci::CircleBatchToSpaceND *node)
+{
+ const loco::DataType S32 = loco::DataType::S32;
+
+ auto input_shape = loco::shape_get(node->input()).as<loco::TensorShape>();
+ // Only input rank 3 or 4 is supported
+ assert(input_shape.rank() == 3 || input_shape.rank() == 4);
+
+ // Only support block_shape() with S32 type CircleConst for now
+ auto const_block_shape = loco::must_cast<luci::CircleConst *>(node->block_shape());
+ LUCI_ASSERT(const_block_shape->dtype() == loco::DataType::S32, "Only support int32 block_shape");
+
+ // Only support crops() with S32 type CircleConst for now
+ auto const_crops = loco::must_cast<luci::CircleConst *>(node->crops());
+ LUCI_ASSERT(const_crops->dtype() == loco::DataType::S32, "Only support int32 crops");
+
+ auto const_block_shape_shape = loco::shape_get(const_block_shape).as<loco::TensorShape>();
+ auto const_crops_shape = loco::shape_get(const_crops).as<loco::TensorShape>();
+ assert(const_block_shape_shape.rank() == 1);
+ assert(const_crops_shape.rank() == 2);
+
+ int32_t input_spatial_dim = input_shape.rank() - 2;
+ assert(const_block_shape_shape.dim(0) == input_spatial_dim);
+ assert(const_crops_shape.dim(0) == input_spatial_dim);
+ assert(const_crops_shape.dim(1) == 2);
+
+ loco::TensorShape shape_output;
+
+ shape_output.rank(input_shape.rank());
+
+ int32_t output_batch_size = input_shape.dim(0).value();
+ for (int32_t dim = 0; dim < input_spatial_dim; ++dim)
+ {
+ int dim_size = input_shape.dim(dim + 1).value() * const_block_shape->at<S32>(dim);
+ dim_size -= const_crops->at<S32>(dim * 2);
+ dim_size -= const_crops->at<S32>(dim * 2 + 1);
+ shape_output.dim(dim + 1) = dim_size;
+
+ assert(output_batch_size % const_block_shape->at<S32>(dim) == 0);
+ output_batch_size = output_batch_size / const_block_shape->at<S32>(dim);
+ }
+ shape_output.dim(0) = output_batch_size;
+ shape_output.dim(input_shape.rank() - 1) = input_shape.dim(input_shape.rank() - 1);
+
+ return loco::NodeShape{shape_output};
+}
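Worked example: an input of shape [4, 2, 2, 1] with block_shape = [2, 2] and zero crops gives spatial dimensions 2 * 2 - 0 - 0 = 4 and a batch of 4 / (2 * 2) = 1, so the inferred output shape is [1, 4, 4, 1].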
+
+struct OutputSize
+{
+ uint32_t height = 0;
+ uint32_t width = 0;
+};
+
+template <class Conv2DType> OutputSize infer_conv2d_type(const Conv2DType *node)
+{
+ auto ifm_shape = loco::shape_get(node->input()).template as<loco::TensorShape>();
+ auto ker_shape = loco::shape_get(node->filter()).template as<loco::TensorShape>();
+ assert(ifm_shape.rank() == 4);
+ assert(ker_shape.rank() == 4);
+
+ uint32_t input_height = ifm_shape.dim(1).value();
+ uint32_t input_width = ifm_shape.dim(2).value();
+ uint32_t stride_height = node->stride()->h();
+ uint32_t stride_width = node->stride()->w();
+ uint32_t ker_height = ker_shape.dim(1).value();
+ uint32_t ker_width = ker_shape.dim(2).value();
+ uint32_t dilation_height = node->dilation()->h();
+ uint32_t dilation_width = node->dilation()->w();
+ uint32_t effective_ker_height = dilation_height * (ker_height - 1) + 1;
+ uint32_t effective_ker_width = dilation_width * (ker_width - 1) + 1;
+
+ uint32_t output_height = 0;
+ uint32_t output_width = 0;
+
+ if (node->padding() == luci::Padding::VALID)
+ {
+ output_height = (input_height + stride_height - effective_ker_height) / stride_height;
+ output_width = (input_width + stride_width - effective_ker_width) / stride_width;
+ }
+ else if (node->padding() == luci::Padding::SAME)
+ {
+ output_height = (input_height + stride_height - 1) / stride_height;
+ output_width = (input_width + stride_width - 1) / stride_width;
+ }
+ else
+ LUCI_ASSERT(false, "Wrong padding type");
+
+ OutputSize os{output_height, output_width};
+
+ return os;
+}
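Worked example of the formulas above: with input_height = 32, ker_height = 3, dilation = 1 and stride = 2, VALID padding gives (32 + 2 - 3) / 2 = 15 and SAME padding gives (32 + 2 - 1) / 2 = 16; with stride = 1 the results are 30 and 32 respectively.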
+
// BatchMatMulV2 supports broadcasting in the batch dimensions(BatchMatMul doesn't)
// TODO Distinguish BatchMatMul and BatchMatMulV2
loco::NodeShape infer_batchmatmul_shape(const loco::TensorShape &x_shape,
@@ -238,13 +506,325 @@ loco::NodeShape infer_batchmatmul_shape(const loco::TensorShape &x_shape,
return loco::NodeShape{output_shape};
}
-loco::TensorShape own_shape(const luci::CircleNode *node)
+loco::NodeShape infer_concatenation(const luci::CircleConcatenation *node)
+{
+ // TODO Support when CircleConcatenation has 0 input
+ assert(node->numValues() > 0);
+
+ auto first_shape = loco::shape_get(node->values(0)).as<loco::TensorShape>();
+ auto axis = node->axis();
+ if (axis < 0)
+ axis += first_shape.rank();
+
+ assert(0 <= axis);
+ assert(first_shape.rank() > static_cast<uint32_t>(axis));
+
+ loco::TensorShape output_shape;
+
+ output_shape.rank(first_shape.rank());
+ for (uint32_t i = 0; i < output_shape.rank(); ++i)
+ output_shape.dim(i) = first_shape.dim(i);
+
+ for (uint32_t i = 1; i < node->numValues(); ++i)
+ {
+ auto input_shape = loco::shape_get(node->values(i)).as<loco::TensorShape>();
+
+ for (uint32_t j = 0; j < output_shape.rank(); ++j)
+ {
+ if (j == static_cast<uint32_t>(axis))
+ output_shape.dim(j) = output_shape.dim(j).value() + input_shape.dim(j).value();
+ else
+ assert(output_shape.dim(j) == input_shape.dim(j));
+ }
+ }
+
+ return loco::NodeShape{output_shape};
+}
+
+loco::NodeShape infer_conv2d(const luci::CircleConv2D *node)
+{
+ LOGGER(l);
+
+ auto ifm_shape = loco::shape_get(node->input()).as<loco::TensorShape>(); // in NHWC
+ auto ker_shape = loco::shape_get(node->filter()).as<loco::TensorShape>(); // in OHWI
+
+ INFO(l) << "[luci] CircleConv2D ShapeInf ifm(" << ifm_shape.rank() << ") ker(" << ker_shape.rank()
+ << ")" << std::endl;
+
+ assert(ifm_shape.rank() == 4);
+ assert(ker_shape.rank() == 4);
+ assert(ifm_shape.dim(3) == ker_shape.dim(3));
+
+ auto os = infer_conv2d_type(node);
+
+ loco::TensorShape ofm_shape;
+ ofm_shape.rank(4);
+ ofm_shape.dim(0) = ifm_shape.dim(0);
+ ofm_shape.dim(1) = os.height;
+ ofm_shape.dim(2) = os.width;
+ ofm_shape.dim(3) = ker_shape.dim(0);
+
+ return loco::NodeShape{ofm_shape};
+}
+
+loco::NodeShape infer_depth_to_space(const luci::CircleDepthToSpace *node)
+{
+ auto input_shape = loco::shape_get(node->input()).as<loco::TensorShape>();
+ LUCI_ASSERT(input_shape.rank() == 4, "Only input rank 4 is supported");
+
+ // Only data format NHWC is supported
+ // TODO need to clarify what to do with layout in this operator
+ int32_t height = input_shape.dim(1).value();
+ int32_t width = input_shape.dim(2).value();
+ int32_t depth = input_shape.dim(3).value();
+
+ int block_size = node->block_size();
+
+ if (block_size < 2)
+ INTERNAL_EXN("Block size must be >= 2");
+
+ if (depth % (block_size * block_size))
+ {
+ INTERNAL_EXN("The input tensor's depth must be divisible by block_size^2");
+ }
+
+ loco::TensorShape output_shape;
+ output_shape.rank(4);
+
+ output_shape.dim(0) = input_shape.dim(0).value();
+ output_shape.dim(1) = height * block_size;
+ output_shape.dim(2) = width * block_size;
+ output_shape.dim(3) = depth / (block_size * block_size);
+
+ return loco::NodeShape{output_shape};
+}
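Worked example: a DepthToSpace input of shape [1, 4, 4, 8] with block_size = 2 yields [1, 8, 8, 2]: height and width are multiplied by 2 and depth is divided by 2 * 2.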
+
+loco::NodeShape infer_depthwise_conv2d(const luci::CircleDepthwiseConv2D *node)
+{
+ auto ifm_shape = loco::shape_get(node->input()).as<loco::TensorShape>(); // in NHWC
+ auto ker_shape = loco::shape_get(node->filter()).as<loco::TensorShape>(); // in 1 H W CM
+
+ assert(ifm_shape.rank() == 4);
+ assert(ker_shape.rank() == 4);
+ assert(ker_shape.dim(0).value() == 1);
+
+ auto os = infer_conv2d_type(node);
+
+ loco::TensorShape ofm_shape;
+ ofm_shape.rank(4);
+ ofm_shape.dim(0) = ifm_shape.dim(0);
+ ofm_shape.dim(1) = os.height;
+ ofm_shape.dim(2) = os.width;
+ ofm_shape.dim(3) = ker_shape.dim(3);
+
+ return loco::NodeShape{ofm_shape};
+}
+
+loco::NodeShape infer_expand_dims(const luci::CircleExpandDims *node)
+{
+ const loco::DataType S32 = loco::DataType::S32;
+ auto x_shape = loco::shape_get(node->input()).as<loco::TensorShape>();
+ if (x_shape.rank() == 0)
+ {
+ // This may be an unknown shape. We use the shape from the node itself.
+ return use_own(node);
+ }
+ auto const_axis = loco::must_cast<luci::CircleConst *>(node->axis());
+ LUCI_ASSERT(const_axis->dtype() == S32, "Only support int32 CircleConst for axis");
+ if (const_axis->rank() != 0 && const_axis->rank() != 1)
+ {
+ INTERNAL_EXN_V("Non-scalar axis in OP", node->opnum());
+ }
+ int32_t axis = const_axis->at<S32>(0);
+ LUCI_ASSERT((axis <= static_cast<int32_t>(x_shape.rank())) &&
+ (axis >= -1 - static_cast<int32_t>(x_shape.rank())),
+ "Axis has to be between [-(D+1), D], where D is rank of input.");
+ size_t positive_axis = axis < 0 ? x_shape.rank() + axis + 1 : axis;
+ loco::TensorShape output_shape;
+ output_shape.rank(x_shape.rank() + 1);
+ size_t i = 0;
+ for (; i < positive_axis; i++)
+ output_shape.dim(i) = x_shape.dim(i);
+ output_shape.dim(i) = loco::Dimension(1);
+ for (; i < x_shape.rank(); i++)
+ output_shape.dim(i + 1) = x_shape.dim(i);
+ return loco::NodeShape{output_shape};
+}
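Worked example: expanding an input of shape [2, 3] with axis = -1 gives positive_axis = 2 and an output shape of [2, 3, 1]; axis = 0 gives [1, 2, 3].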
+
+loco::NodeShape infer_fill(const luci::CircleFill *node)
{
loco::TensorShape shape;
- shape.rank(node->rank());
- for (uint32_t r = 0; r < node->rank(); ++r)
- shape.dim(r) = loco::Dimension(node->dim(r).value());
- return shape;
+ {
+ LUCI_ASSERT(node->dims(), "dims input should not be nullptr");
+
+ auto dims_node = dynamic_cast<luci::CircleConst *>(node->dims());
+ if (dims_node != nullptr)
+ {
+ // Only support node with S32
+ LUCI_ASSERT(dims_node->dtype() == loco::DataType::S32, "Only support int32 CircleConst");
+
+ if (dims_node->rank() != 1)
+ INTERNAL_EXN_V("Only support rank 1 CircleConst", oops::to_uint32(dims_node->rank()));
+
+ shape.rank(dims_node->dim(0).value());
+
+ for (uint32_t axis = 0; axis < shape.rank(); ++axis)
+ {
+ shape.dim(axis) = dims_node->at<loco::DataType::S32>(axis);
+ }
+ }
+ else
+ {
+ shape = own_shape(node);
+ }
+ }
+
+ return loco::NodeShape{shape};
+}
+
+loco::NodeShape infer_fully_connected(const luci::CircleFullyConnected *node)
+{
+ auto input_shape = loco::shape_get(node->input()).as<loco::TensorShape>();
+ auto weights_shape = loco::shape_get(node->weights()).as<loco::TensorShape>();
+
+ // Checking shape capability for fully connected layer
+ // Input: a tensor of at least rank 2 [D1, D2, ... Dn]
+ // Weight: [# of units, K]
+ // Output: [D1 * D2 * ... * Dn / K, # of units]
+ if (input_shape.rank() < 2 || weights_shape.rank() != 2)
+ {
+ // Return node own shape if shape inference is not possible
+ return use_own(node);
+ }
+
+ uint32_t input_size = 1;
+ for (uint32_t i = 0; i < input_shape.rank(); i++)
+ {
+ input_size = input_size * input_shape.dim(i).value();
+ }
+ const uint32_t batch_size = input_size / weights_shape.dim(1).value();
+ loco::TensorShape out_shape;
+ out_shape.rank(2);
+ out_shape.dim(0) = batch_size;
+ out_shape.dim(1) = weights_shape.dim(0);
+
+ return loco::NodeShape{out_shape};
+}
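Worked example: an input of shape [2, 3, 4] (24 elements) with weights of shape [10, 12] gives batch_size = 24 / 12 = 2, so the inferred output shape is [2, 10].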
+
+loco::NodeShape infer_gather(const luci::CircleGather *node)
+{
+ loco::TensorShape output_shape;
+
+ const auto input_shape = loco::shape_get(node->params()).as<loco::TensorShape>();
+ const auto positions_shape = loco::shape_get(node->indices()).as<loco::TensorShape>();
+ int32_t axis = node->axis();
+
+ // If CircleGather input has a dynamic shape, its shape can't be inferred, so it returns the
+ // shape that the node already has.
+ if (input_shape.rank() == 0 || positions_shape.rank() == 0)
+ return use_own(node);
+
+ if (axis < 0)
+ axis += input_shape.rank();
+
+ output_shape.rank(input_shape.rank() - 1 + positions_shape.rank());
+ int32_t outdim_index = 0;
+ for (int32_t i = 0; i < axis; ++i)
+ output_shape.dim(outdim_index++) = input_shape.dim(i);
+ for (uint32_t i = 0; i < positions_shape.rank(); ++i)
+ output_shape.dim(outdim_index++) = positions_shape.dim(i);
+ for (uint32_t i = axis + 1; i < input_shape.rank(); ++i)
+ output_shape.dim(outdim_index++) = input_shape.dim(i);
+
+ return loco::NodeShape{output_shape};
+}
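Worked example: gathering from params of shape [3, 4, 5] with indices of shape [2] and axis = 1 replaces the axis dimension with the indices shape, giving [3, 2, 5].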
+
+loco::NodeShape infer_gather_nd(const luci::CircleGatherNd *node)
+{
+ loco::TensorShape output_shape;
+
+ const auto params_shape = loco::shape_get(node->params()).as<loco::TensorShape>();
+ const auto indices_shape = loco::shape_get(node->indices()).as<loco::TensorShape>();
+
+ const auto params_rank = params_shape.rank();
+ const auto indices_rank = indices_shape.rank();
+
+ // see https://www.tensorflow.org/api_docs/python/tf/gather_nd
+ // output.shape = indices.shape[:-1] + params.shape[indices.shape[-1]:]
+ // batch_dims isn't supported in tflite
+
+ // TODO: replace exceptions with setting shape to unknown?
+
+ if (!indices_shape.dim(indices_rank - 1).known())
+ INTERNAL_EXN("Last indices dimension is unknown");
+
+ auto indices_last_dim = indices_shape.dim(indices_rank - 1).value();
+
+ if (indices_last_dim > params_rank)
+ INTERNAL_EXN("Last indices dimension should be <= params rank");
+
+ const uint32_t output_rank = indices_rank + params_rank - indices_last_dim - 1;
+
+ output_shape.rank(output_rank);
+
+ uint32_t output_index = 0;
+ for (uint32_t i = 0; i < indices_rank - 1; ++i)
+ {
+ auto &dim = indices_shape.dim(i);
+ if (!dim.known())
+ INTERNAL_EXN("Unknown indices dimension is unsupported");
+ output_shape.dim(output_index++).set(dim.value());
+ }
+
+ for (uint32_t i = indices_last_dim; i < params_rank; ++i)
+ {
+ auto &dim = params_shape.dim(i);
+ if (!dim.known())
+ INTERNAL_EXN("Unknown params dimension is unsupported");
+ output_shape.dim(output_index++).set(dim.value());
+ }
+
+ return loco::NodeShape{output_shape};
+}
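Worked example of output.shape = indices.shape[:-1] + params.shape[indices.shape[-1]:]: params of shape [4, 5, 6] with indices of shape [2, 3, 2] give output_rank = 3 + 3 - 2 - 1 = 3 and an inferred output shape of [2, 3, 6].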
+
+loco::NodeShape infer_matrix_diag(const luci::CircleMatrixDiag *node)
+{
+ loco::TensorShape output_shape;
+
+ auto diagonal_shape = loco::shape_get(node->diagonal()).as<loco::TensorShape>();
+ auto rank = diagonal_shape.rank();
+
+ output_shape.rank(rank + 1);
+
+ for (uint32_t i = 0; i < rank; i++)
+ {
+ output_shape.dim(i) = diagonal_shape.dim(i);
+ }
+
+ output_shape.dim(rank) = diagonal_shape.dim(rank - 1);
+
+ return loco::NodeShape{output_shape};
+}
+
+loco::NodeShape infer_matrix_set_diag(const luci::CircleMatrixSetDiag *node)
+{
+ auto input_shape = loco::shape_get(node->input()).as<loco::TensorShape>();
+ auto diagonal_shape = loco::shape_get(node->diagonal()).as<loco::TensorShape>();
+
+ auto rank = diagonal_shape.rank();
+
+ LUCI_ASSERT(rank == input_shape.rank() - 1, "diagonal rank = input rank - 1");
+
+ for (uint32_t i = 0; i < rank - 1; i++)
+ {
+ LUCI_ASSERT(diagonal_shape.dim(i) == input_shape.dim(i), "diagonal dims = input dims");
+ }
+
+ auto dim = std::min(input_shape.dim(rank - 1).value(), input_shape.dim(rank).value());
+
+ LUCI_ASSERT(dim == diagonal_shape.dim(rank - 1), "Max diag len error");
+
+ return loco::NodeShape{input_shape};
}
loco::TensorShape infer_reducer(const loco::Node *input, const loco::Node *indices, bool keep_dims)
@@ -302,885 +882,1311 @@ loco::TensorShape infer_reducer(const loco::Node *input, const loco::Node *indic
return output_shape;
}
-/**
- * @brief vector_from_constant will return int64_t vector from CircleConst node
- */
-template <loco::DataType T> std::vector<int64_t> vector_from_constant(luci::CircleConst *const_node)
+loco::NodeShape infer_mirror_pad(const luci::CircleMirrorPad *node)
{
- std::vector<int64_t> result;
-
- for (uint32_t idx = 0; idx < const_node->size<T>(); ++idx)
- result.push_back(const_node->at<T>(idx));
+ // TODO support non-const case
+ auto paddings = loco::must_cast<luci::CircleConst *>(node->paddings());
+ return use_paddings(node, paddings);
+}
- return result;
+loco::NodeShape infer_one_hot(const luci::CircleOneHot *node)
+{
+ const loco::DataType S32 = loco::DataType::S32;
+ auto indices_shape = loco::shape_get(node->indices()).as<loco::TensorShape>();
+ // Only support when OneHot node's depth() is CircleConst with type S32
+ // TODO support depth with other types
+ auto depth = loco::must_cast<luci::CircleConst *>(node->depth());
+ LUCI_ASSERT(depth->dtype() == S32, "Only support int32 CircleConst");
+ if (depth->rank() != 0)
+ INTERNAL_EXN_V("Only support rank 0 CircleOneHot in Depth", oops::to_uint32(depth->rank()));
+ loco::TensorShape output_shape;
+ output_shape.rank(indices_shape.rank() + 1);
+ auto axis = node->axis();
+ if (axis < 0)
+ axis += indices_shape.rank() + 1;
+ LUCI_ASSERT(0 <= axis, "Axis is out of range");
+ LUCI_ASSERT(static_cast<uint32_t>(axis) <= indices_shape.rank(), "Axis is out of range");
+ uint32_t j = 0;
+ for (uint32_t i = 0; i < output_shape.rank(); i++)
+ {
+ if (i == static_cast<uint32_t>(axis))
+ {
+ output_shape.dim(i) = depth->at<S32>(0);
+ }
+ else
+ {
+ output_shape.dim(i) = indices_shape.dim(j++);
+ }
+ }
+ return loco::NodeShape{output_shape};
}
-template <class CIRCLENODE> loco::NodeShape broadcast_xy(const CIRCLENODE *node)
+loco::NodeShape infer_pack(const luci::CirclePack *node)
{
- auto x_shape = loco::shape_get(node->x()).template as<loco::TensorShape>();
- auto y_shape = loco::shape_get(node->y()).template as<loco::TensorShape>();
+ LUCI_ASSERT(node->values_count() > 0, "Only support one or more inputs");
- auto output_shape = broadcast_shape(x_shape, y_shape);
+ auto first_shape = loco::shape_get(node->values(0)).as<loco::TensorShape>();
+ // Make sure all inputs have the same shape.
+ for (uint32_t i = 1; i < node->values_count(); ++i)
+ {
+ auto in_shape = loco::shape_get(node->values(i)).as<loco::TensorShape>();
+ LUCI_ASSERT(loco::NodeShape{first_shape} == loco::NodeShape{in_shape},
+ "All inputs must have the same shape");
+ }
+
+ // Checking shape capability for pack layer
+ // Input: tensors [D1, D2, ... Dn]
+ // Axis: K
+ // Output: [D1, D2, ... , D_K-1, n, D_K+1, ... Dn]
+ auto axis = node->axis();
+ if (axis < 0)
+ axis += first_shape.rank() + 1;
+
+ LUCI_ASSERT(0 <= axis, "Axis is out of range");
+ LUCI_ASSERT(static_cast<uint32_t>(axis) <= first_shape.rank(), "Axis is out of range");
+
+ loco::TensorShape output_shape;
+ output_shape.rank(first_shape.rank() + 1);
+
+ uint32_t j = 0;
+ for (uint32_t i = 0; i < output_shape.rank(); ++i)
+ {
+ if (i == static_cast<uint32_t>(axis))
+ {
+ output_shape.dim(i) = node->values_count();
+ }
+ else
+ {
+ output_shape.dim(i) = first_shape.dim(j++);
+ }
+ }
return loco::NodeShape{output_shape};
}
-template <class CIRCLENODE> loco::NodeShape use_x(const CIRCLENODE *node)
+loco::NodeShape infer_pad(const luci::CirclePad *node)
{
- auto x_shape = loco::shape_get(node->x()).template as<loco::TensorShape>();
- return loco::NodeShape{x_shape};
+ // TODO support non-const case
+ auto paddings = loco::must_cast<luci::CircleConst *>(node->paddings());
+ return use_paddings(node, paddings);
}
-template <class CIRCLENODE> loco::NodeShape use_logits(const CIRCLENODE *node)
+loco::NodeShape infer_pad_v2(const luci::CirclePadV2 *node)
{
- auto shape = loco::shape_get(node->logits()).template as<loco::TensorShape>();
- return loco::NodeShape{shape};
+ // TODO support non-const case
+ auto paddings = dynamic_cast<luci::CircleConst *>(node->paddings());
+ if (!paddings)
+ {
+ auto node_shape = own_shape(node);
+ return loco::NodeShape{node_shape};
+ }
+ return use_paddings(node, paddings);
}
-loco::NodeShape use_own(const luci::CircleNode *node)
+loco::NodeShape infer_p_relu(const luci::CirclePRelu *node)
{
- loco::TensorShape shape = own_shape(node);
- return loco::NodeShape{shape};
+ auto input_shape = loco::shape_get(node->input()).as<loco::TensorShape>();
+ auto alpha_shape = loco::shape_get(node->alpha()).as<loco::TensorShape>();
+
+ auto output_shape = broadcast_shape(input_shape, alpha_shape);
+
+ return loco::NodeShape{output_shape};
}
-/**
- * @brief Class to infer the shape of CircleNode
- *
- * @note All CircleNode's inputs and outputs are always loco::Domain::Tensor
- */
-class ShapeInferenceAlgorithm final : public luci::CircleNodeVisitor<loco::NodeShape>
+loco::NodeShape infer_range(const luci::CircleRange *node)
{
-public:
- loco::NodeShape visit(const luci::CircleAbs *node) final { return use_x(node); }
+ loco::TensorShape output_shape;
+ output_shape.rank(1);
- loco::NodeShape visit(const luci::CircleAdd *node) final { return broadcast_xy(node); }
+ auto start_node = dynamic_cast<luci::CircleConst *>(node->start());
+ auto limit_node = dynamic_cast<luci::CircleConst *>(node->limit());
+ auto delta_node = dynamic_cast<luci::CircleConst *>(node->delta());
- loco::NodeShape visit(const luci::CircleAddN *node) final
+ if (start_node == nullptr || limit_node == nullptr || delta_node == nullptr)
{
- auto shape = loco::shape_get(node->inputs(0)).as<loco::TensorShape>();
+ return use_own(node);
+ }
- for (uint32_t idx = 1; idx < node->arity(); ++idx)
- {
- auto shape_idx = loco::shape_get(node->inputs(idx)).as<loco::TensorShape>();
- if (!(shape == shape_idx))
- {
- INTERNAL_EXN_V("ADD_N shape not same as the first input: ", idx);
- }
- }
+ double start = 0, limit = 0, delta = 0;
+
+#define GET_RANGE_PARAM(DT) \
+ start = start_node->scalar<DT>(); \
+ limit = limit_node->scalar<DT>(); \
+ delta = delta_node->scalar<DT>();
- return loco::NodeShape{shape};
+ switch (start_node->dtype())
+ {
+ case loco::DataType::FLOAT32:
+ GET_RANGE_PARAM(loco::DataType::FLOAT32)
+ break;
+ case loco::DataType::S32:
+ GET_RANGE_PARAM(loco::DataType::S32)
+ break;
+ default:
+ INTERNAL_EXN("Range data type not supported");
}
- loco::NodeShape visit(const luci::CircleArgMax *node) final
+#undef GET_RANGE_PARAM
+
+ if (delta == 0)
+ INTERNAL_EXN("Delta can not be zero");
+
+ output_shape.dim(0) = ceil((limit - start) / delta);
+
+ return loco::NodeShape{output_shape};
+}
+
+loco::NodeShape infer_reshape(const luci::CircleReshape *node)
+{
+ LOGGER(l);
+
+ const loco::DataType S32 = loco::DataType::S32;
+
+ loco::TensorShape shape_by_input;
{
- auto input_shape = loco::shape_get(node->input()).as<loco::TensorShape>();
- auto dimension_shape = loco::shape_get(node->dimension()).as<loco::TensorShape>();
+ LUCI_ASSERT(node->shape(), "2nd input shape() should not be nullptr");
- int64_t select_axis = 0;
+ // Only support node's shape() is CircleConst with S32
+ // TODO support other node with other types
+ auto const_shape_node = dynamic_cast<luci::CircleConst *>(node->shape());
+ if (const_shape_node != nullptr)
{
- LUCI_ASSERT(node->dimension(), "2nd input dimension() should not be nullptr");
-
- // Only support node's shape() is CircleConst with S32/S64
- // Support S32 for now.
- auto const_shape_node = loco::must_cast<luci::CircleConst *>(node->dimension());
- LUCI_ASSERT(const_shape_node->dtype() == loco::DataType::S32,
- "Only support int32 CircleConst for CircleArgMax");
+ LUCI_ASSERT(const_shape_node->dtype() == S32, "Only support int32 CircleConst");
- if (const_shape_node->rank() > 1)
- INTERNAL_EXN_V("Only support rank 0/1 CircleConst",
- oops::to_uint32(const_shape_node->rank()));
+ shape_by_input.rank(const_shape_node->size<S32>());
- select_axis = const_shape_node->scalar<loco::DataType::S32>();
+ for (uint32_t axis = 0; axis < shape_by_input.rank(); ++axis)
+ {
+ shape_by_input.dim(axis) = const_shape_node->at<S32>(axis);
+ }
}
- assert(select_axis < input_shape.rank());
- assert(select_axis >= 0); // TODO support minus of this breaks
-
- // NOTE select_axis is removed
- loco::TensorShape shape_output;
- uint32_t rank = input_shape.rank();
- uint32_t shrink = static_cast<uint32_t>(select_axis);
- assert(rank > 0);
- shape_output.rank(rank - 1);
- for (uint32_t r = 0, d = 0; r < rank; ++r)
+ else
{
- if (r == shrink)
- continue;
- shape_output.dim(d++) = input_shape.dim(r);
+ // We use shape from the node itself
+ shape_by_input = own_shape(node);
}
- return loco::NodeShape{shape_output};
}
- loco::NodeShape visit(const luci::CircleArgMin *node) final
+ loco::TensorShape shape_by_attr;
{
- auto input_shape = loco::shape_get(node->input()).as<loco::TensorShape>();
- auto dimension_shape = loco::shape_get(node->dimension()).as<loco::TensorShape>();
+ shape_by_attr.rank(node->newShape()->rank());
- int64_t select_axis = 0;
+ for (uint32_t axis = 0; axis < shape_by_attr.rank(); ++axis)
{
- LUCI_ASSERT(node->dimension(), "2nd input dimension() should not be nullptr");
+ shape_by_attr.dim(axis) = node->newShape()->dim(axis);
+ }
+ }
- // Only support node's shape() is CircleConst with S32/S64
- // Support S32 for now.
- auto const_shape_node = loco::must_cast<luci::CircleConst *>(node->dimension());
- LUCI_ASSERT(const_shape_node->dtype() == loco::DataType::S32,
- "Only support int32 CircleConst for CircleArgMin");
+ if (!(shape_by_input == shape_by_attr))
+ {
+ INFO(l) << "CircleReshape: Two new shape information mismatched : " << std::endl;
+ INFO(l) << " shape_by_input : " << shape_by_input << std::endl;
+ INFO(l) << " shape_by_attr : " << shape_by_attr << std::endl;
+ }
- if (const_shape_node->rank() > 1)
- INTERNAL_EXN_V("Only support rank 0/1 CircleConst",
- oops::to_uint32(const_shape_node->rank()));
+ loco::TensorShape output_shape = shape_by_input;
- select_axis = const_shape_node->scalar<loco::DataType::S32>();
+ // One of the dimensions can have special value -1, meaning its actual value should be inferred.
+ const auto input_shape = loco::shape_get(node->tensor()).as<loco::TensorShape>();
+ const uint32_t input_element_count = loco::element_count(&input_shape);
+ uint32_t output_element_count = 1;
+ uint32_t unknown_dim_index = UINT32_MAX;
+ for (uint32_t dim_index = 0; dim_index < output_shape.rank(); ++dim_index)
+ {
+ const uint32_t dim_value = output_shape.dim(dim_index).value();
+ if (static_cast<int>(dim_value) == -1)
+ {
+ LUCI_ASSERT(unknown_dim_index == UINT32_MAX, "More than one unknown dimension");
+ unknown_dim_index = dim_index;
}
- assert(select_axis < input_shape.rank());
- assert(select_axis >= 0); // TODO support minus of this breaks
-
- // NOTE select_axis is removed
- loco::TensorShape shape_output;
- uint32_t rank = input_shape.rank();
- uint32_t shrink = static_cast<uint32_t>(select_axis);
- assert(rank > 0);
- shape_output.rank(rank - 1);
- for (uint32_t r = 0, d = 0; r < rank; ++r)
+ else
{
- if (r == shrink)
- continue;
- shape_output.dim(d++) = input_shape.dim(r);
+ output_element_count *= dim_value;
}
- return loco::NodeShape{shape_output};
}
-
- loco::NodeShape visit(const luci::CircleAveragePool2D *node) final
+ if (unknown_dim_index != UINT32_MAX)
{
- return infer_pool_2d_shape(node);
+ output_shape.dim(unknown_dim_index) = input_element_count / output_element_count;
}
- loco::NodeShape visit(const luci::CircleBatchMatMul *node) final
- {
- auto x_shape = loco::shape_get(node->x()).as<loco::TensorShape>();
- auto y_shape = loco::shape_get(node->y()).as<loco::TensorShape>();
+ return loco::NodeShape{output_shape};
+}
- return infer_batchmatmul_shape(x_shape, y_shape, node->adj_x(), node->adj_y());
- }
+loco::NodeShape infer_resize_bilinear(const luci::CircleResizeBilinear *node)
+{
+ auto input_shape = loco::shape_get(node->input()).as<loco::TensorShape>();
- loco::NodeShape visit(const luci::CircleBatchToSpaceND *node) final
- {
- const loco::DataType S32 = loco::DataType::S32;
+ if (input_shape.rank() != 4)
+ INTERNAL_EXN("Expected ResizeBilinear input to have rank 4");
- auto input_shape = loco::shape_get(node->input()).as<loco::TensorShape>();
- // Support only input rank is 3 and 4
- assert(input_shape.rank() == 3 || input_shape.rank() == 4);
+ auto *const_node = loco::must_cast<luci::CircleConst *>(node->size());
- // Only support block_shape() with S32 type CircleConst for now
- auto const_block_shape = loco::must_cast<luci::CircleConst *>(node->block_shape());
- LUCI_ASSERT(const_block_shape->dtype() == loco::DataType::S32,
- "Only support int32 block_shape");
+ if (const_node->dtype() != loco::DataType::S32)
+ INTERNAL_EXN("Only S32 datatype is supported for ResizeBilinear size");
- // Only support crops() with S32 type CircleConst for now
- auto const_crops = loco::must_cast<luci::CircleConst *>(node->crops());
- LUCI_ASSERT(const_crops->dtype() == loco::DataType::S32, "Only support int32 crops");
+ if (const_node->rank() != 1)
+ INTERNAL_EXN("Expected size tensor of rank 1");
- auto const_block_shape_shape = loco::shape_get(const_block_shape).as<loco::TensorShape>();
- auto const_crops_shape = loco::shape_get(const_crops).as<loco::TensorShape>();
- assert(const_block_shape_shape.rank() == 1);
- assert(const_crops_shape.rank() == 2);
+ if (const_node->dim(0).value() != 2)
+ INTERNAL_EXN("Expected size tensor with shape [2]");
- int32_t input_spatial_dim = input_shape.rank() - 2;
- assert(const_block_shape_shape.dim(0) == input_spatial_dim);
- assert(const_crops_shape.dim(0) == input_spatial_dim);
- assert(const_crops_shape.dim(1) == 2);
+ loco::TensorShape output_shape;
+ output_shape.rank(4);
+ output_shape.dim(0) = input_shape.dim(0);
+ output_shape.dim(1) = const_node->at<loco::DataType::S32>(0);
+ output_shape.dim(2) = const_node->at<loco::DataType::S32>(1);
+ output_shape.dim(3) = input_shape.dim(3);
- loco::TensorShape shape_output;
+ return loco::NodeShape{output_shape};
+}
- shape_output.rank(input_shape.rank());
+loco::NodeShape infer_resize_nearest_neighbor(const luci::CircleResizeNearestNeighbor *node)
+{
+ auto input_shape = loco::shape_get(node->input()).as<loco::TensorShape>();
- int32_t output_batch_size = input_shape.dim(0).value();
- for (int32_t dim = 0; dim < input_spatial_dim; ++dim)
- {
- int dim_size = input_shape.dim(dim + 1).value() * const_block_shape->at<S32>(dim);
- dim_size -= const_crops->at<S32>(dim * 2);
- dim_size -= const_crops->at<S32>(dim * 2 + 1);
- shape_output.dim(dim + 1) = dim_size;
+ if (input_shape.rank() != 4)
+ INTERNAL_EXN("Expected ResizeNearesNeighbor input to have rank 4");
- assert(output_batch_size % const_block_shape->at<S32>(dim) == 0);
- output_batch_size = output_batch_size / const_block_shape->at<S32>(dim);
- }
- shape_output.dim(0) = output_batch_size;
- shape_output.dim(input_shape.rank() - 1) = input_shape.dim(input_shape.rank() - 1);
+ auto *const_node = loco::must_cast<luci::CircleConst *>(node->size());
- return loco::NodeShape{shape_output};
- }
+ if (const_node->dtype() != loco::DataType::S32)
+ INTERNAL_EXN("Only S32 datatype is supported for ResizeNearesNeighbor size");
- loco::NodeShape visit(const luci::CircleCast *node) final { return use_x(node); }
+ if (const_node->rank() != 1)
+ INTERNAL_EXN("Expected size tensor of rank 1");
- loco::NodeShape visit(const luci::CircleCeil *node) final { return use_x(node); }
+ if (const_node->dim(0).value() != 2)
+ INTERNAL_EXN("Expected size tensor with shape [2]");
- loco::NodeShape visit(const luci::CircleConcatenation *node) final
- {
- // TODO Support when CircleConcatenation has 0 input
- assert(node->numValues() > 0);
+ loco::TensorShape output_shape;
+ output_shape.rank(4);
+ output_shape.dim(0) = input_shape.dim(0);
+ output_shape.dim(1) = const_node->at<loco::DataType::S32>(0);
+ output_shape.dim(2) = const_node->at<loco::DataType::S32>(1);
+ output_shape.dim(3) = input_shape.dim(3);
- auto first_shape = loco::shape_get(node->values(0)).as<loco::TensorShape>();
- auto axis = node->axis();
- if (axis < 0)
- axis += first_shape.rank();
+ return loco::NodeShape{output_shape};
+}
- assert(0 <= axis);
- assert(first_shape.rank() > static_cast<uint32_t>(axis));
+loco::NodeShape infer_scatter_nd(const luci::CircleScatterNd *node)
+{
+ loco::TensorShape output_shape;
- loco::TensorShape output_shape;
+ auto shape_node = loco::must_cast<luci::CircleConst *>(node->shape());
- output_shape.rank(first_shape.rank());
- for (uint32_t i = 0; i < output_shape.rank(); ++i)
- output_shape.dim(i) = first_shape.dim(i);
+ const loco::DataType S32 = loco::DataType::S32;
+ const loco::DataType S64 = loco::DataType::S64;
- for (uint32_t i = 1; i < node->numValues(); ++i)
- {
- auto input_shape = loco::shape_get(node->values(i)).as<loco::TensorShape>();
+ std::vector<int64_t> vect_shape;
- for (uint32_t j = 0; j < output_shape.rank(); ++j)
- {
- if (j == static_cast<uint32_t>(axis))
- output_shape.dim(j) = output_shape.dim(j).value() + input_shape.dim(j).value();
- else
- assert(output_shape.dim(j) == input_shape.dim(j));
- }
- }
+ if (shape_node->dtype() == S32)
+ vect_shape = vector_from_constant<S32>(shape_node);
+ else if (shape_node->dtype() == S64)
+ vect_shape = vector_from_constant<S64>(shape_node);
+ else
+ LUCI_ASSERT(false, "Only support int32/int64 for shape()");
- return loco::NodeShape{output_shape};
- }
+ output_shape.rank(vect_shape.size());
+ for (uint32_t i = 0; i < vect_shape.size(); ++i)
+ output_shape.dim(i) = vect_shape[i];
- loco::NodeShape visit(const luci::CircleConst *node) final { return use_own(node); }
+ return loco::NodeShape{output_shape};
+}
- loco::NodeShape visit(const luci::CircleConv2D *node) final
- {
- LOGGER(l);
+loco::NodeShape infer_segment_sum(const luci::CircleSegmentSum *node)
+{
+ auto input_shape = loco::shape_get(node->input()).as<loco::TensorShape>();
+ auto segment_shape = loco::shape_get(node->segment_ids()).as<loco::TensorShape>();
- auto ifm_shape = loco::shape_get(node->input()).as<loco::TensorShape>(); // in NHWC
- auto ker_shape = loco::shape_get(node->filter()).as<loco::TensorShape>(); // in OHWI
+ LUCI_ASSERT(segment_shape.rank() == 1, "segment_ids must be 1-D tensor");
+ LUCI_ASSERT(segment_shape.dim(0).value() == input_shape.dim(0).value(),
+ "segment_ids size must be equal to the size of data's first dimension");
- INFO(l) << "[luci] CircleConv2D ShapeInf ifm(" << ifm_shape.rank() << ") ker("
- << ker_shape.rank() << ")" << std::endl;
+ auto ids_shape_value = loco::must_cast<luci::CircleConst *>(node->segment_ids());
- assert(ifm_shape.rank() == 4);
- assert(ker_shape.rank() == 4);
- assert(ifm_shape.dim(3) == ker_shape.dim(3));
+ std::vector<int64_t> vect_ids;
- uint32_t input_height = ifm_shape.dim(1).value();
- uint32_t input_width = ifm_shape.dim(2).value();
- uint32_t stride_height = node->stride()->h();
- uint32_t stride_width = node->stride()->w();
- uint32_t ker_height = ker_shape.dim(1).value();
- uint32_t ker_width = ker_shape.dim(2).value();
- uint32_t dilation_height = node->dilation()->h();
- uint32_t dilation_width = node->dilation()->w();
- uint32_t effective_ker_height = dilation_height * (ker_height - 1) + 1;
- uint32_t effective_ker_width = dilation_width * (ker_width - 1) + 1;
+ if (ids_shape_value->dtype() == loco::DataType::S32)
+ vect_ids = vector_from_constant<loco::DataType::S32>(ids_shape_value);
- uint32_t output_height = 0;
- uint32_t output_width = 0;
+ LUCI_ASSERT(std::is_sorted(vect_ids.begin(), vect_ids.end()),
+ "segment_ids values should be sorted")
- if (node->padding() == luci::Padding::VALID)
- {
- output_height = (input_height + stride_height - effective_ker_height) / stride_height;
- output_width = (input_width + stride_width - effective_ker_width) / stride_width;
- }
- else if (node->padding() == luci::Padding::SAME)
+ loco::TensorShape output_shape;
+
+ output_shape.rank(input_shape.rank());
+
+ for (uint32_t i = 1; i < input_shape.rank(); ++i)
+ output_shape.dim(i) = input_shape.dim(i);
+
+ output_shape.dim(0) = vect_ids.back() + 1;
+
+ return loco::NodeShape{output_shape};
+}
+
+loco::NodeShape infer_select(const luci::CircleSelect *node)
+{
+ auto t_shape = loco::shape_get(node->t()).as<loco::TensorShape>();
+ assert(t_shape == loco::shape_get(node->e()).as<loco::TensorShape>());
+
+ // condition shape validation
+ auto c_shape = loco::shape_get(node->condition()).as<loco::TensorShape>();
+ if (c_shape.rank() != t_shape.rank())
+ {
+ if (c_shape.rank() != 0 && c_shape.rank() != 1)
+ INTERNAL_EXN_V("CircleSelect condition rank is not 0 nor 1: ", c_shape.rank());
+
+ if (c_shape.rank() == 1)
{
- output_height = (input_height + stride_height - 1) / stride_height;
- output_width = (input_width + stride_width - 1) / stride_width;
+ if (c_shape.dim(0).value() != t_shape.dim(0).value())
+ INTERNAL_EXN("CircleSelect condition dim(0) should match with t.dim(0)");
}
- else
- LUCI_ASSERT(false, "Wrong padding type");
+ }
- loco::TensorShape ofm_shape;
- ofm_shape.rank(4);
- ofm_shape.dim(0) = ifm_shape.dim(0);
- ofm_shape.dim(1) = output_height;
- ofm_shape.dim(2) = output_width;
- ofm_shape.dim(3) = ker_shape.dim(0);
+ return loco::NodeShape{t_shape};
+}
- return loco::NodeShape{ofm_shape};
- }
+loco::NodeShape infer_select_v2(const luci::CircleSelectV2 *node)
+{
+ auto c_shape = loco::shape_get(node->condition()).as<loco::TensorShape>();
+ auto t_shape = loco::shape_get(node->t()).as<loco::TensorShape>();
+ auto e_shape = loco::shape_get(node->e()).as<loco::TensorShape>();
- loco::NodeShape visit(const luci::CircleCos *node) final { return use_x(node); }
+ // validate ability to broadcast shapes to each other
+ auto b_shape = broadcast_shape(broadcast_shape(c_shape, t_shape), e_shape);
+ return loco::NodeShape{b_shape};
+}
- loco::NodeShape visit(const luci::CircleCustom *node) final { return use_own(node); }
+loco::NodeShape infer_shape(const luci::CircleShape *node)
+{
+ auto input_shape = loco::shape_get(node->input()).as<loco::TensorShape>();
- loco::NodeShape visit(const luci::CircleDepthToSpace *node) final
- {
- auto input_shape = loco::shape_get(node->input()).as<loco::TensorShape>();
- LUCI_ASSERT(input_shape.rank() == 4, "Only input rank 4 is supported");
+ loco::TensorShape output_shape;
- // Only data format NHWC is supported
- // TODO need to clarify what to do with layout in this operator
- int32_t height = input_shape.dim(1).value();
- int32_t width = input_shape.dim(2).value();
- int32_t depth = input_shape.dim(3).value();
+ output_shape.rank(1);
+ output_shape.dim(0) = input_shape.rank();
- int block_size = node->block_size();
+ return loco::NodeShape{output_shape};
+}
- if (block_size < 2)
- INTERNAL_EXN("Block size must be >= 2");
+loco::NodeShape infer_slice(const luci::CircleSlice *node)
+{
+ const loco::DataType S32 = loco::DataType::S32;
+ const loco::DataType S64 = loco::DataType::S64;
+
+ auto input_shape = loco::shape_get(node->input()).as<loco::TensorShape>();
- if (depth % (block_size * block_size))
+ auto const_begin = loco::must_cast<luci::CircleConst *>(node->begin());
+ auto const_size = loco::must_cast<luci::CircleConst *>(node->size());
+
+ loco::TensorShape output_shape;
+ std::vector<int64_t> vect_begin; // to hold both S32/S64, we use int64_t
+ std::vector<int64_t> vect_size;
+
+ if (const_begin->dtype() == S32)
+ vect_begin = vector_from_constant<S32>(const_begin);
+ else if (const_begin->dtype() == S64)
+ vect_begin = vector_from_constant<S64>(const_begin);
+ else
+ LUCI_ASSERT(false, "Only support int32/int64 for begin()");
+
+ if (const_size->dtype() == S32)
+ vect_size = vector_from_constant<S32>(const_size);
+ else if (const_size->dtype() == S64)
+ vect_size = vector_from_constant<S64>(const_size);
+ else
+ LUCI_ASSERT(false, "Only support int32/int64 for size()");
+
+ assert(input_shape.rank() == vect_begin.size());
+ assert(input_shape.rank() == vect_size.size());
+
+ output_shape.rank(vect_begin.size());
+ for (uint32_t idx = 0; idx < vect_begin.size(); ++idx)
+ {
+ auto size = vect_size.at(idx);
+ if (size == -1)
{
- INTERNAL_EXN("The input tensor's depth must be divisible by block_size^2");
+ size = input_shape.dim(idx).value() - vect_begin.at(idx);
}
+ output_shape.dim(idx) = size;
+ }
- loco::TensorShape output_shape;
- output_shape.rank(4);
+ return loco::NodeShape{output_shape};
+}
- output_shape.dim(0) = input_shape.dim(0).value();
- output_shape.dim(1) = height * block_size;
- output_shape.dim(2) = width * block_size;
- output_shape.dim(3) = depth / (block_size * block_size);
+loco::NodeShape infer_space_to_batch_nd(const luci::CircleSpaceToBatchND *node)
+{
+ const loco::DataType S32 = loco::DataType::S32;
- return loco::NodeShape{output_shape};
- }
+ auto input_shape = loco::shape_get(node->input()).as<loco::TensorShape>();
+ // Only input rank 3 or 4 is supported
+ assert(input_shape.rank() == 3 || input_shape.rank() == 4);
- loco::NodeShape visit(const luci::CircleDepthwiseConv2D *node) final
- {
- auto ifm_shape = loco::shape_get(node->input()).as<loco::TensorShape>(); // in NHWC
- auto ker_shape = loco::shape_get(node->filter()).as<loco::TensorShape>(); // in 1 H W CM
+ // Only support block_shape() with S32 type CircleConst for now
+ auto const_block_shape = loco::must_cast<luci::CircleConst *>(node->block_shape());
+ LUCI_ASSERT(const_block_shape->dtype() == S32, "Only support int32 block_shape");
- assert(ifm_shape.rank() == 4);
- assert(ker_shape.rank() == 4);
- assert(ker_shape.dim(0).value() == 1);
+ // Only support paddings() with S32 type CircleConst for now
+ auto const_paddings = loco::must_cast<luci::CircleConst *>(node->paddings());
+ LUCI_ASSERT(const_paddings->dtype() == S32, "Only support int32 paddings");
- uint32_t input_height = ifm_shape.dim(1).value();
- uint32_t input_width = ifm_shape.dim(2).value();
- uint32_t stride_height = node->stride()->h();
- uint32_t stride_width = node->stride()->w();
- uint32_t ker_height = ker_shape.dim(1).value();
- uint32_t ker_width = ker_shape.dim(2).value();
- uint32_t dilation_height = node->dilation()->h();
- uint32_t dilation_width = node->dilation()->w();
- uint32_t effective_ker_height = dilation_height * (ker_height - 1) + 1;
- uint32_t effective_ker_width = dilation_width * (ker_width - 1) + 1;
+ auto const_block_shape_shape = loco::shape_get(const_block_shape).as<loco::TensorShape>();
+ auto const_paddings_shape = loco::shape_get(const_paddings).as<loco::TensorShape>();
+ assert(const_block_shape_shape.rank() == 1);
+ assert(const_paddings_shape.rank() == 2);
- uint32_t output_height = 0;
- uint32_t output_width = 0;
+ int32_t input_spatial_dim = input_shape.rank() - 2;
+ assert(const_block_shape_shape.dim(0) == input_spatial_dim);
+ assert(const_paddings_shape.dim(0) == input_spatial_dim);
+ assert(const_paddings_shape.dim(1) == 2);
- if (node->padding() == luci::Padding::VALID)
- {
- output_height = (input_height + stride_height - effective_ker_height) / stride_height;
- output_width = (input_width + stride_width - effective_ker_width) / stride_width;
- }
- else if (node->padding() == luci::Padding::SAME)
+ // Check all values of block_shape >= 1
+ uint32_t ele_count = const_block_shape->size<S32>();
+ for (uint32_t e = 0; e < ele_count; ++e)
+ {
+ auto val = const_block_shape->at<S32>(e);
+ if (val < 1)
{
- output_height = (input_height + stride_height - 1) / stride_height;
- output_width = (input_width + stride_width - 1) / stride_width;
+ INTERNAL_EXN_V("All values of block_shape >= 1: ", e);
}
- else
- LUCI_ASSERT(false, "Wrong padding type");
+ }
- loco::TensorShape ofm_shape;
- ofm_shape.rank(4);
- ofm_shape.dim(0) = ifm_shape.dim(0);
- ofm_shape.dim(1) = output_height;
- ofm_shape.dim(2) = output_width;
- ofm_shape.dim(3) = ker_shape.dim(3);
+ loco::TensorShape shape_output;
- return loco::NodeShape{ofm_shape};
+ shape_output.rank(input_shape.rank());
+
+ int32_t output_batch_size = input_shape.dim(0).value();
+ for (int32_t dim = 0; dim < input_spatial_dim; ++dim)
+ {
+ int dim_size = input_shape.dim(dim + 1).value();
+ dim_size += const_paddings->at<S32>(dim * 2);
+ dim_size += const_paddings->at<S32>(dim * 2 + 1);
+ shape_output.dim(dim + 1) = dim_size / const_block_shape->at<S32>(dim);
+
+ assert(dim_size % const_block_shape->at<S32>(dim) == 0);
+ output_batch_size = output_batch_size * const_block_shape->at<S32>(dim);
}
+ shape_output.dim(0) = output_batch_size;
+ shape_output.dim(input_shape.rank() - 1) = input_shape.dim(input_shape.rank() - 1);
- loco::NodeShape visit(const luci::CircleDiv *node) final { return broadcast_xy(node); }
+ return loco::NodeShape{shape_output};
+}
- loco::NodeShape visit(const luci::CircleElu *node) final
- {
- auto input_shape = loco::shape_get(node->features()).as<loco::TensorShape>();
+loco::NodeShape infer_space_to_depth(const luci::CircleSpaceToDepth *node)
+{
+ auto input_shape = loco::shape_get(node->input()).as<loco::TensorShape>();
+ LUCI_ASSERT(input_shape.rank() == 4, "Only input rank 4 is supported");
- return loco::NodeShape{input_shape};
+ // Only data format NHWC is supported
+ int32_t height = input_shape.dim(1).value();
+ int32_t width = input_shape.dim(2).value();
+ int32_t depth = input_shape.dim(3).value();
+
+ int block_size = node->block_size();
+
+ if (block_size < 2)
+ INTERNAL_EXN("Block size must be >= 2");
+
+ if ((height % block_size) || (width % block_size))
+ {
+ INTERNAL_EXN("The input tensor's height and width must be divisible by block_size");
}
- loco::NodeShape visit(const luci::CircleEqual *node) final { return broadcast_xy(node); }
+ loco::TensorShape output_shape;
+ output_shape.rank(4);
- loco::NodeShape visit(const luci::CircleExp *node) final { return use_x(node); }
+ output_shape.dim(0) = input_shape.dim(0).value();
+ output_shape.dim(1) = height / block_size;
+ output_shape.dim(2) = width / block_size;
+ output_shape.dim(3) = block_size * block_size * depth;
- loco::NodeShape visit(const luci::CircleExpandDims *node) final
+ return loco::NodeShape{output_shape};
+}
+
+loco::NodeShape infer_sparse_to_dense(const luci::CircleSparseToDense *node)
+{
+ loco::TensorShape shape;
{
- const loco::DataType S32 = loco::DataType::S32;
- auto x_shape = loco::shape_get(node->input()).as<loco::TensorShape>();
- if (x_shape.rank() == 0)
+ LUCI_ASSERT(node->output_shape(), "dims input should not be nullptr");
+
+ auto output_shape_node = dynamic_cast<luci::CircleConst *>(node->output_shape());
+ if (output_shape_node != nullptr)
{
- // This maybe for unknown shape. We use shape from the node itself.
- return use_own(node);
+ // Only support node with S32
+ LUCI_ASSERT(output_shape_node->dtype() == loco::DataType::S32,
+ "Only support int32 CircleConst");
+
+ if (output_shape_node->rank() != 1)
+ INTERNAL_EXN_V("Only support rank 1 CircleConst",
+ oops::to_uint32(output_shape_node->rank()));
+
+ shape.rank(output_shape_node->size<loco::DataType::S32>());
+
+ for (uint32_t axis = 0; axis < shape.rank(); ++axis)
+ {
+ shape.dim(axis) = output_shape_node->at<loco::DataType::S32>(axis);
+ }
}
- auto const_axis = loco::must_cast<luci::CircleConst *>(node->axis());
- LUCI_ASSERT(const_axis->dtype() == S32, "Only support int32 CircleConst for axis");
- if (const_axis->rank() != 0 && const_axis->rank() != 1)
+ else
{
- INTERNAL_EXN_V("Non-scalar axis in OP", node->opnum());
+ shape = own_shape(node);
}
- int32_t axis = const_axis->at<S32>(0);
- LUCI_ASSERT((axis <= static_cast<int32_t>(x_shape.rank())) &&
- (axis >= -1 - static_cast<int32_t>(x_shape.rank())),
- "Axis has to be between [-(D+1), D], where D is rank of input.");
- size_t positive_axis = axis < 0 ? x_shape.rank() + axis + 1 : axis;
- loco::TensorShape output_shape;
- output_shape.rank(x_shape.rank() + 1);
- size_t i = 0;
- for (; i < positive_axis; i++)
- output_shape.dim(i) = x_shape.dim(i);
- output_shape.dim(i) = loco::Dimension(1);
- for (; i < x_shape.rank(); i++)
- output_shape.dim(i + 1) = x_shape.dim(i);
- return loco::NodeShape{output_shape};
}
- loco::NodeShape visit(const luci::CircleFill *node) final
+ return loco::NodeShape{shape};
+}
+
+loco::NodeShape infer_strided_slice(const luci::CircleStridedSlice *node)
+{
+ auto begin_node = dynamic_cast<luci::CircleConst *>(node->begin());
+ auto end_node = dynamic_cast<luci::CircleConst *>(node->end());
+ auto strides_node = dynamic_cast<luci::CircleConst *>(node->strides());
+
+ if (begin_node == nullptr || end_node == nullptr || strides_node == nullptr)
{
- loco::TensorShape shape;
- {
- LUCI_ASSERT(node->dims(), "dims input should not be nullptr");
+ return use_own(node);
+ }
- auto dims_node = dynamic_cast<luci::CircleConst *>(node->dims());
- if (dims_node != nullptr)
- {
- // Only support node with S32
- LUCI_ASSERT(dims_node->dtype() == loco::DataType::S32, "Only support int32 CircleConst");
+ loco::TensorShape shape = infer_output_shape(node);
+ return loco::NodeShape{shape};
+}
- if (dims_node->rank() != 1)
- INTERNAL_EXN_V("Only support rank 1 CircleConst", oops::to_uint32(dims_node->rank()));
+loco::NodeShape infer_squeeze(const luci::CircleSqueeze *node)
+{
+ auto input_shape = loco::shape_get(node->input()).as<loco::TensorShape>();
- shape.rank(dims_node->dim(0).value());
+ // TODO input shape may be unknown before runtime
+ std::vector<bool> do_squeeze(input_shape.rank(), false);
+ uint32_t num_squeezed = 0;
- for (uint32_t axis = 0; axis < shape.rank(); ++axis)
- {
- shape.dim(axis) = dims_node->at<loco::DataType::S32>(axis);
- }
+ if (!node->squeeze_dims().empty())
+ {
+ // SqueezeDims not empty, squeeze only dims specified
+ for (int32_t raw_dim : node->squeeze_dims())
+ {
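+ // negative axis values count back from the end of the input shape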
+ int32_t dim = raw_dim < 0 ? raw_dim + input_shape.rank() : raw_dim;
+
+ if (dim < 0 || static_cast<uint32_t>(dim) >= input_shape.rank() ||
+ input_shape.dim(dim).value() != 1)
+ {
+ INTERNAL_EXN("invalid dimention specified to Squeeze");
}
- else
+
+ if (!do_squeeze[dim])
+ ++num_squeezed;
+ do_squeeze[dim] = true;
+ }
+ }
+ else
+ {
+ // SqueezeDims empty, squeeze any dims with size == 1
+ for (uint32_t dim = 0; dim < input_shape.rank(); ++dim)
+ {
+ if (input_shape.dim(dim) == 1)
{
- shape = own_shape(node);
+ do_squeeze[dim] = true;
+ ++num_squeezed;
}
}
+ }
- return loco::NodeShape{shape};
+ loco::TensorShape output_shape;
+ output_shape.rank(input_shape.rank() - num_squeezed);
+
+ for (uint32_t in_dim = 0, out_dim = 0; in_dim < input_shape.rank(); ++in_dim)
+ {
+ if (!do_squeeze[in_dim])
+ {
+ output_shape.dim(out_dim++) = input_shape.dim(in_dim);
+ }
}
- loco::NodeShape visit(const luci::CircleFloor *node) final { return use_x(node); }
+ return loco::NodeShape{output_shape};
+}
- loco::NodeShape visit(const luci::CircleFloorDiv *node) final { return broadcast_xy(node); }
+loco::NodeShape infer_tile(const luci::CircleTile *node)
+{
+ const loco::DataType S32 = loco::DataType::S32;
- loco::NodeShape visit(const luci::CircleFloorMod *node) final { return broadcast_xy(node); }
+ auto input_shape = loco::shape_get(node->input()).as<loco::TensorShape>();
+ auto multiples = loco::must_cast<luci::CircleConst *>(node->multiples());
- loco::NodeShape visit(const luci::CircleFullyConnected *node) final
- {
- auto input_shape = loco::shape_get(node->input()).as<loco::TensorShape>();
- auto weights_shape = loco::shape_get(node->weights()).as<loco::TensorShape>();
+ // TODO support non-const case
+ // TODO support S64 type
+ LUCI_ASSERT(multiples->dtype() == S32, "Only support int32 multiples");
+ LUCI_ASSERT(multiples->rank() == 1, "multiples should be rank 1")
- // Checking shape capability for fully connected layer
- // Input: a tensor of at least rank 2 [D1, D2, ... Dn]
- // Weight: [# of units, K]
- // Output: [D1 * D2 * ... * Dn / K, # of units]
- if (input_shape.rank() < 2 || weights_shape.rank() != 2)
- {
- // Return node own shape if shape inference is not possible
- return use_own(node);
- }
+ uint32_t n = multiples->dim(0).value();
- uint32_t input_size = 1;
- for (uint32_t i = 0; i < input_shape.rank(); i++)
- {
- input_size = input_size * input_shape.dim(i).value();
- }
- const uint32_t batch_size = input_size / weights_shape.dim(1).value();
- loco::TensorShape out_shape;
- out_shape.rank(2);
- out_shape.dim(0) = batch_size;
- out_shape.dim(1) = weights_shape.dim(0);
+ LUCI_ASSERT(n == input_shape.rank(), "length of multiples should be the same with input rank");
- return loco::NodeShape{out_shape};
- }
+ loco::TensorShape output_shape;
- loco::NodeShape visit(const luci::CircleGather *node) final
+ output_shape.rank(input_shape.rank());
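+ // each output dimension is the matching input dimension scaled by its multiple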
+ for (uint32_t ni = 0; ni < n; ++ni)
{
- loco::TensorShape output_shape;
+ int32_t multiple = multiples->at<S32>(ni);
+ output_shape.dim(ni) = input_shape.dim(ni).value() * static_cast<uint32_t>(multiple);
+ }
- const auto input_shape = loco::shape_get(node->params()).as<loco::TensorShape>();
- const auto positions_shape = loco::shape_get(node->indices()).as<loco::TensorShape>();
- int32_t axis = node->axis();
+ return loco::NodeShape{output_shape};
+}
- // If CircleGather input has a dynamic shape, it can't inference this shape. So, it returns the
- // shape that node already has.
- if (input_shape.rank() == 0 || positions_shape.rank() == 0)
- return use_own(node);
+loco::NodeShape infer_transpose(const luci::CircleTranspose *node)
+{
+ auto input_shape = loco::shape_get(node->a()).as<loco::TensorShape>();
- if (axis < 0)
- axis += input_shape.rank();
+ auto perm_node = loco::must_cast<luci::CircleConst *>(node->perm());
- output_shape.rank(input_shape.rank() - 1 + positions_shape.rank());
- int32_t outdim_index = 0;
- for (int32_t i = 0; i < axis; ++i)
- output_shape.dim(outdim_index++) = input_shape.dim(i);
- for (uint32_t i = 0; i < positions_shape.rank(); ++i)
- output_shape.dim(outdim_index++) = positions_shape.dim(i);
- for (uint32_t i = axis + 1; i < input_shape.rank(); ++i)
- output_shape.dim(outdim_index++) = input_shape.dim(i);
+ loco::TensorShape output_shape;
+ output_shape.rank(input_shape.rank());
- return loco::NodeShape{output_shape};
- }
+ assert(perm_node->dtype() == loco::DataType::S32);
+ assert(input_shape.rank() == perm_node->template size<loco::DataType::S32>());
- loco::NodeShape visit(const luci::CircleGatherNd *node) final
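+ // output dimension out_axis takes its extent from input dimension perm[out_axis]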
+ for (uint32_t out_axis = 0; out_axis < output_shape.rank(); out_axis++)
{
- loco::TensorShape output_shape;
+ auto in_axis = perm_node->template at<loco::DataType::S32>(out_axis);
+ output_shape.dim(out_axis) = input_shape.dim(in_axis);
+ }
- const auto params_shape = loco::shape_get(node->params()).as<loco::TensorShape>();
- const auto indices_shape = loco::shape_get(node->indices()).as<loco::TensorShape>();
+ return output_shape;
+}
- const auto params_rank = params_shape.rank();
- const auto indices_rank = indices_shape.rank();
+loco::NodeShape infer_transpose_conv(const luci::CircleTransposeConv *node)
+{
+ // TransposeConv's output shape is written in its 'inputSizes' argument
+ auto input_sizes_const = loco::must_cast<luci::CircleConst *>(node->inputSizes());
+ // TODO support non-const type
+ LUCI_ASSERT(input_sizes_const->dtype() == loco::DataType::S32, "Only support S32 dtype")
+ LUCI_ASSERT(input_sizes_const->rank() == 1 && input_sizes_const->dim(0).value() == 4,
+ "Only support rank 1 with 4 entries")
- // see https://www.tensorflow.org/api_docs/python/tf/gather_nd
- // output.shape = indices.shape[:-1] + params.shape[indices.shape[-1]:]
- // batch_dims isn't supported in tflite
+ loco::TensorShape shape;
- // TODO: replace exceptions with setting shape to unknown?
+ shape.rank(4);
+ for (uint32_t axis = 0; axis < 4; ++axis)
+ shape.dim(axis) = input_sizes_const->at<loco::DataType::S32>(axis);
- if (!indices_shape.dim(indices_rank - 1).known())
- INTERNAL_EXN("Last indices dimension is unknown");
+ return loco::NodeShape{shape};
+}
- auto indices_last_dim = indices_shape.dim(indices_rank - 1).value();
+loco::NodeShape infer_unpack(const luci::CircleUnpack *node)
+{
+ // CircleUnpack provides a list (array) of Tensors, each with one less dimension than the input
+ // We'll set the shape of CircleUnpack to the shape of the actual outputs
+ // TODO fix this if any problem arises
+ auto value_shape = loco::shape_get(node->value()).as<loco::TensorShape>();
- if (indices_last_dim > params_rank)
- INTERNAL_EXN("Last indices dimension should be <= params rank");
+ auto axis = node->axis();
+ auto num = node->num();
+ auto rank = static_cast<int32_t>(value_shape.rank());
- const uint32_t output_rank = indices_rank + params_rank - indices_last_dim - 1;
+ if (rank == 0)
+ {
+ // Unknown shape
+ return use_own(node);
+ }
- output_shape.rank(output_rank);
+ LUCI_ASSERT(-rank <= axis && axis < rank, "Axis is out of range");
- uint32_t output_index = 0;
- for (uint32_t i = 0; i < indices_rank - 1; ++i)
- {
- auto &dim = indices_shape.dim(i);
- if (!dim.known())
- INTERNAL_EXN("Unknown indices dimension is unsupported");
- output_shape.dim(output_index++).set(dim.value());
- }
+ if (axis < 0)
+ axis += rank;
- for (uint32_t i = indices_last_dim; i < params_rank; ++i)
- {
- auto &dim = params_shape.dim(i);
- if (!dim.known())
- INTERNAL_EXN("Unknown params dimension is unsupported");
- output_shape.dim(output_index++).set(dim.value());
- }
+ LUCI_ASSERT(num == static_cast<int32_t>(value_shape.dim(axis).value()),
+ "num, axis maybe incorrect");
- return loco::NodeShape{output_shape};
+ loco::TensorShape output_shape;
+ output_shape.rank(rank - 1);
+
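+ // copy every dimension of the value except the unpacked axis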
+ for (int32_t i = 0, o = 0; i < rank; ++i)
+ {
+ if (i != axis)
+ output_shape.dim(o++) = value_shape.dim(i);
}
- loco::NodeShape visit(const luci::CircleGreater *node) final { return broadcast_xy(node); }
+ return loco::NodeShape{output_shape};
+}
- loco::NodeShape visit(const luci::CircleGreaterEqual *node) final { return broadcast_xy(node); }
+loco::NodeShape infer_unique(const luci::CircleUnique *node)
+{
+ auto input_shape = loco::shape_get(node->input()).as<loco::TensorShape>();
- loco::NodeShape visit(const luci::CircleIf *node) final
+ assert(input_shape.rank() == 1);
+
+ loco::TensorShape shape_output;
+ shape_output = own_shape(node);
+
+ return loco::NodeShape{shape_output};
+}
+
+// Circle Only
+loco::NodeShape infer_bcq_fully_connected(const luci::CircleBCQFullyConnected *node)
+{
+ loco::TensorShape out_shape;
+
+ auto input_shape = loco::shape_get(node->input()).as<loco::TensorShape>();
+ auto weights_clusters = loco::must_cast<luci::CircleConst *>(node->weights_clusters());
+
+ LUCI_ASSERT(input_shape.rank() == 2, "Input rank of BCQFullyConnected should be 2");
+
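+ // weights_clusters is read as an [N, 2] table; summing its second column gives the output row count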
+ int32_t qbits_sum = 0;
+ for (uint32_t i = 0; i < weights_clusters->dim(0).value(); ++i)
{
- // Shape of CircleIf is not used. Just use input 0
- assert(node->input_count() > 0);
- const auto input_shape = loco::shape_get(node->input(0)).as<loco::TensorShape>();
- return loco::NodeShape{input_shape};
+ qbits_sum += weights_clusters->at<loco::DataType::S32>(i * 2 + 1);
}
- loco::NodeShape visit(const luci::CircleL2Normalize *node) final { return use_x(node); }
+ out_shape.rank(2);
+ out_shape.dim(0) = qbits_sum;
+ out_shape.dim(1) = input_shape.dim(1);
- loco::NodeShape visit(const luci::CircleL2Pool2D *node) final
+ return loco::NodeShape{out_shape};
+}
+
+loco::NodeShape infer_bcq_gather(const luci::CircleBCQGather *node)
+{
+ loco::TensorShape input_shape;
+ loco::TensorShape output_shape;
+
+ const auto input_binary_shape = loco::shape_get(node->input_binary()).as<loco::TensorShape>();
+ const auto indices_shape = loco::shape_get(node->indices()).as<loco::TensorShape>();
+ auto axis = node->axis();
+
+ auto input_clusters = loco::must_cast<luci::CircleConst *>(node->input_clusters());
+ auto qbits_sum = 0;
+ for (uint32_t i = 0; i < input_clusters->dim(0).value(); ++i)
{
- return infer_pool_2d_shape(node);
+ qbits_sum += input_clusters->at<loco::DataType::S32>(i * 2 + 1);
}
- loco::NodeShape visit(const luci::CircleLeakyRelu *node) final
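+ // reconstruct the logical input shape: rows from the cluster table, columns from the packed 32-bit binary input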
+ input_shape.rank(2);
+ input_shape.dim(0) = qbits_sum;
+ input_shape.dim(1) = input_binary_shape.dim(1).value() * 32;
+
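+ // output layout follows Gather: the axis dimension is replaced by the indices shape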
+ output_shape.rank(input_shape.rank() - 1 + indices_shape.rank());
+ int32_t outdim_index = 0;
+ for (int32_t i = 0; i < axis; ++i)
+ output_shape.dim(outdim_index++) = input_shape.dim(i);
+ for (uint32_t i = 0; i < indices_shape.rank(); ++i)
+ output_shape.dim(outdim_index++) = indices_shape.dim(i);
+ for (uint32_t i = axis + 1; i < input_shape.rank(); ++i)
+ output_shape.dim(outdim_index++) = input_shape.dim(i);
+
+ return loco::NodeShape{output_shape};
+}
+
+// Virtual
+loco::NodeShape infer_input(const luci::CircleInput *node)
+{
+ loco::TensorShape shape;
+
+ shape.rank(node->rank());
+ for (uint32_t axis = 0; axis < node->rank(); axis++)
+ shape.dim(axis) = node->dim(axis);
+
+ return loco::NodeShape{shape};
+}
+
+loco::NodeShape infer_output(const luci::CircleOutput *node)
+{
+ auto graph_outputs = node->graph()->outputs();
+ auto graph_output = graph_outputs->at(node->index());
+ auto output_shape = graph_output->shape();
+
+ return loco::NodeShape{*output_shape};
+}
+
+loco::NodeShape infer_if_out(const luci::CircleIfOut *node)
+{
+ /**
+ * @note IF operator type and shape are those of the "then" and "else"
+ * Graph Outputs.
+ */
+ auto circle_if = dynamic_cast<const luci::CircleIf *>(node->input());
+ if (circle_if == nullptr)
{
- const auto input_shape = loco::shape_get(node->features()).as<loco::TensorShape>();
- return loco::NodeShape{input_shape};
+ INTERNAL_EXN("CircleIf IR is not configured correctly");
}
- loco::NodeShape visit(const luci::CircleLess *node) final { return broadcast_xy(node); }
+ auto index = node->index();
+ auto then_graph = circle_if->then_graph();
+ auto else_graph = circle_if->else_graph();
+ assert(then_graph != nullptr);
+ assert(else_graph != nullptr);
- loco::NodeShape visit(const luci::CircleLessEqual *node) final { return broadcast_xy(node); }
+ // shape and type are assumed to be the same
+ // these are checked at post_import_graph() in Import
+ auto then_outputs = loco::output_nodes(then_graph);
+ auto else_outputs = loco::output_nodes(else_graph);
+ assert(then_outputs.size() == else_outputs.size());
+ assert(index < static_cast<int32_t>(then_outputs.size()));
- loco::NodeShape visit(const luci::CircleLocalResponseNormalization *node) final
+ auto then_out = loco::must_cast<luci::CircleOutput *>(then_outputs.at(index));
+ auto else_out = loco::must_cast<luci::CircleOutput *>(else_outputs.at(index));
+
+ auto then_graph_outputs = then_graph->outputs(); // loco::GraphOutput items
+ auto else_graph_outputs = else_graph->outputs();
+ assert(then_graph_outputs->size() == else_graph_outputs->size());
+
+ auto then_graph_output = then_graph_outputs->at(then_out->index());
+ auto else_graph_output = else_graph_outputs->at(else_out->index());
+ (void)else_graph_output; // make compiler happy for unused variable warnings
+ assert(*then_graph_output->shape() == *else_graph_output->shape());
+
+ return loco::NodeShape{*then_graph_output->shape()};
+}
+
+loco::NodeShape infer_non_max_suppression_v4_out(const luci::CircleNonMaxSuppressionV4Out *node)
+{
+ const loco::DataType S32 = loco::DataType::S32;
+
+ auto nmsv4 = dynamic_cast<const luci::CircleNonMaxSuppressionV4 *>(node->input());
+ if (nmsv4 == nullptr)
+ INTERNAL_EXN("CircleNonMaxSuppressionV4 IR is not configured correctly");
+
+ auto index = node->index();
+ if (index == 1)
+ return loco::TensorShape({0});
+
+ assert(index == 0);
+
+ auto unknown = loco::TensorShape{loco::Dimension()};
+ auto max_output_size = dynamic_cast<const luci::CircleConst *>(nmsv4->max_output_size());
+ if (max_output_size == nullptr)
+ return unknown; // we need CircleConst for max output size
+
+ LUCI_ASSERT(max_output_size->dtype() == S32, "Only support int32 for max_output_size");
+
+ if (max_output_size->size<S32>() < 1)
+ return unknown;
+
+ auto max_output_size_value = uint32_t(max_output_size->at<S32>(0));
+ return loco::TensorShape{max_output_size_value};
+}
+
+loco::NodeShape infer_non_max_suppression_v5_out(const luci::CircleNonMaxSuppressionV5Out *node)
+{
+ const loco::DataType S32 = loco::DataType::S32;
+
+ auto nmsv5 = dynamic_cast<const luci::CircleNonMaxSuppressionV5 *>(node->input());
+ if (nmsv5 == nullptr)
+ INTERNAL_EXN("CircleNonMaxSuppressionV5 IR is not configured correctly");
+
+ auto index = node->index();
+ if (index == 2)
+ return loco::TensorShape({0});
+
+ assert(index == 0 || index == 1);
+
+ auto unknown = loco::TensorShape{loco::Dimension()};
+ auto max_output_size = dynamic_cast<const luci::CircleConst *>(nmsv5->max_output_size());
+ if (max_output_size == nullptr)
+ return unknown; // we need CircleConst for max output size
+
+ LUCI_ASSERT(max_output_size->dtype() == S32, "Only support int32 for max_output_size");
+
+ if (max_output_size->size<S32>() < 1)
+ return unknown;
+
+ auto max_output_size_value = uint32_t(max_output_size->at<S32>(0));
+ return loco::TensorShape{max_output_size_value};
+}
+
+loco::NodeShape infer_split_out(const luci::CircleSplitOut *node)
+{
+ const loco::DataType S32 = loco::DataType::S32;
+
+ auto split = dynamic_cast<const luci::CircleSplit *>(node->input());
+ if (split == nullptr)
+ INTERNAL_EXN("CircleSplit IR is not configured correctly");
+
+ loco::NodeShape unknown;
+
+ auto split_shape = loco::shape_get(split).as<loco::TensorShape>();
+
+ auto split_dim = dynamic_cast<const luci::CircleConst *>(split->split_dim());
+ if (split_dim == nullptr)
+ return unknown; // we need CircleConst for split_dim
+ LUCI_ASSERT(split_dim->dtype() == S32, "Only support int32 for split_dim");
+
+ assert(split_dim->size<S32>() == 1);
+ auto split_dim_axis = split_dim->at<S32>(0);
+ if (split_dim_axis < 0)
+ split_dim_axis += split_shape.rank();
+
+ auto split_dim_value = split_shape.dim(split_dim_axis).value();
+ assert(split_dim_value % split->num_split() == 0);
+ const int split_depth = split_dim_value / split->num_split();
+
+ loco::TensorShape output_shape = split_shape;
+
+ // All outputs of Split have the same shape
+ output_shape.dim(split_dim_axis) = loco::Dimension(split_depth);
+
+ return loco::NodeShape{output_shape};
+}
+
+loco::NodeShape infer_split_v_out(const luci::CircleSplitVOut *node)
+{
+ const loco::DataType S32 = loco::DataType::S32;
+
+ auto split = dynamic_cast<const luci::CircleSplitV *>(node->input());
+ if (split == nullptr)
+ INTERNAL_EXN("CircleSplit IR is not configured correctly");
+
+ loco::NodeShape unknown;
+
+ auto split_shape = loco::shape_get(split).as<loco::TensorShape>();
+
+ auto size_splits = dynamic_cast<const luci::CircleConst *>(split->size_splits());
+ if (size_splits == nullptr)
+ return unknown; // we need CircleConst for size_splits
+ LUCI_ASSERT(size_splits->dtype() == S32, "Only support int32 for size_splits");
+
+ auto split_dim = dynamic_cast<const luci::CircleConst *>(split->split_dim());
+ if (split_dim == nullptr)
+ return unknown; // we need CircleConst for split_dim
+ LUCI_ASSERT(split_dim->dtype() == S32, "Only support int32 for split_dim");
+
+ // fetch axis
+ assert(split_dim->size<S32>() == 1);
+ auto split_dim_axis = split_dim->at<S32>(0);
+ if (split_dim_axis < 0)
+ split_dim_axis += split_shape.rank();
+
+ // interpret size_splits values
+ int32_t size_splits_count = static_cast<int32_t>(size_splits->size<S32>());
+ assert(size_splits_count == split->num_split());
+
+ int64_t minus_one_count = 0, size_splits_sum = 0;
+ for (int32_t idx = 0; idx < size_splits_count; ++idx)
{
- const auto input_shape = loco::shape_get(node->input()).as<loco::TensorShape>();
- return loco::NodeShape{input_shape};
+ auto size = size_splits->at<S32>(idx);
+ assert(size >= -1);
+ if (size == -1)
+ ++minus_one_count;
+ else
+ size_splits_sum += size;
}
+ if (minus_one_count > 1)
+ INTERNAL_EXN("CircleSplitV size_splits has more than two -1 values");
- loco::NodeShape visit(const luci::CircleLog *node) final { return use_x(node); }
+ // calculate this SplitVOut's shape
+ auto input_size = split_shape.dim(split_dim_axis).value();
+ assert(size_splits_sum <= input_size);
- loco::NodeShape visit(const luci::CircleLogicalAnd *node) final { return use_x(node); }
+ auto index_this = node->index();
+ assert(0 <= index_this && index_this < split->num_split());
+ auto split_depth = size_splits->at<S32>(index_this);
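+ // a -1 entry takes whatever remains along the split axis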
+ if (split_depth == -1)
+ split_depth = input_size - size_splits_sum;
- loco::NodeShape visit(const luci::CircleLogicalNot *node) final { return use_x(node); }
+ loco::TensorShape output_shape = split_shape;
- loco::NodeShape visit(const luci::CircleLogicalOr *node) final { return use_x(node); }
+ output_shape.dim(split_dim_axis) = loco::Dimension(split_depth);
- loco::NodeShape visit(const luci::CircleLogistic *node) final { return use_x(node); }
+ return loco::NodeShape{output_shape};
+}
- loco::NodeShape visit(const luci::CircleMatrixSetDiag *node) final
- {
- auto input_shape = loco::shape_get(node->input()).as<loco::TensorShape>();
- auto diagonal_shape = loco::shape_get(node->diagonal()).as<loco::TensorShape>();
+loco::NodeShape infer_top_k_v2_out(const luci::CircleTopKV2Out *node)
+{
+ const loco::DataType S32 = loco::DataType::S32;
- auto rank = diagonal_shape.rank();
+ auto topkv2 = dynamic_cast<const luci::CircleTopKV2 *>(node->input());
+ if (topkv2 == nullptr)
+ INTERNAL_EXN("CircleSplit IR is not configured correctly");
- LUCI_ASSERT(rank == input_shape.rank() - 1, "diagonal rank = input rank - 1");
+ // shape of topkv2 is the same as topkv2->input()
+ auto input_shape = loco::shape_get(topkv2).as<loco::TensorShape>();
- for (uint32_t i = 0; i < rank - 1; i++)
- {
- LUCI_ASSERT(diagonal_shape.dim(i) == input_shape.dim(i), "diagonal dims = input dims");
- }
+ auto node_k = loco::must_cast<const luci::CircleConst *>(topkv2->k());
+ LUCI_ASSERT(node_k->dtype() == S32, "Only support Int32");
+ assert(node_k->size<S32>() == 1);
- auto dim = std::min(input_shape.dim(rank - 1).value(), input_shape.dim(rank).value());
+ loco::TensorShape output_shape;
- LUCI_ASSERT(dim == diagonal_shape.dim(rank - 1), "Max diag len error");
+ output_shape.rank(input_shape.rank());
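+ // same shape as the input except that the last dimension becomes k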
+ for (uint32_t idx = 0; idx < input_shape.rank() - 1; ++idx)
+ {
+ output_shape.dim(idx) = input_shape.dim(idx);
+ }
+ output_shape.dim(input_shape.rank() - 1) = node_k->at<S32>(0);
- return loco::NodeShape{input_shape};
+ return loco::NodeShape{output_shape};
+}
+
+loco::NodeShape infer_unique_out(const luci::CircleUniqueOut *node)
+{
+ if (node->index() == 0)
+ {
+ auto unique_shape = own_shape(node);
+ return loco::NodeShape{unique_shape};
}
+ assert(node->index() == 1);
+ auto unique = loco::must_cast<luci::CircleUnique *>(node->input());
+ auto unique_shape = loco::shape_get(unique->input()).as<loco::TensorShape>();
- loco::NodeShape visit(const luci::CircleLogSoftmax *node) final { return use_logits(node); }
+ assert(unique_shape.rank() == 1);
- loco::NodeShape visit(const luci::CircleMatrixDiag *node) final
+ loco::TensorShape shape_output;
+ shape_output.rank(1);
+ shape_output.dim(0) = unique_shape.dim(0);
+ return loco::NodeShape{shape_output};
+}
+
+loco::NodeShape infer_unpack_out(const luci::CircleUnpackOut *node)
+{
+ auto unpack = dynamic_cast<const luci::CircleUnpack *>(node->input());
+ if (unpack == nullptr)
{
- loco::TensorShape output_shape;
+ INTERNAL_EXN("CircleUnpack IR is not configured correctly");
+ }
- auto diagonal_shape = loco::shape_get(node->diagonal()).as<loco::TensorShape>();
- auto rank = diagonal_shape.rank();
+ auto unpack_shape = loco::shape_get(unpack).as<loco::TensorShape>();
- output_shape.rank(rank + 1);
+ return loco::NodeShape{unpack_shape};
+}
- for (uint32_t i = 0; i < rank; i++)
- {
- output_shape.dim(i) = diagonal_shape.dim(i);
- }
+loco::NodeShape infer_while_out(const luci::CircleWhileOut *node)
+{
+ /**
+ * @note WHILE operator's shape is the same as the "cond"
+ * Graph input.
+ */
+ auto circle_while = dynamic_cast<const luci::CircleWhile *>(node->input());
+ if (circle_while == nullptr)
+ {
+ INTERNAL_EXN("CircleWhile IR is not configured correctly");
+ }
- output_shape.dim(rank) = diagonal_shape.dim(rank - 1);
+ auto index = node->index();
+ auto cond_graph = circle_while->cond_graph();
+ assert(cond_graph != nullptr);
- return loco::NodeShape{output_shape};
+ // Assumption: the index of CircleWhileOut matches the index of input nodes returned by
+ // loco::input_nodes
+ auto cond_inputs = loco::input_nodes(cond_graph);
+ auto cond_in = loco::must_cast<luci::CircleInput *>(cond_inputs.at(index));
+
+ auto cond_graph_inputs = cond_graph->inputs();
+ auto cond_graph_input = cond_graph_inputs->at(cond_in->index());
+
+ auto cond_graph_input_shape = *cond_graph_input->shape();
+ auto this_shape = own_shape(node);
+
+ if (!(this_shape == cond_graph_input_shape))
+ {
+ LOGGER(l);
+ WARN(l) << "Warning: CircleWhileOut '" << node->name() << "' shape mispatch " << this_shape
+ << " vs " << cond_graph_input_shape;
}
- loco::NodeShape visit(const luci::CircleMaximum *node) final { return broadcast_xy(node); }
+ return loco::NodeShape{this_shape};
+}
- loco::NodeShape visit(const luci::CircleMaxPool2D *node) final
+/**
+ * @brief Class to infer the shape of CircleNode
+ *
+ * @note All CircleNode's inputs and outputs are always loco::Domain::Tensor
+ */
+class ShapeInferenceAlgorithm final : public luci::CircleNodeVisitor<loco::NodeShape>
+{
+public:
+ loco::NodeShape visit(const luci::CircleAbs *node) final { return use_x(node); }
+
+ loco::NodeShape visit(const luci::CircleAdd *node) final { return broadcast_xy(node); }
+
+ loco::NodeShape visit(const luci::CircleAddN *node) final { return infer_add_n(node); }
+
+ loco::NodeShape visit(const luci::CircleArgMax *node) final { return infer_arg_max(node); }
+
+ loco::NodeShape visit(const luci::CircleArgMin *node) final { return infer_arg_min(node); }
+
+ loco::NodeShape visit(const luci::CircleAveragePool2D *node) final
{
return infer_pool_2d_shape(node);
}
- loco::NodeShape visit(const luci::CircleMean *node) final
+ loco::NodeShape visit(const luci::CircleBatchMatMul *node) final
{
- auto output_shape = infer_reducer(node->input(), node->reduction_indices(), node->keep_dims());
- return loco::NodeShape{output_shape};
- }
+ auto x_shape = loco::shape_get(node->x()).as<loco::TensorShape>();
+ auto y_shape = loco::shape_get(node->y()).as<loco::TensorShape>();
- loco::NodeShape visit(const luci::CircleMinimum *node) final { return broadcast_xy(node); }
+ return infer_batchmatmul_shape(x_shape, y_shape, node->adj_x(), node->adj_y());
+ }
- loco::NodeShape visit(const luci::CircleMirrorPad *node) final
+ loco::NodeShape visit(const luci::CircleBatchToSpaceND *node) final
{
- const loco::DataType S32 = loco::DataType::S32;
+ return infer_batch_to_space_nd(node);
+ }
- auto input_shape = loco::shape_get(node->input()).as<loco::TensorShape>();
- auto paddings = loco::must_cast<luci::CircleConst *>(node->paddings());
+ loco::NodeShape visit(const luci::CircleCast *node) final { return use_x(node); }
- // TODO support non-const case
- // TODO support other data type
- LUCI_ASSERT(paddings->dtype() == S32, "Only support int 32 for now");
- LUCI_ASSERT(paddings->rank() == 2, "paddings should be rank 2")
+ loco::NodeShape visit(const luci::CircleCeil *node) final { return use_x(node); }
- int32_t n = paddings->dim(0).value();
- int32_t v = paddings->dim(1).value();
+ loco::NodeShape visit(const luci::CircleConcatenation *node) final
+ {
+ return infer_concatenation(node);
+ }
- LUCI_ASSERT(v == 2, "paddings should be [n, 2]");
- LUCI_ASSERT(n == int32_t(input_shape.rank()),
- "paddings [n, 2] should have same value of input rank");
+ loco::NodeShape visit(const luci::CircleConst *node) final { return use_own(node); }
- loco::TensorShape output_shape;
+ loco::NodeShape visit(const luci::CircleConv2D *node) final { return infer_conv2d(node); }
- output_shape.rank(input_shape.rank());
- for (int32_t ni = 0; ni < n; ++ni)
- {
- int32_t idx = ni * 2;
- int value = input_shape.dim(ni).value();
- value += paddings->at<S32>(idx + 0); // left
- value += paddings->at<S32>(idx + 1); // right
- output_shape.dim(ni) = value;
- }
+ loco::NodeShape visit(const luci::CircleCos *node) final { return use_x(node); }
- return loco::NodeShape{output_shape};
+ loco::NodeShape visit(const luci::CircleCustom *node) final { return use_own(node); }
+
+ loco::NodeShape visit(const luci::CircleDepthToSpace *node) final
+ {
+ return infer_depth_to_space(node);
}
- loco::NodeShape visit(const luci::CircleMul *node) final { return broadcast_xy(node); }
+ loco::NodeShape visit(const luci::CircleDepthwiseConv2D *node) final
+ {
+ return infer_depthwise_conv2d(node);
+ }
- loco::NodeShape visit(const luci::CircleNeg *node) final { return use_x(node); }
+ loco::NodeShape visit(const luci::CircleDiv *node) final { return broadcast_xy(node); }
- loco::NodeShape visit(const luci::CircleNonMaxSuppressionV4 *node) final
+ loco::NodeShape visit(const luci::CircleElu *node) final
{
- const auto boxes_shape = loco::shape_get(node->boxes()).as<loco::TensorShape>();
- return loco::NodeShape{boxes_shape};
+ auto input_shape = loco::shape_get(node->features()).as<loco::TensorShape>();
+
+ return loco::NodeShape{input_shape};
}
- loco::NodeShape visit(const luci::CircleNotEqual *node) final { return broadcast_xy(node); }
+ loco::NodeShape visit(const luci::CircleEqual *node) final { return broadcast_xy(node); }
- loco::NodeShape visit(const luci::CircleOneHot *node) final
+ loco::NodeShape visit(const luci::CircleExp *node) final { return use_x(node); }
+
+ loco::NodeShape visit(const luci::CircleExpandDims *node) final
{
- const loco::DataType S32 = loco::DataType::S32;
- auto indices_shape = loco::shape_get(node->indices()).as<loco::TensorShape>();
- // Only support OneHot node's depth() is CircleConst with type S32
- // TODO support depth with other types
- auto depth = loco::must_cast<luci::CircleConst *>(node->depth());
- LUCI_ASSERT(depth->dtype() == S32, "Only support int32 CircleConst");
- if (depth->rank() != 0)
- INTERNAL_EXN_V("Only support rank 0 CircleOneHot in Depth", oops::to_uint32(depth->rank()));
- loco::TensorShape output_shape;
- output_shape.rank(indices_shape.rank() + 1);
- auto axis = node->axis();
- if (axis < 0)
- axis += indices_shape.rank() + 1;
- LUCI_ASSERT(0 <= axis, "Axis is out of range");
- LUCI_ASSERT(static_cast<uint32_t>(axis) <= indices_shape.rank(), "Axis is out of range");
- uint32_t j = 0;
- for (uint32_t i = 0; i < output_shape.rank(); i++)
- {
- if (i == static_cast<uint32_t>(axis))
- {
- output_shape.dim(i) = depth->at<S32>(0);
- }
- else
- {
- output_shape.dim(i) = indices_shape.dim(j++);
- }
- }
- return loco::NodeShape{output_shape};
+ return infer_expand_dims(node);
}
- loco::NodeShape visit(const luci::CirclePack *node) final
+ loco::NodeShape visit(const luci::CircleFill *node) final { return infer_fill(node); }
+
+ loco::NodeShape visit(const luci::CircleFloor *node) final { return use_x(node); }
+
+ loco::NodeShape visit(const luci::CircleFloorDiv *node) final { return broadcast_xy(node); }
+
+ loco::NodeShape visit(const luci::CircleFloorMod *node) final { return broadcast_xy(node); }
+
+ loco::NodeShape visit(const luci::CircleFullyConnected *node) final
{
- LUCI_ASSERT(node->values_count() > 0, "Only support one or more inputs");
+ return infer_fully_connected(node);
+ }
- auto first_shape = loco::shape_get(node->values(0)).as<loco::TensorShape>();
- // Make sure all inputs have the same shape.
- for (uint32_t i = 1; i < node->values_count(); ++i)
- {
- auto in_shape = loco::shape_get(node->values(i)).as<loco::TensorShape>();
- LUCI_ASSERT(loco::NodeShape{first_shape} == loco::NodeShape{in_shape},
- "All inputs must have the same shape");
- }
+ loco::NodeShape visit(const luci::CircleGather *node) final { return infer_gather(node); }
- // Checking shape capability for pack layer
- // Input: tensors [D1, D2, ... Dn]
- // Axis: K
- // Output: [D1, D2, ... , D_K-1, n, D_K+1, ... Dn]
- auto axis = node->axis();
- if (axis < 0)
- axis += first_shape.rank() + 1;
+ loco::NodeShape visit(const luci::CircleGatherNd *node) final { return infer_gather_nd(node); }
- LUCI_ASSERT(0 <= axis, "Axis is out of range");
- LUCI_ASSERT(static_cast<uint32_t>(axis) <= first_shape.rank(), "Axis is out of range");
+ loco::NodeShape visit(const luci::CircleGreater *node) final { return broadcast_xy(node); }
- loco::TensorShape output_shape;
- output_shape.rank(first_shape.rank() + 1);
+ loco::NodeShape visit(const luci::CircleGreaterEqual *node) final { return broadcast_xy(node); }
- uint32_t j = 0;
- for (uint32_t i = 0; i < output_shape.rank(); ++i)
- {
- if (i == static_cast<uint32_t>(axis))
- {
- output_shape.dim(i) = node->values_count();
- }
- else
- {
- output_shape.dim(i) = first_shape.dim(j++);
- }
- }
+ loco::NodeShape visit(const luci::CircleIf *node) final
+ {
+ // Shape of CircleIf is not used. Just use input 0
+ assert(node->input_count() > 0);
+ const auto input_shape = loco::shape_get(node->input(0)).as<loco::TensorShape>();
+ return loco::NodeShape{input_shape};
+ }
- return loco::NodeShape{output_shape};
+ loco::NodeShape visit(const luci::CircleL2Normalize *node) final { return use_x(node); }
+
+ loco::NodeShape visit(const luci::CircleL2Pool2D *node) final
+ {
+ return infer_pool_2d_shape(node);
}
- loco::NodeShape visit(const luci::CirclePad *node) final
+ loco::NodeShape visit(const luci::CircleLeakyRelu *node) final
{
- const loco::DataType S32 = loco::DataType::S32;
+ const auto input_shape = loco::shape_get(node->features()).as<loco::TensorShape>();
+ return loco::NodeShape{input_shape};
+ }
- auto input_shape = loco::shape_get(node->input()).as<loco::TensorShape>();
- auto paddings = loco::must_cast<luci::CircleConst *>(node->paddings());
+ loco::NodeShape visit(const luci::CircleLess *node) final { return broadcast_xy(node); }
- // TODO support non-const case
- // TODO support other data type
- LUCI_ASSERT(paddings->dtype() == S32, "Only support int 32 for now");
- LUCI_ASSERT(paddings->rank() == 2, "paddings should be rank 2")
+ loco::NodeShape visit(const luci::CircleLessEqual *node) final { return broadcast_xy(node); }
- int32_t n = paddings->dim(0).value();
- int32_t v = paddings->dim(1).value();
+ loco::NodeShape visit(const luci::CircleLocalResponseNormalization *node) final
+ {
+ const auto input_shape = loco::shape_get(node->input()).as<loco::TensorShape>();
+ return loco::NodeShape{input_shape};
+ }
- LUCI_ASSERT(v == 2, "paddings should be [n, 2]");
- LUCI_ASSERT(n == int32_t(input_shape.rank()),
- "paddings [n, 2] should have same value of input rank");
+ loco::NodeShape visit(const luci::CircleLog *node) final { return use_x(node); }
- loco::TensorShape output_shape;
+ loco::NodeShape visit(const luci::CircleLogicalAnd *node) final { return use_x(node); }
- output_shape.rank(input_shape.rank());
- for (int32_t ni = 0; ni < n; ++ni)
- {
- int32_t idx = ni * 2;
- int value = input_shape.dim(ni).value();
- value += paddings->at<S32>(idx + 0); // left
- value += paddings->at<S32>(idx + 1); // right
- output_shape.dim(ni) = value;
- }
+ loco::NodeShape visit(const luci::CircleLogicalNot *node) final { return use_x(node); }
- return loco::NodeShape{output_shape};
- }
+ loco::NodeShape visit(const luci::CircleLogicalOr *node) final { return use_x(node); }
- loco::NodeShape visit(const luci::CirclePow *node) final { return broadcast_xy(node); }
+ loco::NodeShape visit(const luci::CircleLogistic *node) final { return use_x(node); }
+
+ loco::NodeShape visit(const luci::CircleLogSoftmax *node) final { return use_logits(node); }
- loco::NodeShape visit(const luci::CirclePRelu *node) final
+ loco::NodeShape visit(const luci::CircleMatrixDiag *node) final
{
- auto input_shape = loco::shape_get(node->input()).as<loco::TensorShape>();
- auto alpha_shape = loco::shape_get(node->alpha()).as<loco::TensorShape>();
+ return infer_matrix_diag(node);
+ }
+
+ loco::NodeShape visit(const luci::CircleMatrixSetDiag *node) final
+ {
+ return infer_matrix_set_diag(node);
+ }
- auto output_shape = broadcast_shape(input_shape, alpha_shape);
+ loco::NodeShape visit(const luci::CircleMaximum *node) final { return broadcast_xy(node); }
+
+ loco::NodeShape visit(const luci::CircleMaxPool2D *node) final
+ {
+ return infer_pool_2d_shape(node);
+ }
+ loco::NodeShape visit(const luci::CircleMean *node) final
+ {
+ auto output_shape = infer_reducer(node->input(), node->reduction_indices(), node->keep_dims());
return loco::NodeShape{output_shape};
}
- loco::NodeShape visit(const luci::CircleRange *node) final
+ loco::NodeShape visit(const luci::CircleMinimum *node) final { return broadcast_xy(node); }
+
+ loco::NodeShape visit(const luci::CircleMirrorPad *node) final { return infer_mirror_pad(node); }
+
+ loco::NodeShape visit(const luci::CircleMul *node) final { return broadcast_xy(node); }
+
+ loco::NodeShape visit(const luci::CircleNeg *node) final { return use_x(node); }
+
+ loco::NodeShape visit(const luci::CircleNonMaxSuppressionV4 *node) final
{
- loco::TensorShape output_shape;
- output_shape.rank(1);
+ const auto boxes_shape = loco::shape_get(node->boxes()).as<loco::TensorShape>();
+ return loco::NodeShape{boxes_shape};
+ }
- auto start_node = dynamic_cast<luci::CircleConst *>(node->start());
- auto limit_node = dynamic_cast<luci::CircleConst *>(node->limit());
- auto delta_node = dynamic_cast<luci::CircleConst *>(node->delta());
+ loco::NodeShape visit(const luci::CircleNonMaxSuppressionV5 *node) final
+ {
+ const auto boxes_shape = loco::shape_get(node->boxes()).as<loco::TensorShape>();
+ return loco::NodeShape{boxes_shape};
+ }
- if (start_node == nullptr || limit_node == nullptr || delta_node == nullptr)
- {
- return use_own(node);
- }
+ loco::NodeShape visit(const luci::CircleNotEqual *node) final { return broadcast_xy(node); }
- double start = 0, limit = 0, delta = 0;
+ loco::NodeShape visit(const luci::CircleOneHot *node) final { return infer_one_hot(node); }
-#define GET_RANGE_PARAM(DT) \
- start = start_node->scalar<DT>(); \
- limit = limit_node->scalar<DT>(); \
- delta = delta_node->scalar<DT>();
+ loco::NodeShape visit(const luci::CirclePack *node) final { return infer_pack(node); }
- switch (start_node->dtype())
- {
- case loco::DataType::FLOAT32:
- GET_RANGE_PARAM(loco::DataType::FLOAT32)
- break;
- case loco::DataType::S32:
- GET_RANGE_PARAM(loco::DataType::S32)
- break;
- default:
- INTERNAL_EXN("Range data type not supported");
- }
+ loco::NodeShape visit(const luci::CirclePad *node) final { return infer_pad(node); }
-#undef GET_RANGE_PARAM
+ loco::NodeShape visit(const luci::CirclePadV2 *node) final { return infer_pad_v2(node); }
- if (delta == 0)
- INTERNAL_EXN("Delta can not be zero");
+ loco::NodeShape visit(const luci::CirclePow *node) final { return broadcast_xy(node); }
- output_shape.dim(0) = ceil((limit - start) / delta);
+ loco::NodeShape visit(const luci::CirclePRelu *node) final { return infer_p_relu(node); }
- return loco::NodeShape{output_shape};
- }
+ loco::NodeShape visit(const luci::CircleRange *node) final { return infer_range(node); }
loco::NodeShape visit(const luci::CircleRank *) final
{
@@ -1242,136 +2248,16 @@ public:
*
* TODO Change this policy when not appropriate
*/
- loco::NodeShape visit(const luci::CircleReshape *node) final
- {
- LOGGER(l);
-
- const loco::DataType S32 = loco::DataType::S32;
-
- loco::TensorShape shape_by_input;
- {
- LUCI_ASSERT(node->shape(), "2nd input shape() should not be nullptr");
-
- // Only support node's shape() is CircleConst with S32
- // TODO support other node with other types
- auto const_shape_node = dynamic_cast<luci::CircleConst *>(node->shape());
- if (const_shape_node != nullptr)
- {
- LUCI_ASSERT(const_shape_node->dtype() == S32, "Only support int32 CircleConst");
-
- shape_by_input.rank(const_shape_node->size<S32>());
-
- for (uint32_t axis = 0; axis < shape_by_input.rank(); ++axis)
- {
- shape_by_input.dim(axis) = const_shape_node->at<S32>(axis);
- }
- }
- else
- {
- // We use shape from the node itself
- shape_by_input = own_shape(node);
- }
- }
-
- loco::TensorShape shape_by_attr;
- {
- shape_by_attr.rank(node->newShape()->rank());
-
- for (uint32_t axis = 0; axis < shape_by_attr.rank(); ++axis)
- {
- shape_by_attr.dim(axis) = node->newShape()->dim(axis);
- }
- }
-
- if (!(shape_by_input == shape_by_attr))
- {
- INFO(l) << "CircleReshape: Two new shape information mismatched : " << std::endl;
- INFO(l) << " shape_by_input : " << shape_by_input << std::endl;
- INFO(l) << " shape_by_attr : " << shape_by_attr << std::endl;
- }
-
- loco::TensorShape output_shape = shape_by_input;
-
- // One of the dimensions can have special value -1, meaning its actual value should be inferred.
- const auto input_shape = loco::shape_get(node->tensor()).as<loco::TensorShape>();
- const uint32_t input_element_count = loco::element_count(&input_shape);
- uint32_t output_element_count = 1;
- uint32_t unknown_dim_index = UINT32_MAX;
- for (uint32_t dim_index = 0; dim_index < output_shape.rank(); ++dim_index)
- {
- const uint32_t dim_value = output_shape.dim(dim_index).value();
- if (static_cast<int>(dim_value) == -1)
- {
- LUCI_ASSERT(unknown_dim_index == UINT32_MAX, "More than one unknown dimension");
- unknown_dim_index = dim_index;
- }
- else
- {
- output_element_count *= dim_value;
- }
- }
- if (unknown_dim_index != UINT32_MAX)
- {
- output_shape.dim(unknown_dim_index) = input_element_count / output_element_count;
- }
-
- return loco::NodeShape{output_shape};
- }
+ loco::NodeShape visit(const luci::CircleReshape *node) final { return infer_reshape(node); }
loco::NodeShape visit(const luci::CircleResizeBilinear *node) final
{
- auto input_shape = loco::shape_get(node->input()).as<loco::TensorShape>();
-
- if (input_shape.rank() != 4)
- INTERNAL_EXN("Expected ResizeBilinear input to have rank 4");
-
- auto *const_node = loco::must_cast<luci::CircleConst *>(node->size());
-
- if (const_node->dtype() != loco::DataType::S32)
- INTERNAL_EXN("Only S32 datatype is supported for ResizeBilinear size");
-
- if (const_node->rank() != 1)
- INTERNAL_EXN("Expected size tensor of rank 1");
-
- if (const_node->dim(0).value() != 2)
- INTERNAL_EXN("Expected size tensor with shape [2]");
-
- loco::TensorShape output_shape;
- output_shape.rank(4);
- output_shape.dim(0) = input_shape.dim(0);
- output_shape.dim(1) = const_node->at<loco::DataType::S32>(0);
- output_shape.dim(2) = const_node->at<loco::DataType::S32>(1);
- output_shape.dim(3) = input_shape.dim(3);
-
- return loco::NodeShape{output_shape};
+ return infer_resize_bilinear(node);
}
loco::NodeShape visit(const luci::CircleResizeNearestNeighbor *node) final
{
- auto input_shape = loco::shape_get(node->input()).as<loco::TensorShape>();
-
- if (input_shape.rank() != 4)
- INTERNAL_EXN("Expected ResizeNearesNeighbor input to have rank 4");
-
- auto *const_node = loco::must_cast<luci::CircleConst *>(node->size());
-
- if (const_node->dtype() != loco::DataType::S32)
- INTERNAL_EXN("Only S32 datatype is supported for ResizeNearesNeighbor size");
-
- if (const_node->rank() != 1)
- INTERNAL_EXN("Expected size tensor of rank 1");
-
- if (const_node->dim(0).value() != 2)
- INTERNAL_EXN("Expected size tensor with shape [2]");
-
- loco::TensorShape output_shape;
- output_shape.rank(4);
- output_shape.dim(0) = input_shape.dim(0);
- output_shape.dim(1) = const_node->at<loco::DataType::S32>(0);
- output_shape.dim(2) = const_node->at<loco::DataType::S32>(1);
- output_shape.dim(3) = input_shape.dim(3);
-
- return loco::NodeShape{output_shape};
+ return infer_resize_nearest_neighbor(node);
}
loco::NodeShape visit(const luci::CircleReverseSequence *node) final
@@ -1395,276 +2281,38 @@ public:
loco::NodeShape visit(const luci::CircleRsqrt *node) final { return use_x(node); }
- loco::NodeShape visit(const luci::CircleScatterNd *node) final
- {
- loco::TensorShape output_shape;
-
- auto shape_node = loco::must_cast<luci::CircleConst *>(node->shape());
-
- const loco::DataType S32 = loco::DataType::S32;
- const loco::DataType S64 = loco::DataType::S64;
-
- std::vector<int64_t> vect_shape;
-
- if (shape_node->dtype() == S32)
- vect_shape = vector_from_constant<S32>(shape_node);
- else if (shape_node->dtype() == S64)
- vect_shape = vector_from_constant<S64>(shape_node);
- else
- LUCI_ASSERT(false, "Only support int32/int64 for shape()");
-
- output_shape.rank(vect_shape.size());
- for (uint32_t i = 0; i < vect_shape.size(); ++i)
- output_shape.dim(i) = vect_shape[i];
-
- return loco::NodeShape{output_shape};
- }
+ loco::NodeShape visit(const luci::CircleScatterNd *node) final { return infer_scatter_nd(node); }
loco::NodeShape visit(const luci::CircleSegmentSum *node) final
{
- auto input_shape = loco::shape_get(node->input()).as<loco::TensorShape>();
- auto segment_shape = loco::shape_get(node->segment_ids()).as<loco::TensorShape>();
-
- LUCI_ASSERT(segment_shape.rank() == 1, "segment_ids must be 1-D tensor");
- LUCI_ASSERT(segment_shape.dim(0).value() == input_shape.dim(0).value(),
- "segment_ids size must be equal to the size of data's first dimension");
-
- auto ids_shape_value = loco::must_cast<luci::CircleConst *>(node->segment_ids());
-
- std::vector<int64_t> vect_ids;
-
- if (ids_shape_value->dtype() == loco::DataType::S32)
- vect_ids = vector_from_constant<loco::DataType::S32>(ids_shape_value);
-
- LUCI_ASSERT(std::is_sorted(vect_ids.begin(), vect_ids.end()),
- "segment_ids values should be sorted")
-
- loco::TensorShape output_shape;
-
- output_shape.rank(input_shape.rank());
-
- for (uint32_t i = 1; i < input_shape.rank(); ++i)
- output_shape.dim(i) = input_shape.dim(i);
-
- output_shape.dim(0) = vect_ids.back() + 1;
-
- return loco::NodeShape{output_shape};
+ return infer_segment_sum(node);
}
- loco::NodeShape visit(const luci::CircleSelect *node) final
- {
- auto t_shape = loco::shape_get(node->t()).as<loco::TensorShape>();
- assert(t_shape == loco::shape_get(node->e()).as<loco::TensorShape>());
+ loco::NodeShape visit(const luci::CircleSelect *node) final { return infer_select(node); }
- // condition shape validation
- auto c_shape = loco::shape_get(node->condition()).as<loco::TensorShape>();
- if (c_shape.rank() != t_shape.rank())
- {
- if (c_shape.rank() != 0 && c_shape.rank() != 1)
- INTERNAL_EXN_V("CircleSelect condition rank is not 0 nor 1: ", c_shape.rank());
+ loco::NodeShape visit(const luci::CircleSelectV2 *node) final { return infer_select_v2(node); }
- if (c_shape.rank() == 1)
- {
- if (c_shape.dim(0).value() != t_shape.dim(0).value())
- INTERNAL_EXN("CircleSelect condition dim(0) should match with t.dim(0)");
- }
- }
-
- return loco::NodeShape{t_shape};
- }
-
- loco::NodeShape visit(const luci::CircleSelectV2 *node) final
- {
- auto c_shape = loco::shape_get(node->condition()).as<loco::TensorShape>();
- auto t_shape = loco::shape_get(node->t()).as<loco::TensorShape>();
- auto e_shape = loco::shape_get(node->e()).as<loco::TensorShape>();
-
- // validate ability to broadcast shapes to each other
- auto b_shape = broadcast_shape(broadcast_shape(c_shape, t_shape), e_shape);
- return loco::NodeShape{b_shape};
- }
-
- loco::NodeShape visit(const luci::CircleShape *node) final
- {
- auto input_shape = loco::shape_get(node->input()).as<loco::TensorShape>();
-
- loco::TensorShape output_shape;
-
- output_shape.rank(1);
- output_shape.dim(0) = input_shape.rank();
-
- return loco::NodeShape{output_shape};
- }
+ loco::NodeShape visit(const luci::CircleShape *node) final { return infer_shape(node); }
loco::NodeShape visit(const luci::CircleSin *node) final { return use_x(node); }
- loco::NodeShape visit(const luci::CircleSlice *node) final
- {
- const loco::DataType S32 = loco::DataType::S32;
- const loco::DataType S64 = loco::DataType::S64;
-
- auto input_shape = loco::shape_get(node->input()).as<loco::TensorShape>();
-
- auto const_begin = loco::must_cast<luci::CircleConst *>(node->begin());
- auto const_size = loco::must_cast<luci::CircleConst *>(node->size());
-
- loco::TensorShape output_shape;
- std::vector<int64_t> vect_begin; // to hold both S32/S64, we use int64_t
- std::vector<int64_t> vect_size;
-
- if (const_begin->dtype() == S32)
- vect_begin = vector_from_constant<S32>(const_begin);
- else if (const_begin->dtype() == S64)
- vect_begin = vector_from_constant<S64>(const_begin);
- else
- LUCI_ASSERT(false, "Only support int32/int64 for begin()");
-
- if (const_size->dtype() == S32)
- vect_size = vector_from_constant<S32>(const_size);
- else if (const_size->dtype() == S64)
- vect_size = vector_from_constant<S64>(const_size);
- else
- LUCI_ASSERT(false, "Only support int32/int64 for size()");
-
- assert(input_shape.rank() == vect_begin.size());
- assert(input_shape.rank() == vect_size.size());
-
- output_shape.rank(vect_begin.size());
- for (uint32_t idx = 0; idx < vect_begin.size(); ++idx)
- {
- auto size = vect_size.at(idx);
- if (size == -1)
- {
- size = input_shape.dim(idx).value() - vect_begin.at(idx);
- }
- output_shape.dim(idx) = size;
- }
-
- return loco::NodeShape{output_shape};
- }
+ loco::NodeShape visit(const luci::CircleSlice *node) final { return infer_slice(node); }
loco::NodeShape visit(const luci::CircleSoftmax *node) final { return use_logits(node); }
loco::NodeShape visit(const luci::CircleSpaceToBatchND *node) final
{
- const loco::DataType S32 = loco::DataType::S32;
-
- auto input_shape = loco::shape_get(node->input()).as<loco::TensorShape>();
- // Support only input rank is 3 and 4
- assert(input_shape.rank() == 3 || input_shape.rank() == 4);
-
- // Only support block_shape() with S32 type CircleConst for now
- auto const_block_shape = loco::must_cast<luci::CircleConst *>(node->block_shape());
- LUCI_ASSERT(const_block_shape->dtype() == S32, "Only support int32 block_shape");
-
- // Only support paddings() with S32 type CircleConst for now
- auto const_paddings = loco::must_cast<luci::CircleConst *>(node->paddings());
- LUCI_ASSERT(const_paddings->dtype() == S32, "Only support int32 paddings");
-
- auto const_block_shape_shape = loco::shape_get(const_block_shape).as<loco::TensorShape>();
- auto const_paddings_shape = loco::shape_get(const_paddings).as<loco::TensorShape>();
- assert(const_block_shape_shape.rank() == 1);
- assert(const_paddings_shape.rank() == 2);
-
- int32_t input_spatial_dim = input_shape.rank() - 2;
- assert(const_block_shape_shape.dim(0) == input_spatial_dim);
- assert(const_paddings_shape.dim(0) == input_spatial_dim);
- assert(const_paddings_shape.dim(1) == 2);
-
- // Check all values of block_shape >= 1
- uint32_t ele_count = const_block_shape->size<S32>();
- for (uint32_t e = 0; e < ele_count; ++e)
- {
- auto val = const_block_shape->at<S32>(e);
- if (val < 1)
- {
- INTERNAL_EXN_V("All values of block_shape >= 1: ", e);
- }
- }
-
- loco::TensorShape shape_output;
-
- shape_output.rank(input_shape.rank());
-
- int32_t output_batch_size = input_shape.dim(0).value();
- for (int32_t dim = 0; dim < input_spatial_dim; ++dim)
- {
- int dim_size = input_shape.dim(dim + 1).value();
- dim_size += const_paddings->at<S32>(dim * 2);
- dim_size += const_paddings->at<S32>(dim * 2 + 1);
- shape_output.dim(dim + 1) = dim_size / const_block_shape->at<S32>(dim);
-
- assert(dim_size % const_block_shape->at<S32>(dim) == 0);
- output_batch_size = output_batch_size * const_block_shape->at<S32>(dim);
- }
- shape_output.dim(0) = output_batch_size;
- shape_output.dim(input_shape.rank() - 1) = input_shape.dim(input_shape.rank() - 1);
-
- return loco::NodeShape{shape_output};
+ return infer_space_to_batch_nd(node);
}
loco::NodeShape visit(const luci::CircleSpaceToDepth *node) final
{
- auto input_shape = loco::shape_get(node->input()).as<loco::TensorShape>();
- LUCI_ASSERT(input_shape.rank() == 4, "Only input rank 4 is supported");
-
- // Only data format NHWC is supported
- int32_t height = input_shape.dim(1).value();
- int32_t width = input_shape.dim(2).value();
- int32_t depth = input_shape.dim(3).value();
-
- int block_size = node->block_size();
-
- if (block_size < 2)
- INTERNAL_EXN("Block size must be >= 2");
-
- if ((height % block_size) || (width % block_size))
- {
- INTERNAL_EXN("The input tensor's height and width must be divisible by block_size");
- }
-
- loco::TensorShape output_shape;
- output_shape.rank(4);
-
- output_shape.dim(0) = input_shape.dim(0).value();
- output_shape.dim(1) = height / block_size;
- output_shape.dim(2) = width / block_size;
- output_shape.dim(3) = block_size * block_size * depth;
-
- return loco::NodeShape{output_shape};
+ return infer_space_to_depth(node);
}
loco::NodeShape visit(const luci::CircleSparseToDense *node) final
{
- loco::TensorShape shape;
- {
- LUCI_ASSERT(node->output_shape(), "dims input should not be nullptr");
-
- auto output_shape_node = dynamic_cast<luci::CircleConst *>(node->output_shape());
- if (output_shape_node != nullptr)
- {
- // Only support node with S32
- LUCI_ASSERT(output_shape_node->dtype() == loco::DataType::S32,
- "Only support int32 CircleConst");
-
- if (output_shape_node->rank() != 1)
- INTERNAL_EXN_V("Only support rank 1 CircleConst",
- oops::to_uint32(output_shape_node->rank()));
-
- shape.rank(output_shape_node->dim(0).value());
-
- for (uint32_t axis = 0; axis < shape.rank(); ++axis)
- {
- shape.dim(axis) = output_shape_node->at<loco::DataType::S32>(axis);
- }
- }
- else
- {
- shape = own_shape(node);
- }
- }
-
- return loco::NodeShape{shape};
+ return infer_sparse_to_dense(node);
}
loco::NodeShape visit(const luci::CircleSplit *node) final
@@ -1692,71 +2340,10 @@ public:
loco::NodeShape visit(const luci::CircleStridedSlice *node) final
{
- auto begin_node = dynamic_cast<luci::CircleConst *>(node->begin());
- auto end_node = dynamic_cast<luci::CircleConst *>(node->end());
- auto strides_node = dynamic_cast<luci::CircleConst *>(node->strides());
-
- if (begin_node == nullptr || end_node == nullptr || strides_node == nullptr)
- {
- return use_own(node);
- }
-
- loco::TensorShape shape = infer_output_shape(node);
- return loco::NodeShape{shape};
+ return infer_strided_slice(node);
}
- loco::NodeShape visit(const luci::CircleSqueeze *node) final
- {
- auto input_shape = loco::shape_get(node->input()).as<loco::TensorShape>();
-
- // TODO input shape may be unknown before runtime
- std::vector<bool> do_squeeze(input_shape.rank(), false);
- uint32_t num_squeezed = 0;
-
- if (!node->squeeze_dims().empty())
- {
- // SqueezeDims not empty, squeeze only dims specified
- for (int32_t raw_dim : node->squeeze_dims())
- {
- int32_t dim = raw_dim < 0 ? raw_dim + input_shape.rank() : raw_dim;
-
- if (dim < 0 || static_cast<uint32_t>(dim) >= input_shape.rank() ||
- input_shape.dim(dim).value() != 1)
- {
- INTERNAL_EXN("invalid dimention specified to Squeeze");
- }
-
- if (!do_squeeze[dim])
- ++num_squeezed;
- do_squeeze[dim] = true;
- }
- }
- else
- {
- // SqueezeDims empty, squeeze any dims with size == 1
- for (uint32_t dim = 0; dim < input_shape.rank(); ++dim)
- {
- if (input_shape.dim(dim) == 1)
- {
- do_squeeze[dim] = true;
- ++num_squeezed;
- }
- }
- }
-
- loco::TensorShape output_shape;
- output_shape.rank(input_shape.rank() - num_squeezed);
-
- for (uint32_t in_dim = 0, out_dim = 0; in_dim < input_shape.rank(); ++in_dim)
- {
- if (!do_squeeze[in_dim])
- {
- output_shape.dim(out_dim++) = input_shape.dim(in_dim);
- }
- }
-
- return loco::NodeShape{output_shape};
- }
+ loco::NodeShape visit(const luci::CircleSqueeze *node) final { return infer_squeeze(node); }
loco::NodeShape visit(const luci::CircleSub *node) final { return broadcast_xy(node); }
@@ -1768,33 +2355,7 @@ public:
loco::NodeShape visit(const luci::CircleTanh *node) final { return use_x(node); }
- loco::NodeShape visit(const luci::CircleTile *node) final
- {
- const loco::DataType S32 = loco::DataType::S32;
-
- auto input_shape = loco::shape_get(node->input()).as<loco::TensorShape>();
- auto multiples = loco::must_cast<luci::CircleConst *>(node->multiples());
-
- // TODO support non-const case
- // TODO support S64 type
- LUCI_ASSERT(multiples->dtype() == S32, "Only support int32 multiples");
- LUCI_ASSERT(multiples->rank() == 1, "multiples should be rank 1")
-
- uint32_t n = multiples->dim(0).value();
-
- LUCI_ASSERT(n == input_shape.rank(), "length of multiples should be the same with input rank");
-
- loco::TensorShape output_shape;
-
- output_shape.rank(input_shape.rank());
- for (uint32_t ni = 0; ni < n; ++ni)
- {
- int32_t multiple = multiples->at<S32>(ni);
- output_shape.dim(ni) = input_shape.dim(ni).value() * static_cast<uint32_t>(multiple);
- }
-
- return loco::NodeShape{output_shape};
- }
+ loco::NodeShape visit(const luci::CircleTile *node) final { return infer_tile(node); }
loco::NodeShape visit(const luci::CircleTopKV2 *node) final
{
@@ -1803,93 +2364,16 @@ public:
return loco::NodeShape{input_shape};
}
- loco::NodeShape visit(const luci::CircleTranspose *node) final
- {
- auto input_shape = loco::shape_get(node->a()).as<loco::TensorShape>();
-
- auto perm_node = loco::must_cast<luci::CircleConst *>(node->perm());
-
- loco::TensorShape output_shape;
- output_shape.rank(input_shape.rank());
-
- assert(perm_node->dtype() == loco::DataType::S32);
- assert(input_shape.rank() == perm_node->template size<loco::DataType::S32>());
-
- for (uint32_t out_axis = 0; out_axis < output_shape.rank(); out_axis++)
- {
- auto in_axis = perm_node->template at<loco::DataType::S32>(out_axis);
- output_shape.dim(out_axis) = input_shape.dim(in_axis);
- }
-
- return output_shape;
- }
-
- loco::NodeShape visit(const luci::CircleUnique *node) final
- {
- auto input_shape = loco::shape_get(node->input()).as<loco::TensorShape>();
-
- assert(input_shape.rank() == 1);
-
- loco::TensorShape shape_output;
- shape_output = own_shape(node);
-
- return loco::NodeShape{shape_output};
- }
+ loco::NodeShape visit(const luci::CircleTranspose *node) final { return infer_transpose(node); }
loco::NodeShape visit(const luci::CircleTransposeConv *node) final
{
- // TransposeConv's output shape is written in its 'inputSizes' argument
- auto input_sizes_const = loco::must_cast<luci::CircleConst *>(node->inputSizes());
- // TODO support non-const type
- LUCI_ASSERT(input_sizes_const->dtype() == loco::DataType::S32, "Only support S32 dtype")
- LUCI_ASSERT(input_sizes_const->rank() == 1 && input_sizes_const->dim(0).value() == 4,
- "Only support rank 1 with 4 entries")
-
- loco::TensorShape shape;
-
- shape.rank(4);
- for (uint32_t axis = 0; axis < 4; ++axis)
- shape.dim(axis) = input_sizes_const->at<loco::DataType::S32>(axis);
-
- return loco::NodeShape{shape};
+ return infer_transpose_conv(node);
}
- loco::NodeShape visit(const luci::CircleUnpack *node) final
- {
- // CircleUnpack provides list(array) of Tensors which has one less dimension of the input
- // We'll set shape of CircleUnpack to shape of actual outputs
- // TODO fix this if any problem rises
- auto value_shape = loco::shape_get(node->value()).as<loco::TensorShape>();
-
- auto axis = node->axis();
- auto num = node->num();
- auto rank = static_cast<int32_t>(value_shape.rank());
-
- if (rank == 0)
- {
- // Unknown shape
- return use_own(node);
- }
-
- LUCI_ASSERT(-rank <= axis && axis < rank, "Axis is out of range");
-
- if (axis < 0)
- axis += rank;
-
- LUCI_ASSERT(num == static_cast<int32_t>(value_shape.dim(axis).value()),
- "num, axis maybe incorrect");
-
- loco::TensorShape output_shape;
- output_shape.rank(rank - 1);
+ loco::NodeShape visit(const luci::CircleUnpack *node) final { return infer_unpack(node); }
- for (int32_t i = 0, o = 0; i < rank; ++i)
- {
- if (i != axis)
- output_shape.dim(o++) = value_shape.dim(i);
- }
-
- return loco::NodeShape{output_shape};
- }
+ loco::NodeShape visit(const luci::CircleUnique *node) final { return infer_unique(node); }
loco::NodeShape visit(const luci::CircleWhere *node) final { return use_own(node); }
@@ -1911,57 +2395,10 @@ public:
// Circle Only
loco::NodeShape visit(const luci::CircleBCQFullyConnected *node) final
{
- loco::TensorShape out_shape;
-
- auto input_shape = loco::shape_get(node->input()).as<loco::TensorShape>();
- auto weights_clusters = loco::must_cast<luci::CircleConst *>(node->weights_clusters());
-
- LUCI_ASSERT(input_shape.rank() == 2, "Input rank of BCQFullyConnected should be 2");
-
- int32_t qbits_sum = 0;
- for (uint32_t i = 0; i < weights_clusters->dim(0).value(); ++i)
- {
- qbits_sum += weights_clusters->at<loco::DataType::S32>(i * 2 + 1);
- }
-
- out_shape.rank(2);
- out_shape.dim(0) = qbits_sum;
- out_shape.dim(1) = input_shape.dim(1);
-
- return loco::NodeShape{out_shape};
+ return infer_bcq_fully_connected(node);
}
- loco::NodeShape visit(const luci::CircleBCQGather *node) final
- {
- loco::TensorShape input_shape;
- loco::TensorShape output_shape;
-
- const auto input_binary_shape = loco::shape_get(node->input_binary()).as<loco::TensorShape>();
- const auto indices_shape = loco::shape_get(node->indices()).as<loco::TensorShape>();
- auto axis = node->axis();
-
- auto input_clusters = loco::must_cast<luci::CircleConst *>(node->input_clusters());
- auto qbits_sum = 0;
- for (uint32_t i = 0; i < input_clusters->dim(0).value(); ++i)
- {
- qbits_sum += input_clusters->at<loco::DataType::S32>(i * 2 + 1);
- }
-
- input_shape.rank(2);
- input_shape.dim(0) = qbits_sum;
- input_shape.dim(1) = input_binary_shape.dim(1).value() * 32;
-
- output_shape.rank(input_shape.rank() - 1 + indices_shape.rank());
- int32_t outdim_index = 0;
- for (int32_t i = 0; i < axis; ++i)
- output_shape.dim(outdim_index++) = input_shape.dim(i);
- for (uint32_t i = 0; i < indices_shape.rank(); ++i)
- output_shape.dim(outdim_index++) = indices_shape.dim(i);
- for (uint32_t i = axis + 1; i < input_shape.rank(); ++i)
- output_shape.dim(outdim_index++) = input_shape.dim(i);
-
- return loco::NodeShape{output_shape};
- }
+ loco::NodeShape visit(const luci::CircleBCQGather *node) final { return infer_bcq_gather(node); }
loco::NodeShape visit(const luci::CircleInstanceNorm *node) final
{
@@ -1971,25 +2408,9 @@ public:
}
// Virtual
- loco::NodeShape visit(const luci::CircleInput *node) final
- {
- loco::TensorShape shape;
+ loco::NodeShape visit(const luci::CircleInput *node) final { return infer_input(node); }
- shape.rank(node->rank());
- for (uint32_t axis = 0; axis < node->rank(); axis++)
- shape.dim(axis) = node->dim(axis);
-
- return loco::NodeShape{shape};
- }
-
- loco::NodeShape visit(const luci::CircleOutput *node) final
- {
- auto graph_outputs = node->graph()->outputs();
- auto graph_output = graph_outputs->at(node->index());
- auto output_shape = graph_output->shape();
-
- return loco::NodeShape{*output_shape};
- }
+ loco::NodeShape visit(const luci::CircleOutput *node) final { return infer_output(node); }
loco::NodeShape visit(const luci::CircleOutputDummy *node) final { return use_own(node); }
@@ -1997,259 +2418,32 @@ public:
loco::NodeShape visit(const luci::CircleCustomOut *node) final { return use_own(node); }
- loco::NodeShape visit(const luci::CircleIfOut *node) final
- {
- /**
- * @note IF operator type and shape are that of the "then" and "else"
- * Graph Outputs.
- */
- auto circle_if = dynamic_cast<const luci::CircleIf *>(node->input());
- if (circle_if == nullptr)
- {
- INTERNAL_EXN("CircleIf IR is not configured correctly");
- }
-
- auto index = node->index();
- auto then_graph = circle_if->then_graph();
- auto else_graph = circle_if->else_graph();
- assert(then_graph != nullptr);
- assert(else_graph != nullptr);
-
- // shape and type are assumed to be same
- // these are checked at post_import_graph() in Import
- auto then_outputs = loco::output_nodes(then_graph);
- auto else_outputs = loco::output_nodes(else_graph);
- assert(then_outputs.size() == else_outputs.size());
- assert(index < static_cast<int32_t>(then_outputs.size()));
-
- auto then_out = loco::must_cast<luci::CircleOutput *>(then_outputs.at(index));
- auto else_out = loco::must_cast<luci::CircleOutput *>(else_outputs.at(index));
-
- auto then_graph_outputs = then_graph->outputs(); // loco::GraphOutput items
- auto else_graph_outputs = else_graph->outputs();
- assert(then_graph_outputs->size() == else_graph_outputs->size());
-
- auto then_graph_output = then_graph_outputs->at(then_out->index());
- auto else_graph_output = else_graph_outputs->at(else_out->index());
- (void)else_graph_output; // make compiler happy for unused variable warnings
- assert(*then_graph_output->shape() == *else_graph_output->shape());
-
- return loco::NodeShape{*then_graph_output->shape()};
- }
+ loco::NodeShape visit(const luci::CircleIfOut *node) final { return infer_if_out(node); }
loco::NodeShape visit(const luci::CircleNonMaxSuppressionV4Out *node) final
{
- const loco::DataType S32 = loco::DataType::S32;
-
- auto nmsv4 = dynamic_cast<const luci::CircleNonMaxSuppressionV4 *>(node->input());
- if (nmsv4 == nullptr)
- INTERNAL_EXN("CircleNonMaxSuppressionV4 IR is not configured correctly");
-
- auto index = node->index();
- if (index == 1)
- return loco::TensorShape({0});
-
- assert(index == 0);
-
- auto unknown = loco::TensorShape{loco::Dimension()};
- auto max_output_size = dynamic_cast<const luci::CircleConst *>(nmsv4->max_output_size());
- if (max_output_size == nullptr)
- return unknown; // we need CircleConst for max output size
-
- LUCI_ASSERT(max_output_size->dtype() == S32, "Only support int32 for max_output_size");
-
- if (max_output_size->size<S32>() < 1)
- return unknown;
-
- auto max_output_size_value = uint32_t(max_output_size->at<S32>(0));
- return loco::TensorShape{max_output_size_value};
+ return infer_non_max_suppression_v4_out(node);
}
- loco::NodeShape visit(const luci::CircleSplitOut *node) final
+ loco::NodeShape visit(const luci::CircleNonMaxSuppressionV5Out *node) final
{
- const loco::DataType S32 = loco::DataType::S32;
-
- auto split = dynamic_cast<const luci::CircleSplit *>(node->input());
- if (split == nullptr)
- INTERNAL_EXN("CircleSplit IR is not configured correctly");
-
- loco::NodeShape unknown;
-
- auto split_shape = loco::shape_get(split).as<loco::TensorShape>();
-
- auto split_dim = dynamic_cast<const luci::CircleConst *>(split->split_dim());
- if (split_dim == nullptr)
- return unknown; // we need CircleConst for split_dim
- LUCI_ASSERT(split_dim->dtype() == S32, "Only support int32 for split_dim");
-
- assert(split_dim->size<S32>() == 1);
- auto split_dim_axis = split_dim->at<S32>(0);
- if (split_dim_axis < 0)
- split_dim_axis += split_shape.rank();
-
- auto split_dim_value = split_shape.dim(split_dim_axis).value();
- assert(split_dim_value % split->num_split() == 0);
- const int split_depth = split_dim_value / split->num_split();
-
- loco::TensorShape output_shape = split_shape;
-
- // All shapes are equally same
- output_shape.dim(split_dim_axis) = loco::Dimension(split_depth);
-
- return loco::NodeShape{output_shape};
+ return infer_non_max_suppression_v5_out(node);
}
- loco::NodeShape visit(const luci::CircleSplitVOut *node) final
- {
- const loco::DataType S32 = loco::DataType::S32;
-
- auto split = dynamic_cast<const luci::CircleSplitV *>(node->input());
- if (split == nullptr)
- INTERNAL_EXN("CircleSplit IR is not configured correctly");
-
- loco::NodeShape unknown;
-
- auto split_shape = loco::shape_get(split).as<loco::TensorShape>();
-
- auto size_splits = dynamic_cast<const luci::CircleConst *>(split->size_splits());
- if (size_splits == nullptr)
- return unknown; // we need CircleConst for size_splits
- LUCI_ASSERT(size_splits->dtype() == S32, "Only support int32 for size_splits");
-
- auto split_dim = dynamic_cast<const luci::CircleConst *>(split->split_dim());
- if (split_dim == nullptr)
- return unknown; // we need CircleConst for split_dim
- LUCI_ASSERT(split_dim->dtype() == S32, "Only support int32 for split_dim");
-
- // fetch axis
- assert(split_dim->size<S32>() == 1);
- auto split_dim_axis = split_dim->at<S32>(0);
- if (split_dim_axis < 0)
- split_dim_axis += split_shape.rank();
-
- // interpret size_splits values
- int32_t size_splits_count = static_cast<int32_t>(size_splits->size<S32>());
- assert(size_splits_count == split->num_split());
-
- int64_t minus_one_count = 0, size_splits_sum = 0;
- for (int32_t idx = 0; idx < size_splits_count; ++idx)
- {
- auto size = size_splits->at<S32>(idx);
- assert(size >= -1);
- if (size == -1)
- ++minus_one_count;
- else
- size_splits_sum += size;
- }
- if (minus_one_count > 1)
- INTERNAL_EXN("CircleSplitV size_splits has more than two -1 values");
-
- // calcuate this SplitVOut shape
- auto input_size = split_shape.dim(split_dim_axis).value();
- assert(size_splits_sum <= input_size);
-
- auto index_this = node->index();
- assert(0 <= index_this && index_this < split->num_split());
- auto split_depth = size_splits->at<S32>(index_this);
- if (split_depth == -1)
- split_depth = input_size - size_splits_sum;
+ loco::NodeShape visit(const luci::CircleSplitOut *node) final { return infer_split_out(node); }
- loco::TensorShape output_shape = split_shape;
-
- output_shape.dim(split_dim_axis) = loco::Dimension(split_depth);
-
- return loco::NodeShape{output_shape};
- }
+ loco::NodeShape visit(const luci::CircleSplitVOut *node) final { return infer_split_v_out(node); }
loco::NodeShape visit(const luci::CircleTopKV2Out *node) final
{
- const loco::DataType S32 = loco::DataType::S32;
-
- auto topkv2 = dynamic_cast<const luci::CircleTopKV2 *>(node->input());
- if (topkv2 == nullptr)
- INTERNAL_EXN("CircleSplit IR is not configured correctly");
-
- // shape of topkv2 is same as topkv2->input()
- auto input_shape = loco::shape_get(topkv2).as<loco::TensorShape>();
-
- auto node_k = loco::must_cast<const luci::CircleConst *>(topkv2->k());
- LUCI_ASSERT(node_k->dtype() == S32, "Only support Int32");
- assert(node_k->size<S32>() == 1);
-
- loco::TensorShape output_shape;
-
- output_shape.rank(input_shape.rank());
- for (uint32_t idx = 0; idx < input_shape.rank() - 1; ++idx)
- {
- output_shape.dim(idx) = input_shape.dim(idx);
- }
- output_shape.dim(input_shape.rank() - 1) = node_k->at<S32>(0);
-
- return loco::NodeShape{output_shape};
+ return infer_top_k_v2_out(node);
}
- loco::NodeShape visit(const luci::CircleUniqueOut *node) final
- {
- auto unique = dynamic_cast<const luci::CircleUnique *>(node->input());
- if (unique == nullptr)
- {
- INTERNAL_EXN("CircleUnique IR is not configured correctly");
- }
+ loco::NodeShape visit(const luci::CircleUniqueOut *node) final { return infer_unique_out(node); }
- auto unique_shape = loco::shape_get(unique).as<loco::TensorShape>();
+ loco::NodeShape visit(const luci::CircleUnpackOut *node) final { return infer_unpack_out(node); }
- return loco::NodeShape{unique_shape};
- }
-
- loco::NodeShape visit(const luci::CircleUnpackOut *node) final
- {
- auto unpack = dynamic_cast<const luci::CircleUnpack *>(node->input());
- if (unpack == nullptr)
- {
- INTERNAL_EXN("CircleUnpack IR is not configured correctly");
- }
-
- auto unpack_shape = loco::shape_get(unpack).as<loco::TensorShape>();
-
- return loco::NodeShape{unpack_shape};
- }
-
- loco::NodeShape visit(const luci::CircleWhileOut *node) final
- {
- /**
- * @note WHILE operator's shape is the same with the "cond"
- * Graph input.
- */
- auto circle_while = dynamic_cast<const luci::CircleWhile *>(node->input());
- if (circle_while == nullptr)
- {
- INTERNAL_EXN("CircleWhile IR is not configured correctly");
- }
-
- auto index = node->index();
- auto cond_graph = circle_while->cond_graph();
- assert(cond_graph != nullptr);
-
- // Assumption: the index of CircleWhileOut matches with the index of input nodes returned by
- // loco::input_nodes
- auto cond_inputs = loco::input_nodes(cond_graph);
- auto cond_in = loco::must_cast<luci::CircleInput *>(cond_inputs.at(index));
-
- auto cond_graph_inputs = cond_graph->inputs();
- auto cond_graph_input = cond_graph_inputs->at(cond_in->index());
-
- auto cond_graph_input_shape = *cond_graph_input->shape();
- auto this_shape = own_shape(node);
-
- if (!(this_shape == cond_graph_input_shape))
- {
- LOGGER(l);
- WARN(l) << "Warning: CircleWhileOut '" << node->name() << "' shape mispatch " << this_shape
- << " vs " << cond_graph_input_shape;
- }
-
- return loco::NodeShape{this_shape};
- }
+ loco::NodeShape visit(const luci::CircleWhileOut *node) final { return infer_while_out(node); }
};
} // namespace
diff --git a/compiler/luci/service/src/CircleTypeInferenceRule.cpp b/compiler/luci/service/src/CircleTypeInferenceRule.cpp
index e7910bfc0..d28d8ac99 100644
--- a/compiler/luci/service/src/CircleTypeInferenceRule.cpp
+++ b/compiler/luci/service/src/CircleTypeInferenceRule.cpp
@@ -257,6 +257,11 @@ struct TypeInferenceAlgorithm final : public luci::CircleNodeVisitor<loco::DataT
return loco::dtype_get(node->boxes());
}
+ loco::DataType visit(const luci::CircleNonMaxSuppressionV5 *node) final
+ {
+ return loco::dtype_get(node->boxes());
+ }
+
loco::DataType visit(const luci::CircleNotEqual *) final { return loco::DataType::BOOL; }
loco::DataType visit(const luci::CirclePack *node) final
@@ -273,6 +278,11 @@ struct TypeInferenceAlgorithm final : public luci::CircleNodeVisitor<loco::DataT
loco::DataType visit(const luci::CirclePad *node) final { return loco::dtype_get(node->input()); }
+ loco::DataType visit(const luci::CirclePadV2 *node) final
+ {
+ return loco::dtype_get(node->input());
+ }
+
loco::DataType visit(const luci::CirclePow *node) final
{
// TODO make sure types cannot differ
@@ -589,6 +599,17 @@ struct TypeInferenceAlgorithm final : public luci::CircleNodeVisitor<loco::DataT
return loco::DataType::S32;
}
+ loco::DataType visit(const luci::CircleNonMaxSuppressionV5Out *node) final
+ {
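+ // NMSv5 has three outputs: 0 = selected_indices (S32), 1 = selected_scores (FLOAT32), 2 = valid_outputs (S32)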
+ if (node->index() == 0 || node->index() == 2)
+ {
+ return loco::DataType::S32;
+ }
+ assert(node->index() == 1);
+ return loco::DataType::FLOAT32;
+ }
+
loco::DataType visit(const luci::CircleSplitOut *node) final
{
return loco::dtype_get(node->input());
diff --git a/compiler/luci/tests/test.lst b/compiler/luci/tests/test.lst
index 9fd42ed4e..12dd7ff5b 100644
--- a/compiler/luci/tests/test.lst
+++ b/compiler/luci/tests/test.lst
@@ -96,6 +96,10 @@ addread(MirrorPad_000)
addread(Mul_000)
addread(Mul_U8_000)
addread(Neg_000)
+addread(NonMaxSuppressionV4_000)
+addread(NonMaxSuppressionV4_001)
+addread(NonMaxSuppressionV5_000)
+addread(NonMaxSuppressionV5_001)
addread(NotEqual_000)
addread(OneHot_000)
addread(OneHot_001)
@@ -105,6 +109,7 @@ addread(Pack_000)
addread(Pack_U8_000)
addread(Pad_000)
addread(Pad_U8_000)
+addread(PadV2_000)
addread(Pow_000)
addread(PRelu_000)
addread(Range_000)
@@ -128,6 +133,7 @@ addread(Reshape_002)
addread(Reshape_003)
addread(Reshape_U8_000)
addread(ResizeBilinear_000)
+addread(ResizeBilinear_U8_000)
addread(ResizeNearestNeighbor_000)
addread(ReverseSequence_000)
addread(ReverseV2_000)
@@ -151,6 +157,7 @@ addread(SpaceToBatchND_001)
addread(SpaceToBatchND_002)
addread(SpaceToBatchND_003)
addread(SpaceToDepth_000)
+addread(SpaceToDepth_U8_000)
addread(SparseToDense_000)
addread(Split_000)
addread(SplitV_000)
@@ -166,12 +173,19 @@ addread(Sub_U8_000)
addread(Sum_000)
addread(Sum_001)
addread(Tanh_000)
+addread(Tanh_U8_000)
addread(Tile_000)
addread(Tile_U8_000)
addread(TopKV2_000)
addread(TopKV2_001)
addread(Transpose_000)
addread(TransposeConv_000)
+addread(Unique_000)
+addread(Unique_001)
+addread(Unique_002)
+addread(Unique_003)
+addread(Unique_U8_000)
+addread(Unique_U8_001)
addread(Unpack_000)
addread(Unpack_001)
addread(Unpack_002)
@@ -296,6 +310,10 @@ addwrite(MirrorPad_000)
addwrite(Mul_000)
addwrite(Mul_U8_000)
addwrite(Neg_000)
+addwrite(NonMaxSuppressionV4_000)
+addwrite(NonMaxSuppressionV4_001)
+addwrite(NonMaxSuppressionV5_000)
+addwrite(NonMaxSuppressionV5_001)
addwrite(NotEqual_000)
addwrite(OneHot_000)
addwrite(OneHot_001)
@@ -304,6 +322,7 @@ addwrite(OneHot_003)
addwrite(Pack_000)
addwrite(Pack_U8_000)
addwrite(Pad_000)
+addwrite(PadV2_000)
addwrite(Pow_000)
addwrite(PRelu_000)
addwrite(Range_000)
@@ -327,6 +346,7 @@ addwrite(Reshape_002)
addwrite(Reshape_003)
addwrite(Reshape_U8_000)
addwrite(ResizeBilinear_000)
+addwrite(ResizeBilinear_U8_000)
addwrite(ResizeNearestNeighbor_000)
addwrite(ReverseSequence_000)
addwrite(ReverseV2_000)
@@ -350,6 +370,7 @@ addwrite(SpaceToBatchND_001)
addwrite(SpaceToBatchND_002)
addwrite(SpaceToBatchND_003)
addwrite(SpaceToDepth_000)
+addwrite(SpaceToDepth_U8_000)
addwrite(SparseToDense_000)
addwrite(Split_000)
addwrite(SplitV_000)
@@ -365,12 +386,19 @@ addwrite(Sub_U8_000)
addwrite(Sum_000)
addwrite(Sum_001)
addwrite(Tanh_000)
+addwrite(Tanh_U8_000)
addwrite(Tile_000)
addwrite(Tile_U8_000)
addwrite(TopKV2_000)
addwrite(TopKV2_001)
addwrite(Transpose_000)
addwrite(TransposeConv_000)
+addwrite(Unique_000)
+addwrite(Unique_001)
+addwrite(Unique_002)
+addwrite(Unique_003)
+addwrite(Unique_U8_000)
+addwrite(Unique_U8_001)
addwrite(Unpack_000)
addwrite(Unpack_001)
addwrite(Unpack_002)
diff --git a/compiler/one-cmds/one-import-tf b/compiler/one-cmds/one-import-tf
index d59e1c529..58c686882 100644
--- a/compiler/one-cmds/one-import-tf
+++ b/compiler/one-cmds/one-import-tf
@@ -83,6 +83,10 @@ while [ "$#" -ne 0 ]; do
esac
done
+if [ -n "${INPUT_SHAPES}" ] && [ "${TF_INTERFACE}" = "--v2" ]; then
+  echo "Warning: if the --v2 option is used, the --input_shapes option will be ignored"
+fi
+
if [ -z ${INPUT_PATH} ] || [ ! -e ${INPUT_PATH} ]; then
echo "Error: input model not found"
echo ""
@@ -117,16 +121,18 @@ show_err_onexit()
trap show_err_onexit ERR
# generate temporary tflite file
-echo "python" "${DRIVER_PATH}/tf2tfliteV2.py" ${TF_INTERFACE} --input_path ${INPUT_PATH} \
---input_arrays ${INPUT_ARRAYS} --input_shapes ${INPUT_SHAPES} \
---output_path "${TMPDIR}/${MODEL_NAME}.tflite" \
---output_arrays ${OUTPUT_ARRAYS} > "${OUTPUT_PATH}.log"
-echo " " >> "${OUTPUT_PATH}.log"
+CONVERT_SCRIPT="python ${DRIVER_PATH}/tf2tfliteV2.py ${TF_INTERFACE} "
+CONVERT_SCRIPT+="--input_path ${INPUT_PATH} "
+CONVERT_SCRIPT+="--input_arrays ${INPUT_ARRAYS} "
+CONVERT_SCRIPT+="--output_path ${TMPDIR}/${MODEL_NAME}.tflite "
+CONVERT_SCRIPT+="--output_arrays ${OUTPUT_ARRAYS} "
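+# --input_shapes is optional; append it only when the user provided input shapes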
+if [ ! -z "${INPUT_SHAPES}" ]; then
+ CONVERT_SCRIPT+="--input_shapes ${INPUT_SHAPES} "
+fi
-python "${DRIVER_PATH}/tf2tfliteV2.py" ${TF_INTERFACE} --input_path ${INPUT_PATH} \
---input_arrays ${INPUT_ARRAYS} --input_shapes ${INPUT_SHAPES} \
---output_path "${TMPDIR}/${MODEL_NAME}.tflite" \
---output_arrays ${OUTPUT_ARRAYS} >> "${OUTPUT_PATH}.log" 2>&1
+echo ${CONVERT_SCRIPT} > "${OUTPUT_PATH}.log"
+echo "" >> "${OUTPUT_PATH}.log"
+$CONVERT_SCRIPT >> "${OUTPUT_PATH}.log" 2>&1
# convert .tflite to .circle
echo " " >> "${OUTPUT_PATH}.log"
diff --git a/compiler/one-cmds/one-prepare-venv b/compiler/one-cmds/one-prepare-venv
index 0a53bd3dd..0b11e7f0b 100644
--- a/compiler/one-cmds/one-prepare-venv
+++ b/compiler/one-cmds/one-prepare-venv
@@ -46,7 +46,9 @@ python3 -m venv "${DRIVER_PATH}/venv"
# Install tensorflow
source "${VENV_ACTIVATE}"
+# TODO remove the version pinning of 'pip==20.2.1 setuptools==49.3.0'
+# NOTE the pinned versions are a temporary hotfix for breakage caused by setuptools 50.x.y
python -m pip --default-timeout=1000 --trusted-host pypi.org --trusted-host files.pythonhost.org \
- install -U pip setuptools
+ install -U pip==20.2.1 setuptools==49.3.0
python -m pip --default-timeout=1000 --trusted-host pypi.org --trusted-host files.pythonhost.org \
install tensorflow-cpu==2.3.0
diff --git a/compiler/pota-quantization-value-test/expected_outputs/Conv2D_004/channel/uint8/fake_quantization/ker.json b/compiler/pota-quantization-value-test/expected_outputs/Conv2D_004/channel/uint8/fake_quantization/ker.json
new file mode 100644
index 000000000..6460e54cf
--- /dev/null
+++ b/compiler/pota-quantization-value-test/expected_outputs/Conv2D_004/channel/uint8/fake_quantization/ker.json
@@ -0,0 +1,48 @@
+{
+ "weights": [
+ [
+ [
+ [
+ 1.0,
+ 2.0
+ ],
+ [
+ -3.0,
+ -4.0
+ ]
+ ],
+ [
+ [
+ -5.0,
+ 6.0
+ ],
+ [
+ -7.0,
+ 8.0
+ ]
+ ]
+ ],
+ [
+ [
+ [
+ 4.0,
+ -2.0
+ ],
+ [
+ 3.0,
+ -1.0
+ ]
+ ],
+ [
+ [
+ -8.0,
+ -6.0
+ ],
+ [
+ 7.0,
+ 5.0
+ ]
+ ]
+ ]
+ ]
+}
diff --git a/compiler/pota-quantization-value-test/expected_outputs/Conv2D_004/channel/uint8/quantization/bias.json b/compiler/pota-quantization-value-test/expected_outputs/Conv2D_004/channel/uint8/quantization/bias.json
new file mode 100644
index 000000000..a55af0be5
--- /dev/null
+++ b/compiler/pota-quantization-value-test/expected_outputs/Conv2D_004/channel/uint8/quantization/bias.json
@@ -0,0 +1,10 @@
+{
+ "weights": [
+ 4374,
+ 8747
+ ],
+ "scale": [
+ 0.0002286423499283808,
+ 0.0002286423499283808
+ ]
+}
diff --git a/compiler/pota-quantization-value-test/expected_outputs/Conv2D_004/channel/uint8/quantization/ifm.json b/compiler/pota-quantization-value-test/expected_outputs/Conv2D_004/channel/uint8/quantization/ifm.json
new file mode 100644
index 000000000..0e481bbfd
--- /dev/null
+++ b/compiler/pota-quantization-value-test/expected_outputs/Conv2D_004/channel/uint8/quantization/ifm.json
@@ -0,0 +1,4 @@
+{
+ "scale": 0.0038869199343025684,
+ "zero_point": 0.0
+}
diff --git a/compiler/pota-quantization-value-test/expected_outputs/Conv2D_004/channel/uint8/quantization/ker.json b/compiler/pota-quantization-value-test/expected_outputs/Conv2D_004/channel/uint8/quantization/ker.json
new file mode 100644
index 000000000..4e12a5550
--- /dev/null
+++ b/compiler/pota-quantization-value-test/expected_outputs/Conv2D_004/channel/uint8/quantization/ker.json
@@ -0,0 +1,64 @@
+{
+ "weights": [
+ [
+ [
+ [
+ 136,
+ 153
+ ],
+ [
+ 68,
+ 51
+ ]
+ ],
+ [
+ [
+ 34,
+ 221
+ ],
+ [
+ 0,
+ 255
+ ]
+ ]
+ ],
+ [
+ [
+ [
+ 204,
+ 102
+ ],
+ [
+ 187,
+ 119
+ ]
+ ],
+ [
+ [
+ 0,
+ 34
+ ],
+ [
+ 255,
+ 221
+ ]
+ ]
+ ]
+ ],
+ "scale": [
+ 0.058823529411764705,
+ 0.058823529411764705
+ ],
+ "zero_point": [
+ 119.0,
+ 136.0
+ ],
+ "min": [
+ -7.0,
+ -8.0
+ ],
+ "max": [
+ 8.0,
+ 7.0
+ ]
+}
diff --git a/compiler/pota-quantization-value-test/expected_outputs/Conv2D_004/channel/uint8/quantization/ofm.json b/compiler/pota-quantization-value-test/expected_outputs/Conv2D_004/channel/uint8/quantization/ofm.json
new file mode 100644
index 000000000..7d23cbad2
--- /dev/null
+++ b/compiler/pota-quantization-value-test/expected_outputs/Conv2D_004/channel/uint8/quantization/ofm.json
@@ -0,0 +1,4 @@
+{
+ "scale": 0.05829785391688347,
+ "zero_point": 0.0
+}
diff --git a/compiler/pota-quantization-value-test/expected_outputs/Conv2D_004/channel/uint8/record_minmax/ifm.json b/compiler/pota-quantization-value-test/expected_outputs/Conv2D_004/channel/uint8/record_minmax/ifm.json
new file mode 100644
index 000000000..af8dc16de
--- /dev/null
+++ b/compiler/pota-quantization-value-test/expected_outputs/Conv2D_004/channel/uint8/record_minmax/ifm.json
@@ -0,0 +1,4 @@
+{
+ "min": 0.022708916887640953,
+ "max": 0.9911645770072937
+}
diff --git a/compiler/pota-quantization-value-test/expected_outputs/Conv2D_004/channel/uint8/record_minmax/ofm.json b/compiler/pota-quantization-value-test/expected_outputs/Conv2D_004/channel/uint8/record_minmax/ofm.json
new file mode 100644
index 000000000..5f7bd9942
--- /dev/null
+++ b/compiler/pota-quantization-value-test/expected_outputs/Conv2D_004/channel/uint8/record_minmax/ofm.json
@@ -0,0 +1,4 @@
+{
+ "min": 0.0,
+ "max": 14.86595230102539
+}
diff --git a/compiler/pota-quantization-value-test/expected_outputs/DepthwiseConv2D_002/channel/uint8/fake_quantization/ker.json b/compiler/pota-quantization-value-test/expected_outputs/DepthwiseConv2D_002/channel/uint8/fake_quantization/ker.json
new file mode 100644
index 000000000..675eadcb6
--- /dev/null
+++ b/compiler/pota-quantization-value-test/expected_outputs/DepthwiseConv2D_002/channel/uint8/fake_quantization/ker.json
@@ -0,0 +1,34 @@
+{
+ "weights": [
+ [
+ [
+ [
+ 1.0352935791015625,
+ 1.976470947265625,
+ 2.9568634033203125,
+ 3.95294189453125
+ ],
+ [
+ -8.972549438476562,
+ 9.976470947265625,
+ -11.011764526367188,
+ 11.9686279296875
+ ]
+ ],
+ [
+ [
+ 5.0039215087890625,
+ 6.023530960083008,
+ 7.035295486450195,
+ 8.01568603515625
+ ],
+ [
+ 13.027450561523438,
+ -14.023529052734375,
+ 14.988235473632812,
+ -16.0313720703125
+ ]
+ ]
+ ]
+ ]
+}
diff --git a/compiler/pota-quantization-value-test/expected_outputs/DepthwiseConv2D_002/channel/uint8/quantization/bias.json b/compiler/pota-quantization-value-test/expected_outputs/DepthwiseConv2D_002/channel/uint8/quantization/bias.json
new file mode 100644
index 000000000..3cda45238
--- /dev/null
+++ b/compiler/pota-quantization-value-test/expected_outputs/DepthwiseConv2D_002/channel/uint8/quantization/bias.json
@@ -0,0 +1,14 @@
+{
+ "weights": [
+ 2985,
+ 5473,
+ 7578,
+ 9382
+ ],
+ "scale": [
+ 0.0003349798455903035,
+ 0.0003654325561959198,
+ 0.00039588526680153606,
+ 0.00042633797740715233
+ ]
+}
diff --git a/compiler/pota-quantization-value-test/expected_outputs/DepthwiseConv2D_002/channel/uint8/quantization/ifm.json b/compiler/pota-quantization-value-test/expected_outputs/DepthwiseConv2D_002/channel/uint8/quantization/ifm.json
new file mode 100644
index 000000000..97931cc58
--- /dev/null
+++ b/compiler/pota-quantization-value-test/expected_outputs/DepthwiseConv2D_002/channel/uint8/quantization/ifm.json
@@ -0,0 +1,4 @@
+{
+ "scale": 0.003882720833644271,
+ "zero_point": 0.0
+}
diff --git a/compiler/pota-quantization-value-test/expected_outputs/DepthwiseConv2D_002/channel/uint8/quantization/ker.json b/compiler/pota-quantization-value-test/expected_outputs/DepthwiseConv2D_002/channel/uint8/quantization/ker.json
new file mode 100644
index 000000000..add4d0f35
--- /dev/null
+++ b/compiler/pota-quantization-value-test/expected_outputs/DepthwiseConv2D_002/channel/uint8/quantization/ker.json
@@ -0,0 +1,58 @@
+{
+ "weights": [
+ [
+ [
+ [
+ 116,
+ 170,
+ 137,
+ 182
+ ],
+ [
+ 0,
+ 255,
+ 0,
+ 255
+ ]
+ ],
+ [
+ [
+ 162,
+ 213,
+ 177,
+ 219
+ ],
+ [
+ 255,
+ 0,
+ 255,
+ 0
+ ]
+ ]
+ ]
+ ],
+ "scale": [
+ 0.08627450980392157,
+ 0.09411764705882353,
+ 0.10196078431372549,
+ 0.10980392156862745
+ ],
+ "zero_point": [
+ 104.0,
+ 149.0,
+ 108.0,
+ 146.0
+ ],
+ "min": [
+ -8.972549019607843,
+ -14.023529411764706,
+ -11.011764705882353,
+ -16.031372549019608
+ ],
+ "max": [
+ 13.027450980392157,
+ 9.976470588235294,
+ 14.988235294117647,
+ 11.968627450980392
+ ]
+}
diff --git a/compiler/pota-quantization-value-test/expected_outputs/DepthwiseConv2D_002/channel/uint8/quantization/ofm.json b/compiler/pota-quantization-value-test/expected_outputs/DepthwiseConv2D_002/channel/uint8/quantization/ofm.json
new file mode 100644
index 000000000..f587aac24
--- /dev/null
+++ b/compiler/pota-quantization-value-test/expected_outputs/DepthwiseConv2D_002/channel/uint8/quantization/ofm.json
@@ -0,0 +1,4 @@
+{
+ "scale": 0.07756166160106659,
+ "zero_point": 0.0
+}
diff --git a/compiler/pota-quantization-value-test/expected_outputs/DepthwiseConv2D_002/channel/uint8/record_minmax/ifm.json b/compiler/pota-quantization-value-test/expected_outputs/DepthwiseConv2D_002/channel/uint8/record_minmax/ifm.json
new file mode 100644
index 000000000..fa8fffc3e
--- /dev/null
+++ b/compiler/pota-quantization-value-test/expected_outputs/DepthwiseConv2D_002/channel/uint8/record_minmax/ifm.json
@@ -0,0 +1,4 @@
+{
+ "min": 0.003264044094830751,
+ "max": 0.9900938200950622
+}
diff --git a/compiler/pota-quantization-value-test/expected_outputs/DepthwiseConv2D_002/channel/uint8/record_minmax/ofm.json b/compiler/pota-quantization-value-test/expected_outputs/DepthwiseConv2D_002/channel/uint8/record_minmax/ofm.json
new file mode 100644
index 000000000..612c0b4ea
--- /dev/null
+++ b/compiler/pota-quantization-value-test/expected_outputs/DepthwiseConv2D_002/channel/uint8/record_minmax/ofm.json
@@ -0,0 +1,4 @@
+{
+ "min": 0.0,
+ "max": 19.778222274780273
+}
diff --git a/compiler/pota-quantization-value-test/expected_outputs/FullyConnected_003/channel/uint8/fake_quantization/weight.json b/compiler/pota-quantization-value-test/expected_outputs/FullyConnected_003/channel/uint8/fake_quantization/weight.json
new file mode 100644
index 000000000..4661cb3ca
--- /dev/null
+++ b/compiler/pota-quantization-value-test/expected_outputs/FullyConnected_003/channel/uint8/fake_quantization/weight.json
@@ -0,0 +1,76 @@
+{
+ "weights": [
+ [
+ 1.0039215087890625,
+ 2.007843017578125,
+ -3.0117650032043457,
+ -4.015686511993408,
+ -5.019608020782471,
+ 6.023530006408691,
+ -7.027451515197754,
+ 7.968626976013184,
+ 4.015686988830566,
+ -2.007843017578125,
+ 3.0117645263671875,
+ -1.0039215087890625,
+ -7.9686279296875,
+ -6.023530006408691,
+ 7.027451515197754,
+ 5.019608497619629
+ ],
+ [
+ 1.0039215087890625,
+ 2.007843017578125,
+ -3.0117650032043457,
+ -4.015686511993408,
+ -5.019608020782471,
+ 6.023530006408691,
+ -7.027451515197754,
+ 7.968626976013184,
+ 4.015686988830566,
+ -2.007843017578125,
+ 3.0117645263671875,
+ -1.0039215087890625,
+ -7.9686279296875,
+ -6.023530006408691,
+ 7.027451515197754,
+ 5.019608497619629
+ ],
+ [
+ 1.0039215087890625,
+ 2.007843017578125,
+ -3.0117650032043457,
+ -4.015686511993408,
+ -5.019608020782471,
+ 6.023530006408691,
+ -7.027451515197754,
+ 7.968626976013184,
+ 4.015686988830566,
+ -2.007843017578125,
+ 3.0117645263671875,
+ -1.0039215087890625,
+ -7.9686279296875,
+ -6.023530006408691,
+ 7.027451515197754,
+ 5.019608497619629
+ ],
+ [
+ 1.0039215087890625,
+ 2.007843017578125,
+ -3.0117650032043457,
+ -4.015686511993408,
+ -5.019608020782471,
+ 6.023530006408691,
+ -7.027451515197754,
+ 7.968626976013184,
+ 4.015686988830566,
+ -2.007843017578125,
+ 3.0117645263671875,
+ -1.0039215087890625,
+ -7.9686279296875,
+ -6.023530006408691,
+ 7.027451515197754,
+ 5.019608497619629
+ ]
+ ]
+}
diff --git a/compiler/pota-quantization-value-test/expected_outputs/FullyConnected_003/channel/uint8/quantization/bias.json b/compiler/pota-quantization-value-test/expected_outputs/FullyConnected_003/channel/uint8/quantization/bias.json
new file mode 100644
index 000000000..4333c0fed
--- /dev/null
+++ b/compiler/pota-quantization-value-test/expected_outputs/FullyConnected_003/channel/uint8/quantization/bias.json
@@ -0,0 +1,14 @@
+{
+ "weights": [
+ 4099,
+ -8199,
+ -12298,
+ 16398
+ ],
+ "scale": [
+ 0.00024393631821001058,
+ 0.00024393631821001058,
+ 0.00024393631821001058,
+ 0.00024393631821001058
+ ]
+}
diff --git a/compiler/pota-quantization-value-test/expected_outputs/FullyConnected_003/channel/uint8/quantization/in.json b/compiler/pota-quantization-value-test/expected_outputs/FullyConnected_003/channel/uint8/quantization/in.json
new file mode 100644
index 000000000..8edac1bd9
--- /dev/null
+++ b/compiler/pota-quantization-value-test/expected_outputs/FullyConnected_003/channel/uint8/quantization/in.json
@@ -0,0 +1,4 @@
+{
+ "scale": 0.003887734841555357,
+ "zero_point": 0.0
+}
diff --git a/compiler/pota-quantization-value-test/expected_outputs/FullyConnected_003/channel/uint8/quantization/out.json b/compiler/pota-quantization-value-test/expected_outputs/FullyConnected_003/channel/uint8/quantization/out.json
new file mode 100644
index 000000000..1b94f1652
--- /dev/null
+++ b/compiler/pota-quantization-value-test/expected_outputs/FullyConnected_003/channel/uint8/quantization/out.json
@@ -0,0 +1,4 @@
+{
+ "scale": 0.061938945204019547,
+ "zero_point": 171.0
+}
diff --git a/compiler/pota-quantization-value-test/expected_outputs/FullyConnected_003/channel/uint8/quantization/weight.json b/compiler/pota-quantization-value-test/expected_outputs/FullyConnected_003/channel/uint8/quantization/weight.json
new file mode 100644
index 000000000..5ee46c87f
--- /dev/null
+++ b/compiler/pota-quantization-value-test/expected_outputs/FullyConnected_003/channel/uint8/quantization/weight.json
@@ -0,0 +1,100 @@
+{
+ "weights": [
+ [
+ 144,
+ 160,
+ 80,
+ 64,
+ 48,
+ 224,
+ 16,
+ 255,
+ 192,
+ 96,
+ 176,
+ 112,
+ 1,
+ 32,
+ 240,
+ 208
+ ],
+ [
+ 144,
+ 160,
+ 80,
+ 64,
+ 48,
+ 224,
+ 16,
+ 255,
+ 192,
+ 96,
+ 176,
+ 112,
+ 1,
+ 32,
+ 240,
+ 208
+ ],
+ [
+ 144,
+ 160,
+ 80,
+ 64,
+ 48,
+ 224,
+ 16,
+ 255,
+ 192,
+ 96,
+ 176,
+ 112,
+ 1,
+ 32,
+ 240,
+ 208
+ ],
+ [
+ 144,
+ 160,
+ 80,
+ 64,
+ 48,
+ 224,
+ 16,
+ 255,
+ 192,
+ 96,
+ 176,
+ 112,
+ 1,
+ 32,
+ 240,
+ 208
+ ]
+ ],
+ "scale": [
+ 0.06274509803921569,
+ 0.06274509803921569,
+ 0.06274509803921569,
+ 0.06274509803921569
+ ],
+ "zero_point": [
+ 128.0,
+ 128.0,
+ 128.0,
+ 128.0
+ ],
+ "min": [
+ -8.031372549019608,
+ -8.031372549019608,
+ -8.031372549019608,
+ -8.031372549019608
+ ],
+ "max": [
+ 7.968627450980392,
+ 7.968627450980392,
+ 7.968627450980392,
+ 7.968627450980392
+ ]
+}
diff --git a/compiler/pota-quantization-value-test/expected_outputs/FullyConnected_003/channel/uint8/record_minmax/in.json b/compiler/pota-quantization-value-test/expected_outputs/FullyConnected_003/channel/uint8/record_minmax/in.json
new file mode 100644
index 000000000..48e4645c9
--- /dev/null
+++ b/compiler/pota-quantization-value-test/expected_outputs/FullyConnected_003/channel/uint8/record_minmax/in.json
@@ -0,0 +1,4 @@
+{
+ "min": 0.010438590832054616,
+ "max": 0.9913724160194397
+}
diff --git a/compiler/pota-quantization-value-test/expected_outputs/FullyConnected_003/channel/uint8/record_minmax/out.json b/compiler/pota-quantization-value-test/expected_outputs/FullyConnected_003/channel/uint8/record_minmax/out.json
new file mode 100644
index 000000000..ec83b94d1
--- /dev/null
+++ b/compiler/pota-quantization-value-test/expected_outputs/FullyConnected_003/channel/uint8/record_minmax/out.json
@@ -0,0 +1,4 @@
+{
+ "min": -10.584291763305664,
+ "max": 5.210139312744141
+}
diff --git a/compiler/pota-quantization-value-test/expected_outputs/TransposeConv_001/channel/uint8/fake_quantization/ker.json b/compiler/pota-quantization-value-test/expected_outputs/TransposeConv_001/channel/uint8/fake_quantization/ker.json
new file mode 100644
index 000000000..76a0440a0
--- /dev/null
+++ b/compiler/pota-quantization-value-test/expected_outputs/TransposeConv_001/channel/uint8/fake_quantization/ker.json
@@ -0,0 +1,48 @@
+{
+ "weights": [
+ [
+ [
+ [
+ 0.960784912109375,
+ 2.0588245391845703
+ ],
+ [
+ -3.0196075439453125,
+ -3.980391502380371
+ ],
+ [
+ 4.9411773681640625,
+ -6.039215087890625
+ ]
+ ],
+ [
+ [
+ 7.0,
+ 7.960784912109375
+ ],
+ [
+ -9.058823585510254,
+ -10.019607543945312
+ ],
+ [
+ 10.980392456054688,
+ -11.941176414489746
+ ]
+ ],
+ [
+ [
+ 13.039216995239258,
+ 14.000001907348633
+ ],
+ [
+ -14.960784912109375,
+ -16.05882453918457
+ ],
+ [
+ 17.019607543945312,
+ -17.980392456054688
+ ]
+ ]
+ ]
+ ]
+}
diff --git a/compiler/pota-quantization-value-test/expected_outputs/TransposeConv_001/channel/uint8/quantization/ifm.json b/compiler/pota-quantization-value-test/expected_outputs/TransposeConv_001/channel/uint8/quantization/ifm.json
new file mode 100644
index 000000000..4c3669f6b
--- /dev/null
+++ b/compiler/pota-quantization-value-test/expected_outputs/TransposeConv_001/channel/uint8/quantization/ifm.json
@@ -0,0 +1,4 @@
+{
+ "scale": 0.0038701011799275875,
+ "zero_point": 0.0
+}
diff --git a/compiler/pota-quantization-value-test/expected_outputs/TransposeConv_001/channel/uint8/quantization/ker.json b/compiler/pota-quantization-value-test/expected_outputs/TransposeConv_001/channel/uint8/quantization/ker.json
new file mode 100644
index 000000000..04e0648de
--- /dev/null
+++ b/compiler/pota-quantization-value-test/expected_outputs/TransposeConv_001/channel/uint8/quantization/ker.json
@@ -0,0 +1,60 @@
+{
+ "weights": [
+ [
+ [
+ [
+ 138,
+ 146
+ ],
+ [
+ 109,
+ 102
+ ],
+ [
+ 167,
+ 87
+ ]
+ ],
+ [
+ [
+ 182,
+ 189
+ ],
+ [
+ 65,
+ 58
+ ],
+ [
+ 211,
+ 44
+ ]
+ ],
+ [
+ [
+ 226,
+ 233
+ ],
+ [
+ 22,
+ 14
+ ],
+ [
+ 255,
+ 0
+ ]
+ ]
+ ]
+ ],
+ "scale": [
+ 0.13725490196078433
+ ],
+ "zero_point": [
+ 131.0
+ ],
+ "min": [
+ -17.980392156862745
+ ],
+ "max": [
+ 17.019607843137255
+ ]
+}
diff --git a/compiler/pota-quantization-value-test/expected_outputs/TransposeConv_001/channel/uint8/quantization/ofm.json b/compiler/pota-quantization-value-test/expected_outputs/TransposeConv_001/channel/uint8/quantization/ofm.json
new file mode 100644
index 000000000..2e1790508
--- /dev/null
+++ b/compiler/pota-quantization-value-test/expected_outputs/TransposeConv_001/channel/uint8/quantization/ofm.json
@@ -0,0 +1,4 @@
+{
+ "scale": 0.25486624240875244,
+ "zero_point": 178.0
+}
diff --git a/compiler/pota-quantization-value-test/expected_outputs/TransposeConv_001/channel/uint8/record_minmax/ifm.json b/compiler/pota-quantization-value-test/expected_outputs/TransposeConv_001/channel/uint8/record_minmax/ifm.json
new file mode 100644
index 000000000..d46844baf
--- /dev/null
+++ b/compiler/pota-quantization-value-test/expected_outputs/TransposeConv_001/channel/uint8/record_minmax/ifm.json
@@ -0,0 +1,4 @@
+{
+ "min": 0.006121497452259064,
+ "max": 0.9868757891654968
+}
diff --git a/compiler/pota-quantization-value-test/expected_outputs/TransposeConv_001/channel/uint8/record_minmax/ofm.json b/compiler/pota-quantization-value-test/expected_outputs/TransposeConv_001/channel/uint8/record_minmax/ofm.json
new file mode 100644
index 000000000..4441f1876
--- /dev/null
+++ b/compiler/pota-quantization-value-test/expected_outputs/TransposeConv_001/channel/uint8/record_minmax/ofm.json
@@ -0,0 +1,4 @@
+{
+ "min": -45.46586318969727,
+ "max": 19.525028419494628
+}
diff --git a/compiler/pota-quantization-value-test/test.lst b/compiler/pota-quantization-value-test/test.lst
index 9eb348922..d9fd91761 100644
--- a/compiler/pota-quantization-value-test/test.lst
+++ b/compiler/pota-quantization-value-test/test.lst
@@ -1,4 +1,8 @@
+addTest(Conv2D_004 channel uint8)
addTest(Conv2D_004 layer uint8)
+addTest(DepthwiseConv2D_002 channel uint8)
addTest(DepthwiseConv2D_002 layer uint8)
+addTest(FullyConnected_003 channel uint8)
addTest(FullyConnected_003 layer uint8)
+addTest(TransposeConv_001 channel uint8)
addTest(TransposeConv_001 layer uint8)
diff --git a/compiler/pota-quantization-value-test/test_inputs/Conv2D_004/channel/uint8/0.txt b/compiler/pota-quantization-value-test/test_inputs/Conv2D_004/channel/uint8/0.txt
new file mode 100644
index 000000000..98e895c04
--- /dev/null
+++ b/compiler/pota-quantization-value-test/test_inputs/Conv2D_004/channel/uint8/0.txt
@@ -0,0 +1 @@
+0.19242816,0.44059092,0.06788187,0.04543579,0.14106855,0.6858487 ,0.6214997 ,0.31582046,0.859484 ,0.3664256 ,0.86936104,0.871024 ,0.68752515,0.5296719 ,0.99137205,0.02956272,0.14838405,0.69830126,0.22359788,0.9060323 ,0.7141239 ,0.5573066 ,0.96645916,0.11426282
diff --git a/compiler/pota-quantization-value-test/test_inputs/Conv2D_004/channel/uint8/1.txt b/compiler/pota-quantization-value-test/test_inputs/Conv2D_004/channel/uint8/1.txt
new file mode 100644
index 000000000..f480f8086
--- /dev/null
+++ b/compiler/pota-quantization-value-test/test_inputs/Conv2D_004/channel/uint8/1.txt
@@ -0,0 +1 @@
+0.57016104,0.2788207 ,0.8045938 ,0.7589986 ,0.81506515,0.8411593 ,0.4162234 ,0.1664247 ,0.5584996 ,0.7799966 ,0.4213713 ,0.97587234,0.79440975,0.5089373 ,0.90030503,0.78015554,0.10080549,0.5115089 ,0.77238286,0.9580212 ,0.8758745 ,0.14367636,0.4304664 ,0.55175275
diff --git a/compiler/pota-quantization-value-test/test_inputs/Conv2D_004/channel/uint8/2.txt b/compiler/pota-quantization-value-test/test_inputs/Conv2D_004/channel/uint8/2.txt
new file mode 100644
index 000000000..683ea39b0
--- /dev/null
+++ b/compiler/pota-quantization-value-test/test_inputs/Conv2D_004/channel/uint8/2.txt
@@ -0,0 +1 @@
+0.6224246 ,0.30448085,0.29629433,0.44483584,0.30473125,0.6186932 ,0.45563242,0.5394331 ,0.22901213,0.4313142 ,0.4019574 ,0.02263176,0.3806077 ,0.27828163,0.23962335,0.26323524,0.6125012 ,0.5459546 ,0.6340052 ,0.19074932,0.2216875 ,0.77709603,0.03312786,0.02945002
diff --git a/compiler/pota-quantization-value-test/test_inputs/Conv2D_004/channel/uint8/3.txt b/compiler/pota-quantization-value-test/test_inputs/Conv2D_004/channel/uint8/3.txt
new file mode 100644
index 000000000..56c8c259e
--- /dev/null
+++ b/compiler/pota-quantization-value-test/test_inputs/Conv2D_004/channel/uint8/3.txt
@@ -0,0 +1 @@
+0.7524557 ,0.5408983 ,0.07039106,0.5143847 ,0.04857475,0.7305833 ,0.36986747,0.42291477,0.90452653,0.43744263,0.24857366,0.7537328 ,0.04559262,0.65276045,0.3851062 ,0.49503985,0.37213495,0.10627239,0.7085863 ,0.1913133 ,0.08057284,0.31767172,0.9685745 ,0.5942544
diff --git a/compiler/pota-quantization-value-test/test_inputs/Conv2D_004/channel/uint8/4.txt b/compiler/pota-quantization-value-test/test_inputs/Conv2D_004/channel/uint8/4.txt
new file mode 100644
index 000000000..ecb221e8b
--- /dev/null
+++ b/compiler/pota-quantization-value-test/test_inputs/Conv2D_004/channel/uint8/4.txt
@@ -0,0 +1 @@
+0.16251074,0.5574537 ,0.5857036 ,0.877607 ,0.29711136,0.02456062,0.8250261 ,0.21300122,0.5064036 ,0.5882086 ,0.7736793 ,0.09394809,0.98618525,0.6611699 ,0.5001983 ,0.06507304,0.88984424,0.57143325,0.07953393,0.02649987,0.9283147 ,0.65522593,0.18371649,0.12332761
diff --git a/compiler/pota-quantization-value-test/test_inputs/DepthwiseConv2D_002/channel/uint8/0.txt b/compiler/pota-quantization-value-test/test_inputs/DepthwiseConv2D_002/channel/uint8/0.txt
new file mode 100644
index 000000000..f4fb503ea
--- /dev/null
+++ b/compiler/pota-quantization-value-test/test_inputs/DepthwiseConv2D_002/channel/uint8/0.txt
@@ -0,0 +1 @@
+0.4383064 ,0.8700848 ,0.86010957,0.08396256,0.7963264 ,0.4156023 ,0.28146362,0.82196397,0.9921972 ,0.09969576,0.23987265,0.6734369 ,0.5469574 ,0.20805728,0.32639247,0.76773816
diff --git a/compiler/pota-quantization-value-test/test_inputs/DepthwiseConv2D_002/channel/uint8/1.txt b/compiler/pota-quantization-value-test/test_inputs/DepthwiseConv2D_002/channel/uint8/1.txt
new file mode 100644
index 000000000..af4b01576
--- /dev/null
+++ b/compiler/pota-quantization-value-test/test_inputs/DepthwiseConv2D_002/channel/uint8/1.txt
@@ -0,0 +1 @@
+0.4565062 ,0.92036587,0.47286046,0.18118097,0.5347498 ,0.91550153,0.300375 ,0.00581101,0.38686675,0.91085213,0.07278002,0.35556316,0.13014294,0.7274307 ,0.13867259,0.27517235
diff --git a/compiler/pota-quantization-value-test/test_inputs/DepthwiseConv2D_002/channel/uint8/2.txt b/compiler/pota-quantization-value-test/test_inputs/DepthwiseConv2D_002/channel/uint8/2.txt
new file mode 100644
index 000000000..57716034e
--- /dev/null
+++ b/compiler/pota-quantization-value-test/test_inputs/DepthwiseConv2D_002/channel/uint8/2.txt
@@ -0,0 +1 @@
+0.6900174 ,0.28745306,0.30255774,0.5095008 ,0.6689176 ,0.4914624 ,0.92629427,0.504829 ,0.33514255,0.49005315,0.08569656,0.60965323,0.82193315,0.12380831,0.06971261,0.8822662
diff --git a/compiler/pota-quantization-value-test/test_inputs/DepthwiseConv2D_002/channel/uint8/3.txt b/compiler/pota-quantization-value-test/test_inputs/DepthwiseConv2D_002/channel/uint8/3.txt
new file mode 100644
index 000000000..1e03d83b0
--- /dev/null
+++ b/compiler/pota-quantization-value-test/test_inputs/DepthwiseConv2D_002/channel/uint8/3.txt
@@ -0,0 +1 @@
+0.4240734 ,0.5430392 ,0.7536325 ,0.46065134,0.00315792,0.02719985,0.7080977 ,0.24389206,0.8114604 ,0.13292362,0.346597 ,0.70247084,0.55753845,0.01969242,0.82950485,0.66249627
diff --git a/compiler/pota-quantization-value-test/test_inputs/DepthwiseConv2D_002/channel/uint8/4.txt b/compiler/pota-quantization-value-test/test_inputs/DepthwiseConv2D_002/channel/uint8/4.txt
new file mode 100644
index 000000000..89ee30a6b
--- /dev/null
+++ b/compiler/pota-quantization-value-test/test_inputs/DepthwiseConv2D_002/channel/uint8/4.txt
@@ -0,0 +1 @@
+0.31586212,0.19079527,0.9161567 ,0.8614566 ,0.9018915 ,0.34651542,0.62554437,0.05542602,0.8268219 ,0.38112178,0.9396123 ,0.49426383,0.8034765 ,0.72456217,0.5404088 ,0.8512237
diff --git a/compiler/pota-quantization-value-test/test_inputs/FullyConnected_003/channel/uint8/0.txt b/compiler/pota-quantization-value-test/test_inputs/FullyConnected_003/channel/uint8/0.txt
new file mode 100644
index 000000000..9b19de586
--- /dev/null
+++ b/compiler/pota-quantization-value-test/test_inputs/FullyConnected_003/channel/uint8/0.txt
@@ -0,0 +1 @@
+0.12934422,0.01033248,0.85648465,0.77248603,0.5128501 ,0.2453174 ,0.05065866,0.6601359 ,0.984665 ,0.57697976,0.58360994,0.79360527,0.90097004,0.26150337,0.1575109 ,0.9711614
diff --git a/compiler/pota-quantization-value-test/test_inputs/FullyConnected_003/channel/uint8/1.txt b/compiler/pota-quantization-value-test/test_inputs/FullyConnected_003/channel/uint8/1.txt
new file mode 100644
index 000000000..45247791a
--- /dev/null
+++ b/compiler/pota-quantization-value-test/test_inputs/FullyConnected_003/channel/uint8/1.txt
@@ -0,0 +1 @@
+0.23895125,0.30275205,0.9916519 ,0.52355504,0.2577219 ,0.03600567,0.75446343,0.8064663 ,0.07550113,0.919774 ,0.84333146,0.48820078,0.31365713,0.97172034,0.7472666 ,0.66353893
diff --git a/compiler/pota-quantization-value-test/test_inputs/FullyConnected_003/channel/uint8/2.txt b/compiler/pota-quantization-value-test/test_inputs/FullyConnected_003/channel/uint8/2.txt
new file mode 100644
index 000000000..851e72c7d
--- /dev/null
+++ b/compiler/pota-quantization-value-test/test_inputs/FullyConnected_003/channel/uint8/2.txt
@@ -0,0 +1 @@
+0.6186688 ,0.4357826 ,0.63239735,0.64489084,0.17722449,0.7146202 ,0.5182415 ,0.45549247,0.21316396,0.9769707 ,0.18412311,0.05855984,0.6755795 ,0.8516815 ,0.20649713,0.32990783
diff --git a/compiler/pota-quantization-value-test/test_inputs/FullyConnected_003/channel/uint8/3.txt b/compiler/pota-quantization-value-test/test_inputs/FullyConnected_003/channel/uint8/3.txt
new file mode 100644
index 000000000..7ff3c7576
--- /dev/null
+++ b/compiler/pota-quantization-value-test/test_inputs/FullyConnected_003/channel/uint8/3.txt
@@ -0,0 +1 @@
+0.15501449,0.67026544,0.2957976 ,0.95577955,0.6215903 ,0.2029572 ,0.6069057 ,0.60434276,0.01298514,0.66787016,0.02053251,0.34120578,0.63562113,0.9166186 ,0.7134427 ,0.95491254
diff --git a/compiler/pota-quantization-value-test/test_inputs/FullyConnected_003/channel/uint8/4.txt b/compiler/pota-quantization-value-test/test_inputs/FullyConnected_003/channel/uint8/4.txt
new file mode 100644
index 000000000..fe60dbd26
--- /dev/null
+++ b/compiler/pota-quantization-value-test/test_inputs/FullyConnected_003/channel/uint8/4.txt
@@ -0,0 +1 @@
+0.46877268,0.36748132,0.09441566,0.4476946 ,0.08834982,0.5387882 ,0.8359256 ,0.4374628 ,0.3835091 ,0.3577151 ,0.49470654,0.6017202 ,0.3546875 ,0.64218026,0.69008195,0.37631917
diff --git a/compiler/pota-quantization-value-test/test_inputs/TransposeConv_001/channel/uint8/0.txt b/compiler/pota-quantization-value-test/test_inputs/TransposeConv_001/channel/uint8/0.txt
new file mode 100644
index 000000000..fb728bb70
--- /dev/null
+++ b/compiler/pota-quantization-value-test/test_inputs/TransposeConv_001/channel/uint8/0.txt
@@ -0,0 +1 @@
+0.5177879 ,0.10991199,0.19134527,0.25834408,0.16297385,0.5499753 ,0.8782323 ,0.74750453,0.16825114,0.72425395,0.68458 ,0.9399099 ,0.81214494,0.73325175,0.6407931 ,0.02865177,0.04341139,0.44781777,0.59848577,0.72099334,0.654926 ,0.93810713,0.5193446 ,0.8657371 ,0.50826824,0.10122011,0.6946167 ,0.5009533 ,0.27305812,0.7708204 ,0.14410722,0.7092205
diff --git a/compiler/pota-quantization-value-test/test_inputs/TransposeConv_001/channel/uint8/1.txt b/compiler/pota-quantization-value-test/test_inputs/TransposeConv_001/channel/uint8/1.txt
new file mode 100644
index 000000000..8c72dc764
--- /dev/null
+++ b/compiler/pota-quantization-value-test/test_inputs/TransposeConv_001/channel/uint8/1.txt
@@ -0,0 +1 @@
+0.57410187,0.5534829 ,0.434663 ,0.55580896,0.9040647 ,0.16827786,0.82538676,0.25387943,0.7611494 ,0.49195638,0.00602222,0.20389748,0.541152 ,0.962896 ,0.37785006,0.9330408 ,0.9868882 ,0.57428783,0.830525 ,0.67987496,0.5576374 ,0.4303 ,0.8442439 ,0.21868347,0.45653513,0.7913927 ,0.31475154,0.6723579 ,0.5749264 ,0.07061622,0.6450232 ,0.52825755
diff --git a/compiler/pota-quantization-value-test/test_inputs/TransposeConv_001/channel/uint8/2.txt b/compiler/pota-quantization-value-test/test_inputs/TransposeConv_001/channel/uint8/2.txt
new file mode 100644
index 000000000..04ff6ae29
--- /dev/null
+++ b/compiler/pota-quantization-value-test/test_inputs/TransposeConv_001/channel/uint8/2.txt
@@ -0,0 +1 @@
+0.49751657,0.3004485 ,0.11624487,0.17704253,0.9022095 ,0.24667789,0.9204152 ,0.09801941,0.9194739 ,0.35418576,0.36659864,0.4962548 ,0.83799136,0.58057517,0.2948883 ,0.28411615,0.14429809,0.8460358 ,0.7026028 ,0.25956342,0.5251088 ,0.06569998,0.01754393,0.45209908,0.95638806,0.6044543 ,0.17229715,0.6828144 ,0.8684328 ,0.5829665 ,0.1456113 ,0.3334334
diff --git a/compiler/pota-quantization-value-test/test_inputs/TransposeConv_001/channel/uint8/3.txt b/compiler/pota-quantization-value-test/test_inputs/TransposeConv_001/channel/uint8/3.txt
new file mode 100644
index 000000000..1342dac2f
--- /dev/null
+++ b/compiler/pota-quantization-value-test/test_inputs/TransposeConv_001/channel/uint8/3.txt
@@ -0,0 +1 @@
+0.00850414,0.5746211 ,0.7659193 ,0.8643168 ,0.36803156,0.08386383,0.76002747,0.19255683,0.05220222,0.18169314,0.88597506,0.6793377 ,0.45955214,0.16984127,0.5275391 ,0.910098 ,0.64607793,0.3997594 ,0.38601097,0.40899974,0.10289235,0.896202 ,0.22364503,0.30232555,0.11873382,0.07853477,0.20674925,0.35148785,0.02880615,0.09937044,0.4382221 ,0.53562754
diff --git a/compiler/pota-quantization-value-test/test_inputs/TransposeConv_001/channel/uint8/4.txt b/compiler/pota-quantization-value-test/test_inputs/TransposeConv_001/channel/uint8/4.txt
new file mode 100644
index 000000000..e3e85392e
--- /dev/null
+++ b/compiler/pota-quantization-value-test/test_inputs/TransposeConv_001/channel/uint8/4.txt
@@ -0,0 +1 @@
+0.8097857 ,0.4602844 ,0.01609277,0.7885611 ,0.9090256 ,0.75475484,0.98657864,0.5927874 ,0.73494065,0.374227 ,0.23557834,0.6020654 ,0.0122237 ,0.37126908,0.38277507,0.67635936,0.4139088 ,0.8625733 ,0.37775922,0.15304309,0.6196326 ,0.4827059 ,0.76868814,0.5530773 ,0.3336473 ,0.11217184,0.5877591 ,0.5325879 ,0.48493427,0.6317438 ,0.9385114 ,0.02825027
diff --git a/compiler/record-minmax/src/RecordMinMax.cpp b/compiler/record-minmax/src/RecordMinMax.cpp
index 17c6aa6ff..0ef7cccd1 100644
--- a/compiler/record-minmax/src/RecordMinMax.cpp
+++ b/compiler/record-minmax/src/RecordMinMax.cpp
@@ -16,12 +16,12 @@
#include "RecordMinMax.h"
#include "RecordFunction.h"
-#include "CircleExpContract.h"
#include "MinMaxObserver.h"
#include "HDF5Importer.h"
#include <luci/Importer.h>
#include <luci/CircleExporter.h>
+#include <luci/CircleFileExpContract.h>
#include <luci/IR/CircleQuantParam.h>
#include <algorithm>
@@ -83,6 +83,15 @@ void RecordMinMax::initialize(const std::string &input_model_path)
}
std::vector<char> model_data((std::istreambuf_iterator<char>(fs)),
std::istreambuf_iterator<char>());
+
+ // Verify flatbuffers
+ flatbuffers::Verifier verifier{reinterpret_cast<const uint8_t *>(model_data.data()),
+ model_data.size()};
+ if (!circle::VerifyModelBuffer(verifier))
+ {
+ throw std::runtime_error("ERROR: Failed to verify circle '" + input_model_path + "'");
+ }
+
_module = luci::Importer().importModule(circle::GetModel(model_data.data()));
if (_module == nullptr)
@@ -185,7 +194,8 @@ void RecordMinMax::saveModel(const std::string &output_model_path)
{
// Export to output Circle file
luci::CircleExporter exporter;
- CircleExpContract contract(_module.get(), output_model_path);
+
+ luci::CircleFileExpContract contract(_module.get(), output_model_path);
if (!exporter.invoke(&contract))
{
diff --git a/compiler/souschef/CMakeLists.txt b/compiler/souschef/CMakeLists.txt
index 5a307be16..ca7eddc6f 100644
--- a/compiler/souschef/CMakeLists.txt
+++ b/compiler/souschef/CMakeLists.txt
@@ -1,5 +1,13 @@
+nnas_find_package(Protobuf QUIET)
+
+if(NOT Protobuf_FOUND)
+ message(STATUS "Build souschef: FAILED (missing Protobuf)")
+ return()
+endif(NOT Protobuf_FOUND)
+
file(GLOB_RECURSE SOURCES "src/*.cpp")
add_library(souschef STATIC ${SOURCES})
set_target_properties(souschef PROPERTIES POSITION_INDEPENDENT_CODE ON)
target_include_directories(souschef PUBLIC include)
+target_link_libraries(souschef PUBLIC libprotobuf)
diff --git a/compiler/souschef/include/souschef/Dataset.h b/compiler/souschef/include/souschef/Dataset.h
index 46a12e424..ef67a7316 100644
--- a/compiler/souschef/include/souschef/Dataset.h
+++ b/compiler/souschef/include/souschef/Dataset.h
@@ -19,6 +19,8 @@
#include <vector>
+#include <google/protobuf/repeated_field.h>
+
namespace souschef
{
@@ -57,6 +59,21 @@ private:
std::vector<T> _vec;
};
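+// Copy each element of a protobuf repeated field into a std::vector.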
+template <typename T> std::vector<T> as_vector(const ::google::protobuf::RepeatedPtrField<T> &field)
+{
+ std::vector<T> res;
+ for (const auto &elem : field)
+ {
+ res.emplace_back(elem);
+ }
+ return res;
+}
+
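+// Wrap a protobuf repeated field in a souschef::Dataset.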
+template <typename T> Dataset<T> as_dataset(const ::google::protobuf::RepeatedPtrField<T> &field)
+{
+ return Dataset<T>(as_vector<T>(field));
+}
+
} // namespace souschef
#endif // __SOUSCHEF_DATASET_H__
diff --git a/compiler/souschef/include/souschef/Dims.h b/compiler/souschef/include/souschef/Dims.h
new file mode 100644
index 000000000..52c64dd47
--- /dev/null
+++ b/compiler/souschef/include/souschef/Dims.h
@@ -0,0 +1,48 @@
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __SOUSCHEF_DIMS_H__
+#define __SOUSCHEF_DIMS_H__
+
+#include <functional>
+#include <numeric>
+#include <vector>
+
+namespace souschef
+{
+
+template <typename T> using Dims = std::vector<T>;
+
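+// Convert a shape-like message exposing a repeated 'dim' field into Dims<int32_t>.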
+template <typename SHAPETYPE> Dims<int32_t> as_dims(const SHAPETYPE &shape)
+{
+ std::vector<int32_t> res;
+
+ for (auto &dim : shape.dim())
+ {
+ res.emplace_back(static_cast<int32_t>(dim));
+ }
+
+ return res;
+}
+
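+// Number of elements described by 'dims', i.e. the product of all dimensions.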
+inline int32_t element_count(const Dims<int32_t> &dims)
+{
+ return std::accumulate(dims.begin(), dims.end(), 1, std::multiplies<int32_t>());
+}
+
+} // namespace souschef
+
+#endif // __SOUSCHEF_DIMS_H__
diff --git a/compiler/souschef/include/souschef/TensorFiller.h b/compiler/souschef/include/souschef/TensorFiller.h
new file mode 100644
index 000000000..1d87f1372
--- /dev/null
+++ b/compiler/souschef/include/souschef/TensorFiller.h
@@ -0,0 +1,96 @@
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __SOUSCHEF_TENSOR_FILLER_H__
+#define __SOUSCHEF_TENSOR_FILLER_H__
+
+#include <map>
+#include <vector>
+
+namespace souschef
+{
+
+class TensorFiller
+{
+public:
+ virtual ~TensorFiller() = default;
+
+ /**
+   * @brief Record that the tensor at the given index needs a filler option
+   *        (e.g. kernel, bias).
+ */
+ void set_tensor_filler(uint32_t tensor_index) { _tensor_filler[tensor_index] = true; }
+
+ /**
+   * @brief Store int32 filler values (e.g. reshape information) for the tensor
+ */
+ void set_tensor_filler(uint32_t tensor_index, std::vector<int32_t> &expvalues)
+ {
+ _tensor_filler_vint32[tensor_index] = expvalues;
+ }
+
+ void set_tensor_filler(uint32_t tensor_index, std::vector<float> &expvalues)
+ {
+ _tensor_filler_vfloat[tensor_index] = expvalues;
+ }
+
+ /**
+   * @brief Return true if the tensor at the given index needs a filler option.
+ */
+ bool get_tensor_filler(uint32_t tensor_index)
+ {
+ auto it = _tensor_filler.find(tensor_index);
+ if (it != _tensor_filler.end())
+ {
+ return it->second;
+ }
+ return false;
+ }
+
+ /**
+   * @brief Return true if the tensor at the given index needs an int32 array filler option.
+ */
+ bool get_tensor_filler(uint32_t tensor_index, std::vector<int32_t> &expvalues)
+ {
+ auto it = _tensor_filler_vint32.find(tensor_index);
+ if (it != _tensor_filler_vint32.end())
+ {
+ expvalues = it->second;
+ return true;
+ }
+ return false;
+ }
+
+ bool get_tensor_filler(uint32_t tensor_index, std::vector<float> &expvalues)
+ {
+ auto it = _tensor_filler_vfloat.find(tensor_index);
+ if (it != _tensor_filler_vfloat.end())
+ {
+ expvalues = it->second;
+ return true;
+ }
+ return false;
+ }
+
+private:
+ std::map<uint32_t, bool> _tensor_filler{};
+ std::map<uint32_t, std::vector<int32_t>> _tensor_filler_vint32{};
+ std::map<uint32_t, std::vector<float>> _tensor_filler_vfloat{};
+};
+
+} // namespace souschef
+
+#endif // __SOUSCHEF_TENSOR_FILLER_H__
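A minimal sketch of how an importer mixes in TensorFiller, mirroring the TFliteImport change later in this diff. The FakeImport class and the tensor indices are illustrative.

#include <souschef/TensorFiller.h>

#include <cstdint>
#include <vector>

// An importer inherits the filler bookkeeping instead of re-implementing it.
class FakeImport : public souschef::TensorFiller
{
};

void example()
{
  FakeImport import;

  // Mark tensor #1 (e.g. a kernel) as needing a random filler.
  import.set_tensor_filler(1);

  // Store explicit int32 values (e.g. a reshape's new shape) for tensor #2.
  std::vector<int32_t> new_shape{1, 2, 3};
  import.set_tensor_filler(2, new_shape);

  // Later, the recipe generator asks the same questions back.
  std::vector<int32_t> values;
  if (import.get_tensor_filler(2, values))
  {
    // values now holds {1, 2, 3}
  }
}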
diff --git a/compiler/luci-value-test/tester/src/CircleExpContract.cpp b/compiler/souschef/src/Dims.cpp
index b56b7eedc..fba4813fc 100644
--- a/compiler/luci-value-test/tester/src/CircleExpContract.cpp
+++ b/compiler/souschef/src/Dims.cpp
@@ -14,20 +14,6 @@
* limitations under the License.
*/
-#include "CircleExpContract.h"
+#include "souschef/Dims.h"
-#include <oops/InternalExn.h>
-
-#include <fstream>
-#include <iostream>
-
-bool CircleExpContract::store(const char *ptr, const size_t size) const
-{
- if (!ptr)
- INTERNAL_EXN("Graph was not serialized by FlatBuffer for some reason");
-
- std::ofstream fs(_filepath.c_str(), std::ofstream::binary);
- fs.write(ptr, size);
-
- return fs.good();
-}
+// NOTE Do NOT delete this file; this file checks the completeness of 'Dims.h'
diff --git a/compiler/tflchef/core/src/ModelChef.cpp b/compiler/tflchef/core/src/ModelChef.cpp
index 692ce48c1..a4b435dfa 100644
--- a/compiler/tflchef/core/src/ModelChef.cpp
+++ b/compiler/tflchef/core/src/ModelChef.cpp
@@ -26,6 +26,7 @@
#include "OpChefs.h"
#include <souschef/Dataset.h>
+#include <souschef/Dims.h>
#include "Log.h"
@@ -41,52 +42,8 @@
#include <sstream>
#include <stdexcept>
-namespace
-{
-
using namespace souschef;
-template <typename T> std::vector<T> as_vector(const ::google::protobuf::RepeatedPtrField<T> &field)
-{
- std::vector<T> res;
- for (const auto &elem : field)
- {
- res.emplace_back(elem);
- }
- return res;
-}
-
-template <typename T> Dataset<T> as_dataset(const ::google::protobuf::RepeatedPtrField<T> &field)
-{
- return Dataset<T>(as_vector<T>(field));
-}
-
-} // namespace
-
-namespace
-{
-
-template <typename T> using Dims = std::vector<T>;
-
-Dims<int32_t> as_dims(const tflchef::TensorShape &shape)
-{
- std::vector<int32_t> res;
-
- for (auto &dim : shape.dim())
- {
- res.emplace_back(static_cast<int32_t>(dim));
- }
-
- return res;
-}
-
-int32_t element_count(const Dims<int32_t> &dims)
-{
- return std::accumulate(dims.begin(), dims.end(), 1, std::multiplies<int32_t>());
-}
-
-} // namespace
-
namespace
{
diff --git a/compiler/record-minmax/src/CircleExpContract.cpp b/compiler/tflchef/core/src/Op/NonMaxSuppressionV5.cpp
index b703250bd..500aa467f 100644
--- a/compiler/record-minmax/src/CircleExpContract.cpp
+++ b/compiler/tflchef/core/src/Op/NonMaxSuppressionV5.cpp
@@ -14,25 +14,17 @@
* limitations under the License.
*/
-#include "CircleExpContract.h"
+#include "NonMaxSuppressionV5.h"
-#include <oops/InternalExn.h>
-
-#include <fstream>
-#include <iostream>
-
-namespace record_minmax
-{
-
-bool CircleExpContract::store(const char *ptr, const size_t size) const
+flatbuffers::Offset<void> NonMaxSuppressionV5Chef::value(flatbuffers::FlatBufferBuilder &fbb) const
{
- if (!ptr)
- INTERNAL_EXN("Graph was not serialized by FlatBuffer for some reason");
+ tflite::NonMaxSuppressionV5OptionsBuilder options_builder{fbb};
- std::ofstream fs(_filepath, std::ofstream::binary);
- fs.write(ptr, size);
-
- return fs.good();
+ return options_builder.Finish().Union();
}
-} // namespace record_minmax
+std::unique_ptr<OpChef>
+NonMaxSuppressionV5ChefFactory::create(const tflchef::Operation *operation) const
+{
+ return std::unique_ptr<OpChef>{new NonMaxSuppressionV5Chef{operation}};
+}
diff --git a/compiler/tflchef/core/src/Op/NonMaxSuppressionV5.h b/compiler/tflchef/core/src/Op/NonMaxSuppressionV5.h
new file mode 100644
index 000000000..a3c8b6009
--- /dev/null
+++ b/compiler/tflchef/core/src/Op/NonMaxSuppressionV5.h
@@ -0,0 +1,52 @@
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __OP_NON_MAX_SUPPRESSION_V5_H__
+#define __OP_NON_MAX_SUPPRESSION_V5_H__
+
+#include "OpChef.h"
+
+class NonMaxSuppressionV5Chef final : public OpChef
+{
+public:
+ explicit NonMaxSuppressionV5Chef(const tflchef::Operation *operation) : _operation{operation}
+ {
+ // DO NOTHING
+ }
+
+public:
+ tflite::BuiltinOperator code(void) const override
+ {
+ return tflite::BuiltinOperator_NON_MAX_SUPPRESSION_V5;
+ }
+
+ tflite::BuiltinOptions type(void) const override
+ {
+ return tflite::BuiltinOptions_NonMaxSuppressionV5Options;
+ }
+
+ flatbuffers::Offset<void> value(flatbuffers::FlatBufferBuilder &fbb) const override;
+
+private:
+ const tflchef::Operation *_operation;
+};
+
+struct NonMaxSuppressionV5ChefFactory final : public OpChefFactory
+{
+ std::unique_ptr<OpChef> create(const tflchef::Operation *operation) const override;
+};
+
+#endif // __OP_NON_MAX_SUPPRESSION_V5_H__
diff --git a/compiler/tflchef/core/src/OpChef.def b/compiler/tflchef/core/src/OpChef.def
index 244186265..6b242e811 100644
--- a/compiler/tflchef/core/src/OpChef.def
+++ b/compiler/tflchef/core/src/OpChef.def
@@ -56,6 +56,7 @@ OP_CHEF(MirrorPad, MirrorPadChefFactory)
OP_CHEF(Mul, MulChefFactory)
OP_CHEF(Neg, NegChefFactory)
OP_CHEF(NonMaxSuppressionV4, NonMaxSuppressionV4ChefFactory)
+OP_CHEF(NonMaxSuppressionV5, NonMaxSuppressionV5ChefFactory)
OP_CHEF(NotEqual, NotEqualChefFactory)
OP_CHEF(OneHot, OneHotChefFactory)
OP_CHEF(Pack, PackChefFactory)
diff --git a/compiler/tflchef/core/src/OpChefs.h b/compiler/tflchef/core/src/OpChefs.h
index 5b2e89bd9..7637b1c69 100644
--- a/compiler/tflchef/core/src/OpChefs.h
+++ b/compiler/tflchef/core/src/OpChefs.h
@@ -69,6 +69,7 @@
#include "Op/Mul.h"
#include "Op/Neg.h"
#include "Op/NonMaxSuppressionV4.h"
+#include "Op/NonMaxSuppressionV5.h"
#include "Op/NotEqual.h"
#include "Op/OneHot.h"
#include "Op/Pack.h"
diff --git a/compiler/tflchef/proto/tflchef.proto b/compiler/tflchef/proto/tflchef.proto
index 70b966ec3..9909d517a 100644
--- a/compiler/tflchef/proto/tflchef.proto
+++ b/compiler/tflchef/proto/tflchef.proto
@@ -371,6 +371,10 @@ message NonMaxSuppressionV4Options {
// None
}
+message NonMaxSuppressionV5Options {
+ // None
+}
+
message NotEqualOptions {
// None
}
@@ -544,7 +548,7 @@ message Operation {
// HardSwishOptions 196
optional DepthToSpaceOptions depth_to_space_options = 197;
optional NonMaxSuppressionV4Options non_max_suppression_v4_options = 198;
- // NonMaxSuppressionV5Options 199
+ optional NonMaxSuppressionV5Options non_max_suppression_v5_options = 199;
optional ScatterNdOptions scatter_nd_options = 200;
optional NotEqualOptions notequal_options = 201;
optional ExpandDimsOptions expand_dims_options = 202;
diff --git a/compiler/tflchef/tflite/CMakeLists.txt b/compiler/tflchef/tflite/CMakeLists.txt
index 645c16144..83127cb3e 100644
--- a/compiler/tflchef/tflite/CMakeLists.txt
+++ b/compiler/tflchef/tflite/CMakeLists.txt
@@ -7,3 +7,4 @@ target_link_libraries(tflchef_tflite tflchef_proto)
target_link_libraries(tflchef_tflite mio_tflite)
target_link_libraries(tflchef_tflite stdex)
target_link_libraries(tflchef_tflite cwrap)
+target_link_libraries(tflchef_tflite souschef)
diff --git a/compiler/tflchef/tflite/src/Op/NonMaxSuppressionV5.cpp b/compiler/tflchef/tflite/src/Op/NonMaxSuppressionV5.cpp
new file mode 100644
index 000000000..db7f4c932
--- /dev/null
+++ b/compiler/tflchef/tflite/src/Op/NonMaxSuppressionV5.cpp
@@ -0,0 +1,59 @@
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "NonMaxSuppressionV5.h"
+
+#include "Convert.h"
+#include "FillerHelper.h"
+
+namespace tflchef
+{
+
+void TFliteOpNonMaxSuppressionV5::filler(const tflite::Operator *op, TFliteImport *import,
+ tflchef::ModelRecipe *model_recipe) const
+{
+ const auto &inputs = *op->inputs();
+
+ const tflite::Tensor *max_output_size_tensor = import->tensors()->Get(inputs[2]);
+ assert(max_output_size_tensor->type() == tflite::TensorType::TensorType_INT32);
+
+ const tflite::Tensor *iou_threshold_tensor = import->tensors()->Get(inputs[3]);
+ assert(iou_threshold_tensor->type() == tflite::TensorType::TensorType_FLOAT32);
+
+ const tflite::Tensor *score_threshold_tensor = import->tensors()->Get(inputs[4]);
+ assert(score_threshold_tensor->type() == tflite::TensorType::TensorType_FLOAT32);
+
+ const tflite::Tensor *soft_nms_sigma_tensor = import->tensors()->Get(inputs[5]);
+ assert(soft_nms_sigma_tensor->type() == tflite::TensorType::TensorType_FLOAT32);
+
+ for (int32_t index = 2; index < 6; ++index)
+ {
+ fill_tensor_to_import(index, import);
+ }
+}
+
+tflchef::Operation *TFliteOpNonMaxSuppressionV5::build(const tflite::Operator *op,
+ TFliteImport *import,
+ tflchef::ModelRecipe *model_recipe) const
+{
+ auto operation = model_recipe->add_operation();
+
+ operation->set_type("NonMaxSuppressionV5");
+
+ return operation;
+}
+
+} // namespace tflchef
diff --git a/compiler/tflchef/tflite/src/Op/NonMaxSuppressionV5.h b/compiler/tflchef/tflite/src/Op/NonMaxSuppressionV5.h
new file mode 100644
index 000000000..c948043f4
--- /dev/null
+++ b/compiler/tflchef/tflite/src/Op/NonMaxSuppressionV5.h
@@ -0,0 +1,39 @@
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __TFLITE_OP_NON_MAX_SUPPRESSION_V5_H__
+#define __TFLITE_OP_NON_MAX_SUPPRESSION_V5_H__
+
+#include "TFliteOpChef.h"
+
+namespace tflchef
+{
+
+/**
+ * @brief tflchef operator builder for NON_MAX_SUPPRESSION_V5
+ */
+class TFliteOpNonMaxSuppressionV5 : public TFliteOpChef
+{
+public:
+ void filler(const tflite::Operator *op, TFliteImport *import,
+ tflchef::ModelRecipe *model_recipe) const override;
+ tflchef::Operation *build(const tflite::Operator *op, TFliteImport *import,
+ tflchef::ModelRecipe *model_recipe) const override;
+};
+
+} // namespace tflchef
+
+#endif // __TFLITE_OP_NON_MAX_SUPPRESSION_V5_H__
diff --git a/compiler/tflchef/tflite/src/TFliteImport.h b/compiler/tflchef/tflite/src/TFliteImport.h
index 5b46f4501..9d0a642ab 100644
--- a/compiler/tflchef/tflite/src/TFliteImport.h
+++ b/compiler/tflchef/tflite/src/TFliteImport.h
@@ -19,6 +19,8 @@
#include <mio/tflite/schema_generated.h>
+#include <souschef/TensorFiller.h>
+
#include <tflchef.pb.h>
#include <map>
@@ -40,7 +42,7 @@ bool is_custom(const tflite::OperatorCode *opcode);
/**
* @brief Loads TF lite file and provides helpers to access attributes
*/
-class TFliteImport
+class TFliteImport : public souschef::TensorFiller
{
public:
TFliteImport(const tflite::Model *model);
@@ -63,63 +65,6 @@ public:
std::string opcode_name(const tflite::Operator *op) const;
size_t buffer_info(const tflite::Tensor *tensor, const uint8_t **buff_data);
- /**
- * @brief This will record the tensor by index, if it needs filler option,
- * such as kernel, bias.
- */
- void set_tensor_filler(uint32_t tensor_index) { _tensor_filler[tensor_index] = true; }
-
- /**
- * @brief This will store int32 filler values such as reshape information for the tensor
- */
- void set_tensor_filler(uint32_t tensor_index, std::vector<int32_t> &expvalues)
- {
- _tensor_filler_vint32[tensor_index] = expvalues;
- }
-
- void set_tensor_filler(uint32_t tensor_index, std::vector<float> &expvalues)
- {
- _tensor_filler_vfloat[tensor_index] = expvalues;
- }
-
- /**
- * @brief This will return true if the tensor by index, needs a filler option.
- */
- bool get_tensor_filler(uint32_t tensor_index)
- {
- auto it = _tensor_filler.find(tensor_index);
- if (it != _tensor_filler.end())
- {
- return it->second;
- }
- return false;
- }
-
- /**
- * @brief This will return true if the tensor by index, needs a int array filler option.
- */
- bool get_tensor_filler(uint32_t tensor_index, std::vector<int32_t> &expvalues)
- {
- auto it = _tensor_filler_vint32.find(tensor_index);
- if (it != _tensor_filler_vint32.end())
- {
- expvalues = it->second;
- return true;
- }
- return false;
- }
-
- bool get_tensor_filler(uint32_t tensor_index, std::vector<float> &expvalues)
- {
- auto it = _tensor_filler_vfloat.find(tensor_index);
- if (it != _tensor_filler_vfloat.end())
- {
- expvalues = it->second;
- return true;
- }
- return false;
- }
-
private:
const TFliteSubGraphs_t *_subgraphs{nullptr};
const TFliteBuffers_t *_buffers{nullptr};
@@ -129,10 +74,6 @@ private:
std::vector<const tflite::OperatorCode *> _op_codes{};
std::vector<int32_t> _inputs{};
std::vector<int32_t> _outputs{};
-
- std::map<uint32_t, bool> _tensor_filler{};
- std::map<uint32_t, std::vector<int32_t>> _tensor_filler_vint32{};
- std::map<uint32_t, std::vector<float>> _tensor_filler_vfloat{};
};
} // namespace tflchef
diff --git a/compiler/tflchef/tflite/src/TFliteOpChefs.h b/compiler/tflchef/tflite/src/TFliteOpChefs.h
index de14e37d1..36a010957 100644
--- a/compiler/tflchef/tflite/src/TFliteOpChefs.h
+++ b/compiler/tflchef/tflite/src/TFliteOpChefs.h
@@ -69,6 +69,7 @@
#include "Op/Mul.h"
#include "Op/Neg.h"
#include "Op/NonMaxSuppressionV4.h"
+#include "Op/NonMaxSuppressionV5.h"
#include "Op/NotEqual.h"
#include "Op/OneHot.h"
#include "Op/Pack.h"
diff --git a/compiler/tflchef/tflite/src/TFliteOpRegistry.h b/compiler/tflchef/tflite/src/TFliteOpRegistry.h
index 8d33007be..a454e98b6 100644
--- a/compiler/tflchef/tflite/src/TFliteOpRegistry.h
+++ b/compiler/tflchef/tflite/src/TFliteOpRegistry.h
@@ -106,6 +106,7 @@ private:
REG_TFL_OP(MUL, TFliteOpMul);
REG_TFL_OP(NEG, TFliteOpNeg);
REG_TFL_OP(NON_MAX_SUPPRESSION_V4, TFliteOpNonMaxSuppressionV4);
+ REG_TFL_OP(NON_MAX_SUPPRESSION_V5, TFliteOpNonMaxSuppressionV5);
REG_TFL_OP(NOT_EQUAL, TFliteOpNotEqual);
REG_TFL_OP(ONE_HOT, TFliteOpOneHot);
REG_TFL_OP(PACK, TFliteOpPack);
diff --git a/compiler/tfldump/src/OpPrinter.cpp b/compiler/tfldump/src/OpPrinter.cpp
index df027c3e3..24b9264ff 100644
--- a/compiler/tfldump/src/OpPrinter.cpp
+++ b/compiler/tfldump/src/OpPrinter.cpp
@@ -677,9 +677,11 @@ OpPrinterRegistry::OpPrinterRegistry()
_op_map[tflite::BuiltinOperator_MIRROR_PAD] = make_unique<MirrorPadPrinter>();
_op_map[tflite::BuiltinOperator_MUL] = make_unique<MulPrinter>();
// There is no Option for NON_MAX_SUPPRESSION_V4
+ // There is no Option for NON_MAX_SUPPRESSION_V5
_op_map[tflite::BuiltinOperator_ONE_HOT] = make_unique<OneHotPrinter>();
_op_map[tflite::BuiltinOperator_PACK] = make_unique<PackPrinter>();
// There is no Option for PAD
+ // There is no Option for PADV2
// There is no Option for PRELU
// There is no Option for RELU
// There is no Option for RELU6
diff --git a/compiler/tflite2circle/src/BuildBuiltinOptions.h b/compiler/tflite2circle/src/BuildBuiltinOptions.h
index 00b3de943..680118618 100644
--- a/compiler/tflite2circle/src/BuildBuiltinOptions.h
+++ b/compiler/tflite2circle/src/BuildBuiltinOptions.h
@@ -63,6 +63,7 @@
#include "BuildBuiltinOptions/MulOptions.h"
#include "BuildBuiltinOptions/NegOptions.h"
#include "BuildBuiltinOptions/NonMaxSuppressionV4Options.h"
+#include "BuildBuiltinOptions/NonMaxSuppressionV5Options.h"
#include "BuildBuiltinOptions/NotEqualOptions.h"
#include "BuildBuiltinOptions/OneHotOptions.h"
#include "BuildBuiltinOptions/PackOptions.h"
diff --git a/compiler/circle2circle/src/CircleExpContract.cpp b/compiler/tflite2circle/src/BuildBuiltinOptions/NonMaxSuppressionV5Options.cpp
index b56b7eedc..637c544ff 100644
--- a/compiler/circle2circle/src/CircleExpContract.cpp
+++ b/compiler/tflite2circle/src/BuildBuiltinOptions/NonMaxSuppressionV5Options.cpp
@@ -14,20 +14,17 @@
* limitations under the License.
*/
-#include "CircleExpContract.h"
+#include "NonMaxSuppressionV5Options.h"
-#include <oops/InternalExn.h>
-
-#include <fstream>
-#include <iostream>
-
-bool CircleExpContract::store(const char *ptr, const size_t size) const
+namespace tflite2circle
{
- if (!ptr)
- INTERNAL_EXN("Graph was not serialized by FlatBuffer for some reason");
-
- std::ofstream fs(_filepath.c_str(), std::ofstream::binary);
- fs.write(ptr, size);
- return fs.good();
+flatbuffers::Offset<circle::NonMaxSuppressionV5Options>
+build_circle_NonMaxSuppressionV5Options(flatbuffers::FlatBufferBuilder &fb,
+ const tflite::Operator *)
+{
+ circle::NonMaxSuppressionV5OptionsBuilder builtin_options_builder{fb};
+ return builtin_options_builder.Finish();
}
+
+} // namespace tflite2circle
diff --git a/compiler/tflite2circle/src/BuildBuiltinOptions/NonMaxSuppressionV5Options.h b/compiler/tflite2circle/src/BuildBuiltinOptions/NonMaxSuppressionV5Options.h
new file mode 100644
index 000000000..faf989acc
--- /dev/null
+++ b/compiler/tflite2circle/src/BuildBuiltinOptions/NonMaxSuppressionV5Options.h
@@ -0,0 +1,32 @@
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __BBO_NON_MAX_SUPPRESSION_V5_OPTIONS_H__
+#define __BBO_NON_MAX_SUPPRESSION_V5_OPTIONS_H__
+
+#include <mio/tflite/schema_generated.h>
+#include <mio/circle/schema_generated.h>
+
+namespace tflite2circle
+{
+
+flatbuffers::Offset<circle::NonMaxSuppressionV5Options>
+build_circle_NonMaxSuppressionV5Options(flatbuffers::FlatBufferBuilder &fb,
+ const tflite::Operator *op);
+
+} // namespace tflite2circle
+
+#endif // __BBO_NON_MAX_SUPPRESSION_V5_OPTIONS_H__
diff --git a/compiler/tflite2circle/src/CircleModel.cpp b/compiler/tflite2circle/src/CircleModel.cpp
index cb4437a49..14c44cb36 100644
--- a/compiler/tflite2circle/src/CircleModel.cpp
+++ b/compiler/tflite2circle/src/CircleModel.cpp
@@ -119,6 +119,75 @@ Offset<SubGraphLink>::Offset(FlatBufBuilder &fb, const TFLFlatBufVec *tflite_fla
// is_variable
bool is_variable = it->is_variable();
+ flatbuffers::Offset<circle::SparsityParameters> sparsity;
+ // sparsity
+ if (it->sparsity())
+ {
+ flatbuffers::Offset<flatbuffers::Vector<int32_t>> traversal_order;
+ flatbuffers::Offset<flatbuffers::Vector<int32_t>> block_map;
+ flatbuffers::Offset<flatbuffers::Vector<flatbuffers::Offset<circle::DimensionMetadata>>>
+ dim_metadata;
+
+ // traversal_order
+ if (it->sparsity()->traversal_order())
+ {
+ auto traversal_order_vec = std::vector<int32_t>{
+ it->sparsity()->traversal_order()->begin(), it->sparsity()->traversal_order()->end()};
+ traversal_order = fb->CreateVector(traversal_order_vec);
+ }
+
+ // block_map
+ if (it->sparsity()->block_map())
+ {
+ auto block_map_vec = std::vector<int32_t>{it->sparsity()->block_map()->begin(),
+ it->sparsity()->block_map()->end()};
+ block_map = fb->CreateVector(block_map_vec);
+ }
+
+ // dim_metadata
+ std::vector<flatbuffers::Offset<circle::DimensionMetadata>> dim_metadata_vec;
+ auto tflite_dim_metadata = it->sparsity()->dim_metadata();
+ for (auto it : *tflite_dim_metadata)
+ {
+ // array_segments
+ auto tflite_array_segments_type = it->array_segments_type();
+ auto circle_array_segments =
+ get_circle_sparse_index_vector(*fb, it, tflite_array_segments_type);
+ auto circle_array_segments_type =
+ get_circle_sparse_index_vector_type(tflite_array_segments_type);
+
+ // array_indices
+ auto tflite_array_indices_type = it->array_indices_type();
+ auto circle_array_indices =
+ get_circle_sparse_index_vector(*fb, it, tflite_array_indices_type);
+ auto circle_array_indices_type =
+ get_circle_sparse_index_vector_type(tflite_array_indices_type);
+
+ auto circle_dim_metadata_builder = circle::DimensionMetadataBuilder{*fb};
+
+ circle_dim_metadata_builder.add_format(get_circle_dimension_type(it->format()));
+ circle_dim_metadata_builder.add_dense_size(it->dense_size());
+ circle_dim_metadata_builder.add_array_segments(circle_array_segments);
+ circle_dim_metadata_builder.add_array_segments_type(circle_array_segments_type);
+ circle_dim_metadata_builder.add_array_indices(circle_array_indices);
+ circle_dim_metadata_builder.add_array_indices_type(circle_array_indices_type);
+ auto dim_metadata = circle_dim_metadata_builder.Finish();
+ dim_metadata_vec.emplace_back(dim_metadata);
+ }
+ dim_metadata = fb->CreateVector(dim_metadata_vec);
+
+ sparsity = circle::CreateSparsityParameters(*fb, traversal_order, block_map, dim_metadata);
+ }
+
+ // shape signature
+ flatbuffers::Offset<flatbuffers::Vector<int32_t>> shape_signature;
+ if (it->shape_signature())
+ {
+ auto shape_signature_vec =
+ std::vector<int32_t>({it->shape_signature()->begin(), it->shape_signature()->end()});
+ shape_signature = fb->CreateVector(shape_signature_vec);
+ }
+
circle::TensorBuilder tensor_builder{*fb};
tensor_builder.add_shape(shape);
tensor_builder.add_type(get_circle_tensortype(it->type()));
@@ -126,6 +195,8 @@ Offset<SubGraphLink>::Offset(FlatBufBuilder &fb, const TFLFlatBufVec *tflite_fla
tensor_builder.add_name(name);
tensor_builder.add_quantization(quantization);
tensor_builder.add_is_variable(is_variable);
+ tensor_builder.add_sparsity(sparsity);
+ tensor_builder.add_shape_signature(shape_signature);
auto tensor = tensor_builder.Finish();
tensor_vec.emplace_back(tensor);
}
@@ -226,6 +297,14 @@ CircleModel::CircleModel(FlatBufBuilder &fb, TFLModel &model)
: _version{0}, _description{fb->CreateString("nnpackage")}, _fb{fb}
{
const tflite::Model *tfl_model = model.load_model();
+ // verify flatbuffers
+ flatbuffers::Verifier verifier{reinterpret_cast<const uint8_t *>(model._data.data()),
+ model._data.size()};
+ if (!tflite::VerifyModelBuffer(verifier))
+ {
+ throw std::runtime_error("ERROR: Failed to verify tflite");
+ }
+
_operator_codes_offset =
std::make_unique<Offset<OperatorCodeLink>>(fb, tfl_model->operator_codes());
_subGraphs_offset = std::make_unique<Offset<SubGraphLink>>(fb, tfl_model->subgraphs());
diff --git a/compiler/tflite2circle/src/DataLookup.cpp b/compiler/tflite2circle/src/DataLookup.cpp
index b0d35d1a5..75504b062 100644
--- a/compiler/tflite2circle/src/DataLookup.cpp
+++ b/compiler/tflite2circle/src/DataLookup.cpp
@@ -123,4 +123,79 @@ circle::MirrorPadMode get_circle_mirrorpad_mode(tflite::MirrorPadMode tfl_mode)
}
}
+circle::DimensionType get_circle_dimension_type(tflite::DimensionType tfl_dim_type)
+{
+ switch (tfl_dim_type)
+ {
+ case tflite::DimensionType_DENSE:
+ return circle::DimensionType_DENSE;
+ case tflite::DimensionType_SPARSE_CSR:
+ return circle::DimensionType_SPARSE_CSR;
+ default:
+ throw std::runtime_error("tflite2circle: wrong dimension type.");
+ }
+}
+
+flatbuffers::Offset<void>
+get_circle_sparse_index_vector(flatbuffers::FlatBufferBuilder &fb,
+ const tflite::DimensionMetadata *dm,
+ const tflite::SparseIndexVector &tfl_sparse_index_vector_type)
+{
+ switch (tfl_sparse_index_vector_type)
+ {
+ case tflite::SparseIndexVector_NONE:
+ return flatbuffers::Offset<void>();
+ case tflite::SparseIndexVector_Int32Vector:
+ {
+ auto values_vec_int32 =
+ std::vector<int32_t>{dm->array_segments_as_Int32Vector()->values()->begin(),
+ dm->array_segments_as_Int32Vector()->values()->end()};
+ auto values_int32 = fb.CreateVector(values_vec_int32);
+ circle::Int32VectorBuilder int32_vector_builder{fb};
+ int32_vector_builder.add_values(values_int32);
+ return int32_vector_builder.Finish().Union();
+ }
+ case tflite::SparseIndexVector_Uint16Vector:
+ {
+ auto values_vec_uint16 =
+ std::vector<uint16_t>{dm->array_segments_as_Uint16Vector()->values()->begin(),
+ dm->array_segments_as_Uint16Vector()->values()->end()};
+ auto values_uint16 = fb.CreateVector(values_vec_uint16);
+ circle::Uint16VectorBuilder uint16_vector_builder{fb};
+ uint16_vector_builder.add_values(values_uint16);
+ return uint16_vector_builder.Finish().Union();
+ }
+ case tflite::SparseIndexVector_Uint8Vector:
+ {
+ auto values_vec_uint8 =
+ std::vector<uint8_t>{dm->array_segments_as_Uint8Vector()->values()->begin(),
+ dm->array_segments_as_Uint8Vector()->values()->end()};
+ auto values_uint8 = fb.CreateVector(values_vec_uint8);
+ circle::Uint8VectorBuilder uint8_vector_builder{fb};
+ uint8_vector_builder.add_values(values_uint8);
+ return uint8_vector_builder.Finish().Union();
+ }
+ default:
+ throw std::runtime_error("tflite2circle: wrong SparseIndexVector type.");
+ }
+}
+
+circle::SparseIndexVector
+get_circle_sparse_index_vector_type(const tflite::SparseIndexVector &tfl_sparse_index_vector_type)
+{
+ switch (tfl_sparse_index_vector_type)
+ {
+ case tflite::SparseIndexVector_NONE:
+ return circle::SparseIndexVector_NONE;
+ case tflite::SparseIndexVector_Int32Vector:
+ return circle::SparseIndexVector_Int32Vector;
+ case tflite::SparseIndexVector_Uint16Vector:
+ return circle::SparseIndexVector_Uint16Vector;
+ case tflite::SparseIndexVector_Uint8Vector:
+ return circle::SparseIndexVector_Uint8Vector;
+ default:
+ throw std::runtime_error("tflite2circle: wrong SparseIndexVector type.");
+ }
+}
+
} // namespace tflite2circle
diff --git a/compiler/tflite2circle/src/DataLookup.h b/compiler/tflite2circle/src/DataLookup.h
index 7ea01b9c8..26ad74666 100644
--- a/compiler/tflite2circle/src/DataLookup.h
+++ b/compiler/tflite2circle/src/DataLookup.h
@@ -76,6 +76,25 @@ circle::BuiltinOptions get_circle_builtin_options_type(const tflite::Operator *o
*/
circle::MirrorPadMode get_circle_mirrorpad_mode(tflite::MirrorPadMode tfl_mode);
+/**
+ * @brief Returns circle DimensionType according to tflite.
+*/
+circle::DimensionType get_circle_dimension_type(tflite::DimensionType tfl_dim_type);
+
+/**
+ * @brief Returns circle SparseIndexVector according to tflite.
+*/
+flatbuffers::Offset<void>
+get_circle_sparse_index_vector(flatbuffers::FlatBufferBuilder &fb,
+ const tflite::DimensionMetadata *dm,
+ const tflite::SparseIndexVector &tfl_sparse_index_vector_type);
+
+/**
+ * @brief Returns circle SparseIndexVector type according to tflite.
+*/
+circle::SparseIndexVector
+get_circle_sparse_index_vector_type(const tflite::SparseIndexVector &tfl_sparse_index_vector_type);
+
} // namespace tflite2circle
#endif // __DATA_LOOKUP_H__
diff --git a/compiler/tflite2circle/src/TFLBuiltinOptions.lst b/compiler/tflite2circle/src/TFLBuiltinOptions.lst
index a2a14538e..22b59863b 100644
--- a/compiler/tflite2circle/src/TFLBuiltinOptions.lst
+++ b/compiler/tflite2circle/src/TFLBuiltinOptions.lst
@@ -101,7 +101,7 @@ TFL_BUILTIN_OPTIONS(IfOptions)
TFL_BUILTIN_OPTIONS(WhileOptions)
TFL_BUILTIN_OPTIONS(DepthToSpaceOptions)
TFL_BUILTIN_OPTIONS(NonMaxSuppressionV4Options)
-//TFL_BUILTIN_OPTIONS(NonMaxSuppressionV5Options)
+TFL_BUILTIN_OPTIONS(NonMaxSuppressionV5Options)
TFL_BUILTIN_OPTIONS(RankOptions)
TFL_BUILTIN_OPTIONS(ScatterNdOptions)
TFL_BUILTIN_OPTIONS(SegmentSumOptions)
diff --git a/compiler/vconone/CMakeLists.txt b/compiler/vconone/CMakeLists.txt
index b8cb79331..be4398996 100644
--- a/compiler/vconone/CMakeLists.txt
+++ b/compiler/vconone/CMakeLists.txt
@@ -1,5 +1,5 @@
if (NOT VCONONE_VERSION)
- set(VCONONE_VERSION 0x0000000000080001)
+ set(VCONONE_VERSION 0x0000000000090001)
# NOTE order is [build patch minor major]
# if VCONONE_VERSION is set with -D option, it will be cached
# you may have to remove cache file if you remove -D option
diff --git a/compute/ARMComputeEx/src/runtime/CL/functions/CLReduceOperation.cpp b/compute/ARMComputeEx/src/runtime/CL/functions/CLReduceOperation.cpp
index a41e6db60..38401100c 100644
--- a/compute/ARMComputeEx/src/runtime/CL/functions/CLReduceOperation.cpp
+++ b/compute/ARMComputeEx/src/runtime/CL/functions/CLReduceOperation.cpp
@@ -59,6 +59,8 @@ Status CLReduceOperation::validate(const ITensorInfo *input, const ITensorInfo *
const size_t num_of_kernels = axis.size();
const size_t num_of_interm_tensors = num_of_kernels - (keep_dims ? 1 : 0);
+ ARM_COMPUTE_RETURN_ERROR_ON(num_of_kernels < 1);
+
// Create temporary tensor infos
auto interm_tensors = support::cpp14::make_unique<TensorInfo[]>(num_of_interm_tensors);
diff --git a/compute/ARMComputeEx/src/runtime/NEON/functions/NETransposeConvLayer.cpp b/compute/ARMComputeEx/src/runtime/NEON/functions/NETransposeConvLayer.cpp
index 09f178005..aa165cc15 100644
--- a/compute/ARMComputeEx/src/runtime/NEON/functions/NETransposeConvLayer.cpp
+++ b/compute/ARMComputeEx/src/runtime/NEON/functions/NETransposeConvLayer.cpp
@@ -1,4 +1,20 @@
/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+/*
* Copyright (c) 2017-2020 ARM Limited.
*
* SPDX-License-Identifier: MIT
diff --git a/compute/cker/include/cker/Types.h b/compute/cker/include/cker/Types.h
index 886ce5e5e..c0c9313ea 100644
--- a/compute/cker/include/cker/Types.h
+++ b/compute/cker/include/cker/Types.h
@@ -106,6 +106,9 @@ struct SoftmaxParams
int32_t reverse_scaling_divisor;
int32_t reverse_scaling_right_shift;
int diff_min;
+ int32_t zero_point;
+ float scale;
+ float *table;
};
struct PackParams
diff --git a/compute/cker/include/cker/eigen/eigen_convolution_helpers.h b/compute/cker/include/cker/eigen/eigen_convolution_helpers.h
index a27871edb..dc3e2552d 100644
--- a/compute/cker/include/cker/eigen/eigen_convolution_helpers.h
+++ b/compute/cker/include/cker/eigen/eigen_convolution_helpers.h
@@ -1,17 +1,19 @@
-/* Copyright 2019 The TensorFlow Authors. All Rights Reserved.
-
-Licensed under the Apache License, Version 2.0 (the "License");
-you may not use this file except in compliance with the License.
-You may obtain a copy of the License at
-
- http://www.apache.org/licenses/LICENSE-2.0
-
-Unless required by applicable law or agreed to in writing, software
-distributed under the License is distributed on an "AS IS" BASIS,
-WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-See the License for the specific language governing permissions and
-limitations under the License.
-==============================================================================*/
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ * Copyright 2019 The TensorFlow Authors. All Rights Reserved.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
#ifndef __NNFW_CKER_EIGEN_EIGEN_CONVOLUTION_HELPERS_H__
#define __NNFW_CKER_EIGEN_EIGEN_CONVOLUTION_HELPERS_H__
diff --git a/compute/cker/include/cker/eigen/eigen_spatial_convolutions.h b/compute/cker/include/cker/eigen/eigen_spatial_convolutions.h
index 5af2e4836..c6f1e2ee7 100644
--- a/compute/cker/include/cker/eigen/eigen_spatial_convolutions.h
+++ b/compute/cker/include/cker/eigen/eigen_spatial_convolutions.h
@@ -1,17 +1,19 @@
-/* Copyright 2015 The TensorFlow Authors. All Rights Reserved.
-
-Licensed under the Apache License, Version 2.0 (the "License");
-you may not use this file except in compliance with the License.
-You may obtain a copy of the License at
-
- http://www.apache.org/licenses/LICENSE-2.0
-
-Unless required by applicable law or agreed to in writing, software
-distributed under the License is distributed on an "AS IS" BASIS,
-WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-See the License for the specific language governing permissions and
-limitations under the License.
-==============================================================================*/
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ * Copyright 2015 The TensorFlow Authors. All Rights Reserved.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
#ifndef __NNFW_CKER_EGIEN_EIGEN_SPATIAL_CONVOLUTIONS_H__
#define __NNFW_CKER_EGIEN_EIGEN_SPATIAL_CONVOLUTIONS_H__
diff --git a/compute/cker/include/cker/neon/neon_check.h b/compute/cker/include/cker/neon/neon_check.h
index 119d82719..116f01bb7 100644
--- a/compute/cker/include/cker/neon/neon_check.h
+++ b/compute/cker/include/cker/neon/neon_check.h
@@ -1,17 +1,20 @@
-/* Copyright 2019 The TensorFlow Authors. All Rights Reserved.
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ * Copyright 2019 The TensorFlow Authors. All Rights Reserved.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
-Licensed under the Apache License, Version 2.0 (the "License");
-you may not use this file except in compliance with the License.
-You may obtain a copy of the License at
-
- http://www.apache.org/licenses/LICENSE-2.0
-
-Unless required by applicable law or agreed to in writing, software
-distributed under the License is distributed on an "AS IS" BASIS,
-WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-See the License for the specific language governing permissions and
-limitations under the License.
-==============================================================================*/
#ifndef __NNFW_CKER_NEON_CHECK_H__
#define __NNFW_CKER_NEON_CHECK_H__
diff --git a/compute/cker/include/cker/operation/AveragePool.h b/compute/cker/include/cker/operation/AveragePool.h
index de43ba3bd..6149cafa7 100644
--- a/compute/cker/include/cker/operation/AveragePool.h
+++ b/compute/cker/include/cker/operation/AveragePool.h
@@ -32,7 +32,16 @@ namespace cker
{
// TODO Change to apply neon for this function if it is faster
-inline void AveragePool(const PoolParams &params, const Shape &input_shape, const float *input_data,
+template <typename T>
+void AveragePool(const PoolParams &, const Shape &, const T *, const Shape &, T *)
+{
+ static_assert(std::is_integral<T>::value || std::is_floating_point<T>::value,
+                "cker::AveragePool : This function supports only integer or floating point");
+ throw std::runtime_error("cker::AveragePool : Unsupported data type");
+}
+
+template <>
+void AveragePool<float>(const PoolParams &params, const Shape &input_shape, const float *input_data,
const Shape &output_shape, float *output_data)
{
assert(input_shape.DimensionsCount() == 4);
@@ -371,8 +380,10 @@ inline void AveragePool32(const PoolParams &params, const Shape &input_shape,
}
}
-inline void AveragePool(const PoolParams &params, const Shape &input_shape,
- const uint8_t *input_data, const Shape &output_shape, uint8_t *output_data)
+template <>
+void AveragePool<uint8_t>(const PoolParams &params, const Shape &input_shape,
+ const uint8_t *input_data, const Shape &output_shape,
+ uint8_t *output_data)
{
if (params.filter_height * params.filter_width > 16 * 16)
{
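With the pooling entry points turned into templates above, callers now pick the element type explicitly. A small sketch of the call form only; shapes and params are filled in by the caller exactly as before.

#include <cker/operation/AveragePool.h>
#include <cker/operation/MaxPool.h>

void run_pools(const nnfw::cker::PoolParams &params, const nnfw::cker::Shape &in_shape,
               const float *in_f, const uint8_t *in_q, const nnfw::cker::Shape &out_shape,
               float *out_f, uint8_t *out_q)
{
  // Float and uint8 paths dispatch to the explicit specializations added above.
  nnfw::cker::AveragePool<float>(params, in_shape, in_f, out_shape, out_f);
  nnfw::cker::MaxPool<uint8_t>(params, in_shape, in_q, out_shape, out_q);

  // Any other arithmetic type falls into the generic template, which throws at
  // runtime; non-arithmetic types fail the static_assert at compile time.
}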
diff --git a/compute/cker/include/cker/operation/Conv.h b/compute/cker/include/cker/operation/Conv.h
index 1bf191bf8..214f2e612 100644
--- a/compute/cker/include/cker/operation/Conv.h
+++ b/compute/cker/include/cker/operation/Conv.h
@@ -23,6 +23,7 @@
#include "cker/Utils.h"
#include "cker/operation/reference/Conv.h"
#include "cker/operation/optimized/Conv.h"
+#include <iostream>
#include <vector>
namespace nnfw
@@ -54,18 +55,15 @@ inline void TransposeFloatTensor(const float *input_data, const nnfw::cker::Shap
class Conv
{
public:
- Conv()
- : _modified_filter_data(), _im2col_data(), _im2col_shape(4), _need_im2col(false),
- _prepared(false)
- {
- }
+ Conv() : _modified_filter_data(), _im2col_shape(4), _need_im2col(false), _prepared(false) {}
void prepare(const Shape &filter_shape, const float *filter_data, PaddingType padding_type,
- bool &is_replaced_weights)
+ bool &is_replaced_weights, uint32_t dilationWidthFactor,
+ uint32_t dilationHeightFactor)
{
if (!_prepared)
{
- if (usableMultiThreaded(padding_type))
+ if (usableMultiThreaded(padding_type, dilationWidthFactor, dilationHeightFactor))
{
transposeFilter(filter_shape, filter_data, is_replaced_weights);
}
@@ -87,7 +85,8 @@ public:
const Shape &filter_shape, const float *filter_data, const Shape &bias_shape,
const float *bias_data, const Shape &output_shape, float *output_data)
{
- if (usableMultiThreaded(params.padding_type))
+ if (usableMultiThreaded(params.padding_type, params.dilation_width_factor,
+ params.dilation_height_factor))
{
bool transposed_in_execution = false;
if (!_prepared)
@@ -119,15 +118,29 @@ public:
params.stride_height);
}
- uint8_t *im2col_raw_data = _im2col_data.data();
- optimized::Conv(params, input_shape, input_data, filter_shape, filter_data, bias_shape,
- bias_data, output_shape, output_data, _im2col_shape, im2col_raw_data);
+ int im2col_size = _need_im2col ? _im2col_shape.FlatSize() : 1;
+
+ // Use heap if size is larger than 8MB
+ if (im2col_size > 8 * 1024 * 1024)
+ {
+ std::unique_ptr<uint8_t[]> im2col_data = std::make_unique<uint8_t[]>(im2col_size);
+ optimized::Conv(params, input_shape, input_data, filter_shape, filter_data, bias_shape,
+ bias_data, output_shape, output_data, _im2col_shape, im2col_data.get());
+ }
+ else
+ {
+ uint8_t im2col_data[im2col_size];
+ optimized::Conv(params, input_shape, input_data, filter_shape, filter_data, bias_shape,
+ bias_data, output_shape, output_data, _im2col_shape, im2col_data);
+ }
}
private:
- bool usableMultiThreaded(PaddingType padding_type)
+ bool usableMultiThreaded(PaddingType padding_type, uint32_t dilation_width_factor,
+ int32_t dilation_height_factor)
{
- return padding_type != PaddingType::kNone && std::thread::hardware_concurrency() > 1;
+ return padding_type != PaddingType::kNone && std::thread::hardware_concurrency() > 1 &&
+ dilation_width_factor == 1 && dilation_height_factor == 1;
}
void transposeFilter(const Shape &filter_shape, const float *filter_data,
@@ -151,13 +164,11 @@ private:
_im2col_shape.SetDim(1, output_shape.Dims(1));
_im2col_shape.SetDim(2, output_shape.Dims(2));
_im2col_shape.SetDim(3, input_shape.Dims(3) * kernel_shape.Dims(1) * kernel_shape.Dims(2));
- _im2col_data.resize(_im2col_shape.FlatSize());
}
}
private:
std::vector<float> _modified_filter_data;
- std::vector<uint8_t> _im2col_data;
Shape _im2col_shape;
bool _need_im2col;
bool _prepared;
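The Conv helper above now receives the dilation factors in prepare() and, per call, places the im2col scratch buffer on the heap when it exceeds 8 MB and on the stack otherwise. A hedged sketch of the expected call sequence; shapes and data are owned by the caller, and in the runtime the Conv object lives with the graph node so prepare() runs once.

#include <cker/operation/Conv.h>

void run_conv(const nnfw::cker::ConvParams &params, const nnfw::cker::Shape &input_shape,
              const float *input_data, const nnfw::cker::Shape &filter_shape,
              const float *filter_data, const nnfw::cker::Shape &bias_shape,
              const float *bias_data, const nnfw::cker::Shape &output_shape, float *output_data)
{
  nnfw::cker::Conv conv_kernel;

  // prepare() may transpose the weights for the multithreaded path; that path is
  // only taken when padding is set and both dilation factors are 1.
  bool is_replaced_weights = false;
  conv_kernel.prepare(filter_shape, filter_data, params.padding_type, is_replaced_weights,
                      params.dilation_width_factor, params.dilation_height_factor);

  conv_kernel(params, input_shape, input_data, filter_shape, filter_data, bias_shape, bias_data,
              output_shape, output_data);
}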
diff --git a/runtime/onert/core/src/ir/operation/Sin.cc b/compute/cker/include/cker/operation/Erf.h
index 631505f36..a9be3654a 100644
--- a/runtime/onert/core/src/ir/operation/Sin.cc
+++ b/compute/cker/include/cker/operation/Erf.h
@@ -14,26 +14,29 @@
* limitations under the License.
*/
-#include "ir/operation/Sin.h"
+#ifndef __NNFW_CKER_ERF_H__
+#define __NNFW_CKER_ERF_H__
-#include <cassert>
+#include "cker/Shape.h"
-#include "ir/OperationVisitor.h"
+#include <cmath>
-namespace onert
+namespace nnfw
{
-namespace ir
+namespace cker
{
-namespace operation
-{
-
-void Sin::accept(OperationVisitor &v) const { v.visit(*this); }
-Sin::Sin(const OperandIndexSequence &inputs, const OperandIndexSequence &outputs)
- : Operation{OperandConstraint::createExact(1u), inputs, outputs}
+inline void Erf(const Shape &input_shape, const float *input_data, const Shape &output_shape,
+ float *output_data)
{
+ const int size = MatchingFlatSize(input_shape, output_shape);
+ for (int i = 0; i < size; i++)
+ {
+ output_data[i] = std::erf(input_data[i]);
+ }
}
-} // namespace operation
-} // namespace ir
-} // namespace onert
+} // namespace cker
+} // namespace nnfw
+
+#endif // __NNFW_CKER_ERF_H__
diff --git a/compute/cker/include/cker/operation/LogSoftMax.h b/compute/cker/include/cker/operation/LogSoftMax.h
index 326a3eede..326a44f0c 100644
--- a/compute/cker/include/cker/operation/LogSoftMax.h
+++ b/compute/cker/include/cker/operation/LogSoftMax.h
@@ -77,6 +77,62 @@ inline void LogSoftmax(const SoftmaxParams &params, const Shape &input_shape,
}
}
+inline void LogSoftmax(const SoftmaxParams &params, float input_scale, const Shape &input_shape,
+ const uint8_t *input_data, const Shape &output_shape, uint8_t *output_data)
+{
+ const int rank = input_shape.DimensionsCount();
+ const int axis = (params.axis < 0) ? params.axis + rank : params.axis;
+ const double beta = params.beta;
+ const int depth = MatchingDim(input_shape, axis, output_shape, axis);
+
+ const int32_t clamp_max = std::numeric_limits<uint8_t>::max();
+ const int32_t clamp_min = std::numeric_limits<uint8_t>::min();
+
+ int outer_size = 1;
+ for (int i = 0; i < axis; ++i)
+ {
+ outer_size *= input_shape.Dims(i);
+ }
+
+ int inner_size = 1;
+ for (int i = axis + 1; i < rank; ++i)
+ {
+ inner_size *= input_shape.Dims(i);
+ }
+
+ for (int i = 0; i < outer_size; ++i)
+ {
+ for (int j = 0; j < inner_size; ++j)
+ {
+ uint8_t max_val = std::numeric_limits<uint8_t>::min();
+ for (int c = 0; c < depth; ++c)
+ {
+ max_val = std::max(max_val, input_data[(i * depth + c) * inner_size]);
+ }
+
+ float sum_exp = 0.0f;
+ const int32_t max_uint8 = std::numeric_limits<uint8_t>::max();
+ const float *table_offset = &params.table[max_uint8 - max_val];
+ for (int c = 0; c < depth; ++c)
+ {
+ sum_exp += table_offset[input_data[(i * depth + c) * inner_size]];
+ }
+ const float log_sum_exp = std::log(sum_exp);
+
+ const float scale = input_scale / params.scale;
+ const float precomputed = (input_scale * max_val * beta + log_sum_exp) / params.scale;
+ for (int c = 0; c < depth; ++c)
+ {
+ const float log_prob =
+ scale * input_data[(i * depth + c) * inner_size] * beta - precomputed;
+ const int32_t prob_quantized = std::rint(log_prob) + params.zero_point;
+ output_data[(i * depth + c) * inner_size] =
+ static_cast<uint8_t>(std::max(std::min(clamp_max, prob_quantized), clamp_min));
+ }
+ }
+ }
+}
+
} // namespace cker
} // namespace nnfw
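The quantized LogSoftmax above sums exponentials through params.table instead of calling std::exp per element. For the indexing used there (table_offset = &table[255 - max_val], then offset by the uint8 input), the table needs to hold exp((i - 255) * input_scale * beta) at index i. How and where the table is populated is not part of this change; the helper below is only a sketch under that assumption, and its name is illustrative.

#include <cmath>
#include <cstdint>

// Fill a 256-entry lookup table so that table[255 - max + x] == exp((x - max) * input_scale * beta)
// for any uint8 values x <= max. Intended to run once per node, outside the hot loop.
inline void populate_softmax_lookup_table(float *table, float input_scale, float beta)
{
  const int32_t max_uint8 = 255;
  for (int32_t i = 0; i <= max_uint8; ++i)
  {
    table[i] = std::exp((i - max_uint8) * input_scale * beta);
  }
}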
diff --git a/compute/cker/include/cker/operation/MaxPool.h b/compute/cker/include/cker/operation/MaxPool.h
index 339547298..ea3fcaca6 100644
--- a/compute/cker/include/cker/operation/MaxPool.h
+++ b/compute/cker/include/cker/operation/MaxPool.h
@@ -31,7 +31,15 @@ namespace nnfw
namespace cker
{
-inline void MaxPool(const PoolParams &params, const Shape &input_shape, const float *input_data,
+template <typename T> void MaxPool(const PoolParams &, const Shape &, const T *, const Shape &, T *)
+{
+ static_assert(std::is_integral<T>::value || std::is_floating_point<T>::value,
+ "cker::MaxPool : This function supports only integer or floating point");
+ throw std::runtime_error("cker::MaxPool : Unsupported data type");
+}
+
+template <>
+void MaxPool<float>(const PoolParams &params, const Shape &input_shape, const float *input_data,
const Shape &output_shape, float *output_data)
{
assert(input_shape.DimensionsCount() == 4);
@@ -86,8 +94,9 @@ inline void MaxPool(const PoolParams &params, const Shape &input_shape, const fl
}
}
-inline void MaxPool(const PoolParams &params, const Shape &input_shape, const uint8_t *input_data,
- const Shape &output_shape, uint8_t *output_data)
+template <>
+void MaxPool<uint8_t>(const PoolParams &params, const Shape &input_shape, const uint8_t *input_data,
+ const Shape &output_shape, uint8_t *output_data)
{
// Here, and in other pooling ops, in order to maintain locality of reference,
diff --git a/compute/cker/include/cker/operation/SoftMax.h b/compute/cker/include/cker/operation/SoftMax.h
index bb394619e..13e50b87a 100644
--- a/compute/cker/include/cker/operation/SoftMax.h
+++ b/compute/cker/include/cker/operation/SoftMax.h
@@ -32,6 +32,44 @@ namespace nnfw
namespace cker
{
+// Performs softmax along the input of size (input_size * batch_size).
+inline void Softmax(const float *in, const int input_size, const int batch_size, const float beta,
+ float *out)
+{
+ assert(input_size > 0);
+
+ // For each batch
+ for (int b = 0; b < batch_size; b++)
+ {
+ // Find the max coeff.
+ float max_coeff = in[0];
+ for (int i = 1; i < input_size; i++)
+ {
+ if (in[i] > max_coeff)
+ max_coeff = in[i];
+ }
+
+ // Compute the normalized sum of exps.
+ float exp_sum = 0.0;
+ for (int i = 0; i < input_size; i++)
+ {
+ out[i] = std::exp((in[i] - max_coeff) * beta);
+ exp_sum += out[i];
+ }
+
+ // Divide by the sum of exps.
+ float reciprocal_sum_exp = 1.f / exp_sum;
+ for (int i = 0; i < input_size; i++)
+ {
+ out[i] *= reciprocal_sum_exp;
+ }
+
+ // Advance in and out pointers for the next batch.
+ in += input_size;
+ out += input_size;
+ }
+}
+
inline void Softmax(const SoftmaxParams &params, const Shape &input_shape, const float *input_data,
const Shape &output_shape, float *output_data)
{
diff --git a/compute/cker/include/cker/operation/optimized/BinaryArithmeticOps.h b/compute/cker/include/cker/operation/optimized/BinaryArithmeticOps.h
index 2b2ea8fbc..ac5069917 100644
--- a/compute/cker/include/cker/operation/optimized/BinaryArithmeticOps.h
+++ b/compute/cker/include/cker/operation/optimized/BinaryArithmeticOps.h
@@ -148,9 +148,73 @@ inline void AddElementwiseQuant8(int size, const BinaryArithmeticOpParam &params
uint8_t *output_data)
{
int i = 0;
+
+#ifdef USE_NEON
+ const uint8x8_t output_activation_min_vector = vdup_n_u8(params.quantized_activation_min);
+ const uint8x8_t output_activation_max_vector = vdup_n_u8(params.quantized_activation_max);
+ for (; i <= size - 8; i += 8)
+ {
+ const uint8x8_t input1_val_original = vld1_u8(input1_data + i);
+ const uint8x8_t input2_val_original = vld1_u8(input2_data + i);
+ const int16x8_t input1_val_s16 = vreinterpretq_s16_u16(vmovl_u8(input1_val_original));
+ const int16x8_t input2_val_s16 = vreinterpretq_s16_u16(vmovl_u8(input2_val_original));
+ const int16x8_t input1_val = vaddq_s16(input1_val_s16, vdupq_n_s16(params.input1_offset));
+ const int16x8_t input2_val = vaddq_s16(input2_val_s16, vdupq_n_s16(params.input2_offset));
+ const int16x4_t input1_val_high = vget_high_s16(input1_val);
+ const int16x4_t input1_val_low = vget_low_s16(input1_val);
+ const int16x4_t input2_val_high = vget_high_s16(input2_val);
+ const int16x4_t input2_val_low = vget_low_s16(input2_val);
+ int32x4_t x11 = vmovl_s16(input1_val_low);
+ int32x4_t x12 = vmovl_s16(input1_val_high);
+ int32x4_t x21 = vmovl_s16(input2_val_low);
+ int32x4_t x22 = vmovl_s16(input2_val_high);
+ const int32x4_t left_shift_dup = vdupq_n_s32(params.left_shift);
+ x11 = vshlq_s32(x11, left_shift_dup);
+ x12 = vshlq_s32(x12, left_shift_dup);
+ x21 = vshlq_s32(x21, left_shift_dup);
+ x22 = vshlq_s32(x22, left_shift_dup);
+ x11 = vqrdmulhq_n_s32(x11, params.input1_multiplier);
+ x12 = vqrdmulhq_n_s32(x12, params.input1_multiplier);
+ x21 = vqrdmulhq_n_s32(x21, params.input2_multiplier);
+ x22 = vqrdmulhq_n_s32(x22, params.input2_multiplier);
+ const int32x4_t input1_shift_dup = vdupq_n_s32(params.input1_shift);
+ const int32x4_t input2_shift_dup = vdupq_n_s32(params.input2_shift);
+ x11 = vshlq_s32(x11, input1_shift_dup);
+ x12 = vshlq_s32(x12, input1_shift_dup);
+ x21 = vshlq_s32(x21, input2_shift_dup);
+ x22 = vshlq_s32(x22, input2_shift_dup);
+ int32x4_t s1 = vaddq_s32(x11, x21);
+ int32x4_t s2 = vaddq_s32(x12, x22);
+ s1 = vqrdmulhq_n_s32(s1, params.output_multiplier);
+ s2 = vqrdmulhq_n_s32(s2, params.output_multiplier);
+ using gemmlowp::RoundingDivideByPOT;
+ s1 = RoundingDivideByPOT(s1, -params.output_shift);
+ s2 = RoundingDivideByPOT(s2, -params.output_shift);
+ const int16x4_t s1_narrowed = vmovn_s32(s1);
+ const int16x4_t s2_narrowed = vmovn_s32(s2);
+ const int16x8_t s =
+ vaddq_s16(vcombine_s16(s1_narrowed, s2_narrowed), vdupq_n_s16(params.output_offset));
+ const uint8x8_t clamped = vmax_u8(output_activation_min_vector,
+ vmin_u8(output_activation_max_vector, vqmovun_s16(s)));
+ vst1_u8(output_data + i, clamped);
+ }
+#endif // NEON
for (; i < size; ++i)
{
- int32_t clamped_output = quant8_sum(params, input1_data[i], input2_data[i]);
+ const int32_t input1_val = params.input1_offset + input1_data[i];
+ const int32_t input2_val = params.input2_offset + input2_data[i];
+ const int32_t shifted_input1_val = input1_val * (1 << params.left_shift);
+ const int32_t shifted_input2_val = input2_val * (1 << params.left_shift);
+ const int32_t scaled_input1_val = MultiplyByQuantizedMultiplierSmallerThanOneExp(
+ shifted_input1_val, params.input1_multiplier, params.input1_shift);
+ const int32_t scaled_input2_val = MultiplyByQuantizedMultiplierSmallerThanOneExp(
+ shifted_input2_val, params.input2_multiplier, params.input2_shift);
+ const int32_t raw_sum = scaled_input1_val + scaled_input2_val;
+ const int32_t raw_output = MultiplyByQuantizedMultiplierSmallerThanOneExp(
+ raw_sum, params.output_multiplier, params.output_shift) +
+ params.output_offset;
+ const int32_t clamped_output = std::min(params.quantized_activation_max,
+ std::max(params.quantized_activation_min, raw_output));
output_data[i] = static_cast<uint8_t>(clamped_output);
}
}
@@ -392,10 +456,62 @@ inline void MulElementwiseQuant8(int size, const BinaryArithmeticOpParam &params
uint8_t *output_data)
{
int i = 0;
- int32_t clamped_output;
- for (; i < size; i++)
+
+#ifdef USE_NEON
+ const auto input1_offset_vector = vdupq_n_s16(params.input1_offset);
+ const auto input2_offset_vector = vdupq_n_s16(params.input2_offset);
+ const auto output_offset_vector = vdupq_n_s16(params.output_offset);
+ const auto output_activation_min_vector = vdup_n_u8(params.quantized_activation_min);
+ const auto output_activation_max_vector = vdup_n_u8(params.quantized_activation_max);
+ const int left_shift = std::max(0, params.output_shift);
+ const int right_shift = std::max(0, -params.output_shift);
+ const int32x4_t left_shift_vec = vdupq_n_s32(left_shift);
+ for (; i <= size - 8; i += 8)
+ {
+ // We load / store 8 at a time, multiplying as two sets of 4 int32s.
+ const auto input1_val_original = vld1_u8(input1_data + i);
+ const auto input2_val_original = vld1_u8(input2_data + i);
+ const auto input1_val_s16 = vreinterpretq_s16_u16(vmovl_u8(input1_val_original));
+ const auto input2_val_s16 = vreinterpretq_s16_u16(vmovl_u8(input2_val_original));
+ const auto input1_val = vaddq_s16(input1_val_s16, input1_offset_vector);
+ const auto input2_val = vaddq_s16(input2_val_s16, input2_offset_vector);
+
+ const auto input1_val_low = vget_low_s16(input1_val);
+ const auto input1_val_high = vget_high_s16(input1_val);
+ const auto input2_val_low = vget_low_s16(input2_val);
+ const auto input2_val_high = vget_high_s16(input2_val);
+
+ auto p1 = vmull_s16(input2_val_low, input1_val_low);
+ auto p2 = vmull_s16(input2_val_high, input1_val_high);
+
+ p1 = vshlq_s32(p1, left_shift_vec);
+ p2 = vshlq_s32(p2, left_shift_vec);
+ p1 = vqrdmulhq_n_s32(p1, params.output_multiplier);
+ p2 = vqrdmulhq_n_s32(p2, params.output_multiplier);
+ using gemmlowp::RoundingDivideByPOT;
+ p1 = RoundingDivideByPOT(p1, right_shift);
+ p2 = RoundingDivideByPOT(p2, right_shift);
+
+ const auto p1_narrowed = vqmovn_s32(p1);
+ const auto p2_narrowed = vqmovn_s32(p2);
+ const auto p = vaddq_s16(vcombine_s16(p1_narrowed, p2_narrowed), output_offset_vector);
+ const auto clamped = vmax_u8(output_activation_min_vector,
+ vmin_u8(output_activation_max_vector, vqmovun_s16(p)));
+ vst1_u8(output_data + i, clamped);
+ }
+#endif // NEON
+
+ for (; i < size; ++i)
{
- clamped_output = quant8_mul(params, input1_data[i], input2_data[i]);
+ const int32_t input1_val = params.input1_offset + input1_data[i];
+ const int32_t input2_val = params.input2_offset + input2_data[i];
+ const int32_t unclamped_result =
+ params.output_offset + MultiplyByQuantizedMultiplier(input1_val * input2_val,
+ params.output_multiplier,
+ params.output_shift);
+ const int32_t clamped_output =
+ std::min(params.quantized_activation_max,
+ std::max(params.quantized_activation_min, unclamped_result));
output_data[i] = static_cast<uint8_t>(clamped_output);
}
}
diff --git a/compute/cker/include/cker/operation/optimized/OptimizedUtils.h b/compute/cker/include/cker/operation/optimized/OptimizedUtils.h
index 3f4ff8afb..ae1f9e78e 100644
--- a/compute/cker/include/cker/operation/optimized/OptimizedUtils.h
+++ b/compute/cker/include/cker/operation/optimized/OptimizedUtils.h
@@ -116,19 +116,106 @@ inline void ExtractPatchIntoBufferColumn(const Shape &input_shape, int w, int h,
}
}
+// Supports per-batch zero_byte for per-batch asymmetric quantized inputs.
+template <typename T>
+void DilatedIm2col(const ConvParams &params, const Shape &input_shape, const T *input_data,
+ const Shape &filter_shape, const Shape &output_shape, T *im2col_data,
+ const int32_t *zero_bytes, const int zero_bytes_len)
+{
+ const int stride_width = params.stride_width;
+ const int stride_height = params.stride_height;
+ const int dilation_width_factor = params.dilation_width_factor;
+ const int dilation_height_factor = params.dilation_height_factor;
+ const int pad_width = params.padding_values.width;
+ const int pad_height = params.padding_values.height;
+ assert(input_shape.DimensionsCount() == 4);
+ assert(filter_shape.DimensionsCount() == 4);
+ assert(output_shape.DimensionsCount() == 4);
+
+  // For dilated convolution the input pixels are not contiguous, so we can't
+  // use the same optimizations as Im2Col(). Note that this code would also work
+  // for the non-dilated case, though likely a bit slower.
+ assert(dilation_width_factor != 1 || dilation_height_factor != 1);
+ assert(im2col_data);
+ const int batches = MatchingDim(input_shape, 0, output_shape, 0);
+ const int input_height = input_shape.Dims(1);
+ const int input_width = input_shape.Dims(2);
+ const int input_depth = MatchingDim(input_shape, 3, filter_shape, 3);
+ const int filter_height = filter_shape.Dims(1);
+ const int filter_width = filter_shape.Dims(2);
+ const int output_height = output_shape.Dims(1);
+ const int output_width = output_shape.Dims(2);
+ MatchingDim(output_shape, 3, filter_shape, 0);
+
+ // Construct the MxN sized im2col matrix.
+  // The rows, M, are sub-ordered B x H x W
+ const Shape row_shape({1, batches, output_height, output_width});
+ // The columns, N, are sub-ordered Kh x Kw x Din
+ const Shape col_shape({1, filter_height, filter_width, input_depth});
+ // Use dimensions M and N to construct dims for indexing directly into im2col
+ const Shape im2col_shape({1, 1, row_shape.FlatSize(), col_shape.FlatSize()});
+
+ // Loop through the output rows (B x H x W)
+ for (int batch = 0; batch < batches; ++batch)
+ {
+ const T zero_byte =
+ zero_bytes_len > 1 ? static_cast<T>(zero_bytes[batch]) : static_cast<T>(zero_bytes[0]);
+ for (int out_y = 0; out_y < output_height; ++out_y)
+ {
+ for (int out_x = 0; out_x < output_width; ++out_x)
+ {
+ // Each im2col row is an output pixel. Arrange the input data in this
+ // row in an order we can conveniently multiply with the filter data.
+ int row_offset = Offset(row_shape, 0, batch, out_y, out_x);
+ const int in_x_origin = (out_x * stride_width) - pad_width;
+ const int in_y_origin = (out_y * stride_height) - pad_height;
+ // Loop through all the pixels of the filter (Kh x Kw)
+ for (int filter_y = 0; filter_y < filter_height; ++filter_y)
+ {
+ const int in_y = in_y_origin + dilation_height_factor * filter_y;
+ if ((in_y >= 0) && (in_y < input_height))
+ {
+ // Filter row is within the input data.
+ // Loop through all the filter pixels in this row.
+ for (int filter_x = 0; filter_x < filter_width; ++filter_x)
+ {
+ const int in_x = in_x_origin + dilation_width_factor * filter_x;
+ int col_offset = Offset(col_shape, 0, filter_y, filter_x, 0);
+ T *dst = im2col_data + Offset(im2col_shape, 0, 0, row_offset, col_offset);
+ if ((in_x >= 0) && (in_x < input_width))
+ {
+ // Filter pixel is within the input, copy the input data.
+ T const *src = input_data + Offset(input_shape, batch, in_y, in_x, 0);
+ memcpy(dst, src, input_depth * sizeof(T));
+ }
+ else
+ {
+ // Filter pixel is outside the input, zero it out.
+ memset(dst, zero_byte, input_depth * sizeof(T));
+ }
+ }
+ }
+ else
+ {
+ // Filter row is outside the input, zero out the entire filter row.
+ int col_offset = Offset(col_shape, 0, filter_y, 0, 0);
+ T *dst = im2col_data + Offset(im2col_shape, 0, 0, row_offset, col_offset);
+ memset(dst, zero_byte, filter_width * input_depth * sizeof(T));
+ }
+ }
+ }
+ }
+ }
+}
+
template <typename T>
void DilatedIm2col(const ConvParams &params, uint8_t zero_byte, const Shape &input_shape,
const T *input_data, const Shape &filter_shape, const Shape &output_shape,
T *im2col_data)
{
- (void)params;
- (void)zero_byte;
- (void)input_shape;
- (void)input_data;
- (void)filter_shape;
- (void)output_shape;
- (void)im2col_data;
- throw std::runtime_error{"NYI: cker DilatedIm2col"};
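+  // A single zero point applies to every batch, so forward to the per-batch overload.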
+ const int32_t zero_point = static_cast<int32_t>(zero_byte);
+ DilatedIm2col<T>(params, input_shape, input_data, filter_shape, output_shape, im2col_data,
+ &zero_point, 1);
}
template <typename T>
diff --git a/compute/cker/include/cker/operation/reference/BinaryArithmeticOps.h b/compute/cker/include/cker/operation/reference/BinaryArithmeticOps.h
index 7a2b896fc..f7e39248c 100644
--- a/compute/cker/include/cker/operation/reference/BinaryArithmeticOps.h
+++ b/compute/cker/include/cker/operation/reference/BinaryArithmeticOps.h
@@ -37,7 +37,7 @@ inline void BinaryArithmeticOp(const BinaryArithmeticOpParam &params, const Shap
const T *input2_data, const Shape &output_shape, T *output_data,
const std::function<T(const T &, const T &)> &fn)
{
- const int32_t flat_size = MatchingFlatSize(input1_shape, input2_shape, output_shape);
+ const int32_t flat_size = MatchingElementsSize(input1_shape, input2_shape, output_shape);
for (int i = 0; i < flat_size; ++i)
{
output_data[i] = ActivationFunctionWithMinMax(fn(input1_data[i], input2_data[i]),
@@ -53,7 +53,7 @@ inline void BinaryArithmeticOp(const BinaryArithmeticOpParam &params, const Shap
float *output_data,
const std::function<float(const float &, const float &)> &fn)
{
- const int size = MatchingFlatSize(input1_shape, input2_shape, output_shape);
+ const int size = MatchingElementsSize(input1_shape, input2_shape, output_shape);
for (int i = 0; i < size; i++)
{
output_data[i] =
diff --git a/compute/test/CMakeLists.txt b/compute/test/CMakeLists.txt
index c016d2bcc..92aac3e72 100644
--- a/compute/test/CMakeLists.txt
+++ b/compute/test/CMakeLists.txt
@@ -1,3 +1,7 @@
+if(NOT ENABLE_TEST)
+ return()
+endif(NOT ENABLE_TEST)
+
set(TEST_COMPUTE test_compute)
file(GLOB_RECURSE TESTS "*.cc")
diff --git a/docs/conf.py b/docs/conf.py
index 649b677a9..9b870097a 100644
--- a/docs/conf.py
+++ b/docs/conf.py
@@ -21,7 +21,7 @@ copyright = '2020, Samsung Research & contributors'
author = 'Samsung Research & contributors'
# The full version, including alpha/beta/rc tags
-release = '1.8.0'
+release = '1.9.0'
# -- General configuration ---------------------------------------------------
diff --git a/docs/howto/how-to-introduce-a-new-operation-into-runtime.md b/docs/howto/how-to-introduce-a-new-operation-into-runtime.md
index 4e295baee..ab449c4be 100644
--- a/docs/howto/how-to-introduce-a-new-operation-into-runtime.md
+++ b/docs/howto/how-to-introduce-a-new-operation-into-runtime.md
@@ -176,6 +176,63 @@ void Dumper::visit(const Select &node)
}
```
+5. Add code for shape inference
+- ONE runtime tries to calculate shapes and allocate memory at compilation time. For output shapes that cannot be calculated at compilation time, ONE runtime calculates the shapes and allocates memory at execution time.
+- Shape calculation at compilation time is called _static shape inference_, and shape calculation at execution time is called _dynamic shape inference_.
+- [`StaticShapeInference.h`](/runtime/onert/compiler/StaticShapeInference.h)
+
+```CPP
+ void visit(const ir::operation::Select &op) override;
+```
+- [`StaticShapeInference.cc`](/runtime/onert/core/src/compiler/StaticShapeInference.cc)
+```CPP
+void StaticShapeInferer::visit(const ir::operation::Select &op)
+{
+ const auto input_cond_idx{op.getInputs().at(ir::operation::Select::Input::CONDITION)};
+ const auto &input_cond = _operands.at(input_cond_idx);
+
+ const auto &input_true = ...
+ const auto &input_false = ...
+ ir::Operand &output = ...
+
+  // Select output shape
+ ir::Shape new_shape = shape_inference::inferSelectShape(
+ input_cond.info().shape(), input_true.info().shape(), input_false.info().shape());
+ output.info().shape(new_shape);
+}
+```
+- [`DynamicShapeInference.h`](/runtime/onert/core/include/exec/DynamicShapeInference.h)
+```CPP
+ void visit(const ir::operation::Select &op) override;
+```
+- [`DynamicShapeInference.cc`](/runtime/onert/core/src/exec/DynamicShapeInference.cc)
+```CPP
+void DynamicShapeInferer::visit(const ir::operation::Select &op)
+{
+ const auto input_cond_idx = op.getInputs().at(ir::operation::Select::Input::CONDITION);
+ const auto &input_cond = _tensor_registry->getITensor(input_cond_idx);
+
+ const auto &input_true = ...
+ const auto &input_false = ...
+ auto output = ...
+
+ if ((!input_cond->is_dynamic()) && (!input_true->is_dynamic()) && (!input_false->is_dynamic()))
+ {
+ return;
+ }
+
+ auto input_cond_shape = input_cond->getShape();
+ auto input_true_shape = input_true->getShape();
+ auto input_false_shape = input_false->getShape();
+
+  // Select output shape
+ ir::Shape new_shape =
+ shape_inference::inferSelectShape(input_cond_shape, input_true_shape, input_false_shape);
+
+ dynamicTensorManagerOf(output)->applyShape(output_ind, new_shape);
+}
+```
+
## Frontend
This module generates IR from a model. There are two kinds of frontend: Loader and NNAPI. First, Loader loads a model file and generates IR from it. Second, NNAPI generates IR from a model set via [Neural Networks API of android](https://developer.android.com/ndk/guides/neuralnetworks)
diff --git a/docs/release/1.9/release-note-1.9.0.md b/docs/release/1.9/release-note-1.9.0.md
new file mode 100644
index 000000000..5ac434b30
--- /dev/null
+++ b/docs/release/1.9/release-note-1.9.0.md
@@ -0,0 +1,38 @@
+# Release Note 1.9.0
+
+## ONE Compiler
+
+### Compiler supports more operations
+
+- NonMaxSuppressionV4, NonMaxSuppressionV5, PadV2, Unique
+
+### Changes
+
+- Quantization enhancements: channel-wise UINT8 quantization (Conv2D, DepthwiseConv, TransposeConv, FullyConnected)
+- Experimental requantization from INT8 to UINT8
+- Adding more operator value tests
+- tf2tfliteV2 supports conversion from Keras models and saved models
+- Refactoring long class code with the visitor pattern for better maintainability
+- Introducing an optimization pass that fuses batch normalization with Transposed Convolution
+
+
+## ONE Runtime
+
+### Runtime backend operation support
+
+- CPU backend: RANK
+- CPU backend qasymm uint8: LOG_SOFTMAX
+- ACL-CL backend: LEAKY_RELU, RESIZE_NEAREST_NEIGHBOR
+
+
+### Optimization
+
+- Copy Elimination between compatible backends
+
+### Operation Implementation
+
+- Operations with the same parameters are unified
+
+### Changes
+
+- CPU backend qasymm uint8 performance enhancement: arithmetic operations
diff --git a/docs/runtime/compute.md b/docs/runtime/compute.md
index 3768cf013..857a589d8 100644
--- a/docs/runtime/compute.md
+++ b/docs/runtime/compute.md
@@ -1 +1,13 @@
# Compute
+
+The `compute` directory contains the libraries that perform the actual computation of neural network operations. These libraries are used by backends. Currently there are two libraries.
+
+## ARMComputeEx
+
+It is an extension of ARM [ComputeLibrary](https://github.com/ARM-software/ComputeLibrary), added to support operations that ComputeLibrary does not support yet. It is used by the `acl_cl` and `acl_neon` backends.
+
+The code structure mirrors ComputeLibrary's. Some code may be copied from the latest version of ComputeLibrary to quickly support operations that the currently used version does not yet include.
+
+## cker
+
+"cker" stands for Cpu KERnel. It is a port of Tensorflow lite's operation kernels and possibly there are some own code. It is used by `cpu` backend.
diff --git a/infra/3rdparty/Eigen/fd6845384b86/URL.default b/infra/3rdparty/Eigen/fd6845384b86/URL.default
index 1fff1b4f3..76b000a52 100644
--- a/infra/3rdparty/Eigen/fd6845384b86/URL.default
+++ b/infra/3rdparty/Eigen/fd6845384b86/URL.default
@@ -1 +1 @@
-https://bitbucket.org/eigen/eigen/get/fd6845384b86.tar.gz
+https://mirror.bazel.build/bitbucket.org/eigen/eigen/get/fd6845384b86.tar.gz
diff --git a/infra/cmake/packages/BoostSourceConfig.cmake b/infra/cmake/packages/BoostSourceConfig.cmake
index ea2a6ca4b..52cda7c7d 100644
--- a/infra/cmake/packages/BoostSourceConfig.cmake
+++ b/infra/cmake/packages/BoostSourceConfig.cmake
@@ -9,7 +9,7 @@ function(_BoostSource_import)
# EXTERNAL_DOWNLOAD_SERVER will be overwritten by CI server to use mirror server.
envoption(EXTERNAL_DOWNLOAD_SERVER "http://sourceforge.net")
- set(BOOST_URL ${EXTERNAL_DOWNLOAD_SERVER}/projects/boost/files/boost/1.58.0/boost_1_58_0.tar.gz)
+ envoption(BOOST_URL ${EXTERNAL_DOWNLOAD_SERVER}/projects/boost/files/boost/1.58.0/boost_1_58_0.tar.gz)
ExternalSource_Download(BOOST ${BOOST_URL})
set(BoostSource_DIR ${BOOST_SOURCE_DIR} PARENT_SCOPE)
diff --git a/infra/cmake/packages/EigenSourceConfig.cmake b/infra/cmake/packages/EigenSourceConfig.cmake
index a0cbf82a9..4aaeb3d00 100644
--- a/infra/cmake/packages/EigenSourceConfig.cmake
+++ b/infra/cmake/packages/EigenSourceConfig.cmake
@@ -10,7 +10,7 @@ function(_EigenSource_import)
# NOTE TensorFlow 1.13.1 uses https://bitbucket.org/eigen/eigen/get/9f48e814419e.tar.gz
# but it has a issue https://eigen.tuxfamily.org/bz/show_bug.cgi?id=1643
# The following URL resolves above issue
- envoption(EXTERNAL_DOWNLOAD_SERVER "https://bitbucket.org")
+ envoption(EXTERNAL_DOWNLOAD_SERVER "https://mirror.bazel.build/bitbucket.org")
envoption(EIGEN_1_13_1_URL ${EXTERNAL_DOWNLOAD_SERVER}/eigen/eigen/get/88fc23324517.tar.gz)
ExternalSource_Download(EIGEN
diff --git a/infra/cmake/packages/TensorFlowGEMMLowpSource-2.3.0/TensorFlowGEMMLowpSourceConfig.cmake b/infra/cmake/packages/TensorFlowGEMMLowpSource-2.3.0/TensorFlowGEMMLowpSourceConfig.cmake
new file mode 100644
index 000000000..bc13d6227
--- /dev/null
+++ b/infra/cmake/packages/TensorFlowGEMMLowpSource-2.3.0/TensorFlowGEMMLowpSourceConfig.cmake
@@ -0,0 +1,20 @@
+function(_TensorFlowGEMMLowpSource_import)
+ if(NOT DOWNLOAD_GEMMLOWP)
+ set(TensorFlowGEMMLowpSource_FOUND FALSE PARENT_SCOPE)
+ return()
+ endif(NOT DOWNLOAD_GEMMLOWP)
+
+ nnas_include(ExternalSourceTools)
+ nnas_include(OptionTools)
+
+ # Exact version used by TensorFlow v2.3.0.
+ # See tensorflow/tensorflow/workspace.bzl.
+ envoption(TENSORFLOW_2_3_0_GEMMLOWP_URL https://github.com/google/gemmlowp/archive/fda83bdc38b118cc6b56753bd540caa49e570745.zip)
+
+ ExternalSource_Download(GEMMLOWP DIRNAME TENSORFLOW-2.3.0-GEMMLOWP ${TENSORFLOW_2_3_0_GEMMLOWP_URL})
+
+ set(TensorFlowGEMMLowpSource_DIR ${GEMMLOWP_SOURCE_DIR} PARENT_SCOPE)
+ set(TensorFlowGEMMLowpSource_FOUND TRUE PARENT_SCOPE)
+endfunction(_TensorFlowGEMMLowpSource_import)
+
+_TensorFlowGEMMLowpSource_import()
diff --git a/infra/cmake/packages/TensorFlowGEMMLowpSource-2.3.0/TensorFlowGEMMLowpSourceConfigVersion.cmake b/infra/cmake/packages/TensorFlowGEMMLowpSource-2.3.0/TensorFlowGEMMLowpSourceConfigVersion.cmake
new file mode 100644
index 000000000..04df5eb6d
--- /dev/null
+++ b/infra/cmake/packages/TensorFlowGEMMLowpSource-2.3.0/TensorFlowGEMMLowpSourceConfigVersion.cmake
@@ -0,0 +1,10 @@
+set(PACKAGE_VERSION "2.3.0")
+set(PACKAGE_VERSION_EXACT FALSE)
+set(PACKAGE_VERSION_COMPATIBLE FALSE)
+set(PACKAGE_VERSION_UNSUITABLE TRUE)
+
+if(PACKAGE_FIND_VERSION VERSION_EQUAL PACKAGE_VERSION)
+ set(PACKAGE_VERSION_EXACT TRUE)
+ set(PACKAGE_VERSION_COMPATIBLE TRUE)
+ set(PACKAGE_VERSION_UNSUITABLE FALSE)
+endif(PACKAGE_FIND_VERSION VERSION_EQUAL PACKAGE_VERSION)
diff --git a/infra/cmake/packages/TensorFlowRuySource-2.3.0/TensorFlowRuySourceConfig.cmake b/infra/cmake/packages/TensorFlowRuySource-2.3.0/TensorFlowRuySourceConfig.cmake
new file mode 100644
index 000000000..3dbf05ece
--- /dev/null
+++ b/infra/cmake/packages/TensorFlowRuySource-2.3.0/TensorFlowRuySourceConfig.cmake
@@ -0,0 +1,20 @@
+function(_TensorFlowRuySource_import)
+ if(NOT DOWNLOAD_RUY)
+ set(TensorFlowRuySource_FOUND FALSE PARENT_SCOPE)
+ return()
+ endif(NOT DOWNLOAD_RUY)
+
+ nnas_include(ExternalSourceTools)
+ nnas_include(OptionTools)
+
+ # Exact version used by TensorFlow v2.3.0.
+ # See tensorflow/third_party/ruy/workspace.bzl
+ envoption(TENSORFLOW_2_3_0_RUY_URL https://github.com/google/ruy/archive/34ea9f4993955fa1ff4eb58e504421806b7f2e8f.zip)
+
+ ExternalSource_Download(RUY DIRNAME TENSORFLOW-2.3.0-RUY ${TENSORFLOW_2_3_0_RUY_URL})
+
+ set(TensorFlowRuySource_DIR ${RUY_SOURCE_DIR} PARENT_SCOPE)
+ set(TensorFlowRuySource_FOUND TRUE PARENT_SCOPE)
+endfunction(_TensorFlowRuySource_import)
+
+_TensorFlowRuySource_import()
diff --git a/infra/cmake/packages/TensorFlowRuySource-2.3.0/TensorFlowRuySourceConfigVersion.cmake b/infra/cmake/packages/TensorFlowRuySource-2.3.0/TensorFlowRuySourceConfigVersion.cmake
new file mode 100644
index 000000000..04df5eb6d
--- /dev/null
+++ b/infra/cmake/packages/TensorFlowRuySource-2.3.0/TensorFlowRuySourceConfigVersion.cmake
@@ -0,0 +1,10 @@
+set(PACKAGE_VERSION "2.3.0")
+set(PACKAGE_VERSION_EXACT FALSE)
+set(PACKAGE_VERSION_COMPATIBLE FALSE)
+set(PACKAGE_VERSION_UNSUITABLE TRUE)
+
+if(PACKAGE_FIND_VERSION VERSION_EQUAL PACKAGE_VERSION)
+ set(PACKAGE_VERSION_EXACT TRUE)
+ set(PACKAGE_VERSION_COMPATIBLE TRUE)
+ set(PACKAGE_VERSION_UNSUITABLE FALSE)
+endif(PACKAGE_FIND_VERSION VERSION_EQUAL PACKAGE_VERSION)
diff --git a/infra/nncc/CMakeLists.txt b/infra/nncc/CMakeLists.txt
index 0be6885e2..d416db2fd 100644
--- a/infra/nncc/CMakeLists.txt
+++ b/infra/nncc/CMakeLists.txt
@@ -89,6 +89,7 @@ option(BUILD_PROTOBUF "Locally build Protocol Buffer from the downloaded source"
option(DOWNLOAD_EIGEN "Download Eigen source" ON)
option(DOWNLOAD_FARMHASH "Download farmhash source" ON)
option(DOWNLOAD_GEMMLOWP "Download GEMM low precesion library source" ON)
+option(DOWNLOAD_RUY "Download ruy source" ON)
option(DOWNLOAD_NEON2SSE "Download NEON2SSE library source" ON)
option(DOWNLOAD_GFLAGS "Download GFlags source" OFF)
option(DOWNLOAD_FLATBUFFERS "Download FlatBuffers source" ON)
diff --git a/infra/nnfw/cmake/CfgOptionFlags.cmake b/infra/nnfw/cmake/CfgOptionFlags.cmake
index 3c6b7d960..b3d058164 100644
--- a/infra/nnfw/cmake/CfgOptionFlags.cmake
+++ b/infra/nnfw/cmake/CfgOptionFlags.cmake
@@ -30,6 +30,7 @@ option(BUILD_NNPACKAGE_RUN "Build nnpackge_run" ON)
option(BUILD_TFLITE_LOADER "Build TensorFlow Lite loader" ON)
option(BUILD_CIRCLE_LOADER "Build circle loader" ON)
option(BUILD_TFLITE_LOADER_TEST_TOOL "Build tflite loader testing tool" ON)
+option(BUILD_WITH_HDF5 "Build test tool with HDF5 library" ON)
option(GENERATE_RUNTIME_NNAPI_TESTS "Generate NNAPI operation gtest" ON)
option(ENVVAR_ONERT_CONFIG "Use environment variable for onert configuration" ON)
option(INSTALL_TEST_SCRIPTS "Install test scripts" ON)
diff --git a/infra/nnfw/cmake/options/options_aarch64-android.cmake b/infra/nnfw/cmake/options/options_aarch64-android.cmake
index 97371f680..d720b202a 100644
--- a/infra/nnfw/cmake/options/options_aarch64-android.cmake
+++ b/infra/nnfw/cmake/options/options_aarch64-android.cmake
@@ -8,11 +8,11 @@ option(BUILD_ANDROID_TFLITE "Enable android support for TensorFlow Lite" ON)
option(BUILD_ANDROID_BENCHMARK_APP "Enable Android Benchmark App" ON)
option(DOWNLOAD_NEON2SSE "Download NEON2SSE library source" OFF)
# Need boost library
-option(DOWNLOAD_BOOST "Download boost source" OFF)
-option(BUILD_BOOST "Build boost source" OFF)
+option(DOWNLOAD_BOOST "Download boost source" ON)
+option(BUILD_BOOST "Build boost source" ON)
option(BUILD_RUNTIME_NNAPI_TEST "Build Runtime NN API Generated Test" OFF)
option(BUILD_NNAPI_TEST "Build nnapi_test" OFF)
-option(BUILD_NNPACKAGE_RUN "Build nnpackge_run" OFF)
+option(BUILD_NNPACKAGE_RUN "Build nnpackge_run" ON)
option(BUILD_TFLITE_RUN "Build tflite-run" OFF)
option(BUILD_TFLITE_LOADER_TEST_TOOL "Build tflite loader testing tool" OFF)
option(BUILD_LOGGING "Build logging runtime" OFF)
diff --git a/infra/nnfw/cmake/options/options_aarch64-tizen.cmake b/infra/nnfw/cmake/options/options_aarch64-tizen.cmake
index 590d128da..57d4c1061 100644
--- a/infra/nnfw/cmake/options/options_aarch64-tizen.cmake
+++ b/infra/nnfw/cmake/options/options_aarch64-tizen.cmake
@@ -6,6 +6,5 @@ option(BUILD_TENSORFLOW_LITE "Build TensorFlow Lite from the downloaded source"
option(DOWNLOAD_NEON2SSE "Download NEON2SSE library source" OFF)
option(BUILD_LOGGING "Build logging runtime" OFF)
-option(BUILD_TFLITE_RUN "Build tflite-run" OFF)
option(GENERATE_RUNTIME_NNAPI_TESTS "Generate NNAPI operation gtest" OFF)
option(ENVVAR_ONERT_CONFIG "Use environment variable for onert configuration" OFF)
diff --git a/infra/nnfw/cmake/options/options_armv7l-tizen.cmake b/infra/nnfw/cmake/options/options_armv7l-tizen.cmake
index 25c848f95..c27a7ad01 100644
--- a/infra/nnfw/cmake/options/options_armv7l-tizen.cmake
+++ b/infra/nnfw/cmake/options/options_armv7l-tizen.cmake
@@ -6,6 +6,5 @@ option(BUILD_TENSORFLOW_LITE "Build TensorFlow Lite from the downloaded source"
option(DOWNLOAD_NEON2SSE "Download NEON2SSE library source" OFF)
option(BUILD_LOGGING "Build logging runtime" OFF)
-option(BUILD_TFLITE_RUN "Build tflite-run" OFF)
option(GENERATE_RUNTIME_NNAPI_TESTS "Generate NNAPI operation gtest" OFF)
option(ENVVAR_ONERT_CONFIG "Use environment variable for onert configuration" OFF)
diff --git a/infra/nnfw/cmake/packages/BoostConfig.cmake b/infra/nnfw/cmake/packages/BoostConfig.cmake
index c4d7d5857..4f60e9107 100644
--- a/infra/nnfw/cmake/packages/BoostConfig.cmake
+++ b/infra/nnfw/cmake/packages/BoostConfig.cmake
@@ -13,7 +13,7 @@ function(_Boost_Build Boost_PREFIX)
RESULT_VARIABLE Boost_BUILD)
endif()
- set(BoostBuild_DIR ${BoostSource_DIR})
+ set(BoostBuild_DIR ${CMAKE_BINARY_DIR}/externals/boost)
set(BoostInstall_DIR ${Boost_PREFIX})
unset(Boost_Options)
@@ -55,18 +55,13 @@ if (NOT BUILD_BOOST)
endif()
endif()
-set(Boost_PREFIX ${CMAKE_INSTALL_PREFIX})
+set(Boost_PREFIX ${EXT_OVERLAY_DIR})
if(BUILD_BOOST)
_Boost_Build("${Boost_PREFIX}")
- # Let's use locally built boost to system-wide one so sub modules
- # needing Boost library and header files can search for them
- # in ${Boost_PREFIX} directory
- list(APPEND CMAKE_PREFIX_PATH "${Boost_PREFIX}")
-
# Without Boost_INCLUDE_DIR, it complains the variable is missing during find_package.
- set(Boost_INCLUDE_DIR ${CMAKE_INSTALL_PREFIX}/include)
+ set(Boost_INCLUDE_DIR ${Boost_PREFIX}/include)
# 1) without static build, it will complain it cannot find libc++_shared.so.
   # 2) We use static libraries for other libraries.
diff --git a/infra/nnfw/cmake/packages/HDF5Config.cmake b/infra/nnfw/cmake/packages/HDF5Config.cmake
index b965ffc2c..8c2badf54 100644
--- a/infra/nnfw/cmake/packages/HDF5Config.cmake
+++ b/infra/nnfw/cmake/packages/HDF5Config.cmake
@@ -4,6 +4,11 @@ unset(HDF5_INCLUDE_DIRS CACHE)
unset(HDF5_CXX_LIBRARY_hdf5 CACHE)
unset(HDF5_CXX_LIBRARY_hdf5_cpp CACHE)
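+# Skip HDF5 detection entirely when the build is configured without HDF5 support.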
+if(NOT BUILD_WITH_HDF5)
+ set(HDF5_FOUND FALSE)
+ return()
+endif(NOT BUILD_WITH_HDF5)
+
# Case 1. external hdf5
if(DEFINED EXT_HDF5_DIR)
find_path(HDF5_INCLUDE_DIRS NAMES H5Cpp.h NO_CMAKE_FIND_ROOT_PATH PATHS "${EXT_HDF5_DIR}/include")
diff --git a/infra/nnfw/command/copyright-check b/infra/nnfw/command/copyright-check
index b9ae5b46a..b5e133f82 100644
--- a/infra/nnfw/command/copyright-check
+++ b/infra/nnfw/command/copyright-check
@@ -4,11 +4,16 @@ INVALID_EXIT=0
check_copyright() {
DIRECTORIES_NOT_TO_BE_TESTED=$1
- CORRECT_COPYRIGHT="Copyright \(c\) [0-9]+ Samsung Electronics Co\., Ltd\. All Rights Reserved"
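+  # Accept a year range (e.g. 2019-2020) in the copyright line as well as a single year.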
+ CORRECT_COPYRIGHT="Copyright \(c\) [0-9\-]+ Samsung Electronics Co\., Ltd\. All Rights Reserved"
FILES_TO_CHECK=$(git ls-files -c --exclude-standard)
FILES_TO_CHECK_COPYRIGHTS=()
for f in ${FILES_TO_CHECK[@]}; do
+    # Skip files manually excluded from checking
+ if [[ ${f} == +(*/NeuralNetworks.h|*/NeuralNetworksExtensions.h) ]]; then
+ continue
+ fi
+
# File extension to check
if [[ ${f} == +(*.h|*.hpp|*.cpp|*.cc|*.c|*.cl) ]]; then
FILES_TO_CHECK_COPYRIGHTS+=("${f}")
diff --git a/infra/scripts/build-tcm.sh b/infra/scripts/build-tcm.sh
index 22fb33558..38533c1f9 100755
--- a/infra/scripts/build-tcm.sh
+++ b/infra/scripts/build-tcm.sh
@@ -18,7 +18,7 @@ echo ${PROJECT_DIR:=${PWD}}
java -jar $PROJECT_DIR/tca-standalone-0.0.8.jar \
--outdir=$PROJECT_DIR/tcm-output \
- --config=$PROJECT_DIR/.ahub/tcchecker-tca/config.yaml \
+ --config=$PROJECT_DIR/src/.ahub/tcchecker-tca/config.yaml \
--local=$PROJECT_DIR/src \
--logfile=$PROJECT_DIR/tcm-output/tcm.log \
--debug
diff --git a/infra/scripts/docker_build_nncc.sh b/infra/scripts/docker_build_nncc.sh
index 046bc8a4c..6cdfdf01b 100755
--- a/infra/scripts/docker_build_nncc.sh
+++ b/infra/scripts/docker_build_nncc.sh
@@ -57,9 +57,11 @@ mkdir -p ${NNCC_INSTALL_PREFIX}
# create python virtual environment
./nncc docker-run python3 -m venv "${NNCC_INSTALL_PREFIX}/bin/venv"
+# TODO Remove the pinned versions 'pip==20.2.1 setuptools==49.3.0'
+# NOTE Pinning these versions is a temporary hotfix for the setuptools 50.x.y issue
./nncc docker-run "${NNCC_INSTALL_PREFIX}/bin/venv/bin/python" \
-m pip --default-timeout=1000 --trusted-host pypi.org --trusted-host files.pythonhost.org \
- install -U pip setuptools
+ install -U pip==20.2.1 setuptools==49.3.0
./nncc docker-run "${NNCC_INSTALL_PREFIX}/bin/venv/bin/python" \
-m pip --default-timeout=1000 --trusted-host pypi.org --trusted-host files.pythonhost.org \
install tensorflow-cpu==2.3.0
diff --git a/infra/scripts/tizen_xu4_test.sh b/infra/scripts/tizen_xu4_test.sh
index 8f9e86fb0..05e55848c 100755
--- a/infra/scripts/tizen_xu4_test.sh
+++ b/infra/scripts/tizen_xu4_test.sh
@@ -12,10 +12,12 @@ function Usage()
echo "Usage: ./tizen_xu4_test.sh --rpm-dir=path/to/rpm-dir"
echo "Usage: ./tizen_xu4_test.sh --test-suite-path=path/to/test-suite.tar.gz"
echo "Usage: ./tizen_xu4_test.sh --skip-install-model"
+ echo "Usage: ./tizen_xu4_test.sh --rpm-dir=path/to/rpm-dir --skip-test"
echo ""
echo "--rpm-dir <dir> : directory containing nnfw.rpm and nnfw-test.rpm"
echo "--test-suite-path <dir> : filepath to test-suite.tar.gz"
echo "--skip-install-model : skip install downloaded model"
+ echo "--skip-test : skip running test"
echo "--gcov-dir <dir> : directory to save gcov files"
}
@@ -36,7 +38,7 @@ function install_model()
# download api test model file for nnfw_api_gtest
MODEL_CACHE_DIR=$(mktemp -d)
tests/scripts/models/run_test.sh --download=on --run=off \
- --configdir=test/scripts/nnfw_api_gtest/models \
+ --configdir=tests/scripts/models/nnfw_api_gtest \
--cachedir=$MODEL_CACHE_DIR
tar -zcf $MODEL_CACHE_DIR/api_model_test.tar.gz -C $MODEL_CACHE_DIR .
$SDB_CMD push $MODEL_CACHE_DIR/api_model_test.tar.gz $TEST_ROOT/Product/out/unittest_standalone/nnfw_api_gtest_models/
@@ -74,6 +76,7 @@ function prepare_suite_test()
}
INSTALL_MODEL="1"
+RUN_TEST="1"
# Parse command argv
for i in "$@"
do
@@ -102,6 +105,9 @@ do
--gcov-dir=*)
GCOV_DIR=${i#*=}
;;
+ --skip-test)
+ RUN_TEST="0"
+ ;;
esac
shift
done
@@ -148,6 +154,11 @@ else
echo "======= Skip install model ======="
fi
+if [ $RUN_TEST = "0" ]; then
+ echo "======= Skip test ======="
+ exit 0
+fi
+
if [ -z "${GCOV_DIR}" ]; then
${SDB_CMD} shell /bin/bash -c "IGNORE_MD5=1 ${TEST_ROOT}/infra/scripts/test_ubuntu_runtime.sh --backend acl_cl --tflite-loader"
${SDB_CMD} shell /bin/bash -c "IGNORE_MD5=1 ${TEST_ROOT}/infra/scripts/test_ubuntu_runtime.sh --backend acl_neon"
diff --git a/packaging/nnfw.spec b/packaging/nnfw.spec
index a1157c702..1b8c5fb31 100644
--- a/packaging/nnfw.spec
+++ b/packaging/nnfw.spec
@@ -1,6 +1,6 @@
Name: nnfw
Summary: nnfw
-Version: 1.8.0
+Version: 1.9.0
Release: 1
Group: Development
License: Apache-2.0 and MIT and BSD-2-Clause
@@ -203,10 +203,12 @@ install -m 0644 ./tests/scripts/build_path.txt %{buildroot}%{test_install_dir}/t
%{_libdir}/pkgconfig/nnfw-plugin.pc
%endif
+%ifarch arm armv7l aarch64
%files minimal-app
%manifest %{name}.manifest
%defattr(-,root,root,-)
%{_bindir}/onert-minimal-app
+%endif
%if %{test_build} == 1
%files test
diff --git a/res/TensorFlowLiteRecipes/Net_TConv_BN_000/test.rule b/res/TensorFlowLiteRecipes/Net_TConv_BN_000/test.rule
new file mode 100644
index 000000000..0988ecf28
--- /dev/null
+++ b/res/TensorFlowLiteRecipes/Net_TConv_BN_000/test.rule
@@ -0,0 +1,7 @@
+# To check if the BatchNorm op (mul + add) is fused into the Transposed Convolution op
+
+RULE "VERIFY_FILE_FORMAT" $(verify_file_format) '=' 1
+
+RULE "TCONV_EXIST" $(op_count TRANSPOSE_CONV) '=' 1
+RULE "NO_MUL" $(op_count MUL) '=' 0
+RULE "NO_ADD" $(op_count ADD) '=' 0
diff --git a/res/TensorFlowLiteRecipes/NonMaxSuppressionV4_000/test.recipe b/res/TensorFlowLiteRecipes/NonMaxSuppressionV4_000/test.recipe
new file mode 100644
index 000000000..fabe5ac59
--- /dev/null
+++ b/res/TensorFlowLiteRecipes/NonMaxSuppressionV4_000/test.recipe
@@ -0,0 +1,52 @@
+operand {
+ name: "boxes"
+ type: FLOAT32
+ shape { dim: 10 dim: 4 }
+}
+operand {
+ name: "scores"
+ type: FLOAT32
+ shape { dim: 10 }
+}
+operand {
+ name: "max_output_size"
+ type: INT32
+ shape { }
+}
+operand {
+ name: "iou_threshold"
+ type: FLOAT32
+ shape { }
+}
+operand {
+ name: "score_threshold"
+ type: FLOAT32
+ shape { }
+}
+operand {
+ name: "selected_indices"
+ type: INT32
+ shape { }
+}
+operand {
+ name: "valid_outputs"
+ type: INT32
+ shape { }
+}
+operation {
+ type: "NonMaxSuppressionV4"
+ input: "boxes"
+ input: "scores"
+ input: "max_output_size"
+ input: "iou_threshold"
+ input: "score_threshold"
+ output: "selected_indices"
+ output: "valid_outputs"
+}
+input: "boxes"
+input: "scores"
+input: "max_output_size"
+input: "iou_threshold"
+input: "score_threshold"
+output: "selected_indices"
+output: "valid_outputs"
diff --git a/res/TensorFlowLiteRecipes/NonMaxSuppressionV4_000/test.reverse b/res/TensorFlowLiteRecipes/NonMaxSuppressionV4_000/test.reverse
new file mode 100644
index 000000000..e69de29bb
--- /dev/null
+++ b/res/TensorFlowLiteRecipes/NonMaxSuppressionV4_000/test.reverse
diff --git a/res/TensorFlowLiteRecipes/NonMaxSuppressionV4_001/test.recipe b/res/TensorFlowLiteRecipes/NonMaxSuppressionV4_001/test.recipe
new file mode 100644
index 000000000..12b60677c
--- /dev/null
+++ b/res/TensorFlowLiteRecipes/NonMaxSuppressionV4_001/test.recipe
@@ -0,0 +1,52 @@
+operand {
+ name: "boxes"
+ type: FLOAT32
+ shape { dim: 10 dim: 4 }
+}
+operand {
+ name: "scores"
+ type: FLOAT32
+ shape { dim: 10 }
+}
+operand {
+ name: "max_output_size"
+ type: INT32
+ shape { }
+ filler { tag: "explicit" arg: "5" }
+}
+operand {
+ name: "iou_threshold"
+ type: FLOAT32
+ shape { }
+}
+operand {
+ name: "score_threshold"
+ type: FLOAT32
+ shape { }
+}
+operand {
+ name: "selected_indices"
+ type: INT32
+ shape { }
+}
+operand {
+ name: "valid_outputs"
+ type: INT32
+ shape { }
+}
+operation {
+ type: "NonMaxSuppressionV4"
+ input: "boxes"
+ input: "scores"
+ input: "max_output_size"
+ input: "iou_threshold"
+ input: "score_threshold"
+ output: "selected_indices"
+ output: "valid_outputs"
+}
+input: "boxes"
+input: "scores"
+input: "iou_threshold"
+input: "score_threshold"
+output: "selected_indices"
+output: "valid_outputs"
diff --git a/res/TensorFlowLiteRecipes/NonMaxSuppressionV4_001/test.reverse b/res/TensorFlowLiteRecipes/NonMaxSuppressionV4_001/test.reverse
new file mode 100644
index 000000000..e69de29bb
--- /dev/null
+++ b/res/TensorFlowLiteRecipes/NonMaxSuppressionV4_001/test.reverse
diff --git a/res/TensorFlowLiteRecipes/NonMaxSuppressionV5_000/test.recipe b/res/TensorFlowLiteRecipes/NonMaxSuppressionV5_000/test.recipe
new file mode 100644
index 000000000..5e2616a8b
--- /dev/null
+++ b/res/TensorFlowLiteRecipes/NonMaxSuppressionV5_000/test.recipe
@@ -0,0 +1,66 @@
+operand {
+ name: "boxes"
+ type: FLOAT32
+ shape { dim: 10 dim: 4 }
+}
+operand {
+ name: "scores"
+ type: FLOAT32
+ shape { dim: 10 }
+}
+operand {
+ name: "max_output_size"
+ type: INT32
+ shape { }
+}
+operand {
+ name: "iou_threshold"
+ type: FLOAT32
+ shape { }
+}
+operand {
+ name: "score_threshold"
+ type: FLOAT32
+ shape { }
+}
+operand {
+ name: "soft_nms_sigma"
+ type: FLOAT32
+ shape { }
+}
+operand {
+ name: "selected_indices"
+ type: INT32
+ shape { }
+}
+operand {
+ name: "selected_scores"
+ type: FLOAT32
+ shape { }
+}
+operand {
+ name: "valid_outputs"
+ type: INT32
+ shape { }
+}
+operation {
+ type: "NonMaxSuppressionV5"
+ input: "boxes"
+ input: "scores"
+ input: "max_output_size"
+ input: "iou_threshold"
+ input: "score_threshold"
+ input: "soft_nms_sigma"
+ output: "selected_indices"
+ output: "selected_scores"
+ output: "valid_outputs"
+}
+input: "boxes"
+input: "scores"
+input: "max_output_size"
+input: "iou_threshold"
+input: "score_threshold"
+input: "soft_nms_sigma"
+output: "selected_indices"
+output: "selected_scores"
+output: "valid_outputs"
diff --git a/res/TensorFlowLiteRecipes/NonMaxSuppressionV5_000/test.reverse b/res/TensorFlowLiteRecipes/NonMaxSuppressionV5_000/test.reverse
new file mode 100644
index 000000000..e69de29bb
--- /dev/null
+++ b/res/TensorFlowLiteRecipes/NonMaxSuppressionV5_000/test.reverse
diff --git a/res/TensorFlowLiteRecipes/NonMaxSuppressionV5_001/test.recipe b/res/TensorFlowLiteRecipes/NonMaxSuppressionV5_001/test.recipe
new file mode 100644
index 000000000..11d105908
--- /dev/null
+++ b/res/TensorFlowLiteRecipes/NonMaxSuppressionV5_001/test.recipe
@@ -0,0 +1,66 @@
+operand {
+ name: "boxes"
+ type: FLOAT32
+ shape { dim: 10 dim: 4 }
+}
+operand {
+ name: "scores"
+ type: FLOAT32
+ shape { dim: 10 }
+}
+operand {
+ name: "max_output_size"
+ type: INT32
+ shape { }
+ filler { tag: "explicit" arg: "5" }
+}
+operand {
+ name: "iou_threshold"
+ type: FLOAT32
+ shape { }
+}
+operand {
+ name: "score_threshold"
+ type: FLOAT32
+ shape { }
+}
+operand {
+ name: "soft_nms_sigma"
+ type: FLOAT32
+ shape { }
+}
+operand {
+ name: "selected_indices"
+ type: INT32
+ shape { }
+}
+operand {
+ name: "selected_scores"
+ type: FLOAT32
+ shape { }
+}
+operand {
+ name: "valid_outputs"
+ type: INT32
+ shape { }
+}
+operation {
+ type: "NonMaxSuppressionV5"
+ input: "boxes"
+ input: "scores"
+ input: "max_output_size"
+ input: "iou_threshold"
+ input: "score_threshold"
+ input: "soft_nms_sigma"
+ output: "selected_indices"
+ output: "selected_scores"
+ output: "valid_outputs"
+}
+input: "boxes"
+input: "scores"
+input: "iou_threshold"
+input: "score_threshold"
+input: "soft_nms_sigma"
+output: "selected_indices"
+output: "selected_scores"
+output: "valid_outputs"
diff --git a/res/TensorFlowLiteRecipes/NonMaxSuppressionV5_001/test.reverse b/res/TensorFlowLiteRecipes/NonMaxSuppressionV5_001/test.reverse
new file mode 100644
index 000000000..e69de29bb
--- /dev/null
+++ b/res/TensorFlowLiteRecipes/NonMaxSuppressionV5_001/test.reverse
diff --git a/res/TensorFlowLiteRecipes/PadV2_000/test.recipe b/res/TensorFlowLiteRecipes/PadV2_000/test.recipe
new file mode 100644
index 000000000..563339dee
--- /dev/null
+++ b/res/TensorFlowLiteRecipes/PadV2_000/test.recipe
@@ -0,0 +1,40 @@
+operand {
+ name: "ifm"
+ type: FLOAT32
+ shape { dim: 1 dim: 3 dim: 3 dim: 2 }
+}
+operand {
+ name: "padding"
+ type: INT32
+ shape { dim: 4 dim: 2 }
+ filler {
+ tag: "explicit"
+ arg: "0" arg: "0"
+ arg: "1" arg: "1"
+ arg: "2" arg: "2"
+ arg: "0" arg: "0"
+ }
+}
+operand {
+ name: "constant_values"
+ type: INT32
+ shape { dim: 1 }
+ filler {
+ tag: "explicit"
+ arg: "1"
+ }
+}
+operand {
+ name: "ofm"
+ type: FLOAT32
+ shape { dim: 1 dim: 5 dim: 7 dim: 2 }
+}
+operation {
+ type: "PadV2"
+ input: "ifm"
+ input: "padding"
+ input: "constant_values"
+ output: "ofm"
+}
+input: "ifm"
+output: "ofm"
diff --git a/res/TensorFlowLiteRecipes/PadV2_000/test.reverse b/res/TensorFlowLiteRecipes/PadV2_000/test.reverse
new file mode 100644
index 000000000..e69de29bb
--- /dev/null
+++ b/res/TensorFlowLiteRecipes/PadV2_000/test.reverse
diff --git a/res/TensorFlowLiteRecipes/Tanh_U8_000/test.recipe b/res/TensorFlowLiteRecipes/Tanh_U8_000/test.recipe
new file mode 100644
index 000000000..0ecb9e9a8
--- /dev/null
+++ b/res/TensorFlowLiteRecipes/Tanh_U8_000/test.recipe
@@ -0,0 +1,19 @@
+operand {
+ name: "ifm"
+ type: UINT8
+ shape { dim: 1 dim: 3 dim: 3 dim: 2 }
+ quant { min: 0 max: 2 scale: 0.0078125 zero_point: 0 }
+}
+operand {
+ name: "ofm"
+ type: UINT8
+ shape { dim: 1 dim: 3 dim: 3 dim: 2 }
+ quant { min: 0 max: 2 scale: 0.0078125 zero_point: 0 }
+}
+operation {
+ type: "Tanh"
+ input: "ifm"
+ output: "ofm"
+}
+input: "ifm"
+output: "ofm"
diff --git a/res/TensorFlowLiteRecipes/Tanh_U8_000/test.reverse b/res/TensorFlowLiteRecipes/Tanh_U8_000/test.reverse
new file mode 100644
index 000000000..e69de29bb
--- /dev/null
+++ b/res/TensorFlowLiteRecipes/Tanh_U8_000/test.reverse
diff --git a/res/TensorFlowLiteRecipes/Unique_000/test.recipe b/res/TensorFlowLiteRecipes/Unique_000/test.recipe
index 887380c48..3110b5ed9 100644
--- a/res/TensorFlowLiteRecipes/Unique_000/test.recipe
+++ b/res/TensorFlowLiteRecipes/Unique_000/test.recipe
@@ -6,7 +6,7 @@ operand {
operand {
name: "ofm"
type: FLOAT32
- shape { }
+ shape { dim: 0 }
}
operand {
name: "ofm_idx"
diff --git a/res/TensorFlowLiteRecipes/Unique_001/test.recipe b/res/TensorFlowLiteRecipes/Unique_001/test.recipe
index 9beb51690..d654f79b9 100644
--- a/res/TensorFlowLiteRecipes/Unique_001/test.recipe
+++ b/res/TensorFlowLiteRecipes/Unique_001/test.recipe
@@ -6,7 +6,7 @@ operand {
operand {
name: "ofm"
type: FLOAT32
- shape { }
+ shape { dim: 0 }
}
operand {
name: "ofm_idx"
diff --git a/res/TensorFlowLiteRecipes/Unique_002/test.recipe b/res/TensorFlowLiteRecipes/Unique_002/test.recipe
index 67b947ff8..d9f2393b8 100644
--- a/res/TensorFlowLiteRecipes/Unique_002/test.recipe
+++ b/res/TensorFlowLiteRecipes/Unique_002/test.recipe
@@ -6,7 +6,7 @@ operand {
operand {
name: "ofm"
type: INT32
- shape { }
+ shape { dim: 0 }
}
operand {
name: "ofm_idx"
diff --git a/res/TensorFlowLiteRecipes/Unique_003/test.recipe b/res/TensorFlowLiteRecipes/Unique_003/test.recipe
index 375db66e8..de9e87af9 100644
--- a/res/TensorFlowLiteRecipes/Unique_003/test.recipe
+++ b/res/TensorFlowLiteRecipes/Unique_003/test.recipe
@@ -6,7 +6,7 @@ operand {
operand {
name: "ofm"
type: INT32
- shape { }
+ shape { dim: 0 }
}
operand {
name: "ofm_idx"
diff --git a/res/TensorFlowLiteRecipes/Unique_U8_000/test.recipe b/res/TensorFlowLiteRecipes/Unique_U8_000/test.recipe
index d3985e401..3906d2c5e 100644
--- a/res/TensorFlowLiteRecipes/Unique_U8_000/test.recipe
+++ b/res/TensorFlowLiteRecipes/Unique_U8_000/test.recipe
@@ -7,7 +7,7 @@ operand {
operand {
name: "ofm"
type: UINT8
- shape { }
+ shape { dim: 0 }
}
operand {
name: "ofm_idx"
diff --git a/res/TensorFlowLiteRecipes/Unique_U8_001/test.recipe b/res/TensorFlowLiteRecipes/Unique_U8_001/test.recipe
index b08dd85cc..2bac10ae7 100644
--- a/res/TensorFlowLiteRecipes/Unique_U8_001/test.recipe
+++ b/res/TensorFlowLiteRecipes/Unique_U8_001/test.recipe
@@ -7,7 +7,7 @@ operand {
operand {
name: "ofm"
type: UINT8
- shape { }
+ shape { dim: 0 }
}
operand {
name: "ofm_idx"
diff --git a/res/TensorFlowPythonExamples/examples/PadV2/__init__.py b/res/TensorFlowPythonExamples/examples/PadV2/__init__.py
new file mode 100644
index 000000000..99940bf85
--- /dev/null
+++ b/res/TensorFlowPythonExamples/examples/PadV2/__init__.py
@@ -0,0 +1,8 @@
+import tensorflow as tf
+import numpy as np
+
+input_ = tf.compat.v1.placeholder(shape=[1, 1, 1, 1], dtype=tf.float32)
+paddings_ = tf.compat.v1.constant(
+ np.array([[1, 1], [2, 2], [3, 3], [4, 4]], dtype=np.int32))
+constant_values_ = tf.compat.v1.constant(1, shape=(), dtype=tf.float32)
+op_ = tf.compat.v1.pad(input_, paddings=paddings_, constant_values=constant_values_)
diff --git a/res/TensorFlowPythonExamples/examples/gelu/__init__.py b/res/TensorFlowPythonExamples/examples/gelu/__init__.py
new file mode 100644
index 000000000..cdc4d6295
--- /dev/null
+++ b/res/TensorFlowPythonExamples/examples/gelu/__init__.py
@@ -0,0 +1,7 @@
+# NOTE please use TF2.4.0-dev or above to use gelu op
+import tensorflow as tf
+
+tf.compat.v1.disable_eager_execution()
+
+in_ = tf.compat.v1.placeholder(dtype=tf.float32, shape=(4, 4), name="Hole")
+op_ = tf.nn.gelu(in_, approximate=False, name="Output")
diff --git a/res/TensorFlowPythonExamples/examples/gelu_2/__init__.py b/res/TensorFlowPythonExamples/examples/gelu_2/__init__.py
new file mode 100644
index 000000000..147688dd3
--- /dev/null
+++ b/res/TensorFlowPythonExamples/examples/gelu_2/__init__.py
@@ -0,0 +1,7 @@
+# NOTE please use TF2.4.0-dev or above to use gelu op
+import tensorflow as tf
+
+tf.compat.v1.disable_eager_execution()
+
+in_ = tf.compat.v1.placeholder(dtype=tf.float32, shape=(4, 4), name="Hole")
+op_ = tf.nn.gelu(in_, approximate=True, name="Output")
diff --git a/res/TensorFlowPythonExamples/examples/non_max_suppression_padded/__init__.py b/res/TensorFlowPythonExamples/examples/non_max_suppression_padded/__init__.py
new file mode 100644
index 000000000..b8f010c67
--- /dev/null
+++ b/res/TensorFlowPythonExamples/examples/non_max_suppression_padded/__init__.py
@@ -0,0 +1,8 @@
+import tensorflow as tf
+
+max_output_size = tf.compat.v1.constant(4)
+
+in_boxes_ = tf.compat.v1.placeholder(dtype=tf.float32, shape=(8, 4), name="Hole")
+in_scores_ = tf.compat.v1.placeholder(dtype=tf.float32, shape=(8), name="Hole")
+non_max_suppression_padded_ = tf.compat.v1.image.non_max_suppression_padded(
+ in_boxes_, in_scores_, max_output_size)
diff --git a/res/TensorFlowPythonExamples/examples/non_max_suppression_padded_2/__init__.py b/res/TensorFlowPythonExamples/examples/non_max_suppression_padded_2/__init__.py
new file mode 100644
index 000000000..42e7bf06c
--- /dev/null
+++ b/res/TensorFlowPythonExamples/examples/non_max_suppression_padded_2/__init__.py
@@ -0,0 +1,13 @@
+import tensorflow as tf
+
+max_output_size = tf.compat.v1.constant(6)
+iou_threshold = tf.compat.v1.constant(0.5)
+score_threshold = tf.compat.v1.constant(0.6)
+pad_to_max_output_size = True
+
+in_boxes_ = tf.compat.v1.placeholder(dtype=tf.float32, shape=(12, 4), name="Hole")
+in_scores_ = tf.compat.v1.placeholder(dtype=tf.float32, shape=(12), name="Hole")
+
+non_max_suppression_padded_ = tf.compat.v1.image.non_max_suppression_padded(
+ in_boxes_, in_scores_, max_output_size, iou_threshold, score_threshold,
+ pad_to_max_output_size)
diff --git a/res/TensorFlowPythonExamples/examples/non_max_suppression_with_scores/__init__.py b/res/TensorFlowPythonExamples/examples/non_max_suppression_with_scores/__init__.py
new file mode 100644
index 000000000..32c6173b0
--- /dev/null
+++ b/res/TensorFlowPythonExamples/examples/non_max_suppression_with_scores/__init__.py
@@ -0,0 +1,10 @@
+import tensorflow as tf
+
+max_output_size = tf.compat.v1.constant(4)
+
+in_boxes_ = tf.compat.v1.placeholder(dtype=tf.float32, shape=(8, 4), name="Hole")
+in_scores_ = tf.compat.v1.placeholder(dtype=tf.float32, shape=(8), name="Hole")
+
+# non_max_suppression_with_scores requires TF 1.15+
+non_max_suppression_with_scores_ = tf.compat.v1.image.non_max_suppression_with_scores(
+ in_boxes_, in_scores_, max_output_size)
diff --git a/res/TensorFlowPythonExamples/examples/non_max_suppression_with_scores_2/__init__.py b/res/TensorFlowPythonExamples/examples/non_max_suppression_with_scores_2/__init__.py
new file mode 100644
index 000000000..415f9209f
--- /dev/null
+++ b/res/TensorFlowPythonExamples/examples/non_max_suppression_with_scores_2/__init__.py
@@ -0,0 +1,14 @@
+import tensorflow as tf
+
+max_output_size = tf.compat.v1.constant(6)
+iou_threshold = tf.compat.v1.constant(0.5)
+score_threshold = tf.compat.v1.constant(0.6)
+soft_nms_sigma = tf.compat.v1.constant(0.5)
+
+in_boxes_ = tf.compat.v1.placeholder(dtype=tf.float32, shape=(12, 4), name="Hole")
+in_scores_ = tf.compat.v1.placeholder(dtype=tf.float32, shape=(12), name="Hole")
+
+# non_max_suppression_with_scores requires TF 1.15+
+non_max_suppression_with_scores_ = tf.compat.v1.image.non_max_suppression_with_scores(
+ in_boxes_, in_scores_, max_output_size, iou_threshold, score_threshold,
+ soft_nms_sigma)
diff --git a/runtime/contrib/android/api/build.gradle b/runtime/contrib/android/api/build.gradle
index 5c17043eb..def89eeac 100644
--- a/runtime/contrib/android/api/build.gradle
+++ b/runtime/contrib/android/api/build.gradle
@@ -8,7 +8,7 @@ android {
minSdkVersion 26
targetSdkVersion 29
versionCode 1
- versionName "1.8.0"
+ versionName "1.9.0"
externalNativeBuild {
ndkBuild {
diff --git a/runtime/contrib/android/api/src/main/native/onert-native-api.h b/runtime/contrib/android/api/src/main/native/onert-native-api.h
index 366627bd0..13768d470 100644
--- a/runtime/contrib/android/api/src/main/native/onert-native-api.h
+++ b/runtime/contrib/android/api/src/main/native/onert-native-api.h
@@ -1,3 +1,19 @@
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
/* DO NOT EDIT THIS FILE - it is machine generated */
#include <jni.h>
/* Header for class com_samsung_onert_NativeSessionWrapper */
diff --git a/runtime/libs/benchmark/include/benchmark/Phases.h b/runtime/libs/benchmark/include/benchmark/Phases.h
index abb0561c3..936a89742 100644
--- a/runtime/libs/benchmark/include/benchmark/Phases.h
+++ b/runtime/libs/benchmark/include/benchmark/Phases.h
@@ -47,7 +47,7 @@ public:
}
const PhaseOption &option() const { return _option; }
- const MemoryPoller &mem_poll() const { return _mem_poll; }
+ const MemoryPoller &mem_poll() const { return *_mem_poll; }
const Phase &at(const std::string &tag) const { return _phases.at(tag); }
private:
@@ -57,7 +57,7 @@ private:
private:
const PhaseOption _option;
std::unordered_map<std::string, Phase> _phases;
- MemoryPoller _mem_poll;
+ std::unique_ptr<MemoryPoller> _mem_poll;
};
} // namespace benchmark
diff --git a/runtime/libs/benchmark/src/MemoryPoller.cpp b/runtime/libs/benchmark/src/MemoryPoller.cpp
index ba0e4e4d2..61fdecd46 100644
--- a/runtime/libs/benchmark/src/MemoryPoller.cpp
+++ b/runtime/libs/benchmark/src/MemoryPoller.cpp
@@ -165,31 +165,24 @@ bool MemoryPoller::end(PhaseEnum phase)
stop = (_phases.size() == 0);
}
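+  // Record the peak memory usage seen for this phase: RSS and PSS keep the maximum
+  // across samples, and VmHWM is itself a high-water mark.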
- if (_rss_map[phase] == 0)
+ mem = getVmRSS();
+ if (_gpu_poll)
{
- uint32_t mem = getVmRSS();
- if (_gpu_poll)
- {
- mem += getGpuMemory();
- }
- _rss_map[phase] = mem;
+ mem += getGpuMemory();
}
+ if (mem > _rss_map[phase])
+ _rss_map[phase] = mem;
- if (_hwm_map[phase] == 0)
+ mem = getVmHWM();
+ if (_gpu_poll)
{
- uint32_t mem = getVmHWM();
- if (_gpu_poll)
- {
- mem += getGpuMemory();
- }
- _hwm_map[phase] = mem;
+ mem += getGpuMemory();
}
+ _hwm_map[phase] = mem;
- if (_pss_map[phase] == 0)
- {
- uint32_t mem = getPssSum();
+ mem = getPssSum();
+ if (mem > _pss_map[phase])
_pss_map[phase] = mem;
- }
if (stop)
{
diff --git a/runtime/libs/benchmark/src/Phases.cpp b/runtime/libs/benchmark/src/Phases.cpp
index 81da50828..9ab67cfd9 100644
--- a/runtime/libs/benchmark/src/Phases.cpp
+++ b/runtime/libs/benchmark/src/Phases.cpp
@@ -46,11 +46,13 @@ void SleepForMicros(uint64_t micros)
namespace benchmark
{
-Phases::Phases(const PhaseOption &option)
- : _option(option),
- _mem_poll(std::chrono::milliseconds(option.memory_interval), option.memory_gpu)
+Phases::Phases(const PhaseOption &option) : _option(option)
{
- // DO NOTHING
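+  // Create the memory poller only when memory profiling is requested.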
+ if (_option.memory)
+ {
+ _mem_poll = std::make_unique<MemoryPoller>(std::chrono::milliseconds(option.memory_interval),
+ option.memory_gpu);
+ }
}
void Phases::run(const std::string &tag, const PhaseFunc &exec, const PhaseFunc *post,
@@ -61,7 +63,7 @@ void Phases::run(const std::string &tag, const PhaseFunc &exec, const PhaseFunc
for (uint32_t i = 0; i < loop_num; ++i)
{
if (!option_disable && _option.memory)
- _mem_poll.start(p);
+ _mem_poll->start(p);
uint64_t t = 0u;
t = nowMicros();
@@ -71,15 +73,15 @@ void Phases::run(const std::string &tag, const PhaseFunc &exec, const PhaseFunc
t = nowMicros() - t;
if (!option_disable && _option.memory)
- _mem_poll.end(p);
+ _mem_poll->end(p);
phase.time.emplace_back(t);
if (!option_disable && _option.memory)
{
- phase.memory[MemoryType::RSS].emplace_back(_mem_poll.getRssMap().at(p));
- phase.memory[MemoryType::HWM].emplace_back(_mem_poll.getHwmMap().at(p));
- phase.memory[MemoryType::PSS].emplace_back(_mem_poll.getPssMap().at(p));
+ phase.memory[MemoryType::RSS].emplace_back(_mem_poll->getRssMap().at(p));
+ phase.memory[MemoryType::HWM].emplace_back(_mem_poll->getHwmMap().at(p));
+ phase.memory[MemoryType::PSS].emplace_back(_mem_poll->getPssMap().at(p));
}
if (post)
diff --git a/runtime/onert/api/include/nnfw.h b/runtime/onert/api/include/nnfw.h
index ef3678b0d..9348df6ae 100644
--- a/runtime/onert/api/include/nnfw.h
+++ b/runtime/onert/api/include/nnfw.h
@@ -103,6 +103,8 @@ typedef enum {
NNFW_STATUS_INVALID_STATE = 3,
/** When it is out of memory */
NNFW_STATUS_OUT_OF_MEMORY = 4,
+ /** When it was given an insufficient output buffer */
+ NNFW_STATUS_INSUFFICIENT_OUTPUT_SIZE = 5,
} NNFW_STATUS;
/**
diff --git a/runtime/onert/api/include/nnfw_experimental.h b/runtime/onert/api/include/nnfw_experimental.h
index 4cd5c585a..94f781988 100644
--- a/runtime/onert/api/include/nnfw_experimental.h
+++ b/runtime/onert/api/include/nnfw_experimental.h
@@ -62,4 +62,38 @@ typedef struct
NNFW_STATUS nnfw_register_custom_op_info(nnfw_session *session, const char *id,
custom_kernel_registration_info *info);
+/**
+ * @brief Get the input tensor index by name
+ *
+ * This function finds an input tensor of the given name.
+ * If found, the index value is set to the address that @c index points to, and the function
+ * returns @c NNFW_STATUS_NO_ERROR. Otherwise, @c index is unchanged and @c NNFW_STATUS_ERROR is returned.
+ *
+ * @note If two or more input tensors are of the same name, the one with the lowest index is always
+ * returned.
+ *
+ * @param[in] session the session object
+ * @param[in] tensorname the name of the tensor to find, a null-terminated char pointer string
+ * @param[out] index the index to be set
+ * @return @c NNFW_STATUS_NO_ERROR if successful
+ */
+NNFW_STATUS nnfw_input_tensorindex(nnfw_session *session, const char *tensorname, uint32_t *index);
+
+/**
+ * @brief Get the output tensor index by name
+ *
+ * This function finds an output tensor of the given name.
+ * If found, the index value is set to the address that @c index points to, and the function
+ * returns @c NNFW_STATUS_NO_ERROR. Otherwise, @c index is unchanged and @c NNFW_STATUS_ERROR is returned.
+ *
+ * @note If two or more output tensors are of the same name, the one with the lowest index is always
+ * returned.
+ *
+ * @param[in] session the session object
+ * @param[in] tensorname the name of the tensor to find, a null-terminated char pointer string
+ * @param[out] index the index to be set
+ * @return @c NNFW_STATUS_NO_ERROR if successful
+ */
+NNFW_STATUS nnfw_output_tensorindex(nnfw_session *session, const char *tensorname, uint32_t *index);
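+/*
+ * Usage sketch (illustrative only): looks up an input index by name and checks the
+ * return status. The session setup is elided and the tensor name "input" is hypothetical.
+ *
+ *   uint32_t index = 0;
+ *   if (nnfw_input_tensorindex(session, "input", &index) == NNFW_STATUS_NO_ERROR)
+ *   {
+ *     // index can now be used with the index-based input APIs.
+ *   }
+ */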
+
#endif // __NNFW_EXPERIMENTAL_H__
diff --git a/runtime/onert/api/include/nnfw_version.h b/runtime/onert/api/include/nnfw_version.h
index 320271a26..42e43760b 100644
--- a/runtime/onert/api/include/nnfw_version.h
+++ b/runtime/onert/api/include/nnfw_version.h
@@ -21,6 +21,6 @@
* NNFW_VERSION is a uint32 value representing nnfw runtime version
* in 0xMMmmmmPP, where MM = major, mmmm = minor, PP = patch
*/
-#define NNFW_VERSION 0x01000800
+#define NNFW_VERSION 0x01000900
#endif // __NNFW_VERSION_H__
diff --git a/runtime/onert/api/src/nnfw_api.cc b/runtime/onert/api/src/nnfw_api.cc
index d65158fd8..ff5e679da 100644
--- a/runtime/onert/api/src/nnfw_api.cc
+++ b/runtime/onert/api/src/nnfw_api.cc
@@ -33,6 +33,7 @@ STATIC_ASSERT_ENUM_CHECK(NNFW_STATUS_ERROR, 1);
STATIC_ASSERT_ENUM_CHECK(NNFW_STATUS_UNEXPECTED_NULL, 2);
STATIC_ASSERT_ENUM_CHECK(NNFW_STATUS_INVALID_STATE, 3);
STATIC_ASSERT_ENUM_CHECK(NNFW_STATUS_OUT_OF_MEMORY, 4);
+STATIC_ASSERT_ENUM_CHECK(NNFW_STATUS_INSUFFICIENT_OUTPUT_SIZE, 5);
STATIC_ASSERT_ENUM_CHECK(NNFW_LAYOUT_NONE, 0);
STATIC_ASSERT_ENUM_CHECK(NNFW_LAYOUT_CHANNELS_LAST, 1);
@@ -347,3 +348,15 @@ NNFW_STATUS nnfw_load_circle_from_buffer(nnfw_session *session, uint8_t *buffer,
NNFW_RETURN_ERROR_IF_NULL(session);
return session->load_circle_from_buffer(buffer, size);
}
+
+NNFW_STATUS nnfw_input_tensorindex(nnfw_session *session, const char *tensorname, uint32_t *index)
+{
+ NNFW_RETURN_ERROR_IF_NULL(session);
+ return session->input_tensorindex(tensorname, index);
+}
+
+NNFW_STATUS nnfw_output_tensorindex(nnfw_session *session, const char *tensorname, uint32_t *index)
+{
+ NNFW_RETURN_ERROR_IF_NULL(session);
+ return session->output_tensorindex(tensorname, index);
+}
diff --git a/runtime/onert/api/src/nnfw_api_internal.cc b/runtime/onert/api/src/nnfw_api_internal.cc
index eb0b743d3..81b40703f 100644
--- a/runtime/onert/api/src/nnfw_api_internal.cc
+++ b/runtime/onert/api/src/nnfw_api_internal.cc
@@ -18,6 +18,7 @@
#include "CustomKernelRegistry.h"
#include "compiler/Compiler.h"
#include "util/ConfigSource.h"
+#include "util/Exceptions.h"
#include "exec/Execution.h"
#include "circle_loader.h"
#include "tflite_loader.h"
@@ -37,6 +38,7 @@
#define MAX_BACKEND_NAME_LENGTH 32
#define MAX_OP_NAME_LENGTH 64
#define MAX_PATH_LENGTH 1024
+#define MAX_TENSOR_NAME_LENGTH 64
// Is null-terminating in length ?
static bool null_terminating(const char *str, uint32_t length)
@@ -64,6 +66,32 @@ static onert::ir::Layout convertLayout(NNFW_LAYOUT layout)
return onert::ir::Layout::UNKNOWN;
}
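+// Shared helper for the experimental input/output tensor-index lookups below;
+// returns NNFW_STATUS_ERROR when no tensor of the given name is found.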
+NNFW_STATUS getTensorIndexImpl(const onert::ir::Graph &graph, const char *tensorname,
+ uint32_t *index, bool is_input)
+{
+ if (!tensorname || !index)
+ return NNFW_STATUS_UNEXPECTED_NULL;
+
+ if (!null_terminating(tensorname, MAX_TENSOR_NAME_LENGTH))
+ {
+ std::cerr << "nnpackage path is too long" << std::endl;
+ return NNFW_STATUS_ERROR;
+ }
+
+ auto ind_found = is_input ? graph.getInputIndex(tensorname) : graph.getOutputIndex(tensorname);
+
+ if (ind_found.undefined())
+ {
+ // Not found
+ return NNFW_STATUS_ERROR;
+ }
+ else
+ {
+ *index = ind_found.value();
+ return NNFW_STATUS_NO_ERROR;
+ }
+}
+
nnfw_session::nnfw_session()
: _subgraphs{nullptr}, _execution{nullptr},
_kernel_registry{std::make_shared<onert::frontend::custom::KernelRegistry>()}
@@ -213,6 +241,12 @@ NNFW_STATUS nnfw_session::run()
{
_execution->execute();
}
+ catch (const onert::InsufficientBufferSizeException &e)
+ {
+ // Currently insufficient buffer always means output buffer.
+ std::cerr << "Error during nnfw_session::run : " << e.what() << std::endl;
+ return NNFW_STATUS_INSUFFICIENT_OUTPUT_SIZE;
+ }
catch (const std::exception &e)
{
std::cerr << "Error during nnfw_session::run : " << e.what() << std::endl;
@@ -447,26 +481,27 @@ NNFW_STATUS nnfw_session::apply_tensorinfo(uint32_t index, nnfw_tensorinfo ti)
}
}
+ auto ind = primary_subgraph()->getInputs().at(index);
+ auto &input = primary_subgraph()->operands().at(ind);
+
+ onert::ir::Shape new_shape(ti.rank);
+ for (int32_t i = 0; i < ti.rank; i++)
+ new_shape.dim(i) = ti.dims[i];
+
+  // If the passed shape is the same as the model's shape, do nothing
+ if (input.info().shape() == new_shape)
+ return NNFW_STATUS_NO_ERROR;
+
if (!isStatePreparedOrFinishedRun())
{
// In this case, if we apply input shape in primary_subgraph, it will propagate after
     // compilation and execution
- auto ind = primary_subgraph()->getInputs().at(index);
- auto &input = primary_subgraph()->operands().at(ind);
-
- onert::ir::Shape new_shape(ti.rank);
- for (int32_t i = 0; i < ti.rank; i++)
- new_shape.dim(i) = ti.dims[i];
// overwrite input shape with the shape from ti
input.info().shape(new_shape);
}
else // when called after nnfw_session::prepare()
{
- onert::ir::Shape new_shape(ti.rank);
- for (int32_t i = 0; i < ti.rank; i++)
- new_shape.dim(i) = ti.dims[i];
-
_execution->changeInputShape(onert::ir::IOIndex(index), new_shape);
}
@@ -840,3 +875,13 @@ bool nnfw_session::isStatePreparedOrFinishedRun()
{
return isStatePrepared() || isStateFinishedRun();
}
+
+NNFW_STATUS nnfw_session::input_tensorindex(const char *tensorname, uint32_t *index)
+{
+ return getTensorIndexImpl(*primary_subgraph(), tensorname, index, true);
+}
+
+NNFW_STATUS nnfw_session::output_tensorindex(const char *tensorname, uint32_t *index)
+{
+ return getTensorIndexImpl(*primary_subgraph(), tensorname, index, false);
+}
diff --git a/runtime/onert/api/src/nnfw_api_internal.h b/runtime/onert/api/src/nnfw_api_internal.h
index 1c3c3706f..604ba38b4 100644
--- a/runtime/onert/api/src/nnfw_api_internal.h
+++ b/runtime/onert/api/src/nnfw_api_internal.h
@@ -122,8 +122,6 @@ public:
NNFW_STATUS input_tensorinfo(uint32_t index, nnfw_tensorinfo *ti);
NNFW_STATUS output_tensorinfo(uint32_t index, nnfw_tensorinfo *ti);
- NNFW_STATUS register_custom_operation(const std::string &id, nnfw_custom_eval eval_func);
-
NNFW_STATUS set_available_backends(const char *backends);
NNFW_STATUS set_op_backend(const char *op, const char *backend);
@@ -133,9 +131,16 @@ public:
NNFW_STATUS set_config(const char *key, const char *value);
NNFW_STATUS get_config(const char *key, char *value, size_t value_size);
-
NNFW_STATUS load_circle_from_buffer(uint8_t *buffer, size_t size);
+ //
+ // Experimental API
+ //
+
+ NNFW_STATUS register_custom_operation(const std::string &id, nnfw_custom_eval eval_func);
+ NNFW_STATUS input_tensorindex(const char *tensorname, uint32_t *index);
+ NNFW_STATUS output_tensorindex(const char *tensorname, uint32_t *index);
+
private:
onert::ir::Graph *primary_subgraph();
bool isStateInitialized();
diff --git a/runtime/onert/backend/acl_cl/Backend.h b/runtime/onert/backend/acl_cl/Backend.h
index 8aaf516cd..5c5041378 100644
--- a/runtime/onert/backend/acl_cl/Backend.h
+++ b/runtime/onert/backend/acl_cl/Backend.h
@@ -25,6 +25,7 @@
#include "KernelGenerator.h"
#include "TensorManager.h"
#include "Optimizer.h"
+#include "AclTensorRegistry.h"
namespace onert
{
@@ -47,10 +48,13 @@ public:
const auto &operands = graph.operands();
const auto &operations = graph.operations();
auto context = std::make_unique<BackendContext>(this, &graph);
- auto tb = std::make_shared<TensorBuilder>(operands, createTensorManager(is_linear_executor));
+ auto tm = createTensorManager(is_linear_executor);
+ auto tr = std::make_shared<acl_common::AclTensorRegistry<TensorManager>>(tm);
+ auto tb = std::make_shared<TensorBuilder>(operands, tm, tr);
+ context->tensor_registry = tr;
context->tensor_builder = tb;
- context->constant_initializer = std::make_shared<ConstantInitializer>(operands, tb);
- context->kernel_gen = std::make_shared<KernelGenerator>(operands, operations, tb);
+ context->constant_initializer = std::make_shared<ConstantInitializer>(operands, tr);
+ context->kernel_gen = std::make_shared<KernelGenerator>(operands, operations, tb, tr);
context->tensor_register = nullptr;
context->optimizer = std::make_shared<Optimizer>(context.get());
return context;
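A note on the new wiring: the registry created here is what the constant initializer and kernel generator below consult instead of TensorBuilder::at(). The accessor they rely on is not defined in this excerpt; a plausible sketch of the call shape, with `tensor_registry` and `operand_index` as stand-in names:

// Assumed lookup path through the shared registry (sketch, not the actual
// AclTensorRegistry definition):
auto tensor = tensor_registry->getAclTensor(operand_index); // ACL tensor wrapper for the operand
auto handle = tensor->handle();                             // underlying arm_compute::ICLTensor *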
diff --git a/runtime/onert/backend/acl_cl/ConstantInitializer.cc b/runtime/onert/backend/acl_cl/ConstantInitializer.cc
index d7f5f8031..31f1c10eb 100644
--- a/runtime/onert/backend/acl_cl/ConstantInitializer.cc
+++ b/runtime/onert/backend/acl_cl/ConstantInitializer.cc
@@ -24,78 +24,17 @@ namespace acl_cl
{
ConstantInitializer::ConstantInitializer(const ir::Operands &operands,
- const std::shared_ptr<TensorBuilder> &tensor_builder)
- : IConstantInitializer{operands}, _tensor_builder{tensor_builder}
+ const std::shared_ptr<ITensorRegistry> &tensor_reg)
+ : acl_common::AclConstantInitializer{operands, tensor_reg}
{
// DO NOTHING
}
-void ConstantInitializer::copyInputInitialize(const ir::Operation &node, uint32_t index)
-{
- assert(node.getInputs().size() > index);
-
- const auto &input_index = node.getInputs().at(index);
- const auto &input_obj = _operands.at(input_index);
- registerCopyInitializer(input_index, input_obj);
-}
-
-void ConstantInitializer::permuteInputInitialize(const ir::Operation &node, uint32_t index)
-{
- assert(node.getInputs().size() > index);
-
- const auto &input_index = node.getInputs().at(index);
- const auto &input_obj = _operands.at(input_index);
- registerPermuteInitializer(input_index, input_obj);
-}
-
-void ConstantInitializer::visit(const ir::operation::BatchToSpaceND &node)
-{
- const auto &block_size_index = node.getInputs().at(ir::operation::BatchToSpaceND::BLOCK_SIZE);
- const auto &block_size_obj = _operands.at(block_size_index);
-
- if (block_size_obj.isConstant())
- {
- _init_map[block_size_index] = [](const ir::Operand &model_obj, backend::ITensor &obj) {
- assert(model_obj.data());
- const auto &shape = model_obj.shape();
- const auto base = reinterpret_cast<const int32_t *>(model_obj.data()->base());
- assert(model_obj.shape().rank() == 1);
- obj.access([&](ITensor &tensor) {
- for (size_t i = 0; i < shape.num_elements(); ++i)
- {
- const int32_t value = base[shape.num_elements() - i - 1];
- int32_t *into = reinterpret_cast<int32_t *>(tensor.buffer() +
- tensor.calcOffset({static_cast<int32_t>(i)}));
- *into = value;
- }
- });
- };
- }
-}
-
-void ConstantInitializer::visit(const ir::operation::Conv2D &node)
-{
- permuteInputInitialize(node, ir::operation::Conv2D::KERNEL);
- copyInputInitialize(node, ir::operation::Conv2D::BIAS);
-}
-
-void ConstantInitializer::visit(const ir::operation::DepthwiseConv2D &node)
-{
- permuteInputInitialize(node, ir::operation::DepthwiseConv2D::KERNEL);
- copyInputInitialize(node, ir::operation::DepthwiseConv2D::BIAS);
-}
-
void ConstantInitializer::visit(const ir::operation::EmbeddingLookup &node)
{
copyInputInitialize(node, ir::operation::EmbeddingLookup::LOOKUPS);
}
-void ConstantInitializer::visit(const ir::operation::FullyConnected &node)
-{
- copyInputInitialize(node, ir::operation::FullyConnected::WEIGHT);
- copyInputInitialize(node, ir::operation::FullyConnected::BIAS);
-}
-
void ConstantInitializer::visit(const ir::operation::Gather &node)
{
copyInputInitialize(node, ir::operation::Gather::INDICES);
@@ -107,33 +46,6 @@ void ConstantInitializer::visit(const ir::operation::HashtableLookup &node)
copyInputInitialize(node, ir::operation::HashtableLookup::KEYS);
}
-void ConstantInitializer::visit(const ir::operation::LSTM &node)
-{
- copyInputInitialize(node, ir::operation::LSTM::INPUT_TO_INPUT_WEIGHTS);
- copyInputInitialize(node, ir::operation::LSTM::INPUT_TO_FORGET_WEIGHTS);
- copyInputInitialize(node, ir::operation::LSTM::INPUT_TO_CELL_WEIGHTS);
- copyInputInitialize(node, ir::operation::LSTM::INPUT_TO_OUTPUT_WEIGHTS);
- copyInputInitialize(node, ir::operation::LSTM::RECURRENT_TO_INPUT_WEIGHTS);
- copyInputInitialize(node, ir::operation::LSTM::RECURRENT_TO_FORGET_WEIGHTS);
- copyInputInitialize(node, ir::operation::LSTM::RECURRENT_TO_CELL_WEIGHTS);
- copyInputInitialize(node, ir::operation::LSTM::RECURRENT_TO_OUTPUT_WEIGHTS);
- copyInputInitialize(node, ir::operation::LSTM::CELL_TO_INPUT_WEIGHTS);
- copyInputInitialize(node, ir::operation::LSTM::CELL_TO_FORGET_WEIGHTS);
- copyInputInitialize(node, ir::operation::LSTM::CELL_TO_OUTPUT_WEIGHTS);
- copyInputInitialize(node, ir::operation::LSTM::INPUT_GATE_BIAS);
- copyInputInitialize(node, ir::operation::LSTM::FORGET_GATE_BIAS);
- copyInputInitialize(node, ir::operation::LSTM::OUTPUT_GATE_BIAS);
- copyInputInitialize(node, ir::operation::LSTM::PROJECTION_WEIGHTS);
- copyInputInitialize(node, ir::operation::LSTM::PROJECTION_BIAS);
-}
-
-void ConstantInitializer::visit(const ir::operation::RNN &node)
-{
- copyInputInitialize(node, ir::operation::RNN::WEIGHTS);
- copyInputInitialize(node, ir::operation::RNN::RECURRENT_WEIGHTS);
- copyInputInitialize(node, ir::operation::RNN::BIAS);
-}
-
void ConstantInitializer::visit(const ir::operation::SpaceToBatchND &node)
{
const auto &block_size_index = node.getInputs().at(ir::operation::SpaceToBatchND::BLOCK_SIZE);
@@ -184,13 +96,6 @@ void ConstantInitializer::visit(const ir::operation::SpaceToBatchND &node)
}
}
-void ConstantInitializer::visit(const ir::operation::TransposeConv &node)
-{
- const auto &kernel_index = node.getInputs().at(ir::operation::TransposeConv::KERNEL);
- const auto &kernel_obj = _operands.at(kernel_index);
- registerPermuteInitializer(kernel_index, kernel_obj);
-}
-
} // namespace acl_cl
} // namespace backend
} // namespace onert
diff --git a/runtime/onert/backend/acl_cl/ConstantInitializer.h b/runtime/onert/backend/acl_cl/ConstantInitializer.h
index c51f72b11..4f894fd31 100644
--- a/runtime/onert/backend/acl_cl/ConstantInitializer.h
+++ b/runtime/onert/backend/acl_cl/ConstantInitializer.h
@@ -17,9 +17,7 @@
#ifndef __ONERT_COMPILER_ACL_CL_CONSTANT_INITIALIZER_H__
#define __ONERT_COMPILER_ACL_CL_CONSTANT_INITIALIZER_H__
-#include <backend/IConstantInitializer.h>
-#include <ir/Operands.h>
-#include "TensorBuilder.h"
+#include "AclConstantInitializer.h"
namespace onert
{
@@ -28,32 +26,18 @@ namespace backend
namespace acl_cl
{
-class ConstantInitializer : public IConstantInitializer
+class ConstantInitializer : public acl_common::AclConstantInitializer
{
public:
ConstantInitializer(const ir::Operands &operands,
- const std::shared_ptr<TensorBuilder> &tensor_builder);
+ const std::shared_ptr<ITensorRegistry> &tensor_reg);
public:
- void visit(const ir::operation::BatchToSpaceND &) override;
- void visit(const ir::operation::Conv2D &) override;
- void visit(const ir::operation::DepthwiseConv2D &) override;
- void visit(const ir::operation::EmbeddingLookup &) override;
- void visit(const ir::operation::FullyConnected &) override;
- void visit(const ir::operation::Gather &) override;
- void visit(const ir::operation::HashtableLookup &) override;
- void visit(const ir::operation::LSTM &) override;
- void visit(const ir::operation::RNN &) override;
- void visit(const ir::operation::SpaceToBatchND &) override;
- void visit(const ir::operation::TransposeConv &) override;
-
-private:
- std::shared_ptr<ITensorBuilder> tensor_builder() const override { return _tensor_builder; }
- void copyInputInitialize(const ir::Operation &node, uint32_t index);
- void permuteInputInitialize(const ir::Operation &node, uint32_t index);
-
-private:
- std::shared_ptr<TensorBuilder> _tensor_builder;
+ using acl_common::AclConstantInitializer::visit;
+ void visit(const ir::operation::EmbeddingLookup &) final;
+ void visit(const ir::operation::Gather &) final;
+ void visit(const ir::operation::HashtableLookup &) final;
+ void visit(const ir::operation::SpaceToBatchND &) final;
};
} // namespace acl_cl
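The visits deleted from this class are presumed to have moved into the shared acl_common::AclConstantInitializer base, which is not part of this excerpt. A rough sketch of its assumed shape, mirroring the removed bodies:

// Sketch (assumption): the common base now carries the visits that acl_cl and
// acl_neon no longer duplicate, keyed off the tensor registry instead of the
// tensor builder.
class AclConstantInitializer : public IConstantInitializer
{
public:
  AclConstantInitializer(const ir::Operands &operands,
                         const std::shared_ptr<ITensorRegistry> &tensor_reg)
      : IConstantInitializer{operands}, _tensor_reg{tensor_reg}
  {
  }

  // Common visits, e.g. Conv2D: permute the kernel, copy the bias.
  void visit(const ir::operation::Conv2D &node) override
  {
    permuteInputInitialize(node, ir::operation::Conv2D::KERNEL);
    copyInputInitialize(node, ir::operation::Conv2D::BIAS);
  }
  // ... likewise for BatchToSpaceND, DepthwiseConv2D, FullyConnected, LSTM,
  // RNN and TransposeConv, mirroring the bodies removed above.

protected:
  std::shared_ptr<ITensorRegistry> _tensor_reg;
};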
diff --git a/runtime/onert/backend/acl_cl/KernelGenerator.cc b/runtime/onert/backend/acl_cl/KernelGenerator.cc
index a84f983b4..94489253d 100644
--- a/runtime/onert/backend/acl_cl/KernelGenerator.cc
+++ b/runtime/onert/backend/acl_cl/KernelGenerator.cc
@@ -40,15 +40,16 @@ namespace backend
namespace acl_cl
{
-using ::onert::backend::acl_common::asAclClFunction;
+using ::onert::backend::acl_common::asAclFunction;
using ActivationBuilder = ::onert::backend::acl_common::AclActivationBuilder<
- ::arm_compute::ICLTensor, ::arm_compute::CLActivationLayer, acl_common::AclClFunction>;
+ ::arm_compute::ICLTensor, ::arm_compute::CLActivationLayer, acl_common::AclFunction>;
-KernelGenerator::KernelGenerator(const ir::Operands &operands_ctx,
- const ir::Operations &operations_ctx,
- const std::shared_ptr<TensorBuilder> &tensor_builder)
+KernelGenerator::KernelGenerator(
+ const ir::Operands &operands_ctx, const ir::Operations &operations_ctx,
+ const std::shared_ptr<TensorBuilder> &tensor_builder,
+ const std::shared_ptr<acl_common::AclTensorRegistry<TensorManager>> &tensor_reg)
: _ctx(operands_ctx), _operations_ctx(operations_ctx), _tensor_builder(tensor_builder),
- _current_op_seq_layout(ir::Layout::UNKNOWN)
+ _tensor_reg(tensor_reg), _current_op_seq_layout(ir::Layout::UNKNOWN)
{
// DO NOTHING
}
@@ -77,51 +78,69 @@ void KernelGenerator::visit(const ir::operation::BatchToSpaceND &node)
const auto block_size_index{
node.getInputs().at(ir::operation::BatchToSpaceND::Input::BLOCK_SIZE)};
- auto ofm_tensor = _tensor_builder->at(ofm_index).get();
- auto ifm_tensor = _tensor_builder->at(ifm_index).get();
- auto block_size_tensor = _tensor_builder->at(block_size_index).get();
+ auto ofm_tensor = _tensor_reg->getAclTensor(ofm_index).get();
+ auto ifm_tensor = _tensor_reg->getAclTensor(ifm_index).get();
+ auto block_size_tensor = _tensor_reg->getAclTensor(block_size_index).get();
assert(_ctx.at(block_size_index).data());
- auto fn = std::make_unique<::arm_compute::CLBatchToSpaceLayer>();
+ auto fn = acl_common::generateLayer<arm_compute::CLBatchToSpaceLayer>(
+ ifm_tensor->handle(), block_size_tensor->handle(), ofm_tensor->handle());
- fn->configure(ifm_tensor->handle(), block_size_tensor->handle(), ofm_tensor->handle());
-
- auto acl_fn = asAclClFunction(std::move(fn));
-
- _return_fn = std::move(acl_fn);
+ _return_fn = asAclFunction(std::move(fn));
}
-void KernelGenerator::visit(const ir::operation::Cast &node)
+void KernelGenerator::visit(const ir::operation::BinaryArithmetic &node)
{
const auto ofm_index{node.getOutputs().at(0)};
- const auto ifm_index{node.getInputs().at(ir::operation::Cast::Input::INPUT)};
+ const auto lhs_index{node.getInputs().at(ir::operation::BinaryArithmetic::Input::LHS)};
+ const auto rhs_index{node.getInputs().at(ir::operation::BinaryArithmetic::Input::RHS)};
- auto ofm_tensor = _tensor_builder->at(ofm_index).get();
- auto ifm_tensor = _tensor_builder->at(ifm_index).get();
+ const auto activation = node.param().activation;
- std::unique_ptr<::arm_compute::IFunction> fn;
- if (ifm_tensor->data_type() == ofm_tensor->data_type())
- {
- auto l = std::make_unique<::arm_compute::CLCopy>();
+ auto ofm_tensor = _tensor_reg->getAclTensor(ofm_index).get();
+ auto lhs_tensor = _tensor_reg->getAclTensor(lhs_index).get();
+ auto rhs_tensor = _tensor_reg->getAclTensor(rhs_index).get();
- l->configure(ifm_tensor->handle(), ofm_tensor->handle());
+ const auto act_info = acl_common::asActivationLayerInfo(activation);
- fn = std::move(l);
- }
- else
+ std::unique_ptr<arm_compute::IFunction> fn;
+ switch (node.param().arithmetic_type)
{
- auto l = std::make_unique<::arm_compute::CLCast>();
-
- // TODO Support converting float to int32 as round down
- l->configure(ifm_tensor->handle(), ofm_tensor->handle(), arm_compute::ConvertPolicy::SATURATE);
-
- fn = std::move(l);
+ case ir::operation::BinaryArithmetic::ArithmeticType::ADD:
+ {
+ fn = acl_common::generateLayer<arm_compute::CLArithmeticAddition>(
+ lhs_tensor->handle(), rhs_tensor->handle(), ofm_tensor->handle(),
+ arm_compute::ConvertPolicy::SATURATE, act_info);
+ break;
+ }
+ case ir::operation::BinaryArithmetic::ArithmeticType::SUB:
+ {
+ fn = acl_common::generateLayer<arm_compute::CLArithmeticSubtraction>(
+ lhs_tensor->handle(), rhs_tensor->handle(), ofm_tensor->handle(),
+ arm_compute::ConvertPolicy::SATURATE, act_info);
+ break;
+ }
+ case ir::operation::BinaryArithmetic::ArithmeticType::MUL:
+ {
+ fn = acl_common::generateLayer<arm_compute::CLPixelWiseMultiplication>(
+ lhs_tensor->handle(), rhs_tensor->handle(), ofm_tensor->handle(), 1.0, // scale
+ arm_compute::ConvertPolicy::SATURATE, arm_compute::RoundingPolicy::TO_NEAREST_EVEN,
+ act_info);
+ break;
+ }
+ case ir::operation::BinaryArithmetic::ArithmeticType::DIV:
+ {
+ fn = acl_common::generateLayer<arm_compute::CLArithmeticDivision>(
+ lhs_tensor->handle(), rhs_tensor->handle(), ofm_tensor->handle(), act_info);
+ break;
+ }
+ default:
+ assert(false && "The BinaryArithmetic operation supports only binary arithmetic operations");
+ break;
}
- auto acl_fn = asAclClFunction(std::move(fn));
-
- _return_fn = std::move(acl_fn);
+ _return_fn = asAclFunction(std::move(fn));
}
void KernelGenerator::visit(const ir::operation::Conv2D &node)
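Throughout this file the make_unique-then-configure pairs are replaced with acl_common::generateLayer<Layer>(...). The helper itself is not in this excerpt; a plausible sketch of what it does, assuming every ACL layer exposes a matching configure() overload (layers built with a memory manager presumably get a second overload that forwards it to the constructor):

// Sketch of the assumed helper (acl_common), not the verbatim implementation:
template <typename Layer, typename... Args>
std::unique_ptr<arm_compute::IFunction> generateLayer(Args &&... args)
{
  auto fn = std::make_unique<Layer>();
  fn->configure(std::forward<Args>(args)...); // same call the removed code made explicitly
  return fn;
}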
@@ -145,22 +164,20 @@ void KernelGenerator::visit(const ir::operation::Conv2D &node)
ker_width, ker_height);
const auto activation = node.param().activation;
- auto ofm_tensor = _tensor_builder->at(ofm_index).get();
- auto ifm_tensor = _tensor_builder->at(ifm_index).get();
- auto ker_tensor = _tensor_builder->at(ker_index).get();
- auto bias_tensor = _tensor_builder->at(bias_index).get();
+ auto ofm_tensor = _tensor_reg->getAclTensor(ofm_index).get();
+ auto ifm_tensor = _tensor_reg->getAclTensor(ifm_index).get();
+ auto ker_tensor = _tensor_reg->getAclTensor(ker_index).get();
+ auto bias_tensor = _tensor_reg->getAclTensor(bias_index).get();
const auto conv_info = acl_common::asPadStrideInfo(padding, stride);
const auto act_info = acl_common::asActivationLayerInfo(activation);
- auto fn = std::make_unique<::arm_compute::CLConvolutionLayer>(
- _tensor_builder->acl_tensor_manager()->internal_buffer_manager());
-
- fn->configure(ifm_tensor->handle(), ker_tensor->handle(), bias_tensor->handle(),
- ofm_tensor->handle(), conv_info, ::arm_compute::WeightsInfo(),
- ::arm_compute::Size2D(1U, 1U), act_info);
+ auto fn = acl_common::generateLayer<arm_compute::CLConvolutionLayer>(
+ _tensor_builder->acl_tensor_manager()->internal_buffer_manager(), ifm_tensor->handle(),
+ ker_tensor->handle(), bias_tensor->handle(), ofm_tensor->handle(), conv_info,
+ ::arm_compute::WeightsInfo(), ::arm_compute::Size2D(1U, 1U), act_info);
- _return_fn = asAclClFunction(std::move(fn));
+ _return_fn = asAclFunction(std::move(fn));
}
void KernelGenerator::visit(const ir::operation::DepthwiseConv2D &node)
@@ -185,50 +202,23 @@ void KernelGenerator::visit(const ir::operation::DepthwiseConv2D &node)
const auto multiplier = node.param().multiplier;
const auto activation = node.param().activation;
- auto ofm_tensor = _tensor_builder->at(ofm_index).get();
- auto ifm_tensor = _tensor_builder->at(ifm_index).get();
- auto ker_tensor = _tensor_builder->at(ker_index).get();
- auto bias_tensor = _tensor_builder->at(bias_index).get();
+ auto ofm_tensor = _tensor_reg->getAclTensor(ofm_index).get();
+ auto ifm_tensor = _tensor_reg->getAclTensor(ifm_index).get();
+ auto ker_tensor = _tensor_reg->getAclTensor(ker_index).get();
+ auto bias_tensor = _tensor_reg->getAclTensor(bias_index).get();
const auto conv_info = acl_common::asPadStrideInfo(padding, stride);
const auto act_info = acl_common::asActivationLayerInfo(activation);
{
- auto fn = std::make_unique<::arm_compute::CLDepthwiseConvolutionLayer>();
-
- fn->configure(ifm_tensor->handle(), ker_tensor->handle(), bias_tensor->handle(),
- ofm_tensor->handle(), conv_info, multiplier, act_info);
+ auto fn = acl_common::generateLayer<arm_compute::CLDepthwiseConvolutionLayer>(
+ ifm_tensor->handle(), ker_tensor->handle(), bias_tensor->handle(), ofm_tensor->handle(),
+ conv_info, multiplier, act_info);
- _return_fn = asAclClFunction(std::move(fn));
+ _return_fn = asAclFunction(std::move(fn));
}
}
-void KernelGenerator::visit(const ir::operation::MaxPool2D &node)
-{
- auto raw_fn = acl_common::kernelGenPool2D<::arm_compute::CLPoolingLayer>(
- node, _ctx, _tensor_builder, _current_op_seq_layout, ::arm_compute::PoolingType::MAX);
-
- const auto ofm_index{node.getOutputs().at(0)};
- auto ofm_tensor = _tensor_builder->at(ofm_index).get();
- const auto activation = node.param().activation;
- _return_fn = std::make_unique<exec::FunctionSequence>(
- asAclClFunction(std::move(raw_fn)),
- ActivationBuilder::generate(activation, ofm_tensor->handle()));
-}
-
-void KernelGenerator::visit(const ir::operation::AvgPool2D &node)
-{
- auto raw_fn = acl_common::kernelGenPool2D<::arm_compute::CLPoolingLayer>(
- node, _ctx, _tensor_builder, _current_op_seq_layout, ::arm_compute::PoolingType::AVG);
-
- const auto ofm_index{node.getOutputs().at(0)};
- auto ofm_tensor = _tensor_builder->at(ofm_index).get();
- const auto activation = node.param().activation;
- _return_fn = std::make_unique<exec::FunctionSequence>(
- asAclClFunction(std::move(raw_fn)),
- ActivationBuilder::generate(activation, ofm_tensor->handle()));
-}
-
void KernelGenerator::visit(const ir::operation::Concat &node)
{
const auto ofm_index{node.getOutputs().at(0)};
@@ -250,70 +240,44 @@ void KernelGenerator::visit(const ir::operation::Concat &node)
return;
}
- auto output_tensor = _tensor_builder->at(ofm_index).get();
+ auto output_tensor = _tensor_reg->getAclTensor(ofm_index).get();
std::vector<::arm_compute::ICLTensor *> input_tensors;
for (auto &ifm_ind : input_indexes)
- input_tensors.emplace_back(_tensor_builder->at(ifm_ind)->handle());
+ input_tensors.emplace_back(_tensor_reg->getAclTensor(ifm_ind)->handle());
std::unique_ptr<::arm_compute::IFunction> fn;
if (input_indexes.size() < 2)
{
- auto l = std::make_unique<::arm_compute::CLCopy>();
- l->configure(input_tensors.at(0), output_tensor->handle());
- fn = std::move(l);
+ fn = acl_common::generateLayer<arm_compute::CLCopy>(input_tensors.at(0),
+ output_tensor->handle());
}
else
{
- auto l = std::make_unique<::arm_compute::CLConcatenateLayer>();
const auto rank = _ctx.at(ofm_index).shape().rank();
const auto frontend_layout = _current_op_seq_layout;
const auto backend_layout = output_tensor->layout();
const auto fixed_axis =
acl_common::ToARMComputeAxis(rank, axis, frontend_layout, backend_layout).value();
- l->configure(input_tensors, output_tensor->handle(), fixed_axis);
- fn = std::move(l);
+ fn = acl_common::generateLayer<::arm_compute::CLConcatenateLayer>(
+ input_tensors, output_tensor->handle(), fixed_axis);
}
- auto acl_fn = asAclClFunction(std::move(fn));
-
- _return_fn = std::move(acl_fn);
+ _return_fn = asAclFunction(std::move(fn));
}
void KernelGenerator::visit(const ir::operation::FullyConnected &node)
{
const auto output_index{node.getOutputs().at(0)};
- auto output_tensor = _tensor_builder->at(output_index).get();
+ auto output_tensor = _tensor_reg->getAclTensor(output_index).get();
const auto activation = node.param().activation;
- auto fn = acl_common::kernelGenFullyConnected<acl_common::AclClFunction, ::arm_compute::ICLTensor,
+ auto fn = acl_common::kernelGenFullyConnected<acl_common::AclFunction, ::arm_compute::ICLTensor,
::arm_compute::CLFullyConnectedReshapingLayer>(
- node, _ctx, _tensor_builder, _current_op_seq_layout);
+ node, _ctx, _tensor_builder, _tensor_reg, _current_op_seq_layout);
_return_fn = std::make_unique<exec::FunctionSequence>(
std::move(fn), ActivationBuilder::generate(activation, output_tensor->handle()));
}
-void KernelGenerator::visit(const ir::operation::Mul &node)
-{
- const auto ofm_index{node.getOutputs().at(0)};
- const auto lhs_index{node.getInputs().at(ir::operation::Mul::Input::LHS)};
- const auto rhs_index{node.getInputs().at(ir::operation::Mul::Input::RHS)};
-
- const auto activation = node.param().activation;
-
- auto ofm_tensor = _tensor_builder->at(ofm_index).get();
- auto lhs_tensor = _tensor_builder->at(lhs_index).get();
- auto rhs_tensor = _tensor_builder->at(rhs_index).get();
-
- auto fn = std::make_unique<::arm_compute::CLPixelWiseMultiplication>();
-
- fn->configure(lhs_tensor->handle(), rhs_tensor->handle(), ofm_tensor->handle(), 1.0, // scale
- arm_compute::ConvertPolicy::SATURATE, arm_compute::RoundingPolicy::TO_NEAREST_EVEN);
-
- _return_fn = std::make_unique<exec::FunctionSequence>(
- asAclClFunction(std::move(fn)),
- ActivationBuilder::generate(activation, ofm_tensor->handle()));
-}
-
void KernelGenerator::visit(const ir::operation::Reduce &node)
{
const auto output_index{node.getOutputs().at(0)};
@@ -322,8 +286,8 @@ void KernelGenerator::visit(const ir::operation::Reduce &node)
const auto keep_dims{node.param().keep_dims};
const auto reduce_type = node.param().reduce_type;
- auto output_tensor = _tensor_builder->at(output_index).get();
- auto input_tensor = _tensor_builder->at(input_index).get();
+ auto output_tensor = _tensor_reg->getAclTensor(output_index).get();
+ auto input_tensor = _tensor_reg->getAclTensor(input_index).get();
// Convert to ACL axes taking into account negative values and possible duplicates.
const auto &axes = _ctx.at(axes_index);
@@ -334,29 +298,21 @@ void KernelGenerator::visit(const ir::operation::Reduce &node)
std::unique_ptr<arm_compute::IFunction> fn;
if (reduce_type == ir::operation::Reduce::ReduceType::MEAN)
{
- auto l = std::make_unique<::arm_compute::CLReduceMean>();
-
const auto acl_axes =
acl_common::asCoordinates(axes, input_rank, frontend_layout, backend_layout);
- l->configure(input_tensor->handle(), acl_axes, keep_dims, output_tensor->handle());
-
- fn = std::move(l);
+ fn = acl_common::generateLayer<arm_compute::CLReduceMean>(input_tensor->handle(), acl_axes,
+ keep_dims, output_tensor->handle());
}
else
{
- auto l = std::make_unique<::arm_compute::CLReduceOperation>(
- _tensor_builder->acl_tensor_manager()->internal_buffer_manager());
-
const auto acl_axes = acl_common::asSet(axes, input_rank, frontend_layout, backend_layout);
- l->configure(input_tensor->handle(), output_tensor->handle(), acl_axes, keep_dims,
- acl_common::convertReduceType(reduce_type));
- fn = std::move(l);
+ fn = acl_common::generateLayer<arm_compute::CLReduceOperation>(
+ _tensor_builder->acl_tensor_manager()->internal_buffer_manager(), input_tensor->handle(),
+ output_tensor->handle(), acl_axes, keep_dims, acl_common::convertReduceType(reduce_type));
}
- auto acl_fn = asAclClFunction(std::move(fn));
-
- _return_fn = std::move(acl_fn);
+ _return_fn = asAclFunction(std::move(fn));
}
void KernelGenerator::visit(const ir::operation::Reshape &node)
@@ -364,8 +320,8 @@ void KernelGenerator::visit(const ir::operation::Reshape &node)
const auto output_index{node.getOutputs().at(0)};
const auto input_index{node.getInputs().at(ir::operation::Reshape::Input::INPUT)};
- auto output_tensor = _tensor_builder->at(output_index).get();
- auto input_tensor = _tensor_builder->at(input_index).get();
+ auto output_tensor = _tensor_reg->getAclTensor(output_index).get();
+ auto input_tensor = _tensor_reg->getAclTensor(input_index).get();
  // NOTE This operation must not change the layout from frontend to backend
// So, PermutationOperationPass makes layouts of frontend and backend the same.
@@ -376,13 +332,10 @@ void KernelGenerator::visit(const ir::operation::Reshape &node)
UNUSED_RELEASE(frontend_layout);
UNUSED_RELEASE(backend_layout);
- auto fn = std::make_unique<::arm_compute::CLReshapeLayer>();
-
- fn->configure(input_tensor->handle(), output_tensor->handle());
-
- auto acl_fn = asAclClFunction(std::move(fn));
+ auto fn = acl_common::generateLayer<arm_compute::CLReshapeLayer>(input_tensor->handle(),
+ output_tensor->handle());
- _return_fn = std::move(acl_fn);
+ _return_fn = asAclFunction(std::move(fn));
}
void KernelGenerator::visit(const ir::operation::Squeeze &node)
@@ -398,32 +351,11 @@ void KernelGenerator::visit(const ir::operation::Squeeze &node)
(void)dims;
(void)ndim;
- auto output_tensor = _tensor_builder->at(output_index).get();
- auto input_tensor = _tensor_builder->at(input_index).get();
- auto fn = std::make_unique<arm_compute::CLReshapeLayer>();
- fn->configure(input_tensor->handle(), output_tensor->handle());
- auto acl_fn = asAclClFunction(std::move(fn));
- _return_fn = std::move(acl_fn);
-}
-
-void KernelGenerator::visit(const ir::operation::Tanh &node)
-{
- const auto output_index{node.getOutputs().at(0)};
- const auto input_index{node.getInputs().at(ir::operation::Tanh::Input::INPUT)};
-
- auto output_tensor = _tensor_builder->at(output_index).get();
- auto input_tensor = _tensor_builder->at(input_index).get();
-
- auto fn = std::make_unique<arm_compute::CLActivationLayer>();
-
- const ::arm_compute::ActivationLayerInfo act_info{
- ::arm_compute::ActivationLayerInfo::ActivationFunction::TANH, 1.0f, 1.0f};
-
- fn->configure(input_tensor->handle(), output_tensor->handle(), act_info);
-
- auto acl_fn = asAclClFunction(std::move(fn));
-
- _return_fn = std::move(acl_fn);
+ auto output_tensor = _tensor_reg->getAclTensor(output_index).get();
+ auto input_tensor = _tensor_reg->getAclTensor(input_index).get();
+ auto fn = acl_common::generateLayer<arm_compute::CLReshapeLayer>(input_tensor->handle(),
+ output_tensor->handle());
+ _return_fn = asAclFunction(std::move(fn));
}
void KernelGenerator::visit(const ir::operation::Softmax &node)
@@ -433,17 +365,14 @@ void KernelGenerator::visit(const ir::operation::Softmax &node)
const auto beta = node.param().beta;
- auto output_tensor = _tensor_builder->at(output_index).get();
- auto input_tensor = _tensor_builder->at(input_index).get();
-
- auto fn = std::make_unique<::arm_compute::CLSoftmaxLayer>(
- _tensor_builder->acl_tensor_manager()->internal_buffer_manager());
-
- fn->configure(input_tensor->handle(), output_tensor->handle(), beta);
+ auto output_tensor = _tensor_reg->getAclTensor(output_index).get();
+ auto input_tensor = _tensor_reg->getAclTensor(input_index).get();
- auto acl_fn = asAclClFunction(std::move(fn));
+ auto fn = acl_common::generateLayer<arm_compute::CLSoftmaxLayer>(
+ _tensor_builder->acl_tensor_manager()->internal_buffer_manager(), input_tensor->handle(),
+ output_tensor->handle(), beta);
- _return_fn = std::move(acl_fn);
+ _return_fn = asAclFunction(std::move(fn));
}
void KernelGenerator::visit(const ir::operation::Slice &node)
@@ -453,8 +382,8 @@ void KernelGenerator::visit(const ir::operation::Slice &node)
const auto begins_index{node.getInputs().at(ir::operation::Slice::Input::BEGINS)};
const auto sizes_index{node.getInputs().at(ir::operation::Slice::Input::SIZES)};
- auto outputData_tensor = _tensor_builder->at(output_index).get();
- auto inputData_tensor = _tensor_builder->at(input_index).get();
+ auto outputData_tensor = _tensor_reg->getAclTensor(output_index).get();
+ auto inputData_tensor = _tensor_reg->getAclTensor(input_index).get();
const auto frontend_layout = _current_op_seq_layout;
const auto backend_layout = inputData_tensor->layout();
@@ -506,13 +435,10 @@ void KernelGenerator::visit(const ir::operation::Slice &node)
ends_set.set(i, ends[i]);
}
- auto fn = std::make_unique<::arm_compute::CLSlice>();
-
- fn->configure(inputData_tensor->handle(), outputData_tensor->handle(), starts_set, ends_set);
-
- auto acl_fn = asAclClFunction(std::move(fn));
+ auto fn = acl_common::generateLayer<arm_compute::CLSlice>(
+ inputData_tensor->handle(), outputData_tensor->handle(), starts_set, ends_set);
- _return_fn = std::move(acl_fn);
+ _return_fn = asAclFunction(std::move(fn));
}
void KernelGenerator::visit(const ir::operation::StridedSlice &node)
@@ -523,8 +449,8 @@ void KernelGenerator::visit(const ir::operation::StridedSlice &node)
const auto ends_index{node.getInputs().at(ir::operation::StridedSlice::Input::ENDS)};
const auto strides_index{node.getInputs().at(ir::operation::StridedSlice::Input::STRIDES)};
- auto outputData_tensor = _tensor_builder->at(output_index).get();
- auto inputData_tensor = _tensor_builder->at(input_index).get();
+ auto outputData_tensor = _tensor_reg->getAclTensor(output_index).get();
+ auto inputData_tensor = _tensor_reg->getAclTensor(input_index).get();
const auto frontend_layout = _current_op_seq_layout;
const auto backend_layout = inputData_tensor->layout();
@@ -597,14 +523,11 @@ void KernelGenerator::visit(const ir::operation::StridedSlice &node)
strides_set.set(i, strides[i]);
}
- auto fn = std::make_unique<::arm_compute::CLStridedSlice>();
-
- fn->configure(inputData_tensor->handle(), outputData_tensor->handle(), starts_set, ends_set,
- strides_set, begin_mask, end_mask, shrink_axis_mask);
-
- auto acl_fn = asAclClFunction(std::move(fn));
+ auto fn = acl_common::generateLayer<arm_compute::CLStridedSlice>(
+ inputData_tensor->handle(), outputData_tensor->handle(), starts_set, ends_set, strides_set,
+ begin_mask, end_mask, shrink_axis_mask);
- _return_fn = std::move(acl_fn);
+ _return_fn = asAclFunction(std::move(fn));
}
void KernelGenerator::visit(const ir::operation::Transpose &node)
@@ -615,8 +538,8 @@ void KernelGenerator::visit(const ir::operation::Transpose &node)
const auto rank = _ctx.at(ifm_idx).shape().rank();
- auto ofm_tensor = _tensor_builder->at(ofm_idx).get();
- auto ifm_tensor = _tensor_builder->at(ifm_idx).get();
+ auto ofm_tensor = _tensor_reg->getAclTensor(ofm_idx).get();
+ auto ifm_tensor = _tensor_reg->getAclTensor(ifm_idx).get();
const auto frontend_layout = _current_op_seq_layout;
const auto backend_layout = ifm_tensor->layout();
@@ -625,93 +548,168 @@ void KernelGenerator::visit(const ir::operation::Transpose &node)
auto backend_pv = ::onert::backend::acl_common::getARMComputePermutationVector(
rank, pv, frontend_layout, backend_layout);
- auto fn = std::make_unique<::arm_compute::CLPermute>();
-
- fn->configure(ifm_tensor->handle(), ofm_tensor->handle(), backend_pv);
-
- auto acl_fn = asAclClFunction(std::move(fn));
-
- _return_fn = std::move(acl_fn);
-}
-
-void KernelGenerator::visit(const ir::operation::Add &node)
-{
- const auto ofm_index{node.getOutputs().at(0)};
- const auto lhs_index{node.getInputs().at(ir::operation::Add::Input::LHS)};
- const auto rhs_index{node.getInputs().at(ir::operation::Add::Input::RHS)};
-
- const auto activation = node.param().activation;
-
- auto ofm_tensor = _tensor_builder->at(ofm_index).get();
- auto lhs_tensor = _tensor_builder->at(lhs_index).get();
- auto rhs_tensor = _tensor_builder->at(rhs_index).get();
-
- auto fn = std::make_unique<::arm_compute::CLArithmeticAddition>();
-
- fn->configure(lhs_tensor->handle(), rhs_tensor->handle(), ofm_tensor->handle(),
- arm_compute::ConvertPolicy::SATURATE);
+ auto fn = acl_common::generateLayer<::arm_compute::CLPermute>(ifm_tensor->handle(),
+ ofm_tensor->handle(), backend_pv);
- _return_fn = std::make_unique<exec::FunctionSequence>(
- asAclClFunction(std::move(fn)),
- ActivationBuilder::generate(activation, ofm_tensor->handle()));
+ _return_fn = asAclFunction(std::move(fn));
}
-void KernelGenerator::visit(const ir::operation::Sub &node)
+void KernelGenerator::visit(const ir::operation::ElementwiseActivation &node)
{
const auto ofm_index{node.getOutputs().at(0)};
- const auto lhs_index{node.getInputs().at(ir::operation::Sub::Input::LHS)};
- const auto rhs_index{node.getInputs().at(ir::operation::Sub::Input::RHS)};
-
- const auto activation = node.param().activation;
+ const auto ifm_index{node.getInputs().at(ir::operation::ElementwiseActivation::Input::INPUT)};
- auto ofm_tensor = _tensor_builder->at(ofm_index).get();
- auto lhs_tensor = _tensor_builder->at(lhs_index).get();
- auto rhs_tensor = _tensor_builder->at(rhs_index).get();
+ auto ofm_tensor = _tensor_reg->getAclTensor(ofm_index).get();
+ auto ifm_tensor = _tensor_reg->getAclTensor(ifm_index).get();
- auto fn = std::make_unique<::arm_compute::CLArithmeticSubtraction>();
+ const ::arm_compute::ActivationLayerInfo act_info = acl_common::asActivationLayerInfo(
+ node.param().op_type, node.param().alpha, node.param().beta);
- fn->configure(lhs_tensor->handle(), rhs_tensor->handle(), ofm_tensor->handle(),
- arm_compute::ConvertPolicy::SATURATE);
+ auto fn = acl_common::generateLayer<arm_compute::CLActivationLayer>(
+ ifm_tensor->handle(), ofm_tensor->handle(), act_info);
- _return_fn = std::make_unique<exec::FunctionSequence>(
- asAclClFunction(std::move(fn)),
- ActivationBuilder::generate(activation, ofm_tensor->handle()));
+ _return_fn = asAclFunction(std::move(fn));
}
-void KernelGenerator::visit(const ir::operation::Div &node)
+void KernelGenerator::visit(const ir::operation::ElementwiseBinary &node)
{
- const auto ofm_index{node.getOutputs().at(0)};
- const auto lhs_index{node.getInputs().at(ir::operation::Div::Input::LHS)};
- const auto rhs_index{node.getInputs().at(ir::operation::Div::Input::RHS)};
-
- const auto activation = node.param().activation;
-
- auto ofm_tensor = _tensor_builder->at(ofm_index).get();
- auto lhs_tensor = _tensor_builder->at(lhs_index).get();
- auto rhs_tensor = _tensor_builder->at(rhs_index).get();
+ const auto output_index{node.getOutputs().at(0)};
+ const auto lhs_index{node.getInputs().at(ir::operation::ElementwiseBinary::Input::LHS)};
+ const auto rhs_index{node.getInputs().at(ir::operation::ElementwiseBinary::Input::RHS)};
- auto fn = std::make_unique<::arm_compute::CLArithmeticDivision>();
+ auto output_tensor = _tensor_reg->getAclTensor(output_index).get();
+ auto lhs_tensor = _tensor_reg->getAclTensor(lhs_index).get();
+ auto rhs_tensor = _tensor_reg->getAclTensor(rhs_index).get();
- fn->configure(lhs_tensor->handle(), rhs_tensor->handle(), ofm_tensor->handle());
+ std::unique_ptr<arm_compute::IFunction> fn;
+ switch (node.param().op_type)
+ {
+ case ir::operation::ElementwiseBinary::ElementwiseBinaryType::LOGICAL_AND:
+ {
+ fn = acl_common::generateLayer<arm_compute::CLBinaryLogicalOp>(
+ lhs_tensor->handle(), rhs_tensor->handle(), output_tensor->handle(),
+ arm_compute::BinaryLogicalOperation::AND);
+ break;
+ }
+ case ir::operation::ElementwiseBinary::ElementwiseBinaryType::LOGICAL_OR:
+ {
+ fn = acl_common::generateLayer<arm_compute::CLBitwiseOr>(
+ lhs_tensor->handle(), rhs_tensor->handle(), output_tensor->handle());
+ break;
+ }
+ case ir::operation::ElementwiseBinary::ElementwiseBinaryType::MAX:
+ {
+ fn = acl_common::generateLayer<arm_compute::CLElementwiseMax>(
+ lhs_tensor->handle(), rhs_tensor->handle(), output_tensor->handle());
+ break;
+ }
+ case ir::operation::ElementwiseBinary::ElementwiseBinaryType::MIN:
+ {
+ fn = acl_common::generateLayer<arm_compute::CLElementwiseMin>(
+ lhs_tensor->handle(), rhs_tensor->handle(), output_tensor->handle());
+ break;
+ }
+ default:
+ {
+ std::string err_msg("acl_cl KernelGenerator : " + node.name() +
+                         " is not an elementwise-binary operation");
+ assert(false && err_msg.c_str());
+ break;
+ }
+ }
- _return_fn = std::make_unique<exec::FunctionSequence>(
- asAclClFunction(std::move(fn)),
- ActivationBuilder::generate(activation, ofm_tensor->handle()));
+ _return_fn = asAclFunction(std::move(fn));
}
-void KernelGenerator::visit(const ir::operation::Exp &node)
+void KernelGenerator::visit(const ir::operation::ElementwiseUnary &node)
{
const auto output_index{node.getOutputs().at(0)};
- const auto input_index{node.getInputs().at(ir::operation::Exp::Input::INPUT)};
+ const auto input_index{node.getInputs().at(ir::operation::ElementwiseUnary::Input::INPUT)};
- auto output_tensor = _tensor_builder->at(output_index).get();
- auto input_tensor = _tensor_builder->at(input_index).get();
+ auto output_tensor = _tensor_reg->getAclTensor(output_index).get();
+ auto input_tensor = _tensor_reg->getAclTensor(input_index).get();
- auto fn = std::make_unique<::arm_compute::CLExpLayer>();
+ std::unique_ptr<arm_compute::IFunction> fn;
+ switch (node.param().op_type)
+ {
+ case ir::operation::ElementwiseUnary::Type::ABS:
+ {
+ const ::arm_compute::ActivationLayerInfo act_info{
+ ::arm_compute::ActivationLayerInfo::ActivationFunction::ABS};
- fn->configure(input_tensor->handle(), output_tensor->handle());
+ fn = acl_common::generateLayer<arm_compute::CLActivationLayer>(
+ input_tensor->handle(), output_tensor->handle(), act_info);
+ break;
+ }
+ case ir::operation::ElementwiseUnary::Type::CAST:
+ {
+ if (input_tensor->data_type() == output_tensor->data_type())
+ {
+        fn = acl_common::generateLayer<arm_compute::CLCopy>(input_tensor->handle(),
+                                                            output_tensor->handle());
+ }
+ else
+ {
+        // TODO Support converting float to int32 with round-down
+ fn = acl_common::generateLayer<arm_compute::CLCast>(
+ input_tensor->handle(), output_tensor->handle(), arm_compute::ConvertPolicy::SATURATE);
+ }
+ break;
+ }
+ case ir::operation::ElementwiseUnary::Type::DEQUANTIZE:
+ {
+ fn = acl_common::generateLayer<arm_compute::CLDequantizationLayer>(input_tensor->handle(),
+ output_tensor->handle());
+ break;
+ }
+ case ir::operation::ElementwiseUnary::Type::EXP:
+ {
+ fn = acl_common::generateLayer<arm_compute::CLExpLayer>(input_tensor->handle(),
+ output_tensor->handle());
+ break;
+ }
+ case ir::operation::ElementwiseUnary::Type::FLOOR:
+ {
+ fn = acl_common::generateLayer<arm_compute::CLFloor>(input_tensor->handle(),
+ output_tensor->handle());
+ break;
+ }
+ case ir::operation::ElementwiseUnary::Type::LOGICAL_NOT:
+ {
+ fn = acl_common::generateLayer<arm_compute::CLBitwiseNot>(input_tensor->handle(),
+ output_tensor->handle());
+ break;
+ }
+ case ir::operation::ElementwiseUnary::Type::NEG:
+ {
+ fn = acl_common::generateLayer<arm_compute::CLNeg>(input_tensor->handle(),
+ output_tensor->handle());
+ break;
+ }
+ case ir::operation::ElementwiseUnary::Type::RSQRT:
+ {
+ fn = acl_common::generateLayer<arm_compute::CLRsqrtLayer>(input_tensor->handle(),
+ output_tensor->handle());
+ break;
+ }
+ case ir::operation::ElementwiseUnary::Type::SQRT:
+ {
+ const ::arm_compute::ActivationLayerInfo act_info{
+ ::arm_compute::ActivationLayerInfo::ActivationFunction::SQRT};
- auto acl_fn = asAclClFunction(std::move(fn));
+ fn = acl_common::generateLayer<arm_compute::CLActivationLayer>(
+ input_tensor->handle(), output_tensor->handle(), act_info);
+ break;
+ }
+ default:
+ {
+      throw std::runtime_error("acl_cl KernelGenerator : " + node.name() + " is not supported yet");
+ break;
+ }
+ }
+
+ auto acl_fn = asAclFunction(std::move(fn));
_return_fn = std::move(acl_fn);
}
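The ElementwiseActivation visit above leans on an asActivationLayerInfo(op_type, alpha, beta) overload that is not shown here. It presumably reproduces the ActivationLayerInfo values hard-coded by the deleted ReLU/ReLU1/ReLU6/Tanh/Logistic visits; a sketch of the assumed mapping (types beyond these, such as leaky ReLU, are omitted for brevity):

// Assumed mapping (sketch): pick the ACL activation from the IR op parameters.
arm_compute::ActivationLayerInfo asActivationLayerInfo(
    ir::operation::ElementwiseActivation::Type op_type, float alpha, float beta)
{
  using ActFn = arm_compute::ActivationLayerInfo::ActivationFunction;
  switch (op_type)
  {
    case ir::operation::ElementwiseActivation::Type::RELU:
      // Unbounded alpha -> plain RELU; otherwise the bounded variants the
      // removed ReLU1/ReLU6 visits used to emit.
      return std::isinf(alpha) ? arm_compute::ActivationLayerInfo{ActFn::RELU}
                               : arm_compute::ActivationLayerInfo{ActFn::LU_BOUNDED_RELU, alpha, beta};
    case ir::operation::ElementwiseActivation::Type::LOGISTIC:
      return arm_compute::ActivationLayerInfo{ActFn::LOGISTIC};
    case ir::operation::ElementwiseActivation::Type::TANH:
      return arm_compute::ActivationLayerInfo{ActFn::TANH, alpha, beta};
    default:
      throw std::runtime_error("asActivationLayerInfo: unsupported op_type");
  }
}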
@@ -721,16 +719,13 @@ void KernelGenerator::visit(const ir::operation::ExpandDims &node)
const auto output_index{node.getOutputs().at(0)};
const auto input_index{node.getInputs().at(ir::operation::ExpandDims::Input::INPUT)};
- auto output_tensor = _tensor_builder->at(output_index).get();
- auto input_tensor = _tensor_builder->at(input_index).get();
-
- auto fn = std::make_unique<::arm_compute::CLReshapeLayer>();
+ auto output_tensor = _tensor_reg->getAclTensor(output_index).get();
+ auto input_tensor = _tensor_reg->getAclTensor(input_index).get();
- fn->configure(input_tensor->handle(), output_tensor->handle());
+ auto fn = acl_common::generateLayer<arm_compute::CLReshapeLayer>(input_tensor->handle(),
+ output_tensor->handle());
- auto acl_fn = asAclClFunction(std::move(fn));
-
- _return_fn = std::move(acl_fn);
+ _return_fn = asAclFunction(std::move(fn));
}
void KernelGenerator::visit(const ir::operation::InstanceNorm &node)
@@ -740,67 +735,25 @@ void KernelGenerator::visit(const ir::operation::InstanceNorm &node)
const auto gamma_index{node.getInputs().at(ir::operation::InstanceNorm::Input::GAMMA)};
const auto beta_index{node.getInputs().at(ir::operation::InstanceNorm::Input::BETA)};
- auto ofm_tensor = _tensor_builder->at(ofm_index).get();
- auto ifm_tensor = _tensor_builder->at(ifm_index).get();
- auto gamma_tensor = _tensor_builder->at(gamma_index).get();
- auto beta_tensor = _tensor_builder->at(beta_index).get();
+ auto ofm_tensor = _tensor_reg->getAclTensor(ofm_index).get();
+ auto ifm_tensor = _tensor_reg->getAclTensor(ifm_index).get();
+ auto gamma_tensor = _tensor_reg->getAclTensor(gamma_index).get();
+ auto beta_tensor = _tensor_reg->getAclTensor(beta_index).get();
auto epsilon = node.param().epsilon;
auto activation = node.param().activation;
- auto fn = std::make_unique<::arm_compute::CLInstanceNormalizationLayerEx>();
-
- fn->configure(ifm_tensor->handle(), ofm_tensor->handle(), gamma_tensor->handle(),
- beta_tensor->handle(), epsilon);
+ auto fn = acl_common::generateLayer<arm_compute::CLInstanceNormalizationLayerEx>(
+ ifm_tensor->handle(), ofm_tensor->handle(), gamma_tensor->handle(), beta_tensor->handle(),
+ epsilon);
_return_fn = std::make_unique<exec::FunctionSequence>(
- asAclClFunction(std::move(fn)),
- ActivationBuilder::generate(activation, ofm_tensor->handle()));
-}
-
-void KernelGenerator::visit(const ir::operation::Logistic &node)
-{
- const auto ofm_index{node.getOutputs().at(0)};
- const auto ifm_index{node.getInputs().at(ir::operation::Logistic::Input::INPUT)};
-
- auto ofm_tensor = _tensor_builder->at(ofm_index).get();
- auto ifm_tensor = _tensor_builder->at(ifm_index).get();
-
- const ::arm_compute::ActivationLayerInfo act_info{
- ::arm_compute::ActivationLayerInfo::ActivationFunction::LOGISTIC};
-
- auto fn = std::make_unique<::arm_compute::CLActivationLayer>();
-
- fn->configure(ifm_tensor->handle(), ofm_tensor->handle(), act_info);
-
- auto acl_fn = asAclClFunction(std::move(fn));
-
- _return_fn = std::move(acl_fn);
-}
-
-void KernelGenerator::visit(const ir::operation::LogicalAnd &node)
-{
- const auto output_index{node.getOutputs().at(0)};
- const auto input0_index{node.getInputs().at(ir::operation::LogicalAnd::Input::INPUT0)};
- const auto input1_index{node.getInputs().at(ir::operation::LogicalAnd::Input::INPUT1)};
-
- auto output_tensor = _tensor_builder->at(output_index).get();
- auto input0_tensor = _tensor_builder->at(input0_index).get();
- auto input1_tensor = _tensor_builder->at(input1_index).get();
-
- auto fn = std::make_unique<::arm_compute::CLBinaryLogicalOp>();
-
- fn->configure(input0_tensor->handle(), input1_tensor->handle(), output_tensor->handle(),
- ::arm_compute::BinaryLogicalOperation::AND);
-
- auto acl_fn = asAclClFunction(std::move(fn));
-
- _return_fn = std::move(acl_fn);
+ asAclFunction(std::move(fn)), ActivationBuilder::generate(activation, ofm_tensor->handle()));
}
void KernelGenerator::visit(const ir::operation::LSTM &node)
{
- _return_fn = acl_common::kernelGenLSTM<acl_common::AclClFunction, ::arm_compute::ICLTensor,
- ::arm_compute::CLLSTMLayer>(node, _ctx, _tensor_builder);
+ _return_fn = acl_common::kernelGenLSTM<acl_common::AclFunction, ::arm_compute::ICLTensor,
+ ::arm_compute::CLLSTMLayer>(node, _ctx, _tensor_reg);
}
void KernelGenerator::visit(const ir::operation::Comparison &node)
@@ -811,18 +764,15 @@ void KernelGenerator::visit(const ir::operation::Comparison &node)
const auto comparison_type = node.param().comparison_type;
- auto output_tensor = _tensor_builder->at(output_index).get();
- auto input0_tensor = _tensor_builder->at(input0_index).get();
- auto input1_tensor = _tensor_builder->at(input1_index).get();
-
- auto fn = std::make_unique<::arm_compute::CLComparison>();
-
- fn->configure(input0_tensor->handle(), input1_tensor->handle(), output_tensor->handle(),
- (arm_compute::ComparisonOperation)comparison_type);
+ auto output_tensor = _tensor_reg->getAclTensor(output_index).get();
+ auto input0_tensor = _tensor_reg->getAclTensor(input0_index).get();
+ auto input1_tensor = _tensor_reg->getAclTensor(input1_index).get();
- auto acl_fn = asAclClFunction(std::move(fn));
+ auto fn = acl_common::generateLayer<arm_compute::CLComparison>(
+ input0_tensor->handle(), input1_tensor->handle(), output_tensor->handle(),
+ (arm_compute::ComparisonOperation)comparison_type);
- _return_fn = std::move(acl_fn);
+ _return_fn = asAclFunction(std::move(fn));
}
void KernelGenerator::visit(const ir::operation::Pack &node)
@@ -836,26 +786,24 @@ void KernelGenerator::visit(const ir::operation::Pack &node)
for (const auto &input_index : node.getInputs())
input_indexes.emplace_back(input_index);
- auto output = _tensor_builder->at(output_index).get()->handle();
+ auto output = _tensor_reg->getAclTensor(output_index).get()->handle();
std::vector<arm_compute::ICLTensor *> inputs;
for (const auto &input_index : input_indexes)
- inputs.emplace_back(_tensor_builder->at(input_index)->handle());
+ inputs.emplace_back(_tensor_reg->getAclTensor(input_index)->handle());
const auto frontend_layout = _current_op_seq_layout;
- const auto backend_layout = _tensor_builder->at(output_index).get()->layout();
+ const auto backend_layout = _tensor_reg->getAclTensor(output_index).get()->layout();
if (axis < 0)
axis += output_rank;
axis = acl_common::ToARMComputeAxis(output_rank, axis, frontend_layout, backend_layout).value();
- auto fn = std::make_unique<::arm_compute::CLStackLayer>();
-
// Disable applied dim_correction
std::vector<arm_compute::TensorShape> orig_inputs_acl_tensor_shapes;
for (const auto &input_index : input_indexes)
{
size_t input_rank = _ctx.at(input_index).shape().rank();
- const auto &input_tensor = _tensor_builder->at(input_index);
+ const auto &input_tensor = _tensor_reg->getAclTensor(input_index);
orig_inputs_acl_tensor_shapes.emplace_back(input_tensor->info()->tensor_shape());
assert(input_rank == input_tensor->num_dimensions());
if (input_rank != input_tensor->info()->num_dimensions())
@@ -866,7 +814,7 @@ void KernelGenerator::visit(const ir::operation::Pack &node)
}
}
- fn->configure(inputs, axis, output);
+ auto fn = acl_common::generateLayer<arm_compute::CLStackLayer>(inputs, axis, output);
// Revert disabling applied dim_correction
assert(inputs.size() == orig_inputs_acl_tensor_shapes.size());
@@ -875,7 +823,21 @@ void KernelGenerator::visit(const ir::operation::Pack &node)
inputs.at(i)->info()->set_tensor_shape(orig_inputs_acl_tensor_shapes.at(i));
}
- _return_fn = asAclClFunction(std::move(fn));
+ _return_fn = asAclFunction(std::move(fn));
+}
+
+void KernelGenerator::visit(const ir::operation::Pool2D &node)
+{
+ auto raw_fn = acl_common::kernelGenPool2D<::arm_compute::CLPoolingLayer>(
+ node, _ctx, _tensor_reg, _current_op_seq_layout,
+ acl_common::convertPoolType(node.param().op_type));
+
+ const auto ofm_index{node.getOutputs().at(0)};
+ auto ofm_tensor = _tensor_reg->getAclTensor(ofm_index).get();
+ const auto activation = node.param().activation;
+ _return_fn = std::make_unique<exec::FunctionSequence>(
+ asAclFunction(std::move(raw_fn)),
+ ActivationBuilder::generate(activation, ofm_tensor->handle()));
}
void KernelGenerator::visit(const ir::operation::Permute &node)
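The former MaxPool2D/AvgPool2D/L2Pool2D visits are folded into the single Pool2D visit above via acl_common::convertPoolType, which is not part of this excerpt. A sketch of the assumed conversion from the IR pool type to ACL's PoolingType:

// Sketch (assumption): map the Pool2D op parameter to the ACL pooling mode.
arm_compute::PoolingType convertPoolType(ir::operation::Pool2D::PoolType type_ir)
{
  switch (type_ir)
  {
    case ir::operation::Pool2D::PoolType::AVG: return arm_compute::PoolingType::AVG;
    case ir::operation::Pool2D::PoolType::L2:  return arm_compute::PoolingType::L2;
    case ir::operation::Pool2D::PoolType::MAX: return arm_compute::PoolingType::MAX;
    default: throw std::runtime_error("convertPoolType: unsupported pooling type");
  }
}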
@@ -883,8 +845,8 @@ void KernelGenerator::visit(const ir::operation::Permute &node)
const auto ofm_idx{node.getOutputs().at(0)};
const auto ifm_idx{node.getInputs().at(0)};
const auto permute_type = node.getPermuteType();
- auto ofm_tensor = _tensor_builder->at(ofm_idx).get();
- auto ifm_tensor = _tensor_builder->at(ifm_idx).get();
+ auto ofm_tensor = _tensor_reg->getAclTensor(ofm_idx).get();
+ auto ifm_tensor = _tensor_reg->getAclTensor(ifm_idx).get();
const auto rank = _ctx.at(ofm_idx).shape().rank();
assert(_ctx.at(ifm_idx).shape().rank() == _ctx.at(ofm_idx).shape().rank());
@@ -895,70 +857,23 @@ void KernelGenerator::visit(const ir::operation::Permute &node)
// WHCN -> CWHN
pv = arm_compute::PermutationVector{2, 0, 1};
- auto l = std::make_unique<::arm_compute::CLPermute>();
-
- l->configure(ifm_tensor->handle(), ofm_tensor->handle(), pv);
-
- fn = std::move(l);
+ fn = acl_common::generateLayer<arm_compute::CLPermute>(ifm_tensor->handle(),
+ ofm_tensor->handle(), pv);
}
else if (permute_type == ir::operation::Permute::Type::NHWC_TO_NCHW && rank == 4)
{
// CWHN -> WHCN
pv = arm_compute::PermutationVector{1, 2, 0};
- auto l = std::make_unique<::arm_compute::CLPermute>();
-
- l->configure(ifm_tensor->handle(), ofm_tensor->handle(), pv);
-
- fn = std::move(l);
+ fn = acl_common::generateLayer<::arm_compute::CLPermute>(ifm_tensor->handle(),
+ ofm_tensor->handle(), pv);
}
else
{
- auto l = std::make_unique<::arm_compute::CLCopy>();
-
- l->configure(ifm_tensor->handle(), ofm_tensor->handle());
-
- fn = std::move(l);
+ fn = acl_common::generateLayer<arm_compute::CLCopy>(ifm_tensor->handle(), ofm_tensor->handle());
}
- auto acl_fn = asAclClFunction(std::move(fn));
-
- _return_fn = std::move(acl_fn);
-}
-
-void KernelGenerator::visit(const ir::operation::RSQRT &node)
-{
- const auto ofm_index{node.getOutputs().at(0)};
- const auto ifm_index{node.getInputs().at(ir::operation::RSQRT::Input::INPUT)};
-
- auto ofm_tensor = _tensor_builder->at(ofm_index).get();
- auto ifm_tensor = _tensor_builder->at(ifm_index).get();
-
- auto fn = std::make_unique<::arm_compute::CLRsqrtLayer>();
-
- fn->configure(ifm_tensor->handle(), ofm_tensor->handle());
-
- _return_fn = asAclClFunction(std::move(fn));
-}
-
-void KernelGenerator::visit(const ir::operation::ReLU &node)
-{
- const auto output_index{node.getOutputs().at(0)};
- const auto input_index{node.getInputs().at(ir::operation::ReLU::Input::INPUT)};
-
- auto output_tensor = _tensor_builder->at(output_index).get();
- auto input_tensor = _tensor_builder->at(input_index).get();
-
- auto fn = std::make_unique<arm_compute::CLActivationLayer>();
-
- const ::arm_compute::ActivationLayerInfo act_info{
- ::arm_compute::ActivationLayerInfo::ActivationFunction::RELU};
-
- fn->configure(input_tensor->handle(), output_tensor->handle(), act_info);
-
- auto acl_fn = asAclClFunction(std::move(fn));
-
- _return_fn = std::move(acl_fn);
+ _return_fn = asAclFunction(std::move(fn));
}
void KernelGenerator::visit(const ir::operation::ResizeBilinear &node)
@@ -967,58 +882,32 @@ void KernelGenerator::visit(const ir::operation::ResizeBilinear &node)
const auto ifm_index{node.getInputs().at(ir::operation::ResizeBilinear::Input::INPUT)};
- auto ofm_tensor = _tensor_builder->at(ofm_index).get();
- auto ifm_tensor = _tensor_builder->at(ifm_index).get();
+ auto ofm_tensor = _tensor_reg->getAclTensor(ofm_index).get();
+ auto ifm_tensor = _tensor_reg->getAclTensor(ifm_index).get();
- auto fn = std::make_unique<::arm_compute::CLScale>();
+ auto fn = acl_common::generateLayer<arm_compute::CLScale>(
+ ifm_tensor->handle(), ofm_tensor->handle(), ::arm_compute::InterpolationPolicy::BILINEAR,
+ ::arm_compute::BorderMode::REPLICATE, ::arm_compute::PixelValue(0.f),
+ ::arm_compute::SamplingPolicy::TOP_LEFT);
- fn->configure(ifm_tensor->handle(), ofm_tensor->handle(),
- ::arm_compute::InterpolationPolicy::BILINEAR, ::arm_compute::BorderMode::REPLICATE,
- ::arm_compute::PixelValue(0.f), ::arm_compute::SamplingPolicy::TOP_LEFT);
-
- auto acl_fn = asAclClFunction(std::move(fn));
-
- _return_fn = std::move(acl_fn);
-}
-
-void KernelGenerator::visit(const ir::operation::ReLU1 &node)
-{
- const auto ofm_index{node.getOutputs().at(0)};
- const auto ifm_index{node.getInputs().at(ir::operation::ReLU1::Input::INPUT)};
-
- auto ofm_tensor = _tensor_builder->at(ofm_index).get();
- auto ifm_tensor = _tensor_builder->at(ifm_index).get();
-
- const ::arm_compute::ActivationLayerInfo act_info{
- ::arm_compute::ActivationLayerInfo::ActivationFunction::LU_BOUNDED_RELU, 1.0f, -1.0f};
-
- auto fn = std::make_unique<::arm_compute::CLActivationLayer>();
-
- fn->configure(ifm_tensor->handle(), ofm_tensor->handle(), act_info);
-
- auto acl_fn = asAclClFunction(std::move(fn));
-
- _return_fn = std::move(acl_fn);
+ _return_fn = asAclFunction(std::move(fn));
}
-void KernelGenerator::visit(const ir::operation::ReLU6 &node)
+void KernelGenerator::visit(const ir::operation::ResizeNearestNeighbor &node)
{
const auto ofm_index{node.getOutputs().at(0)};
- const auto ifm_index{node.getInputs().at(ir::operation::ReLU6::Input::INPUT)};
-
- auto ofm_tensor = _tensor_builder->at(ofm_index).get();
- auto ifm_tensor = _tensor_builder->at(ifm_index).get();
- const ::arm_compute::ActivationLayerInfo act_info{
- ::arm_compute::ActivationLayerInfo::ActivationFunction::BOUNDED_RELU, 6.0f};
+ const auto ifm_index{node.getInputs().at(ir::operation::ResizeNearestNeighbor::Input::INPUT)};
- auto fn = std::make_unique<::arm_compute::CLActivationLayer>();
+ auto ofm_tensor = _tensor_reg->getAclTensor(ofm_index).get();
+ auto ifm_tensor = _tensor_reg->getAclTensor(ifm_index).get();
- fn->configure(ifm_tensor->handle(), ofm_tensor->handle(), act_info);
+ auto fn = acl_common::generateLayer<arm_compute::CLScale>(
+ ifm_tensor->handle(), ofm_tensor->handle(),
+ ::arm_compute::InterpolationPolicy::NEAREST_NEIGHBOR, ::arm_compute::BorderMode::REPLICATE,
+ ::arm_compute::PixelValue(0.f), ::arm_compute::SamplingPolicy::TOP_LEFT);
- auto acl_fn = asAclClFunction(std::move(fn));
-
- _return_fn = std::move(acl_fn);
+ _return_fn = asAclFunction(std::move(fn));
}
void KernelGenerator::visit(const ir::operation::RNN &node)
@@ -1036,43 +925,25 @@ void KernelGenerator::visit(const ir::operation::RNN &node)
const auto activation = node.param().activation;
- auto output_tensor = _tensor_builder->at(output_index).get();
- auto hidden_state_out_tensor = _tensor_builder->at(hidden_state_out_index).get();
+ auto output_tensor = _tensor_reg->getAclTensor(output_index).get();
+ auto hidden_state_out_tensor = _tensor_reg->getAclTensor(hidden_state_out_index).get();
- auto input_tensor = _tensor_builder->at(input_index).get();
- auto weights_tensor = _tensor_builder->at(weights_index).get();
- auto recurrent_weights_tensor = _tensor_builder->at(recurrent_weights_index).get();
- auto bias_tensor = _tensor_builder->at(bias_index).get();
- auto hidden_state_in_tensor = _tensor_builder->at(hidden_state_in_index).get();
+ auto input_tensor = _tensor_reg->getAclTensor(input_index).get();
+ auto weights_tensor = _tensor_reg->getAclTensor(weights_index).get();
+ auto recurrent_weights_tensor = _tensor_reg->getAclTensor(recurrent_weights_index).get();
+ auto bias_tensor = _tensor_reg->getAclTensor(bias_index).get();
+ auto hidden_state_in_tensor = _tensor_reg->getAclTensor(hidden_state_in_index).get();
auto act_info = ::onert::backend::acl_common::asActivationLayerInfo(activation);
- auto copy_layer = std::make_unique<::arm_compute::CLCopy>();
- copy_layer->configure(hidden_state_in_tensor->handle(), hidden_state_out_tensor->handle());
- _return_fn = asAclClFunction(std::move(copy_layer));
+ auto copy_layer = acl_common::generateLayer<arm_compute::CLCopy>(
+ hidden_state_in_tensor->handle(), hidden_state_out_tensor->handle());
+ _return_fn = asAclFunction(std::move(copy_layer));
- auto fn = std::make_unique<::arm_compute::CLRNNLayer>(
- _tensor_builder->acl_tensor_manager()->internal_buffer_manager());
- fn->configure(input_tensor->handle(), weights_tensor->handle(),
- recurrent_weights_tensor->handle(), bias_tensor->handle(),
- hidden_state_out_tensor->handle(), output_tensor->handle(), act_info);
- _return_fn = asAclClFunction(std::move(fn));
-}
-
-void KernelGenerator::visit(const ir::operation::Floor &node)
-{
- const auto ofm_index{node.getOutputs().at(0)};
- const auto ifm_index{node.getInputs().at(ir::operation::Floor::Input::INPUT)};
-
- auto ofm_tensor = _tensor_builder->at(ofm_index).get();
- auto ifm_tensor = _tensor_builder->at(ifm_index).get();
-
- auto fn = std::make_unique<::arm_compute::CLFloor>();
-
- fn->configure(ifm_tensor->handle(), ofm_tensor->handle());
-
- auto acl_fn = asAclClFunction(std::move(fn));
-
- _return_fn = std::move(acl_fn);
+ auto fn = acl_common::generateLayer<arm_compute::CLRNNLayer>(
+ _tensor_builder->acl_tensor_manager()->internal_buffer_manager(), input_tensor->handle(),
+ weights_tensor->handle(), recurrent_weights_tensor->handle(), bias_tensor->handle(),
+ hidden_state_out_tensor->handle(), output_tensor->handle(), act_info);
+ _return_fn = asAclFunction(std::move(fn));
}
void KernelGenerator::visit(const ir::operation::SpaceToBatchND &node)
@@ -1083,24 +954,19 @@ void KernelGenerator::visit(const ir::operation::SpaceToBatchND &node)
node.getInputs().at(ir::operation::SpaceToBatchND::Input::BLOCK_SIZE)};
const auto paddings_index{node.getInputs().at(ir::operation::SpaceToBatchND::Input::PADDINGS)};
- auto ofm_tensor = _tensor_builder->at(ofm_index).get();
- auto ifm_tensor = _tensor_builder->at(ifm_index).get();
- auto block_size_tensor = _tensor_builder->at(block_size_index).get();
- auto paddings_tensor = _tensor_builder->at(paddings_index).get();
+ auto ofm_tensor = _tensor_reg->getAclTensor(ofm_index).get();
+ auto ifm_tensor = _tensor_reg->getAclTensor(ifm_index).get();
+ auto block_size_tensor = _tensor_reg->getAclTensor(block_size_index).get();
+ auto paddings_tensor = _tensor_reg->getAclTensor(paddings_index).get();
assert(_ctx.at(block_size_index).data());
assert(_ctx.at(paddings_index).data());
- std::unique_ptr<::arm_compute::IFunction> fn;
-
- auto l = std::make_unique<::arm_compute::CLSpaceToBatchLayer>();
- l->configure(ifm_tensor->handle(), block_size_tensor->handle(), paddings_tensor->handle(),
- ofm_tensor->handle());
- fn = std::move(l);
-
- auto acl_fn = asAclClFunction(std::move(fn));
+ auto fn = acl_common::generateLayer<arm_compute::CLSpaceToBatchLayer>(
+ ifm_tensor->handle(), block_size_tensor->handle(), paddings_tensor->handle(),
+ ofm_tensor->handle());
- _return_fn = std::move(acl_fn);
+ _return_fn = asAclFunction(std::move(fn));
}
void KernelGenerator::visit(const ir::operation::SpaceToDepth &node)
@@ -1110,29 +976,13 @@ void KernelGenerator::visit(const ir::operation::SpaceToDepth &node)
auto block_size = node.param().block_size;
- auto ofm_tensor = _tensor_builder->at(ofm_index).get();
- auto ifm_tensor = _tensor_builder->at(ifm_index).get();
-
- auto fn = std::make_unique<::arm_compute::CLSpaceToDepthLayer>();
-
- fn->configure(ifm_tensor->handle(), ofm_tensor->handle(), block_size);
-
- auto acl_fn = asAclClFunction(std::move(fn));
-
- _return_fn = std::move(acl_fn);
-}
+ auto ofm_tensor = _tensor_reg->getAclTensor(ofm_index).get();
+ auto ifm_tensor = _tensor_reg->getAclTensor(ifm_index).get();
-void KernelGenerator::visit(const ir::operation::L2Pool2D &node)
-{
- auto raw_fn = acl_common::kernelGenPool2D<::arm_compute::CLPoolingLayer>(
- node, _ctx, _tensor_builder, _current_op_seq_layout, ::arm_compute::PoolingType::L2);
+ auto fn = acl_common::generateLayer<arm_compute::CLSpaceToDepthLayer>(
+ ifm_tensor->handle(), ofm_tensor->handle(), block_size);
- const auto ofm_index{node.getOutputs().at(0)};
- auto ofm_tensor = _tensor_builder->at(ofm_index).get();
- const auto activation = node.param().activation;
- _return_fn = std::make_unique<exec::FunctionSequence>(
- asAclClFunction(std::move(raw_fn)),
- ActivationBuilder::generate(activation, ofm_tensor->handle()));
+ _return_fn = asAclFunction(std::move(fn));
}
void KernelGenerator::visit(const ir::operation::EmbeddingLookup &node)
@@ -1141,17 +991,14 @@ void KernelGenerator::visit(const ir::operation::EmbeddingLookup &node)
const auto lookups_index{node.getInputs().at(ir::operation::EmbeddingLookup::Input::LOOKUPS)};
const auto values_index{node.getInputs().at(ir::operation::EmbeddingLookup::Input::VALUES)};
- auto output_tensor = _tensor_builder->at(output_index).get();
- auto lookups_tensor = _tensor_builder->at(lookups_index).get();
- auto values_tensor = _tensor_builder->at(values_index).get();
-
- auto fn = std::make_unique<::arm_compute::CLEmbeddingLookup>();
-
- fn->configure(values_tensor->handle(), output_tensor->handle(), lookups_tensor->handle());
+ auto output_tensor = _tensor_reg->getAclTensor(output_index).get();
+ auto lookups_tensor = _tensor_reg->getAclTensor(lookups_index).get();
+ auto values_tensor = _tensor_reg->getAclTensor(values_index).get();
- auto acl_fn = asAclClFunction(std::move(fn));
+ auto fn = acl_common::generateLayer<arm_compute::CLEmbeddingLookup>(
+ values_tensor->handle(), output_tensor->handle(), lookups_tensor->handle());
- _return_fn = std::move(acl_fn);
+ _return_fn = asAclFunction(std::move(fn));
}
void KernelGenerator::visit(const ir::operation::L2Normalization &node)
@@ -1173,19 +1020,16 @@ void KernelGenerator::visit(const ir::operation::L2Normalization &node)
float beta = 0.5f; // pow(reduction, -0.5) = 1 / sqrt(reduction)
float bias = 0.0f; // Don't offset the reduction.
- auto ofm_tensor = _tensor_builder->at(ofm_index).get();
- auto ifm_tensor = _tensor_builder->at(ifm_index).get();
+ auto ofm_tensor = _tensor_reg->getAclTensor(ofm_index).get();
+ auto ifm_tensor = _tensor_reg->getAclTensor(ifm_index).get();
const auto norm_info = ::arm_compute::NormalizationLayerInfo(::arm_compute::NormType::CROSS_MAP,
radius, alpha, beta, bias, false);
- auto fn = std::make_unique<::arm_compute::CLNormalizationLayer>();
-
- fn->configure(ifm_tensor->handle(), ofm_tensor->handle(), norm_info);
-
- auto acl_fn = asAclClFunction(std::move(fn));
+ auto fn = acl_common::generateLayer<arm_compute::CLNormalizationLayer>(
+ ifm_tensor->handle(), ofm_tensor->handle(), norm_info);
- _return_fn = std::move(acl_fn);
+ _return_fn = asAclFunction(std::move(fn));
}
void KernelGenerator::visit(const ir::operation::HashtableLookup &node)
@@ -1197,21 +1041,18 @@ void KernelGenerator::visit(const ir::operation::HashtableLookup &node)
const auto keys_index{node.getInputs().at(ir::operation::HashtableLookup::Input::KEYS)};
const auto values_index{node.getInputs().at(ir::operation::HashtableLookup::Input::VALUES)};
- auto output_tensor = _tensor_builder->at(output_index).get();
- auto hits_tensor = _tensor_builder->at(hits_index).get();
-
- auto lookups_tensor = _tensor_builder->at(lookups_index).get();
- auto keys_tensor = _tensor_builder->at(keys_index).get();
- auto values_tensor = _tensor_builder->at(values_index).get();
-
- auto fn = std::make_unique<::arm_compute::CLHashtableLookup>();
+ auto output_tensor = _tensor_reg->getAclTensor(output_index).get();
+ auto hits_tensor = _tensor_reg->getAclTensor(hits_index).get();
- fn->configure(lookups_tensor->handle(), keys_tensor->handle(), values_tensor->handle(),
- output_tensor->handle(), hits_tensor->handle());
+ auto lookups_tensor = _tensor_reg->getAclTensor(lookups_index).get();
+ auto keys_tensor = _tensor_reg->getAclTensor(keys_index).get();
+ auto values_tensor = _tensor_reg->getAclTensor(values_index).get();
- auto acl_fn = asAclClFunction(std::move(fn));
+ auto fn = acl_common::generateLayer<arm_compute::CLHashtableLookup>(
+ lookups_tensor->handle(), keys_tensor->handle(), values_tensor->handle(),
+ output_tensor->handle(), hits_tensor->handle());
- _return_fn = std::move(acl_fn);
+ _return_fn = asAclFunction(std::move(fn));
}
void KernelGenerator::visit(const ir::operation::PReLU &node)
@@ -1220,17 +1061,14 @@ void KernelGenerator::visit(const ir::operation::PReLU &node)
const auto ifm_index{node.getInputs().at(ir::operation::PReLU::Input::INPUT)};
const auto alpha_index{node.getInputs().at(ir::operation::PReLU::Input::ALPHA)};
- auto ofm_tensor = _tensor_builder->at(ofm_index).get();
- auto ifm_tensor = _tensor_builder->at(ifm_index).get();
- auto alpha_tensor = _tensor_builder->at(alpha_index).get();
-
- auto fn = std::make_unique<::arm_compute::CLPReluLayer>();
-
- fn->configure(ifm_tensor->handle(), alpha_tensor->handle(), ofm_tensor->handle());
+ auto ofm_tensor = _tensor_reg->getAclTensor(ofm_index).get();
+ auto ifm_tensor = _tensor_reg->getAclTensor(ifm_index).get();
+ auto alpha_tensor = _tensor_reg->getAclTensor(alpha_index).get();
- auto acl_fn = asAclClFunction(std::move(fn));
+ auto fn = acl_common::generateLayer<arm_compute::CLPReluLayer>(
+ ifm_tensor->handle(), alpha_tensor->handle(), ofm_tensor->handle());
- _return_fn = std::move(acl_fn);
+ _return_fn = asAclFunction(std::move(fn));
}
void KernelGenerator::visit(const ir::operation::TransposeConv &node)
@@ -1258,77 +1096,18 @@ void KernelGenerator::visit(const ir::operation::TransposeConv &node)
invalid_vertical = ofm_shape.H - (1 + (ifm_shape.H - 1) * stride.vertical) - (ker_shape.H - 1);
}
- auto ofm_tensor = _tensor_builder->at(ofm_index).get();
- auto ifm_tensor = _tensor_builder->at(ifm_index).get();
- auto ker_tensor = _tensor_builder->at(ker_index).get();
+ auto ofm_tensor = _tensor_reg->getAclTensor(ofm_index).get();
+ auto ifm_tensor = _tensor_reg->getAclTensor(ifm_index).get();
+ auto ker_tensor = _tensor_reg->getAclTensor(ker_index).get();
const auto tconv_info = acl_common::asPadStrideInfo(padding, stride);
- auto fn = std::make_unique<::arm_compute::CLTransposeConvLayer>(
- _tensor_builder->acl_tensor_manager()->internal_buffer_manager());
-
- fn->configure(ifm_tensor->handle(), ker_tensor->handle(), nullptr, ofm_tensor->handle(),
- tconv_info, invalid_horizontal, invalid_vertical);
-
- auto acl_fn = asAclClFunction(std::move(fn));
-
- _return_fn = std::move(acl_fn);
-}
-
-void KernelGenerator::visit(const ir::operation::SQRT &node)
-{
- const auto output_index{node.getOutputs().at(0)};
- const auto input_index{node.getInputs().at(ir::operation::SQRT::Input::INPUT)};
-
- auto output_tensor = _tensor_builder->at(output_index).get();
- auto input_tensor = _tensor_builder->at(input_index).get();
-
- const ::arm_compute::ActivationLayerInfo act_info{
- ::arm_compute::ActivationLayerInfo::ActivationFunction::SQRT};
-
- auto fn = std::make_unique<::arm_compute::CLActivationLayer>();
-
- fn->configure(input_tensor->handle(), output_tensor->handle(), act_info);
-
- auto acl_fn = asAclClFunction(std::move(fn));
-
- _return_fn = std::move(acl_fn);
-}
-
-void KernelGenerator::visit(const ir::operation::LogicalOr &node)
-{
- const auto output_index{node.getOutputs().at(0)};
- const auto input0_index{node.getInputs().at(ir::operation::LogicalOr::Input::INPUT0)};
- const auto input1_index{node.getInputs().at(ir::operation::LogicalOr::Input::INPUT1)};
-
- auto output_tensor = _tensor_builder->at(output_index).get();
- auto input0_tensor = _tensor_builder->at(input0_index).get();
- auto input1_tensor = _tensor_builder->at(input1_index).get();
-
- auto fn = std::make_unique<::arm_compute::CLBitwiseOr>();
-
- fn->configure(input0_tensor->handle(), input1_tensor->handle(), output_tensor->handle());
-
- auto acl_fn = asAclClFunction(std::move(fn));
-
- _return_fn = std::move(acl_fn);
-}
-
-void KernelGenerator::visit(const ir::operation::LogicalNot &node)
-{
- const auto output_index{node.getOutputs().at(0)};
- const auto input_index{node.getInputs().at(ir::operation::LogicalNot::Input::INPUT)};
-
- auto output_tensor = _tensor_builder->at(output_index).get();
- auto input_tensor = _tensor_builder->at(input_index).get();
-
- auto fn = std::make_unique<::arm_compute::CLBitwiseNot>();
+ auto fn = acl_common::generateLayer<arm_compute::CLTransposeConvLayer>(
+ _tensor_builder->acl_tensor_manager()->internal_buffer_manager(), ifm_tensor->handle(),
+ ker_tensor->handle(), nullptr, ofm_tensor->handle(), tconv_info, invalid_horizontal,
+ invalid_vertical);
- fn->configure(input_tensor->handle(), output_tensor->handle());
-
- auto acl_fn = asAclClFunction(std::move(fn));
-
- _return_fn = std::move(acl_fn);
+ _return_fn = asAclFunction(std::move(fn));
}
void KernelGenerator::visit(const ir::operation::SquaredDifference &node)
@@ -1337,17 +1116,14 @@ void KernelGenerator::visit(const ir::operation::SquaredDifference &node)
const auto lhs_index{node.getInputs().at(ir::operation::SquaredDifference::Input::LHS)};
const auto rhs_index{node.getInputs().at(ir::operation::SquaredDifference::Input::RHS)};
- auto ofm_tensor = _tensor_builder->at(ofm_index).get();
- auto lhs_tensor = _tensor_builder->at(lhs_index).get();
- auto rhs_tensor = _tensor_builder->at(rhs_index).get();
+ auto ofm_tensor = _tensor_reg->getAclTensor(ofm_index).get();
+ auto lhs_tensor = _tensor_reg->getAclTensor(lhs_index).get();
+ auto rhs_tensor = _tensor_reg->getAclTensor(rhs_index).get();
- auto fn = std::make_unique<::arm_compute::CLElementwiseSquaredDiff>();
+ auto fn = acl_common::generateLayer<arm_compute::CLElementwiseSquaredDiff>(
+ lhs_tensor->handle(), rhs_tensor->handle(), ofm_tensor->handle());
- fn->configure(lhs_tensor->handle(), rhs_tensor->handle(), ofm_tensor->handle());
-
- auto acl_fn = asAclClFunction(std::move(fn));
-
- _return_fn = std::move(acl_fn);
+ _return_fn = asAclFunction(std::move(fn));
}
void KernelGenerator::visit(const ir::operation::TopKV2 &node)
@@ -1364,17 +1140,14 @@ void KernelGenerator::visit(const ir::operation::TopKV2 &node)
const auto k = node.param().k;
- auto values_tensor = _tensor_builder->at(outputValues_index).get();
- auto indices_tensor = _tensor_builder->at(outputIndices_index).get();
- auto input_tensor = _tensor_builder->at(inputData_index).get();
+ auto values_tensor = _tensor_reg->getAclTensor(outputValues_index).get();
+ auto indices_tensor = _tensor_reg->getAclTensor(outputIndices_index).get();
+ auto input_tensor = _tensor_reg->getAclTensor(inputData_index).get();
- auto fn = std::make_unique<::arm_compute::CLTopKV2>();
+ auto fn = acl_common::generateLayer<arm_compute::CLTopKV2>(
+ input_tensor->handle(), k, values_tensor->handle(), indices_tensor->handle());
- fn->configure(input_tensor->handle(), k, values_tensor->handle(), indices_tensor->handle());
-
- auto acl_fn = asAclClFunction(std::move(fn));
-
- _return_fn = std::move(acl_fn);
+ _return_fn = asAclFunction(std::move(fn));
}
void KernelGenerator::visit(const ir::operation::Gather &node)
@@ -1389,9 +1162,9 @@ void KernelGenerator::visit(const ir::operation::Gather &node)
const auto axis_value = (axis_raw < 0 ? (ifm_rank + axis_raw) : axis_raw);
const int axis = ::onert::backend::acl_common::ToARMComputeAxis(ifm_rank, axis_value).value();
- auto ofm_tensor = _tensor_builder->at(ofm_index).get();
- auto ifm_tensor = _tensor_builder->at(ifm_index).get();
- auto indices_tensor = _tensor_builder->at(indices_index).get();
+ auto ofm_tensor = _tensor_reg->getAclTensor(ofm_index).get();
+ auto ifm_tensor = _tensor_reg->getAclTensor(ifm_index).get();
+ auto indices_tensor = _tensor_reg->getAclTensor(indices_index).get();
// NOTE The frontend layout and backend layout must be the same for this operation.
// If not the same, we have to add a stage(?) to perform permutation of output tensor. It
@@ -1407,8 +1180,6 @@ void KernelGenerator::visit(const ir::operation::Gather &node)
assert(backend_layout == indices_tensor->layout());
assert(ifm_rank < 4 || _current_op_seq_layout == backend_layout);
- auto fn = std::make_unique<::arm_compute::CLGatherEx>();
-
// input is n-D, indices k-D, output is (n + k - 1)-D
size_t n = ifm_rank;
assert(n == ifm_tensor->num_dimensions());
@@ -1433,52 +1204,14 @@ void KernelGenerator::visit(const ir::operation::Gather &node)
acl_common::asTensorShape(indices.shape(), _current_op_seq_layout, backend_layout, false));
}
- fn->configure(ifm_tensor->handle(), indices_tensor->handle(), ofm_tensor->handle(), axis);
+ auto fn = acl_common::generateLayer<arm_compute::CLGatherEx>(
+ ifm_tensor->handle(), indices_tensor->handle(), ofm_tensor->handle(), axis);
// Revert disabling applied dim_correction
ifm_tensor->info()->set_tensor_shape(orig_ifm_acl_tensor_shape);
indices_tensor->info()->set_tensor_shape(orig_indice_acl_tensor_shape);
- auto acl_fn = asAclClFunction(std::move(fn));
-
- _return_fn = std::move(acl_fn);
-}
-
-void KernelGenerator::visit(const ir::operation::Neg &node)
-{
- const auto ofm_index{node.getOutputs().at(0)};
- const auto ifm_index{node.getInputs().at(ir::operation::Neg::Input::INPUT)};
-
- auto ofm_tensor = _tensor_builder->at(ofm_index).get();
- auto ifm_tensor = _tensor_builder->at(ifm_index).get();
-
- auto fn = std::make_unique<::arm_compute::CLNeg>();
-
- fn->configure(ifm_tensor->handle(), ofm_tensor->handle());
-
- auto acl_fn = asAclClFunction(std::move(fn));
-
- _return_fn = std::move(acl_fn);
-}
-
-void KernelGenerator::visit(const ir::operation::Abs &node)
-{
- const auto output_index{node.getOutputs().at(0)};
- const auto input_index{node.getInputs().at(ir::operation::Abs::Input::INPUT)};
-
- auto output_tensor = _tensor_builder->at(output_index).get();
- auto input_tensor = _tensor_builder->at(input_index).get();
-
- const ::arm_compute::ActivationLayerInfo act_info{
- ::arm_compute::ActivationLayerInfo::ActivationFunction::ABS};
-
- auto fn = std::make_unique<::arm_compute::CLActivationLayer>();
-
- fn->configure(input_tensor->handle(), output_tensor->handle(), act_info);
-
- auto acl_fn = asAclClFunction(std::move(fn));
-
- _return_fn = std::move(acl_fn);
+ _return_fn = asAclFunction(std::move(fn));
}
void KernelGenerator::visit(const ir::operation::ArgMax &node)
@@ -1491,8 +1224,8 @@ void KernelGenerator::visit(const ir::operation::ArgMax &node)
assert((ifm_shape.rank() - 1) == ofm_shape.rank());
- auto ofm_tensor = _tensor_builder->at(ofm_index).get();
- auto ifm_tensor = _tensor_builder->at(ifm_index).get();
+ auto ofm_tensor = _tensor_reg->getAclTensor(ofm_index).get();
+ auto ifm_tensor = _tensor_reg->getAclTensor(ifm_index).get();
const auto ifm_rank = _ctx.at(ifm_index).shape().rank();
auto frontend_layout = _current_op_seq_layout;
auto backend_layout = ifm_tensor->layout();
@@ -1506,31 +1239,11 @@ void KernelGenerator::visit(const ir::operation::ArgMax &node)
auto acl_axis =
acl_common::ToARMComputeAxis(ifm_rank, axis_value, frontend_layout, backend_layout).value();
- auto fn = std::make_unique<::arm_compute::CLArgMinMaxLayer>();
+ auto fn = acl_common::generateLayer<arm_compute::CLArgMinMaxLayer>(
+ ifm_tensor->handle(), acl_axis, ofm_tensor->handle(),
+ ::arm_compute::ReductionOperation::ARG_IDX_MAX);
- fn->configure(ifm_tensor->handle(), acl_axis, ofm_tensor->handle(),
- ::arm_compute::ReductionOperation::ARG_IDX_MAX);
-
- auto acl_fn = asAclClFunction(std::move(fn));
-
- _return_fn = std::move(acl_fn);
-}
-
-void KernelGenerator::visit(const ir::operation::Dequantize &node)
-{
- const auto output_index{node.getOutputs().at(0)};
- const auto input_index{node.getInputs().at(ir::operation::Dequantize::Input::INPUT)};
-
- auto output_tensor = _tensor_builder->at(output_index).get();
- auto input_tensor = _tensor_builder->at(input_index).get();
-
- auto fn = std::make_unique<::arm_compute::CLDequantizationLayer>();
-
- fn->configure(input_tensor->handle(), output_tensor->handle());
-
- auto acl_fn = asAclClFunction(std::move(fn));
-
- _return_fn = std::move(acl_fn);
+ _return_fn = asAclFunction(std::move(fn));
}
void KernelGenerator::visit(const ir::operation::LocalResponseNormalization &node)
@@ -1544,19 +1257,16 @@ void KernelGenerator::visit(const ir::operation::LocalResponseNormalization &nod
auto beta = node.param().beta;
auto bias = node.param().bias;
- auto ofm_tensor = _tensor_builder->at(ofm_index).get();
- auto ifm_tensor = _tensor_builder->at(ifm_index).get();
+ auto ofm_tensor = _tensor_reg->getAclTensor(ofm_index).get();
+ auto ifm_tensor = _tensor_reg->getAclTensor(ifm_index).get();
const auto norm_info = ::arm_compute::NormalizationLayerInfo(
::arm_compute::NormType::CROSS_MAP, radius * 2 + 1, alpha, beta, bias, false);
- auto fn = std::make_unique<::arm_compute::CLNormalizationLayer>();
-
- fn->configure(ifm_tensor->handle(), ofm_tensor->handle(), norm_info);
-
- auto acl_fn = asAclClFunction(std::move(fn));
+ auto fn = acl_common::generateLayer<arm_compute::CLNormalizationLayer>(
+ ifm_tensor->handle(), ofm_tensor->handle(), norm_info);
- _return_fn = std::move(acl_fn);
+ _return_fn = asAclFunction(std::move(fn));
}
void KernelGenerator::visit(const ir::operation::DepthToSpace &node)
@@ -1567,16 +1277,13 @@ void KernelGenerator::visit(const ir::operation::DepthToSpace &node)
auto block_size = node.param().block_size;
assert(block_size > 0);
- auto output_tensor = _tensor_builder->at(output_index).get();
- auto input_tensor = _tensor_builder->at(input_index).get();
-
- auto fn = std::make_unique<::arm_compute::CLDepthToSpaceLayer>();
-
- fn->configure(input_tensor->handle(), output_tensor->handle(), block_size);
+ auto output_tensor = _tensor_reg->getAclTensor(output_index).get();
+ auto input_tensor = _tensor_reg->getAclTensor(input_index).get();
- auto acl_fn = asAclClFunction(std::move(fn));
+ auto fn = acl_common::generateLayer<arm_compute::CLDepthToSpaceLayer>(
+ input_tensor->handle(), output_tensor->handle(), block_size);
- _return_fn = std::move(acl_fn);
+ _return_fn = asAclFunction(std::move(fn));
}
void KernelGenerator::visit(const ir::operation::Split &node)
@@ -1590,10 +1297,10 @@ void KernelGenerator::visit(const ir::operation::Split &node)
for (const auto &output : node.getOutputs())
output_indexes.emplace_back(output);
- auto ifm_tensor = _tensor_builder->at(ifm_index).get();
+ auto ifm_tensor = _tensor_reg->getAclTensor(ifm_index).get();
std::vector<arm_compute::ICLTensor *> output_tensors;
for (const auto &ofm_ind : output_indexes)
- output_tensors.emplace_back(_tensor_builder->at(ofm_ind).get()->handle());
+ output_tensors.emplace_back(_tensor_reg->getAclTensor(ofm_ind).get()->handle());
const auto frontend_layout = _current_op_seq_layout;
const auto backend_layout = ifm_tensor->layout();
@@ -1602,11 +1309,10 @@ void KernelGenerator::visit(const ir::operation::Split &node)
axis += ifm_rank;
axis = acl_common::ToARMComputeAxis(ifm_rank, axis, frontend_layout, backend_layout).value();
- auto fn = std::make_unique<::arm_compute::CLSplit>();
-
- fn->configure(ifm_tensor->handle(), output_tensors, axis);
+ auto fn =
+ acl_common::generateLayer<arm_compute::CLSplit>(ifm_tensor->handle(), output_tensors, axis);
- _return_fn = asAclClFunction(std::move(fn));
+ _return_fn = asAclFunction(std::move(fn));
}
void KernelGenerator::visit(const ir::operation::Unpack &node)
@@ -1620,13 +1326,13 @@ void KernelGenerator::visit(const ir::operation::Unpack &node)
for (const auto &output_index : node.getOutputs())
output_indexes.emplace_back(output_index);
- auto input = _tensor_builder->at(input_index).get()->handle();
+ auto input = _tensor_reg->getAclTensor(input_index).get()->handle();
std::vector<arm_compute::ICLTensor *> outputs;
for (const auto &output_index : output_indexes)
- outputs.emplace_back(_tensor_builder->at(output_index)->handle());
+ outputs.emplace_back(_tensor_reg->getAclTensor(output_index)->handle());
const auto frontend_layout = _current_op_seq_layout;
- const auto backend_layout = _tensor_builder->at(input_index).get()->layout();
+ const auto backend_layout = _tensor_reg->getAclTensor(input_index).get()->layout();
if (axis < 0)
axis += input_rank;
axis = acl_common::ToARMComputeAxis(input_rank, axis, frontend_layout, backend_layout).value();
@@ -1636,7 +1342,7 @@ void KernelGenerator::visit(const ir::operation::Unpack &node)
for (const auto &output_index : output_indexes)
{
size_t output_rank = _ctx.at(output_index).shape().rank();
- const auto &output_tensor = _tensor_builder->at(output_index);
+ const auto &output_tensor = _tensor_reg->getAclTensor(output_index);
orig_outputs_acl_tensor_shapes.emplace_back(output_tensor->info()->tensor_shape());
assert(output_rank == output_tensor->num_dimensions());
if (output_rank != output_tensor->info()->num_dimensions())
@@ -1647,11 +1353,9 @@ void KernelGenerator::visit(const ir::operation::Unpack &node)
}
}
- auto fn = std::make_unique<::arm_compute::CLUnstack>();
-
- fn->configure(input, outputs, axis);
+ auto fn = acl_common::generateLayer<arm_compute::CLUnstack>(input, outputs, axis);
- _return_fn = asAclClFunction(std::move(fn));
+ _return_fn = asAclFunction(std::move(fn));
}
void KernelGenerator::visit(const ir::operation::Pad &node)
@@ -1669,11 +1373,11 @@ void KernelGenerator::visit(const ir::operation::Pad &node)
auto quant_info = ::arm_compute::QuantizationInfo(input_type.scale(), input_type.offset());
const auto pixel_value = ::arm_compute::PixelValue(0, data_type, quant_info);
- auto input = _tensor_builder->at(input_index).get()->handle();
- auto output = _tensor_builder->at(output_index).get()->handle();
+ auto input = _tensor_reg->getAclTensor(input_index).get()->handle();
+ auto output = _tensor_reg->getAclTensor(output_index).get()->handle();
const auto frontend_layout = _current_op_seq_layout;
- const auto backend_layout = _tensor_builder->at(input_index).get()->layout();
+ const auto backend_layout = _tensor_reg->getAclTensor(input_index).get()->layout();
::arm_compute::PaddingList padding_list;
padding_list.resize(rank);
@@ -1685,11 +1389,10 @@ void KernelGenerator::visit(const ir::operation::Pad &node)
acl_common::ToARMComputeAxis(rank, n, frontend_layout, backend_layout).value();
padding_list[axis] = ::arm_compute::PaddingInfo{from[0], from[1]};
}
- auto fn = std::make_unique<::arm_compute::CLPadLayer>();
// Disable applied dim_correction
size_t input_rank = _ctx.at(input_index).shape().rank();
- const auto &input_tensor = _tensor_builder->at(input_index);
+ const auto &input_tensor = _tensor_reg->getAclTensor(input_index);
assert(input_rank == input_tensor->num_dimensions());
if (input_rank != input_tensor->info()->num_dimensions())
{
@@ -1698,50 +1401,13 @@ void KernelGenerator::visit(const ir::operation::Pad &node)
_ctx.at(input_index).shape(), frontend_layout, backend_layout, false));
}
- fn->configure(input, output, padding_list, pixel_value);
+ auto fn =
+ acl_common::generateLayer<arm_compute::CLPadLayer>(input, output, padding_list, pixel_value);
  // Do not revert disabling applied dim_correction; CLPadKernel has a CL kernel for 4 dimensions
  // and reverting would produce a mismatch in the result
- _return_fn = asAclClFunction(std::move(fn));
-}
-
-void KernelGenerator::visit(const ir::operation::Min &node)
-{
- const auto ofm_index{node.getOutputs().at(0)};
- const auto lhs_index{node.getInputs().at(ir::operation::Min::Input::LHS)};
- const auto rhs_index{node.getInputs().at(ir::operation::Min::Input::RHS)};
-
- auto ofm_tensor = _tensor_builder->at(ofm_index).get();
- auto lhs_tensor = _tensor_builder->at(lhs_index).get();
- auto rhs_tensor = _tensor_builder->at(rhs_index).get();
-
- auto fn = std::make_unique<::arm_compute::CLElementwiseMin>();
-
- fn->configure(lhs_tensor->handle(), rhs_tensor->handle(), ofm_tensor->handle());
-
- auto acl_fn = asAclClFunction(std::move(fn));
-
- _return_fn = std::move(acl_fn);
-}
-
-void KernelGenerator::visit(const ir::operation::Max &node)
-{
- const auto ofm_index{node.getOutputs().at(0)};
- const auto lhs_index{node.getInputs().at(ir::operation::Max::Input::LHS)};
- const auto rhs_index{node.getInputs().at(ir::operation::Max::Input::RHS)};
-
- auto ofm_tensor = _tensor_builder->at(ofm_index).get();
- auto lhs_tensor = _tensor_builder->at(lhs_index).get();
- auto rhs_tensor = _tensor_builder->at(rhs_index).get();
-
- auto fn = std::make_unique<::arm_compute::CLElementwiseMax>();
-
- fn->configure(lhs_tensor->handle(), rhs_tensor->handle(), ofm_tensor->handle());
-
- auto acl_fn = asAclClFunction(std::move(fn));
-
- _return_fn = std::move(acl_fn);
+ _return_fn = asAclFunction(std::move(fn));
}
void KernelGenerator::visit(const ir::operation::ConvertFp32ToFp16 &node)
@@ -1749,17 +1415,13 @@ void KernelGenerator::visit(const ir::operation::ConvertFp32ToFp16 &node)
const auto ofm_index{node.getOutputs().at(0)};
const auto ifm_index{node.getInputs().at(ir::operation::ConvertFp32ToFp16::Input::INPUT)};
- auto ofm_tensor = _tensor_builder->at(ofm_index).get();
- auto ifm_tensor = _tensor_builder->at(ifm_index).get();
-
- auto fn = std::make_unique<::arm_compute::CLDepthConvertLayer>();
+ auto ofm_tensor = _tensor_reg->getAclTensor(ofm_index).get();
+ auto ifm_tensor = _tensor_reg->getAclTensor(ifm_index).get();
- fn->configure(ifm_tensor->handle(), ofm_tensor->handle(), ::arm_compute::ConvertPolicy::SATURATE,
- 0);
+ auto fn = acl_common::generateLayer<arm_compute::CLDepthConvertLayer>(
+ ifm_tensor->handle(), ofm_tensor->handle(), ::arm_compute::ConvertPolicy::SATURATE, 0);
- auto acl_fn = asAclClFunction(std::move(fn));
-
- _return_fn = std::move(acl_fn);
+ _return_fn = asAclFunction(std::move(fn));
}
void KernelGenerator::visit(const ir::operation::ConvertFp16ToFp32 &node)
@@ -1767,17 +1429,13 @@ void KernelGenerator::visit(const ir::operation::ConvertFp16ToFp32 &node)
const auto ofm_index{node.getOutputs().at(0)};
const auto ifm_index{node.getInputs().at(ir::operation::ConvertFp16ToFp32::Input::INPUT)};
- auto ofm_tensor = _tensor_builder->at(ofm_index).get();
- auto ifm_tensor = _tensor_builder->at(ifm_index).get();
-
- auto fn = std::make_unique<::arm_compute::CLDepthConvertLayer>();
+ auto ofm_tensor = _tensor_reg->getAclTensor(ofm_index).get();
+ auto ifm_tensor = _tensor_reg->getAclTensor(ifm_index).get();
- fn->configure(ifm_tensor->handle(), ofm_tensor->handle(), ::arm_compute::ConvertPolicy::SATURATE,
- 0);
+ auto fn = acl_common::generateLayer<arm_compute::CLDepthConvertLayer>(
+ ifm_tensor->handle(), ofm_tensor->handle(), ::arm_compute::ConvertPolicy::SATURATE, 0);
- auto acl_fn = asAclClFunction(std::move(fn));
-
- _return_fn = std::move(acl_fn);
+ _return_fn = asAclFunction(std::move(fn));
}
} // namespace acl_cl
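The KernelGenerator.cc changes above apply one repeated pattern to every visit() method: tensor lookups move from _tensor_builder->at() to _tensor_reg->getAclTensor(), and the make_unique / configure / asAclClFunction boilerplate collapses into acl_common::generateLayer wrapped by asAclFunction. A minimal sketch of the resulting shape, using a hypothetical unary operation and CL layer (names are placeholders, not part of the patch):

    void KernelGenerator::visit(const ir::operation::SomeUnaryOp &node) // hypothetical op
    {
      const auto ofm_index{node.getOutputs().at(0)};
      const auto ifm_index{node.getInputs().at(ir::operation::SomeUnaryOp::Input::INPUT)};

      // _tensor_builder->at(index) lookups become _tensor_reg->getAclTensor(index)
      auto ofm_tensor = _tensor_reg->getAclTensor(ofm_index).get();
      auto ifm_tensor = _tensor_reg->getAclTensor(ifm_index).get();

      // make_unique + configure + asAclClFunction collapses into generateLayer + asAclFunction
      auto fn = acl_common::generateLayer<arm_compute::CLSomeLayer>( // hypothetical ACL layer
          ifm_tensor->handle(), ofm_tensor->handle());

      _return_fn = asAclFunction(std::move(fn));
    }
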
diff --git a/runtime/onert/backend/acl_cl/KernelGenerator.h b/runtime/onert/backend/acl_cl/KernelGenerator.h
index 1e3b06489..d188d6d83 100644
--- a/runtime/onert/backend/acl_cl/KernelGenerator.h
+++ b/runtime/onert/backend/acl_cl/KernelGenerator.h
@@ -21,6 +21,8 @@
#include "ir/Operands.h"
#include "TensorBuilder.h"
+#include "AclTensorRegistry.h"
+#include "TensorManager.h"
namespace onert
{
@@ -33,70 +35,52 @@ class KernelGenerator : public IKernelGenerator
{
public:
KernelGenerator(const ir::Operands &operands_ctx, const ir::Operations &operations_ctx,
- const std::shared_ptr<TensorBuilder> &tensor_builder);
+ const std::shared_ptr<TensorBuilder> &tensor_builder,
+ const std::shared_ptr<acl_common::AclTensorRegistry<TensorManager>> &_tensor_reg);
void visit(const ir::OpSequence &) override;
void visit(const ir::operation::BatchToSpaceND &) override;
+ void visit(const ir::operation::BinaryArithmetic &) override;
void visit(const ir::operation::Conv2D &) override;
void visit(const ir::operation::DepthwiseConv2D &) override;
- void visit(const ir::operation::MaxPool2D &) override;
- void visit(const ir::operation::AvgPool2D &) override;
void visit(const ir::operation::Concat &) override;
void visit(const ir::operation::FullyConnected &) override;
- void visit(const ir::operation::Mul &) override;
void visit(const ir::operation::Reduce &) override;
void visit(const ir::operation::Reshape &) override;
void visit(const ir::operation::Squeeze &) override;
- void visit(const ir::operation::Tanh &) override;
void visit(const ir::operation::Softmax &) override;
void visit(const ir::operation::Slice &) override;
void visit(const ir::operation::StridedSlice &) override;
void visit(const ir::operation::Transpose &) override;
- void visit(const ir::operation::Add &) override;
- void visit(const ir::operation::Sub &) override;
- void visit(const ir::operation::Cast &) override;
- void visit(const ir::operation::Div &) override;
- void visit(const ir::operation::Exp &) override;
+ void visit(const ir::operation::ElementwiseActivation &) override;
+ void visit(const ir::operation::ElementwiseBinary &) override;
+ void visit(const ir::operation::ElementwiseUnary &) override;
void visit(const ir::operation::ExpandDims &) override;
void visit(const ir::operation::InstanceNorm &) override;
- void visit(const ir::operation::Logistic &) override;
void visit(const ir::operation::Comparison &) override;
- void visit(const ir::operation::LogicalAnd &) override;
void visit(const ir::operation::LSTM &) override;
void visit(const ir::operation::Pack &) override;
+ void visit(const ir::operation::Pool2D &) override;
void visit(const ir::operation::Permute &) override;
- void visit(const ir::operation::RSQRT &) override;
- void visit(const ir::operation::ReLU &) override;
void visit(const ir::operation::ResizeBilinear &) override;
- void visit(const ir::operation::ReLU1 &) override;
- void visit(const ir::operation::ReLU6 &) override;
+ void visit(const ir::operation::ResizeNearestNeighbor &) override;
void visit(const ir::operation::RNN &) override;
- void visit(const ir::operation::Floor &) override;
void visit(const ir::operation::SpaceToBatchND &) override;
void visit(const ir::operation::SpaceToDepth &) override;
- void visit(const ir::operation::L2Pool2D &) override;
void visit(const ir::operation::EmbeddingLookup &) override;
void visit(const ir::operation::L2Normalization &) override;
void visit(const ir::operation::HashtableLookup &) override;
void visit(const ir::operation::PReLU &) override;
void visit(const ir::operation::TransposeConv &) override;
- void visit(const ir::operation::SQRT &) override;
- void visit(const ir::operation::LogicalOr &) override;
- void visit(const ir::operation::LogicalNot &) override;
void visit(const ir::operation::SquaredDifference &) override;
void visit(const ir::operation::TopKV2 &) override;
void visit(const ir::operation::Gather &) override;
- void visit(const ir::operation::Neg &) override;
- void visit(const ir::operation::Abs &) override;
void visit(const ir::operation::ArgMax &) override;
- void visit(const ir::operation::Dequantize &) override;
void visit(const ir::operation::LocalResponseNormalization &) override;
void visit(const ir::operation::DepthToSpace &) override;
void visit(const ir::operation::Split &) override;
void visit(const ir::operation::Unpack &) override;
void visit(const ir::operation::Pad &) override;
- void visit(const ir::operation::Min &) override;
- void visit(const ir::operation::Max &) override;
void visit(const ir::operation::ConvertFp32ToFp16 &) override;
void visit(const ir::operation::ConvertFp16ToFp32 &) override;
@@ -104,6 +88,7 @@ private:
const ir::Operands &_ctx;
const ir::Operations &_operations_ctx;
std::shared_ptr<TensorBuilder> _tensor_builder;
+ std::shared_ptr<acl_common::AclTensorRegistry<TensorManager>> _tensor_reg;
ir::Layout _current_op_seq_layout;
};
diff --git a/runtime/onert/backend/acl_cl/Optimizer.cc b/runtime/onert/backend/acl_cl/Optimizer.cc
index 6ba3143e8..9134d3fb8 100644
--- a/runtime/onert/backend/acl_cl/Optimizer.cc
+++ b/runtime/onert/backend/acl_cl/Optimizer.cc
@@ -19,7 +19,7 @@
#include "ParentInfo.h"
#include <cassert>
-#include <ir/LoweredGraph.h>
+#include <compiler/LoweredGraph.h>
#include <util/logging.h>
#include "AclSubTensorAnalyzer.h"
diff --git a/runtime/onert/backend/acl_cl/TensorManager.h b/runtime/onert/backend/acl_cl/TensorManager.h
index bdbd0364e..ab295dbec 100644
--- a/runtime/onert/backend/acl_cl/TensorManager.h
+++ b/runtime/onert/backend/acl_cl/TensorManager.h
@@ -56,7 +56,7 @@ using InternalBufferManager = acl_common::AclInternalBufferManager<
using TensorManager =
acl_common::AclTensorManager<operand::ICLTensor, operand::CLTensor, operand::CLSubTensor>;
-TensorManager *createTensorManager(bool is_linear_executor)
+inline TensorManager *createTensorManager(bool is_linear_executor)
{
if (is_linear_executor)
{
diff --git a/runtime/onert/backend/acl_common/AclConstantInitializer.cc b/runtime/onert/backend/acl_common/AclConstantInitializer.cc
new file mode 100644
index 000000000..6ad5b7b69
--- /dev/null
+++ b/runtime/onert/backend/acl_common/AclConstantInitializer.cc
@@ -0,0 +1,128 @@
+/*
+ * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "AclConstantInitializer.h"
+
+namespace onert
+{
+namespace backend
+{
+namespace acl_common
+{
+
+AclConstantInitializer::AclConstantInitializer(const ir::Operands &operands,
+ const std::shared_ptr<ITensorRegistry> &tensor_reg)
+ : IConstantInitializer{operands}, _tensor_reg{tensor_reg}
+{
+ // DO NOTHING
+}
+
+void AclConstantInitializer::copyInputInitialize(const ir::Operation &node, uint32_t index)
+{
+ assert(node.getInputs().size() > index);
+
+ const auto &input_index = node.getInputs().at(index);
+ const auto &input_obj = _operands.at(input_index);
+ registerCopyInitializer(input_index, input_obj);
+}
+
+void AclConstantInitializer::permuteInputInitialize(const ir::Operation &node, uint32_t index)
+{
+ assert(node.getInputs().size() > index);
+
+ const auto &input_index = node.getInputs().at(index);
+ const auto &input_obj = _operands.at(input_index);
+ registerPermuteInitializer(input_index, input_obj);
+}
+
+void AclConstantInitializer::visit(const ir::operation::BatchToSpaceND &node)
+{
+ const auto &block_size_index = node.getInputs().at(ir::operation::BatchToSpaceND::BLOCK_SIZE);
+ const auto &block_size_obj = _operands.at(block_size_index);
+
+ if (block_size_obj.isConstant())
+ {
+ _init_map[block_size_index] = [](const ir::Operand &model_obj, backend::ITensor &obj) {
+ assert(model_obj.data());
+ const auto &shape = model_obj.shape();
+ const auto base = reinterpret_cast<const int32_t *>(model_obj.data()->base());
+ assert(model_obj.shape().rank() == 1);
+ obj.access([&](ITensor &tensor) {
+ for (size_t i = 0; i < shape.num_elements(); ++i)
+ {
+ const int32_t value = base[shape.num_elements() - i - 1];
+ int32_t *into = reinterpret_cast<int32_t *>(tensor.buffer() +
+ tensor.calcOffset({static_cast<int32_t>(i)}));
+ *into = value;
+ }
+ });
+ };
+ }
+}
+
+void AclConstantInitializer::visit(const ir::operation::Conv2D &node)
+{
+ permuteInputInitialize(node, ir::operation::Conv2D::KERNEL);
+ copyInputInitialize(node, ir::operation::Conv2D::BIAS);
+}
+
+void AclConstantInitializer::visit(const ir::operation::DepthwiseConv2D &node)
+{
+ permuteInputInitialize(node, ir::operation::DepthwiseConv2D::KERNEL);
+ copyInputInitialize(node, ir::operation::DepthwiseConv2D::BIAS);
+}
+
+void AclConstantInitializer::visit(const ir::operation::FullyConnected &node)
+{
+ copyInputInitialize(node, ir::operation::FullyConnected::WEIGHT);
+ copyInputInitialize(node, ir::operation::FullyConnected::BIAS);
+}
+
+void AclConstantInitializer::visit(const ir::operation::LSTM &node)
+{
+ copyInputInitialize(node, ir::operation::LSTM::INPUT_TO_INPUT_WEIGHTS);
+ copyInputInitialize(node, ir::operation::LSTM::INPUT_TO_FORGET_WEIGHTS);
+ copyInputInitialize(node, ir::operation::LSTM::INPUT_TO_CELL_WEIGHTS);
+ copyInputInitialize(node, ir::operation::LSTM::INPUT_TO_OUTPUT_WEIGHTS);
+ copyInputInitialize(node, ir::operation::LSTM::RECURRENT_TO_INPUT_WEIGHTS);
+ copyInputInitialize(node, ir::operation::LSTM::RECURRENT_TO_FORGET_WEIGHTS);
+ copyInputInitialize(node, ir::operation::LSTM::RECURRENT_TO_CELL_WEIGHTS);
+ copyInputInitialize(node, ir::operation::LSTM::RECURRENT_TO_OUTPUT_WEIGHTS);
+ copyInputInitialize(node, ir::operation::LSTM::CELL_TO_INPUT_WEIGHTS);
+ copyInputInitialize(node, ir::operation::LSTM::CELL_TO_FORGET_WEIGHTS);
+ copyInputInitialize(node, ir::operation::LSTM::CELL_TO_OUTPUT_WEIGHTS);
+ copyInputInitialize(node, ir::operation::LSTM::INPUT_GATE_BIAS);
+ copyInputInitialize(node, ir::operation::LSTM::FORGET_GATE_BIAS);
+ copyInputInitialize(node, ir::operation::LSTM::OUTPUT_GATE_BIAS);
+ copyInputInitialize(node, ir::operation::LSTM::PROJECTION_WEIGHTS);
+ copyInputInitialize(node, ir::operation::LSTM::PROJECTION_BIAS);
+}
+
+void AclConstantInitializer::visit(const ir::operation::RNN &node)
+{
+ copyInputInitialize(node, ir::operation::RNN::WEIGHTS);
+ copyInputInitialize(node, ir::operation::RNN::RECURRENT_WEIGHTS);
+ copyInputInitialize(node, ir::operation::RNN::BIAS);
+}
+
+void AclConstantInitializer::visit(const ir::operation::TransposeConv &node)
+{
+ permuteInputInitialize(node, ir::operation::TransposeConv::KERNEL);
+}
+
+} // namespace acl_common
+} // namespace backend
+} // namespace onert
diff --git a/runtime/onert/backend/acl_common/AclConstantInitializer.h b/runtime/onert/backend/acl_common/AclConstantInitializer.h
new file mode 100644
index 000000000..52f4c54cf
--- /dev/null
+++ b/runtime/onert/backend/acl_common/AclConstantInitializer.h
@@ -0,0 +1,61 @@
+/*
+ * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __ONERT_COMPILER_ACL_COMMON_ACLCONSTANT_INITIALIZER_H__
+#define __ONERT_COMPILER_ACL_COMMON_ACLCONSTANT_INITIALIZER_H__
+
+#include <backend/IConstantInitializer.h>
+#include <ir/Operands.h>
+#include "AclTensorRegistry.h"
+
+namespace onert
+{
+namespace backend
+{
+namespace acl_common
+{
+
+class AclConstantInitializer : public IConstantInitializer
+{
+public:
+ AclConstantInitializer(const ir::Operands &operands,
+ const std::shared_ptr<ITensorRegistry> &tensor_reg);
+
+public:
+ void visit(const ir::operation::BatchToSpaceND &) override;
+ void visit(const ir::operation::Conv2D &) override;
+ void visit(const ir::operation::DepthwiseConv2D &) override;
+ void visit(const ir::operation::FullyConnected &) override;
+ void visit(const ir::operation::LSTM &) override;
+ void visit(const ir::operation::RNN &) override;
+ void visit(const ir::operation::TransposeConv &) override;
+
+protected:
+ void copyInputInitialize(const ir::Operation &node, uint32_t index);
+ void permuteInputInitialize(const ir::Operation &node, uint32_t index);
+
+private:
+ std::shared_ptr<ITensorRegistry> tensor_registry() const final { return _tensor_reg; }
+
+protected:
+ std::shared_ptr<ITensorRegistry> _tensor_reg;
+};
+
+} // namespace acl_common
+} // namespace backend
+} // namespace onert
+
+#endif // __ONERT_COMPILER_ACL_COMMON_ACLCONSTANT_INITIALIZER_H__
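The new AclConstantInitializer hoists the copy/permute registration helpers and the common visit() overrides into acl_common so each ACL backend no longer duplicates them. A hypothetical backend-side initializer reusing the shared visitors might look like the sketch below (the class name and its placement are assumptions for illustration; they are not part of this patch):

    class ClConstantInitializer : public acl_common::AclConstantInitializer
    {
    public:
      using acl_common::AclConstantInitializer::AclConstantInitializer;

      // Backend-specific operations would add their own visit() overrides here;
      // everything covered by the common visitors above is inherited unchanged.
    };
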
diff --git a/runtime/onert/backend/acl_common/AclFunction.h b/runtime/onert/backend/acl_common/AclFunction.h
index 85b18e847..94b65863a 100644
--- a/runtime/onert/backend/acl_common/AclFunction.h
+++ b/runtime/onert/backend/acl_common/AclFunction.h
@@ -47,12 +47,6 @@ private:
std::unique_ptr<::arm_compute::IFunction> _func;
};
-class AclClFunction : public AclFunction
-{
-public:
- using AclFunction::AclFunction;
-};
-
} // namespace acl_common
} // namespace backend
} // namespace onert
diff --git a/runtime/onert/backend/acl_common/AclKernelGen.h b/runtime/onert/backend/acl_common/AclKernelGen.h
index 9f7ce3764..372ce689e 100644
--- a/runtime/onert/backend/acl_common/AclKernelGen.h
+++ b/runtime/onert/backend/acl_common/AclKernelGen.h
@@ -30,11 +30,32 @@ namespace backend
namespace acl_common
{
+template <typename Layer, typename... Args>
+std::unique_ptr<arm_compute::IFunction> generateLayer(Args &&... args)
+{
+ auto l = std::make_unique<Layer>();
+
+ l->configure(std::forward<Args>(args)...);
+
+ return l;
+}
+
+template <typename Layer, typename... Args>
+std::unique_ptr<arm_compute::IFunction>
+generateLayer(std::shared_ptr<arm_compute::IMemoryManager> memory_manager, Args &&... args)
+{
+ auto l = std::make_unique<Layer>(memory_manager);
+
+ l->configure(std::forward<Args>(args)...);
+
+ return l;
+}
+
template <typename T_FunctionWrapper, typename T_Tensor, typename T_ACLLayer,
- typename T_TensorBuilder>
-std::unique_ptr<exec::IFunction>
-kernelGenLSTM(const ir::operation::LSTM &node, const ir::Operands &operands,
- const std::shared_ptr<T_TensorBuilder> &tensor_builder)
+ typename T_TensorRegistry>
+std::unique_ptr<exec::IFunction> kernelGenLSTM(const ir::operation::LSTM &node,
+ const ir::Operands &operands,
+ const std::shared_ptr<T_TensorRegistry> &tensor_reg)
{
// TODO Support dynamic rnn
// TODO Fix subtle error in the case of non-CIFG, non-peephole and No Projection.
@@ -117,43 +138,44 @@ kernelGenLSTM(const ir::operation::LSTM &node, const ir::Operands &operands,
const auto projection_clip = projection_threshold;
assert(cell_clip >= 0.f && projection_clip >= 0.f);
- auto scratch_buffer_tensor = tensor_builder->at(scratch_buffer_index).get();
- auto output_state_out_tensor = tensor_builder->at(output_state_out_index).get();
- auto cell_state_out_tensor = tensor_builder->at(cell_state_out_index).get();
- auto output_tensor = tensor_builder->at(output_index).get();
+ auto scratch_buffer_tensor = tensor_reg->getAclTensor(scratch_buffer_index).get();
+ auto output_state_out_tensor = tensor_reg->getAclTensor(output_state_out_index).get();
+ auto cell_state_out_tensor = tensor_reg->getAclTensor(cell_state_out_index).get();
+ auto output_tensor = tensor_reg->getAclTensor(output_index).get();
- auto input_tensor = tensor_builder->at(input_index).get();
+ auto input_tensor = tensor_reg->getAclTensor(input_index).get();
- auto input_to_forget_weights_tensor = tensor_builder->at(input_to_forget_weights_index).get();
- auto input_to_cell_weights_tensor = tensor_builder->at(input_to_cell_weights_index).get();
- auto input_to_output_weights_tensor = tensor_builder->at(input_to_output_weights_index).get();
+ auto input_to_forget_weights_tensor =
+ tensor_reg->getAclTensor(input_to_forget_weights_index).get();
+ auto input_to_cell_weights_tensor = tensor_reg->getAclTensor(input_to_cell_weights_index).get();
+ auto input_to_output_weights_tensor =
+ tensor_reg->getAclTensor(input_to_output_weights_index).get();
auto recurrent_to_forget_weights_tensor =
- tensor_builder->at(recurrent_to_forget_weights_index).get();
- auto recurrent_to_cell_weights_tensor = tensor_builder->at(recurrent_to_cell_weights_index).get();
+ tensor_reg->getAclTensor(recurrent_to_forget_weights_index).get();
+ auto recurrent_to_cell_weights_tensor =
+ tensor_reg->getAclTensor(recurrent_to_cell_weights_index).get();
auto recurrent_to_output_weights_tensor =
- tensor_builder->at(recurrent_to_output_weights_index).get();
+ tensor_reg->getAclTensor(recurrent_to_output_weights_index).get();
- auto forget_gate_bias_tensor = tensor_builder->at(forget_gate_bias_index).get();
- auto cell_bias_tensor = tensor_builder->at(cell_bias_index).get();
- auto output_gate_bias_tensor = tensor_builder->at(output_gate_bias_index).get();
- auto output_state_in_tensor = tensor_builder->at(output_state_in_index).get();
- auto cell_state_in_tensor = tensor_builder->at(cell_state_in_index).get();
+ auto forget_gate_bias_tensor = tensor_reg->getAclTensor(forget_gate_bias_index).get();
+ auto cell_bias_tensor = tensor_reg->getAclTensor(cell_bias_index).get();
+ auto output_gate_bias_tensor = tensor_reg->getAclTensor(output_gate_bias_index).get();
+ auto output_state_in_tensor = tensor_reg->getAclTensor(output_state_in_index).get();
+ auto cell_state_in_tensor = tensor_reg->getAclTensor(cell_state_in_index).get();
- auto act_info = ::onert::backend::acl_common::asActivationLayerInfo(activation);
-
- auto fn = std::make_unique<T_ACLLayer>();
+ auto act_info = asActivationLayerInfo(activation);
::arm_compute::LSTMParams<T_Tensor> lstm_params{};
if (has_cifg_param)
{
auto input_to_input_weights_tensor =
- tensor_builder->at(input_to_input_weights_index).get(); // optional
+ tensor_reg->getAclTensor(input_to_input_weights_index).get(); // optional
auto recurrent_to_input_weights_tensor =
- tensor_builder->at(recurrent_to_input_weights_index).get(); // optional
+ tensor_reg->getAclTensor(recurrent_to_input_weights_index).get(); // optional
auto cell_to_input_weights_handle =
- has_peephole_param ? tensor_builder->at(cell_to_input_weights_index).get()->handle()
+ has_peephole_param ? tensor_reg->getAclTensor(cell_to_input_weights_index).get()->handle()
: nullptr; // optional (non-cifg && peephole)
- auto input_gate_bias_tensor = tensor_builder->at(input_gate_bias_index).get(); // optional
+ auto input_gate_bias_tensor = tensor_reg->getAclTensor(input_gate_bias_index).get(); // optional
lstm_params.set_cifg_params(input_to_input_weights_tensor->handle(),
recurrent_to_input_weights_tensor->handle(),
cell_to_input_weights_handle, input_gate_bias_tensor->handle());
@@ -161,40 +183,42 @@ kernelGenLSTM(const ir::operation::LSTM &node, const ir::Operands &operands,
if (has_peephole_param)
{
auto cell_to_forget_weights_tensor =
- tensor_builder->at(cell_to_forget_weights_index).get(); // optional
+ tensor_reg->getAclTensor(cell_to_forget_weights_index).get(); // optional
auto cell_to_output_weights_tensor =
- tensor_builder->at(cell_to_output_weights_index).get(); // optional
+ tensor_reg->getAclTensor(cell_to_output_weights_index).get(); // optional
lstm_params.set_peephole_params(cell_to_forget_weights_tensor->handle(),
cell_to_output_weights_tensor->handle());
}
if (has_projection_param)
{
- auto projection_weights_tensor = tensor_builder->at(projection_weights_index).get(); // optional
- auto projection_bias_handle = has_projection_bias
- ? tensor_builder->at(projection_bias_index).get()->handle()
- : nullptr; // optional
+ auto projection_weights_tensor =
+ tensor_reg->getAclTensor(projection_weights_index).get(); // optional
+ auto projection_bias_handle =
+ has_projection_bias ? tensor_reg->getAclTensor(projection_bias_index).get()->handle()
+ : nullptr; // optional
lstm_params.set_projection_params(projection_weights_tensor->handle(), projection_bias_handle);
}
- fn->configure(input_tensor->handle(), input_to_forget_weights_tensor->handle(),
- input_to_cell_weights_tensor->handle(), input_to_output_weights_tensor->handle(),
- recurrent_to_forget_weights_tensor->handle(),
- recurrent_to_cell_weights_tensor->handle(),
- recurrent_to_output_weights_tensor->handle(), forget_gate_bias_tensor->handle(),
- cell_bias_tensor->handle(), output_gate_bias_tensor->handle(),
- output_state_in_tensor->handle(), cell_state_in_tensor->handle(),
- scratch_buffer_tensor->handle(), output_state_out_tensor->handle(),
- cell_state_out_tensor->handle(), output_tensor->handle(), lstm_params, act_info,
- cell_clip, projection_clip);
+ auto fn = generateLayer<T_ACLLayer>(
+ input_tensor->handle(), input_to_forget_weights_tensor->handle(),
+ input_to_cell_weights_tensor->handle(), input_to_output_weights_tensor->handle(),
+ recurrent_to_forget_weights_tensor->handle(), recurrent_to_cell_weights_tensor->handle(),
+ recurrent_to_output_weights_tensor->handle(), forget_gate_bias_tensor->handle(),
+ cell_bias_tensor->handle(), output_gate_bias_tensor->handle(),
+ output_state_in_tensor->handle(), cell_state_in_tensor->handle(),
+ scratch_buffer_tensor->handle(), output_state_out_tensor->handle(),
+ cell_state_out_tensor->handle(), output_tensor->handle(), lstm_params, act_info, cell_clip,
+ projection_clip);
return std::make_unique<T_FunctionWrapper>(std::move(fn));
}
template <typename T_FunctionWrapper, typename T_Tensor, typename T_ACLLayer,
- typename T_TensorBuilder>
+ typename T_TensorBuilder, typename T_TensorRegistry>
std::unique_ptr<exec::IFunction>
kernelGenFullyConnected(const ir::operation::FullyConnected &node, const ir::Operands &operands,
- const std::shared_ptr<T_TensorBuilder> &tensor_builder, ir::Layout layout)
+ const std::shared_ptr<T_TensorBuilder> &tensor_builder,
+ const std::shared_ptr<T_TensorRegistry> &tensor_reg, ir::Layout layout)
{
using ir::operation::FullyConnected;
@@ -236,16 +260,13 @@ kernelGenFullyConnected(const ir::operation::FullyConnected &node, const ir::Ope
reshape.dim(1) = input_size; /* W */
}
- auto output_tensor = tensor_builder->at(output_index).get();
- const auto input_tensor = tensor_builder->at(input_index).get();
- const auto weight_tensor = tensor_builder->at(weight_index).get();
- const auto bias_tensor = tensor_builder->at(bias_index).get();
+ auto output_tensor = tensor_reg->getAclTensor(output_index).get();
+ const auto input_tensor = tensor_reg->getAclTensor(input_index).get();
+ const auto weight_tensor = tensor_reg->getAclTensor(weight_index).get();
+ const auto bias_tensor = tensor_reg->getAclTensor(bias_index).get();
const auto frontend_layout = layout;
const auto acl_layout = output_tensor->handle()->info()->data_layout();
- auto fn =
- std::make_unique<T_ACLLayer>(tensor_builder->acl_tensor_manager()->internal_buffer_manager());
-
typename T_ACLLayer::KernelType kernel_type = T_ACLLayer::KernelType::GENERAL;
if (operands.at(weight_index).isConstant())
{
@@ -253,20 +274,18 @@ kernelGenFullyConnected(const ir::operation::FullyConnected &node, const ir::Ope
assert(operands.at(weight_index).data());
}
- fn->configure(
- input_tensor->handle(), weight_tensor->handle(), bias_tensor->handle(),
- output_tensor->handle(), needs_reshape,
- ::onert::backend::acl_common::asTensorShape(
- reshape, frontend_layout, ::onert::backend::acl_common::asRuntimeLayout(acl_layout)),
- kernel_type);
+ auto fn = generateLayer<T_ACLLayer>(
+ tensor_builder->acl_tensor_manager()->internal_buffer_manager(), input_tensor->handle(),
+ weight_tensor->handle(), bias_tensor->handle(), output_tensor->handle(), needs_reshape,
+ asTensorShape(reshape, frontend_layout, asRuntimeLayout(acl_layout)), kernel_type);
return std::make_unique<T_FunctionWrapper>(std::move(fn));
}
-template <typename T_ACLLayer, typename T_PoolOp, typename T_TensorBuilder>
+template <typename T_ACLLayer, typename T_PoolOp, typename T_AclTensorRegistry>
std::unique_ptr<::arm_compute::IFunction>
kernelGenPool2D(const T_PoolOp &node, const ir::Operands &operands,
- const std::shared_ptr<T_TensorBuilder> &tensor_builder, ir::Layout layout,
+ const std::shared_ptr<T_AclTensorRegistry> &tensor_reg, ir::Layout layout,
::arm_compute::PoolingType pooling_type)
{
const auto ofm_index{node.getOutputs().at(0)};
@@ -294,16 +313,14 @@ kernelGenPool2D(const T_PoolOp &node, const ir::Operands &operands,
VERBOSE(Pool2DParam) << "PAD(L): " << padding.left << std::endl;
VERBOSE(Pool2DParam) << "PAD(R): " << padding.right << std::endl;
- auto ofm_tensor = tensor_builder->at(ofm_index).get();
- auto ifm_tensor = tensor_builder->at(ifm_index).get();
+ auto ofm_tensor = tensor_reg->getAclTensor(ofm_index).get();
+ auto ifm_tensor = tensor_reg->getAclTensor(ifm_index).get();
::arm_compute::PoolingLayerInfo info{
pooling_type, ::arm_compute::Size2D{kw, kh}, ifm_tensor->info()->data_layout(),
- acl_common::asPadStrideInfo(padding, stride), true /* exclude_padding */};
-
- auto fn = std::make_unique<T_ACLLayer>();
+ asPadStrideInfo(padding, stride), true /* exclude_padding */};
- fn->configure(ifm_tensor->handle(), ofm_tensor->handle(), info);
+ auto fn = generateLayer<T_ACLLayer>(ifm_tensor->handle(), ofm_tensor->handle(), info);
return fn;
}
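AclKernelGen.h now provides two generateLayer overloads: one default-constructs the layer, the other forwards a shared arm_compute::IMemoryManager to the layer constructor; both then forward the remaining arguments to configure(). A brief sketch of each overload in use, drawn from the call sites in this patch (the tensor and parameter variables are placeholders):

    // Plain overload: default-constructs CLCopy, then calls configure(src, dst).
    auto copy_fn = acl_common::generateLayer<arm_compute::CLCopy>(
        src_tensor->handle(), dst_tensor->handle());

    // Memory-manager overload: the first argument selects the second template,
    // so CLRNNLayer is constructed with the internal buffer manager before configure().
    auto rnn_fn = acl_common::generateLayer<arm_compute::CLRNNLayer>(
        tensor_builder->acl_tensor_manager()->internal_buffer_manager(),
        input->handle(), weights->handle(), recurrent_weights->handle(), bias->handle(),
        hidden_state_out->handle(), output->handle(), act_info);
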
diff --git a/runtime/onert/backend/acl_common/AclTensorBuilder.h b/runtime/onert/backend/acl_common/AclTensorBuilder.h
index 6b03fdf7f..91452014b 100644
--- a/runtime/onert/backend/acl_common/AclTensorBuilder.h
+++ b/runtime/onert/backend/acl_common/AclTensorBuilder.h
@@ -25,6 +25,7 @@
#include "ir/OperandIndexMap.h"
#include <ir/Operands.h>
#include "AclTensorManager.h"
+#include "AclTensorRegistry.h"
#include <memory>
#include "ParentInfo.h"
#include <util/Utils.h>
@@ -48,7 +49,8 @@ class AclTensorBuilder : public ITensorBuilder
public:
using T_AclTensorManager = AclTensorManager<T_ITensor, T_Tensor, T_SubTensor>;
- AclTensorBuilder(const ir::Operands &operands, T_AclTensorManager *tensor_mgr);
+ AclTensorBuilder(const ir::Operands &operands, T_AclTensorManager *tensor_mgr,
+ const std::shared_ptr<AclTensorRegistry<T_AclTensorManager>> &tensor_reg);
/**
* @brief Register tensor information to allocate on ACL-CL backend
@@ -63,19 +65,13 @@ public:
void notifyLastUse(const ir::OperandIndex &) override;
bool isRegistered(const ir::OperandIndex &) const override;
- std::shared_ptr<backend::ITensorRegistry> tensorRegistry() override { return nullptr; }
void prepare(void) override;
void allocate() override;
void postFunctionPrepare() override;
- std::shared_ptr<ITensor> tensorAt(const ir::OperandIndex &ind) override;
- void iterate(const IterateFunction &fn) override;
-
std::unique_ptr<ITensorManager> releaseStaticTensorManager(void) override;
- std::shared_ptr<T_ITensor> at(const ir::OperandIndex &ind);
-
T_AclTensorManager *acl_tensor_manager(void) { return _tensor_mgr.get(); }
void setUsesCount(const ir::OperandIndex &index, size_t num_uses)
@@ -100,8 +96,6 @@ public:
*/
bool isSubTensorOf(const ir::OperandIndex &parent, const ir::OperandIndex &child);
- bool supportDynamicTensor() override { return false; }
-
private:
void buildTensors(void);
ir::OperandIndex findRootParent(ir::OperandIndex index);
@@ -113,6 +107,7 @@ private:
ir::OperandIndexMap<size_t> _uses_count_map;
std::unique_ptr<T_AclTensorManager> _tensor_mgr;
+ std::shared_ptr<AclTensorRegistry<T_AclTensorManager>> _tensor_reg;
// for linear executor
std::vector<std::pair<UsesType, ir::OperandIndex>> _lifetime_seq;
@@ -140,9 +135,10 @@ namespace acl_common
{
template <typename T_ITensor, typename T_Tensor, typename T_SubTensor>
-AclTensorBuilder<T_ITensor, T_Tensor, T_SubTensor>::AclTensorBuilder(const ir::Operands &operands,
- T_AclTensorManager *tensor_mgr)
- : _operands{operands}, _tensor_mgr{tensor_mgr}
+AclTensorBuilder<T_ITensor, T_Tensor, T_SubTensor>::AclTensorBuilder(
+ const ir::Operands &operands, T_AclTensorManager *tensor_mgr,
+ const std::shared_ptr<AclTensorRegistry<T_AclTensorManager>> &tensor_reg)
+ : _operands{operands}, _tensor_mgr{tensor_mgr}, _tensor_reg{tensor_reg}
{
assert(_tensor_mgr);
}
@@ -310,28 +306,6 @@ void AclTensorBuilder<T_ITensor, T_Tensor, T_SubTensor>::postFunctionPrepare(voi
}
template <typename T_ITensor, typename T_Tensor, typename T_SubTensor>
-std::shared_ptr<ITensor>
-AclTensorBuilder<T_ITensor, T_Tensor, T_SubTensor>::tensorAt(const ir::OperandIndex &ind)
-{
- return _tensor_mgr->at(ind);
-}
-
-template <typename T_ITensor, typename T_Tensor, typename T_SubTensor>
-void AclTensorBuilder<T_ITensor, T_Tensor, T_SubTensor>::iterate(const IterateFunction &fn)
-{
- _tensor_mgr->iterate(fn);
-}
-
-template <typename T_ITensor, typename T_Tensor, typename T_SubTensor>
-std::shared_ptr<T_ITensor>
-AclTensorBuilder<T_ITensor, T_Tensor, T_SubTensor>::at(const ir::OperandIndex &ind)
-{
- auto ret = _tensor_mgr->at(ind);
- assert(ret != nullptr);
- return ret;
-}
-
-template <typename T_ITensor, typename T_Tensor, typename T_SubTensor>
std::unique_ptr<ITensorManager>
AclTensorBuilder<T_ITensor, T_Tensor, T_SubTensor>::releaseStaticTensorManager(void)
{
diff --git a/runtime/onert/backend/acl_common/AclTensorRegistry.h b/runtime/onert/backend/acl_common/AclTensorRegistry.h
new file mode 100644
index 000000000..1ef9f4b35
--- /dev/null
+++ b/runtime/onert/backend/acl_common/AclTensorRegistry.h
@@ -0,0 +1,59 @@
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __ONERT_BACKEND_ACL_COMMON_ACL_TENSOR_REGISTRY_H__
+#define __ONERT_BACKEND_ACL_COMMON_ACL_TENSOR_REGISTRY_H__
+
+#include "backend/ITensorRegistry.h"
+
+namespace onert
+{
+namespace backend
+{
+namespace acl_common
+{
+
+/**
+ * @brief Tensor registry class for acl backends
+ *
+ * This is implemented as a wrapper of AclTensorManager.
+ */
+template <typename T_AclTensorManager> class AclTensorRegistry : public ITensorRegistry
+{
+public:
+ AclTensorRegistry(T_AclTensorManager *tensor_mgr) : _tensor_mgr{tensor_mgr} {}
+
+ std::shared_ptr<ITensor> getITensor(const ir::OperandIndex &ind) override
+ {
+ return _tensor_mgr->at(ind);
+ }
+
+ std::shared_ptr<ITensor> getNativeITensor(const ir::OperandIndex &ind) override
+ {
+ return getITensor(ind);
+ }
+
+ auto getAclTensor(const ir::OperandIndex &ind) { return _tensor_mgr->at(ind); }
+
+private:
+ T_AclTensorManager *_tensor_mgr;
+};
+
+} // namespace acl_common
+} // namespace backend
+} // namespace onert
+
+#endif // __ONERT_BACKEND_ACL_COMMON_ACL_TENSOR_REGISTRY_H__
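A minimal usage sketch of the new registry, assuming the TensorManager/createTensorManager names used by the acl_neon Backend.h hunk later in this diff: the backend builds one registry over its tensor manager, hands it to the tensor builder and kernel generator, and tensors are then looked up through the registry rather than the builder.

    // Hypothetical wiring, mirroring the acl_neon Backend.h change below.
    auto tm = createTensorManager(is_linear_executor);
    auto tr = std::make_shared<acl_common::AclTensorRegistry<TensorManager>>(tm);
    auto tb = std::make_shared<TensorBuilder>(operands, tm, tr);

    // Lookups previously done via tensor_builder->at(index):
    auto acl_tensor  = tr->getAclTensor(ofm_index); // backend tensor type (AclTensorManager::at)
    auto base_tensor = tr->getITensor(ofm_index);   // same tensor through the ITensorRegistry interface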
diff --git a/runtime/onert/backend/acl_common/Convert.cc b/runtime/onert/backend/acl_common/Convert.cc
index a5bbe1691..67dcc8192 100644
--- a/runtime/onert/backend/acl_common/Convert.cc
+++ b/runtime/onert/backend/acl_common/Convert.cc
@@ -18,6 +18,7 @@
#include "Swizzle.h"
#include "ir/DataType.h"
+#include "ir/operation/ElementwiseActivation.h"
#include <memory>
namespace
@@ -177,6 +178,50 @@ namespace acl_common
}
}
+::arm_compute::ActivationLayerInfo
+asActivationLayerInfo(const ir::operation::ElementwiseActivation::Type op_type, float alpha,
+ float beta)
+{
+ switch (op_type)
+ {
+ case ir::operation::ElementwiseActivation::Type::RELU:
+ if (beta == 0.f)
+ {
+ if (alpha == ir::operation::ElementwiseActivation::infinity)
+ {
+ return ::arm_compute::ActivationLayerInfo{
+ ::arm_compute::ActivationLayerInfo::ActivationFunction::RELU};
+ }
+ else
+ {
+ return ::arm_compute::ActivationLayerInfo{
+ ::arm_compute::ActivationLayerInfo::ActivationFunction::BOUNDED_RELU, alpha};
+ }
+ }
+ else
+ {
+ return ::arm_compute::ActivationLayerInfo{
+ ::arm_compute::ActivationLayerInfo::ActivationFunction::LU_BOUNDED_RELU, alpha, beta};
+ }
+ case ir::operation::ElementwiseActivation::Type::TANH:
+ return ::arm_compute::ActivationLayerInfo{
+ ::arm_compute::ActivationLayerInfo::ActivationFunction::TANH, alpha, beta};
+ case ir::operation::ElementwiseActivation::Type::LOGISTIC:
+ // NOTE The sigmoid function is a special case of the Logistic function when L=1, k=1, x0=0.
+ // TODO In the ACL and NN API specs, Logistic currently always uses L=1, k=1, x0=0
+ // (i.e. always sigmoid) regardless of the parameter values.
+ // If ACL ever supports a non-sigmoid logistic, fix the parameter values here.
+ return ::arm_compute::ActivationLayerInfo{
+ ::arm_compute::ActivationLayerInfo::ActivationFunction::LOGISTIC};
+ case ir::operation::ElementwiseActivation::Type::LEAKY_RELU:
+ return ::arm_compute::ActivationLayerInfo{
+ ::arm_compute::ActivationLayerInfo::ActivationFunction::LEAKY_RELU, alpha};
+ default:
+ throw std::runtime_error{"Not supported, yet"};
+ break;
+ }
+}
+
arm_compute::Coordinates asCoordinates(const ir::Operand &operand, int32_t rank,
ir::Layout frontend_layout, ir::Layout backend_layout)
{
@@ -223,11 +268,6 @@ std::unique_ptr<AclFunction> asAclFunction(std::unique_ptr<::arm_compute::IFunct
return std::make_unique<AclFunction>(std::move(layer));
}
-std::unique_ptr<AclClFunction> asAclClFunction(std::unique_ptr<::arm_compute::IFunction> &&layer)
-{
- return std::make_unique<AclClFunction>(std::move(layer));
-}
-
ir::Layout asRuntimeLayout(::arm_compute::DataLayout data_layout)
{
switch (data_layout)
@@ -265,6 +305,21 @@ ir::DataType asRuntimeDataType(::arm_compute::DataType data_type)
}
}
+arm_compute::PoolingType convertPoolType(ir::operation::Pool2D::PoolType pool_type_ir)
+{
+ switch (pool_type_ir)
+ {
+ case ir::operation::Pool2D::PoolType::AVG:
+ return arm_compute::PoolingType::AVG;
+ case ir::operation::Pool2D::PoolType::L2:
+ return arm_compute::PoolingType::L2;
+ case ir::operation::Pool2D::PoolType::MAX:
+ return arm_compute::PoolingType::MAX;
+ default:
+ throw std::runtime_error("convertPoolType: Not supported operation yet");
+ }
+}
+
arm_compute::ReduceOperation convertReduceType(ir::operation::Reduce::ReduceType reduce_type_ir)
{
switch (reduce_type_ir)
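For reference, a small example (not part of the patch) of how the new asActivationLayerInfo overload added above resolves the RELU family from the node's alpha/beta parameters, following Compute Library's definitions (RELU = max(0, x), BOUNDED_RELU = min(alpha, max(0, x)), LU_BOUNDED_RELU = min(alpha, max(beta, x))):

    using EA = ir::operation::ElementwiseActivation;

    // alpha == EA::infinity, beta == 0  ->  plain RELU
    auto relu  = acl_common::asActivationLayerInfo(EA::Type::RELU, EA::infinity, 0.0f);
    // finite alpha, beta == 0           ->  BOUNDED_RELU (e.g. ReLU6)
    auto relu6 = acl_common::asActivationLayerInfo(EA::Type::RELU, 6.0f, 0.0f);
    // beta != 0                         ->  LU_BOUNDED_RELU, clamped to [beta, alpha]
    auto clamp = acl_common::asActivationLayerInfo(EA::Type::RELU, 6.0f, -1.0f);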
diff --git a/runtime/onert/backend/acl_common/Convert.h b/runtime/onert/backend/acl_common/Convert.h
index 9362098a5..380321c07 100644
--- a/runtime/onert/backend/acl_common/Convert.h
+++ b/runtime/onert/backend/acl_common/Convert.h
@@ -25,7 +25,9 @@
#include "ir/Layout.h"
#include "ir/InternalType.h"
#include "ir/Operand.h"
+#include "ir/operation/Pool2D.h"
#include "ir/operation/Reduce.h"
+#include "ir/operation/ElementwiseActivation.h"
#include "ir/Shape.h"
#include "ir/TypeInfo.h"
#include "ir/Coordinates.h"
@@ -59,6 +61,9 @@ namespace acl_common
const ir::Stride &stride);
::arm_compute::ActivationLayerInfo asActivationLayerInfo(ir::Activation act_code);
+::arm_compute::ActivationLayerInfo
+asActivationLayerInfo(const ir::operation::ElementwiseActivation::Type op_type, float alpha,
+ float beta);
arm_compute::Coordinates asCoordinates(const ir::Operand &operand, int32_t rank,
ir::Layout frontend_layout, ir::Layout backend_layout);
@@ -67,7 +72,6 @@ std::set<uint32_t> asSet(const ir::Operand &operand, int32_t rank, ir::Layout fr
ir::Layout backend_layout);
std::unique_ptr<AclFunction> asAclFunction(std::unique_ptr<::arm_compute::IFunction> &&layer);
-std::unique_ptr<AclClFunction> asAclClFunction(std::unique_ptr<::arm_compute::IFunction> &&layer);
template <typename T_Function>
std::unique_ptr<T_Function> asFunction(std::unique_ptr<::arm_compute::IFunction> &&fn)
@@ -78,6 +82,7 @@ std::unique_ptr<T_Function> asFunction(std::unique_ptr<::arm_compute::IFunction>
ir::Layout asRuntimeLayout(::arm_compute::DataLayout data_layout);
ir::DataType asRuntimeDataType(::arm_compute::DataType data_type);
+arm_compute::PoolingType convertPoolType(ir::operation::Pool2D::PoolType pool_type_ir);
arm_compute::ReduceOperation convertReduceType(ir::operation::Reduce::ReduceType reduce_type_ir);
} // namespace acl_common
diff --git a/runtime/onert/backend/acl_neon/Backend.h b/runtime/onert/backend/acl_neon/Backend.h
index a0b145e19..35d6e4e8e 100644
--- a/runtime/onert/backend/acl_neon/Backend.h
+++ b/runtime/onert/backend/acl_neon/Backend.h
@@ -48,10 +48,13 @@ public:
const auto &operands = graph.operands();
const auto &operations = graph.operations();
auto context = std::make_unique<BackendContext>(this, &graph);
- auto tb = std::make_shared<TensorBuilder>(operands, createTensorManager(is_linear_executor));
+ auto tm = createTensorManager(is_linear_executor);
+ auto tr = std::make_shared<acl_common::AclTensorRegistry<TensorManager>>(tm);
+ auto tb = std::make_shared<TensorBuilder>(operands, tm, tr);
+ context->tensor_registry = tr;
context->tensor_builder = tb;
- context->constant_initializer = std::make_shared<ConstantInitializer>(operands, tb);
- context->kernel_gen = std::make_shared<KernelGenerator>(operands, operations, tb);
+ context->constant_initializer = std::make_shared<ConstantInitializer>(operands, tr);
+ context->kernel_gen = std::make_shared<KernelGenerator>(operands, operations, tb, tr);
context->tensor_register = nullptr;
context->optimizer = std::make_shared<Optimizer>(context.get());
return context;
diff --git a/runtime/onert/backend/acl_neon/ConstantInitializer.cc b/runtime/onert/backend/acl_neon/ConstantInitializer.cc
index 4191b277f..79edb9ded 100644
--- a/runtime/onert/backend/acl_neon/ConstantInitializer.cc
+++ b/runtime/onert/backend/acl_neon/ConstantInitializer.cc
@@ -24,100 +24,12 @@ namespace acl_neon
{
ConstantInitializer::ConstantInitializer(const ir::Operands &operands,
- const std::shared_ptr<TensorBuilder> &tensor_builder)
- : IConstantInitializer{operands}, _tensor_builder{tensor_builder}
+ const std::shared_ptr<ITensorRegistry> &tensor_reg)
+ : acl_common::AclConstantInitializer{operands, tensor_reg}
{
// DO NOTHING
}
-void ConstantInitializer::copyInputInitialize(const ir::Operation &node, uint32_t index)
-{
- assert(node.getInputs().size() > index);
-
- const auto &input_index = node.getInputs().at(index);
- const auto &input_obj = _operands.at(input_index);
- registerCopyInitializer(input_index, input_obj);
-}
-
-void ConstantInitializer::permuteInputInitialize(const ir::Operation &node, uint32_t index)
-{
- assert(node.getInputs().size() > index);
-
- const auto &input_index = node.getInputs().at(index);
- const auto &input_obj = _operands.at(input_index);
- registerPermuteInitializer(input_index, input_obj);
-}
-
-void ConstantInitializer::visit(const ir::operation::BatchToSpaceND &node)
-{
- const auto &block_size_index = node.getInputs().at(ir::operation::BatchToSpaceND::BLOCK_SIZE);
- const auto &block_size_obj = _operands.at(block_size_index);
-
- if (block_size_obj.isConstant())
- {
- _init_map[block_size_index] = [](const ir::Operand &model_obj, backend::ITensor &obj) {
- assert(model_obj.data());
- const auto &shape = model_obj.shape();
- const auto base = reinterpret_cast<const int32_t *>(model_obj.data()->base());
- assert(model_obj.shape().rank() == 1);
- obj.access([&](ITensor &tensor) {
- for (size_t i = 0; i < shape.num_elements(); ++i)
- {
- const int32_t value = base[shape.num_elements() - i - 1];
- int32_t *into = reinterpret_cast<int32_t *>(tensor.buffer() +
- tensor.calcOffset({static_cast<int32_t>(i)}));
- *into = value;
- }
- });
- };
- }
-}
-
-void ConstantInitializer::visit(const ir::operation::Conv2D &node)
-{
- permuteInputInitialize(node, ir::operation::Conv2D::KERNEL);
- copyInputInitialize(node, ir::operation::Conv2D::BIAS);
-}
-
-void ConstantInitializer::visit(const ir::operation::DepthwiseConv2D &node)
-{
- permuteInputInitialize(node, ir::operation::DepthwiseConv2D::KERNEL);
- copyInputInitialize(node, ir::operation::DepthwiseConv2D::BIAS);
-}
-
-void ConstantInitializer::visit(const ir::operation::FullyConnected &node)
-{
- copyInputInitialize(node, ir::operation::FullyConnected::WEIGHT);
- copyInputInitialize(node, ir::operation::FullyConnected::BIAS);
-}
-
-void ConstantInitializer::visit(const ir::operation::LSTM &node)
-{
- copyInputInitialize(node, ir::operation::LSTM::INPUT_TO_INPUT_WEIGHTS);
- copyInputInitialize(node, ir::operation::LSTM::INPUT_TO_FORGET_WEIGHTS);
- copyInputInitialize(node, ir::operation::LSTM::INPUT_TO_CELL_WEIGHTS);
- copyInputInitialize(node, ir::operation::LSTM::INPUT_TO_OUTPUT_WEIGHTS);
- copyInputInitialize(node, ir::operation::LSTM::RECURRENT_TO_INPUT_WEIGHTS);
- copyInputInitialize(node, ir::operation::LSTM::RECURRENT_TO_FORGET_WEIGHTS);
- copyInputInitialize(node, ir::operation::LSTM::RECURRENT_TO_CELL_WEIGHTS);
- copyInputInitialize(node, ir::operation::LSTM::RECURRENT_TO_OUTPUT_WEIGHTS);
- copyInputInitialize(node, ir::operation::LSTM::CELL_TO_INPUT_WEIGHTS);
- copyInputInitialize(node, ir::operation::LSTM::CELL_TO_FORGET_WEIGHTS);
- copyInputInitialize(node, ir::operation::LSTM::CELL_TO_OUTPUT_WEIGHTS);
- copyInputInitialize(node, ir::operation::LSTM::INPUT_GATE_BIAS);
- copyInputInitialize(node, ir::operation::LSTM::FORGET_GATE_BIAS);
- copyInputInitialize(node, ir::operation::LSTM::OUTPUT_GATE_BIAS);
- copyInputInitialize(node, ir::operation::LSTM::PROJECTION_WEIGHTS);
- copyInputInitialize(node, ir::operation::LSTM::PROJECTION_BIAS);
-}
-
-void ConstantInitializer::visit(const ir::operation::RNN &node)
-{
- copyInputInitialize(node, ir::operation::RNN::WEIGHTS);
- copyInputInitialize(node, ir::operation::RNN::RECURRENT_WEIGHTS);
- copyInputInitialize(node, ir::operation::RNN::BIAS);
-}
-
void ConstantInitializer::visit(const ir::operation::SpaceToBatchND &node)
{
const auto &block_size_index = node.getInputs().at(ir::operation::SpaceToBatchND::BLOCK_SIZE);
@@ -173,11 +85,6 @@ void ConstantInitializer::visit(const ir::operation::SpaceToBatchND &node)
}
}
-void ConstantInitializer::visit(const ir::operation::TransposeConv &node)
-{
- permuteInputInitialize(node, ir::operation::TransposeConv::KERNEL);
-}
-
} // namespace acl_neon
} // namespace backend
} // namespace onert
diff --git a/runtime/onert/backend/acl_neon/ConstantInitializer.h b/runtime/onert/backend/acl_neon/ConstantInitializer.h
index 6b4c1f145..c7d71cdcf 100644
--- a/runtime/onert/backend/acl_neon/ConstantInitializer.h
+++ b/runtime/onert/backend/acl_neon/ConstantInitializer.h
@@ -17,9 +17,7 @@
#ifndef __ONERT_COMPILER_ACL_NEON_CONSTANT_INITIALIZER_H__
#define __ONERT_COMPILER_ACL_NEON_CONSTANT_INITIALIZER_H__
-#include <backend/IConstantInitializer.h>
-#include <ir/Operands.h>
-#include "TensorBuilder.h"
+#include "AclConstantInitializer.h"
namespace onert
{
@@ -28,29 +26,15 @@ namespace backend
namespace acl_neon
{
-class ConstantInitializer : public IConstantInitializer
+class ConstantInitializer : public acl_common::AclConstantInitializer
{
public:
ConstantInitializer(const ir::Operands &operands,
- const std::shared_ptr<TensorBuilder> &tensor_builder);
+ const std::shared_ptr<ITensorRegistry> &tensor_reg);
public:
- void visit(const ir::operation::BatchToSpaceND &) override;
- void visit(const ir::operation::Conv2D &) override;
- void visit(const ir::operation::DepthwiseConv2D &) override;
- void visit(const ir::operation::FullyConnected &) override;
- void visit(const ir::operation::LSTM &) override;
- void visit(const ir::operation::RNN &) override;
- void visit(const ir::operation::SpaceToBatchND &) override;
- void visit(const ir::operation::TransposeConv &) override;
-
-private:
- std::shared_ptr<ITensorBuilder> tensor_builder() const override { return _tensor_builder; }
- void copyInputInitialize(const ir::Operation &node, uint32_t index);
- void permuteInputInitialize(const ir::Operation &node, uint32_t index);
-
-private:
- std::shared_ptr<TensorBuilder> _tensor_builder;
+ using acl_common::AclConstantInitializer::visit;
+ void visit(const ir::operation::SpaceToBatchND &node) final;
};
} // namespace acl_neon
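The "using acl_common::AclConstantInitializer::visit;" line above matters because of C++ name hiding: declaring visit(SpaceToBatchND&) in the derived class would otherwise hide every inherited visit overload. A generic, self-contained sketch of the idiom (names are illustrative only, not from the patch):

    struct BaseVisitor
    {
      virtual ~BaseVisitor() = default;
      virtual void visit(int) {}     // stands in for the inherited visit() overloads
      virtual void visit(double) {}
    };

    struct DerivedVisitor : BaseVisitor
    {
      using BaseVisitor::visit;      // re-expose the overloads hidden by the declaration below
      void visit(double) override {} // the one overload this class customizes
    };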
diff --git a/runtime/onert/backend/acl_neon/KernelGenerator.cc b/runtime/onert/backend/acl_neon/KernelGenerator.cc
index 1195b83cc..6d53c1245 100644
--- a/runtime/onert/backend/acl_neon/KernelGenerator.cc
+++ b/runtime/onert/backend/acl_neon/KernelGenerator.cc
@@ -44,11 +44,12 @@ using ::onert::backend::acl_common::asAclFunction;
using ActivationBuilder = ::onert::backend::acl_common::AclActivationBuilder<
::arm_compute::ITensor, ::arm_compute::NEActivationLayer, acl_common::AclFunction>;
-KernelGenerator::KernelGenerator(const ir::Operands &operands_ctx,
- const ir::Operations &operations_ctx,
- const std::shared_ptr<TensorBuilder> &tensor_builder)
+KernelGenerator::KernelGenerator(
+ const ir::Operands &operands_ctx, const ir::Operations &operations_ctx,
+ const std::shared_ptr<TensorBuilder> &tensor_builder,
+ const std::shared_ptr<acl_common::AclTensorRegistry<TensorManager>> &tensor_reg)
: _ctx(operands_ctx), _operations_ctx(operations_ctx), _tensor_builder(tensor_builder),
- _current_op_seq_layout(ir::Layout::UNKNOWN)
+ _tensor_reg(tensor_reg), _current_op_seq_layout(ir::Layout::UNKNOWN)
{
// DO NOTHING
}
@@ -70,26 +71,6 @@ void KernelGenerator::visit(const ir::OpSequence &op_seq)
}
}
-void KernelGenerator::visit(const ir::operation::Abs &node)
-{
- const auto output_index{node.getOutputs().at(0)};
- const auto input_index{node.getInputs().at(ir::operation::Abs::Input::INPUT)};
-
- auto output_tensor = _tensor_builder->at(output_index).get();
- auto input_tensor = _tensor_builder->at(input_index).get();
-
- const ::arm_compute::ActivationLayerInfo act_info{
- ::arm_compute::ActivationLayerInfo::ActivationFunction::ABS};
-
- auto fn = std::make_unique<::arm_compute::NEActivationLayer>();
-
- fn->configure(input_tensor->handle(), output_tensor->handle(), act_info);
-
- auto acl_fn = asAclFunction(std::move(fn));
-
- _return_fn = std::move(acl_fn);
-}
-
void KernelGenerator::visit(const ir::operation::ArgMax &node)
{
const auto ofm_index{node.getOutputs().at(0)};
@@ -97,8 +78,8 @@ void KernelGenerator::visit(const ir::operation::ArgMax &node)
const auto ifm_rank = _ctx.at(ifm_index).shape().rank();
- auto ofm_tensor = _tensor_builder->at(ofm_index).get();
- auto ifm_tensor = _tensor_builder->at(ifm_index).get();
+ auto ofm_tensor = _tensor_reg->getAclTensor(ofm_index).get();
+ auto ifm_tensor = _tensor_reg->getAclTensor(ifm_index).get();
auto frontend_layout = _current_op_seq_layout;
auto backend_layout = ifm_tensor->layout();
@@ -111,14 +92,11 @@ void KernelGenerator::visit(const ir::operation::ArgMax &node)
const auto fixed_axis =
acl_common::ToARMComputeAxis(ifm_rank, axis_value, frontend_layout, backend_layout).value();
- auto fn = std::make_unique<::arm_compute::NEArgMinMaxLayer>();
-
- fn->configure(ifm_tensor->handle(), fixed_axis, ofm_tensor->handle(),
- arm_compute::ReductionOperation::ARG_IDX_MAX);
+ auto fn = acl_common::generateLayer<arm_compute::NEArgMinMaxLayer>(
+ ifm_tensor->handle(), fixed_axis, ofm_tensor->handle(),
+ arm_compute::ReductionOperation::ARG_IDX_MAX);
- auto acl_fn = asAclFunction(std::move(fn));
-
- _return_fn = std::move(acl_fn);
+ _return_fn = asAclFunction(std::move(fn));
}
void KernelGenerator::visit(const ir::operation::BatchToSpaceND &node)
@@ -128,50 +106,67 @@ void KernelGenerator::visit(const ir::operation::BatchToSpaceND &node)
const auto block_size_index{
node.getInputs().at(ir::operation::BatchToSpaceND::Input::BLOCK_SIZE)};
- auto ofm_tensor = _tensor_builder->at(ofm_index).get();
- auto ifm_tensor = _tensor_builder->at(ifm_index).get();
- auto block_size_tensor = _tensor_builder->at(block_size_index).get();
+ auto ofm_tensor = _tensor_reg->getAclTensor(ofm_index).get();
+ auto ifm_tensor = _tensor_reg->getAclTensor(ifm_index).get();
+ auto block_size_tensor = _tensor_reg->getAclTensor(block_size_index).get();
assert(_ctx.at(block_size_index).data());
- auto fn = std::make_unique<::arm_compute::NEBatchToSpaceLayer>();
-
- fn->configure(ifm_tensor->handle(), block_size_tensor->handle(), ofm_tensor->handle());
+ auto fn = acl_common::generateLayer<arm_compute::NEBatchToSpaceLayer>(
+ ifm_tensor->handle(), block_size_tensor->handle(), ofm_tensor->handle());
- auto acl_fn = asAclFunction(std::move(fn));
-
- _return_fn = std::move(acl_fn);
+ _return_fn = asAclFunction(std::move(fn));
}
-void KernelGenerator::visit(const ir::operation::Cast &node)
+void KernelGenerator::visit(const ir::operation::BinaryArithmetic &node)
{
const auto ofm_index{node.getOutputs().at(0)};
- const auto ifm_index{node.getInputs().at(ir::operation::Cast::Input::INPUT)};
+ const auto lhs_index{node.getInputs().at(ir::operation::BinaryArithmetic::Input::LHS)};
+ const auto rhs_index{node.getInputs().at(ir::operation::BinaryArithmetic::Input::RHS)};
- auto ofm_tensor = _tensor_builder->at(ofm_index).get();
- auto ifm_tensor = _tensor_builder->at(ifm_index).get();
-
- std::unique_ptr<::arm_compute::IFunction> fn;
- if (ifm_tensor->data_type() == ofm_tensor->data_type())
- {
- auto l = std::make_unique<::arm_compute::NECopy>();
+ const auto activation = node.param().activation;
- l->configure(ifm_tensor->handle(), ofm_tensor->handle());
+ auto ofm_tensor = _tensor_reg->getAclTensor(ofm_index).get();
+ auto lhs_tensor = _tensor_reg->getAclTensor(lhs_index).get();
+ auto rhs_tensor = _tensor_reg->getAclTensor(rhs_index).get();
- fn = std::move(l);
- }
- else
+ std::unique_ptr<arm_compute::IFunction> fn;
+ switch (node.param().arithmetic_type)
{
- auto l = std::make_unique<::arm_compute::NECast>();
-
- l->configure(ifm_tensor->handle(), ofm_tensor->handle(), arm_compute::ConvertPolicy::SATURATE);
-
- fn = std::move(l);
+ case ir::operation::BinaryArithmetic::ArithmeticType::ADD:
+ {
+ fn = acl_common::generateLayer<arm_compute::NEArithmeticAddition>(
+ lhs_tensor->handle(), rhs_tensor->handle(), ofm_tensor->handle(),
+ arm_compute::ConvertPolicy::SATURATE);
+ break;
+ }
+ case ir::operation::BinaryArithmetic::ArithmeticType::SUB:
+ {
+ fn = acl_common::generateLayer<arm_compute::NEArithmeticSubtraction>(
+ lhs_tensor->handle(), rhs_tensor->handle(), ofm_tensor->handle(),
+ arm_compute::ConvertPolicy::SATURATE);
+ break;
+ }
+ case ir::operation::BinaryArithmetic::ArithmeticType::MUL:
+ {
+ // For scale 1.0, the only allowed RoundingPolicy is TO_ZERO
+ fn = acl_common::generateLayer<arm_compute::NEPixelWiseMultiplication>(
+ lhs_tensor->handle(), rhs_tensor->handle(), ofm_tensor->handle(), 1.0, // scale
+ arm_compute::ConvertPolicy::SATURATE, arm_compute::RoundingPolicy::TO_ZERO);
+ break;
+ }
+ case ir::operation::BinaryArithmetic::ArithmeticType::DIV:
+ {
+ fn = acl_common::generateLayer<arm_compute::NEElementwiseDivision>(
+ lhs_tensor->handle(), rhs_tensor->handle(), ofm_tensor->handle());
+ break;
+ }
+ default:
+ assert(false && "The BinaryArithmetic operation supports only binary arithmetic operations");
+ break;
}
-
- auto acl_fn = asAclFunction(std::move(fn));
-
- _return_fn = std::move(acl_fn);
+ _return_fn = std::make_unique<exec::FunctionSequence>(
+ asAclFunction(std::move(fn)), ActivationBuilder::generate(activation, ofm_tensor->handle()));
}
void KernelGenerator::visit(const ir::operation::Conv2D &node)
@@ -195,20 +190,18 @@ void KernelGenerator::visit(const ir::operation::Conv2D &node)
ker_width, ker_height);
const auto activation = node.param().activation;
- auto ofm_tensor = _tensor_builder->at(ofm_index).get();
- auto ifm_tensor = _tensor_builder->at(ifm_index).get();
- auto ker_tensor = _tensor_builder->at(ker_index).get();
- auto bias_tensor = _tensor_builder->at(bias_index).get();
+ auto ofm_tensor = _tensor_reg->getAclTensor(ofm_index).get();
+ auto ifm_tensor = _tensor_reg->getAclTensor(ifm_index).get();
+ auto ker_tensor = _tensor_reg->getAclTensor(ker_index).get();
+ auto bias_tensor = _tensor_reg->getAclTensor(bias_index).get();
const auto conv_info = acl_common::asPadStrideInfo(padding, stride);
const auto act_info = acl_common::asActivationLayerInfo(activation);
- auto fn = std::make_unique<::arm_compute::NEConvolutionLayer>(
- _tensor_builder->acl_tensor_manager()->internal_buffer_manager());
-
- fn->configure(ifm_tensor->handle(), ker_tensor->handle(), bias_tensor->handle(),
- ofm_tensor->handle(), conv_info, ::arm_compute::WeightsInfo(),
- ::arm_compute::Size2D(1U, 1U), act_info);
+ auto fn = acl_common::generateLayer<arm_compute::NEConvolutionLayer>(
+ _tensor_builder->acl_tensor_manager()->internal_buffer_manager(), ifm_tensor->handle(),
+ ker_tensor->handle(), bias_tensor->handle(), ofm_tensor->handle(), conv_info,
+ ::arm_compute::WeightsInfo(), ::arm_compute::Size2D(1U, 1U), act_info);
_return_fn = asAclFunction(std::move(fn));
}
@@ -221,16 +214,13 @@ void KernelGenerator::visit(const ir::operation::DepthToSpace &node)
auto block_size = node.param().block_size;
assert(block_size > 0);
- auto output_tensor = _tensor_builder->at(output_index).get();
- auto input_tensor = _tensor_builder->at(input_index).get();
+ auto output_tensor = _tensor_reg->getAclTensor(output_index).get();
+ auto input_tensor = _tensor_reg->getAclTensor(input_index).get();
- auto fn = std::make_unique<::arm_compute::NEDepthToSpaceLayer>();
+ auto fn = acl_common::generateLayer<arm_compute::NEDepthToSpaceLayer>(
+ input_tensor->handle(), output_tensor->handle(), block_size);
- fn->configure(input_tensor->handle(), output_tensor->handle(), block_size);
-
- auto acl_fn = asAclFunction(std::move(fn));
-
- _return_fn = std::move(acl_fn);
+ _return_fn = asAclFunction(std::move(fn));
}
void KernelGenerator::visit(const ir::operation::DepthwiseConv2D &node)
@@ -255,67 +245,23 @@ void KernelGenerator::visit(const ir::operation::DepthwiseConv2D &node)
const auto multiplier = node.param().multiplier;
const auto activation = node.param().activation;
- auto ofm_tensor = _tensor_builder->at(ofm_index).get();
- auto ifm_tensor = _tensor_builder->at(ifm_index).get();
- auto ker_tensor = _tensor_builder->at(ker_index).get();
- auto bias_tensor = _tensor_builder->at(bias_index).get();
+ auto ofm_tensor = _tensor_reg->getAclTensor(ofm_index).get();
+ auto ifm_tensor = _tensor_reg->getAclTensor(ifm_index).get();
+ auto ker_tensor = _tensor_reg->getAclTensor(ker_index).get();
+ auto bias_tensor = _tensor_reg->getAclTensor(bias_index).get();
const auto conv_info = acl_common::asPadStrideInfo(padding, stride);
const auto act_info = acl_common::asActivationLayerInfo(activation);
{
- auto fn = std::make_unique<::arm_compute::NEDepthwiseConvolutionLayer>();
-
- fn->configure(ifm_tensor->handle(), ker_tensor->handle(), bias_tensor->handle(),
- ofm_tensor->handle(), conv_info, multiplier, act_info);
+ auto fn = acl_common::generateLayer<arm_compute::NEDepthwiseConvolutionLayer>(
+ ifm_tensor->handle(), ker_tensor->handle(), bias_tensor->handle(), ofm_tensor->handle(),
+ conv_info, multiplier, act_info);
_return_fn = asAclFunction(std::move(fn));
}
}
-void KernelGenerator::visit(const ir::operation::Dequantize &node)
-{
- const auto output_index{node.getOutputs().at(0)};
- const auto input_index{node.getInputs().at(ir::operation::Dequantize::Input::INPUT)};
-
- auto output_tensor = _tensor_builder->at(output_index).get();
- auto input_tensor = _tensor_builder->at(input_index).get();
-
- auto fn = std::make_unique<::arm_compute::NEDequantizationLayer>();
-
- fn->configure(input_tensor->handle(), output_tensor->handle());
-
- auto acl_fn = asAclFunction(std::move(fn));
-
- _return_fn = std::move(acl_fn);
-}
-
-void KernelGenerator::visit(const ir::operation::MaxPool2D &node)
-{
- auto raw_fn = acl_common::kernelGenPool2D<::arm_compute::NEPoolingLayer>(
- node, _ctx, _tensor_builder, _current_op_seq_layout, ::arm_compute::PoolingType::MAX);
-
- const auto ofm_index{node.getOutputs().at(0)};
- auto ofm_tensor = _tensor_builder->at(ofm_index).get();
- const auto activation = node.param().activation;
- _return_fn = std::make_unique<exec::FunctionSequence>(
- asAclFunction(std::move(raw_fn)),
- ActivationBuilder::generate(activation, ofm_tensor->handle()));
-}
-
-void KernelGenerator::visit(const ir::operation::AvgPool2D &node)
-{
- auto raw_fn = acl_common::kernelGenPool2D<::arm_compute::NEPoolingLayer>(
- node, _ctx, _tensor_builder, _current_op_seq_layout, ::arm_compute::PoolingType::AVG);
-
- const auto ofm_index{node.getOutputs().at(0)};
- auto ofm_tensor = _tensor_builder->at(ofm_index).get();
- const auto activation = node.param().activation;
- _return_fn = std::make_unique<exec::FunctionSequence>(
- asAclFunction(std::move(raw_fn)),
- ActivationBuilder::generate(activation, ofm_tensor->handle()));
-}
-
void KernelGenerator::visit(const ir::operation::Concat &node)
{
const auto ofm_index{node.getOutputs().at(0)};
@@ -336,80 +282,223 @@ void KernelGenerator::visit(const ir::operation::Concat &node)
return;
}
- auto output_tensor = _tensor_builder->at(ofm_index).get();
+ auto output_tensor = _tensor_reg->getAclTensor(ofm_index).get();
std::vector<::arm_compute::ITensor *> input_tensors;
for (const auto &ifm_ind : input_indexes)
- input_tensors.emplace_back(_tensor_builder->at(ifm_ind)->handle());
+ input_tensors.emplace_back(_tensor_reg->getAclTensor(ifm_ind)->handle());
std::unique_ptr<::arm_compute::IFunction> fn;
if (input_indexes.size() < 2)
{
- auto l = std::make_unique<::arm_compute::NECopy>();
- l->configure(input_tensors.at(0), output_tensor->handle());
- fn = std::move(l);
+ fn = acl_common::generateLayer<arm_compute::NECopy>(input_tensors.at(0),
+ output_tensor->handle());
}
else
{
- auto l = std::make_unique<::arm_compute::NEConcatenateLayer>();
const auto rank = _ctx.at(ofm_index).shape().rank();
const auto frontend_layout = _current_op_seq_layout;
const auto backend_layout = output_tensor->layout();
const auto fixed_axis =
acl_common::ToARMComputeAxis(rank, axis, frontend_layout, backend_layout).value();
- l->configure(input_tensors, output_tensor->handle(), fixed_axis);
- fn = std::move(l);
+ fn = acl_common::generateLayer<arm_compute::NEConcatenateLayer>(
+ input_tensors, output_tensor->handle(), fixed_axis);
}
- auto acl_fn = asAclFunction(std::move(fn));
-
- _return_fn = std::move(acl_fn);
+ _return_fn = asAclFunction(std::move(fn));
}
-void KernelGenerator::visit(const ir::operation::EmbeddingLookup &node)
+void KernelGenerator::visit(const ir::operation::ElementwiseActivation &node)
{
- const auto output_index{node.getOutputs().at(0)};
- const auto lookups_index{node.getInputs().at(ir::operation::EmbeddingLookup::Input::LOOKUPS)};
- const auto values_index{node.getInputs().at(ir::operation::EmbeddingLookup::Input::VALUES)};
+ const auto ofm_index{node.getOutputs().at(0)};
+ const auto ifm_index{node.getInputs().at(ir::operation::ElementwiseActivation::Input::INPUT)};
+
+ auto ofm_tensor = _tensor_reg->getAclTensor(ofm_index).get();
+ auto ifm_tensor = _tensor_reg->getAclTensor(ifm_index).get();
+
+ const ::arm_compute::ActivationLayerInfo act_info = acl_common::asActivationLayerInfo(
+ node.param().op_type, node.param().alpha, node.param().beta);
- auto output_tensor = _tensor_builder->at(output_index).get();
- auto lookups_tensor = _tensor_builder->at(lookups_index).get();
- auto values_tensor = _tensor_builder->at(values_index).get();
+ std::unique_ptr<arm_compute::IFunction> fn;
+ if (node.param().op_type == ir::operation::ElementwiseActivation::Type::LOGISTIC)
+ {
+ // NOTE NEActivationLayer can produce erroneous results; this is caused by 'vexpq_f32()'.
+ // The NEON function returns 'NaN' instead of 'INF' for values outside the representable
+ // float range, and the 'NaN' then corrupts the result of this op.
+ fn = acl_common::generateLayer<arm_compute::NEActivationLayerEx>(
+ ifm_tensor->handle(), ofm_tensor->handle(), act_info);
+ }
+ else
+ {
+ fn = acl_common::generateLayer<arm_compute::NEActivationLayer>(ifm_tensor->handle(),
+ ofm_tensor->handle(), act_info);
+ }
- auto fn = std::make_unique<::arm_compute::NEEmbeddingLookup>();
+ _return_fn = asAclFunction(std::move(fn));
+}
- fn->configure(values_tensor->handle(), output_tensor->handle(), lookups_tensor->handle());
+void KernelGenerator::visit(const ir::operation::ElementwiseBinary &node)
+{
+ const auto output_index{node.getOutputs().at(0)};
+ const auto lhs_index{node.getInputs().at(ir::operation::ElementwiseBinary::Input::LHS)};
+ const auto rhs_index{node.getInputs().at(ir::operation::ElementwiseBinary::Input::RHS)};
- auto acl_fn = asAclFunction(std::move(fn));
+ auto output_tensor = _tensor_reg->getAclTensor(output_index).get();
+ auto lhs_tensor = _tensor_reg->getAclTensor(lhs_index).get();
+ auto rhs_tensor = _tensor_reg->getAclTensor(rhs_index).get();
- _return_fn = std::move(acl_fn);
+ std::unique_ptr<arm_compute::IFunction> fn;
+ switch (node.param().op_type)
+ {
+ case ir::operation::ElementwiseBinary::ElementwiseBinaryType::LOGICAL_AND:
+ {
+ fn = acl_common::generateLayer<arm_compute::NELogicalAnd>(
+ lhs_tensor->handle(), rhs_tensor->handle(), output_tensor->handle());
+ break;
+ }
+ case ir::operation::ElementwiseBinary::ElementwiseBinaryType::LOGICAL_OR:
+ {
+ fn = acl_common::generateLayer<arm_compute::NELogicalOr>(
+ lhs_tensor->handle(), rhs_tensor->handle(), output_tensor->handle());
+ break;
+ }
+ case ir::operation::ElementwiseBinary::ElementwiseBinaryType::MAX:
+ {
+ fn = acl_common::generateLayer<arm_compute::NEElementwiseMax>(
+ lhs_tensor->handle(), rhs_tensor->handle(), output_tensor->handle());
+ break;
+ }
+ case ir::operation::ElementwiseBinary::ElementwiseBinaryType::MIN:
+ {
+ fn = acl_common::generateLayer<arm_compute::NEElementwiseMin>(
+ lhs_tensor->handle(), rhs_tensor->handle(), output_tensor->handle());
+ break;
+ }
+ default:
+ {
+ std::string err_msg("acl_neon KernelGenerator : " + node.name() +
+ "is not elementwise-binary operations");
+ assert(false && err_msg.c_str());
+ break;
+ }
+ }
+ _return_fn = asAclFunction(std::move(fn));
}
-void KernelGenerator::visit(const ir::operation::Floor &node)
+void KernelGenerator::visit(const ir::operation::ElementwiseUnary &node)
{
- const auto ofm_index{node.getOutputs().at(0)};
- const auto ifm_index{node.getInputs().at(ir::operation::Floor::Input::INPUT)};
+ const auto output_index{node.getOutputs().at(0)};
+ const auto input_index{node.getInputs().at(ir::operation::ElementwiseUnary::Input::INPUT)};
+
+ auto output_tensor = _tensor_reg->getAclTensor(output_index).get();
+ auto input_tensor = _tensor_reg->getAclTensor(input_index).get();
+
+ std::unique_ptr<arm_compute::IFunction> fn;
+ switch (node.param().op_type)
+ {
+ case ir::operation::ElementwiseUnary::Type::ABS:
+ {
+ const ::arm_compute::ActivationLayerInfo act_info{
+ ::arm_compute::ActivationLayerInfo::ActivationFunction::ABS};
- auto ofm_tensor = _tensor_builder->at(ofm_index).get();
- auto ifm_tensor = _tensor_builder->at(ifm_index).get();
+ fn = acl_common::generateLayer<arm_compute::NEActivationLayer>(
+ input_tensor->handle(), output_tensor->handle(), act_info);
+ break;
+ }
+ case ir::operation::ElementwiseUnary::Type::CAST:
+ {
+ if (input_tensor->data_type() == output_tensor->data_type())
+ {
+ fn = acl_common::generateLayer<arm_compute::NECopy>(input_tensor->handle(),
+ output_tensor->handle());
+ }
+ else
+ {
+ fn = acl_common::generateLayer<arm_compute::NECast>(
+ input_tensor->handle(), output_tensor->handle(), arm_compute::ConvertPolicy::SATURATE);
+ }
+ break;
+ }
+ case ir::operation::ElementwiseUnary::Type::DEQUANTIZE:
+ {
+ fn = acl_common::generateLayer<arm_compute::NEDequantizationLayer>(input_tensor->handle(),
+ output_tensor->handle());
+ break;
+ }
+ case ir::operation::ElementwiseUnary::Type::EXP:
+ {
+ fn = acl_common::generateLayer<arm_compute::NEExpLayer>(input_tensor->handle(),
+ output_tensor->handle());
+ break;
+ }
+ case ir::operation::ElementwiseUnary::Type::FLOOR:
+ {
+ fn = acl_common::generateLayer<arm_compute::NEFloor>(input_tensor->handle(),
+ output_tensor->handle());
+ break;
+ }
+ case ir::operation::ElementwiseUnary::Type::LOGICAL_NOT:
+ {
+ fn = acl_common::generateLayer<arm_compute::NEBitwiseNot>(input_tensor->handle(),
+ output_tensor->handle());
+ break;
+ }
+ case ir::operation::ElementwiseUnary::Type::NEG:
+ {
+ fn = acl_common::generateLayer<arm_compute::NENegLayer>(input_tensor->handle(),
+ output_tensor->handle());
+ break;
+ }
+ case ir::operation::ElementwiseUnary::Type::RSQRT:
+ {
+ fn = acl_common::generateLayer<arm_compute::NERsqrtLayer>(input_tensor->handle(),
+ output_tensor->handle());
+ break;
+ }
+ case ir::operation::ElementwiseUnary::Type::SQRT:
+ {
+ const ::arm_compute::ActivationLayerInfo act_info{
+ ::arm_compute::ActivationLayerInfo::ActivationFunction::SQRT};
- auto fn = std::make_unique<::arm_compute::NEFloor>();
+ fn = acl_common::generateLayer<arm_compute::NEActivationLayer>(
+ input_tensor->handle(), output_tensor->handle(), act_info);
+ break;
+ }
+ default:
+ {
+ throw std::runtime_error("acl_neon KernelGenerator : " + node.name() +
+ "is not supported yet");
+ break;
+ }
+ }
+ _return_fn = asAclFunction(std::move(fn));
+}
- fn->configure(ifm_tensor->handle(), ofm_tensor->handle());
+void KernelGenerator::visit(const ir::operation::EmbeddingLookup &node)
+{
+ const auto output_index{node.getOutputs().at(0)};
+ const auto lookups_index{node.getInputs().at(ir::operation::EmbeddingLookup::Input::LOOKUPS)};
+ const auto values_index{node.getInputs().at(ir::operation::EmbeddingLookup::Input::VALUES)};
- auto acl_fn = asAclFunction(std::move(fn));
+ auto output_tensor = _tensor_reg->getAclTensor(output_index).get();
+ auto lookups_tensor = _tensor_reg->getAclTensor(lookups_index).get();
+ auto values_tensor = _tensor_reg->getAclTensor(values_index).get();
- _return_fn = std::move(acl_fn);
+ auto fn = acl_common::generateLayer<arm_compute::NEEmbeddingLookup>(
+ values_tensor->handle(), output_tensor->handle(), lookups_tensor->handle());
+
+ _return_fn = asAclFunction(std::move(fn));
}
void KernelGenerator::visit(const ir::operation::FullyConnected &node)
{
const auto output_index{node.getOutputs().at(0)};
- auto output_tensor = _tensor_builder->at(output_index).get();
+ auto output_tensor = _tensor_reg->getAclTensor(output_index).get();
const auto activation = node.param().activation;
auto fn = acl_common::kernelGenFullyConnected<acl_common::AclFunction, ::arm_compute::ITensor,
::arm_compute::NEFullyConnectedReshapingLayer>(
- node, _ctx, _tensor_builder, _current_op_seq_layout);
+ node, _ctx, _tensor_builder, _tensor_reg, _current_op_seq_layout);
_return_fn = std::make_unique<exec::FunctionSequence>(
std::move(fn), ActivationBuilder::generate(activation, output_tensor->handle()));
}
@@ -423,21 +512,18 @@ void KernelGenerator::visit(const ir::operation::HashtableLookup &node)
const auto keys_index{node.getInputs().at(ir::operation::HashtableLookup::Input::KEYS)};
const auto values_index{node.getInputs().at(ir::operation::HashtableLookup::Input::VALUES)};
- auto output_tensor = _tensor_builder->at(output_index).get();
- auto hits_tensor = _tensor_builder->at(hits_index).get();
+ auto output_tensor = _tensor_reg->getAclTensor(output_index).get();
+ auto hits_tensor = _tensor_reg->getAclTensor(hits_index).get();
- auto lookups_tensor = _tensor_builder->at(lookups_index).get();
- auto keys_tensor = _tensor_builder->at(keys_index).get();
- auto values_tensor = _tensor_builder->at(values_index).get();
+ auto lookups_tensor = _tensor_reg->getAclTensor(lookups_index).get();
+ auto keys_tensor = _tensor_reg->getAclTensor(keys_index).get();
+ auto values_tensor = _tensor_reg->getAclTensor(values_index).get();
- auto fn = std::make_unique<::arm_compute::NEHashtableLookup>();
+ auto fn = acl_common::generateLayer<arm_compute::NEHashtableLookup>(
+ lookups_tensor->handle(), keys_tensor->handle(), values_tensor->handle(),
+ output_tensor->handle(), hits_tensor->handle());
- fn->configure(lookups_tensor->handle(), keys_tensor->handle(), values_tensor->handle(),
- output_tensor->handle(), hits_tensor->handle());
-
- auto acl_fn = asAclFunction(std::move(fn));
-
- _return_fn = std::move(acl_fn);
+ _return_fn = asAclFunction(std::move(fn));
}
void KernelGenerator::visit(const ir::operation::Gather &node)
@@ -453,9 +539,9 @@ void KernelGenerator::visit(const ir::operation::Gather &node)
// Converting in reverse order
const int axis = ::onert::backend::acl_common::ToARMComputeAxis(ifm_rank, axis_value).value();
- auto ofm_tensor = _tensor_builder->at(ofm_index).get();
- auto ifm_tensor = _tensor_builder->at(ifm_index).get();
- auto indices_tensor = _tensor_builder->at(indices_index).get();
+ auto ofm_tensor = _tensor_reg->getAclTensor(ofm_index).get();
+ auto ifm_tensor = _tensor_reg->getAclTensor(ifm_index).get();
+ auto indices_tensor = _tensor_reg->getAclTensor(indices_index).get();
const auto backend_layout = ofm_tensor->layout();
UNUSED_RELEASE(backend_layout);
@@ -471,8 +557,6 @@ void KernelGenerator::visit(const ir::operation::Gather &node)
assert(backend_layout == indices_tensor->layout());
assert(ifm_rank < 4 || _current_op_seq_layout == backend_layout);
- auto fn = std::make_unique<::arm_compute::NEGatherEx>();
-
// input is n-D, indices k-D, output is (n + k - 1)-D
size_t n = ifm_rank;
assert(n == ifm_tensor->num_dimensions());
@@ -495,15 +579,14 @@ void KernelGenerator::visit(const ir::operation::Gather &node)
acl_common::asTensorShape(indices.shape(), _current_op_seq_layout, backend_layout, false));
}
- fn->configure(ifm_tensor->handle(), indices_tensor->handle(), ofm_tensor->handle(), axis);
+ auto fn = acl_common::generateLayer<arm_compute::NEGatherEx>(
+ ifm_tensor->handle(), indices_tensor->handle(), ofm_tensor->handle(), axis);
// acl_neon does not revert the disabled dim_correction because acl_neon's kernels would
// use arm_compute::TensorInfo::offset_element_in_bytes();
// that would cause an error when a kernel accesses a higher dimension whose value is 1
- auto acl_fn = asAclFunction(std::move(fn));
-
- _return_fn = std::move(acl_fn);
+ _return_fn = asAclFunction(std::move(fn));
}
void KernelGenerator::visit(const ir::operation::InstanceNorm &node)
@@ -513,17 +596,16 @@ void KernelGenerator::visit(const ir::operation::InstanceNorm &node)
const auto gamma_index{node.getInputs().at(ir::operation::InstanceNorm::Input::GAMMA)};
const auto beta_index{node.getInputs().at(ir::operation::InstanceNorm::Input::BETA)};
- auto ofm_tensor = _tensor_builder->at(ofm_index).get();
- auto ifm_tensor = _tensor_builder->at(ifm_index).get();
- auto gamma_tensor = _tensor_builder->at(gamma_index).get();
- auto beta_tensor = _tensor_builder->at(beta_index).get();
+ auto ofm_tensor = _tensor_reg->getAclTensor(ofm_index).get();
+ auto ifm_tensor = _tensor_reg->getAclTensor(ifm_index).get();
+ auto gamma_tensor = _tensor_reg->getAclTensor(gamma_index).get();
+ auto beta_tensor = _tensor_reg->getAclTensor(beta_index).get();
auto epsilon = node.param().epsilon;
auto activation = node.param().activation;
- auto fn = std::make_unique<::arm_compute::NEInstanceNormalizationLayerEx>();
-
- fn->configure(ifm_tensor->handle(), ofm_tensor->handle(), gamma_tensor->handle(),
- beta_tensor->handle(), epsilon);
+ auto fn = acl_common::generateLayer<arm_compute::NEInstanceNormalizationLayerEx>(
+ ifm_tensor->handle(), ofm_tensor->handle(), gamma_tensor->handle(), beta_tensor->handle(),
+ epsilon);
_return_fn = std::make_unique<exec::FunctionSequence>(
asAclFunction(std::move(fn)), ActivationBuilder::generate(activation, ofm_tensor->handle()));
@@ -548,32 +630,16 @@ void KernelGenerator::visit(const ir::operation::L2Normalization &node)
float beta = 0.5f; // pow(reduction, -0.5) = 1 / sqrt(reduction)
float bias = 0.0f; // Don't offset the reduction.
- auto ofm_tensor = _tensor_builder->at(ofm_index).get();
- auto ifm_tensor = _tensor_builder->at(ifm_index).get();
+ auto ofm_tensor = _tensor_reg->getAclTensor(ofm_index).get();
+ auto ifm_tensor = _tensor_reg->getAclTensor(ifm_index).get();
const auto norm_info = ::arm_compute::NormalizationLayerInfo(::arm_compute::NormType::CROSS_MAP,
radius, alpha, beta, bias, false);
- auto fn = std::make_unique<::arm_compute::NENormalizationLayer>();
-
- fn->configure(ifm_tensor->handle(), ofm_tensor->handle(), norm_info);
-
- auto acl_fn = asAclFunction(std::move(fn));
+ auto fn = acl_common::generateLayer<arm_compute::NENormalizationLayer>(
+ ifm_tensor->handle(), ofm_tensor->handle(), norm_info);
- _return_fn = std::move(acl_fn);
-}
-
-void KernelGenerator::visit(const ir::operation::L2Pool2D &node)
-{
- auto raw_fn = acl_common::kernelGenPool2D<::arm_compute::NEPoolingLayer>(
- node, _ctx, _tensor_builder, _current_op_seq_layout, ::arm_compute::PoolingType::L2);
-
- const auto ofm_index{node.getOutputs().at(0)};
- auto ofm_tensor = _tensor_builder->at(ofm_index).get();
- const auto activation = node.param().activation;
- _return_fn = std::make_unique<exec::FunctionSequence>(
- asAclFunction(std::move(raw_fn)),
- ActivationBuilder::generate(activation, ofm_tensor->handle()));
+ _return_fn = asAclFunction(std::move(fn));
}
void KernelGenerator::visit(const ir::operation::LocalResponseNormalization &node)
@@ -587,142 +653,22 @@ void KernelGenerator::visit(const ir::operation::LocalResponseNormalization &nod
auto beta = node.param().beta;
auto bias = node.param().bias;
- auto ofm_tensor = _tensor_builder->at(ofm_index).get();
- auto ifm_tensor = _tensor_builder->at(ifm_index).get();
+ auto ofm_tensor = _tensor_reg->getAclTensor(ofm_index).get();
+ auto ifm_tensor = _tensor_reg->getAclTensor(ifm_index).get();
const auto norm_info = ::arm_compute::NormalizationLayerInfo(
::arm_compute::NormType::CROSS_MAP, radius * 2 + 1, alpha, beta, bias, false);
- auto fn = std::make_unique<::arm_compute::NENormalizationLayer>();
-
- fn->configure(ifm_tensor->handle(), ofm_tensor->handle(), norm_info);
-
- auto acl_fn = asAclFunction(std::move(fn));
+ auto fn = acl_common::generateLayer<arm_compute::NENormalizationLayer>(
+ ifm_tensor->handle(), ofm_tensor->handle(), norm_info);
- _return_fn = std::move(acl_fn);
-}
-
-void KernelGenerator::visit(const ir::operation::LogicalAnd &node)
-{
- const auto output_index{node.getOutputs().at(0)};
- const auto input0_index{node.getInputs().at(ir::operation::LogicalAnd::Input::INPUT0)};
- const auto input1_index{node.getInputs().at(ir::operation::LogicalAnd::Input::INPUT1)};
-
- auto output_tensor = _tensor_builder->at(output_index).get();
- auto input0_tensor = _tensor_builder->at(input0_index).get();
- auto input1_tensor = _tensor_builder->at(input1_index).get();
-
- auto fn = std::make_unique<::arm_compute::NELogicalAnd>();
-
- fn->configure(input0_tensor->handle(), input1_tensor->handle(), output_tensor->handle());
-
- auto acl_fn = asAclFunction(std::move(fn));
-
- _return_fn = std::move(acl_fn);
-}
-
-void KernelGenerator::visit(const ir::operation::LogicalNot &node)
-{
- const auto output_index{node.getOutputs().at(0)};
- const auto input_index{node.getInputs().at(ir::operation::LogicalNot::Input::INPUT)};
-
- auto output_tensor = _tensor_builder->at(output_index).get();
- auto input_tensor = _tensor_builder->at(input_index).get();
-
- auto fn = std::make_unique<::arm_compute::NEBitwiseNot>();
-
- fn->configure(input_tensor->handle(), output_tensor->handle());
-
- auto acl_fn = asAclFunction(std::move(fn));
-
- _return_fn = std::move(acl_fn);
-}
-
-void KernelGenerator::visit(const ir::operation::LogicalOr &node)
-{
- const auto output_index{node.getOutputs().at(0)};
- const auto input0_index{node.getInputs().at(ir::operation::LogicalOr::Input::INPUT0)};
- const auto input1_index{node.getInputs().at(ir::operation::LogicalOr::Input::INPUT1)};
-
- auto output_tensor = _tensor_builder->at(output_index).get();
- auto input0_tensor = _tensor_builder->at(input0_index).get();
- auto input1_tensor = _tensor_builder->at(input1_index).get();
-
- auto fn = std::make_unique<::arm_compute::NELogicalOr>();
-
- fn->configure(input0_tensor->handle(), input1_tensor->handle(), output_tensor->handle());
-
- auto acl_fn = asAclFunction(std::move(fn));
-
- _return_fn = std::move(acl_fn);
-}
-
-void KernelGenerator::visit(const ir::operation::Logistic &node)
-{
- const auto ofm_index{node.getOutputs().at(0)};
- const auto ifm_index{node.getInputs().at(ir::operation::Logistic::Input::INPUT)};
-
- auto ofm_tensor = _tensor_builder->at(ofm_index).get();
- auto ifm_tensor = _tensor_builder->at(ifm_index).get();
-
- const ::arm_compute::ActivationLayerInfo act_info{
- ::arm_compute::ActivationLayerInfo::ActivationFunction::LOGISTIC};
-
- // NOTE NEActivationLayer can generate produce erroneous results. it were caused by 'vexpq_f32()'.
- // The neon function returns a value outside of the limit of representation in float as 'NaN'
- // instead of 'INF', and then the result of this op will be errors due to the 'NaN'.
- auto fn = std::make_unique<::arm_compute::NEActivationLayerEx>();
-
- fn->configure(ifm_tensor->handle(), ofm_tensor->handle(), act_info);
-
- auto acl_fn = asAclFunction(std::move(fn));
-
- _return_fn = std::move(acl_fn);
+ _return_fn = asAclFunction(std::move(fn));
}
void KernelGenerator::visit(const ir::operation::LSTM &node)
{
_return_fn = acl_common::kernelGenLSTM<acl_common::AclFunction, ::arm_compute::ITensor,
- ::arm_compute::NELSTMLayer>(node, _ctx, _tensor_builder);
-}
-
-void KernelGenerator::visit(const ir::operation::Mul &node)
-{
- const auto ofm_index{node.getOutputs().at(0)};
- const auto lhs_index{node.getInputs().at(ir::operation::Mul::Input::LHS)};
- const auto rhs_index{node.getInputs().at(ir::operation::Mul::Input::RHS)};
-
- const auto activation = node.param().activation;
-
- auto ofm_tensor = _tensor_builder->at(ofm_index).get();
- auto lhs_tensor = _tensor_builder->at(lhs_index).get();
- auto rhs_tensor = _tensor_builder->at(rhs_index).get();
-
- auto fn = std::make_unique<::arm_compute::NEPixelWiseMultiplication>();
-
- // RoundingPolicy for scale:1.0 is only allowed RoundingPolicy::TO_ZERO
- fn->configure(lhs_tensor->handle(), rhs_tensor->handle(), ofm_tensor->handle(), 1.0, // scale
- arm_compute::ConvertPolicy::SATURATE, arm_compute::RoundingPolicy::TO_ZERO);
-
- _return_fn = std::make_unique<exec::FunctionSequence>(
- asAclFunction(std::move(fn)), ActivationBuilder::generate(activation, ofm_tensor->handle()));
-}
-
-void KernelGenerator::visit(const ir::operation::Neg &node)
-{
- const auto ofm_index{node.getOutputs().at(0)};
- const auto ifm_index{node.getInputs().at(ir::operation::Neg::Input::INPUT)};
-
- auto ofm_tensor = _tensor_builder->at(ofm_index).get();
- auto ifm_tensor = _tensor_builder->at(ifm_index).get();
-
- auto fn = std::make_unique<::arm_compute::NENegLayer>();
-
- fn->configure(ifm_tensor->handle(), ofm_tensor->handle());
-
- auto acl_fn = asAclFunction(std::move(fn));
-
- _return_fn = std::move(acl_fn);
+ ::arm_compute::NELSTMLayer>(node, _ctx, _tensor_reg);
}
void KernelGenerator::visit(const ir::operation::Pack &node)
@@ -736,25 +682,23 @@ void KernelGenerator::visit(const ir::operation::Pack &node)
for (const auto &input_index : node.getInputs())
input_indexes.emplace_back(input_index);
- auto output = _tensor_builder->at(output_index).get()->handle();
+ auto output = _tensor_reg->getAclTensor(output_index).get()->handle();
std::vector<arm_compute::ITensor *> inputs;
for (const auto &input_index : input_indexes)
- inputs.emplace_back(_tensor_builder->at(input_index)->handle());
+ inputs.emplace_back(_tensor_reg->getAclTensor(input_index)->handle());
const auto frontend_layout = _current_op_seq_layout;
- const auto backend_layout = _tensor_builder->at(output_index).get()->layout();
+ const auto backend_layout = _tensor_reg->getAclTensor(output_index).get()->layout();
if (axis < 0)
axis += output_rank;
axis = acl_common::ToARMComputeAxis(output_rank, axis, frontend_layout, backend_layout).value();
- auto fn = std::make_unique<::arm_compute::NEStackLayer>();
-
// Disable applied dim_correction
for (const auto &input_index : input_indexes)
{
size_t input_rank = _ctx.at(input_index).shape().rank();
- const auto &input_tensor = _tensor_builder->at(input_index);
+ const auto &input_tensor = _tensor_reg->getAclTensor(input_index);
assert(input_rank == input_tensor->num_dimensions());
if (input_rank != input_tensor->info()->num_dimensions())
{
@@ -764,7 +708,7 @@ void KernelGenerator::visit(const ir::operation::Pack &node)
}
}
- fn->configure(inputs, axis, output);
+ auto fn = acl_common::generateLayer<arm_compute::NEStackLayer>(inputs, axis, output);
// acl_neon does not revert the disabled dim_correction because acl_neon's kernels would
// use arm_compute::TensorInfo::offset_element_in_bytes()
@@ -783,8 +727,8 @@ void KernelGenerator::visit(const ir::operation::Pad &node)
auto rank = _ctx.at(input_index).shape().rank();
auto pad_base = _ctx.at(pad_index).data()->base();
- auto input = _tensor_builder->at(input_index).get()->handle();
- auto output = _tensor_builder->at(output_index).get()->handle();
+ auto input = _tensor_reg->getAclTensor(input_index).get()->handle();
+ auto output = _tensor_reg->getAclTensor(output_index).get()->handle();
::arm_compute::PaddingList padding_list;
padding_list.resize(rank);
@@ -793,7 +737,7 @@ void KernelGenerator::visit(const ir::operation::Pad &node)
const int32_t *from = reinterpret_cast<const int32_t *>(pad_base) + (n * 2);
const auto frontend_layout = _current_op_seq_layout;
- const auto backend_layout = _tensor_builder->at(input_index).get()->layout();
+ const auto backend_layout = _tensor_reg->getAclTensor(input_index).get()->layout();
const auto axis =
acl_common::ToARMComputeAxis(rank, n, frontend_layout, backend_layout).value();
padding_list[axis] = ::arm_compute::PaddingInfo{from[0], from[1]};
@@ -807,19 +751,33 @@ void KernelGenerator::visit(const ir::operation::Pad &node)
const auto pixel_value =
::arm_compute::PixelValue(0, input->info()->data_type(), input->info()->quantization_info());
- auto fn = std::make_unique<::arm_compute::NEPadLayer>();
- fn->configure(input, output, padding_list, pixel_value);
+ auto fn =
+ acl_common::generateLayer<arm_compute::NEPadLayer>(input, output, padding_list, pixel_value);
_return_fn = asAclFunction(std::move(fn));
}
+void KernelGenerator::visit(const ir::operation::Pool2D &node)
+{
+ auto raw_fn = acl_common::kernelGenPool2D<::arm_compute::NEPoolingLayer>(
+ node, _ctx, _tensor_reg, _current_op_seq_layout,
+ acl_common::convertPoolType(node.param().op_type));
+
+ const auto ofm_index{node.getOutputs().at(0)};
+ auto ofm_tensor = _tensor_reg->getAclTensor(ofm_index).get();
+ const auto activation = node.param().activation;
+ _return_fn = std::make_unique<exec::FunctionSequence>(
+ asAclFunction(std::move(raw_fn)),
+ ActivationBuilder::generate(activation, ofm_tensor->handle()));
+}
+
void KernelGenerator::visit(const ir::operation::Permute &node)
{
const auto ofm_idx{node.getOutputs().at(0)};
const auto ifm_idx{node.getInputs().at(0)};
const auto permute_type = node.getPermuteType();
- auto ofm_tensor = _tensor_builder->at(ofm_idx).get();
- auto ifm_tensor = _tensor_builder->at(ifm_idx).get();
+ auto ofm_tensor = _tensor_reg->getAclTensor(ofm_idx).get();
+ auto ifm_tensor = _tensor_reg->getAclTensor(ifm_idx).get();
const auto rank = _ctx.at(ofm_idx).shape().rank();
assert(_ctx.at(ifm_idx).shape().rank() == _ctx.at(ofm_idx).shape().rank());
@@ -830,35 +788,22 @@ void KernelGenerator::visit(const ir::operation::Permute &node)
// WHCN -> CWHN
pv = arm_compute::PermutationVector{2, 0, 1};
- auto l = std::make_unique<::arm_compute::NEPermute>();
-
- l->configure(ifm_tensor->handle(), ofm_tensor->handle(), pv);
-
- fn = std::move(l);
+ fn = acl_common::generateLayer<arm_compute::NEPermute>(ifm_tensor->handle(),
+ ofm_tensor->handle(), pv);
}
else if (permute_type == ir::operation::Permute::Type::NHWC_TO_NCHW && rank == 4)
{
// CWHN -> WHCN
pv = arm_compute::PermutationVector{1, 2, 0};
- auto l = std::make_unique<::arm_compute::NEPermute>();
-
- l->configure(ifm_tensor->handle(), ofm_tensor->handle(), pv);
-
- fn = std::move(l);
+ fn = acl_common::generateLayer<arm_compute::NEPermute>(ifm_tensor->handle(),
+ ofm_tensor->handle(), pv);
}
else
{
- auto l = std::make_unique<::arm_compute::NECopy>();
-
- l->configure(ifm_tensor->handle(), ofm_tensor->handle());
-
- fn = std::move(l);
+ fn = acl_common::generateLayer<arm_compute::NECopy>(ifm_tensor->handle(), ofm_tensor->handle());
}
-
- auto acl_fn = asAclFunction(std::move(fn));
-
- _return_fn = std::move(acl_fn);
+ _return_fn = asAclFunction(std::move(fn));
}
void KernelGenerator::visit(const ir::operation::PReLU &node)
@@ -867,21 +812,14 @@ void KernelGenerator::visit(const ir::operation::PReLU &node)
const auto ifm_index{node.getInputs().at(ir::operation::PReLU::Input::INPUT)};
const auto alpha_index{node.getInputs().at(ir::operation::PReLU::Input::ALPHA)};
- auto ofm_tensor = _tensor_builder->at(ofm_index).get();
- auto ifm_tensor = _tensor_builder->at(ifm_index).get();
- auto alpha_tensor = _tensor_builder->at(alpha_index).get();
+ auto ofm_tensor = _tensor_reg->getAclTensor(ofm_index).get();
+ auto ifm_tensor = _tensor_reg->getAclTensor(ifm_index).get();
+ auto alpha_tensor = _tensor_reg->getAclTensor(alpha_index).get();
- std::unique_ptr<::arm_compute::IFunction> fn;
-
- auto l = std::make_unique<::arm_compute::NEPReluLayer>();
-
- l->configure(ifm_tensor->handle(), alpha_tensor->handle(), ofm_tensor->handle());
-
- fn = std::move(l);
+ auto fn = acl_common::generateLayer<arm_compute::NEPReluLayer>(
+ ifm_tensor->handle(), alpha_tensor->handle(), ofm_tensor->handle());
- auto acl_fn = asAclFunction(std::move(fn));
-
- _return_fn = std::move(acl_fn);
+ _return_fn = asAclFunction(std::move(fn));
}
void KernelGenerator::visit(const ir::operation::Reduce &node)
@@ -890,8 +828,8 @@ void KernelGenerator::visit(const ir::operation::Reduce &node)
const auto input_index{node.getInputs().at(ir::operation::Reduce::Input::INPUT)};
const auto axes_index{node.getInputs().at(ir::operation::Reduce::Input::AXES)};
- auto output_tensor = _tensor_builder->at(output_index).get();
- auto input_tensor = _tensor_builder->at(input_index).get();
+ auto output_tensor = _tensor_reg->getAclTensor(output_index).get();
+ auto input_tensor = _tensor_reg->getAclTensor(input_index).get();
// Convert to ACL axes taking into account negative values and possible duplicates.
const auto &axes = _ctx.at(axes_index);
@@ -906,93 +844,21 @@ void KernelGenerator::visit(const ir::operation::Reduce &node)
std::unique_ptr<::arm_compute::IFunction> fn;
if (reduce_type == ir::operation::Reduce::ReduceType::MEAN)
{
- auto l = std::make_unique<::arm_compute::NEReduceMean>();
-
- l->configure(input_tensor->handle(), reduce_axes, keep_dims, output_tensor->handle());
-
- fn = std::move(l);
+ fn = acl_common::generateLayer<arm_compute::NEReduceMean>(input_tensor->handle(), reduce_axes,
+ keep_dims, output_tensor->handle());
}
else if (reduce_type == ir::operation::Reduce::ReduceType::SUM)
{
- auto l = std::make_unique<::arm_compute::NEReduceSum>();
-
- l->configure(input_tensor->handle(), reduce_axes, keep_dims, output_tensor->handle());
-
- fn = std::move(l);
+ fn = acl_common::generateLayer<arm_compute::NEReduceSum>(input_tensor->handle(), reduce_axes,
+ keep_dims, output_tensor->handle());
}
else
{
- auto l = std::make_unique<::arm_compute::NEReduceOperation>();
-
- l->configure(input_tensor->handle(), reduce_axes, keep_dims, output_tensor->handle(),
- acl_common::convertReduceType(reduce_type));
-
- fn = std::move(l);
+ fn = acl_common::generateLayer<arm_compute::NEReduceOperation>(
+ input_tensor->handle(), reduce_axes, keep_dims, output_tensor->handle(),
+ acl_common::convertReduceType(reduce_type));
}
-
- auto acl_fn = asAclFunction(std::move(fn));
-
- _return_fn = std::move(acl_fn);
-}
-
-void KernelGenerator::visit(const ir::operation::ReLU &node)
-{
- const auto output_index{node.getOutputs().at(0)};
- const auto input_index{node.getInputs().at(ir::operation::ReLU::Input::INPUT)};
-
- auto output_tensor = _tensor_builder->at(output_index).get();
- auto input_tensor = _tensor_builder->at(input_index).get();
-
- auto fn = std::make_unique<arm_compute::NEActivationLayer>();
-
- const ::arm_compute::ActivationLayerInfo act_info{
- ::arm_compute::ActivationLayerInfo::ActivationFunction::RELU};
-
- fn->configure(input_tensor->handle(), output_tensor->handle(), act_info);
-
- auto acl_fn = asAclFunction(std::move(fn));
-
- _return_fn = std::move(acl_fn);
-}
-
-void KernelGenerator::visit(const ir::operation::ReLU1 &node)
-{
- const auto ofm_index{node.getOutputs().at(0)};
- const auto ifm_index{node.getInputs().at(ir::operation::ReLU1::Input::INPUT)};
-
- auto ofm_tensor = _tensor_builder->at(ofm_index).get();
- auto ifm_tensor = _tensor_builder->at(ifm_index).get();
-
- const ::arm_compute::ActivationLayerInfo act_info{
- ::arm_compute::ActivationLayerInfo::ActivationFunction::LU_BOUNDED_RELU, 1.0f, -1.0f};
-
- auto fn = std::make_unique<::arm_compute::NEActivationLayer>();
-
- fn->configure(ifm_tensor->handle(), ofm_tensor->handle(), act_info);
-
- auto acl_fn = asAclFunction(std::move(fn));
-
- _return_fn = std::move(acl_fn);
-}
-
-void KernelGenerator::visit(const ir::operation::ReLU6 &node)
-{
- const auto ofm_index{node.getOutputs().at(0)};
- const auto ifm_index{node.getInputs().at(ir::operation::ReLU6::Input::INPUT)};
-
- auto ofm_tensor = _tensor_builder->at(ofm_index).get();
- auto ifm_tensor = _tensor_builder->at(ifm_index).get();
-
- const ::arm_compute::ActivationLayerInfo act_info{
- ::arm_compute::ActivationLayerInfo::ActivationFunction::BOUNDED_RELU, 6.0f};
-
- auto fn = std::make_unique<::arm_compute::NEActivationLayer>();
-
- fn->configure(ifm_tensor->handle(), ofm_tensor->handle(), act_info);
-
- auto acl_fn = asAclFunction(std::move(fn));
-
- _return_fn = std::move(acl_fn);
+ _return_fn = asAclFunction(std::move(fn));
}
void KernelGenerator::visit(const ir::operation::Reshape &node)
@@ -1000,8 +866,8 @@ void KernelGenerator::visit(const ir::operation::Reshape &node)
const auto output_index{node.getOutputs().at(0)};
const auto input_index{node.getInputs().at(ir::operation::Reshape::Input::INPUT)};
- auto output_tensor = _tensor_builder->at(output_index).get();
- auto input_tensor = _tensor_builder->at(input_index).get();
+ auto output_tensor = _tensor_reg->getAclTensor(output_index).get();
+ auto input_tensor = _tensor_reg->getAclTensor(input_index).get();
  // NOTE This operation must not change the layout from frontend to backend
  // So, PermutationOperationPass makes the frontend and backend layouts the same.
@@ -1012,13 +878,10 @@ void KernelGenerator::visit(const ir::operation::Reshape &node)
UNUSED_RELEASE(frontend_layout);
UNUSED_RELEASE(backend_layout);
- auto fn = std::make_unique<arm_compute::NEReshapeLayer>();
-
- fn->configure(input_tensor->handle(), output_tensor->handle());
+ auto fn = acl_common::generateLayer<arm_compute::NEReshapeLayer>(input_tensor->handle(),
+ output_tensor->handle());
- auto acl_fn = asAclFunction(std::move(fn));
-
- _return_fn = std::move(acl_fn);
+ _return_fn = asAclFunction(std::move(fn));
}
void KernelGenerator::visit(const ir::operation::ResizeBilinear &node)
@@ -1027,18 +890,15 @@ void KernelGenerator::visit(const ir::operation::ResizeBilinear &node)
const auto ifm_index{node.getInputs().at(ir::operation::ResizeBilinear::Input::INPUT)};
- auto ofm_tensor = _tensor_builder->at(ofm_index).get();
- auto ifm_tensor = _tensor_builder->at(ifm_index).get();
-
- auto fn = std::make_unique<::arm_compute::NEScale>();
+ auto ofm_tensor = _tensor_reg->getAclTensor(ofm_index).get();
+ auto ifm_tensor = _tensor_reg->getAclTensor(ifm_index).get();
- fn->configure(ifm_tensor->handle(), ofm_tensor->handle(),
- ::arm_compute::InterpolationPolicy::BILINEAR, ::arm_compute::BorderMode::REPLICATE,
- ::arm_compute::PixelValue(0.f), ::arm_compute::SamplingPolicy::TOP_LEFT);
+ auto fn = acl_common::generateLayer<arm_compute::NEScale>(
+ ifm_tensor->handle(), ofm_tensor->handle(), ::arm_compute::InterpolationPolicy::BILINEAR,
+ ::arm_compute::BorderMode::REPLICATE, ::arm_compute::PixelValue(0.f),
+ ::arm_compute::SamplingPolicy::TOP_LEFT);
- auto acl_fn = asAclFunction(std::move(fn));
-
- _return_fn = std::move(acl_fn);
+ _return_fn = asAclFunction(std::move(fn));
}
void KernelGenerator::visit(const ir::operation::RNN &node)
@@ -1056,40 +916,24 @@ void KernelGenerator::visit(const ir::operation::RNN &node)
const auto activation = node.param().activation;
- auto output_tensor = _tensor_builder->at(output_index).get();
- auto hidden_state_out_tensor = _tensor_builder->at(hidden_state_out_index).get();
+ auto output_tensor = _tensor_reg->getAclTensor(output_index).get();
+ auto hidden_state_out_tensor = _tensor_reg->getAclTensor(hidden_state_out_index).get();
- auto input_tensor = _tensor_builder->at(input_index).get();
- auto weights_tensor = _tensor_builder->at(weights_index).get();
- auto recurrent_weights_tensor = _tensor_builder->at(recurrent_weights_index).get();
- auto bias_tensor = _tensor_builder->at(bias_index).get();
- auto hidden_state_in_tensor = _tensor_builder->at(hidden_state_in_index).get();
+ auto input_tensor = _tensor_reg->getAclTensor(input_index).get();
+ auto weights_tensor = _tensor_reg->getAclTensor(weights_index).get();
+ auto recurrent_weights_tensor = _tensor_reg->getAclTensor(recurrent_weights_index).get();
+ auto bias_tensor = _tensor_reg->getAclTensor(bias_index).get();
+ auto hidden_state_in_tensor = _tensor_reg->getAclTensor(hidden_state_in_index).get();
auto act_info = ::onert::backend::acl_common::asActivationLayerInfo(activation);
- auto copy_layer = std::make_unique<::arm_compute::NECopy>();
- copy_layer->configure(hidden_state_in_tensor->handle(), hidden_state_out_tensor->handle());
+ auto copy_layer = acl_common::generateLayer<arm_compute::NECopy>(
+ hidden_state_in_tensor->handle(), hidden_state_out_tensor->handle());
_return_fn = asAclFunction(std::move(copy_layer));
- auto fn = std::make_unique<::arm_compute::NERNNLayer>(
- _tensor_builder->acl_tensor_manager()->internal_buffer_manager());
- fn->configure(input_tensor->handle(), weights_tensor->handle(),
- recurrent_weights_tensor->handle(), bias_tensor->handle(),
- hidden_state_out_tensor->handle(), output_tensor->handle(), act_info);
- _return_fn = asAclFunction(std::move(fn));
-}
-
-void KernelGenerator::visit(const ir::operation::RSQRT &node)
-{
- const auto ofm_index{node.getOutputs().at(0)};
- const auto ifm_index{node.getInputs().at(ir::operation::RSQRT::Input::INPUT)};
-
- auto ofm_tensor = _tensor_builder->at(ofm_index).get();
- auto ifm_tensor = _tensor_builder->at(ifm_index).get();
-
- auto fn = std::make_unique<::arm_compute::NERsqrtLayer>();
-
- fn->configure(ifm_tensor->handle(), ofm_tensor->handle());
-
+ auto fn = acl_common::generateLayer<arm_compute::NERNNLayer>(
+ _tensor_builder->acl_tensor_manager()->internal_buffer_manager(), input_tensor->handle(),
+ weights_tensor->handle(), recurrent_weights_tensor->handle(), bias_tensor->handle(),
+ hidden_state_out_tensor->handle(), output_tensor->handle(), act_info);
_return_fn = asAclFunction(std::move(fn));
}
@@ -1105,32 +949,11 @@ void KernelGenerator::visit(const ir::operation::Squeeze &node)
(void)dims;
(void)ndim;
- auto output_tensor = _tensor_builder->at(output_index).get();
- auto input_tensor = _tensor_builder->at(input_index).get();
- auto fn = std::make_unique<arm_compute::NEReshapeLayer>();
- fn->configure(input_tensor->handle(), output_tensor->handle());
- auto acl_fn = asAclFunction(std::move(fn));
- _return_fn = std::move(acl_fn);
-}
-
-void KernelGenerator::visit(const ir::operation::Tanh &node)
-{
- const auto output_index{node.getOutputs().at(0)};
- const auto input_index{node.getInputs().at(ir::operation::Tanh::Input::INPUT)};
-
- auto output_tensor = _tensor_builder->at(output_index).get();
- auto input_tensor = _tensor_builder->at(input_index).get();
-
- auto fn = std::make_unique<arm_compute::NEActivationLayer>();
-
- const ::arm_compute::ActivationLayerInfo act_info{
- ::arm_compute::ActivationLayerInfo::ActivationFunction::TANH, 1.0f, 1.0f};
-
- fn->configure(input_tensor->handle(), output_tensor->handle(), act_info);
-
- auto acl_fn = asAclFunction(std::move(fn));
-
- _return_fn = std::move(acl_fn);
+ auto output_tensor = _tensor_reg->getAclTensor(output_index).get();
+ auto input_tensor = _tensor_reg->getAclTensor(input_index).get();
+ auto fn = acl_common::generateLayer<arm_compute::NEReshapeLayer>(input_tensor->handle(),
+ output_tensor->handle());
+ _return_fn = asAclFunction(std::move(fn));
}
void KernelGenerator::visit(const ir::operation::Softmax &node)
@@ -1139,8 +962,8 @@ void KernelGenerator::visit(const ir::operation::Softmax &node)
const auto input_index{node.getInputs().at(ir::operation::Softmax::Input::INPUT)};
const auto beta = node.param().beta;
- auto output_tensor = _tensor_builder->at(output_index).get();
- auto input_tensor = _tensor_builder->at(input_index).get();
+ auto output_tensor = _tensor_reg->getAclTensor(output_index).get();
+ auto input_tensor = _tensor_reg->getAclTensor(input_index).get();
const auto frontend_layout = _current_op_seq_layout;
const auto backend_layout = input_tensor->layout();
@@ -1154,14 +977,11 @@ void KernelGenerator::visit(const ir::operation::Softmax &node)
acl_common::asTensorShape(input.shape(), frontend_layout, backend_layout, false));
}
- auto fn = std::make_unique<::arm_compute::NESoftmaxLayer>(
- _tensor_builder->acl_tensor_manager()->internal_buffer_manager());
-
- fn->configure(input_tensor->handle(), output_tensor->handle(), beta);
+ auto fn = acl_common::generateLayer<arm_compute::NESoftmaxLayer>(
+ _tensor_builder->acl_tensor_manager()->internal_buffer_manager(), input_tensor->handle(),
+ output_tensor->handle(), beta);
- auto acl_fn = asAclFunction(std::move(fn));
-
- _return_fn = std::move(acl_fn);
+ _return_fn = asAclFunction(std::move(fn));
}
void KernelGenerator::visit(const ir::operation::SpaceToBatchND &node)
@@ -1172,22 +992,19 @@ void KernelGenerator::visit(const ir::operation::SpaceToBatchND &node)
node.getInputs().at(ir::operation::SpaceToBatchND::Input::BLOCK_SIZE)};
const auto paddings_index{node.getInputs().at(ir::operation::SpaceToBatchND::Input::PADDINGS)};
- auto ofm_tensor = _tensor_builder->at(ofm_index).get();
- auto ifm_tensor = _tensor_builder->at(ifm_index).get();
- auto block_size_tensor = _tensor_builder->at(block_size_index).get();
- auto paddings_tensor = _tensor_builder->at(paddings_index).get();
+ auto ofm_tensor = _tensor_reg->getAclTensor(ofm_index).get();
+ auto ifm_tensor = _tensor_reg->getAclTensor(ifm_index).get();
+ auto block_size_tensor = _tensor_reg->getAclTensor(block_size_index).get();
+ auto paddings_tensor = _tensor_reg->getAclTensor(paddings_index).get();
assert(_ctx.at(block_size_index).data());
assert(_ctx.at(paddings_index).data());
- auto fn = std::make_unique<::arm_compute::NESpaceToBatchLayer>();
-
- fn->configure(ifm_tensor->handle(), block_size_tensor->handle(), paddings_tensor->handle(),
- ofm_tensor->handle());
+ auto fn = acl_common::generateLayer<arm_compute::NESpaceToBatchLayer>(
+ ifm_tensor->handle(), block_size_tensor->handle(), paddings_tensor->handle(),
+ ofm_tensor->handle());
- auto acl_fn = asAclFunction(std::move(fn));
-
- _return_fn = std::move(acl_fn);
+ _return_fn = asAclFunction(std::move(fn));
}
void KernelGenerator::visit(const ir::operation::SpaceToDepth &node)
@@ -1197,16 +1014,13 @@ void KernelGenerator::visit(const ir::operation::SpaceToDepth &node)
auto block_size = node.param().block_size;
- auto ofm_tensor = _tensor_builder->at(ofm_index).get();
- auto ifm_tensor = _tensor_builder->at(ifm_index).get();
-
- auto fn = std::make_unique<::arm_compute::NESpaceToDepthLayer>();
+ auto ofm_tensor = _tensor_reg->getAclTensor(ofm_index).get();
+ auto ifm_tensor = _tensor_reg->getAclTensor(ifm_index).get();
- fn->configure(ifm_tensor->handle(), ofm_tensor->handle(), block_size);
+ auto fn = acl_common::generateLayer<arm_compute::NESpaceToDepthLayer>(
+ ifm_tensor->handle(), ofm_tensor->handle(), block_size);
- auto acl_fn = asAclFunction(std::move(fn));
-
- _return_fn = std::move(acl_fn);
+ _return_fn = asAclFunction(std::move(fn));
}
void KernelGenerator::visit(const ir::operation::Split &node)
@@ -1221,10 +1035,10 @@ void KernelGenerator::visit(const ir::operation::Split &node)
for (const auto &output : node.getOutputs())
output_indexes.emplace_back(output);
- auto ifm_tensor = _tensor_builder->at(ifm_index).get();
+ auto ifm_tensor = _tensor_reg->getAclTensor(ifm_index).get();
std::vector<arm_compute::ITensor *> output_tensors;
for (const auto &ofm_ind : output_indexes)
- output_tensors.emplace_back(_tensor_builder->at(ofm_ind).get()->handle());
+ output_tensors.emplace_back(_tensor_reg->getAclTensor(ofm_ind).get()->handle());
const auto frontend_layout = _current_op_seq_layout;
const auto backend_layout = ifm_tensor->layout();
@@ -1233,71 +1047,26 @@ void KernelGenerator::visit(const ir::operation::Split &node)
axis += ifm_rank;
axis = acl_common::ToARMComputeAxis(ifm_rank, axis, frontend_layout, backend_layout).value();
- auto fn = std::make_unique<::arm_compute::NESplit>();
-
- fn->configure(ifm_tensor->handle(), output_tensors, axis);
+ auto fn =
+ acl_common::generateLayer<arm_compute::NESplit>(ifm_tensor->handle(), output_tensors, axis);
_return_fn = asAclFunction(std::move(fn));
}
-void KernelGenerator::visit(const ir::operation::SQRT &node)
-{
- const auto output_index{node.getOutputs().at(0)};
- const auto input_index{node.getInputs().at(ir::operation::SQRT::Input::INPUT)};
-
- auto output_tensor = _tensor_builder->at(output_index).get();
- auto input_tensor = _tensor_builder->at(input_index).get();
-
- const ::arm_compute::ActivationLayerInfo act_info{
- ::arm_compute::ActivationLayerInfo::ActivationFunction::SQRT};
-
- auto fn = std::make_unique<::arm_compute::NEActivationLayer>();
-
- fn->configure(input_tensor->handle(), output_tensor->handle(), act_info);
-
- auto acl_fn = asAclFunction(std::move(fn));
-
- _return_fn = std::move(acl_fn);
-}
-
void KernelGenerator::visit(const ir::operation::SquaredDifference &node)
{
const auto ofm_index{node.getOutputs().at(0)};
const auto lhs_index{node.getInputs().at(ir::operation::SquaredDifference::Input::LHS)};
const auto rhs_index{node.getInputs().at(ir::operation::SquaredDifference::Input::RHS)};
- auto ofm_tensor = _tensor_builder->at(ofm_index).get();
- auto lhs_tensor = _tensor_builder->at(lhs_index).get();
- auto rhs_tensor = _tensor_builder->at(rhs_index).get();
-
- auto fn = std::make_unique<::arm_compute::NEElementwiseSquaredDiff>();
+ auto ofm_tensor = _tensor_reg->getAclTensor(ofm_index).get();
+ auto lhs_tensor = _tensor_reg->getAclTensor(lhs_index).get();
+ auto rhs_tensor = _tensor_reg->getAclTensor(rhs_index).get();
- fn->configure(lhs_tensor->handle(), rhs_tensor->handle(), ofm_tensor->handle());
+ auto fn = acl_common::generateLayer<arm_compute::NEElementwiseSquaredDiff>(
+ lhs_tensor->handle(), rhs_tensor->handle(), ofm_tensor->handle());
- auto acl_fn = asAclFunction(std::move(fn));
-
- _return_fn = std::move(acl_fn);
-}
-
-void KernelGenerator::visit(const ir::operation::Sub &node)
-{
- const auto ofm_index{node.getOutputs().at(0)};
- const auto lhs_index{node.getInputs().at(ir::operation::Sub::Input::LHS)};
- const auto rhs_index{node.getInputs().at(ir::operation::Sub::Input::RHS)};
-
- const auto activation = node.param().activation;
-
- auto ofm_tensor = _tensor_builder->at(ofm_index).get();
- auto lhs_tensor = _tensor_builder->at(lhs_index).get();
- auto rhs_tensor = _tensor_builder->at(rhs_index).get();
-
- auto fn = std::make_unique<::arm_compute::NEArithmeticSubtraction>();
-
- fn->configure(lhs_tensor->handle(), rhs_tensor->handle(), ofm_tensor->handle(),
- arm_compute::ConvertPolicy::SATURATE);
-
- _return_fn = std::make_unique<exec::FunctionSequence>(
- asAclFunction(std::move(fn)), ActivationBuilder::generate(activation, ofm_tensor->handle()));
+ _return_fn = asAclFunction(std::move(fn));
}
void KernelGenerator::visit(const ir::operation::Slice &node)
@@ -1307,8 +1076,8 @@ void KernelGenerator::visit(const ir::operation::Slice &node)
const auto begins_index{node.getInputs().at(ir::operation::Slice::Input::BEGINS)};
const auto sizes_index{node.getInputs().at(ir::operation::Slice::Input::SIZES)};
- auto outputData_tensor = _tensor_builder->at(output_index).get();
- auto inputData_tensor = _tensor_builder->at(input_index).get();
+ auto outputData_tensor = _tensor_reg->getAclTensor(output_index).get();
+ auto inputData_tensor = _tensor_reg->getAclTensor(input_index).get();
const auto frontend_layout = _current_op_seq_layout;
const auto backend_layout = inputData_tensor->layout();
@@ -1358,13 +1127,10 @@ void KernelGenerator::visit(const ir::operation::Slice &node)
ends_set.set(i, ends[i]);
}
- auto fn = std::make_unique<::arm_compute::NESlice>();
-
- fn->configure(inputData_tensor->handle(), outputData_tensor->handle(), starts_set, ends_set);
+ auto fn = acl_common::generateLayer<arm_compute::NESlice>(
+ inputData_tensor->handle(), outputData_tensor->handle(), starts_set, ends_set);
- auto acl_fn = asAclFunction(std::move(fn));
-
- _return_fn = std::move(acl_fn);
+ _return_fn = asAclFunction(std::move(fn));
}
void KernelGenerator::visit(const ir::operation::StridedSlice &node)
@@ -1375,8 +1141,8 @@ void KernelGenerator::visit(const ir::operation::StridedSlice &node)
const auto ends_index{node.getInputs().at(ir::operation::StridedSlice::Input::ENDS)};
const auto strides_index{node.getInputs().at(ir::operation::StridedSlice::Input::STRIDES)};
- auto outputData_tensor = _tensor_builder->at(output_index).get();
- auto inputData_tensor = _tensor_builder->at(input_index).get();
+ auto outputData_tensor = _tensor_reg->getAclTensor(output_index).get();
+ auto inputData_tensor = _tensor_reg->getAclTensor(input_index).get();
const auto frontend_layout = _current_op_seq_layout;
const auto backend_layout = inputData_tensor->layout();
@@ -1445,14 +1211,11 @@ void KernelGenerator::visit(const ir::operation::StridedSlice &node)
strides_set.set(i, strides[i]);
}
- auto fn = std::make_unique<::arm_compute::NEStridedSlice>();
+ auto fn = acl_common::generateLayer<arm_compute::NEStridedSlice>(
+ inputData_tensor->handle(), outputData_tensor->handle(), starts_set, ends_set, strides_set,
+ begin_mask, end_mask, shrink_axis_mask);
- fn->configure(inputData_tensor->handle(), outputData_tensor->handle(), starts_set, ends_set,
- strides_set, begin_mask, end_mask, shrink_axis_mask);
-
- auto acl_fn = asAclFunction(std::move(fn));
-
- _return_fn = std::move(acl_fn);
+ _return_fn = asAclFunction(std::move(fn));
}
void KernelGenerator::visit(const ir::operation::TransposeConv &node)
@@ -1481,20 +1244,17 @@ void KernelGenerator::visit(const ir::operation::TransposeConv &node)
invalid_vertical = ofm_shape.H - (1 + (ifm_shape.H - 1) * stride.vertical) - (ker_shape.H - 1);
}
- auto ofm_tensor = _tensor_builder->at(ofm_index).get();
- auto ifm_tensor = _tensor_builder->at(ifm_index).get();
- auto ker_tensor = _tensor_builder->at(ker_index).get();
+ auto ofm_tensor = _tensor_reg->getAclTensor(ofm_index).get();
+ auto ifm_tensor = _tensor_reg->getAclTensor(ifm_index).get();
+ auto ker_tensor = _tensor_reg->getAclTensor(ker_index).get();
const auto tconv_info = acl_common::asPadStrideInfo(padding, stride);
- auto fn = std::make_unique<::arm_compute::NETransposeConvLayer>();
+ auto fn = acl_common::generateLayer<arm_compute::NETransposeConvLayer>(
+ ifm_tensor->handle(), ker_tensor->handle(), nullptr, ofm_tensor->handle(), tconv_info,
+ invalid_horizontal, invalid_vertical);
- fn->configure(ifm_tensor->handle(), ker_tensor->handle(), nullptr, ofm_tensor->handle(),
- tconv_info, invalid_horizontal, invalid_vertical);
-
- auto acl_fn = asAclFunction(std::move(fn));
-
- _return_fn = std::move(acl_fn);
+ _return_fn = asAclFunction(std::move(fn));
}
void KernelGenerator::visit(const ir::operation::Transpose &node)
@@ -1503,8 +1263,8 @@ void KernelGenerator::visit(const ir::operation::Transpose &node)
const auto ifm_idx{node.getInputs().at(ir::operation::Transpose::Input::INPUT)};
const auto &perm{node.param().perm};
- auto ofm_tensor = _tensor_builder->at(ofm_idx).get();
- const auto ifm_tensor = _tensor_builder->at(ifm_idx).get();
+ auto ofm_tensor = _tensor_reg->getAclTensor(ofm_idx).get();
+ const auto ifm_tensor = _tensor_reg->getAclTensor(ifm_idx).get();
const auto frontend_layout = _current_op_seq_layout;
const auto backend_layout = ifm_tensor->layout();
@@ -1514,27 +1274,17 @@ void KernelGenerator::visit(const ir::operation::Transpose &node)
rank, pv, frontend_layout, backend_layout);
std::unique_ptr<::arm_compute::IFunction> fn;
-
if (ifm_tensor->num_dimensions() <= 2 && ofm_tensor->num_dimensions() <= 2)
{
- auto l = std::make_unique<::arm_compute::NETranspose>();
-
- l->configure(ifm_tensor->handle(), ofm_tensor->handle());
-
- fn = std::move(l);
+ fn = acl_common::generateLayer<arm_compute::NETranspose>(ifm_tensor->handle(),
+ ofm_tensor->handle());
}
else
{
- auto l = std::make_unique<::arm_compute::NEPermute>();
-
- l->configure(ifm_tensor->handle(), ofm_tensor->handle(), backend_pv);
-
- fn = std::move(l);
+ fn = acl_common::generateLayer<arm_compute::NEPermute>(ifm_tensor->handle(),
+ ofm_tensor->handle(), backend_pv);
}
-
- auto acl_fn = asAclFunction(std::move(fn));
-
- _return_fn = std::move(acl_fn);
+ _return_fn = asAclFunction(std::move(fn));
}
void KernelGenerator::visit(const ir::operation::Unpack &node)
@@ -1548,25 +1298,23 @@ void KernelGenerator::visit(const ir::operation::Unpack &node)
for (const auto &output_index : node.getOutputs())
output_indexes.emplace_back(output_index);
- auto input = _tensor_builder->at(input_index).get()->handle();
+ auto input = _tensor_reg->getAclTensor(input_index).get()->handle();
std::vector<arm_compute::ITensor *> outputs;
for (const auto &output_index : output_indexes)
- outputs.emplace_back(_tensor_builder->at(output_index)->handle());
+ outputs.emplace_back(_tensor_reg->getAclTensor(output_index)->handle());
const auto frontend_layout = _current_op_seq_layout;
- const auto backend_layout = _tensor_builder->at(input_index).get()->layout();
+ const auto backend_layout = _tensor_reg->getAclTensor(input_index).get()->layout();
if (axis < 0)
axis += input_rank;
axis = acl_common::ToARMComputeAxis(input_rank, axis, frontend_layout, backend_layout).value();
- auto fn = std::make_unique<::arm_compute::NEUnstack>();
-
// Disable applied dim_correction
std::vector<arm_compute::TensorShape> orig_outputs_acl_tensor_shapes;
for (const auto &output_index : output_indexes)
{
size_t output_rank = _ctx.at(output_index).shape().rank();
- const auto &output_tensor = _tensor_builder->at(output_index);
+ const auto &output_tensor = _tensor_reg->getAclTensor(output_index);
orig_outputs_acl_tensor_shapes.emplace_back(output_tensor->info()->tensor_shape());
assert(output_rank == output_tensor->num_dimensions());
if (output_rank != output_tensor->info()->num_dimensions())
@@ -1577,84 +1325,23 @@ void KernelGenerator::visit(const ir::operation::Unpack &node)
}
}
- fn->configure(input, outputs, axis);
+ auto fn = acl_common::generateLayer<arm_compute::NEUnstack>(input, outputs, axis);
_return_fn = asAclFunction(std::move(fn));
}
-void KernelGenerator::visit(const ir::operation::Add &node)
-{
- const auto ofm_index{node.getOutputs().at(0)};
- const auto lhs_index{node.getInputs().at(ir::operation::Add::Input::LHS)};
- const auto rhs_index{node.getInputs().at(ir::operation::Add::Input::RHS)};
-
- const auto activation = node.param().activation;
-
- auto ofm_tensor = _tensor_builder->at(ofm_index).get();
- auto lhs_tensor = _tensor_builder->at(lhs_index).get();
- auto rhs_tensor = _tensor_builder->at(rhs_index).get();
-
- auto fn = std::make_unique<::arm_compute::NEArithmeticAddition>();
-
- fn->configure(lhs_tensor->handle(), rhs_tensor->handle(), ofm_tensor->handle(),
- arm_compute::ConvertPolicy::SATURATE);
-
- _return_fn = std::make_unique<exec::FunctionSequence>(
- asAclFunction(std::move(fn)), ActivationBuilder::generate(activation, ofm_tensor->handle()));
-}
-
-void KernelGenerator::visit(const ir::operation::Div &node)
-{
- const auto ofm_index{node.getOutputs().at(0)};
- const auto lhs_index{node.getInputs().at(ir::operation::Div::Input::LHS)};
- const auto rhs_index{node.getInputs().at(ir::operation::Div::Input::RHS)};
-
- const auto activation = node.param().activation;
-
- auto ofm_tensor = _tensor_builder->at(ofm_index).get();
- auto lhs_tensor = _tensor_builder->at(lhs_index).get();
- auto rhs_tensor = _tensor_builder->at(rhs_index).get();
-
- auto fn = std::make_unique<::arm_compute::NEElementwiseDivision>();
-
- fn->configure(lhs_tensor->handle(), rhs_tensor->handle(), ofm_tensor->handle());
-
- _return_fn = std::make_unique<exec::FunctionSequence>(
- asAclFunction(std::move(fn)), ActivationBuilder::generate(activation, ofm_tensor->handle()));
-}
-
-void KernelGenerator::visit(const ir::operation::Exp &node)
-{
- const auto output_index{node.getOutputs().at(0)};
- const auto input_index{node.getInputs().at(ir::operation::Exp::Input::INPUT)};
-
- auto output_tensor = _tensor_builder->at(output_index).get();
- auto input_tensor = _tensor_builder->at(input_index).get();
-
- auto fn = std::make_unique<::arm_compute::NEExpLayer>();
-
- fn->configure(input_tensor->handle(), output_tensor->handle());
-
- auto acl_fn = asAclFunction(std::move(fn));
-
- _return_fn = std::move(acl_fn);
-}
-
void KernelGenerator::visit(const ir::operation::ExpandDims &node)
{
const auto output_index{node.getOutputs().at(0)};
const auto input_index{node.getInputs().at(ir::operation::ExpandDims::Input::INPUT)};
- auto output_tensor = _tensor_builder->at(output_index).get();
- auto input_tensor = _tensor_builder->at(input_index).get();
+ auto output_tensor = _tensor_reg->getAclTensor(output_index).get();
+ auto input_tensor = _tensor_reg->getAclTensor(input_index).get();
- auto fn = std::make_unique<::arm_compute::NEReshapeLayer>();
+ auto fn = acl_common::generateLayer<arm_compute::NEReshapeLayer>(input_tensor->handle(),
+ output_tensor->handle());
- fn->configure(input_tensor->handle(), output_tensor->handle());
-
- auto acl_fn = asAclFunction(std::move(fn));
-
- _return_fn = std::move(acl_fn);
+ _return_fn = asAclFunction(std::move(fn));
}
void KernelGenerator::visit(const ir::operation::Comparison &node)
@@ -1665,56 +1352,15 @@ void KernelGenerator::visit(const ir::operation::Comparison &node)
const auto comparison_type = node.param().comparison_type;
- auto output_tensor = _tensor_builder->at(output_index).get();
- auto input0_tensor = _tensor_builder->at(input0_index).get();
- auto input1_tensor = _tensor_builder->at(input1_index).get();
-
- auto fn = std::make_unique<::arm_compute::NEElementwiseComparison>();
-
- fn->configure(input0_tensor->handle(), input1_tensor->handle(), output_tensor->handle(),
- (arm_compute::ComparisonOperation)comparison_type);
-
- auto acl_fn = asAclFunction(std::move(fn));
-
- _return_fn = std::move(acl_fn);
-}
-
-void KernelGenerator::visit(const ir::operation::Min &node)
-{
- const auto ofm_index{node.getOutputs().at(0)};
- const auto lhs_index{node.getInputs().at(ir::operation::Min::Input::LHS)};
- const auto rhs_index{node.getInputs().at(ir::operation::Min::Input::RHS)};
-
- auto ofm_tensor = _tensor_builder->at(ofm_index).get();
- auto lhs_tensor = _tensor_builder->at(lhs_index).get();
- auto rhs_tensor = _tensor_builder->at(rhs_index).get();
-
- auto fn = std::make_unique<::arm_compute::NEElementwiseMin>();
+ auto output_tensor = _tensor_reg->getAclTensor(output_index).get();
+ auto input0_tensor = _tensor_reg->getAclTensor(input0_index).get();
+ auto input1_tensor = _tensor_reg->getAclTensor(input1_index).get();
- fn->configure(lhs_tensor->handle(), rhs_tensor->handle(), ofm_tensor->handle());
+ auto fn = acl_common::generateLayer<arm_compute::NEElementwiseComparison>(
+ input0_tensor->handle(), input1_tensor->handle(), output_tensor->handle(),
+ (arm_compute::ComparisonOperation)comparison_type);
- auto acl_fn = asAclFunction(std::move(fn));
-
- _return_fn = std::move(acl_fn);
-}
-
-void KernelGenerator::visit(const ir::operation::Max &node)
-{
- const auto ofm_index{node.getOutputs().at(0)};
- const auto lhs_index{node.getInputs().at(ir::operation::Max::Input::LHS)};
- const auto rhs_index{node.getInputs().at(ir::operation::Max::Input::RHS)};
-
- auto ofm_tensor = _tensor_builder->at(ofm_index).get();
- auto lhs_tensor = _tensor_builder->at(lhs_index).get();
- auto rhs_tensor = _tensor_builder->at(rhs_index).get();
-
- auto fn = std::make_unique<::arm_compute::NEElementwiseMax>();
-
- fn->configure(lhs_tensor->handle(), rhs_tensor->handle(), ofm_tensor->handle());
-
- auto acl_fn = asAclFunction(std::move(fn));
-
- _return_fn = std::move(acl_fn);
+ _return_fn = asAclFunction(std::move(fn));
}
void KernelGenerator::visit(const ir::operation::OneHot &node)
@@ -1726,17 +1372,16 @@ void KernelGenerator::visit(const ir::operation::OneHot &node)
const auto offvalue_idx{node.getInputs().at(ir::operation::OneHot::Input::OFF_VALUE)};
const auto axis = node.param().axis;
- auto output_tensor = _tensor_builder->at(out_idx).get();
- auto indices_tensor = _tensor_builder->at(indices_idx).get();
- auto depth_tensor = _tensor_builder->at(depth_idx).get();
- auto onvalue_tensor = _tensor_builder->at(onvalue_idx).get();
- auto offvalue_tensor = _tensor_builder->at(offvalue_idx).get();
-
- auto fn = std::make_unique<::arm_compute::CPPOneHotEx>();
- fn->configure(indices_tensor->handle(), depth_tensor->handle(), onvalue_tensor->handle(),
- offvalue_tensor->handle(), output_tensor->handle(), axis);
- auto acl_fn = asAclFunction(std::move(fn));
- _return_fn = std::move(acl_fn);
+ auto output_tensor = _tensor_reg->getAclTensor(out_idx).get();
+ auto indices_tensor = _tensor_reg->getAclTensor(indices_idx).get();
+ auto depth_tensor = _tensor_reg->getAclTensor(depth_idx).get();
+ auto onvalue_tensor = _tensor_reg->getAclTensor(onvalue_idx).get();
+ auto offvalue_tensor = _tensor_reg->getAclTensor(offvalue_idx).get();
+
+ auto fn = acl_common::generateLayer<arm_compute::CPPOneHotEx>(
+ indices_tensor->handle(), depth_tensor->handle(), onvalue_tensor->handle(),
+ offvalue_tensor->handle(), output_tensor->handle(), axis);
+ _return_fn = asAclFunction(std::move(fn));
}
} // namespace acl_neon
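
The KernelGenerator.cc hunks above repeatedly swap the old make_unique / configure / std::move sequence for a single acl_common::generateLayer call. A minimal sketch of such a helper, assuming a variadic template that forwards its arguments straight to the layer's configure() call; the real helper in acl_common may differ, and the NERNNLayer and NESoftmaxLayer call sites suggest an extra overload that hands a memory manager to the layer constructor first:

#include <memory>
#include <utility>

// Hypothetical sketch only -- not the repository's implementation.
template <typename Layer, typename... Args>
std::unique_ptr<Layer> generateLayer(Args &&... args)
{
  // Build the ACL layer and forward every argument to configure(),
  // replacing the repeated make_unique / configure / std::move boilerplate.
  auto l = std::make_unique<Layer>();
  l->configure(std::forward<Args>(args)...);
  return l;
}

A call site then reads as in the diff, e.g. fn = generateLayer<arm_compute::NEPermute>(ifm_tensor->handle(), ofm_tensor->handle(), pv);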
diff --git a/runtime/onert/backend/acl_neon/KernelGenerator.h b/runtime/onert/backend/acl_neon/KernelGenerator.h
index d6f7932b7..4d269cde5 100644
--- a/runtime/onert/backend/acl_neon/KernelGenerator.h
+++ b/runtime/onert/backend/acl_neon/KernelGenerator.h
@@ -21,6 +21,8 @@
#include "ir/Operands.h"
#include "TensorBuilder.h"
+#include "AclTensorRegistry.h"
+#include "TensorManager.h"
namespace onert
{
@@ -33,75 +35,57 @@ class KernelGenerator : public IKernelGenerator
{
public:
KernelGenerator(const ir::Operands &operands_ctx, const ir::Operations &operations_ctx,
- const std::shared_ptr<TensorBuilder> &tensor_builder);
+ const std::shared_ptr<TensorBuilder> &tensor_builder,
+ const std::shared_ptr<acl_common::AclTensorRegistry<TensorManager>> &_tensor_reg);
void visit(const ir::OpSequence &) override;
- void visit(const ir::operation::Abs &) override;
void visit(const ir::operation::ArgMax &) override;
void visit(const ir::operation::BatchToSpaceND &) override;
- void visit(const ir::operation::Cast &) override;
+ void visit(const ir::operation::BinaryArithmetic &) override;
void visit(const ir::operation::Conv2D &) override;
void visit(const ir::operation::DepthToSpace &) override;
void visit(const ir::operation::DepthwiseConv2D &) override;
- void visit(const ir::operation::Dequantize &) override;
- void visit(const ir::operation::MaxPool2D &) override;
- void visit(const ir::operation::AvgPool2D &) override;
void visit(const ir::operation::Concat &) override;
+ void visit(const ir::operation::ElementwiseActivation &) override;
+ void visit(const ir::operation::ElementwiseBinary &) override;
+ void visit(const ir::operation::ElementwiseUnary &) override;
void visit(const ir::operation::EmbeddingLookup &) override;
- void visit(const ir::operation::Floor &) override;
void visit(const ir::operation::FullyConnected &) override;
void visit(const ir::operation::Gather &) override;
void visit(const ir::operation::HashtableLookup &) override;
void visit(const ir::operation::InstanceNorm &) override;
void visit(const ir::operation::L2Normalization &) override;
- void visit(const ir::operation::L2Pool2D &) override;
void visit(const ir::operation::LocalResponseNormalization &) override;
- void visit(const ir::operation::LogicalAnd &) override;
- void visit(const ir::operation::LogicalNot &) override;
- void visit(const ir::operation::LogicalOr &) override;
- void visit(const ir::operation::Logistic &) override;
void visit(const ir::operation::LSTM &) override;
- void visit(const ir::operation::Mul &) override;
- void visit(const ir::operation::Neg &) override;
void visit(const ir::operation::Pack &) override;
void visit(const ir::operation::Pad &) override;
+ void visit(const ir::operation::Pool2D &) override;
void visit(const ir::operation::Permute &) override;
void visit(const ir::operation::PReLU &) override;
void visit(const ir::operation::Reduce &) override;
- void visit(const ir::operation::ReLU &) override;
- void visit(const ir::operation::ReLU1 &) override;
- void visit(const ir::operation::ReLU6 &) override;
void visit(const ir::operation::Reshape &) override;
void visit(const ir::operation::ResizeBilinear &) override;
void visit(const ir::operation::RNN &) override;
- void visit(const ir::operation::RSQRT &) override;
void visit(const ir::operation::Squeeze &) override;
- void visit(const ir::operation::Tanh &) override;
void visit(const ir::operation::Softmax &) override;
void visit(const ir::operation::SpaceToBatchND &) override;
void visit(const ir::operation::SpaceToDepth &) override;
void visit(const ir::operation::Split &) override;
- void visit(const ir::operation::SQRT &) override;
void visit(const ir::operation::SquaredDifference &) override;
- void visit(const ir::operation::Sub &) override;
void visit(const ir::operation::Slice &) override;
void visit(const ir::operation::StridedSlice &) override;
void visit(const ir::operation::TransposeConv &) override;
void visit(const ir::operation::Transpose &) override;
void visit(const ir::operation::Unpack &) override;
- void visit(const ir::operation::Add &) override;
- void visit(const ir::operation::Div &) override;
- void visit(const ir::operation::Exp &) override;
void visit(const ir::operation::ExpandDims &) override;
void visit(const ir::operation::Comparison &) override;
- void visit(const ir::operation::Min &) override;
- void visit(const ir::operation::Max &) override;
void visit(const ir::operation::OneHot &) override;
private:
const ir::Operands &_ctx;
const ir::Operations &_operations_ctx;
std::shared_ptr<TensorBuilder> _tensor_builder;
+ std::shared_ptr<acl_common::AclTensorRegistry<TensorManager>> _tensor_reg;
ir::Layout _current_op_seq_layout;
};
diff --git a/runtime/onert/backend/acl_neon/Optimizer.cc b/runtime/onert/backend/acl_neon/Optimizer.cc
index 2948cab09..ac80901cc 100644
--- a/runtime/onert/backend/acl_neon/Optimizer.cc
+++ b/runtime/onert/backend/acl_neon/Optimizer.cc
@@ -19,7 +19,7 @@
#include "ParentInfo.h"
#include <cassert>
-#include <ir/LoweredGraph.h>
+#include <compiler/LoweredGraph.h>
#include <util/logging.h>
#include "AclSubTensorAnalyzer.h"
diff --git a/runtime/onert/backend/acl_neon/TensorManager.h b/runtime/onert/backend/acl_neon/TensorManager.h
index 3ec9efa8f..3b7cfbcfd 100644
--- a/runtime/onert/backend/acl_neon/TensorManager.h
+++ b/runtime/onert/backend/acl_neon/TensorManager.h
@@ -55,7 +55,7 @@ using InternalBufferManager = acl_common::AclInternalBufferManager<
using TensorManager = acl_common::AclTensorManager<acl_neon::operand::INETensor, operand::NETensor,
operand::NESubTensor>;
-TensorManager *createTensorManager(bool is_linear_executor)
+inline TensorManager *createTensorManager(bool is_linear_executor)
{
if (is_linear_executor)
{
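
The TensorManager.h hunk adds inline because createTensorManager is defined in a header: without it, every translation unit that includes the header emits its own external definition of the function and linking fails with a duplicate-symbol error. A hypothetical minimal illustration of the rule (names invented, not from the repo):

// header included from more than one .cc file
#include <memory>

struct Widget
{
};

// OK: 'inline' lets each translation unit carry a definition that the linker merges.
inline std::unique_ptr<Widget> makeWidget() { return std::make_unique<Widget>(); }

// Without 'inline', a header-defined function like the one above would produce one
// external definition per including .cc file and a multiple-definition link error.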
diff --git a/runtime/onert/backend/cpu/Backend.h b/runtime/onert/backend/cpu/Backend.h
index 56bd352e0..fc8574b26 100644
--- a/runtime/onert/backend/cpu/Backend.h
+++ b/runtime/onert/backend/cpu/Backend.h
@@ -47,10 +47,12 @@ public:
const auto &operands = graph.operands();
const auto &operations = graph.operations();
auto context = std::make_unique<BackendContext>(this, &graph);
- auto tb = std::make_shared<TensorBuilder>();
+ auto tr = std::make_shared<cpu_common::TensorRegistry>();
+ auto tb = std::make_shared<TensorBuilder>(tr);
+ context->tensor_registry = tr;
context->tensor_builder = tb;
- context->constant_initializer = std::make_shared<ConstantInitializer>(operands, tb);
- context->kernel_gen = std::make_shared<KernelGenerator>(operands, operations, tb, kb,
+ context->constant_initializer = std::make_shared<ConstantInitializer>(operands, tr);
+ context->kernel_gen = std::make_shared<KernelGenerator>(operands, operations, tb, tr, kb,
context->external_context());
context->tensor_register = nullptr;
context->optimizer = nullptr;
diff --git a/runtime/onert/backend/cpu/BackendContext.h b/runtime/onert/backend/cpu/BackendContext.h
index f314a8e39..e90b21054 100644
--- a/runtime/onert/backend/cpu/BackendContext.h
+++ b/runtime/onert/backend/cpu/BackendContext.h
@@ -31,13 +31,15 @@ class BackendContext : public onert::backend::BackendContext
{
public:
BackendContext(const Backend *backend, const ir::Graph *graph,
+ std::shared_ptr<ITensorRegistry> tensor_registry = nullptr,
std::shared_ptr<ITensorBuilder> tensor_builder = nullptr,
std::shared_ptr<IConstantInitializer> constant_initializer = nullptr,
std::shared_ptr<IKernelGenerator> kernel_gen = nullptr,
std::shared_ptr<ITensorRegister> tensor_register = nullptr,
std::shared_ptr<IOptimizer> optimizer = nullptr)
- : onert::backend::BackendContext(backend, graph, tensor_builder, constant_initializer,
- kernel_gen, tensor_register, optimizer),
+ : onert::backend::BackendContext(backend, graph, tensor_registry, tensor_builder,
+ constant_initializer, kernel_gen, tensor_register,
+ optimizer),
_external_context(new ExternalContext)
{
}
diff --git a/runtime/onert/backend/cpu/ConstantInitializer.cc b/runtime/onert/backend/cpu/ConstantInitializer.cc
index deb27f0fe..6f6eb77bc 100644
--- a/runtime/onert/backend/cpu/ConstantInitializer.cc
+++ b/runtime/onert/backend/cpu/ConstantInitializer.cc
@@ -25,8 +25,8 @@ namespace cpu
{
ConstantInitializer::ConstantInitializer(const ir::Operands &operands,
- const std::shared_ptr<TensorBuilder> &tensor_builder)
- : IConstantInitializer{operands}, _tensor_builder{tensor_builder}
+ const std::shared_ptr<ITensorRegistry> &tensor_reg)
+ : IConstantInitializer{operands}, _tensor_reg{tensor_reg}
{
// DO NOTHING
}
diff --git a/runtime/onert/backend/cpu/ConstantInitializer.h b/runtime/onert/backend/cpu/ConstantInitializer.h
index de03a693a..c016c83bc 100644
--- a/runtime/onert/backend/cpu/ConstantInitializer.h
+++ b/runtime/onert/backend/cpu/ConstantInitializer.h
@@ -17,7 +17,7 @@
#ifndef __ONERT_COMPILER_CPU_CONSTANT_INITIALIZER_H__
#define __ONERT_COMPILER_CPU_CONSTANT_INITIALIZER_H__
-#include "TensorBuilder.h"
+#include "backend/cpu_common/TensorRegistry.h"
#include <backend/IConstantInitializer.h>
#include <ir/Operands.h>
@@ -33,7 +33,7 @@ class ConstantInitializer : public IConstantInitializer
{
public:
ConstantInitializer(const ir::Operands &operands,
- const std::shared_ptr<TensorBuilder> &tensor_builder);
+ const std::shared_ptr<ITensorRegistry> &tensor_reg);
public:
void registerDefaultInitializer(const ir::OperandIndex &index, const ir::Operand &obj) override;
@@ -50,10 +50,10 @@ public:
void visit(const ir::operation::FullyConnected &) override;
private:
- std::shared_ptr<ITensorBuilder> tensor_builder() const override { return _tensor_builder; }
+ std::shared_ptr<ITensorRegistry> tensor_registry() const override { return _tensor_reg; }
private:
- std::shared_ptr<TensorBuilder> _tensor_builder;
+ std::shared_ptr<ITensorRegistry> _tensor_reg;
};
} // namespace cpu
diff --git a/runtime/onert/backend/cpu/KernelGenerator.cc b/runtime/onert/backend/cpu/KernelGenerator.cc
index 7939fe894..74b6f0c6b 100644
--- a/runtime/onert/backend/cpu/KernelGenerator.cc
+++ b/runtime/onert/backend/cpu/KernelGenerator.cc
@@ -16,49 +16,36 @@
#include "KernelGenerator.h"
-#include "ops/AbsLayer.h"
-#include "ops/AddLayer.h"
#include "ops/ArgMinMaxLayer.h"
-#include "ops/AvgPoolLayer.h"
#include "ops/BatchToSpaceNDLayer.h"
-#include "ops/CastLayer.h"
+#include "ops/BinaryArithmeticLayer.h"
#include "ops/CompareLayer.h"
#include "ops/ConcatLayer.h"
#include "ops/ConvolutionLayer.h"
-#include "ops/CosLayer.h"
#include "ops/DepthwiseConvolutionLayer.h"
-#include "ops/DivLayer.h"
#include "ops/EinsumLayer.h"
-#include "ops/ExpLayer.h"
+#include "ops/ElementwiseActivationLayer.h"
+#include "ops/ElementwiseBinaryLayer.h"
+#include "ops/ElementwiseUnaryLayer.h"
#include "ops/ExpandDimsLayer.h"
#include "ops/FillLayer.h"
#include "ops/FullyConnectedLayer.h"
#include "ops/GatherLayer.h"
-#include "ops/LogLayer.h"
-#include "ops/LogisticLayer.h"
-#include "ops/MaxLayer.h"
-#include "ops/MaxPoolLayer.h"
#include "ops/MeanLayer.h"
-#include "ops/MinLayer.h"
-#include "ops/MulLayer.h"
-#include "ops/NegLayer.h"
#include "ops/OneHotLayer.h"
#include "ops/OperationUtils.h"
#include "ops/PackLayer.h"
#include "ops/PadLayer.h"
+#include "ops/PoolLayer.h"
#include "ops/PowLayer.h"
#include "ops/RangeLayer.h"
+#include "ops/RankLayer.h"
#include "ops/ReduceLayer.h"
-#include "ops/ReLULayer.h"
-#include "ops/ReLU6Layer.h"
#include "ops/ReshapeLayer.h"
#include "ops/ResizeBilinearLayer.h"
#include "ops/ReverseLayer.h"
-#include "ops/RoundLayer.h"
-#include "ops/RsqrtLayer.h"
#include "ops/SelectLayer.h"
#include "ops/ShapeLayer.h"
-#include "ops/SinLayer.h"
#include "ops/SliceLayer.h"
#include "ops/SoftMaxLayer.h"
#include "ops/StridedSliceLayer.h"
@@ -66,22 +53,16 @@
#include "ops/SpaceToDepthLayer.h"
#include "ops/SplitLayer.h"
#include "ops/SplitVLayer.h"
-#include "ops/SubLayer.h"
-#include "ops/TanhLayer.h"
#include "ops/TileLayer.h"
#include "ops/TransposeLayer.h"
#include "ops/UnpackLayer.h"
-#include "ops/LogicalNotLayer.h"
-#include "ops/ZerosLikeLayer.h"
#include "ops/SquaredDiffLayer.h"
-#include "ops/LogicalOrLayer.h"
#include "ops/L2NormLayer.h"
#include "ops/MatrixBandPartLayer.h"
#include "ops/BatchMatMulLayer.h"
#include "ops/BroadcastToLayer.h"
#include "ops/FusedBatchNormLayer.h"
#include "ops/LogSoftMaxLayer.h"
-#include "ops/QuantizeLayer.h"
#include "ops/StatelessRandomUniformLayer.h"
#include <backend/Backend.h>
@@ -102,6 +83,104 @@ namespace cpu
namespace
{
+ops::ArithmeticType
+convertArithmeticType(ir::operation::BinaryArithmetic::ArithmeticType arithmetic_type_ir)
+{
+ switch (arithmetic_type_ir)
+ {
+ case ir::operation::BinaryArithmetic::ArithmeticType::ADD:
+ return ops::ArithmeticType::kAdd;
+ case ir::operation::BinaryArithmetic::ArithmeticType::SUB:
+ return ops::ArithmeticType::kSub;
+ case ir::operation::BinaryArithmetic::ArithmeticType::MUL:
+ return ops::ArithmeticType::kMul;
+ case ir::operation::BinaryArithmetic::ArithmeticType::DIV:
+ return ops::ArithmeticType::kDiv;
+ default:
+ throw std::runtime_error("cpu KernelGenerator : Not supported operation yet");
+ }
+}
+
+ops::ElementwiseActivationType
+convertElementwiseActivationType(ir::operation::ElementwiseActivation::Type type_ir)
+{
+ switch (type_ir)
+ {
+ case ir::operation::ElementwiseActivation::Type::LOGISTIC:
+ return ops::ElementwiseActivationType::kLogistic;
+ case ir::operation::ElementwiseActivation::Type::RELU:
+ return ops::ElementwiseActivationType::kReLU;
+ case ir::operation::ElementwiseActivation::Type::TANH:
+ return ops::ElementwiseActivationType::kTanh;
+ default:
+ throw std::runtime_error("cpu KernelGenerator : Not supported operation yet");
+ }
+}
+
+ops::ElementwiseBinaryType
+convertElementwiseBinaryType(ir::operation::ElementwiseBinary::ElementwiseBinaryType type_ir)
+{
+ switch (type_ir)
+ {
+ case ir::operation::ElementwiseBinary::ElementwiseBinaryType::LOGICAL_OR:
+ return ops::ElementwiseBinaryType::kLogicalOr;
+ case ir::operation::ElementwiseBinary::ElementwiseBinaryType::MAX:
+ return ops::ElementwiseBinaryType::kMax;
+ case ir::operation::ElementwiseBinary::ElementwiseBinaryType::MIN:
+ return ops::ElementwiseBinaryType::kMin;
+ default:
+ throw std::runtime_error("cpu KernelGenerator : Not supported operation yet");
+ }
+}
+
+ops::ElementwiseUnaryType convertElementwiseUnaryType(ir::operation::ElementwiseUnary::Type type_ir)
+{
+ switch (type_ir)
+ {
+ case ir::operation::ElementwiseUnary::Type::ABS:
+ return ops::ElementwiseUnaryType::kAbs;
+ case ir::operation::ElementwiseUnary::Type::CAST:
+ return ops::ElementwiseUnaryType::kCast;
+ case ir::operation::ElementwiseUnary::Type::COS:
+ return ops::ElementwiseUnaryType::kCos;
+ case ir::operation::ElementwiseUnary::Type::ERF:
+ return ops::ElementwiseUnaryType::kErf;
+ case ir::operation::ElementwiseUnary::Type::EXP:
+ return ops::ElementwiseUnaryType::kExp;
+ case ir::operation::ElementwiseUnary::Type::LOG:
+ return ops::ElementwiseUnaryType::kLog;
+ case ir::operation::ElementwiseUnary::Type::LOGICAL_NOT:
+ return ops::ElementwiseUnaryType::kLogicalNot;
+ case ir::operation::ElementwiseUnary::Type::NEG:
+ return ops::ElementwiseUnaryType::kNeg;
+ case ir::operation::ElementwiseUnary::Type::QUANTIZE:
+ return ops::ElementwiseUnaryType::kQuantize;
+ case ir::operation::ElementwiseUnary::Type::ROUND:
+ return ops::ElementwiseUnaryType::kRound;
+ case ir::operation::ElementwiseUnary::Type::RSQRT:
+ return ops::ElementwiseUnaryType::kRSqrt;
+ case ir::operation::ElementwiseUnary::Type::SIN:
+ return ops::ElementwiseUnaryType::kSin;
+ case ir::operation::ElementwiseUnary::Type::ZEROS_LIKE:
+ return ops::ElementwiseUnaryType::kZerosLike;
+ default:
+ throw std::runtime_error("cpu KernelGenerator : Not supported operation yet");
+ }
+}
+
+ops::PoolType convertPoolType(ir::operation::Pool2D::PoolType type_ir)
+{
+ switch (type_ir)
+ {
+ case ir::operation::Pool2D::PoolType::AVG:
+ return ops::PoolType::kAvg;
+ case ir::operation::Pool2D::PoolType::MAX:
+ return ops::PoolType::kMax;
+ default:
+ throw std::runtime_error("cpu KernelGenerator : Not supported operation yet");
+ }
+}
+
ops::ReduceType convertReduceType(ir::operation::Reduce::ReduceType reduce_type_ir)
{
switch (reduce_type_ir)
@@ -127,11 +206,12 @@ ops::ReduceType convertReduceType(ir::operation::Reduce::ReduceType reduce_type_
KernelGenerator::KernelGenerator(
const ir::Operands &operands_ctx, const ir::Operations &operations_ctx,
const std::shared_ptr<TensorBuilder> &tensor_builder,
+ const std::shared_ptr<cpu_common::TensorRegistry> &tensor_reg,
const std::shared_ptr<backend::custom::IKernelBuilder> &kernel_builder,
const std::shared_ptr<ExternalContext> &external_context)
: _ctx(operands_ctx), _operations_ctx{operations_ctx}, _tensor_builder(tensor_builder),
- _kernel_builder(kernel_builder), _current_op_seq_layout(ir::Layout::UNKNOWN),
- _external_context(external_context)
+ _tensor_reg{tensor_reg}, _kernel_builder(kernel_builder),
+ _current_op_seq_layout(ir::Layout::UNKNOWN), _external_context(external_context)
{
// DO NOTHING
}
@@ -140,11 +220,9 @@ void KernelGenerator::visit(const ir::OpSequence &op_seq)
{
assert(!_return_fn_seq);
assert(_tensor_builder->dynamicTensorManager());
- assert(_tensor_builder->tensorRegistry());
+ assert(_tensor_reg);
- auto dyn_tensor_manager = _tensor_builder->dynamicTensorManager();
- auto dyn_shape_inferer = std::make_shared<exec::DynamicShapeInferer>(
- _ctx, dyn_tensor_manager, _tensor_builder->tensorRegistry());
+ auto dyn_shape_inferer = std::make_shared<exec::DynamicShapeInferer>(_ctx, _tensor_reg);
_return_fn_seq = std::make_unique<exec::FunctionSequence>();
@@ -154,7 +232,7 @@ void KernelGenerator::visit(const ir::OpSequence &op_seq)
dyn_ctx->op_seq = &op_seq;
dyn_ctx->operations = &_operations_ctx;
dyn_ctx->dynamic_shape_inferer = std::move(dyn_shape_inferer);
- dyn_ctx->tensor_registry = _tensor_builder->tensorRegistry();
+ dyn_ctx->tensor_registry = _tensor_reg;
dyn_ctx->dynamic_tensor_manager = _tensor_builder->dynamicTensorManager();
_return_fn_seq->dynamic_tensor_ctx(dyn_ctx);
@@ -170,13 +248,13 @@ void KernelGenerator::visit(const ir::OpSequence &op_seq)
for (const auto &ind : (node.getInputs() | ir::Remove::UNDEFINED) + node.getOutputs())
{
- auto portable_tensor = _tensor_builder->portableAt(ind);
+ auto portable_tensor = _tensor_reg->getPortableTensor(ind);
if (portable_tensor)
{
assert(portable_tensor->layout() == ir::Layout::NHWC);
}
- auto tensor = _tensor_builder->at(ind);
+ auto tensor = _tensor_reg->getNativeTensor(ind);
if (tensor)
{
tensor->increase_ref();
@@ -194,21 +272,23 @@ void KernelGenerator::visit(const ir::operation::Conv2D &node)
const auto ker_index{node.getInputs().at(Conv2D::Input::KERNEL)};
const auto bias_index{node.getInputs().at(Conv2D::Input::BIAS)};
- auto ofm_tensor = _tensor_builder->portableAt(ofm_index).get();
- auto ifm_tensor = _tensor_builder->portableAt(ifm_index).get();
- auto ker_tensor = _tensor_builder->portableAt(ker_index).get();
- auto bias_tensor = _tensor_builder->portableAt(bias_index).get();
+ auto ofm_tensor = _tensor_reg->getPortableTensor(ofm_index).get();
+ auto ifm_tensor = _tensor_reg->getPortableTensor(ifm_index).get();
+ auto ker_tensor = _tensor_reg->getPortableTensor(ker_index).get();
+ auto bias_tensor = _tensor_reg->getPortableTensor(bias_index).get();
const auto stride = node.param().stride;
const auto activation = node.param().activation;
const auto param_padding = node.param().padding;
+ const auto dilation = node.param().dilation;
auto fn = std::make_unique<ops::ConvolutionLayer>();
if (_ctx.at(ifm_index).info().isDynamic() || _ctx.at(ker_index).info().isDynamic())
{
fn->configure(ifm_tensor, ker_tensor, bias_tensor, param_padding.type, param_padding.param.left,
param_padding.param.right, param_padding.param.top, param_padding.param.bottom,
- stride.horizontal, stride.vertical, activation, ofm_tensor);
+ stride.horizontal, stride.vertical, dilation.width_factor, dilation.height_factor,
+ activation, ofm_tensor);
_return_fn = std::move(fn);
return;
@@ -221,11 +301,12 @@ void KernelGenerator::visit(const ir::operation::Conv2D &node)
const auto ker_width = ker_shape.dim(2);
const auto padding =
- ir::calculatePadding(param_padding, ifm_shape, ofm_shape, stride, ker_width, ker_height);
+ ir::calculatePadding(param_padding, ifm_shape, ofm_shape, stride, ker_width, ker_height,
+ dilation.width_factor, dilation.height_factor);
fn->configure(ifm_tensor, ker_tensor, bias_tensor, param_padding.type, padding.left,
padding.right, padding.top, padding.bottom, stride.horizontal, stride.vertical,
- activation, ofm_tensor);
+ dilation.width_factor, dilation.height_factor, activation, ofm_tensor);
_return_fn = std::move(fn);
}
@@ -251,10 +332,10 @@ void KernelGenerator::visit(const ir::operation::DepthwiseConv2D &node)
const auto multiplier = node.param().multiplier;
const auto activation = node.param().activation;
- auto ofm_tensor = _tensor_builder->portableAt(ofm_index).get();
- auto ifm_tensor = _tensor_builder->portableAt(ifm_index).get();
- auto ker_tensor = _tensor_builder->portableAt(ker_index).get();
- auto bias_tensor = _tensor_builder->portableAt(bias_index).get();
+ auto ofm_tensor = _tensor_reg->getPortableTensor(ofm_index).get();
+ auto ifm_tensor = _tensor_reg->getPortableTensor(ifm_index).get();
+ auto ker_tensor = _tensor_reg->getPortableTensor(ker_index).get();
+ auto bias_tensor = _tensor_reg->getPortableTensor(bias_index).get();
auto fn = std::make_unique<ops::DepthwiseConvolutionLayer>();
@@ -265,57 +346,6 @@ void KernelGenerator::visit(const ir::operation::DepthwiseConv2D &node)
_return_fn = std::move(fn);
}
-void KernelGenerator::visit(const ir::operation::MaxPool2D &node)
-{
- const auto ofm_index{node.getOutputs().at(0)};
- const auto ifm_index{node.getInputs().at(ir::operation::MaxPool2D::Input::INPUT)};
-
- const auto kh = node.param().kh;
- const auto kw = node.param().kw;
-
- const auto stride = node.param().stride;
- const auto ifm_shape = _ctx.at(ifm_index).shape().asFeature(_current_op_seq_layout);
- const auto ofm_shape = _ctx.at(ofm_index).shape().asFeature(_current_op_seq_layout);
- const auto padding =
- ir::calculatePadding(node.param().padding, ifm_shape, ofm_shape, stride, kw, kh);
- const auto activation = node.param().activation;
-
- auto ofm_tensor = _tensor_builder->portableAt(ofm_index).get();
- auto ifm_tensor = _tensor_builder->portableAt(ifm_index).get();
-
- auto fn = std::make_unique<ops::MaxPoolLayer>();
-
- fn->configure(ifm_tensor, padding.left, padding.right, padding.top, padding.bottom,
- stride.horizontal, stride.vertical, kw, kh, activation, ofm_tensor);
-
- _return_fn = std::move(fn);
-}
-
-void KernelGenerator::visit(const ir::operation::AvgPool2D &node)
-{
- const auto ofm_index{node.getOutputs().at(0)};
- const auto ifm_index{node.getInputs().at(ir::operation::AvgPool2D::Input::INPUT)};
-
- const auto kh = node.param().kh;
- const auto kw = node.param().kw;
- const auto stride = node.param().stride;
- const auto ifm_shape = _ctx.at(ifm_index).shape().asFeature(_current_op_seq_layout);
- const auto ofm_shape = _ctx.at(ofm_index).shape().asFeature(_current_op_seq_layout);
- const auto padding =
- ir::calculatePadding(node.param().padding, ifm_shape, ofm_shape, stride, kw, kh);
- const auto activation = node.param().activation;
-
- auto ofm_tensor = _tensor_builder->portableAt(ofm_index).get();
- auto ifm_tensor = _tensor_builder->portableAt(ifm_index).get();
-
- auto fn = std::make_unique<ops::AvgPoolLayer>();
-
- fn->configure(ifm_tensor, padding.left, padding.right, padding.top, padding.bottom,
- stride.horizontal, stride.vertical, kw, kh, activation, ofm_tensor);
-
- _return_fn = std::move(fn);
-}
-
void KernelGenerator::visit(const ir::operation::Concat &node)
{
const auto ofm_index{node.getOutputs().at(0)};
@@ -323,11 +353,11 @@ void KernelGenerator::visit(const ir::operation::Concat &node)
const auto rank = _ctx.at(ofm_index).shape().rank();
const auto axis = ops::getAxis(rank, node.param().axis, _current_op_seq_layout);
- auto output_tensor = _tensor_builder->portableAt(ofm_index).get();
+ auto output_tensor = _tensor_reg->getPortableTensor(ofm_index).get();
std::vector<const IPortableTensor *> input_tensors;
for (auto &ifm_idx : node.getInputs())
- input_tensors.emplace_back(_tensor_builder->portableAt(ifm_idx).get());
+ input_tensors.emplace_back(_tensor_reg->getPortableTensor(ifm_idx).get());
auto fn = std::make_unique<ops::ConcatLayer>();
@@ -342,9 +372,9 @@ void KernelGenerator::visit(const ir::operation::BatchToSpaceND &node)
const auto input_index{node.getInputs().at(ir::operation::BatchToSpaceND::INPUT)};
const auto block_size_index{node.getInputs().at(ir::operation::BatchToSpaceND::BLOCK_SIZE)};
- auto output_alloc = _tensor_builder->portableAt(output_index).get();
- auto input_alloc = _tensor_builder->portableAt(input_index).get();
- auto block_size_alloc = _tensor_builder->portableAt(block_size_index).get();
+ auto output_alloc = _tensor_reg->getPortableTensor(output_index).get();
+ auto input_alloc = _tensor_reg->getPortableTensor(input_index).get();
+ auto block_size_alloc = _tensor_reg->getPortableTensor(block_size_index).get();
auto fn = std::make_unique<ops::BatchToSpaceNDLayer>();
@@ -354,7 +384,7 @@ void KernelGenerator::visit(const ir::operation::BatchToSpaceND &node)
if (node.getInputs().size() != NNApiInputs)
{
const auto crops_data_index{node.getInputs().at(ir::operation::BatchToSpaceND::CROPS_DATA)};
- crops_alloc = _tensor_builder->portableAt(crops_data_index).get();
+ crops_alloc = _tensor_reg->getPortableTensor(crops_data_index).get();
}
fn->configure(input_alloc, output_alloc, block_size_alloc, crops_alloc);
@@ -368,9 +398,9 @@ void KernelGenerator::visit(const ir::operation::Fill &node)
const auto input_index{node.getInputs().at(ir::operation::Fill::Input::INPUT)};
const auto value_index{node.getInputs().at(ir::operation::Fill::Input::VALUE)};
- auto output_tensor = _tensor_builder->portableAt(output_index).get();
- auto input_tensor = _tensor_builder->portableAt(input_index).get();
- auto value_tensor = _tensor_builder->portableAt(value_index).get();
+ auto output_tensor = _tensor_reg->getPortableTensor(output_index).get();
+ auto input_tensor = _tensor_reg->getPortableTensor(input_index).get();
+ auto value_tensor = _tensor_reg->getPortableTensor(value_index).get();
auto fn = std::make_unique<ops::FillLayer>();
@@ -389,11 +419,11 @@ void KernelGenerator::visit(const ir::operation::FullyConnected &node)
const auto bias_index{node.getInputs().at(FullyConnected::Input::BIAS)};
const auto activation = node.param().activation;
- auto output_tensor = _tensor_builder->portableAt(output_index).get();
- auto input_tensor = _tensor_builder->portableAt(input_index).get();
- auto weight_tensor = _tensor_builder->portableAt(weight_index).get();
+ auto output_tensor = _tensor_reg->getPortableTensor(output_index).get();
+ auto input_tensor = _tensor_reg->getPortableTensor(input_index).get();
+ auto weight_tensor = _tensor_reg->getPortableTensor(weight_index).get();
auto bias_tensor =
- bias_index.undefined() ? nullptr : _tensor_builder->portableAt(bias_index).get();
+ bias_index.undefined() ? nullptr : _tensor_reg->getPortableTensor(bias_index).get();
auto fn = std::make_unique<ops::FullyConnectedLayer>();
@@ -408,8 +438,8 @@ void KernelGenerator::visit(const ir::operation::Reshape &node)
const auto output_index{node.getOutputs().at(0)};
const auto input_index{node.getInputs().at(ir::operation::Reshape::Input::INPUT)};
- auto output_tensor = _tensor_builder->portableAt(output_index).get();
- auto input_tensor = _tensor_builder->portableAt(input_index).get();
+ auto output_tensor = _tensor_reg->getPortableTensor(output_index).get();
+ auto input_tensor = _tensor_reg->getPortableTensor(input_index).get();
// optional 2nd input
IPortableTensor *shape_tensor = nullptr;
@@ -417,7 +447,7 @@ void KernelGenerator::visit(const ir::operation::Reshape &node)
if (node.getInputs().size() == 2)
{
const auto shape_index{node.getInputs().at(ir::operation::Reshape::Input::SHAPE)};
- shape_tensor = _tensor_builder->portableAt(shape_index).get();
+ shape_tensor = _tensor_reg->getPortableTensor(shape_index).get();
}
auto fn = std::make_unique<ops::ReshapeLayer>();
@@ -431,8 +461,8 @@ void KernelGenerator::visit(const ir::operation::Squeeze &node)
const auto output_index{node.getOutputs().at(0)};
const auto input_index{node.getInputs().at(ir::operation::Squeeze::Input::INPUT)};
- auto output_tensor = _tensor_builder->portableAt(output_index).get();
- auto input_tensor = _tensor_builder->portableAt(input_index).get();
+ auto output_tensor = _tensor_reg->getPortableTensor(output_index).get();
+ auto input_tensor = _tensor_reg->getPortableTensor(input_index).get();
// Squeeze can share same kernel with reshape
auto fn = std::make_unique<ops::ReshapeLayer>();
@@ -449,8 +479,8 @@ void KernelGenerator::visit(const ir::operation::Softmax &node)
const auto beta = node.param().beta;
- auto output_tensor = _tensor_builder->portableAt(output_index).get();
- auto input_tensor = _tensor_builder->portableAt(input_index).get();
+ auto output_tensor = _tensor_reg->getPortableTensor(output_index).get();
+ auto input_tensor = _tensor_reg->getPortableTensor(input_index).get();
auto fn = std::make_unique<ops::SoftMaxLayer>();
@@ -459,21 +489,22 @@ void KernelGenerator::visit(const ir::operation::Softmax &node)
_return_fn = std::move(fn);
}
-void KernelGenerator::visit(const ir::operation::Add &node)
+void KernelGenerator::visit(const ir::operation::BinaryArithmetic &node)
{
const auto ofm_index{node.getOutputs().at(0)};
- const auto lhs_index{node.getInputs().at(ir::operation::Add::Input::LHS)};
- const auto rhs_index{node.getInputs().at(ir::operation::Add::Input::RHS)};
+ const auto lhs_index{node.getInputs().at(ir::operation::BinaryArithmetic::Input::LHS)};
+ const auto rhs_index{node.getInputs().at(ir::operation::BinaryArithmetic::Input::RHS)};
const auto activation = node.param().activation;
- auto ofm_tensor = _tensor_builder->portableAt(ofm_index).get();
- auto lhs_tensor = _tensor_builder->portableAt(lhs_index).get();
- auto rhs_tensor = _tensor_builder->portableAt(rhs_index).get();
+ auto ofm_tensor = _tensor_reg->getPortableTensor(ofm_index).get();
+ auto lhs_tensor = _tensor_reg->getPortableTensor(lhs_index).get();
+ auto rhs_tensor = _tensor_reg->getPortableTensor(rhs_index).get();
- auto fn = std::make_unique<ops::AddLayer>();
+ auto fn = std::make_unique<ops::BinaryArithmeticLayer>();
- fn->configure(lhs_tensor, rhs_tensor, activation, ofm_tensor);
+ fn->configure(lhs_tensor, rhs_tensor, ofm_tensor, activation,
+ convertArithmeticType(node.param().arithmetic_type));
_return_fn = std::move(fn);
}
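
In the hunk above, the separate Add/Sub/Mul/Div visitors collapse into one BinaryArithmetic visitor, and convertArithmeticType translates the IR's operator kind into the layer's own enum. A sketch of what such a conversion helper typically looks like; both enums here are stand-ins, not the verified onert definitions:

    #include <stdexcept>

    enum class IrArithmeticType { ADD, SUB, MUL, DIV };        // stand-in for the IR-side enum
    enum class LayerArithmeticType { kAdd, kSub, kMul, kDiv }; // stand-in for the kernel-side enum

    LayerArithmeticType convertArithmeticTypeSketch(IrArithmeticType type)
    {
      switch (type)
      {
        case IrArithmeticType::ADD: return LayerArithmeticType::kAdd;
        case IrArithmeticType::SUB: return LayerArithmeticType::kSub;
        case IrArithmeticType::MUL: return LayerArithmeticType::kMul;
        case IrArithmeticType::DIV: return LayerArithmeticType::kDiv;
      }
      throw std::runtime_error{"BinaryArithmetic: unsupported arithmetic type"};
    }

The same mapping pattern presumably backs convertPoolType, convertElementwiseActivationType, convertElementwiseBinaryType, and convertElementwiseUnaryType referenced elsewhere in this diff.
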
@@ -484,9 +515,9 @@ void KernelGenerator::visit(const ir::operation::Comparison &node)
const auto lhs_index{node.getInputs().at(ir::operation::Comparison::Input::INPUT0)};
const auto rhs_index{node.getInputs().at(ir::operation::Comparison::Input::INPUT1)};
- auto ofm_tensor = _tensor_builder->portableAt(ofm_index).get();
- auto lhs_tensor = _tensor_builder->portableAt(lhs_index).get();
- auto rhs_tensor = _tensor_builder->portableAt(rhs_index).get();
+ auto ofm_tensor = _tensor_reg->getPortableTensor(ofm_index).get();
+ auto lhs_tensor = _tensor_reg->getPortableTensor(lhs_index).get();
+ auto rhs_tensor = _tensor_reg->getPortableTensor(rhs_index).get();
auto comparison_type = node.param().comparison_type;
@@ -503,9 +534,9 @@ void KernelGenerator::visit(const ir::operation::Gather &node)
const auto input_index{node.getInputs().at(ir::operation::Gather::Input::INPUT)};
const auto indices_index{node.getInputs().at(ir::operation::Gather::Input::INDICES)};
- auto output_tensor = _tensor_builder->portableAt(output_index).get();
- auto input_tensor = _tensor_builder->portableAt(input_index).get();
- auto indices_tensor = _tensor_builder->portableAt(indices_index).get();
+ auto output_tensor = _tensor_reg->getPortableTensor(output_index).get();
+ auto input_tensor = _tensor_reg->getPortableTensor(input_index).get();
+ auto indices_tensor = _tensor_reg->getPortableTensor(indices_index).get();
const auto backend_layout = output_tensor->layout();
UNUSED_RELEASE(backend_layout);
@@ -534,46 +565,6 @@ void KernelGenerator::visit(const ir::operation::Gather &node)
_return_fn = std::move(fn);
}
-void KernelGenerator::visit(const ir::operation::Sub &node)
-{
- // The same as Add
- const auto ofm_index{node.getOutputs().at(0)};
- const auto lhs_index{node.getInputs().at(ir::operation::Sub::Input::LHS)};
- const auto rhs_index{node.getInputs().at(ir::operation::Sub::Input::RHS)};
-
- const auto activation = node.param().activation;
-
- auto ofm_tensor = _tensor_builder->portableAt(ofm_index).get();
- auto lhs_tensor = _tensor_builder->portableAt(lhs_index).get();
- auto rhs_tensor = _tensor_builder->portableAt(rhs_index).get();
-
- auto fn = std::make_unique<ops::SubLayer>();
-
- fn->configure(lhs_tensor, rhs_tensor, activation, ofm_tensor);
-
- _return_fn = std::move(fn);
-}
-
-void KernelGenerator::visit(const ir::operation::Mul &node)
-{
- // The same as Add
- const auto ofm_index{node.getOutputs().at(0)};
- const auto lhs_index{node.getInputs().at(ir::operation::Mul::Input::LHS)};
- const auto rhs_index{node.getInputs().at(ir::operation::Mul::Input::RHS)};
-
- const auto activation = node.param().activation;
-
- auto ofm_tensor = _tensor_builder->portableAt(ofm_index).get();
- auto lhs_tensor = _tensor_builder->portableAt(lhs_index).get();
- auto rhs_tensor = _tensor_builder->portableAt(rhs_index).get();
-
- auto fn = std::make_unique<ops::MulLayer>();
-
- fn->configure(lhs_tensor, rhs_tensor, activation, ofm_tensor);
-
- _return_fn = std::move(fn);
-}
-
void KernelGenerator::visit(const ir::operation::OneHot &node)
{
const auto output_index{node.getOutputs().at(0)};
@@ -584,11 +575,11 @@ void KernelGenerator::visit(const ir::operation::OneHot &node)
const auto axis = node.param().axis;
- auto output_tensor = _tensor_builder->portableAt(output_index).get();
- auto indices_tensor = _tensor_builder->portableAt(indices_index).get();
- auto depth_tensor = _tensor_builder->portableAt(depth_index).get();
- auto onvalue_tensor = _tensor_builder->portableAt(onvalue_index).get();
- auto offvalue_tensor = _tensor_builder->portableAt(offvalue_index).get();
+ auto output_tensor = _tensor_reg->getPortableTensor(output_index).get();
+ auto indices_tensor = _tensor_reg->getPortableTensor(indices_index).get();
+ auto depth_tensor = _tensor_reg->getPortableTensor(depth_index).get();
+ auto onvalue_tensor = _tensor_reg->getPortableTensor(onvalue_index).get();
+ auto offvalue_tensor = _tensor_reg->getPortableTensor(offvalue_index).get();
assert(indices_tensor->data_type() == OperandType::INT32);
assert(axis <= static_cast<int>(indices_tensor->num_dimensions()));
@@ -600,34 +591,14 @@ void KernelGenerator::visit(const ir::operation::OneHot &node)
_return_fn = std::move(fn);
}
-void KernelGenerator::visit(const ir::operation::Div &node)
-{
- // The same as Add
- const auto ofm_index{node.getOutputs().at(0)};
- const auto lhs_index{node.getInputs().at(ir::operation::Div::Input::LHS)};
- const auto rhs_index{node.getInputs().at(ir::operation::Div::Input::RHS)};
-
- const auto activation = node.param().activation;
-
- auto ofm_tensor = _tensor_builder->portableAt(ofm_index).get();
- auto lhs_tensor = _tensor_builder->portableAt(lhs_index).get();
- auto rhs_tensor = _tensor_builder->portableAt(rhs_index).get();
-
- auto fn = std::make_unique<ops::DivLayer>();
-
- fn->configure(lhs_tensor, rhs_tensor, activation, ofm_tensor);
-
- _return_fn = std::move(fn);
-}
-
void KernelGenerator::visit(const ir::operation::Einsum &node)
{
const auto ofm_index{node.getOutputs().at(0)};
- auto output_tensor = _tensor_builder->portableAt(ofm_index).get();
+ auto output_tensor = _tensor_reg->getPortableTensor(ofm_index).get();
std::vector<const IPortableTensor *> input_tensors;
for (auto &ifm_idx : node.getInputs())
- input_tensors.emplace_back(_tensor_builder->portableAt(ifm_idx).get());
+ input_tensors.emplace_back(_tensor_reg->getPortableTensor(ifm_idx).get());
const auto equation = node.param().equation;
@@ -648,7 +619,7 @@ void KernelGenerator::visit(const ir::operation::Custom &node)
const auto &operand = _ctx.at(idx);
// TODO make sure using `_current_op_seq_layout` is correct for custom operations
types.emplace_back(custom::TypeInfo{operand.shape(), operand.typeInfo().type()});
- auto in_tensor = _tensor_builder->portableAt(idx);
+ auto in_tensor = _tensor_reg->getPortableTensor(idx);
tensors.emplace_back(in_tensor);
}
};
@@ -666,64 +637,68 @@ void KernelGenerator::visit(const ir::operation::Custom &node)
_return_fn = std::move(fn);
}
-void KernelGenerator::visit(const ir::operation::Exp &node)
+void KernelGenerator::visit(const ir::operation::ElementwiseActivation &node)
{
const auto output_index{node.getOutputs().at(0)};
- const auto input_index{node.getInputs().at(ir::operation::Exp::Input::INPUT)};
+ const auto input_index{node.getInputs().at(ir::operation::ElementwiseActivation::Input::INPUT)};
- auto output_tensor = _tensor_builder->portableAt(output_index).get();
- auto input_tensor = _tensor_builder->portableAt(input_index).get();
+ auto output_tensor = _tensor_reg->getPortableTensor(output_index).get();
+ auto input_tensor = _tensor_reg->getPortableTensor(input_index).get();
- auto fn = std::make_unique<ops::ExpLayer>();
+ auto fn = std::make_unique<ops::ElementwiseActivationLayer>();
- fn->configure(input_tensor, output_tensor);
+ fn->configure(input_tensor, output_tensor, node.param().alpha, node.param().beta,
+ convertElementwiseActivationType(node.param().op_type));
_return_fn = std::move(fn);
}
-void KernelGenerator::visit(const ir::operation::ExpandDims &node)
+void KernelGenerator::visit(const ir::operation::ElementwiseBinary &node)
{
const auto output_index{node.getOutputs().at(0)};
- const auto input_index{node.getInputs().at(ir::operation::ExpandDims::Input::INPUT)};
- const auto axis_index{node.getInputs().at(ir::operation::ExpandDims::Input::AXIS)};
+ const auto lhs_index{node.getInputs().at(ir::operation::ElementwiseBinary::Input::LHS)};
+ const auto rhs_index{node.getInputs().at(ir::operation::ElementwiseBinary::Input::RHS)};
- auto output_tensor = _tensor_builder->portableAt(output_index).get();
- auto input_tensor = _tensor_builder->portableAt(input_index).get();
- auto axis_tensor = _tensor_builder->portableAt(axis_index).get();
+ auto output_tensor = _tensor_reg->getPortableTensor(output_index).get();
+ auto lhs_tensor = _tensor_reg->getPortableTensor(lhs_index).get();
+ auto rhs_tensor = _tensor_reg->getPortableTensor(rhs_index).get();
- auto fn = std::make_unique<ops::ExpandDimsLayer>();
+ auto fn = std::make_unique<ops::ElementwiseBinaryLayer>();
- fn->configure(input_tensor, axis_tensor, output_tensor);
+ fn->configure(lhs_tensor, rhs_tensor, output_tensor,
+ convertElementwiseBinaryType(node.param().op_type));
_return_fn = std::move(fn);
}
-void KernelGenerator::visit(const ir::operation::Logistic &node)
+void KernelGenerator::visit(const ir::operation::ElementwiseUnary &node)
{
const auto output_index{node.getOutputs().at(0)};
- const auto input_index{node.getInputs().at(ir::operation::Logistic::Input::INPUT)};
+ const auto input_index{node.getInputs().at(ir::operation::ElementwiseUnary::Input::INPUT)};
- auto output_tensor = _tensor_builder->portableAt(output_index).get();
- auto input_tensor = _tensor_builder->portableAt(input_index).get();
+ auto output_tensor = _tensor_reg->getPortableTensor(output_index).get();
+ auto input_tensor = _tensor_reg->getPortableTensor(input_index).get();
- auto fn = std::make_unique<ops::LogisticLayer>();
+ auto fn = std::make_unique<ops::ElementwiseUnaryLayer>();
- fn->configure(input_tensor, output_tensor);
+ fn->configure(input_tensor, output_tensor, convertElementwiseUnaryType(node.param().op_type));
_return_fn = std::move(fn);
}
-void KernelGenerator::visit(const ir::operation::Tanh &node)
+void KernelGenerator::visit(const ir::operation::ExpandDims &node)
{
const auto output_index{node.getOutputs().at(0)};
- const auto input_index{node.getInputs().at(ir::operation::Tanh::Input::INPUT)};
+ const auto input_index{node.getInputs().at(ir::operation::ExpandDims::Input::INPUT)};
+ const auto axis_index{node.getInputs().at(ir::operation::ExpandDims::Input::AXIS)};
- auto output_tensor = _tensor_builder->portableAt(output_index).get();
- auto input_tensor = _tensor_builder->portableAt(input_index).get();
+ auto output_tensor = _tensor_reg->getPortableTensor(output_index).get();
+ auto input_tensor = _tensor_reg->getPortableTensor(input_index).get();
+ auto axis_tensor = _tensor_reg->getPortableTensor(axis_index).get();
- auto fn = std::make_unique<ops::TanhLayer>();
+ auto fn = std::make_unique<ops::ExpandDimsLayer>();
- fn->configure(input_tensor, output_tensor);
+ fn->configure(input_tensor, axis_tensor, output_tensor);
_return_fn = std::move(fn);
}
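
The ElementwiseActivation change earlier in this hunk folds Tanh, Logistic, ReLU, and ReLU6 into a single visitor parameterized by op_type, alpha, and beta. One way the old per-op kernels could map onto those parameters is sketched below; the exact alpha/beta conventions are an assumption, not taken from the onert sources:

    #include <algorithm>
    #include <cmath>

    // One activation routine shaped like ElementwiseActivationLayer::configure(input, output,
    // alpha, beta, op_type) suggests. Assumed convention: ReLU-family ops clamp to [beta, alpha],
    // so plain ReLU uses alpha = +infinity and ReLU6 uses alpha = 6, both with beta = 0.
    enum class ActKind { kReLU, kTanh, kLogistic };

    float activate(float x, ActKind kind, float alpha, float beta)
    {
      switch (kind)
      {
        case ActKind::kReLU:     return std::min(std::max(x, beta), alpha);
        case ActKind::kTanh:     return std::tanh(x);
        case ActKind::kLogistic: return 1.0f / (1.0f + std::exp(-x));
      }
      return x;
    }
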
@@ -737,11 +712,11 @@ void KernelGenerator::visit(const ir::operation::Pack &node)
assert(-rank <= axis && axis < rank);
- auto output_tensor = _tensor_builder->portableAt(ofm_index).get();
+ auto output_tensor = _tensor_reg->getPortableTensor(ofm_index).get();
std::vector<const IPortableTensor *> input_tensors;
for (auto &ifm_idx : node.getInputs())
- input_tensors.emplace_back(_tensor_builder->portableAt(ifm_idx).get());
+ input_tensors.emplace_back(_tensor_reg->getPortableTensor(ifm_idx).get());
auto fn = std::make_unique<ops::PackLayer>();
@@ -759,11 +734,11 @@ void KernelGenerator::visit(const ir::operation::Unpack &node)
assert(rank == 0 || (-rank <= axis && axis < rank));
- auto input_tensor = _tensor_builder->portableAt(input_index).get();
+ auto input_tensor = _tensor_reg->getPortableTensor(input_index).get();
std::vector<IPortableTensor *> output_tensors;
for (auto &output_idx : node.getOutputs())
- output_tensors.emplace_back(_tensor_builder->portableAt(output_idx).get());
+ output_tensors.emplace_back(_tensor_reg->getPortableTensor(output_idx).get());
auto fn = std::make_unique<ops::UnpackLayer>();
@@ -781,8 +756,8 @@ void KernelGenerator::visit(const ir::operation::Pad &node)
const auto output_index{node.getOutputs().at(0)};
assert(_ctx.at(pad_index).data());
- auto input = _tensor_builder->portableAt(input_index).get();
- auto output = _tensor_builder->portableAt(output_index).get();
+ auto input = _tensor_reg->getPortableTensor(input_index).get();
+ auto output = _tensor_reg->getPortableTensor(output_index).get();
auto pad_rank = _ctx.at(pad_index).shape().dim(0);
auto pad_base = reinterpret_cast<const int32_t *>(_ctx.at(pad_index).data()->base());
@@ -801,62 +776,13 @@ void KernelGenerator::visit(const ir::operation::Pad &node)
_return_fn = std::move(fn);
}
-void KernelGenerator::visit(const ir::operation::Max &node)
-{
- const auto ofm_index{node.getOutputs().at(0)};
- const auto lhs_index{node.getInputs().at(ir::operation::Max::Input::LHS)};
- const auto rhs_index{node.getInputs().at(ir::operation::Max::Input::RHS)};
-
- auto ofm_tensor = _tensor_builder->portableAt(ofm_index).get();
- auto lhs_tensor = _tensor_builder->portableAt(lhs_index).get();
- auto rhs_tensor = _tensor_builder->portableAt(rhs_index).get();
-
- auto fn = std::make_unique<ops::MaxLayer>();
-
- fn->configure(lhs_tensor, rhs_tensor, ofm_tensor);
-
- _return_fn = std::move(fn);
-}
-
-void KernelGenerator::visit(const ir::operation::Min &node)
-{
- const auto ofm_index{node.getOutputs().at(0)};
- const auto lhs_index{node.getInputs().at(ir::operation::Min::Input::LHS)};
- const auto rhs_index{node.getInputs().at(ir::operation::Min::Input::RHS)};
-
- auto ofm_tensor = _tensor_builder->portableAt(ofm_index).get();
- auto lhs_tensor = _tensor_builder->portableAt(lhs_index).get();
- auto rhs_tensor = _tensor_builder->portableAt(rhs_index).get();
-
- auto fn = std::make_unique<ops::MinLayer>();
-
- fn->configure(lhs_tensor, rhs_tensor, ofm_tensor);
-
- _return_fn = std::move(fn);
-}
-
-void KernelGenerator::visit(const ir::operation::Cast &node)
-{
- const auto ofm_index{node.getOutputs().at(0)};
- const auto ifm_index{node.getInputs().at(ir::operation::Cast::Input::INPUT)};
-
- auto ofm_tensor = _tensor_builder->portableAt(ofm_index).get();
- auto ifm_tensor = _tensor_builder->portableAt(ifm_index).get();
-
- auto fn = std::make_unique<ops::CastLayer>();
-
- fn->configure(ifm_tensor, ofm_tensor);
-
- _return_fn = std::move(fn);
-}
-
void KernelGenerator::visit(const ir::operation::Transpose &node)
{
const auto output_index{node.getOutputs().at(0)};
const auto input_index{node.getInputs().at(ir::operation::Transpose::Input::INPUT)};
- auto output_tensor = _tensor_builder->portableAt(output_index).get();
- auto input_tensor = _tensor_builder->portableAt(input_index).get();
+ auto output_tensor = _tensor_reg->getPortableTensor(output_index).get();
+ auto input_tensor = _tensor_reg->getPortableTensor(input_index).get();
auto fn = std::make_unique<ops::TransposeLayer>();
@@ -872,9 +798,9 @@ void KernelGenerator::visit(const ir::operation::Reduce &node)
const auto axes_index{node.getInputs().at(ir::operation::Reduce::Input::AXES)};
const auto keep_dims = node.param().keep_dims;
- auto output_tensor = _tensor_builder->portableAt(output_index).get();
- auto input_tensor = _tensor_builder->portableAt(input_index).get();
- auto axes_tensor = _tensor_builder->portableAt(axes_index).get();
+ auto output_tensor = _tensor_reg->getPortableTensor(output_index).get();
+ auto input_tensor = _tensor_reg->getPortableTensor(input_index).get();
+ auto axes_tensor = _tensor_reg->getPortableTensor(axes_index).get();
if (node.param().reduce_type == ir::operation::Reduce::ReduceType::MEAN)
{
@@ -895,36 +821,6 @@ void KernelGenerator::visit(const ir::operation::Reduce &node)
}
}
-void KernelGenerator::visit(const ir::operation::ReLU &node)
-{
- const auto output_index{node.getOutputs().at(0)};
- const auto input_index{node.getInputs().at(0)};
-
- auto output_tensor = _tensor_builder->portableAt(output_index).get();
- auto input_tensor = _tensor_builder->portableAt(input_index).get();
-
- auto fn = std::make_unique<ops::ReLULayer>();
-
- fn->configure(input_tensor, output_tensor);
-
- _return_fn = std::move(fn);
-}
-
-void KernelGenerator::visit(const ir::operation::ReLU6 &node)
-{
- const auto output_index{node.getOutputs().at(0)};
- const auto input_index{node.getInputs().at(0)};
-
- auto output_tensor = _tensor_builder->portableAt(output_index).get();
- auto input_tensor = _tensor_builder->portableAt(input_index).get();
-
- auto fn = std::make_unique<ops::ReLU6Layer>();
-
- fn->configure(input_tensor, output_tensor);
-
- _return_fn = std::move(fn);
-}
-
void KernelGenerator::visit(const ir::operation::Select &node)
{
const auto output_index{node.getOutputs().at(0)};
@@ -932,10 +828,10 @@ void KernelGenerator::visit(const ir::operation::Select &node)
const auto true_index{node.getInputs().at(ir::operation::Select::Input::INPUT_TRUE)};
const auto false_index{node.getInputs().at(ir::operation::Select::Input::INPUT_FALSE)};
- auto output_tensor = _tensor_builder->portableAt(output_index).get();
- auto condition_tensor = _tensor_builder->portableAt(condition_index).get();
- auto true_tensor = _tensor_builder->portableAt(true_index).get();
- auto false_tensor = _tensor_builder->portableAt(false_index).get();
+ auto output_tensor = _tensor_reg->getPortableTensor(output_index).get();
+ auto condition_tensor = _tensor_reg->getPortableTensor(condition_index).get();
+ auto true_tensor = _tensor_reg->getPortableTensor(true_index).get();
+ auto false_tensor = _tensor_reg->getPortableTensor(false_index).get();
auto fn = std::make_unique<ops::SelectLayer>();
@@ -951,10 +847,10 @@ void KernelGenerator::visit(const ir::operation::Slice &node)
const auto begins_index{node.getInputs().at(ir::operation::Slice::Input::BEGINS)};
const auto sizes_index{node.getInputs().at(ir::operation::Slice::Input::SIZES)};
- auto output_tensor = _tensor_builder->portableAt(output_index).get();
- auto input_tensor = _tensor_builder->portableAt(input_index).get();
- auto begins_tensor = _tensor_builder->portableAt(begins_index).get();
- auto sizes_tensor = _tensor_builder->portableAt(sizes_index).get();
+ auto output_tensor = _tensor_reg->getPortableTensor(output_index).get();
+ auto input_tensor = _tensor_reg->getPortableTensor(input_index).get();
+ auto begins_tensor = _tensor_reg->getPortableTensor(begins_index).get();
+ auto sizes_tensor = _tensor_reg->getPortableTensor(sizes_index).get();
auto fn = std::make_unique<ops::SliceLayer>();
@@ -971,11 +867,11 @@ void KernelGenerator::visit(const ir::operation::StridedSlice &node)
const auto ends_index{node.getInputs().at(ir::operation::StridedSlice::Input::ENDS)};
const auto strides_index{node.getInputs().at(ir::operation::StridedSlice::Input::STRIDES)};
- auto output_tensor = _tensor_builder->portableAt(output_index).get();
- auto input_tensor = _tensor_builder->portableAt(input_index).get();
- auto starts_tensor = _tensor_builder->portableAt(starts_index).get();
- auto ends_tensor = _tensor_builder->portableAt(ends_index).get();
- auto strides_tensor = _tensor_builder->portableAt(strides_index).get();
+ auto output_tensor = _tensor_reg->getPortableTensor(output_index).get();
+ auto input_tensor = _tensor_reg->getPortableTensor(input_index).get();
+ auto starts_tensor = _tensor_reg->getPortableTensor(starts_index).get();
+ auto ends_tensor = _tensor_reg->getPortableTensor(ends_index).get();
+ auto strides_tensor = _tensor_reg->getPortableTensor(strides_index).get();
auto begin_mask = node.param().begin_mask;
auto end_mask = node.param().end_mask;
@@ -999,11 +895,11 @@ void KernelGenerator::visit(const ir::operation::Split &node)
const auto axis = ops::getAxis(rank, node.param().axis, _current_op_seq_layout);
auto axis_resolved = axis < 0 ? axis + rank : axis;
- auto in_tensor = _tensor_builder->portableAt(input_idx).get();
+ auto in_tensor = _tensor_reg->getPortableTensor(input_idx).get();
std::vector<IPortableTensor *> out_tensors;
for (auto &output_idx : node.getOutputs())
- out_tensors.emplace_back(_tensor_builder->portableAt(output_idx).get());
+ out_tensors.emplace_back(_tensor_reg->getPortableTensor(output_idx).get());
auto fn = std::make_unique<ops::SplitLayer>();
@@ -1012,73 +908,13 @@ void KernelGenerator::visit(const ir::operation::Split &node)
_return_fn = std::move(fn);
}
-void KernelGenerator::visit(const ir::operation::Abs &node)
-{
- const auto ofm_index{node.getOutputs().at(0)};
- const auto ifm_index{node.getInputs().at(ir::operation::Abs::Input::INPUT)};
-
- auto ofm_tensor = _tensor_builder->portableAt(ofm_index).get();
- auto ifm_tensor = _tensor_builder->portableAt(ifm_index).get();
-
- auto fn = std::make_unique<ops::AbsLayer>();
-
- fn->configure(ifm_tensor, ofm_tensor);
-
- _return_fn = std::move(fn);
-}
-
-void KernelGenerator::visit(const ir::operation::Sin &node)
-{
- const auto ofm_index{node.getOutputs().at(0)};
- const auto ifm_index{node.getInputs().at(ir::operation::Sin::Input::INPUT)};
-
- auto ofm_tensor = _tensor_builder->portableAt(ofm_index).get();
- auto ifm_tensor = _tensor_builder->portableAt(ifm_index).get();
-
- auto fn = std::make_unique<ops::SinLayer>();
-
- fn->configure(ifm_tensor, ofm_tensor);
-
- _return_fn = std::move(fn);
-}
-
-void KernelGenerator::visit(const ir::operation::Cos &node)
-{
- const auto ofm_index{node.getOutputs().at(0)};
- const auto ifm_index{node.getInputs().at(ir::operation::Cos::Input::INPUT)};
-
- auto ofm_tensor = _tensor_builder->portableAt(ofm_index).get();
- auto ifm_tensor = _tensor_builder->portableAt(ifm_index).get();
-
- auto fn = std::make_unique<ops::CosLayer>();
-
- fn->configure(ifm_tensor, ofm_tensor);
-
- _return_fn = std::move(fn);
-}
-
-void KernelGenerator::visit(const ir::operation::RSQRT &node)
-{
- const auto ofm_index{node.getOutputs().at(0)};
- const auto ifm_index{node.getInputs().at(ir::operation::RSQRT::Input::INPUT)};
-
- auto ofm_tensor = _tensor_builder->portableAt(ofm_index).get();
- auto ifm_tensor = _tensor_builder->portableAt(ifm_index).get();
-
- auto fn = std::make_unique<ops::RsqrtLayer>();
-
- fn->configure(ifm_tensor, ofm_tensor);
-
- _return_fn = std::move(fn);
-}
-
void KernelGenerator::visit(const ir::operation::Shape &node)
{
const auto ofm_index{node.getOutputs().at(0)};
const auto ifm_index{node.getInputs().at(ir::operation::Shape::Input::INPUT)};
- auto ofm_tensor = _tensor_builder->portableAt(ofm_index).get();
- auto ifm_tensor = _tensor_builder->portableAt(ifm_index).get();
+ auto ofm_tensor = _tensor_reg->getPortableTensor(ofm_index).get();
+ auto ifm_tensor = _tensor_reg->getPortableTensor(ifm_index).get();
auto fn = std::make_unique<ops::ShapeLayer>();
@@ -1097,8 +933,8 @@ void KernelGenerator::visit(const ir::operation::ResizeBilinear &node)
auto align_corners = node.param().align_corners;
auto half_pixel_centers = node.param().half_pixel_centers;
- auto output_tensor = _tensor_builder->portableAt(output_index).get();
- auto input_tensor = _tensor_builder->portableAt(input_index).get();
+ auto output_tensor = _tensor_reg->getPortableTensor(output_index).get();
+ auto input_tensor = _tensor_reg->getPortableTensor(input_index).get();
auto fn = std::make_unique<ops::ResizeBilinearLayer>();
@@ -1114,9 +950,9 @@ void KernelGenerator::visit(const ir::operation::Reverse &node)
const auto input_index{node.getInputs().at(ir::operation::Reverse::INPUT)};
const auto axis_index{node.getInputs().at(ir::operation::Reverse::AXIS)};
- auto output_tensor = _tensor_builder->portableAt(output_index).get();
- auto input_tensor = _tensor_builder->portableAt(input_index).get();
- auto axis_tensor = _tensor_builder->portableAt(axis_index).get();
+ auto output_tensor = _tensor_reg->getPortableTensor(output_index).get();
+ auto input_tensor = _tensor_reg->getPortableTensor(input_index).get();
+ auto axis_tensor = _tensor_reg->getPortableTensor(axis_index).get();
auto fn = std::make_unique<ops::ReverseLayer>();
@@ -1125,21 +961,6 @@ void KernelGenerator::visit(const ir::operation::Reverse &node)
_return_fn = std::move(fn);
}
-void KernelGenerator::visit(const ir::operation::Neg &node)
-{
- const auto ofm_index{node.getOutputs().at(0)};
- const auto ifm_index{node.getInputs().at(ir::operation::Neg::Input::INPUT)};
-
- auto ofm_tensor = _tensor_builder->portableAt(ofm_index).get();
- auto ifm_tensor = _tensor_builder->portableAt(ifm_index).get();
-
- auto fn = std::make_unique<ops::NegLayer>();
-
- fn->configure(ifm_tensor, ofm_tensor);
-
- _return_fn = std::move(fn);
-}
-
void KernelGenerator::visit(const ir::operation::ArgMax &node)
{
const auto output_index{node.getOutputs().at(0)};
@@ -1147,8 +968,8 @@ void KernelGenerator::visit(const ir::operation::ArgMax &node)
const auto axis = node.param().axis;
- auto output_tensor = _tensor_builder->portableAt(output_index).get();
- auto input_tensor = _tensor_builder->portableAt(input_index).get();
+ auto output_tensor = _tensor_reg->getPortableTensor(output_index).get();
+ auto input_tensor = _tensor_reg->getPortableTensor(input_index).get();
auto fn = std::make_unique<ops::ArgMinMaxLayer>();
@@ -1157,81 +978,45 @@ void KernelGenerator::visit(const ir::operation::ArgMax &node)
_return_fn = std::move(fn);
}
-void KernelGenerator::visit(const ir::operation::Pow &node)
-{
- const auto output_index{node.getOutputs().at(0)};
- const auto lhs_index{node.getInputs().at(ir::operation::Pow::LHS)};
- const auto rhs_index{node.getInputs().at(ir::operation::Pow::RHS)};
-
- auto output_tensor = _tensor_builder->portableAt(output_index).get();
- auto lhs_tensor = _tensor_builder->portableAt(lhs_index).get();
- auto rhs_tensor = _tensor_builder->portableAt(rhs_index).get();
-
- auto fn = std::make_unique<ops::PowLayer>();
-
- fn->configure(lhs_tensor, rhs_tensor, ir::Activation::NONE, output_tensor);
-
- _return_fn = std::move(fn);
-}
-
-void KernelGenerator::visit(const ir::operation::Log &node)
+void KernelGenerator::visit(const ir::operation::Pool2D &node)
{
const auto ofm_index{node.getOutputs().at(0)};
- const auto ifm_index{node.getInputs().at(ir::operation::Log::Input::INPUT)};
-
- auto ofm_tensor = _tensor_builder->portableAt(ofm_index).get();
- auto ifm_tensor = _tensor_builder->portableAt(ifm_index).get();
-
- auto fn = std::make_unique<ops::LogLayer>();
-
- fn->configure(ifm_tensor, ofm_tensor);
-
- _return_fn = std::move(fn);
-}
+ const auto ifm_index{node.getInputs().at(ir::operation::Pool2D::Input::INPUT)};
-void KernelGenerator::visit(const ir::operation::Round &node)
-{
- const auto output_index{node.getOutputs().at(0)};
- const auto input_index{node.getInputs().at(ir::operation::Round::INPUT)};
+ const auto kh = node.param().kh;
+ const auto kw = node.param().kw;
+ const auto stride = node.param().stride;
+ const auto ifm_shape = _ctx.at(ifm_index).shape().asFeature(_current_op_seq_layout);
+ const auto ofm_shape = _ctx.at(ofm_index).shape().asFeature(_current_op_seq_layout);
+ const auto padding =
+ ir::calculatePadding(node.param().padding, ifm_shape, ofm_shape, stride, kw, kh);
+ const auto activation = node.param().activation;
- auto output_tensor = _tensor_builder->portableAt(output_index).get();
- auto input_tensor = _tensor_builder->portableAt(input_index).get();
+ auto ofm_tensor = _tensor_reg->getPortableTensor(ofm_index).get();
+ auto ifm_tensor = _tensor_reg->getPortableTensor(ifm_index).get();
- auto fn = std::make_unique<ops::RoundLayer>();
+ auto fn = std::make_unique<ops::PoolLayer>();
- fn->configure(input_tensor, output_tensor);
+ fn->configure(ifm_tensor, padding.left, padding.right, padding.top, padding.bottom,
+ stride.horizontal, stride.vertical, kw, kh, activation, ofm_tensor,
+ convertPoolType(node.param().op_type));
_return_fn = std::move(fn);
}
-void KernelGenerator::visit(const ir::operation::LogicalNot &node)
+void KernelGenerator::visit(const ir::operation::Pow &node)
{
const auto output_index{node.getOutputs().at(0)};
- const auto input_index{node.getInputs().at(ir::operation::LogicalNot::INPUT)};
-
- auto output_tensor = _tensor_builder->portableAt(output_index).get();
- auto input_tensor = _tensor_builder->portableAt(input_index).get();
-
- auto fn = std::make_unique<ops::LogicalNotLayer>();
-
- fn->configure(input_tensor, output_tensor);
-
- _return_fn = std::move(fn);
-}
-
-void KernelGenerator::visit(const ir::operation::LogicalOr &node)
-{
- const auto ofm_index{node.getOutputs().at(0)};
- const auto lhs_index{node.getInputs().at(0)};
- const auto rhs_index{node.getInputs().at(1)};
+ const auto lhs_index{node.getInputs().at(ir::operation::Pow::LHS)};
+ const auto rhs_index{node.getInputs().at(ir::operation::Pow::RHS)};
- auto ofm_tensor = _tensor_builder->portableAt(ofm_index).get();
- auto lhs_tensor = _tensor_builder->portableAt(lhs_index).get();
- auto rhs_tensor = _tensor_builder->portableAt(rhs_index).get();
+ auto output_tensor = _tensor_reg->getPortableTensor(output_index).get();
+ auto lhs_tensor = _tensor_reg->getPortableTensor(lhs_index).get();
+ auto rhs_tensor = _tensor_reg->getPortableTensor(rhs_index).get();
- auto fn = std::make_unique<ops::LogicalOrLayer>();
+ auto fn = std::make_unique<ops::PowLayer>();
- fn->configure(lhs_tensor, rhs_tensor, ofm_tensor);
+ fn->configure(lhs_tensor, rhs_tensor, ir::Activation::NONE, output_tensor);
_return_fn = std::move(fn);
}
@@ -1241,8 +1026,8 @@ void KernelGenerator::visit(const ir::operation::L2Normalization &node)
const auto output_index{node.getOutputs().at(0)};
const auto input_index{node.getInputs().at(0)};
- auto output_alloc = _tensor_builder->portableAt(output_index).get();
- auto input_alloc = _tensor_builder->portableAt(input_index).get();
+ auto output_alloc = _tensor_reg->getPortableTensor(output_index).get();
+ auto input_alloc = _tensor_reg->getPortableTensor(input_index).get();
auto fn = std::make_unique<ops::L2NormLayer>();
@@ -1251,35 +1036,36 @@ void KernelGenerator::visit(const ir::operation::L2Normalization &node)
_return_fn = std::move(fn);
}
-void KernelGenerator::visit(const ir::operation::ZerosLike &node)
+void KernelGenerator::visit(const ir::operation::Range &node)
{
const auto output_index{node.getOutputs().at(0)};
- const auto input_index{node.getInputs().at(ir::operation::ZerosLike::INPUT)};
+ const auto start_index{node.getInputs().at(ir::operation::Range::START)};
+ const auto limit_index{node.getInputs().at(ir::operation::Range::LIMIT)};
+ const auto delta_index{node.getInputs().at(ir::operation::Range::DELTA)};
- auto output_tensor = _tensor_builder->portableAt(output_index).get();
- auto input_tensor = _tensor_builder->portableAt(input_index).get();
+ auto output_tensor = _tensor_reg->getPortableTensor(output_index).get();
+ auto start_tensor = _tensor_reg->getPortableTensor(start_index).get();
+ auto limit_tensor = _tensor_reg->getPortableTensor(limit_index).get();
+ auto delta_tensor = _tensor_reg->getPortableTensor(delta_index).get();
- auto fn = std::make_unique<ops::ZerosLikeLayer>();
+ auto fn = std::make_unique<ops::RangeLayer>();
- fn->configure(input_tensor, output_tensor);
+ fn->configure(start_tensor, limit_tensor, delta_tensor, output_tensor);
_return_fn = std::move(fn);
}
-void KernelGenerator::visit(const ir::operation::Range &node)
+void KernelGenerator::visit(const ir::operation::Rank &node)
{
- const auto output_index{node.getOutputs().at(0)};
- const auto start_index{node.getInputs().at(ir::operation::Range::START)};
- const auto limit_index{node.getInputs().at(ir::operation::Range::LIMIT)};
- const auto delta_index{node.getInputs().at(ir::operation::Range::DELTA)};
+ const auto ofm_index{node.getOutputs().at(0)};
+ const auto ifm_index{node.getInputs().at(ir::operation::Shape::Input::INPUT)};
- auto output_tensor = _tensor_builder->portableAt(output_index).get();
- auto start_tensor = _tensor_builder->portableAt(start_index).get();
- auto limit_tensor = _tensor_builder->portableAt(limit_index).get();
- auto delta_tensor = _tensor_builder->portableAt(delta_index).get();
+ auto ofm_tensor = _tensor_reg->getPortableTensor(ofm_index).get();
+ auto ifm_tensor = _tensor_reg->getPortableTensor(ifm_index).get();
- auto fn = std::make_unique<ops::RangeLayer>();
+ auto fn = std::make_unique<ops::RankLayer>();
+
+ fn->configure(ifm_tensor, ofm_tensor);
- fn->configure(start_tensor, limit_tensor, delta_tensor, output_tensor);
_return_fn = std::move(fn);
}
@@ -1289,9 +1075,9 @@ void KernelGenerator::visit(const ir::operation::SquaredDifference &node)
const auto lhs_index{node.getInputs().at(ir::operation::SquaredDifference::Input::LHS)};
const auto rhs_index{node.getInputs().at(ir::operation::SquaredDifference::Input::RHS)};
- auto ofm_tensor = _tensor_builder->portableAt(ofm_index).get();
- auto lhs_tensor = _tensor_builder->portableAt(lhs_index).get();
- auto rhs_tensor = _tensor_builder->portableAt(rhs_index).get();
+ auto ofm_tensor = _tensor_reg->getPortableTensor(ofm_index).get();
+ auto lhs_tensor = _tensor_reg->getPortableTensor(lhs_index).get();
+ auto rhs_tensor = _tensor_reg->getPortableTensor(rhs_index).get();
auto fn = std::make_unique<ops::SqDiffLayer>();
@@ -1305,9 +1091,9 @@ void KernelGenerator::visit(const ir::operation::Tile &node)
const auto input_index{node.getInputs().at(ir::operation::Tile::INPUT)};
const auto multiples_index{node.getInputs().at(ir::operation::Tile::MULTIPLES)};
- auto output_tensor = _tensor_builder->portableAt(output_index).get();
- auto input_tensor = _tensor_builder->portableAt(input_index).get();
- auto multiples_tensor = _tensor_builder->portableAt(multiples_index).get();
+ auto output_tensor = _tensor_reg->getPortableTensor(output_index).get();
+ auto input_tensor = _tensor_reg->getPortableTensor(input_index).get();
+ auto multiples_tensor = _tensor_reg->getPortableTensor(multiples_index).get();
auto fn = std::make_unique<ops::TileLayer>();
@@ -1322,10 +1108,10 @@ void KernelGenerator::visit(const ir::operation::MatrixBandPart &node)
const auto num_lower_index{node.getInputs().at(ir::operation::MatrixBandPart::NUM_LOWER_DIAG)};
const auto num_upper_index{node.getInputs().at(ir::operation::MatrixBandPart::NUM_UPPER_DIAG)};
- auto output_tensor = _tensor_builder->portableAt(output_index).get();
- auto input_tensor = _tensor_builder->portableAt(input_index).get();
- auto num_lower_tensor = _tensor_builder->portableAt(num_lower_index).get();
- auto num_upper_tensor = _tensor_builder->portableAt(num_upper_index).get();
+ auto output_tensor = _tensor_reg->getPortableTensor(output_index).get();
+ auto input_tensor = _tensor_reg->getPortableTensor(input_index).get();
+ auto num_lower_tensor = _tensor_reg->getPortableTensor(num_lower_index).get();
+ auto num_upper_tensor = _tensor_reg->getPortableTensor(num_upper_index).get();
auto fn = std::make_unique<ops::MatrixBandPartLayer>();
@@ -1339,9 +1125,9 @@ void KernelGenerator::visit(const ir::operation::BatchMatMul &node)
const auto lhs_index{node.getInputs().at(ir::operation::BatchMatMul::LHS)};
const auto rhs_index{node.getInputs().at(ir::operation::BatchMatMul::RHS)};
- auto output_tensor = _tensor_builder->portableAt(output_index).get();
- auto lhs_tensor = _tensor_builder->portableAt(lhs_index).get();
- auto rhs_tensor = _tensor_builder->portableAt(rhs_index).get();
+ auto output_tensor = _tensor_reg->getPortableTensor(output_index).get();
+ auto lhs_tensor = _tensor_reg->getPortableTensor(lhs_index).get();
+ auto rhs_tensor = _tensor_reg->getPortableTensor(rhs_index).get();
const auto adj_x = node.param().adj_x;
const auto adj_y = node.param().adj_y;
@@ -1358,9 +1144,9 @@ void KernelGenerator::visit(const ir::operation::BroadcastTo &node)
const auto input_index{node.getInputs().at(ir::operation::BroadcastTo::INPUT)};
const auto shape_index{node.getInputs().at(ir::operation::BroadcastTo::SHAPE)};
- auto output_tensor = _tensor_builder->portableAt(output_index).get();
- auto input_tensor = _tensor_builder->portableAt(input_index).get();
- auto shape_tensor = _tensor_builder->portableAt(shape_index).get();
+ auto output_tensor = _tensor_reg->getPortableTensor(output_index).get();
+ auto input_tensor = _tensor_reg->getPortableTensor(input_index).get();
+ auto shape_tensor = _tensor_reg->getPortableTensor(shape_index).get();
auto fn = std::make_unique<ops::BroadcastToLayer>();
@@ -1373,10 +1159,10 @@ void KernelGenerator::visit(const ir::operation::FusedBatchNorm &node)
{
const auto ofm_index{node.getOutputs().at(0)};
- auto output_tensor = _tensor_builder->portableAt(ofm_index).get();
+ auto output_tensor = _tensor_reg->getPortableTensor(ofm_index).get();
std::vector<const IPortableTensor *> input_tensors;
for (auto &ifm_idx : node.getInputs())
- input_tensors.emplace_back(_tensor_builder->portableAt(ifm_idx).get());
+ input_tensors.emplace_back(_tensor_reg->getPortableTensor(ifm_idx).get());
const auto epsilon = node.param().epsilon;
const auto is_training = node.param().is_training;
@@ -1397,8 +1183,8 @@ void KernelGenerator::visit(const ir::operation::LogSoftmax &node)
const auto beta = node.param().beta;
const auto axis = node.param().axis;
- auto output_tensor = _tensor_builder->portableAt(output_index).get();
- auto input_tensor = _tensor_builder->portableAt(input_index).get();
+ auto output_tensor = _tensor_reg->getPortableTensor(output_index).get();
+ auto input_tensor = _tensor_reg->getPortableTensor(input_index).get();
auto fn = std::make_unique<ops::LogSoftMaxLayer>();
@@ -1414,10 +1200,10 @@ void KernelGenerator::visit(const ir::operation::SpaceToBatchND &node)
const auto block_shape_index{node.getInputs().at(ir::operation::SpaceToBatchND::BLOCK_SIZE)};
const auto padding_index{node.getInputs().at(ir::operation::SpaceToBatchND::PADDINGS)};
- auto output_tensor = _tensor_builder->portableAt(output_index).get();
- auto input_tensor = _tensor_builder->portableAt(input_index).get();
- auto block_shape_tensor = _tensor_builder->portableAt(block_shape_index).get();
- auto padding_tensor = _tensor_builder->portableAt(padding_index).get();
+ auto output_tensor = _tensor_reg->getPortableTensor(output_index).get();
+ auto input_tensor = _tensor_reg->getPortableTensor(input_index).get();
+ auto block_shape_tensor = _tensor_reg->getPortableTensor(block_shape_index).get();
+ auto padding_tensor = _tensor_reg->getPortableTensor(padding_index).get();
auto fn = std::make_unique<ops::SpaceToBatchNDLayer>();
@@ -1426,29 +1212,14 @@ void KernelGenerator::visit(const ir::operation::SpaceToBatchND &node)
_return_fn = std::move(fn);
}
-void KernelGenerator::visit(const ir::operation::Quantize &node)
-{
- const auto input_index{node.getInputs().at(ir::operation::Quantize::Input::INPUT)};
- const auto output_index{node.getOutputs().at(0)};
-
- auto input_tensor = _tensor_builder->portableAt(input_index).get();
- auto output_tensor = _tensor_builder->portableAt(output_index).get();
-
- auto fn = std::make_unique<ops::QuantizeLayer>();
-
- fn->configure(input_tensor, output_tensor);
-
- _return_fn = std::move(fn);
-}
-
void KernelGenerator::visit(const ir::operation::SpaceToDepth &node)
{
const auto input_index{node.getInputs().at(ir::operation::SpaceToDepth::Input::INPUT)};
const auto output_index{node.getOutputs().at(0)};
auto block_size = node.param().block_size;
- auto input_tensor = _tensor_builder->portableAt(input_index).get();
- auto output_tensor = _tensor_builder->portableAt(output_index).get();
+ auto input_tensor = _tensor_reg->getPortableTensor(input_index).get();
+ auto output_tensor = _tensor_reg->getPortableTensor(output_index).get();
auto fn = std::make_unique<ops::SpaceToDepthLayer>();
@@ -1462,9 +1233,9 @@ void KernelGenerator::visit(const ir::operation::StatelessRandomUniform &node)
const auto shape_index{node.getInputs().at(ir::operation::StatelessRandomUniform::SHAPE)};
const auto seed_index{node.getInputs().at(ir::operation::StatelessRandomUniform::SEED)};
- auto output_alloc = _tensor_builder->portableAt(output_index).get();
- auto shape_alloc = _tensor_builder->portableAt(shape_index).get();
- auto seed_alloc = _tensor_builder->portableAt(seed_index).get();
+ auto output_alloc = _tensor_reg->getPortableTensor(output_index).get();
+ auto shape_alloc = _tensor_reg->getPortableTensor(shape_index).get();
+ auto seed_alloc = _tensor_reg->getPortableTensor(seed_index).get();
auto fn = std::make_unique<ops::StatelessRandomUniformLayer>();
@@ -1481,13 +1252,13 @@ void KernelGenerator::visit(const ir::operation::SplitV &node)
const auto size_splits{node.getInputs().at(ir::operation::SplitV::Input::SIZE_SPLITS)};
const auto split_dim{node.getInputs().at(ir::operation::SplitV::Input::SPLIT_DIM)};
- auto in_tensor = _tensor_builder->portableAt(input_idx).get();
- auto in_size_splits = _tensor_builder->portableAt(size_splits).get();
- auto in_split_dim = _tensor_builder->portableAt(split_dim).get();
+ auto in_tensor = _tensor_reg->getPortableTensor(input_idx).get();
+ auto in_size_splits = _tensor_reg->getPortableTensor(size_splits).get();
+ auto in_split_dim = _tensor_reg->getPortableTensor(split_dim).get();
std::vector<IPortableTensor *> out_tensors;
for (auto &output_idx : node.getOutputs())
- out_tensors.emplace_back(_tensor_builder->portableAt(output_idx).get());
+ out_tensors.emplace_back(_tensor_reg->getPortableTensor(output_idx).get());
auto fn = std::make_unique<ops::SplitVLayer>();
diff --git a/runtime/onert/backend/cpu/KernelGenerator.h b/runtime/onert/backend/cpu/KernelGenerator.h
index 40c056a96..786e68ee0 100644
--- a/runtime/onert/backend/cpu/KernelGenerator.h
+++ b/runtime/onert/backend/cpu/KernelGenerator.h
@@ -19,6 +19,7 @@
#include "ExternalContext.h"
#include "TensorBuilder.h"
+#include "backend/cpu_common/TensorRegistry.h"
#include "Tensor.h"
#include <backend/CustomKernelBuilder.h>
@@ -38,6 +39,7 @@ class KernelGenerator : public IKernelGenerator
public:
KernelGenerator(const ir::Operands &operands_ctx, const ir::Operations &operations_ctx,
const std::shared_ptr<TensorBuilder> &tensor_builder,
+ const std::shared_ptr<cpu_common::TensorRegistry> &tensor_reg,
const std::shared_ptr<custom::IKernelBuilder> &kernel_builder,
const std::shared_ptr<ExternalContext> &external_context);
@@ -46,8 +48,6 @@ public:
void visit(const ir::OpSequence &) override;
void visit(const ir::operation::Conv2D &) override;
void visit(const ir::operation::DepthwiseConv2D &) override;
- void visit(const ir::operation::MaxPool2D &) override;
- void visit(const ir::operation::AvgPool2D &) override;
void visit(const ir::operation::Concat &) override;
void visit(const ir::operation::Fill &) override;
void visit(const ir::operation::FullyConnected &) override;
@@ -55,51 +55,35 @@ public:
void visit(const ir::operation::Squeeze &) override;
void visit(const ir::operation::Softmax &) override;
void visit(const ir::operation::Comparison &) override;
- void visit(const ir::operation::Add &) override;
- void visit(const ir::operation::Sub &) override;
- void visit(const ir::operation::Mul &) override;
- void visit(const ir::operation::Div &) override;
+ void visit(const ir::operation::BinaryArithmetic &) override;
void visit(const ir::operation::Einsum &) override;
void visit(const ir::operation::Gather &) override;
void visit(const ir::operation::Custom &node) override;
- void visit(const ir::operation::Exp &) override;
+ void visit(const ir::operation::ElementwiseActivation &) override;
+ void visit(const ir::operation::ElementwiseBinary &) override;
+ void visit(const ir::operation::ElementwiseUnary &) override;
void visit(const ir::operation::ExpandDims &) override;
- void visit(const ir::operation::Logistic &) override;
void visit(const ir::operation::Pad &) override;
- void visit(const ir::operation::Max &) override;
- void visit(const ir::operation::Min &) override;
- void visit(const ir::operation::Tanh &) override;
void visit(const ir::operation::Pack &) override;
void visit(const ir::operation::Unpack &) override;
void visit(const ir::operation::OneHot &) override;
- void visit(const ir::operation::Cast &) override;
void visit(const ir::operation::Transpose &) override;
void visit(const ir::operation::Reduce &) override;
- void visit(const ir::operation::ReLU &) override;
- void visit(const ir::operation::ReLU6 &) override;
void visit(const ir::operation::Select &) override;
void visit(const ir::operation::Slice &) override;
void visit(const ir::operation::StridedSlice &) override;
void visit(const ir::operation::Split &) override;
- void visit(const ir::operation::Abs &) override;
- void visit(const ir::operation::Cos &) override;
- void visit(const ir::operation::Sin &) override;
- void visit(const ir::operation::RSQRT &) override;
void visit(const ir::operation::Shape &) override;
void visit(const ir::operation::ResizeBilinear &node) override;
void visit(const ir::operation::Reverse &) override;
- void visit(const ir::operation::Neg &) override;
void visit(const ir::operation::ArgMax &) override;
- void visit(const ir::operation::Log &) override;
- void visit(const ir::operation::Round &) override;
+ void visit(const ir::operation::Pool2D &) override;
void visit(const ir::operation::Pow &) override;
- void visit(const ir::operation::LogicalNot &) override;
- void visit(const ir::operation::ZerosLike &) override;
void visit(const ir::operation::SquaredDifference &) override;
void visit(const ir::operation::Tile &) override;
- void visit(const ir::operation::LogicalOr &) override;
void visit(const ir::operation::L2Normalization &) override;
void visit(const ir::operation::Range &) override;
+ void visit(const ir::operation::Rank &) override;
void visit(const ir::operation::MatrixBandPart &) override;
void visit(const ir::operation::BatchMatMul &) override;
void visit(const ir::operation::BatchToSpaceND &) override;
@@ -107,7 +91,6 @@ public:
void visit(const ir::operation::FusedBatchNorm &) override;
void visit(const ir::operation::LogSoftmax &) override;
void visit(const ir::operation::SpaceToBatchND &) override;
- void visit(const ir::operation::Quantize &) override;
void visit(const ir::operation::SpaceToDepth &) override;
void visit(const ir::operation::StatelessRandomUniform &) override;
void visit(const ir::operation::SplitV &) override;
@@ -116,6 +99,7 @@ private:
const ir::Operands &_ctx;
const ir::Operations &_operations_ctx;
std::shared_ptr<TensorBuilder> _tensor_builder;
+ std::shared_ptr<cpu_common::TensorRegistry> _tensor_reg;
std::shared_ptr<backend::custom::IKernelBuilder> _kernel_builder;
ir::Layout _current_op_seq_layout;
const std::shared_ptr<ExternalContext> _external_context;
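
Taken together with the TensorBuilder changes that follow, the header change above means the cpu_common::TensorRegistry is now created outside and shared: the builder registers tensors into it, and the kernel generator looks them up directly instead of going through TensorBuilder::portableAt. A rough wiring sketch under that assumption; the glue code below is illustrative, not the actual onert backend context:

    #include <memory>

    // Stand-in types: the names mirror the diff, but the bodies are invented for illustration.
    struct TensorRegistry
    {
    };

    struct TensorBuilder
    {
      explicit TensorBuilder(std::shared_ptr<TensorRegistry> reg) : _tensor_reg(std::move(reg)) {}
      std::shared_ptr<TensorRegistry> _tensor_reg;
    };

    struct KernelGenerator
    {
      KernelGenerator(std::shared_ptr<TensorBuilder> builder, std::shared_ptr<TensorRegistry> reg)
          : _tensor_builder(std::move(builder)), _tensor_reg(std::move(reg))
      {
      }
      std::shared_ptr<TensorBuilder> _tensor_builder;
      std::shared_ptr<TensorRegistry> _tensor_reg;
    };

    int main()
    {
      auto tensor_reg = std::make_shared<TensorRegistry>();              // one registry, created up front
      auto tensor_builder = std::make_shared<TensorBuilder>(tensor_reg); // builder registers tensors into it
      KernelGenerator kernel_gen{tensor_builder, tensor_reg};            // generator reads from it directly
      (void)kernel_gen;
    }
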
diff --git a/runtime/onert/backend/cpu/TensorBuilder.cc b/runtime/onert/backend/cpu/TensorBuilder.cc
index ab8ba5756..828d52f7c 100644
--- a/runtime/onert/backend/cpu/TensorBuilder.cc
+++ b/runtime/onert/backend/cpu/TensorBuilder.cc
@@ -27,8 +27,8 @@ namespace backend
namespace cpu
{
-TensorBuilder::TensorBuilder()
- : _tensor_reg{new cpu_common::TensorRegistry()},
+TensorBuilder::TensorBuilder(const std::shared_ptr<cpu_common::TensorRegistry> &tensor_reg)
+ : _tensor_reg{tensor_reg},
_dynamic_tensor_mgr{new cpu_common::DynamicTensorManager(_tensor_reg)},
_static_tensor_mgr{new StaticTensorManager(_tensor_reg, _dynamic_tensor_mgr.get())}
{
@@ -57,7 +57,7 @@ void TensorBuilder::notifyFirstUse(const ir::OperandIndex &ind)
assert(_tensor_info_map.find(ind) != _tensor_info_map.end());
const auto tensor_info = _tensor_info_map.at(ind);
- if (!at(ind)->is_dynamic())
+ if (!_tensor_reg->getNativeTensor(ind)->is_dynamic())
{
const auto size = tensor_info.total_size();
_static_tensor_mgr->claimPlan(ind, size);
@@ -66,7 +66,7 @@ void TensorBuilder::notifyFirstUse(const ir::OperandIndex &ind)
void TensorBuilder::notifyLastUse(const ir::OperandIndex &ind)
{
- if (!at(ind)->is_dynamic())
+ if (!_tensor_reg->getNativeTensor(ind)->is_dynamic())
{
_static_tensor_mgr->releasePlan(ind);
}
@@ -85,29 +85,6 @@ void TensorBuilder::allocate()
// This is because CPU kernels require `ITensor`s to be allocated before Kernel Generation.
}
-std::shared_ptr<ITensor> TensorBuilder::tensorAt(const ir::OperandIndex &ind)
-{
- return _tensor_reg->getITensor(ind);
-}
-
-std::shared_ptr<IPortableTensor> TensorBuilder::portableAt(const ir::OperandIndex &ind)
-{
- return _tensor_reg->getPortableTensor(ind);
-}
-
-bool TensorBuilder::setMigrantTensor(const ir::OperandIndex &ind,
- const std::shared_ptr<IPortableTensor> &tensor)
-{
- return _tensor_reg->setMigrantTensor(ind, tensor);
-}
-
-void TensorBuilder::iterate(const IterateFunction &fn) { _static_tensor_mgr->iterate(fn); }
-
-std::shared_ptr<Tensor> TensorBuilder::at(const ir::OperandIndex &ind)
-{
- return _tensor_reg->getNativeTensor(ind);
-}
-
std::unique_ptr<ITensorManager> TensorBuilder::releaseStaticTensorManager(void)
{
return std::move(_static_tensor_mgr);
diff --git a/runtime/onert/backend/cpu/TensorBuilder.h b/runtime/onert/backend/cpu/TensorBuilder.h
index 617136514..b6d5f09cc 100644
--- a/runtime/onert/backend/cpu/TensorBuilder.h
+++ b/runtime/onert/backend/cpu/TensorBuilder.h
@@ -38,9 +38,7 @@ namespace cpu
class TensorBuilder : public ITensorBuilder
{
public:
- TensorBuilder();
-
- bool supportDynamicTensor() override { return true; }
+ TensorBuilder(const std::shared_ptr<cpu_common::TensorRegistry> &tensor_reg);
/**
* @brief Register tensor information to allocate on CPU backend
@@ -60,34 +58,12 @@ public:
void allocate() override;
void postFunctionPrepare() override { /* DO NOTHING */}
- /**
- * @brief Get tensor with a specific OperandIndex
- *
- * @return shared_ptr<ITensor> if a tensor with given OperandIndex exists. nullptr otherwise.
- */
- std::shared_ptr<ITensor> tensorAt(const ir::OperandIndex &ind) override;
-
- void iterate(const IterateFunction &fn) override;
-
std::unique_ptr<ITensorManager> releaseStaticTensorManager(void) override;
IDynamicTensorManager *dynamicTensorManager(void) override { return _dynamic_tensor_mgr.get(); }
std::unique_ptr<ITensorManager> releaseDynamicTensorManager(void) override;
- /**
- * @brief Get tensor with a specific OperandIndex.
- * @param ind OperandIndex for the tensor. There must exist a tensor with this ind.
- * If not, program will crash with assert or exception.
- * @return shared_ptr<Tensor>
- */
- std::shared_ptr<Tensor> at(const ir::OperandIndex &ind);
- std::shared_ptr<IPortableTensor> portableAt(const ir::OperandIndex &ind);
- bool setMigrantTensor(const ir::OperandIndex &ind,
- const std::shared_ptr<IPortableTensor> &tensor) override;
-
- std::shared_ptr<ITensorRegistry> tensorRegistry() override { return _tensor_reg; }
-
private:
const std::shared_ptr<cpu_common::TensorRegistry> _tensor_reg;
std::unique_ptr<cpu_common::DynamicTensorManager> _dynamic_tensor_mgr;
diff --git a/runtime/onert/backend/cpu/ops/AbsLayer.cc b/runtime/onert/backend/cpu/ops/AbsLayer.cc
deleted file mode 100644
index 322785aeb..000000000
--- a/runtime/onert/backend/cpu/ops/AbsLayer.cc
+++ /dev/null
@@ -1,70 +0,0 @@
-/*
- * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#include "AbsLayer.h"
-
-#include "OperationUtils.h"
-
-#include <cker/operation/Elementwise.h>
-
-namespace onert
-{
-namespace backend
-{
-namespace cpu
-{
-namespace ops
-{
-
-AbsLayer::AbsLayer() : _input(nullptr), _output(nullptr)
-{
- // DO NOTHING
-}
-
-void AbsLayer::absFloat32()
-{
- nnfw::cker::Abs(getTensorShape(_input), reinterpret_cast<const float *>(_input->buffer()),
- getTensorShape(_output), reinterpret_cast<float *>(_output->buffer()));
-}
-
-void AbsLayer::absQuant8() { throw std::runtime_error{"NYI"}; }
-
-void AbsLayer::configure(const IPortableTensor *input, IPortableTensor *output)
-{
- _input = input;
- _output = output;
-}
-
-void AbsLayer::run()
-{
- if (_input->data_type() == OperandType::FLOAT32)
- {
- absFloat32();
- }
- else if (_input->data_type() == OperandType::QUANT_UINT8_ASYMM)
- {
- absQuant8();
- }
- else
- {
- throw std::runtime_error{"Abs: unsupported data type"};
- }
-}
-
-} // namespace ops
-} // namespace cpu
-} // namespace backend
-} // namespace onert
diff --git a/runtime/onert/backend/cpu/ops/AbsLayer.h b/runtime/onert/backend/cpu/ops/AbsLayer.h
deleted file mode 100644
index feb5f35ae..000000000
--- a/runtime/onert/backend/cpu/ops/AbsLayer.h
+++ /dev/null
@@ -1,57 +0,0 @@
-/*
- * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#ifndef __ONERT_BACKEND_CPU_OPS_ABSLAYER_H__
-#define __ONERT_BACKEND_CPU_OPS_ABSLAYER_H__
-
-#include "backend/IPortableTensor.h"
-
-#include <exec/IFunction.h>
-
-namespace onert
-{
-namespace backend
-{
-namespace cpu
-{
-namespace ops
-{
-
-class AbsLayer : public ::onert::exec::IFunction
-{
-public:
- AbsLayer();
-
-public:
- void absFloat32();
-
- void absQuant8();
-
- void configure(const IPortableTensor *input, IPortableTensor *output);
-
- void run() override;
-
-private:
- const IPortableTensor *_input;
- IPortableTensor *_output;
-};
-
-} // namespace ops
-} // namespace cpu
-} // namespace backend
-} // namespace onert
-
-#endif // __ONERT_BACKEND_CPU_OPS_ABSLAYER_H__
diff --git a/runtime/onert/backend/cpu/ops/AddLayer.cc b/runtime/onert/backend/cpu/ops/AddLayer.cc
deleted file mode 100644
index 379215303..000000000
--- a/runtime/onert/backend/cpu/ops/AddLayer.cc
+++ /dev/null
@@ -1,166 +0,0 @@
-/*
- * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#include "AddLayer.h"
-
-#include <cker/operation/BinaryArithmeticOps.h>
-
-namespace onert
-{
-namespace backend
-{
-namespace cpu
-{
-namespace ops
-{
-
-void AddLayer::addFloat32()
-{
- float output_activation_min = 0, output_activation_max = 0;
- CalculateActivationRange(_activation, &output_activation_min, &output_activation_max);
- nnfw::cker::BinaryArithmeticOpParam op_params;
- op_params.float_activation_max = output_activation_max;
- op_params.float_activation_min = output_activation_min;
-
- const bool need_broadcast =
- nnfw::cker::ProcessBroadcastShapes(getTensorShape(_lhs), getTensorShape(_rhs), &op_params);
- if (need_broadcast)
- {
- nnfw::cker::BroadcastBinaryArithmeticOp<nnfw::cker::BinaryArithmeticOpType::ADD>(
- op_params, getTensorShape(_lhs), reinterpret_cast<const float *>(_lhs->buffer()),
- getTensorShape(_rhs), reinterpret_cast<const float *>(_rhs->buffer()),
- getTensorShape(_output), reinterpret_cast<float *>(_output->buffer()));
- return;
- }
-
- nnfw::cker::BinaryArithmeticOp<nnfw::cker::BinaryArithmeticOpType::ADD>(
- op_params, getTensorShape(_lhs), reinterpret_cast<const float *>(_lhs->buffer()),
- getTensorShape(_rhs), reinterpret_cast<const float *>(_rhs->buffer()),
- getTensorShape(_output), reinterpret_cast<float *>(_output->buffer()));
-}
-
-void AddLayer::addInt32()
-{
- int32_t output_activation_min = 0, output_activation_max = 0;
- CalculateActivationRange(_activation, &output_activation_min, &output_activation_max);
- nnfw::cker::BinaryArithmeticOpParam op_params;
- op_params.quantized_activation_max = output_activation_max;
- op_params.quantized_activation_min = output_activation_min;
-
- const bool need_broadcast =
- nnfw::cker::ProcessBroadcastShapes(getTensorShape(_lhs), getTensorShape(_rhs), &op_params);
- if (need_broadcast)
- {
- nnfw::cker::BroadcastBinaryArithmeticOp<nnfw::cker::BinaryArithmeticOpType::ADD>(
- op_params, getTensorShape(_lhs), reinterpret_cast<const int32_t *>(_lhs->buffer()),
- getTensorShape(_rhs), reinterpret_cast<const int32_t *>(_rhs->buffer()),
- getTensorShape(_output), reinterpret_cast<int32_t *>(_output->buffer()));
- return;
- }
-
- nnfw::cker::BinaryArithmeticOp<nnfw::cker::BinaryArithmeticOpType::ADD>(
- op_params, getTensorShape(_lhs), reinterpret_cast<const int32_t *>(_lhs->buffer()),
- getTensorShape(_rhs), reinterpret_cast<const int32_t *>(_rhs->buffer()),
- getTensorShape(_output), reinterpret_cast<int32_t *>(_output->buffer()));
-}
-
-void AddLayer::addQuant8()
-{
- int32_t output_activation_min, output_activation_max;
- CalculateActivationRangeUint8(_activation, _output, &output_activation_min,
- &output_activation_max);
- nnfw::cker::BinaryArithmeticOpParam op_params;
- op_params.quantized_activation_max = output_activation_max;
- op_params.quantized_activation_min = output_activation_min;
- // Parameters for scaled quantized computation
- op_params.left_shift = 20;
- // Zero-points of input and output tensors
- op_params.input1_offset = -_lhs->data_offset();
- op_params.input2_offset = -_rhs->data_offset();
- op_params.output_offset = _output->data_offset();
- assert((op_params.input1_offset >= 0) && (op_params.input1_offset <= 255));
- assert((op_params.input2_offset >= 0) && (op_params.input2_offset <= 255));
- assert((op_params.output_offset >= 0) && (op_params.output_offset <= 255));
-
- // Compute normalized scale for _lhs and _rhs values,
- // and represent in 32-bit fixed point
- const double norm_max_scale = 2 * std::max(_lhs->data_scale(), _rhs->data_scale());
- const double real_lhs_scale = _lhs->data_scale() / norm_max_scale;
- const double real_rhs_scale = _rhs->data_scale() / norm_max_scale;
- // output scale is used to normalize final result, so we invert the scale here
- const double real_output_scale =
- norm_max_scale / (_output->data_scale() * (1 << op_params.left_shift));
-
- // Represent the scales as fixed int32_t multipliers, and int32_t shifts
- QuantizeMultiplier(real_lhs_scale, &op_params.input1_multiplier, &op_params.input1_shift);
- QuantizeMultiplier(real_rhs_scale, &op_params.input2_multiplier, &op_params.input2_shift);
- QuantizeMultiplier(real_output_scale, &op_params.output_multiplier, &op_params.output_shift);
-
- // cker quant8 add is not implemented yet
- const bool need_broadcast =
- nnfw::cker::ProcessBroadcastShapes(getTensorShape(_lhs), getTensorShape(_rhs), &op_params);
- if (need_broadcast)
- {
- nnfw::cker::BroadcastBinaryArithmeticOp<nnfw::cker::BinaryArithmeticOpType::ADD>(
- op_params, getTensorShape(_lhs), reinterpret_cast<const uint8_t *>(_lhs->buffer()),
- getTensorShape(_rhs), reinterpret_cast<const uint8_t *>(_rhs->buffer()),
- getTensorShape(_output), reinterpret_cast<uint8_t *>(_output->buffer()));
- return;
- }
-
- nnfw::cker::BinaryArithmeticOp<nnfw::cker::BinaryArithmeticOpType::ADD>(
- op_params, getTensorShape(_lhs), reinterpret_cast<const uint8_t *>(_lhs->buffer()),
- getTensorShape(_rhs), reinterpret_cast<const uint8_t *>(_rhs->buffer()),
- getTensorShape(_output), reinterpret_cast<uint8_t *>(_output->buffer()));
-}
-
-void AddLayer::configure(const IPortableTensor *lhs, const IPortableTensor *rhs,
- const ir::Activation activation, IPortableTensor *output)
-{
- assert(lhs != nullptr);
- assert(rhs != nullptr);
- assert(output != nullptr);
-
- _lhs = lhs;
- _rhs = rhs;
- _activation = activation;
- _output = output;
-}
-
-void AddLayer::run()
-{
- if (_lhs->data_type() == OperandType::FLOAT32)
- {
- addFloat32();
- }
- else if (_lhs->data_type() == OperandType::QUANT_UINT8_ASYMM)
- {
- addQuant8();
- }
- else if (_output->data_type() == OperandType::INT32)
- {
- addInt32();
- }
- else
- {
- throw std::runtime_error{"Add: unsupported data type"};
- }
-}
-
-} // namespace ops
-} // namespace cpu
-} // namespace backend
-} // namespace onert
diff --git a/runtime/onert/backend/cpu/ops/AddLayer.h b/runtime/onert/backend/cpu/ops/AddLayer.h
deleted file mode 100644
index 91030d93a..000000000
--- a/runtime/onert/backend/cpu/ops/AddLayer.h
+++ /dev/null
@@ -1,67 +0,0 @@
-/*
- * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#ifndef __ONERT_BACKEND_CPU_OPS_ADDLAYER_H__
-#define __ONERT_BACKEND_CPU_OPS_ADDLAYER_H__
-
-#include <backend/IPortableTensor.h>
-#include "OperationUtils.h"
-
-#include <exec/IFunction.h>
-
-namespace onert
-{
-namespace backend
-{
-namespace cpu
-{
-namespace ops
-{
-
-class AddLayer : public ::onert::exec::IFunction
-{
-public:
- AddLayer() : _lhs(nullptr), _rhs(nullptr), _output(nullptr)
- {
- // DO NOTHING
- }
-
-public:
- void addFloat32();
-
- void addQuant8();
-
- void addInt32();
-
- void configure(const IPortableTensor *lhs, const IPortableTensor *rhs,
- const ir::Activation activation, IPortableTensor *output);
-
- void run() override;
-
-private:
- const IPortableTensor *_lhs;
- const IPortableTensor *_rhs;
- IPortableTensor *_output;
-
- ir::Activation _activation{ir::Activation::NONE};
-};
-
-} // namespace ops
-} // namespace cpu
-} // namespace backend
-} // namespace onert
-
-#endif // __ONERT_BACKEND_CPU_OPS_ADDLAYER_H__
diff --git a/runtime/onert/backend/cpu/ops/AvgPoolLayer.cc b/runtime/onert/backend/cpu/ops/AvgPoolLayer.cc
deleted file mode 100644
index 9c22c1c86..000000000
--- a/runtime/onert/backend/cpu/ops/AvgPoolLayer.cc
+++ /dev/null
@@ -1,118 +0,0 @@
-/*
- * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#include "AvgPoolLayer.h"
-
-#include <cker/operation/AveragePool.h>
-
-namespace onert
-{
-namespace backend
-{
-namespace cpu
-{
-namespace ops
-{
-
-#define AVGPOOLING_PARAMETERS \
- nnfw::cker::PoolParams op_params; \
- op_params.stride_height = _strideHeight; \
- op_params.stride_width = _strideWidth; \
- op_params.filter_height = _kernelHeight; \
- op_params.filter_width = _kernelWidth; \
- op_params.padding_values.height = (int8_t)_paddingTop; \
- op_params.padding_values.width = (int8_t)_paddingLeft;
-
-AvgPoolLayer::AvgPoolLayer()
- : _input(nullptr), _output(nullptr), _paddingLeft(0), _paddingTop(0), _paddingRight(0),
- _paddingBottom(0), _strideWidth(0), _strideHeight(0), _kernelWidth(0), _kernelHeight(0),
- _activation(ir::Activation::NONE)
-{
- // DO NOTHING
-}
-
-void AvgPoolLayer::averagePoolFloat32()
-{
- AVGPOOLING_PARAMETERS
- float output_activation_min = 0, output_activation_max = 0;
- CalculateActivationRange(_activation, &output_activation_min, &output_activation_max);
- op_params.float_activation_min = output_activation_min;
- op_params.float_activation_max = output_activation_max;
-
- nnfw::cker::AveragePool(op_params, getTensorShape(_input),
- reinterpret_cast<const float *>(_input->buffer()),
- getTensorShape(_output), reinterpret_cast<float *>(_output->buffer()));
-}
-void AvgPoolLayer::averagePoolQuant8()
-{
- AVGPOOLING_PARAMETERS
- int32_t output_activation_min = 0;
- int32_t output_activation_max = 0;
- CalculateActivationRangeUint8(_activation, _output, &output_activation_min,
- &output_activation_max);
- op_params.quantized_activation_min = output_activation_min;
- op_params.quantized_activation_max = output_activation_max;
-
- nnfw::cker::AveragePool(op_params, getTensorShape(_input),
- reinterpret_cast<const uint8_t *>(_input->buffer()),
- getTensorShape(_output), reinterpret_cast<uint8_t *>(_output->buffer()));
-}
-
-void AvgPoolLayer::configure(const IPortableTensor *input, const uint32_t paddingLeft,
- const uint32_t paddingRight, const uint32_t paddingTop,
- const uint32_t paddingBottom, const uint32_t strideWidth,
- const uint32_t strideHeight, const uint32_t kernelWidth,
- const uint32_t kernelHeight, const ir::Activation activation,
- IPortableTensor *output)
-{
- assert(input != nullptr);
- assert(output != nullptr);
-
- _input = input;
- _paddingLeft = paddingLeft;
- _paddingRight = paddingRight;
- _paddingTop = paddingTop;
- _paddingBottom = paddingBottom;
- _strideWidth = strideWidth;
- _strideHeight = strideHeight;
- _kernelWidth = kernelWidth;
- _kernelHeight = kernelHeight;
- _activation = activation;
- _output = output;
-}
-
-void AvgPoolLayer::run()
-{
- if (_input->data_type() == OperandType::FLOAT32)
- {
- averagePoolFloat32();
- }
- else if (_input->data_type() == OperandType::QUANT_UINT8_ASYMM)
- {
- averagePoolQuant8();
- }
- else
- {
- throw std::runtime_error{"AvgPool: unsupported data type"};
- }
-}
-
-#undef AVGPOOLING_PARAMETERS
-
-} // namespace ops
-} // namespace cpu
-} // namespace backend
-} // namespace onert
diff --git a/runtime/onert/backend/cpu/ops/AvgPoolLayer.h b/runtime/onert/backend/cpu/ops/AvgPoolLayer.h
deleted file mode 100644
index d4e8f79e7..000000000
--- a/runtime/onert/backend/cpu/ops/AvgPoolLayer.h
+++ /dev/null
@@ -1,75 +0,0 @@
-/*
- * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#ifndef __ONERT_BACKEND_CPU_OPS_AVGPOOLLAYER_H__
-#define __ONERT_BACKEND_CPU_OPS_AVGPOOLLAYER_H__
-
-#include <backend/IPortableTensor.h>
-#include "OperationUtils.h"
-
-#include <exec/IFunction.h>
-
-namespace onert
-{
-namespace backend
-{
-namespace cpu
-{
-namespace ops
-{
-
-class AvgPoolLayer : public ::onert::exec::IFunction
-{
-public:
- AvgPoolLayer();
-
-public:
- void averagePoolFloat32();
-
- void averagePoolQuant8();
-
- void configure(const IPortableTensor *input, const uint32_t paddingLeft,
- const uint32_t paddingRight, const uint32_t paddingTop,
- const uint32_t paddingBottom, const uint32_t strideWidth,
- const uint32_t strideHeight, const uint32_t kernelWidth,
- const uint32_t kernelHeight, const ir::Activation activation,
- IPortableTensor *output);
-
- void run() override;
-
-private:
- const IPortableTensor *_input;
- IPortableTensor *_output;
-
- uint32_t _paddingLeft;
- uint32_t _paddingTop;
- uint32_t _paddingRight;
- uint32_t _paddingBottom;
-
- uint32_t _strideWidth;
- uint32_t _strideHeight;
- uint32_t _kernelWidth;
- uint32_t _kernelHeight;
-
- ir::Activation _activation;
-};
-
-} // namespace ops
-} // namespace cpu
-} // namespace backend
-} // namespace onert
-
-#endif // __ONERT_BACKEND_CPU_OPS_AVGPOOLLAYER_H__
diff --git a/runtime/onert/backend/cpu/ops/BinaryArithmeticLayer.cc b/runtime/onert/backend/cpu/ops/BinaryArithmeticLayer.cc
new file mode 100644
index 000000000..f50c63375
--- /dev/null
+++ b/runtime/onert/backend/cpu/ops/BinaryArithmeticLayer.cc
@@ -0,0 +1,225 @@
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "BinaryArithmeticLayer.h"
+
+#include <cker/operation/BinaryArithmeticOps.h>
+
+namespace onert
+{
+namespace backend
+{
+namespace cpu
+{
+namespace ops
+{
+
+namespace
+{
+
+template <nnfw::cker::BinaryArithmeticOpType arithmetic_type, typename T>
+void eval(const IPortableTensor *lhs, const IPortableTensor *rhs, IPortableTensor *output,
+ nnfw::cker::BinaryArithmeticOpParam op_params)
+{
+ const bool need_broadcast =
+ nnfw::cker::ProcessBroadcastShapes(getTensorShape(lhs), getTensorShape(rhs), &op_params);
+ if (need_broadcast)
+ {
+ nnfw::cker::BroadcastBinaryArithmeticOp<arithmetic_type>(
+ op_params, getTensorShape(lhs), reinterpret_cast<const T *>(lhs->buffer()),
+ getTensorShape(rhs), reinterpret_cast<const T *>(rhs->buffer()), getTensorShape(output),
+ reinterpret_cast<T *>(output->buffer()));
+ return;
+ }
+
+ nnfw::cker::BinaryArithmeticOp<arithmetic_type>(
+ op_params, getTensorShape(lhs), reinterpret_cast<const T *>(lhs->buffer()),
+ getTensorShape(rhs), reinterpret_cast<const T *>(rhs->buffer()), getTensorShape(output),
+ reinterpret_cast<T *>(output->buffer()));
+}
+
+template <nnfw::cker::BinaryArithmeticOpType arithmetic_type>
+std::function<void(const IPortableTensor *, const IPortableTensor *, IPortableTensor *)>
+generateKernelGeneric(const IPortableTensor *lhs, const ir::Activation activation,
+ nnfw::cker::BinaryArithmeticOpParam op_params)
+{
+ switch (lhs->data_type())
+ {
+ case OperandType::FLOAT32:
+ {
+ float output_activation_min = 0, output_activation_max = 0;
+ CalculateActivationRange(activation, &output_activation_min, &output_activation_max);
+ op_params.float_activation_max = output_activation_max;
+ op_params.float_activation_min = output_activation_min;
+ return std::bind(&eval<arithmetic_type, float>, std::placeholders::_1, std::placeholders::_2,
+ std::placeholders::_3, op_params);
+ break;
+ }
+ case OperandType::INT32:
+ {
+ int32_t output_activation_min = 0, output_activation_max = 0;
+ CalculateActivationRange(activation, &output_activation_min, &output_activation_max);
+ op_params.quantized_activation_max = output_activation_max;
+ op_params.quantized_activation_min = output_activation_min;
+ return std::bind(eval<arithmetic_type, int32_t>, std::placeholders::_1, std::placeholders::_2,
+ std::placeholders::_3, op_params);
+ break;
+ }
+ default:
+ throw std::runtime_error{"BinaryArithmetic(generic): Unsupported data type"};
+ }
+}
+
+void setAddOrSubQuant8Params(const IPortableTensor *lhs, const IPortableTensor *rhs,
+ IPortableTensor *output, ir::Activation activation,
+ nnfw::cker::BinaryArithmeticOpParam *params)
+{
+ int32_t output_activation_min, output_activation_max;
+ CalculateActivationRangeUint8(activation, output, &output_activation_min, &output_activation_max);
+ nnfw::cker::BinaryArithmeticOpParam &op_params = *params;
+ op_params.quantized_activation_max = output_activation_max;
+ op_params.quantized_activation_min = output_activation_min;
+ // Parameters for scaled quantized computation
+ op_params.left_shift = 20;
+ // Zero-points of input and output tensors
+ op_params.input1_offset = -lhs->data_offset();
+ op_params.input2_offset = -rhs->data_offset();
+ op_params.output_offset = output->data_offset();
+ assert((op_params.input1_offset >= 0) && (op_params.input1_offset <= 255));
+ assert((op_params.input2_offset >= 0) && (op_params.input2_offset <= 255));
+ assert((op_params.output_offset >= 0) && (op_params.output_offset <= 255));
+
+ // Compute normalized scale for _lhs and _rhs values,
+ // and represent in 32-bit fixed point
+ const double norm_max_scale = 2 * std::max(lhs->data_scale(), rhs->data_scale());
+ const double real_lhs_scale = lhs->data_scale() / norm_max_scale;
+ const double real_rhs_scale = rhs->data_scale() / norm_max_scale;
+ // output scale is used to normalize final result, so we invert the scale here
+ const double real_output_scale =
+ norm_max_scale / (output->data_scale() * (1 << op_params.left_shift));
+
+ // Represent the scales as fixed int32_t multipliers, and int32_t shifts
+ QuantizeMultiplier(real_lhs_scale, &op_params.input1_multiplier, &op_params.input1_shift);
+ QuantizeMultiplier(real_rhs_scale, &op_params.input2_multiplier, &op_params.input2_shift);
+ QuantizeMultiplier(real_output_scale, &op_params.output_multiplier, &op_params.output_shift);
+}
+
+void setMulQuant8Params(const IPortableTensor *lhs, const IPortableTensor *rhs,
+ IPortableTensor *output, ir::Activation activation,
+ nnfw::cker::BinaryArithmeticOpParam *params)
+{
+ int32_t output_activation_min, output_activation_max;
+ CalculateActivationRangeUint8(activation, output, &output_activation_min, &output_activation_max);
+ nnfw::cker::BinaryArithmeticOpParam &op_params = *params;
+
+ op_params.quantized_activation_max = output_activation_max;
+ op_params.quantized_activation_min = output_activation_min;
+ op_params.input1_offset = -lhs->data_offset();
+ op_params.input2_offset = -rhs->data_offset();
+ op_params.output_offset = output->data_offset();
+
+ double real_multiplier = lhs->data_scale() * rhs->data_scale() / output->data_scale();
+ QuantizeMultiplier(real_multiplier, &op_params.output_multiplier, &op_params.output_shift);
+}
+
+} // namespace
+
+void BinaryArithmeticLayer::configure(const IPortableTensor *lhs, const IPortableTensor *rhs,
+ IPortableTensor *output, const ir::Activation activation,
+ const ArithmeticType arithmetic_type)
+{
+ assert(lhs != nullptr);
+ assert(rhs != nullptr);
+ assert(output != nullptr);
+
+ _lhs = lhs;
+ _rhs = rhs;
+ _output = output;
+
+ nnfw::cker::BinaryArithmeticOpParam op_params;
+ switch (arithmetic_type)
+ {
+ case ArithmeticType::kAdd:
+ if (_lhs->data_type() == OperandType::QUANT_UINT8_ASYMM)
+ {
+ setAddOrSubQuant8Params(_lhs, _rhs, _output, activation, &op_params);
+ _kernel = std::bind(&eval<nnfw::cker::BinaryArithmeticOpType::ADD, uint8_t>,
+ std::placeholders::_1, std::placeholders::_2, std::placeholders::_3,
+ op_params);
+ }
+ else
+ {
+ _kernel = generateKernelGeneric<nnfw::cker::BinaryArithmeticOpType::ADD>(_lhs, activation,
+ op_params);
+ }
+ break;
+ case ArithmeticType::kSub:
+ if (_lhs->data_type() == OperandType::QUANT_UINT8_ASYMM)
+ {
+ setAddOrSubQuant8Params(_lhs, _rhs, _output, activation, &op_params);
+ op_params.input2_multiplier *= -1;
+ _kernel = std::bind(&eval<nnfw::cker::BinaryArithmeticOpType::SUB, uint8_t>,
+ std::placeholders::_1, std::placeholders::_2, std::placeholders::_3,
+ op_params);
+ }
+ else
+ {
+ _kernel = generateKernelGeneric<nnfw::cker::BinaryArithmeticOpType::SUB>(_lhs, activation,
+ op_params);
+ }
+ break;
+ case ArithmeticType::kMul:
+ if (_lhs->data_type() == OperandType::QUANT_UINT8_ASYMM)
+ {
+ nnfw::cker::BinaryArithmeticOpParam op_params;
+ setMulQuant8Params(_lhs, _rhs, _output, activation, &op_params);
+ _kernel = std::bind(&eval<nnfw::cker::BinaryArithmeticOpType::MUL, uint8_t>,
+ std::placeholders::_1, std::placeholders::_2, std::placeholders::_3,
+ op_params);
+ }
+ else
+ {
+ _kernel = generateKernelGeneric<nnfw::cker::BinaryArithmeticOpType::MUL>(_lhs, activation,
+ op_params);
+ }
+ break;
+ case ArithmeticType::kDiv:
+ if (_lhs->data_type() == OperandType::QUANT_UINT8_ASYMM)
+ {
+ throw std::runtime_error{
+ "BinaryArithmetic(Div): Div operation does not support quantization"};
+ }
+ else if (_lhs->data_type() == OperandType::INT32)
+ {
+ throw std::runtime_error{"BinaryArithmetic(Div): Unsupported data type"};
+ }
+ else
+ {
+ _kernel = generateKernelGeneric<nnfw::cker::BinaryArithmeticOpType::DIV>(_lhs, activation,
+ op_params);
+ }
+ break;
+ default:
+ throw std::runtime_error{"BinaryArithmetic: Unsupported BinaryArithmetic type"};
+ }
+}
+
+void BinaryArithmeticLayer::run() { _kernel(_lhs, _rhs, _output); }
+
+} // namespace ops
+} // namespace cpu
+} // namespace backend
+} // namespace onert
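A minimal, self-contained sketch of the configure-time kernel selection used in this new file: a templated eval is bound together with its parameters into a std::function, so run() is a single indirect call with no per-invocation switch (types and names below are illustrative, not the ONERT ones).

#include <algorithm>
#include <functional>
#include <iostream>

struct Params { float min, max; };

// Parameterized kernel; the operation is fixed at compile time.
template <char Op>
void eval(const float *lhs, const float *rhs, float *out, Params p)
{
  float v = (Op == '+') ? *lhs + *rhs : *lhs * *rhs;
  *out = std::min(std::max(v, p.min), p.max);   // fused activation clamp
}

int main()
{
  Params p{0.f, 6.f};                            // e.g. a ReLU6-style clamp
  // configure(): pick and bind the typed kernel once
  std::function<void(const float *, const float *, float *)> kernel =
      std::bind(&eval<'+'>, std::placeholders::_1, std::placeholders::_2,
                std::placeholders::_3, p);
  float a = 2.f, b = 5.f, out = 0.f;
  kernel(&a, &b, &out);                          // run(): one indirect call
  std::cout << out << "\n";                      // prints 6 (7 clamped to 6)
}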
diff --git a/runtime/onert/backend/cpu/ops/DivLayer.h b/runtime/onert/backend/cpu/ops/BinaryArithmeticLayer.h
index 9411be76e..d6b33ad07 100644
--- a/runtime/onert/backend/cpu/ops/DivLayer.h
+++ b/runtime/onert/backend/cpu/ops/BinaryArithmeticLayer.h
@@ -14,8 +14,8 @@
* limitations under the License.
*/
-#ifndef __ONERT_BACKEND_CPU_OPS_DIVLAYER_H__
-#define __ONERT_BACKEND_CPU_OPS_DIVLAYER_H__
+#ifndef __ONERT_BACKEND_CPU_OPS_BINARYARITHMETICLAYER_H__
+#define __ONERT_BACKEND_CPU_OPS_BINARYARITHMETICLAYER_H__
#include <backend/IPortableTensor.h>
#include "OperationUtils.h"
@@ -31,21 +31,25 @@ namespace cpu
namespace ops
{
-class DivLayer : public ::onert::exec::IFunction
+enum class ArithmeticType
+{
+ kAdd,
+ kSub,
+ kMul,
+ kDiv,
+};
+
+class BinaryArithmeticLayer : public ::onert::exec::IFunction
{
public:
- DivLayer() : _lhs(nullptr), _rhs(nullptr), _output(nullptr)
+ BinaryArithmeticLayer() : _lhs(nullptr), _rhs(nullptr), _output(nullptr)
{
// DO NOTHING
}
public:
- void divFloat32();
-
- void divQuant8();
-
- void configure(const IPortableTensor *lhs, const IPortableTensor *rhs,
- const ir::Activation activation, IPortableTensor *output);
+ void configure(const IPortableTensor *lhs, const IPortableTensor *rhs, IPortableTensor *output,
+ const ir::Activation activation, const ArithmeticType arithmetic_type);
void run() override;
@@ -54,7 +58,7 @@ private:
const IPortableTensor *_rhs;
IPortableTensor *_output;
- ir::Activation _activation{ir::Activation::NONE};
+ std::function<void(const IPortableTensor *, const IPortableTensor *, IPortableTensor *)> _kernel;
};
} // namespace ops
@@ -62,4 +66,4 @@ private:
} // namespace backend
} // namespace onert
-#endif // __ONERT_BACKEND_CPU_OPS_DIVLAYER_H__
+#endif // __ONERT_BACKEND_CPU_OPS_BINARYARITHMETICLAYER_H__
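A hedged usage fragment showing how the merged layer stands in for the removed Add/Sub/Mul/Div layers at kernel-generation time; the configure signature is taken from the header above, while `lhs`, `rhs`, `out` and the surrounding includes are assumed to come from the caller.

// Illustrative fragment only (assumes the ONERT headers and caller-provided
// IPortableTensor pointers lhs, rhs, out).
auto fn = std::make_unique<ops::BinaryArithmeticLayer>();
fn->configure(lhs, rhs, out, ir::Activation::NONE, ops::ArithmeticType::kDiv);
fn->run();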
diff --git a/runtime/onert/backend/cpu/ops/CastLayer.cc b/runtime/onert/backend/cpu/ops/CastLayer.cc
deleted file mode 100644
index 497515606..000000000
--- a/runtime/onert/backend/cpu/ops/CastLayer.cc
+++ /dev/null
@@ -1,112 +0,0 @@
-/*
- * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#include "CastLayer.h"
-
-namespace onert
-{
-namespace backend
-{
-namespace cpu
-{
-namespace ops
-{
-
-CastLayer::CastLayer() : _input(nullptr), _output(nullptr)
-{
- // DO NOTHING
-}
-
-void CastLayer::configure(const IPortableTensor *input, IPortableTensor *output)
-{
- _input = input;
- _output = output;
-}
-
-template <typename FromT, typename ToT> void CastLayer::castTensor(const FromT *in, ToT *out)
-{
- auto input_shape = getTensorShape(_input);
- auto output_shape = getTensorShape(_output);
- const auto num_elements = MatchingFlatSize(input_shape, output_shape);
-
- std::transform(in, in + num_elements, out, [](FromT a) { return static_cast<ToT>(a); });
-}
-
-template <typename FromT> void CastLayer::castPtr(const FromT *in, DataPtr out)
-{
- switch (_output->data_type())
- {
- case ir::DataType::FLOAT32:
- castTensor(in, out.f);
- return;
- case ir::DataType::INT32:
- castTensor(in, out.i32);
- return;
- case ir::DataType::UINT32:
- castTensor(in, out.u32);
- return;
- case ir::DataType::UINT8:
- castTensor(in, out.u8);
- return;
- case ir::DataType::BOOL8:
- castTensor(in, out.b);
- return;
- case ir::DataType::INT64:
- castTensor(in, out.i64);
- return;
- default:
- throw std::runtime_error("Not supported output type" +
- std::to_string((int)_output->data_type()));
- }
-}
-
-void CastLayer::run()
-{
- auto input_buf = _input->buffer();
- auto output_buf = _output->buffer();
- const auto in = *reinterpret_cast<const DataPtr *>(&input_buf);
- auto out = *reinterpret_cast<DataPtr *>(&output_buf);
-
- switch (_input->data_type())
- {
- case ir::DataType::FLOAT32:
- castPtr(in.f, out);
- return;
- case ir::DataType::INT32:
- castPtr(in.i32, out);
- return;
- case ir::DataType::UINT32:
- castPtr(in.u32, out);
- return;
- case ir::DataType::UINT8:
- castPtr(in.u8, out);
- return;
- case ir::DataType::BOOL8:
- castPtr(in.b, out);
- return;
- case ir::DataType::INT64:
- castPtr(in.i64, out);
- return;
- default:
- throw std::runtime_error("Cast: unsupported data type" +
- std::to_string((int)_input->data_type()));
- }
-}
-
-} // namespace ops
-} // namespace cpu
-} // namespace backend
-} // namespace onert
diff --git a/runtime/onert/backend/cpu/ops/CastLayer.h b/runtime/onert/backend/cpu/ops/CastLayer.h
deleted file mode 100644
index 290c722e2..000000000
--- a/runtime/onert/backend/cpu/ops/CastLayer.h
+++ /dev/null
@@ -1,57 +0,0 @@
-/*
- * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#ifndef __ONERT_BACKEND_CPU_OPS_CASTLAYER_H__
-#define __ONERT_BACKEND_CPU_OPS_CASTLAYER_H__
-
-#include <backend/IPortableTensor.h>
-#include "OperationUtils.h"
-
-#include <exec/IFunction.h>
-
-namespace onert
-{
-namespace backend
-{
-namespace cpu
-{
-namespace ops
-{
-
-class CastLayer : public ::onert::exec::IFunction
-{
-public:
- CastLayer();
-
-public:
- template <typename FromT, typename ToT> void castTensor(const FromT *in, ToT *out);
- template <typename FromT> void castPtr(const FromT *in, DataPtr out);
-
- void configure(const IPortableTensor *input, IPortableTensor *output);
-
- void run() override;
-
-private:
- const IPortableTensor *_input;
- IPortableTensor *_output;
-};
-
-} // namespace ops
-} // namespace cpu
-} // namespace backend
-} // namespace onert
-
-#endif // __ONERT_BACKEND_CPU_OPS_CASTLAYER_H__
diff --git a/runtime/onert/backend/cpu/ops/ConvolutionLayer.cc b/runtime/onert/backend/cpu/ops/ConvolutionLayer.cc
index 2d5bbef1e..c057267d3 100644
--- a/runtime/onert/backend/cpu/ops/ConvolutionLayer.cc
+++ b/runtime/onert/backend/cpu/ops/ConvolutionLayer.cc
@@ -31,7 +31,8 @@ namespace ops
ConvolutionLayer::ConvolutionLayer()
: _input(nullptr), _kernel(nullptr), _bias(nullptr), _output(nullptr),
_paddingType(ir::PaddingType::EXPLICIT), _paddingLeft(0), _paddingTop(0), _paddingRight(0),
- _paddingBottom(0), _strideWidth(0), _strideHeight(0), _activation(ir::Activation::NONE),
+ _paddingBottom(0), _strideWidth(0), _strideHeight(0), _dilationWidthFactor(1),
+ _dilationHeightFactor(1), _activation(ir::Activation::NONE),
_conv_kernel(new nnfw::cker::Conv()), _prepare(false)
{
// DO NOTHING
@@ -50,8 +51,8 @@ void ConvolutionLayer::convFloat32()
op_params.padding_values.height = _paddingTop;
op_params.stride_width = _strideWidth;
op_params.stride_height = _strideHeight;
- op_params.dilation_width_factor = 1;
- op_params.dilation_height_factor = 1;
+ op_params.dilation_width_factor = _dilationWidthFactor;
+ op_params.dilation_height_factor = _dilationHeightFactor;
op_params.float_activation_min = output_activation_min;
op_params.float_activation_max = output_activation_max;
@@ -78,8 +79,8 @@ void ConvolutionLayer::convQuant8()
nnfw::cker::ConvParams op_params;
op_params.stride_width = _strideWidth;
op_params.stride_height = _strideHeight;
- op_params.dilation_width_factor = 1;
- op_params.dilation_height_factor = 1;
+ op_params.dilation_width_factor = _dilationWidthFactor;
+ op_params.dilation_height_factor = _dilationHeightFactor;
op_params.padding_type = getPaddingType(_paddingType);
op_params.padding_values.width = _paddingLeft;
op_params.padding_values.height = _paddingTop;
@@ -104,6 +105,8 @@ void ConvolutionLayer::configure(const IPortableTensor *input, const IPortableTe
const uint32_t paddingLeft, const uint32_t paddingRight,
const uint32_t paddingTop, const uint32_t paddingBottom,
const uint32_t strideWidth, const uint32_t strideHeight,
+ const uint32_t dilationWidthFactor,
+ const uint32_t dilationHeightFactor,
const ir::Activation activation, IPortableTensor *output)
{
_input = input;
@@ -116,6 +119,8 @@ void ConvolutionLayer::configure(const IPortableTensor *input, const IPortableTe
_paddingBottom = paddingBottom;
_strideWidth = strideWidth;
_strideHeight = strideHeight;
+ _dilationWidthFactor = dilationWidthFactor;
+ _dilationHeightFactor = dilationHeightFactor;
_activation = activation;
_output = output;
}
@@ -145,7 +150,8 @@ void ConvolutionLayer::run()
param_padding.param.bottom = _paddingBottom;
const auto padding =
- ir::calculatePadding(param_padding, ifm_shape, ofm_shape, stride, ker_width, ker_height);
+ ir::calculatePadding(param_padding, ifm_shape, ofm_shape, stride, ker_width, ker_height,
+ _dilationWidthFactor, _dilationHeightFactor);
_paddingLeft = padding.left;
_paddingRight = padding.right;
@@ -176,7 +182,8 @@ void ConvolutionLayer::prepare()
{
bool is_transposed = false;
kernel.prepare(getTensorShape(_kernel), reinterpret_cast<const float *>(_kernel->buffer()),
- getPaddingType(_paddingType), is_transposed);
+ getPaddingType(_paddingType), is_transposed, _dilationWidthFactor,
+ _dilationHeightFactor);
// Decrease reference of _kernel(weights) only when _kernel is constant
if (is_transposed)
diff --git a/runtime/onert/backend/cpu/ops/ConvolutionLayer.h b/runtime/onert/backend/cpu/ops/ConvolutionLayer.h
index 2833387c4..398892e65 100644
--- a/runtime/onert/backend/cpu/ops/ConvolutionLayer.h
+++ b/runtime/onert/backend/cpu/ops/ConvolutionLayer.h
@@ -56,7 +56,8 @@ public:
const IPortableTensor *bias, ir::PaddingType _paddingType,
const uint32_t paddingLeft, const uint32_t paddingRight, const uint32_t paddingTop,
const uint32_t paddingBottom, const uint32_t strideWidth,
- const uint32_t strideHeight, const ir::Activation activation,
+ const uint32_t strideHeight, const uint32_t dilationWidthFactor,
+ const uint32_t dilationHeightFactor, const ir::Activation activation,
IPortableTensor *output);
void run() override;
@@ -77,6 +78,8 @@ private:
uint32_t _strideWidth;
uint32_t _strideHeight;
+ uint32_t _dilationWidthFactor;
+ uint32_t _dilationHeightFactor;
ir::Activation _activation;
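The padding change above feeds the dilation factors into ir::calculatePadding because a dilated kernel covers more input positions than its nominal size. A minimal sketch of the standard arithmetic (textbook formulas, not a copy of calculatePadding):

#include <cstdint>
#include <iostream>

// A k-tap filter with dilation d covers (k - 1) * d + 1 input positions.
int32_t effectiveSize(int32_t kernel, int32_t dilation)
{
  return (kernel - 1) * dilation + 1;
}

// Total SAME padding along one axis for a given stride.
int32_t samePadTotal(int32_t in, int32_t stride, int32_t kernel, int32_t dilation)
{
  int32_t out = (in + stride - 1) / stride;                            // ceil(in / stride)
  int32_t need = (out - 1) * stride + effectiveSize(kernel, dilation) - in;
  return need > 0 ? need : 0;
}

int main()
{
  // A 3x3 kernel with dilation 2 behaves like a 5x5 kernel for padding purposes.
  std::cout << effectiveSize(3, 2) << "\n";        // 5
  std::cout << samePadTotal(32, 1, 3, 2) << "\n";  // 4 (2 per side)
}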
diff --git a/runtime/onert/backend/cpu/ops/CosLayer.cc b/runtime/onert/backend/cpu/ops/CosLayer.cc
deleted file mode 100644
index 9417019d5..000000000
--- a/runtime/onert/backend/cpu/ops/CosLayer.cc
+++ /dev/null
@@ -1,68 +0,0 @@
-/*
- * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#include "CosLayer.h"
-#include "OperationUtils.h"
-
-#include <cker/operation/Elementwise.h>
-
-namespace onert
-{
-namespace backend
-{
-namespace cpu
-{
-namespace ops
-{
-CosLayer::CosLayer() : _input(nullptr), _output(nullptr)
-{
- // DO NOTHING
-}
-
-void CosLayer::cosFloat32()
-{
- nnfw::cker::Cos(getTensorShape(_input), reinterpret_cast<const float *>(_input->buffer()),
- getTensorShape(_output), reinterpret_cast<float *>(_output->buffer()));
-}
-
-void CosLayer::cosQuant8() { throw std::runtime_error{"NYI"}; }
-
-void CosLayer::configure(const IPortableTensor *input, IPortableTensor *output)
-{
- _input = input;
- _output = output;
-}
-
-void CosLayer::run()
-{
- if (_input->data_type() == OperandType::FLOAT32)
- {
- cosFloat32();
- }
- else if (_input->data_type() == OperandType::QUANT_UINT8_ASYMM)
- {
- cosQuant8();
- }
- else
- {
- throw std::runtime_error{"Cos: unsupported data type"};
- }
-}
-
-} // namespace ops
-} // namespace cpu
-} // namespace backend
-} // namespace onert
diff --git a/runtime/onert/backend/cpu/ops/CosLayer.h b/runtime/onert/backend/cpu/ops/CosLayer.h
deleted file mode 100644
index 1fadef718..000000000
--- a/runtime/onert/backend/cpu/ops/CosLayer.h
+++ /dev/null
@@ -1,54 +0,0 @@
-/*
- * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#ifndef __ONERT_BACKEND_CPU_OPS_COSLAYER_H__
-#define __ONERT_BACKEND_CPU_OPS_COSLAYER_H__
-
-#include <backend/IPortableTensor.h>
-
-#include <exec/IFunction.h>
-
-namespace onert
-{
-namespace backend
-{
-namespace cpu
-{
-namespace ops
-{
-class CosLayer : public ::onert::exec::IFunction
-{
-public:
- CosLayer();
-
- void configure(const IPortableTensor *input, IPortableTensor *output);
-
- void run() override;
-
-private:
- void cosFloat32();
- void cosQuant8();
-
- const IPortableTensor *_input;
- IPortableTensor *_output;
-};
-
-} // namespace ops
-} // namespace cpu
-} // namespace backend
-} // namespace onert
-
-#endif // __ONERT_BACKEND_CPU_OPS_COSLAYER_H__
diff --git a/runtime/onert/backend/cpu/ops/DivLayer.cc b/runtime/onert/backend/cpu/ops/DivLayer.cc
deleted file mode 100644
index 556c55e33..000000000
--- a/runtime/onert/backend/cpu/ops/DivLayer.cc
+++ /dev/null
@@ -1,95 +0,0 @@
-/*
- * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#include "DivLayer.h"
-
-#include <cker/operation/BinaryArithmeticOps.h>
-
-namespace onert
-{
-namespace backend
-{
-namespace cpu
-{
-namespace ops
-{
-
-void DivLayer::divFloat32()
-{
- float output_activation_min = 0, output_activation_max = 0;
- CalculateActivationRange(_activation, &output_activation_min, &output_activation_max);
- nnfw::cker::BinaryArithmeticOpParam op_params;
- op_params.float_activation_max = output_activation_max;
- op_params.float_activation_min = output_activation_min;
-
- const bool requires_broadcast = !HaveSameShapes(_lhs, _rhs);
- if (requires_broadcast)
- {
- nnfw::cker::BroadcastBinaryArithmeticOp<nnfw::cker::BinaryArithmeticOpType::DIV>(
- op_params, getTensorShape(_lhs), reinterpret_cast<const float *>(_lhs->buffer()),
- getTensorShape(_rhs), reinterpret_cast<const float *>(_rhs->buffer()),
- getTensorShape(_output), reinterpret_cast<float *>(_output->buffer()));
- }
- else
- {
- nnfw::cker::BinaryArithmeticOp<nnfw::cker::BinaryArithmeticOpType::DIV>(
- op_params, getTensorShape(_lhs), reinterpret_cast<const float *>(_lhs->buffer()),
- getTensorShape(_rhs), reinterpret_cast<const float *>(_rhs->buffer()),
- getTensorShape(_output), reinterpret_cast<float *>(_output->buffer()));
- }
-}
-
-void DivLayer::divQuant8()
-{
- int32_t output_activation_min, output_activation_max;
- CalculateActivationRangeUint8(_activation, _output, &output_activation_min,
- &output_activation_max);
- // op_params.quantized_activation_max = output_activation_max;
- // op_params.quantized_activation_min = output_activation_min;
-
- // cker quant8 div is not implemented yet
- throw std::runtime_error{"Div NYI for quantized"};
-}
-
-void DivLayer::configure(const IPortableTensor *lhs, const IPortableTensor *rhs,
- const ir::Activation activation, IPortableTensor *output)
-{
- _lhs = lhs;
- _rhs = rhs;
- _activation = activation;
- _output = output;
-}
-
-void DivLayer::run()
-{
- if (_output->data_type() == OperandType::FLOAT32)
- {
- divFloat32();
- }
- else if (_output->data_type() == OperandType::QUANT_UINT8_ASYMM)
- {
- divQuant8();
- }
- else
- {
- throw std::runtime_error{"Div: unsupported data type"};
- }
-}
-
-} // namespace ops
-} // namespace cpu
-} // namespace backend
-} // namespace onert
diff --git a/runtime/onert/backend/cpu/ops/ElementwiseActivationLayer.cc b/runtime/onert/backend/cpu/ops/ElementwiseActivationLayer.cc
new file mode 100644
index 000000000..c1d63172b
--- /dev/null
+++ b/runtime/onert/backend/cpu/ops/ElementwiseActivationLayer.cc
@@ -0,0 +1,173 @@
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "ElementwiseActivationLayer.h"
+
+#include "OperationUtils.h"
+
+#include <cker/operation/Logistic.h>
+#include <cker/operation/ReLU.h>
+#include <cker/operation/ReLU6.h>
+#include <cker/operation/Tanh.h>
+
+namespace onert
+{
+namespace backend
+{
+namespace cpu
+{
+namespace ops
+{
+
+ElementwiseActivationLayer::ElementwiseActivationLayer()
+ : _input(nullptr), _output(nullptr), _kernel()
+{
+ // DO NOTHING
+}
+
+void ElementwiseActivationLayer::PopulateLookupTable(const ElementwiseActivationType op_type)
+{
+ const auto input_scale = static_cast<double>(_input->data_scale());
+ const auto input_zero_point = static_cast<int32_t>(_input->data_offset());
+ const auto output_scale = static_cast<double>(_output->data_scale());
+ const auto output_zero_point = static_cast<int32_t>(_output->data_offset());
+ const float inverse_scale = 1 / output_scale;
+ int32_t maxval = std::numeric_limits<uint8_t>::max();
+ int32_t minval = std::numeric_limits<uint8_t>::min();
+ for (int32_t val = minval; val <= maxval; ++val)
+ {
+ const float dequantized = input_scale * (val - input_zero_point);
+ float transformed = 0.f;
+ if (op_type == ElementwiseActivationType::kTanh)
+ {
+ transformed = std::tanh(dequantized);
+ }
+ else if (op_type == ElementwiseActivationType::kLogistic)
+ {
+ transformed = 1.0f / (1.0f + std::exp(-dequantized));
+ }
+ else
+ {
+ throw std::runtime_error("ElementwiseActivationLayer : unsupported activation type");
+ }
+ const float rescaled = std::round(transformed * inverse_scale);
+ const int32_t quantized = static_cast<int32_t>(rescaled + output_zero_point);
+ _table[val] = static_cast<uint8_t>(std::max(std::min(maxval, quantized), minval));
+ }
+}
+
+void ElementwiseActivationLayer::EvalUsingLookupTable(const IPortableTensor *input,
+ IPortableTensor *output)
+{
+ const int size = MatchingFlatSize(getTensorShape(input), getTensorShape(output));
+ const uint8_t *input_data = reinterpret_cast<const uint8_t *>(input->buffer());
+ uint8_t *output_data = reinterpret_cast<uint8_t *>(output->buffer());
+
+ for (int i = 0; i < size; ++i)
+ {
+ output_data[i] = _table[input_data[i]];
+ }
+}
+
+void ElementwiseActivationLayer::configure(const IPortableTensor *input, IPortableTensor *output,
+ float alpha, float beta,
+ ElementwiseActivationType op_type)
+{
+ _input = input;
+ _output = output;
+
+ switch (op_type)
+ {
+ case ElementwiseActivationType::kLogistic:
+ if (_input->data_type() == OperandType::QUANT_UINT8_ASYMM)
+ {
+ PopulateLookupTable(op_type);
+ _kernel = std::bind(&ElementwiseActivationLayer::EvalUsingLookupTable, this,
+ std::placeholders::_1, std::placeholders::_2);
+ }
+ else if (_input->data_type() == OperandType::FLOAT32)
+ {
+ _kernel = [](const IPortableTensor *input, IPortableTensor *output) {
+ nnfw::cker::Logistic(getTensorShape(input),
+ reinterpret_cast<const float *>(input->buffer()),
+ getTensorShape(output), reinterpret_cast<float *>(output->buffer()));
+ };
+ }
+ else
+ {
+ throw std::runtime_error{"ElementwiseActivationLayer(Logistic): unsupported data type"};
+ }
+ break;
+ case ElementwiseActivationType::kReLU:
+ if (_input->data_type() == OperandType::FLOAT32)
+ {
+ if (alpha == std::numeric_limits<float>::infinity() && beta == 0.f)
+ {
+ _kernel = [](const IPortableTensor *input, IPortableTensor *output) {
+ nnfw::cker::ReLU(getTensorShape(input),
+ reinterpret_cast<const float *>(input->buffer()),
+ getTensorShape(output), reinterpret_cast<float *>(output->buffer()));
+ };
+ }
+ else if (alpha == 6.f && beta == 0.f)
+ {
+ _kernel = [](const IPortableTensor *input, IPortableTensor *output) {
+ nnfw::cker::ReLU6(getTensorShape(input),
+ reinterpret_cast<const float *>(input->buffer()),
+ reinterpret_cast<float *>(output->buffer()));
+ };
+ }
+ else
+ {
+ throw std::runtime_error(
+ "ElementwiseActivationLayer : This layer suppports only ReLU(0-inf) and ReLU6(0-6)");
+ }
+ }
+ else
+ {
+ throw std::runtime_error{"ElementwiseActivationLayer(ReLU): unsupported data type"};
+ }
+ break;
+ case ElementwiseActivationType::kTanh:
+ if (_input->data_type() == OperandType::QUANT_UINT8_ASYMM)
+ {
+ PopulateLookupTable(op_type);
+ _kernel = std::bind(&ElementwiseActivationLayer::EvalUsingLookupTable, this,
+ std::placeholders::_1, std::placeholders::_2);
+ }
+ else if (_input->data_type() == OperandType::FLOAT32)
+ {
+ _kernel = [](const IPortableTensor *input, IPortableTensor *output) {
+ nnfw::cker::Tanh(getTensorShape(input), reinterpret_cast<const float *>(input->buffer()),
+ getTensorShape(output), reinterpret_cast<float *>(output->buffer()));
+ };
+ }
+ else
+ {
+ throw std::runtime_error{"ElementwiseActivationLayer(Logistic): unsupported data type"};
+ }
+ break;
+ default:
+ throw std::runtime_error("ElementwiseActivationLayer: unsupported op type");
+ }
+}
+
+void ElementwiseActivationLayer::run() { _kernel(_input, _output); }
+
+} // namespace ops
+} // namespace cpu
+} // namespace backend
+} // namespace onert
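A standalone sketch of the lookup-table technique used above for quantized tanh/logistic: every possible uint8 input is dequantized, transformed, and requantized once at configure time, so run() reduces to a single table lookup per element. The scales and zero points below are made-up values.

#include <algorithm>
#include <cmath>
#include <cstdint>
#include <iostream>

int main()
{
  const double in_scale = 0.05, out_scale = 1.0 / 128.0;   // illustrative quant params
  const int32_t in_zp = 128, out_zp = 128;
  uint8_t table[256];
  for (int32_t v = 0; v <= 255; ++v)
  {
    const float x = static_cast<float>(in_scale * (v - in_zp));            // dequantize
    const float y = std::tanh(x);                                          // transform
    const int32_t q = static_cast<int32_t>(std::round(y / out_scale)) + out_zp;
    table[v] = static_cast<uint8_t>(std::min(255, std::max(0, q)));        // requantize + clamp
  }
  // run(): output[i] = table[input[i]]
  std::cout << static_cast<int>(table[128]) << "\n";  // tanh(0) maps to the output zero point, 128
  std::cout << static_cast<int>(table[255]) << "\n";  // tanh(~6.35) saturates to 255
}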
diff --git a/runtime/onert/backend/cpu/ops/TanhLayer.h b/runtime/onert/backend/cpu/ops/ElementwiseActivationLayer.h
index 35a184074..3ef580041 100644
--- a/runtime/onert/backend/cpu/ops/TanhLayer.h
+++ b/runtime/onert/backend/cpu/ops/ElementwiseActivationLayer.h
@@ -14,8 +14,8 @@
* limitations under the License.
*/
-#ifndef __ONERT_BACKEND_CPU_OPS_TANHLAYER_H__
-#define __ONERT_BACKEND_CPU_OPS_TANHLAYER_H__
+#ifndef __ONERT_BACKEND_CPU_OPS_ELEMENTWISEACTIVATIONLAYER_H__
+#define __ONERT_BACKEND_CPU_OPS_ELEMENTWISEACTIVATIONLAYER_H__
#include <backend/IPortableTensor.h>
@@ -30,26 +30,33 @@ namespace cpu
namespace ops
{
-class TanhLayer : public ::onert::exec::IFunction
+enum class ElementwiseActivationType
{
-public:
- TanhLayer();
+ kLogistic,
+ kReLU,
+ kTanh
+};
+class ElementwiseActivationLayer : public ::onert::exec::IFunction
+{
public:
- void tanhFloat32();
+ ElementwiseActivationLayer();
- void tanhQuant8();
-
- void configure(const IPortableTensor *input, IPortableTensor *output);
+public:
+ void configure(const IPortableTensor *input, IPortableTensor *output, float alpha, float beta,
+ const ElementwiseActivationType op_type);
void run() override;
- void PopulateLookupTable();
+ void PopulateLookupTable(const ElementwiseActivationType op_type);
+
+ void EvalUsingLookupTable(const IPortableTensor *input, IPortableTensor *output);
private:
const IPortableTensor *_input;
IPortableTensor *_output;
uint8_t _table[256];
+ std::function<void(const IPortableTensor *input, IPortableTensor *output)> _kernel;
};
} // namespace ops
@@ -57,4 +64,4 @@ private:
} // namespace backend
} // namespace onert
-#endif // __ONERT_BACKEND_CPU_OPS_TANHLAYER_H__
+#endif // __ONERT_BACKEND_CPU_OPS_ELEMENTWISEACTIVATIONLAYER_H__
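Per the dispatch in the .cc above, the (alpha, beta) pair passed to configure() encodes the clamp bounds: (inf, 0) selects plain ReLU and (6, 0) selects ReLU6. A hedged usage fragment (assumes the surrounding ONERT headers plus <limits> and <memory>; `input` and `output` are caller-provided IPortableTensor pointers):

// Illustrative fragments only.
auto relu = std::make_unique<ops::ElementwiseActivationLayer>();
relu->configure(input, output, std::numeric_limits<float>::infinity(), 0.f,
                ops::ElementwiseActivationType::kReLU);   // plain ReLU: clamp to [0, inf)

auto relu6 = std::make_unique<ops::ElementwiseActivationLayer>();
relu6->configure(input, output, 6.f, 0.f,
                 ops::ElementwiseActivationType::kReLU);  // ReLU6: clamp to [0, 6]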
diff --git a/runtime/onert/backend/cpu/ops/ElementwiseBinaryLayer.cc b/runtime/onert/backend/cpu/ops/ElementwiseBinaryLayer.cc
new file mode 100644
index 000000000..ea3c1e7cd
--- /dev/null
+++ b/runtime/onert/backend/cpu/ops/ElementwiseBinaryLayer.cc
@@ -0,0 +1,151 @@
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "ElementwiseBinaryLayer.h"
+
+#include "OperationUtils.h"
+
+#include <cker/operation/LogicalOr.h>
+#include <cker/operation/MaxMin.h>
+
+namespace onert
+{
+namespace backend
+{
+namespace cpu
+{
+namespace ops
+{
+
+namespace
+{
+template <typename T>
+void logicalOrGeneric(const IPortableTensor *lhs, const IPortableTensor *rhs,
+ IPortableTensor *output)
+{
+ if (!HaveSameShapes(lhs, rhs))
+ {
+ nnfw::cker::LogicalOrBroadcast<T>(
+ getTensorShape(lhs), reinterpret_cast<const T *>(lhs->buffer()), getTensorShape(rhs),
+ reinterpret_cast<const T *>(rhs->buffer()), getTensorShape(output),
+ reinterpret_cast<T *>(output->buffer()));
+ }
+ else
+ {
+ nnfw::cker::LogicalOrElementwise<T>(
+ getTensorShape(lhs), reinterpret_cast<const T *>(lhs->buffer()),
+ reinterpret_cast<const T *>(rhs->buffer()), reinterpret_cast<T *>(output->buffer()));
+ }
+}
+
+template <typename T>
+void maximumGeneric(const IPortableTensor *lhs, const IPortableTensor *rhs, IPortableTensor *output)
+{
+ nnfw::cker::Max<T>(getTensorShape(lhs), reinterpret_cast<const T *>(lhs->buffer()),
+ getTensorShape(rhs), reinterpret_cast<const T *>(rhs->buffer()),
+ getTensorShape(output), reinterpret_cast<T *>(output->buffer()));
+}
+
+template <typename T>
+void minimumGeneric(const IPortableTensor *lhs, const IPortableTensor *rhs, IPortableTensor *output)
+{
+ nnfw::cker::Min<T>(getTensorShape(lhs), reinterpret_cast<const T *>(lhs->buffer()),
+ getTensorShape(rhs), reinterpret_cast<const T *>(rhs->buffer()),
+ getTensorShape(output), reinterpret_cast<T *>(output->buffer()));
+}
+
+bool haveSameQuantInfo(const IPortableTensor *lhs, const IPortableTensor *rhs,
+ const IPortableTensor *output)
+{
+ return (lhs->data_scale() == rhs->data_scale() && lhs->data_scale() == output->data_scale()) &&
+ (lhs->data_offset() == rhs->data_offset() && lhs->data_offset() == output->data_offset());
+}
+} // namespace
+
+void ElementwiseBinaryLayer::configure(const IPortableTensor *lhs, const IPortableTensor *rhs,
+ IPortableTensor *output, const ElementwiseBinaryType op_type)
+{
+ assert(lhs != nullptr);
+ assert(rhs != nullptr);
+ assert(output != nullptr);
+
+ _lhs = lhs;
+ _rhs = rhs;
+ _output = output;
+
+ switch (op_type)
+ {
+ case ElementwiseBinaryType::kLogicalOr:
+ if ((_lhs->data_type() == OperandType::BOOL8) && (_rhs->data_type() == OperandType::BOOL8))
+ {
+ _kernel = logicalOrGeneric<bool>;
+ }
+ else
+ {
+ throw std::runtime_error{"LogicalOr: Unsupported data type"};
+ }
+ break;
+ case ElementwiseBinaryType::kMax:
+ if (_lhs->data_type() == OperandType::QUANT_UINT8_ASYMM)
+ {
+      if (!haveSameQuantInfo(_lhs, _rhs, _output))
+ {
+ throw std::runtime_error("Max NYI for quantized");
+ }
+ _kernel = maximumGeneric<uint8_t>;
+ }
+ else if (_lhs->data_type() == OperandType::FLOAT32)
+ {
+ _kernel = maximumGeneric<float>;
+ }
+ else
+ {
+ throw std::runtime_error{"Max: unsupported data type"};
+ }
+ break;
+ case ElementwiseBinaryType::kMin:
+ if (_lhs->data_type() == OperandType::QUANT_UINT8_ASYMM)
+ {
+      if (!haveSameQuantInfo(_lhs, _rhs, _output))
+ {
+ throw std::runtime_error("Min NYI for quantized");
+ }
+ _kernel = minimumGeneric<uint8_t>;
+ }
+ else if (_lhs->data_type() == OperandType::INT32)
+ {
+ _kernel = minimumGeneric<int32_t>;
+ }
+ else if (_lhs->data_type() == OperandType::FLOAT32)
+ {
+ _kernel = minimumGeneric<float>;
+ }
+ else
+ {
+ throw std::runtime_error{"Min: unsupported data type"};
+ }
+ break;
+ default:
+ throw std::runtime_error{"ElementwiseBinary: Unsupported ElementwiseBinary type"};
+ }
+}
+
+void ElementwiseBinaryLayer::run() { _kernel(_lhs, _rhs, _output); }
+
+} // namespace ops
+} // namespace cpu
+} // namespace backend
+} // namespace onert
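The same-quant-info check above exists because comparing raw uint8 values only reproduces the real-valued max/min when both inputs and the output share one scale and zero point; with differing scales the raw comparison can pick the wrong operand. A small standalone illustration:

#include <cstdint>
#include <iostream>

int main()
{
  // lhs: scale 0.1, zero point 0 -> raw 50 represents 5.0
  // rhs: scale 1.0, zero point 0 -> raw 40 represents 40.0
  uint8_t lhs_raw = 50, rhs_raw = 40;
  double lhs_real = 0.1 * lhs_raw, rhs_real = 1.0 * rhs_raw;
  std::cout << "raw max picks lhs?  " << (lhs_raw > rhs_raw) << "\n";    // 1 (wrong answer)
  std::cout << "real max picks lhs? " << (lhs_real > rhs_real) << "\n";  // 0 (rhs is larger)
}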
diff --git a/runtime/onert/backend/cpu/ops/MaxLayer.h b/runtime/onert/backend/cpu/ops/ElementwiseBinaryLayer.h
index ed8dc5b0f..052747a4c 100644
--- a/runtime/onert/backend/cpu/ops/MaxLayer.h
+++ b/runtime/onert/backend/cpu/ops/ElementwiseBinaryLayer.h
@@ -14,8 +14,8 @@
* limitations under the License.
*/
-#ifndef __ONERT_BACKEND_CPU_OPS_MAXLAYER_H__
-#define __ONERT_BACKEND_CPU_OPS_MAXLAYER_H__
+#ifndef __ONERT_BACKEND_CPU_OPS_ELEMENTWISEBINARYLAYER_H__
+#define __ONERT_BACKEND_CPU_OPS_ELEMENTWISEBINARYLAYER_H__
#include <backend/IPortableTensor.h>
@@ -30,20 +30,25 @@ namespace cpu
namespace ops
{
-class MaxLayer : public ::onert::exec::IFunction
+enum class ElementwiseBinaryType
+{
+ kLogicalAnd,
+ kLogicalOr,
+ kMax,
+ kMin,
+};
+
+class ElementwiseBinaryLayer : public ::onert::exec::IFunction
{
public:
- MaxLayer() : _lhs(nullptr), _rhs(nullptr), _output(nullptr)
+ ElementwiseBinaryLayer() : _lhs(nullptr), _rhs(nullptr), _output(nullptr)
{
// DO NOTHING
}
public:
- template <typename T> void maximum();
-
- void maxQuant8();
-
- void configure(const IPortableTensor *lhs, const IPortableTensor *rhs, IPortableTensor *output);
+ void configure(const IPortableTensor *lhs, const IPortableTensor *rhs, IPortableTensor *output,
+ const ElementwiseBinaryType op_type);
void run() override;
@@ -51,6 +56,7 @@ private:
const IPortableTensor *_lhs;
const IPortableTensor *_rhs;
IPortableTensor *_output;
+ std::function<void(const IPortableTensor *, const IPortableTensor *, IPortableTensor *)> _kernel;
};
} // namespace ops
@@ -58,4 +64,4 @@ private:
} // namespace backend
} // namespace onert
-#endif // __ONERT_BACKEND_CPU_OPS_MAXLAYER_H__
+#endif // __ONERT_BACKEND_CPU_OPS_ELEMENTWISEBINARYLAYER_H__
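The haveSameQauntInfo guard in ElementwiseBinaryLayer.cc exists because Max/Min can operate directly on uint8 codes only when both inputs and the output share one affine mapping x = scale * (q - offset); that mapping is strictly increasing, so ordering the codes orders the real values. A small check of that claim, with hypothetical values rather than anything from the runtime:

// Hedged illustration of why matching quantization params let Max run on raw uint8 codes.
#include <algorithm>
#include <cstdint>
#include <cstdio>

float dequant(uint8_t q, float scale, int32_t offset) { return scale * (q - offset); }

int main()
{
  const float scale = 0.05f;
  const int32_t offset = 10;
  uint8_t a = 40, b = 200;
  // Same affine params on both operands and the output: max over codes == max over real values.
  uint8_t q_max = std::max(a, b);
  float real_max = std::max(dequant(a, scale, offset), dequant(b, scale, offset));
  std::printf("%f == %f\n", dequant(q_max, scale, offset), real_max); // 9.5 == 9.5
}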
diff --git a/runtime/onert/backend/cpu/ops/ElementwiseUnaryLayer.cc b/runtime/onert/backend/cpu/ops/ElementwiseUnaryLayer.cc
new file mode 100644
index 000000000..f8f89ab15
--- /dev/null
+++ b/runtime/onert/backend/cpu/ops/ElementwiseUnaryLayer.cc
@@ -0,0 +1,336 @@
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "ElementwiseUnaryLayer.h"
+
+#include "OperationUtils.h"
+
+#include <cker/operation/Elementwise.h>
+#include <cker/operation/Erf.h>
+#include <cker/operation/Exp.h>
+#include <cker/operation/LogicalNot.h>
+#include <cker/operation/Quantize.h>
+#include <cker/operation/Round.h>
+
+namespace onert
+{
+namespace backend
+{
+namespace cpu
+{
+namespace ops
+{
+
+namespace
+{
+void absFloat32(const IPortableTensor *input, IPortableTensor *output)
+{
+ nnfw::cker::Abs(getTensorShape(input), reinterpret_cast<const float *>(input->buffer()),
+ getTensorShape(output), reinterpret_cast<float *>(output->buffer()));
+}
+
+template <typename FromT>
+void castPtr(const FromT *in, DataPtr out, int num_elements, ir::DataType data_type_out)
+{
+ switch (data_type_out)
+ {
+ case ir::DataType::FLOAT32:
+ std::transform(in, in + num_elements, out.f, [](FromT a) { return static_cast<float>(a); });
+ return;
+ case ir::DataType::INT32:
+ std::transform(in, in + num_elements, out.i32,
+ [](FromT a) { return static_cast<int32_t>(a); });
+ return;
+ case ir::DataType::UINT32:
+ std::transform(in, in + num_elements, out.u32,
+ [](FromT a) { return static_cast<uint32_t>(a); });
+ return;
+ case ir::DataType::UINT8:
+ std::transform(in, in + num_elements, out.u8,
+ [](FromT a) { return static_cast<uint8_t>(a); });
+ return;
+ case ir::DataType::BOOL8:
+ std::transform(in, in + num_elements, out.b, [](FromT a) { return static_cast<bool>(a); });
+ return;
+ case ir::DataType::INT64:
+ std::transform(in, in + num_elements, out.i64,
+ [](FromT a) { return static_cast<int64_t>(a); });
+ return;
+ default:
+ throw std::runtime_error("Cast: Not supported output type" +
+ std::to_string((int)data_type_out));
+ }
+}
+
+void cast(const IPortableTensor *input, IPortableTensor *output)
+{
+ auto input_buf = input->buffer();
+ auto output_buf = output->buffer();
+ const auto in = *reinterpret_cast<const DataPtr *>(&input_buf);
+ auto out = *reinterpret_cast<DataPtr *>(&output_buf);
+
+ auto input_shape = getTensorShape(input);
+ auto output_shape = getTensorShape(output);
+ const auto num_elements = MatchingFlatSize(input_shape, output_shape);
+
+ switch (input->data_type())
+ {
+ case ir::DataType::FLOAT32:
+ castPtr(in.f, out, num_elements, output->data_type());
+ return;
+ case ir::DataType::INT32:
+ castPtr(in.i32, out, num_elements, output->data_type());
+ return;
+ case ir::DataType::UINT32:
+ castPtr(in.u32, out, num_elements, output->data_type());
+ return;
+ case ir::DataType::UINT8:
+ castPtr(in.u8, out, num_elements, output->data_type());
+ return;
+ case ir::DataType::BOOL8:
+ castPtr(in.b, out, num_elements, output->data_type());
+ return;
+ case ir::DataType::INT64:
+ castPtr(in.i64, out, num_elements, output->data_type());
+ return;
+ default:
+ throw std::runtime_error("Cast: unsupported data type" +
+ std::to_string((int)input->data_type()));
+ }
+}
+
+void cosFloat32(const IPortableTensor *input, IPortableTensor *output)
+{
+ nnfw::cker::Cos(getTensorShape(input), reinterpret_cast<const float *>(input->buffer()),
+ getTensorShape(output), reinterpret_cast<float *>(output->buffer()));
+}
+
+void expFloat32(const IPortableTensor *input, IPortableTensor *output)
+{
+ nnfw::cker::Exp(getTensorShape(input), reinterpret_cast<const float *>(input->buffer()),
+ getTensorShape(output), reinterpret_cast<float *>(output->buffer()));
+}
+
+void erfFloat32(const IPortableTensor *input, IPortableTensor *output)
+{
+ nnfw::cker::Erf(getTensorShape(input), reinterpret_cast<const float *>(input->buffer()),
+ getTensorShape(output), reinterpret_cast<float *>(output->buffer()));
+}
+
+void logFloat32(const IPortableTensor *input, IPortableTensor *output)
+{
+ nnfw::cker::Log(getTensorShape(input), reinterpret_cast<const float *>(input->buffer()),
+ getTensorShape(output), reinterpret_cast<float *>(output->buffer()));
+}
+
+void logicalNot(const IPortableTensor *input, IPortableTensor *output)
+{
+ nnfw::cker::LogicalNot(getTensorShape(input), reinterpret_cast<const bool *>(input->buffer()),
+ getTensorShape(output), reinterpret_cast<bool *>(output->buffer()));
+}
+
+void negFloat32(const IPortableTensor *input, IPortableTensor *output)
+{
+ nnfw::cker::Neg(getTensorShape(input), reinterpret_cast<const float *>(input->buffer()),
+ getTensorShape(output), reinterpret_cast<float *>(output->buffer()));
+}
+
+template <typename InputT, typename OutputT>
+void affineQuantize(const IPortableTensor *input, IPortableTensor *output)
+{
+ nnfw::cker::Quantize(getTensorShape(input), reinterpret_cast<const InputT *>(input->buffer()),
+ getTensorShape(output), reinterpret_cast<OutputT *>(output->buffer()),
+ output->data_scale(), output->data_offset());
+}
+
+void roundFloat32(const IPortableTensor *input, IPortableTensor *output)
+{
+ nnfw::cker::Round(getTensorShape(input), reinterpret_cast<const float *>(input->buffer()),
+ getTensorShape(output), reinterpret_cast<float *>(output->buffer()));
+}
+
+void rsqrtFloat32(const IPortableTensor *input, IPortableTensor *output)
+{
+ nnfw::cker::Rsqrt(getTensorShape(input), reinterpret_cast<const float *>(input->buffer()),
+ getTensorShape(output), reinterpret_cast<float *>(output->buffer()));
+}
+
+void sinFloat32(const IPortableTensor *input, IPortableTensor *output)
+{
+ nnfw::cker::Sin(getTensorShape(input), reinterpret_cast<const float *>(input->buffer()),
+ getTensorShape(output), reinterpret_cast<float *>(output->buffer()));
+}
+
+template <typename T> void zerosLikeFloat32(const IPortableTensor *input, IPortableTensor *output)
+{
+ if (!HaveSameShapes(input, output))
+ throw std::runtime_error{"ZerosLike: input and output shape don't match."};
+
+ auto element_size = getTensorShape(input).FlatSize();
+
+ memset(reinterpret_cast<T *>(output->buffer()), 0, element_size * sizeof(T));
+}
+} // namespace
+
+void ElementwiseUnaryLayer::configure(const IPortableTensor *input, IPortableTensor *output,
+ const ElementwiseUnaryType op_type)
+{
+ assert(input != nullptr);
+ assert(output != nullptr);
+
+ _input = input;
+ _output = output;
+
+ switch (op_type)
+ {
+ case ElementwiseUnaryType::kAbs:
+ if ((input->data_type() == OperandType::FLOAT32))
+ {
+ _kernel = absFloat32;
+ }
+ else
+ {
+ throw std::runtime_error{"Abs: Unsupported data type"};
+ }
+ break;
+ case ElementwiseUnaryType::kCast:
+ _kernel = cast;
+ break;
+ case ElementwiseUnaryType::kCos:
+ if ((input->data_type() == OperandType::FLOAT32))
+ {
+ _kernel = cosFloat32;
+ }
+ else
+ {
+ throw std::runtime_error{"Cos: Unsupported data type"};
+ }
+ break;
+ case ElementwiseUnaryType::kExp:
+ if ((input->data_type() == OperandType::FLOAT32))
+ {
+ _kernel = expFloat32;
+ }
+ else
+ {
+ throw std::runtime_error{"Exp: Unsupported data type"};
+ }
+ break;
+ case ElementwiseUnaryType::kErf:
+ if ((input->data_type() == OperandType::FLOAT32))
+ {
+ _kernel = erfFloat32;
+ }
+ else
+ {
+ throw std::runtime_error{"Exp: Unsupported data type"};
+ }
+ break;
+ case ElementwiseUnaryType::kLog:
+ if ((input->data_type() == OperandType::FLOAT32))
+ {
+ _kernel = logFloat32;
+ }
+ else
+ {
+ throw std::runtime_error{"Log: Unsupported data type"};
+ }
+ break;
+ case ElementwiseUnaryType::kLogicalNot:
+ if ((input->data_type() == OperandType::BOOL8))
+ {
+ _kernel = logicalNot;
+ }
+ else
+ {
+ throw std::runtime_error{"LogicalNot: Unsupported data type"};
+ }
+ break;
+ case ElementwiseUnaryType::kNeg:
+ if ((input->data_type() == OperandType::FLOAT32))
+ {
+ _kernel = negFloat32;
+ }
+ else
+ {
+ throw std::runtime_error{"Neg: Unsupported data type"};
+ }
+ break;
+ case ElementwiseUnaryType::kQuantize:
+ if ((input->data_type() == OperandType::FLOAT32))
+ {
+ _kernel = affineQuantize<float, uint8_t>;
+ }
+ else
+ {
+ throw std::runtime_error{"Quantize: Unsupported data type"};
+ }
+ break;
+ case ElementwiseUnaryType::kRound:
+ if ((input->data_type() == OperandType::FLOAT32))
+ {
+ _kernel = roundFloat32;
+ }
+ else
+ {
+ throw std::runtime_error{"Round: Unsupported data type"};
+ }
+ break;
+ case ElementwiseUnaryType::kRSqrt:
+ if ((input->data_type() == OperandType::FLOAT32))
+ {
+ _kernel = rsqrtFloat32;
+ }
+ else
+ {
+ throw std::runtime_error{"RSqrt: Unsupported data type"};
+ }
+ break;
+ case ElementwiseUnaryType::kSin:
+ if ((input->data_type() == OperandType::FLOAT32))
+ {
+ _kernel = sinFloat32;
+ }
+ else
+ {
+ throw std::runtime_error{"Sin: Unsupported data type"};
+ }
+ break;
+ case ElementwiseUnaryType::kZerosLike:
+ if (input->data_type() == OperandType::FLOAT32)
+ {
+ _kernel = zerosLikeFloat32<float>;
+ }
+ else if (input->data_type() == OperandType::INT32)
+ {
+ _kernel = zerosLikeFloat32<int32_t>;
+ }
+ else
+ {
+ throw std::runtime_error{"ZerosLike: Unsupported data type"};
+ }
+ break;
+ default:
+ throw std::runtime_error{"ElementwiseBinary: Unsupported ElementwiseBinary type"};
+ }
+}
+
+void ElementwiseUnaryLayer::run() { _kernel(_input, _output); }
+
+} // namespace ops
+} // namespace cpu
+} // namespace backend
+} // namespace onert
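ElementwiseUnaryLayer's cast path is a two-level dispatch: the outer switch in cast() picks the source element type, castPtr() picks the destination type, and DataPtr (defined in OperationUtils.h, not shown in this diff) acts as a bundle of typed pointers over the same raw buffer. A self-contained sketch of the same idea, with a hypothetical local union standing in for DataPtr:

// Hedged sketch of the double-dispatch cast; 'RawPtr' is a hypothetical stand-in
// for onert's DataPtr union, and only three element types are covered here.
#include <algorithm>
#include <cstdint>
#include <cstdio>
#include <stdexcept>

union RawPtr
{
  float *f;
  int32_t *i32;
  uint8_t *u8;
};

enum class DType
{
  FLOAT32,
  INT32,
  UINT8
};

template <typename FromT> void castFrom(const FromT *in, RawPtr out, int n, DType to)
{
  switch (to)
  {
    case DType::FLOAT32:
      std::transform(in, in + n, out.f, [](FromT a) { return static_cast<float>(a); });
      return;
    case DType::INT32:
      std::transform(in, in + n, out.i32, [](FromT a) { return static_cast<int32_t>(a); });
      return;
    case DType::UINT8:
      std::transform(in, in + n, out.u8, [](FromT a) { return static_cast<uint8_t>(a); });
      return;
  }
  throw std::runtime_error{"cast: unsupported destination type"};
}

void cast(const void *in, DType from, RawPtr out, DType to, int n)
{
  switch (from)
  {
    case DType::FLOAT32: castFrom(static_cast<const float *>(in), out, n, to); return;
    case DType::INT32:   castFrom(static_cast<const int32_t *>(in), out, n, to); return;
    case DType::UINT8:   castFrom(static_cast<const uint8_t *>(in), out, n, to); return;
  }
  throw std::runtime_error{"cast: unsupported source type"};
}

int main()
{
  float src[3] = {1.7f, -2.3f, 300.0f};
  int32_t dst[3];
  RawPtr out;
  out.i32 = dst;
  cast(src, DType::FLOAT32, out, DType::INT32, 3);
  std::printf("%d %d %d\n", dst[0], dst[1], dst[2]); // 1 -2 300
}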
diff --git a/runtime/onert/backend/cpu/ops/ReLU6Layer.h b/runtime/onert/backend/cpu/ops/ElementwiseUnaryLayer.h
index 994d17a30..74968386d 100644
--- a/runtime/onert/backend/cpu/ops/ReLU6Layer.h
+++ b/runtime/onert/backend/cpu/ops/ElementwiseUnaryLayer.h
@@ -14,8 +14,8 @@
* limitations under the License.
*/
-#ifndef __ONERT_BACKEND_CPU_OPS_RELU6LAYER_H__
-#define __ONERT_BACKEND_CPU_OPS_RELU6LAYER_H__
+#ifndef __ONERT_BACKEND_CPU_OPS_ELEMENTWISEUNARYLAYER_H__
+#define __ONERT_BACKEND_CPU_OPS_ELEMENTWISEUNARYLAYER_H__
#include <backend/IPortableTensor.h>
@@ -30,23 +30,41 @@ namespace cpu
namespace ops
{
-class ReLU6Layer : public ::onert::exec::IFunction
+enum class ElementwiseUnaryType
{
-public:
- ReLU6Layer();
+ kAbs,
+ kCast,
+ kCos,
+ kErf,
+ kExp,
+ kLog,
+ kLogicalNot,
+ kNeg,
+ kQuantize,
+ kRound,
+ kRSqrt,
+ kSin,
+ kZerosLike
+};
+class ElementwiseUnaryLayer : public ::onert::exec::IFunction
+{
public:
- void relu6Float32();
+ ElementwiseUnaryLayer() : _input(nullptr), _output(nullptr), _kernel()
+ {
+ // DO NOTHING
+ }
- void relu6Quant8();
-
- void configure(const IPortableTensor *input, IPortableTensor *output);
+public:
+ void configure(const IPortableTensor *input, IPortableTensor *output,
+ const ElementwiseUnaryType op_type);
void run() override;
private:
const IPortableTensor *_input;
IPortableTensor *_output;
+ std::function<void(const IPortableTensor *, IPortableTensor *)> _kernel;
};
} // namespace ops
@@ -54,4 +72,4 @@ private:
} // namespace backend
} // namespace onert
-#endif // __ONERT_BACKEND_CPU_OPS_RELU6LAYER_H__
+#endif // __ONERT_BACKEND_CPU_OPS_ELEMENTWISEUNARYLAYER_H__
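For the kQuantize case, affineQuantize hands the output tensor's scale and zero point to nnfw::cker::Quantize. Assuming that kernel applies the usual affine mapping q = clamp(round(x / scale) + zero_point, 0, 255) for float-to-uint8 (the cker source is not part of this diff, so the rounding mode here is an assumption), a reference version looks like this:

// Hedged reference for float -> uint8 affine quantization.
#include <algorithm>
#include <cmath>
#include <cstdint>
#include <cstdio>
#include <vector>

std::vector<uint8_t> quantizeAffine(const std::vector<float> &x, float scale, int32_t zero_point)
{
  std::vector<uint8_t> q(x.size());
  for (size_t i = 0; i < x.size(); ++i)
  {
    const int32_t v = static_cast<int32_t>(std::lround(x[i] / scale)) + zero_point;
    q[i] = static_cast<uint8_t>(std::min(255, std::max(0, v))); // clamp to the uint8 range
  }
  return q;
}

int main()
{
  // scale 0.5, zero point 128: 0.0 -> 128, 1.0 -> 130, -64.0 -> 0, 100.0 -> 255 (clamped)
  auto q = quantizeAffine({0.0f, 1.0f, -64.0f, 100.0f}, 0.5f, 128);
  for (auto v : q)
    std::printf("%u ", v); // 128 130 0 255
  std::printf("\n");
}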
diff --git a/runtime/onert/backend/cpu/ops/ExpLayer.cc b/runtime/onert/backend/cpu/ops/ExpLayer.cc
deleted file mode 100644
index 4dbec9cd5..000000000
--- a/runtime/onert/backend/cpu/ops/ExpLayer.cc
+++ /dev/null
@@ -1,74 +0,0 @@
-/*
- * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#include "ExpLayer.h"
-
-#include "OperationUtils.h"
-
-#include <cker/operation/Exp.h>
-
-namespace onert
-{
-namespace backend
-{
-namespace cpu
-{
-namespace ops
-{
-
-ExpLayer::ExpLayer() : _input(nullptr), _output(nullptr)
-{
- // DO NOTHING
-}
-
-void ExpLayer::expFloat32()
-{
- nnfw::cker::Exp(getTensorShape(_input), reinterpret_cast<const float *>(_input->buffer()),
- getTensorShape(_output), reinterpret_cast<float *>(_output->buffer()));
-}
-
-void ExpLayer::expQuant8()
-{
- // cker quant8 exp is not implemented yet
- throw std::runtime_error{"NYI"};
-}
-
-void ExpLayer::configure(const IPortableTensor *input, IPortableTensor *output)
-{
- _input = input;
- _output = output;
-}
-
-void ExpLayer::run()
-{
- if (_input->data_type() == OperandType::FLOAT32)
- {
- expFloat32();
- }
- else if (_input->data_type() == OperandType::QUANT_UINT8_ASYMM)
- {
- expQuant8();
- }
- else
- {
- throw std::runtime_error{"Exp: unsupported data type"};
- }
-}
-
-} // namespace ops
-} // namespace cpu
-} // namespace backend
-} // namespace onert
diff --git a/runtime/onert/backend/cpu/ops/LogLayer.cc b/runtime/onert/backend/cpu/ops/LogLayer.cc
deleted file mode 100644
index 307c15bc4..000000000
--- a/runtime/onert/backend/cpu/ops/LogLayer.cc
+++ /dev/null
@@ -1,70 +0,0 @@
-/*
- * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#include "LogLayer.h"
-
-#include "OperationUtils.h"
-
-#include <cker/operation/Elementwise.h>
-
-namespace onert
-{
-namespace backend
-{
-namespace cpu
-{
-namespace ops
-{
-
-LogLayer::LogLayer() : _input(nullptr), _output(nullptr)
-{
- // DO NOTHING
-}
-
-void LogLayer::logFloat32()
-{
- nnfw::cker::Log(getTensorShape(_input), reinterpret_cast<const float *>(_input->buffer()),
- getTensorShape(_output), reinterpret_cast<float *>(_output->buffer()));
-}
-
-void LogLayer::logQuant8() { throw std::runtime_error{"NYI"}; }
-
-void LogLayer::configure(const IPortableTensor *input, IPortableTensor *output)
-{
- _input = input;
- _output = output;
-}
-
-void LogLayer::run()
-{
- if (_input->data_type() == OperandType::FLOAT32)
- {
- logFloat32();
- }
- else if (_input->data_type() == OperandType::QUANT_UINT8_ASYMM)
- {
- logQuant8();
- }
- else
- {
- throw std::runtime_error{"Log: unsupported data type"};
- }
-}
-
-} // namespace ops
-} // namespace cpu
-} // namespace backend
-} // namespace onert
diff --git a/runtime/onert/backend/cpu/ops/LogLayer.h b/runtime/onert/backend/cpu/ops/LogLayer.h
deleted file mode 100644
index 2f6b4b570..000000000
--- a/runtime/onert/backend/cpu/ops/LogLayer.h
+++ /dev/null
@@ -1,57 +0,0 @@
-/*
- * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#ifndef __ONERT_BACKEND_CPU_OPS_LOGLAYER_H__
-#define __ONERT_BACKEND_CPU_OPS_LOGLAYER_H__
-
-#include <backend/IPortableTensor.h>
-
-#include <exec/IFunction.h>
-
-namespace onert
-{
-namespace backend
-{
-namespace cpu
-{
-namespace ops
-{
-
-class LogLayer : public ::onert::exec::IFunction
-{
-public:
- LogLayer();
-
-public:
- void logFloat32();
-
- void logQuant8();
-
- void configure(const IPortableTensor *input, IPortableTensor *output);
-
- void run() override;
-
-private:
- const IPortableTensor *_input;
- IPortableTensor *_output;
-};
-
-} // namespace ops
-} // namespace cpu
-} // namespace backend
-} // namespace onert
-
-#endif // __ONERT_BACKEND_CPU_OPS_LOGLAYER_H__
diff --git a/runtime/onert/backend/cpu/ops/LogSoftMaxLayer.cc b/runtime/onert/backend/cpu/ops/LogSoftMaxLayer.cc
index 06dde4fc4..1d7ee6caa 100644
--- a/runtime/onert/backend/cpu/ops/LogSoftMaxLayer.cc
+++ b/runtime/onert/backend/cpu/ops/LogSoftMaxLayer.cc
@@ -34,6 +34,16 @@ LogSoftMaxLayer::LogSoftMaxLayer() : _input(nullptr), _output(nullptr), _beta(0.
// DO NOTHING
}
+void LogSoftMaxLayer::PopulateLookupTable(const float kBeta)
+{
+ const float scale = -_input->data_scale() * kBeta;
+ const int32_t max_uint8 = std::numeric_limits<uint8_t>::max();
+ for (int32_t val = 0; val <= max_uint8; ++val)
+ {
+ _table[max_uint8 - val] = expf(scale * val);
+ }
+}
+
void LogSoftMaxLayer::logsoftmaxFloat32()
{
nnfw::cker::SoftmaxParams op_params;
@@ -46,7 +56,15 @@ void LogSoftMaxLayer::logsoftmaxFloat32()
void LogSoftMaxLayer::logsoftmaxQuant8()
{
- // NYI
+ nnfw::cker::SoftmaxParams op_params;
+ op_params.beta = _beta;
+ op_params.axis = _axis;
+ op_params.table = _table;
+ op_params.zero_point = _output->data_offset();
+ op_params.scale = _output->data_scale();
+ nnfw::cker::LogSoftmax(op_params, _input->data_scale(), getTensorShape(_input),
+ reinterpret_cast<const uint8_t *>(_input->buffer()),
+ getTensorShape(_output), reinterpret_cast<uint8_t *>(_output->buffer()));
}
void LogSoftMaxLayer::configure(const IPortableTensor *input, const float beta, const int axis,
@@ -56,6 +74,10 @@ void LogSoftMaxLayer::configure(const IPortableTensor *input, const float beta,
_output = output;
_beta = beta;
_axis = axis;
+ if (_input->data_type() == OperandType::QUANT_UINT8_ASYMM)
+ {
+ PopulateLookupTable(_beta);
+ }
}
void LogSoftMaxLayer::run()
@@ -66,7 +88,7 @@ void LogSoftMaxLayer::run()
}
else if (_input->data_type() == OperandType::QUANT_UINT8_ASYMM)
{
- throw std::runtime_error{"LogSoftmax : NYI"};
+ logsoftmaxQuant8();
}
else
{
diff --git a/runtime/onert/backend/cpu/ops/LogSoftMaxLayer.h b/runtime/onert/backend/cpu/ops/LogSoftMaxLayer.h
index ba9deca17..1533f3361 100644
--- a/runtime/onert/backend/cpu/ops/LogSoftMaxLayer.h
+++ b/runtime/onert/backend/cpu/ops/LogSoftMaxLayer.h
@@ -45,12 +45,15 @@ public:
void run();
+ void PopulateLookupTable(const float kBeta);
+
private:
const IPortableTensor *_input;
IPortableTensor *_output;
float _beta;
int _axis;
+ float _table[256];
};
} // namespace ops
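PopulateLookupTable stores exp(-input_scale * beta * v) at index 255 - v, so a single 256-entry table covers every possible distance between a quantized input and its row maximum. The float sketch below shows what the table encodes and how a log-softmax value can be recovered from it; the offset-indexing and the final requantization with op_params.scale/zero_point are assumptions about what nnfw::cker::LogSoftmax does internally:

// Hedged float sketch of the quantized log-softmax lookup table;
// the actual nnfw::cker::LogSoftmax kernel is not shown in this diff.
#include <cmath>
#include <cstdint>
#include <cstdio>
#include <vector>

int main()
{
  const float input_scale = 0.1f, beta = 1.0f;
  const int32_t max_uint8 = 255;
  float table[256];
  for (int32_t val = 0; val <= max_uint8; ++val) // same construction as PopulateLookupTable
    table[max_uint8 - val] = std::exp(-input_scale * beta * val);

  // One row of quantized inputs; the row maximum anchors every lookup.
  std::vector<uint8_t> in = {200, 180, 150};
  const uint8_t max_in = 200;
  float sum = 0.0f;
  for (uint8_t q : in)
    sum += table[max_uint8 - max_in + q]; // = exp(beta * scale * (q - max_in)), zero point cancels
  for (uint8_t q : in)
  {
    const float log_softmax = input_scale * beta * (q - max_in) - std::log(sum);
    std::printf("%f\n", log_softmax); // real-valued result; requantization is omitted here
  }
}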
diff --git a/runtime/onert/backend/cpu/ops/LogicalNotLayer.cc b/runtime/onert/backend/cpu/ops/LogicalNotLayer.cc
deleted file mode 100644
index f2192c148..000000000
--- a/runtime/onert/backend/cpu/ops/LogicalNotLayer.cc
+++ /dev/null
@@ -1,64 +0,0 @@
-/*
- * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#include "LogicalNotLayer.h"
-
-#include "OperationUtils.h"
-
-#include <cker/operation/LogicalNot.h>
-
-namespace onert
-{
-namespace backend
-{
-namespace cpu
-{
-namespace ops
-{
-
-LogicalNotLayer::LogicalNotLayer() : _input(nullptr), _output(nullptr)
-{
- // DO NOTHING
-}
-
-void LogicalNotLayer::logicalNotBool8()
-{
- nnfw::cker::LogicalNot(getTensorShape(_input), reinterpret_cast<const bool *>(_input->buffer()),
- getTensorShape(_output), reinterpret_cast<bool *>(_output->buffer()));
-}
-
-void LogicalNotLayer::configure(const IPortableTensor *input, IPortableTensor *output)
-{
- _input = input;
- _output = output;
-}
-
-void LogicalNotLayer::run()
-{
- if (_input->data_type() == OperandType::BOOL8)
- {
- logicalNotBool8();
- }
- else
- {
- throw std::runtime_error{"LogicalNot: unsupported data type"};
- }
-}
-
-} // namespace ops
-} // namespace cpu
-} // namespace backend
-} // namespace onert
diff --git a/runtime/onert/backend/cpu/ops/LogicalNotLayer.h b/runtime/onert/backend/cpu/ops/LogicalNotLayer.h
deleted file mode 100644
index 5543cca3d..000000000
--- a/runtime/onert/backend/cpu/ops/LogicalNotLayer.h
+++ /dev/null
@@ -1,56 +0,0 @@
-/*
- * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#ifndef __ONERT_BACKEND_CPU_OPS_LOGICALNOTLAYER_H__
-#define __ONERT_BACKEND_CPU_OPS_LOGICALNOTLAYER_H__
-
-#include <backend/IPortableTensor.h>
-
-#include <exec/IFunction.h>
-
-namespace onert
-{
-namespace backend
-{
-namespace cpu
-{
-namespace ops
-{
-
-class LogicalNotLayer : public ::onert::exec::IFunction
-{
-public:
- LogicalNotLayer();
-
-public:
- void configure(const IPortableTensor *input, IPortableTensor *output);
-
- void run() override;
-
-private:
- void logicalNotBool8();
-
-private:
- const IPortableTensor *_input;
- IPortableTensor *_output;
-};
-
-} // namespace ops
-} // namespace cpu
-} // namespace backend
-} // namespace onert
-
-#endif // __ONERT_BACKEND_CPU_OPS_LOGICALNOTLAYER_H__
diff --git a/runtime/onert/backend/cpu/ops/LogicalOrLayer.cc b/runtime/onert/backend/cpu/ops/LogicalOrLayer.cc
deleted file mode 100644
index 5b7c9f6f0..000000000
--- a/runtime/onert/backend/cpu/ops/LogicalOrLayer.cc
+++ /dev/null
@@ -1,76 +0,0 @@
-/*
- * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#include "LogicalOrLayer.h"
-
-#include "OperationUtils.h"
-
-#include <cker/operation/LogicalOr.h>
-
-namespace onert
-{
-namespace backend
-{
-namespace cpu
-{
-namespace ops
-{
-void LogicalOrLayer::lorBool8()
-{
- if (!HaveSameShapes(_lhs, _rhs))
- {
- nnfw::cker::LogicalOrBroadcast<bool>(
- getTensorShape(_lhs), reinterpret_cast<const bool *>(_lhs->buffer()), getTensorShape(_rhs),
- reinterpret_cast<const bool *>(_rhs->buffer()), getTensorShape(_output),
- reinterpret_cast<bool *>(_output->buffer()));
- }
- else
- {
- nnfw::cker::LogicalOrElementwise<bool>(getTensorShape(_lhs),
- reinterpret_cast<const bool *>(_lhs->buffer()),
- reinterpret_cast<const bool *>(_rhs->buffer()),
- reinterpret_cast<bool *>(_output->buffer()));
- }
-}
-
-void LogicalOrLayer::configure(const IPortableTensor *lhs, const IPortableTensor *rhs,
- IPortableTensor *output)
-{
- assert(lhs != nullptr);
- assert(rhs != nullptr);
- assert(output != nullptr);
-
- _lhs = lhs;
- _rhs = rhs;
- _output = output;
-}
-
-void LogicalOrLayer::run()
-{
- if ((_lhs->data_type() == OperandType::BOOL8) && (_rhs->data_type() == OperandType::BOOL8))
- {
- lorBool8();
- }
- else
- {
- throw std::runtime_error{"LogicalOr: Unsupported data type"};
- }
-}
-
-} // namespace ops
-} // namespace cpu
-} // namespace backend
-} // namespace onert
diff --git a/runtime/onert/backend/cpu/ops/LogicalOrLayer.h b/runtime/onert/backend/cpu/ops/LogicalOrLayer.h
deleted file mode 100644
index efaf396e8..000000000
--- a/runtime/onert/backend/cpu/ops/LogicalOrLayer.h
+++ /dev/null
@@ -1,59 +0,0 @@
-/*
- * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#ifndef __ONERT_BACKEND_CPU_OPS_LOGICAL_OR_LAYER_H__
-#define __ONERT_BACKEND_CPU_OPS_LOGICAL_OR_LAYER_H__
-
-#include <backend/IPortableTensor.h>
-
-#include <exec/IFunction.h>
-
-namespace onert
-{
-namespace backend
-{
-namespace cpu
-{
-namespace ops
-{
-class LogicalOrLayer : public ::onert::exec::IFunction
-{
-public:
- LogicalOrLayer() : _lhs(nullptr), _rhs(nullptr), _output(nullptr)
- {
- // Nothing
- }
-
-public:
- void configure(const IPortableTensor *_lhs, const IPortableTensor *_rhs, IPortableTensor *output);
-
- void run() override;
-
-private:
- void lorBool8();
-
-private:
- const IPortableTensor *_lhs;
- const IPortableTensor *_rhs;
- IPortableTensor *_output;
-};
-
-} // namespace ops
-} // namespace cpu
-} // namespace backend
-} // namespace onert
-
-#endif // __ONERT_BACKEND_CPU_OPS_LOGICAL_OR_LAYER_H__
diff --git a/runtime/onert/backend/cpu/ops/LogisticLayer.cc b/runtime/onert/backend/cpu/ops/LogisticLayer.cc
deleted file mode 100644
index 140ab4d2c..000000000
--- a/runtime/onert/backend/cpu/ops/LogisticLayer.cc
+++ /dev/null
@@ -1,108 +0,0 @@
-/*
- * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#include "LogisticLayer.h"
-
-#include "OperationUtils.h"
-
-#include <cker/operation/Logistic.h>
-
-namespace onert
-{
-namespace backend
-{
-namespace cpu
-{
-namespace ops
-{
-
-LogisticLayer::LogisticLayer() : _input(nullptr), _output(nullptr)
-{
- // DO NOTHING
-}
-
-void LogisticLayer::populateLookupTable()
-{
- const auto input_scale = static_cast<double>(_input->data_scale());
- const auto input_zero_point = static_cast<int32_t>(_input->data_offset());
- const auto output_scale = static_cast<double>(_output->data_scale());
- const auto output_zero_point = static_cast<int32_t>(_output->data_offset());
- const float inverse_scale = 1 / output_scale;
- int32_t maxval = std::numeric_limits<uint8_t>::max();
- int32_t minval = std::numeric_limits<uint8_t>::min();
- for (int32_t val = minval; val <= maxval; ++val)
- {
- const float dequantized = input_scale * (val - input_zero_point);
- const float transformed = 1.0f / (1.0f + std::exp(-dequantized));
- const float rescaled = std::round(transformed * inverse_scale);
- const int32_t quantized = static_cast<int32_t>(rescaled + output_zero_point);
- _table[val] = static_cast<uint8_t>(std::max(std::min(maxval, quantized), minval));
- }
-}
-
-void LogisticLayer::logisticFloat32()
-{
- nnfw::cker::Logistic(getTensorShape(_input), reinterpret_cast<const float *>(_input->buffer()),
- getTensorShape(_output), reinterpret_cast<float *>(_output->buffer()));
-}
-
-void LogisticLayer::logisticQuant8()
-{
- const int size = MatchingFlatSize(getTensorShape(_input), getTensorShape(_output));
- const uint8_t *input_data = reinterpret_cast<const uint8_t *>(_input->buffer());
- uint8_t *output_data = reinterpret_cast<uint8_t *>(_output->buffer());
-
- for (int i = 0; i < size; ++i)
- {
- output_data[i] = _table[input_data[i]];
- }
-}
-
-void LogisticLayer::configure(const IPortableTensor *input, IPortableTensor *output)
-{
- _input = input;
- _output = output;
-
- if (_input->data_type() == OperandType::QUANT_UINT8_ASYMM)
- {
- if (_output->data_scale() != 1.f / 256)
- {
- throw std::runtime_error{"incorrect scale for output"};
- }
- populateLookupTable();
- }
-}
-
-void LogisticLayer::run()
-{
- if (_input->data_type() == OperandType::FLOAT32)
- {
- logisticFloat32();
- }
- else if (_input->data_type() == OperandType::QUANT_UINT8_ASYMM)
- {
- logisticQuant8();
- }
- else
- {
- throw std::runtime_error{"Logistic: unsupported data type"};
- }
-}
-
-} // namespace ops
-} // namespace cpu
-} // namespace backend
-} // namespace onert
diff --git a/runtime/onert/backend/cpu/ops/LogisticLayer.h b/runtime/onert/backend/cpu/ops/LogisticLayer.h
deleted file mode 100644
index cac77939d..000000000
--- a/runtime/onert/backend/cpu/ops/LogisticLayer.h
+++ /dev/null
@@ -1,60 +0,0 @@
-/*
- * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#ifndef __ONERT_BACKEND_CPU_OPS_LOGISTICLAYER_H__
-#define __ONERT_BACKEND_CPU_OPS_LOGISTICLAYER_H__
-
-#include <backend/IPortableTensor.h>
-
-#include <exec/IFunction.h>
-
-namespace onert
-{
-namespace backend
-{
-namespace cpu
-{
-namespace ops
-{
-
-class LogisticLayer : public ::onert::exec::IFunction
-{
-public:
- LogisticLayer();
-
-public:
- void logisticFloat32();
-
- void logisticQuant8();
-
- void configure(const IPortableTensor *input, IPortableTensor *output);
- void populateLookupTable();
-
- void run() override;
-
-private:
- const IPortableTensor *_input;
- IPortableTensor *_output;
-
- uint8_t _table[256];
-};
-
-} // namespace ops
-} // namespace cpu
-} // namespace backend
-} // namespace onert
-
-#endif // __ONERT_BACKEND_CPU_OPS_LOGISTICLAYER_H__
diff --git a/runtime/onert/backend/cpu/ops/MaxLayer.cc b/runtime/onert/backend/cpu/ops/MaxLayer.cc
deleted file mode 100644
index 9631983be..000000000
--- a/runtime/onert/backend/cpu/ops/MaxLayer.cc
+++ /dev/null
@@ -1,85 +0,0 @@
-/*
- * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#include "MaxLayer.h"
-
-#include "OperationUtils.h"
-
-#include <cker/operation/MaxMin.h>
-
-namespace onert
-{
-namespace backend
-{
-namespace cpu
-{
-namespace ops
-{
-
-template <typename T> void MaxLayer::maximum()
-{
- nnfw::cker::Max<T>(getTensorShape(_lhs), reinterpret_cast<const T *>(_lhs->buffer()),
- getTensorShape(_rhs), reinterpret_cast<const T *>(_rhs->buffer()),
- getTensorShape(_output), reinterpret_cast<T *>(_output->buffer()));
-}
-
-void MaxLayer::maxQuant8()
-{
- if (_lhs->data_scale() == _rhs->data_scale() && _lhs->data_scale() == _output->data_scale())
- {
- if (_lhs->data_offset() == _rhs->data_offset() && _lhs->data_offset() == _output->data_offset())
- {
- return nnfw::cker::Max<uint8_t>(
- getTensorShape(_lhs), reinterpret_cast<const uint8_t *>(_lhs->buffer()),
- getTensorShape(_rhs), reinterpret_cast<const uint8_t *>(_rhs->buffer()),
- getTensorShape(_output), reinterpret_cast<uint8_t *>(_output->buffer()));
- }
- }
- throw std::runtime_error("Max NYI for quantized");
-}
-
-void MaxLayer::configure(const IPortableTensor *lhs, const IPortableTensor *rhs,
- IPortableTensor *output)
-{
- assert(lhs != nullptr);
- assert(rhs != nullptr);
- assert(output != nullptr);
-
- _lhs = lhs;
- _rhs = rhs;
- _output = output;
-}
-
-void MaxLayer::run()
-{
- if (_lhs->data_type() == OperandType::FLOAT32)
- {
- maximum<float>();
- }
- else if (_lhs->data_type() == OperandType::QUANT_UINT8_ASYMM)
- {
- maxQuant8();
- }
- else
- {
- throw std::runtime_error{"Max: unsupported data type"};
- }
-}
-
-} // namespace ops
-} // namespace cpu
-} // namespace backend
-} // namespace onert
diff --git a/runtime/onert/backend/cpu/ops/MaxPoolLayer.cc b/runtime/onert/backend/cpu/ops/MaxPoolLayer.cc
deleted file mode 100644
index 1e983b408..000000000
--- a/runtime/onert/backend/cpu/ops/MaxPoolLayer.cc
+++ /dev/null
@@ -1,115 +0,0 @@
-/*
- * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#include "MaxPoolLayer.h"
-
-#include <cker/operation/MaxPool.h>
-
-namespace onert
-{
-namespace backend
-{
-namespace cpu
-{
-namespace ops
-{
-
-#define MAXPOOLING_PARAMETERS \
- nnfw::cker::PoolParams op_params; \
- op_params.stride_height = _strideHeight; \
- op_params.stride_width = _strideWidth; \
- op_params.filter_height = _kernelHeight; \
- op_params.filter_width = _kernelWidth; \
- op_params.padding_values.height = (int8_t)_paddingTop; \
- op_params.padding_values.width = (int8_t)_paddingLeft;
-
-MaxPoolLayer::MaxPoolLayer()
- : _input(nullptr), _output(nullptr), _paddingLeft(0), _paddingTop(0), _paddingRight(0),
- _paddingBottom(0), _strideWidth(0), _strideHeight(0), _kernelWidth(0), _kernelHeight(0),
- _activation(ir::Activation::NONE)
-{
- // DO NOTHING
-}
-
-void MaxPoolLayer::maxPoolFloat32()
-{
- MAXPOOLING_PARAMETERS
- float output_activation_min = 0, output_activation_max = 0;
- CalculateActivationRange(_activation, &output_activation_min, &output_activation_max);
- op_params.float_activation_min = output_activation_min;
- op_params.float_activation_max = output_activation_max;
-
- nnfw::cker::MaxPool(op_params, getTensorShape(_input),
- reinterpret_cast<const float *>(_input->buffer()), getTensorShape(_output),
- reinterpret_cast<float *>(_output->buffer()));
-}
-void MaxPoolLayer::maxPoolQuant8()
-{
- MAXPOOLING_PARAMETERS
- int32_t output_activation_min = 0;
- int32_t output_activation_max = 0;
- CalculateActivationRangeUint8(_activation, _output, &output_activation_min,
- &output_activation_max);
- op_params.quantized_activation_min = output_activation_min;
- op_params.quantized_activation_max = output_activation_max;
-
- nnfw::cker::MaxPool(op_params, getTensorShape(_input),
- reinterpret_cast<const uint8_t *>(_input->buffer()), getTensorShape(_output),
- reinterpret_cast<uint8_t *>(_output->buffer()));
-}
-
-void MaxPoolLayer::configure(const IPortableTensor *input, const uint32_t paddingLeft,
- const uint32_t paddingRight, const uint32_t paddingTop,
- const uint32_t paddingBottom, const uint32_t strideWidth,
- const uint32_t strideHeight, const uint32_t kernelWidth,
- const uint32_t kernelHeight, const ir::Activation activation,
- IPortableTensor *output)
-{
- _input = input;
- _paddingLeft = paddingLeft;
- _paddingRight = paddingRight;
- _paddingTop = paddingTop;
- _paddingBottom = paddingBottom;
- _strideWidth = strideWidth;
- _strideHeight = strideHeight;
- _kernelWidth = kernelWidth;
- _kernelHeight = kernelHeight;
- _activation = activation;
- _output = output;
-}
-
-void MaxPoolLayer::run()
-{
- if (_input->data_type() == OperandType::FLOAT32)
- {
- maxPoolFloat32();
- }
- else if (_input->data_type() == OperandType::QUANT_UINT8_ASYMM)
- {
- maxPoolQuant8();
- }
- else
- {
- throw std::runtime_error{"MaxPool: unsupported data type"};
- }
-}
-
-#undef MAXPOOLING_PARAMETERS
-
-} // namespace ops
-} // namespace cpu
-} // namespace backend
-} // namespace onert
diff --git a/runtime/onert/backend/cpu/ops/MinLayer.cc b/runtime/onert/backend/cpu/ops/MinLayer.cc
deleted file mode 100644
index 20859673b..000000000
--- a/runtime/onert/backend/cpu/ops/MinLayer.cc
+++ /dev/null
@@ -1,89 +0,0 @@
-/*
- * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#include "MinLayer.h"
-
-#include "OperationUtils.h"
-
-#include <cker/operation/MaxMin.h>
-
-namespace onert
-{
-namespace backend
-{
-namespace cpu
-{
-namespace ops
-{
-
-template <typename T> void MinLayer::minimum()
-{
- nnfw::cker::Min<T>(getTensorShape(_lhs), reinterpret_cast<const T *>(_lhs->buffer()),
- getTensorShape(_rhs), reinterpret_cast<const T *>(_rhs->buffer()),
- getTensorShape(_output), reinterpret_cast<T *>(_output->buffer()));
-}
-
-void MinLayer::minQuant8()
-{
- if (_lhs->data_scale() == _rhs->data_scale() && _lhs->data_scale() == _output->data_scale())
- {
- if (_lhs->data_offset() == _rhs->data_offset() && _lhs->data_offset() == _output->data_offset())
- {
- return nnfw::cker::Min<uint8_t>(
- getTensorShape(_lhs), reinterpret_cast<const uint8_t *>(_lhs->buffer()),
- getTensorShape(_rhs), reinterpret_cast<const uint8_t *>(_rhs->buffer()),
- getTensorShape(_output), reinterpret_cast<uint8_t *>(_output->buffer()));
- }
- }
- throw std::runtime_error("Min NYI for quantized");
-}
-
-void MinLayer::configure(const IPortableTensor *lhs, const IPortableTensor *rhs,
- IPortableTensor *output)
-{
- assert(lhs != nullptr);
- assert(rhs != nullptr);
- assert(output != nullptr);
-
- _lhs = lhs;
- _rhs = rhs;
- _output = output;
-}
-
-void MinLayer::run()
-{
- if (_lhs->data_type() == OperandType::FLOAT32)
- {
- minimum<float>();
- }
- else if (_lhs->data_type() == OperandType::QUANT_UINT8_ASYMM)
- {
- minQuant8();
- }
- else if (_lhs->data_type() == OperandType::INT32)
- {
- minimum<int32_t>();
- }
- else
- {
- throw std::runtime_error{"Min: unsupported data type"};
- }
-}
-
-} // namespace ops
-} // namespace cpu
-} // namespace backend
-} // namespace onert
diff --git a/runtime/onert/backend/cpu/ops/MinLayer.h b/runtime/onert/backend/cpu/ops/MinLayer.h
deleted file mode 100644
index 9bd114e54..000000000
--- a/runtime/onert/backend/cpu/ops/MinLayer.h
+++ /dev/null
@@ -1,61 +0,0 @@
-/*
- * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#ifndef __ONERT_BACKEND_CPU_OPS_MINLAYER_H__
-#define __ONERT_BACKEND_CPU_OPS_MINLAYER_H__
-
-#include <backend/IPortableTensor.h>
-
-#include <exec/IFunction.h>
-
-namespace onert
-{
-namespace backend
-{
-namespace cpu
-{
-namespace ops
-{
-
-class MinLayer : public ::onert::exec::IFunction
-{
-public:
- MinLayer() : _lhs(nullptr), _rhs(nullptr), _output(nullptr)
- {
- // DO NOTHING
- }
-
-public:
- template <typename T> void minimum();
-
- void minQuant8();
-
- void configure(const IPortableTensor *lhs, const IPortableTensor *rhs, IPortableTensor *output);
-
- void run() override;
-
-private:
- const IPortableTensor *_lhs;
- const IPortableTensor *_rhs;
- IPortableTensor *_output;
-};
-
-} // namespace ops
-} // namespace cpu
-} // namespace backend
-} // namespace onert
-
-#endif // __ONERT_BACKEND_CPU_OPS_MINLAYER_H__
diff --git a/runtime/onert/backend/cpu/ops/MulLayer.cc b/runtime/onert/backend/cpu/ops/MulLayer.cc
deleted file mode 100644
index eef73edf3..000000000
--- a/runtime/onert/backend/cpu/ops/MulLayer.cc
+++ /dev/null
@@ -1,116 +0,0 @@
-/*
- * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#include "MulLayer.h"
-
-#include <cker/operation/BinaryArithmeticOps.h>
-
-namespace onert
-{
-namespace backend
-{
-namespace cpu
-{
-namespace ops
-{
-
-void MulLayer::mulFloat32()
-{
- float output_activation_min = 0, output_activation_max = 0;
- CalculateActivationRange(_activation, &output_activation_min, &output_activation_max);
- nnfw::cker::BinaryArithmeticOpParam op_params;
- op_params.float_activation_max = output_activation_max;
- op_params.float_activation_min = output_activation_min;
-
- const bool need_broadcast =
- nnfw::cker::ProcessBroadcastShapes(getTensorShape(_lhs), getTensorShape(_rhs), &op_params);
- if (need_broadcast)
- {
- nnfw::cker::BroadcastBinaryArithmeticOp<nnfw::cker::BinaryArithmeticOpType::MUL>(
- op_params, getTensorShape(_lhs), reinterpret_cast<const float *>(_lhs->buffer()),
- getTensorShape(_rhs), reinterpret_cast<const float *>(_rhs->buffer()),
- getTensorShape(_output), reinterpret_cast<float *>(_output->buffer()));
- return;
- }
-
- nnfw::cker::BinaryArithmeticOp<nnfw::cker::BinaryArithmeticOpType::MUL>(
- op_params, getTensorShape(_lhs), reinterpret_cast<const float *>(_lhs->buffer()),
- getTensorShape(_rhs), reinterpret_cast<const float *>(_rhs->buffer()),
- getTensorShape(_output), reinterpret_cast<float *>(_output->buffer()));
-}
-
-void MulLayer::mulQuant8()
-{
- int32_t output_activation_min, output_activation_max;
- CalculateActivationRangeUint8(_activation, _output, &output_activation_min,
- &output_activation_max);
- nnfw::cker::BinaryArithmeticOpParam op_params;
-
- op_params.quantized_activation_max = output_activation_max;
- op_params.quantized_activation_min = output_activation_min;
- op_params.input1_offset = -_lhs->data_offset();
- op_params.input2_offset = -_rhs->data_offset();
- op_params.output_offset = _output->data_offset();
-
- double real_multiplier = _lhs->data_scale() * _rhs->data_scale() / _output->data_scale();
- QuantizeMultiplier(real_multiplier, &op_params.output_multiplier, &op_params.output_shift);
-
- const bool need_broadcast =
- nnfw::cker::ProcessBroadcastShapes(getTensorShape(_lhs), getTensorShape(_rhs), &op_params);
- if (need_broadcast)
- {
- nnfw::cker::BroadcastBinaryArithmeticOp<nnfw::cker::BinaryArithmeticOpType::MUL>(
- op_params, getTensorShape(_lhs), reinterpret_cast<const uint8_t *>(_lhs->buffer()),
- getTensorShape(_rhs), reinterpret_cast<const uint8_t *>(_rhs->buffer()),
- getTensorShape(_output), reinterpret_cast<uint8_t *>(_output->buffer()));
- return;
- }
-
- nnfw::cker::BinaryArithmeticOp<nnfw::cker::BinaryArithmeticOpType::MUL>(
- op_params, getTensorShape(_lhs), reinterpret_cast<const uint8_t *>(_lhs->buffer()),
- getTensorShape(_rhs), reinterpret_cast<const uint8_t *>(_rhs->buffer()),
- getTensorShape(_output), reinterpret_cast<uint8_t *>(_output->buffer()));
-}
-
-void MulLayer::configure(const IPortableTensor *lhs, const IPortableTensor *rhs,
- const ir::Activation activation, IPortableTensor *output)
-{
- _lhs = lhs;
- _rhs = rhs;
- _activation = activation;
- _output = output;
-}
-
-void MulLayer::run()
-{
- if (_output->data_type() == OperandType::FLOAT32)
- {
- mulFloat32();
- }
- else if (_output->data_type() == OperandType::QUANT_UINT8_ASYMM)
- {
- mulQuant8();
- }
- else
- {
- throw std::runtime_error{"Mul: unsupported data type"};
- }
-}
-
-} // namespace ops
-} // namespace cpu
-} // namespace backend
-} // namespace onert
diff --git a/runtime/onert/backend/cpu/ops/MulLayer.h b/runtime/onert/backend/cpu/ops/MulLayer.h
deleted file mode 100644
index 2c4a98875..000000000
--- a/runtime/onert/backend/cpu/ops/MulLayer.h
+++ /dev/null
@@ -1,65 +0,0 @@
-/*
- * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#ifndef __ONERT_BACKEND_CPU_OPS_MULLAYER_H__
-#define __ONERT_BACKEND_CPU_OPS_MULLAYER_H__
-
-#include <backend/IPortableTensor.h>
-#include "OperationUtils.h"
-
-#include <exec/IFunction.h>
-
-namespace onert
-{
-namespace backend
-{
-namespace cpu
-{
-namespace ops
-{
-
-class MulLayer : public ::onert::exec::IFunction
-{
-public:
- MulLayer() : _lhs(nullptr), _rhs(nullptr), _output(nullptr)
- {
- // DO NOTHING
- }
-
-public:
- void mulFloat32();
-
- void mulQuant8();
-
- void configure(const IPortableTensor *lhs, const IPortableTensor *rhs,
- const ir::Activation activation, IPortableTensor *output);
-
- void run() override;
-
-private:
- const IPortableTensor *_lhs;
- const IPortableTensor *_rhs;
- IPortableTensor *_output;
-
- ir::Activation _activation{ir::Activation::NONE};
-};
-
-} // namespace ops
-} // namespace cpu
-} // namespace backend
-} // namespace onert
-
-#endif // __ONERT_BACKEND_CPU_OPS_MULLAYER_H__
diff --git a/runtime/onert/backend/cpu/ops/NegLayer.cc b/runtime/onert/backend/cpu/ops/NegLayer.cc
deleted file mode 100644
index 2cb95b771..000000000
--- a/runtime/onert/backend/cpu/ops/NegLayer.cc
+++ /dev/null
@@ -1,70 +0,0 @@
-/*
- * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#include "NegLayer.h"
-
-#include "OperationUtils.h"
-
-#include <cker/operation/Elementwise.h>
-
-namespace onert
-{
-namespace backend
-{
-namespace cpu
-{
-namespace ops
-{
-
-NegLayer::NegLayer() : _input(nullptr), _output(nullptr)
-{
- // DO NOTHING
-}
-
-void NegLayer::negFloat32()
-{
- nnfw::cker::Neg(getTensorShape(_input), reinterpret_cast<const float *>(_input->buffer()),
- getTensorShape(_output), reinterpret_cast<float *>(_output->buffer()));
-}
-
-void NegLayer::negQuant8() { throw std::runtime_error{"NYI"}; }
-
-void NegLayer::configure(const IPortableTensor *input, IPortableTensor *output)
-{
- _input = input;
- _output = output;
-}
-
-void NegLayer::run()
-{
- if (_input->data_type() == OperandType::FLOAT32)
- {
- negFloat32();
- }
- else if (_input->data_type() == OperandType::QUANT_UINT8_ASYMM)
- {
- negQuant8();
- }
- else
- {
- throw std::runtime_error{"Neg: unsupported data type"};
- }
-}
-
-} // namespace ops
-} // namespace cpu
-} // namespace backend
-} // namespace onert
diff --git a/runtime/onert/backend/cpu/ops/NegLayer.h b/runtime/onert/backend/cpu/ops/NegLayer.h
deleted file mode 100644
index addf84ec2..000000000
--- a/runtime/onert/backend/cpu/ops/NegLayer.h
+++ /dev/null
@@ -1,57 +0,0 @@
-/*
- * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#ifndef __ONERT_BACKEND_CPU_OPS_NEGLAYER_H__
-#define __ONERT_BACKEND_CPU_OPS_NEGLAYER_H__
-
-#include <backend/IPortableTensor.h>
-
-#include <exec/IFunction.h>
-
-namespace onert
-{
-namespace backend
-{
-namespace cpu
-{
-namespace ops
-{
-
-class NegLayer : public ::onert::exec::IFunction
-{
-public:
- NegLayer();
-
-public:
- void negFloat32();
-
- void negQuant8();
-
- void configure(const IPortableTensor *input, IPortableTensor *output);
-
- void run() override;
-
-private:
- const IPortableTensor *_input;
- IPortableTensor *_output;
-};
-
-} // namespace ops
-} // namespace cpu
-} // namespace backend
-} // namespace onert
-
-#endif // __ONERT_BACKEND_CPU_OPS_NEGLAYER_H__
diff --git a/runtime/onert/backend/cpu/ops/PoolLayer.cc b/runtime/onert/backend/cpu/ops/PoolLayer.cc
new file mode 100644
index 000000000..85d02a751
--- /dev/null
+++ b/runtime/onert/backend/cpu/ops/PoolLayer.cc
@@ -0,0 +1,132 @@
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "PoolLayer.h"
+
+#include <cker/operation/AveragePool.h>
+#include <cker/operation/MaxPool.h>
+
+#include <unordered_map>
+
+namespace onert
+{
+namespace backend
+{
+namespace cpu
+{
+namespace ops
+{
+
+namespace
+{
+template <typename T>
+void avgPool2D(const nnfw::cker::PoolParams &params, const IPortableTensor *input,
+ IPortableTensor *output)
+{
+ nnfw::cker::AveragePool<T>(params, getTensorShape(input),
+ reinterpret_cast<const T *>(input->buffer()), getTensorShape(output),
+ reinterpret_cast<T *>(output->buffer()));
+}
+
+template <typename T>
+void maxPool2D(const nnfw::cker::PoolParams &params, const IPortableTensor *input,
+ IPortableTensor *output)
+{
+ nnfw::cker::MaxPool<T>(params, getTensorShape(input),
+ reinterpret_cast<const T *>(input->buffer()), getTensorShape(output),
+ reinterpret_cast<T *>(output->buffer()));
+}
+
+template <typename T>
+std::function<void(const IPortableTensor *, IPortableTensor *)>
+generateKernelGeneric(const nnfw::cker::PoolParams &params, PoolType op_type)
+{
+ if (op_type == PoolType::kAvg)
+ {
+ return std::bind(&avgPool2D<T>, params, std::placeholders::_1, std::placeholders::_2);
+ }
+ else if (op_type == PoolType::kMax)
+ {
+ return std::bind(&maxPool2D<T>, params, std::placeholders::_1, std::placeholders::_2);
+ }
+ else
+ {
+ throw std::runtime_error{"Pool: unsupported pool type"};
+ }
+}
+} // namespace
+
+PoolLayer::PoolLayer() : _input(nullptr), _output(nullptr), _kernel()
+{
+ // DO NOTHING
+}
+
+#define POOLING_PARAMETERS \
+ nnfw::cker::PoolParams op_params; \
+ op_params.stride_height = strideHeight; \
+ op_params.stride_width = strideWidth; \
+ op_params.filter_height = kernelHeight; \
+ op_params.filter_width = kernelWidth; \
+ op_params.padding_values.height = (int8_t)paddingTop; \
+ op_params.padding_values.width = (int8_t)paddingLeft;
+
+void PoolLayer::configure(const IPortableTensor *input, const uint32_t paddingLeft, const uint32_t,
+ const uint32_t paddingTop, const uint32_t, const uint32_t strideWidth,
+ const uint32_t strideHeight, const uint32_t kernelWidth,
+ const uint32_t kernelHeight, const ir::Activation activation,
+ IPortableTensor *output, const PoolType op_type)
+{
+ assert(input != nullptr);
+ assert(output != nullptr);
+
+ _input = input;
+ _output = output;
+
+ POOLING_PARAMETERS
+ if (_input->data_type() == OperandType::FLOAT32)
+ {
+ float output_activation_min = 0;
+ float output_activation_max = 0;
+ CalculateActivationRange<float>(activation, &output_activation_min, &output_activation_max);
+ op_params.float_activation_min = output_activation_min;
+ op_params.float_activation_max = output_activation_max;
+
+ _kernel = generateKernelGeneric<float>(op_params, op_type);
+ }
+ else if (_input->data_type() == OperandType::QUANT_UINT8_ASYMM)
+ {
+ int32_t output_activation_min = 0;
+ int32_t output_activation_max = 0;
+ CalculateActivationRangeUint8(activation, _output, &output_activation_min,
+ &output_activation_max);
+ op_params.quantized_activation_min = output_activation_min;
+ op_params.quantized_activation_max = output_activation_max;
+ _kernel = generateKernelGeneric<uint8_t>(op_params, op_type);
+ }
+ else
+ {
+ throw std::runtime_error{"Pool: unsupported data type"};
+ }
+}
+
+void PoolLayer::run() { _kernel(_input, _output); }
+
+#undef POOLING_PARAMETERS
+
+} // namespace ops
+} // namespace cpu
+} // namespace backend
+} // namespace onert
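
For illustration: the new PoolLayer selects the concrete cker pooling routine once in configure() and stores it as a std::function, so run() reduces to a single indirect call. Below is a minimal standalone sketch of this bind-at-configure pattern; the Params/Tensor types are stand-ins for nnfw::cker::PoolParams and IPortableTensor, not the actual onert classes.

    #include <functional>
    #include <iostream>
    #include <stdexcept>

    struct Params { int stride; };  // stand-in for nnfw::cker::PoolParams
    struct Tensor { float value; }; // stand-in for IPortableTensor

    enum class PoolType { kAvg, kMax };

    // Workers analogous to avgPool2D<T>/maxPool2D<T> above
    void avgPool(const Params &p, const Tensor *in, Tensor *out) { out->value = in->value / p.stride; }
    void maxPool(const Params &, const Tensor *in, Tensor *out) { out->value = in->value; }

    // configure()-time selection: parameters are baked in, tensors stay late-bound
    std::function<void(const Tensor *, Tensor *)> makeKernel(const Params &p, PoolType t)
    {
      if (t == PoolType::kAvg)
        return std::bind(&avgPool, p, std::placeholders::_1, std::placeholders::_2);
      if (t == PoolType::kMax)
        return std::bind(&maxPool, p, std::placeholders::_1, std::placeholders::_2);
      throw std::runtime_error{"unsupported pool type"};
    }

    int main()
    {
      Tensor in{8.0f}, out{};
      auto kernel = makeKernel(Params{2}, PoolType::kAvg); // what configure() does
      kernel(&in, &out);                                   // what run() does
      std::cout << out.value << '\n';                      // prints 4
    }
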
diff --git a/runtime/onert/backend/cpu/ops/MaxPoolLayer.h b/runtime/onert/backend/cpu/ops/PoolLayer.h
index 4c5109f64..b37835946 100644
--- a/runtime/onert/backend/cpu/ops/MaxPoolLayer.h
+++ b/runtime/onert/backend/cpu/ops/PoolLayer.h
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
@@ -14,8 +14,8 @@
* limitations under the License.
*/
-#ifndef __ONERT_BACKEND_CPU_OPS_MAXPOOLLAYER_H__
-#define __ONERT_BACKEND_CPU_OPS_MAXPOOLLAYER_H__
+#ifndef __ONERT_BACKEND_CPU_OPS_POOLLAYER_H__
+#define __ONERT_BACKEND_CPU_OPS_POOLLAYER_H__
#include <backend/IPortableTensor.h>
#include "OperationUtils.h"
@@ -31,22 +31,25 @@ namespace cpu
namespace ops
{
-class MaxPoolLayer : public ::onert::exec::IFunction
+enum class PoolType
{
-public:
- MaxPoolLayer();
+ kAvg,
+ kL2,
+ kMax,
+};
+class PoolLayer : public ::onert::exec::IFunction
+{
public:
- void maxPoolFloat32();
-
- void maxPoolQuant8();
+ PoolLayer();
+public:
void configure(const IPortableTensor *input, const uint32_t paddingLeft,
const uint32_t paddingRight, const uint32_t paddingTop,
const uint32_t paddingBottom, const uint32_t strideWidth,
const uint32_t strideHeight, const uint32_t kernelWidth,
const uint32_t kernelHeight, const ir::Activation activation,
- IPortableTensor *output);
+ IPortableTensor *output, const PoolType op_type);
void run() override;
@@ -54,17 +57,7 @@ private:
const IPortableTensor *_input;
IPortableTensor *_output;
- uint32_t _paddingLeft;
- uint32_t _paddingTop;
- uint32_t _paddingRight;
- uint32_t _paddingBottom;
-
- uint32_t _strideWidth;
- uint32_t _strideHeight;
- uint32_t _kernelWidth;
- uint32_t _kernelHeight;
-
- ir::Activation _activation;
+ std::function<void(const IPortableTensor *, IPortableTensor *)> _kernel;
};
} // namespace ops
@@ -72,4 +65,4 @@ private:
} // namespace backend
} // namespace onert
-#endif // __ONERT_BACKEND_CPU_OPS_MAXPOOLLAYER_H__
+#endif // __ONERT_BACKEND_CPU_OPS_POOLLAYER_H__
diff --git a/runtime/onert/backend/cpu/ops/QuantizeLayer.cc b/runtime/onert/backend/cpu/ops/QuantizeLayer.cc
deleted file mode 100644
index 45fc148bf..000000000
--- a/runtime/onert/backend/cpu/ops/QuantizeLayer.cc
+++ /dev/null
@@ -1,63 +0,0 @@
-/*
- * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#include "QuantizeLayer.h"
-
-#include <cker/operation/Quantize.h>
-
-namespace onert
-{
-namespace backend
-{
-namespace cpu
-{
-namespace ops
-{
-
-QuantizeLayer::QuantizeLayer() : _input(nullptr), _output(nullptr)
-{
- // DO NOTHING
-}
-
-template <typename InputT, typename OutputT> void QuantizeLayer::affineQuantize()
-{
- nnfw::cker::Quantize(getTensorShape(_input), reinterpret_cast<const InputT *>(_input->buffer()),
- getTensorShape(_output), reinterpret_cast<OutputT *>(_output->buffer()),
- _output->data_scale(), _output->data_offset());
-}
-
-void QuantizeLayer::configure(const IPortableTensor *input, IPortableTensor *output)
-{
- _input = input;
- _output = output;
-}
-
-void QuantizeLayer::run()
-{
- if (_input->data_type() == OperandType::FLOAT32)
- {
- affineQuantize<float, uint8_t>();
- }
- else
- {
- throw std::runtime_error{"Quantize: unsupported data type"};
- }
-}
-
-} // namespace ops
-} // namespace cpu
-} // namespace backend
-} // namespace onert
diff --git a/runtime/onert/backend/cpu/ops/QuantizeLayer.h b/runtime/onert/backend/cpu/ops/QuantizeLayer.h
deleted file mode 100644
index b4e7aca40..000000000
--- a/runtime/onert/backend/cpu/ops/QuantizeLayer.h
+++ /dev/null
@@ -1,56 +0,0 @@
-/*
- * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#ifndef __ONERT_BACKEND_CPU_OPS_QUANTIZELAYER_H__
-#define __ONERT_BACKEND_CPU_OPS_QUANTIZELAYER_H__
-
-#include <backend/IPortableTensor.h>
-#include "OperationUtils.h"
-
-#include <exec/IFunction.h>
-
-namespace onert
-{
-namespace backend
-{
-namespace cpu
-{
-namespace ops
-{
-
-class QuantizeLayer : public ::onert::exec::IFunction
-{
-public:
- QuantizeLayer();
-
-public:
- template <typename InputT, typename OutputT> void affineQuantize();
-
- void configure(const IPortableTensor *input, IPortableTensor *output);
-
- void run() override;
-
-private:
- const IPortableTensor *_input;
- IPortableTensor *_output;
-};
-
-} // namespace ops
-} // namespace cpu
-} // namespace backend
-} // namespace onert
-
-#endif // __ONERT_BACKEND_CPU_OPS_QUANTIZELAYER_H__
diff --git a/runtime/onert/backend/cpu/ops/RoundLayer.cc b/runtime/onert/backend/cpu/ops/RankLayer.cc
index 185d7554e..4690bdf72 100644
--- a/runtime/onert/backend/cpu/ops/RoundLayer.cc
+++ b/runtime/onert/backend/cpu/ops/RankLayer.cc
@@ -14,12 +14,10 @@
* limitations under the License.
*/
-#include "RoundLayer.h"
+#include "RankLayer.h"
#include "OperationUtils.h"
-#include <cker/operation/Round.h>
-
namespace onert
{
namespace backend
@@ -28,32 +26,28 @@ namespace cpu
{
namespace ops
{
-RoundLayer::RoundLayer() : _input(nullptr), _output(nullptr)
-{
- // DO NOTHING
-}
-void RoundLayer::roundFloat32()
+RankLayer::RankLayer() : _input(nullptr), _output(nullptr)
{
- nnfw::cker::Round(getTensorShape(_input), reinterpret_cast<const float *>(_input->buffer()),
- getTensorShape(_output), reinterpret_cast<float *>(_output->buffer()));
+ // DO NOTHING
}
-void RoundLayer::configure(const IPortableTensor *input, IPortableTensor *output)
+void RankLayer::configure(const IPortableTensor *input, IPortableTensor *output)
{
_input = input;
_output = output;
}
-void RoundLayer::run()
+void RankLayer::run()
{
- if (_input->data_type() == OperandType::FLOAT32)
+ if (_input->data_type() == OperandType::FLOAT32 || _input->data_type() == OperandType::INT32)
{
- roundFloat32();
+ int32_t *output_data = reinterpret_cast<int32_t *>(_output->buffer());
+ output_data[0] = _input->num_dimensions();
}
else
{
- throw std::runtime_error{"Round: unsupported data type"};
+ throw std::runtime_error{"Rank : unsupported data type"};
}
}
diff --git a/runtime/onert/backend/cpu/ops/ZerosLikeLayer.h b/runtime/onert/backend/cpu/ops/RankLayer.h
index 054894203..6282ceb07 100644
--- a/runtime/onert/backend/cpu/ops/ZerosLikeLayer.h
+++ b/runtime/onert/backend/cpu/ops/RankLayer.h
@@ -14,8 +14,8 @@
* limitations under the License.
*/
-#ifndef __ONERT_BACKEND_CPU_OPS_ZEROS_LIKE_LAYER_H__
-#define __ONERT_BACKEND_CPU_OPS_ZEROS_LIKE_LAYER_H__
+#ifndef __ONERT_BACKEND_CPU_OPS_RANKLAYER_H__
+#define __ONERT_BACKEND_CPU_OPS_RANKLAYER_H__
#include <backend/IPortableTensor.h>
@@ -29,11 +29,13 @@ namespace cpu
{
namespace ops
{
-class ZerosLikeLayer : public ::onert::exec::IFunction
+
+class RankLayer : public ::onert::exec::IFunction
{
public:
- ZerosLikeLayer();
+ RankLayer();
+public:
void configure(const IPortableTensor *input, IPortableTensor *output);
void run() override;
@@ -48,4 +50,4 @@ private:
} // namespace backend
} // namespace onert
-#endif // __ONERT_BACKEND_CPU_OPS_ZEROS_LIKE_LAYER_H__
+#endif // __ONERT_BACKEND_CPU_OPS_RANKLAYER_H__
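
For illustration: the RankLayer change above makes run() write the input's dimension count into a scalar int32 output, regardless of whether the input holds FLOAT32 or INT32 data. A trivial standalone sketch of that semantics (plain C++, with a hypothetical shape vector rather than an IPortableTensor):

    #include <cstdint>
    #include <iostream>
    #include <vector>

    // The rank of a tensor is simply its number of dimensions, written as one int32 value.
    int32_t rankOf(const std::vector<int> &shape) { return static_cast<int32_t>(shape.size()); }

    int main()
    {
      std::vector<int> shape{2, 3, 4};    // e.g. a FLOAT32 tensor of shape [2, 3, 4]
      std::cout << rankOf(shape) << '\n'; // prints 3
    }
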
diff --git a/runtime/onert/backend/cpu/ops/ReLU6Layer.cc b/runtime/onert/backend/cpu/ops/ReLU6Layer.cc
deleted file mode 100644
index 26eb35e0d..000000000
--- a/runtime/onert/backend/cpu/ops/ReLU6Layer.cc
+++ /dev/null
@@ -1,74 +0,0 @@
-/*
- * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#include "ReLU6Layer.h"
-
-#include "OperationUtils.h"
-
-#include <cker/operation/ReLU6.h>
-
-namespace onert
-{
-namespace backend
-{
-namespace cpu
-{
-namespace ops
-{
-
-ReLU6Layer::ReLU6Layer() : _input(nullptr), _output(nullptr)
-{
- // DO NOTHING
-}
-
-void ReLU6Layer::relu6Float32()
-{
- nnfw::cker::ReLU6(getTensorShape(_input), reinterpret_cast<const float *>(_input->buffer()),
- reinterpret_cast<float *>(_output->buffer()));
-}
-
-void ReLU6Layer::relu6Quant8()
-{
- // cker quant8 relu is not implemented yet
- throw std::runtime_error{"NYI"};
-}
-
-void ReLU6Layer::configure(const IPortableTensor *input, IPortableTensor *output)
-{
- _input = input;
- _output = output;
-}
-
-void ReLU6Layer::run()
-{
- if (_input->data_type() == OperandType::FLOAT32)
- {
- relu6Float32();
- }
- else if (_input->data_type() == OperandType::QUANT_UINT8_ASYMM)
- {
- relu6Quant8();
- }
- else
- {
- throw std::runtime_error{"ReLU6: unsupported data type"};
- }
-}
-
-} // namespace ops
-} // namespace cpu
-} // namespace backend
-} // namespace onert
diff --git a/runtime/onert/backend/cpu/ops/ReLULayer.cc b/runtime/onert/backend/cpu/ops/ReLULayer.cc
deleted file mode 100644
index cb4529feb..000000000
--- a/runtime/onert/backend/cpu/ops/ReLULayer.cc
+++ /dev/null
@@ -1,74 +0,0 @@
-/*
- * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#include "ReLULayer.h"
-
-#include "OperationUtils.h"
-
-#include <cker/operation/ReLU.h>
-
-namespace onert
-{
-namespace backend
-{
-namespace cpu
-{
-namespace ops
-{
-
-ReLULayer::ReLULayer() : _input(nullptr), _output(nullptr)
-{
- // DO NOTHING
-}
-
-void ReLULayer::reluFloat32()
-{
- nnfw::cker::ReLU(getTensorShape(_input), reinterpret_cast<const float *>(_input->buffer()),
- getTensorShape(_output), reinterpret_cast<float *>(_output->buffer()));
-}
-
-void ReLULayer::reluQuant8()
-{
- // cker quant8 relu is not implemented yet
- throw std::runtime_error{"NYI"};
-}
-
-void ReLULayer::configure(const IPortableTensor *input, IPortableTensor *output)
-{
- _input = input;
- _output = output;
-}
-
-void ReLULayer::run()
-{
- if (_input->data_type() == OperandType::FLOAT32)
- {
- reluFloat32();
- }
- else if (_input->data_type() == OperandType::QUANT_UINT8_ASYMM)
- {
- reluQuant8();
- }
- else
- {
- throw std::runtime_error{"ReLU: unsupported data type"};
- }
-}
-
-} // namespace ops
-} // namespace cpu
-} // namespace backend
-} // namespace onert
diff --git a/runtime/onert/backend/cpu/ops/ReduceLayer.cc b/runtime/onert/backend/cpu/ops/ReduceLayer.cc
index fe22dbed7..bb5f85d60 100644
--- a/runtime/onert/backend/cpu/ops/ReduceLayer.cc
+++ b/runtime/onert/backend/cpu/ops/ReduceLayer.cc
@@ -49,27 +49,31 @@ void evalLogic(const IPortableTensor *input, IPortableTensor *output, const std:
}
template <typename T>
-void evalType(const IPortableTensor *input, IPortableTensor *output, const std::vector<int> &axes,
- bool keep_dims, nnfw::cker::Reduce &reduce_kernel, ReduceType reduce_type)
+std::function<void(const IPortableTensor *, IPortableTensor *, const std::vector<int> &)>
+evalType(bool keep_dims, nnfw::cker::Reduce &reduce_kernel, ReduceType reduce_type)
{
switch (reduce_type)
{
case ReduceType::kSum:
- return evalLogic<T>(input, output, axes, keep_dims, static_cast<T>(0), reduce_kernel,
- [](const T current, const T in) -> T { return in + current; });
+ return std::bind(&evalLogic<T>, std::placeholders::_1, std::placeholders::_2,
+ std::placeholders::_3, keep_dims, static_cast<T>(0), reduce_kernel,
+ [](const T current, const T in) -> T { return in + current; });
break;
case ReduceType::kProd:
- return evalLogic<T>(input, output, axes, keep_dims, static_cast<T>(1), reduce_kernel,
- [](const T current, const T in) -> T { return in * current; });
+ return std::bind(&evalLogic<T>, std::placeholders::_1, std::placeholders::_2,
+ std::placeholders::_3, keep_dims, static_cast<T>(1), reduce_kernel,
+ [](const T current, const T in) -> T { return in * current; });
break;
case ReduceType::kMax:
- return evalLogic<T>(
- input, output, axes, keep_dims, std::numeric_limits<T>::lowest(), reduce_kernel,
+ return std::bind(
+ &evalLogic<T>, std::placeholders::_1, std::placeholders::_2, std::placeholders::_3,
+ keep_dims, std::numeric_limits<T>::lowest(), reduce_kernel,
[](const T current, const T in) -> T { return (in > current) ? in : current; });
break;
case ReduceType::kMin:
- return evalLogic<T>(
- input, output, axes, keep_dims, std::numeric_limits<T>::max(), reduce_kernel,
+ return std::bind(
+ &evalLogic<T>, std::placeholders::_1, std::placeholders::_2, std::placeholders::_3,
+ keep_dims, std::numeric_limits<T>::max(), reduce_kernel,
[](const T current, const T in) -> T { return (in < current) ? in : current; });
break;
default:
@@ -79,44 +83,44 @@ void evalType(const IPortableTensor *input, IPortableTensor *output, const std::
// Template specialization for bool type
template <>
-void evalType<bool>(const IPortableTensor *input, IPortableTensor *output,
- const std::vector<int> &axes, bool keep_dims, nnfw::cker::Reduce &reduce_kernel,
- ReduceType reduce_type)
+std::function<void(const IPortableTensor *, IPortableTensor *, const std::vector<int> &)>
+evalType<bool>(bool keep_dims, nnfw::cker::Reduce &reduce_kernel, ReduceType reduce_type)
{
switch (reduce_type)
{
case ReduceType::kAny:
- return evalLogic<bool>(
- input, output, axes, keep_dims, false, reduce_kernel,
- [](const bool current, const bool in) -> bool { return in || current; });
+ return std::bind(&evalLogic<bool>, std::placeholders::_1, std::placeholders::_2,
+ std::placeholders::_3, keep_dims, false, reduce_kernel,
+ [](const bool current, const bool in) -> bool { return in || current; });
break;
case ReduceType::kAll:
- return evalLogic<bool>(
- input, output, axes, keep_dims, true, reduce_kernel,
- [](const bool current, const bool in) -> bool { return in && current; });
+ return std::bind(&evalLogic<bool>, std::placeholders::_1, std::placeholders::_2,
+ std::placeholders::_3, keep_dims, true, reduce_kernel,
+ [](const bool current, const bool in) -> bool { return in && current; });
break;
default:
throw std::runtime_error{"Reduce: Unsupported reduce type"};
}
}
-template <ReduceType reduce_type>
-void evalGeneric(const IPortableTensor *input, IPortableTensor *output,
- const std::vector<int> &axes, bool keep_dims, nnfw::cker::Reduce &reduce_kernel)
+std::function<void(const IPortableTensor *, IPortableTensor *, const std::vector<int> &)>
+generateKernelGeneric(const IPortableTensor *input, bool keep_dims,
+ nnfw::cker::Reduce &reduce_kernel, ReduceType reduce_type)
{
switch (input->data_type())
{
case OperandType::FLOAT32:
- return evalType<float>(input, output, axes, keep_dims, reduce_kernel, reduce_type);
+ return evalType<float>(keep_dims, reduce_kernel, reduce_type);
case OperandType::INT32:
- return evalType<int32_t>(input, output, axes, keep_dims, reduce_kernel, reduce_type);
+ return evalType<int32_t>(keep_dims, reduce_kernel, reduce_type);
case OperandType::BOOL8:
- return evalType<bool>(input, output, axes, keep_dims, reduce_kernel, reduce_type);
+ return evalType<bool>(keep_dims, reduce_kernel, reduce_type);
default:
throw std::runtime_error{"Reduce(generic): unsupported data type"};
}
}
+// TODO Refine this function
void evalSumQuantized(const IPortableTensor *input, IPortableTensor *output,
const std::vector<int> &axes, bool keep_dims,
nnfw::cker::Reduce &reduce_kernel)
@@ -146,14 +150,15 @@ void evalSumQuantized(const IPortableTensor *input, IPortableTensor *output,
return;
}
- evalGeneric<ReduceType::kSum>(input, output, axes, keep_dims, reduce_kernel);
+ const auto kernel = generateKernelGeneric(input, keep_dims, reduce_kernel, ReduceType::kSum);
+ kernel(input, output, axes);
}
} // namespace
ReduceLayer::ReduceLayer()
- : _input(nullptr), _axes(nullptr), _output(nullptr), _reduceType(ReduceType::kAny),
- _keep_dims(false), _reduce_kernel(new nnfw::cker::Reduce())
+ : _input(nullptr), _axes(nullptr), _output(nullptr), _reduce_kernel(new nnfw::cker::Reduce()),
+ _kernel()
{
// DO NOTHING
}
@@ -166,43 +171,44 @@ void ReduceLayer::configure(const IPortableTensor *input, const IPortableTensor
_input = input;
_axes = axes;
_output = output;
- _reduceType = reduceType;
- _keep_dims = keep_dims;
-}
-void ReduceLayer::run()
-{
- const auto axes = getReducerAxes(_axes);
- switch (_reduceType)
+ switch (reduceType)
{
case ReduceType::kSum:
if (_input->data_type() == OperandType::QUANT_UINT8_ASYMM)
{
- evalSumQuantized(_input, _output, axes, _keep_dims, *_reduce_kernel);
+ _kernel = std::bind(&evalSumQuantized, std::placeholders::_1, std::placeholders::_2,
+ std::placeholders::_3, keep_dims, *_reduce_kernel);
return;
}
- evalGeneric<ReduceType::kSum>(_input, _output, axes, _keep_dims, *_reduce_kernel);
+ _kernel = generateKernelGeneric(_input, keep_dims, *_reduce_kernel, ReduceType::kSum);
break;
case ReduceType::kProd:
- evalGeneric<ReduceType::kProd>(_input, _output, axes, _keep_dims, *_reduce_kernel);
+ _kernel = generateKernelGeneric(_input, keep_dims, *_reduce_kernel, ReduceType::kProd);
break;
case ReduceType::kMax:
- evalGeneric<ReduceType::kMax>(_input, _output, axes, _keep_dims, *_reduce_kernel);
+ _kernel = generateKernelGeneric(_input, keep_dims, *_reduce_kernel, ReduceType::kMax);
break;
case ReduceType::kMin:
- evalGeneric<ReduceType::kMin>(_input, _output, axes, _keep_dims, *_reduce_kernel);
+ _kernel = generateKernelGeneric(_input, keep_dims, *_reduce_kernel, ReduceType::kMin);
break;
case ReduceType::kAny:
- evalGeneric<ReduceType::kAny>(_input, _output, axes, _keep_dims, *_reduce_kernel);
+ _kernel = generateKernelGeneric(_input, keep_dims, *_reduce_kernel, ReduceType::kAny);
break;
case ReduceType::kAll:
- evalGeneric<ReduceType::kAll>(_input, _output, axes, _keep_dims, *_reduce_kernel);
+ _kernel = generateKernelGeneric(_input, keep_dims, *_reduce_kernel, ReduceType::kAll);
break;
default:
throw std::runtime_error{"ReduceSum: Unsupported reduce type"};
}
}
+void ReduceLayer::run()
+{
+ const auto axes = getReducerAxes(_axes);
+ _kernel(_input, _output, axes);
+}
+
} // namespace ops
} // namespace cpu
} // namespace backend
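
For illustration: the ReduceLayer rework moves type and reduce-type dispatch into configure(). evalType<T> now returns a callable with keep_dims, the identity value, and the folding lambda already bound, while input, output, and axes remain placeholders until run(). A standalone sketch of that idea with simplified stand-in types (not the actual evalLogic/IPortableTensor interfaces):

    #include <functional>
    #include <iostream>
    #include <numeric>
    #include <vector>

    // Stand-in for evalLogic<T>: fold all elements with an initial value and a binary op.
    template <typename T>
    void reduceAll(const std::vector<T> &in, T &out, T init, std::function<T(T, T)> op)
    {
      out = std::accumulate(in.begin(), in.end(), init, op);
    }

    // Stand-in for evalType<T>: bake in init/op at "configure" time, leave the data late-bound.
    template <typename T>
    std::function<void(const std::vector<T> &, T &)> makeReducer(bool product)
    {
      T init = product ? static_cast<T>(1) : static_cast<T>(0);
      std::function<T(T, T)> op;
      if (product)
        op = [](T a, T b) { return a * b; };
      else
        op = [](T a, T b) { return a + b; };
      return std::bind(&reduceAll<T>, std::placeholders::_1, std::placeholders::_2, init, op);
    }

    int main()
    {
      std::vector<int> v{1, 2, 3, 4};
      int result = 0;
      auto sum = makeReducer<int>(false); // what configure() does
      sum(v, result);                     // what run() does
      std::cout << result << '\n';        // prints 10
    }
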
diff --git a/runtime/onert/backend/cpu/ops/ReduceLayer.h b/runtime/onert/backend/cpu/ops/ReduceLayer.h
index 8e7bcdb07..332d399bd 100644
--- a/runtime/onert/backend/cpu/ops/ReduceLayer.h
+++ b/runtime/onert/backend/cpu/ops/ReduceLayer.h
@@ -65,10 +65,11 @@ private:
const IPortableTensor *_input;
const IPortableTensor *_axes;
IPortableTensor *_output;
- ReduceType _reduceType;
- bool _keep_dims;
std::unique_ptr<nnfw::cker::Reduce> _reduce_kernel;
+ std::function<void(const IPortableTensor *input, IPortableTensor *output,
+ const std::vector<int> &axes)>
+ _kernel;
};
} // namespace ops
diff --git a/runtime/onert/backend/cpu/ops/RoundLayer.h b/runtime/onert/backend/cpu/ops/RoundLayer.h
deleted file mode 100644
index fc6a46c0d..000000000
--- a/runtime/onert/backend/cpu/ops/RoundLayer.h
+++ /dev/null
@@ -1,54 +0,0 @@
-/*
- * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#ifndef __ONERT_BACKEND_CPU_OPS_ROUNDLAYER_H__
-#define __ONERT_BACKEND_CPU_OPS_ROUNDLAYER_H__
-
-#include <backend/IPortableTensor.h>
-
-#include <exec/IFunction.h>
-
-namespace onert
-{
-namespace backend
-{
-namespace cpu
-{
-namespace ops
-{
-class RoundLayer : public ::onert::exec::IFunction
-{
-public:
- RoundLayer();
-
- void configure(const IPortableTensor *input, IPortableTensor *output);
-
- void run() override;
-
-private:
- void roundFloat32();
-
-private:
- const IPortableTensor *_input;
- IPortableTensor *_output;
-};
-
-} // namespace ops
-} // namespace cpu
-} // namespace backend
-} // namespace onert
-
-#endif // __ONERT_BACKEND_CPU_OPS_ROUNDLAYER_H__
diff --git a/runtime/onert/backend/cpu/ops/RsqrtLayer.cc b/runtime/onert/backend/cpu/ops/RsqrtLayer.cc
deleted file mode 100644
index 0bd468f96..000000000
--- a/runtime/onert/backend/cpu/ops/RsqrtLayer.cc
+++ /dev/null
@@ -1,69 +0,0 @@
-/*
- * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#include "RsqrtLayer.h"
-
-#include "OperationUtils.h"
-
-#include <cker/operation/Elementwise.h>
-
-namespace onert
-{
-namespace backend
-{
-namespace cpu
-{
-namespace ops
-{
-RsqrtLayer::RsqrtLayer() : _input(nullptr), _output(nullptr)
-{
- // DO NOTHING
-}
-
-void RsqrtLayer::rsqrtFloat32()
-{
- nnfw::cker::Rsqrt(getTensorShape(_input), reinterpret_cast<const float *>(_input->buffer()),
- getTensorShape(_output), reinterpret_cast<float *>(_output->buffer()));
-}
-
-void RsqrtLayer::rsqrtQuant8() { throw std::runtime_error{"NYI : QASYMM8 not supported"}; }
-
-void RsqrtLayer::configure(const IPortableTensor *input, IPortableTensor *output)
-{
- _input = input;
- _output = output;
-}
-
-void RsqrtLayer::run()
-{
- if (_input->data_type() == OperandType::FLOAT32)
- {
- rsqrtFloat32();
- }
- else if (_input->data_type() == OperandType::QUANT_UINT8_ASYMM)
- {
- rsqrtQuant8();
- }
- else
- {
- throw std::runtime_error{"Rsqrt: unsupported data type"};
- }
-}
-
-} // namespace ops
-} // namespace cpu
-} // namespace backend
-} // namespace onert
diff --git a/runtime/onert/backend/cpu/ops/RsqrtLayer.h b/runtime/onert/backend/cpu/ops/RsqrtLayer.h
deleted file mode 100644
index 49abbb08d..000000000
--- a/runtime/onert/backend/cpu/ops/RsqrtLayer.h
+++ /dev/null
@@ -1,53 +0,0 @@
-/*
- * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#ifndef __ONERT_BACKEND_CPU_OPS_RSQRTLAYER_H__
-#define __ONERT_BACKEND_CPU_OPS_RSQRTLAYER_H__
-
-#include <backend/IPortableTensor.h>
-
-#include <exec/IFunction.h>
-
-namespace onert
-{
-namespace backend
-{
-namespace cpu
-{
-namespace ops
-{
-class RsqrtLayer : public ::onert::exec::IFunction
-{
-public:
- RsqrtLayer();
-
- void configure(const IPortableTensor *input, IPortableTensor *output);
-
- void run() override;
-
-private:
- void rsqrtFloat32();
- void rsqrtQuant8();
- const IPortableTensor *_input;
- IPortableTensor *_output;
-};
-
-} // namespace ops
-} // namespace cpu
-} // namespace backend
-} // namespace onert
-
-#endif // __ONERT_BACKEND_CPU_OPS_RSQRTLAYER_H__
diff --git a/runtime/onert/backend/cpu/ops/SinLayer.cc b/runtime/onert/backend/cpu/ops/SinLayer.cc
deleted file mode 100644
index 2a6b11753..000000000
--- a/runtime/onert/backend/cpu/ops/SinLayer.cc
+++ /dev/null
@@ -1,68 +0,0 @@
-/*
- * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#include "SinLayer.h"
-#include "OperationUtils.h"
-
-#include <cker/operation/Elementwise.h>
-
-namespace onert
-{
-namespace backend
-{
-namespace cpu
-{
-namespace ops
-{
-SinLayer::SinLayer() : _input(nullptr), _output(nullptr)
-{
- // DO NOTHING
-}
-
-void SinLayer::sinFloat32()
-{
- nnfw::cker::Sin(getTensorShape(_input), reinterpret_cast<const float *>(_input->buffer()),
- getTensorShape(_output), reinterpret_cast<float *>(_output->buffer()));
-}
-
-void SinLayer::sinQuant8() { throw std::runtime_error{"NYI"}; }
-
-void SinLayer::configure(const IPortableTensor *input, IPortableTensor *output)
-{
- _input = input;
- _output = output;
-}
-
-void SinLayer::run()
-{
- if (_input->data_type() == OperandType::FLOAT32)
- {
- sinFloat32();
- }
- else if (_input->data_type() == OperandType::QUANT_UINT8_ASYMM)
- {
- sinQuant8();
- }
- else
- {
- throw std::runtime_error{"Sin: unsupported data type"};
- }
-}
-
-} // namespace ops
-} // namespace cpu
-} // namespace backend
-} // namespace onert
diff --git a/runtime/onert/backend/cpu/ops/SinLayer.h b/runtime/onert/backend/cpu/ops/SinLayer.h
deleted file mode 100644
index 348350f41..000000000
--- a/runtime/onert/backend/cpu/ops/SinLayer.h
+++ /dev/null
@@ -1,54 +0,0 @@
-/*
- * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#ifndef __ONERT_BACKEND_CPU_OPS_SINLAYER_H__
-#define __ONERT_BACKEND_CPU_OPS_SINLAYER_H__
-
-#include <backend/IPortableTensor.h>
-
-#include <exec/IFunction.h>
-
-namespace onert
-{
-namespace backend
-{
-namespace cpu
-{
-namespace ops
-{
-class SinLayer : public ::onert::exec::IFunction
-{
-public:
- SinLayer();
-
- void configure(const IPortableTensor *input, IPortableTensor *output);
-
- void run() override;
-
-private:
- void sinFloat32();
- void sinQuant8();
-
- const IPortableTensor *_input;
- IPortableTensor *_output;
-};
-
-} // namespace ops
-} // namespace cpu
-} // namespace backend
-} // namespace onert
-
-#endif // __ONERT_BACKEND_CPU_OPS_SINLAYER_H__
diff --git a/runtime/onert/backend/cpu/ops/SoftMaxLayer.cc b/runtime/onert/backend/cpu/ops/SoftMaxLayer.cc
index 6e2bb584a..095e67abc 100644
--- a/runtime/onert/backend/cpu/ops/SoftMaxLayer.cc
+++ b/runtime/onert/backend/cpu/ops/SoftMaxLayer.cc
@@ -34,55 +34,23 @@ SoftMaxLayer::SoftMaxLayer() : _input(nullptr), _output(nullptr), _beta(0.0)
// DO NOTHING
}
-// Performs softmax along the input of size (input_size * batch_size).
-void Softmax(const float *in, const int input_size, const int batch_size, const float beta,
- float *out)
+void SoftMaxLayer::softmaxFloat32()
{
- assert(input_size > 0);
-
- // For each batch
- for (int b = 0; b < batch_size; b++)
+ if (getNumberOfDimensions(_input) == 1)
{
- // Find the max coeff.
- float max_coeff = in[0];
- for (int i = 1; i < input_size; i++)
- {
- if (in[i] > max_coeff)
- max_coeff = in[i];
- }
-
- // Compute the normalized sum of exps.
- float exp_sum = 0.0;
- for (int i = 0; i < input_size; i++)
- {
- out[i] = std::exp((in[i] - max_coeff) * beta);
- exp_sum += out[i];
- }
-
- // Divide by the sum of exps.
- float reciprocal_sum_exp = 1.f / exp_sum;
- for (int i = 0; i < input_size; i++)
- {
- out[i] *= reciprocal_sum_exp;
- }
-
- // Advance in and out pointers for the next batch.
- in += input_size;
- out += input_size;
+ uint32_t input_size = getNumberOfElements(_input);
+ nnfw::cker::Softmax(reinterpret_cast<const float *>(_input->buffer()), input_size, 1, _beta,
+ reinterpret_cast<float *>(_output->buffer()));
}
-}
-
-void SoftMaxLayer::softmaxFloat32()
-{
- if (getNumberOfDimensions(_input) == 2)
+ else if (getNumberOfDimensions(_input) == 2)
{
uint32_t batch_size = getSizeOfDimension(_input, 0);
if (batch_size == 0)
throw std::runtime_error("batch_size should not be 0");
uint32_t input_size = getNumberOfElements(_input) / batch_size;
- Softmax(reinterpret_cast<const float *>(_input->buffer()), input_size, batch_size, _beta,
- reinterpret_cast<float *>(_output->buffer()));
+ nnfw::cker::Softmax(reinterpret_cast<const float *>(_input->buffer()), input_size, batch_size,
+ _beta, reinterpret_cast<float *>(_output->buffer()));
}
else if (getNumberOfDimensions(_input) == 4)
{
@@ -94,7 +62,7 @@ void SoftMaxLayer::softmaxFloat32()
}
else
{
- throw std::runtime_error{"only 2D and 4D tensors supported"};
+ throw std::runtime_error{"only 1D, 2D and 4D tensors supported"};
}
}
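
For reference: the 1D branch added above just reuses the batched cker kernel with batch_size = 1, treating the whole tensor as a single row. The math it computes is ordinary numerically stable softmax; a plain C++ reference sketch (not the nnfw::cker::Softmax implementation) is:

    #include <algorithm>
    #include <cmath>
    #include <iostream>
    #include <vector>

    // Softmax over one row of values with scaling factor beta, mirroring the
    // batch_size == 1 call in the 1D path above.
    std::vector<float> softmax(const std::vector<float> &in, float beta)
    {
      float max_v = in[0];
      for (float v : in)
        max_v = std::max(max_v, v); // subtract the max for numerical stability

      std::vector<float> out(in.size());
      float sum = 0.f;
      for (size_t i = 0; i < in.size(); ++i)
      {
        out[i] = std::exp((in[i] - max_v) * beta);
        sum += out[i];
      }
      for (float &v : out)
        v /= sum;
      return out;
    }

    int main()
    {
      for (float v : softmax({1.f, 2.f, 3.f}, 1.f))
        std::cout << v << ' '; // ~0.09 0.24 0.67
      std::cout << '\n';
    }
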
diff --git a/runtime/onert/backend/cpu/ops/SubLayer.cc b/runtime/onert/backend/cpu/ops/SubLayer.cc
deleted file mode 100644
index 597d52952..000000000
--- a/runtime/onert/backend/cpu/ops/SubLayer.cc
+++ /dev/null
@@ -1,162 +0,0 @@
-/*
- * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#include "SubLayer.h"
-
-#include <cker/operation/BinaryArithmeticOps.h>
-
-namespace onert
-{
-namespace backend
-{
-namespace cpu
-{
-namespace ops
-{
-
-void SubLayer::subFloat32()
-{
- float output_activation_min = 0, output_activation_max = 0;
- CalculateActivationRange(_activation, &output_activation_min, &output_activation_max);
- nnfw::cker::BinaryArithmeticOpParam op_params;
- op_params.float_activation_max = output_activation_max;
- op_params.float_activation_min = output_activation_min;
-
- const bool need_broadcast =
- nnfw::cker::ProcessBroadcastShapes(getTensorShape(_lhs), getTensorShape(_rhs), &op_params);
- if (need_broadcast)
- {
- nnfw::cker::BroadcastBinaryArithmeticOp<nnfw::cker::BinaryArithmeticOpType::SUB>(
- op_params, getTensorShape(_lhs), reinterpret_cast<const float *>(_lhs->buffer()),
- getTensorShape(_rhs), reinterpret_cast<const float *>(_rhs->buffer()),
- getTensorShape(_output), reinterpret_cast<float *>(_output->buffer()));
- return;
- }
-
- nnfw::cker::BinaryArithmeticOp<nnfw::cker::BinaryArithmeticOpType::SUB>(
- op_params, getTensorShape(_lhs), reinterpret_cast<const float *>(_lhs->buffer()),
- getTensorShape(_rhs), reinterpret_cast<const float *>(_rhs->buffer()),
- getTensorShape(_output), reinterpret_cast<float *>(_output->buffer()));
-}
-
-void SubLayer::subInt32()
-{
- int32_t output_activation_min = 0, output_activation_max = 0;
- CalculateActivationRange(_activation, &output_activation_min, &output_activation_max);
- nnfw::cker::BinaryArithmeticOpParam op_params;
- op_params.quantized_activation_max = output_activation_max;
- op_params.quantized_activation_min = output_activation_min;
-
- const bool need_broadcast =
- nnfw::cker::ProcessBroadcastShapes(getTensorShape(_lhs), getTensorShape(_rhs), &op_params);
- if (need_broadcast)
- {
- nnfw::cker::BroadcastBinaryArithmeticOp<nnfw::cker::BinaryArithmeticOpType::SUB>(
- op_params, getTensorShape(_lhs), reinterpret_cast<const int32_t *>(_lhs->buffer()),
- getTensorShape(_rhs), reinterpret_cast<const int32_t *>(_rhs->buffer()),
- getTensorShape(_output), reinterpret_cast<int32_t *>(_output->buffer()));
- return;
- }
-
- nnfw::cker::BinaryArithmeticOp<nnfw::cker::BinaryArithmeticOpType::SUB>(
- op_params, getTensorShape(_lhs), reinterpret_cast<const int32_t *>(_lhs->buffer()),
- getTensorShape(_rhs), reinterpret_cast<const int32_t *>(_rhs->buffer()),
- getTensorShape(_output), reinterpret_cast<int32_t *>(_output->buffer()));
-}
-
-void SubLayer::subQuant8()
-{
- int32_t output_activation_min, output_activation_max;
- CalculateActivationRangeUint8(_activation, _output, &output_activation_min,
- &output_activation_max);
- nnfw::cker::BinaryArithmeticOpParam op_params;
- op_params.quantized_activation_max = output_activation_max;
- op_params.quantized_activation_min = output_activation_min;
- // Parameters for scaled quantized computation
- op_params.left_shift = 20;
- // Zero-points of input and output tensors
- op_params.input1_offset = -_lhs->data_offset();
- op_params.input2_offset = -_rhs->data_offset();
- op_params.output_offset = _output->data_offset();
- assert((op_params.input1_offset >= 0) && (op_params.input1_offset <= 255));
- assert((op_params.input2_offset >= 0) && (op_params.input2_offset <= 255));
- assert((op_params.output_offset >= 0) && (op_params.output_offset <= 255));
-
- // Compute normalized scale for _lhs and _rhs values,
- // and represent in 32-bit fixed point
- const double norm_max_scale = 2 * std::max(_lhs->data_scale(), _rhs->data_scale());
- const double real_lhs_scale = _lhs->data_scale() / norm_max_scale;
- const double real_rhs_scale = _rhs->data_scale() / norm_max_scale;
- // output scale is used to normalize final result, so we invert the scale here
- const double real_output_scale =
- norm_max_scale / (_output->data_scale() * (1 << op_params.left_shift));
-
- // Represent the scales as fixed int32_t multipliers, and int32_t shifts
- QuantizeMultiplier(real_lhs_scale, &op_params.input1_multiplier, &op_params.input1_shift);
- QuantizeMultiplier(real_rhs_scale, &op_params.input2_multiplier, &op_params.input2_shift);
- op_params.input2_multiplier *= -1;
- QuantizeMultiplier(real_output_scale, &op_params.output_multiplier, &op_params.output_shift);
-
- const bool need_broadcast =
- nnfw::cker::ProcessBroadcastShapes(getTensorShape(_lhs), getTensorShape(_rhs), &op_params);
- if (need_broadcast)
- {
- nnfw::cker::BroadcastBinaryArithmeticOp<nnfw::cker::BinaryArithmeticOpType::SUB>(
- op_params, getTensorShape(_lhs), reinterpret_cast<const uint8_t *>(_lhs->buffer()),
- getTensorShape(_rhs), reinterpret_cast<const uint8_t *>(_rhs->buffer()),
- getTensorShape(_output), reinterpret_cast<uint8_t *>(_output->buffer()));
- return;
- }
-
- nnfw::cker::BinaryArithmeticOp<nnfw::cker::BinaryArithmeticOpType::SUB>(
- op_params, getTensorShape(_lhs), reinterpret_cast<const uint8_t *>(_lhs->buffer()),
- getTensorShape(_rhs), reinterpret_cast<const uint8_t *>(_rhs->buffer()),
- getTensorShape(_output), reinterpret_cast<uint8_t *>(_output->buffer()));
-}
-
-void SubLayer::configure(const IPortableTensor *lhs, const IPortableTensor *rhs,
- const ir::Activation activation, IPortableTensor *output)
-{
- _lhs = lhs;
- _rhs = rhs;
- _activation = activation;
- _output = output;
-}
-
-void SubLayer::run()
-{
- if (_output->data_type() == OperandType::FLOAT32)
- {
- subFloat32();
- }
- else if (_output->data_type() == OperandType::QUANT_UINT8_ASYMM)
- {
- subQuant8();
- }
- else if (_output->data_type() == OperandType::INT32)
- {
- subInt32();
- }
- else
- {
- throw std::runtime_error{"Sub: unsupported data type"};
- }
-}
-
-} // namespace ops
-} // namespace cpu
-} // namespace backend
-} // namespace onert
diff --git a/runtime/onert/backend/cpu/ops/SubLayer.h b/runtime/onert/backend/cpu/ops/SubLayer.h
deleted file mode 100644
index 86f32ca6d..000000000
--- a/runtime/onert/backend/cpu/ops/SubLayer.h
+++ /dev/null
@@ -1,67 +0,0 @@
-/*
- * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#ifndef __ONERT_BACKEND_CPU_OPS_SUBLAYER_H__
-#define __ONERT_BACKEND_CPU_OPS_SUBLAYER_H__
-
-#include <backend/IPortableTensor.h>
-#include "OperationUtils.h"
-
-#include <exec/IFunction.h>
-
-namespace onert
-{
-namespace backend
-{
-namespace cpu
-{
-namespace ops
-{
-
-class SubLayer : public ::onert::exec::IFunction
-{
-public:
- SubLayer() : _lhs(nullptr), _rhs(nullptr), _output(nullptr)
- {
- // DO NOTHING
- }
-
-public:
- void subFloat32();
-
- void subQuant8();
-
- void subInt32();
-
- void configure(const IPortableTensor *lhs, const IPortableTensor *rhs,
- const ir::Activation activation, IPortableTensor *output);
-
- void run() override;
-
-private:
- const IPortableTensor *_lhs;
- const IPortableTensor *_rhs;
- IPortableTensor *_output;
-
- ir::Activation _activation{ir::Activation::NONE};
-};
-
-} // namespace ops
-} // namespace cpu
-} // namespace backend
-} // namespace onert
-
-#endif // __ONERT_BACKEND_CPU_OPS_SUBLAYER_H__
diff --git a/runtime/onert/backend/cpu/ops/TanhLayer.cc b/runtime/onert/backend/cpu/ops/TanhLayer.cc
deleted file mode 100644
index 910ac1f41..000000000
--- a/runtime/onert/backend/cpu/ops/TanhLayer.cc
+++ /dev/null
@@ -1,103 +0,0 @@
-/*
- * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#include "TanhLayer.h"
-
-#include "OperationUtils.h"
-
-#include <cker/operation/Tanh.h>
-
-namespace onert
-{
-namespace backend
-{
-namespace cpu
-{
-namespace ops
-{
-
-TanhLayer::TanhLayer() : _input(nullptr), _output(nullptr)
-{
- // DO NOTHING
-}
-
-void TanhLayer::PopulateLookupTable()
-{
- const auto input_scale = static_cast<double>(_input->data_scale());
- const auto input_zero_point = static_cast<int32_t>(_input->data_offset());
- const auto output_scale = static_cast<double>(_output->data_scale());
- const auto output_zero_point = static_cast<int32_t>(_output->data_offset());
- const float inverse_scale = 1 / output_scale;
- int32_t maxval = std::numeric_limits<uint8_t>::max();
- int32_t minval = std::numeric_limits<uint8_t>::min();
- for (int32_t val = minval; val <= maxval; ++val)
- {
- const float dequantized = input_scale * (val - input_zero_point);
- const float transformed = std::tanh(dequantized);
- const float rescaled = std::round(transformed * inverse_scale);
- const int32_t quantized = static_cast<int32_t>(rescaled + output_zero_point);
- _table[val] = static_cast<uint8_t>(std::max(std::min(maxval, quantized), minval));
- }
-}
-
-void TanhLayer::tanhFloat32()
-{
- nnfw::cker::Tanh(getTensorShape(_input), reinterpret_cast<const float *>(_input->buffer()),
- getTensorShape(_output), reinterpret_cast<float *>(_output->buffer()));
-}
-
-void TanhLayer::tanhQuant8()
-{
- const int size = MatchingFlatSize(getTensorShape(_input), getTensorShape(_output));
- const uint8_t *input_data = reinterpret_cast<const uint8_t *>(_input->buffer());
- uint8_t *output_data = reinterpret_cast<uint8_t *>(_output->buffer());
-
- for (int i = 0; i < size; ++i)
- {
- output_data[i] = _table[input_data[i]];
- }
-}
-
-void TanhLayer::configure(const IPortableTensor *input, IPortableTensor *output)
-{
- _input = input;
- _output = output;
- if (_input->data_type() == OperandType::QUANT_UINT8_ASYMM)
- {
- PopulateLookupTable();
- }
-}
-
-void TanhLayer::run()
-{
- if (_input->data_type() == OperandType::FLOAT32)
- {
- tanhFloat32();
- }
- else if (_input->data_type() == OperandType::QUANT_UINT8_ASYMM)
- {
- tanhQuant8();
- }
- else
- {
- throw std::runtime_error{"Tanh: unsupported data type"};
- }
-}
-
-} // namespace ops
-} // namespace cpu
-} // namespace backend
-} // namespace onert
diff --git a/runtime/onert/backend/cpu/ops/ZerosLikeLayer.cc b/runtime/onert/backend/cpu/ops/ZerosLikeLayer.cc
deleted file mode 100644
index ae8084518..000000000
--- a/runtime/onert/backend/cpu/ops/ZerosLikeLayer.cc
+++ /dev/null
@@ -1,63 +0,0 @@
-/*
- * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#include "ZerosLikeLayer.h"
-
-#include "OperationUtils.h"
-
-namespace onert
-{
-namespace backend
-{
-namespace cpu
-{
-namespace ops
-{
-ZerosLikeLayer::ZerosLikeLayer() : _input(nullptr), _output(nullptr)
-{
- // DO NOTHING
-}
-
-void ZerosLikeLayer::configure(const IPortableTensor *input, IPortableTensor *output)
-{
- _input = input;
- _output = output;
-}
-
-void ZerosLikeLayer::run()
-{
- if (!HaveSameShapes(_input, _output))
- throw std::runtime_error{"ZerosLike: input and output shape don't match."};
-
- auto element_size = getTensorShape(_input).FlatSize();
-
- switch (_input->data_type())
- {
- case OperandType::FLOAT32:
- memset(reinterpret_cast<float *>(_output->buffer()), 0, element_size * sizeof(float));
- break;
- case OperandType::INT32:
- memset(reinterpret_cast<int32_t *>(_output->buffer()), 0, element_size * sizeof(int32_t));
- break;
- default:
- throw std::runtime_error{"ZerosLike: unsupported data type"};
- }
-}
-
-} // namespace ops
-} // namespace cpu
-} // namespace backend
-} // namespace onert
diff --git a/runtime/onert/core/include/backend/BackendContext.h b/runtime/onert/core/include/backend/BackendContext.h
index c263aef2b..1eba29550 100644
--- a/runtime/onert/core/include/backend/BackendContext.h
+++ b/runtime/onert/core/include/backend/BackendContext.h
@@ -29,6 +29,7 @@ class Backend;
class IConstantInitializer;
class IKernelGenerator;
class ITensorRegister;
+struct ITensorRegistry;
struct ITensorBuilder;
struct IOptimizer;
@@ -45,14 +46,15 @@ public:
public:
BackendContext(const Backend *backend, const ir::Graph *graph,
+ std::shared_ptr<ITensorRegistry> tensor_registry = nullptr,
std::shared_ptr<ITensorBuilder> tensor_builder = nullptr,
std::shared_ptr<IConstantInitializer> constant_initializer = nullptr,
std::shared_ptr<IKernelGenerator> kernel_gen = nullptr,
std::shared_ptr<ITensorRegister> tensor_register = nullptr,
std::shared_ptr<IOptimizer> optimizer = nullptr)
- : _backend{backend}, _graph{graph}, tensor_builder{tensor_builder},
- constant_initializer{constant_initializer}, kernel_gen{kernel_gen},
- tensor_register{tensor_register}, optimizer{optimizer}
+ : _backend{backend}, _graph{graph}, tensor_registry{tensor_registry},
+ tensor_builder{tensor_builder}, constant_initializer{constant_initializer},
+ kernel_gen{kernel_gen}, tensor_register{tensor_register}, optimizer{optimizer}
{
}
@@ -74,6 +76,7 @@ private:
std::vector<ir::OperandIndex> _operand_list;
public:
+ std::shared_ptr<ITensorRegistry> tensor_registry;
std::shared_ptr<ITensorBuilder> tensor_builder;
std::shared_ptr<IConstantInitializer> constant_initializer;
std::shared_ptr<IKernelGenerator> kernel_gen;
diff --git a/runtime/onert/core/include/backend/IConstantInitializer.h b/runtime/onert/core/include/backend/IConstantInitializer.h
index f322015ba..149acecb4 100644
--- a/runtime/onert/core/include/backend/IConstantInitializer.h
+++ b/runtime/onert/core/include/backend/IConstantInitializer.h
@@ -162,14 +162,14 @@ public:
public:
void run()
{
- assert(tensor_builder().get());
+ assert(tensor_registry());
for (const auto &it : _init_map)
{
const auto &ind = it.first;
const auto &fn = it.second;
const auto &model_obj = _operands.at(ind);
- auto tensor_obj = tensor_builder()->tensorAt(ind);
+ auto tensor_obj = tensor_registry()->getNativeITensor(ind);
assert(tensor_obj != nullptr);
fn(model_obj, *tensor_obj);
VERBOSE(FillOperandData) << "Fill data for operand " << ind.value() << std::endl;
@@ -189,10 +189,7 @@ public:
void setLayout(ir::Layout layout) { _current_op_seq_layout = layout; }
protected:
- using OperationVisitor::visit;
-
-protected:
- virtual std::shared_ptr<ITensorBuilder> tensor_builder() const = 0;
+ virtual std::shared_ptr<ITensorRegistry> tensor_registry() const = 0;
public:
virtual void registerDefaultInitializer(const ir::OperandIndex &index, const ir::Operand &obj)
diff --git a/runtime/onert/core/include/backend/ITensorBuilder.h b/runtime/onert/core/include/backend/ITensorBuilder.h
index b760cda0e..f93ab81ae 100644
--- a/runtime/onert/core/include/backend/ITensorBuilder.h
+++ b/runtime/onert/core/include/backend/ITensorBuilder.h
@@ -40,11 +40,6 @@ struct ITensorBuilder
virtual ~ITensorBuilder(void) = default;
/**
- * @brief Returns true if this TensorBuilder support dynamic tensor
- */
- virtual bool supportDynamicTensor() = 0;
-
- /**
* @brief Register tensor information to allocate on backend
*
* @param ind Index
@@ -63,15 +58,6 @@ struct ITensorBuilder
*/
virtual bool isRegistered(const ir::OperandIndex &) const = 0;
- /**
- * @brief Get tensor registry
- *
- * @return std::shared_ptr<backend::ITensorRegistry> tensor registry object
- *
- * @note Backend should implement this when it has StaticTensorManager and DynamicTensorManager
- */
- virtual std::shared_ptr<backend::ITensorRegistry> tensorRegistry() = 0;
-
public: // methods for static tensor allocation
/**
* @brief Let the tensor builder know first use(start of lifetime) of a tensor
@@ -104,32 +90,6 @@ public: // methods for static tensor allocation
virtual void postFunctionPrepare() = 0;
/**
- * @brief Get the tensor object
- *
- * @param ind Index of the tensor
- * @return std::shared_ptr<ITensor> The tensor object
- */
- virtual std::shared_ptr<ITensor> tensorAt(const ir::OperandIndex &ind) = 0;
-
- /**
- * @brief Set the migrant tensor object
- *
- * @return true if succeeded
- * @return false if failed or unsupported
- */
- virtual bool setMigrantTensor(const ir::OperandIndex &, const std::shared_ptr<IPortableTensor> &)
- {
- return false;
- }
-
- /**
- * @brief Iterate over tensors
- *
- * @param fn The function to be run
- */
- virtual void iterate(const IterateFunction &fn) = 0;
-
- /**
* @brief Release static @c ITensorManger object which was built
* Before calling this, @c allocate must have been called
*
@@ -147,10 +107,7 @@ public: // methods for dynamic tensor allocation
* @note Since it is a pointer, its lifetime is from the creation of TensorBuilder
* to the end of execution
*/
- virtual IDynamicTensorManager *dynamicTensorManager(void)
- {
- throw std::runtime_error("dynamicTensorManager(): NYI");
- }
+ virtual IDynamicTensorManager *dynamicTensorManager(void) { return nullptr; }
/**
* @brief Release dynamic @c ITensorManger object which was built
@@ -158,10 +115,7 @@ public: // methods for dynamic tensor allocation
*
* @return std::unique_ptr<ITensorManager> Tensor Manager object
*/
- virtual std::unique_ptr<ITensorManager> releaseDynamicTensorManager(void)
- {
- throw std::runtime_error("releaseDynamicTensorManager() for this backend is not supported");
- }
+ virtual std::unique_ptr<ITensorManager> releaseDynamicTensorManager(void) { return nullptr; }
};
} // namespace backend
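
For illustration: with the defaults above, a backend without dynamic-tensor support now returns nullptr from dynamicTensorManager()/releaseDynamicTensorManager() instead of throwing, so callers would check the pointer rather than catch an exception. A simplified stand-in sketch (not the real onert interfaces; planAlloc() is hypothetical):

    #include <iostream>

    struct IDynamicTensorManager
    {
      void planAlloc() { std::cout << "dynamic allocation planned\n"; } // hypothetical API
    };

    struct ITensorBuilder
    {
      virtual ~ITensorBuilder() = default;
      // New default: "not supported" is signalled by nullptr, not by throwing.
      virtual IDynamicTensorManager *dynamicTensorManager() { return nullptr; }
    };

    struct StaticOnlyBuilder : ITensorBuilder {};

    int main()
    {
      StaticOnlyBuilder builder;
      if (auto *dyn = builder.dynamicTensorManager()) // check instead of try/catch
        dyn->planAlloc();
      else
        std::cout << "backend has no dynamic tensor support\n";
    }
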
diff --git a/runtime/onert/core/include/backend/ITensorRegistry.h b/runtime/onert/core/include/backend/ITensorRegistry.h
index 855513124..88fcb0fcd 100644
--- a/runtime/onert/core/include/backend/ITensorRegistry.h
+++ b/runtime/onert/core/include/backend/ITensorRegistry.h
@@ -21,6 +21,7 @@
#include "ir/Index.h"
#include "backend/ITensor.h"
+#include "backend/IPortableTensor.h"
namespace onert
{
@@ -51,13 +52,22 @@ struct ITensorRegistry
* @note Returned tensor cannot be used longer than dynamic tensor manager
*/
virtual std::shared_ptr<ITensor> getNativeITensor(const ir::OperandIndex &) = 0;
+ /**
+ * @brief Set the Migrant Tensor which are from other backends
+ *
+ * @return true if supported
+ * @return false if not supported
+ */
+ virtual bool setMigrantTensor(const ir::OperandIndex &, const std::shared_ptr<IPortableTensor> &)
+ {
+ return false;
+ }
};
} // namespace backend
} // namespace onert
#include "ir/OperandIndexMap.h"
-#include "backend/IPortableTensor.h"
namespace onert
{
@@ -108,24 +118,23 @@ public:
return nullptr;
}
- bool setMigrantTensor(const ir::OperandIndex &ind, const std::shared_ptr<IPortableTensor> &tensor)
+ bool setMigrantTensor(const ir::OperandIndex &ind,
+ const std::shared_ptr<IPortableTensor> &tensor) override
{
- // TODO Uncomment this as two tensors for an index is not allowed.
- // But now it is temporarily allowed as a workaround. External one hides Managed one.
- // auto itr = _native.find(ind);
- // if (itr != _native.end() && itr->second != nullptr && tensor != nullptr)
- // throw std::runtime_error{
- // "Tried to set an migrant tensor but an native tensor already exists."};
+ assert(tensor != nullptr);
+ auto itr = _native.find(ind);
+ if (itr != _native.end())
+ throw std::runtime_error{"Tried to set a migrant tensor but a native tensor already exists."};
_migrant[ind] = tensor;
return true;
}
void setNativeTensor(const ir::OperandIndex &ind, const std::shared_ptr<T_Tensor> &tensor)
{
+ assert(tensor != nullptr);
auto itr = _migrant.find(ind);
- if (itr != _migrant.end() && itr->second != nullptr && tensor != nullptr)
- throw std::runtime_error{
- "Tried to set a native tensor but an migrant tensor already exists."};
+ if (itr != _migrant.end())
+ throw std::runtime_error{"Tried to set a native tensor but a migrant tensor already exists."};
_native[ind] = tensor;
}
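
For illustration: with the workaround removed above, a given operand index may hold either a native or a migrant tensor, never both; both setters assert a non-null tensor and throw if the other map already has an entry for that index. A standalone sketch of that mutual-exclusion invariant (simplified; not the real registry template):

    #include <cassert>
    #include <map>
    #include <memory>
    #include <stdexcept>
    #include <string>

    struct Tensor { std::string name; };

    // An index owns either a native tensor or a migrant tensor, never both.
    class Registry
    {
    public:
      void setNative(int ind, std::shared_ptr<Tensor> t)
      {
        assert(t != nullptr);
        if (_migrant.count(ind))
          throw std::runtime_error{"native tensor would shadow an existing migrant tensor"};
        _native[ind] = std::move(t);
      }

      bool setMigrant(int ind, std::shared_ptr<Tensor> t)
      {
        assert(t != nullptr);
        if (_native.count(ind))
          throw std::runtime_error{"migrant tensor would shadow an existing native tensor"};
        _migrant[ind] = std::move(t);
        return true;
      }

    private:
      std::map<int, std::shared_ptr<Tensor>> _native;
      std::map<int, std::shared_ptr<Tensor>> _migrant;
    };

    int main()
    {
      Registry reg;
      reg.setNative(0, std::make_shared<Tensor>(Tensor{"conv_out"}));
      reg.setMigrant(1, std::make_shared<Tensor>(Tensor{"from_other_backend"}));
      // reg.setMigrant(0, ...) would throw: index 0 already holds a native tensor.
    }
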
diff --git a/runtime/onert/core/include/backend/cpu_common/StaticTensorManager.h b/runtime/onert/core/include/backend/cpu_common/StaticTensorManager.h
index a7e034a91..3f09b7a4a 100644
--- a/runtime/onert/core/include/backend/cpu_common/StaticTensorManager.h
+++ b/runtime/onert/core/include/backend/cpu_common/StaticTensorManager.h
@@ -20,6 +20,7 @@
#include "MemoryManager.h"
#include "backend/IStaticTensorManager.h"
+#include "backend/IDynamicTensorManager.h"
#include "ir/OperandIndexMap.h"
#include "ir/OperandInfo.h"
#include "TensorRegistry.h"
@@ -34,7 +35,8 @@ namespace cpu_common
class StaticTensorManager : public backend::IStaticTensorManager
{
public:
- StaticTensorManager(const std::shared_ptr<TensorRegistry> &reg);
+ StaticTensorManager(const std::shared_ptr<TensorRegistry> &reg,
+ IDynamicTensorManager *dynamic_tensor_manager);
virtual ~StaticTensorManager() = default;
void allocateConsts(void);
@@ -55,6 +57,7 @@ private:
std::unique_ptr<MemoryManager> _nonconst_mgr;
const std::shared_ptr<TensorRegistry> _tensors;
ir::OperandIndexMap<bool> _as_constants;
+ IDynamicTensorManager *_dynamic_tensor_manager;
};
} // namespace cpu_common
diff --git a/runtime/onert/core/include/compiler/LoweredGraph.h b/runtime/onert/core/include/compiler/LoweredGraph.h
new file mode 100644
index 000000000..aadba6857
--- /dev/null
+++ b/runtime/onert/core/include/compiler/LoweredGraph.h
@@ -0,0 +1,90 @@
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __ONERT_IR_LOWERED_GRAPH_H__
+#define __ONERT_IR_LOWERED_GRAPH_H__
+
+#include "ir/Graph.h"
+#include "ir/LowerInfoMap.h"
+#include "ir/OpSequences.h"
+#include "compiler/BackendResolver.h"
+#include "compiler/Compiler.h"
+
+namespace onert
+{
+namespace compiler
+{
+
+/**
+ * @brief Class that contains lowering information on graph.
+ * In addition, after lowering, operands in graph will be set to "dynamic"
+ * if the shape of output of an operation cannot be decided at compilation time.
+ */
+class LoweredGraph
+{
+public:
+ LoweredGraph(const ir::Graph &graph, const compiler::CompilerOptions &options);
+
+ ir::Graph &graph() { return _graph; }
+ const ir::Graph &graph() const { return _graph; }
+ const ir::LowerInfoMap *getLowerInfo() const { return &_lower_info_map; }
+ const ir::operation::LowerInfo *getLowerInfo(const ir::OpSequenceIndex &op_seq_index) const;
+ void setLowerInfo(const ir::OpSequenceIndex &op_seq_index,
+ std::unique_ptr<ir::operation::LowerInfo> &&lower_info);
+ void removeLowerInfo(const ir::OpSequenceIndex &op_seq_index);
+ const ir::operand::LowerInfo *getLowerInfo(const ir::OperandIndex &index) const;
+ ir::operand::LowerInfo *getLowerInfo(const ir::OperandIndex &index);
+ void setLowerInfo(const ir::OperandIndex &index,
+ std::unique_ptr<ir::operand::LowerInfo> &&lower_info);
+ void removeLowerInfo(const ir::OperandIndex &index);
+ ir::OpSequences &op_seqs() { return _op_seqs; }
+ const ir::OpSequences &op_seqs() const { return _op_seqs; }
+ void iterateTopolOpSeqs(
+ const std::function<void(const ir::OpSequenceIndex &, const ir::OpSequence &)> &fn) const;
+ void
+ iterateTopolOpSeqs(const std::function<void(const ir::OpSequenceIndex &, ir::OpSequence &)> &fn);
+ const backend::BackendContexts &backend_contexts() { return _backend_contexts; }
+ const backend::BackendContexts &backend_contexts() const { return _backend_contexts; }
+ std::shared_ptr<ir::OperationIndexMap<int64_t>> indexed_ranks() { return _indexed_ranks; }
+
+private:
+ void
+ makeOpSequences(ir::OperandIndexMap<std::unique_ptr<ir::operand::LowerInfo>> &operands_lower_info,
+ const compiler::CompilerOptions &options,
+ const compiler::BackendResolver &backend_resolver);
+
+ void manipulateLowerInfo(
+ ir::OperandIndexMap<std::unique_ptr<ir::operand::LowerInfo>> &operands_lower_info,
+ bool is_primary);
+ void dumpLowerInfo();
+ bool mergeable(const ir::OpSequenceIndex &op_seq_index, const ir::OperationIndex &node_index,
+ ir::Layout layout, const compiler::BackendResolver &backend_resolver);
+ ir::OpSequenceIndex appendFreshSingleOpSequence(const ir::OperationIndex &node_index,
+ const ir::Operation &node);
+
+private:
+ ir::Graph _graph;
+ backend::BackendContexts _backend_contexts;
+ std::shared_ptr<ir::OperationIndexMap<int64_t>> _indexed_ranks;
+ ir::LowerInfoMap _lower_info_map;
+ // A Pass (e.g. the Perm pass) can accept only a Graph, so Graph keeps OpSequences as a member
+ ir::OpSequences _op_seqs;
+};
+
+} // namespace compiler
+} // namespace onert
+
+#endif // __ONERT_IR_LOWERED_GRAPH_H__
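
A hedged usage sketch for the relocated class, assuming only the interface declared above (the function name is illustrative):

#include "compiler/LoweredGraph.h"

// Sketch: lower a built ir::Graph and walk its op sequences in topological order.
void lowerAndInspect(const onert::ir::Graph &graph, const onert::compiler::CompilerOptions &options)
{
  onert::compiler::LoweredGraph lowered{graph, options};

  lowered.iterateTopolOpSeqs([&](const onert::ir::OpSequenceIndex &idx,
                                 const onert::ir::OpSequence &op_seq) {
    // Per-op-sequence lowering info (backend/layout) is looked up by index.
    const auto *lower_info = lowered.getLowerInfo(idx);
    (void)lower_info;
    (void)op_seq;
  });
}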
diff --git a/runtime/onert/core/include/compiler/StaticShapeInference.h b/runtime/onert/core/include/compiler/StaticShapeInference.h
index bff68c9fa..b97cb5b7b 100644
--- a/runtime/onert/core/include/compiler/StaticShapeInference.h
+++ b/runtime/onert/core/include/compiler/StaticShapeInference.h
@@ -19,7 +19,7 @@
#include "ir/OperationVisitor.h"
#include "ir/OpSequence.h"
-#include "ir/LoweredGraph.h"
+#include "compiler/LoweredGraph.h"
#include "ir/Index.h"
#include <memory>
@@ -41,7 +41,8 @@ class StaticShapeInferer : public ir::OperationVisitor
public:
StaticShapeInferer(
const ir::SubgraphIndex &subg_idx,
- const std::unordered_map<ir::SubgraphIndex, std::unique_ptr<ir::LoweredGraph>> &lowered_subgs)
+ const std::unordered_map<ir::SubgraphIndex, std::unique_ptr<compiler::LoweredGraph>>
+ &lowered_subgs)
: _lowered_subgs(lowered_subgs), _operands(lowered_subgs.at(subg_idx)->graph().operands()),
_operations(lowered_subgs.at(subg_idx)->graph().operations()),
_return_has_dynamic_tensor(false)
@@ -57,54 +58,34 @@ public:
* @param op_seq sequence of operations
* @return @c true if op_seq's input or output has any dynamic tensor; @c false otherwise.
*/
- bool infer(const ir::OpSequence &op_seq)
- {
- bool has_dynamic_tensor = false;
-
- _return_has_dynamic_tensor = false; // this is used as a return value inside operation's visit()
-
- for (const auto &operation_idx : op_seq.operations())
- {
- _operations.at(operation_idx).accept(*this);
-
- has_dynamic_tensor = has_dynamic_tensor || _return_has_dynamic_tensor;
- }
-
- return has_dynamic_tensor;
- }
+ bool infer(const ir::OpSequence &op_seq);
void dump();
private:
+ bool checkDynamicInput(const ir::Operation &op);
+ void setDynamicOutput(const ir::Operation &op);
+
+private:
// TODO Define visitors for operations. List them in alphabetic order.
- void visit(const ir::operation::Abs &op) override;
- void visit(const ir::operation::Add &op) override;
void visit(const ir::operation::ArgMax &op) override;
void visit(const ir::operation::BatchMatMul &op) override;
+ void visit(const ir::operation::BinaryArithmetic &op) override;
void visit(const ir::operation::BroadcastTo &op) override;
- void visit(const ir::operation::Cast &op) override;
void visit(const ir::operation::Comparison &op) override;
void visit(const ir::operation::Concat &op) override;
void visit(const ir::operation::Conv2D &op) override;
- void visit(const ir::operation::Cos &op) override;
- void visit(const ir::operation::Div &op) override;
- void visit(const ir::operation::Exp &op) override;
+ void visit(const ir::operation::ElementwiseActivation &op) override;
+ void visit(const ir::operation::ElementwiseBinary &op) override;
+ void visit(const ir::operation::ElementwiseUnary &op) override;
void visit(const ir::operation::ExpandDims &op) override;
void visit(const ir::operation::Fill &op) override;
void visit(const ir::operation::FullyConnected &op) override;
void visit(const ir::operation::FusedBatchNorm &op) override;
void visit(const ir::operation::Gather &op) override;
void visit(const ir::operation::If &op) override;
- void visit(const ir::operation::Log &op) override;
- void visit(const ir::operation::LogicalNot &op) override;
- void visit(const ir::operation::LogicalOr &op) override;
- void visit(const ir::operation::Logistic &op) override;
void visit(const ir::operation::L2Normalization &op) override;
void visit(const ir::operation::MatrixBandPart &op) override;
- void visit(const ir::operation::Max &op) override;
- void visit(const ir::operation::Min &op) override;
- void visit(const ir::operation::Mul &op) override;
- void visit(const ir::operation::Neg &op) override;
void visit(const ir::operation::OneHot &op) override;
void visit(const ir::operation::Pack &op) override;
void visit(const ir::operation::Pad &op) override;
@@ -113,27 +94,21 @@ private:
void visit(const ir::operation::Range &op) override;
void visit(const ir::operation::Reduce &op) override;
void visit(const ir::operation::Reshape &op) override;
- void visit(const ir::operation::Round &op) override;
- void visit(const ir::operation::RSQRT &op) override;
void visit(const ir::operation::ResizeBilinear &op) override;
void visit(const ir::operation::Reverse &op) override;
void visit(const ir::operation::Select &op) override;
void visit(const ir::operation::Shape &op) override;
- void visit(const ir::operation::Sin &op) override;
void visit(const ir::operation::Slice &op) override;
void visit(const ir::operation::Softmax &op) override;
void visit(const ir::operation::SpaceToBatchND &op) override;
void visit(const ir::operation::Split &op) override;
void visit(const ir::operation::Squeeze &op) override;
void visit(const ir::operation::StridedSlice &op) override;
- void visit(const ir::operation::Sub &op) override;
void visit(const ir::operation::SquaredDifference &op) override;
- void visit(const ir::operation::Tanh &op) override;
void visit(const ir::operation::Tile &op) override;
void visit(const ir::operation::Transpose &op) override;
void visit(const ir::operation::Unpack &op) override;
void visit(const ir::operation::While &op) override;
- void visit(const ir::operation::ZerosLike &op) override;
private:
/**
@@ -149,7 +124,8 @@ private:
void handleSimpleUnaryOp(const ir::Operation &op, const ir::OperandIndex input_idx);
private:
- const std::unordered_map<ir::SubgraphIndex, std::unique_ptr<ir::LoweredGraph>> &_lowered_subgs;
+ const std::unordered_map<ir::SubgraphIndex, std::unique_ptr<compiler::LoweredGraph>>
+ &_lowered_subgs;
// _operands and _operations can be changed by controlflow operation
ir::Operands &_operands; // operands of current subgraph
ir::Operations &_operations; // operations of current subgraph
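
Since infer() is now only declared in the header, its body moves out of line. A sketch of the moved definition, reconstructed from the inline version removed above (the real .cc may additionally use the new checkDynamicInput/setDynamicOutput helpers):

// StaticShapeInference.cc (sketch)
bool StaticShapeInferer::infer(const ir::OpSequence &op_seq)
{
  bool has_dynamic_tensor = false;

  _return_has_dynamic_tensor = false; // used as a return value inside each operation's visit()

  for (const auto &operation_idx : op_seq.operations())
  {
    _operations.at(operation_idx).accept(*this);
    has_dynamic_tensor = has_dynamic_tensor || _return_has_dynamic_tensor;
  }

  return has_dynamic_tensor;
}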
diff --git a/runtime/onert/core/include/exec/DynamicShapeInference.h b/runtime/onert/core/include/exec/DynamicShapeInference.h
index bca80db09..6f6659659 100644
--- a/runtime/onert/core/include/exec/DynamicShapeInference.h
+++ b/runtime/onert/core/include/exec/DynamicShapeInference.h
@@ -38,46 +38,34 @@ namespace exec
class DynamicShapeInferer : public ir::OperationVisitor
{
public:
- DynamicShapeInferer(const ir::Operands &operands, backend::IDynamicTensorManager *tensor_manager,
+ DynamicShapeInferer(const ir::Operands &operands,
const std::shared_ptr<backend::ITensorRegistry> &tensor_registry)
- : _operands(operands), _dynamic_tensor_manager(tensor_manager),
- _tensor_registry(tensor_registry)
+ : _operands(operands), _tensor_registry(tensor_registry)
{
UNUSED_RELEASE(_operands);
- UNUSED_RELEASE(_dynamic_tensor_manager);
UNUSED_RELEASE(_tensor_registry);
}
public:
// TODO Define visitors for operations. List them in alphabetic order.
// Remove TODO when any op starting from the alphabet is added
- void visit(const ir::operation::Abs &op) override;
- void visit(const ir::operation::Add &op) override;
void visit(const ir::operation::ArgMax &op) override;
void visit(const ir::operation::BatchMatMul &op) override;
+ void visit(const ir::operation::BinaryArithmetic &op) override;
void visit(const ir::operation::BroadcastTo &op) override;
- void visit(const ir::operation::Cast &op) override;
void visit(const ir::operation::Comparison &op) override;
void visit(const ir::operation::Concat &op) override;
void visit(const ir::operation::Conv2D &op) override;
- void visit(const ir::operation::Cos &op) override;
- void visit(const ir::operation::Div &op) override;
- void visit(const ir::operation::Exp &op) override;
+ void visit(const ir::operation::ElementwiseActivation &op) override;
+ void visit(const ir::operation::ElementwiseBinary &op) override;
+ void visit(const ir::operation::ElementwiseUnary &op) override;
void visit(const ir::operation::ExpandDims &op) override;
void visit(const ir::operation::Fill &op) override;
void visit(const ir::operation::FullyConnected &op) override;
void visit(const ir::operation::FusedBatchNorm &op) override;
void visit(const ir::operation::Gather &op) override;
- void visit(const ir::operation::Log &op) override;
- void visit(const ir::operation::LogicalNot &op) override;
- void visit(const ir::operation::LogicalOr &op) override;
- void visit(const ir::operation::Logistic &op) override;
void visit(const ir::operation::L2Normalization &op) override;
void visit(const ir::operation::MatrixBandPart &op) override;
- void visit(const ir::operation::Max &op) override;
- void visit(const ir::operation::Min &op) override;
- void visit(const ir::operation::Mul &op) override;
- void visit(const ir::operation::Neg &op) override;
void visit(const ir::operation::OneHot &op) override;
void visit(const ir::operation::Pack &op) override;
void visit(const ir::operation::Pad &op) override;
@@ -87,27 +75,21 @@ public:
void visit(const ir::operation::Range &op) override;
void visit(const ir::operation::Reduce &op) override;
void visit(const ir::operation::Reshape &op) override;
- void visit(const ir::operation::Round &op) override;
- void visit(const ir::operation::RSQRT &op) override;
void visit(const ir::operation::ResizeBilinear &op) override;
void visit(const ir::operation::Reverse &op) override;
void visit(const ir::operation::Select &op) override;
void visit(const ir::operation::Shape &op) override;
- void visit(const ir::operation::Sin &op) override;
void visit(const ir::operation::Slice &op) override;
void visit(const ir::operation::Softmax &op) override;
void visit(const ir::operation::SpaceToBatchND &op) override;
void visit(const ir::operation::Split &op) override;
void visit(const ir::operation::Squeeze &op) override;
void visit(const ir::operation::StridedSlice &op) override;
- void visit(const ir::operation::Sub &op) override;
void visit(const ir::operation::SquaredDifference &op) override;
- void visit(const ir::operation::Tanh &op) override;
void visit(const ir::operation::Tile &op) override;
void visit(const ir::operation::Transpose &op) override;
void visit(const ir::operation::Unpack &op) override;
// TODO write op starting from V
- void visit(const ir::operation::ZerosLike &op) override;
private:
/**
@@ -127,11 +109,6 @@ private:
*/
const ir::Operands &_operands;
/**
- * @brief To allocate memory for output tensor if needed
- */
- // TODO Remove this, as it is no longer used
- backend::IDynamicTensorManager *_dynamic_tensor_manager;
- /**
* @brief To get tensor object and access tensor-level info, e.g., ITensor::buffer()
*/
std::shared_ptr<backend::ITensorRegistry> _tensor_registry;
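
With the IDynamicTensorManager argument dropped, callers construct the inferer from operands and a tensor registry only. A minimal sketch under that assumption:

#include "exec/DynamicShapeInference.h"

// Sketch: run dynamic shape inference for a single operation at execution time.
void inferDynamicShape(const onert::ir::Operands &operands,
                       const std::shared_ptr<onert::backend::ITensorRegistry> &tensor_registry,
                       const onert::ir::Operation &op)
{
  onert::exec::DynamicShapeInferer inferer{operands, tensor_registry};
  op.accept(inferer); // each visit() resizes output tensors found through tensor_registry
}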
diff --git a/runtime/onert/core/include/exec/IExecutor.h b/runtime/onert/core/include/exec/IExecutor.h
index 46e05a289..6c8bab67c 100644
--- a/runtime/onert/core/include/exec/IExecutor.h
+++ b/runtime/onert/core/include/exec/IExecutor.h
@@ -80,8 +80,6 @@ struct DynAllocInfo
{
/// @brief index of input tensor whose memory needs to be allocated at execution time
ir::OperandIndex ind;
- /// @brief dynamic tensor manager that can allocate memory when input tensor is dynamic
- backend::IDynamicTensorManager *dyn_tensor_manager;
};
using DynAllocInfoMap = std::unordered_map<std::shared_ptr<backend::ITensor>, DynAllocInfo>;
diff --git a/runtime/onert/core/include/exec/IODescription.h b/runtime/onert/core/include/exec/IODescription.h
index c10c36756..d1810ec3b 100644
--- a/runtime/onert/core/include/exec/IODescription.h
+++ b/runtime/onert/core/include/exec/IODescription.h
@@ -62,8 +62,8 @@ struct IODescription
{
std::vector<std::unique_ptr<InputDesc>> inputs;
std::vector<std::unique_ptr<OutputDesc>> outputs;
- // Contains shape of input set by set_input_tensorinfo
- std::unordered_map<ir::IOIndex, ir::Shape> input_shape_signature;
+ // Contains the shapes of inputs set by nnfw_set_input_tensorinfo(..)
+ std::unordered_map<ir::IOIndex, ir::Shape> dynamic_input_shapes;
};
} // namespace exec
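
A hedged sketch of the intended flow for the renamed map: nnfw_set_input_tensorinfo(..) records the user-provided shape, and execution consults it before preparing inputs (the helper below is illustrative):

#include "exec/IODescription.h"

// Sketch: remember a shape supplied through nnfw_set_input_tensorinfo(..).
void setDynamicInputShape(onert::exec::IODescription &desc, const onert::ir::IOIndex &index,
                          const onert::ir::Shape &shape)
{
  desc.dynamic_input_shapes[index] = shape; // overrides the model's static shape for this run
}

// Before execution, a lookup such as desc.dynamic_input_shapes.find(index) decides
// whether the corresponding input tensor must be resized.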
diff --git a/runtime/onert/core/include/ir/Graph.h b/runtime/onert/core/include/ir/Graph.h
index fb956fedf..2103e6e64 100644
--- a/runtime/onert/core/include/ir/Graph.h
+++ b/runtime/onert/core/include/ir/Graph.h
@@ -60,8 +60,8 @@ public:
OperandIndex addOperand(const Shape &shape, const TypeInfo &type);
OperationIndex addOperation(std::unique_ptr<Operation> &&node);
void setOperandValue(const OperandIndex &ind, std::shared_ptr<Data> data);
- void addInput(const OperandIndex &ind);
- void addOutput(const OperandIndex &ind);
+ void addInput(const OperandIndex &ind, const std::string &name = "");
+ void addOutput(const OperandIndex &ind, const std::string &name = "");
void finishBuilding(void);
void removeOperand(const OperandIndex &ind) { _operands.remove(ind); }
bool isBuildingPhase(void) const { return _phase == Phase::BUILDING; }
@@ -94,6 +94,8 @@ public:
OperandIndexSequence &getInputs() { return _inputs; }
const OperandIndexSequence &getOutputs() const { return _outputs; }
OperandIndexSequence &getOutputs() { return _outputs; }
+ IOIndex getInputIndex(const std::string &name) const;
+ IOIndex getOutputIndex(const std::string &name) const;
const Operands &operands() const { return _operands; }
Operands &operands() { return _operands; } // TODO Remove this non-const accessor
const Operations &operations() const { return _operations; }
@@ -108,6 +110,8 @@ private:
Operands _operands;
OperandIndexSequence _inputs;
OperandIndexSequence _outputs;
+ std::unordered_map<std::string, IOIndex> _name_to_input;
+ std::unordered_map<std::string, IOIndex> _name_to_output;
// Child subgraphs
std::shared_ptr<Subgraphs> _subgraphs;
// TFLite and circle's default layout is NHWC;
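
A short sketch of the new name-based I/O lookup, assuming that the default empty name simply leaves an input or output unnamed:

#include <string>
#include "ir/Graph.h"

// Sketch: register named model I/O and resolve the IOIndex back by name later.
void nameBasedIO(onert::ir::Graph &graph, const onert::ir::OperandIndex &in,
                 const onert::ir::OperandIndex &out)
{
  graph.addInput(in, "image");    // name parameter added in this change; "" keeps old behaviour
  graph.addOutput(out, "logits");

  onert::ir::IOIndex in_idx = graph.getInputIndex("image");    // backed by _name_to_input
  onert::ir::IOIndex out_idx = graph.getOutputIndex("logits"); // backed by _name_to_output
  (void)in_idx;
  (void)out_idx;
}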
diff --git a/runtime/onert/core/include/ir/InternalType.h b/runtime/onert/core/include/ir/InternalType.h
index e42db72cf..1d962c185 100644
--- a/runtime/onert/core/include/ir/InternalType.h
+++ b/runtime/onert/core/include/ir/InternalType.h
@@ -40,6 +40,12 @@ struct Stride
uint32_t horizontal;
};
+struct Dilation
+{
+ uint32_t width_factor;
+ uint32_t height_factor;
+};
+
} // namespace ir
} // namespace onert
diff --git a/runtime/onert/core/include/ir/LoweredGraph.h b/runtime/onert/core/include/ir/LoweredGraph.h
deleted file mode 100644
index d6583df24..000000000
--- a/runtime/onert/core/include/ir/LoweredGraph.h
+++ /dev/null
@@ -1,87 +0,0 @@
-/*
- * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#ifndef __ONERT_IR_LOWERED_GRAPH_H__
-#define __ONERT_IR_LOWERED_GRAPH_H__
-
-#include "ir/Graph.h"
-#include "ir/LowerInfoMap.h"
-#include "ir/OpSequences.h"
-#include "compiler/BackendResolver.h"
-#include "compiler/Compiler.h"
-
-namespace onert
-{
-namespace ir
-{
-
-/**
- * @brief Class that contains lowering information on graph.
- * In addition, after lowering, operands in graph will be set to "dynamic"
- * if the shape of output of an operation cannot be decided at compilation time.
- */
-class LoweredGraph
-{
-public:
- LoweredGraph(const Graph &graph, const compiler::CompilerOptions &options);
-
- Graph &graph() { return _graph; }
- const Graph &graph() const { return _graph; }
- const LowerInfoMap *getLowerInfo() const { return &_lower_info_map; }
- const operation::LowerInfo *getLowerInfo(const OpSequenceIndex &op_seq_index) const;
- void setLowerInfo(const OpSequenceIndex &op_seq_index,
- std::unique_ptr<operation::LowerInfo> &&lower_info);
- void removeLowerInfo(const OpSequenceIndex &op_seq_index);
- const operand::LowerInfo *getLowerInfo(const OperandIndex &index) const;
- operand::LowerInfo *getLowerInfo(const OperandIndex &index);
- void setLowerInfo(const OperandIndex &index, std::unique_ptr<operand::LowerInfo> &&lower_info);
- void removeLowerInfo(const OperandIndex &index);
- OpSequences &op_seqs() { return _op_seqs; }
- const OpSequences &op_seqs() const { return _op_seqs; }
- void iterateTopolOpSeqs(
- const std::function<void(const OpSequenceIndex &, const OpSequence &)> &fn) const;
- void iterateTopolOpSeqs(const std::function<void(const OpSequenceIndex &, OpSequence &)> &fn);
- const backend::BackendContexts &backend_contexts() { return _backend_contexts; }
- const backend::BackendContexts &backend_contexts() const { return _backend_contexts; }
- std::shared_ptr<ir::OperationIndexMap<int64_t>> indexed_ranks() { return _indexed_ranks; }
-
-private:
- void makeOpSequences(OperandIndexMap<std::unique_ptr<operand::LowerInfo>> &operands_lower_info,
- const compiler::CompilerOptions &options,
- const compiler::BackendResolver &backend_resolver);
-
- void
- manipulateLowerInfo(OperandIndexMap<std::unique_ptr<operand::LowerInfo>> &operands_lower_info,
- bool is_primary);
- void dumpLowerInfo();
- bool mergeable(const OpSequenceIndex &op_seq_index, const OperationIndex &node_index,
- Layout layout, const compiler::BackendResolver &backend_resolver);
- OpSequenceIndex appendFreshSingleOpSequence(const OperationIndex &node_index,
- const Operation &node);
-
-private:
- Graph _graph;
- backend::BackendContexts _backend_contexts;
- std::shared_ptr<ir::OperationIndexMap<int64_t>> _indexed_ranks;
- LowerInfoMap _lower_info_map;
- // Pass(for Perm) can accept only graph so that Graph has OpSequences as a member
- OpSequences _op_seqs;
-};
-
-} // namespace ir
-} // namespace onert
-
-#endif // __ONERT_IR_LOWERED_GRAPH_H__
diff --git a/runtime/onert/core/include/ir/OpSequences.h b/runtime/onert/core/include/ir/OpSequences.h
index 6ed8499bc..ab258f395 100644
--- a/runtime/onert/core/include/ir/OpSequences.h
+++ b/runtime/onert/core/include/ir/OpSequences.h
@@ -63,13 +63,6 @@ public:
*/
OpSequenceIndex getOperation(const OperationIndex &operation_index) const;
/**
- * @brief Dump OpSequences
- *
- * @param msg Message that will be displayed
- * @param graph Graph that has information used for dump
- */
- void dump(const std::string &msg, const Operations &operations) const;
- /**
* @brief Remove an operation from OpSequence
*
* @param operation_index Operation index to be removed
@@ -84,6 +77,14 @@ private:
mutable std::unordered_map<OperationIndex, OpSequenceIndex> _seq_indexes;
};
+/**
+ * @brief Dump OpSequences
+ *
+ * @param op_seqs Operation Sequences
+ * @param operations Operation context
+ */
+void dumpOpSequences(const OpSequences &op_seqs, const Operations &operations);
+
} // namespace ir
} // namespace onert
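
Callers of the removed member dump(msg, operations) switch to the new free function, which takes no message string; a minimal sketch:

#include "ir/OpSequences.h"

// Sketch: the former op_seqs.dump("after lowering", operations) becomes a free-function call.
void debugDump(const onert::ir::OpSequences &op_seqs, const onert::ir::Operations &operations)
{
  onert::ir::dumpOpSequences(op_seqs, operations);
}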
diff --git a/runtime/onert/core/include/ir/Operations.Include.h b/runtime/onert/core/include/ir/Operations.Include.h
index 30c4ff25a..17bbbc29c 100644
--- a/runtime/onert/core/include/ir/Operations.Include.h
+++ b/runtime/onert/core/include/ir/Operations.Include.h
@@ -17,10 +17,10 @@
// This file has no ifdef guard intentionally
#include "ir/operation/BatchToSpaceND.h"
+#include "ir/operation/BinaryArithmetic.h"
#include "ir/operation/BroadcastTo.h"
#include "ir/operation/Conv2D.h"
-#include "ir/operation/MaxPool2D.h"
-#include "ir/operation/AvgPool2D.h"
+#include "ir/operation/Pool2D.h"
#include "ir/operation/Concat.h"
#include "ir/operation/Reshape.h"
#include "ir/operation/Fill.h"
@@ -29,51 +29,32 @@
#include "ir/operation/Transpose.h"
#include "ir/operation/Permute.h"
#include "ir/operation/Reduce.h"
-#include "ir/operation/Add.h"
-#include "ir/operation/Sub.h"
#include "ir/operation/DepthwiseConv2D.h"
#include "ir/operation/Slice.h"
#include "ir/operation/StridedSlice.h"
-#include "ir/operation/Mul.h"
#include "ir/operation/Squeeze.h"
-#include "ir/operation/Tanh.h"
-#include "ir/operation/Log.h"
-#include "ir/operation/Logistic.h"
-#include "ir/operation/Cast.h"
-#include "ir/operation/Div.h"
-#include "ir/operation/Exp.h"
+#include "ir/operation/ElementwiseActivation.h"
+#include "ir/operation/ElementwiseBinary.h"
+#include "ir/operation/ElementwiseUnary.h"
#include "ir/operation/ExpandDims.h"
#include "ir/operation/Comparison.h"
-#include "ir/operation/LogicalAnd.h"
-#include "ir/operation/LogicalOr.h"
-#include "ir/operation/LogicalNot.h"
#include "ir/operation/LSTM.h"
-#include "ir/operation/RSQRT.h"
-#include "ir/operation/ReLU.h"
#include "ir/operation/ResizeBilinear.h"
-#include "ir/operation/ReLU1.h"
-#include "ir/operation/ReLU6.h"
+#include "ir/operation/ResizeNearestNeighbor.h"
#include "ir/operation/Reverse.h"
#include "ir/operation/RNN.h"
-#include "ir/operation/Round.h"
-#include "ir/operation/Floor.h"
#include "ir/operation/SpaceToBatchND.h"
#include "ir/operation/SpaceToDepth.h"
-#include "ir/operation/L2Pool2D.h"
#include "ir/operation/EmbeddingLookup.h"
#include "ir/operation/L2Normalization.h"
#include "ir/operation/HashtableLookup.h"
#include "ir/operation/InstanceNorm.h"
#include "ir/operation/PReLU.h"
#include "ir/operation/TransposeConv.h"
-#include "ir/operation/SQRT.h"
#include "ir/operation/SquaredDifference.h"
#include "ir/operation/TopKV2.h"
#include "ir/operation/Gather.h"
-#include "ir/operation/Neg.h"
-#include "ir/operation/Abs.h"
#include "ir/operation/ArgMax.h"
-#include "ir/operation/Dequantize.h"
#include "ir/operation/LocalResponseNormalization.h"
#include "ir/operation/DepthToSpace.h"
#include "ir/operation/Pack.h"
@@ -82,27 +63,22 @@
#include "ir/operation/SplitV.h"
#include "ir/operation/Unpack.h"
#include "ir/operation/Pad.h"
-#include "ir/operation/Min.h"
-#include "ir/operation/Max.h"
#include "ir/operation/Custom.h"
#include "ir/operation/Einsum.h"
#include "ir/operation/OneHot.h"
-#include "ir/operation/Cos.h"
-#include "ir/operation/Sin.h"
#include "ir/operation/Shape.h"
#include "ir/operation/ConvertFp32ToFp16.h"
#include "ir/operation/ConvertFp16ToFp32.h"
#include "ir/operation/If.h"
#include "ir/operation/While.h"
#include "ir/operation/Pow.h"
-#include "ir/operation/ZerosLike.h"
#include "ir/operation/Tile.h"
#include "ir/operation/Range.h"
+#include "ir/operation/Rank.h"
#include "ir/operation/BCQFullyConnected.h"
#include "ir/operation/BCQGather.h"
#include "ir/operation/MatrixBandPart.h"
#include "ir/operation/BatchMatMul.h"
#include "ir/operation/FusedBatchNorm.h"
#include "ir/operation/LogSoftmax.h"
-#include "ir/operation/Quantize.h"
#include "ir/operation/StatelessRandomUniform.h"
diff --git a/runtime/onert/core/include/ir/Operations.lst b/runtime/onert/core/include/ir/Operations.lst
index 75c6d8221..ab2146821 100644
--- a/runtime/onert/core/include/ir/Operations.lst
+++ b/runtime/onert/core/include/ir/Operations.lst
@@ -19,62 +19,44 @@
#endif
// Internal Name
-OP(Add)
-OP(Sub)
OP(BatchToSpaceND)
+OP(BinaryArithmetic)
OP(BroadcastTo)
-OP(Cast)
OP(Conv2D)
OP(DepthwiseConv2D)
-OP(AvgPool2D)
-OP(MaxPool2D)
+OP(Pool2D)
OP(Concat)
OP(Fill)
OP(FullyConnected)
OP(Reduce)
OP(Reshape)
-OP(Mul)
OP(Softmax)
OP(Squeeze)
OP(Slice)
OP(StridedSlice)
-OP(Tanh)
-OP(Logistic)
-OP(Div)
OP(Transpose)
-OP(Exp)
+OP(ElementwiseActivation)
+OP(ElementwiseBinary)
+OP(ElementwiseUnary)
OP(ExpandDims)
OP(Comparison)
-OP(LogicalAnd)
-OP(LogicalOr)
-OP(LogicalNot)
OP(LSTM)
-OP(RSQRT)
-OP(ReLU)
OP(ResizeBilinear)
-OP(ReLU1)
-OP(ReLU6)
+OP(ResizeNearestNeighbor)
OP(Reverse)
OP(RNN)
-OP(Round)
-OP(Floor)
OP(SpaceToBatchND)
OP(SpaceToDepth)
-OP(L2Pool2D)
OP(EmbeddingLookup)
OP(L2Normalization)
OP(HashtableLookup)
OP(InstanceNorm)
OP(PReLU)
OP(TransposeConv)
-OP(SQRT)
OP(SquaredDifference)
OP(TopKV2)
OP(Gather)
-OP(Neg)
-OP(Abs)
OP(ArgMax)
-OP(Dequantize)
OP(Einsum)
OP(LocalResponseNormalization)
OP(DepthToSpace)
@@ -86,26 +68,20 @@ OP(Unpack)
OP(Pad)
OP(Custom)
OP(Permute)
-OP(Min)
-OP(Max)
OP(OneHot)
-OP(Cos)
-OP(Sin)
OP(Shape)
OP(ConvertFp32ToFp16)
OP(ConvertFp16ToFp32)
OP(If)
OP(While)
-OP(Log)
OP(Pow)
-OP(ZerosLike)
OP(Tile)
OP(Range)
+OP(Rank)
OP(BCQFullyConnected)
OP(BCQGather)
OP(MatrixBandPart)
OP(BatchMatMul)
OP(FusedBatchNorm)
OP(LogSoftmax)
-OP(Quantize)
OP(StatelessRandomUniform)
diff --git a/runtime/onert/core/include/ir/Padding.h b/runtime/onert/core/include/ir/Padding.h
index b9053914d..8a7bcdbeb 100644
--- a/runtime/onert/core/include/ir/Padding.h
+++ b/runtime/onert/core/include/ir/Padding.h
@@ -65,7 +65,8 @@ struct Padding
// TODO Change to Padding struct's method
const ExplicitPadding calculatePadding(const Padding &padding, const FeatureShape &ifm_shape,
const FeatureShape &ofm_shape, const Stride &stride,
- uint32_t kw, uint32_t kh);
+ uint32_t kw, uint32_t kh, uint32_t dwf = 1,
+ uint32_t dhf = 1);
} // namespace ir
} // namespace onert
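
The new dwf/dhf parameters carry the Conv2D dilation factors introduced in InternalType.h above. A hedged sketch of how dilation conventionally enters the padding math via the effective kernel extent (the actual computation lives in Padding.cc):

// Sketch: effective kernel extent with dilation; a factor of 1 reduces to the plain kernel size.
inline uint32_t effectiveKernel(uint32_t k, uint32_t dilation_factor)
{
  return (k - 1) * dilation_factor + 1;
}

// e.g. the vertical padding need for SAME padding would use
//   (ofm_h - 1) * stride.vertical + effectiveKernel(kh, dhf) - ifm_h
// clamped at zero.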
diff --git a/runtime/onert/core/include/ir/operation/Abs.h b/runtime/onert/core/include/ir/operation/Abs.h
deleted file mode 100644
index 9126c0027..000000000
--- a/runtime/onert/core/include/ir/operation/Abs.h
+++ /dev/null
@@ -1,49 +0,0 @@
-/*
- * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#ifndef __ONERT_IR_OPERATION_ABS_H__
-#define __ONERT_IR_OPERATION_ABS_H__
-
-#include "ir/Operation.h"
-
-namespace onert
-{
-namespace ir
-{
-namespace operation
-{
-
-class Abs : public Operation
-{
-public:
- enum Input
- {
- INPUT = 0
- };
-
-public:
- Abs(const OperandIndexSequence &inputs, const OperandIndexSequence &outputs);
-
-public:
- void accept(OperationVisitor &v) const override;
- OpCode opcode() const final { return OpCode::Abs; }
-};
-
-} // namespace operation
-} // namespace ir
-} // namespace onert
-
-#endif // __ONERT_IR_OPERATION_ABS_H__
diff --git a/runtime/onert/core/include/ir/operation/Add.h b/runtime/onert/core/include/ir/operation/BinaryArithmetic.h
index 5f5f4e0fe..110fff565 100644
--- a/runtime/onert/core/include/ir/operation/Add.h
+++ b/runtime/onert/core/include/ir/operation/BinaryArithmetic.h
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
@@ -14,8 +14,8 @@
* limitations under the License.
*/
-#ifndef __ONERT_IR_OPERATION_ADD_H__
-#define __ONERT_IR_OPERATION_ADD_H__
+#ifndef __ONERT_IR_OPERATION_BINARY_ARITHMETIC_H__
+#define __ONERT_IR_OPERATION_BINARY_ARITHMETIC_H__
#include "ir/Operation.h"
#include "ir/InternalType.h"
@@ -27,7 +27,7 @@ namespace ir
namespace operation
{
-class Add : public Operation
+class BinaryArithmetic final : public Operation
{
public:
enum Input
@@ -36,17 +36,28 @@ public:
RHS
};
+ enum class ArithmeticType
+ {
+ ADD,
+ SUB,
+ MUL,
+ DIV
+ };
+
struct Param
{
+ ArithmeticType arithmetic_type;
Activation activation;
};
public:
- Add(const OperandIndexSequence &inputs, const OperandIndexSequence &outputs, const Param &param);
+ BinaryArithmetic(const OperandIndexSequence &inputs, const OperandIndexSequence &outputs,
+ const Param &param);
public:
void accept(OperationVisitor &v) const override;
- OpCode opcode() const final { return OpCode::Add; }
+ std::string name() const override;
+ OpCode opcode() const final { return OpCode::BinaryArithmetic; }
public:
const Param &param() const { return _param; }
@@ -59,4 +70,4 @@ private:
} // namespace ir
} // namespace onert
-#endif // __ONERT_IR_OPERATION_ADD_H__
+#endif // __ONERT_IR_OPERATION_BINARY_ARITHMETIC_H__
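
A minimal sketch of what the consolidation means for op construction: the former Add/Sub/Mul/Div nodes become one BinaryArithmetic node whose Param selects the arithmetic type (the helper is illustrative):

#include <memory>
#include "ir/operation/BinaryArithmetic.h"

using onert::ir::operation::BinaryArithmetic;

// Sketch: what used to be operation::Add{inputs, outputs, {activation}} becomes:
std::unique_ptr<BinaryArithmetic> makeAdd(const onert::ir::OperandIndexSequence &inputs,
                                          const onert::ir::OperandIndexSequence &outputs,
                                          onert::ir::Activation activation)
{
  BinaryArithmetic::Param param;
  param.arithmetic_type = BinaryArithmetic::ArithmeticType::ADD;
  param.activation = activation;
  return std::make_unique<BinaryArithmetic>(inputs, outputs, param);
}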
diff --git a/runtime/onert/core/include/ir/operation/BroadcastTo.h b/runtime/onert/core/include/ir/operation/BroadcastTo.h
index 98906adc2..06c033497 100644
--- a/runtime/onert/core/include/ir/operation/BroadcastTo.h
+++ b/runtime/onert/core/include/ir/operation/BroadcastTo.h
@@ -42,7 +42,7 @@ public:
public:
void accept(OperationVisitor &v) const override;
- OpCode opcode() const final { return OpCode::Cast; }
+ OpCode opcode() const final { return OpCode::BroadcastTo; }
};
} // namespace operation
diff --git a/runtime/onert/core/include/ir/operation/Cast.h b/runtime/onert/core/include/ir/operation/Cast.h
deleted file mode 100644
index 6fb8c105b..000000000
--- a/runtime/onert/core/include/ir/operation/Cast.h
+++ /dev/null
@@ -1,49 +0,0 @@
-/*
- * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#ifndef __ONERT_IR_OPERATION_CAST_H__
-#define __ONERT_IR_OPERATION_CAST_H__
-
-#include "ir/Operation.h"
-
-namespace onert
-{
-namespace ir
-{
-namespace operation
-{
-
-class Cast : public Operation
-{
-public:
- enum Input
- {
- INPUT = 0
- };
-
-public:
- Cast(const OperandIndexSequence &inputs, const OperandIndexSequence &outputs);
-
-public:
- void accept(OperationVisitor &v) const override;
- OpCode opcode() const final { return OpCode::Cast; }
-};
-
-} // namespace operation
-} // namespace ir
-} // namespace onert
-
-#endif // __ONERT_IR_OPERATION_CAST_H__
diff --git a/runtime/onert/core/include/ir/operation/Conv2D.h b/runtime/onert/core/include/ir/operation/Conv2D.h
index e23bf3eb3..d8c7b671b 100644
--- a/runtime/onert/core/include/ir/operation/Conv2D.h
+++ b/runtime/onert/core/include/ir/operation/Conv2D.h
@@ -45,6 +45,7 @@ public:
Stride stride;
Padding padding;
Activation activation;
+ Dilation dilation;
};
public:
diff --git a/runtime/onert/core/include/ir/operation/Dequantize.h b/runtime/onert/core/include/ir/operation/Dequantize.h
deleted file mode 100644
index 97a08b33c..000000000
--- a/runtime/onert/core/include/ir/operation/Dequantize.h
+++ /dev/null
@@ -1,49 +0,0 @@
-/*
- * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#ifndef __ONERT_IR_OPERATION_DEQUANTIZE_H__
-#define __ONERT_IR_OPERATION_DEQUANTIZE_H__
-
-#include "ir/Operation.h"
-
-namespace onert
-{
-namespace ir
-{
-namespace operation
-{
-
-class Dequantize : public Operation
-{
-public:
- enum Input
- {
- INPUT = 0
- };
-
-public:
- Dequantize(const OperandIndexSequence &inputs, const OperandIndexSequence &outputs);
-
-public:
- void accept(OperationVisitor &v) const override;
- OpCode opcode() const final { return OpCode::Dequantize; }
-};
-
-} // namespace operation
-} // namespace ir
-} // namespace onert
-
-#endif // __ONERT_IR_OPERATION_DEQUANTIZE_H__
diff --git a/runtime/onert/core/include/ir/operation/Einsum.h b/runtime/onert/core/include/ir/operation/Einsum.h
index a3426ccbc..9892c24b8 100644
--- a/runtime/onert/core/include/ir/operation/Einsum.h
+++ b/runtime/onert/core/include/ir/operation/Einsum.h
@@ -41,7 +41,7 @@ public:
public:
void accept(OperationVisitor &v) const override;
- OpCode opcode() const final { return OpCode::Add; }
+ OpCode opcode() const final { return OpCode::Einsum; }
public:
const Param &param() const { return _param; }
diff --git a/runtime/onert/core/include/ir/operation/Div.h b/runtime/onert/core/include/ir/operation/ElementwiseActivation.h
index a7ec1c465..b2a1d3d2d 100644
--- a/runtime/onert/core/include/ir/operation/Div.h
+++ b/runtime/onert/core/include/ir/operation/ElementwiseActivation.h
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
@@ -14,11 +14,10 @@
* limitations under the License.
*/
-#ifndef __ONERT_IR_OPERATION_DIV_H__
-#define __ONERT_IR_OPERATION_DIV_H__
+#ifndef __ONERT_IR_OPERATION_ELEMENTWISE_ACTIVATION_H__
+#define __ONERT_IR_OPERATION_ELEMENTWISE_ACTIVATION_H__
#include "ir/Operation.h"
-#include "ir/InternalType.h"
namespace onert
{
@@ -27,30 +26,46 @@ namespace ir
namespace operation
{
-class Div : public Operation
+class ElementwiseActivation : public Operation
{
public:
enum Input
{
- LHS = 0,
- RHS
+ INPUT = 0
+ };
+
+ enum class Type
+ {
+ ELU,
+ LOGISTIC,
+ RELU,
+ TANH,
+ LEAKY_RELU
};
struct Param
{
- Activation activation;
+ Type op_type;
+ float alpha;
+ float beta;
+ Param() : op_type(Type::ELU), alpha(0.0f), beta(0.0f) {}
};
public:
- Div(const OperandIndexSequence &inputs, const OperandIndexSequence &outputs, const Param &param);
+ ElementwiseActivation(const OperandIndexSequence &inputs, const OperandIndexSequence &outputs,
+ const Param &param);
public:
void accept(OperationVisitor &v) const override;
- OpCode opcode() const final { return OpCode::Div; }
+ std::string name() const override;
+ OpCode opcode() const final { return OpCode::ElementwiseActivation; }
public:
const Param &param() const { return _param; }
+public:
+ static float infinity;
+
private:
Param _param;
};
@@ -59,4 +74,4 @@ private:
} // namespace ir
} // namespace onert
-#endif // __ONERT_IR_OPERATION_DIV_H__
+#endif // __ONERT_IR_OPERATION_ELEMENTWISE_ACTIVATION_H__
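
Tanh, Logistic, ReLU and friends collapse into this one node; how alpha and beta are interpreted per Type is defined in the .cc, so the following is only a hedged sketch that treats them as the clamp bounds of a RELU-style activation:

#include "ir/operation/ElementwiseActivation.h"

using onert::ir::operation::ElementwiseActivation;

// Sketch: a ReLU6-like parameterization of the consolidated activation op.
ElementwiseActivation::Param relu6Param()
{
  ElementwiseActivation::Param param;
  param.op_type = ElementwiseActivation::Type::RELU;
  param.alpha = 6.0f; // assumed upper bound; ElementwiseActivation::infinity would mean plain ReLU
  param.beta = 0.0f;  // assumed lower bound
  return param;
}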
diff --git a/runtime/onert/core/include/ir/operation/Mul.h b/runtime/onert/core/include/ir/operation/ElementwiseBinary.h
index 0f01b0ecf..dd07f6058 100644
--- a/runtime/onert/core/include/ir/operation/Mul.h
+++ b/runtime/onert/core/include/ir/operation/ElementwiseBinary.h
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
@@ -14,11 +14,10 @@
* limitations under the License.
*/
-#ifndef __ONERT_IR_OPERATION_MUL_H__
-#define __ONERT_IR_OPERATION_MUL_H__
+#ifndef __ONERT_IR_OPERATION_ELEMENTWISEBINARY_H__
+#define __ONERT_IR_OPERATION_ELEMENTWISEBINARY_H__
#include "ir/Operation.h"
-#include "ir/InternalType.h"
namespace onert
{
@@ -27,7 +26,7 @@ namespace ir
namespace operation
{
-class Mul : public Operation
+class ElementwiseBinary : public Operation
{
public:
enum Input
@@ -36,17 +35,27 @@ public:
RHS
};
+ enum class ElementwiseBinaryType
+ {
+ LOGICAL_AND,
+ LOGICAL_OR,
+ MAX,
+ MIN
+ };
+
struct Param
{
- Activation activation;
+ ElementwiseBinaryType op_type;
};
public:
- Mul(const OperandIndexSequence &inputs, const OperandIndexSequence &outputs, const Param &param);
+ ElementwiseBinary(const OperandIndexSequence &inputs, const OperandIndexSequence &outputs,
+ const Param &param);
public:
void accept(OperationVisitor &v) const override;
- OpCode opcode() const final { return OpCode::Mul; }
+ std::string name() const override;
+ OpCode opcode() const final { return OpCode::ElementwiseBinary; }
public:
const Param &param() const { return _param; }
@@ -59,4 +68,4 @@ private:
} // namespace ir
} // namespace onert
-#endif // __ONERT_IR_OPERATION_MUL_H__
+#endif // __ONERT_IR_OPERATION_ELEMENTWISEBINARY_H__
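
The same pattern covers the former LogicalAnd/LogicalOr/Max/Min nodes, which now become ElementwiseBinary with an op_type; a hedged sketch:

#include <memory>
#include "ir/operation/ElementwiseBinary.h"

using onert::ir::operation::ElementwiseBinary;

// Sketch: what used to be operation::Max{inputs, outputs} becomes:
std::unique_ptr<ElementwiseBinary> makeMax(const onert::ir::OperandIndexSequence &inputs,
                                           const onert::ir::OperandIndexSequence &outputs)
{
  ElementwiseBinary::Param param;
  param.op_type = ElementwiseBinary::ElementwiseBinaryType::MAX;
  return std::make_unique<ElementwiseBinary>(inputs, outputs, param);
}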
diff --git a/runtime/onert/core/include/ir/operation/MaxPool2D.h b/runtime/onert/core/include/ir/operation/ElementwiseUnary.h
index 300f7cb3c..c40778a56 100644
--- a/runtime/onert/core/include/ir/operation/MaxPool2D.h
+++ b/runtime/onert/core/include/ir/operation/ElementwiseUnary.h
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
@@ -14,14 +14,10 @@
* limitations under the License.
*/
-#ifndef __ONERT_IR_OPERATION_MAXPOOL2D_H__
-#define __ONERT_IR_OPERATION_MAXPOOL2D_H__
-
-#include <memory>
+#ifndef __ONERT_IR_OPERATION_ELEMENTWISEUNARY_H__
+#define __ONERT_IR_OPERATION_ELEMENTWISEUNARY_H__
#include "ir/Operation.h"
-#include "ir/InternalType.h"
-#include "ir/Padding.h"
namespace onert
{
@@ -30,7 +26,7 @@ namespace ir
namespace operation
{
-class MaxPool2D : public Operation
+class ElementwiseUnary : public Operation
{
public:
enum Input
@@ -38,22 +34,40 @@ public:
INPUT = 0
};
+ enum class Type
+ {
+ ABS,
+ CAST,
+ COS,
+ DEQUANTIZE,
+ ERF,
+ EXP,
+ FLOOR,
+ LOG,
+ LOGICAL_NOT,
+ NEG,
+ QUANTIZE,
+ ROUND,
+ RSQRT,
+ SIN,
+ SQRT,
+ SQURE,
+ ZEROS_LIKE
+ };
+
struct Param
{
- uint32_t kh;
- uint32_t kw;
- Stride stride;
- Padding padding;
- Activation activation;
+ Type op_type;
};
public:
- MaxPool2D(const OperandIndexSequence &inputs, const OperandIndexSequence &outputs,
- const Param &param);
+ ElementwiseUnary(const OperandIndexSequence &inputs, const OperandIndexSequence &outputs,
+ const Param &param);
public:
void accept(OperationVisitor &v) const override;
- OpCode opcode() const final { return OpCode::MaxPool2D; }
+ std::string name() const override;
+ OpCode opcode() const final { return OpCode::ElementwiseUnary; }
public:
const Param &param() const { return _param; }
@@ -66,4 +80,4 @@ private:
} // namespace ir
} // namespace onert
-#endif // __ONERT_IR_OPERATION_MAXPOOL2D_H__
+#endif // __ONERT_IR_OPERATION_ELEMENTWISEUNARY_H__
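
Likewise for the many removed single-input ops (Abs, Cast, Exp, ..., ZerosLike), which map onto ElementwiseUnary with the matching Type value; a hedged sketch:

#include <memory>
#include "ir/operation/ElementwiseUnary.h"

using onert::ir::operation::ElementwiseUnary;

// Sketch: what used to be operation::Abs{inputs, outputs} becomes:
std::unique_ptr<ElementwiseUnary> makeAbs(const onert::ir::OperandIndexSequence &inputs,
                                          const onert::ir::OperandIndexSequence &outputs)
{
  ElementwiseUnary::Param param;
  param.op_type = ElementwiseUnary::Type::ABS;
  return std::make_unique<ElementwiseUnary>(inputs, outputs, param);
}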
diff --git a/runtime/onert/core/include/ir/operation/Exp.h b/runtime/onert/core/include/ir/operation/Exp.h
deleted file mode 100644
index 2e68ff07a..000000000
--- a/runtime/onert/core/include/ir/operation/Exp.h
+++ /dev/null
@@ -1,49 +0,0 @@
-/*
- * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#ifndef __ONERT_IR_OPERATION_EXP_H__
-#define __ONERT_IR_OPERATION_EXP_H__
-
-#include "ir/Operation.h"
-
-namespace onert
-{
-namespace ir
-{
-namespace operation
-{
-
-class Exp : public Operation
-{
-public:
- enum Input
- {
- INPUT = 0
- };
-
-public:
- Exp(const OperandIndexSequence &inputs, const OperandIndexSequence &outputs);
-
-public:
- void accept(OperationVisitor &v) const override;
- OpCode opcode() const final { return OpCode::Exp; }
-};
-
-} // namespace operation
-} // namespace ir
-} // namespace onert
-
-#endif // __ONERT_IR_OPERATION_EXP_H__
diff --git a/runtime/onert/core/include/ir/operation/Floor.h b/runtime/onert/core/include/ir/operation/Floor.h
deleted file mode 100644
index b34699c22..000000000
--- a/runtime/onert/core/include/ir/operation/Floor.h
+++ /dev/null
@@ -1,51 +0,0 @@
-/*
- * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#ifndef __ONERT_IR_OPERATION_FLOOR_H__
-#define __ONERT_IR_OPERATION_FLOOR_H__
-
-#include <memory>
-
-#include "ir/Operation.h"
-
-namespace onert
-{
-namespace ir
-{
-namespace operation
-{
-
-class Floor : public Operation
-{
-public:
- enum Input
- {
- INPUT = 0
- };
-
-public:
- Floor(const OperandIndexSequence &inputs, const OperandIndexSequence &outputs);
-
-public:
- void accept(OperationVisitor &v) const override;
- OpCode opcode() const final { return OpCode::Floor; }
-};
-
-} // namespace operation
-} // namespace ir
-} // namespace onert
-
-#endif // __ONERT_IR_OPERATION_FLOOR_H__
diff --git a/runtime/onert/core/include/ir/operation/Log.h b/runtime/onert/core/include/ir/operation/Log.h
deleted file mode 100644
index a6e3ca3f6..000000000
--- a/runtime/onert/core/include/ir/operation/Log.h
+++ /dev/null
@@ -1,49 +0,0 @@
-/*
- * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#ifndef __ONERT_IR_OPERATION_LOG_H__
-#define __ONERT_IR_OPERATION_LOG_H__
-
-#include "ir/Operation.h"
-
-namespace onert
-{
-namespace ir
-{
-namespace operation
-{
-
-class Log : public Operation
-{
-public:
- enum Input
- {
- INPUT = 0
- };
-
-public:
- Log(const OperandIndexSequence &inputs, const OperandIndexSequence &outputs);
-
-public:
- void accept(OperationVisitor &v) const override;
- OpCode opcode() const final { return OpCode::Log; }
-};
-
-} // namespace operation
-} // namespace ir
-} // namespace onert
-
-#endif // __ONERT_IR_OPERATION_LOG_H__
diff --git a/runtime/onert/core/include/ir/operation/LogicalAnd.h b/runtime/onert/core/include/ir/operation/LogicalAnd.h
deleted file mode 100644
index dc853b6a9..000000000
--- a/runtime/onert/core/include/ir/operation/LogicalAnd.h
+++ /dev/null
@@ -1,50 +0,0 @@
-/*
- * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#ifndef __ONERT_IR_OPERATION_LOGICAL_AND_H__
-#define __ONERT_IR_OPERATION_LOGICAL_AND_H__
-
-#include "ir/Operation.h"
-
-namespace onert
-{
-namespace ir
-{
-namespace operation
-{
-
-class LogicalAnd : public Operation
-{
-public:
- enum Input
- {
- INPUT0 = 0,
- INPUT1 = 1,
- };
-
-public:
- LogicalAnd(const OperandIndexSequence &inputs, const OperandIndexSequence &outputs);
-
-public:
- void accept(OperationVisitor &v) const override;
- OpCode opcode() const final { return OpCode::LogicalAnd; }
-};
-
-} // namespace operation
-} // namespace ir
-} // namespace onert
-
-#endif // __ONERT_IR_OPERATION_LOGICAL_AND_H__
diff --git a/runtime/onert/core/include/ir/operation/LogicalNot.h b/runtime/onert/core/include/ir/operation/LogicalNot.h
deleted file mode 100644
index 9519f6d47..000000000
--- a/runtime/onert/core/include/ir/operation/LogicalNot.h
+++ /dev/null
@@ -1,49 +0,0 @@
-/*
- * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#ifndef __ONERT_IR_OPERATION_LOGICAL_NOT_H__
-#define __ONERT_IR_OPERATION_LOGICAL_NOT_H__
-
-#include "ir/Operation.h"
-
-namespace onert
-{
-namespace ir
-{
-namespace operation
-{
-
-class LogicalNot : public Operation
-{
-public:
- enum Input
- {
- INPUT = 0,
- };
-
-public:
- LogicalNot(const OperandIndexSequence &inputs, const OperandIndexSequence &outputs);
-
-public:
- void accept(OperationVisitor &v) const override;
- OpCode opcode() const final { return OpCode::LogicalNot; }
-};
-
-} // namespace operation
-} // namespace ir
-} // namespace onert
-
-#endif // __ONERT_IR_OPERATION_LOGICAL_NOT_H__
diff --git a/runtime/onert/core/include/ir/operation/LogicalOr.h b/runtime/onert/core/include/ir/operation/LogicalOr.h
deleted file mode 100644
index c4b658cd9..000000000
--- a/runtime/onert/core/include/ir/operation/LogicalOr.h
+++ /dev/null
@@ -1,50 +0,0 @@
-/*
- * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#ifndef __ONERT_IR_OPERATION_LOGICAL_OR_H__
-#define __ONERT_IR_OPERATION_LOGICAL_OR_H__
-
-#include "ir/Operation.h"
-
-namespace onert
-{
-namespace ir
-{
-namespace operation
-{
-
-class LogicalOr : public Operation
-{
-public:
- enum Input
- {
- INPUT0 = 0,
- INPUT1 = 1,
- };
-
-public:
- LogicalOr(const OperandIndexSequence &inputs, const OperandIndexSequence &outputs);
-
-public:
- void accept(OperationVisitor &v) const override;
- OpCode opcode() const final { return OpCode::LogicalOr; }
-};
-
-} // namespace operation
-} // namespace ir
-} // namespace onert
-
-#endif // __ONERT_IR_OPERATION_LOGICAL_OR_H__
diff --git a/runtime/onert/core/include/ir/operation/Logistic.h b/runtime/onert/core/include/ir/operation/Logistic.h
deleted file mode 100644
index 5421e1c84..000000000
--- a/runtime/onert/core/include/ir/operation/Logistic.h
+++ /dev/null
@@ -1,49 +0,0 @@
-/*
- * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#ifndef __ONERT_IR_OPERATION_LOGISTIC_H__
-#define __ONERT_IR_OPERATION_LOGISTIC_H__
-
-#include "ir/Operation.h"
-
-namespace onert
-{
-namespace ir
-{
-namespace operation
-{
-
-class Logistic : public Operation
-{
-public:
- enum Input
- {
- INPUT = 0
- };
-
-public:
- Logistic(const OperandIndexSequence &inputs, const OperandIndexSequence &outputs);
-
-public:
- void accept(OperationVisitor &v) const override;
- OpCode opcode() const final { return OpCode::Logistic; }
-};
-
-} // namespace operation
-} // namespace ir
-} // namespace onert
-
-#endif // __ONERT_IR_OPERATION_LOGISTIC_H__
diff --git a/runtime/onert/core/include/ir/operation/Max.h b/runtime/onert/core/include/ir/operation/Max.h
deleted file mode 100644
index df72d3ae9..000000000
--- a/runtime/onert/core/include/ir/operation/Max.h
+++ /dev/null
@@ -1,50 +0,0 @@
-/*
- * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#ifndef __ONERT_IR_OPERATION_MAX_H__
-#define __ONERT_IR_OPERATION_MAX_H__
-
-#include "ir/Operation.h"
-
-namespace onert
-{
-namespace ir
-{
-namespace operation
-{
-
-class Max : public Operation
-{
-public:
- enum Input
- {
- LHS = 0,
- RHS
- };
-
-public:
- Max(const OperandIndexSequence &inputs, const OperandIndexSequence &outputs);
-
-public:
- void accept(OperationVisitor &v) const override;
- OpCode opcode() const final { return OpCode::Max; }
-};
-
-} // namespace operation
-} // namespace ir
-} // namespace onert
-
-#endif // __ONERT_IR_OPERATION_MAX_H__
diff --git a/runtime/onert/core/include/ir/operation/Mean.h b/runtime/onert/core/include/ir/operation/Mean.h
deleted file mode 100644
index ce2da908d..000000000
--- a/runtime/onert/core/include/ir/operation/Mean.h
+++ /dev/null
@@ -1,61 +0,0 @@
-/*
- * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#ifndef __ONERT_IR_OPERATION_MEAN_H__
-#define __ONERT_IR_OPERATION_MEAN_H__
-
-#include "ir/Operation.h"
-
-namespace onert
-{
-namespace ir
-{
-namespace operation
-{
-
-class Mean : public Operation
-{
-public:
- enum Input
- {
- INPUT,
- AXES
- };
-
- struct Param
- {
- bool keep_dims;
- };
-
-public:
- Mean(const OperandIndexSequence &inputs, const OperandIndexSequence &outputs, const Param &param);
-
-public:
- void accept(OperationVisitor &v) const override;
- OpCode opcode() const final { return OpCode::Mean; }
-
-public:
- const Param &param() const { return _param; }
-
-private:
- Param _param;
-};
-
-} // namespace operation
-} // namespace ir
-} // namespace onert
-
-#endif // __ONERT_IR_OPERATION_MEAN_H__
diff --git a/runtime/onert/core/include/ir/operation/Min.h b/runtime/onert/core/include/ir/operation/Min.h
deleted file mode 100644
index 117301c00..000000000
--- a/runtime/onert/core/include/ir/operation/Min.h
+++ /dev/null
@@ -1,50 +0,0 @@
-/*
- * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#ifndef __ONERT_IR_OPERATION_MIN_H__
-#define __ONERT_IR_OPERATION_MIN_H__
-
-#include "ir/Operation.h"
-
-namespace onert
-{
-namespace ir
-{
-namespace operation
-{
-
-class Min : public Operation
-{
-public:
- enum Input
- {
- LHS = 0,
- RHS
- };
-
-public:
- Min(const OperandIndexSequence &inputs, const OperandIndexSequence &outputs);
-
-public:
- void accept(OperationVisitor &v) const override;
- OpCode opcode() const final { return OpCode::Min; }
-};
-
-} // namespace operation
-} // namespace ir
-} // namespace onert
-
-#endif // __ONERT_IR_OPERATION_MIN_H__
diff --git a/runtime/onert/core/include/ir/operation/Neg.h b/runtime/onert/core/include/ir/operation/Neg.h
deleted file mode 100644
index f8123c485..000000000
--- a/runtime/onert/core/include/ir/operation/Neg.h
+++ /dev/null
@@ -1,49 +0,0 @@
-/*
- * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#ifndef __ONERT_IR_OPERATION_NEG_H__
-#define __ONERT_IR_OPERATION_NEG_H__
-
-#include "ir/Operation.h"
-
-namespace onert
-{
-namespace ir
-{
-namespace operation
-{
-
-class Neg : public Operation
-{
-public:
- enum Input
- {
- INPUT = 0
- };
-
-public:
- Neg(const OperandIndexSequence &inputs, const OperandIndexSequence &outputs);
-
-public:
- void accept(OperationVisitor &v) const override;
- OpCode opcode() const final { return OpCode::Neg; }
-};
-
-} // namespace operation
-} // namespace ir
-} // namespace onert
-
-#endif // __ONERT_IR_OPERATION_NEG_H__
diff --git a/runtime/onert/core/include/ir/operation/AvgPool2D.h b/runtime/onert/core/include/ir/operation/Pool2D.h
index d5b300a35..22425b4c2 100644
--- a/runtime/onert/core/include/ir/operation/AvgPool2D.h
+++ b/runtime/onert/core/include/ir/operation/Pool2D.h
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
@@ -14,8 +14,8 @@
* limitations under the License.
*/
-#ifndef __ONERT_IR_OPERATION_AVGPOOL2D_H__
-#define __ONERT_IR_OPERATION_AVGPOOL2D_H__
+#ifndef __ONERT_IR_OPERATION_POOL2D_H__
+#define __ONERT_IR_OPERATION_POOL2D_H__
#include <memory>
@@ -30,7 +30,7 @@ namespace ir
namespace operation
{
-class AvgPool2D : public Operation
+class Pool2D : public Operation
{
public:
enum Input
@@ -38,23 +38,31 @@ public:
INPUT = 0
};
+ enum class PoolType
+ {
+ AVG,
+ L2,
+ MAX,
+ };
+
struct Param
{
+ PoolType op_type;
uint32_t kh;
uint32_t kw;
-
Stride stride;
Padding padding;
Activation activation;
};
public:
- AvgPool2D(const OperandIndexSequence &inputs, const OperandIndexSequence &outputs,
- const Param &param);
+ Pool2D(const OperandIndexSequence &inputs, const OperandIndexSequence &outputs,
+ const Param &param);
public:
void accept(OperationVisitor &v) const override;
- OpCode opcode() const final { return OpCode::AvgPool2D; }
+ std::string name() const override;
+ OpCode opcode() const final { return OpCode::Pool2D; }
public:
const Param &param() const { return _param; }
@@ -67,4 +75,4 @@ private:
} // namespace ir
} // namespace onert
-#endif // __ONERT_IR_OPERATION_AVGPOOL2D_H__
+#endif // __ONERT_IR_OPERATION_POOL2D_H__
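
The former per-kind pooling headers (AvgPool2D here, with MaxPool2D and L2Pool2D elsewhere in this patch) collapse into this single Pool2D operation whose Param carries the pool kind. A minimal, hedged sketch of how an importer might fill the new Param for what used to be an AvgPool2D node; field values and the Stride/Padding/Activation initializers are illustrative assumptions, not code from this patch:

    // Hypothetical construction of the merged Pool2D operation.
    using onert::ir::operation::Pool2D;

    Pool2D::Param param;
    param.op_type = Pool2D::PoolType::AVG; // formerly a dedicated AvgPool2D op
    param.kh = 2;
    param.kw = 2;
    param.stride = onert::ir::Stride{2, 2};         // assumed aggregate init
    param.padding = onert::ir::Padding{};           // padding resolved elsewhere
    param.activation = onert::ir::Activation::NONE;

    Pool2D node{inputs, outputs, param}; // inputs/outputs: ir::OperandIndexSequence placeholders
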
diff --git a/runtime/onert/core/include/ir/operation/Quantize.h b/runtime/onert/core/include/ir/operation/Quantize.h
deleted file mode 100644
index 2533ce432..000000000
--- a/runtime/onert/core/include/ir/operation/Quantize.h
+++ /dev/null
@@ -1,49 +0,0 @@
-/*
- * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#ifndef __ONERT_IR_OPERATION_QUANTIZE_H__
-#define __ONERT_IR_OPERATION_QUANTIZE_H__
-
-#include "ir/Operation.h"
-
-namespace onert
-{
-namespace ir
-{
-namespace operation
-{
-
-class Quantize : public Operation
-{
-public:
- enum Input
- {
- INPUT = 0,
- };
-
-public:
- Quantize(const OperandIndexSequence &inputs, const OperandIndexSequence &outputs);
-
-public:
- void accept(OperationVisitor &v) const override;
- OpCode opcode() const final { return OpCode::Quantize; }
-};
-
-} // namespace operation
-} // namespace ir
-} // namespace onert
-
-#endif // __ONERT_IR_OPERATION_QUANTIZE_H__
diff --git a/runtime/onert/core/include/ir/operation/RSQRT.h b/runtime/onert/core/include/ir/operation/RSQRT.h
deleted file mode 100644
index 64bb4f10a..000000000
--- a/runtime/onert/core/include/ir/operation/RSQRT.h
+++ /dev/null
@@ -1,49 +0,0 @@
-/*
- * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#ifndef __ONERT_IR_OPERATION_RSQRT_H__
-#define __ONERT_IR_OPERATION_RSQRT_H__
-
-#include "ir/Operation.h"
-
-namespace onert
-{
-namespace ir
-{
-namespace operation
-{
-
-class RSQRT : public Operation
-{
-public:
- enum Input
- {
- INPUT = 0
- };
-
-public:
- RSQRT(const OperandIndexSequence &inputs, const OperandIndexSequence &outputs);
-
-public:
- void accept(OperationVisitor &v) const override;
- OpCode opcode() const final { return OpCode::RSQRT; }
-};
-
-} // namespace operation
-} // namespace ir
-} // namespace onert
-
-#endif // __ONERT_IR_OPERATION_RSQRT_H__
diff --git a/runtime/onert/core/include/ir/operation/Cos.h b/runtime/onert/core/include/ir/operation/Rank.h
index a6d7851bd..2fd24ce23 100644
--- a/runtime/onert/core/include/ir/operation/Cos.h
+++ b/runtime/onert/core/include/ir/operation/Rank.h
@@ -14,8 +14,10 @@
* limitations under the License.
*/
-#ifndef __ONERT_IR_OPERATION_COS_H__
-#define __ONERT_IR_OPERATION_COS_H__
+#ifndef __ONERT_IR_OPERATION_RANK_H__
+#define __ONERT_IR_OPERATION_RANK_H__
+
+#include <memory>
#include "ir/Operation.h"
@@ -26,7 +28,7 @@ namespace ir
namespace operation
{
-class Cos : public Operation
+class Rank : public Operation
{
public:
enum Input
@@ -35,15 +37,15 @@ public:
};
public:
- Cos(const OperandIndexSequence &inputs, const OperandIndexSequence &outputs);
+ Rank(const OperandIndexSequence &inputs, const OperandIndexSequence &outputs);
public:
void accept(OperationVisitor &v) const override;
- OpCode opcode() const final { return OpCode::Cos; }
+ OpCode opcode() const final { return OpCode::Rank; }
};
} // namespace operation
} // namespace ir
} // namespace onert
-#endif // __ONERT_IR_OPERATION_COS_H__
+#endif // __ONERT_IR_OPERATION_RANK_H__
diff --git a/runtime/onert/core/include/ir/operation/ReLU.h b/runtime/onert/core/include/ir/operation/ReLU.h
deleted file mode 100644
index 9eb0c091b..000000000
--- a/runtime/onert/core/include/ir/operation/ReLU.h
+++ /dev/null
@@ -1,49 +0,0 @@
-/*
- * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#ifndef __ONERT_IR_OPERATION_RELU_H__
-#define __ONERT_IR_OPERATION_RELU_H__
-
-#include "ir/Operation.h"
-
-namespace onert
-{
-namespace ir
-{
-namespace operation
-{
-
-class ReLU : public Operation
-{
-public:
- enum Input
- {
- INPUT = 0
- };
-
-public:
- ReLU(const OperandIndexSequence &inputs, const OperandIndexSequence &outputs);
-
-public:
- void accept(OperationVisitor &v) const override;
- OpCode opcode() const final { return OpCode::ReLU; }
-};
-
-} // namespace operation
-} // namespace ir
-} // namespace onert
-
-#endif // __ONERT_IR_OPERATION_RELU_H__
diff --git a/runtime/onert/core/include/ir/operation/ReLU1.h b/runtime/onert/core/include/ir/operation/ReLU1.h
deleted file mode 100644
index 134ee573a..000000000
--- a/runtime/onert/core/include/ir/operation/ReLU1.h
+++ /dev/null
@@ -1,49 +0,0 @@
-/*
- * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#ifndef __ONERT_IR_OPERATION_ReLU1_H__
-#define __ONERT_IR_OPERATION_ReLU1_H__
-
-#include "ir/Operation.h"
-
-namespace onert
-{
-namespace ir
-{
-namespace operation
-{
-
-class ReLU1 : public Operation
-{
-public:
- enum Input
- {
- INPUT = 0
- };
-
-public:
- ReLU1(const OperandIndexSequence &inputs, const OperandIndexSequence &outputs);
-
-public:
- void accept(OperationVisitor &v) const override;
- OpCode opcode() const final { return OpCode::ReLU1; }
-};
-
-} // namespace operation
-} // namespace ir
-} // namespace onert
-
-#endif // __ONERT_IR_OPERATION_ReLU1_H__
diff --git a/runtime/onert/core/include/ir/operation/ReLU6.h b/runtime/onert/core/include/ir/operation/ReLU6.h
deleted file mode 100644
index e658c4925..000000000
--- a/runtime/onert/core/include/ir/operation/ReLU6.h
+++ /dev/null
@@ -1,49 +0,0 @@
-/*
- * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#ifndef __ONERT_IR_OPERATION_ReLU6_H__
-#define __ONERT_IR_OPERATION_ReLU6_H__
-
-#include "ir/Operation.h"
-
-namespace onert
-{
-namespace ir
-{
-namespace operation
-{
-
-class ReLU6 : public Operation
-{
-public:
- enum Input
- {
- INPUT = 0
- };
-
-public:
- ReLU6(const OperandIndexSequence &inputs, const OperandIndexSequence &outputs);
-
-public:
- void accept(OperationVisitor &v) const override;
- OpCode opcode() const final { return OpCode::ReLU6; }
-};
-
-} // namespace operation
-} // namespace ir
-} // namespace onert
-
-#endif // __ONERT_IR_OPERATION_ReLU6_H__
diff --git a/runtime/onert/core/include/ir/operation/L2Pool2D.h b/runtime/onert/core/include/ir/operation/ResizeNearestNeighbor.h
index d369fd5fc..e4d810eeb 100644
--- a/runtime/onert/core/include/ir/operation/L2Pool2D.h
+++ b/runtime/onert/core/include/ir/operation/ResizeNearestNeighbor.h
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
@@ -14,14 +14,12 @@
* limitations under the License.
*/
-#ifndef __ONERT_IR_OPERATION_L2_POOL_2D_H__
-#define __ONERT_IR_OPERATION_L2_POOL_2D_H__
+#ifndef __ONERT_IR_OPERATION_RESIZE_NEAREST_NEIGHBOR_H__
+#define __ONERT_IR_OPERATION_RESIZE_NEAREST_NEIGHBOR_H__
#include <memory>
#include "ir/Operation.h"
-#include "ir/InternalType.h"
-#include "ir/Padding.h"
namespace onert
{
@@ -30,7 +28,7 @@ namespace ir
namespace operation
{
-class L2Pool2D : public Operation
+class ResizeNearestNeighbor : public Operation
{
public:
enum Input
@@ -40,20 +38,18 @@ public:
struct Param
{
- Padding padding;
- Stride stride;
- uint32_t kw;
- uint32_t kh;
- Activation activation;
+ int32_t height_out;
+ int32_t width_out;
+ bool align_corners;
};
public:
- L2Pool2D(const OperandIndexSequence &inputs, const OperandIndexSequence &outputs,
- const Param &param);
+ ResizeNearestNeighbor(const OperandIndexSequence &inputs, const OperandIndexSequence &outputs,
+ const Param &param);
public:
void accept(OperationVisitor &v) const override;
- OpCode opcode() const final { return OpCode::L2Pool2D; }
+ OpCode opcode() const final { return OpCode::ResizeNearestNeighbor; }
public:
const Param &param() const { return _param; }
@@ -66,4 +62,4 @@ private:
} // namespace ir
} // namespace onert
-#endif // __ONERT_IR_OPERATION_L2_POOL_2D_H__
+#endif // __ONERT_IR_OPERATION_RESIZE_NEAREST_NEIGHBOR_H__
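
The repurposed header's Param now describes a target output size instead of pooling geometry. A hedged sketch of populating it, with placeholder values:

    // Hypothetical: resize to 224x224 without corner alignment.
    onert::ir::operation::ResizeNearestNeighbor::Param param;
    param.height_out = 224;
    param.width_out = 224;
    param.align_corners = false;

    onert::ir::operation::ResizeNearestNeighbor node{inputs, outputs, param}; // placeholders
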
diff --git a/runtime/onert/core/include/ir/operation/Round.h b/runtime/onert/core/include/ir/operation/Round.h
deleted file mode 100644
index 44af0d861..000000000
--- a/runtime/onert/core/include/ir/operation/Round.h
+++ /dev/null
@@ -1,49 +0,0 @@
-/*
- * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#ifndef __ONERT_IR_OPERATION_ROUND_H__
-#define __ONERT_IR_OPERATION_ROUND_H__
-
-#include "ir/Operation.h"
-
-namespace onert
-{
-namespace ir
-{
-namespace operation
-{
-
-class Round : public Operation
-{
-public:
- enum Input
- {
- INPUT = 0
- };
-
-public:
- Round(const OperandIndexSequence &inputs, const OperandIndexSequence &outputs);
-
-public:
- void accept(OperationVisitor &v) const override;
- OpCode opcode() const final { return OpCode::Round; }
-};
-
-} // namespace operation
-} // namespace ir
-} // namespace onert
-
-#endif // __ONERT_IR_OPERATION_ROUND_H__
diff --git a/runtime/onert/core/include/ir/operation/SQRT.h b/runtime/onert/core/include/ir/operation/SQRT.h
deleted file mode 100644
index 8563b1ab1..000000000
--- a/runtime/onert/core/include/ir/operation/SQRT.h
+++ /dev/null
@@ -1,49 +0,0 @@
-/*
- * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#ifndef __ONERT_IR_OPERATION_SQRT_H__
-#define __ONERT_IR_OPERATION_SQRT_H__
-
-#include "ir/Operation.h"
-
-namespace onert
-{
-namespace ir
-{
-namespace operation
-{
-
-class SQRT : public Operation
-{
-public:
- enum Input
- {
- INPUT = 0
- };
-
-public:
- SQRT(const OperandIndexSequence &inputs, const OperandIndexSequence &outputs);
-
-public:
- void accept(OperationVisitor &v) const override;
- OpCode opcode() const final { return OpCode::SQRT; }
-};
-
-} // namespace operation
-} // namespace ir
-} // namespace onert
-
-#endif // __ONERT_IR_OPERATION_SQRT_H__
diff --git a/runtime/onert/core/include/ir/operation/Select.h b/runtime/onert/core/include/ir/operation/Select.h
index 400ac9d3e..33bf67886 100644
--- a/runtime/onert/core/include/ir/operation/Select.h
+++ b/runtime/onert/core/include/ir/operation/Select.h
@@ -41,7 +41,7 @@ public:
public:
void accept(OperationVisitor &v) const override;
- OpCode opcode() const final { return OpCode::Abs; }
+ OpCode opcode() const final { return OpCode::Select; }
};
} // namespace operation
diff --git a/runtime/onert/core/include/ir/operation/Sin.h b/runtime/onert/core/include/ir/operation/Sin.h
deleted file mode 100644
index aef44ab2e..000000000
--- a/runtime/onert/core/include/ir/operation/Sin.h
+++ /dev/null
@@ -1,49 +0,0 @@
-/*
- * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#ifndef __ONERT_IR_OPERATION_SIN_H__
-#define __ONERT_IR_OPERATION_SIN_H__
-
-#include "ir/Operation.h"
-
-namespace onert
-{
-namespace ir
-{
-namespace operation
-{
-
-class Sin : public Operation
-{
-public:
- enum Input
- {
- INPUT = 0
- };
-
-public:
- Sin(const OperandIndexSequence &inputs, const OperandIndexSequence &outputs);
-
-public:
- void accept(OperationVisitor &v) const override;
- OpCode opcode() const final { return OpCode::Sin; }
-};
-
-} // namespace operation
-} // namespace ir
-} // namespace onert
-
-#endif // __ONERT_IR_OPERATION_SIN_H__
diff --git a/runtime/onert/core/include/ir/operation/Sub.h b/runtime/onert/core/include/ir/operation/Sub.h
deleted file mode 100644
index 0674e6e4d..000000000
--- a/runtime/onert/core/include/ir/operation/Sub.h
+++ /dev/null
@@ -1,62 +0,0 @@
-/*
- * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#ifndef __ONERT_IR_OPERATION_SUB_H__
-#define __ONERT_IR_OPERATION_SUB_H__
-
-#include "ir/Operation.h"
-#include "ir/InternalType.h"
-
-namespace onert
-{
-namespace ir
-{
-namespace operation
-{
-
-class Sub : public Operation
-{
-public:
- enum Input
- {
- LHS = 0,
- RHS
- };
-
- struct Param
- {
- Activation activation;
- };
-
-public:
- Sub(const OperandIndexSequence &inputs, const OperandIndexSequence &outputs, const Param &param);
-
-public:
- void accept(OperationVisitor &v) const override;
- OpCode opcode() const final { return OpCode::Sub; }
-
-public:
- const Param &param() const { return _param; }
-
-private:
- Param _param;
-};
-
-} // namespace operation
-} // namespace ir
-} // namespace onert
-
-#endif // __ONERT_IR_OPERATION_SUB_H__
diff --git a/runtime/onert/core/include/ir/operation/Tanh.h b/runtime/onert/core/include/ir/operation/Tanh.h
deleted file mode 100644
index 9b8d03bca..000000000
--- a/runtime/onert/core/include/ir/operation/Tanh.h
+++ /dev/null
@@ -1,49 +0,0 @@
-/*
- * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#ifndef __ONERT_IR_OPERATION_TANH_H__
-#define __ONERT_IR_OPERATION_TANH_H__
-
-#include "ir/Operation.h"
-
-namespace onert
-{
-namespace ir
-{
-namespace operation
-{
-
-class Tanh : public Operation
-{
-public:
- enum Input
- {
- INPUT = 0
- };
-
-public:
- Tanh(const OperandIndexSequence &inputs, const OperandIndexSequence &outputs);
-
-public:
- void accept(OperationVisitor &v) const override;
- OpCode opcode() const final { return OpCode::Tanh; }
-};
-
-} // namespace operation
-} // namespace ir
-} // namespace onert
-
-#endif // __ONERT_IR_OPERATION_TANH_H__
diff --git a/runtime/onert/core/include/ir/operation/ZerosLike.h b/runtime/onert/core/include/ir/operation/ZerosLike.h
deleted file mode 100644
index 7c2851858..000000000
--- a/runtime/onert/core/include/ir/operation/ZerosLike.h
+++ /dev/null
@@ -1,49 +0,0 @@
-/*
- * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#ifndef __ONERT_IR_OPERATION_ZEROS_LIKE_H__
-#define __ONERT_IR_OPERATION_ZEROS_LIKE_H__
-
-#include "ir/Operation.h"
-
-namespace onert
-{
-namespace ir
-{
-namespace operation
-{
-
-class ZerosLike : public Operation
-{
-public:
- enum Input
- {
- INPUT = 0
- };
-
-public:
- ZerosLike(const OperandIndexSequence &inputs, const OperandIndexSequence &outputs);
-
-public:
- void accept(OperationVisitor &v) const override;
- OpCode opcode() const final { return OpCode::ZerosLike; }
-};
-
-} // namespace operation
-} // namespace ir
-} // namespace onert
-
-#endif // __ONERT_IR_OPERATION_ZEROS_LIKE_H__
diff --git a/runtime/onert/core/include/util/Config.lst b/runtime/onert/core/include/util/Config.lst
index 1718e034c..5077fad69 100644
--- a/runtime/onert/core/include/util/Config.lst
+++ b/runtime/onert/core/include/util/Config.lst
@@ -20,7 +20,7 @@
// Name | Type | Default
CONFIG(GRAPH_DOT_DUMP , int , "0")
-CONFIG(BACKENDS , std::string , "cpu;acl_cl;acl_neon")
+CONFIG(BACKENDS , std::string , "cpu;acl_cl;acl_neon;bcq") // FIXME Remove bcq
CONFIG(OP_BACKEND_ALLOPS , std::string , "")
CONFIG(OP_BACKEND_MAP , std::string , "")
CONFIG(DISABLE_COMPILE , bool , "0")
diff --git a/runtime/onert/backend/cpu/ops/ExpLayer.h b/runtime/onert/core/include/util/Exceptions.h
index cd27b0e40..fc3fa0f64 100644
--- a/runtime/onert/backend/cpu/ops/ExpLayer.h
+++ b/runtime/onert/core/include/util/Exceptions.h
@@ -14,44 +14,35 @@
* limitations under the License.
*/
-#ifndef __ONERT_BACKEND_CPU_OPS_EXPLAYER_H__
-#define __ONERT_BACKEND_CPU_OPS_EXPLAYER_H__
+#ifndef __ONERT_UTIL_ONERTEXCEPTION_H__
+#define __ONERT_UTIL_ONERTEXCEPTION_H__
-#include <backend/IPortableTensor.h>
-
-#include <exec/IFunction.h>
+#include <string>
namespace onert
{
-namespace backend
-{
-namespace cpu
-{
-namespace ops
-{
-class ExpLayer : public ::onert::exec::IFunction
+class OnertException : public std::exception
{
public:
- ExpLayer();
-
-public:
- void expFloat32();
+ OnertException(const std::string &msg) : _msg{msg} {}
+ OnertException(const std::string &tag, const std::string &msg) : _msg{tag + " : " + msg} {}
- void expQuant8();
-
- void configure(const IPortableTensor *input, IPortableTensor *output);
-
- void run() override;
+ const char *what() const noexcept override { return _msg.c_str(); }
private:
- const IPortableTensor *_input;
- IPortableTensor *_output;
+ std::string _msg;
+};
+
+class InsufficientBufferSizeException : public OnertException
+{
+public:
+ InsufficientBufferSizeException(const std::string &msg)
+ : OnertException{"InsufficientBufferSize", msg}
+ {
+ }
};
-} // namespace ops
-} // namespace cpu
-} // namespace backend
} // namespace onert
-#endif // __ONERT_BACKEND_CPU_OPS_EXPLAYER_H__
+#endif // __ONERT_UTIL_ONERTEXCEPTION_H__
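
The new util/Exceptions.h gives the runtime a tagged exception base, OnertException, plus a derived InsufficientBufferSizeException, which the controlflow DynamicTensorManager below throws instead of a bare std::runtime_error. A hedged sketch of catching it at an API boundary; the execute() call and io_desc are hypothetical stand-ins:

    #include "util/Exceptions.h"
    #include <iostream>

    try
    {
      executor->execute(io_desc); // hypothetical call that may resize a user-supplied output
    }
    catch (const onert::InsufficientBufferSizeException &e)
    {
      // The caller's output buffer is smaller than the dynamically inferred tensor size.
      std::cerr << e.what() << std::endl; // prints "InsufficientBufferSize : <message>"
    }
    catch (const onert::OnertException &e)
    {
      std::cerr << e.what() << std::endl; // any other tagged runtime failure
    }
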
diff --git a/runtime/onert/core/include/util/ShapeInference.h b/runtime/onert/core/include/util/ShapeInference.h
index a68c22b16..1ebed48f2 100644
--- a/runtime/onert/core/include/util/ShapeInference.h
+++ b/runtime/onert/core/include/util/ShapeInference.h
@@ -19,15 +19,13 @@
#include "Utils.h"
-#include "ir/operation/AvgPool2D.h"
#include "ir/operation/Concat.h"
-#include "ir/operation/MaxPool2D.h"
#include "ir/operation/Conv2D.h"
#include "ir/operation/DepthwiseConv2D.h"
+#include "ir/operation/Pool2D.h"
#include "ir/operation/Reshape.h"
-#include "ir/operation/RSQRT.h"
#include "ir/operation/StridedSlice.h"
-#include "ir/LoweredGraph.h"
+#include "compiler/LoweredGraph.h"
#include "ir/Index.h"
#include "ir/Layout.h"
#include "ir/OperationVisitor.h"
@@ -46,8 +44,6 @@ using Shapes = std::vector<ir::Shape>;
ir::Shape inferArgMaxShape(const ir::Shape &input_shape, int axis, int rank);
-ir::Shape inferAvgPoolShape(const ir::Shape &in_shape, const ir::operation::AvgPool2D::Param &param,
- ir::Layout layout = ir::Layout::NHWC);
ir::Shape inferBatchMatMulShape(const ir::Shape &lhs_shape, const ir::Shape &rhs_shape,
const ir::operation::BatchMatMul::Param &param);
@@ -74,15 +70,15 @@ ir::Shape inferFullyConnectedShape(const ir::Shape &in_shape, const ir::Shape &k
ir::Shape inferGatherShape(const ir::Shape &input_shape, const ir::Shape &indices_shape, int axis,
int rank);
-ir::Shape inferMaxPoolShape(const ir::Shape &in_shape, const ir::operation::MaxPool2D::Param &param,
- ir::Layout layout = ir::Layout::NHWC);
-
ir::Shape inferOnehotShape(const ir::Shape &input_shape, const int depth, int axis);
ir::Shape inferPackShape(const ir::Shape &input_shape, int axis, int rank, int num);
ir::Shape inferPadShape(const ir::Shape &in_shape, const int32_t *pad_buf, const size_t num_pads);
+ir::Shape inferPoolShape(const ir::Shape &in_shape, const ir::operation::Pool2D::Param &param,
+ ir::Layout layout = ir::Layout::NHWC);
+
template <typename T> ir::Shape inferRangeShape(T start_val, T limit_val, T delta_val);
ir::Shape inferReshapeShape(const int32_t *shape_buf, const int32_t shape_num_elements,
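
With the pooling ops merged, the separate inferAvgPoolShape/inferMaxPoolShape helpers fold into a single inferPoolShape taking a Pool2D::Param. A sketch of a call under the new signature; the enclosing namespace, the Stride initializer, and the Shape constructor are assumptions for illustration:

    // Hypothetical: infer the output shape of a 2x2 MAX pool, stride 2, on an NHWC input.
    onert::ir::operation::Pool2D::Param param;
    param.op_type = onert::ir::operation::Pool2D::PoolType::MAX;
    param.kh = 2;
    param.kw = 2;
    param.stride = onert::ir::Stride{2, 2}; // assumed aggregate init

    onert::ir::Shape in_shape{1, 224, 224, 32}; // assumed initializer-list ctor
    auto out = onert::shape_inference::inferPoolShape(in_shape, param, onert::ir::Layout::NHWC);
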
diff --git a/runtime/onert/core/src/backend/controlflow/Backend.h b/runtime/onert/core/src/backend/controlflow/Backend.h
index 3c7325912..670f7750f 100644
--- a/runtime/onert/core/src/backend/controlflow/Backend.h
+++ b/runtime/onert/core/src/backend/controlflow/Backend.h
@@ -21,6 +21,7 @@
#include "ConstantInitializer.h"
#include "KernelGenerator.h"
#include "TensorBuilder.h"
+#include "Tensor.h"
#include <backend/Backend.h>
@@ -63,10 +64,12 @@ public:
// there is no such case until now, let's support it later
// TODO Remove TensorBuilder and ConstantInitializer
// TODO Support Consecutive controflow operation's intermediate tensor
- auto tb = std::make_shared<TensorBuilder>();
+ auto tr = std::make_shared<TensorRegistry>();
+ auto tb = std::make_shared<TensorBuilder>(tr);
+ context->tensor_registry = tr;
context->tensor_builder = tb;
- context->constant_initializer = std::make_shared<ConstantInitializer>(operands, tb);
- context->kernel_gen = std::make_shared<KernelGenerator>(graph, tb);
+ context->constant_initializer = std::make_shared<ConstantInitializer>(operands, tr);
+ context->kernel_gen = std::make_shared<KernelGenerator>(graph, tb->dynamicTensorManager(), tr);
context->tensor_register = nullptr;
context->optimizer = nullptr;
return context;
diff --git a/runtime/onert/core/src/backend/controlflow/ConstantInitializer.h b/runtime/onert/core/src/backend/controlflow/ConstantInitializer.h
index 35cc7835e..e21a8f357 100644
--- a/runtime/onert/core/src/backend/controlflow/ConstantInitializer.h
+++ b/runtime/onert/core/src/backend/controlflow/ConstantInitializer.h
@@ -17,7 +17,7 @@
#ifndef __ONERT_COMPILER_CONTROLFLOW_CONSTANT_INITIALIZER_H__
#define __ONERT_COMPILER_CONTROLFLOW_CONSTANT_INITIALIZER_H__
-#include "TensorBuilder.h"
+#include "TensorRegistry.h"
#include <backend/IConstantInitializer.h>
#include <ir/Operands.h>
@@ -33,16 +33,16 @@ class ConstantInitializer : public IConstantInitializer
{
public:
ConstantInitializer(const ir::Operands &operands,
- const std::shared_ptr<TensorBuilder> &tensor_builder)
- : IConstantInitializer{operands}, _tensor_builder{tensor_builder}
+ const std::shared_ptr<ITensorRegistry> &tensor_reg)
+ : IConstantInitializer{operands}, _tensor_reg{tensor_reg}
{
}
private:
- std::shared_ptr<ITensorBuilder> tensor_builder() const override { return _tensor_builder; }
+ std::shared_ptr<ITensorRegistry> tensor_registry() const override { return _tensor_reg; }
private:
- std::shared_ptr<TensorBuilder> _tensor_builder;
+ std::shared_ptr<ITensorRegistry> _tensor_reg;
};
} // namespace controlflow
diff --git a/runtime/onert/core/src/backend/controlflow/DynamicTensorManager.cc b/runtime/onert/core/src/backend/controlflow/DynamicTensorManager.cc
index e538f3fd3..1288e4c96 100644
--- a/runtime/onert/core/src/backend/controlflow/DynamicTensorManager.cc
+++ b/runtime/onert/core/src/backend/controlflow/DynamicTensorManager.cc
@@ -17,6 +17,8 @@
#include "DynamicTensorManager.h"
#include "util/logging.h"
+#include "util/Exceptions.h"
+#include "ir/DataType.h"
namespace onert
{
@@ -25,10 +27,8 @@ namespace backend
namespace controlflow
{
-DynamicTensorManager::DynamicTensorManager(const std::shared_ptr<cpu_common::TensorRegistry> &reg,
- const std::shared_ptr<UserTensorRegistry> &user_reg)
- : _dynamic_mem_mgr{new cpu_common::DynamicMemoryManager()}, _tensors{reg},
- _user_tensors{user_reg}
+DynamicTensorManager::DynamicTensorManager(const std::shared_ptr<TensorRegistry> &tensors)
+ : _dynamic_mem_mgr{new cpu_common::DynamicMemoryManager()}, _tensors{tensors}
{
// DO NOTHING
}
@@ -36,20 +36,20 @@ DynamicTensorManager::DynamicTensorManager(const std::shared_ptr<cpu_common::Ten
void DynamicTensorManager::applyShape(const ir::OperandIndex &ind, const ir::Shape &new_shape)
{
// NOTE Handle user tensors first
- auto user_tensor = _user_tensors->getNativeTensor(ind);
+ auto user_tensor = _tensors->getNativeUserTensor(ind);
if (user_tensor)
{
// User tensors cannot be reallocated.
auto buffer_size = user_tensor->total_size();
auto new_size = new_shape.num_elements() * sizeOfDataType(user_tensor->data_type());
if (buffer_size < new_size)
- throw std::runtime_error{"ExecutorBase: output buffer size is less than output tensor size"};
+ throw InsufficientBufferSizeException{"Output buffer size is less than output tensor size"};
user_tensor->setShape(new_shape);
return;
}
- // NOTE Then handle native tensors
- auto tensor = _tensors->getNativeTensor(ind);
+ // NOTE Then handle own tensors
+ auto tensor = _tensors->getNativeOwnTensor(ind);
assert(tensor);
bool previously_dynamic = tensor->is_dynamic();
@@ -102,24 +102,13 @@ void DynamicTensorManager::buildTensor(const ir::OperandIndex &ind,
const ir::OperandInfo &tensor_info,
ir::Layout backend_layout)
{
- assert(_tensors->getNativeTensor(ind) == nullptr);
auto tensor = std::make_shared<cpu_common::Tensor>(tensor_info, backend_layout, this);
- _tensors->setNativeTensor(ind, tensor);
+ _tensors->setNativeOwnTensor(ind, tensor);
}
void DynamicTensorManager::planDealloc(ir::OperationIndex op_ind, ir::OperandIndex operand_ind)
{
- auto find = _dealloc_tensor_map.find(op_ind);
- if (find != _dealloc_tensor_map.end())
- {
- auto &input_set = find->second;
- input_set.emplace(operand_ind);
- }
- else
- {
- _dealloc_tensor_map.emplace(
- std::make_pair(op_ind, std::unordered_set<ir::OperandIndex>{operand_ind}));
- }
+ _dealloc_tensor_map[op_ind].emplace(operand_ind);
}
void DynamicTensorManager::deallocInput(ir::OperationIndex op_ind)
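
planDealloc now leans on std::unordered_map::operator[] to value-initialize the per-operation set on first access, replacing the explicit find/emplace branch. The idiom in isolation, with plain int standing in for the index types:

    #include <unordered_map>
    #include <unordered_set>

    std::unordered_map<int, std::unordered_set<int>> dealloc_map;

    void plan_dealloc(int op_ind, int operand_ind)
    {
      // operator[] creates an empty set the first time op_ind appears,
      // so one statement covers both the new-key and existing-key cases.
      dealloc_map[op_ind].emplace(operand_ind);
    }
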
diff --git a/runtime/onert/core/src/backend/controlflow/DynamicTensorManager.h b/runtime/onert/core/src/backend/controlflow/DynamicTensorManager.h
index 446427d64..dbe388ba2 100644
--- a/runtime/onert/core/src/backend/controlflow/DynamicTensorManager.h
+++ b/runtime/onert/core/src/backend/controlflow/DynamicTensorManager.h
@@ -17,11 +17,11 @@
#ifndef __ONERT_BACKEND_CONTROLFLOW_DYNAMICTENSOR_MANAGER_H__
#define __ONERT_BACKEND_CONTROLFLOW_DYNAMICTENSOR_MANAGER_H__
-#include "UserTensorRegistry.h"
+#include "TensorRegistry.h"
+#include "Tensor.h"
#include <backend/IDynamicTensorManager.h>
#include <backend/cpu_common/MemoryManager.h>
-#include <backend/cpu_common/TensorRegistry.h>
#include <ir/OperandInfo.h>
#include <ir/Operation.h>
#include <ir/Index.h>
@@ -33,16 +33,13 @@ namespace backend
namespace controlflow
{
-// TODO Find optimized algorithm to manage memory.
-
/**
* @brief Class to manage dynamic tensor and its memory
*/
class DynamicTensorManager : public backend::IDynamicTensorManager
{
public:
- DynamicTensorManager(const std::shared_ptr<cpu_common::TensorRegistry> &reg,
- const std::shared_ptr<UserTensorRegistry> &user_reg);
+ DynamicTensorManager(const std::shared_ptr<TensorRegistry> &tensors);
virtual ~DynamicTensorManager() = default;
@@ -61,9 +58,7 @@ private:
* @todo DynamicMemoryManager is not optimized. Optimized one is needed
*/
std::shared_ptr<cpu_common::DynamicMemoryManager> _dynamic_mem_mgr;
- // TODO Refactoring : Merge two TensorRegistries into one
- const std::shared_ptr<cpu_common::TensorRegistry> _tensors;
- const std::shared_ptr<UserTensorRegistry> _user_tensors;
+ const std::shared_ptr<TensorRegistry> _tensors;
// contains list of dynamic tensor index, which can be deallocated after running operation
// note: this map could contain static tensor index too. Careful use is required.
diff --git a/runtime/onert/core/src/backend/controlflow/KernelGenerator.cc b/runtime/onert/core/src/backend/controlflow/KernelGenerator.cc
index eb83b7de4..de5a6a5f6 100644
--- a/runtime/onert/core/src/backend/controlflow/KernelGenerator.cc
+++ b/runtime/onert/core/src/backend/controlflow/KernelGenerator.cc
@@ -31,24 +31,24 @@ namespace backend
namespace controlflow
{
-KernelGenerator::KernelGenerator(const ir::Graph &graph,
- const std::shared_ptr<TensorBuilder> &tensor_builder)
- : _graph{graph}, _tensor_builder{tensor_builder}, _tensor_builder_set{}, _executor_map{nullptr}
+KernelGenerator::KernelGenerator(const ir::Graph &graph, IDynamicTensorManager *dyn_tensor_manager,
+ const std::shared_ptr<TensorRegistry> &tensor_reg)
+ : _graph{graph}, _dyn_tensor_manager{dyn_tensor_manager}, _tensor_reg{tensor_reg},
+ _tensor_registries{}, _executor_map{nullptr}
{
UNUSED_RELEASE(_graph);
- UNUSED_RELEASE(_tensor_builder_set);
+ UNUSED_RELEASE(_tensor_registries);
UNUSED_RELEASE(_executor_map);
}
void KernelGenerator::visit(const ir::OpSequence &op_seq)
{
assert(!_return_fn_seq);
- assert(_tensor_builder->dynamicTensorManager());
- assert(_tensor_builder->tensorRegistry());
+ assert(_dyn_tensor_manager);
+ assert(_tensor_reg);
- auto dyn_tensor_manager = _tensor_builder->dynamicTensorManager();
- auto dyn_shape_inferer = std::make_unique<exec::DynamicShapeInferer>(
- _graph.operands(), dyn_tensor_manager, _tensor_builder->tensorRegistry());
+ auto dyn_shape_inferer =
+ std::make_unique<exec::DynamicShapeInferer>(_graph.operands(), _tensor_reg);
_return_fn_seq = std::make_unique<exec::FunctionSequence>();
@@ -58,8 +58,8 @@ void KernelGenerator::visit(const ir::OpSequence &op_seq)
dyn_ctx->op_seq = &op_seq;
dyn_ctx->operations = &_graph.operations();
dyn_ctx->dynamic_shape_inferer = std::move(dyn_shape_inferer);
- dyn_ctx->tensor_registry = _tensor_builder->tensorRegistry();
- dyn_ctx->dynamic_tensor_manager = _tensor_builder->dynamicTensorManager();
+ dyn_ctx->tensor_registry = _tensor_reg;
+ dyn_ctx->dynamic_tensor_manager = _dyn_tensor_manager;
_return_fn_seq->dynamic_tensor_ctx(dyn_ctx);
}
@@ -93,12 +93,7 @@ void KernelGenerator::visit(const ir::operation::If &node)
auto output_tensor = getTensor(output_index);
output_tensors.emplace_back(output_tensor);
- const auto output_tensor_builder = getTensorBuilder(output_index);
- if (output_tensor_builder->supportDynamicTensor())
- {
- auto output_dyn_manager = output_tensor_builder->dynamicTensorManager();
- outputs_dyn_alloc_info[output_tensor] = exec::DynAllocInfo{output_index, output_dyn_manager};
- }
+ outputs_dyn_alloc_info[output_tensor] = exec::DynAllocInfo{output_index};
}
// IfLayer just set ExecutorMap instead of then and else executor to avoid complexity of
@@ -121,14 +116,7 @@ void KernelGenerator::visit(const ir::operation::Permute &node)
std::vector<std::shared_ptr<ITensor>> output_tensors{getTensor(output_index)};
std::vector<std::shared_ptr<ITensor>> input_tensors{getTensor(input_index)};
std::unordered_map<std::shared_ptr<ITensor>, exec::DynAllocInfo> outputs_dyn_alloc_info;
- const auto output_tensor_builder = getTensorBuilder(output_index);
- VERBOSE(PERMUTE_FIND_TB) << output_index << " -> " << output_tensor_builder.get() << std::endl;
- assert(output_tensor_builder != nullptr);
- if (output_tensor_builder->supportDynamicTensor())
- {
- outputs_dyn_alloc_info[output_tensors.at(0)] =
- exec::DynAllocInfo{output_index, output_tensor_builder->dynamicTensorManager()};
- }
+ outputs_dyn_alloc_info[output_tensors.at(0)] = exec::DynAllocInfo{output_index};
auto fn =
std::make_unique<kernel::PermuteLayer>(input_tensors, output_tensors, outputs_dyn_alloc_info);
@@ -159,12 +147,7 @@ void KernelGenerator::visit(const ir::operation::While &node)
output_tensors.emplace_back(output_tensor);
- const auto output_tensor_builder = getTensorBuilder(output_index);
- if (output_tensor_builder->supportDynamicTensor())
- {
- auto output_dyn_manager = output_tensor_builder->dynamicTensorManager();
- outputs_dyn_alloc_info[output_tensor] = exec::DynAllocInfo{output_index, output_dyn_manager};
- }
+ outputs_dyn_alloc_info[output_tensor] = exec::DynAllocInfo{output_index};
}
// WhileLayer just set ExecutorMap instead of cond and body executor to avoid complexity of
@@ -178,34 +161,7 @@ void KernelGenerator::visit(const ir::operation::While &node)
std::shared_ptr<backend::ITensor> KernelGenerator::getTensor(const ir::OperandIndex &index)
{
- std::shared_ptr<backend::ITensor> ret;
- for (auto tensor_builder : _tensor_builder_set)
- {
- auto tensor = tensor_builder->tensorAt(index);
- if (tensor)
- {
- ret = tensor;
- break;
- }
- }
- assert(ret != nullptr);
- return ret;
-}
-
-std::shared_ptr<backend::ITensorBuilder>
-KernelGenerator::getTensorBuilder(const ir::OperandIndex &index)
-{
- std::shared_ptr<backend::ITensorBuilder> ret;
- for (auto tensor_builder : _tensor_builder_set)
- {
- auto reg = tensor_builder->tensorRegistry();
- auto tensor = reg ? reg->getNativeITensor(index) : tensor_builder->tensorAt(index);
- if (tensor)
- {
- ret = tensor_builder;
- break;
- }
- }
+ std::shared_ptr<backend::ITensor> ret = _tensor_registries.getITensor(index);
assert(ret != nullptr);
return ret;
}
diff --git a/runtime/onert/core/src/backend/controlflow/KernelGenerator.h b/runtime/onert/core/src/backend/controlflow/KernelGenerator.h
index 1fc77935c..b84a810e4 100644
--- a/runtime/onert/core/src/backend/controlflow/KernelGenerator.h
+++ b/runtime/onert/core/src/backend/controlflow/KernelGenerator.h
@@ -22,9 +22,8 @@
#include <exec/IExecutor.h>
#include <ir/Graph.h>
#include "TensorBuilder.h"
-#include "compiler/TensorBuilders.h"
-
-#include "compiler/TensorBuilders.h"
+#include "compiler/TensorRegistries.h"
+#include "TensorRegistry.h"
namespace onert
{
@@ -36,11 +35,12 @@ namespace controlflow
class KernelGenerator : public IKernelGenerator
{
public:
- KernelGenerator(const ir::Graph &graph, const std::shared_ptr<TensorBuilder> &tensor_builder);
+ KernelGenerator(const ir::Graph &graph, IDynamicTensorManager *dyn_tensor_manager,
+ const std::shared_ptr<TensorRegistry> &tensor_reg);
- void setTensorBuilderSet(const compiler::TensorBuilders &tensor_builder_set)
+ void setTensorRegistries(const compiler::TensorRegistries &tensor_registries)
{
- _tensor_builder_set = tensor_builder_set;
+ _tensor_registries = tensor_registries;
}
void setExecutorMap(const std::shared_ptr<exec::ExecutorMap> &executor_map)
{
@@ -57,12 +57,12 @@ public:
private:
std::shared_ptr<backend::ITensor> getTensor(const ir::OperandIndex &index);
- std::shared_ptr<backend::ITensorBuilder> getTensorBuilder(const ir::OperandIndex &index);
private:
const ir::Graph &_graph;
- std::shared_ptr<TensorBuilder> _tensor_builder;
- compiler::TensorBuilders _tensor_builder_set;
+ IDynamicTensorManager *_dyn_tensor_manager;
+ std::shared_ptr<TensorRegistry> _tensor_reg;
+ compiler::TensorRegistries _tensor_registries;
exec::ExecutorMap *_executor_map;
};
diff --git a/runtime/onert/core/src/ir/operation/Log.cc b/runtime/onert/core/src/backend/controlflow/Tensor.h
index 85598bc87..ba5bafd75 100644
--- a/runtime/onert/core/src/ir/operation/Log.cc
+++ b/runtime/onert/core/src/backend/controlflow/Tensor.h
@@ -14,26 +14,22 @@
* limitations under the License.
*/
-#include "ir/operation/Log.h"
+#ifndef __ONERT_BACKEND_CONTROLFLOW_TENSOR_H__
+#define __ONERT_BACKEND_CONTROLFLOW_TENSOR_H__
-#include <cassert>
-
-#include "ir/OperationVisitor.h"
+#include <backend/cpu_common/Tensor.h>
namespace onert
{
-namespace ir
+namespace backend
{
-namespace operation
+namespace controlflow
{
-void Log::accept(OperationVisitor &v) const { v.visit(*this); }
-
-Log::Log(const OperandIndexSequence &inputs, const OperandIndexSequence &outputs)
- : Operation{OperandConstraint::createExact(1u), inputs, outputs}
-{
-}
+using Tensor = cpu_common::Tensor;
-} // namespace operation
-} // namespace ir
+} // namespace controlflow
+} // namespace backend
} // namespace onert
+
+#endif // __ONERT_BACKEND_CONTROLFLOW_TENSOR_H__
diff --git a/runtime/onert/core/src/backend/controlflow/TensorBuilder.cc b/runtime/onert/core/src/backend/controlflow/TensorBuilder.cc
index 5bddb9185..e5c3f5fd5 100644
--- a/runtime/onert/core/src/backend/controlflow/TensorBuilder.cc
+++ b/runtime/onert/core/src/backend/controlflow/TensorBuilder.cc
@@ -27,10 +27,10 @@ namespace backend
namespace controlflow
{
-TensorBuilder::TensorBuilder()
- : _tensor_reg{new cpu_common::TensorRegistry()}, _user_tensor_reg{new UserTensorRegistry()},
- _static_tensor_mgr{new cpu_common::StaticTensorManager(_tensor_reg)},
- _dynamic_tensor_mgr{new DynamicTensorManager(_tensor_reg, _user_tensor_reg)}
+TensorBuilder::TensorBuilder(const std::shared_ptr<TensorRegistry> &tensor_reg)
+ : _tensor_reg{tensor_reg}, _dynamic_tensor_mgr{new DynamicTensorManager(_tensor_reg)},
+ _static_tensor_mgr{
+ new cpu_common::StaticTensorManager(_tensor_reg->base_reg(), _dynamic_tensor_mgr.get())}
{
/* empty */
}
@@ -54,10 +54,13 @@ void TensorBuilder::registerTensorInfo(const ir::OperandIndex &ind, const ir::Op
void TensorBuilder::notifyFirstUse(const ir::OperandIndex &ind)
{
- assert(_tensor_info_map.find(ind) != _tensor_info_map.end());
+ // TODO Enhance the way of checking user tensors
+ if (_tensor_info_map.find(ind) == _tensor_info_map.end()) // Do not proceed for user tensors
+ return;
+
const auto tensor_info = _tensor_info_map.at(ind);
- if (!at(ind)->is_dynamic())
+ if (!nativeOwnTensorAt(ind)->is_dynamic())
{
const auto size = tensor_info.total_size();
_static_tensor_mgr->claimPlan(ind, size);
@@ -66,7 +69,11 @@ void TensorBuilder::notifyFirstUse(const ir::OperandIndex &ind)
void TensorBuilder::notifyLastUse(const ir::OperandIndex &ind)
{
- if (!at(ind)->is_dynamic())
+ // TODO Enhance the way of checking user tensors
+ if (_tensor_info_map.find(ind) == _tensor_info_map.end()) // Do not proceed for user tensors
+ return;
+
+ if (!nativeOwnTensorAt(ind)->is_dynamic())
{
_static_tensor_mgr->releasePlan(ind);
}
@@ -74,6 +81,11 @@ void TensorBuilder::notifyLastUse(const ir::OperandIndex &ind)
bool TensorBuilder::isRegistered(const ir::OperandIndex &ind) const
{
+  // User tensors are not registered in _tensor_info_map but objects for them exist
+ // in the tensor registry.
+ // TODO Enhance the way of checking user tensors
+ if (_tensor_reg->getITensor(ind))
+ return true;
return _tensor_info_map.find(ind) != _tensor_info_map.end();
}
@@ -89,25 +101,9 @@ void TensorBuilder::allocate()
// This is because CPU kernels require `ITensor`s to be allocated before Kernel Generation.
}
-std::shared_ptr<ITensor> TensorBuilder::tensorAt(const ir::OperandIndex &ind)
-{
- // NOTE Find from User Tensor Registry first
- // FIXME There may be both user tensor and native tensor for a `ind` which is a waste
- auto user_tensor = _user_tensor_reg->getITensor(ind);
- auto tensor = _tensor_reg->getITensor(ind);
- if (user_tensor)
- {
- return user_tensor;
- }
- else
- return tensor;
-}
-
-void TensorBuilder::iterate(const IterateFunction &fn) { _static_tensor_mgr->iterate(fn); }
-
-std::shared_ptr<cpu_common::Tensor> TensorBuilder::at(const ir::OperandIndex &ind)
+std::shared_ptr<cpu_common::Tensor> TensorBuilder::nativeOwnTensorAt(const ir::OperandIndex &ind)
{
- return _tensor_reg->getNativeTensor(ind);
+ return _tensor_reg->getNativeOwnTensor(ind);
}
std::unique_ptr<ITensorManager> TensorBuilder::releaseStaticTensorManager(void)
@@ -120,10 +116,10 @@ std::unique_ptr<ITensorManager> TensorBuilder::releaseDynamicTensorManager(void)
return std::move(_dynamic_tensor_mgr);
}
-void TensorBuilder::setUserTensor(const ir::OperandIndex &ind,
- const std::shared_ptr<UserTensor> &tensor)
+void TensorBuilder::setNativeUserTensor(const ir::OperandIndex &ind,
+ const std::shared_ptr<UserTensor> &tensor)
{
- _user_tensor_reg->setNativeTensor(ind, tensor);
+ _tensor_reg->setNativeUserTensor(ind, tensor);
}
} // namespace controlflow
diff --git a/runtime/onert/core/src/backend/controlflow/TensorBuilder.h b/runtime/onert/core/src/backend/controlflow/TensorBuilder.h
index 9f2bb3754..2f2a2c47e 100644
--- a/runtime/onert/core/src/backend/controlflow/TensorBuilder.h
+++ b/runtime/onert/core/src/backend/controlflow/TensorBuilder.h
@@ -39,9 +39,7 @@ namespace controlflow
class TensorBuilder : public ITensorBuilder
{
public:
- TensorBuilder();
-
- bool supportDynamicTensor() override { return true; }
+ TensorBuilder(const std::shared_ptr<TensorRegistry> &tensor_reg);
/**
* @brief Register tensor information to allocate on CPU backend
@@ -61,15 +59,6 @@ public:
void allocate() override;
void postFunctionPrepare() override { /* DO NOTHING */}
- /**
- * @brief Get tensor with a specific OperandIndex
- *
- * @return shared_ptr<ITensor> if a tensor with given OperandIndex exists. nullptr otherwise.
- */
- std::shared_ptr<ITensor> tensorAt(const ir::OperandIndex &ind) override;
-
- void iterate(const IterateFunction &fn) override;
-
std::unique_ptr<ITensorManager> releaseStaticTensorManager(void) override;
IDynamicTensorManager *dynamicTensorManager(void) override { return _dynamic_tensor_mgr.get(); }
@@ -82,16 +71,13 @@ public:
* If not, program will crash with assert or exception.
* @return shared_ptr<operand::Tensor>
*/
- std::shared_ptr<cpu_common::Tensor> at(const ir::OperandIndex &ind);
- void setUserTensor(const ir::OperandIndex &ind, const std::shared_ptr<UserTensor> &tensor);
-
- std::shared_ptr<ITensorRegistry> tensorRegistry() override { return _tensor_reg; }
+ std::shared_ptr<cpu_common::Tensor> nativeOwnTensorAt(const ir::OperandIndex &ind);
+ void setNativeUserTensor(const ir::OperandIndex &ind, const std::shared_ptr<UserTensor> &tensor);
private:
- const std::shared_ptr<cpu_common::TensorRegistry> _tensor_reg;
- const std::shared_ptr<UserTensorRegistry> _user_tensor_reg;
- std::unique_ptr<cpu_common::StaticTensorManager> _static_tensor_mgr;
+ const std::shared_ptr<TensorRegistry> _tensor_reg;
std::unique_ptr<DynamicTensorManager> _dynamic_tensor_mgr;
+ std::unique_ptr<cpu_common::StaticTensorManager> _static_tensor_mgr;
ir::OperandIndexMap<ir::OperandInfo> _tensor_info_map;
ir::OperandIndexMap<ir::Layout> _tensor_layout_map;
};
diff --git a/runtime/onert/core/src/backend/controlflow/TensorRegistry.h b/runtime/onert/core/src/backend/controlflow/TensorRegistry.h
new file mode 100644
index 000000000..678c5b73b
--- /dev/null
+++ b/runtime/onert/core/src/backend/controlflow/TensorRegistry.h
@@ -0,0 +1,134 @@
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __ONERT_BACKEND_CONTROLFLOW_TENSOR_REGISTRY_H__
+#define __ONERT_BACKEND_CONTROLFLOW_TENSOR_REGISTRY_H__
+
+#include "backend/cpu_common/TensorRegistry.h"
+#include "backend/ITensorRegistry.h"
+#include "Tensor.h"
+#include "UserTensor.h"
+#include <assert.h>
+
+namespace onert
+{
+namespace backend
+{
+namespace controlflow
+{
+
+/**
+ * @brief Tensor registry class for controlflow backend
+ *
+ * This class contains three types of tensors: two kinds of native tensors (tensors managed by
+ * this backend) and migrant tensors (tensors managed by other backends).
+ *
+ * - NativeUserTensor - @c UserTensor managed by this backend, buffer is user-given
+ * - NativeOwnTensor - @c cpu_common::Tensor managed by this backend ( in @c _base_reg )
+ * - MigrantTensor - @c IPortableTensor managed by other backends ( in @c _base_reg )
+ *
+ * @note @c _base_reg is used in implementation to reuse @c cpu_common::StaticTensorManager
+ *
+ */
+class TensorRegistry : public ITensorRegistry
+{
+public:
+ TensorRegistry() : _base_reg{new cpu_common::TensorRegistry} {}
+
+ std::shared_ptr<ITensor> getITensor(const ir::OperandIndex &ind) override
+ {
+ auto base_tensor = _base_reg->getITensor(ind);
+ if (base_tensor)
+ return base_tensor;
+ return getNativeUserTensor(ind);
+ }
+
+ std::shared_ptr<ITensor> getNativeITensor(const ir::OperandIndex &ind) override
+ {
+ auto base_tensor = _base_reg->getNativeITensor(ind);
+ if (base_tensor)
+ return base_tensor;
+ return getNativeUserTensor(ind);
+ }
+
+ std::shared_ptr<IPortableTensor> getPortableTensor(const ir::OperandIndex &ind)
+ {
+ auto base_tensor = _base_reg->getPortableTensor(ind);
+ if (base_tensor)
+ return base_tensor;
+ return getNativeUserTensor(ind);
+ }
+
+ std::shared_ptr<IPortableTensor> getNativeTensor(const ir::OperandIndex &ind)
+ {
+ auto base_tensor = _base_reg->getNativeTensor(ind);
+ if (base_tensor)
+ return base_tensor;
+ return getNativeUserTensor(ind);
+ }
+
+ std::shared_ptr<Tensor> getNativeOwnTensor(const ir::OperandIndex &ind)
+ {
+ return _base_reg->getNativeTensor(ind);
+ }
+
+ std::shared_ptr<UserTensor> getNativeUserTensor(const ir::OperandIndex &ind)
+ {
+ auto tensor = _native_user_tensors.find(ind);
+ if (tensor != _native_user_tensors.end())
+ return tensor->second;
+ return nullptr;
+ }
+
+ bool setMigrantTensor(const ir::OperandIndex &ind,
+ const std::shared_ptr<IPortableTensor> &tensor) override
+ {
+ assert(tensor);
+ assert(!getITensor(ind)); // For the ind, tensor is not registered yet
+ _base_reg->setMigrantTensor(ind, tensor);
+ return true;
+ }
+
+ void setNativeOwnTensor(ir::OperandIndex ind, const std::shared_ptr<Tensor> &tensor)
+ {
+ assert(tensor);
+ assert(!getITensor(ind)); // For the ind, tensor is not registered yet
+ _base_reg->setNativeTensor(ind, tensor);
+ }
+
+ void setNativeUserTensor(ir::OperandIndex ind, const std::shared_ptr<UserTensor> &tensor)
+ {
+ assert(tensor);
+ assert(!getITensor(ind)); // For the ind, tensor is not registered yet
+ _native_user_tensors[ind] = tensor;
+ }
+
+ const ir::OperandIndexMap<std::shared_ptr<UserTensor>> &native_user_tensors()
+ {
+ return _native_user_tensors;
+ }
+ std::shared_ptr<cpu_common::TensorRegistry> base_reg() { return _base_reg; }
+
+private:
+ std::shared_ptr<cpu_common::TensorRegistry> _base_reg;
+ ir::OperandIndexMap<std::shared_ptr<UserTensor>> _native_user_tensors;
+};
+
+} // namespace controlflow
+} // namespace backend
+} // namespace onert
+
+#endif // ifndef __ONERT_BACKEND_CONTROLFLOW_TENSOR_REGISTRY_H__
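
TensorRegistry layers user tensors on top of a cpu_common::TensorRegistry, so a lookup falls through from the base registry to the user-tensor map. A short usage sketch based on the API above; the indices are arbitrary and own_tensor/user_tensor are placeholder shared_ptrs of the matching types:

    // Hypothetical wiring: register one backend-owned tensor and one user-provided tensor.
    auto reg = std::make_shared<onert::backend::controlflow::TensorRegistry>();

    reg->setNativeOwnTensor(onert::ir::OperandIndex{0}, own_tensor);   // stored via base_reg()
    reg->setNativeUserTensor(onert::ir::OperandIndex{1}, user_tensor); // stored in _native_user_tensors

    // getITensor() checks the base registry first, then the user-tensor map,
    // so both indices resolve even though they live in different containers.
    assert(reg->getITensor(onert::ir::OperandIndex{0}) == own_tensor);
    assert(reg->getITensor(onert::ir::OperandIndex{1}) == user_tensor);
    assert(reg->getNativeOwnTensor(onert::ir::OperandIndex{1}) == nullptr); // user tensor is not "own"
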
diff --git a/runtime/onert/core/src/backend/controlflow/kernel/PermuteLayer.cc b/runtime/onert/core/src/backend/controlflow/kernel/PermuteLayer.cc
index 3c095b38c..e8f1ea679 100644
--- a/runtime/onert/core/src/backend/controlflow/kernel/PermuteLayer.cc
+++ b/runtime/onert/core/src/backend/controlflow/kernel/PermuteLayer.cc
@@ -55,7 +55,11 @@ void PermuteLayer::run()
try
{
const auto dst_index = _dst_dyn_alloc_info_map.at(dst_tensor).ind;
- _dst_dyn_alloc_info_map.at(dst_tensor).dyn_tensor_manager->applyShape(dst_index, new_shape);
+ auto dyn_tensor_manager = dst_tensor->dynamic_tensor_manager();
+ if (!dyn_tensor_manager)
+ throw std::runtime_error{
+ "Error: PermuteLayer: output's TensorManager does not support dynamic tensor"};
+ dyn_tensor_manager->applyShape(dst_index, new_shape);
assert(dst_tensor->buffer() != nullptr);
}
catch (const std::out_of_range &e)
diff --git a/runtime/onert/core/src/backend/cpu_common/DynamicTensorManager.cc b/runtime/onert/core/src/backend/cpu_common/DynamicTensorManager.cc
index cb27d757f..f7ce3d011 100644
--- a/runtime/onert/core/src/backend/cpu_common/DynamicTensorManager.cc
+++ b/runtime/onert/core/src/backend/cpu_common/DynamicTensorManager.cc
@@ -95,17 +95,7 @@ void DynamicTensorManager::buildTensor(const ir::OperandIndex &ind,
void DynamicTensorManager::planDealloc(ir::OperationIndex op_ind, ir::OperandIndex operand_ind)
{
- auto find = _dealloc_tensor_map.find(op_ind);
- if (find != _dealloc_tensor_map.end())
- {
- auto &input_set = find->second;
- input_set.emplace(operand_ind);
- }
- else
- {
- _dealloc_tensor_map.emplace(
- std::make_pair(op_ind, std::unordered_set<ir::OperandIndex>{operand_ind}));
- }
+ _dealloc_tensor_map[op_ind].emplace(operand_ind);
}
void DynamicTensorManager::deallocInput(ir::OperationIndex op_ind)
diff --git a/runtime/onert/core/src/backend/cpu_common/StaticTensorManager.cc b/runtime/onert/core/src/backend/cpu_common/StaticTensorManager.cc
index 820cad38a..440f70c93 100644
--- a/runtime/onert/core/src/backend/cpu_common/StaticTensorManager.cc
+++ b/runtime/onert/core/src/backend/cpu_common/StaticTensorManager.cc
@@ -26,8 +26,10 @@ namespace backend
namespace cpu_common
{
-StaticTensorManager::StaticTensorManager(const std::shared_ptr<TensorRegistry> &reg)
- : _const_mgr{new DynamicMemoryManager()}, _nonconst_mgr{new MemoryManager()}, _tensors{reg}
+StaticTensorManager::StaticTensorManager(const std::shared_ptr<TensorRegistry> &reg,
+ IDynamicTensorManager *dynamic_tensor_manager)
+ : _const_mgr{new DynamicMemoryManager()}, _nonconst_mgr{new MemoryManager()}, _tensors{reg},
+ _dynamic_tensor_manager{dynamic_tensor_manager}
{
// DO NOTHING
}
@@ -78,7 +80,7 @@ void StaticTensorManager::buildTensor(const ir::OperandIndex &ind,
bool as_const)
{
assert(!_tensors->getNativeTensor(ind));
- auto tensor = std::make_shared<Tensor>(tensor_info, backend_layout, nullptr);
+ auto tensor = std::make_shared<Tensor>(tensor_info, backend_layout, _dynamic_tensor_manager);
_tensors->setNativeTensor(ind, tensor);
_as_constants[ind] = as_const;
}
diff --git a/runtime/onert/core/src/compiler/Compiler.cc b/runtime/onert/core/src/compiler/Compiler.cc
index 33b428a4b..93dbbc3b5 100644
--- a/runtime/onert/core/src/compiler/Compiler.cc
+++ b/runtime/onert/core/src/compiler/Compiler.cc
@@ -134,6 +134,12 @@ std::shared_ptr<exec::ExecutorMap> Compiler::compile(void)
backend::controlflow::Config::ID;
}
+ // FIXME This is a workaround for bcq operations, should remove it
+ {
+ _options.manual_scheduler_options.opcode_to_backend[ir::OpCode::BCQFullyConnected] = "bcq";
+ _options.manual_scheduler_options.opcode_to_backend[ir::OpCode::BCQGather] = "bcq";
+ }
+
{
VERBOSE(Compiler) << std::boolalpha;
VERBOSE(Compiler) << "==== Compiler Options ====" << std::endl;
@@ -181,14 +187,14 @@ std::shared_ptr<exec::ExecutorMap> Compiler::compile(void)
auto dump_level = static_cast<dumper::dot::DotDumper::Level>(_options.graph_dump_level);
// Lower: Assign backend
- std::unordered_map<ir::SubgraphIndex, std::unique_ptr<ir::LoweredGraph>> lowered_subgs;
+ std::unordered_map<ir::SubgraphIndex, std::unique_ptr<compiler::LoweredGraph>> lowered_subgs;
_subgraphs->iterate([&](const ir::SubgraphIndex &index, ir::Graph &subg) {
_options.is_primary_subgraph = (index == ir::SubgraphIndex{0});
onert::dumper::dot::DotDumper dot_dumper(subg, dump_level);
dot_dumper.dump(nnfw::misc::str("before_lower_subg-", index.value()));
// Lower: Assign backend
- lowered_subgs[index] = std::make_unique<ir::LoweredGraph>(subg, _options);
+ lowered_subgs[index] = std::make_unique<compiler::LoweredGraph>(subg, _options);
// Check backend(s) for subgraph support FP16
bool backends_support_fp16 = true;
diff --git a/runtime/onert/core/src/compiler/ExecutorFactory.cc b/runtime/onert/core/src/compiler/ExecutorFactory.cc
index 82afd9e56..062c6c9c3 100644
--- a/runtime/onert/core/src/compiler/ExecutorFactory.cc
+++ b/runtime/onert/core/src/compiler/ExecutorFactory.cc
@@ -25,6 +25,7 @@
#include "compiler/ExecutionBuilder.h"
#include "exec/ExecTime.h"
#include "compiler/Linear.h"
+#include "compiler/TensorBuilders.h"
#include "backend/IConstantInitializer.h"
#include "backend/IKernelGenerator.h"
#include "backend/IOptimizer.h"
@@ -64,6 +65,23 @@ private:
std::shared_ptr<backend::IConfig> _config;
};
+// TODO Think of a better way to manage TensorManagers
+backend::TensorManagerSet createTensorManagerSet(const compiler::TensorBuilders &tensor_builders)
+{
+ backend::TensorManagerSet tensor_mgrs;
+ for (auto &tensor_builder : tensor_builders)
+ {
+ auto s_tensor_manager = tensor_builder->releaseStaticTensorManager();
+ if (s_tensor_manager != nullptr)
+ tensor_mgrs.insert(std::move(s_tensor_manager));
+
+ auto d_tensor_manager = tensor_builder->releaseDynamicTensorManager();
+ if (d_tensor_manager != nullptr)
+ tensor_mgrs.insert(std::move(d_tensor_manager));
+ }
+ return tensor_mgrs;
+}
+
} // namespace
} // namespace onert
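
createTensorManagerSet drains each tensor builder of its static and dynamic managers and moves the unique_ptrs into one set, so the executor ends up owning their lifetime. A small sketch of that ownership transfer with placeholder types:

    #include <iostream>
    #include <memory>
    #include <unordered_set>
    #include <vector>

    struct ITensorManager { virtual ~ITensorManager() = default; };
    struct StaticMgr : ITensorManager {};

    struct TensorBuilder
    {
      std::unique_ptr<ITensorManager> static_mgr = std::make_unique<StaticMgr>();
      std::unique_ptr<ITensorManager> dynamic_mgr; // some backends have none

      std::unique_ptr<ITensorManager> releaseStatic() { return std::move(static_mgr); }
      std::unique_ptr<ITensorManager> releaseDynamic() { return std::move(dynamic_mgr); }
    };

    int main()
    {
      std::vector<TensorBuilder> builders(2);
      std::unordered_set<std::unique_ptr<ITensorManager>> mgrs;

      for (auto &b : builders)
      {
        if (auto s = b.releaseStatic())
          mgrs.insert(std::move(s));
        if (auto d = b.releaseDynamic()) // null managers are simply skipped
          mgrs.insert(std::move(d));
      }
      std::cout << mgrs.size() << std::endl; // 2: only the static managers existed
    }
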
@@ -87,14 +105,14 @@ ExecutorFactory::ExecutorFactory()
std::placeholders::_3, true);
}
-exec::IExecutor *ExecutorFactory::create(std::unique_ptr<ir::LoweredGraph> lowered_graph,
+exec::IExecutor *ExecutorFactory::create(std::unique_ptr<compiler::LoweredGraph> lowered_graph,
const compiler::CompilerOptions &options,
const std::shared_ptr<exec::ExecutorMap> &executor_map)
{
return _map.at(options.executor)(std::move(lowered_graph), options, executor_map);
}
-void ExecutorFactory::initializeBackendContext(ir::LoweredGraph *lowered_graph)
+void ExecutorFactory::initializeBackendContext(compiler::LoweredGraph *lowered_graph)
{
struct Entry
{
@@ -132,7 +150,7 @@ void ExecutorFactory::initializeBackendContext(ir::LoweredGraph *lowered_graph)
}
}
-void ExecutorFactory::runTensorRegistration(ir::LoweredGraph *lowered_graph,
+void ExecutorFactory::runTensorRegistration(compiler::LoweredGraph *lowered_graph,
const std::vector<ir::OpSequenceIndex> &order)
{
for (const auto index : order)
@@ -141,6 +159,8 @@ void ExecutorFactory::runTensorRegistration(ir::LoweredGraph *lowered_graph,
const auto backend = lowered_graph->getLowerInfo(index)->backend();
const auto tensor_register = lowered_graph->backend_contexts().at(backend)->tensor_register;
auto tensor_builder = lowered_graph->backend_contexts().at(backend)->tensor_builder;
+ auto model_io = lowered_graph->graph().getInputs() + lowered_graph->graph().getOutputs();
+
if (tensor_register)
{
// Custom registration
@@ -154,7 +174,7 @@ void ExecutorFactory::runTensorRegistration(ir::LoweredGraph *lowered_graph,
const auto &op = lowered_graph->graph().operations().at(op_idx);
for (const auto &index : (op.getInputs() | ir::Remove::UNDEFINED) + op.getOutputs())
{
- if (!tensor_builder->isRegistered(index))
+ if (!tensor_builder->isRegistered(index) && !model_io.contains(index))
{
const auto &operand_lower_info =
lowered_graph->getLowerInfo(index)->def_factors().getOnlyElement();
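
The extra `!model_io.contains(index)` guard keeps graph-level inputs and outputs out of the per-backend registration loop; those operands are handled as user tensors on the controlflow backend instead (see initializeModelIOTensors below). A minimal sketch of the membership test, assuming a much simpler index-sequence type than the real ir::OperandIndexSequence:

    #include <algorithm>
    #include <cstdint>
    #include <iostream>
    #include <vector>

    // Illustrative stand-in for ir::OperandIndexSequence.
    struct OperandIndexSequence
    {
      std::vector<uint32_t> indices;

      bool contains(uint32_t ind) const
      {
        return std::find(indices.begin(), indices.end(), ind) != indices.end();
      }
    };

    // Concatenation mirrors `graph.getInputs() + graph.getOutputs()`.
    OperandIndexSequence operator+(OperandIndexSequence lhs, const OperandIndexSequence &rhs)
    {
      lhs.indices.insert(lhs.indices.end(), rhs.indices.begin(), rhs.indices.end());
      return lhs;
    }

    int main()
    {
      OperandIndexSequence inputs{{0, 1}}, outputs{{7}};
      auto model_io = inputs + outputs;

      for (uint32_t ind : {1u, 3u, 7u})
        std::cout << ind << (model_io.contains(ind) ? " is model I/O\n" : " is internal\n");
    }
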
@@ -181,15 +201,28 @@ void ExecutorFactory::runTensorRegistration(ir::LoweredGraph *lowered_graph,
}
std::vector<std::shared_ptr<backend::ITensor>>
-ExecutorFactory::initializeModelIOTensors(ir::LoweredGraph &lowered_graph,
+ExecutorFactory::initializeModelIOTensors(compiler::LoweredGraph &lowered_graph,
const ir::OperandIndexSequence &indices)
{
std::vector<std::shared_ptr<backend::ITensor>> ret;
- TensorBuilders tensor_builders{lowered_graph.backend_contexts(), false};
- std::shared_ptr<backend::controlflow::TensorBuilder> cf_tensor_builder =
- tensor_builders.getControlflowTensorBuilder();
+ // TODO Store controlflow backend in BackendContext
+ std::shared_ptr<backend::controlflow::TensorBuilder> cf_tensor_builder;
+ std::shared_ptr<backend::controlflow::TensorRegistry> cf_tensor_reg;
+ for (const auto &e : lowered_graph.backend_contexts())
+ {
+ auto backend = e.first;
+ auto &context = e.second;
+ if (backend->config()->id() == backend::controlflow::Config::ID)
+ {
+ cf_tensor_builder =
+ std::dynamic_pointer_cast<backend::controlflow::TensorBuilder>(context->tensor_builder);
+ cf_tensor_reg =
+ std::dynamic_pointer_cast<backend::controlflow::TensorRegistry>(context->tensor_registry);
+ }
+ }
assert(cf_tensor_builder);
+ assert(cf_tensor_reg);
for (auto ind : indices)
{
@@ -200,15 +233,16 @@ ExecutorFactory::initializeModelIOTensors(ir::LoweredGraph &lowered_graph,
cf_tensor_builder->dynamicTensorManager());
// Add tensor to controlflow TensorRegistry.
- cf_tensor_builder->setUserTensor(ind, tensor);
+ cf_tensor_reg->setNativeUserTensor(ind, tensor);
ret.push_back(tensor);
}
return ret;
}
-void ExecutorFactory::prepareExternalTensors(ir::LoweredGraph &lowered_graph,
- TensorBuilders &tensor_builders)
+void ExecutorFactory::prepareExternalTensors(compiler::LoweredGraph &lowered_graph)
{
+ TensorRegistries tensor_regs{lowered_graph.backend_contexts(), true};
+
lowered_graph.op_seqs().iterate(
[&](const ir::OpSequenceIndex &op_seq_index, const ir::OpSequence &op_seq) {
auto lower_info = lowered_graph.getLowerInfo(op_seq_index);
@@ -219,20 +253,20 @@ void ExecutorFactory::prepareExternalTensors(ir::LoweredGraph &lowered_graph,
// If an OpSequence input/output tensor does not have its own tensor object,
// it must be using external tensors, so find the tensor from other tensor builders and
// set the tensor to this tensor builder if portable
- if (!backend_ctx->tensor_builder->tensorAt(ind))
+ if (!backend_ctx->tensor_registry->getITensor(ind))
{
- auto tensor = tensor_builders.getITensor(ind);
- assert(tensor); // The tensor must have been created in one of TensorBuilders
+ auto tensor = tensor_regs.getITensor(ind);
+ assert(tensor); // The tensor must have been registered
auto ptensor = std::dynamic_pointer_cast<backend::IPortableTensor>(tensor);
if (ptensor)
- backend_ctx->tensor_builder->setMigrantTensor(ind, ptensor);
+ backend_ctx->tensor_registry->setMigrantTensor(ind, ptensor);
}
}
});
}
exec::IExecutor *
-ExecutorFactory::createLinearExecutor(std::unique_ptr<ir::LoweredGraph> lowered_graph,
+ExecutorFactory::createLinearExecutor(std::unique_ptr<compiler::LoweredGraph> lowered_graph,
const compiler::CompilerOptions &options,
const std::shared_ptr<exec::ExecutorMap> &executor_map)
{
@@ -277,13 +311,14 @@ ExecutorFactory::createLinearExecutor(std::unique_ptr<ir::LoweredGraph> lowered_
Linear::planTensors(*lowered_graph, order);
TensorBuilders tensor_builders{lowered_graph->backend_contexts(), true};
+ TensorRegistries tensor_regs{lowered_graph->backend_contexts(), true};
for (auto &tensor_builder : tensor_builders)
{
tensor_builder->prepare();
}
- prepareExternalTensors(*lowered_graph, tensor_builders);
+ prepareExternalTensors(*lowered_graph);
ExecutionBuilder builder;
@@ -296,7 +331,7 @@ ExecutorFactory::createLinearExecutor(std::unique_ptr<ir::LoweredGraph> lowered_
auto cf_kernel_gen = dynamic_cast<backend::controlflow::KernelGenerator *>(kernel_gen.get());
if (cf_kernel_gen != nullptr)
{
- cf_kernel_gen->setTensorBuilderSet(tensor_builders);
+ cf_kernel_gen->setTensorRegistries(tensor_regs);
cf_kernel_gen->setExecutorMap(executor_map);
}
auto fn_seq = kernel_gen->generate(op_seq);
@@ -335,9 +370,10 @@ ExecutorFactory::createLinearExecutor(std::unique_ptr<ir::LoweredGraph> lowered_
});
}
- auto exec =
- new exec::LinearExecutor{std::move(lowered_graph), input_tensors, output_tensors,
- tensor_builders, std::move(code_map), order};
+ backend::TensorManagerSet tensor_mgrs = createTensorManagerSet(tensor_builders);
+ auto exec = new exec::LinearExecutor{
+ std::move(lowered_graph), input_tensors, output_tensors, tensor_regs,
+ std::move(tensor_mgrs), std::move(code_map), order};
if (!options.trace_filepath.empty())
{
@@ -350,7 +386,7 @@ ExecutorFactory::createLinearExecutor(std::unique_ptr<ir::LoweredGraph> lowered_
}
exec::IExecutor *ExecutorFactory::createDataflowExecutor(
- std::unique_ptr<ir::LoweredGraph> lowered_graph, const compiler::CompilerOptions &options,
+ std::unique_ptr<compiler::LoweredGraph> lowered_graph, const compiler::CompilerOptions &options,
const std::shared_ptr<exec::ExecutorMap> &executor_map, bool parallel)
{
const auto &backend_contexts = lowered_graph->backend_contexts();
@@ -369,6 +405,7 @@ exec::IExecutor *ExecutorFactory::createDataflowExecutor(
}
TensorBuilders tensor_builders{lowered_graph->backend_contexts(), true};
+ TensorRegistries tensor_regs{lowered_graph->backend_contexts(), true};
// To make tensors never be deallocated, this is a workaround to use static memory planner
for (auto &tensor_builder : tensor_builders)
@@ -387,7 +424,7 @@ exec::IExecutor *ExecutorFactory::createDataflowExecutor(
tensor_builder->prepare();
}
- prepareExternalTensors(*lowered_graph, tensor_builders);
+ prepareExternalTensors(*lowered_graph);
ExecutionBuilder builder;
@@ -401,7 +438,7 @@ exec::IExecutor *ExecutorFactory::createDataflowExecutor(
if (cf_kernel_gen != nullptr)
{
assert(cf_kernel_gen != nullptr);
- cf_kernel_gen->setTensorBuilderSet(tensor_builders);
+ cf_kernel_gen->setTensorRegistries(tensor_regs);
cf_kernel_gen->setExecutorMap(executor_map);
}
auto fn_seq = kernel_gen->generate(op_seq);
@@ -440,17 +477,20 @@ exec::IExecutor *ExecutorFactory::createDataflowExecutor(
});
}
+ backend::TensorManagerSet tensor_mgrs = createTensorManagerSet(tensor_builders);
+
exec::ExecutorBase *exec = nullptr;
if (parallel)
{
- exec = new exec::ParallelExecutor{std::move(lowered_graph), input_tensors, output_tensors,
- tensor_builders, std::move(code_map)};
+ exec = new exec::ParallelExecutor{std::move(lowered_graph), input_tensors,
+ output_tensors, tensor_regs,
+ std::move(tensor_mgrs), std::move(code_map)};
}
else
{
- auto dataflow_exec =
- new exec::DataflowExecutor{std::move(lowered_graph), input_tensors, output_tensors,
- tensor_builders, std::move(code_map)};
+ auto dataflow_exec = new exec::DataflowExecutor{std::move(lowered_graph), input_tensors,
+ output_tensors, tensor_regs,
+ std::move(tensor_mgrs), std::move(code_map)};
if (options.he_profiling_mode)
{
std::vector<const backend::Backend *> backends;
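
At the end of createDataflowExecutor the `parallel` flag decides which concrete executor is built; both variants now take the tensor registries plus the moved TensorManagerSet. A reduced sketch of that selection (the types here are placeholders, not the onert executors):

    #include <iostream>
    #include <memory>

    struct IExecutor { virtual ~IExecutor() = default; virtual const char *name() const = 0; };
    struct DataflowExecutor : IExecutor { const char *name() const override { return "Dataflow"; } };
    struct ParallelExecutor : IExecutor { const char *name() const override { return "Parallel"; } };

    // Mirrors the `parallel` switch at the end of createDataflowExecutor.
    std::unique_ptr<IExecutor> makeExecutor(bool parallel)
    {
      if (parallel)
        return std::make_unique<ParallelExecutor>();
      return std::make_unique<DataflowExecutor>();
    }

    int main()
    {
      std::cout << makeExecutor(true)->name() << std::endl;  // Parallel
      std::cout << makeExecutor(false)->name() << std::endl; // Dataflow
    }
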
diff --git a/runtime/onert/core/src/compiler/ExecutorFactory.h b/runtime/onert/core/src/compiler/ExecutorFactory.h
index 418e5a764..b8893c03b 100644
--- a/runtime/onert/core/src/compiler/ExecutorFactory.h
+++ b/runtime/onert/core/src/compiler/ExecutorFactory.h
@@ -21,8 +21,8 @@
#include "backend/ITensor.h"
#include "exec/IExecutor.h"
-#include "ir/LoweredGraph.h"
-#include "TensorBuilders.h"
+#include "compiler/LoweredGraph.h"
+#include "TensorRegistries.h"
namespace onert
{
@@ -35,7 +35,7 @@ public:
static ExecutorFactory &get();
public:
- exec::IExecutor *create(std::unique_ptr<ir::LoweredGraph> lowered_graph,
+ exec::IExecutor *create(std::unique_ptr<compiler::LoweredGraph> lowered_graph,
const compiler::CompilerOptions &options,
const std::shared_ptr<exec::ExecutorMap> &executor_map);
@@ -43,28 +43,27 @@ private:
ExecutorFactory();
private:
- static void initializeBackendContext(ir::LoweredGraph *lowered_graph);
- static void runTensorRegistration(ir::LoweredGraph *lowered_graph,
+ static void initializeBackendContext(compiler::LoweredGraph *lowered_graph);
+ static void runTensorRegistration(compiler::LoweredGraph *lowered_graph,
const std::vector<ir::OpSequenceIndex> &order);
static std::vector<std::shared_ptr<backend::ITensor>>
- initializeModelIOTensors(ir::LoweredGraph &lowered_graph,
+ initializeModelIOTensors(compiler::LoweredGraph &lowered_graph,
const ir::OperandIndexSequence &indices);
- static void prepareExternalTensors(ir::LoweredGraph &lowered_graph,
- TensorBuilders &tensor_builders);
+ static void prepareExternalTensors(compiler::LoweredGraph &lowered_graph);
static exec::IExecutor *
- createLinearExecutor(std::unique_ptr<ir::LoweredGraph> lowered_graph,
+ createLinearExecutor(std::unique_ptr<compiler::LoweredGraph> lowered_graph,
const compiler::CompilerOptions &options,
const std::shared_ptr<exec::ExecutorMap> &executor_map);
static exec::IExecutor *
- createDataflowExecutor(std::unique_ptr<ir::LoweredGraph> lowered_graph,
+ createDataflowExecutor(std::unique_ptr<compiler::LoweredGraph> lowered_graph,
const compiler::CompilerOptions &options,
const std::shared_ptr<exec::ExecutorMap> &executor_map, bool parallel);
private:
- std::unordered_map<
- std::string, std::function<exec::IExecutor *(
- std::unique_ptr<ir::LoweredGraph>, const compiler::CompilerOptions &options,
- const std::shared_ptr<exec::ExecutorMap> &executor_map)>>
+ std::unordered_map<std::string, std::function<exec::IExecutor *(
+ std::unique_ptr<compiler::LoweredGraph>,
+ const compiler::CompilerOptions &options,
+ const std::shared_ptr<exec::ExecutorMap> &executor_map)>>
_map;
};
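
The `_map` member is a name-to-factory table: each executor type registers a std::function that builds it, and create() just looks the configured name up. A compact sketch of the same registration pattern, with placeholder names and a zero-argument factory signature for brevity:

    #include <functional>
    #include <iostream>
    #include <memory>
    #include <string>
    #include <unordered_map>

    struct IExecutor { virtual ~IExecutor() = default; };
    struct LinearExecutor : IExecutor {};
    struct DataflowExecutor : IExecutor {};

    int main()
    {
      using Factory = std::function<std::unique_ptr<IExecutor>()>;
      std::unordered_map<std::string, Factory> factories;

      // Registration, as done once in the ExecutorFactory constructor.
      factories["Linear"] = [] { return std::make_unique<LinearExecutor>(); };
      factories["Dataflow"] = [] { return std::make_unique<DataflowExecutor>(); };

      // Lookup-and-create, as done in create().
      auto exec = factories.at("Linear")();
      std::cout << std::boolalpha << (exec != nullptr) << std::endl; // true
    }
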
diff --git a/runtime/onert/core/src/compiler/Fp32ToFp16Converter.cc b/runtime/onert/core/src/compiler/Fp32ToFp16Converter.cc
index 5c4b84ec0..23a6a253d 100644
--- a/runtime/onert/core/src/compiler/Fp32ToFp16Converter.cc
+++ b/runtime/onert/core/src/compiler/Fp32ToFp16Converter.cc
@@ -44,7 +44,7 @@ namespace onert
namespace compiler
{
-Fp32ToFp16Converter::Fp32ToFp16Converter(ir::LoweredGraph &lowered_graph)
+Fp32ToFp16Converter::Fp32ToFp16Converter(compiler::LoweredGraph &lowered_graph)
: _lowered_graph{lowered_graph}
{
VERBOSE(Fp32ToFp16Converter) << "Fp16 Enable on" << std::endl;
diff --git a/runtime/onert/core/src/compiler/Fp32ToFp16Converter.h b/runtime/onert/core/src/compiler/Fp32ToFp16Converter.h
index 5dbf74472..eeecb9846 100644
--- a/runtime/onert/core/src/compiler/Fp32ToFp16Converter.h
+++ b/runtime/onert/core/src/compiler/Fp32ToFp16Converter.h
@@ -17,7 +17,7 @@
#ifndef __ONERT_COMPILER_FP32_TO_FP16_CONVERTER_H__
#define __ONERT_COMPILER_FP32_TO_FP16_CONVERTER_H__
-#include "ir/LoweredGraph.h"
+#include "compiler/LoweredGraph.h"
namespace onert
{
@@ -28,7 +28,7 @@ namespace compiler
class Fp32ToFp16Converter
{
public:
- Fp32ToFp16Converter(ir::LoweredGraph &lowered_graph);
+ Fp32ToFp16Converter(compiler::LoweredGraph &lowered_graph);
public:
void run();
@@ -89,7 +89,7 @@ private:
void convertOperandsOfOpSequence(ir::OpSequence &op_seq);
private:
- ir::LoweredGraph &_lowered_graph;
+ compiler::LoweredGraph &_lowered_graph;
OpSeqIndexList _list_fp32_to_fp16;
OpSeqIndexList _list_fp16_to_fp32;
};
diff --git a/runtime/onert/core/src/compiler/HEScheduler.cc b/runtime/onert/core/src/compiler/HEScheduler.cc
index de9b4fbd0..5653b090e 100644
--- a/runtime/onert/core/src/compiler/HEScheduler.cc
+++ b/runtime/onert/core/src/compiler/HEScheduler.cc
@@ -54,42 +54,10 @@ static bool isQuant(const ir::Graph &graph, const ir::Operation &node)
return false;
}
-static bool isWorkaroundSkip(const ir::Graph &graph, const backend::Backend *backend,
- const ir::Operation &node, bool quant)
+static bool isWorkaroundSkip(const ir::Graph &, const backend::Backend *, const ir::Operation &,
+ bool)
{
- /* TODO: this is workaround, come up with better solution if have.
- Adding exception in stage doesn't help. Because if there is a record for add without
- broadcast, scheduling will select it since it doesn't distinguish broadcast and
- non-broadcast like it does for quant non-quantized*/
- if (backend->config()->id() == "cpu" &&
- (node.opcode() == ir::OpCode::Add || node.opcode() == ir::OpCode::Sub ||
- node.opcode() == ir::OpCode::Mul))
- {
- const auto lhs_index{node.getInputs().at(ir::operation::Add::Input::LHS)};
- const auto rhs_index{node.getInputs().at(ir::operation::Add::Input::RHS)};
- /*Broadcasting isn't supported on CPU: no way to differ the existing exec_time record with and
- * without broadcasting*/
- if (!(graph.operands().at(lhs_index).shape() == graph.operands().at(rhs_index).shape()))
- {
- return true;
- }
- }
- /* TODO: this is workaround, come up with better solution if have.
- Adding exception in stage doesn't help. Because if there is a record for Mul without
- broadcast, scheduling will select it since it doesn't distinguish broadcast and
- non-broadcast like it does for quant non-quantized*/
- else if (backend->config()->id() == "acl_neon" && node.opcode() == ir::OpCode::Mul)
- {
- const auto lhs_index{node.getInputs().at(ir::operation::Mul::Input::LHS)};
- const auto rhs_index{node.getInputs().at(ir::operation::Mul::Input::RHS)};
-
- // Nontrivial broadcasting isn't supported yet
- if (quant ||
- !(graph.operands().at(lhs_index).shape() == graph.operands().at(rhs_index).shape()))
- {
- return true;
- }
- }
+ // For now, there is no workaround
return false;
}
diff --git a/runtime/onert/core/src/compiler/HEScheduler.h b/runtime/onert/core/src/compiler/HEScheduler.h
index d8ceca9c8..b9cee5881 100644
--- a/runtime/onert/core/src/compiler/HEScheduler.h
+++ b/runtime/onert/core/src/compiler/HEScheduler.h
@@ -59,6 +59,8 @@ public:
{
for (auto &entry : backend_contexts)
{
+ if (entry.first->config()->id() == backend::controlflow::Config::ID)
+ continue;
_all_backends.push_back(entry.first);
}
_backend_resolver = std::make_unique<compiler::BackendResolver>();
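
The added `continue` keeps the controlflow backend out of `_all_backends`, so the heuristic scheduler never proposes it for ordinary operations. A small filtering sketch with string ids; the concrete "controlflow" value is assumed here for illustration:

    #include <iostream>
    #include <string>
    #include <vector>

    int main()
    {
      const std::string controlflow_id = "controlflow"; // assumed stand-in for Config::ID
      std::vector<std::string> backend_contexts{"cpu", "acl_neon", "controlflow"};

      std::vector<std::string> all_backends;
      for (const auto &id : backend_contexts)
      {
        if (id == controlflow_id)
          continue; // controlflow handles If/While/Permute, not schedulable compute ops
        all_backends.push_back(id);
      }

      for (const auto &id : all_backends)
        std::cout << id << std::endl; // cpu, acl_neon
    }
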
diff --git a/runtime/onert/core/src/compiler/Linear.cc b/runtime/onert/core/src/compiler/Linear.cc
index 493ca1e43..49a989500 100644
--- a/runtime/onert/core/src/compiler/Linear.cc
+++ b/runtime/onert/core/src/compiler/Linear.cc
@@ -29,7 +29,7 @@ namespace onert
namespace compiler
{
-std::vector<ir::OpSequenceIndex> Linear::linearize(const ir::LoweredGraph &lowered_graph)
+std::vector<ir::OpSequenceIndex> Linear::linearize(const compiler::LoweredGraph &lowered_graph)
{
std::vector<ir::OpSequenceIndex> order;
lowered_graph.iterateTopolOpSeqs(
@@ -39,7 +39,7 @@ std::vector<ir::OpSequenceIndex> Linear::linearize(const ir::LoweredGraph &lower
return order;
}
-void Linear::dump(const ir::LoweredGraph &lowered_graph,
+void Linear::dump(const compiler::LoweredGraph &lowered_graph,
const std::vector<ir::OpSequenceIndex> &order)
{
{
@@ -62,7 +62,7 @@ void Linear::dump(const ir::LoweredGraph &lowered_graph,
}
}
-void Linear::planTensors(const ir::LoweredGraph &lowered_graph,
+void Linear::planTensors(const compiler::LoweredGraph &lowered_graph,
const std::vector<ir::OpSequenceIndex> &order)
{
const auto &graph = lowered_graph.graph();
@@ -180,11 +180,9 @@ void Linear::planTensors(const ir::LoweredGraph &lowered_graph,
tensor_builder_map[ind]->notifyLastUse(ind);
// plan for deallocation of dynamic tensor
- if (tensor_builder_map[ind]->supportDynamicTensor())
- {
- assert(tensor_builder_map[ind]->dynamicTensorManager());
- tensor_builder_map[ind]->dynamicTensorManager()->planDealloc(op_idx, ind);
- }
+ auto dyn_tensor_manager = tensor_builder_map[ind]->dynamicTensorManager();
+ if (dyn_tensor_manager)
+ dyn_tensor_manager->planDealloc(op_idx, ind);
}
}
}
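
Instead of asking the builder whether it supports dynamic tensors, planTensors now fetches the dynamic tensor manager and plans the deallocation only when one exists; a null manager means the backend is static-only. A hedged sketch of that null-guarded planning step over simplified types:

    #include <cstdint>
    #include <iostream>

    struct DynamicTensorManager
    {
      void planDealloc(uint32_t op_idx, uint32_t operand_idx)
      {
        std::cout << "free operand " << operand_idx << " after op " << op_idx << std::endl;
      }
    };

    struct TensorBuilder
    {
      DynamicTensorManager *dynamicTensorManager() { return _mgr; }
      DynamicTensorManager *_mgr = nullptr; // nullptr for static-only backends
    };

    int main()
    {
      DynamicTensorManager mgr;
      TensorBuilder dynamic_builder{&mgr}, static_builder{};

      for (TensorBuilder *b : {&dynamic_builder, &static_builder})
      {
        auto *dyn = b->dynamicTensorManager();
        if (dyn) // replaces the old supportDynamicTensor() check plus assert
          dyn->planDealloc(/*op_idx=*/5, /*operand_idx=*/12);
      }
    }
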
diff --git a/runtime/onert/core/src/compiler/Linear.h b/runtime/onert/core/src/compiler/Linear.h
index faeff77f3..1e24cf92b 100644
--- a/runtime/onert/core/src/compiler/Linear.h
+++ b/runtime/onert/core/src/compiler/Linear.h
@@ -23,7 +23,7 @@
#include "ir/OpSequences.h"
#include "ir/Index.h"
#include "backend/ITensorBuilder.h"
-#include "ir/LoweredGraph.h"
+#include "compiler/LoweredGraph.h"
namespace onert
{
@@ -41,10 +41,10 @@ namespace compiler
class Linear
{
public:
- static std::vector<ir::OpSequenceIndex> linearize(const ir::LoweredGraph &lowered_graph);
- static void dump(const ir::LoweredGraph &lowered_graph,
+ static std::vector<ir::OpSequenceIndex> linearize(const compiler::LoweredGraph &lowered_graph);
+ static void dump(const compiler::LoweredGraph &lowered_graph,
const std::vector<ir::OpSequenceIndex> &order);
- static void planTensors(const ir::LoweredGraph &lowered_graph,
+ static void planTensors(const compiler::LoweredGraph &lowered_graph,
const std::vector<ir::OpSequenceIndex> &order);
};
diff --git a/runtime/onert/core/src/ir/LoweredGraph.cc b/runtime/onert/core/src/compiler/LoweredGraph.cc
index 8aedfbdf0..1489a1884 100644
--- a/runtime/onert/core/src/ir/LoweredGraph.cc
+++ b/runtime/onert/core/src/compiler/LoweredGraph.cc
@@ -14,18 +14,18 @@
* limitations under the License.
*/
-#include "ir/LoweredGraph.h"
+#include "compiler/LoweredGraph.h"
#include <assert.h>
#include <sstream>
#include "util/logging.h"
-#include "pass/ConstantInsertionPass.h"
-#include "pass/ConstantLoweringPass.h"
-#include "pass/PermutationOperationPass.h"
-#include "pass/PermutationInsertionPass.h"
-#include "pass/PermutationEliminationPass.h"
+#include "compiler/pass/ConstantInsertionPass.h"
+#include "compiler/pass/ConstantLoweringPass.h"
+#include "compiler/pass/PermutationOperationPass.h"
+#include "compiler/pass/PermutationInsertionPass.h"
+#include "compiler/pass/PermutationEliminationPass.h"
#include "ir/GraphIterator.h"
-#include "verifier/Verifier.h"
+#include "ir/verifier/Verifier.h"
#include "backend/Backend.h"
#include "backend/IConfig.h"
#include "compiler/BackendResolver.h"
@@ -34,16 +34,15 @@
namespace onert
{
-namespace ir
+namespace compiler
{
-LoweredGraph::LoweredGraph(const Graph &graph, const compiler::CompilerOptions &options)
- : _graph{graph}
+LoweredGraph::LoweredGraph(const ir::Graph &graph, const CompilerOptions &options) : _graph{graph}
{
bool linear_executor = (options.executor == "Linear");
// Build backend contexts
- auto &backend_manager = compiler::BackendManager::get();
+ auto &backend_manager = BackendManager::get();
// Always create Controlflow backend context
auto cf_backend = backend_manager.getControlflow();
@@ -73,36 +72,37 @@ LoweredGraph::LoweredGraph(const Graph &graph, const compiler::CompilerOptions &
// TODO Move "schedule" phase out of here
// Schedule
- std::unique_ptr<compiler::BackendResolver> backend_resolver;
+ std::unique_ptr<BackendResolver> backend_resolver;
if (options.he_scheduler)
{
- auto scheduler = compiler::HEScheduler(_backend_contexts, options);
+ auto scheduler = HEScheduler(_backend_contexts, options);
backend_resolver = scheduler.schedule(_graph);
_indexed_ranks = scheduler.getIndexedRanks();
}
else
{
- auto scheduler = compiler::ManualScheduler(_backend_contexts, options);
+ auto scheduler = ManualScheduler(_backend_contexts, options);
backend_resolver = scheduler.schedule(_graph);
}
{
// operand::LowerInfo holder
- OperandIndexMap<std::unique_ptr<operand::LowerInfo>> operands_lower_info;
+ ir::OperandIndexMap<std::unique_ptr<ir::operand::LowerInfo>> operands_lower_info;
- _graph.operands().iterate([&](const OperandIndex &index, const Operand &) {
- operands_lower_info[index] = std::make_unique<operand::LowerInfo>();
+ _graph.operands().iterate([&](const ir::OperandIndex &index, const ir::Operand &) {
+ operands_lower_info[index] = std::make_unique<ir::operand::LowerInfo>();
});
// Make op_seqs while checking whether a node can be merged into a op_seq.
makeOpSequences(operands_lower_info, options, *backend_resolver);
- _op_seqs.iterate([&](const OpSequenceIndex &, OpSequence &op_seq) {
+ _op_seqs.iterate([&](const ir::OpSequenceIndex &, ir::OpSequence &op_seq) {
assert(op_seq.operations().size() > 0);
std::reverse(std::begin(op_seq.operations()), std::end(op_seq.operations()));
});
- _op_seqs.dump("merged and sorted operations without permutation", _graph.operations());
+ VERBOSE(OpSequences) << "dump without permutation" << std::endl;
+ dumpOpSequences(_op_seqs, _graph.operations());
pass::ConstantInsertionPass ci_pass(*this);
ci_pass.run();
@@ -127,17 +127,19 @@ LoweredGraph::LoweredGraph(const Graph &graph, const compiler::CompilerOptions &
pass::PermutationEliminationPass pe_pass(*this);
pe_pass.run();
- _op_seqs.dump("merged and sorted operations with permutation", _graph.operations());
+ VERBOSE(OpSequences) << "dump with permutation" << std::endl;
+ dumpOpSequences(_op_seqs, _graph.operations());
}
// Graph verifications
{
- assert(verifier::DAGChecker().verify(_graph));
- assert(verifier::EdgeConsistencyChecker().verify(_graph));
+ assert(ir::verifier::DAGChecker().verify(_graph));
+ assert(ir::verifier::EdgeConsistencyChecker().verify(_graph));
}
}
-const operation::LowerInfo *LoweredGraph::getLowerInfo(const OpSequenceIndex &op_seq_index) const
+const ir::operation::LowerInfo *
+LoweredGraph::getLowerInfo(const ir::OpSequenceIndex &op_seq_index) const
{
auto itr = _lower_info_map.op_seq.find(op_seq_index);
if (itr == _lower_info_map.op_seq.end())
@@ -145,13 +147,13 @@ const operation::LowerInfo *LoweredGraph::getLowerInfo(const OpSequenceIndex &op
return itr->second.get();
}
-void LoweredGraph::setLowerInfo(const OpSequenceIndex &op_seq_index,
- std::unique_ptr<operation::LowerInfo> &&lower_info)
+void LoweredGraph::setLowerInfo(const ir::OpSequenceIndex &op_seq_index,
+ std::unique_ptr<ir::operation::LowerInfo> &&lower_info)
{
_lower_info_map.op_seq.insert(std::make_pair(op_seq_index, std::move(lower_info)));
}
-void LoweredGraph::removeLowerInfo(const OpSequenceIndex &op_seq_index)
+void LoweredGraph::removeLowerInfo(const ir::OpSequenceIndex &op_seq_index)
{
auto &op_seq_lower_info = _lower_info_map.op_seq;
assert(op_seq_lower_info.find(op_seq_index) != op_seq_lower_info.end());
@@ -165,7 +167,7 @@ void LoweredGraph::removeLowerInfo(const OpSequenceIndex &op_seq_index)
}
}
-const operand::LowerInfo *LoweredGraph::getLowerInfo(const OperandIndex &index) const
+const ir::operand::LowerInfo *LoweredGraph::getLowerInfo(const ir::OperandIndex &index) const
{
auto itr = _lower_info_map.operand.find(index);
if (itr == _lower_info_map.operand.end())
@@ -173,7 +175,7 @@ const operand::LowerInfo *LoweredGraph::getLowerInfo(const OperandIndex &index)
return itr->second.get();
}
-operand::LowerInfo *LoweredGraph::getLowerInfo(const OperandIndex &index)
+ir::operand::LowerInfo *LoweredGraph::getLowerInfo(const ir::OperandIndex &index)
{
auto itr = _lower_info_map.operand.find(index);
if (itr == _lower_info_map.operand.end())
@@ -181,25 +183,26 @@ operand::LowerInfo *LoweredGraph::getLowerInfo(const OperandIndex &index)
return itr->second.get();
}
-void LoweredGraph::setLowerInfo(const OperandIndex &index,
- std::unique_ptr<operand::LowerInfo> &&lower_info)
+void LoweredGraph::setLowerInfo(const ir::OperandIndex &index,
+ std::unique_ptr<ir::operand::LowerInfo> &&lower_info)
{
_lower_info_map.operand.insert(std::make_pair(index, std::move(lower_info)));
}
-void LoweredGraph::removeLowerInfo(const OperandIndex &index)
+void LoweredGraph::removeLowerInfo(const ir::OperandIndex &index)
{
_lower_info_map.operand.erase(index);
}
void LoweredGraph::iterateTopolOpSeqs(
- const std::function<void(const OpSequenceIndex &, const OpSequence &)> &fn) const
+ const std::function<void(const ir::OpSequenceIndex &, const ir::OpSequence &)> &fn) const
{
- // Topological Sorting for OpSequences
- std::vector<OpSequenceIndex> topol_sorted;
- PostDfsIterator<true>{}.iterateOpSeqs(
- *this,
- [&](const OpSequenceIndex &index, const OpSequence &) { topol_sorted.emplace_back(index); });
+ // Topological Sorting for ir::OpSequences
+ std::vector<ir::OpSequenceIndex> topol_sorted;
+ ir::PostDfsIterator<true>{}.iterateOpSeqs(
+ *this, [&](const ir::OpSequenceIndex &index, const ir::OpSequence &) {
+ topol_sorted.emplace_back(index);
+ });
std::reverse(topol_sorted.begin(), topol_sorted.end());
for (const auto op_seq_idx : topol_sorted)
{
@@ -209,12 +212,14 @@ void LoweredGraph::iterateTopolOpSeqs(
}
void LoweredGraph::iterateTopolOpSeqs(
- const std::function<void(const OpSequenceIndex &, OpSequence &)> &fn)
+ const std::function<void(const ir::OpSequenceIndex &, ir::OpSequence &)> &fn)
{
- // Topological Sorting for OpSequences
- std::vector<OpSequenceIndex> topol_sorted;
- PostDfsIterator<false>{}.iterateOpSeqs(
- *this, [&](const OpSequenceIndex &index, OpSequence &) { topol_sorted.emplace_back(index); });
+ // Topological Sorting for ir::OpSequences
+ std::vector<ir::OpSequenceIndex> topol_sorted;
+ ir::PostDfsIterator<false>{}.iterateOpSeqs(
+ *this, [&](const ir::OpSequenceIndex &index, ir::OpSequence &) {
+ topol_sorted.emplace_back(index);
+ });
std::reverse(topol_sorted.begin(), topol_sorted.end());
for (const auto op_seq_idx : topol_sorted)
{
@@ -223,12 +228,12 @@ void LoweredGraph::iterateTopolOpSeqs(
}
}
-OpSequenceIndex LoweredGraph::appendFreshSingleOpSequence(const OperationIndex &node_index,
- const Operation &node)
+ir::OpSequenceIndex LoweredGraph::appendFreshSingleOpSequence(const ir::OperationIndex &node_index,
+ const ir::Operation &node)
{
// Create a fresh op_seq with one operation, and append it to op_seqs
// Create a fresh op_seq
- auto op_seq = std::make_unique<OpSequence>(_graph.layout());
+ auto op_seq = std::make_unique<ir::OpSequence>(_graph.layout());
// Add an operation
op_seq->appendOperation(node_index);
@@ -241,21 +246,21 @@ OpSequenceIndex LoweredGraph::appendFreshSingleOpSequence(const OperationIndex &
}
void LoweredGraph::makeOpSequences(
- OperandIndexMap<std::unique_ptr<operand::LowerInfo>> &operands_lower_info,
- const compiler::CompilerOptions &options, const compiler::BackendResolver &backend_resolver)
+ ir::OperandIndexMap<std::unique_ptr<ir::operand::LowerInfo>> &operands_lower_info,
+ const CompilerOptions &options, const BackendResolver &backend_resolver)
{
// if SUBG_MAX_NODE == 0, no limit on nodes of a op_seq
const int op_seq_max_node = options.op_seq_max_node;
assert(op_seq_max_node >= 0);
bool is_profiling = options.he_profiling_mode;
- OpSequence *op_seq = nullptr;
- OpSequenceIndex op_seq_index;
+ ir::OpSequence *op_seq = nullptr;
+ ir::OpSequenceIndex op_seq_index;
// NOTE: The method below appends nodes, creating a new op_seq when needed. If there is a
// better way, feel free to update this code.
- PostDfsConstIterator{}.iterate(
- _graph, [&](const OperationIndex &node_index, const Operation &node) {
+ ir::PostDfsConstIterator{}.iterate(
+ _graph, [&](const ir::OperationIndex &node_index, const ir::Operation &node) {
// LowerInfo for in/output operands
auto backend = backend_resolver.getBackend(node_index);
@@ -269,12 +274,12 @@ void LoweredGraph::makeOpSequences(
for (auto operand : node.getInputs() | ir::Remove::UNDEFINED)
{
auto &&lower_info = operands_lower_info.at(operand);
- lower_info->addUsePermuteFactor(operand::PermuteFactor{backend, backend_layout});
+ lower_info->addUsePermuteFactor(ir::operand::PermuteFactor{backend, backend_layout});
}
for (auto operand : node.getOutputs())
{
auto &&lower_info = operands_lower_info.at(operand);
- lower_info->addDefPermuteFactor(operand::PermuteFactor{backend, backend_layout});
+ lower_info->addDefPermuteFactor(ir::operand::PermuteFactor{backend, backend_layout});
}
bool new_op_seq = (op_seq == nullptr ||
@@ -288,9 +293,9 @@ void LoweredGraph::makeOpSequences(
{
auto new_op_seq_index = appendFreshSingleOpSequence(node_index, node);
- // OpSequence LowerInfo
+ // ir::OpSequence LowerInfo
setLowerInfo(new_op_seq_index,
- std::make_unique<operation::LowerInfo>(backend, backend_layout));
+ std::make_unique<ir::operation::LowerInfo>(backend, backend_layout));
op_seq_index = new_op_seq_index;
op_seq = &(_op_seqs.at(new_op_seq_index));
@@ -318,16 +323,17 @@ void LoweredGraph::makeOpSequences(
}
void LoweredGraph::manipulateLowerInfo(
- OperandIndexMap<std::unique_ptr<operand::LowerInfo>> &operands_lower_info, bool is_primary)
+ ir::OperandIndexMap<std::unique_ptr<ir::operand::LowerInfo>> &operands_lower_info,
+ bool is_primary)
{
- const auto controlflow_backend = compiler::BackendManager::get().getControlflow();
+ const auto controlflow_backend = BackendManager::get().getControlflow();
// TODO Rather than handling primary graph specially,
// let the permute inserted and remove it later
if (is_primary)
{
// TODO Rather than using NHWC Get frontend layout of this node from IR
- auto factor = operand::PermuteFactor{controlflow_backend, Layout::NHWC};
+ auto factor = ir::operand::PermuteFactor{controlflow_backend, ir::Layout::NHWC};
for (auto index : _graph.getInputs() | ir::Remove::UNDEFINED)
{
auto &&lower_info = operands_lower_info.at(index);
@@ -355,9 +361,9 @@ void LoweredGraph::manipulateLowerInfo(
else
{
// In case of that an operand is Graph's input and not input or output of any operation
- lower_info->addDefPermuteFactor(operand::PermuteFactor{
+ lower_info->addDefPermuteFactor(ir::operand::PermuteFactor{
controlflow_backend,
- Layout::NHWC // TODO Get frontend layout of this node from IR
+ ir::Layout::NHWC // TODO Get frontend layout of this node from IR
});
}
}
@@ -368,15 +374,15 @@ void LoweredGraph::manipulateLowerInfo(
if (lower_info->def_factors().size() == 0)
{
// In case of that an operand is Graph's output and not input or output of any operation
- lower_info->addDefPermuteFactor(operand::PermuteFactor{
+ lower_info->addDefPermuteFactor(ir::operand::PermuteFactor{
controlflow_backend,
- Layout::NHWC // TODO Get frontend layout of this node from IR
+ ir::Layout::NHWC // TODO Get frontend layout of this node from IR
});
}
}
// Set LowerInfo for each operand from the operand::LowerInfo holder
- _graph.operands().iterate([&](const OperandIndex &index, Operand &) {
+ _graph.operands().iterate([&](const ir::OperandIndex &index, ir::Operand &) {
setLowerInfo(index, std::move(operands_lower_info[index]));
});
}
@@ -388,11 +394,11 @@ void LoweredGraph::dumpLowerInfo()
std::map<uint32_t, std::string> dumps;
- _graph.operands().iterate([&](const OperandIndex &index, Operand &object) {
+ _graph.operands().iterate([&](const ir::OperandIndex &index, ir::Operand &object) {
std::stringstream sstream;
if (!getLowerInfo(index)->def_factors().empty() || !getLowerInfo(index)->use_factors().empty())
{
- auto factors_to_string = [](const operand::PermuteFactorSet &factors) {
+ auto factors_to_string = [](const ir::operand::PermuteFactorSet &factors) {
std::string str;
for (auto factor : factors)
{
@@ -403,7 +409,7 @@ void LoweredGraph::dumpLowerInfo()
return "{ " + str + "}";
};
- auto operation_index_to_string = [](const OperationIndexSet &operations) {
+ auto operation_index_to_string = [](const ir::OperationIndexSet &operations) {
std::string str;
for (auto op : operations)
{
@@ -427,8 +433,8 @@ void LoweredGraph::dumpLowerInfo()
sstream << (shape.dim(i)) << " ";
}
sstream << "}" << std::endl;
- sstream << " - Def Operations : " << def_ops << std::endl;
- sstream << " - Use Operations : " << use_ops << std::endl;
+ sstream << " - Def ir::Operations : " << def_ops << std::endl;
+ sstream << " - Use ir::Operations : " << use_ops << std::endl;
sstream << " - Lower Info" << std::endl;
sstream << " - Def Backends : " << def_layouts << std::endl;
sstream << " - Use Backends : " << use_layouts << std::endl;
@@ -445,8 +451,9 @@ void LoweredGraph::dumpLowerInfo()
}
}
-bool LoweredGraph::mergeable(const OpSequenceIndex &op_seq_index, const OperationIndex &node_index,
- Layout layout, const compiler::BackendResolver &backend_resolver)
+bool LoweredGraph::mergeable(const ir::OpSequenceIndex &op_seq_index,
+ const ir::OperationIndex &node_index, ir::Layout layout,
+ const BackendResolver &backend_resolver)
{
// Are they mergeable?
// 1. the same backend id and layout?
@@ -470,10 +477,10 @@ bool LoweredGraph::mergeable(const OpSequenceIndex &op_seq_index, const Operatio
// Branched?
{
- std::unordered_set<OperationIndex> branched_set;
+ std::unordered_set<ir::OperationIndex> branched_set;
// Check for branching up
- for (const auto &input : op_seq.getInputs() | Remove::DUPLICATED | ir::Remove::UNDEFINED)
+ for (const auto &input : op_seq.getInputs() | ir::Remove::DUPLICATED | ir::Remove::UNDEFINED)
{
const auto &input_obj = _graph.operands().at(input);
auto def = input_obj.getDef();
@@ -489,7 +496,7 @@ bool LoweredGraph::mergeable(const OpSequenceIndex &op_seq_index, const Operatio
branched_set.clear();
// Check for branching down
- for (const auto &output : node.getOutputs() | Remove::DUPLICATED)
+ for (const auto &output : node.getOutputs() | ir::Remove::DUPLICATED)
{
// TODO Fix this workaround for the case of model outputs that are used by another operation
// This is needed since the branching is decided by operation, but for model outputs,
@@ -516,7 +523,7 @@ bool LoweredGraph::mergeable(const OpSequenceIndex &op_seq_index, const Operatio
const auto &node_outputs = node.getOutputs();
// op_seq's operations are in order so that we just check the first and the last
- std::vector<OperationIndex> op_seq_ops{op_seq.operations()[0]};
+ std::vector<ir::OperationIndex> op_seq_ops{op_seq.operations()[0]};
if (op_seq.operations().size() > 1)
op_seq_ops.emplace_back(op_seq.operations()[op_seq.operations().size() - 1]);
@@ -556,5 +563,5 @@ bool LoweredGraph::mergeable(const OpSequenceIndex &op_seq_index, const Operatio
return false;
}
-} // namespace ir
+} // namespace compiler
} // namespace onert
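
iterateTopolOpSeqs builds its visiting order from a post-order DFS over op sequences and then reverses the list, which yields a topological order for a DAG. A generic sketch of that post-order-then-reverse idea on a tiny adjacency list (not onert's iterator types):

    #include <algorithm>
    #include <functional>
    #include <iostream>
    #include <vector>

    int main()
    {
      // 0 -> 1 -> 2 and 0 -> 2 : a small DAG.
      std::vector<std::vector<int>> adj{{1, 2}, {2}, {}};
      std::vector<bool> visited(adj.size(), false);
      std::vector<int> post_order;

      std::function<void(int)> dfs = [&](int n) {
        visited[n] = true;
        for (int next : adj[n])
          if (!visited[next])
            dfs(next);
        post_order.push_back(n); // a node is emitted after all of its successors
      };

      for (int n = 0; n < static_cast<int>(adj.size()); ++n)
        if (!visited[n])
          dfs(n);

      std::reverse(post_order.begin(), post_order.end()); // now a topological order
      for (int n : post_order)
        std::cout << n << ' ';
      std::cout << std::endl; // 0 1 2
    }
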
diff --git a/runtime/onert/core/src/compiler/ManualScheduler.cc b/runtime/onert/core/src/compiler/ManualScheduler.cc
index 1d591ae3c..ed49ee56f 100644
--- a/runtime/onert/core/src/compiler/ManualScheduler.cc
+++ b/runtime/onert/core/src/compiler/ManualScheduler.cc
@@ -40,7 +40,7 @@ std::unique_ptr<BackendResolver> ManualScheduler::schedule(const ir::Graph &grap
const auto &manual_options = _options.manual_scheduler_options;
auto backend_resolver = std::make_unique<compiler::BackendResolver>();
- // This fallback will be used for unavailable backends
+ // This fallback will be used when `backend_for_all` is unavailable
auto fallback = [&]() -> const backend::Backend * {
for (auto backend_id : _options.backend_list)
{
@@ -50,7 +50,8 @@ std::unique_ptr<BackendResolver> ManualScheduler::schedule(const ir::Graph &grap
}
return nullptr;
}();
- assert(fallback != nullptr); // There must be at least one fallback
+ if (fallback == nullptr)
+ throw std::runtime_error{"No loaded backends available."};
// 1. Backend for All operations
const backend::Backend *backend_all = resolveBackend(manual_options.backend_for_all, fallback);
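
The fallback is chosen by an immediately-invoked lambda that returns the first backend from the priority list that actually loaded, and the new guard throws instead of asserting when nothing loaded. A compact sketch of that selection pattern with string ids instead of Backend pointers:

    #include <iostream>
    #include <stdexcept>
    #include <string>
    #include <unordered_set>
    #include <vector>

    int main()
    {
      std::vector<std::string> backend_list{"acl_cl", "acl_neon", "cpu"}; // priority order
      std::unordered_set<std::string> loaded{"cpu"};                      // backends that loaded

      const std::string *fallback = [&]() -> const std::string * {
        for (const auto &id : backend_list)
          if (loaded.count(id))
            return &*loaded.find(id);
        return nullptr;
      }();

      if (fallback == nullptr)
        throw std::runtime_error{"No loaded backends available."};

      std::cout << *fallback << std::endl; // cpu
    }
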
@@ -110,7 +111,7 @@ std::unique_ptr<BackendResolver> ManualScheduler::schedule(const ir::Graph &grap
const backend::Backend *ManualScheduler::resolveBackend(const std::string &id,
const backend::Backend *fallback)
{
- // Ensure if the backend is available in the backend
+ // Ensure that the backend is available in the current backend contexts
const backend::Backend *backend = BackendManager::get().get(id);
if (!backend || _backend_contexts.find(backend) == _backend_contexts.end())
{
diff --git a/runtime/onert/core/src/compiler/OperationValidator.cc b/runtime/onert/core/src/compiler/OperationValidator.cc
index 44496318f..f7f659e3e 100644
--- a/runtime/onert/core/src/compiler/OperationValidator.cc
+++ b/runtime/onert/core/src/compiler/OperationValidator.cc
@@ -68,19 +68,6 @@ void OperationValidator::operator()()
[&](const ir::OperationIndex &, const ir::Operation &node) { node.accept(*this); });
}
-void OperationValidator::visit(const ir::operation::Abs &node) { checkUnaryOp(node); }
-
-void OperationValidator::visit(const ir::operation::AvgPool2D &node)
-{
- const auto ofm_index{node.getOutputs().at(0)};
- if (_ctx.at(ofm_index).info().isDynamic())
- return;
-
- const auto ifm_index{node.getInputs().at(ir::operation::AvgPool2D::Input::INPUT)};
-
- OP_REQUIRES(_ctx.at(ifm_index).shape().rank() == 4);
-}
-
void OperationValidator::visit(const ir::operation::BatchMatMul &node)
{
const auto lhs_index(node.getInputs().at(ir::operation::BatchMatMul::Input::LHS));
@@ -125,17 +112,6 @@ void OperationValidator::visit(const ir::operation::BatchToSpaceND &node)
OP_REQUIRES(input_shape.C == output_shape.C);
}
-void OperationValidator::visit(const ir::operation::Cast &node)
-{
- const auto output_index{node.getOutputs().at(0)};
- if (_ctx.at(output_index).info().isDynamic())
- return;
-
- const auto input_index{node.getInputs().at(0)};
-
- OP_REQUIRES(_ctx.at(output_index).shape() == _ctx.at(input_index).shape());
-}
-
void OperationValidator::visit(const ir::operation::Comparison &node)
{
const auto output_index{node.getOutputs().at(0)};
@@ -177,6 +153,17 @@ void OperationValidator::visit(const ir::operation::InstanceNorm &node)
OP_REQUIRES(_ctx.at(beta_index).shape().rank() == 1);
}
+void OperationValidator::visit(const ir::operation::Pool2D &node)
+{
+ const auto ofm_index{node.getOutputs().at(0)};
+ if (_ctx.at(ofm_index).info().isDynamic())
+ return;
+
+ const auto ifm_index{node.getInputs().at(ir::operation::Pool2D::Input::INPUT)};
+
+ OP_REQUIRES(_ctx.at(ifm_index).shape().rank() == 4);
+}
+
void OperationValidator::visit(const ir::operation::Permute &node)
{
VERBOSE(Permute) << "Configure Permute operation" << std::endl;
@@ -298,8 +285,6 @@ void OperationValidator::visit(const ir::operation::RNN &node)
num_units == _ctx.at(hidden_state_out_index).shape().dim(1));
}
-void OperationValidator::visit(const ir::operation::Round &node) { checkUnaryOp(node); }
-
void OperationValidator::visit(const ir::operation::SpaceToBatchND &node)
{
const auto ofm_index{node.getOutputs().at(0)};
@@ -353,6 +338,51 @@ void OperationValidator::visit(const ir::operation::SpaceToDepth &node)
OP_REQUIRES(input_shape.C * block_size * block_size == output_shape.C);
}
+void OperationValidator::visit(const ir::operation::ElementwiseActivation &node)
+{
+ checkUnaryOp(node);
+}
+
+void OperationValidator::visit(const ir::operation::ElementwiseBinary &node)
+{
+ const auto output_index{node.getOutputs().at(0)};
+ const auto lhs_index{node.getInputs().at(ir::operation::ElementwiseBinary::Input::LHS)};
+ const auto rhs_index{node.getInputs().at(ir::operation::ElementwiseBinary::Input::RHS)};
+
+ OP_REQUIRES(_ctx.at(lhs_index).typeInfo().type() == _ctx.at(rhs_index).typeInfo().type());
+ OP_REQUIRES(_ctx.at(lhs_index).typeInfo().type() == _ctx.at(output_index).typeInfo().type());
+}
+
+void OperationValidator::visit(const ir::operation::ElementwiseUnary &node)
+{
+ const auto output_index{node.getOutputs().at(0)};
+ const auto input_index{node.getInputs().at(ir::operation::ElementwiseUnary::Input::INPUT)};
+
+ OP_REQUIRES(node.getInputs().size() == 1);
+ OP_REQUIRES(node.getOutputs().size() == 1);
+
+ // Check if I/O types match
+ if (node.param().op_type == ir::operation::ElementwiseUnary::Type::DEQUANTIZE)
+ {
+ OP_REQUIRES(_ctx.at(input_index).typeInfo().type() == ir::DataType::QUANT_UINT8_ASYMM);
+ OP_REQUIRES(_ctx.at(output_index).typeInfo().type() == ir::DataType::FLOAT32);
+ }
+ else if (node.param().op_type == ir::operation::ElementwiseUnary::Type::QUANTIZE)
+ {
+ OP_REQUIRES(_ctx.at(input_index).typeInfo().type() == ir::DataType::FLOAT32);
+ OP_REQUIRES(_ctx.at(output_index).typeInfo().type() == ir::DataType::QUANT_UINT8_ASYMM);
+ }
+ else if (node.param().op_type != ir::operation::ElementwiseUnary::Type::CAST)
+ {
+ OP_REQUIRES(_ctx.at(output_index).typeInfo().type() == _ctx.at(input_index).typeInfo().type());
+ }
+
+ if (_ctx.at(output_index).info().isDynamic())
+ return;
+
+ OP_REQUIRES(_ctx.at(output_index).shape() == _ctx.at(input_index).shape());
+}
+
void OperationValidator::visit(const ir::operation::EmbeddingLookup &node)
{
const auto output_index{node.getOutputs().at(0)};
@@ -389,8 +419,6 @@ void OperationValidator::visit(const ir::operation::EmbeddingLookup &node)
}
}
-void OperationValidator::visit(const ir::operation::Exp &node) { checkUnaryOp(node); }
-
void OperationValidator::visit(const ir::operation::ExpandDims &node)
{
const auto output_index{node.getOutputs().at(0)};
@@ -405,8 +433,6 @@ void OperationValidator::visit(const ir::operation::ExpandDims &node)
OP_REQUIRES(_ctx.at(axis_index).shape().rank() <= 1);
}
-void OperationValidator::visit(const ir::operation::Floor &node) { checkUnaryOp(node); }
-
void OperationValidator::visit(const ir::operation::HashtableLookup &node)
{
const auto output_index{node.getOutputs().at(ir::operation::HashtableLookup::Output::OUTPUT)};
@@ -495,21 +521,6 @@ void OperationValidator::visit(const ir::operation::Gather &node)
OP_REQUIRES(ofm_shape.rank() <= 4);
}
-void OperationValidator::visit(const ir::operation::Dequantize &node)
-{
- const auto output_index{node.getOutputs().at(0)};
-
- const auto input_index{node.getInputs().at(ir::operation::Dequantize::Input::INPUT)};
-
- OP_REQUIRES(_ctx.at(input_index).typeInfo().type() == ir::DataType::QUANT_UINT8_ASYMM);
- OP_REQUIRES(_ctx.at(output_index).typeInfo().type() == ir::DataType::FLOAT32);
-
- if (_ctx.at(output_index).info().isDynamic())
- return;
- OP_REQUIRES(_ctx.at(input_index).shape().rank() <= 4);
- OP_REQUIRES(_ctx.at(input_index).shape() == _ctx.at(output_index).shape());
-}
-
void OperationValidator::visit(const ir::operation::DepthToSpace &node)
{
// param check
@@ -822,30 +833,6 @@ void OperationValidator::visit(const ir::operation::Pad &node)
OP_REQUIRES(_ctx.at(input_index).shape().rank() == _ctx.at(output_index).shape().rank());
}
-void OperationValidator::visit(const ir::operation::Min &node)
-{
- const auto output_index{node.getOutputs().at(0)};
- // This validator does not check shape. So checking isDynamic() is skipped.
-
- const auto lhs_index{node.getInputs().at(ir::operation::Min::Input::LHS)};
- const auto rhs_index{node.getInputs().at(ir::operation::Min::Input::RHS)};
-
- OP_REQUIRES(_ctx.at(lhs_index).typeInfo().type() == _ctx.at(rhs_index).typeInfo().type());
- OP_REQUIRES(_ctx.at(lhs_index).typeInfo().type() == _ctx.at(output_index).typeInfo().type());
-}
-
-void OperationValidator::visit(const ir::operation::Max &node)
-{
- const auto output_index{node.getOutputs().at(0)};
- // This validator does not check shape. So checking isDynamic() is skipped.
-
- const auto lhs_index{node.getInputs().at(ir::operation::Max::Input::LHS)};
- const auto rhs_index{node.getInputs().at(ir::operation::Max::Input::RHS)};
-
- OP_REQUIRES(_ctx.at(lhs_index).typeInfo().type() == _ctx.at(rhs_index).typeInfo().type());
- OP_REQUIRES(_ctx.at(lhs_index).typeInfo().type() == _ctx.at(output_index).typeInfo().type());
-}
-
void OperationValidator::visit(const ir::operation::Select &node)
{
const auto output_index{node.getOutputs().at(0)};
@@ -899,12 +886,6 @@ void OperationValidator::visit(const ir::operation::Split &node)
OP_REQUIRES(_ctx.at(input_index).shape().dim(axis) % num_splits == 0);
}
-void OperationValidator::visit(const ir::operation::Cos &node) { checkUnaryOp(node); }
-
-void OperationValidator::visit(const ir::operation::Sin &node) { checkUnaryOp(node); }
-
-void OperationValidator::visit(const ir::operation::RSQRT &node) { checkUnaryOp(node); }
-
void OperationValidator::visit(const ir::operation::Shape &node)
{
const auto output_index{node.getOutputs().at(0)};
@@ -961,12 +942,6 @@ void OperationValidator::visit(const ir::operation::While &node)
// TODO Add to validate with subgraphs
}
-void OperationValidator::visit(const ir::operation::Neg &node) { checkUnaryOp(node); }
-
-void OperationValidator::visit(const ir::operation::Log &node) { checkUnaryOp(node); }
-
-void OperationValidator::visit(const ir::operation::LogicalNot &node) { checkUnaryOp(node); }
-
void OperationValidator::visit(const ir::operation::SquaredDifference &node)
{
const auto output_index{node.getOutputs().at(0)};
@@ -1027,16 +1002,6 @@ void OperationValidator::visit(const ir::operation::Tile &node)
OP_REQUIRES(_ctx.at(input_index).shape().rank() == _ctx.at(output_index).shape().rank());
}
-void OperationValidator::visit(const ir::operation::LogicalOr &node)
-{
- const auto output_index{node.getOutputs().at(0)};
- const auto lhs_index{node.getInputs().at(0)};
- const auto rhs_index{node.getInputs().at(1)};
-
- OP_REQUIRES(_ctx.at(lhs_index).typeInfo().type() == _ctx.at(rhs_index).typeInfo().type());
- OP_REQUIRES(_ctx.at(lhs_index).typeInfo().type() == _ctx.at(output_index).typeInfo().type());
-}
-
void OperationValidator::visit(const ir::operation::Range &node)
{
const auto output_index{node.getOutputs().at(0)};
@@ -1084,24 +1049,5 @@ void OperationValidator::visit(const ir::operation::LogSoftmax &node)
OP_REQUIRES(_ctx.at(output_index).shape().rank() == _ctx.at(input_index).shape().rank());
}
-void OperationValidator::visit(const ir::operation::Quantize &node)
-{
- VERBOSE(Quantize) << "Configure Quantize operation" << std::endl;
-
- OP_REQUIRES(node.getInputs().size() == 1);
- OP_REQUIRES(node.getOutputs().size() == 1);
-
- const auto input_index{node.getInputs().at(0)};
- const auto output_index{node.getOutputs().at(0)};
-
- OP_REQUIRES(_ctx.at(input_index).typeInfo().type() == ir::DataType::FLOAT32);
-
- if (_ctx.at(output_index).info().isDynamic())
- return;
-
- OP_REQUIRES(_ctx.at(output_index).typeInfo().type() == ir::DataType::QUANT_UINT8_ASYMM);
-
- OP_REQUIRES(_ctx.at(output_index).shape().rank() == _ctx.at(input_index).shape().rank());
-}
} // namespace compiler
} // namespace onert
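
The consolidated ElementwiseUnary validator keeps the old Dequantize/Quantize type rules as branches on the op's param().op_type: DEQUANTIZE must map QUANT_UINT8_ASYMM to FLOAT32, QUANTIZE the reverse, CAST may change types freely, and every other unary op must preserve the type. A reduced sketch of that dispatch, with OP_REQUIRES replaced by a plain boolean check:

    #include <iostream>

    enum class DataType { FLOAT32, QUANT_UINT8_ASYMM };
    enum class UnaryType { ABS, CAST, DEQUANTIZE, QUANTIZE };

    bool typesValid(UnaryType op, DataType in, DataType out)
    {
      switch (op)
      {
        case UnaryType::DEQUANTIZE:
          return in == DataType::QUANT_UINT8_ASYMM && out == DataType::FLOAT32;
        case UnaryType::QUANTIZE:
          return in == DataType::FLOAT32 && out == DataType::QUANT_UINT8_ASYMM;
        case UnaryType::CAST:
          return true; // any input/output type combination is allowed
        default:
          return in == out; // other unary ops must not change the type
      }
    }

    int main()
    {
      std::cout << std::boolalpha
                << typesValid(UnaryType::DEQUANTIZE, DataType::QUANT_UINT8_ASYMM,
                              DataType::FLOAT32)
                << std::endl; // true
      std::cout << typesValid(UnaryType::ABS, DataType::FLOAT32, DataType::QUANT_UINT8_ASYMM)
                << std::endl; // false
    }
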
diff --git a/runtime/onert/core/src/compiler/OperationValidator.h b/runtime/onert/core/src/compiler/OperationValidator.h
index b27e6863c..deb6357bb 100644
--- a/runtime/onert/core/src/compiler/OperationValidator.h
+++ b/runtime/onert/core/src/compiler/OperationValidator.h
@@ -44,58 +44,45 @@ public:
void operator()();
public:
- void visit(const ir::operation::Abs &node) override;
- void visit(const ir::operation::AvgPool2D &node) override;
void visit(const ir::operation::BatchMatMul &node) override;
void visit(const ir::operation::BatchToSpaceND &node) override;
- void visit(const ir::operation::Cast &node) override;
void visit(const ir::operation::Comparison &node) override;
void visit(const ir::operation::Softmax &node) override;
void visit(const ir::operation::InstanceNorm &node) override;
void visit(const ir::operation::Permute &node) override;
+ void visit(const ir::operation::Pool2D &node) override;
void visit(const ir::operation::Reduce &node) override;
void visit(const ir::operation::Transpose &node) override;
void visit(const ir::operation::RNN &node) override;
- void visit(const ir::operation::Round &node) override;
void visit(const ir::operation::SpaceToBatchND &node) override;
void visit(const ir::operation::SpaceToDepth &node) override;
+ void visit(const ir::operation::ElementwiseActivation &node) override;
+ void visit(const ir::operation::ElementwiseBinary &node) override;
+ void visit(const ir::operation::ElementwiseUnary &node) override;
void visit(const ir::operation::EmbeddingLookup &node) override;
- void visit(const ir::operation::Exp &node) override;
void visit(const ir::operation::ExpandDims &node) override;
- void visit(const ir::operation::Floor &node) override;
void visit(const ir::operation::HashtableLookup &node) override;
void visit(const ir::operation::TransposeConv &node) override;
void visit(const ir::operation::Gather &node) override;
- void visit(const ir::operation::Dequantize &node) override;
void visit(const ir::operation::DepthToSpace &node) override;
void visit(const ir::operation::Pack &node) override;
void visit(const ir::operation::LSTM &node) override;
void visit(const ir::operation::L2Normalization &node) override;
void visit(const ir::operation::Unpack &node) override;
void visit(const ir::operation::Pad &node) override;
- void visit(const ir::operation::Min &node) override;
- void visit(const ir::operation::Max &node) override;
void visit(const ir::operation::Select &node) override;
void visit(const ir::operation::StridedSlice &node) override;
void visit(const ir::operation::Split &node) override;
- void visit(const ir::operation::Cos &node) override;
- void visit(const ir::operation::Sin &node) override;
- void visit(const ir::operation::RSQRT &node) override;
void visit(const ir::operation::Shape &node) override;
void visit(const ir::operation::ResizeBilinear &node) override;
void visit(const ir::operation::Reverse &node) override;
void visit(const ir::operation::If &node) override;
void visit(const ir::operation::While &node) override;
- void visit(const ir::operation::Neg &node) override;
- void visit(const ir::operation::Log &node) override;
- void visit(const ir::operation::LogicalNot &node) override;
void visit(const ir::operation::SquaredDifference &node) override;
void visit(const ir::operation::Tile &node) override;
- void visit(const ir::operation::LogicalOr &node) override;
void visit(const ir::operation::Range &node) override;
void visit(const ir::operation::MatrixBandPart &node) override;
void visit(const ir::operation::LogSoftmax &node) override;
- void visit(const ir::operation::Quantize &node) override;
private:
void checkUnaryOp(const ir::Operation &node);
diff --git a/runtime/onert/core/src/compiler/StaticShapeInference.cc b/runtime/onert/core/src/compiler/StaticShapeInference.cc
index 76c1edcbc..4eba1ff49 100644
--- a/runtime/onert/core/src/compiler/StaticShapeInference.cc
+++ b/runtime/onert/core/src/compiler/StaticShapeInference.cc
@@ -25,6 +25,64 @@ namespace onert
namespace compiler
{
+bool StaticShapeInferer::infer(const ir::OpSequence &op_seq)
+{
+ bool has_dynamic_tensor = false;
+
+ for (const auto &operation_idx : op_seq.operations())
+ {
+ auto &op = _operations.at(operation_idx);
+ auto opcode = op.opcode();
+
+ _return_has_dynamic_tensor = false; // this is used as a return value inside operation's visit()
+
+ // IF: need shape inference for then, else
+ // While: need shape inference for condition, body
+ if (opcode == ir::OpCode::If || opcode == ir::OpCode::While)
+ {
+ op.accept(*this);
+ }
+ else
+ {
+ _return_has_dynamic_tensor = checkDynamicInput(op);
+
+ if (_return_has_dynamic_tensor)
+ {
+ setDynamicOutput(op);
+ }
+ else
+ {
+ op.accept(*this);
+ }
+ }
+
+ has_dynamic_tensor = has_dynamic_tensor || _return_has_dynamic_tensor;
+ }
+
+ return has_dynamic_tensor;
+}
+
+bool StaticShapeInferer::checkDynamicInput(const ir::Operation &op)
+{
+ for (auto input_idx : op.getInputs() | ir::Remove::UNDEFINED | ir::Remove::DUPLICATED)
+ {
+ if (_operands.at(input_idx).info().isDynamic())
+ {
+ return true;
+ }
+ }
+
+ return false;
+}
+
+void StaticShapeInferer::setDynamicOutput(const ir::Operation &op)
+{
+ for (auto output_idx : op.getOutputs())
+ {
+ _operands.at(output_idx).info().setDynamic();
+ }
+}
+
void StaticShapeInferer::handleBinaryArithmeticOp(const ir::Operation &op,
const ir::OperandIndex lhs_idx,
const ir::OperandIndex rhs_idx)
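
The new infer() entry point centralizes dynamic-shape handling for an op sequence: if any input operand is already dynamic, every output is marked dynamic and the op is skipped; otherwise the per-op static shape inference runs. That is why the visitors in the rest of this file drop their individual isDynamic() early-returns. A reduced sketch of that control flow over plain structs (not the onert visitor machinery):

    #include <iostream>
    #include <vector>

    struct Operand { bool dynamic = false; };

    struct Operation
    {
      std::vector<Operand *> inputs;
      std::vector<Operand *> outputs;
    };

    bool hasDynamicInput(const Operation &op)
    {
      for (const Operand *in : op.inputs)
        if (in->dynamic)
          return true;
      return false;
    }

    // Returns true if the sequence produced or propagated any dynamic tensor.
    bool inferSequence(std::vector<Operation> &ops)
    {
      bool has_dynamic = false;
      for (Operation &op : ops)
      {
        if (hasDynamicInput(op))
        {
          for (Operand *out : op.outputs)
            out->dynamic = true; // skip static inference entirely
          has_dynamic = true;
        }
        else
        {
          // static shape inference for this op would run here (op.accept(inferer))
        }
      }
      return has_dynamic;
    }

    int main()
    {
      Operand a{true}, b, c;
      std::vector<Operation> seq{{{&a}, {&b}}, {{&b}, {&c}}};
      std::cout << std::boolalpha << inferSequence(seq) << std::endl; // true
      std::cout << c.dynamic << std::endl;                            // true: propagated
    }
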
@@ -35,13 +93,6 @@ void StaticShapeInferer::handleBinaryArithmeticOp(const ir::Operation &op,
const auto output_idx = op.getOutputs().at(0);
ir::Operand &output = _operands.at(output_idx);
- if (lhs.info().isDynamic() || rhs.info().isDynamic())
- {
- output.info().setDynamic();
- _return_has_dynamic_tensor = true;
- return;
- }
-
// re-sizing output shape
ir::Shape new_shape = shape_inference::inferEltwiseShape(lhs.info().shape(), rhs.info().shape());
output.info().shape(new_shape);
@@ -56,14 +107,6 @@ void StaticShapeInferer::handleSimpleUnaryOp(const ir::Operation &op,
const auto output_idx = op.getOutputs().at(0);
ir::Operand &output = _operands.at(output_idx);
- // if input is dynamic, output also becomes dynamic
- if (input.info().isDynamic())
- {
- output.info().setDynamic();
- _return_has_dynamic_tensor = true;
- return;
- }
-
// re-sizing output shape
ir::Shape new_shape = input.info().shape();
output.info().shape(new_shape);
@@ -99,17 +142,6 @@ void StaticShapeInferer::dump()
}
}
-void StaticShapeInferer::visit(const ir::operation::Abs &op)
-{
- handleSimpleUnaryOp(op, op.getInputs().at(ir::operation::Abs::Input::INPUT));
-}
-
-void StaticShapeInferer::visit(const ir::operation::Add &op)
-{
- handleBinaryArithmeticOp(op, op.getInputs().at(ir::operation::Add::Input::LHS),
- op.getInputs().at(ir::operation::Add::Input::RHS));
-}
-
void StaticShapeInferer::visit(const ir::operation::ArgMax &op)
{
const auto input_idx{op.getInputs().at(ir::operation::ArgMax::Input::INPUT)};
@@ -118,15 +150,6 @@ void StaticShapeInferer::visit(const ir::operation::ArgMax &op)
// get mutable output operand
const auto output_idx = op.getOutputs().at(0);
ir::Operand &output = _operands.at(output_idx);
-
- // if input is dynamic, output also becomes dynamic
- if (input.info().isDynamic())
- {
- output.info().setDynamic();
- _return_has_dynamic_tensor = true;
- return;
- }
-
const auto rank = input.info().shape().rank();
const auto axis = ((op.param().axis < 0) ? rank + op.param().axis : op.param().axis);
@@ -145,35 +168,22 @@ void StaticShapeInferer::visit(const ir::operation::BatchMatMul &op)
const auto lhs = _operands.at(lhs_index);
const auto rhs = _operands.at(rhs_index);
auto &output = _operands.at(output_index);
-
- if (lhs.info().isDynamic() || rhs.info().isDynamic())
- {
- output.info().setDynamic();
- _return_has_dynamic_tensor = true;
- return;
- }
-
auto new_shape = shape_inference::inferBatchMatMulShape(lhs.shape(), rhs.shape(), op.param());
output.info().shape(new_shape);
}
-void StaticShapeInferer::visit(const ir::operation::BroadcastTo &op)
+void StaticShapeInferer::visit(const ir::operation::BinaryArithmetic &op)
{
- const auto input_idx{op.getInputs().at(ir::operation::BroadcastTo::Input::INPUT)};
- const auto &input = _operands.at(input_idx);
+ handleBinaryArithmeticOp(op, op.getInputs().at(ir::operation::BinaryArithmetic::Input::LHS),
+ op.getInputs().at(ir::operation::BinaryArithmetic::Input::RHS));
+}
+void StaticShapeInferer::visit(const ir::operation::BroadcastTo &op)
+{
// get mutable output operand
const auto output_idx = op.getOutputs().at(0);
ir::Operand &output = _operands.at(output_idx);
- // if input is dynamic, output also becomes dynamic.
- if (input.info().isDynamic())
- {
- output.info().setDynamic();
- _return_has_dynamic_tensor = true;
- return;
- }
-
const auto shape_idx{op.getInputs().at(ir::operation::BroadcastTo::Input::SHAPE)};
const auto &shape = _operands.at(shape_idx);
@@ -192,11 +202,6 @@ void StaticShapeInferer::visit(const ir::operation::BroadcastTo &op)
output.info().shape(new_shape);
}
-void StaticShapeInferer::visit(const ir::operation::Cast &op)
-{
- handleSimpleUnaryOp(op, op.getInputs().at(ir::operation::Cast::Input::INPUT));
-}
-
void StaticShapeInferer::visit(const ir::operation::Comparison &op)
{
handleBinaryArithmeticOp(op, op.getInputs().at(ir::operation::Comparison::Input::INPUT0),
@@ -215,14 +220,6 @@ void StaticShapeInferer::visit(const ir::operation::Concat &op)
{
const auto input_idx{op.getInputs().at(i)};
const auto &input = _operands.at(input_idx);
-
- if (input.info().isDynamic())
- {
- output.info().setDynamic();
- _return_has_dynamic_tensor = true;
- return;
- }
-
input_shapes.emplace_back(input.shape());
}
@@ -241,33 +238,26 @@ void StaticShapeInferer::visit(const ir::operation::Conv2D &op)
const auto output_idx = op.getOutputs().at(0);
ir::Operand &output = _operands.at(output_idx);
- if (input.info().isDynamic() || ker.info().isDynamic())
- {
- output.info().setDynamic();
- _return_has_dynamic_tensor = true;
- return;
- }
-
// re-sizing output shape
ir::Shape new_shape =
shape_inference::inferConv2DShape(input.info().shape(), ker.info().shape(), op.param());
output.info().shape(new_shape);
}
-void StaticShapeInferer::visit(const ir::operation::Cos &op)
+void StaticShapeInferer::visit(const ir::operation::ElementwiseActivation &op)
{
- handleSimpleUnaryOp(op, op.getInputs().at(ir::operation::Cos::Input::INPUT));
+ handleSimpleUnaryOp(op, op.getInputs().at(ir::operation::ElementwiseActivation::Input::INPUT));
}
-void StaticShapeInferer::visit(const ir::operation::Div &op)
+void StaticShapeInferer::visit(const ir::operation::ElementwiseBinary &op)
{
- handleBinaryArithmeticOp(op, op.getInputs().at(ir::operation::Div::Input::LHS),
- op.getInputs().at(ir::operation::Div::Input::RHS));
+ handleBinaryArithmeticOp(op, op.getInputs().at(ir::operation::ElementwiseBinary::Input::LHS),
+ op.getInputs().at(ir::operation::ElementwiseBinary::Input::RHS));
}
-void StaticShapeInferer::visit(const ir::operation::Exp &op)
+void StaticShapeInferer::visit(const ir::operation::ElementwiseUnary &op)
{
- handleSimpleUnaryOp(op, op.getInputs().at(ir::operation::Exp::Input::INPUT));
+ handleSimpleUnaryOp(op, op.getInputs().at(ir::operation::ElementwiseUnary::Input::INPUT));
}
void StaticShapeInferer::visit(const ir::operation::ExpandDims &op)
@@ -279,13 +269,6 @@ void StaticShapeInferer::visit(const ir::operation::ExpandDims &op)
const auto output_idx = op.getOutputs().at(0);
ir::Operand &output = _operands.at(output_idx);
- if (input.info().isDynamic())
- {
- output.info().setDynamic();
- _return_has_dynamic_tensor = true;
- return;
- }
-
if (!axis.isConstant())
{
output.info().setDynamic();
@@ -310,13 +293,6 @@ void StaticShapeInferer::visit(const ir::operation::Fill &op)
const auto output_idx = op.getOutputs().at(0);
ir::Operand &output = _operands.at(output_idx);
- if (input.info().isDynamic())
- {
- output.info().setDynamic();
- _return_has_dynamic_tensor = true;
- return;
- }
-
if (!input.isConstant())
{
output.info().setDynamic();
@@ -345,15 +321,6 @@ void StaticShapeInferer::visit(const ir::operation::FullyConnected &op)
// get mutable output operand
const auto output_idx = op.getOutputs().at(0);
ir::Operand &output = _operands.at(output_idx);
-
- // if input or ker is dynamic, output also becomes dynamic
- if (input.info().isDynamic() || ker.info().isDynamic())
- {
- output.info().setDynamic();
- _return_has_dynamic_tensor = true;
- return;
- }
-
// re-sizing output shape
ir::Shape new_shape =
shape_inference::inferFullyConnectedShape(input.info().shape(), ker.info().shape());
@@ -376,15 +343,6 @@ void StaticShapeInferer::visit(const ir::operation::Gather &op)
const auto indices_idx{op.getInputs().at(ir::operation::Gather::Input::INDICES)};
const auto &indices = _operands.at(indices_idx);
-
- // if input is dynamic, output also becomes dynamic
- if (input.info().isDynamic() || indices.info().isDynamic())
- {
- output.info().setDynamic();
- _return_has_dynamic_tensor = true;
- return;
- }
-
const auto rank = input.info().shape().rank();
const auto axis = ((op.param().axis < 0) ? rank + op.param().axis : op.param().axis);
@@ -476,27 +434,6 @@ void StaticShapeInferer::visit(const ir::operation::If &op)
}
}
-void StaticShapeInferer::visit(const ir::operation::Log &op)
-{
- handleSimpleUnaryOp(op, op.getInputs().at(ir::operation::Log::Input::INPUT));
-}
-
-void StaticShapeInferer::visit(const ir::operation::LogicalNot &op)
-{
- handleSimpleUnaryOp(op, op.getInputs().at(ir::operation::LogicalNot::Input::INPUT));
-}
-
-void StaticShapeInferer::visit(const ir::operation::LogicalOr &op)
-{
- handleBinaryArithmeticOp(op, op.getInputs().at(ir::operation::LogicalOr::Input::INPUT0),
- op.getInputs().at(ir::operation::LogicalOr::Input::INPUT1));
-}
-
-void StaticShapeInferer::visit(const ir::operation::Logistic &op)
-{
- handleSimpleUnaryOp(op, op.getInputs().at(ir::operation::Logistic::Input::INPUT));
-}
-
void StaticShapeInferer::visit(const ir::operation::L2Normalization &op)
{
handleSimpleUnaryOp(op, op.getInputs().at(ir::operation::L2Normalization::Input::INPUT));
@@ -507,29 +444,6 @@ void StaticShapeInferer::visit(const ir::operation::MatrixBandPart &op)
handleSimpleUnaryOp(op, op.getInputs().at(ir::operation::MatrixBandPart::Input::INPUT));
}
-void StaticShapeInferer::visit(const ir::operation::Max &op)
-{
- handleBinaryArithmeticOp(op, op.getInputs().at(ir::operation::Max::Input::LHS),
- op.getInputs().at(ir::operation::Max::Input::RHS));
-}
-
-void StaticShapeInferer::visit(const ir::operation::Min &op)
-{
- handleBinaryArithmeticOp(op, op.getInputs().at(ir::operation::Min::Input::LHS),
- op.getInputs().at(ir::operation::Min::Input::RHS));
-}
-
-void StaticShapeInferer::visit(const ir::operation::Mul &op)
-{
- handleBinaryArithmeticOp(op, op.getInputs().at(ir::operation::Mul::Input::LHS),
- op.getInputs().at(ir::operation::Mul::Input::RHS));
-}
-
-void StaticShapeInferer::visit(const ir::operation::Neg &op)
-{
- handleSimpleUnaryOp(op, op.getInputs().at(ir::operation::Neg::Input::INPUT));
-}
-
void StaticShapeInferer::visit(const ir::operation::OneHot &op)
{
const auto indice_idx{op.getInputs().at(ir::operation::OneHot::Input::INDICES)};
@@ -542,7 +456,7 @@ void StaticShapeInferer::visit(const ir::operation::OneHot &op)
auto output_idx = op.getOutputs().at(0);
ir::Operand &output = _operands.at(output_idx);
- if (indice.info().isDynamic() || depth.info().isDynamic() || !depth.isConstant())
+ if (!depth.isConstant())
{
output.info().setDynamic();
_return_has_dynamic_tensor = true;
@@ -558,18 +472,6 @@ void StaticShapeInferer::visit(const ir::operation::OneHot &op)
void StaticShapeInferer::visit(const ir::operation::Pack &op)
{
- bool is_any_of_inputs_dynamic = [&]() -> bool {
- for (uint32_t i = 0; i < op.getInputs().size(); ++i)
- {
- const auto &input = _operands.at(op.getInputs().at(i));
- if (input.info().isDynamic())
- {
- return true;
- }
- }
- return false;
- }();
-
const auto input_idx{op.getInputs().at(0)};
const auto &input = _operands.at(input_idx);
@@ -577,14 +479,6 @@ void StaticShapeInferer::visit(const ir::operation::Pack &op)
const auto output_idx = op.getOutputs().at(0);
ir::Operand &output = _operands.at(output_idx);
- // if input is dynamic, output also becomes dynamic
- if (is_any_of_inputs_dynamic)
- {
- output.info().setDynamic();
- _return_has_dynamic_tensor = true;
- return;
- }
-
const auto rank = input.shape().rank() + 1;
const auto axis = ((op.param().axis < 0) ? rank + op.param().axis : op.param().axis);
const auto num = op.param().num;
@@ -608,14 +502,6 @@ void StaticShapeInferer::visit(const ir::operation::Pad &op)
const auto output_idx = op.getOutputs().at(0);
ir::Operand &output = _operands.at(output_idx);
- // if input is dynamic or pad is dynamic, output also becomes dynamic
- if (input.info().isDynamic() || pad.info().isDynamic())
- {
- output.info().setDynamic();
- _return_has_dynamic_tensor = true;
- return;
- }
-
// if pad is not constant, output also becomes dynamic
if (!pad.isConstant())
{
@@ -638,13 +524,6 @@ void StaticShapeInferer::visit(const ir::operation::Permute &op)
const auto output_idx = op.getOutputs().at(0);
ir::Operand &output = _operands.at(output_idx);
- if (input.info().isDynamic())
- {
- output.info().setDynamic();
- _return_has_dynamic_tensor = true;
- return;
- }
-
// re-sizing output shape
  // Permute is a special operation whose input/output layouts may differ across backends
  // However, that is not applied here, so input/output keep the frontend layout. Because
@@ -672,13 +551,6 @@ void StaticShapeInferer::visit(const ir::operation::Range &op)
// get mutable output operand
const auto output_idx = op.getOutputs().at(0);
ir::Operand &output = _operands.at(output_idx);
- // if any input is dynamic, output also becomes dynamic
- if (start_op.info().isDynamic() || limit_op.info().isDynamic() || delta_op.info().isDynamic())
- {
- output.info().setDynamic();
- _return_has_dynamic_tensor = true;
- return;
- }
ir::Shape new_shape;
if (start_op.isConstant() && limit_op.isConstant() && delta_op.isConstant())
@@ -716,14 +588,6 @@ void StaticShapeInferer::visit(const ir::operation::Reduce &op)
const auto output_idx = op.getOutputs().at(0);
ir::Operand &output = _operands.at(output_idx);
- // if input is dynamic, output also becomes dynamic
- if (input.info().isDynamic())
- {
- output.info().setDynamic();
- _return_has_dynamic_tensor = true;
- return;
- }
-
std::vector<int32_t> axes_vec;
for (size_t i = 0; i < axes.shape().num_elements(); ++i)
{
@@ -761,14 +625,6 @@ void StaticShapeInferer::visit(const ir::operation::Reshape &op)
const auto output_idx = op.getOutputs().at(0);
ir::Operand &output = _operands.at(output_idx);
- // if input is dynamic, output also becomes dynamic
- if (input.info().isDynamic())
- {
- output.info().setDynamic();
- _return_has_dynamic_tensor = true;
- return;
- }
-
// New shape is given by second input tensor
if (op.getInputs().size() == 2)
{
@@ -827,14 +683,6 @@ void StaticShapeInferer::visit(const ir::operation::ResizeBilinear &op)
const auto output_idx = op.getOutputs().at(0);
ir::Operand &output = _operands.at(output_idx);
- // if input is dynamic, output also becomes dynamic
- if (input.info().isDynamic())
- {
- output.info().setDynamic();
- _return_has_dynamic_tensor = true;
- return;
- }
-
// Shape inferencing logic based on Params
ir::Shape new_shape = shape_inference::inferResizeBilinearShape(
input.shape(), op.param().height_out, op.param().width_out);
@@ -852,16 +700,6 @@ void StaticShapeInferer::visit(const ir::operation::Reverse &op)
handleSimpleUnaryOp(op, op.getInputs().at(ir::operation::Reverse::Input::INPUT));
}
-void StaticShapeInferer::visit(const ir::operation::Round &op)
-{
- handleSimpleUnaryOp(op, op.getInputs().at(ir::operation::Round::Input::INPUT));
-}
-
-void StaticShapeInferer::visit(const ir::operation::RSQRT &op)
-{
- handleSimpleUnaryOp(op, op.getInputs().at(ir::operation::RSQRT::Input::INPUT));
-}
-
void StaticShapeInferer::visit(const ir::operation::Select &op)
{
const auto input_cond_idx{op.getInputs().at(ir::operation::Select::Input::CONDITION)};
@@ -876,14 +714,6 @@ void StaticShapeInferer::visit(const ir::operation::Select &op)
auto output_idx = op.getOutputs().at(0);
ir::Operand &output = _operands.at(output_idx);
- if (input_cond.info().isDynamic() || input_true.info().isDynamic() ||
- input_false.info().isDynamic())
- {
- output.info().setDynamic();
- _return_has_dynamic_tensor = true;
- return;
- }
-
  // Select output shape
ir::Shape new_shape = shape_inference::inferSelectShape(
input_cond.info().shape(), input_true.info().shape(), input_false.info().shape());
@@ -899,14 +729,6 @@ void StaticShapeInferer::visit(const ir::operation::Shape &op)
const auto output_idx = op.getOutputs().at(0);
ir::Operand &output = _operands.at(output_idx);
- // if input is dynamic, output also becomes dynamic
- if (input.info().isDynamic())
- {
- output.info().setDynamic();
- _return_has_dynamic_tensor = true;
- return;
- }
-
// re-sizing output shape
ir::Shape output_shape;
output_shape.append(input.info().shape().rank());
@@ -914,11 +736,6 @@ void StaticShapeInferer::visit(const ir::operation::Shape &op)
output.info().shape(output_shape);
}
-void StaticShapeInferer::visit(const ir::operation::Sin &op)
-{
- handleSimpleUnaryOp(op, op.getInputs().at(ir::operation::Sin::Input::INPUT));
-}
-
void StaticShapeInferer::visit(const ir::operation::Slice &op)
{
const auto input_index{op.getInputs().at(ir::operation::Slice::Input::INPUT)};
@@ -930,13 +747,6 @@ void StaticShapeInferer::visit(const ir::operation::Slice &op)
const auto output_index = op.getOutputs().at(0);
ir::Operand &output = _operands.at(output_index);
- if (input.info().isDynamic() || begins.info().isDynamic() || sizes.info().isDynamic())
- {
- output.info().setDynamic();
- _return_has_dynamic_tensor = true;
- return;
- }
-
// Whether input is constant or not does not affect whether output is dynamic or not
if (!(begins.isConstant() && sizes.isConstant()))
{
@@ -970,13 +780,6 @@ void StaticShapeInferer::visit(const ir::operation::SpaceToBatchND &op)
const auto &block_shape = _operands.at(block_shape_idx);
const auto &padding = _operands.at(padding_idx);
- if (input.info().isDynamic() || block_shape.info().isDynamic() || padding.info().isDynamic())
- {
- output.info().setDynamic();
- _return_has_dynamic_tensor = true;
- return;
- }
-
// Whether input is constant or not does not affect whether output is dynamic or not
if (!(block_shape.isConstant() && padding.isConstant()))
{
@@ -1006,18 +809,6 @@ void StaticShapeInferer::visit(const ir::operation::Split &op)
const auto axis = op.param().axis;
const auto num_splits = op.param().num_splits;
- if (input.info().isDynamic())
- {
- for (int out_tensor_idx = 0; out_tensor_idx < num_splits; out_tensor_idx++)
- {
- const auto output_idx = op.getOutputs().at(out_tensor_idx);
- ir::Operand &output = _operands.at(output_idx);
- output.info().setDynamic();
- }
- _return_has_dynamic_tensor = true;
- return;
- }
-
const auto rank = input.info().shape().rank();
auto axis_resolved = axis < 0 ? axis + rank : axis;
@@ -1072,14 +863,6 @@ void StaticShapeInferer::visit(const ir::operation::StridedSlice &op)
const auto output_index = op.getOutputs().at(0);
ir::Operand &output = _operands.at(output_index);
- if (input.info().isDynamic() || starts.info().isDynamic() || ends.info().isDynamic() ||
- strides.info().isDynamic())
- {
- output.info().setDynamic();
- _return_has_dynamic_tensor = true;
- return;
- }
-
if (!(starts.isConstant() && ends.isConstant() && strides.isConstant()))
{
output.info().setDynamic();
@@ -1104,17 +887,6 @@ void StaticShapeInferer::visit(const ir::operation::StridedSlice &op)
output.info().shape(new_shape);
}
-void StaticShapeInferer::visit(const ir::operation::Sub &op)
-{
- handleBinaryArithmeticOp(op, op.getInputs().at(ir::operation::Sub::Input::LHS),
- op.getInputs().at(ir::operation::Sub::Input::RHS));
-}
-
-void StaticShapeInferer::visit(const ir::operation::Tanh &op)
-{
- handleSimpleUnaryOp(op, op.getInputs().at(ir::operation::Tanh::Input::INPUT));
-}
-
void StaticShapeInferer::visit(const ir::operation::Tile &op)
{
const auto input_idx{op.getInputs().at(ir::operation::Tile::Input::INPUT)};
@@ -1126,13 +898,6 @@ void StaticShapeInferer::visit(const ir::operation::Tile &op)
const auto output_idx = op.getOutputs().at(0);
ir::Operand &output = _operands.at(output_idx);
- if (input.info().isDynamic())
- {
- output.info().setDynamic();
- _return_has_dynamic_tensor = true;
- return;
- }
-
if (!multiplier.isConstant())
{
output.info().setDynamic();
@@ -1158,13 +923,7 @@ void StaticShapeInferer::visit(const ir::operation::Transpose &op)
ir::Operand &output = _operands.at(output_idx);
const auto perm{op.param().perm};
// const auto rank{op.param().rank};
- // if input is dynamic, output also becomes dynamic
- if (input.info().isDynamic())
- {
- output.info().setDynamic();
- _return_has_dynamic_tensor = true;
- return;
- }
+
// set output shape, based on input and params
ir::Shape new_shape = shape_inference::inferTransposeShape(input.info().shape(), perm);
output.info().shape(new_shape);
@@ -1175,20 +934,6 @@ void StaticShapeInferer::visit(const ir::operation::Unpack &op)
const auto input_idx{op.getInputs().at(0)};
const auto &input = _operands.at(input_idx);
const auto num = op.param().num;
-
- // if input is dynamic, output also becomes dynamic
- if (input.info().isDynamic())
- {
- for (int out_tensor_idx = 0; out_tensor_idx < num; out_tensor_idx++)
- {
- const auto output_idx = op.getOutputs().at(out_tensor_idx);
- ir::Operand &output = _operands.at(output_idx);
- output.info().setDynamic();
- }
- _return_has_dynamic_tensor = true;
- return;
- }
-
const auto rank = input.shape().rank();
const auto axis = ((op.param().axis < 0) ? rank + op.param().axis : op.param().axis);
@@ -1346,11 +1091,6 @@ void StaticShapeInferer::visit(const ir::operation::While &op)
}
}
-void StaticShapeInferer::visit(const ir::operation::ZerosLike &op)
-{
- handleSimpleUnaryOp(op, op.getInputs().at(ir::operation::ZerosLike::Input::INPUT));
-}
-
} // namespace compiler
} // namespace onert
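
The hunks above delete the per-operation visitors (Abs, Add, Cast, Cos, Div, Exp, Log, LogicalNot, LogicalOr, Logistic, Max, Min, Mul, Neg, Round, RSQRT, Sin, Sub, Tanh, ZerosLike) together with their dynamic-tensor early returns; those operations now reach shape inference through the consolidated BinaryArithmetic, ElementwiseActivation, ElementwiseBinary and ElementwiseUnary visitors. A minimal sketch of what the shared unary helper reduces to after this patch, with the second parameter inferred from the call sites rather than copied from the file:

// Sketch only; dynamic inputs no longer short-circuit here, since that
// handling belongs to the dynamic shape inferer.
void StaticShapeInferer::handleSimpleUnaryOp(const ir::Operation &op,
                                             const ir::OperandIndex input_idx)
{
  const auto &input = _operands.at(input_idx);

  // get mutable output operand
  const auto output_idx = op.getOutputs().at(0);
  ir::Operand &output = _operands.at(output_idx);

  // re-sizing output shape: a simple unary op keeps the input shape
  ir::Shape new_shape = input.info().shape();
  output.info().shape(new_shape);
}
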
diff --git a/runtime/onert/core/src/compiler/TensorBuilders.h b/runtime/onert/core/src/compiler/TensorBuilders.h
index c0a1ebc04..3b0360b4b 100644
--- a/runtime/onert/core/src/compiler/TensorBuilders.h
+++ b/runtime/onert/core/src/compiler/TensorBuilders.h
@@ -67,17 +67,6 @@ public:
return _cf_tensor_builder;
}
- std::shared_ptr<backend::ITensor> getITensor(ir::OperandIndex ind)
- {
- for (auto &tensor_builder : _tensor_builders)
- {
- auto tensor = tensor_builder->tensorAt(ind);
- if (tensor)
- return tensor;
- }
- return nullptr;
- }
-
private:
std::unordered_set<std::shared_ptr<backend::ITensorBuilder>> _tensor_builders;
std::shared_ptr<backend::controlflow::TensorBuilder> _cf_tensor_builder;
diff --git a/runtime/onert/core/src/compiler/TensorRegistries.h b/runtime/onert/core/src/compiler/TensorRegistries.h
new file mode 100644
index 000000000..8be87b081
--- /dev/null
+++ b/runtime/onert/core/src/compiler/TensorRegistries.h
@@ -0,0 +1,91 @@
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __ONERT_COMPILER_TENSOR_REGISTRIES_H__
+#define __ONERT_COMPILER_TENSOR_REGISTRIES_H__
+
+#include <unordered_set>
+#include <memory>
+#include "backend/BackendContext.h"
+#include "backend/Backend.h"
+#include "backend/controlflow/Config.h"
+#include "backend/controlflow/TensorBuilder.h"
+#include "backend/controlflow/TensorRegistry.h"
+
+namespace onert
+{
+namespace compiler
+{
+
+class TensorRegistries
+{
+public:
+ TensorRegistries() = default;
+
+ TensorRegistries(const onert::backend::BackendContexts &backend_contexts,
+ bool include_controlflow)
+ {
+ for (const auto &e : backend_contexts)
+ {
+ auto tensor_reg = e.second->tensor_registry;
+ if (e.first->config()->id() == backend::controlflow::Config::ID)
+ {
+ _cf_tensor_reg =
+ std::dynamic_pointer_cast<backend::controlflow::TensorRegistry>(tensor_reg);
+ if (include_controlflow)
+ _tensor_regs.insert(tensor_reg);
+ }
+ else
+ {
+ _tensor_regs.insert(tensor_reg);
+ }
+ }
+ }
+
+ std::unordered_set<std::shared_ptr<onert::backend::ITensorRegistry>>::const_iterator begin() const
+ {
+ return _tensor_regs.cbegin();
+ }
+ std::unordered_set<std::shared_ptr<onert::backend::ITensorRegistry>>::const_iterator end() const
+ {
+ return _tensor_regs.cend();
+ }
+
+ std::shared_ptr<backend::controlflow::TensorRegistry> getControlflowTensorRegistry() const
+ {
+ return _cf_tensor_reg;
+ }
+
+ std::shared_ptr<backend::ITensor> getITensor(ir::OperandIndex ind) const
+ {
+ for (auto &tensor_reg : _tensor_regs)
+ {
+ auto tensor = tensor_reg->getITensor(ind);
+ if (tensor)
+ return tensor;
+ }
+ return nullptr;
+ }
+
+private:
+ std::unordered_set<std::shared_ptr<backend::ITensorRegistry>> _tensor_regs;
+ std::shared_ptr<backend::controlflow::TensorRegistry> _cf_tensor_reg;
+};
+
+} // namespace compiler
+} // namespace onert
+
+#endif // __ONERT_COMPILER_TENSOR_REGISTRIES_H__
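
TensorRegistries takes over the cross-backend getITensor() lookup that the previous hunk removes from TensorBuilders, iterating tensor registries instead of tensor builders. A brief usage sketch, assuming a populated backend_contexts map and an operand index supplied by the surrounding compiler code (both names are illustrative here):

// Collect every backend's registry; the controlflow registry is tracked
// separately and only added to the iterable set when requested.
compiler::TensorRegistries tensor_regs{backend_contexts, /*include_controlflow=*/true};

// Cross-backend lookup, mirroring the removed TensorBuilders::getITensor.
std::shared_ptr<backend::ITensor> tensor = tensor_regs.getITensor(operand_index);
if (!tensor)
{
  // No backend has registered a tensor for this operand yet.
}

// Controlflow tensors remain reachable directly when needed.
auto cf_reg = tensor_regs.getControlflowTensorRegistry();
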
diff --git a/runtime/onert/core/src/ir/pass/ConstantInsertionPass.cc b/runtime/onert/core/src/compiler/pass/ConstantInsertionPass.cc
index 1742a0dd5..647669e46 100644
--- a/runtime/onert/core/src/ir/pass/ConstantInsertionPass.cc
+++ b/runtime/onert/core/src/compiler/pass/ConstantInsertionPass.cc
@@ -22,20 +22,20 @@
namespace onert
{
-namespace ir
+namespace compiler
{
namespace pass
{
-void ConstantInsertionPass::callback(const OperationIndex &node_index, Operation &node)
+void ConstantInsertionPass::callback(const ir::OperationIndex &node_index, ir::Operation &node)
{
const auto &op_sequence_index = _lowered_graph.op_seqs().getOperation(node_index);
const auto op_seq_lower_info = _lowered_graph.getLowerInfo(op_sequence_index);
const auto backend = op_seq_lower_info->backend();
const auto layout = op_seq_lower_info->layout();
- const auto factor = operand::PermuteFactor{backend, layout};
+ const auto factor = ir::operand::PermuteFactor{backend, layout};
- for (const auto input : node.getInputs() | Remove::DUPLICATED | ir::Remove::UNDEFINED)
+ for (const auto input : node.getInputs() | ir::Remove::DUPLICATED | ir::Remove::UNDEFINED)
{
auto &object = _graph.operands().at(input);
@@ -47,7 +47,7 @@ void ConstantInsertionPass::callback(const OperationIndex &node_index, Operation
auto new_object = object;
new_object.unsetDef();
// TODO Remove const_case
- const_cast<OperationIndexSet &>(new_object.getUses()).clear();
+ const_cast<ir::OperationIndexSet &>(new_object.getUses()).clear();
const auto new_index = _graph.operands().emplace(new_object);
_replace_operands_map[key] = new_index;
}
@@ -89,5 +89,5 @@ void ConstantInsertionPass::callback(const OperationIndex &node_index, Operation
}
} // namespace pass
-} // namespace ir
+} // namespace compiler
} // namespace onert
diff --git a/runtime/onert/core/src/ir/pass/ConstantInsertionPass.h b/runtime/onert/core/src/compiler/pass/ConstantInsertionPass.h
index 3ea4dc397..052883c92 100644
--- a/runtime/onert/core/src/ir/pass/ConstantInsertionPass.h
+++ b/runtime/onert/core/src/compiler/pass/ConstantInsertionPass.h
@@ -14,8 +14,8 @@
* limitations under the License.
*/
-#ifndef __ONERT_GRAPH_PASS_CONSTANT_INSERTION_PASS_H__
-#define __ONERT_GRAPH_PASS_CONSTANT_INSERTION_PASS_H__
+#ifndef __ONERT_COMPILER_PASS_CONSTANT_INSERTION_PASS_H__
+#define __ONERT_COMPILER_PASS_CONSTANT_INSERTION_PASS_H__
#include <ir/operand/PermuteFactor.h>
#include <ir/Index.h>
@@ -25,7 +25,7 @@
namespace onert
{
-namespace ir
+namespace compiler
{
namespace pass
{
@@ -39,13 +39,13 @@ public:
std::string id() final { return "ConstantInsertionPass"; }
public:
- void callback(const OperationIndex &index, Operation &node) final;
+ void callback(const ir::OperationIndex &index, ir::Operation &node) final;
private:
struct ReplaceKey
{
- OperandIndex index;
- operand::PermuteFactor factor;
+ ir::OperandIndex index;
+ ir::operand::PermuteFactor factor;
bool operator==(const ReplaceKey &other) const
{
@@ -61,15 +61,16 @@ private:
std::size_t operator()(const ReplaceKey &key) const noexcept
{
using std::hash;
- return hash<OperandIndex>()(key.index) ^ (hash<operand::PermuteFactor>()(key.factor) << 1);
+ return hash<ir::OperandIndex>()(key.index) ^
+ (hash<ir::operand::PermuteFactor>()(key.factor) << 1);
}
};
- std::unordered_map<ReplaceKey, OperandIndex, KeyHasher> _replace_operands_map;
+ std::unordered_map<ReplaceKey, ir::OperandIndex, KeyHasher> _replace_operands_map;
};
} // namespace pass
-} // namespace ir
+} // namespace compiler
} // namespace onert
-#endif // __ONERT_GRAPH_PASS_CONSTANT_INSERTION_PASS_H__
+#endif // __ONERT_COMPILER_PASS_CONSTANT_INSERTION_PASS_H__
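
The header keeps the same duplication bookkeeping it had before the move: a constant operand gets at most one clone per (operand index, PermuteFactor) pair, with KeyHasher combining the two component hashes. A conceptual sketch of how callback() drives the map, pieced together from the hunks above; the surrounding control flow is abbreviated and the exact lookup condition is assumed:

// Roughly what happens inside ConstantInsertionPass::callback() for each
// constant input operand `input` whose op sequence yields backend/layout `factor`.
const ReplaceKey key{input, factor};
if (_replace_operands_map.find(key) == _replace_operands_map.end())
{
  // First use of this constant with this backend/layout: clone it.
  auto new_object = object;   // copy of the constant ir::Operand
  new_object.unsetDef();
  // TODO Remove const_cast (kept as in the original)
  const_cast<ir::OperationIndexSet &>(new_object.getUses()).clear();
  const auto new_index = _graph.operands().emplace(new_object);
  _replace_operands_map[key] = new_index;
}
// Subsequent uses with the same key are rewired to _replace_operands_map[key].
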
diff --git a/runtime/onert/core/src/ir/pass/ConstantLoweringPass.cc b/runtime/onert/core/src/compiler/pass/ConstantLoweringPass.cc
index 04f4e59c0..1c1dbe0ee 100644
--- a/runtime/onert/core/src/ir/pass/ConstantLoweringPass.cc
+++ b/runtime/onert/core/src/compiler/pass/ConstantLoweringPass.cc
@@ -23,28 +23,28 @@
namespace onert
{
-namespace ir
+namespace compiler
{
namespace pass
{
-void ConstantLoweringPass::callback(const OperationIndex &node_index, Operation &node)
+void ConstantLoweringPass::callback(const ir::OperationIndex &node_index, ir::Operation &node)
{
const auto &op_sequence_index = _lowered_graph.op_seqs().getOperation(node_index);
const auto op_seq_lower_info = _lowered_graph.getLowerInfo(op_sequence_index);
const auto backend = op_seq_lower_info->backend();
const auto layout = op_seq_lower_info->layout();
- const auto factor = operand::PermuteFactor{backend, layout};
+ const auto factor = ir::operand::PermuteFactor{backend, layout};
  // Currently this runtime does not support making an operation's output a constant
- for (const auto input : node.getInputs() | Remove::DUPLICATED | ir::Remove::UNDEFINED)
+ for (const auto input : node.getInputs() | ir::Remove::DUPLICATED | ir::Remove::UNDEFINED)
{
auto &object = _graph.operands().at(input);
if (object.isConstant())
{
  // All constant operands are already assigned at each backend by ConstantInsertionPass. So a
// constant has `def` and `use` as the same PermuteFactor
- _lowered_graph.setLowerInfo(input, std::make_unique<operand::LowerInfo>());
+ _lowered_graph.setLowerInfo(input, std::make_unique<ir::operand::LowerInfo>());
_lowered_graph.getLowerInfo(input)->addDefPermuteFactor(factor);
_lowered_graph.getLowerInfo(input)->addUsePermuteFactor(factor);
}
@@ -52,5 +52,5 @@ void ConstantLoweringPass::callback(const OperationIndex &node_index, Operation
}
} // namespace pass
-} // namespace ir
+} // namespace compiler
} // namespace onert
diff --git a/runtime/onert/core/src/ir/pass/ConstantLoweringPass.h b/runtime/onert/core/src/compiler/pass/ConstantLoweringPass.h
index 5c9f4352b..e17d776d1 100644
--- a/runtime/onert/core/src/ir/pass/ConstantLoweringPass.h
+++ b/runtime/onert/core/src/compiler/pass/ConstantLoweringPass.h
@@ -14,15 +14,15 @@
* limitations under the License.
*/
-#ifndef __ONERT_GRAPH_PASS_CONSTANT_LOWERING_PASS_H__
-#define __ONERT_GRAPH_PASS_CONSTANT_LOWERING_PASS_H__
+#ifndef __ONERT_COMPILER_PASS_CONSTANT_LOWERING_PASS_H__
+#define __ONERT_COMPILER_PASS_CONSTANT_LOWERING_PASS_H__
#include <ir/Index.h>
#include "LoweredOperationPass.h"
namespace onert
{
-namespace ir
+namespace compiler
{
namespace pass
{
@@ -36,11 +36,11 @@ public:
std::string id() final { return "ConstantLoweringPass"; }
public:
- void callback(const OperationIndex &index, Operation &node) final;
+ void callback(const ir::OperationIndex &index, ir::Operation &node) final;
};
} // namespace pass
-} // namespace ir
+} // namespace compiler
} // namespace onert
-#endif // __ONERT_GRAPH_PASS_CONSTANT_LOWERING_PASS_H__
+#endif // __ONERT_COMPILER_PASS_CONSTANT_LOWERING_PASS_H__
diff --git a/runtime/onert/core/src/ir/pass/LoweredOperandPass.h b/runtime/onert/core/src/compiler/pass/LoweredOperandPass.h
index eefb8ddfb..0c5f7d745 100644
--- a/runtime/onert/core/src/ir/pass/LoweredOperandPass.h
+++ b/runtime/onert/core/src/compiler/pass/LoweredOperandPass.h
@@ -18,11 +18,11 @@
#define __ONERT_IR_PASS_LOWERED_OPERAND_PASS_H__
#include "OperandPass.h"
-#include "ir/LoweredGraph.h"
+#include "compiler/LoweredGraph.h"
namespace onert
{
-namespace ir
+namespace compiler
{
namespace pass
{
@@ -30,7 +30,7 @@ namespace pass
class LoweredOperandPass : public OperandPass
{
public:
- LoweredOperandPass(ir::LoweredGraph &lowered_graph)
+ LoweredOperandPass(compiler::LoweredGraph &lowered_graph)
: OperandPass{lowered_graph.graph()}, _lowered_graph{lowered_graph}
{
// DO NOTHING
@@ -39,14 +39,14 @@ public:
virtual ~LoweredOperandPass() = default;
std::string id() override = 0;
- void callback(const OperandIndex &i, Operand &o) override = 0;
+ void callback(const ir::OperandIndex &i, ir::Operand &o) override = 0;
protected:
- ir::LoweredGraph &_lowered_graph;
+ compiler::LoweredGraph &_lowered_graph;
};
} // namespace pass
-} // namespace ir
+} // namespace compiler
} // namespace onert
#endif // __ONERT_IR_PASS_LOWERED_OPERAND_PASS_H__
diff --git a/runtime/onert/core/src/ir/pass/LoweredOperationPass.h b/runtime/onert/core/src/compiler/pass/LoweredOperationPass.h
index 0138712d7..5c8569be2 100644
--- a/runtime/onert/core/src/ir/pass/LoweredOperationPass.h
+++ b/runtime/onert/core/src/compiler/pass/LoweredOperationPass.h
@@ -18,11 +18,11 @@
#define __ONERT_IR_PASS_LOWERED_OPERATION_PASS_H__
#include "OperationPass.h"
-#include "ir/LoweredGraph.h"
+#include "compiler/LoweredGraph.h"
namespace onert
{
-namespace ir
+namespace compiler
{
namespace pass
{
@@ -30,7 +30,7 @@ namespace pass
class LoweredOperationPass : public OperationPass
{
public:
- LoweredOperationPass(ir::LoweredGraph &lowered_graph)
+ LoweredOperationPass(LoweredGraph &lowered_graph)
: OperationPass{lowered_graph.graph()}, _lowered_graph{lowered_graph}
{
// DO NOTHING
@@ -39,14 +39,14 @@ public:
virtual ~LoweredOperationPass() = default;
std::string id() override = 0;
- void callback(const OperationIndex &i, Operation &o) override = 0;
+ void callback(const ir::OperationIndex &i, ir::Operation &o) override = 0;
protected:
- ir::LoweredGraph &_lowered_graph;
+ LoweredGraph &_lowered_graph;
};
} // namespace pass
-} // namespace ir
+} // namespace compiler
} // namespace onert
#endif // __ONERT_IR_PASS_LOWERED_OPERATION_PASS_H__
diff --git a/runtime/onert/core/src/ir/pass/OperandPass.cc b/runtime/onert/core/src/compiler/pass/OperandPass.cc
index 693a0f493..50c001c30 100644
--- a/runtime/onert/core/src/ir/pass/OperandPass.cc
+++ b/runtime/onert/core/src/compiler/pass/OperandPass.cc
@@ -20,7 +20,7 @@
namespace onert
{
-namespace ir
+namespace compiler
{
namespace pass
{
@@ -28,9 +28,9 @@ namespace pass
void OperandPass::run()
{
_graph.operands().iterate(
- [&](const OperandIndex &index, Operand &object) { callback(index, object); });
+ [&](const ir::OperandIndex &index, ir::Operand &object) { callback(index, object); });
}
} // namespace pass
-} // namespace ir
+} // namespace compiler
} // namespace onert
diff --git a/runtime/onert/core/src/ir/pass/OperandPass.h b/runtime/onert/core/src/compiler/pass/OperandPass.h
index 393060741..b094879c5 100644
--- a/runtime/onert/core/src/ir/pass/OperandPass.h
+++ b/runtime/onert/core/src/compiler/pass/OperandPass.h
@@ -14,8 +14,8 @@
* limitations under the License.
*/
-#ifndef __ONERT_GRAPH_PASS_OPERAND_PASS_H__
-#define __ONERT_GRAPH_PASS_OPERAND_PASS_H__
+#ifndef __ONERT_COMPILER_PASS_OPERAND_PASS_H__
+#define __ONERT_COMPILER_PASS_OPERAND_PASS_H__
#include "Pass.h"
#include "ir/Index.h"
@@ -30,7 +30,7 @@ class Operand;
namespace onert
{
-namespace ir
+namespace compiler
{
namespace pass
{
@@ -44,11 +44,11 @@ public:
public:
std::string id() override = 0;
void run() override final;
- virtual void callback(const OperandIndex &i, Operand &o) = 0;
+ virtual void callback(const ir::OperandIndex &i, ir::Operand &o) = 0;
};
} // namespace pass
-} // namespace ir
+} // namespace compiler
} // namespace onert
-#endif // __ONERT_GRAPH_PASS_OPERAND_PASS_H__
+#endif // __ONERT_COMPILER_PASS_OPERAND_PASS_H__
diff --git a/runtime/onert/core/src/ir/pass/OperationPass.cc b/runtime/onert/core/src/compiler/pass/OperationPass.cc
index 84b1da3ee..d7a55cb22 100644
--- a/runtime/onert/core/src/ir/pass/OperationPass.cc
+++ b/runtime/onert/core/src/compiler/pass/OperationPass.cc
@@ -22,7 +22,7 @@
namespace onert
{
-namespace ir
+namespace compiler
{
namespace pass
{
@@ -30,9 +30,9 @@ namespace pass
void OperationPass::run()
{
_graph.operations().iterate(
- [&](const OperationIndex &index, Operation &node) { callback(index, node); });
+ [&](const ir::OperationIndex &index, ir::Operation &node) { callback(index, node); });
}
} // namespace pass
-} // namespace ir
+} // namespace compiler
} // namespace onert
diff --git a/runtime/onert/core/src/ir/pass/OperationPass.h b/runtime/onert/core/src/compiler/pass/OperationPass.h
index 1733f87ed..ac4d818a2 100644
--- a/runtime/onert/core/src/ir/pass/OperationPass.h
+++ b/runtime/onert/core/src/compiler/pass/OperationPass.h
@@ -19,8 +19,8 @@
* @brief This file contains OperationPass class
*/
-#ifndef __ONERT_GRAPH_PASS_OPERATION_PASS_H__
-#define __ONERT_GRAPH_PASS_OPERATION_PASS_H__
+#ifndef __ONERT_COMPILER_PASS_OPERATION_PASS_H__
+#define __ONERT_COMPILER_PASS_OPERATION_PASS_H__
#include "Pass.h"
#include "ir/Index.h"
@@ -35,7 +35,7 @@ class Operation;
namespace onert
{
-namespace ir
+namespace compiler
{
namespace pass
{
@@ -62,7 +62,7 @@ public:
* @param index is the index of a node in graph
* @param node is the node in graph
*/
- virtual void callback(const OperationIndex &index, Operation &node) = 0;
+ virtual void callback(const ir::OperationIndex &index, ir::Operation &node) = 0;
/**
* @brief Run the pass
@@ -71,7 +71,7 @@ public:
};
} // namespace pass
-} // namespace ir
+} // namespace compiler
} // namespace onert
-#endif // __ONERT_GRAPH_PASS_OPERATION_PASS_H__
+#endif // __ONERT_COMPILER_PASS_OPERATION_PASS_H__
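
Every pass file in this series follows the same mechanical move from onert::ir::pass to onert::compiler::pass, with ir types now spelled out explicitly. A sketch of what a minimal operation pass looks like after the move; the class itself is illustrative and not part of the patch:

namespace onert
{
namespace compiler
{
namespace pass
{

// Illustrative only: shows the post-move namespace and callback signature.
class CountOperationsPass : public OperationPass
{
public:
  using OperationPass::OperationPass;   // Pass(ir::Graph &) is inherited

  std::string id() override { return "CountOperationsPass"; }

  void callback(const ir::OperationIndex &, ir::Operation &) override
  {
    // ir:: types must be qualified now that the pass no longer lives in namespace ir
    ++_count;
  }

private:
  int _count = 0;
};

} // namespace pass
} // namespace compiler
} // namespace onert
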
diff --git a/runtime/onert/core/src/ir/pass/Pass.h b/runtime/onert/core/src/compiler/pass/Pass.h
index 1c6628f6f..3f356c337 100644
--- a/runtime/onert/core/src/ir/pass/Pass.h
+++ b/runtime/onert/core/src/compiler/pass/Pass.h
@@ -14,8 +14,8 @@
* limitations under the License.
*/
-#ifndef __ONERT_GRAPH_PASS_PASS_H__
-#define __ONERT_GRAPH_PASS_PASS_H__
+#ifndef __ONERT_COMPILER_PASS_PASS_H__
+#define __ONERT_COMPILER_PASS_PASS_H__
#include <string>
@@ -24,12 +24,12 @@ namespace onert
namespace ir
{
class Graph;
-} // namespace ir
+} // namespace compiler
} // namespace onert
namespace onert
{
-namespace ir
+namespace compiler
{
namespace pass
{
@@ -37,7 +37,7 @@ namespace pass
class Pass
{
public:
- Pass(Graph &graph) : _graph{graph} {}
+ Pass(ir::Graph &graph) : _graph{graph} {}
virtual ~Pass() = default;
public:
@@ -45,11 +45,11 @@ public:
virtual void run() = 0;
protected:
- Graph &_graph;
+ ir::Graph &_graph;
};
} // namespace pass
-} // namespace ir
+} // namespace compiler
} // namespace onert
-#endif // __ONERT_GRAPH_PASS_PASS_H__
+#endif // __ONERT_COMPILER_PASS_PASS_H__
diff --git a/runtime/onert/core/src/ir/pass/PermutationEliminationPass.cc b/runtime/onert/core/src/compiler/pass/PermutationEliminationPass.cc
index 2deccd40b..f01697034 100644
--- a/runtime/onert/core/src/ir/pass/PermutationEliminationPass.cc
+++ b/runtime/onert/core/src/compiler/pass/PermutationEliminationPass.cc
@@ -21,35 +21,33 @@
namespace onert
{
-namespace ir
+namespace compiler
{
namespace pass
{
-void PermutationEliminationPass::callback(const OperationIndex &ind, Operation &node)
+void PermutationEliminationPass::callback(const ir::OperationIndex &ind, ir::Operation &node)
{
_op_ind = ind;
node.accept(*this);
};
-void PermutationEliminationPass::visit(const operation::Permute &node)
+void PermutationEliminationPass::visit(const ir::operation::Permute &node)
{
auto in_operand = node.getInputs().at(0);
auto out_operand = node.getOutputs().at(0);
- // Check if two tensors are both portable
- // TODO Make this general, this is just a workaround to check two tensors are portable
+  // Check if two tensors are both portable; if not, we cannot eliminate the node
{
auto in_def_factor = _lowered_graph.getLowerInfo(in_operand)->def_factors().getOnlyElement();
auto out_def_factor = _lowered_graph.getLowerInfo(out_operand)->def_factors().getOnlyElement();
- auto in_backend_id = in_def_factor.backend()->config()->id();
- auto out_backend_id = out_def_factor.backend()->config()->id();
+ auto in_config = in_def_factor.backend()->config();
+ auto out_config = out_def_factor.backend()->config();
- // TODO Fix this workaround that removes only Permute between cpu and controlflow backend.
- // This should be general.
- if (!((in_backend_id == backend::controlflow::Config::ID && out_backend_id == "cpu") ||
- (in_backend_id == "cpu" && out_backend_id == backend::controlflow::Config::ID)))
+ // FIXME Supporting dynamic tensor does not exactly mean those are portable.
+ // It may need to have another config option for checking if each uses `IPortableTensor`.
+ if (!(in_config->supportDynamicTensor() && out_config->supportDynamicTensor()))
return;
}
@@ -65,7 +63,7 @@ void PermutationEliminationPass::visit(const operation::Permute &node)
if (!op_seq.getOutputs().contains(in_operand))
return;
- // Update OpSequence/Operation edges and Operand edges
+ // Update OpSequence/ir::Operation edges and ir::Operand edges
op_seq.replaceOutputs(in_operand, out_operand);
for (auto op : op_seq.operations())
{
@@ -106,8 +104,8 @@ void PermutationEliminationPass::visit(const operation::Permute &node)
});
VERBOSE(removePermute) << "Permute Op removed, node index : " << _op_ind << std::endl;
- VERBOSE(removePermute) << " - Input (removed) Operand : " << in_operand << std::endl;
- VERBOSE(removePermute) << " - Output(kept) Operand : " << out_operand << std::endl;
+ VERBOSE(removePermute) << " - Input (removed) ir::Operand : " << in_operand << std::endl;
+ VERBOSE(removePermute) << " - Output(kept) ir::Operand : " << out_operand << std::endl;
}
else
{
@@ -145,11 +143,11 @@ void PermutationEliminationPass::visit(const operation::Permute &node)
}
VERBOSE(removePermute) << "Permute Op removed, node index : " << _op_ind << std::endl;
- VERBOSE(removePermute) << " - Input (kept) Operand : " << in_operand << std::endl;
- VERBOSE(removePermute) << " - Output(removed) Operand : " << out_operand << std::endl;
+ VERBOSE(removePermute) << " - Input (kept) ir::Operand : " << in_operand << std::endl;
+ VERBOSE(removePermute) << " - Output(removed) ir::Operand : " << out_operand << std::endl;
}
}
} // namespace pass
-} // namespace ir
+} // namespace compiler
} // namespace onert
diff --git a/runtime/onert/core/src/ir/pass/PermutationEliminationPass.h b/runtime/onert/core/src/compiler/pass/PermutationEliminationPass.h
index 614e44cd2..29daf1a82 100644
--- a/runtime/onert/core/src/ir/pass/PermutationEliminationPass.h
+++ b/runtime/onert/core/src/compiler/pass/PermutationEliminationPass.h
@@ -14,15 +14,15 @@
* limitations under the License.
*/
-#ifndef __ONERT_GRAPH_PASS_PERMUTATION_ELIMINATION_PASS_H__
-#define __ONERT_GRAPH_PASS_PERMUTATION_ELIMINATION_PASS_H__
+#ifndef __ONERT_COMPILER_PASS_PERMUTATION_ELIMINATION_PASS_H__
+#define __ONERT_COMPILER_PASS_PERMUTATION_ELIMINATION_PASS_H__
#include "ir/OperationVisitor.h"
#include "LoweredOperationPass.h"
namespace onert
{
-namespace ir
+namespace compiler
{
namespace pass
{
@@ -40,7 +40,7 @@ namespace pass
* @note This is an optimization pass which means that everything should work fine even if this pass
* was skipped.
*/
-class PermutationEliminationPass : public LoweredOperationPass, public OperationVisitor
+class PermutationEliminationPass : public LoweredOperationPass, public ir::OperationVisitor
{
public:
using LoweredOperationPass::LoweredOperationPass;
@@ -49,17 +49,17 @@ public:
std::string id() final { return "PermutationEliminationPass"; }
public:
- void callback(const OperationIndex &i, Operation &n) final;
+ void callback(const ir::OperationIndex &i, ir::Operation &n) final;
private:
- void visit(const operation::Permute &) final;
+ void visit(const ir::operation::Permute &) final;
private:
ir::OperationIndex _op_ind;
};
} // namespace pass
-} // namespace ir
+} // namespace compiler
} // namespace onert
-#endif // __ONERT_GRAPH_PASS_PERMUTATION_ELIMINATION_PASS_H__
+#endif // __ONERT_COMPILER_PASS_PERMUTATION_ELIMINATION_PASS_H__
diff --git a/runtime/onert/core/src/ir/pass/PermutationInsertionPass.cc b/runtime/onert/core/src/compiler/pass/PermutationInsertionPass.cc
index 3578af813..c83a72ada 100644
--- a/runtime/onert/core/src/ir/pass/PermutationInsertionPass.cc
+++ b/runtime/onert/core/src/compiler/pass/PermutationInsertionPass.cc
@@ -31,12 +31,12 @@
namespace onert
{
-namespace ir
+namespace compiler
{
namespace pass
{
-void PermutationInsertionPass::callback(const OperandIndex &index, Operand &object)
+void PermutationInsertionPass::callback(const ir::OperandIndex &index, ir::Operand &object)
{
auto &&operand_li = _lowered_graph.getLowerInfo(index);
assert(operand_li);
@@ -48,10 +48,10 @@ void PermutationInsertionPass::callback(const OperandIndex &index, Operand &obje
return;
}
- std::list<OperationIndex> permute_indexes;
+ std::list<ir::OperationIndex> permute_indexes;
// Build a map for all necessary type of operands
- std::unordered_map<operand::PermuteFactor, OperandIndex> factor_to_index;
+ std::unordered_map<ir::operand::PermuteFactor, ir::OperandIndex> factor_to_index;
{
assert(operand_li->def_factors().size() == 1);
for (auto factor : operand_li->def_factors())
@@ -72,7 +72,7 @@ void PermutationInsertionPass::callback(const OperandIndex &index, Operand &obje
// Update operations' input that uses this operand
{
- std::list<OperationIndex> remove_list;
+ std::list<ir::OperationIndex> remove_list;
auto uses = object.getUses();
for (auto use : uses)
@@ -121,8 +121,8 @@ void PermutationInsertionPass::callback(const OperandIndex &index, Operand &obje
}
}
-OperationIndex PermutationInsertionPass::insertPermute(const OperandIndex &operand_index,
- const operand::PermuteFactor &factor)
+ir::OperationIndex PermutationInsertionPass::insertPermute(const ir::OperandIndex &operand_index,
+ const ir::operand::PermuteFactor &factor)
{
assert(!_graph.isBuildingPhase());
@@ -143,14 +143,14 @@ OperationIndex PermutationInsertionPass::insertPermute(const OperandIndex &opera
auto output_backend = factor.backend();
// NOTE Permute may not have specific layout because the layout of input and output may be
// different.
- const auto permute_node_layout = Layout::UNKNOWN;
+ const auto permute_node_layout = ir::Layout::UNKNOWN;
  // NOTE If one backend supports several layouts, that backend must support the Permute operation
const backend::Backend *permute_node_backend = compiler::BackendManager::get().getControlflow();
if (input_backend == output_backend)
{
permute_node_backend = input_backend;
}
- const operand::PermuteFactor permute_node_factor{permute_node_backend, permute_node_layout};
+ const ir::operand::PermuteFactor permute_node_factor{permute_node_backend, permute_node_layout};
// Update LowerInfo of input operand
auto operand_lower_info = _lowered_graph.getLowerInfo(operand_index);
@@ -158,7 +158,7 @@ OperationIndex PermutationInsertionPass::insertPermute(const OperandIndex &opera
operand_lower_info->addUsePermuteFactor(permute_node_factor);
// Update LowerInfo of output operand
- auto out_operand_li = std::make_unique<operand::LowerInfo>();
+ auto out_operand_li = std::make_unique<ir::operand::LowerInfo>();
// The input and output factors of all nodes will be the same except Permute. So Tensor's
  // allocators allocate memory using only the information of the def permutation factor for now.
@@ -170,13 +170,13 @@ OperationIndex PermutationInsertionPass::insertPermute(const OperandIndex &opera
// Insert permute operation to the graph
const auto input_layout = input_factor.layout();
const auto output_layout = factor.layout();
- using Permute = operation::Permute;
+ using Permute = ir::operation::Permute;
const auto permute_type = [&]() {
- if (input_layout == Layout::NHWC && output_layout == Layout::NCHW)
+ if (input_layout == ir::Layout::NHWC && output_layout == ir::Layout::NCHW)
{
return Permute::Type::NHWC_TO_NCHW;
}
- else if (input_layout == Layout::NCHW && output_layout == Layout::NHWC)
+ else if (input_layout == ir::Layout::NCHW && output_layout == ir::Layout::NHWC)
{
return Permute::Type::NCHW_TO_NHWC;
}
@@ -200,7 +200,7 @@ OperationIndex PermutationInsertionPass::insertPermute(const OperandIndex &opera
auto &op_seq = _lowered_graph.op_seqs().at(op_seq_index);
op_seq.setInputs(node.getInputs());
op_seq.setOutputs(node.getOutputs());
- _lowered_graph.setLowerInfo(op_seq_index, std::make_unique<operation::LowerInfo>(
+ _lowered_graph.setLowerInfo(op_seq_index, std::make_unique<ir::operation::LowerInfo>(
permute_node_backend, permute_node_layout));
}
@@ -212,5 +212,5 @@ OperationIndex PermutationInsertionPass::insertPermute(const OperandIndex &opera
return node_index;
}
} // namespace pass
-} // namespace ir
+} // namespace compiler
} // namespace onert
diff --git a/runtime/onert/core/src/ir/pass/PermutationInsertionPass.h b/runtime/onert/core/src/compiler/pass/PermutationInsertionPass.h
index 6c30c6f12..758515385 100644
--- a/runtime/onert/core/src/ir/pass/PermutationInsertionPass.h
+++ b/runtime/onert/core/src/compiler/pass/PermutationInsertionPass.h
@@ -14,17 +14,17 @@
* limitations under the License.
*/
-#ifndef __ONERT_GRAPH_PASS_PERMUTATION_INSERTION_PASS_H__
-#define __ONERT_GRAPH_PASS_PERMUTATION_INSERTION_PASS_H__
+#ifndef __ONERT_COMPILER_PASS_PERMUTATION_INSERTION_PASS_H__
+#define __ONERT_COMPILER_PASS_PERMUTATION_INSERTION_PASS_H__
#include "LoweredOperandPass.h"
#include "compiler/BackendManager.h"
-#include "ir/Operand.h" //for OperationIndex
+#include "ir/Operand.h"
#include "ir/operand/PermuteFactor.h"
namespace onert
{
-namespace ir
+namespace compiler
{
namespace pass
{
@@ -36,7 +36,7 @@ public:
public:
std::string id() override { return "PermutationInsertionPass"; }
- void callback(const OperandIndex &index, Operand &object) override;
+ void callback(const ir::OperandIndex &index, ir::Operand &object) override;
private:
/**
@@ -45,14 +45,14 @@ private:
* @param operand_index is the target operand index for the insertion
* @param factor is the output operand's backend type and layout
*
- * @return OperationIndex
+ * @return ir::OperationIndex
*/
- OperationIndex insertPermute(const OperandIndex &operand_index,
- const operand::PermuteFactor &factor);
+ ir::OperationIndex insertPermute(const ir::OperandIndex &operand_index,
+ const ir::operand::PermuteFactor &factor);
};
} // namespace pass
-} // namespace ir
+} // namespace compiler
} // namespace onert
-#endif // __ONERT_GRAPH_PASS_PERMUTATION_INSERTION_PASS_H__
+#endif // __ONERT_COMPILER_PASS_PERMUTATION_INSERTION_PASS_H__
diff --git a/runtime/onert/core/src/ir/pass/PermutationOperationPass.cc b/runtime/onert/core/src/compiler/pass/PermutationOperationPass.cc
index 6eb412cf1..c5c95c726 100644
--- a/runtime/onert/core/src/ir/pass/PermutationOperationPass.cc
+++ b/runtime/onert/core/src/compiler/pass/PermutationOperationPass.cc
@@ -23,11 +23,13 @@
namespace onert
{
-namespace ir
+namespace compiler
{
namespace pass
{
+using namespace ir;
+
void PermutationOperationPass::callback(const OperationIndex &, Operation &node)
{
node.accept(*this);
@@ -70,7 +72,7 @@ void PermutationOperationPass::applyExpandRanks(const Operation &node)
"operand used in more than one node");
// TODO remove const_cast later. For example, _ctx may need to be a non const variable or
// a node to extend shape may be inserted in front of this operation
- const_cast<ir::Shape &>(operand.shape()).extendRank(expanded_rank);
+ const_cast<Shape &>(operand.shape()).extendRank(expanded_rank);
}
}
}
@@ -134,7 +136,7 @@ void PermutationOperationPass::changeToKeepLayout(const Operation &node)
const auto op_seq_li = _lowered_graph.getLowerInfo(op_seq_index);
_lowered_graph.setLowerInfo(
next_op_seq_index,
- std::make_unique<operation::LowerInfo>(op_seq_li->backend(), op_seq_li->layout()));
+ std::make_unique<ir::operation::LowerInfo>(op_seq_li->backend(), op_seq_li->layout()));
}
}
@@ -164,8 +166,8 @@ void PermutationOperationPass::changeToKeepLayout(const Operation &node)
auto &new_op_seq = _lowered_graph.op_seqs().at(new_op_seq_index);
new_op_seq.setInputs(node.getInputs());
new_op_seq.setOutputs(node.getOutputs());
- _lowered_graph.setLowerInfo(new_op_seq_index,
- std::make_unique<operation::LowerInfo>(backend, frontend_layout));
+ _lowered_graph.setLowerInfo(
+ new_op_seq_index, std::make_unique<ir::operation::LowerInfo>(backend, frontend_layout));
}
// Change PermuteFactors of operands of target node
@@ -175,7 +177,7 @@ void PermutationOperationPass::changeToKeepLayout(const Operation &node)
const auto backend = op_seq_li->backend();
const operand::PermuteFactor removed_factor{backend, backend_layout};
const operand::PermuteFactor new_factor{backend, frontend_layout};
- for (const auto &input : node.getInputs() | Remove::DUPLICATED | ir::Remove::UNDEFINED)
+ for (const auto &input : node.getInputs() | Remove::DUPLICATED | Remove::UNDEFINED)
{
bool canRemove = true;
for (const auto &use : _graph.operands().at(input).getUses())
@@ -227,17 +229,31 @@ void PermutationOperationPass::changeToKeepLayout(const Operation &node)
}
}
-void PermutationOperationPass::visit(const operation::Add &node) { applyExpandRanks(node); }
+void PermutationOperationPass::visit(const ir::operation::BinaryArithmetic &node)
+{
+ applyExpandRanks(node);
+}
-void PermutationOperationPass::visit(const operation::Concat &node) { applyExpandRanks(node); }
+void PermutationOperationPass::visit(const ir::operation::Concat &node) { applyExpandRanks(node); }
-void PermutationOperationPass::visit(const operation::Comparison &node) { applyExpandRanks(node); }
+void PermutationOperationPass::visit(const ir::operation::Comparison &node)
+{
+ applyExpandRanks(node);
+}
-void PermutationOperationPass::visit(const operation::Div &node) { applyExpandRanks(node); }
+void PermutationOperationPass::visit(const ir::operation::ElementwiseBinary &node)
+{
+ applyExpandRanks(node);
+}
-void PermutationOperationPass::visit(const operation::FullyConnected &node)
+void PermutationOperationPass::visit(const ir::operation::ElementwiseUnary &node)
{
- const auto &input_ind = node.getInputs().at(operation::FullyConnected::Input::INPUT);
+ applyExpandRanks(node);
+}
+
+void PermutationOperationPass::visit(const ir::operation::FullyConnected &node)
+{
+ const auto &input_ind = node.getInputs().at(ir::operation::FullyConnected::Input::INPUT);
const auto &input_obj = _graph.operands().at(input_ind);
const auto &input_shape = input_obj.shape();
@@ -247,9 +263,9 @@ void PermutationOperationPass::visit(const operation::FullyConnected &node)
}
}
-void PermutationOperationPass::visit(const operation::Gather &node)
+void PermutationOperationPass::visit(const ir::operation::Gather &node)
{
- const auto &input_ind = node.getInputs().at(operation::Gather::Input::INPUT);
+ const auto &input_ind = node.getInputs().at(ir::operation::Gather::Input::INPUT);
const auto &input_obj = _graph.operands().at(input_ind);
const auto &input_shape = input_obj.shape();
@@ -263,21 +279,9 @@ void PermutationOperationPass::visit(const operation::Gather &node)
}
}
-void PermutationOperationPass::visit(const operation::LogicalAnd &node) { applyExpandRanks(node); }
-
-void PermutationOperationPass::visit(const operation::LogicalNot &node) { applyExpandRanks(node); }
-
-void PermutationOperationPass::visit(const operation::LogicalOr &node) { applyExpandRanks(node); }
-
-void PermutationOperationPass::visit(const operation::Max &node) { applyExpandRanks(node); }
-
-void PermutationOperationPass::visit(const operation::Min &node) { applyExpandRanks(node); }
-
-void PermutationOperationPass::visit(const operation::Mul &node) { applyExpandRanks(node); }
-
-void PermutationOperationPass::visit(const operation::Pack &node)
+void PermutationOperationPass::visit(const ir::operation::Pack &node)
{
- const auto &input_ind = node.getInputs().at(operation::Reshape::Input::INPUT);
+ const auto &input_ind = node.getInputs().at(ir::operation::Reshape::Input::INPUT);
const auto &input_obj = _graph.operands().at(input_ind);
const auto &input_shape = input_obj.shape();
@@ -291,11 +295,11 @@ void PermutationOperationPass::visit(const operation::Pack &node)
}
}
-void PermutationOperationPass::visit(const operation::PReLU &node) { applyExpandRanks(node); }
+void PermutationOperationPass::visit(const ir::operation::PReLU &node) { applyExpandRanks(node); }
-void PermutationOperationPass::visit(const operation::Reshape &node)
+void PermutationOperationPass::visit(const ir::operation::Reshape &node)
{
- const auto &input_ind = node.getInputs().at(operation::Reshape::Input::INPUT);
+ const auto &input_ind = node.getInputs().at(ir::operation::Reshape::Input::INPUT);
const auto &input_obj = _graph.operands().at(input_ind);
const auto &input_shape = input_obj.shape();
@@ -309,16 +313,14 @@ void PermutationOperationPass::visit(const operation::Reshape &node)
}
}
-void PermutationOperationPass::visit(const operation::SquaredDifference &node)
+void PermutationOperationPass::visit(const ir::operation::SquaredDifference &node)
{
applyExpandRanks(node);
}
-void PermutationOperationPass::visit(const operation::Sub &node) { applyExpandRanks(node); }
-
-void PermutationOperationPass::visit(const operation::Unpack &node)
+void PermutationOperationPass::visit(const ir::operation::Unpack &node)
{
- const auto &input_ind = node.getInputs().at(operation::Reshape::Input::INPUT);
+ const auto &input_ind = node.getInputs().at(ir::operation::Reshape::Input::INPUT);
const auto &input_obj = _graph.operands().at(input_ind);
const auto &input_shape = input_obj.shape();
@@ -333,5 +335,5 @@ void PermutationOperationPass::visit(const operation::Unpack &node)
}
} // namespace pass
-} // namespace ir
+} // namespace compiler
} // namespace onert
diff --git a/runtime/onert/core/src/compiler/pass/PermutationOperationPass.h b/runtime/onert/core/src/compiler/pass/PermutationOperationPass.h
new file mode 100644
index 000000000..2dd76b971
--- /dev/null
+++ b/runtime/onert/core/src/compiler/pass/PermutationOperationPass.h
@@ -0,0 +1,64 @@
+/*
+ * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __ONERT_COMPILER_PASS_PERMUTATION_OPERATION_PASS_H__
+#define __ONERT_COMPILER_PASS_PERMUTATION_OPERATION_PASS_H__
+
+#include "ir/OperationVisitor.h"
+#include "LoweredOperationPass.h"
+
+namespace onert
+{
+namespace compiler
+{
+namespace pass
+{
+
+class PermutationOperationPass : public LoweredOperationPass, public ir::OperationVisitor
+{
+public:
+ using LoweredOperationPass::LoweredOperationPass;
+
+public:
+ std::string id() final { return "PermutationOperationPass"; }
+
+public:
+ void callback(const ir::OperationIndex &i, ir::Operation &n) final;
+
+public:
+ void visit(const ir::operation::BinaryArithmetic &) final;
+ void visit(const ir::operation::Comparison &) final;
+ void visit(const ir::operation::Concat &) final;
+ void visit(const ir::operation::ElementwiseBinary &) final;
+ void visit(const ir::operation::ElementwiseUnary &) final;
+ void visit(const ir::operation::Pack &) final;
+ void visit(const ir::operation::PReLU &) final;
+ void visit(const ir::operation::SquaredDifference &) final;
+ void visit(const ir::operation::Unpack &) final;
+ void visit(const ir::operation::FullyConnected &) final;
+ void visit(const ir::operation::Gather &) final;
+ void visit(const ir::operation::Reshape &) final;
+
+private:
+ void applyExpandRanks(const ir::Operation &);
+ void changeToKeepLayout(const ir::Operation &);
+};
+
+} // namespace pass
+} // namespace compiler
+} // namespace onert
+
+#endif // __ONERT_COMPILER_PASS_PERMUTATION_OPERATION_PASS_H__
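
The broadcast-style visitors listed in this header (BinaryArithmetic, Comparison, Concat, ElementwiseBinary, ElementwiseUnary, PReLU, SquaredDifference) all route through applyExpandRanks, which extends lower-rank operand shapes in place via extendRank, as seen in the .cc hunk. A heavily abbreviated sketch of that idea; the assumption that the output rank is the broadcast target, and the omission of the single-use check, are simplifications not taken from the file:

// Conceptual sketch only; see applyExpandRanks in the .cc diff above.
void expandRanksSketch(const ir::Operation &node, ir::Operands &operands)
{
  const int target_rank = operands.at(node.getOutputs().at(0)).shape().rank();

  for (const auto &input : node.getInputs())
  {
    const auto &operand = operands.at(input);
    if (operand.shape().rank() < target_rank)
    {
      // Mutates the shape in place (const_cast as in the original) so that later
      // layout/permutation handling sees operands of matching rank.
      const_cast<ir::Shape &>(operand.shape()).extendRank(target_rank);
    }
  }
}
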
diff --git a/runtime/onert/core/src/dumper/dot/DotDumper.h b/runtime/onert/core/src/dumper/dot/DotDumper.h
index 668785a81..fdbca1642 100644
--- a/runtime/onert/core/src/dumper/dot/DotDumper.h
+++ b/runtime/onert/core/src/dumper/dot/DotDumper.h
@@ -15,7 +15,7 @@
*/
#include "ir/Graph.h"
-#include "ir/LoweredGraph.h"
+#include "compiler/LoweredGraph.h"
#ifndef __ONERT_DUMPER_DOT_DOT_DUMPER_H__
#define __ONERT_DUMPER_DOT_DOT_DUMPER_H__
@@ -42,7 +42,7 @@ public:
: _lowered_graph{nullptr}, _graph(graph), _level{level}
{
}
- DotDumper(const ir::LoweredGraph *lowered_graph, Level level)
+ DotDumper(const compiler::LoweredGraph *lowered_graph, Level level)
: _lowered_graph{lowered_graph}, _graph(_lowered_graph->graph()), _level{level}
{
}
@@ -57,7 +57,7 @@ public:
void dump(const std::string &tag);
private:
- const ir::LoweredGraph *_lowered_graph;
+ const compiler::LoweredGraph *_lowered_graph;
const ir::Graph &_graph;
Level _level;
};
diff --git a/runtime/onert/core/src/exec/DataflowExecutor.cc b/runtime/onert/core/src/exec/DataflowExecutor.cc
index cb516b53a..a69ae9cdb 100644
--- a/runtime/onert/core/src/exec/DataflowExecutor.cc
+++ b/runtime/onert/core/src/exec/DataflowExecutor.cc
@@ -78,11 +78,13 @@ bool DataflowExecutor::noWaitingJobs()
}
DataflowExecutor::DataflowExecutor(
- std::unique_ptr<ir::LoweredGraph> lowered_graph,
+ std::unique_ptr<compiler::LoweredGraph> lowered_graph,
const std::vector<std::shared_ptr<backend::ITensor>> &input_tensors,
const std::vector<std::shared_ptr<backend::ITensor>> &output_tensors,
- const compiler::TensorBuilders &tensor_builders, compiler::CodeMap &&code_map)
- : ExecutorBase{std::move(lowered_graph), input_tensors, output_tensors, tensor_builders},
+ const compiler::TensorRegistries &tensor_regs, backend::TensorManagerSet &&tensor_mgrs,
+ compiler::CodeMap &&code_map)
+ : ExecutorBase{std::move(lowered_graph), input_tensors, output_tensors, tensor_regs,
+ std::move(tensor_mgrs)},
_code_map{std::move(code_map)}
{
VERBOSE(DataflowExecutor) << "Constructing Dataflow Executor" << std::endl;
diff --git a/runtime/onert/core/src/exec/DataflowExecutor.h b/runtime/onert/core/src/exec/DataflowExecutor.h
index aebb03c23..8d60e3e4b 100644
--- a/runtime/onert/core/src/exec/DataflowExecutor.h
+++ b/runtime/onert/core/src/exec/DataflowExecutor.h
@@ -49,10 +49,11 @@ public:
* @param tensor_builders Tensor builders that are currently used
* @param code_map OpSequence and its code map
*/
- DataflowExecutor(std::unique_ptr<ir::LoweredGraph> lowered_graph,
+ DataflowExecutor(std::unique_ptr<compiler::LoweredGraph> lowered_graph,
const std::vector<std::shared_ptr<backend::ITensor>> &input_tensors,
const std::vector<std::shared_ptr<backend::ITensor>> &output_tensors,
- const compiler::TensorBuilders &tensor_builders, compiler::CodeMap &&code_map);
+ const compiler::TensorRegistries &tensor_regs,
+ backend::TensorManagerSet &&tensor_mgrs, compiler::CodeMap &&code_map);
void executeImpl() override;
diff --git a/runtime/onert/core/src/exec/DynamicShapeInference.cc b/runtime/onert/core/src/exec/DynamicShapeInference.cc
index 5ec7012ee..70bddfce4 100644
--- a/runtime/onert/core/src/exec/DynamicShapeInference.cc
+++ b/runtime/onert/core/src/exec/DynamicShapeInference.cc
@@ -100,17 +100,6 @@ void DynamicShapeInferer::handleSimpleUnaryOp(const ir::Operation &op,
assert(output->buffer() != nullptr);
}
-void DynamicShapeInferer::visit(const ir::operation::Abs &op)
-{
- handleSimpleUnaryOp(op, op.getInputs().at(ir::operation::Abs::INPUT));
-}
-
-void DynamicShapeInferer::visit(const ir::operation::Add &op)
-{
- handleBinaryArithmeticOp(op, op.getInputs().at(ir::operation::Add::Input::LHS),
- op.getInputs().at(ir::operation::Add::Input::RHS));
-}
-
void DynamicShapeInferer::visit(const ir::operation::ArgMax &op)
{
const auto input_idx{op.getInputs().at(ir::operation::ArgMax::Input::INPUT)};
@@ -155,6 +144,12 @@ void DynamicShapeInferer::visit(const ir::operation::BatchMatMul &op)
dynamicTensorManagerOf(output)->applyShape(output_index, new_shape);
}
+void DynamicShapeInferer::visit(const ir::operation::BinaryArithmetic &op)
+{
+ handleBinaryArithmeticOp(op, op.getInputs().at(ir::operation::BinaryArithmetic::Input::LHS),
+ op.getInputs().at(ir::operation::BinaryArithmetic::Input::RHS));
+}
+
void DynamicShapeInferer::visit(const ir::operation::BroadcastTo &op)
{
auto output_ind = op.getOutputs().at(0);
@@ -179,11 +174,6 @@ void DynamicShapeInferer::visit(const ir::operation::BroadcastTo &op)
assert(output->buffer() != nullptr);
}
-void DynamicShapeInferer::visit(const ir::operation::Cast &op)
-{
- handleSimpleUnaryOp(op, op.getInputs().at(ir::operation::Cast::INPUT));
-}
-
void DynamicShapeInferer::visit(const ir::operation::Comparison &op)
{
handleBinaryArithmeticOp(op, op.getInputs().at(ir::operation::Comparison::Input::INPUT0),
@@ -292,20 +282,20 @@ void DynamicShapeInferer::visit(const ir::operation::Conv2D &op)
assert(output->buffer() != nullptr);
}
-void DynamicShapeInferer::visit(const ir::operation::Cos &op)
+void DynamicShapeInferer::visit(const ir::operation::ElementwiseActivation &op)
{
- handleSimpleUnaryOp(op, op.getInputs().at(ir::operation::Cos::Input::INPUT));
+ handleSimpleUnaryOp(op, op.getInputs().at(ir::operation::ElementwiseActivation::INPUT));
}
-void DynamicShapeInferer::visit(const ir::operation::Div &op)
+void DynamicShapeInferer::visit(const ir::operation::ElementwiseBinary &op)
{
- handleBinaryArithmeticOp(op, op.getInputs().at(ir::operation::Div::Input::LHS),
- op.getInputs().at(ir::operation::Div::Input::RHS));
+ handleBinaryArithmeticOp(op, op.getInputs().at(ir::operation::ElementwiseBinary::Input::LHS),
+ op.getInputs().at(ir::operation::ElementwiseBinary::Input::RHS));
}
-void DynamicShapeInferer::visit(const ir::operation::Exp &op)
+void DynamicShapeInferer::visit(const ir::operation::ElementwiseUnary &op)
{
- handleSimpleUnaryOp(op, op.getInputs().at(ir::operation::Exp::Input::INPUT));
+ handleSimpleUnaryOp(op, op.getInputs().at(ir::operation::ElementwiseUnary::Input::INPUT));
}
void DynamicShapeInferer::visit(const ir::operation::ExpandDims &op)
@@ -430,27 +420,6 @@ void DynamicShapeInferer::visit(const ir::operation::Gather &op)
assert(output->buffer() != nullptr);
}
-void DynamicShapeInferer::visit(const ir::operation::Log &op)
-{
- handleSimpleUnaryOp(op, op.getInputs().at(ir::operation::Log::Input::INPUT));
-}
-
-void DynamicShapeInferer::visit(const ir::operation::LogicalNot &op)
-{
- handleSimpleUnaryOp(op, op.getInputs().at(ir::operation::LogicalNot::Input::INPUT));
-}
-
-void DynamicShapeInferer::visit(const ir::operation::LogicalOr &op)
-{
- handleBinaryArithmeticOp(op, op.getInputs().at(ir::operation::LogicalOr::Input::INPUT0),
- op.getInputs().at(ir::operation::LogicalOr::Input::INPUT1));
-}
-
-void DynamicShapeInferer::visit(const ir::operation::Logistic &op)
-{
- handleSimpleUnaryOp(op, op.getInputs().at(ir::operation::Logistic::INPUT));
-}
-
void DynamicShapeInferer::visit(const ir::operation::L2Normalization &op)
{
handleSimpleUnaryOp(op, op.getInputs().at(ir::operation::L2Normalization::INPUT));
@@ -461,29 +430,6 @@ void DynamicShapeInferer::visit(const ir::operation::MatrixBandPart &op)
handleSimpleUnaryOp(op, op.getInputs().at(ir::operation::MatrixBandPart::INPUT));
}
-void DynamicShapeInferer::visit(const ir::operation::Max &op)
-{
- handleBinaryArithmeticOp(op, op.getInputs().at(ir::operation::Max::Input::LHS),
- op.getInputs().at(ir::operation::Max::Input::RHS));
-}
-
-void DynamicShapeInferer::visit(const ir::operation::Min &op)
-{
- handleBinaryArithmeticOp(op, op.getInputs().at(ir::operation::Min::Input::LHS),
- op.getInputs().at(ir::operation::Min::Input::RHS));
-}
-
-void DynamicShapeInferer::visit(const ir::operation::Mul &op)
-{
- handleBinaryArithmeticOp(op, op.getInputs().at(ir::operation::Mul::Input::LHS),
- op.getInputs().at(ir::operation::Mul::Input::RHS));
-}
-
-void DynamicShapeInferer::visit(const ir::operation::Neg &op)
-{
- handleSimpleUnaryOp(op, op.getInputs().at(ir::operation::Neg::Input::INPUT));
-}
-
void DynamicShapeInferer::visit(const ir::operation::OneHot &op)
{
auto output_ind = op.getOutputs().at(0);
@@ -766,7 +712,7 @@ void DynamicShapeInferer::visit(const ir::operation::ResizeBilinear &op)
if (output_shape != output->getShape() || output->buffer() == nullptr)
{
// change on output shape
- _dynamic_tensor_manager->applyShape(output_ind, output_shape);
+ dynamicTensorManagerOf(output)->applyShape(output_ind, output_shape);
}
assert(output->buffer() != nullptr);
}
@@ -776,16 +722,6 @@ void DynamicShapeInferer::visit(const ir::operation::Reverse &op)
handleSimpleUnaryOp(op, op.getInputs().at(ir::operation::Reverse::INPUT));
}
-void DynamicShapeInferer::visit(const ir::operation::Round &op)
-{
- handleSimpleUnaryOp(op, op.getInputs().at(ir::operation::Round::Input::INPUT));
-}
-
-void DynamicShapeInferer::visit(const ir::operation::RSQRT &op)
-{
- handleSimpleUnaryOp(op, op.getInputs().at(ir::operation::RSQRT::INPUT));
-}
-
void DynamicShapeInferer::visit(const ir::operation::Select &op)
{
const auto input_cond_idx = op.getInputs().at(ir::operation::Select::Input::CONDITION);
@@ -836,11 +772,6 @@ void DynamicShapeInferer::visit(const ir::operation::Shape &op)
assert(output->buffer() != nullptr);
}
-void DynamicShapeInferer::visit(const ir::operation::Sin &op)
-{
- handleSimpleUnaryOp(op, op.getInputs().at(ir::operation::Sin::Input::INPUT));
-}
-
void DynamicShapeInferer::visit(const ir::operation::Slice &op)
{
const auto input_index{op.getInputs().at(ir::operation::Slice::Input::INPUT)};
@@ -1003,17 +934,6 @@ void DynamicShapeInferer::visit(const ir::operation::StridedSlice &op)
assert(output->buffer() != nullptr);
}
-void DynamicShapeInferer::visit(const ir::operation::Sub &op)
-{
- handleBinaryArithmeticOp(op, op.getInputs().at(ir::operation::Sub::Input::LHS),
- op.getInputs().at(ir::operation::Sub::Input::RHS));
-}
-
-void DynamicShapeInferer::visit(const ir::operation::Tanh &op)
-{
- handleSimpleUnaryOp(op, op.getInputs().at(ir::operation::Tanh::INPUT));
-}
-
void DynamicShapeInferer::visit(const ir::operation::Tile &op)
{
auto output_ind = op.getOutputs().at(0);
@@ -1091,10 +1011,5 @@ void DynamicShapeInferer::visit(const ir::operation::Unpack &op)
}
}
-void DynamicShapeInferer::visit(const ir::operation::ZerosLike &op)
-{
- handleSimpleUnaryOp(op, op.getInputs().at(ir::operation::ZerosLike::INPUT));
-}
-
} // namespace exec
} // namespace onert
diff --git a/runtime/onert/core/src/exec/Execution.cc b/runtime/onert/core/src/exec/Execution.cc
index 5b401ecf8..7feb3ab68 100644
--- a/runtime/onert/core/src/exec/Execution.cc
+++ b/runtime/onert/core/src/exec/Execution.cc
@@ -38,7 +38,10 @@ void Execution::changeInputShape(const ir::IOIndex &index, const ir::Shape &new_
if (_io_desc.inputs.at(index.value()) != 0)
throw std::runtime_error("Error in calling order");
- _io_desc.input_shape_signature[index] = new_shape;
+ // This shape is used later to mark the input tensor as dynamic
+ // Note that the 'compiled' model is not updated with new_shape;
+ // new_shape only changes the model's input shape while 'running' the model
+ _io_desc.dynamic_input_shapes[index] = new_shape;
}
// TODO Remove default parameter
@@ -54,8 +57,8 @@ void Execution::setInput(const ir::IOIndex &index, const void *buffer, size_t le
// if input_shape_sig is set, input_shape_sig overrides shape in info
// note: input_shape_sig contains shape passed by nnfw_set_input_tensorinfo()
{
- auto input_shape_sig = _io_desc.input_shape_signature.find(index);
- auto size_required = (input_shape_sig != _io_desc.input_shape_signature.end())
+ auto input_shape_sig = _io_desc.dynamic_input_shapes.find(index);
+ auto size_required = (input_shape_sig != _io_desc.dynamic_input_shapes.end())
? input_shape_sig->second.num_elements() *
onert::ir::sizeOfDataType(info.typeInfo().type())
: info.total_size();
@@ -154,8 +157,8 @@ bool Execution::isFinished(void) const { return finished; }
ir::Shape Execution::getInputShape(ir::IOIndex ind) const
{
- auto itr = _io_desc.input_shape_signature.find(ind);
- if (itr == _io_desc.input_shape_signature.end())
+ auto itr = _io_desc.dynamic_input_shapes.find(ind);
+ if (itr == _io_desc.dynamic_input_shapes.end())
{
auto operand_idx = primary_subgraph().getInputs().at(ind.value());
return primary_subgraph().operands().at(operand_idx).shape();
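
A minimal, self-contained sketch of the behaviour behind the dynamic_input_shapes rename (stand-in types only, not the actual onert Execution or ir::Shape classes): a shape recorded through changeInputShape() leaves the compiled model untouched and is simply preferred over the compiled shape once the model runs, which is exactly the fallback order getInputShape() shows above.

#include <cassert>
#include <cstdint>
#include <map>
#include <vector>

// Stand-in for ir::Shape; the real code also carries rank/type information.
using Shape = std::vector<int32_t>;

class ExecutionSketch
{
public:
  explicit ExecutionSketch(std::vector<Shape> compiled_shapes)
      : _compiled_input_shapes{std::move(compiled_shapes)}
  {
  }

  // Record a run-time override; the 'compiled' model is not updated.
  void changeInputShape(uint32_t index, const Shape &new_shape)
  {
    _dynamic_input_shapes[index] = new_shape;
  }

  // The override, if any, wins over the shape baked in at compile time.
  Shape getInputShape(uint32_t index) const
  {
    auto it = _dynamic_input_shapes.find(index);
    if (it != _dynamic_input_shapes.end())
      return it->second;
    return _compiled_input_shapes.at(index);
  }

private:
  std::vector<Shape> _compiled_input_shapes;
  std::map<uint32_t, Shape> _dynamic_input_shapes;
};

int main()
{
  ExecutionSketch exec{{{1, 224, 224, 3}}};
  exec.changeInputShape(0, {4, 224, 224, 3}); // e.g. a bigger batch just for this run
  assert(exec.getInputShape(0)[0] == 4);
  return 0;
}
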
diff --git a/runtime/onert/core/src/exec/ExecutorBase.cc b/runtime/onert/core/src/exec/ExecutorBase.cc
index 864ccb31a..f835a9675 100644
--- a/runtime/onert/core/src/exec/ExecutorBase.cc
+++ b/runtime/onert/core/src/exec/ExecutorBase.cc
@@ -26,12 +26,14 @@ namespace onert
namespace exec
{
-ExecutorBase::ExecutorBase(std::unique_ptr<ir::LoweredGraph> &&lowered_graph,
+ExecutorBase::ExecutorBase(std::unique_ptr<compiler::LoweredGraph> &&lowered_graph,
const std::vector<std::shared_ptr<backend::ITensor>> &input_tensors,
const std::vector<std::shared_ptr<backend::ITensor>> &output_tensors,
- const compiler::TensorBuilders &tensor_builders)
+ const compiler::TensorRegistries &tensor_regs,
+ backend::TensorManagerSet &&tensor_mgrs)
: _lowered_graph{std::move(lowered_graph)}, _graph{_lowered_graph->graph()},
- _input_tensors{input_tensors}, _output_tensors{output_tensors}, _mutex()
+ _input_tensors{input_tensors}, _output_tensors{output_tensors},
+ _tensor_mgrs{std::move(tensor_mgrs)}, _mutex()
{
// TODO Fix the way of knowing whether it is primary or not
bool primary_executor = !(_input_tensors.empty() && _output_tensors.empty());
@@ -41,23 +43,10 @@ ExecutorBase::ExecutorBase(std::unique_ptr<ir::LoweredGraph> &&lowered_graph,
std::vector<std::shared_ptr<backend::ITensor>> list;
for (auto ind : ind_seq)
{
- std::shared_ptr<backend::ITensor> tensor;
- for (auto &tensor_builder : tensor_builders)
- {
- auto tensor_registry = tensor_builder->tensorRegistry();
- assert(tensor_registry);
- tensor = tensor_registry->getNativeITensor(ind);
- if (tensor != nullptr)
- {
- if (tensor_builder->supportDynamicTensor())
- {
- DynAllocInfo dyn_alloc_info{ind, tensor_builder->dynamicTensorManager()};
- _input_to_dyn_alloc_info.emplace(tensor, dyn_alloc_info);
- }
- break;
- }
- }
+ std::shared_ptr<backend::ITensor> tensor = tensor_regs.getITensor(ind);
assert(tensor != nullptr);
+ DynAllocInfo dyn_alloc_info{ind};
+ _input_to_dyn_alloc_info.emplace(tensor, dyn_alloc_info);
list.push_back(tensor);
}
return list;
@@ -66,23 +55,10 @@ ExecutorBase::ExecutorBase(std::unique_ptr<ir::LoweredGraph> &&lowered_graph,
std::vector<std::shared_ptr<backend::ITensor>> list;
for (auto ind : ind_seq)
{
- std::shared_ptr<backend::ITensor> tensor;
- for (auto &tensor_builder : tensor_builders)
- {
- auto tensor_registry = tensor_builder->tensorRegistry();
- assert(tensor_registry);
- tensor = tensor_registry->getNativeITensor(ind);
- if (tensor != nullptr)
- {
- if (tensor_builder->supportDynamicTensor())
- {
- DynAllocInfo dyn_alloc_info{ind, tensor_builder->dynamicTensorManager()};
- _output_to_dyn_alloc_info.emplace(tensor, dyn_alloc_info);
- }
- break;
- }
- }
+ std::shared_ptr<backend::ITensor> tensor = tensor_regs.getITensor(ind);
assert(tensor != nullptr);
+ DynAllocInfo dyn_alloc_info{ind};
+ _output_to_dyn_alloc_info.emplace(tensor, dyn_alloc_info);
list.push_back(tensor);
}
return list;
@@ -92,42 +68,23 @@ ExecutorBase::ExecutorBase(std::unique_ptr<ir::LoweredGraph> &&lowered_graph,
}
else
{
- // If primary graph, all the inputs and outputs belong to controlflow backend
- auto cf_dyn_tensor_builder = tensor_builders.getControlflowTensorBuilder();
- assert(cf_dyn_tensor_builder);
-
assert(input_tensors.size() == _graph.getInputs().size());
assert(output_tensors.size() == _graph.getOutputs().size());
for (uint32_t i = 0; i < input_tensors.size(); i++)
{
auto tensor = input_tensors[i];
auto ind = _graph.getInputs().at(i);
- DynAllocInfo dyn_alloc_info{ind, cf_dyn_tensor_builder->dynamicTensorManager()};
+ DynAllocInfo dyn_alloc_info{ind};
_input_to_dyn_alloc_info.emplace(tensor, dyn_alloc_info);
}
for (uint32_t i = 0; i < output_tensors.size(); i++)
{
auto tensor = output_tensors[i];
auto ind = _graph.getOutputs().at(i);
- DynAllocInfo dyn_alloc_info{ind, cf_dyn_tensor_builder->dynamicTensorManager()};
+ DynAllocInfo dyn_alloc_info{ind};
_output_to_dyn_alloc_info.emplace(tensor, dyn_alloc_info);
}
}
-
- // Prepare each TensorManager on each backend
- for (auto &tensor_builder : tensor_builders)
- {
- auto s_tensor_manager = tensor_builder->releaseStaticTensorManager();
- if (s_tensor_manager != nullptr)
- _tensor_mgrs.insert(std::move(s_tensor_manager));
-
- if (tensor_builder->supportDynamicTensor())
- {
- auto d_tensor_manager = tensor_builder->releaseDynamicTensorManager();
- if (d_tensor_manager != nullptr)
- _tensor_mgrs.insert(std::move(d_tensor_manager));
- }
- }
}
void ExecutorBase::execute(const std::vector<std::shared_ptr<backend::ITensor>> &src_tensors,
@@ -192,8 +149,8 @@ void ExecutorBase::execute(const IODescription &desc)
// TODO Remove dynamic_cast
auto tensor = std::dynamic_pointer_cast<backend::controlflow::UserTensor>(_input_tensors[i]);
assert(tensor);
- auto input_shape = desc.input_shape_signature.find(ir::IOIndex{i});
- if (input_shape != desc.input_shape_signature.end())
+ auto input_shape = desc.dynamic_input_shapes.find(ir::IOIndex{i});
+ if (input_shape != desc.dynamic_input_shapes.end())
{
tensor->set_dynamic();
tensor->setShape(input_shape->second);
@@ -258,8 +215,8 @@ void ExecutorBase::execute(const IODescription &desc)
*/
void ExecutorBase::handleDynamicInputTensor(ir::IOIndex io_ind, const IODescription &desc)
{
- auto shape_sig_found = desc.input_shape_signature.find(io_ind);
- if (shape_sig_found != desc.input_shape_signature.end())
+ auto shape_sig_found = desc.dynamic_input_shapes.find(io_ind);
+ if (shape_sig_found != desc.dynamic_input_shapes.end())
{
auto dyn_alloc_info = _input_to_dyn_alloc_info.find(_input_tensors[io_ind.value()]);
if (dyn_alloc_info == _input_to_dyn_alloc_info.end())
@@ -269,7 +226,9 @@ void ExecutorBase::handleDynamicInputTensor(ir::IOIndex io_ind, const IODescript
auto changed_input_shape = shape_sig_found->second;
auto operand_ind = dyn_alloc_info->second.ind;
- dyn_alloc_info->second.dyn_tensor_manager->applyShape(operand_ind, changed_input_shape);
+ auto dyn_tensor_manager = _input_tensors[io_ind.value()]->dynamic_tensor_manager();
+ assert(dyn_tensor_manager);
+ dyn_tensor_manager->applyShape(operand_ind, changed_input_shape);
}
}
diff --git a/runtime/onert/core/src/exec/ExecutorBase.h b/runtime/onert/core/src/exec/ExecutorBase.h
index 080c9bbdd..a13be7dbf 100644
--- a/runtime/onert/core/src/exec/ExecutorBase.h
+++ b/runtime/onert/core/src/exec/ExecutorBase.h
@@ -25,7 +25,7 @@
#include "Sink.h"
#include "ShapeConverter.h"
#include "exec/IExecutor.h"
-#include "ir/LoweredGraph.h"
+#include "compiler/LoweredGraph.h"
#include "ir/LowerInfoMap.h"
#include "backend/IConfig.h"
#include "backend/Backend.h"
@@ -33,9 +33,8 @@
#include "exec/IFunction.h"
#include "backend/IDynamicTensorManager.h"
#include "backend/ITensorManager.h"
-#include "backend/ITensorBuilder.h"
#include "exec/ExecutionObservee.h"
-#include "compiler/TensorBuilders.h"
+#include "compiler/TensorRegistries.h"
#include <list>
namespace onert
@@ -51,10 +50,11 @@ public:
* @param graph Graph object
* @param tensor_builders Tensor builders that are currently used
*/
- ExecutorBase(std::unique_ptr<ir::LoweredGraph> &&lowered_graph,
+ ExecutorBase(std::unique_ptr<compiler::LoweredGraph> &&lowered_graph,
const std::vector<std::shared_ptr<backend::ITensor>> &input_tensors,
const std::vector<std::shared_ptr<backend::ITensor>> &output_tensors,
- const compiler::TensorBuilders &tensor_builders);
+ const compiler::TensorRegistries &tensor_regs,
+ backend::TensorManagerSet &&tensor_mgrs);
virtual ~ExecutorBase() = default;
@@ -102,7 +102,7 @@ protected:
protected:
ExecutionObservee _subject;
std::shared_ptr<ir::OperationIndexMap<int64_t>> _indexed_ranks;
- std::unique_ptr<ir::LoweredGraph> _lowered_graph;
+ std::unique_ptr<compiler::LoweredGraph> _lowered_graph;
const ir::Graph &_graph;
std::vector<std::shared_ptr<backend::ITensor>> _input_tensors;
std::vector<std::shared_ptr<backend::ITensor>> _output_tensors;
diff --git a/runtime/onert/core/src/exec/FunctionSequence.cc b/runtime/onert/core/src/exec/FunctionSequence.cc
index d413e8162..fb31f7582 100644
--- a/runtime/onert/core/src/exec/FunctionSequence.cc
+++ b/runtime/onert/core/src/exec/FunctionSequence.cc
@@ -28,7 +28,8 @@ namespace exec
void FunctionSequence::run()
{
- if (_enable_dynamic_shape_inferer)
+ // TODO Find out when `_enable_dynamic_shape_inferer` is true but `_dynamic_tensor_ctx` is false
+ if (_enable_dynamic_shape_inferer && _dynamic_tensor_ctx)
{
if (_dynamic_tensor_ctx->op_seq->size() != _functions.size())
throw std::runtime_error("operation and functions should be mapped one by one");
diff --git a/runtime/onert/core/src/exec/LinearExecutor.h b/runtime/onert/core/src/exec/LinearExecutor.h
index 5c099bc16..c224d3f4f 100644
--- a/runtime/onert/core/src/exec/LinearExecutor.h
+++ b/runtime/onert/core/src/exec/LinearExecutor.h
@@ -46,12 +46,14 @@ public:
* @param tensor_builders Tensor builders that are currently used
* @param code_map OpSequence and its code map
*/
- LinearExecutor(std::unique_ptr<ir::LoweredGraph> lowered_graph,
+ LinearExecutor(std::unique_ptr<compiler::LoweredGraph> lowered_graph,
const std::vector<std::shared_ptr<backend::ITensor>> &input_tensors,
const std::vector<std::shared_ptr<backend::ITensor>> &output_tensors,
- const compiler::TensorBuilders &tensor_builders, compiler::CodeMap &&code_map,
+ const compiler::TensorRegistries &tensor_regs,
+ backend::TensorManagerSet &&tensor_mgrs, compiler::CodeMap &&code_map,
const std::vector<ir::OpSequenceIndex> &order)
- : ExecutorBase{std::move(lowered_graph), input_tensors, output_tensors, tensor_builders}
+ : ExecutorBase{std::move(lowered_graph), input_tensors, output_tensors, tensor_regs,
+ std::move(tensor_mgrs)}
{
for (auto index : order)
{
diff --git a/runtime/onert/core/src/exec/ParallelExecutor.cc b/runtime/onert/core/src/exec/ParallelExecutor.cc
index b5d81778f..ab234aacd 100644
--- a/runtime/onert/core/src/exec/ParallelExecutor.cc
+++ b/runtime/onert/core/src/exec/ParallelExecutor.cc
@@ -60,12 +60,13 @@ void ParallelExecutor::notify(uint32_t finished_job_id)
}
ParallelExecutor::ParallelExecutor(
- std::unique_ptr<ir::LoweredGraph> lowered_graph,
+ std::unique_ptr<compiler::LoweredGraph> lowered_graph,
const std::vector<std::shared_ptr<backend::ITensor>> &input_tensors,
const std::vector<std::shared_ptr<backend::ITensor>> &output_tensors,
- const compiler::TensorBuilders &tensor_builders, compiler::CodeMap &&code_map)
- : DataflowExecutor{std::move(lowered_graph), input_tensors, output_tensors, tensor_builders,
- std::move(code_map)}
+ const compiler::TensorRegistries &tensor_regs, backend::TensorManagerSet &&tensor_mgrs,
+ compiler::CodeMap &&code_map)
+ : DataflowExecutor{std::move(lowered_graph), input_tensors, output_tensors, tensor_regs,
+ std::move(tensor_mgrs), std::move(code_map)}
{
VERBOSE(ParallelExecutor) << "Constructing Parallel Executor" << std::endl;
}
diff --git a/runtime/onert/core/src/exec/ParallelExecutor.h b/runtime/onert/core/src/exec/ParallelExecutor.h
index 462cbc6a8..929edfce9 100644
--- a/runtime/onert/core/src/exec/ParallelExecutor.h
+++ b/runtime/onert/core/src/exec/ParallelExecutor.h
@@ -50,10 +50,11 @@ public:
* @param tensor_builders Tensor builders that are currently used
* @param code_map OpSequence and its code map
*/
- ParallelExecutor(std::unique_ptr<ir::LoweredGraph> lowered_graph,
+ ParallelExecutor(std::unique_ptr<compiler::LoweredGraph> lowered_graph,
const std::vector<std::shared_ptr<backend::ITensor>> &input_tensors,
const std::vector<std::shared_ptr<backend::ITensor>> &output_tensors,
- const compiler::TensorBuilders &tensor_builders, compiler::CodeMap &&code_map);
+ const compiler::TensorRegistries &tensor_regs,
+ backend::TensorManagerSet &&tensor_mgrs, compiler::CodeMap &&code_map);
void executeImpl() override;
diff --git a/runtime/onert/core/src/exec/feature/nchw/Reader.h b/runtime/onert/core/src/exec/feature/nchw/Reader.h
index 48642d8ef..7be9df4d5 100644
--- a/runtime/onert/core/src/exec/feature/nchw/Reader.h
+++ b/runtime/onert/core/src/exec/feature/nchw/Reader.h
@@ -33,7 +33,7 @@ namespace feature
namespace nchw
{
-template <typename T> class Reader final : public feature::Reader<T>
+template <typename T> class Reader : public feature::Reader<T>
{
public:
// Construct for buffer of model inputs
@@ -68,15 +68,14 @@ public:
}
public:
- T at(uint32_t ch, uint32_t row, uint32_t col) const override
+ T at(uint32_t batch, uint32_t ch, uint32_t row, uint32_t col) const final
{
- const auto offset = feature_index_to_byte_offset(0, ch, row, col);
-
- const T *ptr = reinterpret_cast<const T *>(_ptr + offset);
-
- return *ptr;
+ return getRef(batch, ch, row, col);
}
- T at(uint32_t batch, uint32_t ch, uint32_t row, uint32_t col) const override
+ T at(uint32_t ch, uint32_t row, uint32_t col) const final { return getRef(0, ch, row, col); }
+
+protected:
+ const T &getRef(uint32_t batch, uint32_t ch, uint32_t row, uint32_t col) const
{
const auto offset = feature_index_to_byte_offset(batch, ch, row, col);
diff --git a/runtime/onert/core/src/exec/feature/nchw/View.h b/runtime/onert/core/src/exec/feature/nchw/View.h
index ff55de199..dbaf1a91e 100644
--- a/runtime/onert/core/src/exec/feature/nchw/View.h
+++ b/runtime/onert/core/src/exec/feature/nchw/View.h
@@ -17,7 +17,7 @@
#ifndef __ONERT_EXEC_FEATURE_NCHW_VIEW_H__
#define __ONERT_EXEC_FEATURE_NCHW_VIEW_H__
-#include "../Reader.h"
+#include "Reader.h"
#include "backend/ITensor.h"
#include "ir/Shape.h"
@@ -34,99 +34,31 @@ namespace feature
namespace nchw
{
-template <typename T> class View final : public feature::Reader<T>
+template <typename T> class View final : public Reader<T>
{
public:
// Construct for buffer of model inputs
- View(const ir::FeatureShape &shape, T *ptr, size_t len)
- : _shape{shape}, _ptr{reinterpret_cast<uint8_t *>(ptr)}, _len{len}
+ View(const ir::FeatureShape &shape, T *ptr, size_t len) : Reader<T>{shape, ptr, len}
{
- assert(shape.N * shape.C * shape.H * shape.W * sizeof(T) == len);
-
- _strides.W = sizeof(T);
- _strides.H = shape.W * sizeof(T);
- _strides.C = shape.W * shape.H * sizeof(T);
- _strides.N = shape.W * shape.H * shape.C * sizeof(T);
+ // DO NOTHING
}
// Construct for backend tensor
- View(::onert::backend::ITensor *tensor)
- : _ptr{tensor->buffer() + tensor->calcOffset({0, 0, 0, 0})}, _len{tensor->total_size()}
- {
- assert(tensor->layout() == ir::Layout::NCHW);
-
- const auto start_offset = tensor->calcOffset({0, 0, 0, 0});
- _strides.W = tensor->dimension(3) == 1 ? 0 : tensor->calcOffset({0, 0, 0, 1}) - start_offset;
- _strides.H = tensor->dimension(2) == 1 ? 0 : tensor->calcOffset({0, 0, 1, 0}) - start_offset;
- _strides.C = tensor->dimension(1) == 1 ? 0 : tensor->calcOffset({0, 1, 0, 0}) - start_offset;
- _strides.N = tensor->dimension(0) == 1 ? 0 : tensor->calcOffset({1, 0, 0, 0}) - start_offset;
-
- _shape.W = tensor->dimension(3);
- _shape.H = tensor->dimension(2);
- _shape.C = tensor->dimension(1);
- _shape.N = tensor->dimension(0);
- }
-
-public:
- T at(uint32_t ch, uint32_t row, uint32_t col) const override
+ View(::onert::backend::ITensor *tensor) : Reader<T>{tensor}
{
- const auto offset = feature_index_to_byte_offset(0, ch, row, col);
-
- T *ptr = reinterpret_cast<T *>(_ptr + offset);
-
- return *ptr;
- }
- T at(uint32_t batch, uint32_t ch, uint32_t row, uint32_t col) const override
- {
- const auto offset = feature_index_to_byte_offset(batch, ch, row, col);
-
- T *ptr = reinterpret_cast<T *>(_ptr + offset);
-
- return *ptr;
+ // DO NOTHING
}
public:
- T &at(uint32_t ch, uint32_t row, uint32_t col)
- {
- const auto offset = feature_index_to_byte_offset(0, ch, row, col);
-
- T *ptr = reinterpret_cast<T *>(_ptr + offset);
-
- return *ptr;
- }
+ using Reader<T>::at;
T &at(uint32_t batch, uint32_t ch, uint32_t row, uint32_t col)
{
- const auto offset = feature_index_to_byte_offset(batch, ch, row, col);
-
- T *ptr = reinterpret_cast<T *>(_ptr + offset);
-
- return *ptr;
+ return const_cast<T &>(Reader<T>::getRef(batch, ch, row, col));
}
-
-private:
- size_t feature_index_to_byte_offset(uint32_t batch, uint32_t ch, uint32_t row, uint32_t col) const
+ T &at(uint32_t ch, uint32_t row, uint32_t col)
{
- assert(1u * _shape.N > batch); // shape.N > batch
- assert(1u * _shape.C > ch); // shape.C > ch
- assert(1u * _shape.H > row); // shape.H > row
- assert(1u * _shape.W > col); // shape.W > col
-
- uint32_t res = 0;
- res += batch * _strides.N;
- res += ch * _strides.C;
- res += row * _strides.H;
- res += col * _strides.W;
-
- return res;
+ return const_cast<T &>(Reader<T>::getRef(0, ch, row, col));
}
-
-private:
- // TODO Remove _shape
- ir::FeatureShape _shape;
- using Strides = ir::FeatureShape;
- Strides _strides;
- uint8_t *_ptr;
- size_t _len;
};
} // namespace nchw
diff --git a/runtime/onert/core/src/exec/feature/nhwc/Reader.h b/runtime/onert/core/src/exec/feature/nhwc/Reader.h
index ef27992c3..7730cee72 100644
--- a/runtime/onert/core/src/exec/feature/nhwc/Reader.h
+++ b/runtime/onert/core/src/exec/feature/nhwc/Reader.h
@@ -34,7 +34,7 @@ namespace feature
namespace nhwc
{
-template <typename T> class Reader final : public feature::Reader<T>
+template <typename T> class Reader : public feature::Reader<T>
{
public:
// Construct for buffer of model inputs
@@ -70,15 +70,14 @@ public:
}
public:
- T at(uint32_t row, uint32_t col, uint32_t ch) const override
+ T at(uint32_t batch, uint32_t row, uint32_t col, uint32_t ch) const final
{
- const auto offset = feature_index_to_byte_offset(0, row, col, ch);
-
- const T *ptr = reinterpret_cast<const T *>(_ptr + offset);
-
- return *ptr;
+ return getRef(batch, row, col, ch);
}
- T at(uint32_t batch, uint32_t row, uint32_t col, uint32_t ch) const override
+ T at(uint32_t row, uint32_t col, uint32_t ch) const final { return getRef(0, row, col, ch); }
+
+protected:
+ const T &getRef(uint32_t batch, uint32_t row, uint32_t col, uint32_t ch) const
{
const auto offset = feature_index_to_byte_offset(batch, row, col, ch);
diff --git a/runtime/onert/core/src/exec/feature/nhwc/View.h b/runtime/onert/core/src/exec/feature/nhwc/View.h
index a09961a84..72c8c3415 100644
--- a/runtime/onert/core/src/exec/feature/nhwc/View.h
+++ b/runtime/onert/core/src/exec/feature/nhwc/View.h
@@ -35,101 +35,31 @@ namespace feature
namespace nhwc
{
-template <typename T> class View final : public feature::Reader<T>
+template <typename T> class View final : public Reader<T>
{
public:
// Construct for buffer of model inputs
- View(const ir::FeatureShape &shape, T *ptr, size_t len)
- : _shape{shape}, _ptr{reinterpret_cast<uint8_t *>(ptr)}, _len{len}
+ View(const ir::FeatureShape &shape, T *ptr, size_t len) : Reader<T>{shape, ptr, len}
{
- UNUSED_RELEASE(len); // Workaround for unused variable in release mode
- assert(shape.N * shape.H * shape.W * shape.C * sizeof(T) == len);
-
- // No padding
- _strides.C = sizeof(T);
- _strides.W = shape.C * sizeof(T);
- _strides.H = shape.C * shape.W * sizeof(T);
- _strides.N = shape.C * shape.W * shape.H * sizeof(T);
+ // DO NOTHING
}
// Construct for backend tensor
- View(backend::ITensor *tensor)
- : _ptr{tensor->buffer() + tensor->calcOffset({0, 0, 0, 0})}, _len{tensor->total_size()}
+ View(backend::ITensor *tensor) : Reader<T>{tensor}
{
- assert(tensor->layout() == ir::Layout::NHWC);
-
- const auto start_offset = tensor->calcOffset({0, 0, 0, 0});
- _strides.C = tensor->dimension(3) == 1 ? 0 : tensor->calcOffset({0, 0, 0, 1}) - start_offset;
- _strides.W = tensor->dimension(2) == 1 ? 0 : tensor->calcOffset({0, 0, 1, 0}) - start_offset;
- _strides.H = tensor->dimension(1) == 1 ? 0 : tensor->calcOffset({0, 1, 0, 0}) - start_offset;
- _strides.N = tensor->dimension(0) == 1 ? 0 : tensor->calcOffset({1, 0, 0, 0}) - start_offset;
-
- _shape.C = tensor->dimension(3);
- _shape.W = tensor->dimension(2);
- _shape.H = tensor->dimension(1);
- _shape.N = tensor->dimension(0);
+ // DO NOTHING
}
public:
- T at(uint32_t row, uint32_t col, uint32_t ch) const override
- {
- const auto offset = feature_index_to_byte_offset(0, row, col, ch);
-
- const T *ptr = reinterpret_cast<const T *>(_ptr + offset);
-
- return *ptr;
- }
- T at(uint32_t batch, uint32_t row, uint32_t col, uint32_t ch) const override
- {
- const auto offset = feature_index_to_byte_offset(batch, row, col, ch);
-
- const T *ptr = reinterpret_cast<const T *>(_ptr + offset);
-
- return *ptr;
- }
-
- T &at(uint32_t row, uint32_t col, uint32_t ch)
- {
- const auto offset = feature_index_to_byte_offset(0, row, col, ch);
-
- T *ptr = reinterpret_cast<T *>(_ptr + offset);
-
- return *ptr;
- }
-
+ using Reader<T>::at;
T &at(uint32_t batch, uint32_t row, uint32_t col, uint32_t ch)
{
- const auto offset = feature_index_to_byte_offset(batch, row, col, ch);
-
- T *ptr = reinterpret_cast<T *>(_ptr + offset);
-
- return *ptr;
+ return const_cast<T &>(Reader<T>::getRef(batch, row, col, ch));
}
-
-private:
- size_t feature_index_to_byte_offset(uint32_t batch, uint32_t row, uint32_t col, uint32_t ch) const
+ T &at(uint32_t row, uint32_t col, uint32_t ch)
{
- assert(1u * _shape.N > batch); // shape.N > batch
- assert(1u * _shape.H > row); // shape.H > row
- assert(1u * _shape.W > col); // shape.W > col
- assert(1u * _shape.C > ch); // shape.C > ch
-
- uint32_t res = 0;
- res += batch * _strides.N;
- res += row * _strides.H;
- res += col * _strides.W;
- res += ch * _strides.C;
-
- return res;
+ return const_cast<T &>(Reader<T>::getRef(0, row, col, ch));
}
-
-private:
- // TODO Remove _shape
- ir::FeatureShape _shape;
- using Strides = ir::FeatureShape;
- Strides _strides;
- uint8_t *_ptr;
- size_t _len;
};
} // namespace nhwc
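
The nchw and nhwc refactors above share one pattern: Reader<T> keeps a single protected getRef() that does the offset arithmetic, and View<T> derives from it, reusing getRef() through const_cast for its writable at() overloads. A simplified, one-dimensional sketch of that pattern follows (illustrative stand-ins, not the real stride-aware Reader/View classes):

#include <cassert>
#include <cstddef>
#include <cstdint>
#include <vector>

// Simplified to one dimension; the real Reader/View compute NCHW/NHWC byte strides.
template <typename T> class ReaderSketch
{
public:
  ReaderSketch(const T *ptr, size_t len) : _ptr{reinterpret_cast<const uint8_t *>(ptr)}, _len{len}
  {
  }

  T at(uint32_t i) const { return getRef(i); }

protected:
  // Single place that turns an index into a reference into the buffer
  const T &getRef(uint32_t i) const
  {
    assert(i * sizeof(T) < _len);
    return *reinterpret_cast<const T *>(_ptr + i * sizeof(T));
  }

private:
  const uint8_t *_ptr;
  size_t _len;
};

template <typename T> class ViewSketch final : public ReaderSketch<T>
{
public:
  ViewSketch(T *ptr, size_t len) : ReaderSketch<T>{ptr, len} {}

  using ReaderSketch<T>::at;
  // Writable access reuses the base-class offset logic via const_cast,
  // the same trick the refactored View<T> uses.
  T &at(uint32_t i) { return const_cast<T &>(ReaderSketch<T>::getRef(i)); }
};

int main()
{
  std::vector<float> buf(4, 0.0f);
  ViewSketch<float> view{buf.data(), buf.size() * sizeof(float)};
  view.at(2) = 3.5f;
  assert(view.at(2) == 3.5f);
  return 0;
}
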
diff --git a/runtime/onert/core/src/interp/InterpOps.lst b/runtime/onert/core/src/interp/InterpOps.lst
index 5f646b83f..0714df38a 100644
--- a/runtime/onert/core/src/interp/InterpOps.lst
+++ b/runtime/onert/core/src/interp/InterpOps.lst
@@ -22,43 +22,32 @@
//
// Same list with Operations.lst
// Make comment out if operation is not supported in interpreter
-INTERP_OP(Add)
-INTERP_OP(Sub)
+INTERP_OP(BinaryArithmetic)
//INTERP_OP(BatchToSpaceND)
//INTERP_OP(Cast)
INTERP_OP(Conv2D)
INTERP_OP(DepthwiseConv2D)
-INTERP_OP(AvgPool2D)
-INTERP_OP(MaxPool2D)
+INTERP_OP(Pool2D)
INTERP_OP(Concat)
INTERP_OP(FullyConnected)
//INTERP_OP(Reduce)
INTERP_OP(Reshape)
-INTERP_OP(Mul)
INTERP_OP(Softmax)
//INTERP_OP(Squeeze)
//INTERP_OP(Slice)
//INTERP_OP(StridedSlice)
-INTERP_OP(Tanh)
-INTERP_OP(Logistic)
-//INTERP_OP(Div)
+INTERP_OP(ElementwiseActivation)
//INTERP_OP(Transpose)
//INTERP_OP(Exp)
//INTERP_OP(Comparison)
-//INTERP_OP(LogicalAnd)
-//INTERP_OP(LogicalOr)
//INTERP_OP(LogicalNot)
//INTERP_OP(LSTM)
//INTERP_OP(RSQRT)
-INTERP_OP(ReLU)
//INTERP_OP(ResizeBilinear)
-INTERP_OP(ReLU1)
-INTERP_OP(ReLU6)
//INTERP_OP(RNN)
//INTERP_OP(Floor)
//INTERP_OP(SpaceToBatchND)
//INTERP_OP(SpaceToDepth)
-//INTERP_OP(L2Pool2D)
//INTERP_OP(EmbeddingLookup)
//INTERP_OP(L2Normalization)
//INTERP_OP(HashtableLookup)
@@ -81,6 +70,4 @@ INTERP_OP(Gather)
INTERP_OP(Pad)
//INTERP_OP(Custom)
//INTERP_OP(Permute)
-//INTERP_OP(Min)
-//INTERP_OP(Max)
//INTERP_OP(OneHot)
diff --git a/runtime/onert/core/src/interp/operations/BinaryArithmeticOps.cc b/runtime/onert/core/src/interp/operations/BinaryArithmeticOps.cc
index 44c955421..86e883524 100644
--- a/runtime/onert/core/src/interp/operations/BinaryArithmeticOps.cc
+++ b/runtime/onert/core/src/interp/operations/BinaryArithmeticOps.cc
@@ -19,9 +19,7 @@
#include "OperationUtil.h"
#include "interp/Registration.h"
-#include "ir/operation/Add.h"
-#include "ir/operation/Sub.h"
-#include "ir/operation/Mul.h"
+#include "ir/operation/BinaryArithmetic.h"
#include "misc/polymorphic_downcast.h"
#include "cker/Types.h"
@@ -39,12 +37,13 @@ enum class OpType
MUL
};
-template <typename node_type> void prepareAdd(ExecEnv *env, const ir::Operation &node)
+void prepare(ExecEnv *env, const ir::Operation &node)
{
- const auto &add_node = nnfw::misc::polymorphic_downcast<const node_type &>(node);
+ const auto &arithmetic_node =
+ nnfw::misc::polymorphic_downcast<const ir::operation::BinaryArithmetic &>(node);
- const auto lhs_index = node.getInputs().at(add_node.LHS);
- const auto rhs_index = node.getInputs().at(add_node.RHS);
+ const auto lhs_index = node.getInputs().at(arithmetic_node.LHS);
+ const auto rhs_index = node.getInputs().at(arithmetic_node.RHS);
const auto out_index = node.getOutputs().at(0);
const auto lhs_tensor = env->tensorAt(lhs_index);
@@ -54,7 +53,7 @@ template <typename node_type> void prepareAdd(ExecEnv *env, const ir::Operation
// TODO Util function to compare TensorInfo
if (lhs_tensor->data_type() != rhs_tensor->data_type())
{
- throw std::runtime_error{"Interp(Add): Different input types"};
+ throw std::runtime_error{"Interp(" + arithmetic_node.name() + "): Different input types"};
}
bool try_broadcast = (lhs_tensor->tensorInfo().shape() != rhs_tensor->tensorInfo().shape());
@@ -65,7 +64,7 @@ template <typename node_type> void prepareAdd(ExecEnv *env, const ir::Operation
rhs_tensor->tensorInfo().shape(), success);
if (!success)
{
- throw std::runtime_error{"Interp(Add): Fail to brodcasting"};
+ throw std::runtime_error{"Interp(" + arithmetic_node.name() + "): Fail to brodcasting"};
}
auto output_info =
@@ -86,7 +85,7 @@ template <typename node_type> void prepareAdd(ExecEnv *env, const ir::Operation
// TODO Util function to compare TensorInfo
if (lhs_tensor->data_type() != out_tensor->data_type())
{
- throw std::runtime_error{"Interp(Add): Invalid output type"};
+ throw std::runtime_error{"Interp(" + arithmetic_node.name() + "): Invalid output type"};
}
}
@@ -103,9 +102,9 @@ inline void setActivationParams(int32_t min, int32_t max,
params->quantized_activation_max = max;
}
-template <typename raw_type, typename param_type, OpType op_type>
+template <typename raw_type, OpType op_type>
void invoke(const ITensor *lhs_tensor, const ITensor *rhs_tensor, const ITensor *out_tensor,
- const param_type &param)
+ const ir::operation::BinaryArithmetic::Param &param)
{
const auto lhs_buffer = lhs_tensor->bufferRO();
const auto rhs_buffer = rhs_tensor->bufferRO();
@@ -146,13 +145,11 @@ void invoke(const ITensor *lhs_tensor, const ITensor *rhs_tensor, const ITensor
out_shape, out_ptr);
}
-template <typename node_type, typename param_type, OpType op_type>
-void invokeAdd(const ExecEnv *env, const ir::Operation &node)
+template <OpType op_type>
+void invokeBinaryArithmetic(const ExecEnv *env, const ir::operation::BinaryArithmetic &node)
{
- const auto &arithmetic_node = nnfw::misc::polymorphic_downcast<const node_type &>(node);
-
- const auto lhs_index = node.getInputs().at(arithmetic_node.LHS);
- const auto rhs_index = node.getInputs().at(arithmetic_node.RHS);
+ const auto lhs_index = node.getInputs().at(node.LHS);
+ const auto rhs_index = node.getInputs().at(node.RHS);
const auto out_index = node.getOutputs().at(0);
const auto lhs_tensor = env->tensorAt(lhs_index);
const auto rhs_tensor = env->tensorAt(rhs_index);
@@ -161,38 +158,46 @@ void invokeAdd(const ExecEnv *env, const ir::Operation &node)
if (data_type == ir::DataType::INT32)
{
- invoke<int32_t, param_type, op_type>(lhs_tensor, rhs_tensor, out_tensor,
- arithmetic_node.param());
+ invoke<int32_t, op_type>(lhs_tensor, rhs_tensor, out_tensor, node.param());
}
else if (data_type == ir::DataType::FLOAT32)
{
- invoke<float, param_type, op_type>(lhs_tensor, rhs_tensor, out_tensor, arithmetic_node.param());
+ invoke<float, op_type>(lhs_tensor, rhs_tensor, out_tensor, node.param());
}
else
{
throw std::runtime_error{"NYI: Unsupported data type"};
}
}
-} // namespace
-OpKernel *getAdd()
+void invokeBinaryArithmeticOps(const ExecEnv *env, const ir::Operation &node)
{
- static OpKernel kernel = {prepareAdd<ir::operation::Add>,
- invokeAdd<ir::operation::Add, ir::operation::Add::Param, OpType::ADD>};
- return &kernel;
-}
+ const auto &arithmetic_node =
+ nnfw::misc::polymorphic_downcast<const ir::operation::BinaryArithmetic &>(node);
-OpKernel *getSub()
-{
- static OpKernel kernel = {prepareAdd<ir::operation::Sub>,
- invokeAdd<ir::operation::Sub, ir::operation::Sub::Param, OpType::SUB>};
- return &kernel;
+ switch (arithmetic_node.param().arithmetic_type)
+ {
+ case ir::operation::BinaryArithmetic::ArithmeticType::ADD:
+ invokeBinaryArithmetic<OpType::ADD>(env, arithmetic_node);
+ break;
+ case ir::operation::BinaryArithmetic::ArithmeticType::SUB:
+ invokeBinaryArithmetic<OpType::SUB>(env, arithmetic_node);
+ break;
+ case ir::operation::BinaryArithmetic::ArithmeticType::MUL:
+ invokeBinaryArithmetic<OpType::MUL>(env, arithmetic_node);
+ break;
+ default:
+ throw std::runtime_error{"Interp(BinaryArithmetic): NYI unsupported operation " +
+ arithmetic_node.name()};
+ break;
+ }
}
-OpKernel *getMul()
+} // namespace
+
+OpKernel *getBinaryArithmetic()
{
- static OpKernel kernel = {prepareAdd<ir::operation::Mul>,
- invokeAdd<ir::operation::Mul, ir::operation::Mul::Param, OpType::MUL>};
+ static OpKernel kernel = {prepare, invokeBinaryArithmeticOps};
return &kernel;
}
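
The consolidated kernel above dispatches once at run time on BinaryArithmetic::Param and then jumps into a function templated on the operation, so the inner loop stays specialized per op. A stripped-down sketch of that enum-to-template dispatch, using scalar floats instead of tensors and purely illustrative names:

#include <iostream>
#include <stdexcept>

enum class ArithmeticType { ADD, SUB, MUL };

// Compile-time op selection, mirroring invoke<raw_type, op_type>() in the patch.
template <ArithmeticType op> float apply(float lhs, float rhs)
{
  if (op == ArithmeticType::ADD) return lhs + rhs;
  if (op == ArithmeticType::SUB) return lhs - rhs;
  return lhs * rhs;
}

// Run-time dispatch on the node parameter, mirroring invokeBinaryArithmeticOps().
float invokeBinaryArithmetic(ArithmeticType type, float lhs, float rhs)
{
  switch (type)
  {
    case ArithmeticType::ADD: return apply<ArithmeticType::ADD>(lhs, rhs);
    case ArithmeticType::SUB: return apply<ArithmeticType::SUB>(lhs, rhs);
    case ArithmeticType::MUL: return apply<ArithmeticType::MUL>(lhs, rhs);
    default: throw std::runtime_error{"NYI unsupported operation"};
  }
}

int main()
{
  std::cout << invokeBinaryArithmetic(ArithmeticType::MUL, 3.0f, 4.0f) << "\n"; // prints 12
  return 0;
}
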
diff --git a/runtime/onert/core/src/interp/operations/UnaryActivations.cc b/runtime/onert/core/src/interp/operations/ElementwiseActivations.cc
index ea5e2417b..c8773bef4 100644
--- a/runtime/onert/core/src/interp/operations/UnaryActivations.cc
+++ b/runtime/onert/core/src/interp/operations/ElementwiseActivations.cc
@@ -20,10 +20,11 @@
#include "interp/Registration.h"
-#include "ir/operation/ReLU.h"
-#include "ir/operation/ReLU1.h"
-#include "ir/operation/ReLU6.h"
-#include "ir/operation/Tanh.h"
+#include "ir/operation/ElementwiseActivation.h"
+
+#include <misc/polymorphic_downcast.h>
+#include <cker/operation/Logistic.h>
+#include <cker/operation/Tanh.h>
namespace onert
{
@@ -34,9 +35,8 @@ namespace
enum class ActivationType
{
+ Logistic,
ReLU,
- ReLU1,
- ReLU6,
Tanh
};
@@ -65,30 +65,25 @@ void prepare(ExecEnv *env, const ir::Operation &node)
// TODO Util function to compare TensorInfo
if (input_tensor->data_type() != output_tensor->data_type())
{
- throw std::runtime_error{"Interp(Activations): Invalid output type"};
+ throw std::runtime_error{"Interp(ElementwiseActivation): Invalid output type"};
}
}
template <ActivationType act_type>
-void evalFloat(const float *input_ptr, float *output_ptr, uint64_t num_elements)
+void evalFloat(const float *input_ptr, float *output_ptr, uint64_t num_elements, float alpha,
+ float beta)
{
std::function<float(const float &)> fn = [](const float &) { return std::nanf(""); };
switch (act_type)
{
case ActivationType::ReLU:
- fn = [](const float &in) { return std::max(0.f, in); };
- break;
- case ActivationType::ReLU1:
- fn = [](const float &in) { return std::min(std::max(-1.f, in), 1.f); };
- break;
- case ActivationType::ReLU6:
- fn = [](const float &in) { return std::min(std::max(0.f, in), 6.f); };
+ fn = [alpha, beta](const float &in) { return std::min(std::max(beta, in), alpha); };
break;
case ActivationType::Tanh:
fn = [](const float &in) { return std::tanh(in); };
break;
default:
- throw std::runtime_error{"Interp(Activations): NYI - Unsupported activation"};
+ throw std::runtime_error{"Interp(ElementwiseActivation): NYI - Unsupported activation"};
break;
}
@@ -114,38 +109,51 @@ template <ActivationType act_type> void invoke(const ExecEnv *env, const ir::Ope
uint64_t elements = input_tensor->num_elements();
const float *input_start = reinterpret_cast<const float *>(input_tensor->bufferRO());
float *out = reinterpret_cast<float *>(output_tensor->buffer());
-
- evalFloat<act_type>(input_start, out, elements);
+ if (act_type == ActivationType::Logistic)
+ {
+ const auto cker_input_shape = convertShape(input_tensor->tensorInfo().shape());
+ const auto cker_output_shape = convertShape(output_tensor->tensorInfo().shape());
+ nnfw::cker::Logistic(cker_input_shape, input_start, cker_output_shape, out);
+ }
+ else
+ {
+ const auto &act_node =
+ nnfw::misc::polymorphic_downcast<const ir::operation::ElementwiseActivation &>(node);
+ evalFloat<act_type>(input_start, out, elements, act_node.param().alpha,
+ act_node.param().beta);
+ }
}
else
{
- throw std::runtime_error{"Interp(ReLU6): NYI - Support float only"};
+ throw std::runtime_error{"Interp(" + node.name() + "): NYI - Support float only"};
}
}
-} // namespace
-
-OpKernel *getReLU()
+void invokeElementwiseActivation(const ExecEnv *env, const ir::Operation &node)
{
- static OpKernel kernel = {prepare, invoke<ActivationType::ReLU>};
- return &kernel;
-}
-
-OpKernel *getReLU1()
-{
- static OpKernel kernel = {prepare, invoke<ActivationType::ReLU1>};
- return &kernel;
+ const auto &act_node =
+ nnfw::misc::polymorphic_downcast<const ir::operation::ElementwiseActivation &>(node);
+ switch (act_node.param().op_type)
+ {
+ case ir::operation::ElementwiseActivation::Type::LOGISTIC:
+ invoke<ActivationType::Logistic>(env, node);
+ break;
+ case ir::operation::ElementwiseActivation::Type::RELU:
+ invoke<ActivationType::ReLU>(env, node);
+ break;
+ case ir::operation::ElementwiseActivation::Type::TANH:
+ invoke<ActivationType::Tanh>(env, node);
+ break;
+ default:
+ throw std::runtime_error("Interp(" + node.name() + "): NYI - Unsupported activation");
+ }
}
-OpKernel *getReLU6()
-{
- static OpKernel kernel = {prepare, invoke<ActivationType::ReLU6>};
- return &kernel;
-}
+} // namespace
-OpKernel *getTanh()
+OpKernel *getElementwiseActivation()
{
- static OpKernel kernel = {prepare, invoke<ActivationType::Tanh>};
+ static OpKernel kernel = {prepare, invokeElementwiseActivation};
return &kernel;
}
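
The ReLU family collapses into the single clamp std::min(std::max(beta, in), alpha) parameterised by ElementwiseActivation's alpha/beta; presumably ReLU maps to alpha = +infinity, beta = 0, ReLU1 to alpha = 1, beta = -1 and ReLU6 to alpha = 6, beta = 0 (the exact defaults are set where the op is created, so treat those values as assumptions). A tiny check of that mapping:

#include <algorithm>
#include <cassert>
#include <limits>

// One clamp covers the whole ReLU family, as in the patched evalFloat().
float clamped_relu(float in, float alpha, float beta) { return std::min(std::max(beta, in), alpha); }

int main()
{
  const float inf = std::numeric_limits<float>::infinity();
  assert(clamped_relu(-2.0f, inf, 0.0f) == 0.0f);   // plain ReLU (assumed alpha/beta)
  assert(clamped_relu(-2.0f, 1.0f, -1.0f) == -1.0f); // ReLU1
  assert(clamped_relu(8.0f, 6.0f, 0.0f) == 6.0f);    // ReLU6
  return 0;
}
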
diff --git a/runtime/onert/core/src/interp/operations/Logistic.cc b/runtime/onert/core/src/interp/operations/Logistic.cc
deleted file mode 100644
index c23cbb782..000000000
--- a/runtime/onert/core/src/interp/operations/Logistic.cc
+++ /dev/null
@@ -1,99 +0,0 @@
-/*
- * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#include <cker/operation/Logistic.h>
-
-#include "OperationUtil.h"
-
-#include "interp/Registration.h"
-#include "ir/operation/Logistic.h"
-
-namespace onert
-{
-namespace interp
-{
-namespace
-{
-
-void prepareLogistic(ExecEnv *env, const ir::Operation &node)
-{
- const auto input_index = node.getInputs().at(0);
- const auto output_index = node.getOutputs().at(0);
-
- const auto input_tensor = env->tensorAt(input_index);
-
- const auto output_info = env->graph().operands().at(output_index).info();
-
- // Check shape and type lhs is same with rhs
- // TODO Util function to compare TensorInfo
- if (output_info.total_size() == 0)
- {
- throw std::runtime_error{"Interp(TConv): NYI unspecified output shape"};
- }
- else
- {
- env->allocateIfNeeded(output_index, output_info);
- }
-
- const auto output_tensor = env->tensorAt(output_index);
- if (input_tensor->data_type() != output_tensor->data_type())
- {
- throw std::runtime_error{"Interp(Logistic): Invalid output type"};
- }
-}
-
-void invoke(const ITensor *input_tensor, const ITensor *output_tensor)
-{
- const auto input_buffer = input_tensor->bufferRO();
- auto output_buffer = output_tensor->buffer();
-
- const auto cker_input_shape = convertShape(input_tensor->tensorInfo().shape());
- const auto cker_output_shape = convertShape(output_tensor->tensorInfo().shape());
- const float *input_ptr = reinterpret_cast<const float *>(input_buffer);
- float *output_ptr = reinterpret_cast<float *>(output_buffer);
-
- nnfw::cker::Logistic(cker_input_shape, input_ptr, cker_output_shape, output_ptr);
-}
-
-void invokeLogistic(const ExecEnv *env, const ir::Operation &node)
-{
- const auto input_index = node.getInputs().at(0);
- const auto output_index = node.getOutputs().at(0);
-
- const auto input_tensor = env->tensorAt(input_index);
- const auto output_tensor = env->tensorAt(output_index);
-
- const auto data_type = input_tensor->data_type();
-
- if (data_type == ir::DataType::FLOAT32)
- {
- invoke(input_tensor, output_tensor);
- }
- else
- {
- throw std::runtime_error{"Interp(Logistic): NYI - Unsupported data type"};
- }
-}
-} // namespace
-
-OpKernel *getLogistic()
-{
- static OpKernel kernel = {prepareLogistic, invokeLogistic};
- return &kernel;
-}
-
-} // namespace interp
-} // namespace onert
diff --git a/runtime/onert/core/src/interp/operations/MaxPool2D.cc b/runtime/onert/core/src/interp/operations/MaxPool2D.cc
deleted file mode 100644
index 313948fb6..000000000
--- a/runtime/onert/core/src/interp/operations/MaxPool2D.cc
+++ /dev/null
@@ -1,125 +0,0 @@
-/*
- * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#include <cker/operation/MaxPool.h>
-
-#include "OperationUtil.h"
-
-#include "interp/Registration.h"
-#include "ir/operation/MaxPool2D.h"
-#include "util/Utils.h"
-#include "util/ShapeInference.h"
-#include "misc/polymorphic_downcast.h"
-
-namespace onert
-{
-namespace interp
-{
-namespace
-{
-
-void prepareMaxPool2D(ExecEnv *env, const ir::Operation &node)
-{
- const auto in_index = node.getInputs().at(0);
- const auto out_index = node.getOutputs().at(0);
-
- const auto in_tensor = env->tensorAt(in_index);
-
- assert(in_tensor->num_dimensions() == 4);
- UNUSED_RELEASE(in_tensor);
-
- const auto output_info = env->graph().operands().at(out_index).info();
- if (output_info.total_size() == 0)
- {
- // Handle unspecified output shape
- const auto &maxpool_node =
- nnfw::misc::polymorphic_downcast<const ir::operation::MaxPool2D &>(node);
- const auto infered_output_shape =
- shape_inference::inferMaxPoolShape(in_tensor->tensorInfo().shape(), maxpool_node.param());
- env->allocateIfNeeded(
- out_index, ir::OperandInfo::createStaticInfo(infered_output_shape, output_info.typeInfo()));
- }
- else
- {
- env->allocateIfNeeded(out_index, output_info);
- }
-
- auto out_tensor = env->tensorAt(out_index);
- UNUSED_RELEASE(out_tensor);
-
- // Handle same ifm & ofm data type only
- assert(in_tensor->data_type() == out_tensor->data_type());
- assert(out_tensor->num_dimensions() == 4);
-}
-
-void invoke(const ITensor *in_tensor, const ITensor *out_tensor,
- const ir::operation::MaxPool2D::Param &param)
-{
- // TODO support NCHW frontend
- const auto ifm_shape = in_tensor->tensorInfo().shape().asFeature(ir::Layout::NHWC);
- const auto ofm_shape = out_tensor->tensorInfo().shape().asFeature(ir::Layout::NHWC);
- const auto padding =
- ir::calculatePadding(param.padding, ifm_shape, ofm_shape, param.stride, param.kw, param.kh);
- // Calculate
- nnfw::cker::PoolParams cker_param;
- calculateActivationRange(param.activation, &cker_param.float_activation_min,
- &cker_param.float_activation_max);
- cker_param.filter_width = param.kw;
- cker_param.filter_height = param.kh;
- cker_param.padding_values.width = padding.left;
- cker_param.padding_values.height = padding.top;
- cker_param.stride_width = param.stride.horizontal;
- cker_param.stride_height = param.stride.vertical;
-
- const auto in_shape = convertShape(in_tensor->tensorInfo().shape());
- const auto out_shape = convertShape(out_tensor->tensorInfo().shape());
- const float *in_ptr = reinterpret_cast<const float *>(in_tensor->bufferRO());
- float *out_ptr = reinterpret_cast<float *>(out_tensor->buffer());
-
- nnfw::cker::MaxPool(cker_param, in_shape, in_ptr, out_shape, out_ptr);
-}
-
-void invokeMaxPool2D(const ExecEnv *env, const ir::Operation &node)
-{
- const auto &maxpool_node =
- nnfw::misc::polymorphic_downcast<const ir::operation::MaxPool2D &>(node);
-
- const auto in_index = node.getInputs().at(0);
- const auto out_index = node.getOutputs().at(0);
-
- const auto in_tensor = env->tensorAt(in_index);
- const auto out_tensor = env->tensorAt(out_index);
-
- const auto data_type = in_tensor->data_type();
- if (data_type == ir::DataType::FLOAT32)
- {
- invoke(in_tensor, out_tensor, maxpool_node.param());
- }
- else
- {
- throw std::runtime_error{"NYI: Support float32 only"};
- }
-}
-} // namespace
-
-OpKernel *getMaxPool2D()
-{
- static OpKernel kernel = {prepareMaxPool2D, invokeMaxPool2D};
- return &kernel;
-}
-
-} // namespace interp
-} // namespace onert
diff --git a/runtime/onert/core/src/interp/operations/AvgPool2D.cc b/runtime/onert/core/src/interp/operations/Pool2D.cc
index 42fe42301..92f9d70b2 100644
--- a/runtime/onert/core/src/interp/operations/AvgPool2D.cc
+++ b/runtime/onert/core/src/interp/operations/Pool2D.cc
@@ -15,11 +15,12 @@
*/
#include <cker/operation/AveragePool.h>
+#include <cker/operation/MaxPool.h>
#include "OperationUtil.h"
#include "interp/Registration.h"
-#include "ir/operation/AvgPool2D.h"
+#include "ir/operation/Pool2D.h"
#include "util/Utils.h"
#include "util/ShapeInference.h"
#include "misc/polymorphic_downcast.h"
@@ -28,12 +29,13 @@ namespace onert
{
namespace interp
{
-namespace avgpool2d
+namespace pool2d
{
-void prepareAvgPool2D(ExecEnv *env, const ir::Operation &node)
+void preparePool2D(ExecEnv *env, const ir::Operation &node)
{
- const auto in_index = node.getInputs().at(0);
+ const auto &pool_node = nnfw::misc::polymorphic_downcast<const ir::operation::Pool2D &>(node);
+ const auto in_index = node.getInputs().at(pool_node.INPUT);
const auto out_index = node.getOutputs().at(0);
const auto in_tensor = env->tensorAt(in_index);
@@ -45,10 +47,8 @@ void prepareAvgPool2D(ExecEnv *env, const ir::Operation &node)
if (output_info.total_size() == 0)
{
// Handle unspecified output shape
- const auto &avgpool_node =
- nnfw::misc::polymorphic_downcast<const ir::operation::AvgPool2D &>(node);
const auto infered_output_shape =
- shape_inference::inferAvgPoolShape(in_tensor->tensorInfo().shape(), avgpool_node.param());
+ shape_inference::inferPoolShape(in_tensor->tensorInfo().shape(), pool_node.param());
env->allocateIfNeeded(
out_index, ir::OperandInfo::createStaticInfo(infered_output_shape, output_info.typeInfo()));
}
@@ -65,18 +65,44 @@ void prepareAvgPool2D(ExecEnv *env, const ir::Operation &node)
assert(out_tensor->num_dimensions() == 4);
}
-void invoke(const ITensor *in_tensor, const ITensor *out_tensor,
- const ir::operation::AvgPool2D::Param &param)
+template <typename T>
+void invoke(const nnfw::cker::PoolParams &params, const nnfw::cker::Shape &in_shape,
+ const T *in_ptr, const nnfw::cker::Shape &out_shape, T *out_ptr,
+ ir::operation::Pool2D::PoolType op_type)
{
- // TODO Support NCHW frontend
+ switch (op_type)
+ {
+ case ir::operation::Pool2D::PoolType::AVG:
+ nnfw::cker::AveragePool<T>(params, in_shape, in_ptr, out_shape, out_ptr);
+ break;
+ case ir::operation::Pool2D::PoolType::MAX:
+ nnfw::cker::MaxPool<T>(params, in_shape, in_ptr, out_shape, out_ptr);
+ break;
+ default:
+ throw std::runtime_error{"Interp(Pool2D): NYI unsupported operation"};
+ break;
+ }
+}
+
+void invokePool2DOps(const ExecEnv *env, const ir::Operation &node)
+{
+ const auto &pool_node = nnfw::misc::polymorphic_downcast<const ir::operation::Pool2D &>(node);
+
+ const auto in_index = node.getInputs().at(0);
+ const auto out_index = node.getOutputs().at(0);
+
+ // Fetch the input and output tensors prepared for this node
+ const auto in_tensor = env->tensorAt(in_index);
+ const auto out_tensor = env->tensorAt(out_index);
+
+ // TODO support NCHW frontend
const auto ifm_shape = in_tensor->tensorInfo().shape().asFeature(ir::Layout::NHWC);
const auto ofm_shape = out_tensor->tensorInfo().shape().asFeature(ir::Layout::NHWC);
+ const auto param = pool_node.param();
const auto padding =
ir::calculatePadding(param.padding, ifm_shape, ofm_shape, param.stride, param.kw, param.kh);
// Calculate
nnfw::cker::PoolParams cker_param;
- calculateActivationRange(param.activation, &cker_param.float_activation_min,
- &cker_param.float_activation_max);
cker_param.filter_width = param.kw;
cker_param.filter_height = param.kh;
cker_param.padding_values.width = padding.left;
@@ -84,41 +110,29 @@ void invoke(const ITensor *in_tensor, const ITensor *out_tensor,
cker_param.stride_width = param.stride.horizontal;
cker_param.stride_height = param.stride.vertical;
- const auto in_shape = convertShape(in_tensor->tensorInfo().shape());
- const auto out_shape = convertShape(out_tensor->tensorInfo().shape());
- const float *in_ptr = reinterpret_cast<const float *>(in_tensor->bufferRO());
- float *out_ptr = reinterpret_cast<float *>(out_tensor->buffer());
-
- nnfw::cker::AveragePool(cker_param, in_shape, in_ptr, out_shape, out_ptr);
-}
-
-void invokeAvgPool2D(const ExecEnv *env, const ir::Operation &node)
-{
- const auto &avgpool_node =
- nnfw::misc::polymorphic_downcast<const ir::operation::AvgPool2D &>(node);
-
- const auto in_index = node.getInputs().at(0);
- const auto out_index = node.getOutputs().at(0);
-
- // Check lhs shape is same with rhs (with broadcast)
- const auto in_tensor = env->tensorAt(in_index);
- const auto out_tensor = env->tensorAt(out_index);
-
const auto data_type = in_tensor->data_type();
if (data_type == ir::DataType::FLOAT32)
{
- invoke(in_tensor, out_tensor, avgpool_node.param());
+ calculateActivationRange(param.activation, &cker_param.float_activation_min,
+ &cker_param.float_activation_max);
+
+ const auto in_shape = convertShape(in_tensor->tensorInfo().shape());
+ const auto out_shape = convertShape(out_tensor->tensorInfo().shape());
+ const float *in_ptr = reinterpret_cast<const float *>(in_tensor->bufferRO());
+ float *out_ptr = reinterpret_cast<float *>(out_tensor->buffer());
+ // Now, invoke() supports only Pool2D in float
+ invoke<float>(cker_param, in_shape, in_ptr, out_shape, out_ptr, param.op_type);
}
else
{
throw std::runtime_error{"NYI: Support float only"};
}
}
-} // namespace avgpool2d
+} // namespace pool2d
-OpKernel *getAvgPool2D()
+OpKernel *getPool2D()
{
- static OpKernel kernel = {avgpool2d::prepareAvgPool2D, avgpool2d::invokeAvgPool2D};
+ static OpKernel kernel = {pool2d::preparePool2D, pool2d::invokePool2DOps};
return &kernel;
}
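
For reference, the unified kernel above now selects the cker pooling routine from the node's PoolType at invoke time. The following standalone sketch (plain C++ with illustrative names only, no dependency on nnfw::cker or the onert IR) shows the same dispatch idea on a single-channel float map with a square window and matching stride:

// Standalone sketch of the PoolType dispatch shown above (illustrative names only).
#include <algorithm>
#include <stdexcept>
#include <vector>

enum class PoolType { AVG, MAX };

// Naive pooling over a single-channel h x w float map with a k x k window and stride k.
// The real kernel delegates to nnfw::cker::AveragePool / nnfw::cker::MaxPool instead.
std::vector<float> pool2d(const std::vector<float> &in, int h, int w, int k, PoolType op)
{
  const int oh = h / k, ow = w / k;
  std::vector<float> out(oh * ow);
  for (int oy = 0; oy < oh; ++oy)
  {
    for (int ox = 0; ox < ow; ++ox)
    {
      float acc = (op == PoolType::MAX) ? in[(oy * k) * w + (ox * k)] : 0.0f;
      for (int ky = 0; ky < k; ++ky)
      {
        for (int kx = 0; kx < k; ++kx)
        {
          const float v = in[(oy * k + ky) * w + (ox * k + kx)];
          acc = (op == PoolType::MAX) ? std::max(acc, v) : acc + v;
        }
      }
      switch (op)
      {
        case PoolType::AVG:
          out[oy * ow + ox] = acc / static_cast<float>(k * k);
          break;
        case PoolType::MAX:
          out[oy * ow + ox] = acc;
          break;
        default:
          throw std::runtime_error{"Pool2D: unsupported pool type"};
      }
    }
  }
  return out;
}
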
diff --git a/runtime/onert/core/src/interp/operations/Softmax.cc b/runtime/onert/core/src/interp/operations/Softmax.cc
index 6d9359e1e..d30f78deb 100644
--- a/runtime/onert/core/src/interp/operations/Softmax.cc
+++ b/runtime/onert/core/src/interp/operations/Softmax.cc
@@ -29,43 +29,6 @@ namespace interp
namespace
{
-void Softmax2D(const float *in, const int input_size, const int batch_size, const float beta,
- float *out)
-{
- assert(input_size > 0);
-
- // For each batch
- for (int b = 0; b < batch_size; b++)
- {
- // Find the max coeff.
- float max_coeff = in[0];
- for (int i = 1; i < input_size; i++)
- {
- if (in[i] > max_coeff)
- max_coeff = in[i];
- }
-
- // Compute the normalized sum of exps.
- float exp_sum = 0.0;
- for (int i = 0; i < input_size; i++)
- {
- out[i] = std::exp((in[i] - max_coeff) * beta);
- exp_sum += out[i];
- }
-
- // Divide by the sum of exps.
- float reciprocal_sum_exp = 1.f / exp_sum;
- for (int i = 0; i < input_size; i++)
- {
- out[i] *= reciprocal_sum_exp;
- }
-
- // Advance in and out pointers for the next batch.
- in += input_size;
- out += input_size;
- }
-}
-
void prepareSoftMax(ExecEnv *env, const ir::Operation &node)
{
const auto in_index = node.getInputs().at(0);
@@ -108,7 +71,7 @@ void invoke(const ITensor *in_tensor, const ITensor *out_tensor,
uint32_t batch_size = in_tensor->dimension(0);
uint32_t input_size = in_tensor->dimension(1);
- Softmax2D(in_ptr, input_size, batch_size, beta, out_ptr);
+ nnfw::cker::Softmax(in_ptr, input_size, batch_size, beta, out_ptr);
}
else if (in_tensor->num_dimensions() == 4)
{
diff --git a/runtime/onert/core/src/ir/Graph.cc b/runtime/onert/core/src/ir/Graph.cc
index 0db9b6133..fe8b1b443 100644
--- a/runtime/onert/core/src/ir/Graph.cc
+++ b/runtime/onert/core/src/ir/Graph.cc
@@ -56,18 +56,34 @@ void Graph::setOperandValue(const OperandIndex &ind, std::shared_ptr<Data> data)
_operands.at(ind).data(std::move(data));
}
-void Graph::addInput(const OperandIndex &ind)
+void Graph::addInput(const OperandIndex &ind, const std::string &name)
{
assert(isBuildingPhase());
+ if (!name.empty())
+ _name_to_input.emplace(name, IOIndex{_inputs.size()});
_inputs.append(ind);
}
-void Graph::addOutput(const OperandIndex &ind)
+void Graph::addOutput(const OperandIndex &ind, const std::string &name)
{
assert(isBuildingPhase());
+ if (!name.empty())
+ _name_to_output.emplace(name, IOIndex{_outputs.size()});
_outputs.append(ind);
}
+IOIndex Graph::getInputIndex(const std::string &name) const
+{
+ auto itr = _name_to_input.find(name);
+ return (itr == _name_to_input.end()) ? IOIndex{} : itr->second;
+}
+
+IOIndex Graph::getOutputIndex(const std::string &name) const
+{
+ auto itr = _name_to_output.find(name);
+ return (itr == _name_to_output.end()) ? IOIndex{} : itr->second;
+}
+
void Graph::finishBuilding(void)
{
assert(isBuildingPhase());
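
The name maps added above make graph inputs and outputs addressable by a signature name, with an invalid index returned for unknown names. Below is a minimal sketch of that registration and lookup pattern, using simplified stand-in types rather than the actual onert headers:

// Sketch of the name -> index registration and lookup that Graph now performs
// (simplified stand-in types; not the actual onert headers).
#include <cstddef>
#include <cstdint>
#include <string>
#include <unordered_map>

struct IOIndex
{
  explicit IOIndex(int32_t v = -1) : value{v} {}
  int32_t value; // -1 stands in for onert's "undefined" index
  bool valid() const { return value >= 0; }
};

class NamedInputs
{
public:
  // Mirrors addInput(): unnamed inputs are still appended, they just cannot be found by name.
  void addInput(const std::string &name)
  {
    if (!name.empty())
      _name_to_input.emplace(name, IOIndex{static_cast<int32_t>(_num_inputs)});
    ++_num_inputs;
  }
  // Mirrors getInputIndex(): an unknown name yields an invalid (default) index.
  IOIndex getInputIndex(const std::string &name) const
  {
    auto itr = _name_to_input.find(name);
    return (itr == _name_to_input.end()) ? IOIndex{} : itr->second;
  }

private:
  std::unordered_map<std::string, IOIndex> _name_to_input;
  std::size_t _num_inputs = 0;
};
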
diff --git a/runtime/onert/core/src/ir/GraphIterator.cc b/runtime/onert/core/src/ir/GraphIterator.cc
index 2b29a9ea9..4bea1a55d 100644
--- a/runtime/onert/core/src/ir/GraphIterator.cc
+++ b/runtime/onert/core/src/ir/GraphIterator.cc
@@ -17,7 +17,7 @@
#include "GraphIterator.h"
#include "ir/OperationIndexMap.h"
-#include "ir/LoweredGraph.h"
+#include "compiler/LoweredGraph.h"
namespace onert
{
diff --git a/runtime/onert/core/src/ir/GraphIterator.h b/runtime/onert/core/src/ir/GraphIterator.h
index 534ffef80..b54314e0e 100644
--- a/runtime/onert/core/src/ir/GraphIterator.h
+++ b/runtime/onert/core/src/ir/GraphIterator.h
@@ -23,12 +23,19 @@
namespace onert
{
+namespace compiler
+{
+class LoweredGraph;
+} // namespace compiler
+} // namespace onert
+
+namespace onert
+{
namespace ir
{
class Graph;
class Operation;
-class LoweredGraph;
class OpSequence;
template <bool is_const> class Iterator
@@ -65,7 +72,8 @@ public:
using NodeRef = typename Iterator<is_const>::NodeRef;
using IterFn = typename Iterator<is_const>::IterFn;
using LoweredGraphRef =
- typename std::conditional<is_const, const LoweredGraph &, LoweredGraph &>::type;
+ typename std::conditional<is_const, const typename compiler::LoweredGraph &,
+ typename compiler::LoweredGraph &>::type;
using OpSequenceRef = typename std::conditional<is_const, const OpSequence &, OpSequence &>::type;
using OpSeqIndexRef = const OpSequenceIndex &;
using OpSeqIterFn = std::function<void(OpSeqIndexRef, OpSequenceRef)>;
diff --git a/runtime/onert/core/src/ir/OpSequences.cc b/runtime/onert/core/src/ir/OpSequences.cc
index a87d31a9f..68884783e 100644
--- a/runtime/onert/core/src/ir/OpSequences.cc
+++ b/runtime/onert/core/src/ir/OpSequences.cc
@@ -83,15 +83,6 @@ OpSequenceIndex OpSequences::getOperation(const OperationIndex &operation_index)
return ret;
}
-// TODO: Extract this into external helper function
-void OpSequences::dump(const std::string &msg, const Operations &operations) const
-{
- VERBOSE(OpSequences) << "OpSequences(" << msg << ")" << std::endl;
- iterate([&](const OpSequenceIndex &idx, const OpSequence &op_seq) {
- VERBOSE(OpSequences) << idx.value() << "] " << getStrFromOpSeq(op_seq, operations) << std::endl;
- });
-}
-
void OpSequences::removeFromOpSequence(const OperationIndex &operation_index)
{
const auto op_seq_index = findOperation(operation_index);
@@ -122,5 +113,12 @@ OpSequenceIndex OpSequences::findOperation(const OperationIndex &operation_index
throw std::runtime_error("Operation not found");
}
+void dumpOpSequences(const OpSequences &op_seqs, const Operations &operations)
+{
+ op_seqs.iterate([&](const OpSequenceIndex &idx, const OpSequence &op_seq) {
+ VERBOSE(OpSequences) << idx.value() << "] " << getStrFromOpSeq(op_seq, operations) << std::endl;
+ });
+}
+
} // namespace ir
} // namespace onert
diff --git a/runtime/onert/core/src/ir/OperationDumper.cc b/runtime/onert/core/src/ir/OperationDumper.cc
index e3cbce57a..48361f464 100644
--- a/runtime/onert/core/src/ir/OperationDumper.cc
+++ b/runtime/onert/core/src/ir/OperationDumper.cc
@@ -27,206 +27,137 @@ namespace ir
using namespace operation;
-OperationDumper::OperationDumper(const std::string &start_msg)
+namespace
{
- VERBOSE(LIR) << start_msg << std::endl;
-}
-
-void OperationDumper::visit(const Abs &node)
+void dumpUnaryInputOp(const Operation &node, const std::string &adding_input = "")
{
- VERBOSE(LIR) << "* Abs" << std::endl;
- VERBOSE(LIR) << " - Inputs : Input(" << node.getInputs().at(Abs::Input::INPUT) << ")"
+ VERBOSE(LIR) << "* " << node.name() << std::endl;
+ VERBOSE(LIR) << " - Inputs : Input(" << node.getInputs().at(0) << ") " << adding_input
<< std::endl;
VERBOSE(LIR) << " - Output : Output(" << node.getOutputs().at(0) << ")" << std::endl;
}
-void OperationDumper::visit(const Add &node)
+void dumpBinaryInputOp(const Operation &node, const std::string &adding_input = "")
{
- VERBOSE(LIR) << "* Add" << std::endl;
- VERBOSE(LIR) << " - Inputs : Input(" << node.getInputs().at(Add::Input::LHS) << ", "
- << node.getInputs().at(Add::Input::RHS) << ")" << std::endl;
- VERBOSE(LIR) << " - Output : Output(" << node.getOutputs().at(0) << ")" << std::endl;
-}
-
-void OperationDumper::visit(const ArgMax &node)
-{
- VERBOSE(LIR) << "* ArgMax" << std::endl;
- VERBOSE(LIR) << " - Inputs : Input(" << node.getInputs().at(ArgMax::Input::INPUT) << ")"
- << std::endl;
+ VERBOSE(LIR) << "* " << node.name() << std::endl;
+ VERBOSE(LIR) << " - Inputs : Input(" << node.getInputs().at(0) << ", " << node.getInputs().at(0)
+ << ") " << adding_input << std::endl;
VERBOSE(LIR) << " - Output : Output(" << node.getOutputs().at(0) << ")" << std::endl;
}
-void OperationDumper::visit(const AvgPool2D &node)
+void dumpConvOp(const Operation &node, const std::string &padding_type)
{
- VERBOSE(LIR) << "* AvgPool2D(Implicit)" << std::endl;
- VERBOSE(LIR) << " - Inputs : IFM(" << node.getInputs().at(AvgPool2D::Input::INPUT) << ")"
- << std::endl;
+ VERBOSE(LIR) << "* " << node.name() << "(" << padding_type << ")" << std::endl;
+ VERBOSE(LIR) << " - Inputs : IFM(" << node.getInputs().at(Conv2D::Input::INPUT) << ") Kernel("
+ << node.getInputs().at(Conv2D::Input::KERNEL) << ") Bias("
+ << node.getInputs().at(Conv2D::Input::BIAS) << ")" << std::endl;
VERBOSE(LIR) << " - Output : OFM(" << node.getOutputs().at(0) << ")" << std::endl;
}
-void OperationDumper::visit(const BatchToSpaceND &node)
+void dumpPackingOp(const Operation &node)
{
- VERBOSE(LIR) << "* BatchToSpaceND" << std::endl;
- VERBOSE(LIR) << " - Inputs : Input(" << node.getInputs().at(BatchToSpaceND::Input::INPUT) << ")"
- << " BlockSize(" << node.getInputs().at(BatchToSpaceND::Input::BLOCK_SIZE) << ")"
- << std::endl;
+ VERBOSE(LIR) << "* " << node.name() << std::endl;
+ std::string inputs;
+ for (auto i : node.getInputs())
+ {
+ inputs += std::to_string(i.value()) + ",";
+ }
+ VERBOSE(LIR) << " - Inputs : Inputs(" << inputs << ")" << std::endl;
VERBOSE(LIR) << " - Output : Output(" << node.getOutputs().at(0) << ")" << std::endl;
}
+} // namespace
-void OperationDumper::visit(const operation::BroadcastTo &node)
+OperationDumper::OperationDumper(const std::string &start_msg)
{
- VERBOSE(LIR) << "* BroadcastTo" << std::endl;
- VERBOSE(LIR) << " - Inputs : Input(" << node.getInputs().at(BroadcastTo::Input::INPUT) << ", "
- << node.getInputs().at(BroadcastTo::Input::SHAPE) << ")" << std::endl;
- VERBOSE(LIR) << " - Output : Output(" << node.getOutputs().at(0) << ")" << std::endl;
+ VERBOSE(LIR) << start_msg << std::endl;
}
-void OperationDumper::visit(const Cast &node)
-{
- VERBOSE(LIR) << "* Cast" << std::endl;
- VERBOSE(LIR) << " - Inputs : Input(" << node.getInputs().at(Cast::Input::INPUT) << ")"
- << std::endl;
- VERBOSE(LIR) << " - Output : Output(" << node.getOutputs().at(0) << ")" << std::endl;
-}
+void OperationDumper::visit(const ArgMax &node) { dumpUnaryInputOp(node); }
-void OperationDumper::visit(const Comparison &node)
+void OperationDumper::visit(const BatchToSpaceND &node)
{
- VERBOSE(LIR) << "* Comparison" << std::endl;
- VERBOSE(LIR) << " - Inputs : Input(" << node.getInputs().at(Comparison::Input::INPUT0) << ", "
- << node.getInputs().at(Comparison::Input::INPUT1) << ")" << std::endl;
- VERBOSE(LIR) << " - Output : Output(" << node.getOutputs().at(0) << ")" << std::endl;
+ std::string block_size =
+ "BlockSize(" +
+ std::to_string(node.getInputs().at(BatchToSpaceND::Input::BLOCK_SIZE).value()) + ")";
+ dumpUnaryInputOp(node, block_size);
}
-void OperationDumper::visit(const Concat &node)
-{
- VERBOSE(LIR) << "* Concat" << std::endl;
- std::string inputs;
- for (auto i : node.getInputs())
- {
- inputs += std::to_string(i.value()) + ",";
- }
- VERBOSE(LIR) << " - Inputs : IFM(" << inputs << ")" << std::endl;
- VERBOSE(LIR) << " - Output : OFM(" << node.getOutputs().at(0) << ")" << std::endl;
-}
+void OperationDumper::visit(const BinaryArithmetic &node) { dumpBinaryInputOp(node); }
+
+void OperationDumper::visit(const operation::BroadcastTo &node) { dumpBinaryInputOp(node); }
+
+void OperationDumper::visit(const Comparison &node) { dumpBinaryInputOp(node); }
+
+void OperationDumper::visit(const Concat &node) { dumpPackingOp(node); }
void OperationDumper::visit(const Conv2D &node)
{
std::string padding_type =
node.param().padding.type == PaddingType::EXPLICIT ? "Explicit" : "Implicit";
- VERBOSE(LIR) << "* Conv2D(" << padding_type << ")" << std::endl;
- VERBOSE(LIR) << " - Inputs : IFM(" << node.getInputs().at(Conv2D::Input::INPUT) << ") Kernel("
- << node.getInputs().at(Conv2D::Input::KERNEL) << ") Bias("
- << node.getInputs().at(Conv2D::Input::BIAS) << ")" << std::endl;
- VERBOSE(LIR) << " - Output : OFM(" << node.getOutputs().at(0) << ")" << std::endl;
+ dumpConvOp(node, padding_type);
}
-void OperationDumper::visit(const ConvertFp16ToFp32 &node)
-{
- VERBOSE(LIR) << "* ConvertFp16ToFp32" << std::endl;
- VERBOSE(LIR) << " - Inputs : Input(" << node.getInputs().at(ConvertFp16ToFp32::Input::INPUT)
- << ")" << std::endl;
- VERBOSE(LIR) << " - Output : Output(" << node.getOutputs().at(0) << ")" << std::endl;
-}
+void OperationDumper::visit(const ConvertFp16ToFp32 &node) { dumpUnaryInputOp(node); }
-void OperationDumper::visit(const ConvertFp32ToFp16 &node)
-{
- VERBOSE(LIR) << "* ConvertFp32ToFp16" << std::endl;
- VERBOSE(LIR) << " - Inputs : Input(" << node.getInputs().at(ConvertFp32ToFp16::Input::INPUT)
- << ")" << std::endl;
- VERBOSE(LIR) << " - Output : Output(" << node.getOutputs().at(0) << ")" << std::endl;
-}
-
-void OperationDumper::visit(const Cos &node)
-{
- VERBOSE(LIR) << "* Cos" << std::endl;
- VERBOSE(LIR) << " - Inputs : Input(" << node.getInputs().at(Cos::Input::INPUT) << ")"
- << std::endl;
- VERBOSE(LIR) << " - Output : Output(" << node.getOutputs().at(0) << ")" << std::endl;
-}
+void OperationDumper::visit(const ConvertFp32ToFp16 &node) { dumpUnaryInputOp(node); }
-void OperationDumper::visit(const DepthToSpace &node)
-{
- VERBOSE(LIR) << "* DepthToSpace" << std::endl;
- VERBOSE(LIR) << " - Inputs : Input(" << node.getInputs().at(DepthToSpace::Input::INPUT) << ")"
- << std::endl;
- VERBOSE(LIR) << " - Output : Output(" << node.getOutputs().at(0) << ")" << std::endl;
-}
+void OperationDumper::visit(const DepthToSpace &node) { dumpUnaryInputOp(node); }
void OperationDumper::visit(const DepthwiseConv2D &node)
{
std::string padding_type =
node.param().padding.type == PaddingType::EXPLICIT ? "Explicit" : "Implicit";
- VERBOSE(LIR) << "* DepthwiseConv2D(" << padding_type << ")" << std::endl;
- VERBOSE(LIR) << " - Inputs : IFM(" << node.getInputs().at(DepthwiseConv2D::Input::INPUT)
- << ") Kernel(" << node.getInputs().at(DepthwiseConv2D::Input::KERNEL) << ") Bias("
- << node.getInputs().at(DepthwiseConv2D::Input::BIAS) << ")" << std::endl;
- VERBOSE(LIR) << " - Output : OFM(" << node.getOutputs().at(0) << ")" << std::endl;
+ dumpConvOp(node, padding_type);
}
-void OperationDumper::visit(const Dequantize &node)
+void OperationDumper::visit(const ElementwiseActivation &node)
{
- VERBOSE(LIR) << "* Dequantize" << std::endl;
- VERBOSE(LIR) << " - Inputs : Input(" << node.getInputs().at(Dequantize::Input::INPUT) << ")"
- << std::endl;
- VERBOSE(LIR) << " - Output : Output(" << node.getOutputs().at(0) << ")" << std::endl;
+ std::string params;
+ if (node.param().op_type == ElementwiseActivation::Type::RELU)
+ {
+ params = " lower value(" + std::to_string(node.param().alpha) + ") upper value(" +
+ std::to_string(node.param().beta) + ")";
+ }
+ else if (node.param().op_type == ElementwiseActivation::Type::LEAKY_RELU)
+ {
+ params = " alpha value(" + std::to_string(node.param().alpha) + ")";
+ }
+ dumpUnaryInputOp(node, params);
}
-void OperationDumper::visit(const Div &node)
-{
- VERBOSE(LIR) << "* Div" << std::endl;
- VERBOSE(LIR) << " - Inputs : Input(" << node.getInputs().at(Div::Input::LHS) << ", "
- << node.getInputs().at(Div::Input::RHS) << ")" << std::endl;
- VERBOSE(LIR) << " - Output : Output(" << node.getOutputs().at(0) << ")" << std::endl;
-}
+void OperationDumper::visit(const ElementwiseBinary &node) { dumpBinaryInputOp(node); }
+
+void OperationDumper::visit(const ElementwiseUnary &node) { dumpUnaryInputOp(node); }
void OperationDumper::visit(const EmbeddingLookup &node)
{
- VERBOSE(LIR) << "* EmbeddingLookup" << std::endl;
+ VERBOSE(LIR) << "* " << node.name() << std::endl;
VERBOSE(LIR) << " - Inputs : Lookups(" << node.getInputs().at(EmbeddingLookup::Input::LOOKUPS)
<< ") VALUES(" << node.getInputs().at(EmbeddingLookup::Input::VALUES) << ")"
<< std::endl;
VERBOSE(LIR) << " - Output : Output(" << node.getOutputs().at(0) << ")" << std::endl;
}
-void OperationDumper::visit(const Exp &node)
-{
- VERBOSE(LIR) << "* Exp" << std::endl;
- VERBOSE(LIR) << " - Inputs : Input(" << node.getInputs().at(Exp::Input::INPUT) << ")"
- << std::endl;
- VERBOSE(LIR) << " - Output : Output(" << node.getOutputs().at(0) << ")" << std::endl;
-}
-
void OperationDumper::visit(const ExpandDims &node)
{
- VERBOSE(LIR) << "* ExpandDims" << std::endl;
- VERBOSE(LIR) << " - Inputs : Input(" << node.getInputs().at(ExpandDims::Input::INPUT)
- << ") AXIS(" << node.getInputs().at(ExpandDims::Input::AXIS) << ")" << std::endl;
- VERBOSE(LIR) << " - Output : Output(" << node.getOutputs().at(0) << ")" << std::endl;
-}
-
-void OperationDumper::visit(const Floor &node)
-{
- VERBOSE(LIR) << "* Floor" << std::endl;
- VERBOSE(LIR) << " - Inputs : Input(" << node.getInputs().at(Floor::Input::INPUT) << ")"
- << std::endl;
- VERBOSE(LIR) << " - Output : Output(" << node.getOutputs().at(0) << ")" << std::endl;
+ std::string axis =
+ "AXIS(" + std::to_string(node.getInputs().at(ExpandDims::Input::AXIS).value()) + ")";
+ dumpUnaryInputOp(node, axis);
}
void OperationDumper::visit(const FullyConnected &node)
{
- VERBOSE(LIR) << "* FullyConnected" << std::endl;
- VERBOSE(LIR) << " - Inputs : IFM(" << node.getInputs().at(FullyConnected::Input::INPUT)
- << ") Weight(" << node.getInputs().at(FullyConnected::Input::WEIGHT) << ") Bias("
- << node.getInputs().at(FullyConnected::Input::BIAS) << ")" << std::endl;
- VERBOSE(LIR) << " - Output : OFM(" << node.getOutputs().at(0) << ")" << std::endl;
+ std::string inputs =
+ "Weight(" + std::to_string(node.getInputs().at(FullyConnected::Input::WEIGHT).value()) +
+ ") Bias(" + std::to_string(node.getInputs().at(FullyConnected::Input::BIAS).value()) + ")";
+ dumpUnaryInputOp(node, inputs);
}
void OperationDumper::visit(const Gather &node)
{
- VERBOSE(LIR) << "* Gather" << std::endl;
- VERBOSE(LIR) << " - Inputs : Input(" << node.getInputs().at(Gather::Input::INPUT) << ") Indices("
- << node.getInputs().at(Gather::Input::INDICES) << ")" << std::endl;
- VERBOSE(LIR) << " - Output : Output(" << node.getOutputs().at(0) << ")" << std::endl;
+ std::string indices =
+ "Indices(" + std::to_string(node.getInputs().at(Gather::Input::INDICES).value()) + ")";
+ dumpUnaryInputOp(node, indices);
}
void OperationDumper::visit(const HashtableLookup &node)
@@ -242,36 +173,15 @@ void OperationDumper::visit(const HashtableLookup &node)
void OperationDumper::visit(const InstanceNorm &node)
{
- VERBOSE(LIR) << "* InstanceNorm" << std::endl;
- VERBOSE(LIR) << " - Inputs : IFM(" << node.getInputs().at(InstanceNorm::Input::INPUT)
- << ") Gamma(" << node.getInputs().at(InstanceNorm::Input::GAMMA) << ") Beta("
- << node.getInputs().at(InstanceNorm::Input::BETA) << ")" << std::endl;
- VERBOSE(LIR) << " - Output : OFM(" << node.getOutputs().at(0) << ")" << std::endl;
-}
-
-void OperationDumper::visit(const L2Normalization &node)
-{
- VERBOSE(LIR) << "* L2Normalization" << std::endl;
- VERBOSE(LIR) << " - Inputs : Input(" << node.getInputs().at(L2Normalization::Input::INPUT) << ")"
- << std::endl;
- VERBOSE(LIR) << " - Output : Output(" << node.getOutputs().at(0) << ")" << std::endl;
+ std::string inputs =
+ "Gamma(" + std::to_string(node.getInputs().at(InstanceNorm::Input::GAMMA).value()) +
+ ") Beta(" + std::to_string(node.getInputs().at(InstanceNorm::Input::BETA).value()) + ")";
+ dumpUnaryInputOp(node, inputs);
}
-void OperationDumper::visit(const L2Pool2D &node)
-{
- VERBOSE(LIR) << "* L2Pool2D" << std::endl;
- VERBOSE(LIR) << " - Inputs : Input(" << node.getInputs().at(L2Pool2D::Input::INPUT) << ")"
- << std::endl;
- VERBOSE(LIR) << " - Output : Output(" << node.getOutputs().at(0) << ")" << std::endl;
-}
+void OperationDumper::visit(const L2Normalization &node) { dumpUnaryInputOp(node); }
-void OperationDumper::visit(const LocalResponseNormalization &node)
-{
- VERBOSE(LIR) << "* LocalResponseNormalization" << std::endl;
- VERBOSE(LIR) << " - Inputs : Input("
- << node.getInputs().at(LocalResponseNormalization::Input::INPUT) << ")" << std::endl;
- VERBOSE(LIR) << " - Output : Output(" << node.getOutputs().at(0) << ")" << std::endl;
-}
+void OperationDumper::visit(const LocalResponseNormalization &node) { dumpUnaryInputOp(node); }
void OperationDumper::visit(const LSTM &node)
{
@@ -307,93 +217,12 @@ void OperationDumper::visit(const LSTM &node)
<< node.getInputs().at(LSTM::Output::OUTPUT) << ")" << std::endl;
}
-void OperationDumper::visit(const Log &node)
-{
- VERBOSE(LIR) << "* Log" << std::endl;
- VERBOSE(LIR) << " - Inputs : Input(" << node.getInputs().at(Log::Input::INPUT) << ")"
- << std::endl;
- VERBOSE(LIR) << " - Output : Output(" << node.getOutputs().at(0) << ")" << std::endl;
-}
-
-void OperationDumper::visit(const LogicalAnd &node)
-{
- VERBOSE(LIR) << "* LogicalAnd" << std::endl;
- VERBOSE(LIR) << " - Inputs : Input(" << node.getInputs().at(LogicalAnd::Input::INPUT0) << ", "
- << node.getInputs().at(LogicalAnd::Input::INPUT1) << ")" << std::endl;
- VERBOSE(LIR) << " - Output : Output(" << node.getOutputs().at(0) << ")" << std::endl;
-}
-
-void OperationDumper::visit(const LogicalNot &node)
-{
- VERBOSE(LIR) << "* LogicalNot" << std::endl;
- VERBOSE(LIR) << " - Inputs : Input(" << node.getInputs().at(LogicalNot::Input::INPUT) << ")"
- << std::endl;
- VERBOSE(LIR) << " - Output : Output(" << node.getOutputs().at(0) << ")" << std::endl;
-}
-
-void OperationDumper::visit(const LogicalOr &node)
-{
- VERBOSE(LIR) << "* LogicalOr" << std::endl;
- VERBOSE(LIR) << " - Inputs : Input(" << node.getInputs().at(LogicalOr::Input::INPUT0) << ", "
- << node.getInputs().at(LogicalOr::Input::INPUT1) << ")" << std::endl;
- VERBOSE(LIR) << " - Output : Output(" << node.getOutputs().at(0) << ")" << std::endl;
-}
-
-void OperationDumper::visit(const Logistic &node)
-{
- VERBOSE(LIR) << "* Logistic" << std::endl;
- VERBOSE(LIR) << " - Inputs : Input(" << node.getInputs().at(Logistic::Input::INPUT) << ")"
- << std::endl;
- VERBOSE(LIR) << " - Output : Output(" << node.getOutputs().at(0) << ")" << std::endl;
-}
-
-void OperationDumper::visit(const MaxPool2D &node)
-{
- std::string padding_type =
- node.param().padding.type == PaddingType::EXPLICIT ? "Explicit" : "Implicit";
- VERBOSE(LIR) << "* MaxPool2D(" << padding_type << ")" << std::endl;
- VERBOSE(LIR) << " - Inputs : IFM(" << node.getInputs().at(MaxPool2D::Input::INPUT) << ")"
- << std::endl;
- VERBOSE(LIR) << " - Output : OFM(" << node.getOutputs().at(0) << ")" << std::endl;
-}
-
-void OperationDumper::visit(const Mul &node)
-{
- VERBOSE(LIR) << "* Mul" << std::endl;
- VERBOSE(LIR) << " - Inputs : Input(" << node.getInputs().at(Mul::Input::LHS) << ", "
- << node.getInputs().at(Mul::Input::RHS) << ")" << std::endl;
- VERBOSE(LIR) << " - Output : Output(" << node.getOutputs().at(0) << ")" << std::endl;
-}
-
-void OperationDumper::visit(const Neg &node)
-{
- VERBOSE(LIR) << "* Neg" << std::endl;
- VERBOSE(LIR) << " - Inputs : Input(" << node.getInputs().at(Neg::Input::INPUT) << ")"
- << std::endl;
- VERBOSE(LIR) << " - Output : Output(" << node.getOutputs().at(0) << ")" << std::endl;
-}
-
-void OperationDumper::visit(const Pack &node)
-{
- VERBOSE(LIR) << "* Pack" << std::endl;
- std::string inputs;
- const auto &input_indices = node.getInputs();
- for (auto it = std::begin(input_indices); it != std::end(input_indices); ++it)
- {
- inputs += std::to_string(it->value());
- if (std::next(it) != std::end(input_indices))
- inputs += ", ";
- }
- VERBOSE(LIR) << " - Inputs : Inputs(" << inputs << ")" << std::endl;
- VERBOSE(LIR) << " - Output : Output(" << node.getOutputs().at(0) << ")" << std::endl;
-}
+void OperationDumper::visit(const Pack &node) { dumpPackingOp(node); }
void OperationDumper::visit(const Pad &node)
{
- VERBOSE(LIR) << "* Pad" << std::endl;
- VERBOSE(LIR) << " - Inputs : Input(" << node.getInputs().at(Pad::Input::INPUT) << ") Pad("
- << node.getInputs().at(Pad::Input::PAD) << ")" << std::endl;
- VERBOSE(LIR) << " - Output : Output(" << node.getOutputs().at(0) << ")" << std::endl;
+ std::string pad = "Pad(" + std::to_string(node.getInputs().at(Pad::Input::PAD).value()) + ")";
+ dumpUnaryInputOp(node, pad);
}
void OperationDumper::visit(const Permute &node)
@@ -417,86 +246,46 @@ void OperationDumper::visit(const Permute &node)
VERBOSE(LIR) << " - Output : Output(" << node.getOutputs().at(0) << ")" << std::endl;
}
-void OperationDumper::visit(const Pow &node)
+void OperationDumper::visit(const Pool2D &node)
{
- VERBOSE(LIR) << "* Pow" << std::endl;
- VERBOSE(LIR) << " - Inputs : Input(" << node.getInputs().at(Pow::Input::LHS) << ", "
- << node.getInputs().at(Pow::Input::RHS) << ")" << std::endl;
- VERBOSE(LIR) << " - Output : Output(" << node.getOutputs().at(0) << ")" << std::endl;
-}
-
-void OperationDumper::visit(const PReLU &node)
-{
- VERBOSE(LIR) << "* PReLU" << std::endl;
- VERBOSE(LIR) << " - Inputs : Input(" << node.getInputs().at(PReLU::Input::INPUT) << ") Alpha("
- << node.getInputs().at(PReLU::Input::ALPHA) << ")" << std::endl;
- VERBOSE(LIR) << " - Output : Output(" << node.getOutputs().at(0) << ")" << std::endl;
-}
-
-void OperationDumper::visit(const Reduce &node)
-{
- VERBOSE(LIR) << "* " + node.name() << std::endl;
- VERBOSE(LIR) << " - Inputs : Input(" << node.getInputs().at(Reduce::Input::INPUT) << ")"
+ std::string padding_type =
+ node.param().padding.type == PaddingType::EXPLICIT ? "Explicit" : "Implicit";
+ VERBOSE(LIR) << "* " << node.name() << "(" << padding_type << ")" << std::endl;
+ VERBOSE(LIR) << " - Inputs : IFM(" << node.getInputs().at(Pool2D::Input::INPUT) << ")"
<< std::endl;
- VERBOSE(LIR) << " - Output : Output(" << node.getOutputs().at(0) << ")" << std::endl;
+ VERBOSE(LIR) << " - Output : OFM(" << node.getOutputs().at(0) << ")" << std::endl;
}
-void OperationDumper::visit(const ReLU &node)
-{
- VERBOSE(LIR) << "* ReLU" << std::endl;
- VERBOSE(LIR) << " - Inputs : Input(" << node.getInputs().at(ReLU::Input::INPUT) << ")"
- << std::endl;
- VERBOSE(LIR) << " - Output : Output(" << node.getOutputs().at(0) << ")" << std::endl;
-}
+void OperationDumper::visit(const Pow &node) { dumpBinaryInputOp(node); }
-void OperationDumper::visit(const ReLU1 &node)
+void OperationDumper::visit(const PReLU &node)
{
- VERBOSE(LIR) << "* ReLU1" << std::endl;
- VERBOSE(LIR) << " - Inputs : Input(" << node.getInputs().at(ReLU1::Input::INPUT) << ")"
- << std::endl;
- VERBOSE(LIR) << " - Output : Output(" << node.getOutputs().at(0) << ")" << std::endl;
+ std::string alpha =
+ "Alpha(" + std::to_string(node.getInputs().at(PReLU::Input::ALPHA).value()) + ")";
+ dumpUnaryInputOp(node, alpha);
}
-void OperationDumper::visit(const ReLU6 &node)
-{
- VERBOSE(LIR) << "* ReLU6" << std::endl;
- VERBOSE(LIR) << " - Inputs : Input(" << node.getInputs().at(ReLU6::Input::INPUT) << ")"
- << std::endl;
- VERBOSE(LIR) << " - Output : Output(" << node.getOutputs().at(0) << ")" << std::endl;
-}
+void OperationDumper::visit(const Rank &node) { dumpUnaryInputOp(node); }
+
+void OperationDumper::visit(const Reduce &node) { dumpUnaryInputOp(node); }
void OperationDumper::visit(const Reshape &node)
{
- VERBOSE(LIR) << "* Reshape" << std::endl;
- VERBOSE(LIR) << " - Inputs : Input(" << node.getInputs().at(Reshape::Input::INPUT) << ")";
// optional param
- if (node.getInputs().size() == 2)
- {
- VERBOSE(LIR) << " Shape(" << node.getInputs().at(Reshape::Input::SHAPE) << ")";
- }
- else
- {
- VERBOSE(LIR) << " Shape(not provided)";
- }
- VERBOSE(LIR) << std::endl;
-
- VERBOSE(LIR) << " - Output : Output(" << node.getOutputs().at(0) << ")" << std::endl;
+ std::string shape =
+ node.getInputs().size() == 2
+ ? "Shape(" + std::to_string(node.getInputs().at(Reshape::Input::SHAPE).value()) + ")"
+ : "Shape(not provided)";
+ dumpUnaryInputOp(node, shape);
}
-void OperationDumper::visit(const ResizeBilinear &node)
-{
- VERBOSE(LIR) << "* ResizeBilinear" << std::endl;
- VERBOSE(LIR) << " - Inputs : Input(" << node.getInputs().at(ResizeBilinear::Input::INPUT) << ")"
- << std::endl;
- VERBOSE(LIR) << " - Output : Output(" << node.getOutputs().at(0) << ")" << std::endl;
-}
+void OperationDumper::visit(const ResizeBilinear &node) { dumpUnaryInputOp(node); }
void OperationDumper::visit(const Reverse &node)
{
- VERBOSE(LIR) << "* Reverse" << std::endl;
- VERBOSE(LIR) << " - Inputs : Input(" << node.getInputs().at(Reverse::Input::INPUT) << ") Axis("
- << node.getInputs().at(Reverse::Input::AXIS) << ")" << std::endl;
- VERBOSE(LIR) << " - Output : Output(" << node.getOutputs().at(0) << ")" << std::endl;
+ std::string axis =
+ "Axis(" + std::to_string(node.getInputs().at(Reverse::Input::AXIS).value()) + ")";
+ dumpUnaryInputOp(node, axis);
}
void OperationDumper::visit(const RNN &node)
@@ -512,162 +301,65 @@ void OperationDumper::visit(const RNN &node)
<< std::endl;
}
-void OperationDumper::visit(const Round &node)
-{
- VERBOSE(LIR) << "* Round" << std::endl;
- VERBOSE(LIR) << " - Inputs : Input(" << node.getInputs().at(Round::Input::INPUT) << ")"
- << std::endl;
- VERBOSE(LIR) << " - Output : Output(" << node.getOutputs().at(0) << ")" << std::endl;
-}
-
void OperationDumper::visit(const Range &node)
{
VERBOSE(LIR) << "* Range" << std::endl;
- VERBOSE(LIR) << " - Inputs : Input(" << node.getInputs().at(Range::Input::START) << ")"
+ VERBOSE(LIR) << " - Inputs : Start(" << node.getInputs().at(Range::Input::START) << ")"
<< " Limit(" << node.getInputs().at(Range::Input::LIMIT) << ")"
<< " Delta(" << node.getInputs().at(Range::Input::DELTA) << ")" << std::endl;
VERBOSE(LIR) << " - Output : Output(" << node.getOutputs().at(0) << ")" << std::endl;
}
-void OperationDumper::visit(const RSQRT &node)
-{
- VERBOSE(LIR) << "* RSQRT" << std::endl;
- VERBOSE(LIR) << " - Inputs : Input(" << node.getInputs().at(RSQRT::Input::INPUT) << ")"
- << std::endl;
- VERBOSE(LIR) << " - Output : Output(" << node.getOutputs().at(0) << ")" << std::endl;
-}
-
void OperationDumper::visit(const Select &node)
{
VERBOSE(LIR) << "* Select" << std::endl;
- VERBOSE(LIR) << " - Inputs : Input(" << node.getInputs().at(Select::Input::CONDITION) << ")"
+ VERBOSE(LIR) << " - Inputs : Condition(" << node.getInputs().at(Select::Input::CONDITION) << ")"
<< " Input_X(" << node.getInputs().at(Select::Input::INPUT_TRUE) << ")"
<< " Input_Y(" << node.getInputs().at(Select::Input::INPUT_FALSE) << ")"
<< std::endl;
VERBOSE(LIR) << " - Output : Output(" << node.getOutputs().at(0) << ")" << std::endl;
}
-void OperationDumper::visit(const ir::operation::Shape &node)
-{
- VERBOSE(LIR) << "* Shape" << std::endl;
- VERBOSE(LIR) << " - Inputs : Input(" << node.getInputs().at(ir::operation::Shape::Input::INPUT)
- << ")" << std::endl;
- VERBOSE(LIR) << " - Output : Output(" << node.getOutputs().at(0) << ")" << std::endl;
-}
-
-void OperationDumper::visit(const Sin &node)
-{
- VERBOSE(LIR) << "* Sin" << std::endl;
- VERBOSE(LIR) << " - Inputs : Input(" << node.getInputs().at(Sin::Input::INPUT) << ")"
- << std::endl;
- VERBOSE(LIR) << " - Output : Output(" << node.getOutputs().at(0) << ")" << std::endl;
-}
+void OperationDumper::visit(const ir::operation::Shape &node) { dumpUnaryInputOp(node); }
-void OperationDumper::visit(const Softmax &node)
-{
- VERBOSE(LIR) << "* Softmax" << std::endl;
- VERBOSE(LIR) << " - Inputs : Input(" << node.getInputs().at(Softmax::Input::INPUT) << ")"
- << std::endl;
- VERBOSE(LIR) << " - Output : Output(" << node.getOutputs().at(0) << ")" << std::endl;
-}
+void OperationDumper::visit(const Softmax &node) { dumpUnaryInputOp(node); }
void OperationDumper::visit(const SpaceToBatchND &node)
{
- VERBOSE(LIR) << "* SpaceToBatchND" << std::endl;
- VERBOSE(LIR) << " - Inputs : Input(" << node.getInputs().at(SpaceToBatchND::Input::INPUT)
- << ") BlockSize(" << node.getInputs().at(SpaceToBatchND::Input::BLOCK_SIZE)
- << ") Paddings(" << node.getInputs().at(SpaceToBatchND::Input::PADDINGS) << ")"
- << std::endl;
- VERBOSE(LIR) << " - Output : Output(" << node.getOutputs().at(0) << ")" << std::endl;
+ std::string inputs =
+ "BlockSize(" +
+ std::to_string(node.getInputs().at(SpaceToBatchND::Input::BLOCK_SIZE).value()) +
+ ") Paddings(" + std::to_string(node.getInputs().at(SpaceToBatchND::Input::PADDINGS).value()) +
+ ")";
+ dumpUnaryInputOp(node, inputs);
}
-void OperationDumper::visit(const SpaceToDepth &node)
-{
- VERBOSE(LIR) << "* SpaceToDepth" << std::endl;
- VERBOSE(LIR) << " - Inputs : Input(" << node.getInputs().at(SpaceToDepth::Input::INPUT) << ")"
- << std::endl;
- VERBOSE(LIR) << " - Output : Output(" << node.getOutputs().at(0) << ")" << std::endl;
-}
+void OperationDumper::visit(const SpaceToDepth &node) { dumpUnaryInputOp(node); }
-void OperationDumper::visit(const Split &node)
-{
- VERBOSE(LIR) << "* Split" << std::endl;
- VERBOSE(LIR) << " - Inputs : Input(" << node.getInputs().at(Split::Input::INPUT) << ")"
- << std::endl;
- VERBOSE(LIR) << " - Output : Output(" << node.getOutputs().at(0) << ")" << std::endl;
-}
-
-void OperationDumper::visit(const SQRT &node)
-{
- VERBOSE(LIR) << "* SQRT" << std::endl;
- VERBOSE(LIR) << " - Inputs : Input(" << node.getInputs().at(SQRT::Input::INPUT) << ")"
- << std::endl;
- VERBOSE(LIR) << " - Output : Output(" << node.getOutputs().at(0) << ")" << std::endl;
-}
+void OperationDumper::visit(const Split &node) { dumpUnaryInputOp(node); }
-void OperationDumper::visit(const SquaredDifference &node)
-{
- VERBOSE(LIR) << "* SquaredDifference" << std::endl;
- VERBOSE(LIR) << " - Inputs : Input(" << node.getInputs().at(SquaredDifference::Input::LHS)
- << ", " << node.getInputs().at(SquaredDifference::Input::RHS) << ")" << std::endl;
- VERBOSE(LIR) << " - Output : Output(" << node.getOutputs().at(0) << ")" << std::endl;
-}
+void OperationDumper::visit(const SquaredDifference &node) { dumpBinaryInputOp(node); }
void OperationDumper::visit(const StatelessRandomUniform &node)
{
VERBOSE(LIR) << "* StatelessRandomUniform" << std::endl;
- VERBOSE(LIR) << " - Inputs : Input(" << node.getInputs().at(StatelessRandomUniform::Input::SHAPE)
- << ", " << node.getInputs().at(StatelessRandomUniform::Input::SEED) << ")"
- << std::endl;
- VERBOSE(LIR) << " - Output : Output(" << node.getOutputs().at(0) << ")" << std::endl;
-}
-
-void OperationDumper::visit(const Squeeze &node)
-{
- VERBOSE(LIR) << "* Squeeze" << std::endl;
- VERBOSE(LIR) << " - Inputs : Input(" << node.getInputs().at(Squeeze::Input::INPUT) << ")"
- << std::endl;
- VERBOSE(LIR) << " - Output : Output(" << node.getOutputs().at(0) << ")" << std::endl;
-}
-
-void OperationDumper::visit(const Slice &node)
-{
- VERBOSE(LIR) << "* Slice" << std::endl;
- VERBOSE(LIR) << " - Inputs : Input(" << node.getInputs().at(Slice::Input::INPUT) << ")"
+ VERBOSE(LIR) << " - Inputs : Shape(" << node.getInputs().at(StatelessRandomUniform::Input::SHAPE)
+ << " Seed(" << node.getInputs().at(StatelessRandomUniform::Input::SEED) << ")"
<< std::endl;
VERBOSE(LIR) << " - Output : Output(" << node.getOutputs().at(0) << ")" << std::endl;
}
-void OperationDumper::visit(const StridedSlice &node)
-{
- VERBOSE(LIR) << "* StridedSlice" << std::endl;
- VERBOSE(LIR) << " - Inputs : Input(" << node.getInputs().at(StridedSlice::Input::INPUT) << ")"
- << std::endl;
- VERBOSE(LIR) << " - Output : Output(" << node.getOutputs().at(0) << ")" << std::endl;
-}
+void OperationDumper::visit(const Squeeze &node) { dumpUnaryInputOp(node); }
-void OperationDumper::visit(const Sub &node)
-{
- VERBOSE(LIR) << "* Sub" << std::endl;
- VERBOSE(LIR) << " - Inputs : Input(" << node.getInputs().at(Sub::Input::LHS) << ", "
- << node.getInputs().at(Sub::Input::RHS) << ")" << std::endl;
- VERBOSE(LIR) << " - Output : Output(" << node.getOutputs().at(0) << ")" << std::endl;
-}
+void OperationDumper::visit(const Slice &node) { dumpUnaryInputOp(node); }
-void OperationDumper::visit(const Tanh &node)
-{
- VERBOSE(LIR) << "* TanH" << std::endl;
- VERBOSE(LIR) << " - Inputs : Input(" << node.getInputs().at(Tanh::Input::INPUT) << ")"
- << std::endl;
- VERBOSE(LIR) << " - Output : Output(" << node.getOutputs().at(0) << ")" << std::endl;
-}
+void OperationDumper::visit(const StridedSlice &node) { dumpUnaryInputOp(node); }
void OperationDumper::visit(const Tile &node)
{
- VERBOSE(LIR) << "* Tile" << std::endl;
- VERBOSE(LIR) << " - Inputs : Input(" << node.getInputs().at(Tile::Input::INPUT) << ", "
- << node.getInputs().at(Tile::Input::MULTIPLES) << ")" << std::endl;
- VERBOSE(LIR) << " - Output : Output(" << node.getOutputs().at(0) << ")" << std::endl;
+ std::string multiples =
+ "Multiples(" + std::to_string(node.getInputs().at(Tile::Input::MULTIPLES).value()) + ")";
+ dumpUnaryInputOp(node, multiples);
}
void OperationDumper::visit(const TopKV2 &node)
@@ -692,17 +384,11 @@ void OperationDumper::visit(const TransposeConv &node)
VERBOSE(LIR) << " - Output : OFM(" << node.getOutputs().at(0) << ")" << std::endl;
}
-void OperationDumper::visit(const Transpose &node)
-{
- VERBOSE(LIR) << "* Transpose" << std::endl;
- VERBOSE(LIR) << " - Inputs : Input(" << node.getInputs().at(Transpose::Input::INPUT) << ")"
- << std::endl;
- VERBOSE(LIR) << " - Output : Output(" << node.getOutputs().at(0) << ")" << std::endl;
-}
+void OperationDumper::visit(const Transpose &node) { dumpUnaryInputOp(node); }
void OperationDumper::visit(const Unpack &node)
{
- VERBOSE(LIR) << "* Unpack" << std::endl;
+ VERBOSE(LIR) << "* " << node.name() << std::endl;
VERBOSE(LIR) << " - Inputs : Input(" << node.getInputs().at(Unpack::Input::INPUT) << ")"
<< std::endl;
std::string outputs;
@@ -716,25 +402,9 @@ void OperationDumper::visit(const Unpack &node)
VERBOSE(LIR) << " - Outputs : Outputs(" << outputs << ")" << std::endl;
}
-void OperationDumper::visit(const Min &node)
-{
- VERBOSE(LIR) << "* Min" << std::endl;
- VERBOSE(LIR) << " - Inputs : Input(" << node.getInputs().at(Min::Input::LHS) << ", "
- << node.getInputs().at(Min::Input::RHS) << ")" << std::endl;
- VERBOSE(LIR) << " - Output : Output(" << node.getOutputs().at(0) << ")" << std::endl;
-}
-
-void OperationDumper::visit(const Max &node)
-{
- VERBOSE(LIR) << "* Max" << std::endl;
- VERBOSE(LIR) << " - Inputs : Input(" << node.getInputs().at(Max::Input::LHS) << ", "
- << node.getInputs().at(Max::Input::RHS) << ")" << std::endl;
- VERBOSE(LIR) << " - Output : Output(" << node.getOutputs().at(0) << ")" << std::endl;
-}
-
void OperationDumper::visit(const OneHot &node)
{
- VERBOSE(LIR) << "* OneHot" << std::endl;
+ VERBOSE(LIR) << "* " << node.name() << std::endl;
VERBOSE(LIR) << " - Inputs : "
<< "Indices(" << node.getInputs().at(OneHot::Input::INDICES) << ") " << std::endl;
VERBOSE(LIR) << " - Output : Output(" << node.getOutputs().at(0) << ")" << std::endl;
@@ -742,7 +412,7 @@ void OperationDumper::visit(const OneHot &node)
void OperationDumper::visit(const If &node)
{
- VERBOSE(LIR) << "* If" << std::endl;
+ VERBOSE(LIR) << "* " << node.name() << std::endl;
std::string inputs;
const auto &input_indices = node.getInputs();
for (auto it = std::begin(input_indices); it != std::end(input_indices); ++it)
@@ -767,7 +437,7 @@ void OperationDumper::visit(const If &node)
void OperationDumper::visit(const While &node)
{
- VERBOSE(LIR) << "* While" << std::endl;
+ VERBOSE(LIR) << "* " << node.name() << std::endl;
std::string inputs;
const auto &input_indices = node.getInputs();
for (auto it = std::begin(input_indices); it != std::end(input_indices); ++it)
@@ -790,13 +460,5 @@ void OperationDumper::visit(const While &node)
VERBOSE(LIR) << " - Output : Outputs(" << outputs << ")" << std::endl;
}
-void OperationDumper::visit(const ZerosLike &node)
-{
- VERBOSE(LIR) << "* RoZerosLike" << std::endl;
- VERBOSE(LIR) << " - Inputs : Input(" << node.getInputs().at(ZerosLike::Input::INPUT) << ")"
- << std::endl;
- VERBOSE(LIR) << " - Output : Output(" << node.getOutputs().at(0) << ")" << std::endl;
-}
-
} // namespace ir
} // namespace onert
diff --git a/runtime/onert/core/src/ir/OperationDumper.h b/runtime/onert/core/src/ir/OperationDumper.h
index d83f1493f..e8ab3b3cd 100644
--- a/runtime/onert/core/src/ir/OperationDumper.h
+++ b/runtime/onert/core/src/ir/OperationDumper.h
@@ -31,85 +31,61 @@ public:
OperationDumper(const std::string &start_msg);
public:
- void visit(const operation::Abs &) override;
- void visit(const operation::Add &node) override;
void visit(const operation::ArgMax &) override;
- void visit(const operation::AvgPool2D &node) override;
void visit(const operation::BatchToSpaceND &node) override;
+ void visit(const operation::BinaryArithmetic &node) override;
void visit(const operation::BroadcastTo &) override;
- void visit(const operation::Cast &) override;
void visit(const operation::Comparison &) override;
void visit(const operation::Concat &node) override;
void visit(const operation::Conv2D &node) override;
void visit(const operation::ConvertFp16ToFp32 &node) override;
void visit(const operation::ConvertFp32ToFp16 &node) override;
- void visit(const operation::Cos &node) override;
void visit(const operation::DepthToSpace &) override;
void visit(const operation::DepthwiseConv2D &node) override;
- void visit(const operation::Dequantize &) override;
- void visit(const operation::Div &) override;
+ void visit(const operation::ElementwiseActivation &) override;
+ void visit(const operation::ElementwiseBinary &) override;
+ void visit(const operation::ElementwiseUnary &) override;
void visit(const operation::EmbeddingLookup &) override;
- void visit(const operation::Exp &) override;
void visit(const operation::ExpandDims &) override;
- void visit(const operation::Floor &) override;
void visit(const operation::FullyConnected &node) override;
void visit(const operation::Gather &) override;
void visit(const operation::HashtableLookup &) override;
void visit(const operation::InstanceNorm &) override;
void visit(const operation::L2Normalization &) override;
- void visit(const operation::L2Pool2D &) override;
void visit(const operation::LocalResponseNormalization &) override;
- void visit(const operation::Log &) override;
- void visit(const operation::LogicalAnd &) override;
- void visit(const operation::LogicalNot &) override;
- void visit(const operation::LogicalOr &) override;
- void visit(const operation::Logistic &) override;
void visit(const operation::LSTM &) override;
- void visit(const operation::MaxPool2D &node) override;
- void visit(const operation::Mul &) override;
- void visit(const operation::Neg &) override;
void visit(const operation::Pack &) override;
void visit(const operation::Pad &) override;
void visit(const operation::Permute &node) override;
+ void visit(const operation::Pool2D &node) override;
void visit(const operation::Pow &node) override;
void visit(const operation::PReLU &) override;
void visit(const operation::Range &) override;
+ void visit(const operation::Rank &) override;
void visit(const operation::Reduce &) override;
- void visit(const operation::ReLU &) override;
- void visit(const operation::ReLU1 &) override;
- void visit(const operation::ReLU6 &) override;
void visit(const operation::Reshape &node) override;
void visit(const operation::ResizeBilinear &) override;
void visit(const operation::Reverse &) override;
void visit(const operation::RNN &) override;
- void visit(const operation::Round &) override;
- void visit(const operation::RSQRT &) override;
void visit(const operation::Select &node) override;
void visit(const operation::Shape &node) override;
- void visit(const operation::Sin &node) override;
void visit(const operation::Softmax &node) override;
void visit(const operation::SpaceToBatchND &) override;
void visit(const operation::SpaceToDepth &) override;
void visit(const operation::Split &) override;
- void visit(const operation::SQRT &) override;
void visit(const operation::SquaredDifference &) override;
void visit(const operation::Squeeze &) override;
void visit(const operation::Slice &) override;
void visit(const operation::StridedSlice &) override;
void visit(const operation::StatelessRandomUniform &) override;
- void visit(const operation::Sub &) override;
- void visit(const operation::Tanh &) override;
void visit(const operation::Tile &) override;
void visit(const operation::TopKV2 &) override;
void visit(const operation::TransposeConv &) override;
void visit(const operation::Transpose &) override;
void visit(const operation::Unpack &) override;
- void visit(const operation::Min &) override;
- void visit(const operation::Max &) override;
void visit(const operation::OneHot &) override;
void visit(const operation::If &) override;
void visit(const operation::While &) override;
- void visit(const operation::ZerosLike &) override;
};
} // namespace ir
diff --git a/runtime/onert/core/src/ir/Padding.cc b/runtime/onert/core/src/ir/Padding.cc
index 31969911f..d74f80217 100644
--- a/runtime/onert/core/src/ir/Padding.cc
+++ b/runtime/onert/core/src/ir/Padding.cc
@@ -50,7 +50,7 @@ inline ExplicitPadding validPadding(void)
}
inline ExplicitPadding samePaddingUsingIFM(const FeatureShape &ifm_shape, const Stride &stride,
- uint32_t kw, uint32_t kh)
+ uint32_t kw, uint32_t kh, uint32_t dwf, uint32_t dhf)
{
ExplicitPadding padding;
@@ -61,14 +61,19 @@ inline ExplicitPadding samePaddingUsingIFM(const FeatureShape &ifm_shape, const
// padding_to_beginning = total_padding / 2
// padding_to_end = (total_padding + 1)/2.
//
+ const int32_t effective_filter_h_size = (kh - 1) * dhf + 1;
+ const int32_t effective_filter_w_size = (kw - 1) * dwf + 1;
+
const int32_t vertical_expected_output = (ifm_shape.H + stride.vertical - 1) / stride.vertical;
const int32_t horizontal_expected_output =
(ifm_shape.W + stride.horizontal - 1) / stride.horizontal;
- const int32_t vertical_needed_input = (vertical_expected_output - 1) * stride.vertical + kh;
+ const int32_t vertical_needed_input =
+ (vertical_expected_output - 1) * stride.vertical + effective_filter_h_size;
const int32_t vertical_total_padding = std::max(0, vertical_needed_input - ifm_shape.H);
- const int32_t horizontal_needed_input = (horizontal_expected_output - 1) * stride.horizontal + kw;
+ const int32_t horizontal_needed_input =
+ (horizontal_expected_output - 1) * stride.horizontal + effective_filter_w_size;
const int32_t horizontal_total_padding = std::max(0, horizontal_needed_input - ifm_shape.W);
padding.top = vertical_total_padding / 2;
@@ -80,7 +85,8 @@ inline ExplicitPadding samePaddingUsingIFM(const FeatureShape &ifm_shape, const
}
inline ExplicitPadding samePadding(const FeatureShape &ifm_shape, const FeatureShape &ofm_shape,
- const Stride &stride, uint32_t kw, uint32_t kh)
+ const Stride &stride, uint32_t kw, uint32_t kh, uint32_t dwf,
+ uint32_t dhf)
{
const int32_t vertical_expected_output = (ifm_shape.H + stride.vertical - 1) / stride.vertical;
const int32_t horizontal_expected_output =
@@ -92,7 +98,7 @@ inline ExplicitPadding samePadding(const FeatureShape &ifm_shape, const FeatureS
UNUSED_RELEASE(vertical_expected_output);
UNUSED_RELEASE(horizontal_expected_output);
- return samePaddingUsingIFM(ifm_shape, stride, kw, kh);
+ return samePaddingUsingIFM(ifm_shape, stride, kw, kh, dwf, dhf);
}
} // namespace
@@ -130,7 +136,7 @@ Padding::Padding(uint32_t left, uint32_t right, uint32_t top, uint32_t bottom)
const ExplicitPadding calculatePadding(const Padding &padding, const FeatureShape &ifm_shape,
const FeatureShape &ofm_shape, const Stride &stride,
- uint32_t kw, uint32_t kh)
+ uint32_t kw, uint32_t kh, uint32_t dwf, uint32_t dhf)
{
if (padding.type == PaddingType::EXPLICIT)
{
@@ -138,7 +144,7 @@ const ExplicitPadding calculatePadding(const Padding &padding, const FeatureShap
}
else if (padding.type == PaddingType::SAME)
{
- return samePadding(ifm_shape, ofm_shape, stride, kw, kh);
+ return samePadding(ifm_shape, ofm_shape, stride, kw, kh, dwf, dhf);
}
else if (padding.type == PaddingType::VALID)
{
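
The new dwf/dhf arguments are dilation factors: they widen the effective kernel before the usual SAME-padding arithmetic. A standalone one-axis sketch of the calculation the hunk above performs (illustrative names):

// One-axis SAME padding with dilation, mirroring samePaddingUsingIFM() above (illustrative).
#include <algorithm>
#include <cstdint>
#include <utility>

// Returns {pad_begin, pad_end} for an input extent, kernel size, stride, and dilation factor.
std::pair<int32_t, int32_t> samePadding1D(int32_t in, int32_t k, int32_t stride, int32_t dilation)
{
  const int32_t effective_k = (k - 1) * dilation + 1;      // dilated kernel extent
  const int32_t expected_out = (in + stride - 1) / stride; // ceil(in / stride)
  const int32_t needed_in = (expected_out - 1) * stride + effective_k;
  const int32_t total_pad = std::max(0, needed_in - in);
  return {total_pad / 2, (total_pad + 1) / 2};             // begin gets the smaller half
}

// Example: in=7, k=3, stride=2, dilation=2 -> effective_k=5, out=4, needed=11, pad={2,2}.
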
diff --git a/runtime/onert/core/src/ir/operation/Abs.cc b/runtime/onert/core/src/ir/operation/Abs.cc
deleted file mode 100644
index b06705d07..000000000
--- a/runtime/onert/core/src/ir/operation/Abs.cc
+++ /dev/null
@@ -1,39 +0,0 @@
-/*
- * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#include "ir/operation/Abs.h"
-
-#include <cassert>
-
-#include "ir/OperationVisitor.h"
-
-namespace onert
-{
-namespace ir
-{
-namespace operation
-{
-
-void Abs::accept(OperationVisitor &v) const { v.visit(*this); }
-
-Abs::Abs(const OperandIndexSequence &inputs, const OperandIndexSequence &outputs)
- : Operation{OperandConstraint::createExact(1u), inputs, outputs}
-{
-}
-
-} // namespace operation
-} // namespace ir
-} // namespace onert
diff --git a/runtime/onert/core/src/ir/operation/AvgPool2D.cc b/runtime/onert/core/src/ir/operation/AvgPool2D.cc
deleted file mode 100644
index 28d4fcb54..000000000
--- a/runtime/onert/core/src/ir/operation/AvgPool2D.cc
+++ /dev/null
@@ -1,40 +0,0 @@
-/*
- * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#include "ir/operation/AvgPool2D.h"
-
-#include <cassert>
-
-#include "ir/OperationVisitor.h"
-
-namespace onert
-{
-namespace ir
-{
-namespace operation
-{
-
-void AvgPool2D::accept(OperationVisitor &v) const { v.visit(*this); }
-
-AvgPool2D::AvgPool2D(const OperandIndexSequence &inputs, const OperandIndexSequence &outputs,
- const Param &param)
- : Operation{OperandConstraint::createExact(1u), inputs, outputs}, _param{param}
-{
-}
-
-} // namespace operation
-} // namespace ir
-} // namespace onert
diff --git a/runtime/onert/core/src/ir/operation/Add.cc b/runtime/onert/core/src/ir/operation/BinaryArithmetic.cc
index 2fa30f8ed..2b1422c73 100644
--- a/runtime/onert/core/src/ir/operation/Add.cc
+++ b/runtime/onert/core/src/ir/operation/BinaryArithmetic.cc
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
@@ -14,9 +14,10 @@
* limitations under the License.
*/
-#include "ir/operation/Add.h"
+#include "ir/operation/BinaryArithmetic.h"
#include <cassert>
+#include <unordered_map>
#include "ir/OperationVisitor.h"
@@ -27,14 +28,25 @@ namespace ir
namespace operation
{
-void Add::accept(OperationVisitor &v) const { v.visit(*this); }
+void BinaryArithmetic::accept(OperationVisitor &v) const { v.visit(*this); }
-Add::Add(const OperandIndexSequence &inputs, const OperandIndexSequence &outputs,
- const Param &param)
+BinaryArithmetic::BinaryArithmetic(const OperandIndexSequence &inputs,
+ const OperandIndexSequence &outputs, const Param &param)
: Operation{OperandConstraint::createExact(2u), inputs, outputs}, _param{param}
{
}
+std::string BinaryArithmetic::name() const
+{
+ using ArithmeticType = onert::ir::operation::BinaryArithmetic::ArithmeticType;
+ static const std::unordered_map<ArithmeticType, std::string> name_map{
+ {ArithmeticType::ADD, std::string{"Add"}},
+ {ArithmeticType::SUB, std::string{"Sub"}},
+ {ArithmeticType::MUL, std::string{"Mul"}},
+ {ArithmeticType::DIV, std::string{"Div"}}};
+ return name_map.at(_param.arithmetic_type);
+}
+
} // namespace operation
} // namespace ir
} // namespace onert
diff --git a/runtime/onert/core/src/ir/operation/Cast.cc b/runtime/onert/core/src/ir/operation/Cast.cc
deleted file mode 100644
index 09d9c327e..000000000
--- a/runtime/onert/core/src/ir/operation/Cast.cc
+++ /dev/null
@@ -1,39 +0,0 @@
-/*
- * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#include "ir/operation/Cast.h"
-
-#include <cassert>
-
-#include "ir/OperationVisitor.h"
-
-namespace onert
-{
-namespace ir
-{
-namespace operation
-{
-
-void Cast::accept(OperationVisitor &v) const { v.visit(*this); }
-
-Cast::Cast(const OperandIndexSequence &inputs, const OperandIndexSequence &outputs)
- : Operation{OperandConstraint::createExact(1u), inputs, outputs}
-{
-}
-
-} // namespace operation
-} // namespace ir
-} // namespace onert
diff --git a/runtime/onert/core/src/ir/operation/Dequantize.cc b/runtime/onert/core/src/ir/operation/Dequantize.cc
deleted file mode 100644
index 14d6362bd..000000000
--- a/runtime/onert/core/src/ir/operation/Dequantize.cc
+++ /dev/null
@@ -1,39 +0,0 @@
-/*
- * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#include "ir/operation/Dequantize.h"
-
-#include <cassert>
-
-#include "ir/OperationVisitor.h"
-
-namespace onert
-{
-namespace ir
-{
-namespace operation
-{
-
-void Dequantize::accept(OperationVisitor &v) const { v.visit(*this); }
-
-Dequantize::Dequantize(const OperandIndexSequence &inputs, const OperandIndexSequence &outputs)
- : Operation{OperandConstraint::createExact(1u), inputs, outputs}
-{
-}
-
-} // namespace operation
-} // namespace ir
-} // namespace onert
diff --git a/runtime/onert/core/src/ir/operation/Div.cc b/runtime/onert/core/src/ir/operation/Div.cc
deleted file mode 100644
index b095d9811..000000000
--- a/runtime/onert/core/src/ir/operation/Div.cc
+++ /dev/null
@@ -1,40 +0,0 @@
-/*
- * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#include "ir/operation/Div.h"
-
-#include <cassert>
-
-#include "ir/OperationVisitor.h"
-
-namespace onert
-{
-namespace ir
-{
-namespace operation
-{
-
-void Div::accept(OperationVisitor &v) const { v.visit(*this); }
-
-Div::Div(const OperandIndexSequence &inputs, const OperandIndexSequence &outputs,
- const Param &param)
- : Operation{OperandConstraint::createExact(2u), inputs, outputs}, _param{param}
-{
-}
-
-} // namespace operation
-} // namespace ir
-} // namespace onert
diff --git a/runtime/onert/core/src/ir/operation/ElementwiseActivation.cc b/runtime/onert/core/src/ir/operation/ElementwiseActivation.cc
new file mode 100644
index 000000000..f6718b656
--- /dev/null
+++ b/runtime/onert/core/src/ir/operation/ElementwiseActivation.cc
@@ -0,0 +1,72 @@
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "ir/operation/ElementwiseActivation.h"
+
+#include <cassert>
+#include <limits> // for std::numeric_limits (used by ElementwiseActivation::infinity below)
+#include <unordered_map>
+
+#include "ir/OperationVisitor.h"
+
+namespace onert
+{
+namespace ir
+{
+namespace operation
+{
+
+void ElementwiseActivation::accept(OperationVisitor &v) const { v.visit(*this); }
+
+ElementwiseActivation::ElementwiseActivation(const OperandIndexSequence &inputs,
+ const OperandIndexSequence &outputs,
+ const Param &param)
+ : Operation{OperandConstraint::createExact(1u), inputs, outputs}, _param{param}
+{
+ if (param.op_type == Type::LOGISTIC)
+ {
+    assert(param.alpha == 0.0f && param.beta == 0.0f &&
+           "Logistic is supported only as the standard sigmoid function (L=1, k=1, x0=0), "
+           "so alpha and beta must not be set");
+ }
+ else if (param.op_type == Type::RELU)
+ {
+    assert(param.alpha >= param.beta && "ReLU's alpha must be greater than or equal to beta");
+ }
+ else if (param.op_type == Type::TANH)
+ {
+    assert(param.alpha == 1.0f && param.beta == 1.0f &&
+           "f(x) = alpha * tanh(beta * x): Tanh is supported only when both alpha and beta are 1.0f");
+ }
+}
+
+std::string ElementwiseActivation::name() const
+{
+ using ElementwiseActivationType = onert::ir::operation::ElementwiseActivation::Type;
+ static const std::unordered_map<Type, std::string> name_map{
+ {ElementwiseActivationType::ELU, "ELU"},
+ {ElementwiseActivationType::LOGISTIC, "Logistic"},
+ {ElementwiseActivationType::RELU, "ReLU"},
+ {ElementwiseActivationType::TANH, "Tanh"},
+ {ElementwiseActivationType::LEAKY_RELU, "LeakyRelu"}};
+ return name_map.at(_param.op_type);
+}
+
+float ElementwiseActivation::infinity = std::numeric_limits<float>::infinity();
+
+} // namespace operation
+} // namespace ir
+} // namespace onert
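For illustration only (not part of this change): the removed activation nodes now funnel through this single op. A minimal sketch of building a plain ReLU, assuming OperandIndexSequence and OperandIndex construct from index literals as elsewhere in onert:

// Hedged sketch: plain ReLU expressed as the merged ElementwiseActivation op.
using EA = onert::ir::operation::ElementwiseActivation;
EA::Param param;
param.op_type = EA::Type::RELU;
param.alpha = EA::infinity; // no upper clamp -> plain ReLU (RELU6 would use 6.0f)
param.beta = 0.0f;          // lower clamp at 0
EA relu{onert::ir::OperandIndexSequence{onert::ir::OperandIndex{0u}},
        onert::ir::OperandIndexSequence{onert::ir::OperandIndex{1u}}, param};
assert(relu.name() == "ReLU"); // resolved through the op_type -> name map above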
diff --git a/runtime/onert/core/src/ir/operation/ElementwiseBinary.cc b/runtime/onert/core/src/ir/operation/ElementwiseBinary.cc
new file mode 100644
index 000000000..3287fc0a3
--- /dev/null
+++ b/runtime/onert/core/src/ir/operation/ElementwiseBinary.cc
@@ -0,0 +1,52 @@
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "ir/operation/ElementwiseBinary.h"
+
+#include <cassert>
+#include <unordered_map>
+
+#include "ir/OperationVisitor.h"
+
+namespace onert
+{
+namespace ir
+{
+namespace operation
+{
+
+void ElementwiseBinary::accept(OperationVisitor &v) const { v.visit(*this); }
+
+ElementwiseBinary::ElementwiseBinary(const OperandIndexSequence &inputs,
+ const OperandIndexSequence &outputs, const Param &param)
+ : Operation{OperandConstraint::createExact(2u), inputs, outputs}, _param{param}
+{
+}
+
+std::string ElementwiseBinary::name() const
+{
+ using ElementwiseBinaryType = onert::ir::operation::ElementwiseBinary::ElementwiseBinaryType;
+ static const std::unordered_map<ElementwiseBinaryType, std::string> name_map{
+ {ElementwiseBinaryType::LOGICAL_AND, std::string{"LogicalAnd"}},
+ {ElementwiseBinaryType::LOGICAL_OR, std::string{"LogicalOr"}},
+ {ElementwiseBinaryType::MAX, std::string{"Max"}},
+ {ElementwiseBinaryType::MIN, std::string{"Min"}}};
+ return name_map.at(_param.op_type);
+}
+
+} // namespace operation
+} // namespace ir
+} // namespace onert
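Similarly, the LogicalAnd/LogicalOr/Max/Min nodes deleted elsewhere in this change map onto this op via ElementwiseBinaryType; a minimal sketch, with inputs/outputs as hypothetical two-input/one-output index sequences:

// Hedged sketch: the op's display name comes from its ElementwiseBinaryType.
using EB = onert::ir::operation::ElementwiseBinary;
EB::Param param;
param.op_type = EB::ElementwiseBinaryType::MAX;
EB max_op{inputs, outputs, param}; // hypothetical sequences, exactly two inputs required
assert(max_op.name() == "Max");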
diff --git a/runtime/onert/core/src/ir/operation/ElementwiseUnary.cc b/runtime/onert/core/src/ir/operation/ElementwiseUnary.cc
new file mode 100644
index 000000000..7dfcd4a98
--- /dev/null
+++ b/runtime/onert/core/src/ir/operation/ElementwiseUnary.cc
@@ -0,0 +1,65 @@
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "ir/operation/ElementwiseUnary.h"
+
+#include <cassert>
+#include <unordered_map>
+
+#include "ir/OperationVisitor.h"
+
+namespace onert
+{
+namespace ir
+{
+namespace operation
+{
+
+void ElementwiseUnary::accept(OperationVisitor &v) const { v.visit(*this); }
+
+ElementwiseUnary::ElementwiseUnary(const OperandIndexSequence &inputs,
+ const OperandIndexSequence &outputs, const Param &param)
+ : Operation{OperandConstraint::createExact(1u), inputs, outputs}, _param{param}
+{
+}
+
+std::string ElementwiseUnary::name() const
+{
+ using ElementwiseUnaryType = onert::ir::operation::ElementwiseUnary::Type;
+ static const std::unordered_map<ElementwiseUnaryType, std::string> name_map{
+ {ElementwiseUnaryType::ABS, std::string{"Abs"}},
+ {ElementwiseUnaryType::CAST, std::string{"Cast"}},
+ {ElementwiseUnaryType::COS, std::string{"Cos"}},
+ {ElementwiseUnaryType::DEQUANTIZE, std::string{"Dequantize"}},
+ {ElementwiseUnaryType::ERF, std::string{"Erf"}},
+ {ElementwiseUnaryType::EXP, std::string{"Exp"}},
+ {ElementwiseUnaryType::FLOOR, std::string{"Floor"}},
+ {ElementwiseUnaryType::LOG, std::string{"Log"}},
+ {ElementwiseUnaryType::LOGICAL_NOT, std::string{"LogicalNot"}},
+ {ElementwiseUnaryType::NEG, std::string{"Neg"}},
+ {ElementwiseUnaryType::QUANTIZE, std::string{"Quantize"}},
+ {ElementwiseUnaryType::ROUND, std::string{"Round"}},
+ {ElementwiseUnaryType::RSQRT, std::string{"RSqrt"}},
+ {ElementwiseUnaryType::SIN, std::string{"Sin"}},
+ {ElementwiseUnaryType::SQRT, std::string{"Sqrt"}},
+ {ElementwiseUnaryType::SQURE, std::string{"Squre"}},
+ {ElementwiseUnaryType::ZEROS_LIKE, std::string{"ZerosLike"}}};
+ return name_map.at(_param.op_type);
+}
+
+} // namespace operation
+} // namespace ir
+} // namespace onert
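Likewise, the one-input nodes removed in this change (Cast, Dequantize, Exp, Floor, LogicalNot, Neg, RSQRT, SQRT, ZerosLike, ...) become instances of this op; a minimal sketch, with hypothetical one-input/one-output sequences:

// Hedged sketch: a former Cast node expressed as ElementwiseUnary.
using EU = onert::ir::operation::ElementwiseUnary;
EU::Param param;
param.op_type = EU::Type::CAST;
EU cast_op{inputs, outputs, param}; // hypothetical index sequences
assert(cast_op.name() == "Cast");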
diff --git a/runtime/onert/core/src/ir/operation/Exp.cc b/runtime/onert/core/src/ir/operation/Exp.cc
deleted file mode 100644
index 0b22e080a..000000000
--- a/runtime/onert/core/src/ir/operation/Exp.cc
+++ /dev/null
@@ -1,39 +0,0 @@
-/*
- * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#include "ir/operation/Exp.h"
-
-#include <cassert>
-
-#include "ir/OperationVisitor.h"
-
-namespace onert
-{
-namespace ir
-{
-namespace operation
-{
-
-void Exp::accept(OperationVisitor &v) const { v.visit(*this); }
-
-Exp::Exp(const OperandIndexSequence &inputs, const OperandIndexSequence &outputs)
- : Operation{OperandConstraint::createExact(1u), inputs, outputs}
-{
-}
-
-} // namespace operation
-} // namespace ir
-} // namespace onert
diff --git a/runtime/onert/core/src/ir/operation/Floor.cc b/runtime/onert/core/src/ir/operation/Floor.cc
deleted file mode 100644
index dc01535ad..000000000
--- a/runtime/onert/core/src/ir/operation/Floor.cc
+++ /dev/null
@@ -1,39 +0,0 @@
-/*
- * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#include "ir/operation/Floor.h"
-
-#include <cassert>
-
-#include "ir/OperationVisitor.h"
-
-namespace onert
-{
-namespace ir
-{
-namespace operation
-{
-
-void Floor::accept(OperationVisitor &v) const { v.visit(*this); }
-
-Floor::Floor(const OperandIndexSequence &inputs, const OperandIndexSequence &outputs)
- : Operation{OperandConstraint::createExact(1u), inputs, outputs}
-{
-}
-
-} // namespace operation
-} // namespace ir
-} // namespace onert
diff --git a/runtime/onert/core/src/ir/operation/L2Pool2D.cc b/runtime/onert/core/src/ir/operation/L2Pool2D.cc
deleted file mode 100644
index 8f21b93e0..000000000
--- a/runtime/onert/core/src/ir/operation/L2Pool2D.cc
+++ /dev/null
@@ -1,40 +0,0 @@
-/*
- * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#include "ir/operation/L2Pool2D.h"
-
-#include <cassert>
-
-#include "ir/OperationVisitor.h"
-
-namespace onert
-{
-namespace ir
-{
-namespace operation
-{
-
-void L2Pool2D::accept(OperationVisitor &v) const { v.visit(*this); }
-
-L2Pool2D::L2Pool2D(const OperandIndexSequence &inputs, const OperandIndexSequence &outputs,
- const Param &param)
- : Operation{OperandConstraint::createExact(1u), inputs, outputs}, _param{param}
-{
-}
-
-} // namespace operation
-} // namespace ir
-} // namespace onert
diff --git a/runtime/onert/core/src/ir/operation/LogicalAnd.cc b/runtime/onert/core/src/ir/operation/LogicalAnd.cc
deleted file mode 100644
index 0d50706ca..000000000
--- a/runtime/onert/core/src/ir/operation/LogicalAnd.cc
+++ /dev/null
@@ -1,39 +0,0 @@
-/*
- * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#include "ir/operation/LogicalAnd.h"
-
-#include <cassert>
-
-#include "ir/OperationVisitor.h"
-
-namespace onert
-{
-namespace ir
-{
-namespace operation
-{
-
-void LogicalAnd::accept(OperationVisitor &v) const { v.visit(*this); }
-
-LogicalAnd::LogicalAnd(const OperandIndexSequence &inputs, const OperandIndexSequence &outputs)
- : Operation{OperandConstraint::createExact(2u), inputs, outputs}
-{
-}
-
-} // namespace operation
-} // namespace ir
-} // namespace onert
diff --git a/runtime/onert/core/src/ir/operation/LogicalNot.cc b/runtime/onert/core/src/ir/operation/LogicalNot.cc
deleted file mode 100644
index 8f1142102..000000000
--- a/runtime/onert/core/src/ir/operation/LogicalNot.cc
+++ /dev/null
@@ -1,39 +0,0 @@
-/*
- * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#include "ir/operation/LogicalNot.h"
-
-#include <cassert>
-
-#include "ir/OperationVisitor.h"
-
-namespace onert
-{
-namespace ir
-{
-namespace operation
-{
-
-void LogicalNot::accept(OperationVisitor &v) const { v.visit(*this); }
-
-LogicalNot::LogicalNot(const OperandIndexSequence &inputs, const OperandIndexSequence &outputs)
- : Operation{OperandConstraint::createExact(1u), inputs, outputs}
-{
-}
-
-} // namespace operation
-} // namespace ir
-} // namespace onert
diff --git a/runtime/onert/core/src/ir/operation/LogicalOr.cc b/runtime/onert/core/src/ir/operation/LogicalOr.cc
deleted file mode 100644
index d75207c4a..000000000
--- a/runtime/onert/core/src/ir/operation/LogicalOr.cc
+++ /dev/null
@@ -1,39 +0,0 @@
-/*
- * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#include "ir/operation/LogicalOr.h"
-
-#include <cassert>
-
-#include "ir/OperationVisitor.h"
-
-namespace onert
-{
-namespace ir
-{
-namespace operation
-{
-
-void LogicalOr::accept(OperationVisitor &v) const { v.visit(*this); }
-
-LogicalOr::LogicalOr(const OperandIndexSequence &inputs, const OperandIndexSequence &outputs)
- : Operation{OperandConstraint::createExact(2u), inputs, outputs}
-{
-}
-
-} // namespace operation
-} // namespace ir
-} // namespace onert
diff --git a/runtime/onert/core/src/ir/operation/Logistic.cc b/runtime/onert/core/src/ir/operation/Logistic.cc
deleted file mode 100644
index 77d9d17de..000000000
--- a/runtime/onert/core/src/ir/operation/Logistic.cc
+++ /dev/null
@@ -1,39 +0,0 @@
-/*
- * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#include "ir/operation/Logistic.h"
-
-#include <cassert>
-
-#include "ir/OperationVisitor.h"
-
-namespace onert
-{
-namespace ir
-{
-namespace operation
-{
-
-void Logistic::accept(OperationVisitor &v) const { v.visit(*this); }
-
-Logistic::Logistic(const OperandIndexSequence &inputs, const OperandIndexSequence &outputs)
- : Operation{OperandConstraint::createExact(1u), inputs, outputs}
-{
-}
-
-} // namespace operation
-} // namespace ir
-} // namespace onert
diff --git a/runtime/onert/core/src/ir/operation/Max.cc b/runtime/onert/core/src/ir/operation/Max.cc
deleted file mode 100644
index 281f9d451..000000000
--- a/runtime/onert/core/src/ir/operation/Max.cc
+++ /dev/null
@@ -1,39 +0,0 @@
-/*
- * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#include "ir/operation/Max.h"
-
-#include <cassert>
-
-#include "ir/OperationVisitor.h"
-
-namespace onert
-{
-namespace ir
-{
-namespace operation
-{
-
-void Max::accept(OperationVisitor &v) const { v.visit(*this); }
-
-Max::Max(const OperandIndexSequence &inputs, const OperandIndexSequence &outputs)
- : Operation{OperandConstraint::createExact(2u), inputs, outputs}
-{
-}
-
-} // namespace operation
-} // namespace ir
-} // namespace onert
diff --git a/runtime/onert/core/src/ir/operation/MaxPool2D.cc b/runtime/onert/core/src/ir/operation/MaxPool2D.cc
deleted file mode 100644
index eac53cc5e..000000000
--- a/runtime/onert/core/src/ir/operation/MaxPool2D.cc
+++ /dev/null
@@ -1,40 +0,0 @@
-/*
- * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#include "ir/operation/MaxPool2D.h"
-
-#include <cassert>
-
-#include "ir/OperationVisitor.h"
-
-namespace onert
-{
-namespace ir
-{
-namespace operation
-{
-
-void MaxPool2D::accept(OperationVisitor &v) const { v.visit(*this); }
-
-MaxPool2D::MaxPool2D(const OperandIndexSequence &inputs, const OperandIndexSequence &outputs,
- const Param &param)
- : Operation{OperandConstraint::createExact(1u), inputs, outputs}, _param{param}
-{
-}
-
-} // namespace operation
-} // namespace ir
-} // namespace onert
diff --git a/runtime/onert/core/src/ir/operation/Min.cc b/runtime/onert/core/src/ir/operation/Min.cc
deleted file mode 100644
index 8be7f0cc8..000000000
--- a/runtime/onert/core/src/ir/operation/Min.cc
+++ /dev/null
@@ -1,39 +0,0 @@
-/*
- * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#include "ir/operation/Min.h"
-
-#include <cassert>
-
-#include "ir/OperationVisitor.h"
-
-namespace onert
-{
-namespace ir
-{
-namespace operation
-{
-
-void Min::accept(OperationVisitor &v) const { v.visit(*this); }
-
-Min::Min(const OperandIndexSequence &inputs, const OperandIndexSequence &outputs)
- : Operation{OperandConstraint::createExact(2u), inputs, outputs}
-{
-}
-
-} // namespace operation
-} // namespace ir
-} // namespace onert
diff --git a/runtime/onert/core/src/ir/operation/Mul.cc b/runtime/onert/core/src/ir/operation/Mul.cc
deleted file mode 100644
index 03cdf1b61..000000000
--- a/runtime/onert/core/src/ir/operation/Mul.cc
+++ /dev/null
@@ -1,40 +0,0 @@
-/*
- * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#include "ir/operation/Mul.h"
-
-#include <cassert>
-
-#include "ir/OperationVisitor.h"
-
-namespace onert
-{
-namespace ir
-{
-namespace operation
-{
-
-void Mul::accept(OperationVisitor &v) const { v.visit(*this); }
-
-Mul::Mul(const OperandIndexSequence &inputs, const OperandIndexSequence &outputs,
- const Param &param)
- : Operation{OperandConstraint::createExact(2u), inputs, outputs}, _param{param}
-{
-}
-
-} // namespace operation
-} // namespace ir
-} // namespace onert
diff --git a/runtime/onert/core/src/ir/operation/Neg.cc b/runtime/onert/core/src/ir/operation/Neg.cc
deleted file mode 100644
index df623a13b..000000000
--- a/runtime/onert/core/src/ir/operation/Neg.cc
+++ /dev/null
@@ -1,39 +0,0 @@
-/*
- * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#include "ir/operation/Neg.h"
-
-#include <cassert>
-
-#include "ir/OperationVisitor.h"
-
-namespace onert
-{
-namespace ir
-{
-namespace operation
-{
-
-void Neg::accept(OperationVisitor &v) const { v.visit(*this); }
-
-Neg::Neg(const OperandIndexSequence &inputs, const OperandIndexSequence &outputs)
- : Operation{OperandConstraint::createExact(1u), inputs, outputs}
-{
-}
-
-} // namespace operation
-} // namespace ir
-} // namespace onert
diff --git a/runtime/onert/core/src/ir/operation/Pad.cc b/runtime/onert/core/src/ir/operation/Pad.cc
index aecc2d994..0c56e92e3 100644
--- a/runtime/onert/core/src/ir/operation/Pad.cc
+++ b/runtime/onert/core/src/ir/operation/Pad.cc
@@ -27,8 +27,10 @@ namespace operation
void Pad::accept(OperationVisitor &v) const { v.visit(*this); }
+// PAD: 2 inputs
+// PADV2: 3 inputs
Pad::Pad(const OperandIndexSequence &inputs, const OperandIndexSequence &outputs)
- : Operation{OperandConstraint::createExact(2u), inputs, outputs}
+ : Operation{OperandConstraint::createInRange(2u, 3u), inputs, outputs}
{
}
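A short sketch of what the relaxed constraint admits, assuming OperandConstraint::createInRange takes an inclusive [min, max] operand count and that the index variables below are hypothetical:

// PAD:   {input, paddings}            -> 2 inputs
// PADV2: {input, paddings, pad_value} -> 3 inputs (pad_value is the constant fill)
Pad pad_v1{OperandIndexSequence{input, paddings}, OperandIndexSequence{output}};
Pad pad_v2{OperandIndexSequence{input, paddings, pad_value}, OperandIndexSequence{output}};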
diff --git a/runtime/onert/core/src/ir/operation/Pool2D.cc b/runtime/onert/core/src/ir/operation/Pool2D.cc
new file mode 100644
index 000000000..761d14c3d
--- /dev/null
+++ b/runtime/onert/core/src/ir/operation/Pool2D.cc
@@ -0,0 +1,51 @@
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "ir/operation/Pool2D.h"
+
+#include <cassert>
+#include <unordered_map>
+
+#include "ir/OperationVisitor.h"
+
+namespace onert
+{
+namespace ir
+{
+namespace operation
+{
+
+void Pool2D::accept(OperationVisitor &v) const { v.visit(*this); }
+
+Pool2D::Pool2D(const OperandIndexSequence &inputs, const OperandIndexSequence &outputs,
+ const Param &param)
+ : Operation{OperandConstraint::createExact(1u), inputs, outputs}, _param{param}
+{
+}
+
+std::string Pool2D::name() const
+{
+ using PoolType = onert::ir::operation::Pool2D::PoolType;
+ static const std::unordered_map<PoolType, std::string> name_map{
+ {PoolType::AVG, "Avg" + std::string{toString(opcode())}},
+ {PoolType::L2, "L2" + std::string{toString(opcode())}},
+ {PoolType::MAX, "Max" + std::string{toString(opcode())}}};
+ return name_map.at(_param.op_type);
+}
+
+} // namespace operation
+} // namespace ir
+} // namespace onert
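A minimal sketch of the naming scheme, assuming toString(opcode()) yields "Pool2D" for this op, so the names line up with the removed AvgPool2D/L2Pool2D/MaxPool2D nodes:

// Hedged sketch: PoolType::MAX + the opcode string -> "MaxPool2D".
Pool2D::Param param; // kh/kw, stride and padding omitted for brevity
param.op_type = Pool2D::PoolType::MAX;
Pool2D pool{inputs, outputs, param}; // hypothetical one-input/one-output sequences
assert(pool.name() == "MaxPool2D");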
diff --git a/runtime/onert/core/src/ir/operation/RSQRT.cc b/runtime/onert/core/src/ir/operation/RSQRT.cc
deleted file mode 100644
index 2bce1fa28..000000000
--- a/runtime/onert/core/src/ir/operation/RSQRT.cc
+++ /dev/null
@@ -1,39 +0,0 @@
-/*
- * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#include "ir/operation/RSQRT.h"
-
-#include <cassert>
-
-#include "ir/OperationVisitor.h"
-
-namespace onert
-{
-namespace ir
-{
-namespace operation
-{
-
-void RSQRT::accept(OperationVisitor &v) const { v.visit(*this); }
-
-RSQRT::RSQRT(const OperandIndexSequence &inputs, const OperandIndexSequence &outputs)
- : Operation{OperandConstraint::createExact(1u), inputs, outputs}
-{
-}
-
-} // namespace operation
-} // namespace ir
-} // namespace onert
diff --git a/runtime/onert/core/src/ir/operation/Cos.cc b/runtime/onert/core/src/ir/operation/Rank.cc
index 831a92dbd..c357e9018 100644
--- a/runtime/onert/core/src/ir/operation/Cos.cc
+++ b/runtime/onert/core/src/ir/operation/Rank.cc
@@ -14,7 +14,7 @@
* limitations under the License.
*/
-#include "ir/operation/Cos.h"
+#include "ir/operation/Rank.h"
#include <cassert>
@@ -27,9 +27,9 @@ namespace ir
namespace operation
{
-void Cos::accept(OperationVisitor &v) const { v.visit(*this); }
+void Rank::accept(OperationVisitor &v) const { v.visit(*this); }
-Cos::Cos(const OperandIndexSequence &inputs, const OperandIndexSequence &outputs)
+Rank::Rank(const OperandIndexSequence &inputs, const OperandIndexSequence &outputs)
: Operation{OperandConstraint::createExact(1u), inputs, outputs}
{
}
diff --git a/runtime/onert/core/src/ir/operation/ReLU.cc b/runtime/onert/core/src/ir/operation/ReLU.cc
deleted file mode 100644
index f0c88478b..000000000
--- a/runtime/onert/core/src/ir/operation/ReLU.cc
+++ /dev/null
@@ -1,39 +0,0 @@
-/*
- * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#include "ir/operation/ReLU.h"
-
-#include <cassert>
-
-#include "ir/OperationVisitor.h"
-
-namespace onert
-{
-namespace ir
-{
-namespace operation
-{
-
-void ReLU::accept(OperationVisitor &v) const { v.visit(*this); }
-
-ReLU::ReLU(const OperandIndexSequence &inputs, const OperandIndexSequence &outputs)
- : Operation{OperandConstraint::createExact(1u), inputs, outputs}
-{
-}
-
-} // namespace operation
-} // namespace ir
-} // namespace onert
diff --git a/runtime/onert/core/src/ir/operation/ReLU1.cc b/runtime/onert/core/src/ir/operation/ReLU1.cc
deleted file mode 100644
index 734f0b65b..000000000
--- a/runtime/onert/core/src/ir/operation/ReLU1.cc
+++ /dev/null
@@ -1,39 +0,0 @@
-/*
- * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#include "ir/operation/ReLU1.h"
-
-#include <cassert>
-
-#include "ir/OperationVisitor.h"
-
-namespace onert
-{
-namespace ir
-{
-namespace operation
-{
-
-void ReLU1::accept(OperationVisitor &v) const { v.visit(*this); }
-
-ReLU1::ReLU1(const OperandIndexSequence &inputs, const OperandIndexSequence &outputs)
- : Operation{OperandConstraint::createExact(1u), inputs, outputs}
-{
-}
-
-} // namespace operation
-} // namespace ir
-} // namespace onert
diff --git a/runtime/onert/core/src/ir/operation/ReLU6.cc b/runtime/onert/core/src/ir/operation/ReLU6.cc
deleted file mode 100644
index 5972329af..000000000
--- a/runtime/onert/core/src/ir/operation/ReLU6.cc
+++ /dev/null
@@ -1,39 +0,0 @@
-/*
- * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#include "ir/operation/ReLU6.h"
-
-#include <cassert>
-
-#include "ir/OperationVisitor.h"
-
-namespace onert
-{
-namespace ir
-{
-namespace operation
-{
-
-void ReLU6::accept(OperationVisitor &v) const { v.visit(*this); }
-
-ReLU6::ReLU6(const OperandIndexSequence &inputs, const OperandIndexSequence &outputs)
- : Operation{OperandConstraint::createExact(1u), inputs, outputs}
-{
-}
-
-} // namespace operation
-} // namespace ir
-} // namespace onert
diff --git a/runtime/onert/core/src/ir/operation/Round.cc b/runtime/onert/core/src/ir/operation/ResizeNearestNeighbor.cc
index 16dfb2b1b..9f17af97c 100644
--- a/runtime/onert/core/src/ir/operation/Round.cc
+++ b/runtime/onert/core/src/ir/operation/ResizeNearestNeighbor.cc
@@ -14,7 +14,7 @@
* limitations under the License.
*/
-#include "ir/operation/Round.h"
+#include "ir/operation/ResizeNearestNeighbor.h"
#include <cassert>
@@ -27,10 +27,12 @@ namespace ir
namespace operation
{
-void Round::accept(OperationVisitor &v) const { v.visit(*this); }
+void ResizeNearestNeighbor::accept(OperationVisitor &v) const { v.visit(*this); }
-Round::Round(const OperandIndexSequence &inputs, const OperandIndexSequence &outputs)
- : Operation{OperandConstraint::createExact(1u), inputs, outputs}
+ResizeNearestNeighbor::ResizeNearestNeighbor(const OperandIndexSequence &inputs,
+ const OperandIndexSequence &outputs,
+ const Param &param)
+ : Operation{OperandConstraint::createExact(1u), inputs, outputs}, _param{param}
{
}
diff --git a/runtime/onert/core/src/ir/operation/SQRT.cc b/runtime/onert/core/src/ir/operation/SQRT.cc
deleted file mode 100644
index ad887d89a..000000000
--- a/runtime/onert/core/src/ir/operation/SQRT.cc
+++ /dev/null
@@ -1,39 +0,0 @@
-/*
- * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#include "ir/operation/SQRT.h"
-
-#include <cassert>
-
-#include "ir/OperationVisitor.h"
-
-namespace onert
-{
-namespace ir
-{
-namespace operation
-{
-
-void SQRT::accept(OperationVisitor &v) const { v.visit(*this); }
-
-SQRT::SQRT(const OperandIndexSequence &inputs, const OperandIndexSequence &outputs)
- : Operation{OperandConstraint::createExact(1u), inputs, outputs}
-{
-}
-
-} // namespace operation
-} // namespace ir
-} // namespace onert
diff --git a/runtime/onert/core/src/ir/operation/Sub.cc b/runtime/onert/core/src/ir/operation/Sub.cc
deleted file mode 100644
index d71071686..000000000
--- a/runtime/onert/core/src/ir/operation/Sub.cc
+++ /dev/null
@@ -1,40 +0,0 @@
-/*
- * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#include "ir/operation/Sub.h"
-
-#include <cassert>
-
-#include "ir/OperationVisitor.h"
-
-namespace onert
-{
-namespace ir
-{
-namespace operation
-{
-
-void Sub::accept(OperationVisitor &v) const { v.visit(*this); }
-
-Sub::Sub(const OperandIndexSequence &inputs, const OperandIndexSequence &outputs,
- const Param &param)
- : Operation{OperandConstraint::createExact(2u), inputs, outputs}, _param{param}
-{
-}
-
-} // namespace operation
-} // namespace ir
-} // namespace onert
diff --git a/runtime/onert/core/src/ir/operation/Tanh.cc b/runtime/onert/core/src/ir/operation/Tanh.cc
deleted file mode 100644
index 8fab0c0f3..000000000
--- a/runtime/onert/core/src/ir/operation/Tanh.cc
+++ /dev/null
@@ -1,39 +0,0 @@
-/*
- * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#include "ir/operation/Tanh.h"
-
-#include <cassert>
-
-#include "ir/OperationVisitor.h"
-
-namespace onert
-{
-namespace ir
-{
-namespace operation
-{
-
-void Tanh::accept(OperationVisitor &v) const { v.visit(*this); }
-
-Tanh::Tanh(const OperandIndexSequence &inputs, const OperandIndexSequence &outputs)
- : Operation{OperandConstraint::createExact(1u), inputs, outputs}
-{
-}
-
-} // namespace operation
-} // namespace ir
-} // namespace onert
diff --git a/runtime/onert/core/src/ir/operation/ZerosLike.cc b/runtime/onert/core/src/ir/operation/ZerosLike.cc
deleted file mode 100644
index 5f49b98d1..000000000
--- a/runtime/onert/core/src/ir/operation/ZerosLike.cc
+++ /dev/null
@@ -1,39 +0,0 @@
-/*
- * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#include "ir/operation/ZerosLike.h"
-
-#include <cassert>
-
-#include "ir/OperationVisitor.h"
-
-namespace onert
-{
-namespace ir
-{
-namespace operation
-{
-
-void ZerosLike::accept(OperationVisitor &v) const { v.visit(*this); }
-
-ZerosLike::ZerosLike(const OperandIndexSequence &inputs, const OperandIndexSequence &outputs)
- : Operation{OperandConstraint::createExact(1u), inputs, outputs}
-{
-}
-
-} // namespace operation
-} // namespace ir
-} // namespace onert
diff --git a/runtime/onert/core/src/ir/pass/PermutationOperationPass.h b/runtime/onert/core/src/ir/pass/PermutationOperationPass.h
deleted file mode 100644
index 6dec9ea8f..000000000
--- a/runtime/onert/core/src/ir/pass/PermutationOperationPass.h
+++ /dev/null
@@ -1,70 +0,0 @@
-/*
- * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#ifndef __ONERT_GRAPH_PASS_PERMUTATION_OPERATION_PASS_H__
-#define __ONERT_GRAPH_PASS_PERMUTATION_OPERATION_PASS_H__
-
-#include "ir/OperationVisitor.h"
-#include "LoweredOperationPass.h"
-
-namespace onert
-{
-namespace ir
-{
-namespace pass
-{
-
-class PermutationOperationPass : public LoweredOperationPass, public OperationVisitor
-{
-public:
- using LoweredOperationPass::LoweredOperationPass;
-
-public:
- std::string id() final { return "PermutationOperationPass"; }
-
-public:
- void callback(const OperationIndex &i, Operation &n) final;
-
-public:
- void visit(const operation::Add &) final;
- void visit(const operation::Comparison &) final;
- void visit(const operation::Concat &) final;
- void visit(const operation::Div &) final;
- void visit(const operation::LogicalAnd &) final;
- void visit(const operation::LogicalNot &) final;
- void visit(const operation::LogicalOr &) final;
- void visit(const operation::Max &) final;
- void visit(const operation::Min &) final;
- void visit(const operation::Mul &) final;
- void visit(const operation::Pack &) final;
- void visit(const operation::PReLU &) final;
- void visit(const operation::SquaredDifference &) final;
- void visit(const operation::Sub &) final;
- void visit(const operation::Unpack &) final;
- void visit(const operation::FullyConnected &) final;
- void visit(const operation::Gather &) final;
- void visit(const operation::Reshape &) final;
-
-private:
- void applyExpandRanks(const Operation &);
- void changeToKeepLayout(const Operation &);
-};
-
-} // namespace pass
-} // namespace ir
-} // namespace onert
-
-#endif // __ONERT_GRAPH_PASS_PERMUTATION_OPERATION_PASS_H__
diff --git a/runtime/onert/core/src/util/EventRecorder.cc b/runtime/onert/core/src/util/EventRecorder.cc
index ec7f92117..13a599bed 100644
--- a/runtime/onert/core/src/util/EventRecorder.cc
+++ b/runtime/onert/core/src/util/EventRecorder.cc
@@ -21,7 +21,12 @@
#include <unordered_map>
#include <json/json.h>
#include <assert.h>
+#include <utility>
+#include <map>
+#include <set>
+#include <stdint.h>
+// JSON helpers for the Chrome Event Trace output
namespace
{
@@ -110,6 +115,290 @@ std::string object(const CounterEvent &evt)
} // namespace
+// Markdown table report helpers
+namespace
+{
+
+void writeMDTableRow(std::ostream &os, const std::vector<std::string> &list)
+{
+ os << "| ";
+ for (auto &key : list)
+ {
+ os << key << " | ";
+ }
+ os << "\n";
+}
+
+struct MDContent
+{
+ std::string name;
+ uint64_t begin_ts;
+ uint64_t end_ts;
+ uint32_t min_rss;
+ uint32_t max_rss;
+ uint32_t min_page_reclaims;
+ uint32_t max_page_reclaims;
+
+ MDContent()
+ : begin_ts(0), end_ts(0), min_rss(UINT32_MAX), max_rss(0), min_page_reclaims(UINT32_MAX),
+ max_page_reclaims(0)
+ {
+ // DO NOTHING
+ }
+
+ virtual ~MDContent() = default;
+
+ void updateRss(uint32_t rss)
+ {
+ if (min_rss == UINT32_MAX)
+ min_rss = rss;
+ if (max_rss == 0)
+ max_rss = rss;
+
+ if (min_rss > rss)
+ min_rss = rss;
+ else if (max_rss < rss)
+ max_rss = rss;
+ }
+
+ void updateMinflt(uint32_t minflt)
+ {
+ if (min_page_reclaims == UINT32_MAX)
+ min_page_reclaims = minflt;
+ if (max_page_reclaims == 0)
+ max_page_reclaims = minflt;
+
+ if (min_page_reclaims > minflt)
+ min_page_reclaims = minflt;
+ else if (max_page_reclaims < minflt)
+ max_page_reclaims = minflt;
+ }
+
+ virtual void write(std::ostream &os) const = 0;
+};
+
+struct OpSeq : public MDContent
+{
+ std::string backend;
+ uint64_t graph_latency;
+
+ struct OpSeqCmp
+ {
+ bool operator()(const OpSeq &lhs, const OpSeq &rhs) const
+ {
+ return lhs.begin_ts < rhs.begin_ts;
+ }
+ bool operator()(const OpSeq &lhs, const OpSeq &rhs) { return lhs.begin_ts < rhs.begin_ts; }
+ bool operator()(OpSeq &lhs, OpSeq &rhs) { return lhs.begin_ts < rhs.begin_ts; }
+ };
+
+ void write(std::ostream &os) const override
+ {
+ uint64_t opseq_latency = end_ts - begin_ts;
+ double opseq_per = static_cast<double>(opseq_latency) / graph_latency * 100.0;
+ writeMDTableRow(os, {name, backend, std::to_string(opseq_latency), std::to_string(opseq_per),
+ std::to_string(min_rss), std::to_string(max_rss),
+ std::to_string(min_page_reclaims), std::to_string(max_page_reclaims)});
+ }
+};
+
+struct Graph : public MDContent
+{
+ std::set<OpSeq, OpSeq::OpSeqCmp> opseqs;
+
+ void setOpSeqs(const std::map<std::string, OpSeq> &name_to_opseq)
+ {
+ uint64_t graph_latency = end_ts - begin_ts;
+ for (auto it : name_to_opseq)
+ {
+ auto opseq = it.second;
+ opseq.graph_latency = graph_latency;
+
+ opseqs.insert(opseq);
+
+ updateRss(opseq.min_rss);
+ updateRss(opseq.max_rss);
+ updateMinflt(opseq.min_page_reclaims);
+ updateMinflt(opseq.max_page_reclaims);
+ }
+ }
+
+ void write(std::ostream &os) const override
+ {
+ static std::vector<std::string> graph_headers{"latency(us)", "rss_min(kb)", "rss_max(kb)",
+ "page_reclaims_min", "page_reclaims_max"};
+
+ static std::vector<std::string> graph_headers_line{"-----------", "-------", "-------",
+ "-----------------", "-----------------"};
+
+ // Graph's Header
+ writeMDTableRow(os, graph_headers);
+ writeMDTableRow(os, graph_headers_line);
+
+ // Graph's contents
+ writeMDTableRow(os, {std::to_string(end_ts - begin_ts), std::to_string(min_rss),
+ std::to_string(max_rss), std::to_string(min_page_reclaims),
+ std::to_string(max_page_reclaims)});
+
+ os << "\n";
+
+ static std::vector<std::string> opseq_headers{
+ "OpSeq name", "backend", "latency(us)", "latency(%)",
+ "rss_min(kb)", "rss_max(kb)", "page_reclaims_min", "page_reclaims_max"};
+
+ static std::vector<std::string> opseq_headers_line{
+ "----------", "-------", "-----------", "-----------",
+ "-------", "-------", "-----------------", "-----------------"};
+
+ os << "## OpSequences \n";
+
+ // OpSeq's Header
+ writeMDTableRow(os, opseq_headers);
+ writeMDTableRow(os, opseq_headers_line);
+
+ // OpSeq's contents
+ for (auto opseq : opseqs)
+ {
+ opseq.write(os);
+ }
+
+ os << "\n";
+ }
+};
+
+struct MDTableBuilder
+{
+ MDTableBuilder(const std::vector<DurationEvent> &duration_events,
+ const std::vector<CounterEvent> &counter_events)
+ : _duration_events(duration_events), _counter_events(counter_events)
+ {
+ for (const auto &evt : _counter_events)
+ {
+ uint64_t ts = std::stoull(evt.ts);
+ auto &name = evt.name;
+ assert(name.compare("maxrss") == 0 || name.compare("minflt") == 0);
+ assert(evt.values.size() == 1);
+ auto &val = evt.values.begin()->second;
+ if (_ts_to_values.find(ts) == _ts_to_values.end())
+ {
+ std::pair<uint32_t, uint32_t> values;
+ if (name.compare("maxrss") == 0)
+ values.first = std::stoul(val);
+ else
+ values.second = std::stoul(val);
+ _ts_to_values.insert({ts, values});
+ }
+ else
+ {
+ auto &values = _ts_to_values.at(ts);
+ if (name.compare("maxrss") == 0)
+ values.first = std::stoul(val);
+ else
+ values.second = std::stoul(val);
+ }
+ }
+ }
+
+ MDTableBuilder &build()
+ {
+ for (auto &it : divideGraph())
+ {
+ size_t begin_idx = it.first;
+ size_t end_idx = it.second;
+ std::map<std::string, OpSeq> name_to_opseq;
+ for (size_t i = begin_idx + 1; i < end_idx; ++i)
+ {
+ const auto &evt = _duration_events[i];
+ assert(evt.name.compare("Graph") != 0);
+ assert(evt.ph.compare("B") == 0 || evt.ph.compare("E") == 0);
+ if (evt.ph.compare("B") == 0)
+ {
+ assert(name_to_opseq.find(evt.name) == name_to_opseq.end());
+ name_to_opseq.insert({evt.name, makeOpSeq(evt)});
+ }
+ else
+ {
+ assert(name_to_opseq.find(evt.name) != name_to_opseq.end());
+ auto &opseq = name_to_opseq.at(evt.name);
+ updateOpSeq(opseq, evt);
+ }
+ }
+
+ _graphs.emplace_back(makeGraph(begin_idx, end_idx, name_to_opseq));
+ }
+
+ return *this;
+ }
+
+ std::vector<std::pair<size_t, size_t>> divideGraph()
+ {
+ std::vector<std::pair<size_t, size_t>> graph_idx_list; // pair<begin_idx, end_idx>
+ for (size_t i = 0, begin_idx = 0; i < _duration_events.size(); ++i)
+ {
+ const auto &evt = _duration_events.at(i);
+ if (evt.name.compare("Graph") == 0)
+ {
+ if (evt.ph.compare("B") == 0)
+ begin_idx = i;
+ else
+ graph_idx_list.emplace_back(begin_idx, i);
+ }
+ }
+ return graph_idx_list;
+ }
+
+ OpSeq makeOpSeq(const DurationEvent &evt)
+ {
+ OpSeq opseq;
+ opseq.name = evt.name;
+ opseq.begin_ts = std::stoull(evt.ts);
+ opseq.updateRss(_ts_to_values.at(opseq.begin_ts).first);
+ opseq.updateMinflt(_ts_to_values.at(opseq.begin_ts).second);
+ opseq.backend = evt.tid;
+ return opseq;
+ }
+
+ void updateOpSeq(OpSeq &opseq, const DurationEvent &evt)
+ {
+ opseq.end_ts = std::stoull(evt.ts);
+ opseq.updateRss(_ts_to_values.at(opseq.end_ts).first);
+ opseq.updateMinflt(_ts_to_values.at(opseq.end_ts).second);
+ }
+
+ Graph makeGraph(size_t begin_idx, size_t end_idx,
+ const std::map<std::string, OpSeq> &name_to_opseq)
+ {
+ Graph graph;
+ graph.name = "Graph";
+ graph.begin_ts = std::stoull(_duration_events[begin_idx].ts);
+ graph.updateRss(_ts_to_values.at(graph.begin_ts).first);
+ graph.updateMinflt(_ts_to_values.at(graph.begin_ts).second);
+ graph.end_ts = std::stoull(_duration_events[end_idx].ts);
+ graph.updateRss(_ts_to_values.at(graph.end_ts).first);
+ graph.updateMinflt(_ts_to_values.at(graph.end_ts).second);
+ graph.setOpSeqs(name_to_opseq);
+ return graph;
+ }
+
+ void write(std::ostream &os)
+ {
+ // Write contents
+ for (size_t i = 0; i < _graphs.size(); ++i)
+ {
+ os << "# Graph " << i << "\n";
+ _graphs.at(i).write(os);
+ }
+ }
+
+ const std::vector<DurationEvent> &_duration_events;
+ const std::vector<CounterEvent> &_counter_events;
+ // timestamp to std::pair<maxrss, minflt>
+ std::unordered_map<uint64_t, std::pair<uint32_t, uint32_t>> _ts_to_values;
+ std::vector<Graph> _graphs;
+};
+
+} // namespace
+
void EventRecorder::emit(const DurationEvent &evt)
{
std::lock_guard<std::mutex> lock{_mu};
@@ -136,6 +425,9 @@ void EventRecorder::writeToFile(std::ostream &os)
case WriteFormat::SNPE_BENCHMARK:
writeSNPEBenchmark(os);
break;
+ case WriteFormat::MD_TABLE:
+ writeMDTable(os);
+ break;
default:
assert(!"Invalid value");
break;
@@ -258,3 +550,8 @@ void EventRecorder::writeChromeTrace(std::ostream &os)
os << " ]\n";
os << "}\n";
}
+
+void EventRecorder::writeMDTable(std::ostream &os)
+{
+ MDTableBuilder(_duration_events, _counter_events).build().write(os);
+}
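For reference, the MD_TABLE report written above is roughly shaped as follows; the column names come from graph_headers/opseq_headers in the code, and the numbers are illustrative placeholders only:

# Graph 0
| latency(us) | rss_min(kb) | rss_max(kb) | page_reclaims_min | page_reclaims_max |
| ----------- | ------- | ------- | ----------------- | ----------------- |
| 12345 | 1024 | 2048 | 3 | 17 |

## OpSequences
| OpSeq name | backend | latency(us) | latency(%) | rss_min(kb) | rss_max(kb) | page_reclaims_min | page_reclaims_max |
| ---------- | ------- | ----------- | ----------- | ------- | ------- | ----------------- | ----------------- |
| opseq_0 | cpu | 6789 | 55.0 | 1024 | 2048 | 3 | 17 |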
diff --git a/runtime/onert/core/src/util/EventRecorder.h b/runtime/onert/core/src/util/EventRecorder.h
index 6eea06986..37ec1a0f1 100644
--- a/runtime/onert/core/src/util/EventRecorder.h
+++ b/runtime/onert/core/src/util/EventRecorder.h
@@ -53,7 +53,8 @@ public:
enum class WriteFormat
{
CHROME_TRACING,
- SNPE_BENCHMARK
+ SNPE_BENCHMARK,
+ MD_TABLE,
};
public:
@@ -71,6 +72,7 @@ public:
private:
void writeSNPEBenchmark(std::ostream &os);
void writeChromeTrace(std::ostream &os);
+ void writeMDTable(std::ostream &os);
private:
std::mutex _mu;
diff --git a/runtime/onert/core/src/util/ShapeInference.cc b/runtime/onert/core/src/util/ShapeInference.cc
index 9a24f8c1a..95c15049d 100644
--- a/runtime/onert/core/src/util/ShapeInference.cc
+++ b/runtime/onert/core/src/util/ShapeInference.cc
@@ -18,8 +18,6 @@
#include "util/Utils.h"
#include "ir/InternalType.h"
#include "ir/Shape.h"
-#include "ir/operation/AvgPool2D.h"
-#include "ir/operation/MaxPool2D.h"
#include "util/ShapeInference.h"
#include "util/logging.h"
@@ -81,10 +79,12 @@ ir::Shape broadcastShapes(const ir::Shape &lhs_shape, const ir::Shape &rhs_shape
// Calculate output height and width of convolution-like operation
std::pair<int, int> calcConvLikeHeightAndWidth(const int in_h, const int in_w, const int ker_h,
const int ker_w, const ir::Padding pad,
- const ir::Stride stride)
+ const ir::Stride stride,
+ const ir::Dilation dilation = {1, 1})
{
int32_t out_h = 0, out_w = 0;
-
+ int32_t effective_filter_w_size = (ker_w - 1) * dilation.width_factor + 1;
+ int32_t effective_filter_h_size = (ker_h - 1) * dilation.height_factor + 1;
switch (pad.type)
{
case ir::PaddingType::SAME:
@@ -92,12 +92,15 @@ std::pair<int, int> calcConvLikeHeightAndWidth(const int in_h, const int in_w, c
out_w = ceil_div(in_w, stride.horizontal);
break;
case ir::PaddingType::VALID:
- out_h = ceil_div(in_h - ker_h + 1, stride.vertical);
- out_w = ceil_div(in_w - ker_w + 1, stride.horizontal);
+ out_h = ceil_div(in_h - effective_filter_h_size + 1, stride.vertical);
+ out_w = ceil_div(in_w - effective_filter_w_size + 1, stride.horizontal);
break;
case ir::PaddingType::EXPLICIT:
- out_h = (in_h + pad.param.top + pad.param.bottom - ker_h) / stride.vertical + 1;
- out_w = (in_w + pad.param.left + pad.param.right - ker_w) / stride.horizontal + 1;
+ out_h =
+ (in_h + pad.param.top + pad.param.bottom - effective_filter_h_size) / stride.vertical + 1;
+ out_w =
+ (in_w + pad.param.left + pad.param.right - effective_filter_w_size) / stride.horizontal +
+ 1;
break;
default:
assert(false);
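A quick worked check of the dilation term with illustrative numbers: for ker_w = 3 and dilation.width_factor = 2, effective_filter_w_size = (3 - 1) * 2 + 1 = 5; with VALID padding, in_w = 10 and horizontal stride 1, out_w = ceil_div(10 - 5 + 1, 1) = 6, whereas the undilated formula would have given 8. The default dilation of {1, 1} leaves all existing callers unchanged.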
@@ -126,17 +129,6 @@ ir::Shape inferArgMaxShape(const ir::Shape &input_shape, int axis, int rank)
return out_shape;
}
-ir::Shape inferAvgPoolShape(const ir::Shape &in_shape, const ir::operation::AvgPool2D::Param &param,
- const ir::Layout layout)
-{
- assert(layout == ir::Layout::NHWC);
- auto ifm_shape = in_shape.asFeature(layout);
- const auto out_h_w = calcConvLikeHeightAndWidth(ifm_shape.H, ifm_shape.W, param.kh, param.kw,
- param.padding, param.stride);
- // Pooling don't change number of channels and batch size
- return ir::Shape{ifm_shape.N, out_h_w.first, out_h_w.second, ifm_shape.C};
-}
-
ir::Shape inferReduceShape(const ir::Shape &input_shape, const std::vector<int> &axes,
bool keep_dims)
{
@@ -320,7 +312,7 @@ ir::Shape inferConv2DShape(const ir::Shape &in_shape, const ir::Shape &ker_shape
assert(ifm_shape.C == kf_shape.C);
const auto out_h_w = calcConvLikeHeightAndWidth(ifm_shape.H, ifm_shape.W, kf_shape.H, kf_shape.W,
- param.padding, param.stride);
+ param.padding, param.stride, param.dilation);
return ir::Shape{ifm_shape.N, out_h_w.first, out_h_w.second, kf_shape.N};
}
@@ -411,17 +403,6 @@ ir::Shape inferGatherShape(const ir::Shape &input_shape, const ir::Shape &indice
return out_shape;
}
-ir::Shape inferMaxPoolShape(const ir::Shape &in_shape, const ir::operation::MaxPool2D::Param &param,
- const ir::Layout layout)
-{
- assert(layout == ir::Layout::NHWC);
- auto ifm_shape = in_shape.asFeature(layout);
- const auto out_h_w = calcConvLikeHeightAndWidth(ifm_shape.H, ifm_shape.W, param.kh, param.kw,
- param.padding, param.stride);
- // Pooling don't change number of channels and batch size
- return ir::Shape{ifm_shape.N, out_h_w.first, out_h_w.second, ifm_shape.C};
-}
-
ir::Shape inferOnehotShape(const ir::Shape &input_shape, const int depth, int axis)
{
assert(depth >= 0);
@@ -486,6 +467,17 @@ ir::Shape inferPadShape(const ir::Shape &in_shape, const int32_t *pad_buf, const
return ret;
}
+ir::Shape inferPoolShape(const ir::Shape &in_shape, const ir::operation::Pool2D::Param &param,
+ const ir::Layout layout)
+{
+ assert(layout == ir::Layout::NHWC);
+ auto ifm_shape = in_shape.asFeature(layout);
+ const auto out_h_w = calcConvLikeHeightAndWidth(ifm_shape.H, ifm_shape.W, param.kh, param.kw,
+ param.padding, param.stride);
+  // Pooling doesn't change the number of channels or the batch size
+ return ir::Shape{ifm_shape.N, out_h_w.first, out_h_w.second, ifm_shape.C};
+}
+
ir::Shape inferResizeBilinearShape(const ir::Shape &in_shape, const int32_t output_height,
const int32_t output_width)
{
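For context (not part of this hunk): average, L2 and max pooling now share the single inferPoolShape helper added above; a minimal call sketch, assuming the surrounding shape-inference namespace:

// Hedged sketch: any Pool2D flavour (AVG, L2, MAX) goes through one entry point.
ir::operation::Pool2D::Param param{}; // kh/kw, stride and padding would be filled by the frontend loader
ir::Shape ofm_shape = inferPoolShape(ifm_shape, param, ir::Layout::NHWC); // NHWC only, as asserted above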
diff --git a/runtime/onert/frontend/base_loader/include/base_loader.h b/runtime/onert/frontend/base_loader/include/base_loader.h
index 0f6a2a5d0..480452e01 100644
--- a/runtime/onert/frontend/base_loader/include/base_loader.h
+++ b/runtime/onert/frontend/base_loader/include/base_loader.h
@@ -105,40 +105,39 @@ protected:
template <typename Param, typename OptionsType>
void loadStridesAndPaddings(Param &param, const OptionsType *options);
// Load Pool2D param
- template <typename Param> void loadPool2D(Param &param, const Pool2DOptions *options);
+ template <typename Param> void loadPool2DOptions(Param &param, const Pool2DOptions *options);
// Operations
void loadConv2D(const Operator *op, ir::Graph &subg);
void loadDepthwiseConv2D(const Operator *op, ir::Graph &subg);
void loadTransposeConv(const Operator *op, ir::Graph &subg);
- void loadAvgPool2D(const Operator *op, ir::Graph &subg);
+ void loadPool2D(const Operator *op, ir::Graph &subg, ir::operation::Pool2D::PoolType op_type);
void loadReshape(const Operator *op, ir::Graph &subg);
void loadSoftmax(const Operator *op, ir::Graph &subg);
- void loadMaxPool2D(const Operator *op, ir::Graph &subg);
void loadConcatenation(const Operator *op, ir::Graph &subg);
void loadFill(const Operator *op, ir::Graph &subg);
void loadFC(const Operator *op, ir::Graph &subg);
- void loadAdd(const Operator *op, ir::Graph &subg);
- void loadSub(const Operator *op, ir::Graph &subg);
- void loadMul(const Operator *op, ir::Graph &subg);
- void loadDiv(const Operator *op, ir::Graph &subg);
+ template <ir::operation::BinaryArithmetic::ArithmeticType op_type>
+ void loadBinaryArithmetic(const Operator *op, ir::Graph &subg);
+ void loadAddV2(const Operator *op, ir::Graph &subg);
void loadPack(const Operator *op, ir::Graph &subg);
- void loadRelu(const Operator *op, ir::Graph &subg);
- void loadRelu6(const Operator *op, ir::Graph &subg);
void loadResizeBilinear(const Operator *op, ir::Graph &subg);
- void loadRsqrt(const Operator *op, ir::Graph &subg);
+ void loadResizeNearestNeighbor(const Operator *op, ir::Graph &subg);
void loadSelect(const Operator *op, ir::Graph &subg);
- void loadSqrt(const Operator *op, ir::Graph &subg);
void loadSquaredDifference(const Operator *op, ir::Graph &subg);
- void loadTanh(const Operator *op, ir::Graph &subg);
void loadTranspose(const Operator *op, ir::Graph &subg);
- void loadReduce(const Operator *op, ir::Graph &subg,
- ir::operation::Reduce::ReduceType reduce_type);
+ template <ir::operation::Reduce::ReduceType reduce_type>
+ void loadReduce(const Operator *op, ir::Graph &subg);
void loadReduceAll(const Operator *op, ir::Graph &subg);
void loadReverseV2(const Operator *op, ir::Graph &subg);
void loadPad(const Operator *op, ir::Graph &subg);
- void loadLogistic(const Operator *op, ir::Graph &subg);
- void loadExp(const Operator *op, ir::Graph &subg);
+ void loadElementwiseActivation(const Operator *op, ir::Graph &subg,
+ ir::operation::ElementwiseActivation::Type op_type,
+ float alpha = 0.f, float beta = 0.f);
+ template <ir::operation::ElementwiseBinary::ElementwiseBinaryType op_type>
+ void loadElementwiseBinary(const Operator *op, ir::Graph &subg);
+ void loadElementwiseUnary(const Operator *op, ir::Graph &subg,
+ ir::operation::ElementwiseUnary::Type op_type);
void loadExpandDims(const Operator *op, ir::Graph &subg);
void loadGather(const Operator *op, ir::Graph &subg);
void loadCustom(const Operator *op, ir::Graph &subg);
@@ -152,35 +151,25 @@ protected:
void loadSlice(const Operator *op, ir::Graph &subg);
void loadStridedSlice(const Operator *op, ir::Graph &subg);
void loadUnpack(const Operator *op, ir::Graph &subg);
- void loadMinimum(const Operator *op, ir::Graph &subg);
- void loadMaximum(const Operator *op, ir::Graph &subg);
- void loadCast(const Operator *op, ir::Graph &subg);
void loadComparison(const Operator *op, ir::Graph &subg);
void loadEinsum(const Operator *op, ir::Graph &subg);
void loadOneHot(const Operator *op, ir::Graph &subg);
- void loadAbs(const Operator *op, ir::Graph &subg);
- void loadCos(const Operator *op, ir::Graph &subg);
- void loadSin(const Operator *op, ir::Graph &subg);
void loadShape(const Operator *op, ir::Graph &subg);
void loadIf(const Operator *op, ir::Graph &subg);
void loadWhile(const Operator *op, ir::Graph &subg);
- void loadNeg(const Operator *op, ir::Graph &subg);
- void loadLog(const Operator *op, ir::Graph &subg);
void loadArgMax(const Operator *op, ir::Graph &subg);
- void loadRound(const Operator *op, ir::Graph &subg);
void loadPow(const Operator *op, ir::Graph &subg);
- void loadLogicalNot(const Operator *op, ir::Graph &subg);
- void loadZerosLike(const Operator *op, ir::Graph &subg);
void loadTile(const Operator *op, ir::Graph &subg);
- void loadLogicalOr(const Operator *op, ir::Graph &subg);
void loadRange(const Operator *op, ir::Graph &subg);
+ void loadRank(const Operator *op, ir::Graph &subg);
void loadMatrixBandPart(const Operator *op, ir::Graph &subg);
void loadBroadcastTo(const Operator *op, ir::Graph &subg);
void loadFusedBatchNorm(const Operator *op, ir::Graph &subg);
void loadLogSoftmax(const Operator *op, ir::Graph &subg);
- void loadQuantize(const Operator *op, ir::Graph &subg);
void loadSpaceToDepth(const Operator *op, ir::Graph &subg);
void loadStatelessRandomUniform(const Operator *op, ir::Graph &subg);
+ void loadL2Normalization(const Operator *op, ir::Graph &subg);
+ void loadLeakyRelu(const Operator *op, ir::Graph &subg);
protected:
// Base address for mapped region for loading (if needed)
@@ -194,6 +183,7 @@ protected:
const Model *_model;
// Maps Tensor indices to onert Operands.
std::vector<ir::OperandIndex> _tensor_to_operand;
+ std::unordered_map<ir::OperandIndex, std::string> _tensor_names;
// Verifier
std::unique_ptr<Verifier> _verifier;
};
@@ -466,8 +456,8 @@ ir::OperandIndex BaseLoader<LoaderDomain, SpecificLoader>::loadOperand(const Ten
subg.setOperandValue(operand_index, std::move(data_obj));
}
- // Name unused
- // auto name = tensor->name();
+ _tensor_names.emplace(operand_index, tensor->name()->str());
+
// Variable
if (tensor->is_variable())
throw std::runtime_error("Variable tensor not supported!");
@@ -518,8 +508,8 @@ void BaseLoader<LoaderDomain, SpecificLoader>::loadStridesAndPaddings(Param &par
template <typename LoaderDomain, typename SpecificLoader>
template <typename Param>
-void BaseLoader<LoaderDomain, SpecificLoader>::loadPool2D(Param &param,
- const Pool2DOptions *options)
+void BaseLoader<LoaderDomain, SpecificLoader>::loadPool2DOptions(Param &param,
+ const Pool2DOptions *options)
{
// Strides and Paddings
loadStridesAndPaddings(param, options);
@@ -543,7 +533,10 @@ void BaseLoader<LoaderDomain, SpecificLoader>::loadConv2D(const Operator *op, ir
const auto *options = op->builtin_options_as_Conv2DOptions();
param.activation = convertActivation(options->fused_activation_function());
loadStridesAndPaddings(param, options);
- // Dilation h/w factor unused
+
+ param.dilation.width_factor = options->dilation_w_factor();
+ param.dilation.height_factor = options->dilation_h_factor();
+
std::unique_ptr<ir::Operation> new_op(new ir::operation::Conv2D(inputs, outputs, param));
subg.addOperation(std::move(new_op));
}
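loadConv2D now forwards the dilation factors instead of ignoring them. A small sketch, outside the patch, of what those factors mean for shape inference, assuming VALID padding (effectiveKernel/convOutValid are illustrative names, not runtime APIs):

#include <cstdint>

// Effective kernel extent once dilation is applied: d - 1 gaps are inserted
// between taps, so k taps span k + (k - 1) * (d - 1) input elements.
int32_t effectiveKernel(int32_t k, int32_t dilation) { return k + (k - 1) * (dilation - 1); }

// Output size along one axis with VALID padding (SAME/EXPLICIT padding is
// presumably handled separately by the runtime's own shape inference).
int32_t convOutValid(int32_t in, int32_t k, int32_t stride, int32_t dilation)
{
  const int32_t eff_k = effectiveKernel(k, dilation);
  return (in - eff_k + stride) / stride; // == ceil((in - eff_k + 1) / stride)
}
// e.g. a 3-tap kernel with dilation 2 behaves like a 5-tap kernel:
// convOutValid(7, 3, 1, 2) == 3, versus 5 without dilation.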
@@ -585,19 +578,21 @@ void BaseLoader<LoaderDomain, SpecificLoader>::loadTransposeConv(const Operator
}
template <typename LoaderDomain, typename SpecificLoader>
-void BaseLoader<LoaderDomain, SpecificLoader>::loadAvgPool2D(const Operator *op, ir::Graph &subg)
+void BaseLoader<LoaderDomain, SpecificLoader>::loadPool2D(const Operator *op, ir::Graph &subg,
+ ir::operation::Pool2D::PoolType op_type)
{
ir::OperandIndexSequence inputs;
ir::OperandIndexSequence outputs;
loadOperationIO(op, inputs, outputs);
- ir::operation::AvgPool2D::Param param;
+ ir::operation::Pool2D::Param param;
+ param.op_type = op_type;
const auto *options = op->builtin_options_as_Pool2DOptions();
- loadPool2D(param, options);
+ loadPool2DOptions(param, options);
- std::unique_ptr<ir::Operation> new_op(new ir::operation::AvgPool2D(inputs, outputs, param));
+ std::unique_ptr<ir::Operation> new_op(new ir::operation::Pool2D(inputs, outputs, param));
subg.addOperation(std::move(new_op));
}
@@ -645,23 +640,6 @@ void BaseLoader<LoaderDomain, SpecificLoader>::loadSoftmax(const Operator *op, i
}
template <typename LoaderDomain, typename SpecificLoader>
-void BaseLoader<LoaderDomain, SpecificLoader>::loadMaxPool2D(const Operator *op, ir::Graph &subg)
-{
- ir::OperandIndexSequence inputs;
- ir::OperandIndexSequence outputs;
-
- loadOperationIO(op, inputs, outputs);
-
- ir::operation::MaxPool2D::Param param;
- const auto *options = op->builtin_options_as_Pool2DOptions();
-
- loadPool2D(param, options);
-
- std::unique_ptr<ir::Operation> new_op(new ir::operation::MaxPool2D(inputs, outputs, param));
- subg.addOperation(std::move(new_op));
-}
-
-template <typename LoaderDomain, typename SpecificLoader>
void BaseLoader<LoaderDomain, SpecificLoader>::loadConcatenation(const Operator *op,
ir::Graph &subg)
{
@@ -719,70 +697,82 @@ void BaseLoader<LoaderDomain, SpecificLoader>::loadFC(const Operator *op, ir::Gr
}
template <typename LoaderDomain, typename SpecificLoader>
-void BaseLoader<LoaderDomain, SpecificLoader>::loadAdd(const Operator *op, ir::Graph &subg)
-{
- ir::OperandIndexSequence inputs;
- ir::OperandIndexSequence outputs;
-
- loadOperationIO(op, inputs, outputs);
-
- ir::operation::Add::Param param;
- const auto *options = op->builtin_options_as_AddOptions();
-
- param.activation = convertActivation(options->fused_activation_function());
-
- std::unique_ptr<ir::Operation> new_op(new ir::operation::Add(inputs, outputs, param));
- subg.addOperation(std::move(new_op));
-}
-
-template <typename LoaderDomain, typename SpecificLoader>
-void BaseLoader<LoaderDomain, SpecificLoader>::loadSub(const Operator *op, ir::Graph &subg)
-{
- ir::OperandIndexSequence inputs;
- ir::OperandIndexSequence outputs;
-
- loadOperationIO(op, inputs, outputs);
-
- ir::operation::Sub::Param param;
- const auto *options = op->builtin_options_as_SubOptions();
-
- param.activation = convertActivation(options->fused_activation_function());
-
- std::unique_ptr<ir::Operation> new_op(new ir::operation::Sub(inputs, outputs, param));
- subg.addOperation(std::move(new_op));
-}
-
-template <typename LoaderDomain, typename SpecificLoader>
-void BaseLoader<LoaderDomain, SpecificLoader>::loadMul(const Operator *op, ir::Graph &subg)
+void BaseLoader<LoaderDomain, SpecificLoader>::loadAddV2(const Operator *op, ir::Graph &subg)
{
ir::OperandIndexSequence inputs;
ir::OperandIndexSequence outputs;
loadOperationIO(op, inputs, outputs);
- ir::operation::Mul::Param param;
- const auto *options = op->builtin_options_as_MulOptions();
+ ir::operation::BinaryArithmetic::Param param;
+ param.arithmetic_type = ir::operation::BinaryArithmetic::ArithmeticType::ADD;
- param.activation = convertActivation(options->fused_activation_function());
+ if (op->custom_options() == nullptr)
+ {
+ param.activation = ir::Activation::NONE;
+ }
+ else
+ {
+ size_t custom_op_data_size = op->custom_options()->size();
+ auto custom_op_data = op->custom_options()->Data();
+ auto data_root = flexbuffers::GetRoot(custom_op_data, custom_op_data_size);
+ auto attr_map = data_root.AsMap();
+ const auto fused_activation_func = static_cast<typename LoaderDomain::ActivationFunctionType>(
+ attr_map["fused_activation_function"].AsInt8());
+ param.activation = convertActivation(fused_activation_func);
+ }
- std::unique_ptr<ir::Operation> new_op(new ir::operation::Mul(inputs, outputs, param));
+ std::unique_ptr<ir::Operation> new_op(
+ new ir::operation::BinaryArithmetic(inputs, outputs, param));
subg.addOperation(std::move(new_op));
}
template <typename LoaderDomain, typename SpecificLoader>
-void BaseLoader<LoaderDomain, SpecificLoader>::loadDiv(const Operator *op, ir::Graph &subg)
+template <ir::operation::BinaryArithmetic::ArithmeticType op_type>
+void BaseLoader<LoaderDomain, SpecificLoader>::loadBinaryArithmetic(const Operator *op,
+ ir::Graph &subg)
{
ir::OperandIndexSequence inputs;
ir::OperandIndexSequence outputs;
loadOperationIO(op, inputs, outputs);
- ir::operation::Div::Param param;
- const auto *options = op->builtin_options_as_DivOptions();
-
- param.activation = convertActivation(options->fused_activation_function());
+ ir::operation::BinaryArithmetic::Param param;
+ param.arithmetic_type = op_type;
+ switch (op_type)
+ {
+ case ir::operation::BinaryArithmetic::ArithmeticType::ADD:
+ {
+ const auto *add_options = op->builtin_options_as_AddOptions();
+ param.activation = convertActivation(add_options->fused_activation_function());
+ break;
+ }
+ case ir::operation::BinaryArithmetic::ArithmeticType::SUB:
+ {
+ const auto *sub_options = op->builtin_options_as_SubOptions();
+ param.activation = convertActivation(sub_options->fused_activation_function());
+ break;
+ }
+ case ir::operation::BinaryArithmetic::ArithmeticType::MUL:
+ {
+ const auto *mul_options = op->builtin_options_as_MulOptions();
+ param.activation = convertActivation(mul_options->fused_activation_function());
+ break;
+ }
+ case ir::operation::BinaryArithmetic::ArithmeticType::DIV:
+ {
+ const auto *div_options = op->builtin_options_as_DivOptions();
+ param.activation = convertActivation(div_options->fused_activation_function());
+ break;
+ }
+ default:
+ assert(false &&
+ "The function 'loadBinaryArithmetic' supports only BinaryArithmetic operations");
+ break;
+ }
- std::unique_ptr<ir::Operation> new_op(new ir::operation::Div(inputs, outputs, param));
+ std::unique_ptr<ir::Operation> new_op(
+ new ir::operation::BinaryArithmetic(inputs, outputs, param));
subg.addOperation(std::move(new_op));
}
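loadAddV2 pulls the fused activation out of the operator's flexbuffer-encoded custom_options rather than from builtin options. A self-contained round trip through the public flexbuffers API, using the same "fused_activation_function" key; the builder side is purely illustrative of how such a blob is produced:

#include <cstdint>
#include <vector>
#include "flatbuffers/flexbuffers.h"

// Build a custom_options blob the way a converter might, then read it back
// the way loadAddV2 does. The value 0 here just stands for "no fused activation".
int8_t readFusedActivation()
{
  flexbuffers::Builder fbb;
  fbb.Map([&]() { fbb.Int("fused_activation_function", 0); });
  fbb.Finish();
  const std::vector<uint8_t> &buf = fbb.GetBuffer();

  auto root = flexbuffers::GetRoot(buf.data(), buf.size());
  auto attr_map = root.AsMap();
  return attr_map["fused_activation_function"].AsInt8();
}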
@@ -805,26 +795,22 @@ void BaseLoader<LoaderDomain, SpecificLoader>::loadPack(const Operator *op, ir::
}
template <typename LoaderDomain, typename SpecificLoader>
-void BaseLoader<LoaderDomain, SpecificLoader>::loadRelu(const Operator *op, ir::Graph &subg)
+void BaseLoader<LoaderDomain, SpecificLoader>::loadElementwiseActivation(
+ const Operator *op, ir::Graph &subg, ir::operation::ElementwiseActivation::Type op_type,
+ float alpha, float beta)
{
ir::OperandIndexSequence inputs;
ir::OperandIndexSequence outputs;
loadOperationIO(op, inputs, outputs);
- std::unique_ptr<ir::Operation> new_op(new ir::operation::ReLU(inputs, outputs));
- subg.addOperation(std::move(new_op));
-}
+ ir::operation::ElementwiseActivation::Param param;
+ param.op_type = op_type;
+ param.alpha = alpha;
+ param.beta = beta;
-template <typename LoaderDomain, typename SpecificLoader>
-void BaseLoader<LoaderDomain, SpecificLoader>::loadRelu6(const Operator *op, ir::Graph &subg)
-{
- ir::OperandIndexSequence inputs;
- ir::OperandIndexSequence outputs;
-
- loadOperationIO(op, inputs, outputs);
-
- std::unique_ptr<ir::Operation> new_op(new ir::operation::ReLU6(inputs, outputs));
+ std::unique_ptr<ir::Operation> new_op(
+ new ir::operation::ElementwiseActivation(inputs, outputs, param));
subg.addOperation(std::move(new_op));
}
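The dedicated ReLU/ReLU6 loaders are replaced by one parameterized ElementwiseActivation; later in this diff RELU maps to (alpha = infinity, beta = 0), RELU6 to (6, 0), RELU_N1_TO_1 to (1, -1) and TANH to (1, 1). A hedged sketch of the semantics those parameters suggest, i.e. a clamp to [beta, alpha] for the ReLU family and alpha * tanh(beta * x) for TANH; the kernels themselves are not part of this diff:

#include <algorithm>
#include <cmath>
#include <limits>

enum class ActType { RELU, TANH };

// Presumed meaning of (alpha, beta) for the parameterized activation.
float elementwiseActivation(float x, ActType type, float alpha, float beta)
{
  switch (type)
  {
    case ActType::RELU:
      return std::min(alpha, std::max(beta, x)); // clamp into [beta, alpha]
    case ActType::TANH:
      return alpha * std::tanh(beta * x);
  }
  return x; // unreachable
}

// RELU:         elementwiseActivation(x, ActType::RELU, std::numeric_limits<float>::infinity(), 0.f)
// RELU6:        elementwiseActivation(x, ActType::RELU, 6.f, 0.f)
// RELU_N1_TO_1: elementwiseActivation(x, ActType::RELU, 1.f, -1.f)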
@@ -856,38 +842,40 @@ void BaseLoader<LoaderDomain, SpecificLoader>::loadResizeBilinear(const Operator
}
template <typename LoaderDomain, typename SpecificLoader>
-void BaseLoader<LoaderDomain, SpecificLoader>::loadRsqrt(const Operator *op, ir::Graph &subg)
+void BaseLoader<LoaderDomain, SpecificLoader>::loadResizeNearestNeighbor(const Operator *op,
+ ir::Graph &subg)
{
ir::OperandIndexSequence inputs;
ir::OperandIndexSequence outputs;
loadOperationIO(op, inputs, outputs);
+ auto input = inputs.at(0);
+ auto size = inputs.at(1);
- std::unique_ptr<ir::Operation> new_op(new ir::operation::RSQRT(inputs, outputs));
- subg.addOperation(std::move(new_op));
-}
+ if (!subg.operands().at(size).isConstant())
+ throw std::runtime_error("ResizeNearestNeighbor: non-constant 'size' is not supported.");
-template <typename LoaderDomain, typename SpecificLoader>
-void BaseLoader<LoaderDomain, SpecificLoader>::loadSelect(const Operator *op, ir::Graph &subg)
-{
- ir::OperandIndexSequence inputs;
- ir::OperandIndexSequence outputs;
+ std::vector<std::int32_t> size_v = subg.operands().at(size).template asVector<std::int32_t>();
- loadOperationIO(op, inputs, outputs);
+ ir::operation::ResizeNearestNeighbor::Param param;
+ param.height_out = size_v[0];
+ param.width_out = size_v[1];
+ param.align_corners = op->builtin_options_as_ResizeNearestNeighborOptions()->align_corners();
- std::unique_ptr<ir::Operation> new_op(new ir::operation::Select(inputs, outputs));
+ std::unique_ptr<ir::Operation> new_op(
+ new ir::operation::ResizeNearestNeighbor({input}, outputs, param));
subg.addOperation(std::move(new_op));
}
template <typename LoaderDomain, typename SpecificLoader>
-void BaseLoader<LoaderDomain, SpecificLoader>::loadSqrt(const Operator *op, ir::Graph &subg)
+void BaseLoader<LoaderDomain, SpecificLoader>::loadSelect(const Operator *op, ir::Graph &subg)
{
ir::OperandIndexSequence inputs;
ir::OperandIndexSequence outputs;
loadOperationIO(op, inputs, outputs);
- std::unique_ptr<ir::Operation> new_op(new ir::operation::SQRT(inputs, outputs));
+ std::unique_ptr<ir::Operation> new_op(new ir::operation::Select(inputs, outputs));
subg.addOperation(std::move(new_op));
}
@@ -905,18 +893,6 @@ void BaseLoader<LoaderDomain, SpecificLoader>::loadSquaredDifference(const Opera
}
template <typename LoaderDomain, typename SpecificLoader>
-void BaseLoader<LoaderDomain, SpecificLoader>::loadTanh(const Operator *op, ir::Graph &subg)
-{
- ir::OperandIndexSequence inputs;
- ir::OperandIndexSequence outputs;
-
- loadOperationIO(op, inputs, outputs);
-
- std::unique_ptr<ir::Operation> new_op(new ir::operation::Tanh(inputs, outputs));
- subg.addOperation(std::move(new_op));
-}
-
-template <typename LoaderDomain, typename SpecificLoader>
void BaseLoader<LoaderDomain, SpecificLoader>::loadTranspose(const Operator *op, ir::Graph &subg)
{
ir::OperandIndexSequence inputs;
@@ -937,8 +913,8 @@ void BaseLoader<LoaderDomain, SpecificLoader>::loadTranspose(const Operator *op,
}
template <typename LoaderDomain, typename SpecificLoader>
-void BaseLoader<LoaderDomain, SpecificLoader>::loadReduce(
- const Operator *op, ir::Graph &subg, ir::operation::Reduce::ReduceType reduce_type)
+template <ir::operation::Reduce::ReduceType reduce_type>
+void BaseLoader<LoaderDomain, SpecificLoader>::loadReduce(const Operator *op, ir::Graph &subg)
{
ir::OperandIndexSequence inputs;
ir::OperandIndexSequence outputs;
@@ -1005,26 +981,49 @@ void BaseLoader<LoaderDomain, SpecificLoader>::loadPad(const Operator *op, ir::G
}
template <typename LoaderDomain, typename SpecificLoader>
-void BaseLoader<LoaderDomain, SpecificLoader>::loadLogistic(const Operator *op, ir::Graph &subg)
+template <ir::operation::ElementwiseBinary::ElementwiseBinaryType op_type>
+void BaseLoader<LoaderDomain, SpecificLoader>::loadElementwiseBinary(const Operator *op,
+ ir::Graph &subg)
{
ir::OperandIndexSequence inputs;
ir::OperandIndexSequence outputs;
loadOperationIO(op, inputs, outputs);
- std::unique_ptr<ir::Operation> new_op(new ir::operation::Logistic(inputs, outputs));
+ ir::operation::ElementwiseBinary::Param param;
+ param.op_type = op_type;
+
+ std::unique_ptr<ir::Operation> new_op(
+ new ir::operation::ElementwiseBinary(inputs, outputs, param));
subg.addOperation(std::move(new_op));
}
template <typename LoaderDomain, typename SpecificLoader>
-void BaseLoader<LoaderDomain, SpecificLoader>::loadExp(const Operator *op, ir::Graph &subg)
+void BaseLoader<LoaderDomain, SpecificLoader>::loadElementwiseUnary(
+ const Operator *op, ir::Graph &subg, ir::operation::ElementwiseUnary::Type op_type)
{
ir::OperandIndexSequence inputs;
ir::OperandIndexSequence outputs;
loadOperationIO(op, inputs, outputs);
- std::unique_ptr<ir::Operation> new_op(new ir::operation::Exp(inputs, outputs));
+ ir::operation::ElementwiseUnary::Param param;
+ param.op_type = op_type;
+
+ if (op_type == ir::operation::ElementwiseUnary::Type::CAST)
+ {
+ auto qasymm8ToUint8 = [](ir::Operand &operand) {
+ if (operand.typeInfo().type() == ir::DataType::QUANT_UINT8_ASYMM)
+ {
+ operand.type(ir::DataType::UINT8);
+ }
+ };
+ qasymm8ToUint8(subg.operands().at(inputs.at(ir::operation::ElementwiseUnary::Input::INPUT)));
+ qasymm8ToUint8(subg.operands().at(outputs.at(0)));
+ }
+
+ std::unique_ptr<ir::Operation> new_op(
+ new ir::operation::ElementwiseUnary(inputs, outputs, param));
subg.addOperation(std::move(new_op));
}
@@ -1177,6 +1176,17 @@ void BaseLoader<LoaderDomain, SpecificLoader>::loadStatelessRandomUniform(const
}
template <typename LoaderDomain, typename SpecificLoader>
+void BaseLoader<LoaderDomain, SpecificLoader>::loadRank(const Operator *op, ir::Graph &subg)
+{
+ ir::OperandIndexSequence inputs;
+ ir::OperandIndexSequence outputs;
+ loadOperationIO(op, inputs, outputs);
+
+ std::unique_ptr<ir::Operation> new_op(new ir::operation::Rank(inputs, outputs));
+ subg.addOperation(std::move(new_op));
+}
+
+template <typename LoaderDomain, typename SpecificLoader>
void BaseLoader<LoaderDomain, SpecificLoader>::loadCustom(const Operator *op, ir::Graph &subg)
{
ir::OperandIndexSequence inputs;
@@ -1197,7 +1207,8 @@ void BaseLoader<LoaderDomain, SpecificLoader>::loadCustom(const Operator *op, ir
Einsum,
BroadcastTo,
FusedBatchNorm,
- StatelessRandomUniform
+ StatelessRandomUniform,
+ Erf
};
// Mapping from custom op name string to BuiltinOP enum
@@ -1210,6 +1221,7 @@ void BaseLoader<LoaderDomain, SpecificLoader>::loadCustom(const Operator *op, ir
{"FusedBatchNormV3", BuiltinOP::FusedBatchNorm},
{"BroadcastTo", BuiltinOP::BroadcastTo},
{"StatelessRandomUniform", BuiltinOP::StatelessRandomUniform},
+ {"Erf", BuiltinOP::Erf},
};
try
@@ -1219,7 +1231,7 @@ void BaseLoader<LoaderDomain, SpecificLoader>::loadCustom(const Operator *op, ir
switch (custom_op_id)
{
case BuiltinOP::AddV2:
- loadAdd(op, subg);
+ loadAddV2(op, subg);
break;
case BuiltinOP::ReduceAll:
loadReduceAll(op, subg);
@@ -1242,6 +1254,9 @@ void BaseLoader<LoaderDomain, SpecificLoader>::loadCustom(const Operator *op, ir
case BuiltinOP::StatelessRandomUniform:
loadStatelessRandomUniform(op, subg);
break;
+ case BuiltinOP::Erf:
+ loadElementwiseUnary(op, subg, ir::operation::ElementwiseUnary::Type::ERF);
+ break;
default:
throw std::runtime_error{
"Loader: Custom OP map is defined but operation loader function is not defined"};
@@ -1396,51 +1411,6 @@ void BaseLoader<LoaderDomain, SpecificLoader>::loadUnpack(const Operator *op, ir
}
template <typename LoaderDomain, typename SpecificLoader>
-void BaseLoader<LoaderDomain, SpecificLoader>::loadMinimum(const Operator *op, ir::Graph &subg)
-{
- ir::OperandIndexSequence inputs;
- ir::OperandIndexSequence outputs;
-
- loadOperationIO(op, inputs, outputs);
-
- std::unique_ptr<ir::Operation> new_op(new ir::operation::Min(inputs, outputs));
- subg.addOperation(std::move(new_op));
-}
-
-template <typename LoaderDomain, typename SpecificLoader>
-void BaseLoader<LoaderDomain, SpecificLoader>::loadMaximum(const Operator *op, ir::Graph &subg)
-{
- ir::OperandIndexSequence inputs;
- ir::OperandIndexSequence outputs;
-
- loadOperationIO(op, inputs, outputs);
-
- std::unique_ptr<ir::Operation> new_op(new ir::operation::Max(inputs, outputs));
- subg.addOperation(std::move(new_op));
-}
-
-template <typename LoaderDomain, typename SpecificLoader>
-void BaseLoader<LoaderDomain, SpecificLoader>::loadCast(const Operator *op, ir::Graph &subg)
-{
- ir::OperandIndexSequence inputs;
- ir::OperandIndexSequence outputs;
-
- loadOperationIO(op, inputs, outputs);
-
- auto qasymm8ToUint8 = [](ir::Operand &operand) {
- if (operand.typeInfo().type() == ir::DataType::QUANT_UINT8_ASYMM)
- {
- operand.type(ir::DataType::UINT8);
- }
- };
- qasymm8ToUint8(subg.operands().at(inputs.at(ir::operation::Cast::Input::INPUT)));
- qasymm8ToUint8(subg.operands().at(outputs.at(0)));
-
- std::unique_ptr<ir::Operation> new_op(new ir::operation::Cast(inputs, outputs));
- subg.addOperation(std::move(new_op));
-}
-
-template <typename LoaderDomain, typename SpecificLoader>
void BaseLoader<LoaderDomain, SpecificLoader>::loadComparison(const Operator *op, ir::Graph &subg)
{
ir::OperandIndexSequence inputs;
@@ -1562,42 +1532,6 @@ void BaseLoader<LoaderDomain, SpecificLoader>::loadOneHot(const Operator *op, ir
}
template <typename LoaderDomain, typename SpecificLoader>
-void BaseLoader<LoaderDomain, SpecificLoader>::loadAbs(const Operator *op, ir::Graph &subg)
-{
- ir::OperandIndexSequence inputs;
- ir::OperandIndexSequence outputs;
-
- loadOperationIO(op, inputs, outputs);
-
- std::unique_ptr<ir::Operation> new_op(new ir::operation::Abs(inputs, outputs));
- subg.addOperation(std::move(new_op));
-}
-
-template <typename LoaderDomain, typename SpecificLoader>
-void BaseLoader<LoaderDomain, SpecificLoader>::loadCos(const Operator *op, ir::Graph &subg)
-{
- ir::OperandIndexSequence inputs;
- ir::OperandIndexSequence outputs;
-
- loadOperationIO(op, inputs, outputs);
-
- std::unique_ptr<ir::Operation> new_op(new ir::operation::Cos(inputs, outputs));
- subg.addOperation(std::move(new_op));
-}
-
-template <typename LoaderDomain, typename SpecificLoader>
-void BaseLoader<LoaderDomain, SpecificLoader>::loadSin(const Operator *op, ir::Graph &subg)
-{
- ir::OperandIndexSequence inputs;
- ir::OperandIndexSequence outputs;
-
- loadOperationIO(op, inputs, outputs);
-
- std::unique_ptr<ir::Operation> new_op(new ir::operation::Sin(inputs, outputs));
- subg.addOperation(std::move(new_op));
-}
-
-template <typename LoaderDomain, typename SpecificLoader>
void BaseLoader<LoaderDomain, SpecificLoader>::loadShape(const Operator *op, ir::Graph &subg)
{
ir::OperandIndexSequence inputs;
@@ -1652,18 +1586,6 @@ void BaseLoader<LoaderDomain, SpecificLoader>::loadWhile(const Operator *op, ir:
}
template <typename LoaderDomain, typename SpecificLoader>
-void BaseLoader<LoaderDomain, SpecificLoader>::loadNeg(const Operator *op, ir::Graph &subg)
-{
- ir::OperandIndexSequence inputs;
- ir::OperandIndexSequence outputs;
-
- loadOperationIO(op, inputs, outputs);
-
- std::unique_ptr<ir::Operation> new_op(new ir::operation::Neg(inputs, outputs));
- subg.addOperation(std::move(new_op));
-}
-
-template <typename LoaderDomain, typename SpecificLoader>
void BaseLoader<LoaderDomain, SpecificLoader>::loadArgMax(const Operator *op, ir::Graph &subg)
{
ir::OperandIndexSequence inputs;
@@ -1697,30 +1619,6 @@ void BaseLoader<LoaderDomain, SpecificLoader>::loadArgMax(const Operator *op, ir
}
template <typename LoaderDomain, typename SpecificLoader>
-void BaseLoader<LoaderDomain, SpecificLoader>::loadLog(const Operator *op, ir::Graph &subg)
-{
- ir::OperandIndexSequence inputs;
- ir::OperandIndexSequence outputs;
-
- loadOperationIO(op, inputs, outputs);
-
- std::unique_ptr<ir::Operation> new_op(new ir::operation::Log(inputs, outputs));
- subg.addOperation(std::move(new_op));
-}
-
-template <typename LoaderDomain, typename SpecificLoader>
-void BaseLoader<LoaderDomain, SpecificLoader>::loadRound(const Operator *op, ir::Graph &subg)
-{
- ir::OperandIndexSequence inputs;
- ir::OperandIndexSequence outputs;
-
- loadOperationIO(op, inputs, outputs);
-
- std::unique_ptr<ir::Operation> new_op(new ir::operation::Round(inputs, outputs));
- subg.addOperation(std::move(new_op));
-}
-
-template <typename LoaderDomain, typename SpecificLoader>
void BaseLoader<LoaderDomain, SpecificLoader>::loadPow(const Operator *op, ir::Graph &subg)
{
ir::OperandIndexSequence inputs;
@@ -1733,31 +1631,6 @@ void BaseLoader<LoaderDomain, SpecificLoader>::loadPow(const Operator *op, ir::G
}
template <typename LoaderDomain, typename SpecificLoader>
-void BaseLoader<LoaderDomain, SpecificLoader>::loadLogicalNot(const Operator *op, ir::Graph &subg)
-{
- ir::OperandIndexSequence inputs;
- ir::OperandIndexSequence outputs;
-
- loadOperationIO(op, inputs, outputs);
-
- std::unique_ptr<ir::Operation> new_op(new ir::operation::LogicalNot(inputs, outputs));
- subg.addOperation(std::move(new_op));
-}
-
-template <typename LoaderDomain, typename SpecificLoader>
-void BaseLoader<LoaderDomain, SpecificLoader>::loadZerosLike(const Operator *op, ir::Graph &subg)
-{
- ir::OperandIndexSequence inputs;
- ir::OperandIndexSequence outputs;
-
- loadOperationIO(op, inputs, outputs);
-
- std::unique_ptr<ir::Operation> new_op(new ir::operation::ZerosLike(inputs, outputs));
-
- subg.addOperation(std::move(new_op));
-}
-
-template <typename LoaderDomain, typename SpecificLoader>
void BaseLoader<LoaderDomain, SpecificLoader>::loadRange(const Operator *op, ir::Graph &subg)
{
ir::OperandIndexSequence inputs;
@@ -1787,18 +1660,6 @@ void BaseLoader<LoaderDomain, SpecificLoader>::loadTile(const Operator *op, ir::
}
template <typename LoaderDomain, typename SpecificLoader>
-void BaseLoader<LoaderDomain, SpecificLoader>::loadLogicalOr(const Operator *op, ir::Graph &subg)
-{
- ir::OperandIndexSequence inputs;
- ir::OperandIndexSequence outputs;
-
- loadOperationIO(op, inputs, outputs);
-
- std::unique_ptr<ir::Operation> new_op(new ir::operation::LogicalOr(inputs, outputs));
- subg.addOperation(std::move(new_op));
-}
-
-template <typename LoaderDomain, typename SpecificLoader>
void BaseLoader<LoaderDomain, SpecificLoader>::loadLogSoftmax(const Operator *op, ir::Graph &subg)
{
ir::OperandIndexSequence inputs;
@@ -1817,18 +1678,27 @@ void BaseLoader<LoaderDomain, SpecificLoader>::loadLogSoftmax(const Operator *op
}
template <typename LoaderDomain, typename SpecificLoader>
-void BaseLoader<LoaderDomain, SpecificLoader>::loadQuantize(const Operator *op, ir::Graph &subg)
+void BaseLoader<LoaderDomain, SpecificLoader>::loadL2Normalization(const Operator *op,
+ ir::Graph &subg)
{
ir::OperandIndexSequence inputs;
ir::OperandIndexSequence outputs;
loadOperationIO(op, inputs, outputs);
- std::unique_ptr<ir::Operation> new_op(new ir::operation::Quantize(inputs, outputs));
+ std::unique_ptr<ir::Operation> new_op(new ir::operation::L2Normalization(inputs, outputs));
subg.addOperation(std::move(new_op));
}
template <typename LoaderDomain, typename SpecificLoader>
+void BaseLoader<LoaderDomain, SpecificLoader>::loadLeakyRelu(const Operator *op, ir::Graph &subg)
+{
+ float alpha = op->builtin_options_as_LeakyReluOptions()->alpha();
+ loadElementwiseActivation(op, subg, ir::operation::ElementwiseActivation::Type::LEAKY_RELU, alpha,
+ 1.f);
+}
+
+template <typename LoaderDomain, typename SpecificLoader>
void BaseLoader<LoaderDomain, SpecificLoader>::loadOperation(const Operator *op, ir::Graph &subg)
{
const auto builtin_op = _model->operator_codes()->Get(op->opcode_index())->builtin_code();
@@ -1839,7 +1709,7 @@ void BaseLoader<LoaderDomain, SpecificLoader>::loadOperation(const Operator *op,
loadConv2D(op, subg);
return;
case BuiltinOperator::BuiltinOperator_AVERAGE_POOL_2D:
- loadAvgPool2D(op, subg);
+ loadPool2D(op, subg, ir::operation::Pool2D::PoolType::AVG);
return;
case BuiltinOperator::BuiltinOperator_DEPTHWISE_CONV_2D:
loadDepthwiseConv2D(op, subg);
@@ -1854,7 +1724,7 @@ void BaseLoader<LoaderDomain, SpecificLoader>::loadOperation(const Operator *op,
loadSoftmax(op, subg);
return;
case BuiltinOperator::BuiltinOperator_MAX_POOL_2D:
- loadMaxPool2D(op, subg);
+ loadPool2D(op, subg, ir::operation::Pool2D::PoolType::MAX);
return;
case BuiltinOperator::BuiltinOperator_CONCATENATION:
loadConcatenation(op, subg);
@@ -1863,31 +1733,40 @@ void BaseLoader<LoaderDomain, SpecificLoader>::loadOperation(const Operator *op,
loadFC(op, subg);
return;
case BuiltinOperator::BuiltinOperator_ADD:
- loadAdd(op, subg);
+ loadBinaryArithmetic<ir::operation::BinaryArithmetic::ArithmeticType::ADD>(op, subg);
return;
case BuiltinOperator::BuiltinOperator_SUB:
- loadSub(op, subg);
+ loadBinaryArithmetic<ir::operation::BinaryArithmetic::ArithmeticType::SUB>(op, subg);
return;
case BuiltinOperator::BuiltinOperator_MUL:
- loadMul(op, subg);
+ loadBinaryArithmetic<ir::operation::BinaryArithmetic::ArithmeticType::MUL>(op, subg);
return;
case BuiltinOperator::BuiltinOperator_DIV:
- loadDiv(op, subg);
+ loadBinaryArithmetic<ir::operation::BinaryArithmetic::ArithmeticType::DIV>(op, subg);
return;
case BuiltinOperator::BuiltinOperator_PACK:
loadPack(op, subg);
return;
case BuiltinOperator::BuiltinOperator_RELU:
- loadRelu(op, subg);
+ loadElementwiseActivation(op, subg, ir::operation::ElementwiseActivation::Type::RELU,
+ ir::operation::ElementwiseActivation::infinity, 0.f);
+ return;
+ case BuiltinOperator::BuiltinOperator_RELU_N1_TO_1:
+ loadElementwiseActivation(op, subg, ir::operation::ElementwiseActivation::Type::RELU, 1.f,
+ -1.f);
return;
case BuiltinOperator::BuiltinOperator_RELU6:
- loadRelu6(op, subg);
+ loadElementwiseActivation(op, subg, ir::operation::ElementwiseActivation::Type::RELU, 6.f,
+ 0.f);
return;
case BuiltinOperator::BuiltinOperator_RESIZE_BILINEAR:
loadResizeBilinear(op, subg);
return;
+ case BuiltinOperator::BuiltinOperator_RESIZE_NEAREST_NEIGHBOR:
+ loadResizeNearestNeighbor(op, subg);
+ return;
case BuiltinOperator::BuiltinOperator_RSQRT:
- loadRsqrt(op, subg);
+ loadElementwiseUnary(op, subg, ir::operation::ElementwiseUnary::Type::RSQRT);
return;
case BuiltinOperator::BuiltinOperator_SELECT:
loadSelect(op, subg);
@@ -1897,37 +1776,39 @@ void BaseLoader<LoaderDomain, SpecificLoader>::loadOperation(const Operator *op,
loadSelect(op, subg);
return;
case BuiltinOperator::BuiltinOperator_SQRT:
- loadSqrt(op, subg);
+ loadElementwiseUnary(op, subg, ir::operation::ElementwiseUnary::Type::SQRT);
return;
case BuiltinOperator::BuiltinOperator_SQUARED_DIFFERENCE:
loadSquaredDifference(op, subg);
return;
case BuiltinOperator::BuiltinOperator_TANH:
- loadTanh(op, subg);
+ loadElementwiseActivation(op, subg, ir::operation::ElementwiseActivation::Type::TANH, 1.f,
+ 1.f);
return;
case BuiltinOperator::BuiltinOperator_TRANSPOSE:
loadTranspose(op, subg);
return;
case BuiltinOperator::BuiltinOperator_MEAN:
- loadReduce(op, subg, ir::operation::Reduce::ReduceType::MEAN);
+ loadReduce<ir::operation::Reduce::ReduceType::MEAN>(op, subg);
return;
case BuiltinOperator::BuiltinOperator_REDUCE_ANY:
- loadReduce(op, subg, ir::operation::Reduce::ReduceType::ANY);
+ loadReduce<ir::operation::Reduce::ReduceType::ANY>(op, subg);
return;
case BuiltinOperator::BuiltinOperator_REDUCE_MAX:
- loadReduce(op, subg, ir::operation::Reduce::ReduceType::MAX);
+ loadReduce<ir::operation::Reduce::ReduceType::MAX>(op, subg);
return;
case BuiltinOperator::BuiltinOperator_REVERSE_V2:
loadReverseV2(op, subg);
return;
case BuiltinOperator::BuiltinOperator_PAD:
+ case BuiltinOperator::BuiltinOperator_PADV2:
loadPad(op, subg);
return;
case BuiltinOperator::BuiltinOperator_LOGISTIC:
- loadLogistic(op, subg);
+ loadElementwiseActivation(op, subg, ir::operation::ElementwiseActivation::Type::LOGISTIC);
return;
case BuiltinOperator::BuiltinOperator_EXP:
- loadExp(op, subg);
+ loadElementwiseUnary(op, subg, ir::operation::ElementwiseUnary::Type::EXP);
return;
case BuiltinOperator::BuiltinOperator_EXPAND_DIMS:
loadExpandDims(op, subg);
@@ -1942,7 +1823,7 @@ void BaseLoader<LoaderDomain, SpecificLoader>::loadOperation(const Operator *op,
loadBatchToSpaceND(op, subg);
return;
case BuiltinOperator::BuiltinOperator_SUM:
- loadReduce(op, subg, ir::operation::Reduce::ReduceType::SUM);
+ loadReduce<ir::operation::Reduce::ReduceType::SUM>(op, subg);
return;
case BuiltinOperator::BuiltinOperator_CUSTOM:
loadCustom(op, subg);
@@ -1969,13 +1850,13 @@ void BaseLoader<LoaderDomain, SpecificLoader>::loadOperation(const Operator *op,
loadUnpack(op, subg);
return;
case BuiltinOperator::BuiltinOperator_MINIMUM:
- loadMinimum(op, subg);
+ loadElementwiseBinary<ir::operation::ElementwiseBinary::ElementwiseBinaryType::MIN>(op, subg);
return;
case BuiltinOperator::BuiltinOperator_MAXIMUM:
- loadMaximum(op, subg);
+ loadElementwiseBinary<ir::operation::ElementwiseBinary::ElementwiseBinaryType::MAX>(op, subg);
return;
case BuiltinOperator::BuiltinOperator_CAST:
- loadCast(op, subg);
+ loadElementwiseUnary(op, subg, ir::operation::ElementwiseUnary::Type::CAST);
return;
case BuiltinOperator::BuiltinOperator_EQUAL:
case BuiltinOperator::BuiltinOperator_NOT_EQUAL:
@@ -1989,19 +1870,19 @@ void BaseLoader<LoaderDomain, SpecificLoader>::loadOperation(const Operator *op,
loadOneHot(op, subg);
return;
case BuiltinOperator::BuiltinOperator_ABS:
- loadAbs(op, subg);
+ loadElementwiseUnary(op, subg, ir::operation::ElementwiseUnary::Type::ABS);
return;
case BuiltinOperator::BuiltinOperator_COS:
- loadCos(op, subg);
+ loadElementwiseUnary(op, subg, ir::operation::ElementwiseUnary::Type::COS);
return;
case BuiltinOperator::BuiltinOperator_SIN:
- loadSin(op, subg);
+ loadElementwiseUnary(op, subg, ir::operation::ElementwiseUnary::Type::SIN);
return;
case BuiltinOperator::BuiltinOperator_SHAPE:
loadShape(op, subg);
return;
case BuiltinOperator::BuiltinOperator_REDUCE_PROD:
- loadReduce(op, subg, ir::operation::Reduce::ReduceType::PROD);
+ loadReduce<ir::operation::Reduce::ReduceType::PROD>(op, subg);
return;
case BuiltinOperator::BuiltinOperator_IF:
loadIf(op, subg);
@@ -2010,31 +1891,32 @@ void BaseLoader<LoaderDomain, SpecificLoader>::loadOperation(const Operator *op,
loadWhile(op, subg);
return;
case BuiltinOperator::BuiltinOperator_NEG:
- loadNeg(op, subg);
+ loadElementwiseUnary(op, subg, ir::operation::ElementwiseUnary::Type::NEG);
return;
case BuiltinOperator::BuiltinOperator_ARG_MAX:
loadArgMax(op, subg);
return;
case BuiltinOperator::BuiltinOperator_LOG:
- loadLog(op, subg);
+ loadElementwiseUnary(op, subg, ir::operation::ElementwiseUnary::Type::LOG);
return;
case BuiltinOperator::BuiltinOperator_ROUND:
- loadRound(op, subg);
+ loadElementwiseUnary(op, subg, ir::operation::ElementwiseUnary::Type::ROUND);
return;
case BuiltinOperator::BuiltinOperator_POW:
loadPow(op, subg);
return;
case BuiltinOperator::BuiltinOperator_LOGICAL_NOT:
- loadLogicalNot(op, subg);
+ loadElementwiseUnary(op, subg, ir::operation::ElementwiseUnary::Type::LOGICAL_NOT);
return;
case BuiltinOperator::BuiltinOperator_LOGICAL_OR:
- loadLogicalOr(op, subg);
+ loadElementwiseBinary<ir::operation::ElementwiseBinary::ElementwiseBinaryType::LOGICAL_OR>(
+ op, subg);
return;
case BuiltinOperator::BuiltinOperator_FILL:
loadFill(op, subg);
return;
case BuiltinOperator::BuiltinOperator_ZEROS_LIKE:
- loadZerosLike(op, subg);
+ loadElementwiseUnary(op, subg, ir::operation::ElementwiseUnary::Type::ZEROS_LIKE);
return;
case BuiltinOperator::BuiltinOperator_TILE:
loadTile(op, subg);
@@ -2049,11 +1931,20 @@ void BaseLoader<LoaderDomain, SpecificLoader>::loadOperation(const Operator *op,
loadLogSoftmax(op, subg);
return;
case BuiltinOperator::BuiltinOperator_QUANTIZE:
- loadQuantize(op, subg);
+ loadElementwiseUnary(op, subg, ir::operation::ElementwiseUnary::Type::QUANTIZE);
return;
case BuiltinOperator::BuiltinOperator_SPACE_TO_DEPTH:
loadSpaceToDepth(op, subg);
return;
+ case BuiltinOperator::BuiltinOperator_L2_NORMALIZATION:
+ loadL2Normalization(op, subg);
+ break;
+ case BuiltinOperator::BuiltinOperator_LEAKY_RELU:
+ loadLeakyRelu(op, subg);
+ return;
+ case BuiltinOperator::BuiltinOperator_RANK:
+ loadRank(op, subg);
+ return;
default:
throw std::runtime_error(
std::string("Unsupported operation: ").append(EnumNameBuiltinOperator(builtin_op)));
diff --git a/runtime/onert/frontend/circle/src/circle_loader.cc b/runtime/onert/frontend/circle/src/circle_loader.cc
index 96dd4698a..92a9ee7a5 100644
--- a/runtime/onert/frontend/circle/src/circle_loader.cc
+++ b/runtime/onert/frontend/circle/src/circle_loader.cc
@@ -103,12 +103,14 @@ public:
// Set inputs
for (const std::int32_t input_ind : *circle_subg->inputs())
{
- subg->addInput(tensorIdxToOperandIdx(input_ind));
+ subg->addInput(tensorIdxToOperandIdx(input_ind),
+ _tensor_names.at(_tensor_to_operand[input_ind]));
}
// Set outputs
for (const std::int32_t output_ind : *circle_subg->outputs())
{
- subg->addOutput(tensorIdxToOperandIdx(output_ind));
+ subg->addOutput(tensorIdxToOperandIdx(output_ind),
+ _tensor_names.at(_tensor_to_operand[output_ind]));
}
// Create operations
for (const auto *op : *circle_subg->operators())
diff --git a/runtime/onert/frontend/nnapi/wrapper/OperationFactory.cc b/runtime/onert/frontend/nnapi/wrapper/OperationFactory.cc
index 8ff6cbbfd..8e3d83db4 100644
--- a/runtime/onert/frontend/nnapi/wrapper/OperationFactory.cc
+++ b/runtime/onert/frontend/nnapi/wrapper/OperationFactory.cc
@@ -83,6 +83,189 @@ uint32_t getUint32Scalar(Operands &operands, const OperandIndex index)
}
OperationFactory::Generator
+getElementwiseActivationGenerator(const onert::ir::operation::ElementwiseActivation::Type op_type,
+ float alpha = 0.f, float beta = 0.f)
+{
+ return [op_type, alpha, beta](const OperationFactory::Param &init_param, Operands &) {
+ assert(init_param.input_count == 1);
+ assert(init_param.output_count == 1);
+
+ // Each input should be interpreted as follows:
+ //
+ // 0 -> Input Tensor Index
+
+ OperandIndexSequence inputs{init_param.inputs[0]};
+ OperandIndexSequence outputs{init_param.outputs[0]};
+
+ operation::ElementwiseActivation::Param param;
+ param.op_type = op_type;
+ param.alpha = alpha;
+ param.beta = beta;
+
+ return new operation::ElementwiseActivation{inputs, outputs, param};
+ };
+}
+
+OperationFactory::Generator getElementwiseBinaryGenerator(
+ const onert::ir::operation::ElementwiseBinary::ElementwiseBinaryType op_type)
+{
+ return [op_type](const OperationFactory::Param &init_param, Operands &) {
+ assert(init_param.input_count == 2);
+ assert(init_param.output_count == 1);
+
+ // Each input should be interpreted as follows:
+ //
+ // 0 -> Lefthand side operand
+ // 1 -> Righthand side operand
+
+ OperandIndexSequence inputs{init_param.inputs[0], init_param.inputs[1]};
+ OperandIndexSequence outputs{init_param.outputs[0]};
+
+ operation::ElementwiseBinary::Param param;
+ param.op_type = op_type;
+
+ return new operation::ElementwiseBinary{inputs, outputs, param};
+ };
+}
+
+OperationFactory::Generator
+getElementwiseUnaryGenerator(const onert::ir::operation::ElementwiseUnary::Type op_type)
+{
+ return [op_type](const OperationFactory::Param &init_param, Operands &operands) {
+ assert(init_param.input_count == 1);
+ assert(init_param.output_count == 1);
+
+ // Each input should be interpreted as follows:
+ //
+ // 0 -> Input Tensor Index
+
+ OperandIndexSequence inputs{init_param.inputs[0]};
+ OperandIndexSequence outputs{init_param.outputs[0]};
+
+ operation::ElementwiseUnary::Param param;
+ param.op_type = op_type;
+
+ if (op_type == operation::ElementwiseUnary::Type::CAST)
+ {
+ // NNAPI uses QUANT_UINT8_ASYMM to represent UINT8 type for ANEURALNETWORKS_CAST's
+ // input/output
+ if (operands.at(inputs.at(0)).typeInfo().type() == DataType::QUANT_UINT8_ASYMM)
+ {
+ replaceDataType(operands, inputs.at(0), DataType::UINT8);
+ }
+ if (operands.at(outputs.at(0)).typeInfo().type() == DataType::QUANT_UINT8_ASYMM)
+ {
+ replaceDataType(operands, outputs.at(0), DataType::UINT8);
+ }
+ }
+
+ return new operation::ElementwiseUnary{inputs, outputs, param};
+ };
+}
+
+OperationFactory::Generator
+getBinaryArithmeticGenerator(const onert::ir::operation::BinaryArithmetic::ArithmeticType op_type)
+{
+ return [op_type](const OperationFactory::Param &init_param, Operands &operands) {
+ assert(init_param.input_count == 3);
+ assert(init_param.output_count == 1);
+
+ // Each input should be interpreted as follows:
+ //
+ // 0 -> Lefthand side operand
+ // 1 -> Righthand side operand
+
+ OperandIndexSequence inputs{init_param.inputs[0], init_param.inputs[1]};
+ OperandIndexSequence outputs{init_param.outputs[0]};
+
+ operation::BinaryArithmetic::Param param;
+ param.arithmetic_type = op_type;
+ const auto activation_index = OperandIndex{init_param.inputs[2]};
+ param.activation =
+ NNAPIConvert::getFusedActivation(operands.at(activation_index).asScalar<FuseCode>());
+
+ return new operation::BinaryArithmetic{inputs, outputs, param};
+ };
+}
+
+OperationFactory::Generator
+getPool2DGenerator(const onert::ir::operation::Pool2D::PoolType pool_type)
+{
+ return [pool_type](const OperationFactory::Param &init_param, Operands &operands) {
+ assert(init_param.input_count == 7 || init_param.input_count == 10);
+ assert(init_param.output_count == 1);
+
+ // In common
+ // 0 -> IFM Tensor Index
+ OperandIndexSequence inputs{init_param.inputs[0]};
+ OperandIndexSequence outputs{init_param.outputs[0]};
+
+ operation::Pool2D::Param param;
+ param.op_type = pool_type;
+ if (init_param.input_count == 7) // support implicit padding
+ {
+ // Each input should be interpreted as follows:
+ //
+ // 1 -> Padding Code (ANEURALNETWORKS_PADDING_SAME or ANEURALNETWORKS_PADDING_VALID) Index
+ // 2 -> Horizontal (over width) Stride Index
+ // 3 -> Vertical (over height) Stride Index
+ // 4 -> Filter Width Index
+ // 5 -> Filter Height Index
+ // 6 -> FuseCode (activation) Index
+
+ const auto padding_index = OperandIndex{init_param.inputs[1]};
+ const auto hstride_index = OperandIndex{init_param.inputs[2]};
+ const auto vstride_index = OperandIndex{init_param.inputs[3]};
+ const auto kw_index = OperandIndex{init_param.inputs[4]};
+ const auto kh_index = OperandIndex{init_param.inputs[5]};
+ const auto activation_index = OperandIndex{init_param.inputs[6]};
+
+ param.padding.type =
+ NNAPIConvert::getPaddingType(operands.at(padding_index).asScalar<PaddingCode>());
+ param.stride = makeStride(operands, hstride_index, vstride_index);
+ param.kw = getUint32Scalar(operands, kw_index);
+ param.kh = operands.at(kh_index).asScalar<uint32_t>();
+ param.activation =
+ NNAPIConvert::getFusedActivation(operands.at(activation_index).asScalar<FuseCode>());
+ }
+ else // support explicit padding
+ {
+ // Each input should be interpreted as follows:
+ //
+ // 1 -> Padding_left index
+ // 2 -> Padding_right index
+ // 3 -> Padding_top index
+ // 4 -> Padding_bottom index
+ // 5 -> Horizontal (over width) Stride Index
+ // 6 -> Vertical (over height) Stride Index
+ // 7 -> Filter Width Index
+ // 8 -> Filter Height Index
+ // 9 -> FuseCode (activation) Index
+
+ const auto padding_left_index = OperandIndex{init_param.inputs[1]};
+ const auto padding_right_index = OperandIndex{init_param.inputs[2]};
+ const auto padding_top_index = OperandIndex{init_param.inputs[3]};
+ const auto padding_bottom_index = OperandIndex{init_param.inputs[4]};
+ const auto hstride_index = OperandIndex{init_param.inputs[5]};
+ const auto vstride_index = OperandIndex{init_param.inputs[6]};
+ const auto kw_index = OperandIndex{init_param.inputs[7]};
+ const auto kh_index = OperandIndex{init_param.inputs[8]};
+ const auto activation_index = OperandIndex{init_param.inputs[9]};
+
+ param.padding.type = PaddingType::EXPLICIT;
+ param.padding.param = makeExplicitPadding(operands, padding_left_index, padding_right_index,
+ padding_top_index, padding_bottom_index);
+ param.stride = makeStride(operands, hstride_index, vstride_index);
+ param.kw = getUint32Scalar(operands, kw_index);
+ param.kh = getUint32Scalar(operands, kh_index);
+ param.activation =
+ NNAPIConvert::getFusedActivation(operands.at(activation_index).asScalar<FuseCode>());
+ }
+ return new operation::Pool2D{inputs, outputs, param};
+ };
+}
+
+OperationFactory::Generator
getReduceGenerator(const onert::ir::operation::Reduce::ReduceType reduce_type)
{
return [reduce_type](const OperationFactory::Param &init_param, Operands &operands) {
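Each generator above converts the NNAPI FuseCode scalar into an onert activation via NNAPIConvert::getFusedActivation. The NNAPI-side codes are fixed by NeuralNetworks.h; the sketch below is an assumed equivalent of that conversion, with local enum names standing in for the real types:

#include <stdexcept>

// NNAPI FuseCode values as defined in NeuralNetworks.h
// (ANEURALNETWORKS_FUSED_NONE/RELU/RELU1/RELU6).
enum FuseCode { FUSED_NONE = 0, FUSED_RELU = 1, FUSED_RELU1 = 2, FUSED_RELU6 = 3 };

enum class Activation { NONE, RELU, RELU1, RELU6 };

Activation getFusedActivation(FuseCode code)
{
  switch (code)
  {
    case FUSED_NONE:  return Activation::NONE;
    case FUSED_RELU:  return Activation::RELU;
    case FUSED_RELU1: return Activation::RELU1;
    case FUSED_RELU6: return Activation::RELU6;
    default: throw std::runtime_error{"Unsupported fused activation code"};
  }
}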
@@ -133,79 +316,24 @@ Operation *createSimpleBinaryOp(const OperationFactory::Param &init_param, Opera
return new T{inputs, outputs};
}
-// A generator function for binary ops with no params
-template <typename T>
-Operation *createPool2DOp(const OperationFactory::Param &init_param, Operands &operands)
+OperationFactory::Generator getComparisonGenerator(operation::Comparison::ComparisonType type)
{
- assert(init_param.input_count == 7 || init_param.input_count == 10);
- assert(init_param.output_count == 1);
+ return [type](const OperationFactory::Param &init_param, Operands &) -> Operation * {
+ assert(init_param.input_count == 2 && init_param.output_count == 1);
- // In common
- // 0 -> IFM Tensor Index
- OperandIndexSequence inputs{init_param.inputs[0]};
- OperandIndexSequence outputs{init_param.outputs[0]};
+ OperandIndexSequence outputs{init_param.outputs[0]};
- typename T::Param param;
- if (init_param.input_count == 7) // support implicit padding
- {
// Each input should be interpreted as follows:
//
- // 1 -> Padding Code (ANEURALNETWORKS_PADDING_SAME or ANEURALNETWORKS_PADDING_VALID) Index
- // 2 -> Horizontal (over width) Stride Index
- // 3 -> Vertial (over height) Stride Index
- // 4 -> Filter Width Index
- // 5 -> Filter Height Index
- // 6 -> FuseCode (activation) Index
-
- const auto padding_index = OperandIndex{init_param.inputs[1]};
- const auto hstride_index = OperandIndex{init_param.inputs[2]};
- const auto vstride_index = OperandIndex{init_param.inputs[3]};
- const auto kw_index = OperandIndex{init_param.inputs[4]};
- const auto kh_index = OperandIndex{init_param.inputs[5]};
- const auto activation_index = OperandIndex{init_param.inputs[6]};
+ // 0 -> input0 Tensor Index
+ // 1 -> input1 Tensor Index
+ OperandIndexSequence inputs{init_param.inputs[0], init_param.inputs[1]};
- param.padding.type =
- NNAPIConvert::getPaddingType(operands.at(padding_index).asScalar<PaddingCode>());
- param.stride = makeStride(operands, hstride_index, vstride_index);
- param.kw = getUint32Scalar(operands, kw_index);
- param.kh = operands.at(kh_index).asScalar<uint32_t>();
- param.activation =
- NNAPIConvert::getFusedActivation(operands.at(activation_index).asScalar<FuseCode>());
- }
- else // support explicit padding
- {
- // Each input should be interpreted as follows:
- //
- // 1 -> Padding_left index
- // 2 -> Padding_right index
- // 3 -> Padding_top index
- // 4 -> Padding_bottom index
- // 5 -> Horizontal (over width) Stride Index
- // 6 -> Vertial (over height) Stride Index
- // 7 -> Filter Width Index
- // 8 -> Filter Height Index
- // 9 -> FuseCode (activation) Index
-
- const auto padding_left_index = OperandIndex{init_param.inputs[1]};
- const auto padding_right_index = OperandIndex{init_param.inputs[2]};
- const auto padding_top_index = OperandIndex{init_param.inputs[3]};
- const auto padding_bottom_index = OperandIndex{init_param.inputs[4]};
- const auto hstride_index = OperandIndex{init_param.inputs[5]};
- const auto vstride_index = OperandIndex{init_param.inputs[6]};
- const auto kw_index = OperandIndex{init_param.inputs[7]};
- const auto kh_index = OperandIndex{init_param.inputs[8]};
- const auto activation_index = OperandIndex{init_param.inputs[9]};
-
- param.padding.type = PaddingType::EXPLICIT;
- param.padding.param = makeExplicitPadding(operands, padding_left_index, padding_right_index,
- padding_top_index, padding_bottom_index);
- param.stride = makeStride(operands, hstride_index, vstride_index);
- param.kw = getUint32Scalar(operands, kw_index);
- param.kh = getUint32Scalar(operands, kh_index);
- param.activation =
- NNAPIConvert::getFusedActivation(operands.at(activation_index).asScalar<FuseCode>());
- }
- return new T{inputs, outputs, param};
+ operation::Comparison::Param param;
+ param.comparison_type = type;
+
+ return new operation::Comparison{inputs, outputs, param};
+ };
}
} // namespace
@@ -295,9 +423,9 @@ OperationFactory::OperationFactory()
return new operation::DepthwiseConv2D{inputs, outputs, param};
};
- _map[ANEURALNETWORKS_MAX_POOL_2D] = createPool2DOp<operation::MaxPool2D>;
+ _map[ANEURALNETWORKS_MAX_POOL_2D] = getPool2DGenerator(operation::Pool2D::PoolType::MAX);
- _map[ANEURALNETWORKS_AVERAGE_POOL_2D] = createPool2DOp<operation::AvgPool2D>;
+ _map[ANEURALNETWORKS_AVERAGE_POOL_2D] = getPool2DGenerator(operation::Pool2D::PoolType::AVG);
_map[ANEURALNETWORKS_CONCATENATION] = [](const OperationFactory::Param &init_param,
Operands &operands) {
@@ -383,27 +511,8 @@ OperationFactory::OperationFactory()
return new operation::Softmax{inputs, outputs, param};
};
- _map[ANEURALNETWORKS_CAST] = [](const OperationFactory::Param &init_param, Operands &operands) {
- assert(init_param.input_count == 1 && init_param.output_count == 1);
-
- OperandIndexSequence outputs{init_param.outputs[0]};
-
- // Each input should be interpreted as follows:
- // 0 -> input Tensor Index
- OperandIndexSequence inputs{init_param.inputs[0]};
-
- // NNAPI uses QUANT_UINT8_ASYMM to represent UINT8 type for ANEURALNETWORKS_CAST's input/output
- if (operands.at(inputs.at(0)).typeInfo().type() == DataType::QUANT_UINT8_ASYMM)
- {
- replaceDataType(operands, inputs.at(0), DataType::UINT8);
- }
- if (operands.at(outputs.at(0)).typeInfo().type() == DataType::QUANT_UINT8_ASYMM)
- {
- replaceDataType(operands, outputs.at(0), DataType::UINT8);
- }
-
- return new operation::Cast{inputs, outputs};
- };
+ _map[ANEURALNETWORKS_CAST] =
+ getElementwiseUnaryGenerator(operation::ElementwiseUnary::Type::CAST);
// ANEURALNETWORKS_CAST_EX is deprecated
// TODO Remove ANEURALNETWORKS_CAST_EX
@@ -416,7 +525,8 @@ OperationFactory::OperationFactory()
// inputCount is either 7 or 10 according to NN API specification.
// - Padding is implicit when inputCount is 7
// - Padding is explicit when inputCount is 10
- assert(init_param.input_count == 7 || init_param.input_count == 10);
+ assert(init_param.input_count == 7 || init_param.input_count == 10 ||
+ init_param.input_count == 13);
assert(init_param.output_count == 1);
// 0 -> IFM Tensor Index
@@ -427,7 +537,6 @@ OperationFactory::OperationFactory()
OperandIndexSequence outputs{init_param.outputs[0]};
Conv2D::Param param;
-
if (init_param.input_count == 7) // support implicit padding
{
// Each input should be interpreted as follows:
@@ -445,6 +554,10 @@ OperationFactory::OperationFactory()
param.padding.type =
NNAPIConvert::getPaddingType(operands.at(padding_index).asScalar<PaddingCode>());
param.stride = makeStride(operands, hstride_index, vstride_index);
+
+ param.dilation.width_factor = 1;
+ param.dilation.height_factor = 1;
+
param.activation =
NNAPIConvert::getFusedActivation(operands.at(activation_index).asScalar<FuseCode>());
}
@@ -472,34 +585,62 @@ OperationFactory::OperationFactory()
param.padding.param = makeExplicitPadding(operands, padding_left_index, padding_right_index,
padding_top_index, padding_bottom_index);
param.stride = makeStride(operands, hstride_index, vstride_index);
+
+ param.dilation.width_factor = 1;
+ param.dilation.height_factor = 1;
+
param.activation =
NNAPIConvert::getFusedActivation(operands.at(activation_index).asScalar<FuseCode>());
}
+ else if (init_param.input_count == 13) // support dilation
+ {
+ // Each input should be interpreted as follows:
+ //
+ // 3 -> Padding_left Index
+ // 4 -> Padding_right Index
+ // 5 -> Padding_top Index
+ // 6 -> Padding_bottom Index
+ // 7 -> Stride (width) Index
+ // 8 -> Stride (height) Index
+ // 9 -> Activation Index
+ // 11 -> Dilation (width_factor) Index
+ // 12 -> Dilation (height_factor) Index
- return new Conv2D{inputs, outputs, param};
- };
-
- _map[ANEURALNETWORKS_ADD] = [](const OperationFactory::Param &init_param, Operands &operands) {
- assert(init_param.input_count == 3);
- assert(init_param.output_count == 1);
+ const auto padding_left_index = OperandIndex{init_param.inputs[3]};
+ const auto padding_right_index = OperandIndex{init_param.inputs[4]};
+ const auto padding_top_index = OperandIndex{init_param.inputs[5]};
+ const auto padding_bottom_index = OperandIndex{init_param.inputs[6]};
+ const auto hstride_index = OperandIndex{init_param.inputs[7]};
+ const auto vstride_index = OperandIndex{init_param.inputs[8]};
+ const auto activation_index = OperandIndex{init_param.inputs[9]};
+ const auto width_factor_index = OperandIndex{init_param.inputs[11]};
+ const auto height_factor_index = OperandIndex{init_param.inputs[12]};
- // Each input should be interpreted as follows:
- //
- // 0 -> Lefthand side operand
- // 1 -> Righthand side operand
+ param.padding.type = PaddingType::EXPLICIT;
+ param.padding.param = makeExplicitPadding(operands, padding_left_index, padding_right_index,
+ padding_top_index, padding_bottom_index);
+ param.stride = makeStride(operands, hstride_index, vstride_index);
- OperandIndexSequence inputs{init_param.inputs[0], init_param.inputs[1]};
- OperandIndexSequence outputs{init_param.outputs[0]};
+ auto width_factor = operands.at(width_factor_index).asScalar<int32_t>();
+ auto height_factor = operands.at(height_factor_index).asScalar<int32_t>();
- operation::Add::Param param;
+ param.dilation.width_factor = width_factor;
+ param.dilation.height_factor = height_factor;
- const auto activation_index = OperandIndex{init_param.inputs[2]};
- param.activation =
- NNAPIConvert::getFusedActivation(operands.at(activation_index).asScalar<FuseCode>());
+ param.activation =
+ NNAPIConvert::getFusedActivation(operands.at(activation_index).asScalar<FuseCode>());
+ }
+ else
+ {
+ throw std::runtime_error{"Conv2D: unsupported input operand count"};
+ }
- return new operation::Add{inputs, outputs, param};
+ return new Conv2D{inputs, outputs, param};
};
+ _map[ANEURALNETWORKS_ADD] =
+ getBinaryArithmeticGenerator(onert::ir::operation::BinaryArithmetic::ArithmeticType::ADD);
+
_map[ANEURALNETWORKS_ADDV2_EX] = _map[ANEURALNETWORKS_ADD];
_map[ANEURALNETWORKS_REDUCE_SUM] =
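The four near-identical ADD/SUB/MUL/DIV lambdas collapse into single getBinaryArithmeticGenerator calls. The underlying pattern is a factory returning a lambda with the varying piece captured; a minimal sketch with toy types (no onert dependencies):

#include <functional>
#include <iostream>
#include <map>
#include <string>

enum class ArithmeticType { ADD, SUB, MUL, DIV };

// Factory returning the actual handler; the operation kind is baked in by
// capture instead of being re-spelled in every table entry.
std::function<void(int, int)> makeBinaryArithmeticHandler(ArithmeticType type)
{
  return [type](int lhs, int rhs) {
    switch (type)
    {
      case ArithmeticType::ADD: std::cout << lhs + rhs << '\n'; break;
      case ArithmeticType::SUB: std::cout << lhs - rhs << '\n'; break;
      case ArithmeticType::MUL: std::cout << lhs * rhs << '\n'; break;
      case ArithmeticType::DIV: std::cout << lhs / rhs << '\n'; break;
    }
  };
}

int main()
{
  std::map<std::string, std::function<void(int, int)>> table;
  table["ADD"] = makeBinaryArithmeticHandler(ArithmeticType::ADD);
  table["SUB"] = makeBinaryArithmeticHandler(ArithmeticType::SUB);
  table["ADD"](3, 4); // prints 7
  return 0;
}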
@@ -509,26 +650,8 @@ OperationFactory::OperationFactory()
// TODO Remove ANEURALNETWORKS_REDUCE_SUM_EX
_map[ANEURALNETWORKS_REDUCE_SUM_EX] = _map[ANEURALNETWORKS_REDUCE_SUM];
- _map[ANEURALNETWORKS_SUB] = [](const OperationFactory::Param &init_param, Operands &operands) {
- assert(init_param.input_count == 3);
- assert(init_param.output_count == 1);
-
- // Each input should be interpreted as follows:
- //
- // 0 -> Lefthand side operand
- // 1 -> Righthand side operand
-
- OperandIndexSequence inputs{init_param.inputs[0], init_param.inputs[1]};
- OperandIndexSequence outputs{init_param.outputs[0]};
-
- operation::Sub::Param param;
-
- const auto activation_index = OperandIndex{init_param.inputs[2]};
- param.activation =
- NNAPIConvert::getFusedActivation(operands.at(activation_index).asScalar<FuseCode>());
-
- return new operation::Sub{inputs, outputs, param};
- };
+ _map[ANEURALNETWORKS_SUB] =
+ getBinaryArithmeticGenerator(onert::ir::operation::BinaryArithmetic::ArithmeticType::SUB);
_map[ANEURALNETWORKS_SLICE] = [](const OperationFactory::Param &init_param, Operands &) {
assert(init_param.input_count == 3 && init_param.output_count == 1);
@@ -611,27 +734,8 @@ OperationFactory::OperationFactory()
return new operation::Transpose{inputs, outputs, param};
};
- _map[ANEURALNETWORKS_MUL] = [](const OperationFactory::Param &init_param, Operands &operands) {
- assert(init_param.input_count == 3 && init_param.output_count == 1);
-
- OperandIndexSequence outputs{init_param.outputs[0]};
-
- // Each input should be interpreted as follows:
- //
- // 0 -> LHS Tensor Index
- // 1 -> RHS Tensor Index
- // 2 -> Activation Index
-
- OperandIndexSequence inputs{init_param.inputs[0], init_param.inputs[1]};
-
- operation::Mul::Param param;
-
- const auto activation_index = OperandIndex{init_param.inputs[2]};
- param.activation =
- NNAPIConvert::getFusedActivation(operands.at(activation_index).asScalar<FuseCode>());
-
- return new operation::Mul{inputs, outputs, param};
- };
+ _map[ANEURALNETWORKS_MUL] =
+ getBinaryArithmeticGenerator(onert::ir::operation::BinaryArithmetic::ArithmeticType::MUL);
_map[ANEURALNETWORKS_SQUEEZE] = [](const OperationFactory::Param &init_param,
Operands &operands) {
@@ -672,34 +776,18 @@ OperationFactory::OperationFactory()
return new operation::Squeeze{inputs, outputs, param};
};
- _map[ANEURALNETWORKS_TANH] = CreateSimpleUnaryOp<operation::Tanh>;
+ _map[ANEURALNETWORKS_TANH] = getElementwiseActivationGenerator(
+ onert::ir::operation::ElementwiseActivation::Type::TANH, 1.f, 1.f);
- _map[ANEURALNETWORKS_LOG] = CreateSimpleUnaryOp<operation::Log>;
+ _map[ANEURALNETWORKS_LOG] = getElementwiseUnaryGenerator(operation::ElementwiseUnary::Type::LOG);
- _map[ANEURALNETWORKS_LOGISTIC] = CreateSimpleUnaryOp<operation::Logistic>;
+ _map[ANEURALNETWORKS_LOGISTIC] = getElementwiseActivationGenerator(
+ onert::ir::operation::ElementwiseActivation::Type::LOGISTIC);
- _map[ANEURALNETWORKS_DIV] = [](const OperationFactory::Param &init_param, Operands &operands) {
- assert(init_param.input_count == 3 && init_param.output_count == 1);
+ _map[ANEURALNETWORKS_DIV] =
+ getBinaryArithmeticGenerator(onert::ir::operation::BinaryArithmetic::ArithmeticType::DIV);
- OperandIndexSequence outputs{init_param.outputs[0]};
-
- // Each input should be interpreted as follows:
- //
- // 0 -> LHS Tensor Index
- // 1 -> RHS Tensor Index
- // 2 -> Activation Index
- OperandIndexSequence inputs{init_param.inputs[0], init_param.inputs[1]};
-
- operation::Div::Param param;
-
- const auto activation_index = OperandIndex{init_param.inputs[2]};
- param.activation =
- NNAPIConvert::getFusedActivation(operands.at(activation_index).asScalar<FuseCode>());
-
- return new operation::Div{inputs, outputs, param};
- };
-
- _map[ANEURALNETWORKS_EXP] = CreateSimpleUnaryOp<operation::Exp>;
+ _map[ANEURALNETWORKS_EXP] = getElementwiseUnaryGenerator(operation::ElementwiseUnary::Type::EXP);
// ANEURALNETWORKS_EXP_EX is deprecated
// TODO Remove ANEURALNETWORKS_EXP_EX
@@ -710,39 +798,17 @@ OperationFactory::OperationFactory()
// 1 -> Axis Tensor Index
_map[ANEURALNETWORKS_EXPAND_DIMS] = createSimpleBinaryOp<operation::ExpandDims>;
- _map[ANEURALNETWORKS_GREATER] = [](const OperationFactory::Param &init_param, Operands &) {
- assert(init_param.input_count == 2 && init_param.output_count == 1);
-
- OperandIndexSequence outputs{init_param.outputs[0]};
-
- // Each input should be interpreted as follows:
- //
- // 0 -> input0 Tensor Index
- // 1 -> input1 Tensor Index
- OperandIndexSequence inputs{init_param.inputs[0], init_param.inputs[1]};
-
- operation::Comparison::Param param;
- param.comparison_type = operation::Comparison::ComparisonType::Greater;
-
- return new operation::Comparison{inputs, outputs, param};
- };
-
- _map[ANEURALNETWORKS_GREATER_EQUAL] = [](const OperationFactory::Param &init_param, Operands &) {
- assert(init_param.input_count == 2 && init_param.output_count == 1);
-
- OperandIndexSequence outputs{init_param.outputs[0]};
-
- // Each input should be interpreted as follows:
- //
- // 0 -> input0 Tensor Index
- // 1 -> input1 Tensor Index
- OperandIndexSequence inputs{init_param.inputs[0], init_param.inputs[1]};
-
- operation::Comparison::Param param;
- param.comparison_type = operation::Comparison::ComparisonType::GreaterEqual;
-
- return new operation::Comparison{inputs, outputs, param};
- };
+ _map[ANEURALNETWORKS_GREATER] =
+ getComparisonGenerator(operation::Comparison::ComparisonType::Greater);
+ _map[ANEURALNETWORKS_GREATER_EQUAL] =
+ getComparisonGenerator(operation::Comparison::ComparisonType::GreaterEqual);
+ _map[ANEURALNETWORKS_LESS] = getComparisonGenerator(operation::Comparison::ComparisonType::Less);
+ _map[ANEURALNETWORKS_LESS_EQUAL] =
+ getComparisonGenerator(operation::Comparison::ComparisonType::LessEqual);
+ _map[ANEURALNETWORKS_NOT_EQUAL] =
+ getComparisonGenerator(operation::Comparison::ComparisonType::NotEqual);
+ _map[ANEURALNETWORKS_EQUAL] =
+ getComparisonGenerator(operation::Comparison::ComparisonType::Equal);
// ANEURALNETWORKS_GREATER_EQUAL_EX is deprecated
// TODO Remove ANEURALNETWORKS_GREATER_EQUAL_EX
@@ -767,40 +833,6 @@ OperationFactory::OperationFactory()
return new operation::Comparison{inputs, outputs, param};
};
- _map[ANEURALNETWORKS_LESS] = [](const OperationFactory::Param &init_param, Operands &) {
- assert(init_param.input_count == 2 && init_param.output_count == 1);
-
- OperandIndexSequence outputs{init_param.outputs[0]};
-
- // Each input should be interpreted as follows:
- //
- // 0 -> input0 Tensor Index
- // 1 -> input1 Tensor Index
- OperandIndexSequence inputs{init_param.inputs[0], init_param.inputs[1]};
-
- operation::Comparison::Param param;
- param.comparison_type = operation::Comparison::ComparisonType::Less;
-
- return new operation::Comparison{inputs, outputs, param};
- };
-
- _map[ANEURALNETWORKS_LESS_EQUAL] = [](const OperationFactory::Param &init_param, Operands &) {
- assert(init_param.input_count == 2 && init_param.output_count == 1);
-
- OperandIndexSequence outputs{init_param.outputs[0]};
-
- // Each input should be interpreted as follows:
- //
- // 0 -> input0 Tensor Index
- // 1 -> input1 Tensor Index
- OperandIndexSequence inputs{init_param.inputs[0], init_param.inputs[1]};
-
- operation::Comparison::Param param;
- param.comparison_type = operation::Comparison::ComparisonType::LessEqual;
-
- return new operation::Comparison{inputs, outputs, param};
- };
-
// ANEURALNETWORKS_LESS_EX is deprecated
// TODO Remove ANEURALNETWORKS_LESS_EX
_map[ANEURALNETWORKS_LESS_EX] = [](const OperationFactory::Param &init_param,
@@ -837,23 +869,6 @@ OperationFactory::OperationFactory()
// TODO Remove ANEURALNETWORKS_REDUCE_MAX_EX
_map[ANEURALNETWORKS_REDUCE_MAX_EX] = _map[ANEURALNETWORKS_REDUCE_MAX];
- _map[ANEURALNETWORKS_NOT_EQUAL] = [](const OperationFactory::Param &init_param, Operands &) {
- assert(init_param.input_count == 2 && init_param.output_count == 1);
-
- OperandIndexSequence outputs{init_param.outputs[0]};
-
- // Each input should be interpreted as follows:
- //
- // 0 -> input1 Tensor Index
- // 1 -> input2 Tensor Index
- OperandIndexSequence inputs{init_param.inputs[0], init_param.inputs[1]};
-
- operation::Comparison::Param param;
- param.comparison_type = operation::Comparison::ComparisonType::NotEqual;
-
- return new operation::Comparison{inputs, outputs, param};
- };
-
// ANEURALNETWORKS_NOT_EQUAL_EX is deprecated
// TODO Remove ANEURALNETWORKS_NOT_EQUAL_EX
_map[ANEURALNETWORKS_NOT_EQUAL_EX] = [](const OperationFactory::Param &init_param,
@@ -877,7 +892,8 @@ OperationFactory::OperationFactory()
return new operation::Comparison{inputs, outputs, param};
};
- _map[ANEURALNETWORKS_LOGICAL_AND] = createSimpleBinaryOp<operation::LogicalAnd>;
+ _map[ANEURALNETWORKS_LOGICAL_AND] = getElementwiseBinaryGenerator(
+ operation::ElementwiseBinary::ElementwiseBinaryType::LOGICAL_AND);
// ANEURALNETWORKS_LOGICAL_AND_EX is deprecated
// TODO Remove ANEURALNETWORKS_LOGICAL_AND_EX
@@ -898,10 +914,14 @@ OperationFactory::OperationFactory()
replaceDataType(operands, inputs.at(1), DataType::BOOL8);
replaceDataType(operands, outputs.at(0), DataType::BOOL8);
- return new operation::LogicalAnd{inputs, outputs};
+ operation::ElementwiseBinary::Param param;
+ param.op_type = operation::ElementwiseBinary::ElementwiseBinaryType::LOGICAL_AND;
+
+ return new operation::ElementwiseBinary{inputs, outputs, param};
};
- _map[ANEURALNETWORKS_RSQRT] = CreateSimpleUnaryOp<operation::RSQRT>;
+ _map[ANEURALNETWORKS_RSQRT] =
+ getElementwiseUnaryGenerator(operation::ElementwiseUnary::Type::RSQRT);
_map[ANEURALNETWORKS_SELECT] = [](const OperationFactory::Param &init_param, Operands &) {
assert(init_param.input_count == 3 && init_param.output_count == 1);
@@ -937,7 +957,9 @@ OperationFactory::OperationFactory()
// TODO Remove ANEURALNETWORKS_RSQRT_EX
_map[ANEURALNETWORKS_RSQRT_EX] = _map[ANEURALNETWORKS_RSQRT];
- _map[ANEURALNETWORKS_RELU] = CreateSimpleUnaryOp<operation::ReLU>;
+ _map[ANEURALNETWORKS_RELU] =
+ getElementwiseActivationGenerator(onert::ir::operation::ElementwiseActivation::Type::RELU,
+ onert::ir::operation::ElementwiseActivation::infinity, 0);
_map[ANEURALNETWORKS_RESIZE_BILINEAR] = [](const OperationFactory::Param &init_param,
Operands &operands) {
@@ -960,9 +982,11 @@ OperationFactory::OperationFactory()
return new operation::ResizeBilinear{inputs, outputs, param};
};
- _map[ANEURALNETWORKS_RELU1] = CreateSimpleUnaryOp<operation::ReLU1>;
+ _map[ANEURALNETWORKS_RELU1] = getElementwiseActivationGenerator(
+ onert::ir::operation::ElementwiseActivation::Type::RELU, 1.f, -1.f);
- _map[ANEURALNETWORKS_RELU6] = CreateSimpleUnaryOp<operation::ReLU6>;
+ _map[ANEURALNETWORKS_RELU6] = getElementwiseActivationGenerator(
+ onert::ir::operation::ElementwiseActivation::Type::RELU, 6.f, 0.f);
_map[ANEURALNETWORKS_REVERSE_EX] = [](const OperationFactory::Param &init_param, Operands &) {
assert(init_param.input_count == 2 && init_param.output_count == 1);
@@ -1009,17 +1033,8 @@ OperationFactory::OperationFactory()
return new operation::RNN{inputs, outputs, param};
};
- _map[ANEURALNETWORKS_FLOOR] = [](const OperationFactory::Param &init_param, Operands &) {
- assert(init_param.input_count == 1 && init_param.output_count == 1);
-
- OperandIndexSequence outputs{init_param.outputs[0]};
-
- // Each input should be interpreted as follows:
- // 0 -> input Tensor Index
- OperandIndexSequence inputs{init_param.inputs[0]};
-
- return new operation::Floor{inputs, outputs};
- };
+ _map[ANEURALNETWORKS_FLOOR] =
+ getElementwiseUnaryGenerator(operation::ElementwiseUnary::Type::FLOOR);
_map[ANEURALNETWORKS_SPACE_TO_BATCH_ND] = [](const OperationFactory::Param &init_param,
Operands &) {
@@ -1059,7 +1074,7 @@ OperationFactory::OperationFactory()
return new operation::SpaceToDepth{inputs, outputs, param};
};
- _map[ANEURALNETWORKS_L2_POOL_2D] = createPool2DOp<operation::L2Pool2D>;
+ _map[ANEURALNETWORKS_L2_POOL_2D] = getPool2DGenerator(operation::Pool2D::PoolType::L2);
_map[ANEURALNETWORKS_EMBEDDING_LOOKUP] = [](const OperationFactory::Param &init_param,
Operands &) {
@@ -1157,35 +1172,15 @@ OperationFactory::OperationFactory()
return new operation::TransposeConv{inputs, outputs, param};
};
- _map[ANEURALNETWORKS_SQRT] = [](const OperationFactory::Param &init_param, Operands &) {
- assert(init_param.input_count == 1 && init_param.output_count == 1);
-
- OperandIndexSequence outputs{init_param.outputs[0]};
-
- // Each input should be interpreted as follows:
- // 0 -> input Tensor Index
-
- OperandIndexSequence inputs{init_param.inputs[0]};
- return new operation::SQRT{inputs, outputs};
- };
+ _map[ANEURALNETWORKS_SQRT] =
+ getElementwiseUnaryGenerator(operation::ElementwiseUnary::Type::SQRT);
// ANEURALNETWORKS_SQRT_EX is deprecated
// TODO Remove ANEURALNETWORKS_SQRT_EX
_map[ANEURALNETWORKS_SQRT_EX] = _map[ANEURALNETWORKS_SQRT];
- _map[ANEURALNETWORKS_LOGICAL_OR] = [](const OperationFactory::Param &init_param, Operands &) {
- assert(init_param.input_count == 2 && init_param.output_count == 1);
-
- OperandIndexSequence outputs{init_param.outputs[0]};
-
- // Each input should be interpreted as follows:
- //
- // 0 -> input0 Tensor Index
- // 1 -> input1 Tensor Index
- OperandIndexSequence inputs{init_param.inputs[0], init_param.inputs[1]};
-
- return new operation::LogicalOr{inputs, outputs};
- };
+ _map[ANEURALNETWORKS_LOGICAL_OR] = getElementwiseBinaryGenerator(
+ operation::ElementwiseBinary::ElementwiseBinaryType::LOGICAL_OR);
// ANEURALNETWORKS_LOGICAL_OR_EX is deprecated
// TODO Remove ANEURALNETWORKS_LOGICAL_OR_EX
@@ -1206,10 +1201,14 @@ OperationFactory::OperationFactory()
replaceDataType(operands, inputs.at(1), DataType::BOOL8);
replaceDataType(operands, outputs.at(0), DataType::BOOL8);
- return new operation::LogicalOr{inputs, outputs};
+ operation::ElementwiseBinary::Param param;
+ param.op_type = operation::ElementwiseBinary::ElementwiseBinaryType::LOGICAL_OR;
+
+ return new operation::ElementwiseBinary{inputs, outputs, param};
};
- _map[ANEURALNETWORKS_LOGICAL_NOT] = CreateSimpleUnaryOp<operation::LogicalNot>;
+ _map[ANEURALNETWORKS_LOGICAL_NOT] =
+ getElementwiseUnaryGenerator(operation::ElementwiseUnary::Type::LOGICAL_NOT);
// ANEURALNETWORKS_LOGICAL_NOT_EX is deprecated
// TODO Remove ANEURALNETWORKS_LOGICAL_NOT_EX
@@ -1228,7 +1227,10 @@ OperationFactory::OperationFactory()
replaceDataType(operands, inputs.at(0), DataType::BOOL8);
replaceDataType(operands, outputs.at(0), DataType::BOOL8);
- return new operation::LogicalNot{inputs, outputs};
+ operation::ElementwiseUnary::Param param;
+ param.op_type = operation::ElementwiseUnary::Type::LOGICAL_NOT;
+
+ return new operation::ElementwiseUnary{inputs, outputs, param};
};
_map[ANEURALNETWORKS_LSTM] = [](const OperationFactory::Param &init_param, Operands &operands) {
@@ -1306,23 +1308,6 @@ OperationFactory::OperationFactory()
return new operation::LSTM{inputs, outputs, param};
};
- _map[ANEURALNETWORKS_EQUAL] = [](const OperationFactory::Param &init_param, Operands &) {
- assert(init_param.input_count == 2 && init_param.output_count == 1);
-
- OperandIndexSequence outputs{init_param.outputs[0]};
-
- // Each input should be interpreted as follows:
- //
- // 0 -> input0 Tensor Index
- // 1 -> input1 Tensor Index
- OperandIndexSequence inputs{init_param.inputs[0], init_param.inputs[1]};
-
- operation::Comparison::Param param;
- param.comparison_type = operation::Comparison::ComparisonType::Equal;
-
- return new operation::Comparison{inputs, outputs, param};
- };
-
// ANEURALNETWORKS_EQUAL_EX is deprecated
// TODO Remove ANEURALNETWORKS_EQUAL_EX
_map[ANEURALNETWORKS_EQUAL_EX] = [](const OperationFactory::Param &init_param,
@@ -1409,13 +1394,13 @@ OperationFactory::OperationFactory()
// TODO Remove ANEURALNETWORKS_GATHER_EX
_map[ANEURALNETWORKS_GATHER_EX] = _map[ANEURALNETWORKS_GATHER];
- _map[ANEURALNETWORKS_NEG] = CreateSimpleUnaryOp<operation::Neg>;
+ _map[ANEURALNETWORKS_NEG] = getElementwiseUnaryGenerator(operation::ElementwiseUnary::Type::NEG);
// ANEURALNETWORKS_NEG_EX is deprecated
// TODO Remove ANEURALNETWORKS_NEG_EX
_map[ANEURALNETWORKS_NEG_EX] = _map[ANEURALNETWORKS_NEG];
- _map[ANEURALNETWORKS_ABS] = CreateSimpleUnaryOp<operation::Abs>;
+ _map[ANEURALNETWORKS_ABS] = getElementwiseUnaryGenerator(operation::ElementwiseUnary::Type::ABS);
// ANEURALNETWORKS_ABS_EX is deprecated
// TODO Remove ANEURALNETWORKS_ABS_EX
@@ -1434,6 +1419,8 @@ OperationFactory::OperationFactory()
operation::ArgMax::Param param;
param.axis = operands.at(OperandIndex{init_param.inputs[1]}).asScalar<std::int32_t>();
+ // NNAPI ARGMAX output type is always int32
+ param.output_type = DataType::INT32;
return new operation::ArgMax{inputs, outputs, param};
};
@@ -1442,7 +1429,8 @@ OperationFactory::OperationFactory()
// TODO Remove ANEURALNETWORKS_ARGMAX_EX
_map[ANEURALNETWORKS_ARGMAX_EX] = _map[ANEURALNETWORKS_ARGMAX];
- _map[ANEURALNETWORKS_DEQUANTIZE] = CreateSimpleUnaryOp<operation::Dequantize>;
+ _map[ANEURALNETWORKS_DEQUANTIZE] =
+ getElementwiseUnaryGenerator(operation::ElementwiseUnary::Type::DEQUANTIZE);
_map[ANEURALNETWORKS_MEAN] = [](const OperationFactory::Param &init_param, Operands &operands) {
assert(init_param.input_count == 3 && init_param.output_count == 1);
@@ -1600,9 +1588,11 @@ OperationFactory::OperationFactory()
_map[ANEURALNETWORKS_PAD_V2] = _map[ANEURALNETWORKS_PAD];
- _map[ANEURALNETWORKS_MINIMUM] = createSimpleBinaryOp<operation::Min>;
+ _map[ANEURALNETWORKS_MINIMUM] =
+ getElementwiseBinaryGenerator(operation::ElementwiseBinary::ElementwiseBinaryType::MIN);
- _map[ANEURALNETWORKS_MAXIMUM] = createSimpleBinaryOp<operation::Max>;
+ _map[ANEURALNETWORKS_MAXIMUM] =
+ getElementwiseBinaryGenerator(operation::ElementwiseBinary::ElementwiseBinaryType::MAX);
_map[ANEURALNETWORKS_ONE_HOT_EX] = [](const OperationFactory::Param &init_param,
Operands &operands) {
@@ -1628,23 +1618,10 @@ OperationFactory::OperationFactory()
return new operation::OneHot{inputs, outputs, param};
};
- _map[ANEURALNETWORKS_COS_EX] = [](const OperationFactory::Param &init_param, Operands &) {
- assert(init_param.input_count == 1 && init_param.output_count == 1);
-
- OperandIndexSequence inputs{init_param.inputs[0]};
- OperandIndexSequence outputs{init_param.outputs[0]};
-
- return new operation::Cos{inputs, outputs};
- };
+ _map[ANEURALNETWORKS_COS_EX] =
+ getElementwiseUnaryGenerator(operation::ElementwiseUnary::Type::COS);
- _map[ANEURALNETWORKS_SIN] = [](const OperationFactory::Param &init_param, Operands &) {
- assert(init_param.input_count == 1 && init_param.output_count == 1);
-
- OperandIndexSequence inputs{init_param.inputs[0]};
- OperandIndexSequence outputs{init_param.outputs[0]};
-
- return new operation::Sin{inputs, outputs};
- };
+ _map[ANEURALNETWORKS_SIN] = getElementwiseUnaryGenerator(operation::ElementwiseUnary::Type::SIN);
_map[ANEURALNETWORKS_SHAPE_EX] = [](const OperationFactory::Param &init_param, Operands &) {
assert(init_param.input_count == 1 && init_param.output_count == 1);
@@ -1658,17 +1635,8 @@ OperationFactory::OperationFactory()
_map[ANEURALNETWORKS_REDUCE_PROD] =
getReduceGenerator(onert::ir::operation::Reduce::ReduceType::PROD);
- _map[ANEURALNETWORKS_ROUND_EX] = [](const OperationFactory::Param &init_param, Operands &) {
- assert(init_param.input_count == 1 && init_param.output_count == 1);
-
- OperandIndexSequence outputs{init_param.outputs[0]};
-
- // Each input should be interpreted as follows:
- // 0 -> input Tensor Index
- OperandIndexSequence inputs{init_param.inputs[0]};
-
- return new operation::Round{inputs, outputs};
- };
+ _map[ANEURALNETWORKS_ROUND_EX] =
+ getElementwiseUnaryGenerator(operation::ElementwiseUnary::Type::ROUND);
_map[ANEURALNETWORKS_RANGE_EX] = [](const OperationFactory::Param &init_param, Operands &) {
assert(init_param.input_count == 3 && init_param.output_count == 1);
@@ -1695,18 +1663,8 @@ OperationFactory::OperationFactory()
// 1 -> A 1-D tensor, specifying the value
_map[ANEURALNETWORKS_FILL_EX] = createSimpleBinaryOp<operation::Fill>;
- _map[ANEURALNETWORKS_ZEROS_LIKE_EX] = [](const OperationFactory::Param &init_param, Operands &) {
- assert(init_param.input_count == 1 && init_param.output_count == 1);
-
- OperandIndexSequence outputs{init_param.outputs[0]};
-
- // Each input should be interpreted as follows:
- // 0 -> input Tensor Index
- OperandIndexSequence inputs{init_param.inputs[0]};
-
- return new operation::ZerosLike{inputs, outputs};
- };
-
+ _map[ANEURALNETWORKS_ZEROS_LIKE_EX] =
+ getElementwiseUnaryGenerator(operation::ElementwiseUnary::Type::ZEROS_LIKE);
// Each input should be interpreted as follows:
// 0 -> Input Tensor Index
// 1 -> Multiple Tensor Index
@@ -1845,14 +1803,8 @@ OperationFactory::OperationFactory()
return new operation::LogSoftmax{inputs, outputs, param};
};
- _map[ANEURALNETWORKS_QUANTIZE] = [](const OperationFactory::Param &init_param, Operands &) {
- assert(init_param.input_count == 1 && init_param.output_count == 1);
-
- OperandIndexSequence inputs{init_param.inputs[0]};
- OperandIndexSequence outputs{init_param.outputs[0]};
-
- return new operation::Quantize{inputs, outputs};
- };
+ _map[ANEURALNETWORKS_QUANTIZE] =
+ getElementwiseUnaryGenerator(operation::ElementwiseUnary::Type::QUANTIZE);
}
Operation *OperationFactory::create(ANeuralNetworksOperationType type,
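For context on the hunks above: the per-operation lambdas for ADD/SUB/MUL/DIV, the comparison ops and the element-wise ops are replaced by shared generator helpers (getBinaryArithmeticGenerator, getComparisonGenerator, getElementwiseUnaryGenerator, ...). Those helpers are defined earlier in this commit and are not part of this excerpt; the sketch below is only an assumption about their shape, inferred from how they are used above.

    // Illustrative sketch only -- not the actual helper from the commit.
    // Assumes a Generator alias along the lines of
    //   std::function<onert::ir::Operation *(const OperationFactory::Param &, Operands &)>.
    Generator getElementwiseUnaryGenerator(operation::ElementwiseUnary::Type op_type)
    {
      return [op_type](const OperationFactory::Param &init_param, Operands &) -> Operation * {
        assert(init_param.input_count == 1 && init_param.output_count == 1);

        // 0 -> input Tensor Index
        OperandIndexSequence inputs{init_param.inputs[0]};
        OperandIndexSequence outputs{init_param.outputs[0]};

        operation::ElementwiseUnary::Param param;
        param.op_type = op_type;

        return new operation::ElementwiseUnary{inputs, outputs, param};
      };
    }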
diff --git a/runtime/onert/frontend/tflite/src/tflite_loader.cc b/runtime/onert/frontend/tflite/src/tflite_loader.cc
index 86c2c6bc7..7eef15717 100644
--- a/runtime/onert/frontend/tflite/src/tflite_loader.cc
+++ b/runtime/onert/frontend/tflite/src/tflite_loader.cc
@@ -90,12 +90,14 @@ public:
// Set inputs
for (const std::int32_t input_ind : *tflite_subg->inputs())
{
- subg->addInput(tensorIdxToOperandIdx(input_ind));
+ subg->addInput(tensorIdxToOperandIdx(input_ind),
+ _tensor_names.at(_tensor_to_operand[input_ind]));
}
// Set outputs
for (const std::int32_t output_ind : *tflite_subg->outputs())
{
- subg->addOutput(tensorIdxToOperandIdx(output_ind));
+ subg->addOutput(tensorIdxToOperandIdx(output_ind),
+ _tensor_names.at(_tensor_to_operand[output_ind]));
}
// Create operations
for (const auto *op : *tflite_subg->operators())
diff --git a/runtime/onert/test/core/compiler/Scheduler.cc b/runtime/onert/test/core/compiler/Scheduler.cc
index 94f51ddd6..50f3964db 100644
--- a/runtime/onert/test/core/compiler/Scheduler.cc
+++ b/runtime/onert/test/core/compiler/Scheduler.cc
@@ -22,9 +22,7 @@
#include <ir/TypeInfo.h>
#include <ir/DataType.h>
-#include <ir/operation/Add.h>
-#include <ir/operation/Sub.h>
-#include <ir/operation/Mul.h>
+#include <ir/operation/BinaryArithmetic.h>
#include <ir/operation/FullyConnected.h>
#include <gtest/gtest.h>
@@ -209,8 +207,7 @@ using OIS = OperandIndexSequence;
template <typename NodeT, typename... Types>
OperationIndex create(std::shared_ptr<Graph> graph, Types &&... args)
{
- typename NodeT::Param op_params{Activation::NONE};
- auto op = std::make_unique<NodeT>(std::forward<Types>(args)..., op_params);
+ auto op = std::make_unique<NodeT>(std::forward<Types>(args)...);
auto op_idx = graph->addOperation(std::move(op));
  // For now in scheduler test all operations in tested graphs have the same size (for simplicity)
assert(calcOpSize(graph, op_idx) == OPERATION_SIZE);
@@ -227,17 +224,20 @@ std::shared_ptr<Graph> createStraightGraph()
auto add_lhs_idx = graph->addOperand(ir::Shape{OPERAND_ELEMS}, float_op);
auto add_rhs_idx = graph->addOperand(ir::Shape{OPERAND_ELEMS}, float_op);
auto add_out_idx = graph->addOperand(ir::Shape{OPERAND_ELEMS}, float_op);
- create<Add>(graph, OIS{add_lhs_idx, add_rhs_idx}, OIS{add_out_idx});
+ BinaryArithmetic::Param add_op_params{BinaryArithmetic::ArithmeticType::ADD, Activation::NONE};
+ create<BinaryArithmetic>(graph, OIS{add_lhs_idx, add_rhs_idx}, OIS{add_out_idx}, add_op_params);
// Create sub node
auto sub_const_idx = graph->addOperand(ir::Shape{OPERAND_ELEMS}, float_op);
auto sub_out_idx = graph->addOperand(ir::Shape{OPERAND_ELEMS}, float_op);
- create<Sub>(graph, OIS{add_out_idx, sub_const_idx}, OIS{sub_out_idx});
+ BinaryArithmetic::Param sub_op_params{BinaryArithmetic::ArithmeticType::SUB, Activation::NONE};
+ create<BinaryArithmetic>(graph, OIS{add_out_idx, sub_const_idx}, OIS{sub_out_idx}, sub_op_params);
// Create mul node
auto mul_const_idx = graph->addOperand(ir::Shape{OPERAND_ELEMS}, float_op);
auto mul_out_idx = graph->addOperand(ir::Shape{OPERAND_ELEMS}, float_op);
- create<Mul>(graph, OIS{sub_out_idx, mul_const_idx}, OIS{mul_out_idx});
+ BinaryArithmetic::Param mul_op_params{BinaryArithmetic::ArithmeticType::MUL, Activation::NONE};
+ create<BinaryArithmetic>(graph, OIS{sub_out_idx, mul_const_idx}, OIS{mul_out_idx}, mul_op_params);
graph->finishBuilding();
return graph;
@@ -261,31 +261,39 @@ std::shared_ptr<Graph> createBranchedGraph()
auto add_lhs_idx = graph->addOperand(ir::Shape{OPERAND_ELEMS}, float_op);
auto add_rhs_idx = graph->addOperand(ir::Shape{OPERAND_ELEMS}, float_op);
auto add_out_idx = graph->addOperand(ir::Shape{OPERAND_ELEMS}, float_op);
- create<Add>(graph, OIS{add_lhs_idx, add_rhs_idx}, OIS{add_out_idx});
+ BinaryArithmetic::Param add_op_params{BinaryArithmetic::ArithmeticType::ADD, Activation::NONE};
+ create<BinaryArithmetic>(graph, OIS{add_lhs_idx, add_rhs_idx}, OIS{add_out_idx}, add_op_params);
// Create mul1 node
auto mul1_const_idx = graph->addOperand(ir::Shape{OPERAND_ELEMS}, float_op);
auto mul1_out_idx = graph->addOperand(ir::Shape{OPERAND_ELEMS}, float_op);
- create<Mul>(graph, OIS{add_out_idx, mul1_const_idx}, OIS{mul1_out_idx});
+ BinaryArithmetic::Param mul1_op_params{BinaryArithmetic::ArithmeticType::MUL, Activation::NONE};
+ create<BinaryArithmetic>(graph, OIS{add_out_idx, mul1_const_idx}, OIS{mul1_out_idx},
+ mul1_op_params);
// Create mul2 node
auto mul2_const_idx = graph->addOperand(ir::Shape{OPERAND_ELEMS}, float_op);
auto mul2_out_idx = graph->addOperand(ir::Shape{OPERAND_ELEMS}, float_op);
- create<Mul>(graph, OIS{mul1_out_idx, mul2_const_idx}, OIS{mul2_out_idx});
+ BinaryArithmetic::Param mul2_op_params{BinaryArithmetic::ArithmeticType::MUL, Activation::NONE};
+ create<BinaryArithmetic>(graph, OIS{mul1_out_idx, mul2_const_idx}, OIS{mul2_out_idx},
+ mul2_op_params);
// Create fc1 node
auto fc1_const_idx = graph->addOperand(ir::Shape{OPERAND_ELEMS}, float_op);
auto fc1_out_idx = graph->addOperand(ir::Shape{OPERAND_ELEMS}, float_op);
- create<FullyConnected>(graph, OIS{add_out_idx, fc1_const_idx}, OIS{fc1_out_idx});
+ FullyConnected::Param fc1_op_params{Activation::NONE};
+ create<FullyConnected>(graph, OIS{add_out_idx, fc1_const_idx}, OIS{fc1_out_idx}, fc1_op_params);
// Create fc2 node
auto fc2_const_idx = graph->addOperand(ir::Shape{OPERAND_ELEMS}, float_op);
auto fc2_out_idx = graph->addOperand(ir::Shape{OPERAND_ELEMS}, float_op);
- create<FullyConnected>(graph, OIS{fc1_out_idx, fc2_const_idx}, OIS{fc2_out_idx});
+ FullyConnected::Param fc2_op_params{Activation::NONE};
+ create<FullyConnected>(graph, OIS{fc1_out_idx, fc2_const_idx}, OIS{fc2_out_idx}, fc2_op_params);
- // Create add2 node
+ // Create sub node
auto sub_out_idx = graph->addOperand(ir::Shape{OPERAND_ELEMS}, float_op);
- create<Sub>(graph, OIS{mul2_out_idx, fc2_out_idx}, OIS{sub_out_idx});
+ BinaryArithmetic::Param sub_op_params{BinaryArithmetic::ArithmeticType::SUB, Activation::NONE};
+ create<BinaryArithmetic>(graph, OIS{mul2_out_idx, fc2_out_idx}, OIS{sub_out_idx}, sub_op_params);
graph->finishBuilding();
return graph;
diff --git a/runtime/onert/test/core/exec/ExecInstance.cc b/runtime/onert/test/core/exec/ExecInstance.cc
index 0fcf372c3..806b47ecc 100644
--- a/runtime/onert/test/core/exec/ExecInstance.cc
+++ b/runtime/onert/test/core/exec/ExecInstance.cc
@@ -20,7 +20,7 @@
#include "ir/Graph.h"
#include "compiler/Compiler.h"
#include "exec/Execution.h"
-#include "ir/operation/Add.h"
+#include "ir/operation/BinaryArithmetic.h"
namespace
{
@@ -54,16 +54,20 @@ public:
.at(operand_rhs2)
.data(std::make_unique<CachedData>(reinterpret_cast<const uint8_t *>(&rhs2_data), 16));
// 2nd add operations (result2 <= result1 + rhs2)
- operation::Add::Param param1;
+ operation::BinaryArithmetic::Param param1;
+ param1.arithmetic_type = operation::BinaryArithmetic::ArithmeticType::ADD;
param1.activation = Activation::NONE;
auto input_set1 = OperandIndexSequence{operand_lhs, operand_rhs1};
auto output_set1 = OperandIndexSequence{operand_result1};
- graph->addOperation(std::make_unique<operation::Add>(input_set1, output_set1, param1));
- operation::Add::Param param2;
+ graph->addOperation(
+ std::make_unique<operation::BinaryArithmetic>(input_set1, output_set1, param1));
+ operation::BinaryArithmetic::Param param2;
+ param2.arithmetic_type = operation::BinaryArithmetic::ArithmeticType::ADD;
param2.activation = Activation::NONE;
auto input_set2 = OperandIndexSequence{operand_result1, operand_rhs2};
auto output_set2 = OperandIndexSequence{operand_result2};
- graph->addOperation(std::make_unique<operation::Add>(input_set2, output_set2, param2));
+ graph->addOperation(
+ std::make_unique<operation::BinaryArithmetic>(input_set2, output_set2, param2));
// Identify model inputs and outputs
graph->addInput(operand_lhs);
graph->addInput(operand_rhs1);
diff --git a/runtime/onert/test/core/interp/ExecManager.cc b/runtime/onert/test/core/interp/ExecManager.cc
index 2e295ef40..09190bc58 100644
--- a/runtime/onert/test/core/interp/ExecManager.cc
+++ b/runtime/onert/test/core/interp/ExecManager.cc
@@ -21,7 +21,7 @@
#include "ir/Graph.h"
#include "interp/InterpExecutor.h"
#include "exec/Execution.h"
-#include "ir/operation/Add.h"
+#include "ir/operation/BinaryArithmetic.h"
namespace
{
@@ -57,11 +57,13 @@ protected:
// Add operations
- operation::Add::Param param;
+ operation::BinaryArithmetic::Param param;
+ param.arithmetic_type = operation::BinaryArithmetic::ArithmeticType::ADD;
param.activation = Activation::NONE;
auto input_set = OperandIndexSequence{operand_lhs, operand_rhs};
auto output_set = OperandIndexSequence{operand_result};
- _graph->addOperation(std::make_unique<operation::Add>(input_set, output_set, param));
+ _graph->addOperation(
+ std::make_unique<operation::BinaryArithmetic>(input_set, output_set, param));
// Identify model inputs and outputs
@@ -112,17 +114,21 @@ protected:
// 2nd add operations (result2 <= result1 + rhs2)
- operation::Add::Param param1;
+ operation::BinaryArithmetic::Param param1;
+ param1.arithmetic_type = operation::BinaryArithmetic::ArithmeticType::ADD;
param1.activation = Activation::NONE;
auto input_set1 = OperandIndexSequence{operand_lhs, operand_rhs1};
auto output_set1 = OperandIndexSequence{operand_result1};
- _graph->addOperation(std::make_unique<operation::Add>(input_set1, output_set1, param1));
+ _graph->addOperation(
+ std::make_unique<operation::BinaryArithmetic>(input_set1, output_set1, param1));
- operation::Add::Param param2;
+ operation::BinaryArithmetic::Param param2;
+ param2.arithmetic_type = operation::BinaryArithmetic::ArithmeticType::ADD;
param2.activation = Activation::NONE;
auto input_set2 = OperandIndexSequence{operand_result1, operand_rhs2};
auto output_set2 = OperandIndexSequence{operand_result2};
- _graph->addOperation(std::make_unique<operation::Add>(input_set2, output_set2, param2));
+ _graph->addOperation(
+ std::make_unique<operation::BinaryArithmetic>(input_set2, output_set2, param2));
// Identify model inputs and outputs
@@ -170,11 +176,13 @@ protected:
// Add operations
- operation::Add::Param param;
+ operation::BinaryArithmetic::Param param;
+ param.arithmetic_type = operation::BinaryArithmetic::ArithmeticType::ADD;
param.activation = Activation::NONE;
auto input_set = OperandIndexSequence{operand_lhs, operand_rhs};
auto output_set = OperandIndexSequence{operand_result};
- _graph->addOperation(std::make_unique<operation::Add>(input_set, output_set, param));
+ _graph->addOperation(
+ std::make_unique<operation::BinaryArithmetic>(input_set, output_set, param));
// Identify model inputs and outputs
diff --git a/runtime/onert/test/util/ShapeInference.cc b/runtime/onert/test/util/ShapeInference.cc
index 63a948d7b..aab33fab5 100644
--- a/runtime/onert/test/util/ShapeInference.cc
+++ b/runtime/onert/test/util/ShapeInference.cc
@@ -47,8 +47,9 @@ TEST(ShapeInference, Pool2DNodeSame)
Stride stride{3, 7};
Padding padding{PaddingType::SAME};
- operation::AvgPool2D::Param avg_pool_param{3, 6, stride, padding, Activation::NONE};
- auto infered_out_shape = onert::shape_inference::inferAvgPoolShape(in_shape, avg_pool_param);
+ operation::Pool2D::Param avg_pool_param{
+ operation::Pool2D::PoolType::AVG, 3, 6, stride, padding, Activation::NONE};
+ auto infered_out_shape = onert::shape_inference::inferPoolShape(in_shape, avg_pool_param);
ASSERT_EQ(infered_out_shape.rank(), 4);
ASSERT_EQ(infered_out_shape.asFeature(Layout::NHWC).N, 10);
@@ -56,8 +57,9 @@ TEST(ShapeInference, Pool2DNodeSame)
ASSERT_EQ(infered_out_shape.asFeature(Layout::NHWC).W, 2);
ASSERT_EQ(infered_out_shape.asFeature(Layout::NHWC).C, 20);
- operation::MaxPool2D::Param max_pool_param{3, 6, stride, padding, Activation::NONE};
- infered_out_shape = onert::shape_inference::inferMaxPoolShape(in_shape, max_pool_param);
+ operation::Pool2D::Param max_pool_param{
+ operation::Pool2D::PoolType::MAX, 3, 6, stride, padding, Activation::NONE};
+ infered_out_shape = onert::shape_inference::inferPoolShape(in_shape, max_pool_param);
ASSERT_EQ(infered_out_shape.rank(), 4);
ASSERT_EQ(infered_out_shape.asFeature(Layout::NHWC).N, 10);
@@ -72,8 +74,9 @@ TEST(ShapeInference, Pool2DNodeValid)
Stride stride{3, 7};
Padding padding{PaddingType::VALID};
- operation::AvgPool2D::Param avg_pool_param{3, 6, stride, padding, Activation::NONE};
- auto infered_out_shape = onert::shape_inference::inferAvgPoolShape(in_shape, avg_pool_param);
+ operation::Pool2D::Param avg_pool_param{
+ operation::Pool2D::PoolType::AVG, 3, 6, stride, padding, Activation::NONE};
+ auto infered_out_shape = onert::shape_inference::inferPoolShape(in_shape, avg_pool_param);
ASSERT_EQ(infered_out_shape.rank(), 4);
ASSERT_EQ(infered_out_shape.asFeature(Layout::NHWC).N, 10);
@@ -81,8 +84,9 @@ TEST(ShapeInference, Pool2DNodeValid)
ASSERT_EQ(infered_out_shape.asFeature(Layout::NHWC).W, 1);
ASSERT_EQ(infered_out_shape.asFeature(Layout::NHWC).C, 20);
- operation::MaxPool2D::Param max_pool_param{3, 6, stride, padding, Activation::NONE};
- infered_out_shape = onert::shape_inference::inferMaxPoolShape(in_shape, max_pool_param);
+ operation::Pool2D::Param max_pool_param{
+ operation::Pool2D::PoolType::MAX, 3, 6, stride, padding, Activation::NONE};
+ infered_out_shape = onert::shape_inference::inferPoolShape(in_shape, max_pool_param);
ASSERT_EQ(infered_out_shape.rank(), 4);
ASSERT_EQ(infered_out_shape.asFeature(Layout::NHWC).N, 10);
@@ -98,8 +102,9 @@ TEST(ShapeInference, Pool2DNodeExplicit)
Stride stride{3, 7};
Padding padding{4, 3, 2, 1};
- operation::AvgPool2D::Param avg_pool_param{3, 6, stride, padding, Activation::NONE};
- auto infered_out_shape = onert::shape_inference::inferAvgPoolShape(in_shape, avg_pool_param);
+ operation::Pool2D::Param avg_pool_param{
+ operation::Pool2D::PoolType::AVG, 3, 6, stride, padding, Activation::NONE};
+ auto infered_out_shape = onert::shape_inference::inferPoolShape(in_shape, avg_pool_param);
ASSERT_EQ(infered_out_shape.rank(), 4);
ASSERT_EQ(infered_out_shape.asFeature(Layout::NHWC).N, 10);
@@ -107,8 +112,9 @@ TEST(ShapeInference, Pool2DNodeExplicit)
ASSERT_EQ(infered_out_shape.asFeature(Layout::NHWC).W, 1);
ASSERT_EQ(infered_out_shape.asFeature(Layout::NHWC).C, 20);
- operation::MaxPool2D::Param max_pool_param{3, 6, stride, padding, Activation::NONE};
- infered_out_shape = onert::shape_inference::inferMaxPoolShape(in_shape, max_pool_param);
+ operation::Pool2D::Param max_pool_param{
+ operation::Pool2D::PoolType::MAX, 3, 6, stride, padding, Activation::NONE};
+ infered_out_shape = onert::shape_inference::inferPoolShape(in_shape, max_pool_param);
ASSERT_EQ(infered_out_shape.rank(), 4);
ASSERT_EQ(infered_out_shape.asFeature(Layout::NHWC).N, 10);
@@ -122,7 +128,8 @@ TEST(ShapeInference, Conv2D)
Shape in_shape{10, 6, 12, 20};
Shape ker_shape{30, 3, 6, 20};
- operation::Conv2D::Param param{Stride{3, 7}, Padding{PaddingType::VALID}, Activation::NONE};
+ operation::Conv2D::Param param{Stride{3, 7}, Padding{PaddingType::VALID}, Activation::NONE,
+ Dilation{1, 1}};
auto infered_out_shape = onert::shape_inference::inferConv2DShape(in_shape, ker_shape, param);
ASSERT_EQ(infered_out_shape.rank(), 4);
@@ -131,7 +138,8 @@ TEST(ShapeInference, Conv2D)
ASSERT_EQ(infered_out_shape.asFeature(Layout::NHWC).W, 1);
ASSERT_EQ(infered_out_shape.asFeature(Layout::NHWC).C, 30);
- param = operation::Conv2D::Param{Stride{3, 7}, Padding{PaddingType::SAME}, Activation::NONE};
+ param = operation::Conv2D::Param{Stride{3, 7}, Padding{PaddingType::SAME}, Activation::NONE,
+ Dilation{1, 1}};
infered_out_shape = onert::shape_inference::inferConv2DShape(in_shape, ker_shape, param);
ASSERT_EQ(infered_out_shape.rank(), 4);
@@ -140,7 +148,8 @@ TEST(ShapeInference, Conv2D)
ASSERT_EQ(infered_out_shape.asFeature(Layout::NHWC).W, 2);
ASSERT_EQ(infered_out_shape.asFeature(Layout::NHWC).C, 30);
- param = operation::Conv2D::Param{Stride{3, 7}, Padding{4, 3, 2, 1}, Activation::NONE};
+ param =
+ operation::Conv2D::Param{Stride{3, 7}, Padding{4, 3, 2, 1}, Activation::NONE, Dilation{1, 1}};
infered_out_shape = onert::shape_inference::inferConv2DShape(in_shape, ker_shape, param);
ASSERT_EQ(infered_out_shape.rank(), 4);
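For reference on the expected H/W values in these Pool2D/Conv2D tests: they follow the standard spatial output-size formulas for SAME, VALID and explicit padding. A small reference sketch, not part of the test file:

    // Reference only -- standard spatial output-size formulas.
    //   SAME     : out = ceil(in / stride)
    //   VALID    : out = ceil((in - effective_kernel + 1) / stride)
    //   EXPLICIT : out = (in + pad_front + pad_back - effective_kernel) / stride + 1
    // where effective_kernel = (kernel - 1) * dilation + 1 (dilation is 1 for pooling).
    inline int32_t outSizeSame(int32_t in, int32_t stride) { return (in + stride - 1) / stride; }
    inline int32_t outSizeValid(int32_t in, int32_t kernel, int32_t stride)
    {
      return (in - kernel + stride) / stride; // == ceil((in - kernel + 1) / stride)
    }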
diff --git a/tests/nnapi/nnapi_gtest.skip.aarch64-linux.acl_cl b/tests/nnapi/nnapi_gtest.skip.aarch64-linux.acl_cl
index 3a6b40d6b..984dbfa2a 100644
--- a/tests/nnapi/nnapi_gtest.skip.aarch64-linux.acl_cl
+++ b/tests/nnapi/nnapi_gtest.skip.aarch64-linux.acl_cl
@@ -38,6 +38,14 @@ GeneratedTests.conv_quant8_channels_weights_as_inputs
GeneratedTests.conv_quant8_large_weights_as_inputs
GeneratedTests.conv_quant8_overflow_weights_as_inputs
GeneratedTests.conv_quant8_weights_as_inputs
+GeneratedTests.conv2d_dilation_nnfw
+GeneratedTests.conv2d_dilation_nnfw_quant8
+GeneratedTests.conv2d_dilation_nnfw_weight_as_input
+GeneratedTests.conv2d_dilation_nnfw_weight_as_input_quant8
+GeneratedTests.conv2d_dilation_nnfw_2
+GeneratedTests.conv2d_dilation_nnfw_quant8_2
+GeneratedTests.conv2d_dilation_nnfw_weight_as_input_2
+GeneratedTests.conv2d_dilation_nnfw_weight_as_input_quant8_2
GeneratedTests.cos_ex_1D_float_nnfw
GeneratedTests.cos_ex_4D_float_nnfw
GeneratedTests.cos_ex_dynamic_nnfw
@@ -77,6 +85,8 @@ GeneratedTests.log_softmax_nnfw
GeneratedTests.log_softmax_nnfw_2
GeneratedTests.log_softmax_nnfw_3
GeneratedTests.log_softmax_nnfw_4
+GeneratedTests.log_softmax_nnfw_5
+GeneratedTests.log_softmax_nnfw_quant8
GeneratedTests.logical_not
GeneratedTests.logical_not_1D_nnfw
GeneratedTests.logical_not_4D_nnfw
diff --git a/tests/nnapi/nnapi_gtest.skip.aarch64-linux.acl_neon b/tests/nnapi/nnapi_gtest.skip.aarch64-linux.acl_neon
index f4bd48be5..a7bedf14b 100644
--- a/tests/nnapi/nnapi_gtest.skip.aarch64-linux.acl_neon
+++ b/tests/nnapi/nnapi_gtest.skip.aarch64-linux.acl_neon
@@ -37,6 +37,14 @@ GeneratedTests.conv_quant8_channels_weights_as_inputs
GeneratedTests.conv_quant8_large_weights_as_inputs
GeneratedTests.conv_quant8_overflow_weights_as_inputs
GeneratedTests.conv_quant8_weights_as_inputs
+GeneratedTests.conv2d_dilation_nnfw
+GeneratedTests.conv2d_dilation_nnfw_quant8
+GeneratedTests.conv2d_dilation_nnfw_weight_as_input
+GeneratedTests.conv2d_dilation_nnfw_weight_as_input_quant8
+GeneratedTests.conv2d_dilation_nnfw_2
+GeneratedTests.conv2d_dilation_nnfw_quant8_2
+GeneratedTests.conv2d_dilation_nnfw_weight_as_input_2
+GeneratedTests.conv2d_dilation_nnfw_weight_as_input_quant8_2
GeneratedTests.cos_ex_1D_float_nnfw
GeneratedTests.cos_ex_4D_float_nnfw
GeneratedTests.cos_ex_dynamic_nnfw
@@ -80,6 +88,8 @@ GeneratedTests.log_softmax_nnfw
GeneratedTests.log_softmax_nnfw_2
GeneratedTests.log_softmax_nnfw_3
GeneratedTests.log_softmax_nnfw_4
+GeneratedTests.log_softmax_nnfw_5
+GeneratedTests.log_softmax_nnfw_quant8
GeneratedTests.logical_not
GeneratedTests.logical_not_1D_nnfw
GeneratedTests.logical_not_4D_nnfw
diff --git a/tests/nnapi/nnapi_gtest.skip.armv7l-linux.acl_cl b/tests/nnapi/nnapi_gtest.skip.armv7l-linux.acl_cl
index 3a6b40d6b..984dbfa2a 100644
--- a/tests/nnapi/nnapi_gtest.skip.armv7l-linux.acl_cl
+++ b/tests/nnapi/nnapi_gtest.skip.armv7l-linux.acl_cl
@@ -38,6 +38,14 @@ GeneratedTests.conv_quant8_channels_weights_as_inputs
GeneratedTests.conv_quant8_large_weights_as_inputs
GeneratedTests.conv_quant8_overflow_weights_as_inputs
GeneratedTests.conv_quant8_weights_as_inputs
+GeneratedTests.conv2d_dilation_nnfw
+GeneratedTests.conv2d_dilation_nnfw_quant8
+GeneratedTests.conv2d_dilation_nnfw_weight_as_input
+GeneratedTests.conv2d_dilation_nnfw_weight_as_input_quant8
+GeneratedTests.conv2d_dilation_nnfw_2
+GeneratedTests.conv2d_dilation_nnfw_quant8_2
+GeneratedTests.conv2d_dilation_nnfw_weight_as_input_2
+GeneratedTests.conv2d_dilation_nnfw_weight_as_input_quant8_2
GeneratedTests.cos_ex_1D_float_nnfw
GeneratedTests.cos_ex_4D_float_nnfw
GeneratedTests.cos_ex_dynamic_nnfw
@@ -77,6 +85,8 @@ GeneratedTests.log_softmax_nnfw
GeneratedTests.log_softmax_nnfw_2
GeneratedTests.log_softmax_nnfw_3
GeneratedTests.log_softmax_nnfw_4
+GeneratedTests.log_softmax_nnfw_5
+GeneratedTests.log_softmax_nnfw_quant8
GeneratedTests.logical_not
GeneratedTests.logical_not_1D_nnfw
GeneratedTests.logical_not_4D_nnfw
diff --git a/tests/nnapi/nnapi_gtest.skip.armv7l-linux.acl_neon b/tests/nnapi/nnapi_gtest.skip.armv7l-linux.acl_neon
index fcd8b3e36..036c869c6 100644
--- a/tests/nnapi/nnapi_gtest.skip.armv7l-linux.acl_neon
+++ b/tests/nnapi/nnapi_gtest.skip.armv7l-linux.acl_neon
@@ -37,6 +37,14 @@ GeneratedTests.conv_quant8_channels_weights_as_inputs
GeneratedTests.conv_quant8_large_weights_as_inputs
GeneratedTests.conv_quant8_overflow_weights_as_inputs
GeneratedTests.conv_quant8_weights_as_inputs
+GeneratedTests.conv2d_dilation_nnfw
+GeneratedTests.conv2d_dilation_nnfw_quant8
+GeneratedTests.conv2d_dilation_nnfw_weight_as_input
+GeneratedTests.conv2d_dilation_nnfw_weight_as_input_quant8
+GeneratedTests.conv2d_dilation_nnfw_2
+GeneratedTests.conv2d_dilation_nnfw_quant8_2
+GeneratedTests.conv2d_dilation_nnfw_weight_as_input_2
+GeneratedTests.conv2d_dilation_nnfw_weight_as_input_quant8_2
GeneratedTests.cos_ex_1D_float_nnfw
GeneratedTests.cos_ex_4D_float_nnfw
GeneratedTests.cos_ex_dynamic_nnfw
@@ -79,6 +87,8 @@ GeneratedTests.log_softmax_nnfw
GeneratedTests.log_softmax_nnfw_2
GeneratedTests.log_softmax_nnfw_3
GeneratedTests.log_softmax_nnfw_4
+GeneratedTests.log_softmax_nnfw_5
+GeneratedTests.log_softmax_nnfw_quant8
GeneratedTests.logical_not
GeneratedTests.logical_not_1D_nnfw
GeneratedTests.logical_not_4D_nnfw
diff --git a/tests/nnapi/nnapi_gtest.skip.noarch.interp b/tests/nnapi/nnapi_gtest.skip.noarch.interp
index a0ae9d3fe..9e51e759e 100644
--- a/tests/nnapi/nnapi_gtest.skip.noarch.interp
+++ b/tests/nnapi/nnapi_gtest.skip.noarch.interp
@@ -85,6 +85,14 @@ GeneratedTests.conv_quant8_large_weights_as_inputs
GeneratedTests.conv_quant8_overflow
GeneratedTests.conv_quant8_overflow_weights_as_inputs
GeneratedTests.conv_quant8_weights_as_inputs
+GeneratedTests.conv2d_dilation_nnfw
+GeneratedTests.conv2d_dilation_nnfw_quant8
+GeneratedTests.conv2d_dilation_nnfw_weight_as_input
+GeneratedTests.conv2d_dilation_nnfw_weight_as_input_quant8
+GeneratedTests.conv2d_dilation_nnfw_2
+GeneratedTests.conv2d_dilation_nnfw_quant8_2
+GeneratedTests.conv2d_dilation_nnfw_weight_as_input_2
+GeneratedTests.conv2d_dilation_nnfw_weight_as_input_quant8_2
GeneratedTests.cos_ex_1D_float_nnfw
GeneratedTests.cos_ex_4D_float_nnfw
GeneratedTests.cos_ex_dynamic_nnfw
@@ -216,6 +224,8 @@ GeneratedTests.log_softmax_nnfw
GeneratedTests.log_softmax_nnfw_2
GeneratedTests.log_softmax_nnfw_3
GeneratedTests.log_softmax_nnfw_4
+GeneratedTests.log_softmax_nnfw_5
+GeneratedTests.log_softmax_nnfw_quant8
GeneratedTests.logical_and_1D_nnfw
GeneratedTests.logical_and_2D_nnfw
GeneratedTests.logical_and_3D_nnfw
diff --git a/tests/nnapi/specs/V1_2/conv2d_dilation_nnfw.mod.py b/tests/nnapi/specs/V1_2/conv2d_dilation_nnfw.mod.py
new file mode 100644
index 000000000..95ef901e4
--- /dev/null
+++ b/tests/nnapi/specs/V1_2/conv2d_dilation_nnfw.mod.py
@@ -0,0 +1,69 @@
+#
+# Copyright (C) 2018 The Android Open Source Project
+# Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+
+layout = BoolScalar("layout", False) # NHWC
+
+# TEST 1: dilation set to 1 (default)
+i1 = Input("op1", "TENSOR_FLOAT32", "{1, 3, 3, 1}")
+f1 = Parameter("op2", "TENSOR_FLOAT32", "{1, 2, 2, 1}", [.25, .25, .25, .25])
+b1 = Parameter("op3", "TENSOR_FLOAT32", "{1}", [0])
+o1 = Output("op4", "TENSOR_FLOAT32", "{1, 2, 2, 1}")
+Model().Operation("CONV_2D", i1, f1, b1, 0, 0, 0, 0, 1, 1, 0, layout, 1, 1).To(o1)
+
+# Additional data type
+quant8 = DataTypeConverter().Identify({
+ i1: ("TENSOR_QUANT8_ASYMM", 0.5, 0),
+ f1: ("TENSOR_QUANT8_ASYMM", 0.125, 0),
+ b1: ("TENSOR_INT32", 0.0625, 0),
+ o1: ("TENSOR_QUANT8_ASYMM", 0.125, 0)
+})
+
+# Instantiate an example
+example = Example({
+ i1: [1.0, 1.0, 1.0, 1.0, 0.5, 1.0, 1.0, 1.0, 1.0],
+ o1: [.875, .875, .875, .875]
+}).AddInput(f1, b1).AddVariations("relaxed", quant8, "float16")
+
+
+# TEST 2: dilation set to 3
+i2 = Input("op1", "TENSOR_FLOAT32", "{1, 9, 9, 1}")
+f2 = Parameter("op2", "TENSOR_FLOAT32", "{1, 3, 3, 1}", [1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0])
+b2 = Parameter("op3", "TENSOR_FLOAT32", "{1}", [0])
+o2 = Output("op4", "TENSOR_FLOAT32", "{1, 3, 3, 1}")
+Model().Operation("CONV_2D", i2, f2, b2, 0, 0, 0, 0, 1, 1, 0, layout, 3, 3).To(o2)
+
+# Additional data type
+quant8 = DataTypeConverter().Identify({
+ i2: ("TENSOR_QUANT8_ASYMM", 0.5, 0),
+ f2: ("TENSOR_QUANT8_ASYMM", 0.125, 0),
+ b2: ("TENSOR_INT32", 0.0625, 0),
+ o2: ("TENSOR_QUANT8_ASYMM", 0.125, 0)
+})
+
+# Instantiate an example
+example = Example({
+ i2: [0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0,
+ 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0,
+ 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0,
+ 0.0, 0.0, 0.0, 1.0, 1.0, 1.0, 0.0, 0.0, 0.0,
+ 0.0, 0.0, 0.0, 1.0, 1.0, 1.0, 0.0, 0.0, 0.0,
+ 0.0, 0.0, 0.0, 1.0, 1.0, 1.0, 0.0, 0.0, 0.0,
+ 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0,
+ 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0,
+ 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0],
+ o2: [5.0, 5.0, 5.0, 5.0, 5.0, 5.0, 5.0, 5.0, 5.0]
+}).AddInput(f2, b2).AddVariations("relaxed", quant8, "float16")
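A note on TEST 2's expected values (reference only, not part of the spec file): with dilation 3 a 3x3 kernel spans (3 - 1) * 3 + 1 = 7 input pixels per axis, so the 9x9 input with no padding and stride 1 produces a 3x3 output (9 - 7 + 1 = 3). For every output position only the kernel's centre tap (weight 5.0) lands inside the central 3x3 block of ones, so each output value is 5.0.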
diff --git a/tests/nnapi/specs/V1_2/log_softmax_nnfw.mod.py b/tests/nnapi/specs/V1_2/log_softmax_nnfw.mod.py
index 347e1b0a6..580fd9e7c 100644
--- a/tests/nnapi/specs/V1_2/log_softmax_nnfw.mod.py
+++ b/tests/nnapi/specs/V1_2/log_softmax_nnfw.mod.py
@@ -71,3 +71,26 @@ test(
output_data=[-4.14297, -10.14297, -2.14297, -.142971,
-7.00104, -12.00104, -.00104087, -9.00104],
)
+
+def quant8_test(input0, output0, input_data, beta, axis, output_data):
+ model = Model().Operation("LOG_SOFTMAX", input0, beta, axis).To(output0)
+ quant8 = DataTypeConverter().Identify({
+ input0: ["TENSOR_QUANT8_ASYMM", 10 / 255.0],
+ output0: ["TENSOR_QUANT8_ASYMM", 16 / 256.0, 255],
+ })
+
+ Example({
+ input0: input_data,
+ output0: output_data,
+ }, model=model).AddVariations(quant8)
+
+quant8_test(
+ input0=Input("input0", "TENSOR_FLOAT32", "{1, 1, 2, 4}"),
+ output0=Output("output0", "TENSOR_FLOAT32", "{1, 1, 2, 4}"),
+ input_data=[0, 6, 2, 4,
+ 3, 2, 10, 1],
+ beta=1.0,
+ axis=3,
+ output_data=[-6.145078, -.145078, -4.145078, -2.145078,
+ -7.001370, -8.001370, -.001370, -9.001370],
+)
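A note on the quant8 variation above (reference only): LOG_SOFTMAX outputs are always <= 0, so the output quantization chosen here (scale 16/256 = 0.0625, zero point 255) maps the representable range to roughly [-15.94, 0], which covers all of the expected values listed.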
diff --git a/tests/nnfw_api/CMakeLists.txt b/tests/nnfw_api/CMakeLists.txt
index 6e0696d34..aa3a9421d 100644
--- a/tests/nnfw_api/CMakeLists.txt
+++ b/tests/nnfw_api/CMakeLists.txt
@@ -14,6 +14,11 @@ file(GLOB_RECURSE RUNTIME_NNFW_API_TEST_SRC "src/*.cc" "src/*.cpp")
add_executable(${RUNTIME_NNFW_API_TEST} ${RUNTIME_NNFW_API_TEST_SRC})
+nnfw_find_package(ARMCompute QUIET)
+if(ARMCompute_FOUND)
+ target_compile_definitions(${RUNTIME_NNFW_API_TEST} PRIVATE TEST_ACL_BACKEND)
+endif(ARMCompute_FOUND)
+
set(RUNTIME_NNFW_API_TEST_INCLUDE ${CMAKE_CURRENT_SOURCE_DIR}/include
${CMAKE_CURRENT_SOURCE_DIR}/src)
target_include_directories(${RUNTIME_NNFW_API_TEST} PRIVATE ${RUNTIME_NNFW_API_TEST_INCLUDE})
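The ARMCompute check added above defines TEST_ACL_BACKEND only when the ACL backends were built, so ACL-specific test code can be compiled conditionally. A hypothetical usage sketch (the macro name comes from the diff; the surrounding test code is illustrative only):

    // Hypothetical illustration -- not taken from the test sources.
    std::vector<std::string> backends{"cpu"};
    #ifdef TEST_ACL_BACKEND
      backends.push_back("acl_cl");
      backends.push_back("acl_neon");
    #endif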
diff --git a/tests/nnfw_api/src/CircleGen.cc b/tests/nnfw_api/src/CircleGen.cc
new file mode 100644
index 000000000..19cb95f37
--- /dev/null
+++ b/tests/nnfw_api/src/CircleGen.cc
@@ -0,0 +1,240 @@
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "CircleGen.h"
+
+CircleGen::CircleGen() : _subgraph_contexts(1) // Create primary subgraph
+{
+ // 0th buffer is always the empty buffer for non-const tensors
+ addBuffer(nullptr, 0);
+}
+
+template <typename T> uint32_t addBuffer(const std::vector<T> &buf_vec)
+{
+ auto buf = reinterpret_cast<const uint8_t *>(buf_vec.data());
+ auto size = buf_vec.size() * sizeof(T);
+ return addBuffer(buf, size);
+}
+
+uint32_t CircleGen::addBuffer(const uint8_t *buf, size_t size)
+{
+ uint32_t ind = _buffers.size();
+ _buffers.emplace_back(buildBuffer(buf, size));
+ return ind;
+}
+
+uint32_t CircleGen::addTensor(const TensorParams &params)
+{
+ int ind = curSubgCtx().tensors.size();
+ curSubgCtx().tensors.emplace_back(buildTensor(params));
+ return ind;
+}
+
+void CircleGen::setInputsAndOutputs(const std::vector<int> &inputs, const std::vector<int> &outputs)
+{
+ curSubgCtx().inputs = inputs;
+ curSubgCtx().outputs = outputs;
+}
+
+uint32_t CircleGen::nextSubgraph()
+{
+ uint32_t ind = _subgraph_contexts.size();
+ _subgraph_contexts.push_back({});
+ return ind;
+}
+
+CircleBuffer CircleGen::finish()
+{
+ std::vector<flatbuffers::Offset<circle::SubGraph>> subgraphs;
+ for (auto &ctx : _subgraph_contexts)
+ subgraphs.push_back(buildSubGraph(ctx));
+ auto model =
+ circle::CreateModelDirect(_fbb, 3, &_opcodes, &subgraphs, "CircleGen generated", &_buffers);
+ _fbb.Finish(model);
+ return CircleBuffer{std::move(_fbb)};
+}
+
+// ===== Add Operator methods begin =====
+
+uint32_t CircleGen::addOperatorAdd(const OperatorParams &params,
+ circle::ActivationFunctionType actfn)
+{
+ auto options = circle::CreateAddOptions(_fbb, actfn).Union();
+ return addOperatorWithOptions(params, circle::BuiltinOperator_ADD,
+ circle::BuiltinOptions_AddOptions, options);
+}
+
+uint32_t CircleGen::addOperatorAveragePool2D(const OperatorParams &params, circle::Padding padding,
+ int stride_w, int stride_h, int filter_w, int filter_h,
+ circle::ActivationFunctionType actfn)
+{
+ auto options =
+ circle::CreatePool2DOptions(_fbb, padding, stride_w, stride_h, filter_w, filter_h, actfn)
+ .Union();
+ return addOperatorWithOptions(params, circle::BuiltinOperator_AVERAGE_POOL_2D,
+ circle::BuiltinOptions_Pool2DOptions, options);
+}
+
+uint32_t CircleGen::addOperatorConcatenation(const OperatorParams &params, int axis,
+ circle::ActivationFunctionType actfn)
+{
+ auto options = circle::CreateConcatenationOptions(_fbb, axis, actfn).Union();
+ return addOperatorWithOptions(params, circle::BuiltinOperator_CONCATENATION,
+ circle::BuiltinOptions_ConcatenationOptions, options);
+}
+
+uint32_t CircleGen::addOperatorCos(const OperatorParams &params)
+{
+ auto options = circle::CreateCosOptions(_fbb).Union();
+ return addOperatorWithOptions(params, circle::BuiltinOperator_COS,
+ circle::BuiltinOptions_CosOptions, options);
+}
+
+uint32_t CircleGen::addOperatorL2Normalization(const OperatorParams &params)
+{
+ auto options = circle::CreateL2NormOptions(_fbb).Union();
+ return addOperatorWithOptions(params, circle::BuiltinOperator_L2_NORMALIZATION,
+ circle::BuiltinOptions_L2NormOptions, options);
+}
+
+uint32_t CircleGen::addOperatorLess(const OperatorParams &params)
+{
+ auto options = circle::CreateLessOptions(_fbb).Union();
+ return addOperatorWithOptions(params, circle::BuiltinOperator_LESS,
+ circle::BuiltinOptions_LessOptions, options);
+}
+
+uint32_t CircleGen::addOperatorLeakyRelu(const OperatorParams &params, float alpha)
+{
+ auto options = circle::CreateLeakyReluOptions(_fbb, alpha).Union();
+ return addOperatorWithOptions(params, circle::BuiltinOperator_LEAKY_RELU,
+ circle::BuiltinOptions_LeakyReluOptions, options);
+}
+
+uint32_t CircleGen::addOperatorNeg(const OperatorParams &params)
+{
+ auto options = circle::CreatePadOptions(_fbb).Union();
+ return addOperatorWithOptions(params, circle::BuiltinOperator_NEG,
+ circle::BuiltinOptions_NegOptions, options);
+}
+
+uint32_t CircleGen::addOperatorPad(const OperatorParams &params)
+{
+ auto options = circle::CreatePadOptions(_fbb).Union();
+ return addOperatorWithOptions(params, circle::BuiltinOperator_PAD,
+ circle::BuiltinOptions_PadOptions, options);
+}
+
+uint32_t CircleGen::addOperatorPadV2(const OperatorParams &params)
+{
+ auto options = circle::CreatePadOptions(_fbb).Union();
+ return addOperatorWithOptions(params, circle::BuiltinOperator_PADV2,
+ circle::BuiltinOptions_PadV2Options, options);
+}
+
+uint32_t CircleGen::addOperatorRank(const OperatorParams &params)
+{
+ auto options = circle::CreateRankOptions(_fbb).Union();
+ return addOperatorWithOptions(params, circle::BuiltinOperator_RANK,
+ circle::BuiltinOptions_RankOptions, options);
+}
+
+uint32_t CircleGen::addOperatorResizeNearestNeighbor(const OperatorParams &params)
+{
+ auto options = circle::CreateResizeNearestNeighborOptions(_fbb).Union();
+ return addOperatorWithOptions(params, circle::BuiltinOperator_RESIZE_NEAREST_NEIGHBOR,
+ circle::BuiltinOptions_ResizeNearestNeighborOptions, options);
+}
+
+uint32_t CircleGen::addOperatorWhile(const OperatorParams &params, uint32_t cond_subg,
+ uint32_t body_subg)
+{
+ auto options = circle::CreateWhileOptions(_fbb, cond_subg, body_subg).Union();
+ return addOperatorWithOptions(params, circle::BuiltinOperator_WHILE,
+ circle::BuiltinOptions_WhileOptions, options);
+}
+
+// NOTE Please add addOperator functions ABOVE this line
+//
+// % How to add a new addOperatorXXX function
+// 0. Copy code from one of the existing addOperatorXXX function
+// 1. Change the function signature (need BuiltinOperator params)
+// 2. Change enum BuiltinOperator
+// 3. Change enum BuiltinOptions
+// 4. Change CreateXXXOptions accordingly
+
+// ===== Add Operator methods end =====
+
+uint32_t CircleGen::addOperatorWithOptions(const OperatorParams &params,
+ circle::BuiltinOperator opcode,
+ circle::BuiltinOptions options_type,
+ flatbuffers::Offset<void> options)
+{
+ uint32_t opcode_ind = addOperatorCode(opcode);
+ auto op = circle::CreateOperatorDirect(_fbb, opcode_ind, &params.inputs, &params.outputs,
+ options_type, options);
+
+ uint32_t ind = curSubgCtx().operators.size();
+ curSubgCtx().operators.emplace_back(op);
+ return ind;
+}
+
+uint32_t CircleGen::addOperatorCode(circle::BuiltinOperator opcode)
+{
+ // TODO If the same OperatorCode is registered already, just return it
+ uint32_t ind = _opcodes.size();
+ _opcodes.emplace_back(circle::CreateOperatorCode(_fbb, opcode));
+ return ind;
+}
+
+flatbuffers::Offset<circle::Buffer> CircleGen::buildBuffer(const uint8_t *buf, size_t size)
+{
+ if (buf == nullptr && size == 0)
+ return circle::CreateBuffer(_fbb);
+ auto buffer = _fbb.CreateVector(buf, size);
+ return circle::CreateBuffer(_fbb, buffer);
+}
+
+flatbuffers::Offset<circle::Tensor> CircleGen::buildTensor(const TensorParams &params)
+{
+ auto shape = _fbb.CreateVector(params.shape);
+ auto name = _fbb.CreateString(params.name);
+ return circle::CreateTensor(_fbb, shape, params.tensor_type, params.buffer, name,
+ 0 /* QuantParam */, false /* is_variable */, 0 /* sparsity */,
+ 0 /* shape_signature */);
+}
+
+flatbuffers::Offset<circle::SubGraph> CircleGen::buildSubGraph(const SubgraphContext &ctx)
+{
+ return circle::CreateSubGraphDirect(_fbb, &ctx.tensors, &ctx.inputs, &ctx.outputs, &ctx.operators,
+ nullptr);
+}
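The NOTE block in CircleGen.cc above lists the four steps for adding a new addOperatorXXX helper. Following those steps for a hypothetical ABS operator (not part of this commit) would look roughly like:

    // Hypothetical example only -- illustrates the 4-step recipe, not committed code.
    uint32_t CircleGen::addOperatorAbs(const OperatorParams &params)
    {
      auto options = circle::CreateAbsOptions(_fbb).Union();              // step 4: CreateXXXOptions
      return addOperatorWithOptions(params, circle::BuiltinOperator_ABS,  // step 2: BuiltinOperator
                                    circle::BuiltinOptions_AbsOptions,    // step 3: BuiltinOptions
                                    options);
    }

with a matching declaration added to CircleGen.h above its NOTE line.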
diff --git a/tests/nnfw_api/src/CircleGen.h b/tests/nnfw_api/src/CircleGen.h
index 899c800b8..09ca5a5db 100644
--- a/tests/nnfw_api/src/CircleGen.h
+++ b/tests/nnfw_api/src/CircleGen.h
@@ -36,8 +36,8 @@ public:
_fbb.Finished(); // The build must have been finished, so check that here
}
- uint8_t *buffer() { return _fbb.GetBufferPointer(); }
- size_t size() { return _fbb.GetSize(); }
+ uint8_t *buffer() const { return _fbb.GetBufferPointer(); }
+ size_t size() const { return _fbb.GetSize(); }
private:
flatbuffers::FlatBufferBuilder _fbb;
@@ -67,12 +67,16 @@ public:
int version = 1;
};
-public:
- CircleGen()
+ struct SubgraphContext
{
- // 0th buffer is always the empty buffer for non-const tensors
- addBuffer(nullptr, 0);
- }
+ std::vector<int> inputs;
+ std::vector<int> outputs;
+ std::vector<flatbuffers::Offset<circle::Tensor>> tensors;
+ std::vector<flatbuffers::Offset<circle::Operator>> operators;
+ };
+
+public:
+ CircleGen();
template <typename T> uint32_t addBuffer(const std::vector<T> &buf_vec)
{
@@ -80,122 +84,50 @@ public:
auto size = buf_vec.size() * sizeof(T);
return addBuffer(buf, size);
}
-
- uint32_t addBuffer(const uint8_t *buf, size_t size)
- {
- uint32_t ind = _buffers.size();
- _buffers.emplace_back(buildBuffer(buf, size));
- return ind;
- }
-
- uint32_t addTensor(const TensorParams &params)
- {
- int ind = _tensors.size();
- _tensors.emplace_back(buildTensor(params));
- return ind;
- }
-
- uint32_t setInputsAndOutputs(const std::vector<int> &inputs, const std::vector<int> &outputs)
- {
- _inputs = inputs;
- _outputs = outputs;
- }
-
- CircleBuffer finish()
- {
- // TODO Support multiple subgraphs, for now only single subgraph model is supported.
- std::vector<flatbuffers::Offset<circle::SubGraph>> subgraphs{buildSubGraph()};
- auto model =
- circle::CreateModelDirect(_fbb, 3, &_opcodes, &subgraphs, "CircleGen generated", &_buffers);
- _fbb.Finish(model);
- return CircleBuffer{std::move(_fbb)};
- }
+ uint32_t addBuffer(const uint8_t *buf, size_t size);
+ uint32_t addTensor(const TensorParams &params);
+ void setInputsAndOutputs(const std::vector<int> &inputs, const std::vector<int> &outputs);
+ uint32_t nextSubgraph();
+ CircleBuffer finish();
// ===== Add Operator methods begin =====
- uint32_t addOperatorAdd(const OperatorParams &params, circle::ActivationFunctionType actfn)
- {
- auto options = circle::CreateAddOptions(_fbb, actfn).Union();
- return addOperatorWithOptions(params, circle::BuiltinOperator_ADD,
- circle::BuiltinOptions_AddOptions, options);
- }
-
+ uint32_t addOperatorAdd(const OperatorParams &params, circle::ActivationFunctionType actfn);
uint32_t addOperatorAveragePool2D(const OperatorParams &params, circle::Padding padding,
int stride_w, int stride_h, int filter_w, int filter_h,
- circle::ActivationFunctionType actfn)
- {
- auto options =
- circle::CreatePool2DOptions(_fbb, padding, stride_w, stride_h, filter_w, filter_h, actfn)
- .Union();
- return addOperatorWithOptions(params, circle::BuiltinOperator_AVERAGE_POOL_2D,
- circle::BuiltinOptions_Pool2DOptions, options);
- }
+ circle::ActivationFunctionType actfn);
+ uint32_t addOperatorConcatenation(const OperatorParams &params, int axis,
+ circle::ActivationFunctionType actfn);
+ uint32_t addOperatorCos(const OperatorParams &params);
+ uint32_t addOperatorL2Normalization(const OperatorParams &params);
+ uint32_t addOperatorLeakyRelu(const OperatorParams &params, float alpha);
+ uint32_t addOperatorLess(const OperatorParams &params);
+ uint32_t addOperatorNeg(const OperatorParams &params);
+ uint32_t addOperatorPad(const OperatorParams &params);
+ uint32_t addOperatorPadV2(const OperatorParams &params);
+ uint32_t addOperatorRank(const OperatorParams &params);
+ uint32_t addOperatorResizeNearestNeighbor(const OperatorParams &params);
+ uint32_t addOperatorWhile(const OperatorParams &params, uint32_t cond_subg, uint32_t body_subg);
+ // NOTE Please add addOperator functions ABOVE this line
- //
- // % How to add a new addOperatorXXX fuction
- // 0. Copy code from one of the existing addOperatorXXX function
- // 1. Change the function signature (need BuiltinOperator params)
- // 2. Change enum BuiltinOperator
- // 3. Change enum BuiltinOptions
- // 4. Change CreateXXXOptions accordingly
-
// ===== Add Operator methods end =====
private:
uint32_t addOperatorWithOptions(const OperatorParams &params, circle::BuiltinOperator opcode,
circle::BuiltinOptions options_type,
- flatbuffers::Offset<void> options)
- {
- uint32_t opcode_ind = addOperatorCode(opcode);
- auto op = circle::CreateOperatorDirect(_fbb, opcode_ind, &params.inputs, &params.outputs,
- options_type, options);
+ flatbuffers::Offset<void> options);
+ uint32_t addOperatorCode(circle::BuiltinOperator opcode);
+ flatbuffers::Offset<circle::Buffer> buildBuffer(const uint8_t *buf, size_t size);
+ flatbuffers::Offset<circle::Tensor> buildTensor(const TensorParams &params);
+ flatbuffers::Offset<circle::SubGraph> buildSubGraph(const SubgraphContext &ctx);
- uint32_t ind = _operators.size();
- _operators.emplace_back(op);
- return ind;
- }
-
- uint32_t addOperatorCode(circle::BuiltinOperator opcode)
- {
- // TODO If the same OperatorCode is registered already, just return it
- uint32_t ind = _opcodes.size();
- _opcodes.emplace_back(circle::CreateOperatorCode(_fbb, opcode));
- return ind;
- }
-
- flatbuffers::Offset<circle::Buffer> buildBuffer(const uint8_t *buf, size_t size)
- {
- if (buf == nullptr && size == 0)
- return circle::CreateBuffer(_fbb);
- auto buffer = _fbb.CreateVector(buf, size);
- return circle::CreateBuffer(_fbb, buffer);
- }
-
- flatbuffers::Offset<circle::Tensor> buildTensor(const TensorParams &params)
- {
- auto shape = _fbb.CreateVector(params.shape);
- auto name = _fbb.CreateString(params.name);
- return circle::CreateTensor(_fbb, shape, params.tensor_type, params.buffer, name,
- 0 /* QuantParam */, false /* is_variable */, 0 /* sparsity */,
- 0 /* shape_signature */);
- }
-
- flatbuffers::Offset<circle::SubGraph> buildSubGraph()
- {
- return circle::CreateSubGraphDirect(_fbb, &_tensors, &_inputs, &_outputs, &_operators, nullptr);
- }
+ SubgraphContext &curSubgCtx() { return _subgraph_contexts.back(); }
private:
flatbuffers::FlatBufferBuilder _fbb{1024};
std::vector<flatbuffers::Offset<circle::Buffer>> _buffers;
std::vector<flatbuffers::Offset<circle::OperatorCode>> _opcodes;
-
- // per-subgraph
- std::vector<int> _inputs;
- std::vector<int> _outputs;
- std::vector<flatbuffers::Offset<circle::Tensor>> _tensors;
- std::vector<flatbuffers::Offset<circle::Operator>> _operators;
+ std::vector<SubgraphContext> _subgraph_contexts;
};
#endif // __NNFW_API_TEST_CIRCLE_GEN_H__
diff --git a/tests/nnfw_api/src/GenModelTest.h b/tests/nnfw_api/src/GenModelTest.h
new file mode 100644
index 000000000..530ccdd8c
--- /dev/null
+++ b/tests/nnfw_api/src/GenModelTest.h
@@ -0,0 +1,220 @@
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include <gtest/gtest.h>
+#include <nnfw_internal.h>
+
+#include <fstream>
+#include <string>
+
+#include "CircleGen.h"
+#include "fixtures.h"
+
+struct TestCaseData
+{
+ /**
+ * @brief A vector of input buffers
+ *
+ * @todo support other types as well as float
+ */
+ std::vector<std::vector<float>> inputs;
+ /**
+ * @brief A vector of output buffers
+ *
+ * @todo support other types as well as float
+ */
+ std::vector<std::vector<float>> outputs;
+};
+
+class GenModelTestContext
+{
+public:
+ GenModelTestContext(CircleBuffer &&cbuf) : _cbuf{std::move(cbuf)}, _backends{"cpu"} {}
+
+ /**
+ * @brief Return circle buffer
+ *
+ * @return CircleBuffer& the circle buffer
+ */
+ const CircleBuffer &cbuf() const { return _cbuf; }
+
+ /**
+ * @brief Return test cases
+ *
+ * @return std::vector<TestCaseData>& the test cases
+ */
+ const std::vector<TestCaseData> &test_cases() const { return _test_cases; }
+
+ /**
+ * @brief Return backends
+ *
+ * @return const std::vector<std::string>& the backends to be tested
+ */
+ const std::vector<std::string> &backends() const { return _backends; }
+
+ /**
+ * @brief Return whether the test is expected to fail at compile time
+ *
+ * @return bool true if the test is expected to fail at compile time
+ */
+ bool fail_compile() const { return _fail_compile; }
+
+ /**
+ * @brief Add a test case
+ *
+ * @param tc the test case to be added
+ */
+ void addTestCase(const TestCaseData &tc) { _test_cases.emplace_back(tc); }
+
+ /**
+ * @brief Set the backends to be tested
+ *
+ * @param backends the backends to be tested; only supported ones are kept
+ */
+ void setBackends(const std::vector<std::string> &backends)
+ {
+ _backends.clear();
+
+ for (auto backend : backends)
+ {
+#ifdef TEST_ACL_BACKEND
+ if (backend == "acl_cl" || backend == "acl_neon")
+ {
+ _backends.push_back(backend);
+ }
+#endif
+ if (backend == "cpu")
+ {
+ _backends.push_back(backend);
+ }
+ }
+ }
+
+ /**
+ * @brief Mark the test as expected to fail at compile time
+ */
+ void setCompileFail() { _fail_compile = true; }
+
+private:
+ CircleBuffer _cbuf;
+ std::vector<TestCaseData> _test_cases;
+ std::vector<std::string> _backends;
+ bool _fail_compile{false};
+};
+
+/**
+ * @brief Generated Model test fixture for a one-time inference
+ *
+ * This fixture is for one-time inference tests with a variety of generated models.
+ * It is the test maker's responsibility to create @c _context , which contains the
+ * test body: the generated circle buffer, model input and expected output data, and
+ * the list of backends to be tested.
+ * The rest (calling API functions for execution) is done by @c SetUp and @c TearDown .
+ *
+ */
+class GenModelTest : public ::testing::Test
+{
+protected:
+ void SetUp() override
+ { // DO NOTHING
+ }
+
+ void TearDown() override
+ {
+ for (std::string backend : _context->backends())
+ {
+ // NOTE If we could prepare multiple times for one model loading on the same session,
+ // we could move nnfw_create_session to SetUp and
+ // nnfw_load_circle_from_buffer outside of this for loop
+ NNFW_ENSURE_SUCCESS(nnfw_create_session(&_so.session));
+ auto &cbuf = _context->cbuf();
+ NNFW_ENSURE_SUCCESS(nnfw_load_circle_from_buffer(_so.session, cbuf.buffer(), cbuf.size()));
+ NNFW_ENSURE_SUCCESS(nnfw_set_available_backends(_so.session, backend.data()));
+
+ if (_context->fail_compile())
+ {
+ ASSERT_EQ(nnfw_prepare(_so.session), NNFW_STATUS_ERROR);
+
+ NNFW_ENSURE_SUCCESS(nnfw_close_session(_so.session));
+ continue;
+ }
+ NNFW_ENSURE_SUCCESS(nnfw_prepare(_so.session));
+
+ // In/Out buffer settings
+ uint32_t num_inputs;
+ NNFW_ENSURE_SUCCESS(nnfw_input_size(_so.session, &num_inputs));
+ _so.inputs.resize(num_inputs);
+ for (uint32_t ind = 0; ind < _so.inputs.size(); ind++)
+ {
+ nnfw_tensorinfo ti;
+ NNFW_ENSURE_SUCCESS(nnfw_input_tensorinfo(_so.session, ind, &ti));
+ uint64_t input_elements = num_elems(&ti);
+ _so.inputs[ind].resize(input_elements);
+
+ ASSERT_EQ(nnfw_set_input(_so.session, ind, ti.dtype, _so.inputs[ind].data(),
+ sizeof(float) * input_elements),
+ NNFW_STATUS_NO_ERROR);
+ }
+
+ uint32_t num_outputs;
+ NNFW_ENSURE_SUCCESS(nnfw_output_size(_so.session, &num_outputs));
+ _so.outputs.resize(num_outputs);
+ for (uint32_t ind = 0; ind < _so.outputs.size(); ind++)
+ {
+ nnfw_tensorinfo ti;
+ NNFW_ENSURE_SUCCESS(nnfw_output_tensorinfo(_so.session, ind, &ti));
+ uint64_t output_elements = num_elems(&ti);
+ _so.outputs[ind].resize(output_elements);
+ ASSERT_EQ(nnfw_set_output(_so.session, ind, ti.dtype, _so.outputs[ind].data(),
+ sizeof(float) * output_elements),
+ NNFW_STATUS_NO_ERROR);
+ }
+
+ // Set input values, run, and check output values
+ for (auto &test_case : _context->test_cases())
+ {
+ auto &ref_inputs = test_case.inputs;
+ auto &ref_outputs = test_case.outputs;
+ ASSERT_EQ(_so.inputs.size(), ref_inputs.size());
+ for (uint32_t i = 0; i < _so.inputs.size(); i++)
+ {
+ // Fill the values
+ ASSERT_EQ(_so.inputs[i].size(), ref_inputs[i].size());
+ memcpy(_so.inputs[i].data(), ref_inputs[i].data(), _so.inputs[i].size() * sizeof(float));
+ }
+
+ NNFW_ENSURE_SUCCESS(nnfw_run(_so.session));
+
+ ASSERT_EQ(_so.outputs.size(), ref_outputs.size());
+ for (uint32_t i = 0; i < _so.outputs.size(); i++)
+ {
+ // Check output tensor values
+ auto &ref_output = ref_outputs[i];
+ auto &output = _so.outputs[i];
+ ASSERT_EQ(output.size(), ref_output.size());
+ for (uint32_t e = 0; e < ref_output.size(); e++)
+ EXPECT_NEAR(ref_output[e], output[e], 0.001); // TODO better way for handling FP error?
+ }
+ }
+
+ NNFW_ENSURE_SUCCESS(nnfw_close_session(_so.session));
+ }
+ }
+
+protected:
+ SessionObject _so;
+ std::unique_ptr<GenModelTestContext> _context;
+};
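
The fixture contract described above boils down to: build a model with CircleGen, then hand _context a finished buffer, test cases, and backends; TearDown does the loading, preparation, execution, and output comparison. The one_op_tests files added later in this patch are the real examples; a minimal sketch of the pattern (the NEG operator and the values here are illustrative only, not part of the patch) looks like this:

TEST_F(GenModelTest, OneOp_Example_Sketch)
{
  CircleGen cgen;
  int in = cgen.addTensor({{1, 2}, circle::TensorType::TensorType_FLOAT32});
  int out = cgen.addTensor({{1, 2}, circle::TensorType::TensorType_FLOAT32});
  cgen.addOperatorNeg({{in}, {out}});
  cgen.setInputsAndOutputs({in}, {out});

  _context = std::make_unique<GenModelTestContext>(cgen.finish());
  _context->addTestCase({{{1, -2}}, {{-1, 2}}}); // one input buffer, one expected output buffer
  _context->setBackends({"cpu"});

  SUCCEED();
}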
diff --git a/tests/nnfw_api/src/GenModelTests.cc b/tests/nnfw_api/src/GenModelTests.cc
deleted file mode 100644
index 2bd839a78..000000000
--- a/tests/nnfw_api/src/GenModelTests.cc
+++ /dev/null
@@ -1,152 +0,0 @@
-/*
- * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#include <gtest/gtest.h>
-#include <nnfw_internal.h>
-
-#include <fstream>
-
-#include "CircleGen.h"
-#include "fixtures.h"
-
-/**
- * @brief Generated Model test fixture for a one time inference
- *
- * This fixture is for one-time inference test with variety of generated models.
- * It is the user's responsiblity to create @c _cbuf , @c _ref_inputs and @c _ref_outputs in the
- * test body, which are generated circle buffer, model input data and output data respectively.
- * The rest(calling API functions for execution) is done by @c Setup and @c TearDown .
- *
- */
-class GenModelTest : public ::testing::Test
-{
-protected:
- void SetUp() override { NNFW_ENSURE_SUCCESS(nnfw_create_session(&_so.session)); }
-
- void TearDown() override
- {
- NNFW_ENSURE_SUCCESS(nnfw_load_circle_from_buffer(_so.session, _cbuf.buffer(), _cbuf.size()));
- NNFW_ENSURE_SUCCESS(nnfw_prepare(_so.session));
-
- // In/Out buffer settings
- {
- uint32_t num_inputs;
- NNFW_ENSURE_SUCCESS(nnfw_input_size(_so.session, &num_inputs));
- _so.inputs.resize(num_inputs);
- for (uint32_t ind = 0; ind < _so.inputs.size(); ind++)
- {
- nnfw_tensorinfo ti;
- NNFW_ENSURE_SUCCESS(nnfw_input_tensorinfo(_so.session, ind, &ti));
- uint64_t input_elements = num_elems(&ti);
- _so.inputs[ind].resize(input_elements);
-
- ASSERT_EQ(nnfw_set_input(_so.session, ind, ti.dtype, _so.inputs[ind].data(),
- sizeof(float) * input_elements),
- NNFW_STATUS_NO_ERROR);
- }
-
- uint32_t num_outputs;
- NNFW_ENSURE_SUCCESS(nnfw_output_size(_so.session, &num_outputs));
- _so.outputs.resize(num_outputs);
- for (uint32_t ind = 0; ind < _so.outputs.size(); ind++)
- {
- nnfw_tensorinfo ti;
- NNFW_ENSURE_SUCCESS(nnfw_output_tensorinfo(_so.session, ind, &ti));
- uint64_t output_elements = num_elems(&ti);
- _so.outputs[ind].resize(output_elements);
- ASSERT_EQ(nnfw_set_output(_so.session, ind, ti.dtype, _so.outputs[ind].data(),
- sizeof(float) * output_elements),
- NNFW_STATUS_NO_ERROR);
- }
- }
-
- // Set input values, run, and check output values
- {
- ASSERT_EQ(_so.inputs.size(), _ref_inputs.size());
- for (uint32_t i = 0; i < _so.inputs.size(); i++)
- {
- // Fill the values
- ASSERT_EQ(_so.inputs[i].size(), _ref_inputs[i].size());
- memcpy(_so.inputs[i].data(), _ref_inputs[i].data(), _so.inputs[i].size() * sizeof(float));
- }
-
- NNFW_ENSURE_SUCCESS(nnfw_run(_so.session));
-
- ASSERT_EQ(_so.outputs.size(), _ref_outputs.size());
- for (uint32_t i = 0; i < _so.outputs.size(); i++)
- {
- // Check output tensor values
- auto &ref_output = _ref_outputs[i];
- auto &output = _so.outputs[i];
- ASSERT_EQ(output.size(), ref_output.size());
- for (uint32_t e = 0; e < ref_output.size(); e++)
- ASSERT_FLOAT_EQ(ref_output[e], output[e]);
- }
- }
-
- NNFW_ENSURE_SUCCESS(nnfw_close_session(_so.session));
- }
-
-protected:
- SessionObject _so;
- CircleBuffer _cbuf;
- std::vector<std::vector<float>> _ref_inputs;
- std::vector<std::vector<float>> _ref_outputs;
-};
-
-TEST_F(GenModelTest, OneOp_Add_VarToConst)
-{
- CircleGen cgen;
- std::vector<float> rhs_data{5, 4, 7, 4};
- uint32_t rhs_buf = cgen.addBuffer(rhs_data);
- int lhs = cgen.addTensor({{1, 2, 2, 1}, circle::TensorType::TensorType_FLOAT32});
- int rhs = cgen.addTensor({{1, 2, 2, 1}, circle::TensorType::TensorType_FLOAT32, rhs_buf});
- int out = cgen.addTensor({{1, 2, 2, 1}, circle::TensorType::TensorType_FLOAT32});
- cgen.addOperatorAdd({{lhs, rhs}, {out}}, circle::ActivationFunctionType_NONE);
- cgen.setInputsAndOutputs({lhs}, {out});
- _cbuf = cgen.finish();
-
- _ref_inputs = {{1, 3, 2, 4}};
- _ref_outputs = {{6, 7, 9, 8}};
-}
-
-TEST_F(GenModelTest, OneOp_Add_VarToVar)
-{
- CircleGen cgen;
- int lhs = cgen.addTensor({{1, 2, 2, 1}, circle::TensorType::TensorType_FLOAT32});
- int rhs = cgen.addTensor({{1, 2, 2, 1}, circle::TensorType::TensorType_FLOAT32});
- int out = cgen.addTensor({{1, 2, 2, 1}, circle::TensorType::TensorType_FLOAT32});
- cgen.addOperatorAdd({{lhs, rhs}, {out}}, circle::ActivationFunctionType_NONE);
- cgen.setInputsAndOutputs({lhs, rhs}, {out});
- _cbuf = cgen.finish();
-
- _ref_inputs = {{1, 3, 2, 4}, {5, 4, 7, 4}};
- _ref_outputs = {{6, 7, 9, 8}};
-}
-
-TEST_F(GenModelTest, OneOp_AvgPool2D)
-{
- CircleGen cgen;
- int in = cgen.addTensor({{1, 2, 2, 1}, circle::TensorType::TensorType_FLOAT32});
- int out = cgen.addTensor({{1, 1, 1, 1}, circle::TensorType::TensorType_FLOAT32});
- cgen.addOperatorAveragePool2D({{in}, {out}}, circle::Padding_SAME, 2, 2, 2, 2,
- circle::ActivationFunctionType_NONE);
- cgen.setInputsAndOutputs({in}, {out});
- _cbuf = cgen.finish();
-
- _ref_inputs = {{1, 3, 2, 4}};
- _ref_outputs = {{2.5}};
-}
diff --git a/tests/nnfw_api/src/ModelTestDynamicTensor.cc b/tests/nnfw_api/src/ModelTestDynamicTensor.cc
index c1f4369d6..e2d70d2c0 100644
--- a/tests/nnfw_api/src/ModelTestDynamicTensor.cc
+++ b/tests/nnfw_api/src/ModelTestDynamicTensor.cc
@@ -20,6 +20,7 @@
#include "common.h"
#include "fixtures.h"
#include "NNPackages.h"
+#include "CircleGen.h"
void set_input_output(nnfw_session *session, const std::vector<float> &input,
std::vector<float> &actual_output)
@@ -58,6 +59,7 @@ void set_input_output(nnfw_session *session, const std::vector<float> &input0,
*
* @note Run this test with "cpu" backend
*/
+// TODO Rewrite this with CircleGen
class TestDynamicTensorReshapeModelLoaded
: public ValidationTestModelLoaded<NNPackages::DYNAMIC_TENSOR_RESHAPE>
{
@@ -209,30 +211,6 @@ TEST_F(TestDynamicTensorReshapeModelLoaded, neg_reshape_multiple_executions)
// Trying to set unknown dim to other value before calling nnfw_prepare()
//
-class TestInputUnknownDimInputConcatModelLoaded
- : public ValidationTestModelLoaded<NNPackages::UNKNOWN_DIM_INPUT_CONCAT>
-{
-protected:
- void prepare_apply_set_input_output(const std::vector<float> &input0,
- const std::vector<float> &input1,
- std::vector<float> *actual_output, nnfw_tensorinfo input0_ti)
- {
- NNFW_ENSURE_SUCCESS(nnfw_prepare(_session));
- NNFW_ENSURE_SUCCESS(nnfw_set_input_tensorinfo(_session, 0, &input0_ti));
-
- ASSERT_EQ(nnfw_set_input(_session, 0, NNFW_TYPE_TENSOR_FLOAT32, input0.data(),
- sizeof(float) * input0.size()),
- NNFW_STATUS_NO_ERROR);
- ASSERT_EQ(nnfw_set_input(_session, 1, NNFW_TYPE_TENSOR_FLOAT32, input1.data(),
- sizeof(float) * input1.size()),
- NNFW_STATUS_NO_ERROR);
-
- ASSERT_EQ(nnfw_set_output(_session, 0, NNFW_TYPE_TENSOR_FLOAT32, actual_output->data(),
- sizeof(float) * actual_output->size()),
- NNFW_STATUS_NO_ERROR);
- }
-};
-
/**
* @brief Testing the following model:
*
@@ -248,9 +226,28 @@ protected:
*
* @note Run this test with "cpu" backend
*/
-TEST_F(TestInputUnknownDimInputConcatModelLoaded, concat_input0_to_2x3)
+auto build_model_buf_Concatenation_unknown_dims()
{
- NNFW_ENSURE_SUCCESS(nnfw_set_available_backends(_session, "cpu"));
+ // Model is not important
+ CircleGen cgen;
+ auto f32 = circle::TensorType::TensorType_FLOAT32;
+ int in1 = cgen.addTensor({{1, 1}, f32}); // consider this [None, None]
+ int in2 = cgen.addTensor({{2, 3}, f32});
+ int out = cgen.addTensor({{}, f32}); // scalar, meaning output shape is unspecified
+ cgen.addOperatorConcatenation({{in1, in2}, {out}}, 0, circle::ActivationFunctionType_NONE);
+ cgen.setInputsAndOutputs({in1, in2}, {out});
+ auto cbuf = cgen.finish();
+ return cbuf;
+}
+
+TEST(TestDynamicTensor, concat_unknown_dim_input0_to_2x3)
+{
+ nnfw_session *session = nullptr;
+ NNFW_ENSURE_SUCCESS(nnfw_create_session(&session));
+ const auto model_buf = build_model_buf_Concatenation_unknown_dims();
+ NNFW_ENSURE_SUCCESS(nnfw_load_circle_from_buffer(session, model_buf.buffer(), model_buf.size()));
+
+ NNFW_ENSURE_SUCCESS(nnfw_set_available_backends(session, "cpu"));
const std::vector<float> input0 = {1, 2, 3}; // of shape [1, 3]
const std::vector<float> input1 = {4, 5, 6, 7, 8, 9}; // of shape [2, 3]
@@ -260,13 +257,13 @@ TEST_F(TestInputUnknownDimInputConcatModelLoaded, concat_input0_to_2x3)
// input reshaping to [1, 3]
nnfw_tensorinfo ti = {NNFW_TYPE_TENSOR_FLOAT32, 2, {1, 3}};
- NNFW_ENSURE_SUCCESS(nnfw_set_input_tensorinfo(_session, 0, &ti));
- NNFW_ENSURE_SUCCESS(nnfw_prepare(_session));
+ NNFW_ENSURE_SUCCESS(nnfw_set_input_tensorinfo(session, 0, &ti));
+ NNFW_ENSURE_SUCCESS(nnfw_prepare(session));
- set_input_output(_session, input0, input1, actual_output);
+ set_input_output(session, input0, input1, actual_output);
// Do inference
- NNFW_STATUS res = nnfw_run(_session);
+ NNFW_STATUS res = nnfw_run(session);
NNFW_ENSURE_SUCCESS(res);
// output value check
@@ -289,9 +286,14 @@ TEST_F(TestInputUnknownDimInputConcatModelLoaded, concat_input0_to_2x3)
*
* @note Run this test with "cpu" backend and "linear" executor
*/
-TEST_F(TestInputUnknownDimInputConcatModelLoaded, neg_concat_input0_to_wrong_shape)
+TEST(TestDynamicTensor, neg_concat_input0_to_wrong_shape)
{
- NNFW_ENSURE_SUCCESS(nnfw_set_available_backends(_session, "cpu"));
+ nnfw_session *session = nullptr;
+ NNFW_ENSURE_SUCCESS(nnfw_create_session(&session));
+ const auto model_buf = build_model_buf_Concatenation_unknown_dims();
+ NNFW_ENSURE_SUCCESS(nnfw_load_circle_from_buffer(session, model_buf.buffer(), model_buf.size()));
+
+ NNFW_ENSURE_SUCCESS(nnfw_set_available_backends(session, "cpu"));
const std::vector<float> input0 = {1, 2, 3}; // of shape [3, 1], wrong shape
const std::vector<float> input1 = {4, 5, 6, 7, 8, 9}; // of shape [2, 3]
@@ -300,9 +302,9 @@ TEST_F(TestInputUnknownDimInputConcatModelLoaded, neg_concat_input0_to_wrong_sha
// input reshaping to [3, 1]
nnfw_tensorinfo ti = {NNFW_TYPE_TENSOR_FLOAT32, 2, {3, 1}};
- NNFW_ENSURE_SUCCESS(nnfw_set_input_tensorinfo(_session, 0, &ti));
+ NNFW_ENSURE_SUCCESS(nnfw_set_input_tensorinfo(session, 0, &ti));
- ASSERT_EQ(nnfw_prepare(_session), NNFW_STATUS_ERROR);
+ ASSERT_EQ(nnfw_prepare(session), NNFW_STATUS_ERROR);
}
//
@@ -325,12 +327,30 @@ TEST_F(TestInputUnknownDimInputConcatModelLoaded, neg_concat_input0_to_wrong_sha
*
* @note Run this test with "cpu" backend
*/
-using TestDynamicTensorApplyTensorInfoBinaryOp =
- ValidationTestModelLoaded<NNPackages::ADD_UNSPECIFIED_RANK_INPUTS>;
+auto build_model_buf_Add_unspecified_rank()
+{
+ // Model is not important
+ CircleGen cgen;
+ auto f32 = circle::TensorType::TensorType_FLOAT32;
+ int in1 = cgen.addTensor({{}, f32}); // scalar, meaning shape is unspecified
+ int in2 = cgen.addTensor({{1, 2, 3}, f32});
+ int op_out = cgen.addTensor({{}, f32}); // unspecified
+ int out = cgen.addTensor({{}, f32}); // unspecified
+ cgen.addOperatorAdd({{in1, in2}, {op_out}}, circle::ActivationFunctionType_NONE);
+ cgen.addOperatorAdd({{op_out, op_out}, {out}}, circle::ActivationFunctionType_NONE);
+ cgen.setInputsAndOutputs({in1, in2}, {out});
+ auto cbuf = cgen.finish();
+ return cbuf;
+}
-TEST_F(TestDynamicTensorApplyTensorInfoBinaryOp, set_input_tensorinfo_after_compilation_add)
+TEST(TestDynamicTensor, set_input_tensorinfo_after_compilation_add)
{
- NNFW_ENSURE_SUCCESS(nnfw_set_available_backends(_session, "cpu"));
+ nnfw_session *session = nullptr;
+ NNFW_ENSURE_SUCCESS(nnfw_create_session(&session));
+ const auto model_buf = build_model_buf_Add_unspecified_rank();
+ NNFW_ENSURE_SUCCESS(nnfw_load_circle_from_buffer(session, model_buf.buffer(), model_buf.size()));
+
+ NNFW_ENSURE_SUCCESS(nnfw_set_available_backends(session, "cpu"));
// input reshaping to [2, 2, 3]
nnfw_tensorinfo input0_ti = {NNFW_TYPE_TENSOR_FLOAT32, 3, {2, 2, 3}};
@@ -341,14 +361,14 @@ TEST_F(TestDynamicTensorApplyTensorInfoBinaryOp, set_input_tensorinfo_after_comp
std::vector<float> expected_output = {1.1 * 2, 2.1 * 2, 3.1 * 2, 4.1 * 2, 5.1 * 2, 6.1 * 2,
7.1 * 2, 8.1 * 2, 9.1 * 2, 10.1 * 2, 11.1 * 2, 12.1 * 2};
- NNFW_ENSURE_SUCCESS(nnfw_prepare(_session));
+ NNFW_ENSURE_SUCCESS(nnfw_prepare(session));
- NNFW_ENSURE_SUCCESS(nnfw_set_input_tensorinfo(_session, 0, &input0_ti));
+ NNFW_ENSURE_SUCCESS(nnfw_set_input_tensorinfo(session, 0, &input0_ti));
- set_input_output(_session, input0, input1, actual_output);
+ set_input_output(session, input0, input1, actual_output);
// Do inference
- NNFW_STATUS res = nnfw_run(_session);
+ NNFW_STATUS res = nnfw_run(session);
NNFW_ENSURE_SUCCESS(res);
// output value check
@@ -370,11 +390,27 @@ TEST_F(TestDynamicTensorApplyTensorInfoBinaryOp, set_input_tensorinfo_after_comp
*
* @note Run this test with "cpu" backend
*/
-using TestDynamicTensorApplyTensorInfoUnaryOp = ValidationTestModelLoaded<NNPackages::NEG>;
-TEST_F(TestDynamicTensorApplyTensorInfoUnaryOp, set_input_tensorinfo_after_compilation_neg)
+auto build_model_buf_NEG()
{
- NNFW_ENSURE_SUCCESS(nnfw_set_available_backends(_session, "cpu"));
+ // Model is not important
+ CircleGen cgen;
+ int in = cgen.addTensor({{4, 4}, circle::TensorType::TensorType_FLOAT32});
+ int out = cgen.addTensor({{4, 4}, circle::TensorType::TensorType_FLOAT32});
+ cgen.addOperatorNeg({{in}, {out}});
+ cgen.setInputsAndOutputs({in}, {out});
+ auto cbuf = cgen.finish();
+ return cbuf;
+}
+
+TEST(TestDynamicTensor, set_input_tensorinfo_after_compilation_neg)
+{
+ nnfw_session *session = nullptr;
+ NNFW_ENSURE_SUCCESS(nnfw_create_session(&session));
+ const auto model_buf = build_model_buf_NEG();
+ NNFW_ENSURE_SUCCESS(nnfw_load_circle_from_buffer(session, model_buf.buffer(), model_buf.size()));
+
+ NNFW_ENSURE_SUCCESS(nnfw_set_available_backends(session, "cpu"));
nnfw_tensorinfo input0_ti_original = {NNFW_TYPE_TENSOR_FLOAT32, 2, {4, 4}};
@@ -397,28 +433,28 @@ TEST_F(TestDynamicTensorApplyTensorInfoUnaryOp, set_input_tensorinfo_after_compi
expected_output[i] = -1 * input0[i];
}
- NNFW_ENSURE_SUCCESS(nnfw_prepare(_session));
+ NNFW_ENSURE_SUCCESS(nnfw_prepare(session));
// input shape check
{
nnfw_tensorinfo ti = {};
- NNFW_ENSURE_SUCCESS(nnfw_input_tensorinfo(_session, 0, &ti));
+ NNFW_ENSURE_SUCCESS(nnfw_input_tensorinfo(session, 0, &ti));
ASSERT_TRUE(tensorInfoEqual(input0_ti_original, ti));
}
- NNFW_ENSURE_SUCCESS(nnfw_set_input_tensorinfo(_session, 0, &input0_ti));
+ NNFW_ENSURE_SUCCESS(nnfw_set_input_tensorinfo(session, 0, &input0_ti));
// input shape check
{
nnfw_tensorinfo ti = {};
- NNFW_ENSURE_SUCCESS(nnfw_input_tensorinfo(_session, 0, &ti));
+ NNFW_ENSURE_SUCCESS(nnfw_input_tensorinfo(session, 0, &ti));
ASSERT_TRUE(tensorInfoEqual(input0_ti, ti));
}
- set_input_output(_session, input0, actual_output);
+ set_input_output(session, input0, actual_output);
// Do inference
- NNFW_STATUS res = nnfw_run(_session);
+ NNFW_STATUS res = nnfw_run(session);
NNFW_ENSURE_SUCCESS(res);
// output value check
@@ -469,8 +505,7 @@ TEST_F(TestWhileDynamicModelLoaded, neg_run_verify)
set_input_output(_session, while_dynamic_input0, actual_output0);
- // TODO Change error code NNFW_STATUS_ERROR -> NNFW_INSUFFICIENT_OUTPUT_SIZE
- ASSERT_EQ(nnfw_run(_session), NNFW_STATUS_ERROR);
+ ASSERT_EQ(nnfw_run(_session), NNFW_STATUS_INSUFFICIENT_OUTPUT_SIZE);
}
using TestIfDynamicModelLoaded = ValidationTestModelLoaded<NNPackages::IF_DYNAMIC>;
diff --git a/tests/nnfw_api/src/NNPackages.cc b/tests/nnfw_api/src/NNPackages.cc
index ebae2ea26..534973cb0 100644
--- a/tests/nnfw_api/src/NNPackages.cc
+++ b/tests/nnfw_api/src/NNPackages.cc
@@ -29,8 +29,7 @@ const char *TEST_PACKAGE_NAMES[] = {
"add", "add_no_manifest", "add_invalid_manifest",
// for dynamic tensor test
- "input_reshaping_add", "dynamic_tensor_reshape", "unknown_dim_input_concat",
- "add_unspecified_rank_inputs", "neg", "while_dynamic", "if_dynamic",
+ "input_reshaping_add", "dynamic_tensor_reshape", "while_dynamic", "if_dynamic",
};
NNPackages &NNPackages::get()
diff --git a/tests/nnfw_api/src/NNPackages.h b/tests/nnfw_api/src/NNPackages.h
index 7f41d6b44..735fa96a0 100644
--- a/tests/nnfw_api/src/NNPackages.h
+++ b/tests/nnfw_api/src/NNPackages.h
@@ -23,7 +23,7 @@
* @brief A helper class to find NN Packages for testing
* To add a nnpackage for your test, please do the followings:
* 0. Prerequisite: the actual file must be uploaded on the server
- * Add `config.sh` file to `tests/scripts/nnfw_api_gtest_models`
+ * Add `config.sh` file to `tests/scripts/models/nnfw_api_gtest`
* 1. Append an enum value to @c NNPackages::TestPackages
* 2. Append a string literal to @c TEST_PACKAGE_NAMES in the source file
*/
@@ -45,9 +45,6 @@ public:
// for dynamic tensor test
INPUT_RESHAPING_ADD,
DYNAMIC_TENSOR_RESHAPE,
- UNKNOWN_DIM_INPUT_CONCAT,
- ADD_UNSPECIFIED_RANK_INPUTS,
- NEG,
WHILE_DYNAMIC,
IF_DYNAMIC,
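
The header comment above (steps 0-2) describes how a new nnpackage is registered for testing. Steps 1 and 2 are one-line additions each; for a hypothetical package named "my_model" (illustrative only, not part of this patch) they would look roughly like:

// NNPackages.h, step 1: append an enum value to NNPackages::TestPackages
enum TestPackages
{
  // ... existing entries ...
  MY_MODEL,
};

// NNPackages.cc, step 2: append the matching string literal to TEST_PACKAGE_NAMES
const char *TEST_PACKAGE_NAMES[] = {
    // ... existing entries ...
    "my_model",
};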
diff --git a/tests/nnfw_api/src/RegressionTests.cc b/tests/nnfw_api/src/RegressionTests.cc
index e4dfa9118..05914b839 100644
--- a/tests/nnfw_api/src/RegressionTests.cc
+++ b/tests/nnfw_api/src/RegressionTests.cc
@@ -17,6 +17,10 @@
#include "fixtures.h"
#include "NNPackages.h"
+#include <nnfw_internal.h>
+
+#include "CircleGen.h"
+
TEST_F(RegressionTest, github_1535)
{
auto package_path = NNPackages::get().getModelAbsolutePath(NNPackages::ADD);
@@ -35,4 +39,26 @@ TEST_F(RegressionTest, github_1535)
NNFW_ENSURE_SUCCESS(nnfw_close_session(session1));
NNFW_ENSURE_SUCCESS(nnfw_close_session(session2));
+
+ SUCCEED();
+}
+
+TEST_F(RegressionTest, neg_github_3826)
+{
+ // Model is not important
+ CircleGen cgen;
+ int in = cgen.addTensor({{1, 2, 2, 1}, circle::TensorType::TensorType_FLOAT32});
+ int out = cgen.addTensor({{1, 1, 1, 1}, circle::TensorType::TensorType_FLOAT32});
+ cgen.addOperatorAveragePool2D({{in}, {out}}, circle::Padding_SAME, 2, 2, 2, 2,
+ circle::ActivationFunctionType_NONE);
+ cgen.setInputsAndOutputs({in}, {out});
+ auto cbuf = cgen.finish();
+
+ nnfw_session *session = nullptr;
+ NNFW_ENSURE_SUCCESS(nnfw_create_session(&session));
+ NNFW_ENSURE_SUCCESS(nnfw_load_circle_from_buffer(session, cbuf.buffer(), cbuf.size()));
+ // To test when there are no backends loaded for the session
+ NNFW_ENSURE_SUCCESS(nnfw_set_available_backends(session, "unavailable_backend"));
+ ASSERT_EQ(nnfw_prepare(session), NNFW_STATUS_ERROR);
+ NNFW_ENSURE_SUCCESS(nnfw_close_session(session));
}
diff --git a/tests/nnfw_api/src/ValidationTestAddModelLoaded.cc b/tests/nnfw_api/src/ValidationTestAddModelLoaded.cc
index 11c603494..1d3d4fc93 100644
--- a/tests/nnfw_api/src/ValidationTestAddModelLoaded.cc
+++ b/tests/nnfw_api/src/ValidationTestAddModelLoaded.cc
@@ -19,11 +19,18 @@
using ValidationTestAddModelLoaded = ValidationTestModelLoaded<NNPackages::ADD>;
-TEST_F(ValidationTestAddModelLoaded, prepare_001) { NNFW_ENSURE_SUCCESS(nnfw_prepare(_session)); }
+TEST_F(ValidationTestAddModelLoaded, prepare_001)
+{
+ NNFW_ENSURE_SUCCESS(nnfw_prepare(_session));
+
+ SUCCEED();
+}
TEST_F(ValidationTestAddModelLoaded, set_available_backends_001)
{
NNFW_ENSURE_SUCCESS(nnfw_set_available_backends(_session, "cpu"));
+
+ SUCCEED();
}
TEST_F(ValidationTestAddModelLoaded, get_input_size)
@@ -48,6 +55,17 @@ TEST_F(ValidationTestAddModelLoaded, output_tensorinfo)
ASSERT_EQ(tensor_info.dims[0], 1);
}
+TEST_F(ValidationTestAddModelLoaded, input_output_tensorindex)
+{
+ uint32_t in_ind = 100;
+ NNFW_ENSURE_SUCCESS(nnfw_input_tensorindex(_session, "X_input", &in_ind));
+ ASSERT_EQ(in_ind, 0);
+
+ uint32_t out_ind = 100;
+ NNFW_ENSURE_SUCCESS(nnfw_output_tensorindex(_session, "ADD_TOP", &out_ind));
+ ASSERT_EQ(out_ind, 0);
+}
+
TEST_F(ValidationTestAddModelLoaded, neg_run)
{
// nnfw_prepare is not called
@@ -91,3 +109,16 @@ TEST_F(ValidationTestAddModelLoaded, neg_output_tensorinfo)
// tensor_info is null
ASSERT_EQ(nnfw_output_tensorinfo(_session, 0, nullptr), NNFW_STATUS_UNEXPECTED_NULL);
}
+
+TEST_F(ValidationTestAddModelLoaded, neg_input_output_tensorindex)
+{
+ uint32_t in_ind = 100;
+ ASSERT_EQ(nnfw_input_tensorindex(_session, "ADD_TOP", &in_ind), NNFW_STATUS_ERROR);
+ ASSERT_EQ(in_ind, 100);
+ ASSERT_EQ(nnfw_input_tensorindex(_session, "y_var", &in_ind), NNFW_STATUS_ERROR);
+ ASSERT_EQ(in_ind, 100);
+
+ uint32_t out_ind = 100;
+ ASSERT_EQ(nnfw_output_tensorindex(_session, "X_input", &out_ind), NNFW_STATUS_ERROR);
+ ASSERT_EQ(out_ind, 100);
+}
diff --git a/tests/nnfw_api/src/ValidationTestFourAddModelsSetInput.cc b/tests/nnfw_api/src/ValidationTestFourAddModelsSetInput.cc
index 4e2a9055a..e09402b01 100644
--- a/tests/nnfw_api/src/ValidationTestFourAddModelsSetInput.cc
+++ b/tests/nnfw_api/src/ValidationTestFourAddModelsSetInput.cc
@@ -23,6 +23,8 @@ TEST_F(ValidationTestFourAddModelsSetInput, run_001)
{
NNFW_ENSURE_SUCCESS(nnfw_run(_objects[0].session));
NNFW_ENSURE_SUCCESS(nnfw_run(_objects[1].session));
+
+ SUCCEED();
}
TEST_F(ValidationTestFourAddModelsSetInput, run_002)
@@ -33,6 +35,8 @@ TEST_F(ValidationTestFourAddModelsSetInput, run_002)
for (auto obj : _objects)
NNFW_ENSURE_SUCCESS(nnfw_run(obj.session));
}
+
+ SUCCEED();
}
TEST_F(ValidationTestFourAddModelsSetInput, run_async)
@@ -41,4 +45,6 @@ TEST_F(ValidationTestFourAddModelsSetInput, run_async)
NNFW_ENSURE_SUCCESS(nnfw_run_async(obj.session));
for (auto obj : _objects)
NNFW_ENSURE_SUCCESS(nnfw_await(obj.session));
+
+ SUCCEED();
}
diff --git a/tests/nnfw_api/src/ValidationTestSessionCreated.cc b/tests/nnfw_api/src/ValidationTestSessionCreated.cc
index dafcd369f..4ef14f745 100644
--- a/tests/nnfw_api/src/ValidationTestSessionCreated.cc
+++ b/tests/nnfw_api/src/ValidationTestSessionCreated.cc
@@ -29,6 +29,8 @@ TEST_F(ValidationTestSessionCreated, close_and_create_again)
{
NNFW_ENSURE_SUCCESS(nnfw_close_session(_session));
NNFW_ENSURE_SUCCESS(nnfw_create_session(&_session));
+
+ SUCCEED();
}
TEST_F(ValidationTestSessionCreated, neg_load_session_1)
diff --git a/tests/nnfw_api/src/ValidationTestSingleSession.cc b/tests/nnfw_api/src/ValidationTestSingleSession.cc
index 5e6027f91..c74649203 100644
--- a/tests/nnfw_api/src/ValidationTestSingleSession.cc
+++ b/tests/nnfw_api/src/ValidationTestSingleSession.cc
@@ -21,12 +21,16 @@ TEST_F(ValidationTestSingleSession, create_001)
{
NNFW_ENSURE_SUCCESS(nnfw_create_session(&_session));
NNFW_ENSURE_SUCCESS(nnfw_close_session(_session));
+
+ SUCCEED();
}
TEST_F(ValidationTestSingleSession, query_info_u32)
{
uint32_t val = 0;
NNFW_ENSURE_SUCCESS(nnfw_query_info_u32(nullptr, NNFW_INFO_ID_VERSION, &val));
+
+ SUCCEED();
}
TEST_F(ValidationTestSingleSession, neg_create_001)
diff --git a/tests/nnfw_api/src/fixtures.h b/tests/nnfw_api/src/fixtures.h
index 8fe5c1667..f273d6553 100644
--- a/tests/nnfw_api/src/fixtures.h
+++ b/tests/nnfw_api/src/fixtures.h
@@ -19,7 +19,7 @@
#include <array>
#include <gtest/gtest.h>
-#include <nnfw.h>
+#include <nnfw_experimental.h>
#include "NNPackages.h"
diff --git a/tests/nnfw_api/src/one_op_tests/Add.cc b/tests/nnfw_api/src/one_op_tests/Add.cc
new file mode 100644
index 000000000..281d5ded5
--- /dev/null
+++ b/tests/nnfw_api/src/one_op_tests/Add.cc
@@ -0,0 +1,103 @@
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "GenModelTest.h"
+
+#include <memory>
+
+TEST_F(GenModelTest, OneOp_Add_VarToConst)
+{
+ CircleGen cgen;
+ std::vector<float> rhs_data{5, 4, 7, 4};
+ uint32_t rhs_buf = cgen.addBuffer(rhs_data);
+ int lhs = cgen.addTensor({{1, 2, 2, 1}, circle::TensorType::TensorType_FLOAT32});
+ int rhs = cgen.addTensor({{1, 2, 2, 1}, circle::TensorType::TensorType_FLOAT32, rhs_buf});
+ int out = cgen.addTensor({{1, 2, 2, 1}, circle::TensorType::TensorType_FLOAT32});
+ cgen.addOperatorAdd({{lhs, rhs}, {out}}, circle::ActivationFunctionType_NONE);
+ cgen.setInputsAndOutputs({lhs}, {out});
+
+ _context = std::make_unique<GenModelTestContext>(cgen.finish());
+ _context->addTestCase({{{1, 3, 2, 4}}, {{6, 7, 9, 8}}});
+ _context->addTestCase({{{0, 1, 2, 3}}, {{5, 5, 9, 7}}});
+ _context->setBackends({"acl_cl", "acl_neon", "cpu"});
+
+ SUCCEED();
+}
+
+TEST_F(GenModelTest, OneOp_Add_VarToVar)
+{
+ CircleGen cgen;
+ int lhs = cgen.addTensor({{1, 2, 2, 1}, circle::TensorType::TensorType_FLOAT32});
+ int rhs = cgen.addTensor({{1, 2, 2, 1}, circle::TensorType::TensorType_FLOAT32});
+ int out = cgen.addTensor({{1, 2, 2, 1}, circle::TensorType::TensorType_FLOAT32});
+ cgen.addOperatorAdd({{lhs, rhs}, {out}}, circle::ActivationFunctionType_NONE);
+ cgen.setInputsAndOutputs({lhs, rhs}, {out});
+
+ _context = std::make_unique<GenModelTestContext>(cgen.finish());
+ _context->addTestCase({{{1, 3, 2, 4}, {5, 4, 7, 4}}, {{6, 7, 9, 8}}});
+ _context->setBackends({"acl_cl", "acl_neon", "cpu"});
+
+ SUCCEED();
+}
+
+TEST_F(GenModelTest, neg_OneOp_Add_InvalidShape)
+{
+ CircleGen cgen;
+ int lhs = cgen.addTensor({{1, 2, 2, 1}, circle::TensorType::TensorType_FLOAT32});
+ int rhs = cgen.addTensor({{1, 2, 3, 1}, circle::TensorType::TensorType_FLOAT32});
+ int out = cgen.addTensor({{1, 2, 3, 1}, circle::TensorType::TensorType_FLOAT32});
+ cgen.addOperatorAdd({{lhs, rhs}, {out}}, circle::ActivationFunctionType_NONE);
+ cgen.setInputsAndOutputs({lhs, rhs}, {out});
+
+ _context = std::make_unique<GenModelTestContext>(cgen.finish());
+ _context->setBackends({"acl_cl", "acl_neon", "cpu"});
+ _context->setCompileFail();
+
+ SUCCEED();
+}
+
+TEST_F(GenModelTest, neg_OneOp_Add_InvalidShapeConst)
+{
+ CircleGen cgen;
+ std::vector<float> rhs_data{5, 4, 0, 7, 4, 0};
+ uint32_t rhs_buf = cgen.addBuffer(rhs_data);
+ int lhs = cgen.addTensor({{1, 2, 2, 1}, circle::TensorType::TensorType_FLOAT32});
+ int rhs = cgen.addTensor({{1, 2, 2, 1}, circle::TensorType::TensorType_FLOAT32, rhs_buf});
+ int out = cgen.addTensor({{1, 2, 3, 1}, circle::TensorType::TensorType_FLOAT32});
+ cgen.addOperatorAdd({{lhs, rhs}, {out}}, circle::ActivationFunctionType_NONE);
+ cgen.setInputsAndOutputs({lhs, rhs}, {out});
+
+ _context = std::make_unique<GenModelTestContext>(cgen.finish());
+ _context->setBackends({"acl_cl", "acl_neon", "cpu"});
+ _context->setCompileFail();
+
+ SUCCEED();
+}
+
+TEST_F(GenModelTest, neg_OneOp_Add_OneOperand)
+{
+ CircleGen cgen;
+ int in = cgen.addTensor({{1, 2, 2, 1}, circle::TensorType::TensorType_FLOAT32});
+ int out = cgen.addTensor({{1, 2, 3, 1}, circle::TensorType::TensorType_FLOAT32});
+ cgen.addOperatorAdd({{in}, {out}}, circle::ActivationFunctionType_NONE);
+ cgen.setInputsAndOutputs({in}, {out});
+
+ _context = std::make_unique<GenModelTestContext>(cgen.finish());
+ _context->setBackends({"acl_cl", "acl_neon", "cpu"});
+ _context->setCompileFail();
+
+ SUCCEED();
+}
diff --git a/tests/nnfw_api/src/one_op_tests/AveragePool2D.cc b/tests/nnfw_api/src/one_op_tests/AveragePool2D.cc
new file mode 100644
index 000000000..854517e47
--- /dev/null
+++ b/tests/nnfw_api/src/one_op_tests/AveragePool2D.cc
@@ -0,0 +1,49 @@
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "GenModelTest.h"
+
+TEST_F(GenModelTest, OneOp_AvgPool2D)
+{
+ CircleGen cgen;
+ int in = cgen.addTensor({{1, 2, 2, 1}, circle::TensorType::TensorType_FLOAT32});
+ int out = cgen.addTensor({{1, 1, 1, 1}, circle::TensorType::TensorType_FLOAT32});
+ cgen.addOperatorAveragePool2D({{in}, {out}}, circle::Padding_SAME, 2, 2, 2, 2,
+ circle::ActivationFunctionType_NONE);
+ cgen.setInputsAndOutputs({in}, {out});
+
+ _context = std::make_unique<GenModelTestContext>(cgen.finish());
+ _context->addTestCase({{{1, 3, 2, 4}}, {{2.5}}});
+ _context->setBackends({"acl_cl", "acl_neon", "cpu"});
+
+ SUCCEED();
+}
+
+TEST_F(GenModelTest, neg_OneOp_AvgPool2D)
+{
+ CircleGen cgen;
+ int in = cgen.addTensor({{2, 2, 1}, circle::TensorType::TensorType_FLOAT32});
+ int out = cgen.addTensor({{1, 1, 1}, circle::TensorType::TensorType_FLOAT32});
+ cgen.addOperatorAveragePool2D({{in}, {out}}, circle::Padding_SAME, 2, 2, 2, 2,
+ circle::ActivationFunctionType_NONE);
+ cgen.setInputsAndOutputs({in}, {out});
+
+ _context = std::make_unique<GenModelTestContext>(cgen.finish());
+ _context->setBackends({"acl_cl", "acl_neon", "cpu"});
+ _context->setCompileFail();
+
+ SUCCEED();
+}
diff --git a/tests/nnfw_api/src/one_op_tests/Cos.cc b/tests/nnfw_api/src/one_op_tests/Cos.cc
new file mode 100644
index 000000000..72bfe3e2f
--- /dev/null
+++ b/tests/nnfw_api/src/one_op_tests/Cos.cc
@@ -0,0 +1,50 @@
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "GenModelTest.h"
+
+TEST_F(GenModelTest, OneOp_Cos)
+{
+ CircleGen cgen;
+ int in = cgen.addTensor({{1, 2, 2, 1}, circle::TensorType::TensorType_FLOAT32});
+ int out = cgen.addTensor({{1, 2, 2, 1}, circle::TensorType::TensorType_FLOAT32});
+ cgen.addOperatorCos({{in}, {out}});
+ cgen.setInputsAndOutputs({in}, {out});
+
+ _context = std::make_unique<GenModelTestContext>(cgen.finish());
+ const float pi = 3.141592653589793;
+ _context->addTestCase({{{0, pi / 2, pi, 7}}, {{1, 0, -1, 0.75390225434}}});
+ _context->setBackends({"cpu"});
+
+ SUCCEED();
+}
+
+TEST_F(GenModelTest, neg_OneOp_Cos_TwoOperand)
+{
+ CircleGen cgen;
+ int lhs = cgen.addTensor({{1, 2, 2, 1}, circle::TensorType::TensorType_FLOAT32});
+ int rhs = cgen.addTensor({{1, 2, 2, 1}, circle::TensorType::TensorType_FLOAT32});
+ int out1 = cgen.addTensor({{1, 2, 2, 1}, circle::TensorType::TensorType_FLOAT32});
+ int out2 = cgen.addTensor({{1, 2, 2, 1}, circle::TensorType::TensorType_FLOAT32});
+ cgen.addOperatorCos({{lhs, rhs}, {out1, out2}});
+ cgen.setInputsAndOutputs({lhs, rhs}, {out1, out2});
+
+ _context = std::make_unique<GenModelTestContext>(cgen.finish());
+ _context->setBackends({"cpu"});
+ _context->setCompileFail();
+
+ SUCCEED();
+}
diff --git a/tests/nnfw_api/src/one_op_tests/L2Normalization.cc b/tests/nnfw_api/src/one_op_tests/L2Normalization.cc
new file mode 100644
index 000000000..8b4b8f5b6
--- /dev/null
+++ b/tests/nnfw_api/src/one_op_tests/L2Normalization.cc
@@ -0,0 +1,35 @@
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "GenModelTest.h"
+
+TEST_F(GenModelTest, OneOp_L2Normalization)
+{
+ CircleGen cgen;
+ int in = cgen.addTensor({{1, 2, 2, 3}, circle::TensorType::TensorType_FLOAT32});
+ int out = cgen.addTensor({{1, 2, 2, 3}, circle::TensorType::TensorType_FLOAT32});
+
+ cgen.addOperatorL2Normalization({{in}, {out}});
+ cgen.setInputsAndOutputs({in}, {out});
+
+ _context = std::make_unique<GenModelTestContext>(cgen.finish());
+ _context->addTestCase({{{0, 3, 4, 0, 5, 12, 0, 8, 15, 0, 7, 24}},
+ {{0, 0.6, 0.8, 0, 0.38461539149284363, 0.92307698726654053, 0,
+ 0.47058823704719543, 0.88235294818878174, 0, 0.28, 0.96}}});
+ _context->setBackends({"acl_cl", "acl_neon", "cpu"});
+
+ SUCCEED();
+}
diff --git a/runtime/onert/core/src/ir/operation/Quantize.cc b/tests/nnfw_api/src/one_op_tests/LeakyRelu.cc
index 0e3d5b69b..9db911734 100644
--- a/runtime/onert/core/src/ir/operation/Quantize.cc
+++ b/tests/nnfw_api/src/one_op_tests/LeakyRelu.cc
@@ -14,24 +14,19 @@
* limitations under the License.
*/
-#include "ir/operation/Quantize.h"
+#include "GenModelTest.h"
-#include "ir/OperationVisitor.h"
-
-namespace onert
-{
-namespace ir
-{
-namespace operation
+TEST_F(GenModelTest, OneOp_LeakyRelu)
{
+ CircleGen cgen;
+ int in = cgen.addTensor({{2, 3}, circle::TensorType::TensorType_FLOAT32});
+ int out = cgen.addTensor({{2, 3}, circle::TensorType::TensorType_FLOAT32});
+ cgen.addOperatorLeakyRelu({{in}, {out}}, 0.5);
+ cgen.setInputsAndOutputs({in}, {out});
-void Quantize::accept(OperationVisitor &v) const { v.visit(*this); }
+ _context = std::make_unique<GenModelTestContext>(cgen.finish());
+ _context->addTestCase({{{0, 1.0, 3.0, 1.0, -1.0, -2.0f}}, {{0, 1.0, 3.0, 1.0, -0.5, -1.0}}});
+ _context->setBackends({"acl_cl", "acl_neon"});
-Quantize::Quantize(const OperandIndexSequence &inputs, const OperandIndexSequence &outputs)
- : Operation{OperandConstraint::createExact(2u), inputs, outputs}
-{
+ SUCCEED();
}
-
-} // namespace operation
-} // namespace ir
-} // namespace onert
diff --git a/tests/nnfw_api/src/one_op_tests/Pad.cc b/tests/nnfw_api/src/one_op_tests/Pad.cc
new file mode 100644
index 000000000..10fe6c78a
--- /dev/null
+++ b/tests/nnfw_api/src/one_op_tests/Pad.cc
@@ -0,0 +1,92 @@
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "GenModelTest.h"
+
+TEST_F(GenModelTest, OneOp_Pad)
+{
+ CircleGen cgen;
+ int in = cgen.addTensor({{1, 2, 2, 1}, circle::TensorType::TensorType_FLOAT32});
+ std::vector<int32_t> padding_data{0, 0, 1, 1, 1, 1, 0, 0};
+ uint32_t padding_buf = cgen.addBuffer(padding_data);
+ int padding = cgen.addTensor({{4, 2}, circle::TensorType::TensorType_INT32, padding_buf});
+ int out = cgen.addTensor({{1, 4, 4, 1}, circle::TensorType::TensorType_FLOAT32});
+
+ cgen.addOperatorPad({{in, padding}, {out}});
+ cgen.setInputsAndOutputs({in}, {out});
+ _context = std::make_unique<GenModelTestContext>(cgen.finish());
+ _context->addTestCase({{{1, 2, 3, 4}}, {{0, 0, 0, 0, 0, 1, 2, 0, 0, 3, 4, 0, 0, 0, 0, 0}}});
+ _context->setBackends({"acl_cl", "acl_neon", "cpu"});
+
+ SUCCEED();
+}
+
+TEST_F(GenModelTest, neg_OneOp_Pad_InvalidPadRank)
+{
+ CircleGen cgen;
+ int in = cgen.addTensor({{1, 2, 2, 1}, circle::TensorType::TensorType_FLOAT32});
+ std::vector<int32_t> padding_data{1, 1, 1, 1};
+ uint32_t padding_buf = cgen.addBuffer(padding_data);
+ int padding = cgen.addTensor({{4}, circle::TensorType::TensorType_INT32, padding_buf});
+ int out = cgen.addTensor({{1, 4, 4, 1}, circle::TensorType::TensorType_FLOAT32});
+
+ cgen.addOperatorPad({{in, padding}, {out}});
+ cgen.setInputsAndOutputs({in}, {out});
+
+ _context = std::make_unique<GenModelTestContext>(cgen.finish());
+ _context->setBackends({"acl_cl", "acl_neon", "cpu"});
+ _context->setCompileFail();
+
+ SUCCEED();
+}
+
+TEST_F(GenModelTest, neg_OneOp_Pad_InvalidPadDim0)
+{
+ CircleGen cgen;
+ int in = cgen.addTensor({{1, 2, 2, 1}, circle::TensorType::TensorType_FLOAT32});
+ std::vector<int32_t> padding_data{1, 1, 1, 1};
+ uint32_t padding_buf = cgen.addBuffer(padding_data);
+ int padding = cgen.addTensor({{2, 2}, circle::TensorType::TensorType_INT32, padding_buf});
+ int out = cgen.addTensor({{1, 4, 4, 1}, circle::TensorType::TensorType_FLOAT32});
+
+ cgen.addOperatorPad({{in, padding}, {out}});
+ cgen.setInputsAndOutputs({in}, {out});
+
+ _context = std::make_unique<GenModelTestContext>(cgen.finish());
+ _context->setBackends({"acl_cl", "acl_neon", "cpu"});
+ _context->setCompileFail();
+
+ SUCCEED();
+}
+
+TEST_F(GenModelTest, neg_OneOp_Pad_InvalidPadDim1)
+{
+ CircleGen cgen;
+ int in = cgen.addTensor({{1, 1, 1, 1}, circle::TensorType::TensorType_FLOAT32});
+ std::vector<int32_t> padding_data{1, 1, 1, 1};
+ uint32_t padding_buf = cgen.addBuffer(padding_data);
+ int padding = cgen.addTensor({{4, 1}, circle::TensorType::TensorType_INT32, padding_buf});
+ int out = cgen.addTensor({{2, 2, 2, 2}, circle::TensorType::TensorType_FLOAT32});
+
+ cgen.addOperatorPad({{in, padding}, {out}});
+ cgen.setInputsAndOutputs({in}, {out});
+
+ _context = std::make_unique<GenModelTestContext>(cgen.finish());
+ _context->setBackends({"acl_cl", "acl_neon", "cpu"});
+ _context->setCompileFail();
+
+ SUCCEED();
+}
diff --git a/tests/nnfw_api/src/one_op_tests/PadV2.cc b/tests/nnfw_api/src/one_op_tests/PadV2.cc
new file mode 100644
index 000000000..9f7ff9c0e
--- /dev/null
+++ b/tests/nnfw_api/src/one_op_tests/PadV2.cc
@@ -0,0 +1,113 @@
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "GenModelTest.h"
+
+TEST_F(GenModelTest, OneOp_PadV2)
+{
+ CircleGen cgen;
+ int in = cgen.addTensor({{1, 2, 2, 1}, circle::TensorType::TensorType_FLOAT32});
+ std::vector<int32_t> padding_data{0, 0, 1, 1, 1, 1, 0, 0};
+ uint32_t padding_buf = cgen.addBuffer(padding_data);
+ int padding = cgen.addTensor({{4, 2}, circle::TensorType::TensorType_INT32, padding_buf});
+ std::vector<float> padding_value_data{3.0};
+ uint32_t padding_value_buf = cgen.addBuffer(padding_value_data);
+ int padding_value =
+ cgen.addTensor({{1}, circle::TensorType::TensorType_FLOAT32, padding_value_buf});
+
+ int out = cgen.addTensor({{1, 4, 4, 1}, circle::TensorType::TensorType_FLOAT32});
+
+ cgen.addOperatorPadV2({{in, padding, padding_value}, {out}});
+ cgen.setInputsAndOutputs({in}, {out});
+
+ _context = std::make_unique<GenModelTestContext>(cgen.finish());
+ _context->addTestCase({{{1, 2, 3, 4}}, {{3, 3, 3, 3, 3, 1, 2, 3, 3, 3, 4, 3, 3, 3, 3, 3}}});
+ _context->setBackends({"cpu"});
+
+ SUCCEED();
+}
+
+TEST_F(GenModelTest, neg_OneOp_PadV2_InvalidPadRank)
+{
+ CircleGen cgen;
+ int in = cgen.addTensor({{1, 2, 2, 1}, circle::TensorType::TensorType_FLOAT32});
+ std::vector<int32_t> padding_data{1, 1, 1, 1};
+ uint32_t padding_buf = cgen.addBuffer(padding_data);
+ int padding = cgen.addTensor({{4}, circle::TensorType::TensorType_INT32, padding_buf});
+ std::vector<float> padding_value_data{3.0};
+ uint32_t padding_value_buf = cgen.addBuffer(padding_value_data);
+ int padding_value =
+ cgen.addTensor({{1}, circle::TensorType::TensorType_FLOAT32, padding_value_buf});
+
+ int out = cgen.addTensor({{1, 4, 4, 1}, circle::TensorType::TensorType_FLOAT32});
+
+ cgen.addOperatorPad({{in, padding, padding_value}, {out}});
+ cgen.setInputsAndOutputs({in}, {out});
+
+ _context = std::make_unique<GenModelTestContext>(cgen.finish());
+ _context->setBackends({"acl_cl", "acl_neon", "cpu"});
+ _context->setCompileFail();
+
+ SUCCEED();
+}
+
+TEST_F(GenModelTest, neg_OneOp_PadV2_InvalidPadDim0)
+{
+ CircleGen cgen;
+ int in = cgen.addTensor({{1, 2, 2, 1}, circle::TensorType::TensorType_FLOAT32});
+ std::vector<int32_t> padding_data{1, 1, 1, 1};
+ uint32_t padding_buf = cgen.addBuffer(padding_data);
+ int padding = cgen.addTensor({{2, 2}, circle::TensorType::TensorType_INT32, padding_buf});
+ std::vector<float> padding_value_data{3.0};
+ uint32_t padding_value_buf = cgen.addBuffer(padding_value_data);
+ int padding_value =
+ cgen.addTensor({{1}, circle::TensorType::TensorType_FLOAT32, padding_value_buf});
+
+ int out = cgen.addTensor({{1, 4, 4, 1}, circle::TensorType::TensorType_FLOAT32});
+
+ cgen.addOperatorPad({{in, padding, padding_value}, {out}});
+ cgen.setInputsAndOutputs({in}, {out});
+
+ _context = std::make_unique<GenModelTestContext>(cgen.finish());
+ _context->setBackends({"acl_cl", "acl_neon", "cpu"});
+ _context->setCompileFail();
+
+ SUCCEED();
+}
+
+TEST_F(GenModelTest, neg_OneOp_PadV2_InvalidPadDim1)
+{
+ CircleGen cgen;
+ int in = cgen.addTensor({{1, 1, 1, 1}, circle::TensorType::TensorType_FLOAT32});
+ std::vector<int32_t> padding_data{1, 1, 1, 1};
+ uint32_t padding_buf = cgen.addBuffer(padding_data);
+ int padding = cgen.addTensor({{4, 1}, circle::TensorType::TensorType_INT32, padding_buf});
+ std::vector<float> padding_value_data{3.0};
+ uint32_t padding_value_buf = cgen.addBuffer(padding_value_data);
+ int padding_value =
+ cgen.addTensor({{1}, circle::TensorType::TensorType_FLOAT32, padding_value_buf});
+
+ int out = cgen.addTensor({{2, 2, 2, 2}, circle::TensorType::TensorType_FLOAT32});
+
+ cgen.addOperatorPad({{in, padding, padding_value}, {out}});
+ cgen.setInputsAndOutputs({in}, {out});
+
+ _context = std::make_unique<GenModelTestContext>(cgen.finish());
+ _context->setBackends({"acl_cl", "acl_neon", "cpu"});
+ _context->setCompileFail();
+
+ SUCCEED();
+}
diff --git a/tests/nnfw_api/src/one_op_tests/Rank.cc b/tests/nnfw_api/src/one_op_tests/Rank.cc
new file mode 100644
index 000000000..ed9d67294
--- /dev/null
+++ b/tests/nnfw_api/src/one_op_tests/Rank.cc
@@ -0,0 +1,63 @@
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "GenModelTest.h"
+
+// WORKAROUND Handle int32_t type input/output
+union float_int {
+ int32_t i;
+ float f;
+};
+
+TEST_F(GenModelTest, OneOp_Rank)
+{
+ CircleGen cgen;
+ int in = cgen.addTensor({{1, 3, 3, 2}, circle::TensorType::TensorType_FLOAT32});
+ int out = cgen.addTensor({{1}, circle::TensorType::TensorType_INT32});
+
+ // TODO Handle more types in addTestCase
+ float_int output_data;
+ output_data.i = 4;
+
+ cgen.addOperatorRank({{in}, {out}});
+ cgen.setInputsAndOutputs({in}, {out});
+ _context = std::make_unique<GenModelTestContext>(cgen.finish());
+ _context->addTestCase(
+ {{{1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18}}, {{output_data.f}}});
+ _context->setBackends({"cpu"});
+
+ SUCCEED();
+}
+
+TEST_F(GenModelTest, OneOp_Rank_Int32)
+{
+ CircleGen cgen;
+ int in = cgen.addTensor({{1, 3, 3, 2}, circle::TensorType::TensorType_INT32});
+ int out = cgen.addTensor({{1}, circle::TensorType::TensorType_INT32});
+
+ // TODO Handle more types in addTestCase
+ float_int output_data;
+ output_data.i = 4;
+
+ cgen.addOperatorRank({{in}, {out}});
+ cgen.setInputsAndOutputs({in}, {out});
+ _context = std::make_unique<GenModelTestContext>(cgen.finish());
+ _context->addTestCase(
+ {{{1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18}}, {{output_data.f}}});
+ _context->setBackends({"cpu"});
+
+ SUCCEED();
+}
diff --git a/tests/nnfw_api/src/one_op_tests/ResizeNearestNeighbor.cc b/tests/nnfw_api/src/one_op_tests/ResizeNearestNeighbor.cc
new file mode 100644
index 000000000..94f45d4a5
--- /dev/null
+++ b/tests/nnfw_api/src/one_op_tests/ResizeNearestNeighbor.cc
@@ -0,0 +1,38 @@
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "GenModelTest.h"
+
+TEST_F(GenModelTest, OneOp_ResizeNearestNeighbor)
+{
+ CircleGen cgen;
+ int in = cgen.addTensor({{1, 2, 2, 2}, circle::TensorType::TensorType_FLOAT32});
+ std::vector<int32_t> size_data{3, 3};
+ uint32_t size_buf = cgen.addBuffer(size_data);
+ int size = cgen.addTensor({{2}, circle::TensorType::TensorType_INT32, size_buf});
+
+ int out = cgen.addTensor({{1, 3, 3, 2}, circle::TensorType::TensorType_FLOAT32});
+
+ cgen.addOperatorResizeNearestNeighbor({{in, size}, {out}});
+ cgen.setInputsAndOutputs({in}, {out});
+
+ _context = std::make_unique<GenModelTestContext>(cgen.finish());
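+ // The expected output maps output index j to input index j * 2 / 3 (truncated),
+ // i.e. {0, 0, 1}, so the first input row/column appears twice.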
+ _context->addTestCase({{{3, 4, 6, 10, 9, 10, 12, 16}},
+ {{3, 4, 3, 4, 6, 10, 3, 4, 3, 4, 6, 10, 9, 10, 9, 10, 12, 16}}});
+ _context->setBackends({"acl_cl"});
+
+ SUCCEED();
+}
diff --git a/tests/nnfw_api/src/one_op_tests/While.cc b/tests/nnfw_api/src/one_op_tests/While.cc
new file mode 100644
index 000000000..1d86e6d6a
--- /dev/null
+++ b/tests/nnfw_api/src/one_op_tests/While.cc
@@ -0,0 +1,75 @@
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "GenModelTest.h"
+
+#include <memory>
+
+TEST_F(GenModelTest, OneOp_While)
+{
+ // The model is equivalent to the pseudocode below
+ //
+ // function model(x)
+ // {
+ // while (x < 100.0)
+ // {
+ // x = x + 10.0;
+ // }
+ // return x
+ // }
+
+ CircleGen cgen;
+ std::vector<float> incr_data{10};
+ uint32_t incr_buf = cgen.addBuffer(incr_data);
+ std::vector<float> end_data{100};
+ uint32_t end_buf = cgen.addBuffer(end_data);
+
+ // primary subgraph
+ {
+ int x_in = cgen.addTensor({{1}, circle::TensorType_FLOAT32});
+ int x_out = cgen.addTensor({{1}, circle::TensorType_FLOAT32});
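+ // subgraph 1 is the condition subgraph and subgraph 2 is the body subgraph defined below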
+ cgen.addOperatorWhile({{x_in}, {x_out}}, 1, 2);
+ cgen.setInputsAndOutputs({x_in}, {x_out});
+ }
+
+ // cond subgraph
+ {
+ cgen.nextSubgraph();
+ int x = cgen.addTensor({{1}, circle::TensorType_FLOAT32});
+ int end = cgen.addTensor({{1}, circle::TensorType_FLOAT32, end_buf});
+ int result = cgen.addTensor({{1}, circle::TensorType_BOOL});
+ cgen.addOperatorLess({{x, end}, {result}});
+ cgen.setInputsAndOutputs({x}, {result});
+ }
+
+ // body subgraph
+ {
+ cgen.nextSubgraph();
+ int x_in = cgen.addTensor({{1}, circle::TensorType_FLOAT32});
+ int incr = cgen.addTensor({{1}, circle::TensorType_FLOAT32, incr_buf});
+ int x_out = cgen.addTensor({{1}, circle::TensorType_FLOAT32});
+ cgen.addOperatorAdd({{x_in, incr}, {x_out}}, circle::ActivationFunctionType_NONE);
+ cgen.setInputsAndOutputs({x_in}, {x_out});
+ }
+
+ _context = std::make_unique<GenModelTestContext>(cgen.finish());
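+ // x grows by 10 until it reaches at least 100: 0 -> 100, 2 -> 102, 22 -> 102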
+ _context->addTestCase({{{0}}, {{100}}});
+ _context->addTestCase({{{2}}, {{102}}});
+ _context->addTestCase({{{22}}, {{102}}});
+ _context->setBackends({"cpu"});
+
+ SUCCEED();
+}
diff --git a/tests/scripts/CMakeLists.txt b/tests/scripts/CMakeLists.txt
index 82235d9f6..40e0dfdaa 100644
--- a/tests/scripts/CMakeLists.txt
+++ b/tests/scripts/CMakeLists.txt
@@ -13,13 +13,13 @@ install(DIRECTORY command DESTINATION test)
file(GLOB MODEL_TEST_SCRIPT "models/run_test.sh")
install(PROGRAMS ${MODEL_TEST_SCRIPT} DESTINATION test/models)
-# Install models test list file
-file(GLOB MODEL_TEST_DIR models/config)
-install(DIRECTORY ${MODEL_TEST_DIR} DESTINATION test/models)
+# Install tflite model test config
+file(GLOB TFLITE_CONFIG_DIR models/tflite)
+install(DIRECTORY ${TFLITE_CONFIG_DIR} DESTINATION test/models)
# Install nnpackage test config
-file(GLOB MODEL_TEST_DIR LIST_DIRECTORIES true nnfw_api_gtest/models/*)
-install(DIRECTORY ${MODEL_TEST_DIR} DESTINATION test/models/nnpackage)
+file(GLOB NNPACKAGE_MODEL_CONFIG_DIR models/nnfw_api_gtest)
+install(DIRECTORY ${NNPACKAGE_MODEL_CONFIG_DIR} DESTINATION test/models)
# Install test list
file(GLOB TEST_LIST_DIR list)
diff --git a/tests/scripts/command/prepare-model b/tests/scripts/command/prepare-model
index feb658c3c..3feb7a799 100644
--- a/tests/scripts/command/prepare-model
+++ b/tests/scripts/command/prepare-model
@@ -57,7 +57,7 @@ fi
if [[ $DOWNLOAD_MODEL == "all" ]] || [[ $DOWNLOAD_MODEL == "nnpackage" ]]; then
# Download nnpackage model
- NNPACKAGE_CONFIG_DIR=$INSTALL_DIR/test/models/nnpackage/
+ NNPACKAGE_CONFIG_DIR=$INSTALL_DIR/test/models/nnfw_api_gtest/
NNPACKAGE_CACHE_DIR=$INSTALL_DIR/unittest_standalone/nnfw_api_gtest_models/
$INSTALL_DIR/test/models/run_test.sh --download=on --run=off --md5=$MD5_CHECK \
--configdir=$NNPACKAGE_CONFIG_DIR --cachedir=$NNPACKAGE_CACHE_DIR
diff --git a/tests/scripts/common.sh b/tests/scripts/common.sh
index 87aec86b3..a2a261a9c 100755
--- a/tests/scripts/common.sh
+++ b/tests/scripts/common.sh
@@ -30,7 +30,7 @@ function get_result_of_benchmark_test()
exit $RET
fi
- local RESULT=`grep -E '^- Mean:' $LOG_FILE | sed -e 's/ms//g' | awk '{print $3}'`
+ local RESULT=`grep -E '^- MEAN ' $LOG_FILE | awk '{print $4}'`
echo "$RESULT"
}
diff --git a/tests/scripts/list/nnpkg_test_list.armv7l-linux.acl_cl b/tests/scripts/list/nnpkg_test_list.armv7l-linux.acl_cl
index 866f734e0..ac674352c 100644
--- a/tests/scripts/list/nnpkg_test_list.armv7l-linux.acl_cl
+++ b/tests/scripts/list/nnpkg_test_list.armv7l-linux.acl_cl
@@ -1,27 +1,42 @@
-NET_000[0-5,7-9]
-NET_001[0,2-9]
-NET_002[0-2,4-9]
-NET_003[0-9]
-NET_004[0-9]
-UNIT_Add_*
-UNIT_AvgPool_*
-UNIT_BiasAdd_*
-UNIT_ConcatV2_*
-UNIT_Conv2D_*
-UNIT_Conv2DBackpropInput_*
-UNIT_DepthwiseConv2dNative_*
-UNIT_MaxPool_*
-UNIT_Mean_*
-UNIT_Mul_*
-UNIT_Pad_*
-UNIT_RealDiv_*
-UNIT_Relu6_*
-UNIT_Relu_*
-UNIT_Reshape_*
-UNIT_Rsqrt_*
-UNIT_Softmax_*
-UNIT_Sqrt_*
-UNIT_SquaredDifference_*
-UNIT_Squeeze_*
-UNIT_Sub_*
-UNIT_Tanh_000
+Add_000.opt
+#ArgMax_000.opt
+#ArgMax_001.opt
+#ArgMax_002.opt
+#ArgMax_003.opt
+AveragePool2D_000.opt
+AveragePool2D_U8_000.opt
+Concatenation_000.opt
+Conv2D_000.opt
+Conv2D_001.opt
+Conv2D_002.opt
+Conv2D_004.opt
+DepthwiseConv2D_000.opt
+DepthwiseConv2D_002.opt
+FullyConnected_000.opt
+FullyConnected_001.opt
+FullyConnected_003.opt
+#L2Normalize_U8_000.opt
+Logistic_000.opt
+#Logistic_U8_000.opt
+MaxPool2D_000.opt
+Mean_000.opt
+Mean_001.opt
+Mul_000.opt
+#Net_TConv_BN_000.opt
+Net_UnpackAdd_001.opt
+Pad_000.opt
+Quantization_000.opt
+Reshape_000.opt
+Reshape_001.opt
+Reshape_002.opt
+Softmax_000.opt
+SpaceToDepth_U8_000.opt
+Split_000.opt
+#Tanh_U8_000.opt
+#TransposeConv_000.opt
+#TransposeConv_001.opt
+Transpose_000.opt
+Unpack_000.opt
+Unpack_001.opt
+Unpack_002.opt
+#Unpack_003.opt
diff --git a/tests/scripts/list/nnpkg_test_list.armv7l-linux.acl_neon b/tests/scripts/list/nnpkg_test_list.armv7l-linux.acl_neon
index 7e2371b26..6612b4eb1 100644
--- a/tests/scripts/list/nnpkg_test_list.armv7l-linux.acl_neon
+++ b/tests/scripts/list/nnpkg_test_list.armv7l-linux.acl_neon
@@ -1,27 +1,42 @@
-NET_000[0-5,7-9]
-NET_001[0-9]
-NET_002[0-2,4-9]
-NET_003[0-9]
-NET_004[0-9]
-UNIT_Add_*
-UNIT_AvgPool_*
-UNIT_BiasAdd_*
-UNIT_ConcatV2_*
-UNIT_Conv2D_*
-UNIT_Conv2DBackpropInput_*
-UNIT_DepthwiseConv2dNative_*
-UNIT_MaxPool_*
-UNIT_Mean_*
-UNIT_Mul_*
-UNIT_Pad_*
-UNIT_RealDiv_*
-UNIT_Relu6_*
-UNIT_Relu_*
-UNIT_Reshape_*
-UNIT_Rsqrt_*
-UNIT_Softmax_*
-UNIT_Sqrt_*
-UNIT_SquaredDifference_*
-UNIT_Squeeze_*
-UNIT_Sub_*
-UNIT_Tanh_000
+Add_000.opt
+#ArgMax_000.opt
+#ArgMax_001.opt
+#ArgMax_002.opt
+#ArgMax_003.opt
+AveragePool2D_000.opt
+AveragePool2D_U8_000.opt
+Concatenation_000.opt
+Conv2D_000.opt
+Conv2D_001.opt
+Conv2D_002.opt
+Conv2D_004.opt
+#DepthwiseConv2D_000.opt
+DepthwiseConv2D_002.opt
+FullyConnected_000.opt
+FullyConnected_001.opt
+FullyConnected_003.opt
+#L2Normalize_U8_000.opt
+Logistic_000.opt
+#Logistic_U8_000.opt
+MaxPool2D_000.opt
+Mean_000.opt
+Mean_001.opt
+Mul_000.opt
+#Net_TConv_BN_000.opt
+Net_UnpackAdd_001.opt
+Pad_000.opt
+Quantization_000.opt
+Reshape_000.opt
+Reshape_001.opt
+Reshape_002.opt
+#Softmax_000.opt
+SpaceToDepth_U8_000.opt
+Split_000.opt
+#Tanh_U8_000.opt
+#TransposeConv_000.opt
+#TransposeConv_001.opt
+Transpose_000.opt
+Unpack_000.opt
+Unpack_001.opt
+Unpack_002.opt
+#Unpack_003.opt
diff --git a/tests/scripts/list/nnpkg_test_list.armv7l-linux.cpu b/tests/scripts/list/nnpkg_test_list.armv7l-linux.cpu
index cd765a465..6c71a8af6 100644
--- a/tests/scripts/list/nnpkg_test_list.armv7l-linux.cpu
+++ b/tests/scripts/list/nnpkg_test_list.armv7l-linux.cpu
@@ -1,17 +1,42 @@
-NET_000[0-4,6-9]
-NET_001[0-3,6-9]
-NET_002[0-2,4-7,9]
-NET_003[0-9]
-NET_004[0-9]
-UNIT_Add_*
-UNIT_AvgPool_*
-UNIT_ConcatV2_*
-UNIT_Conv2D_*
-UNIT_DepthwiseConv2dNative_*
-UNIT_MaxPool_*
-UNIT_Mul_*
-UNIT_Pad_*
-UNIT_Reshape_*
-UNIT_Softmax_*
-UNIT_Squeeze_*
-UNIT_Sub_*
+Add_000.opt
+ArgMax_000.opt
+ArgMax_001.opt
+ArgMax_002.opt
+ArgMax_003.opt
+AveragePool2D_000.opt
+AveragePool2D_U8_000.opt
+Concatenation_000.opt
+Conv2D_000.opt
+Conv2D_001.opt
+Conv2D_002.opt
+Conv2D_004.opt
+DepthwiseConv2D_000.opt
+DepthwiseConv2D_002.opt
+FullyConnected_000.opt
+FullyConnected_001.opt
+FullyConnected_003.opt
+L2Normalize_U8_000.opt
+Logistic_000.opt
+Logistic_U8_000.opt
+MaxPool2D_000.opt
+Mean_000.opt
+Mean_001.opt
+Mul_000.opt
+#Net_TConv_BN_000.opt
+Net_UnpackAdd_001.opt
+Pad_000.opt
+Quantization_000.opt
+Reshape_000.opt
+Reshape_001.opt
+Reshape_002.opt
+Softmax_000.opt
+SpaceToDepth_U8_000.opt
+Split_000.opt
+Tanh_U8_000.opt
+#TransposeConv_000.opt
+#TransposeConv_001.opt
+Transpose_000.opt
+Unpack_000.opt
+Unpack_001.opt
+Unpack_002.opt
+Unpack_003.opt
diff --git a/tests/scripts/list/nnpkg_test_list.armv7l-linux.srcn b/tests/scripts/list/nnpkg_test_list.armv7l-linux.srcn
deleted file mode 100644
index a893d59ae..000000000
--- a/tests/scripts/list/nnpkg_test_list.armv7l-linux.srcn
+++ /dev/null
@@ -1,5 +0,0 @@
-NET_000[1,3,7-9]
-NET_001[6,9]
-NET_002[2,8]
-UNIT_Conv2D_*
-UNIT_Conv2DBackpropInput_*
diff --git a/tests/scripts/list/nnpkg_test_list.noarch.interp b/tests/scripts/list/nnpkg_test_list.noarch.interp
index cb865da84..cc4e034ba 100644
--- a/tests/scripts/list/nnpkg_test_list.noarch.interp
+++ b/tests/scripts/list/nnpkg_test_list.noarch.interp
@@ -1,19 +1,42 @@
-NET_000[0-9]
-NET_001[0-4,6-9]
-NET_002[0-2,4-9]
-NET_003[0-9]
-NET_004[0-9]
-UNIT_Add_*
-UNIT_AvgPool_*
-UNIT_BiasAdd_*
-UNIT_ConcatV2_*
-UNIT_Conv2D_*
-UNIT_Conv2DBackpropInput_*
-UNIT_DepthwiseConv2dNative_*
-UNIT_MaxPool_*
-UNIT_Mul_*
-UNIT_Pad_*
-UNIT_Reshape_*
-UNIT_Softmax_*
-UNIT_Squeeze_*
-UNIT_Sub_*
+Add_000.opt
+#ArgMax_000.opt
+#ArgMax_001.opt
+#ArgMax_002.opt
+#ArgMax_003.opt
+AveragePool2D_000.opt
+#AveragePool2D_U8_000.opt
+Concatenation_000.opt
+Conv2D_000.opt
+Conv2D_001.opt
+Conv2D_002.opt
+Conv2D_004.opt
+DepthwiseConv2D_000.opt
+DepthwiseConv2D_002.opt
+FullyConnected_000.opt
+FullyConnected_001.opt
+FullyConnected_003.opt
+#L2Normalize_U8_000.opt
+Logistic_000.opt
+#Logistic_U8_000.opt
+MaxPool2D_000.opt
+#Mean_000.opt
+#Mean_001.opt
+Mul_000.opt
+#Net_TConv_BN_000.opt
+#Net_UnpackAdd_001.opt
+Pad_000.opt
+Quantization_000.opt
+Reshape_000.opt
+Reshape_001.opt
+Reshape_002.opt
+Softmax_000.opt
+#SpaceToDepth_U8_000.opt
+#Split_000.opt
+#Tanh_U8_000.opt
+#TransposeConv_000.opt
+#TransposeConv_001.opt
+#Transpose_000.opt
+#Unpack_000.opt
+#Unpack_001.opt
+#Unpack_002.opt
+#Unpack_003.opt
diff --git a/tests/scripts/nnfw_api_gtest/models/add/config.sh b/tests/scripts/models/nnfw_api_gtest/add/config.sh
index e6e8677e7..e6e8677e7 100644
--- a/tests/scripts/nnfw_api_gtest/models/add/config.sh
+++ b/tests/scripts/models/nnfw_api_gtest/add/config.sh
diff --git a/tests/scripts/nnfw_api_gtest/models/add_invalid_manifest/config.sh b/tests/scripts/models/nnfw_api_gtest/add_invalid_manifest/config.sh
index 92c903274..92c903274 100644
--- a/tests/scripts/nnfw_api_gtest/models/add_invalid_manifest/config.sh
+++ b/tests/scripts/models/nnfw_api_gtest/add_invalid_manifest/config.sh
diff --git a/tests/scripts/nnfw_api_gtest/models/add_no_manifest/config.sh b/tests/scripts/models/nnfw_api_gtest/add_no_manifest/config.sh
index 0d697a2a0..0d697a2a0 100644
--- a/tests/scripts/nnfw_api_gtest/models/add_no_manifest/config.sh
+++ b/tests/scripts/models/nnfw_api_gtest/add_no_manifest/config.sh
diff --git a/tests/scripts/nnfw_api_gtest/models/add_unspecified_rank_inputs/config.sh b/tests/scripts/models/nnfw_api_gtest/add_unspecified_rank_inputs/config.sh
index 2cc30f915..2cc30f915 100644
--- a/tests/scripts/nnfw_api_gtest/models/add_unspecified_rank_inputs/config.sh
+++ b/tests/scripts/models/nnfw_api_gtest/add_unspecified_rank_inputs/config.sh
diff --git a/tests/scripts/nnfw_api_gtest/models/dynamic_tensor_reshape/config.sh b/tests/scripts/models/nnfw_api_gtest/dynamic_tensor_reshape/config.sh
index 8496509cb..8496509cb 100644
--- a/tests/scripts/nnfw_api_gtest/models/dynamic_tensor_reshape/config.sh
+++ b/tests/scripts/models/nnfw_api_gtest/dynamic_tensor_reshape/config.sh
diff --git a/tests/scripts/nnfw_api_gtest/models/if_dynamic/config.sh b/tests/scripts/models/nnfw_api_gtest/if_dynamic/config.sh
index 3b8506c6f..3b8506c6f 100644
--- a/tests/scripts/nnfw_api_gtest/models/if_dynamic/config.sh
+++ b/tests/scripts/models/nnfw_api_gtest/if_dynamic/config.sh
diff --git a/tests/scripts/nnfw_api_gtest/models/input_reshaping_add/config.sh b/tests/scripts/models/nnfw_api_gtest/input_reshaping_add/config.sh
index 4e5a636ee..4e5a636ee 100644
--- a/tests/scripts/nnfw_api_gtest/models/input_reshaping_add/config.sh
+++ b/tests/scripts/models/nnfw_api_gtest/input_reshaping_add/config.sh
diff --git a/tests/scripts/nnfw_api_gtest/models/neg/config.sh b/tests/scripts/models/nnfw_api_gtest/neg/config.sh
index fdf093686..fdf093686 100644
--- a/tests/scripts/nnfw_api_gtest/models/neg/config.sh
+++ b/tests/scripts/models/nnfw_api_gtest/neg/config.sh
diff --git a/tests/scripts/nnfw_api_gtest/models/unknown_dim_input_concat/config.sh b/tests/scripts/models/nnfw_api_gtest/unknown_dim_input_concat/config.sh
index a3cab4787..a3cab4787 100644
--- a/tests/scripts/nnfw_api_gtest/models/unknown_dim_input_concat/config.sh
+++ b/tests/scripts/models/nnfw_api_gtest/unknown_dim_input_concat/config.sh
diff --git a/tests/scripts/nnfw_api_gtest/models/while_dynamic/config.sh b/tests/scripts/models/nnfw_api_gtest/while_dynamic/config.sh
index ff14d4e05..ff14d4e05 100644
--- a/tests/scripts/nnfw_api_gtest/models/while_dynamic/config.sh
+++ b/tests/scripts/models/nnfw_api_gtest/while_dynamic/config.sh
diff --git a/tests/scripts/models/run_test.sh b/tests/scripts/models/run_test.sh
index 0aa363f49..32a277859 100755
--- a/tests/scripts/models/run_test.sh
+++ b/tests/scripts/models/run_test.sh
@@ -18,7 +18,7 @@
MY_PATH="$( cd "$( dirname "${BASH_SOURCE[0]}" )" && pwd )"
NNFW_HOME="$(dirname $(dirname $(dirname ${MY_PATH})))"
CACHE_ROOT_PATH=$MY_PATH/"cache"
-TEST_ROOT_PATH=$MY_PATH/"config"
+TEST_ROOT_PATH=$MY_PATH/"tflite"
REPORT_DIR="report"
RUN_DISABLED="true"
@@ -131,7 +131,7 @@ if [ ! -d "$TEST_ROOT_PATH" ]; then
fi
# Check test driver setting
-if [ ! command_exists $DRIVER_BIN ] && [ "$RUN_TEST" = "on" ]; then
+if ! command_exists $DRIVER_BIN && [ "$RUN_TEST" = "on" ]; then
echo "Cannot find test driver" $DRIVER_BIN ": please set proper DRIVER_BIN"
exit 1
fi
diff --git a/tests/scripts/models/config/MODELS/inception_module/config.sh b/tests/scripts/models/tflite/MODELS/inception_module/config.sh
index 3f6eae9ee..3f6eae9ee 100755
--- a/tests/scripts/models/config/MODELS/inception_module/config.sh
+++ b/tests/scripts/models/tflite/MODELS/inception_module/config.sh
diff --git a/tests/scripts/models/config/MODELS/inception_nonslim/config.sh b/tests/scripts/models/tflite/MODELS/inception_nonslim/config.sh
index 39f5d772d..39f5d772d 100755
--- a/tests/scripts/models/config/MODELS/inception_nonslim/config.sh
+++ b/tests/scripts/models/tflite/MODELS/inception_nonslim/config.sh
diff --git a/tests/scripts/models/config/MODELS/inception_slim/config.sh b/tests/scripts/models/tflite/MODELS/inception_slim/config.sh
index 1c0cf3ef5..1c0cf3ef5 100755
--- a/tests/scripts/models/config/MODELS/inception_slim/config.sh
+++ b/tests/scripts/models/tflite/MODELS/inception_slim/config.sh
diff --git a/tests/scripts/models/config/MODELS/mobilenet/config.sh b/tests/scripts/models/tflite/MODELS/mobilenet/config.sh
index b23d687cd..b23d687cd 100755
--- a/tests/scripts/models/config/MODELS/mobilenet/config.sh
+++ b/tests/scripts/models/tflite/MODELS/mobilenet/config.sh
diff --git a/tests/scripts/models/config/MODELS/mobilenet_quant8/config.sh b/tests/scripts/models/tflite/MODELS/mobilenet_quant8/config.sh
index 2e304df92..2e304df92 100755
--- a/tests/scripts/models/config/MODELS/mobilenet_quant8/config.sh
+++ b/tests/scripts/models/tflite/MODELS/mobilenet_quant8/config.sh
diff --git a/tests/scripts/models/config/abs/config.sh b/tests/scripts/models/tflite/abs/config.sh
index 7acdefa5a..7acdefa5a 100755
--- a/tests/scripts/models/config/abs/config.sh
+++ b/tests/scripts/models/tflite/abs/config.sh
diff --git a/tests/scripts/models/config/add/1D/config.sh b/tests/scripts/models/tflite/add/1D/config.sh
index ca6fafe26..ca6fafe26 100755
--- a/tests/scripts/models/config/add/1D/config.sh
+++ b/tests/scripts/models/tflite/add/1D/config.sh
diff --git a/tests/scripts/models/config/add/4D/config.sh b/tests/scripts/models/tflite/add/4D/config.sh
index d9e93d17b..d9e93d17b 100755
--- a/tests/scripts/models/config/add/4D/config.sh
+++ b/tests/scripts/models/tflite/add/4D/config.sh
diff --git a/tests/scripts/models/config/average_pool_2d/aligned/config.sh b/tests/scripts/models/tflite/average_pool_2d/aligned/config.sh
index cdefb077e..cdefb077e 100755
--- a/tests/scripts/models/config/average_pool_2d/aligned/config.sh
+++ b/tests/scripts/models/tflite/average_pool_2d/aligned/config.sh
diff --git a/tests/scripts/models/config/average_pool_2d/avgpool1/config.sh b/tests/scripts/models/tflite/average_pool_2d/avgpool1/config.sh
index 7e0130088..7e0130088 100755
--- a/tests/scripts/models/config/average_pool_2d/avgpool1/config.sh
+++ b/tests/scripts/models/tflite/average_pool_2d/avgpool1/config.sh
diff --git a/tests/scripts/models/config/average_pool_2d/avgpool2/config.sh b/tests/scripts/models/tflite/average_pool_2d/avgpool2/config.sh
index 1ef0b17ca..1ef0b17ca 100755
--- a/tests/scripts/models/config/average_pool_2d/avgpool2/config.sh
+++ b/tests/scripts/models/tflite/average_pool_2d/avgpool2/config.sh
diff --git a/tests/scripts/models/config/batch_to_space_nd2/config.sh b/tests/scripts/models/tflite/batch_to_space_nd2/config.sh
index 1dbcb7681..1dbcb7681 100755
--- a/tests/scripts/models/config/batch_to_space_nd2/config.sh
+++ b/tests/scripts/models/tflite/batch_to_space_nd2/config.sh
diff --git a/tests/scripts/models/config/cast/config.sh b/tests/scripts/models/tflite/cast/config.sh
index 0e123e3be..0e123e3be 100755
--- a/tests/scripts/models/config/cast/config.sh
+++ b/tests/scripts/models/tflite/cast/config.sh
diff --git a/tests/scripts/models/config/concat/2D/config.sh b/tests/scripts/models/tflite/concat/2D/config.sh
index fd22e708c..fd22e708c 100755
--- a/tests/scripts/models/config/concat/2D/config.sh
+++ b/tests/scripts/models/tflite/concat/2D/config.sh
diff --git a/tests/scripts/models/config/concat/concat1/config.sh b/tests/scripts/models/tflite/concat/concat1/config.sh
index 4543b163a..4543b163a 100755
--- a/tests/scripts/models/config/concat/concat1/config.sh
+++ b/tests/scripts/models/tflite/concat/concat1/config.sh
diff --git a/tests/scripts/models/config/concat/concat2/config.sh b/tests/scripts/models/tflite/concat/concat2/config.sh
index f4404e471..f4404e471 100755
--- a/tests/scripts/models/config/concat/concat2/config.sh
+++ b/tests/scripts/models/tflite/concat/concat2/config.sh
diff --git a/tests/scripts/models/config/conv_2d/convolution1/config.sh b/tests/scripts/models/tflite/conv_2d/convolution1/config.sh
index 46a205fdb..46a205fdb 100755
--- a/tests/scripts/models/config/conv_2d/convolution1/config.sh
+++ b/tests/scripts/models/tflite/conv_2d/convolution1/config.sh
diff --git a/tests/scripts/models/config/conv_2d/convolution2/config.sh b/tests/scripts/models/tflite/conv_2d/convolution2/config.sh
index eca5b3374..eca5b3374 100755
--- a/tests/scripts/models/config/conv_2d/convolution2/config.sh
+++ b/tests/scripts/models/tflite/conv_2d/convolution2/config.sh
diff --git a/tests/scripts/models/config/custom/squared_difference/config.sh b/tests/scripts/models/tflite/custom/squared_difference/config.sh
index 745a84447..745a84447 100755
--- a/tests/scripts/models/config/custom/squared_difference/config.sh
+++ b/tests/scripts/models/tflite/custom/squared_difference/config.sh
diff --git a/tests/scripts/models/config/depthwise_conv_2d/depthconv1/config.sh b/tests/scripts/models/tflite/depthwise_conv_2d/depthconv1/config.sh
index 39aa4a45f..39aa4a45f 100755
--- a/tests/scripts/models/config/depthwise_conv_2d/depthconv1/config.sh
+++ b/tests/scripts/models/tflite/depthwise_conv_2d/depthconv1/config.sh
diff --git a/tests/scripts/models/config/depthwise_conv_2d/depthconv2/config.sh b/tests/scripts/models/tflite/depthwise_conv_2d/depthconv2/config.sh
index 3dd7c50b3..3dd7c50b3 100755
--- a/tests/scripts/models/config/depthwise_conv_2d/depthconv2/config.sh
+++ b/tests/scripts/models/tflite/depthwise_conv_2d/depthconv2/config.sh
diff --git a/tests/scripts/models/config/depthwise_conv_2d_no_fuse/config.sh b/tests/scripts/models/tflite/depthwise_conv_2d_no_fuse/config.sh
index 13fb264f0..13fb264f0 100755
--- a/tests/scripts/models/config/depthwise_conv_2d_no_fuse/config.sh
+++ b/tests/scripts/models/tflite/depthwise_conv_2d_no_fuse/config.sh
diff --git a/tests/scripts/models/config/div/broadcast/config.sh b/tests/scripts/models/tflite/div/broadcast/config.sh
index 7c5e985fa..7c5e985fa 100755
--- a/tests/scripts/models/config/div/broadcast/config.sh
+++ b/tests/scripts/models/tflite/div/broadcast/config.sh
diff --git a/tests/scripts/models/config/embedding_lookup/config.sh b/tests/scripts/models/tflite/embedding_lookup/config.sh
index 5e5e4ad92..5e5e4ad92 100755
--- a/tests/scripts/models/config/embedding_lookup/config.sh
+++ b/tests/scripts/models/tflite/embedding_lookup/config.sh
diff --git a/tests/scripts/models/config/equal/config.sh b/tests/scripts/models/tflite/equal/config.sh
index a43fd73f7..a43fd73f7 100755
--- a/tests/scripts/models/config/equal/config.sh
+++ b/tests/scripts/models/tflite/equal/config.sh
diff --git a/tests/scripts/models/config/exp/config.sh b/tests/scripts/models/tflite/exp/config.sh
index 944f0bbce..944f0bbce 100755
--- a/tests/scripts/models/config/exp/config.sh
+++ b/tests/scripts/models/tflite/exp/config.sh
diff --git a/tests/scripts/models/config/floor/floor1/config.sh b/tests/scripts/models/tflite/floor/floor1/config.sh
index 4952e4a54..4952e4a54 100755
--- a/tests/scripts/models/config/floor/floor1/config.sh
+++ b/tests/scripts/models/tflite/floor/floor1/config.sh
diff --git a/tests/scripts/models/config/floor/floor2/config.sh b/tests/scripts/models/tflite/floor/floor2/config.sh
index 24581dc33..24581dc33 100755
--- a/tests/scripts/models/config/floor/floor2/config.sh
+++ b/tests/scripts/models/tflite/floor/floor2/config.sh
diff --git a/tests/scripts/models/config/fullyconnected/fc1/config.sh b/tests/scripts/models/tflite/fullyconnected/fc1/config.sh
index 013361583..013361583 100755
--- a/tests/scripts/models/config/fullyconnected/fc1/config.sh
+++ b/tests/scripts/models/tflite/fullyconnected/fc1/config.sh
diff --git a/tests/scripts/models/config/fullyconnected/hybrid/config.sh b/tests/scripts/models/tflite/fullyconnected/hybrid/config.sh
index b2d8ffe86..b2d8ffe86 100755
--- a/tests/scripts/models/config/fullyconnected/hybrid/config.sh
+++ b/tests/scripts/models/tflite/fullyconnected/hybrid/config.sh
diff --git a/tests/scripts/models/config/fullyconnected/matmul2x2/config.sh b/tests/scripts/models/tflite/fullyconnected/matmul2x2/config.sh
index 91fd2ffce..91fd2ffce 100755
--- a/tests/scripts/models/config/fullyconnected/matmul2x2/config.sh
+++ b/tests/scripts/models/tflite/fullyconnected/matmul2x2/config.sh
diff --git a/tests/scripts/models/config/fullyconnected/weights_as_input/config.sh b/tests/scripts/models/tflite/fullyconnected/weights_as_input/config.sh
index 1c218c5f4..1c218c5f4 100755
--- a/tests/scripts/models/config/fullyconnected/weights_as_input/config.sh
+++ b/tests/scripts/models/tflite/fullyconnected/weights_as_input/config.sh
diff --git a/tests/scripts/models/config/gather/config.sh b/tests/scripts/models/tflite/gather/config.sh
index 0f100a823..0f100a823 100755
--- a/tests/scripts/models/config/gather/config.sh
+++ b/tests/scripts/models/tflite/gather/config.sh
diff --git a/tests/scripts/models/config/greater/config.sh b/tests/scripts/models/tflite/greater/config.sh
index aba3d4a3f..aba3d4a3f 100755
--- a/tests/scripts/models/config/greater/config.sh
+++ b/tests/scripts/models/tflite/greater/config.sh
diff --git a/tests/scripts/models/config/greater_equal/config.sh b/tests/scripts/models/tflite/greater_equal/config.sh
index 72beaa81f..72beaa81f 100755
--- a/tests/scripts/models/config/greater_equal/config.sh
+++ b/tests/scripts/models/tflite/greater_equal/config.sh
diff --git a/tests/scripts/models/config/hashtable_lookup/config.sh b/tests/scripts/models/tflite/hashtable_lookup/config.sh
index 3222ee4d2..3222ee4d2 100755
--- a/tests/scripts/models/config/hashtable_lookup/config.sh
+++ b/tests/scripts/models/tflite/hashtable_lookup/config.sh
diff --git a/tests/scripts/models/config/l2_normalization/config.sh b/tests/scripts/models/tflite/l2_normalization/config.sh
index 47801240f..47801240f 100755
--- a/tests/scripts/models/config/l2_normalization/config.sh
+++ b/tests/scripts/models/tflite/l2_normalization/config.sh
diff --git a/tests/scripts/models/config/l2_pool_2d/config.sh b/tests/scripts/models/tflite/l2_pool_2d/config.sh
index a77aa66cf..a77aa66cf 100755
--- a/tests/scripts/models/config/l2_pool_2d/config.sh
+++ b/tests/scripts/models/tflite/l2_pool_2d/config.sh
diff --git a/tests/scripts/models/config/less/config.sh b/tests/scripts/models/tflite/less/config.sh
index 7488dde0f..7488dde0f 100755
--- a/tests/scripts/models/config/less/config.sh
+++ b/tests/scripts/models/tflite/less/config.sh
diff --git a/tests/scripts/models/config/less_equal/config.sh b/tests/scripts/models/tflite/less_equal/config.sh
index 2b53700f6..2b53700f6 100755
--- a/tests/scripts/models/config/less_equal/config.sh
+++ b/tests/scripts/models/tflite/less_equal/config.sh
diff --git a/tests/scripts/models/config/logistic/config.sh b/tests/scripts/models/tflite/logistic/config.sh
index 456773aa9..456773aa9 100755
--- a/tests/scripts/models/config/logistic/config.sh
+++ b/tests/scripts/models/tflite/logistic/config.sh
diff --git a/tests/scripts/models/config/max/config.sh b/tests/scripts/models/tflite/max/config.sh
index 479ca7fd0..479ca7fd0 100755
--- a/tests/scripts/models/config/max/config.sh
+++ b/tests/scripts/models/tflite/max/config.sh
diff --git a/tests/scripts/models/config/max_pool_2d/maxpool1/config.sh b/tests/scripts/models/tflite/max_pool_2d/maxpool1/config.sh
index 19a602eb5..19a602eb5 100755
--- a/tests/scripts/models/config/max_pool_2d/maxpool1/config.sh
+++ b/tests/scripts/models/tflite/max_pool_2d/maxpool1/config.sh
diff --git a/tests/scripts/models/config/max_pool_2d/maxpool2/config.sh b/tests/scripts/models/tflite/max_pool_2d/maxpool2/config.sh
index dc71599da..dc71599da 100755
--- a/tests/scripts/models/config/max_pool_2d/maxpool2/config.sh
+++ b/tests/scripts/models/tflite/max_pool_2d/maxpool2/config.sh
diff --git a/tests/scripts/models/config/mean/config.sh b/tests/scripts/models/tflite/mean/config.sh
index 0853a87fc..0853a87fc 100755
--- a/tests/scripts/models/config/mean/config.sh
+++ b/tests/scripts/models/tflite/mean/config.sh
diff --git a/tests/scripts/models/config/min/config.sh b/tests/scripts/models/tflite/min/config.sh
index 8148471a5..8148471a5 100755
--- a/tests/scripts/models/config/min/config.sh
+++ b/tests/scripts/models/tflite/min/config.sh
diff --git a/tests/scripts/models/config/mul/broadcast/config.sh b/tests/scripts/models/tflite/mul/broadcast/config.sh
index 5522ac877..5522ac877 100755
--- a/tests/scripts/models/config/mul/broadcast/config.sh
+++ b/tests/scripts/models/tflite/mul/broadcast/config.sh
diff --git a/tests/scripts/models/config/neg/config.sh b/tests/scripts/models/tflite/neg/config.sh
index 000f7c811..000f7c811 100755
--- a/tests/scripts/models/config/neg/config.sh
+++ b/tests/scripts/models/tflite/neg/config.sh
diff --git a/tests/scripts/models/config/not_equal/config.sh b/tests/scripts/models/tflite/not_equal/config.sh
index e2234197e..e2234197e 100755
--- a/tests/scripts/models/config/not_equal/config.sh
+++ b/tests/scripts/models/tflite/not_equal/config.sh
diff --git a/tests/scripts/models/config/one_hot/config.sh b/tests/scripts/models/tflite/one_hot/config.sh
index 7e3823486..7e3823486 100755
--- a/tests/scripts/models/config/one_hot/config.sh
+++ b/tests/scripts/models/tflite/one_hot/config.sh
diff --git a/tests/scripts/models/config/pack/config.sh b/tests/scripts/models/tflite/pack/config.sh
index 162ec9d9d..162ec9d9d 100755
--- a/tests/scripts/models/config/pack/config.sh
+++ b/tests/scripts/models/tflite/pack/config.sh
diff --git a/tests/scripts/models/config/pad/4D_2D/config.sh b/tests/scripts/models/tflite/pad/4D_2D/config.sh
index 9e0de2244..9e0de2244 100755
--- a/tests/scripts/models/config/pad/4D_2D/config.sh
+++ b/tests/scripts/models/tflite/pad/4D_2D/config.sh
diff --git a/tests/scripts/models/config/pad/pad1/config.sh b/tests/scripts/models/tflite/pad/pad1/config.sh
index 088cd8962..088cd8962 100755
--- a/tests/scripts/models/config/pad/pad1/config.sh
+++ b/tests/scripts/models/tflite/pad/pad1/config.sh
diff --git a/tests/scripts/models/config/pad/pad2/config.sh b/tests/scripts/models/tflite/pad/pad2/config.sh
index 1683f5350..1683f5350 100755
--- a/tests/scripts/models/config/pad/pad2/config.sh
+++ b/tests/scripts/models/tflite/pad/pad2/config.sh
diff --git a/tests/scripts/models/config/reduce_max/config.sh b/tests/scripts/models/tflite/reduce_max/config.sh
index d636b8bd3..d636b8bd3 100755
--- a/tests/scripts/models/config/reduce_max/config.sh
+++ b/tests/scripts/models/tflite/reduce_max/config.sh
diff --git a/tests/scripts/models/config/reduce_mean/test1/config.sh b/tests/scripts/models/tflite/reduce_mean/test1/config.sh
index 2f370ea4e..2f370ea4e 100755
--- a/tests/scripts/models/config/reduce_mean/test1/config.sh
+++ b/tests/scripts/models/tflite/reduce_mean/test1/config.sh
diff --git a/tests/scripts/models/config/reduce_mean/test2/config.sh b/tests/scripts/models/tflite/reduce_mean/test2/config.sh
index 6c54779a9..6c54779a9 100755
--- a/tests/scripts/models/config/reduce_mean/test2/config.sh
+++ b/tests/scripts/models/tflite/reduce_mean/test2/config.sh
diff --git a/tests/scripts/models/config/reduce_sum/float/config.sh b/tests/scripts/models/tflite/reduce_sum/float/config.sh
index 31b185397..31b185397 100755
--- a/tests/scripts/models/config/reduce_sum/float/config.sh
+++ b/tests/scripts/models/tflite/reduce_sum/float/config.sh
diff --git a/tests/scripts/models/config/reduce_sum/uint8/config.sh b/tests/scripts/models/tflite/reduce_sum/uint8/config.sh
index d7d9f73f6..d7d9f73f6 100755
--- a/tests/scripts/models/config/reduce_sum/uint8/config.sh
+++ b/tests/scripts/models/tflite/reduce_sum/uint8/config.sh
diff --git a/tests/scripts/models/config/relu/config.sh b/tests/scripts/models/tflite/relu/config.sh
index bca59ef04..bca59ef04 100755
--- a/tests/scripts/models/config/relu/config.sh
+++ b/tests/scripts/models/tflite/relu/config.sh
diff --git a/tests/scripts/models/config/relu6/config.sh b/tests/scripts/models/tflite/relu6/config.sh
index 662cc4f33..662cc4f33 100755
--- a/tests/scripts/models/config/relu6/config.sh
+++ b/tests/scripts/models/tflite/relu6/config.sh
diff --git a/tests/scripts/models/config/reshape/3D/config.sh b/tests/scripts/models/tflite/reshape/3D/config.sh
index 3f7ec31ea..3f7ec31ea 100755
--- a/tests/scripts/models/config/reshape/3D/config.sh
+++ b/tests/scripts/models/tflite/reshape/3D/config.sh
diff --git a/tests/scripts/models/config/reshape/reshape1/config.sh b/tests/scripts/models/tflite/reshape/reshape1/config.sh
index 7bdef06ba..7bdef06ba 100755
--- a/tests/scripts/models/config/reshape/reshape1/config.sh
+++ b/tests/scripts/models/tflite/reshape/reshape1/config.sh
diff --git a/tests/scripts/models/config/reshape/reshape2/config.sh b/tests/scripts/models/tflite/reshape/reshape2/config.sh
index b040f0081..b040f0081 100755
--- a/tests/scripts/models/config/reshape/reshape2/config.sh
+++ b/tests/scripts/models/tflite/reshape/reshape2/config.sh
diff --git a/tests/scripts/models/config/resize_bilinear/config.sh b/tests/scripts/models/tflite/resize_bilinear/config.sh
index 8f612cf6d..8f612cf6d 100755
--- a/tests/scripts/models/config/resize_bilinear/config.sh
+++ b/tests/scripts/models/tflite/resize_bilinear/config.sh
diff --git a/tests/scripts/models/config/rnn/config.sh b/tests/scripts/models/tflite/rnn/config.sh
index 997d6c138..997d6c138 100755
--- a/tests/scripts/models/config/rnn/config.sh
+++ b/tests/scripts/models/tflite/rnn/config.sh
diff --git a/tests/scripts/models/config/rsqrt/config.sh b/tests/scripts/models/tflite/rsqrt/config.sh
index 87aa85277..87aa85277 100755
--- a/tests/scripts/models/config/rsqrt/config.sh
+++ b/tests/scripts/models/tflite/rsqrt/config.sh
diff --git a/tests/scripts/models/config/select/config.sh b/tests/scripts/models/tflite/select/config.sh
index 95e49e0dc..95e49e0dc 100755
--- a/tests/scripts/models/config/select/config.sh
+++ b/tests/scripts/models/tflite/select/config.sh
diff --git a/tests/scripts/models/config/shape/config.sh b/tests/scripts/models/tflite/shape/config.sh
index 468f38687..468f38687 100644
--- a/tests/scripts/models/config/shape/config.sh
+++ b/tests/scripts/models/tflite/shape/config.sh
diff --git a/tests/scripts/models/config/sin/config.sh b/tests/scripts/models/tflite/sin/config.sh
index dcf1959d8..dcf1959d8 100755
--- a/tests/scripts/models/config/sin/config.sh
+++ b/tests/scripts/models/tflite/sin/config.sh
diff --git a/tests/scripts/models/config/slice/config.sh b/tests/scripts/models/tflite/slice/config.sh
index 12d06e977..12d06e977 100755
--- a/tests/scripts/models/config/slice/config.sh
+++ b/tests/scripts/models/tflite/slice/config.sh
diff --git a/tests/scripts/models/config/softmax/config.sh b/tests/scripts/models/tflite/softmax/config.sh
index fa6300d7e..fa6300d7e 100755
--- a/tests/scripts/models/config/softmax/config.sh
+++ b/tests/scripts/models/tflite/softmax/config.sh
diff --git a/tests/scripts/models/config/space_to_batch_nd2/config.sh b/tests/scripts/models/tflite/space_to_batch_nd2/config.sh
index 81933709e..81933709e 100755
--- a/tests/scripts/models/config/space_to_batch_nd2/config.sh
+++ b/tests/scripts/models/tflite/space_to_batch_nd2/config.sh
diff --git a/tests/scripts/models/config/space_to_depth/config.sh b/tests/scripts/models/tflite/space_to_depth/config.sh
index ed103b826..ed103b826 100755
--- a/tests/scripts/models/config/space_to_depth/config.sh
+++ b/tests/scripts/models/tflite/space_to_depth/config.sh
diff --git a/tests/scripts/models/config/sqrt/config.sh b/tests/scripts/models/tflite/sqrt/config.sh
index 220147238..220147238 100755
--- a/tests/scripts/models/config/sqrt/config.sh
+++ b/tests/scripts/models/tflite/sqrt/config.sh
diff --git a/tests/scripts/models/config/squeeze/config.sh b/tests/scripts/models/tflite/squeeze/config.sh
index 5bcc67716..5bcc67716 100755
--- a/tests/scripts/models/config/squeeze/config.sh
+++ b/tests/scripts/models/tflite/squeeze/config.sh
diff --git a/tests/scripts/models/config/strided_slice/config.sh b/tests/scripts/models/tflite/strided_slice/config.sh
index 4c41a1a39..4c41a1a39 100755
--- a/tests/scripts/models/config/strided_slice/config.sh
+++ b/tests/scripts/models/tflite/strided_slice/config.sh
diff --git a/tests/scripts/models/config/sub/broadcast/config.sh b/tests/scripts/models/tflite/sub/broadcast/config.sh
index 2b1add0e5..2b1add0e5 100755
--- a/tests/scripts/models/config/sub/broadcast/config.sh
+++ b/tests/scripts/models/tflite/sub/broadcast/config.sh
diff --git a/tests/scripts/models/config/tanh/config.sh b/tests/scripts/models/tflite/tanh/config.sh
index a9dde4923..a9dde4923 100755
--- a/tests/scripts/models/config/tanh/config.sh
+++ b/tests/scripts/models/tflite/tanh/config.sh
diff --git a/tests/scripts/models/config/tile/config.sh b/tests/scripts/models/tflite/tile/config.sh
index 33fda3e1a..33fda3e1a 100644
--- a/tests/scripts/models/config/tile/config.sh
+++ b/tests/scripts/models/tflite/tile/config.sh
diff --git a/tests/scripts/models/config/topk_v2/config.sh b/tests/scripts/models/tflite/topk_v2/config.sh
index 1a460266f..1a460266f 100755
--- a/tests/scripts/models/config/topk_v2/config.sh
+++ b/tests/scripts/models/tflite/topk_v2/config.sh
diff --git a/tests/scripts/models/config/transpose/config.sh b/tests/scripts/models/tflite/transpose/config.sh
index 9adb85e70..9adb85e70 100755
--- a/tests/scripts/models/config/transpose/config.sh
+++ b/tests/scripts/models/tflite/transpose/config.sh
diff --git a/tests/scripts/models/config/transpose_conv/same/config.sh b/tests/scripts/models/tflite/transpose_conv/same/config.sh
index 2cca86e03..2cca86e03 100755
--- a/tests/scripts/models/config/transpose_conv/same/config.sh
+++ b/tests/scripts/models/tflite/transpose_conv/same/config.sh
diff --git a/tests/scripts/models/config/transpose_conv/valid/config.sh b/tests/scripts/models/tflite/transpose_conv/valid/config.sh
index d162331a3..d162331a3 100755
--- a/tests/scripts/models/config/transpose_conv/valid/config.sh
+++ b/tests/scripts/models/tflite/transpose_conv/valid/config.sh
diff --git a/tests/scripts/models/config/zeros_like/config.sh b/tests/scripts/models/tflite/zeros_like/config.sh
index cadeeb961..cadeeb961 100755
--- a/tests/scripts/models/config/zeros_like/config.sh
+++ b/tests/scripts/models/tflite/zeros_like/config.sh
diff --git a/tests/scripts/nnfw_api_gtest/install_nnfw_api_gtest_nnpackages.sh b/tests/scripts/nnfw_api_gtest/install_nnfw_api_gtest_nnpackages.sh
deleted file mode 100755
index ca282c84a..000000000
--- a/tests/scripts/nnfw_api_gtest/install_nnfw_api_gtest_nnpackages.sh
+++ /dev/null
@@ -1,148 +0,0 @@
-#!/usr/bin/env bash
-
-# TODO Reuse the fuction in run_test.sh. This is its duplication.
-function need_download()
-{
- LOCAL_PATH=$1
- REMOTE_URL=$2
- if [ ! -e $LOCAL_PATH ]; then
- return 0;
- fi
- # Ignore checking md5 in cache
- if [ ! -z $IGNORE_MD5 ] && [ "$IGNORE_MD5" == "1" ]; then
- return 1
- fi
-
- LOCAL_HASH=$(md5sum $LOCAL_PATH | awk '{ print $1 }')
- REMOTE_HASH=$(curl -ss $REMOTE_URL | md5sum | awk '{ print $1 }')
- # TODO Emit an error when Content-MD5 field was not found. (Server configuration issue)
- if [ "$LOCAL_HASH" != "$REMOTE_HASH" ]; then
- echo "Downloaded file is outdated or incomplete."
- return 0
- fi
- return 1
-}
-
-# TODO Reuse the fuction in run_test.sh. This is its duplication.
-download_tests()
-{
- SELECTED_TESTS=$@
-
- echo ""
- echo "Downloading tests:"
- echo "======================"
- for TEST_NAME in $SELECTED_TESTS; do
- echo $TEST_NAME
- done
- echo "======================"
-
- for TEST_NAME in $SELECTED_TESTS; do
- # Test configure initialization
- MODELFILE_SERVER_PATH=""
- MODELFILE_NAME=""
- source $TEST_ROOT_PATH/$TEST_NAME/config.sh
-
- TEST_CACHE_PATH=$CACHE_ROOT_PATH/$TEST_NAME
- MODELFILE=$TEST_CACHE_PATH/$MODELFILE_NAME
- MODELFILE_URL="$MODELFILE_SERVER/$MODELFILE_NAME"
- if [ -n "$FIXED_MODELFILE_SERVER" ]; then
- MODELFILE_URL="$FIXED_MODELFILE_SERVER/$MODELFILE_NAME"
- fi
-
- # Download model file
- if [ ! -e $TEST_CACHE_PATH ]; then
- mkdir -p $TEST_CACHE_PATH
- fi
-
- # Download unless we have it in cache (Also check md5sum)
- if need_download "$MODELFILE" "$MODELFILE_URL"; then
- echo ""
- echo "Download test file for $TEST_NAME"
- echo "======================"
-
- rm -f $MODELFILE # Remove invalid file if exists
- pushd $TEST_CACHE_PATH
- wget -nv $MODELFILE_URL
- if [ "${MODELFILE_NAME##*.}" == "zip" ]; then
- unzip -o $MODELFILE_NAME
- rm *.zip
- fi
- popd
- fi
-
- done
-}
-
-realpath()
-{
- readlink -e -- "$@"
-}
-
-usage()
-{
- echo "Usage: $0 --modelfile-server=MODELFILE_SERVER --install-path=INSTALL_DIR"
- echo " MODELFILE_SERVER : Base URL of the model file server"
- echo " INSTALL_DIR : Path to be installed"
- exit 1
-}
-
-while [[ $# -gt 0 ]]
-do
- key="$(echo $1 | awk '{print tolower($0)}')"
- case "$key" in
- -?|-h|--help)
- usage
- exit 1
- ;;
- --modelfile-server)
- MODELFILE_SERVER="$2"
- shift
- ;;
- --modelfile-server=*)
- MODELFILE_SERVER="${1#*=}"
- ;;
- --install-dir)
- INSTALL_DIR="$2"
- shift
- ;;
- --install-dir=*)
- INSTALL_DIR="${1#*=}"
- ;;
- *)
- echo "Invalid option '$1'"
- usage
- exit 1
- ;;
- esac
- shift
-done
-
-if [ -z "$MODELFILE_SERVER" ]; then
- echo "Please specify a value for --modelfile-server or MODELFILE_SERVER(env)."
- usage
- exit 1
-fi
-
-if [ -z "$INSTALL_DIR" ]; then
- echo "Please specify a value for --install-dir or INSTALL_DIR(env)."
- usage
- exit 1
-fi
-
-set -e
-
-THIS_SCRIPT_DIR=$(realpath $(dirname ${BASH_SOURCE}))
-source ${THIS_SCRIPT_DIR}/../common.sh
-
-CACHE_ROOT_PATH=$INSTALL_DIR
-FIXED_MODELFILE_SERVER="${MODELFILE_SERVER:-}"
-TEST_ROOT_PATH=${THIS_SCRIPT_DIR}/models
-
-# All models in the directory are the target models
-pushd ${TEST_ROOT_PATH}
-MODELS=$(ls -d */)
-popd
-
-download_tests $MODELS
-
-set +e
diff --git a/tests/scripts/test_scheduler_with_profiling.sh b/tests/scripts/test_scheduler_with_profiling.sh
index 5c24572d8..b88cae838 100755
--- a/tests/scripts/test_scheduler_with_profiling.sh
+++ b/tests/scripts/test_scheduler_with_profiling.sh
@@ -37,7 +37,7 @@ function run_without_sched()
print_with_dots "$EXECUTOR $BACKEND without scheduler"
- RESULT=$(get_result_of_benchmark_test $RUN_TEST_SH $BENCHMARK_DRIVER_BIN $MODEL $LOG_FILE)
+ RESULT=$(get_result_of_benchmark_test $BENCHMARK_DRIVER_BIN $MODEL $LOG_FILE)
printf -v RESULT_INT '%d' $RESULT 2>/dev/null
PERCENTAGE=$((100-RESULT_SCH_INT*100/RESULT_INT))
@@ -105,12 +105,12 @@ function run_benchmark_test()
export GRAPH_DOT_DUMP=1
print_with_dots "Parallel with scheduler"
- RESULT=$(get_result_of_benchmark_test $RUN_TEST_SH $BENCHMARK_DRIVER_BIN $MODEL $LOG_FILE)
+ RESULT=$(get_result_of_benchmark_test $BENCHMARK_DRIVER_BIN $MODEL $LOG_FILE)
echo "$RESULT ms"
printf -v RESULT_SCH_INT '%d' $RESULT 2>/dev/null
- mv "after_lower.dot" $REPORT_MODEL_DIR/"after_lower_parallel.dot"
+ mv "after_lower_subg-0.dot" $REPORT_MODEL_DIR/"after_lower_subg-0_parallel.dot"
##################################################################################
# Run Linear executor with scheduler
@@ -120,7 +120,7 @@ function run_benchmark_test()
export GRAPH_DOT_DUMP=1
print_with_dots "Linear with scheduler"
- RESULT=$(get_result_of_benchmark_test $RUN_TEST_SH $BENCHMARK_DRIVER_BIN $MODEL $LOG_FILE)
+ RESULT=$(get_result_of_benchmark_test $BENCHMARK_DRIVER_BIN $MODEL $LOG_FILE)
printf -v RESULT_INT '%d' $RESULT 2>/dev/null
PERCENTAGE=$((100-RESULT_SCH_INT*100/RESULT_INT))
@@ -130,7 +130,7 @@ function run_benchmark_test()
# for operations with input&output sizes the same as the model
mv "exec_time.json" $REPORT_MODEL_DIR
# Save the dot graph
- mv "after_lower.dot" $REPORT_MODEL_DIR/"after_lower_linear.dot"
+ mv "after_lower_subg-0.dot" $REPORT_MODEL_DIR/"after_lower_subg-0_linear.dot"
unset GRAPH_DOT_DUMP
##################################################################################
diff --git a/tests/tools/nnpackage_run/src/args.cc b/tests/tools/nnpackage_run/src/args.cc
index cb4a7dbaa..90021bff3 100644
--- a/tests/tools/nnpackage_run/src/args.cc
+++ b/tests/tools/nnpackage_run/src/args.cc
@@ -55,8 +55,8 @@ std::unordered_map<uint32_t, Json::Value> argArrayToMap(const Json::Value &jsonv
return ret;
}
-// param shape_str is a form of, e.g., "[1, [2, 3], 3, []]"
-void handleShapeParam(nnpkg_run::TensorShapeMap &shape_map, const std::string &shape_str)
+// param shape_str is of the form, e.g., "[1, [2, 3], 3, []]" or "h5"
+void handleShapeJsonParam(nnpkg_run::TensorShapeMap &shape_map, const std::string &shape_str)
{
Json::Value root;
Json::Reader reader;
@@ -152,9 +152,16 @@ void Args::Initialize(void)
};
auto process_shape_prepare = [&](const std::string &shape_str) {
+#if defined(ONERT_HAVE_HDF5) && ONERT_HAVE_HDF5 == 1
+ if (shape_str == "H5" || shape_str == "h5")
+ {
+ _when_to_use_h5_shape = WhenToUseH5Shape::PREPARE;
+ return;
+ }
+#endif
try
{
- handleShapeParam(_shape_prepare, shape_str);
+ handleShapeJsonParam(_shape_prepare, shape_str);
}
catch (const std::exception &e)
{
@@ -164,9 +171,16 @@ void Args::Initialize(void)
};
auto process_shape_run = [&](const std::string &shape_str) {
+#if defined(ONERT_HAVE_HDF5) && ONERT_HAVE_HDF5 == 1
+ if (shape_str == "H5" || shape_str == "h5")
+ {
+ _when_to_use_h5_shape = WhenToUseH5Shape::RUN;
+ return;
+ }
+#endif
try
{
- handleShapeParam(_shape_run, shape_str);
+ handleShapeJsonParam(_shape_run, shape_str);
}
catch (const std::exception &e)
{
@@ -202,11 +216,13 @@ void Args::Initialize(void)
"e.g. nnpackage_run-UNIT_Add_000-acl_cl.csv.\n"
"{nnpkg} name may be changed to realpath if you use symbolic-link.")
("shape_prepare", po::value<std::string>()->default_value("[]")->notifier(process_shape_prepare),
- "set shape of specified tensor before compilation\n"
- "e.g. '[0, [1, 2], 2, []]' to set 0th tensor to [1, 2] and 2nd tensor to [].\n")
+ "set shape of specified tensor before compilation (before calling nnfw_prepare()).\n"
+ "'h5': read shape(s) from H5 input file. '--load' should also be provided.\n"
+ "'[0, [1, 2], 2, []]': set 0th tensor to [1, 2] and 2nd tensor to [].")
("shape_run", po::value<std::string>()->default_value("[]")->notifier(process_shape_run),
- "set shape of specified tensor right before running\n"
- "e.g. '[1, [1, 2]]` to set 1st tensor to [1, 2].\n")
+ "set shape of specified tensor before running (before calling nnfw_run()).\n"
+ "'h5': read shape(s) from H5 input file. '--load' should also be provided.\n"
+ "'[0, [1, 2], 2, []]': set 0th tensor to [1, 2] and 2nd tensor to [].")
("verbose_level,v", po::value<int>()->default_value(0)->notifier([&](const auto &v) { _verbose_level = v; }),
"Verbose level\n"
"0: prints the only result. Messages btw run don't print\n"
diff --git a/tests/tools/nnpackage_run/src/args.h b/tests/tools/nnpackage_run/src/args.h
index 4bc3e6c62..d2b33fc82 100644
--- a/tests/tools/nnpackage_run/src/args.h
+++ b/tests/tools/nnpackage_run/src/args.h
@@ -22,12 +22,23 @@
#include <vector>
#include <boost/program_options.hpp>
+#include "types.h"
+
namespace po = boost::program_options;
namespace nnpkg_run
{
-using TensorShapeMap = std::unordered_map<uint32_t, std::vector<int>>;
+using TensorShapeMap = std::unordered_map<uint32_t, TensorShape>;
+
+#if defined(ONERT_HAVE_HDF5) && ONERT_HAVE_HDF5 == 1
+enum class WhenToUseH5Shape
+{
+ DO_NOT_USE, // don't use shapes in h5 file
+ PREPARE, // read shapes in h5 file and set them as inputs' shape before calling nnfw_prepare()
+ RUN, // read shapes in h5 file and set them as inputs' shape before calling nnfw_run()
+};
+#endif
class Args
{
@@ -39,6 +50,7 @@ public:
#if defined(ONERT_HAVE_HDF5) && ONERT_HAVE_HDF5 == 1
const std::string &getDumpFilename(void) const { return _dump_filename; }
const std::string &getLoadFilename(void) const { return _load_filename; }
+ WhenToUseH5Shape getWhenToUseH5Shape(void) const { return _when_to_use_h5_shape; }
#endif
const int getNumRuns(void) const { return _num_runs; }
const int getWarmupRuns(void) const { return _warmup_runs; }
@@ -48,8 +60,8 @@ public:
const bool getMemoryPoll(void) const { return _mem_poll; }
const bool getWriteReport(void) const { return _write_report; }
const bool printVersion(void) const { return _print_version; }
- const TensorShapeMap &getShapeMapForPrepare() { return _shape_prepare; }
- const TensorShapeMap &getShapeMapForRun() { return _shape_run; }
+ TensorShapeMap &getShapeMapForPrepare() { return _shape_prepare; }
+ TensorShapeMap &getShapeMapForRun() { return _shape_run; }
const int getVerboseLevel(void) const { return _verbose_level; }
private:
@@ -64,6 +76,7 @@ private:
#if defined(ONERT_HAVE_HDF5) && ONERT_HAVE_HDF5 == 1
std::string _dump_filename;
std::string _load_filename;
+ WhenToUseH5Shape _when_to_use_h5_shape = WhenToUseH5Shape::DO_NOT_USE;
#endif
TensorShapeMap _shape_prepare;
TensorShapeMap _shape_run;
diff --git a/tests/tools/nnpackage_run/src/h5formatter.cc b/tests/tools/nnpackage_run/src/h5formatter.cc
index 09ace4798..3929c8d90 100644
--- a/tests/tools/nnpackage_run/src/h5formatter.cc
+++ b/tests/tools/nnpackage_run/src/h5formatter.cc
@@ -22,10 +22,68 @@
#include <stdexcept>
#include <H5Cpp.h>
+namespace
+{
+nnpkg_run::TensorShape getShape(H5::DataSet &data_set)
+{
+ std::vector<hsize_t> h5_shape; // hsize_t is unsigned long long
+ H5::DataSpace data_space = data_set.getSpace();
+ int rank = data_space.getSimpleExtentNdims();
+ h5_shape.resize(rank);
+
+ // read shape info from H5 file
+ data_space.getSimpleExtentDims(h5_shape.data(), NULL);
+
+ nnpkg_run::TensorShape shape;
+ for (auto dim : h5_shape)
+ shape.emplace_back(static_cast<int>(dim));
+
+ return shape;
+}
+} // namespace
+
namespace nnpkg_run
{
static const char *h5_value_grpname = "value";
+std::vector<TensorShape> H5Formatter::readTensorShapes(const std::string &filename)
+{
+ uint32_t num_inputs;
+ NNPR_ENSURE_STATUS(nnfw_input_size(session_, &num_inputs));
+ std::vector<TensorShape> tensor_shapes;
+
+ try
+ {
+ H5::Exception::dontPrint();
+
+ H5::H5File file(filename, H5F_ACC_RDONLY);
+ H5::Group value_group = file.openGroup(h5_value_grpname);
+
+ // Constraint: if there are n datasets, their names must be unique and
+ // be exactly "0", "1", ..., "n-1"
+ for (uint32_t i = 0; i < num_inputs; ++i)
+ {
+ H5::DataSet data_set = value_group.openDataSet(std::to_string(i));
+ H5::DataType type = data_set.getDataType();
+ auto shape = getShape(data_set);
+
+ tensor_shapes.emplace_back(shape);
+ }
+
+ return tensor_shapes;
+ }
+ catch (const H5::Exception &e)
+ {
+ H5::Exception::printErrorStack();
+ std::exit(-1);
+ }
+ catch (const std::exception &e)
+ {
+ std::cerr << e.what() << std::endl;
+ std::exit(-1);
+ }
+}
+
void H5Formatter::loadInputs(const std::string &filename, std::vector<Allocation> &inputs)
{
uint32_t num_inputs;
@@ -41,6 +99,9 @@ void H5Formatter::loadInputs(const std::string &filename, std::vector<Allocation
{
nnfw_tensorinfo ti;
NNPR_ENSURE_STATUS(nnfw_input_tensorinfo(session_, i, &ti));
+
+ // TODO Assert that the nnfw tensor shape matches the shape stored in the H5 file
+
// allocate memory for data
auto bufsz = bufsize_for(&ti);
inputs[i].alloc(bufsz);
@@ -156,7 +217,7 @@ void H5Formatter::dumpOutputs(const std::string &filename, std::vector<Allocatio
case NNFW_TYPE_TENSOR_BOOL:
{
H5::DataSet data_set =
- value_group.createDataSet(std::to_string(i), H5::PredType::STD_I8LE, data_space);
+ value_group.createDataSet(std::to_string(i), H5::PredType::STD_U8LE, data_space);
data_set.write(outputs[i].data(), H5::PredType::NATIVE_INT8);
break;
}
diff --git a/tests/tools/nnpackage_run/src/h5formatter.h b/tests/tools/nnpackage_run/src/h5formatter.h
index c8b64bfcd..203ba0e72 100644
--- a/tests/tools/nnpackage_run/src/h5formatter.h
+++ b/tests/tools/nnpackage_run/src/h5formatter.h
@@ -20,6 +20,7 @@
#include <string>
#include <vector>
+#include "types.h"
#include "allocation.h"
struct nnfw_session;
@@ -30,6 +31,7 @@ class H5Formatter
{
public:
H5Formatter(nnfw_session *sess) : session_(sess) {}
+ std::vector<TensorShape> readTensorShapes(const std::string &filename);
void loadInputs(const std::string &filename, std::vector<Allocation> &inputs);
void dumpOutputs(const std::string &filename, std::vector<Allocation> &outputs);
diff --git a/tests/tools/nnpackage_run/src/nnpackage_run.cc b/tests/tools/nnpackage_run/src/nnpackage_run.cc
index 88d3307af..a78e144d8 100644
--- a/tests/tools/nnpackage_run/src/nnpackage_run.cc
+++ b/tests/tools/nnpackage_run/src/nnpackage_run.cc
@@ -37,28 +37,13 @@
#include <unordered_map>
#include <vector>
-static const char *default_backend_cand = "acl_cl";
+static const char *default_backend_cand = "cpu";
-NNFW_STATUS resolve_op_backend(nnfw_session *session)
+void overwriteShapeMap(nnpkg_run::TensorShapeMap &shape_map,
+ std::vector<nnpkg_run::TensorShape> shapes)
{
- static std::unordered_map<std::string, std::string> operation_map = {
- {"TRANSPOSE_CONV", "OP_BACKEND_TransposeConv"}, {"CONV_2D", "OP_BACKEND_Conv2D"},
- {"DEPTHWISE_CONV_2D", "OP_BACKEND_DepthwiseConv2D"}, {"MEAN", "OP_BACKEND_Mean"},
- {"AVERAGE_POOL_2D", "OP_BACKEND_AvgPool2D"}, {"MAX_POOL_2D", "OP_BACKEND_MaxPool2D"},
- {"INSTANCE_NORM", "OP_BACKEND_InstanceNorm"}, {"ADD", "OP_BACKEND_Add"}};
-
- for (auto i : operation_map)
- {
- char *default_backend = std::getenv(i.second.c_str());
- if (default_backend)
- {
- NNFW_STATUS return_result = nnfw_set_op_backend(session, i.first.c_str(), default_backend);
- if (return_result == NNFW_STATUS_ERROR)
- return return_result;
- }
- }
-
- return NNFW_STATUS_NO_ERROR;
+ for (uint32_t i = 0; i < shapes.size(); i++)
+ shape_map[i] = shapes[i];
}
int main(const int argc, char **argv)
@@ -98,7 +83,6 @@ int main(const int argc, char **argv)
char *available_backends = std::getenv("BACKENDS");
if (available_backends)
NNPR_ENSURE_STATUS(nnfw_set_available_backends(session, available_backends));
- NNPR_ENSURE_STATUS(resolve_op_backend(session));
uint32_t num_inputs;
NNPR_ENSURE_STATUS(nnfw_input_size(session, &num_inputs));
@@ -157,7 +141,14 @@ int main(const int argc, char **argv)
verifyInputTypes();
verifyOutputTypes();
- // set input shape before compilation
+// set input shape before compilation
+#if defined(ONERT_HAVE_HDF5) && ONERT_HAVE_HDF5 == 1
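+ // "--shape_prepare h5": read the input shapes from the H5 file given via --load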
+ if (args.getWhenToUseH5Shape() == WhenToUseH5Shape::PREPARE)
+ {
+ auto shapes = H5Formatter(session).readTensorShapes(args.getLoadFilename());
+ overwriteShapeMap(args.getShapeMapForPrepare(), shapes);
+ }
+#endif
setTensorInfo(args.getShapeMapForPrepare());
// prepare execution
@@ -167,7 +158,14 @@ int main(const int argc, char **argv)
NNPR_ENSURE_STATUS(nnfw_prepare(session));
});
- // set input shape after compilation and before execution
+// set input shape after compilation and before execution
+#if defined(ONERT_HAVE_HDF5) && ONERT_HAVE_HDF5 == 1
+ if (args.getWhenToUseH5Shape() == WhenToUseH5Shape::RUN)
+ {
+ auto shapes = H5Formatter(session).readTensorShapes(args.getLoadFilename());
+ overwriteShapeMap(args.getShapeMapForRun(), shapes);
+ }
+#endif
setTensorInfo(args.getShapeMapForRun());
// prepare input
diff --git a/compiler/circle-quantizer/src/CircleExpContract.cpp b/tests/tools/nnpackage_run/src/types.h
index b56b7eedc..93a7ab230 100644
--- a/compiler/circle-quantizer/src/CircleExpContract.cpp
+++ b/tests/tools/nnpackage_run/src/types.h
@@ -14,20 +14,14 @@
* limitations under the License.
*/
-#include "CircleExpContract.h"
+#ifndef __NNPACKAGE_RUN_TYPES_H__
+#define __NNPACKAGE_RUN_TYPES_H__
-#include <oops/InternalExn.h>
-
-#include <fstream>
-#include <iostream>
-
-bool CircleExpContract::store(const char *ptr, const size_t size) const
+namespace nnpkg_run
{
- if (!ptr)
- INTERNAL_EXN("Graph was not serialized by FlatBuffer for some reason");
- std::ofstream fs(_filepath.c_str(), std::ofstream::binary);
- fs.write(ptr, size);
+using TensorShape = std::vector<int>;
+
+} // end of namespace nnpkg_run
- return fs.good();
-}
+#endif // __NNPACKAGE_RUN_TYPES_H__
diff --git a/tests/tools/tflite_run/src/tflite_run.cc b/tests/tools/tflite_run/src/tflite_run.cc
index 00b8b0ed4..e72966db5 100644
--- a/tests/tools/tflite_run/src/tflite_run.cc
+++ b/tests/tools/tflite_run/src/tflite_run.cc
@@ -220,13 +220,16 @@ int main(const int argc, char **argv)
// Generate unsigned 8-bit integer input
auto tensor_view = nnfw::tflite::TensorView<uint8_t>::make(*interpreter, o);
- uint8_t value = 0;
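+ // The cast selects the generate<uint8_t>(Shape, Index) overload so it can be bound below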
+ auto fp = static_cast<uint8_t (nnfw::misc::RandomGenerator::*)(
+ const ::nnfw::misc::tensor::Shape &, const ::nnfw::misc::tensor::Index &)>(
+ &nnfw::misc::RandomGenerator::generate<uint8_t>);
+ const nnfw::misc::tensor::Object<uint8_t> data(tensor_view.shape(),
+ std::bind(fp, randgen, _1, _2));
nnfw::misc::tensor::iterate(tensor_view.shape())
<< [&](const nnfw::misc::tensor::Index &ind) {
- // TODO Generate random values
+ const auto value = data.at(ind);
tensor_view.at(ind) = value;
- value = (value + 1) & 0xFF;
};
}
else if (tensor->type == kTfLiteBool)
diff --git a/tools/nnpackage_tool/model2nnpkg/README.md b/tools/nnpackage_tool/model2nnpkg/README.md
index 716f4f8d5..9d4676e23 100644
--- a/tools/nnpackage_tool/model2nnpkg/README.md
+++ b/tools/nnpackage_tool/model2nnpkg/README.md
@@ -13,9 +13,11 @@ Convert modelfile (either tflite or circle) to nnpackage.
Options:
-h show this help
-o set nnpackage output directory (default=.)
+ -p set nnpackage output name (default=[modelfile name])
Examples:
- model2nnpkg.sh add.tflite => create nnpackage in ./
- model2nnpkg.sh -o out add.tflite => create nnpackage in out/
+ model2nnpkg.sh add.tflite => create nnpackage 'add' in ./
+ model2nnpkg.sh -o out add.tflite => create nnpackage 'add' in out/
+ model2nnpkg.sh -o out -p addpkg add.tflite => create nnpackage 'addpkg' in out/
```
diff --git a/tools/nnpackage_tool/model2nnpkg/model2nnpkg.sh b/tools/nnpackage_tool/model2nnpkg/model2nnpkg.sh
index 87cd7878b..26f6c70e8 100755
--- a/tools/nnpackage_tool/model2nnpkg/model2nnpkg.sh
+++ b/tools/nnpackage_tool/model2nnpkg/model2nnpkg.sh
@@ -4,6 +4,7 @@ set -eu
progname=$(basename "${BASH_SOURCE[0]}")
outdir="."
+name=""
usage() {
echo "Usage: $progname [options] modelfile"
@@ -12,10 +13,12 @@ usage() {
echo "Options:"
echo " -h show this help"
echo " -o set nnpackage output directory (default=$outdir)"
+ echo " -p set nnpackage output name (default=[modelfile name])"
echo ""
echo "Examples:"
- echo " $progname add.tflite => create nnpackage in $outdir/"
- echo " $progname -o out add.tflite => create nnpackage in out/"
+ echo " $progname add.tflite => create nnpackage 'add' in $outdir/"
+ echo " $progname -o out add.tflite => create nnpackage 'add' in out/"
+ echo " $progname -o out -p addpkg add.tflite => create nnpackage 'addpkg' in out/"
exit 1
}
@@ -24,10 +27,11 @@ if [ $# -eq 0 ]; then
exit 1
fi
-while getopts "ho:" OPTION; do
+while getopts "ho:p:" OPTION; do
case "${OPTION}" in
h) usage;;
o) outdir=$OPTARG;;
+ p) name=$OPTARG;;
?) exit 1;;
esac
done
@@ -53,7 +57,9 @@ if [ ! -e $1 ]; then
exit 1
fi
-name=${modelfile%.*}
+if [ -z "$name" ]; then
+ name=${modelfile%.*}
+fi
extension=${modelfile##*.}
echo "Generating nnpackage "$name" in "$outdir""
@@ -63,7 +69,7 @@ cat > "$outdir"/"$name"/metadata/MANIFEST <<-EOF
"major-version" : "1",
"minor-version" : "0",
"patch-version" : "0",
- "models" : [ "$name.$extension" ],
+ "models" : [ "$modelfile" ],
"model-types" : [ "$extension" ]
}
EOF
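
For illustration, with the changes above the MANIFEST references the model by its original filename instead of renaming it after the package. A sketch of the expected result, assuming the hypothetical invocation below and the layout written by the script:

```
$ ./model2nnpkg.sh -o out -p addpkg add.tflite
Generating nnpackage addpkg in out
$ cat out/addpkg/metadata/MANIFEST
{
  "major-version" : "1",
  "minor-version" : "0",
  "patch-version" : "0",
  "models" : [ "add.tflite" ],
  "model-types" : [ "tflite" ]
}
```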
diff --git a/tools/nnpackage_tool/tflite2circle/tflite2circle.sh b/tools/nnpackage_tool/tflite2circle/tflite2circle.sh
index 6ad2ef9e2..409c05832 100755
--- a/tools/nnpackage_tool/tflite2circle/tflite2circle.sh
+++ b/tools/nnpackage_tool/tflite2circle/tflite2circle.sh
@@ -7,7 +7,7 @@ script_dir="$( cd "$( dirname "${BASH_SOURCE[0]}" )" && pwd )"
nnfw_root="$( cd "${script_dir%*/*/*/*}" && pwd )"
outdir="."
flatc=${flatc:-"$nnfw_root/build/externals/FLATBUFFERS/build/flatc"}
-tflite_schema=${tflite_schema:-"$nnfw_root/externals/TENSORFLOW-1.12/tensorflow/contrib/lite/schema/schema.fbs"}
+tflite_schema=${tflite_schema:-"$nnfw_root/externals/TENSORFLOW-1.13.1/tensorflow/lite/schema/schema.fbs"}
circle_schema=${circle_schema:-"$nnfw_root/nnpackage/schema/circle_schema.fbs"}
if ! [ -x "$flatc" ]; then
@@ -73,7 +73,7 @@ name=${tflite_base%.*}
# convert
mkdir -p "${outdir}"
-${flatc} -o ${outdir} --defaults-json --strict-json -t ${tflite_schema} -- $1
+${flatc} -o ${outdir} --strict-json -t ${tflite_schema} -- $1
${script_dir}/tflitejson2circlejson.py "${outdir}/${name}.json" > "${outdir}/${name}.circle"
${flatc} -o ${outdir} -b ${circle_schema} "${outdir}/${name}.circle"
rm -f ${outdir}/${name}.json
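
Taken together, the two changes above mean the converter now reads the TFLite schema from the TensorFlow 1.13.1 tree and no longer passes `--defaults-json`, so fields left at their default values are omitted from the intermediate JSON. A minimal sketch of the resulting pipeline, with illustrative paths:

```
# illustrative paths; the script derives the real ones from its own location
flatc -o out --strict-json -t \
  externals/TENSORFLOW-1.13.1/tensorflow/lite/schema/schema.fbs -- add.tflite
tflitejson2circlejson.py out/add.json > out/add.circle
flatc -o out -b nnpackage/schema/circle_schema.fbs out/add.circle
rm -f out/add.json
```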
diff --git a/tools/nnpackage_tool/tflite2circle/tflitejson2circlejson.py b/tools/nnpackage_tool/tflite2circle/tflitejson2circlejson.py
index a6955d8c1..272463fb0 100755
--- a/tools/nnpackage_tool/tflite2circle/tflitejson2circlejson.py
+++ b/tools/nnpackage_tool/tflite2circle/tflitejson2circlejson.py
@@ -34,8 +34,6 @@ if __name__ == '__main__':
with open(json_path, "r") as f:
try:
json_dict = json.load(f, object_pairs_hook=OrderedDict)
- for subgraph in json_dict["subgraphs"]:
- subgraph["data_format"] = "CHANNELS_LAST"
json_dict["version"] = 0
print(json.dumps(json_dict, indent=2))
except KeyError:
diff --git a/tools/release_tool/README.md b/tools/release_tool/README.md
new file mode 100644
index 000000000..8918604fd
--- /dev/null
+++ b/tools/release_tool/README.md
@@ -0,0 +1,68 @@
+# Contents
+
+- git_release.sh
+- onert_version.sh
+
+# git_release.sh
+
+This tool helps you to automate GitHub releases.
+
+## Usage
+```
+$ ./git_release.sh --tag TAG --release_note RELEASE_NOTE \
+--token TOKEN [--release_name RELEASE_NAME] [--commitish COMMITISH] [--draft] \
+[--host_name HOST_NAME] [--repo_owner REPO_OWNER] [--repo_name REPO_NAME] [--asset] ...
+```
+
+## Options
+```
+--tag The name of the tag
+--release_name The name of the release
+--release_note Path of text file describing the contents of the release
+--commitish The commitish value that determines where the Git tag is created from
+--draft Create a draft release
+--token User token for authentication
+--host_name Host name for endpoint URL [Enterprise-specific endpoint only]
+--repo_owner Owner of the repository
+--repo_name The name of the repository
+--asset Path of release asset
+```
+
+## Examples
+```
+$ ./git_release.sh --tag 1.9.0 --commitish release/1.9.0 --token 0de25f1ca5d1d758fe877b18c06 \
+ --repo_owner mhs4670go --repo_name test_repo --release_note local/repo/release_note \
+ --asset ONE-compiler.tar.gz --asset ONE-runtime.tar.gz
+
+$ ./git_release.sh --tag v1.1 --commitish c024e85d0ce6cb1ed2fbc66f1a9c1c2814da7575 \
+ --token 0de25f1ca5d1d758fe877b18c06 --repo_owner Samsung --repo_name ONE \
+ --release_name "Release Automation" --release_note /home/mhs4670go/ONE/release_doc \
+ --host_name github.sec.company.net --draft
+```
+
+## Reference
+https://developer.github.com/v3/repos/releases/#create-a-release
+
+
+# onert_version.sh
+
+onert_version.sh shows or updates the onert version information.
+
+## Usage
+```
+$ ./onert_version.sh -h
+Usage: onert_version.sh version
+Update or show onert version information
+```
+
+## Options
+```
+-h show this help
+-s set onert version
+```
+
+## Examples
+```
+$ ./onert_version.sh => show current onert version
+$ ./onert_version.sh -s 1.6.0 => set onert version info in all sources
+```
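
Since git_release.sh drives the GitHub releases API, one quick way to verify the result afterwards is to query the same per-tag endpoint the script uses for its duplicate-tag check. A sketch, assuming the default `api.github.com` host and the `Samsung/ONE` repository (tag and token are illustrative):

```
$ curl -s --header "Authorization: token ${USER_TOKEN}" \
    https://api.github.com/repos/Samsung/ONE/releases/tags/1.9.0 | jq '.name, .draft'
```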
diff --git a/tools/release_tool/git_release.sh b/tools/release_tool/git_release.sh
new file mode 100755
index 000000000..adba7df2f
--- /dev/null
+++ b/tools/release_tool/git_release.sh
@@ -0,0 +1,206 @@
+#!/bin/bash
+# This script automates the monthly release process using the GitHub API
+
+# Test if getopt is enhanced version
+getopt --test > /dev/null
+if [ $? -ne 4 ]; then
+ echo "[ERROR] Your system doesn't have enhanced getopt"
+ exit 2
+fi
+
+function Usage()
+{
+ echo "Usage: ./$(basename ${BASH_SOURCE[0]}) --tag TAG --release_note RELEASE_NOTE \
+--token TOKEN [--release_name RELEASE_NAME] [--commitish COMMITISH] [--draft] \
+[--host_name HOST_NAME] [--repo_owner REPO_OWNER] [--repo_name REPO_NAME] [--asset] ..."
+ echo ""
+ echo "[OPTIONS]"
+ echo "--tag The name of the tag"
+ echo "--release_name The name of the release"
+ echo "--release_note Path of text file describing the contents of the release"
+ echo "--commitish The commitish value that determines where the Git tag is created from"
+ echo "--draft Create a draft release"
+ echo "--token User token for authentication"
+ echo "--host_name Host name for endpoint URL [Enterprise-specific endpoint only]"
+ echo "--repo_owner Owner of the repository"
+ echo "--repo_name The name of the repository"
+ echo "--asset Path of release asset"
+ echo ""
+ echo "[EXAMPLE]"
+ echo "$ ./git_release.sh --tag 1.9.0 --commitish release/1.9.0 --token 0de25f1ca5d1d758fe877b18c06 \\"
+ echo " --repo_owner mhs4670go --repo_name test_repo --release_note local/repo/release_note \\"
+ echo " --asset ONE-compiler.tar.gz --asset ONE-runtime.tar.gz"
+ echo ""
+ echo "$ ./git_release.sh --tag v1.1 --commitish c024e85d0ce6cb1ed2fbc66f1a9c1c2814da7575 \\"
+ echo " --token 0de25f1ca5d1d758fe877b18c06 --repo_owner Samsung --repo_name ONE \\"
+ echo " --release_name \"Release Automation\" --release_note /home/mhs4670go/ONE/release_doc \\"
+ echo " --host_name github.sec.company.net --draft"
+ echo ""
+ echo "[REFERENCE]"
+ echo "https://developer.github.com/v3/repos/releases/#create-a-release"
+
+}
+
+SHORT_OPTS=h
+LONG_OPTS="\
+help,\
+tag:,\
+release_name:,\
+release_note:,\
+commitish:,\
+draft,\
+token:,\
+host_name:,\
+repo_owner:,\
+repo_name:,\
+asset:"
+
+OPTS=$(getopt --options "$SHORT_OPTS" --longoptions "$LONG_OPTS" --name "$0" -- "$@")
+
+if [ $? != 0 ] ; then echo "[ERROR] Failed to parse options" ; exit 2 ; fi
+
+eval set -- "$OPTS"
+
+unset TAG_NAME
+unset RELEASE_NAME
+unset RELEASE_NOTE
+unset TARGET_COMMITISH
+unset USER_TOKEN
+unset HOST_NAME
+unset REPO_OWNER
+unset REPO_NAME
+IS_DRAFT=false
+ASSET_PATHS=()
+
+while true ; do
+ case "$1" in
+ -h|--help )
+ Usage
+ exit 0
+ ;;
+ --tag ) # REQUIRED
+ TAG_NAME="$2"
+ shift 2
+ ;;
+ --release_name )
+ RELEASE_NAME="$2"
+ shift 2
+ ;;
+ --release_note ) # REQUIRED
+ RELEASE_NOTE="$2"
+ shift 2
+ ;;
+ --commitish )
+ TARGET_COMMITISH="$2"
+ shift 2
+ ;;
+ --draft )
+ IS_DRAFT=true
+ shift
+ ;;
+ --token ) # REQUIRED
+ USER_TOKEN="$2"
+ shift 2
+ ;;
+ --host_name )
+ HOST_NAME="$2/api/v3"
+ shift 2
+ ;;
+ --repo_owner )
+ REPO_OWNER="$2"
+ shift 2
+ ;;
+ --repo_name )
+ REPO_NAME="$2"
+ shift 2
+ ;;
+ --asset )
+ ASSET_PATHS+=("$2")
+ shift 2
+ ;;
+ -- )
+ shift
+ break
+ ;;
+ *)
+ echo "[ERROR] getopt internal error"
+ exit 2
+ ;;
+ esac
+done
+
+# Check if required options are specified
+if [ -z "${TAG_NAME}" ]; then
+ echo "[ERROR] You must specify '--tag' option"
+ Usage
+ exit 2
+fi
+if [ -z "${RELEASE_NOTE}" ]; then
+ echo "[ERROR] You must specify '--release_note' option"
+ Usage
+ exit 2
+fi
+if [ -z "${USER_TOKEN}" ]; then
+ echo "[ERROR] You must specify '--token' option"
+ Usage
+ exit 2
+fi
+
+# Print variables and set default value
+DEFAULT_RELEASE_NAME="ONE Release ${TAG_NAME}"
+DEFAULT_HOST_NAME="api.github.com"
+DEFAULT_REPO_OWNER="Samsung"
+DEFAULT_REPO_NAME="ONE"
+echo "======================[RELEASE INFO]======================"
+echo "TAG_NAME : ${TAG_NAME}"
+echo "RELEASE_NAME : ${RELEASE_NAME:=${DEFAULT_RELEASE_NAME}}"
+echo "RELEASE_NOTE : ${RELEASE_NOTE}"
+echo "TARGET_COMMITISH : ${TARGET_COMMITISH:=${TAG_NAME}}"
+echo "IS_DRAFT : ${IS_DRAFT}"
+echo "USER_TOKEN : ${USER_TOKEN}"
+echo "HOST_NAME : ${HOST_NAME:=${DEFAULT_HOST_NAME}}"
+echo "REPO_OWNER : ${REPO_OWNER:=${DEFAULT_REPO_OWNER}}"
+echo "REPO_NAME : ${REPO_NAME:=${DEFAULT_REPO_NAME}}"
+echo "ASSETS : ${ASSET_PATHS[@]}"
+echo "==========================================================="
+
+function generate_release_data()
+{
+ cat <<EOF
+{
+ "tag_name": "${TAG_NAME}",
+ "target_commitish": "${TARGET_COMMITISH}",
+ "name": "${RELEASE_NAME}",
+ "body": "$(cat $1 | sed 's/$/\\n/' | tr -d '\n')",
+ "draft": ${IS_DRAFT},
+ "prerelease": false
+}
+EOF
+}
+
+# Check if the release already exists
+RELEASE_URL=$(curl -s --request GET --header "Authorization: token ${USER_TOKEN}" \
+https://${HOST_NAME}/repos/${REPO_OWNER}/${REPO_NAME}/releases/tags/${TAG_NAME} | \
+jq -r '.url')
+
+if [ "$RELEASE_URL" != "null" ]; then
+ echo "[ERROR] The tag name you specified already exists."
+ exit 2
+fi
+
+# Create a release (assigning upload_url using jq)
+UPLOAD_URL=$(curl -s --request POST --header "Authorization: token ${USER_TOKEN}" \
+--header "Accept: application/json" \
+--data "$(eval generate_release_data '${RELEASE_NOTE}')" \
+"https://${HOST_NAME}/repos/${REPO_OWNER}/${REPO_NAME}/releases" | \
+jq -r '.upload_url')
+
+UPLOAD_URL=$(echo ${UPLOAD_URL} | cut -d "{" -f 1)?name=
+
+# Upload the assets
+for ASSET_PATH in "${ASSET_PATHS[@]}"; do
+ curl -s --request POST --header "Authorization: token ${USER_TOKEN}" \
+ --header "Content-Type: $(file -b --mime-type ${ASSET_PATH})" \
+ --data-binary @${ASSET_PATH} \
+ ${UPLOAD_URL}${ASSET_PATH} > /dev/null
+done
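
One detail worth spelling out: `generate_release_data` folds the release-note file into a single JSON string by appending a literal `\n` to every line (`sed 's/$/\\n/'`) and then deleting the real newlines (`tr -d '\n'`). A sketch of the payload it would emit for a hypothetical two-line note, using the defaults printed in the RELEASE INFO banner:

```
$ cat release_note
Add new release tools.
Fix the version update script.
$ generate_release_data release_note    # illustrative; the script calls this internally
{
  "tag_name": "1.9.0",
  "target_commitish": "release/1.9.0",
  "name": "ONE Release 1.9.0",
  "body": "Add new release tools.\nFix the version update script.\n",
  "draft": false,
  "prerelease": false
}
```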
diff --git a/tools/release_tool/onert_version.sh b/tools/release_tool/onert_version.sh
new file mode 100755
index 000000000..eafe96e3d
--- /dev/null
+++ b/tools/release_tool/onert_version.sh
@@ -0,0 +1,54 @@
+#!/bin/bash
+
+set -eu
+
+progname=$(basename "${BASH_SOURCE[0]}")
+script_dir="$( cd "$( dirname "${BASH_SOURCE[0]}" )" && pwd )"
+nnfw_root="$( cd "${script_dir%*/*/*}" && pwd )"
+
+usage() {
+ echo "Usage: $progname version"
+ echo "Update or show onert version information"
+ echo ""
+ echo "Options:"
+ echo " -h show this help"
+ echo " -s set onert version"
+ echo ""
+ echo "Examples:"
+ echo " $progname => show current onert version"
+ echo " $progname -s 1.6.0 => set onert version info in all sources"
+ exit 1
+}
+
+show_version() {
+ version_line=$(cat ${nnfw_root}/packaging/nnfw.spec | grep "Version:")
+ echo ${version_line#"Version:"}
+
+ exit 0
+}
+
+set_version() {
+ version=$1
+ perl -pi -e "s/^release = .*/release = \'$version\'/" ${nnfw_root}/docs/conf.py
+ perl -pi -e "s/^Version: .*/Version: $version/" ${nnfw_root}/packaging/nnfw.spec
+
+ IFS=. read M m p <<< "$version"
+ hex=$(printf '0x%08x' $(( (($M << 24)) | (($m << 8)) | $p )))
+ perl -pi -e "s/^#define NNFW_VERSION.*/#define NNFW_VERSION $hex/" ${nnfw_root}/runtime/onert/api/include/nnfw_version.h
+
+ perl -pi -e "s/versionName .*$/versionName \"$version\"/" ${nnfw_root}/runtime/contrib/android/api/build.gradle
+}
+
+if [ $# -eq 0 ]; then
+ show_version
+fi
+
+while getopts "hs:" OPTION; do
+case "${OPTION}" in
+ h) usage;;
+ s) set_version "$OPTARG";;
+ ?) exit 1;;
+esac
+done
+
+shift $((OPTIND-1))
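
The `set_version` helper packs the dotted version into the single integer written to `nnfw_version.h`: major in the top byte, minor shifted left by 8, and patch in the low byte. A standalone sketch of the same arithmetic for version 1.6.0:

```
$ IFS=. read M m p <<< "1.6.0"
$ printf '0x%08x\n' $(( (M << 24) | (m << 8) | p ))
0x01000600
```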
diff --git a/tools/tflitefile_tool/model_parser.py b/tools/tflitefile_tool/model_parser.py
index 4ef2374cf..cd66bf500 100755
--- a/tools/tflitefile_tool/model_parser.py
+++ b/tools/tflitefile_tool/model_parser.py
@@ -17,12 +17,6 @@
import os
import sys
import numpy
-
-sys.path.append(os.path.join(os.path.dirname(os.path.abspath(__file__)), 'tflite'))
-flatbuffersPath = '../../externals/flatbuffers'
-sys.path.append(
- os.path.join(os.path.dirname(os.path.abspath(__file__)), flatbuffersPath + '/python'))
-
import flatbuffers
import tflite.Model
import tflite.SubGraph
diff --git a/tools/tflitefile_tool/requirements.txt b/tools/tflitefile_tool/requirements.txt
new file mode 100644
index 000000000..9b4366ae5
--- /dev/null
+++ b/tools/tflitefile_tool/requirements.txt
@@ -0,0 +1,2 @@
+flatbuffers>=1.12
+numpy
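
With the hard-coded `sys.path` entries pointing at the in-tree flatbuffers checkout removed here (and in `select_operator.py` below), the tools now expect their Python dependencies to come from the environment instead; a minimal setup sketch:

```
$ pip install -r tools/tflitefile_tool/requirements.txt
```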
diff --git a/tools/tflitefile_tool/select_operator.py b/tools/tflitefile_tool/select_operator.py
index 333ca32f6..863edea57 100755
--- a/tools/tflitefile_tool/select_operator.py
+++ b/tools/tflitefile_tool/select_operator.py
@@ -17,12 +17,6 @@
import os
import sys
import numpy
-
-sys.path.append(os.path.join(os.path.dirname(os.path.abspath(__file__)), 'tflite'))
-sys.path.append(
- os.path.join(
- os.path.dirname(os.path.abspath(__file__)), '../../externals/flatbuffers/python'))
-
import flatbuffers
import tflite.Model
import tflite.SubGraph
@@ -278,6 +272,10 @@ def GenerateBuiltinOption(new_builder, selected_builtin_option, builtin_option_t
conv2d_options.StrideW())
tflite.Conv2DOptions.Conv2DOptionsAddStrideH(new_builder,
conv2d_options.StrideH())
+ tflite.Conv2DOptions.Conv2DOptionsAddDilationWFactor(
+ new_builder, conv2d_options.DilationWFactor())
+ tflite.Conv2DOptions.Conv2DOptionsAddDilationHFactor(
+ new_builder, conv2d_options.DilationHFactor())
tflite.Conv2DOptions.Conv2DOptionsAddFusedActivationFunction(
new_builder, conv2d_options.FusedActivationFunction())
return tflite.Conv2DOptions.Conv2DOptionsEnd(new_builder)
@@ -725,7 +723,17 @@ def GenerateBuiltinOption(new_builder, selected_builtin_option, builtin_option_t
# GreaterOptions: not supported
# GreaterEqualOptions: not supported
# LessEqualOptions: not supported
- # SelectOptions: not supported
+
+ # SelectOptions
+ import tflite.SelectOptions
+ if builtin_option_type == tflite.BuiltinOptions.BuiltinOptions().SelectOptions:
+
+ select_option = tflite.SelectOptions.SelectOptions()
+ select_option.Init(selected_builtin_option.Bytes, selected_builtin_option.Pos)
+
+ tflite.SelectOptions.SelectOptionsStart(new_builder)
+ return tflite.SelectOptions.SelectOptionsEnd(new_builder)
+
# SliceOptions: not supported
# TransposeConvOptions
@@ -867,7 +875,18 @@ def GenerateBuiltinOption(new_builder, selected_builtin_option, builtin_option_t
# FloorModOptions: not supported
# RangeOptions: not supported
# ResizeNearestNeighborOptions: not supported
- # LeakyReluOptions: not supported
+
+ # LeakyReluOptions
+ import tflite.LeakyReluOptions
+ if builtin_option_type == tflite.BuiltinOptions.BuiltinOptions().LeakyReluOptions:
+
+ leaky_relu_option = tflite.LeakyReluOptions.LeakyReluOptions()
+ leaky_relu_option.Init(selected_builtin_option.Bytes, selected_builtin_option.Pos)
+
+ tflite.LeakyReluOptions.LeakyReluOptionsStart(new_builder)
+ tflite.LeakyReluOptions.LeakyReluOptionsAddAlpha(new_builder,
+ leaky_relu_option.Alpha())
+ return tflite.LeakyReluOptions.LeakyReluOptionsEnd(new_builder)
# SquaredDifferenceOptions
import tflite.SquaredDifferenceOptions
@@ -915,7 +934,8 @@ def GenerateBuiltinOption(new_builder, selected_builtin_option, builtin_option_t
return tflite.WhileOptions.WhileOptionsEnd(new_builder)
# Cannot handle builtin option type yet
- print("Cannot handle this option yet")
+ print("Cannot handle BuiltinOptions {} yet. See BuiltinOptions.py for op name".format(
+ builtin_option_type))
exit(1)
diff --git a/tools/update_version/update-version b/tools/update_version/update-version
deleted file mode 100644
index 1b77c10cd..000000000
--- a/tools/update_version/update-version
+++ /dev/null
@@ -1,51 +0,0 @@
-#!/bin/bash
-
-set -eu
-
-progname=$(basename "${BASH_SOURCE[0]}")
-script_dir="$( cd "$( dirname "${BASH_SOURCE[0]}" )" && pwd )"
-nnfw_root="$( cd "${script_dir%*/*/*}" && pwd )"
-
-usage() {
- echo "Usage: $progname version"
- echo "Update all version information"
- echo ""
- echo "Options:"
- echo " -h show this help"
- echo ""
- echo "Examples:"
- echo " $progname 1.6.0"
- exit 1
-}
-
-if [ $# -eq 0 ]; then
- echo "For help, type $progname -h"
- exit 1
-fi
-
-while getopts "ho:" OPTION; do
-case "${OPTION}" in
- h) usage;;
- ?) exit 1;;
-esac
-done
-
-shift $((OPTIND-1))
-
-if [ $# -ne 1 ]; then
- echo "error: wrong argument (no argument or too many arguments)."
- echo "For help, type $progname -h"
- exit 1
-fi
-
-version=$1
-
-perl -pi -e "s/^release = .*/release = \'$version\'/" ${nnfw_root}/docs/conf.py
-
-perl -pi -e "s/^Version: .*/Version: $version/" ${nnfw_root}/packaging/nnfw.spec
-
-IFS=. read M m p <<< "$version"
-hex=$(printf '0x%08x' $(( (($M << 24)) | (($m << 8)) | $p )))
-perl -pi -e "s/^#define NNFW_VERSION.*/#define NNFW_VERSION $hex/" ${nnfw_root}/runtime/onert/api/include/nnfw_version.h
-
-perl -pi -e "s/versionName .*$/versionName \"$version\"/" ${nnfw_root}/runtime/contrib/android/api/build.gradle