author     Chunseok Lee <chunseok.lee@samsung.com>  2020-12-14 14:43:43 +0900
committer  Chunseok Lee <chunseok.lee@samsung.com>  2020-12-14 14:43:43 +0900
commit     62529acabbafce7730601ed01d5709d7bc0d378a (patch)
tree       bf6912cfa8fac4a2997292bfcb3c82055734c97e /runtime
parent     6ea13af5257155ff993c205cf997b870cc627f73 (diff)
download   nnfw-62529acabbafce7730601ed01d5709d7bc0d378a.tar.gz
           nnfw-62529acabbafce7730601ed01d5709d7bc0d378a.tar.bz2
           nnfw-62529acabbafce7730601ed01d5709d7bc0d378a.zip
Imported Upstream version 1.12.0 (upstream/1.12.0)
Diffstat (limited to 'runtime')
l--------- runtime/contrib/.clang-format 1
-rw-r--r-- runtime/contrib/TFLiteSharp/TFLiteNative/include/tflite_log.h 11
-rw-r--r-- runtime/contrib/TFLiteSharp/TFLiteNative/include/tflite_nativewrapper.h 3
-rw-r--r-- runtime/contrib/android/api/Prebuilt.mk 8
-rw-r--r-- runtime/contrib/android/api/build.gradle 2
-rw-r--r-- runtime/contrib/android/api/src/main/native/onert-native-api.cpp 16
-rw-r--r-- runtime/contrib/android/api/src/main/native/onert-native-api.h 16
-rw-r--r-- runtime/contrib/android_benchmark_app/cpp/ndk_main.cpp 2
-rw-r--r-- runtime/contrib/android_tflite/builtin_ops_jni.cc 2
-rw-r--r-- runtime/contrib/heap_trace/src/cl_create_buffer_stub.cc 4
-rw-r--r-- runtime/contrib/heap_trace/src/memory_pool_for_symbol_searcher_internals.h 2
-rw-r--r-- runtime/contrib/heap_trace/src/trace.cc 2
-rw-r--r-- runtime/contrib/heap_trace/src/trace.h 2
-rw-r--r-- runtime/contrib/heap_trace/tests/src/cl_release_mem_object_interception_test.cc 6
-rw-r--r-- runtime/contrib/heap_trace/tests/src/malloc_interception_test.cc 4
-rw-r--r-- runtime/contrib/heap_trace/tests/src/realloc_interception_test.cc 10
-rw-r--r-- runtime/contrib/heap_trace/tests/src/symbol_searcher_test.cc 2
-rw-r--r-- runtime/contrib/heap_trace/tests/src/trace_test.cc 16
-rw-r--r-- runtime/contrib/labs/jniacl/src/jniacl_main.cc 11
-rw-r--r-- runtime/contrib/labs/opencl_test/src/opencl_test.cc 8
-rw-r--r-- runtime/contrib/labs/tflite_examples/src/conv.cpp 8
-rw-r--r-- runtime/contrib/style_transfer_app/src/bitmap_helper.cc 32
-rw-r--r-- runtime/contrib/style_transfer_app/src/jpeg_helper.cc 2
-rw-r--r-- runtime/contrib/style_transfer_app/src/style_transfer_app.cc 8
-rw-r--r-- runtime/contrib/tflite_classify/src/ImageClassifier.cc 6
-rw-r--r-- runtime/contrib/tflite_classify/src/InferenceInterface.cc 2
-rw-r--r-- runtime/contrib/tflite_classify/src/tflite_classify.cc 5
l--------- runtime/libs/.clang-format 1
-rw-r--r-- runtime/libs/benchmark/src/CsvWriter.cpp 2
-rw-r--r-- runtime/libs/benchmark/src/MemoryPoller.cpp 2
-rw-r--r-- runtime/libs/benchmark/src/Result.cpp 12
-rw-r--r-- runtime/libs/misc/include/misc/feature/Index.h 2
-rw-r--r-- runtime/libs/misc/include/misc/feature/Shape.h 2
-rw-r--r-- runtime/libs/misc/include/misc/kernel/Shape.h 2
-rw-r--r-- runtime/libs/misc/include/misc/tensor/Object.h 5
-rw-r--r-- runtime/libs/misc/include/misc/tensor/Zipper.h 4
-rw-r--r-- runtime/libs/misc/src/tensor/Comparator.cpp 24
-rw-r--r-- runtime/libs/nnapi/include/NeuralNetworksShim.h 8
-rw-r--r-- runtime/libs/nnapi/include/NeuralNetworksTypes.h 45
-rw-r--r-- runtime/libs/rua/dyn/src/DynamicBinder.cpp 12
-rw-r--r-- runtime/libs/tflite/include/tflite/Diff.h 2
-rw-r--r-- runtime/libs/tflite/include/tflite/RandomTestRunner.h 2
-rw-r--r-- runtime/libs/tflite/include/tflite/TensorLogger.h 5
-rw-r--r-- runtime/libs/tflite/src/Diff.cpp 22
-rw-r--r-- runtime/libs/tflite/src/RandomTestRunner.cpp 100
l--------- runtime/onert/api/.clang-format 1
-rw-r--r-- runtime/onert/api/include/nnfw.h 12
-rw-r--r-- runtime/onert/api/include/nnfw_internal.h 9
-rw-r--r-- runtime/onert/api/include/nnfw_version.h 2
-rw-r--r-- runtime/onert/api/src/CustomKernel.cc 2
-rw-r--r-- runtime/onert/api/src/nnfw_api.cc 8
-rw-r--r-- runtime/onert/api/src/nnfw_api_internal.cc 158
-rw-r--r-- runtime/onert/api/src/nnfw_api_internal.h 8
-rw-r--r-- runtime/onert/backend/CMakeLists.txt 2
-rw-r--r-- runtime/onert/backend/acl_cl/Backend.h 12
-rw-r--r-- runtime/onert/backend/acl_cl/BackendContext.cc 302
-rw-r--r-- runtime/onert/backend/acl_cl/BackendContext.h 70
-rw-r--r-- runtime/onert/backend/acl_cl/ConstantInitializer.cc 2
-rw-r--r-- runtime/onert/backend/acl_cl/ConstantInitializer.h 6
-rw-r--r-- runtime/onert/backend/acl_cl/KernelGenerator.cc 82
-rw-r--r-- runtime/onert/backend/acl_cl/KernelGenerator.h 59
-rw-r--r-- runtime/onert/backend/acl_cl/Optimizer.h 7
-rw-r--r-- runtime/onert/backend/acl_cl/acl_cl.cc 15
-rw-r--r-- runtime/onert/backend/acl_common/AclConstantInitializer.cc 2
-rw-r--r-- runtime/onert/backend/acl_common/AclConstantInitializer.h 4
-rw-r--r-- runtime/onert/backend/acl_common/AclTensorBuilder.h 29
-rw-r--r-- runtime/onert/backend/acl_common/Convert.cc 18
-rw-r--r-- runtime/onert/backend/acl_neon/Backend.h 12
-rw-r--r-- runtime/onert/backend/acl_neon/BackendContext.cc 302
-rw-r--r-- runtime/onert/backend/acl_neon/BackendContext.h 71
-rw-r--r-- runtime/onert/backend/acl_neon/ConstantInitializer.h 6
-rw-r--r-- runtime/onert/backend/acl_neon/KernelGenerator.cc 79
-rw-r--r-- runtime/onert/backend/acl_neon/KernelGenerator.h 25
-rw-r--r-- runtime/onert/backend/acl_neon/Optimizer.h 7
-rw-r--r-- runtime/onert/backend/acl_neon/acl_neon.cc 15
-rw-r--r-- runtime/onert/backend/cpu/Backend.h 2
-rw-r--r-- runtime/onert/backend/cpu/BackendContext.cc 147
-rw-r--r-- runtime/onert/backend/cpu/BackendContext.h 35
-rw-r--r-- runtime/onert/backend/cpu/ConstantInitializer.h 38
-rw-r--r-- runtime/onert/backend/cpu/ExternalContext.h 3
-rw-r--r-- runtime/onert/backend/cpu/KernelGenerator.cc 72
-rw-r--r-- runtime/onert/backend/cpu/KernelGenerator.h 76
-rw-r--r-- runtime/onert/backend/cpu/StaticTensorManager.cc 107
-rw-r--r-- runtime/onert/backend/cpu/StaticTensorManager.h 33
-rw-r--r-- runtime/onert/backend/cpu/Tensor.h 87
-rw-r--r-- runtime/onert/backend/cpu/TensorBuilder.h 19
-rw-r--r-- runtime/onert/backend/cpu/cpu.cc 15
-rw-r--r-- runtime/onert/backend/cpu/ops/ArgMinMaxLayer.cc 6
-rw-r--r-- runtime/onert/backend/cpu/ops/BatchMatMulLayer.cc 2
-rw-r--r-- runtime/onert/backend/cpu/ops/ConcatLayer.cc 34
-rw-r--r-- runtime/onert/backend/cpu/ops/ConvolutionLayer.cc 2
-rw-r--r-- runtime/onert/backend/cpu/ops/DepthToSpaceLayer.cc 78
-rw-r--r-- runtime/onert/backend/cpu/ops/DepthToSpaceLayer.h 54
-rw-r--r-- runtime/onert/backend/cpu/ops/DepthwiseConvolutionLayer.cc 14
-rw-r--r-- runtime/onert/backend/cpu/ops/DepthwiseConvolutionLayer.h 5
-rw-r--r-- runtime/onert/backend/cpu/ops/ElementwiseActivationLayer.cc 30
-rw-r--r-- runtime/onert/backend/cpu/ops/ElementwiseActivationLayer.h 4
-rw-r--r-- runtime/onert/backend/cpu/ops/ElementwiseBinaryLayer.cc 30
-rw-r--r-- runtime/onert/backend/cpu/ops/ElementwiseUnaryLayer.cc 32
-rw-r--r-- runtime/onert/backend/cpu/ops/ElementwiseUnaryLayer.h 2
-rw-r--r-- runtime/onert/backend/cpu/ops/ExpandDimsLayer.cc 7
-rw-r--r-- runtime/onert/backend/cpu/ops/ExpandDimsLayer.h 4
-rw-r--r-- runtime/onert/backend/cpu/ops/FillLayer.cc 22
-rw-r--r-- runtime/onert/backend/cpu/ops/FillLayer.h 4
-rw-r--r-- runtime/onert/backend/cpu/ops/MeanLayer.cc 25
-rw-r--r-- runtime/onert/backend/ruy/Backend.h 68
-rw-r--r-- runtime/onert/backend/ruy/BackendContext.cc 147
-rw-r--r-- runtime/onert/backend/ruy/BackendContext.h 78
-rw-r--r-- runtime/onert/backend/ruy/CMakeLists.txt 22
-rw-r--r-- runtime/onert/backend/ruy/Config.cc (renamed from runtime/onert/backend/cpu/Tensor.cc) 10
-rw-r--r-- runtime/onert/backend/ruy/Config.h 48
-rw-r--r-- runtime/onert/backend/ruy/ConstantInitializer.h 35
-rw-r--r-- runtime/onert/backend/ruy/ExternalContext.h 60
-rw-r--r-- runtime/onert/backend/ruy/KernelGenerator.cc 171
-rw-r--r-- runtime/onert/backend/ruy/KernelGenerator.h 64
-rw-r--r-- runtime/onert/backend/ruy/StaticTensorManager.h 35
-rw-r--r-- runtime/onert/backend/ruy/Tensor.h 37
-rw-r--r-- runtime/onert/backend/ruy/TensorBuilder.cc 90
-rw-r--r-- runtime/onert/backend/ruy/TensorBuilder.h 73
-rw-r--r-- runtime/onert/backend/ruy/ops/ConvolutionLayer.cc 153
-rw-r--r-- runtime/onert/backend/ruy/ops/ConvolutionLayer.h 90
-rw-r--r-- runtime/onert/backend/ruy/ops/FullyConnectedLayer.cc 103
-rw-r--r-- runtime/onert/backend/ruy/ops/FullyConnectedLayer.h 69
-rw-r--r-- runtime/onert/backend/ruy/ops/OperationUtils.cc 47
-rw-r--r-- runtime/onert/backend/ruy/ops/OperationUtils.h 123
-rw-r--r-- runtime/onert/backend/ruy/ruy.cc 24
-rw-r--r-- runtime/onert/backend/xnnpack/Backend.h 68
-rw-r--r-- runtime/onert/backend/xnnpack/BackendContext.cc 147
-rw-r--r-- runtime/onert/backend/xnnpack/BackendContext.h 85
-rw-r--r-- runtime/onert/backend/xnnpack/CMakeLists.txt 26
-rw-r--r-- runtime/onert/backend/xnnpack/Config.cc (renamed from runtime/onert/core/include/backend/IOptimizer.h) 41
-rw-r--r-- runtime/onert/backend/xnnpack/Config.h 51
-rw-r--r-- runtime/onert/backend/xnnpack/ConstantInitializer.h 35
-rw-r--r-- runtime/onert/backend/xnnpack/ExternalContext.cc (renamed from runtime/onert/core/include/backend/IExternalContext.h) 18
-rw-r--r-- runtime/onert/backend/xnnpack/ExternalContext.h 46
-rw-r--r-- runtime/onert/backend/xnnpack/KernelGenerator.cc 197
-rw-r--r-- runtime/onert/backend/xnnpack/KernelGenerator.h 65
-rw-r--r-- runtime/onert/backend/xnnpack/StaticTensorManager.h 35
-rw-r--r-- runtime/onert/backend/xnnpack/Tensor.h 37
-rw-r--r-- runtime/onert/backend/xnnpack/TensorBuilder.cc 90
-rw-r--r-- runtime/onert/backend/xnnpack/TensorBuilder.h 73
-rw-r--r-- runtime/onert/backend/xnnpack/ops/ConvolutionLayer.cc 149
-rw-r--r-- runtime/onert/backend/xnnpack/ops/ConvolutionLayer.h 77
-rw-r--r-- runtime/onert/backend/xnnpack/ops/DepthwiseConvolutionLayer.cc 150
-rw-r--r-- runtime/onert/backend/xnnpack/ops/DepthwiseConvolutionLayer.h 77
-rw-r--r-- runtime/onert/backend/xnnpack/ops/FullyConnectedLayer.cc 138
-rw-r--r-- runtime/onert/backend/xnnpack/ops/FullyConnectedLayer.h 61
-rw-r--r-- runtime/onert/backend/xnnpack/ops/Layer.h 81
-rw-r--r-- runtime/onert/backend/xnnpack/ops/OperationUtils.h 75
-rw-r--r-- runtime/onert/backend/xnnpack/xnnpack.cc 33
-rw-r--r-- runtime/onert/core/include/backend/BackendContext.h 41
-rw-r--r-- runtime/onert/core/include/backend/ITensorBuilder.h 108
-rw-r--r-- runtime/onert/core/include/backend/ITensorRegister.h 97
-rw-r--r-- runtime/onert/core/include/backend/cpu_common/BackendContextHelpers.h 193
-rw-r--r-- runtime/onert/core/include/backend/cpu_common/ConstantInitializer.h 58
-rw-r--r-- runtime/onert/core/include/backend/cpu_common/ConstantInitializerBase.h (renamed from runtime/onert/core/include/backend/IConstantInitializer.h) 24
-rw-r--r-- runtime/onert/core/include/backend/cpu_common/KernelGeneratorBase.h (renamed from runtime/onert/core/include/backend/IKernelGenerator.h) 15
-rw-r--r-- runtime/onert/core/include/backend/cpu_common/StaticTensorManager.h 13
-rw-r--r-- runtime/onert/core/include/backend/cpu_common/Tensor.h 86
-rw-r--r-- runtime/onert/core/include/compiler/BackendManager.h 2
-rw-r--r-- runtime/onert/core/include/compiler/Compiler.h 7
-rw-r--r-- runtime/onert/core/include/compiler/LoweredGraph.h 3
-rw-r--r-- runtime/onert/core/include/compiler/StaticShapeInferer.h 2
-rw-r--r-- runtime/onert/core/include/exec/DynamicShapeInferer.h 2
-rw-r--r-- runtime/onert/core/include/exec/IExecutor.h 43
-rw-r--r-- runtime/onert/core/include/ir/DataType.h 1
-rw-r--r-- runtime/onert/core/include/ir/Operations.Include.h 92
-rw-r--r-- runtime/onert/core/include/ir/Operations.lst 92
-rw-r--r-- runtime/onert/core/include/ir/Subgraphs.h 2
-rw-r--r-- runtime/onert/core/include/ir/operation/ArgMinMax.h (renamed from runtime/onert/core/include/ir/operation/ArgMax.h) 15
-rw-r--r-- runtime/onert/core/include/ir/operation/ElementwiseUnary.h 8
-rw-r--r-- runtime/onert/core/include/ir/operation/Fill.h 2
-rw-r--r-- runtime/onert/core/include/util/Config.lst 3
-rw-r--r-- runtime/onert/core/include/util/ConfigSource.h 1
-rw-r--r-- runtime/onert/core/include/util/ShapeInference.h 4
-rw-r--r-- runtime/onert/core/include/util/TracingCtx.h 94
-rw-r--r-- runtime/onert/core/include/util/logging.h 7
-rw-r--r-- runtime/onert/core/src/backend/BackendContext.cc 21
-rw-r--r-- runtime/onert/core/src/backend/controlflow/Backend.h 2
-rw-r--r-- runtime/onert/core/src/backend/controlflow/BackendContext.cc 142
-rw-r--r-- runtime/onert/core/src/backend/controlflow/BackendContext.h 36
-rw-r--r-- runtime/onert/core/src/backend/controlflow/ConstantInitializer.h 21
-rw-r--r-- runtime/onert/core/src/backend/controlflow/ExternalContext.h 3
-rw-r--r-- runtime/onert/core/src/backend/controlflow/IOTensor.cc 52
-rw-r--r-- runtime/onert/core/src/backend/controlflow/IOTensor.h 94
-rw-r--r-- runtime/onert/core/src/backend/controlflow/KernelGenerator.cc 38
-rw-r--r-- runtime/onert/core/src/backend/controlflow/KernelGenerator.h 12
-rw-r--r-- runtime/onert/core/src/backend/controlflow/Tensor.h 1
-rw-r--r-- runtime/onert/core/src/backend/controlflow/TensorBuilder.cc 12
-rw-r--r-- runtime/onert/core/src/backend/controlflow/TensorBuilder.h 19
-rw-r--r-- runtime/onert/core/src/backend/controlflow/TensorRegistry.h 33
-rw-r--r-- runtime/onert/core/src/backend/controlflow/kernel/IfLayer.cc 81
-rw-r--r-- runtime/onert/core/src/backend/controlflow/kernel/IfLayer.h 16
-rw-r--r-- runtime/onert/core/src/backend/controlflow/kernel/PermuteLayer.h 1
-rw-r--r-- runtime/onert/core/src/backend/controlflow/kernel/WhileLayer.cc 188
-rw-r--r-- runtime/onert/core/src/backend/controlflow/kernel/WhileLayer.h 18
-rw-r--r-- runtime/onert/core/src/backend/cpu_common/BackendContextHelpers.cc 17
-rw-r--r-- runtime/onert/core/src/backend/cpu_common/ConstantInitializer.cc (renamed from runtime/onert/backend/cpu/ConstantInitializer.cc) 46
-rw-r--r-- runtime/onert/core/src/backend/cpu_common/ConstantInitializerBase.cc (renamed from runtime/onert/core/src/backend/IConstantInitializer.cc) 27
-rw-r--r-- runtime/onert/core/src/backend/cpu_common/StaticTensorManager.cc 44
-rw-r--r-- runtime/onert/core/src/backend/cpu_common/Tensor.cc 17
-rw-r--r-- runtime/onert/core/src/compiler/BackendManager.cc 96
-rw-r--r-- runtime/onert/core/src/compiler/Compiler.cc 62
-rw-r--r-- runtime/onert/core/src/compiler/ExecutorFactory.cc 370
-rw-r--r-- runtime/onert/core/src/compiler/ExecutorFactory.h 3
-rw-r--r-- runtime/onert/core/src/compiler/Linear.cc 187
-rw-r--r-- runtime/onert/core/src/compiler/Linear.h 3
-rw-r--r-- runtime/onert/core/src/compiler/LoweredGraph.cc 71
-rw-r--r-- runtime/onert/core/src/compiler/ManualScheduler.cc 9
-rw-r--r-- runtime/onert/core/src/compiler/ShapeValidator.cc 22
-rw-r--r-- runtime/onert/core/src/compiler/ShapeValidator.h 2
-rw-r--r-- runtime/onert/core/src/compiler/StaticShapeInferer.cc 43
-rw-r--r-- runtime/onert/core/src/compiler/TensorBuilders.h 78
-rw-r--r-- runtime/onert/core/src/compiler/pass/PermutationInsertionPass.cc 12
-rw-r--r-- runtime/onert/core/src/exec/DataflowExecutor.cc 17
-rw-r--r-- runtime/onert/core/src/exec/DataflowExecutor.h 6
-rw-r--r-- runtime/onert/core/src/exec/DynamicShapeInferer.cc 41
-rw-r--r-- runtime/onert/core/src/exec/ExecTime.h 2
-rw-r--r-- runtime/onert/core/src/exec/ExecutionObservee.cc 19
-rw-r--r-- runtime/onert/core/src/exec/ExecutionObservee.h 9
-rw-r--r-- runtime/onert/core/src/exec/ExecutionObservers.cc 111
-rw-r--r-- runtime/onert/core/src/exec/ExecutionObservers.h 44
-rw-r--r-- runtime/onert/core/src/exec/ExecutorBase.cc 113
-rw-r--r-- runtime/onert/core/src/exec/ExecutorBase.h 54
-rw-r--r-- runtime/onert/core/src/exec/IPermuteFunction.h 3
-rw-r--r-- runtime/onert/core/src/exec/JSONExecTime.cc 2
-rw-r--r-- runtime/onert/core/src/exec/JSONExecTime.h 12
-rw-r--r-- runtime/onert/core/src/exec/LinearExecutor.cc 10
-rw-r--r-- runtime/onert/core/src/exec/LinearExecutor.h 7
-rw-r--r-- runtime/onert/core/src/exec/ParallelExecutor.cc 21
-rw-r--r-- runtime/onert/core/src/exec/ParallelExecutor.h 6
-rw-r--r-- runtime/onert/core/src/interp/InterpExecutor.h 9
-rw-r--r-- runtime/onert/core/src/interp/operations/DepthwiseConv2D.cc 2
-rw-r--r-- runtime/onert/core/src/ir/DataType.cc 1
-rw-r--r-- runtime/onert/core/src/ir/OperationDumper.cc 19
-rw-r--r-- runtime/onert/core/src/ir/OperationDumper.h 3
-rw-r--r-- runtime/onert/core/src/ir/OperationValidator.cc 136
-rw-r--r-- runtime/onert/core/src/ir/OperationValidator.h 5
-rw-r--r-- runtime/onert/core/src/ir/operation/ArgMinMax.cc (renamed from runtime/onert/core/src/ir/operation/ArgMax.cc) 11
-rw-r--r-- runtime/onert/core/src/ir/operation/ElementwiseUnary.cc 2
-rw-r--r-- runtime/onert/core/src/util/ConfigSource.cc 11
-rw-r--r-- runtime/onert/core/src/util/EventCollector.cc 12
-rw-r--r-- runtime/onert/core/src/util/EventCollector.h 20
-rw-r--r-- runtime/onert/core/src/util/EventCollectorGlobal.cc 94
-rw-r--r-- runtime/onert/core/src/util/EventCollectorGlobal.h 155
-rw-r--r-- runtime/onert/core/src/util/EventRecorder.h 5
-rw-r--r-- runtime/onert/core/src/util/EventWriter.cc 151
-rw-r--r-- runtime/onert/core/src/util/EventWriter.h 100
-rw-r--r-- runtime/onert/core/src/util/ShapeInference.cc 14
-rw-r--r-- runtime/onert/core/src/util/TracingCtx.cc 29
l--------- runtime/onert/frontend/.clang-format 1
-rw-r--r-- runtime/onert/frontend/base_loader/include/base_loader.h 185
-rw-r--r-- runtime/onert/frontend/circle/src/circle_loader.cc 2
-rw-r--r-- runtime/onert/frontend/circle_schema/include/circle_schema_generated.h 820
-rw-r--r-- runtime/onert/frontend/nnapi/wrapper/ANeuralNetworksCompilation.cc 4
-rw-r--r-- runtime/onert/frontend/nnapi/wrapper/ANeuralNetworksCompilation.h 9
-rw-r--r-- runtime/onert/frontend/nnapi/wrapper/ANeuralNetworksEvent.cc 2
-rw-r--r-- runtime/onert/frontend/nnapi/wrapper/ANeuralNetworksExecution.cc 12
-rw-r--r-- runtime/onert/frontend/nnapi/wrapper/ANeuralNetworksExecution.h 2
-rw-r--r-- runtime/onert/frontend/nnapi/wrapper/ANeuralNetworksModel.cc 10
-rw-r--r-- runtime/onert/frontend/nnapi/wrapper/NNAPIConvert.cc 7
-rw-r--r-- runtime/onert/frontend/nnapi/wrapper/OperationFactory.cc 136
-rw-r--r-- runtime/onert/frontend/nnapi/wrapper/OperationFactory.h 2
-rw-r--r-- runtime/onert/frontend/tflite/src/tflite_schema_generated.h 652
l--------- runtime/onert/sample/.clang-format 1
l--------- runtime/onert/test/.clang-format 1
-rw-r--r-- runtime/onert/test/core/compiler/HEScheduler.cc (renamed from runtime/onert/test/core/compiler/Scheduler.cc) 43
-rw-r--r-- runtime/onert/test/core/exec/ExecInstance.cc 18
-rw-r--r-- runtime/onert/test/core/exec/ExecTime.test.cc 8
-rw-r--r-- runtime/onert/test/core/interp/ExecManager.cc 23
-rw-r--r-- runtime/onert/test/graph/MockNode.h 2
-rw-r--r-- runtime/onert/test/graph/operand/UseDef.cc 6
-rw-r--r-- runtime/onert/test/util/ShapeInference.cc 24
273 files changed, 8512 insertions, 3789 deletions
diff --git a/runtime/contrib/.clang-format b/runtime/contrib/.clang-format
new file mode 120000
index 000000000..f761fe4ae
--- /dev/null
+++ b/runtime/contrib/.clang-format
@@ -0,0 +1 @@
+../../.clang-format.8
\ No newline at end of file
diff --git a/runtime/contrib/TFLiteSharp/TFLiteNative/include/tflite_log.h b/runtime/contrib/TFLiteSharp/TFLiteNative/include/tflite_log.h
index 69dfcc7b2..3d71f89aa 100644
--- a/runtime/contrib/TFLiteSharp/TFLiteNative/include/tflite_log.h
+++ b/runtime/contrib/TFLiteSharp/TFLiteNative/include/tflite_log.h
@@ -47,12 +47,11 @@ extern "C" {
} \
} while (0)
#else // __TIZEN__
-#define LEVEL_TO_STR(level) \
- (((level) == ERROR) \
- ? "ERROR" \
- : ((level) == WARNING) \
- ? "WARNING" \
- : ((level) == INFO) ? "INFO" : ((level) == DEBUG) ? "DEBUG" : "DEFAULT")
+#define LEVEL_TO_STR(level) \
+ (((level) == ERROR) ? "ERROR" \
+ : ((level) == WARNING) \
+ ? "WARNING" \
+ : ((level) == INFO) ? "INFO" : ((level) == DEBUG) ? "DEBUG" : "DEFAULT")
#define TFLITE_NATIVE_LOG(log_level, format, args...) \
do \
{ \
diff --git a/runtime/contrib/TFLiteSharp/TFLiteNative/include/tflite_nativewrapper.h b/runtime/contrib/TFLiteSharp/TFLiteNative/include/tflite_nativewrapper.h
index b099ba9ba..2fb98cc93 100644
--- a/runtime/contrib/TFLiteSharp/TFLiteNative/include/tflite_nativewrapper.h
+++ b/runtime/contrib/TFLiteSharp/TFLiteNative/include/tflite_nativewrapper.h
@@ -26,7 +26,8 @@
extern "C" {
#endif /*__cplusplus*/
-typedef enum {
+typedef enum
+{
/** 32-bit signed integer. */
INT32 = 1,
diff --git a/runtime/contrib/android/api/Prebuilt.mk b/runtime/contrib/android/api/Prebuilt.mk
index 7d9f56582..c00c7d3da 100644
--- a/runtime/contrib/android/api/Prebuilt.mk
+++ b/runtime/contrib/android/api/Prebuilt.mk
@@ -21,14 +21,6 @@ LOCAL_SRC_FILES := \
$(ONERT_PREBUILT_LIB_DIR)/libtflite_loader.so
include $(PREBUILT_SHARED_LIBRARY)
-# libtensorflowlite_jni
-include $(CLEAR_VARS)
-LOCAL_MODULE := tensorflowlite_jni
-PREBUILT_LIB += tensorflowlite_jni
-LOCAL_SRC_FILES := \
- $(ONERT_PREBUILT_LIB_DIR)/libtensorflowlite_jni.so
-include $(PREBUILT_SHARED_LIBRARY)
-
# libnnfw
include $(CLEAR_VARS)
LOCAL_MODULE := nnfw-dev
diff --git a/runtime/contrib/android/api/build.gradle b/runtime/contrib/android/api/build.gradle
index d383b2d1c..6bb7a5631 100644
--- a/runtime/contrib/android/api/build.gradle
+++ b/runtime/contrib/android/api/build.gradle
@@ -8,7 +8,7 @@ android {
minSdkVersion 26
targetSdkVersion 29
versionCode 1
- versionName "1.11.1"
+ versionName "1.12.0"
externalNativeBuild {
ndkBuild {
diff --git a/runtime/contrib/android/api/src/main/native/onert-native-api.cpp b/runtime/contrib/android/api/src/main/native/onert-native-api.cpp
index 209264d31..72e73bee6 100644
--- a/runtime/contrib/android/api/src/main/native/onert-native-api.cpp
+++ b/runtime/contrib/android/api/src/main/native/onert-native-api.cpp
@@ -52,7 +52,7 @@ JNIEXPORT void JNICALL Java_com_samsung_onert_NativeSessionWrapper_nativeCloseSe
}
JNIEXPORT jboolean JNICALL Java_com_samsung_onert_NativeSessionWrapper_nativeLoadModelFromFile(
- JNIEnv *env, jobject, jlong handle, jstring jnnpkg_path)
+ JNIEnv *env, jobject, jlong handle, jstring jnnpkg_path)
{
if (jni_helper::verifyHandle(handle) == JNI_FALSE)
return JNI_FALSE;
@@ -103,7 +103,7 @@ JNIEXPORT jboolean JNICALL Java_com_samsung_onert_NativeSessionWrapper_nativeRun
}
JNIEXPORT jboolean JNICALL Java_com_samsung_onert_NativeSessionWrapper_nativeSetInput(
- JNIEnv *env, jobject, jlong handle, jint jindex, jint jtype, jobject jbuf, jint jbufsize)
+ JNIEnv *env, jobject, jlong handle, jint jindex, jint jtype, jobject jbuf, jint jbufsize)
{
if (jni_helper::verifyHandle(handle) == JNI_FALSE)
return JNI_FALSE;
@@ -129,7 +129,7 @@ JNIEXPORT jboolean JNICALL Java_com_samsung_onert_NativeSessionWrapper_nativeSet
}
JNIEXPORT jboolean JNICALL Java_com_samsung_onert_NativeSessionWrapper_nativeSetOutput(
- JNIEnv *env, jobject, jlong handle, jint jindex, jint jtype, jobject jbuf, jint jbufsize)
+ JNIEnv *env, jobject, jlong handle, jint jindex, jint jtype, jobject jbuf, jint jbufsize)
{
if (jni_helper::verifyHandle(handle) == JNI_FALSE)
return JNI_FALSE;
@@ -156,7 +156,7 @@ JNIEXPORT jboolean JNICALL Java_com_samsung_onert_NativeSessionWrapper_nativeSet
}
JNIEXPORT jboolean JNICALL Java_com_samsung_onert_NativeSessionWrapper_nativeSetInputLayout(
- JNIEnv *, jobject, jlong handle, jint jindex, jint jlayout)
+ JNIEnv *, jobject, jlong handle, jint jindex, jint jlayout)
{
if (jni_helper::verifyHandle(handle) == JNI_FALSE)
return JNI_FALSE;
@@ -178,7 +178,7 @@ JNIEXPORT jboolean JNICALL Java_com_samsung_onert_NativeSessionWrapper_nativeSet
}
JNIEXPORT jboolean JNICALL Java_com_samsung_onert_NativeSessionWrapper_nativeSetOutputLayout(
- JNIEnv *, jobject, jlong handle, jint jindex, jint jlayout)
+ JNIEnv *, jobject, jlong handle, jint jindex, jint jlayout)
{
if (jni_helper::verifyHandle(handle) == JNI_FALSE)
return JNI_FALSE;
@@ -234,7 +234,7 @@ JNIEXPORT jint JNICALL Java_com_samsung_onert_NativeSessionWrapper_nativeGetOutp
}
JNIEXPORT jboolean JNICALL Java_com_samsung_onert_NativeSessionWrapper_nativeSetAvailableBackends(
- JNIEnv *env, jobject, jlong handle, jstring jbackends)
+ JNIEnv *env, jobject, jlong handle, jstring jbackends)
{
if (jni_helper::verifyHandle(handle) == JNI_FALSE)
return JNI_FALSE;
@@ -255,7 +255,7 @@ JNIEXPORT jboolean JNICALL Java_com_samsung_onert_NativeSessionWrapper_nativeSet
}
JNIEXPORT jboolean JNICALL Java_com_samsung_onert_NativeSessionWrapper_nativeGetInputTensorInfo(
- JNIEnv *env, jobject, jlong handle, jint jindex, jobject jinfo)
+ JNIEnv *env, jobject, jlong handle, jint jindex, jobject jinfo)
{
if (jni_helper::verifyHandle(handle) == JNI_FALSE)
return JNI_FALSE;
@@ -277,7 +277,7 @@ JNIEXPORT jboolean JNICALL Java_com_samsung_onert_NativeSessionWrapper_nativeGet
}
JNIEXPORT jboolean JNICALL Java_com_samsung_onert_NativeSessionWrapper_nativeGetOutputTensorInfo(
- JNIEnv *env, jobject, jlong handle, jint jindex, jobject jinfo)
+ JNIEnv *env, jobject, jlong handle, jint jindex, jobject jinfo)
{
if (jni_helper::verifyHandle(handle) == JNI_FALSE)
return JNI_FALSE;
diff --git a/runtime/contrib/android/api/src/main/native/onert-native-api.h b/runtime/contrib/android/api/src/main/native/onert-native-api.h
index 13768d470..7997530ac 100644
--- a/runtime/contrib/android/api/src/main/native/onert-native-api.h
+++ b/runtime/contrib/android/api/src/main/native/onert-native-api.h
@@ -46,7 +46,7 @@ JNIEXPORT void JNICALL Java_com_samsung_onert_NativeSessionWrapper_nativeCloseSe
* Signature: (JLjava/lang/String;)Z
*/
JNIEXPORT jboolean JNICALL Java_com_samsung_onert_NativeSessionWrapper_nativeLoadModelFromFile(
- JNIEnv *, jobject, jlong, jstring);
+ JNIEnv *, jobject, jlong, jstring);
/*
* Class: com_samsung_onert_NativeSessionWrapper
@@ -71,7 +71,7 @@ JNIEXPORT jboolean JNICALL Java_com_samsung_onert_NativeSessionWrapper_nativeRun
* Signature: (JIILjava/nio/ByteBuffer;I)Z
*/
JNIEXPORT jboolean JNICALL Java_com_samsung_onert_NativeSessionWrapper_nativeSetInput(
- JNIEnv *, jobject, jlong, jint, jint, jobject, jint);
+ JNIEnv *, jobject, jlong, jint, jint, jobject, jint);
/*
* Class: com_samsung_onert_NativeSessionWrapper
@@ -79,7 +79,7 @@ JNIEXPORT jboolean JNICALL Java_com_samsung_onert_NativeSessionWrapper_nativeSet
* Signature: (JIILjava/nio/ByteBuffer;I)Z
*/
JNIEXPORT jboolean JNICALL Java_com_samsung_onert_NativeSessionWrapper_nativeSetOutput(
- JNIEnv *, jobject, jlong, jint, jint, jobject, jint);
+ JNIEnv *, jobject, jlong, jint, jint, jobject, jint);
/*
* Class: com_samsung_onert_NativeSessionWrapper
@@ -87,7 +87,7 @@ JNIEXPORT jboolean JNICALL Java_com_samsung_onert_NativeSessionWrapper_nativeSet
* Signature: (JII)Z
*/
JNIEXPORT jboolean JNICALL Java_com_samsung_onert_NativeSessionWrapper_nativeSetInputLayout(
- JNIEnv *, jobject, jlong, jint, jint);
+ JNIEnv *, jobject, jlong, jint, jint);
/*
* Class: com_samsung_onert_NativeSessionWrapper
@@ -95,7 +95,7 @@ JNIEXPORT jboolean JNICALL Java_com_samsung_onert_NativeSessionWrapper_nativeSet
* Signature: (JII)Z
*/
JNIEXPORT jboolean JNICALL Java_com_samsung_onert_NativeSessionWrapper_nativeSetOutputLayout(
- JNIEnv *, jobject, jlong, jint, jint);
+ JNIEnv *, jobject, jlong, jint, jint);
/*
* Class: com_samsung_onert_NativeSessionWrapper
@@ -121,7 +121,7 @@ JNIEXPORT jint JNICALL Java_com_samsung_onert_NativeSessionWrapper_nativeGetOutp
* Signature: (JILcom/samsung/onert/NativeSessionWrapper/InternalTensorInfo;)Z
*/
JNIEXPORT jboolean JNICALL Java_com_samsung_onert_NativeSessionWrapper_nativeGetInputTensorInfo(
- JNIEnv *, jobject, jlong, jint, jobject);
+ JNIEnv *, jobject, jlong, jint, jobject);
/*
* Class: com_samsung_onert_NativeSessionWrapper
@@ -129,7 +129,7 @@ JNIEXPORT jboolean JNICALL Java_com_samsung_onert_NativeSessionWrapper_nativeGet
* Signature: (JILcom/samsung/onert/NativeSessionWrapper/InternalTensorInfo;)Z
*/
JNIEXPORT jboolean JNICALL Java_com_samsung_onert_NativeSessionWrapper_nativeGetOutputTensorInfo(
- JNIEnv *, jobject, jlong, jint, jobject);
+ JNIEnv *, jobject, jlong, jint, jobject);
/*
* Class: com_samsung_onert_NativeSessionWrapper
@@ -137,7 +137,7 @@ JNIEXPORT jboolean JNICALL Java_com_samsung_onert_NativeSessionWrapper_nativeGet
* Signature: (JLjava/lang/String;)Z
*/
JNIEXPORT jboolean JNICALL Java_com_samsung_onert_NativeSessionWrapper_nativeSetAvailableBackends(
- JNIEnv *, jobject, jlong, jstring);
+ JNIEnv *, jobject, jlong, jstring);
#ifdef __cplusplus
}
diff --git a/runtime/contrib/android_benchmark_app/cpp/ndk_main.cpp b/runtime/contrib/android_benchmark_app/cpp/ndk_main.cpp
index 4b0e4395f..8df179a3d 100644
--- a/runtime/contrib/android_benchmark_app/cpp/ndk_main.cpp
+++ b/runtime/contrib/android_benchmark_app/cpp/ndk_main.cpp
@@ -173,7 +173,7 @@ inline void runBenchmark(JNIEnv *env, jobject thisObj, Activity &act)
}
JNIEXPORT void JNICALL Java_com_ndk_tflbench_MainActivity_runInterpreterBenchmark(
- JNIEnv *env, jobject thisObj, jobject model_buffer)
+ JNIEnv *env, jobject thisObj, jobject model_buffer)
{
setTitle(env, thisObj, "Running Interpreter Benchmark");
diff --git a/runtime/contrib/android_tflite/builtin_ops_jni.cc b/runtime/contrib/android_tflite/builtin_ops_jni.cc
index 5770701ea..597f11aa8 100644
--- a/runtime/contrib/android_tflite/builtin_ops_jni.cc
+++ b/runtime/contrib/android_tflite/builtin_ops_jni.cc
@@ -24,7 +24,7 @@ namespace tflite
std::unique_ptr<OpResolver> CreateOpResolver()
{
return std::unique_ptr<::nnfw::tflite::BuiltinOpResolver>(
- new ::nnfw::tflite::BuiltinOpResolver());
+ new ::nnfw::tflite::BuiltinOpResolver());
}
} // namespace tflite
diff --git a/runtime/contrib/heap_trace/src/cl_create_buffer_stub.cc b/runtime/contrib/heap_trace/src/cl_create_buffer_stub.cc
index d9d2700ee..2affbe066 100644
--- a/runtime/contrib/heap_trace/src/cl_create_buffer_stub.cc
+++ b/runtime/contrib/heap_trace/src/cl_create_buffer_stub.cc
@@ -31,8 +31,8 @@ cl_mem clCreateBuffer(cl_context context, cl_mem_flags flags, size_t size, void
static auto isOriginalFunctionCallSuccessful = [](cl_mem result) -> bool { return result; };
static auto originalFunction =
- findFunctionByName<cl_mem, cl_context, cl_mem_flags, size_t, void *, cl_int *>(
- "clCreateBuffer");
+ findFunctionByName<cl_mem, cl_context, cl_mem_flags, size_t, void *, cl_int *>(
+ "clCreateBuffer");
cl_mem result = originalFunction(context, flags, size, host_ptr, errcode_ret);
if (isOriginalFunctionCallSuccessful(result) && !Trace::Guard{}.isActive())
{
diff --git a/runtime/contrib/heap_trace/src/memory_pool_for_symbol_searcher_internals.h b/runtime/contrib/heap_trace/src/memory_pool_for_symbol_searcher_internals.h
index 89797ad50..3186c7ffb 100644
--- a/runtime/contrib/heap_trace/src/memory_pool_for_symbol_searcher_internals.h
+++ b/runtime/contrib/heap_trace/src/memory_pool_for_symbol_searcher_internals.h
@@ -60,7 +60,7 @@ private:
{
uint8_t *ptr_to_the_free_space_after_allocation = _ptr_to_free_space_start + size;
size_t size_of_reserved_space_after_allocation =
- ptr_to_the_free_space_after_allocation - _buffer;
+ ptr_to_the_free_space_after_allocation - _buffer;
if (size_of_reserved_space_after_allocation >= MAX_SIZE)
{
return false;
diff --git a/runtime/contrib/heap_trace/src/trace.cc b/runtime/contrib/heap_trace/src/trace.cc
index 020aeb90e..39a0c465b 100644
--- a/runtime/contrib/heap_trace/src/trace.cc
+++ b/runtime/contrib/heap_trace/src/trace.cc
@@ -72,7 +72,7 @@ void Trace::logAllocationEvent(cl_mem memory_ptr, size_t size_of_allocated_space
if (found_memory_space_description == _memory_in_use_on_gpu.end())
{
_memory_in_use_on_gpu.insert(
- std::make_pair(memory_ptr, MemoryTraits(1, size_of_allocated_space_in_bytes)));
+ std::make_pair(memory_ptr, MemoryTraits(1, size_of_allocated_space_in_bytes)));
_total_allocated_bytes_on_gpu += size_of_allocated_space_in_bytes;
if (_peak_heap_usage_on_gpu < _total_allocated_bytes_on_gpu - _total_deallocated_bytes_on_gpu)
{
diff --git a/runtime/contrib/heap_trace/src/trace.h b/runtime/contrib/heap_trace/src/trace.h
index 647c51d54..33e67e58d 100644
--- a/runtime/contrib/heap_trace/src/trace.h
+++ b/runtime/contrib/heap_trace/src/trace.h
@@ -31,7 +31,7 @@ class Trace
size_t size;
MemoryTraits(size_t init_counter_value, size_t size_of_allocated_memory)
- : ref_counter(init_counter_value), size(size_of_allocated_memory)
+ : ref_counter(init_counter_value), size(size_of_allocated_memory)
{
}
};
diff --git a/runtime/contrib/heap_trace/tests/src/cl_release_mem_object_interception_test.cc b/runtime/contrib/heap_trace/tests/src/cl_release_mem_object_interception_test.cc
index 49b8fd994..a5700b28d 100644
--- a/runtime/contrib/heap_trace/tests/src/cl_release_mem_object_interception_test.cc
+++ b/runtime/contrib/heap_trace/tests/src/cl_release_mem_object_interception_test.cc
@@ -94,9 +94,9 @@ TEST_F(ClReleaseMemObjectStub, must_log_deallocation_event_only_if_reference_cou
clReleaseMemObject(mem);
GlobalTrace.reset();
ASSERT_STREQ(
- getContentOfFile("./cl_release_mem_object_interception_test.log").c_str(),
- "On CPU - Peak heap usage: 0 B, Total allocated: 0 B, Total deallocated: 0 B\nOn "
- "GPU - Peak mem usage: 1024 B, Total allocated: 1024 B, Total deallocated: 1024 B\n");
+ getContentOfFile("./cl_release_mem_object_interception_test.log").c_str(),
+ "On CPU - Peak heap usage: 0 B, Total allocated: 0 B, Total deallocated: 0 B\nOn "
+ "GPU - Peak mem usage: 1024 B, Total allocated: 1024 B, Total deallocated: 1024 B\n");
}
TEST_F(ClReleaseMemObjectStub, must_not_log_deallocation_event_if_original_function_failed)
diff --git a/runtime/contrib/heap_trace/tests/src/malloc_interception_test.cc b/runtime/contrib/heap_trace/tests/src/malloc_interception_test.cc
index ea3eb8256..182f52c21 100644
--- a/runtime/contrib/heap_trace/tests/src/malloc_interception_test.cc
+++ b/runtime/contrib/heap_trace/tests/src/malloc_interception_test.cc
@@ -87,8 +87,8 @@ TEST_F(MallocStub, should_allocate_memory_from_pool_for_symbol_searcher_internal
}
TEST_F(
- MallocStub,
- should_not_influence_on_trace_results_even_if_orignal_function_return_any_not_null_ptr_when_incoming_size_is_zero)
+ MallocStub,
+ should_not_influence_on_trace_results_even_if_orignal_function_return_any_not_null_ptr_when_incoming_size_is_zero)
{
void *p = malloc(0);
free(p);
diff --git a/runtime/contrib/heap_trace/tests/src/realloc_interception_test.cc b/runtime/contrib/heap_trace/tests/src/realloc_interception_test.cc
index 59660fad4..e81c5dc22 100644
--- a/runtime/contrib/heap_trace/tests/src/realloc_interception_test.cc
+++ b/runtime/contrib/heap_trace/tests/src/realloc_interception_test.cc
@@ -86,16 +86,16 @@ TEST_F(ReallocStub, should_work_as_malloc_when_incoming_ptr_is_equal_to_nullptr)
ASSERT_TRUE(p);
ASSERT_STREQ(
- getContentOfFile("./realloc_interception_test.log").c_str(),
- "On CPU - Peak heap usage: 1024 B, Total allocated: 1024 B, Total deallocated: 0 B\nOn "
- "GPU - Peak mem usage: 0 B, Total allocated: 0 B, Total deallocated: 0 B\n");
+ getContentOfFile("./realloc_interception_test.log").c_str(),
+ "On CPU - Peak heap usage: 1024 B, Total allocated: 1024 B, Total deallocated: 0 B\nOn "
+ "GPU - Peak mem usage: 0 B, Total allocated: 0 B, Total deallocated: 0 B\n");
free(p);
}
TEST_F(
- ReallocStub,
- should_not_influence_on_trace_results_even_if_orignal_function_return_any_not_null_ptr_when_incoming_size_is_zero_and_ptr_is_null)
+ ReallocStub,
+ should_not_influence_on_trace_results_even_if_orignal_function_return_any_not_null_ptr_when_incoming_size_is_zero_and_ptr_is_null)
{
void *p = realloc(nullptr, 0);
free(p);
diff --git a/runtime/contrib/heap_trace/tests/src/symbol_searcher_test.cc b/runtime/contrib/heap_trace/tests/src/symbol_searcher_test.cc
index 59fdeedc9..9ed933119 100644
--- a/runtime/contrib/heap_trace/tests/src/symbol_searcher_test.cc
+++ b/runtime/contrib/heap_trace/tests/src/symbol_searcher_test.cc
@@ -70,7 +70,7 @@ TEST_F(SymbolSearcher,
fs::path pathToTestSample2 = exePath() / "libtest_sample2.so";
void *test_sample2_handle = dlopen(pathToTestSample2.c_str(), RTLD_NOW);
void *func_addr_in_test_sample2 =
- dlsym(test_sample2_handle, "funcWhichCallFuncDefinedInTestSample3");
+ dlsym(test_sample2_handle, "funcWhichCallFuncDefinedInTestSample3");
ASSERT_TRUE(test_sample2_handle);
ASSERT_TRUE((void *)funcDefinedInTestSample3_ButWrappedInTestSample1 !=
diff --git a/runtime/contrib/heap_trace/tests/src/trace_test.cc b/runtime/contrib/heap_trace/tests/src/trace_test.cc
index 1cf4c530b..4f359bb6d 100644
--- a/runtime/contrib/heap_trace/tests/src/trace_test.cc
+++ b/runtime/contrib/heap_trace/tests/src/trace_test.cc
@@ -114,15 +114,15 @@ TEST_F(Trace, should_work_correctly_in_multithreaded_environment)
GlobalTrace.reset();
string thisShouldBeInLogFile =
- "Total allocated: " +
- to_string(numberOfThreads / 2 * numberOfEmulations * numberOfBytesPerOneEmulation) +
- " B, Total deallocated: " +
- to_string(numberOfThreads / 2 * numberOfEmulations * numberOfBytesPerOneEmulation) + " B\n";
+ "Total allocated: " +
+ to_string(numberOfThreads / 2 * numberOfEmulations * numberOfBytesPerOneEmulation) +
+ " B, Total deallocated: " +
+ to_string(numberOfThreads / 2 * numberOfEmulations * numberOfBytesPerOneEmulation) + " B\n";
string andThisToo =
- "Total allocated: " +
- to_string(numberOfThreads / 2 * numberOfEmulations * numberOfBytesPerOneEmulation) +
- " B, Total deallocated: " +
- to_string(numberOfThreads / 2 * numberOfEmulations * numberOfBytesPerOneEmulation) + " B\n";
+ "Total allocated: " +
+ to_string(numberOfThreads / 2 * numberOfEmulations * numberOfBytesPerOneEmulation) +
+ " B, Total deallocated: " +
+ to_string(numberOfThreads / 2 * numberOfEmulations * numberOfBytesPerOneEmulation) + " B\n";
ASSERT_TRUE(getContentOfFile("./trace_test.log").find(thisShouldBeInLogFile) != string::npos);
ASSERT_TRUE(getContentOfFile("./trace_test.log").find(andThisToo) != string::npos);
}
diff --git a/runtime/contrib/labs/jniacl/src/jniacl_main.cc b/runtime/contrib/labs/jniacl/src/jniacl_main.cc
index 01b928981..1a34aa70e 100644
--- a/runtime/contrib/labs/jniacl/src/jniacl_main.cc
+++ b/runtime/contrib/labs/jniacl/src/jniacl_main.cc
@@ -36,12 +36,13 @@ Java_com_samsung_testaclexec_ActivityMain_RunACLJNI(JNIEnv *env, jobject)
TargetHint target_hint = TargetHint::OPENCL;
bool autoinc = true;
- graph << target_hint << Tensor(TensorInfo(TensorShape(3U, 3U, 1U, 1U), 1, DataType::F32),
- std::unique_ptr<InputAccessor>(new InputAccessor(autoinc)))
+ graph << target_hint
+ << Tensor(TensorInfo(TensorShape(3U, 3U, 1U, 1U), 1, DataType::F32),
+ std::unique_ptr<InputAccessor>(new InputAccessor(autoinc)))
<< arm_compute::graph::ConvolutionLayer(
- 3U, 3U, 1U, std::unique_ptr<WeightAccessor>(new WeightAccessor(autoinc)),
- std::unique_ptr<BiasAccessor>(new BiasAccessor()),
- arm_compute::PadStrideInfo(1, 1, 0, 0))
+ 3U, 3U, 1U, std::unique_ptr<WeightAccessor>(new WeightAccessor(autoinc)),
+ std::unique_ptr<BiasAccessor>(new BiasAccessor()),
+ arm_compute::PadStrideInfo(1, 1, 0, 0))
<< Tensor(std::unique_ptr<OutputAccessor>(new OutputAccessor()));
graph.run();
diff --git a/runtime/contrib/labs/opencl_test/src/opencl_test.cc b/runtime/contrib/labs/opencl_test/src/opencl_test.cc
index 1faa91478..68381837a 100644
--- a/runtime/contrib/labs/opencl_test/src/opencl_test.cc
+++ b/runtime/contrib/labs/opencl_test/src/opencl_test.cc
@@ -199,7 +199,7 @@ void checkContextMem()
try
{
auto kernel_functor = cl::KernelFunctor<cl_int, cl::Buffer, cl::Buffer, cl_int, cl_int>(
- gpu.program_, "memory_test"); // name should be same as cl function name
+ gpu.program_, "memory_test"); // name should be same as cl function name
// create a queue per device and queue a kernel job
@@ -256,7 +256,7 @@ void printHelp()
std::cout << "opencl information: \n\n";
std::cout << "\t -h : help\n";
std::cout
- << "\t -g : print if memory map is shared among devices in GPU (in default platform)\n\n";
+ << "\t -g : print if memory map is shared among devices in GPU (in default platform)\n\n";
std::cout << "\t -s : test for synchronized work by two devices in a GPU\n\n";
}
@@ -270,7 +270,7 @@ void printHelp()
int kernel_idx[MAX_DEVICE_NUM];
unsigned char kernel_completed = 0x00; // bit 0 = 1 means kernel by device[0] was completed.
unsigned char
- kernel_completed_flag; // if comparing kernel_completed with this var, all kernels are completed
+ kernel_completed_flag; // if comparing kernel_completed with this var, all kernels are completed
int device_num;
std::mutex kernel_complete_handler_mutex;
@@ -319,7 +319,7 @@ void testSync()
try
{
auto kernel_functor = cl::KernelFunctor<cl::Buffer, cl_int>(
- gpu.program_, "test"); // name should be same as cl function name
+ gpu.program_, "test"); // name should be same as cl function name
// variable init
cl::Event ev[MAX_DEVICE_NUM];
diff --git a/runtime/contrib/labs/tflite_examples/src/conv.cpp b/runtime/contrib/labs/tflite_examples/src/conv.cpp
index e8542c3f5..0b5f946bc 100644
--- a/runtime/contrib/labs/tflite_examples/src/conv.cpp
+++ b/runtime/contrib/labs/tflite_examples/src/conv.cpp
@@ -217,7 +217,7 @@ int main(int argc, char **argv)
// Configure Filter
const uint32_t kernel_size = KER_N * KER_C * KER_H * KER_W;
float kernel_data[kernel_size] = {
- 0.0f,
+ 0.0f,
};
// Fill kernel data in NHWC order
@@ -243,13 +243,13 @@ int main(int argc, char **argv)
}
interp.SetTensorParametersReadOnly(
- 2, kTfLiteFloat32 /* type */, "filter" /* name */, {KER_N, KER_H, KER_W, KER_C} /* dims */,
- quantization, reinterpret_cast<const char *>(kernel_data), sizeof(kernel_data));
+ 2, kTfLiteFloat32 /* type */, "filter" /* name */, {KER_N, KER_H, KER_W, KER_C} /* dims */,
+ quantization, reinterpret_cast<const char *>(kernel_data), sizeof(kernel_data));
// Configure Bias
const uint32_t bias_size = bias.size();
float bias_data[bias_size] = {
- 0.0f,
+ 0.0f,
};
// Fill bias data
diff --git a/runtime/contrib/style_transfer_app/src/bitmap_helper.cc b/runtime/contrib/style_transfer_app/src/bitmap_helper.cc
index 6211ea476..0f687b2ee 100644
--- a/runtime/contrib/style_transfer_app/src/bitmap_helper.cc
+++ b/runtime/contrib/style_transfer_app/src/bitmap_helper.cc
@@ -49,10 +49,10 @@ unsigned char *BitmapHelper::createBitmapFileHeader(int height, int width, int p
int fileSize = fileHeaderSize + infoHeaderSize + (bytesPerPixel * width + paddingSize) * height;
static unsigned char fileHeader[] = {
- 0, 0, /// signature
- 0, 0, 0, 0, /// image file size in bytes
- 0, 0, 0, 0, /// reserved
- 0, 0, 0, 0, /// start of pixel array
+ 0, 0, /// signature
+ 0, 0, 0, 0, /// image file size in bytes
+ 0, 0, 0, 0, /// reserved
+ 0, 0, 0, 0, /// start of pixel array
};
fileHeader[0] = (unsigned char)('B');
@@ -69,17 +69,17 @@ unsigned char *BitmapHelper::createBitmapFileHeader(int height, int width, int p
unsigned char *BitmapHelper::createBitmapInfoHeader(int height, int width)
{
static unsigned char infoHeader[] = {
- 0, 0, 0, 0, /// header size
- 0, 0, 0, 0, /// image width
- 0, 0, 0, 0, /// image height
- 0, 0, /// number of color planes
- 0, 0, /// bits per pixel
- 0, 0, 0, 0, /// compression
- 0, 0, 0, 0, /// image size
- 0, 0, 0, 0, /// horizontal resolution
- 0, 0, 0, 0, /// vertical resolution
- 0, 0, 0, 0, /// colors in color table
- 0, 0, 0, 0, /// important color count
+ 0, 0, 0, 0, /// header size
+ 0, 0, 0, 0, /// image width
+ 0, 0, 0, 0, /// image height
+ 0, 0, /// number of color planes
+ 0, 0, /// bits per pixel
+ 0, 0, 0, 0, /// compression
+ 0, 0, 0, 0, /// image size
+ 0, 0, 0, 0, /// horizontal resolution
+ 0, 0, 0, 0, /// vertical resolution
+ 0, 0, 0, 0, /// colors in color table
+ 0, 0, 0, 0, /// important color count
};
// Minus height means top to bottom write
@@ -191,7 +191,7 @@ int BitmapHelper::read_bmp(const std::string &input_bmp_name, std::vector<float>
// Decode image, allocating tensor once the image size is known
const uint8_t *bmp_pixels = &img_bytes[header_size];
std::vector<uint8_t> bmp =
- decode_bmp(bmp_pixels, row_size, width, abs(height), channels, top_down);
+ decode_bmp(bmp_pixels, row_size, width, abs(height), channels, top_down);
for (uint32_t j = 0; j < bmp.size(); j++)
{
input.push_back(static_cast<float>(bmp[j]));
diff --git a/runtime/contrib/style_transfer_app/src/jpeg_helper.cc b/runtime/contrib/style_transfer_app/src/jpeg_helper.cc
index ed5ae25a1..1554524f8 100644
--- a/runtime/contrib/style_transfer_app/src/jpeg_helper.cc
+++ b/runtime/contrib/style_transfer_app/src/jpeg_helper.cc
@@ -26,7 +26,7 @@ namespace StyleTransferApp
{
JpegHelper::JpegHelper(int bytes_per_pixel, J_COLOR_SPACE color_space)
- : _bytes_per_pixel(bytes_per_pixel), _color_space(color_space)
+ : _bytes_per_pixel(bytes_per_pixel), _color_space(color_space)
{
// DO NOTHING
}
diff --git a/runtime/contrib/style_transfer_app/src/style_transfer_app.cc b/runtime/contrib/style_transfer_app/src/style_transfer_app.cc
index eed0c4288..ab8735d43 100644
--- a/runtime/contrib/style_transfer_app/src/style_transfer_app.cc
+++ b/runtime/contrib/style_transfer_app/src/style_transfer_app.cc
@@ -68,10 +68,10 @@ uint64_t num_elems(const nnfw_tensorinfo *ti)
NNFW_STATUS resolve_op_backend(nnfw_session *session)
{
static std::unordered_map<std::string, std::string> operation_map = {
- {"TRANSPOSE_CONV", "OP_BACKEND_TransposeConv"}, {"CONV_2D", "OP_BACKEND_Conv2D"},
- {"DEPTHWISE_CONV_2D", "OP_BACKEND_DepthwiseConv2D"}, {"MEAN", "OP_BACKEND_Mean"},
- {"AVERAGE_POOL_2D", "OP_BACKEND_AvgPool2D"}, {"MAX_POOL_2D", "OP_BACKEND_MaxPool2D"},
- {"INSTANCE_NORM", "OP_BACKEND_InstanceNorm"}, {"ADD", "OP_BACKEND_Add"}};
+ {"TRANSPOSE_CONV", "OP_BACKEND_TransposeConv"}, {"CONV_2D", "OP_BACKEND_Conv2D"},
+ {"DEPTHWISE_CONV_2D", "OP_BACKEND_DepthwiseConv2D"}, {"MEAN", "OP_BACKEND_Mean"},
+ {"AVERAGE_POOL_2D", "OP_BACKEND_AvgPool2D"}, {"MAX_POOL_2D", "OP_BACKEND_MaxPool2D"},
+ {"INSTANCE_NORM", "OP_BACKEND_InstanceNorm"}, {"ADD", "OP_BACKEND_Add"}};
for (auto i : operation_map)
{
diff --git a/runtime/contrib/tflite_classify/src/ImageClassifier.cc b/runtime/contrib/tflite_classify/src/ImageClassifier.cc
index fae4f066c..1d92d6c86 100644
--- a/runtime/contrib/tflite_classify/src/ImageClassifier.cc
+++ b/runtime/contrib/tflite_classify/src/ImageClassifier.cc
@@ -24,9 +24,9 @@ ImageClassifier::ImageClassifier(const std::string &model_file, const std::strin
const int input_size, const int image_mean, const int image_std,
const std::string &input_name, const std::string &output_name,
const bool use_nnapi)
- : _inference(new InferenceInterface(model_file, use_nnapi)), _input_size(input_size),
- _image_mean(image_mean), _image_std(image_std), _input_name(input_name),
- _output_name(output_name)
+ : _inference(new InferenceInterface(model_file, use_nnapi)), _input_size(input_size),
+ _image_mean(image_mean), _image_std(image_std), _input_name(input_name),
+ _output_name(output_name)
{
// Load label
std::ifstream label_stream(label_file.c_str());
diff --git a/runtime/contrib/tflite_classify/src/InferenceInterface.cc b/runtime/contrib/tflite_classify/src/InferenceInterface.cc
index 160943477..562ff2ad6 100644
--- a/runtime/contrib/tflite_classify/src/InferenceInterface.cc
+++ b/runtime/contrib/tflite_classify/src/InferenceInterface.cc
@@ -20,7 +20,7 @@ using namespace tflite;
using namespace tflite::ops::builtin;
InferenceInterface::InferenceInterface(const std::string &model_file, const bool use_nnapi)
- : _interpreter(nullptr), _model(nullptr), _sess(nullptr)
+ : _interpreter(nullptr), _model(nullptr), _sess(nullptr)
{
// Load model
StderrReporter error_reporter;
diff --git a/runtime/contrib/tflite_classify/src/tflite_classify.cc b/runtime/contrib/tflite_classify/src/tflite_classify.cc
index 51758e2a6..7bed77875 100644
--- a/runtime/contrib/tflite_classify/src/tflite_classify.cc
+++ b/runtime/contrib/tflite_classify/src/tflite_classify.cc
@@ -60,9 +60,8 @@ int main(const int argc, char **argv)
}
// Create ImageClassifier
- std::unique_ptr<ImageClassifier> classifier(
- new ImageClassifier(MODEL_FILE, LABEL_FILE, INPUT_SIZE, IMAGE_MEAN, IMAGE_STD, INPUT_NAME,
- OUTPUT_NAME, use_nnapi));
+ std::unique_ptr<ImageClassifier> classifier(new ImageClassifier(
+ MODEL_FILE, LABEL_FILE, INPUT_SIZE, IMAGE_MEAN, IMAGE_STD, INPUT_NAME, OUTPUT_NAME, use_nnapi));
// Cam setting
cv::VideoCapture cap(0);
diff --git a/runtime/libs/.clang-format b/runtime/libs/.clang-format
new file mode 120000
index 000000000..f761fe4ae
--- /dev/null
+++ b/runtime/libs/.clang-format
@@ -0,0 +1 @@
+../../.clang-format.8
\ No newline at end of file
diff --git a/runtime/libs/benchmark/src/CsvWriter.cpp b/runtime/libs/benchmark/src/CsvWriter.cpp
index 5f47c6511..6233129e7 100644
--- a/runtime/libs/benchmark/src/CsvWriter.cpp
+++ b/runtime/libs/benchmark/src/CsvWriter.cpp
@@ -35,7 +35,7 @@ CsvWriter::CsvWriter(const std::string &csv_filename) : CsvWriter(csv_filename,
}
CsvWriter::CsvWriter(const std::string &csv_filename, const std::vector<std::string> &header)
- : _ofs(csv_filename), _header_size(header.size()), _col_idx(0), _row_idx(0)
+ : _ofs(csv_filename), _header_size(header.size()), _col_idx(0), _row_idx(0)
{
assert(csv_filename.empty() == false);
assert(header.size() != 0);
diff --git a/runtime/libs/benchmark/src/MemoryPoller.cpp b/runtime/libs/benchmark/src/MemoryPoller.cpp
index 050b5b163..2f3c85589 100644
--- a/runtime/libs/benchmark/src/MemoryPoller.cpp
+++ b/runtime/libs/benchmark/src/MemoryPoller.cpp
@@ -27,7 +27,7 @@ namespace benchmark
{
MemoryPoller::MemoryPoller(std::chrono::milliseconds duration, bool gpu_poll)
- : _duration(duration), _run(false), _term(false), _gpu_poll(gpu_poll)
+ : _duration(duration), _run(false), _term(false), _gpu_poll(gpu_poll)
{
if (prepareMemoryPolling() == false)
throw std::runtime_error("failed to prepare memory pooling");
diff --git a/runtime/libs/benchmark/src/Result.cpp b/runtime/libs/benchmark/src/Result.cpp
index e6cafb91c..03566874e 100644
--- a/runtime/libs/benchmark/src/Result.cpp
+++ b/runtime/libs/benchmark/src/Result.cpp
@@ -77,9 +77,9 @@ uint32_t averageMemoryKb(const benchmark::Phase &phase, int type)
return average<uint32_t, uint32_t>(phase.memory[type]);
}
-uint32_t peakMemory(const uint32_t memory[benchmark::PhaseEnum::END_OF_PHASE]
- [benchmark::MemoryType::END_OF_MEM_TYPE],
- int type)
+uint32_t peakMemory(
+ const uint32_t memory[benchmark::PhaseEnum::END_OF_PHASE][benchmark::MemoryType::END_OF_MEM_TYPE],
+ int type)
{
using namespace benchmark;
// tricky. handle WARMUP as EXECUTE
@@ -88,7 +88,7 @@ uint32_t peakMemory(const uint32_t memory[benchmark::PhaseEnum::END_OF_PHASE]
}
void printResultTime(
- const double time[benchmark::PhaseEnum::END_OF_PHASE][benchmark::FigureType::END_OF_FIG_TYPE])
+ const double time[benchmark::PhaseEnum::END_OF_PHASE][benchmark::FigureType::END_OF_FIG_TYPE])
{
using namespace benchmark;
@@ -119,8 +119,8 @@ void printResultTime(
std::cout << "===================================" << std::endl;
}
-void printResultMemory(const uint32_t memory[benchmark::PhaseEnum::END_OF_PHASE]
- [benchmark::MemoryType::END_OF_MEM_TYPE])
+void printResultMemory(
+ const uint32_t memory[benchmark::PhaseEnum::END_OF_PHASE][benchmark::MemoryType::END_OF_MEM_TYPE])
{
using namespace benchmark;
diff --git a/runtime/libs/misc/include/misc/feature/Index.h b/runtime/libs/misc/include/misc/feature/Index.h
index a361d8dd2..09d65a59a 100644
--- a/runtime/libs/misc/include/misc/feature/Index.h
+++ b/runtime/libs/misc/include/misc/feature/Index.h
@@ -62,7 +62,7 @@ public:
* @param[in] col The width index
*/
Index(int32_t batch, int32_t ch, int32_t row, int32_t col)
- : _batch{batch}, _ch{ch}, _row{row}, _col{col}
+ : _batch{batch}, _ch{ch}, _row{row}, _col{col}
{
// DO NOTHING
}
diff --git a/runtime/libs/misc/include/misc/feature/Shape.h b/runtime/libs/misc/include/misc/feature/Shape.h
index 09881f58b..2c31b457c 100644
--- a/runtime/libs/misc/include/misc/feature/Shape.h
+++ b/runtime/libs/misc/include/misc/feature/Shape.h
@@ -64,7 +64,7 @@ struct Shape
* @param[in] width The width value
*/
Shape(int32_t batch, int32_t depth, int32_t height, int32_t width)
- : N{batch}, C{depth}, H{height}, W{width}
+ : N{batch}, C{depth}, H{height}, W{width}
{
// DO NOTHING
}
diff --git a/runtime/libs/misc/include/misc/kernel/Shape.h b/runtime/libs/misc/include/misc/kernel/Shape.h
index 27d6a8bf0..176db0a11 100644
--- a/runtime/libs/misc/include/misc/kernel/Shape.h
+++ b/runtime/libs/misc/include/misc/kernel/Shape.h
@@ -55,7 +55,7 @@ struct Shape
* @param[in] width The width index
*/
Shape(int32_t count, int32_t depth, int32_t height, int32_t width)
- : N{count}, C{depth}, H{height}, W{width}
+ : N{count}, C{depth}, H{height}, W{width}
{
// DO NOTHING
}
diff --git a/runtime/libs/misc/include/misc/tensor/Object.h b/runtime/libs/misc/include/misc/tensor/Object.h
index cba4f1baf..15ad6da4f 100644
--- a/runtime/libs/misc/include/misc/tensor/Object.h
+++ b/runtime/libs/misc/include/misc/tensor/Object.h
@@ -74,9 +74,8 @@ public:
_values.resize(_shape.dim(0) * _stride.at(0));
// Set 'value'
- iterate(_shape) << [this, &fn](const Index &index) {
- _values.at(_stride.offset(index)) = fn(_shape, index);
- };
+ iterate(_shape) <<
+ [this, &fn](const Index &index) { _values.at(_stride.offset(index)) = fn(_shape, index); };
}
}
diff --git a/runtime/libs/misc/include/misc/tensor/Zipper.h b/runtime/libs/misc/include/misc/tensor/Zipper.h
index 8f0ec4ab6..b1ca3d003 100644
--- a/runtime/libs/misc/include/misc/tensor/Zipper.h
+++ b/runtime/libs/misc/include/misc/tensor/Zipper.h
@@ -48,7 +48,7 @@ public:
* @param[in] rhs @c Reader object of a tensor
*/
Zipper(const Shape &shape, const Reader<T> &lhs, const Reader<T> &rhs)
- : _shape{shape}, _lhs{lhs}, _rhs{rhs}
+ : _shape{shape}, _lhs{lhs}, _rhs{rhs}
{
// DO NOTHING
}
@@ -63,7 +63,7 @@ public:
template <typename Callable> void zip(Callable cb) const
{
iterate(_shape) <<
- [this, &cb](const Index &index) { cb(index, _lhs.at(index), _rhs.at(index)); };
+ [this, &cb](const Index &index) { cb(index, _lhs.at(index), _rhs.at(index)); };
}
private:
diff --git a/runtime/libs/misc/src/tensor/Comparator.cpp b/runtime/libs/misc/src/tensor/Comparator.cpp
index 80a18c11a..5fcf38cc8 100644
--- a/runtime/libs/misc/src/tensor/Comparator.cpp
+++ b/runtime/libs/misc/src/tensor/Comparator.cpp
@@ -33,18 +33,18 @@ std::vector<Diff<float>> Comparator::compare(const Shape &shape, const Reader<fl
std::vector<Diff<float>> res;
zip(shape, expected, obtained) <<
- [&](const Index &index, float expected_value, float obtained_value) {
- if (!_compare_fn(expected_value, obtained_value))
- {
- res.emplace_back(index, expected_value, obtained_value);
- }
-
- // Update max_diff_index, if necessary
- if (observer != nullptr)
- {
- observer->notify(index, expected_value, obtained_value);
- }
- };
+ [&](const Index &index, float expected_value, float obtained_value) {
+ if (!_compare_fn(expected_value, obtained_value))
+ {
+ res.emplace_back(index, expected_value, obtained_value);
+ }
+
+ // Update max_diff_index, if necessary
+ if (observer != nullptr)
+ {
+ observer->notify(index, expected_value, obtained_value);
+ }
+ };
return res;
}
diff --git a/runtime/libs/nnapi/include/NeuralNetworksShim.h b/runtime/libs/nnapi/include/NeuralNetworksShim.h
index 9cf52aafa..2e8ccdb76 100644
--- a/runtime/libs/nnapi/include/NeuralNetworksShim.h
+++ b/runtime/libs/nnapi/include/NeuralNetworksShim.h
@@ -225,8 +225,8 @@ inline int ANeuralNetworksModel_setOperandValue(ANeuralNetworksModel *model, int
* @return ANEURALNETWORKS_NO_ERROR if successful.
*/
inline int ANeuralNetworksModel_setOperandSymmPerChannelQuantParams(
- ANeuralNetworksModel *model, int32_t index,
- const ANeuralNetworksSymmPerChannelQuantParams *channelQuant)
+ ANeuralNetworksModel *model, int32_t index,
+ const ANeuralNetworksSymmPerChannelQuantParams *channelQuant)
{
LOAD_FUNCTION(ANeuralNetworksModel_setOperandSymmPerChannelQuantParams);
EXECUTE_FUNCTION_RETURN(model, index, channelQuant);
@@ -1218,7 +1218,7 @@ inline int ANeuralNetworksModel_setOperandExtensionData(ANeuralNetworksModel *mo
LOAD_FUNCTION(ANeuralNetworksModel_setOperandExtensionData);
EXECUTE_FUNCTION_RETURN(model, index, data, length);
}
-
+#if __ANDROID_API__ >= 30
/**
* Create a {@link ANeuralNetworksMemoryDesc} with no properties.
*
@@ -1548,7 +1548,7 @@ inline int ANeuralNetworksMemory_copy(const ANeuralNetworksMemory *src,
LOAD_FUNCTION(ANeuralNetworksMemory_copy);
EXECUTE_FUNCTION_RETURN(src, dst);
}
-
+#endif // __ANDROID_API__ >= 30
/**/
#endif // __NEURAL_NETWORKS_SHIM_H__
diff --git a/runtime/libs/nnapi/include/NeuralNetworksTypes.h b/runtime/libs/nnapi/include/NeuralNetworksTypes.h
index 2e0568791..35c7a5802 100644
--- a/runtime/libs/nnapi/include/NeuralNetworksTypes.h
+++ b/runtime/libs/nnapi/include/NeuralNetworksTypes.h
@@ -56,12 +56,12 @@ typedef int (*ANeuralNetworksModel_setOperandValue_fn)(ANeuralNetworksModel *mod
const void *buffer, size_t length);
typedef int (*ANeuralNetworksModel_setOperandSymmPerChannelQuantParams_fn)(
- ANeuralNetworksModel *model, int32_t index,
- const ANeuralNetworksSymmPerChannelQuantParams *channelQuant);
+ ANeuralNetworksModel *model, int32_t index,
+ const ANeuralNetworksSymmPerChannelQuantParams *channelQuant);
typedef int (*ANeuralNetworksModel_setOperandValueFromMemory_fn)(
- ANeuralNetworksModel *model, int32_t index, const ANeuralNetworksMemory *memory, size_t offset,
- size_t length);
+ ANeuralNetworksModel *model, int32_t index, const ANeuralNetworksMemory *memory, size_t offset,
+ size_t length);
typedef int (*ANeuralNetworksModel_addOperation_fn)(ANeuralNetworksModel *model,
ANeuralNetworksOperationType type,
@@ -88,8 +88,8 @@ typedef int (*ANeuralNetworksExecution_setInput_fn)(ANeuralNetworksExecution *ex
const void *buffer, size_t length);
typedef int (*ANeuralNetworksExecution_setInputFromMemory_fn)(
- ANeuralNetworksExecution *execution, int32_t index, const ANeuralNetworksOperandType *type,
- const ANeuralNetworksMemory *memory, size_t offset, size_t length);
+ ANeuralNetworksExecution *execution, int32_t index, const ANeuralNetworksOperandType *type,
+ const ANeuralNetworksMemory *memory, size_t offset, size_t length);
typedef int (*ANeuralNetworksExecution_setOutput_fn)(ANeuralNetworksExecution *execution,
int32_t index,
@@ -97,8 +97,8 @@ typedef int (*ANeuralNetworksExecution_setOutput_fn)(ANeuralNetworksExecution *e
void *buffer, size_t length);
typedef int (*ANeuralNetworksExecution_setOutputFromMemory_fn)(
- ANeuralNetworksExecution *execution, int32_t index, const ANeuralNetworksOperandType *type,
- const ANeuralNetworksMemory *memory, size_t offset, size_t length);
+ ANeuralNetworksExecution *execution, int32_t index, const ANeuralNetworksOperandType *type,
+ const ANeuralNetworksMemory *memory, size_t offset, size_t length);
typedef int (*ANeuralNetworksExecution_startCompute_fn)(ANeuralNetworksExecution *execution,
ANeuralNetworksEvent **event);
@@ -125,35 +125,39 @@ typedef int (*ANeuralNetworksDevice_getFeatureLevel_fn)(const ANeuralNetworksDev
int64_t *featureLevel);
typedef int (*ANeuralNetworksModel_getSupportedOperationsForDevices_fn)(
- const ANeuralNetworksModel *model, const ANeuralNetworksDevice *const *devices,
- uint32_t numDevices, bool *supportedOps);
+ const ANeuralNetworksModel *model, const ANeuralNetworksDevice *const *devices,
+ uint32_t numDevices, bool *supportedOps);
typedef int (*ANeuralNetworksCompilation_createForDevices_fn)(
- ANeuralNetworksModel *model, const ANeuralNetworksDevice *const *devices, uint32_t numDevices,
- ANeuralNetworksCompilation **compilation);
+ ANeuralNetworksModel *model, const ANeuralNetworksDevice *const *devices, uint32_t numDevices,
+ ANeuralNetworksCompilation **compilation);
typedef int (*ANeuralNetworksCompilation_setCaching_fn)(ANeuralNetworksCompilation *compilation,
const char *cacheDir, const uint8_t *token);
+#if __ANDROID_API__ >= 30
typedef int (*ANeuralNetworksCompilation_setTimeout_fn)(ANeuralNetworksCompilation *compilation,
uint64_t duration);
typedef int (*ANeuralNetworksCompilation_setPriority_fn)(ANeuralNetworksCompilation *compilation,
int priority);
+#endif // __ANDROID_API__ >= 30
typedef int (*ANeuralNetworksExecution_compute_fn)(ANeuralNetworksExecution *execution);
+#if __ANDROID_API__ >= 30
typedef int (*ANeuralNetworksExecution_setTimeout_fn)(ANeuralNetworksExecution *execution,
uint64_t duration);
typedef int (*ANeuralNetworksExecution_setLoopTimeout_fn)(ANeuralNetworksExecution *execution,
uint64_t duration);
+#endif // __ANDROID_API__ >= 30
typedef int (*ANeuralNetworksExecution_getOutputOperandRank_fn)(ANeuralNetworksExecution *execution,
int32_t index, uint32_t *rank);
typedef int (*ANeuralNetworksExecution_getOutputOperandDimensions_fn)(
- ANeuralNetworksExecution *execution, int32_t index, uint32_t *dimensions);
+ ANeuralNetworksExecution *execution, int32_t index, uint32_t *dimensions);
typedef int (*ANeuralNetworksBurst_create_fn)(ANeuralNetworksCompilation *compilation,
ANeuralNetworksBurst **burst);
@@ -182,24 +186,25 @@ typedef int (*ANeuralNetworksModel_getExtensionOperandType_fn)(ANeuralNetworksMo
int32_t *type);
typedef int (*ANeuralNetworksModel_getExtensionOperationType_fn)(
- ANeuralNetworksModel *model, const char *extensionName, uint16_t operationCodeWithinExtension,
- ANeuralNetworksOperationType *type);
+ ANeuralNetworksModel *model, const char *extensionName, uint16_t operationCodeWithinExtension,
+ ANeuralNetworksOperationType *type);
typedef int (*ANeuralNetworksModel_setOperandExtensionData_fn)(ANeuralNetworksModel *model,
int32_t index, const void *data,
size_t length);
+#if __ANDROID_API__ >= 30
typedef int (*ANeuralNetworksMemoryDesc_create_fn)(ANeuralNetworksMemoryDesc **desc);
typedef void (*ANeuralNetworksMemoryDesc_free_fn)(ANeuralNetworksMemoryDesc *desc);
typedef int (*ANeuralNetworksMemoryDesc_addInputRole_fn)(
- ANeuralNetworksMemoryDesc *desc, const ANeuralNetworksCompilation *compilation, int32_t index,
- float frequency);
+ ANeuralNetworksMemoryDesc *desc, const ANeuralNetworksCompilation *compilation, int32_t index,
+ float frequency);
typedef int (*ANeuralNetworksMemoryDesc_addOutputRole_fn)(
- ANeuralNetworksMemoryDesc *desc, const ANeuralNetworksCompilation *compilation, uint32_t index,
- float frequency);
+ ANeuralNetworksMemoryDesc *desc, const ANeuralNetworksCompilation *compilation, uint32_t index,
+ float frequency);
typedef int (*ANeuralNetworksMemoryDesc_setDimensions_fn)(ANeuralNetworksMemoryDesc *desc,
uint32_t rank,
@@ -212,5 +217,5 @@ typedef int (*ANeuralNetworksMemory_createFromDesc_fn)(const ANeuralNetworksMemo
typedef int (*ANeuralNetworksMemory_copy_fn)(const ANeuralNetworksMemory *src,
const ANeuralNetworksMemory *dst);
-
+#endif // __ANDROID_API__ >= 30
#endif // __NEURAL_NETWORKS_TYPES_H__
diff --git a/runtime/libs/rua/dyn/src/DynamicBinder.cpp b/runtime/libs/rua/dyn/src/DynamicBinder.cpp
index fa3f0bb1e..f49892de1 100644
--- a/runtime/libs/rua/dyn/src/DynamicBinder.cpp
+++ b/runtime/libs/rua/dyn/src/DynamicBinder.cpp
@@ -97,8 +97,8 @@ typedef int (*ANeuralNetworksModel_setOperandValue_fn)(ANeuralNetworksModel *mod
const void *buffer, size_t length);
typedef int (*ANeuralNetworksModel_setOperandValueFromMemory_fn)(
- ANeuralNetworksModel *model, int32_t index, const ANeuralNetworksMemory *memory, size_t offset,
- size_t length);
+ ANeuralNetworksModel *model, int32_t index, const ANeuralNetworksMemory *memory, size_t offset,
+ size_t length);
typedef int (*ANeuralNetworksModel_addOperation_fn)(ANeuralNetworksModel *model,
ANeuralNetworksOperationType type,
@@ -242,8 +242,8 @@ typedef int (*ANeuralNetworksExecution_setInput_fn)(ANeuralNetworksExecution *ex
const void *buffer, size_t length);
typedef int (*ANeuralNetworksExecution_setInputFromMemory_fn)(
- ANeuralNetworksExecution *execution, int32_t index, const ANeuralNetworksOperandType *type,
- const ANeuralNetworksMemory *memory, size_t offset, size_t length);
+ ANeuralNetworksExecution *execution, int32_t index, const ANeuralNetworksOperandType *type,
+ const ANeuralNetworksMemory *memory, size_t offset, size_t length);
typedef int (*ANeuralNetworksExecution_setOutput_fn)(ANeuralNetworksExecution *execution,
int32_t index,
@@ -251,8 +251,8 @@ typedef int (*ANeuralNetworksExecution_setOutput_fn)(ANeuralNetworksExecution *e
void *buffer, size_t length);
typedef int (*ANeuralNetworksExecution_setOutputFromMemory_fn)(
- ANeuralNetworksExecution *execution, int32_t index, const ANeuralNetworksOperandType *type,
- const ANeuralNetworksMemory *memory, size_t offset, size_t length);
+ ANeuralNetworksExecution *execution, int32_t index, const ANeuralNetworksOperandType *type,
+ const ANeuralNetworksMemory *memory, size_t offset, size_t length);
typedef int (*ANeuralNetworksExecution_startCompute_fn)(ANeuralNetworksExecution *execution,
ANeuralNetworksEvent **event);
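
DynamicBinder keeps these function-pointer typedefs so NNAPI symbols can be resolved at runtime instead of being linked directly. A minimal sketch of that idea follows, using a placeholder library ("libexample.so") and symbol ("example_add") rather than the real libneuralnetworks.so entry points.

#include <dlfcn.h>
#include <iostream>

typedef int (*example_add_fn)(int, int);

example_add_fn load_example_add()
{
  // Open the library lazily; a real binder would keep the handle for the process lifetime.
  void *handle = dlopen("libexample.so", RTLD_LAZY | RTLD_LOCAL);
  if (handle == nullptr)
    return nullptr;
  // dlsym returns void*; cast it to the expected function-pointer type.
  return reinterpret_cast<example_add_fn>(dlsym(handle, "example_add"));
}

int main()
{
  example_add_fn fn = load_example_add();
  if (fn == nullptr)
  {
    std::cout << "symbol unavailable, using fallback" << std::endl;
    return 0;
  }
  std::cout << fn(1, 2) << std::endl;
  return 0;
}
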
diff --git a/runtime/libs/tflite/include/tflite/Diff.h b/runtime/libs/tflite/include/tflite/Diff.h
index fdc1a310b..1c35b3450 100644
--- a/runtime/libs/tflite/include/tflite/Diff.h
+++ b/runtime/libs/tflite/include/tflite/Diff.h
@@ -47,7 +47,7 @@ public:
 * @param[in] comparator Comparator object for tensor comparison
*/
TfLiteInterpMatchApp(const nnfw::misc::tensor::Comparator &comparator)
- : _verbose{false}, _comparator(comparator)
+ : _verbose{false}, _comparator(comparator)
{
// DO NOTHING
}
diff --git a/runtime/libs/tflite/include/tflite/RandomTestRunner.h b/runtime/libs/tflite/include/tflite/RandomTestRunner.h
index c0b304c74..abbf3b224 100644
--- a/runtime/libs/tflite/include/tflite/RandomTestRunner.h
+++ b/runtime/libs/tflite/include/tflite/RandomTestRunner.h
@@ -55,7 +55,7 @@ public:
* @param[in] quantization TfLiteQuantizationParams type to represent quantization value
*/
RandomTestRunner(uint32_t seed, const RandomTestParam &param)
- : _randgen{seed, 0.0f, 2.0f}, _param{param}
+ : _randgen{seed, 0.0f, 2.0f}, _param{param}
{
// DO NOTHING
}
diff --git a/runtime/libs/tflite/include/tflite/TensorLogger.h b/runtime/libs/tflite/include/tflite/TensorLogger.h
index a824c3411..0837dfc94 100644
--- a/runtime/libs/tflite/include/tflite/TensorLogger.h
+++ b/runtime/libs/tflite/include/tflite/TensorLogger.h
@@ -107,9 +107,8 @@ private:
const TfLiteTensor *tensor = interp.tensor(id);
_outfile << "# tensor name: " << tensor->name << std::endl;
- _outfile << "# tflite::interpreter.tensor(" << id << ") -> "
- "tensor_value_gen["
- << log_index << "]" << std::endl;
+ _outfile << "# tflite::interpreter.tensor(" << id << ") -> tensor_value_gen[" << log_index
+ << "]" << std::endl;
if (tensor->type == kTfLiteInt32)
{
diff --git a/runtime/libs/tflite/src/Diff.cpp b/runtime/libs/tflite/src/Diff.cpp
index 39f994352..2d2b66e40 100644
--- a/runtime/libs/tflite/src/Diff.cpp
+++ b/runtime/libs/tflite/src/Diff.cpp
@@ -29,9 +29,9 @@ class DiffSummary : public nnfw::misc::tensor::Comparator::Observer
{
public:
DiffSummary()
- : max_abs_diff_index(0), max_abs_diff_expected{0.0f}, max_abs_diff_obtained{0.0f},
- max_abs_diff_value{0.0f}, max_rel_diff_index(0), max_rel_diff_expected{0.0f},
- max_rel_diff_obtained{0.0f}, max_rel_diff_value{0.0f}
+ : max_abs_diff_index(0), max_abs_diff_expected{0.0f}, max_abs_diff_obtained{0.0f},
+ max_abs_diff_value{0.0f}, max_rel_diff_index(0), max_rel_diff_expected{0.0f},
+ max_rel_diff_obtained{0.0f}, max_rel_diff_value{0.0f}
{
// DO NOTHING
}
@@ -86,12 +86,12 @@ bool TfLiteInterpMatchApp::compareSingleTensorView(const nnfw::tflite::TensorVie
using nnfw::misc::tensor::zip;
zip(expected.shape(), expected, obtained)
- << [&](const Index &index, T expected_value, T obtained_value) {
- if (expected_value != obtained_value)
- {
- diffs.emplace_back(index, expected_value, obtained_value);
- }
- };
+ << [&](const Index &index, T expected_value, T obtained_value) {
+ if (expected_value != obtained_value)
+ {
+ diffs.emplace_back(index, expected_value, obtained_value);
+ }
+ };
// TODO Unify summary generation code
if (diffs.size() == 0)
@@ -121,8 +121,8 @@ bool TfLiteInterpMatchApp::compareSingleTensorView(const nnfw::tflite::TensorVie
template <>
bool TfLiteInterpMatchApp::compareSingleTensorView<float>(
- const nnfw::tflite::TensorView<float> &expected,
- const nnfw::tflite::TensorView<float> &obtained, int id) const
+ const nnfw::tflite::TensorView<float> &expected, const nnfw::tflite::TensorView<float> &obtained,
+ int id) const
{
DiffSummary summary;
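
DiffSummary, whose constructor is reformatted above, tracks both the largest absolute and the largest relative difference seen while comparing tensors. A small standalone sketch of those two metrics (not the nnfw implementation; the epsilon guard is an assumption to keep the relative metric finite when the expected value is zero):

#include <algorithm>
#include <cmath>
#include <iostream>

struct DiffMetrics
{
  float max_abs = 0.0f;
  float max_rel = 0.0f;

  void update(float expected, float obtained)
  {
    const float abs_diff = std::fabs(expected - obtained);
    // Guard the denominator so exact zeros do not blow up the relative metric.
    const float denom = std::max(std::fabs(expected), 1e-6f);
    max_abs = std::max(max_abs, abs_diff);
    max_rel = std::max(max_rel, abs_diff / denom);
  }
};

int main()
{
  DiffMetrics m;
  const float expected[] = {1.0f, 100.0f, 0.001f};
  const float obtained[] = {1.001f, 101.0f, 0.0012f};
  for (int i = 0; i < 3; ++i)
    m.update(expected[i], obtained[i]);
  std::cout << "max abs diff: " << m.max_abs << ", max rel diff: " << m.max_rel << std::endl;
  return 0;
}
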
diff --git a/runtime/libs/tflite/src/RandomTestRunner.cpp b/runtime/libs/tflite/src/RandomTestRunner.cpp
index f7fccbf3b..3fa9a973f 100644
--- a/runtime/libs/tflite/src/RandomTestRunner.cpp
+++ b/runtime/libs/tflite/src/RandomTestRunner.cpp
@@ -68,12 +68,12 @@ void RandomTestRunner::compile(const nnfw::tflite::Builder &builder)
int32_t value = 0;
nnfw::misc::tensor::iterate(tfl_interp_view.shape())
- << [&](const nnfw::misc::tensor::Index &ind) {
- // TODO Generate random values
- tfl_interp_view.at(ind) = value;
- nnapi_view.at(ind) = value;
- ++value;
- };
+ << [&](const nnfw::misc::tensor::Index &ind) {
+ // TODO Generate random values
+ tfl_interp_view.at(ind) = value;
+ nnapi_view.at(ind) = value;
+ ++value;
+ };
};
 // Generate signed 32-bit integer (s32) input
@@ -89,11 +89,11 @@ void RandomTestRunner::compile(const nnfw::tflite::Builder &builder)
int32_t value = 0;
nnfw::misc::tensor::iterate(tfl_interp_view.shape())
- << [&](const nnfw::misc::tensor::Index &ind) {
- // TODO Generate random values
- tfl_interp_view.at(ind) = value;
- nnapi_view.at(ind) = value;
- };
+ << [&](const nnfw::misc::tensor::Index &ind) {
+ // TODO Generate random values
+ tfl_interp_view.at(ind) = value;
+ nnapi_view.at(ind) = value;
+ };
};
initializers[kTfLiteUInt8] = [&](int id, Interpreter *tfl_interp, Interpreter *nnapi) {
@@ -106,19 +106,19 @@ void RandomTestRunner::compile(const nnfw::tflite::Builder &builder)
assert(tfl_interp_view.shape() == nnapi_view.shape());
auto fp = static_cast<uint8_t (nnfw::misc::RandomGenerator::*)(
- const ::nnfw::misc::tensor::Shape &, const ::nnfw::misc::tensor::Index &)>(
- &nnfw::misc::RandomGenerator::generate<uint8_t>);
+ const ::nnfw::misc::tensor::Shape &, const ::nnfw::misc::tensor::Index &)>(
+ &nnfw::misc::RandomGenerator::generate<uint8_t>);
const nnfw::misc::tensor::Object<uint8_t> data(tfl_interp_view.shape(),
std::bind(fp, _randgen, _1, _2));
assert(tfl_interp_view.shape() == data.shape());
nnfw::misc::tensor::iterate(tfl_interp_view.shape())
- << [&](const nnfw::misc::tensor::Index &ind) {
- const auto value = data.at(ind);
+ << [&](const nnfw::misc::tensor::Index &ind) {
+ const auto value = data.at(ind);
- tfl_interp_view.at(ind) = value;
- nnapi_view.at(ind) = value;
- };
+ tfl_interp_view.at(ind) = value;
+ nnapi_view.at(ind) = value;
+ };
};
reseters[kTfLiteUInt8] = [&](int id, Interpreter *tfl_interp, Interpreter *nnapi) {
@@ -131,8 +131,8 @@ void RandomTestRunner::compile(const nnfw::tflite::Builder &builder)
assert(tfl_interp_view.shape() == nnapi_view.shape());
auto fp = static_cast<uint8_t (nnfw::misc::RandomGenerator::*)(
- const ::nnfw::misc::tensor::Shape &, const ::nnfw::misc::tensor::Index &)>(
- &nnfw::misc::RandomGenerator::generate<uint8_t>);
+ const ::nnfw::misc::tensor::Shape &, const ::nnfw::misc::tensor::Index &)>(
+ &nnfw::misc::RandomGenerator::generate<uint8_t>);
const nnfw::misc::tensor::Object<uint8_t> data(tfl_interp_view.shape(),
std::bind(fp, _randgen, _1, _2));
assert(tfl_interp_view.shape() == data.shape());
@@ -140,10 +140,10 @@ void RandomTestRunner::compile(const nnfw::tflite::Builder &builder)
uint8_t value = 0;
nnfw::misc::tensor::iterate(tfl_interp_view.shape())
- << [&](const nnfw::misc::tensor::Index &ind) {
- tfl_interp_view.at(ind) = value;
- nnapi_view.at(ind) = value;
- };
+ << [&](const nnfw::misc::tensor::Index &ind) {
+ tfl_interp_view.at(ind) = value;
+ nnapi_view.at(ind) = value;
+ };
};
initializers[kTfLiteFloat32] = [&](int id, Interpreter *tfl_interp, Interpreter *nnapi) {
@@ -156,20 +156,20 @@ void RandomTestRunner::compile(const nnfw::tflite::Builder &builder)
assert(tfl_interp_view.shape() == nnapi_view.shape());
auto fp = static_cast<float (nnfw::misc::RandomGenerator::*)(
- const ::nnfw::misc::tensor::Shape &, const ::nnfw::misc::tensor::Index &)>(
- &nnfw::misc::RandomGenerator::generate<float>);
+ const ::nnfw::misc::tensor::Shape &, const ::nnfw::misc::tensor::Index &)>(
+ &nnfw::misc::RandomGenerator::generate<float>);
const nnfw::misc::tensor::Object<float> data(tfl_interp_view.shape(),
std::bind(fp, _randgen, _1, _2));
assert(tfl_interp_view.shape() == data.shape());
nnfw::misc::tensor::iterate(tfl_interp_view.shape())
- << [&](const nnfw::misc::tensor::Index &ind) {
- const auto value = data.at(ind);
+ << [&](const nnfw::misc::tensor::Index &ind) {
+ const auto value = data.at(ind);
- tfl_interp_view.at(ind) = value;
- nnapi_view.at(ind) = value;
- };
+ tfl_interp_view.at(ind) = value;
+ nnapi_view.at(ind) = value;
+ };
};
reseters[kTfLiteFloat32] = [&](int id, Interpreter *tfl_interp, Interpreter *nnapi) {
@@ -182,8 +182,8 @@ void RandomTestRunner::compile(const nnfw::tflite::Builder &builder)
assert(tfl_interp_view.shape() == nnapi_view.shape());
auto fp = static_cast<float (nnfw::misc::RandomGenerator::*)(
- const ::nnfw::misc::tensor::Shape &, const ::nnfw::misc::tensor::Index &)>(
- &nnfw::misc::RandomGenerator::generate<float>);
+ const ::nnfw::misc::tensor::Shape &, const ::nnfw::misc::tensor::Index &)>(
+ &nnfw::misc::RandomGenerator::generate<float>);
const nnfw::misc::tensor::Object<float> data(tfl_interp_view.shape(),
std::bind(fp, _randgen, _1, _2));
@@ -192,10 +192,10 @@ void RandomTestRunner::compile(const nnfw::tflite::Builder &builder)
float value = 0;
nnfw::misc::tensor::iterate(tfl_interp_view.shape())
- << [&](const nnfw::misc::tensor::Index &ind) {
- tfl_interp_view.at(ind) = value;
- nnapi_view.at(ind) = value;
- };
+ << [&](const nnfw::misc::tensor::Index &ind) {
+ tfl_interp_view.at(ind) = value;
+ nnapi_view.at(ind) = value;
+ };
};
initializers[kTfLiteBool] = [&](int id, Interpreter *tfl_interp, Interpreter *nnapi) {
@@ -208,20 +208,20 @@ void RandomTestRunner::compile(const nnfw::tflite::Builder &builder)
assert(tfl_interp_view.shape() == nnapi_view.shape());
auto fp = static_cast<bool (nnfw::misc::RandomGenerator::*)(
- const ::nnfw::misc::tensor::Shape &, const ::nnfw::misc::tensor::Index &)>(
- &nnfw::misc::RandomGenerator::generate<bool>);
+ const ::nnfw::misc::tensor::Shape &, const ::nnfw::misc::tensor::Index &)>(
+ &nnfw::misc::RandomGenerator::generate<bool>);
const nnfw::misc::tensor::Object<bool> data(tfl_interp_view.shape(),
std::bind(fp, _randgen, _1, _2));
assert(tfl_interp_view.shape() == data.shape());
nnfw::misc::tensor::iterate(tfl_interp_view.shape())
- << [&](const nnfw::misc::tensor::Index &ind) {
- const auto value = data.at(ind);
+ << [&](const nnfw::misc::tensor::Index &ind) {
+ const auto value = data.at(ind);
- tfl_interp_view.at(ind) = value;
- nnapi_view.at(ind) = value;
- };
+ tfl_interp_view.at(ind) = value;
+ nnapi_view.at(ind) = value;
+ };
};
reseters[kTfLiteBool] = [&](int id, Interpreter *tfl_interp, Interpreter *nnapi) {
@@ -234,8 +234,8 @@ void RandomTestRunner::compile(const nnfw::tflite::Builder &builder)
assert(tfl_interp_view.shape() == nnapi_view.shape());
auto fp = static_cast<bool (nnfw::misc::RandomGenerator::*)(
- const ::nnfw::misc::tensor::Shape &, const ::nnfw::misc::tensor::Index &)>(
- &nnfw::misc::RandomGenerator::generate<bool>);
+ const ::nnfw::misc::tensor::Shape &, const ::nnfw::misc::tensor::Index &)>(
+ &nnfw::misc::RandomGenerator::generate<bool>);
const nnfw::misc::tensor::Object<bool> data(tfl_interp_view.shape(),
std::bind(fp, _randgen, _1, _2));
@@ -244,10 +244,10 @@ void RandomTestRunner::compile(const nnfw::tflite::Builder &builder)
bool value = false;
nnfw::misc::tensor::iterate(tfl_interp_view.shape())
- << [&](const nnfw::misc::tensor::Index &ind) {
- tfl_interp_view.at(ind) = value;
- nnapi_view.at(ind) = value;
- };
+ << [&](const nnfw::misc::tensor::Index &ind) {
+ tfl_interp_view.at(ind) = value;
+ nnapi_view.at(ind) = value;
+ };
};
// Fill IFM with random numbers
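
The RandomTestRunner hunks above only re-indent the per-type initializer lambdas, which are stored in a map keyed by tensor type and fill the reference and test interpreters with identical values. A self-contained sketch of that pattern with placeholder element types and plain buffers instead of the TfLite interpreters:

#include <cstddef>
#include <cstdint>
#include <functional>
#include <iostream>
#include <map>
#include <random>
#include <vector>

enum class ElemType
{
  Float32,
  Int32
};

int main()
{
  std::mt19937 rng(1234); // fixed seed => reproducible test inputs

  std::vector<float> ref_f32(4), test_f32(4);
  std::vector<int32_t> ref_i32(4), test_i32(4);

  std::map<ElemType, std::function<void()>> initializers;
  initializers[ElemType::Float32] = [&]() {
    std::uniform_real_distribution<float> dist(0.0f, 2.0f);
    for (std::size_t i = 0; i < ref_f32.size(); ++i)
    {
      const float value = dist(rng);
      ref_f32[i] = value;  // reference interpreter input
      test_f32[i] = value; // test interpreter gets the very same value
    }
  };
  initializers[ElemType::Int32] = [&]() {
    int32_t value = 0;
    for (std::size_t i = 0; i < ref_i32.size(); ++i)
    {
      ref_i32[i] = value;
      test_i32[i] = value;
      ++value;
    }
  };

  initializers[ElemType::Float32]();
  initializers[ElemType::Int32]();
  std::cout << "first float input: " << ref_f32[0] << std::endl;
  return 0;
}
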
diff --git a/runtime/onert/api/.clang-format b/runtime/onert/api/.clang-format
new file mode 120000
index 000000000..83185fee3
--- /dev/null
+++ b/runtime/onert/api/.clang-format
@@ -0,0 +1 @@
+../../../.clang-format.8 \ No newline at end of file
diff --git a/runtime/onert/api/include/nnfw.h b/runtime/onert/api/include/nnfw.h
index 76380b4b8..6eb7e6ba9 100644
--- a/runtime/onert/api/include/nnfw.h
+++ b/runtime/onert/api/include/nnfw.h
@@ -64,7 +64,8 @@ typedef struct nnfw_session nnfw_session;
*
* The type of tensor represented in {@link nnfw_tensorinfo}
*/
-typedef enum {
+typedef enum
+{
/** A tensor of 32 bit floating point */
NNFW_TYPE_TENSOR_FLOAT32 = 0,
/** A tensor of 32 bit signed integer */
@@ -96,7 +97,8 @@ typedef enum {
/**
* @brief Result values returned from a call to an API function
*/
-typedef enum {
+typedef enum
+{
/** Successful */
NNFW_STATUS_NO_ERROR = 0,
/**
@@ -117,7 +119,8 @@ typedef enum {
/**
* @brief Data format of a tensor
*/
-typedef enum {
+typedef enum
+{
/** Don't care layout */
NNFW_LAYOUT_NONE = 0,
/**
@@ -135,7 +138,8 @@ typedef enum {
/**
* @brief Information ID for retrieving information on nnfw (e.g. version)
*/
-typedef enum {
+typedef enum
+{
/** nnfw runtime version
* Its value is uint32 in 0xMMmmmmPP, where MM = major, mmmm = minor, PP = patch.
*/
diff --git a/runtime/onert/api/include/nnfw_internal.h b/runtime/onert/api/include/nnfw_internal.h
index eb4b6d629..a88e32436 100644
--- a/runtime/onert/api/include/nnfw_internal.h
+++ b/runtime/onert/api/include/nnfw_internal.h
@@ -35,4 +35,13 @@ NNFW_STATUS nnfw_get_config(nnfw_session *session, const char *key, char *value,
*/
NNFW_STATUS nnfw_load_circle_from_buffer(nnfw_session *session, uint8_t *buffer, size_t size);
+/**
+ * @brief Load a tflite/circle model from file.
+ *
+ * @param[in] session session
+ * @param[in] file_path Path to the model file; the model type (tflite/circle) is determined by
+ *                      the file extension
+ * @return NNFW_STATUS
+ */
+NNFW_STATUS nnfw_load_model_from_modelfile(nnfw_session *session, const char *file_path);
+
#endif // __NNFW_INTERNAL_H__
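
A usage sketch for the internal entry point declared above, assuming a placeholder model path. nnfw_create_session, nnfw_prepare, nnfw_run and nnfw_close_session are the existing public API; error handling is abbreviated, and real code would also bind buffers with nnfw_set_input/nnfw_set_output before running.

#include "nnfw.h"
#include "nnfw_internal.h"

#include <cstdio>

int main()
{
  nnfw_session *session = nullptr;
  if (nnfw_create_session(&session) != NNFW_STATUS_NO_ERROR)
    return 1;

  // The model type is picked from the extension (.tflite or .circle).
  if (nnfw_load_model_from_modelfile(session, "/path/to/model.tflite") != NNFW_STATUS_NO_ERROR)
  {
    std::printf("failed to load model\n");
    nnfw_close_session(session);
    return 1;
  }

  // Compile, then run; inputs/outputs must be set via nnfw_set_input/nnfw_set_output first.
  nnfw_prepare(session);
  nnfw_run(session);
  nnfw_close_session(session);
  return 0;
}
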
diff --git a/runtime/onert/api/include/nnfw_version.h b/runtime/onert/api/include/nnfw_version.h
index 31c3890e3..28703c0eb 100644
--- a/runtime/onert/api/include/nnfw_version.h
+++ b/runtime/onert/api/include/nnfw_version.h
@@ -21,6 +21,6 @@
* NNFW_VERSION is a uint32 value representing nnfw runtime version
* in 0xMMmmmmPP, where MM = major, mmmm = minor, PP = patch
*/
-#define NNFW_VERSION 0x01000b01
+#define NNFW_VERSION 0x01000c00
#endif // __NNFW_VERSION_H__
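
A quick check of the 0xMMmmmmPP encoding documented above: the new value 0x01000c00 decodes to 1.12.0 (and the previous 0x01000b01 to 1.11.1), matching the imported release.

#include <cstdint>
#include <iostream>

int main()
{
  const uint32_t version = 0x01000c00; // NNFW_VERSION after this change
  const uint32_t major = (version >> 24) & 0xff;  // MM
  const uint32_t minor = (version >> 8) & 0xffff; // mmmm
  const uint32_t patch = version & 0xff;          // PP
  std::cout << major << "." << minor << "." << patch << std::endl; // prints 1.12.0
  return 0;
}
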
diff --git a/runtime/onert/api/src/CustomKernel.cc b/runtime/onert/api/src/CustomKernel.cc
index 3f3a5d81e..56525feff 100644
--- a/runtime/onert/api/src/CustomKernel.cc
+++ b/runtime/onert/api/src/CustomKernel.cc
@@ -65,7 +65,7 @@ public:
};
Kernel::Kernel(const nnfw_custom_eval evalFunction)
- : _in_params(), _userdata(nullptr), _userdata_size(0), _evalFunction(evalFunction)
+ : _in_params(), _userdata(nullptr), _userdata_size(0), _evalFunction(evalFunction)
{
}
diff --git a/runtime/onert/api/src/nnfw_api.cc b/runtime/onert/api/src/nnfw_api.cc
index 835b2078a..4eba4ecec 100644
--- a/runtime/onert/api/src/nnfw_api.cc
+++ b/runtime/onert/api/src/nnfw_api.cc
@@ -90,7 +90,7 @@ NNFW_STATUS nnfw_close_session(nnfw_session *session)
NNFW_STATUS nnfw_load_model_from_file(nnfw_session *session, const char *pacakge_file_path)
{
NNFW_RETURN_ERROR_IF_NULL(session);
- return session->load_model_from_file(pacakge_file_path);
+ return session->load_model_from_nnpackage(pacakge_file_path);
}
/*
@@ -350,6 +350,12 @@ NNFW_STATUS nnfw_load_circle_from_buffer(nnfw_session *session, uint8_t *buffer,
return session->load_circle_from_buffer(buffer, size);
}
+NNFW_STATUS nnfw_load_model_from_modelfile(nnfw_session *session, const char *file_path)
+{
+ NNFW_RETURN_ERROR_IF_NULL(session);
+ return session->load_model_from_modelfile(file_path);
+}
+
NNFW_STATUS nnfw_input_tensorindex(nnfw_session *session, const char *tensorname, uint32_t *index)
{
NNFW_RETURN_ERROR_IF_NULL(session);
diff --git a/runtime/onert/api/src/nnfw_api_internal.cc b/runtime/onert/api/src/nnfw_api_internal.cc
index a4c69eb4f..c3fdb131b 100644
--- a/runtime/onert/api/src/nnfw_api_internal.cc
+++ b/runtime/onert/api/src/nnfw_api_internal.cc
@@ -19,17 +19,19 @@
#include "compiler/Compiler.h"
#include "util/ConfigSource.h"
#include "util/Exceptions.h"
+#include "util/logging.h"
#include "exec/Execution.h"
#include "circle_loader.h"
#include "tflite_loader.h"
#include "json/json.h"
#include "ir/OpCode.h"
+#include "util/TracingCtx.h"
+
#include <fstream>
#include <iostream>
#include <string>
#include <vector>
#include <dirent.h>
-#include <util/ConfigSource.h>
#include <misc/string_helpers.h>
/*
@@ -40,8 +42,11 @@
#define MAX_PATH_LENGTH 1024
#define MAX_TENSOR_NAME_LENGTH 64
+namespace
+{
+
 // Is the string null-terminated within the given length?
-static bool null_terminating(const char *str, uint32_t length)
+bool null_terminating(const char *str, uint32_t length)
{
for (uint32_t i = 0; i < length; i++)
{
@@ -53,7 +58,7 @@ static bool null_terminating(const char *str, uint32_t length)
return false;
}
-static onert::ir::Layout convertLayout(NNFW_LAYOUT layout)
+onert::ir::Layout convertLayout(NNFW_LAYOUT layout)
{
if (layout == NNFW_LAYOUT_CHANNELS_LAST)
{
@@ -92,9 +97,70 @@ NNFW_STATUS getTensorIndexImpl(const onert::ir::Graph &graph, const char *tensor
}
}
+std::string trim(const std::string &value)
+{
+ std::string whitespace = " \t";
+ auto begin = value.find_first_not_of(whitespace);
+ if (begin == std::string::npos)
+ return ""; // no content
+
+ auto end = value.find_last_not_of(whitespace);
+ auto range = end - begin + 1;
+ return value.substr(begin, range);
+}
+
+using CfgKeyValues = std::unordered_map<std::string, std::string>;
+
+bool loadConfigure(const std::string cfgfile, CfgKeyValues &keyValues)
+{
+ std::ifstream ifs(cfgfile);
+ if (ifs.is_open())
+ {
+ std::string line;
+ while (std::getline(ifs, line))
+ {
+ auto cmtpos = line.find('#');
+ if (cmtpos != std::string::npos)
+ {
+ line = line.substr(0, cmtpos);
+ }
+ std::istringstream isline(line);
+ std::string key;
+ if (std::getline(isline, key, '='))
+ {
+ std::string value;
+ if (std::getline(isline, value))
+ {
+ key = trim(key);
+ keyValues[key] = trim(value);
+ }
+ }
+ }
+ ifs.close();
+ return true;
+ }
+ return false;
+}
+
+void setConfigKeyValues(const CfgKeyValues &keyValues)
+{
+ auto configsrc = std::make_unique<onert::util::GeneralConfigSource>();
+
+ for (auto it = keyValues.begin(); it != keyValues.end(); ++it)
+ {
+ VERBOSE(NNPKG_CONFIGS) << "(" << it->first << ") = (" << it->second << ")" << std::endl;
+ configsrc->set(it->first, it->second);
+ }
+
+ onert::util::config_source_ext(std::move(configsrc));
+}
+
+} // namespace
+
nnfw_session::nnfw_session()
- : _subgraphs{nullptr}, _execution{nullptr},
- _kernel_registry{std::make_shared<onert::frontend::custom::KernelRegistry>()}
+ : _subgraphs{nullptr}, _execution{nullptr},
+ _kernel_registry{std::make_shared<onert::frontend::custom::KernelRegistry>()}, _tracing_ctx{
+ nullptr}
{
// DO NOTHING
}
@@ -122,13 +188,65 @@ NNFW_STATUS nnfw_session::load_circle_from_buffer(uint8_t *buffer, size_t size)
return NNFW_STATUS_ERROR;
}
- _compiler = std::make_unique<onert::compiler::Compiler>(_subgraphs);
+ _tracing_ctx = std::make_unique<onert::util::TracingCtx>(_subgraphs.get());
+
+ _compiler = std::make_unique<onert::compiler::Compiler>(_subgraphs, _tracing_ctx.get());
+
+ _state = State::MODEL_LOADED;
+ return NNFW_STATUS_NO_ERROR;
+}
+
+NNFW_STATUS nnfw_session::load_model_from_modelfile(const char *model_file_path)
+{
+ if (!isStateInitialized())
+ return NNFW_STATUS_INVALID_STATE;
+
+ if (!model_file_path)
+ {
+ std::cerr << "Model file path is null." << std::endl;
+ return NNFW_STATUS_UNEXPECTED_NULL;
+ }
+
+ std::string filename{model_file_path};
+ if (filename.size() < 8) // .tflite or .circle
+ {
+ std::cerr << "Invalid model file path." << std::endl;
+ return NNFW_STATUS_ERROR;
+ }
+
+ std::string model_type = filename.substr(filename.size() - 7, 7);
+
+ try
+ {
+ if (model_type == ".tflite")
+ {
+ _subgraphs = onert::tflite_loader::loadModel(filename.c_str());
+ }
+ else if (model_type == ".circle")
+ {
+ _subgraphs = onert::circle_loader::loadModel(filename.c_str());
+ }
+ else
+ {
+ std::cerr << "Unsupported model type" << std::endl;
+ return NNFW_STATUS_ERROR;
+ }
+ }
+ catch (const std::exception &e)
+ {
+ std::cerr << "Error during model loading : " << e.what() << std::endl;
+ return NNFW_STATUS_ERROR;
+ }
+
+ _tracing_ctx = std::make_unique<onert::util::TracingCtx>(_subgraphs.get());
+
+ _compiler = std::make_unique<onert::compiler::Compiler>(_subgraphs, _tracing_ctx.get());
_state = State::MODEL_LOADED;
return NNFW_STATUS_NO_ERROR;
}
-NNFW_STATUS nnfw_session::load_model_from_file(const char *package_dir)
+NNFW_STATUS nnfw_session::load_model_from_nnpackage(const char *package_dir)
{
if (!isStateInitialized())
return NNFW_STATUS_INVALID_STATE;
@@ -166,6 +284,18 @@ NNFW_STATUS nnfw_session::load_model_from_file(const char *package_dir)
mfs >> root;
const Json::Value &models = root["models"];
const Json::Value &model_types = root["model-types"];
+ const Json::Value &configs = root["configs"];
+
+ if (!configs.empty() && !configs[0].empty())
+ {
+ auto filepath = package_dir + std::string("/metadata/") + configs[0].asCString();
+
+ CfgKeyValues keyValues;
+ if (loadConfigure(filepath, keyValues))
+ {
+ setConfigKeyValues(keyValues);
+ }
+ }
auto model_file_path = package_dir + std::string("/") + models[0].asString(); // first model
auto model_type = model_types[0].asString(); // first model's type
@@ -190,7 +320,9 @@ NNFW_STATUS nnfw_session::load_model_from_file(const char *package_dir)
return NNFW_STATUS_ERROR;
}
- _compiler = std::make_unique<onert::compiler::Compiler>(_subgraphs);
+ _tracing_ctx = std::make_unique<onert::util::TracingCtx>(_subgraphs.get());
+
+ _compiler = std::make_unique<onert::compiler::Compiler>(_subgraphs, _tracing_ctx.get());
_state = State::MODEL_LOADED;
return NNFW_STATUS_NO_ERROR;
@@ -225,7 +357,7 @@ NNFW_STATUS nnfw_session::prepare()
{
_subgraphs.reset();
std::shared_ptr<onert::exec::ExecutorMap> executors = _compiler->compile();
- _execution = std::make_shared<onert::exec::Execution>(executors);
+ _execution = std::make_unique<onert::exec::Execution>(executors);
}
catch (const std::exception &e)
{
@@ -308,8 +440,8 @@ NNFW_STATUS nnfw_session::set_input(uint32_t index, NNFW_TYPE /*type*/, const vo
if (!buffer && length != 0)
{
std::cerr
- << "Error during nnfw_session::set_input : given buffer is NULL but the length is not 0"
- << std::endl;
+ << "Error during nnfw_session::set_input : given buffer is NULL but the length is not 0"
+ << std::endl;
return NNFW_STATUS_ERROR;
}
@@ -337,8 +469,8 @@ NNFW_STATUS nnfw_session::set_output(uint32_t index, NNFW_TYPE /*type*/, void *b
if (!buffer && length != 0)
{
std::cerr
- << "Error during nnfw_session::set_output : given buffer is NULL but the length is not 0"
- << std::endl;
+ << "Error during nnfw_session::set_output : given buffer is NULL but the length is not 0"
+ << std::endl;
return NNFW_STATUS_ERROR;
}
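
The new loadConfigure()/setConfigKeyValues() helpers above read a plain key=value file from the package's metadata directory, stripping '#' comments and surrounding whitespace. A self-contained sketch of that file format and its parsing, using an in-memory string instead of a file; EXECUTOR is a config key referenced elsewhere in this patch, while SOME_BACKEND_OPTION is only a placeholder.

#include <iostream>
#include <map>
#include <sstream>
#include <string>

int main()
{
  const std::string cfg = "# sample nnpackage config\n"
                          "EXECUTOR = Linear\n"
                          "SOME_BACKEND_OPTION = on  # placeholder key for illustration\n";

  auto trim = [](std::string s) {
    const std::string ws = " \t";
    const auto b = s.find_first_not_of(ws);
    if (b == std::string::npos)
      return std::string{};
    const auto e = s.find_last_not_of(ws);
    return s.substr(b, e - b + 1);
  };

  std::map<std::string, std::string> key_values;
  std::istringstream file(cfg);
  std::string line;
  while (std::getline(file, line))
  {
    const auto cmt = line.find('#');
    if (cmt != std::string::npos)
      line = line.substr(0, cmt); // strip comments, as loadConfigure() does
    std::istringstream isline(line);
    std::string key, value;
    if (std::getline(isline, key, '=') && std::getline(isline, value))
      key_values[trim(key)] = trim(value);
  }

  for (const auto &kv : key_values)
    std::cout << kv.first << " -> " << kv.second << std::endl;
  return 0;
}
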
diff --git a/runtime/onert/api/src/nnfw_api_internal.h b/runtime/onert/api/src/nnfw_api_internal.h
index 604ba38b4..a50ac72d3 100644
--- a/runtime/onert/api/src/nnfw_api_internal.h
+++ b/runtime/onert/api/src/nnfw_api_internal.h
@@ -21,6 +21,7 @@
#include "nnfw_experimental.h"
#include <util/GeneralConfigSource.h>
+#include <util/TracingCtx.h>
#include <string>
#include <memory>
@@ -100,7 +101,7 @@ public:
nnfw_session();
~nnfw_session();
- NNFW_STATUS load_model_from_file(const char *package_file_path);
+ NNFW_STATUS load_model_from_nnpackage(const char *package_file_path);
NNFW_STATUS prepare();
NNFW_STATUS run();
@@ -132,6 +133,7 @@ public:
NNFW_STATUS set_config(const char *key, const char *value);
NNFW_STATUS get_config(const char *key, char *value, size_t value_size);
NNFW_STATUS load_circle_from_buffer(uint8_t *buffer, size_t size);
+ NNFW_STATUS load_model_from_modelfile(const char *file_path);
//
// Experimental API
@@ -154,8 +156,10 @@ private:
State _state{State::INITIALIZED};
std::shared_ptr<onert::ir::Subgraphs> _subgraphs;
std::unique_ptr<onert::compiler::Compiler> _compiler;
- std::shared_ptr<onert::exec::Execution> _execution;
+ std::unique_ptr<onert::exec::Execution> _execution;
std::shared_ptr<onert::frontend::custom::KernelRegistry> _kernel_registry;
+
+ std::unique_ptr<onert::util::TracingCtx> _tracing_ctx;
};
#endif // __API_NNFW_API_INTERNAL_H__
diff --git a/runtime/onert/backend/CMakeLists.txt b/runtime/onert/backend/CMakeLists.txt
index 42d622aa8..dc038c975 100644
--- a/runtime/onert/backend/CMakeLists.txt
+++ b/runtime/onert/backend/CMakeLists.txt
@@ -4,3 +4,5 @@ add_subdirectory(cpu)
add_subdirectory(acl_cl)
add_subdirectory(acl_neon)
add_subdirectory(acl_common)
+add_subdirectory(ruy)
+add_subdirectory(xnnpack)
diff --git a/runtime/onert/backend/acl_cl/Backend.h b/runtime/onert/backend/acl_cl/Backend.h
index 5c5041378..4f48314c1 100644
--- a/runtime/onert/backend/acl_cl/Backend.h
+++ b/runtime/onert/backend/acl_cl/Backend.h
@@ -20,6 +20,7 @@
#include <memory>
#include <backend/Backend.h>
+#include "BackendContext.h"
#include "Config.h"
#include "ConstantInitializer.h"
#include "KernelGenerator.h"
@@ -41,21 +42,20 @@ public:
std::shared_ptr<IConfig> config() const override { return _config; }
- std::unique_ptr<BackendContext> newContext(const ir::Graph &graph,
- const std::shared_ptr<custom::IKernelBuilder> &,
- bool is_linear_executor) const override
+ std::unique_ptr<backend::BackendContext>
+ newContext(const ir::Graph &graph, const std::shared_ptr<custom::IKernelBuilder> &,
+ bool is_linear_executor) const override
{
const auto &operands = graph.operands();
const auto &operations = graph.operations();
- auto context = std::make_unique<BackendContext>(this, &graph);
+ auto context = std::make_unique<acl_cl::BackendContext>(this, &graph);
auto tm = createTensorManager(is_linear_executor);
auto tr = std::make_shared<acl_common::AclTensorRegistry<TensorManager>>(tm);
- auto tb = std::make_shared<TensorBuilder>(operands, tm, tr);
+ auto tb = std::make_shared<TensorBuilder>(operands, tm);
context->tensor_registry = tr;
context->tensor_builder = tb;
context->constant_initializer = std::make_shared<ConstantInitializer>(operands, tr);
context->kernel_gen = std::make_shared<KernelGenerator>(operands, operations, tb, tr);
- context->tensor_register = nullptr;
context->optimizer = std::make_shared<Optimizer>(context.get());
return context;
}
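
newContext() above now builds the acl_cl-specific BackendContext and wires up the tensor registry, tensor builder, constant initializer and kernel generator it owns. A stripped-down sketch of that composition with placeholder classes (not the onert interfaces):

#include <iostream>
#include <memory>
#include <utility>

struct TensorRegistry
{
};

struct TensorBuilder
{
  explicit TensorBuilder(std::shared_ptr<TensorRegistry> reg) : registry(std::move(reg)) {}
  std::shared_ptr<TensorRegistry> registry;
};

struct KernelGenerator
{
  explicit KernelGenerator(std::shared_ptr<TensorBuilder> tb) : tensor_builder(std::move(tb)) {}
  std::shared_ptr<TensorBuilder> tensor_builder;
};

struct BackendContext
{
  std::shared_ptr<TensorRegistry> tensor_registry;
  std::shared_ptr<TensorBuilder> tensor_builder;
  std::shared_ptr<KernelGenerator> kernel_gen;
};

std::unique_ptr<BackendContext> newContext()
{
  auto context = std::make_unique<BackendContext>();
  auto registry = std::make_shared<TensorRegistry>();
  auto builder = std::make_shared<TensorBuilder>(registry);
  context->tensor_registry = registry;
  context->tensor_builder = builder;
  context->kernel_gen = std::make_shared<KernelGenerator>(builder);
  return context;
}

int main()
{
  auto ctx = newContext();
  // The builder and the context share the same registry instance.
  std::cout << std::boolalpha << (ctx->tensor_builder->registry == ctx->tensor_registry)
            << std::endl;
  return 0;
}
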
diff --git a/runtime/onert/backend/acl_cl/BackendContext.cc b/runtime/onert/backend/acl_cl/BackendContext.cc
new file mode 100644
index 000000000..a6f228a4f
--- /dev/null
+++ b/runtime/onert/backend/acl_cl/BackendContext.cc
@@ -0,0 +1,302 @@
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "BackendContext.h"
+
+#include "TensorBuilder.h"
+#include "KernelGenerator.h"
+#include "Optimizer.h"
+#include "util/logging.h"
+#include "ir/Index.h"
+#include "ir/OperandIndexMap.h"
+#include "ir/OperandIndexSequence.h"
+
+namespace onert
+{
+namespace backend
+{
+namespace acl_cl
+{
+
+void BackendContext::initConsts()
+{
+ for (auto &op : operation_list())
+ {
+ constant_initializer->setLayout(op.layout);
+ graph()->operations().at(op.index).accept(*constant_initializer);
+ }
+
+ for (auto ind : operand_list())
+ {
+ const auto &obj = graph()->operands().at(ind);
+ if (obj.isConstant() && !constant_initializer->exist(ind))
+ {
+ constant_initializer->registerDefaultInitializer(ind, obj);
+ }
+ }
+
+ constant_initializer->run();
+}
+
+void BackendContext::planTensors(const std::vector<onert::ir::OpSequenceIndex> &order,
+ const ir::OpSequences &op_seqs, const ir::LowerInfoMap &lower_info)
+{
+ ir::OperandIndexMap<uint32_t> uses_map;
+ ir::OperandIndexMap<uint32_t> def_map;
+ ir::OperandIndexSequence constants;
+
+ // Prepare scanning
+ for (auto ind : operand_list())
+ {
+ const auto &obj = graph()->operands().at(ind);
+ const auto &li = lower_info.operand.at(ind);
+ if (li->def_factors().getOnlyElement().backend() != backend())
+ continue;
+
+ // Ignore unused tensor
+ if (li->def_factors().size() == 0 && li->use_factors().size() == 0)
+ {
+ VERBOSE(planTensors) << "Operand #" << ind.value() << " will not be used. no more process."
+ << std::endl;
+ return;
+ }
+
+ uses_map[ind] = obj.getUses().size();
+ def_map[ind] = obj.getDef().valid() ? 1 : 0;
+
+ if (obj.isConstant())
+ constants.append(ind);
+
+ auto factor = li->def_factors().getOnlyElement();
+ if (!tensor_builder->isRegistered(ind))
+ {
+ // These tensors do not exist in any op_seq (No use and def)
+ const auto info = obj.info();
+ const auto backend_layout = factor.layout();
+ // TODO Change tensor info to have permuted shape
+ tensor_builder->registerTensorInfo(ind, info, backend_layout);
+ }
+ }
+
+ // Start scanning to do notify{First|Last}Use for each tensor
+
+ // If a tensor is a constant, increase the use of the tensor and allocate it first.
+  // Increasing the use count here means the tensor is never deallocated, i.e. it will be
+ // deallocated last.
+ VERBOSE(planTensors) << "TENSORS as CONSTANT" << std::endl;
+ for (const auto &ind : constants)
+ {
+ uses_map[ind]++;
+ tensor_builder->notifyFirstUse(ind);
+ }
+
+ // At each operation,
+ // 1. Scan DEF of outputs. If the DEF, allocate it
+ // 2. Scan DEF of inputs. If variable tensor, allocate it
+ // 3. Scan USE of inputs. Decrease the USE and deallocate if the USE is 0
+ for (const auto op_seq_ind : order)
+ {
+ const auto &op_seq = op_seqs.at(op_seq_ind);
+ for (const auto &op_idx : op_seq.operations())
+ {
+ auto &op = graph()->operations().at(op_idx);
+ auto op_inputs = op.getInputs() | ir::Remove::DUPLICATED | ir::Remove::UNDEFINED;
+ auto op_outputs = op.getOutputs() | ir::Remove::DUPLICATED | ir::Remove::UNDEFINED;
+
+ // Define outputs
+ for (const auto &ind : op_outputs)
+ {
+ if (!tensor_builder->isRegistered(ind))
+ continue;
+ assert(def_map.find(ind) != def_map.end());
+ if (def_map[ind])
+ {
+ def_map[ind] = 0;
+ tensor_builder->notifyFirstUse(ind);
+ }
+ }
+
+ // Scan variable tensors
+      // These tensors behave like constants, but OperandInfo and LowerInfo treat them as
+      // non-constant so that memory planning here can keep memory usage low
+ for (const auto &ind : op_inputs)
+ {
+ if (!tensor_builder->isRegistered(ind))
+ continue;
+ const auto &operand = graph()->operands().at(ind);
+ if (operand.info().isVariable())
+ {
+ // The variable tensor with buffer is not supported yet
+ assert(operand.data() == nullptr);
+ assert(operand.getUses().size() == 1 && !operand.getDef().valid());
+ assert(lower_info.operand.at(ind)->def_factors().size() == 1 &&
+ lower_info.operand.at(ind)->use_factors().size() == 1);
+ assert(uses_map[ind] == 1 && def_map[ind] == 0);
+ tensor_builder->notifyFirstUse(ind);
+ }
+ }
+
+ for (const auto &ind : op_inputs)
+ {
+ if (!tensor_builder->isRegistered(ind))
+ continue;
+ assert(uses_map.find(ind) != uses_map.end());
+ assert(uses_map[ind] > 0);
+ uses_map[ind]--;
+ if (uses_map[ind] == 0)
+ {
+          // Plan deallocation of the static tensor
+ tensor_builder->notifyLastUse(ind);
+ }
+ }
+ }
+ }
+
+ // Dispose and validate
+ for (const auto &ind : constants)
+ {
+ --uses_map[ind];
+ if (uses_map[ind] == 0) // To prevent notifyLastUse from being called twice
+ {
+ tensor_builder->notifyLastUse(ind);
+ }
+ }
+
+ assert(
+ std::all_of(uses_map.begin(), uses_map.end(),
+ [](std::pair<const ir::OperandIndex, uint32_t> it) { return it.second == 0; }));
+
+ assert(
+ std::all_of(def_map.begin(), def_map.end(),
+ [](std::pair<const ir::OperandIndex, uint32_t> it) { return it.second == 0; }));
+}
+
+ITensorRegistry *BackendContext::genTensors(const std::vector<onert::ir::OpSequenceIndex> &order,
+ const ir::OpSequences &op_seqs,
+ const ir::LowerInfoMap &lower_info)
+{
+ optimizer->optimize();
+
+ for (const auto op_seq_ind : order)
+ {
+ const auto &op_seq = op_seqs.at(op_seq_ind);
+ auto model_io = (graph()->getInputs() + graph()->getOutputs()) | ir::Remove::UNDEFINED |
+ ir::Remove::DUPLICATED;
+ for (const auto op_ind : op_seq)
+ {
+ bool op_assigned = [&]() {
+ for (auto &op_info : operation_list())
+ if (op_info.index == op_ind)
+ return true;
+ return false;
+ }();
+ if (!op_assigned)
+ continue;
+
+ const auto &op = graph()->operations().at(op_ind);
+ for (const auto &index : (op.getInputs() + op.getOutputs()) | ir::Remove::UNDEFINED)
+ {
+ if (!tensor_builder->isRegistered(index) && !model_io.contains(index) &&
+ find(operand_list().begin(), operand_list().end(), index) != operand_list().end())
+ {
+ const auto &operand_lower_info =
+ lower_info.operand.at(index)->def_factors().getOnlyElement();
+
+ // E.g., permute (CPU) -> tensor A -> MaxPool2D(acl_cl)
+ // op.getOutputs() of permute (CPU) returns tensor A
+ // but tensor A belongs to the backend of acl_cl.
+ // So, we have to make this tensor NOT registered for CPU.
+ if (operand_lower_info.backend() != backend())
+ continue;
+
+ const auto &obj = graph()->operands().at(index);
+ const auto frontend_layout = op_seq.getLayout();
+ const auto backend_layout = operand_lower_info.layout();
+ ir::OperandInfo backend_info{permuteShape(obj.shape(), frontend_layout, backend_layout),
+ obj.typeInfo(), obj.info().memAllocType(), obj.isConstant()};
+ tensor_builder->registerTensorInfo(index, backend_info, backend_layout);
+ }
+ }
+ }
+ }
+
+ // TODO Get compiler options from compiler, and use it rather than getting it from Env
+ if (util::getConfigString(util::config::EXECUTOR) == "Linear")
+ {
+ planTensors(order, op_seqs, lower_info);
+ }
+ else
+ {
+    // For executors that do not have a fixed linear execution order:
+    // as a workaround, notify first use for every registered tensor so the static memory planner never frees them
+ for (auto ind : operand_list())
+ {
+ if (tensor_builder->isRegistered(ind))
+ tensor_builder->notifyFirstUse(ind);
+ }
+ }
+
+ tensor_builder->prepare();
+
+ return tensor_registry.get();
+}
+
+FunctionMap BackendContext::genKernels(const std::vector<onert::ir::OpSequenceIndex> &order,
+ const ir::OpSequences &op_seqs)
+{
+ FunctionMap ret;
+
+ for (auto op_seq_ind : order)
+ {
+ const auto &op_seq = op_seqs.at(op_seq_ind);
+ bool assigned = [&]() {
+ for (auto op_info : operation_list())
+ if (op_seq.exist(op_info.index))
+ return true;
+ return false;
+ }();
+ if (!assigned)
+ continue;
+ auto fn_seq = kernel_gen->generate(op_seqs.at(op_seq_ind));
+ ret.emplace_back(op_seq_ind, std::move(fn_seq));
+ }
+
+ tensor_builder->allocate();
+ initConsts();
+
+ // NOTE For memory optimization, we want to free some operand data
+ for (auto ind : operand_list())
+ {
+ // TODO Remove const_cast
+ auto &obj = const_cast<ir::Graph *>(graph())->operands().at(ind);
+ obj.releaseData();
+ }
+
+ for (auto &it : ret)
+ {
+ auto &fn_seq = it.second;
+ fn_seq->iterate([&](exec::IFunction &ifunc) {
+ ifunc.prepare();
+ tensor_builder->postFunctionPrepare();
+ });
+ }
+
+ return ret;
+}
+
+} // namespace acl_cl
+} // namespace backend
+} // namespace onert
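
planTensors() in the new file above drives tensor lifetimes with use/def counters: a tensor is marked first-used when it is defined (or when it is a constant), and last-used once its remaining-use count drops to zero. A condensed sketch of that counting scheme over a linear operation order, with placeholder tensor and op names:

#include <iostream>
#include <map>
#include <string>
#include <vector>

struct Op
{
  std::string name;
  std::vector<std::string> inputs;
  std::vector<std::string> outputs;
};

int main()
{
  // t0 -> conv -> t1 -> relu -> t2
  const std::vector<Op> order = {{"conv", {"t0"}, {"t1"}}, {"relu", {"t1"}, {"t2"}}};

  // Remaining-use count per tensor (t2 is the graph output, so no further uses here).
  std::map<std::string, int> uses = {{"t0", 1}, {"t1", 1}, {"t2", 0}};

  for (const auto &op : order)
  {
    // 1. Outputs are defined by this op, so their buffers must exist from here on.
    for (const auto &out : op.outputs)
      std::cout << "notifyFirstUse(" << out << ") before " << op.name << std::endl;
    // 2. Each consumed input loses one use; at zero its buffer can be released.
    for (const auto &in : op.inputs)
    {
      if (--uses[in] == 0)
        std::cout << "notifyLastUse(" << in << ") after " << op.name << std::endl;
    }
  }
  return 0;
}
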
diff --git a/runtime/onert/backend/acl_cl/BackendContext.h b/runtime/onert/backend/acl_cl/BackendContext.h
new file mode 100644
index 000000000..662d767d0
--- /dev/null
+++ b/runtime/onert/backend/acl_cl/BackendContext.h
@@ -0,0 +1,70 @@
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __ONERT_BACKEND_ACL_CL_BACKEND_CONTEXT_H__
+#define __ONERT_BACKEND_ACL_CL_BACKEND_CONTEXT_H__
+
+#include <backend/BackendContext.h>
+#include "TensorBuilder.h"
+#include "ConstantInitializer.h"
+#include "KernelGenerator.h"
+
+namespace onert
+{
+namespace backend
+{
+namespace acl_cl
+{
+
+class Optimizer;
+
+class BackendContext : public onert::backend::BackendContext
+{
+public:
+ BackendContext(const Backend *backend, const ir::Graph *graph,
+ std::shared_ptr<ITensorRegistry> tensor_registry = nullptr,
+ std::shared_ptr<TensorBuilder> tensor_builder = nullptr,
+ std::shared_ptr<ConstantInitializer> constant_initializer = nullptr,
+ std::shared_ptr<KernelGenerator> kernel_gen = nullptr)
+ : onert::backend::BackendContext(backend, graph, tensor_registry),
+ tensor_builder{tensor_builder}, constant_initializer{constant_initializer},
+ kernel_gen{kernel_gen}
+ {
+ }
+
+ ITensorRegistry *genTensors(const std::vector<onert::ir::OpSequenceIndex> &order,
+ const ir::OpSequences &op_seqs,
+ const ir::LowerInfoMap &lower_info) override;
+ FunctionMap genKernels(const std::vector<onert::ir::OpSequenceIndex> &order,
+ const ir::OpSequences &op_seqs) override;
+
+private:
+ void initConsts();
+ void planTensors(const std::vector<onert::ir::OpSequenceIndex> &order,
+ const ir::OpSequences &op_seqs, const ir::LowerInfoMap &lower_info);
+
+public:
+ std::shared_ptr<TensorBuilder> tensor_builder;
+ std::shared_ptr<ConstantInitializer> constant_initializer;
+ std::shared_ptr<KernelGenerator> kernel_gen;
+ std::shared_ptr<Optimizer> optimizer;
+};
+
+} // namespace acl_cl
+} // namespace backend
+} // namespace onert
+
+#endif // __ONERT_BACKEND_ACL_CL_BACKEND_CONTEXT_H__
diff --git a/runtime/onert/backend/acl_cl/ConstantInitializer.cc b/runtime/onert/backend/acl_cl/ConstantInitializer.cc
index b45b91058..413a7ccc3 100644
--- a/runtime/onert/backend/acl_cl/ConstantInitializer.cc
+++ b/runtime/onert/backend/acl_cl/ConstantInitializer.cc
@@ -112,7 +112,7 @@ void ConstantInitializer::visit(const ir::operation::Reverse &node)
const auto &axis_obj = _operands.at(axis_index);
const auto ifm_rank = input_obj.shape().rank();
- const auto frontend_layout = this->_current_op_seq_layout;
+ const auto frontend_layout = this->_current_layout;
auto output_tensor = this->_tensor_reg->getITensor(output_index);
const auto backend_layout = output_tensor->layout();
diff --git a/runtime/onert/backend/acl_cl/ConstantInitializer.h b/runtime/onert/backend/acl_cl/ConstantInitializer.h
index 9f3acb461..fc0eca84f 100644
--- a/runtime/onert/backend/acl_cl/ConstantInitializer.h
+++ b/runtime/onert/backend/acl_cl/ConstantInitializer.h
@@ -14,8 +14,8 @@
* limitations under the License.
*/
-#ifndef __ONERT_COMPILER_ACL_CL_CONSTANT_INITIALIZER_H__
-#define __ONERT_COMPILER_ACL_CL_CONSTANT_INITIALIZER_H__
+#ifndef __ONERT_BACKEND_ACL_CL_CONSTANT_INITIALIZER_H__
+#define __ONERT_BACKEND_ACL_CL_CONSTANT_INITIALIZER_H__
#include "AclConstantInitializer.h"
@@ -45,4 +45,4 @@ public:
} // namespace backend
} // namespace onert
-#endif // __ONERT_COMPILER_ACL_CL_CONSTANT_INITIALIZER_H__
+#endif // __ONERT_BACKEND_ACL_CL_CONSTANT_INITIALIZER_H__
diff --git a/runtime/onert/backend/acl_cl/KernelGenerator.cc b/runtime/onert/backend/acl_cl/KernelGenerator.cc
index e7690af2e..3a5ea5a0f 100644
--- a/runtime/onert/backend/acl_cl/KernelGenerator.cc
+++ b/runtime/onert/backend/acl_cl/KernelGenerator.cc
@@ -49,7 +49,7 @@ KernelGenerator::KernelGenerator(
const std::shared_ptr<TensorBuilder> &tensor_builder,
const std::shared_ptr<acl_common::AclTensorRegistry<TensorManager>> &tensor_reg)
: _ctx(operands_ctx), _operations_ctx(operations_ctx), _tensor_builder(tensor_builder),
- _tensor_reg(tensor_reg), _current_op_seq_layout(ir::Layout::UNKNOWN)
+ _tensor_reg(tensor_reg), _current_layout(ir::Layout::UNKNOWN)
{
// DO NOTHING
}
@@ -62,7 +62,7 @@ void KernelGenerator::visit(const ir::OpSequence &op_seq)
_return_fn_seq = std::make_unique<exec::FunctionSequence>();
_return_fn_seq->enableDynamicShapeInferer(false);
- _current_op_seq_layout = op_seq.getLayout();
+ _current_layout = op_seq.getLayout();
for (const auto &operation_idx : op_seq.operations())
{
const auto &node = _operations_ctx.at(operation_idx);
@@ -78,6 +78,25 @@ void KernelGenerator::visit(const ir::operation::BatchToSpaceND &node)
const auto block_size_index{
node.getInputs().at(ir::operation::BatchToSpaceND::Input::BLOCK_SIZE)};
+ const auto NNApiInputs = 2;
+ if (node.getInputs().size() != NNApiInputs)
+ {
+ const auto crops_index{node.getInputs().at(ir::operation::BatchToSpaceND::Input::CROPS_DATA)};
+ if (!_ctx.at(crops_index).isConstant())
+ {
+ throw std::runtime_error("Non-constant crops NYI for acl_cl backend BatchToSpaceND");
+ }
+
+ auto crops = _ctx.at(crops_index).asVector<int32_t>();
+ for (auto crop : crops)
+ {
+ if (crop != 0)
+ {
+ throw std::runtime_error("Non-zero crops NYI for acl_cl backend BatchToSpaceND");
+ }
+ }
+ }
+
auto ofm_tensor = _tensor_reg->getAclTensor(ofm_index);
auto ifm_tensor = _tensor_reg->getAclTensor(ifm_index);
auto block_size_tensor = _tensor_reg->getAclTensor(block_size_index);
@@ -152,8 +171,8 @@ void KernelGenerator::visit(const ir::operation::Conv2D &node)
const auto ker_index{node.getInputs().at(Conv2D::Input::KERNEL)};
const auto bias_index{node.getInputs().at(Conv2D::Input::BIAS)};
- const auto ifm_shape = _ctx.at(ifm_index).shape().asFeature(_current_op_seq_layout);
- const auto ofm_shape = _ctx.at(ofm_index).shape().asFeature(_current_op_seq_layout);
+ const auto ifm_shape = _ctx.at(ifm_index).shape().asFeature(_current_layout);
+ const auto ofm_shape = _ctx.at(ofm_index).shape().asFeature(_current_layout);
// Kernel format is [depth_out, kernel_height, kernel_width, depth_in].
const auto &ker_shape = _ctx.at(ker_index).shape();
const auto ker_height = ker_shape.dim(1);
@@ -189,8 +208,8 @@ void KernelGenerator::visit(const ir::operation::DepthwiseConv2D &node)
const auto ker_index{node.getInputs().at(DepthwiseConv2D::Input::KERNEL)};
const auto bias_index{node.getInputs().at(DepthwiseConv2D::Input::BIAS)};
- const auto ifm_shape = _ctx.at(ifm_index).shape().asFeature(_current_op_seq_layout);
- const auto ofm_shape = _ctx.at(ofm_index).shape().asFeature(_current_op_seq_layout);
+ const auto ifm_shape = _ctx.at(ifm_index).shape().asFeature(_current_layout);
+ const auto ofm_shape = _ctx.at(ofm_index).shape().asFeature(_current_layout);
// Kernel format is [1, kernel_height, kernel_width, depth_out].
const auto &ker_shape = _ctx.at(ker_index).shape();
const auto ker_height = ker_shape.dim(1);
@@ -255,7 +274,7 @@ void KernelGenerator::visit(const ir::operation::Concat &node)
else
{
const auto rank = _ctx.at(ofm_index).shape().rank();
- const auto frontend_layout = _current_op_seq_layout;
+ const auto frontend_layout = _current_layout;
const auto backend_layout = output_tensor->layout();
const auto fixed_axis =
acl_common::ToARMComputeAxis(rank, axis, frontend_layout, backend_layout).value();
@@ -277,7 +296,7 @@ void KernelGenerator::visit(const ir::operation::FullyConnected &node)
auto fn = acl_common::kernelGenFullyConnected<acl_common::AclFunction, ::arm_compute::ICLTensor,
::arm_compute::CLFullyConnectedReshapingLayer>(
- node, _ctx, _tensor_builder, _tensor_reg, _current_op_seq_layout);
+ node, _ctx, _tensor_builder, _tensor_reg, _current_layout);
_return_fn = std::make_unique<exec::FunctionSequence>(
std::move(fn), ActivationBuilder::generate(activation, output_tensor->handle()));
}
@@ -296,7 +315,7 @@ void KernelGenerator::visit(const ir::operation::Reduce &node)
// Convert to ACL axes taking into account negative values and possible duplicates.
const auto &axes = _ctx.at(axes_index);
const auto input_rank = _ctx.at(input_index).shape().rank();
- const auto frontend_layout = _current_op_seq_layout;
+ const auto frontend_layout = _current_layout;
const auto backend_layout = input_tensor->layout();
std::unique_ptr<arm_compute::IFunction> fn;
@@ -329,7 +348,7 @@ void KernelGenerator::visit(const ir::operation::Reshape &node)
// NOTE This operation must not be changed the layout from frontend to backend
// So, PermutationOperationPass makes layouts of frontend and backend the same.
- const auto frontend_layout = _current_op_seq_layout;
+ const auto frontend_layout = _current_layout;
const auto backend_layout = output_tensor->layout();
assert((_ctx.at(input_index).shape().rank() < 4 && _ctx.at(output_index).shape().rank() < 4) ||
frontend_layout == backend_layout);
@@ -388,7 +407,7 @@ void KernelGenerator::visit(const ir::operation::Slice &node)
auto outputData_tensor = _tensor_reg->getAclTensor(output_index);
auto inputData_tensor = _tensor_reg->getAclTensor(input_index);
- const auto frontend_layout = _current_op_seq_layout;
+ const auto frontend_layout = _current_layout;
const auto backend_layout = inputData_tensor->layout();
// Set initializers for indices data such as order of inputData
@@ -455,7 +474,7 @@ void KernelGenerator::visit(const ir::operation::StridedSlice &node)
auto outputData_tensor = _tensor_reg->getAclTensor(output_index);
auto inputData_tensor = _tensor_reg->getAclTensor(input_index);
- const auto frontend_layout = _current_op_seq_layout;
+ const auto frontend_layout = _current_layout;
const auto backend_layout = inputData_tensor->layout();
// Set initializers for indices data such as order of inputData
@@ -557,7 +576,7 @@ void KernelGenerator::visit(const ir::operation::Transpose &node)
auto ofm_tensor = _tensor_reg->getAclTensor(ofm_idx);
auto ifm_tensor = _tensor_reg->getAclTensor(ifm_idx);
- const auto frontend_layout = _current_op_seq_layout;
+ const auto frontend_layout = _current_layout;
const auto backend_layout = ifm_tensor->layout();
const auto &perms = _ctx.at(perm_idx);
@@ -836,7 +855,7 @@ void KernelGenerator::visit(const ir::operation::OneHot &node)
auto onvalue_tensor = _tensor_reg->getAclTensor(onvalue_idx);
const size_t output_rank = _ctx.at(output_idx).shape().rank();
- const auto frontend_layout = _current_op_seq_layout;
+ const auto frontend_layout = _current_layout;
const auto backend_layout = output_tensor->layout();
int32_t axis = node.param().axis == -1 ? output_rank - 1 : node.param().axis;
axis = acl_common::ToARMComputeAxis(output_rank, axis, frontend_layout, backend_layout).value();
@@ -887,7 +906,7 @@ void KernelGenerator::visit(const ir::operation::Pack &node)
for (const auto &input_index : input_indexes)
inputs.emplace_back(_tensor_reg->getAclTensor(input_index)->handle());
- const auto frontend_layout = _current_op_seq_layout;
+ const auto frontend_layout = _current_layout;
const auto backend_layout = _tensor_reg->getAclTensor(output_index)->layout();
if (axis < 0)
@@ -923,8 +942,7 @@ void KernelGenerator::visit(const ir::operation::Pack &node)
void KernelGenerator::visit(const ir::operation::Pool2D &node)
{
auto raw_fn = acl_common::kernelGenPool2D<::arm_compute::CLPoolingLayer>(
- node, _ctx, _tensor_reg, _current_op_seq_layout,
- acl_common::convertPoolType(node.param().op_type));
+ node, _ctx, _tensor_reg, _current_layout, acl_common::convertPoolType(node.param().op_type));
const auto ofm_index{node.getOutputs().at(0)};
auto ofm_tensor = _tensor_reg->getAclTensor(ofm_index);
@@ -1169,9 +1187,9 @@ void KernelGenerator::visit(const ir::operation::TransposeConv &node)
const auto ker_index{node.getInputs().at(ir::operation::TransposeConv::Input::KERNEL)};
const auto ifm_index{node.getInputs().at(ir::operation::TransposeConv::Input::INPUT)};
- const auto ofm_shape = _ctx.at(ofm_index).shape().asFeature(_current_op_seq_layout);
- const auto ifm_shape = _ctx.at(ifm_index).shape().asFeature(_current_op_seq_layout);
- const auto ker_shape = _ctx.at(ker_index).shape().asFeature(_current_op_seq_layout);
+ const auto ofm_shape = _ctx.at(ofm_index).shape().asFeature(_current_layout);
+ const auto ifm_shape = _ctx.at(ifm_index).shape().asFeature(_current_layout);
+ const auto ker_shape = _ctx.at(ker_index).shape().asFeature(_current_layout);
const auto stride = node.param().stride;
@@ -1270,7 +1288,7 @@ void KernelGenerator::visit(const ir::operation::Gather &node)
UNUSED_RELEASE(backend_layout);
assert(backend_layout == ifm_tensor->layout());
assert(backend_layout == indices_tensor->layout());
- assert(ifm_rank < 4 || _current_op_seq_layout == backend_layout);
+ assert(ifm_rank < 4 || _current_layout == backend_layout);
// input is n-D, indices k-D, output is (n + k - 1)-D
size_t n = ifm_rank;
@@ -1306,11 +1324,11 @@ void KernelGenerator::visit(const ir::operation::Gather &node)
_return_fn = asAclFunction(std::move(fn));
}
-void KernelGenerator::visit(const ir::operation::ArgMax &node)
+void KernelGenerator::visit(const ir::operation::ArgMinMax &node)
{
const auto ofm_index{node.getOutputs().at(0)};
- const auto ifm_index{node.getInputs().at(ir::operation::ArgMax::Input::INPUT)};
- const auto axis_index{node.getInputs().at(ir::operation::ArgMax::Input::AXIS)};
+ const auto ifm_index{node.getInputs().at(ir::operation::ArgMinMax::Input::INPUT)};
+ const auto axis_index{node.getInputs().at(ir::operation::ArgMinMax::Input::AXIS)};
auto ifm_shape = _ctx.at(ifm_index).shape();
auto ofm_shape = _ctx.at(ofm_index).shape();
@@ -1320,7 +1338,7 @@ void KernelGenerator::visit(const ir::operation::ArgMax &node)
auto ofm_tensor = _tensor_reg->getAclTensor(ofm_index);
auto ifm_tensor = _tensor_reg->getAclTensor(ifm_index);
const auto ifm_rank = _ctx.at(ifm_index).shape().rank();
- auto frontend_layout = _current_op_seq_layout;
+ auto frontend_layout = _current_layout;
auto backend_layout = ifm_tensor->layout();
int axis_value = _ctx.at(axis_index).asScalar<int32_t>();
@@ -1331,10 +1349,10 @@ void KernelGenerator::visit(const ir::operation::ArgMax &node)
auto acl_axis =
acl_common::ToARMComputeAxis(ifm_rank, axis_value, frontend_layout, backend_layout).value();
-
+ auto reduce_type = node.param().is_arg_max ? ::arm_compute::ReductionOperation::ARG_IDX_MAX
+ : ::arm_compute::ReductionOperation::ARG_IDX_MIN;
auto fn = acl_common::generateLayer<arm_compute::CLArgMinMaxLayerEx>(
- ifm_tensor->handle(), acl_axis, ofm_tensor->handle(),
- ::arm_compute::ReductionOperation::ARG_IDX_MAX);
+ ifm_tensor->handle(), acl_axis, ofm_tensor->handle(), reduce_type);
_return_fn = asAclFunction(std::move(fn));
}
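
The former ArgMax visitor above is folded into a single ArgMinMax visitor whose direction comes from the node's is_arg_max parameter, mapped onto ACL's ARG_IDX_MAX or ARG_IDX_MIN reduction. A minimal standalone sketch of that parameterization, using only the C++ standard library (arg_min_max below is illustrative, not an onert or ACL API):

#include <algorithm>
#include <cstddef>
#include <iostream>
#include <iterator>
#include <vector>

// One "ArgMinMax" operation whose direction is a runtime parameter; assumes a non-empty input.
std::size_t arg_min_max(const std::vector<float> &v, bool is_arg_max)
{
  auto it = is_arg_max ? std::max_element(v.begin(), v.end())
                       : std::min_element(v.begin(), v.end());
  return static_cast<std::size_t>(std::distance(v.begin(), it));
}

int main()
{
  std::vector<float> v{3.f, -1.f, 7.f, 0.5f};
  std::cout << arg_min_max(v, true) << '\n';  // 2 (index of the maximum)
  std::cout << arg_min_max(v, false) << '\n'; // 1 (index of the minimum)
}
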
@@ -1400,7 +1418,7 @@ void KernelGenerator::visit(const ir::operation::Split &node)
for (const auto &ofm_ind : output_indexes)
output_tensors.emplace_back(_tensor_reg->getAclTensor(ofm_ind)->handle());
- const auto frontend_layout = _current_op_seq_layout;
+ const auto frontend_layout = _current_layout;
const auto backend_layout = ifm_tensor->layout();
auto axis = _ctx.at(axis_index).asScalar<int32_t>();
if (axis < 0)
@@ -1439,7 +1457,7 @@ void KernelGenerator::visit(const ir::operation::SplitV &node)
{
int32_t split_dim = split_dim_op.asScalar<int32_t>();
uint32_t split_dim_revised = (split_dim < 0) ? (split_dim + ifm_rank) : split_dim;
- const auto frontend_layout = _current_op_seq_layout;
+ const auto frontend_layout = _current_layout;
const auto backend_layout = ifm_tensor->layout();
if (ifm_tensor->num_dimensions() != ifm_tensor->info()->num_dimensions())
@@ -1483,7 +1501,7 @@ void KernelGenerator::visit(const ir::operation::Unpack &node)
for (const auto &output_index : output_indexes)
outputs.emplace_back(_tensor_reg->getAclTensor(output_index)->handle());
- const auto frontend_layout = _current_op_seq_layout;
+ const auto frontend_layout = _current_layout;
const auto backend_layout = _tensor_reg->getAclTensor(input_index)->layout();
if (axis < 0)
axis += input_rank;
@@ -1526,7 +1544,7 @@ void KernelGenerator::visit(const ir::operation::Pad &node)
auto input = _tensor_reg->getAclTensor(input_index)->handle();
auto output = _tensor_reg->getAclTensor(output_index)->handle();
- const auto frontend_layout = _current_op_seq_layout;
+ const auto frontend_layout = _current_layout;
const auto backend_layout = _tensor_reg->getAclTensor(input_index)->layout();
::arm_compute::PaddingList padding_list;
diff --git a/runtime/onert/backend/acl_cl/KernelGenerator.h b/runtime/onert/backend/acl_cl/KernelGenerator.h
index e8a922677..22a7c18a3 100644
--- a/runtime/onert/backend/acl_cl/KernelGenerator.h
+++ b/runtime/onert/backend/acl_cl/KernelGenerator.h
@@ -17,7 +17,7 @@
#ifndef __ONERT_BACKEND_ACL_CL_KERNEL_GENERATOR_H__
#define __ONERT_BACKEND_ACL_CL_KERNEL_GENERATOR_H__
-#include <backend/IKernelGenerator.h>
+#include <backend/cpu_common/KernelGeneratorBase.h>
#include "ir/Operands.h"
#include "TensorBuilder.h"
@@ -31,7 +31,7 @@ namespace backend
namespace acl_cl
{
-class KernelGenerator : public IKernelGenerator
+class KernelGenerator : public cpu_common::KernelGeneratorBase
{
public:
KernelGenerator(const ir::Operands &operands_ctx, const ir::Operations &operations_ctx,
@@ -39,60 +39,61 @@ public:
const std::shared_ptr<acl_common::AclTensorRegistry<TensorManager>> &_tensor_reg);
void visit(const ir::OpSequence &) override;
+
+ void visit(const ir::operation::ArgMinMax &) override;
void visit(const ir::operation::BatchToSpaceND &) override;
void visit(const ir::operation::BinaryArithmetic &) override;
+ void visit(const ir::operation::Comparison &) override;
+ void visit(const ir::operation::Concat &) override;
void visit(const ir::operation::Conv2D &) override;
+ void visit(const ir::operation::ConvertFp16ToFp32 &) override;
+ void visit(const ir::operation::ConvertFp32ToFp16 &) override;
+ void visit(const ir::operation::DepthToSpace &) override;
void visit(const ir::operation::DepthwiseConv2D &) override;
- void visit(const ir::operation::Concat &) override;
- void visit(const ir::operation::FullyConnected &) override;
- void visit(const ir::operation::Reduce &) override;
- void visit(const ir::operation::Reshape &) override;
- void visit(const ir::operation::Squeeze &) override;
- void visit(const ir::operation::Softmax &) override;
- void visit(const ir::operation::Slice &) override;
- void visit(const ir::operation::StridedSlice &) override;
- void visit(const ir::operation::Transpose &) override;
void visit(const ir::operation::ElementwiseActivation &) override;
void visit(const ir::operation::ElementwiseBinary &) override;
void visit(const ir::operation::ElementwiseUnary &) override;
+ void visit(const ir::operation::EmbeddingLookup &) override;
void visit(const ir::operation::ExpandDims &) override;
+ void visit(const ir::operation::FullyConnected &) override;
+ void visit(const ir::operation::Gather &) override;
+ void visit(const ir::operation::HashtableLookup &) override;
void visit(const ir::operation::InstanceNorm &) override;
- void visit(const ir::operation::Comparison &) override;
+ void visit(const ir::operation::L2Normalization &) override;
+ void visit(const ir::operation::LocalResponseNormalization &) override;
void visit(const ir::operation::LSTM &) override;
void visit(const ir::operation::OneHot &) override;
void visit(const ir::operation::Pack &) override;
- void visit(const ir::operation::Pool2D &) override;
+ void visit(const ir::operation::Pad &) override;
void visit(const ir::operation::Permute &) override;
+ void visit(const ir::operation::Pool2D &) override;
+ void visit(const ir::operation::PReLU &) override;
+ void visit(const ir::operation::Reduce &) override;
+ void visit(const ir::operation::Reshape &) override;
void visit(const ir::operation::ResizeBilinear &) override;
void visit(const ir::operation::ResizeNearestNeighbor &) override;
+ void visit(const ir::operation::Reverse &) override;
void visit(const ir::operation::RNN &) override;
+ void visit(const ir::operation::Slice &) override;
+ void visit(const ir::operation::Softmax &) override;
void visit(const ir::operation::SpaceToBatchND &) override;
void visit(const ir::operation::SpaceToDepth &) override;
- void visit(const ir::operation::EmbeddingLookup &) override;
- void visit(const ir::operation::L2Normalization &) override;
- void visit(const ir::operation::HashtableLookup &) override;
- void visit(const ir::operation::PReLU &) override;
- void visit(const ir::operation::TransposeConv &) override;
- void visit(const ir::operation::SquaredDifference &) override;
- void visit(const ir::operation::TopKV2 &) override;
- void visit(const ir::operation::Gather &) override;
- void visit(const ir::operation::ArgMax &) override;
- void visit(const ir::operation::LocalResponseNormalization &) override;
- void visit(const ir::operation::DepthToSpace &) override;
void visit(const ir::operation::Split &) override;
void visit(const ir::operation::SplitV &) override;
+ void visit(const ir::operation::SquaredDifference &) override;
+ void visit(const ir::operation::Squeeze &) override;
+ void visit(const ir::operation::StridedSlice &) override;
+ void visit(const ir::operation::TopKV2 &) override;
+ void visit(const ir::operation::Transpose &) override;
+ void visit(const ir::operation::TransposeConv &) override;
void visit(const ir::operation::Unpack &) override;
- void visit(const ir::operation::Pad &) override;
- void visit(const ir::operation::ConvertFp32ToFp16 &) override;
- void visit(const ir::operation::ConvertFp16ToFp32 &) override;
- void visit(const ir::operation::Reverse &) override;
private:
const ir::Operands &_ctx;
const ir::Operations &_operations_ctx;
std::shared_ptr<TensorBuilder> _tensor_builder;
std::shared_ptr<acl_common::AclTensorRegistry<TensorManager>> _tensor_reg;
- ir::Layout _current_op_seq_layout;
+ ir::Layout _current_layout;
};
} // namespace acl_cl
diff --git a/runtime/onert/backend/acl_cl/Optimizer.h b/runtime/onert/backend/acl_cl/Optimizer.h
index 18d38ec1b..ad5154860 100644
--- a/runtime/onert/backend/acl_cl/Optimizer.h
+++ b/runtime/onert/backend/acl_cl/Optimizer.h
@@ -17,8 +17,7 @@
#ifndef __ONERT_BACKEND_ACL_CL_OPTIMIZER_H__
#define __ONERT_BACKEND_ACL_CL_OPTIMIZER_H__
-#include <backend/IOptimizer.h>
-#include <backend/BackendContext.h>
+#include "BackendContext.h"
#include "TensorBuilder.h"
namespace onert
@@ -28,12 +27,12 @@ namespace backend
namespace acl_cl
{
-class Optimizer : public IOptimizer
+class Optimizer
{
public:
Optimizer(BackendContext *context);
- void optimize() override;
+ void optimize();
private:
BackendContext *_context;
diff --git a/runtime/onert/backend/acl_cl/acl_cl.cc b/runtime/onert/backend/acl_cl/acl_cl.cc
index 88378b13a..82cbde02f 100644
--- a/runtime/onert/backend/acl_cl/acl_cl.cc
+++ b/runtime/onert/backend/acl_cl/acl_cl.cc
@@ -14,20 +14,11 @@
* limitations under the License.
*/
-#include <util/logging.h>
-
#include "Backend.h"
extern "C" {
-onert::backend::Backend *onert_backend_create()
-{
- VERBOSE(onert_backend_create) << "'acl_cl' loaded\n";
- return new onert::backend::acl_cl::Backend;
-}
-void onert_backend_destroy(onert::backend::Backend *backend)
-{
- VERBOSE(onert_backend_create) << "'acl_cl' unloaded\n";
- delete backend;
-}
+onert::backend::Backend *onert_backend_create() { return new onert::backend::acl_cl::Backend; }
+
+void onert_backend_destroy(onert::backend::Backend *backend) { delete backend; }
}
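
With the verbose logging removed, acl_cl's entry points reduce to two C-linkage factory functions. For readers unfamiliar with how such symbols are consumed, here is a rough, self-contained sketch of resolving them from a shared object with dlopen/dlsym; the library name and error handling are illustrative and not taken from onert's actual backend loader:

#include <dlfcn.h>
#include <cstdio>

struct Backend; // opaque here; only pointers are passed around

using create_fn = Backend *(*)();
using destroy_fn = void (*)(Backend *);

int main()
{
  // "libbackend_acl_cl.so" is an assumed name for illustration only.
  void *handle = dlopen("libbackend_acl_cl.so", RTLD_LAZY | RTLD_LOCAL);
  if (!handle)
  {
    std::fprintf(stderr, "dlopen failed: %s\n", dlerror());
    return 1;
  }
  auto create = reinterpret_cast<create_fn>(dlsym(handle, "onert_backend_create"));
  auto destroy = reinterpret_cast<destroy_fn>(dlsym(handle, "onert_backend_destroy"));
  if (create && destroy)
  {
    Backend *backend = create(); // construct the backend object inside the plugin
    destroy(backend);            // and let the plugin destroy it
  }
  dlclose(handle);
  return 0;
}
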
diff --git a/runtime/onert/backend/acl_common/AclConstantInitializer.cc b/runtime/onert/backend/acl_common/AclConstantInitializer.cc
index 21f41a3e6..921d107d9 100644
--- a/runtime/onert/backend/acl_common/AclConstantInitializer.cc
+++ b/runtime/onert/backend/acl_common/AclConstantInitializer.cc
@@ -25,7 +25,7 @@ namespace acl_common
AclConstantInitializer::AclConstantInitializer(const ir::Operands &operands,
const std::shared_ptr<ITensorRegistry> &tensor_reg)
- : IConstantInitializer{operands}, _tensor_reg{tensor_reg}
+ : cpu_common::ConstantInitializerBase{operands}, _tensor_reg{tensor_reg}
{
// DO NOTHING
}
diff --git a/runtime/onert/backend/acl_common/AclConstantInitializer.h b/runtime/onert/backend/acl_common/AclConstantInitializer.h
index 52f4c54cf..894e2e7d1 100644
--- a/runtime/onert/backend/acl_common/AclConstantInitializer.h
+++ b/runtime/onert/backend/acl_common/AclConstantInitializer.h
@@ -17,7 +17,7 @@
#ifndef __ONERT_COMPILER_ACL_COMMON_ACLCONSTANT_INITIALIZER_H__
#define __ONERT_COMPILER_ACL_COMMON_ACLCONSTANT_INITIALIZER_H__
-#include <backend/IConstantInitializer.h>
+#include <backend/cpu_common/ConstantInitializerBase.h>
#include <ir/Operands.h>
#include "AclTensorRegistry.h"
@@ -28,7 +28,7 @@ namespace backend
namespace acl_common
{
-class AclConstantInitializer : public IConstantInitializer
+class AclConstantInitializer : public cpu_common::ConstantInitializerBase
{
public:
AclConstantInitializer(const ir::Operands &operands,
diff --git a/runtime/onert/backend/acl_common/AclTensorBuilder.h b/runtime/onert/backend/acl_common/AclTensorBuilder.h
index bb7abc95d..12e9ab894 100644
--- a/runtime/onert/backend/acl_common/AclTensorBuilder.h
+++ b/runtime/onert/backend/acl_common/AclTensorBuilder.h
@@ -21,7 +21,6 @@
#include <queue>
#include <arm_compute/core/Types.h>
-#include <backend/ITensorBuilder.h>
#include "ir/OperandIndexMap.h"
#include <ir/Operands.h>
#include "AclTensorManager.h"
@@ -43,14 +42,12 @@ enum class UsesType
LAST
};
-template <typename T_ITensor, typename T_Tensor, typename T_SubTensor>
-class AclTensorBuilder : public ITensorBuilder
+template <typename T_ITensor, typename T_Tensor, typename T_SubTensor> class AclTensorBuilder
{
public:
using T_AclTensorManager = AclTensorManager<T_ITensor, T_Tensor, T_SubTensor>;
- AclTensorBuilder(const ir::Operands &operands, T_AclTensorManager *tensor_mgr,
- const std::shared_ptr<AclTensorRegistry<T_AclTensorManager>> &tensor_reg);
+ AclTensorBuilder(const ir::Operands &operands, T_AclTensorManager *tensor_mgr);
/**
* @brief Register tensor information to allocate on ACL-CL backend
@@ -59,16 +56,16 @@ public:
* @param[in] layout Tensor data layout
*/
void registerTensorInfo(const ir::OperandIndex &ind, const ir::OperandInfo &info,
- ir::Layout backend_layout) override;
+ ir::Layout backend_layout);
- void notifyFirstUse(const ir::OperandIndex &) override;
- void notifyLastUse(const ir::OperandIndex &) override;
+ void notifyFirstUse(const ir::OperandIndex &);
+ void notifyLastUse(const ir::OperandIndex &);
- bool isRegistered(const ir::OperandIndex &) const override;
+ bool isRegistered(const ir::OperandIndex &) const;
- void prepare(void) override;
- void allocate() override;
- void postFunctionPrepare() override;
+ void prepare(void);
+ void allocate();
+ void postFunctionPrepare();
T_AclTensorManager *acl_tensor_manager(void) { return _tensor_mgr.get(); }
@@ -105,7 +102,6 @@ private:
ir::OperandIndexMap<size_t> _uses_count_map;
std::unique_ptr<T_AclTensorManager> _tensor_mgr;
- std::shared_ptr<AclTensorRegistry<T_AclTensorManager>> _tensor_reg;
// for linear executor
std::vector<std::pair<UsesType, ir::OperandIndex>> _lifetime_seq;
@@ -133,10 +129,9 @@ namespace acl_common
{
template <typename T_ITensor, typename T_Tensor, typename T_SubTensor>
-AclTensorBuilder<T_ITensor, T_Tensor, T_SubTensor>::AclTensorBuilder(
- const ir::Operands &operands, T_AclTensorManager *tensor_mgr,
- const std::shared_ptr<AclTensorRegistry<T_AclTensorManager>> &tensor_reg)
- : _operands{operands}, _tensor_mgr{tensor_mgr}, _tensor_reg{tensor_reg}
+AclTensorBuilder<T_ITensor, T_Tensor, T_SubTensor>::AclTensorBuilder(const ir::Operands &operands,
+ T_AclTensorManager *tensor_mgr)
+ : _operands{operands}, _tensor_mgr{tensor_mgr}
{
assert(_tensor_mgr);
}
diff --git a/runtime/onert/backend/acl_common/Convert.cc b/runtime/onert/backend/acl_common/Convert.cc
index 67d9d7176..7d3a69032 100644
--- a/runtime/onert/backend/acl_common/Convert.cc
+++ b/runtime/onert/backend/acl_common/Convert.cc
@@ -109,13 +109,19 @@ namespace acl_common
case ir::DataType::UINT8:
return ::arm_compute::DataType::U8;
case ir::DataType::QUANT_INT8_SYMM:
- return ::arm_compute::DataType::S8;
+ return ::arm_compute::DataType::QSYMM8;
+ case ir::DataType::QUANT_INT8_ASYMM:
+ return ::arm_compute::DataType::QASYMM8_SIGNED;
case ir::DataType::FLOAT16:
return ::arm_compute::DataType::F16;
case ir::DataType::INT64:
return ::arm_compute::DataType::S64;
+ case ir::DataType::QUANT_INT16_ASYMM:
+ return ::arm_compute::DataType::QASYMM16;
+ case ir::DataType::QUANT_INT8_SYMM_PER_CHANNEL:
+ return ::arm_compute::DataType::QSYMM8_PER_CHANNEL;
default:
- throw std::runtime_error("Not supported, yet");
+ throw std::runtime_error("Not supported internal data type, yet");
break;
}
}
@@ -175,7 +181,7 @@ namespace acl_common
return ::arm_compute::ActivationLayerInfo{
::arm_compute::ActivationLayerInfo::ActivationFunction::LOGISTIC, 0.0f, 0.0f};
default:
- throw std::runtime_error{"Not supported, yet"};
+ throw std::runtime_error{"Not supported internal activation, yet"};
break;
}
}
@@ -219,7 +225,7 @@ asActivationLayerInfo(const ir::operation::ElementwiseActivation::Type op_type,
return ::arm_compute::ActivationLayerInfo{
::arm_compute::ActivationLayerInfo::ActivationFunction::LEAKY_RELU, alpha};
default:
- throw std::runtime_error{"Not supported, yet"};
+ throw std::runtime_error{"Not supported internal elementwise activation, yet"};
break;
}
}
@@ -295,6 +301,8 @@ ir::DataType asRuntimeDataType(::arm_compute::DataType data_type)
return ir::DataType::UINT32;
case ::arm_compute::DataType::QASYMM8:
return ir::DataType::QUANT_UINT8_ASYMM;
+ case ::arm_compute::DataType::QASYMM8_SIGNED:
+ return ir::DataType::QUANT_INT8_ASYMM;
case ::arm_compute::DataType::U8:
return ir::DataType::UINT8;
case ::arm_compute::DataType::QSYMM8:
@@ -304,7 +312,7 @@ ir::DataType asRuntimeDataType(::arm_compute::DataType data_type)
case ::arm_compute::DataType::S64:
return ir::DataType::INT64;
default:
- throw std::runtime_error{"Not supported, yet"};
+ throw std::runtime_error{"Not supported acl data type, yet"};
break;
}
}
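
QUANT_INT8_ASYMM now maps to ACL's QASYMM8_SIGNED, i.e. signed 8-bit storage with a per-tensor scale and zero point. As a reminder of what that scheme encodes, a small self-contained quantize/dequantize sketch follows; the scale and zero point values are arbitrary examples:

#include <algorithm>
#include <cmath>
#include <cstdint>
#include <cstdio>

// Asymmetric signed 8-bit quantization: real = scale * (q - zero_point)
std::int8_t quantize(float real, float scale, std::int32_t zero_point)
{
  std::int32_t q = static_cast<std::int32_t>(std::lround(real / scale)) + zero_point;
  q = std::max<std::int32_t>(-128, std::min<std::int32_t>(127, q)); // clamp to int8 range
  return static_cast<std::int8_t>(q);
}

float dequantize(std::int8_t q, float scale, std::int32_t zero_point)
{
  return scale * (static_cast<std::int32_t>(q) - zero_point);
}

int main()
{
  const float scale = 0.05f;
  const std::int32_t zero_point = -10;
  std::int8_t q = quantize(1.0f, scale, zero_point);                    // 10
  std::printf("%d -> %f\n", q, dequantize(q, scale, zero_point));       // ~1.0
}
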
diff --git a/runtime/onert/backend/acl_neon/Backend.h b/runtime/onert/backend/acl_neon/Backend.h
index 35d6e4e8e..b11c19733 100644
--- a/runtime/onert/backend/acl_neon/Backend.h
+++ b/runtime/onert/backend/acl_neon/Backend.h
@@ -21,6 +21,7 @@
#include <backend/Backend.h>
#include <ir/Operands.h>
+#include "BackendContext.h"
#include "Config.h"
#include "ConstantInitializer.h"
#include "KernelGenerator.h"
@@ -41,21 +42,20 @@ public:
std::shared_ptr<IConfig> config() const override { return _config; }
- std::unique_ptr<BackendContext> newContext(const ir::Graph &graph,
- const std::shared_ptr<custom::IKernelBuilder> &,
- bool is_linear_executor) const override
+ std::unique_ptr<backend::BackendContext>
+ newContext(const ir::Graph &graph, const std::shared_ptr<custom::IKernelBuilder> &,
+ bool is_linear_executor) const override
{
const auto &operands = graph.operands();
const auto &operations = graph.operations();
- auto context = std::make_unique<BackendContext>(this, &graph);
+ auto context = std::make_unique<acl_neon::BackendContext>(this, &graph);
auto tm = createTensorManager(is_linear_executor);
auto tr = std::make_shared<acl_common::AclTensorRegistry<TensorManager>>(tm);
- auto tb = std::make_shared<TensorBuilder>(operands, tm, tr);
+ auto tb = std::make_shared<TensorBuilder>(operands, tm);
context->tensor_registry = tr;
context->tensor_builder = tb;
context->constant_initializer = std::make_shared<ConstantInitializer>(operands, tr);
context->kernel_gen = std::make_shared<KernelGenerator>(operands, operations, tb, tr);
- context->tensor_register = nullptr;
context->optimizer = std::make_shared<Optimizer>(context.get());
return context;
}
diff --git a/runtime/onert/backend/acl_neon/BackendContext.cc b/runtime/onert/backend/acl_neon/BackendContext.cc
new file mode 100644
index 000000000..8b53171f7
--- /dev/null
+++ b/runtime/onert/backend/acl_neon/BackendContext.cc
@@ -0,0 +1,302 @@
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "BackendContext.h"
+
+#include "TensorBuilder.h"
+#include "KernelGenerator.h"
+#include "Optimizer.h"
+#include "util/logging.h"
+#include "ir/Index.h"
+#include "ir/OperandIndexMap.h"
+#include "ir/OperandIndexSequence.h"
+
+namespace onert
+{
+namespace backend
+{
+namespace acl_neon
+{
+
+void BackendContext::initConsts()
+{
+ for (auto &op : operation_list())
+ {
+ constant_initializer->setLayout(op.layout);
+ graph()->operations().at(op.index).accept(*constant_initializer);
+ }
+
+ for (auto ind : operand_list())
+ {
+ const auto &obj = graph()->operands().at(ind);
+ if (obj.isConstant() && !constant_initializer->exist(ind))
+ {
+ constant_initializer->registerDefaultInitializer(ind, obj);
+ }
+ }
+
+ constant_initializer->run();
+}
+
+void BackendContext::planTensors(const std::vector<onert::ir::OpSequenceIndex> &order,
+ const ir::OpSequences &op_seqs, const ir::LowerInfoMap &lower_info)
+{
+ ir::OperandIndexMap<uint32_t> uses_map;
+ ir::OperandIndexMap<uint32_t> def_map;
+ ir::OperandIndexSequence constants;
+
+ // Prepare scanning
+ for (auto ind : operand_list())
+ {
+ const auto &obj = graph()->operands().at(ind);
+ const auto &li = lower_info.operand.at(ind);
+ if (li->def_factors().getOnlyElement().backend() != backend())
+ continue;
+
+ // Ignore unused tensor
+ if (li->def_factors().size() == 0 && li->use_factors().size() == 0)
+ {
+      VERBOSE(planTensors) << "Operand #" << ind.value()
+                           << " will not be used. Skipping it." << std::endl;
+      continue;
+ }
+
+ uses_map[ind] = obj.getUses().size();
+ def_map[ind] = obj.getDef().valid() ? 1 : 0;
+
+ if (obj.isConstant())
+ constants.append(ind);
+
+ auto factor = li->def_factors().getOnlyElement();
+ if (!tensor_builder->isRegistered(ind))
+ {
+ // These tensors do not exist in any op_seq (No use and def)
+ const auto info = obj.info();
+ const auto backend_layout = factor.layout();
+ // TODO Change tensor info to have permuted shape
+ tensor_builder->registerTensorInfo(ind, info, backend_layout);
+ }
+ }
+
+ // Start scanning to do notify{First|Last}Use for each tensor
+
+ // If a tensor is a constant, increase the use of the tensor and allocate it first.
+  // Increasing the use count here makes the tensor never be deallocated during planning,
+  // i.e., it will be deallocated last.
+ VERBOSE(planTensors) << "TENSORS as CONSTANT" << std::endl;
+ for (const auto &ind : constants)
+ {
+ uses_map[ind]++;
+ tensor_builder->notifyFirstUse(ind);
+ }
+
+  // At each operation,
+  // 1. Scan DEF of outputs. If this operation defines the tensor, allocate it
+  // 2. Scan DEF of inputs. If the input is a variable tensor, allocate it
+  // 3. Scan USE of inputs. Decrease the use count, and deallocate when the count reaches 0
+ for (const auto op_seq_ind : order)
+ {
+ const auto &op_seq = op_seqs.at(op_seq_ind);
+ for (const auto &op_idx : op_seq.operations())
+ {
+ auto &op = graph()->operations().at(op_idx);
+ auto op_inputs = op.getInputs() | ir::Remove::DUPLICATED | ir::Remove::UNDEFINED;
+ auto op_outputs = op.getOutputs() | ir::Remove::DUPLICATED | ir::Remove::UNDEFINED;
+
+ // Define outputs
+ for (const auto &ind : op_outputs)
+ {
+ if (!tensor_builder->isRegistered(ind))
+ continue;
+ assert(def_map.find(ind) != def_map.end());
+ if (def_map[ind])
+ {
+ def_map[ind] = 0;
+ tensor_builder->notifyFirstUse(ind);
+ }
+ }
+
+ // Scan variable tensors
+      // These tensors behave like constants, but OperandInfo and LowerInfo treat them as
+      // non-constant so that the memory planning here can reduce memory usage
+ for (const auto &ind : op_inputs)
+ {
+ if (!tensor_builder->isRegistered(ind))
+ continue;
+ const auto &operand = graph()->operands().at(ind);
+ if (operand.info().isVariable())
+ {
+ // The variable tensor with buffer is not supported yet
+ assert(operand.data() == nullptr);
+ assert(operand.getUses().size() == 1 && !operand.getDef().valid());
+ assert(lower_info.operand.at(ind)->def_factors().size() == 1 &&
+ lower_info.operand.at(ind)->use_factors().size() == 1);
+ assert(uses_map[ind] == 1 && def_map[ind] == 0);
+ tensor_builder->notifyFirstUse(ind);
+ }
+ }
+
+ for (const auto &ind : op_inputs)
+ {
+ if (!tensor_builder->isRegistered(ind))
+ continue;
+ assert(uses_map.find(ind) != uses_map.end());
+ assert(uses_map[ind] > 0);
+ uses_map[ind]--;
+ if (uses_map[ind] == 0)
+ {
+          // plan for deallocation of static tensor
+ tensor_builder->notifyLastUse(ind);
+ }
+ }
+ }
+ }
+
+ // Dispose and validate
+ for (const auto &ind : constants)
+ {
+ --uses_map[ind];
+ if (uses_map[ind] == 0) // To prevent notifyLastUse from being called twice
+ {
+ tensor_builder->notifyLastUse(ind);
+ }
+ }
+
+ assert(
+ std::all_of(uses_map.begin(), uses_map.end(),
+ [](std::pair<const ir::OperandIndex, uint32_t> it) { return it.second == 0; }));
+
+ assert(
+ std::all_of(def_map.begin(), def_map.end(),
+ [](std::pair<const ir::OperandIndex, uint32_t> it) { return it.second == 0; }));
+}
+
+ITensorRegistry *BackendContext::genTensors(const std::vector<onert::ir::OpSequenceIndex> &order,
+ const ir::OpSequences &op_seqs,
+ const ir::LowerInfoMap &lower_info)
+{
+ optimizer->optimize();
+
+ for (const auto op_seq_ind : order)
+ {
+ const auto &op_seq = op_seqs.at(op_seq_ind);
+ auto model_io = (graph()->getInputs() + graph()->getOutputs()) | ir::Remove::UNDEFINED |
+ ir::Remove::DUPLICATED;
+ for (const auto op_ind : op_seq)
+ {
+ bool op_assigned = [&]() {
+ for (auto &op_info : operation_list())
+ if (op_info.index == op_ind)
+ return true;
+ return false;
+ }();
+ if (!op_assigned)
+ continue;
+
+ const auto &op = graph()->operations().at(op_ind);
+ for (const auto &index : (op.getInputs() + op.getOutputs()) | ir::Remove::UNDEFINED)
+ {
+ if (!tensor_builder->isRegistered(index) && !model_io.contains(index) &&
+ find(operand_list().begin(), operand_list().end(), index) != operand_list().end())
+ {
+ const auto &operand_lower_info =
+ lower_info.operand.at(index)->def_factors().getOnlyElement();
+
+ // E.g., permute (CPU) -> tensor A -> MaxPool2D(acl_cl)
+ // op.getOutputs() of permute (CPU) returns tensor A
+ // but tensor A belongs to the backend of acl_cl.
+          // So, this tensor must NOT be registered for the CPU backend.
+ if (operand_lower_info.backend() != backend())
+ continue;
+
+ const auto &obj = graph()->operands().at(index);
+ const auto frontend_layout = op_seq.getLayout();
+ const auto backend_layout = operand_lower_info.layout();
+ ir::OperandInfo backend_info{permuteShape(obj.shape(), frontend_layout, backend_layout),
+ obj.typeInfo(), obj.info().memAllocType(), obj.isConstant()};
+ tensor_builder->registerTensorInfo(index, backend_info, backend_layout);
+ }
+ }
+ }
+ }
+
+ // TODO Get compiler options from compiler, and use it rather than getting it from Env
+ if (util::getConfigString(util::config::EXECUTOR) == "Linear")
+ {
+ planTensors(order, op_seqs, lower_info);
+ }
+ else
+ {
+    // For executors that do not have a fixed linear execution order:
+    // as a workaround, use the static memory planner so that tensors are never deallocated
+ for (auto ind : operand_list())
+ {
+ if (tensor_builder->isRegistered(ind))
+ tensor_builder->notifyFirstUse(ind);
+ }
+ }
+
+ tensor_builder->prepare();
+
+ return tensor_registry.get();
+}
+
+FunctionMap BackendContext::genKernels(const std::vector<onert::ir::OpSequenceIndex> &order,
+ const ir::OpSequences &op_seqs)
+{
+ FunctionMap ret;
+
+ for (auto op_seq_ind : order)
+ {
+ const auto &op_seq = op_seqs.at(op_seq_ind);
+ bool assigned = [&]() {
+ for (auto op_info : operation_list())
+ if (op_seq.exist(op_info.index))
+ return true;
+ return false;
+ }();
+ if (!assigned)
+ continue;
+ auto fn_seq = kernel_gen->generate(op_seqs.at(op_seq_ind));
+ ret.emplace_back(op_seq_ind, std::move(fn_seq));
+ }
+
+ tensor_builder->allocate();
+ initConsts();
+
+ // NOTE For memory optimization, we want to free some operand data
+ for (auto ind : operand_list())
+ {
+ // TODO Remove const_cast
+ auto &obj = const_cast<ir::Graph *>(graph())->operands().at(ind);
+ obj.releaseData();
+ }
+
+ for (auto &it : ret)
+ {
+ auto &fn_seq = it.second;
+ fn_seq->iterate([&](exec::IFunction &ifunc) {
+ ifunc.prepare();
+ tensor_builder->postFunctionPrepare();
+ });
+ }
+
+ return ret;
+}
+
+} // namespace acl_neon
+} // namespace backend
+} // namespace onert
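
planTensors above drives notifyFirstUse/notifyLastUse purely from def/use counting. The following standalone model condenses that bookkeeping to a few lines; operands are plain ints, the notifications are prints, and the tiny graph is invented for illustration:

#include <cstdio>
#include <map>
#include <vector>

struct Op { std::vector<int> inputs, outputs; };

int main()
{
  // Tiny graph: op0: t0 -> t1, op1: t1 -> t2, where t0 is a constant input.
  std::vector<Op> ops = {{{0}, {1}}, {{1}, {2}}};
  std::map<int, int> uses = {{0, 1}, {1, 1}, {2, 0}}; // remaining uses per tensor
  std::map<int, int> defs = {{0, 0}, {1, 1}, {2, 1}}; // pending defs per tensor
  std::vector<int> constants = {0};

  // Constants get one extra use so they are released only at the very end.
  for (int c : constants) { ++uses[c]; std::printf("first use (constant) t%d\n", c); }

  for (const auto &op : ops)
  {
    for (int o : op.outputs)
      if (defs[o]) { defs[o] = 0; std::printf("first use t%d\n", o); } // allocate on def
    for (int i : op.inputs)
      if (--uses[i] == 0) std::printf("last use t%d\n", i);            // deallocate on last use
  }

  for (int c : constants)
    if (--uses[c] == 0) std::printf("last use (constant) t%d\n", c);
}
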
diff --git a/runtime/onert/backend/acl_neon/BackendContext.h b/runtime/onert/backend/acl_neon/BackendContext.h
new file mode 100644
index 000000000..dd764c091
--- /dev/null
+++ b/runtime/onert/backend/acl_neon/BackendContext.h
@@ -0,0 +1,71 @@
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __ONERT_BACKEND_ACL_NEON_BACKEND_CONTEXT_H__
+#define __ONERT_BACKEND_ACL_NEON_BACKEND_CONTEXT_H__
+
+#include <backend/BackendContext.h>
+#include "TensorBuilder.h"
+#include "ConstantInitializer.h"
+#include "KernelGenerator.h"
+
+namespace onert
+{
+namespace backend
+{
+namespace acl_neon
+{
+
+class Optimizer;
+
+class BackendContext : public onert::backend::BackendContext
+{
+public:
+ BackendContext(const Backend *backend, const ir::Graph *graph,
+ std::shared_ptr<ITensorRegistry> tensor_registry = nullptr,
+ std::shared_ptr<TensorBuilder> tensor_builder = nullptr,
+ std::shared_ptr<ConstantInitializer> constant_initializer = nullptr,
+ std::shared_ptr<KernelGenerator> kernel_gen = nullptr)
+ : onert::backend::BackendContext(backend, graph, tensor_registry),
+ tensor_builder{tensor_builder}, constant_initializer{constant_initializer},
+ kernel_gen{kernel_gen}
+ {
+ }
+
+ ITensorRegistry *genTensors(const std::vector<onert::ir::OpSequenceIndex> &order,
+ const ir::OpSequences &op_seqs,
+ const ir::LowerInfoMap &lower_info) override;
+ FunctionMap genKernels(const std::vector<onert::ir::OpSequenceIndex> &order,
+ const ir::OpSequences &op_seqs) override;
+
+private:
+ void initConsts();
+ void planTensors(const std::vector<onert::ir::OpSequenceIndex> &order,
+ const ir::OpSequences &op_seqs, const ir::LowerInfoMap &lower_info);
+
+public:
+ // TODO Make it private
+ std::shared_ptr<TensorBuilder> tensor_builder;
+ std::shared_ptr<ConstantInitializer> constant_initializer;
+ std::shared_ptr<KernelGenerator> kernel_gen;
+ std::shared_ptr<Optimizer> optimizer;
+};
+
+} // namespace acl_neon
+} // namespace backend
+} // namespace onert
+
+#endif // __ONERT_BACKEND_ACL_NEON_BACKEND_CONTEXT_H__
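
The new header follows the pattern this patch introduces across backends: the shared BackendContext base keeps only common state, while each backend's context owns its concrete builder and generator types and implements tensor and kernel generation itself. A compressed, self-contained sketch of that shape (all names below are illustrative, not onert's):

#include <memory>
#include <string>
#include <vector>

struct TensorRegistryBase { virtual ~TensorRegistryBase() = default; };

struct BackendContextBase
{
  explicit BackendContextBase(std::shared_ptr<TensorRegistryBase> reg) : tensor_registry(std::move(reg)) {}
  virtual ~BackendContextBase() = default;
  virtual TensorRegistryBase *genTensors() = 0;
  virtual std::vector<std::string> genKernels() = 0;
  std::shared_ptr<TensorRegistryBase> tensor_registry; // common to all backends
};

struct MyTensorBuilder { void prepare() {} };
struct MyKernelGenerator { std::string generate() { return "fn_seq"; } };

struct MyBackendContext : BackendContextBase
{
  using BackendContextBase::BackendContextBase;
  TensorRegistryBase *genTensors() override { tensor_builder.prepare(); return tensor_registry.get(); }
  std::vector<std::string> genKernels() override { return {kernel_gen.generate()}; }
  MyTensorBuilder tensor_builder;   // concrete, backend-specific types
  MyKernelGenerator kernel_gen;
};

int main()
{
  MyBackendContext ctx{std::make_shared<TensorRegistryBase>()};
  ctx.genTensors();
  return ctx.genKernels().empty() ? 1 : 0;
}
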
diff --git a/runtime/onert/backend/acl_neon/ConstantInitializer.h b/runtime/onert/backend/acl_neon/ConstantInitializer.h
index c7d71cdcf..9723ba012 100644
--- a/runtime/onert/backend/acl_neon/ConstantInitializer.h
+++ b/runtime/onert/backend/acl_neon/ConstantInitializer.h
@@ -14,8 +14,8 @@
* limitations under the License.
*/
-#ifndef __ONERT_COMPILER_ACL_NEON_CONSTANT_INITIALIZER_H__
-#define __ONERT_COMPILER_ACL_NEON_CONSTANT_INITIALIZER_H__
+#ifndef __ONERT_BACKEND_ACL_NEON_CONSTANT_INITIALIZER_H__
+#define __ONERT_BACKEND_ACL_NEON_CONSTANT_INITIALIZER_H__
#include "AclConstantInitializer.h"
@@ -41,4 +41,4 @@ public:
} // namespace backend
} // namespace onert
-#endif // __ONERT_COMPILER_ACL_NEON_CONSTANT_INITIALIZER_H__
+#endif // __ONERT_BACKEND_ACL_NEON_CONSTANT_INITIALIZER_H__
diff --git a/runtime/onert/backend/acl_neon/KernelGenerator.cc b/runtime/onert/backend/acl_neon/KernelGenerator.cc
index ffaee3b3e..e712dfa81 100644
--- a/runtime/onert/backend/acl_neon/KernelGenerator.cc
+++ b/runtime/onert/backend/acl_neon/KernelGenerator.cc
@@ -48,7 +48,7 @@ KernelGenerator::KernelGenerator(
const std::shared_ptr<TensorBuilder> &tensor_builder,
const std::shared_ptr<acl_common::AclTensorRegistry<TensorManager>> &tensor_reg)
: _ctx(operands_ctx), _operations_ctx(operations_ctx), _tensor_builder(tensor_builder),
- _tensor_reg(tensor_reg), _current_op_seq_layout(ir::Layout::UNKNOWN)
+ _tensor_reg(tensor_reg), _current_layout(ir::Layout::UNKNOWN)
{
// DO NOTHING
}
@@ -61,7 +61,7 @@ void KernelGenerator::visit(const ir::OpSequence &op_seq)
_return_fn_seq = std::make_unique<exec::FunctionSequence>();
_return_fn_seq->enableDynamicShapeInferer(false);
- _current_op_seq_layout = op_seq.getLayout();
+ _current_layout = op_seq.getLayout();
for (const auto &operation_idx : op_seq.operations())
{
const auto &node = _operations_ctx.at(operation_idx);
@@ -70,17 +70,17 @@ void KernelGenerator::visit(const ir::OpSequence &op_seq)
}
}
-void KernelGenerator::visit(const ir::operation::ArgMax &node)
+void KernelGenerator::visit(const ir::operation::ArgMinMax &node)
{
const auto ofm_index{node.getOutputs().at(0)};
- const auto ifm_index{node.getInputs().at(ir::operation::ArgMax::Input::INPUT)};
- const auto axis_index{node.getInputs().at(ir::operation::ArgMax::Input::AXIS)};
+ const auto ifm_index{node.getInputs().at(ir::operation::ArgMinMax::Input::INPUT)};
+ const auto axis_index{node.getInputs().at(ir::operation::ArgMinMax::Input::AXIS)};
const auto ifm_rank = _ctx.at(ifm_index).shape().rank();
auto ofm_tensor = _tensor_reg->getAclTensor(ofm_index);
auto ifm_tensor = _tensor_reg->getAclTensor(ifm_index);
- auto frontend_layout = _current_op_seq_layout;
+ auto frontend_layout = _current_layout;
auto backend_layout = ifm_tensor->layout();
int axis_value = _ctx.at(axis_index).asScalar<int32_t>();
@@ -91,10 +91,11 @@ void KernelGenerator::visit(const ir::operation::ArgMax &node)
assert(axis_value >= 0 && axis_value < ifm_rank);
const auto fixed_axis =
acl_common::ToARMComputeAxis(ifm_rank, axis_value, frontend_layout, backend_layout).value();
+ auto reduce_type = node.param().is_arg_max ? ::arm_compute::ReductionOperation::ARG_IDX_MAX
+ : ::arm_compute::ReductionOperation::ARG_IDX_MIN;
auto fn = acl_common::generateLayer<arm_compute::NEArgMinMaxLayer>(
- ifm_tensor->handle(), fixed_axis, ofm_tensor->handle(),
- arm_compute::ReductionOperation::ARG_IDX_MAX);
+ ifm_tensor->handle(), fixed_axis, ofm_tensor->handle(), reduce_type);
_return_fn = asAclFunction(std::move(fn));
}
@@ -106,6 +107,25 @@ void KernelGenerator::visit(const ir::operation::BatchToSpaceND &node)
const auto block_size_index{
node.getInputs().at(ir::operation::BatchToSpaceND::Input::BLOCK_SIZE)};
+ const auto NNApiInputs = 2;
+ if (node.getInputs().size() != NNApiInputs)
+ {
+ const auto crops_index{node.getInputs().at(ir::operation::BatchToSpaceND::Input::CROPS_DATA)};
+ if (!_ctx.at(crops_index).isConstant())
+ {
+ throw std::runtime_error("Non-constant crops NYI for acl_neon backend BatchToSpaceND");
+ }
+
+ auto crops = _ctx.at(crops_index).asVector<int32_t>();
+ for (auto crop : crops)
+ {
+ if (crop != 0)
+ {
+ throw std::runtime_error("Non-zero crops NYI for acl_neon backend BatchToSpaceND");
+ }
+ }
+ }
+
auto ofm_tensor = _tensor_reg->getAclTensor(ofm_index);
auto ifm_tensor = _tensor_reg->getAclTensor(ifm_index);
auto block_size_tensor = _tensor_reg->getAclTensor(block_size_index);
@@ -178,8 +198,8 @@ void KernelGenerator::visit(const ir::operation::Conv2D &node)
const auto ker_index{node.getInputs().at(Conv2D::Input::KERNEL)};
const auto bias_index{node.getInputs().at(Conv2D::Input::BIAS)};
- const auto ofm_shape = _ctx.at(ofm_index).shape().asFeature(_current_op_seq_layout);
- const auto ifm_shape = _ctx.at(ifm_index).shape().asFeature(_current_op_seq_layout);
+ const auto ofm_shape = _ctx.at(ofm_index).shape().asFeature(_current_layout);
+ const auto ifm_shape = _ctx.at(ifm_index).shape().asFeature(_current_layout);
// Kernel format is [depth_out, kernel_height, kernel_width, depth_in].
const auto &ker_shape = _ctx.at(ker_index).shape();
const auto ker_height = ker_shape.dim(1);
@@ -232,8 +252,8 @@ void KernelGenerator::visit(const ir::operation::DepthwiseConv2D &node)
const auto ker_index{node.getInputs().at(DepthwiseConv2D::Input::KERNEL)};
const auto bias_index{node.getInputs().at(DepthwiseConv2D::Input::BIAS)};
- const auto ifm_shape = _ctx.at(ifm_index).shape().asFeature(_current_op_seq_layout);
- const auto ofm_shape = _ctx.at(ofm_index).shape().asFeature(_current_op_seq_layout);
+ const auto ifm_shape = _ctx.at(ifm_index).shape().asFeature(_current_layout);
+ const auto ofm_shape = _ctx.at(ofm_index).shape().asFeature(_current_layout);
// Kernel format is [1, kernel_height, kernel_width, depth_out].
const auto &ker_shape = _ctx.at(ker_index).shape();
const auto ker_height = ker_shape.dim(1);
@@ -297,7 +317,7 @@ void KernelGenerator::visit(const ir::operation::Concat &node)
else
{
const auto rank = _ctx.at(ofm_index).shape().rank();
- const auto frontend_layout = _current_op_seq_layout;
+ const auto frontend_layout = _current_layout;
const auto backend_layout = output_tensor->layout();
const auto fixed_axis =
acl_common::ToARMComputeAxis(rank, axis, frontend_layout, backend_layout).value();
@@ -495,7 +515,7 @@ void KernelGenerator::visit(const ir::operation::FullyConnected &node)
auto fn = acl_common::kernelGenFullyConnected<acl_common::AclFunction, ::arm_compute::ITensor,
::arm_compute::NEFullyConnectedReshapingLayer>(
- node, _ctx, _tensor_builder, _tensor_reg, _current_op_seq_layout);
+ node, _ctx, _tensor_builder, _tensor_reg, _current_layout);
_return_fn = std::make_unique<exec::FunctionSequence>(
std::move(fn), ActivationBuilder::generate(activation, output_tensor->handle()));
}
@@ -552,7 +572,7 @@ void KernelGenerator::visit(const ir::operation::Gather &node)
// and C are not sequential in NCHW. So the backend in NCHW cannot handle this case.
assert(backend_layout == ifm_tensor->layout());
assert(backend_layout == indices_tensor->layout());
- assert(ifm_rank < 4 || _current_op_seq_layout == backend_layout);
+ assert(ifm_rank < 4 || _current_layout == backend_layout);
// input is n-D, indices k-D, output is (n + k - 1)-D
size_t n = ifm_rank;
@@ -686,7 +706,7 @@ void KernelGenerator::visit(const ir::operation::Pack &node)
for (const auto &input_index : input_indexes)
inputs.emplace_back(_tensor_reg->getAclTensor(input_index)->handle());
- const auto frontend_layout = _current_op_seq_layout;
+ const auto frontend_layout = _current_layout;
const auto backend_layout = _tensor_reg->getAclTensor(output_index)->layout();
if (axis < 0)
@@ -738,7 +758,7 @@ void KernelGenerator::visit(const ir::operation::Pad &node)
{
const int32_t *from = reinterpret_cast<const int32_t *>(pad_base) + (n * 2);
- const auto frontend_layout = _current_op_seq_layout;
+ const auto frontend_layout = _current_layout;
const auto backend_layout = _tensor_reg->getAclTensor(input_index)->layout();
const auto axis =
acl_common::ToARMComputeAxis(rank, n, frontend_layout, backend_layout).value();
@@ -762,8 +782,7 @@ void KernelGenerator::visit(const ir::operation::Pad &node)
void KernelGenerator::visit(const ir::operation::Pool2D &node)
{
auto raw_fn = acl_common::kernelGenPool2D<::arm_compute::NEPoolingLayer>(
- node, _ctx, _tensor_reg, _current_op_seq_layout,
- acl_common::convertPoolType(node.param().op_type));
+ node, _ctx, _tensor_reg, _current_layout, acl_common::convertPoolType(node.param().op_type));
const auto ofm_index{node.getOutputs().at(0)};
auto ofm_tensor = _tensor_reg->getAclTensor(ofm_index);
@@ -836,7 +855,7 @@ void KernelGenerator::visit(const ir::operation::Reduce &node)
// Convert to ACL axes taking into account negative values and possible duplicates.
const auto &axes = _ctx.at(axes_index);
const auto input_rank = _ctx.at(input_index).shape().rank();
- const auto frontend_layout = _current_op_seq_layout;
+ const auto frontend_layout = _current_layout;
const auto backend_layout = input_tensor->layout();
const auto reduce_axes =
acl_common::asCoordinates(axes, input_rank, frontend_layout, backend_layout);
@@ -873,7 +892,7 @@ void KernelGenerator::visit(const ir::operation::Reshape &node)
// NOTE This operation must not be changed the layout from frontend to backend
// So, PermutationOperationPass makes layouts of frontend and backend the same.
- const auto frontend_layout = _current_op_seq_layout;
+ const auto frontend_layout = _current_layout;
const auto backend_layout = output_tensor->layout();
assert((_ctx.at(input_index).shape().rank() < 4 && _ctx.at(output_index).shape().rank() < 4) ||
frontend_layout == backend_layout);
@@ -1047,7 +1066,7 @@ void KernelGenerator::visit(const ir::operation::Split &node)
for (const auto &ofm_ind : output_indexes)
output_tensors.emplace_back(_tensor_reg->getAclTensor(ofm_ind)->handle());
- const auto frontend_layout = _current_op_seq_layout;
+ const auto frontend_layout = _current_layout;
const auto backend_layout = ifm_tensor->layout();
auto axis = _ctx.at(axis_index).asScalar<int32_t>();
if (axis < 0)
@@ -1085,7 +1104,7 @@ void KernelGenerator::visit(const ir::operation::Slice &node)
auto outputData_tensor = _tensor_reg->getAclTensor(output_index);
auto inputData_tensor = _tensor_reg->getAclTensor(input_index);
- const auto frontend_layout = _current_op_seq_layout;
+ const auto frontend_layout = _current_layout;
const auto backend_layout = inputData_tensor->layout();
// Set initializers for indices data such as order of inputData
@@ -1150,7 +1169,7 @@ void KernelGenerator::visit(const ir::operation::StridedSlice &node)
auto outputData_tensor = _tensor_reg->getAclTensor(output_index);
auto inputData_tensor = _tensor_reg->getAclTensor(input_index);
- const auto frontend_layout = _current_op_seq_layout;
+ const auto frontend_layout = _current_layout;
const auto backend_layout = inputData_tensor->layout();
// Set initializers for indices data such as order of inputData
@@ -1244,9 +1263,9 @@ void KernelGenerator::visit(const ir::operation::TransposeConv &node)
const auto ker_index{node.getInputs().at(ir::operation::TransposeConv::Input::KERNEL)};
const auto ifm_index{node.getInputs().at(ir::operation::TransposeConv::Input::INPUT)};
- const auto ofm_shape = _ctx.at(ofm_index).shape().asFeature(_current_op_seq_layout);
- const auto ifm_shape = _ctx.at(ifm_index).shape().asFeature(_current_op_seq_layout);
- const auto ker_shape = _ctx.at(ker_index).shape().asFeature(_current_op_seq_layout);
+ const auto ofm_shape = _ctx.at(ofm_index).shape().asFeature(_current_layout);
+ const auto ifm_shape = _ctx.at(ifm_index).shape().asFeature(_current_layout);
+ const auto ker_shape = _ctx.at(ker_index).shape().asFeature(_current_layout);
const auto stride = node.param().stride;
@@ -1285,7 +1304,7 @@ void KernelGenerator::visit(const ir::operation::Transpose &node)
auto ofm_tensor = _tensor_reg->getAclTensor(ofm_idx);
const auto ifm_tensor = _tensor_reg->getAclTensor(ifm_idx);
- const auto frontend_layout = _current_op_seq_layout;
+ const auto frontend_layout = _current_layout;
const auto backend_layout = ifm_tensor->layout();
const auto rank = _ctx.at(ifm_idx).shape().rank();
@@ -1340,7 +1359,7 @@ void KernelGenerator::visit(const ir::operation::Unpack &node)
for (const auto &output_index : output_indexes)
outputs.emplace_back(_tensor_reg->getAclTensor(output_index)->handle());
- const auto frontend_layout = _current_op_seq_layout;
+ const auto frontend_layout = _current_layout;
const auto backend_layout = _tensor_reg->getAclTensor(input_index)->layout();
if (axis < 0)
axis += input_rank;
@@ -1413,7 +1432,7 @@ void KernelGenerator::visit(const ir::operation::OneHot &node)
auto offvalue_tensor = _tensor_reg->getAclTensor(offvalue_idx);
const size_t output_rank = _ctx.at(out_idx).shape().rank();
- const auto frontend_layout = _current_op_seq_layout;
+ const auto frontend_layout = _current_layout;
const auto backend_layout = output_tensor->layout();
int32_t axis = node.param().axis == -1 ? output_rank - 1 : node.param().axis;
axis = acl_common::ToARMComputeAxis(output_rank, axis, frontend_layout, backend_layout).value();
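
Most hunks in this file funnel a frontend axis through acl_common::ToARMComputeAxis before handing it to ACL. A simplified standalone model of that conversion for the case where frontend and backend layouts match is shown below; the real helper additionally applies an NHWC/NCHW permutation when the layouts differ:

#include <cassert>
#include <cstdint>

// ACL counts dimensions from the innermost one, so for matching layouts axis a of a
// rank-r tensor becomes r - 1 - a. This is a sketch, not the onert implementation.
std::uint32_t to_acl_axis_same_layout(std::uint32_t rank, std::int32_t axis)
{
  if (axis < 0)
    axis += static_cast<std::int32_t>(rank); // normalize negative axes first
  assert(axis >= 0 && static_cast<std::uint32_t>(axis) < rank);
  return rank - 1 - static_cast<std::uint32_t>(axis);
}

int main()
{
  assert(to_acl_axis_same_layout(4, 0) == 3);  // outermost runtime axis -> outermost ACL dim index
  assert(to_acl_axis_same_layout(4, -1) == 0); // last runtime axis -> innermost ACL dim
  return 0;
}
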
diff --git a/runtime/onert/backend/acl_neon/KernelGenerator.h b/runtime/onert/backend/acl_neon/KernelGenerator.h
index 4d269cde5..2a4b307b8 100644
--- a/runtime/onert/backend/acl_neon/KernelGenerator.h
+++ b/runtime/onert/backend/acl_neon/KernelGenerator.h
@@ -17,7 +17,7 @@
#ifndef __ONERT_BACKEND_ACL_NEON_KERNEL_GENERATOR_H__
#define __ONERT_BACKEND_ACL_NEON_KERNEL_GENERATOR_H__
-#include <backend/IKernelGenerator.h>
+#include <backend/cpu_common/KernelGeneratorBase.h>
#include "ir/Operands.h"
#include "TensorBuilder.h"
@@ -31,7 +31,7 @@ namespace backend
namespace acl_neon
{
-class KernelGenerator : public IKernelGenerator
+class KernelGenerator : public cpu_common::KernelGeneratorBase
{
public:
KernelGenerator(const ir::Operands &operands_ctx, const ir::Operations &operations_ctx,
@@ -39,17 +39,20 @@ public:
const std::shared_ptr<acl_common::AclTensorRegistry<TensorManager>> &_tensor_reg);
void visit(const ir::OpSequence &) override;
- void visit(const ir::operation::ArgMax &) override;
+
+ void visit(const ir::operation::ArgMinMax &) override;
void visit(const ir::operation::BatchToSpaceND &) override;
void visit(const ir::operation::BinaryArithmetic &) override;
+ void visit(const ir::operation::Comparison &) override;
+ void visit(const ir::operation::Concat &) override;
void visit(const ir::operation::Conv2D &) override;
void visit(const ir::operation::DepthToSpace &) override;
void visit(const ir::operation::DepthwiseConv2D &) override;
- void visit(const ir::operation::Concat &) override;
void visit(const ir::operation::ElementwiseActivation &) override;
void visit(const ir::operation::ElementwiseBinary &) override;
void visit(const ir::operation::ElementwiseUnary &) override;
void visit(const ir::operation::EmbeddingLookup &) override;
+ void visit(const ir::operation::ExpandDims &) override;
void visit(const ir::operation::FullyConnected &) override;
void visit(const ir::operation::Gather &) override;
void visit(const ir::operation::HashtableLookup &) override;
@@ -57,36 +60,34 @@ public:
void visit(const ir::operation::L2Normalization &) override;
void visit(const ir::operation::LocalResponseNormalization &) override;
void visit(const ir::operation::LSTM &) override;
+ void visit(const ir::operation::OneHot &) override;
void visit(const ir::operation::Pack &) override;
void visit(const ir::operation::Pad &) override;
- void visit(const ir::operation::Pool2D &) override;
void visit(const ir::operation::Permute &) override;
+ void visit(const ir::operation::Pool2D &) override;
void visit(const ir::operation::PReLU &) override;
void visit(const ir::operation::Reduce &) override;
void visit(const ir::operation::Reshape &) override;
void visit(const ir::operation::ResizeBilinear &) override;
void visit(const ir::operation::RNN &) override;
- void visit(const ir::operation::Squeeze &) override;
+ void visit(const ir::operation::Slice &) override;
void visit(const ir::operation::Softmax &) override;
void visit(const ir::operation::SpaceToBatchND &) override;
void visit(const ir::operation::SpaceToDepth &) override;
void visit(const ir::operation::Split &) override;
void visit(const ir::operation::SquaredDifference &) override;
- void visit(const ir::operation::Slice &) override;
+ void visit(const ir::operation::Squeeze &) override;
void visit(const ir::operation::StridedSlice &) override;
- void visit(const ir::operation::TransposeConv &) override;
void visit(const ir::operation::Transpose &) override;
+ void visit(const ir::operation::TransposeConv &) override;
void visit(const ir::operation::Unpack &) override;
- void visit(const ir::operation::ExpandDims &) override;
- void visit(const ir::operation::Comparison &) override;
- void visit(const ir::operation::OneHot &) override;
private:
const ir::Operands &_ctx;
const ir::Operations &_operations_ctx;
std::shared_ptr<TensorBuilder> _tensor_builder;
std::shared_ptr<acl_common::AclTensorRegistry<TensorManager>> _tensor_reg;
- ir::Layout _current_op_seq_layout;
+ ir::Layout _current_layout;
};
} // namespace acl_neon
diff --git a/runtime/onert/backend/acl_neon/Optimizer.h b/runtime/onert/backend/acl_neon/Optimizer.h
index 5fe0d519c..b8fb343e9 100644
--- a/runtime/onert/backend/acl_neon/Optimizer.h
+++ b/runtime/onert/backend/acl_neon/Optimizer.h
@@ -17,8 +17,7 @@
#ifndef __ONERT_BACKEND_ACL_NEON_OPTIMIZER_H__
#define __ONERT_BACKEND_ACL_NEON_OPTIMIZER_H__
-#include <backend/IOptimizer.h>
-#include <backend/BackendContext.h>
+#include "BackendContext.h"
#include "TensorBuilder.h"
namespace onert
@@ -28,12 +27,12 @@ namespace backend
namespace acl_neon
{
-class Optimizer : public IOptimizer
+class Optimizer
{
public:
Optimizer(BackendContext *context);
- void optimize() override;
+ void optimize();
private:
BackendContext *_context;
diff --git a/runtime/onert/backend/acl_neon/acl_neon.cc b/runtime/onert/backend/acl_neon/acl_neon.cc
index f490d132d..6535fb291 100644
--- a/runtime/onert/backend/acl_neon/acl_neon.cc
+++ b/runtime/onert/backend/acl_neon/acl_neon.cc
@@ -14,20 +14,11 @@
* limitations under the License.
*/
-#include <util/logging.h>
-
#include "Backend.h"
extern "C" {
-onert::backend::Backend *onert_backend_create()
-{
- VERBOSE(onert_backend_create) << "'acl_neon' loaded\n";
- return new onert::backend::acl_neon::Backend;
-}
-void onert_backend_destroy(onert::backend::Backend *backend)
-{
- VERBOSE(onert_backend_create) << "'acl_neon' unloaded\n";
- delete backend;
-}
+onert::backend::Backend *onert_backend_create() { return new onert::backend::acl_neon::Backend; }
+
+void onert_backend_destroy(onert::backend::Backend *backend) { delete backend; }
}
diff --git a/runtime/onert/backend/cpu/Backend.h b/runtime/onert/backend/cpu/Backend.h
index fc8574b26..0b416a7e9 100644
--- a/runtime/onert/backend/cpu/Backend.h
+++ b/runtime/onert/backend/cpu/Backend.h
@@ -54,8 +54,6 @@ public:
context->constant_initializer = std::make_shared<ConstantInitializer>(operands, tr);
context->kernel_gen = std::make_shared<KernelGenerator>(operands, operations, tb, tr, kb,
context->external_context());
- context->tensor_register = nullptr;
- context->optimizer = nullptr;
return context;
}
diff --git a/runtime/onert/backend/cpu/BackendContext.cc b/runtime/onert/backend/cpu/BackendContext.cc
new file mode 100644
index 000000000..6b958c1b7
--- /dev/null
+++ b/runtime/onert/backend/cpu/BackendContext.cc
@@ -0,0 +1,147 @@
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "BackendContext.h"
+
+#include "TensorBuilder.h"
+#include "KernelGenerator.h"
+#include "util/logging.h"
+#include "ir/Index.h"
+#include "ir/OperandIndexMap.h"
+#include "ir/OperandIndexSequence.h"
+#include "backend/cpu_common/BackendContextHelpers.h"
+
+namespace onert
+{
+namespace backend
+{
+namespace cpu
+{
+
+void BackendContext::initConsts()
+{
+ for (auto &op : operation_list())
+ {
+ constant_initializer->setLayout(op.layout);
+ graph()->operations().at(op.index).accept(*constant_initializer);
+ }
+
+ for (auto ind : operand_list())
+ {
+ const auto &obj = graph()->operands().at(ind);
+ if (obj.isConstant() && !constant_initializer->exist(ind))
+ {
+ constant_initializer->registerDefaultInitializer(ind, obj);
+ }
+ }
+
+ constant_initializer->run();
+}
+
+ITensorRegistry *BackendContext::genTensors(const std::vector<onert::ir::OpSequenceIndex> &order,
+ const ir::OpSequences &op_seqs,
+ const ir::LowerInfoMap &lower_info)
+{
+ auto model_io = (graph()->getInputs() + graph()->getOutputs()) | ir::Remove::UNDEFINED |
+ ir::Remove::DUPLICATED;
+ for (auto index : operand_list())
+ {
+ if (model_io.contains(index))
+ continue;
+ const auto &obj = graph()->operands().at(index);
+ const auto frontend_layout = [&]() {
+ if (obj.getUses().size() == 0)
+ return ir::Layout::UNKNOWN;
+ auto use_op_ind = *obj.getUses().begin(); // FIXME What if it has two or more uses?
+ for (auto &operation_info : operation_list())
+ {
+ if (operation_info.index == use_op_ind)
+ return operation_info.layout;
+ }
+ return ir::Layout::UNKNOWN;
+ }();
+ const auto &permute_factor = lower_info.operand.at(index)->def_factors().getOnlyElement();
+ if (permute_factor.backend() != backend())
+ continue;
+ const auto backend_layout = permute_factor.layout();
+ ir::OperandInfo backend_info{permuteShape(obj.shape(), frontend_layout, backend_layout),
+ obj.typeInfo(), obj.info().memAllocType(), obj.isConstant()};
+ tensor_builder->registerTensorInfo(index, backend_info, backend_layout);
+ }
+
+ // TODO Get compiler options from compiler, and use it rather than getting it from Env
+ if (util::getConfigString(util::config::EXECUTOR) == "Linear")
+ {
+ cpu_common::planTensors(*this, order, op_seqs, lower_info);
+ }
+ else
+ {
+    // For executors that do not have a fixed linear execution order:
+    // as a workaround, use the static memory planner so that tensors are never deallocated
+ for (auto ind : operand_list())
+ {
+ if (tensor_builder->isRegistered(ind))
+ tensor_builder->notifyFirstUse(ind);
+ }
+ }
+
+ tensor_builder->prepare();
+
+ return tensor_registry.get();
+}
+
+FunctionMap BackendContext::genKernels(const std::vector<onert::ir::OpSequenceIndex> &order,
+ const ir::OpSequences &op_seqs)
+{
+ FunctionMap ret;
+
+ for (auto op_seq_ind : order)
+ {
+ const auto &op_seq = op_seqs.at(op_seq_ind);
+ bool assigned = [&]() {
+ for (auto op_info : operation_list())
+ if (op_seq.exist(op_info.index))
+ return true;
+ return false;
+ }();
+ if (!assigned)
+ continue;
+ auto fn_seq = kernel_gen->generate(op_seqs.at(op_seq_ind));
+ ret.emplace_back(op_seq_ind, std::move(fn_seq));
+ }
+
+ initConsts();
+
+ // NOTE For memory optimization, we want to free some operand data
+ for (auto ind : operand_list())
+ {
+ // TODO Remove const_cast
+ auto &obj = const_cast<ir::Graph *>(graph())->operands().at(ind);
+ obj.releaseData();
+ }
+
+ for (auto &it : ret)
+ {
+ auto &fn_seq = it.second;
+ fn_seq->iterate([&](exec::IFunction &ifunc) { ifunc.prepare(); });
+ }
+
+ return ret;
+}
+
+} // namespace cpu
+} // namespace backend
+} // namespace onert
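
genKernels above returns an ordered list of (op-sequence index, function sequence) pairs and then walks every generated function once to prepare it before execution. A stripped-down standalone model of that two-phase pattern, with std::function standing in for exec::IFunction:

#include <cstdio>
#include <functional>
#include <utility>
#include <vector>

struct Fn
{
  std::function<void()> prepare;
  std::function<void()> run;
};

using FunctionMap = std::vector<std::pair<int, std::vector<Fn>>>;

int main()
{
  Fn conv{[] { std::puts("prepare conv"); }, [] { std::puts("run conv"); }};
  Fn softmax{[] { std::puts("prepare softmax"); }, [] { std::puts("run softmax"); }};

  FunctionMap fmap;
  fmap.emplace_back(0, std::vector<Fn>{conv});
  fmap.emplace_back(1, std::vector<Fn>{softmax});

  // Phase 1: prepare every kernel (e.g. configure layers, allocate scratch buffers).
  for (auto &it : fmap)
    for (auto &fn : it.second)
      fn.prepare();

  // Phase 2: execute in the planned order.
  for (auto &it : fmap)
    for (auto &fn : it.second)
      fn.run();
}
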
diff --git a/runtime/onert/backend/cpu/BackendContext.h b/runtime/onert/backend/cpu/BackendContext.h
index e90b21054..0a4106d33 100644
--- a/runtime/onert/backend/cpu/BackendContext.h
+++ b/runtime/onert/backend/cpu/BackendContext.h
@@ -18,6 +18,9 @@
#define __ONERT_BACKEND_CPU_BACKEND_CONTEXT_H__
#include <backend/BackendContext.h>
+#include "TensorBuilder.h"
+#include "ConstantInitializer.h"
+#include "KernelGenerator.h"
#include "ExternalContext.h"
namespace onert
@@ -32,21 +35,35 @@ class BackendContext : public onert::backend::BackendContext
public:
BackendContext(const Backend *backend, const ir::Graph *graph,
std::shared_ptr<ITensorRegistry> tensor_registry = nullptr,
- std::shared_ptr<ITensorBuilder> tensor_builder = nullptr,
- std::shared_ptr<IConstantInitializer> constant_initializer = nullptr,
- std::shared_ptr<IKernelGenerator> kernel_gen = nullptr,
- std::shared_ptr<ITensorRegister> tensor_register = nullptr,
- std::shared_ptr<IOptimizer> optimizer = nullptr)
- : onert::backend::BackendContext(backend, graph, tensor_registry, tensor_builder,
- constant_initializer, kernel_gen, tensor_register,
- optimizer),
- _external_context(new ExternalContext)
+ std::shared_ptr<TensorBuilder> tensor_builder = nullptr,
+ std::shared_ptr<ConstantInitializer> constant_initializer = nullptr,
+ std::shared_ptr<KernelGenerator> kernel_gen = nullptr)
+ : onert::backend::BackendContext(backend, graph, tensor_registry),
+ tensor_builder{tensor_builder}, constant_initializer{constant_initializer},
+ kernel_gen{kernel_gen}, _external_context(new ExternalContext)
{
}
+ ITensorRegistry *genTensors(const std::vector<onert::ir::OpSequenceIndex> &order,
+ const ir::OpSequences &op_seqs,
+ const ir::LowerInfoMap &lower_info) override;
+ FunctionMap genKernels(const std::vector<onert::ir::OpSequenceIndex> &order,
+ const ir::OpSequences &op_seqs) override;
+
std::shared_ptr<ExternalContext> external_context() { return _external_context; }
private:
+ void initConsts();
+ void planTensors(const std::vector<onert::ir::OpSequenceIndex> &order,
+ const ir::OpSequences &op_seqs, const ir::LowerInfoMap &lower_info);
+
+public:
+ // TODO Make it private
+ std::shared_ptr<TensorBuilder> tensor_builder;
+ std::shared_ptr<ConstantInitializer> constant_initializer;
+ std::shared_ptr<KernelGenerator> kernel_gen;
+
+private:
// NOTE ruy context has a thread pool, and when multiple ruy contexts are created,
// the thread pool is also created in duplicate
// TODO Create one ruy context for session
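
The NOTE above is the motivation for ExternalContext: ruy::Context owns a thread pool, so creating one context per kernel would duplicate the pool. A minimal standalone sketch of the intended sharing, with a dummy type standing in for ruy::Context:

#include <cstdio>
#include <memory>
#include <vector>

// Dummy stand-in for ruy::Context: something that owns a costly thread pool.
struct RuyLikeContext
{
  RuyLikeContext() { std::puts("thread pool created"); }
};

struct Kernel
{
  explicit Kernel(std::shared_ptr<RuyLikeContext> ctx) : _ctx(std::move(ctx)) {}
  std::shared_ptr<RuyLikeContext> _ctx;
};

int main()
{
  // One context per backend context, shared by every kernel it generates,
  // so "thread pool created" is printed once rather than once per kernel.
  auto shared = std::make_shared<RuyLikeContext>();
  std::vector<Kernel> kernels{Kernel{shared}, Kernel{shared}, Kernel{shared}};
  std::printf("kernels: %zu, contexts: 1\n", kernels.size());
}
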
diff --git a/runtime/onert/backend/cpu/ConstantInitializer.h b/runtime/onert/backend/cpu/ConstantInitializer.h
index c016c83bc..d7858c0f6 100644
--- a/runtime/onert/backend/cpu/ConstantInitializer.h
+++ b/runtime/onert/backend/cpu/ConstantInitializer.h
@@ -14,13 +14,10 @@
* limitations under the License.
*/
-#ifndef __ONERT_COMPILER_CPU_CONSTANT_INITIALIZER_H__
-#define __ONERT_COMPILER_CPU_CONSTANT_INITIALIZER_H__
+#ifndef __ONERT_BACKEND_CPU_CONSTANT_INITIALIZER_H__
+#define __ONERT_BACKEND_CPU_CONSTANT_INITIALIZER_H__
-#include "backend/cpu_common/TensorRegistry.h"
-
-#include <backend/IConstantInitializer.h>
-#include <ir/Operands.h>
+#include <backend/cpu_common/ConstantInitializer.h>
namespace onert
{
@@ -29,35 +26,10 @@ namespace backend
namespace cpu
{
-class ConstantInitializer : public IConstantInitializer
-{
-public:
- ConstantInitializer(const ir::Operands &operands,
- const std::shared_ptr<ITensorRegistry> &tensor_reg);
-
-public:
- void registerDefaultInitializer(const ir::OperandIndex &index, const ir::Operand &obj) override;
-
- // TODO: For now the only cpu backend supports constant tensor to use data from external
- // If the other backend supports (to do this,
- // ExternalTensor should be abstract such as IExternal, maybe),
- // this can be an interface of IConstantInitializer
- void registerExternalInitializer(const ir::OperandIndex &, const ir::Operand &);
-
-public:
- void visit(const ir::operation::Conv2D &) override;
- void visit(const ir::operation::DepthwiseConv2D &) override;
- void visit(const ir::operation::FullyConnected &) override;
-
-private:
- std::shared_ptr<ITensorRegistry> tensor_registry() const override { return _tensor_reg; }
-
-private:
- std::shared_ptr<ITensorRegistry> _tensor_reg;
-};
+using ConstantInitializer = cpu_common::ConstantInitializer;
} // namespace cpu
} // namespace backend
} // namespace onert
-#endif // __ONERT_COMPILER_CPU_CONSTANT_INITIALIZER_H__
+#endif // __ONERT_BACKEND_CPU_CONSTANT_INITIALIZER_H__
diff --git a/runtime/onert/backend/cpu/ExternalContext.h b/runtime/onert/backend/cpu/ExternalContext.h
index 32e249f5a..f5d11f4f1 100644
--- a/runtime/onert/backend/cpu/ExternalContext.h
+++ b/runtime/onert/backend/cpu/ExternalContext.h
@@ -17,7 +17,6 @@
#ifndef __ONERT_BACKEND_CPU_EXTERNAL_CONTEXT_H__
#define __ONERT_BACKEND_CPU_EXTERNAL_CONTEXT_H__
-#include <backend/IExternalContext.h>
#include <util/ConfigSource.h>
#include <ruy/context.h>
@@ -33,7 +32,7 @@ namespace backend
namespace cpu
{
-class ExternalContext : public IExternalContext
+class ExternalContext
{
public:
ExternalContext() : _ruy_context(new ruy::Context)
diff --git a/runtime/onert/backend/cpu/KernelGenerator.cc b/runtime/onert/backend/cpu/KernelGenerator.cc
index 451815b65..25756eced 100644
--- a/runtime/onert/backend/cpu/KernelGenerator.cc
+++ b/runtime/onert/backend/cpu/KernelGenerator.cc
@@ -23,6 +23,7 @@
#include "ops/CompareLayer.h"
#include "ops/ConcatLayer.h"
#include "ops/ConvolutionLayer.h"
+#include "ops/DepthToSpaceLayer.h"
#include "ops/DepthwiseConvolutionLayer.h"
#include "ops/EinsumLayer.h"
#include "ops/ElementwiseActivationLayer.h"
@@ -108,12 +109,16 @@ convertElementwiseActivationType(ir::operation::ElementwiseActivation::Type type
{
switch (type_ir)
{
+ case ir::operation::ElementwiseActivation::Type::ELU:
+ return ops::ElementwiseActivationType::kElu;
case ir::operation::ElementwiseActivation::Type::LOGISTIC:
return ops::ElementwiseActivationType::kLogistic;
case ir::operation::ElementwiseActivation::Type::RELU:
return ops::ElementwiseActivationType::kReLU;
case ir::operation::ElementwiseActivation::Type::TANH:
return ops::ElementwiseActivationType::kTanh;
+ case ir::operation::ElementwiseActivation::Type::LEAKY_RELU:
+ return ops::ElementwiseActivationType::kLeakyReLU;
default:
throw std::runtime_error("cpu KernelGenerator : Not supported operation yet");
}
@@ -124,6 +129,8 @@ convertElementwiseBinaryType(ir::operation::ElementwiseBinary::ElementwiseBinary
{
switch (type_ir)
{
+ case ir::operation::ElementwiseBinary::ElementwiseBinaryType::LOGICAL_AND:
+ return ops::ElementwiseBinaryType::kLogicalAnd;
case ir::operation::ElementwiseBinary::ElementwiseBinaryType::LOGICAL_OR:
return ops::ElementwiseBinaryType::kLogicalOr;
case ir::operation::ElementwiseBinary::ElementwiseBinaryType::MAX:
@@ -167,6 +174,10 @@ ops::ElementwiseUnaryType convertElementwiseUnaryType(ir::operation::Elementwise
return ops::ElementwiseUnaryType::kRSqrt;
case ir::operation::ElementwiseUnary::Type::SIN:
return ops::ElementwiseUnaryType::kSin;
+ case ir::operation::ElementwiseUnary::Type::SQRT:
+ return ops::ElementwiseUnaryType::kSqrt;
+ case ir::operation::ElementwiseUnary::Type::SQUARE:
+ return ops::ElementwiseUnaryType::kSquare;
case ir::operation::ElementwiseUnary::Type::ZEROS_LIKE:
return ops::ElementwiseUnaryType::kZerosLike;
default:
@@ -217,7 +228,7 @@ KernelGenerator::KernelGenerator(
const std::shared_ptr<ExternalContext> &external_context)
: _ctx(operands_ctx), _operations_ctx{operations_ctx}, _tensor_builder(tensor_builder),
_tensor_reg{tensor_reg}, _kernel_builder(kernel_builder),
- _current_op_seq_layout(ir::Layout::UNKNOWN), _external_context(external_context)
+ _current_layout(ir::Layout::UNKNOWN), _external_context(external_context)
{
// DO NOTHING
}
@@ -260,7 +271,7 @@ void KernelGenerator::visit(const ir::OpSequence &op_seq)
_return_fn_seq->dynamic_tensor_ctx(dyn_ctx);
}
- _current_op_seq_layout = op_seq.getLayout();
+ _current_layout = op_seq.getLayout();
for (const auto &operation_idx : op_seq.operations())
{
const auto &node = _operations_ctx.at(operation_idx);
@@ -314,8 +325,8 @@ void KernelGenerator::visit(const ir::operation::Conv2D &node)
_return_fn = std::move(fn);
return;
}
- const auto ifm_shape = _ctx.at(ifm_index).shape().asFeature(_current_op_seq_layout);
- const auto ofm_shape = _ctx.at(ofm_index).shape().asFeature(_current_op_seq_layout);
+ const auto ifm_shape = _ctx.at(ifm_index).shape().asFeature(_current_layout);
+ const auto ofm_shape = _ctx.at(ofm_index).shape().asFeature(_current_layout);
// Kernel format is [depth_out, kernel_height, kernel_width, depth_in].
const auto &ker_shape = _ctx.at(ker_index).shape();
const auto ker_height = ker_shape.dim(1);
@@ -342,8 +353,8 @@ void KernelGenerator::visit(const ir::operation::DepthwiseConv2D &node)
const auto bias_index{node.getInputs().at(DepthwiseConv2D::Input::BIAS)};
const auto stride = node.param().stride;
- const auto ifm_shape = _ctx.at(ifm_index).shape().asFeature(_current_op_seq_layout);
- const auto ofm_shape = _ctx.at(ofm_index).shape().asFeature(_current_op_seq_layout);
+ const auto ifm_shape = _ctx.at(ifm_index).shape().asFeature(_current_layout);
+ const auto ofm_shape = _ctx.at(ofm_index).shape().asFeature(_current_layout);
// Kernel format is [1, kernel_height, kernel_width, depth_out].
const auto &ker_shape = _ctx.at(ker_index).shape();
const auto ker_height = ker_shape.dim(1);
@@ -364,7 +375,7 @@ void KernelGenerator::visit(const ir::operation::DepthwiseConv2D &node)
fn->configure(ifm_tensor, ker_tensor, bias_tensor, padding.left, padding.right, padding.top,
padding.bottom, stride.horizontal, stride.vertical, multiplier, dilation_width,
- dilation_height, activation, ofm_tensor);
+ dilation_height, activation, ofm_tensor, _external_context);
_return_fn = std::move(fn);
}
@@ -374,7 +385,7 @@ void KernelGenerator::visit(const ir::operation::Concat &node)
const auto ofm_index{node.getOutputs().at(0)};
const auto rank = _ctx.at(ofm_index).shape().rank();
- const auto axis = ops::getAxis(rank, node.param().axis, _current_op_seq_layout);
+ const auto axis = ops::getAxis(rank, node.param().axis, _current_layout);
auto output_tensor = _tensor_reg->getPortableTensor(ofm_index);
@@ -418,16 +429,15 @@ void KernelGenerator::visit(const ir::operation::BatchToSpaceND &node)
void KernelGenerator::visit(const ir::operation::Fill &node)
{
const auto output_index{node.getOutputs().at(0)};
- const auto input_index{node.getInputs().at(ir::operation::Fill::Input::INPUT)};
+ // SHAPE input is used for shape inference
const auto value_index{node.getInputs().at(ir::operation::Fill::Input::VALUE)};
auto output_tensor = _tensor_reg->getPortableTensor(output_index);
- auto input_tensor = _tensor_reg->getPortableTensor(input_index);
auto value_tensor = _tensor_reg->getPortableTensor(value_index);
auto fn = std::make_unique<ops::FillLayer>();
- fn->configure(input_tensor, value_tensor, output_tensor);
+ fn->configure(value_tensor, output_tensor);
_return_fn = std::move(fn);
}
@@ -576,7 +586,7 @@ void KernelGenerator::visit(const ir::operation::Gather &node)
assert(backend_layout == indices_tensor->layout());
const auto &input_shape = _ctx.at(input_index).shape();
UNUSED_RELEASE(input_shape);
- assert(input_shape.rank() < 4 || _current_op_seq_layout == backend_layout);
+ assert(input_shape.rank() < 4 || _current_layout == backend_layout);
const auto axis_raw = node.param().axis;
const auto axis_value = (axis_raw < 0 ? (input_shape.rank() + axis_raw) : axis_raw);
@@ -640,7 +650,7 @@ void KernelGenerator::visit(const ir::operation::Custom &node)
for (auto &idx : opSeq)
{
const auto &operand = _ctx.at(idx);
- // TODO make sure using `_current_op_seq_layout` is correct for custom operations
+ // TODO make sure using `_current_layout` is correct for custom operations
types.emplace_back(custom::TypeInfo{operand.shape(), operand.typeInfo().type()});
auto in_tensor = _tensor_reg->getPortableTensor(idx);
tensors.emplace_back(in_tensor);
@@ -713,15 +723,14 @@ void KernelGenerator::visit(const ir::operation::ExpandDims &node)
{
const auto output_index{node.getOutputs().at(0)};
const auto input_index{node.getInputs().at(ir::operation::ExpandDims::Input::INPUT)};
- const auto axis_index{node.getInputs().at(ir::operation::ExpandDims::Input::AXIS)};
+ // AXIS input is used for output shape inference
auto output_tensor = _tensor_reg->getPortableTensor(output_index);
auto input_tensor = _tensor_reg->getPortableTensor(input_index);
- auto axis_tensor = _tensor_reg->getPortableTensor(axis_index);
auto fn = std::make_unique<ops::ExpandDimsLayer>();
- fn->configure(input_tensor, axis_tensor, output_tensor);
+ fn->configure(input_tensor, output_tensor);
_return_fn = std::move(fn);
}
@@ -731,7 +740,7 @@ void KernelGenerator::visit(const ir::operation::Pack &node)
const auto ofm_index{node.getOutputs().at(0)};
const auto rank = _ctx.at(ofm_index).shape().rank();
- const auto axis = ops::getAxis(rank, node.param().axis, _current_op_seq_layout);
+ const auto axis = ops::getAxis(rank, node.param().axis, _current_layout);
assert(-rank <= axis && axis < rank);
@@ -753,7 +762,7 @@ void KernelGenerator::visit(const ir::operation::Unpack &node)
const auto input_index{node.getInputs().at(0)};
const auto rank = _ctx.at(input_index).shape().rank();
- const auto axis = ops::getAxis(rank, node.param().axis, _current_op_seq_layout);
+ const auto axis = ops::getAxis(rank, node.param().axis, _current_layout);
assert(rank == 0 || (-rank <= axis && axis < rank));
@@ -1004,11 +1013,11 @@ void KernelGenerator::visit(const ir::operation::Reverse &node)
_return_fn = std::move(fn);
}
-void KernelGenerator::visit(const ir::operation::ArgMax &node)
+void KernelGenerator::visit(const ir::operation::ArgMinMax &node)
{
const auto output_index{node.getOutputs().at(0)};
- const auto input_index{node.getInputs().at(ir::operation::ArgMax::INPUT)};
- const auto axis_index{node.getInputs().at(ir::operation::ArgMax::AXIS)};
+ const auto input_index{node.getInputs().at(ir::operation::ArgMinMax::INPUT)};
+ const auto axis_index{node.getInputs().at(ir::operation::ArgMinMax::AXIS)};
auto output_tensor = _tensor_reg->getPortableTensor(output_index);
auto input_tensor = _tensor_reg->getPortableTensor(input_index);
@@ -1016,7 +1025,7 @@ void KernelGenerator::visit(const ir::operation::ArgMax &node)
auto fn = std::make_unique<ops::ArgMinMaxLayer>();
- fn->configure(input_tensor, output_tensor, axis_tensor, /* is_arg_max */ true);
+ fn->configure(input_tensor, output_tensor, axis_tensor, node.param().is_arg_max);
_return_fn = std::move(fn);
}
@@ -1029,8 +1038,8 @@ void KernelGenerator::visit(const ir::operation::Pool2D &node)
const auto kh = node.param().kh;
const auto kw = node.param().kw;
const auto stride = node.param().stride;
- const auto ifm_shape = _ctx.at(ifm_index).shape().asFeature(_current_op_seq_layout);
- const auto ofm_shape = _ctx.at(ofm_index).shape().asFeature(_current_op_seq_layout);
+ const auto ifm_shape = _ctx.at(ifm_index).shape().asFeature(_current_layout);
+ const auto ofm_shape = _ctx.at(ofm_index).shape().asFeature(_current_layout);
const auto padding =
ir::calculatePadding(node.param().padding, ifm_shape, ofm_shape, stride, kw, kh);
const auto activation = node.param().activation;
@@ -1255,6 +1264,21 @@ void KernelGenerator::visit(const ir::operation::SpaceToBatchND &node)
_return_fn = std::move(fn);
}
+void KernelGenerator::visit(const ir::operation::DepthToSpace &node)
+{
+ const auto input_index{node.getInputs().at(ir::operation::DepthToSpace::Input::INPUT)};
+ const auto output_index{node.getOutputs().at(0)};
+ auto block_size = node.param().block_size;
+
+ auto input_tensor = _tensor_reg->getPortableTensor(input_index);
+ auto output_tensor = _tensor_reg->getPortableTensor(output_index);
+
+ auto fn = std::make_unique<ops::DepthToSpaceLayer>();
+
+ fn->configure(input_tensor, block_size, output_tensor);
+ _return_fn = std::move(fn);
+}
+
void KernelGenerator::visit(const ir::operation::SpaceToDepth &node)
{
const auto input_index{node.getInputs().at(ir::operation::SpaceToDepth::Input::INPUT)};
diff --git a/runtime/onert/backend/cpu/KernelGenerator.h b/runtime/onert/backend/cpu/KernelGenerator.h
index 5df77607f..3a4cfbffa 100644
--- a/runtime/onert/backend/cpu/KernelGenerator.h
+++ b/runtime/onert/backend/cpu/KernelGenerator.h
@@ -23,7 +23,7 @@
#include "Tensor.h"
#include <backend/CustomKernelBuilder.h>
-#include <backend/IKernelGenerator.h>
+#include <backend/cpu_common/KernelGeneratorBase.h>
#include <ir/Operands.h>
#include <ir/Operations.h>
@@ -34,7 +34,7 @@ namespace backend
namespace cpu
{
-class KernelGenerator : public IKernelGenerator
+class KernelGenerator : public cpu_common::KernelGeneratorBase
{
public:
KernelGenerator(const ir::Operands &operands_ctx, const ir::Operations &operations_ctx,
@@ -43,59 +43,59 @@ public:
const std::shared_ptr<custom::IKernelBuilder> &kernel_builder,
const std::shared_ptr<ExternalContext> &external_context);
- using IKernelGenerator::visit;
+ void visit(const ir::OpSequence &) override;
void visit(const ir::operation::AddN &) override;
- void visit(const ir::OpSequence &) override;
+ void visit(const ir::operation::ArgMinMax &) override;
+ void visit(const ir::operation::BatchMatMul &) override;
+ void visit(const ir::operation::BatchToSpaceND &) override;
+ void visit(const ir::operation::BinaryArithmetic &) override;
+ void visit(const ir::operation::BroadcastTo &) override;
+ void visit(const ir::operation::Comparison &) override;
+ void visit(const ir::operation::Concat &) override;
void visit(const ir::operation::Conv2D &) override;
+ void visit(const ir::operation::Custom &node) override;
+ void visit(const ir::operation::DepthToSpace &) override;
void visit(const ir::operation::DepthwiseConv2D &) override;
- void visit(const ir::operation::Concat &) override;
- void visit(const ir::operation::Fill &) override;
- void visit(const ir::operation::FullyConnected &) override;
- void visit(const ir::operation::Reshape &) override;
- void visit(const ir::operation::Squeeze &) override;
- void visit(const ir::operation::Softmax &) override;
- void visit(const ir::operation::Comparison &) override;
- void visit(const ir::operation::BinaryArithmetic &) override;
void visit(const ir::operation::Einsum &) override;
- void visit(const ir::operation::Gather &) override;
- void visit(const ir::operation::Custom &node) override;
void visit(const ir::operation::ElementwiseActivation &) override;
void visit(const ir::operation::ElementwiseBinary &) override;
void visit(const ir::operation::ElementwiseUnary &) override;
void visit(const ir::operation::ExpandDims &) override;
+ void visit(const ir::operation::Fill &) override;
+ void visit(const ir::operation::FullyConnected &) override;
+ void visit(const ir::operation::FusedBatchNorm &) override;
+ void visit(const ir::operation::Gather &) override;
+ void visit(const ir::operation::L2Normalization &) override;
+ void visit(const ir::operation::LogSoftmax &) override;
void visit(const ir::operation::LSTM &) override;
- void visit(const ir::operation::Pad &) override;
- void visit(const ir::operation::Pack &) override;
- void visit(const ir::operation::Unpack &) override;
+ void visit(const ir::operation::MatrixBandPart &) override;
void visit(const ir::operation::OneHot &) override;
- void visit(const ir::operation::Transpose &) override;
- void visit(const ir::operation::Reduce &) override;
- void visit(const ir::operation::Select &) override;
- void visit(const ir::operation::Slice &) override;
- void visit(const ir::operation::StridedSlice &) override;
- void visit(const ir::operation::Split &) override;
- void visit(const ir::operation::Shape &) override;
- void visit(const ir::operation::ResizeBilinear &node) override;
- void visit(const ir::operation::Reverse &) override;
- void visit(const ir::operation::ArgMax &) override;
+ void visit(const ir::operation::Pack &) override;
+ void visit(const ir::operation::Pad &) override;
void visit(const ir::operation::Pool2D &) override;
void visit(const ir::operation::Pow &) override;
- void visit(const ir::operation::SquaredDifference &) override;
- void visit(const ir::operation::Tile &) override;
- void visit(const ir::operation::L2Normalization &) override;
void visit(const ir::operation::Range &) override;
void visit(const ir::operation::Rank &) override;
- void visit(const ir::operation::MatrixBandPart &) override;
- void visit(const ir::operation::BatchMatMul &) override;
- void visit(const ir::operation::BatchToSpaceND &) override;
- void visit(const ir::operation::BroadcastTo &) override;
- void visit(const ir::operation::FusedBatchNorm &) override;
- void visit(const ir::operation::LogSoftmax &) override;
+ void visit(const ir::operation::Reduce &) override;
+ void visit(const ir::operation::Reshape &) override;
+ void visit(const ir::operation::ResizeBilinear &node) override;
+ void visit(const ir::operation::Reverse &) override;
+ void visit(const ir::operation::Select &) override;
+ void visit(const ir::operation::Shape &) override;
+ void visit(const ir::operation::Slice &) override;
+ void visit(const ir::operation::Softmax &) override;
void visit(const ir::operation::SpaceToBatchND &) override;
void visit(const ir::operation::SpaceToDepth &) override;
- void visit(const ir::operation::StatelessRandomUniform &) override;
+ void visit(const ir::operation::Split &) override;
void visit(const ir::operation::SplitV &) override;
+ void visit(const ir::operation::SquaredDifference &) override;
+ void visit(const ir::operation::Squeeze &) override;
+ void visit(const ir::operation::StatelessRandomUniform &) override;
+ void visit(const ir::operation::StridedSlice &) override;
+ void visit(const ir::operation::Tile &) override;
+ void visit(const ir::operation::Transpose &) override;
+ void visit(const ir::operation::Unpack &) override;
private:
const ir::Operands &_ctx;
@@ -103,7 +103,7 @@ private:
std::shared_ptr<TensorBuilder> _tensor_builder;
std::shared_ptr<cpu_common::TensorRegistry> _tensor_reg;
std::shared_ptr<backend::custom::IKernelBuilder> _kernel_builder;
- ir::Layout _current_op_seq_layout;
+ ir::Layout _current_layout;
const std::shared_ptr<ExternalContext> _external_context;
};
diff --git a/runtime/onert/backend/cpu/StaticTensorManager.cc b/runtime/onert/backend/cpu/StaticTensorManager.cc
deleted file mode 100644
index 3edac897c..000000000
--- a/runtime/onert/backend/cpu/StaticTensorManager.cc
+++ /dev/null
@@ -1,107 +0,0 @@
-/*
- * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#include "StaticTensorManager.h"
-#include "Tensor.h"
-
-#include <util/logging.h>
-
-namespace onert
-{
-namespace backend
-{
-namespace cpu
-{
-
-StaticTensorManager::StaticTensorManager(const std::shared_ptr<cpu_common::TensorRegistry> &reg,
- cpu_common::DynamicTensorManager *dynamic_tensor_manager)
- : _nonconst_mgr{new cpu_common::MemoryManager()}, _tensors{reg},
- _dynamic_tensor_manager{dynamic_tensor_manager}
-{
- // DO NOTHING
-}
-
-void StaticTensorManager::allocateNonconsts(void)
-{
- _nonconst_mgr->allocate();
-
- for (auto &pair : _tensors->native_tensors())
- {
- const auto &ind = pair.first;
- auto tensor = pair.second.get();
- if (!_as_constants[ind] && !tensor->is_dynamic())
- {
- auto *buffer = _nonconst_mgr->getBuffer(ind);
- tensor->setBuffer(buffer);
-
- VERBOSE(CPU_StaticTensorManager) << "TENSOR(#" << ind.value()
- << "): " << static_cast<void *>(buffer) << std::endl;
- }
- }
-}
-
-void StaticTensorManager::deallocateNonconsts(void) { _nonconst_mgr->deallocate(); }
-
-void StaticTensorManager::buildTensor(const ir::OperandIndex &ind,
- const ir::OperandInfo &tensor_info, ir::Layout backend_layout,
- bool as_const)
-{
- assert(!_tensors->getITensor(ind));
- if (as_const)
- {
- auto tensor = std::make_unique<ExternalTensor>(tensor_info, backend_layout);
- _tensors->setNativeTensor(ind, std::move(tensor));
- }
- else
- {
- auto tensor = std::make_unique<Tensor>(tensor_info, backend_layout,
- _dynamic_tensor_manager->dynamic_mem_mgr().get());
- _tensors->setNativeTensor(ind, std::move(tensor));
- }
- _as_constants[ind] = as_const;
-}
-
-void StaticTensorManager::claimPlan(const ir::OperandIndex &ind, uint32_t size)
-{
- assert(_tensors->getITensor(ind));
-
- // This method is called only when a tensor has proper shape
- assert(!_tensors->getITensor(ind)->is_dynamic());
-
- if (!_as_constants[ind])
- _nonconst_mgr->claimPlan(ind, size);
-}
-
-void StaticTensorManager::releasePlan(const ir::OperandIndex &ind)
-{
- assert(_tensors->getITensor(ind));
-
- // This method is called only when a tensor has proper shape
- assert(!_tensors->getITensor(ind)->is_dynamic());
-
- if (!_as_constants[ind])
- _nonconst_mgr->releasePlan(ind);
-}
-
-void StaticTensorManager::iterate(const std::function<void(const ir::OperandIndex &)> &fn)
-{
- for (const auto &it : _tensors->native_tensors())
- fn(it.first);
-}
-
-} // namespace cpu
-} // namespace backend
-} // namespace onert
diff --git a/runtime/onert/backend/cpu/StaticTensorManager.h b/runtime/onert/backend/cpu/StaticTensorManager.h
index 2af61e4e7..d07f0c814 100644
--- a/runtime/onert/backend/cpu/StaticTensorManager.h
+++ b/runtime/onert/backend/cpu/StaticTensorManager.h
@@ -17,13 +17,7 @@
#ifndef __ONERT_BACKEND_CPU_STATICTENSOR_MANAGER_H__
#define __ONERT_BACKEND_CPU_STATICTENSOR_MANAGER_H__
-#include "backend/IStaticTensorManager.h"
-#include "backend/cpu_common/DynamicTensorManager.h"
-#include "backend/cpu_common/MemoryManager.h"
-#include "backend/cpu_common/TensorRegistry.h"
-#include "backend/ITensorManager.h"
-#include "ir/OperandIndexMap.h"
-#include "ir/OperandInfo.h"
+#include "backend/cpu_common/StaticTensorManager.h"
namespace onert
{
@@ -32,30 +26,7 @@ namespace backend
namespace cpu
{
-class StaticTensorManager : public backend::IStaticTensorManager
-{
-public:
- StaticTensorManager(const std::shared_ptr<cpu_common::TensorRegistry> &reg,
- cpu_common::DynamicTensorManager *dynamic_tensor_manager);
- virtual ~StaticTensorManager() = default;
-
- void allocateNonconsts(void);
- void deallocateNonconsts(void);
-
- void buildTensor(const ir::OperandIndex &ind, const ir::OperandInfo &tensor_info,
- ir::Layout backend_layout, bool as_const);
-
- void claimPlan(const ir::OperandIndex &ind, uint32_t size);
- void releasePlan(const ir::OperandIndex &ind);
-
- void iterate(const std::function<void(const ir::OperandIndex &)> &fn);
-
-private:
- std::unique_ptr<cpu_common::MemoryManager> _nonconst_mgr;
- const std::shared_ptr<cpu_common::TensorRegistry> _tensors;
- ir::OperandIndexMap<bool> _as_constants;
- cpu_common::DynamicTensorManager *_dynamic_tensor_manager;
-};
+using StaticTensorManager = cpu_common::StaticTensorManager;
} // namespace cpu
} // namespace backend
diff --git a/runtime/onert/backend/cpu/Tensor.h b/runtime/onert/backend/cpu/Tensor.h
index 2ad2ad0fb..d663c3f50 100644
--- a/runtime/onert/backend/cpu/Tensor.h
+++ b/runtime/onert/backend/cpu/Tensor.h
@@ -28,92 +28,7 @@ namespace cpu
{
using Tensor = cpu_common::Tensor;
-
-/**
- * @brief Class that uses data from external memory that is not managed by a backend
- * instead of allocating and copying the data. ExternalTensor's data pointer points to
- * an address of memory such as where memory is already allocated, or mmapped area.
- * This is meaning that ExternalTensor can take all of types' ir::Data.
- * To support this, assume below things no padding, always NHWC layout,
- * constant tensor and not dynamic.
- */
-class ExternalTensor : public Tensor
-{
-public:
- ExternalTensor() = delete;
- virtual ~ExternalTensor();
-
-public:
- ExternalTensor(const ir::OperandInfo &info, const ir::Layout layout)
- : Tensor(info, layout, nullptr)
- {
- assert(_layout == ir::Layout::NHWC);
- assert(_info.isConstant());
- assert(_info.isDynamic() == false);
- }
-
-public:
- /**
- * @brief set Data to be shared from external so that this ExternalTensor will not be
- * allocated on CPU backend
- * @param[in] data data of Operand to be set
- */
- void setData(const std::shared_ptr<ir::Data> data)
- {
- assert(data != nullptr);
- _data = data;
- // Note. Some op such as cker::Conv could take buffer as nullptr.
- // That's why _buffer also would be used
- _buffer = const_cast<uint8_t *>(_data->base());
- }
-
-public:
- uint8_t *buffer() const override { return _buffer; }
-
- bool is_constant() const override { return true; }
- bool is_dynamic() const override { return false; }
- void set_dynamic() override
- {
- throw std::runtime_error("This tensor does not support changing dynamic");
- }
-
- void setShape(const ir::Shape &) override
- {
- throw std::runtime_error("This tensor does not support changing shape");
- }
-
- void increase_ref() override { ++_num_references; }
-
- void decrease_ref() override
- {
- assert(_data != nullptr);
- assert(_num_references > 0);
- --_num_references;
- if (_num_references == 0)
- {
- _data.reset();
- _buffer = nullptr;
- }
- }
-
- /**
- * @brief Reset reference count to zero and release data
- */
- void reset_ref() override
- {
- assert(_data != nullptr);
- assert(_num_references > 0);
- _num_references = 0;
-
- _data.reset();
- _buffer = nullptr;
- }
-
- int32_t num_references() override { return _num_references; }
-
-private:
- std::shared_ptr<const ir::Data> _data;
-};
+using ExternalTensor = cpu_common::ExternalTensor;
} // namespace cpu
} // namespace backend
diff --git a/runtime/onert/backend/cpu/TensorBuilder.h b/runtime/onert/backend/cpu/TensorBuilder.h
index 448abc229..9d8a5deb5 100644
--- a/runtime/onert/backend/cpu/TensorBuilder.h
+++ b/runtime/onert/backend/cpu/TensorBuilder.h
@@ -20,7 +20,6 @@
#include <backend/cpu_common/DynamicTensorManager.h>
#include <backend/cpu_common/TensorRegistry.h>
-#include <backend/ITensorBuilder.h>
#include <ir/OperandIndexMap.h>
#include "StaticTensorManager.h"
@@ -35,7 +34,7 @@ namespace backend
namespace cpu
{
-class TensorBuilder : public ITensorBuilder
+class TensorBuilder
{
public:
TensorBuilder(const std::shared_ptr<cpu_common::TensorRegistry> &tensor_reg);
@@ -47,18 +46,18 @@ public:
* @param[in] layout Operand data layout
*/
void registerTensorInfo(const ir::OperandIndex &ind, const ir::OperandInfo &info,
- ir::Layout backend_layout) override;
+ ir::Layout backend_layout);
- void notifyFirstUse(const ir::OperandIndex &) override;
- void notifyLastUse(const ir::OperandIndex &) override;
+ void notifyFirstUse(const ir::OperandIndex &);
+ void notifyLastUse(const ir::OperandIndex &);
- bool isRegistered(const ir::OperandIndex &) const override;
+ bool isRegistered(const ir::OperandIndex &) const;
- void prepare(void) override;
- void allocate() override;
- void postFunctionPrepare() override { /* DO NOTHING */}
+ void prepare(void);
+ void allocate();
+ void postFunctionPrepare() { /* DO NOTHING */}
- IDynamicTensorManager *dynamicTensorManager(void) override { return _dynamic_tensor_mgr.get(); }
+ IDynamicTensorManager *dynamicTensorManager(void) { return _dynamic_tensor_mgr.get(); }
private:
const std::shared_ptr<cpu_common::TensorRegistry> _tensor_reg;
diff --git a/runtime/onert/backend/cpu/cpu.cc b/runtime/onert/backend/cpu/cpu.cc
index 5385bb2a3..55538e2a6 100644
--- a/runtime/onert/backend/cpu/cpu.cc
+++ b/runtime/onert/backend/cpu/cpu.cc
@@ -16,18 +16,9 @@
#include "Backend.h"
-#include <util/logging.h>
-
extern "C" {
-onert::backend::Backend *onert_backend_create()
-{
- VERBOSE(onert_backend_create) << "'cpu' loaded\n";
- return new onert::backend::cpu::Backend;
-}
-void onert_backend_destroy(onert::backend::Backend *backend)
-{
- VERBOSE(onert_backend_create) << "'cpu' unloaded\n";
- delete backend;
-}
+onert::backend::Backend *onert_backend_create() { return new onert::backend::cpu::Backend; }
+
+void onert_backend_destroy(onert::backend::Backend *backend) { delete backend; }
}
diff --git a/runtime/onert/backend/cpu/ops/ArgMinMaxLayer.cc b/runtime/onert/backend/cpu/ops/ArgMinMaxLayer.cc
index 2fd284c91..d5ffdef0b 100644
--- a/runtime/onert/backend/cpu/ops/ArgMinMaxLayer.cc
+++ b/runtime/onert/backend/cpu/ops/ArgMinMaxLayer.cc
@@ -79,6 +79,9 @@ void ArgMinMaxLayer::run()
case ir::DataType::UINT8:
TF_LITE_ARG_MIN_MAX(uint8_t, int32_t, int32_t);
break;
+ case ir::DataType::QUANT_INT8_ASYMM:
+ TF_LITE_ARG_MIN_MAX(uint8_t, int32_t, int32_t);
+ break;
case ir::DataType::INT32:
TF_LITE_ARG_MIN_MAX(int32_t, int32_t, int32_t);
break;
@@ -97,6 +100,9 @@ void ArgMinMaxLayer::run()
case ir::DataType::UINT8:
TF_LITE_ARG_MIN_MAX(uint8_t, int32_t, int64_t);
break;
+ case ir::DataType::QUANT_INT8_ASYMM:
+ TF_LITE_ARG_MIN_MAX(uint8_t, int32_t, int64_t);
+ break;
case ir::DataType::INT32:
TF_LITE_ARG_MIN_MAX(int32_t, int32_t, int64_t);
break;
diff --git a/runtime/onert/backend/cpu/ops/BatchMatMulLayer.cc b/runtime/onert/backend/cpu/ops/BatchMatMulLayer.cc
index 7ef023788..ba9655924 100644
--- a/runtime/onert/backend/cpu/ops/BatchMatMulLayer.cc
+++ b/runtime/onert/backend/cpu/ops/BatchMatMulLayer.cc
@@ -67,7 +67,7 @@ void BatchMatMulLayer::configure(const IPortableTensor *lhs, const IPortableTens
void BatchMatMulLayer::run()
{
- if (_lhs->data_type() == OperandType::FLOAT32)
+ if ((_lhs->data_type() == OperandType::FLOAT32) && (_rhs->data_type() == OperandType::FLOAT32))
{
batchMatMulFloat32();
}
diff --git a/runtime/onert/backend/cpu/ops/ConcatLayer.cc b/runtime/onert/backend/cpu/ops/ConcatLayer.cc
index d26ed7378..edfdfc1a6 100644
--- a/runtime/onert/backend/cpu/ops/ConcatLayer.cc
+++ b/runtime/onert/backend/cpu/ops/ConcatLayer.cc
@@ -117,24 +117,26 @@ void ConcatLayer::configure(const std::vector<const IPortableTensor *> &inputs,
void ConcatLayer::run()
{
- if (_output->data_type() == OperandType::FLOAT32)
+ switch (_output->data_type())
{
- concatenationGeneral<float>();
+ case OperandType::FLOAT32:
+ concatenationGeneral<float>();
+ break;
+ case OperandType::QUANT_UINT8_ASYMM:
+ concatenationQuant8();
+ break;
+ case OperandType::QUANT_INT8_ASYMM:
+ concatenationGeneral<int8_t>();
+ break;
+ case OperandType::INT32:
+ concatenationGeneral<int32_t>();
+ break;
+ case OperandType::INT64:
+ concatenationGeneral<int64_t>();
+ break;
+ default:
+ throw std::runtime_error("Concat: unsupported data type");
}
- else if (_output->data_type() == OperandType::QUANT_UINT8_ASYMM)
- {
- concatenationQuant8();
- }
- else if (_output->data_type() == OperandType::INT32)
- {
- concatenationGeneral<int32_t>();
- }
- else if (_output->data_type() == OperandType::INT64)
- {
- concatenationGeneral<int64_t>();
- }
- else
- throw std::runtime_error("Concat: unsupported data type");
}
} // namespace ops
diff --git a/runtime/onert/backend/cpu/ops/ConvolutionLayer.cc b/runtime/onert/backend/cpu/ops/ConvolutionLayer.cc
index 799e9e2d0..c964e38f9 100644
--- a/runtime/onert/backend/cpu/ops/ConvolutionLayer.cc
+++ b/runtime/onert/backend/cpu/ops/ConvolutionLayer.cc
@@ -203,8 +203,6 @@ void ConvolutionLayer::prepare()
_prepare = true;
}
-#undef ANDROID_NN_CONV_PARAMETERS
-
} // namespace ops
} // namespace cpu
} // namespace backend
diff --git a/runtime/onert/backend/cpu/ops/DepthToSpaceLayer.cc b/runtime/onert/backend/cpu/ops/DepthToSpaceLayer.cc
new file mode 100644
index 000000000..d265d0ac2
--- /dev/null
+++ b/runtime/onert/backend/cpu/ops/DepthToSpaceLayer.cc
@@ -0,0 +1,78 @@
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "DepthToSpaceLayer.h"
+
+#include "OperationUtils.h"
+
+#include <cker/operation/DepthToSpace.h>
+
+namespace onert
+{
+namespace backend
+{
+namespace cpu
+{
+namespace ops
+{
+DepthToSpaceLayer::DepthToSpaceLayer() : _input(nullptr), _block_size(0), _output(nullptr)
+{
+ // DO NOTHING
+}
+
+template <typename T> void DepthToSpaceLayer::depthToSpace()
+{
+ nnfw::cker::DepthToSpace(getTensorShape(_input), reinterpret_cast<const T *>(_input->buffer()),
+ getTensorShape(_output), reinterpret_cast<T *>(_output->buffer()),
+ _block_size);
+}
+
+void DepthToSpaceLayer::configure(const IPortableTensor *input, const int32_t block_size,
+ IPortableTensor *output)
+{
+ _input = input;
+ _block_size = block_size;
+ _output = output;
+}
+
+void DepthToSpaceLayer::run()
+{
+ switch (_input->data_type())
+ {
+ case OperandType::FLOAT32:
+ depthToSpace<float>();
+ break;
+ case OperandType::INT32:
+ depthToSpace<int32_t>();
+ break;
+ case OperandType::INT64:
+ depthToSpace<int64_t>();
+ break;
+ case OperandType::QUANT_UINT8_ASYMM:
+ depthToSpace<uint8_t>();
+ break;
+ case OperandType::QUANT_INT8_ASYMM:
+ depthToSpace<int8_t>();
+ break;
+ default:
+ throw std::runtime_error{"DepthToSpace: unsupported data type"};
+ }
+}
+
+} // namespace ops
+} // namespace cpu
+} // namespace backend
+} // namespace onert
diff --git a/runtime/onert/backend/cpu/ops/DepthToSpaceLayer.h b/runtime/onert/backend/cpu/ops/DepthToSpaceLayer.h
new file mode 100644
index 000000000..32e0171ce
--- /dev/null
+++ b/runtime/onert/backend/cpu/ops/DepthToSpaceLayer.h
@@ -0,0 +1,54 @@
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __ONERT_BACKEND_CPU_OPS_DEPTH_TO_SPACE_LAYER_H__
+#define __ONERT_BACKEND_CPU_OPS_DEPTH_TO_SPACE_LAYER_H__
+
+#include <backend/IPortableTensor.h>
+
+#include <exec/IFunction.h>
+
+namespace onert
+{
+namespace backend
+{
+namespace cpu
+{
+namespace ops
+{
+class DepthToSpaceLayer : public ::onert::exec::IFunction
+{
+public:
+ DepthToSpaceLayer();
+
+ void configure(const IPortableTensor *input, const int32_t block_size, IPortableTensor *output);
+
+ void run() override;
+
+private:
+ template <typename T> void depthToSpace();
+
+ const IPortableTensor *_input;
+ int32_t _block_size;
+ IPortableTensor *_output;
+};
+
+} // namespace ops
+} // namespace cpu
+} // namespace backend
+} // namespace onert
+
+#endif // __ONERT_BACKEND_CPU_OPS_DEPTH_TO_SPACE_LAYER_H__
diff --git a/runtime/onert/backend/cpu/ops/DepthwiseConvolutionLayer.cc b/runtime/onert/backend/cpu/ops/DepthwiseConvolutionLayer.cc
index f1dc1103a..85553d14d 100644
--- a/runtime/onert/backend/cpu/ops/DepthwiseConvolutionLayer.cc
+++ b/runtime/onert/backend/cpu/ops/DepthwiseConvolutionLayer.cc
@@ -43,11 +43,12 @@ void DepthwiseConvolutionLayer::convFloat32()
op_params.float_activation_min = output_activation_min;
op_params.float_activation_max = output_activation_max;
- nnfw::cker::DepthwiseConv(
+ nnfw::cker::DepthwiseConv<float, float>(
op_params, getTensorShape(_input), reinterpret_cast<const float *>(_input->buffer()),
getTensorShape(_kernel), reinterpret_cast<const float *>(_kernel->buffer()),
getTensorShape(_bias), reinterpret_cast<const float *>(_bias->buffer()),
- getTensorShape(_output), reinterpret_cast<float *>(_output->buffer()));
+ getTensorShape(_output), reinterpret_cast<float *>(_output->buffer()),
+ _external_context->ruy_context());
}
void DepthwiseConvolutionLayer::convQuant8()
@@ -79,11 +80,12 @@ void DepthwiseConvolutionLayer::convQuant8()
op_params.quantized_activation_min = output_activation_min;
op_params.quantized_activation_max = output_activation_max;
- nnfw::cker::DepthwiseConv(
+ nnfw::cker::DepthwiseConv<uint8_t, int32_t>(
op_params, getTensorShape(_input), reinterpret_cast<const uint8_t *>(_input->buffer()),
getTensorShape(_kernel), reinterpret_cast<const uint8_t *>(_kernel->buffer()),
getTensorShape(_bias), reinterpret_cast<const int32_t *>(_bias->buffer()),
- getTensorShape(_output), reinterpret_cast<uint8_t *>(_output->buffer()));
+ getTensorShape(_output), reinterpret_cast<uint8_t *>(_output->buffer()),
+ _external_context->ruy_context());
}
void DepthwiseConvolutionLayer::configure(
@@ -91,7 +93,8 @@ void DepthwiseConvolutionLayer::configure(
const uint32_t paddingLeft, const uint32_t paddingRight, const uint32_t paddingTop,
const uint32_t paddingBottom, const uint32_t strideWidth, const uint32_t strideHeight,
const uint32_t multiplier, const uint32_t dilationWidth, const uint32_t dilationHeight,
- const ir::Activation activation, IPortableTensor *output)
+ const ir::Activation activation, IPortableTensor *output,
+ const std::shared_ptr<ExternalContext> &external_context)
{
_input = input;
_kernel = kernel;
@@ -107,6 +110,7 @@ void DepthwiseConvolutionLayer::configure(
_dilationHeight = dilationHeight;
_activation = activation;
_output = output;
+ _external_context = external_context;
}
void DepthwiseConvolutionLayer::run()
diff --git a/runtime/onert/backend/cpu/ops/DepthwiseConvolutionLayer.h b/runtime/onert/backend/cpu/ops/DepthwiseConvolutionLayer.h
index fb032ecbf..fe1fcc182 100644
--- a/runtime/onert/backend/cpu/ops/DepthwiseConvolutionLayer.h
+++ b/runtime/onert/backend/cpu/ops/DepthwiseConvolutionLayer.h
@@ -19,6 +19,7 @@
#include <backend/IPortableTensor.h>
#include "OperationUtils.h"
+#include "../ExternalContext.h"
#include <exec/IFunction.h>
@@ -47,7 +48,7 @@ public:
const uint32_t paddingBottom, const uint32_t strideW, const uint32_t strideH,
const uint32_t multiplier, const uint32_t dilationWidth,
const uint32_t dilationHeight, const ir::Activation activation,
- IPortableTensor *output);
+ IPortableTensor *output, const std::shared_ptr<ExternalContext> &external_context);
void run() override;
@@ -71,6 +72,8 @@ private:
uint32_t _dilationHeight{1};
ir::Activation _activation{ir::Activation::NONE};
+
+ std::shared_ptr<ExternalContext> _external_context;
};
} // namespace ops
diff --git a/runtime/onert/backend/cpu/ops/ElementwiseActivationLayer.cc b/runtime/onert/backend/cpu/ops/ElementwiseActivationLayer.cc
index c1d63172b..3e1da5ec0 100644
--- a/runtime/onert/backend/cpu/ops/ElementwiseActivationLayer.cc
+++ b/runtime/onert/backend/cpu/ops/ElementwiseActivationLayer.cc
@@ -18,6 +18,8 @@
#include "OperationUtils.h"
+#include <cker/operation/ELU.h>
+#include <cker/operation/LeakyReLU.h>
#include <cker/operation/Logistic.h>
#include <cker/operation/ReLU.h>
#include <cker/operation/ReLU6.h>
@@ -91,6 +93,19 @@ void ElementwiseActivationLayer::configure(const IPortableTensor *input, IPortab
switch (op_type)
{
+ case ElementwiseActivationType::kElu:
+ if (input->data_type() == OperandType::FLOAT32)
+ {
+ _kernel = [](const IPortableTensor *input, IPortableTensor *output) {
+ nnfw::cker::ELU(getTensorShape(input), reinterpret_cast<const float *>(input->buffer()),
+ getTensorShape(output), reinterpret_cast<float *>(output->buffer()));
+ };
+ }
+ else
+ {
+ throw std::runtime_error{"ElementwiseActivationLayer(Elu): unsupported data type"};
+ }
+ break;
case ElementwiseActivationType::kLogistic:
if (_input->data_type() == OperandType::QUANT_UINT8_ASYMM)
{
@@ -160,6 +175,21 @@ void ElementwiseActivationLayer::configure(const IPortableTensor *input, IPortab
throw std::runtime_error{"ElementwiseActivationLayer(Logistic): unsupported data type"};
}
break;
+ case ElementwiseActivationType::kLeakyReLU:
+ if (_input->data_type() == OperandType::FLOAT32)
+ {
+ _kernel = [alpha](const IPortableTensor *input, IPortableTensor *output) {
+ nnfw::cker::LeakyReLU(nnfw::cker::LeakyReluParams{alpha}, getTensorShape(input),
+ reinterpret_cast<const float *>(input->buffer()),
+ getTensorShape(output),
+ reinterpret_cast<float *>(output->buffer()));
+ };
+ }
+ else
+ {
+ throw std::runtime_error{"ElementwiseActivationLayer(LeakyReLU): unsupported data type"};
+ }
+ break;
default:
throw std::runtime_error("ElementwiseActivationLayer: unsupported op type");
}
diff --git a/runtime/onert/backend/cpu/ops/ElementwiseActivationLayer.h b/runtime/onert/backend/cpu/ops/ElementwiseActivationLayer.h
index 3ef580041..948ab3b57 100644
--- a/runtime/onert/backend/cpu/ops/ElementwiseActivationLayer.h
+++ b/runtime/onert/backend/cpu/ops/ElementwiseActivationLayer.h
@@ -32,9 +32,11 @@ namespace ops
enum class ElementwiseActivationType
{
+ kElu,
kLogistic,
kReLU,
- kTanh
+ kTanh,
+ kLeakyReLU
};
class ElementwiseActivationLayer : public ::onert::exec::IFunction
diff --git a/runtime/onert/backend/cpu/ops/ElementwiseBinaryLayer.cc b/runtime/onert/backend/cpu/ops/ElementwiseBinaryLayer.cc
index ea3c1e7cd..1e17a0828 100644
--- a/runtime/onert/backend/cpu/ops/ElementwiseBinaryLayer.cc
+++ b/runtime/onert/backend/cpu/ops/ElementwiseBinaryLayer.cc
@@ -18,6 +18,7 @@
#include "OperationUtils.h"
+#include <cker/operation/LogicalAnd.h>
#include <cker/operation/LogicalOr.h>
#include <cker/operation/MaxMin.h>
@@ -33,6 +34,25 @@ namespace ops
namespace
{
template <typename T>
+void logicalAndGeneric(const IPortableTensor *lhs, const IPortableTensor *rhs,
+ IPortableTensor *output)
+{
+ if (!HaveSameShapes(lhs, rhs))
+ {
+ nnfw::cker::LogicalAndBroadcast<T>(
+ getTensorShape(lhs), reinterpret_cast<const T *>(lhs->buffer()), getTensorShape(rhs),
+ reinterpret_cast<const T *>(rhs->buffer()), getTensorShape(output),
+ reinterpret_cast<T *>(output->buffer()));
+ }
+ else
+ {
+ nnfw::cker::LogicalAndElementwise<T>(
+ getTensorShape(lhs), reinterpret_cast<const T *>(lhs->buffer()),
+ reinterpret_cast<const T *>(rhs->buffer()), reinterpret_cast<T *>(output->buffer()));
+ }
+}
+
+template <typename T>
void logicalOrGeneric(const IPortableTensor *lhs, const IPortableTensor *rhs,
IPortableTensor *output)
{
@@ -88,6 +108,16 @@ void ElementwiseBinaryLayer::configure(const IPortableTensor *lhs, const IPortab
switch (op_type)
{
+ case ElementwiseBinaryType::kLogicalAnd:
+ if ((_lhs->data_type() == OperandType::BOOL8) && (_rhs->data_type() == OperandType::BOOL8))
+ {
+ _kernel = logicalAndGeneric<bool>;
+ }
+ else
+ {
+        throw std::runtime_error{"LogicalAnd: Unsupported data type"};
+ }
+ break;
case ElementwiseBinaryType::kLogicalOr:
if ((_lhs->data_type() == OperandType::BOOL8) && (_rhs->data_type() == OperandType::BOOL8))
{
diff --git a/runtime/onert/backend/cpu/ops/ElementwiseUnaryLayer.cc b/runtime/onert/backend/cpu/ops/ElementwiseUnaryLayer.cc
index 066455e72..15d7f3049 100644
--- a/runtime/onert/backend/cpu/ops/ElementwiseUnaryLayer.cc
+++ b/runtime/onert/backend/cpu/ops/ElementwiseUnaryLayer.cc
@@ -195,6 +195,18 @@ void sinFloat32(const IPortableTensor *input, IPortableTensor *output)
getTensorShape(output), reinterpret_cast<float *>(output->buffer()));
}
+void sqrtFloat32(const IPortableTensor *input, IPortableTensor *output)
+{
+ nnfw::cker::Sqrt(getTensorShape(input), reinterpret_cast<const float *>(input->buffer()),
+ getTensorShape(output), reinterpret_cast<float *>(output->buffer()));
+}
+
+void squareFloat32(const IPortableTensor *input, IPortableTensor *output)
+{
+ nnfw::cker::Square(getTensorShape(input), reinterpret_cast<const float *>(input->buffer()),
+ getTensorShape(output), reinterpret_cast<float *>(output->buffer()));
+}
+
template <typename T> void zerosLikeFloat32(const IPortableTensor *input, IPortableTensor *output)
{
if (!HaveSameShapes(input, output))
@@ -363,6 +375,26 @@ void ElementwiseUnaryLayer::configure(const IPortableTensor *input, IPortableTen
throw std::runtime_error{"Sin: Unsupported data type"};
}
break;
+ case ElementwiseUnaryType::kSqrt:
+ if ((input->data_type() == OperandType::FLOAT32))
+ {
+ _kernel = sqrtFloat32;
+ }
+ else
+ {
+ throw std::runtime_error{"Sqrt: Unsupported data type"};
+ }
+ break;
+ case ElementwiseUnaryType::kSquare:
+ if ((input->data_type() == OperandType::FLOAT32))
+ {
+ _kernel = squareFloat32;
+ }
+ else
+ {
+ throw std::runtime_error{"Square: Unsupported data type"};
+ }
+ break;
case ElementwiseUnaryType::kZerosLike:
if (input->data_type() == OperandType::FLOAT32)
{
diff --git a/runtime/onert/backend/cpu/ops/ElementwiseUnaryLayer.h b/runtime/onert/backend/cpu/ops/ElementwiseUnaryLayer.h
index c1765b5b7..54a6fc02a 100644
--- a/runtime/onert/backend/cpu/ops/ElementwiseUnaryLayer.h
+++ b/runtime/onert/backend/cpu/ops/ElementwiseUnaryLayer.h
@@ -46,6 +46,8 @@ enum class ElementwiseUnaryType
kRound,
kRSqrt,
kSin,
+ kSqrt,
+ kSquare,
kZerosLike
};
diff --git a/runtime/onert/backend/cpu/ops/ExpandDimsLayer.cc b/runtime/onert/backend/cpu/ops/ExpandDimsLayer.cc
index b545e6743..5ea0ea893 100644
--- a/runtime/onert/backend/cpu/ops/ExpandDimsLayer.cc
+++ b/runtime/onert/backend/cpu/ops/ExpandDimsLayer.cc
@@ -25,22 +25,19 @@ namespace cpu
namespace ops
{
-ExpandDimsLayer::ExpandDimsLayer() : _input(nullptr), _axis(nullptr), _output(nullptr)
+ExpandDimsLayer::ExpandDimsLayer() : _input(nullptr), _output(nullptr)
{
// DO NOTHING
}
-void ExpandDimsLayer::configure(const IPortableTensor *input, const IPortableTensor *axis,
- IPortableTensor *output)
+void ExpandDimsLayer::configure(const IPortableTensor *input, IPortableTensor *output)
{
_input = input;
- _axis = axis;
_output = output;
}
void ExpandDimsLayer::run()
{
- // TODO use _axis to calculate shape of output when _axis is not constant
size_t count = _input->total_size();
memcpy(_output->buffer(), _input->buffer(), count);
}
diff --git a/runtime/onert/backend/cpu/ops/ExpandDimsLayer.h b/runtime/onert/backend/cpu/ops/ExpandDimsLayer.h
index b5d4938b5..1b7ead0c3 100644
--- a/runtime/onert/backend/cpu/ops/ExpandDimsLayer.h
+++ b/runtime/onert/backend/cpu/ops/ExpandDimsLayer.h
@@ -36,14 +36,12 @@ public:
ExpandDimsLayer();
public:
- void configure(const IPortableTensor *input, const IPortableTensor *axis,
- IPortableTensor *output);
+ void configure(const IPortableTensor *input, IPortableTensor *output);
void run() override;
private:
const IPortableTensor *_input;
- const IPortableTensor *_axis;
IPortableTensor *_output;
};
diff --git a/runtime/onert/backend/cpu/ops/FillLayer.cc b/runtime/onert/backend/cpu/ops/FillLayer.cc
index df3f8b7cd..5b7c17907 100644
--- a/runtime/onert/backend/cpu/ops/FillLayer.cc
+++ b/runtime/onert/backend/cpu/ops/FillLayer.cc
@@ -29,15 +29,13 @@ namespace cpu
namespace ops
{
-FillLayer::FillLayer() : _input(nullptr), _value(nullptr), _output(nullptr)
+FillLayer::FillLayer() : _value(nullptr), _output(nullptr)
{
// DO NOTHING
}
-void FillLayer::configure(const IPortableTensor *input, const IPortableTensor *value,
- IPortableTensor *output)
+void FillLayer::configure(const IPortableTensor *value, IPortableTensor *output)
{
- _input = input;
_value = value;
_output = output;
}
@@ -47,28 +45,24 @@ void FillLayer::run()
switch (_output->data_type())
{
case OperandType::FLOAT32:
- nnfw::cker::Fill<float *>(getTensorShape(_input), reinterpret_cast<int *>(_input->buffer()),
- reinterpret_cast<float *>(_value->buffer()),
+ nnfw::cker::Fill<float *>(reinterpret_cast<float *>(_value->buffer()),
getTensorShape(_output),
reinterpret_cast<float *>(_output->buffer()));
break;
case OperandType::INT32:
- nnfw::cker::Fill<int32_t *>(getTensorShape(_input), reinterpret_cast<int *>(_input->buffer()),
- reinterpret_cast<int32_t *>(_value->buffer()),
+ nnfw::cker::Fill<int32_t *>(reinterpret_cast<int32_t *>(_value->buffer()),
getTensorShape(_output),
reinterpret_cast<int32_t *>(_output->buffer()));
break;
case OperandType::INT64:
- nnfw::cker::Fill<int64_t *>(getTensorShape(_input), reinterpret_cast<int *>(_input->buffer()),
- reinterpret_cast<int64_t *>(_value->buffer()),
+ nnfw::cker::Fill<int64_t *>(reinterpret_cast<int64_t *>(_value->buffer()),
getTensorShape(_output),
reinterpret_cast<int64_t *>(_output->buffer()));
break;
case OperandType::UINT32:
- nnfw::cker::Fill<uint32_t *>(
- getTensorShape(_input), reinterpret_cast<int *>(_input->buffer()),
- reinterpret_cast<uint32_t *>(_value->buffer()), getTensorShape(_output),
- reinterpret_cast<uint32_t *>(_output->buffer()));
+ nnfw::cker::Fill<uint32_t *>(reinterpret_cast<uint32_t *>(_value->buffer()),
+ getTensorShape(_output),
+ reinterpret_cast<uint32_t *>(_output->buffer()));
break;
default:
throw std::runtime_error{"Fill: unsupported data type"};
diff --git a/runtime/onert/backend/cpu/ops/FillLayer.h b/runtime/onert/backend/cpu/ops/FillLayer.h
index 1f17d6b68..ce843654a 100644
--- a/runtime/onert/backend/cpu/ops/FillLayer.h
+++ b/runtime/onert/backend/cpu/ops/FillLayer.h
@@ -35,13 +35,11 @@ class FillLayer : public ::onert::exec::IFunction
public:
FillLayer();
- void configure(const IPortableTensor *input, const IPortableTensor *value,
- IPortableTensor *output);
+ void configure(const IPortableTensor *value, IPortableTensor *output);
void run() override;
private:
- const IPortableTensor *_input;
const IPortableTensor *_value;
IPortableTensor *_output;
};
diff --git a/runtime/onert/backend/cpu/ops/MeanLayer.cc b/runtime/onert/backend/cpu/ops/MeanLayer.cc
index 4921ac748..f130692ee 100644
--- a/runtime/onert/backend/cpu/ops/MeanLayer.cc
+++ b/runtime/onert/backend/cpu/ops/MeanLayer.cc
@@ -36,9 +36,24 @@ MeanLayer::MeanLayer() : _input(nullptr), _axes(nullptr), _output(nullptr), _kee
void MeanLayer::MeanFloat32()
{
- nnfw::cker::Mean(getTensorShape(_input), reinterpret_cast<const float *>(_input->buffer()),
- getTensorShape(_output), reinterpret_cast<float *>(_output->buffer()),
- getReducerAxes(_axes));
+ const auto inputShape = getTensorShape(_input);
+ const auto axisVec = getReducerAxes(_axes);
+ bool axis_is_1_and_2 =
+ _keep_dims && inputShape.DimensionsCount() == 4 && axisVec.size() == 2 &&
+ ((axisVec[0] == 1 && axisVec[1] == 2) || (axisVec[0] == 2 && axisVec[1] == 1));
+
+ if (axis_is_1_and_2)
+ {
+ nnfw::cker::MeanAxis1And2(inputShape, reinterpret_cast<const float *>(_input->buffer()),
+ getTensorShape(_output),
+ reinterpret_cast<float *>(_output->buffer()));
+ }
+ else
+ {
+ nnfw::cker::Mean(inputShape, reinterpret_cast<const float *>(_input->buffer()),
+ getTensorShape(_output), reinterpret_cast<float *>(_output->buffer()),
+ axisVec);
+ }
}
void MeanLayer::MeanQuant8()
@@ -57,6 +72,10 @@ void MeanLayer::configure(const IPortableTensor *input, const IPortableTensor *a
_axes = axes;
_output = output;
_keep_dims = keep_dims;
+
+ if (_input->data_type() != OperandType::FLOAT32 &&
+ _input->data_type() != OperandType::QUANT_UINT8_ASYMM)
+ throw std::runtime_error{"Mean: unsupported data type"};
}
void MeanLayer::run()
diff --git a/runtime/onert/backend/ruy/Backend.h b/runtime/onert/backend/ruy/Backend.h
new file mode 100644
index 000000000..bc8a024d8
--- /dev/null
+++ b/runtime/onert/backend/ruy/Backend.h
@@ -0,0 +1,68 @@
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __ONERT_BACKEND_RUY_BACKEND_H__
+#define __ONERT_BACKEND_RUY_BACKEND_H__
+
+#include "BackendContext.h"
+#include "Config.h"
+#include "ConstantInitializer.h"
+#include "KernelGenerator.h"
+
+#include <backend/Backend.h>
+
+#include <memory>
+
+namespace onert
+{
+namespace backend
+{
+namespace ruy
+{
+
+class Backend : public ::onert::backend::Backend
+{
+public:
+ Backend() : _config{std::make_shared<Config>()} {}
+
+ std::shared_ptr<IConfig> config() const override { return _config; }
+
+ std::unique_ptr<onert::backend::BackendContext>
+ newContext(const ir::Graph &graph, const std::shared_ptr<custom::IKernelBuilder> &kb,
+ bool) const override
+ {
+ const auto &operands = graph.operands();
+ const auto &operations = graph.operations();
+ auto context = std::make_unique<BackendContext>(this, &graph);
+ auto tr = std::make_shared<cpu_common::TensorRegistry>();
+ auto tb = std::make_shared<TensorBuilder>(tr);
+ context->tensor_registry = tr;
+ context->tensor_builder = tb;
+ context->constant_initializer = std::make_shared<ConstantInitializer>(operands, tr);
+ context->kernel_gen = std::make_shared<KernelGenerator>(operands, operations, tb, tr, kb,
+ context->external_context());
+ return context;
+ }
+
+private:
+ std::shared_ptr<IConfig> _config;
+};
+
+} // namespace ruy
+} // namespace backend
+} // namespace onert
+
+#endif // __ONERT_BACKEND_RUY_BACKEND_H__
diff --git a/runtime/onert/backend/ruy/BackendContext.cc b/runtime/onert/backend/ruy/BackendContext.cc
new file mode 100644
index 000000000..ef686f480
--- /dev/null
+++ b/runtime/onert/backend/ruy/BackendContext.cc
@@ -0,0 +1,147 @@
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "BackendContext.h"
+
+#include "TensorBuilder.h"
+#include "KernelGenerator.h"
+#include "util/logging.h"
+#include "ir/Index.h"
+#include "ir/OperandIndexMap.h"
+#include "ir/OperandIndexSequence.h"
+#include "backend/cpu_common/BackendContextHelpers.h"
+
+namespace onert
+{
+namespace backend
+{
+namespace ruy
+{
+
+void BackendContext::initConsts()
+{
+ for (auto &op : operation_list())
+ {
+ constant_initializer->setLayout(op.layout);
+ graph()->operations().at(op.index).accept(*constant_initializer);
+ }
+
+ for (auto ind : operand_list())
+ {
+ const auto &obj = graph()->operands().at(ind);
+ if (obj.isConstant() && !constant_initializer->exist(ind))
+ {
+ constant_initializer->registerDefaultInitializer(ind, obj);
+ }
+ }
+
+ constant_initializer->run();
+}
+
+ITensorRegistry *BackendContext::genTensors(const std::vector<onert::ir::OpSequenceIndex> &order,
+ const ir::OpSequences &op_seqs,
+ const ir::LowerInfoMap &lower_info)
+{
+ auto model_io = (graph()->getInputs() + graph()->getOutputs()) | ir::Remove::UNDEFINED |
+ ir::Remove::DUPLICATED;
+ for (auto index : operand_list())
+ {
+ if (model_io.contains(index))
+ continue;
+ const auto &obj = graph()->operands().at(index);
+ const auto frontend_layout = [&]() {
+ if (obj.getUses().size() == 0)
+ return ir::Layout::UNKNOWN;
+ auto use_op_ind = *obj.getUses().begin(); // FIXME What if it has two or more uses?
+ for (auto &operation_info : operation_list())
+ {
+ if (operation_info.index == use_op_ind)
+ return operation_info.layout;
+ }
+ return ir::Layout::UNKNOWN;
+ }();
+ const auto &permute_factor = lower_info.operand.at(index)->def_factors().getOnlyElement();
+ if (permute_factor.backend() != backend())
+ continue;
+ const auto backend_layout = permute_factor.layout();
+ ir::OperandInfo backend_info{permuteShape(obj.shape(), frontend_layout, backend_layout),
+ obj.typeInfo(), obj.info().memAllocType(), obj.isConstant()};
+ tensor_builder->registerTensorInfo(index, backend_info, backend_layout);
+ }
+
+  // TODO Get compiler options from the compiler and use them rather than reading them from Env
+ if (util::getConfigString(util::config::EXECUTOR) == "Linear")
+ {
+ cpu_common::planTensors(*this, order, op_seqs, lower_info);
+ }
+ else
+ {
+    // For executors that do not have a fixed linear execution order:
+    // this is a workaround that uses the static memory planner so that tensors are never deallocated
+ for (auto ind : operand_list())
+ {
+ if (tensor_builder->isRegistered(ind))
+ tensor_builder->notifyFirstUse(ind);
+ }
+ }
+
+ tensor_builder->prepare();
+
+ return tensor_registry.get();
+}
+
+FunctionMap BackendContext::genKernels(const std::vector<onert::ir::OpSequenceIndex> &order,
+ const ir::OpSequences &op_seqs)
+{
+ FunctionMap ret;
+
+ for (auto op_seq_ind : order)
+ {
+ const auto &op_seq = op_seqs.at(op_seq_ind);
+ bool assigned = [&]() {
+ for (auto op_info : operation_list())
+ if (op_seq.exist(op_info.index))
+ return true;
+ return false;
+ }();
+ if (!assigned)
+ continue;
+ auto fn_seq = kernel_gen->generate(op_seqs.at(op_seq_ind));
+ ret.emplace_back(op_seq_ind, std::move(fn_seq));
+ }
+
+ initConsts();
+
+ // NOTE For memory optimization, we want to free some operand data
+ for (auto ind : operand_list())
+ {
+ // TODO Remove const_cast
+ auto &obj = const_cast<ir::Graph *>(graph())->operands().at(ind);
+ obj.releaseData();
+ }
+
+ for (auto &it : ret)
+ {
+ auto &fn_seq = it.second;
+ fn_seq->iterate([&](exec::IFunction &ifunc) { ifunc.prepare(); });
+ }
+
+ return ret;
+}
+
+} // namespace ruy
+} // namespace backend
+} // namespace onert
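Note: genKernels above fills a FunctionMap that pairs each op-sequence index with its generated function sequence, and every function is prepare()d before execution. A toy sketch of that shape with stand-in types (the real FunctionMap and exec::IFunction live in onert core and are not shown in this patch):

#include <iostream>
#include <string>
#include <utility>
#include <vector>

// Stand-in for exec::IFunction: something that can be prepared once and run later.
struct ToyFunction
{
  std::string name;
  void prepare() { std::cout << "prepare " << name << "\n"; }
  void run() { std::cout << "run " << name << "\n"; }
};

// Stand-in for FunctionMap: ordered (op-sequence index, generated functions) pairs.
using ToyFunctionMap = std::vector<std::pair<int, std::vector<ToyFunction>>>;

int main()
{
  ToyFunctionMap ret;
  ret.emplace_back(0, std::vector<ToyFunction>{{"Conv2D"}, {"FullyConnected"}});

  // Mirrors the tail of genKernels: prepare every generated function up front.
  for (auto &it : ret)
    for (auto &fn : it.second)
      fn.prepare();
  return 0;
}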
diff --git a/runtime/onert/backend/ruy/BackendContext.h b/runtime/onert/backend/ruy/BackendContext.h
new file mode 100644
index 000000000..b965c9a9d
--- /dev/null
+++ b/runtime/onert/backend/ruy/BackendContext.h
@@ -0,0 +1,78 @@
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __ONERT_BACKEND_RUY_BACKEND_CONTEXT_H__
+#define __ONERT_BACKEND_RUY_BACKEND_CONTEXT_H__
+
+#include <backend/BackendContext.h>
+#include "TensorBuilder.h"
+#include "ConstantInitializer.h"
+#include "KernelGenerator.h"
+#include "ExternalContext.h"
+
+namespace onert
+{
+namespace backend
+{
+namespace ruy
+{
+
+class BackendContext : public onert::backend::BackendContext
+{
+public:
+ BackendContext(const Backend *backend, const ir::Graph *graph,
+ std::shared_ptr<ITensorRegistry> tensor_registry = nullptr,
+ std::shared_ptr<TensorBuilder> tensor_builder = nullptr,
+ std::shared_ptr<ConstantInitializer> constant_initializer = nullptr,
+ std::shared_ptr<KernelGenerator> kernel_gen = nullptr)
+ : onert::backend::BackendContext(backend, graph, tensor_registry),
+ tensor_builder{tensor_builder}, constant_initializer{constant_initializer},
+ kernel_gen{kernel_gen}, _external_context(new ExternalContext)
+ {
+ }
+
+ ITensorRegistry *genTensors(const std::vector<onert::ir::OpSequenceIndex> &order,
+ const ir::OpSequences &op_seqs,
+ const ir::LowerInfoMap &lower_info) override;
+
+ FunctionMap genKernels(const std::vector<ir::OpSequenceIndex> &order,
+ const ir::OpSequences &op_seqs) override;
+
+ std::shared_ptr<ExternalContext> external_context() { return _external_context; }
+
+private:
+ void initConsts();
+ void planTensors(const std::vector<onert::ir::OpSequenceIndex> &order,
+ const ir::OpSequences &op_seqs, const ir::LowerInfoMap &lower_info);
+
+public:
+ // TODO Make it private
+ std::shared_ptr<TensorBuilder> tensor_builder;
+ std::shared_ptr<ConstantInitializer> constant_initializer;
+ std::shared_ptr<KernelGenerator> kernel_gen;
+
+private:
+  // NOTE Each ruy context owns a thread pool, so creating multiple ruy contexts
+  // duplicates the thread pool as well
+  // TODO Create a single ruy context per session
+ std::shared_ptr<ExternalContext> _external_context;
+};
+
+} // namespace ruy
+} // namespace backend
+} // namespace onert
+
+#endif // __ONERT_BACKEND_RUY_BACKEND_CONTEXT_H__
diff --git a/runtime/onert/backend/ruy/CMakeLists.txt b/runtime/onert/backend/ruy/CMakeLists.txt
new file mode 100644
index 000000000..206acbfbf
--- /dev/null
+++ b/runtime/onert/backend/ruy/CMakeLists.txt
@@ -0,0 +1,22 @@
+set(LIB_ONERT_BACKEND_RUY onert_backend_ruy)
+
+nnfw_find_package(Ruy REQUIRED)
+
+file(GLOB_RECURSE SOURCES "*.cc")
+
+add_library(${LIB_ONERT_BACKEND_RUY} SHARED ${SOURCES})
+
+target_link_libraries(${LIB_ONERT_BACKEND_RUY} PRIVATE nnfw_lib_ruy)
+target_link_libraries(${LIB_ONERT_BACKEND_RUY} PRIVATE onert_core)
+target_link_libraries(${LIB_ONERT_BACKEND_RUY} PRIVATE nnfw_common)
+target_link_libraries(${LIB_ONERT_BACKEND_RUY} PRIVATE nnfw_coverage)
+target_link_libraries(${LIB_ONERT_BACKEND_RUY} PRIVATE ruy)
+
+set_target_properties(${LIB_ONERT_BACKEND_RUY} PROPERTIES OUTPUT_NAME backend_ruy)
+
+if(CMAKE_BUILD_TYPE_LC STREQUAL "release")
+ add_custom_command(TARGET ${LIB_ONERT_BACKEND_RUY} POST_BUILD
+ COMMAND ${CMAKE_STRIP} "--strip-unneeded" $<TARGET_FILE_NAME:${LIB_ONERT_BACKEND_RUY}>)
+endif()
+
+install(TARGETS ${LIB_ONERT_BACKEND_RUY} DESTINATION lib)
diff --git a/runtime/onert/backend/cpu/Tensor.cc b/runtime/onert/backend/ruy/Config.cc
index dac8f898b..179caa9a6 100644
--- a/runtime/onert/backend/cpu/Tensor.cc
+++ b/runtime/onert/backend/ruy/Config.cc
@@ -14,18 +14,18 @@
* limitations under the License.
*/
-#include "Tensor.h"
+#include "Config.h"
namespace onert
{
namespace backend
{
-namespace cpu
+namespace ruy
{
-// `dynamic_cast` not working across library boundaries on NDK
-// With this as a key function, `dynamic_cast` works across dl
-ExternalTensor::~ExternalTensor() {}
+bool Config::initialize() { return true; }
+
+ir::Layout Config::supportLayout(const ir::Operation &, ir::Layout) { return ir::Layout::NHWC; }
-} // namespace cpu
+} // namespace ruy
} // namespace backend
diff --git a/runtime/onert/backend/ruy/Config.h b/runtime/onert/backend/ruy/Config.h
new file mode 100644
index 000000000..9160dd5b1
--- /dev/null
+++ b/runtime/onert/backend/ruy/Config.h
@@ -0,0 +1,48 @@
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __ONERT_BACKEND_RUY_CONFIG_H__
+#define __ONERT_BACKEND_RUY_CONFIG_H__
+
+#include <backend/IConfig.h>
+#include <memory>
+#include <util/ITimer.h>
+
+namespace onert
+{
+namespace backend
+{
+namespace ruy
+{
+
+class Config : public IConfig
+{
+public:
+ std::string id() override { return "ruy"; }
+ bool initialize() override;
+ ir::Layout supportLayout(const ir::Operation &node, ir::Layout frontend_layout) override;
+ bool supportPermutation() override { return true; }
+ bool supportDynamicTensor() override { return true; }
+ bool supportFP16() override { return false; }
+
+ std::unique_ptr<util::ITimer> timer() override { return std::make_unique<util::CPUTimer>(); }
+};
+
+} // namespace ruy
+} // namespace backend
+} // namespace onert
+
+#endif // __ONERT_BACKEND_RUY_CONFIG_H__
diff --git a/runtime/onert/backend/ruy/ConstantInitializer.h b/runtime/onert/backend/ruy/ConstantInitializer.h
new file mode 100644
index 000000000..24b4d924d
--- /dev/null
+++ b/runtime/onert/backend/ruy/ConstantInitializer.h
@@ -0,0 +1,35 @@
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __ONERT_BACKEND_RUY_CONSTANT_INITIALIZER_H__
+#define __ONERT_BACKEND_RUY_CONSTANT_INITIALIZER_H__
+
+#include <backend/cpu_common/ConstantInitializer.h>
+
+namespace onert
+{
+namespace backend
+{
+namespace ruy
+{
+
+using ConstantInitializer = cpu_common::ConstantInitializer;
+
+} // namespace ruy
+} // namespace backend
+} // namespace onert
+
+#endif // __ONERT_BACKEND_RUY_CONSTANT_INITIALIZER_H__
diff --git a/runtime/onert/backend/ruy/ExternalContext.h b/runtime/onert/backend/ruy/ExternalContext.h
new file mode 100644
index 000000000..f51faccb8
--- /dev/null
+++ b/runtime/onert/backend/ruy/ExternalContext.h
@@ -0,0 +1,60 @@
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __ONERT_BACKEND_RUY_EXTERNAL_CONTEXT_H__
+#define __ONERT_BACKEND_RUY_EXTERNAL_CONTEXT_H__
+
+#include <util/ConfigSource.h>
+#include <ruy/context.h>
+
+namespace
+{
+const int kDefaultNumThreadpoolThreads = 4;
+}
+
+namespace onert
+{
+namespace backend
+{
+namespace ruy
+{
+
+class ExternalContext
+{
+public:
+ ExternalContext() : _ruy_context(new ::ruy::Context)
+ {
+ setMaxNumThreads(onert::util::getConfigInt(onert::util::config::RUY_THREADS));
+ }
+
+ void setMaxNumThreads(int max_num_threads)
+ {
+ const int target_num_threads =
+ max_num_threads > -1 ? max_num_threads : kDefaultNumThreadpoolThreads;
+ _ruy_context->set_max_num_threads(target_num_threads);
+ }
+
+ ::ruy::Context *ruy_context() const { return _ruy_context.get(); }
+
+private:
+ const std::unique_ptr<::ruy::Context> _ruy_context;
+};
+
+} // namespace ruy
+} // namespace backend
+} // namespace onert
+
+#endif // __ONERT_BACKEND_RUY_EXTERNAL_CONTEXT_H__
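Note: the thread-count rule in ExternalContext above is simply "use RUY_THREADS when it is non-negative, otherwise fall back to the default of 4". A minimal standalone sketch of that rule, with a hypothetical environment-variable stub standing in for onert's config lookup (not part of the patch):

#include <cstdlib>
#include <iostream>

// Hypothetical stand-in for onert::util::getConfigInt(RUY_THREADS): read an env var and
// return -1 when it is unset so the caller falls back to its default.
static int read_ruy_threads_config()
{
  const char *v = std::getenv("RUY_THREADS");
  return v ? std::atoi(v) : -1;
}

// Mirrors ExternalContext::setMaxNumThreads: any value > -1 is taken as-is,
// everything else resolves to kDefaultNumThreadpoolThreads (4 in this backend).
static int resolve_thread_count(int configured, int fallback = 4)
{
  return configured > -1 ? configured : fallback;
}

int main()
{
  std::cout << "ruy thread pool size: " << resolve_thread_count(read_ruy_threads_config()) << "\n";
  return 0;
}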
diff --git a/runtime/onert/backend/ruy/KernelGenerator.cc b/runtime/onert/backend/ruy/KernelGenerator.cc
new file mode 100644
index 000000000..cd2825068
--- /dev/null
+++ b/runtime/onert/backend/ruy/KernelGenerator.cc
@@ -0,0 +1,171 @@
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "KernelGenerator.h"
+
+#include "ops/ConvolutionLayer.h"
+#include "ops/FullyConnectedLayer.h"
+
+#include <backend/Backend.h>
+#include <backend/IConfig.h>
+#include <memory>
+#include <util/Utils.h>
+#include <util/logging.h>
+#include <exec/DynamicShapeInferer.h>
+
+#include <stdexcept>
+
+namespace onert
+{
+namespace backend
+{
+namespace ruy
+{
+
+KernelGenerator::KernelGenerator(
+ const ir::Operands &operands_ctx, const ir::Operations &operations_ctx,
+ const std::shared_ptr<TensorBuilder> &tensor_builder,
+ const std::shared_ptr<cpu_common::TensorRegistry> &tensor_reg,
+ const std::shared_ptr<backend::custom::IKernelBuilder> &kernel_builder,
+ const std::shared_ptr<ExternalContext> &external_context)
+ : _ctx(operands_ctx), _operations_ctx{operations_ctx}, _tensor_builder(tensor_builder),
+ _tensor_reg{tensor_reg}, _kernel_builder(kernel_builder),
+ _current_layout(ir::Layout::UNKNOWN), _external_context(external_context)
+{
+ // DO NOTHING
+}
+
+void KernelGenerator::visit(const ir::OpSequence &op_seq)
+{
+ assert(!_return_fn_seq);
+ assert(_tensor_builder->dynamicTensorManager());
+ assert(_tensor_reg);
+
+ auto dyn_shape_inferer = std::make_shared<exec::DynamicShapeInferer>(_ctx, _tensor_reg);
+
+ _return_fn_seq = std::make_unique<exec::FunctionSequence>();
+
+ // Prepare to handle dynamic tensors later
+ auto dyn_ctx = std::make_shared<exec::FunctionSequence::DynamicTensorCtx>();
+ {
+ dyn_ctx->op_seq = &op_seq;
+ dyn_ctx->operations = &_operations_ctx;
+ dyn_ctx->dynamic_shape_inferer = std::move(dyn_shape_inferer);
+ dyn_ctx->dynamic_tensor_manager = _tensor_builder->dynamicTensorManager();
+
+ _return_fn_seq->dynamic_tensor_ctx(dyn_ctx);
+ }
+
+ _current_layout = op_seq.getLayout();
+ for (const auto &operation_idx : op_seq.operations())
+ {
+ const auto &node = _operations_ctx.at(operation_idx);
+ node.accept(*this);
+ _return_fn_seq->append(releaseFunction());
+
+ for (const auto &ind : (node.getInputs() | ir::Remove::UNDEFINED) + node.getOutputs())
+ {
+ auto portable_tensor = _tensor_reg->getPortableTensor(ind);
+ if (portable_tensor)
+ {
+ assert(portable_tensor->layout() == ir::Layout::NHWC);
+ }
+
+ auto tensor = _tensor_reg->getNativeTensor(ind);
+ if (tensor)
+ {
+ tensor->increase_ref();
+ }
+ }
+ }
+}
+
+void KernelGenerator::visit(const ir::operation::Conv2D &node)
+{
+ using ir::operation::Conv2D;
+
+ const auto ofm_index{node.getOutputs().at(0)};
+ const auto ifm_index{node.getInputs().at(Conv2D::Input::INPUT)};
+ const auto ker_index{node.getInputs().at(Conv2D::Input::KERNEL)};
+ const auto bias_index{node.getInputs().at(Conv2D::Input::BIAS)};
+
+ auto ofm_tensor = _tensor_reg->getPortableTensor(ofm_index);
+ auto ifm_tensor = _tensor_reg->getPortableTensor(ifm_index);
+ auto ker_tensor = _tensor_reg->getPortableTensor(ker_index);
+ auto bias_tensor = _tensor_reg->getPortableTensor(bias_index);
+
+ const auto stride = node.param().stride;
+ const auto activation = node.param().activation;
+ const auto param_padding = node.param().padding;
+ const auto dilation = node.param().dilation;
+ auto fn = std::make_unique<ops::ConvolutionLayer>();
+
+ if (_ctx.at(ifm_index).info().isDynamic() || _ctx.at(ker_index).info().isDynamic())
+ {
+ fn->configure(ifm_tensor, ker_tensor, bias_tensor, param_padding.type, param_padding.param.left,
+ param_padding.param.right, param_padding.param.top, param_padding.param.bottom,
+ stride.horizontal, stride.vertical, dilation.width_factor, dilation.height_factor,
+ activation, ofm_tensor, _external_context);
+
+ _return_fn = std::move(fn);
+ return;
+ }
+ const auto ifm_shape = _ctx.at(ifm_index).shape().asFeature(_current_layout);
+ const auto ofm_shape = _ctx.at(ofm_index).shape().asFeature(_current_layout);
+ // Kernel format is [depth_out, kernel_height, kernel_width, depth_in].
+ const auto &ker_shape = _ctx.at(ker_index).shape();
+ const auto ker_height = ker_shape.dim(1);
+ const auto ker_width = ker_shape.dim(2);
+
+ const auto padding =
+ ir::calculatePadding(param_padding, ifm_shape, ofm_shape, stride, ker_width, ker_height,
+ dilation.width_factor, dilation.height_factor);
+
+ fn->configure(ifm_tensor, ker_tensor, bias_tensor, param_padding.type, padding.left,
+ padding.right, padding.top, padding.bottom, stride.horizontal, stride.vertical,
+ dilation.width_factor, dilation.height_factor, activation, ofm_tensor,
+ _external_context);
+
+ _return_fn = std::move(fn);
+}
+
+void KernelGenerator::visit(const ir::operation::FullyConnected &node)
+{
+ using ir::operation::FullyConnected;
+
+ const auto output_index{node.getOutputs().at(0)};
+ const auto input_index{node.getInputs().at(FullyConnected::Input::INPUT)};
+ const auto weight_index{node.getInputs().at(FullyConnected::Input::WEIGHT)};
+ const auto bias_index{node.getInputs().at(FullyConnected::Input::BIAS)};
+ const auto activation = node.param().activation;
+ const auto weights_format = node.param().weights_format;
+
+ auto output_tensor = _tensor_reg->getPortableTensor(output_index);
+ auto input_tensor = _tensor_reg->getPortableTensor(input_index);
+ auto weight_tensor = _tensor_reg->getPortableTensor(weight_index);
+ auto bias_tensor = bias_index.undefined() ? nullptr : _tensor_reg->getPortableTensor(bias_index);
+
+ auto fn = std::make_unique<ops::FullyConnectedLayer>();
+
+ fn->configure(input_tensor, weight_tensor, bias_tensor, activation, weights_format, output_tensor,
+ _external_context);
+
+ _return_fn = std::move(fn);
+}
+
+} // namespace ruy
+} // namespace backend
+} // namespace onert
diff --git a/runtime/onert/backend/ruy/KernelGenerator.h b/runtime/onert/backend/ruy/KernelGenerator.h
new file mode 100644
index 000000000..0f6bd590a
--- /dev/null
+++ b/runtime/onert/backend/ruy/KernelGenerator.h
@@ -0,0 +1,64 @@
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __ONERT_BACKEND_RUY_KERNEL_GENERATOR_H__
+#define __ONERT_BACKEND_RUY_KERNEL_GENERATOR_H__
+
+#include "ExternalContext.h"
+#include "TensorBuilder.h"
+#include "backend/cpu_common/TensorRegistry.h"
+#include "Tensor.h"
+
+#include <backend/CustomKernelBuilder.h>
+#include <backend/cpu_common/KernelGeneratorBase.h>
+#include <ir/Operands.h>
+#include <ir/Operations.h>
+
+namespace onert
+{
+namespace backend
+{
+namespace ruy
+{
+
+class KernelGenerator : public cpu_common::KernelGeneratorBase
+{
+public:
+ KernelGenerator(const ir::Operands &operands_ctx, const ir::Operations &operations_ctx,
+ const std::shared_ptr<TensorBuilder> &tensor_builder,
+ const std::shared_ptr<cpu_common::TensorRegistry> &tensor_reg,
+ const std::shared_ptr<custom::IKernelBuilder> &kernel_builder,
+ const std::shared_ptr<ExternalContext> &external_context);
+
+ void visit(const ir::OpSequence &) override;
+ void visit(const ir::operation::Conv2D &) override;
+ void visit(const ir::operation::FullyConnected &) override;
+
+private:
+ const ir::Operands &_ctx;
+ const ir::Operations &_operations_ctx;
+ std::shared_ptr<TensorBuilder> _tensor_builder;
+ std::shared_ptr<cpu_common::TensorRegistry> _tensor_reg;
+ std::shared_ptr<backend::custom::IKernelBuilder> _kernel_builder;
+ ir::Layout _current_layout;
+ const std::shared_ptr<ExternalContext> _external_context;
+};
+
+} // namespace ruy
+} // namespace backend
+} // namespace onert
+
+#endif // __ONERT_BACKEND_RUY_KERNEL_GENERATOR_H__
diff --git a/runtime/onert/backend/ruy/StaticTensorManager.h b/runtime/onert/backend/ruy/StaticTensorManager.h
new file mode 100644
index 000000000..af2d25241
--- /dev/null
+++ b/runtime/onert/backend/ruy/StaticTensorManager.h
@@ -0,0 +1,35 @@
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __ONERT_BACKEND_RUY_STATICTENSOR_MANAGER_H__
+#define __ONERT_BACKEND_RUY_STATICTENSOR_MANAGER_H__
+
+#include "backend/cpu_common/StaticTensorManager.h"
+
+namespace onert
+{
+namespace backend
+{
+namespace ruy
+{
+
+using StaticTensorManager = cpu_common::StaticTensorManager;
+
+} // namespace ruy
+} // namespace backend
+} // namespace onert
+
+#endif // __ONERT_BACKEND_RUY_STATICTENSOR_MANAGER_H__
diff --git a/runtime/onert/backend/ruy/Tensor.h b/runtime/onert/backend/ruy/Tensor.h
new file mode 100644
index 000000000..60d0fbf77
--- /dev/null
+++ b/runtime/onert/backend/ruy/Tensor.h
@@ -0,0 +1,37 @@
+/*
+ * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __ONERT_BACKEND_RUY_TENSOR_H__
+#define __ONERT_BACKEND_RUY_TENSOR_H__
+
+#include <backend/cpu_common/Tensor.h>
+#include <ir/Data.h>
+
+namespace onert
+{
+namespace backend
+{
+namespace ruy
+{
+
+using Tensor = cpu_common::Tensor;
+using ExternalTensor = cpu_common::ExternalTensor;
+
+} // namespace ruy
+} // namespace backend
+} // namespace onert
+
+#endif // __ONERT_BACKEND_RUY_TENSOR_H__
diff --git a/runtime/onert/backend/ruy/TensorBuilder.cc b/runtime/onert/backend/ruy/TensorBuilder.cc
new file mode 100644
index 000000000..c77defc30
--- /dev/null
+++ b/runtime/onert/backend/ruy/TensorBuilder.cc
@@ -0,0 +1,90 @@
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "TensorBuilder.h"
+
+#include <util/logging.h>
+
+#include <cassert>
+
+namespace onert
+{
+namespace backend
+{
+namespace ruy
+{
+
+TensorBuilder::TensorBuilder(const std::shared_ptr<cpu_common::TensorRegistry> &tensor_reg)
+ : _tensor_reg{tensor_reg},
+ _dynamic_tensor_mgr{new cpu_common::DynamicTensorManager(_tensor_reg)},
+ _static_tensor_mgr{new StaticTensorManager(_tensor_reg, _dynamic_tensor_mgr.get())}
+{
+ /* empty */
+}
+
+void TensorBuilder::registerTensorInfo(const ir::OperandIndex &ind, const ir::OperandInfo &info,
+ ir::Layout layout)
+{
+ _tensor_info_map.emplace(ind, info);
+
+  // The ruy backend supports only one layout: NHWC
+ assert(layout == ir::Layout::NHWC);
+ if (info.isDynamic())
+ {
+ _dynamic_tensor_mgr->buildTensor(ind, info, layout);
+ }
+ else
+ {
+ _static_tensor_mgr->buildTensor(ind, info, layout, info.isConstant());
+ }
+}
+
+void TensorBuilder::notifyFirstUse(const ir::OperandIndex &ind)
+{
+ assert(_tensor_info_map.find(ind) != _tensor_info_map.end());
+ const auto tensor_info = _tensor_info_map.at(ind);
+
+ if (!_tensor_reg->getNativeTensor(ind)->is_dynamic())
+ {
+ const auto size = tensor_info.total_size();
+ _static_tensor_mgr->claimPlan(ind, size);
+ }
+}
+
+void TensorBuilder::notifyLastUse(const ir::OperandIndex &ind)
+{
+ if (!_tensor_reg->getNativeTensor(ind)->is_dynamic())
+ {
+ _static_tensor_mgr->releasePlan(ind);
+ }
+}
+
+bool TensorBuilder::isRegistered(const ir::OperandIndex &ind) const
+{
+ return _tensor_info_map.find(ind) != _tensor_info_map.end();
+}
+
+void TensorBuilder::prepare(void) { _static_tensor_mgr->allocateNonconsts(); }
+
+void TensorBuilder::allocate()
+{
+  // NOTE For now there is nothing to do here. Allocation happens in the prepare stage instead,
+  // which is not ideal; it is done there because the kernels require `ITensor`s to be allocated
+  // before kernel generation.
+}
+
+} // namespace ruy
+} // namespace backend
+} // namespace onert
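Note: registerTensorInfo above routes each operand by OperandInfo::isDynamic(): dynamic operands are handed to the DynamicTensorManager, while static ones are pre-planned by the StaticTensorManager and later claimed/released by total byte size. A toy sketch of that split using stand-in types (illustrative only, not the onert managers):

#include <cstddef>
#include <iostream>
#include <map>

// Stand-in for ir::OperandInfo: just the two fields the routing decision needs.
struct ToyOperandInfo
{
  bool is_dynamic;
  std::size_t total_size; // bytes
};

class ToyTensorBuilder
{
public:
  void registerTensorInfo(int index, const ToyOperandInfo &info)
  {
    _infos[index] = info;
    if (info.is_dynamic)
      std::cout << "operand " << index << ": deferred to the dynamic tensor manager\n";
    else
      std::cout << "operand " << index << ": pre-planned by the static tensor manager\n";
  }

  // Static tensors claim a buffer of total_size bytes at first use; dynamic ones do not.
  void notifyFirstUse(int index)
  {
    const auto &info = _infos.at(index);
    if (!info.is_dynamic)
      std::cout << "claim " << info.total_size << " bytes for operand " << index << "\n";
  }

private:
  std::map<int, ToyOperandInfo> _infos;
};

int main()
{
  ToyTensorBuilder tb;
  tb.registerTensorInfo(0, {false, 4 * 224 * 224 * 3}); // static float input
  tb.registerTensorInfo(1, {true, 0});                  // shape known only at run time
  tb.notifyFirstUse(0);
  return 0;
}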
diff --git a/runtime/onert/backend/ruy/TensorBuilder.h b/runtime/onert/backend/ruy/TensorBuilder.h
new file mode 100644
index 000000000..91c07bd82
--- /dev/null
+++ b/runtime/onert/backend/ruy/TensorBuilder.h
@@ -0,0 +1,73 @@
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __ONERT_BACKEND_RUY_TENSOR_BUILDER_H__
+#define __ONERT_BACKEND_RUY_TENSOR_BUILDER_H__
+
+#include <backend/cpu_common/DynamicTensorManager.h>
+#include <backend/cpu_common/TensorRegistry.h>
+
+#include <ir/OperandIndexMap.h>
+
+#include "StaticTensorManager.h"
+#include "Tensor.h"
+
+#include <unordered_map>
+
+namespace onert
+{
+namespace backend
+{
+namespace ruy
+{
+
+class TensorBuilder
+{
+public:
+ TensorBuilder(const std::shared_ptr<cpu_common::TensorRegistry> &tensor_reg);
+
+ /**
+   * @brief Register tensor information to allocate on the ruy backend
+   * @param[in] ind            Operand index
+   * @param[in] info           Operand information
+   * @param[in] backend_layout Operand data layout
+ */
+ void registerTensorInfo(const ir::OperandIndex &ind, const ir::OperandInfo &info,
+ ir::Layout backend_layout);
+
+ void notifyFirstUse(const ir::OperandIndex &);
+ void notifyLastUse(const ir::OperandIndex &);
+
+ bool isRegistered(const ir::OperandIndex &) const;
+
+ void prepare(void);
+ void allocate();
+ void postFunctionPrepare() { /* DO NOTHING */}
+
+ IDynamicTensorManager *dynamicTensorManager(void) { return _dynamic_tensor_mgr.get(); }
+
+private:
+ const std::shared_ptr<cpu_common::TensorRegistry> _tensor_reg;
+ std::unique_ptr<cpu_common::DynamicTensorManager> _dynamic_tensor_mgr;
+ std::unique_ptr<StaticTensorManager> _static_tensor_mgr;
+ ir::OperandIndexMap<ir::OperandInfo> _tensor_info_map;
+};
+
+} // namespace ruy
+} // namespace backend
+} // namespace onert
+
+#endif // __ONERT_BACKEND_RUY_TENSOR_BUILDER_H__
diff --git a/runtime/onert/backend/ruy/ops/ConvolutionLayer.cc b/runtime/onert/backend/ruy/ops/ConvolutionLayer.cc
new file mode 100644
index 000000000..d249b2ce3
--- /dev/null
+++ b/runtime/onert/backend/ruy/ops/ConvolutionLayer.cc
@@ -0,0 +1,153 @@
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "ConvolutionLayer.h"
+
+#include "../Tensor.h"
+#include "ir/Padding.h"
+
+namespace onert
+{
+namespace backend
+{
+namespace ruy
+{
+namespace ops
+{
+ConvolutionLayer::ConvolutionLayer()
+ : _input(nullptr), _kernel(nullptr), _bias(nullptr), _output(nullptr),
+ _paddingType(ir::PaddingType::EXPLICIT), _paddingLeft(0), _paddingTop(0), _paddingRight(0),
+ _paddingBottom(0), _strideWidth(0), _strideHeight(0), _dilationWidthFactor(1),
+ _dilationHeightFactor(1), _activation(ir::Activation::NONE),
+ _conv_kernel(new nnfw::ruy::Conv()), _prepare(false)
+{
+ // DO NOTHING
+}
+
+ConvolutionLayer::~ConvolutionLayer() = default;
+
+void ConvolutionLayer::convFloat32()
+{
+ float output_activation_min = 0, output_activation_max = 0;
+ CalculateActivationRange(_activation, &output_activation_min, &output_activation_max);
+
+ nnfw::ruy::ConvParams op_params;
+ op_params.padding_type = getPaddingType(_paddingType);
+ op_params.padding_values.width = _paddingLeft;
+ op_params.padding_values.height = _paddingTop;
+ op_params.stride_width = _strideWidth;
+ op_params.stride_height = _strideHeight;
+ op_params.dilation_width_factor = _dilationWidthFactor;
+ op_params.dilation_height_factor = _dilationHeightFactor;
+ op_params.float_activation_min = output_activation_min;
+ op_params.float_activation_max = output_activation_max;
+
+ nnfw::ruy::Conv &kernel = *_conv_kernel;
+ kernel(op_params, getTensorShape(_input), reinterpret_cast<const float *>(_input->buffer()),
+ getTensorShape(_kernel), reinterpret_cast<const float *>(_kernel->buffer()),
+ getTensorShape(_bias), reinterpret_cast<const float *>(_bias->buffer()),
+ getTensorShape(_output), reinterpret_cast<float *>(_output->buffer()),
+ _external_context->ruy_context());
+}
+
+void ConvolutionLayer::configure(const IPortableTensor *input, const IPortableTensor *kernel,
+ const IPortableTensor *bias, const ir::PaddingType paddingType,
+ const uint32_t paddingLeft, const uint32_t paddingRight,
+ const uint32_t paddingTop, const uint32_t paddingBottom,
+ const uint32_t strideWidth, const uint32_t strideHeight,
+ const uint32_t dilationWidthFactor,
+ const uint32_t dilationHeightFactor,
+ const ir::Activation activation, IPortableTensor *output,
+ const std::shared_ptr<ExternalContext> &external_context)
+{
+ _input = input;
+ _kernel = kernel;
+ _bias = bias;
+ _paddingType = paddingType;
+ _paddingLeft = paddingLeft;
+ _paddingRight = paddingRight;
+ _paddingTop = paddingTop;
+ _paddingBottom = paddingBottom;
+ _strideWidth = strideWidth;
+ _strideHeight = strideHeight;
+ _dilationWidthFactor = dilationWidthFactor;
+ _dilationHeightFactor = dilationHeightFactor;
+ _activation = activation;
+ _output = output;
+ _external_context = external_context;
+}
+
+void ConvolutionLayer::run()
+{
+ prepare();
+
+ if (_input->is_dynamic() || _kernel->is_dynamic())
+ {
+ const auto ifm_shape = _input->getShape().asFeature(_input->layout());
+ const auto ofm_shape = _output->getShape().asFeature(_input->layout());
+ // Kernel format is [depth_out, kernel_height, kernel_width, depth_in].
+ const auto ker_shape = _kernel->getShape();
+ const auto ker_height = ker_shape.dim(1);
+ const auto ker_width = ker_shape.dim(2);
+
+ ir::Stride stride;
+    stride.vertical = _strideHeight;
+    stride.horizontal = _strideWidth;
+
+ ir::Padding param_padding;
+ param_padding.type = _paddingType;
+ param_padding.param.left = _paddingLeft;
+ param_padding.param.right = _paddingRight;
+ param_padding.param.top = _paddingTop;
+ param_padding.param.bottom = _paddingBottom;
+
+ const auto padding =
+ ir::calculatePadding(param_padding, ifm_shape, ofm_shape, stride, ker_width, ker_height,
+ _dilationWidthFactor, _dilationHeightFactor);
+
+ _paddingLeft = padding.left;
+ _paddingRight = padding.right;
+ _paddingTop = padding.top;
+ _paddingBottom = padding.bottom;
+ }
+ if (_input->data_type() == OperandType::FLOAT32)
+ {
+ convFloat32();
+ }
+ else
+ {
+ throw std::runtime_error{"Conv: unsupported data type"};
+ }
+}
+
+void ConvolutionLayer::prepare()
+{
+ if (_prepare)
+ return;
+
+ nnfw::ruy::Conv &kernel = *_conv_kernel;
+ if (_input->data_type() == OperandType::FLOAT32 && _kernel->is_constant())
+ {
+ kernel.prepare(getTensorShape(_input), getTensorShape(_kernel), getTensorShape(_output),
+ _strideWidth, _strideHeight, _dilationWidthFactor, _dilationHeightFactor);
+ }
+ _prepare = true;
+}
+
+} // namespace ops
+} // namespace ruy
+} // namespace backend
+} // namespace onert
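Note: when the input or kernel shape is dynamic, run() above recomputes the padding at execution time via ir::calculatePadding. For SAME padding that computation is expected to follow the usual TensorFlow-style rule; the sketch below shows that rule for a single spatial dimension (illustrative only, not the onert implementation):

#include <algorithm>
#include <cstdint>
#include <iostream>

struct Pad
{
  int32_t before, after;
};

// Standard SAME-padding rule for one spatial dimension: the output size is
// ceil(in / stride), and the total padding is whatever makes the (dilated)
// filter fit that output size, split as evenly as possible.
static Pad same_padding(int32_t in, int32_t filter, int32_t stride, int32_t dilation)
{
  const int32_t effective_filter = (filter - 1) * dilation + 1;
  const int32_t out = (in + stride - 1) / stride;
  const int32_t total = std::max<int32_t>((out - 1) * stride + effective_filter - in, 0);
  return {total / 2, total - total / 2};
}

int main()
{
  const Pad p = same_padding(/*in=*/224, /*filter=*/3, /*stride=*/2, /*dilation=*/1);
  std::cout << "pad before=" << p.before << " after=" << p.after << "\n"; // 0 and 1
  return 0;
}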
diff --git a/runtime/onert/backend/ruy/ops/ConvolutionLayer.h b/runtime/onert/backend/ruy/ops/ConvolutionLayer.h
new file mode 100644
index 000000000..a55387b93
--- /dev/null
+++ b/runtime/onert/backend/ruy/ops/ConvolutionLayer.h
@@ -0,0 +1,90 @@
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __ONERT_BACKEND_RUY_OPS_CONVOLUTIONLAYER_H__
+#define __ONERT_BACKEND_RUY_OPS_CONVOLUTIONLAYER_H__
+
+#include <backend/IPortableTensor.h>
+#include "../ExternalContext.h"
+#include "OperationUtils.h"
+
+#include <ruy/operation/Conv.h>
+#include <exec/IFunction.h>
+#include <functional>
+#include <memory>
+
+namespace onert
+{
+namespace backend
+{
+namespace ruy
+{
+namespace ops
+{
+
+class ConvolutionLayer : public ::onert::exec::IFunction
+{
+public:
+ ConvolutionLayer();
+ ~ConvolutionLayer();
+
+public:
+ void convFloat32();
+
+ void configure(const IPortableTensor *input, const IPortableTensor *kernel,
+ const IPortableTensor *bias, ir::PaddingType _paddingType,
+ const uint32_t paddingLeft, const uint32_t paddingRight, const uint32_t paddingTop,
+ const uint32_t paddingBottom, const uint32_t strideWidth,
+ const uint32_t strideHeight, const uint32_t dilationWidthFactor,
+ const uint32_t dilationHeightFactor, const ir::Activation activation,
+ IPortableTensor *output, const std::shared_ptr<ExternalContext> &external_context);
+
+ void run() override;
+
+ void prepare() override;
+
+private:
+ const IPortableTensor *_input;
+ const IPortableTensor *_kernel;
+ const IPortableTensor *_bias;
+ IPortableTensor *_output;
+
+ ir::PaddingType _paddingType;
+ uint32_t _paddingLeft;
+ uint32_t _paddingTop;
+ uint32_t _paddingRight;
+ uint32_t _paddingBottom;
+
+ uint32_t _strideWidth;
+ uint32_t _strideHeight;
+ uint32_t _dilationWidthFactor;
+ uint32_t _dilationHeightFactor;
+
+ ir::Activation _activation;
+
+ std::unique_ptr<nnfw::ruy::Conv> _conv_kernel;
+
+ bool _prepare;
+
+ std::shared_ptr<ExternalContext> _external_context;
+};
+
+} // namespace ops
+} // namespace ruy
+} // namespace backend
+} // namespace onert
+
+#endif // __ONERT_BACKEND_RUY_OPS_CONVOLUTIONLAYER_H__
diff --git a/runtime/onert/backend/ruy/ops/FullyConnectedLayer.cc b/runtime/onert/backend/ruy/ops/FullyConnectedLayer.cc
new file mode 100644
index 000000000..af693e3b4
--- /dev/null
+++ b/runtime/onert/backend/ruy/ops/FullyConnectedLayer.cc
@@ -0,0 +1,103 @@
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "FullyConnectedLayer.h"
+
+#include "../Tensor.h"
+#include <ruy/operation/FullyConnected.h>
+#include <ruy/TensorUtils.h>
+
+namespace onert
+{
+namespace backend
+{
+namespace ruy
+{
+namespace ops
+{
+
+FullyConnectedLayer::FullyConnectedLayer()
+ : _input(nullptr), _weights(nullptr), _bias(nullptr), _output(nullptr),
+ _activation(ir::Activation::NONE), _external_context(nullptr)
+{
+ // DO NOTHING
+}
+
+FullyConnectedLayer::~FullyConnectedLayer() = default;
+
+void FullyConnectedLayer::fullyConnectedFloat32()
+{
+ float output_activation_min = 0, output_activation_max = 0;
+ CalculateActivationRange(_activation, &output_activation_min, &output_activation_max);
+ nnfw::ruy::FullyConnectedParams op_params;
+
+ op_params.float_activation_min = output_activation_min;
+ op_params.float_activation_max = output_activation_max;
+ op_params.activation = convertActivationType(_activation);
+ op_params.lhs_cacheable = _weights->is_constant();
+ op_params.rhs_cacheable = _input->is_constant();
+
+ nnfw::ruy::FullyConnected(
+ op_params, getTensorShape(_input), reinterpret_cast<const float *>(_input->buffer()),
+ getTensorShape(_weights), reinterpret_cast<const float *>(_weights->buffer()),
+ getTensorShape(_bias), reinterpret_cast<const float *>(_bias ? _bias->buffer() : nullptr),
+ getTensorShape(_output), reinterpret_cast<float *>(_output->buffer()),
+ _external_context->ruy_context());
+}
+
+void FullyConnectedLayer::configure(const IPortableTensor *input, const IPortableTensor *weights,
+ const IPortableTensor *bias, ir::Activation activation,
+ ir::FullyConnectedWeightsFormat weights_format,
+ IPortableTensor *output,
+ const std::shared_ptr<ExternalContext> &external_context)
+{
+ UNUSED_RELEASE(weights_format);
+ _input = input;
+ _weights = weights;
+ _bias = bias;
+ _activation = activation;
+ _output = output;
+ _external_context = external_context;
+}
+
+void FullyConnectedLayer::run()
+{
+ if (_input->data_type() == OperandType::FLOAT32)
+ {
+ fullyConnectedFloat32();
+ }
+ else
+ {
+ throw std::runtime_error{"FullyConnected: unsupported data type"};
+ }
+}
+
+void FullyConnectedLayer::prepare()
+{
+ if (_bias && _bias->is_constant())
+ {
+ const int bias_size = getTensorShape(_bias).FlatSize();
+ if (nnfw::ruy::IsZeroVector(reinterpret_cast<float *>(_bias->buffer()), bias_size))
+ {
+ _bias = nullptr;
+ }
+ }
+}
+
+} // namespace ops
+} // namespace ruy
+} // namespace backend
+} // namespace onert
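Note: prepare() above drops a constant bias that is entirely zero so the kernel can skip the bias addition. A standalone sketch of the same check, with a plain loop standing in for nnfw::ruy::IsZeroVector:

#include <cstddef>
#include <iostream>
#include <vector>

// Returns true when every element is exactly zero, mirroring the intent of the
// IsZeroVector check in FullyConnectedLayer::prepare().
static bool is_zero_vector(const float *data, std::size_t size)
{
  for (std::size_t i = 0; i < size; ++i)
    if (data[i] != 0.0f)
      return false;
  return true;
}

int main()
{
  std::vector<float> bias(128, 0.0f);
  const float *bias_ptr = bias.data();
  if (is_zero_vector(bias_ptr, bias.size()))
    bias_ptr = nullptr; // a null bias tells the kernel to skip the bias add
  std::cout << (bias_ptr ? "bias kept" : "bias elided") << "\n";
  return 0;
}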
diff --git a/runtime/onert/backend/ruy/ops/FullyConnectedLayer.h b/runtime/onert/backend/ruy/ops/FullyConnectedLayer.h
new file mode 100644
index 000000000..33d560f0b
--- /dev/null
+++ b/runtime/onert/backend/ruy/ops/FullyConnectedLayer.h
@@ -0,0 +1,69 @@
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __ONERT_BACKEND_RUY_OPS_FULLYCONNECTEDLAYER_H__
+#define __ONERT_BACKEND_RUY_OPS_FULLYCONNECTEDLAYER_H__
+
+#include <backend/IPortableTensor.h>
+#include "../ExternalContext.h"
+#include "OperationUtils.h"
+
+#include <exec/IFunction.h>
+
+namespace onert
+{
+namespace backend
+{
+namespace ruy
+{
+namespace ops
+{
+
+class FullyConnectedLayer : public ::onert::exec::IFunction
+{
+public:
+ FullyConnectedLayer();
+ ~FullyConnectedLayer();
+
+public:
+ void fullyConnectedFloat32();
+
+ void configure(const IPortableTensor *input, const IPortableTensor *weights,
+ const IPortableTensor *bias, ir::Activation activation,
+ ir::FullyConnectedWeightsFormat weights_format, IPortableTensor *output,
+ const std::shared_ptr<ExternalContext> &external_context);
+
+ void run() override;
+
+ void prepare() override;
+
+private:
+ const IPortableTensor *_input;
+ const IPortableTensor *_weights;
+ const IPortableTensor *_bias;
+ IPortableTensor *_output;
+
+ ir::Activation _activation;
+
+ std::shared_ptr<ExternalContext> _external_context;
+};
+
+} // namespace ops
+} // namespace ruy
+} // namespace backend
+} // namespace onert
+
+#endif // __ONERT_BACKEND_RUY_OPS_FULLYCONNECTEDLAYER_H__
diff --git a/runtime/onert/backend/ruy/ops/OperationUtils.cc b/runtime/onert/backend/ruy/ops/OperationUtils.cc
new file mode 100644
index 000000000..929107b1a
--- /dev/null
+++ b/runtime/onert/backend/ruy/ops/OperationUtils.cc
@@ -0,0 +1,47 @@
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "OperationUtils.h"
+
+namespace onert
+{
+namespace backend
+{
+namespace ruy
+{
+namespace ops
+{
+
+nnfw::ruy::PaddingType getPaddingType(ir::PaddingType ir_padding_type)
+{
+ switch (ir_padding_type)
+ {
+ case ir::PaddingType::EXPLICIT:
+ return nnfw::ruy::PaddingType::kNone;
+ case ir::PaddingType::SAME:
+ return nnfw::ruy::PaddingType::kSame;
+ case ir::PaddingType::VALID:
+ return nnfw::ruy::PaddingType::kValid;
+ default:
+ throw std::runtime_error("Wrong padding type.");
+ break;
+ }
+}
+
+} // namespace ops
+} // namespace ruy
+} // namespace backend
+} // namespace onert
diff --git a/runtime/onert/backend/ruy/ops/OperationUtils.h b/runtime/onert/backend/ruy/ops/OperationUtils.h
new file mode 100644
index 000000000..5dfdc7ec5
--- /dev/null
+++ b/runtime/onert/backend/ruy/ops/OperationUtils.h
@@ -0,0 +1,123 @@
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __ONERT_BACKEND_RUY_OPS_OPERATION_UTILS_H__
+#define __ONERT_BACKEND_RUY_OPS_OPERATION_UTILS_H__
+
+#include <backend/IPortableTensor.h>
+
+#include <ruy/Shape.h>
+#include <ruy/Types.h>
+#include <iostream>
+#include <ir/DataType.h>
+#include <ir/InternalType.h>
+#include <ir/Padding.h>
+
+#include <limits>
+
+using OperandType = onert::ir::DataType;
+
+namespace onert
+{
+namespace backend
+{
+namespace ruy
+{
+namespace ops
+{
+
+inline nnfw::ruy::Shape getTensorShape(const IPortableTensor *tensor)
+{
+ if (tensor == nullptr)
+ return nnfw::ruy::Shape();
+
+ const ir::Shape &shape = tensor->get_info().shape();
+
+ assert(tensor->layout() == ir::Layout::NHWC);
+
+ auto rank = shape.rank();
+ nnfw::ruy::Shape ret(rank);
+ auto data = ret.DimsData();
+ for (int i = 0; i < rank; ++i)
+ {
+ data[i] = shape.dim(i);
+ }
+ return ret;
+}
+
+inline nnfw::ruy::FusedActivationFunctionType convertActivationType(const ir::Activation activation)
+{
+ switch (activation)
+ {
+ case ir::Activation::NONE:
+ return nnfw::ruy::FusedActivationFunctionType::kNone;
+ case ir::Activation::RELU:
+ return nnfw::ruy::FusedActivationFunctionType::kRelu;
+ case ir::Activation::RELU1:
+ return nnfw::ruy::FusedActivationFunctionType::kRelu1;
+ case ir::Activation::RELU6:
+ return nnfw::ruy::FusedActivationFunctionType::kRelu6;
+ case ir::Activation::TANH:
+ return nnfw::ruy::FusedActivationFunctionType::kTanh;
+ case ir::Activation::SIGMOID:
+ return nnfw::ruy::FusedActivationFunctionType::kSigmoid;
+ default:
+ throw std::runtime_error{"RUY backend: Cannot convert activation type"};
+ }
+}
+
+template <typename T>
+void CalculateActivationRange(ir::Activation activation, T *activation_min, T *activation_max)
+{
+ if (activation == ir::Activation::RELU)
+ {
+ *activation_min = 0;
+ *activation_max = std::numeric_limits<T>::max();
+ }
+ else if (activation == ir::Activation::RELU6)
+ {
+ *activation_min = 0;
+ *activation_max = 6;
+ }
+ else if (activation == ir::Activation::RELU1)
+ {
+ *activation_min = -1;
+ *activation_max = 1;
+ }
+ else if (activation == ir::Activation::SIGMOID)
+ {
+ *activation_min = 0;
+ *activation_max = 1;
+ }
+ else if (activation == ir::Activation::NONE)
+ {
+ *activation_min = std::numeric_limits<T>::lowest();
+ *activation_max = std::numeric_limits<T>::max();
+ }
+ else
+ {
+ std::cout << "Unsupported fused activation function." << std::endl;
+ }
+}
+
+nnfw::ruy::PaddingType getPaddingType(ir::PaddingType ir_padding_type);
+
+} // namespace ops
+} // namespace ruy
+} // namespace backend
+} // namespace onert
+
+#endif // __ONERT_BACKEND_RUY_OPS_OPERATION_UTILS_H__
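Note: the min/max pair produced by CalculateActivationRange is used by the kernels to clamp every output element into the fused activation's range. A tiny standalone illustration of that clamping for RELU6, whose range is [0, 6]:

#include <algorithm>
#include <initializer_list>
#include <iostream>

int main()
{
  // Values CalculateActivationRange<float> would produce for ir::Activation::RELU6.
  const float act_min = 0.0f, act_max = 6.0f;

  // Kernels clamp each output element into [act_min, act_max].
  for (float v : {-3.0f, 2.5f, 9.0f})
    std::cout << v << " -> " << std::min(std::max(v, act_min), act_max) << "\n";
  return 0;
}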
diff --git a/runtime/onert/backend/ruy/ruy.cc b/runtime/onert/backend/ruy/ruy.cc
new file mode 100644
index 000000000..4f33590e9
--- /dev/null
+++ b/runtime/onert/backend/ruy/ruy.cc
@@ -0,0 +1,24 @@
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "Backend.h"
+
+extern "C" {
+
+onert::backend::Backend *onert_backend_create() { return new onert::backend::ruy::Backend; }
+
+void onert_backend_destroy(onert::backend::Backend *backend) { delete backend; }
+}
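Note: the two extern "C" symbols above are the backend's plugin entry points; the runtime is expected to dlopen the shared library built by this backend (libbackend_ruy.so, per the CMakeLists above) and resolve them by name. A hedged sketch of that loading pattern (the real loader lives in onert core and is not part of this patch):

#include <dlfcn.h>
#include <iostream>

int main()
{
  // Illustrative library name; the runtime resolves the actual path itself.
  void *handle = dlopen("libbackend_ruy.so", RTLD_LAZY | RTLD_LOCAL);
  if (!handle)
  {
    std::cerr << "dlopen failed: " << dlerror() << "\n";
    return 1;
  }

  // The created object is really an onert::backend::Backend*, kept opaque here.
  using create_fn = void *(*)();
  using destroy_fn = void (*)(void *);
  auto create = reinterpret_cast<create_fn>(dlsym(handle, "onert_backend_create"));
  auto destroy = reinterpret_cast<destroy_fn>(dlsym(handle, "onert_backend_destroy"));
  if (create && destroy)
  {
    void *backend = create();
    destroy(backend);
  }
  dlclose(handle);
  return 0;
}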
diff --git a/runtime/onert/backend/xnnpack/Backend.h b/runtime/onert/backend/xnnpack/Backend.h
new file mode 100644
index 000000000..b7aef1625
--- /dev/null
+++ b/runtime/onert/backend/xnnpack/Backend.h
@@ -0,0 +1,68 @@
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __ONERT_BACKEND_XNNPACK_BACKEND_H__
+#define __ONERT_BACKEND_XNNPACK_BACKEND_H__
+
+#include "BackendContext.h"
+#include "Config.h"
+#include "ConstantInitializer.h"
+#include "KernelGenerator.h"
+
+#include <backend/Backend.h>
+
+#include <memory>
+
+namespace onert
+{
+namespace backend
+{
+namespace xnnpack
+{
+
+class Backend : public ::onert::backend::Backend
+{
+public:
+ Backend() : _config{std::make_shared<Config>()} {}
+
+ std::shared_ptr<IConfig> config() const override { return _config; }
+
+ std::unique_ptr<onert::backend::BackendContext>
+ newContext(const ir::Graph &graph, const std::shared_ptr<custom::IKernelBuilder> &kb,
+ bool) const override
+ {
+ const auto &operands = graph.operands();
+ const auto &operations = graph.operations();
+ auto context = std::make_unique<BackendContext>(this, &graph);
+ auto tr = std::make_shared<cpu_common::TensorRegistry>();
+ auto tb = std::make_shared<TensorBuilder>(tr);
+ context->tensor_registry = tr;
+ context->tensor_builder = tb;
+ context->constant_initializer = std::make_shared<ConstantInitializer>(operands, tr);
+ context->kernel_gen = std::make_shared<KernelGenerator>(operands, operations, tb, tr, kb,
+ context->external_context());
+ return context;
+ }
+
+private:
+ std::shared_ptr<IConfig> _config;
+};
+
+} // namespace xnnpack
+} // namespace backend
+} // namespace onert
+
+#endif // __ONERT_BACKEND_XNNPACK_BACKEND_H__
diff --git a/runtime/onert/backend/xnnpack/BackendContext.cc b/runtime/onert/backend/xnnpack/BackendContext.cc
new file mode 100644
index 000000000..503d088aa
--- /dev/null
+++ b/runtime/onert/backend/xnnpack/BackendContext.cc
@@ -0,0 +1,147 @@
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "BackendContext.h"
+
+#include "TensorBuilder.h"
+#include "KernelGenerator.h"
+#include "util/logging.h"
+#include "ir/Index.h"
+#include "ir/OperandIndexMap.h"
+#include "ir/OperandIndexSequence.h"
+#include "backend/cpu_common/BackendContextHelpers.h"
+
+namespace onert
+{
+namespace backend
+{
+namespace xnnpack
+{
+
+void BackendContext::initConsts()
+{
+ for (auto &op : operation_list())
+ {
+ constant_initializer->setLayout(op.layout);
+ graph()->operations().at(op.index).accept(*constant_initializer);
+ }
+
+ for (auto ind : operand_list())
+ {
+ const auto &obj = graph()->operands().at(ind);
+ if (obj.isConstant() && !constant_initializer->exist(ind))
+ {
+ constant_initializer->registerDefaultInitializer(ind, obj);
+ }
+ }
+
+ constant_initializer->run();
+}
+
+ITensorRegistry *BackendContext::genTensors(const std::vector<onert::ir::OpSequenceIndex> &order,
+ const ir::OpSequences &op_seqs,
+ const ir::LowerInfoMap &lower_info)
+{
+ auto model_io = (graph()->getInputs() + graph()->getOutputs()) | ir::Remove::UNDEFINED |
+ ir::Remove::DUPLICATED;
+ for (auto index : operand_list())
+ {
+ if (model_io.contains(index))
+ continue;
+ const auto &obj = graph()->operands().at(index);
+ const auto frontend_layout = [&]() {
+ if (obj.getUses().size() == 0)
+ return ir::Layout::UNKNOWN;
+ auto use_op_ind = *obj.getUses().begin(); // FIXME What if it has two or more uses?
+ for (auto &operation_info : operation_list())
+ {
+ if (operation_info.index == use_op_ind)
+ return operation_info.layout;
+ }
+ return ir::Layout::UNKNOWN;
+ }();
+ const auto &permute_factor = lower_info.operand.at(index)->def_factors().getOnlyElement();
+ if (permute_factor.backend() != backend())
+ continue;
+ const auto backend_layout = permute_factor.layout();
+ ir::OperandInfo backend_info{permuteShape(obj.shape(), frontend_layout, backend_layout),
+ obj.typeInfo(), obj.info().memAllocType(), obj.isConstant()};
+ tensor_builder->registerTensorInfo(index, backend_info, backend_layout);
+ }
+
+  // TODO Get compiler options from the compiler and use them rather than reading them from Env
+ if (util::getConfigString(util::config::EXECUTOR) == "Linear")
+ {
+ cpu_common::planTensors(*this, order, op_seqs, lower_info);
+ }
+ else
+ {
+    // For executors that do not have a fixed linear execution order:
+    // as a workaround, plan all registered tensors statically so they are never deallocated
+ for (auto ind : operand_list())
+ {
+ if (tensor_builder->isRegistered(ind))
+ tensor_builder->notifyFirstUse(ind);
+ }
+ }
+
+ tensor_builder->prepare();
+
+ return tensor_registry.get();
+}
+
+FunctionMap BackendContext::genKernels(const std::vector<onert::ir::OpSequenceIndex> &order,
+ const ir::OpSequences &op_seqs)
+{
+ FunctionMap ret;
+
+ for (auto op_seq_ind : order)
+ {
+ const auto &op_seq = op_seqs.at(op_seq_ind);
+ bool assigned = [&]() {
+ for (auto op_info : operation_list())
+ if (op_seq.exist(op_info.index))
+ return true;
+ return false;
+ }();
+ if (!assigned)
+ continue;
+ auto fn_seq = kernel_gen->generate(op_seqs.at(op_seq_ind));
+ ret.emplace_back(op_seq_ind, std::move(fn_seq));
+ }
+
+ initConsts();
+
+ // NOTE For memory optimization, we want to free some operand data
+ for (auto ind : operand_list())
+ {
+ // TODO Remove const_cast
+ auto &obj = const_cast<ir::Graph *>(graph())->operands().at(ind);
+ obj.releaseData();
+ }
+
+ for (auto &it : ret)
+ {
+ auto &fn_seq = it.second;
+ fn_seq->iterate([&](exec::IFunction &ifunc) { ifunc.prepare(); });
+ }
+
+ return ret;
+}
+
+} // namespace xnnpack
+} // namespace backend
+} // namespace onert
diff --git a/runtime/onert/backend/xnnpack/BackendContext.h b/runtime/onert/backend/xnnpack/BackendContext.h
new file mode 100644
index 000000000..f81175b9e
--- /dev/null
+++ b/runtime/onert/backend/xnnpack/BackendContext.h
@@ -0,0 +1,85 @@
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __ONERT_BACKEND_XNNPACK_BACKEND_CONTEXT_H__
+#define __ONERT_BACKEND_XNNPACK_BACKEND_CONTEXT_H__
+
+#include <backend/BackendContext.h>
+#include <util/ConfigSource.h>
+#include "TensorBuilder.h"
+#include "ConstantInitializer.h"
+#include "KernelGenerator.h"
+#include "ExternalContext.h"
+
+namespace
+{
+const int kDefaultNumThreadpoolThreads = 1;
+}
+
+namespace onert
+{
+namespace backend
+{
+namespace xnnpack
+{
+
+class BackendContext : public onert::backend::BackendContext
+{
+public:
+ BackendContext(const Backend *backend, const ir::Graph *graph,
+ std::shared_ptr<ITensorRegistry> tensor_registry = nullptr,
+ std::shared_ptr<TensorBuilder> tensor_builder = nullptr,
+ std::shared_ptr<ConstantInitializer> constant_initializer = nullptr,
+ std::shared_ptr<KernelGenerator> kernel_gen = nullptr)
+ : onert::backend::BackendContext(backend, graph, tensor_registry),
+ tensor_builder{tensor_builder}, constant_initializer{constant_initializer},
+ kernel_gen{kernel_gen}, _external_context(nullptr)
+ {
+ int num_threads = util::getConfigInt(util::config::XNNPACK_THREADS);
+ if (num_threads < 1)
+ num_threads = kDefaultNumThreadpoolThreads; // default num of threads
+ _external_context.reset(new ExternalContext(static_cast<size_t>(num_threads)));
+ }
+
+ ITensorRegistry *genTensors(const std::vector<onert::ir::OpSequenceIndex> &order,
+ const ir::OpSequences &op_seqs,
+ const ir::LowerInfoMap &lower_info) override;
+
+ FunctionMap genKernels(const std::vector<ir::OpSequenceIndex> &order,
+ const ir::OpSequences &op_seqs) override;
+
+ std::shared_ptr<ExternalContext> external_context() { return _external_context; }
+
+private:
+ void initConsts();
+ void planTensors(const std::vector<onert::ir::OpSequenceIndex> &order,
+ const ir::OpSequences &op_seqs, const ir::LowerInfoMap &lower_info);
+
+public:
+ // TODO Make it private
+ std::shared_ptr<TensorBuilder> tensor_builder;
+ std::shared_ptr<ConstantInitializer> constant_initializer;
+ std::shared_ptr<KernelGenerator> kernel_gen;
+
+private:
+ std::shared_ptr<ExternalContext> _external_context;
+};
+
+} // namespace xnnpack
+} // namespace backend
+} // namespace onert
+
+#endif // __ONERT_BACKEND_XNNPACK_BACKEND_CONTEXT_H__
diff --git a/runtime/onert/backend/xnnpack/CMakeLists.txt b/runtime/onert/backend/xnnpack/CMakeLists.txt
new file mode 100644
index 000000000..e3de31e6f
--- /dev/null
+++ b/runtime/onert/backend/xnnpack/CMakeLists.txt
@@ -0,0 +1,26 @@
+set(LIB_ONERT_BACKEND_XNNPACK onert_backend_xnnpack)
+
+# Skip this backend on architectures where XNNPACK is not available
+nnfw_find_package(Xnnpack QUIET)
+if(NOT Xnnpack_FOUND)
+ return()
+endif(NOT Xnnpack_FOUND)
+
+file(GLOB_RECURSE SOURCES "*.cc")
+
+add_library(${LIB_ONERT_BACKEND_XNNPACK} SHARED ${SOURCES})
+
+target_link_libraries(${LIB_ONERT_BACKEND_XNNPACK} PRIVATE onert_core)
+target_link_libraries(${LIB_ONERT_BACKEND_XNNPACK} PRIVATE nnfw_common)
+target_link_libraries(${LIB_ONERT_BACKEND_XNNPACK} PRIVATE nnfw_coverage)
+target_link_libraries(${LIB_ONERT_BACKEND_XNNPACK} PRIVATE pthreadpool)
+target_link_libraries(${LIB_ONERT_BACKEND_XNNPACK} PRIVATE XNNPACK)
+
+set_target_properties(${LIB_ONERT_BACKEND_XNNPACK} PROPERTIES OUTPUT_NAME backend_xnnpack)
+
+if(CMAKE_BUILD_TYPE_LC STREQUAL "release")
+ add_custom_command(TARGET ${LIB_ONERT_BACKEND_XNNPACK} POST_BUILD
+ COMMAND ${CMAKE_STRIP} "--strip-unneeded" $<TARGET_FILE_NAME:${LIB_ONERT_BACKEND_XNNPACK}>)
+endif()
+
+install(TARGETS ${LIB_ONERT_BACKEND_XNNPACK} DESTINATION lib)
diff --git a/runtime/onert/core/include/backend/IOptimizer.h b/runtime/onert/backend/xnnpack/Config.cc
index 4844d21b9..4d42a3f18 100644
--- a/runtime/onert/core/include/backend/IOptimizer.h
+++ b/runtime/onert/backend/xnnpack/Config.cc
@@ -14,38 +14,31 @@
* limitations under the License.
*/
-#ifndef __ONERT_BACKEND_I_OPTIMIZER_H__
-#define __ONERT_BACKEND_I_OPTIMIZER_H__
+#include "Config.h"
-namespace onert
-{
-namespace ir
-{
-class LoweredGraph;
-}
-} // namespace onert
+#include <xnnpack.h>
+#include <stdexcept> // std::runtime_error thrown in initialize()
namespace onert
{
namespace backend
{
+namespace xnnpack
+{
-/**
- * @brief Class for backend optimizations. This is an optional class so not all backends must have
- * it.
- *
- */
-struct IOptimizer
+Config::~Config() { xnn_deinitialize(); }
+
+bool Config::initialize()
{
- virtual ~IOptimizer() = default;
- /**
- * @brief Run optimization
- *
- */
- virtual void optimize() = 0;
-};
+ xnn_status status = xnn_initialize(nullptr /* allocator */);
+ if (status != xnn_status_success)
+ {
+ throw std::runtime_error{"failed to initialize XNNPACK"};
+ }
+ return true;
+}
+
+ir::Layout Config::supportLayout(const ir::Operation &, ir::Layout) { return ir::Layout::NHWC; }
+} // namespace xnnpack
} // namespace backend
} // namespace onert
-
-#endif // __ONERT_BACKEND_I_OPTIMIZER_H__
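Note on the Config above: the XNNPACK library lifetime is tied to the backend, with xnn_initialize() called from Config::initialize() and xnn_deinitialize() from the destructor. A minimal standalone sketch of that lifecycle, using only the two entry points that appear in this diff:

#include <xnnpack.h>

#include <stdexcept>

int main()
{
  // Mirrors Config::initialize(): passing nullptr selects the default allocator.
  if (xnn_initialize(nullptr /* allocator */) != xnn_status_success)
    throw std::runtime_error{"failed to initialize XNNPACK"};

  // ... create, set up and run xnn operators here ...

  // Mirrors Config::~Config(): release library-wide resources.
  xnn_deinitialize();
  return 0;
}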
diff --git a/runtime/onert/backend/xnnpack/Config.h b/runtime/onert/backend/xnnpack/Config.h
new file mode 100644
index 000000000..2cf7406e5
--- /dev/null
+++ b/runtime/onert/backend/xnnpack/Config.h
@@ -0,0 +1,51 @@
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __ONERT_BACKEND_XNNPACK_CONFIG_H__
+#define __ONERT_BACKEND_XNNPACK_CONFIG_H__
+
+#include <backend/IConfig.h>
+#include <memory>
+#include <util/ITimer.h>
+
+namespace onert
+{
+namespace backend
+{
+namespace xnnpack
+{
+
+class Config : public IConfig
+{
+public:
+ virtual ~Config();
+
+public:
+ std::string id() override { return "xnnpack"; }
+ bool initialize() override;
+ ir::Layout supportLayout(const ir::Operation &node, ir::Layout frontend_layout) override;
+ bool supportPermutation() override { return true; }
+ bool supportDynamicTensor() override { return true; }
+ bool supportFP16() override { return false; }
+
+ std::unique_ptr<util::ITimer> timer() override { return std::make_unique<util::CPUTimer>(); }
+};
+
+} // namespace xnnpack
+} // namespace backend
+} // namespace onert
+
+#endif // __ONERT_BACKEND_XNNPACK_CONFIG_H__
diff --git a/runtime/onert/backend/xnnpack/ConstantInitializer.h b/runtime/onert/backend/xnnpack/ConstantInitializer.h
new file mode 100644
index 000000000..45cdd8cd9
--- /dev/null
+++ b/runtime/onert/backend/xnnpack/ConstantInitializer.h
@@ -0,0 +1,35 @@
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __ONERT_BACKEND_XNNPACK_CONSTANT_INITIALIZER_H__
+#define __ONERT_BACKEND_XNNPACK_CONSTANT_INITIALIZER_H__
+
+#include <backend/cpu_common/ConstantInitializer.h>
+
+namespace onert
+{
+namespace backend
+{
+namespace xnnpack
+{
+
+using ConstantInitializer = cpu_common::ConstantInitializer;
+
+} // namespace xnnpack
+} // namespace backend
+} // namespace onert
+
+#endif // __ONERT_BACKEND_XNNPACK_CONSTANT_INITIALIZER_H__
diff --git a/runtime/onert/core/include/backend/IExternalContext.h b/runtime/onert/backend/xnnpack/ExternalContext.cc
index 88ffb502c..3a9fe1b55 100644
--- a/runtime/onert/core/include/backend/IExternalContext.h
+++ b/runtime/onert/backend/xnnpack/ExternalContext.cc
@@ -14,21 +14,23 @@
* limitations under the License.
*/
-#ifndef __ONERT_BACKEND_IEXTERNAL_CONTEXT_H__
-#define __ONERT_BACKEND_IEXTERNAL_CONTEXT_H__
+#include "ExternalContext.h"
+
+#include <cassert>
namespace onert
{
namespace backend
{
+namespace xnnpack
+{
-struct IExternalContext
+ExternalContext::ExternalContext(size_t num_threads)
+ : _threadpool(pthreadpool_create(num_threads), pthreadpool_destroy)
{
- virtual ~IExternalContext() = default;
- virtual void setMaxNumThreads(int) = 0;
-};
+ assert(_threadpool);
+}
+} // namespace xnnpack
} // namespace backend
} // namespace onert
-
-#endif // __ONERT_BACKEND_IEXTERNAL_CONTEXT__
diff --git a/runtime/onert/backend/xnnpack/ExternalContext.h b/runtime/onert/backend/xnnpack/ExternalContext.h
new file mode 100644
index 000000000..682fd2e4e
--- /dev/null
+++ b/runtime/onert/backend/xnnpack/ExternalContext.h
@@ -0,0 +1,46 @@
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __ONERT_BACKEND_XNNPACK_EXTERNAL_CONTEXT_H__
+#define __ONERT_BACKEND_XNNPACK_EXTERNAL_CONTEXT_H__
+
+#include <memory>
+#include <xnnpack.h>
+
+namespace onert
+{
+namespace backend
+{
+namespace xnnpack
+{
+
+class ExternalContext
+{
+public:
+ ExternalContext(size_t num_threads);
+
+public:
+ pthreadpool *getThreadPool() { return _threadpool.get(); }
+
+private:
+ std::unique_ptr<pthreadpool, decltype(&pthreadpool_destroy)> _threadpool;
+};
+
+} // namespace xnnpack
+} // namespace backend
+} // namespace onert
+
+#endif // __ONERT_BACKEND_XNNPACK_EXTERNAL_CONTEXT_H__
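ExternalContext keeps the thread pool alive for as long as the backend context exists by pairing pthreadpool_create with pthreadpool_destroy in a unique_ptr deleter. A small standalone sketch of that ownership pattern (the thread count of 4 is illustrative only):

#include <pthreadpool.h>

#include <cassert>
#include <memory>

int main()
{
  // Same ownership pattern as ExternalContext::_threadpool: the pool is destroyed
  // automatically when the owning object goes out of scope.
  std::unique_ptr<pthreadpool, decltype(&pthreadpool_destroy)> pool{
      pthreadpool_create(4 /* threads */), pthreadpool_destroy};
  assert(pool);

  // pool.get() is what the layers later pass to xnn_setup_* and xnn_run_operator.
  return 0;
}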
diff --git a/runtime/onert/backend/xnnpack/KernelGenerator.cc b/runtime/onert/backend/xnnpack/KernelGenerator.cc
new file mode 100644
index 000000000..b7d3f60fb
--- /dev/null
+++ b/runtime/onert/backend/xnnpack/KernelGenerator.cc
@@ -0,0 +1,197 @@
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "KernelGenerator.h"
+
+#include "ops/ConvolutionLayer.h"
+#include "ops/DepthwiseConvolutionLayer.h"
+#include "ops/FullyConnectedLayer.h"
+
+#include <backend/Backend.h>
+#include <backend/IConfig.h>
+#include <memory>
+#include <util/Utils.h>
+#include <util/logging.h>
+#include <exec/DynamicShapeInferer.h>
+
+#include <stdexcept>
+
+namespace onert
+{
+namespace backend
+{
+namespace xnnpack
+{
+
+KernelGenerator::KernelGenerator(
+ const ir::Operands &operands_ctx, const ir::Operations &operations_ctx,
+ const std::shared_ptr<TensorBuilder> &tensor_builder,
+ const std::shared_ptr<cpu_common::TensorRegistry> &tensor_reg,
+ const std::shared_ptr<backend::custom::IKernelBuilder> &kernel_builder,
+ const std::shared_ptr<ExternalContext> &external_context)
+ : _ctx(operands_ctx), _operations_ctx{operations_ctx}, _tensor_builder(tensor_builder),
+ _tensor_reg{tensor_reg}, _kernel_builder(kernel_builder),
+ _current_layout(ir::Layout::UNKNOWN), _external_context(external_context)
+{
+ // DO NOTHING
+}
+
+void KernelGenerator::visit(const ir::OpSequence &op_seq)
+{
+ assert(!_return_fn_seq);
+ assert(_tensor_builder->dynamicTensorManager());
+ assert(_tensor_reg);
+
+ auto dyn_shape_inferer = std::make_shared<exec::DynamicShapeInferer>(_ctx, _tensor_reg);
+
+ _return_fn_seq = std::make_unique<exec::FunctionSequence>();
+
+ // Prepare to handle dynamic tensors later
+ auto dyn_ctx = std::make_shared<exec::FunctionSequence::DynamicTensorCtx>();
+ {
+ dyn_ctx->op_seq = &op_seq;
+ dyn_ctx->operations = &_operations_ctx;
+ dyn_ctx->dynamic_shape_inferer = std::move(dyn_shape_inferer);
+ dyn_ctx->dynamic_tensor_manager = _tensor_builder->dynamicTensorManager();
+
+ _return_fn_seq->dynamic_tensor_ctx(dyn_ctx);
+ }
+
+ _current_layout = op_seq.getLayout();
+ for (const auto &operation_idx : op_seq.operations())
+ {
+ const auto &node = _operations_ctx.at(operation_idx);
+ node.accept(*this);
+ _return_fn_seq->append(releaseFunction());
+
+ for (const auto &ind : (node.getInputs() | ir::Remove::UNDEFINED) + node.getOutputs())
+ {
+ auto portable_tensor = _tensor_reg->getPortableTensor(ind);
+ if (portable_tensor)
+ {
+ assert(portable_tensor->layout() == ir::Layout::NHWC);
+ }
+
+ auto tensor = _tensor_reg->getNativeTensor(ind);
+ if (tensor)
+ {
+ tensor->increase_ref();
+ }
+ }
+ }
+}
+
+void KernelGenerator::visit(const ir::operation::Conv2D &node)
+{
+ using ir::operation::Conv2D;
+
+ const auto ofm_index{node.getOutputs().at(0)};
+ const auto ifm_index{node.getInputs().at(Conv2D::Input::INPUT)};
+ const auto ker_index{node.getInputs().at(Conv2D::Input::KERNEL)};
+ const auto bias_index{node.getInputs().at(Conv2D::Input::BIAS)};
+
+ auto ofm_tensor = _tensor_reg->getPortableTensor(ofm_index);
+ auto ifm_tensor = _tensor_reg->getPortableTensor(ifm_index);
+ auto ker_tensor = _tensor_reg->getPortableTensor(ker_index);
+ auto bias_tensor = _tensor_reg->getPortableTensor(bias_index);
+
+ const auto stride = node.param().stride;
+ const auto activation = node.param().activation;
+ const auto param_padding = node.param().padding;
+ const auto dilation = node.param().dilation;
+ auto fn = std::make_unique<ops::ConvolutionLayer>(_external_context);
+
+ const auto ifm_shape = _ctx.at(ifm_index).shape().asFeature(_current_layout);
+ const auto ofm_shape = _ctx.at(ofm_index).shape().asFeature(_current_layout);
+ // Kernel format is [depth_out, kernel_height, kernel_width, depth_in].
+ const auto &ker_shape = _ctx.at(ker_index).shape();
+ const auto ker_height = ker_shape.dim(1);
+ const auto ker_width = ker_shape.dim(2);
+
+ const auto padding =
+ ir::calculatePadding(param_padding, ifm_shape, ofm_shape, stride, ker_width, ker_height,
+ dilation.width_factor, dilation.height_factor);
+
+ fn->configure(ifm_tensor, ker_tensor, bias_tensor, param_padding.type, padding.left,
+ padding.right, padding.top, padding.bottom, stride.horizontal, stride.vertical,
+ dilation.width_factor, dilation.height_factor, activation, ofm_tensor);
+
+ _return_fn = std::move(fn);
+}
+
+void KernelGenerator::visit(const ir::operation::DepthwiseConv2D &node)
+{
+ using ir::operation::DepthwiseConv2D;
+
+ const auto ofm_index{node.getOutputs().at(0)};
+ const auto ifm_index{node.getInputs().at(DepthwiseConv2D::Input::INPUT)};
+ const auto ker_index{node.getInputs().at(DepthwiseConv2D::Input::KERNEL)};
+ const auto bias_index{node.getInputs().at(DepthwiseConv2D::Input::BIAS)};
+
+ const auto stride = node.param().stride;
+ const auto ifm_shape = _ctx.at(ifm_index).shape().asFeature(_current_layout);
+ const auto ofm_shape = _ctx.at(ofm_index).shape().asFeature(_current_layout);
+ // Kernel format is [1, kernel_height, kernel_width, depth_out].
+ const auto &ker_shape = _ctx.at(ker_index).shape();
+ const auto ker_height = ker_shape.dim(1);
+ const auto ker_width = ker_shape.dim(2);
+ const auto dilation_width = node.param().dilation.width_factor;
+ const auto dilation_height = node.param().dilation.height_factor;
+ const auto param_padding = node.param().padding;
+ const auto padding = ir::calculatePadding(param_padding, ifm_shape, ofm_shape, stride, ker_width,
+ ker_height, dilation_width, dilation_height);
+ const auto multiplier = node.param().multiplier;
+ const auto activation = node.param().activation;
+
+ auto ofm_tensor = _tensor_reg->getPortableTensor(ofm_index);
+ auto ifm_tensor = _tensor_reg->getPortableTensor(ifm_index);
+ auto ker_tensor = _tensor_reg->getPortableTensor(ker_index);
+ auto bias_tensor = _tensor_reg->getPortableTensor(bias_index);
+
+ auto fn = std::make_unique<ops::DepthwiseConvolutionLayer>(_external_context);
+
+ fn->configure(ifm_tensor, ker_tensor, bias_tensor, param_padding.type, padding.left,
+ padding.right, padding.top, padding.bottom, stride.horizontal, stride.vertical,
+ multiplier, dilation_width, dilation_height, activation, ofm_tensor);
+
+ _return_fn = std::move(fn);
+}
+
+void KernelGenerator::visit(const ir::operation::FullyConnected &node)
+{
+ using ir::operation::FullyConnected;
+
+ const auto output_index{node.getOutputs().at(0)};
+ const auto input_index{node.getInputs().at(FullyConnected::Input::INPUT)};
+ const auto weight_index{node.getInputs().at(FullyConnected::Input::WEIGHT)};
+ const auto bias_index{node.getInputs().at(FullyConnected::Input::BIAS)};
+ const auto activation = node.param().activation;
+
+ auto output_tensor = _tensor_reg->getPortableTensor(output_index);
+ auto input_tensor = _tensor_reg->getPortableTensor(input_index);
+ auto weight_tensor = _tensor_reg->getPortableTensor(weight_index);
+ auto bias_tensor = bias_index.undefined() ? nullptr : _tensor_reg->getPortableTensor(bias_index);
+
+ auto fn = std::make_unique<ops::FullyConnectedLayer>(_external_context);
+
+ fn->configure(input_tensor, weight_tensor, bias_tensor, activation, output_tensor);
+
+ _return_fn = std::move(fn);
+}
+
+} // namespace xnnpack
+} // namespace backend
+} // namespace onert
diff --git a/runtime/onert/backend/xnnpack/KernelGenerator.h b/runtime/onert/backend/xnnpack/KernelGenerator.h
new file mode 100644
index 000000000..265824204
--- /dev/null
+++ b/runtime/onert/backend/xnnpack/KernelGenerator.h
@@ -0,0 +1,65 @@
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __ONERT_BACKEND_XNNPACK_KERNEL_GENERATOR_H__
+#define __ONERT_BACKEND_XNNPACK_KERNEL_GENERATOR_H__
+
+#include "ExternalContext.h"
+#include "TensorBuilder.h"
+#include "backend/cpu_common/TensorRegistry.h"
+#include "Tensor.h"
+
+#include <backend/CustomKernelBuilder.h>
+#include <backend/cpu_common/KernelGeneratorBase.h>
+#include <ir/Operands.h>
+#include <ir/Operations.h>
+
+namespace onert
+{
+namespace backend
+{
+namespace xnnpack
+{
+
+class KernelGenerator : public cpu_common::KernelGeneratorBase
+{
+public:
+ KernelGenerator(const ir::Operands &operands_ctx, const ir::Operations &operations_ctx,
+ const std::shared_ptr<TensorBuilder> &tensor_builder,
+ const std::shared_ptr<cpu_common::TensorRegistry> &tensor_reg,
+ const std::shared_ptr<custom::IKernelBuilder> &kernel_builder,
+ const std::shared_ptr<ExternalContext> &external_context);
+
+ void visit(const ir::OpSequence &) override;
+ void visit(const ir::operation::Conv2D &) override;
+ void visit(const ir::operation::DepthwiseConv2D &) override;
+ void visit(const ir::operation::FullyConnected &) override;
+
+private:
+ const ir::Operands &_ctx;
+ const ir::Operations &_operations_ctx;
+ std::shared_ptr<TensorBuilder> _tensor_builder;
+ std::shared_ptr<cpu_common::TensorRegistry> _tensor_reg;
+ std::shared_ptr<backend::custom::IKernelBuilder> _kernel_builder;
+ ir::Layout _current_layout;
+ const std::shared_ptr<ExternalContext> _external_context;
+};
+
+} // namespace xnnpack
+} // namespace backend
+} // namespace onert
+
+#endif // __ONERT_BACKEND_XNNPACK_KERNEL_GENERATOR_H__
diff --git a/runtime/onert/backend/xnnpack/StaticTensorManager.h b/runtime/onert/backend/xnnpack/StaticTensorManager.h
new file mode 100644
index 000000000..f7344e8d8
--- /dev/null
+++ b/runtime/onert/backend/xnnpack/StaticTensorManager.h
@@ -0,0 +1,35 @@
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __ONERT_BACKEND_XNNPACK_STATICTENSOR_MANAGER_H__
+#define __ONERT_BACKEND_XNNPACK_STATICTENSOR_MANAGER_H__
+
+#include "backend/cpu_common/StaticTensorManager.h"
+
+namespace onert
+{
+namespace backend
+{
+namespace xnnpack
+{
+
+using StaticTensorManager = cpu_common::StaticTensorManager;
+
+} // namespace xnnpack
+} // namespace backend
+} // namespace onert
+
+#endif // __ONERT_BACKEND_XNNPACK_STATICTENSOR_MANAGER_H__
diff --git a/runtime/onert/backend/xnnpack/Tensor.h b/runtime/onert/backend/xnnpack/Tensor.h
new file mode 100644
index 000000000..b39cbd266
--- /dev/null
+++ b/runtime/onert/backend/xnnpack/Tensor.h
@@ -0,0 +1,37 @@
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __ONERT_BACKEND_XNNPACK_TENSOR_H__
+#define __ONERT_BACKEND_XNNPACK_TENSOR_H__
+
+#include <backend/cpu_common/Tensor.h>
+#include <ir/Data.h>
+
+namespace onert
+{
+namespace backend
+{
+namespace xnnpack
+{
+
+using Tensor = cpu_common::Tensor;
+using ExternalTensor = cpu_common::ExternalTensor;
+
+} // namespace xnnpack
+} // namespace backend
+} // namespace onert
+
+#endif // __ONERT_BACKEND_XNNPACK_TENSOR_H__
diff --git a/runtime/onert/backend/xnnpack/TensorBuilder.cc b/runtime/onert/backend/xnnpack/TensorBuilder.cc
new file mode 100644
index 000000000..b570144ce
--- /dev/null
+++ b/runtime/onert/backend/xnnpack/TensorBuilder.cc
@@ -0,0 +1,90 @@
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "TensorBuilder.h"
+
+#include <util/logging.h>
+
+#include <cassert>
+
+namespace onert
+{
+namespace backend
+{
+namespace xnnpack
+{
+
+TensorBuilder::TensorBuilder(const std::shared_ptr<cpu_common::TensorRegistry> &tensor_reg)
+ : _tensor_reg{tensor_reg},
+ _dynamic_tensor_mgr{new cpu_common::DynamicTensorManager(_tensor_reg)},
+ _static_tensor_mgr{new StaticTensorManager(_tensor_reg, _dynamic_tensor_mgr.get())}
+{
+ /* empty */
+}
+
+void TensorBuilder::registerTensorInfo(const ir::OperandIndex &ind, const ir::OperandInfo &info,
+ ir::Layout layout)
+{
+ _tensor_info_map.emplace(ind, info);
+
+  // The XNNPACK backend supports only the NHWC layout
+ assert(layout == ir::Layout::NHWC);
+ if (info.isDynamic())
+ {
+ _dynamic_tensor_mgr->buildTensor(ind, info, layout);
+ }
+ else
+ {
+ _static_tensor_mgr->buildTensor(ind, info, layout, info.isConstant());
+ }
+}
+
+void TensorBuilder::notifyFirstUse(const ir::OperandIndex &ind)
+{
+ assert(_tensor_info_map.find(ind) != _tensor_info_map.end());
+ const auto tensor_info = _tensor_info_map.at(ind);
+
+ if (!_tensor_reg->getNativeTensor(ind)->is_dynamic())
+ {
+ const auto size = tensor_info.total_size();
+ _static_tensor_mgr->claimPlan(ind, size);
+ }
+}
+
+void TensorBuilder::notifyLastUse(const ir::OperandIndex &ind)
+{
+ if (!_tensor_reg->getNativeTensor(ind)->is_dynamic())
+ {
+ _static_tensor_mgr->releasePlan(ind);
+ }
+}
+
+bool TensorBuilder::isRegistered(const ir::OperandIndex &ind) const
+{
+ return _tensor_info_map.find(ind) != _tensor_info_map.end();
+}
+
+void TensorBuilder::prepare(void) { _static_tensor_mgr->allocateNonconsts(); }
+
+void TensorBuilder::allocate()
+{
+  // NOTE Nothing to do here for now. Allocation already happens in the prepare stage, which is
+  //      not ideal: CPU kernels require `ITensor`s to be allocated before kernel generation.
+}
+
+} // namespace xnnpack
+} // namespace backend
+} // namespace onert
diff --git a/runtime/onert/backend/xnnpack/TensorBuilder.h b/runtime/onert/backend/xnnpack/TensorBuilder.h
new file mode 100644
index 000000000..dddfedbf9
--- /dev/null
+++ b/runtime/onert/backend/xnnpack/TensorBuilder.h
@@ -0,0 +1,73 @@
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __ONERT_BACKEND_XNNPACK_TENSOR_BUILDER_H__
+#define __ONERT_BACKEND_XNNPACK_TENSOR_BUILDER_H__
+
+#include <backend/cpu_common/DynamicTensorManager.h>
+#include <backend/cpu_common/TensorRegistry.h>
+
+#include <ir/OperandIndexMap.h>
+
+#include "StaticTensorManager.h"
+#include "Tensor.h"
+
+#include <unordered_map>
+
+namespace onert
+{
+namespace backend
+{
+namespace xnnpack
+{
+
+class TensorBuilder
+{
+public:
+ TensorBuilder(const std::shared_ptr<cpu_common::TensorRegistry> &tensor_reg);
+
+ /**
+ * @brief Register tensor information to allocate on XNNPACK backend
+ * @param[in] ind Operand index
+ * @param[in] info Operand information
+   * @param[in] backend_layout Operand data layout on this backend
+ */
+ void registerTensorInfo(const ir::OperandIndex &ind, const ir::OperandInfo &info,
+ ir::Layout backend_layout);
+
+ void notifyFirstUse(const ir::OperandIndex &);
+ void notifyLastUse(const ir::OperandIndex &);
+
+ bool isRegistered(const ir::OperandIndex &) const;
+
+ void prepare(void);
+ void allocate();
+  void postFunctionPrepare() { /* DO NOTHING */ }
+
+ IDynamicTensorManager *dynamicTensorManager(void) { return _dynamic_tensor_mgr.get(); }
+
+private:
+ const std::shared_ptr<cpu_common::TensorRegistry> _tensor_reg;
+ std::unique_ptr<cpu_common::DynamicTensorManager> _dynamic_tensor_mgr;
+ std::unique_ptr<StaticTensorManager> _static_tensor_mgr;
+ ir::OperandIndexMap<ir::OperandInfo> _tensor_info_map;
+};
+
+} // namespace xnnpack
+} // namespace backend
+} // namespace onert
+
+#endif // __ONERT_BACKEND_XNNPACK_TENSOR_BUILDER_H__
diff --git a/runtime/onert/backend/xnnpack/ops/ConvolutionLayer.cc b/runtime/onert/backend/xnnpack/ops/ConvolutionLayer.cc
new file mode 100644
index 000000000..0612995c2
--- /dev/null
+++ b/runtime/onert/backend/xnnpack/ops/ConvolutionLayer.cc
@@ -0,0 +1,149 @@
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "ConvolutionLayer.h"
+
+#include "ir/Padding.h"
+
+namespace onert
+{
+namespace backend
+{
+namespace xnnpack
+{
+namespace ops
+{
+ConvolutionLayer::ConvolutionLayer(const std::shared_ptr<ExternalContext> external_context)
+ : Layer(external_context), _input(nullptr), _kernel(nullptr), _bias(nullptr), _output(nullptr),
+ _padding_type(ir::PaddingType::EXPLICIT), _padding_left(0), _padding_top(0),
+ _padding_right(0), _padding_bottom(0), _stride_width(0), _stride_height(0),
+ _dilation_width_factor(1), _dilation_height_factor(1), _activation(ir::Activation::NONE)
+{
+ // DO NOTHING
+}
+
+void ConvolutionLayer::configure(const IPortableTensor *input, const IPortableTensor *kernel,
+ const IPortableTensor *bias, ir::PaddingType padding_type,
+ const uint32_t padding_left, const uint32_t padding_right,
+ const uint32_t padding_top, const uint32_t padding_bottom,
+ const uint32_t stride_width, const uint32_t stride_height,
+ const uint32_t dilation_width_factor,
+ const uint32_t dilation_height_factor,
+ const ir::Activation activation, IPortableTensor *output)
+{
+ _input = input;
+ _kernel = kernel;
+ _bias = bias;
+ _padding_type = padding_type;
+ _padding_left = padding_left;
+ _padding_right = padding_right;
+ _padding_top = padding_top;
+ _padding_bottom = padding_bottom;
+ _stride_width = stride_width;
+ _stride_height = stride_height;
+ _dilation_width_factor = dilation_width_factor;
+ _dilation_height_factor = dilation_height_factor;
+ _activation = activation;
+ _output = output;
+
+  // TODO Support layouts other than NHWC
+ assert(_input->layout() == ir::Layout::NHWC);
+
+ assert(_activation == ir::Activation::NONE || _activation == ir::Activation::RELU ||
+ _activation == ir::Activation::RELU1 || _activation == ir::Activation::RELU6);
+}
+
+void ConvolutionLayer::run()
+{
+ assert(_external_context && _external_context->getThreadPool());
+ if (!_setup)
+ {
+ _setup = setup();
+ assert(_setup);
+ }
+
+ if (_input->data_type() == OperandType::FLOAT32)
+ {
+ enum xnn_status status = xnn_run_operator(_kernel_op, _external_context->getThreadPool());
+ if (status != xnn_status_success)
+ {
+ throw std::runtime_error{"failed to run FP32 Convolution operator"};
+ }
+ }
+ else
+ {
+ throw std::runtime_error{"XNNPACK Conv: unsupported data type"};
+ }
+}
+
+bool ConvolutionLayer::create()
+{
+ float output_activation_min = 0.f, output_activation_max = 0.f;
+ CalculateActivationRange<float>(_activation, &output_activation_min, &output_activation_max);
+
+ // NHWC
+ // Kernel format is [depth_out, kernel_height, kernel_width, depth_in].
+ const auto &kernel_shape = _kernel->getShape();
+ uint32_t kernel_height = kernel_shape.dim(1);
+ uint32_t kernel_width = kernel_shape.dim(2);
+ uint32_t output_channels = kernel_shape.dim(0);
+ uint32_t input_channels = kernel_shape.dim(3);
+ assert(static_cast<uint32_t>(_input->getShape().dim(3)) == input_channels);
+ assert(static_cast<uint32_t>(_output->getShape().dim(3)) == output_channels);
+
+ enum xnn_status status = xnn_create_convolution2d_nhwc_f32(
+ _padding_top, _padding_right, _padding_bottom, _padding_left, kernel_height, kernel_width,
+ _stride_height, _stride_width, _dilation_height_factor, _dilation_width_factor,
+ 1 /* groups */, input_channels /* group_input_channels */,
+ output_channels /* group_output_channels */, input_channels /* input_channel_stride */,
+ output_channels /* output_channel_stride */,
+ reinterpret_cast<const float *>(_kernel->buffer()),
+ reinterpret_cast<const float *>(_bias->buffer()), output_activation_min,
+ output_activation_max, 0, &_kernel_op);
+ if (status != xnn_status_success)
+ {
+ throw std::runtime_error{"failed to create FP32 Convolution operator"};
+ }
+ assert(_kernel_op != nullptr);
+ return true;
+}
+
+bool ConvolutionLayer::setup()
+{
+ if (_input->buffer() == nullptr || _output->buffer() == nullptr)
+ {
+    // it could be the model's input or output
+ return false;
+ }
+
+ uint32_t input_width = _input->getShape().dim(2);
+ uint32_t input_height = _input->getShape().dim(1);
+ uint32_t batch_size = _input->getShape().dim(0);
+ enum xnn_status status = xnn_setup_convolution2d_nhwc_f32(
+ _kernel_op, batch_size, input_height, input_width,
+ reinterpret_cast<const float *>(_input->buffer()),
+ reinterpret_cast<float *>(_output->buffer()), _external_context->getThreadPool());
+ if (status != xnn_status_success)
+ {
+ throw std::runtime_error{"failed to create FP32 Convolution operator"};
+ }
+ return true;
+}
+
+} // namespace ops
+} // namespace xnnpack
+} // namespace backend
+} // namespace onert
diff --git a/runtime/onert/backend/xnnpack/ops/ConvolutionLayer.h b/runtime/onert/backend/xnnpack/ops/ConvolutionLayer.h
new file mode 100644
index 000000000..6cbaa9f3a
--- /dev/null
+++ b/runtime/onert/backend/xnnpack/ops/ConvolutionLayer.h
@@ -0,0 +1,77 @@
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __ONERT_BACKEND_XNNPACK_OPS_CONVOLUTION_LAYER_H__
+#define __ONERT_BACKEND_XNNPACK_OPS_CONVOLUTION_LAYER_H__
+
+#include "Layer.h"
+
+#include <xnnpack.h>
+
+namespace onert
+{
+namespace backend
+{
+namespace xnnpack
+{
+namespace ops
+{
+
+class ConvolutionLayer : public Layer
+{
+public:
+ ConvolutionLayer(const std::shared_ptr<ExternalContext> external_context);
+
+public:
+ void configure(const IPortableTensor *input, const IPortableTensor *kernel,
+ const IPortableTensor *bias, ir::PaddingType padding_type,
+ const uint32_t padding_left, const uint32_t padding_right,
+ const uint32_t padding_top, const uint32_t padding_bottom,
+ const uint32_t stride_width, const uint32_t stride_height,
+ const uint32_t dilation_width_factor, const uint32_t dilation_height_factor,
+ const ir::Activation activation, IPortableTensor *output);
+
+ void run() override;
+
+ bool create() override;
+ bool setup() override;
+
+private:
+ const IPortableTensor *_input;
+ const IPortableTensor *_kernel;
+ const IPortableTensor *_bias;
+ IPortableTensor *_output;
+
+ ir::PaddingType _padding_type;
+ uint32_t _padding_left;
+ uint32_t _padding_top;
+ uint32_t _padding_right;
+ uint32_t _padding_bottom;
+
+ uint32_t _stride_width;
+ uint32_t _stride_height;
+ uint32_t _dilation_width_factor;
+ uint32_t _dilation_height_factor;
+
+ ir::Activation _activation;
+};
+
+} // namespace ops
+} // namespace xnnpack
+} // namespace backend
+} // namespace onert
+
+#endif // __ONERT_BACKEND_XNNPACK_OPS_CONVOLUTION_LAYER_H__
diff --git a/runtime/onert/backend/xnnpack/ops/DepthwiseConvolutionLayer.cc b/runtime/onert/backend/xnnpack/ops/DepthwiseConvolutionLayer.cc
new file mode 100644
index 000000000..947f04194
--- /dev/null
+++ b/runtime/onert/backend/xnnpack/ops/DepthwiseConvolutionLayer.cc
@@ -0,0 +1,150 @@
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "DepthwiseConvolutionLayer.h"
+
+#include "ir/Padding.h"
+
+namespace onert
+{
+namespace backend
+{
+namespace xnnpack
+{
+namespace ops
+{
+
+DepthwiseConvolutionLayer::DepthwiseConvolutionLayer(
+ const std::shared_ptr<ExternalContext> external_context)
+ : Layer(external_context), _input(nullptr), _kernel(nullptr), _bias(nullptr), _output(nullptr),
+ _padding_type(ir::PaddingType::EXPLICIT), _padding_left(0), _padding_top(0),
+ _padding_right(0), _padding_bottom(0), _stride_width(0), _stride_height(0), _multiplier(1),
+ _dilation_width_factor(1), _dilation_height_factor(1), _activation(ir::Activation::NONE)
+{
+ // DO NOTHING
+}
+
+void DepthwiseConvolutionLayer::configure(
+ const IPortableTensor *input, const IPortableTensor *kernel, const IPortableTensor *bias,
+ ir::PaddingType padding_type, const uint32_t padding_left, const uint32_t padding_right,
+ const uint32_t padding_top, const uint32_t padding_bottom, const uint32_t stride_width,
+ const uint32_t stride_height, const uint32_t multiplier, const uint32_t dilation_width_factor,
+ const uint32_t dilation_height_factor, const ir::Activation activation, IPortableTensor *output)
+{
+ _input = input;
+ _kernel = kernel;
+ _bias = bias;
+ _padding_type = padding_type;
+ _padding_left = padding_left;
+ _padding_right = padding_right;
+ _padding_top = padding_top;
+ _padding_bottom = padding_bottom;
+ _stride_width = stride_width;
+ _stride_height = stride_height;
+ _multiplier = multiplier;
+ _dilation_width_factor = dilation_width_factor;
+ _dilation_height_factor = dilation_height_factor;
+ _activation = activation;
+ _output = output;
+
+  // TODO Support layouts other than NHWC
+ assert(_input->layout() == ir::Layout::NHWC);
+
+ assert(_activation == ir::Activation::NONE || _activation == ir::Activation::RELU ||
+ _activation == ir::Activation::RELU1 || _activation == ir::Activation::RELU6);
+}
+
+void DepthwiseConvolutionLayer::run()
+{
+ assert(_external_context && _external_context->getThreadPool());
+ if (!_setup)
+ {
+ _setup = setup();
+ assert(_setup);
+ }
+
+ if (_input->data_type() == OperandType::FLOAT32)
+ {
+ enum xnn_status status = xnn_run_operator(_kernel_op, _external_context->getThreadPool());
+ if (status != xnn_status_success)
+ {
+ throw std::runtime_error{"failed to run FP32 DepthwiseConvolution operator"};
+ }
+ }
+ else
+ {
+ throw std::runtime_error{"XNNPACK DepthwiseConv: unsupported data type"};
+ }
+}
+
+bool DepthwiseConvolutionLayer::create()
+{
+ float output_activation_min = 0.f, output_activation_max = 0.f;
+ CalculateActivationRange<float>(_activation, &output_activation_min, &output_activation_max);
+
+ // NHWC
+ // Kernel format is [1, kernel_height, kernel_width, depth_out].
+ const auto &kernel_shape = _kernel->getShape();
+ uint32_t kernel_height = kernel_shape.dim(1);
+ uint32_t kernel_width = kernel_shape.dim(2);
+ uint32_t output_channels = kernel_shape.dim(3);
+ uint32_t input_channels = _input->getShape().dim(3);
+ assert(static_cast<uint32_t>(_output->getShape().dim(3)) == output_channels);
+ assert(output_channels == input_channels * _multiplier);
+
+ enum xnn_status status = xnn_create_convolution2d_nhwc_f32(
+ _padding_top, _padding_right, _padding_bottom, _padding_left, kernel_height, kernel_width,
+ _stride_height, _stride_width, _dilation_height_factor, _dilation_width_factor,
+ input_channels /* groups */, 1 /* group_input_channels */,
+ _multiplier /* group_output_channels */, input_channels /* input_channel_stride */,
+ output_channels /* output_channel_stride */,
+ reinterpret_cast<const float *>(_kernel->buffer()),
+ reinterpret_cast<const float *>(_bias->buffer()), output_activation_min,
+ output_activation_max, XNN_FLAG_DEPTHWISE_CONVOLUTION, &_kernel_op);
+ if (status != xnn_status_success)
+ {
+ throw std::runtime_error{"failed to create FP32 DepthwiseConvolution operator"};
+ }
+ assert(_kernel_op != nullptr);
+ return true;
+}
+
+bool DepthwiseConvolutionLayer::setup()
+{
+ if (_input->buffer() == nullptr || _output->buffer() == nullptr)
+ {
+    // it could be the model's input or output
+ return false;
+ }
+
+ uint32_t input_width = _input->getShape().dim(2);
+ uint32_t input_height = _input->getShape().dim(1);
+ uint32_t batch_size = _input->getShape().dim(0);
+ enum xnn_status status = xnn_setup_convolution2d_nhwc_f32(
+ _kernel_op, batch_size, input_height, input_width,
+ reinterpret_cast<const float *>(_input->buffer()),
+ reinterpret_cast<float *>(_output->buffer()), _external_context->getThreadPool());
+ if (status != xnn_status_success)
+ {
+ throw std::runtime_error{"failed to create FP32 DepthwiseConvolution operator"};
+ }
+ return true;
+}
+
+} // namespace ops
+} // namespace xnnpack
+} // namespace backend
+} // namespace onert
diff --git a/runtime/onert/backend/xnnpack/ops/DepthwiseConvolutionLayer.h b/runtime/onert/backend/xnnpack/ops/DepthwiseConvolutionLayer.h
new file mode 100644
index 000000000..10f840ae7
--- /dev/null
+++ b/runtime/onert/backend/xnnpack/ops/DepthwiseConvolutionLayer.h
@@ -0,0 +1,77 @@
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __ONERT_BACKEND_XNNPACK_OPS_DEPTHWISE_CONVOLUTION_LAYER_H__
+#define __ONERT_BACKEND_XNNPACK_OPS_DEPTHWISE_CONVOLUTION_LAYER_H__
+
+#include "Layer.h"
+
+namespace onert
+{
+namespace backend
+{
+namespace xnnpack
+{
+namespace ops
+{
+
+class DepthwiseConvolutionLayer : public Layer
+{
+public:
+ DepthwiseConvolutionLayer(const std::shared_ptr<ExternalContext> external_context);
+
+public:
+ void configure(const IPortableTensor *input, const IPortableTensor *kernel,
+ const IPortableTensor *bias, ir::PaddingType padding_type,
+ const uint32_t padding_left, const uint32_t padding_right,
+ const uint32_t padding_top, const uint32_t padding_bottom,
+ const uint32_t stride_width, const uint32_t stride_height,
+ const uint32_t multiplier, const uint32_t dilation_width_factor,
+ const uint32_t dilation_height_factor, const ir::Activation activation,
+ IPortableTensor *output);
+
+ void run() override;
+
+ bool create() override;
+ bool setup() override;
+
+private:
+ const IPortableTensor *_input;
+ const IPortableTensor *_kernel;
+ const IPortableTensor *_bias;
+ IPortableTensor *_output;
+
+ ir::PaddingType _padding_type;
+ uint32_t _padding_left;
+ uint32_t _padding_top;
+ uint32_t _padding_right;
+ uint32_t _padding_bottom;
+
+ uint32_t _stride_width;
+ uint32_t _stride_height;
+ uint32_t _multiplier;
+ uint32_t _dilation_width_factor;
+ uint32_t _dilation_height_factor;
+
+ ir::Activation _activation;
+};
+
+} // namespace ops
+} // namespace xnnpack
+} // namespace backend
+} // namespace onert
+
+#endif // __ONERT_BACKEND_XNNPACK_OPS_DEPTHWISE_CONVOLUTION_LAYER_H__
diff --git a/runtime/onert/backend/xnnpack/ops/FullyConnectedLayer.cc b/runtime/onert/backend/xnnpack/ops/FullyConnectedLayer.cc
new file mode 100644
index 000000000..d595fda36
--- /dev/null
+++ b/runtime/onert/backend/xnnpack/ops/FullyConnectedLayer.cc
@@ -0,0 +1,138 @@
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "FullyConnectedLayer.h"
+
+#include "ir/Padding.h"
+
+namespace onert
+{
+namespace backend
+{
+namespace xnnpack
+{
+namespace ops
+{
+
+FullyConnectedLayer::FullyConnectedLayer(const std::shared_ptr<ExternalContext> external_context)
+ : Layer(external_context), _input(nullptr), _kernel(nullptr), _bias(nullptr), _output(nullptr),
+ _activation(ir::Activation::NONE)
+{
+ // DO NOTHING
+}
+
+void FullyConnectedLayer::configure(const IPortableTensor *input, const IPortableTensor *weights,
+ const IPortableTensor *bias, ir::Activation activation,
+ IPortableTensor *output)
+{
+ _input = input;
+ _kernel = weights;
+ _bias = bias;
+ _activation = activation;
+ _output = output;
+
+  // TODO Support layouts other than NHWC
+ assert(_input->layout() == ir::Layout::NHWC);
+
+ assert(_activation == ir::Activation::NONE || _activation == ir::Activation::RELU ||
+ _activation == ir::Activation::RELU1 || _activation == ir::Activation::RELU6);
+}
+
+void FullyConnectedLayer::run()
+{
+ assert(_external_context && _external_context->getThreadPool());
+ if (!_setup)
+ {
+ _setup = setup();
+ assert(_setup);
+ }
+
+ if (_input->data_type() == OperandType::FLOAT32)
+ {
+ enum xnn_status status = xnn_run_operator(_kernel_op, _external_context->getThreadPool());
+ if (status != xnn_status_success)
+ {
+ throw std::runtime_error{"failed to run FP32 FullyConnected operator"};
+ }
+ }
+ else
+ {
+ throw std::runtime_error{"XNNPACK FC: unsupported data type"};
+ }
+}
+
+bool FullyConnectedLayer::create()
+{
+ float output_activation_min = 0.f, output_activation_max = 0.f;
+ CalculateActivationRange<float>(_activation, &output_activation_min, &output_activation_max);
+
+ const auto &kernel_shape = _kernel->getShape();
+ assert(kernel_shape.rank() == 2);
+ uint32_t output_channels = kernel_shape.dim(0);
+ uint32_t input_channels = kernel_shape.dim(1);
+
+ const auto &input_shape = _input->getShape();
+ const auto &output_shape = _output->getShape();
+ uint32_t flag = 0;
+ if (input_shape.rank() != output_shape.rank())
+ {
+ flag |= XNN_FLAG_TENSORFLOW_RESHAPE_2D;
+ assert(input_shape.num_elements() % input_channels == 0);
+ }
+ else
+ {
+ assert(static_cast<uint32_t>(input_shape.dim(input_shape.rank() - 1)) == input_channels);
+ }
+
+ assert(_kernel && _kernel->buffer());
+ const float *kernel_buffer = reinterpret_cast<const float *>(_kernel->buffer());
+ const float *bias_buffer = (_bias) ? reinterpret_cast<const float *>(_bias->buffer()) : nullptr;
+
+ enum xnn_status status = xnn_create_fully_connected_nc_f32(
+ input_channels, output_channels, input_channels /* input stride */,
+ output_channels /* output stride */, kernel_buffer, bias_buffer, output_activation_min,
+ output_activation_max, flag, &_kernel_op);
+ if (status != xnn_status_success)
+ {
+ throw std::runtime_error{"failed to create FP32 FullyConnected operator"};
+ }
+ assert(_kernel_op != nullptr);
+ return true;
+}
+
+bool FullyConnectedLayer::setup()
+{
+ if (_input->buffer() == nullptr || _output->buffer() == nullptr)
+ {
+    // it could be the model's input or output
+ return false;
+ }
+
+ uint32_t batch_size = _input->getShape().num_elements() / _kernel->getShape().dim(1);
+ enum xnn_status status = xnn_setup_fully_connected_nc_f32(
+ _kernel_op, batch_size, reinterpret_cast<const float *>(_input->buffer()),
+ reinterpret_cast<float *>(_output->buffer()), _external_context->getThreadPool());
+ if (status != xnn_status_success)
+ {
+ throw std::runtime_error{"failed to create FP32 FullyConnected operator"};
+ }
+ return true;
+}
+
+} // namespace ops
+} // namespace xnnpack
+} // namespace backend
+} // namespace onert
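When the input rank differs from the output rank, FullyConnectedLayer sets XNN_FLAG_TENSORFLOW_RESHAPE_2D and derives the batch size from the input element count, as in setup() above. A small sketch of that arithmetic with illustrative shapes:

#include <cassert>
#include <cstdint>
#include <cstdio>

int main()
{
  // Hypothetical shapes: a [2, 7, 7, 64] input multiplied by a [10, 3136] kernel.
  const uint32_t input_elements = 2 * 7 * 7 * 64; // 6272
  const uint32_t input_channels = 3136;           // kernel shape dim(1)

  assert(input_elements % input_channels == 0);
  const uint32_t batch_size = input_elements / input_channels;
  std::printf("batch_size=%u\n", batch_size); // 2
  return 0;
}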
diff --git a/runtime/onert/backend/xnnpack/ops/FullyConnectedLayer.h b/runtime/onert/backend/xnnpack/ops/FullyConnectedLayer.h
new file mode 100644
index 000000000..883607ef9
--- /dev/null
+++ b/runtime/onert/backend/xnnpack/ops/FullyConnectedLayer.h
@@ -0,0 +1,61 @@
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __ONERT_BACKEND_XNNPACK_OPS_FULLY_CONNECTED_LAYER_H__
+#define __ONERT_BACKEND_XNNPACK_OPS_FULLY_CONNECTED_LAYER_H__
+
+#include "Layer.h"
+
+#include <xnnpack.h>
+
+namespace onert
+{
+namespace backend
+{
+namespace xnnpack
+{
+namespace ops
+{
+
+class FullyConnectedLayer : public Layer
+{
+public:
+ FullyConnectedLayer(const std::shared_ptr<ExternalContext> external_context);
+
+public:
+ void configure(const IPortableTensor *input, const IPortableTensor *_kernel,
+ const IPortableTensor *bias, ir::Activation activation, IPortableTensor *output);
+
+ void run() override;
+
+ bool create() override;
+ bool setup() override;
+
+private:
+ const IPortableTensor *_input;
+ const IPortableTensor *_kernel;
+ const IPortableTensor *_bias;
+ IPortableTensor *_output;
+
+ ir::Activation _activation;
+};
+
+} // namespace ops
+} // namespace xnnpack
+} // namespace backend
+} // namespace onert
+
+#endif // __ONERT_BACKEND_XNNPACK_OPS_FULLY_CONNECTED_LAYER_H__
diff --git a/runtime/onert/backend/xnnpack/ops/Layer.h b/runtime/onert/backend/xnnpack/ops/Layer.h
new file mode 100644
index 000000000..68b610f33
--- /dev/null
+++ b/runtime/onert/backend/xnnpack/ops/Layer.h
@@ -0,0 +1,81 @@
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __ONERT_BACKEND_XNNPACK_OPS_LAYER_H__
+#define __ONERT_BACKEND_XNNPACK_OPS_LAYER_H__
+
+#include <exec/IFunction.h>
+#include <backend/IPortableTensor.h>
+#include "OperationUtils.h"
+#include "../ExternalContext.h"
+#include "../Tensor.h"
+
+#include <cassert>
+#include <memory>
+
+#include <xnnpack.h>
+
+namespace onert
+{
+namespace backend
+{
+namespace xnnpack
+{
+namespace ops
+{
+
+class Layer : public ::onert::exec::IFunction
+{
+public:
+ Layer(const std::shared_ptr<ExternalContext> external_context)
+ : _kernel_op{nullptr}, _create{false}, _setup{false}, _external_context{external_context}
+ {
+ // DO NOTHING
+ }
+
+ ~Layer()
+ {
+ if (_kernel_op)
+ xnn_delete_operator(_kernel_op);
+ }
+
+public:
+ void prepare() override
+ {
+ if (_create)
+ return;
+
+ _create = create();
+ assert(_create);
+
+ _setup = setup();
+ }
+ virtual bool create() = 0;
+ virtual bool setup() = 0;
+
+protected:
+ xnn_operator_t _kernel_op;
+ bool _create;
+ bool _setup;
+ const std::shared_ptr<ExternalContext> _external_context;
+};
+
+} // namespace ops
+} // namespace xnnpack
+} // namespace backend
+} // namespace onert
+
+#endif // __ONERT_BACKEND_XNNPACK_OPS_LAYER_H__
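Layer splits operator construction (create, which only needs weights) from binding (setup, which needs input/output buffers); the concrete layers retry setup() inside run() because model inputs and outputs are bound late. A distilled standalone mock of that state machine; FakeLayer and its members are hypothetical and stand in for a real xnn_operator_t-backed layer:

#include <cassert>
#include <cstdio>

class FakeLayer
{
public:
  void prepare()
  {
    if (_create)
      return;
    _create = create(); // build the operator once weights are known
    assert(_create);
    _setup = setup();   // may fail here: I/O buffers are not bound yet
  }

  void run()
  {
    if (!_setup)
      _setup = setup(); // late setup, as in ConvolutionLayer::run()
    assert(_setup);
    std::puts("kernel executed");
  }

  void bind_io() { _io_bound = true; } // stands in for buffer allocation

private:
  bool create() { return true; }
  bool setup() { return _io_bound; }

  bool _create = false;
  bool _setup = false;
  bool _io_bound = false;
};

int main()
{
  FakeLayer layer;
  layer.prepare(); // create succeeds, setup is deferred
  layer.bind_io();
  layer.run();     // setup retried here, then the kernel runs
  return 0;
}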
diff --git a/runtime/onert/backend/xnnpack/ops/OperationUtils.h b/runtime/onert/backend/xnnpack/ops/OperationUtils.h
new file mode 100644
index 000000000..5102e32dd
--- /dev/null
+++ b/runtime/onert/backend/xnnpack/ops/OperationUtils.h
@@ -0,0 +1,75 @@
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __ONERT_BACKEND_XNNPACK_OPS_OPERATION_UTILS_H__
+#define __ONERT_BACKEND_XNNPACK_OPS_OPERATION_UTILS_H__
+
+// duplicated from cpu/ops/OperationUtils.h
+#include <ir/InternalType.h>
+#include <ir/Padding.h>
+#include <ir/DataType.h>
+
+#include <limits>    // std::numeric_limits used below
+#include <stdexcept> // std::runtime_error used below
+
+namespace onert
+{
+namespace backend
+{
+namespace xnnpack
+{
+namespace ops
+{
+
+using OperandType = ir::DataType;
+
+template <typename T>
+void CalculateActivationRange(ir::Activation activation, T *activation_min, T *activation_max)
+{
+ if (activation == ir::Activation::RELU)
+ {
+ *activation_min = 0;
+ *activation_max = std::numeric_limits<T>::max();
+ }
+ else if (activation == ir::Activation::RELU6)
+ {
+ *activation_min = 0;
+ *activation_max = 6;
+ }
+ else if (activation == ir::Activation::RELU1)
+ {
+ *activation_min = -1;
+ *activation_max = 1;
+ }
+ else if (activation == ir::Activation::SIGMOID)
+ {
+ *activation_min = 0;
+ *activation_max = 1;
+ }
+ else if (activation == ir::Activation::NONE)
+ {
+ *activation_min = std::numeric_limits<T>::lowest();
+ *activation_max = std::numeric_limits<T>::max();
+ }
+ else
+ {
+ throw std::runtime_error{"Unsupported fused activation function"};
+ }
+}
+
+} // namespace ops
+} // namespace xnnpack
+} // namespace backend
+} // namespace onert
+
+#endif // __ONERT_BACKEND_XNNPACK_OPS_OPERATION_UTILS_H__
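A short usage sketch of CalculateActivationRange; it assumes the snippet is compiled inside the xnnpack backend sources so that the include below resolves:

#include <cstdio>

#include "OperationUtils.h" // onert::backend::xnnpack::ops

int main()
{
  using onert::backend::xnnpack::ops::CalculateActivationRange;

  float lo = 0.f, hi = 0.f;
  CalculateActivationRange<float>(onert::ir::Activation::RELU6, &lo, &hi);
  std::printf("RELU6: [%g, %g]\n", lo, hi); // [0, 6]

  CalculateActivationRange<float>(onert::ir::Activation::NONE, &lo, &hi);
  std::printf("NONE:  [%g, %g]\n", lo, hi); // [lowest float, max float]
  return 0;
}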
diff --git a/runtime/onert/backend/xnnpack/xnnpack.cc b/runtime/onert/backend/xnnpack/xnnpack.cc
new file mode 100644
index 000000000..38a6c5572
--- /dev/null
+++ b/runtime/onert/backend/xnnpack/xnnpack.cc
@@ -0,0 +1,33 @@
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "Backend.h"
+
+#include <util/logging.h>
+
+extern "C" {
+onert::backend::Backend *onert_backend_create()
+{
+ VERBOSE(onert_backend_create) << "'xnnpack' loaded\n";
+ return new onert::backend::xnnpack::Backend;
+}
+
+void onert_backend_destroy(onert::backend::Backend *backend)
+{
+ VERBOSE(onert_backend_create) << "'xnnpack' unloaded\n";
+ delete backend;
+}
+}
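The extern "C" pair above is the plugin entry point; presumably the onert backend loader resolves it roughly like the dlopen sketch below (the loader itself is not part of this diff; the library name follows the OUTPUT_NAME set in the CMake script earlier):

#include <dlfcn.h>

#include <cstdio>

namespace onert
{
namespace backend
{
class Backend; // opaque here; only pointers cross the plugin boundary
} // namespace backend
} // namespace onert

int main()
{
  void *handle = dlopen("libbackend_xnnpack.so", RTLD_LAZY | RTLD_LOCAL);
  if (!handle)
  {
    std::fprintf(stderr, "dlopen failed: %s\n", dlerror());
    return 1;
  }

  using create_fn = onert::backend::Backend *(*)();
  using destroy_fn = void (*)(onert::backend::Backend *);

  auto create = reinterpret_cast<create_fn>(dlsym(handle, "onert_backend_create"));
  auto destroy = reinterpret_cast<destroy_fn>(dlsym(handle, "onert_backend_destroy"));
  if (create && destroy)
  {
    auto *backend = create(); // logs "'xnnpack' loaded"
    destroy(backend);         // logs "'xnnpack' unloaded"
  }

  dlclose(handle);
  return 0;
}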
diff --git a/runtime/onert/core/include/backend/BackendContext.h b/runtime/onert/core/include/backend/BackendContext.h
index 1eba29550..4d212156a 100644
--- a/runtime/onert/core/include/backend/BackendContext.h
+++ b/runtime/onert/core/include/backend/BackendContext.h
@@ -19,6 +19,8 @@
#include <memory>
#include "ir/Graph.h"
+#include "ir/LowerInfoMap.h"
+#include "exec/FunctionSequence.h"
namespace onert
{
@@ -26,12 +28,10 @@ namespace backend
{
class Backend;
-class IConstantInitializer;
-class IKernelGenerator;
-class ITensorRegister;
struct ITensorRegistry;
-struct ITensorBuilder;
-struct IOptimizer;
+
+using FunctionMap =
+ std::vector<std::pair<ir::OpSequenceIndex, std::unique_ptr<exec::FunctionSequence>>>;
class BackendContext
{
@@ -46,15 +46,8 @@ public:
public:
BackendContext(const Backend *backend, const ir::Graph *graph,
- std::shared_ptr<ITensorRegistry> tensor_registry = nullptr,
- std::shared_ptr<ITensorBuilder> tensor_builder = nullptr,
- std::shared_ptr<IConstantInitializer> constant_initializer = nullptr,
- std::shared_ptr<IKernelGenerator> kernel_gen = nullptr,
- std::shared_ptr<ITensorRegister> tensor_register = nullptr,
- std::shared_ptr<IOptimizer> optimizer = nullptr)
- : _backend{backend}, _graph{graph}, tensor_registry{tensor_registry},
- tensor_builder{tensor_builder}, constant_initializer{constant_initializer},
- kernel_gen{kernel_gen}, tensor_register{tensor_register}, optimizer{optimizer}
+ std::shared_ptr<ITensorRegistry> tensor_registry = nullptr)
+ : _backend{backend}, _graph{graph}, tensor_registry{tensor_registry}
{
}
@@ -66,8 +59,19 @@ public:
const Backend *backend() const { return _backend; }
const ir::Graph *graph() const { return _graph; }
- const std::vector<OperationInfo> &operation_list() { return _operation_list; }
- const std::vector<ir::OperandIndex> &operand_list() { return _operand_list; }
+ const std::vector<OperationInfo> &operation_list() const { return _operation_list; }
+ const std::vector<ir::OperandIndex> &operand_list() const { return _operand_list; }
+
+ virtual ITensorRegistry *genTensors(const std::vector<onert::ir::OpSequenceIndex> &,
+ const ir::OpSequences &, const ir::LowerInfoMap &)
+ {
+ return nullptr;
+ }
+ virtual FunctionMap genKernels(const std::vector<onert::ir::OpSequenceIndex> &,
+ const ir::OpSequences &)
+ {
+ return {};
+ }
private:
const Backend *_backend{nullptr};
@@ -77,11 +81,6 @@ private:
public:
std::shared_ptr<ITensorRegistry> tensor_registry;
- std::shared_ptr<ITensorBuilder> tensor_builder;
- std::shared_ptr<IConstantInitializer> constant_initializer;
- std::shared_ptr<IKernelGenerator> kernel_gen;
- std::shared_ptr<ITensorRegister> tensor_register;
- std::shared_ptr<IOptimizer> optimizer;
};
using BackendContexts = std::unordered_map<const Backend *, std::unique_ptr<BackendContext>>;
diff --git a/runtime/onert/core/include/backend/ITensorBuilder.h b/runtime/onert/core/include/backend/ITensorBuilder.h
deleted file mode 100644
index 97721cf19..000000000
--- a/runtime/onert/core/include/backend/ITensorBuilder.h
+++ /dev/null
@@ -1,108 +0,0 @@
-/*
- * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#ifndef __ONERT_BACKEND_ITENSOR_BUILDER_H__
-#define __ONERT_BACKEND_ITENSOR_BUILDER_H__
-
-#include <map>
-
-#include "ir/Index.h"
-#include "ir/OperandInfo.h"
-#include "ir/Operation.h"
-#include "ir/Layout.h"
-#include "ITensor.h"
-#include "ITensorManager.h"
-#include "ITensorRegistry.h"
-#include "IDynamicTensorManager.h"
-
-namespace onert
-{
-namespace backend
-{
-
-struct ITensorBuilder
-{
- using IterateFunction = std::function<void(const ir::OperandIndex &)>;
-
- virtual ~ITensorBuilder(void) = default;
-
- /**
- * @brief Register tensor information to allocate on backend
- *
- * @param ind Index
- * @param info Info
- * @param backend_layout Backend layout
- * @param as_const Whether this tensor is constant
- */
- virtual void registerTensorInfo(const ir::OperandIndex &ind, const ir::OperandInfo &info,
- ir::Layout backend_layout) = 0;
-
- /**
- * @brief Check if the tensor has been registered with @c registerTensorInfo
- *
- * @return true If the tensor has been registered
- * @return false Otherwise
- */
- virtual bool isRegistered(const ir::OperandIndex &) const = 0;
-
-public: // methods for static tensor allocation
- /**
- * @brief Let the tensor builder know first use(start of lifetime) of a tensor
- * Must be called before calling @c prepare
- * Must be run up to once for each tensor before calling @c notifyLastUse
- * NOTE: Useful only for static models
- */
- virtual void notifyFirstUse(const ir::OperandIndex &) = 0;
- /**
- * @brief Let the tensor builder know last use(end of lifetime) of a tensor
- * Must be run up to once for each tensor after calling @c notifyFirstUse
- * NOTE: Useful only for static models
- */
- virtual void notifyLastUse(const ir::OperandIndex &) = 0;
- /**
- * @brief Prepare the tensors
- * Before calling this, all the tensors must be registered
- */
- virtual void prepare(void) = 0;
- /**
- * @brief Allocate the tensors
- * Before calling this, @c prepare must be called
- */
- virtual void allocate() = 0;
- /**
- * @brief Some actions after functions' @c IFunction::prepare method.
- * This is called right after each function's @c IFunction::prepare function has been
- * called.
- */
- virtual void postFunctionPrepare() = 0;
-
-public: // methods for dynamic tensor allocation
- /**
- * @brief Get dynamicTensorManager. If a backend does not support dynamic tensor, exception
- * will be thrown.
- *
- * @return pointer of IDynamicTensorManager object
- *
- * @note Since it is a pointer, its life time is from the cration of TensorBuilder
- * to the end of execution
- */
- virtual IDynamicTensorManager *dynamicTensorManager(void) { return nullptr; }
-};
-
-} // namespace backend
-} // namespace onert
-
-#endif // __ONERT_BACKEND_ITENSOR_BUILDER_H__
diff --git a/runtime/onert/core/include/backend/ITensorRegister.h b/runtime/onert/core/include/backend/ITensorRegister.h
deleted file mode 100644
index b8e521ce3..000000000
--- a/runtime/onert/core/include/backend/ITensorRegister.h
+++ /dev/null
@@ -1,97 +0,0 @@
-/*
- * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#ifndef __ONERT_BACKEND_ITENSOR_REGISTER_H__
-#define __ONERT_BACKEND_ITENSOR_REGISTER_H__
-
-#include "ir/LowerInfoMap.h"
-#include "ITensorBuilder.h"
-#include "ir/Layout.h"
-#include "ir/OperandIndexSequence.h"
-#include "ir/OperandInfo.h"
-#include "ir/Operands.h"
-#include "ir/OperationVisitor.h"
-
-namespace onert
-{
-namespace backend
-{
-
-class ITensorRegister : public ir::OperationVisitor
-{
-public:
- virtual ~ITensorRegister() = default;
-
-public:
- void registerTensors(const ir::OpSequence &op_seq, const ir::LowerInfoMap *lower_info_map)
- {
- _current_op_seq_layout = op_seq.getLayout();
- _lower_info_map = lower_info_map;
- assert(_lower_info_map != nullptr);
- assert(tensor_builder().get() != nullptr);
- op_seq.accept(*this);
- }
-
-protected:
- virtual const ir::Operands &operands() const = 0;
- virtual std::shared_ptr<ITensorBuilder> tensor_builder() const = 0;
-
-protected:
-#define OP(InternalName) \
- void visit(const ir::operation::InternalName &node) override \
- { \
- for (const auto &ind : (node.getInputs() | ir::Remove::UNDEFINED) + node.getOutputs()) \
- { \
- defaultRegisterTensorInfo(ind); \
- } \
- }
-#include "ir/Operations.lst"
-#undef OP
-
-protected:
- void defaultRegisterTensorInfo(const ir::OperandIndex &index) const
- {
- if (tensor_builder()->isRegistered(index))
- {
- return;
- }
-
- const auto &obj = operands().at(index);
- const auto frontend_layout = frontendLayout();
- const auto backend_layout = backendLayout(index);
- ir::OperandInfo backend_info{permuteShape(obj.shape(), frontend_layout, backend_layout),
- obj.typeInfo(), obj.info().memAllocType(), obj.isConstant()};
- tensor_builder()->registerTensorInfo(index, backend_info, backend_layout);
- }
-
-protected:
- ir::Layout frontendLayout() const { return _current_op_seq_layout; }
- ir::Layout backendLayout(const ir::OperandIndex &index) const
- {
- assert(_lower_info_map != nullptr);
- const auto lower_info = _lower_info_map->operand.at(index).get();
- return lower_info->def_factors().getOnlyElement().layout();
- }
-
-private:
- ir::Layout _current_op_seq_layout;
- const ir::LowerInfoMap *_lower_info_map{nullptr};
-};
-
-} // namespace backend
-} // namespace onert
-
-#endif // __ONERT_BACKEND_ITENSOR_REGISTER_H__
diff --git a/runtime/onert/core/include/backend/cpu_common/BackendContextHelpers.h b/runtime/onert/core/include/backend/cpu_common/BackendContextHelpers.h
new file mode 100644
index 000000000..19e7b7c99
--- /dev/null
+++ b/runtime/onert/core/include/backend/cpu_common/BackendContextHelpers.h
@@ -0,0 +1,193 @@
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __ONERT_BACKEND_CPU_COMMON_BACKEND_CONTEXT_HELPERS_H__
+#define __ONERT_BACKEND_CPU_COMMON_BACKEND_CONTEXT_HELPERS_H__
+
+#include <vector>
+
+#include "ir/Index.h"
+#include "ir/OpSequences.h"
+#include "ir/LowerInfoMap.h"
+#include "util/logging.h"
+
+namespace onert
+{
+namespace backend
+{
+namespace cpu_common
+{
+
+// TODO Remove the template param BackendContext once unification of cpu backend context is done
+template <typename T_BackendContext>
+void planTensors(const T_BackendContext &ctx, const std::vector<onert::ir::OpSequenceIndex> &order,
+ const ir::OpSequences &op_seqs, const ir::LowerInfoMap &lower_info)
+{
+ auto graph = ctx.graph();
+ auto tensor_builder = ctx.tensor_builder;
+
+ ir::OperandIndexMap<uint32_t> uses_map;
+ ir::OperandIndexMap<uint32_t> def_map;
+ ir::OperandIndexSequence constants;
+
+ auto model_io =
+ (graph->getInputs() + graph->getOutputs()) | ir::Remove::UNDEFINED | ir::Remove::DUPLICATED;
+
+ // Prepare scanning
+ for (auto ind : ctx.operand_list())
+ {
+ if (model_io.contains(ind))
+ continue;
+ const auto &obj = graph->operands().at(ind);
+ const auto &li = lower_info.operand.at(ind);
+ if (li->def_factors().getOnlyElement().backend() != ctx.backend())
+ continue;
+
+ // Ignore unused tensor
+ if (li->def_factors().size() == 0 && li->use_factors().size() == 0)
+ {
+ VERBOSE_F() << "Operand #" << ind.value() << " will not be used. no more process."
+ << std::endl;
+ return;
+ }
+
+ uses_map[ind] = obj.getUses().size();
+ def_map[ind] = obj.getDef().valid() ? 1 : 0;
+
+ if (obj.isConstant())
+ constants.append(ind);
+
+ auto factor = li->def_factors().getOnlyElement();
+ if (!tensor_builder->isRegistered(ind))
+ {
+ // These tensors do not exist in any op_seq (No use and def)
+ const auto info = obj.info();
+ const auto backend_layout = factor.layout();
+ // TODO Change tensor info to have permuted shape
+ tensor_builder->registerTensorInfo(ind, info, backend_layout);
+ }
+ }
+
+ // Start scanning to do notify{First|Last}Use for each tensor
+
+ // If a tensor is a constant, increase the use of the tensor and allocate it first.
+ // Increasing the use count here means the tensor is never deallocated during the scan,
+ // i.e. it will be deallocated last.
+ for (const auto &ind : constants)
+ {
+ uses_map[ind]++;
+ tensor_builder->notifyFirstUse(ind);
+ }
+
+ // At each operation,
+ // 1. Scan DEF of outputs. If the DEF, allocate it
+ // 2. Scan DEF of inputs. If variable tensor, allocate it
+ // 3. Scan USE of inputs. Decrease the USE and deallocate if the USE is 0
+ for (const auto op_seq_ind : order)
+ {
+ const auto &op_seq = op_seqs.at(op_seq_ind);
+ for (const auto &op_idx : op_seq.operations())
+ {
+ auto op_inputs = graph->operations().at(op_idx).getInputs() | ir::Remove::DUPLICATED |
+ ir::Remove::UNDEFINED;
+ auto op_outputs = graph->operations().at(op_idx).getOutputs() | ir::Remove::DUPLICATED |
+ ir::Remove::UNDEFINED;
+
+ // Define outputs
+ for (const auto &ind : op_outputs)
+ {
+ if (model_io.contains(ind))
+ continue;
+ if (!tensor_builder->isRegistered(ind))
+ continue;
+ assert(def_map.find(ind) != def_map.end());
+ if (def_map[ind])
+ {
+ def_map[ind] = 0;
+ tensor_builder->notifyFirstUse(ind);
+ }
+ }
+
+ // Scan variable tensors
+ // These tensors behave like constants, but OperandInfo and LowerInfo treat them as
+ // non-constant so that the memory planning here can reduce memory usage
+ for (const auto &ind : op_inputs)
+ {
+ if (model_io.contains(ind))
+ continue;
+ if (!tensor_builder->isRegistered(ind))
+ continue;
+ const auto &operand = graph->operands().at(ind);
+ if (operand.info().isVariable())
+ {
+ // The variable tensor with buffer is not supported yet
+ assert(operand.data() == nullptr);
+ assert(operand.getUses().size() == 1 && !operand.getDef().valid());
+ assert(lower_info.operand.at(ind)->def_factors().size() == 1 &&
+ lower_info.operand.at(ind)->use_factors().size() == 1);
+ assert(uses_map[ind] == 1 && def_map[ind] == 0);
+ tensor_builder->notifyFirstUse(ind);
+ }
+ }
+
+ for (const auto &ind : op_inputs)
+ {
+ if (model_io.contains(ind))
+ continue;
+ if (!tensor_builder->isRegistered(ind))
+ continue;
+ assert(uses_map.find(ind) != uses_map.end());
+ assert(uses_map[ind] > 0);
+ uses_map[ind]--;
+ if (uses_map[ind] == 0)
+ {
+ // plan for deallocation of static tensor
+ tensor_builder->notifyLastUse(ind);
+
+ // plan for deallocation of dynamic tensor
+ auto dyn_tensor_manager = tensor_builder->dynamicTensorManager();
+ auto *tensor = ctx.tensor_registry->getITensor(ind);
+ assert(tensor);
+ dyn_tensor_manager->planDealloc(op_idx, tensor);
+ }
+ }
+ }
+ }
+
+ // Dispose and validate
+ for (const auto &ind : constants)
+ {
+ --uses_map[ind];
+ if (uses_map[ind] == 0) // To prevent notifyLastUse from being called twice
+ {
+ tensor_builder->notifyLastUse(ind);
+ }
+ }
+
+ assert(
+ std::all_of(uses_map.begin(), uses_map.end(),
+ [](std::pair<const ir::OperandIndex, uint32_t> it) { return it.second == 0; }));
+
+ assert(
+ std::all_of(def_map.begin(), def_map.end(),
+ [](std::pair<const ir::OperandIndex, uint32_t> it) { return it.second == 0; }));
+}
+
+} // namespace cpu_common
+} // namespace backend
+} // namespace onert
+
+#endif // __ONERT_BACKEND_CPU_COMMON_BACKEND_CONTEXT_HELPERS_H__
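The planTensors() helper above is essentially a use-count based lifetime planner: allocate at definition, release after the last reader, with constants pinned until the end. The following stand-alone sketch (simplified types, not taken from the patch) illustrates the same notifyFirstUse/notifyLastUse bookkeeping:

#include <cstdint>
#include <iostream>
#include <map>
#include <vector>

struct Builder // stand-in for a backend tensor builder
{
  void notifyFirstUse(int ind) { std::cout << "alloc #" << ind << "\n"; }
  void notifyLastUse(int ind) { std::cout << "free  #" << ind << "\n"; }
};

struct Op
{
  std::vector<int> inputs;
  std::vector<int> outputs;
};

int main()
{
  Builder b;
  // Operand -> remaining use count (how many ops still read it)
  std::map<int, uint32_t> uses{{0, 1}, {1, 1}, {2, 1}, {3, 0}};
  std::vector<int> constants{0, 1}; // e.g. weights: allocate first, release last
  std::vector<Op> ops{{{0, 1}, {2}}, {{2}, {3}}};

  for (int c : constants)
  {
    ++uses[c]; // the extra use keeps constants alive through the whole scan
    b.notifyFirstUse(c);
  }

  for (const auto &op : ops)
  {
    for (int out : op.outputs)
      b.notifyFirstUse(out); // allocate at definition
    for (int in : op.inputs)
      if (--uses[in] == 0)
        b.notifyLastUse(in); // release after the last reader
  }

  for (int c : constants)
    if (--uses[c] == 0)
      b.notifyLastUse(c); // constants are released at the very end
}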
diff --git a/runtime/onert/core/include/backend/cpu_common/ConstantInitializer.h b/runtime/onert/core/include/backend/cpu_common/ConstantInitializer.h
new file mode 100644
index 000000000..679355599
--- /dev/null
+++ b/runtime/onert/core/include/backend/cpu_common/ConstantInitializer.h
@@ -0,0 +1,58 @@
+/*
+ * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __ONERT_BACKEND_CPU_COMMON_CONSTANT_INITIALIZER_H__
+#define __ONERT_BACKEND_CPU_COMMON_CONSTANT_INITIALIZER_H__
+
+#include "TensorRegistry.h"
+
+#include "ConstantInitializerBase.h"
+#include <ir/Operands.h>
+
+namespace onert
+{
+namespace backend
+{
+namespace cpu_common
+{
+
+class ConstantInitializer : public ConstantInitializerBase
+{
+public:
+ ConstantInitializer(const ir::Operands &operands,
+ const std::shared_ptr<ITensorRegistry> &tensor_reg);
+
+public:
+ void registerDefaultInitializer(const ir::OperandIndex &index, const ir::Operand &obj) override;
+
+ // TODO For now only the cpu backend supports constant tensors that use external data.
+ // If other backends come to support this (ExternalTensor would then need to be
+ // abstracted behind something like an IExternal interface), this can become part of
+ // cpu_common::ConstantInitializerBase
+ void registerExternalInitializer(const ir::OperandIndex &, const ir::Operand &);
+
+private:
+ std::shared_ptr<ITensorRegistry> tensor_registry() const override { return _tensor_reg; }
+
+private:
+ std::shared_ptr<ITensorRegistry> _tensor_reg;
+};
+
+} // namespace cpu_common
+} // namespace backend
+} // namespace onert
+
+#endif // __ONERT_BACKEND_CPU_COMMON_CONSTANT_INITIALIZER_H__
diff --git a/runtime/onert/core/include/backend/IConstantInitializer.h b/runtime/onert/core/include/backend/cpu_common/ConstantInitializerBase.h
index 149acecb4..d4c65de38 100644
--- a/runtime/onert/core/include/backend/IConstantInitializer.h
+++ b/runtime/onert/core/include/backend/cpu_common/ConstantInitializerBase.h
@@ -14,20 +14,21 @@
* limitations under the License.
*/
-#ifndef __ONERT_BACKEND_ICONSTANT_INITIALIZER_H__
-#define __ONERT_BACKEND_ICONSTANT_INITIALIZER_H__
+#ifndef __ONERT_BACKEND_CPU_COMMON_CONSTANT_INITIALIZER_BASE_H__
+#define __ONERT_BACKEND_CPU_COMMON_CONSTANT_INITIALIZER_BASE_H__
#include <unordered_map>
#include <functional>
-#include "ITensorBuilder.h"
#include "ir/Coordinates.h"
#include "ir/Layout.h"
#include "ir/Operand.h"
#include "ir/Operands.h"
#include "ir/OperationVisitor.h"
#include "ir/OpSequence.h"
+#include "backend/ITensorRegistry.h"
#include "util/logging.h"
+#include "backend/ITensorRegistry.h"
namespace
{
@@ -153,11 +154,13 @@ namespace onert
{
namespace backend
{
+namespace cpu_common
+{
-class IConstantInitializer : public ir::OperationVisitor
+class ConstantInitializerBase : public ir::OperationVisitor
{
public:
- virtual ~IConstantInitializer() = default;
+ virtual ~ConstantInitializerBase() = default;
public:
void run()
@@ -178,15 +181,15 @@ public:
}
public:
- IConstantInitializer(const ir::Operands &operands)
- : _operands{operands}, _current_op_seq_layout{ir::Layout::UNKNOWN}
+ ConstantInitializerBase(const ir::Operands &operands)
+ : _operands{operands}, _current_layout{ir::Layout::UNKNOWN}
{
}
public:
using Initializer = std::function<void(const ir::Operand &, backend::ITensor &)>;
- void setLayout(ir::Layout layout) { _current_op_seq_layout = layout; }
+ void setLayout(ir::Layout layout) { _current_layout = layout; }
protected:
virtual std::shared_ptr<ITensorRegistry> tensor_registry() const = 0;
@@ -221,10 +224,11 @@ public:
protected:
const ir::Operands &_operands;
std::unordered_map<ir::OperandIndex, Initializer> _init_map;
- ir::Layout _current_op_seq_layout; // TODO Rename this to _current_layout
+ ir::Layout _current_layout;
};
+} // namespace cpu_common
} // namespace backend
} // namespace onert
-#endif // __ONERT_BACKEND_ICONSTANT_INITIALIZER_H__
+#endif // __ONERT_BACKEND_CPU_COMMON_CONSTANT_INITIALIZER_BASE_H__
diff --git a/runtime/onert/core/include/backend/IKernelGenerator.h b/runtime/onert/core/include/backend/cpu_common/KernelGeneratorBase.h
index afc34ec21..49a589768 100644
--- a/runtime/onert/core/include/backend/IKernelGenerator.h
+++ b/runtime/onert/core/include/backend/cpu_common/KernelGeneratorBase.h
@@ -14,28 +14,30 @@
* limitations under the License.
*/
-#ifndef __ONERT_BACKEND_IKERNEL_GENERATOR_H__
-#define __ONERT_BACKEND_IKERNEL_GENERATOR_H__
+#ifndef __ONERT_BACKEND_CPU_COMMON_KERNEL_GENERATOR_BASE_H__
+#define __ONERT_BACKEND_CPU_COMMON_KERNEL_GENERATOR_BASE_H__
#include <assert.h>
#include <memory>
#include <functional>
-#include "ITensorBuilder.h"
#include "ir/OperationVisitor.h"
#include "ir/OpSequence.h"
#include <memory>
#include "exec/FunctionSequence.h"
+#include "backend/ITensorRegistry.h"
namespace onert
{
namespace backend
{
+namespace cpu_common
+{
-class IKernelGenerator : public ir::OperationVisitor
+class KernelGeneratorBase : public ir::OperationVisitor
{
public:
- virtual ~IKernelGenerator() = default;
+ virtual ~KernelGeneratorBase() = default;
std::unique_ptr<exec::IFunction> releaseFunction()
{
@@ -70,7 +72,8 @@ protected:
std::unique_ptr<exec::FunctionSequence> _return_fn_seq; // TODO Extract this out
};
+} // namespace cpu_common
} // namespace backend
} // namespace onert
-#endif // __ONERT_BACKEND_IKERNEL_GENERATOR_H__
+#endif // __ONERT_BACKEND_CPU_COMMON_KERNEL_GENERATOR_BASE_H__
diff --git a/runtime/onert/core/include/backend/cpu_common/StaticTensorManager.h b/runtime/onert/core/include/backend/cpu_common/StaticTensorManager.h
index fa50b551e..850bcf2f2 100644
--- a/runtime/onert/core/include/backend/cpu_common/StaticTensorManager.h
+++ b/runtime/onert/core/include/backend/cpu_common/StaticTensorManager.h
@@ -17,9 +17,11 @@
#ifndef __ONERT_BACKEND_CPU_COMMON_STATICTENSOR_MANAGER_H__
#define __ONERT_BACKEND_CPU_COMMON_STATICTENSOR_MANAGER_H__
-#include "MemoryManager.h"
-
#include "backend/IStaticTensorManager.h"
+#include "backend/cpu_common/DynamicTensorManager.h"
+#include "backend/cpu_common/MemoryManager.h"
+#include "backend/cpu_common/TensorRegistry.h"
+#include "backend/ITensorManager.h"
#include "ir/OperandIndexMap.h"
#include "ir/OperandInfo.h"
#include "TensorRegistry.h"
@@ -37,12 +39,10 @@ class StaticTensorManager : public backend::IStaticTensorManager
{
public:
StaticTensorManager(const std::shared_ptr<TensorRegistry> &reg,
- DynamicMemoryManager *dynamic_mem_mgr);
+ DynamicTensorManager *dynamic_tensor_manager);
virtual ~StaticTensorManager() = default;
- void allocateConsts(void);
void allocateNonconsts(void);
- void deallocateConsts(void);
void deallocateNonconsts(void);
void buildTensor(const ir::OperandIndex &ind, const ir::OperandInfo &tensor_info,
@@ -54,11 +54,10 @@ public:
void iterate(const std::function<void(const ir::OperandIndex &)> &fn);
private:
- std::unique_ptr<DynamicMemoryManager> _const_mgr;
std::unique_ptr<MemoryManager> _nonconst_mgr;
const std::shared_ptr<TensorRegistry> _tensors;
ir::OperandIndexMap<bool> _as_constants;
- DynamicMemoryManager *_dynamic_mem_mgr;
+ DynamicTensorManager *_dynamic_tensor_manager;
};
} // namespace cpu_common
diff --git a/runtime/onert/core/include/backend/cpu_common/Tensor.h b/runtime/onert/core/include/backend/cpu_common/Tensor.h
index 5fa20e15d..5fbf4e729 100644
--- a/runtime/onert/core/include/backend/cpu_common/Tensor.h
+++ b/runtime/onert/core/include/backend/cpu_common/Tensor.h
@@ -21,6 +21,7 @@
#include <backend/IPortableTensor.h>
#include <ir/OperandInfo.h>
+#include <ir/Data.h>
namespace onert
{
@@ -177,6 +178,91 @@ private:
std::shared_ptr<Allocator> _allocator;
};
+/**
+ * @brief Class that uses data from external memory that is not managed by a backend,
+ * instead of allocating and copying the data. ExternalTensor's data pointer points to
+ * an address of memory that is already allocated elsewhere, such as an mmapped area.
+ * This means that ExternalTensor can take any kind of ir::Data.
+ * To support this, the following are assumed: no padding, always NHWC layout,
+ * constant tensor, and not dynamic.
+ */
+class ExternalTensor : public Tensor
+{
+public:
+ ExternalTensor() = delete;
+ virtual ~ExternalTensor();
+
+public:
+ ExternalTensor(const ir::OperandInfo &info, const ir::Layout layout)
+ : Tensor(info, layout, nullptr)
+ {
+ assert(_layout == ir::Layout::NHWC);
+ assert(_info.isConstant());
+ assert(_info.isDynamic() == false);
+ }
+
+public:
+ /**
+ * @brief Set Data shared from outside so that this ExternalTensor will not be
+ * allocated on the CPU backend
+ * @param[in] data data of Operand to be set
+ */
+ void setData(const std::shared_ptr<ir::Data> data)
+ {
+ assert(data != nullptr);
+ _data = data;
+ // Note: some ops such as cker::Conv could take the buffer as nullptr.
+ // That is why _buffer is also set here.
+ _buffer = const_cast<uint8_t *>(_data->base());
+ }
+
+public:
+ uint8_t *buffer() const override { return _buffer; }
+
+ bool is_constant() const override { return true; }
+ bool is_dynamic() const override { return false; }
+ void set_dynamic() override
+ {
+ throw std::runtime_error("This tensor does not support changing dynamic");
+ }
+
+ void setShape(const ir::Shape &) override
+ {
+ throw std::runtime_error("This tensor does not support changing shape");
+ }
+
+ void increase_ref() override { ++_num_references; }
+
+ void decrease_ref() override
+ {
+ assert(_data != nullptr);
+ assert(_num_references > 0);
+ --_num_references;
+ if (_num_references == 0)
+ {
+ _data.reset();
+ _buffer = nullptr;
+ }
+ }
+
+ /**
+ * @brief Reset reference count to zero and release data
+ */
+ void reset_ref() override
+ {
+ assert(_data != nullptr);
+ assert(_num_references > 0);
+ _num_references = 0;
+
+ _data.reset();
+ _buffer = nullptr;
+ }
+
+ int32_t num_references() override { return _num_references; }
+
+private:
+ std::shared_ptr<const ir::Data> _data;
+};
} // namespace cpu_common
} // namespace backend
} // namespace onert
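The ExternalTensor added above shares externally owned data rather than copying it, and drops its reference when the last user is gone. The following self-contained sketch (SharedBlob and ExternalTensorSketch are hypothetical stand-ins, not onert classes) illustrates the intended reference-counting behavior:

#include <cassert>
#include <cstdint>
#include <memory>
#include <utility>
#include <vector>

class SharedBlob // stand-in for ir::Data (e.g. an mmapped region of the model file)
{
public:
  explicit SharedBlob(std::vector<uint8_t> bytes) : _bytes(std::move(bytes)) {}
  const uint8_t *base() const { return _bytes.data(); }

private:
  std::vector<uint8_t> _bytes;
};

class ExternalTensorSketch
{
public:
  void setData(std::shared_ptr<SharedBlob> data)
  {
    assert(data != nullptr);
    _data = std::move(data);
    _buffer = const_cast<uint8_t *>(_data->base()); // ops read through buffer()
  }
  uint8_t *buffer() const { return _buffer; }
  void increase_ref() { ++_refs; }
  void decrease_ref()
  {
    assert(_refs > 0);
    if (--_refs == 0)
    {
      _data.reset(); // last user gone: drop the shared payload
      _buffer = nullptr;
    }
  }

private:
  std::shared_ptr<SharedBlob> _data;
  uint8_t *_buffer = nullptr;
  int32_t _refs = 0;
};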
diff --git a/runtime/onert/core/include/compiler/BackendManager.h b/runtime/onert/core/include/compiler/BackendManager.h
index af13d13f7..7850e21eb 100644
--- a/runtime/onert/core/include/compiler/BackendManager.h
+++ b/runtime/onert/core/include/compiler/BackendManager.h
@@ -34,7 +34,7 @@ class BackendManager
public:
using backend_create_t = backend::Backend *(*)();
using backend_destroy_t = void (*)(backend::Backend *);
- using dlhandle_destroy_t = void (*)(void *);
+ using dlhandle_destroy_t = std::function<void(void *)>;
static BackendManager &get();
diff --git a/runtime/onert/core/include/compiler/Compiler.h b/runtime/onert/core/include/compiler/Compiler.h
index 3098be7ba..68b862d58 100644
--- a/runtime/onert/core/include/compiler/Compiler.h
+++ b/runtime/onert/core/include/compiler/Compiler.h
@@ -24,6 +24,7 @@
#include "ir/Graph.h"
#include "exec/IExecutor.h"
+#include "util/TracingCtx.h"
namespace onert
{
@@ -48,7 +49,6 @@ struct CompilerOptions
{
// GENERAL OPTIONS
std::vector<std::string> backend_list;
- bool is_primary_subgraph; // TODO Remove this out of this struct as it is not user-given option
// OPTIONS ONLY FOR DEBUGGING/PROFILING
std::string trace_filepath; //< File path to save trace records
@@ -60,6 +60,8 @@ struct CompilerOptions
bool he_profiling_mode; //< Whether HEScheduler profiling mode ON/OFF
bool disable_compile; //< Run with Interpreter if true, try compilation otherwise
bool fp16_enable; //< Whether fp16 mode ON/OFF
+
+ util::TracingCtx *tracing_ctx; //< Profiling information
};
CompilerOptions fetchCompilerOptionsFromGlobalConfig(const ir::Subgraphs &subgs);
@@ -73,8 +75,9 @@ public:
/**
* @brief Construct a new Compiler object
* @param[in] subgs All subgraphs of a model
+ * @param[in] tracing_ctx Profiling information
*/
- Compiler(const std::shared_ptr<ir::Subgraphs> &subgs);
+ Compiler(const std::shared_ptr<ir::Subgraphs> &subgs, util::TracingCtx *tracing_ctx);
public:
/**
diff --git a/runtime/onert/core/include/compiler/LoweredGraph.h b/runtime/onert/core/include/compiler/LoweredGraph.h
index aadba6857..f115ab9a8 100644
--- a/runtime/onert/core/include/compiler/LoweredGraph.h
+++ b/runtime/onert/core/include/compiler/LoweredGraph.h
@@ -67,8 +67,7 @@ private:
const compiler::BackendResolver &backend_resolver);
void manipulateLowerInfo(
- ir::OperandIndexMap<std::unique_ptr<ir::operand::LowerInfo>> &operands_lower_info,
- bool is_primary);
+ ir::OperandIndexMap<std::unique_ptr<ir::operand::LowerInfo>> &operands_lower_info);
void dumpLowerInfo();
bool mergeable(const ir::OpSequenceIndex &op_seq_index, const ir::OperationIndex &node_index,
ir::Layout layout, const compiler::BackendResolver &backend_resolver);
diff --git a/runtime/onert/core/include/compiler/StaticShapeInferer.h b/runtime/onert/core/include/compiler/StaticShapeInferer.h
index 05f2679fc..33a2f62d9 100644
--- a/runtime/onert/core/include/compiler/StaticShapeInferer.h
+++ b/runtime/onert/core/include/compiler/StaticShapeInferer.h
@@ -68,7 +68,7 @@ private:
private:
// TODO Define visitors for operations. List them in alphabetic order.
- void visit(const ir::operation::ArgMax &op) override;
+ void visit(const ir::operation::ArgMinMax &op) override;
void visit(const ir::operation::BatchMatMul &op) override;
void visit(const ir::operation::BCQFullyConnected &op) override;
void visit(const ir::operation::BCQGather &op) override;
diff --git a/runtime/onert/core/include/exec/DynamicShapeInferer.h b/runtime/onert/core/include/exec/DynamicShapeInferer.h
index d2eb83159..1f3a13b06 100644
--- a/runtime/onert/core/include/exec/DynamicShapeInferer.h
+++ b/runtime/onert/core/include/exec/DynamicShapeInferer.h
@@ -49,7 +49,7 @@ public:
public:
// TODO Define visitors for operations. List them in alphabetic order.
// Remove TODO when any op starting from the alphabet is added
- void visit(const ir::operation::ArgMax &op) override;
+ void visit(const ir::operation::ArgMinMax &op) override;
void visit(const ir::operation::BatchMatMul &op) override;
void visit(const ir::operation::BCQFullyConnected &op) override;
void visit(const ir::operation::BCQGather &op) override;
diff --git a/runtime/onert/core/include/exec/IExecutor.h b/runtime/onert/core/include/exec/IExecutor.h
index 1d2831dd0..345bec8eb 100644
--- a/runtime/onert/core/include/exec/IExecutor.h
+++ b/runtime/onert/core/include/exec/IExecutor.h
@@ -18,17 +18,32 @@
* @file IExecutor.h
* @brief This file defines interface of Executor
*/
-#ifndef __ONERT_EXEC_I_EXECUTOR_H_
-#define __ONERT_EXEC_I_EXECUTOR_H_
+#ifndef __ONERT_EXEC_I_EXECUTOR_H__
+#define __ONERT_EXEC_I_EXECUTOR_H__
#include "ir/Graph.h"
#include "IFunction.h"
#include "IODescription.h"
+#include "ir/Index.h"
#include "ir/OperationIndexMap.h"
-#include "backend/IDynamicTensorManager.h"
+
+#include <cstdint>
+#include <memory>
+#include <unordered_map>
namespace onert
{
+namespace backend
+{
+class IPortableTensor;
+namespace controlflow
+{
+class IOTensor;
+}
+}
+}
+namespace onert
+{
namespace exec
{
class IExecutionObserver;
@@ -60,11 +75,29 @@ struct IExecutor
virtual void setIndexedRanks(std::shared_ptr<ir::OperationIndexMap<int64_t>>) = 0;
/**
- * @brief Start execution
+ * @brief Execute with user-given input/output description (for primary subgraph)
* @param[in] desc Input and output description
* @note This method should be thread-safe
*/
virtual void execute(const IODescription &desc) = 0;
+
+ /**
+ * @brief Execute with given input/output tensors
+ *
+ * For non-primary subgraphs, input and output tensors must be given.
+ *
+ * @param[in] inputs tensors that are passed as inputs
+ * @param[in] outputs tensors that are passed as outputs
+ */
+ virtual void execute(const std::vector<backend::IPortableTensor *> &inputs,
+ const std::vector<backend::IPortableTensor *> &outputs) = 0;
+
+ /**
+ * @brief Get output tensor objects
+ *
+ * @return Vector of @c IOTensor
+ */
+ virtual const std::vector<backend::controlflow::IOTensor *> &getOutputTensors() const = 0;
};
using ExecutorMap = std::unordered_map<ir::SubgraphIndex, std::unique_ptr<IExecutor>>;
@@ -72,4 +105,4 @@ using ExecutorMap = std::unordered_map<ir::SubgraphIndex, std::unique_ptr<IExecu
} // namespace exec
} // namespace onert
-#endif // __ONERT_EXEC_I_EXECUTOR_H_
+#endif // __ONERT_EXEC_I_EXECUTOR_H__
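The execute overload added above lets non-primary subgraphs run with caller-provided tensors instead of an IODescription. A minimal, hypothetical caller sketch (the free function is not part of the patch):

#include "backend/IPortableTensor.h"
#include "exec/IExecutor.h"

#include <vector>

// Hypothetical helper: hand already-prepared tensors to a non-primary subgraph executor.
void runSubgraph(onert::exec::IExecutor &executor,
                 const std::vector<onert::backend::IPortableTensor *> &inputs,
                 const std::vector<onert::backend::IPortableTensor *> &outputs)
{
  // No IODescription here; the tensors are passed through directly.
  executor.execute(inputs, outputs);
}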
diff --git a/runtime/onert/core/include/ir/DataType.h b/runtime/onert/core/include/ir/DataType.h
index 9f09de3fb..e77c308ea 100644
--- a/runtime/onert/core/include/ir/DataType.h
+++ b/runtime/onert/core/include/ir/DataType.h
@@ -37,6 +37,7 @@ enum class DataType
INT64 = 8,
QUANT_INT8_ASYMM = 9,
QUANT_INT16_ASYMM = 10,
+ QUANT_INT8_SYMM_PER_CHANNEL = 11,
};
size_t sizeOfDataType(DataType data_type);
diff --git a/runtime/onert/core/include/ir/Operations.Include.h b/runtime/onert/core/include/ir/Operations.Include.h
index 1f20ee665..45fadc474 100644
--- a/runtime/onert/core/include/ir/Operations.Include.h
+++ b/runtime/onert/core/include/ir/Operations.Include.h
@@ -17,69 +17,69 @@
// This file has no ifdef guard intentionally
#include "ir/operation/AddN.h"
+#include "ir/operation/ArgMinMax.h"
+#include "ir/operation/BatchMatMul.h"
#include "ir/operation/BatchToSpaceND.h"
+#include "ir/operation/BCQFullyConnected.h"
+#include "ir/operation/BCQGather.h"
#include "ir/operation/BinaryArithmetic.h"
#include "ir/operation/BroadcastTo.h"
-#include "ir/operation/Conv2D.h"
-#include "ir/operation/Pool2D.h"
+#include "ir/operation/Comparison.h"
#include "ir/operation/Concat.h"
-#include "ir/operation/Reshape.h"
-#include "ir/operation/Fill.h"
-#include "ir/operation/FullyConnected.h"
-#include "ir/operation/Softmax.h"
-#include "ir/operation/Transpose.h"
-#include "ir/operation/Permute.h"
-#include "ir/operation/Reduce.h"
+#include "ir/operation/Conv2D.h"
+#include "ir/operation/ConvertFp16ToFp32.h"
+#include "ir/operation/ConvertFp32ToFp16.h"
+#include "ir/operation/Custom.h"
+#include "ir/operation/DepthToSpace.h"
#include "ir/operation/DepthwiseConv2D.h"
-#include "ir/operation/Slice.h"
-#include "ir/operation/StridedSlice.h"
-#include "ir/operation/Squeeze.h"
+#include "ir/operation/Einsum.h"
#include "ir/operation/ElementwiseActivation.h"
#include "ir/operation/ElementwiseBinary.h"
#include "ir/operation/ElementwiseUnary.h"
+#include "ir/operation/EmbeddingLookup.h"
#include "ir/operation/ExpandDims.h"
-#include "ir/operation/Comparison.h"
+#include "ir/operation/Fill.h"
+#include "ir/operation/FullyConnected.h"
+#include "ir/operation/FusedBatchNorm.h"
+#include "ir/operation/Gather.h"
+#include "ir/operation/HashtableLookup.h"
+#include "ir/operation/If.h"
+#include "ir/operation/InstanceNorm.h"
+#include "ir/operation/L2Normalization.h"
+#include "ir/operation/LocalResponseNormalization.h"
+#include "ir/operation/LogSoftmax.h"
#include "ir/operation/LSTM.h"
+#include "ir/operation/MatrixBandPart.h"
+#include "ir/operation/OneHot.h"
+#include "ir/operation/Pack.h"
+#include "ir/operation/Pad.h"
+#include "ir/operation/Permute.h"
+#include "ir/operation/Pool2D.h"
+#include "ir/operation/Pow.h"
+#include "ir/operation/PReLU.h"
+#include "ir/operation/Range.h"
+#include "ir/operation/Rank.h"
+#include "ir/operation/Reduce.h"
+#include "ir/operation/Reshape.h"
#include "ir/operation/ResizeBilinear.h"
#include "ir/operation/ResizeNearestNeighbor.h"
#include "ir/operation/Reverse.h"
#include "ir/operation/RNN.h"
+#include "ir/operation/Select.h"
+#include "ir/operation/Shape.h"
+#include "ir/operation/Slice.h"
+#include "ir/operation/Softmax.h"
#include "ir/operation/SpaceToBatchND.h"
#include "ir/operation/SpaceToDepth.h"
-#include "ir/operation/EmbeddingLookup.h"
-#include "ir/operation/L2Normalization.h"
-#include "ir/operation/HashtableLookup.h"
-#include "ir/operation/InstanceNorm.h"
-#include "ir/operation/PReLU.h"
-#include "ir/operation/TransposeConv.h"
-#include "ir/operation/SquaredDifference.h"
-#include "ir/operation/TopKV2.h"
-#include "ir/operation/Gather.h"
-#include "ir/operation/ArgMax.h"
-#include "ir/operation/LocalResponseNormalization.h"
-#include "ir/operation/DepthToSpace.h"
-#include "ir/operation/Pack.h"
-#include "ir/operation/Select.h"
#include "ir/operation/Split.h"
#include "ir/operation/SplitV.h"
+#include "ir/operation/SquaredDifference.h"
+#include "ir/operation/Squeeze.h"
+#include "ir/operation/StatelessRandomUniform.h"
+#include "ir/operation/StridedSlice.h"
+#include "ir/operation/Tile.h"
+#include "ir/operation/TopKV2.h"
+#include "ir/operation/Transpose.h"
+#include "ir/operation/TransposeConv.h"
#include "ir/operation/Unpack.h"
-#include "ir/operation/Pad.h"
-#include "ir/operation/Custom.h"
-#include "ir/operation/Einsum.h"
-#include "ir/operation/OneHot.h"
-#include "ir/operation/Shape.h"
-#include "ir/operation/ConvertFp32ToFp16.h"
-#include "ir/operation/ConvertFp16ToFp32.h"
-#include "ir/operation/If.h"
#include "ir/operation/While.h"
-#include "ir/operation/Pow.h"
-#include "ir/operation/Tile.h"
-#include "ir/operation/Range.h"
-#include "ir/operation/Rank.h"
-#include "ir/operation/BCQFullyConnected.h"
-#include "ir/operation/BCQGather.h"
-#include "ir/operation/MatrixBandPart.h"
-#include "ir/operation/BatchMatMul.h"
-#include "ir/operation/FusedBatchNorm.h"
-#include "ir/operation/LogSoftmax.h"
-#include "ir/operation/StatelessRandomUniform.h"
diff --git a/runtime/onert/core/include/ir/Operations.lst b/runtime/onert/core/include/ir/Operations.lst
index ccde4d179..7f3c40b4b 100644
--- a/runtime/onert/core/include/ir/Operations.lst
+++ b/runtime/onert/core/include/ir/Operations.lst
@@ -20,69 +20,69 @@
// Internal Name
OP(AddN)
+OP(ArgMinMax)
+OP(BatchMatMul)
OP(BatchToSpaceND)
+OP(BCQFullyConnected)
+OP(BCQGather)
OP(BinaryArithmetic)
OP(BroadcastTo)
+OP(Comparison)
+OP(Concat)
OP(Conv2D)
+OP(ConvertFp16ToFp32)
+OP(ConvertFp32ToFp16)
+OP(Custom)
+OP(DepthToSpace)
OP(DepthwiseConv2D)
-OP(Pool2D)
-OP(Concat)
-OP(Fill)
-OP(FullyConnected)
-OP(Reduce)
-OP(Reshape)
-OP(Softmax)
-OP(Squeeze)
-OP(Slice)
-OP(StridedSlice)
-OP(Transpose)
+OP(Einsum)
OP(ElementwiseActivation)
OP(ElementwiseBinary)
OP(ElementwiseUnary)
+OP(EmbeddingLookup)
OP(ExpandDims)
-OP(Comparison)
+OP(Fill)
+OP(FullyConnected)
+OP(FusedBatchNorm)
+OP(Gather)
+OP(HashtableLookup)
+OP(If)
+OP(InstanceNorm)
+OP(L2Normalization)
+OP(LocalResponseNormalization)
+OP(LogSoftmax)
OP(LSTM)
+OP(MatrixBandPart)
+OP(OneHot)
+OP(Pack)
+OP(Pad)
+OP(Permute)
+OP(Pool2D)
+OP(Pow)
+OP(PReLU)
+OP(Range)
+OP(Rank)
+OP(Reduce)
+OP(Reshape)
OP(ResizeBilinear)
OP(ResizeNearestNeighbor)
OP(Reverse)
OP(RNN)
+OP(Select)
+OP(Shape)
+OP(Slice)
+OP(Softmax)
OP(SpaceToBatchND)
OP(SpaceToDepth)
-OP(EmbeddingLookup)
-OP(L2Normalization)
-OP(HashtableLookup)
-OP(InstanceNorm)
-OP(PReLU)
-OP(TransposeConv)
-OP(SquaredDifference)
-OP(TopKV2)
-OP(Gather)
-OP(ArgMax)
-OP(Einsum)
-OP(LocalResponseNormalization)
-OP(DepthToSpace)
-OP(Pack)
-OP(Select)
OP(Split)
OP(SplitV)
+OP(SquaredDifference)
+OP(Squeeze)
+OP(StatelessRandomUniform)
+OP(StridedSlice)
+OP(Tile)
+OP(TopKV2)
+OP(Transpose)
+OP(TransposeConv)
OP(Unpack)
-OP(Pad)
-OP(Custom)
-OP(Permute)
-OP(OneHot)
-OP(Shape)
-OP(ConvertFp32ToFp16)
-OP(ConvertFp16ToFp32)
-OP(If)
OP(While)
-OP(Pow)
-OP(Tile)
-OP(Range)
-OP(Rank)
-OP(BCQFullyConnected)
-OP(BCQGather)
-OP(MatrixBandPart)
-OP(BatchMatMul)
-OP(FusedBatchNorm)
-OP(LogSoftmax)
-OP(StatelessRandomUniform)
diff --git a/runtime/onert/core/include/ir/Subgraphs.h b/runtime/onert/core/include/ir/Subgraphs.h
index 7b4c33b76..6cb369447 100644
--- a/runtime/onert/core/include/ir/Subgraphs.h
+++ b/runtime/onert/core/include/ir/Subgraphs.h
@@ -120,7 +120,7 @@ public:
*
* @return count of Subgraphs
*/
- size_t count() { return _subgraphs.size(); }
+ size_t count() const { return _subgraphs.size(); }
/**
* @brief Return the primary subgraph
diff --git a/runtime/onert/core/include/ir/operation/ArgMax.h b/runtime/onert/core/include/ir/operation/ArgMinMax.h
index ea7eabb83..1c9fccd22 100644
--- a/runtime/onert/core/include/ir/operation/ArgMax.h
+++ b/runtime/onert/core/include/ir/operation/ArgMinMax.h
@@ -14,8 +14,8 @@
* limitations under the License.
*/
-#ifndef __ONERT_IR_OPERATION_ARG_MAX_H__
-#define __ONERT_IR_OPERATION_ARG_MAX_H__
+#ifndef __ONERT_IR_OPERATION_ARG_MIN_MAX_H__
+#define __ONERT_IR_OPERATION_ARG_MIN_MAX_H__
#include "ir/Operation.h"
@@ -26,7 +26,7 @@ namespace ir
namespace operation
{
-class ArgMax : public Operation
+class ArgMinMax : public Operation
{
public:
enum Input
@@ -38,15 +38,16 @@ public:
struct Param
{
DataType output_type;
+ bool is_arg_max = true;
};
public:
- ArgMax(const OperandIndexSequence &inputs, const OperandIndexSequence &outputs,
- const Param &param);
+ ArgMinMax(const OperandIndexSequence &inputs, const OperandIndexSequence &outputs,
+ const Param &param);
public:
void accept(OperationVisitor &v) const override;
- OpCode opcode() const final { return OpCode::ArgMax; }
+ OpCode opcode() const final { return OpCode::ArgMinMax; }
public:
const Param &param() const { return _param; }
@@ -59,4 +60,4 @@ private:
} // namespace ir
} // namespace onert
-#endif // __ONERT_IR_OPERATION_ARG_MAX_H__
+#endif // __ONERT_IR_OPERATION_ARG_MIN_MAX_H__
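The former ArgMax operation is generalized above into ArgMinMax, with min or max selected through the new is_arg_max flag. A hypothetical construction helper (the operand index sequences are assumed to be prepared by the caller):

#include "ir/DataType.h"
#include "ir/operation/ArgMinMax.h"

#include <memory>

// Hypothetical helper: build an ArgMin node using the merged operation.
std::unique_ptr<onert::ir::operation::ArgMinMax>
makeArgMin(const onert::ir::OperandIndexSequence &inputs,
           const onert::ir::OperandIndexSequence &outputs)
{
  onert::ir::operation::ArgMinMax::Param param;
  param.output_type = onert::ir::DataType::INT32; // type of the index output
  param.is_arg_max = false;                       // false selects ArgMin, true selects ArgMax
  return std::make_unique<onert::ir::operation::ArgMinMax>(inputs, outputs, param);
}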
diff --git a/runtime/onert/core/include/ir/operation/ElementwiseUnary.h b/runtime/onert/core/include/ir/operation/ElementwiseUnary.h
index c40778a56..7d6cb544a 100644
--- a/runtime/onert/core/include/ir/operation/ElementwiseUnary.h
+++ b/runtime/onert/core/include/ir/operation/ElementwiseUnary.h
@@ -14,8 +14,8 @@
* limitations under the License.
*/
-#ifndef __ONERT_IR_OPERATION_ELEMENTWISEUNARY_H__
-#define __ONERT_IR_OPERATION_ELEMENTWISEUNARY_H__
+#ifndef __ONERT_IR_OPERATION_ELEMENTWISE_UNARY_H__
+#define __ONERT_IR_OPERATION_ELEMENTWISE_UNARY_H__
#include "ir/Operation.h"
@@ -51,7 +51,7 @@ public:
RSQRT,
SIN,
SQRT,
- SQURE,
+ SQUARE,
ZEROS_LIKE
};
@@ -80,4 +80,4 @@ private:
} // namespace ir
} // namespace onert
-#endif // __ONERT_IR_OPERATION_ELEMENTWISEUNARY_H__
+#endif // __ONERT_IR_OPERATION_ELEMENTWISE_UNARY_H__
diff --git a/runtime/onert/core/include/ir/operation/Fill.h b/runtime/onert/core/include/ir/operation/Fill.h
index 524e41385..b55c77ae5 100644
--- a/runtime/onert/core/include/ir/operation/Fill.h
+++ b/runtime/onert/core/include/ir/operation/Fill.h
@@ -31,7 +31,7 @@ class Fill : public Operation
public:
enum Input
{
- INPUT = 0,
+ SHAPE = 0,
VALUE,
};
diff --git a/runtime/onert/core/include/util/Config.lst b/runtime/onert/core/include/util/Config.lst
index 30f211011..5944f8344 100644
--- a/runtime/onert/core/include/util/Config.lst
+++ b/runtime/onert/core/include/util/Config.lst
@@ -20,7 +20,7 @@
// Name | Type | Default
CONFIG(GRAPH_DOT_DUMP , int , "0")
-CONFIG(BACKENDS , std::string , "cpu;acl_cl;acl_neon;bcq") // FIXME Remove bcq
+CONFIG(BACKENDS , std::string , "cpu;acl_cl;acl_neon;ruy;xnnpack;bcq") // FIXME Remove bcq
CONFIG(OP_BACKEND_ALLOPS , std::string , "")
CONFIG(OP_BACKEND_MAP , std::string , "")
CONFIG(DISABLE_COMPILE , bool , "0")
@@ -35,6 +35,7 @@ CONFIG(OP_SEQ_MAX_NODE , int , "0")
CONFIG(TRACE_FILEPATH , std::string , "")
CONFIG(FP16_ENABLE , bool , "0")
CONFIG(RUY_THREADS , int , "-1")
+CONFIG(XNNPACK_THREADS , int , "-1")
CONFIG(USE_MMAPED_DATA , bool , "0")
// Auto-generate all operations
diff --git a/runtime/onert/core/include/util/ConfigSource.h b/runtime/onert/core/include/util/ConfigSource.h
index b6a8144fd..da8bc8620 100644
--- a/runtime/onert/core/include/util/ConfigSource.h
+++ b/runtime/onert/core/include/util/ConfigSource.h
@@ -27,6 +27,7 @@ namespace util
{
void config_source(std::unique_ptr<IConfigSource> &&source);
+void config_source_ext(std::unique_ptr<IConfigSource> &&source);
bool toBool(const std::string &val);
int toInt(const std::string &val);
diff --git a/runtime/onert/core/include/util/ShapeInference.h b/runtime/onert/core/include/util/ShapeInference.h
index 701b835d2..b11da90ce 100644
--- a/runtime/onert/core/include/util/ShapeInference.h
+++ b/runtime/onert/core/include/util/ShapeInference.h
@@ -42,7 +42,7 @@ using Shapes = std::vector<ir::Shape>;
// Define shape calculation for operations. List them in alphabetic order.
-ir::Shape inferArgMaxShape(const ir::Shape &input_shape, int axis, int rank);
+ir::Shape inferArgMinMaxShape(const ir::Shape &input_shape, int axis, int rank);
ir::Shape inferBatchMatMulShape(const ir::Shape &lhs_shape, const ir::Shape &rhs_shape,
const ir::operation::BatchMatMul::Param &param);
@@ -70,7 +70,7 @@ ir::Shape inferEltwiseShape(const ir::Shape &lhs_shape, const ir::Shape &rhs_sha
ir::Shape inferExpandDimsShape(const ir::Shape &in_shape, int32_t axis);
-ir::Shape inferFillShape(const ir::Shape &in_shape, const int32_t *in_buf);
+template <typename T> ir::Shape inferFillShape(const ir::Shape &fill_shape, const T *shape_buf);
ir::Shape inferFullyConnectedShape(const ir::Shape &in_shape, const ir::Shape &ker_shape);
diff --git a/runtime/onert/core/include/util/TracingCtx.h b/runtime/onert/core/include/util/TracingCtx.h
new file mode 100644
index 000000000..a82704cf0
--- /dev/null
+++ b/runtime/onert/core/include/util/TracingCtx.h
@@ -0,0 +1,94 @@
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __ONERT_UTIL_TRACING_CTX_H__
+#define __ONERT_UTIL_TRACING_CTX_H__
+
+#include "ir/Graph.h"
+#include "ir/Index.h"
+#include "ir/Subgraphs.h"
+
+#include <unordered_map>
+#include <mutex>
+
+namespace onert
+{
+namespace util
+{
+
+/**
+ * @brief Class to maintain information about profiling per session
+ */
+class TracingCtx
+{
+public:
+ /**
+ * @brief Create and store unique session id managed by this class
+ * Note that this constructor can be called by multiple sessions running in parallel.
+ * Use this constructor only when there is only one subgraph in a model.
+ */
+ TracingCtx(const ir::Graph *primary_subgraph)
+ {
+ decideSessionID();
+ _subgraph_indices.emplace(primary_subgraph, 0);
+ }
+
+ /**
+ * @brief Create and store unique session id managed by this class
+ * Note that this constructor can be called by multiple sessions running in parallel.
+ */
+ TracingCtx(const onert::ir::Subgraphs *subgraphs)
+ {
+ assert(subgraphs);
+
+ decideSessionID();
+
+ auto count = subgraphs->count();
+ for (size_t i = 0; i < count; i++)
+ _subgraph_indices.emplace(subgraphs->at(onert::ir::SubgraphIndex(i)).get(), i);
+ }
+
+ uint32_t getSessionId() const { return _session_id; }
+
+ /**
+ * @brief Set subgraph index of a graph
+ */
+ void setSubgraphIndex(const ir::Graph *g, uint32_t index) { _subgraph_indices.emplace(g, index); }
+
+ /**
+ * @brief Get subgraph index of a graph.
+ */
+ ir::SubgraphIndex getSubgraphIndex(const ir::Graph *g) const { return _subgraph_indices.at(g); }
+
+private:
+ void decideSessionID()
+ {
+ std::unique_lock<std::mutex> lock{_session_id_mutex};
+
+ static uint32_t next_session_id = 0;
+ _session_id = next_session_id++;
+ }
+
+private:
+ std::unordered_map<const ir::Graph *, ir::SubgraphIndex> _subgraph_indices;
+ uint32_t _session_id;
+ static std::mutex _session_id_mutex;
+};
+
+} // namespace util
+} // namespace onert
+
+#endif // __ONERT_UTIL_TRACING_CTX_H__
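TracingCtx above hands out a process-wide unique session id and maps each subgraph to its index so trace records can be attributed later. A hedged wiring sketch (SessionSketch is hypothetical, and the compile() return type is an assumption rather than something shown in this patch):

#include "compiler/Compiler.h"
#include "exec/IExecutor.h"
#include "ir/Subgraphs.h"
#include "util/TracingCtx.h"

#include <memory>

// Hypothetical session-like owner: it keeps the TracingCtx alive for its whole lifetime
// and passes the raw pointer to the Compiler constructor added in this release.
struct SessionSketch
{
  explicit SessionSketch(const std::shared_ptr<onert::ir::Subgraphs> &subgs)
    : _subgraphs{subgs}, _tracing_ctx{std::make_unique<onert::util::TracingCtx>(subgs.get())}
  {
  }

  void compile()
  {
    onert::compiler::Compiler compiler{_subgraphs, _tracing_ctx.get()};
    _executors = compiler.compile(); // assumed to return the executor map
  }

  std::shared_ptr<onert::ir::Subgraphs> _subgraphs;
  std::unique_ptr<onert::util::TracingCtx> _tracing_ctx;
  std::shared_ptr<onert::exec::ExecutorMap> _executors;
};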
diff --git a/runtime/onert/core/include/util/logging.h b/runtime/onert/core/include/util/logging.h
index 76cfb8d60..65c375077 100644
--- a/runtime/onert/core/include/util/logging.h
+++ b/runtime/onert/core/include/util/logging.h
@@ -64,4 +64,11 @@ static Context &ctx = Context::get();
if (::onert::util::logging::ctx.enabled()) \
std::cout << "[" << __func__ << "] "
+#define WHEN_LOG_ENABLED(METHOD) \
+ if (::onert::util::logging::ctx.enabled()) \
+ do \
+ { \
+ METHOD; \
+ } while (0)
+
#endif // __ONERT_UTIL_LOGGING_H__
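The WHEN_LOG_ENABLED macro added above guards a statement behind the runtime logging flag. A minimal, hypothetical usage sketch (dumpLowerInfoForDebug and someCompilePass are placeholder names):

#include "util/logging.h"

inline void dumpLowerInfoForDebug() { /* placeholder for an expensive diagnostic dump */ }

void someCompilePass()
{
  // The dump runs only when logging is enabled, so its cost is skipped in normal runs.
  WHEN_LOG_ENABLED(dumpLowerInfoForDebug());
}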
diff --git a/runtime/onert/core/src/backend/BackendContext.cc b/runtime/onert/core/src/backend/BackendContext.cc
index bafa36d28..404c3b155 100644
--- a/runtime/onert/core/src/backend/BackendContext.cc
+++ b/runtime/onert/core/src/backend/BackendContext.cc
@@ -17,7 +17,6 @@
#include "backend/BackendContext.h"
#include "ir/Operation.h"
-#include "backend/IConstantInitializer.h"
namespace onert
{
@@ -31,25 +30,5 @@ void BackendContext::initialize(const std::vector<OperationInfo> &operation_list
_operand_list = operand_list;
}
-void BackendContext::initConsts()
-{
- for (auto &op : _operation_list)
- {
- constant_initializer->setLayout(op.layout);
- _graph->operations().at(op.index).accept(*constant_initializer);
- }
-
- for (auto ind : _operand_list)
- {
- const auto &obj = _graph->operands().at(ind);
- if (obj.isConstant() && !constant_initializer->exist(ind))
- {
- constant_initializer->registerDefaultInitializer(ind, obj);
- }
- }
-
- constant_initializer->run();
-}
-
} // namespace backend
} // namespace onert
diff --git a/runtime/onert/core/src/backend/controlflow/Backend.h b/runtime/onert/core/src/backend/controlflow/Backend.h
index cc8346e6b..3323cf5cb 100644
--- a/runtime/onert/core/src/backend/controlflow/Backend.h
+++ b/runtime/onert/core/src/backend/controlflow/Backend.h
@@ -72,8 +72,6 @@ public:
context->constant_initializer = std::make_shared<ConstantInitializer>(operands, tr);
context->kernel_gen = std::make_shared<KernelGenerator>(graph, tb->dynamicTensorManager(), tr,
context->external_context());
- context->tensor_register = nullptr;
- context->optimizer = nullptr;
return context;
}
diff --git a/runtime/onert/core/src/backend/controlflow/BackendContext.cc b/runtime/onert/core/src/backend/controlflow/BackendContext.cc
new file mode 100644
index 000000000..366377edf
--- /dev/null
+++ b/runtime/onert/core/src/backend/controlflow/BackendContext.cc
@@ -0,0 +1,142 @@
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "BackendContext.h"
+
+#include "KernelGenerator.h"
+#include "backend/cpu_common/BackendContextHelpers.h"
+
+namespace onert
+{
+namespace backend
+{
+namespace controlflow
+{
+
+void BackendContext::initConsts()
+{
+ for (auto &op : operation_list())
+ {
+ constant_initializer->setLayout(op.layout);
+ graph()->operations().at(op.index).accept(*constant_initializer);
+ }
+
+ for (auto ind : operand_list())
+ {
+ const auto &obj = graph()->operands().at(ind);
+ if (obj.isConstant() && !constant_initializer->exist(ind))
+ {
+ constant_initializer->registerDefaultInitializer(ind, obj);
+ }
+ }
+
+ constant_initializer->run();
+}
+
+ITensorRegistry *BackendContext::genTensors(const std::vector<onert::ir::OpSequenceIndex> &order,
+ const ir::OpSequences &op_seqs,
+ const ir::LowerInfoMap &lower_info)
+{
+ auto model_io = (graph()->getInputs() + graph()->getOutputs()) | ir::Remove::UNDEFINED |
+ ir::Remove::DUPLICATED;
+ for (auto index : operand_list())
+ {
+ if (model_io.contains(index))
+ continue;
+ const auto &obj = graph()->operands().at(index);
+ const auto frontend_layout = [&]() {
+ if (obj.getUses().size() == 0)
+ return ir::Layout::UNKNOWN;
+ auto use_op_ind = *obj.getUses().begin(); // FIXME What if it has two or more uses?
+ for (auto &operation_info : operation_list())
+ {
+ if (operation_info.index == use_op_ind)
+ return operation_info.layout;
+ }
+ return ir::Layout::UNKNOWN;
+ }();
+ const auto &permute_factor = lower_info.operand.at(index)->def_factors().getOnlyElement();
+ if (permute_factor.backend() != backend())
+ continue;
+ const auto backend_layout = permute_factor.layout();
+ ir::OperandInfo backend_info{permuteShape(obj.shape(), frontend_layout, backend_layout),
+ obj.typeInfo(), obj.info().memAllocType(), obj.isConstant()};
+ tensor_builder->registerTensorInfo(index, backend_info, backend_layout);
+ }
+
+ // TODO Get compiler options from compiler, and use it rather than getting it from Env
+ if (util::getConfigString(util::config::EXECUTOR) == "Linear")
+ {
+ cpu_common::planTensors(*this, order, op_seqs, lower_info);
+ }
+ else
+ {
+ // For executors that do not have a fixed linear execution order:
+ // as a workaround, use the static memory planner so that tensors are never deallocated
+ for (auto ind : operand_list())
+ {
+ if (tensor_builder->isRegistered(ind))
+ tensor_builder->notifyFirstUse(ind);
+ }
+ }
+
+ tensor_builder->prepare();
+
+ return tensor_registry.get();
+}
+
+FunctionMap BackendContext::genKernels(const std::vector<ir::OpSequenceIndex> &order,
+ const ir::OpSequences &op_seqs)
+{
+ FunctionMap ret;
+
+ for (auto op_seq_ind : order)
+ {
+ const auto &op_seq = op_seqs.at(op_seq_ind);
+ bool assigned = [&]() {
+ for (auto op_info : operation_list())
+ if (op_seq.exist(op_info.index))
+ return true;
+ return false;
+ }();
+ if (!assigned)
+ continue;
+ auto fn_seq = kernel_gen->generate(op_seqs.at(op_seq_ind));
+ ret.emplace_back(op_seq_ind, std::move(fn_seq));
+ }
+
+ initConsts();
+
+ // NOTE For memory optimization, we want to free some operand data
+ for (auto ind : operand_list())
+ {
+ // TODO Remove const_cast
+ auto &obj = const_cast<ir::Graph *>(graph())->operands().at(ind);
+ obj.releaseData();
+ }
+
+ for (auto &it : ret)
+ {
+ auto &fn_seq = it.second;
+ fn_seq->iterate([&](exec::IFunction &ifunc) { ifunc.prepare(); });
+ }
+
+ return ret;
+}
+
+} // namespace controlflow
+} // namespace backend
+} // namespace onert
diff --git a/runtime/onert/core/src/backend/controlflow/BackendContext.h b/runtime/onert/core/src/backend/controlflow/BackendContext.h
index 3647338a0..a768d5d61 100644
--- a/runtime/onert/core/src/backend/controlflow/BackendContext.h
+++ b/runtime/onert/core/src/backend/controlflow/BackendContext.h
@@ -18,6 +18,9 @@
#define __ONERT_BACKEND_CONTROLFLOW_BACKEND_CONTEXT_H__
#include <backend/BackendContext.h>
+#include "TensorBuilder.h"
+#include "ConstantInitializer.h"
+#include "KernelGenerator.h"
#include "ExternalContext.h"
namespace onert
@@ -32,21 +35,36 @@ class BackendContext : public onert::backend::BackendContext
public:
BackendContext(const Backend *backend, const ir::Graph *graph,
std::shared_ptr<ITensorRegistry> tensor_registry = nullptr,
- std::shared_ptr<ITensorBuilder> tensor_builder = nullptr,
- std::shared_ptr<IConstantInitializer> constant_initializer = nullptr,
- std::shared_ptr<IKernelGenerator> kernel_gen = nullptr,
- std::shared_ptr<ITensorRegister> tensor_register = nullptr,
- std::shared_ptr<IOptimizer> optimizer = nullptr)
- : onert::backend::BackendContext(backend, graph, tensor_registry, tensor_builder,
- constant_initializer, kernel_gen, tensor_register,
- optimizer),
- _external_context(std::make_shared<ExternalContext>())
+ std::shared_ptr<TensorBuilder> tensor_builder = nullptr,
+ std::shared_ptr<ConstantInitializer> constant_initializer = nullptr,
+ std::shared_ptr<KernelGenerator> kernel_gen = nullptr)
+ : onert::backend::BackendContext(backend, graph, tensor_registry),
+ tensor_builder{tensor_builder}, constant_initializer{constant_initializer},
+ kernel_gen{kernel_gen}, _external_context(std::make_shared<ExternalContext>())
{
}
+ ITensorRegistry *genTensors(const std::vector<onert::ir::OpSequenceIndex> &order,
+ const ir::OpSequences &op_seqs,
+ const ir::LowerInfoMap &lower_info) override;
+
+ FunctionMap genKernels(const std::vector<ir::OpSequenceIndex> &order,
+ const ir::OpSequences &op_seqs) override;
+
std::shared_ptr<ExternalContext> external_context() { return _external_context; }
private:
+ void initConsts();
+ void planTensors(const std::vector<onert::ir::OpSequenceIndex> &order,
+ const ir::OpSequences &op_seqs, const ir::LowerInfoMap &lower_info);
+
+public:
+ // TODO Make it private
+ std::shared_ptr<TensorBuilder> tensor_builder;
+ std::shared_ptr<ConstantInitializer> constant_initializer;
+ std::shared_ptr<KernelGenerator> kernel_gen;
+
+private:
// NOTE ruy context has a thread pool, and when multiple ruy contexts are created,
// the thread pool is also created in duplicate
// TODO Create one ruy context for session
diff --git a/runtime/onert/core/src/backend/controlflow/ConstantInitializer.h b/runtime/onert/core/src/backend/controlflow/ConstantInitializer.h
index e21a8f357..ac97ef91c 100644
--- a/runtime/onert/core/src/backend/controlflow/ConstantInitializer.h
+++ b/runtime/onert/core/src/backend/controlflow/ConstantInitializer.h
@@ -17,10 +17,7 @@
#ifndef __ONERT_COMPILER_CONTROLFLOW_CONSTANT_INITIALIZER_H__
#define __ONERT_COMPILER_CONTROLFLOW_CONSTANT_INITIALIZER_H__
-#include "TensorRegistry.h"
-
-#include <backend/IConstantInitializer.h>
-#include <ir/Operands.h>
+#include <backend/cpu_common/ConstantInitializer.h>
namespace onert
{
@@ -29,21 +26,7 @@ namespace backend
namespace controlflow
{
-class ConstantInitializer : public IConstantInitializer
-{
-public:
- ConstantInitializer(const ir::Operands &operands,
- const std::shared_ptr<ITensorRegistry> &tensor_reg)
- : IConstantInitializer{operands}, _tensor_reg{tensor_reg}
- {
- }
-
-private:
- std::shared_ptr<ITensorRegistry> tensor_registry() const override { return _tensor_reg; }
-
-private:
- std::shared_ptr<ITensorRegistry> _tensor_reg;
-};
+using ConstantInitializer = cpu_common::ConstantInitializer;
} // namespace controlflow
} // namespace backend
diff --git a/runtime/onert/core/src/backend/controlflow/ExternalContext.h b/runtime/onert/core/src/backend/controlflow/ExternalContext.h
index 3db6829a9..cfb983136 100644
--- a/runtime/onert/core/src/backend/controlflow/ExternalContext.h
+++ b/runtime/onert/core/src/backend/controlflow/ExternalContext.h
@@ -17,7 +17,6 @@
#ifndef __ONERT_BACKEND_CONTROLFLOW_EXTERNAL_CONTEXT_H__
#define __ONERT_BACKEND_CONTROLFLOW_EXTERNAL_CONTEXT_H__
-#include <backend/IExternalContext.h>
#include <util/ConfigSource.h>
#include <ruy/context.h>
@@ -38,7 +37,7 @@ namespace controlflow
{
// TODO Unify this with cpu::ExternalContext
-class ExternalContext : public IExternalContext
+class ExternalContext
{
public:
ExternalContext() : _ruy_context(std::make_unique<ruy::Context>())
diff --git a/runtime/onert/core/src/backend/controlflow/IOTensor.cc b/runtime/onert/core/src/backend/controlflow/IOTensor.cc
new file mode 100644
index 000000000..47405ac9e
--- /dev/null
+++ b/runtime/onert/core/src/backend/controlflow/IOTensor.cc
@@ -0,0 +1,52 @@
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "IOTensor.h"
+
+#include <assert.h>
+
+namespace onert
+{
+namespace backend
+{
+namespace controlflow
+{
+
+IOTensor::IOTensor(const ir::OperandInfo &info, ir::Layout layout)
+ : IPortableTensor{info}, _orig_info{info}, _orig_layout{layout}
+{
+ setUserTensor(nullptr, 0);
+}
+
+void IOTensor::setTensor(IPortableTensor *tensor)
+{
+ assert(tensor);
+ assert(tensor != this);
+ // TODO Handle when layout was changed
+ assert(tensor->layout() == _orig_layout); // Changing layout is not considered yet
+ _user_tensor.reset();
+ _tensor = tensor;
+}
+
+void IOTensor::setUserTensor(uint8_t *buffer, size_t size)
+{
+ _user_tensor = std::make_unique<UserTensor>(_orig_info, _orig_layout, buffer, size);
+ _tensor = _user_tensor.get();
+}
+
+} // namespace controlflow
+} // namespace backend
+} // namespace onert
diff --git a/runtime/onert/core/src/backend/controlflow/IOTensor.h b/runtime/onert/core/src/backend/controlflow/IOTensor.h
new file mode 100644
index 000000000..a7ed84b6d
--- /dev/null
+++ b/runtime/onert/core/src/backend/controlflow/IOTensor.h
@@ -0,0 +1,94 @@
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __ONERT_BACKEND_CONTROLFLOW_IO_TENSOR_H__
+#define __ONERT_BACKEND_CONTROLFLOW_IO_TENSOR_H__
+
+#include "backend/IPortableTensor.h"
+#include "UserTensor.h"
+
+namespace onert
+{
+namespace backend
+{
+namespace controlflow
+{
+
+/**
+ * @brief Tensor object that forwards every access to the tensor it points to.
+ *
+ * A model I/O tensor can be one of two kinds:
+ *
+ * 1. @c UserTensor, if it belongs to the primary graph
+ * 2. Any other derivative of @c IPortableTensor from another backend, otherwise
+ *
+ * To support both, this object forwards everything to the actual tensor it points to.
+ * Exceptionally, when that tensor is a @c UserTensor, this class also creates and manages it.
+ */
+class IOTensor : public IPortableTensor
+{
+public:
+ IOTensor(const ir::OperandInfo &info, ir::Layout layout);
+
+public:
+ void setTensor(IPortableTensor *tensor);
+ void setUserTensor(uint8_t *buffer, size_t size);
+ ir::OperandInfo orig_info() const { return _orig_info; }
+ ir::Layout orig_layout() const { return _orig_layout; }
+
+public:
+ uint8_t *buffer() const override { return _tensor->buffer(); }
+ size_t total_size() const override { return _tensor->total_size(); }
+ size_t dimension(size_t index) const override { return _tensor->dimension(index); }
+ size_t num_dimensions() const override { return _tensor->num_dimensions(); }
+ size_t calcOffset(const ir::Coordinates &coords) const override
+ {
+ return _tensor->calcOffset(coords);
+ }
+ ir::Layout layout() const override { return _tensor->layout(); }
+ ir::DataType data_type() const override { return _tensor->data_type(); }
+ float data_scale() const override { return _tensor->data_scale(); }
+ int32_t data_offset() const override { return _tensor->data_offset(); }
+ bool is_dynamic() const override { return _is_dynamic || (_tensor && _tensor->is_dynamic()); }
+ void set_dynamic() override { _is_dynamic = true; }
+ ir::Shape getShape() const override { return _tensor->getShape(); }
+ void setShape(const ir::Shape &shape) override
+ {
+    // Workaround: IPortableTensor holds _info as its own member
+ _info.shape(shape);
+ _tensor->setShape(shape);
+ }
+ bool is_constant() const override { return _tensor->is_constant(); }
+ bool applyShape(const ir::Shape &shape) override
+ {
+    // Workaround: IPortableTensor holds _info as its own member
+ _info.shape(shape);
+ return _tensor->applyShape(shape);
+ }
+
+private:
+ const ir::OperandInfo _orig_info;
+ const ir::Layout _orig_layout;
+ bool _is_dynamic{false};
+  IPortableTensor *_tensor{nullptr};        ///< The actual tensor that accesses are forwarded to
+  std::unique_ptr<UserTensor> _user_tensor; ///< Used and owned only when the I/O is a user-provided buffer
+};
+
+} // namespace controlflow
+} // namespace backend
+} // namespace onert
+
+#endif // __ONERT_BACKEND_CONTROLFLOW_IO_TENSOR_H__
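
For orientation, here is a minimal usage sketch of the indirection above. It is illustrative only: `info` (an ir::OperandInfo), `user_buffer`, `user_buffer_size`, and `some_portable_tensor` are placeholders, and the controlflow backend headers added in this commit are assumed.

  using onert::backend::controlflow::IOTensor;

  // Describe the model I/O operand; the constructor binds an empty UserTensor,
  // so the indirection target is never null.
  IOTensor io_tensor{info, onert::ir::Layout::NHWC};

  // Primary graph: the user supplies the buffer, so the internal UserTensor wraps it.
  io_tensor.setUserTensor(user_buffer, user_buffer_size);

  // Nested graph (e.g. a while body): point at a tensor owned by another backend instead.
  // io_tensor.setTensor(some_portable_tensor);

  // Every accessor forwards to whichever tensor is currently bound.
  uint8_t *data = io_tensor.buffer();
  size_t bytes = io_tensor.total_size();
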
diff --git a/runtime/onert/core/src/backend/controlflow/KernelGenerator.cc b/runtime/onert/core/src/backend/controlflow/KernelGenerator.cc
index 8e39ee527..2606f044e 100644
--- a/runtime/onert/core/src/backend/controlflow/KernelGenerator.cc
+++ b/runtime/onert/core/src/backend/controlflow/KernelGenerator.cc
@@ -31,7 +31,7 @@ namespace backend
namespace controlflow
{
-KernelGenerator::KernelGenerator(const ir::Graph &graph, IDynamicTensorManager *dyn_tensor_manager,
+KernelGenerator::KernelGenerator(const ir::Graph &graph, DynamicTensorManager *dyn_tensor_manager,
const std::shared_ptr<TensorRegistry> &tensor_reg,
const std::shared_ptr<ExternalContext> &external_context)
: _graph{graph}, _dyn_tensor_manager{dyn_tensor_manager}, _tensor_reg{tensor_reg},
@@ -77,18 +77,17 @@ void KernelGenerator::visit(const ir::operation::If &node)
const auto then_subg_index = node.param().then_subg_index;
const auto else_subg_index = node.param().else_subg_index;
- std::vector<backend::ITensor *> input_tensors;
+ std::vector<backend::IPortableTensor *> input_tensors;
for (const auto input_index : node.getInputs())
{
- auto input_tensor = getTensor(input_index);
-
+ auto input_tensor = getPortableTensor(input_index);
input_tensors.emplace_back(input_tensor);
}
- std::vector<backend::ITensor *> output_tensors;
+ std::vector<backend::IPortableTensor *> output_tensors;
for (const auto output_index : node.getOutputs())
{
- auto output_tensor = getTensor(output_index);
+ auto output_tensor = getPortableTensor(output_index);
output_tensors.emplace_back(output_tensor);
}
@@ -97,8 +96,8 @@ void KernelGenerator::visit(const ir::operation::If &node)
const auto cond_tensor = input_tensors.front();
input_tensors.erase(input_tensors.begin());
auto fn = std::make_unique<::onert::backend::controlflow::kernel::IfLayer>(
- cond_tensor, input_tensors, output_tensors, node.getOutputs(), _graph, then_subg_index,
- else_subg_index, _executor_map, _external_context);
+ cond_tensor, input_tensors, output_tensors, then_subg_index, else_subg_index, _executor_map,
+ _external_context);
_return_fn = std::move(fn);
}
@@ -124,33 +123,40 @@ void KernelGenerator::visit(const ir::operation::While &node)
// This op does not support input as a constant, because controlflow backend does not have
// TensorBuilder
- std::vector<backend::ITensor *> input_tensors;
+ std::vector<backend::IPortableTensor *> input_tensors;
for (const auto input_index : node.getInputs())
{
- auto input_tensor = getTensor(input_index);
-
+ auto input_tensor = getPortableTensor(input_index);
input_tensors.emplace_back(input_tensor);
}
- std::vector<backend::ITensor *> output_tensors;
+ std::vector<backend::IPortableTensor *> output_tensors;
for (const auto output_index : node.getOutputs())
{
- auto output_tensor = getTensor(output_index);
+ auto output_tensor = getPortableTensor(output_index);
output_tensors.emplace_back(output_tensor);
}
  // WhileLayer just sets ExecutorMap instead of cond and body executors to avoid the complexity of
  // creating executors recursively
auto fn = std::make_unique<::onert::backend::controlflow::kernel::WhileLayer>(
- input_tensors, output_tensors, node.getOutputs(), _graph, cond_subg_index, body_subg_index,
- _executor_map, _external_context);
+ input_tensors, output_tensors, cond_subg_index, body_subg_index, _executor_map,
+ _dyn_tensor_manager->dynamic_mem_mgr().get(), _external_context);
_return_fn = std::move(fn);
}
backend::ITensor *KernelGenerator::getTensor(const ir::OperandIndex &index)
{
- backend::ITensor *ret = _tensor_registries.getITensor(index);
+ // get Tensor from all tensor registries (for Permute op)
+ auto ret = _tensor_registries.getITensor(index);
+ assert(ret != nullptr);
+ return ret;
+}
+
+backend::IPortableTensor *KernelGenerator::getPortableTensor(const ir::OperandIndex &index)
+{
+ auto ret = _tensor_reg->getPortableTensor(index);
assert(ret != nullptr);
return ret;
}
diff --git a/runtime/onert/core/src/backend/controlflow/KernelGenerator.h b/runtime/onert/core/src/backend/controlflow/KernelGenerator.h
index c2c124339..7b395d186 100644
--- a/runtime/onert/core/src/backend/controlflow/KernelGenerator.h
+++ b/runtime/onert/core/src/backend/controlflow/KernelGenerator.h
@@ -17,13 +17,12 @@
#ifndef __ONERT_BACKEND_CONTROLFLOW_KERNEL_GENERATOR_H__
#define __ONERT_BACKEND_CONTROLFLOW_KERNEL_GENERATOR_H__
-#include <backend/IKernelGenerator.h>
-#include <backend/ITensorBuilder.h>
#include <exec/IExecutor.h>
#include "ExternalContext.h"
#include <ir/Graph.h>
#include "TensorBuilder.h"
#include "compiler/TensorRegistries.h"
+#include "backend/cpu_common/KernelGeneratorBase.h"
#include "TensorRegistry.h"
namespace onert
@@ -33,10 +32,10 @@ namespace backend
namespace controlflow
{
-class KernelGenerator : public IKernelGenerator
+class KernelGenerator : public cpu_common::KernelGeneratorBase
{
public:
- KernelGenerator(const ir::Graph &graph, IDynamicTensorManager *dyn_tensor_manager,
+ KernelGenerator(const ir::Graph &graph, DynamicTensorManager *dyn_tensor_manager,
const std::shared_ptr<TensorRegistry> &tensor_reg,
const std::shared_ptr<ExternalContext> &external_context);
@@ -50,8 +49,6 @@ public:
_executor_map = executor_map.get();
}
- using IKernelGenerator::visit;
-
void visit(const ir::OpSequence &) override;
void visit(const ir::operation::If &) override;
void visit(const ir::operation::Permute &) override;
@@ -59,10 +56,11 @@ public:
private:
backend::ITensor *getTensor(const ir::OperandIndex &index);
+ backend::IPortableTensor *getPortableTensor(const ir::OperandIndex &index);
private:
const ir::Graph &_graph;
- IDynamicTensorManager *_dyn_tensor_manager;
+ DynamicTensorManager *_dyn_tensor_manager;
std::shared_ptr<TensorRegistry> _tensor_reg;
compiler::TensorRegistries _tensor_registries;
exec::ExecutorMap *_executor_map;
diff --git a/runtime/onert/core/src/backend/controlflow/Tensor.h b/runtime/onert/core/src/backend/controlflow/Tensor.h
index ba5bafd75..87951a9b3 100644
--- a/runtime/onert/core/src/backend/controlflow/Tensor.h
+++ b/runtime/onert/core/src/backend/controlflow/Tensor.h
@@ -27,6 +27,7 @@ namespace controlflow
{
using Tensor = cpu_common::Tensor;
+using ExternalTensor = cpu_common::ExternalTensor;
} // namespace controlflow
} // namespace backend
diff --git a/runtime/onert/core/src/backend/controlflow/TensorBuilder.cc b/runtime/onert/core/src/backend/controlflow/TensorBuilder.cc
index e4b0388f9..a767f0eca 100644
--- a/runtime/onert/core/src/backend/controlflow/TensorBuilder.cc
+++ b/runtime/onert/core/src/backend/controlflow/TensorBuilder.cc
@@ -30,8 +30,8 @@ namespace controlflow
TensorBuilder::TensorBuilder(const std::shared_ptr<TensorRegistry> &tensor_reg)
: _tensor_reg{tensor_reg},
_dynamic_tensor_mgr{new DynamicTensorManager(_tensor_reg->base_reg())},
- _static_tensor_mgr{new cpu_common::StaticTensorManager(
- _tensor_reg->base_reg(), _dynamic_tensor_mgr->dynamic_mem_mgr().get())}
+ _static_tensor_mgr{
+ new cpu_common::StaticTensorManager(_tensor_reg->base_reg(), _dynamic_tensor_mgr.get())}
{
/* empty */
}
@@ -90,11 +90,7 @@ bool TensorBuilder::isRegistered(const ir::OperandIndex &ind) const
return _tensor_info_map.find(ind) != _tensor_info_map.end();
}
-void TensorBuilder::prepare(void)
-{
- _static_tensor_mgr->allocateConsts();
- _static_tensor_mgr->allocateNonconsts();
-}
+void TensorBuilder::prepare(void) { _static_tensor_mgr->allocateNonconsts(); }
void TensorBuilder::allocate()
{
@@ -102,7 +98,7 @@ void TensorBuilder::allocate()
// This is because CPU kernels require `ITensor`s to be allocated before Kernel Generation.
}
-IDynamicTensorManager *TensorBuilder::dynamicTensorManager(void)
+DynamicTensorManager *TensorBuilder::dynamicTensorManager(void)
{
return _dynamic_tensor_mgr.get();
}
diff --git a/runtime/onert/core/src/backend/controlflow/TensorBuilder.h b/runtime/onert/core/src/backend/controlflow/TensorBuilder.h
index 695994761..d2e3076fd 100644
--- a/runtime/onert/core/src/backend/controlflow/TensorBuilder.h
+++ b/runtime/onert/core/src/backend/controlflow/TensorBuilder.h
@@ -21,7 +21,6 @@
#include <backend/cpu_common/TensorRegistry.h>
#include <backend/cpu_common/Tensor.h>
-#include <backend/ITensorBuilder.h>
#include <ir/OperandIndexMap.h>
#include <unordered_map>
@@ -35,7 +34,7 @@ namespace backend
namespace controlflow
{
-class TensorBuilder : public ITensorBuilder
+class TensorBuilder
{
public:
TensorBuilder(const std::shared_ptr<TensorRegistry> &tensor_reg);
@@ -47,18 +46,18 @@ public:
* @param[in] layout Operand data layout
*/
void registerTensorInfo(const ir::OperandIndex &ind, const ir::OperandInfo &info,
- ir::Layout backend_layout) override;
+ ir::Layout backend_layout);
- void notifyFirstUse(const ir::OperandIndex &) override;
- void notifyLastUse(const ir::OperandIndex &) override;
+ void notifyFirstUse(const ir::OperandIndex &);
+ void notifyLastUse(const ir::OperandIndex &);
- bool isRegistered(const ir::OperandIndex &) const override;
+ bool isRegistered(const ir::OperandIndex &) const;
- void prepare(void) override;
- void allocate() override;
- void postFunctionPrepare() override { /* DO NOTHING */}
+ void prepare(void);
+ void allocate();
+ void postFunctionPrepare() { /* DO NOTHING */}
- IDynamicTensorManager *dynamicTensorManager(void) override;
+ DynamicTensorManager *dynamicTensorManager(void);
/**
* @brief Get tensor with a specific OperandIndex.
diff --git a/runtime/onert/core/src/backend/controlflow/TensorRegistry.h b/runtime/onert/core/src/backend/controlflow/TensorRegistry.h
index 94f71bb9c..901f0aebb 100644
--- a/runtime/onert/core/src/backend/controlflow/TensorRegistry.h
+++ b/runtime/onert/core/src/backend/controlflow/TensorRegistry.h
@@ -20,7 +20,7 @@
#include "backend/cpu_common/TensorRegistry.h"
#include "backend/ITensorRegistry.h"
#include "Tensor.h"
-#include "UserTensor.h"
+#include "IOTensor.h"
#include <assert.h>
namespace onert
@@ -36,9 +36,10 @@ namespace controlflow
 * This class contains three types of tensors: two kinds of native tensors (tensors that are managed
 * by this backend) and migrant tensors.
*
- * - NativeUserTensor - @c UserTensor managed by this backend, buffer is user-given
- * - NativeOwnTensor - @c cpu_common::Tensor managed by this backend ( in @c _base_reg )
- * - MigrantTensor - @c IPortableTensor managed by other backends ( in @c _base_reg )
+ * - NativeIOTensor - @c IOTensor managed by this backend ( in @c _native_io_tensors )
+ * - NOTE The tensor it actually points to can be from another backend
+ * - NativeOwnTensor - @c cpu_common::Tensor managed by this backend ( in @c _base_reg )
+ * - MigrantTensor - @c IPortableTensor managed by other backends
*
* @note @c _base_reg is used in implementation to reuse @c cpu_common::StaticTensorManager
*
@@ -53,7 +54,7 @@ public:
auto base_tensor = _base_reg->getITensor(ind);
if (base_tensor)
return base_tensor;
- return getNativeUserTensor(ind);
+ return getNativeIOTensor(ind);
}
ITensor *getNativeITensor(const ir::OperandIndex &ind) override
@@ -61,7 +62,7 @@ public:
auto base_tensor = _base_reg->getNativeITensor(ind);
if (base_tensor)
return base_tensor;
- return getNativeUserTensor(ind);
+ return getNativeIOTensor(ind);
}
IPortableTensor *getPortableTensor(const ir::OperandIndex &ind)
@@ -69,7 +70,7 @@ public:
auto base_tensor = _base_reg->getPortableTensor(ind);
if (base_tensor)
return base_tensor;
- return getNativeUserTensor(ind);
+ return getNativeIOTensor(ind);
}
IPortableTensor *getNativeTensor(const ir::OperandIndex &ind)
@@ -77,7 +78,7 @@ public:
auto base_tensor = _base_reg->getNativeTensor(ind);
if (base_tensor)
return base_tensor;
- return getNativeUserTensor(ind);
+ return getNativeIOTensor(ind);
}
Tensor *getNativeOwnTensor(const ir::OperandIndex &ind)
@@ -85,10 +86,10 @@ public:
return _base_reg->getNativeTensor(ind);
}
- UserTensor *getNativeUserTensor(const ir::OperandIndex &ind)
+ IOTensor *getNativeIOTensor(const ir::OperandIndex &ind)
{
- auto tensor = _native_user_tensors.find(ind);
- if (tensor != _native_user_tensors.end())
+ auto tensor = _native_io_tensors.find(ind);
+ if (tensor != _native_io_tensors.end())
return tensor->second.get();
return nullptr;
}
@@ -108,22 +109,22 @@ public:
_base_reg->setNativeTensor(ind, std::move(tensor));
}
- void setNativeUserTensor(ir::OperandIndex ind, std::unique_ptr<UserTensor> &&tensor)
+ void setNativeIOTensor(ir::OperandIndex ind, std::unique_ptr<IOTensor> &&tensor)
{
assert(tensor);
assert(!getITensor(ind)); // For the ind, tensor is not registered yet
- _native_user_tensors[ind] = std::move(tensor);
+ _native_io_tensors[ind] = std::move(tensor);
}
- const ir::OperandIndexMap<std::unique_ptr<UserTensor>> &native_user_tensors()
+ const ir::OperandIndexMap<std::unique_ptr<IOTensor>> &native_io_tensors()
{
- return _native_user_tensors;
+ return _native_io_tensors;
}
std::shared_ptr<cpu_common::TensorRegistry> base_reg() { return _base_reg; }
private:
std::shared_ptr<cpu_common::TensorRegistry> _base_reg;
- ir::OperandIndexMap<std::unique_ptr<UserTensor>> _native_user_tensors;
+ ir::OperandIndexMap<std::unique_ptr<IOTensor>> _native_io_tensors;
};
} // namespace controlflow
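
All of the lookup methods above share the same fallback: try the wrapped cpu_common registry first, then the IOTensor map. A hedged sketch of that call path, where `reg` (a controlflow::TensorRegistry) and `operand_index` are assumed to exist:

  // 1) Hits _base_reg if the operand is a native own tensor or a migrant tensor,
  // 2) otherwise falls through to getNativeIOTensor(), which may return nullptr.
  onert::backend::IPortableTensor *tensor = reg.getPortableTensor(operand_index);
  if (tensor == nullptr)
  {
    // The operand is not known to the controlflow backend at all.
  }
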
diff --git a/runtime/onert/core/src/backend/controlflow/kernel/IfLayer.cc b/runtime/onert/core/src/backend/controlflow/kernel/IfLayer.cc
index de91b850a..1d786c4dd 100644
--- a/runtime/onert/core/src/backend/controlflow/kernel/IfLayer.cc
+++ b/runtime/onert/core/src/backend/controlflow/kernel/IfLayer.cc
@@ -18,7 +18,6 @@
#include <backend/ITensor.h>
#include "exec/ExecutorBase.h"
-#include <misc/polymorphic_downcast.h>
#include "PermuteLayer.h"
namespace onert
@@ -30,16 +29,15 @@ namespace controlflow
namespace kernel
{
-IfLayer::IfLayer(backend::ITensor *cond_tensor, const std::vector<backend::ITensor *> input_tensors,
- const std::vector<backend::ITensor *> output_tensors,
- const ir::OperandIndexSequence &output_indices, const ir::Graph &graph,
+IfLayer::IfLayer(backend::IPortableTensor *cond_tensor,
+ const std::vector<backend::IPortableTensor *> input_tensors,
+ const std::vector<backend::IPortableTensor *> output_tensors,
const ir::SubgraphIndex &then_subg_index, const ir::SubgraphIndex &else_subg_index,
exec::ExecutorMap *executor_map,
const std::shared_ptr<ExternalContext> &external_context)
: _cond_tensor{cond_tensor}, _input_tensors{input_tensors}, _output_tensors{output_tensors},
- _output_indices{output_indices}, _graph{graph}, _then_subg_index{then_subg_index},
- _else_subg_index{else_subg_index}, _executor_map{executor_map},
- _external_context{external_context}
+ _then_subg_index{then_subg_index}, _else_subg_index{else_subg_index},
+ _executor_map{executor_map}, _external_context{external_context}
{
// At this point, executor_map may not have executors of then subg and else subg
}
@@ -48,79 +46,34 @@ void IfLayer::run()
{
// Check condition
// // If true
- // // // Copy _input_tensors -> then subg's inputs
- // // // Run then subg
- // // // Copy outputs of then subg -> _output_tensors
+ // // // Set _input_tensors -> then-subg's inputs
+ // // // Set outputs of then-subg -> _output_tensors
+ // // // Run then-subg
// // Else
- // // // Copy _input_tensors -> else subg's inputs if false
- // // // Run else subg
- // // // Copy outputs of else subg -> _output_tensors
- auto getResultCond = [](backend::ITensor *tensor) -> bool {
+ // // // Set _input_tensors -> else-subg's inputs
+ // // // Set outputs of else-subg -> _output_tensors
+ // // // Run else-subg
+
+ auto getResultCond = [](backend::IPortableTensor *tensor) -> bool {
bool ret = false;
tensor->access([&](ITensor &tensor) { ret = *reinterpret_cast<bool *>(tensor.buffer()); });
return ret;
};
- exec::ExecutorBase *subg_exec = nullptr;
+ exec::IExecutor *subg_exec = nullptr;
bool cond_result = getResultCond(_cond_tensor);
if (cond_result)
{
VERBOSE(If) << "Call to $" << _then_subg_index << " (then)" << std::endl;
- subg_exec = nnfw::misc::polymorphic_downcast<exec::ExecutorBase *>(
- _executor_map->at(_then_subg_index).get());
+ subg_exec = _executor_map->at(_then_subg_index).get();
}
else
{
VERBOSE(If) << "Call to $" << _else_subg_index << " (else)" << std::endl;
- subg_exec = nnfw::misc::polymorphic_downcast<exec::ExecutorBase *>(
- _executor_map->at(_else_subg_index).get());
- }
-
- const auto &subg_graph = subg_exec->graph();
-
- std::vector<backend::ITensor *> src_tensors;
- std::vector<backend::ITensor *> dst_tensors;
- // Add tensors used in subgraph or contained in outputs of subgraph
- assert(subg_graph.getInputs().size() == _input_tensors.size());
- assert(subg_graph.getInputs().size() == subg_exec->getInputTensors().size());
- for (uint32_t i = 0; i < subg_graph.getInputs().size(); ++i)
- {
- const auto &subg_input_index = subg_graph.getInputs().at(i);
- const auto &subg_input = subg_graph.operands().at(subg_input_index);
- if (subg_input.getUses().size() > 0 || subg_graph.getOutputs().contains(subg_input_index))
- {
- src_tensors.emplace_back(_input_tensors.at(i));
- dst_tensors.emplace_back(subg_exec->getInputTensors().at(i));
- }
+ subg_exec = _executor_map->at(_else_subg_index).get();
}
- const auto permute_op_input_to_subg_input =
- std::make_shared<PermuteLayer>(src_tensors, dst_tensors, _external_context);
-
- // Add tensors used as output of operation or contained in outputs of operation
- src_tensors.clear();
- dst_tensors.clear();
- assert(_output_indices.size() == subg_exec->getOutputTensors().size());
- assert(_output_indices.size() == _output_tensors.size());
- for (uint32_t i = 0; i < _output_indices.size(); ++i)
- {
- const auto &output_index = _output_indices.at(i);
- const auto &output = _graph.operands().at(output_index);
- if (output.getUses().size() > 0 || _graph.getOutputs().contains(output_index))
- {
- src_tensors.emplace_back(subg_exec->getOutputTensors().at(i));
- dst_tensors.emplace_back(_output_tensors.at(i));
- }
- }
- const auto permute_subg_output_to_op_output =
- std::make_shared<PermuteLayer>(src_tensors, dst_tensors, _external_context);
-
- // Remove copying of unused tensor
- permute_op_input_to_subg_input->prepare();
- permute_subg_output_to_op_output->prepare();
- // Copy & run
- subg_exec->execute(_input_tensors, permute_op_input_to_subg_input);
- permute_subg_output_to_op_output->run();
+ subg_exec->execute(_input_tensors, _output_tensors);
VERBOSE(If) << "Return from $" << (cond_result ? _then_subg_index : _else_subg_index)
<< std::endl;
}
diff --git a/runtime/onert/core/src/backend/controlflow/kernel/IfLayer.h b/runtime/onert/core/src/backend/controlflow/kernel/IfLayer.h
index 9e944bccc..967552fc3 100644
--- a/runtime/onert/core/src/backend/controlflow/kernel/IfLayer.h
+++ b/runtime/onert/core/src/backend/controlflow/kernel/IfLayer.h
@@ -17,7 +17,7 @@
#ifndef __ONERT_BACKEND_CONTROLFLOW_KERNEL_IF_LAYER_H__
#define __ONERT_BACKEND_CONTROLFLOW_KERNEL_IF_LAYER_H__
-#include <backend/ITensor.h>
+#include <backend/IPortableTensor.h>
#include <exec/IExecutor.h>
#include "../ExternalContext.h"
@@ -33,9 +33,9 @@ namespace kernel
class IfLayer : public ::onert::exec::IFunction
{
public:
- IfLayer(backend::ITensor *cond_tensor, const std::vector<backend::ITensor *> input_tensors,
- const std::vector<backend::ITensor *> output_tensors,
- const ir::OperandIndexSequence &output_indices, const ir::Graph &graph,
+ IfLayer(backend::IPortableTensor *cond_tensor,
+ const std::vector<backend::IPortableTensor *> input_tensors,
+ const std::vector<backend::IPortableTensor *> output_tensors,
const ir::SubgraphIndex &then_subg_index, const ir::SubgraphIndex &else_subg_index,
exec::ExecutorMap *executor_map,
const std::shared_ptr<ExternalContext> &external_context);
@@ -44,11 +44,9 @@ public:
void run() override;
private:
- backend::ITensor *_cond_tensor;
- const std::vector<backend::ITensor *> _input_tensors;
- const std::vector<backend::ITensor *> _output_tensors;
- const ir::OperandIndexSequence &_output_indices;
- const ir::Graph &_graph;
+ backend::IPortableTensor *_cond_tensor;
+ const std::vector<backend::IPortableTensor *> _input_tensors;
+ const std::vector<backend::IPortableTensor *> _output_tensors;
const ir::SubgraphIndex _then_subg_index;
const ir::SubgraphIndex _else_subg_index;
exec::ExecutorMap *_executor_map;
diff --git a/runtime/onert/core/src/backend/controlflow/kernel/PermuteLayer.h b/runtime/onert/core/src/backend/controlflow/kernel/PermuteLayer.h
index 5d0f1918e..6fb69b65c 100644
--- a/runtime/onert/core/src/backend/controlflow/kernel/PermuteLayer.h
+++ b/runtime/onert/core/src/backend/controlflow/kernel/PermuteLayer.h
@@ -17,7 +17,6 @@
#ifndef __ONERT_BACKEND_CONTROLFLOW_KERNEL_PERMUTELAYER_H__
#define __ONERT_BACKEND_CONTROLFLOW_KERNEL_PERMUTELAYER_H__
-#include "backend/ITensorBuilder.h"
#include "exec/IPermuteFunction.h"
#include "exec/IExecutor.h"
#include "../ExternalContext.h"
diff --git a/runtime/onert/core/src/backend/controlflow/kernel/WhileLayer.cc b/runtime/onert/core/src/backend/controlflow/kernel/WhileLayer.cc
index a0d478603..a4b5aa5ca 100644
--- a/runtime/onert/core/src/backend/controlflow/kernel/WhileLayer.cc
+++ b/runtime/onert/core/src/backend/controlflow/kernel/WhileLayer.cc
@@ -16,6 +16,7 @@
#include "WhileLayer.h"
+#include <algorithm>
#include <backend/ITensor.h>
#include "exec/ExecutorBase.h"
#include <misc/polymorphic_downcast.h>
@@ -30,16 +31,15 @@ namespace controlflow
namespace kernel
{
-WhileLayer::WhileLayer(const std::vector<backend::ITensor *> input_tensors,
- const std::vector<backend::ITensor *> output_tensors,
- const ir::OperandIndexSequence &output_indices, const ir::Graph &graph,
+WhileLayer::WhileLayer(const std::vector<backend::IPortableTensor *> input_tensors,
+ const std::vector<backend::IPortableTensor *> output_tensors,
const ir::SubgraphIndex &cond_subg_index,
const ir::SubgraphIndex &body_subg_index, exec::ExecutorMap *executor_map,
+ cpu_common::DynamicMemoryManager *dyn_memory_manager,
const std::shared_ptr<ExternalContext> &external_context)
: _cond_subg_index{cond_subg_index}, _body_subg_index{body_subg_index},
- _output_indices{output_indices}, _graph{graph}, _input_tensors{input_tensors},
- _output_tensors{output_tensors}, _executor_map{executor_map},
- _external_context{external_context}
+ _input_tensors{input_tensors}, _output_tensors{output_tensors}, _executor_map{executor_map},
+ _dyn_memory_manager{dyn_memory_manager}, _external_context{external_context}
{
// At this point, executor_map may not have executors of cond subg and body subg
}
@@ -56,164 +56,90 @@ void WhileLayer::run()
// // Run cond subg
// If there is no loop copy "_input_tensors" -> "_dst_tensors", else copy "cond subg inputs" ->
// "_dst_tensors"
- auto cond_exec = nnfw::misc::polymorphic_downcast<exec::ExecutorBase *>(
- _executor_map->at(_cond_subg_index).get());
- auto body_exec = nnfw::misc::polymorphic_downcast<exec::ExecutorBase *>(
- _executor_map->at(_body_subg_index).get());
-
- const auto &cond_graph = cond_exec->graph();
- const auto &body_graph = body_exec->graph();
-
- std::vector<backend::ITensor *> input_tensors;
- std::vector<backend::ITensor *> cond_input_tensors;
- std::vector<backend::ITensor *> body_input_tensors;
- std::vector<backend::ITensor *> body_output_tensors;
- std::vector<backend::ITensor *> output_tensors;
-
- // Add only used tensors in cond subgraph
- assert(cond_graph.getInputs().size() == _input_tensors.size());
- assert(cond_graph.getInputs().size() == cond_exec->getInputTensors().size());
- for (uint32_t i = 0; i < cond_graph.getInputs().size(); ++i)
- {
- const auto &cond_input = cond_graph.operands().at(cond_graph.getInputs().at(i));
- if (cond_input.getUses().size() > 0)
- {
- input_tensors.emplace_back(_input_tensors.at(i));
- cond_input_tensors.emplace_back(cond_exec->getInputTensors().at(i));
- }
- }
- const auto permute_op_input_to_cond_input =
- std::make_shared<PermuteLayer>(input_tensors, cond_input_tensors, _external_context);
-
- // Add only used tensors among outputs of while operation
- assert(_output_indices.size() == _input_tensors.size());
- assert(_output_indices.size() == _output_tensors.size());
- input_tensors.clear();
- output_tensors.clear();
- for (size_t i = 0; i < _output_indices.size(); ++i)
- {
- const auto &output_index = _output_indices.at(i);
- const auto &output = _graph.operands().at(output_index);
- if (output.getUses().size() > 0 || _graph.getOutputs().contains(output_index))
- {
- input_tensors.emplace_back(_input_tensors.at(i));
- output_tensors.emplace_back(_output_tensors.at(i));
- }
- }
- const auto permute_op_input_to_op_output =
- std::make_shared<PermuteLayer>(input_tensors, output_tensors, _external_context);
-
- // Add all tensors with unused tensors in body subgraph because unused input tensors will be
- // copied output tensors in body subgraph
- assert(_input_tensors.size() == body_exec->getInputTensors().size());
- input_tensors = _input_tensors;
- body_input_tensors = body_exec->getInputTensors();
- const auto permute_op_input_to_body_input =
- std::make_shared<PermuteLayer>(input_tensors, body_input_tensors, _external_context);
-
- // Add only used tensors in cond subgraph
- assert(cond_graph.getInputs().size() == body_exec->getOutputTensors().size());
- assert(cond_graph.getInputs().size() == cond_exec->getInputTensors().size());
- body_output_tensors.clear();
- cond_input_tensors.clear();
- for (uint32_t i = 0; i < cond_graph.getInputs().size(); ++i)
- {
- const auto &cond_input = cond_graph.operands().at(cond_graph.getInputs().at(i));
- if (cond_input.getUses().size() > 0)
- {
- body_output_tensors.emplace_back(body_exec->getOutputTensors().at(i));
- cond_input_tensors.emplace_back(cond_exec->getInputTensors().at(i));
- }
- }
- const auto permute_body_output_to_cond_input =
- std::make_shared<PermuteLayer>(body_output_tensors, cond_input_tensors, _external_context);
-
- // Add only used tensors in body subgraph
- assert(body_graph.getInputs().size() == body_exec->getOutputTensors().size());
- assert(body_graph.getInputs().size() == body_exec->getInputTensors().size());
- body_output_tensors.clear();
- body_input_tensors.clear();
- for (uint32_t i = 0; i < body_graph.getInputs().size(); ++i)
- {
- const auto &body_input_index = body_graph.getInputs().at(i);
- const auto &body_input = body_graph.operands().at(body_input_index);
- if (body_input.getUses().size() > 0 &&
- !body_exec->graph().getOutputs().contains(body_input_index))
- {
- body_output_tensors.emplace_back(body_exec->getOutputTensors().at(i));
- body_input_tensors.emplace_back(body_exec->getInputTensors().at(i));
- }
- }
- const auto permute_body_output_to_body_input =
- std::make_shared<PermuteLayer>(body_output_tensors, body_input_tensors, _external_context);
-
- // Add only used tensors among outputs of while operation
- assert(_output_indices.size() == body_exec->getOutputTensors().size());
- assert(_output_indices.size() == _output_tensors.size());
- body_output_tensors.clear();
- output_tensors.clear();
- for (size_t i = 0; i < _output_indices.size(); ++i)
- {
- const auto &output_index = _output_indices.at(i);
- const auto &output = _graph.operands().at(output_index);
- if (output.getUses().size() > 0 || _graph.getOutputs().contains(output_index))
- {
- body_output_tensors.emplace_back(body_exec->getOutputTensors().at(i));
- output_tensors.emplace_back(_output_tensors.at(i));
- }
- }
- const auto permute_body_output_to_op_output =
- std::make_shared<PermuteLayer>(body_output_tensors, output_tensors, _external_context);
+ auto cond_exec = _executor_map->at(_cond_subg_index).get();
+ auto body_exec = _executor_map->at(_body_subg_index).get();
- // Remove copying of unused tensor
- permute_op_input_to_cond_input->prepare();
- permute_op_input_to_op_output->prepare();
- permute_op_input_to_body_input->prepare();
- permute_body_output_to_cond_input->prepare();
- permute_body_output_to_body_input->prepare();
- permute_body_output_to_op_output->prepare();
+ // Need a temp tensor to hold the cond subgraph output
+ assert(cond_exec->getOutputTensors().size() == 1);
+ auto cond_output_tensor = [&]() {
+ auto cond_output = cond_exec->getOutputTensors().at(0);
+ auto tensor = std::make_unique<Tensor>(cond_output->orig_info(), cond_output->orig_layout(),
+ _dyn_memory_manager);
+ tensor->set_dynamic();
+ tensor->setBuffer(_dyn_memory_manager->allocate(tensor.get(), tensor->total_size()));
+ return tensor;
+ }();
VERBOSE(While) << "Call to $" << _cond_subg_index << " (cond)" << std::endl;
- cond_exec->execute(_input_tensors, permute_op_input_to_cond_input);
+ cond_exec->execute(_input_tensors, {cond_output_tensor.get()});
VERBOSE(While) << "Return from $" << _cond_subg_index << std::endl;
- assert(cond_exec->getOutputTensors().size() == 1);
- auto &cond_output_tensor = cond_exec->getOutputTensors().at(0);
auto getResultCond = [](backend::ITensor *tensor) -> bool {
bool ret = false;
tensor->access([&](ITensor &tensor) { ret = *reinterpret_cast<bool *>(tensor.buffer()); });
return ret;
};
+ std::vector<ITensor *> op_inputs(_input_tensors.begin(), _input_tensors.end());
+ std::vector<ITensor *> op_outputs(_output_tensors.begin(), _output_tensors.end());
+  // If the loop body never executes, copy the op inputs straight to the op outputs
+ if (!getResultCond(cond_output_tensor.get()))
+ {
+ PermuteLayer copy_body_inputs_to_op_outputs{op_inputs, op_outputs, _external_context};
+ copy_body_inputs_to_op_outputs.run();
+ return;
+ }
+
+ // Need some temp tensors to hold the body subgraph output
+ std::vector<std::unique_ptr<Tensor>> temp_outputs_o;
+ std::vector<IPortableTensor *> temp_outputs;
+ for (auto io_tensor : body_exec->getOutputTensors())
+ {
+ auto tensor = std::make_unique<Tensor>(io_tensor->orig_info(), io_tensor->orig_layout(),
+ _dyn_memory_manager);
+ tensor->set_dynamic();
+ tensor->setBuffer(_dyn_memory_manager->allocate(tensor.get(), tensor->total_size()));
+ temp_outputs.push_back(tensor.get());
+ temp_outputs_o.push_back(std::move(tensor));
+ }
+
+ std::vector<ITensor *> body_outputs(temp_outputs.begin(), temp_outputs.end());
+ PermuteLayer copy_body_outputs_to_op_outputs{body_outputs, op_outputs, _external_context};
+
const auto body_execute_with_op_inputs = [&]() {
VERBOSE(While) << "Call to $" << _body_subg_index << " (body)" << std::endl;
- body_exec->execute(_input_tensors, permute_op_input_to_body_input);
+ body_exec->execute(_input_tensors, temp_outputs);
VERBOSE(While) << "Return from $" << _body_subg_index << std::endl;
};
const auto body_execute_with_body_outputs = [&]() {
VERBOSE(While) << "Call to $" << _body_subg_index << " (body)" << std::endl;
- body_exec->execute(body_exec->getOutputTensors(), permute_body_output_to_body_input);
+ body_exec->execute(_output_tensors, temp_outputs);
VERBOSE(While) << "Return from $" << _body_subg_index << std::endl;
};
std::function<void()> body_execute = body_execute_with_op_inputs;
const auto cond_execute = [&]() {
VERBOSE(While) << "Call to $" << _cond_subg_index << " (cond)" << std::endl;
- cond_exec->execute(body_exec->getOutputTensors(), permute_body_output_to_cond_input);
+ cond_exec->execute(_output_tensors, {cond_output_tensor.get()});
VERBOSE(While) << "Return from $" << _cond_subg_index << std::endl;
};
- auto permute_to_outputs_fn = permute_op_input_to_op_output;
// Loop while Cond subgraph's output is true
- while (getResultCond(cond_output_tensor))
+ while (getResultCond(cond_output_tensor.get()))
{
body_execute();
+ copy_body_outputs_to_op_outputs.run();
cond_execute();
body_execute = body_execute_with_body_outputs;
- permute_to_outputs_fn = permute_body_output_to_op_output;
}
- permute_to_outputs_fn->run();
+
+ // Clean-up the temp tensors
+ _dyn_memory_manager->deallocate(cond_output_tensor.get());
+ for (auto tensor : temp_outputs)
+ {
+ _dyn_memory_manager->deallocate(tensor);
+ }
}
} // namespace kernel
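
Condensed, the rewritten WhileLayer::run() above reduces to the following control flow (a pseudocode summary of the code in this hunk, not additional API):

  // cond(op_inputs) -> cond_out                       // temp tensor from _dyn_memory_manager
  // if (!cond_out) { copy op_inputs -> op_outputs; return; }
  // do {
  //   body(op_inputs on the 1st iteration, op_outputs afterwards) -> temp_outputs
  //   copy temp_outputs -> op_outputs                  // PermuteLayer handles the copy/layout
  //   cond(op_outputs) -> cond_out
  // } while (cond_out);
  // deallocate cond_out and temp_outputs
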
diff --git a/runtime/onert/core/src/backend/controlflow/kernel/WhileLayer.h b/runtime/onert/core/src/backend/controlflow/kernel/WhileLayer.h
index 8f82bd973..d3924c843 100644
--- a/runtime/onert/core/src/backend/controlflow/kernel/WhileLayer.h
+++ b/runtime/onert/core/src/backend/controlflow/kernel/WhileLayer.h
@@ -17,13 +17,15 @@
#ifndef __ONERT_BACKEND_CONTROLFLOW_KERNEL_WHILE_LAYER_H__
#define __ONERT_BACKEND_CONTROLFLOW_KERNEL_WHILE_LAYER_H__
-#include <backend/ITensor.h>
+#include <backend/IPortableTensor.h>
#include <exec/IExecutor.h>
#include <exec/IFunction.h>
#include <ir/OperandIndexSequence.h>
#include <ir/Graph.h>
#include "../ExternalContext.h"
+#include "backend/cpu_common/MemoryManager.h"
+
namespace onert
{
namespace backend
@@ -36,11 +38,10 @@ namespace kernel
class WhileLayer : public ::onert::exec::IFunction
{
public:
- WhileLayer(const std::vector<backend::ITensor *> input_tensors,
- const std::vector<backend::ITensor *> output_tensors,
- const ir::OperandIndexSequence &output_indices, const ir::Graph &graph,
+ WhileLayer(const std::vector<backend::IPortableTensor *> input_tensors,
+ const std::vector<backend::IPortableTensor *> output_tensors,
const ir::SubgraphIndex &cond_subg_index, const ir::SubgraphIndex &body_subg_index,
- exec::ExecutorMap *executor_map,
+ exec::ExecutorMap *executor_map, cpu_common::DynamicMemoryManager *dyn_memory_manager,
const std::shared_ptr<ExternalContext> &external_context);
public:
@@ -49,11 +50,10 @@ public:
private:
const ir::SubgraphIndex _cond_subg_index;
const ir::SubgraphIndex _body_subg_index;
- const ir::OperandIndexSequence &_output_indices;
- const ir::Graph &_graph;
- const std::vector<backend::ITensor *> _input_tensors;
- const std::vector<backend::ITensor *> _output_tensors;
+ const std::vector<backend::IPortableTensor *> _input_tensors;
+ const std::vector<backend::IPortableTensor *> _output_tensors;
exec::ExecutorMap *_executor_map;
+ cpu_common::DynamicMemoryManager *_dyn_memory_manager; // For generating temp tensors
const std::shared_ptr<ExternalContext> _external_context;
};
diff --git a/runtime/onert/core/src/backend/cpu_common/BackendContextHelpers.cc b/runtime/onert/core/src/backend/cpu_common/BackendContextHelpers.cc
new file mode 100644
index 000000000..732b03ce8
--- /dev/null
+++ b/runtime/onert/core/src/backend/cpu_common/BackendContextHelpers.cc
@@ -0,0 +1,17 @@
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "backend/cpu_common/BackendContextHelpers.h"
diff --git a/runtime/onert/backend/cpu/ConstantInitializer.cc b/runtime/onert/core/src/backend/cpu_common/ConstantInitializer.cc
index 6f6eb77bc..610ba5ffc 100644
--- a/runtime/onert/backend/cpu/ConstantInitializer.cc
+++ b/runtime/onert/core/src/backend/cpu_common/ConstantInitializer.cc
@@ -14,19 +14,19 @@
* limitations under the License.
*/
-#include "ConstantInitializer.h"
-#include "Tensor.h"
+#include "backend/cpu_common/ConstantInitializer.h"
+#include "backend/cpu_common/Tensor.h"
namespace onert
{
namespace backend
{
-namespace cpu
+namespace cpu_common
{
ConstantInitializer::ConstantInitializer(const ir::Operands &operands,
const std::shared_ptr<ITensorRegistry> &tensor_reg)
- : IConstantInitializer{operands}, _tensor_reg{tensor_reg}
+ : ConstantInitializerBase{operands}, _tensor_reg{tensor_reg}
{
// DO NOTHING
}
@@ -53,42 +53,6 @@ void ConstantInitializer::registerExternalInitializer(const ir::OperandIndex &in
};
}
-void ConstantInitializer::visit(const ir::operation::Conv2D &node)
-{
- const auto &kernel_index = node.getInputs().at(ir::operation::Conv2D::KERNEL);
- const auto &kernel_obj = _operands.at(kernel_index);
- registerExternalInitializer(kernel_index, kernel_obj);
-
- const auto &bias_index = node.getInputs().at(ir::operation::Conv2D::BIAS);
- const auto &bias_obj = _operands.at(bias_index);
- registerExternalInitializer(bias_index, bias_obj);
-}
-
-void ConstantInitializer::visit(const ir::operation::DepthwiseConv2D &node)
-{
- const auto &kernel_index = node.getInputs().at(ir::operation::DepthwiseConv2D::KERNEL);
- const auto &kernel_obj = _operands.at(kernel_index);
- registerExternalInitializer(kernel_index, kernel_obj);
-
- const auto &bias_index = node.getInputs().at(ir::operation::DepthwiseConv2D::BIAS);
- const auto &bias_obj = _operands.at(bias_index);
- registerExternalInitializer(bias_index, bias_obj);
-}
-
-void ConstantInitializer::visit(const ir::operation::FullyConnected &node)
-{
- const auto &weight_index = node.getInputs().at(ir::operation::FullyConnected::WEIGHT);
- const auto &weight_obj = _operands.at(weight_index);
- registerExternalInitializer(weight_index, weight_obj);
-
- const auto &bias_index = node.getInputs().at(ir::operation::FullyConnected::BIAS);
- if (!bias_index.undefined())
- {
- const auto &bias_obj = _operands.at(bias_index);
- registerExternalInitializer(bias_index, bias_obj);
- }
-}
-
-} // namespace cpu
+} // namespace cpu_common
} // namespace backend
} // namespace onert
diff --git a/runtime/onert/core/src/backend/IConstantInitializer.cc b/runtime/onert/core/src/backend/cpu_common/ConstantInitializerBase.cc
index 6fb9757e0..15c2dfeb1 100644
--- a/runtime/onert/core/src/backend/IConstantInitializer.cc
+++ b/runtime/onert/core/src/backend/cpu_common/ConstantInitializerBase.cc
@@ -14,7 +14,7 @@
* limitations under the License.
*/
-#include "backend/IConstantInitializer.h"
+#include "backend/cpu_common/ConstantInitializerBase.h"
#include <Half.h>
@@ -24,9 +24,11 @@ namespace onert
{
namespace backend
{
+namespace cpu_common
+{
-void IConstantInitializer::registerCopyInitializer(const ir::OperandIndex &index,
- const ir::Operand &obj)
+void ConstantInitializerBase::registerCopyInitializer(const ir::OperandIndex &index,
+ const ir::Operand &obj)
{
// For only CONSTANTS
// TODO Add to check if tensor has been allocated
@@ -67,8 +69,8 @@ void IConstantInitializer::registerCopyInitializer(const ir::OperandIndex &index
}
}
-void IConstantInitializer::registerPermuteInitializer(const ir::OperandIndex &index,
- const ir::Operand &obj)
+void ConstantInitializerBase::registerPermuteInitializer(const ir::OperandIndex &index,
+ const ir::Operand &obj)
{
// For only CONSTANTS
// TODO Add to check if tensor has been allocated
@@ -82,27 +84,27 @@ void IConstantInitializer::registerPermuteInitializer(const ir::OperandIndex &in
switch (type)
{
case DataType::FLOAT32:
- _init_map[index] = std::bind(permuteInit<float>, _1, _2, _current_op_seq_layout);
+ _init_map[index] = std::bind(permuteInit<float>, _1, _2, _current_layout);
break;
case DataType::INT32:
- _init_map[index] = std::bind(permuteInit<int32_t>, _1, _2, _current_op_seq_layout);
+ _init_map[index] = std::bind(permuteInit<int32_t>, _1, _2, _current_layout);
break;
case DataType::UINT32:
- _init_map[index] = std::bind(permuteInit<uint32_t>, _1, _2, _current_op_seq_layout);
+ _init_map[index] = std::bind(permuteInit<uint32_t>, _1, _2, _current_layout);
break;
case DataType::BOOL8:
case DataType::QUANT_UINT8_ASYMM:
- _init_map[index] = std::bind(permuteInit<uint8_t>, _1, _2, _current_op_seq_layout);
+ _init_map[index] = std::bind(permuteInit<uint8_t>, _1, _2, _current_layout);
break;
case DataType::QUANT_INT8_SYMM:
case DataType::QUANT_INT8_ASYMM:
- _init_map[index] = std::bind(permuteInit<int8_t>, _1, _2, _current_op_seq_layout);
+ _init_map[index] = std::bind(permuteInit<int8_t>, _1, _2, _current_layout);
break;
case DataType::FLOAT16:
- _init_map[index] = std::bind(permuteInit<float16>, _1, _2, _current_op_seq_layout);
+ _init_map[index] = std::bind(permuteInit<float16>, _1, _2, _current_layout);
break;
case DataType::INT64:
- _init_map[index] = std::bind(permuteInit<int64_t>, _1, _2, _current_op_seq_layout);
+ _init_map[index] = std::bind(permuteInit<int64_t>, _1, _2, _current_layout);
break;
default:
throw std::runtime_error("Not supported, yet");
@@ -110,5 +112,6 @@ void IConstantInitializer::registerPermuteInitializer(const ir::OperandIndex &in
}
}
+} // namespace cpu_common
} // namespace backend
} // namespace onert
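
The switch above binds the current layout as the third argument of permuteInit<T> and stores the resulting two-argument callable in _init_map. A standalone, minimal illustration of that std::bind pattern (plain ints stand in for operand, tensor, and layout):

  #include <functional>
  #include <iostream>
  #include <map>

  int main()
  {
    using namespace std::placeholders;
    auto init = [](int operand, int tensor, int layout) {
      std::cout << operand << " " << tensor << " layout=" << layout << "\n";
    };
    std::map<int, std::function<void(int, int)>> init_map;
    const int current_layout = 1; // e.g. NHWC
    init_map[42] = std::bind(init, _1, _2, current_layout); // bind the fixed layout argument
    init_map[42](7, 8);                                     // prints "7 8 layout=1"
    return 0;
  }
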
diff --git a/runtime/onert/core/src/backend/cpu_common/StaticTensorManager.cc b/runtime/onert/core/src/backend/cpu_common/StaticTensorManager.cc
index cac43babe..8c5c46a08 100644
--- a/runtime/onert/core/src/backend/cpu_common/StaticTensorManager.cc
+++ b/runtime/onert/core/src/backend/cpu_common/StaticTensorManager.cc
@@ -17,6 +17,7 @@
#include "backend/cpu_common/StaticTensorManager.h"
#include "backend/cpu_common/DynamicTensorManager.h"
+#include "backend/cpu_common/Tensor.h"
#include <util/logging.h>
namespace onert
@@ -27,31 +28,13 @@ namespace cpu_common
{
StaticTensorManager::StaticTensorManager(const std::shared_ptr<TensorRegistry> &reg,
- DynamicMemoryManager *dynamic_mem_mgr)
- : _const_mgr{new DynamicMemoryManager()}, _nonconst_mgr{new MemoryManager()}, _tensors{reg},
- _dynamic_mem_mgr{dynamic_mem_mgr}
+ DynamicTensorManager *dynamic_tensor_manager)
+ : _nonconst_mgr{new MemoryManager()}, _tensors{reg},
+ _dynamic_tensor_manager{dynamic_tensor_manager}
{
// DO NOTHING
}
-void StaticTensorManager::allocateConsts(void)
-{
- for (auto &pair : _tensors->native_tensors())
- {
- const auto &ind = pair.first;
- auto tensor = pair.second.get();
- if (_as_constants[ind])
- {
- auto mem_alloc = _const_mgr->allocate(_tensors->getITensor(ind), tensor->total_size());
- tensor->setBuffer(mem_alloc);
- auto buffer = mem_alloc->base();
- VERBOSE(CPU_COMMON_StaticTensorManager) << "CONSTANT TENSOR(#" << ind.value()
- << "): " << static_cast<void *>(buffer)
- << "size : " << tensor->total_size() << std::endl;
- }
- }
-}
-
void StaticTensorManager::allocateNonconsts(void)
{
_nonconst_mgr->allocate();
@@ -65,14 +48,12 @@ void StaticTensorManager::allocateNonconsts(void)
auto *buffer = _nonconst_mgr->getBuffer(ind);
tensor->setBuffer(buffer);
- VERBOSE(CPU_COMMON_StaticTensorManager) << "TENSOR(#" << ind.value()
- << "): " << static_cast<void *>(buffer) << std::endl;
+ VERBOSE(CPU_StaticTensorManager) << "TENSOR(#" << ind.value()
+ << "): " << static_cast<void *>(buffer) << std::endl;
}
}
}
-void StaticTensorManager::deallocateConsts(void) { _const_mgr->deallocate(); }
-
void StaticTensorManager::deallocateNonconsts(void) { _nonconst_mgr->deallocate(); }
void StaticTensorManager::buildTensor(const ir::OperandIndex &ind,
@@ -80,8 +61,17 @@ void StaticTensorManager::buildTensor(const ir::OperandIndex &ind,
bool as_const)
{
assert(!_tensors->getNativeTensor(ind));
- auto tensor = std::make_unique<Tensor>(tensor_info, backend_layout, _dynamic_mem_mgr);
- _tensors->setNativeTensor(ind, std::move(tensor));
+ if (as_const)
+ {
+ auto tensor = std::make_unique<ExternalTensor>(tensor_info, backend_layout);
+ _tensors->setNativeTensor(ind, std::move(tensor));
+ }
+ else
+ {
+ auto tensor = std::make_unique<Tensor>(tensor_info, backend_layout,
+ _dynamic_tensor_manager->dynamic_mem_mgr().get());
+ _tensors->setNativeTensor(ind, std::move(tensor));
+ }
_as_constants[ind] = as_const;
}
diff --git a/runtime/onert/core/src/backend/cpu_common/Tensor.cc b/runtime/onert/core/src/backend/cpu_common/Tensor.cc
index d3dcf9a6d..e412cb775 100644
--- a/runtime/onert/core/src/backend/cpu_common/Tensor.cc
+++ b/runtime/onert/core/src/backend/cpu_common/Tensor.cc
@@ -95,3 +95,20 @@ bool Tensor::applyShape(const ir::Shape &new_shape)
} // namespace cpu_common
} // namespace backend
} // namespace onert
+
+// ExternalTensor
+
+namespace onert
+{
+namespace backend
+{
+namespace cpu_common
+{
+
+// `dynamic_cast` does not work across library boundaries on the NDK. Defining the destructor
+// out-of-line makes it the key function, so `dynamic_cast` works across dlopen'ed libraries.
+ExternalTensor::~ExternalTensor() {}
+
+} // namespace cpu_common
+} // namespace backend
+} // namespace onert
diff --git a/runtime/onert/core/src/compiler/BackendManager.cc b/runtime/onert/core/src/compiler/BackendManager.cc
index 0093f50fd..ea45cbeb7 100644
--- a/runtime/onert/core/src/compiler/BackendManager.cc
+++ b/runtime/onert/core/src/compiler/BackendManager.cc
@@ -69,55 +69,73 @@ void BackendManager::loadBackend(const std::string &backend)
return;
}
- // TODO Remove indentation
+ const std::string backend_so = "libbackend_" + backend + SHARED_LIB_EXT;
+ void *handle = dlopen(backend_so.c_str(), RTLD_LAZY | RTLD_LOCAL);
+
+ if (handle == nullptr)
{
- const std::string backend_so = "libbackend_" + backend + SHARED_LIB_EXT;
- void *handle = dlopen(backend_so.c_str(), RTLD_LAZY | RTLD_LOCAL);
+ VERBOSE(BackendManager) << "Failed to load backend '" << backend << "' - " << dlerror() << "\n";
+ return;
+ }
- if (handle == nullptr)
+ VERBOSE(BackendManager) << "Successfully loaded '" << backend << "'(" << backend_so << ")\n";
+
+ {
+ // load object creator function
+ auto backend_create = (backend_create_t)dlsym(handle, "onert_backend_create");
+ if (backend_create == nullptr)
{
- VERBOSE_F() << "Failed to load backend '" << backend << "' - " << dlerror() << std::endl;
+ // TODO replace `fprintf` with `VERBOSE`
+ fprintf(stderr, "BackendManager: unable to find function `onert_backend_create` : %s\n",
+ dlerror());
+ dlclose(handle);
return;
}
- VERBOSE_F() << "Successfully loaded '" << backend << "' - " << backend_so << "\n";
-
+ // load object creator function
+ auto backend_destroy = (backend_destroy_t)dlsym(handle, "onert_backend_destroy");
+ if (backend_destroy == nullptr)
{
- // load object creator function
- auto backend_create = (backend_create_t)dlsym(handle, "onert_backend_create");
- if (backend_create == nullptr)
- {
- fprintf(stderr, "BackendManager: unable to open function onert_backend_create : %s\n",
- dlerror());
- abort();
- }
-
- // load object creator function
- auto backend_destroy = (backend_destroy_t)dlsym(handle, "onert_backend_destroy");
- if (backend_destroy == nullptr)
- {
- fprintf(stderr, "BackendManager: unable to open function onert_backend_destroy : %s\n",
- dlerror());
- abort();
- }
-
- auto backend_object =
- std::unique_ptr<backend::Backend, backend_destroy_t>(backend_create(), backend_destroy);
- bool initialized = backend_object->config()->initialize(); // Call initialize here?
- if (!initialized)
- {
- VERBOSE_F() << backend.c_str() << " backend initialization failed. Don't use this backend"
- << std::endl;
- dlclose(handle);
- return;
- }
- _gen_map.emplace(backend_object->config()->id(), std::move(backend_object));
+ // TODO replace `fprintf` with `VERBOSE`
+      fprintf(stderr, "BackendManager: unable to find function `onert_backend_destroy` : %s\n",
+ dlerror());
+ dlclose(handle);
+ return;
}
- // Save backend handle (avoid warning by handle lost without dlclose())
- auto u_handle = std::unique_ptr<void, dlhandle_destroy_t>{handle, [](void *h) { dlclose(h); }};
- _handle_map.emplace(backend, std::move(u_handle));
+ auto backend_object =
+ std::unique_ptr<backend::Backend, backend_destroy_t>(backend_create(), backend_destroy);
+ bool initialized = backend_object->config()->initialize(); // Call initialize here?
+ if (!initialized)
+ {
+ VERBOSE(BackendManager) << backend.c_str()
+ << " backend initialization failed. Don't use this backend"
+ << std::endl;
+ dlclose(handle);
+ return;
+ }
+ _gen_map.emplace(backend_object->config()->id(), std::move(backend_object));
}
+
+ // Save backend handle (avoid warning by handle lost without dlclose())
+
+  // NOTE This is a workaround for clang-format 3.9 (it seems it does not understand
+  // "by-copy capture with an initializer")
+ // clang-format off
+ auto u_handle = std::unique_ptr<void, dlhandle_destroy_t>{
+ handle, [id = backend, filename = backend_so](void *h) {
+ if (dlclose(h) == 0)
+ {
+ VERBOSE(BackendManager) << "Successfully unloaded '" << id << "'(" << filename << ")\n";
+ }
+ else
+ {
+ VERBOSE(BackendManager)
+            << "Failed to unload backend '" << id << "' - " << dlerror() << "\n";
+ }
+ }};
+  // clang-format on
+  _handle_map.emplace(backend, std::move(u_handle));
}
backend::Backend *BackendManager::get(const std::string &key)
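
The refactored loadBackend() above keeps the usual plugin-loading recipe: dlopen the shared object, dlsym the create/destroy pair, and keep the dl handle alive in a unique_ptr whose deleter calls dlclose. A self-contained sketch of that recipe outside onert (library and symbol names are placeholders, not part of this commit; link with -ldl):

  #include <dlfcn.h>
  #include <cstdio>
  #include <memory>

  int main()
  {
    void *handle = dlopen("libmy_plugin.so", RTLD_LAZY | RTLD_LOCAL);
    if (handle == nullptr)
    {
      std::fprintf(stderr, "dlopen failed: %s\n", dlerror());
      return 1;
    }

    using create_t = void *(*)();
    auto create = reinterpret_cast<create_t>(dlsym(handle, "plugin_create"));
    if (create == nullptr)
    {
      std::fprintf(stderr, "dlsym failed: %s\n", dlerror());
      dlclose(handle);
      return 1;
    }

    // Keep the handle alive for as long as objects created by the plugin may be used;
    // the custom deleter unloads the library on scope exit.
    std::unique_ptr<void, int (*)(void *)> guard{handle, &dlclose};
    void *obj = create();
    (void)obj; // use the plugin object here
    return 0;
  }
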
diff --git a/runtime/onert/core/src/compiler/Compiler.cc b/runtime/onert/core/src/compiler/Compiler.cc
index c2844bd7c..7eeb14ad3 100644
--- a/runtime/onert/core/src/compiler/Compiler.cc
+++ b/runtime/onert/core/src/compiler/Compiler.cc
@@ -41,6 +41,30 @@
#include "ir/OperationDumper.h"
#include "misc/string_helpers.h"
+namespace
+{
+
+using namespace onert;
+
+std::string getOpBackends(std::unordered_map<ir::OpCode, std::string> &opcode_to_backend)
+{
+ std::unordered_map<ir::OpCode, std::string>::iterator it;
+ std::string opbackends;
+
+ for (it = opcode_to_backend.begin(); it != opcode_to_backend.end(); ++it)
+ {
+ if (!opbackends.empty())
+ opbackends = opbackends + ", ";
+
+ auto opcode = it->first;
+ const std::string opname = ir::toString(opcode);
+ opbackends += opname + "=" + it->second;
+ }
+ return opbackends;
+}
+
+} // namespace
+
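
For reference, a sketch of what the helper above renders for the manual-scheduler mapping installed further down in Compiler::compile(). The op names and the "controlflow" backend id are assumptions of this sketch (they come from ir::toString() and controlflow::Config::ID), and the ordering follows the unordered_map:

  std::unordered_map<ir::OpCode, std::string> example{
      {ir::OpCode::If, "controlflow"}, {ir::OpCode::While, "controlflow"}};
  // getOpBackends(example) -> e.g. "If=controlflow, While=controlflow" (order unspecified)
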
namespace onert
{
@@ -51,7 +75,6 @@ CompilerOptions fetchCompilerOptionsFromGlobalConfig(const ir::Subgraphs &subgs)
{
CompilerOptions options;
options.backend_list = nnfw::misc::split(util::getConfigString(util::config::BACKENDS), ';');
- options.is_primary_subgraph = false;
options.trace_filepath = util::getConfigString(util::config::TRACE_FILEPATH);
options.graph_dump_level = util::getConfigInt(util::config::GRAPH_DOT_DUMP);
options.op_seq_max_node = util::getConfigInt(util::config::OP_SEQ_MAX_NODE);
@@ -108,13 +131,15 @@ CompilerOptions fetchCompilerOptionsFromGlobalConfig(const ir::Subgraphs &subgs)
return options;
}
-Compiler::Compiler(const std::shared_ptr<ir::Subgraphs> &subgs)
+Compiler::Compiler(const std::shared_ptr<ir::Subgraphs> &subgs, util::TracingCtx *tracing_ctx)
: _subgraphs{subgs}, _state{State::CREATED}
{
// Set default values for CompilerOptions
// All these default values should not be fetched from Env, when we stop supporting Android NN
// API.
_options = fetchCompilerOptionsFromGlobalConfig(*subgs);
+
+ _options.tracing_ctx = tracing_ctx;
}
void Compiler::enableToFp16() { _options.fp16_enable = true; }
@@ -132,12 +157,10 @@ std::shared_ptr<exec::ExecutorMap> Compiler::compile(void)
{
// Set control flow backend for control flow operators
{
- _options.manual_scheduler_options.opcode_to_backend[ir::OpCode::If] =
- backend::controlflow::Config::ID;
- _options.manual_scheduler_options.opcode_to_backend[ir::OpCode::While] =
- backend::controlflow::Config::ID;
- _options.manual_scheduler_options.opcode_to_backend[ir::OpCode::Permute] =
- backend::controlflow::Config::ID;
+ auto &cfid = backend::controlflow::Config::ID;
+ _options.manual_scheduler_options.opcode_to_backend[ir::OpCode::If] = cfid;
+ _options.manual_scheduler_options.opcode_to_backend[ir::OpCode::While] = cfid;
+ _options.manual_scheduler_options.opcode_to_backend[ir::OpCode::Permute] = cfid;
}
// FIXME This is a workaround for bcq operations, should remove it
@@ -157,7 +180,11 @@ std::shared_ptr<exec::ExecutorMap> Compiler::compile(void)
VERBOSE(Compiler) << "graph_dump_level : " << _options.graph_dump_level << std::endl;
VERBOSE(Compiler) << "op_seq_max_node : " << _options.op_seq_max_node << std::endl;
VERBOSE(Compiler) << "executor : " << _options.executor << std::endl;
- VERBOSE(Compiler) << "manual_scheduler_options : (Too many things to print)" << std::endl;
+ VERBOSE(Compiler) << "manual backend_for_all : "
+ << _options.manual_scheduler_options.backend_for_all << std::endl;
+ VERBOSE(Compiler) << "manual_scheduler_options : "
+ << getOpBackends(_options.manual_scheduler_options.opcode_to_backend)
+ << std::endl;
VERBOSE(Compiler) << "he_scheduler : " << _options.he_scheduler << std::endl;
VERBOSE(Compiler) << "he_profiling_mode : " << _options.he_profiling_mode << std::endl;
VERBOSE(Compiler) << "disable_compile : " << _options.disable_compile << std::endl;
@@ -202,7 +229,6 @@ std::shared_ptr<exec::ExecutorMap> Compiler::compile(void)
// Lower: Assign backend
std::unordered_map<ir::SubgraphIndex, std::unique_ptr<compiler::LoweredGraph>> lowered_subgs;
_subgraphs->iterate([&](const ir::SubgraphIndex &index, ir::Graph &subg) {
- _options.is_primary_subgraph = (index == ir::SubgraphIndex{0});
onert::dumper::dot::DotDumper dot_dumper(subg, dump_level);
dot_dumper.dump(nnfw::misc::str("before_lower_subg-", index.value()));
@@ -230,6 +256,14 @@ std::shared_ptr<exec::ExecutorMap> Compiler::compile(void)
_subgraphs.reset();
+ for (auto &pair : lowered_subgs)
+ {
+ const auto &subg_index = pair.first;
+ auto &lowered_subg = pair.second;
+ onert::dumper::dot::DotDumper dot_dumper_lowered(lowered_subg.get(), dump_level);
+ dot_dumper_lowered.dump("after_lower_subg-" + std::to_string(subg_index.value()));
+ }
+
// Shape inference.
{
const auto primary_subg_idx = ir::SubgraphIndex{0};
@@ -266,12 +300,8 @@ std::shared_ptr<exec::ExecutorMap> Compiler::compile(void)
auto &lowered_subg = pair.second;
auto indexed_ranks = lowered_subg->indexed_ranks();
- _options.is_primary_subgraph = (subg_index == ir::SubgraphIndex{0});
-
- onert::dumper::dot::DotDumper dot_dumper_lowered(lowered_subg.get(), dump_level);
- dot_dumper_lowered.dump("after_lower_subg-" + std::to_string(subg_index.value()));
-
- ir::OperationDumper dumper("START SUBGRAPH " + std::to_string(subg_index.value()));
+ ir::OperationDumper dumper("Executor generation of Subgraph " +
+ std::to_string(subg_index.value()));
lowered_subg->graph().operations().iterate(
[&](const ir::OperationIndex &, const ir::Operation &op) { op.accept(dumper); });
auto executor = std::unique_ptr<exec::IExecutor>{
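The getOpBackends helper added earlier in this file's diff renders the manual scheduler's opcode-to-backend map as a comma-separated list for the VERBOSE dump. A minimal standalone sketch of that join pattern, using plain std::string keys instead of ir::OpCode (names here are illustrative, not onert API):

#include <iostream>
#include <map>
#include <string>

// Joins {"If","controlflow"}, {"While","controlflow"} into "If=controlflow, While=controlflow".
std::string joinOpBackends(const std::map<std::string, std::string> &opcode_to_backend)
{
  std::string out;
  for (const auto &entry : opcode_to_backend)
  {
    if (!out.empty())
      out += ", "; // separator only between entries, never trailing
    out += entry.first + "=" + entry.second;
  }
  return out;
}

int main()
{
  std::map<std::string, std::string> m{{"If", "controlflow"}, {"While", "controlflow"}};
  std::cout << joinOpBackends(m) << std::endl; // prints: If=controlflow, While=controlflow
  return 0;
}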
diff --git a/runtime/onert/core/src/compiler/ExecutorFactory.cc b/runtime/onert/core/src/compiler/ExecutorFactory.cc
index bb325ffbc..356feed7c 100644
--- a/runtime/onert/core/src/compiler/ExecutorFactory.cc
+++ b/runtime/onert/core/src/compiler/ExecutorFactory.cc
@@ -16,6 +16,7 @@
#include "ExecutorFactory.h"
+#include <deque>
#include <functional>
#include "exec/ExecutionObservers.h"
#include "exec/LinearExecutor.h"
@@ -25,16 +26,13 @@
#include "compiler/ExecutionBuilder.h"
#include "exec/ExecTime.h"
#include "compiler/Linear.h"
-#include "compiler/TensorBuilders.h"
-#include "backend/IConstantInitializer.h"
-#include "backend/IKernelGenerator.h"
-#include "backend/IOptimizer.h"
#include "backend/IPortableTensor.h"
-#include "backend/ITensorRegister.h"
#include "backend/controlflow/Config.h"
#include "backend/controlflow/KernelGenerator.h"
#include "backend/controlflow/UserTensor.h"
#include "backend/controlflow/TensorBuilder.h"
+#include "util/TracingCtx.h"
+
#include <memory>
namespace onert
@@ -66,6 +64,36 @@ private:
std::shared_ptr<backend::IConfig> _config;
};
+void initializeSubgraphIOTensors(compiler::LoweredGraph &lowered_graph,
+ const ir::OperandIndexSequence &indices)
+{
+ // TODO Store controlflow backend in BackendContext
+ std::shared_ptr<backend::controlflow::TensorRegistry> cf_tensor_reg;
+ for (const auto &e : lowered_graph.backend_contexts())
+ {
+ auto backend = e.first;
+ auto &context = e.second;
+ if (backend->config()->id() == backend::controlflow::Config::ID)
+ {
+ cf_tensor_reg =
+ std::dynamic_pointer_cast<backend::controlflow::TensorRegistry>(context->tensor_registry);
+ }
+ }
+ assert(cf_tensor_reg);
+
+ for (auto ind : indices)
+ {
+ const auto &operand = lowered_graph.graph().operands().at(ind);
+ auto tensor = std::make_unique<backend::controlflow::IOTensor>(
+ operand.info(),
+ ir::Layout::NHWC /* FIXME find op_seq for this operand and use frontend_layout */
+ );
+
+ // Add tensor to controlflow TensorRegistry.
+ cf_tensor_reg->setNativeIOTensor(ind, std::move(tensor));
+ }
+}
+
} // namespace
} // namespace onert
@@ -134,97 +162,6 @@ void ExecutorFactory::initializeBackendContext(compiler::LoweredGraph *lowered_g
}
}
-void ExecutorFactory::runTensorRegistration(compiler::LoweredGraph *lowered_graph,
- const std::vector<ir::OpSequenceIndex> &order)
-{
- for (const auto index : order)
- {
- const auto &op_seq = lowered_graph->op_seqs().at(index);
- const auto backend = lowered_graph->getLowerInfo(index)->backend();
- const auto tensor_register = lowered_graph->backend_contexts().at(backend)->tensor_register;
- auto tensor_builder = lowered_graph->backend_contexts().at(backend)->tensor_builder;
- auto model_io = lowered_graph->graph().getInputs() + lowered_graph->graph().getOutputs();
-
- if (tensor_register)
- {
- // Custom registration
- tensor_register->registerTensors(op_seq, lowered_graph->getLowerInfo());
- }
- else
- {
- // Default registration
- for (const auto op_idx : op_seq)
- {
- const auto &op = lowered_graph->graph().operations().at(op_idx);
- for (const auto &index :
- (op.getInputs() | ir::Remove::UNDEFINED) + (op.getOutputs() | ir::Remove::UNDEFINED))
- {
- if (!tensor_builder->isRegistered(index) && !model_io.contains(index))
- {
- const auto &operand_lower_info =
- lowered_graph->getLowerInfo(index)->def_factors().getOnlyElement();
-
- // E.g., permute (CPU) -> tensor A -> MaxPool2D(acl_cl)
- // op.getOutputs() of permute (CPU) returns tensor A
- // but tensor A belongs to the backend of acl_cl.
- // So, we have to make this tensor NOT registered for CPU.
- if (operand_lower_info.backend() != backend)
- continue;
-
- const auto &obj = lowered_graph->graph().operands().at(index);
- const auto frontend_layout = op_seq.getLayout();
- const auto backend_layout = operand_lower_info.layout();
- ir::OperandInfo backend_info{permuteShape(obj.shape(), frontend_layout, backend_layout),
- obj.typeInfo(), obj.info().memAllocType(),
- obj.isConstant()};
- tensor_builder->registerTensorInfo(index, backend_info, backend_layout);
- }
- }
- }
- }
- }
-}
-
-std::vector<backend::ITensor *>
-ExecutorFactory::initializeModelIOTensors(compiler::LoweredGraph &lowered_graph,
- const ir::OperandIndexSequence &indices)
-{
- std::vector<backend::ITensor *> ret;
-
- // TODO Store controlflow backend in BackendContext
- std::shared_ptr<backend::controlflow::TensorBuilder> cf_tensor_builder;
- std::shared_ptr<backend::controlflow::TensorRegistry> cf_tensor_reg;
- for (const auto &e : lowered_graph.backend_contexts())
- {
- auto backend = e.first;
- auto &context = e.second;
- if (backend->config()->id() == backend::controlflow::Config::ID)
- {
- cf_tensor_builder =
- std::dynamic_pointer_cast<backend::controlflow::TensorBuilder>(context->tensor_builder);
- cf_tensor_reg =
- std::dynamic_pointer_cast<backend::controlflow::TensorRegistry>(context->tensor_registry);
- }
- }
- assert(cf_tensor_builder);
- assert(cf_tensor_reg);
-
- for (auto ind : indices)
- {
- const auto &operand = lowered_graph.graph().operands().at(ind);
- auto tensor = std::make_unique<backend::controlflow::UserTensor>(
- operand.info(),
- ir::Layout::NHWC /* FIXME find op_seq for this operand and use frontend_layout */
- );
-
- // Add tensor to controlflow TensorRegistry.
- cf_tensor_reg->setNativeUserTensor(ind, std::move(tensor));
- auto *itensor = cf_tensor_reg->getITensor(ind);
- ret.push_back(itensor);
- }
- return ret;
-}
-
void ExecutorFactory::prepareMigrantTensors(compiler::LoweredGraph &lowered_graph)
{
TensorRegistries tensor_regs{lowered_graph.backend_contexts(), true};
@@ -260,110 +197,78 @@ ExecutorFactory::createLinearExecutor(std::unique_ptr<compiler::LoweredGraph> lo
initializeBackendContext(lowered_graph.get());
- // linearize
- assert(!lowered_graph->graph().isBuildingPhase());
-
- /*************************************************
- * Backend dependent analysis & optimization phase
- *************************************************/
-
- for (auto &pair : backend_contexts)
- {
- auto &optimizer = pair.second->optimizer;
- if (optimizer)
- optimizer->optimize();
- }
+ TensorRegistries tensor_regs{lowered_graph->backend_contexts(), true};
- /**********************************************************
- * Backend dependent analysis & optimization phase finished
- **********************************************************/
+ assert(!lowered_graph->graph().isBuildingPhase());
- /***********************
- * Code generation phase
- ***********************/
+ initializeSubgraphIOTensors(
+ *lowered_graph, (lowered_graph->graph().getInputs() + lowered_graph->graph().getOutputs()) |
+ ir::Remove::DUPLICATED | ir::Remove::UNDEFINED);
+ // linearize
auto order = Linear::linearize(*lowered_graph);
- runTensorRegistration(lowered_graph.get(), order);
-
- std::vector<backend::ITensor *> input_tensors;
- std::vector<backend::ITensor *> output_tensors;
- if (options.is_primary_subgraph)
- {
- input_tensors = initializeModelIOTensors(*lowered_graph, lowered_graph->graph().getInputs());
- output_tensors = initializeModelIOTensors(*lowered_graph, lowered_graph->graph().getOutputs());
- }
-
Linear::dump(*lowered_graph, order);
- Linear::planTensors(*lowered_graph, order);
- TensorBuilders tensor_builders{lowered_graph->backend_contexts(), true};
- TensorRegistries tensor_regs{lowered_graph->backend_contexts(), true};
-
- for (auto &tensor_builder : tensor_builders)
+ for (auto &pair : backend_contexts)
{
- tensor_builder->prepare();
+ pair.second->genTensors(order, lowered_graph->op_seqs(), *lowered_graph->getLowerInfo());
}
prepareMigrantTensors(*lowered_graph);
- ExecutionBuilder builder;
-
- // Generate kernels
- lowered_graph->iterateTopolOpSeqs([&](const ir::OpSequenceIndex &op_seq_index,
- const ir::OpSequence &op_seq) {
- auto lower_info = lowered_graph->getLowerInfo(op_seq_index);
- auto kernel_gen = lowered_graph->backend_contexts().at(lower_info->backend())->kernel_gen;
- // Set TensorBuilderSet and ExecutorMap to kernel_gen of control flow
- auto cf_kernel_gen = dynamic_cast<backend::controlflow::KernelGenerator *>(kernel_gen.get());
- if (cf_kernel_gen != nullptr)
+ // Give some runtime objects to controlflow KernelGenerator
+ for (auto &pair : backend_contexts)
+ {
+ auto cf_context = dynamic_cast<backend::controlflow::BackendContext *>(pair.second.get());
+ if (cf_context != nullptr)
{
+ auto cf_kernel_gen = cf_context->kernel_gen;
cf_kernel_gen->setTensorRegistries(tensor_regs);
cf_kernel_gen->setExecutorMap(executor_map);
}
- auto fn_seq = kernel_gen->generate(op_seq);
- if (options.he_profiling_mode)
- {
- fn_seq->wrap<SyncFunction>(lower_info->backend()->config());
- }
- builder.append(op_seq_index, {&op_seq, lower_info, std::move(fn_seq)});
- });
-
- for (auto &tensor_builder : tensor_builders)
- {
- tensor_builder->allocate();
}
+ ExecutionBuilder builder;
+
+ // Adjust the order of backends for the upcoming iteration
+ std::deque<std::pair<const backend::Backend *, backend::BackendContext *>> ordered_contexts;
for (auto &pair : backend_contexts)
{
- pair.second->initConsts();
+ // NOTE The controlflow backend must be processed last.
+ // This is because Permute is the only operation that may have different ITensor
+ // objects for its input and output, and it requires that all other backends'
+ // tensors are ready to use.
+ if (pair.first->config()->id() == "controlflow")
+ ordered_contexts.emplace_back(pair.first, pair.second.get());
+ else
+ ordered_contexts.emplace_front(pair.first, pair.second.get());
}
- lowered_graph->graph().operands().iterate(
- [](const ir::OperandIndex &, ir::Operand &obj) { obj.releaseData(); });
-
- auto code_map = builder.releaseCodeMap();
-
- for (auto &it : code_map)
+ // Generate kernels
+ for (auto &pair : ordered_contexts)
{
- auto op_seq_index = it.first;
- auto &fn_seq = it.second.fn_seq;
-
- fn_seq->iterate([&](exec::IFunction &ifunc) {
- ifunc.prepare();
- auto backend = lowered_graph->getLowerInfo(op_seq_index)->backend();
- auto tensor_builder = lowered_graph->backend_contexts().at(backend)->tensor_builder;
- tensor_builder->postFunctionPrepare();
- });
+ auto codes = pair.second->genKernels(order, lowered_graph->op_seqs());
+ for (auto &pair : codes)
+ {
+ auto &op_seq_ind = pair.first;
+ auto &fn_seq = pair.second;
+ auto &op_seq = lowered_graph->op_seqs().at(op_seq_ind);
+ auto lower_info = lowered_graph->getLowerInfo(op_seq_ind);
+ if (options.he_profiling_mode)
+ fn_seq->wrap<SyncFunction>(lower_info->backend()->config());
+ builder.append(op_seq_ind, {&op_seq, lower_info, std::move(fn_seq)});
+ }
}
- auto exec =
- new exec::LinearExecutor{std::move(lowered_graph), input_tensors, output_tensors, tensor_regs,
- std::move(code_map), order};
+ auto code_map = builder.releaseCodeMap();
+
+ auto exec = new exec::LinearExecutor{std::move(lowered_graph), tensor_regs, std::move(code_map),
+ order, options.tracing_ctx};
if (!options.trace_filepath.empty())
{
- std::unique_ptr<exec::IExecutionObserver> ctp =
- std::make_unique<exec::ChromeTracingObserver>(options.trace_filepath, exec->graph());
+ std::unique_ptr<exec::IExecutionObserver> ctp = std::make_unique<exec::TracingObserver>(
+ options.trace_filepath, exec->graph(), options.tracing_ctx);
exec->addObserver(std::move(ctp));
}
@@ -378,100 +283,81 @@ exec::IExecutor *ExecutorFactory::createDataflowExecutor(
initializeBackendContext(lowered_graph.get());
- auto order = Linear::linearize(*lowered_graph);
- runTensorRegistration(lowered_graph.get(), order);
-
- std::vector<backend::ITensor *> input_tensors;
- std::vector<backend::ITensor *> output_tensors;
- if (options.is_primary_subgraph)
- {
- input_tensors = initializeModelIOTensors(*lowered_graph, lowered_graph->graph().getInputs());
- output_tensors = initializeModelIOTensors(*lowered_graph, lowered_graph->graph().getOutputs());
- }
-
- TensorBuilders tensor_builders{lowered_graph->backend_contexts(), true};
TensorRegistries tensor_regs{lowered_graph->backend_contexts(), true};
- // To make tensors never be deallocated, this is a workaround to use static memory planner
- for (auto &tensor_builder : tensor_builders)
- {
- lowered_graph->graph().operands().iterate(
- [&](const ir::OperandIndex &ind, const ir::Operand &) {
- if (tensor_builder->isRegistered(ind))
- {
- tensor_builder->notifyFirstUse(ind);
- }
- });
- }
+ assert(!lowered_graph->graph().isBuildingPhase());
+
+ initializeSubgraphIOTensors(
+ *lowered_graph, (lowered_graph->graph().getInputs() + lowered_graph->graph().getOutputs()) |
+ ir::Remove::DUPLICATED | ir::Remove::UNDEFINED);
- for (auto &tensor_builder : tensor_builders)
+ // linearize
+ // This order is just for giving topological order info to the backends
+ // TODO When we pass a partial graph to a backend, we can remove this
+ auto order = Linear::linearize(*lowered_graph);
+ for (auto &pair : backend_contexts)
{
- tensor_builder->prepare();
+ pair.second->genTensors(order, lowered_graph->op_seqs(), *lowered_graph->getLowerInfo());
}
prepareMigrantTensors(*lowered_graph);
- ExecutionBuilder builder;
-
- // Generate kernels
- lowered_graph->iterateTopolOpSeqs([&](const ir::OpSequenceIndex &op_seq_index,
- const ir::OpSequence &op_seq) {
- auto lower_info = lowered_graph->getLowerInfo(op_seq_index);
- auto kernel_gen = lowered_graph->backend_contexts().at(lower_info->backend())->kernel_gen;
- // Set TensorBuilderSet and ExecutorMap to kernel_gen of control flow
- auto cf_kernel_gen = dynamic_cast<backend::controlflow::KernelGenerator *>(kernel_gen.get());
- if (cf_kernel_gen != nullptr)
+ // Give some runtime objects to controlflow KernelGenerator
+ for (auto &pair : backend_contexts)
+ {
+ auto cf_context = dynamic_cast<backend::controlflow::BackendContext *>(pair.second.get());
+ if (cf_context != nullptr)
{
- assert(cf_kernel_gen != nullptr);
+ auto cf_kernel_gen = cf_context->kernel_gen;
cf_kernel_gen->setTensorRegistries(tensor_regs);
cf_kernel_gen->setExecutorMap(executor_map);
}
- auto fn_seq = kernel_gen->generate(op_seq);
- if (options.he_profiling_mode)
- {
- fn_seq->wrap<SyncFunction>(lower_info->backend()->config());
- }
- builder.append(op_seq_index, {&op_seq, lower_info, std::move(fn_seq)});
- });
-
- for (const auto &tensor_builder : tensor_builders)
- {
- tensor_builder->allocate();
}
+ ExecutionBuilder builder;
+
+ // Adjust the order of backends for the upcoming iteration
+ std::deque<std::pair<const backend::Backend *, backend::BackendContext *>> ordered_contexts;
for (auto &pair : backend_contexts)
{
- pair.second->initConsts();
+ // NOTE The controlflow backend must be processed last.
+ // This is because Permute is the only operation that may have different ITensor
+ // objects for its input and output, and it requires that all other backends'
+ // tensors are ready to use.
+ if (pair.first->config()->id() == "controlflow")
+ ordered_contexts.emplace_back(pair.first, pair.second.get());
+ else
+ ordered_contexts.emplace_front(pair.first, pair.second.get());
}
- lowered_graph->graph().operands().iterate(
- [](const ir::OperandIndex &, ir::Operand &obj) { obj.releaseData(); });
-
- auto code_map = builder.releaseCodeMap();
-
- for (auto &it : code_map)
+ // Generate kernels
+ for (auto &pair : ordered_contexts)
{
- auto op_seq_index = it.first;
- auto &fn_seq = it.second.fn_seq;
-
- fn_seq->iterate([&](exec::IFunction &ifunc) {
- ifunc.prepare();
- auto backend = lowered_graph->getLowerInfo(op_seq_index)->backend();
- auto tensor_builder = lowered_graph->backend_contexts().at(backend)->tensor_builder;
- tensor_builder->postFunctionPrepare();
- });
+ auto codes = pair.second->genKernels(order, lowered_graph->op_seqs());
+ for (auto &pair : codes)
+ {
+ auto &op_seq_ind = pair.first;
+ auto &fn_seq = pair.second;
+ auto &op_seq = lowered_graph->op_seqs().at(op_seq_ind);
+ auto lower_info = lowered_graph->getLowerInfo(op_seq_ind);
+ if (options.he_profiling_mode)
+ fn_seq->wrap<SyncFunction>(lower_info->backend()->config());
+ builder.append(op_seq_ind, {&op_seq, lower_info, std::move(fn_seq)});
+ }
}
+ auto code_map = builder.releaseCodeMap();
+
exec::ExecutorBase *exec = nullptr;
if (parallel)
{
- exec = new exec::ParallelExecutor{std::move(lowered_graph), input_tensors, output_tensors,
- tensor_regs, std::move(code_map)};
+ exec = new exec::ParallelExecutor{std::move(lowered_graph), tensor_regs, std::move(code_map),
+ options.tracing_ctx};
}
else
{
- auto dataflow_exec = new exec::DataflowExecutor{
- std::move(lowered_graph), input_tensors, output_tensors, tensor_regs, std::move(code_map)};
+ auto dataflow_exec = new exec::DataflowExecutor{std::move(lowered_graph), tensor_regs,
+ std::move(code_map), options.tracing_ctx};
if (options.he_profiling_mode)
{
std::vector<const backend::Backend *> backends;
@@ -489,8 +375,8 @@ exec::IExecutor *ExecutorFactory::createDataflowExecutor(
if (!options.trace_filepath.empty())
{
- std::unique_ptr<exec::IExecutionObserver> ctp =
- std::make_unique<exec::ChromeTracingObserver>(options.trace_filepath, exec->graph());
+ std::unique_ptr<exec::IExecutionObserver> ctp = std::make_unique<exec::TracingObserver>(
+ options.trace_filepath, exec->graph(), options.tracing_ctx);
exec->addObserver(std::move(ctp));
}
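Both executor-creation paths above push the controlflow backend context to the back of a std::deque and every other backend to the front, so controlflow kernels are generated only after all other backends' tensors exist. A minimal sketch of that ordering trick, with plain strings standing in for backend contexts (illustrative only):

#include <deque>
#include <iostream>
#include <string>
#include <vector>

int main()
{
  // Unordered input, as it would come from a map of backend contexts.
  std::vector<std::string> backends{"controlflow", "cpu", "acl_cl"};

  std::deque<std::string> ordered;
  for (const auto &b : backends)
  {
    if (b == "controlflow")
      ordered.emplace_back(b);  // controlflow goes last
    else
      ordered.emplace_front(b); // everything else goes first
  }

  for (const auto &b : ordered)
    std::cout << b << std::endl; // prints: acl_cl, cpu, controlflow
  return 0;
}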
diff --git a/runtime/onert/core/src/compiler/ExecutorFactory.h b/runtime/onert/core/src/compiler/ExecutorFactory.h
index e76b721ea..06dc691db 100644
--- a/runtime/onert/core/src/compiler/ExecutorFactory.h
+++ b/runtime/onert/core/src/compiler/ExecutorFactory.h
@@ -46,9 +46,6 @@ private:
static void initializeBackendContext(compiler::LoweredGraph *lowered_graph);
static void runTensorRegistration(compiler::LoweredGraph *lowered_graph,
const std::vector<ir::OpSequenceIndex> &order);
- static std::vector<backend::ITensor *>
- initializeModelIOTensors(compiler::LoweredGraph &lowered_graph,
- const ir::OperandIndexSequence &indices);
static void prepareMigrantTensors(compiler::LoweredGraph &lowered_graph);
static exec::IExecutor *
createLinearExecutor(std::unique_ptr<compiler::LoweredGraph> lowered_graph,
diff --git a/runtime/onert/core/src/compiler/Linear.cc b/runtime/onert/core/src/compiler/Linear.cc
index 30c8f72a5..fdd2a7653 100644
--- a/runtime/onert/core/src/compiler/Linear.cc
+++ b/runtime/onert/core/src/compiler/Linear.cc
@@ -19,8 +19,6 @@
#include "Linear.h"
#include "backend/IConfig.h"
-#include "backend/IConstantInitializer.h"
-#include "backend/ITensorRegister.h"
#include "backend/Backend.h"
#include "util/logging.h"
@@ -62,190 +60,5 @@ void Linear::dump(const compiler::LoweredGraph &lowered_graph,
}
}
-void Linear::planTensors(const compiler::LoweredGraph &lowered_graph,
- const std::vector<ir::OpSequenceIndex> &order)
-{
- const auto &graph = lowered_graph.graph();
- ir::OperandIndexMap<std::shared_ptr<backend::ITensorBuilder>> tensor_builder_map;
-
- ir::OperandIndexMap<uint32_t> uses_map;
- ir::OperandIndexMap<uint32_t> def_map;
- ir::OperandIndexSequence constants;
-
- // Prepare scanning
- graph.operands().iterate([&](const ir::OperandIndex &ind, const ir::Operand &obj) {
- const auto lower_info = lowered_graph.getLowerInfo(ind);
- // TODO Remove if onert doesn't support anymore such as
- // GeneratedTests.reshape_quant8_weights_as_inputs
- if (lower_info->def_factors().size() == 0 && lower_info->use_factors().size() == 0 &&
- !graph.getInputs().contains(ind))
- {
- VERBOSE(LINEAR) << "Operand #" << ind.value() << " will not be used. no more process."
- << std::endl;
- return;
- }
-
- // Unused input of subgraph
- // TODO Register unused input as nullptr in tensor_builder
- if (lower_info->def_factors().size() == 0 && lower_info->use_factors().size() == 0 &&
- graph.getInputs().contains(ind))
- {
- VERBOSE(LINEAR) << "Operand #" << ind.value() << " will not be used. no more process."
- << std::endl;
- return;
- }
-
- uses_map[ind] = obj.getUses().size();
- def_map[ind] = obj.getDef().valid() ? 1 : 0;
-
- bool is_const = obj.isConstant();
- if (is_const)
- {
- constants.append(ind);
- }
-
- auto factor = lower_info->def_factors().getOnlyElement();
- auto backend = factor.backend();
- auto tensor_builder = lowered_graph.backend_contexts().at(backend)->tensor_builder;
- if (!tensor_builder->isRegistered(ind))
- {
- // These tensors do not exist in any op_seq (No use and def)
- const auto info = obj.info();
- const auto backend_layout = factor.layout();
- // TODO Change tensor info to have permuted shape
- tensor_builder->registerTensorInfo(ind, info, backend_layout);
- }
-
- tensor_builder_map[ind] = tensor_builder;
- });
-
- const auto io_tensors =
- (graph.getInputs() + graph.getOutputs()) | ir::Remove::DUPLICATED | ir::Remove::UNDEFINED;
-
- // If a tensor is model output, increase the use of the tensor.
- // This aim is same to above one.
- for (const auto &ind : io_tensors)
- {
- uses_map[ind]++;
- }
-
- // Start scanning to do notify{First|Last}Use for each tensor
-
- // If a tensor is a constant, increase the use of the tensor.
- // It makes the tensor not be dealloced. It means these will be deallocated last.
- // And allocate constant operands first
- VERBOSE(LINEAR) << "TENSORS as CONSTANT" << std::endl;
- for (const auto &ind : constants)
- {
- uses_map[ind]++;
- tensor_builder_map[ind]->notifyFirstUse(ind);
- }
-
- // Allocate Model's inputs
- VERBOSE(LINEAR) << "TENSORS as MODEL INPUT" << std::endl;
- for (const auto &ind : graph.getInputs() | ir::Remove::DUPLICATED)
- {
- auto tensor_builder = tensor_builder_map[ind];
- if (!tensor_builder) // for GeneratedTests.xxx_weights_as_inputs
- continue;
- tensor_builder->notifyFirstUse(ind);
- }
-
- // At each operation,
- // 1. Scan DEF of outputs. If the DEF, allocate it
- // 2. Scan DEF of inputs. If variable tensor, allocate it
- // 3. Scan USE of inputs. Decrease the USE and deallocate if the USE is 0
- VERBOSE(LINEAR) << "TENSORS" << std::endl;
- for (const auto op_seq_ind : order)
- {
- const auto &op_seq = lowered_graph.op_seqs().at(op_seq_ind);
- for (const auto &op_idx : op_seq.operations())
- {
- for (const auto &ind : graph.operations().at(op_idx).getOutputs() | ir::Remove::DUPLICATED |
- ir::Remove::UNDEFINED)
- {
- assert(def_map.find(ind) != def_map.end());
- if (def_map[ind])
- {
- def_map[ind] = 0;
- tensor_builder_map[ind]->notifyFirstUse(ind);
- }
- }
-
- // Scan variable tensors
- // This tensor has features like constant. But OperandInfo and LowerInfo treat them as
- // non-constant because of less memory usage by memory planning in here
- for (const auto &ind : graph.operations().at(op_idx).getInputs() | ir::Remove::DUPLICATED |
- ir::Remove::UNDEFINED)
- {
- const auto &operand = graph.operands().at(ind);
- if (operand.info().isVariable())
- {
- // The variable tensor with buffer is not supported yet
- assert(operand.data() == nullptr);
- assert(operand.getUses().size() == 1 && !operand.getDef().valid());
- assert(lowered_graph.getLowerInfo(ind)->def_factors().size() == 1 &&
- lowered_graph.getLowerInfo(ind)->use_factors().size() == 1);
- assert(uses_map[ind] == 1 && def_map[ind] == 0);
- tensor_builder_map[ind]->notifyFirstUse(ind);
- }
- }
-
- for (const auto &ind : graph.operations().at(op_idx).getInputs() | ir::Remove::DUPLICATED |
- ir::Remove::UNDEFINED)
- {
- assert(uses_map.find(ind) != uses_map.end());
- assert(uses_map[ind] > 0);
- uses_map[ind]--;
- if (uses_map[ind] == 0)
- {
- // plan for deallocation of static tensornode
- tensor_builder_map[ind]->notifyLastUse(ind);
-
- // plan for deallocation of dynamic tensor
- auto dyn_tensor_manager = tensor_builder_map[ind]->dynamicTensorManager();
- if (dyn_tensor_manager)
- {
- const auto *backend =
- lowered_graph.getLowerInfo(ind)->def_factors().getOnlyElement().backend();
- auto &tensor_registry = lowered_graph.backend_contexts().at(backend)->tensor_registry;
- auto *tensor = tensor_registry->getITensor(ind);
- assert(tensor);
- if (!io_tensors.contains(ind)) // I/O tensors cannot be deallocated
- dyn_tensor_manager->planDealloc(op_idx, tensor);
- }
- }
- }
- }
- }
-
- // Dispose and validate
- for (const auto &ind : io_tensors)
- {
- --uses_map[ind];
- if (uses_map[ind] == 0) // To prevent notifyLastUse from being called twice
- {
- tensor_builder_map[ind]->notifyLastUse(ind);
- }
- }
-
- for (const auto &ind : constants)
- {
- --uses_map[ind];
- if (uses_map[ind] == 0) // To prevent notifyLastUse from being called twice
- {
- tensor_builder_map[ind]->notifyLastUse(ind);
- }
- }
-
- assert(
- std::all_of(uses_map.begin(), uses_map.end(),
- [](std::pair<const ir::OperandIndex, uint32_t> it) { return it.second == 0; }));
-
- assert(
- std::all_of(def_map.begin(), def_map.end(),
- [](std::pair<const ir::OperandIndex, uint32_t> it) { return it.second == 0; }));
-}
-
} // namespace compiler
} // namespace onert
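The planTensors routine removed above plans first/last use by reference counting: constants, model I/O, and every operand use bump a counter, and notifyLastUse fires when the counter reaches zero. A minimal standalone sketch of that counting pattern (hypothetical names, not the onert API):

#include <iostream>
#include <map>
#include <string>
#include <vector>

int main()
{
  // Operand name -> remaining number of uses.
  std::map<std::string, int> uses{{"A", 2}, {"B", 1}};

  // Each step consumes some operands; when a count reaches zero the tensor
  // can be released (the removed code calls notifyLastUse at that point).
  std::vector<std::vector<std::string>> steps{{"A"}, {"A", "B"}};
  for (const auto &step : steps)
  {
    for (const auto &operand : step)
    {
      if (--uses[operand] == 0)
        std::cout << "last use of " << operand << std::endl;
    }
  }
  return 0;
}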
diff --git a/runtime/onert/core/src/compiler/Linear.h b/runtime/onert/core/src/compiler/Linear.h
index 1e24cf92b..56b42ccb0 100644
--- a/runtime/onert/core/src/compiler/Linear.h
+++ b/runtime/onert/core/src/compiler/Linear.h
@@ -22,7 +22,6 @@
#include "ir/OpSequences.h"
#include "ir/Index.h"
-#include "backend/ITensorBuilder.h"
#include "compiler/LoweredGraph.h"
namespace onert
@@ -44,8 +43,6 @@ public:
static std::vector<ir::OpSequenceIndex> linearize(const compiler::LoweredGraph &lowered_graph);
static void dump(const compiler::LoweredGraph &lowered_graph,
const std::vector<ir::OpSequenceIndex> &order);
- static void planTensors(const compiler::LoweredGraph &lowered_graph,
- const std::vector<ir::OpSequenceIndex> &order);
};
} // namespace compiler
diff --git a/runtime/onert/core/src/compiler/LoweredGraph.cc b/runtime/onert/core/src/compiler/LoweredGraph.cc
index 673d7d3e8..6d5210dc5 100644
--- a/runtime/onert/core/src/compiler/LoweredGraph.cc
+++ b/runtime/onert/core/src/compiler/LoweredGraph.cc
@@ -32,6 +32,7 @@
#include "compiler/BackendResolver.h"
#include "compiler/ManualScheduler.h"
#include "compiler/HEScheduler.h"
+#include "util/TracingCtx.h"
namespace onert
{
@@ -40,6 +41,13 @@ namespace compiler
LoweredGraph::LoweredGraph(const ir::Graph &graph, const CompilerOptions &options) : _graph{graph}
{
+ // set tracing_ctx for copied graph
+ if (options.tracing_ctx)
+ {
+ auto subgraph_index = options.tracing_ctx->getSubgraphIndex(&graph);
+ options.tracing_ctx->setSubgraphIndex(&_graph, subgraph_index.value());
+ }
+
bool linear_executor = (options.executor == "Linear");
// Build backend contexts
@@ -112,7 +120,7 @@ LoweredGraph::LoweredGraph(const ir::Graph &graph, const CompilerOptions &option
.run();
// Set LowerInfo for each operand from the operand::LowerInfo holder
- manipulateLowerInfo(operands_lower_info, options.is_primary_subgraph);
+ manipulateLowerInfo(operands_lower_info);
dumpLowerInfo();
}
@@ -126,7 +134,11 @@ LoweredGraph::LoweredGraph(const ir::Graph &graph, const CompilerOptions &option
// Optimization passes
pass::PassRunner{}.append(std::make_unique<pass::PermutationEliminationPass>(*this)).run();
- VERBOSE(OpSequences) << "Dump after permutation insertion" << std::endl;
+ VERBOSE(LoweredGraph) << "Dump after permutation insertion" << std::endl;
+ for (auto operand : _graph.getInputs())
+ VERBOSE(LoweredGraph) << "Graph Input : " << operand << std::endl;
+ for (auto operand : _graph.getOutputs())
+ VERBOSE(LoweredGraph) << "Graph Output : " << operand << std::endl;
dumpOpSequences(_op_seqs, _graph.operations());
// Graph verifications
@@ -322,50 +334,22 @@ void LoweredGraph::makeOpSequences(
}
void LoweredGraph::manipulateLowerInfo(
- ir::OperandIndexMap<std::unique_ptr<ir::operand::LowerInfo>> &operands_lower_info,
- bool is_primary)
+ ir::OperandIndexMap<std::unique_ptr<ir::operand::LowerInfo>> &operands_lower_info)
{
const auto controlflow_backend = BackendManager::get().getControlflow();
- // TODO Rather than handling primary graph specially,
- // let the permute inserted and remove it later
- if (is_primary)
+ // TODO Rather than using NHWC, get the frontend layout of this node from the IR
+ auto factor = ir::operand::PermuteFactor{controlflow_backend, ir::Layout::NHWC};
+ for (auto index : _graph.getInputs() | ir::Remove::UNDEFINED)
{
- // TODO Rather than using NHWC Get frontend layout of this node from IR
- auto factor = ir::operand::PermuteFactor{controlflow_backend, ir::Layout::NHWC};
- for (auto index : _graph.getInputs() | ir::Remove::UNDEFINED)
- {
- auto &&lower_info = operands_lower_info.at(index);
- assert(lower_info->def_factors().empty());
- lower_info->addDefPermuteFactor(factor);
- }
- for (auto index : _graph.getOutputs() | ir::Remove::UNDEFINED)
- {
- auto &&lower_info = operands_lower_info.at(index);
- lower_info->addUsePermuteFactor(factor);
- }
+ auto &&lower_info = operands_lower_info.at(index);
+ assert(lower_info->def_factors().empty());
+ lower_info->addDefPermuteFactor(factor);
}
- else
+ for (auto index : _graph.getOutputs() | ir::Remove::UNDEFINED)
{
- for (auto index : _graph.getInputs() | ir::Remove::UNDEFINED)
- {
- auto &&lower_info = operands_lower_info.at(index);
- if (!(lower_info->def_factors().size() == 0 && lower_info->use_factors().size() == 0))
- {
- // In case of not that Graph's input is not used in any operation and not the graph's
- // output.
- // In other words, it is not unused input in Graph.
- lower_info->addDefPermuteFactor(*lower_info->use_factors().begin());
- }
- else
- {
- // In case of that an operand is Graph's input and not input or output of any operation
- lower_info->addDefPermuteFactor(ir::operand::PermuteFactor{
- controlflow_backend,
- ir::Layout::NHWC // TODO Get frontend layout of this node from IR
- });
- }
- }
+ auto &&lower_info = operands_lower_info.at(index);
+ lower_info->addUsePermuteFactor(factor);
}
for (auto index : _graph.getOutputs() | ir::Remove::UNDEFINED)
{
@@ -446,8 +430,11 @@ void LoweredGraph::dumpLowerInfo()
sstream << (shape.dim(i)) << " ";
}
sstream << "}" << std::endl;
- sstream << " - Def ir::Operations : " << def_ops << std::endl;
- sstream << " - Use ir::Operations : " << use_ops << std::endl;
+ sstream << " - Def Operations : " << def_ops << std::endl;
+ sstream << " - Use Operations : " << use_ops << std::endl;
+ sstream << " - Data : "
+ << (object.data() ? (std::to_string(object.data()->size()) + " bytes") : "N/A")
+ << std::endl;
sstream << " - Lower Info" << std::endl;
sstream << " - Def Backends : " << def_layouts << std::endl;
sstream << " - Use Backends : " << use_layouts << std::endl;
diff --git a/runtime/onert/core/src/compiler/ManualScheduler.cc b/runtime/onert/core/src/compiler/ManualScheduler.cc
index ed49ee56f..1f4a47864 100644
--- a/runtime/onert/core/src/compiler/ManualScheduler.cc
+++ b/runtime/onert/core/src/compiler/ManualScheduler.cc
@@ -100,10 +100,11 @@ std::unique_ptr<BackendResolver> ManualScheduler::schedule(const ir::Graph &grap
}
// Dump final assignment
- backend_resolver->iterate([&](const ir::OperationIndex &index, const backend::Backend &backend) {
- VERBOSE(ManualScheduler) << "backend for operation #" << index.value() << ": "
- << backend.config()->id() << std::endl;
- });
+ WHEN_LOG_ENABLED(backend_resolver->iterate(
+ [&](const ir::OperationIndex &index, const backend::Backend &backend) {
+ VERBOSE(ManualScheduler) << "backend for operation #" << index.value() << ": "
+ << backend.config()->id() << std::endl;
+ }));
return backend_resolver;
}
diff --git a/runtime/onert/core/src/compiler/ShapeValidator.cc b/runtime/onert/core/src/compiler/ShapeValidator.cc
index c18178da9..e0c9f5283 100644
--- a/runtime/onert/core/src/compiler/ShapeValidator.cc
+++ b/runtime/onert/core/src/compiler/ShapeValidator.cc
@@ -37,7 +37,7 @@ namespace compiler
{
ShapeValidator::ShapeValidator(const ir::Graph &graph)
- : _graph{graph}, _ctx{graph.operands()}, _current_op_seq_layout{ir::Layout::UNKNOWN}
+ : _graph{graph}, _ctx{graph.operands()}, _current_layout{ir::Layout::UNKNOWN}
{
}
@@ -59,7 +59,7 @@ void ShapeValidator::operator()()
// creating Compiler
assert(_graph.subgraphs() == nullptr);
- _current_op_seq_layout = _graph.layout();
+ _current_layout = _graph.layout();
_graph.operations().iterate(
[&](const ir::OperationIndex &, const ir::Operation &node) { node.accept(*this); });
@@ -90,7 +90,7 @@ void ShapeValidator::visit(const ir::operation::BatchToSpaceND &node)
const auto block_size_index{
node.getInputs().at(ir::operation::BatchToSpaceND::Input::BLOCK_SIZE)};
- const auto frontend_layout = _current_op_seq_layout;
+ const auto frontend_layout = _current_layout;
const auto input_shape = _ctx.at(ifm_index).shape().asFeature(frontend_layout);
const auto output_shape = _ctx.at(ofm_index).shape().asFeature(frontend_layout);
@@ -101,6 +101,14 @@ void ShapeValidator::visit(const ir::operation::BatchToSpaceND &node)
OP_REQUIRES(_ctx.at(block_size_index).shape().dim(0) == 2);
+ if (node.getInputs().size() != 2)
+ {
+ const auto crops_index{node.getInputs().at(ir::operation::BatchToSpaceND::Input::CROPS_DATA)};
+ OP_REQUIRES(_ctx.at(crops_index).shape().rank() == 2);
+ OP_REQUIRES(_ctx.at(crops_index).shape().dim(0) == (_ctx.at(ifm_index).shape().rank() - 2));
+ OP_REQUIRES(_ctx.at(crops_index).shape().dim(1) == 2);
+ }
+
OP_REQUIRES(input_shape.C == output_shape.C);
}
@@ -330,7 +338,7 @@ void ShapeValidator::visit(const ir::operation::SpaceToBatchND &node)
node.getInputs().at(ir::operation::SpaceToBatchND::Input::BLOCK_SIZE)};
const auto paddings_index{node.getInputs().at(ir::operation::SpaceToBatchND::Input::PADDINGS)};
- const auto frontend_layout = _current_op_seq_layout;
+ const auto frontend_layout = _current_layout;
const auto input_shape = _ctx.at(ifm_index).shape().asFeature(frontend_layout);
const auto output_shape = _ctx.at(ofm_index).shape().asFeature(frontend_layout);
@@ -355,7 +363,7 @@ void ShapeValidator::visit(const ir::operation::SpaceToDepth &node)
const auto ifm_index{node.getInputs().at(ir::operation::SpaceToDepth::Input::INPUT)};
- const auto frontend_layout = _current_op_seq_layout;
+ const auto frontend_layout = _current_layout;
const auto input_shape = _ctx.at(ifm_index).shape().asFeature(frontend_layout);
const auto output_shape = _ctx.at(ofm_index).shape().asFeature(frontend_layout);
const auto block_size = node.param().block_size;
@@ -471,7 +479,7 @@ void ShapeValidator::visit(const ir::operation::TransposeConv &node)
OP_REQUIRES(_ctx.at(ofm_index).shape().rank() == _ctx.at(ifm_index).shape().rank());
OP_REQUIRES(_ctx.at(ofm_index).shape().rank() == _ctx.at(ker_index).shape().rank());
- const auto frontend_layout = _current_op_seq_layout;
+ const auto frontend_layout = _current_layout;
const auto ofm_shape = _ctx.at(ofm_index).shape().asFeature(frontend_layout);
const auto ifm_shape = _ctx.at(ifm_index).shape().asFeature(frontend_layout);
// The kernel has only IHWO layout on frontend
@@ -516,7 +524,7 @@ void ShapeValidator::visit(const ir::operation::DepthToSpace &node)
const auto input_index{node.getInputs().at(ir::operation::DepthToSpace::Input::INPUT)};
- const auto frontend_layout = _current_op_seq_layout;
+ const auto frontend_layout = _current_layout;
const auto output_shape = _ctx.at(output_index).shape().asFeature(frontend_layout);
const auto input_shape = _ctx.at(input_index).shape().asFeature(frontend_layout);
diff --git a/runtime/onert/core/src/compiler/ShapeValidator.h b/runtime/onert/core/src/compiler/ShapeValidator.h
index f40c098d5..763cf7ce3 100644
--- a/runtime/onert/core/src/compiler/ShapeValidator.h
+++ b/runtime/onert/core/src/compiler/ShapeValidator.h
@@ -93,7 +93,7 @@ private:
// TODO Remove _ctx field
const ir::Graph &_graph;
const ir::Operands &_ctx;
- ir::Layout _current_op_seq_layout;
+ ir::Layout _current_layout;
};
} // namespace compiler
diff --git a/runtime/onert/core/src/compiler/StaticShapeInferer.cc b/runtime/onert/core/src/compiler/StaticShapeInferer.cc
index d3b083b78..1f2c6f3b9 100644
--- a/runtime/onert/core/src/compiler/StaticShapeInferer.cc
+++ b/runtime/onert/core/src/compiler/StaticShapeInferer.cc
@@ -142,12 +142,12 @@ void StaticShapeInferer::dump()
}
}
-void StaticShapeInferer::visit(const ir::operation::ArgMax &op)
+void StaticShapeInferer::visit(const ir::operation::ArgMinMax &op)
{
- const auto input_idx{op.getInputs().at(ir::operation::ArgMax::Input::INPUT)};
+ const auto input_idx{op.getInputs().at(ir::operation::ArgMinMax::Input::INPUT)};
const auto &input = _operands.at(input_idx);
- const auto axis_idx{op.getInputs().at(ir::operation::ArgMax::Input::AXIS)};
+ const auto axis_idx{op.getInputs().at(ir::operation::ArgMinMax::Input::AXIS)};
const auto &axis = _operands.at(axis_idx);
// get mutable output operand
@@ -166,7 +166,8 @@ void StaticShapeInferer::visit(const ir::operation::ArgMax &op)
axis_value = axis_value < 0 ? axis_value + rank : axis_value;
// re-sizing output shape
- ir::Shape new_shape = shape_inference::inferArgMaxShape(input.info().shape(), axis_value, rank);
+ ir::Shape new_shape =
+ shape_inference::inferArgMinMaxShape(input.info().shape(), axis_value, rank);
output.info().shape(new_shape);
}
@@ -335,35 +336,47 @@ void StaticShapeInferer::visit(const ir::operation::ExpandDims &op)
// even when axis is constant, output shape should be recalculated since user might call
// nnfw_set_input_tensorinfo(input, some_new_shape)
- auto axis_buf = reinterpret_cast<const int32_t *>(axis.data()->base());
- assert(axis_buf);
+ auto axis_type = axis.typeInfo().type();
+ assert(axis_type == ir::DataType::INT32 || axis_type == ir::DataType::INT64);
+
+ assert(axis.data()->base());
+ int32_t axis_value =
+ (axis_type == ir::DataType::INT32)
+ ? reinterpret_cast<const int32_t *>(axis.data()->base())[0]
+ : static_cast<int32_t>(reinterpret_cast<const int64_t *>(axis.data()->base())[0]);
// re-sizing output shape
- ir::Shape new_shape = shape_inference::inferExpandDimsShape(input.info().shape(), axis_buf[0]);
+ ir::Shape new_shape = shape_inference::inferExpandDimsShape(input.info().shape(), axis_value);
output.info().shape(new_shape);
}
void StaticShapeInferer::visit(const ir::operation::Fill &op)
{
- const auto input_idx{op.getInputs().at(ir::operation::Fill::Input::INPUT)};
- const auto &input = _operands.at(input_idx);
+ const auto shape_idx{op.getInputs().at(ir::operation::Fill::Input::SHAPE)};
+ const auto &shape = _operands.at(shape_idx);
const auto output_idx = op.getOutputs().at(0);
ir::Operand &output = _operands.at(output_idx);
- if (!input.isConstant())
+ if (!shape.isConstant())
{
output.info().setDynamic();
_return_has_dynamic_tensor = true;
return;
}
- assert(input.typeInfo().type() == ir::DataType::INT32);
+ const auto dims_type = shape.typeInfo().type();
+ assert(dims_type == ir::DataType::INT32 || dims_type == ir::DataType::INT64);
- auto input_buf = reinterpret_cast<const int32_t *>(input.data()->base());
- assert(input_buf);
+ auto dims_buf = shape.data()->base();
+ assert(dims_buf);
+
+ const auto &dims_shape = shape.info().shape();
+ auto new_shape = ((dims_type == ir::DataType::INT32)
+ ? shape_inference::inferFillShape<int32_t>(
+ dims_shape, reinterpret_cast<const int32_t *>(dims_buf))
+ : shape_inference::inferFillShape<int64_t>(
+ dims_shape, reinterpret_cast<const int64_t *>(dims_buf)));
- // re-sizing output shape
- ir::Shape new_shape = shape_inference::inferFillShape(input.info().shape(), input_buf);
output.info().shape(new_shape);
}
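Both the ExpandDims and Fill changes above read an axis or dims buffer that may be stored as INT32 or INT64 and normalize it before shape inference. A minimal sketch of that widening-read pattern (illustrative helper, not the onert one):

#include <cassert>
#include <cstdint>
#include <iostream>

enum class DataType { INT32, INT64 };

// Reads the first element of a raw buffer whose element type is INT32 or INT64
// and returns it as int32_t, mirroring the axis handling above.
int32_t readIndexScalar(const void *buf, DataType type)
{
  assert(buf != nullptr);
  if (type == DataType::INT32)
    return *reinterpret_cast<const int32_t *>(buf);
  return static_cast<int32_t>(*reinterpret_cast<const int64_t *>(buf));
}

int main()
{
  const int64_t axis64[] = {-1};
  const int32_t axis32[] = {2};
  std::cout << readIndexScalar(axis64, DataType::INT64) << std::endl; // -1
  std::cout << readIndexScalar(axis32, DataType::INT32) << std::endl; // 2
  return 0;
}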
diff --git a/runtime/onert/core/src/compiler/TensorBuilders.h b/runtime/onert/core/src/compiler/TensorBuilders.h
deleted file mode 100644
index 3b0360b4b..000000000
--- a/runtime/onert/core/src/compiler/TensorBuilders.h
+++ /dev/null
@@ -1,78 +0,0 @@
-/*
- * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#ifndef __ONERT_COMPILER_TENSOR_BUILDERS_H__
-#define __ONERT_COMPILER_TENSOR_BUILDERS_H__
-
-#include <unordered_set>
-#include <memory>
-#include "backend/BackendContext.h"
-#include "backend/Backend.h"
-#include "backend/controlflow/Config.h"
-#include "backend/controlflow/TensorBuilder.h"
-#include "util/logging.h"
-
-namespace onert
-{
-namespace compiler
-{
-
-class TensorBuilders
-{
-public:
- TensorBuilders() = default;
-
- TensorBuilders(const onert::backend::BackendContexts &backend_contexts, bool include_controlflow)
- {
- for (const auto &e : backend_contexts)
- {
- if (e.first->config()->id() == backend::controlflow::Config::ID)
- {
- _cf_tensor_builder = std::dynamic_pointer_cast<backend::controlflow::TensorBuilder>(
- e.second->tensor_builder);
- if (include_controlflow)
- _tensor_builders.insert(e.second->tensor_builder);
- }
- else
- {
- _tensor_builders.insert(e.second->tensor_builder);
- }
- }
- }
-
- std::unordered_set<std::shared_ptr<onert::backend::ITensorBuilder>>::const_iterator begin() const
- {
- return _tensor_builders.cbegin();
- }
- std::unordered_set<std::shared_ptr<onert::backend::ITensorBuilder>>::const_iterator end() const
- {
- return _tensor_builders.cend();
- }
-
- std::shared_ptr<backend::controlflow::TensorBuilder> getControlflowTensorBuilder() const
- {
- return _cf_tensor_builder;
- }
-
-private:
- std::unordered_set<std::shared_ptr<backend::ITensorBuilder>> _tensor_builders;
- std::shared_ptr<backend::controlflow::TensorBuilder> _cf_tensor_builder;
-};
-
-} // namespace compiler
-} // namespace onert
-
-#endif // __ONERT_COMPILER_TENSOR_BUILDERS_H__
diff --git a/runtime/onert/core/src/compiler/pass/PermutationInsertionPass.cc b/runtime/onert/core/src/compiler/pass/PermutationInsertionPass.cc
index c83a72ada..8467d51c8 100644
--- a/runtime/onert/core/src/compiler/pass/PermutationInsertionPass.cc
+++ b/runtime/onert/core/src/compiler/pass/PermutationInsertionPass.cc
@@ -130,9 +130,11 @@ ir::OperationIndex PermutationInsertionPass::insertPermute(const ir::OperandInde
// Generate output operand and permute operation
auto out_operand_index = _graph.addOperand(operand.shape(), operand.typeInfo());
- // change model output if operand_index is model output index
+ // Change the model output if operand_index is a model output index and the output operand is
+ // on the controlflow backend
auto &model_outputs = _graph.getOutputs();
- if (model_outputs.contains(operand_index))
+ const backend::Backend *cf_backend = compiler::BackendManager::get().getControlflow();
+ if (model_outputs.contains(operand_index) && factor.backend() == cf_backend)
{
model_outputs.replace(operand_index, out_operand_index);
}
@@ -191,8 +193,10 @@ ir::OperationIndex PermutationInsertionPass::insertPermute(const ir::OperandInde
const auto &node = _graph.operations().at(node_index);
VERBOSE_F() << "Permute Op inserted, node index : " << node_index << std::endl;
- VERBOSE_F() << " - Input (original) Operand : " << operand_index << std::endl;
- VERBOSE_F() << " - Output(inserted) Operand : " << out_operand_index << std::endl;
+ VERBOSE_F() << " - Input (original) Operand : " << operand_index << "("
+ << input_factor.backend()->config()->id() << ")" << std::endl;
+ VERBOSE_F() << " - Output(inserted) Operand : " << out_operand_index << "("
+ << factor.backend()->config()->id() << ")" << std::endl;
// OpSequence
{
diff --git a/runtime/onert/core/src/exec/DataflowExecutor.cc b/runtime/onert/core/src/exec/DataflowExecutor.cc
index 53bc3c204..b81a75794 100644
--- a/runtime/onert/core/src/exec/DataflowExecutor.cc
+++ b/runtime/onert/core/src/exec/DataflowExecutor.cc
@@ -78,11 +78,10 @@ bool DataflowExecutor::noWaitingJobs()
}
DataflowExecutor::DataflowExecutor(std::unique_ptr<compiler::LoweredGraph> lowered_graph,
- const std::vector<backend::ITensor *> &input_tensors,
- const std::vector<backend::ITensor *> &output_tensors,
const compiler::TensorRegistries &tensor_regs,
- compiler::CodeMap &&code_map)
- : ExecutorBase{std::move(lowered_graph), input_tensors, output_tensors, tensor_regs},
+ compiler::CodeMap &&code_map,
+ const util::TracingCtx *tracing_ctx)
+ : ExecutorBase{std::move(lowered_graph), tensor_regs, tracing_ctx},
_code_map{std::move(code_map)}
{
VERBOSE(DataflowExecutor) << "Constructing Dataflow Executor" << std::endl;
@@ -143,7 +142,9 @@ void DataflowExecutor::executeImpl()
}
assert(!_ready_jobs.empty()); // Cannot begin if there is no initial jobs
- _subject.notifyModelBegin(this);
+ auto profiling_subg_index = _tracing_ctx->getSubgraphIndex(&_graph);
+
+ _subject.notifySubgraphBegin(profiling_subg_index);
while (!_ready_jobs.empty())
{
@@ -157,7 +158,7 @@ void DataflowExecutor::executeImpl()
const backend::Backend *backend =
_lowered_graph->getLowerInfo()->op_seq.at(op_seq_index)->backend();
- _subject.notifyJobBegin(this, op_seq, backend);
+ _subject.notifyJobBegin(this, profiling_subg_index, op_seq, backend);
job->fn_seq()->initRunning();
@@ -167,13 +168,13 @@ void DataflowExecutor::executeImpl()
job->run();
- _subject.notifyJobEnd(this, op_seq, backend);
+ _subject.notifyJobEnd(this, profiling_subg_index, op_seq, backend);
notify(job_index);
_finished_jobs[job_index] = std::move(job);
}
assert(noWaitingJobs());
- _subject.notifyModelEnd(this);
+ _subject.notifySubgraphEnd(profiling_subg_index);
// Reset input info for the next execution
_input_info = _initial_input_info;
diff --git a/runtime/onert/core/src/exec/DataflowExecutor.h b/runtime/onert/core/src/exec/DataflowExecutor.h
index 69dfda15c..b72c0d030 100644
--- a/runtime/onert/core/src/exec/DataflowExecutor.h
+++ b/runtime/onert/core/src/exec/DataflowExecutor.h
@@ -28,6 +28,7 @@
#include <memory>
#include "exec/ExecutorBase.h"
#include "compiler/CodeMap.h"
+#include "util/TracingCtx.h"
namespace onert
{
@@ -50,9 +51,8 @@ public:
* @param code_map OpSequence and its code map
*/
DataflowExecutor(std::unique_ptr<compiler::LoweredGraph> lowered_graph,
- const std::vector<backend::ITensor *> &input_tensors,
- const std::vector<backend::ITensor *> &output_tensors,
- const compiler::TensorRegistries &tensor_regs, compiler::CodeMap &&code_map);
+ const compiler::TensorRegistries &tensor_regs, compiler::CodeMap &&code_map,
+ const util::TracingCtx *tracing_ctx);
void executeImpl() override;
diff --git a/runtime/onert/core/src/exec/DynamicShapeInferer.cc b/runtime/onert/core/src/exec/DynamicShapeInferer.cc
index 1666d3f08..2d9d534f1 100644
--- a/runtime/onert/core/src/exec/DynamicShapeInferer.cc
+++ b/runtime/onert/core/src/exec/DynamicShapeInferer.cc
@@ -92,12 +92,12 @@ void DynamicShapeInferer::handleSimpleUnaryOp(const ir::Operation &op,
assert(output->buffer() != nullptr);
}
-void DynamicShapeInferer::visit(const ir::operation::ArgMax &op)
+void DynamicShapeInferer::visit(const ir::operation::ArgMinMax &op)
{
- const auto input_idx{op.getInputs().at(ir::operation::ArgMax::Input::INPUT)};
+ const auto input_idx{op.getInputs().at(ir::operation::ArgMinMax::Input::INPUT)};
const auto input = _tensor_registry->getITensor(input_idx);
- const auto axis_idx{op.getInputs().at(ir::operation::ArgMax::Input::AXIS)};
+ const auto axis_idx{op.getInputs().at(ir::operation::ArgMinMax::Input::AXIS)};
const auto axis = _tensor_registry->getITensor(axis_idx);
auto output_ind = op.getOutputs().at(0);
@@ -111,7 +111,7 @@ void DynamicShapeInferer::visit(const ir::operation::ArgMax &op)
const auto rank = input_shape.rank();
axis_value = axis_value < 0 ? axis_value + rank : axis_value;
- ir::Shape new_shape = shape_inference::inferArgMaxShape(input_shape, axis_value, rank);
+ ir::Shape new_shape = shape_inference::inferArgMinMaxShape(input_shape, axis_value, rank);
output->applyShape(new_shape);
assert(output->buffer() != nullptr);
@@ -388,10 +388,16 @@ void DynamicShapeInferer::visit(const ir::operation::ExpandDims &op)
auto axis_ind = op.getInputs().at(ir::operation::ExpandDims::AXIS);
auto axis = _tensor_registry->getITensor(axis_ind);
- auto axis_buf = reinterpret_cast<const int32_t *>(axis->buffer());
- assert(axis_buf);
+ auto axis_type = axis->data_type();
+ assert(axis_type == ir::DataType::INT32 || axis_type == ir::DataType::INT64);
- auto output_shape = shape_inference::inferExpandDimsShape(input_shape, axis_buf[0]);
+ assert(axis->buffer());
+ int32_t axis_value =
+ (axis_type == ir::DataType::INT32)
+ ? reinterpret_cast<const int32_t *>(axis->buffer())[0]
+ : static_cast<int32_t>(reinterpret_cast<const int64_t *>(axis->buffer())[0]);
+
+ auto output_shape = shape_inference::inferExpandDimsShape(input_shape, axis_value);
output->applyShape(output_shape);
assert(output->buffer() != nullptr);
@@ -402,19 +408,24 @@ void DynamicShapeInferer::visit(const ir::operation::Fill &op)
// check if output is not dynamic
auto output_ind = op.getOutputs().at(0);
auto output = _tensor_registry->getITensor(output_ind);
- auto input_ind = op.getInputs().at(ir::operation::Fill::Input::INPUT);
- auto input = _tensor_registry->getITensor(input_ind);
- ir::Shape input_shape = input->getShape();
+ auto shape_ind = op.getInputs().at(ir::operation::Fill::Input::SHAPE);
+ auto shape = _tensor_registry->getITensor(shape_ind);
- if ((!input->is_dynamic()) && (!output->is_dynamic()))
+ if ((!shape->is_dynamic()) && (!output->is_dynamic()))
return;
- assert(input->data_type() == ir::DataType::INT32);
+ const auto dims_type = shape->data_type();
+ assert(dims_type == ir::DataType::INT32 || dims_type == ir::DataType::INT64);
- auto input_buf = reinterpret_cast<const int32_t *>(input->buffer());
- assert(input_buf);
+ auto dims_buf = shape->buffer();
+ assert(dims_buf);
- auto output_shape = shape_inference::inferFillShape(input_shape, input_buf);
+ const auto &dims_shape = shape->getShape();
+ auto output_shape = ((dims_type == ir::DataType::INT32)
+ ? shape_inference::inferFillShape<int32_t>(
+ dims_shape, reinterpret_cast<const int32_t *>(dims_buf))
+ : shape_inference::inferFillShape<int64_t>(
+ dims_shape, reinterpret_cast<const int64_t *>(dims_buf)));
output->applyShape(output_shape);
assert(output->buffer() != nullptr);
diff --git a/runtime/onert/core/src/exec/ExecTime.h b/runtime/onert/core/src/exec/ExecTime.h
index 846d0930b..d2ddbad34 100644
--- a/runtime/onert/core/src/exec/ExecTime.h
+++ b/runtime/onert/core/src/exec/ExecTime.h
@@ -94,7 +94,7 @@ public:
/**
* @brief Update metrics file with new data.
*/
- void uploadOperationsExecTime() const { _json.uploadOperationsExecTime(); }
+ void storeOperationsExecTime() const { _json.storeOperationsExecTime(); }
static const int64_t NOT_FOUND = -1;
private:
diff --git a/runtime/onert/core/src/exec/ExecutionObservee.cc b/runtime/onert/core/src/exec/ExecutionObservee.cc
index ddb1fb6a0..d5003b126 100644
--- a/runtime/onert/core/src/exec/ExecutionObservee.cc
+++ b/runtime/onert/core/src/exec/ExecutionObservee.cc
@@ -26,37 +26,38 @@ void ExecutionObservee::add(std::unique_ptr<IExecutionObserver> observer)
_observers.emplace_back(std::move(observer));
}
-void ExecutionObservee::notifyModelBegin(IExecutor *executor)
+void ExecutionObservee::notifySubgraphBegin(ir::SubgraphIndex ind)
{
for (auto &o : _observers)
{
- o->handleBegin(executor);
+ o->handleSubgraphBegin(ind);
}
}
-void ExecutionObservee::notifyModelEnd(IExecutor *executor)
+void ExecutionObservee::notifySubgraphEnd(ir::SubgraphIndex ind)
{
for (auto &o : _observers)
{
- o->handleEnd(executor);
+ o->handleSubgraphEnd(ind);
}
}
-void ExecutionObservee::notifyJobBegin(IExecutor *executor, const ir::OpSequence *op_seq,
+void ExecutionObservee::notifyJobBegin(IExecutor *executor, ir::SubgraphIndex index,
+ const ir::OpSequence *op_seq,
const backend::Backend *backend)
{
for (auto &o : _observers)
{
- o->handleBegin(executor, op_seq, backend);
+ o->handleJobBegin(executor, index, op_seq, backend);
}
}
-void ExecutionObservee::notifyJobEnd(IExecutor *executor, const ir::OpSequence *op_seq,
- const backend::Backend *backend)
+void ExecutionObservee::notifyJobEnd(IExecutor *executor, ir::SubgraphIndex index,
+ const ir::OpSequence *op_seq, const backend::Backend *backend)
{
for (auto &o : _observers)
{
- o->handleEnd(executor, op_seq, backend);
+ o->handleJobEnd(executor, index, op_seq, backend);
}
}
diff --git a/runtime/onert/core/src/exec/ExecutionObservee.h b/runtime/onert/core/src/exec/ExecutionObservee.h
index 49d409a3a..62b3f6201 100644
--- a/runtime/onert/core/src/exec/ExecutionObservee.h
+++ b/runtime/onert/core/src/exec/ExecutionObservee.h
@@ -20,6 +20,7 @@
#include <list>
#include "exec/ExecutionObservers.h"
+#include "ir/Index.h"
namespace onert
{
@@ -39,11 +40,11 @@ public:
* @param observer Observer to be added
*/
void add(std::unique_ptr<IExecutionObserver> observer);
- void notifyModelBegin(IExecutor *executor);
- void notifyModelEnd(IExecutor *executor);
- void notifyJobBegin(IExecutor *executor, const ir::OpSequence *op_seq,
+ void notifySubgraphBegin(ir::SubgraphIndex ind);
+ void notifySubgraphEnd(ir::SubgraphIndex ind);
+ void notifyJobBegin(IExecutor *executor, ir::SubgraphIndex index, const ir::OpSequence *op_seq,
const backend::Backend *backend);
- void notifyJobEnd(IExecutor *executor, const ir::OpSequence *op_seq,
+ void notifyJobEnd(IExecutor *executor, ir::SubgraphIndex index, const ir::OpSequence *op_seq,
const backend::Backend *backend);
private:
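The renamed hooks above (handleSubgraphBegin/End, handleJobBegin/End) give observers both the subgraph index and the per-job context. A minimal sketch of a custom observer built against a simplified interface of the same shape (class and member names here are illustrative, not the exact onert declarations):

#include <cstdint>
#include <iostream>

using SubgraphIndex = uint32_t; // stand-in for ir::SubgraphIndex

struct ISimpleObserver
{
  virtual ~ISimpleObserver() = default;
  virtual void handleSubgraphBegin(SubgraphIndex) = 0;
  virtual void handleJobBegin(SubgraphIndex, const char *backend_id) = 0;
  virtual void handleJobEnd(SubgraphIndex, const char *backend_id) = 0;
  virtual void handleSubgraphEnd(SubgraphIndex) = 0;
};

struct LoggingObserver : ISimpleObserver
{
  void handleSubgraphBegin(SubgraphIndex ind) override
  {
    std::cout << "subgraph " << ind << " begin" << std::endl;
  }
  void handleJobBegin(SubgraphIndex ind, const char *backend_id) override
  {
    std::cout << "  job on " << backend_id << " (subgraph " << ind << ")" << std::endl;
  }
  void handleJobEnd(SubgraphIndex, const char *) override {}
  void handleSubgraphEnd(SubgraphIndex ind) override
  {
    std::cout << "subgraph " << ind << " end" << std::endl;
  }
};

int main()
{
  LoggingObserver obs;
  obs.handleSubgraphBegin(0);
  obs.handleJobBegin(0, "cpu");
  obs.handleJobEnd(0, "cpu");
  obs.handleSubgraphEnd(0);
  return 0;
}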
diff --git a/runtime/onert/core/src/exec/ExecutionObservers.cc b/runtime/onert/core/src/exec/ExecutionObservers.cc
index 066b52ee1..18c0c1dd3 100644
--- a/runtime/onert/core/src/exec/ExecutionObservers.cc
+++ b/runtime/onert/core/src/exec/ExecutionObservers.cc
@@ -17,12 +17,62 @@
#include "exec/ExecutionObservers.h"
#include <string>
+#include <sstream>
#include "util/logging.h"
#include "exec/IExecutor.h"
#include "misc/polymorphic_downcast.h"
#include "ir/OpSequence.h"
#include "util/EventWriter.h"
+#include "util/Utils.h"
+
+namespace
+{
+
+void setUserData(const onert::ir::Graph &g, const onert::ir::OpSequence *op_seq,
+ decltype(EventCollector::Event::userData) &data)
+{
+ if (op_seq->size() == 0)
+ return;
+
+ // From a tensor of shape [a, b, c], this will return a string "shape(a b c)".
+ // String like "[1, 2, 3]" looks better but this will be considered as a list in Json
+ // so text search (e.g., Ctrl-F in Chrome Tracing) could be difficult
+ auto build_shape_str = [&](onert::ir::OperandIndex operand_idx) {
+ std::string shape_str;
+ auto &shape = g.operands().at(operand_idx).info().shape();
+ for (int i = 0; i < shape.rank(); i++)
+ {
+ if (i == 0)
+ shape_str = "shape(" + std::to_string(shape.dim(i));
+ else
+ shape_str += " " + std::to_string(shape.dim(i));
+ }
+ shape_str += ")";
+
+ return shape_str;
+ };
+
+ const auto &first_op_idx = op_seq->operations().at(0);
+ const auto &first_op_node = g.operations().at(first_op_idx);
+
+ auto &inputs = first_op_node.getInputs();
+ auto size = inputs.size();
+ for (size_t i = 0; i < size; i++)
+ {
+ auto operand_idx = inputs.at(i);
+ if (operand_idx.undefined())
+ continue;
+
+ std::string key("input_shape_" + std::to_string(i));
+ std::string value = build_shape_str(operand_idx);
+ data.emplace_back(std::make_pair(key, value));
+ }
+
+ // add other userData as needed
+}
+
+} // namespace
namespace onert
{
@@ -30,8 +80,8 @@ namespace onert
namespace exec
{
-void ProfileObserver::handleBegin(onert::exec::IExecutor *, const ir::OpSequence *,
- const onert::backend::Backend *backend)
+void ProfileObserver::handleJobBegin(onert::exec::IExecutor *, ir::SubgraphIndex,
+ const ir::OpSequence *, const onert::backend::Backend *backend)
{
_timer = backend->config()->timer();
if (_timer == nullptr)
@@ -39,8 +89,8 @@ void ProfileObserver::handleBegin(onert::exec::IExecutor *, const ir::OpSequence
_timer->handleBegin();
}
-void ProfileObserver::handleEnd(IExecutor *exec, const ir::OpSequence *op_seq,
- const backend::Backend *backend)
+void ProfileObserver::handleJobEnd(IExecutor *exec, ir::SubgraphIndex, const ir::OpSequence *op_seq,
+ const backend::Backend *backend)
{
_timer->handleEnd();
const auto timer_res = _timer->getTime();
@@ -70,51 +120,74 @@ void ProfileObserver::handleEnd(IExecutor *exec, const ir::OpSequence *op_seq,
}
};
-ChromeTracingObserver::ChromeTracingObserver(const std::string &filepath, const ir::Graph &graph)
- : _base_filepath(filepath), _recorder{}, _collector{&_recorder}, _graph{graph}
+TracingObserver::TracingObserver(const std::string &filepath, const ir::Graph &graph,
+ const util::TracingCtx *tracing_ctx)
+ : _recorder{std::make_unique<EventRecorder>()}, _collector{_recorder.get()}, _graph{graph},
+ _tracing_ctx{tracing_ctx}
{
+ // TODO Remove below after using _tracing_ctx
+ UNUSED_RELEASE(_tracing_ctx);
+
+ _event_writer = EventWriter::get(filepath);
+ _event_writer->startToUse();
}
-ChromeTracingObserver::~ChromeTracingObserver()
+TracingObserver::~TracingObserver()
{
try
{
- EventWriter{_recorder}.writeToFiles(_base_filepath);
+ _event_writer->readyToFlush(std::move(_recorder));
}
catch (const std::exception &e)
{
- std::cerr << "E: Fail to record event in ChromeTracingObserver: " << e.what() << std::endl;
+ std::cerr << "E: Failed to record event in TracingObserver: " << e.what() << std::endl;
}
}
-void ChromeTracingObserver::handleBegin(IExecutor *)
+void TracingObserver::handleSubgraphBegin(ir::SubgraphIndex subg_ind)
{
+ // TODO Write subg_ind into profiling result
+ UNUSED_RELEASE(subg_ind);
_collector.onEvent(EventCollector::Event{EventCollector::Edge::BEGIN, "runtime", "Graph"});
}
-void ChromeTracingObserver::handleBegin(IExecutor *, const ir::OpSequence *op_seq,
- const backend::Backend *backend)
+void TracingObserver::handleJobBegin(IExecutor *, ir::SubgraphIndex subg_ind,
+ const ir::OpSequence *op_seq, const backend::Backend *backend)
{
+ // TODO Write subg_ind into profiling result
+ UNUSED_RELEASE(subg_ind);
+
std::string backend_id = backend->config()->id();
- _collector.onEvent(EventCollector::Event{EventCollector::Edge::BEGIN, backend_id,
- opSequenceTag(op_seq, _graph.operations())});
+
+ auto ev = EventCollector::Event{EventCollector::Edge::BEGIN, backend_id,
+ opSequenceTag(op_seq, _graph.operations())};
+ // add shape of inputs
+ setUserData(_graph, op_seq, ev.userData);
+
+ _collector.onEvent(ev);
}
-void ChromeTracingObserver::handleEnd(IExecutor *, const ir::OpSequence *op_seq,
- const backend::Backend *backend)
+void TracingObserver::handleJobEnd(IExecutor *, ir::SubgraphIndex subg_ind,
+ const ir::OpSequence *op_seq, const backend::Backend *backend)
{
+ // TODO Write subg_ind into profiling result
+ UNUSED_RELEASE(subg_ind);
+
std::string backend_id = backend->config()->id();
_collector.onEvent(EventCollector::Event{EventCollector::Edge::END, backend_id,
opSequenceTag(op_seq, _graph.operations())});
}
-void ChromeTracingObserver::handleEnd(IExecutor *)
+void TracingObserver::handleSubgraphEnd(ir::SubgraphIndex subg_ind)
{
+ // TODO Write subg_ind into profiling result
+ UNUSED_RELEASE(subg_ind);
+
_collector.onEvent(EventCollector::Event{EventCollector::Edge::END, "runtime", "Graph"});
}
-std::string ChromeTracingObserver::opSequenceTag(const ir::OpSequence *op_seq,
- const ir::Operations &operations)
+std::string TracingObserver::opSequenceTag(const ir::OpSequence *op_seq,
+ const ir::Operations &operations)
{
if (op_seq->size() == 0)
return "Empty OpSequence";
diff --git a/runtime/onert/core/src/exec/ExecutionObservers.h b/runtime/onert/core/src/exec/ExecutionObservers.h
index f8c2acca5..a9eebfee1 100644
--- a/runtime/onert/core/src/exec/ExecutionObservers.h
+++ b/runtime/onert/core/src/exec/ExecutionObservers.h
@@ -18,12 +18,16 @@
#define __ONERT_EXEC_OBSREVERS_H__
#include "exec/IFunction.h"
+#include "ir/Index.h"
#include "ir/OpSequence.h"
#include "ExecTime.h"
#include "util/ITimer.h"
#include "exec/IExecutor.h"
#include "util/EventCollector.h"
#include "util/EventRecorder.h"
+#include "util/EventWriter.h"
+#include "util/TracingCtx.h"
+#include "util/EventWriter.h"
namespace onert
{
@@ -33,13 +37,15 @@ class IExecutionObserver
{
public:
/// @brief Invoked just before model (not individual operation) execution begins
- virtual void handleBegin(IExecutor *) { return; }
+ virtual void handleSubgraphBegin(ir::SubgraphIndex) { return; }
- virtual void handleBegin(IExecutor *, const ir::OpSequence *, const backend::Backend *) = 0;
- virtual void handleEnd(IExecutor *, const ir::OpSequence *, const backend::Backend *) = 0;
+ virtual void handleJobBegin(IExecutor *, ir::SubgraphIndex, const ir::OpSequence *,
+ const backend::Backend *) = 0;
+ virtual void handleJobEnd(IExecutor *, ir::SubgraphIndex, const ir::OpSequence *,
+ const backend::Backend *) = 0;
/// @brief Invoked just after model (not individual operation) execution ends
- virtual void handleEnd(IExecutor *) { return; }
+ virtual void handleSubgraphEnd(ir::SubgraphIndex) { return; }
virtual ~IExecutionObserver() = default;
};
@@ -51,10 +57,12 @@ public:
: _et(std::move(et)), _graph(graph)
{
}
- void handleBegin(IExecutor *, const ir::OpSequence *, const backend::Backend *) override;
- void handleEnd(IExecutor *, const ir::OpSequence *, const backend::Backend *) override;
+ void handleJobBegin(IExecutor *, ir::SubgraphIndex, const ir::OpSequence *,
+ const backend::Backend *) override;
+ void handleJobEnd(IExecutor *, ir::SubgraphIndex, const ir::OpSequence *,
+ const backend::Backend *) override;
- void handleEnd(IExecutor *) override { _et->uploadOperationsExecTime(); }
+ void handleSubgraphEnd(ir::SubgraphIndex) override { _et->storeOperationsExecTime(); }
private:
std::unique_ptr<util::ITimer> _timer;
@@ -62,24 +70,28 @@ private:
const ir::Graph &_graph;
};
-class ChromeTracingObserver : public IExecutionObserver
+class TracingObserver : public IExecutionObserver
{
public:
- ChromeTracingObserver(const std::string &filepath, const ir::Graph &graph);
- ~ChromeTracingObserver();
- void handleBegin(IExecutor *) override;
- void handleBegin(IExecutor *, const ir::OpSequence *, const backend::Backend *) override;
- void handleEnd(IExecutor *, const ir::OpSequence *, const backend::Backend *) override;
- void handleEnd(IExecutor *) override;
+ TracingObserver(const std::string &filepath, const ir::Graph &graph,
+ const util::TracingCtx *tracing_ctx);
+ ~TracingObserver();
+ void handleSubgraphBegin(ir::SubgraphIndex) override;
+ void handleJobBegin(IExecutor *, ir::SubgraphIndex, const ir::OpSequence *,
+ const backend::Backend *) override;
+ void handleJobEnd(IExecutor *, ir::SubgraphIndex, const ir::OpSequence *,
+ const backend::Backend *) override;
+ void handleSubgraphEnd(ir::SubgraphIndex) override;
private:
static std::string opSequenceTag(const ir::OpSequence *op_seq, const ir::Operations &operations);
private:
- const std::string &_base_filepath;
- EventRecorder _recorder;
+ std::unique_ptr<EventRecorder> _recorder;
EventCollector _collector;
const ir::Graph &_graph;
+ EventWriter *_event_writer;
+ const util::TracingCtx *_tracing_ctx;
};
} // namespace exec
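Any additional observer only needs to implement the four virtuals declared above. A hedged sketch follows; the class name, logging body, and includes are illustrative, while the interface itself and the backend->config()->id() call come from this patch:

  #include "exec/ExecutionObservers.h"
  #include "util/logging.h"

  class LoggingObserver : public onert::exec::IExecutionObserver
  {
  public:
    void handleSubgraphBegin(onert::ir::SubgraphIndex) override {}
    void handleJobBegin(onert::exec::IExecutor *, onert::ir::SubgraphIndex,
                        const onert::ir::OpSequence *,
                        const onert::backend::Backend *backend) override
    {
      // Same backend-id lookup as the observers in this file.
      VERBOSE(LoggingObserver) << "job begins on " << backend->config()->id() << std::endl;
    }
    void handleJobEnd(onert::exec::IExecutor *, onert::ir::SubgraphIndex,
                      const onert::ir::OpSequence *, const onert::backend::Backend *) override {}
    void handleSubgraphEnd(onert::ir::SubgraphIndex) override {}
  };

Such an observer would be registered through the addObserver() entry point shown in ExecutorBase.h below, e.g. executor->addObserver(std::make_unique<LoggingObserver>()).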
diff --git a/runtime/onert/core/src/exec/ExecutorBase.cc b/runtime/onert/core/src/exec/ExecutorBase.cc
index 018a0bba0..588a3258d 100644
--- a/runtime/onert/core/src/exec/ExecutorBase.cc
+++ b/runtime/onert/core/src/exec/ExecutorBase.cc
@@ -15,11 +15,11 @@
*/
#include "ExecutorBase.h"
+#include "ShapeConverter.h"
-#include "backend/ITensor.h"
#include "backend/controlflow/UserTensor.h"
-#include "backend/cpu_common/Tensor.h"
#include "util/logging.h"
+#include "misc/polymorphic_downcast.h"
namespace onert
{
@@ -27,43 +27,27 @@ namespace exec
{
ExecutorBase::ExecutorBase(std::unique_ptr<compiler::LoweredGraph> &&lowered_graph,
- const std::vector<backend::ITensor *> &input_tensors,
- const std::vector<backend::ITensor *> &output_tensors,
- const compiler::TensorRegistries &tensor_regs)
- : _lowered_graph{std::move(lowered_graph)}, _graph{_lowered_graph->graph()},
- _input_tensors{input_tensors}, _output_tensors{output_tensors}, _mutex()
+ const compiler::TensorRegistries &tensor_regs,
+ const util::TracingCtx *tracing_ctx)
+ : _lowered_graph{std::move(lowered_graph)}, _graph{_lowered_graph->graph()}, _mutex(),
+ _tracing_ctx(tracing_ctx)
{
- // TODO Fix the way of knowing whether it is primary or not
- bool primary_executor = !(_input_tensors.empty() && _output_tensors.empty());
- if (!primary_executor)
- {
- auto build_input_tensor_list = [&](const onert::ir::OperandIndexSequence &ind_seq) {
- std::vector<backend::ITensor *> list;
- for (auto ind : ind_seq)
- {
- backend::ITensor *tensor = tensor_regs.getITensor(ind);
- assert(tensor != nullptr);
- list.push_back(tensor);
- }
- return list;
- };
- auto build_output_tensor_list = [&](const onert::ir::OperandIndexSequence &ind_seq) {
- std::vector<backend::ITensor *> list;
- for (auto ind : ind_seq)
- {
- backend::ITensor *tensor = tensor_regs.getITensor(ind);
- assert(tensor != nullptr);
- list.push_back(tensor);
- }
- return list;
- };
- _input_tensors = build_input_tensor_list(_graph.getInputs());
- _output_tensors = build_output_tensor_list(_graph.getOutputs());
- }
+ auto build_tensor_list = [&](const auto &ind_seq, auto &tensors) {
+ assert(tensors.empty());
+ for (auto ind : ind_seq)
+ {
+ backend::ITensor *tensor = tensor_regs.getITensor(ind);
+ assert(tensor != nullptr);
+ auto io_tensor = nnfw::misc::polymorphic_downcast<backend::controlflow::IOTensor *>(tensor);
+ tensors.push_back(io_tensor);
+ }
+ };
+ build_tensor_list(_graph.getInputs(), _input_tensors);
+ build_tensor_list(_graph.getOutputs(), _output_tensors);
}
-void ExecutorBase::execute(const std::vector<backend::ITensor *> &src_tensors,
- const std::shared_ptr<IPermuteFunction> &pre_fn)
+void ExecutorBase::execute(const std::vector<backend::IPortableTensor *> &inputs,
+ const std::vector<backend::IPortableTensor *> &outputs)
{
// For thread-safe, use mutex
// TODO: if all used backends on this executor are thread-safe,
@@ -71,31 +55,37 @@ void ExecutorBase::execute(const std::vector<backend::ITensor *> &src_tensors,
// Deadlock occurs when an Executor is called recursively.
std::lock_guard<std::mutex> lock(_mutex);
- assert(src_tensors.size() == _graph.getInputs().size());
- assert(src_tensors.size() == _input_tensors.size());
- for (uint32_t n = 0; n < _graph.getInputs().size(); ++n)
+ assert(inputs.size() == _graph.getInputs().size());
+ assert(inputs.size() == _input_tensors.size());
+ for (uint32_t n = 0; n < inputs.size(); ++n)
{
- // when user changes input shape, the input tensor is dynamic and its memory is not allocated.
- // This code find the info to allocate dynamic tensor, and allocate memory based on the source
- // tensor's shape set by caller.
- const auto src_tensor = src_tensors[n];
+ const auto input = inputs[n];
+ assert(input->buffer() != nullptr);
auto input_tensor = _input_tensors[n];
- // If src_tensor or input_tensor is nullptr, pre_fn does not copy the tensors
- if (src_tensor != nullptr && input_tensor != nullptr)
+ assert(input_tensor != nullptr);
+ if (input != nullptr)
{
- const auto orig_input_shape = input_tensor->getShape();
+ const auto orig_input_shape = input_tensor->orig_info().shape();
const auto changed_input_shape =
- convertShape(src_tensor->getShape(), src_tensor->layout(), input_tensor->layout());
+ convertShape(input->getShape(), input->layout(), input_tensor->orig_layout());
if (orig_input_shape != changed_input_shape)
{
input_tensor->set_dynamic();
}
}
+ input_tensor->setTensor(input);
}
- // TODO Move calling permute_fn.run() into executeImpl()
- assert(pre_fn);
- pre_fn->run();
+ assert(outputs.size() == _graph.getOutputs().size());
+ assert(outputs.size() == _output_tensors.size());
+ for (uint32_t n = 0; n < outputs.size(); ++n)
+ {
+ const auto output = outputs[n];
+ // assert(output->buffer() != nullptr);
+ auto output_tensor = _output_tensors[n];
+ assert(output_tensor != nullptr);
+ output_tensor->setTensor(output);
+ }
executeImpl();
}
@@ -111,19 +101,19 @@ void ExecutorBase::execute(const IODescription &desc)
assert(_input_tensors.size() == desc.inputs.size());
for (uint32_t i = 0; i < _input_tensors.size(); ++i)
{
- // TODO Remove dynamic_cast
- auto *tensor = dynamic_cast<backend::controlflow::UserTensor *>(_input_tensors[i]);
- assert(tensor);
+ auto tensor = _input_tensors[i];
+
+ // TODO Check if (desc.inputs[i] == nullptr)
+ // TODO Better design for ITensor? (we need const_cast as ITensor is writable)
+ tensor->setUserTensor(static_cast<uint8_t *>(const_cast<void *>(desc.inputs[i]->buffer)),
+ desc.inputs[i]->size);
+
auto input_shape = desc.dynamic_input_shapes.find(ir::IOIndex{i});
if (input_shape != desc.dynamic_input_shapes.end())
{
tensor->set_dynamic();
tensor->setShape(input_shape->second);
}
- // TODO Check if (desc.inputs[i] == nullptr)
- // TODO Better design for ITensor? (we need const_cast as ITensor is writable)
- tensor->setBuffer(static_cast<uint8_t *>(const_cast<void *>(desc.inputs[i]->buffer)),
- desc.inputs[i]->size);
handleDynamicInputTensor(ir::IOIndex{i}, desc);
}
@@ -131,13 +121,12 @@ void ExecutorBase::execute(const IODescription &desc)
assert(_output_tensors.size() == desc.outputs.size());
for (uint32_t i = 0; i < _output_tensors.size(); ++i)
{
- // TODO Remove dynamic_cast
- auto *tensor = dynamic_cast<backend::controlflow::UserTensor *>(_output_tensors[i]);
- assert(tensor);
- tensor->set_dynamic(); // It can't be resized but shape could change
+ auto tensor = _output_tensors[i];
+
if (desc.outputs[i] == nullptr)
throw std::runtime_error{"Output " + std::to_string(i) + "'s buffer is not set."};
- tensor->setBuffer(static_cast<uint8_t *>(desc.outputs[i]->buffer), desc.outputs[i]->size);
+ tensor->setUserTensor(static_cast<uint8_t *>(desc.outputs[i]->buffer), desc.outputs[i]->size);
+ tensor->set_dynamic(); // It can't be resized but shape could change
}
executeImpl();
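The execute(inputs, outputs) overload above is the path a control-flow kernel would use to run a nested subgraph with tensors it already owns. The sketch below is illustrative only; the tensor variables are hypothetical, while the signature and the IOTensor binding are taken from this patch:

  // arg0, arg1 and ret0 are hypothetical IPortableTensor* owned by the calling kernel.
  std::vector<onert::backend::IPortableTensor *> ins{arg0, arg1};
  std::vector<onert::backend::IPortableTensor *> outs{ret0};

  // Binds each caller tensor onto the executor's controlflow::IOTensor slots via setTensor(),
  // marks an input dynamic if its shape differs from the original info, then runs executeImpl().
  nested_executor->execute(ins, outs);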
diff --git a/runtime/onert/core/src/exec/ExecutorBase.h b/runtime/onert/core/src/exec/ExecutorBase.h
index 8a6ec9174..5d95c10bf 100644
--- a/runtime/onert/core/src/exec/ExecutorBase.h
+++ b/runtime/onert/core/src/exec/ExecutorBase.h
@@ -17,23 +17,25 @@
#ifndef __ONERT_EXEC_EXECUTOR_BASE_H__
#define __ONERT_EXEC_EXECUTOR_BASE_H__
-#include <mutex>
-
#include "IPermuteFunction.h"
-#include "exec/ExecutionObservers.h"
-#include "ShapeConverter.h"
#include "exec/IExecutor.h"
-#include "compiler/LoweredGraph.h"
-#include "ir/LowerInfoMap.h"
-#include "backend/IConfig.h"
-#include "backend/Backend.h"
#include "exec/ExecTime.h"
-#include "exec/IFunction.h"
-#include "backend/IDynamicTensorManager.h"
-#include "backend/ITensorManager.h"
#include "exec/ExecutionObservee.h"
+#include "exec/IFunction.h"
+#include "exec/IODescription.h"
+#include "ir/Graph.h"
+#include "ir/Index.h"
+#include "ir/LowerInfoMap.h"
+#include "ir/OperationIndexMap.h"
+#include "compiler/LoweredGraph.h"
#include "compiler/TensorRegistries.h"
-#include <list>
+#include "backend/controlflow/IOTensor.h"
+#include "util/TracingCtx.h"
+
+#include <cstdint>
+#include <memory>
+#include <mutex>
+#include <vector>
namespace onert
{
@@ -49,25 +51,17 @@ public:
* @param tensor_builders Tensor builders that are currently used
*/
ExecutorBase(std::unique_ptr<compiler::LoweredGraph> &&lowered_graph,
- const std::vector<backend::ITensor *> &input_tensors,
- const std::vector<backend::ITensor *> &output_tensors,
- const compiler::TensorRegistries &tensor_regs);
+ const compiler::TensorRegistries &tensor_regs, const util::TracingCtx *tracing_ctx);
virtual ~ExecutorBase() = default;
const ir::Graph &graph() final { return _graph; }
- /**
- * @brief Execute without IODescription
- *
- * @param src_tensor Tensor list that will be copied to input tensors of this
- * @param pre_fn The permutation function that copy from src_tensor to input tensors of this
- */
- void execute(const std::vector<backend::ITensor *> &src_tensors,
- const std::shared_ptr<IPermuteFunction> &pre_fn);
-
void execute(const IODescription &desc) final;
+ void execute(const std::vector<backend::IPortableTensor *> &inputs,
+ const std::vector<backend::IPortableTensor *> &outputs) override;
+
// Used only in Dataflow and Parallel Executors
void setIndexedRanks(std::shared_ptr<ir::OperationIndexMap<int64_t>> ranks) final
{
@@ -78,9 +72,10 @@ public:
void addObserver(std::unique_ptr<IExecutionObserver> ref) { _subject.add(std::move(ref)); };
- const std::vector<backend::ITensor *> &getInputTensors() const { return _input_tensors; }
-
- const std::vector<backend::ITensor *> &getOutputTensors() const { return _output_tensors; }
+ const std::vector<backend::controlflow::IOTensor *> &getOutputTensors() const override
+ {
+ return _output_tensors;
+ }
protected:
/**
@@ -93,9 +88,10 @@ protected:
std::shared_ptr<ir::OperationIndexMap<int64_t>> _indexed_ranks;
std::unique_ptr<compiler::LoweredGraph> _lowered_graph;
const ir::Graph &_graph;
- std::vector<backend::ITensor *> _input_tensors;
- std::vector<backend::ITensor *> _output_tensors;
+ std::vector<backend::controlflow::IOTensor *> _input_tensors;
+ std::vector<backend::controlflow::IOTensor *> _output_tensors;
std::mutex _mutex;
+ const util::TracingCtx *_tracing_ctx;
private:
void handleDynamicInputTensor(ir::IOIndex input_index, const IODescription &desc);
diff --git a/runtime/onert/core/src/exec/IPermuteFunction.h b/runtime/onert/core/src/exec/IPermuteFunction.h
index 11017edc9..8f62156a6 100644
--- a/runtime/onert/core/src/exec/IPermuteFunction.h
+++ b/runtime/onert/core/src/exec/IPermuteFunction.h
@@ -120,7 +120,8 @@ protected:
}
assert(src_tensor != dst_tensor);
- assert(underlying_type(src_tensor->data_type()) == underlying_type(dst_tensor->data_type()));
+ if (underlying_type(src_tensor->data_type()) != underlying_type(dst_tensor->data_type()))
+ throw std::runtime_error("data type does not match");
switch (src_tensor->data_type())
{
case ir::DataType::FLOAT32:
diff --git a/runtime/onert/core/src/exec/JSONExecTime.cc b/runtime/onert/core/src/exec/JSONExecTime.cc
index 72a18def1..b29216a2f 100644
--- a/runtime/onert/core/src/exec/JSONExecTime.cc
+++ b/runtime/onert/core/src/exec/JSONExecTime.cc
@@ -135,7 +135,7 @@ void JSON::printOperation(const std::map<uint32_t, int64_t> &operation_info,
stream.seekp(-2, std::ofstream::end);
}
-void JSON::uploadOperationsExecTime() const
+void JSON::storeOperationsExecTime() const
{
std::ofstream stream(_measurement_file);
if (!stream.is_open())
diff --git a/runtime/onert/core/src/exec/JSONExecTime.h b/runtime/onert/core/src/exec/JSONExecTime.h
index a64cb3133..8987d723c 100644
--- a/runtime/onert/core/src/exec/JSONExecTime.h
+++ b/runtime/onert/core/src/exec/JSONExecTime.h
@@ -54,18 +54,16 @@ public:
loadOperationsExecTime();
};
/**
- * @brief Update _operations_exec_time_file with new data.
+ * @brief Update _measurement_file with new data.
*/
- void uploadOperationsExecTime() const;
+ void storeOperationsExecTime() const;
private:
///@brief file containing measurements
std::string _measurement_file;
std::unordered_map<std::string, const backend::Backend *> _backends;
- std::unordered_map<
- const backend::Backend *,
- std::unordered_map<std::string, std::unordered_map<bool, std::map<uint32_t, int64_t>>>>
- &_measurements;
+ MeasurementData &_measurements;
+
/**
* @brief Helper function for inserting data to OperationExecTimes
*
@@ -86,7 +84,7 @@ private:
void printOperation(const std::map<uint32_t, int64_t> &operation_info,
std::ofstream &stream) const;
/**
- * @brief Parse and load operations_exec_time from _operations_exec_time_file.
+ * @brief Parse and load _measurements from _measurement_file.
*/
void loadOperationsExecTime();
};
diff --git a/runtime/onert/core/src/exec/LinearExecutor.cc b/runtime/onert/core/src/exec/LinearExecutor.cc
index 6e6ca110f..a6d447312 100644
--- a/runtime/onert/core/src/exec/LinearExecutor.cc
+++ b/runtime/onert/core/src/exec/LinearExecutor.cc
@@ -39,7 +39,9 @@ char *seq_to_label(const onert::ir::OpSequence *op_seq, const onert::ir::Operati
void LinearExecutor::executeImpl()
{
- _subject.notifyModelBegin(this);
+ auto profiling_subg_index = _tracing_ctx->getSubgraphIndex(&_graph);
+
+ _subject.notifySubgraphBegin(profiling_subg_index);
for (auto &&code : _code)
{
const auto op_seq = code.op_seq;
@@ -48,7 +50,7 @@ void LinearExecutor::executeImpl()
#ifdef RUY_PROFILER
ruy::profiler::ScopeLabel label(seq_to_label(op_seq, _graph.operations()));
#endif
- _subject.notifyJobBegin(this, op_seq, backend);
+ _subject.notifyJobBegin(this, profiling_subg_index, op_seq, backend);
auto &fn_seq = code.fn_seq;
@@ -58,9 +60,9 @@ void LinearExecutor::executeImpl()
fn_seq->enableDynamicShapeInferer(handle_dynamic_tensor);
fn_seq->run();
- _subject.notifyJobEnd(this, op_seq, backend);
+ _subject.notifyJobEnd(this, profiling_subg_index, op_seq, backend);
}
- _subject.notifyModelEnd(this);
+ _subject.notifySubgraphEnd(profiling_subg_index);
}
} // namespace exec
diff --git a/runtime/onert/core/src/exec/LinearExecutor.h b/runtime/onert/core/src/exec/LinearExecutor.h
index 22d00ec30..d43c97012 100644
--- a/runtime/onert/core/src/exec/LinearExecutor.h
+++ b/runtime/onert/core/src/exec/LinearExecutor.h
@@ -27,6 +27,7 @@
#include "compiler/Linear.h"
#include "exec/FunctionSequence.h"
#include "compiler/CodeMap.h"
+#include "util/TracingCtx.h"
namespace onert
{
@@ -47,11 +48,9 @@ public:
* @param code_map OpSequence and its code map
*/
LinearExecutor(std::unique_ptr<compiler::LoweredGraph> lowered_graph,
- const std::vector<backend::ITensor *> &input_tensors,
- const std::vector<backend::ITensor *> &output_tensors,
const compiler::TensorRegistries &tensor_regs, compiler::CodeMap &&code_map,
- const std::vector<ir::OpSequenceIndex> &order)
- : ExecutorBase{std::move(lowered_graph), input_tensors, output_tensors, tensor_regs}
+ const std::vector<ir::OpSequenceIndex> &order, const util::TracingCtx *tracing_ctx)
+ : ExecutorBase{std::move(lowered_graph), tensor_regs, tracing_ctx}
{
for (auto index : order)
{
diff --git a/runtime/onert/core/src/exec/ParallelExecutor.cc b/runtime/onert/core/src/exec/ParallelExecutor.cc
index 676bdb5fa..e9e576ce8 100644
--- a/runtime/onert/core/src/exec/ParallelExecutor.cc
+++ b/runtime/onert/core/src/exec/ParallelExecutor.cc
@@ -60,12 +60,10 @@ void ParallelExecutor::notify(uint32_t finished_job_id)
}
ParallelExecutor::ParallelExecutor(std::unique_ptr<compiler::LoweredGraph> lowered_graph,
- const std::vector<backend::ITensor *> &input_tensors,
- const std::vector<backend::ITensor *> &output_tensors,
const compiler::TensorRegistries &tensor_regs,
- compiler::CodeMap &&code_map)
- : DataflowExecutor{std::move(lowered_graph), input_tensors, output_tensors, tensor_regs,
- std::move(code_map)}
+ compiler::CodeMap &&code_map,
+ const util::TracingCtx *tracing_ctx)
+ : DataflowExecutor{std::move(lowered_graph), tensor_regs, std::move(code_map), tracing_ctx}
{
VERBOSE(ParallelExecutor) << "Constructing Parallel Executor" << std::endl;
}
@@ -100,7 +98,10 @@ void ParallelExecutor::executeImpl()
VERBOSE(ParallelExecutor) << "INITIAL JOBS : " << _ready_jobs.size() << std::endl;
- _subject.notifyModelBegin(this);
+ auto profiling_subg_index = _tracing_ctx->getSubgraphIndex(&_graph);
+
+ _subject.notifySubgraphBegin(profiling_subg_index);
+
while (true)
{
std::unique_lock<std::mutex> lock{_mu_jobs};
@@ -126,9 +127,11 @@ void ParallelExecutor::executeImpl()
auto op_sequence_index = _job_to_op_seq[job_index];
auto op_seq = &_lowered_graph->op_seqs().at(op_sequence_index);
auto backend = _lowered_graph->getLowerInfo()->op_seq.at(op_sequence_index)->backend();
- auto setup = [&, op_seq, backend]() { _subject.notifyJobBegin(this, op_seq, backend); };
+ auto setup = [&, op_seq, backend]() {
+ _subject.notifyJobBegin(this, profiling_subg_index, op_seq, backend);
+ };
auto teardown = [&, job_index, op_seq, backend]() {
- _subject.notifyJobEnd(this, op_seq, backend);
+ _subject.notifyJobEnd(this, profiling_subg_index, op_seq, backend);
notify(job_index);
};
@@ -146,7 +149,7 @@ void ParallelExecutor::executeImpl()
// Wait for all the jobs done
_scheduler->finish();
- _subject.notifyModelEnd(this);
+ _subject.notifySubgraphEnd(profiling_subg_index);
// Reset input info for the next execution
_input_info = _initial_input_info;
diff --git a/runtime/onert/core/src/exec/ParallelExecutor.h b/runtime/onert/core/src/exec/ParallelExecutor.h
index 111c20c0c..fd9db42e1 100644
--- a/runtime/onert/core/src/exec/ParallelExecutor.h
+++ b/runtime/onert/core/src/exec/ParallelExecutor.h
@@ -28,6 +28,7 @@
#include <memory>
#include "exec/DataflowExecutor.h"
#include "ParallelScheduler.h"
+#include "util/TracingCtx.h"
namespace onert
{
@@ -51,9 +52,8 @@ public:
* @param code_map OpSequence and its code map
*/
ParallelExecutor(std::unique_ptr<compiler::LoweredGraph> lowered_graph,
- const std::vector<backend::ITensor *> &input_tensors,
- const std::vector<backend::ITensor *> &output_tensors,
- const compiler::TensorRegistries &tensor_regs, compiler::CodeMap &&code_map);
+ const compiler::TensorRegistries &tensor_regs, compiler::CodeMap &&code_map,
+ const util::TracingCtx *tracing_ctx);
void executeImpl() override;
diff --git a/runtime/onert/core/src/interp/InterpExecutor.h b/runtime/onert/core/src/interp/InterpExecutor.h
index 2e3f3ca54..99d7b3af7 100644
--- a/runtime/onert/core/src/interp/InterpExecutor.h
+++ b/runtime/onert/core/src/interp/InterpExecutor.h
@@ -58,6 +58,15 @@ public:
* @note It should be called after setting input and output buffer
*/
void execute(const exec::IODescription &desc) final;
+ void execute(const std::vector<backend::IPortableTensor *> &,
+ const std::vector<backend::IPortableTensor *> &) final
+ {
+ throw new std::runtime_error{"Interpreter does not support subgraph calls(control flow ops)"};
+ }
+ const std::vector<backend::controlflow::IOTensor *> &getOutputTensors() const final
+ {
+ throw new std::runtime_error{"Interpreter does not support this function."};
+ }
private:
const ir::Graph &_graph;
diff --git a/runtime/onert/core/src/interp/operations/DepthwiseConv2D.cc b/runtime/onert/core/src/interp/operations/DepthwiseConv2D.cc
index 0473855d9..e1fb767fe 100644
--- a/runtime/onert/core/src/interp/operations/DepthwiseConv2D.cc
+++ b/runtime/onert/core/src/interp/operations/DepthwiseConv2D.cc
@@ -116,7 +116,7 @@ void invoke(const ITensor *ifm_tensor, const ITensor *ker_tensor, const ITensor
float *ofm_ptr = reinterpret_cast<float *>(ofm_tensor->buffer());
nnfw::cker::DepthwiseConv(cker_param, cker_ifm_shape, ifm_ptr, cker_ker_shape, ker_ptr,
- cker_bias_shape, bias_ptr, cker_ofm_shape, ofm_ptr);
+ cker_bias_shape, bias_ptr, cker_ofm_shape, ofm_ptr, nullptr);
}
void invokeDepthwiseConv(const ExecEnv *env, const ir::Operation &node)
diff --git a/runtime/onert/core/src/ir/DataType.cc b/runtime/onert/core/src/ir/DataType.cc
index 9eedcd21a..8e75c4f53 100644
--- a/runtime/onert/core/src/ir/DataType.cc
+++ b/runtime/onert/core/src/ir/DataType.cc
@@ -42,6 +42,7 @@ size_t sizeOfDataType(DataType data_type)
return sizeof(uint8_t);
case DataType::QUANT_INT8_SYMM:
case DataType::QUANT_INT8_ASYMM:
+ case DataType::QUANT_INT8_SYMM_PER_CHANNEL:
return sizeof(int8_t);
case DataType::FLOAT16:
return sizeof(float16);
diff --git a/runtime/onert/core/src/ir/OperationDumper.cc b/runtime/onert/core/src/ir/OperationDumper.cc
index eecfe81cc..a8578b4ce 100644
--- a/runtime/onert/core/src/ir/OperationDumper.cc
+++ b/runtime/onert/core/src/ir/OperationDumper.cc
@@ -72,7 +72,14 @@ OperationDumper::OperationDumper(const std::string &start_msg)
VERBOSE(LIR) << start_msg << std::endl;
}
-void OperationDumper::visit(const ArgMax &node) { dumpBinaryInputOp(node); }
+void OperationDumper::visit(const ArgMinMax &node)
+{
+ std::string min_max = node.param().is_arg_max ? "(Max)" : "(Min)";
+ VERBOSE(LIR) << "* " << node.name() << min_max << std::endl;
+ VERBOSE(LIR) << " - Inputs : Input(" << node.getInputs().at(ArgMinMax::INPUT) << ") Axis("
+ << node.getInputs().at(ArgMinMax::AXIS) << ") " << std::endl;
+ VERBOSE(LIR) << " - Output : Output(" << node.getOutputs().at(0) << ")" << std::endl;
+}
void OperationDumper::visit(const BatchToSpaceND &node)
{
@@ -159,6 +166,14 @@ void OperationDumper::visit(const ExpandDims &node)
dumpUnaryInputOp(node, axis);
}
+void OperationDumper::visit(const Fill &node)
+{
+ VERBOSE(LIR) << "* " << node.name() << std::endl;
+ VERBOSE(LIR) << " - Inputs : Shape(" << node.getInputs().at(Fill::Input::SHAPE) << ") Value("
+ << node.getInputs().at(Fill::Input::VALUE) << ")" << std::endl;
+ VERBOSE(LIR) << " - Output : Output(" << node.getOutputs().at(0) << ")" << std::endl;
+}
+
void OperationDumper::visit(const FullyConnected &node)
{
std::string inputs =
@@ -505,7 +520,7 @@ void OperationDumper::visit(const While &node)
}
VERBOSE(LIR) << " - Inputs : "
<< "Cond subgraph (" << node.param().cond_subg_index << ") Body subgraph ("
- << node.param().cond_subg_index << ") Inputs(" << inputs << ")" << std::endl;
+ << node.param().body_subg_index << ") Inputs(" << inputs << ")" << std::endl;
std::string outputs;
const auto &output_indices = node.getOutputs();
for (auto it = std::begin(output_indices); it != std::end(output_indices); ++it)
diff --git a/runtime/onert/core/src/ir/OperationDumper.h b/runtime/onert/core/src/ir/OperationDumper.h
index 91642ab13..fe18307b9 100644
--- a/runtime/onert/core/src/ir/OperationDumper.h
+++ b/runtime/onert/core/src/ir/OperationDumper.h
@@ -31,7 +31,7 @@ public:
OperationDumper(const std::string &start_msg);
public:
- void visit(const operation::ArgMax &) override;
+ void visit(const operation::ArgMinMax &) override;
void visit(const operation::BatchToSpaceND &node) override;
void visit(const operation::BCQFullyConnected &node) override;
void visit(const operation::BinaryArithmetic &node) override;
@@ -48,6 +48,7 @@ public:
void visit(const operation::ElementwiseUnary &) override;
void visit(const operation::EmbeddingLookup &) override;
void visit(const operation::ExpandDims &) override;
+ void visit(const operation::Fill &) override;
void visit(const operation::FullyConnected &node) override;
void visit(const operation::Gather &) override;
void visit(const operation::HashtableLookup &) override;
diff --git a/runtime/onert/core/src/ir/OperationValidator.cc b/runtime/onert/core/src/ir/OperationValidator.cc
index da08e81fc..6f81c2a56 100644
--- a/runtime/onert/core/src/ir/OperationValidator.cc
+++ b/runtime/onert/core/src/ir/OperationValidator.cc
@@ -55,6 +55,17 @@ bool OperationValidator::isSameType(const OperandIndex &idx1, const OperandIndex
return operandType(idx1) == operandType(idx2);
}
+bool OperationValidator::isSameQuantParam(const OperandIndex &idx1, const OperandIndex &idx2)
+{
+ if (_operands.at(idx1).typeInfo().scale() != _operands.at(idx2).typeInfo().scale())
+ return false;
+
+ if (_operands.at(idx1).typeInfo().offset() != _operands.at(idx2).typeInfo().offset())
+ return false;
+
+ return true;
+}
+
bool OperationValidator::isValidType(const OperandIndex &idx, const DataType &type)
{
return operandType(idx) == type;
@@ -76,29 +87,54 @@ bool OperationValidator::isValidType(const OperandIndex &idx,
void OperationValidator::visit(const operation::AddN &node)
{
+ const auto output_index(node.getOutputs().at(0));
+
int size = node.getInputs().size();
for (int i = 0; i < size; i++)
{
const auto input_index(node.getInputs().at(i));
OP_REQUIRES(isValidType(input_index, {DataType::FLOAT32, DataType::INT32}));
+ OP_REQUIRES(isSameType(input_index, output_index));
}
}
+void OperationValidator::visit(const operation::ArgMinMax &node)
+{
+ const auto input_index(node.getInputs().at(operation::ArgMinMax::Input::INPUT));
+ const auto axis_index(node.getInputs().at(operation::ArgMinMax::Input::AXIS));
+ const auto output_index(node.getOutputs().at(0));
+ const auto output_type = node.param().output_type;
+
+ OP_REQUIRES(isValidType(input_index, {DataType::FLOAT32, DataType::INT32, DataType::UINT8,
+ DataType::QUANT_UINT8_ASYMM, DataType::QUANT_INT8_ASYMM}));
+ OP_REQUIRES(isValidType(axis_index, {DataType::INT32, DataType::INT64}));
+ OP_REQUIRES(isValidType(output_index, {DataType::INT32, DataType::INT64}));
+ OP_REQUIRES(isValidType(output_index, output_type));
+}
+
void OperationValidator::visit(const operation::BatchMatMul &node)
{
const auto lhs_index(node.getInputs().at(operation::BatchMatMul::Input::LHS));
const auto rhs_index(node.getInputs().at(operation::BatchMatMul::Input::RHS));
+ const auto output_index(node.getOutputs().at(0));
// Constant lhs and rhs is not implemented yet
OP_REQUIRES(!isConstant(lhs_index) && !isConstant(rhs_index));
+
+ // Allow hybrid quantization (lhs: float / rhs: qint8 / out: float)
+ OP_REQUIRES(isValidType(lhs_index, {DataType::FLOAT32, DataType::QUANT_INT8_ASYMM}));
+ OP_REQUIRES(isSameType(lhs_index, rhs_index) ||
+ ((operandType(lhs_index) == DataType::FLOAT32) &&
+ (operandType(rhs_index) == DataType::QUANT_INT8_ASYMM)));
+ OP_REQUIRES(isSameType(lhs_index, output_index));
}
void OperationValidator::visit(const operation::BatchToSpaceND &node)
{
- const auto block_size_index{node.getInputs().at(operation::BatchToSpaceND::Input::BLOCK_SIZE)};
+ const auto input_index{node.getInputs().at(operation::BatchToSpaceND::Input::INPUT)};
+ const auto output_index{node.getOutputs().at(0)};
- // Non-constant block_size is not implemented yet
- OP_REQUIRES(isConstant(block_size_index));
+ OP_REQUIRES(isSameType(input_index, output_index));
}
void OperationValidator::visit(const operation::BinaryArithmetic &node)
@@ -122,10 +158,48 @@ void OperationValidator::visit(const operation::Comparison &node)
OP_REQUIRES(isValidType(output_index, DataType::BOOL8));
}
+void OperationValidator::visit(const operation::Concat &node)
+{
+ const auto output_index{node.getOutputs().at(0)};
+
+ for (auto input_index : node.getInputs())
+ {
+ OP_REQUIRES(isSameType(input_index, output_index));
+
+ // Int8 quantization requires same scale and zero point
+ if (isValidType(output_index, DataType::QUANT_INT8_ASYMM))
+ {
+ OP_REQUIRES(isSameQuantParam(input_index, output_index));
+ }
+ }
+}
+
+void OperationValidator::visit(const operation::Conv2D &node)
+{
+ const auto input_index{node.getInputs().at(operation::Conv2D::Input::INPUT)};
+ const auto output_index{node.getOutputs().at(0)};
+
+ uint32_t stride_horizontal = node.param().stride.horizontal;
+ uint32_t stride_vertical = node.param().stride.vertical;
+ uint32_t dilation_width = node.param().dilation.width_factor;
+ uint32_t dilation_height = node.param().dilation.height_factor;
+
+ OP_REQUIRES((stride_horizontal > 0) && (stride_vertical > 0));
+ OP_REQUIRES((dilation_width > 0) && (dilation_height > 0));
+ OP_REQUIRES(isSameType(input_index, output_index));
+}
+
void OperationValidator::visit(const operation::DepthToSpace &node)
{
+ const auto input_index{node.getInputs().at(operation::DepthToSpace::Input::INPUT)};
+ const auto output_index{node.getOutputs().at(0)};
+
int32_t block_size = node.param().block_size;
+ OP_REQUIRES(isValidType(input_index, {DataType::FLOAT32, DataType::INT32, DataType::INT64,
+ DataType::QUANT_UINT8_ASYMM, DataType::QUANT_INT8_ASYMM}));
+ OP_REQUIRES(isSameType(input_index, output_index));
+
OP_REQUIRES(block_size > 0);
}
@@ -151,6 +225,32 @@ void OperationValidator::visit(const operation::ElementwiseActivation &node)
// Check if I/O types match
OP_REQUIRES(isSameType(output_index, input_index));
+
+ switch (node.param().op_type)
+ {
+ case operation::ElementwiseActivation::Type::ELU:
+ OP_REQUIRES(isValidType(input_index, DataType::FLOAT32));
+ break;
+ case operation::ElementwiseActivation::Type::LEAKY_RELU:
+ OP_REQUIRES(
+ isValidType(input_index, {DataType::FLOAT32, DataType::QUANT_UINT8_ASYMM,
+ DataType::QUANT_INT8_ASYMM, DataType::QUANT_INT16_ASYMM}));
+ break;
+ case operation::ElementwiseActivation::Type::LOGISTIC:
+ OP_REQUIRES(
+ isValidType(input_index, {DataType::FLOAT32, DataType::QUANT_UINT8_ASYMM,
+ DataType::QUANT_INT8_ASYMM, DataType::QUANT_INT16_ASYMM}));
+ break;
+ case operation::ElementwiseActivation::Type::RELU:
+ OP_REQUIRES(isValidType(input_index, {DataType::FLOAT32, DataType::QUANT_UINT8_ASYMM,
+ DataType::QUANT_INT8_ASYMM}));
+ break;
+ case operation::ElementwiseActivation::Type::TANH:
+ OP_REQUIRES(
+ isValidType(input_index, {DataType::FLOAT32, DataType::QUANT_UINT8_ASYMM,
+ DataType::QUANT_INT8_ASYMM, DataType::QUANT_INT16_ASYMM}));
+ break;
+ }
}
void OperationValidator::visit(const operation::ElementwiseBinary &node)
@@ -161,6 +261,13 @@ void OperationValidator::visit(const operation::ElementwiseBinary &node)
OP_REQUIRES(isSameType(lhs_index, rhs_index));
OP_REQUIRES(isSameType(lhs_index, output_index));
+
+ const auto op_type = node.param().op_type;
+ if (op_type == operation::ElementwiseBinary::ElementwiseBinaryType::LOGICAL_AND ||
+ op_type == operation::ElementwiseBinary::ElementwiseBinaryType::LOGICAL_OR)
+ {
+ OP_REQUIRES(isValidType(lhs_index, DataType::BOOL8));
+ }
}
void OperationValidator::visit(const operation::ElementwiseUnary &node)
@@ -195,8 +302,17 @@ void OperationValidator::visit(const operation::ElementwiseUnary &node)
void OperationValidator::visit(const operation::EmbeddingLookup &node)
{
const auto lookups_index{node.getInputs().at(operation::EmbeddingLookup::Input::LOOKUPS)};
+ const auto values_index{node.getInputs().at(operation::EmbeddingLookup::Input::VALUES)};
+ const auto output_index{node.getOutputs().at(0)};
OP_REQUIRES(isValidType(lookups_index, DataType::INT32));
+
+ // TFLite: Allow hybrid type - value table & output
+ // NNAPI: Require same value table and output type
+ OP_REQUIRES(
+ isSameType(values_index, output_index) ||
+ (isValidType(output_index, DataType::FLOAT32) &&
+ (isValidType(values_index, {DataType::QUANT_INT8_ASYMM, DataType::QUANT_INT8_SYMM}))));
}
void OperationValidator::visit(const operation::ExpandDims &node)
@@ -206,7 +322,19 @@ void OperationValidator::visit(const operation::ExpandDims &node)
const auto axis_index{node.getInputs().at(operation::ExpandDims::Input::AXIS)};
OP_REQUIRES(isSameType(output_index, input_index));
- OP_REQUIRES(isValidType(axis_index, DataType::INT32));
+ OP_REQUIRES(isValidType(axis_index, {DataType::INT32, DataType::INT64}));
+}
+
+void OperationValidator::visit(const operation::Fill &node)
+{
+ const auto output_index{node.getOutputs().at(0)};
+ const auto input_index{node.getInputs().at(operation::Fill::Input::SHAPE)};
+ const auto value_index{node.getInputs().at(operation::Fill::Input::VALUE)};
+
+ OP_REQUIRES(isSameType(output_index, value_index));
+ OP_REQUIRES(isValidType(input_index, {DataType::INT32, DataType::INT64}));
+ OP_REQUIRES(isValidType(output_index,
+ {DataType::FLOAT32, DataType::INT32, DataType::INT64, DataType::BOOL8}));
}
void OperationValidator::visit(const operation::HashtableLookup &node)
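New checks follow the same shape: fetch the operand indices, then chain OP_REQUIRES over the type helpers. A hedged sketch for a hypothetical additional visit (the operation and its constraints are illustrative and not part of this patch):

  void OperationValidator::visit(const operation::SomeOp &node) // hypothetical operation
  {
    const auto input_index{node.getInputs().at(0)};
    const auto output_index{node.getOutputs().at(0)};

    OP_REQUIRES(isSameType(input_index, output_index));

    // Int8 quantization also requires matching scale and zero point, as in Concat above.
    if (isValidType(output_index, DataType::QUANT_INT8_ASYMM))
      OP_REQUIRES(isSameQuantParam(input_index, output_index));
  }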
diff --git a/runtime/onert/core/src/ir/OperationValidator.h b/runtime/onert/core/src/ir/OperationValidator.h
index 2ea8000e5..5b95b16ba 100644
--- a/runtime/onert/core/src/ir/OperationValidator.h
+++ b/runtime/onert/core/src/ir/OperationValidator.h
@@ -44,10 +44,13 @@ public:
public:
void visit(const operation::AddN &node) override;
+ void visit(const operation::ArgMinMax &node) override;
void visit(const operation::BatchMatMul &node) override;
void visit(const operation::BatchToSpaceND &node) override;
void visit(const operation::BinaryArithmetic &node) override;
void visit(const operation::Comparison &node) override;
+ void visit(const operation::Concat &node) override;
+ void visit(const operation::Conv2D &node) override;
void visit(const operation::DepthToSpace &node) override;
void visit(const operation::DepthwiseConv2D &node) override;
void visit(const operation::ElementwiseActivation &node) override;
@@ -55,6 +58,7 @@ public:
void visit(const operation::ElementwiseUnary &node) override;
void visit(const operation::EmbeddingLookup &node) override;
void visit(const operation::ExpandDims &node) override;
+ void visit(const operation::Fill &node) override;
void visit(const operation::HashtableLookup &node) override;
void visit(const operation::Pack &node) override;
void visit(const operation::Pad &node) override;
@@ -76,6 +80,7 @@ private:
DataType operandType(const OperandIndex &idx);
bool isConstant(const OperandIndex &idx);
bool isSameType(const OperandIndex &idx1, const OperandIndex &idx2);
+ bool isSameQuantParam(const OperandIndex &idx1, const OperandIndex &idx2);
bool isValidType(const OperandIndex &idx, const DataType &type);
bool isValidType(const OperandIndex &idx, std::initializer_list<DataType> valid_types);
diff --git a/runtime/onert/core/src/ir/operation/ArgMax.cc b/runtime/onert/core/src/ir/operation/ArgMinMax.cc
index f3bd8fd73..989d905bf 100644
--- a/runtime/onert/core/src/ir/operation/ArgMax.cc
+++ b/runtime/onert/core/src/ir/operation/ArgMinMax.cc
@@ -14,10 +14,7 @@
* limitations under the License.
*/
-#include "ir/operation/ArgMax.h"
-
-#include <cassert>
-
+#include "ir/operation/ArgMinMax.h"
#include "ir/OperationVisitor.h"
namespace onert
@@ -27,10 +24,10 @@ namespace ir
namespace operation
{
-void ArgMax::accept(OperationVisitor &v) const { v.visit(*this); }
+void ArgMinMax::accept(OperationVisitor &v) const { v.visit(*this); }
-ArgMax::ArgMax(const OperandIndexSequence &inputs, const OperandIndexSequence &outputs,
- const Param &param)
+ArgMinMax::ArgMinMax(const OperandIndexSequence &inputs, const OperandIndexSequence &outputs,
+ const Param &param)
: Operation{OperandConstraint::createExact(2u), inputs, outputs}, _param{param}
{
}
diff --git a/runtime/onert/core/src/ir/operation/ElementwiseUnary.cc b/runtime/onert/core/src/ir/operation/ElementwiseUnary.cc
index 6a0be7eb8..20b6fa124 100644
--- a/runtime/onert/core/src/ir/operation/ElementwiseUnary.cc
+++ b/runtime/onert/core/src/ir/operation/ElementwiseUnary.cc
@@ -57,7 +57,7 @@ std::string ElementwiseUnary::name() const
{ElementwiseUnaryType::RSQRT, std::string{"RSqrt"}},
{ElementwiseUnaryType::SIN, std::string{"Sin"}},
{ElementwiseUnaryType::SQRT, std::string{"Sqrt"}},
- {ElementwiseUnaryType::SQURE, std::string{"Squre"}},
+ {ElementwiseUnaryType::SQUARE, std::string{"Square"}},
{ElementwiseUnaryType::ZEROS_LIKE, std::string{"ZerosLike"}}};
return name_map.at(_param.op_type);
}
diff --git a/runtime/onert/core/src/util/ConfigSource.cc b/runtime/onert/core/src/util/ConfigSource.cc
index 45cce662e..9da93f68a 100644
--- a/runtime/onert/core/src/util/ConfigSource.cc
+++ b/runtime/onert/core/src/util/ConfigSource.cc
@@ -30,8 +30,10 @@ namespace util
{
static std::unique_ptr<IConfigSource> _source;
+static std::unique_ptr<IConfigSource> _source_ext;
void config_source(std::unique_ptr<IConfigSource> &&source) { _source = std::move(source); }
+void config_source_ext(std::unique_ptr<IConfigSource> &&source) { _source_ext = std::move(source); }
static IConfigSource *config_source()
{
@@ -67,6 +69,15 @@ static std::string getConfigOrDefault(const std::string &key)
auto ret = config_source()->get(key);
if (ret.empty())
{
+ // If env is not set, search the external source
+ if (_source_ext.get())
+ {
+ ret = _source_ext.get()->get(key);
+ }
+ }
+ // If still not found, search the defaults
+ if (ret.empty())
+ {
auto itr = defaults.find(key);
if (itr != defaults.end())
{
diff --git a/runtime/onert/core/src/util/EventCollector.cc b/runtime/onert/core/src/util/EventCollector.cc
index de37276bf..fd5618714 100644
--- a/runtime/onert/core/src/util/EventCollector.cc
+++ b/runtime/onert/core/src/util/EventCollector.cc
@@ -38,15 +38,17 @@ class DurationEventBuilder
public:
DurationEventBuilder(const std::string &ts) : _ts{ts} {}
- DurationEvent build(const std::string &tid, const std::string &name, const std::string &ph) const
+ DurationEvent build(const EventCollector::Event &evt_collected, const std::string &ph) const
{
DurationEvent evt;
- evt.name = name;
- evt.tid = tid;
+ evt.name = evt_collected.label;
+ evt.tid = evt_collected.backend;
evt.ph = ph;
evt.ts = _ts;
+ evt.args = evt_collected.userData;
+
return evt;
}
@@ -93,11 +95,11 @@ void EventCollector::onEvent(const Event &event)
switch (event.edge)
{
case Edge::BEGIN:
- _rec->emit(DurationEventBuilder(ts).build(event.backend, event.label, "B"));
+ _rec->emit(DurationEventBuilder(ts).build(event, "B"));
break;
case Edge::END:
- _rec->emit(DurationEventBuilder(ts).build(event.backend, event.label, "E"));
+ _rec->emit(DurationEventBuilder(ts).build(event, "E"));
break;
}
diff --git a/runtime/onert/core/src/util/EventCollector.h b/runtime/onert/core/src/util/EventCollector.h
index 8154be592..7daa4851f 100644
--- a/runtime/onert/core/src/util/EventCollector.h
+++ b/runtime/onert/core/src/util/EventCollector.h
@@ -19,6 +19,10 @@
#include "util/EventRecorder.h"
+#include <vector>
+#include <utility>
+#include <string>
+
class EventCollector
{
public:
@@ -31,8 +35,24 @@ public:
struct Event
{
Edge edge;
+ uint32_t session_index;
+ uint32_t subg_index;
std::string backend;
+ uint32_t op_index;
+ std::string op_name;
+ uint32_t op_seq_size; // if this event is for an operation sequence of multiple operations
+
+ // TODO Deprecate this. The label can differ by writer, so let the writer decide the label.
std::string label;
+
+ // user-defined data: pairs of (key, value)
+ std::vector<std::pair<std::string, std::string>> userData;
+
+ Event(Edge a_edge, const std::string &a_backend, const std::string &a_label)
+ : edge(a_edge), session_index(0), subg_index(0), backend(a_backend), op_index(0),
+ op_seq_size(0), label(a_label)
+ { /* empty */
+ }
};
public:
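A minimal sketch of emitting an event with the new userData field; the backend id and label strings are illustrative, and collector is assumed to be an EventCollector bound to an EventRecorder, while the constructor and onEvent() call match this patch:

  EventCollector::Event ev{EventCollector::Edge::BEGIN, "cpu", "some OpSequence label"};
  ev.userData.emplace_back("input_shape_0", "shape(1 224 224 3)");

  // DurationEventBuilder copies userData into DurationEvent::args, so the pairs end up
  // as "args" of the emitted trace entry.
  collector.onEvent(ev);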
diff --git a/runtime/onert/core/src/util/EventCollectorGlobal.cc b/runtime/onert/core/src/util/EventCollectorGlobal.cc
deleted file mode 100644
index 6c03a5b9a..000000000
--- a/runtime/onert/core/src/util/EventCollectorGlobal.cc
+++ /dev/null
@@ -1,94 +0,0 @@
-/*
- * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#include "util/EventCollectorGlobal.h"
-
-#include <cassert>
-#include <fstream>
-#include <iostream>
-
-#include "util/ConfigSource.h"
-#include "util/EventWriter.h"
-
-namespace onert
-{
-namespace util
-{
-
-EventCollectorGlobal::EventCollectorGlobal() : _recorder{}, _collector{&_recorder}
-{
- // DO NOTHING
-}
-
-EventCollectorGlobal::~EventCollectorGlobal()
-{
- if (!_recorder.empty())
- {
- try
- {
- // TODO Need better way for saved file path than the hardcoded path
- EventWriter{_recorder}.writeToFile("trace.global.json",
- EventWriter::WriteFormat::CHROME_TRACING);
- }
- catch (const std::exception &e)
- {
- std::cerr << "E: Fail to record event in EventCollectorGlobal: " << e.what() << std::endl;
- }
- }
-}
-
-EventCollectorGlobal &EventCollectorGlobal::get()
-{
- static EventCollectorGlobal instance;
- return instance;
-}
-
-EventDurationBlock::EventDurationBlock(const std::string &tag) : _tag{tag}
-{
- auto &glob = EventCollectorGlobal::get();
- glob.collector().onEvent(EventCollector::Event{EventCollector::Edge::BEGIN, "0", _tag});
-}
-EventDurationBlock::~EventDurationBlock()
-{
- auto &glob = EventCollectorGlobal::get();
- glob.collector().onEvent(EventCollector::Event{EventCollector::Edge::END, "0", _tag});
-}
-
-EventDurationManual::EventDurationManual(const std::string &tag) : _tag{tag}, _pair{true} {}
-
-EventDurationManual::~EventDurationManual()
-{
- // Check if it has called begin-end pair
- assert(_pair);
-}
-
-void EventDurationManual::begin()
-{
- _pair = false;
- auto &glob = EventCollectorGlobal::get();
- glob.collector().onEvent(EventCollector::Event{EventCollector::Edge::BEGIN, "0", _tag});
-}
-
-void EventDurationManual::end()
-{
- assert(!_pair);
- _pair = true;
- auto &glob = EventCollectorGlobal::get();
- glob.collector().onEvent(EventCollector::Event{EventCollector::Edge::END, "0", _tag});
-}
-
-} // namespace util
-} // namespace onert
diff --git a/runtime/onert/core/src/util/EventCollectorGlobal.h b/runtime/onert/core/src/util/EventCollectorGlobal.h
deleted file mode 100644
index 1027ec84d..000000000
--- a/runtime/onert/core/src/util/EventCollectorGlobal.h
+++ /dev/null
@@ -1,155 +0,0 @@
-/*
- * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#ifndef __ONERT_UTIL_EVENT_COLLECTOR_GLOBAL_H__
-#define __ONERT_UTIL_EVENT_COLLECTOR_GLOBAL_H__
-
-#include "util/EventRecorder.h"
-#include "util/EventCollector.h"
-
-namespace onert
-{
-namespace util
-{
-
-/**
- * @brief Singleton class for event collection from anywhere in code
- *
- */
-class EventCollectorGlobal
-{
-public:
- /**
- * @brief Get the singleton object of this class
- *
- * @return EventCollectorGlobal& Singleton object
- */
- static EventCollectorGlobal &get();
-
-public:
- /**
- * @brief Getter for event collector object
- *
- * @return EventCollector& Collector object
- */
- EventCollector &collector() { return _collector; }
-
-private:
- EventCollectorGlobal();
- ~EventCollectorGlobal();
-
-private:
- EventRecorder _recorder;
- EventCollector _collector;
-};
-
-/**
- * @brief Helper class for emitting duration event which is handled automatically with ctor/dtor
- *
- */
-class EventDurationBlock
-{
-public:
- /**
- * @brief Raise a duration event with type of BEGIN
- *
- * @param tag A label for the duration event
- */
- EventDurationBlock(const std::string &tag);
- /**
- * @brief Raise a duration event with type of END
- *
- */
- ~EventDurationBlock();
-
-private:
- std::string _tag;
-};
-
-/**
- * @brief Helper class for emitting duration event which is handled manually
- *
- * Usage:
- * {
- * ...
- * EventDurationManual duration("some tag");
- * duration.begin();
- * ...
- * ... // Code for duration
- * ...
- * duration.end();
- * }
- *
- */
-class EventDurationManual
-{
-public:
- /**
- * @brief Construct a new Event Duration Manual object
- *
- * @param tag A label for the duration object
- */
- EventDurationManual(const std::string &tag);
- /**
- * @brief Destroy the Event Duration Manual object
- *
- */
- ~EventDurationManual();
-
- /**
- * @brief Raise a duration event with type of BEGIN
- *
- */
- void begin();
- /**
- * @brief Raise a duration event with type of END
- *
- */
- void end();
-
-private:
- std::string _tag;
- bool _pair;
-};
-
-} // namespace util
-} // namespace onert
-
-/**
- * Helper Macro Definitions
- *
- * HOW TO USE
- *
- * void f(args)
- * {
- * EVENT_DURATION_FUNCTION();
- * ...
- * if(cond)
- * {
- * EVENT_DURATION_REGION("if branch");
- * ...
- * }
- * ...
- * }
- */
-
-#define EVENT_DURATION_FUNCTION() \
- ::onert::util::EventDurationBlock __event_duration__##__LINE__ { __FUNCTION__ }
-
-#define EVENT_DURATION_REGION(tag) \
- ::onert::util::EventDurationBlock __event_duration__##__LINE__ { tag }
-
-#endif // __ONERT_UTIL_EVENT_COLLECTOR_GLOBAL_H__
diff --git a/runtime/onert/core/src/util/EventRecorder.h b/runtime/onert/core/src/util/EventRecorder.h
index 7af4c7ddb..3ed40875f 100644
--- a/runtime/onert/core/src/util/EventRecorder.h
+++ b/runtime/onert/core/src/util/EventRecorder.h
@@ -27,8 +27,9 @@ struct Event
{
std::string name;
std::string tid;
- std::string ph; /* REQUIRED */
- std::string ts; /* REQUIRED */
+ std::string ph; /* REQUIRED */
+ std::string ts; /* REQUIRED */
+ std::vector<std::pair<std::string, std::string>> args; // user-defined data: pairs of (key, value)
};
struct DurationEvent : public Event
diff --git a/runtime/onert/core/src/util/EventWriter.cc b/runtime/onert/core/src/util/EventWriter.cc
index dacb40e64..8760a16db 100644
--- a/runtime/onert/core/src/util/EventWriter.cc
+++ b/runtime/onert/core/src/util/EventWriter.cc
@@ -89,6 +89,7 @@ void fill(Content &content, const Event &evt)
content.flds.emplace_back("tid", evt.tid);
content.flds.emplace_back("ph", evt.ph);
content.flds.emplace_back("ts", evt.ts);
+ content.args = evt.args;
}
std::string object(const DurationEvent &evt)
@@ -418,40 +419,7 @@ struct MDTableBuilder
} // namespace
-EventWriter::EventWriter(const EventRecorder &recorder) : _recorder(recorder)
-{
- // DO NOTHING
-}
-
-void EventWriter::writeToFiles(const std::string &base_filepath)
-{
- // Note. According to an internal issue, let snpe json as just file name not '.snpe.json'
- writeToFile(base_filepath, WriteFormat::SNPE_BENCHMARK);
- writeToFile(base_filepath + ".chrome.json", WriteFormat::CHROME_TRACING);
- writeToFile(base_filepath + ".table.md", WriteFormat::MD_TABLE);
-}
-
-void EventWriter::writeToFile(const std::string &filepath, WriteFormat write_format)
-{
- std::ofstream os{filepath, std::ofstream::out};
- switch (write_format)
- {
- case WriteFormat::CHROME_TRACING:
- writeChromeTrace(os);
- break;
- case WriteFormat::SNPE_BENCHMARK:
- writeSNPEBenchmark(os);
- break;
- case WriteFormat::MD_TABLE:
- writeMDTable(os);
- break;
- default:
- assert(!"Invalid value");
- break;
- }
-}
-
-void EventWriter::writeSNPEBenchmark(std::ostream &os)
+void SNPEWriter::flush(const std::vector<std::unique_ptr<EventRecorder>> &recorders)
{
Json::Value root;
auto &exec_data = root["Execution_Data"] = Json::Value{Json::objectValue};
@@ -475,11 +443,14 @@ void EventWriter::writeSNPEBenchmark(std::ostream &os)
// Memory
{
std::unordered_map<std::string, Stat> mem_stats;
- for (auto &evt : _recorder.counter_events())
+ for (auto &recorder : recorders)
{
- auto &mem_stat = mem_stats[evt.name];
- uint64_t val = std::stoull(evt.values.at("value"));
- mem_stat.accumulate(val);
+ for (auto &evt : recorder->counter_events())
+ {
+ auto &mem_stat = mem_stats[evt.name];
+ uint64_t val = std::stoull(evt.values.at("value"));
+ mem_stat.accumulate(val);
+ }
}
auto &mem = exec_data["memory"] = Json::Value{Json::objectValue};
@@ -501,26 +472,29 @@ void EventWriter::writeSNPEBenchmark(std::ostream &os)
// 2D keys : stats[tid][name]
std::unordered_map<std::string, std::unordered_map<std::string, Stat>> stats;
std::unordered_map<std::string, std::unordered_map<std::string, uint64_t>> begin_timestamps;
- for (auto &evt : _recorder.duration_events())
+ for (auto &recorder : recorders)
{
- auto &stat = stats[evt.tid][evt.name];
- auto &begin_ts = begin_timestamps[evt.tid][evt.name];
- uint64_t timestamp = std::stoull(evt.ts);
- if (evt.ph == "B")
+ for (auto &evt : recorder->duration_events())
{
- if (begin_ts != 0)
- throw std::runtime_error{"Invalid Data"};
- begin_ts = timestamp;
- }
- else if (evt.ph == "E")
- {
- if (begin_ts == 0 || timestamp < begin_ts)
- throw std::runtime_error{"Invalid Data"};
- stat.accumulate(timestamp - begin_ts);
- begin_ts = 0;
+ auto &stat = stats[evt.tid][evt.name];
+ auto &begin_ts = begin_timestamps[evt.tid][evt.name];
+ uint64_t timestamp = std::stoull(evt.ts);
+ if (evt.ph == "B")
+ {
+ if (begin_ts != 0)
+ throw std::runtime_error{"Invalid Data"};
+ begin_ts = timestamp;
+ }
+ else if (evt.ph == "E")
+ {
+ if (begin_ts == 0 || timestamp < begin_ts)
+ throw std::runtime_error{"Invalid Data"};
+ stat.accumulate(timestamp - begin_ts);
+ begin_ts = 0;
+ }
+ else
+ throw std::runtime_error{"Invalid Data - invalid value for \"ph\" : \"" + evt.ph + "\""};
}
- else
- throw std::runtime_error{"Invalid Data - invalid value for \"ph\" : \"" + evt.ph + "\""};
}
for (auto &kv : begin_timestamps)
@@ -545,30 +519,71 @@ void EventWriter::writeSNPEBenchmark(std::ostream &os)
}
}
- os << root;
+ _os << root;
}
-void EventWriter::writeChromeTrace(std::ostream &os)
+void ChromeTracingWriter::flush(const std::vector<std::unique_ptr<EventRecorder>> &recorders)
{
- os << "{\n";
- os << " " << quote("traceEvents") << ": [\n";
+ _os << "{\n";
+ _os << " " << quote("traceEvents") << ": [\n";
- for (auto &evt : _recorder.duration_events())
+ for (auto &recorder : recorders)
{
- os << " " << object(evt) << ",\n";
+ flushOneRecord(*recorder);
}
- for (auto &evt : _recorder.counter_events())
+ _os << " { }\n";
+ _os << " ]\n";
+ _os << "}\n";
+}
+
+void ChromeTracingWriter::flushOneRecord(const EventRecorder &recorder)
+{
+ for (auto &evt : recorder.duration_events())
{
- os << " " << object(evt) << ",\n";
+ _os << " " << object(evt) << ",\n";
}
- os << " { }\n";
- os << " ]\n";
- os << "}\n";
+ for (auto &evt : recorder.counter_events())
+ {
+ _os << " " << object(evt) << ",\n";
+ }
}
-void EventWriter::writeMDTable(std::ostream &os)
+void MDTableWriter::flush(const std::vector<std::unique_ptr<EventRecorder>> &records)
+{
+ for (auto &recorder : records)
+ {
+ MDTableBuilder(recorder->duration_events(), recorder->counter_events()).build().write(_os);
+ }
+}
+
+// initialization
+std::mutex EventWriter::_mutex;
+
+void EventWriter::readyToFlush(std::unique_ptr<EventRecorder> &&recorder)
{
- MDTableBuilder(_recorder.duration_events(), _recorder.counter_events()).build().write(os);
+ {
+ std::unique_lock<std::mutex> lock{_mutex};
+
+ _recorders.emplace_back(std::move(recorder));
+
+ if (--_ref_count > 0)
+ return;
+ }
+ // The caller of this method is the last remaining user of EventWriter,
+ // so write the log files now.
+
+ // Note: per an internal issue, the SNPE log keeps the plain file name rather than a '.snpe.json' suffix
+ flush(WriteFormat::SNPE_BENCHMARK);
+ flush(WriteFormat::CHROME_TRACING);
+ flush(WriteFormat::MD_TABLE);
+}
+
+void EventWriter::flush(WriteFormat write_format)
+{
+ auto *writer = _actual_writers[write_format].get();
+ assert(writer);
+
+ writer->flush(_recorders);
}
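The hunk above replaces EventWriter's per-format write methods with one EventFormatWriter subclass per output format; each subclass owns its own std::ofstream and flushes every recorder it is handed. The following is a minimal, self-contained sketch of that pattern, with simplified stand-in Event/Recorder types and a hypothetical CsvWriter rather than the onert classes:

// Minimal sketch of the writer-per-format pattern (not the onert sources).
// Event, Recorder and CsvWriter are simplified stand-ins for illustration.
#include <cstdint>
#include <fstream>
#include <memory>
#include <string>
#include <vector>

struct Event
{
  std::string name;
  uint64_t duration_us;
};

struct Recorder
{
  std::vector<Event> events;
};

// Base class: owns the output stream, leaves the format to subclasses.
class FormatWriter
{
public:
  explicit FormatWriter(const std::string &path) : _os{path, std::ofstream::out} {}
  virtual ~FormatWriter() = default;
  virtual void flush(const std::vector<std::unique_ptr<Recorder>> &recorders) = 0;

protected:
  std::ofstream _os;
};

// One concrete format: dump every event of every recorder as CSV.
class CsvWriter : public FormatWriter
{
public:
  using FormatWriter::FormatWriter;
  void flush(const std::vector<std::unique_ptr<Recorder>> &recorders) override
  {
    _os << "name,duration_us\n";
    for (const auto &rec : recorders)
      for (const auto &evt : rec->events)
        _os << evt.name << ',' << evt.duration_us << '\n';
  }
};

int main()
{
  std::vector<std::unique_ptr<Recorder>> recorders;
  auto rec = std::make_unique<Recorder>();
  rec->events = {{"Conv2D", 120}, {"Softmax", 15}};
  recorders.push_back(std::move(rec));
  CsvWriter{"trace.csv"}.flush(recorders);
}

The point of the split is that adding a new output format means adding a subclass rather than growing another switch case inside EventWriter.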
diff --git a/runtime/onert/core/src/util/EventWriter.h b/runtime/onert/core/src/util/EventWriter.h
index 7e838ca82..0dcd00be6 100644
--- a/runtime/onert/core/src/util/EventWriter.h
+++ b/runtime/onert/core/src/util/EventWriter.h
@@ -20,7 +20,49 @@
#include "EventRecorder.h"
#include <string>
-#include <ostream>
+#include <vector>
+#include <unordered_map>
+#include <mutex>
+#include <fstream>
+
+class EventFormatWriter
+{
+public:
+ EventFormatWriter(const std::string &filepath) : _os{filepath, std::ofstream::out} {}
+ virtual ~EventFormatWriter() { /* empty */}
+
+ virtual void flush(const std::vector<std::unique_ptr<EventRecorder>> &) = 0;
+
+protected:
+ std::ofstream _os;
+};
+
+class SNPEWriter : public EventFormatWriter
+{
+public:
+ SNPEWriter(const std::string &filepath) : EventFormatWriter(filepath) { /* empty */}
+ void flush(const std::vector<std::unique_ptr<EventRecorder>> &) override;
+};
+
+class ChromeTracingWriter : public EventFormatWriter
+{
+public:
+ ChromeTracingWriter(const std::string &filepath) : EventFormatWriter(filepath) { /* empty */}
+ void flush(const std::vector<std::unique_ptr<EventRecorder>> &) override;
+
+private:
+ void flushOneRecord(const EventRecorder &);
+};
+
+class MDTableWriter : public EventFormatWriter
+{
+public:
+ MDTableWriter(const std::string &filepath) : EventFormatWriter(filepath) { /* empty */}
+ void flush(const std::vector<std::unique_ptr<EventRecorder>> &) override;
+
+private:
+ void flushOneRecord(const EventRecorder &);
+};
class EventWriter
{
@@ -32,20 +74,58 @@ public:
MD_TABLE,
};
-public:
- EventWriter(const EventRecorder &recorder);
+ /**
+ * @brief Returns the singleton object
+ */
+ static EventWriter *get(const std::string &filename)
+ {
+ std::unique_lock<std::mutex> lock{_mutex};
-public:
- void writeToFiles(const std::string &base_filepath);
- void writeToFile(const std::string &filepath, WriteFormat write_format);
+ static EventWriter singleton(filename);
+ return &singleton;
+ }
+
+ /**
+ * @brief Call this when an observer that uses EventWriter starts
+ */
+ void startToUse()
+ {
+ std::unique_lock<std::mutex> lock{_mutex};
+ _ref_count++;
+ }
+
+ /**
+ * @brief Call this when an observer that uses EventWriter finishes.
+ * Once every observer has called this method, the reference count reaches 0
+ * and EventWriter writes the profiling result files.
+ */
+ void readyToFlush(std::unique_ptr<EventRecorder> &&recorder);
private:
- void writeSNPEBenchmark(std::ostream &os);
- void writeChromeTrace(std::ostream &os);
- void writeMDTable(std::ostream &os);
+ EventWriter(const std::string &filepath) : _ref_count(0)
+ {
+ std::string snpe_log_name(filepath);
+ std::string chrome_tracing_log_name(filepath + ".chrome.json");
+ std::string md_table_log_name(filepath + ".table.md");
+
+ _actual_writers[WriteFormat::SNPE_BENCHMARK] = std::make_unique<SNPEWriter>(snpe_log_name);
+ _actual_writers[WriteFormat::CHROME_TRACING] =
+ std::make_unique<ChromeTracingWriter>(chrome_tracing_log_name);
+ _actual_writers[WriteFormat::MD_TABLE] = std::make_unique<MDTableWriter>(md_table_log_name);
+ };
+
+ void flush(WriteFormat write_format);
private:
- const EventRecorder &_recorder;
+ static std::mutex _mutex;
+
+ // number of observers that want to write profiling data
+ int32_t _ref_count;
+
+ // one recorder object per executor
+ std::vector<std::unique_ptr<EventRecorder>> _recorders;
+
+ std::unordered_map<WriteFormat, std::unique_ptr<EventFormatWriter>> _actual_writers;
};
#endif // __ONERT_UTIL_EVENT_WRITER_H__
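As declared above, the new lifecycle is: every profiling observer calls EventWriter::get(path)->startToUse() when it starts and readyToFlush(std::move(recorder)) when it finishes, and the call that drops the reference count to zero writes all three result files. Below is a minimal sketch of that flush-on-last-user idea, using a hypothetical Flusher/Recorder pair rather than the real classes:

// Sketch of the ref-counting behind EventWriter::startToUse()/readyToFlush().
// "Recorder" is a hypothetical stand-in; error handling is omitted.
#include <cstdint>
#include <memory>
#include <mutex>
#include <vector>

struct Recorder { /* collected events would live here */ };

class Flusher
{
public:
  static Flusher &get()
  {
    static Flusher singleton; // one instance shared by all observers
    return singleton;
  }

  void startToUse()
  {
    std::lock_guard<std::mutex> lock{_mutex};
    ++_ref_count;
  }

  void readyToFlush(std::unique_ptr<Recorder> &&recorder)
  {
    {
      std::lock_guard<std::mutex> lock{_mutex};
      _recorders.emplace_back(std::move(recorder));
      if (--_ref_count > 0)
        return; // other observers are still running
    }
    writeFiles(); // last user: emit the output once, outside the lock
  }

private:
  void writeFiles() { /* iterate _recorders and write each format */ }

  std::mutex _mutex;
  int32_t _ref_count = 0;
  std::vector<std::unique_ptr<Recorder>> _recorders;
};

int main()
{
  auto &f = Flusher::get();
  f.startToUse();
  f.startToUse();
  f.readyToFlush(std::make_unique<Recorder>()); // not last: nothing written yet
  f.readyToFlush(std::make_unique<Recorder>()); // last user: writeFiles() runs
}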
diff --git a/runtime/onert/core/src/util/ShapeInference.cc b/runtime/onert/core/src/util/ShapeInference.cc
index 1f468a8b5..3ed3080cc 100644
--- a/runtime/onert/core/src/util/ShapeInference.cc
+++ b/runtime/onert/core/src/util/ShapeInference.cc
@@ -128,11 +128,11 @@ ir::Shape inferEltwiseShape(const ir::Shape &lhs_shape, const ir::Shape &rhs_sha
return broadcastShapes(lhs_shape, rhs_shape);
}
-ir::Shape inferArgMaxShape(const ir::Shape &input_shape, int axis, int rank)
+ir::Shape inferArgMinMaxShape(const ir::Shape &input_shape, int axis, int rank)
{
if (axis < 0 || axis >= rank)
{
- throw std::runtime_error("ArgMax shape inference: Wrong axis value " + std::to_string(axis));
+ throw std::runtime_error("ArgMinMax shape inference: Wrong axis value " + std::to_string(axis));
}
ir::Shape out_shape;
@@ -385,18 +385,22 @@ ir::Shape inferExpandDimsShape(const ir::Shape &in_shape, int32_t axis)
return out_shape;
}
-ir::Shape inferFillShape(const ir::Shape &in_shape, const int32_t *in_buf)
+template <typename T> ir::Shape inferFillShape(const ir::Shape &fill_shape, const T *shape_buf)
{
- ir::Shape out_shape(in_shape.dim(0));
+ ir::Shape out_shape(fill_shape.dim(0));
for (int out_x = 0; out_x < out_shape.rank(); ++out_x)
{
- out_shape.dim(out_x) = in_buf[out_x];
+ out_shape.dim(out_x) = static_cast<int32_t>(shape_buf[out_x]);
}
return out_shape;
}
+// explicit template instantiation
+template ir::Shape inferFillShape(const ir::Shape &fill_shape, const int32_t *shape_buf);
+template ir::Shape inferFillShape(const ir::Shape &fill_shape, const int64_t *shape_buf);
+
ir::Shape inferFullyConnectedShape(const ir::Shape &in_shape, const ir::Shape &ker_shape)
{
assert(in_shape.rank() >= 2);
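The inferFillShape change above turns the helper into a template over the shape-buffer element type and keeps the definition out of the header by explicitly instantiating it for int32_t and int64_t, matching the INT32/INT64 shape tensors a Fill op may carry. A generic illustration of that definition/explicit-instantiation split, using a hypothetical toDims helper rather than the onert function:

// Sketch of the explicit-instantiation pattern used for inferFillShape.
// A header would expose only the declaration:
//   template <typename T> std::vector<int32_t> toDims(const T *buf, int rank);
#include <cstdint>
#include <vector>

template <typename T> std::vector<int32_t> toDims(const T *buf, int rank)
{
  std::vector<int32_t> dims(rank);
  for (int i = 0; i < rank; ++i)
    dims[i] = static_cast<int32_t>(buf[i]); // narrow int64 shape values to int32 dims
  return dims;
}

// Explicit instantiations in the .cc: only these element types are emitted,
// which covers the int32/int64 shape tensors the loader can hand in.
template std::vector<int32_t> toDims(const int32_t *buf, int rank);
template std::vector<int32_t> toDims(const int64_t *buf, int rank);

int main()
{
  const int64_t shape[] = {1, 224, 224, 3};
  auto dims = toDims(shape, 4); // uses the int64_t instantiation
  return dims.size() == 4 ? 0 : 1;
}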
diff --git a/runtime/onert/core/src/util/TracingCtx.cc b/runtime/onert/core/src/util/TracingCtx.cc
new file mode 100644
index 000000000..08a1b32a7
--- /dev/null
+++ b/runtime/onert/core/src/util/TracingCtx.cc
@@ -0,0 +1,29 @@
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ * Copyright 2017 The TensorFlow Authors. All Rights Reserved.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "util/TracingCtx.h"
+
+namespace onert
+{
+namespace util
+{
+
+// initialize the static member variable
+std::mutex TracingCtx::_session_id_mutex;
+
+} // namespace util
+} // namespace onert
diff --git a/runtime/onert/frontend/.clang-format b/runtime/onert/frontend/.clang-format
new file mode 120000
index 000000000..83185fee3
--- /dev/null
+++ b/runtime/onert/frontend/.clang-format
@@ -0,0 +1 @@
+../../../.clang-format.8
\ No newline at end of file
diff --git a/runtime/onert/frontend/base_loader/include/base_loader.h b/runtime/onert/frontend/base_loader/include/base_loader.h
index c0003e402..f9c97b41b 100644
--- a/runtime/onert/frontend/base_loader/include/base_loader.h
+++ b/runtime/onert/frontend/base_loader/include/base_loader.h
@@ -68,7 +68,7 @@ public:
* @param graph reference on subgraphs
*/
explicit BaseLoader(std::unique_ptr<ir::Subgraphs> &subgs)
- : _base{nullptr}, _pagesize(getpagesize()), _fd(-1), _subgraphs(subgs), _model{nullptr}
+ : _base{nullptr}, _pagesize(getpagesize()), _fd(-1), _subgraphs(subgs), _model{nullptr}
{
_use_mmaped_data = util::getConfigBool(util::config::USE_MMAPED_DATA);
}
@@ -114,23 +114,19 @@ private:
// Operations
template <typename OpIR, typename... Args>
const OpIR *loadOperationTo(const Operator *op, ir::Graph &subg, Args &&... args);
- void loadConv2D(const Operator *op, ir::Graph &subg);
- void loadDepthwiseConv2D(const Operator *op, ir::Graph &subg);
- void loadTransposeConv(const Operator *op, ir::Graph &subg);
- void loadPool2D(const Operator *op, ir::Graph &subg, ir::operation::Pool2D::PoolType op_type);
- void loadReshape(const Operator *op, ir::Graph &subg);
- void loadSoftmax(const Operator *op, ir::Graph &subg);
- void loadConcatenation(const Operator *op, ir::Graph &subg);
- void loadFC(const Operator *op, ir::Graph &subg);
+
+ void loadAddV2(const Operator *op, ir::Graph &subg);
+ void loadArgMinMax(const Operator *op, ir::Graph &subg, bool is_argmax);
+ void loadBatchMatMul(const Operator *op, ir::Graph &subg);
void loadBinaryArithmetic(const Operator *op, ir::Graph &subg,
ir::operation::BinaryArithmetic::ArithmeticType op_type);
- void loadAddV2(const Operator *op, ir::Graph &subg);
- void loadPack(const Operator *op, ir::Graph &subg);
- void loadResizeBilinear(const Operator *op, ir::Graph &subg);
- void loadResizeNearestNeighbor(const Operator *op, ir::Graph &subg);
- void loadReduce(const Operator *op, ir::Graph &subg,
- ir::operation::Reduce::ReduceType reduce_type);
- void loadReduceAll(const Operator *op, ir::Graph &subg);
+ void loadComparison(const Operator *op, ir::Graph &subg);
+ void loadConcatenation(const Operator *op, ir::Graph &subg);
+ void loadConv2D(const Operator *op, ir::Graph &subg);
+ void loadCustom(const Operator *op, ir::Graph &subg);
+ void loadDepthToSpace(const Operator *op, ir::Graph &subg);
+ void loadDepthwiseConv2D(const Operator *op, ir::Graph &subg);
+ void loadEinsum(const Operator *op, ir::Graph &subg);
void loadElementwiseActivation(const Operator *op, ir::Graph &subg,
ir::operation::ElementwiseActivation::Type op_type,
float alpha = 0.f, float beta = 0.f);
@@ -138,25 +134,31 @@ private:
ir::operation::ElementwiseBinary::ElementwiseBinaryType op_type);
void loadElementwiseUnary(const Operator *op, ir::Graph &subg,
ir::operation::ElementwiseUnary::Type op_type);
+ void loadFC(const Operator *op, ir::Graph &subg);
+ void loadFusedBatchNorm(const Operator *op, ir::Graph &subg);
void loadGather(const Operator *op, ir::Graph &subg);
- void loadCustom(const Operator *op, ir::Graph &subg);
- void loadBatchMatMul(const Operator *op, ir::Graph &subg);
- void loadSqueeze(const Operator *op, ir::Graph &subg);
+ void loadIf(const Operator *op, ir::Graph &subg);
+ void loadLeakyRelu(const Operator *op, ir::Graph &subg);
+ void loadLogSoftmax(const Operator *op, ir::Graph &subg);
+ void loadOneHot(const Operator *op, ir::Graph &subg);
+ void loadPack(const Operator *op, ir::Graph &subg);
+ void loadPool2D(const Operator *op, ir::Graph &subg, ir::operation::Pool2D::PoolType op_type);
+ void loadReduce(const Operator *op, ir::Graph &subg,
+ ir::operation::Reduce::ReduceType reduce_type);
+ void loadReduceAll(const Operator *op, ir::Graph &subg);
+ void loadReshape(const Operator *op, ir::Graph &subg);
+ void loadResizeBilinear(const Operator *op, ir::Graph &subg);
+ void loadResizeNearestNeighbor(const Operator *op, ir::Graph &subg);
+ void loadSoftmax(const Operator *op, ir::Graph &subg);
+ void loadSpaceToDepth(const Operator *op, ir::Graph &subg);
void loadSplit(const Operator *op, ir::Graph &subg);
void loadSplitV(const Operator *op, ir::Graph &subg);
+ void loadSqueeze(const Operator *op, ir::Graph &subg);
void loadStridedSlice(const Operator *op, ir::Graph &subg);
+ void loadTransposeConv(const Operator *op, ir::Graph &subg);
+ void loadUnidirectionalSequenceLSTM(const Operator *op, ir::Graph &subg);
void loadUnpack(const Operator *op, ir::Graph &subg);
- void loadComparison(const Operator *op, ir::Graph &subg);
- void loadEinsum(const Operator *op, ir::Graph &subg);
- void loadOneHot(const Operator *op, ir::Graph &subg);
- void loadIf(const Operator *op, ir::Graph &subg);
void loadWhile(const Operator *op, ir::Graph &subg);
- void loadArgMax(const Operator *op, ir::Graph &subg);
- void loadFusedBatchNorm(const Operator *op, ir::Graph &subg);
- void loadLogSoftmax(const Operator *op, ir::Graph &subg);
- void loadSpaceToDepth(const Operator *op, ir::Graph &subg);
- void loadLeakyRelu(const Operator *op, ir::Graph &subg);
- void loadUnidirectionalSequenceLSTM(const Operator *op, ir::Graph &subg);
void verifySubgraphIndex(int subg_index)
{
@@ -255,19 +257,26 @@ ir::DataType BaseLoader<LoaderDomain>::BaseLoader::tensorTypeToDataType(const Te
{
case TensorType::TensorType_FLOAT32:
return ir::DataType::FLOAT32;
+ case TensorType::TensorType_FLOAT16:
+ return ir::DataType::FLOAT16;
case TensorType::TensorType_INT32:
return ir::DataType::INT32;
- case TensorType::TensorType_BOOL:
- return ir::DataType::BOOL8;
case TensorType::TensorType_UINT8:
return ir::DataType::QUANT_UINT8_ASYMM;
- case TensorType::TensorType_INT8:
- return ir::DataType::QUANT_INT8_ASYMM;
case TensorType::TensorType_INT64:
return ir::DataType::INT64;
+ // case TensorType::TensorType_STRING:
+ case TensorType::TensorType_BOOL:
+ return ir::DataType::BOOL8;
+ case TensorType::TensorType_INT16:
+ return ir::DataType::QUANT_INT16_ASYMM;
+ // case TensorType::TensorType_COMPLEX64
+ case TensorType::TensorType_INT8:
+ return ir::DataType::QUANT_INT8_ASYMM;
+ // case TensorType::TensorType_FLOAT64
default:
throw std::runtime_error(
- std::string("Unsupported tensor type: ").append(EnumNameTensorType(type)));
+ std::string("Unsupported tensor type: ").append(EnumNameTensorType(type)));
}
}
@@ -385,7 +394,7 @@ ir::OperandIndex BaseLoader<LoaderDomain>::loadOperand(const Tensor *tensor, ir:
{
size_t offset = unaligned_offset_start - aligned_offset_start;
uint8_t *mmap_base = static_cast<uint8_t *>(
- mmap(NULL, mmap_size, PROT_READ, MAP_PRIVATE, _fd, aligned_offset_start));
+ mmap(NULL, mmap_size, PROT_READ, MAP_PRIVATE, _fd, aligned_offset_start));
data_obj = std::make_unique<ir::CachedData>(mmap_base + offset, data_size);
munmap(mmap_base, mmap_size);
}
@@ -446,7 +455,7 @@ void BaseLoader<LoaderDomain>::loadSparsity(const Tensor *tensor, const ir::Shap
bool block2D_sparsity = dim_metadata_size == 4 && block_rank == 2;
if (dim_metadata_size != !random_sparsity && !block2D_sparsity)
throw std::runtime_error(
- "sparsity is supported only for 2D tensor with random or 16x1 block sparsity.");
+ "sparsity is supported only for 2D tensor with random or 16x1 block sparsity.");
const auto *src_metadata = src_sparsity->dim_metadata()->Get(0);
if (src_metadata->format() != DimensionType::DimensionType_DENSE)
@@ -514,8 +523,8 @@ void BaseLoader<LoaderDomain>::loadOperationIO(const Operator *op, ir::OperandIn
auto builtin_code = _model->operator_codes()->Get(op->opcode_index())->builtin_code();
if (isOptionalInputTensor(idx) && !allowOptionalInputTensor(builtin_code))
throw std::runtime_error(
- std::string("loader doesn't support optional input tensor yet for ")
- .append(EnumNameBuiltinOperator(builtin_code)));
+ std::string("loader doesn't support optional input tensor yet for ")
+ .append(EnumNameBuiltinOperator(builtin_code)));
};
check_optional_input();
inputs.append(tensorIdxToOperandIdx(idx));
@@ -691,9 +700,9 @@ void BaseLoader<LoaderDomain>::loadFC(const Operator *op, ir::Graph &subg)
const auto fc = loadOperationTo<ir::operation::FullyConnected>(op, subg, param);
const auto &input_operand =
- subg.operands().at(fc->getInputs().at(ir::operation::FullyConnected::INPUT));
+ subg.operands().at(fc->getInputs().at(ir::operation::FullyConnected::INPUT));
auto &weights_operand =
- subg.operands().at(fc->getInputs().at(ir::operation::FullyConnected::WEIGHT));
+ subg.operands().at(fc->getInputs().at(ir::operation::FullyConnected::WEIGHT));
if (input_operand.typeInfo().type() == ir::DataType::FLOAT32 &&
((weights_operand.typeInfo().type() == ir::DataType::QUANT_UINT8_ASYMM) ||
weights_operand.typeInfo().type() == ir::DataType::QUANT_INT8_ASYMM))
@@ -719,7 +728,7 @@ void BaseLoader<LoaderDomain>::loadAddV2(const Operator *op, ir::Graph &subg)
auto data_root = flexbuffers::GetRoot(custom_op_data, custom_op_data_size);
auto attr_map = data_root.AsMap();
const auto fused_activation_func = static_cast<typename LoaderDomain::ActivationFunctionType>(
- attr_map["fused_activation_function"].AsInt8());
+ attr_map["fused_activation_function"].AsInt8());
param.activation = convertActivation(fused_activation_func);
}
@@ -727,8 +736,18 @@ void BaseLoader<LoaderDomain>::loadAddV2(const Operator *op, ir::Graph &subg)
}
template <typename LoaderDomain>
+void BaseLoader<LoaderDomain>::loadDepthToSpace(const Operator *op, ir::Graph &subg)
+{
+ ir::operation::DepthToSpace::Param param;
+ const auto *options = op->builtin_options_as_DepthToSpaceOptions();
+ param.block_size = options->block_size();
+
+ loadOperationTo<ir::operation::DepthToSpace>(op, subg, param);
+}
+
+template <typename LoaderDomain>
void BaseLoader<LoaderDomain>::loadBinaryArithmetic(
- const Operator *op, ir::Graph &subg, ir::operation::BinaryArithmetic::ArithmeticType op_type)
+ const Operator *op, ir::Graph &subg, ir::operation::BinaryArithmetic::ArithmeticType op_type)
{
ir::operation::BinaryArithmetic::Param param;
param.arithmetic_type = op_type;
@@ -780,8 +799,8 @@ void BaseLoader<LoaderDomain>::loadPack(const Operator *op, ir::Graph &subg)
template <typename LoaderDomain>
void BaseLoader<LoaderDomain>::loadElementwiseActivation(
- const Operator *op, ir::Graph &subg, ir::operation::ElementwiseActivation::Type op_type,
- float alpha, float beta)
+ const Operator *op, ir::Graph &subg, ir::operation::ElementwiseActivation::Type op_type,
+ float alpha, float beta)
{
ir::operation::ElementwiseActivation::Param param;
param.op_type = op_type;
@@ -844,8 +863,8 @@ void BaseLoader<LoaderDomain>::loadReduceAll(const Operator *op, ir::Graph &subg
template <typename LoaderDomain>
void BaseLoader<LoaderDomain>::loadElementwiseBinary(
- const Operator *op, ir::Graph &subg,
- ir::operation::ElementwiseBinary::ElementwiseBinaryType op_type)
+ const Operator *op, ir::Graph &subg,
+ ir::operation::ElementwiseBinary::ElementwiseBinaryType op_type)
{
ir::operation::ElementwiseBinary::Param param;
param.op_type = op_type;
@@ -870,7 +889,7 @@ void BaseLoader<LoaderDomain>::loadElementwiseUnary(const Operator *op, ir::Grap
}
};
qasymm8ToUint8(
- subg.operands().at(eu->getInputs().at(ir::operation::ElementwiseUnary::Input::INPUT)));
+ subg.operands().at(eu->getInputs().at(ir::operation::ElementwiseUnary::Input::INPUT)));
qasymm8ToUint8(subg.operands().at(eu->getOutputs().at(0)));
}
}
@@ -915,8 +934,8 @@ void BaseLoader<LoaderDomain>::loadBatchMatMul(const Operator *op, ir::Graph &su
break;
default:
throw std::runtime_error(
- std::string("Wrong loaded operation: ").append(EnumNameBuiltinOperator(builtin_op)) +
- " as " + EnumNameBuiltinOperator(BuiltinOperator::BuiltinOperator_BATCH_MATMUL));
+ std::string("Wrong loaded operation: ").append(EnumNameBuiltinOperator(builtin_op)) +
+ " as " + EnumNameBuiltinOperator(BuiltinOperator::BuiltinOperator_BATCH_MATMUL));
}
loadOperationTo<ir::operation::BatchMatMul>(op, subg, param);
@@ -959,15 +978,15 @@ void BaseLoader<LoaderDomain>::loadCustom(const Operator *op, ir::Graph &subg)
// Mapping from custom op name string to BuiltinOP enum
std::map<std::string, BuiltinOP> builtin_map = {
- {"AddV2", BuiltinOP::AddV2},
- {"All", BuiltinOP::ReduceAll},
- {"MatrixBandPart", BuiltinOP::MatrixBandPart},
- {"BatchMatMulV2", BuiltinOP::BatchMatMul},
- {"Einsum", BuiltinOP::Einsum},
- {"FusedBatchNormV3", BuiltinOP::FusedBatchNorm},
- {"BroadcastTo", BuiltinOP::BroadcastTo},
- {"StatelessRandomUniform", BuiltinOP::StatelessRandomUniform},
- {"Erf", BuiltinOP::Erf},
+ {"AddV2", BuiltinOP::AddV2},
+ {"All", BuiltinOP::ReduceAll},
+ {"MatrixBandPart", BuiltinOP::MatrixBandPart},
+ {"BatchMatMulV2", BuiltinOP::BatchMatMul},
+ {"Einsum", BuiltinOP::Einsum},
+ {"FusedBatchNormV3", BuiltinOP::FusedBatchNorm},
+ {"BroadcastTo", BuiltinOP::BroadcastTo},
+ {"StatelessRandomUniform", BuiltinOP::StatelessRandomUniform},
+ {"Erf", BuiltinOP::Erf},
};
try
@@ -1005,7 +1024,7 @@ void BaseLoader<LoaderDomain>::loadCustom(const Operator *op, ir::Graph &subg)
break;
default:
throw std::runtime_error{
- "Loader: Custom OP map is defined but operation loader function is not defined"};
+ "Loader: Custom OP map is defined but operation loader function is not defined"};
}
return;
@@ -1120,7 +1139,7 @@ void BaseLoader<LoaderDomain>::loadComparison(const Operator *op, ir::Graph &sub
break;
default:
throw std::runtime_error(
- std::string("Unsupported operation: ").append(EnumNameBuiltinOperator(builtin_op)));
+ std::string("Unsupported operation: ").append(EnumNameBuiltinOperator(builtin_op)));
}
loadOperationTo<ir::operation::Comparison>(op, subg, param);
@@ -1224,25 +1243,15 @@ void BaseLoader<LoaderDomain>::loadWhile(const Operator *op, ir::Graph &subg)
}
template <typename LoaderDomain>
-void BaseLoader<LoaderDomain>::loadArgMax(const Operator *op, ir::Graph &subg)
+void BaseLoader<LoaderDomain>::loadArgMinMax(const Operator *op, ir::Graph &subg, bool is_argmax)
{
- ir::operation::ArgMax::Param param;
- const auto output_type = op->builtin_options_as_ArgMaxOptions()->output_type();
- switch (output_type)
- {
- case TensorType::TensorType_INT32:
- case TensorType::TensorType_INT64:
- param.output_type = tensorTypeToDataType(output_type);
- break;
- default:
- throw std::runtime_error("ArgMax: `output_type` must be either int32 or int64.");
- }
- auto am = loadOperationTo<ir::operation::ArgMax>(op, subg, param);
+ ir::operation::ArgMinMax::Param param;
+ const auto output_type = is_argmax ? op->builtin_options_as_ArgMaxOptions()->output_type()
+ : op->builtin_options_as_ArgMinOptions()->output_type();
+ param.output_type = tensorTypeToDataType(output_type);
+ param.is_arg_max = is_argmax;
- auto &axisOperand = subg.operands().at(am->getInputs().at(ir::operation::ArgMax::Input::AXIS));
- if (!(axisOperand.operandSize() == 4 && (axisOperand.typeInfo().type() == ir::DataType::INT32 ||
- axisOperand.typeInfo().type() == ir::DataType::INT64)))
- throw std::runtime_error("ArgMax: `axis` with an int32 or int64 element is only supported.");
+ loadOperationTo<ir::operation::ArgMinMax>(op, subg, param);
}
template <typename LoaderDomain>
@@ -1287,7 +1296,7 @@ void BaseLoader<LoaderDomain>::loadUnidirectionalSequenceLSTM(const Operator *op
{
auto builtin_code = _model->operator_codes()->Get(op->opcode_index())->builtin_code();
throw std::runtime_error(std::string("loader doesn't support optional output tensor yet for ")
- .append(EnumNameBuiltinOperator(builtin_code)));
+ .append(EnumNameBuiltinOperator(builtin_code)));
}
for (size_t i = 0; i < ir::operation::LSTM::Output::OUTPUT; ++i)
{
@@ -1355,6 +1364,9 @@ void BaseLoader<LoaderDomain>::loadOperation(const Operator *op, ir::Graph &subg
case BuiltinOperator::BuiltinOperator_PACK:
loadPack(op, subg);
return;
+ case BuiltinOperator::BuiltinOperator_ELU:
+ loadElementwiseActivation(op, subg, ir::operation::ElementwiseActivation::Type::ELU);
+ return;
case BuiltinOperator::BuiltinOperator_RELU:
loadElementwiseActivation(op, subg, ir::operation::ElementwiseActivation::Type::RELU,
ir::operation::ElementwiseActivation::infinity, 0.f);
@@ -1383,6 +1395,9 @@ void BaseLoader<LoaderDomain>::loadOperation(const Operator *op, ir::Graph &subg
case BuiltinOperator::BuiltinOperator_SQRT:
loadElementwiseUnary(op, subg, ir::operation::ElementwiseUnary::Type::SQRT);
return;
+ case BuiltinOperator::BuiltinOperator_SQUARE:
+ loadElementwiseUnary(op, subg, ir::operation::ElementwiseUnary::Type::SQUARE);
+ return;
case BuiltinOperator::BuiltinOperator_SQUARED_DIFFERENCE:
loadOperationTo<ir::operation::SquaredDifference>(op, subg);
return;
@@ -1499,7 +1514,10 @@ void BaseLoader<LoaderDomain>::loadOperation(const Operator *op, ir::Graph &subg
loadElementwiseUnary(op, subg, ir::operation::ElementwiseUnary::Type::NEG);
return;
case BuiltinOperator::BuiltinOperator_ARG_MAX:
- loadArgMax(op, subg);
+ loadArgMinMax(op, subg, true);
+ return;
+ case BuiltinOperator::BuiltinOperator_ARG_MIN:
+ loadArgMinMax(op, subg, false);
return;
case BuiltinOperator::BuiltinOperator_LOG:
loadElementwiseUnary(op, subg, ir::operation::ElementwiseUnary::Type::LOG);
@@ -1513,6 +1531,10 @@ void BaseLoader<LoaderDomain>::loadOperation(const Operator *op, ir::Graph &subg
case BuiltinOperator::BuiltinOperator_LOGICAL_NOT:
loadElementwiseUnary(op, subg, ir::operation::ElementwiseUnary::Type::LOGICAL_NOT);
return;
+ case BuiltinOperator::BuiltinOperator_LOGICAL_AND:
+ loadElementwiseBinary(op, subg,
+ ir::operation::ElementwiseBinary::ElementwiseBinaryType::LOGICAL_AND);
+ return;
case BuiltinOperator::BuiltinOperator_LOGICAL_OR:
loadElementwiseBinary(op, subg,
ir::operation::ElementwiseBinary::ElementwiseBinaryType::LOGICAL_OR);
@@ -1556,9 +1578,12 @@ void BaseLoader<LoaderDomain>::loadOperation(const Operator *op, ir::Graph &subg
case BuiltinOperator::BuiltinOperator_UNIDIRECTIONAL_SEQUENCE_LSTM:
loadUnidirectionalSequenceLSTM(op, subg);
return;
+ case BuiltinOperator::BuiltinOperator_DEPTH_TO_SPACE:
+ loadDepthToSpace(op, subg);
+ return;
default:
throw std::runtime_error(
- std::string("Unsupported operation: ").append(EnumNameBuiltinOperator(builtin_op)));
+ std::string("Unsupported operation: ").append(EnumNameBuiltinOperator(builtin_op)));
}
}
diff --git a/runtime/onert/frontend/circle/src/circle_loader.cc b/runtime/onert/frontend/circle/src/circle_loader.cc
index 33e1709a8..0d7b3eab4 100644
--- a/runtime/onert/frontend/circle/src/circle_loader.cc
+++ b/runtime/onert/frontend/circle/src/circle_loader.cc
@@ -196,7 +196,7 @@ void CircleLoader::loadBCQFullyConnected(const Operator *op, ir::Graph &subg)
param.activation = convertActivation(options->fused_activation_function());
std::unique_ptr<ir::Operation> new_op(
- new ir::operation::BCQFullyConnected(inputs, outputs, param));
+ new ir::operation::BCQFullyConnected(inputs, outputs, param));
subg.addOperation(std::move(new_op));
}
diff --git a/runtime/onert/frontend/circle_schema/include/circle_schema_generated.h b/runtime/onert/frontend/circle_schema/include/circle_schema_generated.h
index 0ff1f72a2..eb1775297 100644
--- a/runtime/onert/frontend/circle_schema/include/circle_schema_generated.h
+++ b/runtime/onert/frontend/circle_schema/include/circle_schema_generated.h
@@ -2155,9 +2155,8 @@ enum ActivationFunctionType
inline const ActivationFunctionType (&EnumValuesActivationFunctionType())[6]
{
static const ActivationFunctionType values[] = {
- ActivationFunctionType_NONE, ActivationFunctionType_RELU,
- ActivationFunctionType_RELU_N1_TO_1, ActivationFunctionType_RELU6,
- ActivationFunctionType_TANH, ActivationFunctionType_SIGN_BIT};
+ ActivationFunctionType_NONE, ActivationFunctionType_RELU, ActivationFunctionType_RELU_N1_TO_1,
+ ActivationFunctionType_RELU6, ActivationFunctionType_TANH, ActivationFunctionType_SIGN_BIT};
return values;
}
@@ -2218,9 +2217,8 @@ enum FullyConnectedOptionsWeightsFormat
inline const FullyConnectedOptionsWeightsFormat (&EnumValuesFullyConnectedOptionsWeightsFormat())[3]
{
static const FullyConnectedOptionsWeightsFormat values[] = {
- FullyConnectedOptionsWeightsFormat_DEFAULT,
- FullyConnectedOptionsWeightsFormat_SHUFFLED4x16INT8,
- FullyConnectedOptionsWeightsFormat_SHUFFLED16x1FLOAT32};
+ FullyConnectedOptionsWeightsFormat_DEFAULT, FullyConnectedOptionsWeightsFormat_SHUFFLED4x16INT8,
+ FullyConnectedOptionsWeightsFormat_SHUFFLED16x1FLOAT32};
return values;
}
@@ -2478,8 +2476,8 @@ struct QuantizationParameters FLATBUFFERS_FINAL_CLASS : private flatbuffers::Tab
const circle::CustomQuantization *details_as_CustomQuantization() const
{
return details_type() == circle::QuantizationDetails_CustomQuantization
- ? static_cast<const circle::CustomQuantization *>(details())
- : nullptr;
+ ? static_cast<const circle::CustomQuantization *>(details())
+ : nullptr;
}
int32_t quantized_dimension() const { return GetField<int32_t>(VT_QUANTIZED_DIMENSION, 0); }
bool Verify(flatbuffers::Verifier &verifier) const
@@ -2551,12 +2549,12 @@ struct QuantizationParametersBuilder
};
inline flatbuffers::Offset<QuantizationParameters> CreateQuantizationParameters(
- flatbuffers::FlatBufferBuilder &_fbb, flatbuffers::Offset<flatbuffers::Vector<float>> min = 0,
- flatbuffers::Offset<flatbuffers::Vector<float>> max = 0,
- flatbuffers::Offset<flatbuffers::Vector<float>> scale = 0,
- flatbuffers::Offset<flatbuffers::Vector<int64_t>> zero_point = 0,
- circle::QuantizationDetails details_type = circle::QuantizationDetails_NONE,
- flatbuffers::Offset<void> details = 0, int32_t quantized_dimension = 0)
+ flatbuffers::FlatBufferBuilder &_fbb, flatbuffers::Offset<flatbuffers::Vector<float>> min = 0,
+ flatbuffers::Offset<flatbuffers::Vector<float>> max = 0,
+ flatbuffers::Offset<flatbuffers::Vector<float>> scale = 0,
+ flatbuffers::Offset<flatbuffers::Vector<int64_t>> zero_point = 0,
+ circle::QuantizationDetails details_type = circle::QuantizationDetails_NONE,
+ flatbuffers::Offset<void> details = 0, int32_t quantized_dimension = 0)
{
QuantizationParametersBuilder builder_(_fbb);
builder_.add_quantized_dimension(quantized_dimension);
@@ -2570,11 +2568,11 @@ inline flatbuffers::Offset<QuantizationParameters> CreateQuantizationParameters(
}
inline flatbuffers::Offset<QuantizationParameters> CreateQuantizationParametersDirect(
- flatbuffers::FlatBufferBuilder &_fbb, const std::vector<float> *min = nullptr,
- const std::vector<float> *max = nullptr, const std::vector<float> *scale = nullptr,
- const std::vector<int64_t> *zero_point = nullptr,
- circle::QuantizationDetails details_type = circle::QuantizationDetails_NONE,
- flatbuffers::Offset<void> details = 0, int32_t quantized_dimension = 0)
+ flatbuffers::FlatBufferBuilder &_fbb, const std::vector<float> *min = nullptr,
+ const std::vector<float> *max = nullptr, const std::vector<float> *scale = nullptr,
+ const std::vector<int64_t> *zero_point = nullptr,
+ circle::QuantizationDetails details_type = circle::QuantizationDetails_NONE,
+ flatbuffers::Offset<void> details = 0, int32_t quantized_dimension = 0)
{
auto min__ = min ? _fbb.CreateVector<float>(*min) : 0;
auto max__ = max ? _fbb.CreateVector<float>(*max) : 0;
@@ -2789,20 +2787,20 @@ struct DimensionMetadata FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table
const circle::Int32Vector *array_segments_as_Int32Vector() const
{
return array_segments_type() == circle::SparseIndexVector_Int32Vector
- ? static_cast<const circle::Int32Vector *>(array_segments())
- : nullptr;
+ ? static_cast<const circle::Int32Vector *>(array_segments())
+ : nullptr;
}
const circle::Uint16Vector *array_segments_as_Uint16Vector() const
{
return array_segments_type() == circle::SparseIndexVector_Uint16Vector
- ? static_cast<const circle::Uint16Vector *>(array_segments())
- : nullptr;
+ ? static_cast<const circle::Uint16Vector *>(array_segments())
+ : nullptr;
}
const circle::Uint8Vector *array_segments_as_Uint8Vector() const
{
return array_segments_type() == circle::SparseIndexVector_Uint8Vector
- ? static_cast<const circle::Uint8Vector *>(array_segments())
- : nullptr;
+ ? static_cast<const circle::Uint8Vector *>(array_segments())
+ : nullptr;
}
circle::SparseIndexVector array_indices_type() const
{
@@ -2813,20 +2811,20 @@ struct DimensionMetadata FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table
const circle::Int32Vector *array_indices_as_Int32Vector() const
{
return array_indices_type() == circle::SparseIndexVector_Int32Vector
- ? static_cast<const circle::Int32Vector *>(array_indices())
- : nullptr;
+ ? static_cast<const circle::Int32Vector *>(array_indices())
+ : nullptr;
}
const circle::Uint16Vector *array_indices_as_Uint16Vector() const
{
return array_indices_type() == circle::SparseIndexVector_Uint16Vector
- ? static_cast<const circle::Uint16Vector *>(array_indices())
- : nullptr;
+ ? static_cast<const circle::Uint16Vector *>(array_indices())
+ : nullptr;
}
const circle::Uint8Vector *array_indices_as_Uint8Vector() const
{
return array_indices_type() == circle::SparseIndexVector_Uint8Vector
- ? static_cast<const circle::Uint8Vector *>(array_indices())
- : nullptr;
+ ? static_cast<const circle::Uint8Vector *>(array_indices())
+ : nullptr;
}
bool Verify(flatbuffers::Verifier &verifier) const
{
@@ -2924,12 +2922,12 @@ struct DimensionMetadataBuilder
};
inline flatbuffers::Offset<DimensionMetadata> CreateDimensionMetadata(
- flatbuffers::FlatBufferBuilder &_fbb,
- circle::DimensionType format = circle::DimensionType_DENSE, int32_t dense_size = 0,
- circle::SparseIndexVector array_segments_type = circle::SparseIndexVector_NONE,
- flatbuffers::Offset<void> array_segments = 0,
- circle::SparseIndexVector array_indices_type = circle::SparseIndexVector_NONE,
- flatbuffers::Offset<void> array_indices = 0)
+ flatbuffers::FlatBufferBuilder &_fbb, circle::DimensionType format = circle::DimensionType_DENSE,
+ int32_t dense_size = 0,
+ circle::SparseIndexVector array_segments_type = circle::SparseIndexVector_NONE,
+ flatbuffers::Offset<void> array_segments = 0,
+ circle::SparseIndexVector array_indices_type = circle::SparseIndexVector_NONE,
+ flatbuffers::Offset<void> array_indices = 0)
{
DimensionMetadataBuilder builder_(_fbb);
builder_.add_array_indices(array_indices);
@@ -2961,7 +2959,7 @@ struct SparsityParameters FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table
const flatbuffers::Vector<flatbuffers::Offset<circle::DimensionMetadata>> *dim_metadata() const
{
return GetPointer<const flatbuffers::Vector<flatbuffers::Offset<circle::DimensionMetadata>> *>(
- VT_DIM_METADATA);
+ VT_DIM_METADATA);
}
bool Verify(flatbuffers::Verifier &verifier) const
{
@@ -2987,8 +2985,8 @@ struct SparsityParametersBuilder
fbb_.AddOffset(SparsityParameters::VT_BLOCK_MAP, block_map);
}
void add_dim_metadata(
- flatbuffers::Offset<flatbuffers::Vector<flatbuffers::Offset<circle::DimensionMetadata>>>
- dim_metadata)
+ flatbuffers::Offset<flatbuffers::Vector<flatbuffers::Offset<circle::DimensionMetadata>>>
+ dim_metadata)
{
fbb_.AddOffset(SparsityParameters::VT_DIM_METADATA, dim_metadata);
}
@@ -3006,11 +3004,11 @@ struct SparsityParametersBuilder
};
inline flatbuffers::Offset<SparsityParameters> CreateSparsityParameters(
- flatbuffers::FlatBufferBuilder &_fbb,
- flatbuffers::Offset<flatbuffers::Vector<int32_t>> traversal_order = 0,
- flatbuffers::Offset<flatbuffers::Vector<int32_t>> block_map = 0,
- flatbuffers::Offset<flatbuffers::Vector<flatbuffers::Offset<circle::DimensionMetadata>>>
- dim_metadata = 0)
+ flatbuffers::FlatBufferBuilder &_fbb,
+ flatbuffers::Offset<flatbuffers::Vector<int32_t>> traversal_order = 0,
+ flatbuffers::Offset<flatbuffers::Vector<int32_t>> block_map = 0,
+ flatbuffers::Offset<flatbuffers::Vector<flatbuffers::Offset<circle::DimensionMetadata>>>
+ dim_metadata = 0)
{
SparsityParametersBuilder builder_(_fbb);
builder_.add_dim_metadata(dim_metadata);
@@ -3020,16 +3018,15 @@ inline flatbuffers::Offset<SparsityParameters> CreateSparsityParameters(
}
inline flatbuffers::Offset<SparsityParameters> CreateSparsityParametersDirect(
- flatbuffers::FlatBufferBuilder &_fbb, const std::vector<int32_t> *traversal_order = nullptr,
- const std::vector<int32_t> *block_map = nullptr,
- const std::vector<flatbuffers::Offset<circle::DimensionMetadata>> *dim_metadata = nullptr)
+ flatbuffers::FlatBufferBuilder &_fbb, const std::vector<int32_t> *traversal_order = nullptr,
+ const std::vector<int32_t> *block_map = nullptr,
+ const std::vector<flatbuffers::Offset<circle::DimensionMetadata>> *dim_metadata = nullptr)
{
auto traversal_order__ = traversal_order ? _fbb.CreateVector<int32_t>(*traversal_order) : 0;
auto block_map__ = block_map ? _fbb.CreateVector<int32_t>(*block_map) : 0;
auto dim_metadata__ =
- dim_metadata
- ? _fbb.CreateVector<flatbuffers::Offset<circle::DimensionMetadata>>(*dim_metadata)
- : 0;
+ dim_metadata ? _fbb.CreateVector<flatbuffers::Offset<circle::DimensionMetadata>>(*dim_metadata)
+ : 0;
return circle::CreateSparsityParameters(_fbb, traversal_order__, block_map__, dim_metadata__);
}
@@ -3155,12 +3152,11 @@ CreateTensor(flatbuffers::FlatBufferBuilder &_fbb,
}
inline flatbuffers::Offset<Tensor> CreateTensorDirect(
- flatbuffers::FlatBufferBuilder &_fbb, const std::vector<int32_t> *shape = nullptr,
- circle::TensorType type = circle::TensorType_FLOAT32, uint32_t buffer = 0,
- const char *name = nullptr,
- flatbuffers::Offset<circle::QuantizationParameters> quantization = 0, bool is_variable = false,
- flatbuffers::Offset<circle::SparsityParameters> sparsity = 0,
- const std::vector<int32_t> *shape_signature = nullptr)
+ flatbuffers::FlatBufferBuilder &_fbb, const std::vector<int32_t> *shape = nullptr,
+ circle::TensorType type = circle::TensorType_FLOAT32, uint32_t buffer = 0,
+ const char *name = nullptr, flatbuffers::Offset<circle::QuantizationParameters> quantization = 0,
+ bool is_variable = false, flatbuffers::Offset<circle::SparsityParameters> sparsity = 0,
+ const std::vector<int32_t> *shape_signature = nullptr)
{
auto shape__ = shape ? _fbb.CreateVector<int32_t>(*shape) : 0;
auto name__ = name ? _fbb.CreateString(name) : 0;
@@ -3190,7 +3186,7 @@ struct Conv2DOptions FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table
circle::ActivationFunctionType fused_activation_function() const
{
return static_cast<circle::ActivationFunctionType>(
- GetField<int8_t>(VT_FUSED_ACTIVATION_FUNCTION, 0));
+ GetField<int8_t>(VT_FUSED_ACTIVATION_FUNCTION, 0));
}
int32_t dilation_w_factor() const { return GetField<int32_t>(VT_DILATION_W_FACTOR, 1); }
int32_t dilation_h_factor() const { return GetField<int32_t>(VT_DILATION_H_FACTOR, 1); }
@@ -3249,10 +3245,10 @@ struct Conv2DOptionsBuilder
};
inline flatbuffers::Offset<Conv2DOptions> CreateConv2DOptions(
- flatbuffers::FlatBufferBuilder &_fbb, circle::Padding padding = circle::Padding_SAME,
- int32_t stride_w = 0, int32_t stride_h = 0,
- circle::ActivationFunctionType fused_activation_function = circle::ActivationFunctionType_NONE,
- int32_t dilation_w_factor = 1, int32_t dilation_h_factor = 1)
+ flatbuffers::FlatBufferBuilder &_fbb, circle::Padding padding = circle::Padding_SAME,
+ int32_t stride_w = 0, int32_t stride_h = 0,
+ circle::ActivationFunctionType fused_activation_function = circle::ActivationFunctionType_NONE,
+ int32_t dilation_w_factor = 1, int32_t dilation_h_factor = 1)
{
Conv2DOptionsBuilder builder_(_fbb);
builder_.add_dilation_h_factor(dilation_h_factor);
@@ -3287,7 +3283,7 @@ struct Pool2DOptions FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table
circle::ActivationFunctionType fused_activation_function() const
{
return static_cast<circle::ActivationFunctionType>(
- GetField<int8_t>(VT_FUSED_ACTIVATION_FUNCTION, 0));
+ GetField<int8_t>(VT_FUSED_ACTIVATION_FUNCTION, 0));
}
bool Verify(flatbuffers::Verifier &verifier) const
{
@@ -3344,9 +3340,9 @@ struct Pool2DOptionsBuilder
};
inline flatbuffers::Offset<Pool2DOptions> CreatePool2DOptions(
- flatbuffers::FlatBufferBuilder &_fbb, circle::Padding padding = circle::Padding_SAME,
- int32_t stride_w = 0, int32_t stride_h = 0, int32_t filter_width = 0, int32_t filter_height = 0,
- circle::ActivationFunctionType fused_activation_function = circle::ActivationFunctionType_NONE)
+ flatbuffers::FlatBufferBuilder &_fbb, circle::Padding padding = circle::Padding_SAME,
+ int32_t stride_w = 0, int32_t stride_h = 0, int32_t filter_width = 0, int32_t filter_height = 0,
+ circle::ActivationFunctionType fused_activation_function = circle::ActivationFunctionType_NONE)
{
Pool2DOptionsBuilder builder_(_fbb);
builder_.add_filter_height(filter_height);
@@ -3381,7 +3377,7 @@ struct DepthwiseConv2DOptions FLATBUFFERS_FINAL_CLASS : private flatbuffers::Tab
circle::ActivationFunctionType fused_activation_function() const
{
return static_cast<circle::ActivationFunctionType>(
- GetField<int8_t>(VT_FUSED_ACTIVATION_FUNCTION, 0));
+ GetField<int8_t>(VT_FUSED_ACTIVATION_FUNCTION, 0));
}
int32_t dilation_w_factor() const { return GetField<int32_t>(VT_DILATION_W_FACTOR, 1); }
int32_t dilation_h_factor() const { return GetField<int32_t>(VT_DILATION_H_FACTOR, 1); }
@@ -3445,10 +3441,10 @@ struct DepthwiseConv2DOptionsBuilder
};
inline flatbuffers::Offset<DepthwiseConv2DOptions> CreateDepthwiseConv2DOptions(
- flatbuffers::FlatBufferBuilder &_fbb, circle::Padding padding = circle::Padding_SAME,
- int32_t stride_w = 0, int32_t stride_h = 0, int32_t depth_multiplier = 0,
- circle::ActivationFunctionType fused_activation_function = circle::ActivationFunctionType_NONE,
- int32_t dilation_w_factor = 1, int32_t dilation_h_factor = 1)
+ flatbuffers::FlatBufferBuilder &_fbb, circle::Padding padding = circle::Padding_SAME,
+ int32_t stride_w = 0, int32_t stride_h = 0, int32_t depth_multiplier = 0,
+ circle::ActivationFunctionType fused_activation_function = circle::ActivationFunctionType_NONE,
+ int32_t dilation_w_factor = 1, int32_t dilation_h_factor = 1)
{
DepthwiseConv2DOptionsBuilder builder_(_fbb);
builder_.add_dilation_h_factor(dilation_h_factor);
@@ -3499,12 +3495,12 @@ struct ConcatEmbeddingsOptionsBuilder
fbb_.AddElement<int32_t>(ConcatEmbeddingsOptions::VT_NUM_CHANNELS, num_channels, 0);
}
void add_num_columns_per_channel(
- flatbuffers::Offset<flatbuffers::Vector<int32_t>> num_columns_per_channel)
+ flatbuffers::Offset<flatbuffers::Vector<int32_t>> num_columns_per_channel)
{
fbb_.AddOffset(ConcatEmbeddingsOptions::VT_NUM_COLUMNS_PER_CHANNEL, num_columns_per_channel);
}
void add_embedding_dim_per_channel(
- flatbuffers::Offset<flatbuffers::Vector<int32_t>> embedding_dim_per_channel)
+ flatbuffers::Offset<flatbuffers::Vector<int32_t>> embedding_dim_per_channel)
{
fbb_.AddOffset(ConcatEmbeddingsOptions::VT_EMBEDDING_DIM_PER_CHANNEL,
embedding_dim_per_channel);
@@ -3523,9 +3519,9 @@ struct ConcatEmbeddingsOptionsBuilder
};
inline flatbuffers::Offset<ConcatEmbeddingsOptions> CreateConcatEmbeddingsOptions(
- flatbuffers::FlatBufferBuilder &_fbb, int32_t num_channels = 0,
- flatbuffers::Offset<flatbuffers::Vector<int32_t>> num_columns_per_channel = 0,
- flatbuffers::Offset<flatbuffers::Vector<int32_t>> embedding_dim_per_channel = 0)
+ flatbuffers::FlatBufferBuilder &_fbb, int32_t num_channels = 0,
+ flatbuffers::Offset<flatbuffers::Vector<int32_t>> num_columns_per_channel = 0,
+ flatbuffers::Offset<flatbuffers::Vector<int32_t>> embedding_dim_per_channel = 0)
{
ConcatEmbeddingsOptionsBuilder builder_(_fbb);
builder_.add_embedding_dim_per_channel(embedding_dim_per_channel);
@@ -3540,9 +3536,9 @@ CreateConcatEmbeddingsOptionsDirect(flatbuffers::FlatBufferBuilder &_fbb, int32_
const std::vector<int32_t> *embedding_dim_per_channel = nullptr)
{
auto num_columns_per_channel__ =
- num_columns_per_channel ? _fbb.CreateVector<int32_t>(*num_columns_per_channel) : 0;
+ num_columns_per_channel ? _fbb.CreateVector<int32_t>(*num_columns_per_channel) : 0;
auto embedding_dim_per_channel__ =
- embedding_dim_per_channel ? _fbb.CreateVector<int32_t>(*embedding_dim_per_channel) : 0;
+ embedding_dim_per_channel ? _fbb.CreateVector<int32_t>(*embedding_dim_per_channel) : 0;
return circle::CreateConcatEmbeddingsOptions(_fbb, num_channels, num_columns_per_channel__,
embedding_dim_per_channel__);
}
@@ -3609,7 +3605,7 @@ struct SVDFOptions FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table
circle::ActivationFunctionType fused_activation_function() const
{
return static_cast<circle::ActivationFunctionType>(
- GetField<int8_t>(VT_FUSED_ACTIVATION_FUNCTION, 0));
+ GetField<int8_t>(VT_FUSED_ACTIVATION_FUNCTION, 0));
}
bool asymmetric_quantize_inputs() const
{
@@ -3653,9 +3649,9 @@ struct SVDFOptionsBuilder
};
inline flatbuffers::Offset<SVDFOptions> CreateSVDFOptions(
- flatbuffers::FlatBufferBuilder &_fbb, int32_t rank = 0,
- circle::ActivationFunctionType fused_activation_function = circle::ActivationFunctionType_NONE,
- bool asymmetric_quantize_inputs = false)
+ flatbuffers::FlatBufferBuilder &_fbb, int32_t rank = 0,
+ circle::ActivationFunctionType fused_activation_function = circle::ActivationFunctionType_NONE,
+ bool asymmetric_quantize_inputs = false)
{
SVDFOptionsBuilder builder_(_fbb);
builder_.add_rank(rank);
@@ -3675,7 +3671,7 @@ struct RNNOptions FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table
circle::ActivationFunctionType fused_activation_function() const
{
return static_cast<circle::ActivationFunctionType>(
- GetField<int8_t>(VT_FUSED_ACTIVATION_FUNCTION, 0));
+ GetField<int8_t>(VT_FUSED_ACTIVATION_FUNCTION, 0));
}
bool asymmetric_quantize_inputs() const
{
@@ -3718,9 +3714,9 @@ struct RNNOptionsBuilder
};
inline flatbuffers::Offset<RNNOptions> CreateRNNOptions(
- flatbuffers::FlatBufferBuilder &_fbb,
- circle::ActivationFunctionType fused_activation_function = circle::ActivationFunctionType_NONE,
- bool asymmetric_quantize_inputs = false)
+ flatbuffers::FlatBufferBuilder &_fbb,
+ circle::ActivationFunctionType fused_activation_function = circle::ActivationFunctionType_NONE,
+ bool asymmetric_quantize_inputs = false)
{
RNNOptionsBuilder builder_(_fbb);
builder_.add_asymmetric_quantize_inputs(asymmetric_quantize_inputs);
@@ -3741,7 +3737,7 @@ struct SequenceRNNOptions FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table
circle::ActivationFunctionType fused_activation_function() const
{
return static_cast<circle::ActivationFunctionType>(
- GetField<int8_t>(VT_FUSED_ACTIVATION_FUNCTION, 0));
+ GetField<int8_t>(VT_FUSED_ACTIVATION_FUNCTION, 0));
}
bool asymmetric_quantize_inputs() const
{
@@ -3789,9 +3785,9 @@ struct SequenceRNNOptionsBuilder
};
inline flatbuffers::Offset<SequenceRNNOptions> CreateSequenceRNNOptions(
- flatbuffers::FlatBufferBuilder &_fbb, bool time_major = false,
- circle::ActivationFunctionType fused_activation_function = circle::ActivationFunctionType_NONE,
- bool asymmetric_quantize_inputs = false)
+ flatbuffers::FlatBufferBuilder &_fbb, bool time_major = false,
+ circle::ActivationFunctionType fused_activation_function = circle::ActivationFunctionType_NONE,
+ bool asymmetric_quantize_inputs = false)
{
SequenceRNNOptionsBuilder builder_(_fbb);
builder_.add_asymmetric_quantize_inputs(asymmetric_quantize_inputs);
@@ -3814,7 +3810,7 @@ struct BidirectionalSequenceRNNOptions FLATBUFFERS_FINAL_CLASS : private flatbuf
circle::ActivationFunctionType fused_activation_function() const
{
return static_cast<circle::ActivationFunctionType>(
- GetField<int8_t>(VT_FUSED_ACTIVATION_FUNCTION, 0));
+ GetField<int8_t>(VT_FUSED_ACTIVATION_FUNCTION, 0));
}
bool merge_outputs() const { return GetField<uint8_t>(VT_MERGE_OUTPUTS, 0) != 0; }
bool asymmetric_quantize_inputs() const
@@ -3869,9 +3865,9 @@ struct BidirectionalSequenceRNNOptionsBuilder
};
inline flatbuffers::Offset<BidirectionalSequenceRNNOptions> CreateBidirectionalSequenceRNNOptions(
- flatbuffers::FlatBufferBuilder &_fbb, bool time_major = false,
- circle::ActivationFunctionType fused_activation_function = circle::ActivationFunctionType_NONE,
- bool merge_outputs = false, bool asymmetric_quantize_inputs = false)
+ flatbuffers::FlatBufferBuilder &_fbb, bool time_major = false,
+ circle::ActivationFunctionType fused_activation_function = circle::ActivationFunctionType_NONE,
+ bool merge_outputs = false, bool asymmetric_quantize_inputs = false)
{
BidirectionalSequenceRNNOptionsBuilder builder_(_fbb);
builder_.add_asymmetric_quantize_inputs(asymmetric_quantize_inputs);
@@ -3894,12 +3890,12 @@ struct FullyConnectedOptions FLATBUFFERS_FINAL_CLASS : private flatbuffers::Tabl
circle::ActivationFunctionType fused_activation_function() const
{
return static_cast<circle::ActivationFunctionType>(
- GetField<int8_t>(VT_FUSED_ACTIVATION_FUNCTION, 0));
+ GetField<int8_t>(VT_FUSED_ACTIVATION_FUNCTION, 0));
}
circle::FullyConnectedOptionsWeightsFormat weights_format() const
{
return static_cast<circle::FullyConnectedOptionsWeightsFormat>(
- GetField<int8_t>(VT_WEIGHTS_FORMAT, 0));
+ GetField<int8_t>(VT_WEIGHTS_FORMAT, 0));
}
bool keep_num_dims() const { return GetField<uint8_t>(VT_KEEP_NUM_DIMS, 0) != 0; }
bool asymmetric_quantize_inputs() const
@@ -3955,11 +3951,11 @@ struct FullyConnectedOptionsBuilder
};
inline flatbuffers::Offset<FullyConnectedOptions> CreateFullyConnectedOptions(
- flatbuffers::FlatBufferBuilder &_fbb,
- circle::ActivationFunctionType fused_activation_function = circle::ActivationFunctionType_NONE,
- circle::FullyConnectedOptionsWeightsFormat weights_format =
- circle::FullyConnectedOptionsWeightsFormat_DEFAULT,
- bool keep_num_dims = false, bool asymmetric_quantize_inputs = false)
+ flatbuffers::FlatBufferBuilder &_fbb,
+ circle::ActivationFunctionType fused_activation_function = circle::ActivationFunctionType_NONE,
+ circle::FullyConnectedOptionsWeightsFormat weights_format =
+ circle::FullyConnectedOptionsWeightsFormat_DEFAULT,
+ bool keep_num_dims = false, bool asymmetric_quantize_inputs = false)
{
FullyConnectedOptionsBuilder builder_(_fbb);
builder_.add_asymmetric_quantize_inputs(asymmetric_quantize_inputs);
@@ -4023,7 +4019,7 @@ struct ConcatenationOptions FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table
circle::ActivationFunctionType fused_activation_function() const
{
return static_cast<circle::ActivationFunctionType>(
- GetField<int8_t>(VT_FUSED_ACTIVATION_FUNCTION, 0));
+ GetField<int8_t>(VT_FUSED_ACTIVATION_FUNCTION, 0));
}
bool Verify(flatbuffers::Verifier &verifier) const
{
@@ -4057,8 +4053,8 @@ struct ConcatenationOptionsBuilder
};
inline flatbuffers::Offset<ConcatenationOptions> CreateConcatenationOptions(
- flatbuffers::FlatBufferBuilder &_fbb, int32_t axis = 0,
- circle::ActivationFunctionType fused_activation_function = circle::ActivationFunctionType_NONE)
+ flatbuffers::FlatBufferBuilder &_fbb, int32_t axis = 0,
+ circle::ActivationFunctionType fused_activation_function = circle::ActivationFunctionType_NONE)
{
ConcatenationOptionsBuilder builder_(_fbb);
builder_.add_axis(axis);
@@ -4076,7 +4072,7 @@ struct AddOptions FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table
circle::ActivationFunctionType fused_activation_function() const
{
return static_cast<circle::ActivationFunctionType>(
- GetField<int8_t>(VT_FUSED_ACTIVATION_FUNCTION, 0));
+ GetField<int8_t>(VT_FUSED_ACTIVATION_FUNCTION, 0));
}
bool Verify(flatbuffers::Verifier &verifier) const
{
@@ -4109,8 +4105,8 @@ struct AddOptionsBuilder
};
inline flatbuffers::Offset<AddOptions> CreateAddOptions(
- flatbuffers::FlatBufferBuilder &_fbb,
- circle::ActivationFunctionType fused_activation_function = circle::ActivationFunctionType_NONE)
+ flatbuffers::FlatBufferBuilder &_fbb,
+ circle::ActivationFunctionType fused_activation_function = circle::ActivationFunctionType_NONE)
{
AddOptionsBuilder builder_(_fbb);
builder_.add_fused_activation_function(fused_activation_function);
@@ -4127,7 +4123,7 @@ struct MulOptions FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table
circle::ActivationFunctionType fused_activation_function() const
{
return static_cast<circle::ActivationFunctionType>(
- GetField<int8_t>(VT_FUSED_ACTIVATION_FUNCTION, 0));
+ GetField<int8_t>(VT_FUSED_ACTIVATION_FUNCTION, 0));
}
bool Verify(flatbuffers::Verifier &verifier) const
{
@@ -4160,8 +4156,8 @@ struct MulOptionsBuilder
};
inline flatbuffers::Offset<MulOptions> CreateMulOptions(
- flatbuffers::FlatBufferBuilder &_fbb,
- circle::ActivationFunctionType fused_activation_function = circle::ActivationFunctionType_NONE)
+ flatbuffers::FlatBufferBuilder &_fbb,
+ circle::ActivationFunctionType fused_activation_function = circle::ActivationFunctionType_NONE)
{
MulOptionsBuilder builder_(_fbb);
builder_.add_fused_activation_function(fused_activation_function);
@@ -4178,7 +4174,7 @@ struct L2NormOptions FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table
circle::ActivationFunctionType fused_activation_function() const
{
return static_cast<circle::ActivationFunctionType>(
- GetField<int8_t>(VT_FUSED_ACTIVATION_FUNCTION, 0));
+ GetField<int8_t>(VT_FUSED_ACTIVATION_FUNCTION, 0));
}
bool Verify(flatbuffers::Verifier &verifier) const
{
@@ -4211,8 +4207,8 @@ struct L2NormOptionsBuilder
};
inline flatbuffers::Offset<L2NormOptions> CreateL2NormOptions(
- flatbuffers::FlatBufferBuilder &_fbb,
- circle::ActivationFunctionType fused_activation_function = circle::ActivationFunctionType_NONE)
+ flatbuffers::FlatBufferBuilder &_fbb,
+ circle::ActivationFunctionType fused_activation_function = circle::ActivationFunctionType_NONE)
{
L2NormOptionsBuilder builder_(_fbb);
builder_.add_fused_activation_function(fused_activation_function);
@@ -4263,7 +4259,7 @@ struct LocalResponseNormalizationOptionsBuilder
fbb_.AddElement<float>(LocalResponseNormalizationOptions::VT_BETA, beta, 0.0f);
}
explicit LocalResponseNormalizationOptionsBuilder(flatbuffers::FlatBufferBuilder &_fbb)
- : fbb_(_fbb)
+ : fbb_(_fbb)
{
start_ = fbb_.StartTable();
}
@@ -4303,7 +4299,7 @@ struct LSTMOptions FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table
circle::ActivationFunctionType fused_activation_function() const
{
return static_cast<circle::ActivationFunctionType>(
- GetField<int8_t>(VT_FUSED_ACTIVATION_FUNCTION, 0));
+ GetField<int8_t>(VT_FUSED_ACTIVATION_FUNCTION, 0));
}
float cell_clip() const { return GetField<float>(VT_CELL_CLIP, 0.0f); }
float proj_clip() const { return GetField<float>(VT_PROJ_CLIP, 0.0f); }
@@ -4367,11 +4363,11 @@ struct LSTMOptionsBuilder
};
inline flatbuffers::Offset<LSTMOptions> CreateLSTMOptions(
- flatbuffers::FlatBufferBuilder &_fbb,
- circle::ActivationFunctionType fused_activation_function = circle::ActivationFunctionType_NONE,
- float cell_clip = 0.0f, float proj_clip = 0.0f,
- circle::LSTMKernelType kernel_type = circle::LSTMKernelType_FULL,
- bool asymmetric_quantize_inputs = false)
+ flatbuffers::FlatBufferBuilder &_fbb,
+ circle::ActivationFunctionType fused_activation_function = circle::ActivationFunctionType_NONE,
+ float cell_clip = 0.0f, float proj_clip = 0.0f,
+ circle::LSTMKernelType kernel_type = circle::LSTMKernelType_FULL,
+ bool asymmetric_quantize_inputs = false)
{
LSTMOptionsBuilder builder_(_fbb);
builder_.add_proj_clip(proj_clip);
@@ -4396,7 +4392,7 @@ struct UnidirectionalSequenceLSTMOptions FLATBUFFERS_FINAL_CLASS : private flatb
circle::ActivationFunctionType fused_activation_function() const
{
return static_cast<circle::ActivationFunctionType>(
- GetField<int8_t>(VT_FUSED_ACTIVATION_FUNCTION, 0));
+ GetField<int8_t>(VT_FUSED_ACTIVATION_FUNCTION, 0));
}
float cell_clip() const { return GetField<float>(VT_CELL_CLIP, 0.0f); }
float proj_clip() const { return GetField<float>(VT_PROJ_CLIP, 0.0f); }
@@ -4445,7 +4441,7 @@ struct UnidirectionalSequenceLSTMOptionsBuilder
static_cast<uint8_t>(asymmetric_quantize_inputs), 0);
}
explicit UnidirectionalSequenceLSTMOptionsBuilder(flatbuffers::FlatBufferBuilder &_fbb)
- : fbb_(_fbb)
+ : fbb_(_fbb)
{
start_ = fbb_.StartTable();
}
@@ -4461,10 +4457,10 @@ struct UnidirectionalSequenceLSTMOptionsBuilder
inline flatbuffers::Offset<UnidirectionalSequenceLSTMOptions>
CreateUnidirectionalSequenceLSTMOptions(
- flatbuffers::FlatBufferBuilder &_fbb,
- circle::ActivationFunctionType fused_activation_function = circle::ActivationFunctionType_NONE,
- float cell_clip = 0.0f, float proj_clip = 0.0f, bool time_major = false,
- bool asymmetric_quantize_inputs = false)
+ flatbuffers::FlatBufferBuilder &_fbb,
+ circle::ActivationFunctionType fused_activation_function = circle::ActivationFunctionType_NONE,
+ float cell_clip = 0.0f, float proj_clip = 0.0f, bool time_major = false,
+ bool asymmetric_quantize_inputs = false)
{
UnidirectionalSequenceLSTMOptionsBuilder builder_(_fbb);
builder_.add_proj_clip(proj_clip);
@@ -4490,7 +4486,7 @@ struct BidirectionalSequenceLSTMOptions FLATBUFFERS_FINAL_CLASS : private flatbu
circle::ActivationFunctionType fused_activation_function() const
{
return static_cast<circle::ActivationFunctionType>(
- GetField<int8_t>(VT_FUSED_ACTIVATION_FUNCTION, 0));
+ GetField<int8_t>(VT_FUSED_ACTIVATION_FUNCTION, 0));
}
float cell_clip() const { return GetField<float>(VT_CELL_CLIP, 0.0f); }
float proj_clip() const { return GetField<float>(VT_PROJ_CLIP, 0.0f); }
@@ -4546,7 +4542,7 @@ struct BidirectionalSequenceLSTMOptionsBuilder
static_cast<uint8_t>(asymmetric_quantize_inputs), 0);
}
explicit BidirectionalSequenceLSTMOptionsBuilder(flatbuffers::FlatBufferBuilder &_fbb)
- : fbb_(_fbb)
+ : fbb_(_fbb)
{
start_ = fbb_.StartTable();
}
@@ -4561,10 +4557,10 @@ struct BidirectionalSequenceLSTMOptionsBuilder
};
inline flatbuffers::Offset<BidirectionalSequenceLSTMOptions> CreateBidirectionalSequenceLSTMOptions(
- flatbuffers::FlatBufferBuilder &_fbb,
- circle::ActivationFunctionType fused_activation_function = circle::ActivationFunctionType_NONE,
- float cell_clip = 0.0f, float proj_clip = 0.0f, bool merge_outputs = false,
- bool time_major = true, bool asymmetric_quantize_inputs = false)
+ flatbuffers::FlatBufferBuilder &_fbb,
+ circle::ActivationFunctionType fused_activation_function = circle::ActivationFunctionType_NONE,
+ float cell_clip = 0.0f, float proj_clip = 0.0f, bool merge_outputs = false,
+ bool time_major = true, bool asymmetric_quantize_inputs = false)
{
BidirectionalSequenceLSTMOptionsBuilder builder_(_fbb);
builder_.add_proj_clip(proj_clip);
@@ -5075,7 +5071,7 @@ struct SubOptions FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table
circle::ActivationFunctionType fused_activation_function() const
{
return static_cast<circle::ActivationFunctionType>(
- GetField<int8_t>(VT_FUSED_ACTIVATION_FUNCTION, 0));
+ GetField<int8_t>(VT_FUSED_ACTIVATION_FUNCTION, 0));
}
bool Verify(flatbuffers::Verifier &verifier) const
{
@@ -5108,8 +5104,8 @@ struct SubOptionsBuilder
};
inline flatbuffers::Offset<SubOptions> CreateSubOptions(
- flatbuffers::FlatBufferBuilder &_fbb,
- circle::ActivationFunctionType fused_activation_function = circle::ActivationFunctionType_NONE)
+ flatbuffers::FlatBufferBuilder &_fbb,
+ circle::ActivationFunctionType fused_activation_function = circle::ActivationFunctionType_NONE)
{
SubOptionsBuilder builder_(_fbb);
builder_.add_fused_activation_function(fused_activation_function);
@@ -5126,7 +5122,7 @@ struct DivOptions FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table
circle::ActivationFunctionType fused_activation_function() const
{
return static_cast<circle::ActivationFunctionType>(
- GetField<int8_t>(VT_FUSED_ACTIVATION_FUNCTION, 0));
+ GetField<int8_t>(VT_FUSED_ACTIVATION_FUNCTION, 0));
}
bool Verify(flatbuffers::Verifier &verifier) const
{
@@ -5159,8 +5155,8 @@ struct DivOptionsBuilder
};
inline flatbuffers::Offset<DivOptions> CreateDivOptions(
- flatbuffers::FlatBufferBuilder &_fbb,
- circle::ActivationFunctionType fused_activation_function = circle::ActivationFunctionType_NONE)
+ flatbuffers::FlatBufferBuilder &_fbb,
+ circle::ActivationFunctionType fused_activation_function = circle::ActivationFunctionType_NONE)
{
DivOptionsBuilder builder_(_fbb);
builder_.add_fused_activation_function(fused_activation_function);
@@ -7976,7 +7972,7 @@ struct BCQFullyConnectedOptions FLATBUFFERS_FINAL_CLASS : private flatbuffers::T
circle::ActivationFunctionType fused_activation_function() const
{
return static_cast<circle::ActivationFunctionType>(
- GetField<int8_t>(VT_FUSED_ACTIVATION_FUNCTION, 0));
+ GetField<int8_t>(VT_FUSED_ACTIVATION_FUNCTION, 0));
}
bool Verify(flatbuffers::Verifier &verifier) const
{
@@ -8014,8 +8010,8 @@ struct BCQFullyConnectedOptionsBuilder
};
inline flatbuffers::Offset<BCQFullyConnectedOptions> CreateBCQFullyConnectedOptions(
- flatbuffers::FlatBufferBuilder &_fbb, int32_t weights_hidden_size = 0,
- circle::ActivationFunctionType fused_activation_function = circle::ActivationFunctionType_NONE)
+ flatbuffers::FlatBufferBuilder &_fbb, int32_t weights_hidden_size = 0,
+ circle::ActivationFunctionType fused_activation_function = circle::ActivationFunctionType_NONE)
{
BCQFullyConnectedOptionsBuilder builder_(_fbb);
builder_.add_weights_hidden_size(weights_hidden_size);
@@ -8035,7 +8031,7 @@ struct InstanceNormOptions FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table
circle::ActivationFunctionType fused_activation_function() const
{
return static_cast<circle::ActivationFunctionType>(
- GetField<int8_t>(VT_FUSED_ACTIVATION_FUNCTION, 0));
+ GetField<int8_t>(VT_FUSED_ACTIVATION_FUNCTION, 0));
}
bool Verify(flatbuffers::Verifier &verifier) const
{
@@ -8072,8 +8068,8 @@ struct InstanceNormOptionsBuilder
};
inline flatbuffers::Offset<InstanceNormOptions> CreateInstanceNormOptions(
- flatbuffers::FlatBufferBuilder &_fbb, float epsilon = 0.0f,
- circle::ActivationFunctionType fused_activation_function = circle::ActivationFunctionType_NONE)
+ flatbuffers::FlatBufferBuilder &_fbb, float epsilon = 0.0f,
+ circle::ActivationFunctionType fused_activation_function = circle::ActivationFunctionType_NONE)
{
InstanceNormOptionsBuilder builder_(_fbb);
builder_.add_epsilon(epsilon);
@@ -8191,632 +8187,632 @@ struct Operator FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table
const circle::Conv2DOptions *builtin_options_as_Conv2DOptions() const
{
return builtin_options_type() == circle::BuiltinOptions_Conv2DOptions
- ? static_cast<const circle::Conv2DOptions *>(builtin_options())
- : nullptr;
+ ? static_cast<const circle::Conv2DOptions *>(builtin_options())
+ : nullptr;
}
const circle::DepthwiseConv2DOptions *builtin_options_as_DepthwiseConv2DOptions() const
{
return builtin_options_type() == circle::BuiltinOptions_DepthwiseConv2DOptions
- ? static_cast<const circle::DepthwiseConv2DOptions *>(builtin_options())
- : nullptr;
+ ? static_cast<const circle::DepthwiseConv2DOptions *>(builtin_options())
+ : nullptr;
}
const circle::ConcatEmbeddingsOptions *builtin_options_as_ConcatEmbeddingsOptions() const
{
return builtin_options_type() == circle::BuiltinOptions_ConcatEmbeddingsOptions
- ? static_cast<const circle::ConcatEmbeddingsOptions *>(builtin_options())
- : nullptr;
+ ? static_cast<const circle::ConcatEmbeddingsOptions *>(builtin_options())
+ : nullptr;
}
const circle::LSHProjectionOptions *builtin_options_as_LSHProjectionOptions() const
{
return builtin_options_type() == circle::BuiltinOptions_LSHProjectionOptions
- ? static_cast<const circle::LSHProjectionOptions *>(builtin_options())
- : nullptr;
+ ? static_cast<const circle::LSHProjectionOptions *>(builtin_options())
+ : nullptr;
}
const circle::Pool2DOptions *builtin_options_as_Pool2DOptions() const
{
return builtin_options_type() == circle::BuiltinOptions_Pool2DOptions
- ? static_cast<const circle::Pool2DOptions *>(builtin_options())
- : nullptr;
+ ? static_cast<const circle::Pool2DOptions *>(builtin_options())
+ : nullptr;
}
const circle::SVDFOptions *builtin_options_as_SVDFOptions() const
{
return builtin_options_type() == circle::BuiltinOptions_SVDFOptions
- ? static_cast<const circle::SVDFOptions *>(builtin_options())
- : nullptr;
+ ? static_cast<const circle::SVDFOptions *>(builtin_options())
+ : nullptr;
}
const circle::RNNOptions *builtin_options_as_RNNOptions() const
{
return builtin_options_type() == circle::BuiltinOptions_RNNOptions
- ? static_cast<const circle::RNNOptions *>(builtin_options())
- : nullptr;
+ ? static_cast<const circle::RNNOptions *>(builtin_options())
+ : nullptr;
}
const circle::FullyConnectedOptions *builtin_options_as_FullyConnectedOptions() const
{
return builtin_options_type() == circle::BuiltinOptions_FullyConnectedOptions
- ? static_cast<const circle::FullyConnectedOptions *>(builtin_options())
- : nullptr;
+ ? static_cast<const circle::FullyConnectedOptions *>(builtin_options())
+ : nullptr;
}
const circle::SoftmaxOptions *builtin_options_as_SoftmaxOptions() const
{
return builtin_options_type() == circle::BuiltinOptions_SoftmaxOptions
- ? static_cast<const circle::SoftmaxOptions *>(builtin_options())
- : nullptr;
+ ? static_cast<const circle::SoftmaxOptions *>(builtin_options())
+ : nullptr;
}
const circle::ConcatenationOptions *builtin_options_as_ConcatenationOptions() const
{
return builtin_options_type() == circle::BuiltinOptions_ConcatenationOptions
- ? static_cast<const circle::ConcatenationOptions *>(builtin_options())
- : nullptr;
+ ? static_cast<const circle::ConcatenationOptions *>(builtin_options())
+ : nullptr;
}
const circle::AddOptions *builtin_options_as_AddOptions() const
{
return builtin_options_type() == circle::BuiltinOptions_AddOptions
- ? static_cast<const circle::AddOptions *>(builtin_options())
- : nullptr;
+ ? static_cast<const circle::AddOptions *>(builtin_options())
+ : nullptr;
}
const circle::L2NormOptions *builtin_options_as_L2NormOptions() const
{
return builtin_options_type() == circle::BuiltinOptions_L2NormOptions
- ? static_cast<const circle::L2NormOptions *>(builtin_options())
- : nullptr;
+ ? static_cast<const circle::L2NormOptions *>(builtin_options())
+ : nullptr;
}
const circle::LocalResponseNormalizationOptions *
builtin_options_as_LocalResponseNormalizationOptions() const
{
return builtin_options_type() == circle::BuiltinOptions_LocalResponseNormalizationOptions
- ? static_cast<const circle::LocalResponseNormalizationOptions *>(builtin_options())
- : nullptr;
+ ? static_cast<const circle::LocalResponseNormalizationOptions *>(builtin_options())
+ : nullptr;
}
const circle::LSTMOptions *builtin_options_as_LSTMOptions() const
{
return builtin_options_type() == circle::BuiltinOptions_LSTMOptions
- ? static_cast<const circle::LSTMOptions *>(builtin_options())
- : nullptr;
+ ? static_cast<const circle::LSTMOptions *>(builtin_options())
+ : nullptr;
}
const circle::ResizeBilinearOptions *builtin_options_as_ResizeBilinearOptions() const
{
return builtin_options_type() == circle::BuiltinOptions_ResizeBilinearOptions
- ? static_cast<const circle::ResizeBilinearOptions *>(builtin_options())
- : nullptr;
+ ? static_cast<const circle::ResizeBilinearOptions *>(builtin_options())
+ : nullptr;
}
const circle::CallOptions *builtin_options_as_CallOptions() const
{
return builtin_options_type() == circle::BuiltinOptions_CallOptions
- ? static_cast<const circle::CallOptions *>(builtin_options())
- : nullptr;
+ ? static_cast<const circle::CallOptions *>(builtin_options())
+ : nullptr;
}
const circle::ReshapeOptions *builtin_options_as_ReshapeOptions() const
{
return builtin_options_type() == circle::BuiltinOptions_ReshapeOptions
- ? static_cast<const circle::ReshapeOptions *>(builtin_options())
- : nullptr;
+ ? static_cast<const circle::ReshapeOptions *>(builtin_options())
+ : nullptr;
}
const circle::SkipGramOptions *builtin_options_as_SkipGramOptions() const
{
return builtin_options_type() == circle::BuiltinOptions_SkipGramOptions
- ? static_cast<const circle::SkipGramOptions *>(builtin_options())
- : nullptr;
+ ? static_cast<const circle::SkipGramOptions *>(builtin_options())
+ : nullptr;
}
const circle::SpaceToDepthOptions *builtin_options_as_SpaceToDepthOptions() const
{
return builtin_options_type() == circle::BuiltinOptions_SpaceToDepthOptions
- ? static_cast<const circle::SpaceToDepthOptions *>(builtin_options())
- : nullptr;
+ ? static_cast<const circle::SpaceToDepthOptions *>(builtin_options())
+ : nullptr;
}
const circle::EmbeddingLookupSparseOptions *
builtin_options_as_EmbeddingLookupSparseOptions() const
{
return builtin_options_type() == circle::BuiltinOptions_EmbeddingLookupSparseOptions
- ? static_cast<const circle::EmbeddingLookupSparseOptions *>(builtin_options())
- : nullptr;
+ ? static_cast<const circle::EmbeddingLookupSparseOptions *>(builtin_options())
+ : nullptr;
}
const circle::MulOptions *builtin_options_as_MulOptions() const
{
return builtin_options_type() == circle::BuiltinOptions_MulOptions
- ? static_cast<const circle::MulOptions *>(builtin_options())
- : nullptr;
+ ? static_cast<const circle::MulOptions *>(builtin_options())
+ : nullptr;
}
const circle::PadOptions *builtin_options_as_PadOptions() const
{
return builtin_options_type() == circle::BuiltinOptions_PadOptions
- ? static_cast<const circle::PadOptions *>(builtin_options())
- : nullptr;
+ ? static_cast<const circle::PadOptions *>(builtin_options())
+ : nullptr;
}
const circle::GatherOptions *builtin_options_as_GatherOptions() const
{
return builtin_options_type() == circle::BuiltinOptions_GatherOptions
- ? static_cast<const circle::GatherOptions *>(builtin_options())
- : nullptr;
+ ? static_cast<const circle::GatherOptions *>(builtin_options())
+ : nullptr;
}
const circle::BatchToSpaceNDOptions *builtin_options_as_BatchToSpaceNDOptions() const
{
return builtin_options_type() == circle::BuiltinOptions_BatchToSpaceNDOptions
- ? static_cast<const circle::BatchToSpaceNDOptions *>(builtin_options())
- : nullptr;
+ ? static_cast<const circle::BatchToSpaceNDOptions *>(builtin_options())
+ : nullptr;
}
const circle::SpaceToBatchNDOptions *builtin_options_as_SpaceToBatchNDOptions() const
{
return builtin_options_type() == circle::BuiltinOptions_SpaceToBatchNDOptions
- ? static_cast<const circle::SpaceToBatchNDOptions *>(builtin_options())
- : nullptr;
+ ? static_cast<const circle::SpaceToBatchNDOptions *>(builtin_options())
+ : nullptr;
}
const circle::TransposeOptions *builtin_options_as_TransposeOptions() const
{
return builtin_options_type() == circle::BuiltinOptions_TransposeOptions
- ? static_cast<const circle::TransposeOptions *>(builtin_options())
- : nullptr;
+ ? static_cast<const circle::TransposeOptions *>(builtin_options())
+ : nullptr;
}
const circle::ReducerOptions *builtin_options_as_ReducerOptions() const
{
return builtin_options_type() == circle::BuiltinOptions_ReducerOptions
- ? static_cast<const circle::ReducerOptions *>(builtin_options())
- : nullptr;
+ ? static_cast<const circle::ReducerOptions *>(builtin_options())
+ : nullptr;
}
const circle::SubOptions *builtin_options_as_SubOptions() const
{
return builtin_options_type() == circle::BuiltinOptions_SubOptions
- ? static_cast<const circle::SubOptions *>(builtin_options())
- : nullptr;
+ ? static_cast<const circle::SubOptions *>(builtin_options())
+ : nullptr;
}
const circle::DivOptions *builtin_options_as_DivOptions() const
{
return builtin_options_type() == circle::BuiltinOptions_DivOptions
- ? static_cast<const circle::DivOptions *>(builtin_options())
- : nullptr;
+ ? static_cast<const circle::DivOptions *>(builtin_options())
+ : nullptr;
}
const circle::SqueezeOptions *builtin_options_as_SqueezeOptions() const
{
return builtin_options_type() == circle::BuiltinOptions_SqueezeOptions
- ? static_cast<const circle::SqueezeOptions *>(builtin_options())
- : nullptr;
+ ? static_cast<const circle::SqueezeOptions *>(builtin_options())
+ : nullptr;
}
const circle::SequenceRNNOptions *builtin_options_as_SequenceRNNOptions() const
{
return builtin_options_type() == circle::BuiltinOptions_SequenceRNNOptions
- ? static_cast<const circle::SequenceRNNOptions *>(builtin_options())
- : nullptr;
+ ? static_cast<const circle::SequenceRNNOptions *>(builtin_options())
+ : nullptr;
}
const circle::StridedSliceOptions *builtin_options_as_StridedSliceOptions() const
{
return builtin_options_type() == circle::BuiltinOptions_StridedSliceOptions
- ? static_cast<const circle::StridedSliceOptions *>(builtin_options())
- : nullptr;
+ ? static_cast<const circle::StridedSliceOptions *>(builtin_options())
+ : nullptr;
}
const circle::ExpOptions *builtin_options_as_ExpOptions() const
{
return builtin_options_type() == circle::BuiltinOptions_ExpOptions
- ? static_cast<const circle::ExpOptions *>(builtin_options())
- : nullptr;
+ ? static_cast<const circle::ExpOptions *>(builtin_options())
+ : nullptr;
}
const circle::TopKV2Options *builtin_options_as_TopKV2Options() const
{
return builtin_options_type() == circle::BuiltinOptions_TopKV2Options
- ? static_cast<const circle::TopKV2Options *>(builtin_options())
- : nullptr;
+ ? static_cast<const circle::TopKV2Options *>(builtin_options())
+ : nullptr;
}
const circle::SplitOptions *builtin_options_as_SplitOptions() const
{
return builtin_options_type() == circle::BuiltinOptions_SplitOptions
- ? static_cast<const circle::SplitOptions *>(builtin_options())
- : nullptr;
+ ? static_cast<const circle::SplitOptions *>(builtin_options())
+ : nullptr;
}
const circle::LogSoftmaxOptions *builtin_options_as_LogSoftmaxOptions() const
{
return builtin_options_type() == circle::BuiltinOptions_LogSoftmaxOptions
- ? static_cast<const circle::LogSoftmaxOptions *>(builtin_options())
- : nullptr;
+ ? static_cast<const circle::LogSoftmaxOptions *>(builtin_options())
+ : nullptr;
}
const circle::CastOptions *builtin_options_as_CastOptions() const
{
return builtin_options_type() == circle::BuiltinOptions_CastOptions
- ? static_cast<const circle::CastOptions *>(builtin_options())
- : nullptr;
+ ? static_cast<const circle::CastOptions *>(builtin_options())
+ : nullptr;
}
const circle::DequantizeOptions *builtin_options_as_DequantizeOptions() const
{
return builtin_options_type() == circle::BuiltinOptions_DequantizeOptions
- ? static_cast<const circle::DequantizeOptions *>(builtin_options())
- : nullptr;
+ ? static_cast<const circle::DequantizeOptions *>(builtin_options())
+ : nullptr;
}
const circle::MaximumMinimumOptions *builtin_options_as_MaximumMinimumOptions() const
{
return builtin_options_type() == circle::BuiltinOptions_MaximumMinimumOptions
- ? static_cast<const circle::MaximumMinimumOptions *>(builtin_options())
- : nullptr;
+ ? static_cast<const circle::MaximumMinimumOptions *>(builtin_options())
+ : nullptr;
}
const circle::ArgMaxOptions *builtin_options_as_ArgMaxOptions() const
{
return builtin_options_type() == circle::BuiltinOptions_ArgMaxOptions
- ? static_cast<const circle::ArgMaxOptions *>(builtin_options())
- : nullptr;
+ ? static_cast<const circle::ArgMaxOptions *>(builtin_options())
+ : nullptr;
}
const circle::LessOptions *builtin_options_as_LessOptions() const
{
return builtin_options_type() == circle::BuiltinOptions_LessOptions
- ? static_cast<const circle::LessOptions *>(builtin_options())
- : nullptr;
+ ? static_cast<const circle::LessOptions *>(builtin_options())
+ : nullptr;
}
const circle::NegOptions *builtin_options_as_NegOptions() const
{
return builtin_options_type() == circle::BuiltinOptions_NegOptions
- ? static_cast<const circle::NegOptions *>(builtin_options())
- : nullptr;
+ ? static_cast<const circle::NegOptions *>(builtin_options())
+ : nullptr;
}
const circle::PadV2Options *builtin_options_as_PadV2Options() const
{
return builtin_options_type() == circle::BuiltinOptions_PadV2Options
- ? static_cast<const circle::PadV2Options *>(builtin_options())
- : nullptr;
+ ? static_cast<const circle::PadV2Options *>(builtin_options())
+ : nullptr;
}
const circle::GreaterOptions *builtin_options_as_GreaterOptions() const
{
return builtin_options_type() == circle::BuiltinOptions_GreaterOptions
- ? static_cast<const circle::GreaterOptions *>(builtin_options())
- : nullptr;
+ ? static_cast<const circle::GreaterOptions *>(builtin_options())
+ : nullptr;
}
const circle::GreaterEqualOptions *builtin_options_as_GreaterEqualOptions() const
{
return builtin_options_type() == circle::BuiltinOptions_GreaterEqualOptions
- ? static_cast<const circle::GreaterEqualOptions *>(builtin_options())
- : nullptr;
+ ? static_cast<const circle::GreaterEqualOptions *>(builtin_options())
+ : nullptr;
}
const circle::LessEqualOptions *builtin_options_as_LessEqualOptions() const
{
return builtin_options_type() == circle::BuiltinOptions_LessEqualOptions
- ? static_cast<const circle::LessEqualOptions *>(builtin_options())
- : nullptr;
+ ? static_cast<const circle::LessEqualOptions *>(builtin_options())
+ : nullptr;
}
const circle::SelectOptions *builtin_options_as_SelectOptions() const
{
return builtin_options_type() == circle::BuiltinOptions_SelectOptions
- ? static_cast<const circle::SelectOptions *>(builtin_options())
- : nullptr;
+ ? static_cast<const circle::SelectOptions *>(builtin_options())
+ : nullptr;
}
const circle::SliceOptions *builtin_options_as_SliceOptions() const
{
return builtin_options_type() == circle::BuiltinOptions_SliceOptions
- ? static_cast<const circle::SliceOptions *>(builtin_options())
- : nullptr;
+ ? static_cast<const circle::SliceOptions *>(builtin_options())
+ : nullptr;
}
const circle::TransposeConvOptions *builtin_options_as_TransposeConvOptions() const
{
return builtin_options_type() == circle::BuiltinOptions_TransposeConvOptions
- ? static_cast<const circle::TransposeConvOptions *>(builtin_options())
- : nullptr;
+ ? static_cast<const circle::TransposeConvOptions *>(builtin_options())
+ : nullptr;
}
const circle::SparseToDenseOptions *builtin_options_as_SparseToDenseOptions() const
{
return builtin_options_type() == circle::BuiltinOptions_SparseToDenseOptions
- ? static_cast<const circle::SparseToDenseOptions *>(builtin_options())
- : nullptr;
+ ? static_cast<const circle::SparseToDenseOptions *>(builtin_options())
+ : nullptr;
}
const circle::TileOptions *builtin_options_as_TileOptions() const
{
return builtin_options_type() == circle::BuiltinOptions_TileOptions
- ? static_cast<const circle::TileOptions *>(builtin_options())
- : nullptr;
+ ? static_cast<const circle::TileOptions *>(builtin_options())
+ : nullptr;
}
const circle::ExpandDimsOptions *builtin_options_as_ExpandDimsOptions() const
{
return builtin_options_type() == circle::BuiltinOptions_ExpandDimsOptions
- ? static_cast<const circle::ExpandDimsOptions *>(builtin_options())
- : nullptr;
+ ? static_cast<const circle::ExpandDimsOptions *>(builtin_options())
+ : nullptr;
}
const circle::EqualOptions *builtin_options_as_EqualOptions() const
{
return builtin_options_type() == circle::BuiltinOptions_EqualOptions
- ? static_cast<const circle::EqualOptions *>(builtin_options())
- : nullptr;
+ ? static_cast<const circle::EqualOptions *>(builtin_options())
+ : nullptr;
}
const circle::NotEqualOptions *builtin_options_as_NotEqualOptions() const
{
return builtin_options_type() == circle::BuiltinOptions_NotEqualOptions
- ? static_cast<const circle::NotEqualOptions *>(builtin_options())
- : nullptr;
+ ? static_cast<const circle::NotEqualOptions *>(builtin_options())
+ : nullptr;
}
const circle::ShapeOptions *builtin_options_as_ShapeOptions() const
{
return builtin_options_type() == circle::BuiltinOptions_ShapeOptions
- ? static_cast<const circle::ShapeOptions *>(builtin_options())
- : nullptr;
+ ? static_cast<const circle::ShapeOptions *>(builtin_options())
+ : nullptr;
}
const circle::PowOptions *builtin_options_as_PowOptions() const
{
return builtin_options_type() == circle::BuiltinOptions_PowOptions
- ? static_cast<const circle::PowOptions *>(builtin_options())
- : nullptr;
+ ? static_cast<const circle::PowOptions *>(builtin_options())
+ : nullptr;
}
const circle::ArgMinOptions *builtin_options_as_ArgMinOptions() const
{
return builtin_options_type() == circle::BuiltinOptions_ArgMinOptions
- ? static_cast<const circle::ArgMinOptions *>(builtin_options())
- : nullptr;
+ ? static_cast<const circle::ArgMinOptions *>(builtin_options())
+ : nullptr;
}
const circle::FakeQuantOptions *builtin_options_as_FakeQuantOptions() const
{
return builtin_options_type() == circle::BuiltinOptions_FakeQuantOptions
- ? static_cast<const circle::FakeQuantOptions *>(builtin_options())
- : nullptr;
+ ? static_cast<const circle::FakeQuantOptions *>(builtin_options())
+ : nullptr;
}
const circle::PackOptions *builtin_options_as_PackOptions() const
{
return builtin_options_type() == circle::BuiltinOptions_PackOptions
- ? static_cast<const circle::PackOptions *>(builtin_options())
- : nullptr;
+ ? static_cast<const circle::PackOptions *>(builtin_options())
+ : nullptr;
}
const circle::LogicalOrOptions *builtin_options_as_LogicalOrOptions() const
{
return builtin_options_type() == circle::BuiltinOptions_LogicalOrOptions
- ? static_cast<const circle::LogicalOrOptions *>(builtin_options())
- : nullptr;
+ ? static_cast<const circle::LogicalOrOptions *>(builtin_options())
+ : nullptr;
}
const circle::OneHotOptions *builtin_options_as_OneHotOptions() const
{
return builtin_options_type() == circle::BuiltinOptions_OneHotOptions
- ? static_cast<const circle::OneHotOptions *>(builtin_options())
- : nullptr;
+ ? static_cast<const circle::OneHotOptions *>(builtin_options())
+ : nullptr;
}
const circle::LogicalAndOptions *builtin_options_as_LogicalAndOptions() const
{
return builtin_options_type() == circle::BuiltinOptions_LogicalAndOptions
- ? static_cast<const circle::LogicalAndOptions *>(builtin_options())
- : nullptr;
+ ? static_cast<const circle::LogicalAndOptions *>(builtin_options())
+ : nullptr;
}
const circle::LogicalNotOptions *builtin_options_as_LogicalNotOptions() const
{
return builtin_options_type() == circle::BuiltinOptions_LogicalNotOptions
- ? static_cast<const circle::LogicalNotOptions *>(builtin_options())
- : nullptr;
+ ? static_cast<const circle::LogicalNotOptions *>(builtin_options())
+ : nullptr;
}
const circle::UnpackOptions *builtin_options_as_UnpackOptions() const
{
return builtin_options_type() == circle::BuiltinOptions_UnpackOptions
- ? static_cast<const circle::UnpackOptions *>(builtin_options())
- : nullptr;
+ ? static_cast<const circle::UnpackOptions *>(builtin_options())
+ : nullptr;
}
const circle::FloorDivOptions *builtin_options_as_FloorDivOptions() const
{
return builtin_options_type() == circle::BuiltinOptions_FloorDivOptions
- ? static_cast<const circle::FloorDivOptions *>(builtin_options())
- : nullptr;
+ ? static_cast<const circle::FloorDivOptions *>(builtin_options())
+ : nullptr;
}
const circle::SquareOptions *builtin_options_as_SquareOptions() const
{
return builtin_options_type() == circle::BuiltinOptions_SquareOptions
- ? static_cast<const circle::SquareOptions *>(builtin_options())
- : nullptr;
+ ? static_cast<const circle::SquareOptions *>(builtin_options())
+ : nullptr;
}
const circle::ZerosLikeOptions *builtin_options_as_ZerosLikeOptions() const
{
return builtin_options_type() == circle::BuiltinOptions_ZerosLikeOptions
- ? static_cast<const circle::ZerosLikeOptions *>(builtin_options())
- : nullptr;
+ ? static_cast<const circle::ZerosLikeOptions *>(builtin_options())
+ : nullptr;
}
const circle::FillOptions *builtin_options_as_FillOptions() const
{
return builtin_options_type() == circle::BuiltinOptions_FillOptions
- ? static_cast<const circle::FillOptions *>(builtin_options())
- : nullptr;
+ ? static_cast<const circle::FillOptions *>(builtin_options())
+ : nullptr;
}
const circle::BidirectionalSequenceLSTMOptions *
builtin_options_as_BidirectionalSequenceLSTMOptions() const
{
return builtin_options_type() == circle::BuiltinOptions_BidirectionalSequenceLSTMOptions
- ? static_cast<const circle::BidirectionalSequenceLSTMOptions *>(builtin_options())
- : nullptr;
+ ? static_cast<const circle::BidirectionalSequenceLSTMOptions *>(builtin_options())
+ : nullptr;
}
const circle::BidirectionalSequenceRNNOptions *
builtin_options_as_BidirectionalSequenceRNNOptions() const
{
return builtin_options_type() == circle::BuiltinOptions_BidirectionalSequenceRNNOptions
- ? static_cast<const circle::BidirectionalSequenceRNNOptions *>(builtin_options())
- : nullptr;
+ ? static_cast<const circle::BidirectionalSequenceRNNOptions *>(builtin_options())
+ : nullptr;
}
const circle::UnidirectionalSequenceLSTMOptions *
builtin_options_as_UnidirectionalSequenceLSTMOptions() const
{
return builtin_options_type() == circle::BuiltinOptions_UnidirectionalSequenceLSTMOptions
- ? static_cast<const circle::UnidirectionalSequenceLSTMOptions *>(builtin_options())
- : nullptr;
+ ? static_cast<const circle::UnidirectionalSequenceLSTMOptions *>(builtin_options())
+ : nullptr;
}
const circle::FloorModOptions *builtin_options_as_FloorModOptions() const
{
return builtin_options_type() == circle::BuiltinOptions_FloorModOptions
- ? static_cast<const circle::FloorModOptions *>(builtin_options())
- : nullptr;
+ ? static_cast<const circle::FloorModOptions *>(builtin_options())
+ : nullptr;
}
const circle::RangeOptions *builtin_options_as_RangeOptions() const
{
return builtin_options_type() == circle::BuiltinOptions_RangeOptions
- ? static_cast<const circle::RangeOptions *>(builtin_options())
- : nullptr;
+ ? static_cast<const circle::RangeOptions *>(builtin_options())
+ : nullptr;
}
const circle::ResizeNearestNeighborOptions *
builtin_options_as_ResizeNearestNeighborOptions() const
{
return builtin_options_type() == circle::BuiltinOptions_ResizeNearestNeighborOptions
- ? static_cast<const circle::ResizeNearestNeighborOptions *>(builtin_options())
- : nullptr;
+ ? static_cast<const circle::ResizeNearestNeighborOptions *>(builtin_options())
+ : nullptr;
}
const circle::LeakyReluOptions *builtin_options_as_LeakyReluOptions() const
{
return builtin_options_type() == circle::BuiltinOptions_LeakyReluOptions
- ? static_cast<const circle::LeakyReluOptions *>(builtin_options())
- : nullptr;
+ ? static_cast<const circle::LeakyReluOptions *>(builtin_options())
+ : nullptr;
}
const circle::SquaredDifferenceOptions *builtin_options_as_SquaredDifferenceOptions() const
{
return builtin_options_type() == circle::BuiltinOptions_SquaredDifferenceOptions
- ? static_cast<const circle::SquaredDifferenceOptions *>(builtin_options())
- : nullptr;
+ ? static_cast<const circle::SquaredDifferenceOptions *>(builtin_options())
+ : nullptr;
}
const circle::MirrorPadOptions *builtin_options_as_MirrorPadOptions() const
{
return builtin_options_type() == circle::BuiltinOptions_MirrorPadOptions
- ? static_cast<const circle::MirrorPadOptions *>(builtin_options())
- : nullptr;
+ ? static_cast<const circle::MirrorPadOptions *>(builtin_options())
+ : nullptr;
}
const circle::AbsOptions *builtin_options_as_AbsOptions() const
{
return builtin_options_type() == circle::BuiltinOptions_AbsOptions
- ? static_cast<const circle::AbsOptions *>(builtin_options())
- : nullptr;
+ ? static_cast<const circle::AbsOptions *>(builtin_options())
+ : nullptr;
}
const circle::SplitVOptions *builtin_options_as_SplitVOptions() const
{
return builtin_options_type() == circle::BuiltinOptions_SplitVOptions
- ? static_cast<const circle::SplitVOptions *>(builtin_options())
- : nullptr;
+ ? static_cast<const circle::SplitVOptions *>(builtin_options())
+ : nullptr;
}
const circle::UniqueOptions *builtin_options_as_UniqueOptions() const
{
return builtin_options_type() == circle::BuiltinOptions_UniqueOptions
- ? static_cast<const circle::UniqueOptions *>(builtin_options())
- : nullptr;
+ ? static_cast<const circle::UniqueOptions *>(builtin_options())
+ : nullptr;
}
const circle::ReverseV2Options *builtin_options_as_ReverseV2Options() const
{
return builtin_options_type() == circle::BuiltinOptions_ReverseV2Options
- ? static_cast<const circle::ReverseV2Options *>(builtin_options())
- : nullptr;
+ ? static_cast<const circle::ReverseV2Options *>(builtin_options())
+ : nullptr;
}
const circle::AddNOptions *builtin_options_as_AddNOptions() const
{
return builtin_options_type() == circle::BuiltinOptions_AddNOptions
- ? static_cast<const circle::AddNOptions *>(builtin_options())
- : nullptr;
+ ? static_cast<const circle::AddNOptions *>(builtin_options())
+ : nullptr;
}
const circle::GatherNdOptions *builtin_options_as_GatherNdOptions() const
{
return builtin_options_type() == circle::BuiltinOptions_GatherNdOptions
- ? static_cast<const circle::GatherNdOptions *>(builtin_options())
- : nullptr;
+ ? static_cast<const circle::GatherNdOptions *>(builtin_options())
+ : nullptr;
}
const circle::CosOptions *builtin_options_as_CosOptions() const
{
return builtin_options_type() == circle::BuiltinOptions_CosOptions
- ? static_cast<const circle::CosOptions *>(builtin_options())
- : nullptr;
+ ? static_cast<const circle::CosOptions *>(builtin_options())
+ : nullptr;
}
const circle::WhereOptions *builtin_options_as_WhereOptions() const
{
return builtin_options_type() == circle::BuiltinOptions_WhereOptions
- ? static_cast<const circle::WhereOptions *>(builtin_options())
- : nullptr;
+ ? static_cast<const circle::WhereOptions *>(builtin_options())
+ : nullptr;
}
const circle::RankOptions *builtin_options_as_RankOptions() const
{
return builtin_options_type() == circle::BuiltinOptions_RankOptions
- ? static_cast<const circle::RankOptions *>(builtin_options())
- : nullptr;
+ ? static_cast<const circle::RankOptions *>(builtin_options())
+ : nullptr;
}
const circle::ReverseSequenceOptions *builtin_options_as_ReverseSequenceOptions() const
{
return builtin_options_type() == circle::BuiltinOptions_ReverseSequenceOptions
- ? static_cast<const circle::ReverseSequenceOptions *>(builtin_options())
- : nullptr;
+ ? static_cast<const circle::ReverseSequenceOptions *>(builtin_options())
+ : nullptr;
}
const circle::MatrixDiagOptions *builtin_options_as_MatrixDiagOptions() const
{
return builtin_options_type() == circle::BuiltinOptions_MatrixDiagOptions
- ? static_cast<const circle::MatrixDiagOptions *>(builtin_options())
- : nullptr;
+ ? static_cast<const circle::MatrixDiagOptions *>(builtin_options())
+ : nullptr;
}
const circle::QuantizeOptions *builtin_options_as_QuantizeOptions() const
{
return builtin_options_type() == circle::BuiltinOptions_QuantizeOptions
- ? static_cast<const circle::QuantizeOptions *>(builtin_options())
- : nullptr;
+ ? static_cast<const circle::QuantizeOptions *>(builtin_options())
+ : nullptr;
}
const circle::MatrixSetDiagOptions *builtin_options_as_MatrixSetDiagOptions() const
{
return builtin_options_type() == circle::BuiltinOptions_MatrixSetDiagOptions
- ? static_cast<const circle::MatrixSetDiagOptions *>(builtin_options())
- : nullptr;
+ ? static_cast<const circle::MatrixSetDiagOptions *>(builtin_options())
+ : nullptr;
}
const circle::HardSwishOptions *builtin_options_as_HardSwishOptions() const
{
return builtin_options_type() == circle::BuiltinOptions_HardSwishOptions
- ? static_cast<const circle::HardSwishOptions *>(builtin_options())
- : nullptr;
+ ? static_cast<const circle::HardSwishOptions *>(builtin_options())
+ : nullptr;
}
const circle::IfOptions *builtin_options_as_IfOptions() const
{
return builtin_options_type() == circle::BuiltinOptions_IfOptions
- ? static_cast<const circle::IfOptions *>(builtin_options())
- : nullptr;
+ ? static_cast<const circle::IfOptions *>(builtin_options())
+ : nullptr;
}
const circle::WhileOptions *builtin_options_as_WhileOptions() const
{
return builtin_options_type() == circle::BuiltinOptions_WhileOptions
- ? static_cast<const circle::WhileOptions *>(builtin_options())
- : nullptr;
+ ? static_cast<const circle::WhileOptions *>(builtin_options())
+ : nullptr;
}
const circle::DepthToSpaceOptions *builtin_options_as_DepthToSpaceOptions() const
{
return builtin_options_type() == circle::BuiltinOptions_DepthToSpaceOptions
- ? static_cast<const circle::DepthToSpaceOptions *>(builtin_options())
- : nullptr;
+ ? static_cast<const circle::DepthToSpaceOptions *>(builtin_options())
+ : nullptr;
}
const circle::NonMaxSuppressionV4Options *builtin_options_as_NonMaxSuppressionV4Options() const
{
return builtin_options_type() == circle::BuiltinOptions_NonMaxSuppressionV4Options
- ? static_cast<const circle::NonMaxSuppressionV4Options *>(builtin_options())
- : nullptr;
+ ? static_cast<const circle::NonMaxSuppressionV4Options *>(builtin_options())
+ : nullptr;
}
const circle::NonMaxSuppressionV5Options *builtin_options_as_NonMaxSuppressionV5Options() const
{
return builtin_options_type() == circle::BuiltinOptions_NonMaxSuppressionV5Options
- ? static_cast<const circle::NonMaxSuppressionV5Options *>(builtin_options())
- : nullptr;
+ ? static_cast<const circle::NonMaxSuppressionV5Options *>(builtin_options())
+ : nullptr;
}
const circle::ScatterNdOptions *builtin_options_as_ScatterNdOptions() const
{
return builtin_options_type() == circle::BuiltinOptions_ScatterNdOptions
- ? static_cast<const circle::ScatterNdOptions *>(builtin_options())
- : nullptr;
+ ? static_cast<const circle::ScatterNdOptions *>(builtin_options())
+ : nullptr;
}
const circle::SelectV2Options *builtin_options_as_SelectV2Options() const
{
return builtin_options_type() == circle::BuiltinOptions_SelectV2Options
- ? static_cast<const circle::SelectV2Options *>(builtin_options())
- : nullptr;
+ ? static_cast<const circle::SelectV2Options *>(builtin_options())
+ : nullptr;
}
const circle::DensifyOptions *builtin_options_as_DensifyOptions() const
{
return builtin_options_type() == circle::BuiltinOptions_DensifyOptions
- ? static_cast<const circle::DensifyOptions *>(builtin_options())
- : nullptr;
+ ? static_cast<const circle::DensifyOptions *>(builtin_options())
+ : nullptr;
}
const circle::SegmentSumOptions *builtin_options_as_SegmentSumOptions() const
{
return builtin_options_type() == circle::BuiltinOptions_SegmentSumOptions
- ? static_cast<const circle::SegmentSumOptions *>(builtin_options())
- : nullptr;
+ ? static_cast<const circle::SegmentSumOptions *>(builtin_options())
+ : nullptr;
}
const circle::BatchMatMulOptions *builtin_options_as_BatchMatMulOptions() const
{
return builtin_options_type() == circle::BuiltinOptions_BatchMatMulOptions
- ? static_cast<const circle::BatchMatMulOptions *>(builtin_options())
- : nullptr;
+ ? static_cast<const circle::BatchMatMulOptions *>(builtin_options())
+ : nullptr;
}
const circle::BCQGatherOptions *builtin_options_as_BCQGatherOptions() const
{
return builtin_options_type() == circle::BuiltinOptions_BCQGatherOptions
- ? static_cast<const circle::BCQGatherOptions *>(builtin_options())
- : nullptr;
+ ? static_cast<const circle::BCQGatherOptions *>(builtin_options())
+ : nullptr;
}
const circle::BCQFullyConnectedOptions *builtin_options_as_BCQFullyConnectedOptions() const
{
return builtin_options_type() == circle::BuiltinOptions_BCQFullyConnectedOptions
- ? static_cast<const circle::BCQFullyConnectedOptions *>(builtin_options())
- : nullptr;
+ ? static_cast<const circle::BCQFullyConnectedOptions *>(builtin_options())
+ : nullptr;
}
const circle::InstanceNormOptions *builtin_options_as_InstanceNormOptions() const
{
return builtin_options_type() == circle::BuiltinOptions_InstanceNormOptions
- ? static_cast<const circle::InstanceNormOptions *>(builtin_options())
- : nullptr;
+ ? static_cast<const circle::InstanceNormOptions *>(builtin_options())
+ : nullptr;
}
const flatbuffers::Vector<uint8_t> *custom_options() const
{
@@ -9558,7 +9554,7 @@ struct OperatorBuilder
static_cast<int8_t>(custom_options_format), 0);
}
void add_mutating_variable_inputs(
- flatbuffers::Offset<flatbuffers::Vector<uint8_t>> mutating_variable_inputs)
+ flatbuffers::Offset<flatbuffers::Vector<uint8_t>> mutating_variable_inputs)
{
fbb_.AddOffset(Operator::VT_MUTATING_VARIABLE_INPUTS, mutating_variable_inputs);
}
@@ -9580,15 +9576,15 @@ struct OperatorBuilder
};
inline flatbuffers::Offset<Operator> CreateOperator(
- flatbuffers::FlatBufferBuilder &_fbb, uint32_t opcode_index = 0,
- flatbuffers::Offset<flatbuffers::Vector<int32_t>> inputs = 0,
- flatbuffers::Offset<flatbuffers::Vector<int32_t>> outputs = 0,
- circle::BuiltinOptions builtin_options_type = circle::BuiltinOptions_NONE,
- flatbuffers::Offset<void> builtin_options = 0,
- flatbuffers::Offset<flatbuffers::Vector<uint8_t>> custom_options = 0,
- circle::CustomOptionsFormat custom_options_format = circle::CustomOptionsFormat_FLEXBUFFERS,
- flatbuffers::Offset<flatbuffers::Vector<uint8_t>> mutating_variable_inputs = 0,
- flatbuffers::Offset<flatbuffers::Vector<int32_t>> intermediates = 0)
+ flatbuffers::FlatBufferBuilder &_fbb, uint32_t opcode_index = 0,
+ flatbuffers::Offset<flatbuffers::Vector<int32_t>> inputs = 0,
+ flatbuffers::Offset<flatbuffers::Vector<int32_t>> outputs = 0,
+ circle::BuiltinOptions builtin_options_type = circle::BuiltinOptions_NONE,
+ flatbuffers::Offset<void> builtin_options = 0,
+ flatbuffers::Offset<flatbuffers::Vector<uint8_t>> custom_options = 0,
+ circle::CustomOptionsFormat custom_options_format = circle::CustomOptionsFormat_FLEXBUFFERS,
+ flatbuffers::Offset<flatbuffers::Vector<uint8_t>> mutating_variable_inputs = 0,
+ flatbuffers::Offset<flatbuffers::Vector<int32_t>> intermediates = 0)
{
OperatorBuilder builder_(_fbb);
builder_.add_intermediates(intermediates);
@@ -9604,20 +9600,20 @@ inline flatbuffers::Offset<Operator> CreateOperator(
}
inline flatbuffers::Offset<Operator> CreateOperatorDirect(
- flatbuffers::FlatBufferBuilder &_fbb, uint32_t opcode_index = 0,
- const std::vector<int32_t> *inputs = nullptr, const std::vector<int32_t> *outputs = nullptr,
- circle::BuiltinOptions builtin_options_type = circle::BuiltinOptions_NONE,
- flatbuffers::Offset<void> builtin_options = 0,
- const std::vector<uint8_t> *custom_options = nullptr,
- circle::CustomOptionsFormat custom_options_format = circle::CustomOptionsFormat_FLEXBUFFERS,
- const std::vector<uint8_t> *mutating_variable_inputs = nullptr,
- const std::vector<int32_t> *intermediates = nullptr)
+ flatbuffers::FlatBufferBuilder &_fbb, uint32_t opcode_index = 0,
+ const std::vector<int32_t> *inputs = nullptr, const std::vector<int32_t> *outputs = nullptr,
+ circle::BuiltinOptions builtin_options_type = circle::BuiltinOptions_NONE,
+ flatbuffers::Offset<void> builtin_options = 0,
+ const std::vector<uint8_t> *custom_options = nullptr,
+ circle::CustomOptionsFormat custom_options_format = circle::CustomOptionsFormat_FLEXBUFFERS,
+ const std::vector<uint8_t> *mutating_variable_inputs = nullptr,
+ const std::vector<int32_t> *intermediates = nullptr)
{
auto inputs__ = inputs ? _fbb.CreateVector<int32_t>(*inputs) : 0;
auto outputs__ = outputs ? _fbb.CreateVector<int32_t>(*outputs) : 0;
auto custom_options__ = custom_options ? _fbb.CreateVector<uint8_t>(*custom_options) : 0;
auto mutating_variable_inputs__ =
- mutating_variable_inputs ? _fbb.CreateVector<uint8_t>(*mutating_variable_inputs) : 0;
+ mutating_variable_inputs ? _fbb.CreateVector<uint8_t>(*mutating_variable_inputs) : 0;
auto intermediates__ = intermediates ? _fbb.CreateVector<int32_t>(*intermediates) : 0;
return circle::CreateOperator(_fbb, opcode_index, inputs__, outputs__, builtin_options_type,
builtin_options, custom_options__, custom_options_format,
@@ -9651,7 +9647,7 @@ struct SubGraph FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table
const flatbuffers::Vector<flatbuffers::Offset<circle::Operator>> *operators() const
{
return GetPointer<const flatbuffers::Vector<flatbuffers::Offset<circle::Operator>> *>(
- VT_OPERATORS);
+ VT_OPERATORS);
}
const flatbuffers::String *name() const
{
@@ -9693,7 +9689,7 @@ struct SubGraphBuilder
fbb_.AddOffset(SubGraph::VT_OUTPUTS, outputs);
}
void add_operators(
- flatbuffers::Offset<flatbuffers::Vector<flatbuffers::Offset<circle::Operator>>> operators)
+ flatbuffers::Offset<flatbuffers::Vector<flatbuffers::Offset<circle::Operator>>> operators)
{
fbb_.AddOffset(SubGraph::VT_OPERATORS, operators);
}
@@ -9719,13 +9715,13 @@ struct SubGraphBuilder
};
inline flatbuffers::Offset<SubGraph> CreateSubGraph(
- flatbuffers::FlatBufferBuilder &_fbb,
- flatbuffers::Offset<flatbuffers::Vector<flatbuffers::Offset<circle::Tensor>>> tensors = 0,
- flatbuffers::Offset<flatbuffers::Vector<int32_t>> inputs = 0,
- flatbuffers::Offset<flatbuffers::Vector<int32_t>> outputs = 0,
- flatbuffers::Offset<flatbuffers::Vector<flatbuffers::Offset<circle::Operator>>> operators = 0,
- flatbuffers::Offset<flatbuffers::String> name = 0,
- circle::DataFormat data_format = circle::DataFormat_CHANNELS_LAST)
+ flatbuffers::FlatBufferBuilder &_fbb,
+ flatbuffers::Offset<flatbuffers::Vector<flatbuffers::Offset<circle::Tensor>>> tensors = 0,
+ flatbuffers::Offset<flatbuffers::Vector<int32_t>> inputs = 0,
+ flatbuffers::Offset<flatbuffers::Vector<int32_t>> outputs = 0,
+ flatbuffers::Offset<flatbuffers::Vector<flatbuffers::Offset<circle::Operator>>> operators = 0,
+ flatbuffers::Offset<flatbuffers::String> name = 0,
+ circle::DataFormat data_format = circle::DataFormat_CHANNELS_LAST)
{
SubGraphBuilder builder_(_fbb);
builder_.add_name(name);
@@ -9738,17 +9734,17 @@ inline flatbuffers::Offset<SubGraph> CreateSubGraph(
}
inline flatbuffers::Offset<SubGraph> CreateSubGraphDirect(
- flatbuffers::FlatBufferBuilder &_fbb,
- const std::vector<flatbuffers::Offset<circle::Tensor>> *tensors = nullptr,
- const std::vector<int32_t> *inputs = nullptr, const std::vector<int32_t> *outputs = nullptr,
- const std::vector<flatbuffers::Offset<circle::Operator>> *operators = nullptr,
- const char *name = nullptr, circle::DataFormat data_format = circle::DataFormat_CHANNELS_LAST)
+ flatbuffers::FlatBufferBuilder &_fbb,
+ const std::vector<flatbuffers::Offset<circle::Tensor>> *tensors = nullptr,
+ const std::vector<int32_t> *inputs = nullptr, const std::vector<int32_t> *outputs = nullptr,
+ const std::vector<flatbuffers::Offset<circle::Operator>> *operators = nullptr,
+ const char *name = nullptr, circle::DataFormat data_format = circle::DataFormat_CHANNELS_LAST)
{
auto tensors__ = tensors ? _fbb.CreateVector<flatbuffers::Offset<circle::Tensor>>(*tensors) : 0;
auto inputs__ = inputs ? _fbb.CreateVector<int32_t>(*inputs) : 0;
auto outputs__ = outputs ? _fbb.CreateVector<int32_t>(*outputs) : 0;
auto operators__ =
- operators ? _fbb.CreateVector<flatbuffers::Offset<circle::Operator>>(*operators) : 0;
+ operators ? _fbb.CreateVector<flatbuffers::Offset<circle::Operator>>(*operators) : 0;
auto name__ = name ? _fbb.CreateString(name) : 0;
return circle::CreateSubGraph(_fbb, tensors__, inputs__, outputs__, operators__, name__,
data_format);
@@ -9893,12 +9889,12 @@ struct Model FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table
const flatbuffers::Vector<flatbuffers::Offset<circle::OperatorCode>> *operator_codes() const
{
return GetPointer<const flatbuffers::Vector<flatbuffers::Offset<circle::OperatorCode>> *>(
- VT_OPERATOR_CODES);
+ VT_OPERATOR_CODES);
}
const flatbuffers::Vector<flatbuffers::Offset<circle::SubGraph>> *subgraphs() const
{
return GetPointer<const flatbuffers::Vector<flatbuffers::Offset<circle::SubGraph>> *>(
- VT_SUBGRAPHS);
+ VT_SUBGRAPHS);
}
const flatbuffers::String *description() const
{
@@ -9915,7 +9911,7 @@ struct Model FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table
const flatbuffers::Vector<flatbuffers::Offset<circle::Metadata>> *metadata() const
{
return GetPointer<const flatbuffers::Vector<flatbuffers::Offset<circle::Metadata>> *>(
- VT_METADATA);
+ VT_METADATA);
}
bool Verify(flatbuffers::Verifier &verifier) const
{
@@ -9939,13 +9935,13 @@ struct ModelBuilder
flatbuffers::uoffset_t start_;
void add_version(uint32_t version) { fbb_.AddElement<uint32_t>(Model::VT_VERSION, version, 0); }
void add_operator_codes(
- flatbuffers::Offset<flatbuffers::Vector<flatbuffers::Offset<circle::OperatorCode>>>
- operator_codes)
+ flatbuffers::Offset<flatbuffers::Vector<flatbuffers::Offset<circle::OperatorCode>>>
+ operator_codes)
{
fbb_.AddOffset(Model::VT_OPERATOR_CODES, operator_codes);
}
void add_subgraphs(
- flatbuffers::Offset<flatbuffers::Vector<flatbuffers::Offset<circle::SubGraph>>> subgraphs)
+ flatbuffers::Offset<flatbuffers::Vector<flatbuffers::Offset<circle::SubGraph>>> subgraphs)
{
fbb_.AddOffset(Model::VT_SUBGRAPHS, subgraphs);
}
@@ -9963,7 +9959,7 @@ struct ModelBuilder
fbb_.AddOffset(Model::VT_METADATA_BUFFER, metadata_buffer);
}
void add_metadata(
- flatbuffers::Offset<flatbuffers::Vector<flatbuffers::Offset<circle::Metadata>>> metadata)
+ flatbuffers::Offset<flatbuffers::Vector<flatbuffers::Offset<circle::Metadata>>> metadata)
{
fbb_.AddOffset(Model::VT_METADATA, metadata);
}
@@ -9981,14 +9977,14 @@ struct ModelBuilder
};
inline flatbuffers::Offset<Model> CreateModel(
- flatbuffers::FlatBufferBuilder &_fbb, uint32_t version = 0,
- flatbuffers::Offset<flatbuffers::Vector<flatbuffers::Offset<circle::OperatorCode>>>
- operator_codes = 0,
- flatbuffers::Offset<flatbuffers::Vector<flatbuffers::Offset<circle::SubGraph>>> subgraphs = 0,
- flatbuffers::Offset<flatbuffers::String> description = 0,
- flatbuffers::Offset<flatbuffers::Vector<flatbuffers::Offset<circle::Buffer>>> buffers = 0,
- flatbuffers::Offset<flatbuffers::Vector<int32_t>> metadata_buffer = 0,
- flatbuffers::Offset<flatbuffers::Vector<flatbuffers::Offset<circle::Metadata>>> metadata = 0)
+ flatbuffers::FlatBufferBuilder &_fbb, uint32_t version = 0,
+ flatbuffers::Offset<flatbuffers::Vector<flatbuffers::Offset<circle::OperatorCode>>>
+ operator_codes = 0,
+ flatbuffers::Offset<flatbuffers::Vector<flatbuffers::Offset<circle::SubGraph>>> subgraphs = 0,
+ flatbuffers::Offset<flatbuffers::String> description = 0,
+ flatbuffers::Offset<flatbuffers::Vector<flatbuffers::Offset<circle::Buffer>>> buffers = 0,
+ flatbuffers::Offset<flatbuffers::Vector<int32_t>> metadata_buffer = 0,
+ flatbuffers::Offset<flatbuffers::Vector<flatbuffers::Offset<circle::Metadata>>> metadata = 0)
{
ModelBuilder builder_(_fbb);
builder_.add_metadata(metadata);
@@ -10002,24 +9998,24 @@ inline flatbuffers::Offset<Model> CreateModel(
}
inline flatbuffers::Offset<Model> CreateModelDirect(
- flatbuffers::FlatBufferBuilder &_fbb, uint32_t version = 0,
- const std::vector<flatbuffers::Offset<circle::OperatorCode>> *operator_codes = nullptr,
- const std::vector<flatbuffers::Offset<circle::SubGraph>> *subgraphs = nullptr,
- const char *description = nullptr,
- const std::vector<flatbuffers::Offset<circle::Buffer>> *buffers = nullptr,
- const std::vector<int32_t> *metadata_buffer = nullptr,
- const std::vector<flatbuffers::Offset<circle::Metadata>> *metadata = nullptr)
+ flatbuffers::FlatBufferBuilder &_fbb, uint32_t version = 0,
+ const std::vector<flatbuffers::Offset<circle::OperatorCode>> *operator_codes = nullptr,
+ const std::vector<flatbuffers::Offset<circle::SubGraph>> *subgraphs = nullptr,
+ const char *description = nullptr,
+ const std::vector<flatbuffers::Offset<circle::Buffer>> *buffers = nullptr,
+ const std::vector<int32_t> *metadata_buffer = nullptr,
+ const std::vector<flatbuffers::Offset<circle::Metadata>> *metadata = nullptr)
{
auto operator_codes__ =
- operator_codes ? _fbb.CreateVector<flatbuffers::Offset<circle::OperatorCode>>(*operator_codes)
- : 0;
+ operator_codes ? _fbb.CreateVector<flatbuffers::Offset<circle::OperatorCode>>(*operator_codes)
+ : 0;
auto subgraphs__ =
- subgraphs ? _fbb.CreateVector<flatbuffers::Offset<circle::SubGraph>>(*subgraphs) : 0;
+ subgraphs ? _fbb.CreateVector<flatbuffers::Offset<circle::SubGraph>>(*subgraphs) : 0;
auto description__ = description ? _fbb.CreateString(description) : 0;
auto buffers__ = buffers ? _fbb.CreateVector<flatbuffers::Offset<circle::Buffer>>(*buffers) : 0;
auto metadata_buffer__ = metadata_buffer ? _fbb.CreateVector<int32_t>(*metadata_buffer) : 0;
auto metadata__ =
- metadata ? _fbb.CreateVector<flatbuffers::Offset<circle::Metadata>>(*metadata) : 0;
+ metadata ? _fbb.CreateVector<flatbuffers::Offset<circle::Metadata>>(*metadata) : 0;
return circle::CreateModel(_fbb, version, operator_codes__, subgraphs__, description__, buffers__,
metadata_buffer__, metadata__);
}
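Since the hunks above only reformat the generated Circle builder helpers (CreateOperatorDirect, CreateSubGraphDirect, CreateModelDirect), a brief hedged sketch of how they fit together may help; the function name, literal values, and include path below are illustrative assumptions, not part of the patch.

#include <vector>
#include "circle_schema_generated.h" // the generated header this diff reformats (path assumed)

flatbuffers::Offset<circle::Model> buildTrivialModel(flatbuffers::FlatBufferBuilder &fbb)
{
  std::vector<int32_t> inputs{0};
  std::vector<int32_t> outputs{1};
  // The *Direct helpers copy the vectors into the buffer; every builtin option keeps its
  // schema default (BuiltinOptions_NONE, CustomOptionsFormat_FLEXBUFFERS, ...).
  auto op = circle::CreateOperatorDirect(fbb, /*opcode_index=*/0, &inputs, &outputs);
  std::vector<flatbuffers::Offset<circle::Operator>> ops{op};
  auto subgraph =
    circle::CreateSubGraphDirect(fbb, /*tensors=*/nullptr, &inputs, &outputs, &ops, "main");
  std::vector<flatbuffers::Offset<circle::SubGraph>> subgraphs{subgraph};
  auto model = circle::CreateModelDirect(fbb, /*version=*/3, /*operator_codes=*/nullptr,
                                         &subgraphs, "illustrative model");
  fbb.Finish(model); // flatbuffers::FlatBufferBuilder::Finish finalizes the root of the buffer
  return model;
}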
diff --git a/runtime/onert/frontend/nnapi/wrapper/ANeuralNetworksCompilation.cc b/runtime/onert/frontend/nnapi/wrapper/ANeuralNetworksCompilation.cc
index 81cd38f4f..63036a398 100644
--- a/runtime/onert/frontend/nnapi/wrapper/ANeuralNetworksCompilation.cc
+++ b/runtime/onert/frontend/nnapi/wrapper/ANeuralNetworksCompilation.cc
@@ -20,7 +20,9 @@
// TODO Support multiple subgraphs
ANeuralNetworksCompilation::ANeuralNetworksCompilation(const ANeuralNetworksModel *model) noexcept
- : _subgraphs{model->getSubGraphs()}, _compiler{new onert::compiler::Compiler{_subgraphs}}
+ : _subgraphs{model->getSubGraphs()}, _tracing_ctx{std::make_unique<onert::util::TracingCtx>(
+ _subgraphs.get())},
+ _compiler{new onert::compiler::Compiler{_subgraphs, _tracing_ctx.get()}}
{
if (model->allowedToFp16())
{
diff --git a/runtime/onert/frontend/nnapi/wrapper/ANeuralNetworksCompilation.h b/runtime/onert/frontend/nnapi/wrapper/ANeuralNetworksCompilation.h
index 5f0650b9a..bd61f9d86 100644
--- a/runtime/onert/frontend/nnapi/wrapper/ANeuralNetworksCompilation.h
+++ b/runtime/onert/frontend/nnapi/wrapper/ANeuralNetworksCompilation.h
@@ -23,6 +23,7 @@
#include "ir/Graph.h"
#include "ir/Subgraphs.h"
#include "exec/IExecutor.h"
+#include "util/TracingCtx.h"
struct ANeuralNetworksCompilation
{
@@ -40,6 +41,14 @@ public:
private:
std::shared_ptr<onert::ir::Subgraphs> _subgraphs;
+ // TODO Refine the ownership of TracingCtx
+ // With the nnfw API, nnfw_session owns the TracingCtx.
+ // With nnapi there is no notion of a session, so the primary model would likely have to own
+ // the TracingCtx.
+ // Since ONE does not yet support multiple models through nnapi, keep the single-model
+ // behavior for now and revisit the ownership once multi-model support lands.
+ std::unique_ptr<onert::util::TracingCtx> _tracing_ctx;
+
std::shared_ptr<onert::compiler::Compiler> _compiler;
std::shared_ptr<onert::exec::ExecutorMap> _executors;
};
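For readers following the TracingCtx change across the two hunks above, this is a minimal standalone sketch of the lifetime relationship they establish. Member names mirror the patch; the Compiler header path and the surrounding struct are assumptions for illustration only.

#include <memory>
#include "ir/Subgraphs.h"    // as included by the header above
#include "util/TracingCtx.h" // include added by this patch
// #include "compiler/Compiler.h" // path assumed for this sketch

struct CompilationSketch
{
  explicit CompilationSketch(std::shared_ptr<onert::ir::Subgraphs> subgraphs)
    : _subgraphs{std::move(subgraphs)},
      // Members are constructed in declaration order, so _tracing_ctx exists before the
      // compiler borrows its raw pointer, and it is destroyed only after the compiler.
      _tracing_ctx{std::make_unique<onert::util::TracingCtx>(_subgraphs.get())},
      _compiler{new onert::compiler::Compiler{_subgraphs, _tracing_ctx.get()}}
  {
  }

  std::shared_ptr<onert::ir::Subgraphs> _subgraphs;
  std::unique_ptr<onert::util::TracingCtx> _tracing_ctx; // owned here, only lent to the compiler
  std::shared_ptr<onert::compiler::Compiler> _compiler;
};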
diff --git a/runtime/onert/frontend/nnapi/wrapper/ANeuralNetworksEvent.cc b/runtime/onert/frontend/nnapi/wrapper/ANeuralNetworksEvent.cc
index 2bea729be..b0ea51917 100644
--- a/runtime/onert/frontend/nnapi/wrapper/ANeuralNetworksEvent.cc
+++ b/runtime/onert/frontend/nnapi/wrapper/ANeuralNetworksEvent.cc
@@ -20,7 +20,7 @@
#include "util/logging.h"
ANeuralNetworksEvent::ANeuralNetworksEvent(const std::shared_ptr<onert::exec::Execution> &execution)
- : _execution{execution}
+ : _execution{execution}
{
// DO NOTHING
}
diff --git a/runtime/onert/frontend/nnapi/wrapper/ANeuralNetworksExecution.cc b/runtime/onert/frontend/nnapi/wrapper/ANeuralNetworksExecution.cc
index 6114b74b0..21c7cdd6f 100644
--- a/runtime/onert/frontend/nnapi/wrapper/ANeuralNetworksExecution.cc
+++ b/runtime/onert/frontend/nnapi/wrapper/ANeuralNetworksExecution.cc
@@ -140,8 +140,8 @@ bool ANeuralNetworksExecution::setInput(uint32_t index, const ANeuralNetworksOpe
const auto type_info = _execution->primary_subgraph().operands().at(operand_index).typeInfo();
const auto shape = (type != nullptr)
- ? NNAPIConvert::getShape(type)
- : _execution->primary_subgraph().operands().at(operand_index).shape();
+ ? NNAPIConvert::getShape(type)
+ : _execution->primary_subgraph().operands().at(operand_index).shape();
// NOTE The nnapi does not provide a way to set io_layout and does not support changing layout. In other
// words, we can assume that io_layout from nnapi is always the same as layout of the used
@@ -173,8 +173,8 @@ bool ANeuralNetworksExecution::setOptionalInput(uint32_t index,
const auto type_info = _execution->primary_subgraph().operands().at(operand_index).typeInfo();
const auto shape = (type != nullptr)
- ? NNAPIConvert::getShape(type)
- : _execution->primary_subgraph().operands().at(operand_index).shape();
+ ? NNAPIConvert::getShape(type)
+ : _execution->primary_subgraph().operands().at(operand_index).shape();
// ANeuralNetworksExecution::setInput() uses only shape information
ANeuralNetworksOperandType optional_input_type;
@@ -208,8 +208,8 @@ bool ANeuralNetworksExecution::setOutput(uint32_t index, const ANeuralNetworksOp
const auto type_info = _execution->primary_subgraph().operands().at(operand_index).typeInfo();
const auto shape = (type != nullptr)
- ? NNAPIConvert::getShape(type)
- : _execution->primary_subgraph().operands().at(operand_index).shape();
+ ? NNAPIConvert::getShape(type)
+ : _execution->primary_subgraph().operands().at(operand_index).shape();
// NOTE The nnapi does not provide a way to set io_layout and does not support changing layout. In other
// words, we can assume that io_layout from nnapi is always the same as layout of the used
diff --git a/runtime/onert/frontend/nnapi/wrapper/ANeuralNetworksExecution.h b/runtime/onert/frontend/nnapi/wrapper/ANeuralNetworksExecution.h
index 1f4b868f6..70c5d2a4b 100644
--- a/runtime/onert/frontend/nnapi/wrapper/ANeuralNetworksExecution.h
+++ b/runtime/onert/frontend/nnapi/wrapper/ANeuralNetworksExecution.h
@@ -27,7 +27,7 @@ struct ANeuralNetworksExecution
{
public:
ANeuralNetworksExecution(const std::shared_ptr<onert::exec::ExecutorMap> &executors)
- : _execution{std::make_shared<onert::exec::Execution>(executors)}
+ : _execution{std::make_shared<onert::exec::Execution>(executors)}
{
// DO NOTHING
}
diff --git a/runtime/onert/frontend/nnapi/wrapper/ANeuralNetworksModel.cc b/runtime/onert/frontend/nnapi/wrapper/ANeuralNetworksModel.cc
index 97b820aea..3e2bea114 100644
--- a/runtime/onert/frontend/nnapi/wrapper/ANeuralNetworksModel.cc
+++ b/runtime/onert/frontend/nnapi/wrapper/ANeuralNetworksModel.cc
@@ -27,7 +27,7 @@
// ANeuralNetworksModel
//
ANeuralNetworksModel::ANeuralNetworksModel() noexcept
- : _optional_operands{}, _operand_usages{}, _allowFloat32toFloat16{false}
+ : _optional_operands{}, _operand_usages{}, _allowFloat32toFloat16{false}
{
_graph = std::make_shared<onert::ir::Graph>();
}
@@ -72,12 +72,12 @@ bool ANeuralNetworksModel::setOperandValue(uint32_t index, const void *buffer, s
if (copy)
{
_graph->operands().at(ind).data(
- std::make_unique<CachedData>(reinterpret_cast<const uint8_t *>(buffer), length));
+ std::make_unique<CachedData>(reinterpret_cast<const uint8_t *>(buffer), length));
}
else
{
_graph->operands().at(ind).data(
- std::make_unique<ExternalData>(reinterpret_cast<const uint8_t *>(buffer), length));
+ std::make_unique<ExternalData>(reinterpret_cast<const uint8_t *>(buffer), length));
}
}
catch (const std::exception &e)
@@ -111,9 +111,9 @@ bool ANeuralNetworksModel::addOperation(ANeuralNetworksOperationType type, uint3
if (type == ANEURALNETWORKS_FULLY_CONNECTED)
{
const auto &input_operand =
- _graph->operands().at(node->getInputs().at(onert::ir::operation::FullyConnected::INPUT));
+ _graph->operands().at(node->getInputs().at(onert::ir::operation::FullyConnected::INPUT));
auto &weights_operand =
- _graph->operands().at(node->getInputs().at(onert::ir::operation::FullyConnected::WEIGHT));
+ _graph->operands().at(node->getInputs().at(onert::ir::operation::FullyConnected::WEIGHT));
if (input_operand.typeInfo().type() == onert::ir::DataType::FLOAT32 &&
weights_operand.typeInfo().type() == onert::ir::DataType::QUANT_UINT8_ASYMM)
{
diff --git a/runtime/onert/frontend/nnapi/wrapper/NNAPIConvert.cc b/runtime/onert/frontend/nnapi/wrapper/NNAPIConvert.cc
index 63d4e3c09..94b8f02f5 100644
--- a/runtime/onert/frontend/nnapi/wrapper/NNAPIConvert.cc
+++ b/runtime/onert/frontend/nnapi/wrapper/NNAPIConvert.cc
@@ -39,6 +39,13 @@ DataType NNAPIConvert::getDataType(OperandCode type)
case ANEURALNETWORKS_BOOL:
case ANEURALNETWORKS_TENSOR_BOOL8:
return DataType::BOOL8;
+ case ANEURALNETWORKS_TENSOR_FLOAT16:
+ case ANEURALNETWORKS_FLOAT16:
+ return DataType::FLOAT16;
+ case ANEURALNETWORKS_TENSOR_QUANT8_SYMM_PER_CHANNEL:
+ return DataType::QUANT_INT8_SYMM_PER_CHANNEL;
+ case ANEURALNETWORKS_TENSOR_QUANT8_ASYMM_SIGNED:
+ return DataType::QUANT_INT8_ASYMM;
default:
throw std::runtime_error("Unsupported type");
}
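
The hunk above extends NNAPIConvert::getDataType() so that the NNAPI float16 and signed/per-channel quantized operand codes map onto onert data types. A minimal self-contained sketch of that mapping follows; the reduced enums and the toDataType() helper are illustrative stand-ins for this note, not code from the patch.

// Illustrative sketch only: mirrors the OperandCode -> DataType mapping added
// above. The enums here are trimmed stand-ins for the NNAPI and onert types.
#include <stdexcept>

enum OperandCode // subset of ANeuralNetworks operand codes (assumed here)
{
  ANEURALNETWORKS_TENSOR_FLOAT32,
  ANEURALNETWORKS_TENSOR_FLOAT16,
  ANEURALNETWORKS_TENSOR_QUANT8_SYMM_PER_CHANNEL,
  ANEURALNETWORKS_TENSOR_QUANT8_ASYMM_SIGNED,
};

enum class DataType // subset of onert::ir::DataType
{
  FLOAT32,
  FLOAT16,
  QUANT_INT8_SYMM_PER_CHANNEL,
  QUANT_INT8_ASYMM,
};

// Same shape as NNAPIConvert::getDataType(): unknown codes are rejected loudly.
DataType toDataType(OperandCode type)
{
  switch (type)
  {
    case ANEURALNETWORKS_TENSOR_FLOAT32:
      return DataType::FLOAT32;
    case ANEURALNETWORKS_TENSOR_FLOAT16:
      return DataType::FLOAT16;
    case ANEURALNETWORKS_TENSOR_QUANT8_SYMM_PER_CHANNEL:
      return DataType::QUANT_INT8_SYMM_PER_CHANNEL;
    case ANEURALNETWORKS_TENSOR_QUANT8_ASYMM_SIGNED:
      return DataType::QUANT_INT8_ASYMM;
    default:
      throw std::runtime_error("Unsupported type");
  }
}

Unrecognized codes still fall through to the runtime_error branch, matching the behaviour of the original function.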
diff --git a/runtime/onert/frontend/nnapi/wrapper/OperationFactory.cc b/runtime/onert/frontend/nnapi/wrapper/OperationFactory.cc
index a84ce1b8d..9ecb7d190 100644
--- a/runtime/onert/frontend/nnapi/wrapper/OperationFactory.cc
+++ b/runtime/onert/frontend/nnapi/wrapper/OperationFactory.cc
@@ -107,7 +107,7 @@ getElementwiseActivationGenerator(const onert::ir::operation::ElementwiseActivat
}
OperationFactory::Generator getElementwiseBinaryGenerator(
- const onert::ir::operation::ElementwiseBinary::ElementwiseBinaryType op_type)
+ const onert::ir::operation::ElementwiseBinary::ElementwiseBinaryType op_type)
{
return [op_type](const OperationFactory::Param &init_param, Operands &) {
assert(init_param.input_count == 2);
@@ -182,7 +182,7 @@ getBinaryArithmeticGenerator(const onert::ir::operation::BinaryArithmetic::Arith
param.arithmetic_type = op_type;
const auto activation_index = OperandIndex{init_param.inputs[2]};
param.activation =
- NNAPIConvert::getFusedActivation(operands.at(activation_index).asScalar<FuseCode>());
+ NNAPIConvert::getFusedActivation(operands.at(activation_index).asScalar<FuseCode>());
return new operation::BinaryArithmetic{inputs, outputs, param};
};
@@ -221,12 +221,12 @@ getPool2DGenerator(const onert::ir::operation::Pool2D::PoolType pool_type)
const auto activation_index = OperandIndex{init_param.inputs[6]};
param.padding.type =
- NNAPIConvert::getPaddingType(operands.at(padding_index).asScalar<PaddingCode>());
+ NNAPIConvert::getPaddingType(operands.at(padding_index).asScalar<PaddingCode>());
param.stride = makeStride(operands, hstride_index, vstride_index);
param.kw = getUint32Scalar(operands, kw_index);
param.kh = operands.at(kh_index).asScalar<uint32_t>();
param.activation =
- NNAPIConvert::getFusedActivation(operands.at(activation_index).asScalar<FuseCode>());
+ NNAPIConvert::getFusedActivation(operands.at(activation_index).asScalar<FuseCode>());
}
else // support explicit padding
{
@@ -259,7 +259,7 @@ getPool2DGenerator(const onert::ir::operation::Pool2D::PoolType pool_type)
param.kw = getUint32Scalar(operands, kw_index);
param.kh = getUint32Scalar(operands, kh_index);
param.activation =
- NNAPIConvert::getFusedActivation(operands.at(activation_index).asScalar<FuseCode>());
+ NNAPIConvert::getFusedActivation(operands.at(activation_index).asScalar<FuseCode>());
}
return new operation::Pool2D{inputs, outputs, param};
};
@@ -382,11 +382,11 @@ OperationFactory::OperationFactory()
const auto activation_index = OperandIndex{init_param.inputs[7]};
param.padding.type =
- NNAPIConvert::getPaddingType(operands.at(padding_index).asScalar<PaddingCode>());
+ NNAPIConvert::getPaddingType(operands.at(padding_index).asScalar<PaddingCode>());
param.stride = makeStride(operands, hstride_index, vstride_index);
param.multiplier = getUint32Scalar(operands, multiplier_index);
param.activation =
- NNAPIConvert::getFusedActivation(operands.at(activation_index).asScalar<FuseCode>());
+ NNAPIConvert::getFusedActivation(operands.at(activation_index).asScalar<FuseCode>());
}
else
{
@@ -417,7 +417,7 @@ OperationFactory::OperationFactory()
param.stride = makeStride(operands, hstride_index, vstride_index);
param.multiplier = getUint32Scalar(operands, multiplier_index);
param.activation =
- NNAPIConvert::getFusedActivation(operands.at(activation_index).asScalar<FuseCode>());
+ NNAPIConvert::getFusedActivation(operands.at(activation_index).asScalar<FuseCode>());
}
// TODO set dilation
@@ -490,7 +490,7 @@ OperationFactory::OperationFactory()
operation::FullyConnected::Param param;
const auto activation_index = OperandIndex{init_param.inputs[3]};
param.activation =
- NNAPIConvert::getFusedActivation(operands.at(activation_index).asScalar<FuseCode>());
+ NNAPIConvert::getFusedActivation(operands.at(activation_index).asScalar<FuseCode>());
param.weights_format = FullyConnectedWeightsFormat::Default;
return new operation::FullyConnected{inputs, outputs, param};
@@ -517,7 +517,7 @@ OperationFactory::OperationFactory()
};
_map[ANEURALNETWORKS_CAST] =
- getElementwiseUnaryGenerator(operation::ElementwiseUnary::Type::CAST);
+ getElementwiseUnaryGenerator(operation::ElementwiseUnary::Type::CAST);
// ANEURALNETWORKS_CAST_EX is deprecated
// TODO Remove ANEURALNETWORKS_CAST_EX
@@ -557,14 +557,14 @@ OperationFactory::OperationFactory()
const auto activation_index = OperandIndex{init_param.inputs[6]};
param.padding.type =
- NNAPIConvert::getPaddingType(operands.at(padding_index).asScalar<PaddingCode>());
+ NNAPIConvert::getPaddingType(operands.at(padding_index).asScalar<PaddingCode>());
param.stride = makeStride(operands, hstride_index, vstride_index);
param.dilation.width_factor = 1;
param.dilation.height_factor = 1;
param.activation =
- NNAPIConvert::getFusedActivation(operands.at(activation_index).asScalar<FuseCode>());
+ NNAPIConvert::getFusedActivation(operands.at(activation_index).asScalar<FuseCode>());
}
else if (init_param.input_count == 10) // support explicit padding
{
@@ -595,7 +595,7 @@ OperationFactory::OperationFactory()
param.dilation.height_factor = 1;
param.activation =
- NNAPIConvert::getFusedActivation(operands.at(activation_index).asScalar<FuseCode>());
+ NNAPIConvert::getFusedActivation(operands.at(activation_index).asScalar<FuseCode>());
}
else if (init_param.input_count == 13) // support dilation
{
@@ -633,7 +633,7 @@ OperationFactory::OperationFactory()
param.dilation.height_factor = height_factor;
param.activation =
- NNAPIConvert::getFusedActivation(operands.at(activation_index).asScalar<FuseCode>());
+ NNAPIConvert::getFusedActivation(operands.at(activation_index).asScalar<FuseCode>());
}
else
{
@@ -644,19 +644,19 @@ OperationFactory::OperationFactory()
};
_map[ANEURALNETWORKS_ADD] =
- getBinaryArithmeticGenerator(onert::ir::operation::BinaryArithmetic::ArithmeticType::ADD);
+ getBinaryArithmeticGenerator(onert::ir::operation::BinaryArithmetic::ArithmeticType::ADD);
_map[ANEURALNETWORKS_ADDV2_EX] = _map[ANEURALNETWORKS_ADD];
_map[ANEURALNETWORKS_REDUCE_SUM] =
- getReduceGenerator(onert::ir::operation::Reduce::ReduceType::SUM);
+ getReduceGenerator(onert::ir::operation::Reduce::ReduceType::SUM);
// ANEURALNETWORKS_REDUCE_SUM_EX is deprecated
// TODO Remove ANEURALNETWORKS_REDUCE_SUM_EX
_map[ANEURALNETWORKS_REDUCE_SUM_EX] = _map[ANEURALNETWORKS_REDUCE_SUM];
_map[ANEURALNETWORKS_SUB] =
- getBinaryArithmeticGenerator(onert::ir::operation::BinaryArithmetic::ArithmeticType::SUB);
+ getBinaryArithmeticGenerator(onert::ir::operation::BinaryArithmetic::ArithmeticType::SUB);
_map[ANEURALNETWORKS_SLICE] = [](const OperationFactory::Param &init_param, Operands &) {
assert(init_param.input_count == 3 && init_param.output_count == 1);
@@ -708,7 +708,7 @@ OperationFactory::OperationFactory()
param.begin_mask = operands.at(OperandIndex{init_param.inputs[4]}).asScalar<std::int32_t>();
param.end_mask = operands.at(OperandIndex{init_param.inputs[5]}).asScalar<std::int32_t>();
param.shrink_axis_mask =
- operands.at(OperandIndex{init_param.inputs[6]}).asScalar<std::int32_t>();
+ operands.at(OperandIndex{init_param.inputs[6]}).asScalar<std::int32_t>();
return new operation::StridedSlice{inputs, outputs, param};
};
@@ -716,7 +716,7 @@ OperationFactory::OperationFactory()
_map[ANEURALNETWORKS_TRANSPOSE] = createSimpleBinaryOp<operation::Transpose>;
_map[ANEURALNETWORKS_MUL] =
- getBinaryArithmeticGenerator(onert::ir::operation::BinaryArithmetic::ArithmeticType::MUL);
+ getBinaryArithmeticGenerator(onert::ir::operation::BinaryArithmetic::ArithmeticType::MUL);
_map[ANEURALNETWORKS_SQUEEZE] = [](const OperationFactory::Param &init_param,
Operands &operands) {
@@ -758,15 +758,15 @@ OperationFactory::OperationFactory()
};
_map[ANEURALNETWORKS_TANH] = getElementwiseActivationGenerator(
- onert::ir::operation::ElementwiseActivation::Type::TANH, 1.f, 1.f);
+ onert::ir::operation::ElementwiseActivation::Type::TANH, 1.f, 1.f);
_map[ANEURALNETWORKS_LOG] = getElementwiseUnaryGenerator(operation::ElementwiseUnary::Type::LOG);
- _map[ANEURALNETWORKS_LOGISTIC] = getElementwiseActivationGenerator(
- onert::ir::operation::ElementwiseActivation::Type::LOGISTIC);
+ _map[ANEURALNETWORKS_LOGISTIC] =
+ getElementwiseActivationGenerator(onert::ir::operation::ElementwiseActivation::Type::LOGISTIC);
_map[ANEURALNETWORKS_DIV] =
- getBinaryArithmeticGenerator(onert::ir::operation::BinaryArithmetic::ArithmeticType::DIV);
+ getBinaryArithmeticGenerator(onert::ir::operation::BinaryArithmetic::ArithmeticType::DIV);
_map[ANEURALNETWORKS_EXP] = getElementwiseUnaryGenerator(operation::ElementwiseUnary::Type::EXP);
@@ -780,16 +780,16 @@ OperationFactory::OperationFactory()
_map[ANEURALNETWORKS_EXPAND_DIMS] = createSimpleBinaryOp<operation::ExpandDims>;
_map[ANEURALNETWORKS_GREATER] =
- getComparisonGenerator(operation::Comparison::ComparisonType::Greater);
+ getComparisonGenerator(operation::Comparison::ComparisonType::Greater);
_map[ANEURALNETWORKS_GREATER_EQUAL] =
- getComparisonGenerator(operation::Comparison::ComparisonType::GreaterEqual);
+ getComparisonGenerator(operation::Comparison::ComparisonType::GreaterEqual);
_map[ANEURALNETWORKS_LESS] = getComparisonGenerator(operation::Comparison::ComparisonType::Less);
_map[ANEURALNETWORKS_LESS_EQUAL] =
- getComparisonGenerator(operation::Comparison::ComparisonType::LessEqual);
+ getComparisonGenerator(operation::Comparison::ComparisonType::LessEqual);
_map[ANEURALNETWORKS_NOT_EQUAL] =
- getComparisonGenerator(operation::Comparison::ComparisonType::NotEqual);
+ getComparisonGenerator(operation::Comparison::ComparisonType::NotEqual);
_map[ANEURALNETWORKS_EQUAL] =
- getComparisonGenerator(operation::Comparison::ComparisonType::Equal);
+ getComparisonGenerator(operation::Comparison::ComparisonType::Equal);
// ANEURALNETWORKS_GREATER_EQUAL_EX is deprecated
// TODO Remove ANEURALNETWORKS_GREATER_EQUAL_EX
@@ -838,13 +838,13 @@ OperationFactory::OperationFactory()
};
_map[ANEURALNETWORKS_REDUCE_ALL] =
- getReduceGenerator(onert::ir::operation::Reduce::ReduceType::ALL);
+ getReduceGenerator(onert::ir::operation::Reduce::ReduceType::ALL);
_map[ANEURALNETWORKS_REDUCE_ANY] =
- getReduceGenerator(onert::ir::operation::Reduce::ReduceType::ANY);
+ getReduceGenerator(onert::ir::operation::Reduce::ReduceType::ANY);
_map[ANEURALNETWORKS_REDUCE_MAX] =
- getReduceGenerator(onert::ir::operation::Reduce::ReduceType::MAX);
+ getReduceGenerator(onert::ir::operation::Reduce::ReduceType::MAX);
// ANEURALNETWORKS_REDUCE_MAX_EX is deprecated
// TODO Remove ANEURALNETWORKS_REDUCE_MAX_EX
@@ -873,8 +873,8 @@ OperationFactory::OperationFactory()
return new operation::Comparison{inputs, outputs, param};
};
- _map[ANEURALNETWORKS_LOGICAL_AND] = getElementwiseBinaryGenerator(
- operation::ElementwiseBinary::ElementwiseBinaryType::LOGICAL_AND);
+ _map[ANEURALNETWORKS_LOGICAL_AND] =
+ getElementwiseBinaryGenerator(operation::ElementwiseBinary::ElementwiseBinaryType::LOGICAL_AND);
// ANEURALNETWORKS_LOGICAL_AND_EX is deprecated
// TODO Remove ANEURALNETWORKS_LOGICAL_AND_EX
@@ -902,7 +902,7 @@ OperationFactory::OperationFactory()
};
_map[ANEURALNETWORKS_RSQRT] =
- getElementwiseUnaryGenerator(operation::ElementwiseUnary::Type::RSQRT);
+ getElementwiseUnaryGenerator(operation::ElementwiseUnary::Type::RSQRT);
_map[ANEURALNETWORKS_SELECT] = [](const OperationFactory::Param &init_param, Operands &) {
assert(init_param.input_count == 3 && init_param.output_count == 1);
@@ -939,8 +939,8 @@ OperationFactory::OperationFactory()
_map[ANEURALNETWORKS_RSQRT_EX] = _map[ANEURALNETWORKS_RSQRT];
_map[ANEURALNETWORKS_RELU] =
- getElementwiseActivationGenerator(onert::ir::operation::ElementwiseActivation::Type::RELU,
- onert::ir::operation::ElementwiseActivation::infinity, 0);
+ getElementwiseActivationGenerator(onert::ir::operation::ElementwiseActivation::Type::RELU,
+ onert::ir::operation::ElementwiseActivation::infinity, 0);
_map[ANEURALNETWORKS_RESIZE_BILINEAR] = [](const OperationFactory::Param &init_param,
Operands &operands) {
@@ -986,10 +986,10 @@ OperationFactory::OperationFactory()
};
_map[ANEURALNETWORKS_RELU1] = getElementwiseActivationGenerator(
- onert::ir::operation::ElementwiseActivation::Type::RELU, 1.f, -1.f);
+ onert::ir::operation::ElementwiseActivation::Type::RELU, 1.f, -1.f);
_map[ANEURALNETWORKS_RELU6] = getElementwiseActivationGenerator(
- onert::ir::operation::ElementwiseActivation::Type::RELU, 6.f, 0.f);
+ onert::ir::operation::ElementwiseActivation::Type::RELU, 6.f, 0.f);
_map[ANEURALNETWORKS_REVERSE_EX] = [](const OperationFactory::Param &init_param, Operands &) {
assert(init_param.input_count == 2 && init_param.output_count == 1);
@@ -1031,13 +1031,13 @@ OperationFactory::OperationFactory()
operation::RNN::Param param;
const auto activation_index = OperandIndex{init_param.inputs[5]};
param.activation =
- NNAPIConvert::getFusedActivation(operands.at(activation_index).asScalar<FuseCode>());
+ NNAPIConvert::getFusedActivation(operands.at(activation_index).asScalar<FuseCode>());
return new operation::RNN{inputs, outputs, param};
};
_map[ANEURALNETWORKS_FLOOR] =
- getElementwiseUnaryGenerator(operation::ElementwiseUnary::Type::FLOOR);
+ getElementwiseUnaryGenerator(operation::ElementwiseUnary::Type::FLOOR);
_map[ANEURALNETWORKS_SPACE_TO_BATCH_ND] = [](const OperationFactory::Param &init_param,
Operands &) {
@@ -1169,21 +1169,21 @@ OperationFactory::OperationFactory()
const auto vstride_index = OperandIndex{init_param.inputs[5]};
param.padding.type =
- NNAPIConvert::getPaddingType(operands.at(padding_index).asScalar<PaddingCode>());
+ NNAPIConvert::getPaddingType(operands.at(padding_index).asScalar<PaddingCode>());
param.stride = makeStride(operands, hstride_index, vstride_index);
return new operation::TransposeConv{inputs, outputs, param};
};
_map[ANEURALNETWORKS_SQRT] =
- getElementwiseUnaryGenerator(operation::ElementwiseUnary::Type::SQRT);
+ getElementwiseUnaryGenerator(operation::ElementwiseUnary::Type::SQRT);
// ANEURALNETWORKS_SQRT_EX is deprecated
// TODO Remove ANEURALNETWORKS_SQRT_EX
_map[ANEURALNETWORKS_SQRT_EX] = _map[ANEURALNETWORKS_SQRT];
- _map[ANEURALNETWORKS_LOGICAL_OR] = getElementwiseBinaryGenerator(
- operation::ElementwiseBinary::ElementwiseBinaryType::LOGICAL_OR);
+ _map[ANEURALNETWORKS_LOGICAL_OR] =
+ getElementwiseBinaryGenerator(operation::ElementwiseBinary::ElementwiseBinaryType::LOGICAL_OR);
// ANEURALNETWORKS_LOGICAL_OR_EX is deprecated
// TODO Remove ANEURALNETWORKS_LOGICAL_OR_EX
@@ -1211,7 +1211,7 @@ OperationFactory::OperationFactory()
};
_map[ANEURALNETWORKS_LOGICAL_NOT] =
- getElementwiseUnaryGenerator(operation::ElementwiseUnary::Type::LOGICAL_NOT);
+ getElementwiseUnaryGenerator(operation::ElementwiseUnary::Type::LOGICAL_NOT);
// ANEURALNETWORKS_LOGICAL_NOT_EX is deprecated
// TODO Remove ANEURALNETWORKS_LOGICAL_NOT_EX
@@ -1370,9 +1370,9 @@ OperationFactory::OperationFactory()
// 2 -> Cell State Out Tensor Index
const OperandIndex scratch_buffer_index;
OperandIndex output_state_index =
- init_param.output_count >= 2 ? OperandIndex{init_param.outputs[1]} : OperandIndex();
+ init_param.output_count >= 2 ? OperandIndex{init_param.outputs[1]} : OperandIndex();
OperandIndex cell_state_index =
- init_param.output_count >= 3 ? OperandIndex{init_param.outputs[2]} : OperandIndex();
+ init_param.output_count >= 3 ? OperandIndex{init_param.outputs[2]} : OperandIndex();
const OperandIndex output_index = OperandIndex{init_param.outputs[0]};
OperandIndexSequence outputs{scratch_buffer_index, output_state_index, cell_state_index,
output_index};
@@ -1519,19 +1519,39 @@ OperationFactory::OperationFactory()
// 1 -> Axis Tensor Index
OperandIndexSequence inputs{init_param.inputs[0], init_param.inputs[1]};
- operation::ArgMax::Param param;
+ operation::ArgMinMax::Param param;
// NNAPI ARGMAX output type is always int32
param.output_type = DataType::INT32;
+ param.is_arg_max = true;
- return new operation::ArgMax{inputs, outputs, param};
+ return new operation::ArgMinMax{inputs, outputs, param};
};
// ANEURALNETWORKS_ARGMAX_EX is deprecated
// TODO Remove ANEURALNETWORKS_ARGMAX_EX
_map[ANEURALNETWORKS_ARGMAX_EX] = _map[ANEURALNETWORKS_ARGMAX];
+ _map[ANEURALNETWORKS_ARGMIN] = [](const OperationFactory::Param &init_param, Operands &) {
+ assert(init_param.input_count == 2 && init_param.output_count == 1);
+
+ OperandIndexSequence outputs{init_param.outputs[0]};
+
+ // Each input should be interpreted as follows:
+ //
+ // 0 -> Input Tensor Index
+ // 1 -> Axis Tensor Index
+ OperandIndexSequence inputs{init_param.inputs[0], init_param.inputs[1]};
+
+ operation::ArgMinMax::Param param;
+ // NNAPI ARGMIN output type is always int32
+ param.output_type = DataType::INT32;
+ param.is_arg_max = false;
+
+ return new operation::ArgMinMax{inputs, outputs, param};
+ };
+
_map[ANEURALNETWORKS_DEQUANTIZE] =
- getElementwiseUnaryGenerator(operation::ElementwiseUnary::Type::DEQUANTIZE);
+ getElementwiseUnaryGenerator(operation::ElementwiseUnary::Type::DEQUANTIZE);
_map[ANEURALNETWORKS_MEAN] = [](const OperationFactory::Param &init_param, Operands &operands) {
assert(init_param.input_count == 3 && init_param.output_count == 1);
@@ -1608,7 +1628,7 @@ OperationFactory::OperationFactory()
};
_map[ANEURALNETWORKS_REDUCE_MIN] =
- getReduceGenerator(onert::ir::operation::Reduce::ReduceType::MIN);
+ getReduceGenerator(onert::ir::operation::Reduce::ReduceType::MIN);
// ANEURALNETWORKS_REDUCE_MIN_EX is deprecated
// TODO Remove ANEURALNETWORKS_REDUCE_MIN_EX
@@ -1689,10 +1709,10 @@ OperationFactory::OperationFactory()
_map[ANEURALNETWORKS_PAD_V2] = _map[ANEURALNETWORKS_PAD];
_map[ANEURALNETWORKS_MINIMUM] =
- getElementwiseBinaryGenerator(operation::ElementwiseBinary::ElementwiseBinaryType::MIN);
+ getElementwiseBinaryGenerator(operation::ElementwiseBinary::ElementwiseBinaryType::MIN);
_map[ANEURALNETWORKS_MAXIMUM] =
- getElementwiseBinaryGenerator(operation::ElementwiseBinary::ElementwiseBinaryType::MAX);
+ getElementwiseBinaryGenerator(operation::ElementwiseBinary::ElementwiseBinaryType::MAX);
_map[ANEURALNETWORKS_ONE_HOT_EX] = [](const OperationFactory::Param &init_param,
Operands &operands) {
@@ -1719,7 +1739,7 @@ OperationFactory::OperationFactory()
};
_map[ANEURALNETWORKS_COS_EX] =
- getElementwiseUnaryGenerator(operation::ElementwiseUnary::Type::COS);
+ getElementwiseUnaryGenerator(operation::ElementwiseUnary::Type::COS);
_map[ANEURALNETWORKS_SIN] = getElementwiseUnaryGenerator(operation::ElementwiseUnary::Type::SIN);
@@ -1733,10 +1753,10 @@ OperationFactory::OperationFactory()
};
_map[ANEURALNETWORKS_REDUCE_PROD] =
- getReduceGenerator(onert::ir::operation::Reduce::ReduceType::PROD);
+ getReduceGenerator(onert::ir::operation::Reduce::ReduceType::PROD);
_map[ANEURALNETWORKS_ROUND_EX] =
- getElementwiseUnaryGenerator(operation::ElementwiseUnary::Type::ROUND);
+ getElementwiseUnaryGenerator(operation::ElementwiseUnary::Type::ROUND);
_map[ANEURALNETWORKS_RANGE_EX] = [](const OperationFactory::Param &init_param, Operands &) {
assert(init_param.input_count == 3 && init_param.output_count == 1);
@@ -1764,7 +1784,7 @@ OperationFactory::OperationFactory()
_map[ANEURALNETWORKS_FILL_EX] = createSimpleBinaryOp<operation::Fill>;
_map[ANEURALNETWORKS_ZEROS_LIKE_EX] =
- getElementwiseUnaryGenerator(operation::ElementwiseUnary::Type::ZEROS_LIKE);
+ getElementwiseUnaryGenerator(operation::ElementwiseUnary::Type::ZEROS_LIKE);
// Each input should be interpreted as follows:
// 0 -> Input Tensor Index
// 1 -> Multiple Tensor Index
@@ -1904,7 +1924,7 @@ OperationFactory::OperationFactory()
};
_map[ANEURALNETWORKS_QUANTIZE] =
- getElementwiseUnaryGenerator(operation::ElementwiseUnary::Type::QUANTIZE);
+ getElementwiseUnaryGenerator(operation::ElementwiseUnary::Type::QUANTIZE);
}
Operation *OperationFactory::create(ANeuralNetworksOperationType type,
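
The OperationFactory changes above fold ArgMax into a shared ArgMinMax operation whose Param carries an is_arg_max flag, and register ANEURALNETWORKS_ARGMIN with the same two-input (tensor, axis) / one-output signature and a fixed int32 output type. The sketch below shows that shared-generator shape with simplified stand-in types; the diff itself registers each opcode with its own lambda, and the Param/ArgMinMax types and opcode values here are illustrative assumptions, not onert code.

// Illustrative sketch of a shared ARGMIN/ARGMAX generator, not the actual
// onert IR classes or the real ANEURALNETWORKS_* constants.
#include <cassert>
#include <cstdint>
#include <functional>
#include <map>
#include <vector>

struct Param
{
  bool is_arg_max; // true -> ARGMAX, false -> ARGMIN
  int output_type; // NNAPI fixes the output type to int32
};

struct ArgMinMax
{
  std::vector<uint32_t> inputs;  // {input tensor index, axis tensor index}
  std::vector<uint32_t> outputs; // {output tensor index}
  Param param;
};

using Generator =
  std::function<ArgMinMax(const std::vector<uint32_t> &, const std::vector<uint32_t> &)>;

// One generator serves both opcodes; only the flag differs.
Generator makeArgMinMaxGenerator(bool is_arg_max)
{
  return [is_arg_max](const std::vector<uint32_t> &inputs, const std::vector<uint32_t> &outputs) {
    assert(inputs.size() == 2 && outputs.size() == 1);
    Param param;
    param.is_arg_max = is_arg_max;
    param.output_type = 0; // stands in for DataType::INT32
    return ArgMinMax{inputs, outputs, param};
  };
}

int main()
{
  // Opcode keys are placeholders, not the real ANEURALNETWORKS_* values.
  std::map<int, Generator> map;
  const int kArgMax = 0;
  const int kArgMin = 1;
  map[kArgMax] = makeArgMinMaxGenerator(true);
  map[kArgMin] = makeArgMinMaxGenerator(false);

  auto node = map[kArgMin]({/*input*/ 0, /*axis*/ 1}, {/*output*/ 2});
  return node.param.is_arg_max ? 1 : 0; // returns 0: the ARGMIN path was taken
}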
diff --git a/runtime/onert/frontend/nnapi/wrapper/OperationFactory.h b/runtime/onert/frontend/nnapi/wrapper/OperationFactory.h
index 367cf74db..74e187421 100644
--- a/runtime/onert/frontend/nnapi/wrapper/OperationFactory.h
+++ b/runtime/onert/frontend/nnapi/wrapper/OperationFactory.h
@@ -40,7 +40,7 @@ public:
public:
using Generator =
- std::function<onert::ir::Operation *(const OperationFactory::Param &, onert::ir::Operands &)>;
+ std::function<onert::ir::Operation *(const OperationFactory::Param &, onert::ir::Operands &)>;
public:
static OperationFactory &get();
diff --git a/runtime/onert/frontend/tflite/src/tflite_schema_generated.h b/runtime/onert/frontend/tflite/src/tflite_schema_generated.h
index c6e9147cd..8e1b84e29 100644
--- a/runtime/onert/frontend/tflite/src/tflite_schema_generated.h
+++ b/runtime/onert/frontend/tflite/src/tflite_schema_generated.h
@@ -1710,9 +1710,8 @@ enum ActivationFunctionType
inline const ActivationFunctionType (&EnumValuesActivationFunctionType())[6]
{
static const ActivationFunctionType values[] = {
- ActivationFunctionType_NONE, ActivationFunctionType_RELU,
- ActivationFunctionType_RELU_N1_TO_1, ActivationFunctionType_RELU6,
- ActivationFunctionType_TANH, ActivationFunctionType_SIGN_BIT};
+ ActivationFunctionType_NONE, ActivationFunctionType_RELU, ActivationFunctionType_RELU_N1_TO_1,
+ ActivationFunctionType_RELU6, ActivationFunctionType_TANH, ActivationFunctionType_SIGN_BIT};
return values;
}
@@ -1768,8 +1767,8 @@ enum FullyConnectedOptionsWeightsFormat
inline const FullyConnectedOptionsWeightsFormat (&EnumValuesFullyConnectedOptionsWeightsFormat())[2]
{
static const FullyConnectedOptionsWeightsFormat values[] = {
- FullyConnectedOptionsWeightsFormat_DEFAULT,
- FullyConnectedOptionsWeightsFormat_SHUFFLED4x16INT8};
+ FullyConnectedOptionsWeightsFormat_DEFAULT,
+ FullyConnectedOptionsWeightsFormat_SHUFFLED4x16INT8};
return values;
}
@@ -1981,8 +1980,8 @@ struct QuantizationParameters FLATBUFFERS_FINAL_CLASS : private flatbuffers::Tab
const CustomQuantization *details_as_CustomQuantization() const
{
return details_type() == QuantizationDetails_CustomQuantization
- ? static_cast<const CustomQuantization *>(details())
- : nullptr;
+ ? static_cast<const CustomQuantization *>(details())
+ : nullptr;
}
int32_t quantized_dimension() const { return GetField<int32_t>(VT_QUANTIZED_DIMENSION, 0); }
bool Verify(flatbuffers::Verifier &verifier) const
@@ -2072,17 +2071,17 @@ CreateQuantizationParameters(flatbuffers::FlatBufferBuilder &_fbb,
}
inline flatbuffers::Offset<QuantizationParameters> CreateQuantizationParametersDirect(
- flatbuffers::FlatBufferBuilder &_fbb, const std::vector<float> *min = nullptr,
- const std::vector<float> *max = nullptr, const std::vector<float> *scale = nullptr,
- const std::vector<int64_t> *zero_point = nullptr,
- QuantizationDetails details_type = QuantizationDetails_NONE,
- flatbuffers::Offset<void> details = 0, int32_t quantized_dimension = 0)
+ flatbuffers::FlatBufferBuilder &_fbb, const std::vector<float> *min = nullptr,
+ const std::vector<float> *max = nullptr, const std::vector<float> *scale = nullptr,
+ const std::vector<int64_t> *zero_point = nullptr,
+ QuantizationDetails details_type = QuantizationDetails_NONE,
+ flatbuffers::Offset<void> details = 0, int32_t quantized_dimension = 0)
{
return onert_tflite::CreateQuantizationParameters(
- _fbb, min ? _fbb.CreateVector<float>(*min) : 0, max ? _fbb.CreateVector<float>(*max) : 0,
- scale ? _fbb.CreateVector<float>(*scale) : 0,
- zero_point ? _fbb.CreateVector<int64_t>(*zero_point) : 0, details_type, details,
- quantized_dimension);
+ _fbb, min ? _fbb.CreateVector<float>(*min) : 0, max ? _fbb.CreateVector<float>(*max) : 0,
+ scale ? _fbb.CreateVector<float>(*scale) : 0,
+ zero_point ? _fbb.CreateVector<int64_t>(*zero_point) : 0, details_type, details,
+ quantized_dimension);
}
struct Int32Vector FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table
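
The CreateQuantizationParametersDirect helper whose argument list is re-wrapped above takes plain std::vector pointers and converts each non-null one into a FlatBuffers vector on the builder before delegating to CreateQuantizationParameters. A small usage sketch follows, assuming the generated header is reachable under the file name used in this patch and that the helpers live in the onert_tflite namespace as the hunk suggests.

// Illustrative only: exercises the Direct helper seen above. The include path
// and the main() wrapper are assumptions made for this sketch.
#include <vector>
#include "tflite_schema_generated.h"

int main()
{
  flatbuffers::FlatBufferBuilder fbb;
  std::vector<float> scale{0.5f};
  std::vector<int64_t> zero_point{0};

  // Null pointers become empty (0) offsets; the helper checks each argument
  // before calling fbb.CreateVector(), as the hunk shows.
  auto qparams = onert_tflite::CreateQuantizationParametersDirect(
    fbb, /*min=*/nullptr, /*max=*/nullptr, &scale, &zero_point);
  fbb.Finish(qparams);
  return fbb.GetSize() > 0 ? 0 : 1;
}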
@@ -2272,20 +2271,20 @@ struct DimensionMetadata FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table
const Int32Vector *array_segments_as_Int32Vector() const
{
return array_segments_type() == SparseIndexVector_Int32Vector
- ? static_cast<const Int32Vector *>(array_segments())
- : nullptr;
+ ? static_cast<const Int32Vector *>(array_segments())
+ : nullptr;
}
const Uint16Vector *array_segments_as_Uint16Vector() const
{
return array_segments_type() == SparseIndexVector_Uint16Vector
- ? static_cast<const Uint16Vector *>(array_segments())
- : nullptr;
+ ? static_cast<const Uint16Vector *>(array_segments())
+ : nullptr;
}
const Uint8Vector *array_segments_as_Uint8Vector() const
{
return array_segments_type() == SparseIndexVector_Uint8Vector
- ? static_cast<const Uint8Vector *>(array_segments())
- : nullptr;
+ ? static_cast<const Uint8Vector *>(array_segments())
+ : nullptr;
}
SparseIndexVector array_indices_type() const
{
@@ -2296,20 +2295,20 @@ struct DimensionMetadata FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table
const Int32Vector *array_indices_as_Int32Vector() const
{
return array_indices_type() == SparseIndexVector_Int32Vector
- ? static_cast<const Int32Vector *>(array_indices())
- : nullptr;
+ ? static_cast<const Int32Vector *>(array_indices())
+ : nullptr;
}
const Uint16Vector *array_indices_as_Uint16Vector() const
{
return array_indices_type() == SparseIndexVector_Uint16Vector
- ? static_cast<const Uint16Vector *>(array_indices())
- : nullptr;
+ ? static_cast<const Uint16Vector *>(array_indices())
+ : nullptr;
}
const Uint8Vector *array_indices_as_Uint8Vector() const
{
return array_indices_type() == SparseIndexVector_Uint8Vector
- ? static_cast<const Uint8Vector *>(array_indices())
- : nullptr;
+ ? static_cast<const Uint8Vector *>(array_indices())
+ : nullptr;
}
bool Verify(flatbuffers::Verifier &verifier) const
{
@@ -2435,7 +2434,7 @@ struct SparsityParameters FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table
const flatbuffers::Vector<flatbuffers::Offset<DimensionMetadata>> *dim_metadata() const
{
return GetPointer<const flatbuffers::Vector<flatbuffers::Offset<DimensionMetadata>> *>(
- VT_DIM_METADATA);
+ VT_DIM_METADATA);
}
bool Verify(flatbuffers::Verifier &verifier) const
{
@@ -2460,7 +2459,7 @@ struct SparsityParametersBuilder
fbb_.AddOffset(SparsityParameters::VT_BLOCK_MAP, block_map);
}
void add_dim_metadata(
- flatbuffers::Offset<flatbuffers::Vector<flatbuffers::Offset<DimensionMetadata>>> dim_metadata)
+ flatbuffers::Offset<flatbuffers::Vector<flatbuffers::Offset<DimensionMetadata>>> dim_metadata)
{
fbb_.AddOffset(SparsityParameters::VT_DIM_METADATA, dim_metadata);
}
@@ -2478,11 +2477,10 @@ struct SparsityParametersBuilder
};
inline flatbuffers::Offset<SparsityParameters> CreateSparsityParameters(
- flatbuffers::FlatBufferBuilder &_fbb,
- flatbuffers::Offset<flatbuffers::Vector<int32_t>> traversal_order = 0,
- flatbuffers::Offset<flatbuffers::Vector<int32_t>> block_map = 0,
- flatbuffers::Offset<flatbuffers::Vector<flatbuffers::Offset<DimensionMetadata>>> dim_metadata =
- 0)
+ flatbuffers::FlatBufferBuilder &_fbb,
+ flatbuffers::Offset<flatbuffers::Vector<int32_t>> traversal_order = 0,
+ flatbuffers::Offset<flatbuffers::Vector<int32_t>> block_map = 0,
+ flatbuffers::Offset<flatbuffers::Vector<flatbuffers::Offset<DimensionMetadata>>> dim_metadata = 0)
{
SparsityParametersBuilder builder_(_fbb);
builder_.add_dim_metadata(dim_metadata);
@@ -2492,14 +2490,14 @@ inline flatbuffers::Offset<SparsityParameters> CreateSparsityParameters(
}
inline flatbuffers::Offset<SparsityParameters> CreateSparsityParametersDirect(
- flatbuffers::FlatBufferBuilder &_fbb, const std::vector<int32_t> *traversal_order = nullptr,
- const std::vector<int32_t> *block_map = nullptr,
- const std::vector<flatbuffers::Offset<DimensionMetadata>> *dim_metadata = nullptr)
+ flatbuffers::FlatBufferBuilder &_fbb, const std::vector<int32_t> *traversal_order = nullptr,
+ const std::vector<int32_t> *block_map = nullptr,
+ const std::vector<flatbuffers::Offset<DimensionMetadata>> *dim_metadata = nullptr)
{
return onert_tflite::CreateSparsityParameters(
- _fbb, traversal_order ? _fbb.CreateVector<int32_t>(*traversal_order) : 0,
- block_map ? _fbb.CreateVector<int32_t>(*block_map) : 0,
- dim_metadata ? _fbb.CreateVector<flatbuffers::Offset<DimensionMetadata>>(*dim_metadata) : 0);
+ _fbb, traversal_order ? _fbb.CreateVector<int32_t>(*traversal_order) : 0,
+ block_map ? _fbb.CreateVector<int32_t>(*block_map) : 0,
+ dim_metadata ? _fbb.CreateVector<flatbuffers::Offset<DimensionMetadata>>(*dim_metadata) : 0);
}
struct Tensor FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table
@@ -2619,16 +2617,16 @@ CreateTensor(flatbuffers::FlatBufferBuilder &_fbb,
}
inline flatbuffers::Offset<Tensor> CreateTensorDirect(
- flatbuffers::FlatBufferBuilder &_fbb, const std::vector<int32_t> *shape = nullptr,
- TensorType type = TensorType_FLOAT32, uint32_t buffer = 0, const char *name = nullptr,
- flatbuffers::Offset<QuantizationParameters> quantization = 0, bool is_variable = false,
- flatbuffers::Offset<SparsityParameters> sparsity = 0,
- const std::vector<int32_t> *shape_signature = nullptr)
+ flatbuffers::FlatBufferBuilder &_fbb, const std::vector<int32_t> *shape = nullptr,
+ TensorType type = TensorType_FLOAT32, uint32_t buffer = 0, const char *name = nullptr,
+ flatbuffers::Offset<QuantizationParameters> quantization = 0, bool is_variable = false,
+ flatbuffers::Offset<SparsityParameters> sparsity = 0,
+ const std::vector<int32_t> *shape_signature = nullptr)
{
return onert_tflite::CreateTensor(
- _fbb, shape ? _fbb.CreateVector<int32_t>(*shape) : 0, type, buffer,
- name ? _fbb.CreateString(name) : 0, quantization, is_variable, sparsity,
- shape_signature ? _fbb.CreateVector<int32_t>(*shape_signature) : 0);
+ _fbb, shape ? _fbb.CreateVector<int32_t>(*shape) : 0, type, buffer,
+ name ? _fbb.CreateString(name) : 0, quantization, is_variable, sparsity,
+ shape_signature ? _fbb.CreateVector<int32_t>(*shape_signature) : 0);
}
struct Conv2DOptions FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table
@@ -2890,10 +2888,10 @@ struct DepthwiseConv2DOptionsBuilder
};
inline flatbuffers::Offset<DepthwiseConv2DOptions> CreateDepthwiseConv2DOptions(
- flatbuffers::FlatBufferBuilder &_fbb, Padding padding = Padding_SAME, int32_t stride_w = 0,
- int32_t stride_h = 0, int32_t depth_multiplier = 0,
- ActivationFunctionType fused_activation_function = ActivationFunctionType_NONE,
- int32_t dilation_w_factor = 1, int32_t dilation_h_factor = 1)
+ flatbuffers::FlatBufferBuilder &_fbb, Padding padding = Padding_SAME, int32_t stride_w = 0,
+ int32_t stride_h = 0, int32_t depth_multiplier = 0,
+ ActivationFunctionType fused_activation_function = ActivationFunctionType_NONE,
+ int32_t dilation_w_factor = 1, int32_t dilation_h_factor = 1)
{
DepthwiseConv2DOptionsBuilder builder_(_fbb);
builder_.add_dilation_h_factor(dilation_h_factor);
@@ -2942,12 +2940,12 @@ struct ConcatEmbeddingsOptionsBuilder
fbb_.AddElement<int32_t>(ConcatEmbeddingsOptions::VT_NUM_CHANNELS, num_channels, 0);
}
void add_num_columns_per_channel(
- flatbuffers::Offset<flatbuffers::Vector<int32_t>> num_columns_per_channel)
+ flatbuffers::Offset<flatbuffers::Vector<int32_t>> num_columns_per_channel)
{
fbb_.AddOffset(ConcatEmbeddingsOptions::VT_NUM_COLUMNS_PER_CHANNEL, num_columns_per_channel);
}
void add_embedding_dim_per_channel(
- flatbuffers::Offset<flatbuffers::Vector<int32_t>> embedding_dim_per_channel)
+ flatbuffers::Offset<flatbuffers::Vector<int32_t>> embedding_dim_per_channel)
{
fbb_.AddOffset(ConcatEmbeddingsOptions::VT_EMBEDDING_DIM_PER_CHANNEL,
embedding_dim_per_channel);
@@ -2966,9 +2964,9 @@ struct ConcatEmbeddingsOptionsBuilder
};
inline flatbuffers::Offset<ConcatEmbeddingsOptions> CreateConcatEmbeddingsOptions(
- flatbuffers::FlatBufferBuilder &_fbb, int32_t num_channels = 0,
- flatbuffers::Offset<flatbuffers::Vector<int32_t>> num_columns_per_channel = 0,
- flatbuffers::Offset<flatbuffers::Vector<int32_t>> embedding_dim_per_channel = 0)
+ flatbuffers::FlatBufferBuilder &_fbb, int32_t num_channels = 0,
+ flatbuffers::Offset<flatbuffers::Vector<int32_t>> num_columns_per_channel = 0,
+ flatbuffers::Offset<flatbuffers::Vector<int32_t>> embedding_dim_per_channel = 0)
{
ConcatEmbeddingsOptionsBuilder builder_(_fbb);
builder_.add_embedding_dim_per_channel(embedding_dim_per_channel);
@@ -2983,9 +2981,9 @@ CreateConcatEmbeddingsOptionsDirect(flatbuffers::FlatBufferBuilder &_fbb, int32_
const std::vector<int32_t> *embedding_dim_per_channel = nullptr)
{
return onert_tflite::CreateConcatEmbeddingsOptions(
- _fbb, num_channels,
- num_columns_per_channel ? _fbb.CreateVector<int32_t>(*num_columns_per_channel) : 0,
- embedding_dim_per_channel ? _fbb.CreateVector<int32_t>(*embedding_dim_per_channel) : 0);
+ _fbb, num_channels,
+ num_columns_per_channel ? _fbb.CreateVector<int32_t>(*num_columns_per_channel) : 0,
+ embedding_dim_per_channel ? _fbb.CreateVector<int32_t>(*embedding_dim_per_channel) : 0);
}
struct LSHProjectionOptions FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table
@@ -3219,9 +3217,9 @@ struct SequenceRNNOptionsBuilder
};
inline flatbuffers::Offset<SequenceRNNOptions> CreateSequenceRNNOptions(
- flatbuffers::FlatBufferBuilder &_fbb, bool time_major = false,
- ActivationFunctionType fused_activation_function = ActivationFunctionType_NONE,
- bool asymmetric_quantize_inputs = false)
+ flatbuffers::FlatBufferBuilder &_fbb, bool time_major = false,
+ ActivationFunctionType fused_activation_function = ActivationFunctionType_NONE,
+ bool asymmetric_quantize_inputs = false)
{
SequenceRNNOptionsBuilder builder_(_fbb);
builder_.add_asymmetric_quantize_inputs(asymmetric_quantize_inputs);
@@ -3296,9 +3294,9 @@ struct BidirectionalSequenceRNNOptionsBuilder
};
inline flatbuffers::Offset<BidirectionalSequenceRNNOptions> CreateBidirectionalSequenceRNNOptions(
- flatbuffers::FlatBufferBuilder &_fbb, bool time_major = false,
- ActivationFunctionType fused_activation_function = ActivationFunctionType_NONE,
- bool merge_outputs = false, bool asymmetric_quantize_inputs = false)
+ flatbuffers::FlatBufferBuilder &_fbb, bool time_major = false,
+ ActivationFunctionType fused_activation_function = ActivationFunctionType_NONE,
+ bool merge_outputs = false, bool asymmetric_quantize_inputs = false)
{
BidirectionalSequenceRNNOptionsBuilder builder_(_fbb);
builder_.add_asymmetric_quantize_inputs(asymmetric_quantize_inputs);
@@ -3378,10 +3376,10 @@ struct FullyConnectedOptionsBuilder
};
inline flatbuffers::Offset<FullyConnectedOptions> CreateFullyConnectedOptions(
- flatbuffers::FlatBufferBuilder &_fbb,
- ActivationFunctionType fused_activation_function = ActivationFunctionType_NONE,
- FullyConnectedOptionsWeightsFormat weights_format = FullyConnectedOptionsWeightsFormat_DEFAULT,
- bool keep_num_dims = false, bool asymmetric_quantize_inputs = false)
+ flatbuffers::FlatBufferBuilder &_fbb,
+ ActivationFunctionType fused_activation_function = ActivationFunctionType_NONE,
+ FullyConnectedOptionsWeightsFormat weights_format = FullyConnectedOptionsWeightsFormat_DEFAULT,
+ bool keep_num_dims = false, bool asymmetric_quantize_inputs = false)
{
FullyConnectedOptionsBuilder builder_(_fbb);
builder_.add_asymmetric_quantize_inputs(asymmetric_quantize_inputs);
@@ -3474,8 +3472,8 @@ struct ConcatenationOptionsBuilder
};
inline flatbuffers::Offset<ConcatenationOptions> CreateConcatenationOptions(
- flatbuffers::FlatBufferBuilder &_fbb, int32_t axis = 0,
- ActivationFunctionType fused_activation_function = ActivationFunctionType_NONE)
+ flatbuffers::FlatBufferBuilder &_fbb, int32_t axis = 0,
+ ActivationFunctionType fused_activation_function = ActivationFunctionType_NONE)
{
ConcatenationOptionsBuilder builder_(_fbb);
builder_.add_axis(axis);
@@ -3669,7 +3667,7 @@ struct LocalResponseNormalizationOptionsBuilder
fbb_.AddElement<float>(LocalResponseNormalizationOptions::VT_BETA, beta, 0.0f);
}
explicit LocalResponseNormalizationOptionsBuilder(flatbuffers::FlatBufferBuilder &_fbb)
- : fbb_(_fbb)
+ : fbb_(_fbb)
{
start_ = fbb_.StartTable();
}
@@ -3845,7 +3843,7 @@ struct UnidirectionalSequenceLSTMOptionsBuilder
static_cast<uint8_t>(asymmetric_quantize_inputs), 0);
}
explicit UnidirectionalSequenceLSTMOptionsBuilder(flatbuffers::FlatBufferBuilder &_fbb)
- : fbb_(_fbb)
+ : fbb_(_fbb)
{
start_ = fbb_.StartTable();
}
@@ -3861,10 +3859,10 @@ struct UnidirectionalSequenceLSTMOptionsBuilder
inline flatbuffers::Offset<UnidirectionalSequenceLSTMOptions>
CreateUnidirectionalSequenceLSTMOptions(
- flatbuffers::FlatBufferBuilder &_fbb,
- ActivationFunctionType fused_activation_function = ActivationFunctionType_NONE,
- float cell_clip = 0.0f, float proj_clip = 0.0f, bool time_major = false,
- bool asymmetric_quantize_inputs = false)
+ flatbuffers::FlatBufferBuilder &_fbb,
+ ActivationFunctionType fused_activation_function = ActivationFunctionType_NONE,
+ float cell_clip = 0.0f, float proj_clip = 0.0f, bool time_major = false,
+ bool asymmetric_quantize_inputs = false)
{
UnidirectionalSequenceLSTMOptionsBuilder builder_(_fbb);
builder_.add_proj_clip(proj_clip);
@@ -3943,7 +3941,7 @@ struct BidirectionalSequenceLSTMOptionsBuilder
static_cast<uint8_t>(asymmetric_quantize_inputs), 0);
}
explicit BidirectionalSequenceLSTMOptionsBuilder(flatbuffers::FlatBufferBuilder &_fbb)
- : fbb_(_fbb)
+ : fbb_(_fbb)
{
start_ = fbb_.StartTable();
}
@@ -3958,10 +3956,10 @@ struct BidirectionalSequenceLSTMOptionsBuilder
};
inline flatbuffers::Offset<BidirectionalSequenceLSTMOptions> CreateBidirectionalSequenceLSTMOptions(
- flatbuffers::FlatBufferBuilder &_fbb,
- ActivationFunctionType fused_activation_function = ActivationFunctionType_NONE,
- float cell_clip = 0.0f, float proj_clip = 0.0f, bool merge_outputs = false,
- bool time_major = true, bool asymmetric_quantize_inputs = false)
+ flatbuffers::FlatBufferBuilder &_fbb,
+ ActivationFunctionType fused_activation_function = ActivationFunctionType_NONE,
+ float cell_clip = 0.0f, float proj_clip = 0.0f, bool merge_outputs = false,
+ bool time_major = true, bool asymmetric_quantize_inputs = false)
{
BidirectionalSequenceLSTMOptionsBuilder builder_(_fbb);
builder_.add_proj_clip(proj_clip);
@@ -4844,7 +4842,7 @@ CreateSqueezeOptionsDirect(flatbuffers::FlatBufferBuilder &_fbb,
const std::vector<int32_t> *squeeze_dims = nullptr)
{
return onert_tflite::CreateSqueezeOptions(
- _fbb, squeeze_dims ? _fbb.CreateVector<int32_t>(*squeeze_dims) : 0);
+ _fbb, squeeze_dims ? _fbb.CreateVector<int32_t>(*squeeze_dims) : 0);
}
struct SplitOptions FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table
@@ -7206,7 +7204,7 @@ CreateOperatorCodeDirect(flatbuffers::FlatBufferBuilder &_fbb,
const char *custom_code = nullptr, int32_t version = 1)
{
return onert_tflite::CreateOperatorCode(
- _fbb, builtin_code, custom_code ? _fbb.CreateString(custom_code) : 0, version);
+ _fbb, builtin_code, custom_code ? _fbb.CreateString(custom_code) : 0, version);
}
struct Operator FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table
@@ -7241,611 +7239,611 @@ struct Operator FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table
const Conv2DOptions *builtin_options_as_Conv2DOptions() const
{
return builtin_options_type() == BuiltinOptions_Conv2DOptions
- ? static_cast<const Conv2DOptions *>(builtin_options())
- : nullptr;
+ ? static_cast<const Conv2DOptions *>(builtin_options())
+ : nullptr;
}
const DepthwiseConv2DOptions *builtin_options_as_DepthwiseConv2DOptions() const
{
return builtin_options_type() == BuiltinOptions_DepthwiseConv2DOptions
- ? static_cast<const DepthwiseConv2DOptions *>(builtin_options())
- : nullptr;
+ ? static_cast<const DepthwiseConv2DOptions *>(builtin_options())
+ : nullptr;
}
const ConcatEmbeddingsOptions *builtin_options_as_ConcatEmbeddingsOptions() const
{
return builtin_options_type() == BuiltinOptions_ConcatEmbeddingsOptions
- ? static_cast<const ConcatEmbeddingsOptions *>(builtin_options())
- : nullptr;
+ ? static_cast<const ConcatEmbeddingsOptions *>(builtin_options())
+ : nullptr;
}
const LSHProjectionOptions *builtin_options_as_LSHProjectionOptions() const
{
return builtin_options_type() == BuiltinOptions_LSHProjectionOptions
- ? static_cast<const LSHProjectionOptions *>(builtin_options())
- : nullptr;
+ ? static_cast<const LSHProjectionOptions *>(builtin_options())
+ : nullptr;
}
const Pool2DOptions *builtin_options_as_Pool2DOptions() const
{
return builtin_options_type() == BuiltinOptions_Pool2DOptions
- ? static_cast<const Pool2DOptions *>(builtin_options())
- : nullptr;
+ ? static_cast<const Pool2DOptions *>(builtin_options())
+ : nullptr;
}
const SVDFOptions *builtin_options_as_SVDFOptions() const
{
return builtin_options_type() == BuiltinOptions_SVDFOptions
- ? static_cast<const SVDFOptions *>(builtin_options())
- : nullptr;
+ ? static_cast<const SVDFOptions *>(builtin_options())
+ : nullptr;
}
const RNNOptions *builtin_options_as_RNNOptions() const
{
return builtin_options_type() == BuiltinOptions_RNNOptions
- ? static_cast<const RNNOptions *>(builtin_options())
- : nullptr;
+ ? static_cast<const RNNOptions *>(builtin_options())
+ : nullptr;
}
const FullyConnectedOptions *builtin_options_as_FullyConnectedOptions() const
{
return builtin_options_type() == BuiltinOptions_FullyConnectedOptions
- ? static_cast<const FullyConnectedOptions *>(builtin_options())
- : nullptr;
+ ? static_cast<const FullyConnectedOptions *>(builtin_options())
+ : nullptr;
}
const SoftmaxOptions *builtin_options_as_SoftmaxOptions() const
{
return builtin_options_type() == BuiltinOptions_SoftmaxOptions
- ? static_cast<const SoftmaxOptions *>(builtin_options())
- : nullptr;
+ ? static_cast<const SoftmaxOptions *>(builtin_options())
+ : nullptr;
}
const ConcatenationOptions *builtin_options_as_ConcatenationOptions() const
{
return builtin_options_type() == BuiltinOptions_ConcatenationOptions
- ? static_cast<const ConcatenationOptions *>(builtin_options())
- : nullptr;
+ ? static_cast<const ConcatenationOptions *>(builtin_options())
+ : nullptr;
}
const AddOptions *builtin_options_as_AddOptions() const
{
return builtin_options_type() == BuiltinOptions_AddOptions
- ? static_cast<const AddOptions *>(builtin_options())
- : nullptr;
+ ? static_cast<const AddOptions *>(builtin_options())
+ : nullptr;
}
const L2NormOptions *builtin_options_as_L2NormOptions() const
{
return builtin_options_type() == BuiltinOptions_L2NormOptions
- ? static_cast<const L2NormOptions *>(builtin_options())
- : nullptr;
+ ? static_cast<const L2NormOptions *>(builtin_options())
+ : nullptr;
}
const LocalResponseNormalizationOptions *
builtin_options_as_LocalResponseNormalizationOptions() const
{
return builtin_options_type() == BuiltinOptions_LocalResponseNormalizationOptions
- ? static_cast<const LocalResponseNormalizationOptions *>(builtin_options())
- : nullptr;
+ ? static_cast<const LocalResponseNormalizationOptions *>(builtin_options())
+ : nullptr;
}
const LSTMOptions *builtin_options_as_LSTMOptions() const
{
return builtin_options_type() == BuiltinOptions_LSTMOptions
- ? static_cast<const LSTMOptions *>(builtin_options())
- : nullptr;
+ ? static_cast<const LSTMOptions *>(builtin_options())
+ : nullptr;
}
const ResizeBilinearOptions *builtin_options_as_ResizeBilinearOptions() const
{
return builtin_options_type() == BuiltinOptions_ResizeBilinearOptions
- ? static_cast<const ResizeBilinearOptions *>(builtin_options())
- : nullptr;
+ ? static_cast<const ResizeBilinearOptions *>(builtin_options())
+ : nullptr;
}
const CallOptions *builtin_options_as_CallOptions() const
{
return builtin_options_type() == BuiltinOptions_CallOptions
- ? static_cast<const CallOptions *>(builtin_options())
- : nullptr;
+ ? static_cast<const CallOptions *>(builtin_options())
+ : nullptr;
}
const ReshapeOptions *builtin_options_as_ReshapeOptions() const
{
return builtin_options_type() == BuiltinOptions_ReshapeOptions
- ? static_cast<const ReshapeOptions *>(builtin_options())
- : nullptr;
+ ? static_cast<const ReshapeOptions *>(builtin_options())
+ : nullptr;
}
const SkipGramOptions *builtin_options_as_SkipGramOptions() const
{
return builtin_options_type() == BuiltinOptions_SkipGramOptions
- ? static_cast<const SkipGramOptions *>(builtin_options())
- : nullptr;
+ ? static_cast<const SkipGramOptions *>(builtin_options())
+ : nullptr;
}
const SpaceToDepthOptions *builtin_options_as_SpaceToDepthOptions() const
{
return builtin_options_type() == BuiltinOptions_SpaceToDepthOptions
- ? static_cast<const SpaceToDepthOptions *>(builtin_options())
- : nullptr;
+ ? static_cast<const SpaceToDepthOptions *>(builtin_options())
+ : nullptr;
}
const EmbeddingLookupSparseOptions *builtin_options_as_EmbeddingLookupSparseOptions() const
{
return builtin_options_type() == BuiltinOptions_EmbeddingLookupSparseOptions
- ? static_cast<const EmbeddingLookupSparseOptions *>(builtin_options())
- : nullptr;
+ ? static_cast<const EmbeddingLookupSparseOptions *>(builtin_options())
+ : nullptr;
}
const MulOptions *builtin_options_as_MulOptions() const
{
return builtin_options_type() == BuiltinOptions_MulOptions
- ? static_cast<const MulOptions *>(builtin_options())
- : nullptr;
+ ? static_cast<const MulOptions *>(builtin_options())
+ : nullptr;
}
const PadOptions *builtin_options_as_PadOptions() const
{
return builtin_options_type() == BuiltinOptions_PadOptions
- ? static_cast<const PadOptions *>(builtin_options())
- : nullptr;
+ ? static_cast<const PadOptions *>(builtin_options())
+ : nullptr;
}
const GatherOptions *builtin_options_as_GatherOptions() const
{
return builtin_options_type() == BuiltinOptions_GatherOptions
- ? static_cast<const GatherOptions *>(builtin_options())
- : nullptr;
+ ? static_cast<const GatherOptions *>(builtin_options())
+ : nullptr;
}
const BatchToSpaceNDOptions *builtin_options_as_BatchToSpaceNDOptions() const
{
return builtin_options_type() == BuiltinOptions_BatchToSpaceNDOptions
- ? static_cast<const BatchToSpaceNDOptions *>(builtin_options())
- : nullptr;
+ ? static_cast<const BatchToSpaceNDOptions *>(builtin_options())
+ : nullptr;
}
const SpaceToBatchNDOptions *builtin_options_as_SpaceToBatchNDOptions() const
{
return builtin_options_type() == BuiltinOptions_SpaceToBatchNDOptions
- ? static_cast<const SpaceToBatchNDOptions *>(builtin_options())
- : nullptr;
+ ? static_cast<const SpaceToBatchNDOptions *>(builtin_options())
+ : nullptr;
}
const TransposeOptions *builtin_options_as_TransposeOptions() const
{
return builtin_options_type() == BuiltinOptions_TransposeOptions
- ? static_cast<const TransposeOptions *>(builtin_options())
- : nullptr;
+ ? static_cast<const TransposeOptions *>(builtin_options())
+ : nullptr;
}
const ReducerOptions *builtin_options_as_ReducerOptions() const
{
return builtin_options_type() == BuiltinOptions_ReducerOptions
- ? static_cast<const ReducerOptions *>(builtin_options())
- : nullptr;
+ ? static_cast<const ReducerOptions *>(builtin_options())
+ : nullptr;
}
const SubOptions *builtin_options_as_SubOptions() const
{
return builtin_options_type() == BuiltinOptions_SubOptions
- ? static_cast<const SubOptions *>(builtin_options())
- : nullptr;
+ ? static_cast<const SubOptions *>(builtin_options())
+ : nullptr;
}
const DivOptions *builtin_options_as_DivOptions() const
{
return builtin_options_type() == BuiltinOptions_DivOptions
- ? static_cast<const DivOptions *>(builtin_options())
- : nullptr;
+ ? static_cast<const DivOptions *>(builtin_options())
+ : nullptr;
}
const SqueezeOptions *builtin_options_as_SqueezeOptions() const
{
return builtin_options_type() == BuiltinOptions_SqueezeOptions
- ? static_cast<const SqueezeOptions *>(builtin_options())
- : nullptr;
+ ? static_cast<const SqueezeOptions *>(builtin_options())
+ : nullptr;
}
const SequenceRNNOptions *builtin_options_as_SequenceRNNOptions() const
{
return builtin_options_type() == BuiltinOptions_SequenceRNNOptions
- ? static_cast<const SequenceRNNOptions *>(builtin_options())
- : nullptr;
+ ? static_cast<const SequenceRNNOptions *>(builtin_options())
+ : nullptr;
}
const StridedSliceOptions *builtin_options_as_StridedSliceOptions() const
{
return builtin_options_type() == BuiltinOptions_StridedSliceOptions
- ? static_cast<const StridedSliceOptions *>(builtin_options())
- : nullptr;
+ ? static_cast<const StridedSliceOptions *>(builtin_options())
+ : nullptr;
}
const ExpOptions *builtin_options_as_ExpOptions() const
{
return builtin_options_type() == BuiltinOptions_ExpOptions
- ? static_cast<const ExpOptions *>(builtin_options())
- : nullptr;
+ ? static_cast<const ExpOptions *>(builtin_options())
+ : nullptr;
}
const TopKV2Options *builtin_options_as_TopKV2Options() const
{
return builtin_options_type() == BuiltinOptions_TopKV2Options
- ? static_cast<const TopKV2Options *>(builtin_options())
- : nullptr;
+ ? static_cast<const TopKV2Options *>(builtin_options())
+ : nullptr;
}
const SplitOptions *builtin_options_as_SplitOptions() const
{
return builtin_options_type() == BuiltinOptions_SplitOptions
- ? static_cast<const SplitOptions *>(builtin_options())
- : nullptr;
+ ? static_cast<const SplitOptions *>(builtin_options())
+ : nullptr;
}
const LogSoftmaxOptions *builtin_options_as_LogSoftmaxOptions() const
{
return builtin_options_type() == BuiltinOptions_LogSoftmaxOptions
- ? static_cast<const LogSoftmaxOptions *>(builtin_options())
- : nullptr;
+ ? static_cast<const LogSoftmaxOptions *>(builtin_options())
+ : nullptr;
}
const CastOptions *builtin_options_as_CastOptions() const
{
return builtin_options_type() == BuiltinOptions_CastOptions
- ? static_cast<const CastOptions *>(builtin_options())
- : nullptr;
+ ? static_cast<const CastOptions *>(builtin_options())
+ : nullptr;
}
const DequantizeOptions *builtin_options_as_DequantizeOptions() const
{
return builtin_options_type() == BuiltinOptions_DequantizeOptions
- ? static_cast<const DequantizeOptions *>(builtin_options())
- : nullptr;
+ ? static_cast<const DequantizeOptions *>(builtin_options())
+ : nullptr;
}
const MaximumMinimumOptions *builtin_options_as_MaximumMinimumOptions() const
{
return builtin_options_type() == BuiltinOptions_MaximumMinimumOptions
- ? static_cast<const MaximumMinimumOptions *>(builtin_options())
- : nullptr;
+ ? static_cast<const MaximumMinimumOptions *>(builtin_options())
+ : nullptr;
}
const ArgMaxOptions *builtin_options_as_ArgMaxOptions() const
{
return builtin_options_type() == BuiltinOptions_ArgMaxOptions
- ? static_cast<const ArgMaxOptions *>(builtin_options())
- : nullptr;
+ ? static_cast<const ArgMaxOptions *>(builtin_options())
+ : nullptr;
}
const LessOptions *builtin_options_as_LessOptions() const
{
return builtin_options_type() == BuiltinOptions_LessOptions
- ? static_cast<const LessOptions *>(builtin_options())
- : nullptr;
+ ? static_cast<const LessOptions *>(builtin_options())
+ : nullptr;
}
const NegOptions *builtin_options_as_NegOptions() const
{
return builtin_options_type() == BuiltinOptions_NegOptions
- ? static_cast<const NegOptions *>(builtin_options())
- : nullptr;
+ ? static_cast<const NegOptions *>(builtin_options())
+ : nullptr;
}
const PadV2Options *builtin_options_as_PadV2Options() const
{
return builtin_options_type() == BuiltinOptions_PadV2Options
- ? static_cast<const PadV2Options *>(builtin_options())
- : nullptr;
+ ? static_cast<const PadV2Options *>(builtin_options())
+ : nullptr;
}
const GreaterOptions *builtin_options_as_GreaterOptions() const
{
return builtin_options_type() == BuiltinOptions_GreaterOptions
- ? static_cast<const GreaterOptions *>(builtin_options())
- : nullptr;
+ ? static_cast<const GreaterOptions *>(builtin_options())
+ : nullptr;
}
const GreaterEqualOptions *builtin_options_as_GreaterEqualOptions() const
{
return builtin_options_type() == BuiltinOptions_GreaterEqualOptions
- ? static_cast<const GreaterEqualOptions *>(builtin_options())
- : nullptr;
+ ? static_cast<const GreaterEqualOptions *>(builtin_options())
+ : nullptr;
}
const LessEqualOptions *builtin_options_as_LessEqualOptions() const
{
return builtin_options_type() == BuiltinOptions_LessEqualOptions
- ? static_cast<const LessEqualOptions *>(builtin_options())
- : nullptr;
+ ? static_cast<const LessEqualOptions *>(builtin_options())
+ : nullptr;
}
const SelectOptions *builtin_options_as_SelectOptions() const
{
return builtin_options_type() == BuiltinOptions_SelectOptions
- ? static_cast<const SelectOptions *>(builtin_options())
- : nullptr;
+ ? static_cast<const SelectOptions *>(builtin_options())
+ : nullptr;
}
const SliceOptions *builtin_options_as_SliceOptions() const
{
return builtin_options_type() == BuiltinOptions_SliceOptions
- ? static_cast<const SliceOptions *>(builtin_options())
- : nullptr;
+ ? static_cast<const SliceOptions *>(builtin_options())
+ : nullptr;
}
const TransposeConvOptions *builtin_options_as_TransposeConvOptions() const
{
return builtin_options_type() == BuiltinOptions_TransposeConvOptions
- ? static_cast<const TransposeConvOptions *>(builtin_options())
- : nullptr;
+ ? static_cast<const TransposeConvOptions *>(builtin_options())
+ : nullptr;
}
const SparseToDenseOptions *builtin_options_as_SparseToDenseOptions() const
{
return builtin_options_type() == BuiltinOptions_SparseToDenseOptions
- ? static_cast<const SparseToDenseOptions *>(builtin_options())
- : nullptr;
+ ? static_cast<const SparseToDenseOptions *>(builtin_options())
+ : nullptr;
}
const TileOptions *builtin_options_as_TileOptions() const
{
return builtin_options_type() == BuiltinOptions_TileOptions
- ? static_cast<const TileOptions *>(builtin_options())
- : nullptr;
+ ? static_cast<const TileOptions *>(builtin_options())
+ : nullptr;
}
const ExpandDimsOptions *builtin_options_as_ExpandDimsOptions() const
{
return builtin_options_type() == BuiltinOptions_ExpandDimsOptions
- ? static_cast<const ExpandDimsOptions *>(builtin_options())
- : nullptr;
+ ? static_cast<const ExpandDimsOptions *>(builtin_options())
+ : nullptr;
}
const EqualOptions *builtin_options_as_EqualOptions() const
{
return builtin_options_type() == BuiltinOptions_EqualOptions
- ? static_cast<const EqualOptions *>(builtin_options())
- : nullptr;
+ ? static_cast<const EqualOptions *>(builtin_options())
+ : nullptr;
}
const NotEqualOptions *builtin_options_as_NotEqualOptions() const
{
return builtin_options_type() == BuiltinOptions_NotEqualOptions
- ? static_cast<const NotEqualOptions *>(builtin_options())
- : nullptr;
+ ? static_cast<const NotEqualOptions *>(builtin_options())
+ : nullptr;
}
const ShapeOptions *builtin_options_as_ShapeOptions() const
{
return builtin_options_type() == BuiltinOptions_ShapeOptions
- ? static_cast<const ShapeOptions *>(builtin_options())
- : nullptr;
+ ? static_cast<const ShapeOptions *>(builtin_options())
+ : nullptr;
}
const PowOptions *builtin_options_as_PowOptions() const
{
return builtin_options_type() == BuiltinOptions_PowOptions
- ? static_cast<const PowOptions *>(builtin_options())
- : nullptr;
+ ? static_cast<const PowOptions *>(builtin_options())
+ : nullptr;
}
const ArgMinOptions *builtin_options_as_ArgMinOptions() const
{
return builtin_options_type() == BuiltinOptions_ArgMinOptions
- ? static_cast<const ArgMinOptions *>(builtin_options())
- : nullptr;
+ ? static_cast<const ArgMinOptions *>(builtin_options())
+ : nullptr;
}
const FakeQuantOptions *builtin_options_as_FakeQuantOptions() const
{
return builtin_options_type() == BuiltinOptions_FakeQuantOptions
- ? static_cast<const FakeQuantOptions *>(builtin_options())
- : nullptr;
+ ? static_cast<const FakeQuantOptions *>(builtin_options())
+ : nullptr;
}
const PackOptions *builtin_options_as_PackOptions() const
{
return builtin_options_type() == BuiltinOptions_PackOptions
- ? static_cast<const PackOptions *>(builtin_options())
- : nullptr;
+ ? static_cast<const PackOptions *>(builtin_options())
+ : nullptr;
}
const LogicalOrOptions *builtin_options_as_LogicalOrOptions() const
{
return builtin_options_type() == BuiltinOptions_LogicalOrOptions
- ? static_cast<const LogicalOrOptions *>(builtin_options())
- : nullptr;
+ ? static_cast<const LogicalOrOptions *>(builtin_options())
+ : nullptr;
}
const OneHotOptions *builtin_options_as_OneHotOptions() const
{
return builtin_options_type() == BuiltinOptions_OneHotOptions
- ? static_cast<const OneHotOptions *>(builtin_options())
- : nullptr;
+ ? static_cast<const OneHotOptions *>(builtin_options())
+ : nullptr;
}
const LogicalAndOptions *builtin_options_as_LogicalAndOptions() const
{
return builtin_options_type() == BuiltinOptions_LogicalAndOptions
- ? static_cast<const LogicalAndOptions *>(builtin_options())
- : nullptr;
+ ? static_cast<const LogicalAndOptions *>(builtin_options())
+ : nullptr;
}
const LogicalNotOptions *builtin_options_as_LogicalNotOptions() const
{
return builtin_options_type() == BuiltinOptions_LogicalNotOptions
- ? static_cast<const LogicalNotOptions *>(builtin_options())
- : nullptr;
+ ? static_cast<const LogicalNotOptions *>(builtin_options())
+ : nullptr;
}
const UnpackOptions *builtin_options_as_UnpackOptions() const
{
return builtin_options_type() == BuiltinOptions_UnpackOptions
- ? static_cast<const UnpackOptions *>(builtin_options())
- : nullptr;
+ ? static_cast<const UnpackOptions *>(builtin_options())
+ : nullptr;
}
const FloorDivOptions *builtin_options_as_FloorDivOptions() const
{
return builtin_options_type() == BuiltinOptions_FloorDivOptions
- ? static_cast<const FloorDivOptions *>(builtin_options())
- : nullptr;
+ ? static_cast<const FloorDivOptions *>(builtin_options())
+ : nullptr;
}
const SquareOptions *builtin_options_as_SquareOptions() const
{
return builtin_options_type() == BuiltinOptions_SquareOptions
- ? static_cast<const SquareOptions *>(builtin_options())
- : nullptr;
+ ? static_cast<const SquareOptions *>(builtin_options())
+ : nullptr;
}
const ZerosLikeOptions *builtin_options_as_ZerosLikeOptions() const
{
return builtin_options_type() == BuiltinOptions_ZerosLikeOptions
- ? static_cast<const ZerosLikeOptions *>(builtin_options())
- : nullptr;
+ ? static_cast<const ZerosLikeOptions *>(builtin_options())
+ : nullptr;
}
const FillOptions *builtin_options_as_FillOptions() const
{
return builtin_options_type() == BuiltinOptions_FillOptions
- ? static_cast<const FillOptions *>(builtin_options())
- : nullptr;
+ ? static_cast<const FillOptions *>(builtin_options())
+ : nullptr;
}
const BidirectionalSequenceLSTMOptions *
builtin_options_as_BidirectionalSequenceLSTMOptions() const
{
return builtin_options_type() == BuiltinOptions_BidirectionalSequenceLSTMOptions
- ? static_cast<const BidirectionalSequenceLSTMOptions *>(builtin_options())
- : nullptr;
+ ? static_cast<const BidirectionalSequenceLSTMOptions *>(builtin_options())
+ : nullptr;
}
const BidirectionalSequenceRNNOptions *builtin_options_as_BidirectionalSequenceRNNOptions() const
{
return builtin_options_type() == BuiltinOptions_BidirectionalSequenceRNNOptions
- ? static_cast<const BidirectionalSequenceRNNOptions *>(builtin_options())
- : nullptr;
+ ? static_cast<const BidirectionalSequenceRNNOptions *>(builtin_options())
+ : nullptr;
}
const UnidirectionalSequenceLSTMOptions *
builtin_options_as_UnidirectionalSequenceLSTMOptions() const
{
return builtin_options_type() == BuiltinOptions_UnidirectionalSequenceLSTMOptions
- ? static_cast<const UnidirectionalSequenceLSTMOptions *>(builtin_options())
- : nullptr;
+ ? static_cast<const UnidirectionalSequenceLSTMOptions *>(builtin_options())
+ : nullptr;
}
const FloorModOptions *builtin_options_as_FloorModOptions() const
{
return builtin_options_type() == BuiltinOptions_FloorModOptions
- ? static_cast<const FloorModOptions *>(builtin_options())
- : nullptr;
+ ? static_cast<const FloorModOptions *>(builtin_options())
+ : nullptr;
}
const RangeOptions *builtin_options_as_RangeOptions() const
{
return builtin_options_type() == BuiltinOptions_RangeOptions
- ? static_cast<const RangeOptions *>(builtin_options())
- : nullptr;
+ ? static_cast<const RangeOptions *>(builtin_options())
+ : nullptr;
}
const ResizeNearestNeighborOptions *builtin_options_as_ResizeNearestNeighborOptions() const
{
return builtin_options_type() == BuiltinOptions_ResizeNearestNeighborOptions
- ? static_cast<const ResizeNearestNeighborOptions *>(builtin_options())
- : nullptr;
+ ? static_cast<const ResizeNearestNeighborOptions *>(builtin_options())
+ : nullptr;
}
const LeakyReluOptions *builtin_options_as_LeakyReluOptions() const
{
return builtin_options_type() == BuiltinOptions_LeakyReluOptions
- ? static_cast<const LeakyReluOptions *>(builtin_options())
- : nullptr;
+ ? static_cast<const LeakyReluOptions *>(builtin_options())
+ : nullptr;
}
const SquaredDifferenceOptions *builtin_options_as_SquaredDifferenceOptions() const
{
return builtin_options_type() == BuiltinOptions_SquaredDifferenceOptions
- ? static_cast<const SquaredDifferenceOptions *>(builtin_options())
- : nullptr;
+ ? static_cast<const SquaredDifferenceOptions *>(builtin_options())
+ : nullptr;
}
const MirrorPadOptions *builtin_options_as_MirrorPadOptions() const
{
return builtin_options_type() == BuiltinOptions_MirrorPadOptions
- ? static_cast<const MirrorPadOptions *>(builtin_options())
- : nullptr;
+ ? static_cast<const MirrorPadOptions *>(builtin_options())
+ : nullptr;
}
const AbsOptions *builtin_options_as_AbsOptions() const
{
return builtin_options_type() == BuiltinOptions_AbsOptions
- ? static_cast<const AbsOptions *>(builtin_options())
- : nullptr;
+ ? static_cast<const AbsOptions *>(builtin_options())
+ : nullptr;
}
const SplitVOptions *builtin_options_as_SplitVOptions() const
{
return builtin_options_type() == BuiltinOptions_SplitVOptions
- ? static_cast<const SplitVOptions *>(builtin_options())
- : nullptr;
+ ? static_cast<const SplitVOptions *>(builtin_options())
+ : nullptr;
}
const UniqueOptions *builtin_options_as_UniqueOptions() const
{
return builtin_options_type() == BuiltinOptions_UniqueOptions
- ? static_cast<const UniqueOptions *>(builtin_options())
- : nullptr;
+ ? static_cast<const UniqueOptions *>(builtin_options())
+ : nullptr;
}
const ReverseV2Options *builtin_options_as_ReverseV2Options() const
{
return builtin_options_type() == BuiltinOptions_ReverseV2Options
- ? static_cast<const ReverseV2Options *>(builtin_options())
- : nullptr;
+ ? static_cast<const ReverseV2Options *>(builtin_options())
+ : nullptr;
}
const AddNOptions *builtin_options_as_AddNOptions() const
{
return builtin_options_type() == BuiltinOptions_AddNOptions
- ? static_cast<const AddNOptions *>(builtin_options())
- : nullptr;
+ ? static_cast<const AddNOptions *>(builtin_options())
+ : nullptr;
}
const GatherNdOptions *builtin_options_as_GatherNdOptions() const
{
return builtin_options_type() == BuiltinOptions_GatherNdOptions
- ? static_cast<const GatherNdOptions *>(builtin_options())
- : nullptr;
+ ? static_cast<const GatherNdOptions *>(builtin_options())
+ : nullptr;
}
const CosOptions *builtin_options_as_CosOptions() const
{
return builtin_options_type() == BuiltinOptions_CosOptions
- ? static_cast<const CosOptions *>(builtin_options())
- : nullptr;
+ ? static_cast<const CosOptions *>(builtin_options())
+ : nullptr;
}
const WhereOptions *builtin_options_as_WhereOptions() const
{
return builtin_options_type() == BuiltinOptions_WhereOptions
- ? static_cast<const WhereOptions *>(builtin_options())
- : nullptr;
+ ? static_cast<const WhereOptions *>(builtin_options())
+ : nullptr;
}
const RankOptions *builtin_options_as_RankOptions() const
{
return builtin_options_type() == BuiltinOptions_RankOptions
- ? static_cast<const RankOptions *>(builtin_options())
- : nullptr;
+ ? static_cast<const RankOptions *>(builtin_options())
+ : nullptr;
}
const ReverseSequenceOptions *builtin_options_as_ReverseSequenceOptions() const
{
return builtin_options_type() == BuiltinOptions_ReverseSequenceOptions
- ? static_cast<const ReverseSequenceOptions *>(builtin_options())
- : nullptr;
+ ? static_cast<const ReverseSequenceOptions *>(builtin_options())
+ : nullptr;
}
const MatrixDiagOptions *builtin_options_as_MatrixDiagOptions() const
{
return builtin_options_type() == BuiltinOptions_MatrixDiagOptions
- ? static_cast<const MatrixDiagOptions *>(builtin_options())
- : nullptr;
+ ? static_cast<const MatrixDiagOptions *>(builtin_options())
+ : nullptr;
}
const QuantizeOptions *builtin_options_as_QuantizeOptions() const
{
return builtin_options_type() == BuiltinOptions_QuantizeOptions
- ? static_cast<const QuantizeOptions *>(builtin_options())
- : nullptr;
+ ? static_cast<const QuantizeOptions *>(builtin_options())
+ : nullptr;
}
const MatrixSetDiagOptions *builtin_options_as_MatrixSetDiagOptions() const
{
return builtin_options_type() == BuiltinOptions_MatrixSetDiagOptions
- ? static_cast<const MatrixSetDiagOptions *>(builtin_options())
- : nullptr;
+ ? static_cast<const MatrixSetDiagOptions *>(builtin_options())
+ : nullptr;
}
const HardSwishOptions *builtin_options_as_HardSwishOptions() const
{
return builtin_options_type() == BuiltinOptions_HardSwishOptions
- ? static_cast<const HardSwishOptions *>(builtin_options())
- : nullptr;
+ ? static_cast<const HardSwishOptions *>(builtin_options())
+ : nullptr;
}
const IfOptions *builtin_options_as_IfOptions() const
{
return builtin_options_type() == BuiltinOptions_IfOptions
- ? static_cast<const IfOptions *>(builtin_options())
- : nullptr;
+ ? static_cast<const IfOptions *>(builtin_options())
+ : nullptr;
}
const WhileOptions *builtin_options_as_WhileOptions() const
{
return builtin_options_type() == BuiltinOptions_WhileOptions
- ? static_cast<const WhileOptions *>(builtin_options())
- : nullptr;
+ ? static_cast<const WhileOptions *>(builtin_options())
+ : nullptr;
}
const DepthToSpaceOptions *builtin_options_as_DepthToSpaceOptions() const
{
return builtin_options_type() == BuiltinOptions_DepthToSpaceOptions
- ? static_cast<const DepthToSpaceOptions *>(builtin_options())
- : nullptr;
+ ? static_cast<const DepthToSpaceOptions *>(builtin_options())
+ : nullptr;
}
const NonMaxSuppressionV4Options *builtin_options_as_NonMaxSuppressionV4Options() const
{
return builtin_options_type() == BuiltinOptions_NonMaxSuppressionV4Options
- ? static_cast<const NonMaxSuppressionV4Options *>(builtin_options())
- : nullptr;
+ ? static_cast<const NonMaxSuppressionV4Options *>(builtin_options())
+ : nullptr;
}
const NonMaxSuppressionV5Options *builtin_options_as_NonMaxSuppressionV5Options() const
{
return builtin_options_type() == BuiltinOptions_NonMaxSuppressionV5Options
- ? static_cast<const NonMaxSuppressionV5Options *>(builtin_options())
- : nullptr;
+ ? static_cast<const NonMaxSuppressionV5Options *>(builtin_options())
+ : nullptr;
}
const ScatterNdOptions *builtin_options_as_ScatterNdOptions() const
{
return builtin_options_type() == BuiltinOptions_ScatterNdOptions
- ? static_cast<const ScatterNdOptions *>(builtin_options())
- : nullptr;
+ ? static_cast<const ScatterNdOptions *>(builtin_options())
+ : nullptr;
}
const SelectV2Options *builtin_options_as_SelectV2Options() const
{
return builtin_options_type() == BuiltinOptions_SelectV2Options
- ? static_cast<const SelectV2Options *>(builtin_options())
- : nullptr;
+ ? static_cast<const SelectV2Options *>(builtin_options())
+ : nullptr;
}
const DensifyOptions *builtin_options_as_DensifyOptions() const
{
return builtin_options_type() == BuiltinOptions_DensifyOptions
- ? static_cast<const DensifyOptions *>(builtin_options())
- : nullptr;
+ ? static_cast<const DensifyOptions *>(builtin_options())
+ : nullptr;
}
const SegmentSumOptions *builtin_options_as_SegmentSumOptions() const
{
return builtin_options_type() == BuiltinOptions_SegmentSumOptions
- ? static_cast<const SegmentSumOptions *>(builtin_options())
- : nullptr;
+ ? static_cast<const SegmentSumOptions *>(builtin_options())
+ : nullptr;
}
const BatchMatMulOptions *builtin_options_as_BatchMatMulOptions() const
{
return builtin_options_type() == BuiltinOptions_BatchMatMulOptions
- ? static_cast<const BatchMatMulOptions *>(builtin_options())
- : nullptr;
+ ? static_cast<const BatchMatMulOptions *>(builtin_options())
+ : nullptr;
}
const flatbuffers::Vector<uint8_t> *custom_options() const
{
@@ -8457,7 +8455,7 @@ struct OperatorBuilder
static_cast<int8_t>(custom_options_format), 0);
}
void add_mutating_variable_inputs(
- flatbuffers::Offset<flatbuffers::Vector<uint8_t>> mutating_variable_inputs)
+ flatbuffers::Offset<flatbuffers::Vector<uint8_t>> mutating_variable_inputs)
{
fbb_.AddOffset(Operator::VT_MUTATING_VARIABLE_INPUTS, mutating_variable_inputs);
}
@@ -8514,11 +8512,11 @@ CreateOperatorDirect(flatbuffers::FlatBufferBuilder &_fbb, uint32_t opcode_index
const std::vector<int32_t> *intermediates = nullptr)
{
return onert_tflite::CreateOperator(
- _fbb, opcode_index, inputs ? _fbb.CreateVector<int32_t>(*inputs) : 0,
- outputs ? _fbb.CreateVector<int32_t>(*outputs) : 0, builtin_options_type, builtin_options,
- custom_options ? _fbb.CreateVector<uint8_t>(*custom_options) : 0, custom_options_format,
- mutating_variable_inputs ? _fbb.CreateVector<uint8_t>(*mutating_variable_inputs) : 0,
- intermediates ? _fbb.CreateVector<int32_t>(*intermediates) : 0);
+ _fbb, opcode_index, inputs ? _fbb.CreateVector<int32_t>(*inputs) : 0,
+ outputs ? _fbb.CreateVector<int32_t>(*outputs) : 0, builtin_options_type, builtin_options,
+ custom_options ? _fbb.CreateVector<uint8_t>(*custom_options) : 0, custom_options_format,
+ mutating_variable_inputs ? _fbb.CreateVector<uint8_t>(*mutating_variable_inputs) : 0,
+ intermediates ? _fbb.CreateVector<int32_t>(*intermediates) : 0);
}
struct SubGraph FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table
@@ -8602,12 +8600,12 @@ struct SubGraphBuilder
};
inline flatbuffers::Offset<SubGraph> CreateSubGraph(
- flatbuffers::FlatBufferBuilder &_fbb,
- flatbuffers::Offset<flatbuffers::Vector<flatbuffers::Offset<Tensor>>> tensors = 0,
- flatbuffers::Offset<flatbuffers::Vector<int32_t>> inputs = 0,
- flatbuffers::Offset<flatbuffers::Vector<int32_t>> outputs = 0,
- flatbuffers::Offset<flatbuffers::Vector<flatbuffers::Offset<Operator>>> operators = 0,
- flatbuffers::Offset<flatbuffers::String> name = 0)
+ flatbuffers::FlatBufferBuilder &_fbb,
+ flatbuffers::Offset<flatbuffers::Vector<flatbuffers::Offset<Tensor>>> tensors = 0,
+ flatbuffers::Offset<flatbuffers::Vector<int32_t>> inputs = 0,
+ flatbuffers::Offset<flatbuffers::Vector<int32_t>> outputs = 0,
+ flatbuffers::Offset<flatbuffers::Vector<flatbuffers::Offset<Operator>>> operators = 0,
+ flatbuffers::Offset<flatbuffers::String> name = 0)
{
SubGraphBuilder builder_(_fbb);
builder_.add_name(name);
@@ -8618,20 +8616,18 @@ inline flatbuffers::Offset<SubGraph> CreateSubGraph(
return builder_.Finish();
}
-inline flatbuffers::Offset<SubGraph>
-CreateSubGraphDirect(flatbuffers::FlatBufferBuilder &_fbb,
- const std::vector<flatbuffers::Offset<Tensor>> *tensors = nullptr,
- const std::vector<int32_t> *inputs = nullptr,
- const std::vector<int32_t> *outputs = nullptr,
- const std::vector<flatbuffers::Offset<Operator>> *operators = nullptr,
- const char *name = nullptr)
+inline flatbuffers::Offset<SubGraph> CreateSubGraphDirect(
+ flatbuffers::FlatBufferBuilder &_fbb,
+ const std::vector<flatbuffers::Offset<Tensor>> *tensors = nullptr,
+ const std::vector<int32_t> *inputs = nullptr, const std::vector<int32_t> *outputs = nullptr,
+ const std::vector<flatbuffers::Offset<Operator>> *operators = nullptr, const char *name = nullptr)
{
return onert_tflite::CreateSubGraph(
- _fbb, tensors ? _fbb.CreateVector<flatbuffers::Offset<Tensor>>(*tensors) : 0,
- inputs ? _fbb.CreateVector<int32_t>(*inputs) : 0,
- outputs ? _fbb.CreateVector<int32_t>(*outputs) : 0,
- operators ? _fbb.CreateVector<flatbuffers::Offset<Operator>>(*operators) : 0,
- name ? _fbb.CreateString(name) : 0);
+ _fbb, tensors ? _fbb.CreateVector<flatbuffers::Offset<Tensor>>(*tensors) : 0,
+ inputs ? _fbb.CreateVector<int32_t>(*inputs) : 0,
+ outputs ? _fbb.CreateVector<int32_t>(*outputs) : 0,
+ operators ? _fbb.CreateVector<flatbuffers::Offset<Operator>>(*operators) : 0,
+ name ? _fbb.CreateString(name) : 0);
}
struct Buffer FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table
@@ -8762,7 +8758,7 @@ struct Model FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table
const flatbuffers::Vector<flatbuffers::Offset<OperatorCode>> *operator_codes() const
{
return GetPointer<const flatbuffers::Vector<flatbuffers::Offset<OperatorCode>> *>(
- VT_OPERATOR_CODES);
+ VT_OPERATOR_CODES);
}
const flatbuffers::Vector<flatbuffers::Offset<SubGraph>> *subgraphs() const
{
@@ -8805,7 +8801,7 @@ struct ModelBuilder
flatbuffers::uoffset_t start_;
void add_version(uint32_t version) { fbb_.AddElement<uint32_t>(Model::VT_VERSION, version, 0); }
void add_operator_codes(
- flatbuffers::Offset<flatbuffers::Vector<flatbuffers::Offset<OperatorCode>>> operator_codes)
+ flatbuffers::Offset<flatbuffers::Vector<flatbuffers::Offset<OperatorCode>>> operator_codes)
{
fbb_.AddOffset(Model::VT_OPERATOR_CODES, operator_codes);
}
@@ -8845,13 +8841,13 @@ struct ModelBuilder
};
inline flatbuffers::Offset<Model> CreateModel(
- flatbuffers::FlatBufferBuilder &_fbb, uint32_t version = 0,
- flatbuffers::Offset<flatbuffers::Vector<flatbuffers::Offset<OperatorCode>>> operator_codes = 0,
- flatbuffers::Offset<flatbuffers::Vector<flatbuffers::Offset<SubGraph>>> subgraphs = 0,
- flatbuffers::Offset<flatbuffers::String> description = 0,
- flatbuffers::Offset<flatbuffers::Vector<flatbuffers::Offset<Buffer>>> buffers = 0,
- flatbuffers::Offset<flatbuffers::Vector<int32_t>> metadata_buffer = 0,
- flatbuffers::Offset<flatbuffers::Vector<flatbuffers::Offset<Metadata>>> metadata = 0)
+ flatbuffers::FlatBufferBuilder &_fbb, uint32_t version = 0,
+ flatbuffers::Offset<flatbuffers::Vector<flatbuffers::Offset<OperatorCode>>> operator_codes = 0,
+ flatbuffers::Offset<flatbuffers::Vector<flatbuffers::Offset<SubGraph>>> subgraphs = 0,
+ flatbuffers::Offset<flatbuffers::String> description = 0,
+ flatbuffers::Offset<flatbuffers::Vector<flatbuffers::Offset<Buffer>>> buffers = 0,
+ flatbuffers::Offset<flatbuffers::Vector<int32_t>> metadata_buffer = 0,
+ flatbuffers::Offset<flatbuffers::Vector<flatbuffers::Offset<Metadata>>> metadata = 0)
{
ModelBuilder builder_(_fbb);
builder_.add_metadata(metadata);
@@ -8874,13 +8870,13 @@ CreateModelDirect(flatbuffers::FlatBufferBuilder &_fbb, uint32_t version = 0,
const std::vector<flatbuffers::Offset<Metadata>> *metadata = nullptr)
{
return onert_tflite::CreateModel(
- _fbb, version,
- operator_codes ? _fbb.CreateVector<flatbuffers::Offset<OperatorCode>>(*operator_codes) : 0,
- subgraphs ? _fbb.CreateVector<flatbuffers::Offset<SubGraph>>(*subgraphs) : 0,
- description ? _fbb.CreateString(description) : 0,
- buffers ? _fbb.CreateVector<flatbuffers::Offset<Buffer>>(*buffers) : 0,
- metadata_buffer ? _fbb.CreateVector<int32_t>(*metadata_buffer) : 0,
- metadata ? _fbb.CreateVector<flatbuffers::Offset<Metadata>>(*metadata) : 0);
+ _fbb, version,
+ operator_codes ? _fbb.CreateVector<flatbuffers::Offset<OperatorCode>>(*operator_codes) : 0,
+ subgraphs ? _fbb.CreateVector<flatbuffers::Offset<SubGraph>>(*subgraphs) : 0,
+ description ? _fbb.CreateString(description) : 0,
+ buffers ? _fbb.CreateVector<flatbuffers::Offset<Buffer>>(*buffers) : 0,
+ metadata_buffer ? _fbb.CreateVector<int32_t>(*metadata_buffer) : 0,
+ metadata ? _fbb.CreateVector<flatbuffers::Offset<Metadata>>(*metadata) : 0);
}
inline bool VerifyQuantizationDetails(flatbuffers::Verifier &verifier, const void *obj,
diff --git a/runtime/onert/sample/.clang-format b/runtime/onert/sample/.clang-format
new file mode 120000
index 000000000..83185fee3
--- /dev/null
+++ b/runtime/onert/sample/.clang-format
@@ -0,0 +1 @@
+../../../.clang-format.8
\ No newline at end of file
diff --git a/runtime/onert/test/.clang-format b/runtime/onert/test/.clang-format
new file mode 120000
index 000000000..83185fee3
--- /dev/null
+++ b/runtime/onert/test/.clang-format
@@ -0,0 +1 @@
+../../../.clang-format.8
\ No newline at end of file
diff --git a/runtime/onert/test/core/compiler/Scheduler.cc b/runtime/onert/test/core/compiler/HEScheduler.cc
index 50f3964db..c77ebb895 100644
--- a/runtime/onert/test/core/compiler/Scheduler.cc
+++ b/runtime/onert/test/core/compiler/HEScheduler.cc
@@ -55,8 +55,7 @@ struct MockBackendCPU : public Backend
std::unique_ptr<BackendContext>
newContext(const Graph &, const std::shared_ptr<custom::IKernelBuilder> &, bool) const override
{
- return std::unique_ptr<BackendContext>(
- new BackendContext{this, nullptr, nullptr, nullptr, nullptr});
+ return std::unique_ptr<BackendContext>(new BackendContext{this, nullptr});
}
};
@@ -79,8 +78,7 @@ struct MockBackendGPU : public Backend
std::unique_ptr<BackendContext>
newContext(const Graph &, const std::shared_ptr<custom::IKernelBuilder> &, bool) const override
{
- return std::unique_ptr<BackendContext>(
- new BackendContext{this, nullptr, nullptr, nullptr, nullptr});
+ return std::unique_ptr<BackendContext>(new BackendContext{this, nullptr});
}
};
@@ -103,8 +101,7 @@ struct MockBackendNPU : public Backend
std::unique_ptr<BackendContext>
newContext(const Graph &, const std::shared_ptr<custom::IKernelBuilder> &, bool) const override
{
- return std::unique_ptr<BackendContext>(
- new BackendContext{this, nullptr, nullptr, nullptr, nullptr});
+ return std::unique_ptr<BackendContext>(new BackendContext{this, nullptr});
}
};
@@ -165,7 +162,7 @@ void setOperationsExecutionTime(const std::vector<const Backend *> &backends,
for (auto &backend : backends)
setOperationExecTime(et, backend, op_names[i], false, op_sizes[i], exec_time);
}
- et.uploadOperationsExecTime();
+ et.storeOperationsExecTime();
}
// Set permute time from one backend to another. This method is needed since ExecutionTime has only
@@ -195,7 +192,7 @@ void setPermutationsExecutionTime(const std::vector<const Backend *> &backends,
setPermutationTime(et, backend, other_backend, false, operand_size, exec_time);
}
}
- et.uploadOperationsExecTime();
+ et.storeOperationsExecTime();
}
//
@@ -304,7 +301,7 @@ std::shared_ptr<Graph> createBranchedGraph()
//
// SetUp/TearDown methods runs before/after each test and performs actions common for each test
-class SchedulerTest : public ::testing::Test
+class HESchedulerTest : public ::testing::Test
{
protected:
void SetUp() override
@@ -359,8 +356,8 @@ protected:
std::string _original_profiling_mode;
};
-class SchedulerTestWithExecutorParam : public SchedulerTest,
- public testing::WithParamInterface<std::string>
+class HESchedulerTestWithExecutorParam : public HESchedulerTest,
+ public testing::WithParamInterface<std::string>
{
};
@@ -369,7 +366,7 @@ class SchedulerTestWithExecutorParam : public SchedulerTest,
//
// Test scheduler behavior for straight graph with known execution time of all nodes and permutes.
-TEST_P(SchedulerTestWithExecutorParam, straight_graph_known_exec_time)
+TEST_P(HESchedulerTestWithExecutorParam, straight_graph_known_exec_time)
{
setExecutor(GetParam());
@@ -392,7 +389,7 @@ TEST_P(SchedulerTestWithExecutorParam, straight_graph_known_exec_time)
setOperationExecTime(et, _cpu_backend, "Add", false, OPERATION_SIZE, 1);
setOperationExecTime(et, _gpu_backend, "Sub", false, OPERATION_SIZE, 1);
setOperationExecTime(et, _npu_backend, "Mul", false, OPERATION_SIZE, 1);
- et.uploadOperationsExecTime();
+ et.storeOperationsExecTime();
// Test scheduler
auto backend_contexts = buildBackendContexts(*graph);
@@ -422,7 +419,7 @@ TEST_P(SchedulerTestWithExecutorParam, straight_graph_known_exec_time)
}
// Test scheduler behavior for branched graph with known execution time of all nodes and permutes
-TEST_P(SchedulerTestWithExecutorParam, branched_graph_known_exec_time)
+TEST_P(HESchedulerTestWithExecutorParam, branched_graph_known_exec_time)
{
const int64_t NPU_ET = 5000;
setExecutor(GetParam());
@@ -432,7 +429,7 @@ TEST_P(SchedulerTestWithExecutorParam, branched_graph_known_exec_time)
auto graph(createBranchedGraph());
subgs.push(ir::SubgraphIndex{0}, graph);
OperationIndex add_op_idx(0), mul1_op_idx(1), mul2_op_idx(2), fc1_op_idx(3), fc2_op_idx(4),
- sub_op_idx(5);
+ sub_op_idx(5);
// Set default execution and transfer time
setPermutationsExecutionTime(_mock_backends, OPERAND_SIZE, 1000);
@@ -451,7 +448,7 @@ TEST_P(SchedulerTestWithExecutorParam, branched_graph_known_exec_time)
setOperationExecTime(et, _npu_backend, "FullyConnected", false, OPERATION_SIZE, NPU_ET);
setOperationExecTime(et, _gpu_backend, "Mul", false, OPERATION_SIZE, NPU_ET + 1000);
setOperationExecTime(et, _gpu_backend, "FullyConnected", false, OPERATION_SIZE, NPU_ET + 1000);
- et.uploadOperationsExecTime();
+ et.storeOperationsExecTime();
// Test scheduler
auto backend_contexts = buildBackendContexts(*graph);
@@ -463,7 +460,7 @@ TEST_P(SchedulerTestWithExecutorParam, branched_graph_known_exec_time)
if (GetParam() == PARALLEL)
{
branch1_expected_backend =
- br->getBackend(mul1_op_idx)->config()->id() == "npu" ? "npu" : "gpu";
+ br->getBackend(mul1_op_idx)->config()->id() == "npu" ? "npu" : "gpu";
branch2_expected_backend = branch1_expected_backend == "npu" ? "gpu" : "npu";
}
@@ -486,7 +483,7 @@ TEST_P(SchedulerTestWithExecutorParam, branched_graph_known_exec_time)
* branching or scheduler assigns another backend to a node*/
setOperationExecTime(et, _gpu_backend, "Mul", false, OPERATION_SIZE, NPU_ET * 3 + 1);
setOperationExecTime(et, _gpu_backend, "FullyConnected", false, OPERATION_SIZE, NPU_ET * 3 + 1);
- et.uploadOperationsExecTime();
+ et.storeOperationsExecTime();
// Test scheduler
auto backend_contexts = buildBackendContexts(*graph);
@@ -504,11 +501,11 @@ TEST_P(SchedulerTestWithExecutorParam, branched_graph_known_exec_time)
// SchedulerTestWithExecutorParam tests are parameterized with executor name and runs three times -
// one time for each executor
-INSTANTIATE_TEST_CASE_P(AllExecutors, SchedulerTestWithExecutorParam,
+INSTANTIATE_TEST_CASE_P(AllExecutors, HESchedulerTestWithExecutorParam,
testing::Values(LINEAR, DATAFLOW, PARALLEL));
// Test scheduler behavior for branched graph and enabled profiling mode
-TEST_F(SchedulerTest, branched_graph_profiling_mode)
+TEST_F(HESchedulerTest, branched_graph_profiling_mode)
{
const int ET = 1e5;
@@ -521,7 +518,7 @@ TEST_F(SchedulerTest, branched_graph_profiling_mode)
auto graph(createBranchedGraph());
subgs.push(ir::SubgraphIndex{0}, graph);
OperationIndex add_op_idx(0), mul1_op_idx(1), mul2_op_idx(2), fc1_op_idx(3), fc2_op_idx(4),
- sub_op_idx(5);
+ sub_op_idx(5);
// Test 1
// Expected behaviour: scheduler assigns backends to nodes with unknown execution time
@@ -537,7 +534,7 @@ TEST_F(SchedulerTest, branched_graph_profiling_mode)
setOperationExecTime(et, _gpu_backend, "Add", false, OPERATION_SIZE, ET);
setOperationExecTime(et, _gpu_backend, "Mul", false, OPERATION_SIZE, ET + 1);
setOperationExecTime(et, _gpu_backend, "Sub", false, OPERATION_SIZE, ET);
- et.uploadOperationsExecTime();
+ et.storeOperationsExecTime();
// Test scheduler
auto backend_contexts = buildBackendContexts(*graph);
@@ -560,7 +557,7 @@ TEST_F(SchedulerTest, branched_graph_profiling_mode)
setOperationExecTime(et, _cpu_backend, "Sub", false, OPERATION_SIZE, ET);
setOperationExecTime(et, _npu_backend, "Mul", false, OPERATION_SIZE, ET + 1);
setOperationExecTime(et, _gpu_backend, "FullyConnected", false, OPERATION_SIZE, ET);
- et.uploadOperationsExecTime();
+ et.storeOperationsExecTime();
// Test scheduler
auto backend_contexts = buildBackendContexts(*graph);
diff --git a/runtime/onert/test/core/exec/ExecInstance.cc b/runtime/onert/test/core/exec/ExecInstance.cc
index 806b47ecc..0e742e1e4 100644
--- a/runtime/onert/test/core/exec/ExecInstance.cc
+++ b/runtime/onert/test/core/exec/ExecInstance.cc
@@ -21,6 +21,7 @@
#include "compiler/Compiler.h"
#include "exec/Execution.h"
#include "ir/operation/BinaryArithmetic.h"
+#include "util/TracingCtx.h"
namespace
{
@@ -51,8 +52,8 @@ public:
auto operand_rhs2 = graph->addOperand(shape, type);
auto operand_result2 = graph->addOperand(shape, type);
graph->operands()
- .at(operand_rhs2)
- .data(std::make_unique<CachedData>(reinterpret_cast<const uint8_t *>(&rhs2_data), 16));
+ .at(operand_rhs2)
+ .data(std::make_unique<CachedData>(reinterpret_cast<const uint8_t *>(&rhs2_data), 16));
// 2nd add operations (result2 <= result1 + rhs2)
operation::BinaryArithmetic::Param param1;
param1.arithmetic_type = operation::BinaryArithmetic::ArithmeticType::ADD;
@@ -60,14 +61,14 @@ public:
auto input_set1 = OperandIndexSequence{operand_lhs, operand_rhs1};
auto output_set1 = OperandIndexSequence{operand_result1};
graph->addOperation(
- std::make_unique<operation::BinaryArithmetic>(input_set1, output_set1, param1));
+ std::make_unique<operation::BinaryArithmetic>(input_set1, output_set1, param1));
operation::BinaryArithmetic::Param param2;
param2.arithmetic_type = operation::BinaryArithmetic::ArithmeticType::ADD;
param2.activation = Activation::NONE;
auto input_set2 = OperandIndexSequence{operand_result1, operand_rhs2};
auto output_set2 = OperandIndexSequence{operand_result2};
graph->addOperation(
- std::make_unique<operation::BinaryArithmetic>(input_set2, output_set2, param2));
+ std::make_unique<operation::BinaryArithmetic>(input_set2, output_set2, param2));
// Identify model inputs and outputs
graph->addInput(operand_lhs);
graph->addInput(operand_rhs1);
@@ -77,13 +78,15 @@ public:
// Compile
auto subgs = std::make_shared<onert::ir::Subgraphs>();
subgs->push(onert::ir::SubgraphIndex{0}, graph);
- onert::compiler::Compiler compiler{subgs};
+ tracing_ctx = std::make_unique<onert::util::TracingCtx>(subgs.get());
+ onert::compiler::Compiler compiler{subgs, tracing_ctx.get()};
executors = compiler.compile();
}
public:
std::shared_ptr<Graph> graph;
std::shared_ptr<onert::exec::ExecutorMap> executors;
+ std::unique_ptr<onert::util::TracingCtx> tracing_ctx;
};
TEST(ExecInstance, simple)
@@ -137,7 +140,8 @@ TEST(ExecInstance, twoCompile)
// Make new executor: compile again
auto subgs = std::make_shared<onert::ir::Subgraphs>();
subgs->push(onert::ir::SubgraphIndex{0}, graph);
- onert::compiler::Compiler compiler{subgs};
+ auto tracing_ctx = std::make_unique<onert::util::TracingCtx>(subgs.get());
+ onert::compiler::Compiler compiler{subgs, tracing_ctx.get()};
std::shared_ptr<onert::exec::ExecutorMap> executors2 = compiler.compile();
onert::exec::Execution execution2{executors2};
@@ -205,7 +209,7 @@ class Inference
public:
Inference(const float (&input1)[4], const float (&input2)[4], float (&output)[4],
std::shared_ptr<onert::exec::ExecutorMap> &executors)
- : _input1{input1}, _input2{input2}, _output{output}, _executors{executors}
+ : _input1{input1}, _input2{input2}, _output{output}, _executors{executors}
{
// DO NOTHING
}
diff --git a/runtime/onert/test/core/exec/ExecTime.test.cc b/runtime/onert/test/core/exec/ExecTime.test.cc
index 8c2e34df8..6b0c35a79 100644
--- a/runtime/onert/test/core/exec/ExecTime.test.cc
+++ b/runtime/onert/test/core/exec/ExecTime.test.cc
@@ -62,7 +62,7 @@ TEST(ExecTime, roundtrip_ok)
et.updateOperationExecTime(b, "op1", true, 100, 100);
et.updateOperationExecTime(b, "op1", true, 200, 200);
et.updateOperationExecTime(b, "op1", false, 100, 888);
- et.uploadOperationsExecTime();
+ et.storeOperationsExecTime();
}
{
ExecTime et(bs);
@@ -73,7 +73,7 @@ TEST(ExecTime, roundtrip_ok)
ASSERT_EQ(time, 150);
time = et.getOperationExecTime(b, "op1", false, 100);
ASSERT_EQ(time, 888);
- et.uploadOperationsExecTime();
+ et.storeOperationsExecTime();
}
// clean up
EXPECT_EQ(remove("exec_time.json"), 0);
@@ -88,7 +88,7 @@ TEST(ExecTime, structure)
ExecTime et(bs);
et.updateOperationExecTime(b, "op1", true, 100, 100);
et.updateOperationExecTime(b, "op1", true, 200, 200);
- et.uploadOperationsExecTime();
+ et.storeOperationsExecTime();
}
{
ExecTime et(bs);
@@ -97,7 +97,7 @@ TEST(ExecTime, structure)
// Check interpolation
time = et.getOperationExecTime(b, "op1", true, 200);
ASSERT_EQ(time, 200);
- et.uploadOperationsExecTime();
+ et.storeOperationsExecTime();
}
// clean up
EXPECT_EQ(remove("exec_time.json"), 0);
diff --git a/runtime/onert/test/core/interp/ExecManager.cc b/runtime/onert/test/core/interp/ExecManager.cc
index 0c7b1b762..327c38f79 100644
--- a/runtime/onert/test/core/interp/ExecManager.cc
+++ b/runtime/onert/test/core/interp/ExecManager.cc
@@ -63,7 +63,7 @@ protected:
auto input_set = OperandIndexSequence{operand_lhs, operand_rhs};
auto output_set = OperandIndexSequence{operand_result};
_graph->addOperation(
- std::make_unique<operation::BinaryArithmetic>(input_set, output_set, param));
+ std::make_unique<operation::BinaryArithmetic>(input_set, output_set, param));
// Identify model inputs and outputs
@@ -79,7 +79,7 @@ protected:
_executors = std::make_shared<ExecutorMap>();
_executors->insert(
- std::make_pair(onert::ir::SubgraphIndex{0}, std::make_unique<InterpExecutor>(*_graph)));
+ std::make_pair(onert::ir::SubgraphIndex{0}, std::make_unique<InterpExecutor>(*_graph)));
}
void CreateTwoStepModel()
@@ -109,8 +109,8 @@ protected:
auto operand_rhs2 = _graph->addOperand(shape, type);
auto operand_result2 = _graph->addOperand(shape, type);
_graph->operands()
- .at(operand_rhs2)
- .data(std::make_unique<CachedData>(reinterpret_cast<const uint8_t *>(&rhs2_data), 16));
+ .at(operand_rhs2)
+ .data(std::make_unique<CachedData>(reinterpret_cast<const uint8_t *>(&rhs2_data), 16));
// 2nd add operations (result2 <= result1 + rhs2)
@@ -120,7 +120,7 @@ protected:
auto input_set1 = OperandIndexSequence{operand_lhs, operand_rhs1};
auto output_set1 = OperandIndexSequence{operand_result1};
_graph->addOperation(
- std::make_unique<operation::BinaryArithmetic>(input_set1, output_set1, param1));
+ std::make_unique<operation::BinaryArithmetic>(input_set1, output_set1, param1));
operation::BinaryArithmetic::Param param2;
param2.arithmetic_type = operation::BinaryArithmetic::ArithmeticType::ADD;
@@ -128,7 +128,7 @@ protected:
auto input_set2 = OperandIndexSequence{operand_result1, operand_rhs2};
auto output_set2 = OperandIndexSequence{operand_result2};
_graph->addOperation(
- std::make_unique<operation::BinaryArithmetic>(input_set2, output_set2, param2));
+ std::make_unique<operation::BinaryArithmetic>(input_set2, output_set2, param2));
// Identify model inputs and outputs
@@ -144,7 +144,7 @@ protected:
_executors = std::make_shared<ExecutorMap>();
_executors->insert(
- std::make_pair(onert::ir::SubgraphIndex{0}, std::make_unique<InterpExecutor>(*_graph)));
+ std::make_pair(onert::ir::SubgraphIndex{0}, std::make_unique<InterpExecutor>(*_graph)));
}
void CreateUnspecifiedDimensionsModel()
@@ -168,9 +168,8 @@ protected:
auto operand_activation = _graph->addOperand(shape_scalar, type_scalar);
_graph->operands()
- .at(operand_activation)
- .data(
- std::make_unique<CachedData>(reinterpret_cast<const uint8_t *>(&_activation_value), 4));
+ .at(operand_activation)
+ .data(std::make_unique<CachedData>(reinterpret_cast<const uint8_t *>(&_activation_value), 4));
auto operand_result = _graph->addOperand(shape, type);
@@ -182,7 +181,7 @@ protected:
auto input_set = OperandIndexSequence{operand_lhs, operand_rhs};
auto output_set = OperandIndexSequence{operand_result};
_graph->addOperation(
- std::make_unique<operation::BinaryArithmetic>(input_set, output_set, param));
+ std::make_unique<operation::BinaryArithmetic>(input_set, output_set, param));
// Identify model inputs and outputs
@@ -198,7 +197,7 @@ protected:
_executors = std::make_shared<ExecutorMap>();
_executors->insert(
- std::make_pair(onert::ir::SubgraphIndex{0}, std::make_unique<InterpExecutor>(*_graph)));
+ std::make_pair(onert::ir::SubgraphIndex{0}, std::make_unique<InterpExecutor>(*_graph)));
}
void createExecution() { _execution = std::make_unique<Execution>(_executors); }
diff --git a/runtime/onert/test/graph/MockNode.h b/runtime/onert/test/graph/MockNode.h
index 60b4719ed..0e7ed977b 100644
--- a/runtime/onert/test/graph/MockNode.h
+++ b/runtime/onert/test/graph/MockNode.h
@@ -30,7 +30,7 @@ class SimpleMock : public onert::ir::Operation
public:
SimpleMock(const onert::ir::OperandIndexSequence &inputs,
const onert::ir::OperandIndexSequence &outputs)
- : Operation{onert::ir::OperandConstraint::createAny()}
+ : Operation{onert::ir::OperandConstraint::createAny()}
{
setInputs(inputs);
setOutputs(outputs);
diff --git a/runtime/onert/test/graph/operand/UseDef.cc b/runtime/onert/test/graph/operand/UseDef.cc
index 206e402ed..5ef10027e 100644
--- a/runtime/onert/test/graph/operand/UseDef.cc
+++ b/runtime/onert/test/graph/operand/UseDef.cc
@@ -49,16 +49,16 @@ TEST(ir_Operand, neg_usedef)
// MockNode1
auto operand_index1 = graph.addOperand(shape, type);
auto mocknode_index1 =
- graph.addOperation(std::make_unique<Mock>(IndexSet{input_operand}, IndexSet{operand_index1}));
+ graph.addOperation(std::make_unique<Mock>(IndexSet{input_operand}, IndexSet{operand_index1}));
// MockNode2
auto operand_index2 = graph.addOperand(shape, type);
auto mocknode_index2 =
- graph.addOperation(std::make_unique<Mock>(IndexSet{input_operand}, IndexSet{operand_index2}));
+ graph.addOperation(std::make_unique<Mock>(IndexSet{input_operand}, IndexSet{operand_index2}));
// MockNode3(two input)
auto multiinput_index = graph.addOperation(
- std::make_unique<Mock>(IndexSet{operand_index1, operand_index2}, IndexSet{output_operand}));
+ std::make_unique<Mock>(IndexSet{operand_index1, operand_index2}, IndexSet{output_operand}));
graph.finishBuilding();
diff --git a/runtime/onert/test/util/ShapeInference.cc b/runtime/onert/test/util/ShapeInference.cc
index f1cbfd692..2ecaa2885 100644
--- a/runtime/onert/test/util/ShapeInference.cc
+++ b/runtime/onert/test/util/ShapeInference.cc
@@ -48,7 +48,7 @@ TEST(ShapeInference, Pool2DNodeSame)
Padding padding{PaddingType::SAME};
operation::Pool2D::Param avg_pool_param{
- operation::Pool2D::PoolType::AVG, 3, 6, stride, padding, Activation::NONE};
+ operation::Pool2D::PoolType::AVG, 3, 6, stride, padding, Activation::NONE};
auto infered_out_shape = onert::shape_inference::inferPoolShape(in_shape, avg_pool_param);
ASSERT_EQ(infered_out_shape.rank(), 4);
@@ -58,7 +58,7 @@ TEST(ShapeInference, Pool2DNodeSame)
ASSERT_EQ(infered_out_shape.asFeature(Layout::NHWC).C, 20);
operation::Pool2D::Param max_pool_param{
- operation::Pool2D::PoolType::MAX, 3, 6, stride, padding, Activation::NONE};
+ operation::Pool2D::PoolType::MAX, 3, 6, stride, padding, Activation::NONE};
infered_out_shape = onert::shape_inference::inferPoolShape(in_shape, max_pool_param);
ASSERT_EQ(infered_out_shape.rank(), 4);
@@ -75,7 +75,7 @@ TEST(ShapeInference, Pool2DNodeValid)
Padding padding{PaddingType::VALID};
operation::Pool2D::Param avg_pool_param{
- operation::Pool2D::PoolType::AVG, 3, 6, stride, padding, Activation::NONE};
+ operation::Pool2D::PoolType::AVG, 3, 6, stride, padding, Activation::NONE};
auto infered_out_shape = onert::shape_inference::inferPoolShape(in_shape, avg_pool_param);
ASSERT_EQ(infered_out_shape.rank(), 4);
@@ -85,7 +85,7 @@ TEST(ShapeInference, Pool2DNodeValid)
ASSERT_EQ(infered_out_shape.asFeature(Layout::NHWC).C, 20);
operation::Pool2D::Param max_pool_param{
- operation::Pool2D::PoolType::MAX, 3, 6, stride, padding, Activation::NONE};
+ operation::Pool2D::PoolType::MAX, 3, 6, stride, padding, Activation::NONE};
infered_out_shape = onert::shape_inference::inferPoolShape(in_shape, max_pool_param);
ASSERT_EQ(infered_out_shape.rank(), 4);
@@ -103,7 +103,7 @@ TEST(ShapeInference, Pool2DNodeExplicit)
Padding padding{4, 3, 2, 1};
operation::Pool2D::Param avg_pool_param{
- operation::Pool2D::PoolType::AVG, 3, 6, stride, padding, Activation::NONE};
+ operation::Pool2D::PoolType::AVG, 3, 6, stride, padding, Activation::NONE};
auto infered_out_shape = onert::shape_inference::inferPoolShape(in_shape, avg_pool_param);
ASSERT_EQ(infered_out_shape.rank(), 4);
@@ -113,7 +113,7 @@ TEST(ShapeInference, Pool2DNodeExplicit)
ASSERT_EQ(infered_out_shape.asFeature(Layout::NHWC).C, 20);
operation::Pool2D::Param max_pool_param{
- operation::Pool2D::PoolType::MAX, 3, 6, stride, padding, Activation::NONE};
+ operation::Pool2D::PoolType::MAX, 3, 6, stride, padding, Activation::NONE};
infered_out_shape = onert::shape_inference::inferPoolShape(in_shape, max_pool_param);
ASSERT_EQ(infered_out_shape.rank(), 4);
@@ -130,7 +130,7 @@ TEST(ShapeInference, neg_Pool2DNode_InvalidStride)
Padding padding{PaddingType::SAME};
operation::Pool2D::Param avg_pool_param{
- operation::Pool2D::PoolType::AVG, 3, 6, stride, padding, Activation::NONE};
+ operation::Pool2D::PoolType::AVG, 3, 6, stride, padding, Activation::NONE};
ASSERT_THROW(onert::shape_inference::inferPoolShape(in_shape, avg_pool_param),
std::runtime_error);
}
@@ -161,7 +161,7 @@ TEST(ShapeInference, Conv2D)
ASSERT_EQ(infered_out_shape.asFeature(Layout::NHWC).C, 30);
param =
- operation::Conv2D::Param{Stride{3, 7}, Padding{4, 3, 2, 1}, Activation::NONE, Dilation{1, 1}};
+ operation::Conv2D::Param{Stride{3, 7}, Padding{4, 3, 2, 1}, Activation::NONE, Dilation{1, 1}};
infered_out_shape = onert::shape_inference::inferConv2DShape(in_shape, ker_shape, param);
ASSERT_EQ(infered_out_shape.rank(), 4);
@@ -190,7 +190,7 @@ TEST(ShapeInference, DepthwiseConv2D)
operation::DepthwiseConv2D::Param param{Stride{3, 7}, Padding{PaddingType::VALID}, 3,
Activation::NONE, Dilation{1, 1}};
auto infered_out_shape =
- onert::shape_inference::inferDepthwiseConv2DShape(in_shape, ker_shape, param);
+ onert::shape_inference::inferDepthwiseConv2DShape(in_shape, ker_shape, param);
ASSERT_EQ(infered_out_shape.rank(), 4);
ASSERT_EQ(infered_out_shape.asFeature(Layout::NHWC).N, 10);
@@ -364,7 +364,7 @@ TEST(ShapeInference, Transpose)
ASSERT_EQ(in_shape.rank(), perm.size());
ASSERT_EQ(expected.rank(), perm.size());
auto inferred_out_shape =
- onert::shape_inference::inferTransposeShape(in_shape, perm.data(), perm.size());
+ onert::shape_inference::inferTransposeShape(in_shape, perm.data(), perm.size());
// post-conditions
ASSERT_EQ(inferred_out_shape.rank(), perm.size());
for (int32_t dim = 0; dim < expected.rank(); dim++)
@@ -479,8 +479,8 @@ TEST(ShapeInference, BCQFullyConnected)
{
auto check = [&](Shape &in_shape, Shape &cluster_shape, std::vector<int> cluster,
Shape &expected) {
- auto actual = onert::shape_inference::inferBCQFullyConnectedShape(in_shape, cluster_shape,
- cluster.data());
+ auto actual =
+ onert::shape_inference::inferBCQFullyConnectedShape(in_shape, cluster_shape, cluster.data());
ASSERT_EQ(actual.rank(), expected.rank());
for (int32_t dim = 0; dim < expected.rank(); dim++)