author    Chunseok Lee <chunseok.lee@samsung.com>  2020-10-28 12:16:55 +0900
committer Chunseok Lee <chunseok.lee@samsung.com>  2020-10-28 12:16:55 +0900
commit    c55f8a6db48cda9d3a78048338b7f18c4cca62b8 (patch)
tree      761ee8e171e5203f5c598ad93b2e7e0bc2e31aa2 /runtime
parent    74476a2d0296bdad70a2f7f90bc7419a8b05bffd (diff)
Diffstat (limited to 'runtime')
-rw-r--r--  runtime/contrib/android/api/Android.mk | 4
-rw-r--r--  runtime/contrib/android/api/Prebuilt.mk | 70
-rw-r--r--  runtime/contrib/android/api/build.gradle | 32
-rw-r--r--  runtime/contrib/android/api/prebuilt/Android.mk | 9
-rw-r--r--  runtime/contrib/android/api/prebuilt/backend_cpu/Android.mk | 7
l---------  runtime/contrib/android/api/prebuilt/backend_cpu/libbackend_cpu.so | 1
-rw-r--r--  runtime/contrib/android/api/prebuilt/circle_loader/Android.mk | 7
l---------  runtime/contrib/android/api/prebuilt/circle_loader/libcircle_loader.so | 1
-rw-r--r--  runtime/contrib/android/api/prebuilt/nnfw-dev/Android.mk | 7
l---------  runtime/contrib/android/api/prebuilt/nnfw-dev/libnnfw-dev.so | 1
-rw-r--r--  runtime/contrib/android/api/prebuilt/onert_core/Android.mk | 7
l---------  runtime/contrib/android/api/prebuilt/onert_core/libonert_core.so | 1
-rw-r--r--  runtime/contrib/android/api/prebuilt/tensorflowlite_jni/Android.mk | 7
l---------  runtime/contrib/android/api/prebuilt/tensorflowlite_jni/libtensorflowlite_jni.so | 1
-rw-r--r--  runtime/contrib/android/api/prebuilt/tflite_loader/Android.mk | 7
l---------  runtime/contrib/android/api/prebuilt/tflite_loader/libtflite_loader.so | 1
-rw-r--r--  runtime/contrib/android/api/src/main/native/onert-native-api.cpp | 3
-rw-r--r--  runtime/contrib/android_benchmark_app/CMakeLists.txt | 2
-rw-r--r--  runtime/libs/benchmark/include/benchmark/MemoryInfo.h (renamed from runtime/libs/ndarray/src/ContiguousSpan.cpp) | 33
-rw-r--r--  runtime/libs/benchmark/include/benchmark/MemoryPoller.h | 4
-rw-r--r--  runtime/libs/benchmark/include/benchmark/Phases.h | 5
-rw-r--r--  runtime/libs/benchmark/include/benchmark/Result.h | 2
-rw-r--r--  runtime/libs/benchmark/src/MemoryInfo.cpp | 169
-rw-r--r--  runtime/libs/benchmark/src/MemoryPoller.cpp | 163
-rw-r--r--  runtime/libs/benchmark/src/Phases.cpp | 8
-rw-r--r--  runtime/libs/benchmark/src/Result.cpp | 12
-rw-r--r--  runtime/libs/misc/include/misc/polymorphic_downcast.h | 2
-rw-r--r--  runtime/libs/ndarray/CMakeLists.txt | 19
-rw-r--r--  runtime/libs/ndarray/example/CMakeLists.txt | 4
-rw-r--r--  runtime/libs/ndarray/example/example_array.cpp | 76
-rw-r--r--  runtime/libs/ndarray/example/example_no_array.cpp | 85
-rw-r--r--  runtime/libs/ndarray/include/ndarray/Array.h | 195
-rw-r--r--  runtime/libs/ndarray/include/ndarray/Common.h | 22
-rw-r--r--  runtime/libs/ndarray/include/ndarray/ContiguousSpan.h | 108
-rw-r--r--  runtime/libs/ndarray/include/ndarray/Shape.h | 66
-rw-r--r--  runtime/libs/ndarray/src/detail/cxx14.h | 67
-rw-r--r--  runtime/libs/ndarray/test/CMakeLists.txt | 17
-rw-r--r--  runtime/libs/ndarray/test/ndarray_test.cpp | 92
-rw-r--r--  runtime/libs/nnapi/CMakeLists.txt | 5
-rw-r--r--  runtime/libs/nnapi/include/NeuralNetworksExShim.h (renamed from runtime/libs/nnapi/v1.2/include/NeuralNetworksExShim.h) | 0
-rw-r--r--  runtime/libs/nnapi/include/NeuralNetworksLoadHelpers.h (renamed from runtime/libs/nnapi/v1.2/include/NeuralNetworksLoadHelpers.h) | 0
-rw-r--r--  runtime/libs/nnapi/include/NeuralNetworksShim.h (renamed from runtime/libs/nnapi/v1.2/include/NeuralNetworksShim.h) | 0
-rw-r--r--  runtime/libs/nnapi/include/NeuralNetworksTypes.h (renamed from runtime/libs/nnapi/v1.2/include/NeuralNetworksTypes.h) | 0
-rw-r--r--  runtime/libs/nnapi/v1.1/CMakeLists.txt | 4
-rw-r--r--  runtime/libs/nnapi/v1.1/include/NeuralNetworksExShim.h | 64
-rw-r--r--  runtime/libs/nnapi/v1.1/include/NeuralNetworksLoadHelpers.h | 141
-rw-r--r--  runtime/libs/nnapi/v1.1/include/NeuralNetworksShim.h | 709
-rw-r--r--  runtime/libs/nnapi/v1.2/CMakeLists.txt | 4
-rw-r--r--  runtime/nnapi-header/include/NeuralNetworks.h | 2464
-rw-r--r--  runtime/nnapi-header/include/NeuralNetworksExtensions.h | 10
-rw-r--r--  runtime/onert/api/CMakeLists.txt | 8
-rw-r--r--  runtime/onert/api/include/nnfw_version.h | 2
-rw-r--r--  runtime/onert/api/src/nnfw_api_internal.cc | 11
-rw-r--r--  runtime/onert/backend/acl_cl/ConstantInitializer.cc | 45
-rw-r--r--  runtime/onert/backend/acl_cl/ConstantInitializer.h | 1
-rw-r--r--  runtime/onert/backend/acl_cl/KernelGenerator.cc | 505
-rw-r--r--  runtime/onert/backend/acl_cl/KernelGenerator.h | 3
-rw-r--r--  runtime/onert/backend/acl_common/AclKernelGen.h | 82
-rw-r--r--  runtime/onert/backend/acl_common/AclSubTensorAnalyzer.h | 10
-rw-r--r--  runtime/onert/backend/acl_common/AclTensorBuilder.h | 10
-rw-r--r--  runtime/onert/backend/acl_common/AclTensorRegistry.h | 12
-rw-r--r--  runtime/onert/backend/acl_common/Convert.cc | 25
-rw-r--r--  runtime/onert/backend/acl_common/Convert.h | 3
-rw-r--r--  runtime/onert/backend/acl_neon/KernelGenerator.cc | 362
-rw-r--r--  runtime/onert/backend/cpu/ExternalContext.h | 5
-rw-r--r--  runtime/onert/backend/cpu/KernelGenerator.cc | 319
-rw-r--r--  runtime/onert/backend/cpu/StaticTensorManager.cc | 11
-rw-r--r--  runtime/onert/backend/cpu/Tensor.cc (renamed from runtime/onert/core/src/backend/controlflow/UserTensorRegistry.h) | 16
-rw-r--r--  runtime/onert/backend/cpu/Tensor.h | 16
-rw-r--r--  runtime/onert/backend/cpu/TensorBuilder.cc | 10
-rw-r--r--  runtime/onert/backend/cpu/TensorBuilder.h | 4
-rw-r--r--  runtime/onert/backend/cpu/ops/ArgMinMaxLayer.cc | 23
-rw-r--r--  runtime/onert/backend/cpu/ops/ArgMinMaxLayer.h | 8
-rw-r--r--  runtime/onert/backend/cpu/ops/BinaryArithmeticLayer.cc | 13
-rw-r--r--  runtime/onert/backend/cpu/ops/FullyConnectedLayer.cc | 48
-rw-r--r--  runtime/onert/backend/cpu/ops/OperationUtils.h | 25
-rw-r--r--  runtime/onert/backend/cpu/ops/ReduceLayer.cc | 18
-rw-r--r--  runtime/onert/backend/cpu/ops/ReduceLayer.h | 5
-rw-r--r--  runtime/onert/backend/cpu/ops/ResizeBilinearLayer.cc | 40
-rw-r--r--  runtime/onert/backend/cpu/ops/ResizeBilinearLayer.h | 6
-rw-r--r--  runtime/onert/backend/cpu/ops/SoftMaxLayer.cc | 6
-rw-r--r--  runtime/onert/backend/cpu/ops/SplitLayer.cc | 17
-rw-r--r--  runtime/onert/backend/cpu/ops/SplitLayer.h | 4
-rw-r--r--  runtime/onert/backend/cpu/ops/StridedSliceLayer.cc | 12
-rw-r--r--  runtime/onert/backend/cpu/ops/TransposeLayer.cc | 26
-rw-r--r--  runtime/onert/backend/cpu/ops/TransposeLayer.h | 6
-rw-r--r--  runtime/onert/core/CMakeLists.txt | 5
-rw-r--r--  runtime/onert/core/include/backend/CustomKernelBuilder.h | 4
-rw-r--r--  runtime/onert/core/include/backend/IDynamicTensorManager.h | 22
-rw-r--r--  runtime/onert/core/include/backend/IPortableTensor.h | 14
-rw-r--r--  runtime/onert/core/include/backend/ITensor.h | 14
-rw-r--r--  runtime/onert/core/include/backend/ITensorBuilder.h | 16
-rw-r--r--  runtime/onert/core/include/backend/ITensorRegistry.h | 54
-rw-r--r--  runtime/onert/core/include/backend/cpu_common/DynamicTensorManager.h | 13
-rw-r--r--  runtime/onert/core/include/backend/cpu_common/MemoryManager.h | 10
-rw-r--r--  runtime/onert/core/include/backend/cpu_common/StaticTensorManager.h | 7
-rw-r--r--  runtime/onert/core/include/backend/cpu_common/Tensor.h | 42
-rw-r--r--  runtime/onert/core/include/compiler/StaticShapeInference.h | 3
-rw-r--r--  runtime/onert/core/include/exec/DynamicShapeInference.h | 3
-rw-r--r--  runtime/onert/core/include/exec/FunctionSequence.h | 16
-rw-r--r--  runtime/onert/core/include/exec/IExecutor.h | 15
-rw-r--r--  runtime/onert/core/include/ir/Operand.h | 1
-rw-r--r--  runtime/onert/core/include/ir/OperandIndexSequence.h | 2
-rw-r--r--  runtime/onert/core/include/ir/OperandInfo.h | 1
-rw-r--r--  runtime/onert/core/include/ir/Operation.h | 8
-rw-r--r--  runtime/onert/core/include/ir/Sparsity.h | 64
-rw-r--r--  runtime/onert/core/include/ir/TypeInfo.h | 19
-rw-r--r--  runtime/onert/core/include/ir/operation/ArgMax.h | 4
-rw-r--r--  runtime/onert/core/include/ir/operation/LSTM.h | 7
-rw-r--r--  runtime/onert/core/include/ir/operation/ResizeBilinear.h | 2
-rw-r--r--  runtime/onert/core/include/ir/operation/ResizeNearestNeighbor.h | 2
-rw-r--r--  runtime/onert/core/include/ir/operation/Split.h | 4
-rw-r--r--  runtime/onert/core/include/ir/operation/Transpose.h | 15
-rw-r--r--  runtime/onert/core/include/util/Config.lst | 1
-rw-r--r--  runtime/onert/core/include/util/ShapeInference.h | 25
-rw-r--r--  runtime/onert/core/include/util/Utils.h | 81
-rw-r--r--  runtime/onert/core/src/backend/IPortableTensor.cc (renamed from runtime/libs/ndarray/src/Array.cpp) | 18
-rw-r--r--  runtime/onert/core/src/backend/controlflow/BackendContext.h | 60
-rw-r--r--  runtime/onert/core/src/backend/controlflow/DynamicTensorManager.cc | 100
-rw-r--r--  runtime/onert/core/src/backend/controlflow/DynamicTensorManager.h | 17
-rw-r--r--  runtime/onert/core/src/backend/controlflow/ExternalContext.h | 62
-rw-r--r--  runtime/onert/core/src/backend/controlflow/KernelGenerator.cc | 39
-rw-r--r--  runtime/onert/core/src/backend/controlflow/KernelGenerator.h | 2
-rw-r--r--  runtime/onert/core/src/backend/controlflow/TensorBuilder.cc | 23
-rw-r--r--  runtime/onert/core/src/backend/controlflow/TensorBuilder.h | 12
-rw-r--r--  runtime/onert/core/src/backend/controlflow/TensorRegistry.h | 29
-rw-r--r--  runtime/onert/core/src/backend/controlflow/UserTensor.cc | 13
-rw-r--r--  runtime/onert/core/src/backend/controlflow/UserTensor.h | 14
-rw-r--r--  runtime/onert/core/src/backend/controlflow/kernel/IfLayer.cc | 25
-rw-r--r--  runtime/onert/core/src/backend/controlflow/kernel/IfLayer.h | 13
-rw-r--r--  runtime/onert/core/src/backend/controlflow/kernel/PermuteLayer.cc | 5
-rw-r--r--  runtime/onert/core/src/backend/controlflow/kernel/PermuteLayer.h | 8
-rw-r--r--  runtime/onert/core/src/backend/controlflow/kernel/WhileLayer.cc | 46
-rw-r--r--  runtime/onert/core/src/backend/controlflow/kernel/WhileLayer.h | 10
-rw-r--r--  runtime/onert/core/src/backend/cpu_common/DynamicTensorManager.cc | 87
-rw-r--r--  runtime/onert/core/src/backend/cpu_common/MemoryManager.cc | 13
-rw-r--r--  runtime/onert/core/src/backend/cpu_common/StaticTensorManager.cc | 14
-rw-r--r--  runtime/onert/core/src/backend/cpu_common/Tensor.cc | 54
-rw-r--r--  runtime/onert/core/src/compiler/BackendManager.cc | 23
-rw-r--r--  runtime/onert/core/src/compiler/Compiler.cc | 39
-rw-r--r--  runtime/onert/core/src/compiler/ExecutorFactory.cc | 73
-rw-r--r--  runtime/onert/core/src/compiler/ExecutorFactory.h | 4
-rw-r--r--  runtime/onert/core/src/compiler/HEScheduler.cc | 8
-rw-r--r--  runtime/onert/core/src/compiler/Linear.cc | 13
-rw-r--r--  runtime/onert/core/src/compiler/LoweredGraph.cc | 44
-rw-r--r--  runtime/onert/core/src/compiler/OperationValidator.cc | 897
-rw-r--r--  runtime/onert/core/src/compiler/OperationValidator.h | 40
-rw-r--r--  runtime/onert/core/src/compiler/ShapeValidator.cc | 1021
-rw-r--r--  runtime/onert/core/src/compiler/ShapeValidator.h | 102
-rw-r--r--  runtime/onert/core/src/compiler/StaticShapeInference.cc | 262
-rw-r--r--  runtime/onert/core/src/compiler/TensorRegistries.h | 2
-rw-r--r--  runtime/onert/core/src/compiler/pass/ConstantInsertionPass.cc | 4
-rw-r--r--  runtime/onert/core/src/compiler/pass/ConstantOutputPass.cc | 68
-rw-r--r--  runtime/onert/core/src/compiler/pass/ConstantOutputPass.h | 63
-rw-r--r--  runtime/onert/core/src/compiler/pass/OddOutputPass.cc | 90
-rw-r--r--  runtime/onert/core/src/compiler/pass/OddOutputPass.h | 89
-rw-r--r--  runtime/onert/core/src/compiler/pass/PassRunner.cc | 45
-rw-r--r--  runtime/onert/core/src/compiler/pass/PassRunner.h | 53
-rw-r--r--  runtime/onert/core/src/compiler/pass/PermutationEliminationPass.cc | 14
-rw-r--r--  runtime/onert/core/src/compiler/pass/PermutationOperationPass.cc | 14
-rw-r--r--  runtime/onert/core/src/compiler/pass/PermutationOperationPass.h | 1
-rw-r--r--  runtime/onert/core/src/dumper/dot/DotDumper.cc | 7
-rw-r--r--  runtime/onert/core/src/exec/DataflowExecutor.cc | 16
-rw-r--r--  runtime/onert/core/src/exec/DataflowExecutor.h | 7
-rw-r--r--  runtime/onert/core/src/exec/DynamicShapeInference.cc | 345
-rw-r--r--  runtime/onert/core/src/exec/Execution.cc | 7
-rw-r--r--  runtime/onert/core/src/exec/ExecutionObservers.cc | 5
-rw-r--r--  runtime/onert/core/src/exec/ExecutionObservers.h | 2
-rw-r--r--  runtime/onert/core/src/exec/ExecutorBase.cc | 77
-rw-r--r--  runtime/onert/core/src/exec/ExecutorBase.h | 30
-rw-r--r--  runtime/onert/core/src/exec/FunctionSequence.cc | 9
-rw-r--r--  runtime/onert/core/src/exec/IPermuteFunction.h | 175
-rw-r--r--  runtime/onert/core/src/exec/LinearExecutor.cc | 4
-rw-r--r--  runtime/onert/core/src/exec/LinearExecutor.h | 10
-rw-r--r--  runtime/onert/core/src/exec/ParallelExecutor.cc | 17
-rw-r--r--  runtime/onert/core/src/exec/ParallelExecutor.h | 7
-rw-r--r--  runtime/onert/core/src/exec/Sink.h | 199
-rw-r--r--  runtime/onert/core/src/exec/Source.h | 208
-rw-r--r--  runtime/onert/core/src/interp/Tensor.h | 1
-rw-r--r--  runtime/onert/core/src/ir/Graph.cc | 2
-rw-r--r--  runtime/onert/core/src/ir/GraphIterator.cc | 4
-rw-r--r--  runtime/onert/core/src/ir/Operation.cc | 21
-rw-r--r--  runtime/onert/core/src/ir/OperationDumper.cc | 75
-rw-r--r--  runtime/onert/core/src/ir/OperationDumper.h | 2
-rw-r--r--  runtime/onert/core/src/ir/operation/ArgMax.cc | 2
-rw-r--r--  runtime/onert/core/src/ir/operation/BatchToSpaceND.cc | 2
-rw-r--r--  runtime/onert/core/src/ir/operation/ElementwiseUnary.cc | 4
-rw-r--r--  runtime/onert/core/src/ir/operation/Fill.cc | 2
-rw-r--r--  runtime/onert/core/src/ir/operation/FullyConnected.cc | 2
-rw-r--r--  runtime/onert/core/src/ir/operation/LSTM.cc | 10
-rw-r--r--  runtime/onert/core/src/ir/operation/Pack.cc | 2
-rw-r--r--  runtime/onert/core/src/ir/operation/ResizeBilinear.cc | 2
-rw-r--r--  runtime/onert/core/src/ir/operation/ResizeNearestNeighbor.cc | 2
-rw-r--r--  runtime/onert/core/src/ir/operation/Split.cc | 2
-rw-r--r--  runtime/onert/core/src/ir/operation/Transpose.cc | 5
-rw-r--r--  runtime/onert/core/src/ir/verifier/Verifier.cc | 4
-rw-r--r--  runtime/onert/core/src/util/EventCollectorGlobal.cc | 5
-rw-r--r--  runtime/onert/core/src/util/EventRecorder.cc | 526
-rw-r--r--  runtime/onert/core/src/util/EventRecorder.h | 20
-rw-r--r--  runtime/onert/core/src/util/EventWriter.cc | 574
-rw-r--r--  runtime/onert/core/src/util/EventWriter.h | 51
-rw-r--r--  runtime/onert/core/src/util/ShapeInference.cc | 190
-rw-r--r--  runtime/onert/frontend/base_loader/include/base_loader.h | 1131
-rw-r--r--  runtime/onert/frontend/circle/CMakeLists.txt | 2
-rw-r--r--  runtime/onert/frontend/circle/src/circle_loader.cc | 5
-rw-r--r--  runtime/onert/frontend/nnapi/execution.cc | 24
-rw-r--r--  runtime/onert/frontend/nnapi/wrapper/ANeuralNetworksExecution.cc | 50
-rw-r--r--  runtime/onert/frontend/nnapi/wrapper/ANeuralNetworksExecution.h | 3
-rw-r--r--  runtime/onert/frontend/nnapi/wrapper/ANeuralNetworksModel.test.cc (renamed from runtime/onert/frontend/nnapi/ANeuralNetworksModel.test.cc) | 6
-rw-r--r--  runtime/onert/frontend/nnapi/wrapper/OperationFactory.cc | 155
-rw-r--r--  runtime/onert/frontend/tflite/CMakeLists.txt | 2
-rw-r--r--  runtime/onert/frontend/tflite/src/tflite_loader.cc | 5
-rw-r--r--  runtime/onert/test/graph/Index.cc | 2
-rw-r--r--  runtime/onert/test/graph/operand/IndexSet.cc | 4
-rw-r--r--  runtime/onert/test/graph/operand/LayoutSet.cc | 17
-rw-r--r--  runtime/onert/test/graph/operand/Set.cc | 2
-rw-r--r--  runtime/onert/test/graph/operand/UseDef.cc | 4
-rw-r--r--  runtime/onert/test/graph/operation/SetIO.cc | 2
-rw-r--r--  runtime/onert/test/graph/verifier/Verifier.cc | 51
-rw-r--r--  runtime/onert/test/util/ShapeInference.cc | 172
220 files changed, 8212 insertions, 6602 deletions
diff --git a/runtime/contrib/android/api/Android.mk b/runtime/contrib/android/api/Android.mk
index a056eff9d..3c768cca5 100644
--- a/runtime/contrib/android/api/Android.mk
+++ b/runtime/contrib/android/api/Android.mk
@@ -4,7 +4,5 @@ include $(CLEAR_VARS)
API_ROOT_PATH := $(LOCAL_PATH)
PREBUILT_LIB :=
-include $(API_ROOT_PATH)/prebuilt/Android.mk
+include $(API_ROOT_PATH)/Prebuilt.mk
include $(API_ROOT_PATH)/src/main/native/Android.mk
-
-#$(warning $(PREBUILT_LIB))
diff --git a/runtime/contrib/android/api/Prebuilt.mk b/runtime/contrib/android/api/Prebuilt.mk
new file mode 100644
index 000000000..7d9f56582
--- /dev/null
+++ b/runtime/contrib/android/api/Prebuilt.mk
@@ -0,0 +1,70 @@
+LOCAL_PATH := $(call my-dir)
+include $(CLEAR_VARS)
+
+ifndef ONERT_PREBUILT_LIB_DIR
+$(error ONERT_PREBUILT_LIB_DIR is not set)
+endif
+
+# libcircle_loader
+include $(CLEAR_VARS)
+LOCAL_MODULE := circle_loader
+PREBUILT_LIB += circle_loader
+LOCAL_SRC_FILES := \
+ $(ONERT_PREBUILT_LIB_DIR)/libcircle_loader.so
+include $(PREBUILT_SHARED_LIBRARY)
+
+# libtflite_loader
+include $(CLEAR_VARS)
+LOCAL_MODULE := tflite_loader
+PREBUILT_LIB += tflite_loader
+LOCAL_SRC_FILES := \
+ $(ONERT_PREBUILT_LIB_DIR)/libtflite_loader.so
+include $(PREBUILT_SHARED_LIBRARY)
+
+# libtensorflowlite_jni
+include $(CLEAR_VARS)
+LOCAL_MODULE := tensorflowlite_jni
+PREBUILT_LIB += tensorflowlite_jni
+LOCAL_SRC_FILES := \
+ $(ONERT_PREBUILT_LIB_DIR)/libtensorflowlite_jni.so
+include $(PREBUILT_SHARED_LIBRARY)
+
+# libnnfw
+include $(CLEAR_VARS)
+LOCAL_MODULE := nnfw-dev
+PREBUILT_LIB += nnfw-dev
+LOCAL_SRC_FILES := \
+ $(ONERT_PREBUILT_LIB_DIR)/libnnfw-dev.so
+include $(PREBUILT_SHARED_LIBRARY)
+
+# libonert_core
+include $(CLEAR_VARS)
+LOCAL_MODULE := onert_core
+PREBUILT_LIB += onert_core
+LOCAL_SRC_FILES := \
+ $(ONERT_PREBUILT_LIB_DIR)/libonert_core.so
+include $(PREBUILT_SHARED_LIBRARY)
+
+# backend_cpu
+include $(CLEAR_VARS)
+LOCAL_MODULE := backend_cpu
+PREBUILT_LIB += backend_cpu
+LOCAL_SRC_FILES := \
+ $(ONERT_PREBUILT_LIB_DIR)/libbackend_cpu.so
+include $(PREBUILT_SHARED_LIBRARY)
+
+# TODO Support backend acl
+# backend_acl
+ifeq ($(ONERT_CONTAINS_ACL), 1)
+ $(error containing acl backend is not supported yet)
+endif
+
+# backend_ext
+ifneq ($(ONERT_EXT_PREBUILT_LIB), )
+include $(CLEAR_VARS)
+LOCAL_MODULE := backend_ext
+PREBUILT_LIB += backend_ext
+LOCAL_SRC_FILES := \
+ $(ONERT_EXT_PREBUILT_LIB)
+include $(PREBUILT_SHARED_LIBRARY)
+endif
diff --git a/runtime/contrib/android/api/build.gradle b/runtime/contrib/android/api/build.gradle
index def89eeac..afc53d936 100644
--- a/runtime/contrib/android/api/build.gradle
+++ b/runtime/contrib/android/api/build.gradle
@@ -8,11 +8,39 @@ android {
minSdkVersion 26
targetSdkVersion 29
versionCode 1
- versionName "1.9.0"
+ versionName "1.10.0"
externalNativeBuild {
ndkBuild {
- arguments "ONERT_API_INC_DIR=${project.projectDir}/../../../onert/api/include"
+ def onert_header_dir
+ if (project.hasProperty('onertHeaderDir'))
+ onert_header_dir = project.onertHeaderDir
+ else
+ onert_header_dir = "${project.projectDir}/../../../onert/api/include"
+
+ def onert_lib_dir
+ if (project.hasProperty('onertLibDir'))
+ onert_lib_dir = project.onertLibDir
+ else
+ onert_lib_dir = "${project.projectDir}/../../../../Product/out/lib"
+
+ def onert_contains_acl
+ if (project.hasProperty('onertContainsAcl'))
+ onert_contains_acl = 1
+ else
+ onert_contains_acl = 0
+
+ def onert_ext_lib
+ if (project.hasProperty('onertExtLib'))
+ onert_ext_lib = project.onertExtLib
+ else
+ onert_ext_lib = ""
+
+ arguments "ONERT_API_INC_DIR=$onert_header_dir",
+ "ONERT_PREBUILT_LIB_DIR=$onert_lib_dir",
+ "ONERT_CONTAINS_ACL=$onert_contains_acl",
+ "ONERT_EXT_PREBUILT_LIB=$onert_ext_lib"
+
abiFilters 'arm64-v8a'
}
}
diff --git a/runtime/contrib/android/api/prebuilt/Android.mk b/runtime/contrib/android/api/prebuilt/Android.mk
deleted file mode 100644
index e8a9f0755..000000000
--- a/runtime/contrib/android/api/prebuilt/Android.mk
+++ /dev/null
@@ -1,9 +0,0 @@
-LOCAL_PATH := $(call my-dir)
-include $(CLEAR_VARS)
-PREBUILT_PATH := $(LOCAL_PATH)
-include $(PREBUILT_PATH)/backend_cpu/Android.mk
-include $(PREBUILT_PATH)/circle_loader/Android.mk
-include $(PREBUILT_PATH)/nnfw-dev/Android.mk
-include $(PREBUILT_PATH)/onert_core/Android.mk
-include $(PREBUILT_PATH)/tensorflowlite_jni/Android.mk
-include $(PREBUILT_PATH)/tflite_loader/Android.mk
diff --git a/runtime/contrib/android/api/prebuilt/backend_cpu/Android.mk b/runtime/contrib/android/api/prebuilt/backend_cpu/Android.mk
deleted file mode 100644
index ccda9ea90..000000000
--- a/runtime/contrib/android/api/prebuilt/backend_cpu/Android.mk
+++ /dev/null
@@ -1,7 +0,0 @@
-LOCAL_PATH := $(call my-dir)
-include $(CLEAR_VARS)
-LOCAL_MODULE := backend_cpu
-PREBUILT_LIB += backend_cpu
-LOCAL_SRC_FILES := \
- libbackend_cpu.so
-include $(PREBUILT_SHARED_LIBRARY)
diff --git a/runtime/contrib/android/api/prebuilt/backend_cpu/libbackend_cpu.so b/runtime/contrib/android/api/prebuilt/backend_cpu/libbackend_cpu.so
deleted file mode 120000
index 3d577cf5c..000000000
--- a/runtime/contrib/android/api/prebuilt/backend_cpu/libbackend_cpu.so
+++ /dev/null
@@ -1 +0,0 @@
-../../../../../../Product/out/lib/libbackend_cpu.so
\ No newline at end of file
diff --git a/runtime/contrib/android/api/prebuilt/circle_loader/Android.mk b/runtime/contrib/android/api/prebuilt/circle_loader/Android.mk
deleted file mode 100644
index 2e481e93e..000000000
--- a/runtime/contrib/android/api/prebuilt/circle_loader/Android.mk
+++ /dev/null
@@ -1,7 +0,0 @@
-LOCAL_PATH := $(call my-dir)
-include $(CLEAR_VARS)
-LOCAL_MODULE := circle_loader
-PREBUILT_LIB += circle_loader
-LOCAL_SRC_FILES := \
- libcircle_loader.so
-include $(PREBUILT_SHARED_LIBRARY)
diff --git a/runtime/contrib/android/api/prebuilt/circle_loader/libcircle_loader.so b/runtime/contrib/android/api/prebuilt/circle_loader/libcircle_loader.so
deleted file mode 120000
index 528d7017f..000000000
--- a/runtime/contrib/android/api/prebuilt/circle_loader/libcircle_loader.so
+++ /dev/null
@@ -1 +0,0 @@
-../../../../../../Product/out/lib/libcircle_loader.so
\ No newline at end of file
diff --git a/runtime/contrib/android/api/prebuilt/nnfw-dev/Android.mk b/runtime/contrib/android/api/prebuilt/nnfw-dev/Android.mk
deleted file mode 100644
index 10cb8f6f4..000000000
--- a/runtime/contrib/android/api/prebuilt/nnfw-dev/Android.mk
+++ /dev/null
@@ -1,7 +0,0 @@
-LOCAL_PATH := $(call my-dir)
-include $(CLEAR_VARS)
-LOCAL_MODULE := nnfw-dev
-PREBUILT_LIB += nnfw-dev
-LOCAL_SRC_FILES := \
- libnnfw-dev.so
-include $(PREBUILT_SHARED_LIBRARY)
diff --git a/runtime/contrib/android/api/prebuilt/nnfw-dev/libnnfw-dev.so b/runtime/contrib/android/api/prebuilt/nnfw-dev/libnnfw-dev.so
deleted file mode 120000
index 1913db8d7..000000000
--- a/runtime/contrib/android/api/prebuilt/nnfw-dev/libnnfw-dev.so
+++ /dev/null
@@ -1 +0,0 @@
-../../../../../../Product/out/lib/libnnfw-dev.so
\ No newline at end of file
diff --git a/runtime/contrib/android/api/prebuilt/onert_core/Android.mk b/runtime/contrib/android/api/prebuilt/onert_core/Android.mk
deleted file mode 100644
index a6682a24f..000000000
--- a/runtime/contrib/android/api/prebuilt/onert_core/Android.mk
+++ /dev/null
@@ -1,7 +0,0 @@
-LOCAL_PATH := $(call my-dir)
-include $(CLEAR_VARS)
-LOCAL_MODULE := onert_core
-PREBUILT_LIB += onert_core
-LOCAL_SRC_FILES := \
- libonert_core.so
-include $(PREBUILT_SHARED_LIBRARY)
diff --git a/runtime/contrib/android/api/prebuilt/onert_core/libonert_core.so b/runtime/contrib/android/api/prebuilt/onert_core/libonert_core.so
deleted file mode 120000
index bafe11cb9..000000000
--- a/runtime/contrib/android/api/prebuilt/onert_core/libonert_core.so
+++ /dev/null
@@ -1 +0,0 @@
-../../../../../../Product/out/lib/libonert_core.so
\ No newline at end of file
diff --git a/runtime/contrib/android/api/prebuilt/tensorflowlite_jni/Android.mk b/runtime/contrib/android/api/prebuilt/tensorflowlite_jni/Android.mk
deleted file mode 100644
index 823cf0747..000000000
--- a/runtime/contrib/android/api/prebuilt/tensorflowlite_jni/Android.mk
+++ /dev/null
@@ -1,7 +0,0 @@
-LOCAL_PATH := $(call my-dir)
-include $(CLEAR_VARS)
-LOCAL_MODULE := tensorflowlite_jni
-PREBUILT_LIB += tensorflowlite_jni
-LOCAL_SRC_FILES := \
- libtensorflowlite_jni.so
-include $(PREBUILT_SHARED_LIBRARY)
diff --git a/runtime/contrib/android/api/prebuilt/tensorflowlite_jni/libtensorflowlite_jni.so b/runtime/contrib/android/api/prebuilt/tensorflowlite_jni/libtensorflowlite_jni.so
deleted file mode 120000
index d3d72a5a7..000000000
--- a/runtime/contrib/android/api/prebuilt/tensorflowlite_jni/libtensorflowlite_jni.so
+++ /dev/null
@@ -1 +0,0 @@
-../../../../../../Product/out/lib/libtensorflowlite_jni.so
\ No newline at end of file
diff --git a/runtime/contrib/android/api/prebuilt/tflite_loader/Android.mk b/runtime/contrib/android/api/prebuilt/tflite_loader/Android.mk
deleted file mode 100644
index 135ac1dad..000000000
--- a/runtime/contrib/android/api/prebuilt/tflite_loader/Android.mk
+++ /dev/null
@@ -1,7 +0,0 @@
-LOCAL_PATH := $(call my-dir)
-include $(CLEAR_VARS)
-LOCAL_MODULE := tflite_loader
-PREBUILT_LIB += tflite_loader
-LOCAL_SRC_FILES := \
- libtflite_loader.so
-include $(PREBUILT_SHARED_LIBRARY)
diff --git a/runtime/contrib/android/api/prebuilt/tflite_loader/libtflite_loader.so b/runtime/contrib/android/api/prebuilt/tflite_loader/libtflite_loader.so
deleted file mode 120000
index 4c001aec0..000000000
--- a/runtime/contrib/android/api/prebuilt/tflite_loader/libtflite_loader.so
+++ /dev/null
@@ -1 +0,0 @@
-../../../../../../Product/out/lib/libtflite_loader.so
\ No newline at end of file
diff --git a/runtime/contrib/android/api/src/main/native/onert-native-api.cpp b/runtime/contrib/android/api/src/main/native/onert-native-api.cpp
index 1644e0f7f..209264d31 100644
--- a/runtime/contrib/android/api/src/main/native/onert-native-api.cpp
+++ b/runtime/contrib/android/api/src/main/native/onert-native-api.cpp
@@ -121,8 +121,7 @@ JNIEXPORT jboolean JNICALL Java_com_samsung_onert_NativeSessionWrapper_nativeSet
if (jni::setInput(handle, params) == false)
{
- __android_log_print(ANDROID_LOG_ERROR, JTAG, "%s] failed native setOutput",
- __PRETTY_FUNCTION__);
+ __android_log_print(ANDROID_LOG_ERROR, JTAG, "%s] failed native setInput", __PRETTY_FUNCTION__);
return JNI_FALSE;
}
diff --git a/runtime/contrib/android_benchmark_app/CMakeLists.txt b/runtime/contrib/android_benchmark_app/CMakeLists.txt
index 55dbf0024..beb279cb9 100644
--- a/runtime/contrib/android_benchmark_app/CMakeLists.txt
+++ b/runtime/contrib/android_benchmark_app/CMakeLists.txt
@@ -55,7 +55,7 @@ target_link_libraries(android_benchmark_native nnfw_lib_tflite)
target_link_libraries(android_benchmark_native nnfw_lib_misc)
target_link_libraries(android_benchmark_native log)
-nnas_find_package(FlatBuffersSource EXACT 1.11 REQUIRED)
+nnas_find_package(FlatBuffersSource EXACT 1.12 REQUIRED)
target_include_directories(android_benchmark_native PUBLIC ${FlatBuffersSource_DIR}/include .)
add_custom_target(android-benchmark-apk ALL
diff --git a/runtime/libs/ndarray/src/ContiguousSpan.cpp b/runtime/libs/benchmark/include/benchmark/MemoryInfo.h
index e06cfc2a1..6e8e12ba4 100644
--- a/runtime/libs/ndarray/src/ContiguousSpan.cpp
+++ b/runtime/libs/benchmark/include/benchmark/MemoryInfo.h
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
@@ -14,18 +14,27 @@
* limitations under the License.
*/
-#include "ndarray/ContiguousSpan.h"
+#ifndef __NNFW_BENCHMARK_MEMORY_INFO_H__
+#define __NNFW_BENCHMARK_MEMORY_INFO_H__
-namespace ndarray
+#include <cstdint>
+#include <string>
+
+namespace benchmark
{
-template class ContiguousSpan<float, true>;
-template class ContiguousSpan<float, false>;
-template class ContiguousSpan<int32_t, true>;
-template class ContiguousSpan<int32_t, false>;
-template class ContiguousSpan<uint32_t, true>;
-template class ContiguousSpan<uint32_t, false>;
-template class ContiguousSpan<uint8_t, true>;
-template class ContiguousSpan<uint8_t, false>;
+bool prepareVmRSS();
+bool prepareVmHWM();
+bool prepareGpuMemory();
+bool preparePssSum();
+
+uint32_t getVmRSS();
+uint32_t getVmHWM();
+uint32_t getGpuMemory(const std::string &process_name);
+uint32_t getPssSum();
+
+std::string getProcessName();
+
+} // namespace benchmark
-} // namespace ndarray
+#endif // __NNFW_BENCHMARK_MEMORY_INFO_H__
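The header above turns what used to be private MemoryPoller probes into free functions in the benchmark namespace, so any tool can sample memory on its own. A minimal usage sketch against these declarations (assuming the program links the nnfw benchmark library; values are in kB as parsed from /proc):

// Sketch only: exercises the free functions declared in benchmark/MemoryInfo.h above.
#include "benchmark/MemoryInfo.h"

#include <iostream>

int main()
{
  // Each prepare*() only checks that the corresponding /proc or sysfs source can be opened.
  if (!benchmark::prepareVmRSS() || !benchmark::preparePssSum())
  {
    std::cerr << "memory info sources are not available" << std::endl;
    return 1;
  }

  std::cout << "VmRSS : " << benchmark::getVmRSS() << " kB" << std::endl;
  std::cout << "VmHWM : " << benchmark::getVmHWM() << " kB" << std::endl;
  std::cout << "PssSum: " << benchmark::getPssSum() << " kB" << std::endl;

  // GPU memory is only available when the Mali debugfs node exists; the lookup key is the
  // process name taken from the "Name" field of /proc/self/status.
  if (benchmark::prepareGpuMemory())
    std::cout << "GpuMem: " << benchmark::getGpuMemory(benchmark::getProcessName()) << " kB" << std::endl;
  return 0;
}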
diff --git a/runtime/libs/benchmark/include/benchmark/MemoryPoller.h b/runtime/libs/benchmark/include/benchmark/MemoryPoller.h
index 48caa3b3a..47db3fd77 100644
--- a/runtime/libs/benchmark/include/benchmark/MemoryPoller.h
+++ b/runtime/libs/benchmark/include/benchmark/MemoryPoller.h
@@ -57,10 +57,6 @@ public:
private:
void process();
bool prepareMemoryPolling();
- uint32_t getVmRSS();
- uint32_t getVmHWM();
- uint32_t getGpuMemory();
- uint32_t getPssSum();
private:
std::chrono::milliseconds _duration;
diff --git a/runtime/libs/benchmark/include/benchmark/Phases.h b/runtime/libs/benchmark/include/benchmark/Phases.h
index 936a89742..7d642782a 100644
--- a/runtime/libs/benchmark/include/benchmark/Phases.h
+++ b/runtime/libs/benchmark/include/benchmark/Phases.h
@@ -50,6 +50,9 @@ public:
const MemoryPoller &mem_poll() const { return *_mem_poll; }
const Phase &at(const std::string &tag) const { return _phases.at(tag); }
+ uint32_t mem_before_init() const { return _mem_before_init; }
+ uint32_t mem_after_run() const { return _mem_after_run; }
+
private:
void run(const std::string &tag, const PhaseFunc &exec, const PhaseFunc *post, uint32_t loop_num,
bool option_disable);
@@ -58,6 +61,8 @@ private:
const PhaseOption _option;
std::unordered_map<std::string, Phase> _phases;
std::unique_ptr<MemoryPoller> _mem_poll;
+ uint32_t _mem_before_init;
+ uint32_t _mem_after_run;
};
} // namespace benchmark
diff --git a/runtime/libs/benchmark/include/benchmark/Result.h b/runtime/libs/benchmark/include/benchmark/Result.h
index 69084b300..7604aa904 100644
--- a/runtime/libs/benchmark/include/benchmark/Result.h
+++ b/runtime/libs/benchmark/include/benchmark/Result.h
@@ -34,6 +34,8 @@ public:
double time[PhaseEnum::END_OF_PHASE][FigureType::END_OF_FIG_TYPE];
uint32_t memory[PhaseEnum::END_OF_PHASE][MemoryType::END_OF_MEM_TYPE];
bool print_memory = false;
+ uint32_t init_memory = 0;
+ uint32_t peak_memory = 0;
};
// TODO Support not only stdout but also ostream
diff --git a/runtime/libs/benchmark/src/MemoryInfo.cpp b/runtime/libs/benchmark/src/MemoryInfo.cpp
new file mode 100644
index 000000000..20d262961
--- /dev/null
+++ b/runtime/libs/benchmark/src/MemoryInfo.cpp
@@ -0,0 +1,169 @@
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "benchmark/MemoryInfo.h"
+
+#include <vector>
+#include <algorithm>
+#include <fstream>
+#include <sstream>
+#include <cassert>
+#include <sys/time.h>
+#include <sys/resource.h>
+
+namespace
+{
+
+const std::string proc_status_path("/proc/self/status");
+const std::string gpu_memory_path("/sys/kernel/debug/mali0/gpu_memory");
+const std::string proc_smaps_path("/proc/self/smaps");
+
+bool isStrNumber(const std::string &s)
+{
+ return !s.empty() &&
+ std::find_if(s.begin(), s.end(), [](char c) { return !std::isdigit(c); }) == s.end();
+}
+
+std::vector<std::string> splitLine(std::string line, std::string delimiters = " \n\t")
+{
+ std::vector<std::string> words;
+ size_t prev = 0, pos;
+
+ while ((pos = line.find_first_of(delimiters, prev)) != std::string::npos)
+ {
+ if (pos > prev)
+ words.emplace_back(line.substr(prev, pos - prev));
+ prev = pos + 1;
+ }
+
+ if (prev < line.length())
+ words.emplace_back(line.substr(prev, std::string::npos));
+
+ return words;
+}
+
+std::vector<std::string> getValueFromFileStatus(const std::string &file, const std::string &key)
+{
+ std::ifstream ifs(file);
+ assert(ifs.is_open());
+
+ std::string line;
+ std::vector<std::string> val;
+
+ bool found = false;
+ while (std::getline(ifs, line))
+ {
+ if (line.find(key) != std::string::npos)
+ {
+ found = true;
+ break;
+ }
+ }
+ ifs.close();
+
+ if (!found)
+ {
+ // NOTE: a process that has not used GPU resources yet (e.g. at the model-load phase)
+ // may not appear in the file. In that case, just return an empty result.
+ return val;
+ }
+
+ val = splitLine(line);
+ return val;
+}
+
+// Because of smaps' structure, returns sum value as uint32_t
+uint32_t getSumValueFromFileSmaps(const std::string &file, const std::string &key)
+{
+ std::ifstream ifs(file);
+ assert(ifs.is_open());
+
+ std::string line;
+ uint32_t sum = 0;
+ while (std::getline(ifs, line))
+ {
+ if (line.find(key) != std::string::npos)
+ {
+ // an example by splitLine()
+ // `Pss: 0 kB`
+ // val[0]: "Pss:", val[1]: "0" val[2]: "kB"
+ auto val = splitLine(line);
+ assert(val.size() != 0);
+ // SwapPss can also match, so check that the line starts with "Pss"
+ if (val[0].find("Pss") != 0)
+ {
+ continue;
+ }
+ sum += std::stoul(val[1]);
+ }
+ }
+
+ return sum;
+}
+
+} // namespace
+
+namespace benchmark
+{
+
+bool prepareVmRSS() { return std::ifstream(proc_status_path).is_open(); }
+
+bool prepareVmHWM() { return std::ifstream(proc_status_path).is_open(); }
+
+bool prepareGpuMemory() { return std::ifstream(gpu_memory_path).is_open(); }
+
+bool preparePssSum() { return std::ifstream(proc_smaps_path).is_open(); }
+
+uint32_t getVmRSS()
+{
+ auto val = getValueFromFileStatus(proc_status_path, "VmRSS");
+ if (val.size() == 0)
+ return 0;
+ assert(isStrNumber(val[1]));
+ return std::stoul(val[1]);
+}
+
+uint32_t getVmHWM()
+{
+ auto val = getValueFromFileStatus(proc_status_path, "VmHWM");
+ if (val.size() == 0)
+ return 0;
+ // key: value
+ assert(isStrNumber(val[1]));
+ return std::stoul(val[1]);
+}
+
+uint32_t getGpuMemory(const std::string &process_name)
+{
+ assert(!process_name.empty());
+ auto val = getValueFromFileStatus(gpu_memory_path, process_name);
+ if (val.size() == 0)
+ return 0;
+ // process_name -> pid -> gpu_mem -> max_gpu_mem
+ assert(isStrNumber(val[2]));
+ return std::stoul(val[2]);
+}
+
+uint32_t getPssSum() { return getSumValueFromFileSmaps(proc_smaps_path, "Pss"); }
+
+std::string getProcessName()
+{
+ auto val = getValueFromFileStatus(proc_status_path, "Name");
+ assert(val.size() >= 2);
+ return val[1];
+}
+
+} // namespace benchmark
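For reference, the parsing above is line oriented: getValueFromFileStatus() takes the first line containing the key and splitLine() breaks it on spaces, tabs, and newlines, so "VmRSS:   123456 kB" becomes {"VmRSS:", "123456", "kB"} and the value is read from index 1. A self-contained sketch of an equivalent split (the helpers above live in an unnamed namespace, so this does not reuse them):

// Standalone illustration of the whitespace split used by splitLine()/getValueFromFileStatus().
#include <iostream>
#include <sstream>
#include <string>
#include <vector>

int main()
{
  // A typical /proc/self/status line; the real code reads it from the file.
  const std::string line = "VmRSS:\t  123456 kB";

  // Stream extraction splits on whitespace, matching splitLine's space/tab/newline defaults here.
  std::vector<std::string> words;
  std::istringstream iss(line);
  for (std::string w; iss >> w;)
    words.push_back(w);

  // words[0] == "VmRSS:", words[1] == "123456", words[2] == "kB"
  std::cout << "key=" << words[0] << " value(kB)=" << std::stoul(words[1]) << std::endl;
  return 0;
}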
diff --git a/runtime/libs/benchmark/src/MemoryPoller.cpp b/runtime/libs/benchmark/src/MemoryPoller.cpp
index 61fdecd46..050b5b163 100644
--- a/runtime/libs/benchmark/src/MemoryPoller.cpp
+++ b/runtime/libs/benchmark/src/MemoryPoller.cpp
@@ -16,106 +16,13 @@
#include "benchmark/MemoryPoller.h"
#include "benchmark/Types.h"
+#include "benchmark/MemoryInfo.h"
#include <vector>
-#include <fstream>
-#include <sstream>
#include <stdexcept>
#include <cassert>
#include <iostream>
-namespace
-{
-
-const std::string proc_status_path("/proc/self/status");
-const std::string gpu_memory_path("/sys/kernel/debug/mali0/gpu_memory");
-const std::string proc_smaps_path("/proc/self/smaps");
-
-bool isStrNumber(const std::string &s)
-{
- return !s.empty() &&
- std::find_if(s.begin(), s.end(), [](char c) { return !std::isdigit(c); }) == s.end();
-}
-
-std::vector<std::string> splitLine(std::string line, std::string delimiters = " \n\t")
-{
- std::vector<std::string> words;
- size_t prev = 0, pos;
-
- while ((pos = line.find_first_of(delimiters, prev)) != std::string::npos)
- {
- if (pos > prev)
- words.emplace_back(line.substr(prev, pos - prev));
- prev = pos + 1;
- }
-
- if (prev < line.length())
- words.emplace_back(line.substr(prev, std::string::npos));
-
- return words;
-}
-
-std::vector<std::string> getValueFromFileStatus(const std::string &file, const std::string &key)
-{
- std::ifstream ifs(file);
- assert(ifs.is_open());
-
- std::string line;
- std::vector<std::string> val;
-
- bool found = false;
- while (std::getline(ifs, line))
- {
- if (line.find(key) != std::string::npos)
- {
- found = true;
- break;
- }
- }
- ifs.close();
-
- if (!found)
- {
- // NOTE. the process which uses gpu resources cannot be there yet at the model-load phase.
- // At that time, just return empty.
- return val;
- }
-
- val = splitLine(line);
- return val;
-}
-
-// Because of smaps' structure, returns sum value as uint32_t
-uint32_t getSumValueFromFileSmaps(const std::string &file, const std::string &key)
-{
- std::ifstream ifs(file);
- assert(ifs.is_open());
-
- std::string line;
- uint32_t sum = 0;
- while (std::getline(ifs, line))
- {
- if (line.find(key) != std::string::npos)
- {
- // an example by splitLine()
- // `Pss: 0 kB`
- // val[0]: "Pss:", val[1]: "0" val[2]: "kB"
- auto val = splitLine(line);
- assert(val.size() != 0);
- // SwapPss could show so that check where Pss is at the beginning
- if (val[0].find("Pss") != 0)
- {
- continue;
- }
- sum += std::stoul(val[1]);
- }
- }
-
- return sum;
-}
-
-} // namespace
-
namespace benchmark
{
@@ -168,7 +75,7 @@ bool MemoryPoller::end(PhaseEnum phase)
mem = getVmRSS();
if (_gpu_poll)
{
- mem += getGpuMemory();
+ mem += getGpuMemory(_process_name);
}
if (mem > _rss_map[phase])
_rss_map[phase] = mem;
@@ -176,7 +83,7 @@ bool MemoryPoller::end(PhaseEnum phase)
mem = getVmHWM();
if (_gpu_poll)
{
- mem += getGpuMemory();
+ mem += getGpuMemory(_process_name);
}
_hwm_map[phase] = mem;
@@ -208,7 +115,7 @@ void MemoryPoller::process()
uint32_t cur_hwm = getVmHWM();
if (_gpu_poll)
{
- auto gpu_mem = getGpuMemory();
+ auto gpu_mem = getGpuMemory(_process_name);
cur_rss += gpu_mem;
cur_hwm += gpu_mem;
}
@@ -236,77 +143,33 @@ void MemoryPoller::process()
bool MemoryPoller::prepareMemoryPolling()
{
// VmRSS
+ if (!prepareVmRSS())
{
- std::ifstream ifs(proc_status_path);
- if (!ifs.is_open())
- {
- std::cerr << "failed to open " << proc_status_path << std::endl;
- return false;
- }
- ifs.close();
+ std::cerr << "failed to prepare parsing vmrss" << std::endl;
+ return false;
}
// (Additionally) GpuMemory
if (_gpu_poll)
{
- std::ifstream ifs(gpu_memory_path);
- if (!ifs.is_open())
+ if (!prepareGpuMemory())
{
- std::cerr << "failed to open " << gpu_memory_path << std::endl;
+ std::cerr << "failed to prepare parsing gpu memory" << std::endl;
return false;
}
- ifs.close();
// Needs process name
- auto val = getValueFromFileStatus(proc_status_path, "Name");
- assert(val.size() != 0);
- _process_name = val[1];
+ _process_name = getProcessName();
}
// PSS
+ if (!preparePssSum())
{
- std::ifstream ifs(proc_smaps_path);
- if (!ifs.is_open())
- {
- std::cerr << "failed to open " << proc_smaps_path << std::endl;
- return false;
- }
- ifs.close();
+ std::cerr << "failed to prepare parsing pss sum" << std::endl;
+ return false;
}
return true;
}
-uint32_t MemoryPoller::getVmRSS()
-{
- auto val = getValueFromFileStatus(proc_status_path, "VmRSS");
- if (val.size() == 0)
- return 0;
- assert(isStrNumber(val[1]));
- return std::stoul(val[1]);
-}
-
-uint32_t MemoryPoller::getVmHWM()
-{
- auto val = getValueFromFileStatus(proc_status_path, "VmHWM");
- if (val.size() == 0)
- return 0;
- // key: value
- assert(isStrNumber(val[1]));
- return std::stoul(val[1]);
-}
-
-uint32_t MemoryPoller::getGpuMemory()
-{
- assert(!_process_name.empty());
- auto val = getValueFromFileStatus(gpu_memory_path, _process_name);
- if (val.size() == 0)
- return 0;
- // process_name -> pid -> gpu_mem -> max_gpu_mem
- assert(isStrNumber(val[2]));
- return std::stoul(val[2]);
-}
-
-uint32_t MemoryPoller::getPssSum() { return getSumValueFromFileSmaps(proc_smaps_path, "Pss"); }
-
} // namespace benchmark
diff --git a/runtime/libs/benchmark/src/Phases.cpp b/runtime/libs/benchmark/src/Phases.cpp
index 9ab67cfd9..897b943d3 100644
--- a/runtime/libs/benchmark/src/Phases.cpp
+++ b/runtime/libs/benchmark/src/Phases.cpp
@@ -17,6 +17,7 @@
#include "benchmark/Phases.h"
#include "benchmark/Types.h"
+#include "benchmark/MemoryInfo.h"
#include <cassert>
#include <chrono>
@@ -46,8 +47,11 @@ void SleepForMicros(uint64_t micros)
namespace benchmark
{
-Phases::Phases(const PhaseOption &option) : _option(option)
+Phases::Phases(const PhaseOption &option) : _option(option), _mem_before_init(0), _mem_after_run(0)
{
+ assert(prepareVmRSS());
+ _mem_before_init = getVmHWM();
+
if (_option.memory)
{
_mem_poll = std::make_unique<MemoryPoller>(std::chrono::milliseconds(option.memory_interval),
@@ -93,6 +97,8 @@ void Phases::run(const std::string &tag, const PhaseFunc &exec, const PhaseFunc
}
}
+ _mem_after_run = getVmHWM();
+
if (p == PhaseEnum::END_OF_PHASE)
{
return;
diff --git a/runtime/libs/benchmark/src/Result.cpp b/runtime/libs/benchmark/src/Result.cpp
index df573da92..e6cafb91c 100644
--- a/runtime/libs/benchmark/src/Result.cpp
+++ b/runtime/libs/benchmark/src/Result.cpp
@@ -141,6 +141,15 @@ void printResultMemory(const uint32_t memory[benchmark::PhaseEnum::END_OF_PHASE]
}
}
+void printUsedPeakMemory(uint32_t init_memory, uint32_t peak_memory)
+{
+ uint32_t used_peak_memory = peak_memory - init_memory;
+ std::cout << "Used Peak Memory : " << used_peak_memory << " kb" << std::endl;
+ std::cout << "- HWM after run : " << peak_memory << " kb" << std::endl;
+ std::cout << "- HWM before init: " << init_memory << " kb" << std::endl;
+ std::cout << "===================================" << std::endl;
+}
+
} // namespace
namespace benchmark
@@ -175,6 +184,8 @@ Result::Result(const Phases &phases)
}
}
}
+ init_memory = phases.mem_before_init();
+ peak_memory = phases.mem_after_run();
}
void printResult(const Result &result)
@@ -185,6 +196,7 @@ void printResult(const Result &result)
return;
printResultMemory(result.memory);
+ printUsedPeakMemory(result.init_memory, result.peak_memory);
}
// TODO There are necessary for a kind of output data file so that it doesn't have to be csv file
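The new report is just the difference of two VmHWM samples: Phases stores the high-water mark before any initialization and again after the run, and printUsedPeakMemory() prints peak minus init. A minimal sketch of the same bookkeeping outside Phases/Result (the vector allocation is an illustrative stand-in for the prepare and execute phases):

// Sketch of the used-peak-memory bookkeeping that Phases/Result add above.
#include "benchmark/MemoryInfo.h"

#include <cstdint>
#include <iostream>
#include <vector>

int main()
{
  // High-water mark (kB) before any model loading or backend initialization.
  const uint32_t mem_before_init = benchmark::getVmHWM();

  // Illustrative workload; in the runtime this is model load + prepare + execute.
  std::vector<float> workload(8 * 1024 * 1024, 1.0f);

  // VmHWM never decreases, so the delta is the peak memory the workload added.
  const uint32_t mem_after_run = benchmark::getVmHWM();

  std::cout << "Used Peak Memory : " << (mem_after_run - mem_before_init) << " kB" << std::endl;
  std::cout << "- HWM after run  : " << mem_after_run << " kB" << std::endl;
  std::cout << "- HWM before init: " << mem_before_init << " kB" << std::endl;
  return 0;
}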
diff --git a/runtime/libs/misc/include/misc/polymorphic_downcast.h b/runtime/libs/misc/include/misc/polymorphic_downcast.h
index 412b864e6..ee885eb70 100644
--- a/runtime/libs/misc/include/misc/polymorphic_downcast.h
+++ b/runtime/libs/misc/include/misc/polymorphic_downcast.h
@@ -27,9 +27,7 @@ namespace misc
template <typename DstType, typename SrcType> inline DstType polymorphic_downcast(SrcType *x)
{
-#ifndef __ANDROID__
assert(dynamic_cast<DstType>(x) == x);
-#endif
return static_cast<DstType>(x);
}
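With the __ANDROID__ guard removed, the dynamic_cast sanity check now also runs in Android debug builds. A small usage sketch; Base and Derived are illustrative types, and the fully qualified name is assumed to be nnfw::misc::polymorphic_downcast based on this library's layout (the hunk above only shows the inner misc namespace):

// Usage sketch for polymorphic_downcast; the outer nnfw:: qualification is an assumption.
#include "misc/polymorphic_downcast.h"

#include <iostream>

struct Base
{
  virtual ~Base() = default;
};

struct Derived : Base
{
  int value = 42;
};

int main()
{
  Derived d;
  Base *base = &d;

  // Debug builds assert that dynamic_cast<Derived *>(base) == base (now on Android too);
  // the cast itself is a plain static_cast.
  auto *derived = nnfw::misc::polymorphic_downcast<Derived *>(base);
  std::cout << derived->value << std::endl;
  return 0;
}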
diff --git a/runtime/libs/ndarray/CMakeLists.txt b/runtime/libs/ndarray/CMakeLists.txt
deleted file mode 100644
index b040f5115..000000000
--- a/runtime/libs/ndarray/CMakeLists.txt
+++ /dev/null
@@ -1,19 +0,0 @@
-add_library(ndarray STATIC src/Array.cpp src/ContiguousSpan.cpp)
-
-set_target_properties(ndarray PROPERTIES POSITION_INDEPENDENT_CODE ON)
-
-target_include_directories(ndarray PUBLIC include)
-#can't make this private because of c++ templates
-target_include_directories(ndarray PUBLIC src)
-
-option(NDARRAY_INLINE_TEMPLATES "Set to ON to disable extern declarations for common types")
-
-if(${NDARRAY_INLINE_TEMPLATES})
- target_compile_definitions(ndarray PUBLIC -DNDARRAY_INLINE_TEMPLATES=1)
-endif()
-
-target_link_libraries(ndarray PRIVATE nnfw_common)
-target_link_libraries(ndarray PRIVATE nnfw_coverage)
-
-add_subdirectory(test)
-add_subdirectory(example)
diff --git a/runtime/libs/ndarray/example/CMakeLists.txt b/runtime/libs/ndarray/example/CMakeLists.txt
deleted file mode 100644
index c4b575dad..000000000
--- a/runtime/libs/ndarray/example/CMakeLists.txt
+++ /dev/null
@@ -1,4 +0,0 @@
-add_executable(example_no_array example_no_array.cpp)
-
-add_executable(example_array example_array.cpp)
-target_link_libraries(example_array PRIVATE ndarray)
diff --git a/runtime/libs/ndarray/example/example_array.cpp b/runtime/libs/ndarray/example/example_array.cpp
deleted file mode 100644
index 85d274681..000000000
--- a/runtime/libs/ndarray/example/example_array.cpp
+++ /dev/null
@@ -1,76 +0,0 @@
-/*
- * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#include "ndarray/Array.h"
-
-#include <iostream>
-#include <iterator>
-
-using namespace ndarray;
-
-void gather_array(const Array<float> &input, Array<float> &output, const Array<int> &indices)
-{
- assert(indices.shape().rank() == 3);
- assert(input.shape().rank() == 3);
- assert(indices.shape().dim(1) == input.shape().rank());
-
- for (size_t i = 0; i < indices.shape().dim(0); ++i)
- {
- for (size_t j = 0; j < indices.shape().dim(1); ++j)
- {
- auto index = indices.slice(i, j);
- output.slice(i, j).assign(input.slice(index[0], index[1]));
- }
- }
-}
-
-int main()
-{
- // fill tensor of shape[3,3,4] with sequential numbers from [0..36)
- Shape in_shape{3, 3, 4};
- std::vector<float> input_data(in_shape.element_count());
- for (size_t i = 0; i < in_shape.element_count(); ++i)
- input_data[i] = i;
-
- Array<float> input(input_data.data(), in_shape);
-
- // select column-vectors on main diagonal
- Shape indices_shape{1, 3, 2};
- std::vector<int> indices_data(indices_shape.element_count());
- Array<int> indices(indices_data.data(), indices_shape);
-
- indices.slice(0, 0) = {0, 0};
- indices.slice(0, 1) = {1, 1};
- indices.slice(0, 2) = {2, 2};
-
- Shape output_shape{1, 3, 4};
- std::vector<float> output_data(output_shape.element_count());
-
- Array<float> output(output_data.data(), output_shape);
-
- gather_array(input, output, indices);
-
- for (size_t i = 0; i < indices_shape.dim(0); ++i)
- {
- for (size_t j = 0; j < indices_shape.dim(1); ++j)
- {
- auto output_piece = output.slice(i, j);
- std::ostream_iterator<int> cout_it(std::cout, ", ");
- std::copy(output_piece.begin(), output_piece.end(), cout_it);
- std::cout << std::endl;
- }
- }
-}
diff --git a/runtime/libs/ndarray/example/example_no_array.cpp b/runtime/libs/ndarray/example/example_no_array.cpp
deleted file mode 100644
index 3a4d05dca..000000000
--- a/runtime/libs/ndarray/example/example_no_array.cpp
+++ /dev/null
@@ -1,85 +0,0 @@
-/*
- * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#include <array>
-#include <vector>
-#include <algorithm>
-#include <cassert>
-#include <iostream>
-
-void gather_no_array(const float *in_data, const std::array<size_t, 3> &dims, float *out_data,
- const std::array<size_t, 3> &out_dims, //[nselections,
- const int *indices, const std::array<size_t, 3> &indices_dims)
-{
- assert(indices_dims[1] == dims.size());
-
- for (int i = 0; i < indices_dims[0]; ++i)
- {
- for (int j = 0; j < indices_dims[1]; ++j)
- {
- const int *index_ptr = indices + i * indices_dims[2] * indices_dims[1] + j * indices_dims[2];
-
- size_t in_offset = index_ptr[0] * dims[2] * dims[1] + index_ptr[1] * dims[2];
-
- const float *in_ptr = in_data + in_offset;
-
- size_t out_offset = i * out_dims[2] * out_dims[1] + j * out_dims[2];
-
- float *out_ptr = out_data + out_offset;
-
- for (int k = 0; k < dims[2]; ++k)
- {
- out_ptr[k] = in_ptr[k];
- }
- }
- }
-}
-
-int main()
-{
- std::array<size_t, 3> in_dims{3, 3, 4};
- std::vector<float> input(3 * 3 * 4);
- for (size_t i = 0; i < 3 * 3 * 4; ++i)
- input[i] = i;
-
- std::array<size_t, 3> indices_shape{1, 3, 2};
- std::vector<int> indices(1 * 3 * 2);
-
- indices[0] = 0;
- indices[1] = 0;
- indices[2] = 1;
- indices[3] = 1;
- indices[4] = 2;
- indices[5] = 2;
-
- std::array<size_t, 3> output_dims{1, 3, 4};
- std::vector<float> output(1 * 3 * 4);
-
- gather_no_array(input.data(), in_dims, output.data(), output_dims, indices.data(), indices_shape);
-
- for (size_t i = 0; i < output_dims[0]; ++i)
- {
- for (size_t j = 0; j < output_dims[1]; ++j)
- {
- auto out_ptr = output.data() + i * output_dims[1] * output_dims[2] + j * output_dims[2];
- for (size_t k = 0; k < output_dims[2]; ++k)
- {
- std::cout << out_ptr[k] << ", ";
- }
- std::cout << std::endl;
- }
- }
-}
diff --git a/runtime/libs/ndarray/include/ndarray/Array.h b/runtime/libs/ndarray/include/ndarray/Array.h
deleted file mode 100644
index 3890cc26b..000000000
--- a/runtime/libs/ndarray/include/ndarray/Array.h
+++ /dev/null
@@ -1,195 +0,0 @@
-/*
- * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#ifndef _NDARRAY_ARRAY_H_
-#define _NDARRAY_ARRAY_H_
-
-#include "Common.h"
-
-#include "ContiguousSpan.h"
-#include "Shape.h"
-
-#if __cplusplus < 201402L
-#include "detail/cxx14.h" //integer_sequence and make_index_dequence definitions
-#else
-#include <utility>
-#endif
-
-#include <algorithm>
-#include <cassert>
-#include <type_traits>
-#include <array>
-#include <tuple>
-#include <cstddef>
-
-namespace ndarray
-{
-
-// there is no index_sequence before c++14
-#if __cplusplus < 201402L
-
-template <size_t... Nums> using index_sequence = cxx14::index_sequence<Nums...>;
-
-template <size_t Num> using make_index_sequence = cxx14::make_index_sequence<Num>;
-
-#else
-
-template <size_t... Nums> using index_sequence = std::index_sequence<Nums...>;
-
-template <size_t _Num> using make_index_sequence = std::make_index_sequence<_Num>;
-
-#endif //__cplusplus < 201402L
-
-struct Strides
-{
- explicit Strides(Shape s) : _strides{} { fillStrides(s); }
-
- int operator[](size_t idx) const noexcept { return _strides[idx]; }
-
- // since we don't have c++14 fold expression
- template <typename Seq, typename... Ts> struct _calc_offset;
-
- template <size_t Num, size_t... Nums, typename T, typename... Ts>
- struct _calc_offset<index_sequence<Num, Nums...>, T, Ts...>
- {
- static constexpr size_t get(const std::array<int, 8> &strides, int x, Ts... xs)
- {
- return _calc_offset<index_sequence<Nums...>, Ts...>::get(strides, xs...) +
- x * std::get<Num>(strides);
- }
- };
-
- template <size_t Num, typename T> struct _calc_offset<index_sequence<Num>, T>
- {
- static constexpr size_t get(const std::array<int, 8> &strides, int x)
- {
- return x * std::get<Num>(strides);
- }
- };
-
- template <typename Seq, typename... Ts> constexpr size_t offset(Seq, Ts... x) const noexcept
- {
- // return ( 0 + ... + (std::get<Nums>(_strides) * x)); in c++14
- return _calc_offset<Seq, Ts...>::get(_strides, x...);
- }
-
-private:
- void fillStrides(const Shape &s) noexcept
- {
- int rank = s.rank();
- _strides[rank - 1] = 1;
- for (int d = rank - 2; d >= 0; --d)
- {
- _strides[d] = _strides[d + 1] * s.dim(d + 1);
- }
- }
-
- std::array<int, NDARRAY_MAX_DIMENSION_COUNT> _strides;
-};
-
-template <typename T> class Array
-{
-public:
- Array(T *data, Shape shape) noexcept : _data(data), _shape(shape), _strides(shape) {}
-
- Array(const Array &) = delete;
-
- Array(Array &&a) noexcept : _data(a._data), _shape(a._shape), _strides(a._strides)
- {
- a._data = nullptr;
- }
-
- template <typename... Ts> T &at(Ts... x) const noexcept { return _at(static_cast<size_t>(x)...); }
-
- /**
- * @brief returns last dimension as ContigniousSpan
- * @param x indices of slice to take. See tests for usage details
- * @return slice at given position
- */
- template <typename... Ts> ContiguousSpan<T, std::is_const<T>::value> slice(Ts... x) noexcept
- {
- assert(sizeof...(Ts) == _shape.rank() - 1);
- return {&at(x..., 0ul), _shape.dim(_shape.rank() - 1)};
- }
-
- /**
- * @brief returns last dimension as ContigniousSpan
- * @param x indices of slice to take. See tests for usage details
- * @return slice at given position
- */
- template <typename... Ts> ContiguousSpan<T, true> slice(Ts... x) const noexcept
- {
- assert(sizeof...(Ts) == _shape.rank() - 1);
- return {&at(x..., 0ul), _shape.dim(_shape.rank() - 1)};
- }
-
- ContiguousSpan<T, std::is_const<T>::value> flat() noexcept
- {
- return {_data, _shape.element_count()};
- }
-
- ContiguousSpan<T, true> flat() const noexcept { return {_data, _shape.element_count()}; }
-
- const Shape &shape() const noexcept { return _shape; }
-
-private:
- template <typename... Ts> T &_at(Ts... x) const noexcept
- {
- assert(sizeof...(x) == _shape.rank());
- using Indices = make_index_sequence<sizeof...(Ts)>;
- return _data[offset(Indices{}, x...)];
- }
-
- template <typename... Ts, size_t... Nums>
- size_t offset(index_sequence<Nums...> seq, Ts... x) const noexcept
- {
- static_assert(
- sizeof...(Ts) == sizeof...(Nums),
- "Sanity check failed. Generated index sequence size is not equal to argument count");
-
- return _strides.offset(seq, x...);
- }
-
- T *_data;
- Shape _shape;
- Strides _strides;
-};
-
-template <typename To, typename From> Array<To> array_cast(Array<From> &&from, Shape newShape)
-{
- assert(from.shape().element_count() / (sizeof(To) / sizeof(From)) == newShape.element_count());
- return Array<To>(reinterpret_cast<To *>(from.flat().data()), newShape);
-}
-
-template <typename To, typename From>
-Array<const To> array_cast(const Array<From> &from, Shape newShape)
-{
- assert(from.shape().element_count() / (sizeof(To) / sizeof(From)) == newShape.element_count());
- return Array<To>(reinterpret_cast<const To *>(from.flat().data()), newShape);
-}
-
-#ifndef NDARRAY_INLINE_TEMPLATES
-
-extern template class Array<float>;
-extern template class Array<int32_t>;
-extern template class Array<uint32_t>;
-extern template class Array<uint8_t>;
-
-#endif // NDARRAY_INLINE_TEMPLATES
-
-} // namespace ndarray
-
-#endif //_NDARRAY_ARRAY_H_
diff --git a/runtime/libs/ndarray/include/ndarray/Common.h b/runtime/libs/ndarray/include/ndarray/Common.h
deleted file mode 100644
index aa0cc6fe2..000000000
--- a/runtime/libs/ndarray/include/ndarray/Common.h
+++ /dev/null
@@ -1,22 +0,0 @@
-/*
- * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#ifndef _NDARRAY_COMMON_H_
-#define _NDARRAY_COMMON_H_
-
-#define NDARRAY_MAX_DIMENSION_COUNT 8
-
-#endif //_NDARRAY_COMMON_H_
diff --git a/runtime/libs/ndarray/include/ndarray/ContiguousSpan.h b/runtime/libs/ndarray/include/ndarray/ContiguousSpan.h
deleted file mode 100644
index 8caa6a686..000000000
--- a/runtime/libs/ndarray/include/ndarray/ContiguousSpan.h
+++ /dev/null
@@ -1,108 +0,0 @@
-/*
- * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#ifndef _NDARRAY_CONTIGNIOUS_SPAN_H_
-#define _NDARRAY_CONTIGNIOUS_SPAN_H_
-
-#include <type_traits>
-#include <vector>
-#include <cstdint>
-#include <cstddef>
-#include <cassert>
-
-namespace ndarray
-{
-
-template <typename T, bool isConst = false> class ContiguousSpan
-{
-public:
- using pointer_type = typename std::conditional<isConst, const T *, T *>::type;
- using reference_type = typename std::conditional<isConst, const T &, T &>::type;
- using iterator_type = pointer_type;
-
- ContiguousSpan(pointer_type data, size_t len) noexcept : _data(data), _len(len) {}
-
- template <typename It>
- explicit ContiguousSpan(It first, It last) noexcept
- : _data(&*first), _len(std::distance(first, last))
- {
- }
-
- ContiguousSpan(const ContiguousSpan &) = delete;
-
- ContiguousSpan(ContiguousSpan &&s) noexcept : _data(s._data), _len(s._len) { s._data = nullptr; }
-
- operator ContiguousSpan<T, true>() { return ContiguousSpan<T, true>{_data, _len}; }
-
- reference_type operator[](size_t idx) const noexcept { return _data[idx]; }
-
- reference_type at(size_t idx) const noexcept { return _data[idx]; }
-
- ContiguousSpan<T, isConst> offset(size_t offset)
- {
- assert(offset <= _len);
- return {_data + offset, _len - offset};
- }
-
- template <typename From, bool _ = isConst>
- typename std::enable_if<!_, void>::type assign(const From &f) noexcept
- {
- assignFrom(std::begin(f), std::end(f));
- }
-
- template <typename U, bool _ = isConst>
- typename std::enable_if<!_, ContiguousSpan &>::type
- operator=(std::initializer_list<U> list) noexcept
- {
- assignFrom(std::begin(list), std::end(list));
- return *this;
- }
-
- template <typename It, bool _ = isConst>
- typename std::enable_if<!_, void>::type assignFrom(It first, It last) noexcept
- {
- std::copy(first, last, begin());
- }
-
- size_t size() const { return _len; }
-
- iterator_type begin() const { return iterator_type{_data}; }
-
- iterator_type end() const { return iterator_type{_data + _len}; }
-
- pointer_type data() { return _data; }
-
-private:
- pointer_type _data;
- size_t _len;
-};
-
-#ifndef NDARRAY_INLINE_TEMPLATES
-
-extern template class ContiguousSpan<float, true>;
-extern template class ContiguousSpan<float, false>;
-extern template class ContiguousSpan<int32_t, true>;
-extern template class ContiguousSpan<int32_t, false>;
-extern template class ContiguousSpan<uint32_t, true>;
-extern template class ContiguousSpan<uint32_t, false>;
-extern template class ContiguousSpan<uint8_t, true>;
-extern template class ContiguousSpan<uint8_t, false>;
-
-#endif // NDARRAY_INLINE_TEMPLATES
-
-} // namespace ndarray
-
-#endif //_NDARRAY_CONTIGNIOUS_SPAN_H_
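A hedged usage sketch for the ContiguousSpan view removed above, mirroring the slice_assign_test that appears later in this diff; the buffer contents and the example function name are illustrative, only the ContiguousSpan members come from the removed header.

    #include <vector>

    #include "ndarray/ContiguousSpan.h"

    void span_example()
    {
      std::vector<float> src{1, 2, 3, 4};
      std::vector<float> dst(4);

      ndarray::ContiguousSpan<float> in(src.begin(), src.end());
      ndarray::ContiguousSpan<float> out(dst.begin(), dst.end());

      out.assign(in);            // element-wise copy via assignFrom()
      auto tail = out.offset(2); // non-owning view of the last two elements
      tail = {9, 9};             // initializer_list assignment writes through to dst
    }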
diff --git a/runtime/libs/ndarray/include/ndarray/Shape.h b/runtime/libs/ndarray/include/ndarray/Shape.h
deleted file mode 100644
index fa58613b8..000000000
--- a/runtime/libs/ndarray/include/ndarray/Shape.h
+++ /dev/null
@@ -1,66 +0,0 @@
-/*
- * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#ifndef _NDARRAY_SHAPE_H_
-#define _NDARRAY_SHAPE_H_
-
-#include "Common.h"
-
-#include <array>
-#include <cassert>
-#include <cstddef>
-
-namespace ndarray
-{
-
-class Shape
-{
-public:
- //_dims{} is used here and below because std::array has no std::initializer_list ctor
- // and aggregate initialization is not allowed here
- explicit Shape(size_t rank) noexcept : _dims{}, _rank(rank)
- {
- std::fill(_dims.begin(), _dims.end(), 0);
- }
-
- Shape(std::initializer_list<size_t> list) noexcept : _dims{}, _rank(list.size())
- {
- std::copy(list.begin(), list.end(), _dims.begin());
- }
-
- size_t dim(int i) const noexcept { return _dims.at(i); }
-
- size_t &dim(int i) noexcept { return _dims.at(i); }
-
- size_t element_count() const noexcept
- {
- uint32_t res = 1;
- for (size_t i = 0; i < rank(); ++i)
- res *= dim(i);
- assert(res <= 0xffffffff);
- return res;
- }
-
- size_t rank() const noexcept { return _rank; }
-
-private:
- std::array<size_t, NDARRAY_MAX_DIMENSION_COUNT> _dims;
- size_t _rank;
-};
-
-} // namespace ndarray
-
-#endif //_NDARRAY_SHAPE_H_
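A small sketch of the Shape class removed above; the dimension values are arbitrary and only illustrate that element_count() is the product of all dimensions (bounded by the assert against 0xffffffff).

    #include <cassert>

    #include "ndarray/Shape.h"

    void shape_example()
    {
      ndarray::Shape s{2, 3, 4}; // rank and dimensions are fixed at construction

      assert(s.rank() == 3);
      assert(s.dim(1) == 3);
      assert(s.element_count() == 2 * 3 * 4); // 24 elements in total
    }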
diff --git a/runtime/libs/ndarray/src/detail/cxx14.h b/runtime/libs/ndarray/src/detail/cxx14.h
deleted file mode 100644
index 81135b3f2..000000000
--- a/runtime/libs/ndarray/src/detail/cxx14.h
+++ /dev/null
@@ -1,67 +0,0 @@
-/*
- * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#ifndef _NDARRAY_CXX14_H_
-#define _NDARRAY_CXX14_H_
-
-namespace ndarray
-{
-
-namespace cxx14
-{
-
-template <size_t... Nums> struct index_sequence
-{
- using value_type = size_t;
-
- static constexpr std::size_t size() noexcept { return sizeof...(Nums); }
-};
-
-namespace detail
-{
-
-template <size_t v, typename Seq> struct _append;
-
-template <size_t v, size_t... Nums> struct _append<v, index_sequence<Nums...>>
-{
- using result = index_sequence<Nums..., v>;
-};
-
-template <size_t Len> struct make_index_sequence
-{
- using result =
- typename detail::_append<Len - 1, typename make_index_sequence<Len - 1>::result>::result;
-};
-
-template <> struct make_index_sequence<1>
-{
- using result = index_sequence<0>;
-};
-
-template <> struct make_index_sequence<0>
-{
- using result = index_sequence<>;
-};
-
-} // namespace detail
-
-template <size_t Num> using make_index_sequence = typename detail::make_index_sequence<Num>::result;
-
-} // namespace cxx14
-
-} // namespace ndarray
-
-#endif //_NDARRAY_CXX14_H_
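The header above backports std::index_sequence / std::make_index_sequence to C++11. A hedged sketch of how such a sequence is typically expanded; the print function and include path are illustrative, not part of the removed code.

    #include <cstddef>
    #include <iostream>

    #include "detail/cxx14.h" // path relative to runtime/libs/ndarray/src

    template <std::size_t... Is> void print_indices(ndarray::cxx14::index_sequence<Is...>)
    {
      // Expands the pack left to right; make_index_sequence<4> prints "0 1 2 3 ".
      int expand[] = {(std::cout << Is << ' ', 0)...};
      (void)expand;
    }

    void index_sequence_example() { print_indices(ndarray::cxx14::make_index_sequence<4>{}); }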
diff --git a/runtime/libs/ndarray/test/CMakeLists.txt b/runtime/libs/ndarray/test/CMakeLists.txt
deleted file mode 100644
index 16f8779ee..000000000
--- a/runtime/libs/ndarray/test/CMakeLists.txt
+++ /dev/null
@@ -1,17 +0,0 @@
-if(NOT BUILD_NDARRAY_TEST)
- return()
-endif()
-
-add_executable(ndarray_test ndarray_test.cpp)
-
-target_link_libraries(ndarray_test PRIVATE ndarray)
-
-nnfw_find_package(GTest)
-if(NOT GTest_FOUND)
- message(STATUS "GTest not avaialble. Skipping NDArray test build")
- return()
-endif(NOT GTest_FOUND)
-
-target_link_libraries(ndarray_test PUBLIC gtest gtest_main ${LIB_PTHREAD})
-
-add_test(ndarray_test ndarray_test)
diff --git a/runtime/libs/ndarray/test/ndarray_test.cpp b/runtime/libs/ndarray/test/ndarray_test.cpp
deleted file mode 100644
index 0aa948c72..000000000
--- a/runtime/libs/ndarray/test/ndarray_test.cpp
+++ /dev/null
@@ -1,92 +0,0 @@
-/*
- * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#include "gtest/gtest.h"
-
-#include "ndarray/Array.h"
-
-using namespace ndarray;
-
-TEST(NDArray_tests, basic_data_test)
-{
-
- float raw_data[] = {1, 2, 3, 4};
-
- Array<float> data22{raw_data, {2, 2}};
-
- ASSERT_FLOAT_EQ(data22.at(0, 0), 1);
- ASSERT_FLOAT_EQ(data22.at(0, 1), 2);
- ASSERT_FLOAT_EQ(data22.at(1, 0), 3);
- ASSERT_FLOAT_EQ(data22.at(1, 1), 4);
-
- Array<float> data14{raw_data, {1, 4}};
- ASSERT_FLOAT_EQ(data14.at(0, 0), 1);
- ASSERT_FLOAT_EQ(data14.at(0, 1), 2);
- ASSERT_FLOAT_EQ(data14.at(0, 2), 3);
- ASSERT_FLOAT_EQ(data14.at(0, 3), 4);
-}
-
-TEST(NDArray_tests, slice_write_test)
-{
- float raw_data[4] = {0};
-
- Array<float> data22{raw_data, {2, 2}};
-
- data22.slice(1) = {1, 2};
-
- ASSERT_FLOAT_EQ(data22.at(0, 0), 0);
- ASSERT_FLOAT_EQ(data22.at(0, 1), 0);
- ASSERT_FLOAT_EQ(data22.at(1, 0), 1);
- ASSERT_FLOAT_EQ(data22.at(1, 1), 2);
-}
-
-TEST(NDArray_tests, slice_read_test)
-{
- float raw_data[4] = {1, 2, 3, 4};
-
- Array<float> data22{raw_data, {2, 2}};
-
- auto slice = data22.slice(1);
-
- ASSERT_FLOAT_EQ(slice[0], 3);
- ASSERT_FLOAT_EQ(slice[1], 4);
-}
-
-TEST(NDArray_tests, multidim_test)
-{
- float raw_data[5] = {0, 1, 2, 3, 4};
-
- Array<float> data22{raw_data, {1, 1, 1, 1, 5}};
-
- ASSERT_FLOAT_EQ(data22.at(0, 0, 0, 0, 0), 0);
- ASSERT_FLOAT_EQ(data22.at(0, 0, 0, 0, 1), 1);
- ASSERT_FLOAT_EQ(data22.at(0, 0, 0, 0, 2), 2);
- ASSERT_FLOAT_EQ(data22.at(0, 0, 0, 0, 3), 3);
- ASSERT_FLOAT_EQ(data22.at(0, 0, 0, 0, 4), 4);
-}
-
-TEST(NDArray_tests, slice_assign_test)
-{
- std::vector<float> v1{1, 2, 3, 4, 5};
- std::vector<float> v2(5);
-
- ContiguousSpan<float> span1(v1.begin(), v1.end());
- ContiguousSpan<float> span2(v2.begin(), v2.end());
-
- span2.assign(span1);
-
- ASSERT_EQ(v1, v2);
-}
diff --git a/runtime/libs/nnapi/CMakeLists.txt b/runtime/libs/nnapi/CMakeLists.txt
index a5d9490d1..73f82b909 100644
--- a/runtime/libs/nnapi/CMakeLists.txt
+++ b/runtime/libs/nnapi/CMakeLists.txt
@@ -1,3 +1,4 @@
-add_subdirectories()
+add_library(nnfw_lib_nnapi INTERFACE)
-add_library(nnfw_lib_nnapi ALIAS nnfw_lib_nnapi_1_2)
+target_include_directories(nnfw_lib_nnapi INTERFACE ${CMAKE_CURRENT_SOURCE_DIR}/include)
+target_link_libraries(nnfw_lib_nnapi INTERFACE nnfw-nnapi-header)
diff --git a/runtime/libs/nnapi/v1.2/include/NeuralNetworksExShim.h b/runtime/libs/nnapi/include/NeuralNetworksExShim.h
index 855613241..855613241 100644
--- a/runtime/libs/nnapi/v1.2/include/NeuralNetworksExShim.h
+++ b/runtime/libs/nnapi/include/NeuralNetworksExShim.h
diff --git a/runtime/libs/nnapi/v1.2/include/NeuralNetworksLoadHelpers.h b/runtime/libs/nnapi/include/NeuralNetworksLoadHelpers.h
index 1c482b54c..1c482b54c 100644
--- a/runtime/libs/nnapi/v1.2/include/NeuralNetworksLoadHelpers.h
+++ b/runtime/libs/nnapi/include/NeuralNetworksLoadHelpers.h
diff --git a/runtime/libs/nnapi/v1.2/include/NeuralNetworksShim.h b/runtime/libs/nnapi/include/NeuralNetworksShim.h
index 80082383f..80082383f 100644
--- a/runtime/libs/nnapi/v1.2/include/NeuralNetworksShim.h
+++ b/runtime/libs/nnapi/include/NeuralNetworksShim.h
diff --git a/runtime/libs/nnapi/v1.2/include/NeuralNetworksTypes.h b/runtime/libs/nnapi/include/NeuralNetworksTypes.h
index d74402749..d74402749 100644
--- a/runtime/libs/nnapi/v1.2/include/NeuralNetworksTypes.h
+++ b/runtime/libs/nnapi/include/NeuralNetworksTypes.h
diff --git a/runtime/libs/nnapi/v1.1/CMakeLists.txt b/runtime/libs/nnapi/v1.1/CMakeLists.txt
deleted file mode 100644
index dc018c60f..000000000
--- a/runtime/libs/nnapi/v1.1/CMakeLists.txt
+++ /dev/null
@@ -1,4 +0,0 @@
-add_library(nnfw_lib_nnapi_1_1 INTERFACE)
-
-target_include_directories(nnfw_lib_nnapi_1_1 INTERFACE ${CMAKE_CURRENT_SOURCE_DIR}/include)
-target_link_libraries(nnfw_lib_nnapi_1_1 INTERFACE nnfw-nnapi-header)
diff --git a/runtime/libs/nnapi/v1.1/include/NeuralNetworksExShim.h b/runtime/libs/nnapi/v1.1/include/NeuralNetworksExShim.h
deleted file mode 100644
index f684dab90..000000000
--- a/runtime/libs/nnapi/v1.1/include/NeuralNetworksExShim.h
+++ /dev/null
@@ -1,64 +0,0 @@
-/* Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
- Copyright 2017 The TensorFlow Authors. All Rights Reserved.
-
-Licensed under the Apache License, Version 2.0 (the "License");
-you may not use this file except in compliance with the License.
-You may obtain a copy of the License at
-
- http://www.apache.org/licenses/LICENSE-2.0
-
-Unless required by applicable law or agreed to in writing, software
-distributed under the License is distributed on an "AS IS" BASIS,
-WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-See the License for the specific language governing permissions and
-limitations under the License.
-==============================================================================*/
-
-/**
- * @file NeuralNetworksExShim.h
- * @brief This file contains an actual implementation of
- * ANeuralNetworksModel_addOperationEx function
- * @ingroup COM_AI_RUNTIME
- */
-
-#ifndef NN_API_EX_SHIM_H
-#define NN_API_EX_SHIM_H
-
-#include "NeuralNetworksEx.h"
-#include "NeuralNetworksLoadHelpers.h"
-
-typedef int (*ANeuralNetworksModel_addOperationEx_fn)(ANeuralNetworksModel *model,
- ANeuralNetworksOperationTypeEx type,
- uint32_t inputCount, const uint32_t *inputs,
- uint32_t outputCount,
- const uint32_t *outputs);
-
-/**
- * @brief Add an extended operation to a model.
- *
- * @param[in] model The model to be modified.
- * @param[in] type The type of extended operation.
- * @param[in] inputCount The number of entries in the inputs array.
- * @param[in] inputs An array of indexes identifying each operand.
- * @param[in] outputCount The number of entries in the outputs array.
- * @param[in] outputs An array of indexes identifying each operand.
- *
- * @note The operands specified by inputs and outputs must have been
- * previously added by calls to {@link ANeuralNetworksModel_addOperand}.\n
- * Attempting to modify a model once {@link ANeuralNetworksModel_finish}
- * has been called will return an error.\n
- * See {@link ANeuralNetworksModel} for information on multithreaded usage.
- *
- * @return ANEURALNETWORKS_NO_ERROR if successful.
- */
-
-inline int ANeuralNetworksModel_addOperationEx(ANeuralNetworksModel *model,
- ANeuralNetworksOperationTypeEx type,
- uint32_t inputCount, const uint32_t *inputs,
- uint32_t outputCount, const uint32_t *outputs)
-{
- LOAD_FUNCTION(ANeuralNetworksModel_addOperationEx);
- EXECUTE_FUNCTION_RETURN(model, type, inputCount, inputs, outputCount, outputs);
-}
-
-#endif // NN_API_EX_SHIM_H
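A hedged sketch of how the extension shim above was consumed: LOAD_FUNCTION resolves the real ANeuralNetworksModel_addOperationEx from libneuralnetworks.so on first use and EXECUTE_FUNCTION_RETURN forwards the arguments. The operand indices and the op parameter are illustrative; extension op codes come from NeuralNetworksEx.h.

    #include <cstdint>

    #include "NeuralNetworksExShim.h"

    int add_extension_op(ANeuralNetworksModel *model, ANeuralNetworksOperationTypeEx op)
    {
      // Assumes operand 0 (input) and operand 1 (output) were already added
      // with ANeuralNetworksModel_addOperand().
      const uint32_t inputs[] = {0};
      const uint32_t outputs[] = {1};

      // dlsym()s the entry point on first call and forwards the arguments;
      // loadFunction() aborts if libneuralnetworks.so does not export the symbol.
      return ANeuralNetworksModel_addOperationEx(model, op, 1, inputs, 1, outputs);
    }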
diff --git a/runtime/libs/nnapi/v1.1/include/NeuralNetworksLoadHelpers.h b/runtime/libs/nnapi/v1.1/include/NeuralNetworksLoadHelpers.h
deleted file mode 100644
index 201465f9c..000000000
--- a/runtime/libs/nnapi/v1.1/include/NeuralNetworksLoadHelpers.h
+++ /dev/null
@@ -1,141 +0,0 @@
-/* Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
- Copyright 2017 The TensorFlow Authors. All Rights Reserved.
-
-Licensed under the Apache License, Version 2.0 (the "License");
-you may not use this file except in compliance with the License.
-You may obtain a copy of the License at
-
- http://www.apache.org/licenses/LICENSE-2.0
-
-Unless required by applicable law or agreed to in writing, software
-distributed under the License is distributed on an "AS IS" BASIS,
-WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-See the License for the specific language governing permissions and
-limitations under the License.
-==============================================================================*/
-
-// NOTE To minimize diff with upstream tensorflow, disable clang-format
-// clang-format off
-
-// NOTE This header is derived from part of the following file (in TensorFlow v1.12)
-// 'externals/tensorflow/tensorflow/contrib/lite/nnapi/NeuralNetworksShim.h'
-
-/**
- * @file NeuralNetworksLoadHelpers.h
- * @ingroup COM_AI_RUNTIME
- * @brief This file contains functions to load NN API runtime library
- */
-
-#ifndef __NEURAL_NETWORKS_LOAD_HELPER_H__
-#define __NEURAL_NETWORKS_LOAD_HELPER_H__
-
-#include <dlfcn.h>
-#include <stdint.h>
-#include <stdio.h>
-#include <stdlib.h>
-
-/**
- * @brief Print log data
- * @param[in] format Format string of @c printf
- * @param[in] args Argument after format string. (Same with @c printf)
- */
-#define NNAPI_LOG(format, ...) printf(format "\n", __VA_ARGS__);
-
-/**
- * @brief Create a function pointer named @c fn after loading NN API library
- * @param[in] name Name of a function
- */
-#define LOAD_FUNCTION(name) \
- static name##_fn fn = reinterpret_cast<name##_fn>(nnfw::loadFunction(#name));
-
-/**
- * @brief Run @c fn function. @c fn is created by @ref LOAD_FUNCTION
- * @param[in] args List of arguments for the function @c fn
- */
-#define EXECUTE_FUNCTION(...) \
- if (fn != nullptr) { \
- fn(__VA_ARGS__); \
- }
-
-/**
- * @brief Run @c fn function. @c fn is created by @ref LOAD_FUNCTION
- * @param[in] args List of arguments for the function @c fn
- * @return the return value of @c fn
- */
-#define EXECUTE_FUNCTION_RETURN(...) return fn != nullptr ? fn(__VA_ARGS__) : 0;
-
-namespace nnfw
-{
-
-/**
- * @brief Load NN API library
- * @param[in] name path of NN API library
- * @return a symbol table handle of NN API library
- */
-inline void* loadLibrary(const char* name) {
- // TODO: change RTLD_LOCAL? Assumes there can be multiple instances of nn
- // api RT
- void* handle = nullptr;
-#if 1 //#ifdef __ANDROID__
- handle = dlopen(name, RTLD_LAZY | RTLD_LOCAL);
- if (handle == nullptr) {
- NNAPI_LOG("nnapi error: unable to open library %s", name);
- NNAPI_LOG(" %s", dlerror());
- }
-#endif
- return handle;
-}
-
-/**
- * @brief Load libneuralnetworks.so and return handle of library
- * @return a symbol table handle of NN API library
- */
-inline void* getLibraryHandle() {
- static void* handle = loadLibrary("libneuralnetworks.so");
- return handle;
-}
-
-/**
- * @brief Return function ptr in libneuralnetworks.so
- * @param[in] name Name of function
- * @return function pointer
- */
-inline void* loadFunction(const char* name) {
- void* fn = nullptr;
- if (getLibraryHandle() != nullptr) {
- fn = dlsym(getLibraryHandle(), name);
- }
- if (fn == nullptr) {
- NNAPI_LOG("nnapi error: unable to open function %s", name);
- NNAPI_LOG(" %s", dlerror());
- abort();
- }
- else {
-#ifdef _GNU_SOURCE
- Dl_info info;
- if (dladdr(fn, &info))
- {
- NNAPI_LOG("nnapi function '%s' is loaded from '%s' ", name, info.dli_fname);
- }
- else
- {
- NNAPI_LOG("nnapi function '%s' is failed to load", name);
- }
-
-#endif // _GNU_SOURCE
- }
- return fn;
-}
-
-/**
- * @brief Check if libneuralnetworks.so can be loaded
- * @return @c true if loading is successful, otherwise @c false.
- */
-inline bool NNAPIExists() {
- static bool nnapi_is_available = getLibraryHandle();
- return nnapi_is_available;
-}
-
-} // namespace nnfw
-
-#endif // __NEURAL_NETWORKS_LOAD_HELPER_H__
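A short sketch of the intended use of the loader above: NNAPIExists() dlopen()s libneuralnetworks.so once and caches the handle, so callers can probe for the NN API runtime before touching any shim function. The fallback message and function name are illustrative.

    #include <cstdio>

    #include "NeuralNetworksLoadHelpers.h"

    bool nnapi_available()
    {
      if (!nnfw::NNAPIExists())
      {
        std::printf("NN API runtime not found; using a non-NNAPI backend instead\n");
        return false;
      }
      return true; // subsequent shim calls reuse the cached library handle
    }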
diff --git a/runtime/libs/nnapi/v1.1/include/NeuralNetworksShim.h b/runtime/libs/nnapi/v1.1/include/NeuralNetworksShim.h
deleted file mode 100644
index 60b16f766..000000000
--- a/runtime/libs/nnapi/v1.1/include/NeuralNetworksShim.h
+++ /dev/null
@@ -1,709 +0,0 @@
-/* Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
- Copyright 2017 The TensorFlow Authors. All Rights Reserved.
-
-Licensed under the Apache License, Version 2.0 (the "License");
-you may not use this file except in compliance with the License.
-You may obtain a copy of the License at
-
- http://www.apache.org/licenses/LICENSE-2.0
-
-Unless required by applicable law or agreed to in writing, software
-distributed under the License is distributed on an "AS IS" BASIS,
-WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-See the License for the specific language governing permissions and
-limitations under the License.
-==============================================================================*/
-
-// NOTE To minimize diff with upstream tensorflow, disable clang-format
-// clang-format off
-
-// NOTE This header is derived from part of the following file (in TensorFlow v1.12)
-// 'externals/tensorflow/tensorflow/contrib/lite/nnapi/NeuralNetworksShim.h'
-#ifndef __NEURAL_NETWORKS_SHIM__
-#define __NEURAL_NETWORKS_SHIM__
-
-#include "NeuralNetworks.h"
-#include "NeuralNetworksLoadHelpers.h"
-
-// nn api function types
-
-typedef int (*ANeuralNetworksMemory_createFromFd_fn)(
- size_t size, int protect, int fd, size_t offset,
- ANeuralNetworksMemory** memory);
-
-typedef void (*ANeuralNetworksMemory_free_fn)(ANeuralNetworksMemory* memory);
-
-typedef int (*ANeuralNetworksModel_create_fn)(ANeuralNetworksModel** model);
-
-typedef int (*ANeuralNetworksModel_finish_fn)(ANeuralNetworksModel* model);
-
-typedef void (*ANeuralNetworksModel_free_fn)(ANeuralNetworksModel* model);
-
-typedef int (*ANeuralNetworksCompilation_create_fn)(
- ANeuralNetworksModel* model, ANeuralNetworksCompilation** compilation);
-
-typedef void (*ANeuralNetworksCompilation_free_fn)(
- ANeuralNetworksCompilation* compilation);
-
-typedef int (*ANeuralNetworksCompilation_setPreference_fn)(
- ANeuralNetworksCompilation* compilation, int32_t preference);
-
-typedef int (*ANeuralNetworksCompilation_finish_fn)(
- ANeuralNetworksCompilation* compilation);
-
-typedef int (*ANeuralNetworksModel_addOperand_fn)(
- ANeuralNetworksModel* model, const ANeuralNetworksOperandType* type);
-
-typedef int (*ANeuralNetworksModel_setOperandValue_fn)(
- ANeuralNetworksModel* model, int32_t index, const void* buffer,
- size_t length);
-
-typedef int (*ANeuralNetworksModel_setOperandValueFromMemory_fn)(
- ANeuralNetworksModel* model, int32_t index,
- const ANeuralNetworksMemory* memory, size_t offset, size_t length);
-
-typedef int (*ANeuralNetworksModel_addOperation_fn)(
- ANeuralNetworksModel* model, ANeuralNetworksOperationType type,
- uint32_t inputCount, const uint32_t* inputs, uint32_t outputCount,
- const uint32_t* outputs);
-
-typedef int (*ANeuralNetworksModel_identifyInputsAndOutputs_fn)(
- ANeuralNetworksModel* model, uint32_t inputCount, const uint32_t* inputs,
- uint32_t outputCount, const uint32_t* outputs);
-
-typedef int (*ANeuralNetworksModel_relaxComputationFloat32toFloat16_fn)(
- ANeuralNetworksModel* model, bool allow);
-
-typedef int (*ANeuralNetworksExecution_create_fn)(
- ANeuralNetworksCompilation* compilation,
- ANeuralNetworksExecution** execution);
-
-typedef void (*ANeuralNetworksExecution_free_fn)(
- ANeuralNetworksExecution* execution);
-
-typedef int (*ANeuralNetworksExecution_setInput_fn)(
- ANeuralNetworksExecution* execution, int32_t index,
- const ANeuralNetworksOperandType* type, const void* buffer, size_t length);
-
-typedef int (*ANeuralNetworksExecution_setInputFromMemory_fn)(
- ANeuralNetworksExecution* execution, int32_t index,
- const ANeuralNetworksOperandType* type, const ANeuralNetworksMemory* memory,
- size_t offset, size_t length);
-
-typedef int (*ANeuralNetworksExecution_setOutput_fn)(
- ANeuralNetworksExecution* execution, int32_t index,
- const ANeuralNetworksOperandType* type, void* buffer, size_t length);
-
-typedef int (*ANeuralNetworksExecution_setOutputFromMemory_fn)(
- ANeuralNetworksExecution* execution, int32_t index,
- const ANeuralNetworksOperandType* type, const ANeuralNetworksMemory* memory,
- size_t offset, size_t length);
-
-typedef int (*ANeuralNetworksExecution_startCompute_fn)(
- ANeuralNetworksExecution* execution, ANeuralNetworksEvent** event);
-
-typedef int (*ANeuralNetworksEvent_wait_fn)(ANeuralNetworksEvent* event);
-
-typedef void (*ANeuralNetworksEvent_free_fn)(ANeuralNetworksEvent* event);
-
-/**
- * Creates a shared memory object from a file descriptor.
- *
- * The shared memory is backed by a file descriptor via mmap.
- * See {@link ANeuralNetworksMemory} for a description on how to use
- * this shared memory.
- *
- * @param size The requested size in bytes.
- * Must not be larger than the file size.
- * @param prot The desired memory protection for the mapping.
- * It is either PROT_NONE or the bitwise OR of one or
- * more of the following flags: PROT_READ, PROT_WRITE.
- * @param fd The requested file descriptor.
- * The file descriptor has to be mmap-able. The file
- * descriptor will be duplicated.
- * @param offset The offset to the beginning of the file of the area to map.
- * The offset has to be aligned to a page size.
- * @param memory The memory object to be created.
- * Set to NULL if unsuccessful.
- *
- * @return ANEURALNETWORKS_NO_ERROR if the request completed normally.
- */
-inline int ANeuralNetworksMemory_createFromFd(size_t size, int protect, int fd,
- size_t offset,
- ANeuralNetworksMemory** memory) {
- LOAD_FUNCTION(ANeuralNetworksMemory_createFromFd);
- EXECUTE_FUNCTION_RETURN(size, protect, fd, offset, memory);
-}
-
-/**
- * Delete a memory object.
- *
- * Destroys the object used by the run time to keep track of the memory.
- * This will free the underlying actual memory if no other code has open
- * handles to this memory.
- *
- * @param memory The memory object to be freed.
- */
-inline void ANeuralNetworksMemory_free(ANeuralNetworksMemory* memory) {
- LOAD_FUNCTION(ANeuralNetworksMemory_free);
- EXECUTE_FUNCTION(memory);
-}
-
-/**
- * Create an empty {@link ANeuralNetworksModel}.
- *
- * <p>This only creates the object. Computation is performed once
- * {@link ANeuralNetworksExecution_startCompute} is invoked.
- *
- * The model should be constructed with calls to
- * {@link ANeuralNetworksModel_addOperation} and
- * {@link ANeuralNetworksModel_addOperand}
- *
- * <p>{@link ANeuralNetworksModel_finish} should be called once the model
- * has been fully constructed.</p>
- *
- * <p>{@link ANeuralNetworksModel_free} should be called once the model
- * is no longer needed.</p>
- *
- * @param model The {@link ANeuralNetworksModel} to be created.
- * Set to NULL if unsuccessful.
- *
- * @return ANEURALNETWORKS_NO_ERROR if successful.
- */
-inline int ANeuralNetworksModel_create(ANeuralNetworksModel** model) {
- LOAD_FUNCTION(ANeuralNetworksModel_create);
- EXECUTE_FUNCTION_RETURN(model);
-}
-
-/**
- * Destroy a model.
- *
- * The model need not have been finished by a call to
- * {@link ANeuralNetworksModel_finish}.
- *
- * See {@link ANeuralNetworksModel} for information on multithreaded usage.
- *
- * @param model The model to be destroyed. Passing NULL is acceptable and
- * results in no operation.
- */
-inline void ANeuralNetworksModel_free(ANeuralNetworksModel* model) {
- LOAD_FUNCTION(ANeuralNetworksModel_free);
- EXECUTE_FUNCTION(model);
-}
-
-/**
- * Indicate that we have finished modifying a model. Required before
- * calling {@link ANeuralNetworksCompilation_compile}.
- *
- * An application is responsible to make sure that no other thread uses
- * the model at the same time.
- *
- * See {@link ANeuralNetworksModel} for information on multithreaded usage.
- *
- * @param model The model to be finished.
- *
- * @return ANEURALNETWORKS_NO_ERROR if successful.
- */
-inline int ANeuralNetworksModel_finish(ANeuralNetworksModel* model) {
- LOAD_FUNCTION(ANeuralNetworksModel_finish);
- EXECUTE_FUNCTION_RETURN(model);
-}
-
-/**
- * Add an operand to a model.
- *
- * The order in which the operands are added is important. The first one added
- * to a model will have the index value 0, the second 1, etc. These indexes are
- * used as operand identifiers in {@link ANeuralNetworksModel_addOperation},
- * {@link ANeuralNetworksExecution_setInput},
- * {@link ANeuralNetworksExecution_setInputFromMemory},
- * {@link ANeuralNetworksExecution_setOutput},
- * {@link ANeuralNetworksExecution_setOutputFromMemory} and
- * {@link ANeuralNetworksExecution_setOperandValue}.
- *
- * To build a model that can accommodate inputs of various sizes, as you may
- * want to do for a CNN, set the size of the dimensions that will vary at run
- * time to 0. If you do so, provide the full dimensions when calling
- * {@link ANeuralNetworksExecution_setInput} or {@link
- * ANeuralNetworksExecution_setInputFromMemory}.
- *
- * Attempting to modify a model once {@link ANeuralNetworksModel_finish} has
- * been called will return an error.
- *
- * See {@link ANeuralNetworksModel} for information on multithreaded usage.
- *
- * @param model The model to be modified.
- * @param type The {@link ANeuralNetworksOperandType} that describes the shape
- * of the operand.
- *
- * @return ANEURALNETWORKS_NO_ERROR if successful.
- */
-inline int ANeuralNetworksModel_addOperand(
- ANeuralNetworksModel* model, const ANeuralNetworksOperandType* type) {
- LOAD_FUNCTION(ANeuralNetworksModel_addOperand);
- EXECUTE_FUNCTION_RETURN(model, type);
-}
-
-/**
- * Sets an operand to a constant value.
- *
- * For scalar values, the content of buffer is copied into the model.
- *
- * For tensor values, a pointer to the buffer is stored within the model.
- * The application is responsible for not changing the content of this region
- * until all executions using this model have completed. As the data may
- * be copied during processing, modifying the data after this call yields
- * undefined results.
- *
- * Attempting to modify a model once {@link ANeuralNetworksModel_finish} has
- * been called will return an error.
- *
- * See {@link ANeuralNetworksModel} for information on multithreaded usage.
- *
- * @param model The model to be modified.
- * @param index The index of the model operand we're setting.
- * @param buffer A pointer to the data to use.
- * @param length The size in bytes of the data value.
- *
- * @return ANEURALNETWORKS_NO_ERROR if successful.
- */
-inline int ANeuralNetworksModel_setOperandValue(ANeuralNetworksModel* model,
- int32_t index,
- const void* buffer,
- size_t length) {
- LOAD_FUNCTION(ANeuralNetworksModel_setOperandValue);
- EXECUTE_FUNCTION_RETURN(model, index, buffer, length);
-}
-
-/**
- * Sets an operand to a value stored in a memory object.
- *
- * The content of the memory is not copied. A reference to that memory is stored
- * inside the model. The application is responsible for not changing the content
- * of the memory region until all executions using this model have completed.
- * As the data may be copied during processing, modifying the data after this
- * call yields undefined results.
- *
- * Attempting to modify a model once {@link ANeuralNetworksModel_finish} has
- * been called will return an error.
- *
- * See {@link ANeuralNetworksModel} for information on multithreaded usage.
- *
- * @param model The model to be modified.
- * @param index The index of the model operand we're setting.
- * @param buffer A pointer to the data to use.
- * @param memory The memory containing the data.
- * @param offset This specifies the location of the data within the memory.
- * The offset is in bytes from the start of memory.
- * @param length The size in bytes of the data value.
- *
- * @return ANEURALNETWORKS_NO_ERROR if successful.
- */
-inline int ANeuralNetworksModel_setOperandValueFromMemory(
- ANeuralNetworksModel* model, int32_t index,
- const ANeuralNetworksMemory* memory, size_t offset, size_t length) {
- LOAD_FUNCTION(ANeuralNetworksModel_setOperandValueFromMemory);
- EXECUTE_FUNCTION_RETURN(model, index, memory, offset, length);
-}
-
-/**
- * Add an operation to a model.
- *
- * @param model The model to be modified.
- * @param type The type of the operation.
- * @param inputCount The number of entries in the inputs array.
- * @param inputs An array of indexes identifying each operand.
- * @param outputCount The number of entries in the outputs array.
- * @param outputs An array of indexes identifying each operand.
- *
- * The operands specified by inputs and outputs must have been
- * previously added by calls to {@link ANeuralNetworksModel_addOperand}.
- *
- * Attempting to modify a model once {@link ANeuralNetworksModel_finish} has
- * been called will return an error.
- *
- * See {@link ANeuralNetworksModel} for information on multithreaded usage.
- *
- * @return ANEURALNETWORKS_NO_ERROR if successful.
- */
-inline int ANeuralNetworksModel_addOperation(ANeuralNetworksModel* model,
- ANeuralNetworksOperationType type,
- uint32_t inputCount,
- const uint32_t* inputs,
- uint32_t outputCount,
- const uint32_t* outputs) {
- LOAD_FUNCTION(ANeuralNetworksModel_addOperation);
- EXECUTE_FUNCTION_RETURN(model, type, inputCount, inputs, outputCount,
- outputs);
-}
-
-/**
- * Specifies which operands will be the model's inputs and outputs.
- *
- * An operand cannot be used for both input and output. Doing so will
- * return an error.
- *
- * @param model The model to be modified.
- * @param inputCount The number of entries in the inputs array.
- * @param inputs An array of indexes identifying the input operands.
- * @param outputCount The number of entries in the outputs array.
- * @param outputs An array of indexes identifying the output operands.
- *
- * The operands specified by inputs and outputs must have been
- * previously added by calls to {@link ANeuralNetworksModel_addOperand}.
- *
- * Attempting to modify a model once {@link ANeuralNetworksModel_finish} has
- * been called will return an error.
- *
- * See {@link ANeuralNetworksModel} for information on multithreaded usage.
- *
- */
-inline int ANeuralNetworksModel_identifyInputsAndOutputs(
- ANeuralNetworksModel* model, uint32_t inputCount, const uint32_t* inputs,
- uint32_t outputCount, const uint32_t* outputs) {
- LOAD_FUNCTION(ANeuralNetworksModel_identifyInputsAndOutputs);
- EXECUTE_FUNCTION_RETURN(model, inputCount, inputs, outputCount, outputs);
-}
-
-/**
- * Specifies whether {@link ANEURALNETWORKS_TENSOR_FLOAT32} is allowed to be
- * calculated with range and/or precision as low as that of the IEEE 754 16-bit
- * floating-point format. By default, {@link ANEURALNETWORKS_TENSOR_FLOAT32}
- * must be calculated using at least the range and precision of the IEEE 754
- * 32-bit floating-point format.
- *
- * @param model The model to be modified.
- * @param allow 'true' indicates {@link ANEURALNETWORKS_TENSOR_FLOAT32} may be
- * calculated with range and/or precision as low as that of the
- * IEEE 754 16-bit floating point format. 'false' indicates
- * {@link ANEURALNETWORKS_TENSOR_FLOAT32} must be calculated using
- * at least the range and precision of the IEEE 754 32-bit floating
- * point format.
- *
- * Attempting to modify a model once {@link ANeuralNetworksModel_finish} has
- * been called will return an error.
- *
- * Available since API level 28.
- *
- * See {@link ANeuralNetworksModel} for information on multithreaded usage.
- */
-inline int ANeuralNetworksModel_relaxComputationFloat32toFloat16(
- ANeuralNetworksModel* model, bool allow) {
- LOAD_FUNCTION(ANeuralNetworksModel_relaxComputationFloat32toFloat16);
- EXECUTE_FUNCTION_RETURN(model, allow);
-}
-
-/**
- * Create a {@link ANeuralNetworksCompilation} to compile the given model.
- * This only creates the object. Compilation is only performed once
- * {@link ANeuralNetworksCompilation_start} is invoked.
- *
- * <p>The provided model must outlive the compilation.</p>
- *
- * The model must already have been finished by a call to
- * {@link ANeuralNetworksModel_finish}.
- *
- * See {@link ANeuralNetworksCompilation} for information on multithreaded
- * usage.
- *
- * @param model The {@link ANeuralNetworksModel} to be compiled.
- * @param compilation The newly created object or NULL if unsuccessful.
- *
- * @return ANEURALNETWORKS_NO_ERROR if successful, ANEURALNETWORKS_BAD_DATA
- * if the model is invalid.
- */
-inline int ANeuralNetworksCompilation_create(
- ANeuralNetworksModel* model, ANeuralNetworksCompilation** compilation) {
- LOAD_FUNCTION(ANeuralNetworksCompilation_create);
- EXECUTE_FUNCTION_RETURN(model, compilation);
-}
-
-/**
- * Destroy a compilation.
- *
- * <p>If called on a compilation for which
- * {@link ANeuralNetworksCompilation_start} has been called, the
- * function will return immediately but will mark the compilation to be deleted
- * once the compilation completes. The {@link ANeuralNetworksCompilation_wait}
- * will return ERROR_DELETED.
- *
- * See {@link ANeuralNetworksCompilation} for information on multithreaded
- * usage.
- *
- * @param compilation The compilation to be destroyed. Passing NULL is
- * acceptable and results in no operation.
- */
-inline void ANeuralNetworksCompilation_free(
- ANeuralNetworksCompilation* compilation) {
- LOAD_FUNCTION(ANeuralNetworksCompilation_free);
- EXECUTE_FUNCTION(compilation);
-}
-
-/**
- * Sets the execution preference.
- *
- * <p>Provides guidance to the runtime when trade-offs are possible.</p>
- *
- * See {@link ANeuralNetworksCompilation} for information on multithreaded
- * usage.
- *
- * @param compilation The compilation to be modified.
- * @param preference Either {@link PREFER_LOW_POWER},
- * {@link PREFER_SINGLE_FAST_ANSWER}, or
- * {@link PREFER_SUSTAINED_SPEED}.
- *
- * @return ANEURALNETWORKS_NO_ERROR if successful.
- */
-inline int ANeuralNetworksCompilation_setPreference(
- ANeuralNetworksCompilation* compilation, int32_t preference) {
- LOAD_FUNCTION(ANeuralNetworksCompilation_setPreference);
- EXECUTE_FUNCTION_RETURN(compilation, preference);
-}
-
-/**
- * Waits until the compilation completes.
- *
- * More than one thread can wait on a compilation. When the compilation
- * completes, all threads will be released.
- *
- * See {@link ANeuralNetworksCompilation} for information on multithreaded
- * usage.
- *
- * @return ANEURALNETWORKS_NO_ERROR if the compilation completed normally.
- */
-inline int ANeuralNetworksCompilation_finish(
- ANeuralNetworksCompilation* compilation) {
- LOAD_FUNCTION(ANeuralNetworksCompilation_finish);
- EXECUTE_FUNCTION_RETURN(compilation);
-}
-/**
- * Create a {@link ANeuralNetworksExecution} to apply the given compilation.
- * This only creates the object. Computation is only performed once
- * {@link ANeuralNetworksExecution_startCompute} is invoked.
- *
- * <p>The provided compilation must outlive the execution.</p>
- *
- * See {@link ANeuralNetworksExecution} for information on multithreaded usage.
- *
- * @param compilation The {@link ANeuralNetworksCompilation} to be evaluated.
- * @param execution The newly created object or NULL if unsuccessful.
- *
- * @return ANEURALNETWORKS_NO_ERROR if successful, ANEURALNETWORKS_BAD_DATA
- * if the compilation is invalid.
- */
-inline int ANeuralNetworksExecution_create(
- ANeuralNetworksCompilation* compilation,
- ANeuralNetworksExecution** execution) {
- LOAD_FUNCTION(ANeuralNetworksExecution_create);
- EXECUTE_FUNCTION_RETURN(compilation, execution);
-}
-
-/**
- * Destroy an execution.
- *
- * <p>If called on an execution for which
- * {@link ANeuralNetworksExecution_startCompute} has been called, the
- * function will return immediately but will mark the execution to be deleted
- * once the computation completes. The {@link ANeuralNetworksExecution_wait}
- * will return ANEURALNETWORKS_ERROR_DELETED.
- *
- * See {@link ANeuralNetworksExecution} for information on multithreaded usage.
- *
- * @param execution The execution to be destroyed. Passing NULL is acceptable
- * and results in no operation.
- */
-inline void ANeuralNetworksExecution_free(ANeuralNetworksExecution* execution) {
- LOAD_FUNCTION(ANeuralNetworksExecution_free);
- EXECUTE_FUNCTION(execution);
-}
-
-/**
- * Associate a user buffer with an input of the model of the
- * {@link ANeuralNetworksExecution}.
- *
- * <p>The provided buffer must outlive the execution.</p>
- *
- * See {@link ANeuralNetworksExecution} for information on multithreaded usage.
- *
- * @param execution The execution to be modified.
- * @param index The index of the input argument we are setting. It is
- * an index into the lists passed to
- * {@link ANeuralNetworksModel_identifyInputsAndOutputs}. It is not
- * the index associated with {@link
- * ANeuralNetworksModel_addOperand}.
- * @param type The type of the operand. This should be used to specify the
- * dimensions that were set to 0 when the operand was added to the
- * model. All other properties of the type must be the same as
- * specified in the model. If the type is the same as specified
- * when the model was built, NULL can be passed.
- * @param buffer The buffer containing the data.
- * @param length The length in bytes of the buffer.
- *
- * @return ANEURALNETWORKS_NO_ERROR if successful, ANEURALNETWORKS_BAD_DATA if
- * the name is not recognized or the buffer is too small for the input.
- */
-inline int ANeuralNetworksExecution_setInput(
- ANeuralNetworksExecution* execution, int32_t index,
- const ANeuralNetworksOperandType* type, const void* buffer, size_t length) {
- LOAD_FUNCTION(ANeuralNetworksExecution_setInput);
- EXECUTE_FUNCTION_RETURN(execution, index, type, buffer, length);
-}
-
-/**
- * Associate part of a memory object with an input of the model of the
- * {@link ANeuralNetworksExecution}.
- *
- * <p>The provided memory must outlive the execution.</p>
- *
- * See {@link ANeuralNetworksExecution} for information on multithreaded usage.
- *
- * @param execution The execution to be modified.
- * @param index The index of the input argument we are setting. It is
- * an index into the lists passed to
- * {@link ANeuralNetworksModel_identifyInputsAndOutputs}. It is not
- * the index associated with {@link
- * ANeuralNetworksModel_addOperand}.
- * @param type The type of the operand. This can be used to specify the
- * dimensions that were set to 0 when the operand was added to the
- * model. All other values must be the same as specified in the
- * model. If the type is the same as specified when the model
- * was built, NULL can be passed.
- * @param memory The memory containing the data.
- * @param offset This specifies the location of the data within the memory.
- * The offset is in bytes from the start of memory.
- * @param length The size in bytes of the data value.
- *
- * @return ANEURALNETWORKS_NO_ERROR if successful, ANEURALNETWORKS_BAD_DATA if
- * the name is not recognized or the buffer is too small for the input.
- */
-inline int ANeuralNetworksExecution_setInputFromMemory(
- ANeuralNetworksExecution* execution, int32_t index,
- const ANeuralNetworksOperandType* type, const ANeuralNetworksMemory* memory,
- size_t offset, size_t length) {
- LOAD_FUNCTION(ANeuralNetworksExecution_setInputFromMemory);
- EXECUTE_FUNCTION_RETURN(execution, index, type, memory, offset, length);
-}
-
-/**
- * Associate a user buffer with an output of the model of the
- * {@link ANeuralNetworksExecution}.
- *
- * <p>The provided buffer must outlive the execution.</p>
- *
- * See {@link ANeuralNetworksExecution} for information on multithreaded usage.
- *
- * @param execution The execution to be modified.
- * @param index The index of the output argument we are setting. It is
- * an index into the lists passed to
- * {@link ANeuralNetworksModel_identifyInputsAndOutputs}. It is not
- * the index associated with {@link
- * ANeuralNetworksModel_addOperand}.
- * @param type The type of the operand. This can be used to specify the
- * dimensions that were set to 0 when the operand was added to the
- * model. All other values must be the same as specified in the
- * model. If the type is the same as specified when the model
- * was built, NULL can be passed.
- * @param buffer The buffer where the data is to be written.
- * @param length The length in bytes of the buffer.
- *
- * @return ANEURALNETWORKS_NO_ERROR if successful, ANEURALNETWORKS_BAD_DATA if
- * the name is not recognized or the buffer is too small for the output.
- */
-inline int ANeuralNetworksExecution_setOutput(
- ANeuralNetworksExecution* execution, int32_t index,
- const ANeuralNetworksOperandType* type, void* buffer, size_t length) {
- LOAD_FUNCTION(ANeuralNetworksExecution_setOutput);
- EXECUTE_FUNCTION_RETURN(execution, index, type, buffer, length);
-}
-
-/**
- * Associate part of a memory object with an output of the model of the
- * {@link ANeuralNetworksExecution}.
- *
- * <p>The provided memory must outlive the execution.</p>
- *
- * See {@link ANeuralNetworksExecution} for information on multithreaded usage.
- *
- * @param execution The execution to be modified.
- * @param index The index of the output argument we are setting. It is
- * an index into the lists passed to
- * {@link ANeuralNetworksModel_identifyInputsAndOutputs}. It is not
- * the index associated with {@link
- * ANeuralNetworksModel_addOperand}.
- * @param type The type of the operand. This can be used to specify the
- * dimensions that were set to 0 when the operand was added to the
- * model. All other values must be the same as specified in the
- * model. If the type is the same as specified when the model
- * was built, NULL can be passed.
- * @param memory The memory where the data is to be stored.
- * @param offset This specifies the location of the data within the memory.
- * The offset is in bytes from the start of memory.
- * @param length The length in bytes of the data value.
- *
- * @return ANEURALNETWORKS_NO_ERROR if successful, ANEURALNETWORKS_BAD_DATA if
- * the name is not recognized or the buffer is too small for the output.
- */
-inline int ANeuralNetworksExecution_setOutputFromMemory(
- ANeuralNetworksExecution* execution, int32_t index,
- const ANeuralNetworksOperandType* type, const ANeuralNetworksMemory* memory,
- size_t offset, size_t length) {
- LOAD_FUNCTION(ANeuralNetworksExecution_setOutputFromMemory);
- EXECUTE_FUNCTION_RETURN(execution, index, type, memory, offset, length);
-}
-
-/**
- * Schedule evaluation of the execution.
- *
- * <p>Schedules evaluation of the execution. Once the model has been
- * applied and the outputs are ready to be consumed, the execution will be
- * signaled. Use {@link ANeuralNetworksExecution_wait} to wait for that signal.
- * </p>
- *
- * Multiple executions can be scheduled and evaluated concurrently, and
- * compilations can be performed concurrently with executions. The runtime makes
- * no guarantee on the ordering of the completion of compilations and
- * executions. If it's important to the application, the application should
- * enforce the ordering by using {@link ANeuralNetworksCompilation_wait} and
- * {@link ANeuralNetworksExecution_wait}.
- *
- * ANeuralNetworksExecution_wait must be called to recuperate the resources used
- * by the execution.
- *
- * See {@link ANeuralNetworksExecution} for information on multithreaded usage.
- *
- * @param execution The execution to be scheduled and executed.
- *
- * @return ANEURALNETWORKS_NO_ERROR if successful.
- */
-inline int ANeuralNetworksExecution_startCompute(
- ANeuralNetworksExecution* execution, ANeuralNetworksEvent** event) {
- LOAD_FUNCTION(ANeuralNetworksExecution_startCompute);
- EXECUTE_FUNCTION_RETURN(execution, event);
-}
-
-/**
- * Waits until the execution completes.
- *
- * More than one thread can wait on an event. When the execution completes,
- * all threads will be released.
- *
- * See {@link ANeuralNetworksExecution} for information on multithreaded usage.
- *
- * @return ANEURALNETWORKS_NO_ERROR if the execution completed normally.
- */
-inline int ANeuralNetworksEvent_wait(ANeuralNetworksEvent* event) {
- LOAD_FUNCTION(ANeuralNetworksEvent_wait);
- EXECUTE_FUNCTION_RETURN(event);
-}
-
-/**
- * Destroys the event.
- *
- * See {@link ANeuralNetworksExecution} for information on multithreaded usage.
- */
-inline void ANeuralNetworksEvent_free(ANeuralNetworksEvent* event) {
- LOAD_FUNCTION(ANeuralNetworksEvent_free);
- EXECUTE_FUNCTION(event);
-}
-
-#endif // __NEURAL_NETWORKS_SHIM__
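Taken together, the wrappers above cover the NN API compile/execute life cycle. A hedged end-to-end sketch, assuming a model that was already built and finished elsewhere and that has exactly one float input and one float output; buffer sizes, the preference choice, and the error handling are illustrative.

    #include <cstddef>

    #include "NeuralNetworksShim.h"

    int compile_and_run(ANeuralNetworksModel *model, const float *in, float *out, std::size_t bytes)
    {
      ANeuralNetworksCompilation *compilation = nullptr;
      if (ANeuralNetworksCompilation_create(model, &compilation) != ANEURALNETWORKS_NO_ERROR)
        return -1;
      ANeuralNetworksCompilation_setPreference(compilation, ANEURALNETWORKS_PREFER_SUSTAINED_SPEED);
      ANeuralNetworksCompilation_finish(compilation);

      ANeuralNetworksExecution *execution = nullptr;
      ANeuralNetworksExecution_create(compilation, &execution);
      // Passing nullptr for the type reuses the operand types recorded in the model.
      ANeuralNetworksExecution_setInput(execution, 0, nullptr, in, bytes);
      ANeuralNetworksExecution_setOutput(execution, 0, nullptr, out, bytes);

      ANeuralNetworksEvent *event = nullptr;
      ANeuralNetworksExecution_startCompute(execution, &event);
      ANeuralNetworksEvent_wait(event); // blocks until the outputs are ready

      ANeuralNetworksEvent_free(event);
      ANeuralNetworksExecution_free(execution);
      ANeuralNetworksCompilation_free(compilation);
      return 0;
    }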
diff --git a/runtime/libs/nnapi/v1.2/CMakeLists.txt b/runtime/libs/nnapi/v1.2/CMakeLists.txt
deleted file mode 100644
index 21ec3015f..000000000
--- a/runtime/libs/nnapi/v1.2/CMakeLists.txt
+++ /dev/null
@@ -1,4 +0,0 @@
-add_library(nnfw_lib_nnapi_1_2 INTERFACE)
-
-target_include_directories(nnfw_lib_nnapi_1_2 INTERFACE ${CMAKE_CURRENT_SOURCE_DIR}/include)
-target_link_libraries(nnfw_lib_nnapi_1_2 INTERFACE nnfw-nnapi-header)
diff --git a/runtime/nnapi-header/include/NeuralNetworks.h b/runtime/nnapi-header/include/NeuralNetworks.h
index 7400806d8..0c54d7582 100644
--- a/runtime/nnapi-header/include/NeuralNetworks.h
+++ b/runtime/nnapi-header/include/NeuralNetworks.h
@@ -24,8 +24,8 @@
* @file NeuralNetworks.h
*/
-#ifndef ANDROID_ML_NN_RUNTIME_NEURAL_NETWORKS_H
-#define ANDROID_ML_NN_RUNTIME_NEURAL_NETWORKS_H
+#ifndef ANDROID_FRAMEWORKS_ML_NN_RUNTIME_NEURAL_NETWORKS_H
+#define ANDROID_FRAMEWORKS_ML_NN_RUNTIME_NEURAL_NETWORKS_H
/******************************************************************
*
@@ -43,16 +43,14 @@
* - DO NOT CHANGE THE LAYOUT OR SIZE OF STRUCTURES
*/
-// For compatibility with android, check __ANDROID_API__ is defined
-// If __ANDROID_API__ is pre-defined, this header may be used for android
-#ifndef __ANDROID_API__
-#define __ANDROID_API__ 29
-#define __ANDROID_API_Q__ 29
+// For compatibility with android, check __ANDROID__ is defined
+#ifndef __ANDROID__
+#define __ANDROID_API__ 30
#define __INTRODUCED_IN(api_level)
typedef struct AHardwareBuffer AHardwareBuffer;
#else
#include <android/hardware_buffer.h>
-#endif // __ANDROID_API__
+#endif // __ANDROID__
#include <stddef.h>
#include <stdint.h>
#include <sys/cdefs.h>
@@ -62,7 +60,11 @@ __BEGIN_DECLS
/**
* Operand types.
*
- * The type of operands that can be added to a model.
+ * The type of an operand in a model.
+ *
+ * Types prefaced with ANEURALNETWORKS_TENSOR_* must be used for tensor data (i.e., tensors
+ * with at least one dimension). Types not prefaced by ANEURALNETWORKS_TENSOR_* represent
+ * scalar values and must have no dimensions.
*
* Although we define many types, most operators accept just a few
* types. Most used are {@link ANEURALNETWORKS_TENSOR_FLOAT32},
@@ -94,7 +96,6 @@ typedef enum {
* real_value = (integer_value - zeroPoint) * scale.
*/
ANEURALNETWORKS_TENSOR_QUANT8_ASYMM = 5,
-#if __ANDROID_API__ >= __ANDROID_API_Q__
/**
* An 8 bit boolean scalar value.
*
@@ -160,7 +161,6 @@ typedef enum {
* Available since API level 29.
*/
ANEURALNETWORKS_TENSOR_QUANT8_SYMM_PER_CHANNEL = 11,
-
/**
* A tensor of 16 bit unsigned integers that represent real numbers.
*
@@ -175,7 +175,6 @@ typedef enum {
* Available since API level 29.
*/
ANEURALNETWORKS_TENSOR_QUANT16_ASYMM = 12,
-
/**
* A tensor of 8 bit signed integers that represent real numbers.
*
@@ -188,14 +187,36 @@ typedef enum {
* Available since API level 29.
*/
ANEURALNETWORKS_TENSOR_QUANT8_SYMM = 13,
-#endif // __ANDROID_API__ >= __ANDROID_API_Q__
+ /**
+ * A tensor of 8 bit signed integers that represent real numbers.
+ *
+ * Attached to this tensor are two numbers that can be used to convert the
+ * 8 bit integer to the real value and vice versa. These two numbers are:
+ * - scale: a 32 bit floating point value greater than zero.
+ * - zeroPoint: a 32 bit integer, in range [-128, 127].
+ *
+ * The formula is:
+ * real_value = (integer_value - zeroPoint) * scale.
+ *
+ * Available since API level 30.
+ */
+ ANEURALNETWORKS_TENSOR_QUANT8_ASYMM_SIGNED = 14,
+ /**
+ * A reference to a model.
+ *
+ * {@link ANeuralNetworksModel_setOperandValueFromModel} must be used to set
+ * the value for an Operand of this type.
+ *
+ * Available since API level 30.
+ */
+ ANEURALNETWORKS_MODEL = 15,
} OperandCode;
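A worked example of the dequantization formula quoted above for the new ANEURALNETWORKS_TENSOR_QUANT8_ASYMM_SIGNED type; the scale and zeroPoint values are made up for illustration.

    #include <cstdint>

    // Hypothetical quantization parameters attached to a QUANT8_ASYMM_SIGNED tensor.
    constexpr float kScale = 0.25f;
    constexpr int32_t kZeroPoint = -8;

    inline float dequantize(int8_t q)
    {
      // real_value = (integer_value - zeroPoint) * scale
      // e.g. q = 12  ->  (12 - (-8)) * 0.25f = 5.0f
      return static_cast<float>(q - kZeroPoint) * kScale;
    }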
/**
* Operation types.
*
- * The type of operations that can be added to a model.
+ * The type of an operation in a model.
*
* Available since API level 27.
*/
@@ -231,6 +252,8 @@ typedef enum {
* * {@link ANEURALNETWORKS_TENSOR_FLOAT16} (since API level 29)
* * {@link ANEURALNETWORKS_TENSOR_FLOAT32}
* * {@link ANEURALNETWORKS_TENSOR_QUANT8_ASYMM}
+ * * {@link ANEURALNETWORKS_TENSOR_QUANT8_ASYMM_SIGNED} (since API level 30)
+ * * {@link ANEURALNETWORKS_TENSOR_INT32} (since API level 30)
*
* Supported tensor rank: up to 4
*
@@ -238,15 +261,19 @@ typedef enum {
* * 0: A tensor.
* * 1: A tensor of the same {@link OperandCode}, and compatible dimensions
* as input0.
- * For a {@link ANEURALNETWORKS_TENSOR_QUANT8_ASYMM} tensor,
+ * For a {@link ANEURALNETWORKS_TENSOR_QUANT8_ASYMM} and
+ * {@link ANEURALNETWORKS_TENSOR_QUANT8_ASYMM_SIGNED} tensor,
* the scales and zeroPoint can be different from input0 scale and zeroPoint.
* * 2: An {@link ANEURALNETWORKS_INT32} scalar, and has to be one of the
* {@link FuseCode} values. Specifies the activation to
* invoke on the result.
+ * For a {@link ANEURALNETWORKS_TENSOR_INT32} tensor,
+ * the {@link FuseCode} must be "NONE".
*
* Outputs:
* * 0: The sum, a tensor of the same {@link OperandCode} as input0.
- * For a {@link ANEURALNETWORKS_TENSOR_QUANT8_ASYMM} tensor,
+ * For a {@link ANEURALNETWORKS_TENSOR_QUANT8_ASYMM} and
+ * {@link ANEURALNETWORKS_TENSOR_QUANT8_ASYMM_SIGNED} tensor,
* the scale and zeroPoint can be different from inputs' scale and zeroPoint.
*
* Available since API level 27.
@@ -270,18 +297,20 @@ typedef enum {
* * {@link ANEURALNETWORKS_TENSOR_FLOAT16} (since API level 29)
* * {@link ANEURALNETWORKS_TENSOR_FLOAT32}
* * {@link ANEURALNETWORKS_TENSOR_QUANT8_ASYMM}
+ * * {@link ANEURALNETWORKS_TENSOR_QUANT8_ASYMM_SIGNED} (since API level 30)
*
* Supported tensor rank: 4, with "NHWC" or "NCHW" data layout.
* With the default data layout NHWC, the data is stored in the order of:
* [batch, height, width, channels]. Alternatively, the data layout could
* be NCHW, the data storage order of: [batch, channels, height, width].
+ * NCHW is supported since API level 29.
*
* Both explicit padding and implicit padding are supported.
*
* Inputs (explicit padding):
* * 0: A 4-D tensor, of shape [batches, height, width, depth], specifying
- * the input. Since API level 29, zero batches is supported for this
- * tensor.
+ * the input.
+ * Since API level 29, zero batches is supported for this tensor.
* * 1: An {@link ANEURALNETWORKS_INT32} scalar, specifying the padding on
* the left, in the ‘width’ dimension.
* * 2: An {@link ANEURALNETWORKS_INT32} scalar, specifying the padding on
@@ -307,8 +336,8 @@ typedef enum {
*
* Inputs (implicit padding):
* * 0: A 4-D tensor, of shape [batches, height, width, depth], specifying
- * the input. Since API level 29, zero batches is supported for this
- * tensor.
+ * the input.
+ * Since API level 29, zero batches is supported for this tensor.
* * 1: An {@link ANEURALNETWORKS_INT32} scalar, specifying the implicit
* padding scheme, has to be one of the
* {@link PaddingCode} values.
@@ -330,7 +359,8 @@ typedef enum {
* Outputs:
* * 0: The output 4-D tensor, of shape
* [batches, out_height, out_width, depth].
- * For a {@link ANEURALNETWORKS_TENSOR_QUANT8_ASYMM} tensor,
+ * For a {@link ANEURALNETWORKS_TENSOR_QUANT8_ASYMM} and
+ * {@link ANEURALNETWORKS_TENSOR_QUANT8_ASYMM_SIGNED} tensor,
* the scale and zeroPoint must be the same as input0.
*
* Available since API level 27.
@@ -346,8 +376,9 @@ typedef enum {
* Supported tensor {@link OperandCode}:
* * {@link ANEURALNETWORKS_TENSOR_FLOAT16} (since API level 29)
* * {@link ANEURALNETWORKS_TENSOR_FLOAT32}
- * * {@link ANEURALNETWORKS_TENSOR_QUANT8_ASYMM} (full support since API
- * level 29, see the input section)
+ * * {@link ANEURALNETWORKS_TENSOR_QUANT8_ASYMM}
+ * (full support since API level 29, see the input section)
+ * * {@link ANEURALNETWORKS_TENSOR_QUANT8_ASYMM_SIGNED} (since API level 30)
*
* Supported tensor rank: up to 4
*
@@ -357,6 +388,9 @@ typedef enum {
* Before API level 29, all input tensors of
* {@link ANEURALNETWORKS_TENSOR_QUANT8_ASYMM}
* must have the same scale and zeroPoint as the output tensor.
+ * Input tensors of
+ * {@link ANEURALNETWORKS_TENSOR_QUANT8_ASYMM_SIGNED}
+ * are allowed to have different scale and zeroPoint.
* Since API level 29, zero-sized tensors are supported.
* * n: An {@link ANEURALNETWORKS_INT32} scalar, specifying the
* concatenation axis.
@@ -373,7 +407,7 @@ typedef enum {
ANEURALNETWORKS_CONCATENATION = 2,
/**
- * Performs an 2-D convolution operation.
+ * Performs a 2-D convolution operation.
*
* The CONV_2D op sweeps a 2-D filter that can mix channels together over a
* batch of images, applying the filter to each window of each image of the
@@ -409,31 +443,46 @@ typedef enum {
* * * {@link ANEURALNETWORKS_TENSOR_INT32} for bias (scale set to 0.0,
* * * each value scaling is separate and equal to input.scale * filter.scales[channel]).
*
+ * Available since API level 30:
+ * * Quantized signed (since API level 30):
+ * * * {@link ANEURALNETWORKS_TENSOR_QUANT8_ASYMM_SIGNED} for input, filter, and output.
+ * * * {@link ANEURALNETWORKS_TENSOR_INT32} for bias (with scale set to
+ * * * input.scale * filter.scale).
+ *
+ * * Quantized signed with filter symmetric per channel quantization (since API level 30):
+ * * * {@link ANEURALNETWORKS_TENSOR_QUANT8_ASYMM_SIGNED} for input, and output.
+ * * * {@link ANEURALNETWORKS_TENSOR_QUANT8_SYMM_PER_CHANNEL} for filter.
+ * * * {@link ANEURALNETWORKS_TENSOR_INT32} for bias (scale set to 0.0,
+ * * * each value scaling is separate and equal to input.scale * filter.scales[channel]).
+ *
* Supported tensor rank: 4, with "NHWC" or "NCHW" data layout.
* With the default data layout NHWC, the data is stored in the order of:
* [batch, height, width, channels]. Alternatively, the data layout could
* be NCHW, the data storage order of: [batch, channels, height, width].
+ * NCHW is supported since API level 29.
*
* Both explicit padding and implicit padding are supported.
*
* Inputs (explicit padding):
* * 0: A 4-D tensor, of shape [batches, height, width, depth_in],
- * specifying the input. Since API level 29, zero batches is supported
- * for this tensor.
+ * specifying the input.
+ * Since API level 29, zero batches is supported for this tensor.
* * 1: A 4-D tensor, of shape
* [depth_out, filter_height, filter_width, depth_in], specifying the
- * filter. For tensor of type
- * {@link ANEURALNETWORKS_TENSOR_QUANT8_SYMM_PER_CHANNEL} the channel
- * dimension (extraParams.channelQuant.channelDim) must be set to 0.
+ * filter.
+ * For tensor of type {@link ANEURALNETWORKS_TENSOR_QUANT8_SYMM_PER_CHANNEL}
+ * the channel dimension (ANeuralNetworksSymmPerChannelQuantParams::channelDim)
+ * must be set to 0.
* * 2: A 1-D tensor, of shape [depth_out], specifying the bias. For input
- * tensor of type {@link ANEURALNETWORKS_TENSOR_FLOAT32} or
- * {@link ANEURALNETWORKS_TENSOR_FLOAT16}, the bias must be of the same
- * type. For filter tensor of {@link ANEURALNETWORKS_TENSOR_QUANT8_ASYMM},
+ * tensor of type {@link ANEURALNETWORKS_TENSOR_FLOAT32}
+ * or {@link ANEURALNETWORKS_TENSOR_FLOAT16} the bias must be of the same type.
+ * For filter tensor of {@link ANEURALNETWORKS_TENSOR_QUANT8_ASYMM}
+ * and {@link ANEURALNETWORKS_TENSOR_QUANT8_ASYMM_SIGNED},
* the bias should be of {@link ANEURALNETWORKS_TENSOR_INT32}, with zeroPoint
- * of 0 and bias_scale == input_scale * filter_scale. For filter tensor
- * of {@link ANEURALNETWORKS_TENSOR_QUANT8_SYMM_PER_CHANNEL}, the bias
- * should be of {@link ANEURALNETWORKS_TENSOR_INT32}, with zeroPoint of
- * 0 and bias_scale of 0. The actual scale of each value 'i' is equal to
+ * of 0 and bias_scale == input_scale * filter_scale.
+ * For filter tensor of {@link ANEURALNETWORKS_TENSOR_QUANT8_SYMM_PER_CHANNEL},
+ * the bias should be of {@link ANEURALNETWORKS_TENSOR_INT32}, with zeroPoint of 0
+ * and bias_scale of 0. The actual scale of each value 'i' is equal to
* bias_scale[i] = input_scale * filter_scale[i].
* * 3: An {@link ANEURALNETWORKS_INT32} scalar, specifying the padding on
* the left, in the ‘width’ dimension.
@@ -466,22 +515,25 @@ typedef enum {
*
* Inputs (implicit padding):
* * 0: A 4-D tensor, of shape [batches, height, width, depth_in],
- * specifying the input. Since API level 29, zero batches is supported
- * for this tensor.
+ * specifying the input.
+ * Since API level 29, zero batches is supported for this tensor.
* * 1: A 4-D tensor, of shape
* [depth_out, filter_height, filter_width, depth_in], specifying the
- * filter. For tensor of type
- * {@link ANEURALNETWORKS_TENSOR_QUANT8_SYMM_PER_CHANNEL} the channel
- * dimension (extraParams.channelQuant.channelDim) must be set to 0.
+ * filter.
+ * For tensor of type {@link ANEURALNETWORKS_TENSOR_QUANT8_SYMM_PER_CHANNEL}
+ * the channel dimension (ANeuralNetworksSymmPerChannelQuantParams::channelDim)
+ * must be set to 0.
* * 2: A 1-D tensor, of shape [depth_out], specifying the bias. For input
- * tensor of type {@link ANEURALNETWORKS_TENSOR_FLOAT32} or
- * {@link ANEURALNETWORKS_TENSOR_FLOAT16}, the bias must be of the same
- * type. For filter tensor of {@link ANEURALNETWORKS_TENSOR_QUANT8_ASYMM},
+ * tensor of type {@link ANEURALNETWORKS_TENSOR_FLOAT32}
+ * or {@link ANEURALNETWORKS_TENSOR_FLOAT16} the bias must be of the same
+ * type.
+ * For filter tensor of {@link ANEURALNETWORKS_TENSOR_QUANT8_ASYMM}
+ * and {@link ANEURALNETWORKS_TENSOR_QUANT8_ASYMM_SIGNED},
* the bias should be of {@link ANEURALNETWORKS_TENSOR_INT32}, with zeroPoint
- * of 0 and bias_scale == input_scale * filter_scale. For filter tensor
- * of {@link ANEURALNETWORKS_TENSOR_QUANT8_SYMM_PER_CHANNEL}, the bias
- * should be of {@link ANEURALNETWORKS_TENSOR_INT32}, with zeroPoint of
- * 0 and bias_scale of 0. The actual scale of each value 'i' is equal to
+ * of 0 and bias_scale == input_scale * filter_scale.
+ * For filter tensor of {@link ANEURALNETWORKS_TENSOR_QUANT8_SYMM_PER_CHANNEL},
+ * the bias should be of {@link ANEURALNETWORKS_TENSOR_INT32}, with zeroPoint of 0
+ * and bias_scale of 0. The actual scale of each value 'i' is equal to
* bias_scale[i] = input_scale * filter_scale[i].
* * 3: An {@link ANEURALNETWORKS_INT32} scalar, specifying the implicit
* padding scheme, has to be one of the
@@ -509,10 +561,9 @@ typedef enum {
*
* Outputs:
* * 0: The output 4-D tensor, of shape
- * [batches, out_height, out_width, depth_out]. Before API level 29,
- * for output tensor of {@link ANEURALNETWORKS_TENSOR_QUANT8_ASYMM},
- * the following condition must be satisfied:
- * output_scale > input_scale * filter_scale
+ * [batches, out_height, out_width, depth_out].
+ * Before API level 29, for output tensor of {@link ANEURALNETWORKS_TENSOR_QUANT8_ASYMM},
+ * the following condition must be satisfied: output_scale > input_scale * filter_scale
*
* Available since API level 27.
*/
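
The bias quantization rule above can be made concrete with a small sketch. It assumes the NDK include path <android/NeuralNetworks.h> and illustrative helper names; it only shows how the required bias scale (input_scale * filter_scale, zeroPoint 0) would be filled into an operand type, not how the runtime builds models internally.

    #include <stdint.h>
    #include <android/NeuralNetworks.h>

    /* Declares the 1-D INT32 bias operand for a quantized CONV_2D, following
     * the rule above: zeroPoint 0 and bias_scale == input_scale * filter_scale.
     * For per-channel filters the analogous rule is
     * bias_scale[i] = input_scale * filter_scale[i]. */
    static int add_conv_bias_operand(ANeuralNetworksModel *model,
                                     uint32_t depth_out,
                                     float input_scale, float filter_scale)
    {
      uint32_t dims[1] = {depth_out};
      ANeuralNetworksOperandType bias = {
          .type = ANEURALNETWORKS_TENSOR_INT32,
          .dimensionCount = 1,
          .dimensions = dims,
          .scale = input_scale * filter_scale, /* bias_scale rule from the docs */
          .zeroPoint = 0,
      };
      return ANeuralNetworksModel_addOperand(model, &bias);
    }
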
@@ -559,10 +610,23 @@ typedef enum {
* * * {@link ANEURALNETWORKS_TENSOR_INT32} for bias (scale set to 0.0,
* * * each value scaling is separate and equal to input.scale * filter.scales[channel]).
*
+ * Available since API level 30:
+ * * Quantized signed (since API level 30):
+ * * * {@link ANEURALNETWORKS_TENSOR_QUANT8_ASYMM_SIGNED} for input, filter, and output.
+ * * * {@link ANEURALNETWORKS_TENSOR_INT32} for bias (with scale set to
+ * * * input.scale * filter.scale).
+ *
+ * * Quantized signed with filter symmetric per channel quantization (since API level 30):
+ * * * {@link ANEURALNETWORKS_TENSOR_QUANT8_ASYMM_SIGNED} for input, and output.
+ * * * {@link ANEURALNETWORKS_TENSOR_QUANT8_SYMM_PER_CHANNEL} for filter.
+ * * * {@link ANEURALNETWORKS_TENSOR_INT32} for bias (scale set to 0.0,
+ * * * each value scaling is separate and equal to input.scale * filter.scales[channel]).
+ *
* Supported tensor rank: 4, with "NHWC" or "NCHW" data layout.
* With the default data layout NHWC, the data is stored in the order of:
* [batch, height, width, channels]. Alternatively, the data layout could
* be NCHW, the data storage order of: [batch, channels, height, width].
+ * NCHW is supported since API level 29.
*
* Both explicit padding and implicit padding are supported.
*
@@ -570,18 +634,20 @@ typedef enum {
* * 0: A 4-D tensor, of shape [batches, height, width, depth_in],
* specifying the input.
* * 1: A 4-D tensor, of shape [1, filter_height, filter_width, depth_out],
- * specifying the filter. For tensor of type
- * {@link ANEURALNETWORKS_TENSOR_QUANT8_SYMM_PER_CHANNEL} the channel
- * dimension (extraParams.channelQuant.channelDim) must be set to 3.
+ * specifying the filter.
+ * For tensor of type {@link ANEURALNETWORKS_TENSOR_QUANT8_SYMM_PER_CHANNEL}
+ * the channel dimension (ANeuralNetworksSymmPerChannelQuantParams::channelDim)
+ * must be set to 3.
* * 2: A 1-D tensor, of shape [depth_out], specifying the bias. For input
- * tensor of type {@link ANEURALNETWORKS_TENSOR_FLOAT32} or
- * {@link ANEURALNETWORKS_TENSOR_FLOAT16}, the bias must be of the same
- * type. For filter tensor of {@link ANEURALNETWORKS_TENSOR_QUANT8_ASYMM},
+ * tensor of type {@link ANEURALNETWORKS_TENSOR_FLOAT32}
+ * or {@link ANEURALNETWORKS_TENSOR_FLOAT16} the bias must be of the same type.
+ * For filter tensor of {@link ANEURALNETWORKS_TENSOR_QUANT8_ASYMM}
+ * and {@link ANEURALNETWORKS_TENSOR_QUANT8_ASYMM_SIGNED},
* the bias should be of {@link ANEURALNETWORKS_TENSOR_INT32}, with zeroPoint
- * of 0 and bias_scale == input_scale * filter_scale. For filter tensor
- * of {@link ANEURALNETWORKS_TENSOR_QUANT8_SYMM_PER_CHANNEL}, the bias
- * should be of {@link ANEURALNETWORKS_TENSOR_INT32}, with zeroPoint of
- * 0 and bias_scale of 0. The actual scale of each value 'i' is equal to
+ * of 0 and bias_scale == input_scale * filter_scale.
+ * For filter tensor of {@link ANEURALNETWORKS_TENSOR_QUANT8_SYMM_PER_CHANNEL},
+ * the bias should be of {@link ANEURALNETWORKS_TENSOR_INT32}, with zeroPoint of 0
+ * and bias_scale of 0. The actual scale of each value 'i' is equal to
* bias_scale[i] = input_scale * filter_scale[i].
* * 3: An {@link ANEURALNETWORKS_INT32} scalar, specifying the padding on
* the left, in the ‘width’ dimension.
@@ -620,14 +686,15 @@ typedef enum {
* * 1: A 4-D tensor, of shape [1, filter_height, filter_width, depth_out],
* specifying the filter.
* * 2: A 1-D tensor, of shape [depth_out], specifying the bias. For input
- * tensor of type {@link ANEURALNETWORKS_TENSOR_FLOAT32} or
- * {@link ANEURALNETWORKS_TENSOR_FLOAT16}, the bias must be of the same
- * type. For filter tensor of {@link ANEURALNETWORKS_TENSOR_QUANT8_ASYMM},
+ * tensor of type {@link ANEURALNETWORKS_TENSOR_FLOAT32}
+ * or {@link ANEURALNETWORKS_TENSOR_FLOAT16} the bias must be of the same type.
+ * For filter tensor of {@link ANEURALNETWORKS_TENSOR_QUANT8_ASYMM}
+ * and {@link ANEURALNETWORKS_TENSOR_QUANT8_ASYMM_SIGNED},
* the bias should be of {@link ANEURALNETWORKS_TENSOR_INT32}, with zeroPoint
- * of 0 and bias_scale == input_scale * filter_scale. For filter tensor
- * of {@link ANEURALNETWORKS_TENSOR_QUANT8_SYMM_PER_CHANNEL}, the bias
- * should be of {@link ANEURALNETWORKS_TENSOR_INT32}, with zeroPoint of
- * 0 and bias_scale of 0. The actual scale of each value 'i' is equal to
+ * of 0 and bias_scale == input_scale * filter_scale.
+ * For filter tensor of {@link ANEURALNETWORKS_TENSOR_QUANT8_SYMM_PER_CHANNEL},
+ * the bias should be of {@link ANEURALNETWORKS_TENSOR_INT32}, with zeroPoint of 0
+ * and bias_scale of 0. The actual scale of each value 'i' is equal to
* bias_scale[i] = input_scale * filter_scale[i].
* * 3: An {@link ANEURALNETWORKS_INT32} scalar, specifying the implicit
* padding scheme, has to be one of the
@@ -654,12 +721,11 @@ typedef enum {
* cells between each filter element on height dimension. If this input is set,
* input 9 (dilation factor for width) must be specified as well.
* Available since API level 29.
-
*
* Outputs:
* * 0: The output 4-D tensor, of shape
- * [batches, out_height, out_width, depth_out]. Before API level 29,
- * for output tensor of {@link ANEURALNETWORKS_TENSOR_QUANT8_ASYMM},
+ * [batches, out_height, out_width, depth_out]. Before API level 29, for
+ * output tensor of {@link ANEURALNETWORKS_TENSOR_QUANT8_ASYMM},
* the following condition must be satisfied:
* output_scale > input_scale * filter_scale
*
@@ -686,11 +752,13 @@ typedef enum {
* * {@link ANEURALNETWORKS_TENSOR_FLOAT16} (since API level 29)
* * {@link ANEURALNETWORKS_TENSOR_FLOAT32}
* * {@link ANEURALNETWORKS_TENSOR_QUANT8_ASYMM}
+ * * {@link ANEURALNETWORKS_TENSOR_QUANT8_ASYMM_SIGNED} (since API level 30)
*
* Supported tensor rank: 4, with "NHWC" or "NCHW" data layout.
* With the default data layout NHWC, the data is stored in the order of:
* [batch, height, width, channels]. Alternatively, the data layout could
* be NCHW, the data storage order of: [batch, channels, height, width].
+ * NCHW is supported since API level 29.
*
* Inputs:
* * 0: A 4-D tensor, of shape [batches, height, width, depth_in],
@@ -705,7 +773,8 @@ typedef enum {
* Outputs:
* * 0: The output 4-D tensor, of shape [batch, height*block_size,
* width*block_size, depth/(block_size*block_size)].
- * For a {@link ANEURALNETWORKS_TENSOR_QUANT8_ASYMM} tensor,
+ * For a {@link ANEURALNETWORKS_TENSOR_QUANT8_ASYMM} and
+ * {@link ANEURALNETWORKS_TENSOR_QUANT8_ASYMM_SIGNED} tensor,
* the scale and zeroPoint must be the same as input0.
*
* Available since API level 27.
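
As an illustration of the shape rule above ([batch, height*block_size, width*block_size, depth/(block_size*block_size)]), here is one common NHWC index mapping for DEPTH_TO_SPACE; it is a reference sketch with illustrative names, not the kernel of any particular backend.

    #include <stdint.h>

    /* out[b][h*bs + dh][w*bs + dw][c] = in[b][h][w][(dh*bs + dw)*out_depth + c],
     * with out_depth = depth / (bs*bs), so the output shape is
     * [batch, height*bs, width*bs, depth/(bs*bs)] as documented above. */
    static void depth_to_space_nhwc(const float *in, float *out,
                                    uint32_t batches, uint32_t height,
                                    uint32_t width, uint32_t depth, uint32_t bs)
    {
      uint32_t out_depth = depth / (bs * bs);
      for (uint32_t b = 0; b < batches; ++b)
        for (uint32_t h = 0; h < height; ++h)
          for (uint32_t w = 0; w < width; ++w)
            for (uint32_t d = 0; d < depth; ++d) {
              uint32_t c = d % out_depth;
              uint32_t dw = (d / out_depth) % bs;
              uint32_t dh = d / (out_depth * bs);
              uint32_t oh = h * bs + dh;
              uint32_t ow = w * bs + dw;
              uint32_t src = ((b * height + h) * width + w) * depth + d;
              uint32_t dst = ((b * height * bs + oh) * (width * bs) + ow) * out_depth + c;
              out[dst] = in[src];
            }
    }
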
@@ -723,6 +792,7 @@ typedef enum {
* * {@link ANEURALNETWORKS_TENSOR_QUANT8_ASYMM}
* * {@link ANEURALNETWORKS_TENSOR_QUANT8_SYMM} (since API level 29)
* * {@link ANEURALNETWORKS_TENSOR_QUANT8_SYMM_PER_CHANNEL} (since API level 29)
+ * * {@link ANEURALNETWORKS_TENSOR_QUANT8_ASYMM_SIGNED} (since API level 30)
*
* Supported output tensor {@link OperandCode}:
* * {@link ANEURALNETWORKS_TENSOR_FLOAT16} (since API level 29)
@@ -731,7 +801,8 @@ typedef enum {
* Supported tensor rank: up to 4
*
* Inputs:
- * * 0: A tensor. Since API level 29, this tensor may be zero-sized.
+ * * 0: A tensor.
+ * Since API level 29, this tensor may be zero-sized.
*
* Outputs:
* * 0: A tensor with the same shape as input0.
@@ -761,9 +832,11 @@ typedef enum {
* and an error must be reported.
*
* Supported value tensor {@link OperandCode}:
+ * * {@link ANEURALNETWORKS_TENSOR_FLOAT16} (since API level 30)
* * {@link ANEURALNETWORKS_TENSOR_FLOAT32}
- * * {@link ANEURALNETWORKS_TENSOR_INT32}
- * * {@link ANEURALNETWORKS_TENSOR_QUANT8_ASYMM}
+ * * {@link ANEURALNETWORKS_TENSOR_INT32} (since API level 29)
+ * * {@link ANEURALNETWORKS_TENSOR_QUANT8_ASYMM} (since API level 29)
+ * * {@link ANEURALNETWORKS_TENSOR_QUANT8_ASYMM_SIGNED} (since API level 30)
*
* Supported value tensor rank: from 2
*
@@ -777,7 +850,8 @@ typedef enum {
* * 0: A n-D tensor with the same rank and shape as the Values
* tensor, except for the first dimension which has the same size
* as Lookups' only dimension.
- * For a {@link ANEURALNETWORKS_TENSOR_QUANT8_ASYMM} tensor,
+ * For a {@link ANEURALNETWORKS_TENSOR_QUANT8_ASYMM} and
+ * {@link ANEURALNETWORKS_TENSOR_QUANT8_ASYMM_SIGNED} tensor,
* the scale and zeroPoint must be the same as input1.
*
* Available since API level 27.
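
The lookup described above is a plain row gather: output row i is a copy of Values row Lookups[i]. A minimal float sketch with illustrative names and no bounds checking:

    #include <stdint.h>
    #include <string.h>

    /* output[i, :] = values[lookups[i], :]. For quantized tensors, the output
     * scale and zeroPoint follow input1 (the Values tensor), as documented. */
    static void embedding_lookup_f32(const int32_t *lookups, uint32_t n_lookups,
                                     const float *values, uint32_t row_size,
                                     float *output)
    {
      for (uint32_t i = 0; i < n_lookups; ++i)
        memcpy(output + (size_t)i * row_size,
               values + (size_t)lookups[i] * row_size,
               row_size * sizeof(float));
    }
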
@@ -816,6 +890,7 @@ typedef enum {
* * {@link ANEURALNETWORKS_TENSOR_FLOAT16} (since API level 29)
* * {@link ANEURALNETWORKS_TENSOR_FLOAT32}
* * {@link ANEURALNETWORKS_TENSOR_QUANT8_ASYMM}
+ * * {@link ANEURALNETWORKS_TENSOR_QUANT8_ASYMM_SIGNED} (since API level 30)
*
* Supported tensor rank: up to 4.
*
@@ -826,26 +901,26 @@ typedef enum {
* [batch_size, input_size], where "input_size" corresponds to the
* number of inputs to the layer, matching the second dimension of
* weights, and "batch_size" is calculated by dividing the number of
- * elements by "input_size". Since API level 29, zero batch_size is
- * supported for this tensor.
+ * elements by "input_size".
+ * Since API level 29, zero batch_size is supported for this tensor.
* * 1: A 2-D tensor, specifying the weights, of shape
* [num_units, input_size], where "num_units" corresponds to the number
* of output nodes.
* * 2: A 1-D tensor, of shape [num_units], specifying the bias. For input
* tensor of {@link ANEURALNETWORKS_TENSOR_FLOAT32}, the bias should
- * also be of {@link ANEURALNETWORKS_TENSOR_FLOAT32}. For input tensor
- * of {@link ANEURALNETWORKS_TENSOR_QUANT8_ASYMM}, the bias should be
- * of {@link ANEURALNETWORKS_TENSOR_INT32}, with zeroPoint of 0 and
- * bias_scale == input_scale * filter_scale.
+ * also be of {@link ANEURALNETWORKS_TENSOR_FLOAT32}.
+ * For input tensor of {@link ANEURALNETWORKS_TENSOR_QUANT8_ASYMM}
+ * and {@link ANEURALNETWORKS_TENSOR_QUANT8_ASYMM_SIGNED},
+ * the bias should be of {@link ANEURALNETWORKS_TENSOR_INT32},
+ * with zeroPoint of 0 and bias_scale == input_scale * filter_scale.
* * 3: An {@link ANEURALNETWORKS_INT32} scalar, and has to be one of the
* {@link FuseCode} values. Specifies the activation to
* invoke on the result.
*
* Outputs:
- * * 0: The output tensor, of shape [batch_size, num_units]. Before API
- * level 29, for output tensor of {@link
- * ANEURALNETWORKS_TENSOR_QUANT8_ASYMM}, the following condition must
- * be satisfied: output_scale > input_scale * filter_scale.
+ * * 0: The output tensor, of shape [batch_size, num_units]. Before API level 29, for
+ * output tensor of {@link ANEURALNETWORKS_TENSOR_QUANT8_ASYMM}, the following
+ * condition must be satisfied: output_scale > input_scale * filter_scale.
*
* Available since API level 27.
*/
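
A reference float computation matching the description above may make the shapes clearer; RELU is used as the fused activation purely for illustration and the helper name is not part of the API.

    #include <stdint.h>

    /* output[b][u] = act( sum_i input[b][i] * weights[u][i] + bias[u] ),
     * with batch_size = num_elements / input_size, as documented above. */
    static void fully_connected_f32(const float *input, uint32_t num_elements,
                                    const float *weights, const float *bias,
                                    uint32_t num_units, uint32_t input_size,
                                    float *output)
    {
      uint32_t batch_size = num_elements / input_size;
      for (uint32_t b = 0; b < batch_size; ++b)
        for (uint32_t u = 0; u < num_units; ++u) {
          float acc = bias[u];
          for (uint32_t i = 0; i < input_size; ++i)
            acc += input[b * input_size + i] * weights[u * input_size + i];
          output[b * num_units + u] = acc > 0.0f ? acc : 0.0f; /* RELU */
        }
    }
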
@@ -911,7 +986,7 @@ typedef enum {
ANEURALNETWORKS_HASHTABLE_LOOKUP = 10,
/**
- * Applies L2 normalization along the depth dimension.
+ * Applies L2 normalization along the axis dimension.
*
* The values in the output tensor are computed as:
*
@@ -919,13 +994,13 @@ typedef enum {
* input[batch, row, col, channel] /
* sqrt(sum_{c} pow(input[batch, row, col, c], 2))
*
- * For input tensor with rank less than 4, independently normalizes each
- * 1-D slice along dimension dim.
+ * By default the axis dimension is the last dimension of the input tensor.
*
* Supported tensor {@link OperandCode}:
* * {@link ANEURALNETWORKS_TENSOR_FLOAT16} (since API level 29)
* * {@link ANEURALNETWORKS_TENSOR_FLOAT32}
* * {@link ANEURALNETWORKS_TENSOR_QUANT8_ASYMM} (since API level 29)
+ * * {@link ANEURALNETWORKS_TENSOR_QUANT8_ASYMM_SIGNED} (since API level 30)
*
* Supported tensor rank: up to 4
* Tensors with rank less than 4 are only supported since API level 29.
@@ -942,6 +1017,12 @@ typedef enum {
* * 0: A tensor of the same {@link OperandCode} and same shape as input0.
* For {@link ANEURALNETWORKS_TENSOR_QUANT8_ASYMM},
* the scale must be 1.f / 128 and the zeroPoint must be 128.
+ * For {@link ANEURALNETWORKS_TENSOR_QUANT8_ASYMM_SIGNED},
+ * the scale must be 1.f / 128 and the zeroPoint must be 0.
+ *
+ * NOTE: Before API level 30, if the elements along an axis are all zeros,
+ * the result is undefined. Since API level 30, if the elements along an axis
+ * are all zeros, the result is logical zero.
*
* Available since API level 27.
*/
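
A minimal sketch of the normalization formula above along the last (default) axis, including the API level 30 all-zero behavior from the NOTE; names are illustrative.

    #include <math.h>
    #include <stddef.h>
    #include <stdint.h>

    /* Normalizes each 1-D slice along the last axis:
     * y[i] = x[i] / sqrt(sum_j x[j]^2). An all-zero slice yields zeros,
     * matching the API level 30 behavior described above. */
    static void l2_normalize_last_axis(const float *in, float *out,
                                       uint32_t outer, uint32_t axis_size)
    {
      for (uint32_t o = 0; o < outer; ++o) {
        const float *x = in + (size_t)o * axis_size;
        float *y = out + (size_t)o * axis_size;
        float sum = 0.0f;
        for (uint32_t i = 0; i < axis_size; ++i)
          sum += x[i] * x[i];
        float inv = sum > 0.0f ? 1.0f / sqrtf(sum) : 0.0f;
        for (uint32_t i = 0; i < axis_size; ++i)
          y[i] = x[i] * inv;
      }
    }
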
@@ -967,13 +1048,14 @@ typedef enum {
* With the default data layout NHWC, the data is stored in the order of:
* [batch, height, width, channels]. Alternatively, the data layout could
* be NCHW, the data storage order of: [batch, channels, height, width].
+ * NCHW is supported since API level 29.
*
* Both explicit padding and implicit padding are supported.
*
* Inputs (explicit padding):
* * 0: A 4-D tensor, of shape [batches, height, width, depth], specifying
- * the input. Since API level 29, zero batches is supported for this
- * tensor.
+ * the input.
+ * Since API level 29, zero batches is supported for this tensor.
* * 1: An {@link ANEURALNETWORKS_INT32} scalar, specifying the padding on
* the left, in the ‘width’ dimension.
* * 2: An {@link ANEURALNETWORKS_INT32} scalar, specifying the padding on
@@ -999,8 +1081,8 @@ typedef enum {
*
* Inputs (implicit padding):
* * 0: A 4-D tensor, of shape [batches, height, width, depth], specifying
- * the input. Since API level 29, zero batches is supported for this
- * tensor.
+ * the input.
+ * Since API level 29, zero batches is supported for this tensor.
* * 1: An {@link ANEURALNETWORKS_INT32} scalar, specifying the implicit
* padding scheme, has to be one of the
* {@link PaddingCode} values.
@@ -1095,17 +1177,20 @@ typedef enum {
* * {@link ANEURALNETWORKS_TENSOR_FLOAT16} (since API level 29)
* * {@link ANEURALNETWORKS_TENSOR_FLOAT32}
* * {@link ANEURALNETWORKS_TENSOR_QUANT8_ASYMM}
+ * * {@link ANEURALNETWORKS_TENSOR_QUANT8_ASYMM_SIGNED} (since API level 30)
*
* Supported tensor rank: up to 4.
*
* Inputs:
- * * 0: A tensor, specifying the input. Since API level 29, this tensor may
- * be zero-sized.
+ * * 0: A tensor, specifying the input.
+ * Since API level 29, this tensor may be zero-sized.
*
* Outputs:
* * 0: The output tensor of same shape as input0.
* For {@link ANEURALNETWORKS_TENSOR_QUANT8_ASYMM},
* the scale must be 1.f / 256 and the zeroPoint must be 0.
+ * For {@link ANEURALNETWORKS_TENSOR_QUANT8_ASYMM_SIGNED},
+ * the scale must be 1.f / 256 and the zeroPoint must be -128.
*
* Available since API level 27.
*/
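
The fixed output quantization above follows from the usual affine mapping real_value = scale * (quantized - zeroPoint): with scale 1.f/256 and zeroPoint 0 (QUANT8_ASYMM) or -128 (QUANT8_ASYMM_SIGNED), the full 8-bit range covers [0, 255/256], which matches the sigmoid output range. A small sketch:

    #include <stdint.h>

    /* real = scale * (q - zeroPoint). For LOGISTIC outputs, scale is 1.f/256
     * and zeroPoint is 0 (QUANT8_ASYMM) or -128 (QUANT8_ASYMM_SIGNED), so
     * dequantize_q8(255, 1.f/256, 0) == dequantize_q8(127, 1.f/256, -128)
     * == 255.f/256, the largest representable sigmoid output. */
    static float dequantize_q8(int32_t q, float scale, int32_t zero_point)
    {
      return scale * (float)(q - zero_point);
    }
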
@@ -1158,7 +1243,7 @@ typedef enum {
* Outputs:
* * 0: If the projection type is Sparse:
* Output.Dim == { Tensor[0].Dim[0] }
- * A tensor of int32 that represents hash signatures,
+ * A tensor of int32 that represents hash signatures.
*
* If the projection type is Dense:
* Output.Dim == { Tensor[0].Dim[0] * Tensor[0].Dim[1] }
@@ -1248,7 +1333,7 @@ typedef enum {
* * The projection bias (\f$b_{proj}\f$) may (but not required to) have a
* value if the recurrent projection layer exists, and should otherwise
* have no value.
- * * (API level >= 29) The four layer normalization weights either all have
+ * * (API level 29 or later) The four layer normalization weights either all have
* values or none of them have values. Additionally, if CIFG is used,
* input layer normalization weights tensor is omitted and the other layer
* normalization weights either all have values or none of them have
@@ -1406,18 +1491,20 @@ typedef enum {
* * {@link ANEURALNETWORKS_TENSOR_FLOAT16} (since API level 29)
* * {@link ANEURALNETWORKS_TENSOR_FLOAT32}
* * {@link ANEURALNETWORKS_TENSOR_QUANT8_ASYMM}
+ * * {@link ANEURALNETWORKS_TENSOR_QUANT8_ASYMM_SIGNED} (since API level 30)
*
* Supported tensor rank: 4, with "NHWC" or "NCHW" data layout.
* With the default data layout NHWC, the data is stored in the order of:
* [batch, height, width, channels]. Alternatively, the data layout could
* be NCHW, the data storage order of: [batch, channels, height, width].
+ * NCHW is supported since API level 29.
*
* Both explicit padding and implicit padding are supported.
*
* Inputs (explicit padding):
* * 0: A 4-D tensor, of shape [batches, height, width, depth], specifying
- * the input. Since API level 29, zero batches is supported for this
- * tensor.
+ * the input.
+ * Since API level 29, zero batches is supported for this tensor.
* * 1: An {@link ANEURALNETWORKS_INT32} scalar, specifying the padding on
* the left, in the ‘width’ dimension.
* * 2: An {@link ANEURALNETWORKS_INT32} scalar, specifying the padding on
@@ -1443,8 +1530,8 @@ typedef enum {
*
* Inputs (implicit padding):
* * 0: A 4-D tensor, of shape [batches, height, width, depth], specifying
- * the input. Since API level 29, zero batches is supported for this
- * tensor.
+ * the input.
+ * Since API level 29, zero batches is supported for this tensor.
* * 1: An {@link ANEURALNETWORKS_INT32} scalar, specifying the implicit
* padding scheme, has to be one of the
* {@link PaddingCode} values.
@@ -1466,7 +1553,8 @@ typedef enum {
* Outputs:
* * 0: The output 4-D tensor, of shape
* [batches, out_height, out_width, depth].
- * For a {@link ANEURALNETWORKS_TENSOR_QUANT8_ASYMM} tensor,
+ * For a {@link ANEURALNETWORKS_TENSOR_QUANT8_ASYMM} and
+ * {@link ANEURALNETWORKS_TENSOR_QUANT8_ASYMM_SIGNED} tensor,
* the scale and zeroPoint must be the same as input0.
*
* Available since API level 27.
@@ -1496,6 +1584,8 @@ typedef enum {
* * {@link ANEURALNETWORKS_TENSOR_FLOAT16} (since API level 29)
* * {@link ANEURALNETWORKS_TENSOR_FLOAT32}
* * {@link ANEURALNETWORKS_TENSOR_QUANT8_ASYMM}
+ * * {@link ANEURALNETWORKS_TENSOR_QUANT8_ASYMM_SIGNED} (since API level 30)
+ * * {@link ANEURALNETWORKS_TENSOR_INT32} (since API level 30)
*
* Supported tensor rank: up to 4
*
@@ -1506,10 +1596,13 @@ typedef enum {
* * 2: An {@link ANEURALNETWORKS_INT32} scalar, and has to be one of the
* {@link FuseCode} values. Specifies the activation to
* invoke on the result.
+ * For a {@link ANEURALNETWORKS_TENSOR_INT32} tensor,
+ * the {@link FuseCode} must be "NONE".
*
* Outputs:
* * 0: The product, a tensor of the same {@link OperandCode} as input0.
- * For output tensor of {@link ANEURALNETWORKS_TENSOR_QUANT8_ASYMM},
+ * For output tensor of {@link ANEURALNETWORKS_TENSOR_QUANT8_ASYMM}
+ * and {@link ANEURALNETWORKS_TENSOR_QUANT8_ASYMM_SIGNED},
* the following condition must be satisfied:
* output_scale > input1_scale * input2_scale.
*
@@ -1528,16 +1621,18 @@ typedef enum {
* * {@link ANEURALNETWORKS_TENSOR_FLOAT16} (since API level 29)
* * {@link ANEURALNETWORKS_TENSOR_FLOAT32}
* * {@link ANEURALNETWORKS_TENSOR_QUANT8_ASYMM}
+ * * {@link ANEURALNETWORKS_TENSOR_QUANT8_ASYMM_SIGNED} (since API level 30)
*
* Supported tensor rank: up to 4.
*
* Inputs:
- * * 0: A tensor, specifying the input. Since API level 29, this tensor may
- * be zero-sized.
+ * * 0: A tensor, specifying the input.
+ * Since API level 29, this tensor may be zero-sized.
*
* Outputs:
* * 0: The output tensor of same shape as input0.
- * For a {@link ANEURALNETWORKS_TENSOR_QUANT8_ASYMM} tensor,
+ * For a {@link ANEURALNETWORKS_TENSOR_QUANT8_ASYMM} and
+ * {@link ANEURALNETWORKS_TENSOR_QUANT8_ASYMM_SIGNED} tensor,
* the scale and zeroPoint must be the same as input0.
*
* Available since API level 27.
@@ -1555,16 +1650,18 @@ typedef enum {
* * {@link ANEURALNETWORKS_TENSOR_FLOAT16} (since API level 29)
* * {@link ANEURALNETWORKS_TENSOR_FLOAT32}
* * {@link ANEURALNETWORKS_TENSOR_QUANT8_ASYMM}
+ * * {@link ANEURALNETWORKS_TENSOR_QUANT8_ASYMM_SIGNED} (since API level 30)
*
* Supported tensor rank: up to 4.
*
* Inputs:
- * * 0: A tensor, specifying the input. Since API level 29, this tensor may
- * be zero-sized.
+ * * 0: A tensor, specifying the input.
+ * Since API level 29, this tensor may be zero-sized.
*
* Outputs:
* * 0: The output tensor of the same shape as input0.
- * For a {@link ANEURALNETWORKS_TENSOR_QUANT8_ASYMM} tensor,
+ * For a {@link ANEURALNETWORKS_TENSOR_QUANT8_ASYMM} and
+ * {@link ANEURALNETWORKS_TENSOR_QUANT8_ASYMM_SIGNED} tensor,
* the scale and zeroPoint must be the same as input0.
*
* Available since API level 27.
@@ -1582,16 +1679,18 @@ typedef enum {
* * {@link ANEURALNETWORKS_TENSOR_FLOAT16} (since API level 29)
* * {@link ANEURALNETWORKS_TENSOR_FLOAT32}
* * {@link ANEURALNETWORKS_TENSOR_QUANT8_ASYMM}
+ * * {@link ANEURALNETWORKS_TENSOR_QUANT8_ASYMM_SIGNED} (since API level 30)
*
* Supported tensor rank: up to 4.
*
* Inputs:
- * * 0: A tensor, specifying the input. Since API level 29, this tensor may
- * be zero-sized.
+ * * 0: A tensor, specifying the input.
+ * Since API level 29, this tensor may be zero-sized.
*
* Outputs:
* * 0: The output tensor of same shape as input0.
- * For a {@link ANEURALNETWORKS_TENSOR_QUANT8_ASYMM} tensor,
+ * For a {@link ANEURALNETWORKS_TENSOR_QUANT8_ASYMM} and
+ * {@link ANEURALNETWORKS_TENSOR_QUANT8_ASYMM_SIGNED} tensor,
* the scale and zeroPoint must be the same as input0.
*
* Available since API level 27.
@@ -1608,6 +1707,7 @@ typedef enum {
* * {@link ANEURALNETWORKS_TENSOR_FLOAT16} (since API level 29)
* * {@link ANEURALNETWORKS_TENSOR_FLOAT32}
* * {@link ANEURALNETWORKS_TENSOR_QUANT8_ASYMM}
+ * * {@link ANEURALNETWORKS_TENSOR_QUANT8_ASYMM_SIGNED} (since API level 30)
*
* Supported tensor rank: up to 4.
*
@@ -1624,7 +1724,8 @@ typedef enum {
*
* Outputs:
* * 0: The output tensor, of shape specified by the input shape.
- * For a {@link ANEURALNETWORKS_TENSOR_QUANT8_ASYMM} tensor,
+ * For a {@link ANEURALNETWORKS_TENSOR_QUANT8_ASYMM} and
+ * {@link ANEURALNETWORKS_TENSOR_QUANT8_ASYMM_SIGNED} tensor,
* the scale and zeroPoint must be the same as input0.
*
* Available since API level 27.
@@ -1642,18 +1743,20 @@ typedef enum {
* * {@link ANEURALNETWORKS_TENSOR_FLOAT16} (since API level 29)
* * {@link ANEURALNETWORKS_TENSOR_FLOAT32}
* * {@link ANEURALNETWORKS_TENSOR_QUANT8_ASYMM} (since API level 29)
+ * * {@link ANEURALNETWORKS_TENSOR_QUANT8_ASYMM_SIGNED} (since API level 30)
*
* Supported tensor rank: 4, with "NHWC" or "NCHW" data layout.
* With the default data layout NHWC, the data is stored in the order of:
* [batch, height, width, channels]. Alternatively, the data layout could
* be NCHW, the data storage order of: [batch, channels, height, width].
+ * NCHW is supported since API level 29.
*
* Both resizing by shape and resizing by scale are supported.
*
* Inputs (resizing by shape):
* * 0: A 4-D tensor, of shape [batches, height, width, depth], specifying
- * the input. Since API level 29, zero batches is supported for this
- * tensor.
+ * the input.
+ * Since API level 29, zero batches is supported for this tensor.
* * 1: An {@link ANEURALNETWORKS_INT32} scalar, specifying the output
* width of the output tensor.
* * 2: An {@link ANEURALNETWORKS_INT32} scalar, specifying the output
@@ -1661,6 +1764,17 @@ typedef enum {
* * 3: An optional {@link ANEURALNETWORKS_BOOL} scalar, default to false.
* Set to true to specify NCHW data layout for input0 and output0.
* Available since API level 29.
+ * * 4: Align corners. An optional {@link ANEURALNETWORKS_BOOL}
+ * scalar, default to false. If True, the centers of the 4 corner
+ * pixels of the input and output tensors are aligned, preserving the
+ * values at the corner pixels.
+ * Available since API level 30.
+ * * 5: Half pixel centers. An optional {@link ANEURALNETWORKS_BOOL}
+ * scalar, default to false. If True, the pixel centers are assumed to
+ * be at (0.5, 0.5). This is the default behavior of image.resize in
+ * TF 2.0. If this parameter is True, then align_corners parameter
+ * must be False.
+ * Available since API level 30.
*
* Inputs (resizing by scale, since API level 29):
* * 0: A 4-D tensor, of shape [batches, height, width, depth], specifying
@@ -1679,10 +1793,24 @@ typedef enum {
* {@link ANEURALNETWORKS_FLOAT32} otherwise.
* * 3: An optional {@link ANEURALNETWORKS_BOOL} scalar, default to false.
* Set to true to specify NCHW data layout for input0 and output0.
+ * * 4: Align corners. An optional {@link ANEURALNETWORKS_BOOL}
+ * scalar, default to false. If True, the centers of the 4 corner
+ * pixels of the input and output tensors are aligned, preserving the
+ * values at the corner pixels.
+ * Available since API level 30.
+ * * 5: Half pixel centers. An optional {@link ANEURALNETWORKS_BOOL}
+ * scalar, default to false. If True, the pixel centers are assumed to
+ * be at (0.5, 0.5). This is the default behavior of image.resize in
+ * TF 2.0. If this parameter is True, then align_corners parameter
+ * must be False.
+ * Available since API level 30.
*
* Outputs:
* * 0: The output 4-D tensor, of shape
* [batches, new_height, new_width, depth].
+ * For a {@link ANEURALNETWORKS_TENSOR_QUANT8_ASYMM} and
+ * {@link ANEURALNETWORKS_TENSOR_QUANT8_ASYMM_SIGNED} tensor,
+ * the scale and zeroPoint must be the same as input0.
- * For a {@link ANEURALNETWORKS_TENSOR_QUANT8_ASYMM} tensor,
- * the scale and zeroPoint must be the same as input0.
*
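
For optional inputs 4 and 5 above, the source coordinate of each output coordinate is commonly computed as in the sketch below; the description of input 5 refers to image.resize in TF 2.0, and this mirrors those TensorFlow conventions for illustration only.

    /* Maps an output x index back to a source x coordinate (the same applies
     * to y). half_pixel_centers corresponds to optional input 5, align_corners
     * to optional input 4; per the docs they must not both be true. */
    static float resize_bilinear_src_x(int out_x, int in_w, int out_w,
                                       int align_corners, int half_pixel_centers)
    {
      if (half_pixel_centers)
        return ((float)out_x + 0.5f) * (float)in_w / (float)out_w - 0.5f;
      if (align_corners && out_w > 1)
        return (float)out_x * (float)(in_w - 1) / (float)(out_w - 1);
      return (float)out_x * (float)in_w / (float)out_w;
    }
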
@@ -1762,19 +1890,21 @@ typedef enum {
* * {@link ANEURALNETWORKS_TENSOR_FLOAT16} (since API level 29)
* * {@link ANEURALNETWORKS_TENSOR_FLOAT32}
* * {@link ANEURALNETWORKS_TENSOR_QUANT8_ASYMM}
+ * * {@link ANEURALNETWORKS_TENSOR_QUANT8_ASYMM_SIGNED} (since API level 30)
*
* Supported tensor rank: up to 4.
* Tensors with rank other than 2 or 4 are only supported since API level 29.
*
* Inputs:
- * * 0: A 2-D or 4-D tensor, specifying the tensor to be reshaped. Since
- * API level 29, this tensor may be zero-sized.
+ * * 0: A 2-D or 4-D tensor, specifying the tensor to be reshaped.
+ * Since API level 29, this tensor may be zero-sized.
* * 1: A scalar, specifying the positive scaling factor for the exponent,
- * beta. If input0 is of {@link ANEURALNETWORKS_TENSOR_FLOAT32} or
- * {@link ANEURALNETWORKS_TENSOR_QUANT8_ASYMM}, the scalar must be of
- * {@link ANEURALNETWORKS_FLOAT32}. If input0 is of {@link
- * ANEURALNETWORKS_TENSOR_FLOAT16}, then the scalar must be of {@link
- * ANEURALNETWORKS_FLOAT16}.
+ * beta. If input0 is of {@link ANEURALNETWORKS_TENSOR_FLOAT32},
+ * {@link ANEURALNETWORKS_TENSOR_QUANT8_ASYMM} or
+ * {@link ANEURALNETWORKS_TENSOR_QUANT8_ASYMM_SIGNED}, the scalar
+ * must be of {@link ANEURALNETWORKS_FLOAT32}.
+ * If input0 is of {@link ANEURALNETWORKS_TENSOR_FLOAT16}, then the
+ * scalar must be of {@link ANEURALNETWORKS_FLOAT16}.
* * 2: An optional {@link ANEURALNETWORKS_INT32} scalar, default to -1,
* specifying the dimension the activation would be performed on.
* Negative index is used to specify axis from the end (e.g. -1 for
@@ -1785,6 +1915,8 @@ typedef enum {
* * 0: The output tensor of same shape as input0.
* For {@link ANEURALNETWORKS_TENSOR_QUANT8_ASYMM},
* the scale must be 1.f / 256 and the zeroPoint must be 0.
+ * For {@link ANEURALNETWORKS_TENSOR_QUANT8_ASYMM_SIGNED},
+ * the scale must be 1.f / 256 and the zeroPoint must be -128.
*
* Available since API level 27.
*/
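
A minimal float sketch of the softmax formula for one slice along the chosen axis, with the beta scalar from input 1; subtracting the maximum before exponentiation is a standard numerical-stability detail, not an extra requirement of the op.

    #include <math.h>
    #include <stddef.h>

    /* out[i] = exp(beta * (in[i] - max)) / sum_j exp(beta * (in[j] - max)).
     * The fixed output quantization (scale 1/256, zeroPoint 0 or -128)
     * then covers the [0, 255/256] range of these values. */
    static void softmax_slice(const float *in, float *out, size_t n, float beta)
    {
      float max = in[0];
      for (size_t i = 1; i < n; ++i)
        if (in[i] > max) max = in[i];
      float sum = 0.0f;
      for (size_t i = 0; i < n; ++i) {
        out[i] = expf(beta * (in[i] - max));
        sum += out[i];
      }
      for (size_t i = 0; i < n; ++i)
        out[i] /= sum;
    }
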
@@ -1808,11 +1940,13 @@ typedef enum {
* * {@link ANEURALNETWORKS_TENSOR_FLOAT16} (since API level 29)
* * {@link ANEURALNETWORKS_TENSOR_FLOAT32}
* * {@link ANEURALNETWORKS_TENSOR_QUANT8_ASYMM}
+ * * {@link ANEURALNETWORKS_TENSOR_QUANT8_ASYMM_SIGNED} (since API level 30)
*
* Supported tensor rank: 4, with "NHWC" or "NCHW" data layout.
* With the default data layout NHWC, the data is stored in the order of:
* [batch, height, width, channels]. Alternatively, the data layout could
* be NCHW, the data storage order of: [batch, channels, height, width].
+ * NCHW is supported since API level 29.
*
* Inputs:
* * 0: A 4-D tensor, of shape [batches, height, width, depth_in],
@@ -1827,7 +1961,8 @@ typedef enum {
* Outputs:
* * 0: The output 4-D tensor, of shape [batches, height/block_size,
* width/block_size, depth_in*block_size*block_size].
- * For a {@link ANEURALNETWORKS_TENSOR_QUANT8_ASYMM} tensor,
+ * For a {@link ANEURALNETWORKS_TENSOR_QUANT8_ASYMM} and
+ * {@link ANEURALNETWORKS_TENSOR_QUANT8_ASYMM_SIGNED} tensor,
* the scale and zeroPoint must be the same as input0.
*
* Available since API level 27.
@@ -1924,17 +2059,20 @@ typedef enum {
* * {@link ANEURALNETWORKS_TENSOR_FLOAT16} (since API level 29)
* * {@link ANEURALNETWORKS_TENSOR_FLOAT32}
* * {@link ANEURALNETWORKS_TENSOR_QUANT8_ASYMM} (since API level 29)
+ * * {@link ANEURALNETWORKS_TENSOR_QUANT8_ASYMM_SIGNED} (since API level 30)
*
* Supported tensor rank: up to 4.
*
* Inputs:
- * * 0: A tensor, specifying the input. Since API level 29, this tensor may
- * be zero-sized.
+ * * 0: A tensor, specifying the input.
+ * Since API level 29, this tensor may be zero-sized.
*
* Outputs:
* * 0: The output tensor of same shape as input0.
* For {@link ANEURALNETWORKS_TENSOR_QUANT8_ASYMM},
* the scale must be 1.f / 128 and the zeroPoint must be 128.
+ * For {@link ANEURALNETWORKS_TENSOR_QUANT8_ASYMM_SIGNED},
+ * the scale must be 1.f / 128 and the zeroPoint must be 0.
*
* Available since API level 27.
*/
@@ -1942,7 +2080,6 @@ typedef enum {
// Operations below are available since API level 28.
- // TODO: make the description easier to understand.
/**
* BatchToSpace for N-dimensional tensors.
*
@@ -1957,11 +2094,13 @@ typedef enum {
* * {@link ANEURALNETWORKS_TENSOR_FLOAT16} (since API level 29)
* * {@link ANEURALNETWORKS_TENSOR_FLOAT32}
* * {@link ANEURALNETWORKS_TENSOR_QUANT8_ASYMM}
+ * * {@link ANEURALNETWORKS_TENSOR_QUANT8_ASYMM_SIGNED} (since API level 30)
*
* Supported tensor rank: 4, with "NHWC" or "NCHW" data layout.
* With the default data layout NHWC, the data is stored in the order of:
* [batch, height, width, channels]. Alternatively, the data layout could
* be NCHW, the data storage order of: [batch, channels, height, width].
+ * NCHW is supported since API level 29.
*
* Inputs:
* * 0: An n-D tensor, specifying the tensor to be reshaped
@@ -1974,7 +2113,8 @@ typedef enum {
*
* Outputs:
* * 0: A tensor of the same {@link OperandCode} as input0.
- * For a {@link ANEURALNETWORKS_TENSOR_QUANT8_ASYMM} tensor,
+ * For a {@link ANEURALNETWORKS_TENSOR_QUANT8_ASYMM} and
+ * {@link ANEURALNETWORKS_TENSOR_QUANT8_ASYMM_SIGNED} tensor,
* the scale and zeroPoint must be the same as input0.
*
* Available since API level 28.
@@ -1988,6 +2128,11 @@ typedef enum {
* dimensions. The output is the result of dividing the first input tensor
* by the second, optionally modified by an activation function.
*
+ * For inputs of {@link ANEURALNETWORKS_TENSOR_INT32}, performs
+ * "floor division" ("//" in Python). For example,
+ * 5 // 2 = 2
+ * -5 // 2 = -3
+ *
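
A minimal sketch of the floor-division rule just quoted; C's integer division truncates toward zero, so negative, inexact quotients need an adjustment:

    #include <stdint.h>

    /* Floor division as described above: 5 // 2 == 2, -5 // 2 == -3. */
    static int32_t floor_div_i32(int32_t a, int32_t b)
    {
      int32_t q = a / b;
      if ((a % b != 0) && ((a < 0) != (b < 0)))
        --q;
      return q;
    }
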
* Two dimensions are compatible when:
* 1. they are equal, or
* 2. one of them is 1
@@ -2008,6 +2153,7 @@ typedef enum {
* Supported tensor {@link OperandCode}:
* * {@link ANEURALNETWORKS_TENSOR_FLOAT16} (since API level 29)
* * {@link ANEURALNETWORKS_TENSOR_FLOAT32}
+ * * {@link ANEURALNETWORKS_TENSOR_INT32} (since API level 30)
*
* Supported tensor rank: up to 4
*
@@ -2018,6 +2164,8 @@ typedef enum {
* * 2: An {@link ANEURALNETWORKS_INT32} scalar, and has to be one of the
* {@link FuseCode} values. Specifies the activation to
* invoke on the result.
+ * For a {@link ANEURALNETWORKS_TENSOR_INT32} tensor,
+ * the {@link FuseCode} must be "NONE".
*
* Outputs:
* * 0: A tensor of the same {@link OperandCode} as input0.
@@ -2038,6 +2186,7 @@ typedef enum {
* * {@link ANEURALNETWORKS_TENSOR_FLOAT16} (since API level 29)
* * {@link ANEURALNETWORKS_TENSOR_FLOAT32}
* * {@link ANEURALNETWORKS_TENSOR_QUANT8_ASYMM}
+ * * {@link ANEURALNETWORKS_TENSOR_QUANT8_ASYMM_SIGNED} (since API level 30)
*
* Supported tensor rank: up to 4
*
@@ -2057,23 +2206,27 @@ typedef enum {
*
* Outputs:
* * 0: A tensor of the same {@link OperandCode} as input0.
- * For a {@link ANEURALNETWORKS_TENSOR_QUANT8_ASYMM} tensor,
- * the scale and zeroPoint must be same as input0.
+ * For a {@link ANEURALNETWORKS_TENSOR_QUANT8_ASYMM} and
+ * {@link ANEURALNETWORKS_TENSOR_QUANT8_ASYMM_SIGNED} tensor,
+ * the scale and zeroPoint must be the same as input0.
+ * If all dimensions are reduced and keep_dims is false, the output
+ * shape is [1].
*
* Available since API level 28.
*/
ANEURALNETWORKS_MEAN = 31,
/**
- * Pads a tensor with zeros.
+ * Pads a tensor.
*
* This operation pads a tensor according to the specified paddings.
*
* Supported tensor {@link OperandCode}:
* * {@link ANEURALNETWORKS_TENSOR_FLOAT16} (since API level 29)
* * {@link ANEURALNETWORKS_TENSOR_FLOAT32}
- * * {@link ANEURALNETWORKS_TENSOR_QUANT8_ASYMM} (full support since API
- * level 29, see the output section)
+ * * {@link ANEURALNETWORKS_TENSOR_QUANT8_ASYMM}
+ * (full support since API level 29, see the output section)
+ * * {@link ANEURALNETWORKS_TENSOR_QUANT8_ASYMM_SIGNED} (since API level 30)
*
* Supported tensor rank: up to 4
*
@@ -2095,7 +2248,8 @@ typedef enum {
* of the padding:
* output0.dimension[i] =
* padding[i, 0] + input0.dimension[i] + padding[i, 1]
- * For a {@link ANEURALNETWORKS_TENSOR_QUANT8_ASYMM} tensor,
+ * For a {@link ANEURALNETWORKS_TENSOR_QUANT8_ASYMM} and
+ * {@link ANEURALNETWORKS_TENSOR_QUANT8_ASYMM_SIGNED} tensor,
* the scale and zeroPoint must be the same as input0.
*
* NOTE: Before API level 29, the pad value for
@@ -2106,7 +2260,6 @@ typedef enum {
*/
ANEURALNETWORKS_PAD = 32,
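
The output-shape rule quoted above is easy to state as a helper; a sketch with illustrative names, where 'paddings' is the flattened [rank, 2] tensor from input 1:

    #include <stdint.h>

    /* out_dim[i] = padding[i][0] + in_dim[i] + padding[i][1], as documented. */
    static void pad_output_shape(const uint32_t *in_dims, const int32_t *paddings,
                                 uint32_t rank, uint32_t *out_dims)
    {
      for (uint32_t i = 0; i < rank; ++i)
        out_dims[i] = (uint32_t)paddings[2 * i] + in_dims[i] +
                      (uint32_t)paddings[2 * i + 1];
    }
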
- // TODO: make the description easier to understand.
/**
* SpaceToBatch for N-Dimensional tensors.
*
@@ -2121,13 +2274,15 @@ typedef enum {
* Supported tensor {@link OperandCode}:
* * {@link ANEURALNETWORKS_TENSOR_FLOAT16} (since API level 29)
* * {@link ANEURALNETWORKS_TENSOR_FLOAT32}
- * * {@link ANEURALNETWORKS_TENSOR_QUANT8_ASYMM} (full support since API
- * level 29, see the output section)
+ * * {@link ANEURALNETWORKS_TENSOR_QUANT8_ASYMM}
+ * (full support since API level 29, see the output section)
+ * * {@link ANEURALNETWORKS_TENSOR_QUANT8_ASYMM_SIGNED} (since API level 30)
*
* Supported tensor rank: 4, with "NHWC" or "NCHW" data layout.
* With the default data layout NHWC, the data is stored in the order of:
* [batch, height, width, channels]. Alternatively, the data layout could
* be NCHW, the data storage order of: [batch, channels, height, width].
+ * NCHW is supported since API level 29.
*
* Inputs:
* * 0: An n-D tensor, specifying the input.
@@ -2148,7 +2303,8 @@ typedef enum {
*
* Outputs:
* * 0: A tensor of the same {@link OperandCode} as input0.
- * For a {@link ANEURALNETWORKS_TENSOR_QUANT8_ASYMM} tensor,
+ * For a {@link ANEURALNETWORKS_TENSOR_QUANT8_ASYMM} and
+ * {@link ANEURALNETWORKS_TENSOR_QUANT8_ASYMM_SIGNED} tensor,
* the scale and zeroPoint must be the same as input0.
*
* NOTE: Before API level 29, the pad value for
@@ -2171,6 +2327,7 @@ typedef enum {
* * {@link ANEURALNETWORKS_TENSOR_FLOAT16} (since API level 29)
* * {@link ANEURALNETWORKS_TENSOR_FLOAT32}
* * {@link ANEURALNETWORKS_TENSOR_QUANT8_ASYMM}
+ * * {@link ANEURALNETWORKS_TENSOR_QUANT8_ASYMM_SIGNED} (since API level 30)
*
* Supported tensor rank: up to 4
*
@@ -2186,8 +2343,11 @@ typedef enum {
* * 0: A tensor of the same {@link OperandCode} as input0. Contains the
* same data as input, but has one or more dimensions of size 1
* removed.
- * For a {@link ANEURALNETWORKS_TENSOR_QUANT8_ASYMM} tensor,
+ * For a {@link ANEURALNETWORKS_TENSOR_QUANT8_ASYMM} and
+ * {@link ANEURALNETWORKS_TENSOR_QUANT8_ASYMM_SIGNED} tensor,
* the scale and zeroPoint must be the same as input0.
+ * If all input dimensions are equal to 1 and are to be squeezed, the
+ * output shape is [1].
*
* Available since API level 28.
*/
@@ -2206,6 +2366,7 @@ typedef enum {
* * {@link ANEURALNETWORKS_TENSOR_FLOAT16} (since API level 29)
* * {@link ANEURALNETWORKS_TENSOR_FLOAT32}
* * {@link ANEURALNETWORKS_TENSOR_QUANT8_ASYMM}
+ * * {@link ANEURALNETWORKS_TENSOR_QUANT8_ASYMM_SIGNED} (since API level 30)
*
* Supported tensor rank: up to 4
*
@@ -2235,8 +2396,11 @@ typedef enum {
* Outputs:
* * 0: A tensor of the same {@link OperandCode} as input0 and rank (n - k),
* where k is the number of bits set in shrink_axis_mask.
- * For a {@link ANEURALNETWORKS_TENSOR_QUANT8_ASYMM} tensor,
+ * For a {@link ANEURALNETWORKS_TENSOR_QUANT8_ASYMM} and
+ * {@link ANEURALNETWORKS_TENSOR_QUANT8_ASYMM_SIGNED} tensor,
* the scale and zeroPoint must be the same as input0.
+ * If shrink_axis_mask is true for all input dimensions, the output
+ * shape is [1].
*
* Available since API level 28.
*/
@@ -2270,6 +2434,8 @@ typedef enum {
* * {@link ANEURALNETWORKS_TENSOR_FLOAT16} (since API level 29)
* * {@link ANEURALNETWORKS_TENSOR_FLOAT32}
* * {@link ANEURALNETWORKS_TENSOR_QUANT8_ASYMM} (since API level 29)
+ * * {@link ANEURALNETWORKS_TENSOR_QUANT8_ASYMM_SIGNED} (since API level 30)
+ * * {@link ANEURALNETWORKS_TENSOR_INT32} (since API level 30)
*
* Supported tensor rank: up to 4
*
@@ -2280,10 +2446,13 @@ typedef enum {
* * 2: An {@link ANEURALNETWORKS_INT32} scalar, and has to be one of the
* {@link FuseCode} values. Specifies the activation to
* invoke on the result.
+ * For a {@link ANEURALNETWORKS_TENSOR_INT32} tensor,
+ * the {@link FuseCode} must be "NONE".
*
* Outputs:
* * 0: A tensor of the same {@link OperandCode} as input0.
- * For a {@link ANEURALNETWORKS_TENSOR_QUANT8_ASYMM} tensor,
+ * For a {@link ANEURALNETWORKS_TENSOR_QUANT8_ASYMM} and
+ * {@link ANEURALNETWORKS_TENSOR_QUANT8_ASYMM_SIGNED} tensor,
* the scale and zeroPoint can be different from inputs' scale and zeroPoint.
*
* Available since API level 28.
@@ -2303,6 +2472,7 @@ typedef enum {
* * {@link ANEURALNETWORKS_TENSOR_FLOAT16} (since API level 29)
* * {@link ANEURALNETWORKS_TENSOR_FLOAT32}
* * {@link ANEURALNETWORKS_TENSOR_QUANT8_ASYMM}
+ * * {@link ANEURALNETWORKS_TENSOR_QUANT8_ASYMM_SIGNED} (since API level 30)
*
* Supported tensor rank: up to 4
*
@@ -2314,7 +2484,8 @@ typedef enum {
*
* Outputs:
* * 0: A tensor of the same {@link OperandCode} as input0.
- * For a {@link ANEURALNETWORKS_TENSOR_QUANT8_ASYMM} tensor,
+ * For a {@link ANEURALNETWORKS_TENSOR_QUANT8_ASYMM} and
+ * {@link ANEURALNETWORKS_TENSOR_QUANT8_ASYMM_SIGNED} tensor,
* the scale and zeroPoint must be the same as input0.
*
* Available since API level 28.
@@ -2329,6 +2500,7 @@ typedef enum {
* Supported tensor {@link OperandCode}:
* * {@link ANEURALNETWORKS_TENSOR_FLOAT16}
* * {@link ANEURALNETWORKS_TENSOR_FLOAT32}
+ * * {@link ANEURALNETWORKS_TENSOR_INT32} (since API level 30)
*
* Supported tensor rank: from 1.
*
@@ -2350,6 +2522,7 @@ typedef enum {
* * {@link ANEURALNETWORKS_TENSOR_FLOAT32}
* * {@link ANEURALNETWORKS_TENSOR_INT32}
* * {@link ANEURALNETWORKS_TENSOR_QUANT8_ASYMM}
+ * * {@link ANEURALNETWORKS_TENSOR_QUANT8_ASYMM_SIGNED} (since API level 30)
*
* Supported tensor rank: from 1
*
@@ -2361,6 +2534,7 @@ typedef enum {
*
* Outputs:
* * 0: An (n - 1)-D {@link ANEURALNETWORKS_TENSOR_INT32} tensor.
+ * If input is 1-dimensional, the output shape is [1].
*
* Available since API level 29.
*/
@@ -2376,6 +2550,7 @@ typedef enum {
* * {@link ANEURALNETWORKS_TENSOR_FLOAT32}
* * {@link ANEURALNETWORKS_TENSOR_INT32}
* * {@link ANEURALNETWORKS_TENSOR_QUANT8_ASYMM}
+ * * {@link ANEURALNETWORKS_TENSOR_QUANT8_ASYMM_SIGNED} (since API level 30)
*
* Supported tensor rank: from 1
*
@@ -2387,6 +2562,7 @@ typedef enum {
*
* Outputs:
* * 0: An (n - 1)-D {@link ANEURALNETWORKS_TENSOR_INT32} tensor.
+ * If input is 1-dimensional, the output shape is [1].
*
* Available since API level 29.
*/
@@ -2419,7 +2595,8 @@ typedef enum {
* and height, dw and dh is the log-scale relative correction factor
* for the width and height. For input0 of type
* {@link ANEURALNETWORKS_TENSOR_QUANT16_ASYMM}, this tensor should be
- * of {@link ANEURALNETWORKS_TENSOR_QUANT8_ASYMM}. Zero num_rois is
+ * of {@link ANEURALNETWORKS_TENSOR_QUANT8_ASYMM} or
+ * {@link ANEURALNETWORKS_TENSOR_QUANT8_ASYMM_SIGNED}. Zero num_rois is
* supported for this tensor.
* * 2: An 1-D {@link ANEURALNETWORKS_TENSOR_INT32} tensor, of shape
* [num_rois], specifying the batch index of each box. Boxes with
@@ -2441,7 +2618,54 @@ typedef enum {
ANEURALNETWORKS_AXIS_ALIGNED_BBOX_TRANSFORM = 41,
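
The delta encoding described above (dx, dy as relative corrections to the box center, dw, dh as log-scale corrections to its size) is conventionally applied as in the sketch below; this illustrates the encoding only and is not claimed to match any backend kernel bit-for-bit.

    #include <math.h>

    /* Applies one [dx, dy, dw, dh] delta to a box given as [x1, y1, x2, y2]:
     * the center moves by (dx*w, dy*h) and the size scales by exp(dw), exp(dh). */
    static void apply_bbox_delta(const float roi[4], const float delta[4],
                                 float out[4])
    {
      float w = roi[2] - roi[0], h = roi[3] - roi[1];
      float cx = roi[0] + 0.5f * w, cy = roi[1] + 0.5f * h;
      float ncx = cx + delta[0] * w;
      float ncy = cy + delta[1] * h;
      float nw = w * expf(delta[2]);
      float nh = h * expf(delta[3]);
      out[0] = ncx - 0.5f * nw;
      out[1] = ncy - 0.5f * nh;
      out[2] = ncx + 0.5f * nw;
      out[3] = ncy + 0.5f * nh;
    }
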
/**
- * Performs a forward LSTM on the input followed by a backward LSTM.
+ * A recurrent neural network layer that applies an LSTM cell to a
+ * sequence of inputs in forward and backward directions.
+ *
+ * The op supports cross-linking via an auxiliary input. Regular cell feeds
+ * one input into the two RNN cells in the following way:
+ *
+ * INPUT (INPUT_REVERSED)
+ * | |
+ * ---------------------
+ * | FW_LSTM BW_LSTM |
+ * ---------------------
+ * | |
+ * FW_OUT BW_OUT
+ *
+ * An op with cross-linking takes two inputs and feeds them into the RNN
+ * cells in the following way:
+ *
+ * AUX_INPUT (AUX_INPUT_REVERSED)
+ * | |
+ * INPUT | (INPUT_R'D.)|
+ * | | | |
+ * -----------------------
+ * | \ / \ / |
+ * | FW_LSTM BW_LSTM |
+ * -----------------------
+ * | |
+ * FW_OUT BW_OUT
+ *
+ * The cross-linking mode is enabled iff auxiliary input and auxiliary
+ * weights are present. When stacking this op on top of itself, this mode
+ * allows both forward and backward outputs from the previous cell to be
+ * connected to the next cell's input.
+ *
+ * Since API level 30, parallel linking mode is supported. The mode is
+ * enabled if auxiliary input is present but auxiliary weights are omitted.
+ * In this case, the cell feeds inputs into the RNN in the following way:
+ *
+ * INPUT (AUX_INPUT_REVERSED)
+ * | |
+ * ---------------------
+ * | FW_LSTM BW_LSTM |
+ * ---------------------
+ * | |
+ * FW_OUT BW_OUT
+ *
+ * When stacking this op on top of itself, this allows both forward and
+ * backward outputs from the previous cell to be connected to the next
+ * cell's corresponding inputs.
*
* Supported tensor {@link OperandCode}:
* * {@link ANEURALNETWORKS_TENSOR_FLOAT16}
@@ -2451,7 +2675,6 @@ typedef enum {
*
* All input and output tensors must be of the same type.
*
- *
* Inputs:
* * 0: The input.
* A 3-D tensor of shape:
@@ -2543,25 +2766,34 @@ typedef enum {
* * 38: The backward input cell state.
* A 2-D tensor of shape [batch_size, bw_num_units].
* * 39: The auxiliary input. Optional.
- * A 3-D tensor of shape [max_time, batch_size, input_size], where “batch_size”
- * corresponds to the batching dimension, and “input_size” is the size
- * of the input.
- * * 40: The forward auxiliary input-to-input weights. Optional.
- * A 2-D tensor of shape [fw_num_units, input_size].
- * * 41: The forward auxiliary input-to-forget weights. Optional.
- * A 2-D tensor of shape [fw_num_units, input_size].
- * * 42: The forward auxiliary input-to-cell weights. Optional.
- * A 2-D tensor of shape [fw_num_units, input_size].
- * * 43: The forward auxiliary input-to-output weights. Optional.
- * A 2-D tensor of shape [fw_num_units, input_size].
- * * 44: The backward auxiliary input-to-input weights. Optional.
- * A 2-D tensor of shape [bw_num_units, input_size].
- * * 45: The backward auxiliary input-to-forget weights. Optional.
- * A 2-D tensor of shape [bw_num_units, input_size].
- * * 46: The backward auxiliary input-to-cell weights. Optional.
- * A 2-D tensor of shape [bw_num_units, input_size].
- * * 47: The backward auxiliary input-to-output weights. Optional.
- * A 2-D tensor of shape [bw_num_units, input_size].
+ * A 3-D tensor of shape [max_time, batch_size, aux_input_size],
+ * where “batch_size” corresponds to the batching dimension, and
+ * “aux_input_size” is the size of the auxiliary input. Optional. See
+ * the docs above for the usage modes explanation.
+ * * 40: The forward auxiliary input-to-input weights.
+ * Optional. See the docs above for the usage modes explanation.
+ * A 2-D tensor of shape [fw_num_units, aux_input_size].
+ * * 41: The forward auxiliary input-to-forget weights.
+ * Optional. See the docs above for the usage modes explanation.
+ * A 2-D tensor of shape [fw_num_units, aux_input_size].
+ * * 42: The forward auxiliary input-to-cell weights.
+ * Optional. See the docs above for the usage modes explanation.
+ * A 2-D tensor of shape [fw_num_units, aux_input_size].
+ * * 43: The forward auxiliary input-to-output weights.
+ * Optional. See the docs above for the usage modes explanation.
+ * A 2-D tensor of shape [fw_num_units, aux_input_size].
+ * * 44: The backward auxiliary input-to-input weights.
+ * Optional. See the docs above for the usage modes explanation.
+ * A 2-D tensor of shape [bw_num_units, aux_input_size].
+ * * 45: The backward auxiliary input-to-forget weights.
+ * Optional. See the docs above for the usage modes explanation.
+ * A 2-D tensor of shape [bw_num_units, aux_input_size].
+ * * 46: The backward auxiliary input-to-cell weights.
+ * Optional. See the docs above for the usage modes explanation.
+ * A 2-D tensor of shape [bw_num_units, aux_input_size].
+ * * 47: The backward auxiliary input-to-output weights.
+ * Optional. See the docs above for the usage modes explanation.
+ * A 2-D tensor of shape [bw_num_units, aux_input_size].
* * 48: The activation function.
* A value indicating the activation function:
* <ul>
@@ -2576,17 +2808,17 @@ typedef enum {
* then clipping is disabled.
* If all the input tensors have type {@link ANEURALNETWORKS_TENSOR_FLOAT32},
* this scalar must be of the type {@link ANEURALNETWORKS_FLOAT32},
- * otherwise if all the input tensors have the type {@link
- * ANEURALNETWORKS_TENSOR_FLOAT16}, this scalar must be of type {@link
- * ANEURALNETWORKS_FLOAT16}.
+ * otherwise if all the input tensors have the type
+ * {@link ANEURALNETWORKS_TENSOR_FLOAT16}, this scalar must be
+ * of type {@link ANEURALNETWORKS_FLOAT16}.
* * 50: The clipping threshold for the output from the
* projection layer, such that values are bound within
* [-proj_clip, proj_clip]. If set to 0.0 then clipping is disabled.
* If all the input tensors have type {@link ANEURALNETWORKS_TENSOR_FLOAT32},
* this scalar must be of the type {@link ANEURALNETWORKS_FLOAT32},
- * otherwise if all the input tensors have the type {@link
- * ANEURALNETWORKS_TENSOR_FLOAT16}, this scalar must be of type {@link
- * ANEURALNETWORKS_FLOAT16}.
+ * otherwise if all the input tensors have the type
+ * {@link ANEURALNETWORKS_TENSOR_FLOAT16}, this scalar must be
+ * of type {@link ANEURALNETWORKS_FLOAT16}.
* * 51: merge_outputs
* An {@link ANEURALNETWORKS_BOOL} scalar specifying if the outputs
* from forward and backward cells should be merged.
@@ -2633,8 +2865,36 @@ typedef enum {
* A 3-D tensor of shape:
* If time-major: [max_time, batch_size, bw_output_size]
* If batch-major: [batch_size, max_time, bw_output_size]
+ * * 2: The forward activation state output.
+ * A 2-D tensor of shape [batch_size, fw_output_size] containing an
+ * activation state from the last time step in the sequence. This
+ * output is optional and can be omitted. If this output is present
+ * then outputs 3-5 must be present as well.
+ * Available since API level 30.
+ * * 3: The forward cell state output.
+ * A tensor of shape [batch_size, fw_cell_size] containing a cell state
+ * from the last time step in the sequence. This output is optional
+ * and can be omitted. If this output is present
+ * then outputs 2, 4, 5 must be present as well.
+ * Available since API level 30.
+ * * 4: The backward activation state output.
+ * A 2-D tensor of shape [batch_size, bw_output_size] containing an
+ * activation state from the last time step in the sequence. This
+ * output is optional and can be omitted. If this output is present
+ * then outputs 2, 3, 5 must be present as well.
+ * Available since API level 30.
+ * * 5: The backward cell state output.
+ * A tensor of shape [batch_size, bw_cell_size] containing a cell state
+ * from the last time step in the sequence. This output is optional
+ * and can be omitted. If this output is present
+ * then outputs 2-4 must be present as well.
+ * Available since API level 30.
*
* Available since API level 29.
+ *
+ * Important: As of API level 29, there is no way to get the output state tensors out and NNAPI
+ * does not maintain internal states. This operator does not support the usage pattern in which
+ * multiple cells are chained and state tensors are propagated.
*/
ANEURALNETWORKS_BIDIRECTIONAL_SEQUENCE_LSTM = 42,
@@ -2662,8 +2922,8 @@ typedef enum {
* * “activation” is the function passed as the “fused_activation_function”
* argument (if not “NONE”).
*
- * The op also supports an auxiliary input. Regular cell feeds one input
- * into the two RNN cells in the following way:
+ * The op supports cross-linking via an auxiliary input. Regular cell feeds
+ * one input into the two RNN cells in the following way:
*
* INPUT (INPUT_REVERSED)
* | |
@@ -2673,8 +2933,8 @@ typedef enum {
* | |
* FW_OUT BW_OUT
*
- * An op with an auxiliary input takes two inputs and feeds them into the
- * RNN cells in the following way:
+ * An op with cross-linking takes two inputs and feeds them into the RNN
+ * cells in the following way:
*
* AUX_INPUT (AUX_INPUT_REVERSED)
* | |
@@ -2687,9 +2947,26 @@ typedef enum {
* | |
* FW_OUT BW_OUT
*
+ * The cross-linking mode is enabled iff auxiliary input and auxiliary
+ * weights are present. When stacking this op on top of itself, this mode
+ * allows both forward and backward outputs from the previous cell to be
+ * connected to the next cell's input.
+ *
+ * Since API level 30, parallel linking mode is supported. The mode is
+ * enabled if auxiliary input is present but auxiliary weights are omitted.
+ * In this case, the cell feeds inputs into the RNN in the following way:
+ *
+ * INPUT (AUX_INPUT_REVERSED)
+ * | |
+ * ---------------------
+ * | FW_RNN BW_RNN |
+ * ---------------------
+ * | |
+ * FW_OUT BW_OUT
+ *
- * While stacking this op on top of itself, this allows to connect both
- * forward and backward outputs from previous cell to the next cell's
- * inputs.
+ * When stacking this op on top of itself, this allows both forward and
+ * backward outputs from the previous cell to be connected to the next
+ * cell's corresponding inputs.
*
* Supported tensor {@link OperandCode}:
* * {@link ANEURALNETWORKS_TENSOR_FLOAT16}
@@ -2722,11 +2999,17 @@ typedef enum {
* A 2-D tensor of shape [batchSize, bwNumUnits]. Specifies a hidden
* state input for the first time step of the computation.
* * 9: auxInput.
- * A 3-D tensor. The shape is the same as of the input 0.
+ * A 3-D tensor. The shape is defined by the input 6 (timeMajor). If
+ * it is set to true, then the input has a shape [maxTime, batchSize,
+ * auxInputSize], otherwise the input has a shape [batchSize, maxTime,
+ * auxInputSize]. Can be omitted. See the docs above for the usage
+ * modes explanation.
* * 10:fwAuxWeights.
- * A 2-D tensor of shape [fwNumUnits, inputSize].
+ * A 2-D tensor of shape [fwNumUnits, auxInputSize]. Can be omitted.
+ * See the docs above for the usage modes explanation.
* * 11:bwAuxWeights.
- * A 2-D tensor of shape [bwNumUnits, inputSize].
+ * A 2-D tensor of shape [bwNumUnits, auxInputSize]. Can be omitted.
+ * See the docs above for the usage modes explanation.
* * 12:fusedActivationFunction.
* A {@link FuseCode} value indicating the activation function. If
* “NONE” is specified then it results in a linear activation.
@@ -2752,8 +3035,24 @@ typedef enum {
* (timeMajor). If it is set to true, then the shape is set to
* [maxTime, batchSize, bwNumUnits], otherwise the shape is set to
* [batchSize, maxTime, bwNumUnits].
+ * * 2: The forward hidden state output.
+ * A 2-D tensor of shape [batchSize, fwNumUnits] containing a hidden
+ * state from the last time step in the sequence. This output is
+ * optional and can be omitted. If this output is present then output
+ * 3 must be present as well.
+ * Available since API level 30.
+ * * 3: The backward hidden state output.
+ * A 2-D tensor of shape [batchSize, bwNumUnits] containing a hidden
+ * state from the last time step in the sequence. This output is
+ * optional and can be omitted. If this output is present then output
+ * 2 must be present as well.
+ * Available since API level 30.
*
* Available since API level 29.
+ *
+ * Important: As of API level 29, there is no way to get the output state tensors out and NNAPI
+ * does not maintain internal states. This operator does not support the usage pattern in which
+ * multiple cells are chained and state tensors are propagated.
*/
ANEURALNETWORKS_BIDIRECTIONAL_SEQUENCE_RNN = 43,
@@ -2780,6 +3079,7 @@ typedef enum {
* * {@link ANEURALNETWORKS_TENSOR_FLOAT16}
* * {@link ANEURALNETWORKS_TENSOR_FLOAT32}
* * {@link ANEURALNETWORKS_TENSOR_QUANT8_ASYMM}
+ * * {@link ANEURALNETWORKS_TENSOR_QUANT8_ASYMM_SIGNED} (since API level 30)
*
* Inputs:
* * 0: A 2-D Tensor of shape [num_rois, num_classes], specifying the score
@@ -2791,7 +3091,11 @@ typedef enum {
* order of the boxes corresponds with input0. For input0 of type
* {@link ANEURALNETWORKS_TENSOR_QUANT8_ASYMM}, this tensor should be of
* {@link ANEURALNETWORKS_TENSOR_QUANT16_ASYMM}, with zeroPoint of 0 and
- * scale of 0.125. Zero num_rois is supported for this tensor.
+ * scale of 0.125.
+ * For input0 of type {@link ANEURALNETWORKS_TENSOR_QUANT8_ASYMM_SIGNED},
+ * this tensor should be of {@link ANEURALNETWORKS_TENSOR_QUANT16_ASYMM},
+ * with zeroPoint of -128 and scale of 0.125.
+ * Zero num_rois is supported for this tensor.
* * 2: A 1-D {@link ANEURALNETWORKS_TENSOR_INT32} tensor, of shape
* [num_rois], specifying the batch index of each box. Boxes with
* the same batch index are grouped together.
@@ -2818,6 +3122,8 @@ typedef enum {
* [num_output_rois], specifying the score of each output box. The boxes
* are grouped by batches, but the sequential order in each batch is not
* guaranteed. For type of {@link ANEURALNETWORKS_TENSOR_QUANT8_ASYMM},
- * guaranteed. For type of {@link ANEURALNETWORKS_TENSOR_QUANT8_ASYMM},
+ * or {@link ANEURALNETWORKS_TENSOR_QUANT8_ASYMM_SIGNED},
* the scale and zero point must be the same as input0.
* * 1: A 2-D Tensor of the same {@link OperandCode} as input1, with shape
* [num_output_rois, 4], specifying the coordinates of each
@@ -2837,7 +3143,7 @@ typedef enum {
ANEURALNETWORKS_BOX_WITH_NMS_LIMIT = 44,
/**
- * Casts a tensor to a new type.
+ * Casts a tensor to a type.
*
* This operation ignores the scale and zeroPoint of quanized tensors,
* e.g. it treats a {@link ANEURALNETWORKS_TENSOR_QUANT8_ASYMM} input
@@ -2848,6 +3154,14 @@ typedef enum {
* * {@link ANEURALNETWORKS_TENSOR_FLOAT32}
* * {@link ANEURALNETWORKS_TENSOR_INT32}
* * {@link ANEURALNETWORKS_TENSOR_QUANT8_ASYMM}
+ * Since API level 30, casting tensors of the following
+ * {@link OperandCode} to the same {@link OperandCode} is supported:
+ * * {@link ANEURALNETWORKS_TENSOR_BOOL8}
+ * * {@link ANEURALNETWORKS_TENSOR_INT32}
+ * * {@link ANEURALNETWORKS_TENSOR_QUANT16_ASYMM}
+ * * {@link ANEURALNETWORKS_TENSOR_QUANT16_SYMM}
+ * * {@link ANEURALNETWORKS_TENSOR_QUANT8_ASYMM_SIGNED}
+ * * {@link ANEURALNETWORKS_TENSOR_QUANT8_SYMM}
*
* Supported tensor rank: from 1
*
@@ -2880,6 +3194,7 @@ typedef enum {
* * {@link ANEURALNETWORKS_TENSOR_FLOAT16}
* * {@link ANEURALNETWORKS_TENSOR_FLOAT32}
* * {@link ANEURALNETWORKS_TENSOR_QUANT8_ASYMM}
+ * * {@link ANEURALNETWORKS_TENSOR_QUANT8_ASYMM_SIGNED} (since API level 30)
*
* Supported tensor rank: up to 4
*
@@ -2894,7 +3209,8 @@ typedef enum {
*
* Outputs:
* * 0: A tensor of the same {@link OperandCode} and same shape as input0.
- * For a {@link ANEURALNETWORKS_TENSOR_QUANT8_ASYMM} tensor,
+ * For a {@link ANEURALNETWORKS_TENSOR_QUANT8_ASYMM} and
+ * {@link ANEURALNETWORKS_TENSOR_QUANT8_ASYMM_SIGNED} tensor,
* the scale and zeroPoint must be the same as input0.
*
* Available since API level 29.
@@ -2952,14 +3268,14 @@ typedef enum {
* * 11: A scalar, score_threshold. Boxes with scores lower than the
* threshold are filtered before sending to the NMS algorithm. The
* scalar must be of {@link ANEURALNETWORKS_FLOAT16} if input0 is of
- * {@link ANEURALNETWORKS_TENSOR_FLOAT16} and of {@link
- * ANEURALNETWORKS_FLOAT32} if input0 is of {@link
- * ANEURALNETWORKS_TENSOR_FLOAT32}.
+ * {@link ANEURALNETWORKS_TENSOR_FLOAT16} and of
+ * {@link ANEURALNETWORKS_FLOAT32} if input0 is of
+ * {@link ANEURALNETWORKS_TENSOR_FLOAT32}.
* * 12: A scalar, specifying the IoU threshold for hard NMS. The scalar
- * must be of {@link ANEURALNETWORKS_FLOAT16} if input0 is of {@link
- * ANEURALNETWORKS_TENSOR_FLOAT16} and of {@link
- * ANEURALNETWORKS_FLOAT32} if input0 is of {@link
- * ANEURALNETWORKS_TENSOR_FLOAT32}.
+ * must be of {@link ANEURALNETWORKS_FLOAT16} if input0 is of
+ * {@link ANEURALNETWORKS_TENSOR_FLOAT16} and of
+ * {@link ANEURALNETWORKS_FLOAT32} if input0 is of
+ * {@link ANEURALNETWORKS_TENSOR_FLOAT32}.
* * 13: An {@link ANEURALNETWORKS_BOOL} scalar, set to true to include
* background class in the list of label map for the output, set
* to false to not include the background. When the background
@@ -2992,6 +3308,7 @@ typedef enum {
* * {@link ANEURALNETWORKS_TENSOR_FLOAT32}
* * {@link ANEURALNETWORKS_TENSOR_INT32}
* * {@link ANEURALNETWORKS_TENSOR_QUANT8_ASYMM}
+ * * {@link ANEURALNETWORKS_TENSOR_QUANT8_ASYMM_SIGNED} (since API level 30)
*
* Supported tensor rank: from 1
*
@@ -3041,6 +3358,7 @@ typedef enum {
* * {@link ANEURALNETWORKS_TENSOR_FLOAT32}
* * {@link ANEURALNETWORKS_TENSOR_INT32}
* * {@link ANEURALNETWORKS_TENSOR_QUANT8_ASYMM}
+ * * {@link ANEURALNETWORKS_TENSOR_QUANT8_ASYMM_SIGNED} (since API level 30)
*
* Supported tensor rank: from 1
*
@@ -3052,7 +3370,8 @@ typedef enum {
* Outputs:
* * 0: An (n + 1)-D tensor with the same {@link OperandCode} and data as
* input0.
- * For a {@link ANEURALNETWORKS_TENSOR_QUANT8_ASYMM} tensor,
+ * For a {@link ANEURALNETWORKS_TENSOR_QUANT8_ASYMM} and
+ * {@link ANEURALNETWORKS_TENSOR_QUANT8_ASYMM_SIGNED} tensor,
* the scale and zeroPoint must be the same as input0.
*
* Available since API level 29.
@@ -3078,6 +3397,7 @@ typedef enum {
* * {@link ANEURALNETWORKS_TENSOR_FLOAT32}
* * {@link ANEURALNETWORKS_TENSOR_INT32}
* * {@link ANEURALNETWORKS_TENSOR_QUANT8_ASYMM}
+ * * {@link ANEURALNETWORKS_TENSOR_QUANT8_ASYMM_SIGNED} (since API level 30)
*
* Supported tensor rank: from 1
*
@@ -3092,7 +3412,8 @@ typedef enum {
*
* Outputs:
* * 0: An (n + k - 1)-D tensor with the same {@link OperandCode} as input0.
- * For a {@link ANEURALNETWORKS_TENSOR_QUANT8_ASYMM} tensor,
+ * For a {@link ANEURALNETWORKS_TENSOR_QUANT8_ASYMM} and
+ * {@link ANEURALNETWORKS_TENSOR_QUANT8_ASYMM_SIGNED} tensor,
* the scale and zeroPoint must be the same as input0.
*
* Available since API level 29.
@@ -3115,6 +3436,7 @@ typedef enum {
* * {@link ANEURALNETWORKS_TENSOR_FLOAT16}
* * {@link ANEURALNETWORKS_TENSOR_FLOAT32}
* * {@link ANEURALNETWORKS_TENSOR_QUANT8_ASYMM}
+ * * {@link ANEURALNETWORKS_TENSOR_QUANT8_ASYMM_SIGNED} (since API level 30)
*
* Inputs:
* * 0: A 4-D Tensor specifying the score of each anchor at each
@@ -3132,11 +3454,13 @@ typedef enum {
* dimensions is the channel dimension.
* * 2: A 2-D Tensor of shape [num_anchors, 4], specifying the shape of each
* predefined anchor, with format [x1, y1, x2, y2]. For input0 of type
- * {@link ANEURALNETWORKS_TENSOR_QUANT8_ASYMM}, this tensor should be of
+ * {@link ANEURALNETWORKS_TENSOR_QUANT8_ASYMM} or
+ * {@link ANEURALNETWORKS_TENSOR_QUANT8_ASYMM_SIGNED}, this tensor should be of
* {@link ANEURALNETWORKS_TENSOR_QUANT16_SYMM}, with scale of 0.125.
* * 3: A 2-D Tensor of shape [batches, 2], specifying the size of
* each image in the batch, with format [image_height, image_width].
- * For input0 of type {@link ANEURALNETWORKS_TENSOR_QUANT8_ASYMM}, this
+ * For input0 of type {@link ANEURALNETWORKS_TENSOR_QUANT8_ASYMM} or
+ * {@link ANEURALNETWORKS_TENSOR_QUANT8_ASYMM_SIGNED}, this
* tensor should be of {@link ANEURALNETWORKS_TENSOR_QUANT16_SYMM}, with
* scale of 0.125.
* * 4: An {@link ANEURALNETWORKS_FLOAT32} scalar, specifying the ratio
@@ -3163,7 +3487,8 @@ typedef enum {
* [num_output_rois], specifying the score of each output box.
* The boxes are grouped by batches, but the sequential order in
* each batch is not guaranteed. For type of
- * {@link ANEURALNETWORKS_TENSOR_QUANT8_ASYMM}, the scale and zero
+ * {@link ANEURALNETWORKS_TENSOR_QUANT8_ASYMM} or
+ * {@link ANEURALNETWORKS_TENSOR_QUANT8_ASYMM_SIGNED}, the scale and zero
* point must be the same as input0.
* * 1: A tensor of the same {@link OperandCode} as input3, of shape
* [num_output_rois, 4], specifying the coordinates of each output
@@ -3188,6 +3513,7 @@ typedef enum {
* * {@link ANEURALNETWORKS_TENSOR_FLOAT32}
* * {@link ANEURALNETWORKS_TENSOR_INT32}
* * {@link ANEURALNETWORKS_TENSOR_QUANT8_ASYMM}
+ * * {@link ANEURALNETWORKS_TENSOR_QUANT8_ASYMM_SIGNED} (since API level 30)
*
* Supported tensor rank: from 1
*
@@ -3213,6 +3539,7 @@ typedef enum {
* * {@link ANEURALNETWORKS_TENSOR_FLOAT32}
* * {@link ANEURALNETWORKS_TENSOR_INT32}
* * {@link ANEURALNETWORKS_TENSOR_QUANT8_ASYMM}
+ * * {@link ANEURALNETWORKS_TENSOR_QUANT8_ASYMM_SIGNED} (since API level 30)
*
* Supported tensor rank: from 1
*
@@ -3271,12 +3598,23 @@ typedef enum {
* * * {@link ANEURALNETWORKS_TENSOR_INT32} for bias (with scale set to
* * * input.scale * filter.scale).
*
+ * * Quantized signed (since API level 30):
+ * * * {@link ANEURALNETWORKS_TENSOR_QUANT8_ASYMM_SIGNED} for input, filter, and output.
+ * * * {@link ANEURALNETWORKS_TENSOR_INT32} for bias (with scale set to
+ * * * input.scale * filter.scale).
+ *
* * Quantized with symmetric per channel quantization for the filter:
* * * {@link ANEURALNETWORKS_TENSOR_QUANT8_ASYMM} for input, and output.
* * * {@link ANEURALNETWORKS_TENSOR_QUANT8_SYMM_PER_CHANNEL} for filter.
* * * {@link ANEURALNETWORKS_TENSOR_INT32} for bias (scale set to 0.0,
* * * each value scaling is separate and equal to input.scale * filter.scales[channel]).
*
+ * * Quantized signed with filter symmetric per channel quantization (since API level 30):
+ * * * {@link ANEURALNETWORKS_TENSOR_QUANT8_ASYMM_SIGNED} for input, and output.
+ * * * {@link ANEURALNETWORKS_TENSOR_QUANT8_SYMM_PER_CHANNEL} for filter.
+ * * * {@link ANEURALNETWORKS_TENSOR_INT32} for bias (scale set to 0.0,
+ * * * each value scaling is separate and equal to input.scale * filter.scales[channel]).
+ *
* Supported tensor rank: 4, with "NHWC" or "NCHW" data layout.
* With the default data layout NHWC, the data is stored in the order of:
* [batch, height, width, channels]. Alternatively, the data layout could
@@ -3295,8 +3633,9 @@ typedef enum {
* {@link ANeuralNetworksSymmPerChannelQuantParams}) must be set to 0.
* * 2: A 1-D tensor, of shape [depth_out], specifying the bias. For input
* tensor of type {@link ANEURALNETWORKS_TENSOR_FLOAT32} or
- * {@link ANEURALNETWORKS_TENSOR_FLOAT16}, the bias must be of the same
- * type. For filter tensor of {@link ANEURALNETWORKS_TENSOR_QUANT8_ASYMM},
+ * {@link ANEURALNETWORKS_TENSOR_FLOAT16}, the bias must be of the same type.
+ * For filter tensor of {@link ANEURALNETWORKS_TENSOR_QUANT8_ASYMM} and
+ * {@link ANEURALNETWORKS_TENSOR_QUANT8_ASYMM_SIGNED}
* the bias should be of {@link ANEURALNETWORKS_TENSOR_INT32}, with zeroPoint
* of 0 and bias_scale == input_scale * filter_scale. For filter tensor
* of {@link ANEURALNETWORKS_TENSOR_QUANT8_SYMM_PER_CHANNEL}, the bias
@@ -3316,7 +3655,7 @@ typedef enum {
* * 8: An {@link ANEURALNETWORKS_INT32} scalar, specifying the stride when
* walking through input in the ‘height’ dimension.
* * 9: An {@link ANEURALNETWORKS_INT32} scalar, specifying the number of
- groups.
+ * groups.
* * 10: An {@link ANEURALNETWORKS_INT32} scalar, and has to be one of the
* {@link FuseCode} values. Specifies the activation to
* invoke on the result.
@@ -3330,12 +3669,14 @@ typedef enum {
* [depth_out, filter_height, filter_width, depth_group], specifying
* the filter, where depth_out must be divisible by num_groups. For
* tensor of type {@link ANEURALNETWORKS_TENSOR_QUANT8_SYMM_PER_CHANNEL}
- * the channel dimension (channelDim at
- * {@link ANeuralNetworksSymmPerChannelQuantParams}) must be set to 0.
+ * the channel dimension (ANeuralNetworksSymmPerChannelQuantParams::channelDim)
+ * must be set to 0.
* * 2: A 1-D tensor, of shape [depth_out], specifying the bias. For input
* tensor of type {@link ANEURALNETWORKS_TENSOR_FLOAT32} or
- * {@link ANEURALNETWORKS_TENSOR_FLOAT16}, the bias must be of the same
- * type. For filter tensor of {@link ANEURALNETWORKS_TENSOR_QUANT8_ASYMM},
+ * {@link ANEURALNETWORKS_TENSOR_FLOAT16}, the bias must be of the same type.
+ * For filter tensor of {@link ANEURALNETWORKS_TENSOR_QUANT8_ASYMM} and
+ * {@link ANEURALNETWORKS_TENSOR_QUANT8_ASYMM_SIGNED}
* the bias should be of {@link ANEURALNETWORKS_TENSOR_INT32}, with zeroPoint
* of 0 and bias_scale == input_scale * filter_scale. For filter tensor
* of {@link ANEURALNETWORKS_TENSOR_QUANT8_SYMM_PER_CHANNEL}, the bias
@@ -3360,7 +3701,8 @@ typedef enum {
* Outputs:
* * 0: The output 4-D tensor, of shape
* [batches, out_height, out_width, depth_out].
- * For a {@link ANEURALNETWORKS_TENSOR_QUANT8_ASYMM} tensor,
+ * For a {@link ANEURALNETWORKS_TENSOR_QUANT8_ASYMM} and
+ * {@link ANEURALNETWORKS_TENSOR_QUANT8_ASYMM_SIGNED} tensor,
* the scale and zeroPoint can be different from inputs' scale and zeroPoint.
*
* Available since API level 29.
@@ -3382,6 +3724,7 @@ typedef enum {
* * {@link ANEURALNETWORKS_TENSOR_FLOAT16}
* * {@link ANEURALNETWORKS_TENSOR_FLOAT32}
* * {@link ANEURALNETWORKS_TENSOR_QUANT8_ASYMM}
+ * * {@link ANEURALNETWORKS_TENSOR_QUANT8_ASYMM_SIGNED} (since API level 30)
*
* Supported tensor rank: 4, with "NHWC" or "NCHW" data layout.
* With the default data layout NHWC, the data is stored in the order of:
@@ -3398,13 +3741,18 @@ typedef enum {
* {@link ANEURALNETWORKS_TENSOR_QUANT8_ASYMM}, this tensor should
* be of {@link ANEURALNETWORKS_TENSOR_QUANT16_ASYMM}, with zeroPoint
* of 0 and scale of 0.125.
+ * For input0 of type
+ * {@link ANEURALNETWORKS_TENSOR_QUANT8_ASYMM_SIGNED}, this tensor
+ * should be of {@link ANEURALNETWORKS_TENSOR_QUANT16_ASYMM}, with
+ * zeroPoint of -128 and scale of 0.125.
* * 2: An {@link ANEURALNETWORKS_BOOL} scalar, set to true to specify
* NCHW data layout for input0. Set to false for NHWC.
*
* Outputs:
* * 0: A tensor of the same {@link OperandCode} as input0, with shape
* [num_boxes, num_keypoints], specifying score of the keypoints.
- * For a {@link ANEURALNETWORKS_TENSOR_QUANT8_ASYMM} tensor,
+ * For a {@link ANEURALNETWORKS_TENSOR_QUANT8_ASYMM} or
+ * {@link ANEURALNETWORKS_TENSOR_QUANT8_ASYMM_SIGNED} tensor,
* the scale and zeroPoint can be different from input0 scale and zeroPoint.
* * 1: A tensor of the same {@link OperandCode} as input1, with shape
* [num_boxes, num_keypoints, 2], specifying the location of
@@ -3447,19 +3795,19 @@ typedef enum {
* * 0: An n-D tensor, specifying the tensor to be normalized.
* * 1: A scalar, specifying gamma, the scale applied to the normalized
* tensor. The scalar must be of {@link ANEURALNETWORKS_FLOAT16} if
- * input0 is of {@link ANEURALNETWORKS_TENSOR_FLOAT16} and of {@link
- * ANEURALNETWORKS_FLOAT32} if input0 is of {@link
- * ANEURALNETWORKS_TENSOR_FLOAT32}.
+ * input0 is of {@link ANEURALNETWORKS_TENSOR_FLOAT16} and of
+ * {@link ANEURALNETWORKS_FLOAT32} if input0 is of
+ * {@link ANEURALNETWORKS_TENSOR_FLOAT32}.
* * 2: A scalar, specifying beta, the offset applied to the normalized
* tensor. The scalar must be of {@link ANEURALNETWORKS_FLOAT16} if
- * input0 is of {@link ANEURALNETWORKS_TENSOR_FLOAT16} and of {@link
- * ANEURALNETWORKS_FLOAT32} if input0 is of {@link
- * ANEURALNETWORKS_TENSOR_FLOAT32}.
+ * input0 is of {@link ANEURALNETWORKS_TENSOR_FLOAT16} and of
+ * {@link ANEURALNETWORKS_FLOAT32} if input0 is of
+ * {@link ANEURALNETWORKS_TENSOR_FLOAT32}.
* * 3: A scalar, specifying epsilon, the small value added to variance to
* avoid dividing by zero. The scalar must be of {@link ANEURALNETWORKS_FLOAT16} if
- * input0 is of {@link ANEURALNETWORKS_TENSOR_FLOAT16} and of {@link
- * ANEURALNETWORKS_FLOAT32} if input0 is of {@link
- * ANEURALNETWORKS_TENSOR_FLOAT32}.
+ * input0 is of {@link ANEURALNETWORKS_TENSOR_FLOAT16} and of
+ * {@link ANEURALNETWORKS_FLOAT32} if input0 is of
+ * {@link ANEURALNETWORKS_TENSOR_FLOAT32}.
* * 4: An {@link ANEURALNETWORKS_BOOL} scalar, set to true to specify
* NCHW data layout for input0 and output0. Set to false for NHWC.
*
@@ -3479,6 +3827,7 @@ typedef enum {
* * {@link ANEURALNETWORKS_TENSOR_FLOAT32}
* * {@link ANEURALNETWORKS_TENSOR_INT32}
* * {@link ANEURALNETWORKS_TENSOR_QUANT8_ASYMM}
+ * * {@link ANEURALNETWORKS_TENSOR_QUANT8_ASYMM_SIGNED} (since API level 30)
*
* Supported tensor rank: from 1
*
@@ -3505,6 +3854,7 @@ typedef enum {
* * {@link ANEURALNETWORKS_TENSOR_FLOAT32}
* * {@link ANEURALNETWORKS_TENSOR_INT32}
* * {@link ANEURALNETWORKS_TENSOR_QUANT8_ASYMM}
+ * * {@link ANEURALNETWORKS_TENSOR_QUANT8_ASYMM_SIGNED} (since API level 30)
*
* Supported tensor rank: from 1
*
@@ -3644,6 +3994,7 @@ typedef enum {
* * {@link ANEURALNETWORKS_TENSOR_FLOAT32}
* * {@link ANEURALNETWORKS_TENSOR_INT32}
* * {@link ANEURALNETWORKS_TENSOR_QUANT8_ASYMM}
+ * * {@link ANEURALNETWORKS_TENSOR_QUANT8_ASYMM_SIGNED} (since API level 30)
*
* Supported tensor rank: from 1.
*
@@ -3656,7 +4007,8 @@ typedef enum {
*
* Outputs:
* * 0: A tensor of the same {@link OperandCode} as input0.
- * For a {@link ANEURALNETWORKS_TENSOR_QUANT8_ASYMM} tensor,
+ * For a {@link ANEURALNETWORKS_TENSOR_QUANT8_ASYMM} and
+ * {@link ANEURALNETWORKS_TENSOR_QUANT8_ASYMM_SIGNED} tensor,
* the scale and zeroPoint can be different from inputs' scale and zeroPoint.
*
* Available since API level 29.
@@ -3671,6 +4023,7 @@ typedef enum {
* * {@link ANEURALNETWORKS_TENSOR_FLOAT32}
* * {@link ANEURALNETWORKS_TENSOR_INT32}
* * {@link ANEURALNETWORKS_TENSOR_QUANT8_ASYMM}
+ * * {@link ANEURALNETWORKS_TENSOR_QUANT8_ASYMM_SIGNED} (since API level 30)
*
* Supported tensor rank: from 1.
*
@@ -3683,7 +4036,8 @@ typedef enum {
*
* Outputs:
* * 0: A tensor of the same {@link OperandCode} as input0.
- * For a {@link ANEURALNETWORKS_TENSOR_QUANT8_ASYMM} tensor,
+ * For a {@link ANEURALNETWORKS_TENSOR_QUANT8_ASYMM} and
+ * {@link ANEURALNETWORKS_TENSOR_QUANT8_ASYMM_SIGNED} tensor,
* the scale and zeroPoint can be different from inputs' scale and zeroPoint.
*
* Available since API level 29.
@@ -3719,6 +4073,7 @@ typedef enum {
* * {@link ANEURALNETWORKS_TENSOR_FLOAT32}
* * {@link ANEURALNETWORKS_TENSOR_INT32}
* * {@link ANEURALNETWORKS_TENSOR_QUANT8_ASYMM}
+ * * {@link ANEURALNETWORKS_TENSOR_QUANT8_ASYMM_SIGNED} (since API level 30)
*
* Supported tensor rank: from 1
*
@@ -3744,6 +4099,7 @@ typedef enum {
* * {@link ANEURALNETWORKS_TENSOR_FLOAT16}
* * {@link ANEURALNETWORKS_TENSOR_FLOAT32}
* * {@link ANEURALNETWORKS_TENSOR_QUANT8_ASYMM}
+ * * {@link ANEURALNETWORKS_TENSOR_QUANT8_ASYMM_SIGNED} (since API level 30)
*
* Supported tensor rank: up to 4
*
@@ -3761,7 +4117,8 @@ typedef enum {
* pad value must be of {@link ANEURALNETWORKS_FLOAT16}.
* For input tensor of {@link ANEURALNETWORKS_TENSOR_FLOAT32}, the
* pad value must be of {@link ANEURALNETWORKS_FLOAT32}.
- * For input tensor of {@link ANEURALNETWORKS_TENSOR_QUANT8_ASYMM},
+ * For input tensor of {@link ANEURALNETWORKS_TENSOR_QUANT8_ASYMM} and
+ * {@link ANEURALNETWORKS_TENSOR_QUANT8_ASYMM_SIGNED},
* the pad value must be of {@link ANEURALNETWORKS_INT32}. The
* scale and zeroPoint are assumed to be the same as in input0.
*
@@ -3773,7 +4130,8 @@ typedef enum {
* of the padding:
* output0.dimension[i] =
* padding[i, 0] + input0.dimension[i] + padding[i, 1]
- * For a {@link ANEURALNETWORKS_TENSOR_QUANT8_ASYMM} tensor,
+ * For a {@link ANEURALNETWORKS_TENSOR_QUANT8_ASYMM} and
+ * {@link ANEURALNETWORKS_TENSOR_QUANT8_ASYMM_SIGNED} tensor,
* the scale and zeroPoint must be the same as input0.
*
* Available since API level 29.
@@ -3836,6 +4194,7 @@ typedef enum {
* * {@link ANEURALNETWORKS_TENSOR_FLOAT16}
* * {@link ANEURALNETWORKS_TENSOR_FLOAT32}
* * {@link ANEURALNETWORKS_TENSOR_QUANT8_ASYMM}
+ * * {@link ANEURALNETWORKS_TENSOR_QUANT8_ASYMM_SIGNED} (since API level 30)
*
* Supported tensor rank: from 1
*
@@ -3846,8 +4205,9 @@ typedef enum {
*
* Outputs:
* * 0: A tensor of the same {@link OperandCode} as input0.
- * For a {@link ANEURALNETWORKS_TENSOR_QUANT8_ASYMM} tensor,
- * the scale and zeroPoint can be diffent from the input0 scale and zeroPoint.
+ * For a {@link ANEURALNETWORKS_TENSOR_QUANT8_ASYMM} and
+ * {@link ANEURALNETWORKS_TENSOR_QUANT8_ASYMM_SIGNED} tensor,
+ * the scale and zeroPoint can be different from the input0 scale and zeroPoint.
*
* Available since API level 29.
*/
@@ -3856,14 +4216,23 @@ typedef enum {
/**
* Quantizes the input tensor.
*
- * The formula is:
+ * The formula for {@link ANEURALNETWORKS_TENSOR_QUANT8_ASYMM} output tensor is:
*
* output = max(0, min(255, round(input / scale) + zeroPoint))
*
- * Supported tensor {@link OperandCode}:
+ * The formula for {@link ANEURALNETWORKS_TENSOR_QUANT8_ASYMM_SIGNED} output
+ * tensor is:
+ *
+ * output = max(-128, min(127, round(input / scale) + zeroPoint))
+ *
+ * Supported input tensor {@link OperandCode}:
* * {@link ANEURALNETWORKS_TENSOR_FLOAT16}
* * {@link ANEURALNETWORKS_TENSOR_FLOAT32}
*
+ * Supported output tensor {@link OperandCode}:
+ * * {@link ANEURALNETWORKS_TENSOR_QUANT8_ASYMM}
+ * * {@link ANEURALNETWORKS_TENSOR_QUANT8_ASYMM_SIGNED} (since API level 30)
+ *
* Supported tensor rank: from 1
*
* Inputs:
@@ -3871,7 +4240,8 @@ typedef enum {
*
* Outputs:
* * 0: The output tensor of same shape as input0, but with
- * {@link ANEURALNETWORKS_TENSOR_QUANT8_ASYMM}.
+ * {@link ANEURALNETWORKS_TENSOR_QUANT8_ASYMM} or
+ * {@link ANEURALNETWORKS_TENSOR_QUANT8_ASYMM_SIGNED}.
*
* Available since API level 29.
*/
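A minimal element-wise sketch of the two formulas above in plain C, for reference only (these helpers are not part of the NNAPI surface; the clamp bounds follow the unsigned and signed 8-bit variants documented above):

    #include <math.h>
    #include <stdint.h>

    /* Reference quantization for ANEURALNETWORKS_TENSOR_QUANT8_ASYMM:
     * output = max(0, min(255, round(input / scale) + zeroPoint)) */
    static uint8_t quantize_asymm_u8(float input, float scale, int32_t zeroPoint)
    {
        long q = lroundf(input / scale) + zeroPoint;
        if (q < 0) q = 0;
        if (q > 255) q = 255;
        return (uint8_t)q;
    }

    /* Signed variant, ANEURALNETWORKS_TENSOR_QUANT8_ASYMM_SIGNED (API level 30):
     * output = max(-128, min(127, round(input / scale) + zeroPoint)) */
    static int8_t quantize_asymm_s8(float input, float scale, int32_t zeroPoint)
    {
        long q = lroundf(input / scale) + zeroPoint;
        if (q < -128) q = -128;
        if (q > 127) q = 127;
        return (int8_t)q;
    }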
@@ -3995,7 +4365,8 @@ typedef enum {
* * 1: A scalar {@link ANEURALNETWORKS_INT32}, specifying the number of
* independent samples to draw for each row slice.
* * 2: A 1-D {@link ANEURALNETWORKS_TENSOR_INT32} tensor with shape [2],
- * specifying seeds used to initialize the random distribution.
+ * specifying seeds used to initialize the random distribution. If both
+ * provided seeds are 0, both will be randomly generated.
* Outputs:
* * 0: A 2-D {@link ANEURALNETWORKS_TENSOR_INT32} tensor with shape
* [batches, samples], containing the drawn samples.
@@ -4026,6 +4397,8 @@ typedef enum {
*
* Outputs:
* * 0: A tensor of the same {@link OperandCode} as input0.
+ * If all dimensions are reduced and keep_dims is false, the output
+ * shape is [1].
*
* Available since API level 29.
*/
@@ -4053,6 +4426,8 @@ typedef enum {
*
* Outputs:
* * 0: A tensor of the same {@link OperandCode} as input0.
+ * If all dimensions are reduced and keep_dims is false, the output
+ * shape is [1].
*
* Available since API level 29.
*/
@@ -4070,6 +4445,7 @@ typedef enum {
* * {@link ANEURALNETWORKS_TENSOR_FLOAT16}
* * {@link ANEURALNETWORKS_TENSOR_FLOAT32}
* * {@link ANEURALNETWORKS_TENSOR_QUANT8_ASYMM}
+ * * {@link ANEURALNETWORKS_TENSOR_QUANT8_ASYMM_SIGNED} (since API level 30)
*
* Supported tensor rank: up to 4
*
@@ -4082,7 +4458,10 @@ typedef enum {
*
* Outputs:
* * 0: A tensor of the same {@link OperandCode} as input0.
- * For a {@link ANEURALNETWORKS_TENSOR_QUANT8_ASYMM} tensor,
+ * If all dimensions are reduced and keep_dims is false, the output
+ * shape is [1].
+ * For a {@link ANEURALNETWORKS_TENSOR_QUANT8_ASYMM} and
+ * {@link ANEURALNETWORKS_TENSOR_QUANT8_ASYMM_SIGNED} tensor,
* the scale and zeroPoint must be the same as input0.
*
* Available since API level 29.
@@ -4101,6 +4480,7 @@ typedef enum {
* * {@link ANEURALNETWORKS_TENSOR_FLOAT16}
* * {@link ANEURALNETWORKS_TENSOR_FLOAT32}
* * {@link ANEURALNETWORKS_TENSOR_QUANT8_ASYMM}
+ * * {@link ANEURALNETWORKS_TENSOR_QUANT8_ASYMM_SIGNED} (since API level 30)
*
* Supported tensor rank: up to 4
*
@@ -4113,7 +4493,10 @@ typedef enum {
*
* Outputs:
* * 0: A tensor of the same {@link OperandCode} as input0.
- * For a {@link ANEURALNETWORKS_TENSOR_QUANT8_ASYMM} tensor,
+ * If all dimensions are reduced and keep_dims is false, the output
+ * shape is [1].
+ * For a {@link ANEURALNETWORKS_TENSOR_QUANT8_ASYMM} and
+ * {@link ANEURALNETWORKS_TENSOR_QUANT8_ASYMM_SIGNED} tensor,
* the scale and zeroPoint must be the same as input0.
*
* Available since API level 29.
@@ -4142,6 +4525,8 @@ typedef enum {
*
* Outputs:
* * 0: A tensor of the same {@link OperandCode} as input0.
+ * If all dimensions are reduced and keep_dims is false, the output
+ * shape is [1].
*
* Available since API level 29.
*/
@@ -4169,6 +4554,8 @@ typedef enum {
*
* Outputs:
* * 0: A tensor of the same {@link OperandCode} as input0.
+ * If all dimensions are reduced and keep_dims is false, the output
+ * shape is [1].
*
* Available since API level 29.
*/
@@ -4188,9 +4575,10 @@ typedef enum {
* interpolation.
*
* Supported tensor {@link OperandCode}:
- * * {@link ANEURALNETWORKS_TENSOR_FLOAT16} (since API level 29)
+ * * {@link ANEURALNETWORKS_TENSOR_FLOAT16}
* * {@link ANEURALNETWORKS_TENSOR_FLOAT32}
* * {@link ANEURALNETWORKS_TENSOR_QUANT8_ASYMM}
+ * * {@link ANEURALNETWORKS_TENSOR_QUANT8_ASYMM_SIGNED} (since API level 30)
*
* Supported tensor rank: 4, with "NHWC" or "NCHW" data layout.
* With the default data layout NHWC, the data is stored in the order of:
@@ -4229,7 +4617,8 @@ typedef enum {
* Outputs:
* * 0: A tensor of the same {@link OperandCode} as input0. The output
* shape is [num_rois, out_height, out_width, depth].
- * For a {@link ANEURALNETWORKS_TENSOR_QUANT8_ASYMM} tensor,
+ * For a {@link ANEURALNETWORKS_TENSOR_QUANT8_ASYMM} and
+ * {@link ANEURALNETWORKS_TENSOR_QUANT8_ASYMM_SIGNED} tensor,
* the scale and zeroPoint can be different from the input0 scale and zeroPoint.
*
* Available since API level 29.
@@ -4252,6 +4641,7 @@ typedef enum {
* * {@link ANEURALNETWORKS_TENSOR_FLOAT16}
* * {@link ANEURALNETWORKS_TENSOR_FLOAT32}
* * {@link ANEURALNETWORKS_TENSOR_QUANT8_ASYMM}
+ * * {@link ANEURALNETWORKS_TENSOR_QUANT8_ASYMM_SIGNED} (since API level 30)
*
* Supported tensor rank: 4, with "NHWC" or "NCHW" data layout.
* With the default data layout NHWC, the data is stored in the order of:
@@ -4262,7 +4652,8 @@ typedef enum {
* * 0: A 4-D tensor, specifying the feature map.
* * 1: A 2-D Tensor of shape [num_rois, 4], specifying the locations of
* the regions of interest, each line with format [x1, y1, x2, y2].
- * For input0 of type {@link ANEURALNETWORKS_TENSOR_QUANT8_ASYMM},
+ * For input0 of type {@link ANEURALNETWORKS_TENSOR_QUANT8_ASYMM} and
+ * {@link ANEURALNETWORKS_TENSOR_QUANT8_ASYMM_SIGNED} tensor,
* this tensor should be of {@link ANEURALNETWORKS_TENSOR_QUANT16_ASYMM},
* with zeroPoint of 0 and scale of 0.125.
* * 2: An 1-D {@link ANEURALNETWORKS_TENSOR_INT32} tensor, of shape
@@ -4282,7 +4673,8 @@ typedef enum {
* Outputs:
* * 0: A tensor of the same {@link OperandCode} as input0. The output
* shape is [num_rois, out_height, out_width, depth].
- * For a {@link ANEURALNETWORKS_TENSOR_QUANT8_ASYMM} tensor,
+ * For input0 of type {@link ANEURALNETWORKS_TENSOR_QUANT8_ASYMM} and
+ * {@link ANEURALNETWORKS_TENSOR_QUANT8_ASYMM_SIGNED} tensor,
* the scale and zeroPoint must be the same as input0.
*
* Available since API level 29.
@@ -4319,6 +4711,7 @@ typedef enum {
* * {@link ANEURALNETWORKS_TENSOR_FLOAT32}
* * {@link ANEURALNETWORKS_TENSOR_INT32}
* * {@link ANEURALNETWORKS_TENSOR_QUANT8_ASYMM}
+ * * {@link ANEURALNETWORKS_TENSOR_QUANT8_ASYMM_SIGNED} (since API level 30)
*
* Supported tensor rank: from 1
*
@@ -4329,7 +4722,8 @@ typedef enum {
* true) or input2 (if false).
* * 1: An input tensor of the same shape as input0.
* * 2: An input tensor of the same shape and type as input1.
- * For a {@link ANEURALNETWORKS_TENSOR_QUANT8_ASYMM} tensor,
+ * For a {@link ANEURALNETWORKS_TENSOR_QUANT8_ASYMM}
+ * and {@link ANEURALNETWORKS_TENSOR_QUANT8_ASYMM_SIGNED} tensor,
+ * the scale and zeroPoint can be different from the input1 scale and zeroPoint.
*
* Outputs:
@@ -4337,6 +4731,7 @@ typedef enum {
* For a {@link ANEURALNETWORKS_TENSOR_QUANT8_ASYMM} tensor,
* the scale and zeroPoint can be different from inputs' scale and zeroPoint.
*
+ * Available since API level 29.
*/
ANEURALNETWORKS_SELECT = 84,
@@ -4376,6 +4771,7 @@ typedef enum {
* * {@link ANEURALNETWORKS_TENSOR_FLOAT32}
* * {@link ANEURALNETWORKS_TENSOR_INT32}
* * {@link ANEURALNETWORKS_TENSOR_QUANT8_ASYMM}
+ * * {@link ANEURALNETWORKS_TENSOR_QUANT8_ASYMM_SIGNED} (since API level 30)
*
* Supported tensor rank: from 1
*
@@ -4388,7 +4784,8 @@ typedef enum {
*
* Outputs:
* * 0: An n-D tensor of the same type as the input containing the slice.
- * For a {@link ANEURALNETWORKS_TENSOR_QUANT8_ASYMM} tensor,
+ * For a {@link ANEURALNETWORKS_TENSOR_QUANT8_ASYMM} and
+ * {@link ANEURALNETWORKS_TENSOR_QUANT8_ASYMM_SIGNED} tensor,
* its scale and zeroPoint have to be the same as the input0 scale and zeroPoint.
*
* Available since API level 29.
@@ -4403,6 +4800,7 @@ typedef enum {
* * {@link ANEURALNETWORKS_TENSOR_FLOAT32}
* * {@link ANEURALNETWORKS_TENSOR_INT32}
* * {@link ANEURALNETWORKS_TENSOR_QUANT8_ASYMM}
+ * * {@link ANEURALNETWORKS_TENSOR_QUANT8_ASYMM_SIGNED} (since API level 30)
*
* Supported tensor rank: from 1
*
@@ -4415,7 +4813,8 @@ typedef enum {
*
* Outputs:
* * 0 ~ (num_splits - 1): Resulting subtensors.
- * For a {@link ANEURALNETWORKS_TENSOR_QUANT8_ASYMM} tensor,
+ * For a {@link ANEURALNETWORKS_TENSOR_QUANT8_ASYMM} and
+ * {@link ANEURALNETWORKS_TENSOR_QUANT8_ASYMM_SIGNED} tensor,
* the scale and zeroPoint must be the same as input0.
*
* Available since API level 29.
@@ -4455,6 +4854,7 @@ typedef enum {
* * {@link ANEURALNETWORKS_TENSOR_FLOAT32}
* * {@link ANEURALNETWORKS_TENSOR_INT32}
* * {@link ANEURALNETWORKS_TENSOR_QUANT8_ASYMM}
+ * * {@link ANEURALNETWORKS_TENSOR_QUANT8_ASYMM_SIGNED} (since API level 30)
*
* Supported tensor rank: from 1
*
@@ -4465,7 +4865,8 @@ typedef enum {
*
* Outputs:
* * 0: A tiled tensor of the same {@link OperandCode} and rank as `input`.
- * For a {@link ANEURALNETWORKS_TENSOR_QUANT8_ASYMM} tensor,
+ * For a {@link ANEURALNETWORKS_TENSOR_QUANT8_ASYMM} and
+ * {@link ANEURALNETWORKS_TENSOR_QUANT8_ASYMM_SIGNED} tensor,
* the scale and zeroPoint must be the same as input0.
*
* Available since API level 29.
@@ -4483,6 +4884,7 @@ typedef enum {
* * {@link ANEURALNETWORKS_TENSOR_FLOAT32}
* * {@link ANEURALNETWORKS_TENSOR_INT32}
* * {@link ANEURALNETWORKS_TENSOR_QUANT8_ASYMM}
+ * * {@link ANEURALNETWORKS_TENSOR_QUANT8_ASYMM_SIGNED} (since API level 30)
*
* Supported tensor rank: from 1
*
@@ -4494,7 +4896,8 @@ typedef enum {
* Outputs:
* * 0: An n-D tensor of the same type as the input, containing the k
* largest elements along each last dimensional slice.
- * For a {@link ANEURALNETWORKS_TENSOR_QUANT8_ASYMM} tensor,
+ * For a {@link ANEURALNETWORKS_TENSOR_QUANT8_ASYMM} and
+ * {@link ANEURALNETWORKS_TENSOR_QUANT8_ASYMM_SIGNED} tensor,
* the scale and zeroPoint must be the same as input0.
* * 1: An n-D tensor of type {@link ANEURALNETWORKS_TENSOR_INT32}
* containing the indices of values within the last dimension of input.
@@ -4531,6 +4934,18 @@ typedef enum {
* * * {@link ANEURALNETWORKS_TENSOR_INT32} for bias (scale set to 0.0,
* * * each value scaling is separate and equal to input.scale * filter.scales[channel]).
*
+ * Available since API level 30:
+ * * Quantized signed (since API level 30):
+ * * * {@link ANEURALNETWORKS_TENSOR_QUANT8_ASYMM_SIGNED} for input, filter, and output.
+ * * * {@link ANEURALNETWORKS_TENSOR_INT32} for bias (with scale set to
+ * * * input.scale * filter.scale).
+ *
+ * * Quantized signed with filter symmetric per channel quantization (since API level 30):
+ * * * {@link ANEURALNETWORKS_TENSOR_QUANT8_ASYMM_SIGNED} for input, and output.
+ * * * {@link ANEURALNETWORKS_TENSOR_QUANT8_SYMM_PER_CHANNEL} for filter.
+ * * * {@link ANEURALNETWORKS_TENSOR_INT32} for bias (scale set to 0.0,
+ * * * each value scaling is separate and equal to input.scale * filter.scales[channel]).
+ *
* Supported tensor rank: 4, with "NHWC" or "NCHW" data layout.
* With the default data layout NHWC, the data is stored in the order of:
* [batch, height, width, channels]. Alternatively, the data layout could
@@ -4540,24 +4955,25 @@ typedef enum {
*
* Inputs (explicit padding):
* * 0: A 4-D tensor, of shape [batches, height, width, depth_in],
- * specifying the input. Since API level 29, zero batches is supported
- * for this tensor.
+ * specifying the input.
+ * Since API level 29, zero batches is supported for this tensor.
* * 1: A 4-D tensor, of shape
* [depth_out, filter_height, filter_width, depth_in], specifying the
* filter. For tensor of type
* {@link ANEURALNETWORKS_TENSOR_QUANT8_SYMM_PER_CHANNEL} the channel
- * dimension (extraParams.channelQuant.channelDim) must be set to 0.
+ * dimension (ANeuralNetworksSymmPerChannelQuantParams::channelDim) must be set to 0.
* * 2: A 1-D tensor, of shape [depth_out], specifying the bias. For input
* tensor of type {@link ANEURALNETWORKS_TENSOR_FLOAT32} or
- * {@link ANEURALNETWORKS_TENSOR_FLOAT16}, the bias should be of the
- * same type. For input tensor of type
- * {@link ANEURALNETWORKS_TENSOR_QUANT8_ASYMM}, the bias should be
- * of {@link ANEURALNETWORKS_TENSOR_INT32}, with zeroPoint of 0 and
- * bias_scale == input_scale * filter_scale. For filter tensor of
- * {@link ANEURALNETWORKS_TENSOR_QUANT8_SYMM_PER_CHANNEL}, the bias
- * must be of {@link ANEURALNETWORKS_TENSOR_INT32}, with zeroPoint of
- * 0 and bias_scale of 0. The actual scale of each value 'i' is equal
- * to bias_scale[i] = input_scale * filter_scale[i].
+ * {@link ANEURALNETWORKS_TENSOR_FLOAT16}, the bias must be of the
+ * same type.
+ * For filter tensor of {@link ANEURALNETWORKS_TENSOR_QUANT8_ASYMM}
+ * and {@link ANEURALNETWORKS_TENSOR_QUANT8_ASYMM_SIGNED},
+ * the bias should be of {@link ANEURALNETWORKS_TENSOR_INT32},
+ * with zeroPoint of 0 and bias_scale == input_scale * filter_scale.
+ * For filter tensor of {@link ANEURALNETWORKS_TENSOR_QUANT8_SYMM_PER_CHANNEL},
+ * the bias must be of {@link ANEURALNETWORKS_TENSOR_INT32}, with zeroPoint of 0
+ * and bias_scale of 0. The actual scale of each value 'i' is equal to
+ * bias_scale[i] = input_scale * filter_scale[i].
* * 3: An {@link ANEURALNETWORKS_INT32} scalar, specifying the padding on
* the left, in the ‘width’ dimension.
* * 4: An {@link ANEURALNETWORKS_INT32} scalar, specifying the padding on
@@ -4578,24 +4994,25 @@ typedef enum {
*
* Inputs (implicit padding):
* * 0: A 4-D tensor, of shape [batches, height, width, depth_in],
- * specifying the input. Since API level 29, zero batches is supported
- * for this tensor.
+ * specifying the input.
+ * Since API level 29, zero batches is supported for this tensor.
* * 1: A 4-D tensor, of shape
* [depth_out, filter_height, filter_width, depth_in], specifying the
* filter. For tensor of type
* {@link ANEURALNETWORKS_TENSOR_QUANT8_SYMM_PER_CHANNEL} the channel
- * dimension (extraParams.channelQuant.channelDim) must be set to 0.
+ * dimension (ANeuralNetworksSymmPerChannelQuantParams::channelDim) must be set to 0.
* * 2: A 1-D tensor, of shape [depth_out], specifying the bias. For input
* tensor of type {@link ANEURALNETWORKS_TENSOR_FLOAT32} or
* {@link ANEURALNETWORKS_TENSOR_FLOAT16}, the bias should be of the
- * same type. For input tensor of type
- * {@link ANEURALNETWORKS_TENSOR_QUANT8_ASYMM}, the bias should be
- * of {@link ANEURALNETWORKS_TENSOR_INT32}, with zeroPoint of 0 and
- * bias_scale == input_scale * filter_scale. For filter tensor of
- * {@link ANEURALNETWORKS_TENSOR_QUANT8_SYMM_PER_CHANNEL}, the bias
- * must be of {@link ANEURALNETWORKS_TENSOR_INT32}, with zeroPoint of
- * 0 and bias_scale of 0. The actual scale of each value 'i' is equal
- * to bias_scale[i] = input_scale * filter_scale[i].
+ * same type.
+ * For filter tensor of {@link ANEURALNETWORKS_TENSOR_QUANT8_ASYMM}
+ * and {@link ANEURALNETWORKS_TENSOR_QUANT8_ASYMM_SIGNED},
+ * the bias should be of {@link ANEURALNETWORKS_TENSOR_INT32},
+ * with zeroPoint of 0 and bias_scale == input_scale * filter_scale.
+ * For filter tensor of {@link ANEURALNETWORKS_TENSOR_QUANT8_SYMM_PER_CHANNEL},
+ * the bias must be of {@link ANEURALNETWORKS_TENSOR_INT32}, with zeroPoint of 0
+ * and bias_scale of 0. The actual scale of each value 'i' is equal to
+ * bias_scale[i] = input_scale * filter_scale[i].
* * 3: An {@link ANEURALNETWORKS_TENSOR_INT32} tensor, specifying the output
* tensor shape.
* * 4: An {@link ANEURALNETWORKS_INT32} scalar, specifying the implicit
@@ -4614,7 +5031,8 @@ typedef enum {
* Outputs:
* * 0: The output 4-D tensor, of shape
* [batches, out_height, out_width, depth_out].
- * For a {@link ANEURALNETWORKS_TENSOR_QUANT8_ASYMM} tensor,
+ * For a {@link ANEURALNETWORKS_TENSOR_QUANT8_ASYMM} and
+ * {@link ANEURALNETWORKS_TENSOR_QUANT8_ASYMM_SIGNED} tensor,
* the scale and zeroPoint can be different from inputs' scale and zeroPoint.
*
* Available since API level 29.
@@ -4727,8 +5145,21 @@ typedef enum {
* A 3-D tensor of shape:
* If time-major: [max_time, batch_size, output_size]
* If batch-major: [batch_size, max_time, output_size]
+ * * 1: A tensor of shape [batch_size, output_size] containing a hidden
+ * state from the last time step in the sequence. This output is
+ * optional and can be omitted. If this output is present then
+ * output #2 must be present as well.
+ * Available since API level 30.
+ * * 2: A tensor of shape [batch_size, cell_size] containing a cell state
+ * from the last time step in the sequence. This output is optional
+ * and can be omitted.
+ * Available since API level 30.
*
* Available since API level 29.
+ *
+ * Important: As of API level 29, there is no way to get the output state tensors out and NNAPI
+ * does not maintain internal states. This operator does not support the usage pattern in which
+ * multiple cells are chained and state tensors are propagated.
*/
ANEURALNETWORKS_UNIDIRECTIONAL_SEQUENCE_LSTM = 92,
@@ -4784,8 +5215,16 @@ typedef enum {
* it is set to 1, then the output has a shape [maxTime, batchSize,
* numUnits], otherwise the output has a shape [batchSize, maxTime,
* numUnits].
+ * * 1: A tensor of shape [batchSize, numUnits] containing hidden state
+ * from the last time step in the sequence. This output is optional
+ * and can be omitted.
+ * Available since API level 30.
*
* Available since API level 29.
+ *
+ * Important: As of API level 29, there is no way to get the output state tensors out and NNAPI
+ * does not maintain internal states. This operator does not support the usage pattern in which
+ * multiple cells are chained and state tensors are propagated.
*/
ANEURALNETWORKS_UNIDIRECTIONAL_SEQUENCE_RNN = 93,
@@ -4800,6 +5239,7 @@ typedef enum {
* * {@link ANEURALNETWORKS_TENSOR_FLOAT16}
* * {@link ANEURALNETWORKS_TENSOR_FLOAT32}
* * {@link ANEURALNETWORKS_TENSOR_QUANT8_ASYMM}
+ * * {@link ANEURALNETWORKS_TENSOR_QUANT8_ASYMM_SIGNED} (since API level 30)
*
* Supported tensor rank: 4, with "NHWC" or "NCHW" data layout.
* With the default data layout NHWC, the data is stored in the order of:
@@ -4817,6 +5257,17 @@ typedef enum {
* height of the output tensor.
* * 3: An {@link ANEURALNETWORKS_BOOL} scalar, default to false.
* Set to true to specify NCHW data layout for input0 and output0.
+ * * 4: Align corners. An optional {@link ANEURALNETWORKS_BOOL}
+ * scalar, default to false. If True, the centers of the 4 corner
+ * pixels of the input and output tensors are aligned, preserving the
+ * values at the corner pixels.
+ * Available since API level 30.
+ * * 5: Half pixel centers. An optional {@link ANEURALNETWORKS_BOOL}
+ * scalar, default to false. If True, the pixel centers are assumed to
+ * be at (0.5, 0.5). This is the default behavior of image.resize in
+ * TF 2.0. If this parameter is True, then align_corners parameter
+ * must be False.
+ * Available since API level 30.
*
* Inputs (resizing by scale):
* * 0: A 4-D tensor, of shape [batches, height, width, depth], specifying
@@ -4835,16 +5286,377 @@ typedef enum {
* {@link ANEURALNETWORKS_FLOAT32} otherwise.
* * 3: An {@link ANEURALNETWORKS_BOOL} scalar, default to false.
* Set to true to specify NCHW data layout for input0 and output0.
+ * * 4: Align corners. An optional {@link ANEURALNETWORKS_BOOL}
+ * scalar, default to false. If True, the centers of the 4 corner
+ * pixels of the input and output tensors are aligned, preserving the
+ * values at the corner pixels.
+ * Available since API level 30.
+ * * 5: Half pixel centers. An optional {@link ANEURALNETWORKS_BOOL}
+ * scalar, default to false. If True, the pixel centers are assumed to
+ * be at (0.5, 0.5). This is the default behavior of image.resize in
+ * TF 2.0. If this parameter is True, then align_corners parameter
+ * must be False.
+ * Available since API level 30.
*
* Outputs:
* * 0: The output 4-D tensor, of shape
* [batches, new_height, new_width, depth].
- * For a {@link ANEURALNETWORKS_TENSOR_QUANT8_ASYMM} tensor,
+ * For a {@link ANEURALNETWORKS_TENSOR_QUANT8_ASYMM} and
+ * {@link ANEURALNETWORKS_TENSOR_QUANT8_ASYMM_SIGNED} tensor,
* the scale and zeroPoint must be the same as input0.
*
* Available since API level 29.
*/
ANEURALNETWORKS_RESIZE_NEAREST_NEIGHBOR = 94,
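The two optional flags documented above only change how an output index is mapped back to an input coordinate. A small per-axis sketch of the commonly used mappings (following the TF resize conventions the text refers to; the helper name and exact backend rounding are illustrative, not taken from this header):

    /* Map an output index to a fractional input coordinate along one axis. */
    static float resize_source_coord(int out_index, int in_size, int out_size,
                                     int align_corners, int half_pixel_centers)
    {
        float scale = (float)in_size / (float)out_size;
        if (half_pixel_centers) {
            /* Pixel centers assumed at (i + 0.5), as in TF 2.0 image.resize. */
            return ((float)out_index + 0.5f) * scale - 0.5f;
        }
        if (align_corners && out_size > 1) {
            /* Corner pixels of input and output are aligned. */
            return (float)out_index * (float)(in_size - 1) / (float)(out_size - 1);
        }
        return (float)out_index * scale; /* default mapping */
    }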
+
+ // Operations below are available since API level 30.
+
+ /**
+ * Quantized version of {@link ANEURALNETWORKS_LSTM}.
+ *
+ * The input and the output use asymmetric quantized types, while the rest
+ * use symmetric ones.
+ *
+ * Inputs:
+ * * 0: The input to the LSTM cell.
+ * Type: {@link ANEURALNETWORKS_TENSOR_QUANT8_ASYMM_SIGNED}
+ * Shape: [batchSize, inputSize]
+ * * 1: The input-to-input weights. Optional.
+ * Type: {@link ANEURALNETWORKS_TENSOR_QUANT8_SYMM}
+ * Shape: [numUnits, inputSize]
+ * * 2: The input-to-forget weights.
+ * Type: {@link ANEURALNETWORKS_TENSOR_QUANT8_SYMM}
+ * Shape: [numUnits, inputSize]
+ * * 3: The input-to-cell weights.
+ * Type: {@link ANEURALNETWORKS_TENSOR_QUANT8_SYMM}
+ * Shape: [numUnits, inputSize]
+ * * 4: The input-to-output weights.
+ * Type: {@link ANEURALNETWORKS_TENSOR_QUANT8_SYMM}
+ * Shape: [numUnits, inputSize]
+ * * 5: The recurrent-to-input weights. Optional.
+ * Type: {@link ANEURALNETWORKS_TENSOR_QUANT8_SYMM}
+ * Shape: [numUnits, outputSize]
+ * * 6: The recurrent-to-forget weights.
+ * Type: {@link ANEURALNETWORKS_TENSOR_QUANT8_SYMM}
+ * Shape: [numUnits, outputSize]
+ * * 7: The recurrent-to-cell weights.
+ * Type: {@link ANEURALNETWORKS_TENSOR_QUANT8_SYMM}
+ * Shape: [numUnits, outputSize]
+ * * 8: The recurrent-to-output weights.
+ * Type: {@link ANEURALNETWORKS_TENSOR_QUANT8_SYMM}
+ * Shape: [numUnits, outputSize]
+ * * 9: The cell-to-input weights (for peephole). Optional.
+ * Type: {@link ANEURALNETWORKS_TENSOR_QUANT16_SYMM}
+ * Shape: [numUnits]
+ * * 10: The cell-to-forget weights (for peephole). Optional.
+ * Type: {@link ANEURALNETWORKS_TENSOR_QUANT16_SYMM}
+ * Shape: [numUnits]
+ * * 11: The cell-to-output weights (for peephole). Optional.
+ * Type: {@link ANEURALNETWORKS_TENSOR_QUANT16_SYMM}
+ * Shape: [numUnits]
+ * * 12: The input gate bias. Quantized with scale being the
+ * product of input and weights scales and zeroPoint equal to 0.
+ * Optional.
+ * Type: {@link ANEURALNETWORKS_TENSOR_INT32}
+ * Shape: [numUnits]
+ * * 13: The forget gate bias. Quantized with scale being the
+ * product of input and weights scales and zeroPoint equal to 0.
+ * Type: {@link ANEURALNETWORKS_TENSOR_INT32}
+ * Shape: [numUnits]
+ * * 14: The cell bias. Quantized with scale being the
+ * product of input and weights scales and zeroPoint equal to 0.
+ * Type: {@link ANEURALNETWORKS_TENSOR_INT32}
+ * Shape: [numUnits]
+ * * 15: The output gate bias. Quantized with scale being the
+ * product of input and weights scales and zeroPoint equal to 0.
+ * Type: {@link ANEURALNETWORKS_TENSOR_INT32}
+ * Shape: [numUnits]
+ * * 16: The projection weights. Optional.
+ * Type: {@link ANEURALNETWORKS_TENSOR_QUANT8_SYMM}
+ * Shape: [outputSize, numUnits]
+ * * 17: The projection bias. Quantized with scale being the
+ * product of input and weights scales and zeroPoint equal to 0.
+ * Optional.
+ * Type: {@link ANEURALNETWORKS_TENSOR_INT32}
+ * Shape: [outputSize]
+ * * 18: The output from the previous time step.
+ * Type: {@link ANEURALNETWORKS_TENSOR_QUANT8_ASYMM_SIGNED}
+ * Shape: [batchSize, outputSize]
+ * * 19: The cell state from the previous time step.
+ * Type: {@link ANEURALNETWORKS_TENSOR_QUANT16_SYMM}
+ * Shape: [batchSize, numUnits]
+ * * 20: The input layer normalization weights. Used to rescale
+ * normalized inputs to activation at input gate. Optional.
+ * Type: {@link ANEURALNETWORKS_TENSOR_QUANT16_SYMM}
+ * Shape: [numUnits]
+ * * 21: The forget layer normalization weights. Used to
+ * rescale normalized inputs to activation at forget gate. Optional.
+ * Type: {@link ANEURALNETWORKS_TENSOR_QUANT16_SYMM}
+ * Shape: [numUnits]
+ * * 22: The cell layer normalization weights. Used to rescale
+ * normalized inputs to activation at cell gate. Optional.
+ * Type: {@link ANEURALNETWORKS_TENSOR_QUANT16_SYMM}
+ * Shape: [numUnits]
+ * * 23: The output layer normalization weights. Used to
+ * rescale normalized inputs to activation at output gate. Optional.
+ * Type: {@link ANEURALNETWORKS_TENSOR_QUANT16_SYMM}
+ * Shape: [numUnits]
+ * * 24: The cell clip. If provided the cell state is clipped
+ * by this value prior to the cell output activation. Optional.
+ * Type: {@link ANEURALNETWORKS_FLOAT32}.
+ * * 25: The projection clip. If provided and projection is enabled,
+ * this is used for clipping the projected values. Optional.
+ * Type: {@link ANEURALNETWORKS_FLOAT32}.
+ * * 26: The scale of the intermediate result of matmul,
+ * i.e. input to layer normalization, at input gate.
+ * Type: {@link ANEURALNETWORKS_FLOAT32}.
+ * * 27: The scale of the intermediate result of matmul,
+ * i.e. input to layer normalization, at forget gate.
+ * Type: {@link ANEURALNETWORKS_FLOAT32}.
+ * * 28: The scale of the intermediate result of matmul,
+ * i.e. input to layer normalization, at cell gate.
+ * Type: {@link ANEURALNETWORKS_FLOAT32}.
+ * * 29: The scale of the intermediate result of matmul,
+ * i.e. input to layer normalization, at output gate.
+ * Type: {@link ANEURALNETWORKS_FLOAT32}.
+ * * 30: The zero point of the hidden state, i.e. input to
+ * projection.
+ * Type: {@link ANEURALNETWORKS_INT32}.
+ * * 31: The scale of the hidden state, i.e. input to
+ * projection.
+ * Type: {@link ANEURALNETWORKS_FLOAT32}.
+ *
+ * Outputs:
+ * * 0: The output state (out).
+ * Type: {@link ANEURALNETWORKS_TENSOR_QUANT8_ASYMM_SIGNED}
+ * Shape: [batchSize, outputSize]
+ * * 1: The cell state (out).
+ * Type: {@link ANEURALNETWORKS_TENSOR_QUANT16_SYMM}
+ * Shape: [batchSize, numUnits]
+ * * 2: The output. This is effectively the same as the current
+ * "output state (out)" value.
+ * Type: {@link ANEURALNETWORKS_TENSOR_QUANT8_ASYMM_SIGNED}
+ * Shape: [batchSize, outputSize]
+ *
+ * Available since API level 30.
+ */
+ ANEURALNETWORKS_QUANTIZED_LSTM = 95,
+
+ /**
+ * Executes one of the two referenced models as determined by a boolean
+ * value.
+ *
+ * The inputs and outputs of the two referenced models must agree with the
+ * signature of this operation. That is, if the operation has (3 + n) inputs
+ * and m outputs, both models must have n inputs and m outputs with the same
+ * types, ranks (if specified), dimensions (if specified), scales,
+ * zeroPoints, and other operand parameters as the corresponding operation
+ * inputs and outputs.
+ *
+ * Inputs:
+ * * 0: A value of type {@link ANEURALNETWORKS_TENSOR_BOOL8} and shape [1]
+ * that determines which of the two referenced models to execute.
+ * The operand must have fully specified dimensions.
+ * * 1: A {@link ANEURALNETWORKS_MODEL} reference to the model to be
+ * executed if the condition is true.
+ * * 2: A {@link ANEURALNETWORKS_MODEL} reference to the model to be
+ * executed if the condition is false.
+ * * 3 ~ (n + 2): Inputs to be passed to the model selected for execution.
+ *
+ * Outputs:
+ * * 0 ~ (m - 1): Outputs produced by the selected model.
+ *
+ * Available since API level 30.
+ */
+ ANEURALNETWORKS_IF = 96,
+
+ /**
+ * Executes the body model until the condition model outputs false.
+ *
+ * The inputs to this operation are the condition model, the body model,
+ * and operand values for the first iteration of the loop. The values are
+ * implicitly split into three groups of input-output, state-only, and
+ * input-only values, as described below.
+ *
+ * The outputs of this operation are the final values of input-output
+ * operands.
+ *
+ * Both the condition and body model receive (m + k + n) inputs.
+ * * The first m (m >= 1) inputs are input-output operands. For the first
+ * iteration, these are initialized from the corresponding inputs of the
+ * WHILE operation. In subsequent iterations, their values come from the
+ * corresponding outputs of the body model produced during the previous
+ * iteration.
+ * * The next k (k >= 0) inputs are state-only operands. They are similar to
+ * the input-output operands, except that their values are no longer
+ * available after the loop terminates.
+ * * The last n (n >= 0) inputs are input-only operands. Their values come
+ * from the corresponding inputs of the WHILE operation.
+ *
+ * The body model produces (m + k) outputs.
+ * * The first m outputs are input-output operands. They become the outputs
+ * of the WHILE operation when a termination condition is reached.
+ * * The last k outputs are state-only operands. Their values are no longer
+ * available after the loop terminates.
+ *
+ * The numbers m, k, and n are inferred by the runtime as follows:
+ * m = (WHILE operation output count)
+ * k = (body model output count) - m
+ * n = (body model input count) - m - k
+ *
+ * The pseudo-code below illustrates the flow of a WHILE operation with
+ * inputs condition, body, initial_input_output, initial_state, input_only
+ * (m = 1, k = 1, n = 1):
+ *
+ * input_output = initial_input_output
+ * state = initial_state
+ * while condition(input_output, state, input_only):
+ * input_output, state = body(input_output, state, input_only)
+ * return input_output
+ *
+ * To prevent infinite loops, there is an implicit execution timeout
+ * associated with each loop ("loop timeout duration"). See {@link
+ * ANeuralNetworksExecution_setLoopTimeout}.
+ *
+ * Inputs:
+ * * 0: A {@link ANEURALNETWORKS_MODEL} reference to the condition
+ * model. The model must have (m + k + n) inputs with
+ * the same types, ranks (if specified), dimensions (if specified),
+ * scales, zeroPoints, and other operand parameters as the
+ * corresponding inputs of the WHILE operation and exactly one output
+ * of {@link ANEURALNETWORKS_TENSOR_BOOL8} and shape [1].
+ * The output operand must have fully specified dimensions.
+ * * 1: A {@link ANEURALNETWORKS_MODEL} reference to the body model.
+ * The model must have (m + k + n) inputs and (m + k) outputs with
+ * the same types, ranks (if specified), dimensions (if specified),
+ * scales, zeroPoints, and other operand parameters as the
+ * corresponding inputs and outputs of the WHILE operation.
+ * * (m inputs): Initial values for input-output operands.
+ * * (k inputs): Initial values for state-only operands.
+ * * (n inputs): Values for input-only operands.
+ *
+ * Outputs:
+ * * 0 ~ (m - 1): Outputs produced by the loop.
+ *
+ * Available since API level 30.
+ */
+ ANEURALNETWORKS_WHILE = 97,
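Because loops are guarded by the loop timeout mentioned above, a caller may want to set its own budget before computing. A short sketch; the 2-second value and the helper name are arbitrary choices, not defaults:

    #include <stdint.h>
    #include "NeuralNetworks.h" /* the header documented here; include path may differ */

    /* Set an explicit loop-timeout budget (in nanoseconds) before computing an
     * execution whose model contains WHILE operations. */
    static int set_two_second_loop_budget(ANeuralNetworksExecution* execution)
    {
        const uint64_t kTwoSecondsNs = 2ull * 1000 * 1000 * 1000;
        return ANeuralNetworksExecution_setLoopTimeout(execution, kTwoSecondsNs);
    }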
+
+ /**
+ * Computes exponential linear activation on the input tensor element-wise.
+ *
+ * The output is calculated using the following formula:
+ *
+ * ELU(x) = max(0, x) + min(0, alpha * (exp(x) - 1))
+ *
+ * Supported tensor {@link OperandCode}:
+ * * {@link ANEURALNETWORKS_TENSOR_FLOAT16}
+ * * {@link ANEURALNETWORKS_TENSOR_FLOAT32}
+ *
+ * Supported tensor rank: from 1.
+ *
+ * Inputs:
+ * * 0: A tensor, specifying the input. May be zero-sized.
+ * * 1: A scalar, specifying the alpha parameter.
+ * For input tensor of {@link ANEURALNETWORKS_TENSOR_FLOAT16},
+ * the alpha value must be of {@link ANEURALNETWORKS_FLOAT16}.
+ * For input tensor of {@link ANEURALNETWORKS_TENSOR_FLOAT32},
+ * the alpha value must be of {@link ANEURALNETWORKS_FLOAT32}.
+ *
+ * Outputs:
+ * * 0: The output tensor of same shape and type as input0.
+ *
+ * Available since API level 30.
+ */
+ ANEURALNETWORKS_ELU = 98,
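A direct element-wise transcription of the ELU formula above, for reference (helper name is illustrative):

    #include <math.h>

    /* ELU(x) = max(0, x) + min(0, alpha * (exp(x) - 1)) */
    static float elu_ref(float x, float alpha)
    {
        float pos = x > 0.0f ? x : 0.0f;
        float neg = alpha * (expf(x) - 1.0f);
        return pos + (neg < 0.0f ? neg : 0.0f);
    }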
+
+ /**
+ * Computes hard-swish activation on the input tensor element-wise.
+ *
+ * Hard swish activation is introduced in
+ * https://arxiv.org/pdf/1905.02244.pdf
+ *
+ * The output is calculated using the following formula:
+ *
+ * h-swish(x) = x * max(0, min(6, (x + 3))) / 6
+ *
+ * Supported tensor {@link OperandCode}:
+ * * {@link ANEURALNETWORKS_TENSOR_FLOAT16}
+ * * {@link ANEURALNETWORKS_TENSOR_FLOAT32}
+ * * {@link ANEURALNETWORKS_TENSOR_QUANT8_ASYMM}
+ * * {@link ANEURALNETWORKS_TENSOR_QUANT8_ASYMM_SIGNED}
+ *
+ * Supported tensor rank: from 1.
+ *
+ * Inputs:
+ * * 0: A tensor, specifying the input. May be zero-sized.
+ *
+ * Outputs:
+ * * 0: The output tensor of same shape and type as input0.
+ * Scale and zero point of this tensor may be different from the input
+ * tensor's parameters.
+ *
+ * Available since API level 30.
+ */
+ ANEURALNETWORKS_HARD_SWISH = 99,
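Likewise, the hard-swish formula above reduces to a small reference function (helper name is illustrative):

    /* h-swish(x) = x * max(0, min(6, x + 3)) / 6 */
    static float hard_swish_ref(float x)
    {
        float t = x + 3.0f;
        if (t < 0.0f) t = 0.0f;
        if (t > 6.0f) t = 6.0f;
        return x * t / 6.0f;
    }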
+
+ /**
+ * Creates a tensor filled with a scalar value.
+ *
+ * Supported output tensor {@link OperandCode}:
+ * * {@link ANEURALNETWORKS_TENSOR_FLOAT16}
+ * * {@link ANEURALNETWORKS_TENSOR_FLOAT32}
+ * * {@link ANEURALNETWORKS_TENSOR_INT32}
+ *
+ * Supported tensor rank: from 1.
+ *
+ * Inputs:
+ * * 0: A 1-D tensor, specifying the desired output tensor shape.
+ * * 1: A scalar, specifying the value to fill the output tensors with.
+ * For output tensor of {@link ANEURALNETWORKS_TENSOR_FLOAT16},
+ * the scalar must be of {@link ANEURALNETWORKS_FLOAT16}.
+ * For output tensor of {@link ANEURALNETWORKS_TENSOR_FLOAT32},
+ * the scalar must be of {@link ANEURALNETWORKS_FLOAT32}.
+ * For output tensor of {@link ANEURALNETWORKS_TENSOR_INT32},
+ * the scalar must be of {@link ANEURALNETWORKS_INT32}.
+ *
+ * Outputs:
+ * * 0: The output tensor.
+ *
+ * Available since API level 30.
+ */
+ ANEURALNETWORKS_FILL = 100,
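A sketch of wiring FILL into a model with the inputs listed above. The operand indices, the [2, 3] shape and the 0.5f fill value are illustrative only, and error checking is omitted:

    #include <stdint.h>
    #include "NeuralNetworks.h" /* the header documented here; include path may differ */

    /* Add a FILL operation producing a [2, 3] float32 tensor filled with 0.5f.
     * Operand indices (0: shape, 1: fill value, 2: output) are local to this model. */
    static int add_fill_op(ANeuralNetworksModel* model)
    {
        const uint32_t shape_dims[1] = {2};
        ANeuralNetworksOperandType shape_type = {
            .type = ANEURALNETWORKS_TENSOR_INT32, .dimensionCount = 1,
            .dimensions = shape_dims, .scale = 0.0f, .zeroPoint = 0};
        ANeuralNetworksOperandType scalar_type = {
            .type = ANEURALNETWORKS_FLOAT32, .dimensionCount = 0,
            .dimensions = NULL, .scale = 0.0f, .zeroPoint = 0};
        const uint32_t out_dims[2] = {2, 3};
        ANeuralNetworksOperandType out_type = {
            .type = ANEURALNETWORKS_TENSOR_FLOAT32, .dimensionCount = 2,
            .dimensions = out_dims, .scale = 0.0f, .zeroPoint = 0};

        ANeuralNetworksModel_addOperand(model, &shape_type);  /* operand 0 */
        ANeuralNetworksModel_addOperand(model, &scalar_type); /* operand 1 */
        ANeuralNetworksModel_addOperand(model, &out_type);    /* operand 2 */

        const int32_t shape_value[2] = {2, 3};
        const float fill_value = 0.5f;
        ANeuralNetworksModel_setOperandValue(model, 0, shape_value, sizeof(shape_value));
        ANeuralNetworksModel_setOperandValue(model, 1, &fill_value, sizeof(fill_value));

        const uint32_t inputs[2] = {0, 1};
        const uint32_t outputs[1] = {2};
        return ANeuralNetworksModel_addOperation(model, ANEURALNETWORKS_FILL,
                                                 2, inputs, 1, outputs);
    }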
+
+ /**
+ * Returns the rank of a tensor.
+ *
+ * The rank of a tensor is the number of dimensions in it. Also known as
+ * "order", "degree", "ndims".
+ *
+ * Supported tensor {@link OperandCode}:
+ * * {@link ANEURALNETWORKS_TENSOR_FLOAT16}
+ * * {@link ANEURALNETWORKS_TENSOR_FLOAT32}
+ * * {@link ANEURALNETWORKS_TENSOR_INT32}
+ * * {@link ANEURALNETWORKS_TENSOR_QUANT8_ASYMM}
+ * * {@link ANEURALNETWORKS_TENSOR_QUANT16_SYMM}
+ * * {@link ANEURALNETWORKS_TENSOR_BOOL8}
+ * * {@link ANEURALNETWORKS_TENSOR_QUANT8_SYMM_PER_CHANNEL}
+ * * {@link ANEURALNETWORKS_TENSOR_QUANT16_ASYMM}
+ * * {@link ANEURALNETWORKS_TENSOR_QUANT8_SYMM}
+ * * {@link ANEURALNETWORKS_TENSOR_QUANT8_ASYMM_SIGNED}
+ *
+ * Supported tensor rank: from 1.
+ *
+ * Inputs:
+ * * 0: The input tensor.
+ *
+ * Outputs:
+ * * 0: A scalar of {@link ANEURALNETWORKS_INT32}, specifying the rank
+ * of the input tensor.
+ *
+ * Available since API level 30.
+ */
+ ANEURALNETWORKS_RANK = 101,
} OperationCode;
/**
@@ -4880,10 +5692,11 @@ typedef enum {
* the same; for odd number of padding, padding to the ending is bigger
* than the padding to the beginning by 1.
*
- * total_padding is a function of input, stride and filter size.
+ * total_padding is a function of input, stride, dilation and filter size.
* It could be computed as follows:
- * out_size = (input + stride - 1) / stride;
- * needed_input = (out_size - 1) * stride + filter_size
+ * out_size = (input + stride - 1) / stride
+ * effective_filter_size = (filter_size - 1) * dilation + 1
+ * needed_input = (out_size - 1) * stride + effective_filter_size
* total_padding = max(0, needed_input - input_size)
* The computation is the same for the horizontal and vertical directions.
*/
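The updated arithmetic transcribes directly into a small reference helper (the function name is illustrative):

    /* total_padding for SAME implicit padding, per the formulas above. */
    static int same_total_padding(int input_size, int stride, int filter_size, int dilation)
    {
        int out_size = (input_size + stride - 1) / stride;
        int effective_filter_size = (filter_size - 1) * dilation + 1;
        int needed_input = (out_size - 1) * stride + effective_filter_size;
        int total_padding = needed_input - input_size;
        return total_padding > 0 ? total_padding : 0;
    }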
@@ -5004,6 +5817,47 @@ typedef enum {
* Failure caused by a device not being available.
*/
ANEURALNETWORKS_UNAVAILABLE_DEVICE = 9,
+
+ /**
+ * Failure because a deadline could not be met for a task, but future
+ * deadlines may still be met for the same task after a short delay.
+ *
+ * Available since API level 30.
+ */
+ ANEURALNETWORKS_MISSED_DEADLINE_TRANSIENT = 10,
+
+ /**
+ * Failure because a deadline could not be met for a task, and future
+ * deadlines will likely also not be met for the same task even after a
+ * short delay.
+ *
+ * Available since API level 30.
+ */
+ ANEURALNETWORKS_MISSED_DEADLINE_PERSISTENT = 11,
+
+ /**
+ * Failure because of a resource limitation within the driver, but future
+ * calls for the same task may still succeed after a short delay.
+ *
+ * Available since API level 30.
+ */
+ ANEURALNETWORKS_RESOURCE_EXHAUSTED_TRANSIENT = 12,
+
+ /**
+ * Failure because of a resource limitation within the driver, and future
+ * calls for the same task will likely also fail even after a short
+ * delay.
+ *
+ * Available since API level 30.
+ */
+ ANEURALNETWORKS_RESOURCE_EXHAUSTED_PERSISTENT = 13,
+
+ /**
+ * Failure indicating an object is in a dead state.
+ *
+ * Available since API level 30.
+ */
+ ANEURALNETWORKS_DEAD_OBJECT = 14,
} ResultCode;
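The *_TRANSIENT codes above are the ones worth retrying. A sketch of such a retry loop under stated assumptions: the callback shape, delay policy and helper name are not part of the API, and each attempt is expected to build a fresh execution because executions are single-use at these API levels:

    #include <unistd.h>
    #include "NeuralNetworks.h" /* the header documented here; include path may differ */

    /* run_once creates a fresh ANeuralNetworksExecution, runs it and returns its
     * result code. Persistent variants and other errors are returned unchanged. */
    static int compute_with_retry(int (*run_once)(void* ctx), void* ctx, int max_attempts)
    {
        int status = ANEURALNETWORKS_OP_FAILED;
        for (int attempt = 0; attempt < max_attempts; ++attempt) {
            status = run_once(ctx);
            if (status != ANEURALNETWORKS_MISSED_DEADLINE_TRANSIENT &&
                status != ANEURALNETWORKS_RESOURCE_EXHAUSTED_TRANSIENT) {
                break; /* success, or a non-transient failure: do not retry */
            }
            usleep(1000u * (unsigned)(attempt + 1)); /* short, growing back-off */
        }
        return status;
    }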
/**
@@ -5024,6 +5878,48 @@ enum { ANEURALNETWORKS_MAX_SIZE_OF_IMMEDIATELY_COPIED_VALUES = 128 };
enum { ANEURALNETWORKS_BYTE_SIZE_OF_CACHE_TOKEN = 32 };
/**
+ * Different duration measurements.
+ *
+ * Durations are measured in nanoseconds.
+ *
+ * Available since API level 29.
+ */
+typedef enum {
+ // Execution time on hardware (not driver, which runs on host processor).
+ ANEURALNETWORKS_DURATION_ON_HARDWARE = 0,
+ // Execution time in driver (including time on hardware). Excludes overhead
+ // such as that of the runtime itself and the IPC needed for the runtime to
+ // communicate with the driver.
+ ANEURALNETWORKS_DURATION_IN_DRIVER = 1,
+ // Execution time on hardware, after all dependencies have been signaled.
+ // If no dependencies are specified (for example, if the execution was scheduled other
+ // than with {@link ANeuralNetworksExecution_startComputeWithDependencies}), the
+ // reported time will be the same as ANEURALNETWORKS_DURATION_ON_HARDWARE.
+ // Available since API level 30.
+ ANEURALNETWORKS_FENCED_DURATION_ON_HARDWARE = 2,
+ // Execution time in driver, after all dependencies have been signaled. Excludes
+ // overhead such as that of the runtime itself and the IPC needed for the runtime
+ // to communicate with the driver.
+ // If no dependencies are specified (for example, if the execution was scheduled other
+ // than with {@link ANeuralNetworksExecution_startComputeWithDependencies}), the
+ // reported time will be the same as ANEURALNETWORKS_DURATION_IN_DRIVER.
+ // Available since API level 30.
+ ANEURALNETWORKS_FENCED_DURATION_IN_DRIVER = 3,
+} DurationCode;
+
+/**
+ * Relative execution priority.
+ *
+ * Available since API level 30.
+ */
+typedef enum {
+ ANEURALNETWORKS_PRIORITY_LOW = 90,
+ ANEURALNETWORKS_PRIORITY_MEDIUM = 100,
+ ANEURALNETWORKS_PRIORITY_HIGH = 110,
+ ANEURALNETWORKS_PRIORITY_DEFAULT = ANEURALNETWORKS_PRIORITY_MEDIUM,
+} PriorityCode;
+
+/**
* ANeuralNetworksMemory is an opaque type that represents memory.
*
* This type is used to represent shared memory, memory mapped files,
@@ -5049,7 +5945,21 @@ enum { ANEURALNETWORKS_BYTE_SIZE_OF_CACHE_TOKEN = 32 };
* of the element type byte size, e.g., a tensor with
* {@link ANEURALNETWORKS_TENSOR_FLOAT32} type must be aligned on 4-byte boundary.
*
+ * It is the application's responsibility to ensure that there are no uses of
+ * the memory after calling {@link ANeuralNetworksMemory_free}. This includes
+ * any model which references this memory because of a call to
+ * {@link ANeuralNetworksModel_setOperandValueFromMemory}, any compilation
+ * created using such a model, any execution object or burst object created
+ * using such a compilation, or any execution which references this memory
+ * because of a call to {@link ANeuralNetworksExecution_setInputFromMemory} or
+ * {@link ANeuralNetworksExecution_setOutputFromMemory}.
+ *
* Available since API level 27.
+ *
+ * Starting at API level 30, the application may request creation of device native memory from
+ * {@link ANeuralNetworksMemoryDesc} to avoid potential memory copying and transformation
+ * overhead between executions. See also {@link ANeuralNetworksMemoryDesc} and
+ * {@link ANeuralNetworksMemory_createFromDesc}.
*/
typedef struct ANeuralNetworksMemory ANeuralNetworksMemory;
@@ -5079,9 +5989,10 @@ typedef struct ANeuralNetworksMemory ANeuralNetworksMemory;
* modifies a model at a given time. It is however safe for more than one
* thread to use the model once {@link ANeuralNetworksModel_finish} has returned.</p>
*
- * <p>It is also the application's responsibility to ensure that there are no other
- * uses of the model after calling {@link ANeuralNetworksModel_free}.
- * This includes any compilation or execution object created using the model.</p>
+ * <p>It is also the application's responsibility to ensure that there are no
+ * other uses of the model after calling {@link ANeuralNetworksModel_free}.
+ * This includes any compilation, execution object or burst object created using
+ * the model.</p>
*
* Available since API level 27.
*/
@@ -5119,7 +6030,10 @@ typedef struct ANeuralNetworksModel ANeuralNetworksModel;
*
* <p>It is also the application's responsibility to ensure that there are no other
* uses of the compilation after calling {@link ANeuralNetworksCompilation_free}.
- * This includes any execution object created using the compilation.</p>
+ * This includes any execution object or burst object created using the compilation,
+ * or any memory descriptor with the compilation as part of one of the roles specified by
+ * {@link ANeuralNetworksMemoryDesc_addInputRole} or
+ * {@link ANeuralNetworksMemoryDesc_addOutputRole}.</p>
*
* Available since API level 27.
*/
@@ -5139,7 +6053,8 @@ typedef struct ANeuralNetworksCompilation ANeuralNetworksCompilation;
* {@link ANeuralNetworksExecution_setOutput} or
* {@link ANeuralNetworksExecution_setOutputFromMemory}.</li>
* <li>Apply the model with one of the following:</li><ul>
- * <li>Asynchronously with {@link ANeuralNetworksExecution_startCompute},
+ * <li>Asynchronously with {@link ANeuralNetworksExecution_startCompute}
+ * or with {@link ANeuralNetworksExecution_startComputeWithDependencies},
* waiting for the execution to complete with
* {@link ANeuralNetworksEvent_wait}.</li>
* <li>Synchronously with {@link ANeuralNetworksExecution_compute}.</li>
@@ -5154,38 +6069,54 @@ typedef struct ANeuralNetworksCompilation ANeuralNetworksCompilation;
* ({@link ANeuralNetworksModel_setOperandValueFromMemory}).</p>
*
* <p>An execution cannot be modified once
- * {@link ANeuralNetworksExecution_compute} or
- * {@link ANeuralNetworksExecution_startCompute} has been called on it.</p>
+ * {@link ANeuralNetworksExecution_burstCompute},
+ * {@link ANeuralNetworksExecution_compute},
+ * {@link ANeuralNetworksExecution_startCompute} or
+ * {@link ANeuralNetworksExecution_startComputeWithDependencies} has been called on it.</p>
*
* <p>An execution can be applied to a model with
- * {@link ANeuralNetworksExecution_compute} or
- * {@link ANeuralNetworksExecution_startCompute} only once. Create new
+ * {@link ANeuralNetworksExecution_burstCompute},
+ * {@link ANeuralNetworksExecution_compute},
+ * {@link ANeuralNetworksExecution_startCompute} or
+ * {@link ANeuralNetworksExecution_startComputeWithDependencies} only once. Create new
* executions to do new evaluations of the model.</p>
*
* <p>It is the application's responsibility to make sure that only one thread
* modifies an execution at a given time. It is however safe for more than one
* thread to use {@link ANeuralNetworksEvent_wait} at the same time.</p>
*
+ * <p>It is also the application's responsibility to ensure that the execution
+ * either has never been scheduled or has completed (i.e., that
+ * {@link ANeuralNetworksExecution_burstCompute},
+ * {@link ANeuralNetworksExecution_compute}, or
+ * {@link ANeuralNetworksEvent_wait} has returned) before calling
+ * {@link ANeuralNetworksExecution_free}.</p>
+ *
* <p>It is also the application's responsibility to ensure that there are no other
* uses of the execution after calling {@link ANeuralNetworksExecution_free}.</p>
*
* <p>Multiple executions can be scheduled and evaluated concurrently, either by
- * means of {@link ANeuralNetworksExecution_compute} (which is synchronous) in
- * different threads or by means of
- * {@link ANeuralNetworksExecution_startCompute} (which is asynchronous). The
- * runtime makes no guarantee on the ordering of completion of executions. If
- * it's important to the application, the application should enforce the
- * ordering by ensuring that one execution completes before the next is
- * scheduled (for example, by scheduling all executions synchronously within a
- * single thread, or by scheduling all executions asynchronously and using
- * {@link ANeuralNetworksEvent_wait} between calls to
- * {@link ANeuralNetworksExecution_startCompute}).</p>
+ * means of {@link ANeuralNetworksExecution_compute} or
+ * {@link ANeuralNetworksExecution_burstCompute} (which are synchronous) in
+ * different threads, or by means of
+ * {@link ANeuralNetworksExecution_startCompute} or
+ * {@link ANeuralNetworksExecution_startComputeWithDependencies} (which are asynchronous).
+ * (Concurrent uses of {@link ANeuralNetworksExecution_burstCompute} must be on
+ * different burst objects.) The runtime makes no guarantee on the ordering of
+ * completion of executions. If it's important to the application, the
+ * application should enforce the ordering by ensuring that one execution
+ * completes before the next is scheduled (for example, by scheduling all
+ * executions synchronously within a single thread, or by scheduling all
+ * executions asynchronously and using {@link ANeuralNetworksEvent_wait} between
+ * calls to {@link ANeuralNetworksExecution_startCompute}); or by using
+ * {@link ANeuralNetworksExecution_startComputeWithDependencies} to make the execution wait for a
+ * list of events to be signaled before starting the actual evaluation.</p>
*
* Available since API level 27.
*/
typedef struct ANeuralNetworksExecution ANeuralNetworksExecution;
-#if __ANDROID_API__ >= __ANDROID_API_Q__
+#if __ANDROID_API__ >= 29
/**
* Parameters for ANEURALNETWORKS_TENSOR_QUANT8_SYMM_PER_CHANNEL operand.
*/
@@ -5230,7 +6161,7 @@ typedef struct ANeuralNetworksSymmPerChannelQuantParams {
* Available since API level 29.
*/
typedef struct ANeuralNetworksBurst ANeuralNetworksBurst;
-#endif // __ANDROID_API__ >= __ANDROID_API_Q__
+#endif // __ANDROID_API__ >= 29
/**
* ANeuralNetworksOperandType describes the type of an operand.
@@ -5245,7 +6176,9 @@ typedef struct ANeuralNetworksBurst ANeuralNetworksBurst;
*
* If a tensor operand's type is not fully specified, the dimensions
* of the operand are deduced from the operand types and values of the
- * operation for which that operand is an output.
+ * operation for which that operand is an output or from the corresponding
+ * {@link ANEURALNETWORKS_IF} or {@link ANEURALNETWORKS_WHILE} operation input
+ * operand type in the case of referenced model input operands.
*
* <p>In the following situations, a tensor operand type must be fully
* specified:<ul>
@@ -5254,16 +6187,25 @@ typedef struct ANeuralNetworksBurst ANeuralNetworksBurst;
* non-nullptr buffer) or
* {@link ANeuralNetworksModel_setOperandValueFromMemory}.</li>
* <li>The operand is a model input (see
- * {@link ANeuralNetworksModel_identifyInputsAndOutputs}). A
- * fully specified tensor operand type must either be provided
- * to {@link ANeuralNetworksModel_addOperand}; or it must be
- * provided to the corresponding
+ * {@link ANeuralNetworksModel_identifyInputsAndOutputs}) of the main
+ * model within a compilation. A fully specified tensor operand type
+ * must either be provided to {@link ANeuralNetworksModel_addOperand};
+ * or it must be provided to the corresponding
* {@link ANeuralNetworksExecution_setInput}, or
* {@link ANeuralNetworksExecution_setInputFromMemory}.
* EXCEPTION: If the input is optional and omitted
* (by passing nullptr for buffer to
* {@link ANeuralNetworksExecution_setInput}) then it need
- * not have a fully specified tensor operand type.</li></ul>
+ * not have a fully specified tensor operand type.</li>
+ * <li>The operand is a model output (see
+ * {@link ANeuralNetworksModel_identifyInputsAndOutputs}) of the main
+ * model within a compilation and is to be used with {@link
+ * ANeuralNetworksExecution_startComputeWithDependencies}.
+ * A fully specified tensor operand type must either be provided
+ * to {@link ANeuralNetworksModel_addOperand}; or it must be
+ * provided to the corresponding
+ * {@link ANeuralNetworksExecution_setOutput}, or
+ * {@link ANeuralNetworksExecution_setOutputFromMemory}.</li></ul>
*
* A tensor operand type of specified rank but some number of
* unspecified dimensions is represented by setting dimensionCount to
@@ -5296,11 +6238,21 @@ typedef struct ANeuralNetworksOperandType {
const uint32_t* dimensions;
/**
- * These two fields are only used for quantized tensors.
- * They must be zero for all other types.
- * The dequantized value of each entry is (value - zeroPoint) * scale.
+ * The quantization scale.
+ *
+ * Must be 0 when not applicable to an operand type.
+ *
+ * See {@link OperandCode}.
*/
float scale;
+
+ /**
+ * The quantization zero point.
+ *
+ * Must be 0 when not applicable to an operand type.
+ *
+ * See {@link OperandCode}.
+ */
int32_t zeroPoint;
} ANeuralNetworksOperandType;
@@ -5314,7 +6266,7 @@ typedef int32_t ANeuralNetworksOperationType;
*/
typedef struct ANeuralNetworksEvent ANeuralNetworksEvent;
-#if __ANDROID_API__ >= __ANDROID_API_Q__
+#if __ANDROID_API__ >= 29
/**
* ANeuralNetworksDevice is an opaque type that represents a device.
@@ -5326,6 +6278,318 @@ typedef struct ANeuralNetworksEvent ANeuralNetworksEvent;
*/
typedef struct ANeuralNetworksDevice ANeuralNetworksDevice;
+#endif // __ANDROID_API__ >= 29
+
+#if __ANDROID_API__ >= 30
+
+/**
+ * ANeuralNetworksMemoryDesc is an opaque type that represents a memory descriptor.
+ *
+ * A memory descriptor describes the properties of a memory object, and is used by
+ * {@link ANeuralNetworksMemory_createFromDesc}.
+ *
+ * To use:
+ * - Create a new memory descriptor by calling {@link ANeuralNetworksMemoryDesc_create}.
+ * - Specify all of the intended input and output roles by calling
+ * {@link ANeuralNetworksMemoryDesc_addInputRole} and
+ * {@link ANeuralNetworksMemoryDesc_addOutputRole}.
+ * - Optionally, specify the memory dimensions by calling
+ * {@link ANeuralNetworksMemoryDesc_setDimensions}.
+ * - Complete the memory descriptor with {@link ANeuralNetworksMemoryDesc_finish}.
+ * - Use the memory descriptor as many times as needed with
+ * {@link ANeuralNetworksMemory_createFromDesc}.
+ * - Destroy the memory descriptor with {@link ANeuralNetworksMemoryDesc_free}.
+ *
+ * A memory descriptor is completed by calling {@link ANeuralNetworksMemoryDesc_finish}.
+ * A memory descriptor is destroyed by calling {@link ANeuralNetworksMemoryDesc_free}.
+ *
+ * A memory descriptor must not be modified once {@link ANeuralNetworksMemoryDesc_finish}
+ * has been called on it.
+ *
+ * It is the application's responsibility to make sure that only
+ * one thread modifies a memory descriptor at a given time. It is however
+ * safe for more than one thread to use the memory descriptor once
+ * {@link ANeuralNetworksMemoryDesc_finish} has returned.
+ *
+ * It is also the application's responsibility to ensure that there are no other
+ * uses of the memory descriptor after calling {@link ANeuralNetworksMemoryDesc_free}.
+ * It is however safe to continue using a {@link ANeuralNetworksMemory} object created
+ * from the memory descriptor.
+ *
+ * Available since API level 30.
+ */
+typedef struct ANeuralNetworksMemoryDesc ANeuralNetworksMemoryDesc;
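A minimal sketch of the lifecycle listed above, under these assumptions: compilation is a finished compilation whose main-model input 0 and output 0 have compatible, fully specified shapes, and error handling is reduced to an early bail-out. It only strings the documented calls together and is not part of this header:

#include <android/NeuralNetworks.h>

/* Sketch: build a device-native memory usable as input 0 and output 0 of
 * executions created from `compilation`. Returns NULL on any failure. */
static ANeuralNetworksMemory* create_io_memory(const ANeuralNetworksCompilation* compilation)
{
  ANeuralNetworksMemoryDesc* desc = NULL;
  ANeuralNetworksMemory* memory = NULL;
  if (ANeuralNetworksMemoryDesc_create(&desc) != ANEURALNETWORKS_NO_ERROR) return NULL;
  if (ANeuralNetworksMemoryDesc_addInputRole(desc, compilation, 0, 1.0f) == ANEURALNETWORKS_NO_ERROR &&
      ANeuralNetworksMemoryDesc_addOutputRole(desc, compilation, 0, 1.0f) == ANEURALNETWORKS_NO_ERROR &&
      ANeuralNetworksMemoryDesc_finish(desc) == ANEURALNETWORKS_NO_ERROR) {
    ANeuralNetworksMemory_createFromDesc(desc, &memory); /* memory stays NULL on failure */
  }
  ANeuralNetworksMemoryDesc_free(desc); /* the memory object may outlive the descriptor */
  return memory;
}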
+
+/**
+ * Create a {@link ANeuralNetworksMemoryDesc} with no properties.
+ *
+ * This only creates the memory descriptor. Its properties should be set with calls to
+ * {@link ANeuralNetworksMemoryDesc_addInputRole},
+ * {@link ANeuralNetworksMemoryDesc_addOutputRole}, and
+ * {@link ANeuralNetworksMemoryDesc_setDimensions}.
+ *
+ * {@link ANeuralNetworksMemoryDesc_finish} must be called once all properties have been set.
+ *
+ * {@link ANeuralNetworksMemoryDesc_free} must be called once the memory descriptor
+ * is no longer needed.
+ *
+ * Available since API level 30.
+ *
+ * @param desc The {@link ANeuralNetworksMemoryDesc} to be created.
+ * Set to NULL if unsuccessful.
+ *
+ * @return ANEURALNETWORKS_NO_ERROR if successful.
+ */
+int ANeuralNetworksMemoryDesc_create(ANeuralNetworksMemoryDesc** desc) __INTRODUCED_IN(30);
+
+/**
+ * Destroy a memory descriptor.
+ *
+ * The memory descriptor need not have been finished by a call to
+ * {@link ANeuralNetworksMemoryDesc_finish}.
+ *
+ * See {@link ANeuralNetworksMemoryDesc} for information on multithreaded usage.
+ *
+ * Available since API level 30.
+ *
+ * @param desc The memory descriptor to be destroyed. Passing NULL is acceptable and
+ * results in no operation.
+ */
+void ANeuralNetworksMemoryDesc_free(ANeuralNetworksMemoryDesc* desc) __INTRODUCED_IN(30);
+
+/**
+ * Specify that a memory object will be playing the role of an input to an execution created from a
+ * particular compilation.
+ *
+ * The compilation and the input index fully specify an input operand. This function
+ * may be invoked multiple times on the same memory descriptor with different input operands,
+ * and the same input operand may be specified on multiple memory descriptors. However,
+ * specifying the same input operand on the same memory descriptor more than once will
+ * return an error.
+ *
+ * The dimensions of the corresponding model operands of all the roles specified by
+ * {@link ANeuralNetworksMemoryDesc_addInputRole} and
+ * {@link ANeuralNetworksMemoryDesc_addOutputRole} must be compatible with each other. Two
+ * dimensions are incompatible if both ranks are fully specified but have different values, or if
+ * there is at least one axis that is fully specified in both but has different values.
+ *
+ * At least one of {@link ANeuralNetworksMemoryDesc_addInputRole} and
+ * {@link ANeuralNetworksMemoryDesc_addOutputRole} must be called on a memory descriptor
+ * before invoking {@link ANeuralNetworksMemoryDesc_finish}.
+ *
+ * Attempting to modify a memory descriptor once {@link ANeuralNetworksMemoryDesc_finish} has been
+ * called will return an error.
+ *
+ * See {@link ANeuralNetworksMemoryDesc} for information on multithreaded usage.
+ *
+ * Available since API level 30.
+ *
+ * @param desc The memory descriptor to be modified.
+ * @param compilation The compilation object. It must already have been finished by calling
+ * {@link ANeuralNetworksCompilation_finish}, and must outlive the memory
+ * descriptor.
+ * @param index The index of the input argument we are referencing from the compilation. It is
+ * an index into the inputs list passed to
+ * {@link ANeuralNetworksModel_identifyInputsAndOutputs}. It is not
+ * the index associated with {@link ANeuralNetworksModel_addOperand}.
+ * @param frequency A floating-point value within the range (0.0, 1.0]. Describes how likely the
+ * memory is to be used in the specified role. This is provided as a hint to
+ * optimize the case when different roles prefer different memory locations or data
+ * layouts.
+ *
+ * @return ANEURALNETWORKS_NO_ERROR if successful.
+ */
+int ANeuralNetworksMemoryDesc_addInputRole(ANeuralNetworksMemoryDesc* desc,
+ const ANeuralNetworksCompilation* compilation,
+ uint32_t index, float frequency) __INTRODUCED_IN(30);
+
+/**
+ * Specify that a memory object will be playing the role of an output to an execution created from a
+ * particular compilation.
+ *
+ * The compilation and the output index fully specify an output operand. This function
+ * may be invoked multiple times on the same memory descriptor with different output operands,
+ * and the same output operand may be specified on multiple memory descriptors. However,
+ * specifying the same output operand on the same memory descriptor object more than once will
+ * return an error.
+ *
+ * The dimensions of the corresponding model operands of all the roles specified by
+ * {@link ANeuralNetworksMemoryDesc_addInputRole} and
+ * {@link ANeuralNetworksMemoryDesc_addOutputRole} must be compatible with each other. Two
+ * dimensions are incompatible if both ranks are fully specified but have different values, or if
+ * there is at least one axis that is fully specified in both but has different values.
+ *
+ * At least one of {@link ANeuralNetworksMemoryDesc_addInputRole} and
+ * {@link ANeuralNetworksMemoryDesc_addOutputRole} must be called on the memory descriptor
+ * before invoking {@link ANeuralNetworksMemoryDesc_finish}.
+ *
+ * Attempting to modify a memory descriptor once {@link ANeuralNetworksMemoryDesc_finish} has been
+ * called will return an error.
+ *
+ * See {@link ANeuralNetworksMemoryDesc} for information on multithreaded usage.
+ *
+ * Available since API level 30.
+ *
+ * @param desc The memory descriptor to be modified.
+ * @param compilation The compilation object. It must already have been finished by calling
+ * {@link ANeuralNetworksCompilation_finish}, and must outlive the memory
+ * descriptor.
+ * @param index The index of the output argument we are referencing from the compilation. It is
+ * an index into the outputs list passed to
+ * {@link ANeuralNetworksModel_identifyInputsAndOutputs}. It is not
+ * the index associated with {@link ANeuralNetworksModel_addOperand}.
+ * @param frequency A floating-point value within the range (0.0, 1.0]. Describes how likely the
+ * memory is to be used in the specified role. This is provided as a hint to
+ * optimize the case when multiple roles prefer different memory locations or data
+ * layouts.
+ *
+ * @return ANEURALNETWORKS_NO_ERROR if successful.
+ */
+int ANeuralNetworksMemoryDesc_addOutputRole(ANeuralNetworksMemoryDesc* desc,
+ const ANeuralNetworksCompilation* compilation,
+ uint32_t index, float frequency) __INTRODUCED_IN(30);
+
+/**
+ * Set the dimensional information of the memory descriptor.
+ *
+ * The specified dimensions must be compatible with the dimensions of the corresponding model
+ * operands of all the roles specified by {@link ANeuralNetworksMemoryDesc_addInputRole} and
+ * {@link ANeuralNetworksMemoryDesc_addOutputRole}. Two dimensions are incompatible if both ranks
+ * are fully specified but have different values, or if there is at least one axis that is fully
+ * specified in both but has different values.
+ *
+ * Attempting to modify a memory descriptor once {@link ANeuralNetworksMemoryDesc_finish} has been
+ * called will return an error.
+ *
+ * See {@link ANeuralNetworksMemoryDesc} for information on multithreaded usage.
+ *
+ * Available since API level 30.
+ *
+ * @param desc The memory descriptor to be modified.
+ * @param rank The number of dimensions. Must be 0 for scalars.
+ * @param dimensions An array of dimensions. An entry with the value 0 indicates that the
+ * corresponding axis has an unknown size.
+ *
+ * @return ANEURALNETWORKS_NO_ERROR if successful.
+ */
+int ANeuralNetworksMemoryDesc_setDimensions(ANeuralNetworksMemoryDesc* desc, uint32_t rank,
+ const uint32_t* dimensions) __INTRODUCED_IN(30);
+
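A short sketch of the call above, assuming the roles already added to the descriptor refer to a rank-4 tensor whose batch size is not known ahead of time; the concrete 224x224x3 shape is illustrative only:

#include <android/NeuralNetworks.h>
#include <stdint.h>

/* Sketch: constrain an unfinished descriptor to rank 4 with an unknown batch axis. */
static int constrain_shape(ANeuralNetworksMemoryDesc* desc)
{
  const uint32_t dims[4] = {0 /* unknown batch */, 224, 224, 3};
  return ANeuralNetworksMemoryDesc_setDimensions(desc, 4, dims);
}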
+/**
+ * Indicate that we have finished modifying a memory descriptor. Required before calling
+ * {@link ANeuralNetworksMemory_createFromDesc}.
+ *
+ * This function must only be called once for a given memory descriptor.
+ *
+ * See {@link ANeuralNetworksMemoryDesc} for information on multithreaded usage.
+ *
+ * Available since API level 30.
+ *
+ * @param desc The memory descriptor to be finished.
+ *
+ * @return ANEURALNETWORKS_NO_ERROR if successful.
+ */
+int ANeuralNetworksMemoryDesc_finish(ANeuralNetworksMemoryDesc* desc) __INTRODUCED_IN(30);
+
+/**
+ * Creates a memory object from a memory descriptor.
+ *
+ * The memory object is created with an uninitialized buffer. A memory object with an uninitialized
+ * buffer may only be used according to the roles specified by {@link
+ * ANeuralNetworksMemoryDesc_addOutputRole}, or as the destination memory in {@link
+ * ANeuralNetworksMemory_copy}. The buffer of a memory object is initialized after the memory object
+ * is used as an output in a successful execution, or used as the destination memory in a successful
+ * {@link ANeuralNetworksMemory_copy}. A memory object with an initialized buffer may be used
+ * according to all roles specified in {@link ANeuralNetworksMemoryDesc}, or as the source or
+ * destination memory in {@link ANeuralNetworksMemory_copy}. The buffer of a memory object will
+ * return to the uninitialized state if the memory object is used as an output in a failed
+ * execution, or used as the destination memory in a failed {@link ANeuralNetworksMemory_copy}.
+ *
+ * The dimensions of the memory descriptor are deduced from the dimensions of the corresponding
+ * model operands of all the roles specified by {@link ANeuralNetworksMemoryDesc_addInputRole} and
+ * {@link ANeuralNetworksMemoryDesc_addOutputRole}, as well as the dimensions set by the call to
+ * {@link ANeuralNetworksMemoryDesc_setDimensions}, if any. The memory descriptor may have
+ * unspecified dimensions or rank. In such a case, the same memory object may be used with different
+ * shapes of outputs in different executions. When the memory is used as an input, the input shape
+ * must be the same as the output shape from the last execution using this memory object as an
+ * output, or the last {@link ANeuralNetworksMemory_copy} using this memory object as the destination
+ * memory. Creating a memory object with unspecified dimensions or rank may fail for certain sets of
+ * roles.
+ *
+ * Using the memory in roles or shapes that are not compatible with the rules specified above will
+ * return an error.
+ *
+ * When calling {@link ANeuralNetworksExecution_setInputFromMemory} or
+ * {@link ANeuralNetworksExecution_setOutputFromMemory} with the memory object,
+ * both offset and length must be set to zero and the entire memory region will be
+ * associated with the specified input or output operand.
+ *
+ * Calling {@link ANeuralNetworksModel_setOperandValueFromMemory} with the memory created from this
+ * function will return an error.
+ *
+ * {@link ANeuralNetworksMemory_free} must be called once the memory is no longer needed.
+ *
+ * Attempting to create memory from an unfinished memory descriptor will return an error.
+ *
+ * The provided {@link ANeuralNetworksMemoryDesc} need not outlive the {@link ANeuralNetworksMemory}
+ * object.
+ *
+ * Available since API level 30.
+ *
+ * @param desc The memory descriptor.
+ * @param memory The memory object to be created.
+ * Set to NULL if unsuccessful.
+ *
+ * @return ANEURALNETWORKS_NO_ERROR if successful; ANEURALNETWORKS_OP_FAILED if the memory is
+ * created with unspecified dimensions or rank and it is not supported for this set of
+ * roles.
+ */
+int ANeuralNetworksMemory_createFromDesc(const ANeuralNetworksMemoryDesc* desc,
+ ANeuralNetworksMemory** memory) __INTRODUCED_IN(30);
+
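A sketch of binding such a memory to an execution, following the rule above that offset and length must both be zero; it assumes an unscheduled execution created from the same compilation whose roles were added to the descriptor, and operand index 0 is illustrative:

#include <android/NeuralNetworks.h>
#include <stddef.h>

/* Sketch: memory created by ANeuralNetworksMemory_createFromDesc is bound with
 * offset 0 and length 0, so the whole region backs the operand. */
static int bind_device_memory(ANeuralNetworksExecution* execution,
                              const ANeuralNetworksMemory* input_mem,
                              const ANeuralNetworksMemory* output_mem)
{
  int status = ANeuralNetworksExecution_setInputFromMemory(execution, 0, NULL, input_mem, 0, 0);
  if (status != ANEURALNETWORKS_NO_ERROR) return status;
  return ANeuralNetworksExecution_setOutputFromMemory(execution, 0, NULL, output_mem, 0, 0);
}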
+/**
+ * Copies data from one memory object to another.
+ *
+ * If at most one of the src and dst is created from {@link ANeuralNetworksMemory_createFromDesc},
+ * the src and dst must have the same logical size:
+ * - If the memory is created from {@link ANeuralNetworksMemory_createFromFd}, or if it is created
+ * from {@link ANeuralNetworksMemory_createFromAHardwareBuffer} with format of
+ * AHARDWAREBUFFER_FORMAT_BLOB, the logical size equals the size of the memory.
+ * - If the memory is created from {@link ANeuralNetworksMemory_createFromAHardwareBuffer} with a
+ * format other than AHARDWAREBUFFER_FORMAT_BLOB, the logical size equals the size when there is
+ * no padding and the data is tightly packed. This function may fail if the AHardwareBuffer
+ * cannot be accessed.
+ * - If the memory is created from {@link ANeuralNetworksMemory_createFromDesc}, the logical size
+ * equals the size indicated by the {@link OperandCode} multiplied by the number of elements. This
+ * function will fail if the number of elements is unknown.
+ *
+ * If both src and dst are created from {@link ANeuralNetworksMemory_createFromDesc}, they must have
+ * compatible dimensions. Two dimensions are incompatible if both ranks are fully specified but
+ * have different values, or if there is at least one axis that is fully specified in both but has
+ * different values. The dst may have unspecified dimensions or rank. In such a case, the dimensions
+ * of dst will get updated according to the dimensions of the src.
+ *
+ * In both cases, if the src is created from {@link ANeuralNetworksMemory_createFromDesc}, it must
+ * have been used as an output in a successful execution, or used as the destination memory in a
+ * successful {@link ANeuralNetworksMemory_copy}.
+ *
+ * The src and dst may have different data layout, in which case the data copying is performed
+ * logically with data layout transformation.
+ *
+ * Available since API level 30.
+ *
+ * @param src The source memory object.
+ * @param dst The destination memory object.
+ *
+ * @return ANEURALNETWORKS_NO_ERROR if successful.
+ */
+int ANeuralNetworksMemory_copy(const ANeuralNetworksMemory* src, const ANeuralNetworksMemory* dst)
+ __INTRODUCED_IN(30);
+
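A minimal sketch of staging data with this call, assuming blob_mem was created with ANeuralNetworksMemory_createFromFd (or a BLOB-format AHardwareBuffer) and has the same logical size as the descriptor-backed device_mem:

#include <android/NeuralNetworks.h>

/* Sketch: copying into device_mem initializes it, after which it may be used
 * in the input roles recorded in its descriptor. */
static int stage_input(const ANeuralNetworksMemory* blob_mem,
                       const ANeuralNetworksMemory* device_mem)
{
  return ANeuralNetworksMemory_copy(blob_mem, device_mem);
}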
+#endif // __ANDROID_API__ >= 30
+
+#if __ANDROID_API__ >= 29
+
/**
* Get the number of available devices.
*
@@ -5359,7 +6623,8 @@ int ANeuralNetworks_getDevice(uint32_t devIndex, ANeuralNetworksDevice** device)
* @param device The representation of the specified device.
* @param name The returned name of the specified device. The name will be in UTF-8
* and will be null-terminated. It will be recognizable as a known device name
- * rather than a cryptic string. For devices with feature level 29 and above, the
+ * rather than a cryptic string. For devices with feature level reported by
+ * {@link ANeuralNetworksDevice_getFeatureLevel} that is 29 and above, the
* format of the name is {VENDOR}-{DEVICE}. For devices with feature level 28
* or lower, the format of the name is undefined.
* The name will remain valid for the duration of the application.
@@ -5439,6 +6704,26 @@ int ANeuralNetworksDevice_getVersion(const ANeuralNetworksDevice* device, const
int ANeuralNetworksDevice_getFeatureLevel(const ANeuralNetworksDevice* device,
int64_t* featureLevel) __INTRODUCED_IN(29);
+#if __ANDROID_API__ >= 30
+
+/**
+ * Wait until the device is in a live state.
+ *
+ * A device may encounter internal errors and temporarily enter a dead state. A
+ * call that uses a device in such a state will return with the error
+ * {@link ANEURALNETWORKS_DEAD_OBJECT}. ANeuralNetworksDevice_wait will block until
+ * the device is in a live state.
+ *
+ * @param device The representation of the specified device.
+ *
+ * @return ANEURALNETWORKS_NO_ERROR if successful.
+ *
+ * Available since API level 30.
+ */
+int ANeuralNetworksDevice_wait(const ANeuralNetworksDevice* device) __INTRODUCED_IN(30);
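A hedged sketch of how this call might be used for recovery: when an NNAPI call reports {@link ANEURALNETWORKS_DEAD_OBJECT}, block until the device is live again and let the caller rebuild its execution before retrying. The retry policy itself is an application decision, not something this header prescribes:

#include <android/NeuralNetworks.h>

/* Sketch: after a DEAD_OBJECT failure, wait for the device to come back.
 * The caller is expected to recreate its execution and retry afterwards. */
static int recover_if_dead(const ANeuralNetworksDevice* device, int last_status)
{
  if (last_status != ANEURALNETWORKS_DEAD_OBJECT) return last_status;
  return ANeuralNetworksDevice_wait(device);
}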
+
+#endif // __ANDROID_API__ >= 30
+
/**
* Get the supported operations for a specified set of devices. If multiple devices
* are selected, the supported operation list is a union of supported operations of all
@@ -5473,6 +6758,10 @@ int ANeuralNetworksModel_getSupportedOperationsForDevices(
* ANeuralNetworksCompilation_create}, where the runtime will attempt to recover
* from such failures.
*
+ * The model passed to this function is termed the "main model" of the
+ * compilation, to distinguish it from other models referred to by an Operand
+ * of type {@link ANEURALNETWORKS_MODEL} within this compilation.
+ *
* @param model The {@link ANeuralNetworksModel} to be compiled.
* @param devices The set of devices. Must not contain duplicates.
* @param numDevices The number of devices in the set.
@@ -5502,7 +6791,7 @@ int ANeuralNetworksCompilation_createForDevices(ANeuralNetworksModel* model,
* data. It is recommended to use the code cache directory provided
* by the Android runtime. If not using the code cache directory, the
* user should choose a directory local to the application, and is
- * responsible to managing the cache entries.
+ * responsible for managing the cache entries.
* @param token The token provided by the user to specify a model must be of length
* ANEURALNETWORKS_BYTE_SIZE_OF_CACHE_TOKEN. The user should ensure that
* the token is unique to a model within the application. The NNAPI
@@ -5525,10 +6814,24 @@ int ANeuralNetworksCompilation_setCaching(ANeuralNetworksCompilation* compilatio
* execution has completed and the outputs are ready to be consumed.
* </p>
*
+ * If {@link ANeuralNetworksExecution_setTimeout} was called on this execution,
+ * and the execution is not able to complete before the timeout duration is
+ * exceeded, then execution may be aborted, in which case
+ * {@link ANEURALNETWORKS_MISSED_DEADLINE_*} will be returned. If the device has
+ * a feature level reported by {@link ANeuralNetworksDevice_getFeatureLevel}
+ * that is lower than 30, then the timeout duration hint will be ignored.
+ *
+ * If this execution contains a {@link ANEURALNETWORKS_WHILE} operation, and
+ * the condition model does not output false within the loop timeout duration,
+ * then execution will be aborted and {@link ANEURALNETWORKS_MISSED_DEADLINE_*}
+ * will be returned.
+ *
* See {@link ANeuralNetworksExecution} for information on multithreaded usage.
*
- * See {@link ANeuralNetworksExecution_startCompute} for asynchronous execution.
- * Synchronous execution incurs lower overhead than asynchronous execution.
+ * See {@link ANeuralNetworksExecution_burstCompute} for burst synchronous execution.
+ * See {@link ANeuralNetworksExecution_startCompute} for regular asynchronous execution.
+ * See {@link ANeuralNetworksExecution_startComputeWithDependencies} for
+ * asynchronous execution with dependencies.
*
* Available since API level 29.
*
@@ -5544,9 +6847,10 @@ int ANeuralNetworksExecution_compute(ANeuralNetworksExecution* execution) __INTR
* Get the dimensional information of the specified output operand of the model of the
* {@link ANeuralNetworksExecution}.
*
- * On asynchronous execution initiated by {@link ANeuralNetworksExecution_startCompute},
- * {@link ANeuralNetworksEvent_wait} must be called prior to this function to recuperate
- * the resources used by the execution.
+ * The execution must have completed. On asynchronous execution initiated by
+ * {@link ANeuralNetworksExecution_startCompute} or
+ * {@link ANeuralNetworksExecution_startComputeWithDependencies},
+ * {@link ANeuralNetworksEvent_wait} must be called prior to this function.
*
* @param execution The execution to be queried.
* @param index The index of the output argument we are querying. It is
@@ -5569,9 +6873,10 @@ int ANeuralNetworksExecution_getOutputOperandRank(ANeuralNetworksExecution* exec
* Get the dimensional information of the specified output operand of the model of the
* {@link ANeuralNetworksExecution}. The target output operand cannot be a scalar.
*
- * On asynchronous execution initiated by {@link ANeuralNetworksExecution_startCompute},
- * {@link ANeuralNetworksEvent_wait} must be called prior to this function to recuperate
- * the resources used by the execution.
+ * The execution must have completed. On asynchronous execution initiated by
+ * {@link ANeuralNetworksExecution_startCompute} or
+ * {@link ANeuralNetworksExecution_startComputeWithDependencies},
+ * {@link ANeuralNetworksEvent_wait} must be called prior to this function.
*
* @param execution The execution to be queried.
* @param index The index of the output argument we are querying. It is an index into the lists
@@ -5625,11 +6930,28 @@ void ANeuralNetworksBurst_free(ANeuralNetworksBurst* burst) __INTRODUCED_IN(29);
* <p>Schedules synchronous evaluation of the execution. Returns once the
* execution has completed and the outputs are ready to be consumed.</p>
*
+ * If {@link ANeuralNetworksExecution_setTimeout} was called on the execution,
+ * and the execution is not able to complete before the timeout duration is
+ * exceeded, then execution may be aborted, in which case
+ * {@link ANEURALNETWORKS_MISSED_DEADLINE_*} will be returned.
+ *
+ * If the execution contains a {@link ANEURALNETWORKS_WHILE} operation, and
+ * the condition model does not output false within the loop timeout duration,
+ * then execution will be aborted and {@link ANEURALNETWORKS_MISSED_DEADLINE_*}
+ * will be returned. If the device has a feature level reported by
+ * {@link ANeuralNetworksDevice_getFeatureLevel} that is lower than 30, then the
+ * timeout duration hint will be ignored.
+ *
* <p>There must be at most one {@link ANeuralNetworksExecution} processing at
* any given time for any given burst object. Any
* {@link ANeuralNetworksExecution} launched before the previous has finished
* will result in ANEURALNETWORKS_BAD_STATE.</p>
*
+ * See {@link ANeuralNetworksExecution_compute} for synchronous execution.
+ * See {@link ANeuralNetworksExecution_startCompute} for regular asynchronous execution.
+ * See {@link ANeuralNetworksExecution_startComputeWithDependencies} for
+ * asynchronous execution with dependencies.
+ *
* Available since API level 29.
*
* @param burst The burst object to execute on.
@@ -5656,14 +6978,14 @@ int ANeuralNetworksExecution_burstCompute(ANeuralNetworksExecution* execution,
* offset and length must be set to zero and the entire memory region will be
* associated with the specified input or output operand. There is no guarantee
* that an arbitrary AHardwareBuffer_Format and AHardwareBuffer_UsageFlags combination
- * can be used by arbitrary devices. The execution will fail if selected set of devices
- * cannot consume the buffer.
+ * can be used by arbitrary devices. The execution will fail if the selected set of
+ * devices cannot consume the buffer.
*
* Calling {@link ANeuralNetworksModel_setOperandValueFromMemory} with shared memory
* backed by an AHardwareBuffer of a format other than AHARDWAREBUFFER_FORMAT_BLOB is
* disallowed.
*
- * TODO(miaowang): add documentation about intended usage with introspection API.
+ * The provided AHardwareBuffer must outlive the ANeuralNetworksMemory object.
*
* Available since API level 29.
*
@@ -5686,8 +7008,12 @@ int ANeuralNetworksMemory_createFromAHardwareBuffer(const AHardwareBuffer* ahwb,
*
* By default, duration is not measured.
*
- * The {@link ANeuralNetworksExecution} must have been created with
+ * The {@link ANeuralNetworksExecution} must have been created from an
+ * {@link ANeuralNetworksCompilation} which in turn was created from
* {@link ANeuralNetworksCompilation_createForDevices} with numDevices = 1.
+ * If the device has a feature level reported by
+ * {@link ANeuralNetworksDevice_getFeatureLevel} that is lower than 29, then the
+ * duration will not be measured.
*
* See {@link ANeuralNetworksExecution} for information on multithreaded usage.
*
@@ -5702,41 +7028,32 @@ int ANeuralNetworksExecution_setMeasureTiming(ANeuralNetworksExecution* executio
__INTRODUCED_IN(29);
/**
- * Different duration measurements.
- *
- * Durations are measured in nanoseconds.
- *
- * Available since API level 29.
- */
-typedef enum {
- // Execution time on hardware (not driver, which runs on host processor).
- ANEURALNETWORKS_DURATION_ON_HARDWARE = 0,
- // Execution time in driver (including time on hardware). Excludes overhead
- // such as that of the runtime itself and the IPC needed for the runtime to
- // communicate with the driver.
- ANEURALNETWORKS_DURATION_IN_DRIVER = 1,
-} DurationCode;
-
-/**
* Get the time spent in the specified {@link ANeuralNetworksExecution}, in nanoseconds.
- * The execution must have completed.
*
- * Available since API level 29.
+ * The execution must have completed. On asynchronous execution initiated by
+ * {@link ANeuralNetworksExecution_startCompute} or
+ * {@link ANeuralNetworksExecution_startComputeWithDependencies},
+ * {@link ANeuralNetworksEvent_wait} must be called prior to this function.
*
* @param execution The execution to be queried.
* @param durationCode The measurement to be queried, specified by {@link DurationCode}.
* @param duration The returned duration. If no measurement was requested by
- * {@link ANeuralNetworksExecution_setMeasureTiming}, or for some other
- * reason the duration is not available, UINT64_MAX will be returned.
- * A particular device need not support any given measurement.
+ * {@link ANeuralNetworksExecution_setMeasureTiming}, if the
+ * device has a feature level reported by
+ * {@link ANeuralNetworksDevice_getFeatureLevel} that is lower
+ * than 29, or for some other reason the duration is not
+ * available, UINT64_MAX will be returned. A particular device
+ * need not support any given measurement.
*
* @return ANEURALNETWORKS_NO_ERROR if successful.
+ *
+ * Available since API level 29.
*/
int ANeuralNetworksExecution_getDuration(const ANeuralNetworksExecution* execution,
int32_t durationCode, uint64_t* duration)
__INTRODUCED_IN(29);
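A sketch of a timed synchronous run, assuming the execution was created from a compilation made with ANeuralNetworksCompilation_createForDevices and numDevices = 1, and that its inputs and outputs are already bound:

#include <android/NeuralNetworks.h>
#include <stdbool.h>
#include <stdint.h>

/* Sketch: request measurement before scheduling, read it after completion.
 * UINT64_MAX means the measurement is not available on this device. */
static void timed_compute(ANeuralNetworksExecution* execution)
{
  uint64_t on_hw = UINT64_MAX, in_drv = UINT64_MAX;
  ANeuralNetworksExecution_setMeasureTiming(execution, true);
  if (ANeuralNetworksExecution_compute(execution) != ANEURALNETWORKS_NO_ERROR) return;
  ANeuralNetworksExecution_getDuration(execution, ANEURALNETWORKS_DURATION_ON_HARDWARE, &on_hw);
  ANeuralNetworksExecution_getDuration(execution, ANEURALNETWORKS_DURATION_IN_DRIVER, &in_drv);
  /* Both values are in nanoseconds when available. */
}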
-#endif // __ANDROID_API__ >= __ANDROID_API_Q__
+#endif // __ANDROID_API__ >= 29
#if __ANDROID_API__ >= 27
@@ -5776,7 +7093,8 @@ int ANeuralNetworksMemory_createFromFd(size_t size, int protect, int fd, size_t
*
* Available since API level 27.
*
- * @param memory The memory object to be freed.
+ * @param memory The memory object to be freed. Passing NULL is acceptable and
+ * results in no operation.
*/
void ANeuralNetworksMemory_free(ANeuralNetworksMemory* memory) __INTRODUCED_IN(27);
@@ -5784,8 +7102,10 @@ void ANeuralNetworksMemory_free(ANeuralNetworksMemory* memory) __INTRODUCED_IN(2
* Create an empty {@link ANeuralNetworksModel}.
*
* <p>This only creates the object. Computation is performed once
- * {@link ANeuralNetworksExecution_compute} or
- * {@link ANeuralNetworksExecution_startCompute} is invoked.
+ * {@link ANeuralNetworksExecution_burstCompute},
+ * {@link ANeuralNetworksExecution_compute},
+ * {@link ANeuralNetworksExecution_startCompute} or
+ * {@link ANeuralNetworksExecution_startComputeWithDependencies} is invoked.
*
* The model should be constructed with calls to
* {@link ANeuralNetworksModel_addOperation} and
@@ -5826,8 +7146,8 @@ void ANeuralNetworksModel_free(ANeuralNetworksModel* model) __INTRODUCED_IN(27);
* calling {@link ANeuralNetworksCompilation_create} and
* {@link ANeuralNetworksCompilation_createForDevices}.
*
- * An application is responsible to make sure that no other thread uses
- * the model at the same time.
+ * An application must ensure that no other thread uses the model at the same
+ * time.
*
* This function must only be called once for a given model.
*
@@ -5901,11 +7221,13 @@ int ANeuralNetworksModel_addOperand(ANeuralNetworksModel* model,
* {@link ANEURALNETWORKS_MAX_SIZE_OF_IMMEDIATELY_COPIED_VALUES}
* are immediately copied into the model.
*
- * For values of length greater than {@link ANEURALNETWORKS_MAX_SIZE_OF_IMMEDIATELY_COPIED_VALUES},
- * a pointer to the buffer is stored within the model. The application is responsible
- * for not changing the content of this region until all executions using this model
- * have completed. As the data may be copied during processing, modifying the data
- * after this call yields undefined results.
+ * For values of length greater than
+ * {@link ANEURALNETWORKS_MAX_SIZE_OF_IMMEDIATELY_COPIED_VALUES}, a pointer to
+ * the buffer is stored within the model. The application must not change the
+ * content of this region until all executions using this model have
+ * completed. As the data may be copied during processing, modifying the data
+ * after this call yields undefined results. The provided buffer must outlive
+ * this model.
*
* For large tensors, using {@link ANeuralNetworksModel_setOperandValueFromMemory}
* is likely to be more efficient.
@@ -5930,7 +7252,7 @@ int ANeuralNetworksModel_addOperand(ANeuralNetworksModel* model,
int ANeuralNetworksModel_setOperandValue(ANeuralNetworksModel* model, int32_t index,
const void* buffer, size_t length) __INTRODUCED_IN(27);
-#if __ANDROID_API__ >= __ANDROID_API_Q__
+#if __ANDROID_API__ >= 29
/**
* Sets an operand's per channel quantization parameters.
@@ -5955,28 +7277,33 @@ int ANeuralNetworksModel_setOperandSymmPerChannelQuantParams(
ANeuralNetworksModel* model, int32_t index,
const ANeuralNetworksSymmPerChannelQuantParams* channelQuant) __INTRODUCED_IN(29);
-#endif // __ANDROID_API__ >= __ANDROID_API_Q__
+#endif // __ANDROID_API__ >= 29
/**
* Sets an operand to a value stored in a memory object.
*
* The content of the memory is not copied. A reference to that memory is stored
- * inside the model. The application is responsible for not changing the content
- * of the memory region until all executions using this model have completed.
- * As the data may be copied during processing, modifying the data after this call
- * yields undefined results.
+ * inside the model. The application must not change the content of the memory
+ * region until all executions using this model have completed. As the data may
+ * be copied during processing, modifying the data after this call yields
+ * undefined results.
+ *
+ * <p>The provided memory must outlive this model.</p>
*
* To indicate that an optional operand should be considered missing,
* use {@link ANeuralNetworksModel_setOperandValue} instead, passing nullptr for buffer.
*
- * Is disallowed to set an operand value with shared memory backed by an AHardwareBuffer
+ * It is disallowed to set an operand value with shared memory backed by an AHardwareBuffer
* of a format other than AHARDWAREBUFFER_FORMAT_BLOB.
*
+ * It is disallowed to set an operand value with memory created from
+ * {@link ANeuralNetworksMemory_createFromDesc}.
+ *
* Attempting to modify a model once {@link ANeuralNetworksModel_finish} has been
* called will return an error.
*
* See {@link ANeuralNetworksModel} for information on multithreaded usage.
- * See {@link ANeuralNetworksMemory_createFromAHardwarBuffer} for information on
+ * See {@link ANeuralNetworksMemory_createFromAHardwareBuffer} for information on
* AHardwareBuffer usage.
*
* Available since API level 27.
@@ -5996,6 +7323,39 @@ int ANeuralNetworksModel_setOperandValueFromMemory(ANeuralNetworksModel* model,
size_t offset, size_t length)
__INTRODUCED_IN(27);
+#if __ANDROID_API__ >= 30
+
+/**
+ * Sets an operand to a value that is a reference to another NNAPI model.
+ *
+ * The referenced model must already have been finished by a call to
+ * {@link ANeuralNetworksModel_finish}.
+ *
+ * The {@link ANeuralNetworksModel_relaxComputationFloat32toFloat16} setting of
+ * referenced models is overridden by that setting of the main model of a
+ * compilation.
+ *
+ * The referenced model must outlive the model referring to it.
+ *
+ * Attempting to modify a model once {@link ANeuralNetworksModel_finish} has
+ * been called will return an error.
+ *
+ * See {@link ANeuralNetworksModel} for information on multithreaded usage.
+ *
+ * Available since API level 30.
+ *
+ * @param model The model to be modified.
+ * @param index The index of the model operand we're setting.
+ * @param value The model to be referenced.
+ *
+ * @return ANEURALNETWORKS_NO_ERROR if successful.
+ */
+int ANeuralNetworksModel_setOperandValueFromModel(ANeuralNetworksModel* model, int32_t index,
+ const ANeuralNetworksModel* value)
+ __INTRODUCED_IN(30);
+
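A hedged sketch of attaching a referenced model, assuming main_model already has an operand of type ANEURALNETWORKS_MODEL at the (illustrative) index passed in, for example the condition input of an ANEURALNETWORKS_WHILE operation, and that the referenced model has been finished:

#include <android/NeuralNetworks.h>
#include <stdint.h>

/* Sketch: the referenced model must be finished and must outlive main_model. */
static int attach_referenced_model(ANeuralNetworksModel* main_model, int32_t operand_index,
                                   const ANeuralNetworksModel* referenced)
{
  return ANeuralNetworksModel_setOperandValueFromModel(main_model, operand_index, referenced);
}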
+#endif // __ANDROID_API__ >= 30
+
/**
* Add an operation to a model.
*
@@ -6060,6 +7420,9 @@ int ANeuralNetworksModel_identifyInputsAndOutputs(ANeuralNetworksModel* model, u
* must be calculated using at least the range and precision of the IEEE 754
* 32-bit floating-point format.
*
+ * The relaxComputationFloat32toFloat16 setting of the main model of
+ * a compilation overrides the values of the referenced models.
+ *
* @param model The model to be modified.
* @param allow 'true' indicates {@link ANEURALNETWORKS_TENSOR_FLOAT32} may be
* calculated with range and/or precision as low as that of the
@@ -6083,7 +7446,11 @@ int ANeuralNetworksModel_relaxComputationFloat32toFloat16(ANeuralNetworksModel*
/**
* Create a {@link ANeuralNetworksCompilation} to compile the given model.
*
- * <p>This only creates the object. Compilation is only performed once
+ * The model passed to this function is termed the "main model" of the
+ * compilation, to distinguish it from other models referred to by an Operand
+ * of type {@link ANEURALNETWORKS_MODEL} within this compilation.
+ *
+ * <p>This function only creates the object. Compilation is only performed once
* {@link ANeuralNetworksCompilation_finish} is invoked.</p>
*
* <p>{@link ANeuralNetworksCompilation_finish} should be called once
@@ -6114,7 +7481,7 @@ int ANeuralNetworksCompilation_create(ANeuralNetworksModel* model,
* Destroy a compilation.
*
* The compilation need not have been finished by a call to
- * {@link ANeuralNetworksModel_finish}.
+ * {@link ANeuralNetworksCompilation_finish}.
*
* See {@link ANeuralNetworksCompilation} for information on multithreaded usage.
*
@@ -6128,7 +7495,8 @@ void ANeuralNetworksCompilation_free(ANeuralNetworksCompilation* compilation) __
/**
* Sets the execution preference.
*
- * <p>Provides guidance to the runtime when trade-offs are possible.</p>
+ * <p>Provides guidance to the runtime when trade-offs are possible. By default the runtime
+ * uses {@link ANEURALNETWORKS_PREFER_FAST_SINGLE_ANSWER}.</p>
*
* See {@link ANeuralNetworksCompilation} for information on multithreaded usage.
*
@@ -6146,13 +7514,19 @@ int ANeuralNetworksCompilation_setPreference(ANeuralNetworksCompilation* compila
/**
* Indicate that we have finished modifying a compilation. Required before
- * calling {@link ANeuralNetworksExecution_create}.
+ * calling {@link ANeuralNetworksBurst_create} or
+ * {@link ANeuralNetworksExecution_create}.
*
- * An application is responsible to make sure that no other thread uses
- * the compilation at the same time.
+ * An application must ensure that no other thread uses the compilation at the
+ * same time.
*
* This function must only be called once for a given compilation.
*
+ * If {@link ANeuralNetworksCompilation_setTimeout} was called on this
+ * compilation, and the compilation is not able to be finished before the
+ * timeout duration is exceeded, then compilation may be aborted, in which case
+ * {@link ANEURALNETWORKS_MISSED_DEADLINE_*} will be returned.
+ *
* See {@link ANeuralNetworksCompilation} for information on multithreaded usage.
*
* Available since API level 27.
@@ -6163,11 +7537,85 @@ int ANeuralNetworksCompilation_setPreference(ANeuralNetworksCompilation* compila
*/
int ANeuralNetworksCompilation_finish(ANeuralNetworksCompilation* compilation) __INTRODUCED_IN(27);
+#if __ANDROID_API__ >= 30
+
+/**
+ * Set the execution priority.
+ *
+ * Execution priorities are relative to other executions created by the same
+ * application (specifically same uid) for the same device. Specifically,
+ * priorities of executions from one application will not affect executions from
+ * another application. Similarly, priorities of executions on one device will
+ * not affect executions on another device.
+ *
+ * Higher priority executions may use more compute resources than lower priority
+ * executions, and may preempt or starve lower priority executions.
+ *
+ * See {@link ANeuralNetworksCompilation} for information on multithreaded usage.
+ *
+ * Available since API level 30.
+ *
+ * @param compilation The compilation to be modified.
+ * @param priority The relative priority of the execution compared to other
+ * executions created by the application. Must be one of
+ * ANEURALNETWORKS_PRIORITY_*.
+ *
+ * @return ANEURALNETWORKS_NO_ERROR if successful.
+ */
+int ANeuralNetworksCompilation_setPriority(ANeuralNetworksCompilation* compilation, int priority)
+ __INTRODUCED_IN(30);
+
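A small sketch of lowering the priority of a background compilation before it is finished; choosing ANEURALNETWORKS_PRIORITY_LOW here is only an example policy:

#include <android/NeuralNetworks.h>

/* Sketch: must be called before ANeuralNetworksCompilation_finish. */
static int mark_as_background(ANeuralNetworksCompilation* compilation)
{
  return ANeuralNetworksCompilation_setPriority(compilation, ANEURALNETWORKS_PRIORITY_LOW);
}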
+/**
+ * Set the maximum expected duration for compiling the model.
+ *
+ * If the device is not able to complete the compilation within the specified
+ * duration, the compilation may be aborted. The timeout duration begins at the
+ * call to {@link ANeuralNetworksCompilation_finish}.
+ *
+ * This timeout duration acts as a hint to drivers, and can be used to both free
+ * up compute resources within the driver and return control back to the
+ * application quicker than is possible without the hint. It enables drivers
+ * that are able to estimate how long a compilation will take to abort the
+ * compilation before it has even started if the driver believes the compilation
+ * cannot be completed within the timeout duration. Similarly, it enables
+ * drivers to abort an ongoing compilation if it is taking too long. However,
+ * this call does not guarantee that the compilation will complete or abort
+ * within the timeout duration.
+ *
+ * By default (i.e., unless ANeuralNetworksCompilation_setTimeout is called),
+ * the timeout duration for compiling the model is considered infinite.
+ *
+ * The {@link ANeuralNetworksCompilation} must have been created with
+ * {@link ANeuralNetworksCompilation_createForDevices} with numDevices = 1,
+ * otherwise this function will fail with ANEURALNETWORKS_BAD_DATA. If the
+ * device has a feature level reported by
+ * {@link ANeuralNetworksDevice_getFeatureLevel} that is lower than 30, then the
+ * timeout duration hint will be ignored.
+ *
+ * See {@link ANeuralNetworksCompilation} for information on multithreaded usage.
+ *
+ * @param compilation The compilation to be modified.
+ * @param duration The maximum amount of time in nanoseconds that is expected to
+ * be spent finishing a compilation. If this duration is exceeded, the
+ * compilation may be aborted. If set to 0, the timeout duration is
+ * considered infinite.
+ *
+ * @return ANEURALNETWORKS_NO_ERROR if successful.
+ *
+ * Available since API level 30.
+ */
+int ANeuralNetworksCompilation_setTimeout(ANeuralNetworksCompilation* compilation,
+ uint64_t duration) __INTRODUCED_IN(30);
+
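A sketch combining the deadline hint with {@link ANeuralNetworksCompilation_finish}, assuming the compilation was created with ANeuralNetworksCompilation_createForDevices and numDevices = 1; the one-second budget is an arbitrary example value:

#include <android/NeuralNetworks.h>
#include <stdint.h>

/* Sketch: hint a 1 s compilation budget, then finish. The timeout is only a
 * hint; the driver may still take longer or abort earlier. */
static int finish_with_budget(ANeuralNetworksCompilation* compilation)
{
  const uint64_t kOneSecondNs = 1000000000ull;
  int status = ANeuralNetworksCompilation_setTimeout(compilation, kOneSecondNs);
  if (status != ANEURALNETWORKS_NO_ERROR) return status;
  return ANeuralNetworksCompilation_finish(compilation);
}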
+#endif // __ANDROID_API__ >= 30
+
/**
* Create a {@link ANeuralNetworksExecution} to apply the given compilation.
* This only creates the object. Computation is only performed once
- * {@link ANeuralNetworksExecution_compute} or
- * {@link ANeuralNetworksExecution_startCompute} is invoked.
+ * {@link ANeuralNetworksExecution_burstCompute},
+ * {@link ANeuralNetworksExecution_compute},
+ * {@link ANeuralNetworksExecution_startCompute} or
+ * {@link ANeuralNetworksExecution_startComputeWithDependencies} is invoked.
*
* <p>The provided compilation must outlive the execution.</p>
*
@@ -6187,12 +7635,16 @@ int ANeuralNetworksExecution_create(ANeuralNetworksCompilation* compilation,
/**
* Destroy an execution.
*
- * <p>If called on an execution for which
- * {@link ANeuralNetworksExecution_startCompute} has been called, the
- * function will return immediately but will mark the execution to be deleted
- * once the computation completes. The related {@link ANeuralNetworksEvent}
- * will be signaled and the {@link ANeuralNetworksEvent_wait} will return
- * ANEURALNETWORKS_ERROR_DELETED.
+ * <p>The execution need not have been scheduled by a call to
+ * {@link ANeuralNetworksExecution_burstCompute},
+ * {@link ANeuralNetworksExecution_compute},
+ * {@link ANeuralNetworksExecution_startCompute} or
+ * {@link ANeuralNetworksExecution_startComputeWithDependencies}; but if it has been scheduled,
+ * then the application must not call {@link ANeuralNetworksExecution_free}
+ * until the execution has completed (i.e.,
+ * {@link ANeuralNetworksExecution_burstCompute},
+ * {@link ANeuralNetworksExecution_compute}, or
+ * {@link ANeuralNetworksEvent_wait} has returned).
*
* See {@link ANeuralNetworksExecution} for information on multithreaded usage.
*
@@ -6206,7 +7658,10 @@ void ANeuralNetworksExecution_free(ANeuralNetworksExecution* execution) __INTROD
/**
* Associate a user buffer with an input of the model of the
* {@link ANeuralNetworksExecution}. Evaluation of the execution must not have
- * been scheduled.
+ * been scheduled. Once evaluation of the execution has been scheduled, the
+ * application must not change the content of the buffer until the execution has
+ * completed. Evaluation of the execution will not change the content of the
+ * buffer.
*
* <p>The provided buffer must outlive the execution.</p>
*
@@ -6244,9 +7699,12 @@ int ANeuralNetworksExecution_setInput(ANeuralNetworksExecution* execution, int32
size_t length) __INTRODUCED_IN(27);
/**
- * Associate part of a memory object with an input of the model of the
+ * Associate a region of a memory object with an input of the model of the
* {@link ANeuralNetworksExecution}. Evaluation of the execution must not have
- * been scheduled.
+ * been scheduled. Once evaluation of the execution has been scheduled, the
+ * application must not change the content of the region until the execution has
+ * completed. Evaluation of the execution will not change the content of the
+ * region.
*
* <p>The provided memory must outlive the execution.</p>
*
@@ -6255,8 +7713,10 @@ int ANeuralNetworksExecution_setInput(ANeuralNetworksExecution* execution, int32
* buffer and 0 for length.
*
* See {@link ANeuralNetworksExecution} for information on multithreaded usage.
- * See {@link ANeuralNetworksMemory_createFromAHardwarBuffer} for information on
+ * See {@link ANeuralNetworksMemory_createFromAHardwareBuffer} for information on
* AHardwareBuffer usage.
+ * See {@link ANeuralNetworksMemory_createFromDesc} for information on usage of memory objects
+ * created from memory descriptors.
*
* Available since API level 27.
*
@@ -6290,7 +7750,9 @@ int ANeuralNetworksExecution_setInputFromMemory(ANeuralNetworksExecution* execut
/**
* Associate a user buffer with an output of the model of the
* {@link ANeuralNetworksExecution}. Evaluation of the execution must not have
- * been scheduled.
+ * been scheduled. Once evaluation of the execution has been scheduled, the
+ * application must not change the content of the buffer until the execution has
+ * completed.
*
* If the output is optional, you can indicate that it is omitted by
* passing nullptr for buffer and 0 for length.
@@ -6333,9 +7795,11 @@ int ANeuralNetworksExecution_setOutput(ANeuralNetworksExecution* execution, int3
size_t length) __INTRODUCED_IN(27);
/**
- * Associate part of a memory object with an output of the model of the
+ * Associate a region of a memory object with an output of the model of the
* {@link ANeuralNetworksExecution}. Evaluation of the execution must not have
- * been scheduled.
+ * been scheduled. Once evaluation of the execution has been scheduled, the
+ * application must not change the content of the region until the execution has
+ * completed.
*
* If the output is optional, you can indicate that it is omitted by
* using {@link ANeuralNetworksExecution_setOutput} instead, passing nullptr for
@@ -6344,8 +7808,10 @@ int ANeuralNetworksExecution_setOutput(ANeuralNetworksExecution* execution, int3
* <p>The provided memory must outlive the execution.</p>
*
* See {@link ANeuralNetworksExecution} for information on multithreaded usage.
- * See {@link ANeuralNetworksMemory_createFromAHardwarBuffer} for information on
+ * See {@link ANeuralNetworksMemory_createFromAHardwareBuffer} for information on
* AHardwareBuffer usage.
+ * See {@link ANeuralNetworksMemory_createFromDesc} for information on usage of memory objects
+ * created from memory descriptors.
*
* Available since API level 27.
*
@@ -6385,8 +7851,8 @@ int ANeuralNetworksExecution_setOutputFromMemory(ANeuralNetworksExecution* execu
/**
* Schedule asynchronous evaluation of the execution.
*
- * <p>Schedules asynchronous evaluation of the execution. Once the model has
- * been applied and the outputs are ready to be consumed, the returned event
+ * <p>Schedules asynchronous evaluation of the execution. Once the execution
+ * has completed and the outputs are ready to be consumed, the returned event
* will be signaled. Use {@link ANeuralNetworksEvent_wait} to wait for that
* event.
* </p>
@@ -6394,10 +7860,31 @@ int ANeuralNetworksExecution_setOutputFromMemory(ANeuralNetworksExecution* execu
* ANeuralNetworksEvent_wait must be called to recuperate the resources used
* by the execution.
*
+ * If {@link ANeuralNetworksExecution_setTimeout} was called on this execution,
+ * and the execution is not able to complete before the timeout duration is
+ * exceeded, then execution may be aborted, in which case
+ * {@link ANEURALNETWORKS_MISSED_DEADLINE_*} will be returned through
+ * {@link ANeuralNetworksExecution_startCompute} or
+ * {@link ANeuralNetworksEvent_wait} on the event object. If the device has a
+ * feature level reported by {@link ANeuralNetworksDevice_getFeatureLevel} that
+ * is lower than 30, then the timeout duration hint will be ignored.
+ *
+ * If this execution contains a {@link ANEURALNETWORKS_WHILE} operation, and
+ * the condition model does not output false within the loop timeout duration,
+ * then execution will be aborted and {@link ANEURALNETWORKS_MISSED_DEADLINE_*}
+ * will be returned through {@link ANeuralNetworksEvent_wait} on the event
+ * object.
+ *
+ * If the device can detect before the execution has started that the execution
+ * will not complete within the timeout duration, the device may choose to skip
+ * the execution and instead return {@link ANEURALNETWORKS_MISSED_DEADLINE_*}.
+ *
* See {@link ANeuralNetworksExecution} for information on multithreaded usage.
*
* See {@link ANeuralNetworksExecution_compute} for synchronous execution.
- * Synchronous execution incurs lower overhead than asynchronous execution.
+ * See {@link ANeuralNetworksExecution_burstCompute} for burst synchronous execution.
+ * See {@link ANeuralNetworksExecution_startComputeWithDependencies} for
+ * asynchronous execution with dependencies.
*
* Available since API level 27.
*
@@ -6405,21 +7892,129 @@ int ANeuralNetworksExecution_setOutputFromMemory(ANeuralNetworksExecution* execu
* @param event The event that will be signaled on completion. event is set to
* NULL if there's an error.
*
- * @return ANEURALNETWORKS_NO_ERROR if successful.
+ * @return ANEURALNETWORKS_NO_ERROR if the evaluation is successfully scheduled.
*/
int ANeuralNetworksExecution_startCompute(ANeuralNetworksExecution* execution,
ANeuralNetworksEvent** event) __INTRODUCED_IN(27);
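A minimal sketch of the asynchronous path described above (schedule, wait, release), assuming inputs and outputs are already bound:

    #include <android/NeuralNetworks.h>

    static int run_async(ANeuralNetworksExecution *execution)
    {
      ANeuralNetworksEvent *event = nullptr;
      int status = ANeuralNetworksExecution_startCompute(execution, &event);
      if (status != ANEURALNETWORKS_NO_ERROR)
        return status;                            // scheduling failed; no event to wait on
      status = ANeuralNetworksEvent_wait(event);  // blocks until the outputs are ready
      ANeuralNetworksEvent_free(event);           // always release the event
      return status;
    }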
+#if __ANDROID_API__ >= 30
+
+/**
+ * Set the maximum expected duration of the specified execution.
+ *
+ * If the device is not able to complete the execution within the specified
+ * duration, the execution may be aborted. The timeout duration begins at a
+ * call to one of:
+ * - {@link ANeuralNetworksExecution_burstCompute}
+ * - {@link ANeuralNetworksExecution_compute}
+ * - {@link ANeuralNetworksExecution_startCompute}
+ * - {@link ANeuralNetworksExecution_startComputeWithDependencies}
+ *
+ * This timeout duration acts as a hint to drivers, and can be used to both free
+ * up compute resources within the driver and return control back to the
+ * application quicker than is possible without the hint. It enables drivers
+ * that are able to estimate how long an execution will take to abort the
+ * execution before it has even started if the driver believes the execution
+ * cannot be completed within the timeout duration. Similarly, it enables
+ * drivers to abort an ongoing execution if it is taking too long. However, this
+ * call does not guarantee that the execution will complete or abort within the
+ * timeout duration.
+ *
+ * By default (i.e., unless ANeuralNetworksExecution_setTimeout is called),
+ * the timeout duration for execution is considered infinite.
+ *
+ * The {@link ANeuralNetworksExecution} must have been created from an
+ * {@link ANeuralNetworksCompilation} which in turn was created from
+ * {@link ANeuralNetworksCompilation_createForDevices} with numDevices = 1,
+ * otherwise this function will fail with ANEURALNETWORKS_BAD_DATA. If the
+ * device has a feature level reported by
+ * {@link ANeuralNetworksDevice_getFeatureLevel} that is lower than 30, then the
+ * timeout duration hint will be ignored.
+ *
+ * See {@link ANeuralNetworksExecution} for information on multithreaded usage.
+ *
+ * @param execution The execution to be modified.
+ * @param duration The maximum amount of time in nanoseconds that is expected to
+ * be spent executing a model. If this duration is exceeded, the execution
+ * may be aborted. If set to 0, the timeout duration is considered infinite.
+ *
+ * @return ANEURALNETWORKS_NO_ERROR if successful.
+ *
+ * Available since API level 30.
+ */
+int ANeuralNetworksExecution_setTimeout(ANeuralNetworksExecution* execution, uint64_t duration)
+ __INTRODUCED_IN(30);
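As a sketch, the hint above could be applied before scheduling; the 50 ms budget is an illustrative value, and the compilation must target a single device as noted:

    #include <android/NeuralNetworks.h>
    #include <cstdint>

    static int hint_deadline(ANeuralNetworksExecution *execution)
    {
      // 50 ms expressed in nanoseconds; exceeding it may abort the execution.
      const uint64_t kBudgetNs = 50ull * 1000 * 1000;
      return ANeuralNetworksExecution_setTimeout(execution, kBudgetNs);
    }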
+
+/**
+ * Set the maximum duration of WHILE loops in the specified execution.
+ *
+ * This is a fuzzy per-loop timeout intended to prevent infinite loops.
+ *
+ * If a WHILE loop condition model does not output false within the specified
+ * duration, the execution will be aborted.
+ *
+ * See {@link ANeuralNetworks_getDefaultLoopTimeout} and
+ * {@link ANeuralNetworks_getMaximumLoopTimeout} for the default
+ * and maximum timeout values.
+ *
+ * See {@link ANeuralNetworksExecution} for information on multithreaded usage.
+ *
+ * @param execution The execution to be modified.
+ * @param duration The maximum amount of time in nanoseconds that can be spent
+ * executing a WHILE loop. If the specified duration value exceeds the value
+ * produced by {@link ANeuralNetworks_getMaximumLoopTimeout}, it will be
+ * overridden by that value.
+ *
+ * @return ANEURALNETWORKS_NO_ERROR if successful.
+ * ANEURALNETWORKS_BAD_STATE if execution has started.
+ * ANEURALNETWORKS_UNEXPECTED_NULL if execution is NULL.
+ *
+ * Available since API level 30.
+ */
+int ANeuralNetworksExecution_setLoopTimeout(ANeuralNetworksExecution* execution, uint64_t duration)
+ __INTRODUCED_IN(30);
+
+/**
+ * Get the default timeout value for WHILE loops.
+ *
+ * @return The default timeout value in nanoseconds.
+ *
+ * Available since API level 30.
+ */
+uint64_t ANeuralNetworks_getDefaultLoopTimeout() __INTRODUCED_IN(30);
+
+/**
+ * Get the maximum timeout value for WHILE loops.
+ *
+ * @return The maximum timeout value in nanoseconds.
+ *
+ * Available since API level 30.
+ */
+uint64_t ANeuralNetworks_getMaximumLoopTimeout() __INTRODUCED_IN(30);
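A sketch combining the three loop-timeout entry points above; the requested duration is a caller-chosen placeholder:

    #include <android/NeuralNetworks.h>
    #include <algorithm>
    #include <cstdint>

    static int hint_loop_timeout(ANeuralNetworksExecution *execution, uint64_t requested_ns)
    {
      // When this call is omitted, ANeuralNetworks_getDefaultLoopTimeout() applies.
      // Clamping here only makes the effective value explicit; the runtime would
      // override an oversized request with the maximum anyway.
      const uint64_t effective =
          std::min(requested_ns, ANeuralNetworks_getMaximumLoopTimeout());
      return ANeuralNetworksExecution_setLoopTimeout(execution, effective);
    }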
+
+#endif // __ANDROID_API__ >= 30
+
/**
* Waits until the execution completes.
*
* More than one thread can wait on an event. When the execution completes,
* all threads will be released.
*
+ * If {@link ANeuralNetworksExecution_setTimeout} was called on the execution
+ * corresponding to this event, and the execution is not able to complete
+ * before the duration is exceeded, the execution may be aborted, in which case
+ * {@link ANEURALNETWORKS_MISSED_DEADLINE_*} will be returned here.
+ *
+ * If the execution contains a {@link ANEURALNETWORKS_WHILE} operation, and
+ * the condition model does not output false within the loop timeout duration,
+ * the execution will be aborted, and {@link ANEURALNETWORKS_MISSED_DEADLINE_*}
+ * will be returned here.
+ *
* See {@link ANeuralNetworksExecution} for information on multithreaded usage.
*
* Available since API level 27.
*
+ * @param event The event that will be signaled on completion.
* @return ANEURALNETWORKS_NO_ERROR if the execution completed normally.
* ANEURALNETWORKS_UNMAPPABLE if the execution input or output memory cannot
* be properly mapped.
@@ -6432,13 +8027,140 @@ int ANeuralNetworksEvent_wait(ANeuralNetworksEvent* event) __INTRODUCED_IN(27);
* See {@link ANeuralNetworksExecution} for information on multithreaded usage.
*
* Available since API level 27.
+ *
+ * @param event The event object to be destroyed. Passing NULL is acceptable and
+ * results in no operation.
*/
void ANeuralNetworksEvent_free(ANeuralNetworksEvent* event) __INTRODUCED_IN(27);
#endif // __ANDROID_API__ >= 27
+#if __ANDROID_API__ >= 30
+/**
+ * Create a {@link ANeuralNetworksEvent} from a sync_fence file descriptor.
+ *
+ * The newly created ANeuralNetworksEvent does not take ownership of the provided sync_fence_fd;
+ * it will instead dup the provided sync_fence_fd and own the duplicate.
+ *
+ * @param sync_fence_fd The sync_fence file descriptor.
+ * @param event The newly created object or NULL if unsuccessful.
+ *
+ * @return ANEURALNETWORKS_NO_ERROR if successful.
+ *
+ * Available since API level 30.
+ */
+int ANeuralNetworksEvent_createFromSyncFenceFd(int sync_fence_fd, ANeuralNetworksEvent** event)
+ __INTRODUCED_IN(30);
+
+/**
+ * Get sync_fence file descriptor from the event.
+ *
+ * If the ANeuralNetworksEvent is not backed by a sync fence, the sync_fence_fd
+ * will be set to -1, and ANEURALNETWORKS_BAD_DATA will be returned.
+ *
+ * See {@link ANeuralNetworksEvent_createFromSyncFenceFd} and
+ * {@link ANeuralNetworksExecution_startComputeWithDependencies} to see how to create
+ * an event backed by a sync fence.
+ *
+ * The user takes ownership of the returned fd, and must close the returned file descriptor when
+ * it is no longer needed.
+ *
+ * @param event An event that is backed by a sync fence.
+ * @param sync_fence_fd The sync_fence file descriptor. The file descriptor will
+ * be set to -1 if there is an error.
+ *
+ * @return ANEURALNETWORKS_NO_ERROR if successful.
+ *
+ * Available since API level 30.
+ */
+int ANeuralNetworksEvent_getSyncFenceFd(const ANeuralNetworksEvent* event, int* sync_fence_fd)
+ __INTRODUCED_IN(30);
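A sketch of the sync-fence round trip covered by the two functions above; `fence_fd` would come from some fence-producing API and is only a placeholder:

    #include <android/NeuralNetworks.h>
    #include <unistd.h>

    static int wrap_and_extract(int fence_fd)
    {
      ANeuralNetworksEvent *event = nullptr;
      // The event dups fence_fd, so the caller keeps ownership of the original fd.
      int status = ANeuralNetworksEvent_createFromSyncFenceFd(fence_fd, &event);
      if (status != ANEURALNETWORKS_NO_ERROR)
        return status;

      int out_fd = -1;
      status = ANeuralNetworksEvent_getSyncFenceFd(event, &out_fd);
      if (status == ANEURALNETWORKS_NO_ERROR)
        close(out_fd);  // the caller owns the returned fd and must close it
      ANeuralNetworksEvent_free(event);
      return status;
    }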
+
+/**
+ * Schedule asynchronous evaluation of the execution with dependencies.
+ *
+ * The execution will wait for all the depending events to be signaled before
+ * starting the evaluation. Once the execution has completed and the outputs
+ * are ready to be consumed, the returned event will be signaled. Depending on which
+ * devices are handling the execution, the event could be backed by a sync fence.
+ * Use {@link ANeuralNetworksEvent_wait} to wait for that event.
+ *
+ * ANeuralNetworksEvent_wait must be called to recuperate the resources used
+ * by the execution.
+ *
+ * If parts of the execution are scheduled on devices that do not support fenced execution,
+ * the function call may wait for such parts to finish before returning.
+ *
+ * The function will return an error if any of the events in dependencies is already in a bad
+ * state. After the execution is scheduled, if any of the events in dependencies does not complete
+ * normally, the execution will fail, and {@link ANeuralNetworksEvent_wait} on the returned
+ * event will return an error.
+ *
+ * The function will return an error if any of the execution outputs has a tensor operand type
+ * that is not fully specified.
+ *
+ * The function can be passed a timeout duration in nanoseconds. This timeout
+ * duration acts as a hint to drivers in the same way that the timeout durations
+ * in {@link ANeuralNetworksCompilation_setTimeout} and {@link
+ * ANeuralNetworksExecution_setTimeout} act as hints to drivers. The duration
+ * begins when all waitFor sync fences have been signaled, and can be used
+ * together with {@link ANeuralNetworksExecution_setTimeout} which specifies the
+ * maximum timeout duration beginning at the call to
+ * {@link ANeuralNetworksExecution_startComputeWithDependencies}.
+ * If the duration is non-zero, the {@link ANeuralNetworksExecution} must have been created
+ * from an {@link ANeuralNetworksCompilation} which in turn was created from
+ * {@link ANeuralNetworksCompilation_createForDevices} with numDevices = 1,
+ * otherwise this function will fail with ANEURALNETWORKS_BAD_DATA. If either
+ * the timeout duration from {@link ANeuralNetworksExecution_setTimeout} or the
+ * timeout duration passed to this call is exceeded, the execution may be
+ * aborted, in which case {@link ANEURALNETWORKS_MISSED_DEADLINE_*} will be
+ * returned through {@link ANeuralNetworksExecution_startComputeWithDependencies}
+ * or {@link ANeuralNetworksEvent_wait} on the event object. If the device has a
+ * feature level reported by {@link ANeuralNetworksDevice_getFeatureLevel} that
+ * is lower than 30, then the timeout duration hints will be ignored.
+ *
+ * If this execution contains a {@link ANEURALNETWORKS_WHILE} operation, and
+ * the condition model does not output false within the loop timeout duration,
+ * then execution will be aborted and {@link ANEURALNETWORKS_MISSED_DEADLINE_*}
+ * will be returned through {@link ANeuralNetworksEvent_wait} on the event
+ * object.
+ *
+ * See {@link ANeuralNetworksExecution} for information on multithreaded usage.
+ *
+ * See {@link ANeuralNetworksExecution_compute} for synchronous execution.
+ * See {@link ANeuralNetworksExecution_burstCompute} for burst synchronous execution.
+ * See {@link ANeuralNetworksExecution_startCompute} for regular asynchronous execution.
+ *
+ * @param execution The execution to be scheduled and executed.
+ * @param dependencies A set of depending events. The actual evaluation will not start
+ * until all the events are signaled.
+ * @param num_dependencies The number of events in the dependencies set.
+ * @param duration The maximum amount of time in nanoseconds that is expected to
+ * be spent executing the model after all dependencies are
+ * signaled. If set to 0, the timeout duration is considered
+ * infinite.
+ * @param event The event that will be signaled on completion. event is set to
+ * NULL if there's an error.
+ *
+ * @return ANEURALNETWORKS_NO_ERROR if the evaluation is successfully scheduled.
+ *
+ * Available since API level 30.
+ */
+int ANeuralNetworksExecution_startComputeWithDependencies(
+ ANeuralNetworksExecution* execution, const ANeuralNetworksEvent* const* dependencies,
+ uint32_t num_dependencies, uint64_t duration, ANeuralNetworksEvent** event)
+ __INTRODUCED_IN(30);
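A sketch of fenced execution with a single dependency, matching the contract above; `dep` is assumed to be an event created elsewhere (for example from a sync fence):

    #include <android/NeuralNetworks.h>
    #include <cstdint>

    static int run_after(ANeuralNetworksExecution *execution, ANeuralNetworksEvent *dep)
    {
      const ANeuralNetworksEvent *deps[] = {dep};
      ANeuralNetworksEvent *done = nullptr;
      // duration = 0: no extra per-call timeout hint once the fences have signaled.
      int status = ANeuralNetworksExecution_startComputeWithDependencies(
          execution, deps, /*num_dependencies=*/1, /*duration=*/0, &done);
      if (status != ANEURALNETWORKS_NO_ERROR)
        return status;
      status = ANeuralNetworksEvent_wait(done);
      ANeuralNetworksEvent_free(done);
      return status;
    }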
+
+#endif // __ANDROID_API__ >= 30
+
__END_DECLS
-#endif // ANDROID_ML_NN_RUNTIME_NEURAL_NETWORKS_H
+#endif // ANDROID_FRAMEWORKS_ML_NN_RUNTIME_NEURAL_NETWORKS_H
+
+// For compatibility with Android, check whether __ANDROID__ is defined
+#ifndef __ANDROID__
+#undef __ANDROID_API__
+#undef __INTRODUCED_IN
+#endif // __ANDROID__
/** @} */
diff --git a/runtime/nnapi-header/include/NeuralNetworksExtensions.h b/runtime/nnapi-header/include/NeuralNetworksExtensions.h
index ca2e04567..dd51b0301 100644
--- a/runtime/nnapi-header/include/NeuralNetworksExtensions.h
+++ b/runtime/nnapi-header/include/NeuralNetworksExtensions.h
@@ -14,8 +14,8 @@
* limitations under the License.
*/
-#ifndef ANDROID_ML_NN_RUNTIME_NEURAL_NETWORKS_EXTENSIONS_H
-#define ANDROID_ML_NN_RUNTIME_NEURAL_NETWORKS_EXTENSIONS_H
+#ifndef ANDROID_FRAMEWORKS_ML_NN_RUNTIME_NEURAL_NETWORKS_EXTENSIONS_H
+#define ANDROID_FRAMEWORKS_ML_NN_RUNTIME_NEURAL_NETWORKS_EXTENSIONS_H
#include "NeuralNetworks.h"
@@ -37,7 +37,7 @@
__BEGIN_DECLS
-#if __ANDROID_API__ >= __ANDROID_API_Q__
+#if __ANDROID_API__ >= 29
/**
* Queries whether an extension is supported by the driver implementation of the specified device.
@@ -110,8 +110,8 @@ int ANeuralNetworksModel_setOperandExtensionData(ANeuralNetworksModel* model, in
const void* data, size_t length)
__INTRODUCED_IN(29);
-#endif // __ANDROID_API__ >= __ANDROID_API_Q__
+#endif // __ANDROID_API__ >= 29
__END_DECLS
-#endif // ANDROID_ML_NN_RUNTIME_NEURAL_NETWORKS_EXTENSIONS_H
+#endif // ANDROID_FRAMEWORKS_ML_NN_RUNTIME_NEURAL_NETWORKS_EXTENSIONS_H
diff --git a/runtime/onert/api/CMakeLists.txt b/runtime/onert/api/CMakeLists.txt
index 49a5aa071..9c6dd90cc 100644
--- a/runtime/onert/api/CMakeLists.txt
+++ b/runtime/onert/api/CMakeLists.txt
@@ -9,10 +9,16 @@ add_library(${ONERT_DEV} SHARED ${API_SRC})
set(NNFW_API_HEADERS include/nnfw.h include/nnfw_experimental.h)
target_link_libraries(${ONERT_DEV} PUBLIC nnfw-nnapi-header)
-target_link_libraries(${ONERT_DEV} PUBLIC onert_core)
+target_link_libraries(${ONERT_DEV} PRIVATE onert_core)
target_link_libraries(${ONERT_DEV} PRIVATE jsoncpp tflite_loader circle_loader ${LIB_PTHREAD})
target_link_libraries(${ONERT_DEV} PRIVATE nnfw_common)
target_link_libraries(${ONERT_DEV} PRIVATE nnfw_coverage)
+# NOTE The line below is added to remove a warning for the Android build
+# It will be removed after the Android build uses the gold linker
+if (ANDROID)
+ target_link_libraries(${ONERT_DEV} INTERFACE log)
+endif (ANDROID)
+
target_include_directories(${ONERT_DEV} PUBLIC include)
set_target_properties(${ONERT_DEV} PROPERTIES PUBLIC_HEADER "${NNFW_API_HEADERS}")
diff --git a/runtime/onert/api/include/nnfw_version.h b/runtime/onert/api/include/nnfw_version.h
index 42e43760b..8c6ea3994 100644
--- a/runtime/onert/api/include/nnfw_version.h
+++ b/runtime/onert/api/include/nnfw_version.h
@@ -21,6 +21,6 @@
* NNFW_VERSION is a uint32 value representing nnfw runtime version
* in 0xMMmmmmPP, where MM = major, mmmm = minor, PP = patch
*/
-#define NNFW_VERSION 0x01000900
+#define NNFW_VERSION 0x01000a00
#endif // __NNFW_VERSION_H__
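For reference, a small standalone sketch of the 0xMMmmmmPP encoding above; 0x01000a00 decodes to version 1.10.0:

    #include <cstdint>
    #include <cstdio>

    int main()
    {
      const uint32_t version = 0x01000a00;            // NNFW_VERSION after this change
      const uint32_t major = version >> 24;           // MM   -> 1
      const uint32_t minor = (version >> 8) & 0xFFFF; // mmmm -> 10 (0x000a)
      const uint32_t patch = version & 0xFF;          // PP   -> 0
      std::printf("%u.%u.%u\n", major, minor, patch); // prints "1.10.0"
      return 0;
    }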
diff --git a/runtime/onert/api/src/nnfw_api_internal.cc b/runtime/onert/api/src/nnfw_api_internal.cc
index 81b40703f..aa066e190 100644
--- a/runtime/onert/api/src/nnfw_api_internal.cc
+++ b/runtime/onert/api/src/nnfw_api_internal.cc
@@ -112,7 +112,16 @@ NNFW_STATUS nnfw_session::load_circle_from_buffer(uint8_t *buffer, size_t size)
if (size == 0)
return NNFW_STATUS_ERROR;
- _subgraphs = onert::circle_loader::loadModel(buffer, size);
+ try
+ {
+ _subgraphs = onert::circle_loader::loadModel(buffer, size);
+ }
+ catch (const std::exception &e)
+ {
+ std::cerr << "Error during model loading: " << e.what() << std::endl;
+ return NNFW_STATUS_ERROR;
+ }
+
_compiler = std::make_unique<onert::compiler::Compiler>(_subgraphs);
_state = State::MODEL_LOADED;
diff --git a/runtime/onert/backend/acl_cl/ConstantInitializer.cc b/runtime/onert/backend/acl_cl/ConstantInitializer.cc
index 31f1c10eb..b45b91058 100644
--- a/runtime/onert/backend/acl_cl/ConstantInitializer.cc
+++ b/runtime/onert/backend/acl_cl/ConstantInitializer.cc
@@ -14,6 +14,11 @@
* limitations under the License.
*/
+#include <AclActivationBuilder.h>
+#include <AclFunction.h>
+#include <Convert.h>
+#include <Swizzle.h>
+
#include "ConstantInitializer.h"
namespace onert
@@ -96,6 +101,46 @@ void ConstantInitializer::visit(const ir::operation::SpaceToBatchND &node)
}
}
+void ConstantInitializer::visit(const ir::operation::Reverse &node)
+{
+ const auto &output_index = node.getOutputs().at(0);
+
+ const auto &input_index = node.getInputs().at(ir::operation::Reverse::Input::INPUT);
+ const auto &input_obj = _operands.at(input_index);
+
+ const auto &axis_index = node.getInputs().at(ir::operation::Reverse::Input::AXIS);
+ const auto &axis_obj = _operands.at(axis_index);
+
+ const auto ifm_rank = input_obj.shape().rank();
+ const auto frontend_layout = this->_current_op_seq_layout;
+
+ auto output_tensor = this->_tensor_reg->getITensor(output_index);
+ const auto backend_layout = output_tensor->layout();
+
+ if (axis_obj.isConstant())
+ {
+ _init_map[axis_index] = [ifm_rank, frontend_layout, backend_layout](const ir::Operand &operand,
+ backend::ITensor &obj) {
+ assert(operand.data());
+
+ const auto axis_value = *(reinterpret_cast<const int32_t *>(operand.data()->base()));
+ int32_t axis_tmp = axis_value;
+ if (axis_tmp < 0)
+ {
+ axis_tmp = axis_tmp + ifm_rank;
+ }
+
+ auto axis =
+ acl_common::ToARMComputeAxis(ifm_rank, axis_tmp, frontend_layout, backend_layout).value();
+
+ obj.access([&](ITensor &tensor) {
+ int32_t *into = reinterpret_cast<int32_t *>(tensor.buffer());
+ *into = (int32_t)axis;
+ });
+ };
+ }
+}
+
} // namespace acl_cl
} // namespace backend
} // namespace onert
diff --git a/runtime/onert/backend/acl_cl/ConstantInitializer.h b/runtime/onert/backend/acl_cl/ConstantInitializer.h
index 4f894fd31..9f3acb461 100644
--- a/runtime/onert/backend/acl_cl/ConstantInitializer.h
+++ b/runtime/onert/backend/acl_cl/ConstantInitializer.h
@@ -38,6 +38,7 @@ public:
void visit(const ir::operation::Gather &) final;
void visit(const ir::operation::HashtableLookup &) final;
void visit(const ir::operation::SpaceToBatchND &) final;
+ void visit(const ir::operation::Reverse &) final;
};
} // namespace acl_cl
diff --git a/runtime/onert/backend/acl_cl/KernelGenerator.cc b/runtime/onert/backend/acl_cl/KernelGenerator.cc
index 94489253d..cc9afcaeb 100644
--- a/runtime/onert/backend/acl_cl/KernelGenerator.cc
+++ b/runtime/onert/backend/acl_cl/KernelGenerator.cc
@@ -78,9 +78,9 @@ void KernelGenerator::visit(const ir::operation::BatchToSpaceND &node)
const auto block_size_index{
node.getInputs().at(ir::operation::BatchToSpaceND::Input::BLOCK_SIZE)};
- auto ofm_tensor = _tensor_reg->getAclTensor(ofm_index).get();
- auto ifm_tensor = _tensor_reg->getAclTensor(ifm_index).get();
- auto block_size_tensor = _tensor_reg->getAclTensor(block_size_index).get();
+ auto ofm_tensor = _tensor_reg->getAclTensor(ofm_index);
+ auto ifm_tensor = _tensor_reg->getAclTensor(ifm_index);
+ auto block_size_tensor = _tensor_reg->getAclTensor(block_size_index);
assert(_ctx.at(block_size_index).data());
@@ -98,9 +98,9 @@ void KernelGenerator::visit(const ir::operation::BinaryArithmetic &node)
const auto activation = node.param().activation;
- auto ofm_tensor = _tensor_reg->getAclTensor(ofm_index).get();
- auto lhs_tensor = _tensor_reg->getAclTensor(lhs_index).get();
- auto rhs_tensor = _tensor_reg->getAclTensor(rhs_index).get();
+ auto ofm_tensor = _tensor_reg->getAclTensor(ofm_index);
+ auto lhs_tensor = _tensor_reg->getAclTensor(lhs_index);
+ auto rhs_tensor = _tensor_reg->getAclTensor(rhs_index);
const auto act_info = acl_common::asActivationLayerInfo(activation);
@@ -164,10 +164,10 @@ void KernelGenerator::visit(const ir::operation::Conv2D &node)
ker_width, ker_height);
const auto activation = node.param().activation;
- auto ofm_tensor = _tensor_reg->getAclTensor(ofm_index).get();
- auto ifm_tensor = _tensor_reg->getAclTensor(ifm_index).get();
- auto ker_tensor = _tensor_reg->getAclTensor(ker_index).get();
- auto bias_tensor = _tensor_reg->getAclTensor(bias_index).get();
+ auto ofm_tensor = _tensor_reg->getAclTensor(ofm_index);
+ auto ifm_tensor = _tensor_reg->getAclTensor(ifm_index);
+ auto ker_tensor = _tensor_reg->getAclTensor(ker_index);
+ auto bias_tensor = _tensor_reg->getAclTensor(bias_index);
const auto conv_info = acl_common::asPadStrideInfo(padding, stride);
const auto act_info = acl_common::asActivationLayerInfo(activation);
@@ -202,10 +202,10 @@ void KernelGenerator::visit(const ir::operation::DepthwiseConv2D &node)
const auto multiplier = node.param().multiplier;
const auto activation = node.param().activation;
- auto ofm_tensor = _tensor_reg->getAclTensor(ofm_index).get();
- auto ifm_tensor = _tensor_reg->getAclTensor(ifm_index).get();
- auto ker_tensor = _tensor_reg->getAclTensor(ker_index).get();
- auto bias_tensor = _tensor_reg->getAclTensor(bias_index).get();
+ auto ofm_tensor = _tensor_reg->getAclTensor(ofm_index);
+ auto ifm_tensor = _tensor_reg->getAclTensor(ifm_index);
+ auto ker_tensor = _tensor_reg->getAclTensor(ker_index);
+ auto bias_tensor = _tensor_reg->getAclTensor(bias_index);
const auto conv_info = acl_common::asPadStrideInfo(padding, stride);
const auto act_info = acl_common::asActivationLayerInfo(activation);
@@ -240,7 +240,7 @@ void KernelGenerator::visit(const ir::operation::Concat &node)
return;
}
- auto output_tensor = _tensor_reg->getAclTensor(ofm_index).get();
+ auto output_tensor = _tensor_reg->getAclTensor(ofm_index);
std::vector<::arm_compute::ICLTensor *> input_tensors;
for (auto &ifm_ind : input_indexes)
input_tensors.emplace_back(_tensor_reg->getAclTensor(ifm_ind)->handle());
@@ -268,7 +268,7 @@ void KernelGenerator::visit(const ir::operation::Concat &node)
void KernelGenerator::visit(const ir::operation::FullyConnected &node)
{
const auto output_index{node.getOutputs().at(0)};
- auto output_tensor = _tensor_reg->getAclTensor(output_index).get();
+ auto output_tensor = _tensor_reg->getAclTensor(output_index);
const auto activation = node.param().activation;
auto fn = acl_common::kernelGenFullyConnected<acl_common::AclFunction, ::arm_compute::ICLTensor,
@@ -286,8 +286,8 @@ void KernelGenerator::visit(const ir::operation::Reduce &node)
const auto keep_dims{node.param().keep_dims};
const auto reduce_type = node.param().reduce_type;
- auto output_tensor = _tensor_reg->getAclTensor(output_index).get();
- auto input_tensor = _tensor_reg->getAclTensor(input_index).get();
+ auto output_tensor = _tensor_reg->getAclTensor(output_index);
+ auto input_tensor = _tensor_reg->getAclTensor(input_index);
// Convert to ACL axes taking into account negative values and possible duplicates.
const auto &axes = _ctx.at(axes_index);
@@ -320,8 +320,8 @@ void KernelGenerator::visit(const ir::operation::Reshape &node)
const auto output_index{node.getOutputs().at(0)};
const auto input_index{node.getInputs().at(ir::operation::Reshape::Input::INPUT)};
- auto output_tensor = _tensor_reg->getAclTensor(output_index).get();
- auto input_tensor = _tensor_reg->getAclTensor(input_index).get();
+ auto output_tensor = _tensor_reg->getAclTensor(output_index);
+ auto input_tensor = _tensor_reg->getAclTensor(input_index);
// NOTE This operation must not be changed the layout from frontend to backend
// So, PermutationOperationPass makes layouts of frontend and backend the same.
@@ -351,8 +351,8 @@ void KernelGenerator::visit(const ir::operation::Squeeze &node)
(void)dims;
(void)ndim;
- auto output_tensor = _tensor_reg->getAclTensor(output_index).get();
- auto input_tensor = _tensor_reg->getAclTensor(input_index).get();
+ auto output_tensor = _tensor_reg->getAclTensor(output_index);
+ auto input_tensor = _tensor_reg->getAclTensor(input_index);
auto fn = acl_common::generateLayer<arm_compute::CLReshapeLayer>(input_tensor->handle(),
output_tensor->handle());
_return_fn = asAclFunction(std::move(fn));
@@ -365,8 +365,8 @@ void KernelGenerator::visit(const ir::operation::Softmax &node)
const auto beta = node.param().beta;
- auto output_tensor = _tensor_reg->getAclTensor(output_index).get();
- auto input_tensor = _tensor_reg->getAclTensor(input_index).get();
+ auto output_tensor = _tensor_reg->getAclTensor(output_index);
+ auto input_tensor = _tensor_reg->getAclTensor(input_index);
auto fn = acl_common::generateLayer<arm_compute::CLSoftmaxLayer>(
_tensor_builder->acl_tensor_manager()->internal_buffer_manager(), input_tensor->handle(),
@@ -382,8 +382,8 @@ void KernelGenerator::visit(const ir::operation::Slice &node)
const auto begins_index{node.getInputs().at(ir::operation::Slice::Input::BEGINS)};
const auto sizes_index{node.getInputs().at(ir::operation::Slice::Input::SIZES)};
- auto outputData_tensor = _tensor_reg->getAclTensor(output_index).get();
- auto inputData_tensor = _tensor_reg->getAclTensor(input_index).get();
+ auto outputData_tensor = _tensor_reg->getAclTensor(output_index);
+ auto inputData_tensor = _tensor_reg->getAclTensor(input_index);
const auto frontend_layout = _current_op_seq_layout;
const auto backend_layout = inputData_tensor->layout();
@@ -449,8 +449,8 @@ void KernelGenerator::visit(const ir::operation::StridedSlice &node)
const auto ends_index{node.getInputs().at(ir::operation::StridedSlice::Input::ENDS)};
const auto strides_index{node.getInputs().at(ir::operation::StridedSlice::Input::STRIDES)};
- auto outputData_tensor = _tensor_reg->getAclTensor(output_index).get();
- auto inputData_tensor = _tensor_reg->getAclTensor(input_index).get();
+ auto outputData_tensor = _tensor_reg->getAclTensor(output_index);
+ auto inputData_tensor = _tensor_reg->getAclTensor(input_index);
const auto frontend_layout = _current_op_seq_layout;
const auto backend_layout = inputData_tensor->layout();
@@ -523,10 +523,23 @@ void KernelGenerator::visit(const ir::operation::StridedSlice &node)
strides_set.set(i, strides[i]);
}
+ // Disable applied dim_correction
+ if (inputData_tensor->num_dimensions() != inputData_tensor->info()->num_dimensions())
+ {
+ // This means that high dimension's value is 1 and input tensor is applied dim_correction
+ acl_common::disableDimCorrection(inputData_tensor);
+ }
+
auto fn = acl_common::generateLayer<arm_compute::CLStridedSlice>(
inputData_tensor->handle(), outputData_tensor->handle(), starts_set, ends_set, strides_set,
begin_mask, end_mask, shrink_axis_mask);
+ // Revert disabling applied dim_correction
+ if (inputData_tensor->dimension(0) == 1)
+ {
+ acl_common::enableDimCorrection(inputData_tensor);
+ }
+
_return_fn = asAclFunction(std::move(fn));
}
@@ -534,22 +547,47 @@ void KernelGenerator::visit(const ir::operation::Transpose &node)
{
const auto ofm_idx{node.getOutputs().at(0)};
const auto ifm_idx{node.getInputs().at(ir::operation::Transpose::Input::INPUT)};
- const auto &perm{node.param().perm};
+ const auto perm_idx{node.getInputs().at(ir::operation::Transpose::Input::PERMUTATION)};
const auto rank = _ctx.at(ifm_idx).shape().rank();
- auto ofm_tensor = _tensor_reg->getAclTensor(ofm_idx).get();
- auto ifm_tensor = _tensor_reg->getAclTensor(ifm_idx).get();
+ auto ofm_tensor = _tensor_reg->getAclTensor(ofm_idx);
+ auto ifm_tensor = _tensor_reg->getAclTensor(ifm_idx);
const auto frontend_layout = _current_op_seq_layout;
const auto backend_layout = ifm_tensor->layout();
- std::vector<std::int32_t> pv(perm.cbegin(), perm.cend());
- // Reversed
- auto backend_pv = ::onert::backend::acl_common::getARMComputePermutationVector(
- rank, pv, frontend_layout, backend_layout);
+ const auto &perms = _ctx.at(perm_idx);
+ std::vector<int32_t> pv;
+ if (perms.shape() == ir::Shape{0})
+ {
+ pv.resize(rank);
+ std::iota(pv.begin(), pv.end(), 0);
+ std::reverse(pv.begin(), pv.end());
+ }
+ else
+ {
+ pv = _ctx.at(perm_idx).asVector<int32_t>();
+ }
- auto fn = acl_common::generateLayer<::arm_compute::CLPermute>(ifm_tensor->handle(),
- ofm_tensor->handle(), backend_pv);
+ std::unique_ptr<arm_compute::IFunction> fn;
+ if (rank == 1)
+ {
+ fn = acl_common::generateLayer<arm_compute::CLCopy>(ifm_tensor->handle(), ofm_tensor->handle());
+ }
+ else if (rank == 2)
+ {
+ assert(pv.size() == 2 && pv.at(0) == 1 && pv.at(1) == 0);
+ fn = acl_common::generateLayer<arm_compute::CLTranspose>(ifm_tensor->handle(),
+ ofm_tensor->handle());
+ }
+ else
+ {
+ auto backend_pv =
+ acl_common::getARMComputePermutationVector(rank, pv, frontend_layout, backend_layout);
+
+ fn = acl_common::generateLayer<arm_compute::CLPermute>(ifm_tensor->handle(),
+ ofm_tensor->handle(), backend_pv);
+ }
_return_fn = asAclFunction(std::move(fn));
}
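For the empty-PERMUTATION case handled above, a standalone sketch of the default (reversed) permutation vector the kernel builds:

    #include <algorithm>
    #include <cstdint>
    #include <numeric>
    #include <vector>

    // With no PERMUTATION operand the kernel falls back to a full axis reversal:
    // for rank 4 this produces {3, 2, 1, 0}.
    static std::vector<int32_t> defaultTransposePerm(int rank)
    {
      std::vector<int32_t> pv(rank);
      std::iota(pv.begin(), pv.end(), 0);  // {0, 1, ..., rank-1}
      std::reverse(pv.begin(), pv.end());  // {rank-1, ..., 1, 0}
      return pv;
    }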
@@ -559,8 +597,8 @@ void KernelGenerator::visit(const ir::operation::ElementwiseActivation &node)
const auto ofm_index{node.getOutputs().at(0)};
const auto ifm_index{node.getInputs().at(ir::operation::ElementwiseActivation::Input::INPUT)};
- auto ofm_tensor = _tensor_reg->getAclTensor(ofm_index).get();
- auto ifm_tensor = _tensor_reg->getAclTensor(ifm_index).get();
+ auto ofm_tensor = _tensor_reg->getAclTensor(ofm_index);
+ auto ifm_tensor = _tensor_reg->getAclTensor(ifm_index);
const ::arm_compute::ActivationLayerInfo act_info = acl_common::asActivationLayerInfo(
node.param().op_type, node.param().alpha, node.param().beta);
@@ -577,9 +615,9 @@ void KernelGenerator::visit(const ir::operation::ElementwiseBinary &node)
const auto lhs_index{node.getInputs().at(ir::operation::ElementwiseBinary::Input::LHS)};
const auto rhs_index{node.getInputs().at(ir::operation::ElementwiseBinary::Input::RHS)};
- auto output_tensor = _tensor_reg->getAclTensor(output_index).get();
- auto lhs_tensor = _tensor_reg->getAclTensor(lhs_index).get();
- auto rhs_tensor = _tensor_reg->getAclTensor(rhs_index).get();
+ auto output_tensor = _tensor_reg->getAclTensor(output_index);
+ auto lhs_tensor = _tensor_reg->getAclTensor(lhs_index);
+ auto rhs_tensor = _tensor_reg->getAclTensor(rhs_index);
std::unique_ptr<arm_compute::IFunction> fn;
switch (node.param().op_type)
@@ -626,8 +664,8 @@ void KernelGenerator::visit(const ir::operation::ElementwiseUnary &node)
const auto output_index{node.getOutputs().at(0)};
const auto input_index{node.getInputs().at(ir::operation::ElementwiseUnary::Input::INPUT)};
- auto output_tensor = _tensor_reg->getAclTensor(output_index).get();
- auto input_tensor = _tensor_reg->getAclTensor(input_index).get();
+ auto output_tensor = _tensor_reg->getAclTensor(output_index);
+ auto input_tensor = _tensor_reg->getAclTensor(input_index);
std::unique_ptr<arm_compute::IFunction> fn;
switch (node.param().op_type)
@@ -647,7 +685,11 @@ void KernelGenerator::visit(const ir::operation::ElementwiseUnary &node)
{
fn = acl_common::generateLayer<arm_compute::CLCopy>(input_tensor->handle(),
output_tensor->handle());
- ;
+ }
+ else if (_ctx.at(input_index).typeInfo().type() == ir::DataType::BOOL8)
+ {
+ fn = acl_common::generateLayer<arm_compute::CLCastBool>(input_tensor->handle(),
+ output_tensor->handle());
}
else
{
@@ -719,8 +761,8 @@ void KernelGenerator::visit(const ir::operation::ExpandDims &node)
const auto output_index{node.getOutputs().at(0)};
const auto input_index{node.getInputs().at(ir::operation::ExpandDims::Input::INPUT)};
- auto output_tensor = _tensor_reg->getAclTensor(output_index).get();
- auto input_tensor = _tensor_reg->getAclTensor(input_index).get();
+ auto output_tensor = _tensor_reg->getAclTensor(output_index);
+ auto input_tensor = _tensor_reg->getAclTensor(input_index);
auto fn = acl_common::generateLayer<arm_compute::CLReshapeLayer>(input_tensor->handle(),
output_tensor->handle());
@@ -735,10 +777,10 @@ void KernelGenerator::visit(const ir::operation::InstanceNorm &node)
const auto gamma_index{node.getInputs().at(ir::operation::InstanceNorm::Input::GAMMA)};
const auto beta_index{node.getInputs().at(ir::operation::InstanceNorm::Input::BETA)};
- auto ofm_tensor = _tensor_reg->getAclTensor(ofm_index).get();
- auto ifm_tensor = _tensor_reg->getAclTensor(ifm_index).get();
- auto gamma_tensor = _tensor_reg->getAclTensor(gamma_index).get();
- auto beta_tensor = _tensor_reg->getAclTensor(beta_index).get();
+ auto ofm_tensor = _tensor_reg->getAclTensor(ofm_index);
+ auto ifm_tensor = _tensor_reg->getAclTensor(ifm_index);
+ auto gamma_tensor = _tensor_reg->getAclTensor(gamma_index);
+ auto beta_tensor = _tensor_reg->getAclTensor(beta_index);
auto epsilon = node.param().epsilon;
auto activation = node.param().activation;
@@ -764,9 +806,9 @@ void KernelGenerator::visit(const ir::operation::Comparison &node)
const auto comparison_type = node.param().comparison_type;
- auto output_tensor = _tensor_reg->getAclTensor(output_index).get();
- auto input0_tensor = _tensor_reg->getAclTensor(input0_index).get();
- auto input1_tensor = _tensor_reg->getAclTensor(input1_index).get();
+ auto output_tensor = _tensor_reg->getAclTensor(output_index);
+ auto input0_tensor = _tensor_reg->getAclTensor(input0_index);
+ auto input1_tensor = _tensor_reg->getAclTensor(input1_index);
auto fn = acl_common::generateLayer<arm_compute::CLComparison>(
input0_tensor->handle(), input1_tensor->handle(), output_tensor->handle(),
@@ -775,6 +817,56 @@ void KernelGenerator::visit(const ir::operation::Comparison &node)
_return_fn = asAclFunction(std::move(fn));
}
+void KernelGenerator::visit(const ir::operation::OneHot &node)
+{
+ const auto output_idx{node.getOutputs().at(0)};
+ const auto indices_idx{node.getInputs().at(ir::operation::OneHot::Input::INDICES)};
+ const auto depth_idx{node.getInputs().at(ir::operation::OneHot::Input::DEPTH)};
+ const auto onvalue_idx{node.getInputs().at(ir::operation::OneHot::Input::ON_VALUE)};
+ const auto offvalue_idx{node.getInputs().at(ir::operation::OneHot::Input::OFF_VALUE)};
+ const auto depth = _ctx.at(depth_idx).asScalar<int32_t>();
+ assert(depth > 0);
+
+ auto output_tensor = _tensor_reg->getAclTensor(output_idx);
+ auto indices_tensor = _tensor_reg->getAclTensor(indices_idx);
+ auto onvalue_tensor = _tensor_reg->getAclTensor(onvalue_idx);
+
+ const size_t output_rank = _ctx.at(output_idx).shape().rank();
+ const auto frontend_layout = _current_op_seq_layout;
+ const auto backend_layout = output_tensor->layout();
+ int32_t axis = node.param().axis == -1 ? output_rank - 1 : node.param().axis;
+ axis = acl_common::ToARMComputeAxis(output_rank, axis, frontend_layout, backend_layout).value();
+
+ if (output_tensor->num_dimensions() != output_tensor->info()->num_dimensions())
+ {
+ // This means that high dimension's value is 1 and output_tensor is applied dim_correction
+ acl_common::disableDimCorrection(output_tensor);
+ }
+
+ std::unique_ptr<::arm_compute::IFunction> fn;
+ const auto &offvalue = _ctx.at(offvalue_idx);
+ if (offvalue.isConstant())
+ {
+ fn = acl_common::generateLayer<arm_compute::CLOneHot>(
+ indices_tensor->handle(), onvalue_tensor->handle(), output_tensor->handle(),
+ acl_common::asPixelValue(offvalue), static_cast<uint32_t>(depth), axis);
+ }
+ else
+ {
+ auto offvalue_tensor = _tensor_reg->getAclTensor(offvalue_idx);
+ fn = acl_common::generateLayer<arm_compute::CLOneHot>(
+ indices_tensor->handle(), onvalue_tensor->handle(), offvalue_tensor->handle(),
+ output_tensor->handle(), static_cast<uint32_t>(depth), axis);
+ }
+
+ if (output_tensor->dimension(0) == 1)
+ {
+ acl_common::enableDimCorrection(output_tensor);
+ }
+
+ _return_fn = asAclFunction(std::move(fn));
+}
+
void KernelGenerator::visit(const ir::operation::Pack &node)
{
const auto output_index{node.getOutputs().at(0)};
@@ -786,41 +878,39 @@ void KernelGenerator::visit(const ir::operation::Pack &node)
for (const auto &input_index : node.getInputs())
input_indexes.emplace_back(input_index);
- auto output = _tensor_reg->getAclTensor(output_index).get()->handle();
+ auto output = _tensor_reg->getAclTensor(output_index)->handle();
std::vector<arm_compute::ICLTensor *> inputs;
for (const auto &input_index : input_indexes)
inputs.emplace_back(_tensor_reg->getAclTensor(input_index)->handle());
const auto frontend_layout = _current_op_seq_layout;
- const auto backend_layout = _tensor_reg->getAclTensor(output_index).get()->layout();
+ const auto backend_layout = _tensor_reg->getAclTensor(output_index)->layout();
if (axis < 0)
axis += output_rank;
axis = acl_common::ToARMComputeAxis(output_rank, axis, frontend_layout, backend_layout).value();
// Disable applied dim_correction
- std::vector<arm_compute::TensorShape> orig_inputs_acl_tensor_shapes;
for (const auto &input_index : input_indexes)
{
- size_t input_rank = _ctx.at(input_index).shape().rank();
const auto &input_tensor = _tensor_reg->getAclTensor(input_index);
- orig_inputs_acl_tensor_shapes.emplace_back(input_tensor->info()->tensor_shape());
- assert(input_rank == input_tensor->num_dimensions());
- if (input_rank != input_tensor->info()->num_dimensions())
+ if (input_tensor->num_dimensions() != input_tensor->info()->num_dimensions())
{
- // This means that high dimension's value is 1 and ifm tensor is applied dim_correction
- input_tensor->info()->set_tensor_shape(acl_common::asTensorShape(
- _ctx.at(input_index).shape(), _current_op_seq_layout, backend_layout, false));
+ // This means that high dimension's value is 1 and input tensor is applied dim_correction
+ acl_common::disableDimCorrection(input_tensor);
}
}
auto fn = acl_common::generateLayer<arm_compute::CLStackLayer>(inputs, axis, output);
// Revert disabling applied dim_correction
- assert(inputs.size() == orig_inputs_acl_tensor_shapes.size());
- for (size_t i = 0; i < inputs.size(); ++i)
+ for (const auto &input_index : input_indexes)
{
- inputs.at(i)->info()->set_tensor_shape(orig_inputs_acl_tensor_shapes.at(i));
+ const auto &input_tensor = _tensor_reg->getAclTensor(input_index);
+ if (input_tensor->dimension(0) == 1)
+ {
+ acl_common::enableDimCorrection(input_tensor);
+ }
}
_return_fn = asAclFunction(std::move(fn));
@@ -833,7 +923,7 @@ void KernelGenerator::visit(const ir::operation::Pool2D &node)
acl_common::convertPoolType(node.param().op_type));
const auto ofm_index{node.getOutputs().at(0)};
- auto ofm_tensor = _tensor_reg->getAclTensor(ofm_index).get();
+ auto ofm_tensor = _tensor_reg->getAclTensor(ofm_index);
const auto activation = node.param().activation;
_return_fn = std::make_unique<exec::FunctionSequence>(
asAclFunction(std::move(raw_fn)),
@@ -845,8 +935,8 @@ void KernelGenerator::visit(const ir::operation::Permute &node)
const auto ofm_idx{node.getOutputs().at(0)};
const auto ifm_idx{node.getInputs().at(0)};
const auto permute_type = node.getPermuteType();
- auto ofm_tensor = _tensor_reg->getAclTensor(ofm_idx).get();
- auto ifm_tensor = _tensor_reg->getAclTensor(ifm_idx).get();
+ auto ofm_tensor = _tensor_reg->getAclTensor(ofm_idx);
+ auto ifm_tensor = _tensor_reg->getAclTensor(ifm_idx);
const auto rank = _ctx.at(ofm_idx).shape().rank();
assert(_ctx.at(ifm_idx).shape().rank() == _ctx.at(ofm_idx).shape().rank());
@@ -879,11 +969,10 @@ void KernelGenerator::visit(const ir::operation::Permute &node)
void KernelGenerator::visit(const ir::operation::ResizeBilinear &node)
{
const auto ofm_index{node.getOutputs().at(0)};
-
const auto ifm_index{node.getInputs().at(ir::operation::ResizeBilinear::Input::INPUT)};
- auto ofm_tensor = _tensor_reg->getAclTensor(ofm_index).get();
- auto ifm_tensor = _tensor_reg->getAclTensor(ifm_index).get();
+ auto ofm_tensor = _tensor_reg->getAclTensor(ofm_index);
+ auto ifm_tensor = _tensor_reg->getAclTensor(ifm_index);
auto fn = acl_common::generateLayer<arm_compute::CLScale>(
ifm_tensor->handle(), ofm_tensor->handle(), ::arm_compute::InterpolationPolicy::BILINEAR,
@@ -896,11 +985,10 @@ void KernelGenerator::visit(const ir::operation::ResizeBilinear &node)
void KernelGenerator::visit(const ir::operation::ResizeNearestNeighbor &node)
{
const auto ofm_index{node.getOutputs().at(0)};
-
const auto ifm_index{node.getInputs().at(ir::operation::ResizeNearestNeighbor::Input::INPUT)};
- auto ofm_tensor = _tensor_reg->getAclTensor(ofm_index).get();
- auto ifm_tensor = _tensor_reg->getAclTensor(ifm_index).get();
+ auto ofm_tensor = _tensor_reg->getAclTensor(ofm_index);
+ auto ifm_tensor = _tensor_reg->getAclTensor(ifm_index);
auto fn = acl_common::generateLayer<arm_compute::CLScale>(
ifm_tensor->handle(), ofm_tensor->handle(),
@@ -925,14 +1013,14 @@ void KernelGenerator::visit(const ir::operation::RNN &node)
const auto activation = node.param().activation;
- auto output_tensor = _tensor_reg->getAclTensor(output_index).get();
- auto hidden_state_out_tensor = _tensor_reg->getAclTensor(hidden_state_out_index).get();
+ auto output_tensor = _tensor_reg->getAclTensor(output_index);
+ auto hidden_state_out_tensor = _tensor_reg->getAclTensor(hidden_state_out_index);
- auto input_tensor = _tensor_reg->getAclTensor(input_index).get();
- auto weights_tensor = _tensor_reg->getAclTensor(weights_index).get();
- auto recurrent_weights_tensor = _tensor_reg->getAclTensor(recurrent_weights_index).get();
- auto bias_tensor = _tensor_reg->getAclTensor(bias_index).get();
- auto hidden_state_in_tensor = _tensor_reg->getAclTensor(hidden_state_in_index).get();
+ auto input_tensor = _tensor_reg->getAclTensor(input_index);
+ auto weights_tensor = _tensor_reg->getAclTensor(weights_index);
+ auto recurrent_weights_tensor = _tensor_reg->getAclTensor(recurrent_weights_index);
+ auto bias_tensor = _tensor_reg->getAclTensor(bias_index);
+ auto hidden_state_in_tensor = _tensor_reg->getAclTensor(hidden_state_in_index);
auto act_info = ::onert::backend::acl_common::asActivationLayerInfo(activation);
auto copy_layer = acl_common::generateLayer<arm_compute::CLCopy>(
@@ -954,10 +1042,10 @@ void KernelGenerator::visit(const ir::operation::SpaceToBatchND &node)
node.getInputs().at(ir::operation::SpaceToBatchND::Input::BLOCK_SIZE)};
const auto paddings_index{node.getInputs().at(ir::operation::SpaceToBatchND::Input::PADDINGS)};
- auto ofm_tensor = _tensor_reg->getAclTensor(ofm_index).get();
- auto ifm_tensor = _tensor_reg->getAclTensor(ifm_index).get();
- auto block_size_tensor = _tensor_reg->getAclTensor(block_size_index).get();
- auto paddings_tensor = _tensor_reg->getAclTensor(paddings_index).get();
+ auto ofm_tensor = _tensor_reg->getAclTensor(ofm_index);
+ auto ifm_tensor = _tensor_reg->getAclTensor(ifm_index);
+ auto block_size_tensor = _tensor_reg->getAclTensor(block_size_index);
+ auto paddings_tensor = _tensor_reg->getAclTensor(paddings_index);
assert(_ctx.at(block_size_index).data());
assert(_ctx.at(paddings_index).data());
@@ -976,8 +1064,8 @@ void KernelGenerator::visit(const ir::operation::SpaceToDepth &node)
auto block_size = node.param().block_size;
- auto ofm_tensor = _tensor_reg->getAclTensor(ofm_index).get();
- auto ifm_tensor = _tensor_reg->getAclTensor(ifm_index).get();
+ auto ofm_tensor = _tensor_reg->getAclTensor(ofm_index);
+ auto ifm_tensor = _tensor_reg->getAclTensor(ifm_index);
auto fn = acl_common::generateLayer<arm_compute::CLSpaceToDepthLayer>(
ifm_tensor->handle(), ofm_tensor->handle(), block_size);
@@ -991,9 +1079,9 @@ void KernelGenerator::visit(const ir::operation::EmbeddingLookup &node)
const auto lookups_index{node.getInputs().at(ir::operation::EmbeddingLookup::Input::LOOKUPS)};
const auto values_index{node.getInputs().at(ir::operation::EmbeddingLookup::Input::VALUES)};
- auto output_tensor = _tensor_reg->getAclTensor(output_index).get();
- auto lookups_tensor = _tensor_reg->getAclTensor(lookups_index).get();
- auto values_tensor = _tensor_reg->getAclTensor(values_index).get();
+ auto output_tensor = _tensor_reg->getAclTensor(output_index);
+ auto lookups_tensor = _tensor_reg->getAclTensor(lookups_index);
+ auto values_tensor = _tensor_reg->getAclTensor(values_index);
auto fn = acl_common::generateLayer<arm_compute::CLEmbeddingLookup>(
values_tensor->handle(), output_tensor->handle(), lookups_tensor->handle());
@@ -1020,8 +1108,8 @@ void KernelGenerator::visit(const ir::operation::L2Normalization &node)
float beta = 0.5f; // pow(reduction, -0.5) = 1 / sqrt(reduction)
float bias = 0.0f; // Don't offset the reduction.
- auto ofm_tensor = _tensor_reg->getAclTensor(ofm_index).get();
- auto ifm_tensor = _tensor_reg->getAclTensor(ifm_index).get();
+ auto ofm_tensor = _tensor_reg->getAclTensor(ofm_index);
+ auto ifm_tensor = _tensor_reg->getAclTensor(ifm_index);
const auto norm_info = ::arm_compute::NormalizationLayerInfo(::arm_compute::NormType::CROSS_MAP,
radius, alpha, beta, bias, false);
@@ -1041,12 +1129,12 @@ void KernelGenerator::visit(const ir::operation::HashtableLookup &node)
const auto keys_index{node.getInputs().at(ir::operation::HashtableLookup::Input::KEYS)};
const auto values_index{node.getInputs().at(ir::operation::HashtableLookup::Input::VALUES)};
- auto output_tensor = _tensor_reg->getAclTensor(output_index).get();
- auto hits_tensor = _tensor_reg->getAclTensor(hits_index).get();
+ auto output_tensor = _tensor_reg->getAclTensor(output_index);
+ auto hits_tensor = _tensor_reg->getAclTensor(hits_index);
- auto lookups_tensor = _tensor_reg->getAclTensor(lookups_index).get();
- auto keys_tensor = _tensor_reg->getAclTensor(keys_index).get();
- auto values_tensor = _tensor_reg->getAclTensor(values_index).get();
+ auto lookups_tensor = _tensor_reg->getAclTensor(lookups_index);
+ auto keys_tensor = _tensor_reg->getAclTensor(keys_index);
+ auto values_tensor = _tensor_reg->getAclTensor(values_index);
auto fn = acl_common::generateLayer<arm_compute::CLHashtableLookup>(
lookups_tensor->handle(), keys_tensor->handle(), values_tensor->handle(),
@@ -1061,9 +1149,9 @@ void KernelGenerator::visit(const ir::operation::PReLU &node)
const auto ifm_index{node.getInputs().at(ir::operation::PReLU::Input::INPUT)};
const auto alpha_index{node.getInputs().at(ir::operation::PReLU::Input::ALPHA)};
- auto ofm_tensor = _tensor_reg->getAclTensor(ofm_index).get();
- auto ifm_tensor = _tensor_reg->getAclTensor(ifm_index).get();
- auto alpha_tensor = _tensor_reg->getAclTensor(alpha_index).get();
+ auto ofm_tensor = _tensor_reg->getAclTensor(ofm_index);
+ auto ifm_tensor = _tensor_reg->getAclTensor(ifm_index);
+ auto alpha_tensor = _tensor_reg->getAclTensor(alpha_index);
auto fn = acl_common::generateLayer<arm_compute::CLPReluLayer>(
ifm_tensor->handle(), alpha_tensor->handle(), ofm_tensor->handle());
@@ -1096,9 +1184,9 @@ void KernelGenerator::visit(const ir::operation::TransposeConv &node)
invalid_vertical = ofm_shape.H - (1 + (ifm_shape.H - 1) * stride.vertical) - (ker_shape.H - 1);
}
- auto ofm_tensor = _tensor_reg->getAclTensor(ofm_index).get();
- auto ifm_tensor = _tensor_reg->getAclTensor(ifm_index).get();
- auto ker_tensor = _tensor_reg->getAclTensor(ker_index).get();
+ auto ofm_tensor = _tensor_reg->getAclTensor(ofm_index);
+ auto ifm_tensor = _tensor_reg->getAclTensor(ifm_index);
+ auto ker_tensor = _tensor_reg->getAclTensor(ker_index);
const auto tconv_info = acl_common::asPadStrideInfo(padding, stride);
@@ -1116,9 +1204,9 @@ void KernelGenerator::visit(const ir::operation::SquaredDifference &node)
const auto lhs_index{node.getInputs().at(ir::operation::SquaredDifference::Input::LHS)};
const auto rhs_index{node.getInputs().at(ir::operation::SquaredDifference::Input::RHS)};
- auto ofm_tensor = _tensor_reg->getAclTensor(ofm_index).get();
- auto lhs_tensor = _tensor_reg->getAclTensor(lhs_index).get();
- auto rhs_tensor = _tensor_reg->getAclTensor(rhs_index).get();
+ auto ofm_tensor = _tensor_reg->getAclTensor(ofm_index);
+ auto lhs_tensor = _tensor_reg->getAclTensor(lhs_index);
+ auto rhs_tensor = _tensor_reg->getAclTensor(rhs_index);
auto fn = acl_common::generateLayer<arm_compute::CLElementwiseSquaredDiff>(
lhs_tensor->handle(), rhs_tensor->handle(), ofm_tensor->handle());
@@ -1140,9 +1228,9 @@ void KernelGenerator::visit(const ir::operation::TopKV2 &node)
const auto k = node.param().k;
- auto values_tensor = _tensor_reg->getAclTensor(outputValues_index).get();
- auto indices_tensor = _tensor_reg->getAclTensor(outputIndices_index).get();
- auto input_tensor = _tensor_reg->getAclTensor(inputData_index).get();
+ auto values_tensor = _tensor_reg->getAclTensor(outputValues_index);
+ auto indices_tensor = _tensor_reg->getAclTensor(outputIndices_index);
+ auto input_tensor = _tensor_reg->getAclTensor(inputData_index);
auto fn = acl_common::generateLayer<arm_compute::CLTopKV2>(
input_tensor->handle(), k, values_tensor->handle(), indices_tensor->handle());
@@ -1162,9 +1250,9 @@ void KernelGenerator::visit(const ir::operation::Gather &node)
const auto axis_value = (axis_raw < 0 ? (ifm_rank + axis_raw) : axis_raw);
const int axis = ::onert::backend::acl_common::ToARMComputeAxis(ifm_rank, axis_value).value();
- auto ofm_tensor = _tensor_reg->getAclTensor(ofm_index).get();
- auto ifm_tensor = _tensor_reg->getAclTensor(ifm_index).get();
- auto indices_tensor = _tensor_reg->getAclTensor(indices_index).get();
+ auto ofm_tensor = _tensor_reg->getAclTensor(ofm_index);
+ auto ifm_tensor = _tensor_reg->getAclTensor(ifm_index);
+ auto indices_tensor = _tensor_reg->getAclTensor(indices_index);
// NOTE The frontend layout and backend layout must be the same for this operation.
// If not the same, we have to add a stage(?) to perform permutation of output tensor. It
@@ -1187,29 +1275,29 @@ void KernelGenerator::visit(const ir::operation::Gather &node)
assert(k == indices_tensor->num_dimensions());
// Disable applied dim_correction
- const auto orig_ifm_acl_tensor_shape = ifm_tensor->info()->tensor_shape();
if (n != ifm_tensor->info()->num_dimensions())
{
// This means that high dimension's value is 1 and ifm tensor is applied dim_correction
- const auto ifm = _ctx.at(ifm_index);
- ifm_tensor->info()->set_tensor_shape(
- acl_common::asTensorShape(ifm.shape(), _current_op_seq_layout, backend_layout, false));
+ acl_common::disableDimCorrection(ifm_tensor);
}
- const auto orig_indice_acl_tensor_shape = indices_tensor->info()->tensor_shape();
if (k != indices_tensor->info()->num_dimensions())
{
// This means that high dimension's value is 1 and indices tensor is applied dim_correction
- const auto indices = _ctx.at(indices_index);
- indices_tensor->info()->set_tensor_shape(
- acl_common::asTensorShape(indices.shape(), _current_op_seq_layout, backend_layout, false));
+ acl_common::disableDimCorrection(indices_tensor);
}
auto fn = acl_common::generateLayer<arm_compute::CLGatherEx>(
ifm_tensor->handle(), indices_tensor->handle(), ofm_tensor->handle(), axis);
// Revert disabling applied dim_correction
- ifm_tensor->info()->set_tensor_shape(orig_ifm_acl_tensor_shape);
- indices_tensor->info()->set_tensor_shape(orig_indice_acl_tensor_shape);
+ if (ifm_tensor->dimension(0) == 1)
+ {
+ acl_common::enableDimCorrection(ifm_tensor);
+ }
+ if (indices_tensor->dimension(0) == 1)
+ {
+ acl_common::enableDimCorrection(indices_tensor);
+ }
_return_fn = asAclFunction(std::move(fn));
}
@@ -1218,19 +1306,20 @@ void KernelGenerator::visit(const ir::operation::ArgMax &node)
{
const auto ofm_index{node.getOutputs().at(0)};
const auto ifm_index{node.getInputs().at(ir::operation::ArgMax::Input::INPUT)};
+ const auto axis_index{node.getInputs().at(ir::operation::ArgMax::Input::AXIS)};
auto ifm_shape = _ctx.at(ifm_index).shape();
auto ofm_shape = _ctx.at(ofm_index).shape();
assert((ifm_shape.rank() - 1) == ofm_shape.rank());
- auto ofm_tensor = _tensor_reg->getAclTensor(ofm_index).get();
- auto ifm_tensor = _tensor_reg->getAclTensor(ifm_index).get();
+ auto ofm_tensor = _tensor_reg->getAclTensor(ofm_index);
+ auto ifm_tensor = _tensor_reg->getAclTensor(ifm_index);
const auto ifm_rank = _ctx.at(ifm_index).shape().rank();
auto frontend_layout = _current_op_seq_layout;
auto backend_layout = ifm_tensor->layout();
- int axis_value = node.param().axis;
+ int axis_value = _ctx.at(axis_index).asScalar<int32_t>();
if (axis_value < 0)
{
axis_value += ifm_rank;
@@ -1239,7 +1328,7 @@ void KernelGenerator::visit(const ir::operation::ArgMax &node)
auto acl_axis =
acl_common::ToARMComputeAxis(ifm_rank, axis_value, frontend_layout, backend_layout).value();
- auto fn = acl_common::generateLayer<arm_compute::CLArgMinMaxLayer>(
+ auto fn = acl_common::generateLayer<arm_compute::CLArgMinMaxLayerEx>(
ifm_tensor->handle(), acl_axis, ofm_tensor->handle(),
::arm_compute::ReductionOperation::ARG_IDX_MAX);
@@ -1257,8 +1346,8 @@ void KernelGenerator::visit(const ir::operation::LocalResponseNormalization &nod
auto beta = node.param().beta;
auto bias = node.param().bias;
- auto ofm_tensor = _tensor_reg->getAclTensor(ofm_index).get();
- auto ifm_tensor = _tensor_reg->getAclTensor(ifm_index).get();
+ auto ofm_tensor = _tensor_reg->getAclTensor(ofm_index);
+ auto ifm_tensor = _tensor_reg->getAclTensor(ifm_index);
const auto norm_info = ::arm_compute::NormalizationLayerInfo(
::arm_compute::NormType::CROSS_MAP, radius * 2 + 1, alpha, beta, bias, false);
@@ -1277,8 +1366,8 @@ void KernelGenerator::visit(const ir::operation::DepthToSpace &node)
auto block_size = node.param().block_size;
assert(block_size > 0);
- auto output_tensor = _tensor_reg->getAclTensor(output_index).get();
- auto input_tensor = _tensor_reg->getAclTensor(input_index).get();
+ auto output_tensor = _tensor_reg->getAclTensor(output_index);
+ auto input_tensor = _tensor_reg->getAclTensor(input_index);
auto fn = acl_common::generateLayer<arm_compute::CLDepthToSpaceLayer>(
input_tensor->handle(), output_tensor->handle(), block_size);
@@ -1289,22 +1378,27 @@ void KernelGenerator::visit(const ir::operation::DepthToSpace &node)
void KernelGenerator::visit(const ir::operation::Split &node)
{
const auto ifm_index{node.getInputs().at(ir::operation::Split::Input::INPUT)};
+ const auto axis_index{node.getInputs().at(ir::operation::Split::Input::AXIS)};
assert(node.param().num_splits == static_cast<int>(node.getOutputs().size()));
+ if (!_ctx.at(axis_index).isConstant())
+ {
+ throw std::runtime_error("Non-constant axis_index NYI for acl_cl backend");
+ }
const auto ifm_rank = _ctx.at(ifm_index).shape().rank();
std::vector<ir::OperandIndex> output_indexes;
for (const auto &output : node.getOutputs())
output_indexes.emplace_back(output);
- auto ifm_tensor = _tensor_reg->getAclTensor(ifm_index).get();
+ auto ifm_tensor = _tensor_reg->getAclTensor(ifm_index);
std::vector<arm_compute::ICLTensor *> output_tensors;
for (const auto &ofm_ind : output_indexes)
- output_tensors.emplace_back(_tensor_reg->getAclTensor(ofm_ind).get()->handle());
+ output_tensors.emplace_back(_tensor_reg->getAclTensor(ofm_ind)->handle());
const auto frontend_layout = _current_op_seq_layout;
const auto backend_layout = ifm_tensor->layout();
- auto axis = node.param().axis;
+ auto axis = _ctx.at(axis_index).asScalar<int32_t>();
if (axis < 0)
axis += ifm_rank;
axis = acl_common::ToARMComputeAxis(ifm_rank, axis, frontend_layout, backend_layout).value();
@@ -1315,6 +1409,60 @@ void KernelGenerator::visit(const ir::operation::Split &node)
_return_fn = asAclFunction(std::move(fn));
}
+void KernelGenerator::visit(const ir::operation::SplitV &node)
+{
+ const auto ifm_index{node.getInputs().at(ir::operation::SplitV::Input::INPUT)};
+ const auto size_split_index{node.getInputs().at(ir::operation::SplitV::Input::SIZE_SPLITS)};
+ const auto split_dim_index{node.getInputs().at(ir::operation::SplitV::Input::SPLIT_DIM)};
+
+ assert(node.param().num_splits == static_cast<int>(node.getOutputs().size()));
+
+ const size_t ifm_rank = _ctx.at(ifm_index).shape().rank();
+ std::vector<ir::OperandIndex> output_indexes;
+ for (const auto &output : node.getOutputs())
+ output_indexes.emplace_back(output);
+
+ auto ifm_tensor = _tensor_reg->getAclTensor(ifm_index);
+ auto size_split_tensor = _tensor_reg->getAclTensor(size_split_index);
+
+ std::vector<arm_compute::ICLTensor *> output_tensors;
+ for (const auto &ofm_ind : output_indexes)
+ output_tensors.emplace_back(_tensor_reg->getAclTensor(ofm_ind)->handle());
+
+ auto fn = std::make_unique<arm_compute::CLSplitVEx>();
+ const auto &split_dim_op = _ctx.at(split_dim_index);
+ if (split_dim_op.isConstant())
+ {
+ int32_t split_dim = split_dim_op.asScalar<int32_t>();
+ uint32_t split_dim_revised = (split_dim < 0) ? (split_dim + ifm_rank) : split_dim;
+ const auto frontend_layout = _current_op_seq_layout;
+ const auto backend_layout = ifm_tensor->layout();
+
+ if (ifm_tensor->num_dimensions() != ifm_tensor->info()->num_dimensions())
+ {
+ // This means that high dimension's value is 1 and ifm tensor is applied dim_correction
+ acl_common::disableDimCorrection(ifm_tensor);
+ }
+
+ split_dim_revised =
+ acl_common::ToARMComputeAxis(ifm_rank, split_dim_revised, frontend_layout, backend_layout)
+ .value();
+ fn->configure(ifm_tensor->handle(), size_split_tensor->handle(), split_dim_revised,
+ output_tensors, node.param().num_splits);
+
+ if (ifm_tensor->dimension(0) == 1)
+ {
+ acl_common::enableDimCorrection(ifm_tensor);
+ }
+ }
+ else
+ {
+ throw std::runtime_error("Non-constant split_dim NYI for acl_cl backend");
+ }
+
+ _return_fn = asAclFunction(std::move(fn));
+}
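A worked example of the new SplitV lowering above, with illustrative values: a [2, 6] input with size_splits = {2, 4} and a constant split_dim of 1 (or -1, which wraps) yields outputs of shape [2, 2] and [2, 4]. The negative-axis handling reduces to:

    int32_t split_dim = -1;   // illustrative constant SPLIT_DIM value
    size_t ifm_rank = 2;      // illustrative input rank
    uint32_t split_dim_revised = (split_dim < 0) ? (split_dim + ifm_rank) : split_dim; // -> 1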
+
void KernelGenerator::visit(const ir::operation::Unpack &node)
{
const auto input_index{node.getInputs().at(ir::operation::Unpack::Input::INPUT)};
@@ -1326,34 +1474,32 @@ void KernelGenerator::visit(const ir::operation::Unpack &node)
for (const auto &output_index : node.getOutputs())
output_indexes.emplace_back(output_index);
- auto input = _tensor_reg->getAclTensor(input_index).get()->handle();
+ auto input_tensor = _tensor_reg->getAclTensor(input_index);
std::vector<arm_compute::ICLTensor *> outputs;
for (const auto &output_index : output_indexes)
outputs.emplace_back(_tensor_reg->getAclTensor(output_index)->handle());
const auto frontend_layout = _current_op_seq_layout;
- const auto backend_layout = _tensor_reg->getAclTensor(input_index).get()->layout();
+ const auto backend_layout = _tensor_reg->getAclTensor(input_index)->layout();
if (axis < 0)
axis += input_rank;
axis = acl_common::ToARMComputeAxis(input_rank, axis, frontend_layout, backend_layout).value();
// Disable applied dim_correction
- std::vector<arm_compute::TensorShape> orig_outputs_acl_tensor_shapes;
- for (const auto &output_index : output_indexes)
+ if (input_tensor->num_dimensions() != input_tensor->info()->num_dimensions())
{
- size_t output_rank = _ctx.at(output_index).shape().rank();
- const auto &output_tensor = _tensor_reg->getAclTensor(output_index);
- orig_outputs_acl_tensor_shapes.emplace_back(output_tensor->info()->tensor_shape());
- assert(output_rank == output_tensor->num_dimensions());
- if (output_rank != output_tensor->info()->num_dimensions())
- {
- // This means that high dimension's value is 1 and ifm tensor is applied dim_correction
- output_tensor->info()->set_tensor_shape(acl_common::asTensorShape(
- _ctx.at(output_index).shape(), _current_op_seq_layout, backend_layout, false));
- }
+ // This means that high dimension's value is 1 and input tensor is applied dim_correction
+ acl_common::disableDimCorrection(input_tensor);
}
- auto fn = acl_common::generateLayer<arm_compute::CLUnstack>(input, outputs, axis);
+ auto fn =
+ acl_common::generateLayer<arm_compute::CLUnstack>(input_tensor->handle(), outputs, axis);
+
+ // Revert disabling applied dim_correction
+ if (input_tensor->dimension(0) == 1)
+ {
+ acl_common::enableDimCorrection(input_tensor);
+ }
_return_fn = asAclFunction(std::move(fn));
}
@@ -1373,11 +1519,11 @@ void KernelGenerator::visit(const ir::operation::Pad &node)
auto quant_info = ::arm_compute::QuantizationInfo(input_type.scale(), input_type.offset());
const auto pixel_value = ::arm_compute::PixelValue(0, data_type, quant_info);
- auto input = _tensor_reg->getAclTensor(input_index).get()->handle();
- auto output = _tensor_reg->getAclTensor(output_index).get()->handle();
+ auto input = _tensor_reg->getAclTensor(input_index)->handle();
+ auto output = _tensor_reg->getAclTensor(output_index)->handle();
const auto frontend_layout = _current_op_seq_layout;
- const auto backend_layout = _tensor_reg->getAclTensor(input_index).get()->layout();
+ const auto backend_layout = _tensor_reg->getAclTensor(input_index)->layout();
::arm_compute::PaddingList padding_list;
padding_list.resize(rank);
@@ -1391,21 +1537,26 @@ void KernelGenerator::visit(const ir::operation::Pad &node)
}
// Disable applied dim_correction
- size_t input_rank = _ctx.at(input_index).shape().rank();
const auto &input_tensor = _tensor_reg->getAclTensor(input_index);
- assert(input_rank == input_tensor->num_dimensions());
- if (input_rank != input_tensor->info()->num_dimensions())
+ if (input_tensor->num_dimensions() != input_tensor->info()->num_dimensions())
{
- // This means that high dimension's value is 1 and ifm tensor is applied dim_correction
- input_tensor->info()->set_tensor_shape(acl_common::asTensorShape(
- _ctx.at(input_index).shape(), frontend_layout, backend_layout, false));
+ // This means that high dimension's value is 1 and input tensor is applied dim_correction
+ acl_common::disableDimCorrection(input_tensor);
}
auto fn =
acl_common::generateLayer<arm_compute::CLPadLayer>(input, output, padding_list, pixel_value);
- // Do not revert disabling applied dim_correction CLPadKernel has cl kernel for 4-dimension
- // It would produce a mistach of result
+  // NOTE Do not revert disabling applied dim_correction for 4D tensors.
+  // Reverting would produce a mismatched result because of an incorrect offset_first_element in
+  // ICLKernel::add_tensor_argument<3>().
+  // For kernels that slice 4D into 3D, keep dim_correction disabled and do not re-enable it,
+  // because slicing an arm_compute::Window can cause an incorrect offset_first_element when the
+  // tensor is 4D and its highest dimension is 1.
+ if (input_tensor->num_dimensions() < 4 && input_tensor->dimension(0) == 1)
+ {
+ acl_common::enableDimCorrection(input_tensor);
+ }
_return_fn = asAclFunction(std::move(fn));
}
@@ -1415,8 +1566,8 @@ void KernelGenerator::visit(const ir::operation::ConvertFp32ToFp16 &node)
const auto ofm_index{node.getOutputs().at(0)};
const auto ifm_index{node.getInputs().at(ir::operation::ConvertFp32ToFp16::Input::INPUT)};
- auto ofm_tensor = _tensor_reg->getAclTensor(ofm_index).get();
- auto ifm_tensor = _tensor_reg->getAclTensor(ifm_index).get();
+ auto ofm_tensor = _tensor_reg->getAclTensor(ofm_index);
+ auto ifm_tensor = _tensor_reg->getAclTensor(ifm_index);
auto fn = acl_common::generateLayer<arm_compute::CLDepthConvertLayer>(
ifm_tensor->handle(), ofm_tensor->handle(), ::arm_compute::ConvertPolicy::SATURATE, 0);
@@ -1429,8 +1580,8 @@ void KernelGenerator::visit(const ir::operation::ConvertFp16ToFp32 &node)
const auto ofm_index{node.getOutputs().at(0)};
const auto ifm_index{node.getInputs().at(ir::operation::ConvertFp16ToFp32::Input::INPUT)};
- auto ofm_tensor = _tensor_reg->getAclTensor(ofm_index).get();
- auto ifm_tensor = _tensor_reg->getAclTensor(ifm_index).get();
+ auto ofm_tensor = _tensor_reg->getAclTensor(ofm_index);
+ auto ifm_tensor = _tensor_reg->getAclTensor(ifm_index);
auto fn = acl_common::generateLayer<arm_compute::CLDepthConvertLayer>(
ifm_tensor->handle(), ofm_tensor->handle(), ::arm_compute::ConvertPolicy::SATURATE, 0);
@@ -1438,6 +1589,30 @@ void KernelGenerator::visit(const ir::operation::ConvertFp16ToFp32 &node)
_return_fn = asAclFunction(std::move(fn));
}
+void KernelGenerator::visit(const ir::operation::Reverse &node)
+{
+ const auto ofm_index{node.getOutputs().at(0)};
+ const auto ifm_index{node.getInputs().at(ir::operation::Reverse::Input::INPUT)};
+ const auto axis_index{node.getInputs().at(ir::operation::Reverse::Input::AXIS)};
+
+ auto ofm_tensor = _tensor_reg->getAclTensor(ofm_index);
+ auto ifm_tensor = _tensor_reg->getAclTensor(ifm_index);
+ auto axis_tensor = _tensor_reg->getAclTensor(axis_index);
+
+  // WORKAROUND: the acl-cl backend only allows the U32 type for the axis tensor.
+  // ConstantInitializer will resolve the S32 type to U32.
+ if (_ctx.at(axis_index).isConstant() &&
+ (axis_tensor->handle()->info()->data_type() == arm_compute::DataType::S32))
+ {
+ axis_tensor->handle()->info()->set_data_type(arm_compute::DataType::U32);
+ }
+
+ auto fn = acl_common::generateLayer<arm_compute::CLReverse>(
+ ifm_tensor->handle(), ofm_tensor->handle(), axis_tensor->handle());
+
+ _return_fn = asAclFunction(std::move(fn));
+}
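A minimal sketch of the workaround above, assuming a constant S32 axis operand (Reverse flips the input along the axes listed in that operand): only the tensor metadata is retagged, while ConstantInitializer is expected to write the axis values as U32.

    auto *axis_info = axis_tensor->handle()->info();
    if (axis_info->data_type() == arm_compute::DataType::S32)
      axis_info->set_data_type(arm_compute::DataType::U32);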
+
} // namespace acl_cl
} // namespace backend
} // namespace onert
diff --git a/runtime/onert/backend/acl_cl/KernelGenerator.h b/runtime/onert/backend/acl_cl/KernelGenerator.h
index d188d6d83..e8a922677 100644
--- a/runtime/onert/backend/acl_cl/KernelGenerator.h
+++ b/runtime/onert/backend/acl_cl/KernelGenerator.h
@@ -59,6 +59,7 @@ public:
void visit(const ir::operation::InstanceNorm &) override;
void visit(const ir::operation::Comparison &) override;
void visit(const ir::operation::LSTM &) override;
+ void visit(const ir::operation::OneHot &) override;
void visit(const ir::operation::Pack &) override;
void visit(const ir::operation::Pool2D &) override;
void visit(const ir::operation::Permute &) override;
@@ -79,10 +80,12 @@ public:
void visit(const ir::operation::LocalResponseNormalization &) override;
void visit(const ir::operation::DepthToSpace &) override;
void visit(const ir::operation::Split &) override;
+ void visit(const ir::operation::SplitV &) override;
void visit(const ir::operation::Unpack &) override;
void visit(const ir::operation::Pad &) override;
void visit(const ir::operation::ConvertFp32ToFp16 &) override;
void visit(const ir::operation::ConvertFp16ToFp32 &) override;
+ void visit(const ir::operation::Reverse &) override;
private:
const ir::Operands &_ctx;
diff --git a/runtime/onert/backend/acl_common/AclKernelGen.h b/runtime/onert/backend/acl_common/AclKernelGen.h
index 372ce689e..257bbd3b4 100644
--- a/runtime/onert/backend/acl_common/AclKernelGen.h
+++ b/runtime/onert/backend/acl_common/AclKernelGen.h
@@ -30,6 +30,20 @@ namespace backend
namespace acl_common
{
+void enableDimCorrection(IACLTensor *tensor)
+{
+ size_t input_rank = tensor->num_dimensions();
+ const_cast<arm_compute::TensorShape &>(tensor->info()->tensor_shape())
+ .set(input_rank - 1, tensor->info()->dimension(input_rank - 1), true);
+}
+
+void disableDimCorrection(IACLTensor *tensor)
+{
+ size_t input_rank = tensor->num_dimensions();
+ const_cast<arm_compute::TensorShape &>(tensor->info()->tensor_shape())
+ .set(input_rank - 1, tensor->info()->dimension(input_rank - 1), false);
+}
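The two helpers above re-set the highest dimension of a tensor's shape with ACL's apply_dim_correction flag turned on or off. A minimal sketch of the call pattern used throughout the kernel generators in this commit (SomeAclLayer is a placeholder, not a real class):

    if (tensor->num_dimensions() != tensor->info()->num_dimensions())
    {
      // The highest dimension is 1 and dim_correction was applied to the tensor
      acl_common::disableDimCorrection(tensor);
    }
    auto fn = acl_common::generateLayer<SomeAclLayer>(tensor->handle() /* ... */);
    if (tensor->dimension(0) == 1)
    {
      // Revert disabling applied dim_correction
      acl_common::enableDimCorrection(tensor);
    }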
+
template <typename Layer, typename... Args>
std::unique_ptr<arm_compute::IFunction> generateLayer(Args &&... args)
{
@@ -138,30 +152,27 @@ std::unique_ptr<exec::IFunction> kernelGenLSTM(const ir::operation::LSTM &node,
const auto projection_clip = projection_threshold;
assert(cell_clip >= 0.f && projection_clip >= 0.f);
- auto scratch_buffer_tensor = tensor_reg->getAclTensor(scratch_buffer_index).get();
- auto output_state_out_tensor = tensor_reg->getAclTensor(output_state_out_index).get();
- auto cell_state_out_tensor = tensor_reg->getAclTensor(cell_state_out_index).get();
- auto output_tensor = tensor_reg->getAclTensor(output_index).get();
+ auto scratch_buffer_tensor = tensor_reg->getAclTensor(scratch_buffer_index);
+ auto output_state_out_tensor = tensor_reg->getAclTensor(output_state_out_index);
+ auto cell_state_out_tensor = tensor_reg->getAclTensor(cell_state_out_index);
+ auto output_tensor = tensor_reg->getAclTensor(output_index);
- auto input_tensor = tensor_reg->getAclTensor(input_index).get();
+ auto input_tensor = tensor_reg->getAclTensor(input_index);
- auto input_to_forget_weights_tensor =
- tensor_reg->getAclTensor(input_to_forget_weights_index).get();
- auto input_to_cell_weights_tensor = tensor_reg->getAclTensor(input_to_cell_weights_index).get();
- auto input_to_output_weights_tensor =
- tensor_reg->getAclTensor(input_to_output_weights_index).get();
+ auto input_to_forget_weights_tensor = tensor_reg->getAclTensor(input_to_forget_weights_index);
+ auto input_to_cell_weights_tensor = tensor_reg->getAclTensor(input_to_cell_weights_index);
+ auto input_to_output_weights_tensor = tensor_reg->getAclTensor(input_to_output_weights_index);
auto recurrent_to_forget_weights_tensor =
- tensor_reg->getAclTensor(recurrent_to_forget_weights_index).get();
- auto recurrent_to_cell_weights_tensor =
- tensor_reg->getAclTensor(recurrent_to_cell_weights_index).get();
+ tensor_reg->getAclTensor(recurrent_to_forget_weights_index);
+ auto recurrent_to_cell_weights_tensor = tensor_reg->getAclTensor(recurrent_to_cell_weights_index);
auto recurrent_to_output_weights_tensor =
- tensor_reg->getAclTensor(recurrent_to_output_weights_index).get();
+ tensor_reg->getAclTensor(recurrent_to_output_weights_index);
- auto forget_gate_bias_tensor = tensor_reg->getAclTensor(forget_gate_bias_index).get();
- auto cell_bias_tensor = tensor_reg->getAclTensor(cell_bias_index).get();
- auto output_gate_bias_tensor = tensor_reg->getAclTensor(output_gate_bias_index).get();
- auto output_state_in_tensor = tensor_reg->getAclTensor(output_state_in_index).get();
- auto cell_state_in_tensor = tensor_reg->getAclTensor(cell_state_in_index).get();
+ auto forget_gate_bias_tensor = tensor_reg->getAclTensor(forget_gate_bias_index);
+ auto cell_bias_tensor = tensor_reg->getAclTensor(cell_bias_index);
+ auto output_gate_bias_tensor = tensor_reg->getAclTensor(output_gate_bias_index);
+ auto output_state_in_tensor = tensor_reg->getAclTensor(output_state_in_index);
+ auto cell_state_in_tensor = tensor_reg->getAclTensor(cell_state_in_index);
auto act_info = asActivationLayerInfo(activation);
@@ -169,13 +180,13 @@ std::unique_ptr<exec::IFunction> kernelGenLSTM(const ir::operation::LSTM &node,
if (has_cifg_param)
{
auto input_to_input_weights_tensor =
- tensor_reg->getAclTensor(input_to_input_weights_index).get(); // optional
+ tensor_reg->getAclTensor(input_to_input_weights_index); // optional
auto recurrent_to_input_weights_tensor =
- tensor_reg->getAclTensor(recurrent_to_input_weights_index).get(); // optional
+ tensor_reg->getAclTensor(recurrent_to_input_weights_index); // optional
auto cell_to_input_weights_handle =
- has_peephole_param ? tensor_reg->getAclTensor(cell_to_input_weights_index).get()->handle()
+ has_peephole_param ? tensor_reg->getAclTensor(cell_to_input_weights_index)->handle()
: nullptr; // optional (non-cifg && peephole)
- auto input_gate_bias_tensor = tensor_reg->getAclTensor(input_gate_bias_index).get(); // optional
+ auto input_gate_bias_tensor = tensor_reg->getAclTensor(input_gate_bias_index); // optional
lstm_params.set_cifg_params(input_to_input_weights_tensor->handle(),
recurrent_to_input_weights_tensor->handle(),
cell_to_input_weights_handle, input_gate_bias_tensor->handle());
@@ -183,19 +194,18 @@ std::unique_ptr<exec::IFunction> kernelGenLSTM(const ir::operation::LSTM &node,
if (has_peephole_param)
{
auto cell_to_forget_weights_tensor =
- tensor_reg->getAclTensor(cell_to_forget_weights_index).get(); // optional
+ tensor_reg->getAclTensor(cell_to_forget_weights_index); // optional
auto cell_to_output_weights_tensor =
- tensor_reg->getAclTensor(cell_to_output_weights_index).get(); // optional
+ tensor_reg->getAclTensor(cell_to_output_weights_index); // optional
lstm_params.set_peephole_params(cell_to_forget_weights_tensor->handle(),
cell_to_output_weights_tensor->handle());
}
if (has_projection_param)
{
- auto projection_weights_tensor =
- tensor_reg->getAclTensor(projection_weights_index).get(); // optional
- auto projection_bias_handle =
- has_projection_bias ? tensor_reg->getAclTensor(projection_bias_index).get()->handle()
- : nullptr; // optional
+ auto projection_weights_tensor = tensor_reg->getAclTensor(projection_weights_index); // optional
+ auto projection_bias_handle = has_projection_bias
+ ? tensor_reg->getAclTensor(projection_bias_index)->handle()
+ : nullptr; // optional
lstm_params.set_projection_params(projection_weights_tensor->handle(), projection_bias_handle);
}
@@ -260,10 +270,10 @@ kernelGenFullyConnected(const ir::operation::FullyConnected &node, const ir::Ope
reshape.dim(1) = input_size; /* W */
}
- auto output_tensor = tensor_reg->getAclTensor(output_index).get();
- const auto input_tensor = tensor_reg->getAclTensor(input_index).get();
- const auto weight_tensor = tensor_reg->getAclTensor(weight_index).get();
- const auto bias_tensor = tensor_reg->getAclTensor(bias_index).get();
+ auto output_tensor = tensor_reg->getAclTensor(output_index);
+ const auto input_tensor = tensor_reg->getAclTensor(input_index);
+ const auto weight_tensor = tensor_reg->getAclTensor(weight_index);
+ const auto bias_tensor = tensor_reg->getAclTensor(bias_index);
const auto frontend_layout = layout;
const auto acl_layout = output_tensor->handle()->info()->data_layout();
@@ -313,8 +323,8 @@ kernelGenPool2D(const T_PoolOp &node, const ir::Operands &operands,
VERBOSE(Pool2DParam) << "PAD(L): " << padding.left << std::endl;
VERBOSE(Pool2DParam) << "PAD(R): " << padding.right << std::endl;
- auto ofm_tensor = tensor_reg->getAclTensor(ofm_index).get();
- auto ifm_tensor = tensor_reg->getAclTensor(ifm_index).get();
+ auto ofm_tensor = tensor_reg->getAclTensor(ofm_index);
+ auto ifm_tensor = tensor_reg->getAclTensor(ifm_index);
::arm_compute::PoolingLayerInfo info{
pooling_type, ::arm_compute::Size2D{kw, kh}, ifm_tensor->info()->data_layout(),
diff --git a/runtime/onert/backend/acl_common/AclSubTensorAnalyzer.h b/runtime/onert/backend/acl_common/AclSubTensorAnalyzer.h
index 83d7ad6fd..beec95718 100644
--- a/runtime/onert/backend/acl_common/AclSubTensorAnalyzer.h
+++ b/runtime/onert/backend/acl_common/AclSubTensorAnalyzer.h
@@ -61,8 +61,14 @@ public:
for (const auto &ind : inputs)
{
- // NOTE Not support the case that concat's input is a constant or a input of model
- if (_graph.operands().at(ind).isConstant() || _graph.getInputs().contains(ind))
+ /**
+       * NOTE The following cases are not supported:
+       * 1. concat's input is a constant.
+       * 2. concat's input is an input of the model.
+       * 3. concat's input is already a subtensor of another concat.
+ */
+ if (_graph.operands().at(ind).isConstant() || _graph.getInputs().contains(ind) ||
+ _parent_map.find(ind) != _parent_map.end())
{
return;
}
diff --git a/runtime/onert/backend/acl_common/AclTensorBuilder.h b/runtime/onert/backend/acl_common/AclTensorBuilder.h
index 91452014b..bb7abc95d 100644
--- a/runtime/onert/backend/acl_common/AclTensorBuilder.h
+++ b/runtime/onert/backend/acl_common/AclTensorBuilder.h
@@ -70,8 +70,6 @@ public:
void allocate() override;
void postFunctionPrepare() override;
- std::unique_ptr<ITensorManager> releaseStaticTensorManager(void) override;
-
T_AclTensorManager *acl_tensor_manager(void) { return _tensor_mgr.get(); }
void setUsesCount(const ir::OperandIndex &index, size_t num_uses)
@@ -161,7 +159,6 @@ void AclTensorBuilder<T_ITensor, T_Tensor, T_SubTensor>::registerTensorInfo(
else
{
// SubTensors
-
assert(!info.isConstant() && "Subtensors of constants are not supported yet.");
// Update offset info and emplace
@@ -306,13 +303,6 @@ void AclTensorBuilder<T_ITensor, T_Tensor, T_SubTensor>::postFunctionPrepare(voi
}
template <typename T_ITensor, typename T_Tensor, typename T_SubTensor>
-std::unique_ptr<ITensorManager>
-AclTensorBuilder<T_ITensor, T_Tensor, T_SubTensor>::releaseStaticTensorManager(void)
-{
- return std::move(_tensor_mgr);
-}
-
-template <typename T_ITensor, typename T_Tensor, typename T_SubTensor>
void AclTensorBuilder<T_ITensor, T_Tensor, T_SubTensor>::buildTensors(void)
{
assert(_tensor_mgr->constTensors().size() == 0);
diff --git a/runtime/onert/backend/acl_common/AclTensorRegistry.h b/runtime/onert/backend/acl_common/AclTensorRegistry.h
index 1ef9f4b35..02d66db99 100644
--- a/runtime/onert/backend/acl_common/AclTensorRegistry.h
+++ b/runtime/onert/backend/acl_common/AclTensorRegistry.h
@@ -36,17 +36,11 @@ template <typename T_AclTensorManager> class AclTensorRegistry : public ITensorR
public:
AclTensorRegistry(T_AclTensorManager *tensor_mgr) : _tensor_mgr{tensor_mgr} {}
- std::shared_ptr<ITensor> getITensor(const ir::OperandIndex &ind) override
- {
- return _tensor_mgr->at(ind);
- }
+ ITensor *getITensor(const ir::OperandIndex &ind) override { return _tensor_mgr->at(ind).get(); }
- std::shared_ptr<ITensor> getNativeITensor(const ir::OperandIndex &ind) override
- {
- return getITensor(ind);
- }
+ ITensor *getNativeITensor(const ir::OperandIndex &ind) override { return getITensor(ind); }
- auto getAclTensor(const ir::OperandIndex &ind) { return _tensor_mgr->at(ind); }
+ auto getAclTensor(const ir::OperandIndex &ind) { return _tensor_mgr->at(ind).get(); }
private:
T_AclTensorManager *_tensor_mgr;
diff --git a/runtime/onert/backend/acl_common/Convert.cc b/runtime/onert/backend/acl_common/Convert.cc
index 67dcc8192..6ef6a2dc3 100644
--- a/runtime/onert/backend/acl_common/Convert.cc
+++ b/runtime/onert/backend/acl_common/Convert.cc
@@ -112,6 +112,8 @@ namespace acl_common
return ::arm_compute::DataType::S8;
case ir::DataType::FLOAT16:
return ::arm_compute::DataType::F16;
+ case ir::DataType::INT64:
+ return ::arm_compute::DataType::S64;
default:
throw std::runtime_error("Not supported, yet");
break;
@@ -299,6 +301,8 @@ ir::DataType asRuntimeDataType(::arm_compute::DataType data_type)
return ir::DataType::QUANT_INT8_SYMM;
case ::arm_compute::DataType::F16:
return ir::DataType::FLOAT16;
+ case ::arm_compute::DataType::S64:
+ return ir::DataType::INT64;
default:
throw std::runtime_error{"Not supported, yet"};
break;
@@ -335,6 +339,27 @@ arm_compute::ReduceOperation convertReduceType(ir::operation::Reduce::ReduceType
}
}
+arm_compute::PixelValue asPixelValue(const ir::Operand &operand)
+{
+ assert(operand.isConstant());
+ assert(operand.shape().num_elements() == 1);
+ switch (operand.typeInfo().type())
+ {
+ case ir::DataType::INT32:
+ return arm_compute::PixelValue(operand.asScalar<int32_t>());
+ case ir::DataType::INT64:
+ return arm_compute::PixelValue(operand.asScalar<int64_t>());
+ case ir::DataType::UINT32:
+      return arm_compute::PixelValue(operand.asScalar<uint32_t>());
+ case ir::DataType::UINT8:
+ return arm_compute::PixelValue(operand.asScalar<uint8_t>());
+ case ir::DataType::FLOAT32:
+ return arm_compute::PixelValue(operand.asScalar<float>());
+ default:
+ throw std::runtime_error("asPixelValue : Not supported datatype yet");
+ }
+}
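A brief usage sketch for the new asPixelValue() helper, assuming a constant scalar operand (operands and value_index are hypothetical names, not taken from this commit); the resulting PixelValue can then be passed to ACL functions that accept a fill value:

    const ir::Operand &value_operand = operands.at(value_index); // hypothetical constant scalar
    assert(value_operand.isConstant() && value_operand.shape().num_elements() == 1);
    arm_compute::PixelValue pixel = acl_common::asPixelValue(value_operand);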
+
} // namespace acl_common
} // namespace backend
} // namespace onert
diff --git a/runtime/onert/backend/acl_common/Convert.h b/runtime/onert/backend/acl_common/Convert.h
index 380321c07..0b36df102 100644
--- a/runtime/onert/backend/acl_common/Convert.h
+++ b/runtime/onert/backend/acl_common/Convert.h
@@ -17,6 +17,7 @@
#ifndef __ONERT_BACKEND_ACL_COMMON_CONVERT_H__
#define __ONERT_BACKEND_ACL_COMMON_CONVERT_H__
+#include <arm_compute/core/PixelValue.h>
#include <arm_compute/core/TensorInfo.h>
#include <arm_compute/core/SubTensorInfo.h>
#include <arm_compute/core/TensorShape.h>
@@ -85,6 +86,8 @@ ir::DataType asRuntimeDataType(::arm_compute::DataType data_type);
arm_compute::PoolingType convertPoolType(ir::operation::Pool2D::PoolType pool_type_ir);
arm_compute::ReduceOperation convertReduceType(ir::operation::Reduce::ReduceType reduce_type_ir);
+arm_compute::PixelValue asPixelValue(const ir::Operand &operand);
+
} // namespace acl_common
} // namespace backend
} // namespace onert
diff --git a/runtime/onert/backend/acl_neon/KernelGenerator.cc b/runtime/onert/backend/acl_neon/KernelGenerator.cc
index 6d53c1245..598d043e7 100644
--- a/runtime/onert/backend/acl_neon/KernelGenerator.cc
+++ b/runtime/onert/backend/acl_neon/KernelGenerator.cc
@@ -18,7 +18,6 @@
#include <arm_compute/runtime/NEON/NEFunctions.h> // Include all ARM Compute NEON functions
#include <arm_compute/runtime/NEON/NEFunctionsEx.h> // Include all ARM Compute EX NEON functions
-#include <arm_compute/runtime/CPP/functions/CPPOneHotEx.h>
#include <AclActivationBuilder.h>
#include <AclFunction.h>
@@ -75,15 +74,16 @@ void KernelGenerator::visit(const ir::operation::ArgMax &node)
{
const auto ofm_index{node.getOutputs().at(0)};
const auto ifm_index{node.getInputs().at(ir::operation::ArgMax::Input::INPUT)};
+ const auto axis_index{node.getInputs().at(ir::operation::ArgMax::Input::AXIS)};
const auto ifm_rank = _ctx.at(ifm_index).shape().rank();
- auto ofm_tensor = _tensor_reg->getAclTensor(ofm_index).get();
- auto ifm_tensor = _tensor_reg->getAclTensor(ifm_index).get();
+ auto ofm_tensor = _tensor_reg->getAclTensor(ofm_index);
+ auto ifm_tensor = _tensor_reg->getAclTensor(ifm_index);
auto frontend_layout = _current_op_seq_layout;
auto backend_layout = ifm_tensor->layout();
- int axis_value = node.param().axis;
+ int axis_value = _ctx.at(axis_index).asScalar<int32_t>();
if (axis_value < 0)
{
axis_value += ifm_rank;
@@ -106,9 +106,9 @@ void KernelGenerator::visit(const ir::operation::BatchToSpaceND &node)
const auto block_size_index{
node.getInputs().at(ir::operation::BatchToSpaceND::Input::BLOCK_SIZE)};
- auto ofm_tensor = _tensor_reg->getAclTensor(ofm_index).get();
- auto ifm_tensor = _tensor_reg->getAclTensor(ifm_index).get();
- auto block_size_tensor = _tensor_reg->getAclTensor(block_size_index).get();
+ auto ofm_tensor = _tensor_reg->getAclTensor(ofm_index);
+ auto ifm_tensor = _tensor_reg->getAclTensor(ifm_index);
+ auto block_size_tensor = _tensor_reg->getAclTensor(block_size_index);
assert(_ctx.at(block_size_index).data());
@@ -126,9 +126,9 @@ void KernelGenerator::visit(const ir::operation::BinaryArithmetic &node)
const auto activation = node.param().activation;
- auto ofm_tensor = _tensor_reg->getAclTensor(ofm_index).get();
- auto lhs_tensor = _tensor_reg->getAclTensor(lhs_index).get();
- auto rhs_tensor = _tensor_reg->getAclTensor(rhs_index).get();
+ auto ofm_tensor = _tensor_reg->getAclTensor(ofm_index);
+ auto lhs_tensor = _tensor_reg->getAclTensor(lhs_index);
+ auto rhs_tensor = _tensor_reg->getAclTensor(rhs_index);
std::unique_ptr<arm_compute::IFunction> fn;
switch (node.param().arithmetic_type)
@@ -190,10 +190,10 @@ void KernelGenerator::visit(const ir::operation::Conv2D &node)
ker_width, ker_height);
const auto activation = node.param().activation;
- auto ofm_tensor = _tensor_reg->getAclTensor(ofm_index).get();
- auto ifm_tensor = _tensor_reg->getAclTensor(ifm_index).get();
- auto ker_tensor = _tensor_reg->getAclTensor(ker_index).get();
- auto bias_tensor = _tensor_reg->getAclTensor(bias_index).get();
+ auto ofm_tensor = _tensor_reg->getAclTensor(ofm_index);
+ auto ifm_tensor = _tensor_reg->getAclTensor(ifm_index);
+ auto ker_tensor = _tensor_reg->getAclTensor(ker_index);
+ auto bias_tensor = _tensor_reg->getAclTensor(bias_index);
const auto conv_info = acl_common::asPadStrideInfo(padding, stride);
const auto act_info = acl_common::asActivationLayerInfo(activation);
@@ -214,8 +214,8 @@ void KernelGenerator::visit(const ir::operation::DepthToSpace &node)
auto block_size = node.param().block_size;
assert(block_size > 0);
- auto output_tensor = _tensor_reg->getAclTensor(output_index).get();
- auto input_tensor = _tensor_reg->getAclTensor(input_index).get();
+ auto output_tensor = _tensor_reg->getAclTensor(output_index);
+ auto input_tensor = _tensor_reg->getAclTensor(input_index);
auto fn = acl_common::generateLayer<arm_compute::NEDepthToSpaceLayer>(
input_tensor->handle(), output_tensor->handle(), block_size);
@@ -245,10 +245,10 @@ void KernelGenerator::visit(const ir::operation::DepthwiseConv2D &node)
const auto multiplier = node.param().multiplier;
const auto activation = node.param().activation;
- auto ofm_tensor = _tensor_reg->getAclTensor(ofm_index).get();
- auto ifm_tensor = _tensor_reg->getAclTensor(ifm_index).get();
- auto ker_tensor = _tensor_reg->getAclTensor(ker_index).get();
- auto bias_tensor = _tensor_reg->getAclTensor(bias_index).get();
+ auto ofm_tensor = _tensor_reg->getAclTensor(ofm_index);
+ auto ifm_tensor = _tensor_reg->getAclTensor(ifm_index);
+ auto ker_tensor = _tensor_reg->getAclTensor(ker_index);
+ auto bias_tensor = _tensor_reg->getAclTensor(bias_index);
const auto conv_info = acl_common::asPadStrideInfo(padding, stride);
const auto act_info = acl_common::asActivationLayerInfo(activation);
@@ -282,7 +282,7 @@ void KernelGenerator::visit(const ir::operation::Concat &node)
return;
}
- auto output_tensor = _tensor_reg->getAclTensor(ofm_index).get();
+ auto output_tensor = _tensor_reg->getAclTensor(ofm_index);
std::vector<::arm_compute::ITensor *> input_tensors;
for (const auto &ifm_ind : input_indexes)
input_tensors.emplace_back(_tensor_reg->getAclTensor(ifm_ind)->handle());
@@ -312,8 +312,8 @@ void KernelGenerator::visit(const ir::operation::ElementwiseActivation &node)
const auto ofm_index{node.getOutputs().at(0)};
const auto ifm_index{node.getInputs().at(ir::operation::ElementwiseActivation::Input::INPUT)};
- auto ofm_tensor = _tensor_reg->getAclTensor(ofm_index).get();
- auto ifm_tensor = _tensor_reg->getAclTensor(ifm_index).get();
+ auto ofm_tensor = _tensor_reg->getAclTensor(ofm_index);
+ auto ifm_tensor = _tensor_reg->getAclTensor(ifm_index);
const ::arm_compute::ActivationLayerInfo act_info = acl_common::asActivationLayerInfo(
node.param().op_type, node.param().alpha, node.param().beta);
@@ -343,9 +343,9 @@ void KernelGenerator::visit(const ir::operation::ElementwiseBinary &node)
const auto lhs_index{node.getInputs().at(ir::operation::ElementwiseBinary::Input::LHS)};
const auto rhs_index{node.getInputs().at(ir::operation::ElementwiseBinary::Input::RHS)};
- auto output_tensor = _tensor_reg->getAclTensor(output_index).get();
- auto lhs_tensor = _tensor_reg->getAclTensor(lhs_index).get();
- auto rhs_tensor = _tensor_reg->getAclTensor(rhs_index).get();
+ auto output_tensor = _tensor_reg->getAclTensor(output_index);
+ auto lhs_tensor = _tensor_reg->getAclTensor(lhs_index);
+ auto rhs_tensor = _tensor_reg->getAclTensor(rhs_index);
std::unique_ptr<arm_compute::IFunction> fn;
switch (node.param().op_type)
@@ -390,8 +390,8 @@ void KernelGenerator::visit(const ir::operation::ElementwiseUnary &node)
const auto output_index{node.getOutputs().at(0)};
const auto input_index{node.getInputs().at(ir::operation::ElementwiseUnary::Input::INPUT)};
- auto output_tensor = _tensor_reg->getAclTensor(output_index).get();
- auto input_tensor = _tensor_reg->getAclTensor(input_index).get();
+ auto output_tensor = _tensor_reg->getAclTensor(output_index);
+ auto input_tensor = _tensor_reg->getAclTensor(input_index);
std::unique_ptr<arm_compute::IFunction> fn;
switch (node.param().op_type)
@@ -412,6 +412,11 @@ void KernelGenerator::visit(const ir::operation::ElementwiseUnary &node)
fn = acl_common::generateLayer<arm_compute::NECopy>(input_tensor->handle(),
output_tensor->handle());
}
+ else if (_ctx.at(input_index).typeInfo().type() == ir::DataType::BOOL8)
+ {
+ fn = acl_common::generateLayer<arm_compute::NECastBool>(input_tensor->handle(),
+ output_tensor->handle());
+ }
else
{
fn = acl_common::generateLayer<arm_compute::NECast>(
@@ -480,9 +485,9 @@ void KernelGenerator::visit(const ir::operation::EmbeddingLookup &node)
const auto lookups_index{node.getInputs().at(ir::operation::EmbeddingLookup::Input::LOOKUPS)};
const auto values_index{node.getInputs().at(ir::operation::EmbeddingLookup::Input::VALUES)};
- auto output_tensor = _tensor_reg->getAclTensor(output_index).get();
- auto lookups_tensor = _tensor_reg->getAclTensor(lookups_index).get();
- auto values_tensor = _tensor_reg->getAclTensor(values_index).get();
+ auto output_tensor = _tensor_reg->getAclTensor(output_index);
+ auto lookups_tensor = _tensor_reg->getAclTensor(lookups_index);
+ auto values_tensor = _tensor_reg->getAclTensor(values_index);
auto fn = acl_common::generateLayer<arm_compute::NEEmbeddingLookup>(
values_tensor->handle(), output_tensor->handle(), lookups_tensor->handle());
@@ -493,7 +498,7 @@ void KernelGenerator::visit(const ir::operation::EmbeddingLookup &node)
void KernelGenerator::visit(const ir::operation::FullyConnected &node)
{
const auto output_index{node.getOutputs().at(0)};
- auto output_tensor = _tensor_reg->getAclTensor(output_index).get();
+ auto output_tensor = _tensor_reg->getAclTensor(output_index);
const auto activation = node.param().activation;
auto fn = acl_common::kernelGenFullyConnected<acl_common::AclFunction, ::arm_compute::ITensor,
@@ -512,12 +517,12 @@ void KernelGenerator::visit(const ir::operation::HashtableLookup &node)
const auto keys_index{node.getInputs().at(ir::operation::HashtableLookup::Input::KEYS)};
const auto values_index{node.getInputs().at(ir::operation::HashtableLookup::Input::VALUES)};
- auto output_tensor = _tensor_reg->getAclTensor(output_index).get();
- auto hits_tensor = _tensor_reg->getAclTensor(hits_index).get();
+ auto output_tensor = _tensor_reg->getAclTensor(output_index);
+ auto hits_tensor = _tensor_reg->getAclTensor(hits_index);
- auto lookups_tensor = _tensor_reg->getAclTensor(lookups_index).get();
- auto keys_tensor = _tensor_reg->getAclTensor(keys_index).get();
- auto values_tensor = _tensor_reg->getAclTensor(values_index).get();
+ auto lookups_tensor = _tensor_reg->getAclTensor(lookups_index);
+ auto keys_tensor = _tensor_reg->getAclTensor(keys_index);
+ auto values_tensor = _tensor_reg->getAclTensor(values_index);
auto fn = acl_common::generateLayer<arm_compute::NEHashtableLookup>(
lookups_tensor->handle(), keys_tensor->handle(), values_tensor->handle(),
@@ -539,9 +544,9 @@ void KernelGenerator::visit(const ir::operation::Gather &node)
// Converting in reverse order
const int axis = ::onert::backend::acl_common::ToARMComputeAxis(ifm_rank, axis_value).value();
- auto ofm_tensor = _tensor_reg->getAclTensor(ofm_index).get();
- auto ifm_tensor = _tensor_reg->getAclTensor(ifm_index).get();
- auto indices_tensor = _tensor_reg->getAclTensor(indices_index).get();
+ auto ofm_tensor = _tensor_reg->getAclTensor(ofm_index);
+ auto ifm_tensor = _tensor_reg->getAclTensor(ifm_index);
+ auto indices_tensor = _tensor_reg->getAclTensor(indices_index);
const auto backend_layout = ofm_tensor->layout();
UNUSED_RELEASE(backend_layout);
@@ -567,24 +572,26 @@ void KernelGenerator::visit(const ir::operation::Gather &node)
if (n != ifm_tensor->info()->num_dimensions())
{
// This means that high dimension's value is 1 and ifm tensor is applied dim_correction
- const auto ifm = _ctx.at(ifm_index);
- ifm_tensor->info()->set_tensor_shape(
- acl_common::asTensorShape(ifm.shape(), _current_op_seq_layout, backend_layout, false));
+ acl_common::disableDimCorrection(ifm_tensor);
}
if (k != indices_tensor->info()->num_dimensions())
{
// This means that high dimension's value is 1 and indices tensor is applied dim_correction
- const auto indices = _ctx.at(indices_index);
- indices_tensor->info()->set_tensor_shape(
- acl_common::asTensorShape(indices.shape(), _current_op_seq_layout, backend_layout, false));
+ acl_common::disableDimCorrection(indices_tensor);
}
auto fn = acl_common::generateLayer<arm_compute::NEGatherEx>(
ifm_tensor->handle(), indices_tensor->handle(), ofm_tensor->handle(), axis);
- // acl_neon doesn't not revert disabling applied dim_correction because acl_neon's kernels would
- // use arm_compute::TensorInfo::offset_element_in_bytes()
- // It would create an error when the kernel accesses high dimension that its value is 1
+ // Revert disabling applied dim_correction
+ if (ifm_tensor->dimension(0) == 1)
+ {
+ acl_common::enableDimCorrection(ifm_tensor);
+ }
+ if (indices_tensor->dimension(0) == 1)
+ {
+ acl_common::enableDimCorrection(indices_tensor);
+ }
_return_fn = asAclFunction(std::move(fn));
}
@@ -596,10 +603,10 @@ void KernelGenerator::visit(const ir::operation::InstanceNorm &node)
const auto gamma_index{node.getInputs().at(ir::operation::InstanceNorm::Input::GAMMA)};
const auto beta_index{node.getInputs().at(ir::operation::InstanceNorm::Input::BETA)};
- auto ofm_tensor = _tensor_reg->getAclTensor(ofm_index).get();
- auto ifm_tensor = _tensor_reg->getAclTensor(ifm_index).get();
- auto gamma_tensor = _tensor_reg->getAclTensor(gamma_index).get();
- auto beta_tensor = _tensor_reg->getAclTensor(beta_index).get();
+ auto ofm_tensor = _tensor_reg->getAclTensor(ofm_index);
+ auto ifm_tensor = _tensor_reg->getAclTensor(ifm_index);
+ auto gamma_tensor = _tensor_reg->getAclTensor(gamma_index);
+ auto beta_tensor = _tensor_reg->getAclTensor(beta_index);
auto epsilon = node.param().epsilon;
auto activation = node.param().activation;
@@ -630,8 +637,8 @@ void KernelGenerator::visit(const ir::operation::L2Normalization &node)
float beta = 0.5f; // pow(reduction, -0.5) = 1 / sqrt(reduction)
float bias = 0.0f; // Don't offset the reduction.
- auto ofm_tensor = _tensor_reg->getAclTensor(ofm_index).get();
- auto ifm_tensor = _tensor_reg->getAclTensor(ifm_index).get();
+ auto ofm_tensor = _tensor_reg->getAclTensor(ofm_index);
+ auto ifm_tensor = _tensor_reg->getAclTensor(ifm_index);
const auto norm_info = ::arm_compute::NormalizationLayerInfo(::arm_compute::NormType::CROSS_MAP,
radius, alpha, beta, bias, false);
@@ -653,8 +660,8 @@ void KernelGenerator::visit(const ir::operation::LocalResponseNormalization &nod
auto beta = node.param().beta;
auto bias = node.param().bias;
- auto ofm_tensor = _tensor_reg->getAclTensor(ofm_index).get();
- auto ifm_tensor = _tensor_reg->getAclTensor(ifm_index).get();
+ auto ofm_tensor = _tensor_reg->getAclTensor(ofm_index);
+ auto ifm_tensor = _tensor_reg->getAclTensor(ifm_index);
const auto norm_info = ::arm_compute::NormalizationLayerInfo(
::arm_compute::NormType::CROSS_MAP, radius * 2 + 1, alpha, beta, bias, false);
@@ -682,13 +689,13 @@ void KernelGenerator::visit(const ir::operation::Pack &node)
for (const auto &input_index : node.getInputs())
input_indexes.emplace_back(input_index);
- auto output = _tensor_reg->getAclTensor(output_index).get()->handle();
+ auto output = _tensor_reg->getAclTensor(output_index)->handle();
std::vector<arm_compute::ITensor *> inputs;
for (const auto &input_index : input_indexes)
inputs.emplace_back(_tensor_reg->getAclTensor(input_index)->handle());
const auto frontend_layout = _current_op_seq_layout;
- const auto backend_layout = _tensor_reg->getAclTensor(output_index).get()->layout();
+ const auto backend_layout = _tensor_reg->getAclTensor(output_index)->layout();
if (axis < 0)
axis += output_rank;
@@ -697,22 +704,25 @@ void KernelGenerator::visit(const ir::operation::Pack &node)
// Disable applied dim_correction
for (const auto &input_index : input_indexes)
{
- size_t input_rank = _ctx.at(input_index).shape().rank();
const auto &input_tensor = _tensor_reg->getAclTensor(input_index);
- assert(input_rank == input_tensor->num_dimensions());
- if (input_rank != input_tensor->info()->num_dimensions())
+ if (input_tensor->num_dimensions() != input_tensor->info()->num_dimensions())
{
- // This means that high dimension's value is 1 and ifm tensor is applied dim_correction
- input_tensor->info()->set_tensor_shape(acl_common::asTensorShape(
- _ctx.at(input_index).shape(), _current_op_seq_layout, backend_layout, false));
+ // This means that high dimension's value is 1 and input tensor is applied dim_correction
+ acl_common::disableDimCorrection(input_tensor);
}
}
auto fn = acl_common::generateLayer<arm_compute::NEStackLayer>(inputs, axis, output);
- // acl_neon doesn't not revert disabling applied dim_correction because acl_neon's kernels would
- // use arm_compute::TensorInfo::offset_element_in_bytes()
- // It would create an error when the kernel accesses high dimension that its value is 1
+ // Revert disabling applied dim_correction
+ for (const auto &input_index : input_indexes)
+ {
+ const auto &input_tensor = _tensor_reg->getAclTensor(input_index);
+ if (input_tensor->dimension(0) == 1)
+ {
+ acl_common::enableDimCorrection(input_tensor);
+ }
+ }
_return_fn = asAclFunction(std::move(fn));
}
@@ -727,8 +737,8 @@ void KernelGenerator::visit(const ir::operation::Pad &node)
auto rank = _ctx.at(input_index).shape().rank();
auto pad_base = _ctx.at(pad_index).data()->base();
- auto input = _tensor_reg->getAclTensor(input_index).get()->handle();
- auto output = _tensor_reg->getAclTensor(output_index).get()->handle();
+ auto input = _tensor_reg->getAclTensor(input_index)->handle();
+ auto output = _tensor_reg->getAclTensor(output_index)->handle();
::arm_compute::PaddingList padding_list;
padding_list.resize(rank);
@@ -737,7 +747,7 @@ void KernelGenerator::visit(const ir::operation::Pad &node)
const int32_t *from = reinterpret_cast<const int32_t *>(pad_base) + (n * 2);
const auto frontend_layout = _current_op_seq_layout;
- const auto backend_layout = _tensor_reg->getAclTensor(input_index).get()->layout();
+ const auto backend_layout = _tensor_reg->getAclTensor(input_index)->layout();
const auto axis =
acl_common::ToARMComputeAxis(rank, n, frontend_layout, backend_layout).value();
padding_list[axis] = ::arm_compute::PaddingInfo{from[0], from[1]};
@@ -764,7 +774,7 @@ void KernelGenerator::visit(const ir::operation::Pool2D &node)
acl_common::convertPoolType(node.param().op_type));
const auto ofm_index{node.getOutputs().at(0)};
- auto ofm_tensor = _tensor_reg->getAclTensor(ofm_index).get();
+ auto ofm_tensor = _tensor_reg->getAclTensor(ofm_index);
const auto activation = node.param().activation;
_return_fn = std::make_unique<exec::FunctionSequence>(
asAclFunction(std::move(raw_fn)),
@@ -776,8 +786,8 @@ void KernelGenerator::visit(const ir::operation::Permute &node)
const auto ofm_idx{node.getOutputs().at(0)};
const auto ifm_idx{node.getInputs().at(0)};
const auto permute_type = node.getPermuteType();
- auto ofm_tensor = _tensor_reg->getAclTensor(ofm_idx).get();
- auto ifm_tensor = _tensor_reg->getAclTensor(ifm_idx).get();
+ auto ofm_tensor = _tensor_reg->getAclTensor(ofm_idx);
+ auto ifm_tensor = _tensor_reg->getAclTensor(ifm_idx);
const auto rank = _ctx.at(ofm_idx).shape().rank();
assert(_ctx.at(ifm_idx).shape().rank() == _ctx.at(ofm_idx).shape().rank());
@@ -812,9 +822,9 @@ void KernelGenerator::visit(const ir::operation::PReLU &node)
const auto ifm_index{node.getInputs().at(ir::operation::PReLU::Input::INPUT)};
const auto alpha_index{node.getInputs().at(ir::operation::PReLU::Input::ALPHA)};
- auto ofm_tensor = _tensor_reg->getAclTensor(ofm_index).get();
- auto ifm_tensor = _tensor_reg->getAclTensor(ifm_index).get();
- auto alpha_tensor = _tensor_reg->getAclTensor(alpha_index).get();
+ auto ofm_tensor = _tensor_reg->getAclTensor(ofm_index);
+ auto ifm_tensor = _tensor_reg->getAclTensor(ifm_index);
+ auto alpha_tensor = _tensor_reg->getAclTensor(alpha_index);
auto fn = acl_common::generateLayer<arm_compute::NEPReluLayer>(
ifm_tensor->handle(), alpha_tensor->handle(), ofm_tensor->handle());
@@ -828,8 +838,8 @@ void KernelGenerator::visit(const ir::operation::Reduce &node)
const auto input_index{node.getInputs().at(ir::operation::Reduce::Input::INPUT)};
const auto axes_index{node.getInputs().at(ir::operation::Reduce::Input::AXES)};
- auto output_tensor = _tensor_reg->getAclTensor(output_index).get();
- auto input_tensor = _tensor_reg->getAclTensor(input_index).get();
+ auto output_tensor = _tensor_reg->getAclTensor(output_index);
+ auto input_tensor = _tensor_reg->getAclTensor(input_index);
// Convert to ACL axes taking into account negative values and possible duplicates.
const auto &axes = _ctx.at(axes_index);
@@ -866,8 +876,8 @@ void KernelGenerator::visit(const ir::operation::Reshape &node)
const auto output_index{node.getOutputs().at(0)};
const auto input_index{node.getInputs().at(ir::operation::Reshape::Input::INPUT)};
- auto output_tensor = _tensor_reg->getAclTensor(output_index).get();
- auto input_tensor = _tensor_reg->getAclTensor(input_index).get();
+ auto output_tensor = _tensor_reg->getAclTensor(output_index);
+ auto input_tensor = _tensor_reg->getAclTensor(input_index);
// NOTE This operation must not be changed the layout from frontend to backend
// So, PermutationOperationPass makes layouts of frontend and backend the same.
@@ -887,11 +897,10 @@ void KernelGenerator::visit(const ir::operation::Reshape &node)
void KernelGenerator::visit(const ir::operation::ResizeBilinear &node)
{
const auto ofm_index{node.getOutputs().at(0)};
-
const auto ifm_index{node.getInputs().at(ir::operation::ResizeBilinear::Input::INPUT)};
- auto ofm_tensor = _tensor_reg->getAclTensor(ofm_index).get();
- auto ifm_tensor = _tensor_reg->getAclTensor(ifm_index).get();
+ auto ofm_tensor = _tensor_reg->getAclTensor(ofm_index);
+ auto ifm_tensor = _tensor_reg->getAclTensor(ifm_index);
auto fn = acl_common::generateLayer<arm_compute::NEScale>(
ifm_tensor->handle(), ofm_tensor->handle(), ::arm_compute::InterpolationPolicy::BILINEAR,
@@ -916,14 +925,14 @@ void KernelGenerator::visit(const ir::operation::RNN &node)
const auto activation = node.param().activation;
- auto output_tensor = _tensor_reg->getAclTensor(output_index).get();
- auto hidden_state_out_tensor = _tensor_reg->getAclTensor(hidden_state_out_index).get();
+ auto output_tensor = _tensor_reg->getAclTensor(output_index);
+ auto hidden_state_out_tensor = _tensor_reg->getAclTensor(hidden_state_out_index);
- auto input_tensor = _tensor_reg->getAclTensor(input_index).get();
- auto weights_tensor = _tensor_reg->getAclTensor(weights_index).get();
- auto recurrent_weights_tensor = _tensor_reg->getAclTensor(recurrent_weights_index).get();
- auto bias_tensor = _tensor_reg->getAclTensor(bias_index).get();
- auto hidden_state_in_tensor = _tensor_reg->getAclTensor(hidden_state_in_index).get();
+ auto input_tensor = _tensor_reg->getAclTensor(input_index);
+ auto weights_tensor = _tensor_reg->getAclTensor(weights_index);
+ auto recurrent_weights_tensor = _tensor_reg->getAclTensor(recurrent_weights_index);
+ auto bias_tensor = _tensor_reg->getAclTensor(bias_index);
+ auto hidden_state_in_tensor = _tensor_reg->getAclTensor(hidden_state_in_index);
auto act_info = ::onert::backend::acl_common::asActivationLayerInfo(activation);
auto copy_layer = acl_common::generateLayer<arm_compute::NECopy>(
@@ -949,8 +958,8 @@ void KernelGenerator::visit(const ir::operation::Squeeze &node)
(void)dims;
(void)ndim;
- auto output_tensor = _tensor_reg->getAclTensor(output_index).get();
- auto input_tensor = _tensor_reg->getAclTensor(input_index).get();
+ auto output_tensor = _tensor_reg->getAclTensor(output_index);
+ auto input_tensor = _tensor_reg->getAclTensor(input_index);
auto fn = acl_common::generateLayer<arm_compute::NEReshapeLayer>(input_tensor->handle(),
output_tensor->handle());
_return_fn = asAclFunction(std::move(fn));
@@ -962,25 +971,26 @@ void KernelGenerator::visit(const ir::operation::Softmax &node)
const auto input_index{node.getInputs().at(ir::operation::Softmax::Input::INPUT)};
const auto beta = node.param().beta;
- auto output_tensor = _tensor_reg->getAclTensor(output_index).get();
- auto input_tensor = _tensor_reg->getAclTensor(input_index).get();
- const auto frontend_layout = _current_op_seq_layout;
- const auto backend_layout = input_tensor->layout();
+ auto output_tensor = _tensor_reg->getAclTensor(output_index);
+ auto input_tensor = _tensor_reg->getAclTensor(input_index);
// Disable applied dim_correction
- const size_t input_rank = _ctx.at(input_index).shape().rank();
- if (input_rank != input_tensor->info()->num_dimensions())
+ if (input_tensor->num_dimensions() != input_tensor->info()->num_dimensions())
{
// This means that high dimension's value is 1 and input tensor is applied dim_correction
- const auto input = _ctx.at(input_index);
- input_tensor->info()->set_tensor_shape(
- acl_common::asTensorShape(input.shape(), frontend_layout, backend_layout, false));
+ acl_common::disableDimCorrection(input_tensor);
}
auto fn = acl_common::generateLayer<arm_compute::NESoftmaxLayer>(
_tensor_builder->acl_tensor_manager()->internal_buffer_manager(), input_tensor->handle(),
output_tensor->handle(), beta);
+ // Revert disabling applied dim_correction
+ if (input_tensor->dimension(0) == 1)
+ {
+    acl_common::enableDimCorrection(input_tensor);
+ }
+
_return_fn = asAclFunction(std::move(fn));
}
@@ -992,10 +1002,10 @@ void KernelGenerator::visit(const ir::operation::SpaceToBatchND &node)
node.getInputs().at(ir::operation::SpaceToBatchND::Input::BLOCK_SIZE)};
const auto paddings_index{node.getInputs().at(ir::operation::SpaceToBatchND::Input::PADDINGS)};
- auto ofm_tensor = _tensor_reg->getAclTensor(ofm_index).get();
- auto ifm_tensor = _tensor_reg->getAclTensor(ifm_index).get();
- auto block_size_tensor = _tensor_reg->getAclTensor(block_size_index).get();
- auto paddings_tensor = _tensor_reg->getAclTensor(paddings_index).get();
+ auto ofm_tensor = _tensor_reg->getAclTensor(ofm_index);
+ auto ifm_tensor = _tensor_reg->getAclTensor(ifm_index);
+ auto block_size_tensor = _tensor_reg->getAclTensor(block_size_index);
+ auto paddings_tensor = _tensor_reg->getAclTensor(paddings_index);
assert(_ctx.at(block_size_index).data());
assert(_ctx.at(paddings_index).data());
@@ -1014,8 +1024,8 @@ void KernelGenerator::visit(const ir::operation::SpaceToDepth &node)
auto block_size = node.param().block_size;
- auto ofm_tensor = _tensor_reg->getAclTensor(ofm_index).get();
- auto ifm_tensor = _tensor_reg->getAclTensor(ifm_index).get();
+ auto ofm_tensor = _tensor_reg->getAclTensor(ofm_index);
+ auto ifm_tensor = _tensor_reg->getAclTensor(ifm_index);
auto fn = acl_common::generateLayer<arm_compute::NESpaceToDepthLayer>(
ifm_tensor->handle(), ofm_tensor->handle(), block_size);
@@ -1027,22 +1037,27 @@ void KernelGenerator::visit(const ir::operation::Split &node)
{
// TODO Support this op by SubTensor
const auto ifm_index{node.getInputs().at(ir::operation::Split::Input::INPUT)};
+ const auto axis_index{node.getInputs().at(ir::operation::Split::Input::AXIS)};
assert(node.param().num_splits == static_cast<int>(node.getOutputs().size()));
+ if (!_ctx.at(axis_index).isConstant())
+ {
+ throw std::runtime_error("Non-constant axis_index NYI for acl_neon backend");
+ }
const auto ifm_rank = _ctx.at(ifm_index).shape().rank();
std::vector<ir::OperandIndex> output_indexes;
for (const auto &output : node.getOutputs())
output_indexes.emplace_back(output);
- auto ifm_tensor = _tensor_reg->getAclTensor(ifm_index).get();
+ auto ifm_tensor = _tensor_reg->getAclTensor(ifm_index);
std::vector<arm_compute::ITensor *> output_tensors;
for (const auto &ofm_ind : output_indexes)
- output_tensors.emplace_back(_tensor_reg->getAclTensor(ofm_ind).get()->handle());
+ output_tensors.emplace_back(_tensor_reg->getAclTensor(ofm_ind)->handle());
const auto frontend_layout = _current_op_seq_layout;
const auto backend_layout = ifm_tensor->layout();
- auto axis = node.param().axis;
+ auto axis = _ctx.at(axis_index).asScalar<int32_t>();
if (axis < 0)
axis += ifm_rank;
axis = acl_common::ToARMComputeAxis(ifm_rank, axis, frontend_layout, backend_layout).value();
@@ -1059,9 +1074,9 @@ void KernelGenerator::visit(const ir::operation::SquaredDifference &node)
const auto lhs_index{node.getInputs().at(ir::operation::SquaredDifference::Input::LHS)};
const auto rhs_index{node.getInputs().at(ir::operation::SquaredDifference::Input::RHS)};
- auto ofm_tensor = _tensor_reg->getAclTensor(ofm_index).get();
- auto lhs_tensor = _tensor_reg->getAclTensor(lhs_index).get();
- auto rhs_tensor = _tensor_reg->getAclTensor(rhs_index).get();
+ auto ofm_tensor = _tensor_reg->getAclTensor(ofm_index);
+ auto lhs_tensor = _tensor_reg->getAclTensor(lhs_index);
+ auto rhs_tensor = _tensor_reg->getAclTensor(rhs_index);
auto fn = acl_common::generateLayer<arm_compute::NEElementwiseSquaredDiff>(
lhs_tensor->handle(), rhs_tensor->handle(), ofm_tensor->handle());
@@ -1076,8 +1091,8 @@ void KernelGenerator::visit(const ir::operation::Slice &node)
const auto begins_index{node.getInputs().at(ir::operation::Slice::Input::BEGINS)};
const auto sizes_index{node.getInputs().at(ir::operation::Slice::Input::SIZES)};
- auto outputData_tensor = _tensor_reg->getAclTensor(output_index).get();
- auto inputData_tensor = _tensor_reg->getAclTensor(input_index).get();
+ auto outputData_tensor = _tensor_reg->getAclTensor(output_index);
+ auto inputData_tensor = _tensor_reg->getAclTensor(input_index);
const auto frontend_layout = _current_op_seq_layout;
const auto backend_layout = inputData_tensor->layout();
@@ -1141,8 +1156,8 @@ void KernelGenerator::visit(const ir::operation::StridedSlice &node)
const auto ends_index{node.getInputs().at(ir::operation::StridedSlice::Input::ENDS)};
const auto strides_index{node.getInputs().at(ir::operation::StridedSlice::Input::STRIDES)};
- auto outputData_tensor = _tensor_reg->getAclTensor(output_index).get();
- auto inputData_tensor = _tensor_reg->getAclTensor(input_index).get();
+ auto outputData_tensor = _tensor_reg->getAclTensor(output_index);
+ auto inputData_tensor = _tensor_reg->getAclTensor(input_index);
const auto frontend_layout = _current_op_seq_layout;
const auto backend_layout = inputData_tensor->layout();
@@ -1211,10 +1226,23 @@ void KernelGenerator::visit(const ir::operation::StridedSlice &node)
strides_set.set(i, strides[i]);
}
+ // Disable applied dim_correction
+ if (inputData_tensor->num_dimensions() != inputData_tensor->info()->num_dimensions())
+ {
+ // This means that high dimension's value is 1 and input tensor is applied dim_correction
+ acl_common::disableDimCorrection(inputData_tensor);
+ }
+
auto fn = acl_common::generateLayer<arm_compute::NEStridedSlice>(
inputData_tensor->handle(), outputData_tensor->handle(), starts_set, ends_set, strides_set,
begin_mask, end_mask, shrink_axis_mask);
+ // Revert disabling applied dim_correction
+ if (inputData_tensor->dimension(0) == 1)
+ {
+ acl_common::enableDimCorrection(inputData_tensor);
+ }
+
_return_fn = asAclFunction(std::move(fn));
}
@@ -1244,9 +1272,9 @@ void KernelGenerator::visit(const ir::operation::TransposeConv &node)
invalid_vertical = ofm_shape.H - (1 + (ifm_shape.H - 1) * stride.vertical) - (ker_shape.H - 1);
}
- auto ofm_tensor = _tensor_reg->getAclTensor(ofm_index).get();
- auto ifm_tensor = _tensor_reg->getAclTensor(ifm_index).get();
- auto ker_tensor = _tensor_reg->getAclTensor(ker_index).get();
+ auto ofm_tensor = _tensor_reg->getAclTensor(ofm_index);
+ auto ifm_tensor = _tensor_reg->getAclTensor(ifm_index);
+ auto ker_tensor = _tensor_reg->getAclTensor(ker_index);
const auto tconv_info = acl_common::asPadStrideInfo(padding, stride);
@@ -1261,26 +1289,43 @@ void KernelGenerator::visit(const ir::operation::Transpose &node)
{
const auto ofm_idx{node.getOutputs().at(0)};
const auto ifm_idx{node.getInputs().at(ir::operation::Transpose::Input::INPUT)};
- const auto &perm{node.param().perm};
+ const auto perm_idx{node.getInputs().at(ir::operation::Transpose::Input::PERMUTATION)};
- auto ofm_tensor = _tensor_reg->getAclTensor(ofm_idx).get();
- const auto ifm_tensor = _tensor_reg->getAclTensor(ifm_idx).get();
+ auto ofm_tensor = _tensor_reg->getAclTensor(ofm_idx);
+ const auto ifm_tensor = _tensor_reg->getAclTensor(ifm_idx);
const auto frontend_layout = _current_op_seq_layout;
const auto backend_layout = ifm_tensor->layout();
-
const auto rank = _ctx.at(ifm_idx).shape().rank();
- std::vector<std::int32_t> pv(perm.cbegin(), perm.cend());
- auto backend_pv = ::onert::backend::acl_common::getARMComputePermutationVector(
- rank, pv, frontend_layout, backend_layout);
- std::unique_ptr<::arm_compute::IFunction> fn;
- if (ifm_tensor->num_dimensions() <= 2 && ofm_tensor->num_dimensions() <= 2)
+ const auto &perms = _ctx.at(perm_idx);
+ std::vector<int32_t> pv;
+ if (perms.shape() == ir::Shape{0})
+ {
+ pv.resize(rank);
+ std::iota(pv.begin(), pv.end(), 0);
+ std::reverse(pv.begin(), pv.end());
+ }
+ else
+ {
+ pv = _ctx.at(perm_idx).asVector<int32_t>();
+ }
+
+ std::unique_ptr<arm_compute::IFunction> fn;
+ if (rank == 1)
{
+ fn = acl_common::generateLayer<arm_compute::NECopy>(ifm_tensor->handle(), ofm_tensor->handle());
+ }
+ else if (rank == 2)
+ {
+ assert(pv.size() == 2 && pv.at(0) == 1 && pv.at(1) == 0);
fn = acl_common::generateLayer<arm_compute::NETranspose>(ifm_tensor->handle(),
ofm_tensor->handle());
}
else
{
+ auto backend_pv =
+ acl_common::getARMComputePermutationVector(rank, pv, frontend_layout, backend_layout);
+
fn = acl_common::generateLayer<arm_compute::NEPermute>(ifm_tensor->handle(),
ofm_tensor->handle(), backend_pv);
}
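
When the new PERMUTATION operand has shape {0}, the hunk above falls back to reversing all axes. A small standalone sketch of that default, using only the standard library (the function name is illustrative, not part of onert):

    #include <algorithm>
    #include <cstdint>
    #include <iostream>
    #include <numeric>
    #include <vector>

    std::vector<int32_t> defaultPermutation(int rank)
    {
      std::vector<int32_t> pv(rank);
      std::iota(pv.begin(), pv.end(), 0);  // {0, 1, ..., rank-1}
      std::reverse(pv.begin(), pv.end());  // {rank-1, ..., 1, 0}
      return pv;
    }

    int main()
    {
      for (auto d : defaultPermutation(4))
        std::cout << d << ' ';  // prints: 3 2 1 0
      std::cout << '\n';
    }

For rank 2 this default is exactly {1, 0}, which is why the NETranspose branch can assert that permutation and skip building a backend permutation vector.
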
@@ -1298,34 +1343,32 @@ void KernelGenerator::visit(const ir::operation::Unpack &node)
for (const auto &output_index : node.getOutputs())
output_indexes.emplace_back(output_index);
- auto input = _tensor_reg->getAclTensor(input_index).get()->handle();
+ auto input_tensor = _tensor_reg->getAclTensor(input_index);
std::vector<arm_compute::ITensor *> outputs;
for (const auto &output_index : output_indexes)
outputs.emplace_back(_tensor_reg->getAclTensor(output_index)->handle());
const auto frontend_layout = _current_op_seq_layout;
- const auto backend_layout = _tensor_reg->getAclTensor(input_index).get()->layout();
+ const auto backend_layout = _tensor_reg->getAclTensor(input_index)->layout();
if (axis < 0)
axis += input_rank;
axis = acl_common::ToARMComputeAxis(input_rank, axis, frontend_layout, backend_layout).value();
// Disable applied dim_correction
- std::vector<arm_compute::TensorShape> orig_outputs_acl_tensor_shapes;
- for (const auto &output_index : output_indexes)
+ if (input_tensor->num_dimensions() != input_tensor->info()->num_dimensions())
{
- size_t output_rank = _ctx.at(output_index).shape().rank();
- const auto &output_tensor = _tensor_reg->getAclTensor(output_index);
- orig_outputs_acl_tensor_shapes.emplace_back(output_tensor->info()->tensor_shape());
- assert(output_rank == output_tensor->num_dimensions());
- if (output_rank != output_tensor->info()->num_dimensions())
- {
- // This means that high dimension's value is 1 and ifm tensor is applied dim_correction
- output_tensor->info()->set_tensor_shape(acl_common::asTensorShape(
- _ctx.at(output_index).shape(), _current_op_seq_layout, backend_layout, false));
- }
+ // This means that the higher dimensions are 1 and dim_correction has been applied to the input tensor
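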
+ acl_common::disableDimCorrection(input_tensor);
}
- auto fn = acl_common::generateLayer<arm_compute::NEUnstack>(input, outputs, axis);
+ auto fn =
+ acl_common::generateLayer<arm_compute::NEUnstack>(input_tensor->handle(), outputs, axis);
+
+ // Revert disabling applied dim_correction
+ if (input_tensor->dimension(0) == 1)
+ {
+ acl_common::enableDimCorrection(input_tensor);
+ }
_return_fn = asAclFunction(std::move(fn));
}
@@ -1335,8 +1378,8 @@ void KernelGenerator::visit(const ir::operation::ExpandDims &node)
const auto output_index{node.getOutputs().at(0)};
const auto input_index{node.getInputs().at(ir::operation::ExpandDims::Input::INPUT)};
- auto output_tensor = _tensor_reg->getAclTensor(output_index).get();
- auto input_tensor = _tensor_reg->getAclTensor(input_index).get();
+ auto output_tensor = _tensor_reg->getAclTensor(output_index);
+ auto input_tensor = _tensor_reg->getAclTensor(input_index);
auto fn = acl_common::generateLayer<arm_compute::NEReshapeLayer>(input_tensor->handle(),
output_tensor->handle());
@@ -1352,9 +1395,9 @@ void KernelGenerator::visit(const ir::operation::Comparison &node)
const auto comparison_type = node.param().comparison_type;
- auto output_tensor = _tensor_reg->getAclTensor(output_index).get();
- auto input0_tensor = _tensor_reg->getAclTensor(input0_index).get();
- auto input1_tensor = _tensor_reg->getAclTensor(input1_index).get();
+ auto output_tensor = _tensor_reg->getAclTensor(output_index);
+ auto input0_tensor = _tensor_reg->getAclTensor(input0_index);
+ auto input1_tensor = _tensor_reg->getAclTensor(input1_index);
auto fn = acl_common::generateLayer<arm_compute::NEElementwiseComparison>(
input0_tensor->handle(), input1_tensor->handle(), output_tensor->handle(),
@@ -1370,15 +1413,20 @@ void KernelGenerator::visit(const ir::operation::OneHot &node)
const auto depth_idx{node.getInputs().at(ir::operation::OneHot::Input::DEPTH)};
const auto onvalue_idx{node.getInputs().at(ir::operation::OneHot::Input::ON_VALUE)};
const auto offvalue_idx{node.getInputs().at(ir::operation::OneHot::Input::OFF_VALUE)};
- const auto axis = node.param().axis;
- auto output_tensor = _tensor_reg->getAclTensor(out_idx).get();
- auto indices_tensor = _tensor_reg->getAclTensor(indices_idx).get();
- auto depth_tensor = _tensor_reg->getAclTensor(depth_idx).get();
- auto onvalue_tensor = _tensor_reg->getAclTensor(onvalue_idx).get();
- auto offvalue_tensor = _tensor_reg->getAclTensor(offvalue_idx).get();
+ auto output_tensor = _tensor_reg->getAclTensor(out_idx);
+ auto indices_tensor = _tensor_reg->getAclTensor(indices_idx);
+ auto depth_tensor = _tensor_reg->getAclTensor(depth_idx);
+ auto onvalue_tensor = _tensor_reg->getAclTensor(onvalue_idx);
+ auto offvalue_tensor = _tensor_reg->getAclTensor(offvalue_idx);
+
+ const size_t output_rank = _ctx.at(out_idx).shape().rank();
+ const auto frontend_layout = _current_op_seq_layout;
+ const auto backend_layout = output_tensor->layout();
+ int32_t axis = node.param().axis == -1 ? output_rank - 1 : node.param().axis;
+ axis = acl_common::ToARMComputeAxis(output_rank, axis, frontend_layout, backend_layout).value();
- auto fn = acl_common::generateLayer<arm_compute::CPPOneHotEx>(
+ auto fn = acl_common::generateLayer<arm_compute::NEOneHot>(
indices_tensor->handle(), depth_tensor->handle(), onvalue_tensor->handle(),
offvalue_tensor->handle(), output_tensor->handle(), axis);
_return_fn = asAclFunction(std::move(fn));
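
The axis handling added above first maps -1 to the last output dimension and then converts the index for ACL. Assuming the frontend and backend layouts match, ToARMComputeAxis reduces to reversing the index, since arm_compute orders dimensions from innermost to outermost; a hedged sketch of that assumption:

    #include <cstdint>
    #include <iostream>

    // Illustrative only: ignores the layout permutation the real helper also applies.
    int32_t toAclAxis(int32_t axis, int32_t rank)
    {
      if (axis == -1)
        axis = rank - 1;       // default: one-hot along the innermost (last) frontend axis
      return rank - axis - 1;  // ACL counts dimensions from innermost to outermost
    }

    int main()
    {
      std::cout << toAclAxis(-1, 4) << ' ' << toAclAxis(1, 4) << '\n';  // 0 2
    }
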
diff --git a/runtime/onert/backend/cpu/ExternalContext.h b/runtime/onert/backend/cpu/ExternalContext.h
index 6627412d2..32e249f5a 100644
--- a/runtime/onert/backend/cpu/ExternalContext.h
+++ b/runtime/onert/backend/cpu/ExternalContext.h
@@ -39,16 +39,13 @@ public:
ExternalContext() : _ruy_context(new ruy::Context)
{
setMaxNumThreads(onert::util::getConfigInt(onert::util::config::RUY_THREADS));
-#ifdef USE_RUY_GEMV
- _ruy_context->cache_policy = ruy::kCacheLHSOnNarrowMul;
-#endif
}
void setMaxNumThreads(int max_num_threads)
{
const int target_num_threads =
max_num_threads > -1 ? max_num_threads : kDefaultNumThreadpoolThreads;
- _ruy_context->max_num_threads = target_num_threads;
+ _ruy_context->set_max_num_threads(target_num_threads);
}
ruy::Context *ruy_context() const { return _ruy_context.get(); }
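
The hunk above only swaps direct member access for ruy's set_max_num_threads() setter; the thread-count resolution itself is unchanged. A standalone sketch of that resolution (the default value below is assumed for illustration, the real constant lives in ExternalContext.h):

    #include <iostream>

    // Assumed default for illustration only.
    constexpr int kDefaultNumThreadpoolThreads = 1;

    int resolveThreadCount(int configured)
    {
      // Any non-negative configured value wins; -1 (unset) falls back to the default.
      return configured > -1 ? configured : kDefaultNumThreadpoolThreads;
    }

    int main()
    {
      std::cout << resolveThreadCount(-1) << ' ' << resolveThreadCount(4) << '\n';  // 1 4
    }
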
diff --git a/runtime/onert/backend/cpu/KernelGenerator.cc b/runtime/onert/backend/cpu/KernelGenerator.cc
index 74b6f0c6b..5f330ff50 100644
--- a/runtime/onert/backend/cpu/KernelGenerator.cc
+++ b/runtime/onert/backend/cpu/KernelGenerator.cc
@@ -232,12 +232,10 @@ void KernelGenerator::visit(const ir::OpSequence &op_seq)
dyn_ctx->op_seq = &op_seq;
dyn_ctx->operations = &_operations_ctx;
dyn_ctx->dynamic_shape_inferer = std::move(dyn_shape_inferer);
- dyn_ctx->tensor_registry = _tensor_reg;
dyn_ctx->dynamic_tensor_manager = _tensor_builder->dynamicTensorManager();
_return_fn_seq->dynamic_tensor_ctx(dyn_ctx);
}
- _return_fn_seq->enableDynamicShapeInferer(true);
_current_op_seq_layout = op_seq.getLayout();
for (const auto &operation_idx : op_seq.operations())
@@ -272,10 +270,10 @@ void KernelGenerator::visit(const ir::operation::Conv2D &node)
const auto ker_index{node.getInputs().at(Conv2D::Input::KERNEL)};
const auto bias_index{node.getInputs().at(Conv2D::Input::BIAS)};
- auto ofm_tensor = _tensor_reg->getPortableTensor(ofm_index).get();
- auto ifm_tensor = _tensor_reg->getPortableTensor(ifm_index).get();
- auto ker_tensor = _tensor_reg->getPortableTensor(ker_index).get();
- auto bias_tensor = _tensor_reg->getPortableTensor(bias_index).get();
+ auto ofm_tensor = _tensor_reg->getPortableTensor(ofm_index);
+ auto ifm_tensor = _tensor_reg->getPortableTensor(ifm_index);
+ auto ker_tensor = _tensor_reg->getPortableTensor(ker_index);
+ auto bias_tensor = _tensor_reg->getPortableTensor(bias_index);
const auto stride = node.param().stride;
const auto activation = node.param().activation;
@@ -332,10 +330,10 @@ void KernelGenerator::visit(const ir::operation::DepthwiseConv2D &node)
const auto multiplier = node.param().multiplier;
const auto activation = node.param().activation;
- auto ofm_tensor = _tensor_reg->getPortableTensor(ofm_index).get();
- auto ifm_tensor = _tensor_reg->getPortableTensor(ifm_index).get();
- auto ker_tensor = _tensor_reg->getPortableTensor(ker_index).get();
- auto bias_tensor = _tensor_reg->getPortableTensor(bias_index).get();
+ auto ofm_tensor = _tensor_reg->getPortableTensor(ofm_index);
+ auto ifm_tensor = _tensor_reg->getPortableTensor(ifm_index);
+ auto ker_tensor = _tensor_reg->getPortableTensor(ker_index);
+ auto bias_tensor = _tensor_reg->getPortableTensor(bias_index);
auto fn = std::make_unique<ops::DepthwiseConvolutionLayer>();
@@ -353,11 +351,11 @@ void KernelGenerator::visit(const ir::operation::Concat &node)
const auto rank = _ctx.at(ofm_index).shape().rank();
const auto axis = ops::getAxis(rank, node.param().axis, _current_op_seq_layout);
- auto output_tensor = _tensor_reg->getPortableTensor(ofm_index).get();
+ auto output_tensor = _tensor_reg->getPortableTensor(ofm_index);
std::vector<const IPortableTensor *> input_tensors;
for (auto &ifm_idx : node.getInputs())
- input_tensors.emplace_back(_tensor_reg->getPortableTensor(ifm_idx).get());
+ input_tensors.emplace_back(_tensor_reg->getPortableTensor(ifm_idx));
auto fn = std::make_unique<ops::ConcatLayer>();
@@ -372,9 +370,9 @@ void KernelGenerator::visit(const ir::operation::BatchToSpaceND &node)
const auto input_index{node.getInputs().at(ir::operation::BatchToSpaceND::INPUT)};
const auto block_size_index{node.getInputs().at(ir::operation::BatchToSpaceND::BLOCK_SIZE)};
- auto output_alloc = _tensor_reg->getPortableTensor(output_index).get();
- auto input_alloc = _tensor_reg->getPortableTensor(input_index).get();
- auto block_size_alloc = _tensor_reg->getPortableTensor(block_size_index).get();
+ auto output_alloc = _tensor_reg->getPortableTensor(output_index);
+ auto input_alloc = _tensor_reg->getPortableTensor(input_index);
+ auto block_size_alloc = _tensor_reg->getPortableTensor(block_size_index);
auto fn = std::make_unique<ops::BatchToSpaceNDLayer>();
@@ -384,7 +382,7 @@ void KernelGenerator::visit(const ir::operation::BatchToSpaceND &node)
if (node.getInputs().size() != NNApiInputs)
{
const auto crops_data_index{node.getInputs().at(ir::operation::BatchToSpaceND::CROPS_DATA)};
- crops_alloc = _tensor_reg->getPortableTensor(crops_data_index).get();
+ crops_alloc = _tensor_reg->getPortableTensor(crops_data_index);
}
fn->configure(input_alloc, output_alloc, block_size_alloc, crops_alloc);
@@ -398,9 +396,9 @@ void KernelGenerator::visit(const ir::operation::Fill &node)
const auto input_index{node.getInputs().at(ir::operation::Fill::Input::INPUT)};
const auto value_index{node.getInputs().at(ir::operation::Fill::Input::VALUE)};
- auto output_tensor = _tensor_reg->getPortableTensor(output_index).get();
- auto input_tensor = _tensor_reg->getPortableTensor(input_index).get();
- auto value_tensor = _tensor_reg->getPortableTensor(value_index).get();
+ auto output_tensor = _tensor_reg->getPortableTensor(output_index);
+ auto input_tensor = _tensor_reg->getPortableTensor(input_index);
+ auto value_tensor = _tensor_reg->getPortableTensor(value_index);
auto fn = std::make_unique<ops::FillLayer>();
@@ -419,11 +417,10 @@ void KernelGenerator::visit(const ir::operation::FullyConnected &node)
const auto bias_index{node.getInputs().at(FullyConnected::Input::BIAS)};
const auto activation = node.param().activation;
- auto output_tensor = _tensor_reg->getPortableTensor(output_index).get();
- auto input_tensor = _tensor_reg->getPortableTensor(input_index).get();
- auto weight_tensor = _tensor_reg->getPortableTensor(weight_index).get();
- auto bias_tensor =
- bias_index.undefined() ? nullptr : _tensor_reg->getPortableTensor(bias_index).get();
+ auto output_tensor = _tensor_reg->getPortableTensor(output_index);
+ auto input_tensor = _tensor_reg->getPortableTensor(input_index);
+ auto weight_tensor = _tensor_reg->getPortableTensor(weight_index);
+ auto bias_tensor = bias_index.undefined() ? nullptr : _tensor_reg->getPortableTensor(bias_index);
auto fn = std::make_unique<ops::FullyConnectedLayer>();
@@ -438,8 +435,8 @@ void KernelGenerator::visit(const ir::operation::Reshape &node)
const auto output_index{node.getOutputs().at(0)};
const auto input_index{node.getInputs().at(ir::operation::Reshape::Input::INPUT)};
- auto output_tensor = _tensor_reg->getPortableTensor(output_index).get();
- auto input_tensor = _tensor_reg->getPortableTensor(input_index).get();
+ auto output_tensor = _tensor_reg->getPortableTensor(output_index);
+ auto input_tensor = _tensor_reg->getPortableTensor(input_index);
// optional 2nd input
IPortableTensor *shape_tensor = nullptr;
@@ -447,7 +444,7 @@ void KernelGenerator::visit(const ir::operation::Reshape &node)
if (node.getInputs().size() == 2)
{
const auto shape_index{node.getInputs().at(ir::operation::Reshape::Input::SHAPE)};
- shape_tensor = _tensor_reg->getPortableTensor(shape_index).get();
+ shape_tensor = _tensor_reg->getPortableTensor(shape_index);
}
auto fn = std::make_unique<ops::ReshapeLayer>();
@@ -461,8 +458,8 @@ void KernelGenerator::visit(const ir::operation::Squeeze &node)
const auto output_index{node.getOutputs().at(0)};
const auto input_index{node.getInputs().at(ir::operation::Squeeze::Input::INPUT)};
- auto output_tensor = _tensor_reg->getPortableTensor(output_index).get();
- auto input_tensor = _tensor_reg->getPortableTensor(input_index).get();
+ auto output_tensor = _tensor_reg->getPortableTensor(output_index);
+ auto input_tensor = _tensor_reg->getPortableTensor(input_index);
// Squeeze can share same kernel with reshape
auto fn = std::make_unique<ops::ReshapeLayer>();
@@ -479,8 +476,8 @@ void KernelGenerator::visit(const ir::operation::Softmax &node)
const auto beta = node.param().beta;
- auto output_tensor = _tensor_reg->getPortableTensor(output_index).get();
- auto input_tensor = _tensor_reg->getPortableTensor(input_index).get();
+ auto output_tensor = _tensor_reg->getPortableTensor(output_index);
+ auto input_tensor = _tensor_reg->getPortableTensor(input_index);
auto fn = std::make_unique<ops::SoftMaxLayer>();
@@ -497,9 +494,9 @@ void KernelGenerator::visit(const ir::operation::BinaryArithmetic &node)
const auto activation = node.param().activation;
- auto ofm_tensor = _tensor_reg->getPortableTensor(ofm_index).get();
- auto lhs_tensor = _tensor_reg->getPortableTensor(lhs_index).get();
- auto rhs_tensor = _tensor_reg->getPortableTensor(rhs_index).get();
+ auto ofm_tensor = _tensor_reg->getPortableTensor(ofm_index);
+ auto lhs_tensor = _tensor_reg->getPortableTensor(lhs_index);
+ auto rhs_tensor = _tensor_reg->getPortableTensor(rhs_index);
auto fn = std::make_unique<ops::BinaryArithmeticLayer>();
@@ -515,9 +512,9 @@ void KernelGenerator::visit(const ir::operation::Comparison &node)
const auto lhs_index{node.getInputs().at(ir::operation::Comparison::Input::INPUT0)};
const auto rhs_index{node.getInputs().at(ir::operation::Comparison::Input::INPUT1)};
- auto ofm_tensor = _tensor_reg->getPortableTensor(ofm_index).get();
- auto lhs_tensor = _tensor_reg->getPortableTensor(lhs_index).get();
- auto rhs_tensor = _tensor_reg->getPortableTensor(rhs_index).get();
+ auto ofm_tensor = _tensor_reg->getPortableTensor(ofm_index);
+ auto lhs_tensor = _tensor_reg->getPortableTensor(lhs_index);
+ auto rhs_tensor = _tensor_reg->getPortableTensor(rhs_index);
auto comparison_type = node.param().comparison_type;
@@ -534,9 +531,9 @@ void KernelGenerator::visit(const ir::operation::Gather &node)
const auto input_index{node.getInputs().at(ir::operation::Gather::Input::INPUT)};
const auto indices_index{node.getInputs().at(ir::operation::Gather::Input::INDICES)};
- auto output_tensor = _tensor_reg->getPortableTensor(output_index).get();
- auto input_tensor = _tensor_reg->getPortableTensor(input_index).get();
- auto indices_tensor = _tensor_reg->getPortableTensor(indices_index).get();
+ auto output_tensor = _tensor_reg->getPortableTensor(output_index);
+ auto input_tensor = _tensor_reg->getPortableTensor(input_index);
+ auto indices_tensor = _tensor_reg->getPortableTensor(indices_index);
const auto backend_layout = output_tensor->layout();
UNUSED_RELEASE(backend_layout);
@@ -575,11 +572,11 @@ void KernelGenerator::visit(const ir::operation::OneHot &node)
const auto axis = node.param().axis;
- auto output_tensor = _tensor_reg->getPortableTensor(output_index).get();
- auto indices_tensor = _tensor_reg->getPortableTensor(indices_index).get();
- auto depth_tensor = _tensor_reg->getPortableTensor(depth_index).get();
- auto onvalue_tensor = _tensor_reg->getPortableTensor(onvalue_index).get();
- auto offvalue_tensor = _tensor_reg->getPortableTensor(offvalue_index).get();
+ auto output_tensor = _tensor_reg->getPortableTensor(output_index);
+ auto indices_tensor = _tensor_reg->getPortableTensor(indices_index);
+ auto depth_tensor = _tensor_reg->getPortableTensor(depth_index);
+ auto onvalue_tensor = _tensor_reg->getPortableTensor(onvalue_index);
+ auto offvalue_tensor = _tensor_reg->getPortableTensor(offvalue_index);
assert(indices_tensor->data_type() == OperandType::INT32);
assert(axis <= static_cast<int>(indices_tensor->num_dimensions()));
@@ -595,10 +592,10 @@ void KernelGenerator::visit(const ir::operation::Einsum &node)
{
const auto ofm_index{node.getOutputs().at(0)};
- auto output_tensor = _tensor_reg->getPortableTensor(ofm_index).get();
+ auto output_tensor = _tensor_reg->getPortableTensor(ofm_index);
std::vector<const IPortableTensor *> input_tensors;
for (auto &ifm_idx : node.getInputs())
- input_tensors.emplace_back(_tensor_reg->getPortableTensor(ifm_idx).get());
+ input_tensors.emplace_back(_tensor_reg->getPortableTensor(ifm_idx));
const auto equation = node.param().equation;
@@ -613,7 +610,7 @@ void KernelGenerator::visit(const ir::operation::Custom &node)
{
auto fill_op_info = [&](const ir::OperandIndexSequence &opSeq,
std::vector<custom::TypeInfo> &types,
- std::vector<std::shared_ptr<IPortableTensor>> &tensors) {
+ std::vector<IPortableTensor *> &tensors) {
for (auto &idx : opSeq)
{
const auto &operand = _ctx.at(idx);
@@ -642,8 +639,8 @@ void KernelGenerator::visit(const ir::operation::ElementwiseActivation &node)
const auto output_index{node.getOutputs().at(0)};
const auto input_index{node.getInputs().at(ir::operation::ElementwiseActivation::Input::INPUT)};
- auto output_tensor = _tensor_reg->getPortableTensor(output_index).get();
- auto input_tensor = _tensor_reg->getPortableTensor(input_index).get();
+ auto output_tensor = _tensor_reg->getPortableTensor(output_index);
+ auto input_tensor = _tensor_reg->getPortableTensor(input_index);
auto fn = std::make_unique<ops::ElementwiseActivationLayer>();
@@ -659,9 +656,9 @@ void KernelGenerator::visit(const ir::operation::ElementwiseBinary &node)
const auto lhs_index{node.getInputs().at(ir::operation::ElementwiseBinary::Input::LHS)};
const auto rhs_index{node.getInputs().at(ir::operation::ElementwiseBinary::Input::RHS)};
- auto output_tensor = _tensor_reg->getPortableTensor(output_index).get();
- auto lhs_tensor = _tensor_reg->getPortableTensor(lhs_index).get();
- auto rhs_tensor = _tensor_reg->getPortableTensor(rhs_index).get();
+ auto output_tensor = _tensor_reg->getPortableTensor(output_index);
+ auto lhs_tensor = _tensor_reg->getPortableTensor(lhs_index);
+ auto rhs_tensor = _tensor_reg->getPortableTensor(rhs_index);
auto fn = std::make_unique<ops::ElementwiseBinaryLayer>();
@@ -676,8 +673,8 @@ void KernelGenerator::visit(const ir::operation::ElementwiseUnary &node)
const auto output_index{node.getOutputs().at(0)};
const auto input_index{node.getInputs().at(ir::operation::ElementwiseUnary::Input::INPUT)};
- auto output_tensor = _tensor_reg->getPortableTensor(output_index).get();
- auto input_tensor = _tensor_reg->getPortableTensor(input_index).get();
+ auto output_tensor = _tensor_reg->getPortableTensor(output_index);
+ auto input_tensor = _tensor_reg->getPortableTensor(input_index);
auto fn = std::make_unique<ops::ElementwiseUnaryLayer>();
@@ -692,9 +689,9 @@ void KernelGenerator::visit(const ir::operation::ExpandDims &node)
const auto input_index{node.getInputs().at(ir::operation::ExpandDims::Input::INPUT)};
const auto axis_index{node.getInputs().at(ir::operation::ExpandDims::Input::AXIS)};
- auto output_tensor = _tensor_reg->getPortableTensor(output_index).get();
- auto input_tensor = _tensor_reg->getPortableTensor(input_index).get();
- auto axis_tensor = _tensor_reg->getPortableTensor(axis_index).get();
+ auto output_tensor = _tensor_reg->getPortableTensor(output_index);
+ auto input_tensor = _tensor_reg->getPortableTensor(input_index);
+ auto axis_tensor = _tensor_reg->getPortableTensor(axis_index);
auto fn = std::make_unique<ops::ExpandDimsLayer>();
@@ -712,11 +709,11 @@ void KernelGenerator::visit(const ir::operation::Pack &node)
assert(-rank <= axis && axis < rank);
- auto output_tensor = _tensor_reg->getPortableTensor(ofm_index).get();
+ auto output_tensor = _tensor_reg->getPortableTensor(ofm_index);
std::vector<const IPortableTensor *> input_tensors;
for (auto &ifm_idx : node.getInputs())
- input_tensors.emplace_back(_tensor_reg->getPortableTensor(ifm_idx).get());
+ input_tensors.emplace_back(_tensor_reg->getPortableTensor(ifm_idx));
auto fn = std::make_unique<ops::PackLayer>();
@@ -734,11 +731,11 @@ void KernelGenerator::visit(const ir::operation::Unpack &node)
assert(rank == 0 || (-rank <= axis && axis < rank));
- auto input_tensor = _tensor_reg->getPortableTensor(input_index).get();
+ auto input_tensor = _tensor_reg->getPortableTensor(input_index);
std::vector<IPortableTensor *> output_tensors;
for (auto &output_idx : node.getOutputs())
- output_tensors.emplace_back(_tensor_reg->getPortableTensor(output_idx).get());
+ output_tensors.emplace_back(_tensor_reg->getPortableTensor(output_idx));
auto fn = std::make_unique<ops::UnpackLayer>();
@@ -756,8 +753,8 @@ void KernelGenerator::visit(const ir::operation::Pad &node)
const auto output_index{node.getOutputs().at(0)};
assert(_ctx.at(pad_index).data());
- auto input = _tensor_reg->getPortableTensor(input_index).get();
- auto output = _tensor_reg->getPortableTensor(output_index).get();
+ auto input = _tensor_reg->getPortableTensor(input_index);
+ auto output = _tensor_reg->getPortableTensor(output_index);
auto pad_rank = _ctx.at(pad_index).shape().dim(0);
auto pad_base = reinterpret_cast<const int32_t *>(_ctx.at(pad_index).data()->base());
@@ -780,13 +777,15 @@ void KernelGenerator::visit(const ir::operation::Transpose &node)
{
const auto output_index{node.getOutputs().at(0)};
const auto input_index{node.getInputs().at(ir::operation::Transpose::Input::INPUT)};
+ const auto perm_index{node.getInputs().at(ir::operation::Transpose::Input::PERMUTATION)};
- auto output_tensor = _tensor_reg->getPortableTensor(output_index).get();
- auto input_tensor = _tensor_reg->getPortableTensor(input_index).get();
+ auto output_tensor = _tensor_reg->getPortableTensor(output_index);
+ auto input_tensor = _tensor_reg->getPortableTensor(input_index);
+ auto perm_tensor = _tensor_reg->getPortableTensor(perm_index);
auto fn = std::make_unique<ops::TransposeLayer>();
- fn->configure(input_tensor, output_tensor, node.param().perm);
+ fn->configure(input_tensor, perm_tensor, output_tensor);
_return_fn = std::move(fn);
}
@@ -798,9 +797,9 @@ void KernelGenerator::visit(const ir::operation::Reduce &node)
const auto axes_index{node.getInputs().at(ir::operation::Reduce::Input::AXES)};
const auto keep_dims = node.param().keep_dims;
- auto output_tensor = _tensor_reg->getPortableTensor(output_index).get();
- auto input_tensor = _tensor_reg->getPortableTensor(input_index).get();
- auto axes_tensor = _tensor_reg->getPortableTensor(axes_index).get();
+ auto output_tensor = _tensor_reg->getPortableTensor(output_index);
+ auto input_tensor = _tensor_reg->getPortableTensor(input_index);
+ auto axes_tensor = _tensor_reg->getPortableTensor(axes_index);
if (node.param().reduce_type == ir::operation::Reduce::ReduceType::MEAN)
{
@@ -828,10 +827,10 @@ void KernelGenerator::visit(const ir::operation::Select &node)
const auto true_index{node.getInputs().at(ir::operation::Select::Input::INPUT_TRUE)};
const auto false_index{node.getInputs().at(ir::operation::Select::Input::INPUT_FALSE)};
- auto output_tensor = _tensor_reg->getPortableTensor(output_index).get();
- auto condition_tensor = _tensor_reg->getPortableTensor(condition_index).get();
- auto true_tensor = _tensor_reg->getPortableTensor(true_index).get();
- auto false_tensor = _tensor_reg->getPortableTensor(false_index).get();
+ auto output_tensor = _tensor_reg->getPortableTensor(output_index);
+ auto condition_tensor = _tensor_reg->getPortableTensor(condition_index);
+ auto true_tensor = _tensor_reg->getPortableTensor(true_index);
+ auto false_tensor = _tensor_reg->getPortableTensor(false_index);
auto fn = std::make_unique<ops::SelectLayer>();
@@ -847,10 +846,10 @@ void KernelGenerator::visit(const ir::operation::Slice &node)
const auto begins_index{node.getInputs().at(ir::operation::Slice::Input::BEGINS)};
const auto sizes_index{node.getInputs().at(ir::operation::Slice::Input::SIZES)};
- auto output_tensor = _tensor_reg->getPortableTensor(output_index).get();
- auto input_tensor = _tensor_reg->getPortableTensor(input_index).get();
- auto begins_tensor = _tensor_reg->getPortableTensor(begins_index).get();
- auto sizes_tensor = _tensor_reg->getPortableTensor(sizes_index).get();
+ auto output_tensor = _tensor_reg->getPortableTensor(output_index);
+ auto input_tensor = _tensor_reg->getPortableTensor(input_index);
+ auto begins_tensor = _tensor_reg->getPortableTensor(begins_index);
+ auto sizes_tensor = _tensor_reg->getPortableTensor(sizes_index);
auto fn = std::make_unique<ops::SliceLayer>();
@@ -867,11 +866,11 @@ void KernelGenerator::visit(const ir::operation::StridedSlice &node)
const auto ends_index{node.getInputs().at(ir::operation::StridedSlice::Input::ENDS)};
const auto strides_index{node.getInputs().at(ir::operation::StridedSlice::Input::STRIDES)};
- auto output_tensor = _tensor_reg->getPortableTensor(output_index).get();
- auto input_tensor = _tensor_reg->getPortableTensor(input_index).get();
- auto starts_tensor = _tensor_reg->getPortableTensor(starts_index).get();
- auto ends_tensor = _tensor_reg->getPortableTensor(ends_index).get();
- auto strides_tensor = _tensor_reg->getPortableTensor(strides_index).get();
+ auto output_tensor = _tensor_reg->getPortableTensor(output_index);
+ auto input_tensor = _tensor_reg->getPortableTensor(input_index);
+ auto starts_tensor = _tensor_reg->getPortableTensor(starts_index);
+ auto ends_tensor = _tensor_reg->getPortableTensor(ends_index);
+ auto strides_tensor = _tensor_reg->getPortableTensor(strides_index);
auto begin_mask = node.param().begin_mask;
auto end_mask = node.param().end_mask;
@@ -891,19 +890,18 @@ void KernelGenerator::visit(const ir::operation::Split &node)
assert(num_splits == static_cast<int>(node.getOutputs().size()));
const auto input_idx{node.getInputs().at(ir::operation::Split::Input::INPUT)};
- const auto rank = _ctx.at(input_idx).shape().rank();
- const auto axis = ops::getAxis(rank, node.param().axis, _current_op_seq_layout);
- auto axis_resolved = axis < 0 ? axis + rank : axis;
+ const auto axis_idx{node.getInputs().at(ir::operation::Split::Input::AXIS)};
- auto in_tensor = _tensor_reg->getPortableTensor(input_idx).get();
+ auto in_tensor = _tensor_reg->getPortableTensor(input_idx);
+ auto axis_tensor = _tensor_reg->getPortableTensor(axis_idx);
std::vector<IPortableTensor *> out_tensors;
for (auto &output_idx : node.getOutputs())
- out_tensors.emplace_back(_tensor_reg->getPortableTensor(output_idx).get());
+ out_tensors.emplace_back(_tensor_reg->getPortableTensor(output_idx));
auto fn = std::make_unique<ops::SplitLayer>();
- fn->configure(in_tensor, num_splits, axis_resolved, out_tensors);
+ fn->configure(in_tensor, axis_tensor, num_splits, out_tensors);
_return_fn = std::move(fn);
}
@@ -913,8 +911,8 @@ void KernelGenerator::visit(const ir::operation::Shape &node)
const auto ofm_index{node.getOutputs().at(0)};
const auto ifm_index{node.getInputs().at(ir::operation::Shape::Input::INPUT)};
- auto ofm_tensor = _tensor_reg->getPortableTensor(ofm_index).get();
- auto ifm_tensor = _tensor_reg->getPortableTensor(ifm_index).get();
+ auto ofm_tensor = _tensor_reg->getPortableTensor(ofm_index);
+ auto ifm_tensor = _tensor_reg->getPortableTensor(ifm_index);
auto fn = std::make_unique<ops::ShapeLayer>();
@@ -928,18 +926,37 @@ void KernelGenerator::visit(const ir::operation::ResizeBilinear &node)
const auto output_index{node.getOutputs().at(0)};
const auto input_index{node.getInputs().at(ir::operation::ResizeBilinear::INPUT)};
- auto output_height = node.param().height_out;
- auto output_width = node.param().width_out;
auto align_corners = node.param().align_corners;
auto half_pixel_centers = node.param().half_pixel_centers;
- auto output_tensor = _tensor_reg->getPortableTensor(output_index).get();
- auto input_tensor = _tensor_reg->getPortableTensor(input_index).get();
+ auto output_tensor = _tensor_reg->getPortableTensor(output_index);
+ auto input_tensor = _tensor_reg->getPortableTensor(input_index);
auto fn = std::make_unique<ops::ResizeBilinearLayer>();
- fn->configure(input_tensor, output_tensor, output_height, output_width, align_corners,
- half_pixel_centers);
+ if (node.getInputs().size() == 1)
+ {
+ fn->configure(input_tensor, output_tensor, node.param().height_out, node.param().width_out,
+ align_corners, half_pixel_centers);
+ }
+ else
+ {
+ assert(node.getInputs().size() == 2);
+ const auto size_index{node.getInputs().at(ir::operation::ResizeBilinear::SIZE)};
+ auto size_tensor = _tensor_reg->getPortableTensor(size_index);
+ if (size_tensor->is_constant())
+ {
+ auto size_vec = _ctx.at(size_index).asVector<int32_t>();
+ const auto height_out = size_vec[0];
+ const auto width_out = size_vec[1];
+ fn->configure(input_tensor, output_tensor, height_out, width_out, align_corners,
+ half_pixel_centers);
+ }
+ else
+ {
+ fn->configure(input_tensor, output_tensor, size_tensor, align_corners, half_pixel_centers);
+ }
+ }
_return_fn = std::move(fn);
}
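
ResizeBilinear now supports three ways of obtaining the output size: node parameters, a constant SIZE operand read at kernel-generation time, and a dynamic SIZE tensor resolved by the kernel at run time. A sketch of that branch order with stand-in types (not the onert API):

    #include <cassert>
    #include <cstdint>
    #include <iostream>
    #include <vector>

    // Returns true when the output size can be fixed at kernel-generation time.
    bool resolveOutputSize(bool has_size_operand, bool size_is_constant,
                           const std::vector<int32_t> &size_vec, int32_t param_h, int32_t param_w,
                           int32_t &height_out, int32_t &width_out)
    {
      if (!has_size_operand)  // legacy single-input form: size comes from node params
      {
        height_out = param_h;
        width_out = param_w;
        return true;
      }
      if (size_is_constant)   // constant SIZE operand: read {height_out, width_out} now
      {
        assert(size_vec.size() == 2);
        height_out = size_vec[0];
        width_out = size_vec[1];
        return true;
      }
      return false;           // dynamic SIZE tensor: the kernel reads it at run time
    }

    int main()
    {
      int32_t h = 0, w = 0;
      bool fixed = resolveOutputSize(true, true, {224, 224}, 0, 0, h, w);
      std::cout << fixed << ' ' << h << 'x' << w << '\n';  // 1 224x224
    }
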
@@ -950,9 +967,9 @@ void KernelGenerator::visit(const ir::operation::Reverse &node)
const auto input_index{node.getInputs().at(ir::operation::Reverse::INPUT)};
const auto axis_index{node.getInputs().at(ir::operation::Reverse::AXIS)};
- auto output_tensor = _tensor_reg->getPortableTensor(output_index).get();
- auto input_tensor = _tensor_reg->getPortableTensor(input_index).get();
- auto axis_tensor = _tensor_reg->getPortableTensor(axis_index).get();
+ auto output_tensor = _tensor_reg->getPortableTensor(output_index);
+ auto input_tensor = _tensor_reg->getPortableTensor(input_index);
+ auto axis_tensor = _tensor_reg->getPortableTensor(axis_index);
auto fn = std::make_unique<ops::ReverseLayer>();
@@ -965,15 +982,15 @@ void KernelGenerator::visit(const ir::operation::ArgMax &node)
{
const auto output_index{node.getOutputs().at(0)};
const auto input_index{node.getInputs().at(ir::operation::ArgMax::INPUT)};
+ const auto axis_index{node.getInputs().at(ir::operation::ArgMax::AXIS)};
- const auto axis = node.param().axis;
-
- auto output_tensor = _tensor_reg->getPortableTensor(output_index).get();
- auto input_tensor = _tensor_reg->getPortableTensor(input_index).get();
+ auto output_tensor = _tensor_reg->getPortableTensor(output_index);
+ auto input_tensor = _tensor_reg->getPortableTensor(input_index);
+ auto axis_tensor = _tensor_reg->getPortableTensor(axis_index);
auto fn = std::make_unique<ops::ArgMinMaxLayer>();
- fn->configure(input_tensor, output_tensor, axis, /* is_arg_max */ true);
+ fn->configure(input_tensor, output_tensor, axis_tensor, /* is_arg_max */ true);
_return_fn = std::move(fn);
}
@@ -992,8 +1009,8 @@ void KernelGenerator::visit(const ir::operation::Pool2D &node)
ir::calculatePadding(node.param().padding, ifm_shape, ofm_shape, stride, kw, kh);
const auto activation = node.param().activation;
- auto ofm_tensor = _tensor_reg->getPortableTensor(ofm_index).get();
- auto ifm_tensor = _tensor_reg->getPortableTensor(ifm_index).get();
+ auto ofm_tensor = _tensor_reg->getPortableTensor(ofm_index);
+ auto ifm_tensor = _tensor_reg->getPortableTensor(ifm_index);
auto fn = std::make_unique<ops::PoolLayer>();
@@ -1010,9 +1027,9 @@ void KernelGenerator::visit(const ir::operation::Pow &node)
const auto lhs_index{node.getInputs().at(ir::operation::Pow::LHS)};
const auto rhs_index{node.getInputs().at(ir::operation::Pow::RHS)};
- auto output_tensor = _tensor_reg->getPortableTensor(output_index).get();
- auto lhs_tensor = _tensor_reg->getPortableTensor(lhs_index).get();
- auto rhs_tensor = _tensor_reg->getPortableTensor(rhs_index).get();
+ auto output_tensor = _tensor_reg->getPortableTensor(output_index);
+ auto lhs_tensor = _tensor_reg->getPortableTensor(lhs_index);
+ auto rhs_tensor = _tensor_reg->getPortableTensor(rhs_index);
auto fn = std::make_unique<ops::PowLayer>();
@@ -1026,8 +1043,8 @@ void KernelGenerator::visit(const ir::operation::L2Normalization &node)
const auto output_index{node.getOutputs().at(0)};
const auto input_index{node.getInputs().at(0)};
- auto output_alloc = _tensor_reg->getPortableTensor(output_index).get();
- auto input_alloc = _tensor_reg->getPortableTensor(input_index).get();
+ auto output_alloc = _tensor_reg->getPortableTensor(output_index);
+ auto input_alloc = _tensor_reg->getPortableTensor(input_index);
auto fn = std::make_unique<ops::L2NormLayer>();
@@ -1043,10 +1060,10 @@ void KernelGenerator::visit(const ir::operation::Range &node)
const auto limit_index{node.getInputs().at(ir::operation::Range::LIMIT)};
const auto delta_index{node.getInputs().at(ir::operation::Range::DELTA)};
- auto output_tensor = _tensor_reg->getPortableTensor(output_index).get();
- auto start_tensor = _tensor_reg->getPortableTensor(start_index).get();
- auto limit_tensor = _tensor_reg->getPortableTensor(limit_index).get();
- auto delta_tensor = _tensor_reg->getPortableTensor(delta_index).get();
+ auto output_tensor = _tensor_reg->getPortableTensor(output_index);
+ auto start_tensor = _tensor_reg->getPortableTensor(start_index);
+ auto limit_tensor = _tensor_reg->getPortableTensor(limit_index);
+ auto delta_tensor = _tensor_reg->getPortableTensor(delta_index);
auto fn = std::make_unique<ops::RangeLayer>();
@@ -1059,8 +1076,8 @@ void KernelGenerator::visit(const ir::operation::Rank &node)
const auto ofm_index{node.getOutputs().at(0)};
const auto ifm_index{node.getInputs().at(ir::operation::Shape::Input::INPUT)};
- auto ofm_tensor = _tensor_reg->getPortableTensor(ofm_index).get();
- auto ifm_tensor = _tensor_reg->getPortableTensor(ifm_index).get();
+ auto ofm_tensor = _tensor_reg->getPortableTensor(ofm_index);
+ auto ifm_tensor = _tensor_reg->getPortableTensor(ifm_index);
auto fn = std::make_unique<ops::RankLayer>();
@@ -1075,9 +1092,9 @@ void KernelGenerator::visit(const ir::operation::SquaredDifference &node)
const auto lhs_index{node.getInputs().at(ir::operation::SquaredDifference::Input::LHS)};
const auto rhs_index{node.getInputs().at(ir::operation::SquaredDifference::Input::RHS)};
- auto ofm_tensor = _tensor_reg->getPortableTensor(ofm_index).get();
- auto lhs_tensor = _tensor_reg->getPortableTensor(lhs_index).get();
- auto rhs_tensor = _tensor_reg->getPortableTensor(rhs_index).get();
+ auto ofm_tensor = _tensor_reg->getPortableTensor(ofm_index);
+ auto lhs_tensor = _tensor_reg->getPortableTensor(lhs_index);
+ auto rhs_tensor = _tensor_reg->getPortableTensor(rhs_index);
auto fn = std::make_unique<ops::SqDiffLayer>();
@@ -1091,9 +1108,9 @@ void KernelGenerator::visit(const ir::operation::Tile &node)
const auto input_index{node.getInputs().at(ir::operation::Tile::INPUT)};
const auto multiples_index{node.getInputs().at(ir::operation::Tile::MULTIPLES)};
- auto output_tensor = _tensor_reg->getPortableTensor(output_index).get();
- auto input_tensor = _tensor_reg->getPortableTensor(input_index).get();
- auto multiples_tensor = _tensor_reg->getPortableTensor(multiples_index).get();
+ auto output_tensor = _tensor_reg->getPortableTensor(output_index);
+ auto input_tensor = _tensor_reg->getPortableTensor(input_index);
+ auto multiples_tensor = _tensor_reg->getPortableTensor(multiples_index);
auto fn = std::make_unique<ops::TileLayer>();
@@ -1108,10 +1125,10 @@ void KernelGenerator::visit(const ir::operation::MatrixBandPart &node)
const auto num_lower_index{node.getInputs().at(ir::operation::MatrixBandPart::NUM_LOWER_DIAG)};
const auto num_upper_index{node.getInputs().at(ir::operation::MatrixBandPart::NUM_UPPER_DIAG)};
- auto output_tensor = _tensor_reg->getPortableTensor(output_index).get();
- auto input_tensor = _tensor_reg->getPortableTensor(input_index).get();
- auto num_lower_tensor = _tensor_reg->getPortableTensor(num_lower_index).get();
- auto num_upper_tensor = _tensor_reg->getPortableTensor(num_upper_index).get();
+ auto output_tensor = _tensor_reg->getPortableTensor(output_index);
+ auto input_tensor = _tensor_reg->getPortableTensor(input_index);
+ auto num_lower_tensor = _tensor_reg->getPortableTensor(num_lower_index);
+ auto num_upper_tensor = _tensor_reg->getPortableTensor(num_upper_index);
auto fn = std::make_unique<ops::MatrixBandPartLayer>();
@@ -1125,9 +1142,9 @@ void KernelGenerator::visit(const ir::operation::BatchMatMul &node)
const auto lhs_index{node.getInputs().at(ir::operation::BatchMatMul::LHS)};
const auto rhs_index{node.getInputs().at(ir::operation::BatchMatMul::RHS)};
- auto output_tensor = _tensor_reg->getPortableTensor(output_index).get();
- auto lhs_tensor = _tensor_reg->getPortableTensor(lhs_index).get();
- auto rhs_tensor = _tensor_reg->getPortableTensor(rhs_index).get();
+ auto output_tensor = _tensor_reg->getPortableTensor(output_index);
+ auto lhs_tensor = _tensor_reg->getPortableTensor(lhs_index);
+ auto rhs_tensor = _tensor_reg->getPortableTensor(rhs_index);
const auto adj_x = node.param().adj_x;
const auto adj_y = node.param().adj_y;
@@ -1144,9 +1161,9 @@ void KernelGenerator::visit(const ir::operation::BroadcastTo &node)
const auto input_index{node.getInputs().at(ir::operation::BroadcastTo::INPUT)};
const auto shape_index{node.getInputs().at(ir::operation::BroadcastTo::SHAPE)};
- auto output_tensor = _tensor_reg->getPortableTensor(output_index).get();
- auto input_tensor = _tensor_reg->getPortableTensor(input_index).get();
- auto shape_tensor = _tensor_reg->getPortableTensor(shape_index).get();
+ auto output_tensor = _tensor_reg->getPortableTensor(output_index);
+ auto input_tensor = _tensor_reg->getPortableTensor(input_index);
+ auto shape_tensor = _tensor_reg->getPortableTensor(shape_index);
auto fn = std::make_unique<ops::BroadcastToLayer>();
@@ -1159,10 +1176,10 @@ void KernelGenerator::visit(const ir::operation::FusedBatchNorm &node)
{
const auto ofm_index{node.getOutputs().at(0)};
- auto output_tensor = _tensor_reg->getPortableTensor(ofm_index).get();
+ auto output_tensor = _tensor_reg->getPortableTensor(ofm_index);
std::vector<const IPortableTensor *> input_tensors;
for (auto &ifm_idx : node.getInputs())
- input_tensors.emplace_back(_tensor_reg->getPortableTensor(ifm_idx).get());
+ input_tensors.emplace_back(_tensor_reg->getPortableTensor(ifm_idx));
const auto epsilon = node.param().epsilon;
const auto is_training = node.param().is_training;
@@ -1183,8 +1200,8 @@ void KernelGenerator::visit(const ir::operation::LogSoftmax &node)
const auto beta = node.param().beta;
const auto axis = node.param().axis;
- auto output_tensor = _tensor_reg->getPortableTensor(output_index).get();
- auto input_tensor = _tensor_reg->getPortableTensor(input_index).get();
+ auto output_tensor = _tensor_reg->getPortableTensor(output_index);
+ auto input_tensor = _tensor_reg->getPortableTensor(input_index);
auto fn = std::make_unique<ops::LogSoftMaxLayer>();
@@ -1200,10 +1217,10 @@ void KernelGenerator::visit(const ir::operation::SpaceToBatchND &node)
const auto block_shape_index{node.getInputs().at(ir::operation::SpaceToBatchND::BLOCK_SIZE)};
const auto padding_index{node.getInputs().at(ir::operation::SpaceToBatchND::PADDINGS)};
- auto output_tensor = _tensor_reg->getPortableTensor(output_index).get();
- auto input_tensor = _tensor_reg->getPortableTensor(input_index).get();
- auto block_shape_tensor = _tensor_reg->getPortableTensor(block_shape_index).get();
- auto padding_tensor = _tensor_reg->getPortableTensor(padding_index).get();
+ auto output_tensor = _tensor_reg->getPortableTensor(output_index);
+ auto input_tensor = _tensor_reg->getPortableTensor(input_index);
+ auto block_shape_tensor = _tensor_reg->getPortableTensor(block_shape_index);
+ auto padding_tensor = _tensor_reg->getPortableTensor(padding_index);
auto fn = std::make_unique<ops::SpaceToBatchNDLayer>();
@@ -1218,8 +1235,8 @@ void KernelGenerator::visit(const ir::operation::SpaceToDepth &node)
const auto output_index{node.getOutputs().at(0)};
auto block_size = node.param().block_size;
- auto input_tensor = _tensor_reg->getPortableTensor(input_index).get();
- auto output_tensor = _tensor_reg->getPortableTensor(output_index).get();
+ auto input_tensor = _tensor_reg->getPortableTensor(input_index);
+ auto output_tensor = _tensor_reg->getPortableTensor(output_index);
auto fn = std::make_unique<ops::SpaceToDepthLayer>();
@@ -1233,9 +1250,9 @@ void KernelGenerator::visit(const ir::operation::StatelessRandomUniform &node)
const auto shape_index{node.getInputs().at(ir::operation::StatelessRandomUniform::SHAPE)};
const auto seed_index{node.getInputs().at(ir::operation::StatelessRandomUniform::SEED)};
- auto output_alloc = _tensor_reg->getPortableTensor(output_index).get();
- auto shape_alloc = _tensor_reg->getPortableTensor(shape_index).get();
- auto seed_alloc = _tensor_reg->getPortableTensor(seed_index).get();
+ auto output_alloc = _tensor_reg->getPortableTensor(output_index);
+ auto shape_alloc = _tensor_reg->getPortableTensor(shape_index);
+ auto seed_alloc = _tensor_reg->getPortableTensor(seed_index);
auto fn = std::make_unique<ops::StatelessRandomUniformLayer>();
@@ -1252,13 +1269,13 @@ void KernelGenerator::visit(const ir::operation::SplitV &node)
const auto size_splits{node.getInputs().at(ir::operation::SplitV::Input::SIZE_SPLITS)};
const auto split_dim{node.getInputs().at(ir::operation::SplitV::Input::SPLIT_DIM)};
- auto in_tensor = _tensor_reg->getPortableTensor(input_idx).get();
- auto in_size_splits = _tensor_reg->getPortableTensor(size_splits).get();
- auto in_split_dim = _tensor_reg->getPortableTensor(split_dim).get();
+ auto in_tensor = _tensor_reg->getPortableTensor(input_idx);
+ auto in_size_splits = _tensor_reg->getPortableTensor(size_splits);
+ auto in_split_dim = _tensor_reg->getPortableTensor(split_dim);
std::vector<IPortableTensor *> out_tensors;
for (auto &output_idx : node.getOutputs())
- out_tensors.emplace_back(_tensor_reg->getPortableTensor(output_idx).get());
+ out_tensors.emplace_back(_tensor_reg->getPortableTensor(output_idx));
auto fn = std::make_unique<ops::SplitVLayer>();
diff --git a/runtime/onert/backend/cpu/StaticTensorManager.cc b/runtime/onert/backend/cpu/StaticTensorManager.cc
index 78c98dabf..3edac897c 100644
--- a/runtime/onert/backend/cpu/StaticTensorManager.cc
+++ b/runtime/onert/backend/cpu/StaticTensorManager.cc
@@ -41,7 +41,7 @@ void StaticTensorManager::allocateNonconsts(void)
for (auto &pair : _tensors->native_tensors())
{
const auto &ind = pair.first;
- auto tensor = pair.second;
+ auto tensor = pair.second.get();
if (!_as_constants[ind] && !tensor->is_dynamic())
{
auto *buffer = _nonconst_mgr->getBuffer(ind);
@@ -62,13 +62,14 @@ void StaticTensorManager::buildTensor(const ir::OperandIndex &ind,
assert(!_tensors->getITensor(ind));
if (as_const)
{
- auto tensor = std::make_shared<ExternalTensor>(tensor_info, backend_layout);
- _tensors->setNativeTensor(ind, tensor);
+ auto tensor = std::make_unique<ExternalTensor>(tensor_info, backend_layout);
+ _tensors->setNativeTensor(ind, std::move(tensor));
}
else
{
- auto tensor = std::make_shared<Tensor>(tensor_info, backend_layout, _dynamic_tensor_manager);
- _tensors->setNativeTensor(ind, tensor);
+ auto tensor = std::make_unique<Tensor>(tensor_info, backend_layout,
+ _dynamic_tensor_manager->dynamic_mem_mgr().get());
+ _tensors->setNativeTensor(ind, std::move(tensor));
}
_as_constants[ind] = as_const;
}
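
The registry change above moves native tensors from shared_ptr to unique_ptr ownership, so kernels only ever hold raw, non-owning pointers (which is why the allocation loop now calls pair.second.get()). A minimal sketch of that ownership model, with plain stand-in types instead of TensorRegistry and OperandIndex:

    #include <memory>
    #include <unordered_map>

    struct Tensor { int rank = 0; };  // stand-in for backend::cpu::Tensor

    int main()
    {
      // Stand-in for setNativeTensor: the registry map owns the tensor exclusively.
      std::unordered_map<int, std::unique_ptr<Tensor>> native_tensors;
      auto tensor = std::make_unique<Tensor>();
      Tensor *raw = tensor.get();  // non-owning pointer handed out to kernels
      native_tensors.emplace(/*index=*/0, std::move(tensor));
      return native_tensors.at(0).get() == raw ? 0 : 1;
    }
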
diff --git a/runtime/onert/core/src/backend/controlflow/UserTensorRegistry.h b/runtime/onert/backend/cpu/Tensor.cc
index fa2a2d54c..dac8f898b 100644
--- a/runtime/onert/core/src/backend/controlflow/UserTensorRegistry.h
+++ b/runtime/onert/backend/cpu/Tensor.cc
@@ -14,23 +14,19 @@
* limitations under the License.
*/
-#ifndef __ONERT_BACKEND_CONTROLFLOW_USER_TENSOR_REGISTRY__
-#define __ONERT_BACKEND_CONTROLFLOW_USER_TENSOR_REGISTRY__
-
-#include "backend/ITensorRegistry.h"
-#include "UserTensor.h"
+#include "Tensor.h"
namespace onert
{
namespace backend
{
-namespace controlflow
+namespace cpu
{
-using UserTensorRegistry = PortableTensorRegistryTemplate<UserTensor>;
+// `dynamic_cast` does not work across shared library boundaries on NDK.
+// Defining the destructor here makes it the key function, so `dynamic_cast` works across dynamically loaded libraries.
+ExternalTensor::~ExternalTensor() {}
-} // namespace controlflow
+} // namespace cpu
} // namespace backend
} // namespace onert
-
-#endif // __ONERT_BACKEND_CONTROLFLOW_USER_TENSOR_REGISTRY__
diff --git a/runtime/onert/backend/cpu/Tensor.h b/runtime/onert/backend/cpu/Tensor.h
index 20e60260c..2ad2ad0fb 100644
--- a/runtime/onert/backend/cpu/Tensor.h
+++ b/runtime/onert/backend/cpu/Tensor.h
@@ -41,6 +41,7 @@ class ExternalTensor : public Tensor
{
public:
ExternalTensor() = delete;
+ virtual ~ExternalTensor();
public:
ExternalTensor(const ir::OperandInfo &info, const ir::Layout layout)
@@ -95,6 +96,21 @@ public:
}
}
+ /**
+ * @brief Reset reference count to zero and release data
+ */
+ void reset_ref() override
+ {
+ assert(_data != nullptr);
+ assert(_num_references > 0);
+ _num_references = 0;
+
+ _data.reset();
+ _buffer = nullptr;
+ }
+
+ int32_t num_references() override { return _num_references; }
+
private:
std::shared_ptr<const ir::Data> _data;
};
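
reset_ref(), added above, drops all outstanding references and frees the constant data in one step, which lets the runtime release external (model-provided) buffers early. A sketch with stand-in types; the real class wraps std::shared_ptr<const ir::Data>:

    #include <cassert>
    #include <cstdint>
    #include <memory>
    #include <vector>

    // Stand-in for cpu::ExternalTensor.
    struct MiniExternalTensor
    {
      std::shared_ptr<const std::vector<uint8_t>> data;
      const uint8_t *buffer;
      int32_t num_references;

      void reset_ref()
      {
        assert(data != nullptr);
        assert(num_references > 0);
        num_references = 0;  // drop every outstanding reference at once
        data.reset();        // release the constant data
        buffer = nullptr;
      }
    };

    int main()
    {
      MiniExternalTensor t{std::make_shared<std::vector<uint8_t>>(16), nullptr, 2};
      t.reset_ref();
      return t.data == nullptr && t.buffer == nullptr ? 0 : 1;
    }
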
diff --git a/runtime/onert/backend/cpu/TensorBuilder.cc b/runtime/onert/backend/cpu/TensorBuilder.cc
index 828d52f7c..e6bc55b0b 100644
--- a/runtime/onert/backend/cpu/TensorBuilder.cc
+++ b/runtime/onert/backend/cpu/TensorBuilder.cc
@@ -85,16 +85,6 @@ void TensorBuilder::allocate()
// This is because CPU kernels require `ITensor`s to be allocated before Kernel Generation.
}
-std::unique_ptr<ITensorManager> TensorBuilder::releaseStaticTensorManager(void)
-{
- return std::move(_static_tensor_mgr);
-}
-
-std::unique_ptr<ITensorManager> TensorBuilder::releaseDynamicTensorManager(void)
-{
- return std::move(_dynamic_tensor_mgr);
-}
-
} // namespace cpu
} // namespace backend
} // namespace onert
diff --git a/runtime/onert/backend/cpu/TensorBuilder.h b/runtime/onert/backend/cpu/TensorBuilder.h
index b6d5f09cc..448abc229 100644
--- a/runtime/onert/backend/cpu/TensorBuilder.h
+++ b/runtime/onert/backend/cpu/TensorBuilder.h
@@ -58,12 +58,8 @@ public:
void allocate() override;
void postFunctionPrepare() override { /* DO NOTHING */}
- std::unique_ptr<ITensorManager> releaseStaticTensorManager(void) override;
-
IDynamicTensorManager *dynamicTensorManager(void) override { return _dynamic_tensor_mgr.get(); }
- std::unique_ptr<ITensorManager> releaseDynamicTensorManager(void) override;
-
private:
const std::shared_ptr<cpu_common::TensorRegistry> _tensor_reg;
std::unique_ptr<cpu_common::DynamicTensorManager> _dynamic_tensor_mgr;
diff --git a/runtime/onert/backend/cpu/ops/ArgMinMaxLayer.cc b/runtime/onert/backend/cpu/ops/ArgMinMaxLayer.cc
index d7b0b2bce..2fd284c91 100644
--- a/runtime/onert/backend/cpu/ops/ArgMinMaxLayer.cc
+++ b/runtime/onert/backend/cpu/ops/ArgMinMaxLayer.cc
@@ -44,24 +44,29 @@ template <typename T> std::function<bool(T, T)> GetComparefunction(bool is_arg_m
}
}
-void ArgMinMaxLayer::configure(const IPortableTensor *input, IPortableTensor *output, int32_t axis,
- bool is_arg_max)
+void ArgMinMaxLayer::configure(const IPortableTensor *input, IPortableTensor *output,
+ const IPortableTensor *axis, bool is_arg_max)
{
_input = input;
_output = output;
- if (axis < 0)
- {
- axis += input->num_dimensions();
- }
_axis = axis;
_is_arg_max = is_arg_max;
}
void ArgMinMaxLayer::run()
{
-#define TF_LITE_ARG_MIN_MAX(input_type, axis_type, output_type) \
- ArgMinMax(getTensorShape(_input), reinterpret_cast<const input_type *>(_input->buffer()), \
- getTensorShape(_output), reinterpret_cast<output_type *>(_output->buffer()), _axis, \
+ if (_axis->total_size() != sizeof(int32_t))
+ {
+ throw std::runtime_error("ArgMinMax: wrong shape of axis");
+ }
+ auto axis = *reinterpret_cast<const int32_t *>(_axis->buffer());
+ if (axis < 0)
+ {
+ axis += _input->num_dimensions();
+ }
+#define TF_LITE_ARG_MIN_MAX(input_type, axis_type, output_type) \
+ ArgMinMax(getTensorShape(_input), reinterpret_cast<const input_type *>(_input->buffer()), \
+ getTensorShape(_output), reinterpret_cast<output_type *>(_output->buffer()), axis, \
GetComparefunction<input_type>(_is_arg_max));
if (_output->data_type() == ir::DataType::INT32)
{
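
With the axis now passed as a tensor, ArgMinMaxLayer validates and normalizes it at execution time instead of in configure(). A standalone sketch of that run-time resolution (resolveAxis is an illustrative helper, not part of the layer):

    #include <cstddef>
    #include <cstdint>
    #include <stdexcept>

    int32_t resolveAxis(const void *axis_buffer, std::size_t axis_bytes, int32_t input_rank)
    {
      if (axis_bytes != sizeof(int32_t))
        throw std::runtime_error("ArgMinMax: wrong shape of axis");
      auto axis = *reinterpret_cast<const int32_t *>(axis_buffer);
      if (axis < 0)
        axis += input_rank;  // e.g. -1 on a rank-4 input becomes 3
      return axis;
    }

    int main()
    {
      int32_t raw = -1;
      return resolveAxis(&raw, sizeof(raw), 4) == 3 ? 0 : 1;
    }

Deferring this to run() is what allows the axis operand to be a non-constant tensor whose value is only known at execution time.
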
diff --git a/runtime/onert/backend/cpu/ops/ArgMinMaxLayer.h b/runtime/onert/backend/cpu/ops/ArgMinMaxLayer.h
index d7c021624..4c864cb98 100644
--- a/runtime/onert/backend/cpu/ops/ArgMinMaxLayer.h
+++ b/runtime/onert/backend/cpu/ops/ArgMinMaxLayer.h
@@ -33,18 +33,18 @@ namespace ops
class ArgMinMaxLayer : public ::onert::exec::IFunction
{
public:
- ArgMinMaxLayer() : _input(nullptr), _output(nullptr), _axis(-1), _is_arg_max(true) {}
+ ArgMinMaxLayer() : _input(nullptr), _output(nullptr), _axis(nullptr), _is_arg_max(true) {}
public:
- void configure(const IPortableTensor *indices, IPortableTensor *output, int32_t axis,
- bool is_arg_max);
+ void configure(const IPortableTensor *indices, IPortableTensor *output,
+ const IPortableTensor *axis, bool is_arg_max);
void run() override;
private:
const IPortableTensor *_input;
IPortableTensor *_output;
- int32_t _axis;
+ const IPortableTensor *_axis;
bool _is_arg_max;
};
diff --git a/runtime/onert/backend/cpu/ops/BinaryArithmeticLayer.cc b/runtime/onert/backend/cpu/ops/BinaryArithmeticLayer.cc
index f50c63375..8e51daad5 100644
--- a/runtime/onert/backend/cpu/ops/BinaryArithmeticLayer.cc
+++ b/runtime/onert/backend/cpu/ops/BinaryArithmeticLayer.cc
@@ -34,20 +34,21 @@ template <nnfw::cker::BinaryArithmeticOpType arithmetic_type, typename T>
void eval(const IPortableTensor *lhs, const IPortableTensor *rhs, IPortableTensor *output,
nnfw::cker::BinaryArithmeticOpParam op_params)
{
- const bool need_broadcast =
- nnfw::cker::ProcessBroadcastShapes(getTensorShape(lhs), getTensorShape(rhs), &op_params);
+ const auto lhs_shape = getTensorShape(lhs);
+ const auto rhs_shape = getTensorShape(rhs);
+ const bool need_broadcast = nnfw::cker::ProcessBroadcastShapes(lhs_shape, rhs_shape, &op_params);
if (need_broadcast)
{
nnfw::cker::BroadcastBinaryArithmeticOp<arithmetic_type>(
- op_params, getTensorShape(lhs), reinterpret_cast<const T *>(lhs->buffer()),
- getTensorShape(rhs), reinterpret_cast<const T *>(rhs->buffer()), getTensorShape(output),
+ op_params, lhs_shape, reinterpret_cast<const T *>(lhs->buffer()), rhs_shape,
+ reinterpret_cast<const T *>(rhs->buffer()), getTensorShape(output),
reinterpret_cast<T *>(output->buffer()));
return;
}
nnfw::cker::BinaryArithmeticOp<arithmetic_type>(
- op_params, getTensorShape(lhs), reinterpret_cast<const T *>(lhs->buffer()),
- getTensorShape(rhs), reinterpret_cast<const T *>(rhs->buffer()), getTensorShape(output),
+ op_params, lhs_shape, reinterpret_cast<const T *>(lhs->buffer()), rhs_shape,
+ reinterpret_cast<const T *>(rhs->buffer()), getTensorShape(output),
reinterpret_cast<T *>(output->buffer()));
}
diff --git a/runtime/onert/backend/cpu/ops/FullyConnectedLayer.cc b/runtime/onert/backend/cpu/ops/FullyConnectedLayer.cc
index 05da33abf..f873a3430 100644
--- a/runtime/onert/backend/cpu/ops/FullyConnectedLayer.cc
+++ b/runtime/onert/backend/cpu/ops/FullyConnectedLayer.cc
@@ -158,16 +158,30 @@ void FullyConnectedLayer::fullyConnectedSparseWeight()
op_params.float_activation_max = output_activation_max;
op_params.activation = convertActivationType(_activation);
- int w0_size = getTensorShape(_weights).Dims(0);
- const uint16_t *w1_segments = _weights->w1_segments();
- const uint16_t *w1_indices = _weights->w1_indices();
+ const uint16_t *w1_segments = _weights->sparsity()->w1_segments();
+ const uint16_t *w1_indices = _weights->sparsity()->w1_indices();
- nnfw::cker::FullyConnectedSparseWeight(
- op_params, getTensorShape(_input), reinterpret_cast<const float *>(_input->buffer()),
- getTensorShape(_weights), reinterpret_cast<const float *>(_weights->buffer()),
- getTensorShape(_bias), reinterpret_cast<const float *>(_bias ? _bias->buffer() : nullptr),
- getTensorShape(_output), reinterpret_cast<float *>(_output->buffer()), w0_size, w1_segments,
- w1_indices);
+ auto block_size = _weights->sparsity()->block_size();
+ if (block_size.size() == 0)
+ {
+ nnfw::cker::FullyConnectedSparseWeightRandom(
+ op_params, getTensorShape(_input), reinterpret_cast<const float *>(_input->buffer()),
+ getTensorShape(_weights), reinterpret_cast<const float *>(_weights->buffer()),
+ getTensorShape(_bias), reinterpret_cast<const float *>(_bias ? _bias->buffer() : nullptr),
+ getTensorShape(_output), reinterpret_cast<float *>(_output->buffer()), w1_segments,
+ w1_indices);
+ }
+ else if (block_size.size() == 2 && block_size[0] == 16 && block_size[1] == 1)
+ {
+ nnfw::cker::FullyConnectedSparseWeight16x1(
+ op_params, getTensorShape(_input), reinterpret_cast<const float *>(_input->buffer()),
+ getTensorShape(_weights), reinterpret_cast<const float *>(_weights->buffer()),
+ getTensorShape(_bias), reinterpret_cast<const float *>(_bias ? _bias->buffer() : nullptr),
+ getTensorShape(_output), reinterpret_cast<float *>(_output->buffer()), w1_segments,
+ w1_indices);
+ }
+ else
+ throw std::runtime_error{"FullyConnected: unsupported sparsity"};
}
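
The sparse path above dispatches on the sparsity block size: no blocking selects the random-sparsity kernel, a 16x1 block selects the blocked kernel, and anything else is rejected. A sketch of that dispatch with stand-in names:

    #include <cstdint>
    #include <stdexcept>
    #include <vector>

    enum class SparseKernel { Random, Blocked16x1 };  // stand-ins for the two cker entry points

    SparseKernel selectSparseKernel(const std::vector<int32_t> &block_size)
    {
      if (block_size.empty())
        return SparseKernel::Random;       // unstructured sparsity
      if (block_size.size() == 2 && block_size[0] == 16 && block_size[1] == 1)
        return SparseKernel::Blocked16x1;  // 16x1 block sparsity
      throw std::runtime_error{"FullyConnected: unsupported sparsity"};
    }

    int main()
    {
      return selectSparseKernel({16, 1}) == SparseKernel::Blocked16x1 ? 0 : 1;
    }
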
void FullyConnectedLayer::configure(const IPortableTensor *input, const IPortableTensor *weights,
@@ -191,7 +205,7 @@ void FullyConnectedLayer::run()
{
fullyConnectedHybrid();
}
- else if (_weights->is_sparse())
+ else if (_weights->sparsity())
{
fullyConnectedSparseWeight();
}
@@ -239,17 +253,11 @@ void FullyConnectedLayer::prepare()
const int rows = getTensorShape(_weights).Dims(0);
if (rows % 4 == 0)
{
- const int total_input_size = getTensorShape(_input).FlatSize();
- const int input_size = getTensorShape(_weights).Dims(1);
- const int batch_size = total_input_size / input_size;
- if (batch_size <= 4)
- {
- // TODO If it's possible to extract precaching from ruy kernel,
- // place this instead of below code
+ // TODO If it's possible to extract precaching from the ruy kernel,
+ // place it here instead of the code below
- // buffer will be used by ruy kernel as a cache key
- _cached_weights = _weights->buffer();
- }
+ // buffer will be used by ruy kernel as a cache key
+ _cached_weights = _weights->buffer();
}
#endif
}
diff --git a/runtime/onert/backend/cpu/ops/OperationUtils.h b/runtime/onert/backend/cpu/ops/OperationUtils.h
index 98385521a..eb24dd43c 100644
--- a/runtime/onert/backend/cpu/ops/OperationUtils.h
+++ b/runtime/onert/backend/cpu/ops/OperationUtils.h
@@ -95,27 +95,18 @@ inline nnfw::cker::Shape getTensorShape(const IPortableTensor *tensor)
if (tensor == nullptr)
return nnfw::cker::Shape();
+ const ir::Shape &shape = tensor->get_info().shape();
+
assert(tensor->layout() == ir::Layout::NHWC);
- constexpr int kMaxSmallSize = 8;
- int32_t raw_shape_small[kMaxSmallSize];
- std::vector<int32_t> raw_shape_vec;
- auto rank = tensor->num_dimensions();
- int32_t *data = nullptr;
- if (rank > kMaxSmallSize)
- {
- raw_shape_vec.resize(rank);
- data = raw_shape_vec.data();
- }
- else
- {
- data = raw_shape_small;
- }
- for (uint32_t i = 0; i < rank; ++i)
+ auto rank = shape.rank();
+ nnfw::cker::Shape ret(rank);
+ auto data = ret.DimsData();
+ for (int i = 0; i < rank; ++i)
{
- data[i] = tensor->dimension(i);
+ data[i] = shape.dim(i);
}
- return nnfw::cker::Shape(rank, data);
+ return ret;
}
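
getTensorShape() now sizes the cker shape up front from the operand info and writes the dimensions directly into its buffer, instead of staging them in a small-size-optimized temporary. A sketch of that construction with a stand-in shape type:

    #include <cstddef>
    #include <cstdint>
    #include <iostream>
    #include <vector>

    struct MiniShape  // stand-in for nnfw::cker::Shape
    {
      explicit MiniShape(int rank) : dims(rank) {}
      int32_t *DimsData() { return dims.data(); }
      std::vector<int32_t> dims;
    };

    MiniShape toMiniShape(const std::vector<int32_t> &ir_shape)
    {
      MiniShape ret(static_cast<int>(ir_shape.size()));  // allocate at the final rank
      auto data = ret.DimsData();
      for (std::size_t i = 0; i < ir_shape.size(); ++i)
        data[i] = ir_shape[i];  // copy straight from the operand info shape
      return ret;
    }

    int main()
    {
      auto s = toMiniShape({1, 224, 224, 3});
      std::cout << s.dims.size() << '\n';  // 4
    }
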
inline nnfw::cker::FusedActivationFunctionType
diff --git a/runtime/onert/backend/cpu/ops/ReduceLayer.cc b/runtime/onert/backend/cpu/ops/ReduceLayer.cc
index bb5f85d60..4a55b2a33 100644
--- a/runtime/onert/backend/cpu/ops/ReduceLayer.cc
+++ b/runtime/onert/backend/cpu/ops/ReduceLayer.cc
@@ -18,6 +18,7 @@
#include "OperationUtils.h"
+#include "cker/neon/neon_check.h"
#include <cker/operation/Reduce.h>
namespace onert
@@ -158,7 +159,7 @@ void evalSumQuantized(const IPortableTensor *input, IPortableTensor *output,
ReduceLayer::ReduceLayer()
: _input(nullptr), _axes(nullptr), _output(nullptr), _reduce_kernel(new nnfw::cker::Reduce()),
- _kernel()
+ _kernel(), _reduceType(ReduceType::kInvalid)
{
// DO NOTHING
}
@@ -171,8 +172,9 @@ void ReduceLayer::configure(const IPortableTensor *input, const IPortableTensor
_input = input;
_axes = axes;
_output = output;
+ _reduceType = reduceType;
- switch (reduceType)
+ switch (_reduceType)
{
case ReduceType::kSum:
if (_input->data_type() == OperandType::QUANT_UINT8_ASYMM)
@@ -199,13 +201,23 @@ void ReduceLayer::configure(const IPortableTensor *input, const IPortableTensor
_kernel = generateKernelGeneric(_input, keep_dims, *_reduce_kernel, ReduceType::kAll);
break;
default:
- throw std::runtime_error{"ReduceSum: Unsupported reduce type"};
+ throw std::runtime_error{"Reduce: Unsupported reduce type"};
}
}
void ReduceLayer::run()
{
const auto axes = getReducerAxes(_axes);
+#ifdef USE_NEON
+ int32_t rank = _input->num_dimensions();
+ if (_input->data_type() == ir::DataType::FLOAT32 && _reduceType == ReduceType::kSum &&
+ axes.size() == 1 && (axes[0] == -1 || axes[0] == rank - 1))
+ {
+ OptimizedReduceSum(reinterpret_cast<const float *>(_input->buffer()), getTensorShape(_input),
+ reinterpret_cast<float *>(_output->buffer()));
+ return;
+ }
+#endif // NEON
_kernel(_input, _output, axes);
}
diff --git a/runtime/onert/backend/cpu/ops/ReduceLayer.h b/runtime/onert/backend/cpu/ops/ReduceLayer.h
index 332d399bd..8265dd41f 100644
--- a/runtime/onert/backend/cpu/ops/ReduceLayer.h
+++ b/runtime/onert/backend/cpu/ops/ReduceLayer.h
@@ -17,6 +17,8 @@
#ifndef __ONERT_BACKEND_CPU_OPS_REDUCESUMLAYER_H__
#define __ONERT_BACKEND_CPU_OPS_REDUCESUMLAYER_H__
+#include "cker/neon/neon_check.h"
+
#include <backend/IPortableTensor.h>
#include <exec/IFunction.h>
@@ -47,6 +49,7 @@ enum class ReduceType
kMin,
kAny,
kAll,
+ kInvalid // For debugging and initialization
};
class ReduceLayer : public ::onert::exec::IFunction
@@ -70,6 +73,8 @@ private:
std::function<void(const IPortableTensor *input, IPortableTensor *output,
const std::vector<int> &axes)>
_kernel;
+
+ ReduceType _reduceType;
};
} // namespace ops
diff --git a/runtime/onert/backend/cpu/ops/ResizeBilinearLayer.cc b/runtime/onert/backend/cpu/ops/ResizeBilinearLayer.cc
index 180094bb8..1fe56cb99 100644
--- a/runtime/onert/backend/cpu/ops/ResizeBilinearLayer.cc
+++ b/runtime/onert/backend/cpu/ops/ResizeBilinearLayer.cc
@@ -28,16 +28,39 @@ namespace ops
{
ResizeBilinearLayer::ResizeBilinearLayer()
- : _input(nullptr), _output(nullptr), _output_height(0), _output_width(0), _align_corners(false),
- _half_pixel_centers(false)
+ : _input(nullptr), _output(nullptr), _size(nullptr), _output_height(0), _output_width(0),
+ _align_corners(false), _half_pixel_centers(false)
{
// DO NOTHING
}
void ResizeBilinearLayer::configure(const IPortableTensor *input, IPortableTensor *output,
+ const IPortableTensor *size, bool align_corners,
+ bool half_pixel_centers)
+{
+ assert(!size->is_constant());
+ _input = input;
+ _output = output;
+ _size = size;
+ _align_corners = align_corners;
+ _half_pixel_centers = half_pixel_centers;
+}
+
+void ResizeBilinearLayer::configure(const IPortableTensor *input, IPortableTensor *output,
int32_t output_height, int32_t output_width, bool align_corners,
bool half_pixel_centers)
{
+ assert(_size == nullptr);
+ if (output_height < 0)
+ {
+ throw std::runtime_error{"ResizeBilinear: size value must be positive value, output_height = " +
+ std::to_string(output_height)};
+ }
+ if (output_width < 0)
+ {
+ throw std::runtime_error{"ResizeBilinear: size value must be positive value, output_width = " +
+ std::to_string(output_width)};
+ }
_input = input;
_output = output;
_output_height = output_height;
@@ -49,10 +72,19 @@ void ResizeBilinearLayer::configure(const IPortableTensor *input, IPortableTenso
void ResizeBilinearLayer::run()
{
nnfw::cker::ResizeBilinearParams params;
+ if (_size == nullptr)
+ {
+ params.output_height = _output_height;
+ params.output_width = _output_width;
+ }
+ else
+ {
+ const auto size_buf = reinterpret_cast<const int32_t *>(_size->buffer());
+ params.output_height = size_buf[0];
+ params.output_width = size_buf[1];
+ }
params.align_corners = _align_corners;
params.half_pixel_centers = _half_pixel_centers;
- params.output_height = _output_height;
- params.output_width = _output_width;
switch (_input->data_type())
{
diff --git a/runtime/onert/backend/cpu/ops/ResizeBilinearLayer.h b/runtime/onert/backend/cpu/ops/ResizeBilinearLayer.h
index fc49b348e..d7ae1c620 100644
--- a/runtime/onert/backend/cpu/ops/ResizeBilinearLayer.h
+++ b/runtime/onert/backend/cpu/ops/ResizeBilinearLayer.h
@@ -36,7 +36,10 @@ public:
ResizeBilinearLayer();
public:
- void configure(const IPortableTensor *input1, IPortableTensor *output, int32_t output_height,
+ void configure(const IPortableTensor *input1, IPortableTensor *output,
+ const IPortableTensor *size, bool align_corners, bool half_pixel_centers);
+
+ void configure(const IPortableTensor *input, IPortableTensor *output, int32_t output_height,
int32_t output_width, bool align_corners, bool half_pixel_centers);
void run() override;
@@ -44,6 +47,7 @@ public:
private:
const IPortableTensor *_input;
IPortableTensor *_output;
+ const IPortableTensor *_size;
int32_t _output_height;
int32_t _output_width;
bool _align_corners;
diff --git a/runtime/onert/backend/cpu/ops/SoftMaxLayer.cc b/runtime/onert/backend/cpu/ops/SoftMaxLayer.cc
index 095e67abc..b42be3042 100644
--- a/runtime/onert/backend/cpu/ops/SoftMaxLayer.cc
+++ b/runtime/onert/backend/cpu/ops/SoftMaxLayer.cc
@@ -62,7 +62,11 @@ void SoftMaxLayer::softmaxFloat32()
}
else
{
- throw std::runtime_error{"only 1D, 2D and 4D tensors supported"};
+ nnfw::cker::SoftmaxParams op_params;
+ op_params.beta = _beta;
+ nnfw::cker::reference::Softmax(
+ op_params, getTensorShape(_input), reinterpret_cast<const float *>(_input->buffer()),
+ getTensorShape(_output), reinterpret_cast<float *>(_output->buffer()));
}
}
diff --git a/runtime/onert/backend/cpu/ops/SplitLayer.cc b/runtime/onert/backend/cpu/ops/SplitLayer.cc
index 1f40654c1..922cde2e3 100644
--- a/runtime/onert/backend/cpu/ops/SplitLayer.cc
+++ b/runtime/onert/backend/cpu/ops/SplitLayer.cc
@@ -29,7 +29,7 @@ namespace cpu
namespace ops
{
-SplitLayer::SplitLayer() : _input(nullptr), _num_splits(0), _axis(0), _outputs()
+SplitLayer::SplitLayer() : _input(nullptr), _axis(nullptr), _num_splits(0), _outputs()
{
// DO NOTHING
}
@@ -37,7 +37,16 @@ SplitLayer::SplitLayer() : _input(nullptr), _num_splits(0), _axis(0), _outputs()
template <typename T> void SplitLayer::split(void)
{
nnfw::cker::SplitParams op_params;
- op_params.axis = _axis;
+ if (_axis->total_size() != sizeof(int32_t))
+ {
+ throw std::runtime_error("ArgMinMax: wrong shape of axis");
+ }
+ auto axis = *reinterpret_cast<const int32_t *>(_axis->buffer());
+ if (axis < 0)
+ {
+ axis += _input->num_dimensions();
+ }
+ op_params.axis = axis;
op_params.num_split = _num_splits;
std::vector<T *> outputPtrs;
@@ -53,8 +62,8 @@ template <typename T> void SplitLayer::split(void)
getTensorShape(_outputs[0]), outputPtrs.data());
}
-void SplitLayer::configure(const IPortableTensor *input, uint16_t num_splits, int16_t axis,
- std::vector<IPortableTensor *> &outputs)
+void SplitLayer::configure(const IPortableTensor *input, const IPortableTensor *axis,
+ uint16_t num_splits, std::vector<IPortableTensor *> &outputs)
{
assert(input != nullptr);
diff --git a/runtime/onert/backend/cpu/ops/SplitLayer.h b/runtime/onert/backend/cpu/ops/SplitLayer.h
index 0719a0063..090f87166 100644
--- a/runtime/onert/backend/cpu/ops/SplitLayer.h
+++ b/runtime/onert/backend/cpu/ops/SplitLayer.h
@@ -38,15 +38,15 @@ public:
public:
template <typename T> void split(void);
- void configure(const IPortableTensor *input, uint16_t num_splits, int16_t axis,
+ void configure(const IPortableTensor *input, const IPortableTensor *axis, uint16_t num_splits,
std::vector<IPortableTensor *> &outputs);
void run() override;
private:
const IPortableTensor *_input;
+ const IPortableTensor *_axis;
uint16_t _num_splits;
- int16_t _axis;
std::vector<IPortableTensor *> _outputs;
};
diff --git a/runtime/onert/backend/cpu/ops/StridedSliceLayer.cc b/runtime/onert/backend/cpu/ops/StridedSliceLayer.cc
index dcbb87734..f77f4d691 100644
--- a/runtime/onert/backend/cpu/ops/StridedSliceLayer.cc
+++ b/runtime/onert/backend/cpu/ops/StridedSliceLayer.cc
@@ -37,17 +37,17 @@ StridedSliceLayer::StridedSliceLayer()
template <typename T> void StridedSliceLayer::stridedSliceImpl()
{
+ const auto input_shape = getTensorShape(_input);
+ const auto output_shape = getTensorShape(_output);
auto op_params = nnfw::cker::buildStridedSliceParams(
reinterpret_cast<uint32_t *>(_begin->buffer()), reinterpret_cast<uint32_t *>(_end->buffer()),
reinterpret_cast<uint32_t *>(_strides->buffer()), _begin_mask, _end_mask, _shrink_axis_mask,
- getTensorShape(_input).DimensionsCount());
+ input_shape.DimensionsCount());
- nnfw::cker::checkOutputSize(op_params, getTensorShape(_input), getTensorShape(_output),
- getTensorShape(_input).DimensionsCount());
+ nnfw::cker::checkOutputSize(op_params, input_shape, output_shape, input_shape.DimensionsCount());
- nnfw::cker::StridedSlice(op_params, getTensorShape(_input),
- reinterpret_cast<const T *>(_input->buffer()), getTensorShape(_output),
- reinterpret_cast<T *>(_output->buffer()));
+ nnfw::cker::StridedSlice(op_params, input_shape, reinterpret_cast<const T *>(_input->buffer()),
+ output_shape, reinterpret_cast<T *>(_output->buffer()));
}
void StridedSliceLayer::configure(const IPortableTensor *input, const IPortableTensor *begin,
diff --git a/runtime/onert/backend/cpu/ops/TransposeLayer.cc b/runtime/onert/backend/cpu/ops/TransposeLayer.cc
index 7b232562a..3362c3396 100644
--- a/runtime/onert/backend/cpu/ops/TransposeLayer.cc
+++ b/runtime/onert/backend/cpu/ops/TransposeLayer.cc
@@ -19,6 +19,7 @@
#include "OperationUtils.h"
#include <cker/operation/Transpose.h>
+#include <numeric>
namespace onert
{
@@ -29,7 +30,7 @@ namespace cpu
namespace ops
{
-TransposeLayer::TransposeLayer() : _input(nullptr), _output(nullptr), _perm()
+TransposeLayer::TransposeLayer() : _input(nullptr), _perm(nullptr), _output(nullptr)
{
// DO NOTHING
}
@@ -37,10 +38,23 @@ TransposeLayer::TransposeLayer() : _input(nullptr), _output(nullptr), _perm()
template <typename T> void TransposeLayer::transpose()
{
nnfw::cker::TransposeParams param;
- param.perm_count = _perm.size();
- for (size_t i = 0; i < _perm.size(); i++)
+ assert(_perm->num_dimensions() == 1);
+
+ param.perm_count = _input->num_dimensions();
+ if (_perm->dimension(0) == 0) // This means _perm is (n-1...0)
+ {
+ const auto begin = param.perm;
+ const auto end = param.perm + _input->num_dimensions();
+ std::iota(begin, end, 0);
+ std::reverse(begin, end);
+ }
+ else
{
- param.perm[i] = _perm[i];
+ assert(param.perm_count == static_cast<int>(_perm->dimension(0)));
+ for (auto i = 0; i < param.perm_count; i++)
+ {
+ param.perm[i] = *(reinterpret_cast<const int32_t *>(_perm->buffer()) + i);
+ }
}
nnfw::cker::Transpose(param, getTensorShape(_input),
@@ -63,8 +77,8 @@ void TransposeLayer::transposeQuant8()
transpose<uint8_t>();
}
-void TransposeLayer::configure(const IPortableTensor *input, IPortableTensor *output,
- const std::vector<int> &perm)
+void TransposeLayer::configure(const IPortableTensor *input, const IPortableTensor *perm,
+ IPortableTensor *output)
{
_input = input;
_perm = perm;
diff --git a/runtime/onert/backend/cpu/ops/TransposeLayer.h b/runtime/onert/backend/cpu/ops/TransposeLayer.h
index f9cb12770..c8e9f8ae7 100644
--- a/runtime/onert/backend/cpu/ops/TransposeLayer.h
+++ b/runtime/onert/backend/cpu/ops/TransposeLayer.h
@@ -40,15 +40,15 @@ public:
void transposeQuant8();
- void configure(const IPortableTensor *input, IPortableTensor *output,
- const std::vector<int> &perm);
+ void configure(const IPortableTensor *input, const IPortableTensor *perm,
+ IPortableTensor *output);
void run() override;
private:
const IPortableTensor *_input;
+ const IPortableTensor *_perm;
IPortableTensor *_output;
- std::vector<int> _perm;
};
} // namespace ops
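The PERMUTATION input is a rank-1 int32 tensor; an empty one (dimension(0) == 0) stands for the default reverse order (n-1 ... 0). Below is a hedged sketch of that convention on a plain vector, mirroring the iota/reverse fallback in TransposeLayer::transpose(); the helper itself is not part of this commit.

#include <algorithm>
#include <cstdint>
#include <numeric>
#include <vector>

// Expand a (possibly empty) permutation to its effective form for a given rank.
std::vector<int32_t> effectivePerm(const std::vector<int32_t> &perm, int rank)
{
  if (!perm.empty())
    return perm;                        // explicit permutation given
  std::vector<int32_t> def(rank);
  std::iota(def.begin(), def.end(), 0); // 0, 1, ..., rank-1
  std::reverse(def.begin(), def.end()); // rank-1, ..., 1, 0
  return def;
}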
diff --git a/runtime/onert/core/CMakeLists.txt b/runtime/onert/core/CMakeLists.txt
index d58b47ced..344b2a972 100644
--- a/runtime/onert/core/CMakeLists.txt
+++ b/runtime/onert/core/CMakeLists.txt
@@ -13,6 +13,11 @@ target_link_libraries(onert_core PRIVATE nnfw_coverage)
target_link_libraries(onert_core PRIVATE dl ${LIB_PTHREAD})
target_link_libraries(onert_core PRIVATE jsoncpp)
target_link_libraries(onert_core INTERFACE ruy_instrumentation)
+# NOTE Below line is added to remove warning for android build
+# It will be removed after android build uses gold linker
+if (ANDROID)
+ target_link_libraries(onert_core INTERFACE log)
+endif (ANDROID)
if(ENVVAR_ONERT_CONFIG)
target_compile_definitions(onert_core PRIVATE ENVVAR_FOR_DEFAULT_CONFIG)
diff --git a/runtime/onert/core/include/backend/CustomKernelBuilder.h b/runtime/onert/core/include/backend/CustomKernelBuilder.h
index 101272135..cae2fc1a3 100644
--- a/runtime/onert/core/include/backend/CustomKernelBuilder.h
+++ b/runtime/onert/core/include/backend/CustomKernelBuilder.h
@@ -49,10 +49,10 @@ struct TypeInfo
struct CustomKernelConfigParams
{
- std::vector<std::shared_ptr<backend::IPortableTensor>> input_tensors;
+ std::vector<backend::IPortableTensor *> input_tensors;
std::vector<TypeInfo> input_types;
- std::vector<std::shared_ptr<backend::IPortableTensor>> output_tensors;
+ std::vector<backend::IPortableTensor *> output_tensors;
std::vector<TypeInfo> output_types;
char *userdata;
diff --git a/runtime/onert/core/include/backend/IDynamicTensorManager.h b/runtime/onert/core/include/backend/IDynamicTensorManager.h
index 343c52c4a..67cfda24e 100644
--- a/runtime/onert/core/include/backend/IDynamicTensorManager.h
+++ b/runtime/onert/core/include/backend/IDynamicTensorManager.h
@@ -39,24 +39,12 @@ struct IDynamicTensorManager : public ITensorManager
public:
/**
- * @brief Set new shape and allocate memory for dynamic tensor.
- * If a tensor is dynamic tensor and previously allocated memory exists,
- * it will be deallocated.
- * If a tensor is static tensor (with previously allocated memory by StaticTensorManager),
- * tensor->buffer() will be overwrite to the dynamically allocated memory
- * @param ind operand index of a tensor
- * @param new_shape tensor's new shape. While allocating memory for this new_shape,
- * tensor's shape is set to new_shape
- */
- virtual void applyShape(const ir::OperandIndex &ind, const ir::Shape &new_shape) = 0;
-
- /**
* @brief Plan when to delete a tensor. Note this planning is done at compilation time.
* @param op_ind operation index
- * @param operand_ind operand index of input operand of first param op. Operand can be static
+ * @param tensor candidate ITensor to dealloc. Tensor can be static
* or dynamic since tensor type may not be clearly known at compilation time.
*/
- virtual void planDealloc(ir::OperationIndex op_ind, ir::OperandIndex operand_ind) = 0;
+ virtual void planDealloc(ir::OperationIndex op_ind, backend::ITensor *tensor) = 0;
/**
* @brief Deallocate input tensors of op if an input tensor is a dynamic tensor and it won't
@@ -64,12 +52,6 @@ public:
* @note This will work after calling planDealloc
*/
virtual void deallocInput(ir::OperationIndex op_ind) = 0;
-
- /**
- * @brief Deallocate an output tensor if the tensor is a dynamic tensor
- * @note This will work after calling planDealloc
- */
- virtual void deallocSubgraphOutput(ir::OperandIndex ind) = 0;
};
} // namespace backend
diff --git a/runtime/onert/core/include/backend/IPortableTensor.h b/runtime/onert/core/include/backend/IPortableTensor.h
index a05b39a33..1b1f05fe1 100644
--- a/runtime/onert/core/include/backend/IPortableTensor.h
+++ b/runtime/onert/core/include/backend/IPortableTensor.h
@@ -18,6 +18,8 @@
#define __ONERT_BACKEND_I_PORTABLE_TENSOR_H__
#include "backend/ITensor.h"
+#include "ir/OperandInfo.h"
+#include "ir/Sparsity.h"
namespace onert
{
@@ -36,14 +38,18 @@ namespace backend
class IPortableTensor : public ITensor
{
public:
- virtual ~IPortableTensor() = default;
- virtual bool is_sparse() const { return false; }
- virtual const uint16_t *w1_segments() const { return nullptr; }
- virtual const uint16_t *w1_indices() const { return nullptr; }
+ IPortableTensor(const ir::OperandInfo &info) : _info(info) {}
+
+ virtual ~IPortableTensor();
+ virtual const ir::Sparsity *sparsity() const { return nullptr; }
+ const ir::OperandInfo &get_info() const { return _info; }
public:
bool has_padding() const final { return false; }
void access(const std::function<void(ITensor &tensor)> &fn) final { fn(*this); }
+
+protected:
+ ir::OperandInfo _info;
};
} // namespace backend
diff --git a/runtime/onert/core/include/backend/ITensor.h b/runtime/onert/core/include/backend/ITensor.h
index 12b1c5433..b18dd30a2 100644
--- a/runtime/onert/core/include/backend/ITensor.h
+++ b/runtime/onert/core/include/backend/ITensor.h
@@ -53,13 +53,19 @@ public:
virtual void access(const std::function<void(ITensor &tensor)> &fn) = 0;
/**
- * @brief Return the dynamic tensor manager
+ * @brief Set the shape to @c shape and possibly re-allocate the buffer
*
- * If dynamic tensors are not supported, it returns @c nullptr .
+ * If the tensor is a dynamic tensor and previously allocated memory exists,
+ * it will be deallocated.
+ * If the tensor is a static tensor (with memory previously allocated by StaticTensorManager),
+ * @c buffer() will be overwritten
*
- * @return IDynamicTensorManager* DynamicTensorManager
+ * @param shape tensor's new shape. While allocating memory for this shape,
+ * the tensor's shape is set to it
+ * @return true If applying the shape succeeded
+ * @return false If applying the shape is not supported (it throws for other errors)
*/
- virtual IDynamicTensorManager *dynamic_tensor_manager() { return nullptr; }
+ virtual bool applyShape(const ir::Shape &) { return false; }
/**
* @brief Return true if the tensor is constant
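A minimal caller-side sketch of the applyShape() contract introduced above, assuming a small helper that is not part of this commit (the function name is illustrative):

#include <stdexcept>
#include "backend/ITensor.h"

// Resize a tensor before executing a kernel. Previously this went through
// IDynamicTensorManager::applyShape(index, new_shape); now the tensor itself
// re-shapes and, if needed, re-allocates its buffer.
void resizeOrThrow(onert::backend::ITensor *tensor, const onert::ir::Shape &new_shape)
{
  if (!tensor->applyShape(new_shape))
    throw std::runtime_error{"This tensor does not support changing its shape"};
}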
diff --git a/runtime/onert/core/include/backend/ITensorBuilder.h b/runtime/onert/core/include/backend/ITensorBuilder.h
index f93ab81ae..97721cf19 100644
--- a/runtime/onert/core/include/backend/ITensorBuilder.h
+++ b/runtime/onert/core/include/backend/ITensorBuilder.h
@@ -89,14 +89,6 @@ public: // methods for static tensor allocation
*/
virtual void postFunctionPrepare() = 0;
- /**
- * @brief Release static @c ITensorManger object which was built
- * Before calling this, @c allocate must have been called
- *
- * @return std::unique_ptr<ITensorManager> Tensor Manager object
- */
- virtual std::unique_ptr<ITensorManager> releaseStaticTensorManager(void) = 0;
-
public: // methods for dynamic tensor allocation
/**
* @brief Get dynamicTensorManager. If a backend does not support dynamic tensor, exception
@@ -108,14 +100,6 @@ public: // methods for dynamic tensor allocation
* to the end of execution
*/
virtual IDynamicTensorManager *dynamicTensorManager(void) { return nullptr; }
-
- /**
- * @brief Release dynamic @c ITensorManger object which was built
- * Before calling this, @c allocate must have been called
- *
- * @return std::unique_ptr<ITensorManager> Tensor Manager object
- */
- virtual std::unique_ptr<ITensorManager> releaseDynamicTensorManager(void) { return nullptr; }
};
} // namespace backend
diff --git a/runtime/onert/core/include/backend/ITensorRegistry.h b/runtime/onert/core/include/backend/ITensorRegistry.h
index 88fcb0fcd..b256a1fb8 100644
--- a/runtime/onert/core/include/backend/ITensorRegistry.h
+++ b/runtime/onert/core/include/backend/ITensorRegistry.h
@@ -43,7 +43,7 @@ struct ITensorRegistry
*
* @note Return tensor cannot be used longer than dynamic tensor manager
*/
- virtual std::shared_ptr<ITensor> getITensor(const ir::OperandIndex &) = 0;
+ virtual ITensor *getITensor(const ir::OperandIndex &) = 0;
/**
* @brief Returns pointer of ITensor among native tensors
*
@@ -51,17 +51,14 @@ struct ITensorRegistry
*
* @note Returned tensor cannot be used longer than dynamic tensor manager
*/
- virtual std::shared_ptr<ITensor> getNativeITensor(const ir::OperandIndex &) = 0;
+ virtual ITensor *getNativeITensor(const ir::OperandIndex &) = 0;
/**
* @brief Set the Migrant Tensor which are from other backends
*
* @return true if supported
* @return false if not supported
*/
- virtual bool setMigrantTensor(const ir::OperandIndex &, const std::shared_ptr<IPortableTensor> &)
- {
- return false;
- }
+ virtual bool setMigrantTensor(const ir::OperandIndex &, IPortableTensor *) { return false; }
};
} // namespace backend
@@ -85,41 +82,37 @@ namespace backend
template <typename T_Tensor> class PortableTensorRegistryTemplate : public ITensorRegistry
{
public:
- std::shared_ptr<ITensor> getITensor(const ir::OperandIndex &ind) override
+ ITensor *getITensor(const ir::OperandIndex &ind) override
{
static_assert(std::is_base_of<ITensor, T_Tensor>::value, "T_Tensor must derive from ITensor.");
- auto external_tensor = _migrant.find(ind);
- if (external_tensor != _migrant.end())
- return external_tensor->second;
+ auto _migrant_tensor = _migrant.find(ind);
+ if (_migrant_tensor != _migrant.end())
+ return _migrant_tensor->second;
return getNativeTensor(ind);
}
- std::shared_ptr<ITensor> getNativeITensor(const ir::OperandIndex &ind) override
- {
- return getNativeTensor(ind);
- }
+ ITensor *getNativeITensor(const ir::OperandIndex &ind) override { return getNativeTensor(ind); }
- std::shared_ptr<IPortableTensor> getPortableTensor(const ir::OperandIndex &ind)
+ IPortableTensor *getPortableTensor(const ir::OperandIndex &ind)
{
- auto external_tensor = _migrant.find(ind);
- if (external_tensor != _migrant.end())
+ auto _migrant_tensor = _migrant.find(ind);
+ if (_migrant_tensor != _migrant.end())
{
- if (external_tensor->second)
- return external_tensor->second;
+ if (_migrant_tensor->second)
+ return _migrant_tensor->second;
}
return getNativeTensor(ind);
}
- std::shared_ptr<T_Tensor> getNativeTensor(const ir::OperandIndex &ind)
+ T_Tensor *getNativeTensor(const ir::OperandIndex &ind)
{
auto tensor = _native.find(ind);
if (tensor != _native.end())
- return tensor->second;
+ return tensor->second.get();
return nullptr;
}
- bool setMigrantTensor(const ir::OperandIndex &ind,
- const std::shared_ptr<IPortableTensor> &tensor) override
+ bool setMigrantTensor(const ir::OperandIndex &ind, IPortableTensor *tensor) override
{
assert(tensor != nullptr);
auto itr = _native.find(ind);
@@ -129,25 +122,22 @@ public:
return true;
}
- void setNativeTensor(const ir::OperandIndex &ind, const std::shared_ptr<T_Tensor> &tensor)
+ void setNativeTensor(const ir::OperandIndex &ind, std::unique_ptr<T_Tensor> &&tensor)
{
assert(tensor != nullptr);
auto itr = _migrant.find(ind);
if (itr != _migrant.end())
throw std::runtime_error{"Tried to set a native tensor but a migrant tensor already exists."};
- _native[ind] = tensor;
+ _native[ind] = std::move(tensor);
}
- const ir::OperandIndexMap<std::shared_ptr<T_Tensor>> &native_tensors() { return _native; }
+ const ir::OperandIndexMap<std::unique_ptr<T_Tensor>> &native_tensors() { return _native; }
- const ir::OperandIndexMap<std::shared_ptr<IPortableTensor>> &migrant_tensors()
- {
- return _migrant;
- }
+ const ir::OperandIndexMap<IPortableTensor *> &migrant_tensors() { return _migrant; }
private:
- ir::OperandIndexMap<std::shared_ptr<IPortableTensor>> _migrant;
- ir::OperandIndexMap<std::shared_ptr<T_Tensor>> _native;
+ ir::OperandIndexMap<IPortableTensor *> _migrant;
+ ir::OperandIndexMap<std::unique_ptr<T_Tensor>> _native;
};
} // namespace backend
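A hedged usage sketch of the registry after the ownership change: native tensors are handed over as unique_ptr and every lookup returns a raw, non-owning pointer. MyTensor and the function below are placeholders, not part of this commit.

#include <memory>

// MyTensor is a hypothetical IPortableTensor subclass registered by a backend.
using Registry = onert::backend::PortableTensorRegistryTemplate<MyTensor>;

void registerTensors(Registry &reg,
                     const onert::ir::OperandIndex &own_ind, std::unique_ptr<MyTensor> own,
                     const onert::ir::OperandIndex &migrant_ind,
                     onert::backend::IPortableTensor *migrant)
{
  reg.setNativeTensor(own_ind, std::move(own)); // registry takes ownership
  reg.setMigrantTensor(migrant_ind, migrant);   // non-owning, owned by another backend
  // Lookups return raw pointers; a migrant entry shadows a native one with the same index.
  onert::backend::ITensor *t = reg.getITensor(migrant_ind);
  (void)t;
}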
diff --git a/runtime/onert/core/include/backend/cpu_common/DynamicTensorManager.h b/runtime/onert/core/include/backend/cpu_common/DynamicTensorManager.h
index e3c8c8666..c4e06aa82 100644
--- a/runtime/onert/core/include/backend/cpu_common/DynamicTensorManager.h
+++ b/runtime/onert/core/include/backend/cpu_common/DynamicTensorManager.h
@@ -44,14 +44,16 @@ public:
virtual ~DynamicTensorManager() = default;
- void applyShape(const ir::OperandIndex &ind, const ir::Shape &new_shape) override;
-
void buildTensor(const ir::OperandIndex &ind, const ir::OperandInfo &tensor_info,
ir::Layout backend_layout);
- void planDealloc(ir::OperationIndex op_ind, ir::OperandIndex operand_ind) override;
+ void planDealloc(ir::OperationIndex op_ind, backend::ITensor *tensor) override;
void deallocInput(ir::OperationIndex op_ind) override;
- void deallocSubgraphOutput(ir::OperandIndex ind) override;
+
+ std::shared_ptr<DynamicMemoryManager> dynamic_mem_mgr() { return _dynamic_mem_mgr; }
+
+private:
+ const ITensor *getRawITensor(ir::OperandIndex ind);
private:
/**
@@ -63,7 +65,8 @@ private:
// contains list of dynamic tensor index, which can be deallocated after running operation
// note: this map could contain static tensor index too. Careful use is required.
- std::unordered_map<ir::OperationIndex, std::unordered_set<ir::OperandIndex>> _dealloc_tensor_map;
+ std::unordered_map<ir::OperationIndex, std::unordered_set<backend::ITensor *>>
+ _dealloc_tensor_map;
};
} // namespace cpu_common
diff --git a/runtime/onert/core/include/backend/cpu_common/MemoryManager.h b/runtime/onert/core/include/backend/cpu_common/MemoryManager.h
index 4be7a1a11..28ec6b803 100644
--- a/runtime/onert/core/include/backend/cpu_common/MemoryManager.h
+++ b/runtime/onert/core/include/backend/cpu_common/MemoryManager.h
@@ -20,12 +20,14 @@
#include "Allocator.h"
#include "backend/IMemoryManager.h"
#include "IMemoryPlanner.h"
-#include "ir/OperandIndexMap.h"
namespace onert
{
namespace backend
{
+
+class ITensor;
+
namespace cpu_common
{
@@ -59,12 +61,12 @@ public:
DynamicMemoryManager() = default;
virtual ~DynamicMemoryManager() = default;
- std::shared_ptr<Allocator> allocate(const ir::OperandIndex &ind, uint32_t capacity);
- void deallocate(const ir::OperandIndex &ind);
+ std::shared_ptr<Allocator> allocate(const ITensor *tensor, uint32_t capacity);
+ void deallocate(const ITensor *tensor);
void deallocate(void);
private:
- ir::OperandIndexMap<std::shared_ptr<Allocator>> _mem_alloc_map;
+ std::unordered_map<const ITensor *, std::shared_ptr<Allocator>> _mem_alloc_map;
};
} // namespace cpu_common
diff --git a/runtime/onert/core/include/backend/cpu_common/StaticTensorManager.h b/runtime/onert/core/include/backend/cpu_common/StaticTensorManager.h
index 3f09b7a4a..fa50b551e 100644
--- a/runtime/onert/core/include/backend/cpu_common/StaticTensorManager.h
+++ b/runtime/onert/core/include/backend/cpu_common/StaticTensorManager.h
@@ -20,7 +20,6 @@
#include "MemoryManager.h"
#include "backend/IStaticTensorManager.h"
-#include "backend/IDynamicTensorManager.h"
#include "ir/OperandIndexMap.h"
#include "ir/OperandInfo.h"
#include "TensorRegistry.h"
@@ -32,11 +31,13 @@ namespace backend
namespace cpu_common
{
+class DynamicTensorManager;
+
class StaticTensorManager : public backend::IStaticTensorManager
{
public:
StaticTensorManager(const std::shared_ptr<TensorRegistry> &reg,
- IDynamicTensorManager *dynamic_tensor_manager);
+ DynamicMemoryManager *dynamic_mem_mgr);
virtual ~StaticTensorManager() = default;
void allocateConsts(void);
@@ -57,7 +58,7 @@ private:
std::unique_ptr<MemoryManager> _nonconst_mgr;
const std::shared_ptr<TensorRegistry> _tensors;
ir::OperandIndexMap<bool> _as_constants;
- IDynamicTensorManager *_dynamic_tensor_manager;
+ DynamicMemoryManager *_dynamic_mem_mgr;
};
} // namespace cpu_common
diff --git a/runtime/onert/core/include/backend/cpu_common/Tensor.h b/runtime/onert/core/include/backend/cpu_common/Tensor.h
index 974501ecb..5fa20e15d 100644
--- a/runtime/onert/core/include/backend/cpu_common/Tensor.h
+++ b/runtime/onert/core/include/backend/cpu_common/Tensor.h
@@ -29,16 +29,19 @@ namespace backend
namespace cpu_common
{
+class DynamicMemoryManager;
+
class Tensor : public IPortableTensor
{
public:
Tensor() = delete;
+ virtual ~Tensor();
public:
Tensor(const ir::OperandInfo &info, const ir::Layout layout,
- IDynamicTensorManager *dynamic_tensor_manager)
- : _info(info), _layout(layout), _buffer(nullptr), _num_references(0),
- _dynamic_tensor_manager(dynamic_tensor_manager), _allocator(nullptr)
+ DynamicMemoryManager *dynamic_mem_mgr)
+ : IPortableTensor(info), _layout(layout), _buffer(nullptr), _num_references(0),
+ _dynamic_mem_mgr(dynamic_mem_mgr), _allocator(nullptr)
{
// DO NOTHING
}
@@ -94,7 +97,7 @@ public:
* W : dimension(2)
* C : dimension(3)
*/
- size_t dimension(size_t index) const override { return _info.shape().dim(index); }
+ size_t dimension(size_t index) const final override { return _info.shape().dim(index); }
size_t num_dimensions() const override { return _info.shape().rank(); }
size_t total_size() const override { return _info.total_size(); }
size_t calcOffset(const ir::Coordinates &coords) const override;
@@ -105,10 +108,8 @@ public:
bool is_constant() const override { return _info.isConstant(); }
bool is_dynamic() const override { return _info.isDynamic(); }
void set_dynamic() override { _info.setDynamic(); }
- IDynamicTensorManager *dynamic_tensor_manager() override { return _dynamic_tensor_manager; }
- bool is_sparse() const override { return _info.typeInfo().sparse(); }
- virtual const uint16_t *w1_segments() const override { return _info.typeInfo().w1_segments(); }
- virtual const uint16_t *w1_indices() const override { return _info.typeInfo().w1_indices(); }
+ bool applyShape(const ir::Shape &new_shape) override;
+ const ir::Sparsity *sparsity() const override { return _info.typeInfo().sparsity(); }
virtual void increase_ref()
{
@@ -118,6 +119,7 @@ public:
++_num_references;
}
+
virtual void decrease_ref()
{
assert(_buffer != nullptr || _allocator != nullptr);
@@ -136,14 +138,34 @@ public:
}
}
+ /**
+ * @brief Reset reference count to zero and release data
+ */
+ virtual void reset_ref()
+ {
+ assert(_buffer != nullptr || _allocator != nullptr);
+ assert(_num_references > 0);
+ _num_references = 0;
+
+ // Only constant tensor has allocator pointer
+ if (_buffer != nullptr)
+ _buffer = nullptr;
+ else
+ {
+ _allocator->release();
+ _allocator = nullptr;
+ }
+ }
+
+ virtual int32_t num_references() { return _num_references; }
+
void setShape(const ir::Shape &new_shape) override;
protected:
- ir::OperandInfo _info;
ir::Layout _layout;
uint8_t *_buffer;
int32_t _num_references;
- IDynamicTensorManager *_dynamic_tensor_manager;
+ DynamicMemoryManager *_dynamic_mem_mgr;
private:
/**
diff --git a/runtime/onert/core/include/compiler/StaticShapeInference.h b/runtime/onert/core/include/compiler/StaticShapeInference.h
index b97cb5b7b..5af11074e 100644
--- a/runtime/onert/core/include/compiler/StaticShapeInference.h
+++ b/runtime/onert/core/include/compiler/StaticShapeInference.h
@@ -70,6 +70,8 @@ private:
// TODO Define visitors for operations. List them in alphabetic order.
void visit(const ir::operation::ArgMax &op) override;
void visit(const ir::operation::BatchMatMul &op) override;
+ void visit(const ir::operation::BCQFullyConnected &op) override;
+ void visit(const ir::operation::BCQGather &op) override;
void visit(const ir::operation::BinaryArithmetic &op) override;
void visit(const ir::operation::BroadcastTo &op) override;
void visit(const ir::operation::Comparison &op) override;
@@ -85,6 +87,7 @@ private:
void visit(const ir::operation::Gather &op) override;
void visit(const ir::operation::If &op) override;
void visit(const ir::operation::L2Normalization &op) override;
+ void visit(const ir::operation::LSTM &op) override;
void visit(const ir::operation::MatrixBandPart &op) override;
void visit(const ir::operation::OneHot &op) override;
void visit(const ir::operation::Pack &op) override;
diff --git a/runtime/onert/core/include/exec/DynamicShapeInference.h b/runtime/onert/core/include/exec/DynamicShapeInference.h
index 6f6659659..4a86708d0 100644
--- a/runtime/onert/core/include/exec/DynamicShapeInference.h
+++ b/runtime/onert/core/include/exec/DynamicShapeInference.h
@@ -51,6 +51,8 @@ public:
// Remove TODO when any op starting from the alphabet is added
void visit(const ir::operation::ArgMax &op) override;
void visit(const ir::operation::BatchMatMul &op) override;
+ void visit(const ir::operation::BCQFullyConnected &op) override;
+ void visit(const ir::operation::BCQGather &op) override;
void visit(const ir::operation::BinaryArithmetic &op) override;
void visit(const ir::operation::BroadcastTo &op) override;
void visit(const ir::operation::Comparison &op) override;
@@ -65,6 +67,7 @@ public:
void visit(const ir::operation::FusedBatchNorm &op) override;
void visit(const ir::operation::Gather &op) override;
void visit(const ir::operation::L2Normalization &op) override;
+ void visit(const ir::operation::LSTM &op) override;
void visit(const ir::operation::MatrixBandPart &op) override;
void visit(const ir::operation::OneHot &op) override;
void visit(const ir::operation::Pack &op) override;
diff --git a/runtime/onert/core/include/exec/FunctionSequence.h b/runtime/onert/core/include/exec/FunctionSequence.h
index 79a58ed00..49f00dba1 100644
--- a/runtime/onert/core/include/exec/FunctionSequence.h
+++ b/runtime/onert/core/include/exec/FunctionSequence.h
@@ -79,7 +79,6 @@ public: // methods related to dynamic tensor
const ir::OpSequence *op_seq = nullptr;
const ir::Operations *operations = nullptr;
std::shared_ptr<exec::DynamicShapeInferer> dynamic_shape_inferer = nullptr;
- std::shared_ptr<backend::ITensorRegistry> tensor_registry = nullptr;
backend::IDynamicTensorManager *dynamic_tensor_manager = nullptr;
};
@@ -104,14 +103,25 @@ public: // methods related to dynamic tensor
*/
void enableDynamicShapeInferer(bool enable)
{
- _enable_dynamic_shape_inferer = _enable_dynamic_shape_inferer && enable;
+ _enable_dynamic_shape_inferer = _enable_dynamic_shape_inferer || enable;
}
+ /**
+ * @brief Call this function to initialize vars before running
+ * @note When we run a model with static tensor input and then run with dynamic tensor input,
+ * _enable_dynamic_shape_inferer is set to @c false at first run.
+ * Once _enable_dynamic_shape_inferer is set to @c true, it cannot be reset to @c false
+ * just by calling enableDynamicShapeInferer(), so it has to be re-initialized to @c false
+ * before every run.
+ * @todo This is a quick fix. Adding this will increase time for run(). Find way to optimize.
+ */
+ void initRunning() { _enable_dynamic_shape_inferer = false; }
+
protected:
std::vector<std::unique_ptr<IFunction>> _functions;
protected:
- bool _enable_dynamic_shape_inferer = true;
+ bool _enable_dynamic_shape_inferer = false;
std::shared_ptr<DynamicTensorCtx> _dynamic_tensor_ctx = nullptr;
};
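Given the note above, a hedged sketch of the per-run call order an executor would follow; apart from the two member functions, the names here are assumptions.

// fn_seq is the exec::FunctionSequence for an op sequence; has_dynamic_input
// is computed by the caller for the current inputs (both names illustrative).
void prepareRun(onert::exec::FunctionSequence &fn_seq, bool has_dynamic_input)
{
  fn_seq.initRunning();                                // reset the latch to false
  fn_seq.enableDynamicShapeInferer(has_dynamic_input); // OR-latches to true for this run
  // ... then execute the sequence as usual
}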
diff --git a/runtime/onert/core/include/exec/IExecutor.h b/runtime/onert/core/include/exec/IExecutor.h
index 6c8bab67c..1d2831dd0 100644
--- a/runtime/onert/core/include/exec/IExecutor.h
+++ b/runtime/onert/core/include/exec/IExecutor.h
@@ -69,21 +69,6 @@ struct IExecutor
using ExecutorMap = std::unordered_map<ir::SubgraphIndex, std::unique_ptr<IExecutor>>;
-// TODO Move this structure to suitable place
-/**
- * @brief Dynamic allocation info for input tensors
- * When user sets shape of input having unknown dims after compilation, memory for the input
- * should be allocated before executing kernels. This struct contains information to allocate
- * memory.
- */
-struct DynAllocInfo
-{
- /// @brief index of input tensor whose memory needs to be allocated at execution time
- ir::OperandIndex ind;
-};
-
-using DynAllocInfoMap = std::unordered_map<std::shared_ptr<backend::ITensor>, DynAllocInfo>;
-
} // namespace exec
} // namespace onert
diff --git a/runtime/onert/core/include/ir/Operand.h b/runtime/onert/core/include/ir/Operand.h
index 1b3a43b02..f149a744b 100644
--- a/runtime/onert/core/include/ir/Operand.h
+++ b/runtime/onert/core/include/ir/Operand.h
@@ -40,6 +40,7 @@ public:
{
// DO NOTHING
}
+ explicit Operand(const Operand &) = default;
public:
const Shape &shape(void) const { return _info.shape(); }
diff --git a/runtime/onert/core/include/ir/OperandIndexSequence.h b/runtime/onert/core/include/ir/OperandIndexSequence.h
index aa01eccaa..2f78cc832 100644
--- a/runtime/onert/core/include/ir/OperandIndexSequence.h
+++ b/runtime/onert/core/include/ir/OperandIndexSequence.h
@@ -82,6 +82,8 @@ public:
public:
std::vector<OperandIndex>::const_iterator begin(void) const { return _vec.begin(); }
std::vector<OperandIndex>::const_iterator end(void) const { return _vec.end(); }
+ std::vector<OperandIndex>::iterator begin(void) { return _vec.begin(); }
+ std::vector<OperandIndex>::iterator end(void) { return _vec.end(); }
private:
std::vector<OperandIndex> _vec;
diff --git a/runtime/onert/core/include/ir/OperandInfo.h b/runtime/onert/core/include/ir/OperandInfo.h
index b8e123027..67aeb0e65 100644
--- a/runtime/onert/core/include/ir/OperandInfo.h
+++ b/runtime/onert/core/include/ir/OperandInfo.h
@@ -117,6 +117,7 @@ public:
MemAllocType memAllocType() const { return _alloc_type; }
void setAsConstant() { _const = true; }
+ void setAsNonConst() { _const = false; }
bool isConstant() const
{
// Impossible case: constant and dynamic operand
diff --git a/runtime/onert/core/include/ir/Operation.h b/runtime/onert/core/include/ir/Operation.h
index 818bd913b..89f7e340d 100644
--- a/runtime/onert/core/include/ir/Operation.h
+++ b/runtime/onert/core/include/ir/Operation.h
@@ -34,9 +34,12 @@ struct OperationVisitor;
class Operation
{
public:
+ // TODO Remove default parameter
Operation(OperandConstraint input_constr, const OperandIndexSequence &inputs,
- const OperandIndexSequence &outputs);
- explicit Operation(OperandConstraint input_constr);
+ const OperandIndexSequence &outputs,
+ OperandConstraint output_constr = OperandConstraint::createAny());
+ explicit Operation(OperandConstraint input_constr,
+ OperandConstraint output_constr = OperandConstraint::createAny());
Operation(const Operation &) = default;
Operation(Operation &&) = default;
@@ -62,6 +65,7 @@ public:
private:
OperandConstraint _input_constr;
+ OperandConstraint _output_constr;
OperandIndexSequence _inputs;
OperandIndexSequence _outputs;
};
diff --git a/runtime/onert/core/include/ir/Sparsity.h b/runtime/onert/core/include/ir/Sparsity.h
new file mode 100644
index 000000000..ad4d8259b
--- /dev/null
+++ b/runtime/onert/core/include/ir/Sparsity.h
@@ -0,0 +1,64 @@
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+*/
+
+#ifndef __ONERT_IR_SPARSITY_H__
+#define __ONERT_IR_SPARSITY_H__
+
+#include <cassert>
+#include <cstdint>
+#include <vector>
+
+namespace onert
+{
+namespace ir
+{
+
+/**
+ * @brief Structure for Sparse Tensor
+ */
+struct Sparsity
+{
+public:
+ Sparsity() = default;
+ Sparsity(std::vector<uint16_t> &&w1_segments, std::vector<uint16_t> &&w1_indices,
+ std::vector<int32_t> &&block_size)
+ : _w1_segments(w1_segments), _w1_indices(w1_indices), _block_size(block_size)
+ {
+ }
+
+ /**
+ * @brief Returns segments array. See compressed sparse row format.
+ */
+ const uint16_t *w1_segments() const { return _w1_segments.data(); }
+ /**
+ * @brief Returns indices array. See compressed sparse row format.
+ */
+ const uint16_t *w1_indices() const { return _w1_indices.data(); }
+ /**
+ * @brief Returns block size which is used for block sparsity
+ */
+ const std::vector<int32_t> &block_size() const { return _block_size; }
+
+private:
+ std::vector<uint16_t> _w1_segments;
+ std::vector<uint16_t> _w1_indices;
+ std::vector<int32_t> _block_size;
+};
+
+} // namespace ir
+} // namespace onert
+
+#endif // __ONERT_IR_SPARSITY_H__
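As a rough illustration of the w1_segments/w1_indices convention (a CSR-style encoding over the weight rows), here is a hedged traversal sketch; the row/column interpretation and the values array are assumptions for illustration only.

#include <cstdint>
#include <cstdio>

// Visit the stored entries of a sparse [rows x cols] weight matrix:
//   w1_segments[r] .. w1_segments[r + 1] - 1  index into values / w1_indices for row r
//   w1_indices[k]                             column of the k-th stored value
void forEachStoredEntry(const uint16_t *w1_segments, const uint16_t *w1_indices,
                        const float *values, int rows)
{
  for (int r = 0; r < rows; ++r)
    for (int k = w1_segments[r]; k < w1_segments[r + 1]; ++k)
      std::printf("w[%d][%d] = %f\n", r, w1_indices[k], values[k]);
}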
diff --git a/runtime/onert/core/include/ir/TypeInfo.h b/runtime/onert/core/include/ir/TypeInfo.h
index 3f7eab4c0..a1ae4d2e4 100644
--- a/runtime/onert/core/include/ir/TypeInfo.h
+++ b/runtime/onert/core/include/ir/TypeInfo.h
@@ -18,9 +18,11 @@
#define __ONERT_IR_TYPEINFO_H__
#include <cstdint>
+#include <memory>
#include <vector>
#include "ir/DataType.h"
+#include "ir/Sparsity.h"
namespace onert
{
@@ -33,7 +35,7 @@ public:
TypeInfo() = delete;
explicit TypeInfo(DataType type, float scale = 0, int32_t offset = 0)
- : _type(type), _scale(scale), _offset(offset), _sparse(false)
+ : _type(type), _scale(scale), _offset(offset), _sparsity(nullptr)
{
}
@@ -41,18 +43,11 @@ public:
DataType type() const { return _type; }
float scale() const { return _scale; }
int32_t offset() const { return _offset; }
- bool sparse() const { return _sparse; }
- const uint16_t *w1_segments() const { return _w1_segments.data(); }
- const uint16_t *w1_indices() const { return _w1_indices.data(); }
+ const ir::Sparsity *sparsity() const { return _sparsity.get(); }
+ void sparsity(std::shared_ptr<ir::Sparsity> sparsity) { _sparsity = sparsity; }
public:
void type(const DataType type) { _type = type; }
- void sparse2DMetadata(std::vector<uint16_t> &&w1_segments, std::vector<uint16_t> &&w1_indices)
- {
- _sparse = true;
- _w1_segments = w1_segments;
- _w1_indices = w1_indices;
- }
private:
DataType _type;
@@ -60,9 +55,7 @@ private:
float _scale;
int32_t _offset;
// for sparsity
- bool _sparse;
- std::vector<uint16_t> _w1_segments;
- std::vector<uint16_t> _w1_indices;
+ std::shared_ptr<ir::Sparsity> _sparsity;
};
bool operator==(const TypeInfo &lhs, const TypeInfo &rhs);
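A minimal hedged sketch of attaching sparsity metadata through the new TypeInfo::sparsity() setter; the segment/index data below is placeholder only.

#include <memory>
#include <vector>
#include "ir/TypeInfo.h"

void attachSparsity(onert::ir::TypeInfo &type_info)
{
  std::vector<uint16_t> segments{0, 2, 3}; // placeholder CSR segments
  std::vector<uint16_t> indices{0, 3, 1};  // placeholder CSR indices
  std::vector<int32_t> block{};            // empty: no block sparsity
  type_info.sparsity(std::make_shared<onert::ir::Sparsity>(std::move(segments),
                                                           std::move(indices), std::move(block)));
}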
diff --git a/runtime/onert/core/include/ir/operation/ArgMax.h b/runtime/onert/core/include/ir/operation/ArgMax.h
index 8400e1f1e..ea7eabb83 100644
--- a/runtime/onert/core/include/ir/operation/ArgMax.h
+++ b/runtime/onert/core/include/ir/operation/ArgMax.h
@@ -31,12 +31,12 @@ class ArgMax : public Operation
public:
enum Input
{
- INPUT
+ INPUT = 0,
+ AXIS = 1
};
struct Param
{
- int axis;
DataType output_type;
};
diff --git a/runtime/onert/core/include/ir/operation/LSTM.h b/runtime/onert/core/include/ir/operation/LSTM.h
index 1e6c00bf3..027bc6b42 100644
--- a/runtime/onert/core/include/ir/operation/LSTM.h
+++ b/runtime/onert/core/include/ir/operation/LSTM.h
@@ -26,6 +26,7 @@ namespace ir
namespace operation
{
+// This operation supports only unidirectional sequence LSTM
class LSTM : public Operation
{
public:
@@ -51,6 +52,10 @@ public:
PROJECTION_BIAS = 17,
OUTPUT_STATE_IN = 18,
CELL_STATE_IN = 19,
+ INPUT_LAYER_NORMALIZATION_WEIGHTS = 20,
+ FORGET_LAYER_NORMALIZATION_WEIGHTS = 21,
+ CELL_LAYER_NORMALIZATION_WEIGHTS = 22,
+ OUTPUT_LAYER_NORMALIZATION_WEIGHTS = 23,
};
enum Output
@@ -66,6 +71,7 @@ public:
Activation activation;
float cell_threshold;
float projection_threshold;
+ bool time_major;
};
public:
@@ -73,6 +79,7 @@ public:
public:
void accept(OperationVisitor &v) const override;
+ std::string name() const override;
OpCode opcode() const final { return OpCode::LSTM; }
public:
diff --git a/runtime/onert/core/include/ir/operation/ResizeBilinear.h b/runtime/onert/core/include/ir/operation/ResizeBilinear.h
index 29aa496d7..ab330c826 100644
--- a/runtime/onert/core/include/ir/operation/ResizeBilinear.h
+++ b/runtime/onert/core/include/ir/operation/ResizeBilinear.h
@@ -34,10 +34,12 @@ public:
enum Input
{
INPUT = 0,
+ SIZE = 1,
};
struct Param
{
+ // If the input SIZE exists in inputs, height_out and width_out are not set. Ignore these params
int32_t height_out;
int32_t width_out;
bool align_corners;
diff --git a/runtime/onert/core/include/ir/operation/ResizeNearestNeighbor.h b/runtime/onert/core/include/ir/operation/ResizeNearestNeighbor.h
index e4d810eeb..10827803e 100644
--- a/runtime/onert/core/include/ir/operation/ResizeNearestNeighbor.h
+++ b/runtime/onert/core/include/ir/operation/ResizeNearestNeighbor.h
@@ -34,10 +34,12 @@ public:
enum Input
{
INPUT = 0,
+ SIZE = 1,
};
struct Param
{
+ // If the input SIZE exists in inputs, height_out and width_out are not set. Ignore these params
int32_t height_out;
int32_t width_out;
bool align_corners;
diff --git a/runtime/onert/core/include/ir/operation/Split.h b/runtime/onert/core/include/ir/operation/Split.h
index 60e0fdf15..c415941a4 100644
--- a/runtime/onert/core/include/ir/operation/Split.h
+++ b/runtime/onert/core/include/ir/operation/Split.h
@@ -29,12 +29,12 @@ class Split : public Operation
public:
enum Input
{
- INPUT = 0
+ AXIS = 0,
+ INPUT = 1,
};
struct Param
{
- int axis;
int num_splits;
};
diff --git a/runtime/onert/core/include/ir/operation/Transpose.h b/runtime/onert/core/include/ir/operation/Transpose.h
index 9631f7aaa..665c9bbce 100644
--- a/runtime/onert/core/include/ir/operation/Transpose.h
+++ b/runtime/onert/core/include/ir/operation/Transpose.h
@@ -34,26 +34,15 @@ public:
enum Input
{
INPUT = 0, // for an n-D tensor, specifying the tensor to be transposed.
- };
-
- struct Param
- {
- std::vector<int> perm;
+ PERMUTATION = 1,
};
public:
- Transpose(const OperandIndexSequence &inputs, const OperandIndexSequence &outputs,
- const Param &param);
+ Transpose(const OperandIndexSequence &inputs, const OperandIndexSequence &outputs);
public:
void accept(OperationVisitor &v) const override;
OpCode opcode() const final { return OpCode::Transpose; }
-
-public:
- const Param &param() const { return _param; }
-
-private:
- Param _param;
};
} // namespace operation
diff --git a/runtime/onert/core/include/util/Config.lst b/runtime/onert/core/include/util/Config.lst
index 5077fad69..30f211011 100644
--- a/runtime/onert/core/include/util/Config.lst
+++ b/runtime/onert/core/include/util/Config.lst
@@ -35,6 +35,7 @@ CONFIG(OP_SEQ_MAX_NODE , int , "0")
CONFIG(TRACE_FILEPATH , std::string , "")
CONFIG(FP16_ENABLE , bool , "0")
CONFIG(RUY_THREADS , int , "-1")
+CONFIG(USE_MMAPED_DATA , bool , "0")
// Auto-generate all operations
diff --git a/runtime/onert/core/include/util/ShapeInference.h b/runtime/onert/core/include/util/ShapeInference.h
index 1ebed48f2..701b835d2 100644
--- a/runtime/onert/core/include/util/ShapeInference.h
+++ b/runtime/onert/core/include/util/ShapeInference.h
@@ -47,7 +47,14 @@ ir::Shape inferArgMaxShape(const ir::Shape &input_shape, int axis, int rank);
ir::Shape inferBatchMatMulShape(const ir::Shape &lhs_shape, const ir::Shape &rhs_shape,
const ir::operation::BatchMatMul::Param &param);
-ir::Shape inferBroadcastToShape(const ir::Shape wshape, const int32_t *shape_buffer);
+ir::Shape inferBCQFullyConnectedShape(const ir::Shape &in_shape, const ir::Shape &cluster_shape,
+ const int32_t *cluster_buf);
+
+ir::Shape inferBCQGatherShape(const ir::Shape &indices_shape, const ir::Shape &cluster_shape,
+ const int32_t *cluster_buf, int rank,
+ const ir::operation::BCQGather::Param &param);
+
+ir::Shape inferBroadcastToShape(const ir::Shape shp_shape, const int32_t *shp_buf);
ir::Shape inferConcatShape(const Shapes &in_shapes, const ir::operation::Concat::Param &param);
@@ -63,7 +70,7 @@ ir::Shape inferEltwiseShape(const ir::Shape &lhs_shape, const ir::Shape &rhs_sha
ir::Shape inferExpandDimsShape(const ir::Shape &in_shape, int32_t axis);
-ir::Shape inferFillShape(const ir::Shape &in_shape, const int32_t *buf);
+ir::Shape inferFillShape(const ir::Shape &in_shape, const int32_t *in_buf);
ir::Shape inferFullyConnectedShape(const ir::Shape &in_shape, const ir::Shape &ker_shape);
@@ -97,12 +104,12 @@ ir::Shape inferResizeBilinearShape(const ir::Shape &in_shape, const int32_t outp
ir::Shape inferSelectShape(const ir::Shape &input_cond_shape, const ir::Shape &input_true_shape,
const ir::Shape &input_false_shape);
-ir::Shape inferSliceShape(const ir::Shape &input_shape, const int32_t *begins,
- const int32_t *sizes);
+ir::Shape inferSliceShape(const ir::Shape &input_shape, const int32_t *begins_buf,
+ const int32_t *sizes_buf);
ir::Shape inferSpaceToBatchNDShape(const ir::Shape &input_shape, const ir::Shape &block_shape_shape,
- const ir::Shape &padding_shape, const int32_t *block_shape_data,
- const int32_t *padding_data);
+ const ir::Shape &padding_shape, const int32_t *block_shape_buf,
+ const int32_t *padding_buf);
ir::Shape inferSplitShape(const ir::Shape input_shape, int axis_value, int num_splits);
@@ -132,9 +139,11 @@ StridedSliceParams buildStridedSliceParams(const T *begin, const T *end, const T
ir::Shape inferStridedSliceShape(const ir::Shape &input_shape, const StridedSliceParams &op_params,
uint32_t rank);
-ir::Shape inferTileShape(const ir::Shape &in_shape, const int32_t *multiplier);
+ir::Shape inferTileShape(const ir::Shape &in_shape, const int32_t *multiplier_buf,
+ const int32_t multiplier_size);
-ir::Shape inferTransposeShape(const ir::Shape &in_shape, const std::vector<int> &perm);
+ir::Shape inferTransposeShape(const ir::Shape &in_shape, const int32_t *perm_buf,
+ const int32_t rank);
ir::Shape inferUnpackShape(const ir::Shape &input_shape, int axis, int rank);
diff --git a/runtime/onert/core/include/util/Utils.h b/runtime/onert/core/include/util/Utils.h
index 847fb6971..8a4eea32b 100644
--- a/runtime/onert/core/include/util/Utils.h
+++ b/runtime/onert/core/include/util/Utils.h
@@ -22,6 +22,87 @@
#ifndef __ONERT_UTIL_UTILS_H__
#define __ONERT_UTIL_UTILS_H__
+#include "ir/Coordinates.h"
+#include "ir/Shape.h"
+
#define UNUSED_RELEASE(a) (void)(a)
+template <size_t from, size_t to, typename Enable = void> struct ForEachDimension
+{
+ template <typename L, typename... Args>
+ static void unroll(const onert::ir::Shape &shape, onert::ir::Coordinates &coords,
+ L &&lambda_function, Args &&... args)
+ {
+ static_assert(from < to, "from must be less than to");
+ assert(static_cast<int>(to) <= shape.rank());
+ const auto &d = shape.dim(from);
+
+ for (auto v = 0; v < d; v++)
+ {
+ coords.set(from, v);
+ ForEachDimension<from + 1, to>::unroll(shape, coords, std::forward<L>(lambda_function),
+ std::forward<Args>(args)...);
+ }
+ }
+};
+
+template <size_t from, size_t to>
+struct ForEachDimension<from, to, typename std::enable_if<from == to>::type>
+{
+ template <typename L, typename... Args>
+ static void unroll(const onert::ir::Shape &shape, onert::ir::Coordinates &coords,
+ L &&lambda_function, Args &&... args)
+ {
+ UNUSED_RELEASE(shape);
+ assert(static_cast<int>(to) <= shape.rank());
+ lambda_function(coords, std::forward<Args>(args)...);
+ }
+};
+
+template <typename L, typename... Args>
+inline void ShapeLoop(const onert::ir::Shape &shape, L &&lambda_function, Args &&... args)
+{
+ assert(shape.rank() > 0);
+ for (auto i = 0; i < shape.rank(); ++i)
+ {
+ assert(shape.dim(i) > 0);
+ }
+
+ onert::ir::Coordinates coords;
+ switch (shape.rank())
+ {
+ case 0:
+ coords.set(0, 0);
+ ForEachDimension<0, 0>::unroll(shape, coords, std::forward<L>(lambda_function),
+ std::forward<Args>(args)...);
+ break;
+ case 1:
+ ForEachDimension<0, 1>::unroll(shape, coords, std::forward<L>(lambda_function),
+ std::forward<Args>(args)...);
+ break;
+ case 2:
+ ForEachDimension<0, 2>::unroll(shape, coords, std::forward<L>(lambda_function),
+ std::forward<Args>(args)...);
+ break;
+ case 3:
+ ForEachDimension<0, 3>::unroll(shape, coords, std::forward<L>(lambda_function),
+ std::forward<Args>(args)...);
+ break;
+ case 4:
+ ForEachDimension<0, 4>::unroll(shape, coords, std::forward<L>(lambda_function),
+ std::forward<Args>(args)...);
+ break;
+ case 5:
+ ForEachDimension<0, 5>::unroll(shape, coords, std::forward<L>(lambda_function),
+ std::forward<Args>(args)...);
+ break;
+ case 6:
+ ForEachDimension<0, 6>::unroll(shape, coords, std::forward<L>(lambda_function),
+ std::forward<Args>(args)...);
+ break;
+ default:
+ assert(false && "ShapeLoop, 1 <= Shape'rank <= 6");
+ break;
+ }
+}
#endif // __ONERT_UTIL_UTILS_H__
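A hedged usage sketch of ShapeLoop, counting the coordinates it visits (for a {2, 3} shape the result is 6); the function below is illustrative only.

#include <cstddef>
#include "ir/Shape.h"
#include "util/Utils.h"

size_t countElements(const onert::ir::Shape &shape)
{
  size_t count = 0;
  ShapeLoop(shape, [&](const onert::ir::Coordinates &coords) {
    (void)coords; // each call corresponds to one coordinate tuple
    ++count;
  });
  return count;
}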
diff --git a/runtime/libs/ndarray/src/Array.cpp b/runtime/onert/core/src/backend/IPortableTensor.cc
index f9c9de9d3..cec34e780 100644
--- a/runtime/libs/ndarray/src/Array.cpp
+++ b/runtime/onert/core/src/backend/IPortableTensor.cc
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
@@ -14,14 +14,16 @@
* limitations under the License.
*/
-#include "ndarray/Array.h"
+#include "backend/IPortableTensor.h"
-namespace ndarray
+namespace onert
+{
+namespace backend
{
-template class Array<float>;
-template class Array<int32_t>;
-template class Array<uint32_t>;
-template class Array<uint8_t>;
+// `dynamic_cast` does not work across library boundaries on NDK.
+// With this as a key function, `dynamic_cast` works across dl (dynamic library) boundaries.
+IPortableTensor::~IPortableTensor() {}
-} // namespace ndarray
+} // namespace backend
+} // namespace onert
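The key-function comment above relies on a general C++ ABI property: defining one virtual member out of line pins the class's vtable and type_info to a single shared object, so dynamic_cast compares against the same type_info everywhere. A generic hedged illustration follows; the IFace name and files are made up.

// IFace.h -- an interface shared by the core library and dlopen'ed plugins.
struct IFace
{
  virtual ~IFace(); // declared but not defined inline: this is the key function
  virtual void run() = 0;
};

// IFace.cc -- compiled only into the core library. Because the destructor is
// the first non-inline virtual member, IFace's vtable and type_info are
// emitted here exactly once, so dynamic_cast<IFace *>(p) compares against the
// same type_info no matter which plugin created the object.
IFace::~IFace() = default;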
diff --git a/runtime/onert/core/src/backend/controlflow/BackendContext.h b/runtime/onert/core/src/backend/controlflow/BackendContext.h
new file mode 100644
index 000000000..d179bfde4
--- /dev/null
+++ b/runtime/onert/core/src/backend/controlflow/BackendContext.h
@@ -0,0 +1,60 @@
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __ONERT_BACKEND_CPU_BACKEND_CONTEXT_H__
+#define __ONERT_BACKEND_CPU_BACKEND_CONTEXT_H__
+
+#include <backend/BackendContext.h>
+#include "ExternalContext.h"
+
+namespace onert
+{
+namespace backend
+{
+namespace controlflow
+{
+
+class BackendContext : public onert::backend::BackendContext
+{
+public:
+ BackendContext(const Backend *backend, const ir::Graph *graph,
+ std::shared_ptr<ITensorRegistry> tensor_registry = nullptr,
+ std::shared_ptr<ITensorBuilder> tensor_builder = nullptr,
+ std::shared_ptr<IConstantInitializer> constant_initializer = nullptr,
+ std::shared_ptr<IKernelGenerator> kernel_gen = nullptr,
+ std::shared_ptr<ITensorRegister> tensor_register = nullptr,
+ std::shared_ptr<IOptimizer> optimizer = nullptr)
+ : onert::backend::BackendContext(backend, graph, tensor_registry, tensor_builder,
+ constant_initializer, kernel_gen, tensor_register,
+ optimizer),
+ _external_context(std::make_shared<ExternalContext>())
+ {
+ }
+
+ std::shared_ptr<ExternalContext> external_context() { return _external_context; }
+
+private:
+ // NOTE ruy context has a thread pool, and when multiple ruy contexts are created,
+ // the thread pool is also created in duplicate
+ // TODO Create one ruy context for session
+ std::shared_ptr<ExternalContext> _external_context;
+};
+
+} // namespace controlflow
+} // namespace backend
+} // namespace onert
+
+#endif // __ONERT_BACKEND_CPU_BACKEND_CONTEXT_H__
diff --git a/runtime/onert/core/src/backend/controlflow/DynamicTensorManager.cc b/runtime/onert/core/src/backend/controlflow/DynamicTensorManager.cc
index 1288e4c96..77f02969d 100644
--- a/runtime/onert/core/src/backend/controlflow/DynamicTensorManager.cc
+++ b/runtime/onert/core/src/backend/controlflow/DynamicTensorManager.cc
@@ -17,8 +17,7 @@
#include "DynamicTensorManager.h"
#include "util/logging.h"
-#include "util/Exceptions.h"
-#include "ir/DataType.h"
+#include "misc/polymorphic_downcast.h"
namespace onert
{
@@ -33,82 +32,18 @@ DynamicTensorManager::DynamicTensorManager(const std::shared_ptr<TensorRegistry>
// DO NOTHING
}
-void DynamicTensorManager::applyShape(const ir::OperandIndex &ind, const ir::Shape &new_shape)
-{
- // NOTE Handle user tensors first
- auto user_tensor = _tensors->getNativeUserTensor(ind);
- if (user_tensor)
- {
- // User tensors cannot be reallocated.
- auto buffer_size = user_tensor->total_size();
- auto new_size = new_shape.num_elements() * sizeOfDataType(user_tensor->data_type());
- if (buffer_size < new_size)
- throw InsufficientBufferSizeException{"Output buffer size is less than output tensor size"};
- user_tensor->setShape(new_shape);
- return;
- }
-
- // NOTE Then handle own tensors
- auto tensor = _tensors->getNativeOwnTensor(ind);
- assert(tensor);
-
- bool previously_dynamic = tensor->is_dynamic();
-
- auto allocTensorMem = [&](bool overwrite = false) {
- auto capacity = tensor->total_size();
- auto alloc = _dynamic_mem_mgr->allocate(ind, capacity);
-
- if (overwrite)
- tensor->overwriteBuffer(alloc);
- else
- tensor->setBuffer(alloc);
- };
-
- if (!previously_dynamic)
- {
- // TODO deallocate tensor->buffer()
- // issue is that staticTensorManager might have allocate this memory
- tensor->setShape(new_shape);
- tensor->set_dynamic();
- allocTensorMem(true);
- }
- else if (tensor->buffer() == nullptr)
- {
- tensor->setShape(new_shape);
- tensor->set_dynamic();
- allocTensorMem();
- }
- // when buffer was already allocated and new_shape requires different size
- else
- {
- auto previous_size = tensor->total_size();
- auto new_size = new_shape.num_elements() * sizeOfDataType(tensor->data_type());
- if (previous_size != new_size)
- {
- _dynamic_mem_mgr->deallocate(ind);
-
- tensor->setShape(new_shape);
- tensor->set_dynamic();
- allocTensorMem(true);
- }
- else
- { // when buffer with same size was already allocated, shape could differ
- tensor->setShape(new_shape);
- }
- }
-}
-
void DynamicTensorManager::buildTensor(const ir::OperandIndex &ind,
const ir::OperandInfo &tensor_info,
ir::Layout backend_layout)
{
- auto tensor = std::make_shared<cpu_common::Tensor>(tensor_info, backend_layout, this);
- _tensors->setNativeOwnTensor(ind, tensor);
+ auto tensor =
+ std::make_unique<cpu_common::Tensor>(tensor_info, backend_layout, _dynamic_mem_mgr.get());
+ _tensors->setNativeOwnTensor(ind, std::move(tensor));
}
-void DynamicTensorManager::planDealloc(ir::OperationIndex op_ind, ir::OperandIndex operand_ind)
+void DynamicTensorManager::planDealloc(ir::OperationIndex op_ind, backend::ITensor *tensor)
{
- _dealloc_tensor_map[op_ind].emplace(operand_ind);
+ _dealloc_tensor_map[op_ind].emplace(tensor);
}
void DynamicTensorManager::deallocInput(ir::OperationIndex op_ind)
@@ -118,25 +53,26 @@ void DynamicTensorManager::deallocInput(ir::OperationIndex op_ind)
return;
auto &input_set = find->second;
- for (auto input_ind : input_set)
+ for (auto *tensor : input_set)
{
- if (!_tensors->getNativeTensor(input_ind)->is_dynamic())
+ if (!tensor->is_dynamic())
continue;
- _dynamic_mem_mgr->deallocate(input_ind);
- VERBOSE(DynamicTensorManager) << "Deallocating #" << input_ind.value()
+ _dynamic_mem_mgr->deallocate(tensor);
+
+ auto *cpu_tensor = nnfw::misc::polymorphic_downcast<cpu_common::Tensor *>(tensor);
+ cpu_tensor->resetBuffer();
+
+ VERBOSE(DynamicTensorManager) << "Deallocating a tensor " << (void *)tensor
<< " (input of op_ind: " << op_ind.value() << ")" << std::endl;
}
}
-void DynamicTensorManager::deallocSubgraphOutput(ir::OperandIndex output_ind)
+const ITensor *DynamicTensorManager::getRawITensor(ir::OperandIndex ind)
{
- if (!_tensors->getNativeTensor(output_ind)->is_dynamic())
- return;
-
- _dynamic_mem_mgr->deallocate(output_ind);
- VERBOSE(DynamicTensorManager) << "Deallocating #" << output_ind.value()
- << " (output of a subgraph)" << std::endl;
+ auto ptr = _tensors->getITensor(ind);
+ assert(ptr);
+ return ptr;
}
} // namespace controlflow
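The deallocation bookkeeping above is now keyed by tensor pointers rather than operand indices. The following is a minimal self-contained sketch of that pattern; the OpIndex/Tensor types are simplified stand-ins for the onert classes, and the int key stands in for ir::OperationIndex:

#include <cstdio>
#include <unordered_map>
#include <unordered_set>

struct Tensor
{
  bool dynamic = false;
  bool is_dynamic() const { return dynamic; }
  void resetBuffer() { /* forget the underlying buffer */ }
};

class DeallocPlanner
{
public:
  // Remember that `tensor` may be freed once operation `op_ind` has run
  void planDealloc(int op_ind, Tensor *tensor) { _dealloc_tensor_map[op_ind].emplace(tensor); }

  // Free every dynamic tensor that was planned for this operation
  void deallocInput(int op_ind)
  {
    auto find = _dealloc_tensor_map.find(op_ind);
    if (find == _dealloc_tensor_map.end())
      return;
    for (auto *tensor : find->second)
    {
      if (!tensor->is_dynamic())
        continue; // static tensors are owned by the static tensor manager
      tensor->resetBuffer();
      std::printf("Deallocating tensor %p (input of op %d)\n", static_cast<void *>(tensor), op_ind);
    }
  }

private:
  std::unordered_map<int, std::unordered_set<Tensor *>> _dealloc_tensor_map;
};

int main()
{
  Tensor t;
  t.dynamic = true;
  DeallocPlanner planner;
  planner.planDealloc(/*op_ind=*/3, &t);
  planner.deallocInput(3);
}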
diff --git a/runtime/onert/core/src/backend/controlflow/DynamicTensorManager.h b/runtime/onert/core/src/backend/controlflow/DynamicTensorManager.h
index dbe388ba2..fb822a917 100644
--- a/runtime/onert/core/src/backend/controlflow/DynamicTensorManager.h
+++ b/runtime/onert/core/src/backend/controlflow/DynamicTensorManager.h
@@ -43,14 +43,16 @@ public:
virtual ~DynamicTensorManager() = default;
- void applyShape(const ir::OperandIndex &ind, const ir::Shape &new_shape) override;
-
void buildTensor(const ir::OperandIndex &ind, const ir::OperandInfo &tensor_info,
ir::Layout backend_layout);
- void planDealloc(ir::OperationIndex op_ind, ir::OperandIndex operand_ind) override;
+ void planDealloc(ir::OperationIndex op_ind, backend::ITensor *tensor) override;
void deallocInput(ir::OperationIndex op_ind) override;
- void deallocSubgraphOutput(ir::OperandIndex ind) override;
+
+ std::shared_ptr<cpu_common::DynamicMemoryManager> dynamic_mem_mgr() { return _dynamic_mem_mgr; }
+
+private:
+ const ITensor *getRawITensor(ir::OperandIndex ind);
private:
/**
@@ -60,9 +62,10 @@ private:
std::shared_ptr<cpu_common::DynamicMemoryManager> _dynamic_mem_mgr;
const std::shared_ptr<TensorRegistry> _tensors;
- // contains list of dynamic tensor index, which can be deallocated after running operation
- // note: this map could contain static tensor index too. Careful use is required.
- std::unordered_map<ir::OperationIndex, std::unordered_set<ir::OperandIndex>> _dealloc_tensor_map;
+ // contains a list of dynamic tensors that can be deallocated after running an operation
+ // note: this map could contain static tensors too, so careful use is required.
+ std::unordered_map<ir::OperationIndex, std::unordered_set<backend::ITensor *>>
+ _dealloc_tensor_map;
};
} // namespace controlflow
diff --git a/runtime/onert/core/src/backend/controlflow/ExternalContext.h b/runtime/onert/core/src/backend/controlflow/ExternalContext.h
new file mode 100644
index 000000000..58bccb6c6
--- /dev/null
+++ b/runtime/onert/core/src/backend/controlflow/ExternalContext.h
@@ -0,0 +1,62 @@
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __ONERT_BACKEND_CONTROLFLOW_EXTERNAL_CONTEXT_H__
+#define __ONERT_BACKEND_CONTROLFLOW_EXTERNAL_CONTEXT_H__
+
+#include <backend/IExternalContext.h>
+#include <util/ConfigSource.h>
+#include <ruy/context.h>
+
+namespace
+{
+const int kDefaultNumThreadpoolThreads = 1;
+}
+
+namespace onert
+{
+namespace backend
+{
+namespace controlflow
+{
+
+// TODO Unify this with cpu::ExternalContext
+class ExternalContext : public IExternalContext
+{
+public:
+ ExternalContext() : _ruy_context(nullptr)
+ {
+ // setMaxNumThreads(onert::util::getConfigInt(onert::util::config::RUY_THREADS));
+ }
+
+ void setMaxNumThreads(int max_num_threads)
+ {
+ const int target_num_threads =
+ max_num_threads > -1 ? max_num_threads : kDefaultNumThreadpoolThreads;
+ _ruy_context->set_max_num_threads(target_num_threads);
+ }
+
+ ruy::Context *ruy_context() const { return _ruy_context.get(); }
+
+private:
+ const std::unique_ptr<ruy::Context> _ruy_context;
+};
+
+} // namespace controlflow
+} // namespace backend
+} // namespace onert
+
+#endif // __ONERT_BACKEND_CONTROLFLOW_EXTERNAL_CONTEXT_H__
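The class above is intended to let kernels in a session share one ruy context, and therefore one thread pool. Note that as committed the _ruy_context member is initialized to nullptr and the setMaxNumThreads call is commented out, so the getter simply returns nullptr. A sketch of the presumable intent, assuming a context is actually constructed, could look like this:

#include <memory>
#include <ruy/context.h>

namespace
{
constexpr int kDefaultNumThreadpoolThreads = 1;
}

class SharedRuyContext
{
public:
  SharedRuyContext() : _ruy_context(std::make_unique<ruy::Context>()) {}

  // Cap the thread pool size; -1 (or any negative value) falls back to the default
  void setMaxNumThreads(int max_num_threads)
  {
    const int target = max_num_threads > -1 ? max_num_threads : kDefaultNumThreadpoolThreads;
    _ruy_context->set_max_num_threads(target);
  }

  ruy::Context *ruy_context() const { return _ruy_context.get(); }

private:
  std::unique_ptr<ruy::Context> _ruy_context;
};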
diff --git a/runtime/onert/core/src/backend/controlflow/KernelGenerator.cc b/runtime/onert/core/src/backend/controlflow/KernelGenerator.cc
index de5a6a5f6..d76ca53e3 100644
--- a/runtime/onert/core/src/backend/controlflow/KernelGenerator.cc
+++ b/runtime/onert/core/src/backend/controlflow/KernelGenerator.cc
@@ -58,12 +58,10 @@ void KernelGenerator::visit(const ir::OpSequence &op_seq)
dyn_ctx->op_seq = &op_seq;
dyn_ctx->operations = &_graph.operations();
dyn_ctx->dynamic_shape_inferer = std::move(dyn_shape_inferer);
- dyn_ctx->tensor_registry = _tensor_reg;
dyn_ctx->dynamic_tensor_manager = _dyn_tensor_manager;
_return_fn_seq->dynamic_tensor_ctx(dyn_ctx);
}
- _return_fn_seq->enableDynamicShapeInferer(true);
for (const auto &op_idx : op_seq.operations())
{
@@ -78,7 +76,7 @@ void KernelGenerator::visit(const ir::operation::If &node)
const auto then_subg_index = node.param().then_subg_index;
const auto else_subg_index = node.param().else_subg_index;
- std::vector<std::shared_ptr<backend::ITensor>> input_tensors;
+ std::vector<backend::ITensor *> input_tensors;
for (const auto input_index : node.getInputs())
{
auto input_tensor = getTensor(input_index);
@@ -86,14 +84,11 @@ void KernelGenerator::visit(const ir::operation::If &node)
input_tensors.emplace_back(input_tensor);
}
- std::vector<std::shared_ptr<backend::ITensor>> output_tensors;
- exec::DynAllocInfoMap outputs_dyn_alloc_info;
+ std::vector<backend::ITensor *> output_tensors;
for (const auto output_index : node.getOutputs())
{
auto output_tensor = getTensor(output_index);
-
output_tensors.emplace_back(output_tensor);
- outputs_dyn_alloc_info[output_tensor] = exec::DynAllocInfo{output_index};
}
// IfLayer just sets ExecutorMap instead of then and else executor to avoid complexity of
@@ -101,8 +96,8 @@ void KernelGenerator::visit(const ir::operation::If &node)
const auto cond_tensor = input_tensors.front();
input_tensors.erase(input_tensors.begin());
auto fn = std::make_unique<::onert::backend::controlflow::kernel::IfLayer>(
- cond_tensor, input_tensors, output_tensors, node.getOutputs(), _graph, outputs_dyn_alloc_info,
- then_subg_index, else_subg_index, _executor_map);
+ cond_tensor, input_tensors, output_tensors, node.getOutputs(), _graph, then_subg_index,
+ else_subg_index, _executor_map);
_return_fn = std::move(fn);
}
@@ -113,14 +108,10 @@ void KernelGenerator::visit(const ir::operation::Permute &node)
const auto input_index{node.getInputs().at(0)};
// Add PermuteLayer
- std::vector<std::shared_ptr<ITensor>> output_tensors{getTensor(output_index)};
- std::vector<std::shared_ptr<ITensor>> input_tensors{getTensor(input_index)};
- std::unordered_map<std::shared_ptr<ITensor>, exec::DynAllocInfo> outputs_dyn_alloc_info;
- outputs_dyn_alloc_info[output_tensors.at(0)] = exec::DynAllocInfo{output_index};
-
- auto fn =
- std::make_unique<kernel::PermuteLayer>(input_tensors, output_tensors, outputs_dyn_alloc_info);
+ std::vector<ITensor *> output_tensors{getTensor(output_index)};
+ std::vector<ITensor *> input_tensors{getTensor(input_index)};
+ auto fn = std::make_unique<kernel::PermuteLayer>(input_tensors, output_tensors);
_return_fn = std::move(fn);
}
@@ -131,7 +122,7 @@ void KernelGenerator::visit(const ir::operation::While &node)
// This op does not support input as a constant, because controlflow backend does not have
// TensorBuilder
- std::vector<std::shared_ptr<backend::ITensor>> input_tensors;
+ std::vector<backend::ITensor *> input_tensors;
for (const auto input_index : node.getInputs())
{
auto input_tensor = getTensor(input_index);
@@ -139,29 +130,25 @@ void KernelGenerator::visit(const ir::operation::While &node)
input_tensors.emplace_back(input_tensor);
}
- std::vector<std::shared_ptr<backend::ITensor>> output_tensors;
- std::unordered_map<std::shared_ptr<ITensor>, exec::DynAllocInfo> outputs_dyn_alloc_info;
+ std::vector<backend::ITensor *> output_tensors;
for (const auto output_index : node.getOutputs())
{
auto output_tensor = getTensor(output_index);
-
output_tensors.emplace_back(output_tensor);
-
- outputs_dyn_alloc_info[output_tensor] = exec::DynAllocInfo{output_index};
}
// WhileLayer just sets ExecutorMap instead of cond and body executor to avoid the complexity of
// creating executors recursively
auto fn = std::make_unique<::onert::backend::controlflow::kernel::WhileLayer>(
- input_tensors, output_tensors, node.getOutputs(), _graph, outputs_dyn_alloc_info,
- cond_subg_index, body_subg_index, _executor_map);
+ input_tensors, output_tensors, node.getOutputs(), _graph, cond_subg_index, body_subg_index,
+ _executor_map);
_return_fn = std::move(fn);
}
-std::shared_ptr<backend::ITensor> KernelGenerator::getTensor(const ir::OperandIndex &index)
+backend::ITensor *KernelGenerator::getTensor(const ir::OperandIndex &index)
{
- std::shared_ptr<backend::ITensor> ret = _tensor_registries.getITensor(index);
+ backend::ITensor *ret = _tensor_registries.getITensor(index);
assert(ret != nullptr);
return ret;
}
diff --git a/runtime/onert/core/src/backend/controlflow/KernelGenerator.h b/runtime/onert/core/src/backend/controlflow/KernelGenerator.h
index b84a810e4..ce248913f 100644
--- a/runtime/onert/core/src/backend/controlflow/KernelGenerator.h
+++ b/runtime/onert/core/src/backend/controlflow/KernelGenerator.h
@@ -56,7 +56,7 @@ public:
void visit(const ir::operation::While &) override;
private:
- std::shared_ptr<backend::ITensor> getTensor(const ir::OperandIndex &index);
+ backend::ITensor *getTensor(const ir::OperandIndex &index);
private:
const ir::Graph &_graph;
diff --git a/runtime/onert/core/src/backend/controlflow/TensorBuilder.cc b/runtime/onert/core/src/backend/controlflow/TensorBuilder.cc
index e5c3f5fd5..7d0ff201f 100644
--- a/runtime/onert/core/src/backend/controlflow/TensorBuilder.cc
+++ b/runtime/onert/core/src/backend/controlflow/TensorBuilder.cc
@@ -29,8 +29,8 @@ namespace controlflow
TensorBuilder::TensorBuilder(const std::shared_ptr<TensorRegistry> &tensor_reg)
: _tensor_reg{tensor_reg}, _dynamic_tensor_mgr{new DynamicTensorManager(_tensor_reg)},
- _static_tensor_mgr{
- new cpu_common::StaticTensorManager(_tensor_reg->base_reg(), _dynamic_tensor_mgr.get())}
+ _static_tensor_mgr{new cpu_common::StaticTensorManager(
+ _tensor_reg->base_reg(), _dynamic_tensor_mgr->dynamic_mem_mgr().get())}
{
/* empty */
}
@@ -101,25 +101,14 @@ void TensorBuilder::allocate()
// This is because CPU kernels require `ITensor`s to be allocated before Kernel Generation.
}
-std::shared_ptr<cpu_common::Tensor> TensorBuilder::nativeOwnTensorAt(const ir::OperandIndex &ind)
+IDynamicTensorManager *TensorBuilder::dynamicTensorManager(void)
{
- return _tensor_reg->getNativeOwnTensor(ind);
-}
-
-std::unique_ptr<ITensorManager> TensorBuilder::releaseStaticTensorManager(void)
-{
- return std::move(_static_tensor_mgr);
+ return _dynamic_tensor_mgr.get();
}
-std::unique_ptr<ITensorManager> TensorBuilder::releaseDynamicTensorManager(void)
+cpu_common::Tensor *TensorBuilder::nativeOwnTensorAt(const ir::OperandIndex &ind)
{
- return std::move(_dynamic_tensor_mgr);
-}
-
-void TensorBuilder::setNativeUserTensor(const ir::OperandIndex &ind,
- const std::shared_ptr<UserTensor> &tensor)
-{
- _tensor_reg->setNativeUserTensor(ind, tensor);
+ return _tensor_reg->getNativeOwnTensor(ind);
}
} // namespace controlflow
diff --git a/runtime/onert/core/src/backend/controlflow/TensorBuilder.h b/runtime/onert/core/src/backend/controlflow/TensorBuilder.h
index 2f2a2c47e..695994761 100644
--- a/runtime/onert/core/src/backend/controlflow/TensorBuilder.h
+++ b/runtime/onert/core/src/backend/controlflow/TensorBuilder.h
@@ -27,7 +27,6 @@
#include <unordered_map>
#include "DynamicTensorManager.h"
-#include "UserTensorRegistry.h"
namespace onert
{
@@ -59,20 +58,15 @@ public:
void allocate() override;
void postFunctionPrepare() override { /* DO NOTHING */}
- std::unique_ptr<ITensorManager> releaseStaticTensorManager(void) override;
-
- IDynamicTensorManager *dynamicTensorManager(void) override { return _dynamic_tensor_mgr.get(); }
-
- std::unique_ptr<ITensorManager> releaseDynamicTensorManager(void) override;
+ IDynamicTensorManager *dynamicTensorManager(void) override;
/**
* @brief Get tensor with a specific OperandIndex.
* @param ind OperandIndex for the tensor. There must exist a tensor with this ind.
* If not, program will crash with assert or exception.
- * @return shared_ptr<operand::Tensor>
+ * @return operand::Tensor *
*/
- std::shared_ptr<cpu_common::Tensor> nativeOwnTensorAt(const ir::OperandIndex &ind);
- void setNativeUserTensor(const ir::OperandIndex &ind, const std::shared_ptr<UserTensor> &tensor);
+ cpu_common::Tensor *nativeOwnTensorAt(const ir::OperandIndex &ind);
private:
const std::shared_ptr<TensorRegistry> _tensor_reg;
diff --git a/runtime/onert/core/src/backend/controlflow/TensorRegistry.h b/runtime/onert/core/src/backend/controlflow/TensorRegistry.h
index 678c5b73b..94f71bb9c 100644
--- a/runtime/onert/core/src/backend/controlflow/TensorRegistry.h
+++ b/runtime/onert/core/src/backend/controlflow/TensorRegistry.h
@@ -48,7 +48,7 @@ class TensorRegistry : public ITensorRegistry
public:
TensorRegistry() : _base_reg{new cpu_common::TensorRegistry} {}
- std::shared_ptr<ITensor> getITensor(const ir::OperandIndex &ind) override
+ ITensor *getITensor(const ir::OperandIndex &ind) override
{
auto base_tensor = _base_reg->getITensor(ind);
if (base_tensor)
@@ -56,7 +56,7 @@ public:
return getNativeUserTensor(ind);
}
- std::shared_ptr<ITensor> getNativeITensor(const ir::OperandIndex &ind) override
+ ITensor *getNativeITensor(const ir::OperandIndex &ind) override
{
auto base_tensor = _base_reg->getNativeITensor(ind);
if (base_tensor)
@@ -64,7 +64,7 @@ public:
return getNativeUserTensor(ind);
}
- std::shared_ptr<IPortableTensor> getPortableTensor(const ir::OperandIndex &ind)
+ IPortableTensor *getPortableTensor(const ir::OperandIndex &ind)
{
auto base_tensor = _base_reg->getPortableTensor(ind);
if (base_tensor)
@@ -72,7 +72,7 @@ public:
return getNativeUserTensor(ind);
}
- std::shared_ptr<IPortableTensor> getNativeTensor(const ir::OperandIndex &ind)
+ IPortableTensor *getNativeTensor(const ir::OperandIndex &ind)
{
auto base_tensor = _base_reg->getNativeTensor(ind);
if (base_tensor)
@@ -80,21 +80,20 @@ public:
return getNativeUserTensor(ind);
}
- std::shared_ptr<Tensor> getNativeOwnTensor(const ir::OperandIndex &ind)
+ Tensor *getNativeOwnTensor(const ir::OperandIndex &ind)
{
return _base_reg->getNativeTensor(ind);
}
- std::shared_ptr<UserTensor> getNativeUserTensor(const ir::OperandIndex &ind)
+ UserTensor *getNativeUserTensor(const ir::OperandIndex &ind)
{
auto tensor = _native_user_tensors.find(ind);
if (tensor != _native_user_tensors.end())
- return tensor->second;
+ return tensor->second.get();
return nullptr;
}
- bool setMigrantTensor(const ir::OperandIndex &ind,
- const std::shared_ptr<IPortableTensor> &tensor) override
+ bool setMigrantTensor(const ir::OperandIndex &ind, IPortableTensor *tensor) override
{
assert(tensor);
assert(!getITensor(ind)); // For the ind, tensor is not registered yet
@@ -102,21 +101,21 @@ public:
return true;
}
- void setNativeOwnTensor(ir::OperandIndex ind, const std::shared_ptr<Tensor> &tensor)
+ void setNativeOwnTensor(ir::OperandIndex ind, std::unique_ptr<Tensor> &&tensor)
{
assert(tensor);
assert(!getITensor(ind)); // For the ind, tensor is not registered yet
- _base_reg->setNativeTensor(ind, tensor);
+ _base_reg->setNativeTensor(ind, std::move(tensor));
}
- void setNativeUserTensor(ir::OperandIndex ind, const std::shared_ptr<UserTensor> &tensor)
+ void setNativeUserTensor(ir::OperandIndex ind, std::unique_ptr<UserTensor> &&tensor)
{
assert(tensor);
assert(!getITensor(ind)); // For the ind, tensor is not registered yet
- _native_user_tensors[ind] = tensor;
+ _native_user_tensors[ind] = std::move(tensor);
}
- const ir::OperandIndexMap<std::shared_ptr<UserTensor>> &native_user_tensors()
+ const ir::OperandIndexMap<std::unique_ptr<UserTensor>> &native_user_tensors()
{
return _native_user_tensors;
}
@@ -124,7 +123,7 @@ public:
private:
std::shared_ptr<cpu_common::TensorRegistry> _base_reg;
- ir::OperandIndexMap<std::shared_ptr<UserTensor>> _native_user_tensors;
+ ir::OperandIndexMap<std::unique_ptr<UserTensor>> _native_user_tensors;
};
} // namespace controlflow
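The registry change above switches tensor storage from shared_ptr to unique_ptr, so the registry is the single owner and callers only see raw, non-owning pointers. A small self-contained sketch of that ownership pattern, using a plain int key and a stub Tensor rather than the onert types:

#include <cassert>
#include <memory>
#include <unordered_map>

struct Tensor
{
  explicit Tensor(int size) : size(size) {}
  int size;
};

class Registry
{
public:
  // The registry takes ownership of the tensor...
  void setNativeTensor(int ind, std::unique_ptr<Tensor> &&tensor)
  {
    assert(tensor);
    _tensors[ind] = std::move(tensor);
  }

  // ...and hands out non-owning raw pointers only
  Tensor *getNativeTensor(int ind)
  {
    auto it = _tensors.find(ind);
    return it != _tensors.end() ? it->second.get() : nullptr;
  }

private:
  std::unordered_map<int, std::unique_ptr<Tensor>> _tensors;
};

int main()
{
  Registry reg;
  reg.setNativeTensor(0, std::make_unique<Tensor>(16));
  Tensor *t = reg.getNativeTensor(0);
  assert(t && t->size == 16);
}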
diff --git a/runtime/onert/core/src/backend/controlflow/UserTensor.cc b/runtime/onert/core/src/backend/controlflow/UserTensor.cc
index c8e2ebade..5081a90ea 100644
--- a/runtime/onert/core/src/backend/controlflow/UserTensor.cc
+++ b/runtime/onert/core/src/backend/controlflow/UserTensor.cc
@@ -16,6 +16,9 @@
#include "UserTensor.h"
+#include "util/Exceptions.h"
+#include "ir/DataType.h"
+
namespace onert
{
namespace backend
@@ -35,6 +38,16 @@ size_t UserTensor::calcOffset(const ir::Coordinates &coords) const
return offset;
}
+bool UserTensor::applyShape(const ir::Shape &new_shape)
+{
+ // User tensors cannot be reallocated.
+ auto new_size = new_shape.num_elements() * ir::sizeOfDataType(data_type());
+ if (total_size() < new_size)
+ throw InsufficientBufferSizeException{"User given buffer size is too small."};
+ setShape(new_shape);
+ return true;
+}
+
} // namespace controlflow
} // namespace backend
} // namespace onert
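applyShape() for a user tensor only succeeds when the user-provided buffer is already large enough, since the runtime cannot reallocate memory it does not own. A minimal sketch of that check follows; the Shape type is a simplified stand-in for ir::Shape, and a plain runtime_error is thrown where the runtime uses InsufficientBufferSizeException:

#include <cstdint>
#include <numeric>
#include <stdexcept>
#include <vector>

struct Shape
{
  std::vector<int> dims;
  size_t num_elements() const
  {
    return std::accumulate(dims.begin(), dims.end(), size_t{1},
                           [](size_t acc, int d) { return acc * static_cast<size_t>(d); });
  }
};

class UserTensorSketch
{
public:
  UserTensorSketch(uint8_t *buffer, size_t size) : _buffer(buffer), _size(size) {}

  // Shrinking or keeping the size is fine; growing beyond the user buffer is not
  bool applyShape(const Shape &new_shape, size_t elem_size)
  {
    const size_t new_size = new_shape.num_elements() * elem_size;
    if (_size < new_size)
      throw std::runtime_error{"User given buffer size is too small."};
    _shape = new_shape;
    return true;
  }

  uint8_t *buffer() const { return _buffer; }

private:
  uint8_t *_buffer;
  size_t _size;
  Shape _shape;
};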
diff --git a/runtime/onert/core/src/backend/controlflow/UserTensor.h b/runtime/onert/core/src/backend/controlflow/UserTensor.h
index 9be33595d..7aa62a8a9 100644
--- a/runtime/onert/core/src/backend/controlflow/UserTensor.h
+++ b/runtime/onert/core/src/backend/controlflow/UserTensor.h
@@ -38,16 +38,12 @@ namespace controlflow
class UserTensor : public IPortableTensor
{
public:
- UserTensor(const ir::OperandInfo &info, ir::Layout layout, uint8_t *buffer, size_t size,
- IDynamicTensorManager *dynamic_tensor_manager)
- : _info{info}, _layout{layout}, _buffer{buffer}, _size{size}, _dynamic{false},
- _dynamic_tensor_manager{dynamic_tensor_manager}
+ UserTensor(const ir::OperandInfo &info, ir::Layout layout, uint8_t *buffer, size_t size)
+ : IPortableTensor{info}, _layout{layout}, _buffer{buffer}, _size{size}, _dynamic{false}
{
}
- UserTensor(const ir::OperandInfo &info, ir::Layout layout,
- IDynamicTensorManager *dynamic_tensor_manager)
- : UserTensor{info, layout, nullptr, 0, dynamic_tensor_manager}
+ UserTensor(const ir::OperandInfo &info, ir::Layout layout) : UserTensor{info, layout, nullptr, 0}
{
}
@@ -73,15 +69,13 @@ public:
ir::Shape getShape() const override { return _info.shape(); }
void setShape(const ir::Shape &new_shape) override { _info.shape(new_shape); }
bool is_constant() const override { return false; }
- IDynamicTensorManager *dynamic_tensor_manager() override { return _dynamic_tensor_manager; }
+ bool applyShape(const ir::Shape &) override;
private:
- ir::OperandInfo _info;
ir::Layout _layout;
uint8_t *_buffer;
size_t _size;
bool _dynamic;
- IDynamicTensorManager *_dynamic_tensor_manager;
};
} // namespace controlflow
diff --git a/runtime/onert/core/src/backend/controlflow/kernel/IfLayer.cc b/runtime/onert/core/src/backend/controlflow/kernel/IfLayer.cc
index 8377c7183..c0329acd8 100644
--- a/runtime/onert/core/src/backend/controlflow/kernel/IfLayer.cc
+++ b/runtime/onert/core/src/backend/controlflow/kernel/IfLayer.cc
@@ -30,16 +30,13 @@ namespace controlflow
namespace kernel
{
-IfLayer::IfLayer(const std::shared_ptr<backend::ITensor> &cond_tensor,
- const std::vector<std::shared_ptr<backend::ITensor>> input_tensors,
- const std::vector<std::shared_ptr<backend::ITensor>> output_tensors,
+IfLayer::IfLayer(backend::ITensor *cond_tensor, const std::vector<backend::ITensor *> input_tensors,
+ const std::vector<backend::ITensor *> output_tensors,
const ir::OperandIndexSequence &output_indices, const ir::Graph &graph,
- const exec::DynAllocInfoMap &outputs_dyn_alloc_info,
const ir::SubgraphIndex &then_subg_index, const ir::SubgraphIndex &else_subg_index,
exec::ExecutorMap *executor_map)
: _cond_tensor{cond_tensor}, _input_tensors{input_tensors}, _output_tensors{output_tensors},
- _output_indices{output_indices}, _graph{graph},
- _outputs_dyn_alloc_info{outputs_dyn_alloc_info}, _then_subg_index{then_subg_index},
+ _output_indices{output_indices}, _graph{graph}, _then_subg_index{then_subg_index},
_else_subg_index{else_subg_index}, _executor_map{executor_map}
{
// At this point, executor_map may not have executors of then subg and else subg
@@ -63,21 +60,24 @@ void IfLayer::run()
};
exec::ExecutorBase *subg_exec = nullptr;
- if (getResultCond(_cond_tensor.get()))
+ bool cond_result = getResultCond(_cond_tensor);
+ if (cond_result)
{
+ VERBOSE(If) << "Call to $" << _then_subg_index << " (then)" << std::endl;
subg_exec = nnfw::misc::polymorphic_downcast<exec::ExecutorBase *>(
_executor_map->at(_then_subg_index).get());
}
else
{
+ VERBOSE(If) << "Call to $" << _else_subg_index << " (else)" << std::endl;
subg_exec = nnfw::misc::polymorphic_downcast<exec::ExecutorBase *>(
_executor_map->at(_else_subg_index).get());
}
const auto &subg_graph = subg_exec->graph();
- std::vector<std::shared_ptr<backend::ITensor>> src_tensors;
- std::vector<std::shared_ptr<backend::ITensor>> dst_tensors;
+ std::vector<backend::ITensor *> src_tensors;
+ std::vector<backend::ITensor *> dst_tensors;
// Add tensors used in subgraph or contained in outputs of subgraph
assert(subg_graph.getInputs().size() == _input_tensors.size());
assert(subg_graph.getInputs().size() == subg_exec->getInputTensors().size());
@@ -91,9 +91,8 @@ void IfLayer::run()
dst_tensors.emplace_back(subg_exec->getInputTensors().at(i));
}
}
- const auto &subg_inputs_dyn_alloc_info = subg_exec->getInputsDynamicAllocInfo();
const auto permute_op_input_to_subg_input =
- std::make_shared<PermuteLayer>(src_tensors, dst_tensors, subg_inputs_dyn_alloc_info);
+ std::make_shared<PermuteLayer>(src_tensors, dst_tensors);
// Add tensors used as output of operation or contained in outputs of operation
src_tensors.clear();
@@ -111,7 +110,7 @@ void IfLayer::run()
}
}
const auto permute_subg_output_to_op_output =
- std::make_shared<PermuteLayer>(src_tensors, dst_tensors, _outputs_dyn_alloc_info);
+ std::make_shared<PermuteLayer>(src_tensors, dst_tensors);
// Remove copying of unused tensor
permute_op_input_to_subg_input->prepare();
@@ -120,6 +119,8 @@ void IfLayer::run()
// Copy & run
subg_exec->execute(_input_tensors, permute_op_input_to_subg_input);
permute_subg_output_to_op_output->run();
+ VERBOSE(If) << "Return from $" << (cond_result ? _then_subg_index : _else_subg_index)
+ << std::endl;
}
} // namespace kernel
diff --git a/runtime/onert/core/src/backend/controlflow/kernel/IfLayer.h b/runtime/onert/core/src/backend/controlflow/kernel/IfLayer.h
index ef3a6e6f6..1461388dc 100644
--- a/runtime/onert/core/src/backend/controlflow/kernel/IfLayer.h
+++ b/runtime/onert/core/src/backend/controlflow/kernel/IfLayer.h
@@ -32,11 +32,9 @@ namespace kernel
class IfLayer : public ::onert::exec::IFunction
{
public:
- IfLayer(const std::shared_ptr<backend::ITensor> &cond_tensor,
- const std::vector<std::shared_ptr<backend::ITensor>> input_tensors,
- const std::vector<std::shared_ptr<backend::ITensor>> output_tensors,
+ IfLayer(backend::ITensor *cond_tensor, const std::vector<backend::ITensor *> input_tensors,
+ const std::vector<backend::ITensor *> output_tensors,
const ir::OperandIndexSequence &output_indices, const ir::Graph &graph,
- const exec::DynAllocInfoMap &outputs_dyn_alloc_info,
const ir::SubgraphIndex &then_subg_index, const ir::SubgraphIndex &else_subg_index,
exec::ExecutorMap *executor_map);
@@ -44,12 +42,11 @@ public:
void run() override;
private:
- const std::shared_ptr<backend::ITensor> _cond_tensor;
- const std::vector<std::shared_ptr<backend::ITensor>> _input_tensors;
- const std::vector<std::shared_ptr<backend::ITensor>> _output_tensors;
+ backend::ITensor *_cond_tensor;
+ const std::vector<backend::ITensor *> _input_tensors;
+ const std::vector<backend::ITensor *> _output_tensors;
const ir::OperandIndexSequence &_output_indices;
const ir::Graph &_graph;
- const exec::DynAllocInfoMap _outputs_dyn_alloc_info;
const ir::SubgraphIndex _then_subg_index;
const ir::SubgraphIndex _else_subg_index;
exec::ExecutorMap *_executor_map;
diff --git a/runtime/onert/core/src/backend/controlflow/kernel/PermuteLayer.cc b/runtime/onert/core/src/backend/controlflow/kernel/PermuteLayer.cc
index e8f1ea679..49fbb33c4 100644
--- a/runtime/onert/core/src/backend/controlflow/kernel/PermuteLayer.cc
+++ b/runtime/onert/core/src/backend/controlflow/kernel/PermuteLayer.cc
@@ -54,12 +54,9 @@ void PermuteLayer::run()
try
{
- const auto dst_index = _dst_dyn_alloc_info_map.at(dst_tensor).ind;
- auto dyn_tensor_manager = dst_tensor->dynamic_tensor_manager();
- if (!dyn_tensor_manager)
+ if (!dst_tensor->applyShape(new_shape))
throw std::runtime_error{
"Error: PermuteLayer: output's TensorManager does not support dynamic tensor"};
- dyn_tensor_manager->applyShape(dst_index, new_shape);
assert(dst_tensor->buffer() != nullptr);
}
catch (const std::out_of_range &e)
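With the change above, a kernel that produces a dynamically shaped output no longer goes through the dynamic tensor manager; it asks the destination tensor to resize itself. A sketch of that call-site pattern, using a stub interface rather than backend::ITensor:

#include <cassert>
#include <stdexcept>

struct Shape
{
  int dims[4];
};

struct ITensorSketch
{
  virtual ~ITensorSketch() = default;
  virtual bool applyShape(const Shape &) = 0; // resize (and reallocate) if supported
  virtual void *buffer() const = 0;
};

// Called right before copying data into `dst` when the inferred shape changed
void resizeDynamicOutput(ITensorSketch *dst, const Shape &new_shape)
{
  if (!dst->applyShape(new_shape))
    throw std::runtime_error{"output tensor does not support dynamic shapes"};
  assert(dst->buffer() != nullptr); // applyShape must leave a valid buffer behind
}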
diff --git a/runtime/onert/core/src/backend/controlflow/kernel/PermuteLayer.h b/runtime/onert/core/src/backend/controlflow/kernel/PermuteLayer.h
index 403ac770d..8129403a5 100644
--- a/runtime/onert/core/src/backend/controlflow/kernel/PermuteLayer.h
+++ b/runtime/onert/core/src/backend/controlflow/kernel/PermuteLayer.h
@@ -33,10 +33,7 @@ namespace kernel
class PermuteLayer : public onert::exec::IPermuteFunction
{
public:
- PermuteLayer(const std::vector<std::shared_ptr<ITensor>> &src_tensors,
- const std::vector<std::shared_ptr<ITensor>> &dst_tensors,
- const exec::DynAllocInfoMap &dst_dyn_alloc_info_map)
- : _dst_dyn_alloc_info_map{dst_dyn_alloc_info_map}
+ PermuteLayer(const std::vector<ITensor *> &src_tensors, const std::vector<ITensor *> &dst_tensors)
{
assert(src_tensors.size() == dst_tensors.size());
_src_tensors = src_tensors;
@@ -64,9 +61,6 @@ public:
}
void run() override;
-
-private:
- const exec::DynAllocInfoMap _dst_dyn_alloc_info_map;
};
} // namespace kernel
diff --git a/runtime/onert/core/src/backend/controlflow/kernel/WhileLayer.cc b/runtime/onert/core/src/backend/controlflow/kernel/WhileLayer.cc
index 50936e5f6..225f0dd7c 100644
--- a/runtime/onert/core/src/backend/controlflow/kernel/WhileLayer.cc
+++ b/runtime/onert/core/src/backend/controlflow/kernel/WhileLayer.cc
@@ -30,16 +30,14 @@ namespace controlflow
namespace kernel
{
-WhileLayer::WhileLayer(const std::vector<std::shared_ptr<backend::ITensor>> &input_tensors,
- const std::vector<std::shared_ptr<backend::ITensor>> &output_tensors,
+WhileLayer::WhileLayer(const std::vector<backend::ITensor *> input_tensors,
+ const std::vector<backend::ITensor *> output_tensors,
const ir::OperandIndexSequence &output_indices, const ir::Graph &graph,
- const exec::DynAllocInfoMap &outputs_dyn_alloc_info,
const ir::SubgraphIndex &cond_subg_index,
const ir::SubgraphIndex &body_subg_index, exec::ExecutorMap *executor_map)
: _cond_subg_index{cond_subg_index}, _body_subg_index{body_subg_index},
_output_indices{output_indices}, _graph{graph}, _input_tensors{input_tensors},
- _output_tensors{output_tensors}, _outputs_dyn_alloc_info{outputs_dyn_alloc_info},
- _executor_map{executor_map}
+ _output_tensors{output_tensors}, _executor_map{executor_map}
{
// At this point, executor_map may not have executors of cond subg and body subg
}
@@ -62,15 +60,13 @@ void WhileLayer::run()
_executor_map->at(_body_subg_index).get());
const auto &cond_graph = cond_exec->graph();
- const auto &cond_inputs_dyn_alloc = cond_exec->getInputsDynamicAllocInfo();
const auto &body_graph = body_exec->graph();
- const auto &body_inputs_dyn_alloc = body_exec->getInputsDynamicAllocInfo();
- std::vector<std::shared_ptr<backend::ITensor>> input_tensors;
- std::vector<std::shared_ptr<backend::ITensor>> cond_input_tensors;
- std::vector<std::shared_ptr<backend::ITensor>> body_input_tensors;
- std::vector<std::shared_ptr<backend::ITensor>> body_output_tensors;
- std::vector<std::shared_ptr<backend::ITensor>> output_tensors;
+ std::vector<backend::ITensor *> input_tensors;
+ std::vector<backend::ITensor *> cond_input_tensors;
+ std::vector<backend::ITensor *> body_input_tensors;
+ std::vector<backend::ITensor *> body_output_tensors;
+ std::vector<backend::ITensor *> output_tensors;
// Add only used tensors in cond subgraph
assert(cond_graph.getInputs().size() == _input_tensors.size());
@@ -85,7 +81,7 @@ void WhileLayer::run()
}
}
const auto permute_op_input_to_cond_input =
- std::make_shared<PermuteLayer>(input_tensors, cond_input_tensors, cond_inputs_dyn_alloc);
+ std::make_shared<PermuteLayer>(input_tensors, cond_input_tensors);
// Add only used tensors among outputs of while operation
assert(_output_indices.size() == _input_tensors.size());
@@ -103,7 +99,7 @@ void WhileLayer::run()
}
}
const auto permute_op_input_to_op_output =
- std::make_shared<PermuteLayer>(input_tensors, output_tensors, _outputs_dyn_alloc_info);
+ std::make_shared<PermuteLayer>(input_tensors, output_tensors);
// Add all tensors, including unused ones, in body subgraph because unused input tensors will be
// copied to output tensors in body subgraph
@@ -111,7 +107,7 @@ void WhileLayer::run()
input_tensors = _input_tensors;
body_input_tensors = body_exec->getInputTensors();
const auto permute_op_input_to_body_input =
- std::make_shared<PermuteLayer>(input_tensors, body_input_tensors, body_inputs_dyn_alloc);
+ std::make_shared<PermuteLayer>(input_tensors, body_input_tensors);
// Add only used tensors in cond subgraph
assert(cond_graph.getInputs().size() == body_exec->getOutputTensors().size());
@@ -127,8 +123,8 @@ void WhileLayer::run()
cond_input_tensors.emplace_back(cond_exec->getInputTensors().at(i));
}
}
- const auto permute_body_output_to_cond_input = std::make_shared<PermuteLayer>(
- body_output_tensors, cond_input_tensors, cond_inputs_dyn_alloc);
+ const auto permute_body_output_to_cond_input =
+ std::make_shared<PermuteLayer>(body_output_tensors, cond_input_tensors);
// Add only used tensors in body subgraph
assert(body_graph.getInputs().size() == body_exec->getOutputTensors().size());
@@ -146,8 +142,8 @@ void WhileLayer::run()
body_input_tensors.emplace_back(body_exec->getInputTensors().at(i));
}
}
- const auto permute_body_output_to_body_input = std::make_shared<PermuteLayer>(
- body_output_tensors, body_input_tensors, body_inputs_dyn_alloc);
+ const auto permute_body_output_to_body_input =
+ std::make_shared<PermuteLayer>(body_output_tensors, body_input_tensors);
// Add only used tensors among outputs of while operation
assert(_output_indices.size() == body_exec->getOutputTensors().size());
@@ -165,7 +161,7 @@ void WhileLayer::run()
}
}
const auto permute_body_output_to_op_output =
- std::make_shared<PermuteLayer>(body_output_tensors, output_tensors, _outputs_dyn_alloc_info);
+ std::make_shared<PermuteLayer>(body_output_tensors, output_tensors);
// Remove copying of unused tensor
permute_op_input_to_cond_input->prepare();
@@ -175,7 +171,9 @@ void WhileLayer::run()
permute_body_output_to_body_input->prepare();
permute_body_output_to_op_output->prepare();
+ VERBOSE(While) << "Call to $" << _cond_subg_index << " (cond)" << std::endl;
cond_exec->execute(_input_tensors, permute_op_input_to_cond_input);
+ VERBOSE(While) << "Return from $" << _cond_subg_index << std::endl;
assert(cond_exec->getOutputTensors().size() == 1);
auto &cond_output_tensor = cond_exec->getOutputTensors().at(0);
@@ -186,21 +184,27 @@ void WhileLayer::run()
};
const auto body_execute_with_op_inputs = [&]() {
+ VERBOSE(While) << "Call to $" << _body_subg_index << " (body)" << std::endl;
body_exec->execute(_input_tensors, permute_op_input_to_body_input);
+ VERBOSE(While) << "Return from $" << _body_subg_index << std::endl;
};
const auto body_execute_with_body_outputs = [&]() {
+ VERBOSE(While) << "Call to $" << _body_subg_index << " (body)" << std::endl;
body_exec->execute(body_exec->getOutputTensors(), permute_body_output_to_body_input);
+ VERBOSE(While) << "Return from $" << _body_subg_index << std::endl;
};
std::function<void()> body_execute = body_execute_with_op_inputs;
const auto cond_execute = [&]() {
+ VERBOSE(While) << "Call to $" << _cond_subg_index << " (cond)" << std::endl;
cond_exec->execute(body_exec->getOutputTensors(), permute_body_output_to_cond_input);
+ VERBOSE(While) << "Return from $" << _cond_subg_index << std::endl;
};
auto permute_to_outputs_fn = permute_op_input_to_op_output;
// Loop while Cond subgraph's output is true
- while (getResultCond(cond_output_tensor.get()))
+ while (getResultCond(cond_output_tensor))
{
body_execute();
cond_execute();
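Stripped of the tensor plumbing, the run() body above reduces to the loop sketched below. This is a simplification with std::function placeholders, not the actual WhileLayer code, and the reassignment of body_execute after the first pass is an assumption based on the two body lambdas defined earlier in the hunk:

#include <functional>

// cond_* evaluate the condition subgraph; body_* run the body subgraph.
void runWhileSketch(const std::function<void()> &cond_execute_with_op_inputs,
                    const std::function<void()> &cond_execute_with_body_outputs,
                    const std::function<void()> &body_execute_with_op_inputs,
                    const std::function<void()> &body_execute_with_body_outputs,
                    const std::function<bool()> &cond_result)
{
  std::function<void()> body_execute = body_execute_with_op_inputs;
  cond_execute_with_op_inputs();                   // evaluate the condition once up front
  while (cond_result())                            // loop while cond subgraph's output is true
  {
    body_execute();                                // run the body subgraph
    cond_execute_with_body_outputs();              // re-evaluate cond on the body's outputs
    body_execute = body_execute_with_body_outputs; // afterwards, feed the body its own outputs
  }
}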
diff --git a/runtime/onert/core/src/backend/controlflow/kernel/WhileLayer.h b/runtime/onert/core/src/backend/controlflow/kernel/WhileLayer.h
index ebca8acdc..9dae49281 100644
--- a/runtime/onert/core/src/backend/controlflow/kernel/WhileLayer.h
+++ b/runtime/onert/core/src/backend/controlflow/kernel/WhileLayer.h
@@ -35,10 +35,9 @@ namespace kernel
class WhileLayer : public ::onert::exec::IFunction
{
public:
- WhileLayer(const std::vector<std::shared_ptr<backend::ITensor>> &input_tensors,
- const std::vector<std::shared_ptr<backend::ITensor>> &output_tensors,
+ WhileLayer(const std::vector<backend::ITensor *> input_tensors,
+ const std::vector<backend::ITensor *> output_tensors,
const ir::OperandIndexSequence &output_indices, const ir::Graph &graph,
- const exec::DynAllocInfoMap &outputs_dyn_alloc_info,
const ir::SubgraphIndex &cond_subg_index, const ir::SubgraphIndex &body_subg_index,
exec::ExecutorMap *executor_map);
@@ -50,9 +49,8 @@ private:
const ir::SubgraphIndex _body_subg_index;
const ir::OperandIndexSequence &_output_indices;
const ir::Graph &_graph;
- const std::vector<std::shared_ptr<backend::ITensor>> _input_tensors;
- const std::vector<std::shared_ptr<backend::ITensor>> _output_tensors;
- const exec::DynAllocInfoMap _outputs_dyn_alloc_info;
+ const std::vector<backend::ITensor *> _input_tensors;
+ const std::vector<backend::ITensor *> _output_tensors;
exec::ExecutorMap *_executor_map;
};
diff --git a/runtime/onert/core/src/backend/cpu_common/DynamicTensorManager.cc b/runtime/onert/core/src/backend/cpu_common/DynamicTensorManager.cc
index f7ce3d011..740248ccd 100644
--- a/runtime/onert/core/src/backend/cpu_common/DynamicTensorManager.cc
+++ b/runtime/onert/core/src/backend/cpu_common/DynamicTensorManager.cc
@@ -17,6 +17,7 @@
#include "backend/cpu_common/DynamicTensorManager.h"
#include "util/logging.h"
+#include "misc/polymorphic_downcast.h"
namespace onert
{
@@ -31,71 +32,18 @@ DynamicTensorManager::DynamicTensorManager(const std::shared_ptr<TensorRegistry>
// DO NOTHING
}
-void DynamicTensorManager::applyShape(const ir::OperandIndex &ind, const ir::Shape &new_shape)
-{
- VERBOSE_F() << ind << std::endl;
-
- auto tensor = _tensors->getNativeTensor(ind);
- assert(tensor);
-
- bool previously_dynamic = tensor->is_dynamic();
-
- auto allocTensorMem = [&](bool overwrite = false) {
- auto capacity = tensor->total_size();
- auto alloc = _dynamic_mem_mgr->allocate(ind, capacity);
-
- if (overwrite)
- tensor->overwriteBuffer(alloc);
- else
- tensor->setBuffer(alloc);
- };
-
- if (!previously_dynamic)
- {
- // TODO deallocate tensor->buffer()
- // issue is that staticTensorManager might have allocate this memory
- tensor->setShape(new_shape);
- tensor->set_dynamic();
- allocTensorMem(true);
- }
- else if (tensor->buffer() == nullptr)
- {
- tensor->setShape(new_shape);
- tensor->set_dynamic();
- allocTensorMem();
- }
- // when buffer was already allocated and new_shape requires different size
- else
- {
- auto previous_size = tensor->total_size();
- auto new_size = new_shape.num_elements() * sizeOfDataType(tensor->data_type());
- if (previous_size != new_size)
- {
- _dynamic_mem_mgr->deallocate(ind);
-
- tensor->setShape(new_shape);
- tensor->set_dynamic();
- allocTensorMem(true);
- }
- else
- { // when buffer with same size was already allocated, shape could differ
- tensor->setShape(new_shape);
- }
- }
-}
-
void DynamicTensorManager::buildTensor(const ir::OperandIndex &ind,
const ir::OperandInfo &tensor_info,
ir::Layout backend_layout)
{
assert(_tensors->getNativeTensor(ind) == nullptr);
- auto tensor = std::make_shared<Tensor>(tensor_info, backend_layout, this);
- _tensors->setNativeTensor(ind, tensor);
+ auto tensor = std::make_unique<Tensor>(tensor_info, backend_layout, _dynamic_mem_mgr.get());
+ _tensors->setNativeTensor(ind, std::move(tensor));
}
-void DynamicTensorManager::planDealloc(ir::OperationIndex op_ind, ir::OperandIndex operand_ind)
+void DynamicTensorManager::planDealloc(ir::OperationIndex op_ind, backend::ITensor *tensor)
{
- _dealloc_tensor_map[op_ind].emplace(operand_ind);
+ _dealloc_tensor_map[op_ind].emplace(tensor);
}
void DynamicTensorManager::deallocInput(ir::OperationIndex op_ind)
@@ -105,31 +53,26 @@ void DynamicTensorManager::deallocInput(ir::OperationIndex op_ind)
return;
auto &input_set = find->second;
- for (auto input_ind : input_set)
+ for (auto *tensor : input_set)
{
- auto *tensor = _tensors->getNativeTensor(input_ind).get();
if (!tensor->is_dynamic())
continue;
- _dynamic_mem_mgr->deallocate(input_ind);
- tensor->resetBuffer();
+ _dynamic_mem_mgr->deallocate(tensor);
- VERBOSE(DynamicTensorManager) << "Deallocating #" << input_ind.value()
+ auto *cpu_tensor = nnfw::misc::polymorphic_downcast<cpu_common::Tensor *>(tensor);
+ cpu_tensor->resetBuffer();
+
+ VERBOSE(DynamicTensorManager) << "Deallocating tensor " << (void *)cpu_tensor
<< " (input of op_ind: " << op_ind.value() << ")" << std::endl;
}
}
-void DynamicTensorManager::deallocSubgraphOutput(ir::OperandIndex output_ind)
+const ITensor *DynamicTensorManager::getRawITensor(ir::OperandIndex ind)
{
- auto *tensor = _tensors->getNativeTensor(output_ind).get();
- if (!tensor->is_dynamic())
- return;
-
- _dynamic_mem_mgr->deallocate(output_ind);
- tensor->resetBuffer();
-
- VERBOSE(DynamicTensorManager) << "Deallocating #" << output_ind.value()
- << " (output of a subgraph)" << std::endl;
+ auto ptr = _tensors->getITensor(ind);
+ assert(ptr);
+ return ptr;
}
} // namespace cpu_common
diff --git a/runtime/onert/core/src/backend/cpu_common/MemoryManager.cc b/runtime/onert/core/src/backend/cpu_common/MemoryManager.cc
index 8cb9c22ca..9f179d9ee 100644
--- a/runtime/onert/core/src/backend/cpu_common/MemoryManager.cc
+++ b/runtime/onert/core/src/backend/cpu_common/MemoryManager.cc
@@ -20,6 +20,7 @@
#include "MemoryPlannerFactory.h"
#include "util/ConfigSource.h"
+#include "util/logging.h"
namespace onert
{
@@ -70,20 +71,20 @@ uint8_t *MemoryManager::getBuffer(const ir::OperandIndex &ind) const
return _mem_alloc->base() + mem_blk.offset;
}
-std::shared_ptr<cpu_common::Allocator> DynamicMemoryManager::allocate(const ir::OperandIndex &ind,
+std::shared_ptr<cpu_common::Allocator> DynamicMemoryManager::allocate(const ITensor *tensor,
uint32_t capacity)
{
- auto find = _mem_alloc_map.find(ind);
+ auto find = _mem_alloc_map.find(tensor);
if (find != _mem_alloc_map.end())
throw std::runtime_error("Cannot allocate memory for a tensor. It was already allocated.");
- _mem_alloc_map[ind] = std::make_shared<cpu_common::Allocator>(capacity);
- return _mem_alloc_map[ind];
+ _mem_alloc_map[tensor] = std::make_shared<cpu_common::Allocator>(capacity);
+ return _mem_alloc_map[tensor];
}
-void DynamicMemoryManager::deallocate(const ir::OperandIndex &ind)
+void DynamicMemoryManager::deallocate(const ITensor *tensor)
{
- auto find = _mem_alloc_map.find(ind);
+ auto find = _mem_alloc_map.find(tensor);
if (find == _mem_alloc_map.end())
throw std::runtime_error("Cannot find Allocator for the requested index");
diff --git a/runtime/onert/core/src/backend/cpu_common/StaticTensorManager.cc b/runtime/onert/core/src/backend/cpu_common/StaticTensorManager.cc
index 440f70c93..cac43babe 100644
--- a/runtime/onert/core/src/backend/cpu_common/StaticTensorManager.cc
+++ b/runtime/onert/core/src/backend/cpu_common/StaticTensorManager.cc
@@ -27,9 +27,9 @@ namespace cpu_common
{
StaticTensorManager::StaticTensorManager(const std::shared_ptr<TensorRegistry> &reg,
- IDynamicTensorManager *dynamic_tensor_manager)
+ DynamicMemoryManager *dynamic_mem_mgr)
: _const_mgr{new DynamicMemoryManager()}, _nonconst_mgr{new MemoryManager()}, _tensors{reg},
- _dynamic_tensor_manager{dynamic_tensor_manager}
+ _dynamic_mem_mgr{dynamic_mem_mgr}
{
// DO NOTHING
}
@@ -39,10 +39,10 @@ void StaticTensorManager::allocateConsts(void)
for (auto &pair : _tensors->native_tensors())
{
const auto &ind = pair.first;
- auto tensor = pair.second;
+ auto tensor = pair.second.get();
if (_as_constants[ind])
{
- auto mem_alloc = _const_mgr->allocate(ind, tensor->total_size());
+ auto mem_alloc = _const_mgr->allocate(_tensors->getITensor(ind), tensor->total_size());
tensor->setBuffer(mem_alloc);
auto buffer = mem_alloc->base();
VERBOSE(CPU_COMMON_StaticTensorManager) << "CONSTANT TENSOR(#" << ind.value()
@@ -59,7 +59,7 @@ void StaticTensorManager::allocateNonconsts(void)
for (auto &pair : _tensors->native_tensors())
{
const auto &ind = pair.first;
- auto tensor = pair.second;
+ auto tensor = pair.second.get();
if (!_as_constants[ind] && !tensor->is_dynamic())
{
auto *buffer = _nonconst_mgr->getBuffer(ind);
@@ -80,8 +80,8 @@ void StaticTensorManager::buildTensor(const ir::OperandIndex &ind,
bool as_const)
{
assert(!_tensors->getNativeTensor(ind));
- auto tensor = std::make_shared<Tensor>(tensor_info, backend_layout, _dynamic_tensor_manager);
- _tensors->setNativeTensor(ind, tensor);
+ auto tensor = std::make_unique<Tensor>(tensor_info, backend_layout, _dynamic_mem_mgr);
+ _tensors->setNativeTensor(ind, std::move(tensor));
_as_constants[ind] = as_const;
}
diff --git a/runtime/onert/core/src/backend/cpu_common/Tensor.cc b/runtime/onert/core/src/backend/cpu_common/Tensor.cc
index f34564dd9..d3dcf9a6d 100644
--- a/runtime/onert/core/src/backend/cpu_common/Tensor.cc
+++ b/runtime/onert/core/src/backend/cpu_common/Tensor.cc
@@ -16,6 +16,9 @@
#include "backend/cpu_common/Tensor.h"
+#include "ir/DataType.h"
+#include "backend/cpu_common/MemoryManager.h"
+
namespace onert
{
namespace backend
@@ -23,6 +26,8 @@ namespace backend
namespace cpu_common
{
+Tensor::~Tensor() {}
+
size_t Tensor::calcOffset(const ir::Coordinates &coords) const
{
size_t rank = num_dimensions();
@@ -38,6 +43,55 @@ size_t Tensor::calcOffset(const ir::Coordinates &coords) const
void Tensor::setShape(const ir::Shape &new_shape) { _info.shape(new_shape); }
+bool Tensor::applyShape(const ir::Shape &new_shape)
+{
+ bool previously_dynamic = is_dynamic();
+
+ auto allocTensorMem = [&](bool overwrite = false) {
+ auto capacity = total_size();
+ auto alloc = _dynamic_mem_mgr->allocate(this, capacity);
+
+ if (overwrite)
+ overwriteBuffer(alloc);
+ else
+ setBuffer(alloc);
+ };
+
+ if (!previously_dynamic)
+ {
+ // TODO deallocate tensor->buffer()
+ // issue is that StaticTensorManager might have allocated this memory
+ setShape(new_shape);
+ set_dynamic();
+ allocTensorMem(true);
+ }
+ else if (buffer() == nullptr)
+ {
+ setShape(new_shape);
+ set_dynamic();
+ allocTensorMem();
+ }
+ // when the buffer was already allocated and new_shape requires a different size
+ else
+ {
+ auto previous_size = total_size();
+ auto new_size = new_shape.num_elements() * ir::sizeOfDataType(data_type());
+ if (previous_size != new_size)
+ {
+ _dynamic_mem_mgr->deallocate(this);
+
+ setShape(new_shape);
+ set_dynamic();
+ allocTensorMem(true);
+ }
+ else
+ { // when a buffer of the same size was already allocated, only the shape could differ
+ setShape(new_shape);
+ }
+ }
+ return true;
+}
+
} // namespace cpu_common
} // namespace backend
} // namespace onert
diff --git a/runtime/onert/core/src/compiler/BackendManager.cc b/runtime/onert/core/src/compiler/BackendManager.cc
index db7a14a96..0093f50fd 100644
--- a/runtime/onert/core/src/compiler/BackendManager.cc
+++ b/runtime/onert/core/src/compiler/BackendManager.cc
@@ -70,31 +70,18 @@ void BackendManager::loadBackend(const std::string &backend)
}
// TODO Remove indentation
- // Workaround If backend have dynamic library with "-boost" suffix naming,
- // BackendManager load library with "-boost" suffix instead of library without suffix
- // This feature is used for custom backend extension to support additional operations
{
- const std::string backend_boost_so = "libbackend_" + backend + "-boost" + SHARED_LIB_EXT;
const std::string backend_so = "libbackend_" + backend + SHARED_LIB_EXT;
+ void *handle = dlopen(backend_so.c_str(), RTLD_LAZY | RTLD_LOCAL);
- void *handle = dlopen(backend_boost_so.c_str(), RTLD_LAZY | RTLD_LOCAL);
if (handle == nullptr)
{
- handle = dlopen(backend_so.c_str(), RTLD_LAZY | RTLD_LOCAL);
-
- if (handle == nullptr)
- {
- VERBOSE_F() << "Failed to load backend '" << backend << "' - " << dlerror() << std::endl;
- return;
- }
-
- VERBOSE_F() << "Successfully loaded '" << backend << "' - " << backend_so << "\n";
- }
- else
- {
- VERBOSE_F() << "Successfully loaded '" << backend << "' - " << backend_boost_so << "\n";
+ VERBOSE_F() << "Failed to load backend '" << backend << "' - " << dlerror() << std::endl;
+ return;
}
+ VERBOSE_F() << "Successfully loaded '" << backend << "' - " << backend_so << "\n";
+
{
// load object creator function
auto backend_create = (backend_create_t)dlsym(handle, "onert_backend_create");
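The simplification above drops the "-boost" fallback and loads libbackend_<name>.so directly. A standalone sketch of the dlopen/dlsym pattern involved is below; the onert_backend_create symbol name is taken from the diff, the factory's return type is reduced to void *, and error handling is reduced to stderr messages:

#include <dlfcn.h>
#include <cstdio>
#include <string>

using backend_create_t = void *(*)();

void *loadBackend(const std::string &backend)
{
  const std::string backend_so = "libbackend_" + backend + ".so";

  void *handle = dlopen(backend_so.c_str(), RTLD_LAZY | RTLD_LOCAL);
  if (handle == nullptr)
  {
    std::fprintf(stderr, "Failed to load backend '%s' - %s\n", backend.c_str(), dlerror());
    return nullptr;
  }
  std::fprintf(stderr, "Successfully loaded '%s' - %s\n", backend.c_str(), backend_so.c_str());

  // Look up the factory symbol exported by the backend shared object
  auto backend_create = reinterpret_cast<backend_create_t>(dlsym(handle, "onert_backend_create"));
  if (backend_create == nullptr)
  {
    std::fprintf(stderr, "Symbol 'onert_backend_create' not found in %s\n", backend_so.c_str());
    dlclose(handle);
    return nullptr;
  }
  return backend_create();
}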
diff --git a/runtime/onert/core/src/compiler/Compiler.cc b/runtime/onert/core/src/compiler/Compiler.cc
index 93dbbc3b5..12b582b35 100644
--- a/runtime/onert/core/src/compiler/Compiler.cc
+++ b/runtime/onert/core/src/compiler/Compiler.cc
@@ -19,6 +19,7 @@
#include "ParamChecker.h"
#include "ExecutorFactory.h"
#include "OperationValidator.h"
+#include "ShapeValidator.h"
#include "Fp32ToFp16Converter.h"
#include <backend/controlflow/Config.h>
@@ -27,8 +28,12 @@
#include "compiler/ManualScheduler.h"
#include "compiler/HEScheduler.h"
#include "compiler/StaticShapeInference.h"
+#include "compiler/pass/ConstantOutputPass.h"
+#include "compiler/pass/OddOutputPass.h"
+#include "compiler/pass/PassRunner.h"
#include "exec/ExecTime.h"
#include "ir/operation/LowerInfo.h"
+#include "ir/verifier/Verifier.h"
#include "dumper/dot/DotDumper.h"
#include "compiler/Linear.h"
#include "interp/InterpExecutor.h"
@@ -132,6 +137,8 @@ std::shared_ptr<exec::ExecutorMap> Compiler::compile(void)
backend::controlflow::Config::ID;
_options.manual_scheduler_options.opcode_to_backend[ir::OpCode::While] =
backend::controlflow::Config::ID;
+ _options.manual_scheduler_options.opcode_to_backend[ir::OpCode::Permute] =
+ backend::controlflow::Config::ID;
}
// FIXME This is a workaround for bcq operations, should remove it
@@ -159,10 +166,24 @@ std::shared_ptr<exec::ExecutorMap> Compiler::compile(void)
VERBOSE(Compiler) << std::noboolalpha;
}
+ _subgraphs->iterate([&](const ir::SubgraphIndex &, ir::Graph &subg) {
+ // Mandatory passes
+ pass::PassRunner{}
+ .append(std::make_unique<pass::ConstantOutputPass>(subg))
+ .append(std::make_unique<pass::OddOutputPass>(subg))
+ .run();
+ });
+
/***************************************************
* Prepare compilation phase
***************************************************/
+ // Check shape-independent operation features
+ // - Operand type
+ // - Shape-independent parameters
+ _subgraphs->iterate(
+ [](const onert::ir::SubgraphIndex &, const ir::Graph &subg) { OperationValidator{subg}(); });
+
auto executors = std::make_shared<exec::ExecutorMap>();
// Compilable check
@@ -229,17 +250,23 @@ std::shared_ptr<exec::ExecutorMap> Compiler::compile(void)
inferer.dump();
}
- /*************************************************************
- * Backend independent analysis & optimization phase finished
- *************************************************************/
-
- // operation validation
+ // Shape validation
+ // TODO Move the shape-independent feature check from ShapeValidator to OperationValidator
+ // TODO Move ShapeValidator into shape inference
+ // - Check that input tensor shapes are valid
+ // - Check parameter values whose valid range depends on the input tensor shape
+ // - Output tensor shape validation is not needed because the
+ // static/dynamic shape inferers produce valid output shapes
for (auto &pair : lowered_subgs)
{
auto &lowered_subg = pair.second;
- compiler::OperationValidator{lowered_subg->graph()}();
+ compiler::ShapeValidator{lowered_subg->graph()}();
}
+ /*************************************************************
+ * Backend independent analysis & optimization phase finished
+ *************************************************************/
+
executors = std::make_shared<exec::ExecutorMap>();
for (auto &pair : lowered_subgs)
{
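The mandatory-pass block added in this file relies on a small append/run chain. The classes below are a simplified self-contained sketch of that pattern, with stub Pass/PassRunner types rather than the compiler::pass implementations:

#include <cstdio>
#include <memory>
#include <vector>

struct Pass
{
  virtual ~Pass() = default;
  virtual const char *name() const = 0;
  virtual void run() = 0;
};

class PassRunnerSketch
{
public:
  // Chainable append so passes can be registered fluently
  PassRunnerSketch &append(std::unique_ptr<Pass> pass)
  {
    _passes.emplace_back(std::move(pass));
    return *this;
  }

  void run()
  {
    for (auto &pass : _passes)
    {
      std::printf("Running %s\n", pass->name());
      pass->run();
    }
  }

private:
  std::vector<std::unique_ptr<Pass>> _passes;
};

struct ConstantOutputPassSketch : Pass
{
  const char *name() const override { return "ConstantOutputPass"; }
  void run() override { /* rewrite constant graph outputs here */ }
};

int main()
{
  // Mirrors the usage in the diff: append mandatory passes, then run them in order
  PassRunnerSketch{}.append(std::make_unique<ConstantOutputPassSketch>()).run();
}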
diff --git a/runtime/onert/core/src/compiler/ExecutorFactory.cc b/runtime/onert/core/src/compiler/ExecutorFactory.cc
index 062c6c9c3..bb325ffbc 100644
--- a/runtime/onert/core/src/compiler/ExecutorFactory.cc
+++ b/runtime/onert/core/src/compiler/ExecutorFactory.cc
@@ -29,6 +29,7 @@
#include "backend/IConstantInitializer.h"
#include "backend/IKernelGenerator.h"
#include "backend/IOptimizer.h"
+#include "backend/IPortableTensor.h"
#include "backend/ITensorRegister.h"
#include "backend/controlflow/Config.h"
#include "backend/controlflow/KernelGenerator.h"
@@ -65,23 +66,6 @@ private:
std::shared_ptr<backend::IConfig> _config;
};
-// TODO Think of a better way to manage TensorManagers
-backend::TensorManagerSet createTensorManagerSet(const compiler::TensorBuilders &tensor_builders)
-{
- backend::TensorManagerSet tensor_mgrs;
- for (auto &tensor_builder : tensor_builders)
- {
- auto s_tensor_manager = tensor_builder->releaseStaticTensorManager();
- if (s_tensor_manager != nullptr)
- tensor_mgrs.insert(std::move(s_tensor_manager));
-
- auto d_tensor_manager = tensor_builder->releaseDynamicTensorManager();
- if (d_tensor_manager != nullptr)
- tensor_mgrs.insert(std::move(d_tensor_manager));
- }
- return tensor_mgrs;
-}
-
} // namespace
} // namespace onert
@@ -172,7 +156,8 @@ void ExecutorFactory::runTensorRegistration(compiler::LoweredGraph *lowered_grap
for (const auto op_idx : op_seq)
{
const auto &op = lowered_graph->graph().operations().at(op_idx);
- for (const auto &index : (op.getInputs() | ir::Remove::UNDEFINED) + op.getOutputs())
+ for (const auto &index :
+ (op.getInputs() | ir::Remove::UNDEFINED) + (op.getOutputs() | ir::Remove::UNDEFINED))
{
if (!tensor_builder->isRegistered(index) && !model_io.contains(index))
{
@@ -200,11 +185,11 @@ void ExecutorFactory::runTensorRegistration(compiler::LoweredGraph *lowered_grap
}
}
-std::vector<std::shared_ptr<backend::ITensor>>
+std::vector<backend::ITensor *>
ExecutorFactory::initializeModelIOTensors(compiler::LoweredGraph &lowered_graph,
const ir::OperandIndexSequence &indices)
{
- std::vector<std::shared_ptr<backend::ITensor>> ret;
+ std::vector<backend::ITensor *> ret;
// TODO Store controlflow backend in BackendContext
std::shared_ptr<backend::controlflow::TensorBuilder> cf_tensor_builder;
@@ -227,19 +212,20 @@ ExecutorFactory::initializeModelIOTensors(compiler::LoweredGraph &lowered_graph,
for (auto ind : indices)
{
const auto &operand = lowered_graph.graph().operands().at(ind);
- auto tensor = std::make_shared<backend::controlflow::UserTensor>(
+ auto tensor = std::make_unique<backend::controlflow::UserTensor>(
operand.info(),
- ir::Layout::NHWC, /* FIXME find op_seq for this operand and use frontend_layout */
- cf_tensor_builder->dynamicTensorManager());
+ ir::Layout::NHWC /* FIXME find op_seq for this operand and use frontend_layout */
+ );
// Add tensor to controlflow TensorRegistry.
- cf_tensor_reg->setNativeUserTensor(ind, tensor);
- ret.push_back(tensor);
+ cf_tensor_reg->setNativeUserTensor(ind, std::move(tensor));
+ auto *itensor = cf_tensor_reg->getITensor(ind);
+ ret.push_back(itensor);
}
return ret;
}
-void ExecutorFactory::prepareExternalTensors(compiler::LoweredGraph &lowered_graph)
+void ExecutorFactory::prepareMigrantTensors(compiler::LoweredGraph &lowered_graph)
{
TensorRegistries tensor_regs{lowered_graph.backend_contexts(), true};
@@ -251,13 +237,13 @@ void ExecutorFactory::prepareExternalTensors(compiler::LoweredGraph &lowered_gra
ir::Remove::UNDEFINED)
{
// If an OpSequence input/output tensor does not have its own tensor object,
- // it must be using external tensors, so find the tensor from other tensor builders and
+ // it must be using migrant tensors, so find the tensor from other tensor builders and
// set the tensor to this tensor builder if portable
if (!backend_ctx->tensor_registry->getITensor(ind))
{
auto tensor = tensor_regs.getITensor(ind);
assert(tensor); // The tensor must have been registered
- auto ptensor = std::dynamic_pointer_cast<backend::IPortableTensor>(tensor);
+ auto ptensor = dynamic_cast<backend::IPortableTensor *>(tensor);
if (ptensor)
backend_ctx->tensor_registry->setMigrantTensor(ind, ptensor);
}
@@ -299,8 +285,8 @@ ExecutorFactory::createLinearExecutor(std::unique_ptr<compiler::LoweredGraph> lo
auto order = Linear::linearize(*lowered_graph);
runTensorRegistration(lowered_graph.get(), order);
- std::vector<std::shared_ptr<backend::ITensor>> input_tensors;
- std::vector<std::shared_ptr<backend::ITensor>> output_tensors;
+ std::vector<backend::ITensor *> input_tensors;
+ std::vector<backend::ITensor *> output_tensors;
if (options.is_primary_subgraph)
{
input_tensors = initializeModelIOTensors(*lowered_graph, lowered_graph->graph().getInputs());
@@ -318,7 +304,7 @@ ExecutorFactory::createLinearExecutor(std::unique_ptr<compiler::LoweredGraph> lo
tensor_builder->prepare();
}
- prepareExternalTensors(*lowered_graph);
+ prepareMigrantTensors(*lowered_graph);
ExecutionBuilder builder;
@@ -370,10 +356,9 @@ ExecutorFactory::createLinearExecutor(std::unique_ptr<compiler::LoweredGraph> lo
});
}
- backend::TensorManagerSet tensor_mgrs = createTensorManagerSet(tensor_builders);
- auto exec = new exec::LinearExecutor{
- std::move(lowered_graph), input_tensors, output_tensors, tensor_regs,
- std::move(tensor_mgrs), std::move(code_map), order};
+ auto exec =
+ new exec::LinearExecutor{std::move(lowered_graph), input_tensors, output_tensors, tensor_regs,
+ std::move(code_map), order};
if (!options.trace_filepath.empty())
{
@@ -396,8 +381,8 @@ exec::IExecutor *ExecutorFactory::createDataflowExecutor(
auto order = Linear::linearize(*lowered_graph);
runTensorRegistration(lowered_graph.get(), order);
- std::vector<std::shared_ptr<backend::ITensor>> input_tensors;
- std::vector<std::shared_ptr<backend::ITensor>> output_tensors;
+ std::vector<backend::ITensor *> input_tensors;
+ std::vector<backend::ITensor *> output_tensors;
if (options.is_primary_subgraph)
{
input_tensors = initializeModelIOTensors(*lowered_graph, lowered_graph->graph().getInputs());
@@ -424,7 +409,7 @@ exec::IExecutor *ExecutorFactory::createDataflowExecutor(
tensor_builder->prepare();
}
- prepareExternalTensors(*lowered_graph);
+ prepareMigrantTensors(*lowered_graph);
ExecutionBuilder builder;
@@ -477,20 +462,16 @@ exec::IExecutor *ExecutorFactory::createDataflowExecutor(
});
}
- backend::TensorManagerSet tensor_mgrs = createTensorManagerSet(tensor_builders);
-
exec::ExecutorBase *exec = nullptr;
if (parallel)
{
- exec = new exec::ParallelExecutor{std::move(lowered_graph), input_tensors,
- output_tensors, tensor_regs,
- std::move(tensor_mgrs), std::move(code_map)};
+ exec = new exec::ParallelExecutor{std::move(lowered_graph), input_tensors, output_tensors,
+ tensor_regs, std::move(code_map)};
}
else
{
- auto dataflow_exec = new exec::DataflowExecutor{std::move(lowered_graph), input_tensors,
- output_tensors, tensor_regs,
- std::move(tensor_mgrs), std::move(code_map)};
+ auto dataflow_exec = new exec::DataflowExecutor{
+ std::move(lowered_graph), input_tensors, output_tensors, tensor_regs, std::move(code_map)};
if (options.he_profiling_mode)
{
std::vector<const backend::Backend *> backends;
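The ExecutorFactory hunks above switch model I/O tensors from std::shared_ptr<backend::ITensor> to raw backend::ITensor * and hand ownership to the controlflow tensor registry (setNativeUserTensor now takes the tensor by std::move). A minimal sketch of that ownership split, using hypothetical stand-in types (Registry, Executor, UserTensor) rather than the actual onert classes:

#include <memory>
#include <unordered_map>
#include <utility>
#include <vector>

struct ITensor { virtual ~ITensor() = default; };
struct UserTensor : ITensor {};

class Registry // owns the user tensors, like the controlflow TensorRegistry
{
public:
  void set(int index, std::unique_ptr<UserTensor> tensor) { _tensors[index] = std::move(tensor); }
  ITensor *get(int index) const
  {
    auto it = _tensors.find(index);
    return it == _tensors.end() ? nullptr : it->second.get();
  }

private:
  std::unordered_map<int, std::unique_ptr<UserTensor>> _tensors;
};

class Executor // only borrows the tensors, like LinearExecutor after this change
{
public:
  explicit Executor(std::vector<ITensor *> io_tensors) : _io_tensors{std::move(io_tensors)} {}

private:
  std::vector<ITensor *> _io_tensors; // non-owning; lifetime is managed by Registry
};

Since the registry outlives the executors, the executors only need non-owning pointers, which is what the raw ITensor * vectors in the constructors express.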
diff --git a/runtime/onert/core/src/compiler/ExecutorFactory.h b/runtime/onert/core/src/compiler/ExecutorFactory.h
index b8893c03b..e76b721ea 100644
--- a/runtime/onert/core/src/compiler/ExecutorFactory.h
+++ b/runtime/onert/core/src/compiler/ExecutorFactory.h
@@ -46,10 +46,10 @@ private:
static void initializeBackendContext(compiler::LoweredGraph *lowered_graph);
static void runTensorRegistration(compiler::LoweredGraph *lowered_graph,
const std::vector<ir::OpSequenceIndex> &order);
- static std::vector<std::shared_ptr<backend::ITensor>>
+ static std::vector<backend::ITensor *>
initializeModelIOTensors(compiler::LoweredGraph &lowered_graph,
const ir::OperandIndexSequence &indices);
- static void prepareExternalTensors(compiler::LoweredGraph &lowered_graph);
+ static void prepareMigrantTensors(compiler::LoweredGraph &lowered_graph);
static exec::IExecutor *
createLinearExecutor(std::unique_ptr<compiler::LoweredGraph> lowered_graph,
const compiler::CompilerOptions &options,
diff --git a/runtime/onert/core/src/compiler/HEScheduler.cc b/runtime/onert/core/src/compiler/HEScheduler.cc
index 5653b090e..fe54b0fdd 100644
--- a/runtime/onert/core/src/compiler/HEScheduler.cc
+++ b/runtime/onert/core/src/compiler/HEScheduler.cc
@@ -34,7 +34,8 @@ namespace compiler
static uint32_t getOperationsFlattenedIOSize(const ir::Graph &graph, const ir::Operation &node)
{
uint32_t size = 0;
- for (const auto &ind : (node.getInputs() | ir::Remove::UNDEFINED) + node.getOutputs())
+ for (const auto &ind :
+ (node.getInputs() | ir::Remove::UNDEFINED) + (node.getOutputs() | ir::Remove::UNDEFINED))
{
size += graph.operands().at(ind).info().total_size();
}
@@ -248,8 +249,9 @@ int64_t HEScheduler::getPermuteTime(const backend::Backend *src_backend,
if (time != _exec_time->NOT_FOUND)
return time;
+ // FIXME Permute time is not recorded, so control always reaches here
// Makes the scheduler prefer keeping computations on one backend
- return size / 200;
+ return size / 400;
}
int64_t HEScheduler::tryBackend(const ir::Operation &node, const backend::Backend *backend)
@@ -370,7 +372,7 @@ int64_t HEScheduler::DFSChildrenMaxRank(const ir::OperationIndex &index)
{
const auto &node = _graph->operations().at(index);
int64_t max_child_rank = 0;
- for (const auto &output : node.getOutputs())
+ for (const auto &output : node.getOutputs() | ir::Remove::UNDEFINED)
{
const auto &operand = _graph->operands().at(output);
const bool quant = operand.typeInfo().type() == ir::DataType::QUANT_UINT8_ASYMM;
diff --git a/runtime/onert/core/src/compiler/Linear.cc b/runtime/onert/core/src/compiler/Linear.cc
index 49a989500..39e58fe11 100644
--- a/runtime/onert/core/src/compiler/Linear.cc
+++ b/runtime/onert/core/src/compiler/Linear.cc
@@ -148,6 +148,9 @@ void Linear::planTensors(const compiler::LoweredGraph &lowered_graph,
tensor_builder->notifyFirstUse(ind);
}
+ const auto io_tensors =
+ (graph.getInputs() + graph.getOutputs()) | ir::Remove::DUPLICATED | ir::Remove::UNDEFINED;
+
// At each operation,
// 1. Scan DEF of outputs. If the DEF, allocate it
// 2. Scan USE of inputs. Decrease the USE and deallocate if the USE is 0
@@ -182,7 +185,15 @@ void Linear::planTensors(const compiler::LoweredGraph &lowered_graph,
// plan for deallocation of dynamic tensor
auto dyn_tensor_manager = tensor_builder_map[ind]->dynamicTensorManager();
if (dyn_tensor_manager)
- dyn_tensor_manager->planDealloc(op_idx, ind);
+ {
+ const auto *backend =
+ lowered_graph.getLowerInfo(ind)->def_factors().getOnlyElement().backend();
+ auto &tensor_registry = lowered_graph.backend_contexts().at(backend)->tensor_registry;
+ auto *tensor = tensor_registry->getITensor(ind);
+ assert(tensor);
+ if (!io_tensors.contains(ind)) // I/O tensors cannot be deallocated
+ dyn_tensor_manager->planDealloc(op_idx, tensor);
+ }
}
}
}
diff --git a/runtime/onert/core/src/compiler/LoweredGraph.cc b/runtime/onert/core/src/compiler/LoweredGraph.cc
index 1489a1884..cdf1a8158 100644
--- a/runtime/onert/core/src/compiler/LoweredGraph.cc
+++ b/runtime/onert/core/src/compiler/LoweredGraph.cc
@@ -21,6 +21,7 @@
#include "util/logging.h"
#include "compiler/pass/ConstantInsertionPass.h"
#include "compiler/pass/ConstantLoweringPass.h"
+#include "compiler/pass/PassRunner.h"
#include "compiler/pass/PermutationOperationPass.h"
#include "compiler/pass/PermutationInsertionPass.h"
#include "compiler/pass/PermutationEliminationPass.h"
@@ -101,14 +102,14 @@ LoweredGraph::LoweredGraph(const ir::Graph &graph, const CompilerOptions &option
std::reverse(std::begin(op_seq.operations()), std::end(op_seq.operations()));
});
- VERBOSE(OpSequences) << "dump without permutation" << std::endl;
+ VERBOSE(OpSequences) << "dump before permutation insertion" << std::endl;
dumpOpSequences(_op_seqs, _graph.operations());
- pass::ConstantInsertionPass ci_pass(*this);
- ci_pass.run();
-
- pass::ConstantLoweringPass cl_pass(*this);
- cl_pass.run();
+ // Mandatory passes
+ pass::PassRunner{}
+ .append(std::make_unique<pass::ConstantInsertionPass>(*this))
+ .append(std::make_unique<pass::ConstantLoweringPass>(*this))
+ .run();
// Set LowerInfo for each operand from the operand::LowerInfo holder
manipulateLowerInfo(operands_lower_info, options.is_primary_subgraph);
@@ -116,20 +117,17 @@ LoweredGraph::LoweredGraph(const ir::Graph &graph, const CompilerOptions &option
dumpLowerInfo();
}
- // Run Permutation Passes
- {
- pass::PermutationOperationPass po_pass(*this);
- po_pass.run();
-
- pass::PermutationInsertionPass pi_pass(*this);
- pi_pass.run();
+ // Mandatory passes
+ pass::PassRunner{}
+ .append(std::make_unique<pass::PermutationOperationPass>(*this))
+ .append(std::make_unique<pass::PermutationInsertionPass>(*this))
+ .run();
- pass::PermutationEliminationPass pe_pass(*this);
- pe_pass.run();
+ // Optimization passes
+ pass::PassRunner{}.append(std::make_unique<pass::PermutationEliminationPass>(*this)).run();
- VERBOSE(OpSequences) << "dump with permutation" << std::endl;
- dumpOpSequences(_op_seqs, _graph.operations());
- }
+ VERBOSE(OpSequences) << "dump after permutation insertion" << std::endl;
+ dumpOpSequences(_op_seqs, _graph.operations());
// Graph verifications
{
@@ -276,7 +274,7 @@ void LoweredGraph::makeOpSequences(
auto &&lower_info = operands_lower_info.at(operand);
lower_info->addUsePermuteFactor(ir::operand::PermuteFactor{backend, backend_layout});
}
- for (auto operand : node.getOutputs())
+ for (auto operand : node.getOutputs() | ir::Remove::UNDEFINED)
{
auto &&lower_info = operands_lower_info.at(operand);
lower_info->addDefPermuteFactor(ir::operand::PermuteFactor{backend, backend_layout});
@@ -340,7 +338,7 @@ void LoweredGraph::manipulateLowerInfo(
assert(lower_info->def_factors().empty());
lower_info->addDefPermuteFactor(factor);
}
- for (auto index : _graph.getOutputs())
+ for (auto index : _graph.getOutputs() | ir::Remove::UNDEFINED)
{
auto &&lower_info = operands_lower_info.at(index);
lower_info->addUsePermuteFactor(factor);
@@ -368,7 +366,7 @@ void LoweredGraph::manipulateLowerInfo(
}
}
}
- for (auto index : _graph.getOutputs())
+ for (auto index : _graph.getOutputs() | ir::Remove::UNDEFINED)
{
auto &&lower_info = operands_lower_info.at(index);
if (lower_info->def_factors().size() == 0)
@@ -496,7 +494,7 @@ bool LoweredGraph::mergeable(const ir::OpSequenceIndex &op_seq_index,
branched_set.clear();
// Check for branching down
- for (const auto &output : node.getOutputs() | ir::Remove::DUPLICATED)
+ for (const auto &output : node.getOutputs() | ir::Remove::DUPLICATED | ir::Remove::UNDEFINED)
{
// TODO Fix this workaround for the case of model outputs that are used by another operation
// This is needed since the branching is decided by operation, but for model outputs,
@@ -544,7 +542,7 @@ bool LoweredGraph::mergeable(const ir::OpSequenceIndex &op_seq_index,
}
// node's input == op_seq's output?
- for (const auto output : n.getOutputs())
+ for (const auto output : n.getOutputs() | ir::Remove::UNDEFINED)
{
if (node_inputs.contains(output))
{
diff --git a/runtime/onert/core/src/compiler/OperationValidator.cc b/runtime/onert/core/src/compiler/OperationValidator.cc
index f7f659e3e..0582cf154 100644
--- a/runtime/onert/core/src/compiler/OperationValidator.cc
+++ b/runtime/onert/core/src/compiler/OperationValidator.cc
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
@@ -16,13 +16,7 @@
#include "OperationValidator.h"
-#include <typeinfo>
-
#include "ir/Graph.h"
-#include "ir/operation/LowerInfo.h"
-
-#include "util/logging.h"
-#include "util/Utils.h"
#define OP_REQUIRES(EXP) \
do \
@@ -37,33 +31,14 @@ namespace compiler
{
OperationValidator::OperationValidator(const ir::Graph &graph)
- : _graph{graph}, _ctx{graph.operands()}, _current_op_seq_layout{ir::Layout::UNKNOWN}
+ : _graph{graph}, _ctx{graph.operands()}
{
}
-void OperationValidator::checkUnaryOp(const ir::Operation &node)
-{
- const auto output_index{node.getOutputs().at(0)};
- const auto input_index{node.getInputs().at(0)};
-
- // Check if I/O types match
- OP_REQUIRES(_ctx.at(output_index).typeInfo().type() == _ctx.at(input_index).typeInfo().type());
-
- if (_ctx.at(output_index).info().isDynamic())
- return;
-
- // Check if I/O shapes match
- OP_REQUIRES(_ctx.at(output_index).shape() == _ctx.at(input_index).shape());
-}
-
void OperationValidator::operator()()
{
- // There is no reason for each subgraph to have subgraphs since compiler has subgraphs when
- // creating Compiler
assert(_graph.subgraphs() == nullptr);
- _current_op_seq_layout = _graph.layout();
-
_graph.operations().iterate(
[&](const ir::OperationIndex &, const ir::Operation &node) { node.accept(*this); });
}
@@ -72,50 +47,23 @@ void OperationValidator::visit(const ir::operation::BatchMatMul &node)
{
const auto lhs_index(node.getInputs().at(ir::operation::BatchMatMul::Input::LHS));
const auto rhs_index(node.getInputs().at(ir::operation::BatchMatMul::Input::RHS));
- const auto out_index{node.getOutputs().at(0)};
// Constant lhs and rhs are not implemented yet
OP_REQUIRES(!_ctx.at(lhs_index).isConstant() && !_ctx.at(rhs_index).isConstant());
-
- if (_ctx.at(out_index).info().isDynamic())
- return;
-
- OP_REQUIRES(_ctx.at(lhs_index).shape().rank() <= 4);
- OP_REQUIRES(_ctx.at(rhs_index).shape().rank() <= 4);
- OP_REQUIRES(_ctx.at(lhs_index).shape().rank() >= 2);
- OP_REQUIRES(_ctx.at(rhs_index).shape().rank() >= 2);
}
void OperationValidator::visit(const ir::operation::BatchToSpaceND &node)
{
- const auto ofm_index{node.getOutputs().at(0)};
- if (_ctx.at(ofm_index).info().isDynamic())
- return;
-
- const auto ifm_index{node.getInputs().at(ir::operation::BatchToSpaceND::Input::INPUT)};
const auto block_size_index{
node.getInputs().at(ir::operation::BatchToSpaceND::Input::BLOCK_SIZE)};
- const auto frontend_layout = _current_op_seq_layout;
- const auto input_shape = _ctx.at(ifm_index).shape().asFeature(frontend_layout);
- const auto output_shape = _ctx.at(ofm_index).shape().asFeature(frontend_layout);
-
- // All requirement as per NNAPI specification.
- OP_REQUIRES(_ctx.at(ifm_index).shape().rank() == 4);
- OP_REQUIRES(_ctx.at(ofm_index).shape().rank() == 4);
- OP_REQUIRES(_ctx.at(block_size_index).shape().rank() == 1);
-
- OP_REQUIRES(_ctx.at(block_size_index).shape().dim(0) == 2);
-
+ // Non-constant block_size is not implemented yet
OP_REQUIRES(_ctx.at(block_size_index).isConstant());
-
- OP_REQUIRES(input_shape.C == output_shape.C);
}
void OperationValidator::visit(const ir::operation::Comparison &node)
{
const auto output_index{node.getOutputs().at(0)};
- // This validator does not check shape. So checking isDynamic() is skipped.
const auto lhs_index{node.getInputs().at(ir::operation::Comparison::Input::INPUT0)};
const auto rhs_index{node.getInputs().at(ir::operation::Comparison::Input::INPUT1)};
@@ -124,223 +72,20 @@ void OperationValidator::visit(const ir::operation::Comparison &node)
OP_REQUIRES(_ctx.at(output_index).typeInfo().type() == ir::DataType::BOOL8);
}
-void OperationValidator::visit(const ir::operation::Softmax &node)
-{
- VERBOSE(Softmax) << "Configure SOFTMAX operation" << std::endl;
-
- const auto output_index{node.getOutputs().at(0)};
- if (_ctx.at(output_index).info().isDynamic())
- return;
-
- const auto input_index{node.getInputs().at(0)};
-
- OP_REQUIRES(_ctx.at(output_index).shape().rank() == _ctx.at(input_index).shape().rank());
-}
-
-void OperationValidator::visit(const ir::operation::InstanceNorm &node)
-{
- const auto ofm_index{node.getOutputs().at(0)};
- if (_ctx.at(ofm_index).info().isDynamic())
- return;
-
- const auto ifm_index{node.getInputs().at(ir::operation::InstanceNorm::Input::INPUT)};
- const auto gamma_index{node.getInputs().at(ir::operation::InstanceNorm::Input::GAMMA)};
- const auto beta_index{node.getInputs().at(ir::operation::InstanceNorm::Input::BETA)};
-
- OP_REQUIRES(_ctx.at(ifm_index).shape().rank() == 4);
- OP_REQUIRES(_ctx.at(ifm_index).shape() == _ctx.at(ofm_index).shape());
- OP_REQUIRES(_ctx.at(gamma_index).shape().rank() == 1);
- OP_REQUIRES(_ctx.at(beta_index).shape().rank() == 1);
-}
-
-void OperationValidator::visit(const ir::operation::Pool2D &node)
+void OperationValidator::visit(const ir::operation::DepthToSpace &node)
{
- const auto ofm_index{node.getOutputs().at(0)};
- if (_ctx.at(ofm_index).info().isDynamic())
- return;
-
- const auto ifm_index{node.getInputs().at(ir::operation::Pool2D::Input::INPUT)};
+ int32_t block_size = node.param().block_size;
- OP_REQUIRES(_ctx.at(ifm_index).shape().rank() == 4);
+ OP_REQUIRES(block_size > 0);
}
-void OperationValidator::visit(const ir::operation::Permute &node)
+void OperationValidator::visit(const ir::operation::ElementwiseActivation &node)
{
- VERBOSE(Permute) << "Configure Permute operation" << std::endl;
-
const auto output_index{node.getOutputs().at(0)};
- if (_ctx.at(output_index).info().isDynamic())
- return;
-
const auto input_index{node.getInputs().at(0)};
- OP_REQUIRES(_ctx.at(output_index).shape().rank() == _ctx.at(input_index).shape().rank());
-}
-
-void OperationValidator::visit(const ir::operation::Reduce &node)
-{
- VERBOSE(Permute) << "Configure " + node.name() + " operation" << std::endl;
-
- const auto output_index{node.getOutputs().at(0)};
- if (_ctx.at(output_index).info().isDynamic())
- return;
-
- const auto input_index{node.getInputs().at(ir::operation::Reduce::Input::INPUT)};
- const auto input_shape = _ctx.at(input_index).shape();
- const auto output_shape = _ctx.at(output_index).shape();
-
- OP_REQUIRES(input_shape.rank() <= 4);
- OP_REQUIRES(output_shape.rank() <= input_shape.rank());
-
- // NOTE For the 4-dimensions, if the rank of input and output are different, this runtime only
- // supports cases reducing height and width or reducing depth.
- // TODO We have to support all cases of dimensions up to 4.
- // For correct permuting, we have to set output's shape to be equal in dimension position of the
- // input. But the positions of the same dimensions in the input and output may be set differently.
- // For example {2,3,4,5}(input's shape) can be reduced to {3,5}(output's shape). The original
- // output shape should be {1,3,1,5}, but real output shape may be {3,5}. If you simply try to
- // extend it in 4 dimensions, it should be {1,1,3,5}.
- // Even if output shape is changed to {1,3,1,5}, there is another problem. It is that shape of
- // output tensor used at next operation is changed to {1,3,1,5} after this operation even if the
- // next operation is not desired.
- if (input_shape.rank() == 4 && input_shape.rank() != output_shape.rank())
- {
- if (output_shape.rank() == 2)
- {
- // Reducing HW
- OP_REQUIRES(input_shape.dim(0) == output_shape.dim(0) &&
- input_shape.dim(3) == output_shape.dim(1));
- }
- else if (output_shape.rank() == 3)
- {
- // Reducing C or
- // (Reducing H and C(input and output) == 1) or (Reducing W and C(input and output) == 1)
- OP_REQUIRES((input_shape.dim(0) == output_shape.dim(0) &&
- input_shape.dim(1) == output_shape.dim(1) &&
- input_shape.dim(2) == output_shape.dim(2)) ||
- (input_shape.dim(0) == output_shape.dim(0) &&
- (input_shape.dim(1) == output_shape.dim(1) ||
- input_shape.dim(2) == output_shape.dim(1)) &&
- input_shape.dim(3) == 1 && output_shape.dim(2) == 1));
- }
- }
-}
-
-void OperationValidator::visit(const ir::operation::Transpose &node)
-{
- const auto output_index{node.getOutputs().at(0)};
- if (_ctx.at(output_index).info().isDynamic())
- return;
-
- const auto input_index{node.getInputs().at(ir::operation::Transpose::Input::INPUT)};
- const auto &perm{node.param().perm};
-
- const auto &output_shape = _ctx.at(output_index).shape();
- const auto &input_shape = _ctx.at(input_index).shape();
-
- OP_REQUIRES(input_shape.rank() == static_cast<int>(perm.size()));
- OP_REQUIRES(input_shape.rank() == output_shape.rank());
-}
-
-void OperationValidator::visit(const ir::operation::RNN &node)
-{
- // NOTE This validation is for static rnn(non-dynamic shape), but not for dynamic rnn
- // TODO Support dynamic rnn
- const auto output_index{node.getOutputs().at(ir::operation::RNN::Output::OUTPUT)};
- if (_ctx.at(output_index).info().isDynamic())
- return;
-
- const auto hidden_state_out_index{
- node.getOutputs().at(ir::operation::RNN::Output::HIDDEN_STATE_OUT)};
-
- const auto input_index{node.getInputs().at(ir::operation::RNN::Input::INPUT)};
- const auto weights_index{node.getInputs().at(ir::operation::RNN::Input::WEIGHTS)};
- const auto recurrent_weights_index{
- node.getInputs().at(ir::operation::RNN::Input::RECURRENT_WEIGHTS)};
- const auto bias_index{node.getInputs().at(ir::operation::RNN::Input::BIAS)};
- const auto hidden_state_in_index{node.getInputs().at(ir::operation::RNN::Input::HIDDEN_STATE_IN)};
-
- const auto batch_size = _ctx.at(output_index).shape().dim(0);
- const auto num_units = _ctx.at(output_index).shape().dim(1);
-
- OP_REQUIRES(_ctx.at(output_index).shape().rank() == 2 &&
- _ctx.at(hidden_state_out_index).shape().rank() == 2 &&
- _ctx.at(input_index).shape().rank() == 2 &&
- _ctx.at(weights_index).shape().rank() == 2 &&
- _ctx.at(recurrent_weights_index).shape().rank() == 2 &&
- _ctx.at(hidden_state_in_index).shape().rank() == 2);
- OP_REQUIRES(_ctx.at(bias_index).shape().rank() == 1);
-
- OP_REQUIRES(batch_size == _ctx.at(input_index).shape().dim(0) &&
- batch_size == _ctx.at(hidden_state_in_index).shape().dim(0) &&
- batch_size == _ctx.at(hidden_state_out_index).shape().dim(0));
- OP_REQUIRES(_ctx.at(input_index).shape().dim(1) == _ctx.at(weights_index).shape().dim(1));
-
- OP_REQUIRES(num_units == _ctx.at(weights_index).shape().dim(0) &&
- num_units == _ctx.at(recurrent_weights_index).shape().dim(0) &&
- num_units == _ctx.at(bias_index).shape().dim(0));
- OP_REQUIRES(num_units == _ctx.at(output_index).shape().dim(1) &&
- num_units == _ctx.at(recurrent_weights_index).shape().dim(1) &&
- num_units == _ctx.at(hidden_state_in_index).shape().dim(1) &&
- num_units == _ctx.at(hidden_state_out_index).shape().dim(1));
-}
-
-void OperationValidator::visit(const ir::operation::SpaceToBatchND &node)
-{
- const auto ofm_index{node.getOutputs().at(0)};
- if (_ctx.at(ofm_index).info().isDynamic())
- return;
-
- const auto ifm_index{node.getInputs().at(ir::operation::SpaceToBatchND::Input::INPUT)};
- const auto block_size_index{
- node.getInputs().at(ir::operation::SpaceToBatchND::Input::BLOCK_SIZE)};
- const auto paddings_index{node.getInputs().at(ir::operation::SpaceToBatchND::Input::PADDINGS)};
-
- const auto frontend_layout = _current_op_seq_layout;
- const auto input_shape = _ctx.at(ifm_index).shape().asFeature(frontend_layout);
- const auto output_shape = _ctx.at(ofm_index).shape().asFeature(frontend_layout);
-
- // All requirement as per NNAPI specification.
- OP_REQUIRES(_ctx.at(ifm_index).shape().rank() == 4);
- OP_REQUIRES(_ctx.at(ofm_index).shape().rank() == 4);
- OP_REQUIRES(_ctx.at(block_size_index).shape().rank() == 1);
- OP_REQUIRES(_ctx.at(paddings_index).shape().rank() == 2);
-
- OP_REQUIRES(_ctx.at(block_size_index).shape().dim(0) == 2);
- OP_REQUIRES(_ctx.at(paddings_index).shape().dim(0) == 2);
- OP_REQUIRES(_ctx.at(paddings_index).shape().dim(1) == 2);
-
- OP_REQUIRES(_ctx.at(block_size_index).isConstant());
- OP_REQUIRES(_ctx.at(paddings_index).isConstant());
-
- OP_REQUIRES(input_shape.C == output_shape.C);
-}
-
-void OperationValidator::visit(const ir::operation::SpaceToDepth &node)
-{
- const auto ofm_index{node.getOutputs().at(0)};
- if (_ctx.at(ofm_index).info().isDynamic())
- return;
-
- const auto ifm_index{node.getInputs().at(ir::operation::SpaceToDepth::Input::INPUT)};
-
- const auto frontend_layout = _current_op_seq_layout;
- const auto input_shape = _ctx.at(ifm_index).shape().asFeature(frontend_layout);
- const auto output_shape = _ctx.at(ofm_index).shape().asFeature(frontend_layout);
- const auto block_size = node.param().block_size;
-
- // All assertions as per NNAPI specification.
- OP_REQUIRES(_ctx.at(ifm_index).shape().rank() == 4);
- OP_REQUIRES(_ctx.at(ofm_index).shape().rank() == 4);
- OP_REQUIRES((block_size >= 1) && (input_shape.H % block_size == 0) &&
- (input_shape.W % block_size == 0));
- OP_REQUIRES(input_shape.N == output_shape.N);
- OP_REQUIRES(input_shape.C * block_size * block_size == output_shape.C);
-}
-
-void OperationValidator::visit(const ir::operation::ElementwiseActivation &node)
-{
- checkUnaryOp(node);
+ // Check if I/O types match
+ OP_REQUIRES(_ctx.at(output_index).typeInfo().type() == _ctx.at(input_index).typeInfo().type());
}
void OperationValidator::visit(const ir::operation::ElementwiseBinary &node)
@@ -358,9 +103,6 @@ void OperationValidator::visit(const ir::operation::ElementwiseUnary &node)
const auto output_index{node.getOutputs().at(0)};
const auto input_index{node.getInputs().at(ir::operation::ElementwiseUnary::Input::INPUT)};
- OP_REQUIRES(node.getInputs().size() == 1);
- OP_REQUIRES(node.getOutputs().size() == 1);
-
// Check if I/O types match
if (node.param().op_type == ir::operation::ElementwiseUnary::Type::DEQUANTIZE)
{
@@ -376,47 +118,13 @@ void OperationValidator::visit(const ir::operation::ElementwiseUnary &node)
{
OP_REQUIRES(_ctx.at(output_index).typeInfo().type() == _ctx.at(input_index).typeInfo().type());
}
-
- if (_ctx.at(output_index).info().isDynamic())
- return;
-
- OP_REQUIRES(_ctx.at(output_index).shape() == _ctx.at(input_index).shape());
}
void OperationValidator::visit(const ir::operation::EmbeddingLookup &node)
{
- const auto output_index{node.getOutputs().at(0)};
const auto lookups_index{node.getInputs().at(ir::operation::EmbeddingLookup::Input::LOOKUPS)};
- const auto values_index{node.getInputs().at(ir::operation::EmbeddingLookup::Input::VALUES)};
-
- const auto &output_obj = _ctx.at(output_index);
- const auto &lookups_obj = _ctx.at(lookups_index);
- const auto &values_obj = _ctx.at(values_index);
-
- // Verify operand here, not at SimpleEmbeddingLookup::configure() to avoid acl's modifying
- // TensorShape sometimes(Issue: https://github.sec.samsung.net/STAR/nnfw/issues/729)
- {
- OP_REQUIRES(lookups_obj.typeInfo().type() == ir::DataType::INT32);
-
- if (_ctx.at(output_index).info().isDynamic())
- return;
- const auto &output_shape = output_obj.shape();
- const auto &lookups_shape = lookups_obj.shape();
- const auto &values_shape = values_obj.shape();
-
- OP_REQUIRES(lookups_shape.rank() == 1);
- OP_REQUIRES(values_shape.rank() >= 2);
-
- // output should be a n-D tensor with the same rank and shape as the values tensor, except for
- // the first dimension which has the same size as lookups' only dimension.
- OP_REQUIRES(output_shape.rank() == values_shape.rank());
- OP_REQUIRES(output_shape.dim(0) == lookups_shape.dim(0));
- for (int n = 1; n < output_shape.rank(); ++n)
- {
- OP_REQUIRES(output_shape.dim(n) == values_shape.dim(n));
- }
- }
+ OP_REQUIRES(_ctx.at(lookups_index).typeInfo().type() == ir::DataType::INT32);
}
void OperationValidator::visit(const ir::operation::ExpandDims &node)
@@ -427,488 +135,35 @@ void OperationValidator::visit(const ir::operation::ExpandDims &node)
OP_REQUIRES(_ctx.at(output_index).typeInfo().type() == _ctx.at(input_index).typeInfo().type());
OP_REQUIRES(_ctx.at(axis_index).typeInfo().type() == ir::DataType::INT32);
-
- if (_ctx.at(axis_index).info().isDynamic())
- return;
- OP_REQUIRES(_ctx.at(axis_index).shape().rank() <= 1);
}
void OperationValidator::visit(const ir::operation::HashtableLookup &node)
{
- const auto output_index{node.getOutputs().at(ir::operation::HashtableLookup::Output::OUTPUT)};
const auto hits_index{node.getOutputs().at(ir::operation::HashtableLookup::Output::HITS)};
-
const auto lookups_index{node.getInputs().at(ir::operation::HashtableLookup::Input::LOOKUPS)};
const auto keys_index{node.getInputs().at(ir::operation::HashtableLookup::Input::KEYS)};
- const auto values_index{node.getInputs().at(ir::operation::HashtableLookup::Input::VALUES)};
-
- const auto &output_obj = _ctx.at(output_index);
- const auto &hits_obj = _ctx.at(hits_index);
-
- const auto &lookups_obj = _ctx.at(lookups_index);
- const auto &keys_obj = _ctx.at(keys_index);
- const auto &values_obj = _ctx.at(values_index);
-
- OP_REQUIRES(lookups_obj.typeInfo().type() == ir::DataType::INT32);
- OP_REQUIRES(keys_obj.typeInfo().type() == ir::DataType::INT32);
- OP_REQUIRES(hits_obj.typeInfo().type() == ir::DataType::QUANT_UINT8_ASYMM);
-
- if (_ctx.at(output_index).info().isDynamic())
- return;
- const auto &output_shape = output_obj.shape();
- const auto &lookups_shape = lookups_obj.shape();
- const auto &keys_shape = keys_obj.shape();
- const auto &values_shape = values_obj.shape();
-
- OP_REQUIRES(values_shape.rank() == output_shape.rank());
- OP_REQUIRES(lookups_shape.rank() == 1);
- OP_REQUIRES(keys_shape.rank() == 1);
- OP_REQUIRES(values_shape.dim(0) == keys_shape.dim(0));
- OP_REQUIRES(lookups_shape.dim(0) == output_shape.dim(0));
-}
-
-void OperationValidator::visit(const ir::operation::TransposeConv &node)
-{
- // param check
- OP_REQUIRES((node.param().padding.type == ir::PaddingType::SAME) ||
- (node.param().padding.type == ir::PaddingType::VALID));
-
- // shape check
- const auto ofm_index{node.getOutputs().at(0)};
- if (_ctx.at(ofm_index).info().isDynamic())
- return;
-
- const auto ifm_index{node.getInputs().at(ir::operation::TransposeConv::Input::INPUT)};
- const auto ker_index{node.getInputs().at(ir::operation::TransposeConv::Input::KERNEL)};
-
- // Only 4D tensors are supported
- OP_REQUIRES(_ctx.at(ofm_index).shape().rank() == 4);
- OP_REQUIRES(_ctx.at(ofm_index).shape().rank() == _ctx.at(ifm_index).shape().rank());
- OP_REQUIRES(_ctx.at(ofm_index).shape().rank() == _ctx.at(ker_index).shape().rank());
-
- const auto frontend_layout = _current_op_seq_layout;
- const auto ofm_shape = _ctx.at(ofm_index).shape().asFeature(frontend_layout);
- const auto ifm_shape = _ctx.at(ifm_index).shape().asFeature(frontend_layout);
- // The kernel has only IHWO layout on frontend
- // So ker_shape is treated here below
- // I -> N
- // H -> H
- // W -> W
- // O -> C
- const auto ker_shape = _ctx.at(ker_index).shape().asFeature(ir::Layout::NHWC);
-
- OP_REQUIRES(ifm_shape.N == ofm_shape.N);
- OP_REQUIRES(ifm_shape.C == ker_shape.C);
- OP_REQUIRES(ker_shape.N == ofm_shape.C);
-}
-
-void OperationValidator::visit(const ir::operation::Gather &node)
-{
- const auto ofm_index{node.getOutputs().at(0)};
- if (_ctx.at(ofm_index).info().isDynamic())
- return;
-
- const auto ifm_index{node.getInputs().at(ir::operation::Gather::Input::INPUT)};
- const auto indices_index{node.getInputs().at(ir::operation::Gather::Input::INDICES)};
-
- const auto ifm_shape = _ctx.at(ifm_index).shape();
- const auto indices_shape = _ctx.at(indices_index).shape();
- const auto ofm_shape = _ctx.at(ofm_index).shape();
-
- OP_REQUIRES(ifm_shape.rank() <= 4);
- OP_REQUIRES(indices_shape.rank() <= 3);
- OP_REQUIRES(ofm_shape.rank() <= 4);
-}
-
-void OperationValidator::visit(const ir::operation::DepthToSpace &node)
-{
- // param check
- int32_t block_size = node.param().block_size;
-
- OP_REQUIRES(block_size > 0);
-
- // shape check
- const auto output_index{node.getOutputs().at(0)};
- if (_ctx.at(output_index).info().isDynamic())
- return;
-
- const auto input_index{node.getInputs().at(ir::operation::DepthToSpace::Input::INPUT)};
-
- const auto frontend_layout = _current_op_seq_layout;
- const auto output_shape = _ctx.at(output_index).shape().asFeature(frontend_layout);
- const auto input_shape = _ctx.at(input_index).shape().asFeature(frontend_layout);
-
- OP_REQUIRES(_ctx.at(input_index).shape().rank() == 4);
- OP_REQUIRES(_ctx.at(output_index).shape().rank() == 4);
-
- {
- OP_REQUIRES(output_shape.N == input_shape.N);
- OP_REQUIRES(output_shape.H == input_shape.H * block_size);
- OP_REQUIRES(output_shape.W == input_shape.W * block_size);
- OP_REQUIRES(input_shape.C % (block_size * block_size) == 0);
- OP_REQUIRES(output_shape.C == input_shape.C / (block_size * block_size));
- }
+ OP_REQUIRES(_ctx.at(lookups_index).typeInfo().type() == ir::DataType::INT32);
+ OP_REQUIRES(_ctx.at(keys_index).typeInfo().type() == ir::DataType::INT32);
+ OP_REQUIRES(_ctx.at(hits_index).typeInfo().type() == ir::DataType::QUANT_UINT8_ASYMM);
}
void OperationValidator::visit(const ir::operation::Pack &node)
{
- // param check
const auto num{node.param().num};
- const auto axis{node.param().axis};
- OP_REQUIRES(num == static_cast<int32_t>(node.getInputs().size()));
-
- const auto output_index{node.getOutputs().at(0)};
- if (_ctx.at(output_index).info().isDynamic())
- return;
-
- // shape check
- const auto &output_shape = _ctx.at(output_index).shape();
- const auto output_rank = static_cast<int32_t>(output_shape.rank());
- const auto input1_index{node.getInputs().at(0)};
- const auto input_shape = _ctx.at(input1_index).shape();
-
- OP_REQUIRES(axis >= -output_rank && axis < output_rank);
- for (const auto &index : node.getInputs())
- {
- OP_REQUIRES(input_shape == _ctx.at(index).shape());
- }
-}
-
-void OperationValidator::visit(const ir::operation::LSTM &node)
-{
- // NOTE This validation is for static rnn(non-dynamic shape), but not for dynamic rnn
- // TODO Support dynamic rnn
- const auto output_index{node.getOutputs().at(ir::operation::LSTM::Output::OUTPUT)};
- if (_ctx.at(output_index).info().isDynamic())
- return;
-
- const auto scratch_buffer_index{
- node.getOutputs().at(ir::operation::LSTM::Output::SCRATCH_BUFFER)};
- const auto output_state_out_index{
- node.getOutputs().at(ir::operation::LSTM::Output::OUTPUT_STATE_OUT)};
- const auto cell_state_out_index{
- node.getOutputs().at(ir::operation::LSTM::Output::CELL_STATE_OUT)};
-
- const auto input_index{node.getInputs().at(ir::operation::LSTM::Input::INPUT)};
- const auto input_to_input_weights_index{
- node.getInputs().at(ir::operation::LSTM::Input::INPUT_TO_INPUT_WEIGHTS)};
- const auto input_to_forget_weights_index{
- node.getInputs().at(ir::operation::LSTM::Input::INPUT_TO_FORGET_WEIGHTS)};
- const auto input_to_cell_weights_index{
- node.getInputs().at(ir::operation::LSTM::Input::INPUT_TO_CELL_WEIGHTS)};
- const auto input_to_output_weights_index{
- node.getInputs().at(ir::operation::LSTM::Input::INPUT_TO_OUTPUT_WEIGHTS)};
- const auto recurrent_to_input_weights_index{
- node.getInputs().at(ir::operation::LSTM::Input::RECURRENT_TO_INPUT_WEIGHTS)};
- const auto recurrent_to_forget_weights_index{
- node.getInputs().at(ir::operation::LSTM::Input::RECURRENT_TO_FORGET_WEIGHTS)};
- const auto recurrent_to_cell_weights_index{
- node.getInputs().at(ir::operation::LSTM::Input::RECURRENT_TO_CELL_WEIGHTS)};
- const auto recurrent_to_output_weights_index{
- node.getInputs().at(ir::operation::LSTM::Input::RECURRENT_TO_OUTPUT_WEIGHTS)};
- const auto cell_to_input_weights_index{
- node.getInputs().at(ir::operation::LSTM::Input::CELL_TO_INPUT_WEIGHTS)};
- const auto cell_to_forget_weights_index{
- node.getInputs().at(ir::operation::LSTM::Input::CELL_TO_FORGET_WEIGHTS)};
- const auto cell_to_output_weights_index{
- node.getInputs().at(ir::operation::LSTM::Input::CELL_TO_OUTPUT_WEIGHTS)};
- const auto input_gate_bias_index{
- node.getInputs().at(ir::operation::LSTM::Input::INPUT_GATE_BIAS)};
- const auto forget_gate_bias_index{
- node.getInputs().at(ir::operation::LSTM::Input::FORGET_GATE_BIAS)};
- const auto cell_bias_index{node.getInputs().at(ir::operation::LSTM::Input::CELL_BIAS)};
- const auto output_gate_bias_index{
- node.getInputs().at(ir::operation::LSTM::Input::OUTPUT_GATE_BIAS)};
- const auto projection_weights_index{
- node.getInputs().at(ir::operation::LSTM::Input::PROJECTION_WEIGHTS)};
- const auto projection_bias_index{
- node.getInputs().at(ir::operation::LSTM::Input::PROJECTION_BIAS)};
- const auto output_state_in_index{
- node.getInputs().at(ir::operation::LSTM::Input::OUTPUT_STATE_IN)};
- const auto cell_state_in_index{node.getInputs().at(ir::operation::LSTM::Input::CELL_STATE_IN)};
-
- OP_REQUIRES(_ctx.at(scratch_buffer_index).shape().rank() == 2 &&
- _ctx.at(output_state_out_index).shape().rank() == 2 &&
- _ctx.at(cell_state_out_index).shape().rank() == 2 &&
- _ctx.at(output_index).shape().rank() == 2 &&
- _ctx.at(input_index).shape().rank() == 2 &&
- _ctx.at(input_to_input_weights_index).shape().rank() == 2 &&
- _ctx.at(input_to_forget_weights_index).shape().rank() == 2 &&
- _ctx.at(input_to_cell_weights_index).shape().rank() == 2 &&
- _ctx.at(input_to_output_weights_index).shape().rank() == 2 &&
- _ctx.at(recurrent_to_input_weights_index).shape().rank() == 2 &&
- _ctx.at(recurrent_to_forget_weights_index).shape().rank() == 2 &&
- _ctx.at(recurrent_to_cell_weights_index).shape().rank() == 2 &&
- _ctx.at(recurrent_to_output_weights_index).shape().rank() == 2 &&
- _ctx.at(projection_weights_index).shape().rank() == 2 &&
- _ctx.at(output_state_in_index).shape().rank() == 2 &&
- _ctx.at(cell_state_in_index).shape().rank() == 2);
-
- OP_REQUIRES(_ctx.at(cell_to_input_weights_index).shape().rank() == 1 &&
- _ctx.at(cell_to_forget_weights_index).shape().rank() == 1 &&
- _ctx.at(cell_to_output_weights_index).shape().rank() == 1 &&
- _ctx.at(input_gate_bias_index).shape().rank() == 1 &&
- _ctx.at(forget_gate_bias_index).shape().rank() == 1 &&
- _ctx.at(cell_bias_index).shape().rank() == 1 &&
- _ctx.at(output_gate_bias_index).shape().rank() == 1 &&
- _ctx.at(projection_bias_index).shape().rank() == 1);
-
- // CIFG assertion
- OP_REQUIRES((_ctx.at(input_to_input_weights_index).shape().dim(0) == 0 &&
- _ctx.at(input_to_input_weights_index).shape().dim(1) == 0 &&
- _ctx.at(recurrent_to_input_weights_index).shape().dim(0) == 0 &&
- _ctx.at(recurrent_to_input_weights_index).shape().dim(1) == 0 &&
- _ctx.at(input_gate_bias_index).shape().dim(0) == 0 &&
- _ctx.at(cell_to_input_weights_index).shape().dim(0) == 0) ||
- (_ctx.at(input_to_input_weights_index).shape().dim(0) != 0 &&
- _ctx.at(input_to_input_weights_index).shape().dim(1) != 0 &&
- _ctx.at(recurrent_to_input_weights_index).shape().dim(0) != 0 &&
- _ctx.at(recurrent_to_input_weights_index).shape().dim(1) != 0 &&
- _ctx.at(input_gate_bias_index).shape().dim(0) != 0));
-
- // Peephole assertion
- OP_REQUIRES((_ctx.at(cell_to_forget_weights_index).shape().dim(0) == 0 &&
- _ctx.at(cell_to_output_weights_index).shape().dim(0) == 0) ||
- (_ctx.at(cell_to_forget_weights_index).shape().dim(0) != 0 &&
- _ctx.at(cell_to_output_weights_index).shape().dim(0) != 0));
-
- bool has_input_to_input_weights = _ctx.at(input_to_input_weights_index).shape().dim(0) != 0 &&
- _ctx.at(input_to_input_weights_index).shape().dim(1) != 0;
- bool has_recurrent_to_input_weights =
- _ctx.at(recurrent_to_input_weights_index).shape().dim(0) != 0 &&
- _ctx.at(recurrent_to_input_weights_index).shape().dim(1) != 0;
- bool has_input_gate_bias = _ctx.at(input_gate_bias_index).shape().dim(0) != 0;
- bool has_cell_to_input_weights = _ctx.at(cell_to_input_weights_index).shape().dim(0) != 0;
- bool has_cell_to_forget_weights = _ctx.at(cell_to_forget_weights_index).shape().dim(0) != 0;
- bool has_cell_to_output_weights = _ctx.at(cell_to_output_weights_index).shape().dim(0) != 0;
- bool has_projection_weights = _ctx.at(projection_weights_index).shape().dim(0) != 0 &&
- _ctx.at(projection_weights_index).shape().dim(1) != 0;
- bool has_projection_bias = _ctx.at(projection_bias_index).shape().dim(0);
-
- // NOTE The cell_to_input_weights do not exist in non-peephole although regular LSTM(non-CIFG).
- // true: no CIFG
- // false: CIFG
- bool has_cifg_param = has_input_to_input_weights && has_recurrent_to_input_weights;
-
- // NOTE The cell_to_input_weights do not exist in regular CIFG although peephole.
- // true: peephole
- // false: no peephole
- bool has_peephole_param = has_cell_to_forget_weights && has_cell_to_output_weights;
-
- // NOTE The projection weights may have data but the projection bias may not.
- bool has_projection_param = has_projection_weights;
-
- const auto batch_size = _ctx.at(input_index).shape().dim(0);
- OP_REQUIRES(batch_size == _ctx.at(output_state_in_index).shape().dim(0) &&
- batch_size == _ctx.at(cell_state_in_index).shape().dim(0) &&
- batch_size == _ctx.at(scratch_buffer_index).shape().dim(0) &&
- batch_size == _ctx.at(output_state_out_index).shape().dim(0) &&
- batch_size == _ctx.at(cell_state_out_index).shape().dim(0) &&
- batch_size == _ctx.at(output_index).shape().dim(0));
-
- const auto input_size = _ctx.at(input_index).shape().dim(1);
- OP_REQUIRES(input_size == _ctx.at(input_to_forget_weights_index).shape().dim(1) &&
- input_size == _ctx.at(input_to_cell_weights_index).shape().dim(1) &&
- input_size == _ctx.at(input_to_output_weights_index).shape().dim(1));
-
- const auto num_units = _ctx.at(cell_state_out_index).shape().dim(1);
- OP_REQUIRES(num_units == _ctx.at(input_to_forget_weights_index).shape().dim(0) &&
- num_units == _ctx.at(input_to_cell_weights_index).shape().dim(0) &&
- num_units == _ctx.at(input_to_output_weights_index).shape().dim(0) &&
- num_units == _ctx.at(recurrent_to_forget_weights_index).shape().dim(0) &&
- num_units == _ctx.at(recurrent_to_cell_weights_index).shape().dim(0) &&
- num_units == _ctx.at(recurrent_to_output_weights_index).shape().dim(0) &&
- num_units == _ctx.at(forget_gate_bias_index).shape().dim(0) &&
- num_units == _ctx.at(cell_bias_index).shape().dim(0) &&
- num_units == _ctx.at(output_gate_bias_index).shape().dim(0) &&
- num_units == _ctx.at(cell_state_in_index).shape().dim(1) &&
- (((num_units * 3) == _ctx.at(scratch_buffer_index).shape().dim(1)) ||
- ((num_units * 4) == _ctx.at(scratch_buffer_index).shape().dim(1))));
-
- const auto output_size = _ctx.at(output_index).shape().dim(1);
- OP_REQUIRES(output_size == _ctx.at(recurrent_to_forget_weights_index).shape().dim(1) &&
- output_size == _ctx.at(recurrent_to_cell_weights_index).shape().dim(1) &&
- output_size == _ctx.at(recurrent_to_output_weights_index).shape().dim(1) &&
- output_size == _ctx.at(output_state_in_index).shape().dim(1) &&
- output_size == _ctx.at(output_state_out_index).shape().dim(1));
-
- if (has_cifg_param)
- {
- OP_REQUIRES(input_size == _ctx.at(input_to_input_weights_index).shape().dim(1));
- OP_REQUIRES(num_units == _ctx.at(input_to_input_weights_index).shape().dim(0) &&
- num_units == _ctx.at(recurrent_to_input_weights_index).shape().dim(0) &&
- (num_units == _ctx.at(cell_to_input_weights_index).shape().dim(0) ||
- _ctx.at(cell_to_input_weights_index).shape().dim(0) == 0 /* non-peephole */) &&
- num_units == _ctx.at(input_gate_bias_index).shape().dim(0));
- OP_REQUIRES(output_size == _ctx.at(recurrent_to_input_weights_index).shape().dim(1));
- OP_REQUIRES(has_input_to_input_weights && has_recurrent_to_input_weights &&
- has_input_gate_bias);
- if (has_cell_to_input_weights)
- {
- // NOTE The cell_to_input_weights exist only in case of non-CIFG and peephole.
- OP_REQUIRES(has_peephole_param);
- }
- OP_REQUIRES(_ctx.at(scratch_buffer_index).shape().dim(1) == num_units * 4);
- }
- else
- {
- OP_REQUIRES(_ctx.at(scratch_buffer_index).shape().dim(1) == num_units * 3);
- }
-
- if (has_peephole_param)
- {
- OP_REQUIRES(num_units == _ctx.at(cell_to_forget_weights_index).shape().dim(0) &&
- num_units == _ctx.at(cell_to_output_weights_index).shape().dim(0) &&
- (num_units == _ctx.at(cell_to_input_weights_index).shape().dim(0) ||
- _ctx.at(cell_to_input_weights_index).shape().dim(0) == 0 /* CIFG */));
- }
-
- if (has_projection_param)
- {
- OP_REQUIRES(num_units == _ctx.at(projection_weights_index).shape().dim(1));
- OP_REQUIRES(output_size == _ctx.at(projection_weights_index).shape().dim(0));
- if (has_projection_bias)
- {
- OP_REQUIRES(output_size == _ctx.at(projection_bias_index).shape().dim(0));
- }
- }
-}
-
-void OperationValidator::visit(const ir::operation::L2Normalization &node)
-{
- const auto ofm_index{node.getOutputs().at(0)};
- if (_ctx.at(ofm_index).info().isDynamic())
- return;
-
- const auto ifm_index{node.getInputs().at(ir::operation::L2Normalization::Input::INPUT)};
-
- auto ifm_shape = _ctx.at(ifm_index).shape();
- auto ofm_shape = _ctx.at(ofm_index).shape();
-
- OP_REQUIRES(ifm_shape.rank() == ofm_shape.rank());
-
- for (auto i = 0; i < ifm_shape.rank(); i++)
- {
- OP_REQUIRES(ifm_shape.dim(i) == ofm_shape.dim(i));
- }
-}
-
-void OperationValidator::visit(const ir::operation::Unpack &node)
-{
- const auto num{node.param().num};
- OP_REQUIRES(num == static_cast<int32_t>(node.getOutputs().size()));
- const auto axis{node.param().axis};
-
- const auto output_index{node.getInputs().at(0)};
- if (_ctx.at(output_index).info().isDynamic())
- return;
-
- const auto input_index{node.getInputs().at(ir::operation::Unpack::Input::INPUT)};
-
- const auto &input_shape = _ctx.at(input_index).shape();
- const auto input_rank = static_cast<int32_t>(input_shape.rank());
-
- OP_REQUIRES(axis >= -input_rank && axis < input_rank);
+ OP_REQUIRES(num == static_cast<int32_t>(node.getInputs().size()));
}
void OperationValidator::visit(const ir::operation::Pad &node)
{
const auto pad_index{node.getInputs().at(ir::operation::Pad::Input::PAD)};
- OP_REQUIRES(_ctx.at(pad_index).typeInfo().type() == ir::DataType::INT32);
-
- const auto output_index{node.getInputs().at(0)};
- if (_ctx.at(output_index).info().isDynamic())
- return;
-
- const auto input_index{node.getInputs().at(ir::operation::Pad::Input::INPUT)};
-
- const auto &pad_shape = _ctx.at(pad_index).shape();
- const auto input_rank = static_cast<int32_t>(_ctx.at(input_index).shape().rank());
-
- OP_REQUIRES(pad_shape.rank() == 2);
- OP_REQUIRES(pad_shape.dim(0) == input_rank);
- OP_REQUIRES(pad_shape.dim(1) == 2);
- OP_REQUIRES(_ctx.at(input_index).shape().rank() == _ctx.at(output_index).shape().rank());
-}
-
-void OperationValidator::visit(const ir::operation::Select &node)
-{
- const auto output_index{node.getOutputs().at(0)};
- // This validator does not check shape. So checking isDynamic() is skipped.
-
- const auto condition_index{node.getInputs().at(ir::operation::Select::Input::CONDITION)};
- const auto input_true_index{node.getInputs().at(ir::operation::Select::Input::INPUT_TRUE)};
- const auto input_false_index{node.getInputs().at(ir::operation::Select::Input::INPUT_FALSE)};
- UNUSED_RELEASE(output_index);
- UNUSED_RELEASE(input_true_index);
- UNUSED_RELEASE(input_false_index);
-
- OP_REQUIRES(_ctx.at(condition_index).typeInfo().type() == ir::DataType::BOOL8);
-}
-
-void OperationValidator::visit(const ir::operation::StridedSlice &node)
-{
- const auto output_index{node.getOutputs().at(0)};
- const auto input_index{node.getInputs().at(ir::operation::StridedSlice::Input::INPUT)};
- const auto starts_index{node.getInputs().at(ir::operation::StridedSlice::Input::STARTS)};
- const auto ends_index{node.getInputs().at(ir::operation::StridedSlice::Input::ENDS)};
- const auto strides_index{node.getInputs().at(ir::operation::StridedSlice::Input::STRIDES)};
-
- UNUSED_RELEASE(starts_index);
- UNUSED_RELEASE(ends_index);
- UNUSED_RELEASE(strides_index);
-
- OP_REQUIRES(_ctx.at(output_index).typeInfo().type() == _ctx.at(input_index).typeInfo().type());
-
- if (_ctx.at(output_index).info().isDynamic())
- return;
-
- OP_REQUIRES(_ctx.at(input_index).shape().rank() <= 4);
-}
-void OperationValidator::visit(const ir::operation::Split &node)
-{
- const auto input_index{node.getInputs().at(ir::operation::Split::Input::INPUT)};
-
- if (_ctx.at(input_index).info().isDynamic())
- return;
-
- const auto num_splits = node.param().num_splits;
- const auto input_rank = _ctx.at(input_index).shape().rank();
- const auto axis = node.param().axis < 0 ? node.param().axis + input_rank : node.param().axis;
-
- OP_REQUIRES(num_splits > 0 && num_splits <= 0xFFFF);
- OP_REQUIRES(axis >= 0 && axis < input_rank);
- OP_REQUIRES(node.getOutputs().size() == static_cast<uint32_t>(num_splits));
-
- OP_REQUIRES(_ctx.at(input_index).shape().dim(axis) % num_splits == 0);
-}
-
-void OperationValidator::visit(const ir::operation::Shape &node)
-{
- const auto output_index{node.getOutputs().at(0)};
- if (_ctx.at(output_index).info().isDynamic())
- return;
-
- const auto input_index{node.getInputs().at(0)};
- UNUSED_RELEASE(input_index);
- OP_REQUIRES(_ctx.at(output_index).shape().rank() == 1);
+ OP_REQUIRES(_ctx.at(pad_index).typeInfo().type() == ir::DataType::INT32);
}
void OperationValidator::visit(const ir::operation::ResizeBilinear &node)
{
- const auto output_index{node.getOutputs().at(0)};
- const auto input_index{node.getInputs().at(ir::operation::ResizeBilinear::Input::INPUT)};
-
- if (_ctx.at(output_index).info().isDynamic())
- {
- return;
- }
- OP_REQUIRES(_ctx.at(input_index).shape().rank() == 4);
- OP_REQUIRES(_ctx.at(output_index).shape().rank() == 4);
-
auto align_corners = node.param().align_corners;
auto half_pixel_centers = node.param().half_pixel_centers;
@@ -923,23 +178,31 @@ void OperationValidator::visit(const ir::operation::Reverse &node)
OP_REQUIRES(_ctx.at(axis_index).typeInfo().type() == ir::DataType::INT32);
OP_REQUIRES(_ctx.at(output_index).typeInfo().type() == _ctx.at(input_index).typeInfo().type());
+}
+
+void OperationValidator::visit(const ir::operation::SpaceToBatchND &node)
+{
+ const auto block_size_index{
+ node.getInputs().at(ir::operation::SpaceToBatchND::Input::BLOCK_SIZE)};
+ const auto paddings_index{node.getInputs().at(ir::operation::SpaceToBatchND::Input::PADDINGS)};
- if (_ctx.at(output_index).info().isDynamic())
- return;
- OP_REQUIRES(_ctx.at(output_index).shape() == _ctx.at(input_index).shape());
+ // Non-constant block_size and paddings are not implemented yet
+ OP_REQUIRES(_ctx.at(block_size_index).isConstant());
+ OP_REQUIRES(_ctx.at(paddings_index).isConstant());
}
-void OperationValidator::visit(const ir::operation::If &)
+void OperationValidator::visit(const ir::operation::SpaceToDepth &node)
{
- // TODO Add to validate with subgraphs
+ const auto block_size = node.param().block_size;
+ OP_REQUIRES(block_size >= 1);
}
-void OperationValidator::visit(const ir::operation::While &node)
+void OperationValidator::visit(const ir::operation::Split &node)
{
- // This validator does not check shape. So checking isDynamic() is skipped.
+ const auto num_splits = node.param().num_splits;
- OP_REQUIRES(node.getInputs().size() == node.getOutputs().size());
- // TODO Add to validate with subgraphs
+ OP_REQUIRES(num_splits > 0 && num_splits <= 0xFFFF);
+ OP_REQUIRES(node.getOutputs().size() == static_cast<uint32_t>(num_splits));
}
void OperationValidator::visit(const ir::operation::SquaredDifference &node)
@@ -948,105 +211,33 @@ void OperationValidator::visit(const ir::operation::SquaredDifference &node)
const auto lhs_index{node.getInputs().at(ir::operation::SquaredDifference::Input::LHS)};
const auto rhs_index{node.getInputs().at(ir::operation::SquaredDifference::Input::RHS)};
- // Check for Type equivalence
OP_REQUIRES(_ctx.at(output_index).typeInfo().type() == _ctx.at(lhs_index).typeInfo().type());
OP_REQUIRES(_ctx.at(lhs_index).typeInfo().type() == _ctx.at(rhs_index).typeInfo().type());
-
- // Check for dimension constraints
- if (_ctx.at(output_index).info().isDynamic())
- return;
-
- auto output_shape = _ctx.at(output_index).shape();
- auto lhs_shape = _ctx.at(lhs_index).shape();
- auto rhs_shape = _ctx.at(rhs_index).shape();
- // Check for output rank
- OP_REQUIRES(output_shape.rank() == std::max(lhs_shape.rank(), rhs_shape.rank()));
- auto min_rank = std::min(lhs_shape.rank(), rhs_shape.rank());
-
- for (int idx = 1; idx <= min_rank; idx++)
- {
- int l_idx = lhs_shape.rank() - idx;
- int r_idx = rhs_shape.rank() - idx;
- int out_idx = output_shape.rank() - idx;
-
- OP_REQUIRES((l_idx >= 0) && (r_idx >= 0) && (out_idx >= 0));
-
- auto l_dims = lhs_shape.dim(l_idx);
- auto r_dims = rhs_shape.dim(r_idx);
- auto out_dims = output_shape.dim(out_idx);
-
- OP_REQUIRES(((l_dims == r_dims) && (out_dims == l_dims)) ||
- ((l_dims == 1) && (out_dims == r_dims)) || ((r_dims == 1) && (out_dims == l_dims)));
- }
- auto &tmp_shape = (lhs_shape.rank() > rhs_shape.rank()) ? lhs_shape : rhs_shape;
- for (int idx = min_rank + 1; idx <= output_shape.rank(); idx++)
- {
- int out_idx = output_shape.rank() - idx;
- int tmp_idx = tmp_shape.rank() - idx;
-
- OP_REQUIRES((out_idx >= 0) && (tmp_idx >= 0) &&
- (output_shape.dim(out_idx) == tmp_shape.dim(tmp_idx)));
- }
}
-void OperationValidator::visit(const ir::operation::Tile &node)
+
+void OperationValidator::visit(const ir::operation::StridedSlice &node)
{
const auto output_index{node.getOutputs().at(0)};
- if (_ctx.at(output_index).info().isDynamic())
- return;
-
- const auto input_index{node.getInputs().at(0)};
- const auto multiple_index{node.getInputs().at(1)};
+ const auto input_index{node.getInputs().at(ir::operation::StridedSlice::Input::INPUT)};
- OP_REQUIRES(_ctx.at(multiple_index).shape().rank() == 1);
- OP_REQUIRES(_ctx.at(multiple_index).shape().dim(0) == _ctx.at(input_index).shape().rank());
- OP_REQUIRES(_ctx.at(input_index).shape().rank() == _ctx.at(output_index).shape().rank());
+ OP_REQUIRES(_ctx.at(output_index).typeInfo().type() == _ctx.at(input_index).typeInfo().type());
}
-void OperationValidator::visit(const ir::operation::Range &node)
+void OperationValidator::visit(const ir::operation::TransposeConv &node)
{
- const auto output_index{node.getOutputs().at(0)};
- const auto start_index{node.getInputs().at(ir::operation::Range::Input::START)};
- const auto limit_index{node.getInputs().at(ir::operation::Range::Input::LIMIT)};
- const auto delta_index{node.getInputs().at(ir::operation::Range::Input::DELTA)};
-
- // Check for dimension constraints
- if (_ctx.at(output_index).info().isDynamic())
- return;
-
- OP_REQUIRES(_ctx.at(start_index).shape().rank() == 0);
- OP_REQUIRES(_ctx.at(limit_index).shape().rank() == 0);
- OP_REQUIRES(_ctx.at(delta_index).shape().rank() == 0);
+ OP_REQUIRES((node.param().padding.type == ir::PaddingType::SAME) ||
+ (node.param().padding.type == ir::PaddingType::VALID));
}
-void OperationValidator::visit(const ir::operation::MatrixBandPart &node)
+void OperationValidator::visit(const ir::operation::Unpack &node)
{
- const auto output_index{node.getOutputs().at(0)};
- const auto input_index{node.getInputs().at(ir::operation::MatrixBandPart::Input::INPUT)};
- const auto num_lower_index{
- node.getInputs().at(ir::operation::MatrixBandPart::Input::NUM_LOWER_DIAG)};
- const auto num_upper_index{
- node.getInputs().at(ir::operation::MatrixBandPart::Input::NUM_UPPER_DIAG)};
-
- // Check for dimension constraints
- if (_ctx.at(output_index).info().isDynamic())
- return;
-
- OP_REQUIRES(_ctx.at(input_index).shape().rank() >= 2); // input must be more than 2 dim matrix
- OP_REQUIRES(_ctx.at(num_upper_index).shape().rank() == 0); // num_lower must be scalar
- OP_REQUIRES(_ctx.at(num_lower_index).shape().rank() == 0); // num_upper must be scalar
+ const auto num{node.param().num};
+ OP_REQUIRES(num == static_cast<int32_t>(node.getOutputs().size()));
}
-void OperationValidator::visit(const ir::operation::LogSoftmax &node)
+void OperationValidator::visit(const ir::operation::While &node)
{
- VERBOSE(LogSoftmax) << "Configure LOGSOFTMAX operation" << std::endl;
-
- const auto output_index{node.getOutputs().at(0)};
- if (_ctx.at(output_index).info().isDynamic())
- return;
-
- const auto input_index{node.getInputs().at(0)};
-
- OP_REQUIRES(_ctx.at(output_index).shape().rank() == _ctx.at(input_index).shape().rank());
+ OP_REQUIRES(node.getInputs().size() == node.getOutputs().size());
}
} // namespace compiler
diff --git a/runtime/onert/core/src/compiler/OperationValidator.h b/runtime/onert/core/src/compiler/OperationValidator.h
index deb6357bb..f884a3765 100644
--- a/runtime/onert/core/src/compiler/OperationValidator.h
+++ b/runtime/onert/core/src/compiler/OperationValidator.h
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
@@ -17,7 +17,6 @@
#ifndef __ONERT_COMPILER_OPERATION_VALIDATOR_H__
#define __ONERT_COMPILER_OPERATION_VALIDATOR_H__
-#include "ir/Layout.h"
#include "ir/OperationVisitor.h"
namespace onert
@@ -47,51 +46,30 @@ public:
void visit(const ir::operation::BatchMatMul &node) override;
void visit(const ir::operation::BatchToSpaceND &node) override;
void visit(const ir::operation::Comparison &node) override;
- void visit(const ir::operation::Softmax &node) override;
- void visit(const ir::operation::InstanceNorm &node) override;
- void visit(const ir::operation::Permute &node) override;
- void visit(const ir::operation::Pool2D &node) override;
- void visit(const ir::operation::Reduce &node) override;
- void visit(const ir::operation::Transpose &node) override;
- void visit(const ir::operation::RNN &node) override;
- void visit(const ir::operation::SpaceToBatchND &node) override;
- void visit(const ir::operation::SpaceToDepth &node) override;
+ void visit(const ir::operation::DepthToSpace &node) override;
void visit(const ir::operation::ElementwiseActivation &node) override;
void visit(const ir::operation::ElementwiseBinary &node) override;
void visit(const ir::operation::ElementwiseUnary &node) override;
void visit(const ir::operation::EmbeddingLookup &node) override;
void visit(const ir::operation::ExpandDims &node) override;
void visit(const ir::operation::HashtableLookup &node) override;
- void visit(const ir::operation::TransposeConv &node) override;
- void visit(const ir::operation::Gather &node) override;
- void visit(const ir::operation::DepthToSpace &node) override;
void visit(const ir::operation::Pack &node) override;
- void visit(const ir::operation::LSTM &node) override;
- void visit(const ir::operation::L2Normalization &node) override;
- void visit(const ir::operation::Unpack &node) override;
void visit(const ir::operation::Pad &node) override;
- void visit(const ir::operation::Select &node) override;
- void visit(const ir::operation::StridedSlice &node) override;
- void visit(const ir::operation::Split &node) override;
- void visit(const ir::operation::Shape &node) override;
void visit(const ir::operation::ResizeBilinear &node) override;
void visit(const ir::operation::Reverse &node) override;
- void visit(const ir::operation::If &node) override;
- void visit(const ir::operation::While &node) override;
+ void visit(const ir::operation::SpaceToBatchND &node) override;
+ void visit(const ir::operation::SpaceToDepth &node) override;
+ void visit(const ir::operation::Split &node) override;
void visit(const ir::operation::SquaredDifference &node) override;
- void visit(const ir::operation::Tile &node) override;
- void visit(const ir::operation::Range &node) override;
- void visit(const ir::operation::MatrixBandPart &node) override;
- void visit(const ir::operation::LogSoftmax &node) override;
-
-private:
- void checkUnaryOp(const ir::Operation &node);
+ void visit(const ir::operation::StridedSlice &node) override;
+ void visit(const ir::operation::TransposeConv &node) override;
+ void visit(const ir::operation::Unpack &node) override;
+ void visit(const ir::operation::While &node) override;
private:
// TODO Remove _ctx field
const ir::Graph &_graph;
const ir::Operands &_ctx;
- ir::Layout _current_op_seq_layout;
};
} // namespace compiler
diff --git a/runtime/onert/core/src/compiler/ShapeValidator.cc b/runtime/onert/core/src/compiler/ShapeValidator.cc
new file mode 100644
index 000000000..8be4fe6ec
--- /dev/null
+++ b/runtime/onert/core/src/compiler/ShapeValidator.cc
@@ -0,0 +1,1021 @@
+/*
+ * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "ShapeValidator.h"
+
+#include <typeinfo>
+
+#include "ir/Graph.h"
+#include "ir/operation/LowerInfo.h"
+
+#include "util/logging.h"
+#include "util/Utils.h"
+
+#define OP_REQUIRES(EXP) \
+ do \
+ { \
+ if (!(EXP)) \
+ throw std::runtime_error("ShapeValidator failed at line " + std::to_string(__LINE__)); \
+ } while (0)
+
+namespace onert
+{
+namespace compiler
+{
+
+ShapeValidator::ShapeValidator(const ir::Graph &graph)
+ : _graph{graph}, _ctx{graph.operands()}, _current_op_seq_layout{ir::Layout::UNKNOWN}
+{
+}
+
+void ShapeValidator::checkUnaryOp(const ir::Operation &node)
+{
+ const auto output_index{node.getOutputs().at(0)};
+ const auto input_index{node.getInputs().at(0)};
+
+ if (_ctx.at(output_index).info().isDynamic())
+ return;
+
+ // Check if I/O shapes match
+ OP_REQUIRES(_ctx.at(output_index).shape() == _ctx.at(input_index).shape());
+}
+
+void ShapeValidator::operator()()
+{
+  // No subgraph should contain its own subgraphs, since the compiler already holds every
+  // subgraph by the time the Compiler is created
+ assert(_graph.subgraphs() == nullptr);
+
+ _current_op_seq_layout = _graph.layout();
+
+ _graph.operations().iterate(
+ [&](const ir::OperationIndex &, const ir::Operation &node) { node.accept(*this); });
+}
+
+void ShapeValidator::visit(const ir::operation::BatchMatMul &node)
+{
+ const auto lhs_index(node.getInputs().at(ir::operation::BatchMatMul::Input::LHS));
+ const auto rhs_index(node.getInputs().at(ir::operation::BatchMatMul::Input::RHS));
+ const auto out_index{node.getOutputs().at(0)};
+
+ if (_ctx.at(out_index).info().isDynamic())
+ return;
+
+ OP_REQUIRES(_ctx.at(lhs_index).shape().rank() <= 4);
+ OP_REQUIRES(_ctx.at(rhs_index).shape().rank() <= 4);
+ OP_REQUIRES(_ctx.at(lhs_index).shape().rank() >= 2);
+ OP_REQUIRES(_ctx.at(rhs_index).shape().rank() >= 2);
+}
+
+void ShapeValidator::visit(const ir::operation::BatchToSpaceND &node)
+{
+ const auto ofm_index{node.getOutputs().at(0)};
+ if (_ctx.at(ofm_index).info().isDynamic())
+ return;
+
+ const auto ifm_index{node.getInputs().at(ir::operation::BatchToSpaceND::Input::INPUT)};
+ const auto block_size_index{
+ node.getInputs().at(ir::operation::BatchToSpaceND::Input::BLOCK_SIZE)};
+
+ const auto frontend_layout = _current_op_seq_layout;
+ const auto input_shape = _ctx.at(ifm_index).shape().asFeature(frontend_layout);
+ const auto output_shape = _ctx.at(ofm_index).shape().asFeature(frontend_layout);
+
+  // All requirements as per the NNAPI specification.
+ OP_REQUIRES(_ctx.at(ifm_index).shape().rank() == 4);
+ OP_REQUIRES(_ctx.at(ofm_index).shape().rank() == 4);
+ OP_REQUIRES(_ctx.at(block_size_index).shape().rank() == 1);
+
+ OP_REQUIRES(_ctx.at(block_size_index).shape().dim(0) == 2);
+
+ OP_REQUIRES(input_shape.C == output_shape.C);
+}
+
+void ShapeValidator::visit(const ir::operation::BCQFullyConnected &node)
+{
+ const auto ofm_index{node.getOutputs().at(0)};
+ if (_ctx.at(ofm_index).info().isDynamic())
+ return;
+
+ const auto ifm_index{node.getInputs().at(ir::operation::BCQFullyConnected::Input::INPUT)};
+ const auto weight_scales_index{
+ node.getInputs().at(ir::operation::BCQFullyConnected::Input::WEIGHTS_SCALES)};
+ const auto weight_binary_index{
+ node.getInputs().at(ir::operation::BCQFullyConnected::Input::WEIGHTS_BINARY)};
+ const auto weight_cluster_index{
+ node.getInputs().at(ir::operation::BCQFullyConnected::Input::WEIGHTS_CLUSTERS)};
+ // const auto bias_index{node.getInputs().at(ir::operation::BCQFullyConnected::Input::BIAS)};
+
+ OP_REQUIRES(_ctx.at(ifm_index).shape().rank() == 2);
+ OP_REQUIRES(_ctx.at(ofm_index).shape().rank() == 2);
+ OP_REQUIRES(_ctx.at(weight_scales_index).shape().rank() == 1);
+ OP_REQUIRES(_ctx.at(weight_binary_index).shape().rank() == 2);
+ OP_REQUIRES(_ctx.at(weight_cluster_index).shape().rank() == 2);
+
+ OP_REQUIRES(_ctx.at(ifm_index).shape().dim(1) == _ctx.at(ofm_index).shape().dim(1));
+
+ OP_REQUIRES(_ctx.at(weight_cluster_index).shape().dim(0) > 0);
+ OP_REQUIRES(_ctx.at(weight_cluster_index).shape().dim(1) == 2);
+
+  // More shape validation will be done inside the kernel.
+
+ // TODO Check bias dimension (can be null tensor)
+}
+
+void ShapeValidator::visit(const ir::operation::BCQGather &node)
+{
+ const auto ofm_index{node.getOutputs().at(0)};
+ if (_ctx.at(ofm_index).info().isDynamic())
+ return;
+
+ const auto indices_index{node.getInputs().at(ir::operation::BCQGather::Input::INDICES)};
+ const auto input_binary_index{node.getInputs().at(ir::operation::BCQGather::Input::INPUT_BINARY)};
+ const auto input_scales_index{node.getInputs().at(ir::operation::BCQGather::Input::INPUT_SCALES)};
+ const auto input_clusters_index{
+ node.getInputs().at(ir::operation::BCQGather::Input::INPUT_CLUSTERS)};
+
+ OP_REQUIRES(_ctx.at(indices_index).shape().rank() <= 2); // TODO : support rank up to 4 or more
+ OP_REQUIRES(_ctx.at(input_binary_index).shape().rank() == 2);
+ OP_REQUIRES(_ctx.at(input_scales_index).shape().rank() == 1);
+ OP_REQUIRES(_ctx.at(input_clusters_index).shape().rank() == 2);
+
+ OP_REQUIRES(_ctx.at(input_clusters_index).shape().dim(0) > 0);
+ OP_REQUIRES(_ctx.at(input_clusters_index).shape().dim(1) == 2);
+
+  // More shape validation will be done inside the kernel.
+}
+
+void ShapeValidator::visit(const ir::operation::Comparison &)
+{
+ // TODO Shape validation of comparison
+}
+
+void ShapeValidator::visit(const ir::operation::Softmax &node)
+{
+ const auto output_index{node.getOutputs().at(0)};
+ if (_ctx.at(output_index).info().isDynamic())
+ return;
+
+ const auto input_index{node.getInputs().at(0)};
+
+ OP_REQUIRES(_ctx.at(output_index).shape().rank() == _ctx.at(input_index).shape().rank());
+}
+
+void ShapeValidator::visit(const ir::operation::InstanceNorm &node)
+{
+ const auto ofm_index{node.getOutputs().at(0)};
+ if (_ctx.at(ofm_index).info().isDynamic())
+ return;
+
+ const auto ifm_index{node.getInputs().at(ir::operation::InstanceNorm::Input::INPUT)};
+ const auto gamma_index{node.getInputs().at(ir::operation::InstanceNorm::Input::GAMMA)};
+ const auto beta_index{node.getInputs().at(ir::operation::InstanceNorm::Input::BETA)};
+
+ OP_REQUIRES(_ctx.at(ifm_index).shape().rank() == 4);
+ OP_REQUIRES(_ctx.at(ifm_index).shape() == _ctx.at(ofm_index).shape());
+ OP_REQUIRES(_ctx.at(gamma_index).shape().rank() == 1);
+ OP_REQUIRES(_ctx.at(beta_index).shape().rank() == 1);
+}
+
+void ShapeValidator::visit(const ir::operation::Pool2D &node)
+{
+ const auto ofm_index{node.getOutputs().at(0)};
+ if (_ctx.at(ofm_index).info().isDynamic())
+ return;
+
+ const auto ifm_index{node.getInputs().at(ir::operation::Pool2D::Input::INPUT)};
+
+ OP_REQUIRES(_ctx.at(ifm_index).shape().rank() == 4);
+}
+
+void ShapeValidator::visit(const ir::operation::Permute &node)
+{
+ const auto output_index{node.getOutputs().at(0)};
+ if (_ctx.at(output_index).info().isDynamic())
+ return;
+
+ const auto input_index{node.getInputs().at(0)};
+
+ OP_REQUIRES(_ctx.at(output_index).shape().rank() == _ctx.at(input_index).shape().rank());
+}
+
+void ShapeValidator::visit(const ir::operation::Reduce &node)
+{
+ const auto output_index{node.getOutputs().at(0)};
+ if (_ctx.at(output_index).info().isDynamic())
+ return;
+
+ const auto input_index{node.getInputs().at(ir::operation::Reduce::Input::INPUT)};
+ const auto input_shape = _ctx.at(input_index).shape();
+ const auto output_shape = _ctx.at(output_index).shape();
+
+ OP_REQUIRES(input_shape.rank() <= 4);
+ OP_REQUIRES(output_shape.rank() <= input_shape.rank());
+
+  // NOTE For 4-dimensional input, if the ranks of the input and output differ, this runtime only
+  // supports reducing height and width, or reducing depth.
+  // TODO We have to support all cases of dimensions up to 4.
+  // For correct permuting, the output shape has to keep each remaining dimension in the same
+  // position it had in the input, but the reduced output may place those dimensions differently.
+  // For example, an input of shape {2,3,4,5} can be reduced to an output of shape {3,5}. The
+  // position-preserving output shape would be {1,3,1,5}, but the real output shape may be {3,5};
+  // naively extending {3,5} back to 4 dimensions gives {1,1,3,5} instead.
+  // Even if the output shape were changed to {1,3,1,5}, there is another problem: the shape of
+  // the output tensor consumed by the next operation would then also become {1,3,1,5}, even when
+  // that is not what the next operation expects.
+ if (input_shape.rank() == 4 && input_shape.rank() != output_shape.rank())
+ {
+ if (output_shape.rank() == 2)
+ {
+ // Reducing HW
+ OP_REQUIRES(input_shape.dim(0) == output_shape.dim(0) &&
+ input_shape.dim(3) == output_shape.dim(1));
+ }
+ else if (output_shape.rank() == 3)
+ {
+      // Reducing C, or
+      // (reducing H or W while C == 1 in both the input and the output)
+ OP_REQUIRES((input_shape.dim(0) == output_shape.dim(0) &&
+ input_shape.dim(1) == output_shape.dim(1) &&
+ input_shape.dim(2) == output_shape.dim(2)) ||
+ (input_shape.dim(0) == output_shape.dim(0) &&
+ (input_shape.dim(1) == output_shape.dim(1) ||
+ input_shape.dim(2) == output_shape.dim(1)) &&
+ input_shape.dim(3) == 1 && output_shape.dim(2) == 1));
+ }
+ }
+}
+
+void ShapeValidator::visit(const ir::operation::Transpose &node)
+{
+ const auto output_index{node.getOutputs().at(0)};
+ if (_ctx.at(output_index).info().isDynamic())
+ return;
+
+ const auto input_index{node.getInputs().at(ir::operation::Transpose::Input::INPUT)};
+ const auto perm_index{node.getInputs().at(ir::operation::Transpose::Input::PERMUTATION)};
+
+ const auto &output_shape = _ctx.at(output_index).shape();
+ const auto &input_shape = _ctx.at(input_index).shape();
+
+ OP_REQUIRES(_ctx.at(perm_index).shape().num_elements() == 0 ||
+ input_shape.rank() == static_cast<int>(_ctx.at(perm_index).shape().num_elements()));
+ OP_REQUIRES(input_shape.rank() == output_shape.rank());
+}
+
+void ShapeValidator::visit(const ir::operation::RNN &node)
+{
+  // NOTE This validation covers a static RNN (non-dynamic shapes), but not a dynamic RNN
+  // TODO Support dynamic RNN
+ const auto output_index{node.getOutputs().at(ir::operation::RNN::Output::OUTPUT)};
+ if (_ctx.at(output_index).info().isDynamic())
+ return;
+
+ const auto hidden_state_out_index{
+ node.getOutputs().at(ir::operation::RNN::Output::HIDDEN_STATE_OUT)};
+
+ const auto input_index{node.getInputs().at(ir::operation::RNN::Input::INPUT)};
+ const auto weights_index{node.getInputs().at(ir::operation::RNN::Input::WEIGHTS)};
+ const auto recurrent_weights_index{
+ node.getInputs().at(ir::operation::RNN::Input::RECURRENT_WEIGHTS)};
+ const auto bias_index{node.getInputs().at(ir::operation::RNN::Input::BIAS)};
+ const auto hidden_state_in_index{node.getInputs().at(ir::operation::RNN::Input::HIDDEN_STATE_IN)};
+
+ const auto batch_size = _ctx.at(output_index).shape().dim(0);
+ const auto num_units = _ctx.at(output_index).shape().dim(1);
+
+ OP_REQUIRES(_ctx.at(output_index).shape().rank() == 2 &&
+ _ctx.at(hidden_state_out_index).shape().rank() == 2 &&
+ _ctx.at(input_index).shape().rank() == 2 &&
+ _ctx.at(weights_index).shape().rank() == 2 &&
+ _ctx.at(recurrent_weights_index).shape().rank() == 2 &&
+ _ctx.at(hidden_state_in_index).shape().rank() == 2);
+ OP_REQUIRES(_ctx.at(bias_index).shape().rank() == 1);
+
+ OP_REQUIRES(batch_size == _ctx.at(input_index).shape().dim(0) &&
+ batch_size == _ctx.at(hidden_state_in_index).shape().dim(0) &&
+ batch_size == _ctx.at(hidden_state_out_index).shape().dim(0));
+ OP_REQUIRES(_ctx.at(input_index).shape().dim(1) == _ctx.at(weights_index).shape().dim(1));
+
+ OP_REQUIRES(num_units == _ctx.at(weights_index).shape().dim(0) &&
+ num_units == _ctx.at(recurrent_weights_index).shape().dim(0) &&
+ num_units == _ctx.at(bias_index).shape().dim(0));
+ OP_REQUIRES(num_units == _ctx.at(output_index).shape().dim(1) &&
+ num_units == _ctx.at(recurrent_weights_index).shape().dim(1) &&
+ num_units == _ctx.at(hidden_state_in_index).shape().dim(1) &&
+ num_units == _ctx.at(hidden_state_out_index).shape().dim(1));
+}
+
+void ShapeValidator::visit(const ir::operation::SpaceToBatchND &node)
+{
+ const auto ofm_index{node.getOutputs().at(0)};
+ if (_ctx.at(ofm_index).info().isDynamic())
+ return;
+
+ const auto ifm_index{node.getInputs().at(ir::operation::SpaceToBatchND::Input::INPUT)};
+ const auto block_size_index{
+ node.getInputs().at(ir::operation::SpaceToBatchND::Input::BLOCK_SIZE)};
+ const auto paddings_index{node.getInputs().at(ir::operation::SpaceToBatchND::Input::PADDINGS)};
+
+ const auto frontend_layout = _current_op_seq_layout;
+ const auto input_shape = _ctx.at(ifm_index).shape().asFeature(frontend_layout);
+ const auto output_shape = _ctx.at(ofm_index).shape().asFeature(frontend_layout);
+
+  // All requirements as per the NNAPI specification.
+ OP_REQUIRES(_ctx.at(ifm_index).shape().rank() == 4);
+ OP_REQUIRES(_ctx.at(ofm_index).shape().rank() == 4);
+ OP_REQUIRES(_ctx.at(block_size_index).shape().rank() == 1);
+ OP_REQUIRES(_ctx.at(paddings_index).shape().rank() == 2);
+
+ OP_REQUIRES(_ctx.at(block_size_index).shape().dim(0) == 2);
+ OP_REQUIRES(_ctx.at(paddings_index).shape().dim(0) == 2);
+ OP_REQUIRES(_ctx.at(paddings_index).shape().dim(1) == 2);
+
+ OP_REQUIRES(input_shape.C == output_shape.C);
+}
+
+void ShapeValidator::visit(const ir::operation::SpaceToDepth &node)
+{
+ const auto ofm_index{node.getOutputs().at(0)};
+ if (_ctx.at(ofm_index).info().isDynamic())
+ return;
+
+ const auto ifm_index{node.getInputs().at(ir::operation::SpaceToDepth::Input::INPUT)};
+
+ const auto frontend_layout = _current_op_seq_layout;
+ const auto input_shape = _ctx.at(ifm_index).shape().asFeature(frontend_layout);
+ const auto output_shape = _ctx.at(ofm_index).shape().asFeature(frontend_layout);
+ const auto block_size = node.param().block_size;
+
+  // All assertions as per the NNAPI specification.
+ OP_REQUIRES(_ctx.at(ifm_index).shape().rank() == 4);
+ OP_REQUIRES(_ctx.at(ofm_index).shape().rank() == 4);
+ OP_REQUIRES((input_shape.H % block_size == 0) && (input_shape.W % block_size == 0));
+ OP_REQUIRES(input_shape.N == output_shape.N);
+ OP_REQUIRES(input_shape.C * block_size * block_size == output_shape.C);
+}
+
+void ShapeValidator::visit(const ir::operation::ElementwiseActivation &node) { checkUnaryOp(node); }
+
+void ShapeValidator::visit(const ir::operation::ElementwiseBinary &)
+{
+ // TODO Shape validation of ElementwiseBinary
+}
+
+void ShapeValidator::visit(const ir::operation::ElementwiseUnary &node)
+{
+ const auto output_index{node.getOutputs().at(0)};
+ const auto input_index{node.getInputs().at(ir::operation::ElementwiseUnary::Input::INPUT)};
+
+ if (_ctx.at(output_index).info().isDynamic())
+ return;
+
+ OP_REQUIRES(_ctx.at(output_index).shape() == _ctx.at(input_index).shape());
+}
+
+void ShapeValidator::visit(const ir::operation::EmbeddingLookup &node)
+{
+ const auto output_index{node.getOutputs().at(0)};
+ const auto lookups_index{node.getInputs().at(ir::operation::EmbeddingLookup::Input::LOOKUPS)};
+ const auto values_index{node.getInputs().at(ir::operation::EmbeddingLookup::Input::VALUES)};
+
+ const auto &output_obj = _ctx.at(output_index);
+ const auto &lookups_obj = _ctx.at(lookups_index);
+ const auto &values_obj = _ctx.at(values_index);
+
+  // Verify the operands here, not in SimpleEmbeddingLookup::configure(), to avoid ACL sometimes
+  // modifying TensorShape (Issue: https://github.sec.samsung.net/STAR/nnfw/issues/729)
+ {
+ if (_ctx.at(output_index).info().isDynamic())
+ return;
+
+ const auto &output_shape = output_obj.shape();
+ const auto &lookups_shape = lookups_obj.shape();
+ const auto &values_shape = values_obj.shape();
+
+ OP_REQUIRES(lookups_shape.rank() == 1);
+ OP_REQUIRES(values_shape.rank() >= 2);
+
+    // The output should be an n-D tensor with the same rank and shape as the values tensor, except
+    // for the first dimension, which has the same size as lookups' only dimension.
+ OP_REQUIRES(output_shape.rank() == values_shape.rank());
+ OP_REQUIRES(output_shape.dim(0) == lookups_shape.dim(0));
+ for (int n = 1; n < output_shape.rank(); ++n)
+ {
+ OP_REQUIRES(output_shape.dim(n) == values_shape.dim(n));
+ }
+ }
+}
+
+void ShapeValidator::visit(const ir::operation::ExpandDims &node)
+{
+ const auto axis_index{node.getInputs().at(ir::operation::ExpandDims::Input::AXIS)};
+
+ if (_ctx.at(axis_index).info().isDynamic())
+ return;
+ OP_REQUIRES(_ctx.at(axis_index).shape().rank() <= 1);
+}
+
+void ShapeValidator::visit(const ir::operation::HashtableLookup &node)
+{
+ const auto output_index{node.getOutputs().at(ir::operation::HashtableLookup::Output::OUTPUT)};
+ const auto lookups_index{node.getInputs().at(ir::operation::HashtableLookup::Input::LOOKUPS)};
+ const auto keys_index{node.getInputs().at(ir::operation::HashtableLookup::Input::KEYS)};
+ const auto values_index{node.getInputs().at(ir::operation::HashtableLookup::Input::VALUES)};
+
+ const auto &output_obj = _ctx.at(output_index);
+ const auto &lookups_obj = _ctx.at(lookups_index);
+ const auto &keys_obj = _ctx.at(keys_index);
+ const auto &values_obj = _ctx.at(values_index);
+
+ if (_ctx.at(output_index).info().isDynamic())
+ return;
+
+ const auto &output_shape = output_obj.shape();
+ const auto &lookups_shape = lookups_obj.shape();
+ const auto &keys_shape = keys_obj.shape();
+ const auto &values_shape = values_obj.shape();
+
+ OP_REQUIRES(values_shape.rank() == output_shape.rank());
+ OP_REQUIRES(lookups_shape.rank() == 1);
+ OP_REQUIRES(keys_shape.rank() == 1);
+ OP_REQUIRES(values_shape.dim(0) == keys_shape.dim(0));
+ OP_REQUIRES(lookups_shape.dim(0) == output_shape.dim(0));
+}
+
+void ShapeValidator::visit(const ir::operation::TransposeConv &node)
+{
+ // shape check
+ const auto ofm_index{node.getOutputs().at(0)};
+ if (_ctx.at(ofm_index).info().isDynamic())
+ return;
+
+ const auto ifm_index{node.getInputs().at(ir::operation::TransposeConv::Input::INPUT)};
+ const auto ker_index{node.getInputs().at(ir::operation::TransposeConv::Input::KERNEL)};
+
+ // Only 4D tensors are supported
+ OP_REQUIRES(_ctx.at(ofm_index).shape().rank() == 4);
+ OP_REQUIRES(_ctx.at(ofm_index).shape().rank() == _ctx.at(ifm_index).shape().rank());
+ OP_REQUIRES(_ctx.at(ofm_index).shape().rank() == _ctx.at(ker_index).shape().rank());
+
+ const auto frontend_layout = _current_op_seq_layout;
+ const auto ofm_shape = _ctx.at(ofm_index).shape().asFeature(frontend_layout);
+ const auto ifm_shape = _ctx.at(ifm_index).shape().asFeature(frontend_layout);
+  // The kernel has only the IHWO layout on the frontend,
+  // so ker_shape is mapped as follows:
+ // I -> N
+ // H -> H
+ // W -> W
+ // O -> C
+ const auto ker_shape = _ctx.at(ker_index).shape().asFeature(ir::Layout::NHWC);
+
+ OP_REQUIRES(ifm_shape.N == ofm_shape.N);
+ OP_REQUIRES(ifm_shape.C == ker_shape.C);
+ OP_REQUIRES(ker_shape.N == ofm_shape.C);
+}
+
+void ShapeValidator::visit(const ir::operation::Gather &node)
+{
+ const auto ofm_index{node.getOutputs().at(0)};
+ if (_ctx.at(ofm_index).info().isDynamic())
+ return;
+
+ const auto ifm_index{node.getInputs().at(ir::operation::Gather::Input::INPUT)};
+ const auto indices_index{node.getInputs().at(ir::operation::Gather::Input::INDICES)};
+
+ const auto ifm_shape = _ctx.at(ifm_index).shape();
+ const auto indices_shape = _ctx.at(indices_index).shape();
+ const auto ofm_shape = _ctx.at(ofm_index).shape();
+
+ OP_REQUIRES(ifm_shape.rank() <= 4);
+ OP_REQUIRES(indices_shape.rank() <= 3);
+ OP_REQUIRES(ofm_shape.rank() <= 4);
+}
+
+void ShapeValidator::visit(const ir::operation::DepthToSpace &node)
+{
+ int32_t block_size = node.param().block_size;
+
+ // shape check
+ const auto output_index{node.getOutputs().at(0)};
+ if (_ctx.at(output_index).info().isDynamic())
+ return;
+
+ const auto input_index{node.getInputs().at(ir::operation::DepthToSpace::Input::INPUT)};
+
+ const auto frontend_layout = _current_op_seq_layout;
+ const auto output_shape = _ctx.at(output_index).shape().asFeature(frontend_layout);
+ const auto input_shape = _ctx.at(input_index).shape().asFeature(frontend_layout);
+
+ OP_REQUIRES(_ctx.at(input_index).shape().rank() == 4);
+ OP_REQUIRES(_ctx.at(output_index).shape().rank() == 4);
+
+ {
+ OP_REQUIRES(output_shape.N == input_shape.N);
+ OP_REQUIRES(output_shape.H == input_shape.H * block_size);
+ OP_REQUIRES(output_shape.W == input_shape.W * block_size);
+ OP_REQUIRES(input_shape.C % (block_size * block_size) == 0);
+ OP_REQUIRES(output_shape.C == input_shape.C / (block_size * block_size));
+ }
+}
+
+void ShapeValidator::visit(const ir::operation::Pack &node)
+{
+ const auto axis{node.param().axis};
+ const auto output_index{node.getOutputs().at(0)};
+ if (_ctx.at(output_index).info().isDynamic())
+ return;
+
+ // shape check
+ const auto &output_shape = _ctx.at(output_index).shape();
+ const auto output_rank = static_cast<int32_t>(output_shape.rank());
+
+ const auto input1_index{node.getInputs().at(0)};
+ const auto input_shape = _ctx.at(input1_index).shape();
+
+ OP_REQUIRES(axis >= -output_rank && axis < output_rank);
+ for (const auto &index : node.getInputs())
+ {
+ OP_REQUIRES(input_shape == _ctx.at(index).shape());
+ }
+}
+
+void ShapeValidator::visit(const ir::operation::LSTM &node)
+{
+  // NOTE This validation covers a static LSTM (non-dynamic shapes), but not a dynamic LSTM
+  // TODO Support dynamic LSTM
+ const auto output_index{node.getOutputs().at(ir::operation::LSTM::Output::OUTPUT)};
+ if (_ctx.at(output_index).info().isDynamic())
+ return;
+
+ const auto scratch_buffer_index{
+ node.getOutputs().at(ir::operation::LSTM::Output::SCRATCH_BUFFER)};
+ const auto output_state_out_index{
+ node.getOutputs().at(ir::operation::LSTM::Output::OUTPUT_STATE_OUT)};
+ const auto cell_state_out_index{
+ node.getOutputs().at(ir::operation::LSTM::Output::CELL_STATE_OUT)};
+
+ const auto input_index{node.getInputs().at(ir::operation::LSTM::Input::INPUT)};
+ const auto input_to_input_weights_index{
+ node.getInputs().at(ir::operation::LSTM::Input::INPUT_TO_INPUT_WEIGHTS)};
+ const auto input_to_forget_weights_index{
+ node.getInputs().at(ir::operation::LSTM::Input::INPUT_TO_FORGET_WEIGHTS)};
+ const auto input_to_cell_weights_index{
+ node.getInputs().at(ir::operation::LSTM::Input::INPUT_TO_CELL_WEIGHTS)};
+ const auto input_to_output_weights_index{
+ node.getInputs().at(ir::operation::LSTM::Input::INPUT_TO_OUTPUT_WEIGHTS)};
+ const auto recurrent_to_input_weights_index{
+ node.getInputs().at(ir::operation::LSTM::Input::RECURRENT_TO_INPUT_WEIGHTS)};
+ const auto recurrent_to_forget_weights_index{
+ node.getInputs().at(ir::operation::LSTM::Input::RECURRENT_TO_FORGET_WEIGHTS)};
+ const auto recurrent_to_cell_weights_index{
+ node.getInputs().at(ir::operation::LSTM::Input::RECURRENT_TO_CELL_WEIGHTS)};
+ const auto recurrent_to_output_weights_index{
+ node.getInputs().at(ir::operation::LSTM::Input::RECURRENT_TO_OUTPUT_WEIGHTS)};
+ const auto cell_to_input_weights_index{
+ node.getInputs().at(ir::operation::LSTM::Input::CELL_TO_INPUT_WEIGHTS)};
+ const auto cell_to_forget_weights_index{
+ node.getInputs().at(ir::operation::LSTM::Input::CELL_TO_FORGET_WEIGHTS)};
+ const auto cell_to_output_weights_index{
+ node.getInputs().at(ir::operation::LSTM::Input::CELL_TO_OUTPUT_WEIGHTS)};
+ const auto input_gate_bias_index{
+ node.getInputs().at(ir::operation::LSTM::Input::INPUT_GATE_BIAS)};
+ const auto forget_gate_bias_index{
+ node.getInputs().at(ir::operation::LSTM::Input::FORGET_GATE_BIAS)};
+ const auto cell_bias_index{node.getInputs().at(ir::operation::LSTM::Input::CELL_BIAS)};
+ const auto output_gate_bias_index{
+ node.getInputs().at(ir::operation::LSTM::Input::OUTPUT_GATE_BIAS)};
+ const auto projection_weights_index{
+ node.getInputs().at(ir::operation::LSTM::Input::PROJECTION_WEIGHTS)};
+ const auto projection_bias_index{
+ node.getInputs().at(ir::operation::LSTM::Input::PROJECTION_BIAS)};
+ const auto output_state_in_index{
+ node.getInputs().at(ir::operation::LSTM::Input::OUTPUT_STATE_IN)};
+ const auto cell_state_in_index{node.getInputs().at(ir::operation::LSTM::Input::CELL_STATE_IN)};
+
+ OP_REQUIRES(_ctx.at(input_index).shape().rank() == _ctx.at(output_index).shape().rank());
+ for (int i = 0; i < _ctx.at(input_index).shape().rank() - 1; ++i)
+ {
+ OP_REQUIRES(_ctx.at(input_index).shape().dim(i) == _ctx.at(output_index).shape().dim(i));
+ }
+ OP_REQUIRES(
+ (_ctx.at(output_index).shape().rank() == 2 || _ctx.at(output_index).shape().rank() == 3) &&
+ (_ctx.at(input_index).shape().rank() == 2 || _ctx.at(input_index).shape().rank() == 3) &&
+ _ctx.at(input_to_input_weights_index).shape().rank() == 2 &&
+ _ctx.at(input_to_forget_weights_index).shape().rank() == 2 &&
+ _ctx.at(input_to_cell_weights_index).shape().rank() == 2 &&
+ _ctx.at(input_to_output_weights_index).shape().rank() == 2 &&
+ _ctx.at(recurrent_to_input_weights_index).shape().rank() == 2 &&
+ _ctx.at(recurrent_to_forget_weights_index).shape().rank() == 2 &&
+ _ctx.at(recurrent_to_cell_weights_index).shape().rank() == 2 &&
+ _ctx.at(recurrent_to_output_weights_index).shape().rank() == 2 &&
+ _ctx.at(projection_weights_index).shape().rank() == 2 &&
+ _ctx.at(output_state_in_index).shape().rank() == 2 &&
+ _ctx.at(cell_state_in_index).shape().rank() == 2);
+
+ OP_REQUIRES(_ctx.at(cell_to_input_weights_index).shape().rank() == 1 &&
+ _ctx.at(cell_to_forget_weights_index).shape().rank() == 1 &&
+ _ctx.at(cell_to_output_weights_index).shape().rank() == 1 &&
+ _ctx.at(input_gate_bias_index).shape().rank() == 1 &&
+ _ctx.at(forget_gate_bias_index).shape().rank() == 1 &&
+ _ctx.at(cell_bias_index).shape().rank() == 1 &&
+ _ctx.at(output_gate_bias_index).shape().rank() == 1 &&
+ _ctx.at(projection_bias_index).shape().rank() == 1);
+
+ // CIFG assertion
+ OP_REQUIRES((_ctx.at(input_to_input_weights_index).shape().dim(0) == 0 &&
+ _ctx.at(input_to_input_weights_index).shape().dim(1) == 0 &&
+ _ctx.at(recurrent_to_input_weights_index).shape().dim(0) == 0 &&
+ _ctx.at(recurrent_to_input_weights_index).shape().dim(1) == 0 &&
+ _ctx.at(input_gate_bias_index).shape().dim(0) == 0 &&
+ _ctx.at(cell_to_input_weights_index).shape().dim(0) == 0) ||
+ (_ctx.at(input_to_input_weights_index).shape().dim(0) != 0 &&
+ _ctx.at(input_to_input_weights_index).shape().dim(1) != 0 &&
+ _ctx.at(recurrent_to_input_weights_index).shape().dim(0) != 0 &&
+ _ctx.at(recurrent_to_input_weights_index).shape().dim(1) != 0 &&
+ _ctx.at(input_gate_bias_index).shape().dim(0) != 0));
+
+ // Peephole assertion
+ OP_REQUIRES((_ctx.at(cell_to_forget_weights_index).shape().dim(0) == 0 &&
+ _ctx.at(cell_to_output_weights_index).shape().dim(0) == 0) ||
+ (_ctx.at(cell_to_forget_weights_index).shape().dim(0) != 0 &&
+ _ctx.at(cell_to_output_weights_index).shape().dim(0) != 0));
+
+ bool has_input_to_input_weights = _ctx.at(input_to_input_weights_index).shape().dim(0) != 0 &&
+ _ctx.at(input_to_input_weights_index).shape().dim(1) != 0;
+ bool has_recurrent_to_input_weights =
+ _ctx.at(recurrent_to_input_weights_index).shape().dim(0) != 0 &&
+ _ctx.at(recurrent_to_input_weights_index).shape().dim(1) != 0;
+ bool has_input_gate_bias = _ctx.at(input_gate_bias_index).shape().dim(0) != 0;
+ bool has_cell_to_input_weights = _ctx.at(cell_to_input_weights_index).shape().dim(0) != 0;
+ bool has_cell_to_forget_weights = _ctx.at(cell_to_forget_weights_index).shape().dim(0) != 0;
+ bool has_cell_to_output_weights = _ctx.at(cell_to_output_weights_index).shape().dim(0) != 0;
+ bool has_projection_weights = _ctx.at(projection_weights_index).shape().dim(0) != 0 &&
+ _ctx.at(projection_weights_index).shape().dim(1) != 0;
+ bool has_projection_bias = _ctx.at(projection_bias_index).shape().dim(0);
+
+  // NOTE The cell_to_input_weights do not exist in non-peephole mode, even for regular (non-CIFG) LSTM.
+ // true: no CIFG
+ // false: CIFG
+ bool has_cifg_param = has_input_to_input_weights && has_recurrent_to_input_weights;
+
+  // NOTE The cell_to_input_weights do not exist in CIFG mode, even with peephole connections.
+ // true: peephole
+ // false: no peephole
+ bool has_peephole_param = has_cell_to_forget_weights && has_cell_to_output_weights;
+
+ // NOTE The projection weights may have data but the projection bias may not.
+ bool has_projection_param = has_projection_weights;
+
+ const auto batch_size = (_ctx.at(input_index).shape().rank() == 3 && node.param().time_major)
+ ? _ctx.at(input_index).shape().dim(1)
+ : _ctx.at(input_index).shape().dim(0);
+ OP_REQUIRES(batch_size == _ctx.at(output_state_in_index).shape().dim(0) &&
+ batch_size == _ctx.at(cell_state_in_index).shape().dim(0));
+
+ const auto input_size = _ctx.at(input_index).shape().dim(_ctx.at(input_index).shape().rank() - 1);
+ OP_REQUIRES(input_size == _ctx.at(input_to_forget_weights_index).shape().dim(1) &&
+ input_size == _ctx.at(input_to_cell_weights_index).shape().dim(1) &&
+ input_size == _ctx.at(input_to_output_weights_index).shape().dim(1));
+
+ const auto num_units = _ctx.at(input_to_output_weights_index).shape().dim(0);
+ OP_REQUIRES(num_units == _ctx.at(input_to_cell_weights_index).shape().dim(0) &&
+ num_units == _ctx.at(input_to_output_weights_index).shape().dim(0) &&
+ num_units == _ctx.at(recurrent_to_forget_weights_index).shape().dim(0) &&
+ num_units == _ctx.at(recurrent_to_cell_weights_index).shape().dim(0) &&
+ num_units == _ctx.at(recurrent_to_output_weights_index).shape().dim(0) &&
+ num_units == _ctx.at(forget_gate_bias_index).shape().dim(0) &&
+ num_units == _ctx.at(cell_bias_index).shape().dim(0) &&
+ num_units == _ctx.at(output_gate_bias_index).shape().dim(0) &&
+ num_units == _ctx.at(cell_state_in_index).shape().dim(1));
+
+ const auto output_size =
+ _ctx.at(output_index).shape().dim(_ctx.at(output_index).shape().rank() - 1);
+ OP_REQUIRES(output_size == _ctx.at(recurrent_to_forget_weights_index).shape().dim(1) &&
+ output_size == _ctx.at(recurrent_to_cell_weights_index).shape().dim(1) &&
+ output_size == _ctx.at(recurrent_to_output_weights_index).shape().dim(1) &&
+ output_size == _ctx.at(output_state_in_index).shape().dim(1));
+
+ if (has_cifg_param)
+ {
+ OP_REQUIRES(input_size == _ctx.at(input_to_input_weights_index).shape().dim(1));
+ OP_REQUIRES(num_units == _ctx.at(input_to_input_weights_index).shape().dim(0) &&
+ num_units == _ctx.at(recurrent_to_input_weights_index).shape().dim(0) &&
+ (num_units == _ctx.at(cell_to_input_weights_index).shape().dim(0) ||
+ _ctx.at(cell_to_input_weights_index).shape().dim(0) == 0 /* non-peephole */) &&
+ num_units == _ctx.at(input_gate_bias_index).shape().dim(0));
+ OP_REQUIRES(output_size == _ctx.at(recurrent_to_input_weights_index).shape().dim(1));
+ OP_REQUIRES(has_input_to_input_weights && has_recurrent_to_input_weights &&
+ has_input_gate_bias);
+ if (has_cell_to_input_weights)
+ {
+ // NOTE The cell_to_input_weights exist only in case of non-CIFG and peephole.
+ OP_REQUIRES(has_peephole_param);
+ }
+ if (_ctx.exist(scratch_buffer_index))
+ OP_REQUIRES(_ctx.at(scratch_buffer_index).shape().dim(1) == num_units * 4);
+ }
+ else
+ {
+ if (_ctx.exist(scratch_buffer_index))
+ OP_REQUIRES(_ctx.at(scratch_buffer_index).shape().dim(1) == num_units * 3);
+ }
+
+ if (has_peephole_param)
+ {
+ OP_REQUIRES(num_units == _ctx.at(cell_to_forget_weights_index).shape().dim(0) &&
+ num_units == _ctx.at(cell_to_output_weights_index).shape().dim(0) &&
+ (num_units == _ctx.at(cell_to_input_weights_index).shape().dim(0) ||
+ _ctx.at(cell_to_input_weights_index).shape().dim(0) == 0 /* CIFG */));
+ }
+
+ if (has_projection_param)
+ {
+ OP_REQUIRES(num_units == _ctx.at(projection_weights_index).shape().dim(1));
+ OP_REQUIRES(output_size == _ctx.at(projection_weights_index).shape().dim(0));
+ if (has_projection_bias)
+ {
+ OP_REQUIRES(output_size == _ctx.at(projection_bias_index).shape().dim(0));
+ }
+ }
+
+ if (_ctx.exist(scratch_buffer_index))
+ {
+ OP_REQUIRES(_ctx.at(scratch_buffer_index).shape().rank() == 2);
+ OP_REQUIRES(batch_size == _ctx.at(scratch_buffer_index).shape().dim(0));
+ }
+
+ if (_ctx.exist(output_state_out_index))
+ {
+ OP_REQUIRES(_ctx.at(output_state_out_index).shape().rank() == 2);
+ OP_REQUIRES(batch_size == _ctx.at(output_state_out_index).shape().dim(0));
+ OP_REQUIRES(output_size == _ctx.at(output_state_out_index).shape().dim(1));
+ }
+
+ if (_ctx.exist(cell_state_out_index))
+ {
+ OP_REQUIRES(_ctx.at(cell_state_out_index).shape().rank() == 2);
+ OP_REQUIRES(batch_size == _ctx.at(cell_state_out_index).shape().dim(0));
+ OP_REQUIRES(num_units == _ctx.at(cell_state_out_index).shape().dim(1));
+ }
+}
+
+void ShapeValidator::visit(const ir::operation::L2Normalization &node)
+{
+ const auto ofm_index{node.getOutputs().at(0)};
+ if (_ctx.at(ofm_index).info().isDynamic())
+ return;
+
+ const auto ifm_index{node.getInputs().at(ir::operation::L2Normalization::Input::INPUT)};
+
+ auto ifm_shape = _ctx.at(ifm_index).shape();
+ auto ofm_shape = _ctx.at(ofm_index).shape();
+
+ OP_REQUIRES(ifm_shape.rank() == ofm_shape.rank());
+
+ for (auto i = 0; i < ifm_shape.rank(); i++)
+ {
+ OP_REQUIRES(ifm_shape.dim(i) == ofm_shape.dim(i));
+ }
+}
+
+void ShapeValidator::visit(const ir::operation::Unpack &node)
+{
+ const auto axis{node.param().axis};
+ const auto output_index{node.getInputs().at(0)};
+ if (_ctx.at(output_index).info().isDynamic())
+ return;
+
+ const auto input_index{node.getInputs().at(ir::operation::Unpack::Input::INPUT)};
+
+ const auto &input_shape = _ctx.at(input_index).shape();
+ const auto input_rank = static_cast<int32_t>(input_shape.rank());
+
+ OP_REQUIRES(axis >= -input_rank && axis < input_rank);
+}
+
+void ShapeValidator::visit(const ir::operation::Pad &node)
+{
+ const auto pad_index{node.getInputs().at(ir::operation::Pad::Input::PAD)};
+ OP_REQUIRES(_ctx.at(pad_index).typeInfo().type() == ir::DataType::INT32);
+
+ const auto output_index{node.getInputs().at(0)};
+ if (_ctx.at(output_index).info().isDynamic())
+ return;
+
+ const auto input_index{node.getInputs().at(ir::operation::Pad::Input::INPUT)};
+
+ const auto &pad_shape = _ctx.at(pad_index).shape();
+ const auto input_rank = static_cast<int32_t>(_ctx.at(input_index).shape().rank());
+
+ OP_REQUIRES(pad_shape.rank() == 2);
+ OP_REQUIRES(pad_shape.dim(0) == input_rank);
+ OP_REQUIRES(pad_shape.dim(1) == 2);
+ OP_REQUIRES(_ctx.at(input_index).shape().rank() == _ctx.at(output_index).shape().rank());
+}
+
+void ShapeValidator::visit(const ir::operation::Select &node)
+{
+ const auto output_index{node.getOutputs().at(0)};
+ // This validator does not check shape. So checking isDynamic() is skipped.
+
+ const auto condition_index{node.getInputs().at(ir::operation::Select::Input::CONDITION)};
+ const auto input_true_index{node.getInputs().at(ir::operation::Select::Input::INPUT_TRUE)};
+ const auto input_false_index{node.getInputs().at(ir::operation::Select::Input::INPUT_FALSE)};
+ UNUSED_RELEASE(output_index);
+ UNUSED_RELEASE(input_true_index);
+ UNUSED_RELEASE(input_false_index);
+
+ OP_REQUIRES(_ctx.at(condition_index).typeInfo().type() == ir::DataType::BOOL8);
+}
+
+void ShapeValidator::visit(const ir::operation::StridedSlice &node)
+{
+ const auto output_index{node.getOutputs().at(0)};
+ const auto input_index{node.getInputs().at(ir::operation::StridedSlice::Input::INPUT)};
+
+ if (_ctx.at(output_index).info().isDynamic())
+ return;
+
+ OP_REQUIRES(_ctx.at(input_index).shape().rank() <= 4);
+}
+
+void ShapeValidator::visit(const ir::operation::Split &node)
+{
+ const auto output_index{node.getOutputs().at(0)};
+ if (_ctx.at(output_index).info().isDynamic())
+ return;
+
+ const auto input_index{node.getInputs().at(ir::operation::Split::Input::INPUT)};
+ const auto axis_index{node.getInputs().at(ir::operation::Split::Input::AXIS)};
+
+ const auto num_splits = node.param().num_splits;
+ const auto input_rank = _ctx.at(input_index).shape().rank();
+ auto axis = *reinterpret_cast<const int32_t *>(_ctx.at(axis_index).data()->base());
+ axis = axis < 0 ? axis + input_rank : axis;
+
+ OP_REQUIRES(axis >= 0 && axis < input_rank);
+ OP_REQUIRES(_ctx.at(input_index).shape().dim(axis) % num_splits == 0);
+}
+
+void ShapeValidator::visit(const ir::operation::Shape &node)
+{
+ const auto output_index{node.getOutputs().at(0)};
+ if (_ctx.at(output_index).info().isDynamic())
+ return;
+
+ const auto input_index{node.getInputs().at(0)};
+ UNUSED_RELEASE(input_index);
+ OP_REQUIRES(_ctx.at(output_index).shape().rank() == 1);
+}
+
+void ShapeValidator::visit(const ir::operation::ResizeBilinear &node)
+{
+ const auto output_index{node.getOutputs().at(0)};
+ const auto input_index{node.getInputs().at(ir::operation::ResizeBilinear::Input::INPUT)};
+
+ if (_ctx.at(output_index).info().isDynamic())
+ {
+ return;
+ }
+ OP_REQUIRES(_ctx.at(input_index).shape().rank() == 4);
+ OP_REQUIRES(_ctx.at(output_index).shape().rank() == 4);
+}
+
+void ShapeValidator::visit(const ir::operation::Reverse &node)
+{
+ const auto output_index{node.getOutputs().at(0)};
+ const auto input_index{node.getInputs().at(ir::operation::Reverse::Input::INPUT)};
+
+ if (_ctx.at(output_index).info().isDynamic())
+ return;
+ OP_REQUIRES(_ctx.at(output_index).shape() == _ctx.at(input_index).shape());
+}
+
+void ShapeValidator::visit(const ir::operation::If &)
+{
+ // TODO Add to validate with subgraphs
+}
+
+void ShapeValidator::visit(const ir::operation::While &)
+{
+ // This validator does not check shape. So checking isDynamic() is skipped.
+ // TODO Add to validate with subgraphs
+}
+
+void ShapeValidator::visit(const ir::operation::SquaredDifference &node)
+{
+ const auto output_index{node.getOutputs().at(0)};
+ const auto lhs_index{node.getInputs().at(ir::operation::SquaredDifference::Input::LHS)};
+ const auto rhs_index{node.getInputs().at(ir::operation::SquaredDifference::Input::RHS)};
+
+ // Check for dimension constraints
+ if (_ctx.at(output_index).info().isDynamic())
+ return;
+
+ auto output_shape = _ctx.at(output_index).shape();
+ auto lhs_shape = _ctx.at(lhs_index).shape();
+ auto rhs_shape = _ctx.at(rhs_index).shape();
+ // Check for output rank
+ OP_REQUIRES(output_shape.rank() == std::max(lhs_shape.rank(), rhs_shape.rank()));
+ auto min_rank = std::min(lhs_shape.rank(), rhs_shape.rank());
+
+ for (int idx = 1; idx <= min_rank; idx++)
+ {
+ int l_idx = lhs_shape.rank() - idx;
+ int r_idx = rhs_shape.rank() - idx;
+ int out_idx = output_shape.rank() - idx;
+
+ OP_REQUIRES((l_idx >= 0) && (r_idx >= 0) && (out_idx >= 0));
+
+ auto l_dims = lhs_shape.dim(l_idx);
+ auto r_dims = rhs_shape.dim(r_idx);
+ auto out_dims = output_shape.dim(out_idx);
+
+ OP_REQUIRES(((l_dims == r_dims) && (out_dims == l_dims)) ||
+ ((l_dims == 1) && (out_dims == r_dims)) || ((r_dims == 1) && (out_dims == l_dims)));
+ }
+ auto &tmp_shape = (lhs_shape.rank() > rhs_shape.rank()) ? lhs_shape : rhs_shape;
+ for (int idx = min_rank + 1; idx <= output_shape.rank(); idx++)
+ {
+ int out_idx = output_shape.rank() - idx;
+ int tmp_idx = tmp_shape.rank() - idx;
+
+ OP_REQUIRES((out_idx >= 0) && (tmp_idx >= 0) &&
+ (output_shape.dim(out_idx) == tmp_shape.dim(tmp_idx)));
+ }
+}
+void ShapeValidator::visit(const ir::operation::Tile &node)
+{
+ const auto output_index{node.getOutputs().at(0)};
+ if (_ctx.at(output_index).info().isDynamic())
+ return;
+
+ const auto input_index{node.getInputs().at(0)};
+ const auto multiple_index{node.getInputs().at(1)};
+
+ OP_REQUIRES(_ctx.at(multiple_index).shape().rank() == 1);
+ OP_REQUIRES(_ctx.at(multiple_index).shape().dim(0) == _ctx.at(input_index).shape().rank());
+ OP_REQUIRES(_ctx.at(input_index).shape().rank() == _ctx.at(output_index).shape().rank());
+}
+
+void ShapeValidator::visit(const ir::operation::Range &node)
+{
+ const auto output_index{node.getOutputs().at(0)};
+ const auto start_index{node.getInputs().at(ir::operation::Range::Input::START)};
+ const auto limit_index{node.getInputs().at(ir::operation::Range::Input::LIMIT)};
+ const auto delta_index{node.getInputs().at(ir::operation::Range::Input::DELTA)};
+
+ // Check for dimension constraints
+ if (_ctx.at(output_index).info().isDynamic())
+ return;
+
+ OP_REQUIRES(_ctx.at(start_index).shape().rank() == 0);
+ OP_REQUIRES(_ctx.at(limit_index).shape().rank() == 0);
+ OP_REQUIRES(_ctx.at(delta_index).shape().rank() == 0);
+}
+
+void ShapeValidator::visit(const ir::operation::MatrixBandPart &node)
+{
+ const auto output_index{node.getOutputs().at(0)};
+ const auto input_index{node.getInputs().at(ir::operation::MatrixBandPart::Input::INPUT)};
+ const auto num_lower_index{
+ node.getInputs().at(ir::operation::MatrixBandPart::Input::NUM_LOWER_DIAG)};
+ const auto num_upper_index{
+ node.getInputs().at(ir::operation::MatrixBandPart::Input::NUM_UPPER_DIAG)};
+
+ // Check for dimension constraints
+ if (_ctx.at(output_index).info().isDynamic())
+ return;
+
+  OP_REQUIRES(_ctx.at(input_index).shape().rank() >= 2);     // input must be a matrix of rank 2 or more
+  OP_REQUIRES(_ctx.at(num_upper_index).shape().rank() == 0); // num_upper must be scalar
+  OP_REQUIRES(_ctx.at(num_lower_index).shape().rank() == 0); // num_lower must be scalar
+}
+
+void ShapeValidator::visit(const ir::operation::LogSoftmax &node)
+{
+ const auto output_index{node.getOutputs().at(0)};
+ if (_ctx.at(output_index).info().isDynamic())
+ return;
+
+ const auto input_index{node.getInputs().at(0)};
+
+ OP_REQUIRES(_ctx.at(output_index).shape().rank() == _ctx.at(input_index).shape().rank());
+}
+
+} // namespace compiler
+} // namespace onert
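
For context on the new file above: every check in ShapeValidator.cc funnels through the OP_REQUIRES macro, which throws std::runtime_error tagged with the source line of the failing predicate, so a shape mismatch surfaces as an exception while the graph is being validated. Below is a minimal, self-contained sketch of that pattern; the Shape struct and checkBatchMatMulRanks function are hypothetical stand-ins for illustration, not onert's real ir::Shape or validator code.

#include <stdexcept>
#include <string>
#include <vector>

// Hypothetical stand-in for an operand shape (onert's real ir::Shape is richer).
struct Shape
{
  std::vector<int> dims;
  int rank() const { return static_cast<int>(dims.size()); }
};

// Same contract as the OP_REQUIRES macro in ShapeValidator.cc: evaluate a
// predicate and throw std::runtime_error carrying the failing line number.
#define OP_REQUIRES(EXP)                                                                      \
  do                                                                                          \
  {                                                                                           \
    if (!(EXP))                                                                               \
      throw std::runtime_error("ShapeValidator failed at line " + std::to_string(__LINE__)); \
  } while (0)

// Example check in the style of ShapeValidator::visit(BatchMatMul):
// both operands must have rank between 2 and 4.
void checkBatchMatMulRanks(const Shape &lhs, const Shape &rhs)
{
  OP_REQUIRES(lhs.rank() >= 2 && lhs.rank() <= 4);
  OP_REQUIRES(rhs.rank() >= 2 && rhs.rank() <= 4);
}
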
diff --git a/runtime/onert/core/src/compiler/ShapeValidator.h b/runtime/onert/core/src/compiler/ShapeValidator.h
new file mode 100644
index 000000000..f40c098d5
--- /dev/null
+++ b/runtime/onert/core/src/compiler/ShapeValidator.h
@@ -0,0 +1,102 @@
+/*
+ * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __ONERT_COMPILER_SHAPE_VALIDATOR_H__
+#define __ONERT_COMPILER_SHAPE_VALIDATOR_H__
+
+#include "ir/Layout.h"
+#include "ir/OperationVisitor.h"
+
+namespace onert
+{
+namespace ir
+{
+class Graph;
+class Operands;
+} // namespace ir
+} // namespace onert
+
+namespace onert
+{
+namespace compiler
+{
+
+class ShapeValidator : public ir::OperationVisitor
+{
+public:
+ ShapeValidator(void) = delete;
+ ShapeValidator(const ir::Graph &graph);
+
+public:
+ void operator()();
+
+public:
+ void visit(const ir::operation::BatchMatMul &node) override;
+ void visit(const ir::operation::BatchToSpaceND &node) override;
+ void visit(const ir::operation::BCQFullyConnected &node) override;
+ void visit(const ir::operation::BCQGather &node) override;
+ void visit(const ir::operation::Comparison &node) override;
+ void visit(const ir::operation::Softmax &node) override;
+ void visit(const ir::operation::InstanceNorm &node) override;
+ void visit(const ir::operation::Permute &node) override;
+ void visit(const ir::operation::Pool2D &node) override;
+ void visit(const ir::operation::Reduce &node) override;
+ void visit(const ir::operation::Transpose &node) override;
+ void visit(const ir::operation::RNN &node) override;
+ void visit(const ir::operation::SpaceToBatchND &node) override;
+ void visit(const ir::operation::SpaceToDepth &node) override;
+ void visit(const ir::operation::ElementwiseActivation &node) override;
+ void visit(const ir::operation::ElementwiseBinary &node) override;
+ void visit(const ir::operation::ElementwiseUnary &node) override;
+ void visit(const ir::operation::EmbeddingLookup &node) override;
+ void visit(const ir::operation::ExpandDims &node) override;
+ void visit(const ir::operation::HashtableLookup &node) override;
+ void visit(const ir::operation::TransposeConv &node) override;
+ void visit(const ir::operation::Gather &node) override;
+ void visit(const ir::operation::DepthToSpace &node) override;
+ void visit(const ir::operation::Pack &node) override;
+ void visit(const ir::operation::LSTM &node) override;
+ void visit(const ir::operation::L2Normalization &node) override;
+ void visit(const ir::operation::Unpack &node) override;
+ void visit(const ir::operation::Pad &node) override;
+ void visit(const ir::operation::Select &node) override;
+ void visit(const ir::operation::StridedSlice &node) override;
+ void visit(const ir::operation::Split &node) override;
+ void visit(const ir::operation::Shape &node) override;
+ void visit(const ir::operation::ResizeBilinear &node) override;
+ void visit(const ir::operation::Reverse &node) override;
+ void visit(const ir::operation::If &node) override;
+ void visit(const ir::operation::While &node) override;
+ void visit(const ir::operation::SquaredDifference &node) override;
+ void visit(const ir::operation::Tile &node) override;
+ void visit(const ir::operation::Range &node) override;
+ void visit(const ir::operation::MatrixBandPart &node) override;
+ void visit(const ir::operation::LogSoftmax &node) override;
+
+private:
+ void checkUnaryOp(const ir::Operation &node);
+
+private:
+ // TODO Remove _ctx field
+ const ir::Graph &_graph;
+ const ir::Operands &_ctx;
+ ir::Layout _current_op_seq_layout;
+};
+
+} // namespace compiler
+} // namespace onert
+
+#endif // __ONERT_COMPILER_SHAPE_VALIDATOR_H__
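
ShapeValidator, like OperationValidator, is an ir::OperationVisitor: it overrides one visit() overload per operation, and operator()() walks every operation in the graph and calls node.accept(*this), which dispatches to the matching overload. A minimal, self-contained sketch of that double dispatch is shown below; the Softmax, Transpose, and TinyValidator types are hypothetical stand-ins, not onert's real IR classes.

#include <iostream>
#include <memory>
#include <vector>

// Hypothetical, simplified visitor hierarchy; onert's ir::OperationVisitor
// declares one visit() overload per concrete operation in the same spirit.
struct Softmax;
struct Transpose;

struct OperationVisitor
{
  virtual ~OperationVisitor() = default;
  virtual void visit(const Softmax &) {}
  virtual void visit(const Transpose &) {}
};

struct Operation
{
  virtual ~Operation() = default;
  virtual void accept(OperationVisitor &v) const = 0;
};

struct Softmax : Operation
{
  void accept(OperationVisitor &v) const override { v.visit(*this); }
};

struct Transpose : Operation
{
  void accept(OperationVisitor &v) const override { v.visit(*this); }
};

// A validator visits each operation once, mirroring ShapeValidator::operator()().
struct TinyValidator : OperationVisitor
{
  void visit(const Softmax &) override { std::cout << "check Softmax shapes\n"; }
  void visit(const Transpose &) override { std::cout << "check Transpose shapes\n"; }
};

int main()
{
  std::vector<std::unique_ptr<Operation>> ops;
  ops.push_back(std::make_unique<Softmax>());
  ops.push_back(std::make_unique<Transpose>());

  TinyValidator validator;
  for (const auto &op : ops)
    op->accept(validator); // double dispatch selects the matching visit() overload
}
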
diff --git a/runtime/onert/core/src/compiler/StaticShapeInference.cc b/runtime/onert/core/src/compiler/StaticShapeInference.cc
index 4eba1ff49..df129d98b 100644
--- a/runtime/onert/core/src/compiler/StaticShapeInference.cc
+++ b/runtime/onert/core/src/compiler/StaticShapeInference.cc
@@ -147,16 +147,26 @@ void StaticShapeInferer::visit(const ir::operation::ArgMax &op)
const auto input_idx{op.getInputs().at(ir::operation::ArgMax::Input::INPUT)};
const auto &input = _operands.at(input_idx);
+ const auto axis_idx{op.getInputs().at(ir::operation::ArgMax::Input::AXIS)};
+ const auto &axis = _operands.at(axis_idx);
+
// get mutable output operand
const auto output_idx = op.getOutputs().at(0);
ir::Operand &output = _operands.at(output_idx);
- const auto rank = input.info().shape().rank();
- const auto axis = ((op.param().axis < 0) ? rank + op.param().axis : op.param().axis);
- assert(0 <= axis && axis < rank);
+ if (!axis.isConstant())
+ {
+ output.info().setDynamic();
+ _return_has_dynamic_tensor = true;
+ return;
+ }
+
+ const auto rank = input.info().shape().rank();
+ auto axis_value = axis.asScalar<int32_t>();
+ axis_value = axis_value < 0 ? axis_value + rank : axis_value;
// re-sizing output shape
- ir::Shape new_shape = shape_inference::inferArgMaxShape(input.info().shape(), axis, rank);
+ ir::Shape new_shape = shape_inference::inferArgMaxShape(input.info().shape(), axis_value, rank);
output.info().shape(new_shape);
}
@@ -165,13 +175,60 @@ void StaticShapeInferer::visit(const ir::operation::BatchMatMul &op)
const auto lhs_index = op.getInputs().at(ir::operation::BatchMatMul::Input::LHS);
const auto rhs_index = op.getInputs().at(ir::operation::BatchMatMul::Input::RHS);
const auto output_index = op.getOutputs().at(0);
- const auto lhs = _operands.at(lhs_index);
- const auto rhs = _operands.at(rhs_index);
+ const auto &lhs = _operands.at(lhs_index);
+ const auto &rhs = _operands.at(rhs_index);
auto &output = _operands.at(output_index);
auto new_shape = shape_inference::inferBatchMatMulShape(lhs.shape(), rhs.shape(), op.param());
output.info().shape(new_shape);
}
+void StaticShapeInferer::visit(const ir::operation::BCQFullyConnected &op)
+{
+ const auto input_idx{op.getInputs().at(ir::operation::BCQFullyConnected::Input::INPUT)};
+ const auto &input = _operands.at(input_idx);
+
+ const auto cluster_idx{
+ op.getInputs().at(ir::operation::BCQFullyConnected::Input::WEIGHTS_CLUSTERS)};
+ const auto &cluster = _operands.at(cluster_idx);
+
+ const auto output_idx = op.getOutputs().at(0);
+ ir::Operand &output = _operands.at(output_idx);
+
+ auto cluster_buf = reinterpret_cast<const int32_t *>(cluster.data()->base());
+ assert(cluster_buf);
+
+ // re-sizing output shape
+ ir::Shape new_shape = shape_inference::inferBCQFullyConnectedShape(
+ input.info().shape(), cluster.info().shape(), cluster_buf);
+ output.info().shape(new_shape);
+}
+
+void StaticShapeInferer::visit(const ir::operation::BCQGather &op)
+{
+ const auto indices_idx{op.getInputs().at(ir::operation::BCQGather::Input::INDICES)};
+ const auto &indices = _operands.at(indices_idx);
+
+ const auto input_binary_idx{op.getInputs().at(ir::operation::BCQGather::Input::INPUT_BINARY)};
+ const auto &input_binary = _operands.at(input_binary_idx);
+
+ const auto cluster_idx{op.getInputs().at(ir::operation::BCQGather::Input::INPUT_CLUSTERS)};
+ const auto &cluster = _operands.at(cluster_idx);
+
+ const auto output_idx = op.getOutputs().at(0);
+ ir::Operand &output = _operands.at(output_idx);
+
+ auto cluster_buf = reinterpret_cast<const int32_t *>(cluster.data()->base());
+ assert(cluster_buf);
+
+ auto rank = input_binary.shape().rank();
+
+ // re-sizing output shape
+ ir::Shape new_shape = shape_inference::inferBCQGatherShape(
+ indices.info().shape(), cluster.info().shape(), cluster_buf, rank, op.param());
+
+ output.info().shape(new_shape);
+}
+
void StaticShapeInferer::visit(const ir::operation::BinaryArithmetic &op)
{
handleBinaryArithmeticOp(op, op.getInputs().at(ir::operation::BinaryArithmetic::Input::LHS),
@@ -439,6 +496,98 @@ void StaticShapeInferer::visit(const ir::operation::L2Normalization &op)
handleSimpleUnaryOp(op, op.getInputs().at(ir::operation::L2Normalization::Input::INPUT));
}
+void StaticShapeInferer::visit(const ir::operation::LSTM &op)
+{
+ const auto output_index{op.getOutputs().at(ir::operation::LSTM::Output::OUTPUT)};
+ auto &output = _operands.at(output_index);
+
+ const auto output_state_out_index{
+ op.getOutputs().at(ir::operation::LSTM::Output::OUTPUT_STATE_OUT)};
+
+ const auto cell_state_out_index{op.getOutputs().at(ir::operation::LSTM::Output::CELL_STATE_OUT)};
+
+ const auto scratch_buffer_index{op.getOutputs().at(ir::operation::LSTM::Output::SCRATCH_BUFFER)};
+
+ if (output.info().isDynamic() || (_operands.exist(output_state_out_index) &&
+ _operands.at(output_state_out_index).info().isDynamic()) ||
+ (_operands.exist(cell_state_out_index) &&
+ _operands.at(cell_state_out_index).info().isDynamic()) ||
+ (_operands.exist(scratch_buffer_index) &&
+ _operands.at(scratch_buffer_index).info().isDynamic()))
+ return;
+
+ const auto input_index{op.getInputs().at(ir::operation::LSTM::Input::INPUT)};
+ const auto &input = _operands.at(input_index);
+
+ const auto input_to_output_weights_index{
+ op.getInputs().at(ir::operation::LSTM::Input::INPUT_TO_OUTPUT_WEIGHTS)};
+ const auto &input_to_output_weights = _operands.at(input_to_output_weights_index);
+
+ const auto recurrent_to_output_weights_index{
+ op.getInputs().at(ir::operation::LSTM::Input::RECURRENT_TO_OUTPUT_WEIGHTS)};
+ const auto &recurrent_to_output_weights = _operands.at(recurrent_to_output_weights_index);
+
+ // re-sizing outputs
+ const int n_batch = (input.shape().rank() == 3 && op.param().time_major) ? input.shape().dim(1)
+ : input.shape().dim(0);
+ const int n_cell = input_to_output_weights.shape().dim(0);
+ const int n_output = recurrent_to_output_weights.shape().dim(1);
+ if (input.shape().rank() == 3)
+ {
+ if (op.param().time_major)
+ output.info().shape(ir::Shape{input.shape().dim(0), n_batch, n_output});
+ else
+ output.info().shape(ir::Shape{n_batch, input.shape().dim(1), n_output});
+ }
+ else
+ {
+ assert(input.shape().rank() == 2);
+ output.info().shape(ir::Shape{n_batch, n_output});
+ }
+
+ if (_operands.exist(output_state_out_index))
+ {
+ auto &output_state_out = _operands.at(output_state_out_index);
+ output_state_out.info().shape(ir::Shape{n_batch, n_output});
+ }
+
+ if (_operands.exist(cell_state_out_index))
+ {
+ auto &cell_state_out = _operands.at(cell_state_out_index);
+ cell_state_out.info().shape(ir::Shape{n_batch, n_cell});
+ }
+
+ if (_operands.exist(scratch_buffer_index))
+ {
+ auto &scratch_buffer = _operands.at(scratch_buffer_index);
+
+ const auto input_to_input_weights_index{
+ op.getInputs().at(ir::operation::LSTM::Input::INPUT_TO_INPUT_WEIGHTS)};
+ const auto recurrent_to_input_weights_index{
+ op.getInputs().at(ir::operation::LSTM::Input::RECURRENT_TO_INPUT_WEIGHTS)};
+
+ bool has_input_to_input_weights =
+ _operands.at(input_to_input_weights_index).shape().dim(0) != 0 &&
+ _operands.at(input_to_input_weights_index).shape().dim(1) != 0;
+ bool has_recurrent_to_input_weights =
+ _operands.at(recurrent_to_input_weights_index).shape().dim(0) != 0 &&
+ _operands.at(recurrent_to_input_weights_index).shape().dim(1) != 0;
+
+      // NOTE The cell_to_input_weights do not exist in non-peephole mode, even for regular (non-CIFG) LSTM.
+ // true: no CIFG
+ // false: CIFG
+ bool has_cifg_param = has_input_to_input_weights && has_recurrent_to_input_weights;
+ if (has_cifg_param)
+ {
+ scratch_buffer.info().shape(ir::Shape{n_batch, n_cell * 4});
+ }
+ else
+ {
+ scratch_buffer.info().shape(ir::Shape{n_batch, n_cell * 3});
+ }
+ }
+}
+
void StaticShapeInferer::visit(const ir::operation::MatrixBandPart &op)
{
handleSimpleUnaryOp(op, op.getInputs().at(ir::operation::MatrixBandPart::Input::INPUT));
@@ -683,9 +832,29 @@ void StaticShapeInferer::visit(const ir::operation::ResizeBilinear &op)
const auto output_idx = op.getOutputs().at(0);
ir::Operand &output = _operands.at(output_idx);
+ int32_t height_out, width_out;
+ if (op.getInputs().size() == 2)
+ {
+ auto &size = _operands.at(op.getInputs().at(ir::operation::ResizeBilinear::Input::SIZE));
+ if (!size.isConstant())
+ {
+ output.info().setDynamic();
+ _return_has_dynamic_tensor = true;
+ return;
+ }
+ const auto size_v = size.asVector<std::int32_t>();
+ height_out = size_v[0];
+ width_out = size_v[1];
+ }
+ else
+ {
+ height_out = op.param().height_out;
+ width_out = op.param().width_out;
+ }
+
// Shape inferencing logic based on Params
- ir::Shape new_shape = shape_inference::inferResizeBilinearShape(
- input.shape(), op.param().height_out, op.param().width_out);
+ ir::Shape new_shape =
+ shape_inference::inferResizeBilinearShape(input.shape(), height_out, width_out);
// if size_op is from Const, TFLC put the shape of output into tensor
if (new_shape != output.shape())
@@ -803,21 +972,35 @@ void StaticShapeInferer::visit(const ir::operation::SpaceToBatchND &op)
void StaticShapeInferer::visit(const ir::operation::Split &op)
{
- const auto input_idx{op.getInputs().at(0)};
+ const auto input_idx{op.getInputs().at(ir::operation::Split::Input::INPUT)};
const auto &input = _operands.at(input_idx);
- const auto axis = op.param().axis;
+ const auto axis_idx{op.getInputs().at(ir::operation::Split::Input::AXIS)};
+ const auto &axis = _operands.at(axis_idx);
+
+ auto outputs = op.getOutputs();
+ if (!axis.isConstant())
+ {
+ for (auto output_idx : outputs)
+ {
+ ir::Operand &output = _operands.at(output_idx);
+ output.info().setDynamic();
+ }
+ _return_has_dynamic_tensor = true;
+ return;
+ }
+
const auto num_splits = op.param().num_splits;
const auto rank = input.info().shape().rank();
- auto axis_resolved = axis < 0 ? axis + rank : axis;
+ auto axis_value = axis.asScalar<int32_t>();
+ axis_value = axis_value < 0 ? axis_value + rank : axis_value;
- assert(0 <= axis_resolved && axis_resolved < rank);
+ assert(0 <= axis_value && axis_value < rank);
ir::Shape new_shape =
- shape_inference::inferSplitShape(input.info().shape(), axis_resolved, num_splits);
- auto output_tensors = op.getOutputs();
- for (auto output_idx : output_tensors)
+ shape_inference::inferSplitShape(input.info().shape(), axis_value, num_splits);
+ for (auto output_idx : outputs)
{
ir::Operand &output = _operands.at(output_idx);
output.info().shape(new_shape);
@@ -838,13 +1021,6 @@ void StaticShapeInferer::visit(const ir::operation::Squeeze &op)
const auto output_idx = op.getOutputs().at(0);
ir::Operand &output = _operands.at(output_idx);
- if (input.info().isDynamic())
- {
- output.info().setDynamic();
- _return_has_dynamic_tensor = true;
- return;
- }
-
// Squeeze output shape
ir::Shape new_shape = shape_inference::inferSqueezeShape(input.info().shape(), op.param());
output.info().shape(new_shape);
@@ -909,7 +1085,8 @@ void StaticShapeInferer::visit(const ir::operation::Tile &op)
assert(multiplier_buffer);
// re-sizing output shape
- auto new_shape = shape_inference::inferTileShape(input.info().shape(), multiplier_buffer);
+ auto new_shape = shape_inference::inferTileShape(input.info().shape(), multiplier_buffer,
+ multiplier.shape().num_elements());
output.info().shape(new_shape);
}
@@ -918,14 +1095,43 @@ void StaticShapeInferer::visit(const ir::operation::Transpose &op)
const auto input_idx{op.getInputs().at(ir::operation::Transpose::Input::INPUT)};
const auto &input = _operands.at(input_idx);
+ const auto perm_idx{op.getInputs().at(ir::operation::Transpose::Input::PERMUTATION)};
+ const auto &perm = _operands.at(perm_idx);
+
+ // perm.shape() == ir::Shape{0} means that perm is (n-1...0)
+ // TODO Change this condition to perm.num_elements() == 0
+ const auto is_regular_transpose = perm.shape() == ir::Shape{0};
+
// get mutable output operand
const auto output_idx = op.getOutputs().at(0);
- ir::Operand &output = _operands.at(output_idx);
- const auto perm{op.param().perm};
- // const auto rank{op.param().rank};
+ auto &output = _operands.at(output_idx);
+ if (!perm.isConstant() && !is_regular_transpose)
+ {
+ output.info().setDynamic();
+ _return_has_dynamic_tensor = true;
+ return;
+ }
- // set output shape, based on input and params
- ir::Shape new_shape = shape_inference::inferTransposeShape(input.info().shape(), perm);
+ ir::Shape new_shape;
+ if (is_regular_transpose)
+ {
+ // Call by (n-1...0)
+ new_shape = shape_inference::inferTransposeShape(input.info().shape(), nullptr, 0);
+ }
+ else
+ {
+ // Check rank
+ if (input.info().shape().rank() != static_cast<int>(perm.info().shape().num_elements()))
+ {
+ throw std::runtime_error("StaticShapeInferer failed, bad rank size: " +
+ std::to_string(perm.info().shape().num_elements()));
+ }
+
+ // set output shape, based on input and params
+ const auto perm_buf = reinterpret_cast<const int32_t *>(perm.data()->base());
+ new_shape = shape_inference::inferTransposeShape(input.info().shape(), perm_buf,
+ perm.shape().num_elements());
+ }
output.info().shape(new_shape);
}
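
For reference, a minimal standalone sketch of the permutation convention the inferers above rely on: an empty `perm` operand means the axes are simply reversed (a regular transpose), while a non-empty buffer must supply one entry per input dimension. The helper below is a hypothetical stand-in that only mirrors the `inferTransposeShape(shape, perm, perm_size)` signature seen in this diff, not the actual onert implementation.

```
#include <cstdint>
#include <stdexcept>
#include <vector>

// Hypothetical stand-in for shape_inference::inferTransposeShape (illustration only).
using Dims = std::vector<int32_t>;

Dims inferTransposeShapeSketch(const Dims &in, const int32_t *perm, size_t perm_size)
{
  const size_t rank = in.size();
  Dims out(rank);
  if (perm_size == 0)
  {
    // Empty perm: regular transpose, i.e. permutation (n-1 ... 0)
    for (size_t i = 0; i < rank; ++i)
      out[i] = in[rank - 1 - i];
    return out;
  }
  if (perm_size != rank)
    throw std::runtime_error("bad rank size"); // same failure mode as the inferers above
  for (size_t i = 0; i < rank; ++i)
    out[i] = in[perm[i]];
  return out;
}

// e.g. {2, 3, 4} with an empty perm    -> {4, 3, 2}
//      {2, 3, 4} with perm = {0, 2, 1} -> {2, 4, 3}
```
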
diff --git a/runtime/onert/core/src/compiler/TensorRegistries.h b/runtime/onert/core/src/compiler/TensorRegistries.h
index 8be87b081..e42225cbf 100644
--- a/runtime/onert/core/src/compiler/TensorRegistries.h
+++ b/runtime/onert/core/src/compiler/TensorRegistries.h
@@ -69,7 +69,7 @@ public:
return _cf_tensor_reg;
}
- std::shared_ptr<backend::ITensor> getITensor(ir::OperandIndex ind) const
+ backend::ITensor *getITensor(ir::OperandIndex ind) const
{
for (auto &tensor_reg : _tensor_regs)
{
diff --git a/runtime/onert/core/src/compiler/pass/ConstantInsertionPass.cc b/runtime/onert/core/src/compiler/pass/ConstantInsertionPass.cc
index 647669e46..ef6240894 100644
--- a/runtime/onert/core/src/compiler/pass/ConstantInsertionPass.cc
+++ b/runtime/onert/core/src/compiler/pass/ConstantInsertionPass.cc
@@ -44,7 +44,7 @@ void ConstantInsertionPass::callback(const ir::OperationIndex &node_index, ir::O
const auto key = ReplaceKey{input, factor};
if (_replace_operands_map.count(key) == 0)
{
- auto new_object = object;
+ ir::Operand new_object(object);
new_object.unsetDef();
// TODO Remove const_case
const_cast<ir::OperationIndexSet &>(new_object.getUses()).clear();
@@ -81,7 +81,7 @@ void ConstantInsertionPass::callback(const ir::OperationIndex &node_index, ir::O
}
// Now this runtime does not support the node making output as constant
- for (const auto &output : node.getOutputs())
+ for (const auto &output : node.getOutputs() | ir::Remove::DUPLICATED | ir::Remove::UNDEFINED)
{
UNUSED_RELEASE(output);
assert(!_graph.operands().at(output).isConstant());
diff --git a/runtime/onert/core/src/compiler/pass/ConstantOutputPass.cc b/runtime/onert/core/src/compiler/pass/ConstantOutputPass.cc
new file mode 100644
index 000000000..c176f6ffb
--- /dev/null
+++ b/runtime/onert/core/src/compiler/pass/ConstantOutputPass.cc
@@ -0,0 +1,68 @@
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "ConstantOutputPass.h"
+
+#include "ir/Graph.h"
+#include "ir/operation/Permute.h"
+#include "util/logging.h"
+
+namespace onert
+{
+namespace compiler
+{
+namespace pass
+{
+
+void ConstantOutputPass::callback(const ir::OperandIndex &ind, ir::Operand &obj)
+{
+ if (!_graph.getOutputs().contains(ind) || !obj.isConstant())
+ return;
+
+ auto permute_input_ind = _graph.addOperand(obj.shape(), obj.typeInfo());
+ auto &permute_input_obj = _graph.operands().at(permute_input_ind);
+
+ // Move the const data
+ permute_input_obj.data(obj.shareData());
+ obj.releaseData();
+ obj.info().setAsNonConst();
+
+ using ir::operation::Permute;
+ auto permute_obj = std::make_unique<Permute>(permute_input_ind, ind, Permute::Type::COPY);
+ auto permute_ind = _graph.operations().push(std::move(permute_obj));
+
+ permute_input_obj.insertUse(permute_ind);
+ obj.setDef(permute_ind);
+
+ // Make the operations that uses this operand to use the generated operand
+ auto orig_uses = obj.getUses();
+ for (auto use : orig_uses)
+ {
+ permute_input_obj.insertUse(use);
+ obj.removeUse(use);
+ _graph.operations().at(use).replaceInputs(ind, permute_input_ind);
+ }
+
+ VERBOSE(ConstantOutputPass) << "Permute Op inserted for a constant output, node index : "
+ << permute_ind << std::endl;
+ VERBOSE(ConstantOutputPass) << " - Input (inserted) Operand : " << permute_input_ind
+ << std::endl;
+ VERBOSE(ConstantOutputPass) << " - Output(original) Operand : " << ind << std::endl;
+}
+
+} // namespace pass
+} // namespace compiler
+} // namespace onert
diff --git a/runtime/onert/core/src/compiler/pass/ConstantOutputPass.h b/runtime/onert/core/src/compiler/pass/ConstantOutputPass.h
new file mode 100644
index 000000000..193dd3a68
--- /dev/null
+++ b/runtime/onert/core/src/compiler/pass/ConstantOutputPass.h
@@ -0,0 +1,63 @@
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __ONERT_COMPILER_PASS_CONSTANT_OUTPUT_PASS_H__
+#define __ONERT_COMPILER_PASS_CONSTANT_OUTPUT_PASS_H__
+
+#include "OperandPass.h"
+
+namespace onert
+{
+namespace compiler
+{
+namespace pass
+{
+
+/**
+ * @brief Pass to specially handle constant model outputs
+ *
+ * Because an output buffer is given only right before execution while constant initialization is
+ * done at the prepare phase, the current runtime structure cannot handle a constant model output.
+ * To resolve this, this pass inserts a Permute layer with a const input and makes the model output
+ * tensor be its output.
+ *
+ * e.g.)
+ *
+ * ((Const Output))
+ *
+ * becomes
+ *
+ * (Const) -> [Permute] -> ((Output))
+ *
+ * Note that this is a mandatory pass for Graph.
+ */
+class ConstantOutputPass : public OperandPass
+{
+public:
+ using OperandPass::OperandPass;
+
+public:
+ std::string id() final { return "ConstantOutputPass"; }
+
+public:
+ void callback(const ir::OperandIndex &i, ir::Operand &o) final;
+};
+
+} // namespace pass
+} // namespace compiler
+} // namespace onert
+
+#endif // __ONERT_COMPILER_PASS_CONSTANT_OUTPUT_PASS_H__
diff --git a/runtime/onert/core/src/compiler/pass/OddOutputPass.cc b/runtime/onert/core/src/compiler/pass/OddOutputPass.cc
new file mode 100644
index 000000000..f50fae0d3
--- /dev/null
+++ b/runtime/onert/core/src/compiler/pass/OddOutputPass.cc
@@ -0,0 +1,90 @@
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "OddOutputPass.h"
+
+#include "ir/Graph.h"
+#include "ir/operation/Permute.h"
+#include "util/logging.h"
+#include "util/Utils.h"
+
+namespace onert
+{
+namespace compiler
+{
+namespace pass
+{
+
+void OddOutputPass::run()
+{
+ auto &outputs = _graph.getOutputs();
+
+ VERBOSE(OddOutputPass) << "Case 1 : An operand which is a model output and a model input"
+ << std::endl;
+ for (auto &ind : outputs)
+ {
+ if (_graph.getInputs().contains(ind))
+ {
+ auto permute_output_ind = insertPermute(ind);
+ // Update the output to be newly added operand
+ _graph.getOutputs().replace(ind, permute_output_ind);
+ }
+ }
+
+ VERBOSE(OddOutputPass) << "Case 2 : Two or more duplicated outputs" << std::endl;
+ std::unordered_set<ir::OperandIndex> occurrence;
+ for (auto &ind : outputs)
+ {
+ auto &obj = _graph.operands().at(ind);
+ if (occurrence.count(ind) == 0)
+ {
+ occurrence.insert(ind);
+ continue;
+ }
+
+ // Panic if it is const; it must have been handled earlier in another pass
+ UNUSED_RELEASE(obj);
+ assert(!obj.isConstant());
+
+ auto permute_output_ind = insertPermute(ind);
+ ind = permute_output_ind; // Replace output index to fix output duplication
+ }
+}
+
+ir::OperandIndex OddOutputPass::insertPermute(ir::OperandIndex ind)
+{
+ auto &obj = _graph.operands().at(ind);
+ auto output_ind = _graph.addOperand(obj.shape(), obj.typeInfo());
+ auto &output_obj = _graph.operands().at(output_ind);
+
+ using ir::operation::Permute;
+ auto permute_obj = std::make_unique<Permute>(ind, output_ind, Permute::Type::COPY);
+ auto permute_ind = _graph.operations().push(std::move(permute_obj));
+
+ output_obj.setDef(permute_ind);
+ obj.insertUse(permute_ind);
+
+ VERBOSE(OddOutputPass) << "Permute Op inserted for an odd output, node index : "
+ << permute_ind << std::endl;
+ VERBOSE(OddOutputPass) << " - Input (original) Operand : " << ind << std::endl;
+ VERBOSE(OddOutputPass) << " - Output(inserted) Operand : " << output_ind << std::endl;
+
+ return output_ind;
+}
+
+} // namespace pass
+} // namespace compiler
+} // namespace onert
diff --git a/runtime/onert/core/src/compiler/pass/OddOutputPass.h b/runtime/onert/core/src/compiler/pass/OddOutputPass.h
new file mode 100644
index 000000000..2accbac60
--- /dev/null
+++ b/runtime/onert/core/src/compiler/pass/OddOutputPass.h
@@ -0,0 +1,89 @@
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __ONERT_COMPILER_PASS_ODD_OUTPUT_PASS_H__
+#define __ONERT_COMPILER_PASS_ODD_OUTPUT_PASS_H__
+
+#include <unordered_set>
+
+#include "Pass.h"
+#include "ir/Index.h"
+
+namespace onert
+{
+namespace compiler
+{
+namespace pass
+{
+
+/**
+ * @brief Pass to specially handle odd outputs in a subgraph
+ *
+ * Runtime Graph IR requires every input and output to have a distinct tensor index; this is onert's
+ * restriction. However, duplicated indices are allowed in models (and through the API), so the graph
+ * must be transformed after model loading.
+ *
+ * This is necessary because our API lets users set different buffers for each input and output, so
+ * copying the value at runtime is unavoidable.
+ *
+ * Note that this is a mandatory pass for Graph.
+ *
+ * Case 1 : An operand which is a model output and a model input
+ *
+ * Create a new operand and insert a Permute (copy) op between them, then change the model output to
+ * be the newly generated operand.
+ *
+ * e.g.)
+ *
+ * ```
+ * ((#0 Input0 and also Output0))
+ * becomes
+ * ((#0 Input0)) -> [#0 Permute] -> ((#1 Output0))
+ * ```
+ *
+ * Case 2 : Two or more duplicated outputs
+ *
+ * Do the same as Case 1, but between two outputs that share the same tensor index.
+ *
+ * e.g.)
+ *
+ * ```
+ * ((#0 Input0)) -> [#0 Some Operation] -> ((#1 Output0 and also Output1))
+ * becomes
+ * ((#0 Input0)) -> [#0 Some Operation] -> ((#1 Output0)) [#1 Permute] -> ((#2 Output1))
+ * ```
+ *
+ */
+class OddOutputPass : public Pass
+{
+public:
+ using Pass::Pass;
+
+public:
+ std::string id() final { return "OddOutputPass"; }
+
+public:
+ void run() override;
+
+private:
+ ir::OperandIndex insertPermute(ir::OperandIndex input);
+};
+
+} // namespace pass
+} // namespace compiler
+} // namespace onert
+
+#endif // __ONERT_COMPILER_PASS_ODD_OUTPUT_PASS_H__
diff --git a/runtime/onert/core/src/compiler/pass/PassRunner.cc b/runtime/onert/core/src/compiler/pass/PassRunner.cc
new file mode 100644
index 000000000..2a058c8ac
--- /dev/null
+++ b/runtime/onert/core/src/compiler/pass/PassRunner.cc
@@ -0,0 +1,45 @@
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "PassRunner.h"
+
+namespace onert
+{
+namespace compiler
+{
+namespace pass
+{
+
+PassRunner &PassRunner::append(std::unique_ptr<Pass> pass)
+{
+ _passes.emplace_back(std::move(pass));
+ return *this;
+}
+
+void PassRunner::run()
+{
+ for (auto &pass : _passes)
+ {
+ VERBOSE(PassRunner) << "Start running '" << pass->id() << "'" << std::endl;
+ pass->run();
+ VERBOSE(PassRunner) << "Finished running '" << pass->id() << "'" << std::endl;
+ // TODO Dump graph(LowerInfo, OpSequence, ...)?
+ }
+}
+
+} // namespace pass
+} // namespace compiler
+} // namespace onert
diff --git a/runtime/onert/core/src/compiler/pass/PassRunner.h b/runtime/onert/core/src/compiler/pass/PassRunner.h
new file mode 100644
index 000000000..a43c83f89
--- /dev/null
+++ b/runtime/onert/core/src/compiler/pass/PassRunner.h
@@ -0,0 +1,53 @@
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __ONERT_COMPILER_PASS_PASS_RUNNER_H__
+#define __ONERT_COMPILER_PASS_PASS_RUNNER_H__
+
+#include <initializer_list>
+#include <memory>
+#include <vector>
+
+#include "Pass.h"
+#include "util/logging.h"
+
+namespace onert
+{
+namespace compiler
+{
+namespace pass
+{
+
+/**
+ * @brief Composes a sequence of passes and runs them with logging
+ */
+class PassRunner
+{
+public:
+ PassRunner() = default;
+ PassRunner &append(std::unique_ptr<Pass> pass);
+
+ void run();
+
+private:
+ std::vector<std::unique_ptr<Pass>> _passes;
+};
+
+} // namespace pass
+} // namespace compiler
+} // namespace onert
+
+#endif // __ONERT_COMPILER_PASS_PASS_RUNNER_H__
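
A brief usage sketch of how the new mandatory passes could be composed through `PassRunner`; the include paths and the `Graph &` constructor arguments are assumptions based on the `using Pass::Pass;` / `using OperandPass::OperandPass;` declarations above, not code taken from this patch.

```
#include <memory>

#include "ConstantOutputPass.h"
#include "OddOutputPass.h"
#include "PassRunner.h"
#include "ir/Graph.h"

// Hypothetical wiring: run the mandatory output-related passes on a graph.
// PassRunner executes the passes in the order they were appended and logs
// each one via VERBOSE(PassRunner) before and after it runs.
void runMandatoryOutputPasses(onert::ir::Graph &graph)
{
  using namespace onert::compiler;
  pass::PassRunner{}
      .append(std::make_unique<pass::ConstantOutputPass>(graph))
      .append(std::make_unique<pass::OddOutputPass>(graph))
      .run();
}
```
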
diff --git a/runtime/onert/core/src/compiler/pass/PermutationEliminationPass.cc b/runtime/onert/core/src/compiler/pass/PermutationEliminationPass.cc
index f01697034..504f1b995 100644
--- a/runtime/onert/core/src/compiler/pass/PermutationEliminationPass.cc
+++ b/runtime/onert/core/src/compiler/pass/PermutationEliminationPass.cc
@@ -53,6 +53,20 @@ void PermutationEliminationPass::visit(const ir::operation::Permute &node)
if (_graph.getOutputs().contains(out_operand))
{
+ // If the input is a const, we cannot remove it since we cannot put the constant data in the
+ // output buffer during prepare phase.
+ auto permute_input = node.getInputs().at(0);
+ if (_graph.operands().at(permute_input).isConstant())
+ return;
+ // If the input is a model input, we cannot remove it since our API lets users set different
+ // buffers for inputs and outputs even when one tensor is both at the same time.
+ auto permute_output = node.getOutputs().at(0);
+ if (_graph.getInputs().contains(permute_input) && _graph.getOutputs().contains(permute_output))
+ return;
+ // Likewise, if it copies from one model output to another, keep it.
+ if (_graph.getOutputs().contains(permute_input) && _graph.getOutputs().contains(permute_output))
+ return;
+
// Exceptional case : When the output operand is a model output
// In this case we keep the output and remove the input
diff --git a/runtime/onert/core/src/compiler/pass/PermutationOperationPass.cc b/runtime/onert/core/src/compiler/pass/PermutationOperationPass.cc
index c5c95c726..93d125307 100644
--- a/runtime/onert/core/src/compiler/pass/PermutationOperationPass.cc
+++ b/runtime/onert/core/src/compiler/pass/PermutationOperationPass.cc
@@ -212,7 +212,7 @@ void PermutationOperationPass::changeToKeepLayout(const Operation &node)
}
}
- for (const auto &output : node.getOutputs() | Remove::DUPLICATED)
+ for (const auto &output : node.getOutputs() | Remove::DUPLICATED | Remove::UNDEFINED)
{
auto lower_info = _lowered_graph.getLowerInfo(output);
lower_info->removeDefPermuteFactor(removed_factor);
@@ -279,6 +279,18 @@ void PermutationOperationPass::visit(const ir::operation::Gather &node)
}
}
+void PermutationOperationPass::visit(const ir::operation::OneHot &node)
+{
+ const auto &output_ind = node.getOutputs().at(0);
+ const auto &output_obj = _graph.operands().at(output_ind);
+ const auto &output_shape = output_obj.shape();
+
+ if (output_shape.rank() >= 4)
+ {
+ changeToKeepLayout(node);
+ }
+}
+
void PermutationOperationPass::visit(const ir::operation::Pack &node)
{
const auto &input_ind = node.getInputs().at(ir::operation::Reshape::Input::INPUT);
diff --git a/runtime/onert/core/src/compiler/pass/PermutationOperationPass.h b/runtime/onert/core/src/compiler/pass/PermutationOperationPass.h
index 2dd76b971..cea5de288 100644
--- a/runtime/onert/core/src/compiler/pass/PermutationOperationPass.h
+++ b/runtime/onert/core/src/compiler/pass/PermutationOperationPass.h
@@ -44,6 +44,7 @@ public:
void visit(const ir::operation::Concat &) final;
void visit(const ir::operation::ElementwiseBinary &) final;
void visit(const ir::operation::ElementwiseUnary &) final;
+ void visit(const ir::operation::OneHot &) final;
void visit(const ir::operation::Pack &) final;
void visit(const ir::operation::PReLU &) final;
void visit(const ir::operation::SquaredDifference &) final;
diff --git a/runtime/onert/core/src/dumper/dot/DotDumper.cc b/runtime/onert/core/src/dumper/dot/DotDumper.cc
index 118057f09..8f3cf328c 100644
--- a/runtime/onert/core/src/dumper/dot/DotDumper.cc
+++ b/runtime/onert/core/src/dumper/dot/DotDumper.cc
@@ -81,11 +81,8 @@ void DotDumper::dump(const std::string &tag)
}
else
{
- showing_cond = !object.isConstant();
- }
- if (object.isConstant() || _graph.getInputs().contains(index))
- {
- showing_cond = showing_cond && (object.getUses().size() > 0);
+ showing_cond =
+ !object.isConstant() || (_graph.getInputs() + _graph.getOutputs()).contains(index);
}
if (showing_cond)
{
diff --git a/runtime/onert/core/src/exec/DataflowExecutor.cc b/runtime/onert/core/src/exec/DataflowExecutor.cc
index a69ae9cdb..53bc3c204 100644
--- a/runtime/onert/core/src/exec/DataflowExecutor.cc
+++ b/runtime/onert/core/src/exec/DataflowExecutor.cc
@@ -77,14 +77,12 @@ bool DataflowExecutor::noWaitingJobs()
[](const std::unique_ptr<Job> &job) { return job == nullptr; });
}
-DataflowExecutor::DataflowExecutor(
- std::unique_ptr<compiler::LoweredGraph> lowered_graph,
- const std::vector<std::shared_ptr<backend::ITensor>> &input_tensors,
- const std::vector<std::shared_ptr<backend::ITensor>> &output_tensors,
- const compiler::TensorRegistries &tensor_regs, backend::TensorManagerSet &&tensor_mgrs,
- compiler::CodeMap &&code_map)
- : ExecutorBase{std::move(lowered_graph), input_tensors, output_tensors, tensor_regs,
- std::move(tensor_mgrs)},
+DataflowExecutor::DataflowExecutor(std::unique_ptr<compiler::LoweredGraph> lowered_graph,
+ const std::vector<backend::ITensor *> &input_tensors,
+ const std::vector<backend::ITensor *> &output_tensors,
+ const compiler::TensorRegistries &tensor_regs,
+ compiler::CodeMap &&code_map)
+ : ExecutorBase{std::move(lowered_graph), input_tensors, output_tensors, tensor_regs},
_code_map{std::move(code_map)}
{
VERBOSE(DataflowExecutor) << "Constructing Dataflow Executor" << std::endl;
@@ -161,6 +159,8 @@ void DataflowExecutor::executeImpl()
_subject.notifyJobBegin(this, op_seq, backend);
+ job->fn_seq()->initRunning();
+
// check if FunctionSequence needs to handle dynamic tensor
bool handle_dynamic_tensor = op_seq->has_dynamic_tensor() || dynamic_input_exists;
job->fn_seq()->enableDynamicShapeInferer(handle_dynamic_tensor);
diff --git a/runtime/onert/core/src/exec/DataflowExecutor.h b/runtime/onert/core/src/exec/DataflowExecutor.h
index 8d60e3e4b..69dfda15c 100644
--- a/runtime/onert/core/src/exec/DataflowExecutor.h
+++ b/runtime/onert/core/src/exec/DataflowExecutor.h
@@ -50,10 +50,9 @@ public:
* @param code_map OpSequence and its code map
*/
DataflowExecutor(std::unique_ptr<compiler::LoweredGraph> lowered_graph,
- const std::vector<std::shared_ptr<backend::ITensor>> &input_tensors,
- const std::vector<std::shared_ptr<backend::ITensor>> &output_tensors,
- const compiler::TensorRegistries &tensor_regs,
- backend::TensorManagerSet &&tensor_mgrs, compiler::CodeMap &&code_map);
+ const std::vector<backend::ITensor *> &input_tensors,
+ const std::vector<backend::ITensor *> &output_tensors,
+ const compiler::TensorRegistries &tensor_regs, compiler::CodeMap &&code_map);
void executeImpl() override;
diff --git a/runtime/onert/core/src/exec/DynamicShapeInference.cc b/runtime/onert/core/src/exec/DynamicShapeInference.cc
index 70bddfce4..0f604c43f 100644
--- a/runtime/onert/core/src/exec/DynamicShapeInference.cc
+++ b/runtime/onert/core/src/exec/DynamicShapeInference.cc
@@ -23,14 +23,6 @@ namespace onert
namespace exec
{
-inline backend::IDynamicTensorManager *
-dynamicTensorManagerOf(const std::shared_ptr<backend::ITensor> &tensor)
-{
- if (!tensor->dynamic_tensor_manager())
- throw std::runtime_error{"Dynamic Tensor Manager is not available for this tensor."};
- return tensor->dynamic_tensor_manager();
-}
-
void DynamicShapeInferer::handleBinaryArithmeticOp(const ir::Operation &op,
const ir::OperandIndex lhs_idx,
const ir::OperandIndex rhs_idx)
@@ -64,7 +56,7 @@ void DynamicShapeInferer::handleBinaryArithmeticOp(const ir::Operation &op,
ir::Shape new_shape = shape_inference::inferEltwiseShape(lhs_shape, rhs_shape);
- dynamicTensorManagerOf(output)->applyShape(output_idx, new_shape);
+ output->applyShape(new_shape);
assert(output->buffer() != nullptr);
}
@@ -96,30 +88,32 @@ void DynamicShapeInferer::handleSimpleUnaryOp(const ir::Operation &op,
auto output_ind = op.getOutputs().at(0);
auto output = _tensor_registry->getITensor(output_ind);
- dynamicTensorManagerOf(output)->applyShape(output_ind, output_shape);
+ output->applyShape(output_shape);
assert(output->buffer() != nullptr);
}
void DynamicShapeInferer::visit(const ir::operation::ArgMax &op)
{
const auto input_idx{op.getInputs().at(ir::operation::ArgMax::Input::INPUT)};
- const auto &input = _tensor_registry->getITensor(input_idx);
- auto input_shape = input->getShape();
+ const auto input = _tensor_registry->getITensor(input_idx);
+
+ const auto axis_idx{op.getInputs().at(ir::operation::ArgMax::Input::AXIS)};
+ const auto axis = _tensor_registry->getITensor(axis_idx);
+
+ auto output_ind = op.getOutputs().at(0);
+ auto output = _tensor_registry->getITensor(output_ind);
if (!input->is_dynamic())
return;
+ auto input_shape = input->getShape();
+ auto axis_value = *reinterpret_cast<const int32_t *>(axis->buffer());
const auto rank = input_shape.rank();
- const auto axis = ((op.param().axis < 0) ? rank + op.param().axis : op.param().axis);
-
- assert(0 <= axis && axis < rank);
-
- auto output_ind = op.getOutputs().at(0);
- auto output = _tensor_registry->getITensor(output_ind);
+ axis_value = axis_value < 0 ? axis_value + rank : axis_value;
- ir::Shape new_shape = shape_inference::inferArgMaxShape(input_shape, axis, rank);
+ ir::Shape new_shape = shape_inference::inferArgMaxShape(input_shape, axis_value, rank);
- dynamicTensorManagerOf(output)->applyShape(output_ind, new_shape);
+ output->applyShape(new_shape);
assert(output->buffer() != nullptr);
}
@@ -141,7 +135,68 @@ void DynamicShapeInferer::visit(const ir::operation::BatchMatMul &op)
// TODO
auto new_shape = shape_inference::inferBatchMatMulShape(lhs_shape, rhs_shape, op.param());
- dynamicTensorManagerOf(output)->applyShape(output_index, new_shape);
+ output->applyShape(new_shape);
+}
+
+void DynamicShapeInferer::visit(const ir::operation::BCQFullyConnected &op)
+{
+ const auto input_idx{op.getInputs().at(ir::operation::BCQFullyConnected::Input::INPUT)};
+ const auto &input = _tensor_registry->getITensor(input_idx);
+
+ const auto cluster_idx{
+ op.getInputs().at(ir::operation::BCQFullyConnected::Input::WEIGHTS_CLUSTERS)};
+ const auto &cluster = _tensor_registry->getITensor(cluster_idx);
+ assert(cluster->is_constant());
+
+ if (!input->is_dynamic())
+ return;
+
+ auto input_shape = input->getShape();
+ auto cluster_shape = cluster->getShape();
+
+ auto cluster_buf = reinterpret_cast<const int32_t *>(cluster->buffer());
+ assert(cluster_buf);
+
+ ir::Shape new_shape =
+ shape_inference::inferBCQFullyConnectedShape(input_shape, cluster_shape, cluster_buf);
+
+ auto output_ind = op.getOutputs().at(0);
+ auto output = _tensor_registry->getITensor(output_ind);
+
+ output->applyShape(new_shape);
+ assert(output->buffer() != nullptr);
+}
+
+void DynamicShapeInferer::visit(const ir::operation::BCQGather &op)
+{
+ const auto indices_idx{op.getInputs().at(ir::operation::BCQGather::Input::INDICES)};
+ const auto &indices = _tensor_registry->getITensor(indices_idx);
+
+ const auto input_binary_idx{op.getInputs().at(ir::operation::BCQGather::Input::INPUT_BINARY)};
+ const auto &input_binary = _tensor_registry->getITensor(input_binary_idx);
+
+ const auto cluster_idx{op.getInputs().at(ir::operation::BCQGather::Input::INPUT_CLUSTERS)};
+ const auto &cluster = _tensor_registry->getITensor(cluster_idx);
+ assert(cluster->is_constant());
+
+ if (!indices->is_dynamic())
+ return;
+
+ auto indices_shape = indices->getShape();
+ auto cluster_shape = cluster->getShape();
+ auto rank = input_binary->getShape().rank();
+
+ auto cluster_buf = reinterpret_cast<const int32_t *>(cluster->buffer());
+ assert(cluster_buf);
+
+ ir::Shape new_shape = shape_inference::inferBCQGatherShape(indices_shape, cluster_shape,
+ cluster_buf, rank, op.param());
+
+ auto output_ind = op.getOutputs().at(0);
+ auto output = _tensor_registry->getITensor(output_ind);
+
+ output->applyShape(new_shape);
+ assert(output->buffer() != nullptr);
}
void DynamicShapeInferer::visit(const ir::operation::BinaryArithmetic &op)
@@ -170,7 +225,7 @@ void DynamicShapeInferer::visit(const ir::operation::BroadcastTo &op)
shape->getShape(), reinterpret_cast<const int32_t *>(shape->buffer()));
// set output shape and output buffer
- dynamicTensorManagerOf(output)->applyShape(output_ind, output_shape);
+ output->applyShape(output_shape);
assert(output->buffer() != nullptr);
}
@@ -236,7 +291,7 @@ void DynamicShapeInferer::visit(const ir::operation::Concat &op)
for (auto input_ind : op.getInputs())
{
auto input = _tensor_registry->getITensor(input_ind);
- if (input != first_input && !isConcatible(first_input.get(), input.get(), op.param().axis))
+ if (input != first_input && !isConcatible(first_input, input, op.param().axis))
throw std::runtime_error("input shapes does not matched for concat");
}
}
@@ -255,7 +310,7 @@ void DynamicShapeInferer::visit(const ir::operation::Concat &op)
auto output = _tensor_registry->getITensor(output_ind);
auto output_shape = shape_inference::inferConcatShape(in_shapes, op.param());
- dynamicTensorManagerOf(output)->applyShape(output_ind, output_shape);
+ output->applyShape(output_shape);
}
void DynamicShapeInferer::visit(const ir::operation::Conv2D &op)
@@ -278,7 +333,7 @@ void DynamicShapeInferer::visit(const ir::operation::Conv2D &op)
ir::Shape output_shape = shape_inference::inferConv2DShape(input_shape, ker_shape, op.param());
- dynamicTensorManagerOf(output)->applyShape(output_ind, output_shape);
+ output->applyShape(output_shape);
assert(output->buffer() != nullptr);
}
@@ -338,7 +393,7 @@ void DynamicShapeInferer::visit(const ir::operation::ExpandDims &op)
auto output_shape = shape_inference::inferExpandDimsShape(input_shape, axis_buf[0]);
- dynamicTensorManagerOf(output)->applyShape(output_ind, output_shape);
+ output->applyShape(output_shape);
assert(output->buffer() != nullptr);
}
@@ -354,14 +409,14 @@ void DynamicShapeInferer::visit(const ir::operation::Fill &op)
if ((!input->is_dynamic()) && (!output->is_dynamic()))
return;
- assert(input.get()->data_type() == ir::DataType::INT32);
+ assert(input->data_type() == ir::DataType::INT32);
auto input_buf = reinterpret_cast<const int32_t *>(input->buffer());
assert(input_buf);
auto output_shape = shape_inference::inferFillShape(input_shape, input_buf);
- dynamicTensorManagerOf(output)->applyShape(output_ind, output_shape);
+ output->applyShape(output_shape);
assert(output->buffer() != nullptr);
}
@@ -384,7 +439,7 @@ void DynamicShapeInferer::visit(const ir::operation::FullyConnected &op)
auto output_ind = op.getOutputs().at(0);
auto output = _tensor_registry->getITensor(output_ind);
- dynamicTensorManagerOf(output)->applyShape(output_ind, new_shape);
+ output->applyShape(new_shape);
assert(output->buffer() != nullptr);
}
@@ -416,7 +471,7 @@ void DynamicShapeInferer::visit(const ir::operation::Gather &op)
auto output_ind = op.getOutputs().at(0);
auto output = _tensor_registry->getITensor(output_ind);
- dynamicTensorManagerOf(output)->applyShape(output_ind, new_shape);
+ output->applyShape(new_shape);
assert(output->buffer() != nullptr);
}
@@ -425,6 +480,109 @@ void DynamicShapeInferer::visit(const ir::operation::L2Normalization &op)
handleSimpleUnaryOp(op, op.getInputs().at(ir::operation::L2Normalization::INPUT));
}
+void DynamicShapeInferer::visit(const ir::operation::LSTM &op)
+{
+ const auto output_index{op.getOutputs().at(ir::operation::LSTM::Output::OUTPUT)};
+ auto output = _tensor_registry->getITensor(output_index);
+
+ const auto output_state_out_index{
+ op.getOutputs().at(ir::operation::LSTM::Output::OUTPUT_STATE_OUT)};
+
+ const auto cell_state_out_index{op.getOutputs().at(ir::operation::LSTM::Output::CELL_STATE_OUT)};
+
+ const auto scratch_buffer_index{op.getOutputs().at(ir::operation::LSTM::Output::SCRATCH_BUFFER)};
+
+ if (!output->is_dynamic() &&
+ !(_tensor_registry->getITensor(output_state_out_index) != nullptr &&
+ _tensor_registry->getITensor(output_state_out_index)->is_dynamic()) &&
+ !(_tensor_registry->getITensor(cell_state_out_index) != nullptr &&
+ _tensor_registry->getITensor(cell_state_out_index)->is_dynamic()) &&
+ !(_tensor_registry->getITensor(scratch_buffer_index) != nullptr &&
+ _tensor_registry->getITensor(scratch_buffer_index)->is_dynamic()))
+ return;
+
+ const auto input_index{op.getInputs().at(ir::operation::LSTM::Input::INPUT)};
+ const auto input = _tensor_registry->getITensor(input_index);
+ const auto input_shape = input->getShape();
+
+ const auto input_to_output_weights_index{
+ op.getInputs().at(ir::operation::LSTM::Input::INPUT_TO_OUTPUT_WEIGHTS)};
+ const auto input_to_output_weights = _tensor_registry->getITensor(input_to_output_weights_index);
+ const auto input_to_output_weights_shape = input_to_output_weights->getShape();
+
+ const auto recurrent_to_output_weights_index{
+ op.getInputs().at(ir::operation::LSTM::Input::RECURRENT_TO_OUTPUT_WEIGHTS)};
+ const auto recurrent_to_output_weights =
+ _tensor_registry->getITensor(recurrent_to_output_weights_index);
+ const auto recurrent_to_output_weights_shape = recurrent_to_output_weights->getShape();
+
+ // re-sizing outputs
+ const int n_batch =
+ (input_shape.rank() == 3 && op.param().time_major) ? input_shape.dim(1) : input_shape.dim(0);
+ const int n_cell = input_to_output_weights_shape.dim(0);
+ const int n_output = recurrent_to_output_weights_shape.dim(1);
+ if (input_shape.rank() == 3)
+ {
+ if (op.param().time_major)
+ output->applyShape(ir::Shape{input_shape.dim(0), n_batch, n_output});
+ else
+ output->applyShape(ir::Shape{n_batch, input_shape.dim(1), n_output});
+ }
+ else
+ {
+ assert(input_shape.rank() == 2);
+ output->applyShape(ir::Shape{n_batch, n_output});
+ }
+ assert(output->buffer() != nullptr);
+
+ auto output_state_out = _tensor_registry->getITensor(output_state_out_index);
+ if (output_state_out != nullptr)
+ {
+ output_state_out->applyShape(ir::Shape{n_batch, n_output});
+ assert(output_state_out->buffer() != nullptr);
+ }
+
+ auto cell_state_out = _tensor_registry->getITensor(cell_state_out_index);
+ if (cell_state_out != nullptr)
+ {
+ cell_state_out->applyShape(ir::Shape{n_batch, n_cell});
+ assert(cell_state_out->buffer() != nullptr);
+ }
+
+ auto scratch_buffer = _tensor_registry->getITensor(scratch_buffer_index);
+ if (scratch_buffer != nullptr)
+ {
+ const auto input_to_input_weights_index{
+ op.getInputs().at(ir::operation::LSTM::Input::INPUT_TO_INPUT_WEIGHTS)};
+ const auto recurrent_to_input_weights_index{
+ op.getInputs().at(ir::operation::LSTM::Input::RECURRENT_TO_INPUT_WEIGHTS)};
+
+ const auto input_to_input_weights_shape =
+ _tensor_registry->getITensor(input_to_input_weights_index)->getShape();
+ bool has_input_to_input_weights =
+ input_to_input_weights_shape.dim(0) != 0 && input_to_input_weights_shape.dim(1) != 0;
+
+ const auto recurrent_to_input_weights_shape =
+ _tensor_registry->getITensor(recurrent_to_input_weights_index)->getShape();
+ bool has_recurrent_to_input_weights = recurrent_to_input_weights_shape.dim(0) != 0 &&
+ recurrent_to_input_weights_shape.dim(1) != 0;
+
+ // NOTE The cell_to_input_weights do not exist in non-peephole mode, even in a regular (non-CIFG) LSTM.
+ // true: no CIFG
+ // false: CIFG
+ bool has_cifg_param = has_input_to_input_weights && has_recurrent_to_input_weights;
+ if (has_cifg_param)
+ {
+ scratch_buffer->applyShape(ir::Shape{n_batch, n_cell * 4});
+ }
+ else
+ {
+ scratch_buffer->applyShape(ir::Shape{n_batch, n_cell * 3});
+ }
+ assert(scratch_buffer->buffer() != nullptr);
+ }
+}
+
void DynamicShapeInferer::visit(const ir::operation::MatrixBandPart &op)
{
handleSimpleUnaryOp(op, op.getInputs().at(ir::operation::MatrixBandPart::INPUT));
@@ -452,7 +610,7 @@ void DynamicShapeInferer::visit(const ir::operation::OneHot &op)
const auto axis_val = op.param().axis;
ir::Shape new_shape = shape_inference::inferOnehotShape(indices_shape, *depth_buf, axis_val);
- dynamicTensorManagerOf(output)->applyShape(output_ind, new_shape);
+ output->applyShape(new_shape);
assert(output->buffer() != nullptr);
}
@@ -488,7 +646,7 @@ void DynamicShapeInferer::visit(const ir::operation::Pack &op)
ir::Shape new_shape = shape_inference::inferPackShape(input_shape, axis, rank, num);
- dynamicTensorManagerOf(output)->applyShape(output_ind, new_shape);
+ output->applyShape(new_shape);
assert(output->buffer() != nullptr);
}
@@ -515,7 +673,7 @@ void DynamicShapeInferer::visit(const ir::operation::Pad &op)
shape_inference::inferPadShape(input->getShape(), pad_buf, pad->getShape().num_elements());
// change output shape and reallocate output tensor memory
- dynamicTensorManagerOf(output)->applyShape(output_ind, output_shape);
+ output->applyShape(output_shape);
assert(output->buffer() != nullptr);
}
@@ -567,7 +725,7 @@ void DynamicShapeInferer::visit(const ir::operation::Range &op)
*reinterpret_cast<int32_t *>(limit_tensor->buffer()),
*reinterpret_cast<int32_t *>(delta_tensor->buffer()));
}
- dynamicTensorManagerOf(output)->applyShape(output_ind, new_shape);
+ output->applyShape(new_shape);
assert(output->buffer() != nullptr);
}
@@ -611,7 +769,7 @@ void DynamicShapeInferer::visit(const ir::operation::Reduce &op)
ir::Shape new_shape = shape_inference::inferReduceShape(input_shape, axes_vec, keep_dims);
- dynamicTensorManagerOf(output)->applyShape(output_ind, new_shape);
+ output->applyShape(new_shape);
assert(output->buffer() != nullptr);
}
@@ -665,7 +823,7 @@ void DynamicShapeInferer::visit(const ir::operation::Reshape &op)
if (output_shape != output->getShape() || output->buffer() == nullptr)
{
// change on output shape
- dynamicTensorManagerOf(output)->applyShape(output_ind, output_shape);
+ output->applyShape(output_shape);
}
assert(output->buffer() != nullptr);
}
@@ -681,7 +839,7 @@ void DynamicShapeInferer::visit(const ir::operation::Reshape &op)
if (output_shape != output->getShape() || output->buffer() == nullptr)
{
// change on output shape
- dynamicTensorManagerOf(output)->applyShape(output_ind, output_shape);
+ output->applyShape(output_shape);
}
assert(output->buffer() != nullptr);
}
@@ -705,14 +863,35 @@ void DynamicShapeInferer::visit(const ir::operation::ResizeBilinear &op)
return;
// getting output shape from input shape and Params
- auto output_shape = shape_inference::inferResizeBilinearShape(
- input->getShape(), op.param().height_out, op.param().width_out);
+ int32_t height_out, width_out;
+ if (op.getInputs().size() == 2)
+ {
+ auto size_ind = op.getInputs().at(ir::operation::ResizeBilinear::Input::SIZE);
+ auto size = _tensor_registry->getITensor(size_ind);
+ if (size->data_type() == ir::DataType::INT32)
+ {
+ auto size_buf = reinterpret_cast<const int32_t *>(size->buffer());
+ height_out = size_buf[0];
+ width_out = size_buf[1];
+ }
+ else
+ {
+ throw std::runtime_error("DynamicShapeInferer ResizeBilinear : Unsupported data type");
+ }
+ }
+ else
+ {
+ height_out = op.param().height_out;
+ width_out = op.param().width_out;
+ }
+ auto output_shape =
+ shape_inference::inferResizeBilinearShape(input->getShape(), height_out, width_out);
// if shape is changed, change output shape and reallocate output tensor memory
if (output_shape != output->getShape() || output->buffer() == nullptr)
{
// change on output shape
- dynamicTensorManagerOf(output)->applyShape(output_ind, output_shape);
+ output->applyShape(output_shape);
}
assert(output->buffer() != nullptr);
}
@@ -749,7 +928,7 @@ void DynamicShapeInferer::visit(const ir::operation::Select &op)
auto output_ind = op.getOutputs().at(0);
auto output = _tensor_registry->getITensor(output_ind);
- dynamicTensorManagerOf(output)->applyShape(output_ind, new_shape);
+ output->applyShape(new_shape);
assert(output->buffer() != nullptr);
}
@@ -768,7 +947,7 @@ void DynamicShapeInferer::visit(const ir::operation::Shape &op)
ir::Shape output_shape;
output_shape.append(input_shape.rank());
- dynamicTensorManagerOf(output)->applyShape(output_ind, output_shape);
+ output->applyShape(output_shape);
assert(output->buffer() != nullptr);
}
@@ -794,7 +973,7 @@ void DynamicShapeInferer::visit(const ir::operation::Slice &op)
ir::Shape new_shape = shape_inference::inferSliceShape(input_shape, begins_buf, sizes_buf);
- dynamicTensorManagerOf(output)->applyShape(output_index, new_shape);
+ output->applyShape(new_shape);
assert(output->buffer() != nullptr);
}
@@ -831,7 +1010,7 @@ void DynamicShapeInferer::visit(const ir::operation::SpaceToBatchND &op)
ir::Shape new_shape = shape_inference::inferSpaceToBatchNDShape(
input_shape, block_shape_shape, padding_shape, block_shape_data, padding_data);
- dynamicTensorManagerOf(output)->applyShape(output_idx, new_shape);
+ output->applyShape(new_shape);
assert(output->buffer() != nullptr);
}
@@ -840,27 +1019,37 @@ void DynamicShapeInferer::visit(const ir::operation::Split &op)
const auto input_idx{op.getInputs().at(ir::operation::Split::Input::INPUT)};
const auto &input = _tensor_registry->getITensor(input_idx);
- if (!input->is_dynamic())
+ // Return if neither the input nor any output tensor is dynamic
+ bool has_dynamic = false;
+ for (const auto output_idx : op.getOutputs())
+ {
+ auto output = _tensor_registry->getITensor(output_idx);
+ has_dynamic |= output->is_dynamic();
+ }
+ if (!input->is_dynamic() && !has_dynamic)
{
return;
}
auto input_shape = input->getShape();
- const auto axis = op.param().axis;
+ const auto axis_idx{op.getInputs().at(ir::operation::Split::Input::AXIS)};
+ const auto &axis = _tensor_registry->getITensor(axis_idx);
+
+ auto axis_value = *reinterpret_cast<const int32_t *>(axis->buffer());
const auto num_splits = op.param().num_splits;
const auto rank = input_shape.rank();
- auto axis_resolved = axis < 0 ? axis + rank : axis;
+ axis_value = axis_value < 0 ? axis_value + rank : axis_value;
- assert(0 <= axis_resolved && axis_resolved < rank);
+ assert(0 <= axis_value && axis_value < rank);
- ir::Shape new_shape = shape_inference::inferSplitShape(input_shape, axis_resolved, num_splits);
+ ir::Shape new_shape = shape_inference::inferSplitShape(input_shape, axis_value, num_splits);
for (int out_tensor_idx = 0; out_tensor_idx < num_splits; out_tensor_idx++)
{
auto output_ind = op.getOutputs().at(out_tensor_idx);
auto output = _tensor_registry->getITensor(output_ind);
- dynamicTensorManagerOf(output)->applyShape(output_ind, new_shape);
+ output->applyShape(new_shape);
assert(output->buffer() != nullptr);
}
}
@@ -889,7 +1078,7 @@ void DynamicShapeInferer::visit(const ir::operation::Squeeze &op)
auto output_ind = op.getOutputs().at(0);
auto output = _tensor_registry->getITensor(output_ind);
- dynamicTensorManagerOf(output)->applyShape(output_ind, new_shape);
+ output->applyShape(new_shape);
assert(output->buffer() != nullptr);
}
@@ -930,7 +1119,7 @@ void DynamicShapeInferer::visit(const ir::operation::StridedSlice &op)
ir::Shape output_shape =
onert::shape_inference::inferStridedSliceShape(input_shape, op_params, rank);
- dynamicTensorManagerOf(output)->applyShape(output_index, output_shape);
+ output->applyShape(output_shape);
assert(output->buffer() != nullptr);
}
@@ -952,10 +1141,11 @@ void DynamicShapeInferer::visit(const ir::operation::Tile &op)
auto multiplier_buffer = reinterpret_cast<const int32_t *>(multiplier->buffer());
assert(multiplier_buffer);
- auto output_shape = shape_inference::inferTileShape(input_shape, multiplier_buffer);
+ auto output_shape =
+ shape_inference::inferTileShape(input_shape, multiplier_buffer, multiplier->dimension(0));
// set output shape and output buffer
- dynamicTensorManagerOf(output)->applyShape(output_ind, output_shape);
+ output->applyShape(output_shape);
assert(output->buffer() != nullptr);
}
@@ -967,17 +1157,48 @@ void DynamicShapeInferer::visit(const ir::operation::Transpose &op)
// from op, access the buffer of second input to read new shape
auto input_ind = op.getInputs().at(ir::operation::Transpose::Input::INPUT);
- auto input_tensor = _tensor_registry->getITensor(input_ind);
- auto input_shape = input_tensor->getShape();
+ auto input = _tensor_registry->getITensor(input_ind);
+ auto input_shape = input->getShape();
- if (!input_tensor->is_dynamic())
+ /*
+ Here, the state after compilation (static shape inference) could be one of the following:
+
+ input perms output execution-time shape inf required
+ ------------------------------------ --------------------------------
+ case 1) static const static X
+ case 2) static non-const dynamic O
+ case 3) dynamic const dynamic O
+ case 4) dynamic non-const dynamic O
+
+ So, only when both the input and the output are static can we skip dynamic shape inference.
+ */
+ if ((!input->is_dynamic()) && (!output->is_dynamic()))
return;
- const auto perm{op.param().perm};
- // set output shape, based on input and params
- ir::Shape new_shape = shape_inference::inferTransposeShape(input_shape, perm);
+ auto perm_ind = op.getInputs().at(ir::operation::Transpose::Input::PERMUTATION);
+ auto perm = _tensor_registry->getITensor(perm_ind);
+
+ ir::Shape new_shape;
+ // TODO Change perm->dimension(0) == 0 to perm->num_elements() == 0
+ if (perm->dimension(0) == 0) // This condition means that perm is (n-1...0)
+ {
+ // Call by (n-1...0)
+ new_shape = shape_inference::inferTransposeShape(input_shape, nullptr, 0);
+ }
+ else
+ {
+ // Check rank
+ if (input->num_dimensions() != perm->getShape().num_elements())
+ {
+ throw std::runtime_error("DynamicShapeInferer failed, bad rank size: " +
+ std::to_string(perm->getShape().num_elements()));
+ }
- dynamicTensorManagerOf(output)->applyShape(output_ind, new_shape);
+ // set output shape, based on input and params
+ const auto perm_buffer = reinterpret_cast<const int32_t *>(perm->buffer());
+ new_shape = shape_inference::inferTransposeShape(input_shape, perm_buffer, perm->dimension(0));
+ }
+ output->applyShape(new_shape);
assert(output->buffer() != nullptr);
}
@@ -1005,7 +1226,7 @@ void DynamicShapeInferer::visit(const ir::operation::Unpack &op)
auto output_ind = op.getOutputs().at(out_tensor_idx);
auto output = _tensor_registry->getITensor(output_ind);
- dynamicTensorManagerOf(output)->applyShape(output_ind, new_shape);
+ output->applyShape(new_shape);
assert(output->buffer() != nullptr);
}
diff --git a/runtime/onert/core/src/exec/Execution.cc b/runtime/onert/core/src/exec/Execution.cc
index 7feb3ab68..21fdd9c05 100644
--- a/runtime/onert/core/src/exec/Execution.cc
+++ b/runtime/onert/core/src/exec/Execution.cc
@@ -34,14 +34,13 @@ Execution::Execution(const std::shared_ptr<ExecutorMap> &executors) : _executors
void Execution::changeInputShape(const ir::IOIndex &index, const ir::Shape &new_shape)
{
- // This should be called BEFORE setInput.
- if (_io_desc.inputs.at(index.value()) != 0)
- throw std::runtime_error("Error in calling order");
-
// This will be used later to set input tensor dynamic
// Note that 'compiled' model will not be updated with new_shape
// but new_shape will change model input shape while 'running' the model
_io_desc.dynamic_input_shapes[index] = new_shape;
+
+ VERBOSE(Execution) << "Model input shape will be changed at the start of execute()"
+ << "(index: " << index.value() << ")" << std::endl;
}
// TODO Remove default parameter
diff --git a/runtime/onert/core/src/exec/ExecutionObservers.cc b/runtime/onert/core/src/exec/ExecutionObservers.cc
index 060f874de..5883d9a1c 100644
--- a/runtime/onert/core/src/exec/ExecutionObservers.cc
+++ b/runtime/onert/core/src/exec/ExecutionObservers.cc
@@ -22,6 +22,7 @@
#include "exec/IExecutor.h"
#include "misc/polymorphic_downcast.h"
#include "ir/OpSequence.h"
+#include "util/EventWriter.h"
namespace onert
{
@@ -70,7 +71,7 @@ void ProfileObserver::handleEnd(IExecutor *exec, const ir::OpSequence *op_seq,
};
ChromeTracingObserver::ChromeTracingObserver(const std::string &filepath, const ir::Graph &graph)
- : _ofs{filepath, std::ofstream::out}, _recorder{}, _collector{&_recorder}, _graph{graph}
+ : _base_filepath(filepath), _recorder{}, _collector{&_recorder}, _graph{graph}
{
}
@@ -78,7 +79,7 @@ ChromeTracingObserver::~ChromeTracingObserver()
{
try
{
- _recorder.writeToFile(_ofs);
+ EventWriter{_recorder}.writeToFiles(_base_filepath);
}
catch (const std::exception &e)
{
diff --git a/runtime/onert/core/src/exec/ExecutionObservers.h b/runtime/onert/core/src/exec/ExecutionObservers.h
index ac0076ed2..f8c2acca5 100644
--- a/runtime/onert/core/src/exec/ExecutionObservers.h
+++ b/runtime/onert/core/src/exec/ExecutionObservers.h
@@ -76,7 +76,7 @@ private:
static std::string opSequenceTag(const ir::OpSequence *op_seq, const ir::Operations &operations);
private:
- std::ofstream _ofs;
+ const std::string &_base_filepath;
EventRecorder _recorder;
EventCollector _collector;
const ir::Graph &_graph;
diff --git a/runtime/onert/core/src/exec/ExecutorBase.cc b/runtime/onert/core/src/exec/ExecutorBase.cc
index f835a9675..018a0bba0 100644
--- a/runtime/onert/core/src/exec/ExecutorBase.cc
+++ b/runtime/onert/core/src/exec/ExecutorBase.cc
@@ -27,38 +27,32 @@ namespace exec
{
ExecutorBase::ExecutorBase(std::unique_ptr<compiler::LoweredGraph> &&lowered_graph,
- const std::vector<std::shared_ptr<backend::ITensor>> &input_tensors,
- const std::vector<std::shared_ptr<backend::ITensor>> &output_tensors,
- const compiler::TensorRegistries &tensor_regs,
- backend::TensorManagerSet &&tensor_mgrs)
+ const std::vector<backend::ITensor *> &input_tensors,
+ const std::vector<backend::ITensor *> &output_tensors,
+ const compiler::TensorRegistries &tensor_regs)
: _lowered_graph{std::move(lowered_graph)}, _graph{_lowered_graph->graph()},
- _input_tensors{input_tensors}, _output_tensors{output_tensors},
- _tensor_mgrs{std::move(tensor_mgrs)}, _mutex()
+ _input_tensors{input_tensors}, _output_tensors{output_tensors}, _mutex()
{
// TODO Fix the way of knowing whether it is primary or not
bool primary_executor = !(_input_tensors.empty() && _output_tensors.empty());
if (!primary_executor)
{
auto build_input_tensor_list = [&](const onert::ir::OperandIndexSequence &ind_seq) {
- std::vector<std::shared_ptr<backend::ITensor>> list;
+ std::vector<backend::ITensor *> list;
for (auto ind : ind_seq)
{
- std::shared_ptr<backend::ITensor> tensor = tensor_regs.getITensor(ind);
+ backend::ITensor *tensor = tensor_regs.getITensor(ind);
assert(tensor != nullptr);
- DynAllocInfo dyn_alloc_info{ind};
- _input_to_dyn_alloc_info.emplace(tensor, dyn_alloc_info);
list.push_back(tensor);
}
return list;
};
auto build_output_tensor_list = [&](const onert::ir::OperandIndexSequence &ind_seq) {
- std::vector<std::shared_ptr<backend::ITensor>> list;
+ std::vector<backend::ITensor *> list;
for (auto ind : ind_seq)
{
- std::shared_ptr<backend::ITensor> tensor = tensor_regs.getITensor(ind);
+ backend::ITensor *tensor = tensor_regs.getITensor(ind);
assert(tensor != nullptr);
- DynAllocInfo dyn_alloc_info{ind};
- _output_to_dyn_alloc_info.emplace(tensor, dyn_alloc_info);
list.push_back(tensor);
}
return list;
@@ -66,28 +60,9 @@ ExecutorBase::ExecutorBase(std::unique_ptr<compiler::LoweredGraph> &&lowered_gra
_input_tensors = build_input_tensor_list(_graph.getInputs());
_output_tensors = build_output_tensor_list(_graph.getOutputs());
}
- else
- {
- assert(input_tensors.size() == _graph.getInputs().size());
- assert(output_tensors.size() == _graph.getOutputs().size());
- for (uint32_t i = 0; i < input_tensors.size(); i++)
- {
- auto tensor = input_tensors[i];
- auto ind = _graph.getInputs().at(i);
- DynAllocInfo dyn_alloc_info{ind};
- _input_to_dyn_alloc_info.emplace(tensor, dyn_alloc_info);
- }
- for (uint32_t i = 0; i < output_tensors.size(); i++)
- {
- auto tensor = output_tensors[i];
- auto ind = _graph.getOutputs().at(i);
- DynAllocInfo dyn_alloc_info{ind};
- _output_to_dyn_alloc_info.emplace(tensor, dyn_alloc_info);
- }
- }
}
-void ExecutorBase::execute(const std::vector<std::shared_ptr<backend::ITensor>> &src_tensors,
+void ExecutorBase::execute(const std::vector<backend::ITensor *> &src_tensors,
const std::shared_ptr<IPermuteFunction> &pre_fn)
{
// For thread-safe, use mutex
@@ -108,22 +83,12 @@ void ExecutorBase::execute(const std::vector<std::shared_ptr<backend::ITensor>>
// If src_tensor or input_tensor is nullptr, pre_fn does not copy the tensors
if (src_tensor != nullptr && input_tensor != nullptr)
{
- auto dyn_alloc_info = _input_to_dyn_alloc_info.find(_input_tensors[n]);
const auto orig_input_shape = input_tensor->getShape();
const auto changed_input_shape =
convertShape(src_tensor->getShape(), src_tensor->layout(), input_tensor->layout());
if (orig_input_shape != changed_input_shape)
{
- if (dyn_alloc_info == _input_to_dyn_alloc_info.end())
- {
- // The input_tensor is a dynamic tensor of backend that doesn't support dynamic tensor
- throw std::runtime_error("Unknown dim is found at execution time for a backend that "
- "does not support dynamic tensor");
- }
- else
- {
- input_tensor->set_dynamic();
- }
+ input_tensor->set_dynamic();
}
}
}
@@ -147,7 +112,7 @@ void ExecutorBase::execute(const IODescription &desc)
for (uint32_t i = 0; i < _input_tensors.size(); ++i)
{
// TODO Remove dynamic_cast
- auto tensor = std::dynamic_pointer_cast<backend::controlflow::UserTensor>(_input_tensors[i]);
+ auto *tensor = dynamic_cast<backend::controlflow::UserTensor *>(_input_tensors[i]);
assert(tensor);
auto input_shape = desc.dynamic_input_shapes.find(ir::IOIndex{i});
if (input_shape != desc.dynamic_input_shapes.end())
@@ -155,6 +120,7 @@ void ExecutorBase::execute(const IODescription &desc)
tensor->set_dynamic();
tensor->setShape(input_shape->second);
}
+ // TODO Check if (desc.inputs[i] == nullptr)
// TODO Better design for ITensor? (we need const_cast as ITensor is writable)
tensor->setBuffer(static_cast<uint8_t *>(const_cast<void *>(desc.inputs[i]->buffer)),
desc.inputs[i]->size);
@@ -166,12 +132,12 @@ void ExecutorBase::execute(const IODescription &desc)
for (uint32_t i = 0; i < _output_tensors.size(); ++i)
{
// TODO Remove dynamic_cast
- auto tensor = std::dynamic_pointer_cast<backend::controlflow::UserTensor>(_output_tensors[i]);
+ auto *tensor = dynamic_cast<backend::controlflow::UserTensor *>(_output_tensors[i]);
assert(tensor);
tensor->set_dynamic(); // It can't be resized but shape could change
- // TODO Better design for ITensor? (we need const_cast as ITensor is writable)
- tensor->setBuffer(static_cast<uint8_t *>(const_cast<void *>(desc.outputs[i]->buffer)),
- desc.outputs[i]->size);
+ if (desc.outputs[i] == nullptr)
+ throw std::runtime_error{"Output " + std::to_string(i) + "'s buffer is not set."};
+ tensor->setBuffer(static_cast<uint8_t *>(desc.outputs[i]->buffer), desc.outputs[i]->size);
}
executeImpl();
@@ -218,17 +184,8 @@ void ExecutorBase::handleDynamicInputTensor(ir::IOIndex io_ind, const IODescript
auto shape_sig_found = desc.dynamic_input_shapes.find(io_ind);
if (shape_sig_found != desc.dynamic_input_shapes.end())
{
- auto dyn_alloc_info = _input_to_dyn_alloc_info.find(_input_tensors[io_ind.value()]);
- if (dyn_alloc_info == _input_to_dyn_alloc_info.end())
- throw std::runtime_error("Unknown dim is found at execution time for a backend that "
- "does not support dynamic tensor");
-
auto changed_input_shape = shape_sig_found->second;
- auto operand_ind = dyn_alloc_info->second.ind;
-
- auto dyn_tensor_manager = _input_tensors[io_ind.value()]->dynamic_tensor_manager();
- assert(dyn_tensor_manager);
- dyn_tensor_manager->applyShape(operand_ind, changed_input_shape);
+ _input_tensors[io_ind.value()]->applyShape(changed_input_shape);
}
}
diff --git a/runtime/onert/core/src/exec/ExecutorBase.h b/runtime/onert/core/src/exec/ExecutorBase.h
index a13be7dbf..8a6ec9174 100644
--- a/runtime/onert/core/src/exec/ExecutorBase.h
+++ b/runtime/onert/core/src/exec/ExecutorBase.h
@@ -20,9 +20,7 @@
#include <mutex>
#include "IPermuteFunction.h"
-#include "Source.h"
#include "exec/ExecutionObservers.h"
-#include "Sink.h"
#include "ShapeConverter.h"
#include "exec/IExecutor.h"
#include "compiler/LoweredGraph.h"
@@ -51,10 +49,9 @@ public:
* @param tensor_builders Tensor builders that are currently used
*/
ExecutorBase(std::unique_ptr<compiler::LoweredGraph> &&lowered_graph,
- const std::vector<std::shared_ptr<backend::ITensor>> &input_tensors,
- const std::vector<std::shared_ptr<backend::ITensor>> &output_tensors,
- const compiler::TensorRegistries &tensor_regs,
- backend::TensorManagerSet &&tensor_mgrs);
+ const std::vector<backend::ITensor *> &input_tensors,
+ const std::vector<backend::ITensor *> &output_tensors,
+ const compiler::TensorRegistries &tensor_regs);
virtual ~ExecutorBase() = default;
@@ -66,7 +63,7 @@ public:
* @param src_tensor Tensor list that will be copied to input tensors of this
* @param pre_fn The permutation function that copy from src_tensor to input tensors of this
*/
- void execute(const std::vector<std::shared_ptr<backend::ITensor>> &src_tensors,
+ void execute(const std::vector<backend::ITensor *> &src_tensors,
const std::shared_ptr<IPermuteFunction> &pre_fn);
void execute(const IODescription &desc) final;
@@ -81,17 +78,9 @@ public:
void addObserver(std::unique_ptr<IExecutionObserver> ref) { _subject.add(std::move(ref)); };
- const std::vector<std::shared_ptr<backend::ITensor>> &getInputTensors() const
- {
- return _input_tensors;
- }
-
- const std::vector<std::shared_ptr<backend::ITensor>> &getOutputTensors() const
- {
- return _output_tensors;
- }
+ const std::vector<backend::ITensor *> &getInputTensors() const { return _input_tensors; }
- const DynAllocInfoMap &getInputsDynamicAllocInfo() const { return _input_to_dyn_alloc_info; }
+ const std::vector<backend::ITensor *> &getOutputTensors() const { return _output_tensors; }
protected:
/**
@@ -104,11 +93,8 @@ protected:
std::shared_ptr<ir::OperationIndexMap<int64_t>> _indexed_ranks;
std::unique_ptr<compiler::LoweredGraph> _lowered_graph;
const ir::Graph &_graph;
- std::vector<std::shared_ptr<backend::ITensor>> _input_tensors;
- std::vector<std::shared_ptr<backend::ITensor>> _output_tensors;
- DynAllocInfoMap _input_to_dyn_alloc_info;
- DynAllocInfoMap _output_to_dyn_alloc_info;
- backend::TensorManagerSet _tensor_mgrs;
+ std::vector<backend::ITensor *> _input_tensors;
+ std::vector<backend::ITensor *> _output_tensors;
std::mutex _mutex;
private:
diff --git a/runtime/onert/core/src/exec/FunctionSequence.cc b/runtime/onert/core/src/exec/FunctionSequence.cc
index fb31f7582..8aefa5eeb 100644
--- a/runtime/onert/core/src/exec/FunctionSequence.cc
+++ b/runtime/onert/core/src/exec/FunctionSequence.cc
@@ -28,9 +28,11 @@ namespace exec
void FunctionSequence::run()
{
- // TODO Find out when `_enable_dynamic_shape_inferer` is true but `_dynamic_tensor_ctx` is false
if (_enable_dynamic_shape_inferer && _dynamic_tensor_ctx)
{
+ // acl_cl and acl_neon backends don't support dynamic shapes.
+ // _dynamic_tensor_ctx is always nullptr for acl_cl and acl_neon,
+ // so those two backends cannot reach here.
if (_dynamic_tensor_ctx->op_seq->size() != _functions.size())
throw std::runtime_error("operation and functions should be mapped one by one");
@@ -61,11 +63,6 @@ void FunctionSequence::run()
{
for (const auto &function : _functions)
{
- auto *sub_func_seq = dynamic_cast<FunctionSequence *>(function.get());
- if (sub_func_seq != nullptr)
- {
- sub_func_seq->enableDynamicShapeInferer(false);
- }
function->run();
}
}
diff --git a/runtime/onert/core/src/exec/IPermuteFunction.h b/runtime/onert/core/src/exec/IPermuteFunction.h
index 6b4d15380..94bc2e436 100644
--- a/runtime/onert/core/src/exec/IPermuteFunction.h
+++ b/runtime/onert/core/src/exec/IPermuteFunction.h
@@ -50,13 +50,13 @@ private:
public:
virtual void run() override
{
- assert(_src_tensors.size() > 0);
+ // TODO Optimization: ensure control does not reach here when _src_tensors.size() == 0
assert(_src_tensors.size() == _dst_tensors.size());
auto src_it = _src_tensors.begin();
auto dst_it = _dst_tensors.begin();
while (src_it != _src_tensors.end())
{
- const auto src_tensor = *src_it;
+ auto src_tensor = *src_it;
auto dst_tensor = *dst_it;
if (src_tensor != dst_tensor)
{
@@ -101,9 +101,8 @@ public:
virtual void optimize() = 0;
private:
- template <class T>
- void permute(const std::shared_ptr<backend::ITensor> &src, std::shared_ptr<backend::ITensor> &dst,
- size_t rank)
+ // TODO Make src const by providing a const access()
+ template <class T> void permute(backend::ITensor *src, backend::ITensor *dst, size_t rank)
{
const auto permute_type = [&]() -> PermuteType {
if (src->layout() == ir::Layout::NHWC && dst->layout() == ir::Layout::NCHW)
@@ -121,127 +120,65 @@ private:
}();
auto fn = [&](backend::ITensor &src_tensor) {
dst->access([&](backend::ITensor &dst_tensor) {
- auto src_buffer = src_tensor.buffer();
- auto src_size = src_tensor.total_size();
- auto dst_buffer = dst_tensor.buffer();
- if (permute_type == PermuteType::COPY)
+ if (rank == 4 && permute_type != PermuteType::COPY)
{
- assert(src_tensor.layout() == dst_tensor.layout());
- if (!src_tensor.has_padding() && !dst_tensor.has_padding())
+ switch (permute_type)
{
- assert(src_size <= dst_tensor.total_size());
- memcpy(dst_buffer, src_buffer, src_size);
- return;
- }
- }
- switch (rank)
- {
- case 0:
- case 1:
- {
- const int32_t copy_len = dst_tensor.dimension(0);
-
- memcpy(dst_buffer, src_buffer, copy_len * sizeof(T));
- break;
- }
- case 2:
- {
- const int32_t dim_0 = dst_tensor.dimension(0);
- const int32_t copy_len = dst_tensor.dimension(1);
-
- for (int32_t i = 0; i < dim_0; ++i)
+ case PermuteType::NHWC_TO_NCHW:
{
- ir::Coordinates coords{i, 0};
- memcpy(dst_buffer + dst_tensor.calcOffset(coords),
- src_buffer + src_tensor.calcOffset(coords), copy_len * sizeof(T));
+ ir::FeatureShape shape;
+ shape.N = dst_tensor.dimension(0);
+ shape.C = dst_tensor.dimension(1);
+ shape.H = dst_tensor.dimension(2);
+ shape.W = dst_tensor.dimension(3);
+ const feature::nhwc::Reader<T> from(&src_tensor);
+ feature::nchw::View<T> into(&dst_tensor);
+ feature::iterate(shape)
+ << [&](uint32_t batch, uint32_t ch, uint32_t row, uint32_t col) {
+ const auto value = from.at(batch, row, col, ch);
+ into.at(batch, ch, row, col) = value;
+ };
+ break;
}
- break;
- }
- case 3:
- {
- const int32_t dim_0 = dst_tensor.dimension(0);
- const int32_t dim_1 = dst_tensor.dimension(1);
- const int32_t copy_len = dst_tensor.dimension(2);
-
- for (auto i = 0; i < dim_0; ++i)
+ case PermuteType::NCHW_TO_NHWC:
{
- for (auto j = 0; j < dim_1; ++j)
- {
- ir::Coordinates coords{i, j, 0};
- memcpy(dst_buffer + dst_tensor.calcOffset(coords),
- src_buffer + src_tensor.calcOffset(coords), copy_len * sizeof(T));
- }
+ ir::FeatureShape shape;
+ shape.N = src_tensor.dimension(0);
+ shape.C = src_tensor.dimension(1);
+ shape.H = src_tensor.dimension(2);
+ shape.W = src_tensor.dimension(3);
+ const feature::nchw::Reader<T> from(&src_tensor);
+ feature::nhwc::View<T> into(&dst_tensor);
+ feature::iterate(shape)
+ << [&](uint32_t batch, uint32_t ch, uint32_t row, uint32_t col) {
+ const auto value = from.at(batch, ch, row, col);
+ into.at(batch, row, col, ch) = value;
+ };
+ break;
}
- break;
- }
- case 4:
- {
- switch (permute_type)
+ default:
{
- case PermuteType::NHWC_TO_NCHW:
- {
- ir::FeatureShape shape;
- shape.N = dst_tensor.dimension(0);
- shape.C = dst_tensor.dimension(1);
- shape.H = dst_tensor.dimension(2);
- shape.W = dst_tensor.dimension(3);
- const feature::nhwc::Reader<T> from(&src_tensor);
- feature::nchw::View<T> into(&dst_tensor);
- feature::iterate(shape)
- << [&](uint32_t batch, uint32_t ch, uint32_t row, uint32_t col) {
- const auto value = from.at(batch, row, col, ch);
- into.at(batch, ch, row, col) = value;
- };
- break;
- }
- case PermuteType::NCHW_TO_NHWC:
- {
- ir::FeatureShape shape;
- shape.N = src_tensor.dimension(0);
- shape.C = src_tensor.dimension(1);
- shape.H = src_tensor.dimension(2);
- shape.W = src_tensor.dimension(3);
- const feature::nchw::Reader<T> from(&src_tensor);
- feature::nhwc::View<T> into(&dst_tensor);
- feature::iterate(shape)
- << [&](uint32_t batch, uint32_t ch, uint32_t row, uint32_t col) {
- const auto value = from.at(batch, ch, row, col);
- into.at(batch, row, col, ch) = value;
- };
- break;
- }
- case PermuteType::COPY:
- {
- const int32_t dim_0 = dst_tensor.dimension(0);
- const int32_t dim_1 = dst_tensor.dimension(1);
- const int32_t dim_2 = dst_tensor.dimension(2);
- const int32_t copy_len = dst_tensor.dimension(3);
-
- for (auto i = 0; i < dim_0; ++i)
- {
- for (auto j = 0; j < dim_1; ++j)
- {
- for (auto k = 0; k < dim_2; ++k)
- {
- ir::Coordinates coords{i, j, k, 0};
- memcpy(dst_buffer + dst_tensor.calcOffset(coords),
- src_buffer + src_tensor.calcOffset(coords), copy_len * sizeof(T));
- }
- }
- }
- break;
- }
- default:
- {
- throw std::runtime_error("Unsupported Permutation");
- break;
- }
+ throw std::runtime_error("Unsupported Permutation");
+ break;
}
- break;
}
- default:
- throw std::runtime_error("Unsupported rank in permutation");
- break;
+ }
+ else if (!src_tensor.has_padding() && !dst_tensor.has_padding())
+ {
+ auto src_size = src_tensor.total_size();
+ assert(src_size <= dst_tensor.total_size());
+ memcpy(dst_tensor.buffer(), src_tensor.buffer(), src_size);
+ }
+ else
+ {
+ auto loop_shape = src_tensor.getShape();
+ const auto copy_axis = loop_shape.rank() - 1;
+ const auto copy_len = loop_shape.dim(copy_axis) * sizeof(T);
+ loop_shape.dim(copy_axis) = 1;
+ ShapeLoop(loop_shape, [&](const onert::ir::Coordinates &coords) {
+ memcpy(dst_tensor.buffer() + dst_tensor.calcOffset(coords),
+ src_tensor.buffer() + src_tensor.calcOffset(coords), copy_len);
+ });
}
});
};
@@ -275,8 +212,8 @@ private:
}
protected:
- std::vector<std::shared_ptr<backend::ITensor>> _src_tensors;
- std::vector<std::shared_ptr<backend::ITensor>> _dst_tensors;
+ std::vector<backend::ITensor *> _src_tensors;
+ std::vector<backend::ITensor *> _dst_tensors;
// TODO Remove this member if it is possible
std::vector<size_t> _ranks;
};
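Editor's note: the rewritten fallback above replaces the per-rank memcpy cases with one loop that visits every coordinate except the innermost axis and copies a contiguous row per visit. A standalone sketch of that row-wise copy idea, using plain strides instead of ITensor::calcOffset (illustrative only, not onert's actual ShapeLoop):

```cpp
#include <cstddef>
#include <cstring>
#include <vector>

// Element offset of `coords` for a buffer with per-axis `strides`
// (strides may include padding, much like ITensor::calcOffset in onert).
size_t offset_of(const std::vector<int> &coords, const std::vector<size_t> &strides)
{
  size_t off = 0;
  for (size_t i = 0; i < coords.size(); ++i)
    off += coords[i] * strides[i];
  return off;
}

// Copy a tensor of `shape` from `src` to `dst`, one contiguous row (last axis)
// at a time, walking all outer coordinates.
void row_wise_copy(const std::vector<int> &shape, const float *src,
                   const std::vector<size_t> &src_strides, float *dst,
                   const std::vector<size_t> &dst_strides)
{
  const size_t rank = shape.size();
  const size_t row_len = rank ? static_cast<size_t>(shape[rank - 1]) : 1;

  std::vector<int> coords(rank, 0); // last axis stays 0; whole rows are copied
  while (true)
  {
    std::memcpy(dst + offset_of(coords, dst_strides), src + offset_of(coords, src_strides),
                row_len * sizeof(float));

    // Advance the outer coordinates (everything except the last axis).
    int axis = static_cast<int>(rank) - 2;
    for (; axis >= 0; --axis)
    {
      if (++coords[axis] < shape[axis])
        break;
      coords[axis] = 0;
    }
    if (axis < 0)
      break; // all rows visited
  }
}
```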
diff --git a/runtime/onert/core/src/exec/LinearExecutor.cc b/runtime/onert/core/src/exec/LinearExecutor.cc
index 69dfe9b9b..6e6ca110f 100644
--- a/runtime/onert/core/src/exec/LinearExecutor.cc
+++ b/runtime/onert/core/src/exec/LinearExecutor.cc
@@ -51,8 +51,10 @@ void LinearExecutor::executeImpl()
_subject.notifyJobBegin(this, op_seq, backend);
auto &fn_seq = code.fn_seq;
- bool handle_dynamic_tensor = op_seq->has_dynamic_tensor() || hasDynamicInput();
+ fn_seq->initRunning();
+
+ bool handle_dynamic_tensor = op_seq->has_dynamic_tensor() || hasDynamicInput();
fn_seq->enableDynamicShapeInferer(handle_dynamic_tensor);
fn_seq->run();
diff --git a/runtime/onert/core/src/exec/LinearExecutor.h b/runtime/onert/core/src/exec/LinearExecutor.h
index c224d3f4f..22d00ec30 100644
--- a/runtime/onert/core/src/exec/LinearExecutor.h
+++ b/runtime/onert/core/src/exec/LinearExecutor.h
@@ -47,13 +47,11 @@ public:
* @param code_map OpSequence and its code map
*/
LinearExecutor(std::unique_ptr<compiler::LoweredGraph> lowered_graph,
- const std::vector<std::shared_ptr<backend::ITensor>> &input_tensors,
- const std::vector<std::shared_ptr<backend::ITensor>> &output_tensors,
- const compiler::TensorRegistries &tensor_regs,
- backend::TensorManagerSet &&tensor_mgrs, compiler::CodeMap &&code_map,
+ const std::vector<backend::ITensor *> &input_tensors,
+ const std::vector<backend::ITensor *> &output_tensors,
+ const compiler::TensorRegistries &tensor_regs, compiler::CodeMap &&code_map,
const std::vector<ir::OpSequenceIndex> &order)
- : ExecutorBase{std::move(lowered_graph), input_tensors, output_tensors, tensor_regs,
- std::move(tensor_mgrs)}
+ : ExecutorBase{std::move(lowered_graph), input_tensors, output_tensors, tensor_regs}
{
for (auto index : order)
{
diff --git a/runtime/onert/core/src/exec/ParallelExecutor.cc b/runtime/onert/core/src/exec/ParallelExecutor.cc
index ab234aacd..676bdb5fa 100644
--- a/runtime/onert/core/src/exec/ParallelExecutor.cc
+++ b/runtime/onert/core/src/exec/ParallelExecutor.cc
@@ -59,14 +59,13 @@ void ParallelExecutor::notify(uint32_t finished_job_id)
_cv_jobs.notify_all();
}
-ParallelExecutor::ParallelExecutor(
- std::unique_ptr<compiler::LoweredGraph> lowered_graph,
- const std::vector<std::shared_ptr<backend::ITensor>> &input_tensors,
- const std::vector<std::shared_ptr<backend::ITensor>> &output_tensors,
- const compiler::TensorRegistries &tensor_regs, backend::TensorManagerSet &&tensor_mgrs,
- compiler::CodeMap &&code_map)
- : DataflowExecutor{std::move(lowered_graph), input_tensors, output_tensors, tensor_regs,
- std::move(tensor_mgrs), std::move(code_map)}
+ParallelExecutor::ParallelExecutor(std::unique_ptr<compiler::LoweredGraph> lowered_graph,
+ const std::vector<backend::ITensor *> &input_tensors,
+ const std::vector<backend::ITensor *> &output_tensors,
+ const compiler::TensorRegistries &tensor_regs,
+ compiler::CodeMap &&code_map)
+ : DataflowExecutor{std::move(lowered_graph), input_tensors, output_tensors, tensor_regs,
+ std::move(code_map)}
{
VERBOSE(ParallelExecutor) << "Constructing Parallel Executor" << std::endl;
}
@@ -133,6 +132,8 @@ void ParallelExecutor::executeImpl()
notify(job_index);
};
+ job->fn_seq()->initRunning();
+
// dynamic tensor setting
bool handle_dynamic_tensor = op_seq->has_dynamic_tensor() || dynamic_input_exists;
job->fn_seq()->enableDynamicShapeInferer(handle_dynamic_tensor);
diff --git a/runtime/onert/core/src/exec/ParallelExecutor.h b/runtime/onert/core/src/exec/ParallelExecutor.h
index 929edfce9..111c20c0c 100644
--- a/runtime/onert/core/src/exec/ParallelExecutor.h
+++ b/runtime/onert/core/src/exec/ParallelExecutor.h
@@ -51,10 +51,9 @@ public:
* @param code_map OpSequence and its code map
*/
ParallelExecutor(std::unique_ptr<compiler::LoweredGraph> lowered_graph,
- const std::vector<std::shared_ptr<backend::ITensor>> &input_tensors,
- const std::vector<std::shared_ptr<backend::ITensor>> &output_tensors,
- const compiler::TensorRegistries &tensor_regs,
- backend::TensorManagerSet &&tensor_mgrs, compiler::CodeMap &&code_map);
+ const std::vector<backend::ITensor *> &input_tensors,
+ const std::vector<backend::ITensor *> &output_tensors,
+ const compiler::TensorRegistries &tensor_regs, compiler::CodeMap &&code_map);
void executeImpl() override;
diff --git a/runtime/onert/core/src/exec/Sink.h b/runtime/onert/core/src/exec/Sink.h
deleted file mode 100644
index 6a99efe60..000000000
--- a/runtime/onert/core/src/exec/Sink.h
+++ /dev/null
@@ -1,199 +0,0 @@
-/*
- * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#ifndef __ONERT_EXEC_SINK_H__
-#define __ONERT_EXEC_SINK_H__
-
-#include "feature/nchw/Reader.h"
-#include "feature/nchw/View.h"
-#include "feature/nhwc/Reader.h"
-#include "feature/nhwc/View.h"
-
-#include <cassert>
-#include <memory>
-#include "util/Utils.h"
-#include <misc/feature/IndexIterator.h>
-
-namespace onert
-{
-namespace exec
-{
-struct ISink
-{
- virtual ~ISink() = default;
-
- virtual void pull(::onert::backend::ITensor &tensor) const = 0;
-};
-
-// Create second lever inheritance: the first lever is used as a reference type in use-case places
-template <typename T> class ITemplSink : public ISink
-{
-public:
- ITemplSink(void *output_buffer, const size_t &output_size, const ir::Shape &shape,
- const bool copy, ir::Layout io_layout)
- : _output_buffer{reinterpret_cast<T *>(output_buffer)}, _output_size{output_size},
- _shape{shape}, _copy{copy}, _io_layout{io_layout}
- {
- }
-
-protected:
- void pullUnif(onert::backend::ITensor &tensor) const
- {
- assert(((_io_layout == ir::Layout::NHWC && tensor.layout() == ir::Layout::NCHW) ||
- (_io_layout == ir::Layout::NCHW && tensor.layout() == ir::Layout::NHWC)) ||
- _copy);
- auto input_buffer = tensor.buffer();
- auto rank = _shape.rank();
-
- if (!tensor.has_padding() && rank < 4 + _copy)
- {
- memcpy(_output_buffer, input_buffer, _output_size);
- return;
- }
-
- switch (rank)
- {
- case 0:
- case 1:
- {
- memcpy(_output_buffer, input_buffer, _output_size);
- break;
- }
- case 2:
- {
- const int32_t copy_len = _shape.dim(1);
-
- for (auto i = 0; i < _shape.dim(0); ++i)
- {
- ir::Coordinates coords{i, 0};
- memcpy(_output_buffer + i * copy_len, input_buffer + tensor.calcOffset(coords),
- copy_len * sizeof(T));
- }
- break;
- }
- case 3:
- {
- const int32_t dim1 = _shape.dim(1);
- const int32_t dim2 = _shape.dim(2);
-
- for (auto i = 0; i < _shape.dim(0); ++i)
- {
- for (auto j = 0; j < _shape.dim(1); ++j)
- {
- ir::Coordinates coords{i, j, 0};
- memcpy(_output_buffer + i * dim1 * dim2 + j * dim2,
- input_buffer + tensor.calcOffset(coords), dim2 * sizeof(T));
- }
- }
- break;
- }
- case 4:
- {
- if (_copy)
- {
- const int32_t dim1 = _shape.dim(1);
- const int32_t dim2 = _shape.dim(2);
- const int32_t dim3 = _shape.dim(3);
-
- for (auto i = 0; i < _shape.dim(0); ++i)
- {
- for (auto j = 0; j < _shape.dim(1); ++j)
- {
- for (auto k = 0; k < _shape.dim(2); ++k)
- {
- ir::Coordinates coords{i, j, k, 0};
- memcpy(_output_buffer + i * dim1 * dim2 * dim3 + j * dim2 * dim3 + k * dim3,
- input_buffer + tensor.calcOffset(coords), dim3 * sizeof(T));
- }
- }
- }
- }
- else
- {
- const auto shape = _shape.asFeature(_io_layout);
-
- if (_io_layout == ir::Layout::NHWC)
- {
- const exec::feature::nchw::Reader<T> from(&tensor);
- exec::feature::nhwc::View<T> into(shape, _output_buffer, _output_size);
- feature::iterate(shape)
- << [&](uint32_t batch, uint32_t ch, uint32_t row, uint32_t col) {
- const auto value = from.at(batch, ch, row, col);
- into.at(batch, row, col, ch) = value;
- };
- }
- else if (_io_layout == ir::Layout::NCHW)
- {
- const exec::feature::nhwc::Reader<T> from(&tensor);
- exec::feature::nchw::View<T> into(shape, _output_buffer, _output_size);
- feature::iterate(shape)
- << [&](uint32_t batch, uint32_t ch, uint32_t row, uint32_t col) {
- const auto value = from.at(batch, row, col, ch);
- into.at(batch, ch, row, col) = value;
- };
- }
- else
- {
- throw std::runtime_error("Wrong Layout");
- }
- }
- break;
- }
- default:
- throw std::runtime_error("NYI: rank > 4");
- break;
- }
- }
-
-private:
- T *_output_buffer;
- const size_t _output_size;
- const ir::Shape _shape;
- const bool _copy;
- const ir::Layout _io_layout;
-};
-
-template <typename T> class PermutateSink final : public ITemplSink<T>
-{
-public:
- PermutateSink(void *output_buffer, const size_t &output_size, const ir::Shape &shape,
- ir::Layout io_layout)
- : ITemplSink<T>(output_buffer, output_size, shape, false, io_layout)
- {
- }
-
-public:
- void pull(onert::backend::ITensor &tensor) const override { ITemplSink<T>::pullUnif(tensor); }
-};
-
-// Only supports NHWC format front-end(NNAPI) now
-template <typename T> class CopySink final : public ITemplSink<T>
-{
-public:
- CopySink(void *output_buffer, const size_t &output_size, const ir::Shape &shape,
- ir::Layout io_layout = ir::Layout::UNKNOWN)
- : ITemplSink<T>(output_buffer, output_size, shape, true, io_layout)
- {
- }
-
-public:
- void pull(onert::backend::ITensor &tensor) const override { ITemplSink<T>::pullUnif(tensor); }
-};
-
-} // namespace exec
-} // namespace onert
-
-#endif // __ONERT_EXEC_SINK_H__
diff --git a/runtime/onert/core/src/exec/Source.h b/runtime/onert/core/src/exec/Source.h
deleted file mode 100644
index fb2be4dd8..000000000
--- a/runtime/onert/core/src/exec/Source.h
+++ /dev/null
@@ -1,208 +0,0 @@
-/*
- * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#ifndef __ONERT_EXEC_SOURCE_H__
-#define __ONERT_EXEC_SOURCE_H__
-
-#include "feature/IndexIterator.h"
-#include "feature/nchw/Reader.h"
-#include "feature/nchw/View.h"
-#include "feature/nhwc/Reader.h"
-#include "feature/nhwc/View.h"
-
-#include <cassert>
-#include <memory>
-#include "util/Utils.h"
-#include <ir/Layout.h>
-#include "ir/Shape.h"
-
-namespace onert
-{
-namespace exec
-{
-
-struct ISource
-{
- virtual ~ISource() = default;
-
- virtual void push(::onert::backend::ITensor &tensor) const = 0;
-};
-
-// Create second lever inheritance: the first lever is used as a reference type in use-case places
-template <typename T> class ITemplSource : public ISource
-{
-public:
- ITemplSource(const void *input_buffer, const size_t &input_size, const ir::Shape &shape,
- const bool copy, ir::Layout io_layout)
- : _input_buffer{reinterpret_cast<const T *>(input_buffer)}, _input_size{input_size},
- _shape{shape}, _copy(copy), _io_layout{io_layout}
- {
- }
-
- virtual void push(::onert::backend::ITensor &tensor) const = 0;
-
-protected:
- void pushUnif(onert::backend::ITensor &tensor) const
- {
- assert(((_io_layout == ir::Layout::NHWC && tensor.layout() == ir::Layout::NCHW) ||
- (_io_layout == ir::Layout::NCHW && tensor.layout() == ir::Layout::NHWC)) ||
- _copy);
- auto output_buffer = tensor.buffer();
- auto rank = _shape.rank();
-
- if (!tensor.has_padding() && rank < 4 + _copy)
- {
- memcpy(output_buffer, _input_buffer, _input_size);
- return;
- }
-
- switch (rank)
- {
- case 0:
- case 1:
- {
- memcpy(output_buffer, _input_buffer, _input_size);
- break;
- }
- case 2:
- {
- const int32_t copy_len = _shape.dim(1);
-
- for (auto i = 0; i < _shape.dim(0); ++i)
- {
- ir::Coordinates coords{i, 0};
- memcpy(output_buffer + tensor.calcOffset(coords), _input_buffer + i * copy_len,
- copy_len * sizeof(T));
- }
- break;
- }
- case 3:
- {
- const int32_t dim1 = _shape.dim(1);
- const int32_t dim2 = _shape.dim(2);
-
- for (auto i = 0; i < _shape.dim(0); ++i)
- {
- for (auto j = 0; j < _shape.dim(1); ++j)
- {
- ir::Coordinates coords{i, j, 0};
- memcpy(output_buffer + tensor.calcOffset(coords),
- _input_buffer + i * dim1 * dim2 + j * dim2, dim2 * sizeof(T));
- }
- }
- break;
- }
- case 4:
- {
- if (_copy)
- {
- const int32_t dim1 = _shape.dim(1);
- const int32_t dim2 = _shape.dim(2);
- const int32_t dim3 = _shape.dim(3);
- for (auto i = 0; i < _shape.dim(0); ++i)
- {
- for (auto j = 0; j < _shape.dim(1); ++j)
- {
- for (auto k = 0; k < _shape.dim(2); ++k)
- {
- ir::Coordinates coords{i, j, k, 0};
- memcpy(output_buffer + tensor.calcOffset(coords),
- _input_buffer + i * dim1 * dim2 * dim3 + j * dim2 * dim3 + k * dim3,
- dim3 * sizeof(T));
- }
- }
- }
- }
- else
- {
- const auto shape = _shape.asFeature(_io_layout);
-
- if (_io_layout == ir::Layout::NCHW)
- {
- const exec::feature::nchw::Reader<T> from(shape, _input_buffer, _input_size);
- exec::feature::nhwc::View<T> into(&tensor);
- feature::iterate(shape)
- << [&](uint32_t batch, uint32_t ch, uint32_t row, uint32_t col) {
- const auto value = from.at(batch, ch, row, col);
- into.at(batch, row, col, ch) = value;
- };
- }
- else if (_io_layout == ir::Layout::NHWC)
- {
- const exec::feature::nhwc::Reader<T> from(shape, _input_buffer, _input_size);
- exec::feature::nchw::View<T> into(&tensor);
- feature::iterate(shape)
- << [&](uint32_t batch, uint32_t ch, uint32_t row, uint32_t col) {
- const auto value = from.at(batch, row, col, ch);
- into.at(batch, ch, row, col) = value;
- };
- }
- else
- {
- throw std::runtime_error("Wrong Layout");
- }
- }
-
- break;
- }
- default:
- throw std::runtime_error("NYI: rank > 4");
- break;
- }
- }
-
-private:
- const T *_input_buffer;
- const size_t _input_size;
- const ir::Shape _shape;
- const bool _copy;
- const ir::Layout _io_layout;
-};
-
-template <typename T> class PermutateSource final : public ITemplSource<T>
-{
-public:
- PermutateSource(const void *input_buffer, const size_t &input_size, const ir::Shape &shape,
- ir::Layout io_layout)
- : ITemplSource<T>(input_buffer, input_size, shape, false, io_layout)
- {
- }
-
-public:
- void push(onert::backend::ITensor &tensor) const override
- {
- // do NHWC_TO_NCHW or NCHW_TO_NHWC permutation
- ITemplSource<T>::pushUnif(tensor);
- }
-};
-
-template <typename T> class CopySource final : public ITemplSource<T>
-{
-public:
- CopySource(const void *input_buffer, const size_t &input_size, const ir::Shape &shape,
- ir::Layout io_layout = ir::Layout::UNKNOWN)
- : ITemplSource<T>(input_buffer, input_size, shape, true, io_layout)
- {
- }
-
-public:
- void push(onert::backend::ITensor &tensor) const override { ITemplSource<T>::pushUnif(tensor); }
-};
-
-} // namespace exec
-} // namespace onert
-
-#endif // __ONERT_EXEC_SOURCE_H__
diff --git a/runtime/onert/core/src/interp/Tensor.h b/runtime/onert/core/src/interp/Tensor.h
index 008a4b9d4..8b72d537d 100644
--- a/runtime/onert/core/src/interp/Tensor.h
+++ b/runtime/onert/core/src/interp/Tensor.h
@@ -171,7 +171,6 @@ public:
int32_t data_offset() const override { return _info.typeInfo().offset(); }
const ir::OperandInfo &tensorInfo() const override { return _info; }
uint64_t num_elements() const override { return _info.shape().num_elements(); };
- backend::IDynamicTensorManager *dynamic_tensor_manager() override { return nullptr; }
private:
const ir::OperandInfo _info;
diff --git a/runtime/onert/core/src/ir/Graph.cc b/runtime/onert/core/src/ir/Graph.cc
index fe8b1b443..605562ebc 100644
--- a/runtime/onert/core/src/ir/Graph.cc
+++ b/runtime/onert/core/src/ir/Graph.cc
@@ -103,7 +103,7 @@ void Graph::initializeUseDef()
{
operations().iterate([&](const OperationIndex &index, const Operation &node) -> void {
auto outputs = node.getOutputs();
- for (auto output : outputs)
+ for (auto output : outputs | ir::Remove::UNDEFINED)
{
operands().at(output).setDef(index);
}
diff --git a/runtime/onert/core/src/ir/GraphIterator.cc b/runtime/onert/core/src/ir/GraphIterator.cc
index 4bea1a55d..ac67771c4 100644
--- a/runtime/onert/core/src/ir/GraphIterator.cc
+++ b/runtime/onert/core/src/ir/GraphIterator.cc
@@ -53,7 +53,7 @@ void PostDfsIterator<is_const>::iterate(GraphRef graph, const IterFn &fn) const
return;
visited[index] = true;
- for (const auto output : node.getOutputs() | Remove::DUPLICATED)
+ for (const auto output : node.getOutputs() | Remove::DUPLICATED | Remove::UNDEFINED)
{
const auto &operand = graph.operands().at(output);
for (const auto &use : operand.getUses())
@@ -86,7 +86,7 @@ void PostDfsIterator<is_const>::iterateOpSeqs(LoweredGraphRef lowered_graph,
return;
visited[index] = true;
- for (const auto output : op_seq.getOutputs() | Remove::DUPLICATED)
+ for (const auto output : op_seq.getOutputs() | Remove::DUPLICATED | Remove::UNDEFINED)
{
const auto &operand = lowered_graph.graph().operands().at(output);
for (const auto &use : operand.getUses())
diff --git a/runtime/onert/core/src/ir/Operation.cc b/runtime/onert/core/src/ir/Operation.cc
index 04be8c0d9..4af878541 100644
--- a/runtime/onert/core/src/ir/Operation.cc
+++ b/runtime/onert/core/src/ir/Operation.cc
@@ -24,22 +24,33 @@ namespace ir
{
Operation::Operation(OperandConstraint input_constr, const OperandIndexSequence &inputs,
- const OperandIndexSequence &outputs)
- : _input_constr{input_constr}, _inputs{inputs}, _outputs{outputs}
+ const OperandIndexSequence &outputs, OperandConstraint output_constr)
+ : _input_constr{input_constr}, _output_constr{output_constr}
{
+ setInputs(inputs);
+ setOutputs(outputs);
}
-Operation::Operation(OperandConstraint input_constr) : _input_constr{input_constr} {}
+Operation::Operation(OperandConstraint input_constr, OperandConstraint output_constr)
+ : _input_constr{input_constr}, _output_constr{output_constr}
+{
+}
Operation::~Operation() = default;
void Operation::setInputs(const OperandIndexSequence &indexes)
{
- assert(_input_constr.check(indexes.size()));
+ if (!_input_constr.check(indexes.size()))
+ throw std::runtime_error{"Invalid number of input tensors for this operation."};
_inputs = indexes;
}
-void Operation::setOutputs(const OperandIndexSequence &indexes) { _outputs = indexes; }
+void Operation::setOutputs(const OperandIndexSequence &indexes)
+{
+ if (!_output_constr.check(indexes.size()))
+ throw std::runtime_error{"Invalid number of output tensors for this operation."};
+ _outputs = indexes;
+}
void Operation::replaceInputs(const OperandIndex &from, const OperandIndex &to)
{
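Editor's note: the Operation change above turns operand-count checks from debug-only asserts into runtime errors and applies them to outputs as well. A minimal sketch of the constraint pattern, with a simplified stand-in class (the real OperandConstraint in onert may differ in detail):

```cpp
#include <cstdint>
#include <limits>
#include <stdexcept>

// Simplified re-implementation of the operand-count constraint idea.
class CountConstraint
{
public:
  static CountConstraint createExact(uint32_t n) { return {n, n}; }
  static CountConstraint createAtLeast(uint32_t n)
  {
    return {n, std::numeric_limits<uint32_t>::max()};
  }
  static CountConstraint createInRange(uint32_t lo, uint32_t hi) { return {lo, hi}; }

  bool check(uint32_t count) const { return _min <= count && count <= _max; }

private:
  CountConstraint(uint32_t min, uint32_t max) : _min{min}, _max{max} {}
  uint32_t _min, _max;
};

// Usage mirroring Operation::setInputs/setOutputs after the patch: reject a bad
// operand count with an exception instead of an assert that vanishes in release builds.
void set_operands(const CountConstraint &constr, uint32_t count)
{
  if (!constr.check(count))
    throw std::runtime_error{"Invalid number of tensors for this operation."};
  // ... store the operand index sequence ...
}
```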
diff --git a/runtime/onert/core/src/ir/OperationDumper.cc b/runtime/onert/core/src/ir/OperationDumper.cc
index 48361f464..eecfe81cc 100644
--- a/runtime/onert/core/src/ir/OperationDumper.cc
+++ b/runtime/onert/core/src/ir/OperationDumper.cc
@@ -40,7 +40,7 @@ void dumpUnaryInputOp(const Operation &node, const std::string &adding_input = "
void dumpBinaryInputOp(const Operation &node, const std::string &adding_input = "")
{
VERBOSE(LIR) << "* " << node.name() << std::endl;
- VERBOSE(LIR) << " - Inputs : Input(" << node.getInputs().at(0) << ", " << node.getInputs().at(0)
+ VERBOSE(LIR) << " - Inputs : Input(" << node.getInputs().at(0) << ", " << node.getInputs().at(1)
<< ") " << adding_input << std::endl;
VERBOSE(LIR) << " - Output : Output(" << node.getOutputs().at(0) << ")" << std::endl;
}
@@ -72,7 +72,7 @@ OperationDumper::OperationDumper(const std::string &start_msg)
VERBOSE(LIR) << start_msg << std::endl;
}
-void OperationDumper::visit(const ArgMax &node) { dumpUnaryInputOp(node); }
+void OperationDumper::visit(const ArgMax &node) { dumpBinaryInputOp(node); }
void OperationDumper::visit(const BatchToSpaceND &node)
{
@@ -82,6 +82,20 @@ void OperationDumper::visit(const BatchToSpaceND &node)
dumpUnaryInputOp(node, block_size);
}
+void OperationDumper::visit(const BCQFullyConnected &node)
+{
+ VERBOSE(LIR) << "* " << node.name() << std::endl;
+ VERBOSE(LIR) << " - Inputs : IFM(" << node.getInputs().at(BCQFullyConnected::Input::INPUT)
+ << ") WeightsBinary("
+ << node.getInputs().at(BCQFullyConnected::Input::WEIGHTS_BINARY)
+ << ") WeightsScales("
+ << node.getInputs().at(BCQFullyConnected::Input::WEIGHTS_SCALES)
+ << ") WeightsClusters("
+ << node.getInputs().at(BCQFullyConnected::Input::WEIGHTS_CLUSTERS) << ") Bias("
+ << node.getInputs().at(BCQFullyConnected::Input::BIAS) << ")" << std::endl;
+ VERBOSE(LIR) << " - Output : OFM(" << node.getOutputs().at(0) << ")" << std::endl;
+}
+
void OperationDumper::visit(const BinaryArithmetic &node) { dumpBinaryInputOp(node); }
void OperationDumper::visit(const operation::BroadcastTo &node) { dumpBinaryInputOp(node); }
@@ -185,6 +199,7 @@ void OperationDumper::visit(const LocalResponseNormalization &node) { dumpUnaryI
void OperationDumper::visit(const LSTM &node)
{
+ VERBOSE(LIR) << "* " << node.name() << std::endl;
VERBOSE(LIR)
<< " - Inputs : Input(" << node.getInputs().at(LSTM::Input::INPUT)
<< ") Input To Input Weights(" << node.getInputs().at(LSTM::Input::INPUT_TO_INPUT_WEIGHTS)
@@ -209,12 +224,24 @@ void OperationDumper::visit(const LSTM &node)
<< node.getInputs().at(LSTM::Input::PROJECTION_WEIGHTS) << ") Projection Bias("
<< node.getInputs().at(LSTM::Input::PROJECTION_BIAS) << ") Output State In("
<< node.getInputs().at(LSTM::Input::OUTPUT_STATE_IN) << ") Cell State In("
- << node.getInputs().at(LSTM::Input::CELL_STATE_IN) << ")" << std::endl;
+ << node.getInputs().at(LSTM::Input::CELL_STATE_IN);
+ if (node.getInputs().size() == 24)
+ {
+ VERBOSE(LIR) << ") Input Layer Normalization Weights("
+ << node.getInputs().at(LSTM::Input::INPUT_LAYER_NORMALIZATION_WEIGHTS)
+ << ") Forget Layer Normalization Weights("
+ << node.getInputs().at(LSTM::Input::FORGET_LAYER_NORMALIZATION_WEIGHTS)
+ << ") Cell Layer Normalization Weights("
+ << node.getInputs().at(LSTM::Input::CELL_LAYER_NORMALIZATION_WEIGHTS)
+ << ") Output Layer Normalization Weights("
+ << node.getInputs().at(LSTM::Input::OUTPUT_LAYER_NORMALIZATION_WEIGHTS);
+ }
+ VERBOSE(LIR) << ")" << std::endl;
VERBOSE(LIR) << " - Output : Scratch Buffer("
<< node.getOutputs().at(LSTM::Output::SCRATCH_BUFFER) << ") Output State Out("
- << node.getInputs().at(LSTM::Output::OUTPUT_STATE_OUT) << ") Cell State Out("
- << node.getInputs().at(LSTM::Output::CELL_STATE_OUT) << ") Output("
- << node.getInputs().at(LSTM::Output::OUTPUT) << ")" << std::endl;
+ << node.getOutputs().at(LSTM::Output::OUTPUT_STATE_OUT) << ") Cell State Out("
+ << node.getOutputs().at(LSTM::Output::CELL_STATE_OUT) << ") Output("
+ << node.getOutputs().at(LSTM::Output::OUTPUT) << ")" << std::endl;
}
void OperationDumper::visit(const Pack &node) { dumpPackingOp(node); }
@@ -279,7 +306,37 @@ void OperationDumper::visit(const Reshape &node)
dumpUnaryInputOp(node, shape);
}
-void OperationDumper::visit(const ResizeBilinear &node) { dumpUnaryInputOp(node); }
+void OperationDumper::visit(const ResizeBilinear &node)
+{
+ if (node.getInputs().size() == 1)
+ {
+ dumpUnaryInputOp(node);
+ }
+ else if (node.getInputs().size() == 2)
+ {
+ dumpBinaryInputOp(node);
+ }
+ else
+ {
+ VERBOSE(LIR) << "* " << node.name() << " has a wrong number of inputs" << std::endl;
+ }
+}
+
+void OperationDumper::visit(const ResizeNearestNeighbor &node)
+{
+ if (node.getInputs().size() == 1)
+ {
+ dumpUnaryInputOp(node);
+ }
+ else if (node.getInputs().size() == 2)
+ {
+ dumpBinaryInputOp(node);
+ }
+ else
+ {
+ VERBOSE(LIR) << "* " << node.name() << " has a wrong number of inputs" << std::endl;
+ }
+}
void OperationDumper::visit(const Reverse &node)
{
@@ -336,7 +393,7 @@ void OperationDumper::visit(const SpaceToBatchND &node)
void OperationDumper::visit(const SpaceToDepth &node) { dumpUnaryInputOp(node); }
-void OperationDumper::visit(const Split &node) { dumpUnaryInputOp(node); }
+void OperationDumper::visit(const Split &node) { dumpBinaryInputOp(node); }
void OperationDumper::visit(const SquaredDifference &node) { dumpBinaryInputOp(node); }
@@ -384,7 +441,7 @@ void OperationDumper::visit(const TransposeConv &node)
VERBOSE(LIR) << " - Output : OFM(" << node.getOutputs().at(0) << ")" << std::endl;
}
-void OperationDumper::visit(const Transpose &node) { dumpUnaryInputOp(node); }
+void OperationDumper::visit(const Transpose &node) { dumpBinaryInputOp(node); }
void OperationDumper::visit(const Unpack &node)
{
diff --git a/runtime/onert/core/src/ir/OperationDumper.h b/runtime/onert/core/src/ir/OperationDumper.h
index e8ab3b3cd..91642ab13 100644
--- a/runtime/onert/core/src/ir/OperationDumper.h
+++ b/runtime/onert/core/src/ir/OperationDumper.h
@@ -33,6 +33,7 @@ public:
public:
void visit(const operation::ArgMax &) override;
void visit(const operation::BatchToSpaceND &node) override;
+ void visit(const operation::BCQFullyConnected &node) override;
void visit(const operation::BinaryArithmetic &node) override;
void visit(const operation::BroadcastTo &) override;
void visit(const operation::Comparison &) override;
@@ -65,6 +66,7 @@ public:
void visit(const operation::Reduce &) override;
void visit(const operation::Reshape &node) override;
void visit(const operation::ResizeBilinear &) override;
+ void visit(const operation::ResizeNearestNeighbor &) override;
void visit(const operation::Reverse &) override;
void visit(const operation::RNN &) override;
void visit(const operation::Select &node) override;
diff --git a/runtime/onert/core/src/ir/operation/ArgMax.cc b/runtime/onert/core/src/ir/operation/ArgMax.cc
index 1275ae43a..f3bd8fd73 100644
--- a/runtime/onert/core/src/ir/operation/ArgMax.cc
+++ b/runtime/onert/core/src/ir/operation/ArgMax.cc
@@ -31,7 +31,7 @@ void ArgMax::accept(OperationVisitor &v) const { v.visit(*this); }
ArgMax::ArgMax(const OperandIndexSequence &inputs, const OperandIndexSequence &outputs,
const Param &param)
- : Operation{OperandConstraint::createExact(1u), inputs, outputs}, _param{param}
+ : Operation{OperandConstraint::createExact(2u), inputs, outputs}, _param{param}
{
}
diff --git a/runtime/onert/core/src/ir/operation/BatchToSpaceND.cc b/runtime/onert/core/src/ir/operation/BatchToSpaceND.cc
index 9ef2b125f..34be79dd2 100644
--- a/runtime/onert/core/src/ir/operation/BatchToSpaceND.cc
+++ b/runtime/onert/core/src/ir/operation/BatchToSpaceND.cc
@@ -31,7 +31,7 @@ void BatchToSpaceND::accept(OperationVisitor &v) const { v.visit(*this); }
BatchToSpaceND::BatchToSpaceND(const OperandIndexSequence &inputs,
const OperandIndexSequence &outputs)
- : Operation{OperandConstraint::createExact(3u), inputs, outputs}
+ : Operation{OperandConstraint::createInRange(2u, 3u), inputs, outputs}
{
}
diff --git a/runtime/onert/core/src/ir/operation/ElementwiseUnary.cc b/runtime/onert/core/src/ir/operation/ElementwiseUnary.cc
index 7dfcd4a98..6a0be7eb8 100644
--- a/runtime/onert/core/src/ir/operation/ElementwiseUnary.cc
+++ b/runtime/onert/core/src/ir/operation/ElementwiseUnary.cc
@@ -32,7 +32,9 @@ void ElementwiseUnary::accept(OperationVisitor &v) const { v.visit(*this); }
ElementwiseUnary::ElementwiseUnary(const OperandIndexSequence &inputs,
const OperandIndexSequence &outputs, const Param &param)
- : Operation{OperandConstraint::createExact(1u), inputs, outputs}, _param{param}
+ : Operation{OperandConstraint::createExact(1u), inputs, outputs,
+ OperandConstraint::createExact(1u)},
+ _param{param}
{
}
diff --git a/runtime/onert/core/src/ir/operation/Fill.cc b/runtime/onert/core/src/ir/operation/Fill.cc
index c44f45aab..b8b97d1c0 100644
--- a/runtime/onert/core/src/ir/operation/Fill.cc
+++ b/runtime/onert/core/src/ir/operation/Fill.cc
@@ -30,7 +30,7 @@ namespace operation
void Fill::accept(OperationVisitor &v) const { v.visit(*this); }
Fill::Fill(const OperandIndexSequence &inputs, const OperandIndexSequence &outputs)
- : Operation{OperandConstraint::createExact(1u), inputs, outputs}
+ : Operation{OperandConstraint::createExact(2u), inputs, outputs}
{
}
diff --git a/runtime/onert/core/src/ir/operation/FullyConnected.cc b/runtime/onert/core/src/ir/operation/FullyConnected.cc
index 118ae554a..9837a3137 100644
--- a/runtime/onert/core/src/ir/operation/FullyConnected.cc
+++ b/runtime/onert/core/src/ir/operation/FullyConnected.cc
@@ -31,7 +31,7 @@ void FullyConnected::accept(OperationVisitor &v) const { v.visit(*this); }
FullyConnected::FullyConnected(const OperandIndexSequence &inputs,
const OperandIndexSequence &outputs, const Param &param)
- : Operation{OperandConstraint::createExact(3u), inputs, outputs}, _param{param}
+ : Operation{OperandConstraint::createInRange(2u, 3u), inputs, outputs}, _param{param}
{
}
diff --git a/runtime/onert/core/src/ir/operation/LSTM.cc b/runtime/onert/core/src/ir/operation/LSTM.cc
index 30a865326..5cd7c793a 100644
--- a/runtime/onert/core/src/ir/operation/LSTM.cc
+++ b/runtime/onert/core/src/ir/operation/LSTM.cc
@@ -31,10 +31,18 @@ void LSTM::accept(OperationVisitor &v) const { v.visit(*this); }
LSTM::LSTM(const OperandIndexSequence &inputs, const OperandIndexSequence &outputs,
const Param &param)
- : Operation{OperandConstraint::createExact(23u), inputs, outputs}, _param{param}
+ : Operation{OperandConstraint::createInRange(20u, 24u), inputs, outputs}, _param{param}
{
}
+std::string LSTM::name() const
+{
+ if (getOutputs().at(Output::SCRATCH_BUFFER).undefined())
+ return std::string{"UnidirectionalSequenceLSTM"};
+ else
+ return Operation::name();
+}
+
} // namespace operation
} // namespace ir
} // namespace onert
diff --git a/runtime/onert/core/src/ir/operation/Pack.cc b/runtime/onert/core/src/ir/operation/Pack.cc
index f0908a2c6..784d4162a 100644
--- a/runtime/onert/core/src/ir/operation/Pack.cc
+++ b/runtime/onert/core/src/ir/operation/Pack.cc
@@ -25,7 +25,7 @@ namespace operation
void Pack::accept(OperationVisitor &v) const { v.visit(*this); }
Pack::Pack(const OperandIndexSequence &inputs, const OperandIndexSequence &outputs,
const Param &param)
- : Operation{OperandConstraint::createAtLeast(3u), inputs, outputs}, _param{param}
+ : Operation{OperandConstraint::createAtLeast(1u), inputs, outputs}, _param{param}
{
}
} // namespace operation
diff --git a/runtime/onert/core/src/ir/operation/ResizeBilinear.cc b/runtime/onert/core/src/ir/operation/ResizeBilinear.cc
index d0d89f45f..71925bb44 100644
--- a/runtime/onert/core/src/ir/operation/ResizeBilinear.cc
+++ b/runtime/onert/core/src/ir/operation/ResizeBilinear.cc
@@ -31,7 +31,7 @@ void ResizeBilinear::accept(OperationVisitor &v) const { v.visit(*this); }
ResizeBilinear::ResizeBilinear(const OperandIndexSequence &inputs,
const OperandIndexSequence &outputs, const Param &param)
- : Operation{OperandConstraint::createExact(1u), inputs, outputs}, _param{param}
+ : Operation{OperandConstraint::createInRange(1u, 2u), inputs, outputs}, _param{param}
{
}
diff --git a/runtime/onert/core/src/ir/operation/ResizeNearestNeighbor.cc b/runtime/onert/core/src/ir/operation/ResizeNearestNeighbor.cc
index 9f17af97c..98d0b5f26 100644
--- a/runtime/onert/core/src/ir/operation/ResizeNearestNeighbor.cc
+++ b/runtime/onert/core/src/ir/operation/ResizeNearestNeighbor.cc
@@ -32,7 +32,7 @@ void ResizeNearestNeighbor::accept(OperationVisitor &v) const { v.visit(*this);
ResizeNearestNeighbor::ResizeNearestNeighbor(const OperandIndexSequence &inputs,
const OperandIndexSequence &outputs,
const Param &param)
- : Operation{OperandConstraint::createExact(1u), inputs, outputs}, _param{param}
+ : Operation{OperandConstraint::createInRange(1u, 2u), inputs, outputs}, _param{param}
{
}
diff --git a/runtime/onert/core/src/ir/operation/Split.cc b/runtime/onert/core/src/ir/operation/Split.cc
index 244884e41..b538e9206 100644
--- a/runtime/onert/core/src/ir/operation/Split.cc
+++ b/runtime/onert/core/src/ir/operation/Split.cc
@@ -25,7 +25,7 @@ namespace operation
void Split::accept(OperationVisitor &v) const { v.visit(*this); }
Split::Split(const OperandIndexSequence &inputs, const OperandIndexSequence &outputs,
const Param &param)
- : Operation{OperandConstraint::createExact(1u), inputs, outputs}, _param{param}
+ : Operation{OperandConstraint::createExact(2u), inputs, outputs}, _param{param}
{
}
} // namespace operation
diff --git a/runtime/onert/core/src/ir/operation/Transpose.cc b/runtime/onert/core/src/ir/operation/Transpose.cc
index 3a663fbce..997f98ab0 100644
--- a/runtime/onert/core/src/ir/operation/Transpose.cc
+++ b/runtime/onert/core/src/ir/operation/Transpose.cc
@@ -29,9 +29,8 @@ namespace operation
void Transpose::accept(OperationVisitor &v) const { v.visit(*this); }
-Transpose::Transpose(const OperandIndexSequence &inputs, const OperandIndexSequence &outputs,
- const Param &param)
- : Operation{OperandConstraint::createExact(2u), inputs, outputs}, _param{param}
+Transpose::Transpose(const OperandIndexSequence &inputs, const OperandIndexSequence &outputs)
+ : Operation{OperandConstraint::createExact(2u), inputs, outputs}
{
}
diff --git a/runtime/onert/core/src/ir/verifier/Verifier.cc b/runtime/onert/core/src/ir/verifier/Verifier.cc
index 09cbdcf2f..489845971 100644
--- a/runtime/onert/core/src/ir/verifier/Verifier.cc
+++ b/runtime/onert/core/src/ir/verifier/Verifier.cc
@@ -51,7 +51,7 @@ bool DAGChecker::verify(const Graph &graph) const noexcept
visited[index] = true;
on_stack[index] = true;
- for (auto output : node.getOutputs() | Remove::DUPLICATED)
+ for (auto output : node.getOutputs() | Remove::DUPLICATED | Remove::UNDEFINED)
{
const auto &operand = graph.operands().at(output);
for (const auto &use : operand.getUses())
@@ -99,7 +99,7 @@ bool EdgeConsistencyChecker::verify(const Graph &graph) const noexcept
errors += 1;
}
}
- for (auto operand_index : node.getOutputs())
+ for (auto operand_index : node.getOutputs() | ir::Remove::UNDEFINED)
{
try
{
diff --git a/runtime/onert/core/src/util/EventCollectorGlobal.cc b/runtime/onert/core/src/util/EventCollectorGlobal.cc
index d09b95210..6c03a5b9a 100644
--- a/runtime/onert/core/src/util/EventCollectorGlobal.cc
+++ b/runtime/onert/core/src/util/EventCollectorGlobal.cc
@@ -21,6 +21,7 @@
#include <iostream>
#include "util/ConfigSource.h"
+#include "util/EventWriter.h"
namespace onert
{
@@ -39,8 +40,8 @@ EventCollectorGlobal::~EventCollectorGlobal()
try
{
// TODO Need better way for saved file path than the hardcoded path
- std::ofstream ofs{"trace.global.json"};
- _recorder.writeToFile(ofs);
+ EventWriter{_recorder}.writeToFile("trace.global.json",
+ EventWriter::WriteFormat::CHROME_TRACING);
}
catch (const std::exception &e)
{
diff --git a/runtime/onert/core/src/util/EventRecorder.cc b/runtime/onert/core/src/util/EventRecorder.cc
index 13a599bed..3714e4f02 100644
--- a/runtime/onert/core/src/util/EventRecorder.cc
+++ b/runtime/onert/core/src/util/EventRecorder.cc
@@ -16,389 +16,6 @@
#include "util/EventRecorder.h"
-#include <sstream>
-#include <vector>
-#include <unordered_map>
-#include <json/json.h>
-#include <assert.h>
-#include <utility>
-#include <map>
-#include <set>
-#include <stdint.h>
-
-// json type for Chrome Event Trace
-namespace
-{
-
-std::string quote(const std::string &value)
-{
- std::stringstream ss;
- ss << '"' << value << '"';
- return ss.str();
-}
-
-std::string field(const std::string &k, const std::string &v)
-{
- std::stringstream ss;
- ss << quote(k) << " : " << quote(v);
- return ss.str();
-}
-
-struct Content // One Entry in Chrome Event Trace
-{
- std::vector<std::pair<std::string, std::string>> flds;
- std::vector<std::pair<std::string, std::string>> args;
-};
-
-std::string object(const Content &content)
-{
- std::stringstream ss;
-
- ss << "{ ";
-
- ss << field(content.flds[0].first, content.flds[0].second);
-
- for (uint32_t n = 1; n < content.flds.size(); ++n)
- {
- ss << ", " << field(content.flds.at(n).first, content.flds.at(n).second);
- }
-
- if (content.args.size() > 0)
- {
- ss << ", " << quote("args") << " : { ";
- ss << field(content.args.at(0).first, content.args.at(0).second);
-
- for (uint32_t n = 1; n < content.args.size(); ++n)
- {
- ss << ", " << field(content.args.at(n).first, content.args.at(n).second);
- }
-
- ss << "}";
- }
-
- ss << " }";
-
- return ss.str();
-}
-
-void fill(Content &content, const Event &evt)
-{
- content.flds.emplace_back("name", evt.name);
- content.flds.emplace_back("pid", "0");
- content.flds.emplace_back("tid", evt.tid);
- content.flds.emplace_back("ph", evt.ph);
- content.flds.emplace_back("ts", evt.ts);
-}
-
-std::string object(const DurationEvent &evt)
-{
- Content content;
-
- fill(content, evt);
-
- return ::object(content);
-}
-
-std::string object(const CounterEvent &evt)
-{
- Content content;
-
- fill(content, evt);
-
- for (auto it = evt.values.begin(); it != evt.values.end(); ++it)
- {
- content.args.emplace_back(it->first, it->second);
- }
-
- return ::object(content);
-}
-
-} // namespace
-
-// md table type
-namespace
-{
-
-void writeMDTableRow(std::ostream &os, const std::vector<std::string> &list)
-{
- os << "| ";
- for (auto &key : list)
- {
- os << key << " | ";
- }
- os << "\n";
-}
-
-struct MDContent
-{
- std::string name;
- uint64_t begin_ts;
- uint64_t end_ts;
- uint32_t min_rss;
- uint32_t max_rss;
- uint32_t min_page_reclaims;
- uint32_t max_page_reclaims;
-
- MDContent()
- : begin_ts(0), end_ts(0), min_rss(UINT32_MAX), max_rss(0), min_page_reclaims(UINT32_MAX),
- max_page_reclaims(0)
- {
- // DO NOTHING
- }
-
- virtual ~MDContent() = default;
-
- void updateRss(uint32_t rss)
- {
- if (min_rss == UINT32_MAX)
- min_rss = rss;
- if (max_rss == 0)
- max_rss = rss;
-
- if (min_rss > rss)
- min_rss = rss;
- else if (max_rss < rss)
- max_rss = rss;
- }
-
- void updateMinflt(uint32_t minflt)
- {
- if (min_page_reclaims == UINT32_MAX)
- min_page_reclaims = minflt;
- if (max_page_reclaims == 0)
- max_page_reclaims = minflt;
-
- if (min_page_reclaims > minflt)
- min_page_reclaims = minflt;
- else if (max_page_reclaims < minflt)
- max_page_reclaims = minflt;
- }
-
- virtual void write(std::ostream &os) const = 0;
-};
-
-struct OpSeq : public MDContent
-{
- std::string backend;
- uint64_t graph_latency;
-
- struct OpSeqCmp
- {
- bool operator()(const OpSeq &lhs, const OpSeq &rhs) const
- {
- return lhs.begin_ts < rhs.begin_ts;
- }
- bool operator()(const OpSeq &lhs, const OpSeq &rhs) { return lhs.begin_ts < rhs.begin_ts; }
- bool operator()(OpSeq &lhs, OpSeq &rhs) { return lhs.begin_ts < rhs.begin_ts; }
- };
-
- void write(std::ostream &os) const override
- {
- uint64_t opseq_latency = end_ts - begin_ts;
- double opseq_per = static_cast<double>(opseq_latency) / graph_latency * 100.0;
- writeMDTableRow(os, {name, backend, std::to_string(opseq_latency), std::to_string(opseq_per),
- std::to_string(min_rss), std::to_string(max_rss),
- std::to_string(min_page_reclaims), std::to_string(max_page_reclaims)});
- }
-};
-
-struct Graph : public MDContent
-{
- std::set<OpSeq, OpSeq::OpSeqCmp> opseqs;
-
- void setOpSeqs(const std::map<std::string, OpSeq> &name_to_opseq)
- {
- uint64_t graph_latency = end_ts - begin_ts;
- for (auto it : name_to_opseq)
- {
- auto opseq = it.second;
- opseq.graph_latency = graph_latency;
-
- opseqs.insert(opseq);
-
- updateRss(opseq.min_rss);
- updateRss(opseq.max_rss);
- updateMinflt(opseq.min_page_reclaims);
- updateMinflt(opseq.max_page_reclaims);
- }
- }
-
- void write(std::ostream &os) const override
- {
- static std::vector<std::string> graph_headers{"latency(us)", "rss_min(kb)", "rss_max(kb)",
- "page_reclaims_min", "page_reclaims_max"};
-
- static std::vector<std::string> graph_headers_line{"-----------", "-------", "-------",
- "-----------------", "-----------------"};
-
- // Graph's Header
- writeMDTableRow(os, graph_headers);
- writeMDTableRow(os, graph_headers_line);
-
- // Graph's contents
- writeMDTableRow(os, {std::to_string(end_ts - begin_ts), std::to_string(min_rss),
- std::to_string(max_rss), std::to_string(min_page_reclaims),
- std::to_string(max_page_reclaims)});
-
- os << "\n";
-
- static std::vector<std::string> opseq_headers{
- "OpSeq name", "backend", "latency(us)", "latency(%)",
- "rss_min(kb)", "rss_max(kb)", "page_reclaims_min", "page_reclaims_max"};
-
- static std::vector<std::string> opseq_headers_line{
- "----------", "-------", "-----------", "-----------",
- "-------", "-------", "-----------------", "-----------------"};
-
- os << "## OpSequences \n";
-
- // OpSeq's Header
- writeMDTableRow(os, opseq_headers);
- writeMDTableRow(os, opseq_headers_line);
-
- // OpSeq's contents
- for (auto opseq : opseqs)
- {
- opseq.write(os);
- }
-
- os << "\n";
- }
-};
-
-struct MDTableBuilder
-{
- MDTableBuilder(const std::vector<DurationEvent> &duration_events,
- const std::vector<CounterEvent> &counter_events)
- : _duration_events(duration_events), _counter_events(counter_events)
- {
- for (const auto &evt : _counter_events)
- {
- uint64_t ts = std::stoull(evt.ts);
- auto &name = evt.name;
- assert(name.compare("maxrss") == 0 || name.compare("minflt") == 0);
- assert(evt.values.size() == 1);
- auto &val = evt.values.begin()->second;
- if (_ts_to_values.find(ts) == _ts_to_values.end())
- {
- std::pair<uint32_t, uint32_t> values;
- if (name.compare("maxrss") == 0)
- values.first = std::stoul(val);
- else
- values.second = std::stoul(val);
- _ts_to_values.insert({ts, values});
- }
- else
- {
- auto &values = _ts_to_values.at(ts);
- if (name.compare("maxrss") == 0)
- values.first = std::stoul(val);
- else
- values.second = std::stoul(val);
- }
- }
- }
-
- MDTableBuilder &build()
- {
- for (auto &it : divideGraph())
- {
- size_t begin_idx = it.first;
- size_t end_idx = it.second;
- std::map<std::string, OpSeq> name_to_opseq;
- for (size_t i = begin_idx + 1; i < end_idx; ++i)
- {
- const auto &evt = _duration_events[i];
- assert(evt.name.compare("Graph") != 0);
- assert(evt.ph.compare("B") == 0 || evt.ph.compare("E") == 0);
- if (evt.ph.compare("B") == 0)
- {
- assert(name_to_opseq.find(evt.name) == name_to_opseq.end());
- name_to_opseq.insert({evt.name, makeOpSeq(evt)});
- }
- else
- {
- assert(name_to_opseq.find(evt.name) != name_to_opseq.end());
- auto &opseq = name_to_opseq.at(evt.name);
- updateOpSeq(opseq, evt);
- }
- }
-
- _graphs.emplace_back(makeGraph(begin_idx, end_idx, name_to_opseq));
- }
-
- return *this;
- }
-
- std::vector<std::pair<size_t, size_t>> divideGraph()
- {
- std::vector<std::pair<size_t, size_t>> graph_idx_list; // pair<begin_idx, end_idx>
- for (size_t i = 0, begin_idx = 0; i < _duration_events.size(); ++i)
- {
- const auto &evt = _duration_events.at(i);
- if (evt.name.compare("Graph") == 0)
- {
- if (evt.ph.compare("B") == 0)
- begin_idx = i;
- else
- graph_idx_list.emplace_back(begin_idx, i);
- }
- }
- return graph_idx_list;
- }
-
- OpSeq makeOpSeq(const DurationEvent &evt)
- {
- OpSeq opseq;
- opseq.name = evt.name;
- opseq.begin_ts = std::stoull(evt.ts);
- opseq.updateRss(_ts_to_values.at(opseq.begin_ts).first);
- opseq.updateMinflt(_ts_to_values.at(opseq.begin_ts).second);
- opseq.backend = evt.tid;
- return opseq;
- }
-
- void updateOpSeq(OpSeq &opseq, const DurationEvent &evt)
- {
- opseq.end_ts = std::stoull(evt.ts);
- opseq.updateRss(_ts_to_values.at(opseq.end_ts).first);
- opseq.updateMinflt(_ts_to_values.at(opseq.end_ts).second);
- }
-
- Graph makeGraph(size_t begin_idx, size_t end_idx,
- const std::map<std::string, OpSeq> &name_to_opseq)
- {
- Graph graph;
- graph.name = "Graph";
- graph.begin_ts = std::stoull(_duration_events[begin_idx].ts);
- graph.updateRss(_ts_to_values.at(graph.begin_ts).first);
- graph.updateMinflt(_ts_to_values.at(graph.begin_ts).second);
- graph.end_ts = std::stoull(_duration_events[end_idx].ts);
- graph.updateRss(_ts_to_values.at(graph.end_ts).first);
- graph.updateMinflt(_ts_to_values.at(graph.end_ts).second);
- graph.setOpSeqs(name_to_opseq);
- return graph;
- }
-
- void write(std::ostream &os)
- {
- // Write contents
- for (size_t i = 0; i < _graphs.size(); ++i)
- {
- os << "# Graph " << i << "\n";
- _graphs.at(i).write(os);
- }
- }
-
- const std::vector<DurationEvent> &_duration_events;
- const std::vector<CounterEvent> &_counter_events;
- // timestamp to std::pair<maxrss, minflt>
- std::unordered_map<uint64_t, std::pair<uint32_t, uint32_t>> _ts_to_values;
- std::vector<Graph> _graphs;
-};
-
-} // namespace
-
void EventRecorder::emit(const DurationEvent &evt)
{
std::lock_guard<std::mutex> lock{_mu};
@@ -412,146 +29,3 @@ void EventRecorder::emit(const CounterEvent &evt)
_counter_events.push_back(evt);
}
-
-void EventRecorder::writeToFile(std::ostream &os)
-{
- std::lock_guard<std::mutex> lock{_mu};
-
- switch (_write_format)
- {
- case WriteFormat::CHROME_TRACING:
- writeChromeTrace(os);
- break;
- case WriteFormat::SNPE_BENCHMARK:
- writeSNPEBenchmark(os);
- break;
- case WriteFormat::MD_TABLE:
- writeMDTable(os);
- break;
- default:
- assert(!"Invalid value");
- break;
- }
-}
-
-void EventRecorder::writeSNPEBenchmark(std::ostream &os)
-{
- Json::Value root;
- auto &exec_data = root["Execution_Data"] = Json::Value{Json::objectValue};
-
- struct Stat
- {
- uint64_t sum = 0;
- uint64_t count = 0;
- uint64_t max = 0;
- uint64_t min = std::numeric_limits<uint64_t>::max();
-
- void accumulate(uint64_t val)
- {
- sum += val;
- count++;
- max = std::max(max, val);
- min = std::min(min, val);
- }
- };
-
- // Memory
- {
- std::unordered_map<std::string, Stat> mem_stats;
- for (auto &evt : _counter_events)
- {
- auto &mem_stat = mem_stats[evt.name];
- uint64_t val = std::stoull(evt.values["value"]);
- mem_stat.accumulate(val);
- }
-
- auto &mem = exec_data["memory"] = Json::Value{Json::objectValue};
- for (auto &kv : mem_stats)
- {
- auto &key = kv.first;
- auto &val = kv.second;
- mem[key]["Avg_Size"] = val.sum / val.count;
- mem[key]["Max_Size"] = val.max;
- mem[key]["Min_Size"] = val.min;
- mem[key]["Runtime"] = "NA";
- }
- }
-
- // Operation Execution Time
- {
- // NOTE This assumes _duration_events is sorted by "ts" ascending
-
- // 2D keys : stats[tid][name]
- std::unordered_map<std::string, std::unordered_map<std::string, Stat>> stats;
- std::unordered_map<std::string, std::unordered_map<std::string, uint64_t>> begin_timestamps;
- for (auto &evt : _duration_events)
- {
- auto &stat = stats[evt.tid][evt.name];
- auto &begin_ts = begin_timestamps[evt.tid][evt.name];
- uint64_t timestamp = std::stoull(evt.ts);
- if (evt.ph == "B")
- {
- if (begin_ts != 0)
- throw std::runtime_error{"Invalid Data"};
- begin_ts = timestamp;
- }
- else if (evt.ph == "E")
- {
- if (begin_ts == 0 || timestamp < begin_ts)
- throw std::runtime_error{"Invalid Data"};
- stat.accumulate(timestamp - begin_ts);
- begin_ts = 0;
- }
- else
- throw std::runtime_error{"Invalid Data - invalid value for \"ph\" : \"" + evt.ph + "\""};
- }
-
- for (auto &kv : begin_timestamps)
- for (auto &kv2 : kv.second)
- if (kv2.second != 0)
- throw std::runtime_error{"Invalid Data - B and E pair does not match."};
-
- for (auto &kv : stats)
- {
- auto &tid = kv.first;
- auto &map = kv.second;
- auto &json_tid = exec_data[tid] = Json::Value{Json::objectValue};
- for (auto &kv : map)
- {
- auto &name = kv.first;
- auto &val = kv.second;
- json_tid[name]["Avg_Time"] = val.sum / val.count;
- json_tid[name]["Max_Time"] = val.max;
- json_tid[name]["Min_Time"] = val.min;
- json_tid[name]["Runtime"] = tid;
- }
- }
- }
-
- os << root;
-}
-
-void EventRecorder::writeChromeTrace(std::ostream &os)
-{
- os << "{\n";
- os << " " << quote("traceEvents") << ": [\n";
-
- for (auto &evt : _duration_events)
- {
- os << " " << object(evt) << ",\n";
- }
-
- for (auto &evt : _counter_events)
- {
- os << " " << object(evt) << ",\n";
- }
-
- os << " { }\n";
- os << " ]\n";
- os << "}\n";
-}
-
-void EventRecorder::writeMDTable(std::ostream &os)
-{
- MDTableBuilder(_duration_events, _counter_events).build().write(os);
-}
diff --git a/runtime/onert/core/src/util/EventRecorder.h b/runtime/onert/core/src/util/EventRecorder.h
index 37ec1a0f1..7af4c7ddb 100644
--- a/runtime/onert/core/src/util/EventRecorder.h
+++ b/runtime/onert/core/src/util/EventRecorder.h
@@ -21,7 +21,6 @@
#include <memory>
#include <mutex>
-#include <ostream>
#include <vector>
struct Event
@@ -50,14 +49,6 @@ struct CounterEvent : public Event
class EventRecorder
{
public:
- enum class WriteFormat
- {
- CHROME_TRACING,
- SNPE_BENCHMARK,
- MD_TABLE,
- };
-
-public:
EventRecorder() = default;
public:
@@ -66,18 +57,11 @@ public:
public:
bool empty() { return _duration_events.empty() && _counter_events.empty(); }
- void writeToFile(std::ostream &os);
- void setWriteFormat(WriteFormat write_format) { _write_format = write_format; }
-
-private:
- void writeSNPEBenchmark(std::ostream &os);
- void writeChromeTrace(std::ostream &os);
- void writeMDTable(std::ostream &os);
+ const std::vector<DurationEvent> &duration_events() const { return _duration_events; }
+ const std::vector<CounterEvent> &counter_events() const { return _counter_events; }
private:
std::mutex _mu;
- // TODO: Allow user to control write_format
- WriteFormat _write_format{WriteFormat::SNPE_BENCHMARK};
std::vector<DurationEvent> _duration_events;
std::vector<CounterEvent> _counter_events;
};
diff --git a/runtime/onert/core/src/util/EventWriter.cc b/runtime/onert/core/src/util/EventWriter.cc
new file mode 100644
index 000000000..dacb40e64
--- /dev/null
+++ b/runtime/onert/core/src/util/EventWriter.cc
@@ -0,0 +1,574 @@
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "util/EventWriter.h"
+
+#include <sstream>
+#include <vector>
+#include <unordered_map>
+#include <json/json.h>
+#include <assert.h>
+#include <utility>
+#include <map>
+#include <set>
+#include <stdint.h>
+#include <fstream>
+
+// json type for Chrome Event Trace
+namespace
+{
+
+std::string quote(const std::string &value)
+{
+ std::stringstream ss;
+ ss << '"' << value << '"';
+ return ss.str();
+}
+
+std::string field(const std::string &k, const std::string &v)
+{
+ std::stringstream ss;
+ ss << quote(k) << " : " << quote(v);
+ return ss.str();
+}
+
+struct Content // One Entry in Chrome Event Trace
+{
+ std::vector<std::pair<std::string, std::string>> flds;
+ std::vector<std::pair<std::string, std::string>> args;
+};
+
+std::string object(const Content &content)
+{
+ std::stringstream ss;
+
+ ss << "{ ";
+
+ ss << field(content.flds[0].first, content.flds[0].second);
+
+ for (uint32_t n = 1; n < content.flds.size(); ++n)
+ {
+ ss << ", " << field(content.flds.at(n).first, content.flds.at(n).second);
+ }
+
+ if (content.args.size() > 0)
+ {
+ ss << ", " << quote("args") << " : { ";
+ ss << field(content.args.at(0).first, content.args.at(0).second);
+
+ for (uint32_t n = 1; n < content.args.size(); ++n)
+ {
+ ss << ", " << field(content.args.at(n).first, content.args.at(n).second);
+ }
+
+ ss << "}";
+ }
+
+ ss << " }";
+
+ return ss.str();
+}
+
+void fill(Content &content, const Event &evt)
+{
+ content.flds.emplace_back("name", evt.name);
+ content.flds.emplace_back("pid", "0");
+ content.flds.emplace_back("tid", evt.tid);
+ content.flds.emplace_back("ph", evt.ph);
+ content.flds.emplace_back("ts", evt.ts);
+}
+
+std::string object(const DurationEvent &evt)
+{
+ Content content;
+
+ fill(content, evt);
+
+ return ::object(content);
+}
+
+std::string object(const CounterEvent &evt)
+{
+ Content content;
+
+ fill(content, evt);
+
+ for (auto it = evt.values.begin(); it != evt.values.end(); ++it)
+ {
+ content.args.emplace_back(it->first, it->second);
+ }
+
+ return ::object(content);
+}
+
+} // namespace
+
+// md table type
+namespace
+{
+
+void writeMDTableRow(std::ostream &os, const std::vector<std::string> &list)
+{
+ os << "| ";
+ for (auto &key : list)
+ {
+ os << key << " | ";
+ }
+ os << "\n";
+}
+
+struct MDContent
+{
+ std::string name;
+ uint64_t begin_ts;
+ uint64_t end_ts;
+ uint32_t min_rss;
+ uint32_t max_rss;
+ uint32_t min_page_reclaims;
+ uint32_t max_page_reclaims;
+
+ MDContent()
+ : begin_ts(0), end_ts(0), min_rss(UINT32_MAX), max_rss(0), min_page_reclaims(UINT32_MAX),
+ max_page_reclaims(0)
+ {
+ // DO NOTHING
+ }
+
+ virtual ~MDContent() = default;
+
+ void updateRss(uint32_t rss)
+ {
+ if (min_rss == UINT32_MAX)
+ min_rss = rss;
+ if (max_rss == 0)
+ max_rss = rss;
+
+ if (min_rss > rss)
+ min_rss = rss;
+ else if (max_rss < rss)
+ max_rss = rss;
+ }
+
+ void updateMinflt(uint32_t minflt)
+ {
+ if (min_page_reclaims == UINT32_MAX)
+ min_page_reclaims = minflt;
+ if (max_page_reclaims == 0)
+ max_page_reclaims = minflt;
+
+ if (min_page_reclaims > minflt)
+ min_page_reclaims = minflt;
+ else if (max_page_reclaims < minflt)
+ max_page_reclaims = minflt;
+ }
+
+ virtual void write(std::ostream &os) const = 0;
+};
+
+struct OpSeq : public MDContent
+{
+ std::string backend;
+ uint64_t graph_latency;
+
+ struct OpSeqCmp
+ {
+ bool operator()(const OpSeq &lhs, const OpSeq &rhs) const
+ {
+ return lhs.begin_ts < rhs.begin_ts;
+ }
+ bool operator()(const OpSeq &lhs, const OpSeq &rhs) { return lhs.begin_ts < rhs.begin_ts; }
+ bool operator()(OpSeq &lhs, OpSeq &rhs) { return lhs.begin_ts < rhs.begin_ts; }
+ };
+
+ void write(std::ostream &os) const override
+ {
+ uint64_t opseq_latency = end_ts - begin_ts;
+ double opseq_per = static_cast<double>(opseq_latency) / graph_latency * 100.0;
+ writeMDTableRow(os, {name, backend, std::to_string(opseq_latency), std::to_string(opseq_per),
+ std::to_string(min_rss), std::to_string(max_rss),
+ std::to_string(min_page_reclaims), std::to_string(max_page_reclaims)});
+ }
+};
+
+struct Graph : public MDContent
+{
+ std::set<OpSeq, OpSeq::OpSeqCmp> opseqs;
+
+ void setOpSeqs(const std::map<std::string, OpSeq> &name_to_opseq)
+ {
+ uint64_t graph_latency = end_ts - begin_ts;
+ for (auto it : name_to_opseq)
+ {
+ auto opseq = it.second;
+ opseq.graph_latency = graph_latency;
+
+ opseqs.insert(opseq);
+
+ updateRss(opseq.min_rss);
+ updateRss(opseq.max_rss);
+ updateMinflt(opseq.min_page_reclaims);
+ updateMinflt(opseq.max_page_reclaims);
+ }
+ }
+
+ void write(std::ostream &os) const override
+ {
+ static std::vector<std::string> graph_headers{"latency(us)", "rss_min(kb)", "rss_max(kb)",
+ "page_reclaims_min", "page_reclaims_max"};
+
+ static std::vector<std::string> graph_headers_line{"-----------", "-------", "-------",
+ "-----------------", "-----------------"};
+
+ // Graph's Header
+ writeMDTableRow(os, graph_headers);
+ writeMDTableRow(os, graph_headers_line);
+
+ // Graph's contents
+ writeMDTableRow(os, {std::to_string(end_ts - begin_ts), std::to_string(min_rss),
+ std::to_string(max_rss), std::to_string(min_page_reclaims),
+ std::to_string(max_page_reclaims)});
+
+ os << "\n";
+
+ static std::vector<std::string> opseq_headers{
+ "OpSeq name", "backend", "latency(us)", "latency(%)",
+ "rss_min(kb)", "rss_max(kb)", "page_reclaims_min", "page_reclaims_max"};
+
+ static std::vector<std::string> opseq_headers_line{
+ "----------", "-------", "-----------", "-----------",
+ "-------", "-------", "-----------------", "-----------------"};
+
+ os << "## OpSequences \n";
+
+ // OpSeq's Header
+ writeMDTableRow(os, opseq_headers);
+ writeMDTableRow(os, opseq_headers_line);
+
+ // OpSeq's contents
+ for (auto opseq : opseqs)
+ {
+ opseq.write(os);
+ }
+
+ os << "\n";
+ }
+};
+
+struct MDTableBuilder
+{
+ MDTableBuilder(const std::vector<DurationEvent> &duration_events,
+ const std::vector<CounterEvent> &counter_events)
+ : _duration_events(duration_events), _counter_events(counter_events)
+ {
+// TODO Enable this in release builds as well, once it can be done with low overhead
+#ifdef DEBUG
+ for (const auto &evt : _counter_events)
+ {
+ uint64_t ts = std::stoull(evt.ts);
+ auto &name = evt.name;
+ assert(name.compare("maxrss") == 0 || name.compare("minflt") == 0);
+ assert(evt.values.size() == 1);
+ auto &val = evt.values.begin()->second;
+ if (_ts_to_values.find(ts) == _ts_to_values.end())
+ {
+ std::pair<uint32_t, uint32_t> values;
+ if (name.compare("maxrss") == 0)
+ values.first = std::stoul(val);
+ else
+ values.second = std::stoul(val);
+ _ts_to_values.insert({ts, values});
+ }
+ else
+ {
+ auto &values = _ts_to_values.at(ts);
+ if (name.compare("maxrss") == 0)
+ values.first = std::stoul(val);
+ else
+ values.second = std::stoul(val);
+ }
+ }
+#endif
+ }
+
+ MDTableBuilder &build()
+ {
+ for (auto &it : divideGraph())
+ {
+ size_t begin_idx = it.first;
+ size_t end_idx = it.second;
+ std::map<std::string, OpSeq> name_to_opseq;
+ for (size_t i = begin_idx + 1; i < end_idx; ++i)
+ {
+ const auto &evt = _duration_events[i];
+ assert(evt.name.compare("Graph") != 0);
+ assert(evt.ph.compare("B") == 0 || evt.ph.compare("E") == 0);
+ if (evt.ph.compare("B") == 0)
+ {
+ assert(name_to_opseq.find(evt.name) == name_to_opseq.end());
+ name_to_opseq.insert({evt.name, makeOpSeq(evt)});
+ }
+ else
+ {
+ assert(name_to_opseq.find(evt.name) != name_to_opseq.end());
+ auto &opseq = name_to_opseq.at(evt.name);
+ updateOpSeq(opseq, evt);
+ }
+ }
+
+ _graphs.emplace_back(makeGraph(begin_idx, end_idx, name_to_opseq));
+ }
+
+ return *this;
+ }
+
+ std::vector<std::pair<size_t, size_t>> divideGraph()
+ {
+ std::vector<std::pair<size_t, size_t>> graph_idx_list; // pair<begin_idx, end_idx>
+ for (size_t i = 0, begin_idx = 0; i < _duration_events.size(); ++i)
+ {
+ const auto &evt = _duration_events.at(i);
+ if (evt.name.compare("Graph") == 0)
+ {
+ if (evt.ph.compare("B") == 0)
+ begin_idx = i;
+ else
+ graph_idx_list.emplace_back(begin_idx, i);
+ }
+ }
+ return graph_idx_list;
+ }
+
+ OpSeq makeOpSeq(const DurationEvent &evt)
+ {
+ OpSeq opseq;
+ opseq.name = evt.name;
+ opseq.begin_ts = std::stoull(evt.ts);
+ opseq.backend = evt.tid;
+#ifdef DEBUG
+ opseq.updateRss(_ts_to_values.at(opseq.begin_ts).first);
+ opseq.updateMinflt(_ts_to_values.at(opseq.begin_ts).second);
+#else
+ opseq.updateRss(0);
+ opseq.updateMinflt(0);
+#endif
+ return opseq;
+ }
+
+ void updateOpSeq(OpSeq &opseq, const DurationEvent &evt)
+ {
+ opseq.end_ts = std::stoull(evt.ts);
+#ifdef DEBUG
+ opseq.updateRss(_ts_to_values.at(opseq.end_ts).first);
+ opseq.updateMinflt(_ts_to_values.at(opseq.end_ts).second);
+#else
+ opseq.updateRss(0);
+ opseq.updateMinflt(0);
+#endif
+ }
+
+ Graph makeGraph(size_t begin_idx, size_t end_idx,
+ const std::map<std::string, OpSeq> &name_to_opseq)
+ {
+ Graph graph;
+ graph.name = "Graph";
+ graph.begin_ts = std::stoull(_duration_events[begin_idx].ts);
+ graph.end_ts = std::stoull(_duration_events[end_idx].ts);
+ graph.setOpSeqs(name_to_opseq);
+#ifdef DEBUG
+ graph.updateRss(_ts_to_values.at(graph.begin_ts).first);
+ graph.updateMinflt(_ts_to_values.at(graph.begin_ts).second);
+ graph.updateRss(_ts_to_values.at(graph.end_ts).first);
+ graph.updateMinflt(_ts_to_values.at(graph.end_ts).second);
+#else
+ graph.updateRss(0);
+ graph.updateMinflt(0);
+#endif
+ return graph;
+ }
+
+ void write(std::ostream &os)
+ {
+ // Write contents
+ for (size_t i = 0; i < _graphs.size(); ++i)
+ {
+ os << "# Graph " << i << "\n";
+ _graphs.at(i).write(os);
+ }
+ }
+
+ const std::vector<DurationEvent> &_duration_events;
+ const std::vector<CounterEvent> &_counter_events;
+ // timestamp to std::pair<maxrss, minflt>
+ std::unordered_map<uint64_t, std::pair<uint32_t, uint32_t>> _ts_to_values;
+ std::vector<Graph> _graphs;
+};
+
+} // namespace
+
+EventWriter::EventWriter(const EventRecorder &recorder) : _recorder(recorder)
+{
+ // DO NOTHING
+}
+
+void EventWriter::writeToFiles(const std::string &base_filepath)
+{
+ // NOTE Per an internal issue, the SNPE benchmark output uses the base file path as-is, without a '.snpe.json' suffix
+ writeToFile(base_filepath, WriteFormat::SNPE_BENCHMARK);
+ writeToFile(base_filepath + ".chrome.json", WriteFormat::CHROME_TRACING);
+ writeToFile(base_filepath + ".table.md", WriteFormat::MD_TABLE);
+}
+
+void EventWriter::writeToFile(const std::string &filepath, WriteFormat write_format)
+{
+ std::ofstream os{filepath, std::ofstream::out};
+ switch (write_format)
+ {
+ case WriteFormat::CHROME_TRACING:
+ writeChromeTrace(os);
+ break;
+ case WriteFormat::SNPE_BENCHMARK:
+ writeSNPEBenchmark(os);
+ break;
+ case WriteFormat::MD_TABLE:
+ writeMDTable(os);
+ break;
+ default:
+ assert(!"Invalid value");
+ break;
+ }
+}
+
+void EventWriter::writeSNPEBenchmark(std::ostream &os)
+{
+ Json::Value root;
+ auto &exec_data = root["Execution_Data"] = Json::Value{Json::objectValue};
+
+ struct Stat
+ {
+ uint64_t sum = 0;
+ uint64_t count = 0;
+ uint64_t max = 0;
+ uint64_t min = std::numeric_limits<uint64_t>::max();
+
+ void accumulate(uint64_t val)
+ {
+ sum += val;
+ count++;
+ max = std::max(max, val);
+ min = std::min(min, val);
+ }
+ };
+
+ // Memory
+ {
+ std::unordered_map<std::string, Stat> mem_stats;
+ for (auto &evt : _recorder.counter_events())
+ {
+ auto &mem_stat = mem_stats[evt.name];
+ uint64_t val = std::stoull(evt.values.at("value"));
+ mem_stat.accumulate(val);
+ }
+
+ auto &mem = exec_data["memory"] = Json::Value{Json::objectValue};
+ for (auto &kv : mem_stats)
+ {
+ auto &key = kv.first;
+ auto &val = kv.second;
+ mem[key]["Avg_Size"] = val.sum / val.count;
+ mem[key]["Max_Size"] = val.max;
+ mem[key]["Min_Size"] = val.min;
+ mem[key]["Runtime"] = "NA";
+ }
+ }
+
+ // Operation Execution Time
+ {
+ // NOTE This assumes the recorder's duration events are sorted by "ts" in ascending order
+
+ // 2D keys : stats[tid][name]
+ std::unordered_map<std::string, std::unordered_map<std::string, Stat>> stats;
+ std::unordered_map<std::string, std::unordered_map<std::string, uint64_t>> begin_timestamps;
+ for (auto &evt : _recorder.duration_events())
+ {
+ auto &stat = stats[evt.tid][evt.name];
+ auto &begin_ts = begin_timestamps[evt.tid][evt.name];
+ uint64_t timestamp = std::stoull(evt.ts);
+ if (evt.ph == "B")
+ {
+ if (begin_ts != 0)
+ throw std::runtime_error{"Invalid Data"};
+ begin_ts = timestamp;
+ }
+ else if (evt.ph == "E")
+ {
+ if (begin_ts == 0 || timestamp < begin_ts)
+ throw std::runtime_error{"Invalid Data"};
+ stat.accumulate(timestamp - begin_ts);
+ begin_ts = 0;
+ }
+ else
+ throw std::runtime_error{"Invalid Data - invalid value for \"ph\" : \"" + evt.ph + "\""};
+ }
+
+ for (auto &kv : begin_timestamps)
+ for (auto &kv2 : kv.second)
+ if (kv2.second != 0)
+ throw std::runtime_error{"Invalid Data - B and E pair does not match."};
+
+ for (auto &kv : stats)
+ {
+ auto &tid = kv.first;
+ auto &map = kv.second;
+ auto &json_tid = exec_data[tid] = Json::Value{Json::objectValue};
+ for (auto &kv : map)
+ {
+ auto &name = kv.first;
+ auto &val = kv.second;
+ json_tid[name]["Avg_Time"] = val.sum / val.count;
+ json_tid[name]["Max_Time"] = val.max;
+ json_tid[name]["Min_Time"] = val.min;
+ json_tid[name]["Runtime"] = tid;
+ }
+ }
+ }
+
+ os << root;
+}
+
+void EventWriter::writeChromeTrace(std::ostream &os)
+{
+ os << "{\n";
+ os << " " << quote("traceEvents") << ": [\n";
+
+ for (auto &evt : _recorder.duration_events())
+ {
+ os << " " << object(evt) << ",\n";
+ }
+
+ for (auto &evt : _recorder.counter_events())
+ {
+ os << " " << object(evt) << ",\n";
+ }
+
+ os << " { }\n";
+ os << " ]\n";
+ os << "}\n";
+}
+
+void EventWriter::writeMDTable(std::ostream &os)
+{
+ MDTableBuilder(_recorder.duration_events(), _recorder.counter_events()).build().write(os);
+}
diff --git a/runtime/onert/core/src/util/EventWriter.h b/runtime/onert/core/src/util/EventWriter.h
new file mode 100644
index 000000000..7e838ca82
--- /dev/null
+++ b/runtime/onert/core/src/util/EventWriter.h
@@ -0,0 +1,51 @@
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __ONERT_UTIL_EVENT_WRITER_H__
+#define __ONERT_UTIL_EVENT_WRITER_H__
+
+#include "EventRecorder.h"
+
+#include <string>
+#include <ostream>
+
+class EventWriter
+{
+public:
+ enum class WriteFormat
+ {
+ CHROME_TRACING,
+ SNPE_BENCHMARK,
+ MD_TABLE,
+ };
+
+public:
+ EventWriter(const EventRecorder &recorder);
+
+public:
+ void writeToFiles(const std::string &base_filepath);
+ void writeToFile(const std::string &filepath, WriteFormat write_format);
+
+private:
+ void writeSNPEBenchmark(std::ostream &os);
+ void writeChromeTrace(std::ostream &os);
+ void writeMDTable(std::ostream &os);
+
+private:
+ const EventRecorder &_recorder;
+};
+
+#endif // __ONERT_UTIL_EVENT_WRITER_H__
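
Since this commit splits event recording (EventRecorder) from serialization (EventWriter), here is a minimal usage sketch assuming only the interfaces declared above; the event names, timestamps, and the dumpProfile() wrapper are illustrative, not taken from the runtime:

  #include "util/EventRecorder.h"
  #include "util/EventWriter.h"

  void dumpProfile()
  {
    EventRecorder recorder;

    DurationEvent begin; // field values below are hypothetical
    begin.name = "Graph";
    begin.tid = "cpu";
    begin.ph = "B";
    begin.ts = "100";
    recorder.emit(begin);

    DurationEvent end = begin;
    end.ph = "E";
    end.ts = "1234";
    recorder.emit(end);

    if (!recorder.empty())
    {
      // Writes "trace" (SNPE benchmark json), "trace.chrome.json" and "trace.table.md"
      EventWriter{recorder}.writeToFiles("trace");
    }
  }
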
diff --git a/runtime/onert/core/src/util/ShapeInference.cc b/runtime/onert/core/src/util/ShapeInference.cc
index 95c15049d..0278df4d2 100644
--- a/runtime/onert/core/src/util/ShapeInference.cc
+++ b/runtime/onert/core/src/util/ShapeInference.cc
@@ -22,6 +22,7 @@
#include "util/logging.h"
#include <cassert>
+#include <numeric>
#include <sstream>
#include <cmath>
@@ -72,6 +73,19 @@ ir::Shape broadcastShapes(const ir::Shape &lhs_shape, const ir::Shape &rhs_shape
} // namespace
+namespace bcq
+{
+inline int getOutputSize(const ir::Shape &cluster_shape, const int32_t *cluster_buf)
+{
+ int size = 0;
+ for (int idx = 0; idx < cluster_shape.dim(0); idx++)
+ {
+ size += cluster_buf[idx * 2 + 1];
+ }
+ return size;
+}
+} // namespace bcq
+
//
// Shape inference
//
@@ -116,6 +130,11 @@ ir::Shape inferEltwiseShape(const ir::Shape &lhs_shape, const ir::Shape &rhs_sha
ir::Shape inferArgMaxShape(const ir::Shape &input_shape, int axis, int rank)
{
+ if (axis < 0 || axis >= rank)
+ {
+ throw std::runtime_error("ArgMax shape inference: Wrong axis value " + std::to_string(axis));
+ }
+
ir::Shape out_shape;
for (int idx = 0; idx < rank; ++idx)
{
@@ -259,19 +278,24 @@ ir::Shape inferBatchMatMulShape(const ir::Shape &lhs_shape, const ir::Shape &rhs
return output_shape;
}
-ir::Shape inferBroadcastToShape(const ir::Shape wshape, const int32_t *shape_buffer)
+/*
+ * shp_shape : SHAPE input tensor's shape
+ * shp_buf : SHAPE input tensor's buffer
+ */
+ir::Shape inferBroadcastToShape(const ir::Shape shp_shape, const int32_t *shp_buf)
{
- const int num_elements = wshape.num_elements();
+
+ const int num_elements = shp_shape.num_elements();
assert(num_elements != 0);
- assert(shape_buffer);
+ assert(shp_buf);
ir::Shape new_shape(num_elements);
for (int i = 0; i < num_elements; ++i)
{
- assert(shape_buffer[i] != 0); // It shouldn't be 0.
- new_shape.dim(i) = shape_buffer[i];
+ assert(shp_buf[i] != 0); // It shouldn't be 0.
+ new_shape.dim(i) = shp_buf[i];
}
return new_shape;
@@ -305,6 +329,9 @@ ir::Shape inferConcatShape(const Shapes &in_shapes, const ir::operation::Concat:
ir::Shape inferConv2DShape(const ir::Shape &in_shape, const ir::Shape &ker_shape,
const ir::operation::Conv2D::Param &param, ir::Layout layout)
{
+ if (param.stride.horizontal == 0 || param.stride.vertical == 0)
+ throw std::runtime_error{"Conv2D: stride values must be positive"};
+
auto ifm_shape = in_shape.asFeature(layout);
// Kernel format is [depth_out, kernel_height, kernel_width, depth_in]
@@ -321,6 +348,9 @@ ir::Shape inferDepthwiseConv2DShape(const ir::Shape &in_shape, const ir::Shape &
const ir::operation::DepthwiseConv2D::Param &param,
ir::Layout layout)
{
+ if (param.stride.horizontal == 0 || param.stride.vertical == 0)
+ throw std::runtime_error{"DepthwiseConv2D: stride values must be positive"};
+
assert(layout == ir::Layout::NHWC);
auto ifm_shape = in_shape.asFeature(layout);
@@ -354,13 +384,13 @@ ir::Shape inferExpandDimsShape(const ir::Shape &in_shape, int32_t axis)
return out_shape;
}
-ir::Shape inferFillShape(const ir::Shape &in_shape, const int32_t *buffer)
+ir::Shape inferFillShape(const ir::Shape &in_shape, const int32_t *in_buf)
{
ir::Shape out_shape(in_shape.dim(0));
for (int out_x = 0; out_x < out_shape.rank(); ++out_x)
{
- out_shape.dim(out_x) = buffer[out_x];
+ out_shape.dim(out_x) = in_buf[out_x];
}
return out_shape;
@@ -380,11 +410,60 @@ ir::Shape inferFullyConnectedShape(const ir::Shape &in_shape, const ir::Shape &k
return {ir::Shape({static_cast<int32_t>(batch_size), num_units})};
}
+ir::Shape inferBCQFullyConnectedShape(const ir::Shape &in_shape, const ir::Shape &cluster_shape,
+ const int32_t *cluster_buf)
+{
+ assert(cluster_shape.rank() == 2);
+ assert(cluster_shape.dim(1) == 2);
+
+ const auto input_size = in_shape.dim(1);
+ const auto output_size = bcq::getOutputSize(cluster_shape, cluster_buf);
+
+ return {ir::Shape({output_size, input_size})};
+}
+
+ir::Shape inferBCQGatherShape(const ir::Shape &indices_shape, const ir::Shape &cluster_shape,
+ const int32_t *cluster_buf, int rank,
+ const ir::operation::BCQGather::Param &param)
+{
+ ir::Shape out_shape;
+ ir::Shape in_original_shape;
+
+ assert(cluster_shape.rank() == 2);
+ assert(cluster_shape.dim(1) == 2);
+
+ auto hidden_size = param.input_hidden_size;
+ auto axis = param.axis;
+
+ in_original_shape.append(bcq::getOutputSize(cluster_shape, cluster_buf));
+ in_original_shape.append(hidden_size);
+
+ const int indices_rank = indices_shape.rank();
+ for (int idx = 0; idx < rank; ++idx)
+ {
+ if (idx == (int)axis)
+ {
+ for (int indices_idx = 0; indices_idx < indices_rank; indices_idx++)
+ {
+ out_shape.append(indices_shape.dim(indices_idx));
+ }
+ }
+ else
+ {
+ out_shape.append(in_original_shape.dim(idx));
+ }
+ }
+
+ return out_shape;
+}
+
ir::Shape inferGatherShape(const ir::Shape &input_shape, const ir::Shape &indices_shape, int axis,
int rank)
{
ir::Shape out_shape;
+
const int indices_rank = indices_shape.rank();
+
for (int idx = 0; idx < rank; ++idx)
{
if (idx == axis)
@@ -470,6 +549,9 @@ ir::Shape inferPadShape(const ir::Shape &in_shape, const int32_t *pad_buf, const
ir::Shape inferPoolShape(const ir::Shape &in_shape, const ir::operation::Pool2D::Param &param,
const ir::Layout layout)
{
+ if (param.stride.horizontal == 0 || param.stride.vertical == 0)
+ throw std::runtime_error{"Pool2D: stride values must be positive"};
+
assert(layout == ir::Layout::NHWC);
auto ifm_shape = in_shape.asFeature(layout);
const auto out_h_w = calcConvLikeHeightAndWidth(ifm_shape.H, ifm_shape.W, param.kh, param.kw,
@@ -482,6 +564,17 @@ ir::Shape inferResizeBilinearShape(const ir::Shape &in_shape, const int32_t outp
const int32_t output_width)
{
assert(in_shape.rank() == 4);
+ if (output_height < 0)
+ {
+ throw std::runtime_error{"ResizeBilinear: size value must be positive value, output_height = " +
+ std::to_string(output_height)};
+ }
+ if (output_width < 0)
+ {
+ throw std::runtime_error{"ResizeBilinear: size value must be positive value, output_width = " +
+ std::to_string(output_width)};
+ }
+
ir::Shape ret(in_shape.rank());
ret.dim(0) = in_shape.dim(0);
@@ -613,7 +706,8 @@ ir::Shape inferSelectShape(const ir::Shape &input_cond_shape, const ir::Shape &i
return new_shape;
}
-ir::Shape inferSliceShape(const ir::Shape &input_shape, const int32_t *begins, const int32_t *sizes)
+ir::Shape inferSliceShape(const ir::Shape &input_shape, const int32_t *begins_buf,
+ const int32_t *sizes_buf)
{
const uint32_t rank = input_shape.rank();
ir::Shape out_shape(rank);
@@ -623,12 +717,12 @@ ir::Shape inferSliceShape(const ir::Shape &input_shape, const int32_t *begins, c
const auto input_dim = input_shape.dim(idx);
// begin is zero-based
- auto begin = begins[idx];
+ auto begin = begins_buf[idx];
if (begin < 0)
throw std::runtime_error("shape inference Slice: Invalid begin.");
// size is one-based
- auto size = sizes[idx];
+ auto size = sizes_buf[idx];
if (size < -1)
throw std::runtime_error("shape inference Slice: Invalid size.");
@@ -648,8 +742,8 @@ ir::Shape inferSliceShape(const ir::Shape &input_shape, const int32_t *begins, c
}
ir::Shape inferSpaceToBatchNDShape(const ir::Shape &input_shape, const ir::Shape &block_shape_shape,
- const ir::Shape &padding_shape, const int32_t *block_shape_data,
- const int32_t *padding_data)
+ const ir::Shape &padding_shape, const int32_t *block_shape_buf,
+ const int32_t *padding_buf)
{
const uint32_t rank = input_shape.rank();
ir::Shape out_shape(rank);
@@ -677,14 +771,14 @@ ir::Shape inferSpaceToBatchNDShape(const ir::Shape &input_shape, const ir::Shape
for (int dim = 0; dim < kSpatialDimensionNum; ++dim)
{
int final_dim_size =
- (input_shape.dim(dim + 1) + padding_data[dim * 2] + padding_data[dim * 2 + 1]);
+ (input_shape.dim(dim + 1) + padding_buf[dim * 2] + padding_buf[dim * 2 + 1]);
- assert(final_dim_size % block_shape_data[dim] == 0);
+ assert(final_dim_size % block_shape_buf[dim] == 0);
- out_shape.dim(dim + 1) = final_dim_size / block_shape_data[dim];
+ out_shape.dim(dim + 1) = final_dim_size / block_shape_buf[dim];
}
- const int output_batch_size = input_shape.dim(0) * block_shape_data[0] * block_shape_data[1];
+ const int output_batch_size = input_shape.dim(0) * block_shape_buf[0] * block_shape_buf[1];
const int output_channel_size = input_shape.dim(3);
out_shape.dim(0) = output_batch_size;
@@ -948,35 +1042,71 @@ ir::Shape inferStridedSliceShape(const ir::Shape &input_shape, const StridedSlic
return out_shape;
}
-ir::Shape inferTileShape(const ir::Shape &in_shape, const int32_t *multiplier)
+ir::Shape inferTileShape(const ir::Shape &in_shape, const int32_t *multiplier_buf,
+ const int32_t multiplier_size)
{
- // assert(in_shape.rank() == multiplier.rank());
+ if (multiplier_size != in_shape.rank())
+ {
+ throw std::runtime_error("inferTileShape failed, input rank: " +
+ std::to_string(in_shape.rank()) + ", bad multipliers size: " +
+ std::to_string(multiplier_size));
+ }
ir::Shape new_Shape(in_shape.rank());
for (int i = 0; i < in_shape.rank(); ++i)
{
- assert(multiplier[i]); // multiplier[i] shuld not be 0.
- new_Shape.dim(i) = in_shape.dim(i) * multiplier[i];
+ assert(multiplier_buf[i]); // multiplier_buf[i] should not be 0.
+ new_Shape.dim(i) = in_shape.dim(i) * multiplier_buf[i];
}
return new_Shape;
}
-ir::Shape inferTransposeShape(const ir::Shape &in_shape, const std::vector<int> &perm)
+ir::Shape inferTransposeShape(const ir::Shape &in_shape, const int32_t *perm_buf,
+ const int32_t perm_size)
{
- if (static_cast<int>(perm.size()) > in_shape.rank())
+ const auto rank = in_shape.rank();
+ if (perm_size > rank)
+ {
+ throw std::runtime_error("inferTransposeShape failed, bad permutation size: " +
+ std::to_string(perm_size));
+ }
+
+ const int32_t *perm_data = perm_buf;
+ std::vector<int32_t> regular_perm_vec;
+ if (perm_size == 0)
+ {
+ // perm_data will be set to (n-1...0)
+ regular_perm_vec.resize(rank);
+ std::iota(regular_perm_vec.begin(), regular_perm_vec.end(), 0);
+ std::reverse(regular_perm_vec.begin(), regular_perm_vec.end());
+ perm_data = regular_perm_vec.data();
+ }
+ else
{
- throw std::runtime_error("inferTransposeShape failed, bad rank size: " +
- std::to_string(static_cast<int>(perm.size())));
+ assert(rank == perm_size);
}
- ir::Shape out_shape(static_cast<int>(perm.size()));
- for (int idx = 0; idx < static_cast<int>(perm.size()); idx++)
+
+ ir::Shape out_shape(rank);
+ std::vector<bool> visit_perms(rank, false);
+ for (int idx = 0; idx < rank; idx++)
{
- if (perm[idx] < 0 || perm[idx] >= static_cast<int>(perm.size()))
+ const auto perm_val = perm_data[idx];
+ // Check invalid permutation value
+ if (perm_val < 0 || perm_val >= rank)
{
- throw std::runtime_error("inferTransposeShape failed, bad perm value: " +
- std::to_string(perm[idx]));
+ throw std::runtime_error("inferTransposeShape failed, bad permutation value: " +
+ std::to_string(perm_val));
}
- out_shape.dim(idx) = in_shape.dim(perm[idx]);
+
+ // Check duplicated permutation value
+ if (visit_perms.at(perm_val))
+ {
+ throw std::runtime_error("inferTransposeShape failed, duplicated permutation value: " +
+ std::to_string(perm_val));
+ }
+ visit_perms.at(perm_val) = true;
+
+ out_shape.dim(idx) = in_shape.dim(perm_val);
}
return out_shape;
}
diff --git a/runtime/onert/frontend/base_loader/include/base_loader.h b/runtime/onert/frontend/base_loader/include/base_loader.h
index 480452e01..d21001e59 100644
--- a/runtime/onert/frontend/base_loader/include/base_loader.h
+++ b/runtime/onert/frontend/base_loader/include/base_loader.h
@@ -1,4 +1,5 @@
/*
+ * Copyright 2017 The TensorFlow Authors. All Rights Reserved.
* Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
*
* Licensed under the Apache License, Version 2.0 (the "License");
@@ -38,7 +39,7 @@ namespace onert
namespace base_loader
{
-template <typename LoaderDomain, typename SpecificLoader> class BaseLoader
+template <typename LoaderDomain> class BaseLoader
{
protected:
using Verifier = typename LoaderDomain::Verifier;
@@ -69,6 +70,7 @@ public:
explicit BaseLoader(std::unique_ptr<ir::Subgraphs> &subgs)
: _base{nullptr}, _pagesize(getpagesize()), _fd(-1), _subgraphs(subgs), _model{nullptr}
{
+ _use_mmaped_data = util::getConfigBool(util::config::USE_MMAPED_DATA);
}
/**
@@ -93,7 +95,6 @@ protected:
ir::Activation convertActivation(ActivationFunctionType type);
ir::DataType tensorTypeToDataType(TensorType type);
ir::OperandIndex tensorIdxToOperandIdx(int32_t tensorIdx);
- void deallocateMmappedArea(uint8_t *ptr, size_t size);
// Create operands form tflite::Tensor
ir::OperandIndex loadOperand(const Tensor *tensor, ir::Graph &subg);
@@ -107,7 +108,11 @@ protected:
// Load Pool2D param
template <typename Param> void loadPool2DOptions(Param &param, const Pool2DOptions *options);
+private:
+ virtual std::unique_ptr<ir::Graph> loadSubgraph(const SubGraph *subg) = 0;
// Operations
+ template <typename OpIR, typename... Args>
+ const OpIR *loadOperationTo(const Operator *op, ir::Graph &subg, Args &&... args);
void loadConv2D(const Operator *op, ir::Graph &subg);
void loadDepthwiseConv2D(const Operator *op, ir::Graph &subg);
void loadTransposeConv(const Operator *op, ir::Graph &subg);
@@ -115,62 +120,50 @@ protected:
void loadReshape(const Operator *op, ir::Graph &subg);
void loadSoftmax(const Operator *op, ir::Graph &subg);
void loadConcatenation(const Operator *op, ir::Graph &subg);
- void loadFill(const Operator *op, ir::Graph &subg);
void loadFC(const Operator *op, ir::Graph &subg);
- template <ir::operation::BinaryArithmetic::ArithmeticType op_type>
- void loadBinaryArithmetic(const Operator *op, ir::Graph &subg);
+ void loadBinaryArithmetic(const Operator *op, ir::Graph &subg,
+ ir::operation::BinaryArithmetic::ArithmeticType op_type);
void loadAddV2(const Operator *op, ir::Graph &subg);
void loadPack(const Operator *op, ir::Graph &subg);
void loadResizeBilinear(const Operator *op, ir::Graph &subg);
void loadResizeNearestNeighbor(const Operator *op, ir::Graph &subg);
- void loadSelect(const Operator *op, ir::Graph &subg);
- void loadSquaredDifference(const Operator *op, ir::Graph &subg);
- void loadTranspose(const Operator *op, ir::Graph &subg);
- template <ir::operation::Reduce::ReduceType reduce_type>
- void loadReduce(const Operator *op, ir::Graph &subg);
+ void loadReduce(const Operator *op, ir::Graph &subg,
+ ir::operation::Reduce::ReduceType reduce_type);
void loadReduceAll(const Operator *op, ir::Graph &subg);
- void loadReverseV2(const Operator *op, ir::Graph &subg);
- void loadPad(const Operator *op, ir::Graph &subg);
void loadElementwiseActivation(const Operator *op, ir::Graph &subg,
ir::operation::ElementwiseActivation::Type op_type,
float alpha = 0.f, float beta = 0.f);
- template <ir::operation::ElementwiseBinary::ElementwiseBinaryType op_type>
- void loadElementwiseBinary(const Operator *op, ir::Graph &subg);
+ void loadElementwiseBinary(const Operator *op, ir::Graph &subg,
+ ir::operation::ElementwiseBinary::ElementwiseBinaryType op_type);
void loadElementwiseUnary(const Operator *op, ir::Graph &subg,
ir::operation::ElementwiseUnary::Type op_type);
- void loadExpandDims(const Operator *op, ir::Graph &subg);
void loadGather(const Operator *op, ir::Graph &subg);
void loadCustom(const Operator *op, ir::Graph &subg);
- void loadSpaceToBatchND(const Operator *op, ir::Graph &subg);
void loadBatchMatMul(const Operator *op, ir::Graph &subg);
- void loadBatchToSpaceND(const Operator *op, ir::Graph &subg);
void loadSqueeze(const Operator *op, ir::Graph &subg);
- void loadPrelu(const Operator *op, ir::Graph &subg);
void loadSplit(const Operator *op, ir::Graph &subg);
void loadSplitV(const Operator *op, ir::Graph &subg);
- void loadSlice(const Operator *op, ir::Graph &subg);
void loadStridedSlice(const Operator *op, ir::Graph &subg);
void loadUnpack(const Operator *op, ir::Graph &subg);
void loadComparison(const Operator *op, ir::Graph &subg);
void loadEinsum(const Operator *op, ir::Graph &subg);
void loadOneHot(const Operator *op, ir::Graph &subg);
- void loadShape(const Operator *op, ir::Graph &subg);
void loadIf(const Operator *op, ir::Graph &subg);
void loadWhile(const Operator *op, ir::Graph &subg);
void loadArgMax(const Operator *op, ir::Graph &subg);
- void loadPow(const Operator *op, ir::Graph &subg);
- void loadTile(const Operator *op, ir::Graph &subg);
- void loadRange(const Operator *op, ir::Graph &subg);
- void loadRank(const Operator *op, ir::Graph &subg);
- void loadMatrixBandPart(const Operator *op, ir::Graph &subg);
- void loadBroadcastTo(const Operator *op, ir::Graph &subg);
void loadFusedBatchNorm(const Operator *op, ir::Graph &subg);
void loadLogSoftmax(const Operator *op, ir::Graph &subg);
void loadSpaceToDepth(const Operator *op, ir::Graph &subg);
- void loadStatelessRandomUniform(const Operator *op, ir::Graph &subg);
- void loadL2Normalization(const Operator *op, ir::Graph &subg);
void loadLeakyRelu(const Operator *op, ir::Graph &subg);
+ void verifySubgraphIndex(int subg_index)
+ {
+ const auto num_subgraphs = _model->subgraphs()->size();
+ if (subg_index < 0 || subg_index >= static_cast<int32_t>(num_subgraphs))
+ throw std::runtime_error{std::string{"Invalid subgraph index - "} +
+ std::to_string(subg_index)};
+ }
+
protected:
// Base address for mapped region for loading (if needed)
uint8_t *_base;
@@ -186,10 +179,12 @@ protected:
std::unordered_map<ir::OperandIndex, std::string> _tensor_names;
// Verifier
std::unique_ptr<Verifier> _verifier;
+ // Boolean flag to use MMAPED_DATA
+ bool _use_mmaped_data = false;
};
-template <typename LoaderDomain, typename SpecificLoader>
-void BaseLoader<LoaderDomain, SpecificLoader>::BaseLoader::loadFromFile(const char *file_path)
+template <typename LoaderDomain>
+void BaseLoader<LoaderDomain>::BaseLoader::loadFromFile(const char *file_path)
{
_fd = open(file_path, O_RDONLY);
if (_fd < 0)
@@ -216,22 +211,22 @@ void BaseLoader<LoaderDomain, SpecificLoader>::BaseLoader::loadFromFile(const ch
_verifier = std::make_unique<Verifier>(reinterpret_cast<const std::uint8_t *>(_base), size);
loadModel();
+ munmap(_base, size);
close(_fd);
}
-template <typename LoaderDomain, typename SpecificLoader>
-void BaseLoader<LoaderDomain, SpecificLoader>::BaseLoader::loadFromBuffer(uint8_t *buffer,
- size_t size)
+template <typename LoaderDomain>
+void BaseLoader<LoaderDomain>::BaseLoader::loadFromBuffer(uint8_t *buffer, size_t size)
{
_base = buffer;
_verifier = std::make_unique<Verifier>(reinterpret_cast<const std::uint8_t *>(_base), size);
loadModel();
}
-template <typename LoaderDomain, typename SpecificLoader>
-ir::Activation BaseLoader<LoaderDomain, SpecificLoader>::BaseLoader::convertActivation(
- const ActivationFunctionType type)
+template <typename LoaderDomain>
+ir::Activation
+BaseLoader<LoaderDomain>::BaseLoader::convertActivation(const ActivationFunctionType type)
{
switch (type)
{
@@ -246,14 +241,13 @@ ir::Activation BaseLoader<LoaderDomain, SpecificLoader>::BaseLoader::convertActi
case ActivationFunctionType::ActivationFunctionType_TANH:
return ir::Activation::TANH;
default:
- throw std::runtime_error(std::string("Unsupported activation type: ")
- .append(EnumNameActivationFunctionType(type)));
+ throw std::runtime_error(std::string("Unsupported or invalid activation type: ") +
+ std::to_string(static_cast<int>(type)));
}
}
-template <typename LoaderDomain, typename SpecificLoader>
-ir::DataType
-BaseLoader<LoaderDomain, SpecificLoader>::BaseLoader::tensorTypeToDataType(const TensorType type)
+template <typename LoaderDomain>
+ir::DataType BaseLoader<LoaderDomain>::BaseLoader::tensorTypeToDataType(const TensorType type)
{
switch (type)
{
@@ -275,39 +269,13 @@ BaseLoader<LoaderDomain, SpecificLoader>::BaseLoader::tensorTypeToDataType(const
}
}
-template <typename LoaderDomain, typename SpecificLoader>
-ir::OperandIndex
-BaseLoader<LoaderDomain, SpecificLoader>::BaseLoader::tensorIdxToOperandIdx(int32_t tensorIdx)
+template <typename LoaderDomain>
+ir::OperandIndex BaseLoader<LoaderDomain>::BaseLoader::tensorIdxToOperandIdx(int32_t tensorIdx)
{
return isOptionalInputTensor(tensorIdx) ? ir::OperandIndex() : _tensor_to_operand[tensorIdx];
}
-template <typename LoaderDomain, typename SpecificLoader>
-void BaseLoader<LoaderDomain, SpecificLoader>::BaseLoader::deallocateMmappedArea(uint8_t *ptr,
- size_t size)
-{
- // Calculate offset from base address of mapped region
- ptrdiff_t unaligned_offset_start = ptr - _base;
- ptrdiff_t unaligned_offset_end = unaligned_offset_start + size;
-
- // Calculated aligned offset from base address of mapped region
- // munmap accepts memory address which is a multiple of the pagesize
- ptrdiff_t aligned_offset_start =
- ((unaligned_offset_start + (_pagesize - 1)) / _pagesize) * _pagesize;
- ptrdiff_t aligned_offset_end = (unaligned_offset_end / _pagesize) * _pagesize;
-
- ptrdiff_t area_size = aligned_offset_end - aligned_offset_start;
- if (area_size > 0)
- {
- // Unmap mapped region for CachedData
- if (munmap(_base + aligned_offset_start, area_size) == -1)
- {
- VERBOSE(BASE_LOADER) << "munmap failed" << std::endl;
- }
- }
-}
-
-/* Copied from tensorflow lite. Need to append copyright */
+/* The Copy() helper below is copied from TensorFlow Lite */
template <typename T> bool Copy(const T *data_ptr, std::vector<uint16_t> &arr)
{
if (data_ptr->values() == nullptr)
@@ -324,9 +292,8 @@ template <typename T> bool Copy(const T *data_ptr, std::vector<uint16_t> &arr)
return true;
}
-template <typename LoaderDomain, typename SpecificLoader>
-ir::OperandIndex BaseLoader<LoaderDomain, SpecificLoader>::loadOperand(const Tensor *tensor,
- ir::Graph &subg)
+template <typename LoaderDomain>
+ir::OperandIndex BaseLoader<LoaderDomain>::loadOperand(const Tensor *tensor, ir::Graph &subg)
{
ir::Shape shape;
// Shape
@@ -386,18 +353,44 @@ ir::OperandIndex BaseLoader<LoaderDomain, SpecificLoader>::loadOperand(const Ten
{
std::vector<uint16_t> w1_segments;
std::vector<uint16_t> w1_indices;
- // ignore traversal_order, block_map
+ // check traversal_order
+ if (src_sparsity->traversal_order())
+ {
+ const int traversal_order_size = src_sparsity->traversal_order()->size();
+ for (int i = 0; i < traversal_order_size; ++i)
+ {
+ if (i != src_sparsity->traversal_order()->Get(i))
+ throw std::runtime_error("traversal_order [0, 1, ..., n-1] is only supported.");
+ }
+ }
+ // check block_map
+ int block_rank = 0;
+ if (src_sparsity->block_map())
+ {
+ block_rank = src_sparsity->block_map()->size();
+ for (int i = 0; i < block_rank; ++i)
+ {
+ if (i != src_sparsity->block_map()->Get(i))
+ throw std::runtime_error("block_map [0, 1, ..., n-1] is only supported.");
+ }
+ }
// load metadata
- const size_t dim_metadata_size = src_sparsity->dim_metadata()->size();
- if (dim_metadata_size != 2)
- throw std::runtime_error("sparse tensor is supported only for 2D");
+ const int dim_metadata_size = src_sparsity->dim_metadata()->size();
+ auto dense_rank = shape.rank();
+ if (dense_rank + block_rank != dim_metadata_size)
+ throw std::runtime_error("sparsity dim_metadata length is wrong.");
+ bool random_sparsity = dim_metadata_size == 2 && block_rank == 0;
+ bool block2D_sparsity = dim_metadata_size == 4 && block_rank == 2;
+ if (!random_sparsity && !block2D_sparsity)
+ throw std::runtime_error(
+ "sparsity is supported only for 2D tensor with random or 16x1 block sparsity.");
+
const auto *src_metadata = src_sparsity->dim_metadata()->Get(0);
if (src_metadata->format() != DimensionType::DimensionType_DENSE)
throw std::runtime_error("sparse tensor dim[0] is not DENSE");
src_metadata = src_sparsity->dim_metadata()->Get(1);
if (src_metadata->format() != DimensionType::DimensionType_SPARSE_CSR)
throw std::runtime_error("sparse tensor dim[0] is not SPARSE_CSR");
-
auto ParseSparseIndexVector = [src_metadata, &w1_segments, &w1_indices]() {
if (src_metadata->array_segments() == nullptr || src_metadata->array_indices() == nullptr)
return false;
@@ -433,7 +426,17 @@ ir::OperandIndex BaseLoader<LoaderDomain, SpecificLoader>::loadOperand(const Ten
};
if (ParseSparseIndexVector() == false)
throw std::runtime_error("Error during parsing sparsity index information");
- type_info.sparse2DMetadata(std::move(w1_segments), std::move(w1_indices));
+ // Get block size
+ std::vector<int32_t> block_size;
+ for (int i = 0; i < block_rank; ++i)
+ {
+ auto block_metadata = src_sparsity->dim_metadata()->Get(dense_rank + i);
+ if (block_metadata->format() != DimensionType::DimensionType_DENSE)
+ throw std::runtime_error("block dimension must be DENSE.");
+ block_size.push_back(block_metadata->dense_size());
+ }
+ type_info.sparsity(std::make_shared<ir::Sparsity>(std::move(w1_segments), std::move(w1_indices),
+ std::move(block_size)));
}
// Create operand
const auto operand_index = subg.addOperand(shape, type_info);
@@ -450,8 +453,28 @@ ir::OperandIndex BaseLoader<LoaderDomain, SpecificLoader>::loadOperand(const Ten
}
else // Model is loaded(mmap'd) from a file
{
- data_obj = std::make_unique<ir::CachedData>(data->data(), data->size());
- deallocateMmappedArea(const_cast<uint8_t *>(data->data()), data->size());
+ size_t data_size = data->size();
+ ptrdiff_t unaligned_offset_start = data->data() - _base;
+ ptrdiff_t offset_end = unaligned_offset_start + data_size;
+
+ // Calculated aligned offset from base address of mapped region
+ // munmap accepts memory address which is a multiple of the pagesize
+ ptrdiff_t aligned_offset_start = (unaligned_offset_start / _pagesize) * _pagesize;
+ size_t mmap_size = offset_end - aligned_offset_start;
+
+ if (_use_mmaped_data)
+ {
+ data_obj = std::make_unique<ir::MMapedData>(_fd, aligned_offset_start, mmap_size,
+ unaligned_offset_start, data_size);
+ }
+ else
+ {
+ size_t offset = unaligned_offset_start - aligned_offset_start;
+ uint8_t *mmap_base = static_cast<uint8_t *>(
+ mmap(NULL, mmap_size, PROT_READ, MAP_PRIVATE, _fd, aligned_offset_start));
+ data_obj = std::make_unique<ir::CachedData>(mmap_base + offset, data_size);
+ munmap(mmap_base, mmap_size);
+ }
}
subg.setOperandValue(operand_index, std::move(data_obj));
}
@@ -465,10 +488,9 @@ ir::OperandIndex BaseLoader<LoaderDomain, SpecificLoader>::loadOperand(const Ten
return operand_index;
}
-template <typename LoaderDomain, typename SpecificLoader>
-void BaseLoader<LoaderDomain, SpecificLoader>::loadOperationIO(const Operator *op,
- ir::OperandIndexSequence &inputs,
- ir::OperandIndexSequence &outputs)
+template <typename LoaderDomain>
+void BaseLoader<LoaderDomain>::loadOperationIO(const Operator *op, ir::OperandIndexSequence &inputs,
+ ir::OperandIndexSequence &outputs)
{
for (const std::int32_t idx : *op->inputs())
{
@@ -490,120 +512,116 @@ void BaseLoader<LoaderDomain, SpecificLoader>::loadOperationIO(const Operator *o
}
}
-template <typename LoaderDomain, typename SpecificLoader>
+template <typename LoaderDomain>
template <typename Param, typename OptionsType>
-void BaseLoader<LoaderDomain, SpecificLoader>::loadStridesAndPaddings(Param &param,
- const OptionsType *options)
+void BaseLoader<LoaderDomain>::loadStridesAndPaddings(Param &param, const OptionsType *options)
{
// Strides
param.stride.vertical = options->stride_h();
param.stride.horizontal = options->stride_w();
// Paddings
- if (options->padding() == Padding::Padding_SAME)
- param.padding.type = ir::PaddingType::SAME;
- if (options->padding() == Padding::Padding_VALID)
- param.padding.type = ir::PaddingType::VALID;
+ switch (options->padding())
+ {
+ case Padding::Padding_SAME:
+ param.padding.type = ir::PaddingType::SAME;
+ break;
+ case Padding::Padding_VALID:
+ param.padding.type = ir::PaddingType::VALID;
+ break;
+ default:
+ throw std::runtime_error{"Invalid padding type"};
+ }
// param paddings indexes unused
}
-template <typename LoaderDomain, typename SpecificLoader>
+template <typename LoaderDomain>
template <typename Param>
-void BaseLoader<LoaderDomain, SpecificLoader>::loadPool2DOptions(Param &param,
- const Pool2DOptions *options)
+void BaseLoader<LoaderDomain>::loadPool2DOptions(Param &param, const Pool2DOptions *options)
{
// Strides and Paddings
+ if (options->stride_h() <= 0 || options->stride_w() <= 0)
+ throw std::runtime_error{"Invalid stride vertical or horizontal - both must be bigger than 0"};
loadStridesAndPaddings(param, options);
// Filter width and height
// Strides
+ if (options->filter_width() <= 0 || options->filter_height() <= 0)
+ throw std::runtime_error{"Invalid filter width or height - both must be bigger than 0"};
param.kw = options->filter_width();
param.kh = options->filter_height();
// Activation
param.activation = convertActivation(options->fused_activation_function());
}
-template <typename LoaderDomain, typename SpecificLoader>
-void BaseLoader<LoaderDomain, SpecificLoader>::loadConv2D(const Operator *op, ir::Graph &subg)
+template <typename LoaderDomain>
+template <typename OpIR, typename... Args>
+const OpIR *BaseLoader<LoaderDomain>::loadOperationTo(const Operator *op, ir::Graph &subg,
+ Args &&... args)
{
+ static_assert(sizeof...(args) <= 1, "You can't have more than 1 argument!");
ir::OperandIndexSequence inputs;
ir::OperandIndexSequence outputs;
loadOperationIO(op, inputs, outputs);
+ std::unique_ptr<OpIR> new_op(new OpIR(inputs, outputs, std::forward<Args>(args)...));
+ auto ret = new_op.get();
+ subg.addOperation(std::move(new_op));
+
+ return ret;
+}
+
+template <typename LoaderDomain>
+void BaseLoader<LoaderDomain>::loadConv2D(const Operator *op, ir::Graph &subg)
+{
ir::operation::Conv2D::Param param;
const auto *options = op->builtin_options_as_Conv2DOptions();
param.activation = convertActivation(options->fused_activation_function());
loadStridesAndPaddings(param, options);
-
param.dilation.width_factor = options->dilation_w_factor();
param.dilation.height_factor = options->dilation_h_factor();
- std::unique_ptr<ir::Operation> new_op(new ir::operation::Conv2D(inputs, outputs, param));
- subg.addOperation(std::move(new_op));
+ loadOperationTo<ir::operation::Conv2D>(op, subg, param);
}
-template <typename LoaderDomain, typename SpecificLoader>
-void BaseLoader<LoaderDomain, SpecificLoader>::loadDepthwiseConv2D(const Operator *op,
- ir::Graph &subg)
+template <typename LoaderDomain>
+void BaseLoader<LoaderDomain>::loadDepthwiseConv2D(const Operator *op, ir::Graph &subg)
{
- ir::OperandIndexSequence inputs;
- ir::OperandIndexSequence outputs;
-
- loadOperationIO(op, inputs, outputs);
-
ir::operation::DepthwiseConv2D::Param param;
const auto *options = op->builtin_options_as_DepthwiseConv2DOptions();
param.activation = convertActivation(options->fused_activation_function());
loadStridesAndPaddings(param, options);
- // Multiplier
param.multiplier = options->depth_multiplier();
// Dilation h/w factor unused
- std::unique_ptr<ir::Operation> new_op(new ir::operation::DepthwiseConv2D(inputs, outputs, param));
- subg.addOperation(std::move(new_op));
+
+ loadOperationTo<ir::operation::DepthwiseConv2D>(op, subg, param);
}
-template <typename LoaderDomain, typename SpecificLoader>
-void BaseLoader<LoaderDomain, SpecificLoader>::loadTransposeConv(const Operator *op,
- ir::Graph &subg)
+template <typename LoaderDomain>
+void BaseLoader<LoaderDomain>::loadTransposeConv(const Operator *op, ir::Graph &subg)
{
- ir::OperandIndexSequence inputs;
- ir::OperandIndexSequence outputs;
-
- loadOperationIO(op, inputs, outputs);
-
ir::operation::TransposeConv::Param param;
const auto *options = op->builtin_options_as_TransposeConvOptions();
loadStridesAndPaddings(param, options);
- std::unique_ptr<ir::Operation> new_op(new ir::operation::TransposeConv(inputs, outputs, param));
- subg.addOperation(std::move(new_op));
+
+ loadOperationTo<ir::operation::TransposeConv>(op, subg, param);
}
-template <typename LoaderDomain, typename SpecificLoader>
-void BaseLoader<LoaderDomain, SpecificLoader>::loadPool2D(const Operator *op, ir::Graph &subg,
- ir::operation::Pool2D::PoolType op_type)
+template <typename LoaderDomain>
+void BaseLoader<LoaderDomain>::loadPool2D(const Operator *op, ir::Graph &subg,
+ ir::operation::Pool2D::PoolType op_type)
{
- ir::OperandIndexSequence inputs;
- ir::OperandIndexSequence outputs;
-
- loadOperationIO(op, inputs, outputs);
-
ir::operation::Pool2D::Param param;
param.op_type = op_type;
const auto *options = op->builtin_options_as_Pool2DOptions();
loadPool2DOptions(param, options);
- std::unique_ptr<ir::Operation> new_op(new ir::operation::Pool2D(inputs, outputs, param));
- subg.addOperation(std::move(new_op));
+ loadOperationTo<ir::operation::Pool2D>(op, subg, param);
}
-template <typename LoaderDomain, typename SpecificLoader>
-void BaseLoader<LoaderDomain, SpecificLoader>::loadReshape(const Operator *op, ir::Graph &subg)
+template <typename LoaderDomain>
+void BaseLoader<LoaderDomain>::loadReshape(const Operator *op, ir::Graph &subg)
{
- ir::OperandIndexSequence inputs;
- ir::OperandIndexSequence outputs;
-
- loadOperationIO(op, inputs, outputs);
-
ir::operation::Reshape::Param param{};
const auto *options = op->builtin_options_as_ReshapeOptions();
if (options != nullptr)
@@ -611,99 +629,64 @@ void BaseLoader<LoaderDomain, SpecificLoader>::loadReshape(const Operator *op, i
const auto *new_shape = options->new_shape();
if (new_shape)
{
- for (uint i = 0; i < new_shape->Length(); ++i)
+ for (uint i = 0; i < new_shape->size(); ++i)
{
param.new_shape.push_back(new_shape->Get(i));
}
}
}
- std::unique_ptr<ir::Operation> new_op(new ir::operation::Reshape(inputs, outputs, param));
- subg.addOperation(std::move(new_op));
+ loadOperationTo<ir::operation::Reshape>(op, subg, param);
}
-template <typename LoaderDomain, typename SpecificLoader>
-void BaseLoader<LoaderDomain, SpecificLoader>::loadSoftmax(const Operator *op, ir::Graph &subg)
+template <typename LoaderDomain>
+void BaseLoader<LoaderDomain>::loadSoftmax(const Operator *op, ir::Graph &subg)
{
- ir::OperandIndexSequence inputs;
- ir::OperandIndexSequence outputs;
-
- loadOperationIO(op, inputs, outputs);
-
ir::operation::Softmax::Param param;
const auto *options = op->builtin_options_as_SoftmaxOptions();
// Beta
param.beta = options->beta();
- std::unique_ptr<ir::Operation> new_op(new ir::operation::Softmax(inputs, outputs, param));
- subg.addOperation(std::move(new_op));
+ loadOperationTo<ir::operation::Softmax>(op, subg, param);
}
-template <typename LoaderDomain, typename SpecificLoader>
-void BaseLoader<LoaderDomain, SpecificLoader>::loadConcatenation(const Operator *op,
- ir::Graph &subg)
+template <typename LoaderDomain>
+void BaseLoader<LoaderDomain>::loadConcatenation(const Operator *op, ir::Graph &subg)
{
- ir::OperandIndexSequence inputs;
- ir::OperandIndexSequence outputs;
-
- loadOperationIO(op, inputs, outputs);
-
ir::operation::Concat::Param param;
const auto *options = op->builtin_options_as_ConcatenationOptions();
// Axis
param.axis = options->axis();
// activation unused
- std::unique_ptr<ir::Operation> new_op(new ir::operation::Concat(inputs, outputs, param));
- subg.addOperation(std::move(new_op));
+ loadOperationTo<ir::operation::Concat>(op, subg, param);
}
-template <typename LoaderDomain, typename SpecificLoader>
-void BaseLoader<LoaderDomain, SpecificLoader>::loadFill(const Operator *op, ir::Graph &subg)
+template <typename LoaderDomain>
+void BaseLoader<LoaderDomain>::loadFC(const Operator *op, ir::Graph &subg)
{
- ir::OperandIndexSequence inputs;
- ir::OperandIndexSequence outputs;
-
- loadOperationIO(op, inputs, outputs);
-
- std::unique_ptr<ir::Operation> new_op(new ir::operation::Fill(inputs, outputs));
- subg.addOperation(std::move(new_op));
-}
+ ir::operation::FullyConnected::Param param;
+ const auto *options = op->builtin_options_as_FullyConnectedOptions();
-template <typename LoaderDomain, typename SpecificLoader>
-void BaseLoader<LoaderDomain, SpecificLoader>::loadFC(const Operator *op, ir::Graph &subg)
-{
- ir::OperandIndexSequence inputs;
- ir::OperandIndexSequence outputs;
+ param.activation = convertActivation(options->fused_activation_function());
+ // weights_format unused
- loadOperationIO(op, inputs, outputs);
+ const auto fc = loadOperationTo<ir::operation::FullyConnected>(op, subg, param);
- const auto &input_operand = subg.operands().at(inputs.at(ir::operation::FullyConnected::INPUT));
- auto &weights_operand = subg.operands().at(inputs.at(ir::operation::FullyConnected::WEIGHT));
+ const auto &input_operand =
+ subg.operands().at(fc->getInputs().at(ir::operation::FullyConnected::INPUT));
+ auto &weights_operand =
+ subg.operands().at(fc->getInputs().at(ir::operation::FullyConnected::WEIGHT));
if (input_operand.typeInfo().type() == ir::DataType::FLOAT32 &&
weights_operand.typeInfo().type() == ir::DataType::QUANT_UINT8_ASYMM)
{
weights_operand.type(ir::DataType::QUANT_INT8_SYMM);
}
-
- ir::operation::FullyConnected::Param param;
- const auto *options = op->builtin_options_as_FullyConnectedOptions();
-
- param.activation = convertActivation(options->fused_activation_function());
- // weights_format unused
-
- std::unique_ptr<ir::Operation> new_op(new ir::operation::FullyConnected(inputs, outputs, param));
- subg.addOperation(std::move(new_op));
}
-template <typename LoaderDomain, typename SpecificLoader>
-void BaseLoader<LoaderDomain, SpecificLoader>::loadAddV2(const Operator *op, ir::Graph &subg)
+template <typename LoaderDomain>
+void BaseLoader<LoaderDomain>::loadAddV2(const Operator *op, ir::Graph &subg)
{
- ir::OperandIndexSequence inputs;
- ir::OperandIndexSequence outputs;
-
- loadOperationIO(op, inputs, outputs);
-
ir::operation::BinaryArithmetic::Param param;
param.arithmetic_type = ir::operation::BinaryArithmetic::ArithmeticType::ADD;
@@ -722,21 +705,13 @@ void BaseLoader<LoaderDomain, SpecificLoader>::loadAddV2(const Operator *op, ir:
param.activation = convertActivation(fused_activation_func);
}
- std::unique_ptr<ir::Operation> new_op(
- new ir::operation::BinaryArithmetic(inputs, outputs, param));
- subg.addOperation(std::move(new_op));
+ loadOperationTo<ir::operation::BinaryArithmetic>(op, subg, param);
}
-template <typename LoaderDomain, typename SpecificLoader>
-template <ir::operation::BinaryArithmetic::ArithmeticType op_type>
-void BaseLoader<LoaderDomain, SpecificLoader>::loadBinaryArithmetic(const Operator *op,
- ir::Graph &subg)
+template <typename LoaderDomain>
+void BaseLoader<LoaderDomain>::loadBinaryArithmetic(
+ const Operator *op, ir::Graph &subg, ir::operation::BinaryArithmetic::ArithmeticType op_type)
{
- ir::OperandIndexSequence inputs;
- ir::OperandIndexSequence outputs;
-
- loadOperationIO(op, inputs, outputs);
-
ir::operation::BinaryArithmetic::Param param;
param.arithmetic_type = op_type;
switch (op_type)
@@ -771,172 +746,66 @@ void BaseLoader<LoaderDomain, SpecificLoader>::loadBinaryArithmetic(const Operat
break;
}
- std::unique_ptr<ir::Operation> new_op(
- new ir::operation::BinaryArithmetic(inputs, outputs, param));
- subg.addOperation(std::move(new_op));
+ loadOperationTo<ir::operation::BinaryArithmetic>(op, subg, param);
}
-template <typename LoaderDomain, typename SpecificLoader>
-void BaseLoader<LoaderDomain, SpecificLoader>::loadPack(const Operator *op, ir::Graph &subg)
+template <typename LoaderDomain>
+void BaseLoader<LoaderDomain>::loadPack(const Operator *op, ir::Graph &subg)
{
- // This runtime_error will be removed if the one of backend supports this operation
- ir::OperandIndexSequence inputs;
- ir::OperandIndexSequence outputs;
-
- loadOperationIO(op, inputs, outputs);
-
ir::operation::Pack::Param param;
const auto *options = op->builtin_options_as_PackOptions();
param.num = options->values_count();
param.axis = options->axis();
- std::unique_ptr<ir::Operation> new_op(new ir::operation::Pack(inputs, outputs, param));
- subg.addOperation(std::move(new_op));
+ loadOperationTo<ir::operation::Pack>(op, subg, param);
}
-template <typename LoaderDomain, typename SpecificLoader>
-void BaseLoader<LoaderDomain, SpecificLoader>::loadElementwiseActivation(
+template <typename LoaderDomain>
+void BaseLoader<LoaderDomain>::loadElementwiseActivation(
const Operator *op, ir::Graph &subg, ir::operation::ElementwiseActivation::Type op_type,
float alpha, float beta)
{
- ir::OperandIndexSequence inputs;
- ir::OperandIndexSequence outputs;
-
- loadOperationIO(op, inputs, outputs);
-
ir::operation::ElementwiseActivation::Param param;
param.op_type = op_type;
param.alpha = alpha;
param.beta = beta;
- std::unique_ptr<ir::Operation> new_op(
- new ir::operation::ElementwiseActivation(inputs, outputs, param));
- subg.addOperation(std::move(new_op));
+ loadOperationTo<ir::operation::ElementwiseActivation>(op, subg, param);
}
-template <typename LoaderDomain, typename SpecificLoader>
-void BaseLoader<LoaderDomain, SpecificLoader>::loadResizeBilinear(const Operator *op,
- ir::Graph &subg)
+template <typename LoaderDomain>
+void BaseLoader<LoaderDomain>::loadResizeBilinear(const Operator *op, ir::Graph &subg)
{
- ir::OperandIndexSequence inputs;
- ir::OperandIndexSequence outputs;
-
- loadOperationIO(op, inputs, outputs);
- auto input = inputs.at(0);
- auto size = inputs.at(1);
-
- // FIXME Handle ResizeBilinearOptions.
- if (!subg.operands().at(size).isConstant())
- throw std::runtime_error("ResizeBilinear: non-constant 'size' is not supported.");
-
- std::vector<std::int32_t> size_v = subg.operands().at(size).template asVector<std::int32_t>();
-
ir::operation::ResizeBilinear::Param param;
- param.height_out = size_v[0];
- param.width_out = size_v[1];
param.align_corners = op->builtin_options_as_ResizeBilinearOptions()->align_corners();
param.half_pixel_centers = op->builtin_options_as_ResizeBilinearOptions()->half_pixel_centers();
- std::unique_ptr<ir::Operation> new_op(new ir::operation::ResizeBilinear({input}, outputs, param));
- subg.addOperation(std::move(new_op));
+ loadOperationTo<ir::operation::ResizeBilinear>(op, subg, param);
}
-template <typename LoaderDomain, typename SpecificLoader>
-void BaseLoader<LoaderDomain, SpecificLoader>::loadResizeNearestNeighbor(const Operator *op,
- ir::Graph &subg)
+template <typename LoaderDomain>
+void BaseLoader<LoaderDomain>::loadResizeNearestNeighbor(const Operator *op, ir::Graph &subg)
{
- ir::OperandIndexSequence inputs;
- ir::OperandIndexSequence outputs;
-
- loadOperationIO(op, inputs, outputs);
- auto input = inputs.at(0);
- auto size = inputs.at(1);
-
- if (!subg.operands().at(size).isConstant())
- throw std::runtime_error("ResizeNearestNeighbor: non-constant 'size' is not supported.");
-
- std::vector<std::int32_t> size_v = subg.operands().at(size).template asVector<std::int32_t>();
-
ir::operation::ResizeNearestNeighbor::Param param;
- param.height_out = size_v[0];
- param.width_out = size_v[1];
param.align_corners = op->builtin_options_as_ResizeNearestNeighborOptions()->align_corners();
- std::unique_ptr<ir::Operation> new_op(
- new ir::operation::ResizeNearestNeighbor({input}, outputs, param));
- subg.addOperation(std::move(new_op));
-}
-
-template <typename LoaderDomain, typename SpecificLoader>
-void BaseLoader<LoaderDomain, SpecificLoader>::loadSelect(const Operator *op, ir::Graph &subg)
-{
- ir::OperandIndexSequence inputs;
- ir::OperandIndexSequence outputs;
-
- loadOperationIO(op, inputs, outputs);
-
- std::unique_ptr<ir::Operation> new_op(new ir::operation::Select(inputs, outputs));
- subg.addOperation(std::move(new_op));
+ loadOperationTo<ir::operation::ResizeNearestNeighbor>(op, subg, param);
}
-template <typename LoaderDomain, typename SpecificLoader>
-void BaseLoader<LoaderDomain, SpecificLoader>::loadSquaredDifference(const Operator *op,
- ir::Graph &subg)
+template <typename LoaderDomain>
+void BaseLoader<LoaderDomain>::loadReduce(const Operator *op, ir::Graph &subg,
+ ir::operation::Reduce::ReduceType reduce_type)
{
- ir::OperandIndexSequence inputs;
- ir::OperandIndexSequence outputs;
-
- loadOperationIO(op, inputs, outputs);
-
- std::unique_ptr<ir::Operation> new_op(new ir::operation::SquaredDifference(inputs, outputs));
- subg.addOperation(std::move(new_op));
-}
-
-template <typename LoaderDomain, typename SpecificLoader>
-void BaseLoader<LoaderDomain, SpecificLoader>::loadTranspose(const Operator *op, ir::Graph &subg)
-{
- ir::OperandIndexSequence inputs;
- ir::OperandIndexSequence outputs;
-
- loadOperationIO(op, inputs, outputs);
- auto input = inputs.at(0);
- auto perm = inputs.at(1);
-
- if (!subg.operands().at(perm).isConstant())
- throw std::runtime_error("Transpose: non-constant 'perm' is not supported.");
-
- ir::operation::Transpose::Param param;
- param.perm = subg.operands().at(perm).template asVector<int>();
-
- std::unique_ptr<ir::Operation> new_op(new ir::operation::Transpose({input}, outputs, param));
- subg.addOperation(std::move(new_op));
-}
-
-template <typename LoaderDomain, typename SpecificLoader>
-template <ir::operation::Reduce::ReduceType reduce_type>
-void BaseLoader<LoaderDomain, SpecificLoader>::loadReduce(const Operator *op, ir::Graph &subg)
-{
- ir::OperandIndexSequence inputs;
- ir::OperandIndexSequence outputs;
-
- loadOperationIO(op, inputs, outputs);
-
ir::operation::Reduce::Param param;
param.reduce_type = reduce_type;
param.keep_dims = op->builtin_options_as_ReducerOptions()->keep_dims();
- std::unique_ptr<ir::Operation> new_op(new ir::operation::Reduce(inputs, outputs, param));
- subg.addOperation(std::move(new_op));
+ loadOperationTo<ir::operation::Reduce>(op, subg, param);
}
-template <typename LoaderDomain, typename SpecificLoader>
-void BaseLoader<LoaderDomain, SpecificLoader>::loadReduceAll(const Operator *op, ir::Graph &subg)
+template <typename LoaderDomain>
+void BaseLoader<LoaderDomain>::loadReduceAll(const Operator *op, ir::Graph &subg)
{
- ir::OperandIndexSequence inputs;
- ir::OperandIndexSequence outputs;
-
- loadOperationIO(op, inputs, outputs);
-
ir::operation::Reduce::Param param;
param.reduce_type = ir::operation::Reduce::ReduceType::ALL;
if (op->custom_options() == nullptr)
@@ -952,64 +821,28 @@ void BaseLoader<LoaderDomain, SpecificLoader>::loadReduceAll(const Operator *op,
param.keep_dims = attr_map["keep_dims"].AsBool();
}
- std::unique_ptr<ir::Operation> new_op(new ir::operation::Reduce(inputs, outputs, param));
- subg.addOperation(std::move(new_op));
-}
-
-template <typename LoaderDomain, typename SpecificLoader>
-void BaseLoader<LoaderDomain, SpecificLoader>::loadReverseV2(const Operator *op, ir::Graph &subg)
-{
- ir::OperandIndexSequence inputs;
- ir::OperandIndexSequence outputs;
-
- loadOperationIO(op, inputs, outputs);
-
- std::unique_ptr<ir::Operation> new_op(new ir::operation::Reverse(inputs, outputs));
- subg.addOperation(std::move(new_op));
-}
-
-template <typename LoaderDomain, typename SpecificLoader>
-void BaseLoader<LoaderDomain, SpecificLoader>::loadPad(const Operator *op, ir::Graph &subg)
-{
- ir::OperandIndexSequence inputs;
- ir::OperandIndexSequence outputs;
-
- loadOperationIO(op, inputs, outputs);
-
- std::unique_ptr<ir::Operation> new_op(new ir::operation::Pad(inputs, outputs));
- subg.addOperation(std::move(new_op));
+ loadOperationTo<ir::operation::Reduce>(op, subg, param);
}
-template <typename LoaderDomain, typename SpecificLoader>
-template <ir::operation::ElementwiseBinary::ElementwiseBinaryType op_type>
-void BaseLoader<LoaderDomain, SpecificLoader>::loadElementwiseBinary(const Operator *op,
- ir::Graph &subg)
+template <typename LoaderDomain>
+void BaseLoader<LoaderDomain>::loadElementwiseBinary(
+ const Operator *op, ir::Graph &subg,
+ ir::operation::ElementwiseBinary::ElementwiseBinaryType op_type)
{
- ir::OperandIndexSequence inputs;
- ir::OperandIndexSequence outputs;
-
- loadOperationIO(op, inputs, outputs);
-
ir::operation::ElementwiseBinary::Param param;
param.op_type = op_type;
- std::unique_ptr<ir::Operation> new_op(
- new ir::operation::ElementwiseBinary(inputs, outputs, param));
- subg.addOperation(std::move(new_op));
+ loadOperationTo<ir::operation::ElementwiseBinary>(op, subg, param);
}
-template <typename LoaderDomain, typename SpecificLoader>
-void BaseLoader<LoaderDomain, SpecificLoader>::loadElementwiseUnary(
- const Operator *op, ir::Graph &subg, ir::operation::ElementwiseUnary::Type op_type)
+template <typename LoaderDomain>
+void BaseLoader<LoaderDomain>::loadElementwiseUnary(const Operator *op, ir::Graph &subg,
+ ir::operation::ElementwiseUnary::Type op_type)
{
- ir::OperandIndexSequence inputs;
- ir::OperandIndexSequence outputs;
-
- loadOperationIO(op, inputs, outputs);
-
ir::operation::ElementwiseUnary::Param param;
param.op_type = op_type;
+ const auto eu = loadOperationTo<ir::operation::ElementwiseUnary>(op, subg, param);
if (op_type == ir::operation::ElementwiseUnary::Type::CAST)
{
auto qasymm8ToUint8 = [](ir::Operand &operand) {
@@ -1018,61 +851,24 @@ void BaseLoader<LoaderDomain, SpecificLoader>::loadElementwiseUnary(
operand.type(ir::DataType::UINT8);
}
};
- qasymm8ToUint8(subg.operands().at(inputs.at(ir::operation::ElementwiseUnary::Input::INPUT)));
- qasymm8ToUint8(subg.operands().at(outputs.at(0)));
+ qasymm8ToUint8(
+ subg.operands().at(eu->getInputs().at(ir::operation::ElementwiseUnary::Input::INPUT)));
+ qasymm8ToUint8(subg.operands().at(eu->getOutputs().at(0)));
}
-
- std::unique_ptr<ir::Operation> new_op(
- new ir::operation::ElementwiseUnary(inputs, outputs, param));
- subg.addOperation(std::move(new_op));
-}
-
-template <typename LoaderDomain, typename SpecificLoader>
-void BaseLoader<LoaderDomain, SpecificLoader>::loadExpandDims(const Operator *op, ir::Graph &subg)
-{
- ir::OperandIndexSequence inputs;
- ir::OperandIndexSequence outputs;
-
- loadOperationIO(op, inputs, outputs);
-
- std::unique_ptr<ir::Operation> new_op(new ir::operation::ExpandDims(inputs, outputs));
- subg.addOperation(std::move(new_op));
}
-template <typename LoaderDomain, typename SpecificLoader>
-void BaseLoader<LoaderDomain, SpecificLoader>::loadGather(const Operator *op, ir::Graph &subg)
+template <typename LoaderDomain>
+void BaseLoader<LoaderDomain>::loadGather(const Operator *op, ir::Graph &subg)
{
- ir::OperandIndexSequence inputs;
- ir::OperandIndexSequence outputs;
-
- loadOperationIO(op, inputs, outputs);
ir::operation::Gather::Param param;
param.axis = op->builtin_options_as_GatherOptions()->axis();
- std::unique_ptr<ir::Operation> new_op(new ir::operation::Gather(inputs, outputs, param));
- subg.addOperation(std::move(new_op));
-}
-
-template <typename LoaderDomain, typename SpecificLoader>
-void BaseLoader<LoaderDomain, SpecificLoader>::loadSpaceToBatchND(const Operator *op,
- ir::Graph &subg)
-{
- ir::OperandIndexSequence inputs;
- ir::OperandIndexSequence outputs;
-
- loadOperationIO(op, inputs, outputs);
-
- std::unique_ptr<ir::Operation> new_op{new ir::operation::SpaceToBatchND{inputs, outputs}};
- subg.addOperation(std::move(new_op));
+ loadOperationTo<ir::operation::Gather>(op, subg, param);
}
-template <typename LoaderDomain, typename SpecificLoader>
-void BaseLoader<LoaderDomain, SpecificLoader>::loadBatchMatMul(const Operator *op, ir::Graph &subg)
+template <typename LoaderDomain>
+void BaseLoader<LoaderDomain>::loadBatchMatMul(const Operator *op, ir::Graph &subg)
{
- ir::OperandIndexSequence inputs;
- ir::OperandIndexSequence outputs;
-
- loadOperationIO(op, inputs, outputs);
ir::operation::BatchMatMul::Param param;
const auto builtin_op = _model->operator_codes()->Get(op->opcode_index())->builtin_code();
@@ -1105,89 +901,21 @@ void BaseLoader<LoaderDomain, SpecificLoader>::loadBatchMatMul(const Operator *o
" as " + EnumNameBuiltinOperator(BuiltinOperator::BuiltinOperator_BATCH_MATMUL));
}
- std::unique_ptr<ir::Operation> new_op{new ir::operation::BatchMatMul{inputs, outputs, param}};
- subg.addOperation(std::move(new_op));
-}
-
-template <typename LoaderDomain, typename SpecificLoader>
-void BaseLoader<LoaderDomain, SpecificLoader>::loadBatchToSpaceND(const Operator *op,
- ir::Graph &subg)
-{
- ir::OperandIndexSequence inputs;
- ir::OperandIndexSequence outputs;
-
- loadOperationIO(op, inputs, outputs);
-
- std::unique_ptr<ir::Operation> new_op{new ir::operation::BatchToSpaceND{inputs, outputs}};
- subg.addOperation(std::move(new_op));
-}
-
-template <typename LoaderDomain, typename SpecificLoader>
-void BaseLoader<LoaderDomain, SpecificLoader>::loadMatrixBandPart(const Operator *op,
- ir::Graph &subg)
-{
- ir::OperandIndexSequence inputs;
- ir::OperandIndexSequence outputs;
-
- loadOperationIO(op, inputs, outputs);
-
- std::unique_ptr<ir::Operation> new_op(new ir::operation::MatrixBandPart(inputs, outputs));
- subg.addOperation(std::move(new_op));
+ loadOperationTo<ir::operation::BatchMatMul>(op, subg, param);
}
-template <typename LoaderDomain, typename SpecificLoader>
-void BaseLoader<LoaderDomain, SpecificLoader>::loadBroadcastTo(const Operator *op, ir::Graph &subg)
+template <typename LoaderDomain>
+void BaseLoader<LoaderDomain>::loadSpaceToDepth(const Operator *op, ir::Graph &subg)
{
- ir::OperandIndexSequence inputs;
- ir::OperandIndexSequence outputs;
-
- loadOperationIO(op, inputs, outputs);
-
- std::unique_ptr<ir::Operation> new_op(new ir::operation::BroadcastTo(inputs, outputs));
- subg.addOperation(std::move(new_op));
-}
-template <typename LoaderDomain, typename SpecificLoader>
-void BaseLoader<LoaderDomain, SpecificLoader>::loadSpaceToDepth(const Operator *op, ir::Graph &subg)
-{
- ir::OperandIndexSequence inputs;
- ir::OperandIndexSequence outputs;
ir::operation::SpaceToDepth::Param param;
-
const auto *options = op->builtin_options_as_SpaceToDepthOptions();
-
param.block_size = options->block_size();
- loadOperationIO(op, inputs, outputs);
-
- std::unique_ptr<ir::Operation> new_op(new ir::operation::SpaceToDepth(inputs, outputs, param));
- subg.addOperation(std::move(new_op));
-}
-
-template <typename LoaderDomain, typename SpecificLoader>
-void BaseLoader<LoaderDomain, SpecificLoader>::loadStatelessRandomUniform(const Operator *op,
- ir::Graph &subg)
-{
- ir::OperandIndexSequence inputs;
- ir::OperandIndexSequence outputs;
- loadOperationIO(op, inputs, outputs);
-
- std::unique_ptr<ir::Operation> new_op(new ir::operation::StatelessRandomUniform(inputs, outputs));
- subg.addOperation(std::move(new_op));
+ loadOperationTo<ir::operation::SpaceToDepth>(op, subg, param);
}
-template <typename LoaderDomain, typename SpecificLoader>
-void BaseLoader<LoaderDomain, SpecificLoader>::loadRank(const Operator *op, ir::Graph &subg)
-{
- ir::OperandIndexSequence inputs;
- ir::OperandIndexSequence outputs;
- loadOperationIO(op, inputs, outputs);
-
- std::unique_ptr<ir::Operation> new_op(new ir::operation::Rank(inputs, outputs));
- subg.addOperation(std::move(new_op));
-}
-
-template <typename LoaderDomain, typename SpecificLoader>
-void BaseLoader<LoaderDomain, SpecificLoader>::loadCustom(const Operator *op, ir::Graph &subg)
+template <typename LoaderDomain>
+void BaseLoader<LoaderDomain>::loadCustom(const Operator *op, ir::Graph &subg)
{
ir::OperandIndexSequence inputs;
ir::OperandIndexSequence outputs;
@@ -1237,7 +965,7 @@ void BaseLoader<LoaderDomain, SpecificLoader>::loadCustom(const Operator *op, ir
loadReduceAll(op, subg);
break;
case BuiltinOP::MatrixBandPart:
- loadMatrixBandPart(op, subg);
+ loadOperationTo<ir::operation::MatrixBandPart>(op, subg);
break;
case BuiltinOP::BatchMatMul:
loadBatchMatMul(op, subg);
@@ -1246,13 +974,13 @@ void BaseLoader<LoaderDomain, SpecificLoader>::loadCustom(const Operator *op, ir
loadEinsum(op, subg);
break;
case BuiltinOP::BroadcastTo:
- loadBroadcastTo(op, subg);
+ loadOperationTo<ir::operation::BroadcastTo>(op, subg);
break;
case BuiltinOP::FusedBatchNorm:
loadFusedBatchNorm(op, subg);
break;
case BuiltinOP::StatelessRandomUniform:
- loadStatelessRandomUniform(op, subg);
+ loadOperationTo<ir::operation::StatelessRandomUniform>(op, subg);
break;
case BuiltinOP::Erf:
loadElementwiseUnary(op, subg, ir::operation::ElementwiseUnary::Type::ERF);
@@ -1285,141 +1013,71 @@ void BaseLoader<LoaderDomain, SpecificLoader>::loadCustom(const Operator *op, ir
}
}
-template <typename LoaderDomain, typename SpecificLoader>
-void BaseLoader<LoaderDomain, SpecificLoader>::loadSqueeze(const Operator *op, ir::Graph &subg)
+template <typename LoaderDomain>
+void BaseLoader<LoaderDomain>::loadSqueeze(const Operator *op, ir::Graph &subg)
{
- ir::OperandIndexSequence inputs;
- ir::OperandIndexSequence outputs;
-
- loadOperationIO(op, inputs, outputs);
-
- ir::operation::Squeeze::Param param{};
+ ir::operation::Squeeze::Param param;
const auto *options = op->builtin_options_as_SqueezeOptions();
const auto *dims = options->squeeze_dims();
if (dims)
{
- if (dims->Length() > sizeof(param.dims) / sizeof(param.dims[0]))
+ if (dims->size() > sizeof(param.dims) / sizeof(param.dims[0]))
throw std::runtime_error("Squeeze: 'param.ndims' is out of range.");
- param.ndim = dims->Length();
+ param.ndim = dims->size();
for (int i = 0; i < param.ndim; ++i)
param.dims[i] = dims->Get(i);
}
- std::unique_ptr<ir::Operation> new_op(new ir::operation::Squeeze(inputs, outputs, param));
- subg.addOperation(std::move(new_op));
+ loadOperationTo<ir::operation::Squeeze>(op, subg, param);
}
-template <typename LoaderDomain, typename SpecificLoader>
-void BaseLoader<LoaderDomain, SpecificLoader>::loadPrelu(const Operator *op, ir::Graph &subg)
+template <typename LoaderDomain>
+void BaseLoader<LoaderDomain>::loadSplit(const Operator *op, ir::Graph &subg)
{
- ir::OperandIndexSequence inputs;
- ir::OperandIndexSequence outputs;
-
- loadOperationIO(op, inputs, outputs);
-
- std::unique_ptr<ir::Operation> new_op(new ir::operation::PReLU(inputs, outputs));
- subg.addOperation(std::move(new_op));
-}
-
-template <typename LoaderDomain, typename SpecificLoader>
-void BaseLoader<LoaderDomain, SpecificLoader>::loadSplit(const Operator *op, ir::Graph &subg)
-{
- ir::OperandIndexSequence inputs;
- ir::OperandIndexSequence outputs;
-
- loadOperationIO(op, inputs, outputs);
- // Notice : input order is strange for tflite split
- auto input = inputs.at(1);
- auto axis = inputs.at(0);
-
- // FIXME Handle SplitOptions.
- if (!subg.operands().at(axis).isConstant())
- throw std::runtime_error("Split: non-constant 'axis' is not supported.");
-
- ir::operation::Split::Param param{};
- param.axis = subg.operands().at(axis).template asScalar<int>();
+ ir::operation::Split::Param param;
const auto *options = op->builtin_options_as_SplitOptions();
param.num_splits = options->num_splits();
- std::unique_ptr<ir::Operation> new_op(new ir::operation::Split({input}, outputs, param));
- subg.addOperation(std::move(new_op));
+ loadOperationTo<ir::operation::Split>(op, subg, param);
}
-template <typename LoaderDomain, typename SpecificLoader>
-void BaseLoader<LoaderDomain, SpecificLoader>::loadSplitV(const Operator *op, ir::Graph &subg)
+template <typename LoaderDomain>
+void BaseLoader<LoaderDomain>::loadSplitV(const Operator *op, ir::Graph &subg)
{
- ir::OperandIndexSequence inputs;
- ir::OperandIndexSequence outputs;
-
- loadOperationIO(op, inputs, outputs);
-
- ir::operation::SplitV::Param param{};
-
+ ir::operation::SplitV::Param param;
const auto *options = op->builtin_options_as_SplitVOptions();
param.num_splits = options->num_splits();
- std::unique_ptr<ir::Operation> new_op(new ir::operation::SplitV(inputs, outputs, param));
- subg.addOperation(std::move(new_op));
-}
-
-template <typename LoaderDomain, typename SpecificLoader>
-void BaseLoader<LoaderDomain, SpecificLoader>::loadSlice(const Operator *op, ir::Graph &subg)
-{
- ir::OperandIndexSequence inputs;
- ir::OperandIndexSequence outputs;
-
- loadOperationIO(op, inputs, outputs);
-
- std::unique_ptr<ir::Operation> new_op{new ir::operation::Slice{inputs, outputs}};
- subg.addOperation(std::move(new_op));
+ loadOperationTo<ir::operation::SplitV>(op, subg, param);
}
-template <typename LoaderDomain, typename SpecificLoader>
-void BaseLoader<LoaderDomain, SpecificLoader>::loadStridedSlice(const Operator *op, ir::Graph &subg)
+template <typename LoaderDomain>
+void BaseLoader<LoaderDomain>::loadStridedSlice(const Operator *op, ir::Graph &subg)
{
- ir::OperandIndexSequence inputs;
- ir::OperandIndexSequence outputs;
-
- loadOperationIO(op, inputs, outputs);
-
ir::operation::StridedSlice::Param param;
-
const auto *options = op->builtin_options_as_StridedSliceOptions();
param.begin_mask = options->begin_mask();
param.end_mask = options->end_mask();
param.shrink_axis_mask = options->shrink_axis_mask();
- std::unique_ptr<ir::Operation> new_op{new ir::operation::StridedSlice{inputs, outputs, param}};
- subg.addOperation(std::move(new_op));
+ loadOperationTo<ir::operation::StridedSlice>(op, subg, param);
}
-template <typename LoaderDomain, typename SpecificLoader>
-void BaseLoader<LoaderDomain, SpecificLoader>::loadUnpack(const Operator *op, ir::Graph &subg)
+template <typename LoaderDomain>
+void BaseLoader<LoaderDomain>::loadUnpack(const Operator *op, ir::Graph &subg)
{
- ir::OperandIndexSequence inputs;
- ir::OperandIndexSequence outputs;
-
- loadOperationIO(op, inputs, outputs);
-
ir::operation::Unpack::Param param;
const auto *options = op->builtin_options_as_UnpackOptions();
param.num = options->num();
param.axis = options->axis();
- std::unique_ptr<ir::Operation> new_op(new ir::operation::Unpack(inputs, outputs, param));
- subg.addOperation(std::move(new_op));
+ loadOperationTo<ir::operation::Unpack>(op, subg, param);
}
-template <typename LoaderDomain, typename SpecificLoader>
-void BaseLoader<LoaderDomain, SpecificLoader>::loadComparison(const Operator *op, ir::Graph &subg)
+template <typename LoaderDomain>
+void BaseLoader<LoaderDomain>::loadComparison(const Operator *op, ir::Graph &subg)
{
- ir::OperandIndexSequence inputs;
- ir::OperandIndexSequence outputs;
-
- loadOperationIO(op, inputs, outputs);
-
ir::operation::Comparison::Param param;
-
const auto builtin_op = _model->operator_codes()->Get(op->opcode_index())->builtin_code();
switch (builtin_op)
@@ -1447,24 +1105,13 @@ void BaseLoader<LoaderDomain, SpecificLoader>::loadComparison(const Operator *op
std::string("Unsupported operation: ").append(EnumNameBuiltinOperator(builtin_op)));
}
- std::unique_ptr<ir::Operation> new_op(new ir::operation::Comparison(inputs, outputs, param));
- subg.addOperation(std::move(new_op));
+ loadOperationTo<ir::operation::Comparison>(op, subg, param);
}
-template <typename LoaderDomain, typename SpecificLoader>
-void BaseLoader<LoaderDomain, SpecificLoader>::loadEinsum(const Operator *op, ir::Graph &subg)
+template <typename LoaderDomain>
+void BaseLoader<LoaderDomain>::loadEinsum(const Operator *op, ir::Graph &subg)
{
- ir::OperandIndexSequence inputs;
- ir::OperandIndexSequence outputs;
-
- loadOperationIO(op, inputs, outputs);
ir::operation::Einsum::Param param;
-
- if (inputs.size() != 2)
- {
- throw std::runtime_error{"Einsum: NYI input - only support two inputs"};
- }
-
if (op->custom_options() == nullptr)
{
throw std::runtime_error{"Einsum: empty equation"};
@@ -1478,24 +1125,16 @@ void BaseLoader<LoaderDomain, SpecificLoader>::loadEinsum(const Operator *op, ir
param.equation = attr_map["equation"].ToString();
}
- std::unique_ptr<ir::Operation> new_op{new ir::operation::Einsum{inputs, outputs, param}};
- subg.addOperation(std::move(new_op));
+ const auto es = loadOperationTo<ir::operation::Einsum>(op, subg, param);
+ if (es->getInputs().size() != 2)
+ {
+ throw std::runtime_error{"Einsum: NYI input - only support two inputs"};
+ }
}
-template <typename LoaderDomain, typename SpecificLoader>
-void BaseLoader<LoaderDomain, SpecificLoader>::loadFusedBatchNorm(const Operator *op,
- ir::Graph &subg)
+template <typename LoaderDomain>
+void BaseLoader<LoaderDomain>::loadFusedBatchNorm(const Operator *op, ir::Graph &subg)
{
- ir::OperandIndexSequence inputs;
- ir::OperandIndexSequence outputs;
-
- loadOperationIO(op, inputs, outputs);
ir::operation::FusedBatchNorm::Param param;
-
- if (inputs.size() != 5)
- {
- throw std::runtime_error{"FusedBatchNorm: NYI input - only support five inputs"};
- }
-
if (op->custom_options() == nullptr)
{
throw std::runtime_error{"FusedBatchNorm: empty option"};
@@ -1511,195 +1150,104 @@ void BaseLoader<LoaderDomain, SpecificLoader>::loadFusedBatchNorm(const Operator
param.data_format = attr_map["data_format"].ToString();
}
- std::unique_ptr<ir::Operation> new_op{new ir::operation::FusedBatchNorm{inputs, outputs, param}};
- subg.addOperation(std::move(new_op));
+ const auto fbn = loadOperationTo<ir::operation::FusedBatchNorm>(op, subg, param);
+
+ if (fbn->getInputs().size() != 5)
+ {
+ throw std::runtime_error{"FusedBatchNorm: NYI input - only support five inputs"};
+ }
}
-template <typename LoaderDomain, typename SpecificLoader>
-void BaseLoader<LoaderDomain, SpecificLoader>::loadOneHot(const Operator *op, ir::Graph &subg)
+template <typename LoaderDomain>
+void BaseLoader<LoaderDomain>::loadOneHot(const Operator *op, ir::Graph &subg)
{
if (op->inputs()->size() != 4 || op->outputs()->size() != 1)
throw std::runtime_error("OneHot Op has wrong number of input or output tensors.");
- // Set input and output tensors
- ir::OperandIndexSequence inputs, outputs;
- loadOperationIO(op, inputs, outputs);
-
// Set parameter
- const auto axis = op->builtin_options_as_OneHotOptions()->axis();
- std::unique_ptr<ir::Operation> new_op(new ir::operation::OneHot(inputs, outputs, {axis}));
- subg.addOperation(std::move(new_op));
-}
+ ir::operation::OneHot::Param param;
+ param.axis = op->builtin_options_as_OneHotOptions()->axis();
-template <typename LoaderDomain, typename SpecificLoader>
-void BaseLoader<LoaderDomain, SpecificLoader>::loadShape(const Operator *op, ir::Graph &subg)
-{
- ir::OperandIndexSequence inputs;
- ir::OperandIndexSequence outputs;
-
- loadOperationIO(op, inputs, outputs);
-
- // ir::operation::Shape::Param param;
- // const auto *options = op->builtin_options_as_ShapeOptions();
- // param.out_type = tensorTypeToDataType(options->out_type());
-
- std::unique_ptr<ir::Operation> new_op(new ir::operation::Shape(inputs, outputs /*, param*/));
- subg.addOperation(std::move(new_op));
+ loadOperationTo<ir::operation::OneHot>(op, subg, param);
}
-template <typename LoaderDomain, typename SpecificLoader>
-void BaseLoader<LoaderDomain, SpecificLoader>::loadIf(const Operator *op, ir::Graph &subg)
+template <typename LoaderDomain>
+void BaseLoader<LoaderDomain>::loadIf(const Operator *op, ir::Graph &subg)
{
- ir::OperandIndexSequence inputs;
- ir::OperandIndexSequence outputs;
+ const auto *options = op->builtin_options_as_IfOptions();
+ const int32_t then_index = options->then_subgraph_index();
+ const int32_t else_index = options->else_subgraph_index();
- loadOperationIO(op, inputs, outputs);
+ verifySubgraphIndex(then_index);
+ verifySubgraphIndex(else_index);
ir::operation::If::Param param;
- const auto *options = op->builtin_options_as_IfOptions();
- const uint32_t then_index = options->then_subgraph_index();
- const uint32_t else_index = options->else_subgraph_index();
- param.then_subg_index = ir::SubgraphIndex{then_index};
- param.else_subg_index = ir::SubgraphIndex{else_index};
+ param.then_subg_index = ir::SubgraphIndex{static_cast<uint32_t>(then_index)};
+ param.else_subg_index = ir::SubgraphIndex{static_cast<uint32_t>(else_index)};
- std::unique_ptr<ir::Operation> new_op(new ir::operation::If(inputs, outputs, param));
- subg.addOperation(std::move(new_op));
+ loadOperationTo<ir::operation::If>(op, subg, param);
}
-template <typename LoaderDomain, typename SpecificLoader>
-void BaseLoader<LoaderDomain, SpecificLoader>::loadWhile(const Operator *op, ir::Graph &subg)
+template <typename LoaderDomain>
+void BaseLoader<LoaderDomain>::loadWhile(const Operator *op, ir::Graph &subg)
{
- ir::OperandIndexSequence inputs;
- ir::OperandIndexSequence outputs;
+ const auto *options = op->builtin_options_as_WhileOptions();
+ const int32_t cond_index = options->cond_subgraph_index();
+ const int32_t body_index = options->body_subgraph_index();
- loadOperationIO(op, inputs, outputs);
+ verifySubgraphIndex(cond_index);
+ verifySubgraphIndex(body_index);
ir::operation::While::Param param;
- const auto *options = op->builtin_options_as_WhileOptions();
- const uint32_t cond_index = options->cond_subgraph_index();
- const uint32_t body_index = options->body_subgraph_index();
- param.cond_subg_index = ir::SubgraphIndex{cond_index};
- param.body_subg_index = ir::SubgraphIndex{body_index};
+ param.cond_subg_index = ir::SubgraphIndex{static_cast<uint32_t>(cond_index)};
+ param.body_subg_index = ir::SubgraphIndex{static_cast<uint32_t>(body_index)};
- std::unique_ptr<ir::Operation> new_op(new ir::operation::While(inputs, outputs, param));
- subg.addOperation(std::move(new_op));
+ loadOperationTo<ir::operation::While>(op, subg, param);
}
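loadIf and loadWhile now read the subgraph indices as signed int32_t values and pass them through verifySubgraphIndex before casting to SubgraphIndex. The helper's body is outside this section; a minimal sketch, assuming it only performs a bounds check on the signed index read from the flatbuffer (the message text is an assumption):

template <typename LoaderDomain>
void BaseLoader<LoaderDomain>::verifySubgraphIndex(int subg_index)
{
  // Reject negative indices (the flatbuffer field is signed) and indices past
  // the number of subgraphs in the loaded model.
  const auto num_subgraphs = _model->subgraphs()->size();
  if (subg_index < 0 || subg_index >= static_cast<int>(num_subgraphs))
    throw std::runtime_error{std::string{"Invalid subgraph index - "} +
                             std::to_string(subg_index)};
}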
-template <typename LoaderDomain, typename SpecificLoader>
-void BaseLoader<LoaderDomain, SpecificLoader>::loadArgMax(const Operator *op, ir::Graph &subg)
+template <typename LoaderDomain>
+void BaseLoader<LoaderDomain>::loadArgMax(const Operator *op, ir::Graph &subg)
{
- ir::OperandIndexSequence inputs;
- ir::OperandIndexSequence outputs;
-
- loadOperationIO(op, inputs, outputs);
-
- auto inputOperand = subg.operands().at(inputs.at(0));
- auto axisOperand = subg.operands().at(inputs.at(1));
-
- if (!axisOperand.isConstant())
- throw std::runtime_error("ArgMax: non-constant 'axis' is not supported.");
- if (!(axisOperand.operandSize() == 4 && (axisOperand.typeInfo().type() == ir::DataType::INT32 ||
- axisOperand.typeInfo().type() == ir::DataType::INT64)))
- throw std::runtime_error("ArgMax: `axis` with an int32 or int64 element is only supported.");
-
ir::operation::ArgMax::Param param;
- param.axis = axisOperand.template asVector<int>()[0];
const auto output_type = op->builtin_options_as_ArgMaxOptions()->output_type();
switch (output_type)
{
case TensorType::TensorType_INT32:
case TensorType::TensorType_INT64:
+ param.output_type = tensorTypeToDataType(output_type);
break;
default:
throw std::runtime_error("ArgMax: `output_type` must be either int32 or int64.");
}
- param.output_type = tensorTypeToDataType(output_type);
- std::unique_ptr<ir::Operation> new_op(new ir::operation::ArgMax(inputs, outputs, param));
- subg.addOperation(std::move(new_op));
-}
+ auto am = loadOperationTo<ir::operation::ArgMax>(op, subg, param);
-template <typename LoaderDomain, typename SpecificLoader>
-void BaseLoader<LoaderDomain, SpecificLoader>::loadPow(const Operator *op, ir::Graph &subg)
-{
- ir::OperandIndexSequence inputs;
- ir::OperandIndexSequence outputs;
-
- loadOperationIO(op, inputs, outputs);
-
- std::unique_ptr<ir::Operation> new_op(new ir::operation::Pow(inputs, outputs));
- subg.addOperation(std::move(new_op));
-}
-
-template <typename LoaderDomain, typename SpecificLoader>
-void BaseLoader<LoaderDomain, SpecificLoader>::loadRange(const Operator *op, ir::Graph &subg)
-{
- ir::OperandIndexSequence inputs;
- ir::OperandIndexSequence outputs;
-
- loadOperationIO(op, inputs, outputs);
-
- std::unique_ptr<ir::Operation> new_op(new ir::operation::Range(inputs, outputs));
- subg.addOperation(std::move(new_op));
-}
-
-template <typename LoaderDomain, typename SpecificLoader>
-void BaseLoader<LoaderDomain, SpecificLoader>::loadTile(const Operator *op, ir::Graph &subg)
-{
- ir::OperandIndexSequence inputs;
- ir::OperandIndexSequence outputs;
-
- loadOperationIO(op, inputs, outputs);
-
- auto multiples = inputs.at(ir::operation::Tile::MULTIPLES);
-
- if (!subg.operands().at(multiples).isConstant())
- throw std::runtime_error("Tile: non-constant 'multiples' is not supported.");
-
- std::unique_ptr<ir::Operation> new_op(new ir::operation::Tile(inputs, outputs));
- subg.addOperation(std::move(new_op));
+ auto &axisOperand = subg.operands().at(am->getInputs().at(ir::operation::ArgMax::Input::AXIS));
+ if (!(axisOperand.operandSize() == 4 && (axisOperand.typeInfo().type() == ir::DataType::INT32 ||
+ axisOperand.typeInfo().type() == ir::DataType::INT64)))
+ throw std::runtime_error("ArgMax: `axis` with an int32 or int64 element is only supported.");
}
-template <typename LoaderDomain, typename SpecificLoader>
-void BaseLoader<LoaderDomain, SpecificLoader>::loadLogSoftmax(const Operator *op, ir::Graph &subg)
+template <typename LoaderDomain>
+void BaseLoader<LoaderDomain>::loadLogSoftmax(const Operator *op, ir::Graph &subg)
{
- ir::OperandIndexSequence inputs;
- ir::OperandIndexSequence outputs;
-
- loadOperationIO(op, inputs, outputs);
-
ir::operation::LogSoftmax::Param param;
-
// In tflite, beta is fixed to 1.0 and axis is fixed to -1.
param.beta = 1.0f;
param.axis = -1;
- std::unique_ptr<ir::Operation> new_op(new ir::operation::LogSoftmax(inputs, outputs, param));
- subg.addOperation(std::move(new_op));
-}
-
-template <typename LoaderDomain, typename SpecificLoader>
-void BaseLoader<LoaderDomain, SpecificLoader>::loadL2Normalization(const Operator *op,
- ir::Graph &subg)
-{
- ir::OperandIndexSequence inputs;
- ir::OperandIndexSequence outputs;
-
- loadOperationIO(op, inputs, outputs);
-
- std::unique_ptr<ir::Operation> new_op(new ir::operation::L2Normalization(inputs, outputs));
- subg.addOperation(std::move(new_op));
+ loadOperationTo<ir::operation::LogSoftmax>(op, subg, param);
}
-template <typename LoaderDomain, typename SpecificLoader>
-void BaseLoader<LoaderDomain, SpecificLoader>::loadLeakyRelu(const Operator *op, ir::Graph &subg)
+template <typename LoaderDomain>
+void BaseLoader<LoaderDomain>::loadLeakyRelu(const Operator *op, ir::Graph &subg)
{
float alpha = op->builtin_options_as_LeakyReluOptions()->alpha();
loadElementwiseActivation(op, subg, ir::operation::ElementwiseActivation::Type::LEAKY_RELU, alpha,
1.f);
}
-template <typename LoaderDomain, typename SpecificLoader>
-void BaseLoader<LoaderDomain, SpecificLoader>::loadOperation(const Operator *op, ir::Graph &subg)
+template <typename LoaderDomain>
+void BaseLoader<LoaderDomain>::loadOperation(const Operator *op, ir::Graph &subg)
{
const auto builtin_op = _model->operator_codes()->Get(op->opcode_index())->builtin_code();
@@ -1733,16 +1281,16 @@ void BaseLoader<LoaderDomain, SpecificLoader>::loadOperation(const Operator *op,
loadFC(op, subg);
return;
case BuiltinOperator::BuiltinOperator_ADD:
- loadBinaryArithmetic<ir::operation::BinaryArithmetic::ArithmeticType::ADD>(op, subg);
+ loadBinaryArithmetic(op, subg, ir::operation::BinaryArithmetic::ArithmeticType::ADD);
return;
case BuiltinOperator::BuiltinOperator_SUB:
- loadBinaryArithmetic<ir::operation::BinaryArithmetic::ArithmeticType::SUB>(op, subg);
+ loadBinaryArithmetic(op, subg, ir::operation::BinaryArithmetic::ArithmeticType::SUB);
return;
case BuiltinOperator::BuiltinOperator_MUL:
- loadBinaryArithmetic<ir::operation::BinaryArithmetic::ArithmeticType::MUL>(op, subg);
+ loadBinaryArithmetic(op, subg, ir::operation::BinaryArithmetic::ArithmeticType::MUL);
return;
case BuiltinOperator::BuiltinOperator_DIV:
- loadBinaryArithmetic<ir::operation::BinaryArithmetic::ArithmeticType::DIV>(op, subg);
+ loadBinaryArithmetic(op, subg, ir::operation::BinaryArithmetic::ArithmeticType::DIV);
return;
case BuiltinOperator::BuiltinOperator_PACK:
loadPack(op, subg);
@@ -1769,40 +1317,37 @@ void BaseLoader<LoaderDomain, SpecificLoader>::loadOperation(const Operator *op,
loadElementwiseUnary(op, subg, ir::operation::ElementwiseUnary::Type::RSQRT);
return;
case BuiltinOperator::BuiltinOperator_SELECT:
- loadSelect(op, subg);
- return;
case BuiltinOperator::BuiltinOperator_SELECT_V2:
- // Use same loader with BuiltinOperator_SELECT
- loadSelect(op, subg);
+ loadOperationTo<ir::operation::Select>(op, subg);
return;
case BuiltinOperator::BuiltinOperator_SQRT:
loadElementwiseUnary(op, subg, ir::operation::ElementwiseUnary::Type::SQRT);
return;
case BuiltinOperator::BuiltinOperator_SQUARED_DIFFERENCE:
- loadSquaredDifference(op, subg);
+ loadOperationTo<ir::operation::SquaredDifference>(op, subg);
return;
case BuiltinOperator::BuiltinOperator_TANH:
loadElementwiseActivation(op, subg, ir::operation::ElementwiseActivation::Type::TANH, 1.f,
1.f);
return;
case BuiltinOperator::BuiltinOperator_TRANSPOSE:
- loadTranspose(op, subg);
+ loadOperationTo<ir::operation::Transpose>(op, subg);
return;
case BuiltinOperator::BuiltinOperator_MEAN:
- loadReduce<ir::operation::Reduce::ReduceType::MEAN>(op, subg);
+ loadReduce(op, subg, ir::operation::Reduce::ReduceType::MEAN);
return;
case BuiltinOperator::BuiltinOperator_REDUCE_ANY:
- loadReduce<ir::operation::Reduce::ReduceType::ANY>(op, subg);
+ loadReduce(op, subg, ir::operation::Reduce::ReduceType::ANY);
return;
case BuiltinOperator::BuiltinOperator_REDUCE_MAX:
- loadReduce<ir::operation::Reduce::ReduceType::MAX>(op, subg);
+ loadReduce(op, subg, ir::operation::Reduce::ReduceType::MAX);
return;
case BuiltinOperator::BuiltinOperator_REVERSE_V2:
- loadReverseV2(op, subg);
+ loadOperationTo<ir::operation::Reverse>(op, subg);
return;
case BuiltinOperator::BuiltinOperator_PAD:
case BuiltinOperator::BuiltinOperator_PADV2:
- loadPad(op, subg);
+ loadOperationTo<ir::operation::Pad>(op, subg);
return;
case BuiltinOperator::BuiltinOperator_LOGISTIC:
loadElementwiseActivation(op, subg, ir::operation::ElementwiseActivation::Type::LOGISTIC);
@@ -1811,19 +1356,19 @@ void BaseLoader<LoaderDomain, SpecificLoader>::loadOperation(const Operator *op,
loadElementwiseUnary(op, subg, ir::operation::ElementwiseUnary::Type::EXP);
return;
case BuiltinOperator::BuiltinOperator_EXPAND_DIMS:
- loadExpandDims(op, subg);
+ loadOperationTo<ir::operation::ExpandDims>(op, subg);
return;
case BuiltinOperator::BuiltinOperator_GATHER:
loadGather(op, subg);
return;
case BuiltinOperator::BuiltinOperator_SPACE_TO_BATCH_ND:
- loadSpaceToBatchND(op, subg);
+ loadOperationTo<ir::operation::SpaceToBatchND>(op, subg);
return;
case BuiltinOperator::BuiltinOperator_BATCH_TO_SPACE_ND:
- loadBatchToSpaceND(op, subg);
+ loadOperationTo<ir::operation::BatchToSpaceND>(op, subg);
return;
case BuiltinOperator::BuiltinOperator_SUM:
- loadReduce<ir::operation::Reduce::ReduceType::SUM>(op, subg);
+ loadReduce(op, subg, ir::operation::Reduce::ReduceType::SUM);
return;
case BuiltinOperator::BuiltinOperator_CUSTOM:
loadCustom(op, subg);
@@ -1832,7 +1377,7 @@ void BaseLoader<LoaderDomain, SpecificLoader>::loadOperation(const Operator *op,
loadSqueeze(op, subg);
return;
case BuiltinOperator::BuiltinOperator_PRELU:
- loadPrelu(op, subg);
+ loadOperationTo<ir::operation::PReLU>(op, subg);
return;
case BuiltinOperator::BuiltinOperator_SPLIT:
loadSplit(op, subg);
@@ -1841,7 +1386,7 @@ void BaseLoader<LoaderDomain, SpecificLoader>::loadOperation(const Operator *op,
loadSplitV(op, subg);
return;
case BuiltinOperator::BuiltinOperator_SLICE:
- loadSlice(op, subg);
+ loadOperationTo<ir::operation::Slice>(op, subg);
return;
case BuiltinOperator::BuiltinOperator_STRIDED_SLICE:
loadStridedSlice(op, subg);
@@ -1850,10 +1395,10 @@ void BaseLoader<LoaderDomain, SpecificLoader>::loadOperation(const Operator *op,
loadUnpack(op, subg);
return;
case BuiltinOperator::BuiltinOperator_MINIMUM:
- loadElementwiseBinary<ir::operation::ElementwiseBinary::ElementwiseBinaryType::MIN>(op, subg);
+ loadElementwiseBinary(op, subg, ir::operation::ElementwiseBinary::ElementwiseBinaryType::MIN);
return;
case BuiltinOperator::BuiltinOperator_MAXIMUM:
- loadElementwiseBinary<ir::operation::ElementwiseBinary::ElementwiseBinaryType::MAX>(op, subg);
+ loadElementwiseBinary(op, subg, ir::operation::ElementwiseBinary::ElementwiseBinaryType::MAX);
return;
case BuiltinOperator::BuiltinOperator_CAST:
loadElementwiseUnary(op, subg, ir::operation::ElementwiseUnary::Type::CAST);
@@ -1879,10 +1424,10 @@ void BaseLoader<LoaderDomain, SpecificLoader>::loadOperation(const Operator *op,
loadElementwiseUnary(op, subg, ir::operation::ElementwiseUnary::Type::SIN);
return;
case BuiltinOperator::BuiltinOperator_SHAPE:
- loadShape(op, subg);
+ loadOperationTo<ir::operation::Shape>(op, subg);
return;
case BuiltinOperator::BuiltinOperator_REDUCE_PROD:
- loadReduce<ir::operation::Reduce::ReduceType::PROD>(op, subg);
+ loadReduce(op, subg, ir::operation::Reduce::ReduceType::PROD);
return;
case BuiltinOperator::BuiltinOperator_IF:
loadIf(op, subg);
@@ -1903,26 +1448,26 @@ void BaseLoader<LoaderDomain, SpecificLoader>::loadOperation(const Operator *op,
loadElementwiseUnary(op, subg, ir::operation::ElementwiseUnary::Type::ROUND);
return;
case BuiltinOperator::BuiltinOperator_POW:
- loadPow(op, subg);
+ loadOperationTo<ir::operation::Pow>(op, subg);
return;
case BuiltinOperator::BuiltinOperator_LOGICAL_NOT:
loadElementwiseUnary(op, subg, ir::operation::ElementwiseUnary::Type::LOGICAL_NOT);
return;
case BuiltinOperator::BuiltinOperator_LOGICAL_OR:
- loadElementwiseBinary<ir::operation::ElementwiseBinary::ElementwiseBinaryType::LOGICAL_OR>(
- op, subg);
+ loadElementwiseBinary(op, subg,
+ ir::operation::ElementwiseBinary::ElementwiseBinaryType::LOGICAL_OR);
return;
case BuiltinOperator::BuiltinOperator_FILL:
- loadFill(op, subg);
+ loadOperationTo<ir::operation::Fill>(op, subg);
return;
case BuiltinOperator::BuiltinOperator_ZEROS_LIKE:
loadElementwiseUnary(op, subg, ir::operation::ElementwiseUnary::Type::ZEROS_LIKE);
return;
case BuiltinOperator::BuiltinOperator_TILE:
- loadTile(op, subg);
+ loadOperationTo<ir::operation::Tile>(op, subg);
return;
case BuiltinOperator::BuiltinOperator_RANGE:
- loadRange(op, subg);
+ loadOperationTo<ir::operation::Range>(op, subg);
return;
case BuiltinOperator::BuiltinOperator_BATCH_MATMUL:
loadBatchMatMul(op, subg);
@@ -1937,13 +1482,13 @@ void BaseLoader<LoaderDomain, SpecificLoader>::loadOperation(const Operator *op,
loadSpaceToDepth(op, subg);
return;
case BuiltinOperator::BuiltinOperator_L2_NORMALIZATION:
- loadL2Normalization(op, subg);
+ loadOperationTo<ir::operation::L2Normalization>(op, subg);
break;
case BuiltinOperator::BuiltinOperator_LEAKY_RELU:
loadLeakyRelu(op, subg);
return;
case BuiltinOperator::BuiltinOperator_RANK:
- loadRank(op, subg);
+ loadOperationTo<ir::operation::Rank>(op, subg);
return;
default:
throw std::runtime_error(
@@ -1951,8 +1496,7 @@ void BaseLoader<LoaderDomain, SpecificLoader>::loadOperation(const Operator *op,
}
}
-template <typename LoaderDomain, typename SpecificLoader>
-void BaseLoader<LoaderDomain, SpecificLoader>::loadModel()
+template <typename LoaderDomain> void BaseLoader<LoaderDomain>::loadModel()
{
LoaderDomain::VerifyModelBuffer(*_verifier.get());
_model = LoaderDomain::GetModel(_base);
@@ -1967,8 +1511,7 @@ void BaseLoader<LoaderDomain, SpecificLoader>::loadModel()
auto subgraphs = std::make_unique<ir::Subgraphs>();
for (uint32_t subgraph_index = 0; subgraph_index < domain_subgraphs->size(); ++subgraph_index)
{
- auto subg =
- static_cast<SpecificLoader *>(this)->loadSubgraph((*_model->subgraphs())[subgraph_index]);
+ auto subg = loadSubgraph((*_model->subgraphs())[subgraph_index]);
subgraphs->push(ir::SubgraphIndex{subgraph_index}, std::move(subg));
}
_subgraphs = std::move(subgraphs);
diff --git a/runtime/onert/frontend/circle/CMakeLists.txt b/runtime/onert/frontend/circle/CMakeLists.txt
index 8bcf85dd3..76dca9989 100644
--- a/runtime/onert/frontend/circle/CMakeLists.txt
+++ b/runtime/onert/frontend/circle/CMakeLists.txt
@@ -8,7 +8,7 @@ add_library(circle_loader SHARED ${CIRCLE_LOADER_SOURCES})
target_include_directories(circle_loader PUBLIC ${CMAKE_CURRENT_SOURCE_DIR}/include)
-target_link_libraries(circle_loader PUBLIC onert_core)
+target_link_libraries(circle_loader PRIVATE onert_core)
target_link_libraries(circle_loader PRIVATE base_loader nnfw_common nnfw_coverage)
target_link_libraries(circle_loader PRIVATE circle_schema)
diff --git a/runtime/onert/frontend/circle/src/circle_loader.cc b/runtime/onert/frontend/circle/src/circle_loader.cc
index 92a9ee7a5..4565ffc00 100644
--- a/runtime/onert/frontend/circle/src/circle_loader.cc
+++ b/runtime/onert/frontend/circle/src/circle_loader.cc
@@ -69,7 +69,7 @@ struct LoaderDomain
static bool VerifyModelBuffer(Verifier &verifier) { return circle::VerifyModelBuffer(verifier); }
};
-class CircleLoader final : public base_loader::BaseLoader<LoaderDomain, CircleLoader>
+class CircleLoader final : public base_loader::BaseLoader<LoaderDomain>
{
protected:
void loadInstanceNorm(const Operator *op, ir::Graph &subg);
@@ -91,7 +91,8 @@ public:
}
}
- std::unique_ptr<ir::Graph> loadSubgraph(const circle::SubGraph *circle_subg)
+private:
+ std::unique_ptr<ir::Graph> loadSubgraph(const circle::SubGraph *circle_subg) override
{
auto subg = std::make_unique<ir::Graph>();
// Load tensors
diff --git a/runtime/onert/frontend/nnapi/execution.cc b/runtime/onert/frontend/nnapi/execution.cc
index ce7da579e..56ca5ef00 100644
--- a/runtime/onert/frontend/nnapi/execution.cc
+++ b/runtime/onert/frontend/nnapi/execution.cc
@@ -94,12 +94,36 @@ int ANeuralNetworksExecution_setInput(ANeuralNetworksExecution *execution, int32
// Omitted optional input
// LSTM operation's some inputs can be optional input
+ // Transpose operation's permutation input can be optional input
if ((buffer == nullptr) && (length == 0))
{
+ uint32_t dims[1] = {0};
+ ANeuralNetworksOperandType compared_shape;
+ compared_shape.dimensionCount = 1;
+ compared_shape.dimensions = dims;
if (execution->hasUnspecifiedDims(operand_index))
{
return ANEURALNETWORKS_NO_ERROR;
}
+ else if (type == nullptr && execution->IsOptionalInput(operand_index))
+ {
+ if (!execution->setOptionalInput(index, type, buffer, length))
+ {
+ VERBOSE(NNAPI::Execution) << "setInput: Fail to set optional input" << std::endl;
+ return ANEURALNETWORKS_BAD_DATA;
+ }
+ return ANEURALNETWORKS_NO_ERROR;
+ }
+ // TODO Change this condition to check for a zero-sized operand
+ else if (execution->compareShape(&compared_shape, operand_index))
+ {
+ if (!execution->setInput(index, type, buffer, length))
+ {
+ VERBOSE(NNAPI::Execution) << "setInput: Fail to set input" << std::endl;
+ return ANEURALNETWORKS_BAD_DATA;
+ }
+ return ANEURALNETWORKS_NO_ERROR;
+ }
else
{
VERBOSE(NNAPI::Execution) << "setInput: Cannot handle fully-specified shape on model build "
diff --git a/runtime/onert/frontend/nnapi/wrapper/ANeuralNetworksExecution.cc b/runtime/onert/frontend/nnapi/wrapper/ANeuralNetworksExecution.cc
index eb12d7e76..6114b74b0 100644
--- a/runtime/onert/frontend/nnapi/wrapper/ANeuralNetworksExecution.cc
+++ b/runtime/onert/frontend/nnapi/wrapper/ANeuralNetworksExecution.cc
@@ -98,6 +98,17 @@ bool ANeuralNetworksExecution::compareShape(const ANeuralNetworksOperandType *ty
return operand_shape == shape_from_type;
}
+bool ANeuralNetworksExecution::IsOptionalInput(const onert::ir::OperandIndex index) noexcept
+{
+ const auto &operand_shape = _execution->primary_subgraph().operands().at(index).shape();
+ for (int32_t i = 0; i < operand_shape.rank(); ++i)
+ {
+ if (operand_shape.dim(i) != 0)
+ return false;
+ }
+ return true;
+}
+
bool ANeuralNetworksExecution::hasUnspecifiedDims(const onert::ir::OperandIndex index) noexcept
{
const auto operand_shape = _execution->primary_subgraph().operands().at(index).shape();
@@ -148,6 +159,45 @@ bool ANeuralNetworksExecution::setInput(uint32_t index, const ANeuralNetworksOpe
return true;
}
+bool ANeuralNetworksExecution::setOptionalInput(uint32_t index,
+ const ANeuralNetworksOperandType *type,
+ const void *buffer, size_t length) noexcept
+{
+ assert(type == nullptr);
+ assert(buffer == nullptr);
+ assert(length == 0);
+ try
+ {
+ onert::ir::IOIndex input_index{index};
+ const auto operand_index = getInputOperandIndex(index);
+
+ const auto type_info = _execution->primary_subgraph().operands().at(operand_index).typeInfo();
+ const auto shape = (type != nullptr)
+ ? NNAPIConvert::getShape(type)
+ : _execution->primary_subgraph().operands().at(operand_index).shape();
+
+ // ANeuralNetworksExecution::setInput() uses only shape information
+ ANeuralNetworksOperandType optional_input_type;
+ optional_input_type.dimensionCount = shape.rank();
+ std::vector<uint32_t> dims(optional_input_type.dimensionCount);
+ for (uint32_t i = 0; i < optional_input_type.dimensionCount; ++i)
+ {
+ dims.at(i) = shape.dim(i);
+ }
+ optional_input_type.dimensions = dims.data();
+
+ return setInput(index, &optional_input_type, buffer, length);
+ }
+ catch (const std::exception &e)
+ {
+ VERBOSE(EXCEPTION) << e.what() << std::endl;
+
+ return false;
+ }
+
+ return true;
+}
+
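The two additions above exist to honor the NNAPI convention for omitted optional inputs: an application signals omission by passing a null type, a null buffer, and a zero length. A small caller-side sketch of the case these branches now accept, assuming an already-created execution and that operand index 1 is an optional input (for example Transpose's permutation):

// Hypothetical call site; index 1 is assumed to refer to an optional input.
int rc = ANeuralNetworksExecution_setInput(execution, /*index=*/1,
                                           /*type=*/nullptr,
                                           /*buffer=*/nullptr, /*length=*/0);
// With this patch the wrapper detects the zero-sized operand via IsOptionalInput()
// and forwards the call through setOptionalInput() instead of rejecting the
// null buffer outright.
if (rc != ANEURALNETWORKS_NO_ERROR)
{
  // Handle the error as before; fully-specified non-optional inputs still fail here.
}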
bool ANeuralNetworksExecution::setOutput(uint32_t index, const ANeuralNetworksOperandType *type,
void *buffer, size_t length) noexcept
{
diff --git a/runtime/onert/frontend/nnapi/wrapper/ANeuralNetworksExecution.h b/runtime/onert/frontend/nnapi/wrapper/ANeuralNetworksExecution.h
index 848ae743f..1f4b868f6 100644
--- a/runtime/onert/frontend/nnapi/wrapper/ANeuralNetworksExecution.h
+++ b/runtime/onert/frontend/nnapi/wrapper/ANeuralNetworksExecution.h
@@ -35,6 +35,8 @@ public:
public:
bool setInput(uint32_t index, const ANeuralNetworksOperandType *type, const void *buffer,
size_t length) noexcept;
+ bool setOptionalInput(uint32_t index, const ANeuralNetworksOperandType *type, const void *buffer,
+ size_t length) noexcept;
bool setOutput(uint32_t index, const ANeuralNetworksOperandType *type, void *buffer,
size_t length) noexcept;
bool startExecute(void) noexcept;
@@ -46,6 +48,7 @@ public:
const onert::ir::OperandIndex index) noexcept;
bool compareShape(const ANeuralNetworksOperandType *type,
const onert::ir::OperandIndex index) noexcept;
+ bool IsOptionalInput(const onert::ir::OperandIndex index) noexcept;
bool hasUnspecifiedDims(const onert::ir::OperandIndex index) noexcept;
size_t getOperandSize(const onert::ir::OperandIndex index) noexcept;
const std::shared_ptr<onert::exec::Execution> instance(void) noexcept;
diff --git a/runtime/onert/frontend/nnapi/ANeuralNetworksModel.test.cc b/runtime/onert/frontend/nnapi/wrapper/ANeuralNetworksModel.test.cc
index 15a279a7e..bb42f2b08 100644
--- a/runtime/onert/frontend/nnapi/ANeuralNetworksModel.test.cc
+++ b/runtime/onert/frontend/nnapi/wrapper/ANeuralNetworksModel.test.cc
@@ -16,10 +16,10 @@
#include <gtest/gtest.h>
-#include "wrapper/ANeuralNetworksModel.h"
+#include "ANeuralNetworksModel.h"
-TEST(MODEL, model_build)
+TEST(MODEL, neg_model_build)
{
ANeuralNetworksModel model;
- ASSERT_EQ(model.isFinished(), false);
+ ASSERT_FALSE(model.isFinished());
}
diff --git a/runtime/onert/frontend/nnapi/wrapper/OperationFactory.cc b/runtime/onert/frontend/nnapi/wrapper/OperationFactory.cc
index 8e3d83db4..e6c38f5f8 100644
--- a/runtime/onert/frontend/nnapi/wrapper/OperationFactory.cc
+++ b/runtime/onert/frontend/nnapi/wrapper/OperationFactory.cc
@@ -708,31 +708,7 @@ OperationFactory::OperationFactory()
return new operation::StridedSlice{inputs, outputs, param};
};
- _map[ANEURALNETWORKS_TRANSPOSE] = [](const OperationFactory::Param &init_param,
- Operands &operands) {
- // TODO make this work with init_param.input_count == 1 (when permutation vector is optional)
-
- // Inputs
- // 0: An n-D tensor, specifying the tensor to be transposed.
- // 1: An optional 1-D Tensor of {@link ANEURALNETWORKS_TENSOR_INT32},
- // the permutation of the dimensions of the input tensor.
- // The returned tensor's dimension i corresponds to the input dimension
- // perm[i]. If perm is not given, it is set to (n-1...0), where n is the
- // rank of the input tensor. Hence by default, this operation performs a
- // regular matrix transpose on 2-D input Tensors.
- assert(init_param.input_count == 2);
- assert(init_param.output_count == 1);
-
- OperandIndexSequence inputs{init_param.inputs[0]};
- OperandIndexSequence outputs{init_param.outputs[0]};
- std::vector<std::int32_t> perm =
- operands.at(OperandIndex{init_param.inputs[1]}).asVector<std::int32_t>();
-
- operation::Transpose::Param param;
- param.perm.assign(perm.cbegin(), perm.cend());
-
- return new operation::Transpose{inputs, outputs, param};
- };
+ _map[ANEURALNETWORKS_TRANSPOSE] = createSimpleBinaryOp<operation::Transpose>;
_map[ANEURALNETWORKS_MUL] =
getBinaryArithmeticGenerator(onert::ir::operation::BinaryArithmetic::ArithmeticType::MUL);
@@ -982,6 +958,28 @@ OperationFactory::OperationFactory()
return new operation::ResizeBilinear{inputs, outputs, param};
};
+ _map[ANEURALNETWORKS_RESIZE_NEAREST_NEIGHBOR] = [](const OperationFactory::Param &init_param,
+ Operands &operands) {
+ assert((init_param.input_count == 3 || init_param.input_count == 4) &&
+ init_param.output_count == 1);
+
+ OperandIndexSequence outputs{init_param.outputs[0]};
+
+ // Each input should be interpreted as follows:
+ //
+ // 0 -> IFM Index
+ // 1 -> Height Index
+ // 2 -> Width Index
+ OperandIndexSequence inputs{init_param.inputs[0]};
+
+ operation::ResizeNearestNeighbor::Param param;
+ param.height_out = operands.at(OperandIndex{init_param.inputs[1]}).asScalar<int32_t>();
+ param.width_out = operands.at(OperandIndex{init_param.inputs[2]}).asScalar<int32_t>();
+ param.align_corners = false;
+ // The layout input is not supported yet
+ return new operation::ResizeNearestNeighbor{inputs, outputs, param};
+ };
+
_map[ANEURALNETWORKS_RELU1] = getElementwiseActivationGenerator(
onert::ir::operation::ElementwiseActivation::Type::RELU, 1.f, -1.f);
@@ -1304,6 +1302,105 @@ OperationFactory::OperationFactory()
}
param.cell_threshold = operands.at(OperandIndex{init_param.inputs[21]}).asScalar<float>();
param.projection_threshold = operands.at(OperandIndex{init_param.inputs[22]}).asScalar<float>();
+ // Initialize to avoid a warning/error from static code analyzers; the LSTM
+ // operation does not need time_major
+ param.time_major = false;
+
+ return new operation::LSTM{inputs, outputs, param};
+ };
+
+ _map[ANEURALNETWORKS_UNIDIRECTIONAL_SEQUENCE_LSTM] = [](const OperationFactory::Param &init_param,
+ Operands &operands) {
+ assert((init_param.input_count >= 24 && init_param.input_count <= 28) &&
+ (init_param.output_count >= 1 && init_param.output_count <= 3));
+
+ // Each input should be interpreted as follows:
+ //
+ // 0 -> Input Tensor Index
+ // 1 -> Input to Input Tensor Index
+ // 2 -> Input to Forget Tensor Index
+ // 3 -> Input to Cell Tensor Index
+ // 4 -> Input to Output Tensor Index
+ // 5 -> Recurrent to Input Weights Tensor Index
+ // 6 -> Recurrent to Forget Weights Tensor Index
+ // 7 -> Recurrent to Cell Weights Tensor Index
+ // 8 -> Recurrent to Output Weights Tensor Index
+ // 9 -> Cell to Input Weights Tensor Index
+ // 10 -> Cell to Forget Weights Tensor Index
+ // 11 -> Cell to Output Weights Tensor Index
+ // 12 -> Input Gate Bias Tensor Index
+ // 13 -> Forget Gate Bias Tensor Index
+ // 14 -> Cell Bias Tensor Index
+ // 15 -> Output Gate Bias Tensor Index
+ // 16 -> Projection Weights Tensor Index
+ // 17 -> Projection Bias Tensor Index
+ // 18 -> Output State In Tensor Index
+ // 19 -> Cell State In Tensor Index
+ assert(init_param.input_count - 3 > 20);
+ OperandIndexSequence inputs;
+ for (uint32_t n = 0; n < 20; ++n)
+ {
+ inputs.append(OperandIndex{init_param.inputs[n]});
+ }
+
+ // 24 -> Input Layer Normalization Weights Tensor Index
+ // 25 -> Forget Layer Normalization Weights Tensor Index
+ // 26 -> Cell Layer Normalization Weights Tensor Index
+ // 27 -> Output Layer Normalization Weights Tensor Index
+ if (init_param.input_count > 24)
+ {
+ for (uint32_t n = 24; n < 28; ++n)
+ {
+ if (init_param.input_count > n)
+ {
+ inputs.append(OperandIndex{init_param.inputs[n]});
+ }
+ }
+ }
+
+ // Each output should be interpreted as follows:
+ //
+ // 0 -> Output Tensor Index -> 3
+ // 1 -> Output State Out Tensor Index
+ // 2 -> Cell State Out Tensor Index
+ const OperandIndex scratch_buffer_index;
+ OperandIndex output_state_index =
+ init_param.output_count >= 2 ? OperandIndex{init_param.outputs[1]} : OperandIndex();
+ OperandIndex cell_state_index =
+ init_param.output_count >= 3 ? OperandIndex{init_param.outputs[2]} : OperandIndex();
+ const OperandIndex output_index = OperandIndex{init_param.outputs[0]};
+ OperandIndexSequence outputs{scratch_buffer_index, output_state_index, cell_state_index,
+ output_index};
+
+ operation::LSTM::Param param;
+ const auto activation_index = OperandIndex{init_param.inputs[20]};
+ switch (operands.at(activation_index).asScalar<int32_t>())
+ {
+ case 0:
+ param.activation = Activation::NONE;
+ break;
+ case 1:
+ param.activation = Activation::RELU;
+ break;
+ case 2:
+ param.activation = Activation::RELU1;
+ break;
+ case 3:
+ param.activation = Activation::RELU6;
+ break;
+ case 4:
+ param.activation = Activation::TANH;
+ break;
+ case 6:
+ param.activation = Activation::SIGMOID;
+ break;
+ default:
+ throw std::runtime_error("Unsupported activation type");
+ break;
+ }
+ param.cell_threshold = operands.at(OperandIndex{init_param.inputs[21]}).asScalar<float>();
+ param.projection_threshold = operands.at(OperandIndex{init_param.inputs[22]}).asScalar<float>();
+ param.time_major = operands.at(OperandIndex{init_param.inputs[23]}).asScalar<bool>();
return new operation::LSTM{inputs, outputs, param};
};
@@ -1406,7 +1503,7 @@ OperationFactory::OperationFactory()
// TODO Remove ANEURALNETWORKS_ABS_EX
_map[ANEURALNETWORKS_ABS_EX] = _map[ANEURALNETWORKS_ABS];
- _map[ANEURALNETWORKS_ARGMAX] = [](const OperationFactory::Param &init_param, Operands &operands) {
+ _map[ANEURALNETWORKS_ARGMAX] = [](const OperationFactory::Param &init_param, Operands &) {
assert(init_param.input_count == 2 && init_param.output_count == 1);
OperandIndexSequence outputs{init_param.outputs[0]};
@@ -1415,10 +1512,9 @@ OperationFactory::OperationFactory()
//
// 0 -> Input Tensor Index
// 1 -> Axis Tensor Index
- OperandIndexSequence inputs{init_param.inputs[0]};
+ OperandIndexSequence inputs{init_param.inputs[0], init_param.inputs[1]};
operation::ArgMax::Param param;
- param.axis = operands.at(OperandIndex{init_param.inputs[1]}).asScalar<std::int32_t>();
// NNAPI ARGMAX output type is always int32
param.output_type = DataType::INT32;
@@ -1517,7 +1613,7 @@ OperationFactory::OperationFactory()
assert(init_param.input_count == 3);
assert(init_param.output_count >= 1); // At least one output tensor and axis
- OperandIndexSequence inputs{init_param.inputs[0]};
+ OperandIndexSequence inputs{init_param.inputs[1], init_param.inputs[0]};
OperandIndexSequence outputs;
for (uint32_t n = 0; n < init_param.output_count; ++n)
{
@@ -1525,7 +1621,6 @@ OperationFactory::OperationFactory()
}
operation::Split::Param param;
- param.axis = operands.at(OperandIndex{init_param.inputs[1]}).asScalar<std::int32_t>();
param.num_splits = operands.at(OperandIndex{init_param.inputs[2]}).asScalar<std::int32_t>();
return new operation::Split{inputs, outputs, param};
diff --git a/runtime/onert/frontend/tflite/CMakeLists.txt b/runtime/onert/frontend/tflite/CMakeLists.txt
index fcadf5223..604a9e4cb 100644
--- a/runtime/onert/frontend/tflite/CMakeLists.txt
+++ b/runtime/onert/frontend/tflite/CMakeLists.txt
@@ -8,7 +8,7 @@ add_library(tflite_loader SHARED ${TFLITE_LOADER_SOURCES})
target_include_directories(tflite_loader PUBLIC ${CMAKE_CURRENT_SOURCE_DIR}/include)
-target_link_libraries(tflite_loader PUBLIC onert_core)
+target_link_libraries(tflite_loader PRIVATE onert_core)
target_link_libraries(tflite_loader PRIVATE base_loader nnfw_common nnfw_coverage)
install(TARGETS tflite_loader DESTINATION lib)
diff --git a/runtime/onert/frontend/tflite/src/tflite_loader.cc b/runtime/onert/frontend/tflite/src/tflite_loader.cc
index 7eef15717..fe4295ada 100644
--- a/runtime/onert/frontend/tflite/src/tflite_loader.cc
+++ b/runtime/onert/frontend/tflite/src/tflite_loader.cc
@@ -62,7 +62,7 @@ struct LoaderDomain
}
};
-class TFLiteLoader final : public base_loader::BaseLoader<LoaderDomain, TFLiteLoader>
+class TFLiteLoader final : public base_loader::BaseLoader<LoaderDomain>
{
public:
using BaseLoader::BaseLoader;
@@ -78,7 +78,8 @@ public:
}
}
- std::unique_ptr<ir::Graph> loadSubgraph(const onert_tflite::SubGraph *tflite_subg)
+private:
+ std::unique_ptr<ir::Graph> loadSubgraph(const onert_tflite::SubGraph *tflite_subg) override
{
auto subg = std::make_unique<ir::Graph>();
// Load tensors
diff --git a/runtime/onert/test/graph/Index.cc b/runtime/onert/test/graph/Index.cc
index 358e64c82..2d110e326 100644
--- a/runtime/onert/test/graph/Index.cc
+++ b/runtime/onert/test/graph/Index.cc
@@ -20,7 +20,7 @@
using Index = ::onert::util::Index<uint32_t, struct TestTag>;
-TEST(Index, index_test)
+TEST(Index, neg_index_test)
{
Index idx1{1u};
Index idx2{2u};
diff --git a/runtime/onert/test/graph/operand/IndexSet.cc b/runtime/onert/test/graph/operand/IndexSet.cc
index 6215e0d24..6ef425a2d 100644
--- a/runtime/onert/test/graph/operand/IndexSet.cc
+++ b/runtime/onert/test/graph/operand/IndexSet.cc
@@ -21,7 +21,7 @@
using onert::ir::OperandIndex;
using onert::ir::OperandIndexSequence;
-TEST(graph_OperandIndexSequence, append)
+TEST(graph_OperandIndexSequence, neg_append)
{
OperandIndexSequence iset{0, 2, 4, 8};
@@ -42,7 +42,7 @@ TEST(graph_OperandIndexSequence, append)
ASSERT_FALSE(iset.contains(OperandIndex{11}));
}
-TEST(graph_OperandIndexSequence, replace)
+TEST(graph_OperandIndexSequence, neg_replace)
{
OperandIndexSequence iset{0, 1, 2, 3};
diff --git a/runtime/onert/test/graph/operand/LayoutSet.cc b/runtime/onert/test/graph/operand/LayoutSet.cc
index e35bddd8b..ef965a41e 100644
--- a/runtime/onert/test/graph/operand/LayoutSet.cc
+++ b/runtime/onert/test/graph/operand/LayoutSet.cc
@@ -21,7 +21,22 @@
using onert::ir::Layout;
using onert::ir::LayoutSet;
-TEST(graph_operand_LayoutSet, layout_set_operators)
+TEST(graph_operand_LayoutSet, neg_add_remove)
+{
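+ // Removing a layout that is not in the set should leave the set unchanged.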
+ LayoutSet set{Layout::NCHW};
+ set.remove(Layout::NHWC);
+ ASSERT_EQ(set.size(), 1);
+ set.add(Layout::NHWC);
+ ASSERT_EQ(set.size(), 2);
+ set.remove(Layout::NHWC);
+ ASSERT_EQ(set.size(), 1);
+ set.remove(Layout::NCHW);
+ ASSERT_EQ(set.size(), 0);
+ set.remove(Layout::NCHW);
+ ASSERT_EQ(set.size(), 0);
+}
+
+TEST(graph_operand_LayoutSet, set_operators)
{
LayoutSet set1{Layout::NCHW};
LayoutSet set2{Layout::NHWC};
diff --git a/runtime/onert/test/graph/operand/Set.cc b/runtime/onert/test/graph/operand/Set.cc
index 0d35b5581..ffee417b8 100644
--- a/runtime/onert/test/graph/operand/Set.cc
+++ b/runtime/onert/test/graph/operand/Set.cc
@@ -18,7 +18,7 @@
#include "ir/Operands.h"
-TEST(graph_operand_Set, set_test)
+TEST(graph_operand_Set, neg_set_test)
{
onert::ir::Operands set;
diff --git a/runtime/onert/test/graph/operand/UseDef.cc b/runtime/onert/test/graph/operand/UseDef.cc
index cd2cdb739..a8686eb18 100644
--- a/runtime/onert/test/graph/operand/UseDef.cc
+++ b/runtime/onert/test/graph/operand/UseDef.cc
@@ -31,7 +31,7 @@ using Mock = onert_test::ir::SimpleMock;
} // namespace
-TEST(graph_operand_usedef, usedef_test)
+TEST(graph_operand_usedef, neg_usedef_test)
{
onert::ir::Graph graph;
onert::ir::verifier::DAGChecker verifier;
@@ -62,7 +62,7 @@ TEST(graph_operand_usedef, usedef_test)
graph.finishBuilding();
- ASSERT_EQ(verifier.verify(graph), true);
+ ASSERT_TRUE(verifier.verify(graph));
// Check def
ASSERT_EQ(graph.operands().at(operand_index1).getDef(), mocknode_index1);
diff --git a/runtime/onert/test/graph/operation/SetIO.cc b/runtime/onert/test/graph/operation/SetIO.cc
index 378c5b4b9..22068ff58 100644
--- a/runtime/onert/test/graph/operation/SetIO.cc
+++ b/runtime/onert/test/graph/operation/SetIO.cc
@@ -62,7 +62,7 @@ TEST(graph_operation_setIO, operation_setIO_conv)
ASSERT_EQ(conv->getInputs().at(Index{0}).value(), 8);
}
-TEST(graph_operation_setIO, operation_setIO_concat)
+TEST(graph_operation_setIO, neg_operation_setIO_concat)
{
onert::ir::Graph graph;
diff --git a/runtime/onert/test/graph/verifier/Verifier.cc b/runtime/onert/test/graph/verifier/Verifier.cc
index f8c7557e3..3bce2746c 100644
--- a/runtime/onert/test/graph/verifier/Verifier.cc
+++ b/runtime/onert/test/graph/verifier/Verifier.cc
@@ -45,5 +45,54 @@ TEST(Verifier, dag_checker)
onert::ir::verifier::DAGChecker verifier;
- ASSERT_EQ(verifier.verify(graph), true);
+ ASSERT_TRUE(verifier.verify(graph));
+}
+
+TEST(Verifier, neg_edge_consistency_checker_1)
+{
+ onert::ir::Graph graph;
+
+ onert::ir::Shape shape{3};
+ onert::ir::TypeInfo type{onert::ir::DataType::INT32};
+
+ auto operand1 = graph.addOperand(shape, type);
+ auto operand2 = graph.addOperand(shape, type);
+
+ graph.addInput(operand1);
+ graph.addOutput(operand2);
+
+ auto mock_op = std::make_unique<Mock>(IndexSet{operand1}, IndexSet{operand2});
+ auto op_ind = graph.addOperation(std::move(mock_op));
+
+ graph.finishBuilding();
+
+ graph.operands().at(operand1).removeUse(op_ind); // Manipulate the operand alone
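+ // The operation still lists operand1 as an input, but operand1 no longer records that use,
+ // so the edge consistency check below is expected to fail.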
+
+ onert::ir::verifier::EdgeConsistencyChecker verifier;
+ ASSERT_FALSE(verifier.verify(graph));
+}
+
+TEST(Verifier, neg_edge_consistency_checker_2)
+{
+ onert::ir::Graph graph;
+
+ onert::ir::Shape shape{3};
+ onert::ir::TypeInfo type{onert::ir::DataType::INT32};
+
+ auto operand1 = graph.addOperand(shape, type);
+ auto operand2 = graph.addOperand(shape, type);
+
+ graph.addInput(operand1);
+ graph.addOutput(operand2);
+
+ auto mock_op = std::make_unique<Mock>(IndexSet{operand1}, IndexSet{operand2});
+ auto mock_op_ptr = mock_op.get();
+ auto op_ind = graph.addOperation(std::move(mock_op));
+
+ graph.finishBuilding();
+
+ mock_op_ptr->setInputs({operand2}); // Manipulate the operation alone
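+ // The operation's inputs were changed without updating the operands' use/def information,
+ // so the edge consistency check below is expected to fail.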
+
+ onert::ir::verifier::EdgeConsistencyChecker verifier;
+ ASSERT_FALSE(verifier.verify(graph));
}
diff --git a/runtime/onert/test/util/ShapeInference.cc b/runtime/onert/test/util/ShapeInference.cc
index aab33fab5..a5f0af5ee 100644
--- a/runtime/onert/test/util/ShapeInference.cc
+++ b/runtime/onert/test/util/ShapeInference.cc
@@ -34,7 +34,7 @@ TEST(ShapeInference, Elementwise)
ASSERT_EQ(infered_out_shape.dim(3), 3);
}
-TEST(ShapeInference, IncorrectElementwise)
+TEST(ShapeInference, neg_Elementwise)
{
Shape lhs_shape{1, 299, 299, 3};
Shape rhs_shape{5, 3};
@@ -123,6 +123,18 @@ TEST(ShapeInference, Pool2DNodeExplicit)
ASSERT_EQ(infered_out_shape.asFeature(Layout::NHWC).C, 20);
}
+TEST(ShapeInference, neg_Pool2DNode_InvalidStride)
+{
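+ // A zero stride is invalid, so pooling shape inference is expected to throw.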
+ Shape in_shape{10, 6, 12, 20};
+ Stride stride{0, 7};
+ Padding padding{PaddingType::SAME};
+
+ operation::Pool2D::Param avg_pool_param{
+ operation::Pool2D::PoolType::AVG, 3, 6, stride, padding, Activation::NONE};
+ ASSERT_THROW(onert::shape_inference::inferPoolShape(in_shape, avg_pool_param),
+ std::runtime_error);
+}
+
TEST(ShapeInference, Conv2D)
{
Shape in_shape{10, 6, 12, 20};
@@ -159,6 +171,17 @@ TEST(ShapeInference, Conv2D)
ASSERT_EQ(infered_out_shape.asFeature(Layout::NHWC).C, 30);
}
+TEST(ShapeInference, neg_Conv2D_InvalidStride)
+{
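+ // Stride{0, 0} is invalid, so Conv2D shape inference is expected to throw.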
+ Shape in_shape{10, 6, 12, 20};
+ Shape ker_shape{30, 3, 6, 20};
+
+ operation::Conv2D::Param param{Stride{0, 0}, Padding{PaddingType::VALID}, Activation::NONE,
+ Dilation{1, 1}};
+ ASSERT_THROW(onert::shape_inference::inferConv2DShape(in_shape, ker_shape, param),
+ std::runtime_error);
+}
+
TEST(ShapeInference, DepthwiseConv2D)
{
Shape in_shape{10, 6, 12, 20};
@@ -195,6 +218,17 @@ TEST(ShapeInference, DepthwiseConv2D)
ASSERT_EQ(infered_out_shape.asFeature(Layout::NHWC).C, 60);
}
+TEST(ShapeInference, neg_DepthwiseConv2D_InvalidStride)
+{
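+ // Stride{3, 0} contains a zero stride, so DepthwiseConv2D shape inference is expected to throw.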
+ Shape in_shape{10, 6, 12, 20};
+ Shape ker_shape{1, 3, 6, 60};
+
+ operation::DepthwiseConv2D::Param param{Stride{3, 0}, Padding{PaddingType::VALID}, 3,
+ Activation::NONE};
+ ASSERT_THROW(onert::shape_inference::inferDepthwiseConv2DShape(in_shape, ker_shape, param),
+ std::runtime_error);
+}
+
TEST(ShapeInference, Concat)
{
{
@@ -328,7 +362,8 @@ TEST(ShapeInference, Transpose)
// pre-conditions
ASSERT_EQ(in_shape.rank(), perm.size());
ASSERT_EQ(expected.rank(), perm.size());
- auto inferred_out_shape = onert::shape_inference::inferTransposeShape(in_shape, perm);
+ auto inferred_out_shape =
+ onert::shape_inference::inferTransposeShape(in_shape, perm.data(), perm.size());
// post-conditions
ASSERT_EQ(inferred_out_shape.rank(), perm.size());
for (int32_t dim = 0; dim < expected.rank(); dim++)
@@ -369,12 +404,141 @@ TEST(ShapeInference, neg_Transpose)
{
std::vector<int> perm = {2, 0, 1, 0};
// int32_t rank = 3;
- ASSERT_THROW(onert::shape_inference::inferTransposeShape(in_shape, perm), std::runtime_error);
+ ASSERT_THROW(onert::shape_inference::inferTransposeShape(in_shape, perm.data(), perm.size()),
+ std::runtime_error);
}
// Invalid parameter value
{
std::vector<int> perm = {2, 0, 3};
// int32_t rank = 3;
- ASSERT_THROW(onert::shape_inference::inferTransposeShape(in_shape, perm), std::runtime_error);
+ ASSERT_THROW(onert::shape_inference::inferTransposeShape(in_shape, perm.data(), perm.size()),
+ std::runtime_error);
+ }
+}
+
+TEST(ShapeInference, Gather)
+{
+ auto check = [&](Shape &input, Shape &indices, Shape &expected, int32_t axis) {
+ int rank = input.rank();
+ auto actual = onert::shape_inference::inferGatherShape(input, indices, axis, rank);
+
+ ASSERT_EQ(actual.rank(), expected.rank());
+
+ for (int32_t dim = 0; dim < expected.rank(); dim++)
+ ASSERT_EQ(actual.dim(dim), expected.dim(dim));
+ };
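+ // For Gather, the output rank equals input rank + indices rank - 1: the indices dimensions
+ // replace the input dimension at `axis`, as the cases below illustrate.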
+
+ // check for 2-D, 3-D, axis 0
+ {
+ Shape input{3, 4};
+ Shape indices{1, 1, 2};
+ int32_t axis = 0;
+ Shape expected{1, 1, 2, 4};
+ check(input, indices, expected, axis);
+ }
+
+ // check for 2-D, 3-D, axis 1
+ {
+ Shape input{3, 4};
+ Shape indices{1, 2, 1};
+ int32_t axis = 1;
+ Shape expected{3, 1, 2, 1};
+ check(input, indices, expected, axis);
+ }
+
+ // check for 3-D, 2-D, axis 0
+ {
+ Shape input{2, 3, 4};
+ Shape indices{1, 2};
+ int32_t axis = 0;
+ Shape expected{1, 2, 3, 4};
+ check(input, indices, expected, axis);
+ }
+
+ // check for 3-D, 2-D, axis 2
+ {
+ Shape input{2, 3, 4};
+ Shape indices{2, 1};
+ int32_t axis = 2;
+ Shape expected{2, 3, 2, 1};
+ check(input, indices, expected, axis);
+ }
+
+ // check for 4D, axis 0
+ {
+ Shape input{1, 2, 3, 4};
+ Shape indices{2};
+ int32_t axis = 0;
+ Shape expected{2, 2, 3, 4};
+ check(input, indices, expected, axis);
+ }
+}
+
+TEST(ShapeInference, BCQFullyConnected)
+{
+ auto check = [&](Shape &in_shape, Shape &cluster_shape, std::vector<int> cluster,
+ Shape &expected) {
+ auto actual = onert::shape_inference::inferBCQFullyConnectedShape(in_shape, cluster_shape,
+ cluster.data());
+ ASSERT_EQ(actual.rank(), expected.rank());
+
+ for (int32_t dim = 0; dim < expected.rank(); dim++)
+ ASSERT_EQ(actual.dim(dim), expected.dim(dim));
+ };
+
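+ // The inferred output row count appears to equal the sum of the second value in each cluster
+ // pair (10 + 10 + 10 = 30, and 50); this is inferred from the expected shapes below rather
+ // than from a documented contract.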
+ {
+ Shape in_shape{10, 1};
+ Shape cluster_shape{3, 2};
+ std::vector<int> cluster = {1, 10, 2, 10, 3, 10};
+
+ Shape expected{30, 1};
+ check(in_shape, cluster_shape, cluster, expected);
+ }
+
+ {
+ Shape in_shape{1, 1};
+ Shape cluster_shape{1, 2};
+ std::vector<int> cluster = {3, 50};
+
+ Shape expected{50, 1};
+ check(in_shape, cluster_shape, cluster, expected);
+ }
+}
+
+TEST(ShapeInference, BCQGather)
+{
+ auto check = [&](Shape &indices_shape, Shape &cluster_shape, std::vector<int> cluster,
+ uint32_t hidden_size, uint32_t axis, int rank, Shape &expected) {
+ operation::BCQGather::Param param{hidden_size, axis};
+ auto actual = onert::shape_inference::inferBCQGatherShape(indices_shape, cluster_shape,
+ cluster.data(), rank, param);
+ ASSERT_EQ(actual.rank(), expected.rank());
+
+ for (int32_t dim = 0; dim < expected.rank(); dim++)
+ ASSERT_EQ(actual.dim(dim), expected.dim(dim));
+ };
+
+ {
+ Shape indices_shape{5, 1};
+ Shape cluster_shape{3, 2};
+ std::vector<int> cluster = {1, 10, 2, 10, 3, 10};
+ uint32_t hidden_size = 10;
+ uint32_t axis = 0;
+ int rank = 2;
+
+ Shape expected{5, 1, 10};
+ check(indices_shape, cluster_shape, cluster, hidden_size, axis, rank, expected);
+ }
+
+ {
+ Shape indices_shape{5, 1};
+ Shape cluster_shape{3, 2};
+ std::vector<int> cluster = {1, 10, 2, 10, 3, 10};
+ uint32_t hidden_size = 10;
+ uint32_t axis = 1;
+ int rank = 2;
+
+ Shape expected{30, 5, 1};
+ check(indices_shape, cluster_shape, cluster, hidden_size, axis, rank, expected);
}
}