path: root/runtime
author    Chunseok Lee <chunseok.lee@samsung.com>  2022-09-07 19:04:21 +0900
committer Chunseok Lee <chunseok.lee@samsung.com>  2022-09-07 19:04:21 +0900
commit    c690d52bdd137ed6a17353aa7af35e8141ece77b (patch)
tree      dbb7dd99133132dfbffcb8c9e9af4f1ffc2f4808 /runtime
parent    3ad689f0803519e343c36d5700646e86059df961 (diff)
download  nnfw-c690d52bdd137ed6a17353aa7af35e8141ece77b.tar.gz
          nnfw-c690d52bdd137ed6a17353aa7af35e8141ece77b.tar.bz2
          nnfw-c690d52bdd137ed6a17353aa7af35e8141ece77b.zip
Diffstat (limited to 'runtime')
-rw-r--r--  runtime/contrib/android/api/build.gradle | 2
-rw-r--r--  runtime/libs/misc/CMakeLists.txt | 19
-rw-r--r--  runtime/libs/misc/include/misc/EnvConfigSource.h (renamed from runtime/onert/core/include/util/EnvConfigSource.h) | 18
-rw-r--r--  runtime/libs/misc/include/misc/GeneralConfigSource.h (renamed from runtime/onert/core/include/util/GeneralConfigSource.h) | 18
-rw-r--r--  runtime/libs/misc/include/misc/IConfigSource.h (renamed from runtime/onert/core/include/util/IConfigSource.h) | 14
-rw-r--r--  runtime/libs/misc/include/misc/string_helpers.h | 2
-rw-r--r--  runtime/libs/misc/src/EnvConfigSource.cpp (renamed from runtime/onert/core/src/util/EnvConfigSource.cc) | 10
-rw-r--r--  runtime/libs/misc/src/GeneralConfigSource.cpp (renamed from runtime/onert/core/src/util/GeneralConfigSource.cc) | 17
-rw-r--r--  runtime/libs/misc/src/string_helpers.test.cpp | 81
-rw-r--r--  runtime/libs/misc/src/tensor/IndexEnumerator.test.cpp | 59
-rw-r--r--  runtime/libs/misc/src/tensor/IndexIterator.test.cpp (renamed from runtime/libs/misc/examples/tensor_index_iterator.cpp) | 37
-rw-r--r--  runtime/libs/ndarray/CMakeLists.txt | 11
-rw-r--r--  runtime/libs/ndarray/include/ndarray/Array.h | 24
-rw-r--r--  runtime/libs/ndarray/src/Array.test.cpp | 452
-rw-r--r--  runtime/libs/ndarray/src/ContiguousSpan.test.cpp | 198
-rw-r--r--  runtime/libs/ndarray/src/detail/cxx14.h | 67
-rw-r--r--  runtime/libs/ndarray/test/CMakeLists.txt | 18
-rw-r--r--  runtime/libs/ndarray/test/ndarray_test.cpp | 122
-rw-r--r--  runtime/onert/CMakeLists.txt | 6
-rw-r--r--  runtime/onert/api/CMakeLists.txt | 1
-rw-r--r--  runtime/onert/api/include/nnfw.h | 4
-rw-r--r--  runtime/onert/api/include/nnfw_version.h | 2
-rw-r--r--  runtime/onert/api/src/nnfw_api.cc | 10
-rw-r--r--  runtime/onert/api/src/nnfw_api_internal.cc | 267
-rw-r--r--  runtime/onert/api/src/nnfw_api_internal.h | 29
-rw-r--r--  runtime/onert/backend/acl_cl/Backend.h | 4
-rw-r--r--  runtime/onert/backend/acl_neon/Backend.h | 4
-rw-r--r--  runtime/onert/backend/cpu/CMakeLists.txt | 2
-rw-r--r--  runtime/onert/backend/cpu/ExternalContext.h | 2
-rw-r--r--  runtime/onert/backend/cpu/KernelGenerator.cc | 10
-rw-r--r--  runtime/onert/backend/cpu/ops/ConvolutionLayer.cc | 58
-rw-r--r--  runtime/onert/backend/cpu/ops/ConvolutionLayer.h | 5
-rw-r--r--  runtime/onert/backend/cpu/ops/DepthwiseConvolutionLayer.cc | 64
-rw-r--r--  runtime/onert/backend/cpu/ops/DepthwiseConvolutionLayer.h | 8
-rw-r--r--  runtime/onert/backend/cpu/ops/DetectionPostProcessLayer.cc | 4
-rw-r--r--  runtime/onert/backend/ruy/ExternalContext.h | 2
-rw-r--r--  runtime/onert/backend/ruy/KernelGenerator.cc | 10
-rw-r--r--  runtime/onert/backend/trix/CMakeLists.txt | 2
-rw-r--r--  runtime/onert/backend/trix/DevContext.h | 42
-rw-r--r--  runtime/onert/backend/trix/ops/BulkLayer.cc | 105
-rw-r--r--  runtime/onert/backend/trix/ops/BulkLayer.h | 2
-rw-r--r--  runtime/onert/backend/xnnpack/KernelGenerator.cc | 10
-rw-r--r--  runtime/onert/core/CMakeLists.txt | 14
-rw-r--r--  runtime/onert/core/include/backend/ITensor.h | 1
-rw-r--r--  runtime/onert/core/include/backend/basic/BackendContextHelpers.h | 4
-rw-r--r--  runtime/onert/core/include/compiler/BackendManager.h | 13
-rw-r--r--  runtime/onert/core/include/compiler/Compiler.h | 74
-rw-r--r--  runtime/onert/core/include/compiler/LoweredGraph.h | 7
-rw-r--r--  runtime/onert/core/include/compiler/StaticShapeInferer.h | 78
-rw-r--r--  runtime/onert/core/include/exec/Execution.h | 6
-rw-r--r--  runtime/onert/core/include/exec/Executors.h | 71
-rw-r--r--  runtime/onert/core/include/exec/FunctionSequence.h | 3
-rw-r--r--  runtime/onert/core/include/exec/IExecutor.h | 2
-rw-r--r--  runtime/onert/core/include/ir/Graph.h | 29
-rw-r--r--  runtime/onert/core/include/ir/Index.h | 10
-rw-r--r--  runtime/onert/core/include/ir/Layout.h | 1
-rw-r--r--  runtime/onert/core/include/ir/Model.h (renamed from runtime/onert/core/include/ir/Subgraphs.h) | 26
-rw-r--r--  runtime/onert/core/include/ir/NNPkg.h | 193
-rw-r--r--  runtime/onert/core/include/ir/TypeInfo.h | 6
-rw-r--r--  runtime/onert/core/include/ir/operation/Bulk.h | 2
-rw-r--r--  runtime/onert/core/include/util/CalculateActivationRange.h | 2
-rw-r--r--  runtime/onert/core/include/util/Config.lst | 2
-rw-r--r--  runtime/onert/core/include/util/ConfigSource.h | 10
-rw-r--r--  runtime/onert/core/include/util/ObjectManager.h | 13
-rw-r--r--  runtime/onert/core/include/util/TracingCtx.h | 26
-rw-r--r--  runtime/onert/core/src/backend/builtin/ExternalContext.h | 2
-rw-r--r--  runtime/onert/core/src/backend/builtin/KernelGenerator.cc | 32
-rw-r--r--  runtime/onert/core/src/backend/builtin/KernelGenerator.h | 17
-rw-r--r--  runtime/onert/core/src/backend/builtin/kernel/IfLayer.cc | 16
-rw-r--r--  runtime/onert/core/src/backend/builtin/kernel/IfLayer.h | 7
-rw-r--r--  runtime/onert/core/src/backend/builtin/kernel/PermuteLayer.cc | 4
-rw-r--r--  runtime/onert/core/src/backend/builtin/kernel/PermuteLayer.h | 6
-rw-r--r--  runtime/onert/core/src/backend/builtin/kernel/WhileLayer.cc | 19
-rw-r--r--  runtime/onert/core/src/backend/builtin/kernel/WhileLayer.h | 6
-rw-r--r--  runtime/onert/core/src/compiler/BackendManager.cc | 15
-rw-r--r--  runtime/onert/core/src/compiler/Compiler.cc | 505
-rw-r--r--  runtime/onert/core/src/compiler/ExecutorFactory.cc | 85
-rw-r--r--  runtime/onert/core/src/compiler/ExecutorFactory.h | 26
-rw-r--r--  runtime/onert/core/src/compiler/Fp32ToFp16Converter.cc | 10
-rw-r--r--  runtime/onert/core/src/compiler/HEScheduler.cc | 11
-rw-r--r--  runtime/onert/core/src/compiler/HEScheduler.h | 18
-rw-r--r--  runtime/onert/core/src/compiler/HEScheduler.test.cc (renamed from runtime/onert/test/core/compiler/HEScheduler.cc) | 49
-rw-r--r--  runtime/onert/core/src/compiler/Linear.cc | 10
-rw-r--r--  runtime/onert/core/src/compiler/LoweredGraph.cc | 44
-rw-r--r--  runtime/onert/core/src/compiler/ShapeValidator.cc | 667
-rw-r--r--  runtime/onert/core/src/compiler/ShapeValidator.h | 8
-rw-r--r--  runtime/onert/core/src/compiler/StaticShapeInferer.cc | 648
-rw-r--r--  runtime/onert/core/src/compiler/TensorRegistries.h | 13
-rw-r--r--  runtime/onert/core/src/compiler/pass/PermutationEliminationPass.cc | 1
-rw-r--r--  runtime/onert/core/src/compiler/pass/PermutationInsertionPass.cc | 18
-rw-r--r--  runtime/onert/core/src/compiler/pass/UnusedOperandEliminationPass.test.cc (renamed from runtime/onert/test/core/compiler/pass/UnusedOperandEliminationPass.cc) | 6
-rw-r--r--  runtime/onert/core/src/dumper/dot/DotDumper.cc | 222
-rw-r--r--  runtime/onert/core/src/dumper/dot/DotDumper.h | 25
-rw-r--r--  runtime/onert/core/src/exec/DataflowExecutor.h | 17
-rw-r--r--  runtime/onert/core/src/exec/ExecTime.cc | 6
-rw-r--r--  runtime/onert/core/src/exec/ExecTime.test.cc (renamed from runtime/onert/test/core/exec/ExecTime.test.cc) | 5
-rw-r--r--  runtime/onert/core/src/exec/Execution.cc | 24
-rw-r--r--  runtime/onert/core/src/exec/Execution.test.cc (renamed from runtime/onert/test/core/exec/ExecInstance.cc) | 49
-rw-r--r--  runtime/onert/core/src/exec/ExecutionObservee.h | 5
-rw-r--r--  runtime/onert/core/src/exec/ExecutionObservers.cc | 14
-rw-r--r--  runtime/onert/core/src/exec/ExecutionObservers.h | 13
-rw-r--r--  runtime/onert/core/src/exec/ExecutorBase.cc | 5
-rw-r--r--  runtime/onert/core/src/exec/ExecutorBase.h | 15
-rw-r--r--  runtime/onert/core/src/exec/Executors.cc | 183
-rw-r--r--  runtime/onert/core/src/exec/FunctionSequence.cc | 4
-rw-r--r--  runtime/onert/core/src/exec/JSONExecTime.cc | 4
-rw-r--r--  runtime/onert/core/src/exec/LinearExecutor.h | 5
-rw-r--r--  runtime/onert/core/src/exec/ParallelExecutor.h | 14
-rw-r--r--  runtime/onert/core/src/exec/feature/MockTensor.h | 66
-rw-r--r--  runtime/onert/core/src/exec/feature/nchw/Reader.test.cc | 85
-rw-r--r--  runtime/onert/core/src/exec/feature/nchw/View.test.cc | 85
-rw-r--r--  runtime/onert/core/src/exec/feature/nhwc/Reader.test.cc | 86
-rw-r--r--  runtime/onert/core/src/exec/feature/nhwc/View.h | 2
-rw-r--r--  runtime/onert/core/src/exec/feature/nhwc/View.test.cc | 86
-rw-r--r--  runtime/onert/core/src/interp/InterpExecutor.cc | 7
-rw-r--r--  runtime/onert/core/src/interp/InterpExecutor.h | 7
-rw-r--r--  runtime/onert/core/src/interp/InterpExecutor.test.cc (renamed from runtime/onert/test/core/interp/ExecManager.cc) | 45
-rw-r--r--  runtime/onert/core/src/interp/operations/BinaryArithmeticOps.cc | 10
-rw-r--r--  runtime/onert/core/src/interp/operations/Concat.cc | 8
-rw-r--r--  runtime/onert/core/src/interp/operations/Conv2D.cc | 10
-rw-r--r--  runtime/onert/core/src/interp/operations/DepthwiseConv2D.cc | 10
-rw-r--r--  runtime/onert/core/src/interp/operations/ElementwiseActivations.cc | 9
-rw-r--r--  runtime/onert/core/src/interp/operations/FullyConnected.cc | 8
-rw-r--r--  runtime/onert/core/src/interp/operations/Gather.cc | 8
-rw-r--r--  runtime/onert/core/src/interp/operations/InstanceNorm.cc | 8
-rw-r--r--  runtime/onert/core/src/interp/operations/Pad.cc | 6
-rw-r--r--  runtime/onert/core/src/interp/operations/Pool2D.cc | 12
-rw-r--r--  runtime/onert/core/src/interp/operations/Reshape.cc | 2
-rw-r--r--  runtime/onert/core/src/interp/operations/Softmax.cc | 8
-rw-r--r--  runtime/onert/core/src/interp/operations/TransposeConv.cc | 8
-rw-r--r--  runtime/onert/core/src/ir/Graph.cc | 14
-rw-r--r--  runtime/onert/core/src/ir/Graph.test.cc (renamed from runtime/onert/test/core/ir/Graph.cc) | 5
-rw-r--r--  runtime/onert/core/src/ir/LayoutSet.test.cc (renamed from runtime/onert/test/core/ir/LayoutSet.cc) | 4
-rw-r--r--  runtime/onert/core/src/ir/MockNode.h (renamed from runtime/onert/test/core/ir/MockNode.h) | 0
-rw-r--r--  runtime/onert/core/src/ir/Operand.test.cc (renamed from runtime/onert/test/core/ir/UseDef.cc) | 9
-rw-r--r--  runtime/onert/core/src/ir/OperandIndexSequence.test.cc (renamed from runtime/onert/test/core/ir/OperandIndexSet.cc) | 4
-rw-r--r--  runtime/onert/core/src/ir/Operands.test.cc (renamed from runtime/onert/test/core/ir/OperandSet.cc) | 4
-rw-r--r--  runtime/onert/core/src/ir/Operation.test.cc (renamed from runtime/onert/test/core/ir/SetIO.cc) | 7
-rw-r--r--  runtime/onert/core/src/ir/Operations.test.cc (renamed from runtime/onert/test/core/ir/OperationSet.cc) | 5
-rw-r--r--  runtime/onert/core/src/ir/Shape.test.cc (renamed from runtime/onert/test/core/ir/Shape.cc) | 2
-rw-r--r--  runtime/onert/core/src/ir/verifier/Verifier.test.cc (renamed from runtime/onert/test/core/ir/Verifier.cc) | 11
-rw-r--r--  runtime/onert/core/src/util/ChromeTracingEventWriter.cc | 6
-rw-r--r--  runtime/onert/core/src/util/ConfigSource.cc | 25
-rw-r--r--  runtime/onert/core/src/util/EventCollector.cc | 2
-rw-r--r--  runtime/onert/core/src/util/EventCollector.h | 7
-rw-r--r--  runtime/onert/core/src/util/EventRecorder.cc | 2
-rw-r--r--  runtime/onert/core/src/util/EventWriter.cc | 2
-rw-r--r--  runtime/onert/core/src/util/Index.test.cc (renamed from runtime/onert/test/core/util/Index.cc) | 4
-rw-r--r--  runtime/onert/core/src/util/MDTableEventWriter.cc | 10
-rw-r--r--  runtime/onert/core/src/util/ObjectManager.test.cc (renamed from runtime/onert/test/core/util/ObjectManager.cc) | 6
-rw-r--r--  runtime/onert/core/src/util/SNPEEventWriter.cc | 5
-rw-r--r--  runtime/onert/core/src/util/ShapeInference.test.cc (renamed from runtime/onert/test/core/util/ShapeInference.cc) | 5
-rw-r--r--  runtime/onert/frontend/base_loader/include/base_loader.h | 36
-rw-r--r--  runtime/onert/frontend/circle/include/circle_loader.h | 4
-rw-r--r--  runtime/onert/frontend/circle/src/circle_loader.cc | 16
-rw-r--r--  runtime/onert/frontend/nnapi/execution.cc | 2
-rw-r--r--  runtime/onert/frontend/nnapi/wrapper/ANeuralNetworksCompilation.cc | 9
-rw-r--r--  runtime/onert/frontend/nnapi/wrapper/ANeuralNetworksCompilation.h | 21
-rw-r--r--  runtime/onert/frontend/nnapi/wrapper/ANeuralNetworksExecution.h | 2
-rw-r--r--  runtime/onert/frontend/nnapi/wrapper/ANeuralNetworksModel.cc | 8
-rw-r--r--  runtime/onert/frontend/nnapi/wrapper/ANeuralNetworksModel.h | 4
-rw-r--r--  runtime/onert/frontend/tflite/include/tflite_loader.h | 2
-rw-r--r--  runtime/onert/frontend/tflite/src/tflite_loader.cc | 8
-rw-r--r--  runtime/onert/frontend/trix/CMakeLists.txt | 2
-rw-r--r--  runtime/onert/frontend/trix/include/trix_loader.h | 2
-rw-r--r--  runtime/onert/frontend/trix/src/trix_loader.cc | 32
-rw-r--r--  runtime/onert/frontend/trix/src/trix_loader_dummy.cc | 6
-rw-r--r--  runtime/onert/test/CMakeLists.txt | 15
-rw-r--r--  runtime/service/CMakeLists.txt | 1
-rw-r--r--  runtime/service/npud/CMakeLists.txt | 21
-rw-r--r--  runtime/service/npud/core/Server.cc | 65
-rw-r--r--  runtime/service/npud/core/Server.h | 55
-rw-r--r--  runtime/service/npud/core/Signal.cc | 56
-rw-r--r--  runtime/service/npud/core/Signal.h | 37
-rw-r--r--  runtime/service/npud/core/main.cc | 40
-rw-r--r--  runtime/service/npud/util/Config.lst | 22
-rw-r--r--  runtime/service/npud/util/ConfigSource.cc | 126
-rw-r--r--  runtime/service/npud/util/ConfigSource.h | 51
-rw-r--r--  runtime/service/npud/util/Logging.h | 88
179 files changed, 4578 insertions, 2136 deletions
diff --git a/runtime/contrib/android/api/build.gradle b/runtime/contrib/android/api/build.gradle
index 9a9465072..bc39a09b9 100644
--- a/runtime/contrib/android/api/build.gradle
+++ b/runtime/contrib/android/api/build.gradle
@@ -8,7 +8,7 @@ android {
minSdkVersion 26
targetSdkVersion 29
versionCode 1
- versionName "1.20.0"
+ versionName "1.21.0"
externalNativeBuild {
ndkBuild {
diff --git a/runtime/libs/misc/CMakeLists.txt b/runtime/libs/misc/CMakeLists.txt
index 557d403ec..69d6a9208 100644
--- a/runtime/libs/misc/CMakeLists.txt
+++ b/runtime/libs/misc/CMakeLists.txt
@@ -1,11 +1,22 @@
# Library `nnfw_lib_misc`
-file(GLOB_RECURSE NNFW_UTILITY_SRCS "src/*.cpp")
+file(GLOB_RECURSE SOURCES "src/*.cpp")
+file(GLOB_RECURSE TESTS "src/*.test.cpp")
+list(REMOVE_ITEM SOURCES ${TESTS})
-add_library(nnfw_lib_misc STATIC ${NNFW_UTILITY_SRCS})
+add_library(nnfw_lib_misc STATIC ${SOURCES})
target_include_directories(nnfw_lib_misc PUBLIC ${CMAKE_CURRENT_SOURCE_DIR}/include)
set_target_properties(nnfw_lib_misc PROPERTIES POSITION_INDEPENDENT_CODE ON)
target_link_libraries(nnfw_lib_misc PRIVATE nnfw_common)
target_link_libraries(nnfw_lib_misc PRIVATE nnfw_coverage)
-add_executable(nnfw_tensor_index_iterator "examples/tensor_index_iterator.cpp")
-target_link_libraries(nnfw_tensor_index_iterator nnfw_lib_misc)
+if(NOT ENABLE_TEST)
+ return()
+endif(NOT ENABLE_TEST)
+
+add_executable(nnfw_lib_misc_test ${TESTS})
+target_link_libraries(nnfw_lib_misc_test PRIVATE nnfw_lib_misc)
+target_link_libraries(nnfw_lib_misc_test PRIVATE nnfw_coverage)
+target_link_libraries(nnfw_lib_misc_test PUBLIC gtest gtest_main ${LIB_PTHREAD})
+
+add_test(nnfw_lib_misc_test nnfw_lib_misc_test)
+install(TARGETS nnfw_lib_misc_test DESTINATION unittest_standalone)
diff --git a/runtime/onert/core/include/util/EnvConfigSource.h b/runtime/libs/misc/include/misc/EnvConfigSource.h
index 8c5d0e8e9..63c8ae9c0 100644
--- a/runtime/onert/core/include/util/EnvConfigSource.h
+++ b/runtime/libs/misc/include/misc/EnvConfigSource.h
@@ -14,16 +14,16 @@
* limitations under the License.
*/
-#ifndef __ONERT_UTIL_ENV_CONFIG_SOURCE_H__
-#define __ONERT_UTIL_ENV_CONFIG_SOURCE_H__
+#ifndef __NNFW_MISC_ENV_CONFIG_SOURCE_H__
+#define __NNFW_MISC_ENV_CONFIG_SOURCE_H__
-#include <unordered_map>
+#include "GeneralConfigSource.h"
-#include "util/GeneralConfigSource.h"
+#include <unordered_map>
-namespace onert
+namespace nnfw
{
-namespace util
+namespace misc
{
class EnvConfigSource final : public GeneralConfigSource
@@ -35,7 +35,7 @@ private:
std::unordered_map<std::string, std::string> _default_attributes;
};
-} // namespace util
-} // namespace onert
+} // namespace misc
+} // namespace nnfw
-#endif // __ONERT_UTIL_ENV_CONFIG_SOURCE_H__
+#endif // __NNFW_MISC_ENV_CONFIG_SOURCE_H__
diff --git a/runtime/onert/core/include/util/GeneralConfigSource.h b/runtime/libs/misc/include/misc/GeneralConfigSource.h
index dedc820ec..a3de66e81 100644
--- a/runtime/onert/core/include/util/GeneralConfigSource.h
+++ b/runtime/libs/misc/include/misc/GeneralConfigSource.h
@@ -14,16 +14,16 @@
* limitations under the License.
*/
-#ifndef __ONERT_UTIL_GLOBAL_CONFIG_SOURCE_H__
-#define __ONERT_UTIL_GLOBAL_CONFIG_SOURCE_H__
+#ifndef __NNFW_MISC_GENERAL_CONFIG_SOURCE_H__
+#define __NNFW_MISC_GENERAL_CONFIG_SOURCE_H__
-#include <unordered_map>
+#include "IConfigSource.h"
-#include "util/IConfigSource.h"
+#include <unordered_map>
-namespace onert
+namespace nnfw
{
-namespace util
+namespace misc
{
class GeneralConfigSource : public IConfigSource
@@ -38,7 +38,7 @@ private:
std::unordered_map<std::string, std::string> _map;
};
-} // namespace util
-} // namespace onert
+} // namespace misc
+} // namespace nnfw
-#endif // __ONERT_UTIL_GLOBAL_CONFIG_SOURCE_H__
+#endif // __NNFW_MISC_GENERAL_CONFIG_SOURCE_H__
diff --git a/runtime/onert/core/include/util/IConfigSource.h b/runtime/libs/misc/include/misc/IConfigSource.h
index 07b09848a..fe2c48ecf 100644
--- a/runtime/onert/core/include/util/IConfigSource.h
+++ b/runtime/libs/misc/include/misc/IConfigSource.h
@@ -14,14 +14,14 @@
* limitations under the License.
*/
-#ifndef __ONERT_UTIL_I_CONFIG_SOURCE_H__
-#define __ONERT_UTIL_I_CONFIG_SOURCE_H__
+#ifndef __NNFW_MISC_I_CONFIG_SOURCE_H__
+#define __NNFW_MISC_I_CONFIG_SOURCE_H__
#include <string>
-namespace onert
+namespace nnfw
{
-namespace util
+namespace misc
{
struct IConfigSource
@@ -40,7 +40,7 @@ struct IConfigSource
virtual std::string get(const std::string &key) const = 0;
};
-} // namespace util
-} // namespace onert
+} // namespace misc
+} // namespace nnfw
-#endif // __ONERT_UTIL_I_CONFIG_SOURCE_H__
+#endif // __NNFW_MISC_I_CONFIG_SOURCE_H__
diff --git a/runtime/libs/misc/include/misc/string_helpers.h b/runtime/libs/misc/include/misc/string_helpers.h
index 46fecca71..c9d72034f 100644
--- a/runtime/libs/misc/include/misc/string_helpers.h
+++ b/runtime/libs/misc/include/misc/string_helpers.h
@@ -50,7 +50,7 @@ inline std::vector<std::string> split(const std::string &s, char delim)
std::vector<std::string> elems;
while (std::getline(ss, item, delim))
{
- elems.push_back(std::move(item));
+ elems.push_back(item);
}
return elems;
}
diff --git a/runtime/onert/core/src/util/EnvConfigSource.cc b/runtime/libs/misc/src/EnvConfigSource.cpp
index 0d25b7353..3abc9d196 100644
--- a/runtime/onert/core/src/util/EnvConfigSource.cc
+++ b/runtime/libs/misc/src/EnvConfigSource.cpp
@@ -14,13 +14,13 @@
* limitations under the License.
*/
-#include "util/EnvConfigSource.h"
+#include "misc/EnvConfigSource.h"
#include <cstdlib>
-namespace onert
+namespace nnfw
{
-namespace util
+namespace misc
{
std::string EnvConfigSource::get(const std::string &key) const
@@ -36,5 +36,5 @@ std::string EnvConfigSource::get(const std::string &key) const
}
}
-} // namespace util
-} // namespace onert
+} // namespace misc
+} // namespace nnfw
diff --git a/runtime/onert/core/src/util/GeneralConfigSource.cc b/runtime/libs/misc/src/GeneralConfigSource.cpp
index 7d2757e58..298c1663e 100644
--- a/runtime/onert/core/src/util/GeneralConfigSource.cc
+++ b/runtime/libs/misc/src/GeneralConfigSource.cpp
@@ -14,12 +14,11 @@
* limitations under the License.
*/
-#include "util/GeneralConfigSource.h"
-#include "util/logging.h"
+#include "misc/GeneralConfigSource.h"
-namespace onert
+namespace nnfw
{
-namespace util
+namespace misc
{
std::string GeneralConfigSource::get(const std::string &key) const
@@ -35,11 +34,7 @@ std::string GeneralConfigSource::get(const std::string &key) const
}
}
-void GeneralConfigSource::set(const std::string &key, const std::string &val)
-{
- VERBOSE(GeneralConfigSource) << key << " : " << val << std::endl;
- _map[key] = val;
-}
+void GeneralConfigSource::set(const std::string &key, const std::string &val) { _map[key] = val; }
-} // namespace util
-} // namespace onert
+} // namespace misc
+} // namespace nnfw
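Note (not part of the commit): after this move the config-source classes live under nnfw::misc instead of onert::util. A minimal usage sketch, assuming GeneralConfigSource stays default-constructible as the removed onert::util call sites (std::make_unique<GeneralConfigSource>()) suggest, and using only the set()/get() members shown in this diff; the key is an arbitrary example:

#include "misc/GeneralConfigSource.h"

#include <iostream>

int main()
{
  nnfw::misc::GeneralConfigSource cfg;
  cfg.set("TRACE_FILEPATH", "/tmp/trace.json");         // stored in the internal _map
  std::cout << cfg.get("TRACE_FILEPATH") << std::endl;  // prints /tmp/trace.json
  return 0;
}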
diff --git a/runtime/libs/misc/src/string_helpers.test.cpp b/runtime/libs/misc/src/string_helpers.test.cpp
new file mode 100644
index 000000000..1111425d0
--- /dev/null
+++ b/runtime/libs/misc/src/string_helpers.test.cpp
@@ -0,0 +1,81 @@
+/*
+ * Copyright (c) 2022 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "misc/string_helpers.h"
+
+#include <gtest/gtest.h>
+
+TEST(StringHelpersTest, split)
+{
+ const std::string example = "abc;def;ghi";
+
+ auto str_vector = nnfw::misc::split(example, ';');
+
+ ASSERT_EQ(str_vector.size(), 3);
+ EXPECT_STREQ(str_vector[0].c_str(), "abc");
+ EXPECT_STREQ(str_vector[1].c_str(), "def");
+ EXPECT_STREQ(str_vector[2].c_str(), "ghi");
+}
+
+TEST(StringHelpersTest, neg_split_empty)
+{
+ const std::string example = "";
+
+ auto str_vector = nnfw::misc::split(example, ';');
+
+ ASSERT_EQ(str_vector.size(), 0);
+}
+
+TEST(StringHelpersTest, neg_nonsplit)
+{
+ const std::string example = "abc;def;ghi";
+
+ auto str_vector = nnfw::misc::split(example, ':');
+
+ ASSERT_EQ(str_vector.size(), 1);
+ EXPECT_STREQ(str_vector[0].c_str(), example.c_str());
+}
+
+TEST(StringHelpersTest, append)
+{
+ auto append_str = nnfw::misc::str("abc", "-", 1);
+
+ EXPECT_STREQ(append_str.c_str(), "abc-1");
+}
+
+TEST(StringHelpersTest, neg_append_nullstr)
+{
+ const char *null_str = nullptr;
+ auto append_str = nnfw::misc::str(null_str, null_str);
+
+ ASSERT_EQ(append_str.size(), 0);
+}
+
+TEST(StringHelpersTest, join)
+{
+ const std::vector<std::string> example = {"abc", "def", "ghi"};
+
+ auto join_str = nnfw::misc::join(example.begin(), example.end(), ";");
+ EXPECT_STREQ(join_str.c_str(), "abc;def;ghi");
+}
+
+TEST(StringHelpersTest, neg_join_empty)
+{
+ const std::vector<std::string> example = {};
+
+ auto join_str = nnfw::misc::join(example.begin(), example.end(), ";");
+ ASSERT_EQ(join_str.size(), 0);
+}
diff --git a/runtime/libs/misc/src/tensor/IndexEnumerator.test.cpp b/runtime/libs/misc/src/tensor/IndexEnumerator.test.cpp
new file mode 100644
index 000000000..4cff6067f
--- /dev/null
+++ b/runtime/libs/misc/src/tensor/IndexEnumerator.test.cpp
@@ -0,0 +1,59 @@
+/*
+ * Copyright (c) 2022 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "misc/tensor/IndexEnumerator.h"
+
+#include <vector>
+#include <algorithm>
+
+#include <gtest/gtest.h>
+
+using nnfw::misc::tensor::Shape;
+using nnfw::misc::tensor::Index;
+using nnfw::misc::tensor::IndexEnumerator;
+
+TEST(MiscIndexEnumeratorTest, iterate_full_range)
+{
+ const uint32_t H = 3;
+ const uint32_t W = 4;
+
+ const Shape shape{H, W};
+
+ std::vector<uint32_t> count;
+
+ count.resize(H * W, 0);
+
+ for (IndexEnumerator e{shape}; e.valid(); e.advance())
+ {
+ const auto &ind = e.curr();
+
+ ASSERT_EQ(2, ind.rank());
+ count.at(ind.at(0) * W + ind.at(1)) += 1;
+ }
+
+ ASSERT_TRUE(std::all_of(count.begin(), count.end(), [](uint32_t n) { return n == 1; }));
+}
+
+TEST(MiscIndexEnumeratorTest, neg_zero_rank_shape)
+{
+ // Test abnormal case of empty shape
+ // It is expected not to throw any exception, do nothing
+ const Shape shape{};
+ IndexEnumerator e{shape};
+ ASSERT_NO_THROW(e.valid());
+ ASSERT_NO_THROW(e.advance());
+ SUCCEED();
+}
diff --git a/runtime/libs/misc/examples/tensor_index_iterator.cpp b/runtime/libs/misc/src/tensor/IndexIterator.test.cpp
index 590b433df..875786bdd 100644
--- a/runtime/libs/misc/examples/tensor_index_iterator.cpp
+++ b/runtime/libs/misc/src/tensor/IndexIterator.test.cpp
@@ -16,24 +16,21 @@
#include "misc/tensor/IndexIterator.h"
-#include <array>
+#include <gtest/gtest.h>
-#include <iostream>
#include <algorithm>
+#include <array>
-#include <cassert>
+using namespace nnfw::misc::tensor;
-void test_iterate(void)
+TEST(MiscIndexIteratorTest, iterate)
{
- const nnfw::misc::tensor::Shape shape{3, 4, 7};
+ const Shape shape{3, 4, 7};
std::array<int, 3 * 4 * 7> array;
array.fill(0);
- using nnfw::misc::tensor::Index;
- using nnfw::misc::tensor::iterate;
-
iterate(shape) << [&](const Index &index) {
assert(index.rank() == shape.rank());
@@ -50,25 +47,15 @@ void test_iterate(void)
array[offset] += 1;
};
- assert(std::all_of(array.begin(), array.end(), [](int num) { return num == 1; }));
+ ASSERT_TRUE(std::all_of(array.begin(), array.end(), [](int num) { return num == 1; }));
}
-int main(int argc, char **argv)
+TEST(MiscIndexIteratorTest, neg_zero_rank_shape)
{
- test_iterate();
-
- nnfw::misc::tensor::Shape shape{3, 4, 3, 4};
-
- std::cout << "Iterate over tensor{3, 4, 3, 4}" << std::endl;
-
- nnfw::misc::tensor::iterate(shape) << [](const nnfw::misc::tensor::Index &index) {
- std::cout << "rank: " << index.rank() << std::endl;
-
- for (uint32_t d = 0; d < index.rank(); ++d)
- {
- std::cout << " offset(" << d << ") = " << index.at(d) << std::endl;
- }
- };
+ // Test abnormal case of empty shape
+ // It is expected not to throw any exception, do nothing
+ const Shape shape{};
- return 0;
+ ASSERT_NO_THROW(iterate(shape) << ([](const Index &index) {}));
+ SUCCEED();
}
diff --git a/runtime/libs/ndarray/CMakeLists.txt b/runtime/libs/ndarray/CMakeLists.txt
index f88f13186..cf8c5208a 100644
--- a/runtime/libs/ndarray/CMakeLists.txt
+++ b/runtime/libs/ndarray/CMakeLists.txt
@@ -3,8 +3,6 @@ add_library(ndarray STATIC src/Array.cpp src/ContiguousSpan.cpp)
set_target_properties(ndarray PROPERTIES POSITION_INDEPENDENT_CODE ON)
target_include_directories(ndarray PUBLIC include)
-#can't make this private because of c++ templates
-target_include_directories(ndarray PUBLIC src)
option(NDARRAY_INLINE_TEMPLATES "Set to ON to disable extern declarations for common types")
@@ -19,5 +17,12 @@ if(NOT ENABLE_TEST)
return()
endif(NOT ENABLE_TEST)
-add_subdirectory(test)
+add_executable(ndarray_test src/Array.test.cpp src/ContiguousSpan.test.cpp)
+target_link_libraries(ndarray_test PRIVATE ndarray)
+target_link_libraries(ndarray_test PRIVATE nnfw_coverage)
+target_link_libraries(ndarray_test PUBLIC gtest gtest_main ${LIB_PTHREAD})
+
+add_test(ndarray_test ndarray_test)
+install(TARGETS ndarray_test DESTINATION unittest_standalone)
+
add_subdirectory(example)
diff --git a/runtime/libs/ndarray/include/ndarray/Array.h b/runtime/libs/ndarray/include/ndarray/Array.h
index 09e791763..568fe1c77 100644
--- a/runtime/libs/ndarray/include/ndarray/Array.h
+++ b/runtime/libs/ndarray/include/ndarray/Array.h
@@ -22,37 +22,21 @@
#include "ContiguousSpan.h"
#include "Shape.h"
-#if __cplusplus < 201402L
-#include "detail/cxx14.h" //integer_sequence and make_index_dequence definitions
-#else
-#include <utility>
-#endif
-
#include <algorithm>
-#include <cassert>
-#include <type_traits>
#include <array>
-#include <tuple>
+#include <cassert>
#include <cstddef>
+#include <tuple>
+#include <type_traits>
+#include <utility>
namespace ndarray
{
-// there is no index_sequence before c++14
-#if __cplusplus < 201402L
-
-template <size_t... Nums> using index_sequence = cxx14::index_sequence<Nums...>;
-
-template <size_t Num> using make_index_sequence = cxx14::make_index_sequence<Num>;
-
-#else
-
template <size_t... Nums> using index_sequence = std::index_sequence<Nums...>;
template <size_t _Num> using make_index_sequence = std::make_index_sequence<_Num>;
-#endif //__cplusplus < 201402L
-
struct Strides
{
explicit Strides(Shape s) : _strides{} { fillStrides(s); }
diff --git a/runtime/libs/ndarray/src/Array.test.cpp b/runtime/libs/ndarray/src/Array.test.cpp
new file mode 100644
index 000000000..15e67600d
--- /dev/null
+++ b/runtime/libs/ndarray/src/Array.test.cpp
@@ -0,0 +1,452 @@
+/*
+ * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "ndarray/Array.h"
+
+#include <gtest/gtest.h>
+
+using namespace ndarray;
+
+TEST(NDArrayArrayTests, basic_data_test)
+{
+ float raw_data[] = {1, 2, 3, 4};
+ int32_t raw_data_int[] = {1, 2, 3, 4};
+ uint32_t raw_data_uint[] = {1, 2, 3, 4};
+ int8_t raw_data_int8[] = {1, 2, 3, 4};
+
+ Array<float> data22{raw_data, {2, 2}};
+ Array<int32_t> data22_int{raw_data_int, {2, 2}};
+ Array<uint32_t> data22_uint{raw_data_uint, {2, 2}};
+ Array<int8_t> data22_int8{raw_data_int8, {2, 2}};
+
+ ASSERT_FLOAT_EQ(data22.at(0, 0), 1);
+ ASSERT_FLOAT_EQ(data22.at(0, 1), 2);
+ ASSERT_FLOAT_EQ(data22.at(1, 0), 3);
+ ASSERT_FLOAT_EQ(data22.at(1, 1), 4);
+ ASSERT_EQ(data22.shape().rank(), 2);
+ ASSERT_EQ(data22.shape().dim(0), 2);
+ ASSERT_EQ(data22.shape().dim(1), 2);
+
+ Array<float> data14{raw_data, {1, 4}};
+ ASSERT_FLOAT_EQ(data14.at(0, 0), 1);
+ ASSERT_FLOAT_EQ(data14.at(0, 1), 2);
+ ASSERT_FLOAT_EQ(data14.at(0, 2), 3);
+ ASSERT_FLOAT_EQ(data14.at(0, 3), 4);
+ ASSERT_EQ(data14.shape().rank(), 2);
+ ASSERT_EQ(data14.shape().dim(0), 1);
+ ASSERT_EQ(data14.shape().dim(1), 4);
+
+ // <float, false>
+ {
+ ContiguousSpan<float> cs = data22.flat();
+ ASSERT_EQ(cs.size(), 4);
+ ASSERT_FLOAT_EQ(cs.at(3), 4);
+
+ ContiguousSpan<float> cs2 = std::move(cs);
+ ASSERT_EQ(cs2.size(), 4);
+ ASSERT_FLOAT_EQ(cs2.at(3), 4);
+
+ float sum = 0;
+ for (auto it = cs2.begin(); it < cs2.end(); it++)
+ {
+ sum += *it;
+ }
+ ASSERT_EQ(sum, 10);
+
+ std::vector<float> array_data{1, 2, 3, 4};
+ auto cs3 = std::make_unique<ContiguousSpan<float>>(array_data.begin(), array_data.end());
+ ASSERT_EQ(cs3->size(), 4);
+ ASSERT_FLOAT_EQ(cs3->at(3), 4);
+
+ auto cs4 = std::move(cs3);
+ ASSERT_EQ(cs3, nullptr);
+ ASSERT_EQ(cs4->size(), 4);
+ ASSERT_FLOAT_EQ(cs4->at(3), 4);
+ }
+
+ // <float, true>
+ {
+ ContiguousSpan<float, true> cs = data22.flat();
+ ASSERT_EQ(cs.size(), 4);
+ ASSERT_FLOAT_EQ(cs.at(3), 4);
+
+ ContiguousSpan<float, true> cs2 = std::move(cs);
+ ASSERT_EQ(cs2.size(), 4);
+ ASSERT_FLOAT_EQ(cs2.at(3), 4);
+
+ float sum = 0;
+ for (auto it = cs2.begin(); it < cs2.end(); it++)
+ {
+ sum += *it;
+ }
+ ASSERT_FLOAT_EQ(sum, 10);
+
+ std::vector<float> array_data{1, 2, 3, 4};
+ auto cs3 = std::make_unique<ContiguousSpan<float, true>>(array_data.begin(), array_data.end());
+ ASSERT_EQ(cs3->size(), 4);
+ ASSERT_FLOAT_EQ(cs3->at(3), 4);
+
+ auto cs4 = std::move(cs3);
+ ASSERT_EQ(cs3, nullptr);
+ ASSERT_EQ(cs4->size(), 4);
+ ASSERT_FLOAT_EQ(cs4->at(3), 4);
+ }
+
+ // <int32_t, false>
+ {
+ ContiguousSpan<int32_t> cs = data22_int.flat();
+ ASSERT_EQ(cs.size(), 4);
+ ASSERT_EQ(cs.at(3), 4);
+
+ ContiguousSpan<int32_t> cs2 = std::move(cs);
+ ASSERT_EQ(cs2.size(), 4);
+ ASSERT_EQ(cs2.at(3), 4);
+
+ int32_t sum = 0;
+ for (auto it = cs2.begin(); it < cs2.end(); it++)
+ {
+ sum += *it;
+ }
+ ASSERT_EQ(sum, 10);
+
+ std::vector<int32_t> array_data{1, 2, 3, 4};
+ auto cs3 = std::make_unique<ContiguousSpan<int32_t>>(array_data.begin(), array_data.end());
+ ASSERT_EQ(cs3->size(), 4);
+ ASSERT_EQ(cs3->at(3), 4);
+
+ auto cs4 = std::move(cs3);
+ ASSERT_EQ(cs3, nullptr);
+ ASSERT_EQ(cs4->size(), 4);
+ ASSERT_EQ(cs4->at(3), 4);
+ }
+
+ // <int32_t, true>
+ {
+ ContiguousSpan<int32_t, true> cs = data22_int.flat();
+ ASSERT_EQ(cs.size(), 4);
+ ASSERT_EQ(cs.at(3), 4);
+
+ ContiguousSpan<int32_t, true> cs2 = std::move(cs);
+ ASSERT_EQ(cs2.size(), 4);
+ ASSERT_EQ(cs2.at(3), 4);
+
+ int32_t sum = 0;
+ for (auto it = cs2.begin(); it < cs2.end(); it++)
+ {
+ sum += *it;
+ }
+ ASSERT_EQ(sum, 10);
+
+ std::vector<int32_t> array_data{1, 2, 3, 4};
+ auto cs3 =
+ std::make_unique<ContiguousSpan<int32_t, true>>(array_data.begin(), array_data.end());
+ ASSERT_EQ(cs3->size(), 4);
+ ASSERT_EQ(cs3->at(3), 4);
+
+ auto cs4 = std::move(cs3);
+ ASSERT_EQ(cs3, nullptr);
+ ASSERT_EQ(cs4->size(), 4);
+ ASSERT_EQ(cs4->at(3), 4);
+ }
+
+ // <uint32_t, false>
+ {
+ ContiguousSpan<uint32_t> cs = data22_uint.flat();
+ ASSERT_EQ(cs.size(), 4);
+ ASSERT_EQ(cs.at(3), 4);
+
+ ContiguousSpan<uint32_t> cs2 = std::move(cs);
+ ASSERT_EQ(cs2.size(), 4);
+ ASSERT_EQ(cs2.at(3), 4);
+
+ uint32_t sum = 0;
+ for (auto it = cs2.begin(); it < cs2.end(); it++)
+ {
+ sum += *it;
+ }
+ ASSERT_EQ(sum, 10);
+
+ std::vector<uint32_t> array_data{1, 2, 3, 4};
+ auto cs3 = std::make_unique<ContiguousSpan<uint32_t>>(array_data.begin(), array_data.end());
+ ASSERT_EQ(cs3->size(), 4);
+ ASSERT_EQ(cs3->at(3), 4);
+
+ auto cs4 = std::move(cs3);
+ ASSERT_EQ(cs3, nullptr);
+ ASSERT_EQ(cs4->size(), 4);
+ }
+
+ // <uint32_t, true>
+ {
+ ContiguousSpan<uint32_t, true> cs = data22_uint.flat();
+ ASSERT_EQ(cs.size(), 4);
+ ASSERT_EQ(cs.at(3), 4);
+
+ ContiguousSpan<uint32_t, true> cs2 = std::move(cs);
+ ASSERT_EQ(cs2.size(), 4);
+ ASSERT_EQ(cs2.at(3), 4);
+
+ uint32_t sum = 0;
+ for (auto it = cs2.begin(); it < cs2.end(); it++)
+ {
+ sum += *it;
+ }
+ ASSERT_EQ(sum, 10);
+
+ std::vector<uint32_t> array_data{1, 2, 3, 4};
+ auto cs3 =
+ std::make_unique<ContiguousSpan<uint32_t, true>>(array_data.begin(), array_data.end());
+ ASSERT_EQ(cs3->size(), 4);
+ ASSERT_EQ(cs3->at(3), 4);
+
+ auto cs4 = std::move(cs3);
+ ASSERT_EQ(cs3, nullptr);
+ ASSERT_EQ(cs4->size(), 4);
+ ASSERT_EQ(cs4->at(3), 4);
+ }
+
+ // <int8_t, false>
+ {
+ ContiguousSpan<int8_t> cs = data22_int8.flat();
+ ASSERT_EQ(cs.size(), 4);
+ ASSERT_FLOAT_EQ(cs.at(3), 4);
+
+ ContiguousSpan<int8_t> cs2 = std::move(cs);
+ ASSERT_EQ(cs2.size(), 4);
+ ASSERT_FLOAT_EQ(cs2.at(3), 4);
+
+ int8_t sum = 0;
+ for (auto it = cs2.begin(); it < cs2.end(); it++)
+ {
+ sum += *it;
+ }
+ ASSERT_EQ(sum, 10);
+
+ std::vector<int8_t> array_data{1, 2, 3, 4};
+ auto cs3 = std::make_unique<ContiguousSpan<int8_t>>(array_data.begin(), array_data.end());
+ ASSERT_EQ(cs3->size(), 4);
+ ASSERT_EQ(cs3->at(3), 4);
+
+ auto cs4 = std::move(cs3);
+ ASSERT_EQ(cs3, nullptr);
+ ASSERT_EQ(cs4->size(), 4);
+ ASSERT_EQ(cs4->at(3), 4);
+
+ auto cs5 = ContiguousSpan<int8_t>(array_data.begin(), array_data.end());
+ ASSERT_EQ(cs5.size(), 4);
+ ASSERT_EQ(cs5.at(3), 4);
+ }
+
+ // <int8_t, true>
+ {
+ ContiguousSpan<int8_t, true> cs = data22_int8.flat();
+ ASSERT_EQ(cs.size(), 4);
+ ASSERT_FLOAT_EQ(cs.at(3), 4);
+
+ ContiguousSpan<int8_t, true> cs2 = std::move(cs);
+ ASSERT_EQ(cs2.size(), 4);
+ ASSERT_FLOAT_EQ(cs2.at(3), 4);
+
+ int8_t sum = 0;
+ for (auto it = cs2.begin(); it < cs2.end(); it++)
+ {
+ sum += *it;
+ }
+ ASSERT_EQ(sum, 10);
+
+ std::vector<int8_t> array_data{1, 2, 3, 4};
+ auto cs3 = std::make_unique<ContiguousSpan<int8_t, true>>(array_data.begin(), array_data.end());
+ ASSERT_EQ(cs3->size(), 4);
+ ASSERT_EQ(cs3->at(3), 4);
+
+ auto cs4 = std::move(cs3);
+ ASSERT_EQ(cs3, nullptr);
+ ASSERT_EQ(cs4->size(), 4);
+ ASSERT_EQ(cs4->at(3), 4);
+
+ auto cs5 = ContiguousSpan<int8_t, true>(array_data.begin(), array_data.end());
+ ASSERT_EQ(cs5.size(), 4);
+ ASSERT_EQ(cs5.at(3), 4);
+ }
+
+ Array<float> lv = std::move(data14);
+ ASSERT_FLOAT_EQ(lv.at(0, 0), 1);
+ ASSERT_FLOAT_EQ(lv.at(0, 1), 2);
+ ASSERT_FLOAT_EQ(lv.at(0, 2), 3);
+ ASSERT_FLOAT_EQ(lv.at(0, 3), 4);
+}
+
+TEST(NDArrayArrayTests, slice_write_test)
+{
+ // float
+ {
+ float raw_data[4] = {0};
+
+ Array<float> data22{raw_data, {2, 2}};
+
+ data22.slice(1) = {1, 2};
+
+ ASSERT_FLOAT_EQ(data22.at(0, 0), 0);
+ ASSERT_FLOAT_EQ(data22.at(0, 1), 0);
+ ASSERT_FLOAT_EQ(data22.at(1, 0), 1);
+ ASSERT_FLOAT_EQ(data22.at(1, 1), 2);
+ }
+
+ // int32_t
+ {
+ int32_t raw_data[4] = {0};
+ Array<int32_t> data22{raw_data, {2, 2}};
+
+ data22.slice(1) = {1, 2};
+
+ ASSERT_EQ(data22.at(0, 0), 0);
+ ASSERT_EQ(data22.at(0, 1), 0);
+ ASSERT_EQ(data22.at(1, 0), 1);
+ ASSERT_EQ(data22.at(1, 1), 2);
+ }
+
+ // uint32_t
+ {
+ uint32_t raw_data[4] = {0};
+ Array<uint32_t> data22{raw_data, {2, 2}};
+
+ data22.slice(1) = {1, 2};
+
+ ASSERT_EQ(data22.at(0, 0), 0);
+ ASSERT_EQ(data22.at(0, 1), 0);
+ ASSERT_EQ(data22.at(1, 0), 1);
+ ASSERT_EQ(data22.at(1, 1), 2);
+ }
+
+ // int8_t
+ {
+ int8_t raw_data[4] = {0};
+ Array<int8_t> data22{raw_data, {2, 2}};
+
+ data22.slice(1) = {1, 2};
+
+ ASSERT_EQ(data22.at(0, 0), 0);
+ ASSERT_EQ(data22.at(0, 1), 0);
+ ASSERT_EQ(data22.at(1, 0), 1);
+ ASSERT_EQ(data22.at(1, 1), 2);
+ }
+}
+
+TEST(NDArrayArrayTests, slice_read_test)
+{
+ // float
+ {
+ float raw_data[4] = {1, 2, 3, 4};
+
+ Array<float> data22{raw_data, {2, 2}};
+
+ auto slice = data22.slice(1);
+
+ ASSERT_FLOAT_EQ(slice[0], 3);
+ ASSERT_FLOAT_EQ(slice[1], 4);
+ }
+
+ // int32_t
+ {
+ int32_t raw_data[4] = {1, 2, 3, 4};
+
+ Array<int32_t> data22{raw_data, {2, 2}};
+
+ auto slice = data22.slice(1);
+
+ ASSERT_EQ(slice[0], 3);
+ ASSERT_EQ(slice[1], 4);
+ }
+
+ // uint32_t
+ {
+ uint32_t raw_data[4] = {1, 2, 3, 4};
+
+ Array<uint32_t> data22{raw_data, {2, 2}};
+
+ auto slice = data22.slice(1);
+
+ ASSERT_EQ(slice[0], 3);
+ ASSERT_EQ(slice[1], 4);
+ }
+
+ // int8_t
+ {
+ int8_t raw_data[4] = {1, 2, 3, 4};
+
+ Array<int8_t> data22{raw_data, {2, 2}};
+
+ auto slice = data22.slice(1);
+
+ ASSERT_EQ(slice[0], 3);
+ ASSERT_EQ(slice[1], 4);
+ }
+}
+
+TEST(NDArrayArrayTests, multidim_test)
+{
+ // float
+ {
+ float raw_data[5] = {0, 1, 2, 3, 4};
+
+ Array<float> data22{raw_data, {1, 1, 1, 1, 5}};
+
+ ASSERT_FLOAT_EQ(data22.at(0, 0, 0, 0, 0), 0);
+ ASSERT_FLOAT_EQ(data22.at(0, 0, 0, 0, 1), 1);
+ ASSERT_FLOAT_EQ(data22.at(0, 0, 0, 0, 2), 2);
+ ASSERT_FLOAT_EQ(data22.at(0, 0, 0, 0, 3), 3);
+ ASSERT_FLOAT_EQ(data22.at(0, 0, 0, 0, 4), 4);
+ }
+
+ // int32_t
+ {
+ int32_t raw_data[5] = {0, 1, 2, 3, 4};
+
+ Array<int32_t> data22{raw_data, {1, 1, 1, 1, 5}};
+
+ ASSERT_EQ(data22.at(0, 0, 0, 0, 0), 0);
+ ASSERT_EQ(data22.at(0, 0, 0, 0, 1), 1);
+ ASSERT_EQ(data22.at(0, 0, 0, 0, 2), 2);
+ ASSERT_EQ(data22.at(0, 0, 0, 0, 3), 3);
+ ASSERT_EQ(data22.at(0, 0, 0, 0, 4), 4);
+ }
+
+ // uint32_t
+ {
+ uint32_t raw_data[5] = {0, 1, 2, 3, 4};
+
+ Array<uint32_t> data22{raw_data, {1, 1, 1, 1, 5}};
+
+ ASSERT_EQ(data22.at(0, 0, 0, 0, 0), 0);
+ ASSERT_EQ(data22.at(0, 0, 0, 0, 1), 1);
+ ASSERT_EQ(data22.at(0, 0, 0, 0, 2), 2);
+ ASSERT_EQ(data22.at(0, 0, 0, 0, 3), 3);
+ ASSERT_EQ(data22.at(0, 0, 0, 0, 4), 4);
+ }
+
+ // int8_t
+ {
+ int8_t raw_data[5] = {0, 1, 2, 3, 4};
+
+ Array<int8_t> data22{raw_data, {1, 1, 1, 1, 5}};
+
+ ASSERT_EQ(data22.at(0, 0, 0, 0, 0), 0);
+ ASSERT_EQ(data22.at(0, 0, 0, 0, 1), 1);
+ ASSERT_EQ(data22.at(0, 0, 0, 0, 2), 2);
+ ASSERT_EQ(data22.at(0, 0, 0, 0, 3), 3);
+ ASSERT_EQ(data22.at(0, 0, 0, 0, 4), 4);
+ }
+}
diff --git a/runtime/libs/ndarray/src/ContiguousSpan.test.cpp b/runtime/libs/ndarray/src/ContiguousSpan.test.cpp
new file mode 100644
index 000000000..dd1108697
--- /dev/null
+++ b/runtime/libs/ndarray/src/ContiguousSpan.test.cpp
@@ -0,0 +1,198 @@
+/*
+ * Copyright (c) 2022 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "ndarray/ContiguousSpan.h"
+
+#include <gtest/gtest.h>
+
+using namespace ndarray;
+
+TEST(NDArrayContiguousSpanTests, slice_assign_test)
+{
+ // float
+ {
+ std::vector<float> v1{1, 2, 3, 4, 5};
+ std::vector<float> v2(5);
+
+ ContiguousSpan<float> span1(v1.begin(), v1.end());
+ ContiguousSpan<float> span2(v2.begin(), v2.end());
+
+ span2.assign(span1);
+
+ ASSERT_EQ(v1, v2);
+ ASSERT_EQ(span1.size(), 5);
+ ASSERT_EQ(span2.size(), 5);
+
+ ASSERT_EQ(span2.at(2), 3);
+ ASSERT_EQ(span2.at(4), 5);
+
+ ASSERT_EQ(*(span1.data() + 2), *(span1.data() + 2));
+
+ ContiguousSpan<float> span3(span2.offset(1));
+ ASSERT_EQ(span3.size(), 4);
+ ASSERT_EQ(span3.at(0), 2);
+ ASSERT_EQ(span3.at(1), 3);
+ ASSERT_EQ(span3[2], 4);
+ ASSERT_EQ(span3[3], 5);
+
+ // const
+ ContiguousSpan<float, true> span4(v1.begin(), v1.end());
+ ASSERT_EQ(span4.size(), 5);
+ ASSERT_EQ(span4.at(0), 1);
+ ASSERT_EQ(span4.at(1), 2);
+ ASSERT_EQ(span4.at(2), 3);
+ ASSERT_EQ(span4[3], 4);
+ ASSERT_EQ(span4[4], 5);
+
+ ContiguousSpan<float, true> span5(span4.offset(1));
+ ASSERT_EQ(span5.size(), 4);
+ ASSERT_EQ(span5.at(0), 2);
+ ASSERT_EQ(span5.at(1), 3);
+ ASSERT_EQ(span5[2], 4);
+ ASSERT_EQ(span5[3], 5);
+
+ ASSERT_EQ(*(span5.data() + 2), *(span5.data() + 2));
+ }
+
+ // int32_t
+ {
+ std::vector<int32_t> v1{1, 2, 3, 4, 5};
+ std::vector<int32_t> v2(5);
+
+ ContiguousSpan<int32_t> span1(v1.begin(), v1.end());
+ ContiguousSpan<int32_t> span2(v2.begin(), v2.end());
+
+ span2.assign(span1);
+
+ ASSERT_EQ(v1, v2);
+ ASSERT_EQ(span1.size(), 5);
+ ASSERT_EQ(span2.size(), 5);
+
+ ASSERT_EQ(span2.at(2), 3);
+ ASSERT_EQ(span2.at(4), 5);
+
+ ASSERT_EQ(*(span1.data() + 2), *(span1.data() + 2));
+
+ ContiguousSpan<int32_t> span3(span2.offset(1));
+ ASSERT_EQ(span3.size(), 4);
+ ASSERT_EQ(span3.at(0), 2);
+ ASSERT_EQ(span3.at(1), 3);
+ ASSERT_EQ(span3[2], 4);
+ ASSERT_EQ(span3[3], 5);
+
+ // const
+ ContiguousSpan<int32_t, true> span4(v1.begin(), v1.end());
+ ASSERT_EQ(span4.size(), 5);
+ ASSERT_EQ(span4.at(0), 1);
+ ASSERT_EQ(span4.at(1), 2);
+ ASSERT_EQ(span4.at(2), 3);
+ ASSERT_EQ(span4[3], 4);
+ ASSERT_EQ(span4[4], 5);
+
+ ContiguousSpan<int32_t, true> span5(span4.offset(1));
+ ASSERT_EQ(span5.size(), 4);
+ ASSERT_EQ(span5.at(0), 2);
+ ASSERT_EQ(span5.at(1), 3);
+ ASSERT_EQ(span5[2], 4);
+ ASSERT_EQ(span5[3], 5);
+ }
+
+ // uint32_t
+ {
+ std::vector<uint32_t> v1{1, 2, 3, 4, 5};
+ std::vector<uint32_t> v2(5);
+
+ ContiguousSpan<uint32_t> span1(v1.begin(), v1.end());
+ ContiguousSpan<uint32_t> span2(v2.begin(), v2.end());
+
+ span2.assign(span1);
+
+ ASSERT_EQ(v1, v2);
+ ASSERT_EQ(span1.size(), 5);
+ ASSERT_EQ(span2.size(), 5);
+
+ ASSERT_EQ(span2.at(2), 3);
+ ASSERT_EQ(span2.at(4), 5);
+
+ ASSERT_EQ(*(span1.data() + 2), *(span1.data() + 2));
+
+ ContiguousSpan<uint32_t> span3(span2.offset(1));
+ ASSERT_EQ(span3.size(), 4);
+ ASSERT_EQ(span3.at(0), 2);
+ ASSERT_EQ(span3.at(1), 3);
+ ASSERT_EQ(span3[2], 4);
+ ASSERT_EQ(span3[3], 5);
+
+ // const
+ ContiguousSpan<uint32_t, true> span4(v1.begin(), v1.end());
+ ASSERT_EQ(span4.size(), 5);
+ ASSERT_EQ(span4.at(0), 1);
+ ASSERT_EQ(span4.at(1), 2);
+ ASSERT_EQ(span4.at(2), 3);
+ ASSERT_EQ(span4[3], 4);
+ ASSERT_EQ(span4[4], 5);
+
+ ContiguousSpan<uint32_t, true> span5(span4.offset(1));
+ ASSERT_EQ(span5.size(), 4);
+ ASSERT_EQ(span5.at(0), 2);
+ ASSERT_EQ(span5.at(1), 3);
+ ASSERT_EQ(span5[2], 4);
+ ASSERT_EQ(span5[3], 5);
+ }
+
+ // int8_t
+ {
+ std::vector<int8_t> v1{1, 2, 3, 4, 5};
+ std::vector<int8_t> v2(5);
+
+ ContiguousSpan<int8_t> span1(v1.begin(), v1.end());
+ ContiguousSpan<int8_t> span2(v2.begin(), v2.end());
+
+ span2.assign(span1);
+
+ ASSERT_EQ(v1, v2);
+ ASSERT_EQ(span1.size(), 5);
+ ASSERT_EQ(span2.size(), 5);
+
+ ASSERT_EQ(span2.at(2), 3);
+ ASSERT_EQ(span2.at(4), 5);
+
+ ASSERT_EQ(*(span1.data() + 2), *(span1.data() + 2));
+
+ ContiguousSpan<int8_t> span3(span2.offset(1));
+ ASSERT_EQ(span3.size(), 4);
+ ASSERT_EQ(span3.at(0), 2);
+ ASSERT_EQ(span3.at(1), 3);
+ ASSERT_EQ(span3[2], 4);
+ ASSERT_EQ(span3[3], 5);
+
+ // const
+ ContiguousSpan<int8_t, true> span4(v1.begin(), v1.end());
+ ASSERT_EQ(span4.size(), 5);
+ ASSERT_EQ(span4.at(0), 1);
+ ASSERT_EQ(span4.at(1), 2);
+ ASSERT_EQ(span4.at(2), 3);
+ ASSERT_EQ(span4[3], 4);
+ ASSERT_EQ(span4[4], 5);
+
+ ContiguousSpan<int8_t, true> span5(span4.offset(1));
+ ASSERT_EQ(span5.size(), 4);
+ ASSERT_EQ(span5.at(0), 2);
+ ASSERT_EQ(span5.at(1), 3);
+ ASSERT_EQ(span5[2], 4);
+ ASSERT_EQ(span5[3], 5);
+ }
+}
diff --git a/runtime/libs/ndarray/src/detail/cxx14.h b/runtime/libs/ndarray/src/detail/cxx14.h
deleted file mode 100644
index 8b78fb985..000000000
--- a/runtime/libs/ndarray/src/detail/cxx14.h
+++ /dev/null
@@ -1,67 +0,0 @@
-/*
- * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#ifndef _NDARRAY_CXX14_H_
-#define _NDARRAY_CXX14_H_
-
-namespace ndarray
-{
-
-namespace cxx14
-{
-
-template <size_t... Nums> struct index_sequence
-{
- using value_type = size_t;
-
- static constexpr std::size_t size() noexcept { return sizeof...(Nums); }
-};
-
-namespace detail
-{
-
-template <size_t v, typename Seq> struct _append;
-
-template <size_t v, size_t... Nums> struct _append<v, index_sequence<Nums...>>
-{
- using result = index_sequence<Nums..., v>;
-};
-
-template <size_t Len> struct make_index_sequence
-{
- using result =
- typename detail::_append<Len - 1, typename make_index_sequence<Len - 1>::result>::result;
-};
-
-template <> struct make_index_sequence<1>
-{
- using result = index_sequence<0>;
-};
-
-template <> struct make_index_sequence<0>
-{
- using result = index_sequence<>;
-};
-
-} // namespace detail
-
-template <size_t Num> using make_index_sequence = typename detail::make_index_sequence<Num>::result;
-
-} // namespace cxx14
-
-} // namespace ndarray
-
-#endif //_NDARRAY_CXX14_H_
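Note (not part of the commit): this hand-rolled index_sequence shim is deleted because ndarray/Array.h (earlier in this diff) now includes <utility> and aliases std::index_sequence / std::make_index_sequence directly, i.e. the build assumes at least C++14. A minimal C++14 sketch of the standard facility that replaces the shim:

#include <array>
#include <cstddef>
#include <utility>

// Build a std::array<int, N> holding 0..N-1 by expanding an index pack.
template <std::size_t... Is>
constexpr std::array<int, sizeof...(Is)> iota_impl(std::index_sequence<Is...>)
{
  return {{static_cast<int>(Is)...}};
}

template <std::size_t N> constexpr std::array<int, N> iota()
{
  return iota_impl(std::make_index_sequence<N>{});
}

// iota<4>() yields {0, 1, 2, 3}, the same kind of index pack the deleted
// cxx14::make_index_sequence used to generate.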
diff --git a/runtime/libs/ndarray/test/CMakeLists.txt b/runtime/libs/ndarray/test/CMakeLists.txt
deleted file mode 100644
index be1ed6510..000000000
--- a/runtime/libs/ndarray/test/CMakeLists.txt
+++ /dev/null
@@ -1,18 +0,0 @@
-if(NOT TARGET ndarray)
- return()
-endif()
-
-add_executable(ndarray_test ndarray_test.cpp)
-
-target_link_libraries(ndarray_test PRIVATE ndarray)
-
-nnfw_find_package(GTest)
-if(NOT GTest_FOUND)
- message(STATUS "GTest not avaialble. Skipping NDArray test build")
- return()
-endif(NOT GTest_FOUND)
-
-target_link_libraries(ndarray_test PUBLIC gtest gtest_main ${LIB_PTHREAD})
-
-add_test(ndarray_test ndarray_test)
-install(TARGETS ndarray_test DESTINATION unittest_standalone)
diff --git a/runtime/libs/ndarray/test/ndarray_test.cpp b/runtime/libs/ndarray/test/ndarray_test.cpp
deleted file mode 100644
index 4b5ad5765..000000000
--- a/runtime/libs/ndarray/test/ndarray_test.cpp
+++ /dev/null
@@ -1,122 +0,0 @@
-/*
- * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#include "gtest/gtest.h"
-
-#include "ndarray/Array.h"
-
-using namespace ndarray;
-
-TEST(NDArray_tests, basic_data_test)
-{
-
- float raw_data[] = {1, 2, 3, 4};
-
- Array<float> data22{raw_data, {2, 2}};
-
- ASSERT_FLOAT_EQ(data22.at(0, 0), 1);
- ASSERT_FLOAT_EQ(data22.at(0, 1), 2);
- ASSERT_FLOAT_EQ(data22.at(1, 0), 3);
- ASSERT_FLOAT_EQ(data22.at(1, 1), 4);
- ASSERT_EQ(data22.shape().rank(), 2);
- ASSERT_EQ(data22.shape().dim(0), 2);
- ASSERT_EQ(data22.shape().dim(1), 2);
-
- Array<float> data14{raw_data, {1, 4}};
- ASSERT_FLOAT_EQ(data14.at(0, 0), 1);
- ASSERT_FLOAT_EQ(data14.at(0, 1), 2);
- ASSERT_FLOAT_EQ(data14.at(0, 2), 3);
- ASSERT_FLOAT_EQ(data14.at(0, 3), 4);
- ASSERT_EQ(data14.shape().rank(), 2);
- ASSERT_EQ(data14.shape().dim(0), 1);
- ASSERT_EQ(data14.shape().dim(1), 4);
-
- ContiguousSpan<float> cs = data22.flat();
- ASSERT_EQ(cs.size(), 4);
- ASSERT_FLOAT_EQ(cs.at(3), 4);
-
- Array<float> lv = std::move(data14);
- ASSERT_FLOAT_EQ(lv.at(0, 0), 1);
- ASSERT_FLOAT_EQ(lv.at(0, 1), 2);
- ASSERT_FLOAT_EQ(lv.at(0, 2), 3);
- ASSERT_FLOAT_EQ(lv.at(0, 3), 4);
-}
-
-TEST(NDArray_tests, slice_write_test)
-{
- float raw_data[4] = {0};
-
- Array<float> data22{raw_data, {2, 2}};
-
- data22.slice(1) = {1, 2};
-
- ASSERT_FLOAT_EQ(data22.at(0, 0), 0);
- ASSERT_FLOAT_EQ(data22.at(0, 1), 0);
- ASSERT_FLOAT_EQ(data22.at(1, 0), 1);
- ASSERT_FLOAT_EQ(data22.at(1, 1), 2);
-}
-
-TEST(NDArray_tests, slice_read_test)
-{
- float raw_data[4] = {1, 2, 3, 4};
-
- Array<float> data22{raw_data, {2, 2}};
-
- auto slice = data22.slice(1);
-
- ASSERT_FLOAT_EQ(slice[0], 3);
- ASSERT_FLOAT_EQ(slice[1], 4);
-}
-
-TEST(NDArray_tests, multidim_test)
-{
- float raw_data[5] = {0, 1, 2, 3, 4};
-
- Array<float> data22{raw_data, {1, 1, 1, 1, 5}};
-
- ASSERT_FLOAT_EQ(data22.at(0, 0, 0, 0, 0), 0);
- ASSERT_FLOAT_EQ(data22.at(0, 0, 0, 0, 1), 1);
- ASSERT_FLOAT_EQ(data22.at(0, 0, 0, 0, 2), 2);
- ASSERT_FLOAT_EQ(data22.at(0, 0, 0, 0, 3), 3);
- ASSERT_FLOAT_EQ(data22.at(0, 0, 0, 0, 4), 4);
-}
-
-TEST(NDArray_tests, slice_assign_test)
-{
- std::vector<float> v1{1, 2, 3, 4, 5};
- std::vector<float> v2(5);
-
- ContiguousSpan<float> span1(v1.begin(), v1.end());
- ContiguousSpan<float> span2(v2.begin(), v2.end());
-
- span2.assign(span1);
-
- ASSERT_EQ(v1, v2);
- ASSERT_EQ(span1.size(), 5);
- ASSERT_EQ(span2.size(), 5);
-
- ASSERT_EQ(span2.at(2), 3);
- ASSERT_EQ(span2.at(4), 5);
-
- ASSERT_EQ(*(span1.data() + 2), *(span1.data() + 2));
-
- ContiguousSpan<float> span3(span2.offset(1));
- ASSERT_EQ(span3.size(), 4);
- ASSERT_EQ(span3.at(0), 2);
- ASSERT_EQ(span3.at(1), 3);
- ASSERT_EQ(span3.at(2), 4);
- ASSERT_EQ(span3.at(3), 5);
-}
diff --git a/runtime/onert/CMakeLists.txt b/runtime/onert/CMakeLists.txt
index 88d52a5bd..3c9ca99da 100644
--- a/runtime/onert/CMakeLists.txt
+++ b/runtime/onert/CMakeLists.txt
@@ -7,9 +7,3 @@ add_subdirectory(frontend)
add_subdirectory(core)
add_subdirectory(api)
add_subdirectory(sample)
-
-if(NOT ENABLE_TEST)
- return()
-endif(NOT ENABLE_TEST)
-
-add_subdirectory(test)
diff --git a/runtime/onert/api/CMakeLists.txt b/runtime/onert/api/CMakeLists.txt
index beb243a4d..badd5d133 100644
--- a/runtime/onert/api/CMakeLists.txt
+++ b/runtime/onert/api/CMakeLists.txt
@@ -10,6 +10,7 @@ set(NNFW_API_HEADERS include/nnfw.h include/nnfw_experimental.h)
target_link_libraries(${ONERT_DEV} PUBLIC nnfw-nnapi-header)
target_link_libraries(${ONERT_DEV} PRIVATE onert_core)
+target_link_libraries(${ONERT_DEV} PRIVATE nnfw_lib_misc)
target_link_libraries(${ONERT_DEV} PRIVATE jsoncpp tflite_loader circle_loader ${LIB_PTHREAD})
target_link_libraries(${ONERT_DEV} PRIVATE trix_loader)
target_link_libraries(${ONERT_DEV} PRIVATE nnfw_common)
diff --git a/runtime/onert/api/include/nnfw.h b/runtime/onert/api/include/nnfw.h
index 6f296a931..658cba4d5 100644
--- a/runtime/onert/api/include/nnfw.h
+++ b/runtime/onert/api/include/nnfw.h
@@ -193,7 +193,7 @@ typedef struct nnfw_tensorinfo
* And inference is performed after {@link nnfw_run} is invoked.
*
* <p>{@link nnfw_close_session} should be called once
- * if session is no longer need
+ * if session is no longer needed
*
* @param[out] session The session to be created
* @return NNFW_STATUS_NO_ERROR if successful
@@ -213,7 +213,7 @@ NNFW_STATUS nnfw_close_session(nnfw_session *session);
/**
* @brief Load model from nnpackage file or directory
*
- * The length of \p package_file_path must not execeed 1024 bytes including zero at the end.
+ * The length of \p package_file_path must not exceed 1024 bytes including zero at the end.
*
* @param[in] session nnfw_session loading the given nnpackage file/dir
* @param[in] package_file_path Path to the nnpackage file or unzipped directory to be loaded
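Note (not part of the commit): the documentation touched above describes the session lifecycle. A hedged usage sketch of that lifecycle follows; nnfw_load_model_from_file, nnfw_prepare and nnfw_run are companion calls from the same header (not shown in this hunk), and the nnpackage path is a placeholder:

#include <nnfw.h>

int main()
{
  nnfw_session *session = nullptr;
  if (nnfw_create_session(&session) != NNFW_STATUS_NO_ERROR)
    return 1;
  // The path must not exceed 1024 bytes including the terminating zero (see above).
  if (nnfw_load_model_from_file(session, "/path/to/model.nnpackage") != NNFW_STATUS_NO_ERROR)
  {
    nnfw_close_session(session);
    return 1;
  }
  // ... nnfw_prepare(), input/output setup and nnfw_run() would go here ...
  nnfw_close_session(session); // close once the session is no longer needed
  return 0;
}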
diff --git a/runtime/onert/api/include/nnfw_version.h b/runtime/onert/api/include/nnfw_version.h
index 45b34716a..2fbb96f31 100644
--- a/runtime/onert/api/include/nnfw_version.h
+++ b/runtime/onert/api/include/nnfw_version.h
@@ -21,6 +21,6 @@
* NNFW_VERSION is a uint32 value representing nnfw runtime version
* in 0xMMmmmmPP, where MM = major, mmmm = minor, PP = patch
*/
-#define NNFW_VERSION 0x01001400
+#define NNFW_VERSION 0x01001500
#endif // __NNFW_VERSION_H__
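Note (not part of the commit): decoding the new value with the 0xMMmmmmPP packing described in the comment above gives 1.21.0, which matches the versionName bump in build.gradle earlier in this commit. A small sketch of the decode:

#include <cstdint>
#include <cstdio>

int main()
{
  const uint32_t version = 0x01001500;             // NNFW_VERSION after this change
  const uint32_t vmajor = (version >> 24) & 0xFF;  // MM   -> 1
  const uint32_t vminor = (version >> 8) & 0xFFFF; // mmmm -> 0x0015 = 21
  const uint32_t vpatch = version & 0xFF;          // PP   -> 0
  std::printf("%u.%u.%u\n", vmajor, vminor, vpatch); // prints 1.21.0
  return 0;
}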
diff --git a/runtime/onert/api/src/nnfw_api.cc b/runtime/onert/api/src/nnfw_api.cc
index 0ebd385e9..a0e6ee094 100644
--- a/runtime/onert/api/src/nnfw_api.cc
+++ b/runtime/onert/api/src/nnfw_api.cc
@@ -58,15 +58,7 @@ STATIC_ASSERT_ENUM_CHECK(NNFW_INFO_ID_VERSION, 0);
* @param session the session to be created
* @return NNFW_STATUS_NO_ERROR if successful
*/
-NNFW_STATUS nnfw_create_session(nnfw_session **session)
-{
- NNFW_RETURN_ERROR_IF_NULL(session);
-
- *session = new (std::nothrow) nnfw_session();
- if (*session == nullptr)
- return NNFW_STATUS_OUT_OF_MEMORY;
- return NNFW_STATUS_NO_ERROR;
-}
+NNFW_STATUS nnfw_create_session(nnfw_session **session) { return nnfw_session::create(session); }
/*
* Close a session instance
diff --git a/runtime/onert/api/src/nnfw_api_internal.cc b/runtime/onert/api/src/nnfw_api_internal.cc
index 62a043921..9b43dd381 100644
--- a/runtime/onert/api/src/nnfw_api_internal.cc
+++ b/runtime/onert/api/src/nnfw_api_internal.cc
@@ -25,6 +25,7 @@
#include "tflite_loader.h"
#include "trix_loader.h"
#include "json/json.h"
+#include "ir/NNPkg.h"
#include "ir/OpCode.h"
#include "util/TracingCtx.h"
@@ -110,9 +111,7 @@ std::string trim(const std::string &value)
return value.substr(begin, range);
}
-using CfgKeyValues = std::unordered_map<std::string, std::string>;
-
-bool loadConfigure(const std::string cfgfile, CfgKeyValues &keyValues)
+bool loadConfigure(const std::string cfgfile, onert::util::CfgKeyValues &keyValues)
{
std::ifstream ifs(cfgfile);
if (ifs.is_open())
@@ -143,19 +142,6 @@ bool loadConfigure(const std::string cfgfile, CfgKeyValues &keyValues)
return false;
}
-void setConfigKeyValues(const CfgKeyValues &keyValues)
-{
- auto configsrc = std::make_unique<onert::util::GeneralConfigSource>();
-
- for (auto it = keyValues.begin(); it != keyValues.end(); ++it)
- {
- VERBOSE(NNPKG_CONFIGS) << "(" << it->first << ") = (" << it->second << ")" << std::endl;
- configsrc->set(it->first, it->second);
- }
-
- onert::util::config_source_ext(std::move(configsrc));
-}
-
NNFW_TYPE datatype_to_nnfw_dtype(onert::ir::DataType dt)
{
using onert::ir::DataType;
@@ -195,15 +181,59 @@ void fillTensorInfo(nnfw_tensorinfo *ti, const onert::ir::Shape &shape,
ti->dtype = datatype_to_nnfw_dtype(dtype);
}
+std::unique_ptr<onert::ir::Model> loadModel(const std::string filename,
+ const std::string model_type)
+{
+ if (model_type == "tflite")
+ return onert::tflite_loader::loadModel(filename.c_str());
+ if (model_type == "circle")
+ return onert::circle_loader::loadModel(filename.c_str());
+ if (model_type == "tvn")
+ return onert::trix_loader::loadModel(filename.c_str());
+
+ std::cerr << "Unsupported model type" << std::endl;
+ return std::unique_ptr<onert::ir::Model>(nullptr);
+}
+
} // namespace
nnfw_session::nnfw_session()
- : _subgraphs{nullptr}, _compiler{nullptr}, _execution{nullptr},
- _kernel_registry{std::make_shared<onert::api::CustomKernelRegistry>()}, _tracing_ctx{nullptr}
+ : _nnpkg{nullptr}, _coptions{}, _compiler_artifact{nullptr}, _execution{nullptr},
+ _kernel_registry{nullptr}
{
// DO NOTHING
}
+NNFW_STATUS nnfw_session::create(nnfw_session **session)
+{
+ if (session == nullptr)
+ return NNFW_STATUS_UNEXPECTED_NULL;
+
+ // Create session
+ *session = new (std::nothrow) nnfw_session();
+ if (*session == nullptr)
+ {
+ std::cerr << "Error during session creation" << std::endl;
+ return NNFW_STATUS_OUT_OF_MEMORY;
+ }
+
+ // Initialize fields
+ try
+ {
+ (*session)->_kernel_registry = std::make_shared<onert::api::CustomKernelRegistry>();
+ }
+ catch (const std::exception &e)
+ {
+ std::cerr << "Error during session initialization : " << e.what() << std::endl;
+ delete *session;
+ *session = nullptr;
+
+ return NNFW_STATUS_ERROR;
+ }
+
+ return NNFW_STATUS_NO_ERROR;
+}
+
nnfw_session::~nnfw_session() = default;
NNFW_STATUS nnfw_session::load_circle_from_buffer(uint8_t *buffer, size_t size)
@@ -219,19 +249,16 @@ NNFW_STATUS nnfw_session::load_circle_from_buffer(uint8_t *buffer, size_t size)
try
{
- _subgraphs = onert::circle_loader::loadModel(buffer, size);
+ auto model = onert::circle_loader::loadModel(buffer, size);
+ _nnpkg = std::make_shared<onert::ir::NNPkg>(std::move(model));
+ _coptions.push_back(onert::compiler::CompilerOptions::fromGlobalConfig());
+ _state = State::MODEL_LOADED;
}
catch (const std::exception &e)
{
std::cerr << "Error during model loading : " << e.what() << std::endl;
return NNFW_STATUS_ERROR;
}
-
- _tracing_ctx = std::make_unique<onert::util::TracingCtx>(_subgraphs.get());
-
- _compiler = std::make_unique<onert::compiler::Compiler>(_subgraphs, _tracing_ctx.get());
-
- _state = State::MODEL_LOADED;
return NNFW_STATUS_NO_ERROR;
}
@@ -247,45 +274,28 @@ NNFW_STATUS nnfw_session::load_model_from_modelfile(const char *model_file_path)
}
std::string filename{model_file_path};
- if (filename.size() < 8) // .tflite or .circle
+  // TODO: Use std::filesystem::path when we can use C++17.
+ auto dotidx = filename.find_last_of('.');
+ if (dotidx == std::string::npos)
{
- std::cerr << "Invalid model file path." << std::endl;
+ std::cerr << "Invalid model file path. Please use file with extension." << std::endl;
return NNFW_STATUS_ERROR;
}
-
- std::string model_type = filename.substr(filename.size() - 7, 7);
-
+ std::string model_type = filename.substr(dotidx + 1); // + 1 to exclude dot
try
{
- if (model_type == ".tflite")
- {
- _subgraphs = onert::tflite_loader::loadModel(filename.c_str());
- }
- else if (model_type == ".circle")
- {
- _subgraphs = onert::circle_loader::loadModel(filename.c_str());
- }
- else if (model_type == ".tvn")
- {
- _subgraphs = onert::trix_loader::loadModel(filename.c_str());
- }
- else
- {
- std::cerr << "Unsupported model type" << std::endl;
+ auto model = loadModel(filename, model_type);
+ if (model == nullptr)
return NNFW_STATUS_ERROR;
- }
+ _nnpkg = std::make_shared<onert::ir::NNPkg>(std::move(model));
+ _coptions.push_back(onert::compiler::CompilerOptions::fromGlobalConfig());
+ _state = State::MODEL_LOADED;
}
catch (const std::exception &e)
{
std::cerr << "Error during model loading : " << e.what() << std::endl;
return NNFW_STATUS_ERROR;
}
-
- _tracing_ctx = std::make_unique<onert::util::TracingCtx>(_subgraphs.get());
-
- _compiler = std::make_unique<onert::compiler::Compiler>(_subgraphs, _tracing_ctx.get());
-
- _state = State::MODEL_LOADED;
return NNFW_STATUS_NO_ERROR;
}
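// Illustrative sketch (not part of the patch): the model type is now taken from the last
// filename extension rather than a fixed 7-character suffix, so it also covers ".tvn".
// Stand-alone version of the extension handling above:
#include <string>

std::string modelTypeOf(const std::string &filename)
{
  const auto dotidx = filename.find_last_of('.');
  if (dotidx == std::string::npos)
    return ""; // no extension: rejected above as an invalid model file path
  return filename.substr(dotidx + 1); // + 1 to exclude the dot
}

// modelTypeOf("mobilenet.tflite") == "tflite" -> onert::tflite_loader::loadModel()
// modelTypeOf("add.circle")       == "circle" -> onert::circle_loader::loadModel()
// modelTypeOf("model.tvn")        == "tvn"    -> onert::trix_loader::loadModel()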
@@ -334,45 +344,59 @@ NNFW_STATUS nnfw_session::load_model_from_nnpackage(const char *package_dir)
{
auto filepath = package_path + std::string("/metadata/") + configs[0].asString();
- CfgKeyValues keyValues;
+ onert::util::CfgKeyValues keyValues;
if (loadConfigure(filepath, keyValues))
{
- setConfigKeyValues(keyValues);
+ onert::util::setConfigKeyValues(keyValues);
}
}
-
- auto model_file_path = package_path + std::string("/") + models[0].asString(); // first model
- auto model_type = model_types[0].asString(); // first model's type
- if (model_type == "tflite")
+ _nnpkg = std::make_shared<onert::ir::NNPkg>();
+ for (uint32_t i = 0; i < models.size(); ++i)
{
- _subgraphs = onert::tflite_loader::loadModel(model_file_path);
- }
- else if (model_type == "circle")
- {
- _subgraphs = onert::circle_loader::loadModel(model_file_path);
- }
- else if (model_type == "tvn")
- {
- _subgraphs = onert::trix_loader::loadModel(model_file_path);
+ auto model_file_path = package_path + std::string("/") + models[i].asString();
+ auto model_type = model_types[i].asString();
+ auto model = loadModel(model_file_path, model_type);
+ if (model == nullptr)
+ return NNFW_STATUS_ERROR;
+ model->primary_subgraph()->bindKernelBuilder(_kernel_registry->getBuilder());
+ _nnpkg->push(onert::ir::ModelIndex{i}, std::move(model));
+ _coptions.push_back(onert::compiler::CompilerOptions::fromGlobalConfig());
}
- else
+
+ auto toIODesc = [](std::string str) {
+ auto indices = nnfw::misc::split(str, ':');
+ if (indices.size() != 3)
+ {
+ std::cerr << "IODesc should be 3-tuple." << std::endl;
+ return onert::ir::IODesc{};
+ }
+ auto model_idx = static_cast<uint32_t>(std::stoi(indices.at(0)));
+ auto subgraph_idx = static_cast<uint32_t>(std::stoi(indices.at(1)));
+ auto operand_idx = static_cast<uint32_t>(std::stoi(indices.at(2)));
+ return onert::ir::IODesc{model_idx, subgraph_idx, operand_idx};
+ };
+ // read pkg-inputs and pkg-outputs
+ const Json::Value &pkg_inputs = root["pkg-inputs"];
+ for (uint32_t i = 0; i < pkg_inputs.size(); ++i)
+ _nnpkg->addInput(toIODesc(pkg_inputs[i].asString()));
+ const Json::Value &pkg_outputs = root["pkg-outputs"];
+ for (uint32_t i = 0; i < pkg_outputs.size(); ++i)
+ _nnpkg->addOutput(toIODesc(pkg_outputs[i].asString()));
+ // read model-connect
+ const Json::Value &fromtos = root["model-connect"];
+ for (uint32_t i = 0; i < fromtos.size(); ++i)
{
- std::cerr << "Unsupported model type in MANIFEST" << std::endl;
- return NNFW_STATUS_ERROR;
+ const Json::Value &tos = fromtos[i]["to"];
+ for (uint32_t j = 0; j < tos.size(); ++j)
+ _nnpkg->addEdge(toIODesc(fromtos[i]["from"].asString()), toIODesc(tos[j].asString()));
}
- _subgraphs->primary()->bindKernelBuilder(_kernel_registry->getBuilder());
+ _state = State::MODEL_LOADED;
}
catch (const std::exception &e)
{
std::cerr << "Error during model loading : " << e.what() << std::endl;
return NNFW_STATUS_ERROR;
}
-
- _tracing_ctx = std::make_unique<onert::util::TracingCtx>(_subgraphs.get());
-
- _compiler = std::make_unique<onert::compiler::Compiler>(_subgraphs, _tracing_ctx.get());
-
- _state = State::MODEL_LOADED;
return NNFW_STATUS_NO_ERROR;
}
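// Illustrative sketch (not part of the patch): pkg-inputs, pkg-outputs and model-connect
// entries in the MANIFEST are "model:subgraph:operand" triples. For example, a hypothetical
//   "pkg-inputs": ["0:0:0"], "pkg-outputs": ["1:0:0"],
//   "model-connect": [{"from": "0:0:1", "to": ["1:0:0"]}]
// wires model 0's operand 1 into model 1's operand 0. Stand-alone version of the splitting
// done by toIODesc() above:
#include <cstdint>
#include <sstream>
#include <string>
#include <tuple>
#include <vector>

std::tuple<uint32_t, uint32_t, uint32_t> parseIODesc(const std::string &str)
{
  std::vector<uint32_t> v;
  std::stringstream ss{str};
  std::string token;
  while (std::getline(ss, token, ':'))
    v.push_back(static_cast<uint32_t>(std::stoi(token)));
  // "0:0:2" -> {model 0, subgraph 0, operand 2}; other arities are rejected as malformed
  return std::make_tuple(v.at(0), v.at(1), v.at(2));
}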
@@ -396,9 +420,17 @@ NNFW_STATUS nnfw_session::prepare()
try
{
- _subgraphs.reset();
- std::shared_ptr<onert::exec::ExecutorMap> executors = _compiler->compile();
- _execution = std::make_unique<onert::exec::Execution>(executors);
+ // TODO: Compile all models in case of multiple models
+ if (_nnpkg->model_count() > 2)
+ {
+ std::cerr << "Error during model prepare : more than 3 multiple models are not supported yet."
+ << std::endl;
+ return NNFW_STATUS_ERROR;
+ }
+ auto compiler = std::make_unique<onert::compiler::Compiler>(_nnpkg, _coptions);
+ _nnpkg.reset();
+ _compiler_artifact = compiler->compile();
+ _execution = std::make_unique<onert::exec::Execution>(_compiler_artifact->_executors);
}
catch (const std::exception &e)
{
@@ -430,13 +462,14 @@ NNFW_STATUS nnfw_session::prepare_pipeline(const char *map_file_path)
try
{
- _subgraphs.reset();
- std::vector<std::shared_ptr<onert::exec::ExecutorMap>> executor_maps =
- _compiler->compile(_package_file_path.c_str(), map_file_path);
+ auto model = _nnpkg->primary_model();
+ auto compiler = std::make_unique<onert::compiler::Compiler>(model, *_coptions[0]);
+ _nnpkg.reset();
+ auto artifacts = compiler->compile(_package_file_path.c_str(), map_file_path);
- for (auto it = executor_maps.begin(); it != executor_maps.end(); ++it)
+ for (auto it = artifacts.begin(); it != artifacts.end(); ++it)
{
- _executions.push_back(std::make_shared<onert::exec::Execution>(*it));
+ _executions.push_back(std::make_shared<onert::exec::Execution>(it->get()->_executors));
}
make_dependency();
_threads.resize(_executions.size());
@@ -740,7 +773,8 @@ NNFW_STATUS nnfw_session::apply_tensorinfo(uint32_t index, nnfw_tensorinfo ti)
{
// In this case, if we apply input shape in primary_subgraph, it will propagate after
   // compilation and execution
- auto primary_subgraph = _subgraphs->primary();
+ auto model = _nnpkg->primary_model();
+ auto primary_subgraph = model->primary_subgraph();
auto ind = primary_subgraph->getInputs().at(index);
auto &input = primary_subgraph->operands().at(ind);
@@ -851,12 +885,12 @@ void nnfw_session::make_dependency()
{
for (uint32_t out_exe = 0; out_exe < _executions.size(); out_exe++)
{
- auto out_graph = _executions[out_exe]->primary_subgraph();
+ auto &out_graph = _executions[out_exe]->primary_subgraph();
for (uint32_t in_exe = 0; in_exe < _executions.size(); in_exe++)
{
if (out_exe == in_exe)
continue;
- auto in_graph = _executions[in_exe]->primary_subgraph();
+ auto &in_graph = _executions[in_exe]->primary_subgraph();
for (auto out = out_graph._name_to_output_begin(); out != out_graph._name_to_output_end();
out++)
{
@@ -971,7 +1005,7 @@ NNFW_STATUS nnfw_session::set_available_backends(const char *backends)
if (null_terminating(backends, MAX_BACKEND_NAME_LENGTH) == false)
return NNFW_STATUS_ERROR;
- auto &options = _compiler->options();
+ auto &options = *_coptions[0];
using namespace onert::util;
@@ -1005,7 +1039,7 @@ NNFW_STATUS nnfw_session::set_op_backend(const char *op, const char *backend)
return NNFW_STATUS_ERROR;
}
- auto &opcode_to_backend = _compiler->options().manual_scheduler_options.opcode_to_backend;
+ auto &opcode_to_backend = _coptions[0]->manual_scheduler_options.opcode_to_backend;
opcode_to_backend.emplace(onert::ir::toOpCode(key), backend);
}
catch (const std::exception &e)
@@ -1024,7 +1058,7 @@ NNFW_STATUS nnfw_session::set_config(const char *key, const char *value)
if (!key || !value)
return NNFW_STATUS_UNEXPECTED_NULL;
- auto &options = _compiler->options();
+ auto &options = *_coptions[0];
using namespace onert::util;
@@ -1067,14 +1101,14 @@ NNFW_STATUS nnfw_session::set_config(const char *key, const char *value)
const onert::ir::Graph *nnfw_session::primary_subgraph()
{
- if (_subgraphs)
+ if (_nnpkg != nullptr)
{
- assert(!_execution && _executions.empty());
- return _subgraphs->primary().get();
+ assert(_execution == nullptr && _executions.empty());
+ return _nnpkg->primary_model()->primary_subgraph().get();
}
else
{
- assert(_execution || !_executions.empty());
+ assert(_execution != nullptr || !_executions.empty());
// TODO Remove const_cast
// We assumed the graph will not change after compilation, but shape could change
if (!_executions.empty())
@@ -1094,7 +1128,7 @@ NNFW_STATUS nnfw_session::get_config(const char *key, char *value, size_t value_
if (!key || !value)
return NNFW_STATUS_UNEXPECTED_NULL;
- auto &options = _compiler->options();
+ auto &options = *_coptions[0];
auto check_boundary = [](size_t dest_size, std::string &src) {
if (dest_size < src.length() + 1 /* for '\0' */)
@@ -1138,9 +1172,9 @@ bool nnfw_session::isStateInitialized()
{
if (_state == State::INITIALIZED)
{
- assert(!_subgraphs);
- assert(!_compiler);
- assert(!_execution && _executions.empty());
+ assert(_nnpkg == nullptr);
+ assert(_coptions.empty());
+ assert(_execution == nullptr && _executions.empty());
return true;
}
else
@@ -1153,9 +1187,9 @@ bool nnfw_session::isStateModelLoaded()
{
if (_state == State::MODEL_LOADED)
{
- assert(_subgraphs);
- assert(_compiler);
- assert(!_execution && _executions.empty());
+ assert(_nnpkg != nullptr);
+ assert(!_coptions.empty());
+ assert(_execution == nullptr && _executions.empty());
return true;
}
else
@@ -1168,9 +1202,9 @@ bool nnfw_session::isStatePrepared()
{
if (_state == State::PREPARED)
{
- assert(!_subgraphs);
- assert(_compiler);
- assert(_execution || !_executions.empty());
+ assert(_nnpkg == nullptr);
+ assert(!_coptions.empty());
+ assert(_execution != nullptr || !_executions.empty());
return true;
}
else
@@ -1183,9 +1217,9 @@ bool nnfw_session::isStateRunning()
{
if (_state == State::RUNNING)
{
- assert(!_subgraphs);
- assert(_compiler);
- assert(_execution || !_executions.empty());
+ assert(_nnpkg == nullptr);
+ assert(!_coptions.empty());
+ assert(_execution != nullptr || !_executions.empty());
return true;
}
return false;
@@ -1195,9 +1229,9 @@ bool nnfw_session::isStateFinishedRun()
{
if (_state == State::FINISHED_RUN)
{
- assert(!_subgraphs);
- assert(_compiler);
- assert(_execution || !_executions.empty());
+ assert(_nnpkg == nullptr);
+ assert(!_coptions.empty());
+ assert(_execution != nullptr || !_executions.empty());
return true;
}
else
@@ -1224,9 +1258,14 @@ NNFW_STATUS nnfw_session::output_tensorindex(const char *tensorname, uint32_t *i
NNFW_STATUS nnfw_session::set_backends_per_operation(const char *backend_settings)
{
if (backend_settings == NULL)
- {
return NNFW_STATUS_ERROR;
- }
- _compiler->set_backend_from_str(backend_settings);
+
+ if (!isStateModelLoaded())
+ return NNFW_STATUS_INVALID_STATE;
+
+ // Backend for all
+ auto &ms_options = _coptions[0]->manual_scheduler_options;
+ ms_options.setBackendMap(std::string{backend_settings});
+
return NNFW_STATUS_NO_ERROR;
}
diff --git a/runtime/onert/api/src/nnfw_api_internal.h b/runtime/onert/api/src/nnfw_api_internal.h
index 6d75d894f..9b729fd5f 100644
--- a/runtime/onert/api/src/nnfw_api_internal.h
+++ b/runtime/onert/api/src/nnfw_api_internal.h
@@ -20,7 +20,6 @@
#include "nnfw.h"
#include "nnfw_experimental.h"
-#include <util/GeneralConfigSource.h>
#include <util/TracingCtx.h>
#include <string>
@@ -41,11 +40,13 @@ class Execution;
namespace ir
{
class Graph;
-class Subgraphs;
+class Model;
+class NNPkg;
} // namespace ir
namespace compiler
{
-class Compiler;
+struct CompilerArtifact;
+class CompilerOptions;
} // namespace compiler
} // namespace onert
@@ -97,9 +98,18 @@ private:
};
public:
+ /**
+   * @brief Factory method. It creates and initializes nnfw_session
+ *
+ * @note Use factory instead of constructor to get status
+ */
+ static NNFW_STATUS create(nnfw_session **session);
+
+private:
nnfw_session();
- ~nnfw_session();
+public:
+ ~nnfw_session();
NNFW_STATUS load_model_from_nnpackage(const char *package_file_path);
NNFW_STATUS prepare();
NNFW_STATUS prepare_pipeline(const char *map_file_path);
@@ -148,6 +158,10 @@ public:
NNFW_STATUS register_custom_operation(const std::string &id, nnfw_custom_eval eval_func);
NNFW_STATUS input_tensorindex(const char *tensorname, uint32_t *index);
NNFW_STATUS output_tensorindex(const char *tensorname, uint32_t *index);
+ /**
+ * @brief Set backends with string-encoded mapping from operation index to backend type
+ * (cpu, acl_cl)
+ */
NNFW_STATUS set_backends_per_operation(const char *backend_settings);
private:
@@ -161,15 +175,14 @@ private:
private:
State _state{State::INITIALIZED};
- std::shared_ptr<onert::ir::Subgraphs> _subgraphs;
- std::unique_ptr<onert::compiler::Compiler> _compiler;
+ std::shared_ptr<onert::ir::NNPkg> _nnpkg;
+ std::vector<std::unique_ptr<onert::compiler::CompilerOptions>> _coptions;
+ std::shared_ptr<onert::compiler::CompilerArtifact> _compiler_artifact;
std::unique_ptr<onert::exec::Execution> _execution;
std::shared_ptr<onert::api::CustomKernelRegistry> _kernel_registry;
std::vector<std::thread> _threads;
std::vector<std::shared_ptr<onert::exec::Execution>> _executions;
std::string _package_file_path;
-
- std::unique_ptr<onert::util::TracingCtx> _tracing_ctx;
};
#endif // __API_NNFW_API_INTERNAL_H__
diff --git a/runtime/onert/backend/acl_cl/Backend.h b/runtime/onert/backend/acl_cl/Backend.h
index 945ad83bb..301ded01f 100644
--- a/runtime/onert/backend/acl_cl/Backend.h
+++ b/runtime/onert/backend/acl_cl/Backend.h
@@ -46,8 +46,10 @@ public:
{
const auto &graph = *data.graph;
const auto &operands = data.graph->operands();
+ const auto is_linear_executor = data.is_linear_executor;
+
auto context = std::make_unique<acl_cl::BackendContext>(this, std::move(data));
- auto tm = createTensorManager(data.is_linear_executor);
+ auto tm = createTensorManager(is_linear_executor);
auto tr = std::make_shared<acl_common::AclTensorRegistry<TensorManager>>(tm);
auto tb = std::make_shared<TensorBuilder>(operands, tm);
context->tensor_registry = tr;
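// Illustrative sketch (not part of the patch): the copy added above avoids reading
// data.is_linear_executor after `data` has been moved into the BackendContext. Generic
// reproduction with a stand-in struct (not the actual onert ContextData type):
#include <memory>
#include <utility>

struct ContextDataLike
{
  bool is_linear_executor = false;
  // ... other members that make moving the struct worthwhile ...
};

void makeContext(ContextDataLike data)
{
  const auto is_linear_executor = data.is_linear_executor; // read it before the move
  auto context = std::make_unique<ContextDataLike>(std::move(data));
  // `data` is now moved-from; keep using the local copy instead of data.is_linear_executor
  (void)context;
  (void)is_linear_executor;
}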
diff --git a/runtime/onert/backend/acl_neon/Backend.h b/runtime/onert/backend/acl_neon/Backend.h
index 62b163b11..1c7713055 100644
--- a/runtime/onert/backend/acl_neon/Backend.h
+++ b/runtime/onert/backend/acl_neon/Backend.h
@@ -46,8 +46,10 @@ public:
{
const auto &graph = *data.graph;
const auto &operands = data.graph->operands();
+ const auto is_linear_executor = data.is_linear_executor;
+
auto context = std::make_unique<acl_neon::BackendContext>(this, std::move(data));
- auto tm = createTensorManager(data.is_linear_executor);
+ auto tm = createTensorManager(is_linear_executor);
auto tr = std::make_shared<acl_common::AclTensorRegistry<TensorManager>>(tm);
auto tb = std::make_shared<TensorBuilder>(operands, tm);
context->tensor_registry = tr;
diff --git a/runtime/onert/backend/cpu/CMakeLists.txt b/runtime/onert/backend/cpu/CMakeLists.txt
index b61e58251..99643b983 100644
--- a/runtime/onert/backend/cpu/CMakeLists.txt
+++ b/runtime/onert/backend/cpu/CMakeLists.txt
@@ -6,7 +6,7 @@ file(GLOB_RECURSE SOURCES "*.cc")
add_library(${LIB_ONERT_BACKEND_CPU} SHARED ${SOURCES})
-target_link_libraries(${LIB_ONERT_BACKEND_CPU} PRIVATE nnfw_lib_cker)
+target_link_libraries(${LIB_ONERT_BACKEND_CPU} PRIVATE nnfw_lib_cker nnfw_lib_misc)
target_link_libraries(${LIB_ONERT_BACKEND_CPU} PRIVATE onert_core)
target_link_libraries(${LIB_ONERT_BACKEND_CPU} PRIVATE nnfw_common)
target_link_libraries(${LIB_ONERT_BACKEND_CPU} PRIVATE nnfw_coverage)
diff --git a/runtime/onert/backend/cpu/ExternalContext.h b/runtime/onert/backend/cpu/ExternalContext.h
index ab0bb5f10..6ed4799a8 100644
--- a/runtime/onert/backend/cpu/ExternalContext.h
+++ b/runtime/onert/backend/cpu/ExternalContext.h
@@ -20,6 +20,8 @@
#include <util/ConfigSource.h>
#include <ruy/context.h>
+#include <memory>
+
namespace onert
{
namespace backend
diff --git a/runtime/onert/backend/cpu/KernelGenerator.cc b/runtime/onert/backend/cpu/KernelGenerator.cc
index 75274dc88..762ee7392 100644
--- a/runtime/onert/backend/cpu/KernelGenerator.cc
+++ b/runtime/onert/backend/cpu/KernelGenerator.cc
@@ -244,17 +244,13 @@ std::unique_ptr<exec::FunctionSequence> KernelGenerator::generate(ir::OperationI
assert(_tensor_builder->dynamicTensorManager());
assert(_tensor_reg);
- auto dyn_shape_inferer = std::make_shared<exec::DynamicShapeInferer>(_ctx, _tensor_reg);
-
// Prepare to handle dynamic tensors later
auto dyn_ctx = std::make_shared<exec::FunctionSequence::DynamicTensorCtx>();
{
- dyn_ctx->op_ind = ind;
- dyn_ctx->operations = &_operations_ctx;
- dyn_ctx->dynamic_shape_inferer = std::move(dyn_shape_inferer);
-
- ret->dynamic_tensor_ctx(dyn_ctx);
+ dyn_ctx->op = &_operations_ctx.at(ind);
+ dyn_ctx->dynamic_shape_inferer = std::make_shared<exec::DynamicShapeInferer>(_ctx, _tensor_reg);
}
+ ret->dynamic_tensor_ctx(dyn_ctx);
auto &op = _graph.operations().at(ind);
op.accept(*this);
diff --git a/runtime/onert/backend/cpu/ops/ConvolutionLayer.cc b/runtime/onert/backend/cpu/ops/ConvolutionLayer.cc
index 2255d5e9f..4672fe406 100644
--- a/runtime/onert/backend/cpu/ops/ConvolutionLayer.cc
+++ b/runtime/onert/backend/cpu/ops/ConvolutionLayer.cc
@@ -63,7 +63,7 @@ void ConvolutionLayer::convFloat32()
getBuffer<float>(_output));
}
-void ConvolutionLayer::convQuant8()
+void ConvolutionLayer::convQ8uPerTensor()
{
int32_t output_activation_min = 0;
int32_t output_activation_max = 0;
@@ -99,7 +99,33 @@ void ConvolutionLayer::convQuant8()
getBuffer<uint8_t>(_output));
}
-void ConvolutionLayer::convQuant8PerChannel()
+void ConvolutionLayer::convQ8uPerChannel()
+{
+ nnfw::cker::ConvParams op_params;
+ op_params.padding_values.width = _paddingLeft;
+ op_params.padding_values.height = _paddingTop;
+ op_params.stride_width = _strideWidth;
+ op_params.stride_height = _strideHeight;
+ op_params.dilation_width_factor = _dilationWidthFactor;
+ op_params.dilation_height_factor = _dilationHeightFactor;
+ op_params.input_offset = -_input->data_zero_point();
+ op_params.output_offset = _output->data_zero_point();
+ int32_t output_activation_min = 0;
+ int32_t output_activation_max = 0;
+ CalculateActivationRangeQuantized(_activation, _output, &output_activation_min,
+ &output_activation_max);
+ op_params.quantized_activation_min = output_activation_min;
+ op_params.quantized_activation_max = output_activation_max;
+ // NOTE: The following fields of ConvParams are not used:
+ // padding_type, weights_offset, output_{multiplier,shift}, float_activation_{min,max}
+
+ nnfw::cker::Conv &kernel = *_conv_kernel;
+ kernel(op_params, getShape(_input), getBuffer<uint8_t>(_input), getShape(_kernel),
+ getBuffer<uint8_t>(_kernel), _kernel->data_zero_points().data(), getShape(_bias),
+ getBuffer<int32_t>(_bias), getShape(_output), getBuffer<uint8_t>(_output));
+}
+
+void ConvolutionLayer::convQ8i()
{
int32_t output_activation_min = 0;
int32_t output_activation_max = 0;
@@ -189,11 +215,15 @@ void ConvolutionLayer::run()
}
else if (_input->data_type() == OperandType::QUANT_UINT8_ASYMM)
{
- convQuant8();
+ const bool per_channel_quantized = _kernel->data_scales().size() > 1;
+ if (per_channel_quantized)
+ convQ8uPerChannel();
+ else
+ convQ8uPerTensor();
}
else if (_input->data_type() == OperandType::QUANT_INT8_ASYMM)
{
- convQuant8PerChannel();
+ convQ8i();
}
else
{
@@ -210,8 +240,8 @@ void ConvolutionLayer::prepare()
if (_input->data_type() == OperandType::FLOAT32 && _kernel->is_constant())
{
bool is_transposed = false;
- kernel.prepare(getShape(_kernel), getBuffer<float>(_kernel), getPaddingType(_paddingType),
- is_transposed, _dilationWidthFactor, _dilationHeightFactor);
+ kernel.prepareF32(getShape(_kernel), getBuffer<float>(_kernel), getPaddingType(_paddingType),
+ is_transposed, _dilationWidthFactor, _dilationHeightFactor);
// Decrease reference of _kernel(weights) only when _kernel is constant
if (is_transposed)
@@ -225,8 +255,20 @@ void ConvolutionLayer::prepare()
else if (_input->data_type() == OperandType::QUANT_UINT8_ASYMM && _kernel->is_constant() &&
!_input->is_dynamic() && !_output->is_dynamic())
{
- kernel.prepareQuant(getShape(_input), getShape(_kernel), getShape(_output), _strideWidth,
- _strideHeight, _dilationWidthFactor, _dilationHeightFactor);
+ const bool per_channel_quantized = _kernel->data_scales().size() > 1;
+ if (per_channel_quantized)
+ {
+ GetQuantizedConvolutionMultipliersAndShifts(
+ _input->data_scale(), _output->data_scale(), _kernel->data_scales().data(),
+ _kernel->data_scales().size(), getShape(_kernel).Dims(0),
+ kernel.per_channel_output_multiplier(), kernel.per_channel_output_shift());
+ }
+ else
+ {
+ kernel.prepareQ8uPerTensor(getShape(_input), getShape(_kernel), getShape(_output),
+ _strideWidth, _strideHeight, _dilationWidthFactor,
+ _dilationHeightFactor);
+ }
}
else if (_input->data_type() == OperandType::QUANT_INT8_ASYMM)
{
diff --git a/runtime/onert/backend/cpu/ops/ConvolutionLayer.h b/runtime/onert/backend/cpu/ops/ConvolutionLayer.h
index 5d7f7c296..9f5253c8e 100644
--- a/runtime/onert/backend/cpu/ops/ConvolutionLayer.h
+++ b/runtime/onert/backend/cpu/ops/ConvolutionLayer.h
@@ -50,9 +50,10 @@ public:
public:
void convFloat32();
- void convQuant8();
+ void convQ8uPerTensor();
+ void convQ8uPerChannel();
- void convQuant8PerChannel();
+ void convQ8i();
void configure(const IPortableTensor *input, const IPortableTensor *kernel,
const IPortableTensor *bias, ir::PaddingType _paddingType,
diff --git a/runtime/onert/backend/cpu/ops/DepthwiseConvolutionLayer.cc b/runtime/onert/backend/cpu/ops/DepthwiseConvolutionLayer.cc
index 30641ecae..8a48497d5 100644
--- a/runtime/onert/backend/cpu/ops/DepthwiseConvolutionLayer.cc
+++ b/runtime/onert/backend/cpu/ops/DepthwiseConvolutionLayer.cc
@@ -49,7 +49,7 @@ void DepthwiseConvolutionLayer::convFloat32()
getBuffer<float>(_output), _external_context->ruy_context());
}
-void DepthwiseConvolutionLayer::convQuant8()
+void DepthwiseConvolutionLayer::convQ8uPerTensor()
{
int32_t output_activation_min = 0;
int32_t output_activation_max = 0;
@@ -84,11 +84,39 @@ void DepthwiseConvolutionLayer::convQuant8()
getBuffer<uint8_t>(_output), _external_context->ruy_context());
}
-void DepthwiseConvolutionLayer::convQuant8PerChannel()
+void DepthwiseConvolutionLayer::convQ8uPerChannel()
+{
+ nnfw::cker::DepthwiseConvParams op_params;
+ op_params.padding_values.width = _paddingLeft;
+ op_params.padding_values.height = _paddingTop;
+ op_params.stride_width = _strideWidth;
+ op_params.stride_height = _strideHeight;
+ op_params.dilation_width_factor = _dilationWidth;
+ op_params.dilation_height_factor = _dilationHeight;
+ op_params.depth_multiplier = _multiplier;
+ op_params.input_offset = -_input->data_zero_point();
+ op_params.output_offset = _output->data_zero_point();
+ int32_t output_activation_min = 0;
+ int32_t output_activation_max = 0;
+ CalculateActivationRangeQuantized(_activation, _output, &output_activation_min,
+ &output_activation_max);
+ op_params.quantized_activation_min = output_activation_min;
+ op_params.quantized_activation_max = output_activation_max;
+  // NOTE: The following fields of DepthwiseConvParams are not used:
+ // padding_type, weights_offset, output_{multiplier,shift}, float_activation_{min,max}
+
+ nnfw::cker::reference_integer_ops::DepthwiseConvPerChannel(
+ op_params, _per_channel_output_multiplier.data(), _per_channel_output_shift.data(),
+ getShape(_input), getBuffer<uint8_t>(_input), getShape(_kernel), getBuffer<uint8_t>(_kernel),
+ _kernel->data_zero_points().data(), getShape(_bias), getBuffer<int32_t>(_bias),
+ getShape(_output), getBuffer<uint8_t>(_output));
+}
+
+void DepthwiseConvolutionLayer::convQ8i()
{
if (!_prepared)
{
- prepareQuant8PerChannel();
+ prepareQ8i();
_prepared = true;
}
@@ -119,7 +147,15 @@ void DepthwiseConvolutionLayer::convQuant8PerChannel()
_external_context->ruy_context());
}
-void DepthwiseConvolutionLayer::prepareQuant8PerChannel()
+void DepthwiseConvolutionLayer::prepareQ8i()
+{
+ GetQuantizedConvolutionMultipliersAndShifts(
+ _input->data_scale(), _output->data_scale(), _kernel->data_scales().data(),
+ _kernel->data_scales().size(), getShape(_kernel).Dims(3), _per_channel_output_multiplier,
+ _per_channel_output_shift);
+}
+
+void DepthwiseConvolutionLayer::prepareQ8uPerChannel()
{
GetQuantizedConvolutionMultipliersAndShifts(
_input->data_scale(), _output->data_scale(), _kernel->data_scales().data(),
@@ -155,7 +191,17 @@ void DepthwiseConvolutionLayer::configure(
{
if (_kernel->is_constant() && !_input->is_dynamic() && !_output->is_dynamic())
{
- prepareQuant8PerChannel();
+ prepareQ8i();
+ _prepared = true;
+ }
+ }
+ else if (_input->data_type() == OperandType::QUANT_UINT8_ASYMM && _kernel->is_constant() &&
+ !_input->is_dynamic() && !_output->is_dynamic())
+ {
+ const bool per_channel_quantized = _kernel->data_scales().size() > 1;
+ if (per_channel_quantized)
+ {
+ prepareQ8uPerChannel();
_prepared = true;
}
}
@@ -169,11 +215,15 @@ void DepthwiseConvolutionLayer::run()
}
else if (_input->data_type() == OperandType::QUANT_UINT8_ASYMM)
{
- convQuant8();
+ const bool per_channel_quantized = _kernel->data_scales().size() > 1;
+ if (per_channel_quantized)
+ convQ8uPerChannel();
+ else
+ convQ8uPerTensor();
}
else if (_input->data_type() == OperandType::QUANT_INT8_ASYMM)
{
- convQuant8PerChannel();
+ convQ8i();
}
else
{
diff --git a/runtime/onert/backend/cpu/ops/DepthwiseConvolutionLayer.h b/runtime/onert/backend/cpu/ops/DepthwiseConvolutionLayer.h
index 720550636..5c910109a 100644
--- a/runtime/onert/backend/cpu/ops/DepthwiseConvolutionLayer.h
+++ b/runtime/onert/backend/cpu/ops/DepthwiseConvolutionLayer.h
@@ -40,9 +40,10 @@ public:
public:
void convFloat32();
- void convQuant8();
+ void convQ8uPerTensor();
+ void convQ8uPerChannel();
- void convQuant8PerChannel();
+ void convQ8i();
void configure(const IPortableTensor *input, const IPortableTensor *kernel,
const IPortableTensor *bias, const uint32_t paddingLeft,
@@ -55,7 +56,8 @@ public:
void run() override;
private:
- void prepareQuant8PerChannel();
+ void prepareQ8i();
+ void prepareQ8uPerChannel();
private:
const IPortableTensor *_input{nullptr};
diff --git a/runtime/onert/backend/cpu/ops/DetectionPostProcessLayer.cc b/runtime/onert/backend/cpu/ops/DetectionPostProcessLayer.cc
index 8a6fe6504..d89741c86 100644
--- a/runtime/onert/backend/cpu/ops/DetectionPostProcessLayer.cc
+++ b/runtime/onert/backend/cpu/ops/DetectionPostProcessLayer.cc
@@ -121,7 +121,9 @@ Array<const CornerBox> decodeBoxes(const Array<float> &raw_boxes, const Array<fl
assert(box.y2 > box.y1);
}
- return array_cast<const CornerBox>(std::move(decoded_boxes_a), decoded_boxes_a.shape());
+ auto decoded_boxes_a_shape = decoded_boxes_a.shape();
+
+ return array_cast<const CornerBox>(std::move(decoded_boxes_a), decoded_boxes_a_shape);
}
}
diff --git a/runtime/onert/backend/ruy/ExternalContext.h b/runtime/onert/backend/ruy/ExternalContext.h
index 3cc4eaa5a..c73ae636e 100644
--- a/runtime/onert/backend/ruy/ExternalContext.h
+++ b/runtime/onert/backend/ruy/ExternalContext.h
@@ -20,6 +20,8 @@
#include <util/ConfigSource.h>
#include <ruy/context.h>
+#include <memory>
+
namespace onert
{
namespace backend
diff --git a/runtime/onert/backend/ruy/KernelGenerator.cc b/runtime/onert/backend/ruy/KernelGenerator.cc
index c2f6a1f79..b2bbf9bfc 100644
--- a/runtime/onert/backend/ruy/KernelGenerator.cc
+++ b/runtime/onert/backend/ruy/KernelGenerator.cc
@@ -42,17 +42,13 @@ std::unique_ptr<exec::FunctionSequence> KernelGenerator::generate(ir::OperationI
assert(_tensor_builder->dynamicTensorManager());
assert(_tensor_reg);
- auto dyn_shape_inferer = std::make_shared<exec::DynamicShapeInferer>(_ctx, _tensor_reg);
-
// Prepare to handle dynamic tensors later
auto dyn_ctx = std::make_shared<exec::FunctionSequence::DynamicTensorCtx>();
{
- dyn_ctx->op_ind = ind;
- dyn_ctx->operations = &_operations_ctx;
- dyn_ctx->dynamic_shape_inferer = std::move(dyn_shape_inferer);
-
- ret->dynamic_tensor_ctx(dyn_ctx);
+ dyn_ctx->op = &_operations_ctx.at(ind);
+ dyn_ctx->dynamic_shape_inferer = std::make_shared<exec::DynamicShapeInferer>(_ctx, _tensor_reg);
}
+ ret->dynamic_tensor_ctx(dyn_ctx);
auto &op = _graph.operations().at(ind);
op.accept(*this);
diff --git a/runtime/onert/backend/trix/CMakeLists.txt b/runtime/onert/backend/trix/CMakeLists.txt
index 5455757ca..a94be247d 100644
--- a/runtime/onert/backend/trix/CMakeLists.txt
+++ b/runtime/onert/backend/trix/CMakeLists.txt
@@ -1,6 +1,6 @@
set(LIB_ONERT_BACKEND_TRIX onert_backend_trix)
-nnfw_find_package(TRIXEngine EXACT 2.5.0 QUIET)
+nnfw_find_package(TRIXEngine QUIET 2.5.0)
if(NOT TRIXEngine_FOUND)
return()
endif(NOT TRIXEngine_FOUND)
diff --git a/runtime/onert/backend/trix/DevContext.h b/runtime/onert/backend/trix/DevContext.h
index 482932fd4..a7dbd7a59 100644
--- a/runtime/onert/backend/trix/DevContext.h
+++ b/runtime/onert/backend/trix/DevContext.h
@@ -32,28 +32,42 @@ public:
DevContext()
{
auto device_count = getnumNPUdeviceByType(NPUCOND_TRIV2_CONN_SOCIP);
+    // TODO: The x64 platform has 3 cores. We do not support more than 2 cores for now.
+ if (device_count > 2)
+ {
+ device_count = 2;
+ }
+
if (device_count <= 0)
{
- throw std::runtime_error("Unable to find TRIV2 NPU device");
+ throw std::runtime_error("Unable to find TRIX NPU device");
}
- // Use NPU 0 device
- if (getNPUdeviceByType(&_dev_handle, NPUCOND_TRIV2_CONN_SOCIP, 0) < 0)
+ for (int i = 0; i < device_count; i++)
{
- throw std::runtime_error("Failed to get TRIV2 NPU device handle");
+ npudev_h h;
+ if (getNPUdeviceByType(&h, NPUCOND_TRIV2_CONN_SOCIP, i) < 0)
+ {
+ throw std::runtime_error("Failed to get TRIX NPU device handle");
+ }
+ _dev_handles.push_back(h);
}
}
~DevContext()
{
- if (_dev_handle != nullptr)
+ for (auto h : _dev_handles)
{
- unregisterNPUmodel_all(_dev_handle);
- putNPUdevice(_dev_handle);
+ if (h != nullptr)
+ {
+ unregisterNPUmodel_all(h);
+ putNPUdevice(h);
+ }
}
}
- npudev_h getDev() { return _dev_handle; }
+ npudev_h getDev(int i) { return _dev_handles[i]; }
+ int getDevSize() { return _dev_handles.size(); }
template <typename T> void setDataInfo(tensors_data_info *info, std::vector<T *> &tensors)
{
@@ -66,14 +80,15 @@ public:
}
}
- template <typename T> void setBuffer(generic_buffers *buf, std::vector<T *> &tensors)
+ template <typename T>
+ void setBuffer(generic_buffers *buf, std::vector<T *> &tensors, int batch_size, int batch_index)
{
buf->num_buffers = static_cast<uint32_t>(tensors.size());
for (uint32_t idx = 0; idx < buf->num_buffers; ++idx)
{
- buf->bufs[idx].addr = tensors[idx]->buffer();
- buf->bufs[idx].size = static_cast<uint64_t>(tensors[idx]->total_size());
+ buf->bufs[idx].size = static_cast<uint64_t>(tensors[idx]->total_size() / batch_size);
+ buf->bufs[idx].addr = tensors[idx]->buffer() + (batch_index * buf->bufs[idx].size);
buf->bufs[idx].type = BUFFER_MAPPED;
}
}
@@ -106,9 +121,8 @@ private:
}
private:
- // NPU device handle
- // TODO Support multicore npu device
- npudev_h _dev_handle;
+ // NPU device handles
+ std::vector<npudev_h> _dev_handles;
};
} // namespace trix
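// Illustrative sketch (not part of the patch): with the new batch_size/batch_index
// arguments, setBuffer() points each NPU request at one batch-sized slice of the user
// buffer instead of the whole tensor. The address arithmetic, isolated:
#include <cstddef>
#include <cstdint>

struct BufferSlice
{
  uint8_t *addr;
  uint64_t size;
};

// e.g. total_size = 4 MiB and batch_size = 4 -> each slice is 1 MiB,
// and slice i starts at base + i * 1 MiB
BufferSlice sliceForBatch(uint8_t *base, size_t total_size, int batch_size, int batch_index)
{
  const uint64_t slice_size = static_cast<uint64_t>(total_size / batch_size);
  return BufferSlice{base + batch_index * slice_size, slice_size};
}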
diff --git a/runtime/onert/backend/trix/ops/BulkLayer.cc b/runtime/onert/backend/trix/ops/BulkLayer.cc
index 71fdf3f0d..3c49da9a3 100644
--- a/runtime/onert/backend/trix/ops/BulkLayer.cc
+++ b/runtime/onert/backend/trix/ops/BulkLayer.cc
@@ -18,6 +18,7 @@
#include <util/logging.h>
#include <libnpuhost.h>
+#include <future>
namespace onert
{
@@ -49,24 +50,56 @@ void BulkLayer::configure(const std::vector<const IPortableTensor *> &inputs,
throw std::runtime_error("Unable to extract the model metadata");
}
+ _model_id.resize(_dev_context->getDevSize());
+
generic_buffer model_file;
model_file.type = BUFFER_FILE;
model_file.filepath = binary_path.c_str();
model_file.size = _meta->size;
- if (registerNPUmodel(dev_context->getDev(), &model_file, &_model_id) < 0)
+ for (int i = 0; i < _dev_context->getDevSize(); i++)
+ {
+ if (registerNPUmodel(dev_context->getDev(i), &model_file, &_model_id[i]) < 0)
+ {
+ throw std::runtime_error("Failed to register npu model");
+ }
+ }
+}
+
+void single_job(npudev_h dev, int req_id, input_buffers *input_buf, tensors_data_info *in_info,
+ output_buffers *output_buf, tensors_data_info *out_info)
+{
+ if (setNPU_requestData(dev, req_id, input_buf, in_info, output_buf, out_info))
+ {
+ throw std::runtime_error("Unable to create NPU request for red_id (" + std::to_string(req_id) +
+ ")");
+ }
+
+ if (submitNPU_request(dev, req_id))
{
- throw std::runtime_error("Failed to register npu model");
+ throw std::runtime_error("Unable to submit NPU request with req id (" + std::to_string(req_id) +
+ ")");
}
}
void BulkLayer::run()
{
- int req_id;
- if (createNPU_request(_dev_context->getDev(), _model_id, &req_id))
+  // TODO: Remove the many assumptions made here
+  // We assume the user wants batch execution if the user's input size is a multiple of the model's input size
+ int user_input_batch = (_inputs[0]->get_info().shape()).dim(0);
+ int model_input_batch = _meta->input_seg_dims[0][0];
+ int batch_size = user_input_batch / model_input_batch;
+ bool is_batch_execution = (batch_size != 1 ? true : false);
+
+ std::vector<int> req_id(_dev_context->getDevSize());
+
+ for (int i = 0; i < _dev_context->getDevSize(); i++)
{
- throw std::runtime_error("Unable to create NPU request with model id (" +
- std::to_string(_model_id) + ")");
+ if (createNPU_request(_dev_context->getDev(i), _model_id[i], &req_id[i]))
+ {
+ throw std::runtime_error("Unable to create NPU request with model id (" +
+ std::to_string(_model_id[i]) + ")");
+ }
}
if (_meta->input_seg_num != _inputs.size())
@@ -84,28 +117,58 @@ void BulkLayer::run()
_dev_context->setDataInfo<const IPortableTensor>(&in_info, _inputs);
_dev_context->setDataInfo<IPortableTensor>(&out_info, _outputs);
- input_buffers input_buf;
- output_buffers output_buf;
- _dev_context->setBuffer<const IPortableTensor>(&input_buf, _inputs);
- _dev_context->setBuffer<IPortableTensor>(&output_buf, _outputs);
+ std::vector<input_buffers> input_buf;
+ std::vector<output_buffers> output_buf;
+ input_buf.resize(_dev_context->getDevSize());
+ output_buf.resize(_dev_context->getDevSize());
+
+ std::vector<std::future<void>> f(_dev_context->getDevSize());
- if (setNPU_requestData(_dev_context->getDev(), req_id, &input_buf, &in_info, &output_buf,
- &out_info))
+ const int num_cores = _dev_context->getDevSize();
+ if (is_batch_execution)
{
- throw std::runtime_error("Unable to create NPU request for model id (" +
- std::to_string(_model_id) + ")");
+    // TODO: Support a general number of cores (>2)
+    // Here we assume that there are 2 trix cores
+ for (int i = 0; i < (batch_size); i = i + num_cores)
+ {
+ for (int core = 0; core < num_cores; core++)
+ {
+ _dev_context->setBuffer<const IPortableTensor>(&input_buf[core], _inputs, batch_size,
+ i + core);
+ _dev_context->setBuffer<IPortableTensor>(&output_buf[core], _outputs, batch_size, i + core);
+ }
+ for (int core = 0; core < num_cores; core++)
+ {
+
+ if (i + core < batch_size)
+ {
+ f[core] =
+ std::async(std::launch::async, &single_job, _dev_context->getDev(core), req_id[core],
+ &input_buf[core], &in_info, &output_buf[core], &out_info);
+ }
+ }
+ for (int core = 0; core < num_cores; core++)
+ {
+ f[core].wait();
+ }
+ }
}
-
- if (submitNPU_request(_dev_context->getDev(), req_id))
+ else
{
- throw std::runtime_error("Unable to submit NPU request with req id (" + std::to_string(req_id) +
- ")");
+ _dev_context->setBuffer<const IPortableTensor>(&input_buf[0], _inputs, batch_size, 0);
+ _dev_context->setBuffer<IPortableTensor>(&output_buf[0], _outputs, batch_size, 0);
+
+ single_job(_dev_context->getDev(0), req_id[0], &input_buf[0], &in_info, &output_buf[0],
+ &out_info);
}
- if (removeNPU_request(_dev_context->getDev(), req_id))
+ for (int i = 0; i < _dev_context->getDevSize(); i++)
{
- throw std::runtime_error("Unable to remove NPU request with req id (" + std::to_string(req_id) +
- ")");
+ if (removeNPU_request(_dev_context->getDev(i), req_id[i]))
+ {
+ throw std::runtime_error("Unable to remove NPU request with req id (" +
+ std::to_string(req_id[i]) + ")");
+ }
}
}
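// Illustrative sketch (not part of the patch): the batch loop above walks the user batch in
// steps of the core count and launches one single_job() per core with std::async, waiting
// for all of them before reusing the per-core buffers. Skeleton of that scheduling, with a
// placeholder lambda standing in for the NPU submission:
#include <future>
#include <vector>

void runBatchesAcrossCores(int batch_size, int num_cores)
{
  std::vector<std::future<void>> jobs(num_cores);
  for (int i = 0; i < batch_size; i += num_cores)
  {
    for (int core = 0; core < num_cores; ++core)
    {
      if (i + core < batch_size)
        jobs[core] = std::async(std::launch::async, [i, core]() {
          // submit batch (i + core) on NPU core `core` (stand-in for single_job)
        });
    }
    for (int core = 0; core < num_cores; ++core)
      if (jobs[core].valid())
        jobs[core].wait(); // join before the buffers for this round are rewritten
  }
}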
diff --git a/runtime/onert/backend/trix/ops/BulkLayer.h b/runtime/onert/backend/trix/ops/BulkLayer.h
index f7080ccad..614c0f728 100644
--- a/runtime/onert/backend/trix/ops/BulkLayer.h
+++ b/runtime/onert/backend/trix/ops/BulkLayer.h
@@ -50,7 +50,7 @@ private:
std::vector<const IPortableTensor *> _inputs;
std::vector<IPortableTensor *> _outputs;
- uint32_t _model_id;
+ std::vector<uint32_t> _model_id;
npubin_meta *_meta;
std::shared_ptr<DevContext> _dev_context;
};
diff --git a/runtime/onert/backend/xnnpack/KernelGenerator.cc b/runtime/onert/backend/xnnpack/KernelGenerator.cc
index 28f729d77..9580bec8c 100644
--- a/runtime/onert/backend/xnnpack/KernelGenerator.cc
+++ b/runtime/onert/backend/xnnpack/KernelGenerator.cc
@@ -56,17 +56,13 @@ std::unique_ptr<exec::FunctionSequence> KernelGenerator::generate(ir::OperationI
assert(_tensor_builder->dynamicTensorManager());
assert(_tensor_reg);
- auto dyn_shape_inferer = std::make_shared<exec::DynamicShapeInferer>(_ctx, _tensor_reg);
-
// Prepare to handle dynamic tensors later
auto dyn_ctx = std::make_shared<exec::FunctionSequence::DynamicTensorCtx>();
{
- dyn_ctx->op_ind = ind;
- dyn_ctx->operations = &_operations_ctx;
- dyn_ctx->dynamic_shape_inferer = std::move(dyn_shape_inferer);
-
- ret->dynamic_tensor_ctx(dyn_ctx);
+ dyn_ctx->op = &_operations_ctx.at(ind);
+ dyn_ctx->dynamic_shape_inferer = std::make_shared<exec::DynamicShapeInferer>(_ctx, _tensor_reg);
}
+ ret->dynamic_tensor_ctx(dyn_ctx);
auto &op = _graph.operations().at(ind);
op.accept(*this);
diff --git a/runtime/onert/core/CMakeLists.txt b/runtime/onert/core/CMakeLists.txt
index 6dbadf80b..87c7a13e4 100644
--- a/runtime/onert/core/CMakeLists.txt
+++ b/runtime/onert/core/CMakeLists.txt
@@ -6,14 +6,18 @@ nnfw_find_package(Ruy REQUIRED)
add_library(onert_core SHARED ${SOURCES})
set_target_properties(onert_core PROPERTIES POSITION_INDEPENDENT_CODE ON)
+
+# NOTE
+# We publish public headers into the developer package.
+# To avoid mistakenly using a private header in a public header, do not define
+# a private target_include_directories scope for the src/ directory.
target_include_directories(onert_core PUBLIC ${CMAKE_CURRENT_SOURCE_DIR}/include)
-target_include_directories(onert_core PRIVATE ${CMAKE_CURRENT_SOURCE_DIR}/src)
-target_link_libraries(onert_core PUBLIC nnfw_lib_misc half)
-target_link_libraries(onert_core PRIVATE nnfw_lib_cker)
+
+target_link_libraries(onert_core PRIVATE jsoncpp half)
+target_link_libraries(onert_core PRIVATE nnfw_lib_misc nnfw_lib_cker)
target_link_libraries(onert_core PRIVATE nnfw_common)
target_link_libraries(onert_core PRIVATE nnfw_coverage)
target_link_libraries(onert_core PRIVATE dl ${LIB_PTHREAD})
-target_link_libraries(onert_core PRIVATE jsoncpp)
target_link_libraries(onert_core PRIVATE ruy)
target_link_libraries(onert_core INTERFACE ruy_instrumentation)
@@ -48,6 +52,8 @@ set(TEST_ONERT_CORE test_onert_core)
add_executable(${TEST_ONERT_CORE} ${TESTS})
target_link_libraries(${TEST_ONERT_CORE} onert_core)
+# Also link nnfw_coverage to check header coverage
+target_link_libraries(${TEST_ONERT_CORE} nnfw_coverage)
target_link_libraries(${TEST_ONERT_CORE} gtest gtest_main dl ${LIB_PTHREAD})
add_test(${TEST_ONERT_CORE} ${TEST_ONERT_CORE})
diff --git a/runtime/onert/core/include/backend/ITensor.h b/runtime/onert/core/include/backend/ITensor.h
index 0a4d9c814..560416264 100644
--- a/runtime/onert/core/include/backend/ITensor.h
+++ b/runtime/onert/core/include/backend/ITensor.h
@@ -20,6 +20,7 @@
#include <cstring>
#include <cstdint>
#include <functional>
+#include <stdexcept>
#include "ir/DataType.h"
#include "ir/Layout.h"
diff --git a/runtime/onert/core/include/backend/basic/BackendContextHelpers.h b/runtime/onert/core/include/backend/basic/BackendContextHelpers.h
index 58bfe3406..cf2da4c34 100644
--- a/runtime/onert/core/include/backend/basic/BackendContextHelpers.h
+++ b/runtime/onert/core/include/backend/basic/BackendContextHelpers.h
@@ -103,7 +103,7 @@ template <typename T_BackendContext> void planTensors(const T_BackendContext &ct
// 1. Scan DEF of outputs. If the DEF, allocate it
// 2. Scan DEF of inputs. If variable tensor, allocate it
// 3. Scan USE of inputs. Decrease the USE and deallocate if the USE is 0
- for (const auto op_ind : order)
+ for (const auto &op_ind : order)
{
const auto &op = graph.operations().at(op_ind);
auto op_inputs = op.getInputs() | ir::Remove::DUPLICATED | ir::Remove::UNDEFINED;
@@ -161,7 +161,7 @@ template <typename T_BackendContext> void planTensors(const T_BackendContext &ct
}
}
- for (auto ind : operands_last_until_end)
+ for (auto &ind : operands_last_until_end)
{
tensor_builder->notifyLastUse(ind);
}
diff --git a/runtime/onert/core/include/compiler/BackendManager.h b/runtime/onert/core/include/compiler/BackendManager.h
index befe40022..b44fcf836 100644
--- a/runtime/onert/core/include/compiler/BackendManager.h
+++ b/runtime/onert/core/include/compiler/BackendManager.h
@@ -17,12 +17,11 @@
#ifndef __ONERT_COMPILER_BACKEND_MANAGER_H__
#define __ONERT_COMPILER_BACKEND_MANAGER_H__
-#include <memory>
-#include <map>
-
-#include "ir/Operands.h"
#include "backend/Backend.h"
-#include "backend/builtin/Backend.h"
+#include "ir/Operands.h"
+
+#include <map>
+#include <memory>
namespace onert
{
@@ -41,7 +40,7 @@ public:
public:
backend::Backend *get(const std::string &key);
const backend::Backend *get(const std::string &key) const;
- const backend::builtin::Backend *getBuiltin() const;
+ const backend::Backend *getBuiltin() const;
const std::vector<const backend::Backend *> getAll() const
{
std::vector<const backend::Backend *> v;
@@ -65,7 +64,7 @@ private:
private:
std::map<std::string, std::unique_ptr<void, dlhandle_destroy_t>> _handle_map;
std::map<std::string, std::unique_ptr<backend::Backend, backend_destroy_t>> _gen_map;
- backend::builtin::Backend *_builtin{nullptr};
+ backend::Backend *_builtin{nullptr};
/**
* @brief load builtin backend
*
diff --git a/runtime/onert/core/include/compiler/Compiler.h b/runtime/onert/core/include/compiler/Compiler.h
index 292de4b12..f05d63c66 100644
--- a/runtime/onert/core/include/compiler/Compiler.h
+++ b/runtime/onert/core/include/compiler/Compiler.h
@@ -22,8 +22,8 @@
#ifndef __ONERT_COMPILER_COMPILE_H_
#define __ONERT_COMPILER_COMPILE_H_
-#include "ir/Graph.h"
-#include "exec/IExecutor.h"
+#include "ir/NNPkg.h"
+#include "exec/Executors.h"
#include "util/TracingCtx.h"
namespace onert
@@ -40,6 +40,10 @@ enum class State
struct ManualSchedulerOptions
{
+public:
+ void setBackendMap(const std::string &str);
+
+public:
std::string backend_for_all;
std::unordered_map<ir::OpCode, std::string> opcode_to_backend;
std::unordered_map<ir::OperationIndex, std::string> index_to_backend;
@@ -50,8 +54,14 @@ struct PartialGraphOptions
std::unordered_map<ir::OperationIndex, ir::SubgraphIndex> index_to_graph;
};
-struct CompilerOptions
+class CompilerOptions
{
+public:
+ // Set default values for CompilerOptions
+  // All these default values should no longer be fetched from Env once we stop supporting Android NNAPI.
+ static std::unique_ptr<CompilerOptions> fromGlobalConfig();
+
+public:
// GENERAL OPTIONS
std::vector<std::string> backend_list;
@@ -65,75 +75,85 @@ struct CompilerOptions
bool disable_compile; //< Run with Interpreter if true, try compilation otherwise
bool fp16_enable; //< Whether fp16 mode ON/OFF
PartialGraphOptions partial_graph_options;
-
- util::TracingCtx *tracing_ctx; //< Profiling information
};
-CompilerOptions fetchCompilerOptionsFromGlobalConfig(const ir::Subgraphs &subgs);
+struct CompilerArtifact
+{
+ CompilerArtifact(void) = delete;
+ CompilerArtifact(std::shared_ptr<exec::Executors> executors,
+ std::unique_ptr<const util::TracingCtx> tracing_ctx)
+ : _executors{executors}, _tracing_ctx{std::move(tracing_ctx)} {};
+
+ std::shared_ptr<exec::Executors> _executors;
+ std::unique_ptr<const util::TracingCtx> _tracing_ctx;
+};
/**
- * @brief Class to compile graph model
+ * @brief Class to compile NN package
*/
class Compiler
{
public:
/**
- * @brief Construct a new Compiler object
- * @param[in] subgs All subgraphs of a model
- * @param[in] tracing_ctx Profiling information
+ * @brief Construct a new Compiler object for single model
+ * @param[in] model model to compile
+ * @param[in] coptions Compiler Options
+ */
+ Compiler(const std::shared_ptr<ir::Model> &model, CompilerOptions &copt);
+
+ /**
+ * @brief Construct a new Compiler object for NN package
+ * @param[in] nnpkg NN package to compile
+ * @param[in] coptions Compiler option vector for each model in package
*/
- Compiler(const std::shared_ptr<ir::Subgraphs> &subgs, util::TracingCtx *tracing_ctx);
+ Compiler(const std::shared_ptr<ir::NNPkg> &nnpkg,
+ std::vector<std::unique_ptr<CompilerOptions>> &copts);
public:
/**
* @brief Do compilation with the options
*
- * @return std::shared_ptr<exec::ExecutorMap> Executors as a result of compilation
+ * @return std::shared_ptr<CompilerArtifact> Executors as a result of compilation
*/
- std::shared_ptr<exec::ExecutorMap> compile(void);
+ std::shared_ptr<CompilerArtifact> compile(void);
/**
* @brief Do compilation with the options
*
- * @return std::vector<std::shared_ptr<exec::ExecutorMap>> Executors as a result of compilation
+ * @return std::vector<std::shared_ptr<CompilerArtifact>> Executors as a result of compilation
* for pipeline
*/
- std::vector<std::shared_ptr<exec::ExecutorMap>> compile(const char *package_file_path,
- const char *map_file_path);
+ std::vector<std::shared_ptr<CompilerArtifact>> compile(const char *package_file_path,
+ const char *map_file_path);
State state(void) const { return _state; }
- CompilerOptions &options() { return _options; }
-
/**
* @brief Allow to compute float32 using float16 data type
*/
void enableToFp16();
/**
- * @brief Set backends from string-encoded mappings from operation index to backend type (cpu,
- * acl_cl)
- */
- void set_backend_from_str(const char *backend_settings);
-
- /**
* @brief Build the partial graphs to compile with original graph
*/
bool buildPartialGraph(uint32_t num_graphs);
private:
void checkProfilerConditions();
- std::shared_ptr<ir::Graph> &primary_subgraph() { return _subgraphs->at(ir::SubgraphIndex{0}); }
+ std::shared_ptr<ir::Graph> &primary_subgraph()
+ {
+ return _nnpkg->primary_model()->at(ir::SubgraphIndex{0});
+ }
private:
- std::shared_ptr<ir::Subgraphs> _subgraphs;
+ std::shared_ptr<ir::NNPkg> _nnpkg;
  // NOTE These executors do not have duplicated subgraphs. This means they do not support
  // subgraphs being called recursively, because data of a non-constant tensor of a parent executor
  // would be updated by a child executor. If you want to support subgraphs being called
  // recursively, you have to allocate non-constant tensor memory of executors at execution time,
  // when each subgraph is called.
State _state;
- CompilerOptions _options;
+ std::vector<CompilerOptions *> _voptions;
};
} // namespace compiler
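// Illustrative sketch (not part of the patch): how the refactored pieces fit together,
// mirroring the nnfw_api_internal.cc changes earlier in this commit (simplified, error
// handling omitted; `model` is assumed to come from one of the loaders):
//
//   auto nnpkg = std::make_shared<onert::ir::NNPkg>(std::move(model));
//   std::vector<std::unique_ptr<onert::compiler::CompilerOptions>> coptions;
//   coptions.push_back(onert::compiler::CompilerOptions::fromGlobalConfig());
//
//   auto compiler = std::make_unique<onert::compiler::Compiler>(nnpkg, coptions);
//   std::shared_ptr<onert::compiler::CompilerArtifact> artifact = compiler->compile();
//
//   // The executors and the tracing context now travel together in the artifact.
//   auto execution = std::make_unique<onert::exec::Execution>(artifact->_executors);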
diff --git a/runtime/onert/core/include/compiler/LoweredGraph.h b/runtime/onert/core/include/compiler/LoweredGraph.h
index 10ca8e9fc..7264f2a10 100644
--- a/runtime/onert/core/include/compiler/LoweredGraph.h
+++ b/runtime/onert/core/include/compiler/LoweredGraph.h
@@ -60,9 +60,14 @@ public:
private:
void makeLowerInfo(const compiler::BackendResolver &backend_resolver);
void dumpLowerInfo();
- void lowerGraph(const ir::Graph &graph, const compiler::CompilerOptions &options);
+ void lowerGraph(const compiler::CompilerOptions &options);
private:
+ /**
+ * @brief Copy of target graph for lowering
+   * @note It uses a copy of the graph, not a reference.
+   *       This allows the original graph to be compiled multiple times.
+ */
ir::Graph _graph;
ir::Graph _parent_graph;
std::shared_ptr<ir::OperationIndexMap<int64_t>> _indexed_ranks;
diff --git a/runtime/onert/core/include/compiler/StaticShapeInferer.h b/runtime/onert/core/include/compiler/StaticShapeInferer.h
index b2272e262..f701dc207 100644
--- a/runtime/onert/core/include/compiler/StaticShapeInferer.h
+++ b/runtime/onert/core/include/compiler/StaticShapeInferer.h
@@ -28,6 +28,36 @@ namespace onert
{
namespace compiler
{
+/**
+ * @brief Class that observes and updates operands.
+ */
+class OperandObserver
+{
+public:
+ /**
+ * @brief Constructor of OperandObserver
+ *
+ * @param operands Operands to be updated
+ */
+ OperandObserver(const std::vector<ir::Operand *> &operands) : _operands{operands} {}
+ /**
+ * @brief Destructor of OperandObserver
+ */
+ virtual ~OperandObserver() = default;
+
+public:
+ /**
+   * @brief Update the shapes and some OperandInfo of the observed operands
+   *
+   * @param changed_operands_info New operand info to apply to the observed operands
+   * @param unpredictable Whether the runtime cannot predict the shapes of the operands at compilation time
+ */
+ void updateShapes(const std::vector<ir::OperandInfo> &changed_operands_info,
+ bool unpredictable = false);
+
+private:
+ std::vector<ir::Operand *> _operands;
+};
/**
* @brief Class to infer shape before running kernels. It does the following:
@@ -38,32 +68,42 @@ namespace compiler
class StaticShapeInferer : public ir::OperationVisitor
{
public:
- StaticShapeInferer(
- const ir::SubgraphIndex &subg_idx,
- const std::unordered_map<ir::SubgraphIndex, std::unique_ptr<compiler::LoweredGraph>>
- &lowered_subgs)
- : _lowered_subgs(lowered_subgs), _operands(lowered_subgs.at(subg_idx)->graph().operands()),
- _operations(lowered_subgs.at(subg_idx)->graph().operations()),
- _return_has_dynamic_tensor(false)
- { /* empty */
+ StaticShapeInferer(compiler::LoweredGraph *lowered_subg)
+ : _lowered_subg{lowered_subg}, _subg_input_observers{}, _controlflow_output_observer{nullptr},
+ _child_inferers{}
+ {
}
virtual ~StaticShapeInferer() = default;
public:
+ void appendSubgInputObserver(const ir::SubgraphIndex &subg_idx,
+ std::unique_ptr<OperandObserver> &&subg_input_observer) noexcept
+ {
+ _subg_input_observers[subg_idx] = std::move(subg_input_observer);
+ }
+
+ void setControlflowOutputObserver(std::unique_ptr<OperandObserver> &&output_observer) noexcept
+ {
+ _controlflow_output_observer = std::move(output_observer);
+ }
+
+ void appendChildInferer(const ir::SubgraphIndex &subg_idx, compiler::StaticShapeInferer *inferer)
+ {
+ _child_inferers[subg_idx] = inferer;
+ }
+
/**
- * @brief Infer shape of operands beloning to ops and set the output shape.
+ * @brief Infer shape of operands belonging to ops and set the output shape.
* If output shape cannot be known without running op, mark it so that it can be allocated
* when running kernel.
- * @param op Operation
- * @return @c true if op's input or output has any dynamic tensor; @c false otherwise.
*/
- bool infer(const ir::Operation &op);
+ void infer(void);
void dump();
private:
- void inferSubgraph(ir::SubgraphIndex subg_ind);
bool checkDynamicInput(const ir::Operation &op);
+ bool checkDynamicOutput(const ir::Operation &op);
void setDynamicOutput(const ir::Operation &op);
private:
@@ -113,6 +153,7 @@ private:
void visit(const ir::operation::Unpack &op) override;
void visit(const ir::operation::While &op) override;
void visit(const ir::operation::DetectionPostProcess &op) override;
+ void visit(const ir::operation::Bulk &op) override;
private:
/**
@@ -128,12 +169,11 @@ private:
void handleSimpleUnaryOp(const ir::Operation &op, const ir::OperandIndex input_idx);
private:
- const std::unordered_map<ir::SubgraphIndex, std::unique_ptr<compiler::LoweredGraph>>
- &_lowered_subgs;
- // _operands and _operations can be changed by controlflow operation
- ir::Operands &_operands; // operands of current subgraph
- ir::Operations &_operations; // operations of current subgraph
- bool _return_has_dynamic_tensor;
+ compiler::LoweredGraph *_lowered_subg;
+ std::unordered_map<ir::SubgraphIndex, std::unique_ptr<OperandObserver>>
+ _subg_input_observers; // child subg input
+ std::unique_ptr<OperandObserver> _controlflow_output_observer; // parent controlflow op output
+ std::unordered_map<ir::SubgraphIndex, compiler::StaticShapeInferer *> _child_inferers;
};
} // namespace compiler
diff --git a/runtime/onert/core/include/exec/Execution.h b/runtime/onert/core/include/exec/Execution.h
index b0a5cced3..1e8083c4c 100644
--- a/runtime/onert/core/include/exec/Execution.h
+++ b/runtime/onert/core/include/exec/Execution.h
@@ -22,7 +22,7 @@
#define __ONERT_EXEC_EXECUTION_H__
#include "ir/Layout.h"
-#include "exec/IExecutor.h"
+#include "exec/Executors.h"
#include "IODescription.h"
#include <thread>
@@ -46,7 +46,7 @@ public:
* @brief Construct a new Execution object
* @param[in] executor Model executor
*/
- Execution(const std::shared_ptr<ExecutorMap> &executors);
+ Execution(const std::shared_ptr<Executors> &executors);
public:
/**
@@ -250,7 +250,7 @@ private:
std::unique_ptr<IExecutor> &primary_executor() { return _executors->at(ir::SubgraphIndex{0}); };
private:
- const std::shared_ptr<ExecutorMap> _executors;
+ const std::shared_ptr<Executors> _executors;
IODescription _io_desc;
std::deque<std::pair<IODescription *, uint32_t>> _async_io_descs;
sem_t _async_io_descs_sem;
diff --git a/runtime/onert/core/include/exec/Executors.h b/runtime/onert/core/include/exec/Executors.h
new file mode 100644
index 000000000..5adb0eda4
--- /dev/null
+++ b/runtime/onert/core/include/exec/Executors.h
@@ -0,0 +1,71 @@
+/*
+ * Copyright (c) 2022 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __ONERT_EXEC_EXECUTORS_H__
+#define __ONERT_EXEC_EXECUTORS_H__
+
+#include "IExecutor.h"
+#include "ir/NNPkg.h"
+
+namespace onert
+{
+namespace exec
+{
+
+/**
+ * @brief Class to gather executors
+ */
+class Executors
+{
+public:
+ Executors(void) = default;
+ Executors(std::unique_ptr<ir::ModelEdges> model_edges) { _model_edges = std::move(model_edges); }
+ Executors(const Executors &) = delete;
+ Executors(Executors &&) = default;
+
+ // TODO Use Executor index
+ void emplace(ir::SubgraphIndex idx, std::unique_ptr<IExecutor> exec)
+ {
+ _executors.emplace(idx, std::move(exec));
+ }
+
+ std::unique_ptr<IExecutor> &at(ir::SubgraphIndex idx) { return _executors.at(idx); }
+
+ uint32_t inputSize() const;
+
+ uint32_t outputSize() const;
+
+ const ir::OperandInfo inputInfo(const ir::IOIndex &index);
+
+ const ir::OperandInfo outputInfo(const ir::IOIndex &index);
+
+ void execute(const IODescription &desc);
+
+private:
+ void executeEntries(const IODescription &desc);
+
+private:
+ // TODO Use Executor index
+  // Changing the index will affect if/while compilation and kernel implementation
+ std::unordered_map<ir::SubgraphIndex, std::unique_ptr<IExecutor>> _executors;
+ // NOTE _model_edges may use different struct type for executor implementation
+ std::unique_ptr<ir::ModelEdges> _model_edges;
+};
+
+} // namespace exec
+} // namespace onert
+
+#endif // __ONERT_EXEC_EXECUTORS_H__
diff --git a/runtime/onert/core/include/exec/FunctionSequence.h b/runtime/onert/core/include/exec/FunctionSequence.h
index cf3f2a882..7ff6d8b8c 100644
--- a/runtime/onert/core/include/exec/FunctionSequence.h
+++ b/runtime/onert/core/include/exec/FunctionSequence.h
@@ -75,8 +75,7 @@ public:
public: // methods related to dynamic tensor
struct DynamicTensorCtx
{
- ir::OperationIndex op_ind;
- const ir::Operations *operations = nullptr;
+ const ir::Operation *op = nullptr;
std::shared_ptr<exec::DynamicShapeInferer> dynamic_shape_inferer = nullptr;
};
diff --git a/runtime/onert/core/include/exec/IExecutor.h b/runtime/onert/core/include/exec/IExecutor.h
index adc68074f..bb5b5af98 100644
--- a/runtime/onert/core/include/exec/IExecutor.h
+++ b/runtime/onert/core/include/exec/IExecutor.h
@@ -107,8 +107,6 @@ struct IExecutor
virtual const std::vector<backend::builtin::IOTensor *> &getOutputTensors() const = 0;
};
-using ExecutorMap = std::unordered_map<ir::SubgraphIndex, std::unique_ptr<IExecutor>>;
-
} // namespace exec
} // namespace onert
diff --git a/runtime/onert/core/include/ir/Graph.h b/runtime/onert/core/include/ir/Graph.h
index 7a7688334..286caf72f 100644
--- a/runtime/onert/core/include/ir/Graph.h
+++ b/runtime/onert/core/include/ir/Graph.h
@@ -20,9 +20,9 @@
#include <functional>
#include <unordered_map>
+#include "ir/Model.h"
#include "ir/Operands.h"
#include "ir/Operations.h"
-#include "ir/Subgraphs.h"
namespace onert
{
@@ -50,7 +50,9 @@ private:
};
public:
- Graph(void);
+ explicit Graph(void);
+ explicit Graph(const Graph &);
+
~Graph(void);
// Graph Building
@@ -87,10 +89,9 @@ public:
void verify(void);
void removeOperand(const OperandIndex &ind) { _operands.remove(ind); }
void setLayout(Layout layout) { _layout = layout; }
- void setSubgraphs(const std::shared_ptr<Subgraphs> &subgs) { _subgraphs = subgs; }
- void setPartialgraphs(const std::shared_ptr<Subgraphs> &partialgraphs)
+ void setPartialModel(const std::shared_ptr<Model> &partial_model)
{
- _partialgraphs = partialgraphs;
+ _partialgraphs = partial_model;
}
void
setTensorName(std::shared_ptr<std::unordered_map<ir::OperandIndex, std::string>> &tensor_names)
@@ -134,27 +135,25 @@ public:
Operands &operands() { return _operands; } // TODO Remove this non-const accessor
const Operations &operations() const { return _operations; }
Operations &operations() { return _operations; }
- const std::shared_ptr<Subgraphs> &subgraphs() const { return _subgraphs; }
- std::shared_ptr<Subgraphs> &subgraphs() { return _subgraphs; }
Layout layout() const { return _layout; }
- std::shared_ptr<Subgraphs> &partialgraphs() { return _partialgraphs; }
+ std::shared_ptr<Model> &partialgraphs() { return _partialgraphs; }
std::shared_ptr<std::unordered_map<ir::OperandIndex, std::string>> &tensor_names()
{
return _tensor_names;
}
- std::unordered_map<std::string, IOIndex>::iterator _name_to_input_begin()
+ std::unordered_map<std::string, IOIndex>::const_iterator _name_to_input_begin() const
{
return _name_to_input.begin();
}
- std::unordered_map<std::string, IOIndex>::iterator _name_to_input_end()
+ std::unordered_map<std::string, IOIndex>::const_iterator _name_to_input_end() const
{
return _name_to_input.end();
}
- std::unordered_map<std::string, IOIndex>::iterator _name_to_output_begin()
+ std::unordered_map<std::string, IOIndex>::const_iterator _name_to_output_begin() const
{
return _name_to_output.begin();
}
- std::unordered_map<std::string, IOIndex>::iterator _name_to_output_end()
+ std::unordered_map<std::string, IOIndex>::const_iterator _name_to_output_end() const
{
return _name_to_output.end();
}
@@ -172,13 +171,11 @@ private:
OperandIndexSequence _outputs;
std::unordered_map<std::string, IOIndex> _name_to_input;
std::unordered_map<std::string, IOIndex> _name_to_output;
- // Child subgraphs
- std::shared_ptr<Subgraphs> _subgraphs;
// TFLite and circle's default layout is NHWC;
Layout _layout{Layout::NHWC};
- // Partial Graphs
- std::shared_ptr<ir::Subgraphs> _partialgraphs;
+ // model for partial graphs
+ std::shared_ptr<ir::Model> _partialgraphs;
std::shared_ptr<std::unordered_map<ir::OperandIndex, std::string>> _tensor_names;
};
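The renamed setter is exercised later in this patch (Compiler::buildPartialGraph). Stripped of the surrounding partitioning logic, the call pattern looks roughly like the sketch below; it is not part of the change itself.

#include "ir/Graph.h"
#include "ir/Model.h"

#include <memory>

// Sketch: attach an empty partial model (one Graph per partition) to the primary graph.
void attachPartialModel(onert::ir::Graph &primary_graph, uint32_t num_graphs)
{
  auto partial_model = std::make_shared<onert::ir::Model>();
  for (uint32_t idx = 0; idx < num_graphs; idx++)
    partial_model->push(onert::ir::SubgraphIndex{idx}, std::make_unique<onert::ir::Graph>());

  primary_graph.setPartialModel(partial_model);   // was setPartialgraphs(Subgraphs)
  auto &attached = primary_graph.partialgraphs(); // now returns a shared_ptr<Model>
  (void)attached;
}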
diff --git a/runtime/onert/core/include/ir/Index.h b/runtime/onert/core/include/ir/Index.h
index e01b090f3..f01a4c84d 100644
--- a/runtime/onert/core/include/ir/Index.h
+++ b/runtime/onert/core/include/ir/Index.h
@@ -38,6 +38,9 @@ using IOIndex = ::onert::util::Index<uint32_t, IOIndexTag>;
struct SubgraphIndexTag;
using SubgraphIndex = ::onert::util::Index<uint32_t, SubgraphIndexTag>;
+struct ModelIndexTag;
+using ModelIndex = ::onert::util::Index<uint32_t, ModelIndexTag>;
+
template <typename IndexType>
std::ostream &_index_print_impl(std::ostream &o, const std::string &prefix, IndexType index)
{
@@ -64,7 +67,12 @@ inline std::ostream &operator<<(std::ostream &o, const IOIndex &i)
inline std::ostream &operator<<(std::ostream &o, const SubgraphIndex &i)
{
- return _index_print_impl(o, "SUBGRAPH", i); // $ubgraph
+ return _index_print_impl(o, "SUBGRAPH", i);
+}
+
+inline std::ostream &operator<<(std::ostream &o, const ModelIndex &i)
+{
+ return _index_print_impl(o, "MODEL", i);
}
} // namespace ir
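A one-line sketch of the new ModelIndex stream operator next to the existing indices; the exact text comes from _index_print_impl, so the output is only indicative.

#include "ir/Index.h"

#include <iostream>

// Sketch: ModelIndex streams with a "MODEL" prefix, as SUBGRAPH and IO indices already do.
void printPackageLocation()
{
  std::cout << onert::ir::ModelIndex{0} << " / " << onert::ir::SubgraphIndex{0} << " / "
            << onert::ir::IOIndex{1} << std::endl;
}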
diff --git a/runtime/onert/core/include/ir/Layout.h b/runtime/onert/core/include/ir/Layout.h
index 082810172..0cdbcc2c8 100644
--- a/runtime/onert/core/include/ir/Layout.h
+++ b/runtime/onert/core/include/ir/Layout.h
@@ -18,6 +18,7 @@
#define __ONERT_IR_LAYOUT_H__
#include <functional>
+#include <stdexcept>
#include <string>
namespace onert
diff --git a/runtime/onert/core/include/ir/Subgraphs.h b/runtime/onert/core/include/ir/Model.h
index 6cb369447..c3c0d87b8 100644
--- a/runtime/onert/core/include/ir/Subgraphs.h
+++ b/runtime/onert/core/include/ir/Model.h
@@ -14,8 +14,8 @@
* limitations under the License.
*/
-#ifndef __ONERT_IR_SUBGRAPHS_H__
-#define __ONERT_IR_SUBGRAPHS_H__
+#ifndef __ONERT_IR_MODEL_H__
+#define __ONERT_IR_MODEL_H__
#include <memory>
#include <unordered_map>
@@ -30,15 +30,15 @@ namespace ir
class Graph;
-class Subgraphs
+class Model
{
public:
- Subgraphs() = default;
- Subgraphs(const Subgraphs &obj) = default;
- Subgraphs(Subgraphs &&) = default;
- Subgraphs &operator=(const Subgraphs &) = default;
- Subgraphs &operator=(Subgraphs &&) = default;
- ~Subgraphs() = default;
+ Model() = default;
+ Model(const Model &obj) = default;
+ Model(Model &&) = default;
+ Model &operator=(const Model &) = default;
+ Model &operator=(Model &&) = default;
+ ~Model() = default;
/**
* @brief Put subgraph in the container with a new Index for that
@@ -120,14 +120,14 @@ public:
*
* @return count of Subgraphs
*/
- size_t count() const { return _subgraphs.size(); }
+ size_t subgraphs_count() const { return _subgraphs.size(); }
/**
* @brief Return the primary subgraph
*
- * @return std::shared_ptr<Graph> Primary sugraph
+ * @return std::shared_ptr<Graph> Primary subgraph
*/
- std::shared_ptr<Graph> primary() const { return _subgraphs.at(SubgraphIndex{0}); }
+ std::shared_ptr<Graph> primary_subgraph() const { return _subgraphs.at(SubgraphIndex{0}); }
private:
std::unordered_map<SubgraphIndex, std::shared_ptr<Graph>> _subgraphs;
@@ -136,4 +136,4 @@ private:
} // namespace ir
} // namespace onert
-#endif // __ONERT_IR_SUBGRAPHS_H__
+#endif // __ONERT_IR_MODEL_H__
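The class keeps the old Subgraphs behaviour; only the names change. A small sketch of the renamed accessors, using only members visible in this patch (iterate() is shown in use by Compiler::compile further down):

#include "ir/Graph.h"
#include "ir/Model.h"

#include <memory>

// Sketch: walk a loaded model with the renamed accessors.
void inspectModel(const std::shared_ptr<onert::ir::Model> &model)
{
  const auto n = model->subgraphs_count();  // formerly count()
  auto primary = model->primary_subgraph(); // formerly primary()
  (void)n;
  (void)primary;

  model->iterate([](const onert::ir::SubgraphIndex &index, onert::ir::Graph &subg) {
    // visit every subgraph, e.g. to run mandatory passes as the compiler does
    (void)index;
    (void)subg;
  });
}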
diff --git a/runtime/onert/core/include/ir/NNPkg.h b/runtime/onert/core/include/ir/NNPkg.h
new file mode 100644
index 000000000..d9f825e85
--- /dev/null
+++ b/runtime/onert/core/include/ir/NNPkg.h
@@ -0,0 +1,193 @@
+/*
+ * Copyright (c) 2022 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __ONERT_IR_NNPKG_H__
+#define __ONERT_IR_NNPKG_H__
+
+#include <memory>
+#include <unordered_set>
+#include <vector>
+
+#include "ir/Index.h"
+#include "ir/Model.h"
+
+namespace onert
+{
+namespace ir
+{
+
+using IODesc = std::tuple<ModelIndex, SubgraphIndex, IOIndex>;
+
+struct ModelEdge
+{
+ IODesc from;
+ IODesc to;
+};
+
+struct ModelEdgeEqual
+{
+ bool operator()(const onert::ir::ModelEdge &lhs, const onert::ir::ModelEdge &rhs) const
+ {
+ return lhs.from == rhs.from && lhs.to == rhs.to;
+ }
+};
+
+struct ModelEdgeHash
+{
+ size_t operator()(const ::onert::ir::ModelEdge &edge) const noexcept
+ {
+ unsigned long long h1 = (std::get<0>(edge.from).value() << 24) |
+ (std::get<1>(edge.from).value() << 16) | std::get<2>(edge.from).value();
+ unsigned long long h2 = (std::get<0>(edge.to).value() << 24) |
+ (std::get<1>(edge.to).value() << 16) | std::get<2>(edge.to).value();
+ return h1 + h2;
+ }
+};
+
+inline std::ostream &operator<<(std::ostream &o, const IODesc &od)
+{
+ o << std::get<0>(od).value() << ":" << std::get<1>(od).value() << ":" << std::get<2>(od).value();
+ return o;
+}
+
+using ModelEdgeSet = std::unordered_set<ir::ModelEdge, ir::ModelEdgeHash, ir::ModelEdgeEqual>;
+
+/**
+ * @brief Struct to gather model I/O information in multimodel NN package
+ *        Each model I/O plays one of the following roles:
+ *        - Package input/output
+ *        - Start/finish point of an edge between models
+ */
+struct ModelEdges
+{
+ std::vector<ir::IODesc> pkg_inputs;
+ std::vector<ir::IODesc> pkg_outputs;
+ ModelEdgeSet edges;
+};
+
+class NNPkg
+{
+public:
+ NNPkg() = default;
+ NNPkg(const NNPkg &obj) = default;
+ NNPkg(NNPkg &&) = default;
+ NNPkg &operator=(const NNPkg &) = default;
+ NNPkg &operator=(NNPkg &&) = default;
+ ~NNPkg() = default;
+
+ NNPkg(std::shared_ptr<Model> model) { _models[ModelIndex{0}] = model; }
+ std::shared_ptr<Model> primary_model() { return _models.at(onert::ir::ModelIndex{0}); }
+
+ /**
+ * @brief Put model at index
+ *
+ * @param[in] model Model to be pushed
+ * @param[in] index Index where Model is to be pushed
+ */
+ void push(ModelIndex index, const std::shared_ptr<Model> &model) { _models[index] = model; }
+
+ /**
+   * @brief Get the count of models
+ *
+ * @return the count of models
+ */
+ size_t model_count() const { return _models.size(); }
+
+ /**
+ * @brief Get model at index
+ *
+ * @param[in] index Index of the model to be returned
+ * @return Model at index
+ */
+ const std::shared_ptr<Model> &model(const ModelIndex &index) const { return _models.at(index); }
+ /**
+ * @brief Get model at index
+ *
+ * @param[in] index Index of the model to be returned
+ * @return Model at index
+ */
+ std::shared_ptr<Model> &model(const ModelIndex &index) { return _models.at(index); }
+
+ /**
+ * @brief Get pkg_input at index
+ *
+ * @param[in] index Index of pkg_input to be returned
+ * @return IODesc at index
+ */
+ const IODesc &input(uint32_t index) const { return _edges.pkg_inputs[index]; }
+ /**
+ * @brief Get pkg_input at index
+ *
+ * @param[in] index Index of pkg_input to be returned
+ * @return IODesc at index
+ */
+ IODesc &input(uint32_t index) { return _edges.pkg_inputs[index]; }
+ /**
+ * @brief Add input at the end
+ *
+ * @param[in] input Input IODesc to be pushed
+ */
+ void addInput(const IODesc &input) { _edges.pkg_inputs.push_back(input); }
+
+ /**
+ * @brief Get pkg_output at index
+ *
+ * @param[in] index Index of pkg_output to be returned
+ * @return IODesc at index
+ */
+ const IODesc &output(uint32_t index) const { return _edges.pkg_outputs[index]; }
+ /**
+ * @brief Get pkg_output at index
+ *
+ * @param[in] index Index of pkg_output to be returned
+ * @return IODesc at index
+ */
+ IODesc &output(uint32_t index) { return _edges.pkg_outputs[index]; }
+ /**
+ * @brief Add output at the end
+ *
+ * @param[in] output Output IODesc to be pushed
+ */
+ void addOutput(const IODesc &output) { _edges.pkg_outputs.push_back(output); }
+
+ /**
+ * @brief Add edge between models at the end
+ *
+ * @param[in] from from IODesc
+ * @param[in] to to IODesc
+ */
+ void addEdge(const IODesc &from, const IODesc &to)
+ {
+ std::cout << from << " -> " << to << std::endl;
+ _edges.edges.insert(ModelEdge{from, to});
+ }
+ /**
+ * @brief Get model edge set
+ * @return Edge set reference
+ */
+ const ModelEdges &model_edges() { return _edges; }
+
+ // TODO: Add iterate() or getter for edges
+
+private:
+ std::unordered_map<ModelIndex, std::shared_ptr<Model>> _models;
+ ModelEdges _edges;
+};
+
+} // namespace ir
+} // namespace onert
+
+#endif // __ONERT_IR_NNPKG_H__
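NNPkg is the new top-level container for multi-model packages. Below is a compressed sketch of how two already-loaded models could be wired together using only the members declared above; all index values and the helper name are illustrative.

#include "ir/NNPkg.h"

#include <memory>

// Sketch: expose package-level I/O and connect model 0's output 0 to model 1's input 0.
onert::ir::NNPkg wireTwoModels(const std::shared_ptr<onert::ir::Model> &m0,
                               const std::shared_ptr<onert::ir::Model> &m1)
{
  using namespace onert::ir;

  NNPkg pkg(m0);               // model 0 becomes the primary model
  pkg.push(ModelIndex{1}, m1); // register the second model

  // Package-level input comes from model 0, package-level output from model 1.
  pkg.addInput(IODesc{ModelIndex{0}, SubgraphIndex{0}, IOIndex{0}});
  pkg.addOutput(IODesc{ModelIndex{1}, SubgraphIndex{0}, IOIndex{0}});

  // Edge between the models: (model 0, subgraph 0, output 0) -> (model 1, subgraph 0, input 0).
  pkg.addEdge(IODesc{ModelIndex{0}, SubgraphIndex{0}, IOIndex{0}},
              IODesc{ModelIndex{1}, SubgraphIndex{0}, IOIndex{0}});

  return pkg; // the compiler reads pkg.model_edges() when lowering a multi-model package
}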
diff --git a/runtime/onert/core/include/ir/TypeInfo.h b/runtime/onert/core/include/ir/TypeInfo.h
index 0a00da5fd..3c5062795 100644
--- a/runtime/onert/core/include/ir/TypeInfo.h
+++ b/runtime/onert/core/include/ir/TypeInfo.h
@@ -50,11 +50,7 @@ public:
public:
DataType type() const { return _type; }
- float scale() const
- {
- assert(_quant.scales.size() == 1);
- return _quant.scales[0];
- }
+ float scale() const { return _quant.scales[0]; }
const std::vector<float> &scales() const { return _quant.scales; }
int32_t zero_point() const
{
diff --git a/runtime/onert/core/include/ir/operation/Bulk.h b/runtime/onert/core/include/ir/operation/Bulk.h
index 1825f7fad..3c20f392f 100644
--- a/runtime/onert/core/include/ir/operation/Bulk.h
+++ b/runtime/onert/core/include/ir/operation/Bulk.h
@@ -32,6 +32,8 @@ public:
struct Param
{
std::string binary_path;
+ std::vector<ir::Shape> origin_input_shapes;
+ std::vector<ir::Shape> origin_output_shapes;
};
public:
diff --git a/runtime/onert/core/include/util/CalculateActivationRange.h b/runtime/onert/core/include/util/CalculateActivationRange.h
index db76f9dde..4369ca53e 100644
--- a/runtime/onert/core/include/util/CalculateActivationRange.h
+++ b/runtime/onert/core/include/util/CalculateActivationRange.h
@@ -17,6 +17,8 @@
#ifndef __ONERT_UTIL_CALCULATE_ACTIVATION_RANGE_H__
#define __ONERT_UTIL_CALCULATE_ACTIVATION_RANGE_H__
+#include <limits>
+
#include "ir/InternalType.h"
namespace onert
diff --git a/runtime/onert/core/include/util/Config.lst b/runtime/onert/core/include/util/Config.lst
index 89a9a6ac2..4bbc02ac3 100644
--- a/runtime/onert/core/include/util/Config.lst
+++ b/runtime/onert/core/include/util/Config.lst
@@ -20,7 +20,7 @@
// Name | Type | Default
CONFIG(GRAPH_DOT_DUMP , int , "0")
-CONFIG(BACKENDS , std::string , "cpu;acl_cl;acl_neon;ruy;xnnpack;gpu_cl;bcq") // FIXME Remove bcq
+CONFIG(BACKENDS , std::string , "cpu;acl_cl;acl_neon;ruy;xnnpack;gpu_cl;trix;bcq") // FIXME Remove bcq
CONFIG(OP_BACKEND_ALLOPS , std::string , "")
CONFIG(OP_BACKEND_MAP , std::string , "")
CONFIG(DISABLE_COMPILE , bool , "0")
diff --git a/runtime/onert/core/include/util/ConfigSource.h b/runtime/onert/core/include/util/ConfigSource.h
index da8bc8620..d53b8106d 100644
--- a/runtime/onert/core/include/util/ConfigSource.h
+++ b/runtime/onert/core/include/util/ConfigSource.h
@@ -17,17 +17,17 @@
#ifndef __ONERT_UTIL_CONFIG_SOURCE_H__
#define __ONERT_UTIL_CONFIG_SOURCE_H__
-#include <memory>
-
-#include "IConfigSource.h"
+#include <string>
+#include <unordered_map>
namespace onert
{
namespace util
{
-void config_source(std::unique_ptr<IConfigSource> &&source);
-void config_source_ext(std::unique_ptr<IConfigSource> &&source);
+using CfgKeyValues = std::unordered_map<std::string, std::string>;
+
+void setConfigKeyValues(const CfgKeyValues &keyValues);
bool toBool(const std::string &val);
int toInt(const std::string &val);
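The configuration API drops the pluggable IConfigSource objects in favour of a plain key/value map. A sketch of the new entry point, using keys that appear in util/Config.lst above (the values are examples only):

#include "util/ConfigSource.h"

// Sketch: replaces the old config_source(std::unique_ptr<IConfigSource> &&) entry point.
void applySessionConfig()
{
  onert::util::CfgKeyValues values;
  values["GRAPH_DOT_DUMP"] = "1";   // keys and defaults are listed in util/Config.lst
  values["OP_BACKEND_ALLOPS"] = "cpu";

  onert::util::setConfigKeyValues(values);
}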
diff --git a/runtime/onert/core/include/util/ObjectManager.h b/runtime/onert/core/include/util/ObjectManager.h
index a493789fa..36b6c85c8 100644
--- a/runtime/onert/core/include/util/ObjectManager.h
+++ b/runtime/onert/core/include/util/ObjectManager.h
@@ -17,14 +17,13 @@
#ifndef __ONERT_UTIL_OBJECT_MANAGER_H__
#define __ONERT_UTIL_OBJECT_MANAGER_H__
-#include <unordered_map>
-#include <memory>
-#include <list>
-#include <functional>
+#include "util/logging.h"
+#include <cassert>
+#include <functional>
+#include <list>
#include <memory>
-
-#include "util/logging.h"
+#include <unordered_map>
namespace onert
{
@@ -208,7 +207,7 @@ public:
l.push_back(e.first);
}
- for (auto index : l)
+ for (auto &index : l)
{
fn(index, *_objects[index]);
}
diff --git a/runtime/onert/core/include/util/TracingCtx.h b/runtime/onert/core/include/util/TracingCtx.h
index 334257d87..da284d2fb 100644
--- a/runtime/onert/core/include/util/TracingCtx.h
+++ b/runtime/onert/core/include/util/TracingCtx.h
@@ -19,7 +19,7 @@
#include "ir/Graph.h"
#include "ir/Index.h"
-#include "ir/Subgraphs.h"
+#include "ir/Model.h"
#include <unordered_map>
#include <mutex>
@@ -37,29 +37,9 @@ class TracingCtx
public:
/**
* @brief Create and store unique session id managed by this class
- * Note that this constructor can be called by multiple sessions running in parallely.
- * Use this constructor only when there is only one subgraph in a model.
+   * @note This constructor can be called by multiple sessions running in parallel.
*/
- TracingCtx(const ir::Graph *primary_subgraph)
- {
- decideSessionID();
- _subgraph_indices.emplace(primary_subgraph, 0);
- }
-
- /**
- * @brief Create and store unique session id managed by this class
- * Note that this constructor can be called by multiple sessions running in parallely.
- */
- TracingCtx(const onert::ir::Subgraphs *subgraphs)
- {
- assert(subgraphs);
-
- decideSessionID();
-
- auto count = subgraphs->count();
- for (size_t i = 0; i < count; i++)
- _subgraph_indices.emplace(subgraphs->at(onert::ir::SubgraphIndex(i)).get(), i);
- }
+ TracingCtx(void) { decideSessionID(); }
uint32_t getSessionId() const { return _session_id; }
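With the subgraph-aware constructors removed, graphs are now registered one by one; Compiler::compile does this further down via setSubgraphIndex. A sketch of the new sequence, assuming setSubgraphIndex keeps the graph-pointer-plus-index shape implied by the removed constructors:

#include "util/TracingCtx.h"

#include <memory>

// Sketch of the per-subgraph registration the compiler performs after lowering.
void traceLoweredGraph(onert::ir::Graph &graph)
{
  auto tracing_ctx = std::make_unique<onert::util::TracingCtx>();

  // Register the primary subgraph; additional subgraphs get their own index values.
  tracing_ctx->setSubgraphIndex(&graph, 0);

  const auto session_id = tracing_ctx->getSessionId();
  (void)session_id;
}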
diff --git a/runtime/onert/core/src/backend/builtin/ExternalContext.h b/runtime/onert/core/src/backend/builtin/ExternalContext.h
index e67be988d..390dbb579 100644
--- a/runtime/onert/core/src/backend/builtin/ExternalContext.h
+++ b/runtime/onert/core/src/backend/builtin/ExternalContext.h
@@ -24,6 +24,8 @@
#include <ruy/ctx.h>
#include <ruy/tune.h>
+#include <memory>
+
namespace onert
{
namespace backend
diff --git a/runtime/onert/core/src/backend/builtin/KernelGenerator.cc b/runtime/onert/core/src/backend/builtin/KernelGenerator.cc
index 3d6358d9d..fa2fc0b94 100644
--- a/runtime/onert/core/src/backend/builtin/KernelGenerator.cc
+++ b/runtime/onert/core/src/backend/builtin/KernelGenerator.cc
@@ -16,12 +16,10 @@
#include "KernelGenerator.h"
-#include <backend/BackendContext.h>
-#include <util/Utils.h>
#include "kernel/IfLayer.h"
-#include "kernel/WhileLayer.h"
#include "kernel/PermuteLayer.h"
-#include "exec/ExecutorBase.h"
+#include "kernel/WhileLayer.h"
+
#include "exec/FunctionSequence.h"
namespace onert
@@ -35,12 +33,12 @@ KernelGenerator::KernelGenerator(const ir::Graph &graph, DynamicTensorManager *d
const std::shared_ptr<TensorRegistry> &tensor_reg,
const std::shared_ptr<ExternalContext> &external_context)
: basic::KernelGeneratorBase{graph}, _dyn_tensor_manager{dyn_tensor_manager},
- _tensor_reg{tensor_reg}, _tensor_registries{}, _executor_map{nullptr}, _external_context{
- external_context}
+ _tensor_reg{tensor_reg}, _tensor_registries{}, _executors{nullptr}, _external_context{
+ external_context}
{
UNUSED_RELEASE(_graph);
UNUSED_RELEASE(_tensor_registries);
- UNUSED_RELEASE(_executor_map);
+ UNUSED_RELEASE(_executors);
}
std::unique_ptr<exec::FunctionSequence> KernelGenerator::generate(ir::OperationIndex ind)
@@ -48,20 +46,16 @@ std::unique_ptr<exec::FunctionSequence> KernelGenerator::generate(ir::OperationI
assert(_dyn_tensor_manager);
assert(_tensor_reg);
- auto dyn_shape_inferer =
- std::make_unique<exec::DynamicShapeInferer>(_graph.operands(), _tensor_reg);
-
auto ret = std::make_unique<exec::FunctionSequence>();
// Prepare to handle dynamic tensors later
auto dyn_ctx = std::make_shared<exec::FunctionSequence::DynamicTensorCtx>();
{
- dyn_ctx->op_ind = ind;
- dyn_ctx->operations = &_graph.operations();
- dyn_ctx->dynamic_shape_inferer = std::move(dyn_shape_inferer);
-
- ret->dynamic_tensor_ctx(dyn_ctx);
+ dyn_ctx->op = &_graph.operations().at(ind);
+ dyn_ctx->dynamic_shape_inferer =
+ std::make_unique<exec::DynamicShapeInferer>(_graph.operands(), _tensor_reg);
}
+ ret->dynamic_tensor_ctx(dyn_ctx);
auto &op = _graph.operations().at(ind);
op.accept(*this);
@@ -90,12 +84,12 @@ void KernelGenerator::visit(const ir::operation::If &node)
output_tensors.emplace_back(output_tensor);
}
- // IfLayer just set ExecutorMap instead of then and else executor to avoid complexity of
+ // IfLayer just set Executors instead of then and else executor to avoid complexity of
// creating executor recusively
const auto cond_tensor = input_tensors.front();
input_tensors.erase(input_tensors.begin());
auto fn = std::make_unique<::onert::backend::builtin::kernel::IfLayer>(
- cond_tensor, input_tensors, output_tensors, then_subg_index, else_subg_index, _executor_map,
+ cond_tensor, input_tensors, output_tensors, then_subg_index, else_subg_index, _executors,
_external_context);
_return_fn = std::move(fn);
@@ -136,10 +130,10 @@ void KernelGenerator::visit(const ir::operation::While &node)
output_tensors.emplace_back(output_tensor);
}
- // WhileLayer just set ExecutorMap instead of cond and body executor to avoid complexity of
+ // WhileLayer just set Executors instead of cond and body executor to avoid complexity of
// creating executor recusively
auto fn = std::make_unique<::onert::backend::builtin::kernel::WhileLayer>(
- input_tensors, output_tensors, cond_subg_index, body_subg_index, _executor_map,
+ input_tensors, output_tensors, cond_subg_index, body_subg_index, _executors,
_dyn_tensor_manager->dynamic_mem_mgr().get(), _external_context);
_return_fn = std::move(fn);
diff --git a/runtime/onert/core/src/backend/builtin/KernelGenerator.h b/runtime/onert/core/src/backend/builtin/KernelGenerator.h
index 00ad962b9..d5931ca26 100644
--- a/runtime/onert/core/src/backend/builtin/KernelGenerator.h
+++ b/runtime/onert/core/src/backend/builtin/KernelGenerator.h
@@ -17,13 +17,14 @@
#ifndef __ONERT_BACKEND_BUILTIN_KERNEL_GENERATOR_H__
#define __ONERT_BACKEND_BUILTIN_KERNEL_GENERATOR_H__
-#include "exec/IExecutor.h"
+#include "DynamicTensorManager.h"
#include "ExternalContext.h"
-#include "ir/Graph.h"
-#include "TensorBuilder.h"
-#include "compiler/TensorRegistries.h"
-#include "backend/basic/KernelGeneratorBase.h"
#include "TensorRegistry.h"
+#include "../../compiler/TensorRegistries.h"
+
+#include "backend/basic/KernelGeneratorBase.h"
+#include "exec/Executors.h"
+#include "ir/Graph.h"
namespace onert
{
@@ -43,10 +44,10 @@ public:
{
_tensor_registries = tensor_registries;
}
- void setExecutorMap(const std::shared_ptr<exec::ExecutorMap> &executor_map)
+ void setExecutors(const std::shared_ptr<exec::Executors> &executors)
{
// FIXME Using shared_ptr's raw pointer!
- _executor_map = executor_map.get();
+ _executors = executors.get();
}
std::unique_ptr<exec::FunctionSequence> generate(ir::OperationIndex ind) override;
@@ -64,7 +65,7 @@ private:
DynamicTensorManager *_dyn_tensor_manager;
std::shared_ptr<TensorRegistry> _tensor_reg;
compiler::TensorRegistries _tensor_registries;
- exec::ExecutorMap *_executor_map;
+ exec::Executors *_executors;
const std::shared_ptr<ExternalContext> _external_context;
};
diff --git a/runtime/onert/core/src/backend/builtin/kernel/IfLayer.cc b/runtime/onert/core/src/backend/builtin/kernel/IfLayer.cc
index fdd9d9d14..cdb41960a 100644
--- a/runtime/onert/core/src/backend/builtin/kernel/IfLayer.cc
+++ b/runtime/onert/core/src/backend/builtin/kernel/IfLayer.cc
@@ -16,10 +16,6 @@
#include "IfLayer.h"
-#include <backend/ITensor.h>
-#include "exec/ExecutorBase.h"
-#include "PermuteLayer.h"
-
namespace onert
{
namespace backend
@@ -33,13 +29,13 @@ IfLayer::IfLayer(backend::IPortableTensor *cond_tensor,
const std::vector<backend::IPortableTensor *> input_tensors,
const std::vector<backend::IPortableTensor *> output_tensors,
const ir::SubgraphIndex &then_subg_index, const ir::SubgraphIndex &else_subg_index,
- exec::ExecutorMap *executor_map,
+ exec::Executors *executors,
const std::shared_ptr<ExternalContext> &external_context)
: _cond_tensor{cond_tensor}, _input_tensors{input_tensors}, _output_tensors{output_tensors},
- _then_subg_index{then_subg_index}, _else_subg_index{else_subg_index},
- _executor_map{executor_map}, _external_context{external_context}
+ _then_subg_index{then_subg_index}, _else_subg_index{else_subg_index}, _executors{executors},
+ _external_context{external_context}
{
- // At this point, executor_map may not have executors of then subg and else subg
+ // At this point, executors may not have executors of then subg and else subg
}
void IfLayer::run()
@@ -65,12 +61,12 @@ void IfLayer::run()
if (cond_result)
{
VERBOSE(If) << "Call to $" << _then_subg_index << " (then)" << std::endl;
- subg_exec = _executor_map->at(_then_subg_index).get();
+ subg_exec = _executors->at(_then_subg_index).get();
}
else
{
VERBOSE(If) << "Call to $" << _else_subg_index << " (else)" << std::endl;
- subg_exec = _executor_map->at(_else_subg_index).get();
+ subg_exec = _executors->at(_else_subg_index).get();
}
subg_exec->execute(_input_tensors, _output_tensors);
diff --git a/runtime/onert/core/src/backend/builtin/kernel/IfLayer.h b/runtime/onert/core/src/backend/builtin/kernel/IfLayer.h
index f12ef3605..fa5537a67 100644
--- a/runtime/onert/core/src/backend/builtin/kernel/IfLayer.h
+++ b/runtime/onert/core/src/backend/builtin/kernel/IfLayer.h
@@ -18,7 +18,7 @@
#define __ONERT_BACKEND_BUILTIN_KERNEL_IF_LAYER_H__
#include <backend/IPortableTensor.h>
-#include <exec/IExecutor.h>
+#include <exec/Executors.h>
#include "../ExternalContext.h"
namespace onert
@@ -37,8 +37,7 @@ public:
const std::vector<backend::IPortableTensor *> input_tensors,
const std::vector<backend::IPortableTensor *> output_tensors,
const ir::SubgraphIndex &then_subg_index, const ir::SubgraphIndex &else_subg_index,
- exec::ExecutorMap *executor_map,
- const std::shared_ptr<ExternalContext> &external_context);
+ exec::Executors *executors, const std::shared_ptr<ExternalContext> &external_context);
public:
void run() override;
@@ -49,7 +48,7 @@ private:
const std::vector<backend::IPortableTensor *> _output_tensors;
const ir::SubgraphIndex _then_subg_index;
const ir::SubgraphIndex _else_subg_index;
- exec::ExecutorMap *_executor_map;
+ exec::Executors *_executors;
const std::shared_ptr<ExternalContext> _external_context;
};
diff --git a/runtime/onert/core/src/backend/builtin/kernel/PermuteLayer.cc b/runtime/onert/core/src/backend/builtin/kernel/PermuteLayer.cc
index 20cd87ad1..ddaecdf57 100644
--- a/runtime/onert/core/src/backend/builtin/kernel/PermuteLayer.cc
+++ b/runtime/onert/core/src/backend/builtin/kernel/PermuteLayer.cc
@@ -16,9 +16,9 @@
#include "PermuteLayer.h"
-#include "exec/ShapeConverter.h"
+#include "../../../exec/ShapeConverter.h"
-#include "ruy/context.h" // from @ruy
+#include <ruy/context.h> // from @ruy
namespace onert
{
diff --git a/runtime/onert/core/src/backend/builtin/kernel/PermuteLayer.h b/runtime/onert/core/src/backend/builtin/kernel/PermuteLayer.h
index ac5470e85..227e32434 100644
--- a/runtime/onert/core/src/backend/builtin/kernel/PermuteLayer.h
+++ b/runtime/onert/core/src/backend/builtin/kernel/PermuteLayer.h
@@ -17,10 +17,10 @@
#ifndef __ONERT_BACKEND_BUILTIN_KERNEL_PERMUTELAYER_H__
#define __ONERT_BACKEND_BUILTIN_KERNEL_PERMUTELAYER_H__
-#include "exec/IPermuteFunction.h"
-#include "exec/IExecutor.h"
#include "../ExternalContext.h"
-#include "ruy/thread_pool.h" // from @ruy
+#include "../../../exec/IPermuteFunction.h"
+
+#include <ruy/thread_pool.h> // from @ruy
namespace onert
{
diff --git a/runtime/onert/core/src/backend/builtin/kernel/WhileLayer.cc b/runtime/onert/core/src/backend/builtin/kernel/WhileLayer.cc
index 81b4a6378..8e006c5ea 100644
--- a/runtime/onert/core/src/backend/builtin/kernel/WhileLayer.cc
+++ b/runtime/onert/core/src/backend/builtin/kernel/WhileLayer.cc
@@ -16,11 +16,12 @@
#include "WhileLayer.h"
-#include <algorithm>
-#include <backend/ITensor.h>
-#include "exec/ExecutorBase.h"
-#include <misc/polymorphic_downcast.h>
#include "PermuteLayer.h"
+#include "../../../exec/ExecutorBase.h"
+
+#include <misc/polymorphic_downcast.h>
+
+#include <algorithm>
namespace onert
{
@@ -34,14 +35,14 @@ namespace kernel
WhileLayer::WhileLayer(const std::vector<backend::IPortableTensor *> input_tensors,
const std::vector<backend::IPortableTensor *> output_tensors,
const ir::SubgraphIndex &cond_subg_index,
- const ir::SubgraphIndex &body_subg_index, exec::ExecutorMap *executor_map,
+ const ir::SubgraphIndex &body_subg_index, exec::Executors *executors,
basic::DynamicMemoryManager *dyn_memory_manager,
const std::shared_ptr<ExternalContext> &external_context)
: _cond_subg_index{cond_subg_index}, _body_subg_index{body_subg_index},
- _input_tensors{input_tensors}, _output_tensors{output_tensors}, _executor_map{executor_map},
+ _input_tensors{input_tensors}, _output_tensors{output_tensors}, _executors{executors},
_dyn_memory_manager{dyn_memory_manager}, _external_context{external_context}
{
- // At this point, executor_map may not have executors of cond subg and body subg
+ // At this point, executors may not have executors of cond subg and body subg
}
void WhileLayer::run()
@@ -56,8 +57,8 @@ void WhileLayer::run()
// // Run cond subg
// If there is no loop copy "_input_tensors" -> "_dst_tensors", else copy "cond subg inputs" ->
// "_dst_tensors"
- auto cond_exec = _executor_map->at(_cond_subg_index).get();
- auto body_exec = _executor_map->at(_body_subg_index).get();
+ auto cond_exec = _executors->at(_cond_subg_index).get();
+ auto body_exec = _executors->at(_body_subg_index).get();
// Need a temp tensor to hold the cond subgraph output
assert(cond_exec->getOutputTensors().size() == 1);
diff --git a/runtime/onert/core/src/backend/builtin/kernel/WhileLayer.h b/runtime/onert/core/src/backend/builtin/kernel/WhileLayer.h
index 912102781..8551b3d09 100644
--- a/runtime/onert/core/src/backend/builtin/kernel/WhileLayer.h
+++ b/runtime/onert/core/src/backend/builtin/kernel/WhileLayer.h
@@ -18,7 +18,7 @@
#define __ONERT_BACKEND_BUILTIN_KERNEL_WHILE_LAYER_H__
#include <backend/IPortableTensor.h>
-#include <exec/IExecutor.h>
+#include <exec/Executors.h>
#include <exec/IFunction.h>
#include <ir/OperandIndexSequence.h>
#include <ir/Graph.h>
@@ -41,7 +41,7 @@ public:
WhileLayer(const std::vector<backend::IPortableTensor *> input_tensors,
const std::vector<backend::IPortableTensor *> output_tensors,
const ir::SubgraphIndex &cond_subg_index, const ir::SubgraphIndex &body_subg_index,
- exec::ExecutorMap *executor_map, basic::DynamicMemoryManager *dyn_memory_manager,
+ exec::Executors *executors, basic::DynamicMemoryManager *dyn_memory_manager,
const std::shared_ptr<ExternalContext> &external_context);
public:
@@ -52,7 +52,7 @@ private:
const ir::SubgraphIndex _body_subg_index;
const std::vector<backend::IPortableTensor *> _input_tensors;
const std::vector<backend::IPortableTensor *> _output_tensors;
- exec::ExecutorMap *_executor_map;
+ exec::Executors *_executors;
basic::DynamicMemoryManager *_dyn_memory_manager; // For generating temp tensors
const std::shared_ptr<ExternalContext> _external_context;
};
diff --git a/runtime/onert/core/src/compiler/BackendManager.cc b/runtime/onert/core/src/compiler/BackendManager.cc
index 0d6051b21..44442c065 100644
--- a/runtime/onert/core/src/compiler/BackendManager.cc
+++ b/runtime/onert/core/src/compiler/BackendManager.cc
@@ -16,16 +16,11 @@
#include "compiler/BackendManager.h"
-#include <memory>
-#include <dlfcn.h>
+#include "../backend/builtin/Backend.h"
+#include "../backend/builtin/Config.h"
-#include "backend/Backend.h"
-#include "backend/builtin/Backend.h"
-#include "backend/builtin/Config.h"
-#include "backend/IConfig.h"
-#include "util/logging.h"
-#include "util/ConfigSource.h"
-#include "misc/string_helpers.h"
+#include <dlfcn.h>
+#include <memory>
static const char *SHARED_LIB_EXT =
#if defined(__APPLE__) && defined(__MACH__)
@@ -152,7 +147,7 @@ const backend::Backend *BackendManager::get(const std::string &key) const
return nullptr;
}
-const backend::builtin::Backend *BackendManager::getBuiltin() const { return _builtin; }
+const backend::Backend *BackendManager::getBuiltin() const { return _builtin; }
} // namespace compiler
} // namespace onert
diff --git a/runtime/onert/core/src/compiler/Compiler.cc b/runtime/onert/core/src/compiler/Compiler.cc
index 6a1d8fcec..7be9c1e3b 100644
--- a/runtime/onert/core/src/compiler/Compiler.cc
+++ b/runtime/onert/core/src/compiler/Compiler.cc
@@ -18,29 +18,27 @@
#include "ExecutorFactory.h"
#include "ShapeValidator.h"
+#include "pass/ConstantOutputPass.h"
+#include "pass/OddOutputPass.h"
+#include "pass/PassRunner.h"
+#include "pass/UnusedOperandEliminationPass.h"
+#include "../backend/builtin/Config.h"
+#include "../dumper/dot/DotDumper.h"
+#include "../interp/InterpExecutor.h"
+#include "../ir/OperationCloner.h"
+#include "../ir/OperationDumper.h"
+#include "../ir/verifier/Verifier.h"
-#include <backend/builtin/Config.h>
-#include "compiler/BackendManager.h"
-#include "compiler/IScheduler.h"
-#include "compiler/ManualScheduler.h"
-#include "compiler/HEScheduler.h"
#include "compiler/StaticShapeInferer.h"
-#include "compiler/OperationLowerInfo.h"
-#include "compiler/pass/ConstantOutputPass.h"
-#include "compiler/pass/OddOutputPass.h"
-#include "compiler/pass/PassRunner.h"
-#include "compiler/pass/UnusedOperandEliminationPass.h"
-#include "exec/ExecTime.h"
-#include "ir/verifier/Verifier.h"
-#include "dumper/dot/DotDumper.h"
-#include "compiler/Linear.h"
-#include "interp/InterpExecutor.h"
#include "util/ConfigSource.h"
#include "util/logging.h"
-#include "ir/OperationDumper.h"
-#include "ir/OperationCloner.h"
-#include "misc/string_helpers.h"
-#include "json/json.h"
+
+#include <misc/polymorphic_downcast.h>
+#include <misc/string_helpers.h>
+#include <json/json.h>
+
+// TODO Remove using fstream header
+#include <fstream>
namespace
{
@@ -86,8 +84,104 @@ void verboseOptions(compiler::CompilerOptions &options)
<< std::noboolalpha;
}
-void setBackendMap(compiler::ManualSchedulerOptions &ms_options, const ir::Subgraphs &subgs,
- const std::string &str)
+std::unordered_map<ir::SubgraphIndex, std::unique_ptr<compiler::StaticShapeInferer>>
+createStaticShapeInferers(
+ const std::unordered_map<ir::SubgraphIndex, std::unique_ptr<compiler::LoweredGraph>>
+ &lowered_subgs)
+{
+ // Allocate StaticShapeInferer per each subgraph
+ std::unordered_map<ir::SubgraphIndex, std::unique_ptr<compiler::StaticShapeInferer>> inferers;
+ for (auto &pair : lowered_subgs)
+ {
+ const auto &subg_index = pair.first;
+ auto &lowered_subg = pair.second;
+ inferers[subg_index] = std::make_unique<compiler::StaticShapeInferer>(lowered_subg.get());
+ }
+
+ // Append observers in all StaticShapeInferers
+ for (auto &pair : lowered_subgs)
+ {
+ const auto &subg_index = pair.first;
+ auto &lowered_subg = pair.second;
+
+    // TODO: Iterate over control flow operations only, instead of all operations
+ lowered_subg->graph().operations().iterate([&](const ir::OperationIndex &,
+ const ir::Operation &op) {
+      // A function to append child inferers. These make it possible for a StaticShapeInferer to
+      // call StaticShapeInferers of child subgraphs recursively
+ auto appendChildInferer = [&](const ir::SubgraphIndex &child_subg_idx) {
+ auto *child_inferer = inferers.at(child_subg_idx).get();
+ inferers.at(subg_index)->appendChildInferer(child_subg_idx, child_inferer);
+ };
+
+      // A function to append subgraph input observers. This makes it possible for a StaticShapeInferer
+ // to update inputs of child subgraphs
+ auto appendSubgraphInputObserver = [&](const ir::SubgraphIndex &child_subg_idx) {
+ std::vector<ir::Operand *> child_subg_inputs;
+ auto &child_subg = lowered_subgs.at(child_subg_idx)->graph();
+ for (const auto &input_idx : child_subg.getInputs())
+ {
+ auto operand_ptr = child_subg.operands().getRawPtr(input_idx);
+ child_subg_inputs.emplace_back(operand_ptr);
+ }
+ inferers.at(subg_index)
+ ->appendSubgInputObserver(child_subg_idx,
+ std::make_unique<compiler::OperandObserver>(child_subg_inputs));
+ };
+
+      // A function to set control flow output observers. This makes it possible for a
+      // StaticShapeInferer to update outputs of parent control flow operations
+ auto setControlFlowOutputObserver = [&](const ir::SubgraphIndex &child_subg_idx) {
+ std::vector<ir::Operand *> cf_outputs;
+ auto &subg = lowered_subg->graph();
+ for (const auto &output_idx : op.getOutputs())
+ {
+ auto operand_ptr = subg.operands().getRawPtr(output_idx);
+ cf_outputs.emplace_back(operand_ptr);
+ }
+ inferers.at(child_subg_idx)
+ ->setControlflowOutputObserver(std::make_unique<compiler::OperandObserver>(cf_outputs));
+ };
+
+ // Append Observers in a StaticShapeInferer
+ if (op.opcode() == ir::OpCode::If)
+ {
+ const auto &if_op = nnfw::misc::polymorphic_downcast<const ir::operation::If &>(op);
+
+ appendChildInferer(if_op.param().then_subg_index);
+ appendChildInferer(if_op.param().else_subg_index);
+
+ appendSubgraphInputObserver(if_op.param().then_subg_index);
+ appendSubgraphInputObserver(if_op.param().else_subg_index);
+
+ setControlFlowOutputObserver(if_op.param().then_subg_index);
+ }
+ else if (op.opcode() == ir::OpCode::While)
+ {
+ const auto &while_op = nnfw::misc::polymorphic_downcast<const ir::operation::While &>(op);
+
+ appendChildInferer(while_op.param().cond_subg_index);
+ appendChildInferer(while_op.param().body_subg_index);
+
+ appendSubgraphInputObserver(while_op.param().cond_subg_index);
+ appendSubgraphInputObserver(while_op.param().body_subg_index);
+
+ setControlFlowOutputObserver(while_op.param().body_subg_index);
+ }
+ });
+ }
+
+ return inferers;
+}
+
+} // namespace
+
+namespace onert
+{
+
+namespace compiler
+{
+void ManualSchedulerOptions::setBackendMap(const std::string &str)
{
// TODO Support multiple subgraphs for manual scheduling
auto key_val_list = nnfw::misc::split(str, ';');
@@ -102,37 +196,24 @@ void setBackendMap(compiler::ManualSchedulerOptions &ms_options, const ir::Subgr
const auto &key_str = key_val.at(0);
const auto &val = key_val.at(1);
auto key = static_cast<uint32_t>(std::stoi(key_str));
-
- subgs.at(ir::SubgraphIndex{0})
- ->operations()
- .at(ir::OperationIndex{key}); // Check if exist, or this wil throw
- ms_options.index_to_backend.emplace(ir::OperationIndex{key}, val);
+ this->index_to_backend.emplace(ir::OperationIndex{key}, val);
}
}
-} // namespace
-
-namespace onert
-{
-
-namespace compiler
+std::unique_ptr<CompilerOptions> CompilerOptions::fromGlobalConfig()
{
-
-CompilerOptions fetchCompilerOptionsFromGlobalConfig(const ir::Subgraphs &subgs)
-{
- CompilerOptions options;
- options.backend_list = nnfw::misc::split(util::getConfigString(util::config::BACKENDS), ';');
- options.trace_filepath = util::getConfigString(util::config::TRACE_FILEPATH);
- options.graph_dump_level = util::getConfigInt(util::config::GRAPH_DOT_DUMP);
- options.executor = util::getConfigString(util::config::EXECUTOR);
- options.he_scheduler = util::getConfigBool(util::config::USE_SCHEDULER);
- options.he_profiling_mode = util::getConfigBool(util::config::PROFILING_MODE);
- options.disable_compile = util::getConfigBool(util::config::DISABLE_COMPILE);
- options.fp16_enable = util::getConfigBool(util::config::FP16_ENABLE);
-
+ auto o = std::make_unique<CompilerOptions>();
+ o->backend_list = nnfw::misc::split(util::getConfigString(util::config::BACKENDS), ';');
+ o->trace_filepath = util::getConfigString(util::config::TRACE_FILEPATH);
+ o->graph_dump_level = util::getConfigInt(util::config::GRAPH_DOT_DUMP);
+ o->executor = util::getConfigString(util::config::EXECUTOR);
+ o->he_scheduler = util::getConfigBool(util::config::USE_SCHEDULER);
+ o->he_profiling_mode = util::getConfigBool(util::config::PROFILING_MODE);
+ o->disable_compile = util::getConfigBool(util::config::DISABLE_COMPILE);
+ o->fp16_enable = util::getConfigBool(util::config::FP16_ENABLE);
{
// Backend for all
- auto &ms_options = options.manual_scheduler_options;
+ auto &ms_options = o->manual_scheduler_options;
// Default value for op_backend_all is first element in the backend list
ms_options.backend_for_all = util::getConfigString(util::config::OP_BACKEND_ALLOPS);
@@ -151,54 +232,67 @@ CompilerOptions fetchCompilerOptionsFromGlobalConfig(const ir::Subgraphs &subgs)
// Index to Backend
auto map_str = util::getConfigString(util::config::OP_BACKEND_MAP);
- setBackendMap(ms_options, subgs, map_str);
+ ms_options.setBackendMap(map_str);
}
- return options;
+ return o;
}
-Compiler::Compiler(const std::shared_ptr<ir::Subgraphs> &subgs, util::TracingCtx *tracing_ctx)
- : _subgraphs{subgs}, _state{State::CREATED}
+Compiler::Compiler(const std::shared_ptr<ir::Model> &model, CompilerOptions &copt)
+ : _nnpkg{std::make_shared<ir::NNPkg>(model)}, _state{State::CREATED}, _voptions{&copt}
{
- // Set default values for CompilerOptions
- // All these default values should not be fetched from Env, when we stop supporting Android NN
- // API.
- _options = fetchCompilerOptionsFromGlobalConfig(*subgs);
-
- _options.tracing_ctx = tracing_ctx;
+ // DO NOTHING
}
-void Compiler::enableToFp16() { _options.fp16_enable = true; }
+Compiler::Compiler(const std::shared_ptr<ir::NNPkg> &nnpkg,
+ std::vector<std::unique_ptr<CompilerOptions>> &copts)
+ : _nnpkg{nnpkg}, _state{State::CREATED}, _voptions{}
+{
+ for (uint32_t i = 0; i < copts.size(); i++)
+ {
+ _voptions.push_back(copts[i].get());
+ }
+}
-void Compiler::set_backend_from_str(const char *backend_settings)
+void Compiler::enableToFp16()
{
- assert(_subgraphs != nullptr);
- // Backend for all
- auto &ms_options = _options.manual_scheduler_options;
- setBackendMap(ms_options, *_subgraphs, std::string{backend_settings});
+ for (auto options : _voptions)
+ options->fp16_enable = true;
}
void Compiler::checkProfilerConditions()
{
- if (!_options.he_scheduler)
+ if (_nnpkg->model_count() != 1)
+ throw std::runtime_error("NYI: Profiling mode for multiple model is not supported yet");
+
+ auto &options = *_voptions[0];
+
+ if (options.he_scheduler)
throw std::runtime_error("Heterogeneous scheduler must be enabled during profiling.");
- if (_options.executor != "Dataflow")
+ if (options.executor != "Dataflow")
throw std::runtime_error("Profiling mode works only with 'Dataflow' executor");
}
bool Compiler::buildPartialGraph(uint32_t num_graphs)
{
- if (_subgraphs->count() > 1)
+ // Use 1st model and options only on partial graph (pipeline) compile
+ assert(_nnpkg->model_count() == 1);
+ assert(_voptions.size() == 1);
+
+ auto model = _nnpkg->primary_model();
+ auto &options = *_voptions[0];
+
+ if (model->subgraphs_count() > 1)
return false;
- auto partialgraphs = std::make_shared<ir::Subgraphs>();
+ auto partialgraphs = std::make_shared<ir::Model>();
for (uint32_t idx = 0; idx < num_graphs; idx++)
{
auto partialgraph = std::make_unique<ir::Graph>();
partialgraphs->push(ir::SubgraphIndex{idx}, std::move(partialgraph));
}
- _subgraphs->primary()->setPartialgraphs(partialgraphs);
+ model->primary_subgraph()->setPartialModel(partialgraphs);
auto partial_graph = primary_subgraph()->partialgraphs();
@@ -208,8 +302,8 @@ bool Compiler::buildPartialGraph(uint32_t num_graphs)
for (auto use_operation : use_operations)
{
- auto graph_index = _options.partial_graph_options.index_to_graph.find(use_operation);
- if (graph_index == _options.partial_graph_options.index_to_graph.end())
+ auto graph_index = options.partial_graph_options.index_to_graph.find(use_operation);
+ if (graph_index == options.partial_graph_options.index_to_graph.end())
{
throw std::runtime_error("Invalid Partition Map");
}
@@ -230,8 +324,8 @@ bool Compiler::buildPartialGraph(uint32_t num_graphs)
primary_subgraph()->operations().iterate(
[&](const ir::OperationIndex &operation_index, const ir::Operation &operation) {
- auto graph_index = _options.partial_graph_options.index_to_graph.find(operation_index);
- if (graph_index == _options.partial_graph_options.index_to_graph.end())
+ auto graph_index = options.partial_graph_options.index_to_graph.find(operation_index);
+ if (graph_index == options.partial_graph_options.index_to_graph.end())
{
throw std::runtime_error("Invalid Partition Map");
}
@@ -259,7 +353,7 @@ bool Compiler::buildPartialGraph(uint32_t num_graphs)
assert(new_operation_index == operation_index);
});
- for (uint32_t idx = 0; idx < partial_graph->count(); idx++)
+ for (uint32_t idx = 0; idx < partial_graph->subgraphs_count(); idx++)
{
auto partition = partial_graph->at(ir::SubgraphIndex{idx});
@@ -282,10 +376,10 @@ bool Compiler::buildPartialGraph(uint32_t num_graphs)
auto use_operations = primary_subgraph()->operands().at(operand_index).getUses();
auto iter = use_operations.begin();
ir::SubgraphIndex graph_index =
- _options.partial_graph_options.index_to_graph.find(*iter++)->second;
+ options.partial_graph_options.index_to_graph.find(*iter++)->second;
while (iter != use_operations.end())
{
- if (graph_index != _options.partial_graph_options.index_to_graph.find(*iter)->second &&
+ if (graph_index != options.partial_graph_options.index_to_graph.find(*iter)->second &&
!partition->getOutputs().contains(operand_index))
{
partition->addOutput(operand_index,
@@ -344,96 +438,157 @@ bool Compiler::buildPartialGraph(uint32_t num_graphs)
return true;
}
-std::shared_ptr<exec::ExecutorMap> Compiler::compile(void)
+std::shared_ptr<CompilerArtifact> Compiler::compile(void)
{
- // Set control flow backend for control flow operators
+ for (auto options : _voptions)
{
+ // Set control flow backend for control flow operators
auto &builtin_id = backend::builtin::Config::ID;
- _options.manual_scheduler_options.opcode_to_backend[ir::OpCode::If] = builtin_id;
- _options.manual_scheduler_options.opcode_to_backend[ir::OpCode::While] = builtin_id;
- _options.manual_scheduler_options.opcode_to_backend[ir::OpCode::Permute] = builtin_id;
- }
+ options->manual_scheduler_options.opcode_to_backend[ir::OpCode::If] = builtin_id;
+ options->manual_scheduler_options.opcode_to_backend[ir::OpCode::While] = builtin_id;
+ options->manual_scheduler_options.opcode_to_backend[ir::OpCode::Permute] = builtin_id;
- // FIXME This is a workaround for bcq operations, should remove it
- {
- _options.manual_scheduler_options.opcode_to_backend[ir::OpCode::BCQFullyConnected] = "bcq";
- _options.manual_scheduler_options.opcode_to_backend[ir::OpCode::BCQGather] = "bcq";
+ // FIXME This is a workaround for bcq operations, should remove it
+ options->manual_scheduler_options.opcode_to_backend[ir::OpCode::BCQFullyConnected] = "bcq";
+ options->manual_scheduler_options.opcode_to_backend[ir::OpCode::BCQGather] = "bcq";
+
+ // FIXME This is a workaround for bulk operations, should remove it
+ options->manual_scheduler_options.opcode_to_backend[ir::OpCode::Bulk] = "trix";
+
+ verboseOptions(*options);
}
- verboseOptions(_options);
+ // NYI: allow one model compilation
+ auto const model_count = _nnpkg->model_count();
+ if (model_count != _voptions.size())
+ throw std::runtime_error{"Model count and option vector size mismatch"};
- _subgraphs->iterate([&](const ir::SubgraphIndex &, ir::Graph &subg) {
- // Mandatory passes
- pass::PassRunner{}
- .append(std::make_unique<pass::ConstantOutputPass>(subg))
- .append(std::make_unique<pass::OddOutputPass>(subg))
- .run();
+ for (uint32_t i = 0; i < model_count; i++)
+ {
+ _nnpkg->model(ir::ModelIndex{i})->iterate([&](const ir::SubgraphIndex &, ir::Graph &subg) {
+ // Mandatory passes
+ pass::PassRunner{}
+ .append(std::make_unique<pass::ConstantOutputPass>(subg))
+ .append(std::make_unique<pass::OddOutputPass>(subg))
+ .run();
- // Optimizations
- pass::PassRunner{}.append(std::make_unique<pass::UnusedOperandEliminationPass>(subg)).run();
- });
+ // Optimizations
+ pass::PassRunner{}.append(std::make_unique<pass::UnusedOperandEliminationPass>(subg)).run();
+ });
+ }
/***************************************************
* Prepare compilation phase
***************************************************/
- auto executors = std::make_shared<exec::ExecutorMap>();
-
// Compilable check
// TODO: Support hybrid execution -
// execution between interpreter and compiled executor (including control flow)
- if (_options.disable_compile)
+ if (_voptions[0]->disable_compile)
{
- _subgraphs->iterate([&](const ir::SubgraphIndex &index, ir::Graph &subg) {
+ if (model_count > 1)
+ throw std::runtime_error{"NYI: Disable compilation for multi model is not supported yet"};
+
+ auto executors = std::make_shared<exec::Executors>();
+
+ _nnpkg->primary_model()->iterate([&](const ir::SubgraphIndex &index, ir::Graph &subg) {
executors->emplace(index, std::make_unique<interp::InterpExecutor>(subg));
});
_state = State::COMPILED;
- return executors;
+ return std::make_shared<CompilerArtifact>(executors, nullptr);
}
// Mode check
- if (_options.he_profiling_mode)
+ // TODO handle option for each model
+ if (_voptions[0]->he_profiling_mode)
checkProfilerConditions();
/***************************************************
* Backend independent analysis & optimization phase
***************************************************/
- auto dump_level = static_cast<dumper::dot::DotDumper::Level>(_options.graph_dump_level);
+ // TODO Handle dump level for each model
+ auto dump_level = static_cast<dumper::dot::DotDumper::Level>(_voptions[0]->graph_dump_level);
+ onert::dumper::dot::DotDumper dot_dumper(dump_level);
+
+ // Tracing context
+ auto tracing_ctx = std::make_unique<util::TracingCtx>();
+
+ // Model edge context
+ std::unique_ptr<ir::ModelEdges> model_edges = nullptr;
// Lower: Assign backend
std::unordered_map<ir::SubgraphIndex, std::unique_ptr<compiler::LoweredGraph>> lowered_subgs;
- _subgraphs->iterate([&](const ir::SubgraphIndex &index, ir::Graph &subg) {
- onert::dumper::dot::DotDumper dot_dumper(subg, dump_level);
- dot_dumper.dump(nnfw::misc::str("before_lower_subg-", index.value()));
- // Lower: Assign backend
- lowered_subgs[index] = std::make_unique<compiler::LoweredGraph>(subg, _options);
+ if (model_count == 1)
+ {
+ _nnpkg->primary_model()->iterate([&](const ir::SubgraphIndex &index, ir::Graph &subg) {
+ dot_dumper.dump(subg, nnfw::misc::str("before_lower_subg-", index.value()));
+ // Lower: Assign backend
+ lowered_subgs[index] = std::make_unique<compiler::LoweredGraph>(subg, *_voptions[0]);
+ // Set tracing_ctx for copied graph
+ tracing_ctx->setSubgraphIndex(&(lowered_subgs[index]->graph()), index.value());
+ });
+ }
+ else
+ {
+ // TODO Support tracing_ctx for multiple model
+ tracing_ctx = nullptr;
+
+ // Copy model edge context
+ model_edges = std::make_unique<ir::ModelEdges>(_nnpkg->model_edges());
- subg.setSubgraphs(nullptr);
- });
+ for (uint32_t i = 0; i < model_count; i++)
+ {
+ auto model = _nnpkg->model(ir::ModelIndex{i});
+ if (model->subgraphs_count() != 1)
+ throw std::runtime_error{"NYI: Lowering subgraphs for multiple model is not supported yet"};
+ auto subg = model->primary_subgraph();
+ dot_dumper.dump(*subg, nnfw::misc::str("before_lower_model-", i));
+
+ // For multimodel, model index is used for lowered graph index in lowered graph map
+ // and index type is SubgraphIndex
+ // TODO Find better way to represent lowered graph index for multimodel's subgraph
+ lowered_subgs[ir::SubgraphIndex{i}] =
+ std::make_unique<compiler::LoweredGraph>(*model->primary_subgraph(), *_voptions[i]);
+ }
+ }
- _subgraphs.reset();
+ _nnpkg.reset();
for (auto &pair : lowered_subgs)
{
const auto &subg_index = pair.first;
auto &lowered_subg = pair.second;
- onert::dumper::dot::DotDumper dot_dumper_lowered(lowered_subg.get(), dump_level);
- dot_dumper_lowered.dump("after_lower_subg-" + std::to_string(subg_index.value()));
+ dot_dumper.dump(*lowered_subg, "after_lower_subg-" + std::to_string(subg_index.value()));
}
// Shape inference.
{
- const auto primary_subg_idx = ir::SubgraphIndex{0};
- StaticShapeInferer inferer(primary_subg_idx, lowered_subgs);
- auto &lowered_subg = lowered_subgs.at(primary_subg_idx);
- auto ordered_ops = lowered_subg->graph().topolSortOperations();
- for (auto op_ind : ordered_ops)
+    // Run the StaticShapeInferer of the primary subgraph. All child StaticShapeInferers are called
+ // recursively
+ std::unordered_map<ir::SubgraphIndex, std::unique_ptr<StaticShapeInferer>> inferers =
+ createStaticShapeInferers(lowered_subgs);
+
+ if (model_count == 1)
{
- const auto &op = lowered_subg->graph().operations().at(op_ind);
- bool has_dynamic_tensor = inferer.infer(op);
- lowered_subg->setHasDynamicTensor(op_ind, has_dynamic_tensor);
+ const auto primary_subg_idx = ir::SubgraphIndex{0};
+ inferers.at(primary_subg_idx)->infer();
+
+ for (const auto &pair : inferers)
+ {
+ const auto inferer = pair.second.get();
+ inferer->dump();
+ }
+ }
+ else
+ {
+      // Assume each model in a multi-model package has only one subgraph
+ for (const auto &pair : inferers)
+ {
+ const auto inferer = pair.second.get();
+ inferer->infer();
+ inferer->dump();
+ }
}
- inferer.dump();
}
// Shape validation
@@ -452,8 +607,7 @@ std::shared_ptr<exec::ExecutorMap> Compiler::compile(void)
/*************************************************************
* Backend independent analysis & optimization phase finished
*************************************************************/
-
- executors = std::make_shared<exec::ExecutorMap>();
+ auto executors = std::make_shared<exec::Executors>(std::move(model_edges));
for (auto &pair : lowered_subgs)
{
const auto &subg_index = pair.first;
@@ -464,24 +618,31 @@ std::shared_ptr<exec::ExecutorMap> Compiler::compile(void)
std::to_string(subg_index.value()));
lowered_subg->graph().operations().iterate(
[&](const ir::OperationIndex &, const ir::Operation &op) { op.accept(dumper); });
- auto executor = std::unique_ptr<exec::IExecutor>{
- ExecutorFactory::get().create(std::move(lowered_subg), _options, executors)};
+
+ auto &options = (model_count > 1) ? *_voptions[subg_index.value()] : *_voptions[0];
+ auto executor = std::unique_ptr<exec::IExecutor>{ExecutorFactory::get().create(
+ std::move(lowered_subg), tracing_ctx.get(), options, executors)};
executor->setIndexedRanks(indexed_ranks);
- executors->insert(std::make_pair(subg_index, std::move(executor)));
+ executors->emplace(subg_index, std::move(executor));
}
/********************************
* Code generation phase finished
********************************/
_state = State::COMPILED;
- return executors;
+ return std::make_shared<CompilerArtifact>(executors, std::move(tracing_ctx));
}
-std::vector<std::shared_ptr<exec::ExecutorMap>> Compiler::compile(const char *package_file_path,
- const char *map_file_path)
+std::vector<std::shared_ptr<CompilerArtifact>> Compiler::compile(const char *package_file_path,
+ const char *map_file_path)
{
- std::vector<std::shared_ptr<exec::ExecutorMap>> executors;
- auto executor_map = std::make_shared<exec::ExecutorMap>();
+ // Allow one model compilation for pipeline
+ if (_nnpkg->model_count() != 1)
+ throw std::runtime_error{"Multiple models compilation for pipeline is not supported yet."};
+ assert(_voptions.size() == 1);
+
+ auto model = _nnpkg->primary_model();
+ auto &options = *_voptions[0];
std::string package_path(package_file_path);
std::string partition_map_file;
@@ -508,7 +669,7 @@ std::vector<std::shared_ptr<exec::ExecutorMap>> Compiler::compile(const char *pa
num_graphs = np.asUInt();
for (uint32_t i = 0; i < (uint32_t)map.size(); ++i)
{
- _options.partial_graph_options.index_to_graph[ir::OperationIndex{i}] =
+ options.partial_graph_options.index_to_graph[ir::OperationIndex{i}] =
ir::SubgraphIndex{map[i].asUInt()};
}
}
@@ -525,25 +686,25 @@ std::vector<std::shared_ptr<exec::ExecutorMap>> Compiler::compile(const char *pa
// Set control flow backend for control flow operators
{
auto &builtin_id = backend::builtin::Config::ID;
- _options.manual_scheduler_options.opcode_to_backend[ir::OpCode::If] = builtin_id;
- _options.manual_scheduler_options.opcode_to_backend[ir::OpCode::While] = builtin_id;
- _options.manual_scheduler_options.opcode_to_backend[ir::OpCode::Permute] = builtin_id;
+ options.manual_scheduler_options.opcode_to_backend[ir::OpCode::If] = builtin_id;
+ options.manual_scheduler_options.opcode_to_backend[ir::OpCode::While] = builtin_id;
+ options.manual_scheduler_options.opcode_to_backend[ir::OpCode::Permute] = builtin_id;
}
// FIXME This is a workaround for bcq operations, should remove it
{
- _options.manual_scheduler_options.opcode_to_backend[ir::OpCode::BCQFullyConnected] = "bcq";
- _options.manual_scheduler_options.opcode_to_backend[ir::OpCode::BCQGather] = "bcq";
+ options.manual_scheduler_options.opcode_to_backend[ir::OpCode::BCQFullyConnected] = "bcq";
+ options.manual_scheduler_options.opcode_to_backend[ir::OpCode::BCQGather] = "bcq";
}
- // It doesn't support tracing in case of partial graph
+ // FIXME This is a workaround for bulk operations, should remove it
{
- _options.tracing_ctx = nullptr;
+ options.manual_scheduler_options.opcode_to_backend[ir::OpCode::Bulk] = "trix";
}
- verboseOptions(_options);
+ verboseOptions(options);
- _subgraphs->iterate([&](const ir::SubgraphIndex &, ir::Graph &subg) {
+ model->iterate([&](const ir::SubgraphIndex &, ir::Graph &subg) {
// Mandatory passes
auto part = subg.partialgraphs();
part->iterate([&](const ir::SubgraphIndex &, ir::Graph &partialgraph) {
@@ -566,38 +727,41 @@ std::vector<std::shared_ptr<exec::ExecutorMap>> Compiler::compile(const char *pa
// Compilable check
// TODO: Support hybrid execution -
// execution between interpreter and compiled executor (including control flow)
- if (_options.disable_compile)
+ if (options.disable_compile)
{
- _subgraphs->iterate([&](const ir::SubgraphIndex &index, ir::Graph &subg) {
- executor_map->emplace(index, std::make_unique<interp::InterpExecutor>(subg));
- executors.push_back(executor_map);
+ std::vector<std::shared_ptr<CompilerArtifact>> results;
+ auto executors = std::make_shared<exec::Executors>();
+
+ model->iterate([&](const ir::SubgraphIndex &index, ir::Graph &subg) {
+ executors->emplace(index, std::make_unique<interp::InterpExecutor>(subg));
});
+ results.push_back(std::make_shared<CompilerArtifact>(executors, nullptr));
_state = State::COMPILED;
- return executors;
+ return results;
}
// Mode check
- if (_options.he_profiling_mode)
+ if (options.he_profiling_mode)
checkProfilerConditions();
/***************************************************
* Backend independent analysis & optimization phase
***************************************************/
- auto dump_level = static_cast<dumper::dot::DotDumper::Level>(_options.graph_dump_level);
+ auto dump_level = static_cast<dumper::dot::DotDumper::Level>(options.graph_dump_level);
+ onert::dumper::dot::DotDumper dot_dumper_part(dump_level);
// Lower: Assign backend
std::unordered_map<ir::SubgraphIndex, std::unique_ptr<compiler::LoweredGraph>>
lowered_partialgraphs;
- _subgraphs->iterate([&](const ir::SubgraphIndex &, ir::Graph &subg) {
+ model->iterate([&](const ir::SubgraphIndex &, ir::Graph &subg) {
auto part = subg.partialgraphs();
part->iterate([&](const ir::SubgraphIndex &pindex, ir::Graph &partialgraph) {
- onert::dumper::dot::DotDumper dot_dumper_part(partialgraph, dump_level);
- dot_dumper_part.dump(nnfw::misc::str("before_lower_subg_partialgraph-", pindex.value()));
+ dot_dumper_part.dump(partialgraph,
+ nnfw::misc::str("before_lower_subg_partialgraph-", pindex.value()));
// // Lower: Assign backend
lowered_partialgraphs[pindex] =
- std::make_unique<compiler::LoweredGraph>(subg, partialgraph, _options);
- partialgraph.setSubgraphs(nullptr);
+ std::make_unique<compiler::LoweredGraph>(subg, partialgraph, options);
});
});
@@ -606,25 +770,20 @@ std::vector<std::shared_ptr<exec::ExecutorMap>> Compiler::compile(const char *pa
const auto &partialgraph_index = pair.first;
auto &lowered_partialgraph = pair.second;
- onert::dumper::dot::DotDumper dot_dumper_lowered_part(lowered_partialgraph.get(), dump_level);
- dot_dumper_lowered_part.dump("after_lower_subg_partialgraph-" +
- std::to_string(partialgraph_index.value()));
+ dot_dumper_part.dump(*lowered_partialgraph, "after_lower_subg_partialgraph-" +
+ std::to_string(partialgraph_index.value()));
}
// Partial Graph shape inference
+ std::unordered_map<ir::SubgraphIndex, std::unique_ptr<StaticShapeInferer>> inferers =
+ createStaticShapeInferers(lowered_partialgraphs);
+ // NOTE If a partialgraph has subgraphs, StaticShapeInferer may be called multiple times
for (auto &pair : lowered_partialgraphs)
{
const auto &partialgraph_index = pair.first;
- auto &lowered_partialgraph = pair.second;
- StaticShapeInferer partial_inferer(partialgraph_index, lowered_partialgraphs);
- auto ordered_ops = lowered_partialgraph->graph().topolSortOperations();
- for (auto op_ind : ordered_ops)
- {
- const auto &op = lowered_partialgraph->graph().operations().at(op_ind);
- bool has_dynamic_tensor = partial_inferer.infer(op);
- lowered_partialgraph->setHasDynamicTensor(op_ind, has_dynamic_tensor);
- }
- partial_inferer.dump();
+ const auto partial_inferer = inferers.at(partialgraph_index).get();
+ partial_inferer->infer();
+ partial_inferer->dump();
}
// Shape validation
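
As an aside on the shape-inference rewrite above: instead of constructing a StaticShapeInferer inside the loop and walking the operations by hand, the inferers are now created up front (createStaticShapeInferers) and the loop only calls infer()/dump() on each one. A small self-contained sketch of that build-then-drive pattern, with a stand-in Inferer type in place of StaticShapeInferer:

#include <iostream>
#include <memory>
#include <unordered_map>

struct Inferer  // stand-in for StaticShapeInferer
{
  void infer() { inferred = true; }
  void dump() const { std::cout << "inferred: " << std::boolalpha << inferred << "\n"; }
  bool inferred = false;
};

int main()
{
  // Analogue of createStaticShapeInferers(): build one inferer per graph index...
  std::unordered_map<int, std::unique_ptr<Inferer>> inferers;
  for (int index = 0; index < 3; ++index)
    inferers.emplace(index, std::make_unique<Inferer>());

  // ...then drive each one, as the loop in the hunk above does.
  for (auto &pair : inferers)
  {
    pair.second->infer();
    pair.second->dump();
  }
  return 0;
}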
@@ -652,9 +811,11 @@ std::vector<std::shared_ptr<exec::ExecutorMap>> Compiler::compile(const char *pa
ordered.insert(make_pair(pair.first.value(), std::move(lowered_partialgraph)));
}
+ std::vector<std::shared_ptr<CompilerArtifact>> results;
for (auto &pair : ordered)
{
- executor_map = std::make_shared<exec::ExecutorMap>();
+ auto executors = std::make_shared<exec::Executors>();
+
const auto &partialgraph_index = ir::SubgraphIndex(pair.first);
auto &lowered_partialgraph = pair.second;
auto indexed_ranks = lowered_partialgraph->indexed_ranks();
@@ -663,19 +824,21 @@ std::vector<std::shared_ptr<exec::ExecutorMap>> Compiler::compile(const char *pa
lowered_partialgraph->graph().operations().iterate(
[&](const ir::OperationIndex &, const ir::Operation &op) { op.accept(dumper); });
auto executor = std::unique_ptr<exec::IExecutor>{
- ExecutorFactory::get().create(std::move(lowered_partialgraph), _options, executor_map)};
+ ExecutorFactory::get().create(std::move(lowered_partialgraph), nullptr, options, executors)};
executor->setIndexedRanks(indexed_ranks);
- executor_map->insert(std::make_pair(ir::SubgraphIndex{0}, std::move(executor)));
- executors.push_back(executor_map);
+ executors->emplace(ir::SubgraphIndex{0}, std::move(executor));
+
+ // Tracing is not supported for partial graphs
+ results.push_back(std::make_shared<CompilerArtifact>(executors, nullptr));
}
- _subgraphs.reset();
+ _nnpkg.reset();
/********************************
* Code generation phase finished
********************************/
_state = State::COMPILED;
- return executors;
+ return results;
}
} // namespace compiler
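
Both compile() overloads above now hand back CompilerArtifact objects built as make_shared<CompilerArtifact>(executors, tracing_ctx-or-nullptr). The class itself is not part of this diff; purely as a reading aid, here is a hypothetical minimal shape that would be consistent with those call sites (member names and stand-in types are assumptions, not the real header):

#include <memory>

struct Executors {};   // stand-in for exec::Executors
struct TracingCtx {};  // stand-in for util::TracingCtx

// Hypothetical sketch inferred from the constructor calls above.
struct CompilerArtifact
{
  CompilerArtifact(std::shared_ptr<Executors> executors, std::unique_ptr<TracingCtx> tracing_ctx)
    : _executors{std::move(executors)}, _tracing_ctx{std::move(tracing_ctx)}
  {
  }

  std::shared_ptr<Executors> _executors;    // executors for every subgraph
  std::unique_ptr<TracingCtx> _tracing_ctx; // nullptr where tracing is not supported
};

int main()
{
  auto executors = std::make_shared<Executors>();
  auto artifact = std::make_shared<CompilerArtifact>(executors, nullptr);
  return artifact->_executors ? 0 : 1;
}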
diff --git a/runtime/onert/core/src/compiler/ExecutorFactory.cc b/runtime/onert/core/src/compiler/ExecutorFactory.cc
index f9db1ca89..024556e7e 100644
--- a/runtime/onert/core/src/compiler/ExecutorFactory.cc
+++ b/runtime/onert/core/src/compiler/ExecutorFactory.cc
@@ -16,23 +16,22 @@
#include "ExecutorFactory.h"
-#include "backend/builtin/Config.h"
-#include "backend/builtin/KernelGenerator.h"
-#include "backend/builtin/TensorBuilder.h"
-#include "backend/builtin/UserTensor.h"
-#include "backend/IPortableTensor.h"
-#include "compiler/BackendManager.h"
-#include "compiler/BackendManager.h"
-#include "compiler/ExecutionBuilder.h"
-#include "compiler/Linear.h"
-#include "dumper/text/GraphDumper.h"
-#include "exec/DataflowExecutor.h"
-#include "exec/ExecTime.h"
-#include "exec/ExecutionObservers.h"
-#include "exec/LinearExecutor.h"
-#include "exec/ParallelExecutor.h"
-#include "ir/OperationCloner.h"
-#include "util/TracingCtx.h"
+#include "Linear.h"
+#include "../backend/builtin/BackendContext.h"
+#include "../backend/builtin/Config.h"
+#include "../backend/builtin/UserTensor.h"
+#include "../dumper/text/GraphDumper.h"
+#include "../exec/DataflowExecutor.h"
+#include "../exec/ExecTime.h"
+#include "../exec/ExecutionObservers.h"
+#include "../exec/LinearExecutor.h"
+#include "../exec/ParallelExecutor.h"
+#include "../ir/OperationCloner.h"
+
+#include <backend/IPortableTensor.h>
+#include <compiler/BackendManager.h>
+#include <compiler/ExecutionBuilder.h>
+#include <util/TracingCtx.h>
#include <functional>
#include <memory>
@@ -242,16 +241,17 @@ ExecutorFactory::ExecutorFactory()
{
_map["Linear"] = createLinearExecutor;
_map["Dataflow"] = std::bind(createDataflowExecutor, std::placeholders::_1, std::placeholders::_2,
- std::placeholders::_3, false);
+ std::placeholders::_3, std::placeholders::_4, false);
_map["Parallel"] = std::bind(createDataflowExecutor, std::placeholders::_1, std::placeholders::_2,
- std::placeholders::_3, true);
+ std::placeholders::_3, std::placeholders::_4, true);
}
exec::IExecutor *ExecutorFactory::create(std::unique_ptr<compiler::LoweredGraph> lowered_graph,
+ const util::TracingCtx *tracing_ctx,
const compiler::CompilerOptions &options,
- const std::shared_ptr<exec::ExecutorMap> &executor_map)
+ const std::shared_ptr<exec::Executors> &executors)
{
- return _map.at(options.executor)(std::move(lowered_graph), options, executor_map);
+ return _map.at(options.executor)(std::move(lowered_graph), tracing_ctx, options, executors);
}
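
ExecutorFactory keeps the usual string-keyed factory-map pattern: creators are registered once (directly or via std::bind, which fixes the trailing parallel flag), and create() dispatches with _map.at(options.executor)(...). A standalone sketch of the same mechanics, with the heavyweight onert parameters reduced to ints:

#include <functional>
#include <iostream>
#include <memory>
#include <string>
#include <unordered_map>

struct Executor { std::string kind; };

// Shape mirrors createDataflowExecutor(lowered_graph, tracing_ctx, options, executors, parallel).
Executor *createDataflow(int graph, int tracing, int options, int executors, bool parallel)
{
  (void)graph; (void)tracing; (void)options; (void)executors;
  return new Executor{parallel ? "Parallel" : "Dataflow"};
}

int main()
{
  using namespace std::placeholders;
  std::unordered_map<std::string, std::function<Executor *(int, int, int, int)>> map;

  // Forward the first four arguments, fix the trailing `parallel` flag.
  map["Dataflow"] = std::bind(createDataflow, _1, _2, _3, _4, false);
  map["Parallel"] = std::bind(createDataflow, _1, _2, _3, _4, true);

  // Dispatch by executor name; at() throws std::out_of_range for unknown names.
  std::unique_ptr<Executor> exec{map.at("Parallel")(0, 0, 0, 0)};
  std::cout << exec->kind << "\n";
  return 0;
}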
void ExecutorFactory::prepareMigrantTensors(compiler::LoweredGraph &lowered_graph,
@@ -282,7 +282,7 @@ void ExecutorFactory::prepareMigrantTensors(compiler::LoweredGraph &lowered_grap
}
void ExecutorFactory::prepareBuiltinBackend(const TensorRegistries &tensor_regs,
- const std::shared_ptr<exec::ExecutorMap> &executor_map,
+ const std::shared_ptr<exec::Executors> &executors,
const backend::BackendContexts &backend_contexts)
{
for (auto &pair : backend_contexts)
@@ -292,7 +292,7 @@ void ExecutorFactory::prepareBuiltinBackend(const TensorRegistries &tensor_regs,
{
auto builtin_kernel_gen = builtin_context->kernel_gen;
builtin_kernel_gen->setTensorRegistries(tensor_regs);
- builtin_kernel_gen->setExecutorMap(executor_map);
+ builtin_kernel_gen->setExecutors(executors);
}
}
}
@@ -317,12 +317,11 @@ ExecutorFactory::orderBackendContext(const backend::BackendContexts &backend_con
return ordered_contexts;
}
-exec::IExecutor *
-ExecutorFactory::createLinearExecutor(std::unique_ptr<compiler::LoweredGraph> lowered_graph,
- const compiler::CompilerOptions &options,
- const std::shared_ptr<exec::ExecutorMap> &executor_map)
+exec::IExecutor *ExecutorFactory::createLinearExecutor(
+ std::unique_ptr<compiler::LoweredGraph> lowered_graph, const util::TracingCtx *tracing_ctx,
+ const compiler::CompilerOptions &options, const std::shared_ptr<exec::Executors> &executors)
{
- auto graph = lowered_graph->graph();
+ auto &graph = lowered_graph->graph();
backend::BackendContexts backend_contexts =
createBackendContexts(*lowered_graph, options.executor == "Linear");
@@ -346,7 +345,7 @@ ExecutorFactory::createLinearExecutor(std::unique_ptr<compiler::LoweredGraph> lo
prepareMigrantTensors(*lowered_graph, backend_contexts);
// Give some runtime objects to builtin KernelGenerator
- prepareBuiltinBackend(tensor_regs, executor_map, backend_contexts);
+ prepareBuiltinBackend(tensor_regs, executors, backend_contexts);
ExecutionBuilder builder;
@@ -426,14 +425,17 @@ ExecutorFactory::createLinearExecutor(std::unique_ptr<compiler::LoweredGraph> lo
auto code_map = builder.releaseCodeMap();
- auto exec = new exec::LinearExecutor{
- std::move(lowered_graph), std::move(backend_contexts), tensor_regs, std::move(code_map), order,
- options.tracing_ctx};
+ auto exec = new exec::LinearExecutor{std::move(lowered_graph),
+ std::move(backend_contexts),
+ tensor_regs,
+ std::move(code_map),
+ order,
+ tracing_ctx};
if (!options.trace_filepath.empty())
{
- std::unique_ptr<exec::IExecutionObserver> ctp = std::make_unique<exec::TracingObserver>(
- options.trace_filepath, exec->graph(), options.tracing_ctx);
+ std::unique_ptr<exec::IExecutionObserver> ctp =
+ std::make_unique<exec::TracingObserver>(options.trace_filepath, exec->graph(), tracing_ctx);
exec->addObserver(std::move(ctp));
}
@@ -441,8 +443,9 @@ ExecutorFactory::createLinearExecutor(std::unique_ptr<compiler::LoweredGraph> lo
}
exec::IExecutor *ExecutorFactory::createDataflowExecutor(
- std::unique_ptr<compiler::LoweredGraph> lowered_graph, const compiler::CompilerOptions &options,
- const std::shared_ptr<exec::ExecutorMap> &executor_map, bool parallel)
+ std::unique_ptr<compiler::LoweredGraph> lowered_graph, const util::TracingCtx *tracing_ctx,
+ const compiler::CompilerOptions &options, const std::shared_ptr<exec::Executors> &executors,
+ bool parallel)
{
backend::BackendContexts backend_contexts =
createBackendContexts(*lowered_graph, options.executor == "Linear");
@@ -462,7 +465,7 @@ exec::IExecutor *ExecutorFactory::createDataflowExecutor(
prepareMigrantTensors(*lowered_graph, backend_contexts);
// Give some runtime objects to builtin KernelGenerator
- prepareBuiltinBackend(tensor_regs, executor_map, backend_contexts);
+ prepareBuiltinBackend(tensor_regs, executors, backend_contexts);
ExecutionBuilder builder;
@@ -491,13 +494,13 @@ exec::IExecutor *ExecutorFactory::createDataflowExecutor(
if (parallel)
{
exec = new exec::ParallelExecutor{std::move(lowered_graph), std::move(backend_contexts),
- tensor_regs, std::move(code_map), options.tracing_ctx};
+ tensor_regs, std::move(code_map), tracing_ctx};
}
else
{
auto dataflow_exec =
new exec::DataflowExecutor{std::move(lowered_graph), std::move(backend_contexts), tensor_regs,
- std::move(code_map), options.tracing_ctx};
+ std::move(code_map), tracing_ctx};
if (options.he_profiling_mode)
{
std::vector<const backend::Backend *> backends;
@@ -515,8 +518,8 @@ exec::IExecutor *ExecutorFactory::createDataflowExecutor(
if (!options.trace_filepath.empty())
{
- std::unique_ptr<exec::IExecutionObserver> ctp = std::make_unique<exec::TracingObserver>(
- options.trace_filepath, exec->graph(), options.tracing_ctx);
+ std::unique_ptr<exec::IExecutionObserver> ctp =
+ std::make_unique<exec::TracingObserver>(options.trace_filepath, exec->graph(), tracing_ctx);
exec->addObserver(std::move(ctp));
}
diff --git a/runtime/onert/core/src/compiler/ExecutorFactory.h b/runtime/onert/core/src/compiler/ExecutorFactory.h
index 2ee05fae3..70c089f8c 100644
--- a/runtime/onert/core/src/compiler/ExecutorFactory.h
+++ b/runtime/onert/core/src/compiler/ExecutorFactory.h
@@ -21,7 +21,7 @@
#include "backend/ITensor.h"
#include "compiler/LoweredGraph.h"
-#include "exec/IExecutor.h"
+#include "exec/Executors.h"
#include <deque>
#include <unordered_map>
@@ -38,8 +38,9 @@ public:
public:
exec::IExecutor *create(std::unique_ptr<compiler::LoweredGraph> lowered_graph,
+ const util::TracingCtx *tracing_ctx,
const compiler::CompilerOptions &options,
- const std::shared_ptr<exec::ExecutorMap> &executor_map);
+ const std::shared_ptr<exec::Executors> &executors);
private:
ExecutorFactory();
@@ -48,25 +49,26 @@ private:
static void prepareMigrantTensors(compiler::LoweredGraph &lowered_graph,
const backend::BackendContexts &backend_contexts);
static void prepareBuiltinBackend(const TensorRegistries &tensor_regs,
- const std::shared_ptr<exec::ExecutorMap> &executor_map,
+ const std::shared_ptr<exec::Executors> &executors,
const backend::BackendContexts &backend_contexts);
static std::deque<std::pair<const backend::Backend *, backend::BackendContext *>>
orderBackendContext(const backend::BackendContexts &backend_contexts);
- static exec::IExecutor *
- createLinearExecutor(std::unique_ptr<compiler::LoweredGraph> lowered_graph,
- const compiler::CompilerOptions &options,
- const std::shared_ptr<exec::ExecutorMap> &executor_map);
+ static exec::IExecutor *createLinearExecutor(
+ std::unique_ptr<compiler::LoweredGraph> lowered_graph, const util::TracingCtx *tracing_ctx,
+ const compiler::CompilerOptions &options, const std::shared_ptr<exec::Executors> &executors);
static exec::IExecutor *
createDataflowExecutor(std::unique_ptr<compiler::LoweredGraph> lowered_graph,
+ const util::TracingCtx *tracing_ctx,
const compiler::CompilerOptions &options,
- const std::shared_ptr<exec::ExecutorMap> &executor_map, bool parallel);
+ const std::shared_ptr<exec::Executors> &executors, bool parallel);
private:
- std::unordered_map<std::string, std::function<exec::IExecutor *(
- std::unique_ptr<compiler::LoweredGraph>,
- const compiler::CompilerOptions &options,
- const std::shared_ptr<exec::ExecutorMap> &executor_map)>>
+ std::unordered_map<
+ std::string,
+ std::function<exec::IExecutor *(
+ std::unique_ptr<compiler::LoweredGraph>, const util::TracingCtx *tracing_ctx,
+ const compiler::CompilerOptions &options, const std::shared_ptr<exec::Executors> &executors)>>
_map;
};
diff --git a/runtime/onert/core/src/compiler/Fp32ToFp16Converter.cc b/runtime/onert/core/src/compiler/Fp32ToFp16Converter.cc
index 5c1cef1ab..98dc906e4 100644
--- a/runtime/onert/core/src/compiler/Fp32ToFp16Converter.cc
+++ b/runtime/onert/core/src/compiler/Fp32ToFp16Converter.cc
@@ -180,7 +180,7 @@ void Fp32ToFp16Converter::appendOpSequences()
{
_lowered_graph.op_seqs().iterate(
[&](const ir::OpSequenceIndex &op_seq_ind, ir::OpSequence &op_seq) {
- const auto lower_info = _lowered_graph.getLowerInfo(op_seq_ind);
+ const auto &lower_info = _lowered_graph.getLowerInfo(op_seq_ind);
assert(lower_info != nullptr);
// For now, the only acl_cl supports fully fp16 type
@@ -375,7 +375,7 @@ void Fp32ToFp16Converter::convertOperands()
{
_lowered_graph.op_seqs().iterate(
[&](const ir::OpSequenceIndex &op_seq_ind, ir::OpSequence &op_seq) {
- const auto lower_info = _lowered_graph.getLowerInfo(op_seq_ind);
+ const auto &lower_info = _lowered_graph.getLowerInfo(op_seq_ind);
assert(lower_info != nullptr);
// For now, the only acl_cl supports fully fp16
if (lower_info->backend()->config()->id() != kAclClBackendConfigId)
@@ -515,7 +515,7 @@ ir::OperandIndex Fp32ToFp16Converter::newCopiedOperand(const ir::OperandIndex &o
void Fp32ToFp16Converter::setNewOperandLowerInfo(const ir::OpSequenceIndex &op_seq_ind,
const ir::OperandIndex &new_op_ind)
{
- const auto lower_info = _lowered_graph.getLowerInfo(op_seq_ind);
+ const auto &lower_info = _lowered_graph.getLowerInfo(op_seq_ind);
assert(lower_info != nullptr);
auto new_lower_info = std::make_unique<compiler::OperandLowerInfo>();
auto permute_factor = compiler::PermuteFactor(lower_info->backend(), lower_info->layout());
@@ -527,7 +527,7 @@ void Fp32ToFp16Converter::setNewOperandLowerInfo(const ir::OpSequenceIndex &op_s
void Fp32ToFp16Converter::setNewOperationLowerInfo(const ir::OpSequenceIndex &op_seq_ind,
const ir::OpSequenceIndex &new_op_seq_ind)
{
- const auto lower_info = _lowered_graph.getLowerInfo(op_seq_ind);
+ const auto &lower_info = _lowered_graph.getLowerInfo(op_seq_ind);
assert(lower_info != nullptr);
auto new_lower_info =
@@ -635,7 +635,7 @@ ir::OpSequenceIndex Fp32ToFp16Converter::newOpSequence(const ir::OpSequenceIndex
const ir::OperationIndex &node_index)
{
auto &node = _lowered_graph.graph().operations().at(node_index);
- const auto lower_info = _lowered_graph.getLowerInfo(op_seq_ind);
+ const auto &lower_info = _lowered_graph.getLowerInfo(op_seq_ind);
assert(lower_info != nullptr);
auto layout = lower_info->layout();
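
The recurring const auto -> const auto & change in this file is a plain deduction detail: auto never deduces a reference type, so `const auto x = f();` materializes a copy of whatever f() yields, while `const auto &x = f();` binds without copying (and extends the lifetime of a temporary if f() returns by value). A minimal sketch, making no claim about what getLowerInfo() actually returns:

#include <iostream>
#include <string>

struct LowerInfo { std::string backend = "cpu"; };

const LowerInfo &getLowerInfo()
{
  static LowerInfo info;
  return info;
}

int main()
{
  const auto copy = getLowerInfo();  // deduces LowerInfo: a copy is made
  const auto &ref = getLowerInfo();  // deduces const LowerInfo &: no copy
  std::cout << copy.backend << " " << ref.backend << "\n";
  return 0;
}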
diff --git a/runtime/onert/core/src/compiler/HEScheduler.cc b/runtime/onert/core/src/compiler/HEScheduler.cc
index 2f996c8e8..c4bfddb8f 100644
--- a/runtime/onert/core/src/compiler/HEScheduler.cc
+++ b/runtime/onert/core/src/compiler/HEScheduler.cc
@@ -14,17 +14,14 @@
* limitations under the License.
*/
-#include "ir/Operand.h"
-#include "compiler/HEScheduler.h"
-#include "ir/Graph.h"
-#include "util/ConfigSource.h"
+#include "HEScheduler.h"
+
#include "compiler/BackendResolver.h"
+#include "ir/Graph.h"
#include "util/logging.h"
-#include "util/Utils.h"
-#include "exec/FunctionSequence.h"
+
#include <cassert>
#include <cmath>
-#include <chrono>
namespace
{
diff --git a/runtime/onert/core/src/compiler/HEScheduler.h b/runtime/onert/core/src/compiler/HEScheduler.h
index 1a95b9881..18ea388fd 100644
--- a/runtime/onert/core/src/compiler/HEScheduler.h
+++ b/runtime/onert/core/src/compiler/HEScheduler.h
@@ -23,14 +23,16 @@
#ifndef __ONERT_COMPILER_H_E_SCHEDULER_H_
#define __ONERT_COMPILER_H_E_SCHEDULER_H_
-#include "compiler/IScheduler.h"
-#include "compiler/BackendManager.h"
-#include "compiler/Compiler.h"
-#include "ir/Graph.h"
-#include "exec/ExecTime.h"
-#include "backend/Backend.h"
-#include <memory>
-#include "ir/OperationIndexMap.h"
+#include "IScheduler.h"
+#include "../backend/builtin/Config.h"
+#include "../exec/ExecTime.h"
+
+#include <backend/Backend.h>
+#include <compiler/BackendManager.h>
+#include <compiler/Compiler.h>
+#include <ir/Graph.h>
+#include <ir/OperationIndexMap.h>
+
#include <map>
#include <memory>
diff --git a/runtime/onert/test/core/compiler/HEScheduler.cc b/runtime/onert/core/src/compiler/HEScheduler.test.cc
index 514c01485..c4a2df025 100644
--- a/runtime/onert/test/core/compiler/HEScheduler.cc
+++ b/runtime/onert/core/src/compiler/HEScheduler.test.cc
@@ -14,14 +14,13 @@
* limitations under the License.
*/
-#include <compiler/HEScheduler.h>
-#include <exec/ExecTime.h>
+#include "HEScheduler.h"
+#include "../exec/ExecTime.h"
-#include <ir/Shape.h>
+#include <ir/DataType.h>
#include <ir/InternalType.h>
+#include <ir/Shape.h>
#include <ir/TypeInfo.h>
-#include <ir/DataType.h>
-
#include <ir/operation/BinaryArithmetic.h>
#include <ir/operation/FullyConnected.h>
@@ -362,8 +361,8 @@ class HESchedulerTestWithExecutorParam : public HESchedulerTest,
// SchedulerTestWithExecutorParam tests are parameterized with executor name and runs three times -
// one time for each executor
-INSTANTIATE_TEST_CASE_P(AllExecutors, HESchedulerTestWithExecutorParam,
- testing::Values(LINEAR, DATAFLOW, PARALLEL));
+INSTANTIATE_TEST_SUITE_P(AllExecutors, HESchedulerTestWithExecutorParam,
+ testing::Values(LINEAR, DATAFLOW, PARALLEL));
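
The switch to INSTANTIATE_TEST_SUITE_P tracks GoogleTest's rename of "test case" to "test suite"; the old INSTANTIATE_TEST_CASE_P spelling is deprecated in current GoogleTest releases. A minimal value-parameterized test using the new macro (illustrative fixture and values, not the ones from this file):

#include <gtest/gtest.h>
#include <string>

class ExecutorNameTest : public ::testing::TestWithParam<std::string>
{
};

TEST_P(ExecutorNameTest, name_is_not_empty) { EXPECT_FALSE(GetParam().empty()); }

// Previously INSTANTIATE_TEST_CASE_P(...), now deprecated.
INSTANTIATE_TEST_SUITE_P(AllExecutors, ExecutorNameTest,
                         ::testing::Values("Linear", "Dataflow", "Parallel"));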
// Test scheduler behavior for straight graph with known execution time of all nodes and permutes.
TEST_P(HESchedulerTestWithExecutorParam, straight_graph_known_exec_time)
@@ -371,9 +370,9 @@ TEST_P(HESchedulerTestWithExecutorParam, straight_graph_known_exec_time)
setExecutor(GetParam());
// Prepare graph
- ir::Subgraphs subgs;
+ ir::Model model;
auto graph(createStraightGraph());
- subgs.push(ir::SubgraphIndex{0}, graph);
+ model.push(ir::SubgraphIndex{0}, graph);
OperationIndex add_op_idx(0), sub_op_idx(1), mul_op_idx(2);
// Set default execution and transfer time
@@ -392,8 +391,8 @@ TEST_P(HESchedulerTestWithExecutorParam, straight_graph_known_exec_time)
et.storeOperationsExecTime();
// Test scheduler
- auto scheduler =
- compiler::HEScheduler(_mock_backends, compiler::fetchCompilerOptionsFromGlobalConfig(subgs));
+ auto coptions = *onert::compiler::CompilerOptions::fromGlobalConfig();
+ auto scheduler = compiler::HEScheduler(_mock_backends, coptions);
const auto br = scheduler.schedule(*graph);
ASSERT_EQ(br->getBackend(add_op_idx)->config()->id(), "cpu");
ASSERT_EQ(br->getBackend(sub_op_idx)->config()->id(), "gpu");
@@ -407,8 +406,8 @@ TEST_P(HESchedulerTestWithExecutorParam, straight_graph_known_exec_time)
setPermutationsExecutionTime(_mock_backends, OPERAND_SIZE, 1e5);
// Test scheduler
- auto scheduler =
- compiler::HEScheduler(_mock_backends, compiler::fetchCompilerOptionsFromGlobalConfig(subgs));
+ auto coptions = *onert::compiler::CompilerOptions::fromGlobalConfig();
+ auto scheduler = compiler::HEScheduler(_mock_backends, coptions);
const auto br = scheduler.schedule(*graph);
ASSERT_EQ(br->getBackend(add_op_idx)->config()->id(), "cpu");
ASSERT_EQ(br->getBackend(sub_op_idx)->config()->id(), "cpu");
@@ -423,9 +422,9 @@ TEST_P(HESchedulerTestWithExecutorParam, branched_graph_known_exec_time)
setExecutor(GetParam());
// Prepare graph
- ir::Subgraphs subgs;
+ ir::Model model;
auto graph(createBranchedGraph());
- subgs.push(ir::SubgraphIndex{0}, graph);
+ model.push(ir::SubgraphIndex{0}, graph);
OperationIndex add_op_idx(0), mul1_op_idx(1), mul2_op_idx(2), fc1_op_idx(3), fc2_op_idx(4),
sub_op_idx(5);
@@ -449,8 +448,8 @@ TEST_P(HESchedulerTestWithExecutorParam, branched_graph_known_exec_time)
et.storeOperationsExecTime();
// Test scheduler
- auto scheduler =
- compiler::HEScheduler(_mock_backends, compiler::fetchCompilerOptionsFromGlobalConfig(subgs));
+ auto coptions = *onert::compiler::CompilerOptions::fromGlobalConfig();
+ auto scheduler = compiler::HEScheduler(_mock_backends, coptions);
const auto br = scheduler.schedule(*graph);
std::string branch1_expected_backend("npu"), branch2_expected_backend("npu");
@@ -483,8 +482,8 @@ TEST_P(HESchedulerTestWithExecutorParam, branched_graph_known_exec_time)
et.storeOperationsExecTime();
// Test scheduler
- auto scheduler =
- compiler::HEScheduler(_mock_backends, compiler::fetchCompilerOptionsFromGlobalConfig(subgs));
+ auto coptions = *onert::compiler::CompilerOptions::fromGlobalConfig();
+ auto scheduler = compiler::HEScheduler(_mock_backends, coptions);
const auto br = scheduler.schedule(*graph);
ASSERT_EQ(br->getBackend(add_op_idx)->config()->id(), "npu");
ASSERT_EQ(br->getBackend(mul1_op_idx)->config()->id(), "npu");
@@ -505,9 +504,9 @@ TEST_F(HESchedulerTest, branched_graph_profiling_mode)
setExecutor(DATAFLOW);
// Prepare graph
- ir::Subgraphs subgs;
+ ir::Model model;
auto graph(createBranchedGraph());
- subgs.push(ir::SubgraphIndex{0}, graph);
+ model.push(ir::SubgraphIndex{0}, graph);
OperationIndex add_op_idx(0), mul1_op_idx(1), mul2_op_idx(2), fc1_op_idx(3), fc2_op_idx(4),
sub_op_idx(5);
@@ -528,8 +527,8 @@ TEST_F(HESchedulerTest, branched_graph_profiling_mode)
et.storeOperationsExecTime();
// Test scheduler
- auto scheduler =
- compiler::HEScheduler(_mock_backends, compiler::fetchCompilerOptionsFromGlobalConfig(subgs));
+ auto coptions = *onert::compiler::CompilerOptions::fromGlobalConfig();
+ auto scheduler = compiler::HEScheduler(_mock_backends, coptions);
const auto br = scheduler.schedule(*graph);
ASSERT_EQ(br->getBackend(mul1_op_idx)->config()->id(), "npu");
ASSERT_EQ(br->getBackend(mul2_op_idx)->config()->id(), "npu");
@@ -550,8 +549,8 @@ TEST_F(HESchedulerTest, branched_graph_profiling_mode)
et.storeOperationsExecTime();
// Test scheduler
- auto scheduler =
- compiler::HEScheduler(_mock_backends, compiler::fetchCompilerOptionsFromGlobalConfig(subgs));
+ auto coptions = *onert::compiler::CompilerOptions::fromGlobalConfig();
+ auto scheduler = compiler::HEScheduler(_mock_backends, coptions);
const auto br = scheduler.schedule(*graph);
ASSERT_NE(br->getBackend(add_op_idx)->config()->id(),
br->getBackend(mul1_op_idx)->config()->id());
diff --git a/runtime/onert/core/src/compiler/Linear.cc b/runtime/onert/core/src/compiler/Linear.cc
index 73ba96238..f85b8d1bd 100644
--- a/runtime/onert/core/src/compiler/Linear.cc
+++ b/runtime/onert/core/src/compiler/Linear.cc
@@ -14,15 +14,13 @@
* limitations under the License.
*/
-#include <algorithm>
-#include <sstream>
-
#include "Linear.h"
-#include "backend/IConfig.h"
-#include "backend/Backend.h"
+#include "../dumper/text/GraphDumper.h"
+
#include "util/logging.h"
-#include "dumper/text/GraphDumper.h"
+
+#include <sstream>
namespace onert
{
diff --git a/runtime/onert/core/src/compiler/LoweredGraph.cc b/runtime/onert/core/src/compiler/LoweredGraph.cc
index 999bffa7c..9e84753a7 100644
--- a/runtime/onert/core/src/compiler/LoweredGraph.cc
+++ b/runtime/onert/core/src/compiler/LoweredGraph.cc
@@ -16,24 +16,23 @@
#include "compiler/LoweredGraph.h"
-#include <assert.h>
-#include <algorithm>
-#include <sstream>
-#include "util/logging.h"
-#include "compiler/pass/ConstantInsertionPass.h"
-#include "compiler/pass/ConstantLoweringPass.h"
-#include "compiler/pass/PassRunner.h"
-#include "compiler/pass/PermutationOperationPass.h"
-#include "compiler/pass/PermutationInsertionPass.h"
-#include "compiler/pass/PermutationEliminationPass.h"
-#include "dumper/text/GraphDumper.h"
-#include "ir/verifier/Verifier.h"
+#include "HEScheduler.h"
+#include "ManualScheduler.h"
+#include "pass/ConstantInsertionPass.h"
+#include "pass/ConstantLoweringPass.h"
+#include "pass/PassRunner.h"
+#include "pass/PermutationEliminationPass.h"
+#include "pass/PermutationInsertionPass.h"
+#include "pass/PermutationOperationPass.h"
+#include "../dumper/text/GraphDumper.h"
+#include "../ir/verifier/Verifier.h"
+
#include "backend/Backend.h"
-#include "backend/IConfig.h"
#include "compiler/BackendResolver.h"
-#include "compiler/ManualScheduler.h"
-#include "compiler/HEScheduler.h"
-#include "util/TracingCtx.h"
+#include "util/logging.h"
+
+#include <cassert>
+#include <sstream>
namespace onert
{
@@ -42,7 +41,7 @@ namespace compiler
LoweredGraph::LoweredGraph(const ir::Graph &graph, const CompilerOptions &options) : _graph{graph}
{
- lowerGraph(graph, options);
+ lowerGraph(options);
}
// TODO Design better class and constructor to represent parent_graph
@@ -50,18 +49,11 @@ LoweredGraph::LoweredGraph(const ir::Graph &parent_graph, const ir::Graph &graph
const CompilerOptions &options)
: _graph{graph}, _parent_graph{parent_graph}
{
- lowerGraph(graph, options);
+ lowerGraph(options);
}
-void LoweredGraph::lowerGraph(const ir::Graph &graph, const CompilerOptions &options)
+void LoweredGraph::lowerGraph(const CompilerOptions &options)
{
- // set tracing_ctx for copied graph
- if (options.tracing_ctx)
- {
- auto subgraph_index = options.tracing_ctx->getSubgraphIndex(&graph);
- options.tracing_ctx->setSubgraphIndex(&_graph, subgraph_index.value());
- }
-
// Build backend contexts
auto &backend_manager = BackendManager::get();
// Create contexts for other backends
diff --git a/runtime/onert/core/src/compiler/ShapeValidator.cc b/runtime/onert/core/src/compiler/ShapeValidator.cc
index 1c7000986..8c6421744 100644
--- a/runtime/onert/core/src/compiler/ShapeValidator.cc
+++ b/runtime/onert/core/src/compiler/ShapeValidator.cc
@@ -34,77 +34,72 @@ namespace onert
namespace compiler
{
-ShapeValidator::ShapeValidator(const ir::Graph &graph)
- : _graph{graph}, _ctx{graph.operands()}, _current_layout{ir::Layout::UNKNOWN}
-{
-}
+ShapeValidator::ShapeValidator(const ir::Graph &graph) : _graph{graph} {}
void ShapeValidator::checkUnaryOp(const ir::Operation &node)
{
+ const auto &operands = _graph.operands();
const auto output_index{node.getOutputs().at(0)};
const auto input_index{node.getInputs().at(0)};
- if (_ctx.at(output_index).info().isDynamic())
+ if (operands.at(output_index).info().isDynamic())
return;
// Check if I/O shapes match
- OP_REQUIRES(_ctx.at(output_index).shape() == _ctx.at(input_index).shape());
+ OP_REQUIRES(operands.at(output_index).shape() == operands.at(input_index).shape());
}
void ShapeValidator::operator()()
{
- // There is no reason for each subgraph to have subgraphs since compiler has subgraphs when
- // creating Compiler
- assert(_graph.subgraphs() == nullptr);
-
- _current_layout = _graph.layout();
-
_graph.operations().iterate(
[&](const ir::OperationIndex &, const ir::Operation &node) { node.accept(*this); });
}
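
ShapeValidator no longer caches operands (_ctx) or the layout in its constructor; operator()() walks the operations and double-dispatches via accept(*this), and each visit() fetches `const auto &operands = _graph.operands();` on demand. A compact, generic sketch of that visitor shape with stand-in types (not the onert IR classes):

#include <iostream>
#include <vector>

struct Visitor;
struct Node
{
  virtual ~Node() = default;
  virtual void accept(Visitor &v) const = 0;
};

struct Softmax;
struct Visitor
{
  virtual ~Visitor() = default;
  virtual void visit(const Softmax &) = 0;
};

struct Softmax : Node
{
  void accept(Visitor &v) const override { v.visit(*this); }  // double dispatch
};

struct Graph
{
  std::vector<const Node *> operations;
  int operand_count = 0;  // stand-in for graph.operands()
};

struct Validator : Visitor
{
  explicit Validator(const Graph &graph) : _graph{graph} {}  // holds only the graph

  void operator()()
  {
    for (const auto *node : _graph.operations)
      node->accept(*this);
  }

  void visit(const Softmax &) override
  {
    // Query the graph on demand instead of caching members in the constructor.
    const auto &operand_count = _graph.operand_count;
    std::cout << "validating Softmax against " << operand_count << " operands\n";
  }

  const Graph &_graph;
};

int main()
{
  Softmax softmax;
  Graph graph;
  graph.operations.push_back(&softmax);
  Validator validate{graph};
  validate();
  return 0;
}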
void ShapeValidator::visit(const ir::operation::BatchMatMul &node)
{
+ const auto &operands = _graph.operands();
const auto lhs_index(node.getInputs().at(ir::operation::BatchMatMul::Input::LHS));
const auto rhs_index(node.getInputs().at(ir::operation::BatchMatMul::Input::RHS));
const auto out_index{node.getOutputs().at(0)};
- if (_ctx.at(out_index).info().isDynamic())
+ if (operands.at(out_index).info().isDynamic())
return;
- OP_REQUIRES(_ctx.at(lhs_index).shape().rank() <= 4);
- OP_REQUIRES(_ctx.at(rhs_index).shape().rank() <= 4);
- OP_REQUIRES(_ctx.at(lhs_index).shape().rank() >= 2);
- OP_REQUIRES(_ctx.at(rhs_index).shape().rank() >= 2);
+ OP_REQUIRES(operands.at(lhs_index).shape().rank() <= 4);
+ OP_REQUIRES(operands.at(rhs_index).shape().rank() <= 4);
+ OP_REQUIRES(operands.at(lhs_index).shape().rank() >= 2);
+ OP_REQUIRES(operands.at(rhs_index).shape().rank() >= 2);
}
void ShapeValidator::visit(const ir::operation::BatchToSpaceND &node)
{
+ const auto &operands = _graph.operands();
const auto ofm_index{node.getOutputs().at(0)};
- if (_ctx.at(ofm_index).info().isDynamic())
+ if (operands.at(ofm_index).info().isDynamic())
return;
const auto ifm_index{node.getInputs().at(ir::operation::BatchToSpaceND::Input::INPUT)};
const auto block_size_index{
node.getInputs().at(ir::operation::BatchToSpaceND::Input::BLOCK_SIZE)};
- const auto frontend_layout = _current_layout;
- const auto input_shape = _ctx.at(ifm_index).shape().asFeature(frontend_layout);
- const auto output_shape = _ctx.at(ofm_index).shape().asFeature(frontend_layout);
+ const auto frontend_layout = _graph.layout();
+ const auto input_shape = operands.at(ifm_index).shape().asFeature(frontend_layout);
+ const auto output_shape = operands.at(ofm_index).shape().asFeature(frontend_layout);
// All requirement as per NNAPI specification.
- OP_REQUIRES(_ctx.at(ifm_index).shape().rank() == 4);
- OP_REQUIRES(_ctx.at(ofm_index).shape().rank() == 4);
- OP_REQUIRES(_ctx.at(block_size_index).shape().rank() == 1);
+ OP_REQUIRES(operands.at(ifm_index).shape().rank() == 4);
+ OP_REQUIRES(operands.at(ofm_index).shape().rank() == 4);
+ OP_REQUIRES(operands.at(block_size_index).shape().rank() == 1);
- OP_REQUIRES(_ctx.at(block_size_index).shape().dim(0) == 2);
+ OP_REQUIRES(operands.at(block_size_index).shape().dim(0) == 2);
if (node.getInputs().size() != 2)
{
const auto crops_index{node.getInputs().at(ir::operation::BatchToSpaceND::Input::CROPS_DATA)};
- OP_REQUIRES(_ctx.at(crops_index).shape().rank() == 2);
- OP_REQUIRES(_ctx.at(crops_index).shape().dim(0) == (_ctx.at(ifm_index).shape().rank() - 2));
- OP_REQUIRES(_ctx.at(crops_index).shape().dim(1) == 2);
+ OP_REQUIRES(operands.at(crops_index).shape().rank() == 2);
+ OP_REQUIRES(operands.at(crops_index).shape().dim(0) ==
+ (operands.at(ifm_index).shape().rank() - 2));
+ OP_REQUIRES(operands.at(crops_index).shape().dim(1) == 2);
}
OP_REQUIRES(input_shape.C == output_shape.C);
@@ -112,8 +107,9 @@ void ShapeValidator::visit(const ir::operation::BatchToSpaceND &node)
void ShapeValidator::visit(const ir::operation::BCQFullyConnected &node)
{
+ const auto &operands = _graph.operands();
const auto ofm_index{node.getOutputs().at(0)};
- if (_ctx.at(ofm_index).info().isDynamic())
+ if (operands.at(ofm_index).info().isDynamic())
return;
const auto ifm_index{node.getInputs().at(ir::operation::BCQFullyConnected::Input::INPUT)};
@@ -125,16 +121,16 @@ void ShapeValidator::visit(const ir::operation::BCQFullyConnected &node)
node.getInputs().at(ir::operation::BCQFullyConnected::Input::WEIGHTS_CLUSTERS)};
// const auto bias_index{node.getInputs().at(ir::operation::BCQFullyConnected::Input::BIAS)};
- OP_REQUIRES(_ctx.at(ifm_index).shape().rank() == 2);
- OP_REQUIRES(_ctx.at(ofm_index).shape().rank() == 2);
- OP_REQUIRES(_ctx.at(weight_scales_index).shape().rank() == 1);
- OP_REQUIRES(_ctx.at(weight_binary_index).shape().rank() == 2);
- OP_REQUIRES(_ctx.at(weight_cluster_index).shape().rank() == 2);
+ OP_REQUIRES(operands.at(ifm_index).shape().rank() == 2);
+ OP_REQUIRES(operands.at(ofm_index).shape().rank() == 2);
+ OP_REQUIRES(operands.at(weight_scales_index).shape().rank() == 1);
+ OP_REQUIRES(operands.at(weight_binary_index).shape().rank() == 2);
+ OP_REQUIRES(operands.at(weight_cluster_index).shape().rank() == 2);
- OP_REQUIRES(_ctx.at(ifm_index).shape().dim(1) == _ctx.at(ofm_index).shape().dim(1));
+ OP_REQUIRES(operands.at(ifm_index).shape().dim(1) == operands.at(ofm_index).shape().dim(1));
- OP_REQUIRES(_ctx.at(weight_cluster_index).shape().dim(0) > 0);
- OP_REQUIRES(_ctx.at(weight_cluster_index).shape().dim(1) == 2);
+ OP_REQUIRES(operands.at(weight_cluster_index).shape().dim(0) > 0);
+ OP_REQUIRES(operands.at(weight_cluster_index).shape().dim(1) == 2);
// more shape validation will be done inside kernel.
@@ -143,8 +139,9 @@ void ShapeValidator::visit(const ir::operation::BCQFullyConnected &node)
void ShapeValidator::visit(const ir::operation::BCQGather &node)
{
+ const auto &operands = _graph.operands();
const auto ofm_index{node.getOutputs().at(0)};
- if (_ctx.at(ofm_index).info().isDynamic())
+ if (operands.at(ofm_index).info().isDynamic())
return;
const auto indices_index{node.getInputs().at(ir::operation::BCQGather::Input::INDICES)};
@@ -153,13 +150,14 @@ void ShapeValidator::visit(const ir::operation::BCQGather &node)
const auto input_clusters_index{
node.getInputs().at(ir::operation::BCQGather::Input::INPUT_CLUSTERS)};
- OP_REQUIRES(_ctx.at(indices_index).shape().rank() <= 2); // TODO : support rank up to 4 or more
- OP_REQUIRES(_ctx.at(input_binary_index).shape().rank() == 2);
- OP_REQUIRES(_ctx.at(input_scales_index).shape().rank() == 1);
- OP_REQUIRES(_ctx.at(input_clusters_index).shape().rank() == 2);
+ OP_REQUIRES(operands.at(indices_index).shape().rank() <=
+ 2); // TODO : support rank up to 4 or more
+ OP_REQUIRES(operands.at(input_binary_index).shape().rank() == 2);
+ OP_REQUIRES(operands.at(input_scales_index).shape().rank() == 1);
+ OP_REQUIRES(operands.at(input_clusters_index).shape().rank() == 2);
- OP_REQUIRES(_ctx.at(input_clusters_index).shape().dim(0) > 0);
- OP_REQUIRES(_ctx.at(input_clusters_index).shape().dim(1) == 2);
+ OP_REQUIRES(operands.at(input_clusters_index).shape().dim(0) > 0);
+ OP_REQUIRES(operands.at(input_clusters_index).shape().dim(1) == 2);
// more shape validation will be done inside kernel.
}
@@ -171,62 +169,67 @@ void ShapeValidator::visit(const ir::operation::Comparison &)
void ShapeValidator::visit(const ir::operation::Softmax &node)
{
+ const auto &operands = _graph.operands();
const auto output_index{node.getOutputs().at(0)};
- if (_ctx.at(output_index).info().isDynamic())
+ if (operands.at(output_index).info().isDynamic())
return;
const auto input_index{node.getInputs().at(0)};
- OP_REQUIRES(_ctx.at(output_index).shape().rank() == _ctx.at(input_index).shape().rank());
+ OP_REQUIRES(operands.at(output_index).shape().rank() == operands.at(input_index).shape().rank());
}
void ShapeValidator::visit(const ir::operation::InstanceNorm &node)
{
+ const auto &operands = _graph.operands();
const auto ofm_index{node.getOutputs().at(0)};
- if (_ctx.at(ofm_index).info().isDynamic())
+ if (operands.at(ofm_index).info().isDynamic())
return;
const auto ifm_index{node.getInputs().at(ir::operation::InstanceNorm::Input::INPUT)};
const auto gamma_index{node.getInputs().at(ir::operation::InstanceNorm::Input::GAMMA)};
const auto beta_index{node.getInputs().at(ir::operation::InstanceNorm::Input::BETA)};
- OP_REQUIRES(_ctx.at(ifm_index).shape().rank() == 4);
- OP_REQUIRES(_ctx.at(ifm_index).shape() == _ctx.at(ofm_index).shape());
- OP_REQUIRES(_ctx.at(gamma_index).shape().rank() == 1);
- OP_REQUIRES(_ctx.at(beta_index).shape().rank() == 1);
+ OP_REQUIRES(operands.at(ifm_index).shape().rank() == 4);
+ OP_REQUIRES(operands.at(ifm_index).shape() == operands.at(ofm_index).shape());
+ OP_REQUIRES(operands.at(gamma_index).shape().rank() == 1);
+ OP_REQUIRES(operands.at(beta_index).shape().rank() == 1);
}
void ShapeValidator::visit(const ir::operation::Pool2D &node)
{
+ const auto &operands = _graph.operands();
const auto ofm_index{node.getOutputs().at(0)};
- if (_ctx.at(ofm_index).info().isDynamic())
+ if (operands.at(ofm_index).info().isDynamic())
return;
const auto ifm_index{node.getInputs().at(ir::operation::Pool2D::Input::INPUT)};
- OP_REQUIRES(_ctx.at(ifm_index).shape().rank() == 4);
+ OP_REQUIRES(operands.at(ifm_index).shape().rank() == 4);
}
void ShapeValidator::visit(const ir::operation::Permute &node)
{
+ const auto &operands = _graph.operands();
const auto output_index{node.getOutputs().at(0)};
- if (_ctx.at(output_index).info().isDynamic())
+ if (operands.at(output_index).info().isDynamic())
return;
const auto input_index{node.getInputs().at(0)};
- OP_REQUIRES(_ctx.at(output_index).shape().rank() == _ctx.at(input_index).shape().rank());
+ OP_REQUIRES(operands.at(output_index).shape().rank() == operands.at(input_index).shape().rank());
}
void ShapeValidator::visit(const ir::operation::Reduce &node)
{
+ const auto &operands = _graph.operands();
const auto output_index{node.getOutputs().at(0)};
- if (_ctx.at(output_index).info().isDynamic())
+ if (operands.at(output_index).info().isDynamic())
return;
const auto input_index{node.getInputs().at(ir::operation::Reduce::Input::INPUT)};
- const auto input_shape = _ctx.at(input_index).shape();
- const auto output_shape = _ctx.at(output_index).shape();
+ const auto input_shape = operands.at(input_index).shape();
+ const auto output_shape = operands.at(output_index).shape();
OP_REQUIRES(input_shape.rank() <= 4);
OP_REQUIRES(output_shape.rank() <= input_shape.rank());
@@ -266,18 +269,20 @@ void ShapeValidator::visit(const ir::operation::Reduce &node)
void ShapeValidator::visit(const ir::operation::Transpose &node)
{
+ const auto &operands = _graph.operands();
const auto output_index{node.getOutputs().at(0)};
- if (_ctx.at(output_index).info().isDynamic())
+ if (operands.at(output_index).info().isDynamic())
return;
const auto input_index{node.getInputs().at(ir::operation::Transpose::Input::INPUT)};
const auto perm_index{node.getInputs().at(ir::operation::Transpose::Input::PERMUTATION)};
- const auto &output_shape = _ctx.at(output_index).shape();
- const auto &input_shape = _ctx.at(input_index).shape();
+ const auto &output_shape = operands.at(output_index).shape();
+ const auto &input_shape = operands.at(input_index).shape();
- OP_REQUIRES(_ctx.at(perm_index).shape().num_elements() == 0 ||
- input_shape.rank() == static_cast<int>(_ctx.at(perm_index).shape().num_elements()));
+ OP_REQUIRES(operands.at(perm_index).shape().num_elements() == 0 ||
+ input_shape.rank() ==
+ static_cast<int>(operands.at(perm_index).shape().num_elements()));
OP_REQUIRES(input_shape.rank() == output_shape.rank());
}
@@ -285,8 +290,9 @@ void ShapeValidator::visit(const ir::operation::RNN &node)
{
// NOTE This validation is for static rnn(non-dynamic shape), but not for dynamic rnn
// TODO Support dynamic rnn
+ const auto &operands = _graph.operands();
const auto output_index{node.getOutputs().at(ir::operation::RNN::Output::OUTPUT)};
- if (_ctx.at(output_index).info().isDynamic())
+ if (operands.at(output_index).info().isDynamic())
return;
const auto hidden_state_out_index{
@@ -299,35 +305,36 @@ void ShapeValidator::visit(const ir::operation::RNN &node)
const auto bias_index{node.getInputs().at(ir::operation::RNN::Input::BIAS)};
const auto hidden_state_in_index{node.getInputs().at(ir::operation::RNN::Input::HIDDEN_STATE_IN)};
- const auto batch_size = _ctx.at(output_index).shape().dim(0);
- const auto num_units = _ctx.at(output_index).shape().dim(1);
-
- OP_REQUIRES(_ctx.at(output_index).shape().rank() == 2 &&
- _ctx.at(hidden_state_out_index).shape().rank() == 2 &&
- _ctx.at(input_index).shape().rank() == 2 &&
- _ctx.at(weights_index).shape().rank() == 2 &&
- _ctx.at(recurrent_weights_index).shape().rank() == 2 &&
- _ctx.at(hidden_state_in_index).shape().rank() == 2);
- OP_REQUIRES(_ctx.at(bias_index).shape().rank() == 1);
-
- OP_REQUIRES(batch_size == _ctx.at(input_index).shape().dim(0) &&
- batch_size == _ctx.at(hidden_state_in_index).shape().dim(0) &&
- batch_size == _ctx.at(hidden_state_out_index).shape().dim(0));
- OP_REQUIRES(_ctx.at(input_index).shape().dim(1) == _ctx.at(weights_index).shape().dim(1));
-
- OP_REQUIRES(num_units == _ctx.at(weights_index).shape().dim(0) &&
- num_units == _ctx.at(recurrent_weights_index).shape().dim(0) &&
- num_units == _ctx.at(bias_index).shape().dim(0));
- OP_REQUIRES(num_units == _ctx.at(output_index).shape().dim(1) &&
- num_units == _ctx.at(recurrent_weights_index).shape().dim(1) &&
- num_units == _ctx.at(hidden_state_in_index).shape().dim(1) &&
- num_units == _ctx.at(hidden_state_out_index).shape().dim(1));
+ const auto batch_size = operands.at(output_index).shape().dim(0);
+ const auto num_units = operands.at(output_index).shape().dim(1);
+
+ OP_REQUIRES(operands.at(output_index).shape().rank() == 2 &&
+ operands.at(hidden_state_out_index).shape().rank() == 2 &&
+ operands.at(input_index).shape().rank() == 2 &&
+ operands.at(weights_index).shape().rank() == 2 &&
+ operands.at(recurrent_weights_index).shape().rank() == 2 &&
+ operands.at(hidden_state_in_index).shape().rank() == 2);
+ OP_REQUIRES(operands.at(bias_index).shape().rank() == 1);
+
+ OP_REQUIRES(batch_size == operands.at(input_index).shape().dim(0) &&
+ batch_size == operands.at(hidden_state_in_index).shape().dim(0) &&
+ batch_size == operands.at(hidden_state_out_index).shape().dim(0));
+ OP_REQUIRES(operands.at(input_index).shape().dim(1) == operands.at(weights_index).shape().dim(1));
+
+ OP_REQUIRES(num_units == operands.at(weights_index).shape().dim(0) &&
+ num_units == operands.at(recurrent_weights_index).shape().dim(0) &&
+ num_units == operands.at(bias_index).shape().dim(0));
+ OP_REQUIRES(num_units == operands.at(output_index).shape().dim(1) &&
+ num_units == operands.at(recurrent_weights_index).shape().dim(1) &&
+ num_units == operands.at(hidden_state_in_index).shape().dim(1) &&
+ num_units == operands.at(hidden_state_out_index).shape().dim(1));
}
void ShapeValidator::visit(const ir::operation::SpaceToBatchND &node)
{
+ const auto &operands = _graph.operands();
const auto ofm_index{node.getOutputs().at(0)};
- if (_ctx.at(ofm_index).info().isDynamic())
+ if (operands.at(ofm_index).info().isDynamic())
return;
const auto ifm_index{node.getInputs().at(ir::operation::SpaceToBatchND::Input::INPUT)};
@@ -335,39 +342,40 @@ void ShapeValidator::visit(const ir::operation::SpaceToBatchND &node)
node.getInputs().at(ir::operation::SpaceToBatchND::Input::BLOCK_SIZE)};
const auto paddings_index{node.getInputs().at(ir::operation::SpaceToBatchND::Input::PADDINGS)};
- const auto frontend_layout = _current_layout;
- const auto input_shape = _ctx.at(ifm_index).shape().asFeature(frontend_layout);
- const auto output_shape = _ctx.at(ofm_index).shape().asFeature(frontend_layout);
+ const auto frontend_layout = _graph.layout();
+ const auto input_shape = operands.at(ifm_index).shape().asFeature(frontend_layout);
+ const auto output_shape = operands.at(ofm_index).shape().asFeature(frontend_layout);
// All requirement as per NNAPI specification.
- OP_REQUIRES(_ctx.at(ifm_index).shape().rank() == 4);
- OP_REQUIRES(_ctx.at(ofm_index).shape().rank() == 4);
- OP_REQUIRES(_ctx.at(block_size_index).shape().rank() == 1);
- OP_REQUIRES(_ctx.at(paddings_index).shape().rank() == 2);
+ OP_REQUIRES(operands.at(ifm_index).shape().rank() == 4);
+ OP_REQUIRES(operands.at(ofm_index).shape().rank() == 4);
+ OP_REQUIRES(operands.at(block_size_index).shape().rank() == 1);
+ OP_REQUIRES(operands.at(paddings_index).shape().rank() == 2);
- OP_REQUIRES(_ctx.at(block_size_index).shape().dim(0) == 2);
- OP_REQUIRES(_ctx.at(paddings_index).shape().dim(0) == 2);
- OP_REQUIRES(_ctx.at(paddings_index).shape().dim(1) == 2);
+ OP_REQUIRES(operands.at(block_size_index).shape().dim(0) == 2);
+ OP_REQUIRES(operands.at(paddings_index).shape().dim(0) == 2);
+ OP_REQUIRES(operands.at(paddings_index).shape().dim(1) == 2);
OP_REQUIRES(input_shape.C == output_shape.C);
}
void ShapeValidator::visit(const ir::operation::SpaceToDepth &node)
{
+ const auto &operands = _graph.operands();
const auto ofm_index{node.getOutputs().at(0)};
- if (_ctx.at(ofm_index).info().isDynamic())
+ if (operands.at(ofm_index).info().isDynamic())
return;
const auto ifm_index{node.getInputs().at(ir::operation::SpaceToDepth::Input::INPUT)};
- const auto frontend_layout = _current_layout;
- const auto input_shape = _ctx.at(ifm_index).shape().asFeature(frontend_layout);
- const auto output_shape = _ctx.at(ofm_index).shape().asFeature(frontend_layout);
+ const auto frontend_layout = _graph.layout();
+ const auto input_shape = operands.at(ifm_index).shape().asFeature(frontend_layout);
+ const auto output_shape = operands.at(ofm_index).shape().asFeature(frontend_layout);
const auto block_size = node.param().block_size;
// All assertions as per NNAPI specification.
- OP_REQUIRES(_ctx.at(ifm_index).shape().rank() == 4);
- OP_REQUIRES(_ctx.at(ofm_index).shape().rank() == 4);
+ OP_REQUIRES(operands.at(ifm_index).shape().rank() == 4);
+ OP_REQUIRES(operands.at(ofm_index).shape().rank() == 4);
OP_REQUIRES((input_shape.H % block_size == 0) && (input_shape.W % block_size == 0));
OP_REQUIRES(input_shape.N == output_shape.N);
OP_REQUIRES(input_shape.C * block_size * block_size == output_shape.C);
@@ -382,29 +390,31 @@ void ShapeValidator::visit(const ir::operation::ElementwiseBinary &)
void ShapeValidator::visit(const ir::operation::ElementwiseUnary &node)
{
+ const auto &operands = _graph.operands();
const auto output_index{node.getOutputs().at(0)};
const auto input_index{node.getInputs().at(ir::operation::ElementwiseUnary::Input::INPUT)};
- if (_ctx.at(output_index).info().isDynamic())
+ if (operands.at(output_index).info().isDynamic())
return;
- OP_REQUIRES(_ctx.at(output_index).shape() == _ctx.at(input_index).shape());
+ OP_REQUIRES(operands.at(output_index).shape() == operands.at(input_index).shape());
}
void ShapeValidator::visit(const ir::operation::EmbeddingLookup &node)
{
+ const auto &operands = _graph.operands();
const auto output_index{node.getOutputs().at(0)};
const auto lookups_index{node.getInputs().at(ir::operation::EmbeddingLookup::Input::LOOKUPS)};
const auto values_index{node.getInputs().at(ir::operation::EmbeddingLookup::Input::VALUES)};
- const auto &output_obj = _ctx.at(output_index);
- const auto &lookups_obj = _ctx.at(lookups_index);
- const auto &values_obj = _ctx.at(values_index);
+ const auto &output_obj = operands.at(output_index);
+ const auto &lookups_obj = operands.at(lookups_index);
+ const auto &values_obj = operands.at(values_index);
// Verify operand here, not at SimpleEmbeddingLookup::configure() to avoid acl's modifying
// TensorShape sometimes(Issue: https://github.sec.samsung.net/STAR/nnfw/issues/729)
{
- if (_ctx.at(output_index).info().isDynamic())
+ if (operands.at(output_index).info().isDynamic())
return;
const auto &output_shape = output_obj.shape();
@@ -427,26 +437,28 @@ void ShapeValidator::visit(const ir::operation::EmbeddingLookup &node)
void ShapeValidator::visit(const ir::operation::ExpandDims &node)
{
+ const auto &operands = _graph.operands();
const auto axis_index{node.getInputs().at(ir::operation::ExpandDims::Input::AXIS)};
- if (_ctx.at(axis_index).info().isDynamic())
+ if (operands.at(axis_index).info().isDynamic())
return;
- OP_REQUIRES(_ctx.at(axis_index).shape().rank() <= 1);
+ OP_REQUIRES(operands.at(axis_index).shape().rank() <= 1);
}
void ShapeValidator::visit(const ir::operation::HashtableLookup &node)
{
+ const auto &operands = _graph.operands();
const auto output_index{node.getOutputs().at(ir::operation::HashtableLookup::Output::OUTPUT)};
const auto lookups_index{node.getInputs().at(ir::operation::HashtableLookup::Input::LOOKUPS)};
const auto keys_index{node.getInputs().at(ir::operation::HashtableLookup::Input::KEYS)};
const auto values_index{node.getInputs().at(ir::operation::HashtableLookup::Input::VALUES)};
- const auto &output_obj = _ctx.at(output_index);
- const auto &lookups_obj = _ctx.at(lookups_index);
- const auto &keys_obj = _ctx.at(keys_index);
- const auto &values_obj = _ctx.at(values_index);
+ const auto &output_obj = operands.at(output_index);
+ const auto &lookups_obj = operands.at(lookups_index);
+ const auto &keys_obj = operands.at(keys_index);
+ const auto &values_obj = operands.at(values_index);
- if (_ctx.at(output_index).info().isDynamic())
+ if (operands.at(output_index).info().isDynamic())
return;
const auto &output_shape = output_obj.shape();
@@ -464,28 +476,30 @@ void ShapeValidator::visit(const ir::operation::HashtableLookup &node)
void ShapeValidator::visit(const ir::operation::TransposeConv &node)
{
// shape check
+ const auto &operands = _graph.operands();
const auto ofm_index{node.getOutputs().at(0)};
- if (_ctx.at(ofm_index).info().isDynamic())
+
+ if (operands.at(ofm_index).info().isDynamic())
return;
const auto ifm_index{node.getInputs().at(ir::operation::TransposeConv::Input::INPUT)};
const auto ker_index{node.getInputs().at(ir::operation::TransposeConv::Input::KERNEL)};
// Only 4D tensors are supported
- OP_REQUIRES(_ctx.at(ofm_index).shape().rank() == 4);
- OP_REQUIRES(_ctx.at(ofm_index).shape().rank() == _ctx.at(ifm_index).shape().rank());
- OP_REQUIRES(_ctx.at(ofm_index).shape().rank() == _ctx.at(ker_index).shape().rank());
+ OP_REQUIRES(operands.at(ofm_index).shape().rank() == 4);
+ OP_REQUIRES(operands.at(ofm_index).shape().rank() == operands.at(ifm_index).shape().rank());
+ OP_REQUIRES(operands.at(ofm_index).shape().rank() == operands.at(ker_index).shape().rank());
- const auto frontend_layout = _current_layout;
- const auto ofm_shape = _ctx.at(ofm_index).shape().asFeature(frontend_layout);
- const auto ifm_shape = _ctx.at(ifm_index).shape().asFeature(frontend_layout);
+ const auto frontend_layout = _graph.layout();
+ const auto ofm_shape = operands.at(ofm_index).shape().asFeature(frontend_layout);
+ const auto ifm_shape = operands.at(ifm_index).shape().asFeature(frontend_layout);
// The kernel has only IHWO layout on frontend
// So ker_shape is treated here below
// I -> N
// H -> H
// W -> W
// O -> C
- const auto ker_shape = _ctx.at(ker_index).shape().asFeature(ir::Layout::NHWC);
+ const auto ker_shape = operands.at(ker_index).shape().asFeature(ir::Layout::NHWC);
OP_REQUIRES(ifm_shape.N == ofm_shape.N);
OP_REQUIRES(ifm_shape.C == ker_shape.C);
@@ -494,16 +508,17 @@ void ShapeValidator::visit(const ir::operation::TransposeConv &node)
void ShapeValidator::visit(const ir::operation::Gather &node)
{
+ const auto &operands = _graph.operands();
const auto ofm_index{node.getOutputs().at(0)};
- if (_ctx.at(ofm_index).info().isDynamic())
+ if (operands.at(ofm_index).info().isDynamic())
return;
const auto ifm_index{node.getInputs().at(ir::operation::Gather::Input::INPUT)};
const auto indices_index{node.getInputs().at(ir::operation::Gather::Input::INDICES)};
- const auto ifm_shape = _ctx.at(ifm_index).shape();
- const auto indices_shape = _ctx.at(indices_index).shape();
- const auto ofm_shape = _ctx.at(ofm_index).shape();
+ const auto ifm_shape = operands.at(ifm_index).shape();
+ const auto indices_shape = operands.at(indices_index).shape();
+ const auto ofm_shape = operands.at(ofm_index).shape();
OP_REQUIRES(ifm_shape.rank() <= 4);
OP_REQUIRES(indices_shape.rank() <= 3);
@@ -512,21 +527,22 @@ void ShapeValidator::visit(const ir::operation::Gather &node)
void ShapeValidator::visit(const ir::operation::DepthToSpace &node)
{
+ const auto &operands = _graph.operands();
int32_t block_size = node.param().block_size;
// shape check
const auto output_index{node.getOutputs().at(0)};
- if (_ctx.at(output_index).info().isDynamic())
+ if (operands.at(output_index).info().isDynamic())
return;
const auto input_index{node.getInputs().at(ir::operation::DepthToSpace::Input::INPUT)};
- const auto frontend_layout = _current_layout;
- const auto output_shape = _ctx.at(output_index).shape().asFeature(frontend_layout);
- const auto input_shape = _ctx.at(input_index).shape().asFeature(frontend_layout);
+ const auto frontend_layout = _graph.layout();
+ const auto output_shape = operands.at(output_index).shape().asFeature(frontend_layout);
+ const auto input_shape = operands.at(input_index).shape().asFeature(frontend_layout);
- OP_REQUIRES(_ctx.at(input_index).shape().rank() == 4);
- OP_REQUIRES(_ctx.at(output_index).shape().rank() == 4);
+ OP_REQUIRES(operands.at(input_index).shape().rank() == 4);
+ OP_REQUIRES(operands.at(output_index).shape().rank() == 4);
{
OP_REQUIRES(output_shape.N == input_shape.N);
@@ -539,22 +555,23 @@ void ShapeValidator::visit(const ir::operation::DepthToSpace &node)
void ShapeValidator::visit(const ir::operation::Pack &node)
{
+ const auto &operands = _graph.operands();
const auto axis{node.param().axis};
const auto output_index{node.getOutputs().at(0)};
- if (_ctx.at(output_index).info().isDynamic())
+ if (operands.at(output_index).info().isDynamic())
return;
// shape check
- const auto &output_shape = _ctx.at(output_index).shape();
+ const auto &output_shape = operands.at(output_index).shape();
const auto output_rank = static_cast<int32_t>(output_shape.rank());
const auto input1_index{node.getInputs().at(0)};
- const auto input_shape = _ctx.at(input1_index).shape();
+ const auto input_shape = operands.at(input1_index).shape();
OP_REQUIRES(axis >= -output_rank && axis < output_rank);
for (const auto &index : node.getInputs())
{
- OP_REQUIRES(input_shape == _ctx.at(index).shape());
+ OP_REQUIRES(input_shape == operands.at(index).shape());
}
}
@@ -562,8 +579,9 @@ void ShapeValidator::visit(const ir::operation::LSTM &node)
{
// NOTE This validation is for static rnn(non-dynamic shape), but not for dynamic rnn
// TODO Support dynamic rnn
+ const auto &operands = _graph.operands();
const auto output_index{node.getOutputs().at(ir::operation::LSTM::Output::OUTPUT)};
- if (_ctx.at(output_index).info().isDynamic())
+ if (operands.at(output_index).info().isDynamic())
return;
const auto scratch_buffer_index{
@@ -611,91 +629,96 @@ void ShapeValidator::visit(const ir::operation::LSTM &node)
node.getInputs().at(ir::operation::LSTM::Input::OUTPUT_STATE_IN)};
const auto cell_state_in_index{node.getInputs().at(ir::operation::LSTM::Input::CELL_STATE_IN)};
- OP_REQUIRES(_ctx.at(input_index).shape().rank() == _ctx.at(output_index).shape().rank());
- for (int i = 0; i < _ctx.at(input_index).shape().rank() - 1; ++i)
+ OP_REQUIRES(operands.at(input_index).shape().rank() == operands.at(output_index).shape().rank());
+ for (int i = 0; i < operands.at(input_index).shape().rank() - 1; ++i)
{
- OP_REQUIRES(_ctx.at(input_index).shape().dim(i) == _ctx.at(output_index).shape().dim(i));
+ OP_REQUIRES(operands.at(input_index).shape().dim(i) ==
+ operands.at(output_index).shape().dim(i));
}
- OP_REQUIRES(
- (_ctx.at(output_index).shape().rank() == 2 || _ctx.at(output_index).shape().rank() == 3) &&
- (_ctx.at(input_index).shape().rank() == 2 || _ctx.at(input_index).shape().rank() == 3) &&
- (!_ctx.exist(input_to_input_weights_index) ||
- _ctx.at(input_to_input_weights_index).shape().rank() == 2) &&
- _ctx.at(input_to_forget_weights_index).shape().rank() == 2 &&
- _ctx.at(input_to_cell_weights_index).shape().rank() == 2 &&
- _ctx.at(input_to_output_weights_index).shape().rank() == 2 &&
- (!_ctx.exist(recurrent_to_input_weights_index) ||
- _ctx.at(recurrent_to_input_weights_index).shape().rank() == 2) &&
- _ctx.at(recurrent_to_forget_weights_index).shape().rank() == 2 &&
- _ctx.at(recurrent_to_cell_weights_index).shape().rank() == 2 &&
- _ctx.at(recurrent_to_output_weights_index).shape().rank() == 2 &&
- (!_ctx.exist(projection_weights_index) ||
- _ctx.at(projection_weights_index).shape().rank() == 2) &&
- _ctx.at(output_state_in_index).shape().rank() == 2 &&
- _ctx.at(cell_state_in_index).shape().rank() == 2);
-
- OP_REQUIRES(
- (!_ctx.exist(cell_to_input_weights_index) ||
- _ctx.at(cell_to_input_weights_index).shape().rank() == 1) &&
- (!_ctx.exist(cell_to_forget_weights_index) ||
- _ctx.at(cell_to_forget_weights_index).shape().rank() == 1) &&
- (!_ctx.exist(cell_to_output_weights_index) ||
- _ctx.at(cell_to_output_weights_index).shape().rank() == 1) &&
- (!_ctx.exist(input_gate_bias_index) || _ctx.at(input_gate_bias_index).shape().rank() == 1) &&
- _ctx.at(forget_gate_bias_index).shape().rank() == 1 &&
- _ctx.at(cell_bias_index).shape().rank() == 1 &&
- _ctx.at(output_gate_bias_index).shape().rank() == 1 &&
- (!_ctx.exist(projection_bias_index) || _ctx.at(projection_bias_index).shape().rank() == 1));
+ OP_REQUIRES((operands.at(output_index).shape().rank() == 2 ||
+ operands.at(output_index).shape().rank() == 3) &&
+ (operands.at(input_index).shape().rank() == 2 ||
+ operands.at(input_index).shape().rank() == 3) &&
+ (!operands.exist(input_to_input_weights_index) ||
+ operands.at(input_to_input_weights_index).shape().rank() == 2) &&
+ operands.at(input_to_forget_weights_index).shape().rank() == 2 &&
+ operands.at(input_to_cell_weights_index).shape().rank() == 2 &&
+ operands.at(input_to_output_weights_index).shape().rank() == 2 &&
+ (!operands.exist(recurrent_to_input_weights_index) ||
+ operands.at(recurrent_to_input_weights_index).shape().rank() == 2) &&
+ operands.at(recurrent_to_forget_weights_index).shape().rank() == 2 &&
+ operands.at(recurrent_to_cell_weights_index).shape().rank() == 2 &&
+ operands.at(recurrent_to_output_weights_index).shape().rank() == 2 &&
+ (!operands.exist(projection_weights_index) ||
+ operands.at(projection_weights_index).shape().rank() == 2) &&
+ operands.at(output_state_in_index).shape().rank() == 2 &&
+ operands.at(cell_state_in_index).shape().rank() == 2);
+
+ OP_REQUIRES((!operands.exist(cell_to_input_weights_index) ||
+ operands.at(cell_to_input_weights_index).shape().rank() == 1) &&
+ (!operands.exist(cell_to_forget_weights_index) ||
+ operands.at(cell_to_forget_weights_index).shape().rank() == 1) &&
+ (!operands.exist(cell_to_output_weights_index) ||
+ operands.at(cell_to_output_weights_index).shape().rank() == 1) &&
+ (!operands.exist(input_gate_bias_index) ||
+ operands.at(input_gate_bias_index).shape().rank() == 1) &&
+ operands.at(forget_gate_bias_index).shape().rank() == 1 &&
+ operands.at(cell_bias_index).shape().rank() == 1 &&
+ operands.at(output_gate_bias_index).shape().rank() == 1 &&
+ (!operands.exist(projection_bias_index) ||
+ operands.at(projection_bias_index).shape().rank() == 1));
// CIFG assertion
- OP_REQUIRES(
- ((!_ctx.exist(input_to_input_weights_index) ||
- (_ctx.at(input_to_input_weights_index).shape().dim(0) == 0 &&
- _ctx.at(input_to_input_weights_index).shape().dim(1) == 0)) &&
- (!_ctx.exist(recurrent_to_input_weights_index) ||
- (_ctx.at(recurrent_to_input_weights_index).shape().dim(0) == 0 &&
- _ctx.at(recurrent_to_input_weights_index).shape().dim(1) == 0)) &&
- (!_ctx.exist(input_gate_bias_index) || _ctx.at(input_gate_bias_index).shape().dim(0) == 0) &&
- (!_ctx.exist(cell_to_input_weights_index) ||
- _ctx.at(cell_to_input_weights_index).shape().dim(0) == 0)) ||
- ((_ctx.exist(input_to_input_weights_index) &&
- (_ctx.at(input_to_input_weights_index).shape().dim(0) != 0 &&
- _ctx.at(input_to_input_weights_index).shape().dim(1) != 0)) &&
- (_ctx.exist(recurrent_to_input_weights_index) &&
- (_ctx.at(recurrent_to_input_weights_index).shape().dim(0) != 0 &&
- _ctx.at(recurrent_to_input_weights_index).shape().dim(1) != 0)) &&
- (_ctx.exist(input_gate_bias_index) && _ctx.at(input_gate_bias_index).shape().dim(0) != 0)));
+ OP_REQUIRES(((!operands.exist(input_to_input_weights_index) ||
+ (operands.at(input_to_input_weights_index).shape().dim(0) == 0 &&
+ operands.at(input_to_input_weights_index).shape().dim(1) == 0)) &&
+ (!operands.exist(recurrent_to_input_weights_index) ||
+ (operands.at(recurrent_to_input_weights_index).shape().dim(0) == 0 &&
+ operands.at(recurrent_to_input_weights_index).shape().dim(1) == 0)) &&
+ (!operands.exist(input_gate_bias_index) ||
+ operands.at(input_gate_bias_index).shape().dim(0) == 0) &&
+ (!operands.exist(cell_to_input_weights_index) ||
+ operands.at(cell_to_input_weights_index).shape().dim(0) == 0)) ||
+ ((operands.exist(input_to_input_weights_index) &&
+ (operands.at(input_to_input_weights_index).shape().dim(0) != 0 &&
+ operands.at(input_to_input_weights_index).shape().dim(1) != 0)) &&
+ (operands.exist(recurrent_to_input_weights_index) &&
+ (operands.at(recurrent_to_input_weights_index).shape().dim(0) != 0 &&
+ operands.at(recurrent_to_input_weights_index).shape().dim(1) != 0)) &&
+ (operands.exist(input_gate_bias_index) &&
+ operands.at(input_gate_bias_index).shape().dim(0) != 0)));
// Peephole assertion
- OP_REQUIRES(((!_ctx.exist(cell_to_forget_weights_index) ||
- _ctx.at(cell_to_forget_weights_index).shape().dim(0) == 0) &&
- (!_ctx.exist(cell_to_output_weights_index) ||
- _ctx.at(cell_to_output_weights_index).shape().dim(0) == 0)) ||
- ((_ctx.exist(cell_to_forget_weights_index) &&
- _ctx.at(cell_to_forget_weights_index).shape().dim(0) != 0) &&
- (_ctx.exist(cell_to_output_weights_index) &&
- _ctx.at(cell_to_output_weights_index).shape().dim(0) != 0)));
-
- bool has_input_to_input_weights = _ctx.exist(input_to_input_weights_index) &&
- (_ctx.at(input_to_input_weights_index).shape().dim(0) != 0 &&
- _ctx.at(input_to_input_weights_index).shape().dim(1) != 0);
+ OP_REQUIRES(((!operands.exist(cell_to_forget_weights_index) ||
+ operands.at(cell_to_forget_weights_index).shape().dim(0) == 0) &&
+ (!operands.exist(cell_to_output_weights_index) ||
+ operands.at(cell_to_output_weights_index).shape().dim(0) == 0)) ||
+ ((operands.exist(cell_to_forget_weights_index) &&
+ operands.at(cell_to_forget_weights_index).shape().dim(0) != 0) &&
+ (operands.exist(cell_to_output_weights_index) &&
+ operands.at(cell_to_output_weights_index).shape().dim(0) != 0)));
+
+ bool has_input_to_input_weights =
+ operands.exist(input_to_input_weights_index) &&
+ (operands.at(input_to_input_weights_index).shape().dim(0) != 0 &&
+ operands.at(input_to_input_weights_index).shape().dim(1) != 0);
bool has_recurrent_to_input_weights =
- _ctx.exist(recurrent_to_input_weights_index) &&
- (_ctx.at(recurrent_to_input_weights_index).shape().dim(0) != 0 &&
- _ctx.at(recurrent_to_input_weights_index).shape().dim(1) != 0);
+ operands.exist(recurrent_to_input_weights_index) &&
+ (operands.at(recurrent_to_input_weights_index).shape().dim(0) != 0 &&
+ operands.at(recurrent_to_input_weights_index).shape().dim(1) != 0);
bool has_input_gate_bias =
- _ctx.exist(input_gate_bias_index) && _ctx.at(input_gate_bias_index).shape().dim(0) != 0;
- bool has_cell_to_input_weights = _ctx.exist(cell_to_input_weights_index) &&
- _ctx.at(cell_to_input_weights_index).shape().dim(0) != 0;
- bool has_cell_to_forget_weights = _ctx.exist(cell_to_forget_weights_index) &&
- _ctx.at(cell_to_forget_weights_index).shape().dim(0) != 0;
- bool has_cell_to_output_weights = _ctx.exist(cell_to_output_weights_index) &&
- _ctx.at(cell_to_output_weights_index).shape().dim(0) != 0;
- bool has_projection_weights = _ctx.exist(projection_weights_index) &&
- (_ctx.at(projection_weights_index).shape().dim(0) != 0 &&
- _ctx.at(projection_weights_index).shape().dim(1) != 0);
+ operands.exist(input_gate_bias_index) && operands.at(input_gate_bias_index).shape().dim(0) != 0;
+ bool has_cell_to_input_weights = operands.exist(cell_to_input_weights_index) &&
+ operands.at(cell_to_input_weights_index).shape().dim(0) != 0;
+ bool has_cell_to_forget_weights = operands.exist(cell_to_forget_weights_index) &&
+ operands.at(cell_to_forget_weights_index).shape().dim(0) != 0;
+ bool has_cell_to_output_weights = operands.exist(cell_to_output_weights_index) &&
+ operands.at(cell_to_output_weights_index).shape().dim(0) != 0;
+ bool has_projection_weights = operands.exist(projection_weights_index) &&
+ (operands.at(projection_weights_index).shape().dim(0) != 0 &&
+ operands.at(projection_weights_index).shape().dim(1) != 0);
bool has_projection_bias =
- _ctx.exist(projection_bias_index) && _ctx.at(projection_bias_index).shape().dim(0) != 0;
+ operands.exist(projection_bias_index) && operands.at(projection_bias_index).shape().dim(0) != 0;
// NOTE The cell_to_input_weights do not exist in non-peephole although regular LSTM(non-CIFG).
// true: no CIFG
@@ -710,46 +733,48 @@ void ShapeValidator::visit(const ir::operation::LSTM &node)
// NOTE The projection weights may have data but the projection bias may not.
bool has_projection_param = has_projection_weights;
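For readability, the has_* flags above all follow one pattern: an optional LSTM tensor counts as present only when its operand exists and none of the inspected dimensions is zero. A standalone sketch of that predicate (stand-in types, not the onert API):

#include <vector>

struct ShapeStub { std::vector<int> dims; }; // stand-in for ir::Shape

// nullptr models operands.exist(...) == false; a zero dimension models an "empty" optional.
inline bool param_present(const ShapeStub *shape)
{
  if (shape == nullptr)
    return false;
  for (int d : shape->dims)
    if (d == 0)
      return false;
  return true;
}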
- const auto batch_size = (_ctx.at(input_index).shape().rank() == 3 && node.param().time_major)
- ? _ctx.at(input_index).shape().dim(1)
- : _ctx.at(input_index).shape().dim(0);
- OP_REQUIRES(batch_size == _ctx.at(output_state_in_index).shape().dim(0) &&
- batch_size == _ctx.at(cell_state_in_index).shape().dim(0));
-
- const auto input_size = _ctx.at(input_index).shape().dim(_ctx.at(input_index).shape().rank() - 1);
- OP_REQUIRES(input_size == _ctx.at(input_to_forget_weights_index).shape().dim(1) &&
- input_size == _ctx.at(input_to_cell_weights_index).shape().dim(1) &&
- input_size == _ctx.at(input_to_output_weights_index).shape().dim(1));
-
- const auto num_units = _ctx.at(input_to_output_weights_index).shape().dim(0);
- OP_REQUIRES(num_units == _ctx.at(input_to_cell_weights_index).shape().dim(0) &&
- num_units == _ctx.at(input_to_output_weights_index).shape().dim(0) &&
- num_units == _ctx.at(recurrent_to_forget_weights_index).shape().dim(0) &&
- num_units == _ctx.at(recurrent_to_cell_weights_index).shape().dim(0) &&
- num_units == _ctx.at(recurrent_to_output_weights_index).shape().dim(0) &&
- num_units == _ctx.at(forget_gate_bias_index).shape().dim(0) &&
- num_units == _ctx.at(cell_bias_index).shape().dim(0) &&
- num_units == _ctx.at(output_gate_bias_index).shape().dim(0) &&
- num_units == _ctx.at(cell_state_in_index).shape().dim(1));
+ const auto batch_size = (operands.at(input_index).shape().rank() == 3 && node.param().time_major)
+ ? operands.at(input_index).shape().dim(1)
+ : operands.at(input_index).shape().dim(0);
+ OP_REQUIRES(batch_size == operands.at(output_state_in_index).shape().dim(0) &&
+ batch_size == operands.at(cell_state_in_index).shape().dim(0));
+
+ const auto input_size =
+ operands.at(input_index).shape().dim(operands.at(input_index).shape().rank() - 1);
+ OP_REQUIRES(input_size == operands.at(input_to_forget_weights_index).shape().dim(1) &&
+ input_size == operands.at(input_to_cell_weights_index).shape().dim(1) &&
+ input_size == operands.at(input_to_output_weights_index).shape().dim(1));
+
+ const auto num_units = operands.at(input_to_output_weights_index).shape().dim(0);
+ OP_REQUIRES(num_units == operands.at(input_to_cell_weights_index).shape().dim(0) &&
+ num_units == operands.at(input_to_output_weights_index).shape().dim(0) &&
+ num_units == operands.at(recurrent_to_forget_weights_index).shape().dim(0) &&
+ num_units == operands.at(recurrent_to_cell_weights_index).shape().dim(0) &&
+ num_units == operands.at(recurrent_to_output_weights_index).shape().dim(0) &&
+ num_units == operands.at(forget_gate_bias_index).shape().dim(0) &&
+ num_units == operands.at(cell_bias_index).shape().dim(0) &&
+ num_units == operands.at(output_gate_bias_index).shape().dim(0) &&
+ num_units == operands.at(cell_state_in_index).shape().dim(1));
const auto output_size =
- _ctx.at(output_index).shape().dim(_ctx.at(output_index).shape().rank() - 1);
- OP_REQUIRES(output_size == _ctx.at(recurrent_to_forget_weights_index).shape().dim(1) &&
- output_size == _ctx.at(recurrent_to_cell_weights_index).shape().dim(1) &&
- output_size == _ctx.at(recurrent_to_output_weights_index).shape().dim(1) &&
- output_size == _ctx.at(output_state_in_index).shape().dim(1));
+ operands.at(output_index).shape().dim(operands.at(output_index).shape().rank() - 1);
+ OP_REQUIRES(output_size == operands.at(recurrent_to_forget_weights_index).shape().dim(1) &&
+ output_size == operands.at(recurrent_to_cell_weights_index).shape().dim(1) &&
+ output_size == operands.at(recurrent_to_output_weights_index).shape().dim(1) &&
+ output_size == operands.at(output_state_in_index).shape().dim(1));
if (has_cifg_param)
{
- OP_REQUIRES(input_size == _ctx.at(input_to_input_weights_index).shape().dim(1));
- OP_REQUIRES(num_units == _ctx.at(input_to_input_weights_index).shape().dim(0) &&
- num_units == _ctx.at(recurrent_to_input_weights_index).shape().dim(0) &&
- ((_ctx.exist(cell_to_input_weights_index) &&
- num_units == _ctx.at(cell_to_input_weights_index).shape().dim(0)) ||
- (!_ctx.exist(cell_to_input_weights_index) ||
- _ctx.at(cell_to_input_weights_index).shape().dim(0) == 0) /* non-peephole */) &&
- num_units == _ctx.at(input_gate_bias_index).shape().dim(0));
- OP_REQUIRES(output_size == _ctx.at(recurrent_to_input_weights_index).shape().dim(1));
+ OP_REQUIRES(input_size == operands.at(input_to_input_weights_index).shape().dim(1));
+ OP_REQUIRES(
+ num_units == operands.at(input_to_input_weights_index).shape().dim(0) &&
+ num_units == operands.at(recurrent_to_input_weights_index).shape().dim(0) &&
+ ((operands.exist(cell_to_input_weights_index) &&
+ num_units == operands.at(cell_to_input_weights_index).shape().dim(0)) ||
+ (!operands.exist(cell_to_input_weights_index) ||
+ operands.at(cell_to_input_weights_index).shape().dim(0) == 0) /* non-peephole */) &&
+ num_units == operands.at(input_gate_bias_index).shape().dim(0));
+ OP_REQUIRES(output_size == operands.at(recurrent_to_input_weights_index).shape().dim(1));
OP_REQUIRES(has_input_to_input_weights && has_recurrent_to_input_weights &&
has_input_gate_bias);
if (has_cell_to_input_weights)
@@ -757,64 +782,65 @@ void ShapeValidator::visit(const ir::operation::LSTM &node)
// NOTE The cell_to_input_weights exist only in case of non-CIFG and peephole.
OP_REQUIRES(has_peephole_param);
}
- if (_ctx.exist(scratch_buffer_index))
- OP_REQUIRES(_ctx.at(scratch_buffer_index).shape().dim(1) == num_units * 4);
+ if (operands.exist(scratch_buffer_index))
+ OP_REQUIRES(operands.at(scratch_buffer_index).shape().dim(1) == num_units * 4);
}
else
{
- if (_ctx.exist(scratch_buffer_index))
- OP_REQUIRES(_ctx.at(scratch_buffer_index).shape().dim(1) == num_units * 3);
+ if (operands.exist(scratch_buffer_index))
+ OP_REQUIRES(operands.at(scratch_buffer_index).shape().dim(1) == num_units * 3);
}
if (has_peephole_param)
{
- OP_REQUIRES(num_units == _ctx.at(cell_to_forget_weights_index).shape().dim(0) &&
- num_units == _ctx.at(cell_to_output_weights_index).shape().dim(0) &&
- (num_units == _ctx.at(cell_to_input_weights_index).shape().dim(0) ||
- _ctx.at(cell_to_input_weights_index).shape().dim(0) == 0 /* CIFG */));
+ OP_REQUIRES(num_units == operands.at(cell_to_forget_weights_index).shape().dim(0) &&
+ num_units == operands.at(cell_to_output_weights_index).shape().dim(0) &&
+ (num_units == operands.at(cell_to_input_weights_index).shape().dim(0) ||
+ operands.at(cell_to_input_weights_index).shape().dim(0) == 0 /* CIFG */));
}
if (has_projection_param)
{
- OP_REQUIRES(num_units == _ctx.at(projection_weights_index).shape().dim(1));
- OP_REQUIRES(output_size == _ctx.at(projection_weights_index).shape().dim(0));
+ OP_REQUIRES(num_units == operands.at(projection_weights_index).shape().dim(1));
+ OP_REQUIRES(output_size == operands.at(projection_weights_index).shape().dim(0));
if (has_projection_bias)
{
- OP_REQUIRES(output_size == _ctx.at(projection_bias_index).shape().dim(0));
+ OP_REQUIRES(output_size == operands.at(projection_bias_index).shape().dim(0));
}
}
- if (_ctx.exist(scratch_buffer_index))
+ if (operands.exist(scratch_buffer_index))
{
- OP_REQUIRES(_ctx.at(scratch_buffer_index).shape().rank() == 2);
- OP_REQUIRES(batch_size == _ctx.at(scratch_buffer_index).shape().dim(0));
+ OP_REQUIRES(operands.at(scratch_buffer_index).shape().rank() == 2);
+ OP_REQUIRES(batch_size == operands.at(scratch_buffer_index).shape().dim(0));
}
- if (_ctx.exist(output_state_out_index))
+ if (operands.exist(output_state_out_index))
{
- OP_REQUIRES(_ctx.at(output_state_out_index).shape().rank() == 2);
- OP_REQUIRES(batch_size == _ctx.at(output_state_out_index).shape().dim(0));
- OP_REQUIRES(output_size == _ctx.at(output_state_out_index).shape().dim(1));
+ OP_REQUIRES(operands.at(output_state_out_index).shape().rank() == 2);
+ OP_REQUIRES(batch_size == operands.at(output_state_out_index).shape().dim(0));
+ OP_REQUIRES(output_size == operands.at(output_state_out_index).shape().dim(1));
}
- if (_ctx.exist(cell_state_out_index))
+ if (operands.exist(cell_state_out_index))
{
- OP_REQUIRES(_ctx.at(cell_state_out_index).shape().rank() == 2);
- OP_REQUIRES(batch_size == _ctx.at(cell_state_out_index).shape().dim(0));
- OP_REQUIRES(num_units == _ctx.at(cell_state_out_index).shape().dim(1));
+ OP_REQUIRES(operands.at(cell_state_out_index).shape().rank() == 2);
+ OP_REQUIRES(batch_size == operands.at(cell_state_out_index).shape().dim(0));
+ OP_REQUIRES(num_units == operands.at(cell_state_out_index).shape().dim(1));
}
}
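One detail worth restating from the scratch-buffer branches above: with the input-gate parameters present (non-CIFG) the scratch buffer holds four gate buffers per cell, while CIFG couples the input and forget gates and needs only three. As a sketch:

// Width of dim(1) of the LSTM scratch buffer, per the checks above.
inline int scratch_width(int num_units, bool has_input_gate_params)
{
  return num_units * (has_input_gate_params ? 4 : 3);
}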
void ShapeValidator::visit(const ir::operation::L2Normalization &node)
{
+ const auto &operands = _graph.operands();
const auto ofm_index{node.getOutputs().at(0)};
- if (_ctx.at(ofm_index).info().isDynamic())
+ if (operands.at(ofm_index).info().isDynamic())
return;
const auto ifm_index{node.getInputs().at(ir::operation::L2Normalization::Input::INPUT)};
- auto ifm_shape = _ctx.at(ifm_index).shape();
- auto ofm_shape = _ctx.at(ofm_index).shape();
+ auto ifm_shape = operands.at(ifm_index).shape();
+ auto ofm_shape = operands.at(ofm_index).shape();
OP_REQUIRES(ifm_shape.rank() == ofm_shape.rank());
@@ -826,14 +852,15 @@ void ShapeValidator::visit(const ir::operation::L2Normalization &node)
void ShapeValidator::visit(const ir::operation::Unpack &node)
{
+ const auto &operands = _graph.operands();
const auto axis{node.param().axis};
const auto output_index{node.getInputs().at(0)};
- if (_ctx.at(output_index).info().isDynamic())
+ if (operands.at(output_index).info().isDynamic())
return;
const auto input_index{node.getInputs().at(ir::operation::Unpack::Input::INPUT)};
- const auto &input_shape = _ctx.at(input_index).shape();
+ const auto &input_shape = operands.at(input_index).shape();
const auto input_rank = static_cast<int32_t>(input_shape.rank());
OP_REQUIRES(axis >= -input_rank && axis < input_rank);
@@ -841,22 +868,23 @@ void ShapeValidator::visit(const ir::operation::Unpack &node)
void ShapeValidator::visit(const ir::operation::Pad &node)
{
+ const auto &operands = _graph.operands();
const auto pad_index{node.getInputs().at(ir::operation::Pad::Input::PAD)};
- OP_REQUIRES(_ctx.at(pad_index).typeInfo().type() == ir::DataType::INT32);
+ OP_REQUIRES(operands.at(pad_index).typeInfo().type() == ir::DataType::INT32);
const auto output_index{node.getInputs().at(0)};
- if (_ctx.at(output_index).info().isDynamic())
+ if (operands.at(output_index).info().isDynamic())
return;
const auto input_index{node.getInputs().at(ir::operation::Pad::Input::INPUT)};
- const auto &pad_shape = _ctx.at(pad_index).shape();
- const auto input_rank = static_cast<int32_t>(_ctx.at(input_index).shape().rank());
+ const auto &pad_shape = operands.at(pad_index).shape();
+ const auto input_rank = static_cast<int32_t>(operands.at(input_index).shape().rank());
OP_REQUIRES(pad_shape.rank() == 2);
OP_REQUIRES(pad_shape.dim(0) == input_rank);
OP_REQUIRES(pad_shape.dim(1) == 2);
- OP_REQUIRES(_ctx.at(input_index).shape().rank() == _ctx.at(output_index).shape().rank());
+ OP_REQUIRES(operands.at(input_index).shape().rank() == operands.at(output_index).shape().rank());
}
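The Pad checks above pin the paddings operand to an INT32 tensor of shape [input_rank, 2], where row i holds the (before, after) amounts for dimension i. A standalone sketch (not onert code) of the output extent that layout implies:

#include <array>
#include <cstddef>
#include <vector>

std::vector<int> padded_dims(const std::vector<int> &in_dims,
                             const std::vector<std::array<int, 2>> &pad) // pad.size() == rank
{
  std::vector<int> out;
  for (std::size_t i = 0; i < in_dims.size(); ++i)
    out.push_back(pad[i][0] + in_dims[i] + pad[i][1]); // before + extent + after
  return out;
}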
void ShapeValidator::visit(const ir::operation::Select &)
@@ -866,65 +894,70 @@ void ShapeValidator::visit(const ir::operation::Select &)
void ShapeValidator::visit(const ir::operation::StridedSlice &node)
{
+ const auto &operands = _graph.operands();
const auto output_index{node.getOutputs().at(0)};
const auto input_index{node.getInputs().at(ir::operation::StridedSlice::Input::INPUT)};
- if (_ctx.at(output_index).info().isDynamic())
+ if (operands.at(output_index).info().isDynamic())
return;
- OP_REQUIRES(_ctx.at(input_index).shape().rank() <= 4);
+ OP_REQUIRES(operands.at(input_index).shape().rank() <= 4);
}
void ShapeValidator::visit(const ir::operation::Split &node)
{
+ const auto &operands = _graph.operands();
const auto output_index{node.getOutputs().at(0)};
- if (_ctx.at(output_index).info().isDynamic())
+ if (operands.at(output_index).info().isDynamic())
return;
const auto input_index{node.getInputs().at(ir::operation::Split::Input::INPUT)};
const auto axis_index{node.getInputs().at(ir::operation::Split::Input::AXIS)};
const auto num_splits = node.param().num_splits;
- const auto input_rank = _ctx.at(input_index).shape().rank();
- auto axis = *reinterpret_cast<const int32_t *>(_ctx.at(axis_index).data()->base());
+ const auto input_rank = operands.at(input_index).shape().rank();
+ auto axis = *reinterpret_cast<const int32_t *>(operands.at(axis_index).data()->base());
axis = axis < 0 ? axis + input_rank : axis;
OP_REQUIRES(axis >= 0 && axis < input_rank);
- OP_REQUIRES(_ctx.at(input_index).shape().dim(axis) % num_splits == 0);
+ OP_REQUIRES(operands.at(input_index).shape().dim(axis) % num_splits == 0);
}
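Split reads its (possibly negative) axis from a constant operand, normalizes it, and then requires the split dimension to divide evenly, as checked above. A tiny sketch of the resulting per-output extent:

#include <cassert>

inline int split_extent(int dim_at_axis, int num_splits)
{
  assert(num_splits > 0 && dim_at_axis % num_splits == 0); // mirrors the OP_REQUIRES above
  return dim_at_axis / num_splits; // extent of each of the num_splits outputs along the axis
}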
void ShapeValidator::visit(const ir::operation::Shape &node)
{
+ const auto &operands = _graph.operands();
const auto output_index{node.getOutputs().at(0)};
- if (_ctx.at(output_index).info().isDynamic())
+ if (operands.at(output_index).info().isDynamic())
return;
const auto input_index{node.getInputs().at(0)};
UNUSED_RELEASE(input_index);
- OP_REQUIRES(_ctx.at(output_index).shape().rank() == 1);
+ OP_REQUIRES(operands.at(output_index).shape().rank() == 1);
}
void ShapeValidator::visit(const ir::operation::ResizeBilinear &node)
{
+ const auto &operands = _graph.operands();
const auto output_index{node.getOutputs().at(0)};
const auto input_index{node.getInputs().at(ir::operation::ResizeBilinear::Input::INPUT)};
- if (_ctx.at(output_index).info().isDynamic())
+ if (operands.at(output_index).info().isDynamic())
{
return;
}
- OP_REQUIRES(_ctx.at(input_index).shape().rank() == 4);
- OP_REQUIRES(_ctx.at(output_index).shape().rank() == 4);
+ OP_REQUIRES(operands.at(input_index).shape().rank() == 4);
+ OP_REQUIRES(operands.at(output_index).shape().rank() == 4);
}
void ShapeValidator::visit(const ir::operation::Reverse &node)
{
+ const auto &operands = _graph.operands();
const auto output_index{node.getOutputs().at(0)};
const auto input_index{node.getInputs().at(ir::operation::Reverse::Input::INPUT)};
- if (_ctx.at(output_index).info().isDynamic())
+ if (operands.at(output_index).info().isDynamic())
return;
- OP_REQUIRES(_ctx.at(output_index).shape() == _ctx.at(input_index).shape());
+ OP_REQUIRES(operands.at(output_index).shape() == operands.at(input_index).shape());
}
void ShapeValidator::visit(const ir::operation::If &)
@@ -940,17 +973,18 @@ void ShapeValidator::visit(const ir::operation::While &)
void ShapeValidator::visit(const ir::operation::SquaredDifference &node)
{
+ const auto &operands = _graph.operands();
const auto output_index{node.getOutputs().at(0)};
const auto lhs_index{node.getInputs().at(ir::operation::SquaredDifference::Input::LHS)};
const auto rhs_index{node.getInputs().at(ir::operation::SquaredDifference::Input::RHS)};
// Check for dimension constraints
- if (_ctx.at(output_index).info().isDynamic())
+ if (operands.at(output_index).info().isDynamic())
return;
- auto output_shape = _ctx.at(output_index).shape();
- auto lhs_shape = _ctx.at(lhs_index).shape();
- auto rhs_shape = _ctx.at(rhs_index).shape();
+ auto output_shape = operands.at(output_index).shape();
+ auto lhs_shape = operands.at(lhs_index).shape();
+ auto rhs_shape = operands.at(rhs_index).shape();
// Check for output rank
OP_REQUIRES(output_shape.rank() == std::max(lhs_shape.rank(), rhs_shape.rank()));
auto min_rank = std::min(lhs_shape.rank(), rhs_shape.rank());
@@ -982,36 +1016,40 @@ void ShapeValidator::visit(const ir::operation::SquaredDifference &node)
}
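The elided per-dimension loop in the SquaredDifference check presumably follows the usual broadcasting rule: for the trailing min_rank dimensions, the extents must match or one of them must be 1. As a one-line sketch:

inline bool broadcast_compatible(int lhs_dim, int rhs_dim)
{
  return lhs_dim == rhs_dim || lhs_dim == 1 || rhs_dim == 1;
}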
void ShapeValidator::visit(const ir::operation::Tile &node)
{
+ const auto &operands = _graph.operands();
const auto output_index{node.getOutputs().at(0)};
- if (_ctx.at(output_index).info().isDynamic())
+ if (operands.at(output_index).info().isDynamic())
return;
const auto input_index{node.getInputs().at(0)};
const auto multiple_index{node.getInputs().at(1)};
- OP_REQUIRES(_ctx.at(multiple_index).shape().rank() == 1);
- OP_REQUIRES(_ctx.at(multiple_index).shape().dim(0) == _ctx.at(input_index).shape().rank());
- OP_REQUIRES(_ctx.at(input_index).shape().rank() == _ctx.at(output_index).shape().rank());
+ OP_REQUIRES(operands.at(multiple_index).shape().rank() == 1);
+ OP_REQUIRES(operands.at(multiple_index).shape().dim(0) ==
+ operands.at(input_index).shape().rank());
+ OP_REQUIRES(operands.at(input_index).shape().rank() == operands.at(output_index).shape().rank());
}
void ShapeValidator::visit(const ir::operation::Range &node)
{
+ const auto &operands = _graph.operands();
const auto output_index{node.getOutputs().at(0)};
const auto start_index{node.getInputs().at(ir::operation::Range::Input::START)};
const auto limit_index{node.getInputs().at(ir::operation::Range::Input::LIMIT)};
const auto delta_index{node.getInputs().at(ir::operation::Range::Input::DELTA)};
// Check for dimension constraints
- if (_ctx.at(output_index).info().isDynamic())
+ if (operands.at(output_index).info().isDynamic())
return;
- OP_REQUIRES(_ctx.at(start_index).shape().rank() == 0);
- OP_REQUIRES(_ctx.at(limit_index).shape().rank() == 0);
- OP_REQUIRES(_ctx.at(delta_index).shape().rank() == 0);
+ OP_REQUIRES(operands.at(start_index).shape().rank() == 0);
+ OP_REQUIRES(operands.at(limit_index).shape().rank() == 0);
+ OP_REQUIRES(operands.at(delta_index).shape().rank() == 0);
}
void ShapeValidator::visit(const ir::operation::MatrixBandPart &node)
{
+ const auto &operands = _graph.operands();
const auto output_index{node.getOutputs().at(0)};
const auto input_index{node.getInputs().at(ir::operation::MatrixBandPart::Input::INPUT)};
const auto num_lower_index{
@@ -1020,23 +1058,24 @@ void ShapeValidator::visit(const ir::operation::MatrixBandPart &node)
node.getInputs().at(ir::operation::MatrixBandPart::Input::NUM_UPPER_DIAG)};
// Check for dimension constraints
- if (_ctx.at(output_index).info().isDynamic())
+ if (operands.at(output_index).info().isDynamic())
return;
- OP_REQUIRES(_ctx.at(input_index).shape().rank() >= 2); // input must be more than 2 dim matrix
- OP_REQUIRES(_ctx.at(num_upper_index).shape().rank() == 0); // num_lower must be scalar
- OP_REQUIRES(_ctx.at(num_lower_index).shape().rank() == 0); // num_upper must be scalar
+ OP_REQUIRES(operands.at(input_index).shape().rank() >= 2); // input must be more than 2 dim matrix
+ OP_REQUIRES(operands.at(num_upper_index).shape().rank() == 0); // num_lower must be scalar
+ OP_REQUIRES(operands.at(num_lower_index).shape().rank() == 0); // num_upper must be scalar
}
void ShapeValidator::visit(const ir::operation::LogSoftmax &node)
{
+ const auto &operands = _graph.operands();
const auto output_index{node.getOutputs().at(0)};
- if (_ctx.at(output_index).info().isDynamic())
+ if (operands.at(output_index).info().isDynamic())
return;
const auto input_index{node.getInputs().at(0)};
- OP_REQUIRES(_ctx.at(output_index).shape().rank() == _ctx.at(input_index).shape().rank());
+ OP_REQUIRES(operands.at(output_index).shape().rank() == operands.at(input_index).shape().rank());
}
} // namespace compiler
diff --git a/runtime/onert/core/src/compiler/ShapeValidator.h b/runtime/onert/core/src/compiler/ShapeValidator.h
index 763cf7ce3..a51e8adc0 100644
--- a/runtime/onert/core/src/compiler/ShapeValidator.h
+++ b/runtime/onert/core/src/compiler/ShapeValidator.h
@@ -39,8 +39,13 @@ class ShapeValidator : public ir::OperationVisitor
public:
ShapeValidator(void) = delete;
ShapeValidator(const ir::Graph &graph);
+ ShapeValidator(const ShapeValidator &) = delete;
+ ShapeValidator(ShapeValidator &&) = delete;
+ ~ShapeValidator() = default;
public:
+ ShapeValidator &operator=(const ShapeValidator &) = delete;
+ ShapeValidator &operator=(ShapeValidator &&) = delete;
void operator()();
public:
@@ -90,10 +95,7 @@ private:
void checkUnaryOp(const ir::Operation &node);
private:
- // TODO Remove _ctx field
const ir::Graph &_graph;
- const ir::Operands &_ctx;
- ir::Layout _current_layout;
};
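The newly deleted copy/move members make sense given that the only remaining state is a reference to the graph: a copied validator would silently alias the same graph, so accidental copies are better rejected at compile time. A self-contained sketch of the idiom (stub type, not the real header):

struct GraphStub
{
}; // stand-in for ir::Graph

class ValidatorSketch
{
public:
  explicit ValidatorSketch(const GraphStub &graph) : _graph{graph} {}
  ValidatorSketch(const ValidatorSketch &) = delete;            // no aliasing copies
  ValidatorSketch &operator=(const ValidatorSketch &) = delete; // reference member anyway
private:
  const GraphStub &_graph;
};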
} // namespace compiler
diff --git a/runtime/onert/core/src/compiler/StaticShapeInferer.cc b/runtime/onert/core/src/compiler/StaticShapeInferer.cc
index f2fee2c3c..485450560 100644
--- a/runtime/onert/core/src/compiler/StaticShapeInferer.cc
+++ b/runtime/onert/core/src/compiler/StaticShapeInferer.cc
@@ -19,62 +19,90 @@
#include "util/logging.h"
#include <sstream>
+#include <stdexcept>
namespace onert
{
namespace compiler
{
-
-void StaticShapeInferer::inferSubgraph(ir::SubgraphIndex subg_ind)
+void OperandObserver::updateShapes(const std::vector<ir::OperandInfo> &changed_operands_info,
+ bool unpredictable)
{
- StaticShapeInferer inferer(subg_ind, _lowered_subgs);
- auto &lgraph = _lowered_subgs.at(subg_ind);
- for (auto op_ind : lgraph->graph().topolSortOperations())
+ assert(changed_operands_info.size() == _operands.size());
+ for (size_t i = 0; i < changed_operands_info.size(); ++i)
{
- auto &op = lgraph->graph().operations().at(op_ind);
- bool has_dynamic_tensor = inferer.infer(op);
- lgraph->setHasDynamicTensor(op_ind, has_dynamic_tensor);
+ const auto &changed_operand_info = changed_operands_info.at(i);
+ auto &operand = _operands.at(i);
+ // assert(changed_operand_info.typeInfo() == operand->typeInfo());
+ // This error check may be replaced by an assertion if this function is called after the
+ // validation of models is completed.
+ if (changed_operand_info.typeInfo() != operand->typeInfo())
+ {
+ throw std::runtime_error("OperandObserver: The types of operands are mismatched");
+ }
+ if (!operand->info().isConstant() && (changed_operand_info.isDynamic() || unpredictable))
+ {
+ operand->info().setDynamic();
+ }
+ else
+ {
+ const auto &new_shape = changed_operands_info.at(i).shape();
+ operand->info().shape(new_shape);
+ }
}
}
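Condensed, the loop above applies one small decision per operand: a non-constant operand whose new info is dynamic (or whose shape was declared unpredictable) is marked dynamic, otherwise only its static shape is overwritten. A standalone sketch of that rule:

enum class Update
{
  MarkDynamic,
  SetShape
};

inline Update decide(bool is_constant, bool new_info_is_dynamic, bool unpredictable)
{
  if (!is_constant && (new_info_is_dynamic || unpredictable))
    return Update::MarkDynamic;
  return Update::SetShape;
}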
-bool StaticShapeInferer::infer(const ir::Operation &op)
+void StaticShapeInferer::infer()
{
- bool has_dynamic_tensor = false;
-
- auto opcode = op.opcode();
-
- _return_has_dynamic_tensor = false; // this is used as a return value inside operation's visit()
-
- // IF: need shape inference for then, else
- // While: need shape inference for condition, body
- if (opcode == ir::OpCode::If || opcode == ir::OpCode::While)
- {
- op.accept(*this);
- }
- else
+ for (const auto &op_idx : _lowered_subg->graph().topolSortOperations())
{
- _return_has_dynamic_tensor = checkDynamicInput(op);
-
- if (_return_has_dynamic_tensor)
+ const auto &op = _lowered_subg->graph().operations().at(op_idx);
+ bool has_dynamic_tensor = false;
+ const auto opcode = op.opcode();
+ // IF: requires shape inference for then, else
+ // While: requires shape inference for condition, body
+ if (opcode == ir::OpCode::If || opcode == ir::OpCode::While)
{
- setDynamicOutput(op);
+ op.accept(*this);
}
else
{
- op.accept(*this);
+ has_dynamic_tensor = checkDynamicInput(op);
+ if (has_dynamic_tensor)
+ {
+ setDynamicOutput(op);
+ }
+ else
+ {
+ op.accept(*this);
+ }
}
+ has_dynamic_tensor = has_dynamic_tensor || checkDynamicOutput(op);
+ _lowered_subg->setHasDynamicTensor(op_idx, has_dynamic_tensor);
}
- has_dynamic_tensor = has_dynamic_tensor || _return_has_dynamic_tensor;
-
- return has_dynamic_tensor;
+ if (_controlflow_output_observer != nullptr)
+ {
+ // re-sizing output shapes of the controlflow operation branching to this subgraph
+ std::vector<ir::OperandInfo> outputs_info;
+ const auto &graph = _lowered_subg->graph();
+ const auto &outputs = graph.getOutputs();
+ for (size_t i = 0; i < outputs.size(); ++i)
+ {
+ const auto &operand_info = graph.operands().at(outputs.at(i)).info();
+ outputs_info.emplace_back(operand_info);
+ }
+ _controlflow_output_observer->updateShapes(outputs_info);
+ }
}
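The control flow of the new infer(), stripped of the If/While handling (those two opcodes are always visited so their subgraphs get inferred), is roughly the following sketch; all names are stand-ins rather than the onert API:

#include <functional>
#include <vector>

void infer_sketch(const std::vector<int> &topo_sorted_ops,
                  const std::function<bool(int)> &any_input_dynamic,
                  const std::function<bool(int)> &any_output_dynamic,
                  const std::function<void(int)> &mark_outputs_dynamic,
                  const std::function<void(int)> &visit_op,
                  const std::function<void(int, bool)> &set_has_dynamic_tensor)
{
  for (int op : topo_sorted_ops)
  {
    bool dynamic = any_input_dynamic(op);
    if (dynamic)
      mark_outputs_dynamic(op); // outputs inherit dynamic-ness; no static inference needed
    else
      visit_op(op);             // static inference re-sizes the outputs
    set_has_dynamic_tensor(op, dynamic || any_output_dynamic(op));
  }
}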
bool StaticShapeInferer::checkDynamicInput(const ir::Operation &op)
{
+ const auto &operands = _lowered_subg->graph().operands();
for (auto input_idx : op.getInputs() | ir::Remove::UNDEFINED | ir::Remove::DUPLICATED)
{
- if (_operands.at(input_idx).info().isDynamic())
+ if (operands.at(input_idx).info().isDynamic())
{
return true;
}
@@ -83,11 +111,25 @@ bool StaticShapeInferer::checkDynamicInput(const ir::Operation &op)
return false;
}
+bool StaticShapeInferer::checkDynamicOutput(const ir::Operation &op)
+{
+ auto &operands = _lowered_subg->graph().operands();
+ for (auto output_idx : op.getOutputs() | ir::Remove::UNDEFINED)
+ {
+ if (operands.at(output_idx).info().isDynamic())
+ {
+ return true;
+ }
+ }
+ return false;
+}
+
void StaticShapeInferer::setDynamicOutput(const ir::Operation &op)
{
+ auto &operands = _lowered_subg->graph().operands();
for (auto output_idx : op.getOutputs() | ir::Remove::UNDEFINED)
{
- _operands.at(output_idx).info().setDynamic();
+ operands.at(output_idx).info().setDynamic();
}
}
@@ -95,11 +137,12 @@ void StaticShapeInferer::handleBinaryArithmeticOp(const ir::Operation &op,
const ir::OperandIndex lhs_idx,
const ir::OperandIndex rhs_idx)
{
- const auto &lhs = _operands.at(lhs_idx);
- const auto &rhs = _operands.at(rhs_idx);
+ auto &operands = _lowered_subg->graph().operands();
+ const auto &lhs = operands.at(lhs_idx);
+ const auto &rhs = operands.at(rhs_idx);
const auto output_idx = op.getOutputs().at(0);
- ir::Operand &output = _operands.at(output_idx);
+ ir::Operand &output = operands.at(output_idx);
// re-sizing output shape
ir::Shape new_shape = shape_inference::inferEltwiseShape(lhs.info().shape(), rhs.info().shape());
@@ -109,11 +152,12 @@ void StaticShapeInferer::handleBinaryArithmeticOp(const ir::Operation &op,
void StaticShapeInferer::handleSimpleUnaryOp(const ir::Operation &op,
const ir::OperandIndex input_idx)
{
- const auto &input = _operands.at(input_idx);
+ auto &operands = _lowered_subg->graph().operands();
+ const auto &input = operands.at(input_idx);
// get mutable output operand
const auto output_idx = op.getOutputs().at(0);
- ir::Operand &output = _operands.at(output_idx);
+ ir::Operand &output = operands.at(output_idx);
// re-sizing output shape
ir::Shape new_shape = input.info().shape();
@@ -136,36 +180,31 @@ void StaticShapeInferer::dump()
return sstream.str();
};
- for (const auto &pair : _lowered_subgs)
- {
- const auto index = pair.first;
- const auto &lowered_subg = pair.second;
- VERBOSE(StaticShapeInferer) << index << std::endl;
- lowered_subg->graph().operands().iterate(
- [&](const ir::OperandIndex &ind, const ir::Operand &operand) {
- VERBOSE(StaticShapeInferer)
- << " " << ind << ", " << (operand.info().isDynamic() ? "Dynamic" : "Static") << ", "
- << get_shape_str(operand.info().shape()) << std::endl;
- });
- }
+ _lowered_subg->graph().operands().iterate(
+ [&](const ir::OperandIndex &ind, const ir::Operand &operand) {
+ VERBOSE(StaticShapeInferer) << " " << ind << ", "
+ << (operand.info().isDynamic() ? "Dynamic" : "Static") << ", "
+ << get_shape_str(operand.info().shape()) << std::endl;
+ });
}
void StaticShapeInferer::visit(const ir::operation::ArgMinMax &op)
{
+ auto &operands = _lowered_subg->graph().operands();
+
const auto input_idx{op.getInputs().at(ir::operation::ArgMinMax::Input::INPUT)};
- const auto &input = _operands.at(input_idx);
+ const auto &input = operands.at(input_idx);
const auto axis_idx{op.getInputs().at(ir::operation::ArgMinMax::Input::AXIS)};
- const auto &axis = _operands.at(axis_idx);
+ const auto &axis = operands.at(axis_idx);
// get mutable output operand
const auto output_idx = op.getOutputs().at(0);
- ir::Operand &output = _operands.at(output_idx);
+ ir::Operand &output = operands.at(output_idx);
if (!axis.isConstant())
{
output.info().setDynamic();
- _return_has_dynamic_tensor = true;
return;
}
@@ -181,27 +220,31 @@ void StaticShapeInferer::visit(const ir::operation::ArgMinMax &op)
void StaticShapeInferer::visit(const ir::operation::BatchMatMul &op)
{
+ auto &operands = _lowered_subg->graph().operands();
+
const auto lhs_index = op.getInputs().at(ir::operation::BatchMatMul::Input::LHS);
const auto rhs_index = op.getInputs().at(ir::operation::BatchMatMul::Input::RHS);
const auto output_index = op.getOutputs().at(0);
- const auto &lhs = _operands.at(lhs_index);
- const auto &rhs = _operands.at(rhs_index);
- auto &output = _operands.at(output_index);
+ const auto &lhs = operands.at(lhs_index);
+ const auto &rhs = operands.at(rhs_index);
+ auto &output = operands.at(output_index);
auto new_shape = shape_inference::inferBatchMatMulShape(lhs.shape(), rhs.shape(), op.param());
output.info().shape(new_shape);
}
void StaticShapeInferer::visit(const ir::operation::BCQFullyConnected &op)
{
+ auto &operands = _lowered_subg->graph().operands();
+
const auto input_idx{op.getInputs().at(ir::operation::BCQFullyConnected::Input::INPUT)};
- const auto &input = _operands.at(input_idx);
+ const auto &input = operands.at(input_idx);
const auto cluster_idx{
op.getInputs().at(ir::operation::BCQFullyConnected::Input::WEIGHTS_CLUSTERS)};
- const auto &cluster = _operands.at(cluster_idx);
+ const auto &cluster = operands.at(cluster_idx);
const auto output_idx = op.getOutputs().at(0);
- ir::Operand &output = _operands.at(output_idx);
+ ir::Operand &output = operands.at(output_idx);
auto cluster_buf = reinterpret_cast<const int32_t *>(cluster.data()->base());
assert(cluster_buf);
@@ -214,17 +257,19 @@ void StaticShapeInferer::visit(const ir::operation::BCQFullyConnected &op)
void StaticShapeInferer::visit(const ir::operation::BCQGather &op)
{
+ auto &operands = _lowered_subg->graph().operands();
+
const auto indices_idx{op.getInputs().at(ir::operation::BCQGather::Input::INDICES)};
- const auto &indices = _operands.at(indices_idx);
+ const auto &indices = operands.at(indices_idx);
const auto input_binary_idx{op.getInputs().at(ir::operation::BCQGather::Input::INPUT_BINARY)};
- const auto &input_binary = _operands.at(input_binary_idx);
+ const auto &input_binary = operands.at(input_binary_idx);
const auto cluster_idx{op.getInputs().at(ir::operation::BCQGather::Input::INPUT_CLUSTERS)};
- const auto &cluster = _operands.at(cluster_idx);
+ const auto &cluster = operands.at(cluster_idx);
const auto output_idx = op.getOutputs().at(0);
- ir::Operand &output = _operands.at(output_idx);
+ ir::Operand &output = operands.at(output_idx);
auto cluster_buf = reinterpret_cast<const int32_t *>(cluster.data()->base());
assert(cluster_buf);
@@ -247,16 +292,16 @@ void StaticShapeInferer::visit(const ir::operation::BinaryArithmetic &op)
void StaticShapeInferer::visit(const ir::operation::BroadcastTo &op)
{
// get mutable output operand
+ auto &operands = _lowered_subg->graph().operands();
const auto output_idx = op.getOutputs().at(0);
- ir::Operand &output = _operands.at(output_idx);
+ ir::Operand &output = operands.at(output_idx);
const auto shape_idx{op.getInputs().at(ir::operation::BroadcastTo::Input::SHAPE)};
- const auto &shape = _operands.at(shape_idx);
+ const auto &shape = operands.at(shape_idx);
if (!shape.isConstant())
{
output.info().setDynamic();
- _return_has_dynamic_tensor = true;
return;
}
@@ -276,16 +321,18 @@ void StaticShapeInferer::visit(const ir::operation::Comparison &op)
void StaticShapeInferer::visit(const ir::operation::Concat &op)
{
+ auto &operands = _lowered_subg->graph().operands();
+
const auto input_count = op.getInputs().size();
const auto output_idx = op.getOutputs().at(0);
- ir::Operand &output = _operands.at(output_idx);
+ ir::Operand &output = operands.at(output_idx);
shape_inference::Shapes input_shapes;
for (uint32_t i = 0; i < input_count; i++)
{
const auto input_idx{op.getInputs().at(i)};
- const auto &input = _operands.at(input_idx);
+ const auto &input = operands.at(input_idx);
input_shapes.emplace_back(input.shape());
}
@@ -297,12 +344,14 @@ void StaticShapeInferer::visit(const ir::operation::Concat &op)
void StaticShapeInferer::visit(const ir::operation::Conv2D &op)
{
+ auto &operands = _lowered_subg->graph().operands();
+
const auto input_idx{op.getInputs().at(ir::operation::Conv2D::Input::INPUT)};
- const auto &input = _operands.at(input_idx);
+ const auto &input = operands.at(input_idx);
const auto ker_idx{op.getInputs().at(ir::operation::Conv2D::Input::KERNEL)};
- const auto &ker = _operands.at(ker_idx);
+ const auto &ker = operands.at(ker_idx);
const auto output_idx = op.getOutputs().at(0);
- ir::Operand &output = _operands.at(output_idx);
+ ir::Operand &output = operands.at(output_idx);
// re-sizing output shape
ir::Shape new_shape =
@@ -328,17 +377,18 @@ void StaticShapeInferer::visit(const ir::operation::ElementwiseUnary &op)
void StaticShapeInferer::visit(const ir::operation::ExpandDims &op)
{
+ auto &operands = _lowered_subg->graph().operands();
+
const auto input_idx{op.getInputs().at(ir::operation::ExpandDims::Input::INPUT)};
- const auto &input = _operands.at(input_idx);
+ const auto &input = operands.at(input_idx);
const auto axis_idx{op.getInputs().at(ir::operation::ExpandDims::Input::AXIS)};
- const auto &axis = _operands.at(axis_idx);
+ const auto &axis = operands.at(axis_idx);
const auto output_idx = op.getOutputs().at(0);
- ir::Operand &output = _operands.at(output_idx);
+ ir::Operand &output = operands.at(output_idx);
if (!axis.isConstant())
{
output.info().setDynamic();
- _return_has_dynamic_tensor = true;
return;
}
@@ -360,15 +410,16 @@ void StaticShapeInferer::visit(const ir::operation::ExpandDims &op)
void StaticShapeInferer::visit(const ir::operation::Fill &op)
{
+ auto &operands = _lowered_subg->graph().operands();
+
const auto shape_idx{op.getInputs().at(ir::operation::Fill::Input::SHAPE)};
- const auto &shape = _operands.at(shape_idx);
+ const auto &shape = operands.at(shape_idx);
const auto output_idx = op.getOutputs().at(0);
- ir::Operand &output = _operands.at(output_idx);
+ ir::Operand &output = operands.at(output_idx);
if (!shape.isConstant())
{
output.info().setDynamic();
- _return_has_dynamic_tensor = true;
return;
}
@@ -390,15 +441,17 @@ void StaticShapeInferer::visit(const ir::operation::Fill &op)
void StaticShapeInferer::visit(const ir::operation::FullyConnected &op)
{
+ auto &operands = _lowered_subg->graph().operands();
+
const auto input_idx{op.getInputs().at(ir::operation::FullyConnected::Input::INPUT)};
- const auto &input = _operands.at(input_idx);
+ const auto &input = operands.at(input_idx);
const auto ker_idx{op.getInputs().at(ir::operation::FullyConnected::Input::WEIGHT)};
- const auto &ker = _operands.at(ker_idx);
+ const auto &ker = operands.at(ker_idx);
// get mutable output operand
const auto output_idx = op.getOutputs().at(0);
- ir::Operand &output = _operands.at(output_idx);
+ ir::Operand &output = operands.at(output_idx);
// re-sizing output shape
ir::Shape new_shape =
shape_inference::inferFullyConnectedShape(input.info().shape(), ker.info().shape());
@@ -412,15 +465,17 @@ void StaticShapeInferer::visit(const ir::operation::FusedBatchNorm &op)
void StaticShapeInferer::visit(const ir::operation::Gather &op)
{
+ auto &operands = _lowered_subg->graph().operands();
+
const auto input_idx{op.getInputs().at(ir::operation::Gather::Input::INPUT)};
- const auto &input = _operands.at(input_idx);
+ const auto &input = operands.at(input_idx);
// get mutable output operand
const auto output_idx = op.getOutputs().at(0);
- ir::Operand &output = _operands.at(output_idx);
+ ir::Operand &output = operands.at(output_idx);
const auto indices_idx{op.getInputs().at(ir::operation::Gather::Input::INDICES)};
- const auto &indices = _operands.at(indices_idx);
+ const auto &indices = operands.at(indices_idx);
const auto rank = input.info().shape().rank();
const auto axis = ((op.param().axis < 0) ? rank + op.param().axis : op.param().axis);
@@ -434,70 +489,21 @@ void StaticShapeInferer::visit(const ir::operation::Gather &op)
void StaticShapeInferer::visit(const ir::operation::If &op)
{
- auto &then_graph = _lowered_subgs.at(op.param().then_subg_index)->graph();
- auto &else_graph = _lowered_subgs.at(op.param().else_subg_index)->graph();
+ // re-sizing input shapes of then/else subgraph
const std::vector<ir::OperandIndex> inputs{op.getInputs().begin() + 1, op.getInputs().end()};
- const auto &outputs = op.getOutputs();
- // re-sizing input shapes of then subgraph
- const auto &then_inputs = then_graph.getInputs();
- assert(inputs.size() == then_inputs.size());
+ std::vector<ir::OperandInfo> inputs_info;
+ const auto &graph = _lowered_subg->graph();
for (size_t i = 0; i < inputs.size(); ++i)
{
- auto &then_input = then_graph.operands().at(then_inputs.at(i));
- if (_operands.at(inputs.at(i)).info().isDynamic())
- {
- then_input.info().setDynamic();
- }
- else
- {
- auto new_shape = _operands.at(inputs.at(i)).info().shape();
- then_input.info().shape(new_shape);
- }
+ const auto &operand_info = graph.operands().at(inputs.at(i)).info();
+ inputs_info.emplace_back(operand_info);
}
+ _subg_input_observers.at(op.param().then_subg_index)->updateShapes(inputs_info);
+ _child_inferers.at(op.param().then_subg_index)->infer();
- // re-sizing input shapes of else subgraph
- const auto &else_inputs = else_graph.getInputs();
- assert(inputs.size() == else_inputs.size());
- for (size_t i = 0; i < inputs.size(); ++i)
- {
- auto &else_input = else_graph.operands().at(else_inputs.at(i));
- if (_operands.at(inputs.at(i)).info().isDynamic())
- {
- else_input.info().setDynamic();
- }
- else
- {
- const auto &new_shape = _operands.at(inputs.at(i)).info().shape();
- else_input.info().shape(new_shape);
- }
- }
-
- inferSubgraph(op.param().then_subg_index);
- inferSubgraph(op.param().else_subg_index);
-
- // re-sizing output shapes
- // TODO use then_graph / else_graph instead
- const auto &then_outputs = _lowered_subgs.at(op.param().then_subg_index)->graph().getOutputs();
- const auto &else_outputs = _lowered_subgs.at(op.param().else_subg_index)->graph().getOutputs();
- assert(outputs.size() == then_outputs.size());
- assert(outputs.size() == else_outputs.size());
- for (size_t i = 0; i < outputs.size(); ++i)
- {
- const auto &then_output = then_graph.operands().at(then_outputs.at(i));
- const auto &else_output = else_graph.operands().at(else_outputs.at(i));
- auto &output = _operands.at(outputs.at(i));
- if (!then_output.info().isDynamic() && !else_output.info().isDynamic() &&
- then_output.shape() == else_output.shape())
- {
- output.info().shape(then_output.shape());
- }
- else
- {
- output.info().setDynamic();
- _return_has_dynamic_tensor = true;
- }
- }
+ _subg_input_observers.at(op.param().else_subg_index)->updateShapes(inputs_info);
+ _child_inferers.at(op.param().else_subg_index)->infer();
}
void StaticShapeInferer::visit(const ir::operation::L2Normalization &op)
@@ -507,8 +513,10 @@ void StaticShapeInferer::visit(const ir::operation::L2Normalization &op)
void StaticShapeInferer::visit(const ir::operation::LSTM &op)
{
+ auto &operands = _lowered_subg->graph().operands();
+
const auto output_index{op.getOutputs().at(ir::operation::LSTM::Output::OUTPUT)};
- auto &output = _operands.at(output_index);
+ auto &output = operands.at(output_index);
const auto output_state_out_index{
op.getOutputs().at(ir::operation::LSTM::Output::OUTPUT_STATE_OUT)};
@@ -518,24 +526,24 @@ void StaticShapeInferer::visit(const ir::operation::LSTM &op)
const auto scratch_buffer_index{op.getOutputs().at(ir::operation::LSTM::Output::SCRATCH_BUFFER)};
if (output.info().isDynamic() ||
- (_operands.exist(output_state_out_index) &&
- _operands.at(output_state_out_index).info().isDynamic()) ||
- (_operands.exist(cell_state_out_index) &&
- _operands.at(cell_state_out_index).info().isDynamic()) ||
- (_operands.exist(scratch_buffer_index) &&
- _operands.at(scratch_buffer_index).info().isDynamic()))
+ (operands.exist(output_state_out_index) &&
+ operands.at(output_state_out_index).info().isDynamic()) ||
+ (operands.exist(cell_state_out_index) &&
+ operands.at(cell_state_out_index).info().isDynamic()) ||
+ (operands.exist(scratch_buffer_index) &&
+ operands.at(scratch_buffer_index).info().isDynamic()))
return;
const auto input_index{op.getInputs().at(ir::operation::LSTM::Input::INPUT)};
- const auto &input = _operands.at(input_index);
+ const auto &input = operands.at(input_index);
const auto input_to_output_weights_index{
op.getInputs().at(ir::operation::LSTM::Input::INPUT_TO_OUTPUT_WEIGHTS)};
- const auto &input_to_output_weights = _operands.at(input_to_output_weights_index);
+ const auto &input_to_output_weights = operands.at(input_to_output_weights_index);
const auto recurrent_to_output_weights_index{
op.getInputs().at(ir::operation::LSTM::Input::RECURRENT_TO_OUTPUT_WEIGHTS)};
- const auto &recurrent_to_output_weights = _operands.at(recurrent_to_output_weights_index);
+ const auto &recurrent_to_output_weights = operands.at(recurrent_to_output_weights_index);
// re-sizing outputs
const int n_batch = (input.shape().rank() == 3 && op.param().time_major) ? input.shape().dim(1)
@@ -555,21 +563,21 @@ void StaticShapeInferer::visit(const ir::operation::LSTM &op)
output.info().shape(ir::Shape{n_batch, n_output});
}
- if (_operands.exist(output_state_out_index))
+ if (operands.exist(output_state_out_index))
{
- auto &output_state_out = _operands.at(output_state_out_index);
+ auto &output_state_out = operands.at(output_state_out_index);
output_state_out.info().shape(ir::Shape{n_batch, n_output});
}
- if (_operands.exist(cell_state_out_index))
+ if (operands.exist(cell_state_out_index))
{
- auto &cell_state_out = _operands.at(cell_state_out_index);
+ auto &cell_state_out = operands.at(cell_state_out_index);
cell_state_out.info().shape(ir::Shape{n_batch, n_cell});
}
- if (_operands.exist(scratch_buffer_index))
+ if (operands.exist(scratch_buffer_index))
{
- auto &scratch_buffer = _operands.at(scratch_buffer_index);
+ auto &scratch_buffer = operands.at(scratch_buffer_index);
const auto input_to_input_weights_index{
op.getInputs().at(ir::operation::LSTM::Input::INPUT_TO_INPUT_WEIGHTS)};
@@ -577,11 +585,11 @@ void StaticShapeInferer::visit(const ir::operation::LSTM &op)
op.getInputs().at(ir::operation::LSTM::Input::RECURRENT_TO_INPUT_WEIGHTS)};
bool has_input_to_input_weights =
- _operands.at(input_to_input_weights_index).shape().dim(0) != 0 &&
- _operands.at(input_to_input_weights_index).shape().dim(1) != 0;
+ operands.at(input_to_input_weights_index).shape().dim(0) != 0 &&
+ operands.at(input_to_input_weights_index).shape().dim(1) != 0;
bool has_recurrent_to_input_weights =
- _operands.at(recurrent_to_input_weights_index).shape().dim(0) != 0 &&
- _operands.at(recurrent_to_input_weights_index).shape().dim(1) != 0;
+ operands.at(recurrent_to_input_weights_index).shape().dim(0) != 0 &&
+ operands.at(recurrent_to_input_weights_index).shape().dim(1) != 0;
// NOTE The cell_to_input_weights do not exist in non-peephole although regular LSTM(non-CIFG).
// true: no CIFG
@@ -605,20 +613,21 @@ void StaticShapeInferer::visit(const ir::operation::MatrixBandPart &op)
void StaticShapeInferer::visit(const ir::operation::OneHot &op)
{
+ auto &operands = _lowered_subg->graph().operands();
+
const auto indice_idx{op.getInputs().at(ir::operation::OneHot::Input::INDICES)};
- const auto &indice = _operands.at(indice_idx);
+ const auto &indice = operands.at(indice_idx);
const auto depth_idx{op.getInputs().at(ir::operation::OneHot::Input::DEPTH)};
- const auto &depth = _operands.at(depth_idx);
+ const auto &depth = operands.at(depth_idx);
const auto axis = op.param().axis;
auto output_idx = op.getOutputs().at(0);
- ir::Operand &output = _operands.at(output_idx);
+ ir::Operand &output = operands.at(output_idx);
if (!depth.isConstant())
{
output.info().setDynamic();
- _return_has_dynamic_tensor = true;
return;
}
@@ -631,12 +640,14 @@ void StaticShapeInferer::visit(const ir::operation::OneHot &op)
void StaticShapeInferer::visit(const ir::operation::Pack &op)
{
+ auto &operands = _lowered_subg->graph().operands();
+
const auto input_idx{op.getInputs().at(0)};
- const auto &input = _operands.at(input_idx);
+ const auto &input = operands.at(input_idx);
// get mutable output operand
const auto output_idx = op.getOutputs().at(0);
- ir::Operand &output = _operands.at(output_idx);
+ ir::Operand &output = operands.at(output_idx);
const auto rank = input.shape().rank() + 1;
const auto axis = ((op.param().axis < 0) ? rank + op.param().axis : op.param().axis);
@@ -651,21 +662,22 @@ void StaticShapeInferer::visit(const ir::operation::Pack &op)
void StaticShapeInferer::visit(const ir::operation::Pad &op)
{
+ auto &operands = _lowered_subg->graph().operands();
+
const auto input_idx{op.getInputs().at(ir::operation::Pad::Input::INPUT)};
- const auto &input = _operands.at(input_idx);
+ const auto &input = operands.at(input_idx);
const auto pad_idx{op.getInputs().at(ir::operation::Pad::Input::PAD)};
- const auto &pad = _operands.at(pad_idx);
+ const auto &pad = operands.at(pad_idx);
// get mutable output operand
const auto output_idx = op.getOutputs().at(0);
- ir::Operand &output = _operands.at(output_idx);
+ ir::Operand &output = operands.at(output_idx);
// if pad is not constant, output also becomes dynamic
if (!pad.isConstant())
{
output.info().setDynamic();
- _return_has_dynamic_tensor = true;
return;
}
@@ -678,10 +690,12 @@ void StaticShapeInferer::visit(const ir::operation::Pad &op)
void StaticShapeInferer::visit(const ir::operation::Permute &op)
{
+ auto &operands = _lowered_subg->graph().operands();
+
const auto input_idx{op.getInputs().at(0)};
- const auto &input = _operands.at(input_idx);
+ const auto &input = operands.at(input_idx);
const auto output_idx = op.getOutputs().at(0);
- ir::Operand &output = _operands.at(output_idx);
+ ir::Operand &output = operands.at(output_idx);
// re-sizing output shape
// Permute is a special operation that layouts of input/output may be different on backend
@@ -700,16 +714,18 @@ void StaticShapeInferer::visit(const ir::operation::Pow &op)
void StaticShapeInferer::visit(const ir::operation::Range &op)
{
+ auto &operands = _lowered_subg->graph().operands();
+
const auto start_idx{op.getInputs().at(ir::operation::Range::Input::START)};
const auto limit_idx{op.getInputs().at(ir::operation::Range::Input::LIMIT)};
const auto delta_idx{op.getInputs().at(ir::operation::Range::Input::DELTA)};
- const auto &start_op = _operands.at(start_idx);
- const auto &limit_op = _operands.at(limit_idx);
- const auto &delta_op = _operands.at(delta_idx);
+ const auto &start_op = operands.at(start_idx);
+ const auto &limit_op = operands.at(limit_idx);
+ const auto &delta_op = operands.at(delta_idx);
// get mutable output operand
const auto output_idx = op.getOutputs().at(0);
- ir::Operand &output = _operands.at(output_idx);
+ ir::Operand &output = operands.at(output_idx);
ir::Shape new_shape;
if (start_op.isConstant() && limit_op.isConstant() && delta_op.isConstant())
@@ -731,21 +747,22 @@ void StaticShapeInferer::visit(const ir::operation::Range &op)
else
{
output.info().setDynamic();
- _return_has_dynamic_tensor = true;
}
}
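When start/limit/delta are all constant, the branch above computes the output length; onert's inferRangeShape presumably uses the familiar ceil((limit - start) / delta) element count. A sketch of that formula:

#include <cmath>

inline int range_length(float start, float limit, float delta)
{
  return static_cast<int>(std::ceil((limit - start) / delta)); // e.g. (0, 10, 3) -> 4
}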
void StaticShapeInferer::visit(const ir::operation::Reduce &op)
{
+ auto &operands = _lowered_subg->graph().operands();
+
const auto input_idx{op.getInputs().at(ir::operation::Reduce::Input::INPUT)};
- const auto &input = _operands.at(input_idx);
+ const auto &input = operands.at(input_idx);
const auto axes_idx{op.getInputs().at(ir::operation::Reduce::Input::AXES)};
- const auto &axes = _operands.at(axes_idx);
+ const auto &axes = operands.at(axes_idx);
// get mutable output operand
const auto output_idx = op.getOutputs().at(0);
- ir::Operand &output = _operands.at(output_idx);
+ ir::Operand &output = operands.at(output_idx);
std::vector<int32_t> axes_vec;
for (size_t i = 0; i < axes.shape().num_elements(); ++i)
@@ -777,19 +794,21 @@ void StaticShapeInferer::visit(const ir::operation::Reduce &op)
void StaticShapeInferer::visit(const ir::operation::Reshape &op)
{
+ auto &operands = _lowered_subg->graph().operands();
+
const auto input_idx{op.getInputs().at(ir::operation::Reshape::Input::INPUT)};
- const auto &input = _operands.at(input_idx);
+ const auto &input = operands.at(input_idx);
// get mutable output operand
const auto output_idx = op.getOutputs().at(0);
- ir::Operand &output = _operands.at(output_idx);
+ ir::Operand &output = operands.at(output_idx);
// New shape is given by second input tensor
if (op.getInputs().size() == 2)
{
// Let's check the second input
const auto shape_idx{op.getInputs().at(ir::operation::Reshape::Input::SHAPE)};
- const auto &shape = _operands.at(shape_idx);
+ const auto &shape = operands.at(shape_idx);
if (shape.isConstant())
{
@@ -810,7 +829,6 @@ void StaticShapeInferer::visit(const ir::operation::Reshape &op)
{
      // if shape is NOT Const, set output shape to be dynamic
output.info().setDynamic();
- _return_has_dynamic_tensor = true;
}
}
// New shape is given by option
@@ -835,21 +853,22 @@ void StaticShapeInferer::visit(const ir::operation::Reshape &op)
void StaticShapeInferer::visit(const ir::operation::ResizeBilinear &op)
{
+ auto &operands = _lowered_subg->graph().operands();
+
const auto input_idx{op.getInputs().at(ir::operation::ResizeBilinear::Input::INPUT)};
- const auto &input = _operands.at(input_idx);
+ const auto &input = operands.at(input_idx);
// get mutable output operand
const auto output_idx = op.getOutputs().at(0);
- ir::Operand &output = _operands.at(output_idx);
+ ir::Operand &output = operands.at(output_idx);
int32_t height_out, width_out;
if (op.getInputs().size() == 2)
{
- auto &size = _operands.at(op.getInputs().at(ir::operation::ResizeBilinear::Input::SIZE));
+ auto &size = operands.at(op.getInputs().at(ir::operation::ResizeBilinear::Input::SIZE));
if (!size.isConstant())
{
output.info().setDynamic();
- _return_has_dynamic_tensor = true;
return;
}
const auto size_v = size.asVector<std::int32_t>();
@@ -881,17 +900,19 @@ void StaticShapeInferer::visit(const ir::operation::Reverse &op)
void StaticShapeInferer::visit(const ir::operation::Select &op)
{
+ auto &operands = _lowered_subg->graph().operands();
+
const auto input_cond_idx{op.getInputs().at(ir::operation::Select::Input::CONDITION)};
- const auto &input_cond = _operands.at(input_cond_idx);
+ const auto &input_cond = operands.at(input_cond_idx);
const auto input_true_idx{op.getInputs().at(ir::operation::Select::Input::INPUT_TRUE)};
- const auto &input_true = _operands.at(input_true_idx);
+ const auto &input_true = operands.at(input_true_idx);
const auto input_false_idx{op.getInputs().at(ir::operation::Select::Input::INPUT_FALSE)};
- const auto &input_false = _operands.at(input_false_idx);
+ const auto &input_false = operands.at(input_false_idx);
auto output_idx = op.getOutputs().at(0);
- ir::Operand &output = _operands.at(output_idx);
+ ir::Operand &output = operands.at(output_idx);
  // Select output shape
ir::Shape new_shape = shape_inference::inferSelectShape(
@@ -901,12 +922,14 @@ void StaticShapeInferer::visit(const ir::operation::Select &op)
void StaticShapeInferer::visit(const ir::operation::Shape &op)
{
+ auto &operands = _lowered_subg->graph().operands();
+
const auto input_idx{op.getInputs().at(0)};
- const auto &input = _operands.at(input_idx);
+ const auto &input = operands.at(input_idx);
// get mutable output operand
const auto output_idx = op.getOutputs().at(0);
- ir::Operand &output = _operands.at(output_idx);
+ ir::Operand &output = operands.at(output_idx);
// re-sizing output shape
ir::Shape output_shape;
@@ -917,20 +940,21 @@ void StaticShapeInferer::visit(const ir::operation::Shape &op)
void StaticShapeInferer::visit(const ir::operation::Slice &op)
{
+ auto &operands = _lowered_subg->graph().operands();
+
const auto input_index{op.getInputs().at(ir::operation::Slice::Input::INPUT)};
- const auto &input = _operands.at(input_index);
+ const auto &input = operands.at(input_index);
const auto begins_index{op.getInputs().at(ir::operation::Slice::Input::BEGINS)};
- const auto &begins = _operands.at(begins_index);
+ const auto &begins = operands.at(begins_index);
const auto sizes_index{op.getInputs().at(ir::operation::Slice::Input::SIZES)};
- const auto &sizes = _operands.at(sizes_index);
+ const auto &sizes = operands.at(sizes_index);
const auto output_index = op.getOutputs().at(0);
- ir::Operand &output = _operands.at(output_index);
+ ir::Operand &output = operands.at(output_index);
// Whether input is constant or not does not affect whether output is dynamic or not
if (!(begins.isConstant() && sizes.isConstant()))
{
output.info().setDynamic();
- _return_has_dynamic_tensor = true;
return;
}
@@ -959,21 +983,22 @@ void StaticShapeInferer::visit(const ir::operation::Softmax &op)
void StaticShapeInferer::visit(const ir::operation::SpaceToBatchND &op)
{
+ auto &operands = _lowered_subg->graph().operands();
+
const auto output_index = op.getOutputs().at(0);
const auto input_idx{op.getInputs().at(ir::operation::SpaceToBatchND::Input::INPUT)};
const auto block_shape_idx{op.getInputs().at(ir::operation::SpaceToBatchND::Input::BLOCK_SIZE)};
const auto padding_idx{op.getInputs().at(ir::operation::SpaceToBatchND::Input::PADDINGS)};
- ir::Operand &output = _operands.at(output_index);
- const auto &input = _operands.at(input_idx);
- const auto &block_shape = _operands.at(block_shape_idx);
- const auto &padding = _operands.at(padding_idx);
+ ir::Operand &output = operands.at(output_index);
+ const auto &input = operands.at(input_idx);
+ const auto &block_shape = operands.at(block_shape_idx);
+ const auto &padding = operands.at(padding_idx);
// Whether input is constant or not does not affect whether output is dynamic or not
if (!(block_shape.isConstant() && padding.isConstant()))
{
output.info().setDynamic();
- _return_has_dynamic_tensor = true;
return;
}
@@ -992,21 +1017,22 @@ void StaticShapeInferer::visit(const ir::operation::SpaceToBatchND &op)
void StaticShapeInferer::visit(const ir::operation::Split &op)
{
+ auto &operands = _lowered_subg->graph().operands();
+
const auto input_idx{op.getInputs().at(ir::operation::Split::Input::INPUT)};
- const auto &input = _operands.at(input_idx);
+ const auto &input = operands.at(input_idx);
const auto axis_idx{op.getInputs().at(ir::operation::Split::Input::AXIS)};
- const auto &axis = _operands.at(axis_idx);
+ const auto &axis = operands.at(axis_idx);
auto outputs = op.getOutputs();
if (!axis.isConstant())
{
for (auto output_idx : outputs)
{
- ir::Operand &output = _operands.at(output_idx);
+ ir::Operand &output = operands.at(output_idx);
output.info().setDynamic();
}
- _return_has_dynamic_tensor = true;
return;
}
@@ -1022,7 +1048,7 @@ void StaticShapeInferer::visit(const ir::operation::Split &op)
shape_inference::inferSplitShape(input.info().shape(), axis_value, num_splits);
for (auto output_idx : outputs)
{
- ir::Operand &output = _operands.at(output_idx);
+ ir::Operand &output = operands.at(output_idx);
output.info().shape(new_shape);
}
}
@@ -1035,11 +1061,13 @@ void StaticShapeInferer::visit(const ir::operation::SquaredDifference &op)
void StaticShapeInferer::visit(const ir::operation::Squeeze &op)
{
+ auto &operands = _lowered_subg->graph().operands();
+
const auto input_idx{op.getInputs().at(ir::operation::Squeeze::Input::INPUT)};
- const auto &input = _operands.at(input_idx);
+ const auto &input = operands.at(input_idx);
const auto output_idx = op.getOutputs().at(0);
- ir::Operand &output = _operands.at(output_idx);
+ ir::Operand &output = operands.at(output_idx);
  // Squeeze output shape
ir::Shape new_shape = shape_inference::inferSqueezeShape(input.info().shape(), op.param());
@@ -1048,21 +1076,22 @@ void StaticShapeInferer::visit(const ir::operation::Squeeze &op)
void StaticShapeInferer::visit(const ir::operation::StridedSlice &op)
{
+ auto &operands = _lowered_subg->graph().operands();
+
const auto input_index{op.getInputs().at(ir::operation::StridedSlice::Input::INPUT)};
- const auto &input = _operands.at(input_index);
+ const auto &input = operands.at(input_index);
const auto starts_index{op.getInputs().at(ir::operation::StridedSlice::Input::STARTS)};
- const auto &starts = _operands.at(starts_index);
+ const auto &starts = operands.at(starts_index);
const auto ends_index{op.getInputs().at(ir::operation::StridedSlice::Input::ENDS)};
- const auto &ends = _operands.at(ends_index);
+ const auto &ends = operands.at(ends_index);
const auto strides_index{op.getInputs().at(ir::operation::StridedSlice::Input::STRIDES)};
- const auto &strides = _operands.at(strides_index);
+ const auto &strides = operands.at(strides_index);
const auto output_index = op.getOutputs().at(0);
- ir::Operand &output = _operands.at(output_index);
+ ir::Operand &output = operands.at(output_index);
if (!(starts.isConstant() && ends.isConstant() && strides.isConstant()))
{
output.info().setDynamic();
- _return_has_dynamic_tensor = true;
return;
}
@@ -1085,19 +1114,20 @@ void StaticShapeInferer::visit(const ir::operation::StridedSlice &op)
void StaticShapeInferer::visit(const ir::operation::Tile &op)
{
+ auto &operands = _lowered_subg->graph().operands();
+
const auto input_idx{op.getInputs().at(ir::operation::Tile::Input::INPUT)};
- const auto &input = _operands.at(input_idx);
+ const auto &input = operands.at(input_idx);
const auto multiplier_idx{op.getInputs().at(ir::operation::Tile::Input::MULTIPLES)};
- const auto &multiplier = _operands.at(multiplier_idx);
+ const auto &multiplier = operands.at(multiplier_idx);
const auto output_idx = op.getOutputs().at(0);
- ir::Operand &output = _operands.at(output_idx);
+ ir::Operand &output = operands.at(output_idx);
if (!multiplier.isConstant())
{
output.info().setDynamic();
- _return_has_dynamic_tensor = true;
return;
}
@@ -1112,11 +1142,13 @@ void StaticShapeInferer::visit(const ir::operation::Tile &op)
void StaticShapeInferer::visit(const ir::operation::Transpose &op)
{
+ auto &operands = _lowered_subg->graph().operands();
+
const auto input_idx{op.getInputs().at(ir::operation::Transpose::Input::INPUT)};
- const auto &input = _operands.at(input_idx);
+ const auto &input = operands.at(input_idx);
const auto perm_idx{op.getInputs().at(ir::operation::Transpose::Input::PERMUTATION)};
- const auto &perm = _operands.at(perm_idx);
+ const auto &perm = operands.at(perm_idx);
// perm.shape() != ir::Shape{0} means that perm is (n-1...0)
// TODO This condition changes to perm.num_elements() == 0
@@ -1124,11 +1156,10 @@ void StaticShapeInferer::visit(const ir::operation::Transpose &op)
// get mutable output operand
const auto output_idx = op.getOutputs().at(0);
- auto &output = _operands.at(output_idx);
+ auto &output = operands.at(output_idx);
if (!perm.isConstant() && !is_regular_transpose)
{
output.info().setDynamic();
- _return_has_dynamic_tensor = true;
return;
}
@@ -1157,8 +1188,10 @@ void StaticShapeInferer::visit(const ir::operation::Transpose &op)
void StaticShapeInferer::visit(const ir::operation::Unpack &op)
{
+ auto &operands = _lowered_subg->graph().operands();
+
const auto input_idx{op.getInputs().at(0)};
- const auto &input = _operands.at(input_idx);
+ const auto &input = operands.at(input_idx);
const auto num = op.param().num;
const auto rank = input.shape().rank();
const auto axis = ((op.param().axis < 0) ? rank + op.param().axis : op.param().axis);
@@ -1169,10 +1202,9 @@ void StaticShapeInferer::visit(const ir::operation::Unpack &op)
for (int out_tensor_idx = 0; out_tensor_idx < num; out_tensor_idx++)
{
const auto output_idx = op.getOutputs().at(out_tensor_idx);
- ir::Operand &output = _operands.at(output_idx);
+ ir::Operand &output = operands.at(output_idx);
output.info().setDynamic();
}
- _return_has_dynamic_tensor = true;
return;
}
@@ -1182,69 +1214,43 @@ void StaticShapeInferer::visit(const ir::operation::Unpack &op)
for (int out_tensor_idx = 0; out_tensor_idx < num; out_tensor_idx++)
{
const auto output_idx = op.getOutputs().at(out_tensor_idx);
- ir::Operand &output = _operands.at(output_idx);
+ ir::Operand &output = operands.at(output_idx);
output.info().shape(new_shape);
}
}
void StaticShapeInferer::visit(const ir::operation::While &op)
{
- auto &cond_graph = _lowered_subgs.at(op.param().cond_subg_index)->graph();
- auto &body_graph = _lowered_subgs.at(op.param().body_subg_index)->graph();
+ auto body_input_observer = _subg_input_observers.at(op.param().body_subg_index).get();
+ auto cond_input_observer = _subg_input_observers.at(op.param().cond_subg_index).get();
+ // re-sizing input shapes of body subgraph
const auto inputs = op.getInputs();
- const auto &outputs = op.getOutputs();
-
- // re-sizing input shapes of then subgraph
- const auto &cond_inputs = cond_graph.getInputs();
- assert(inputs.size() == cond_inputs.size());
+ std::vector<ir::OperandInfo> inputs_info;
+ const auto &graph = _lowered_subg->graph();
for (size_t i = 0; i < inputs.size(); ++i)
{
- const auto &input = _operands.at(inputs.at(i));
- auto &cond_input = cond_graph.operands().at(cond_inputs.at(i));
- if (input.info().isDynamic())
- {
- cond_input.info().setDynamic();
- }
- else
- {
- auto new_shape = input.info().shape();
- cond_input.info().shape(new_shape);
- }
+ const auto &operand_info = graph.operands().at(inputs.at(i)).info();
+ inputs_info.emplace_back(operand_info);
}
- // re-sizing input shapes of body subgraph
- const auto &body_inputs = body_graph.getInputs();
- assert(cond_inputs.size() == body_inputs.size());
- for (size_t i = 0; i < cond_inputs.size(); ++i)
- {
- const auto &cond_input = cond_graph.operands().at(cond_inputs.at(i));
- auto &body_input = body_graph.operands().at(body_inputs.at(i));
- if (cond_input.info().isDynamic())
- {
- body_input.info().setDynamic();
- }
- else
- {
- const auto &new_shape = cond_input.info().shape();
- body_input.info().shape(new_shape);
- }
- }
-
- // re-sizing operands of body subgraph
- inferSubgraph(op.param().body_subg_index);
+ body_input_observer->updateShapes(inputs_info);
+ _child_inferers.at(op.param().body_subg_index)->infer();
// Check whether while operation's shapes are predictable
- // If any of shape of body outputs and cond inputs are different, non-constant operands would be
- // set to dynamic
+  // This while op's outputs are also updated by the call above,
+  // "_child_inferers.at(op.param().body_subg_index)->infer()", so the body's outputs and
+  // this op's outputs must have the same shape. We can therefore predict whether the body
+  // subgraph's shapes will change on every iteration by comparing the input and output shapes.
+  // If any body output shape differs from its input shape, non-constant operands are set to dynamic.
bool check_unpredictable_dynamic = false;
- const auto &body_outputs = body_graph.getOutputs();
- assert(body_outputs.size() == cond_inputs.size());
- for (size_t i = 0; i < body_outputs.size(); ++i)
+ const auto &updated_outputs = op.getOutputs();
+ assert(inputs_info.size() == updated_outputs.size());
+ for (size_t i = 0; i < updated_outputs.size(); ++i)
{
- const auto &body_output = body_graph.operands().at(body_outputs.at(i));
- auto &cond_input = cond_graph.operands().at(cond_inputs.at(i));
- if ((cond_input.info().isDynamic() != body_output.info().isDynamic()) ||
- (cond_input.shape() != body_output.shape()))
+ const auto &input_info = inputs_info.at(i);
+ const auto &output_info = graph.operands().at(updated_outputs.at(i)).info();
+ if (input_info.isDynamic() != output_info.isDynamic() ||
+ input_info.shape() != output_info.shape())
{
check_unpredictable_dynamic = true;
break;
@@ -1253,53 +1259,11 @@ void StaticShapeInferer::visit(const ir::operation::While &op)
if (check_unpredictable_dynamic)
{
- // Set inputs of body subgraph
- for (const auto &input_index : body_inputs)
- {
- auto &input = body_graph.operands().at(input_index);
- if (!input.isConstant())
- {
- input.info().setDynamic();
- }
- }
-
- // Set inputs of cond subgraph
- for (const auto &input_index : cond_inputs)
- {
- auto &input = cond_graph.operands().at(input_index);
- if (!input.isConstant())
- {
- input.info().setDynamic();
- }
- }
-
- // Set non-constant operands of body subgraph to dynamic
- inferSubgraph(op.param().body_subg_index);
- }
-
- // re-sizing operands of cond subgraph
- // If check_unpredictable_dynamic is true, non-constant operands of cond subgraph would be set to
- // dynamic
- inferSubgraph(op.param().cond_subg_index);
-
- // re-sizing outputs of while operation
- // If check_unpredictable_dynamic is true, outputs of while operation would be set to dynamic
- assert(cond_inputs.size() == outputs.size());
- for (size_t i = 0; i < cond_inputs.size(); ++i)
- {
- const auto &cond_input = cond_graph.operands().at(cond_inputs.at(i));
- auto &output = _operands.at(outputs.at(i));
- if (cond_input.info().isDynamic())
- {
- output.info().setDynamic();
- _return_has_dynamic_tensor = true;
- }
- else
- {
- const auto new_shape = cond_input.info().shape();
- output.info().shape(new_shape);
- }
+ body_input_observer->updateShapes(inputs_info, check_unpredictable_dynamic);
+ _child_inferers.at(op.param().body_subg_index)->infer();
}
+ cond_input_observer->updateShapes(inputs_info, check_unpredictable_dynamic);
+ _child_inferers.at(op.param().cond_subg_index)->infer();
}
void StaticShapeInferer::visit(const ir::operation::DetectionPostProcess &op)
@@ -1307,24 +1271,52 @@ void StaticShapeInferer::visit(const ir::operation::DetectionPostProcess &op)
// TODO: NMS supports very limited input/output size.
ir::operation::DetectionPostProcess::Param param = op.param();
+ auto &operands = _lowered_subg->graph().operands();
const int num_detected_boxes = param.max_detections * param.max_classes_per_detection;
const auto output_idx1 = op.getOutputs().at(0);
- auto &output1 = _operands.at(output_idx1);
+ auto &output1 = operands.at(output_idx1);
output1.info().shape({1, num_detected_boxes, 4});
const auto output_idx2 = op.getOutputs().at(1);
- auto &output2 = _operands.at(output_idx2);
+ auto &output2 = operands.at(output_idx2);
output2.info().shape({1, num_detected_boxes});
const auto output_idx3 = op.getOutputs().at(2);
- auto &output3 = _operands.at(output_idx3);
+ auto &output3 = operands.at(output_idx3);
output3.info().shape({1, num_detected_boxes});
const auto output_idx4 = op.getOutputs().at(3);
- auto &output4 = _operands.at(output_idx4);
+ auto &output4 = operands.at(output_idx4);
output4.info().shape({1});
}
+void StaticShapeInferer::visit(const ir::operation::Bulk &op)
+{
+ auto &operands = _lowered_subg->graph().operands();
+
+ // TODO: support multiple inputs/outputs
+ const auto input_idx{op.getInputs().at(0)};
+ const auto &input = operands.at(input_idx);
+ const auto output_idx = op.getOutputs().at(0);
+ ir::Operand &output = operands.at(output_idx);
+
+ auto cur_input_shape = input.info().shape();
+ auto origin_input_shape = op.param().origin_input_shapes[0];
+ auto cur_output_shape = output.info().shape();
+ auto origin_output_shape = op.param().origin_output_shapes[0];
+
+  // TODO: add more checks for a valid batch request
+ assert(cur_input_shape.dim(0) >= origin_output_shape.dim(0));
+ assert(cur_input_shape.dim(0) % origin_output_shape.dim(0) == 0);
+ size_t batch_multiplier = cur_input_shape.dim(0) / origin_output_shape.dim(0);
+
+ ir::Shape new_shape;
+ new_shape.append(origin_output_shape.dim(0) * batch_multiplier);
+ for (int32_t d = 1; d < origin_output_shape.rank(); ++d)
+ new_shape.append(origin_output_shape.dim(d));
+
+ output.info().shape(new_shape);
+}
} // namespace compiler
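The following is a minimal standalone sketch of the batch-multiplier arithmetic used by the new Bulk visitor above; the concrete shapes and the use of plain std::vector instead of onert's ir::Shape are illustrative assumptions, not code from the runtime.

#include <cassert>
#include <cstdint>
#include <iostream>
#include <vector>

int main()
{
  // Hypothetical shapes: the current input already carries the requested batch.
  std::vector<int32_t> cur_input_shape{4, 3, 224, 224};  // N = 4
  std::vector<int32_t> origin_output_shape{1, 1000};     // model compiled for N = 1

  // Same checks as the visitor: the current batch must be a multiple of the origin batch.
  assert(cur_input_shape[0] >= origin_output_shape[0]);
  assert(cur_input_shape[0] % origin_output_shape[0] == 0);
  const int32_t batch_multiplier = cur_input_shape[0] / origin_output_shape[0];

  // The new output shape keeps every dimension except the batch, which is scaled up.
  std::vector<int32_t> new_shape{origin_output_shape[0] * batch_multiplier};
  for (size_t d = 1; d < origin_output_shape.size(); ++d)
    new_shape.push_back(origin_output_shape[d]);

  for (auto dim : new_shape)
    std::cout << dim << ' '; // prints: 4 1000
  std::cout << '\n';
  return 0;
}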
diff --git a/runtime/onert/core/src/compiler/TensorRegistries.h b/runtime/onert/core/src/compiler/TensorRegistries.h
index 2a99db781..b3cc0bbe3 100644
--- a/runtime/onert/core/src/compiler/TensorRegistries.h
+++ b/runtime/onert/core/src/compiler/TensorRegistries.h
@@ -17,13 +17,14 @@
#ifndef __ONERT_COMPILER_TENSOR_REGISTRIES_H__
#define __ONERT_COMPILER_TENSOR_REGISTRIES_H__
-#include <unordered_set>
-#include <memory>
-#include "backend/BackendContext.h"
+#include "../backend/builtin/Config.h"
+#include "../backend/builtin/TensorRegistry.h"
+
#include "backend/Backend.h"
-#include "backend/builtin/Config.h"
-#include "backend/builtin/TensorBuilder.h"
-#include "backend/builtin/TensorRegistry.h"
+#include "backend/BackendContext.h"
+
+#include <memory>
+#include <unordered_set>
namespace onert
{
diff --git a/runtime/onert/core/src/compiler/pass/PermutationEliminationPass.cc b/runtime/onert/core/src/compiler/pass/PermutationEliminationPass.cc
index 181f388de..c27ce3d09 100644
--- a/runtime/onert/core/src/compiler/pass/PermutationEliminationPass.cc
+++ b/runtime/onert/core/src/compiler/pass/PermutationEliminationPass.cc
@@ -15,7 +15,6 @@
*/
#include "PermutationEliminationPass.h"
-#include "backend/builtin/Config.h"
#include "util/logging.h"
diff --git a/runtime/onert/core/src/compiler/pass/PermutationInsertionPass.cc b/runtime/onert/core/src/compiler/pass/PermutationInsertionPass.cc
index 6f9899114..71efa1bb5 100644
--- a/runtime/onert/core/src/compiler/pass/PermutationInsertionPass.cc
+++ b/runtime/onert/core/src/compiler/pass/PermutationInsertionPass.cc
@@ -17,18 +17,16 @@
#include "PermutationInsertionPass.h"
-#include <cassert>
-#include <utility>
-#include <unordered_map>
+#include "../../backend/builtin/Config.h"
-#include "backend/builtin/Config.h"
-#include "ir/Operand.h"
#include "compiler/OperationLowerInfo.h"
-#include "ir/Graph.h"
-#include "backend/IConfig.h"
+#include "ir/operation/Permute.h"
#include "util/logging.h"
+
+#include <cassert>
#include <memory>
-#include "ir/operation/Permute.h"
+#include <unordered_map>
+#include <utility>
namespace onert
{
@@ -125,6 +123,8 @@ ir::OperationIndex PermutationInsertionPass::insertPermute(const ir::OperandInde
// backend
auto &model_outputs = _graph.getOutputs();
const backend::Backend *builtin_backend = compiler::BackendManager::get().getBuiltin();
+ assert(builtin_backend->config()->id() == onert::backend::builtin::Config::ID);
+
if (model_outputs.contains(operand_index) && factor.backend() == builtin_backend)
{
model_outputs.replace(operand_index, out_operand_index);
@@ -141,6 +141,8 @@ ir::OperationIndex PermutationInsertionPass::insertPermute(const ir::OperandInde
const auto permute_node_layout = ir::Layout::UNKNOWN;
  // NOTE If one backend supports several layouts, the backend must support Permute operation
const backend::Backend *permute_node_backend = compiler::BackendManager::get().getBuiltin();
+ assert(permute_node_backend->config()->id() == onert::backend::builtin::Config::ID);
+
if (input_backend == output_backend)
{
permute_node_backend = input_backend;
diff --git a/runtime/onert/test/core/compiler/pass/UnusedOperandEliminationPass.cc b/runtime/onert/core/src/compiler/pass/UnusedOperandEliminationPass.test.cc
index b18dedd15..572b4df24 100644
--- a/runtime/onert/test/core/compiler/pass/UnusedOperandEliminationPass.cc
+++ b/runtime/onert/core/src/compiler/pass/UnusedOperandEliminationPass.test.cc
@@ -14,9 +14,11 @@
* limitations under the License.
*/
-#include <gtest/gtest.h>
+#include "UnusedOperandEliminationPass.h"
+
#include "ir/Graph.h"
-#include "compiler/pass/UnusedOperandEliminationPass.h"
+
+#include <gtest/gtest.h>
using namespace onert::ir;
using namespace onert::compiler::pass;
diff --git a/runtime/onert/core/src/dumper/dot/DotDumper.cc b/runtime/onert/core/src/dumper/dot/DotDumper.cc
index 714fb6fda..0bb2fa11f 100644
--- a/runtime/onert/core/src/dumper/dot/DotDumper.cc
+++ b/runtime/onert/core/src/dumper/dot/DotDumper.cc
@@ -19,6 +19,7 @@
#include "DotDumper.h"
#include "DotBuilder.h"
+#include "ir/OperandIndexMap.h"
#include "ir/OperationIndexMap.h"
#include "backend/Backend.h"
#include "backend/IConfig.h"
@@ -31,97 +32,72 @@ namespace dumper
namespace dot
{
-void DotDumper::dump(const std::string &tag)
+namespace
{
- if (_level == Level::OFF)
- {
- return;
- }
-
- onert::dumper::dot::DotBuilder dot_builder;
-
- auto &operations = _graph.operations();
- auto &operands = _graph.operands();
-
- ir::OperationIndexMap<std::unique_ptr<Operation>> operation_nodes;
- std::unordered_map<ir::OperandIndex, std::unique_ptr<Operand>> operand_nodes;
-
- auto backend_to_fillcolor = [](const backend::Backend *backend) {
- static const auto map = []() {
- std::unordered_map<const backend::Backend *, std::string> ret;
- uint32_t index = 1; // Start from 1 to avoid 0(red) which is too dark :(
- for (const auto backend : compiler::BackendManager::get().getAll())
- {
- ret.emplace(backend, Node::BG_COLORS[index]);
- index = (index + 1) % (sizeof(Node::BG_COLORS) / sizeof(Node::BG_COLORS[0]));
- }
- return ret;
- }();
-
- auto itr = map.find(backend);
- if (itr == map.end())
- {
- return Node::DEFAULT_FILLCOLOR;
- }
- else
+std::string backend_to_fillcolor(const backend::Backend *backend)
+{
+ static const auto map = []() {
+ std::unordered_map<const backend::Backend *, std::string> ret;
+ uint32_t index = 1; // Start from 1 to avoid 0(red) which is too dark :(
+ for (const auto backend : compiler::BackendManager::get().getAll())
{
- return itr->second;
+ ret.emplace(backend, Node::BG_COLORS[index]);
+ index = (index + 1) % (sizeof(Node::BG_COLORS) / sizeof(Node::BG_COLORS[0]));
}
- };
+ return ret;
+ }();
+ auto itr = map.find(backend);
+ if (itr == map.end())
+ {
+ return Node::DEFAULT_FILLCOLOR;
+ }
+ else
+ {
+ return itr->second;
+ }
+}
- util::Set<ir::OperandIndex> shown_operand_set;
+std::unordered_map<ir::OperandIndex, std::unique_ptr<Operand>>
+generate_dot_operands(const ir::Graph &graph, const DotDumper::Level level)
+{
+ std::unordered_map<ir::OperandIndex, std::unique_ptr<Operand>> dot_operands;
+ const auto &operands = graph.operands();
operands.iterate([&](const ir::OperandIndex &index, const ir::Operand &object) {
- bool showing_cond = false;
- if (_level == Level::ALL)
- {
- showing_cond = true;
- }
- else
- {
- showing_cond =
- !object.isConstant() || (_graph.getInputs() + _graph.getOutputs()).contains(index);
- }
+ bool showing_cond =
+ level == DotDumper::Level::ALL
+ ? true
+ : !object.isConstant() || (graph.getInputs() + graph.getOutputs()).contains(index);
if (showing_cond)
{
- shown_operand_set.add(index);
-
auto type = [&]() {
using onert::dumper::dot::Operand;
- if (_graph.getInputs().contains(index))
+ if (graph.getInputs().contains(index))
return Operand::Type::MODEL_INPUT;
- if (_graph.getOutputs().contains(index))
+ if (graph.getOutputs().contains(index))
return Operand::Type::MODEL_OUTPUT;
return Operand::Type::INTERNAL;
}();
auto node = std::make_unique<Operand>(index, type);
+ std::string label = std::to_string(index.value());
+ std::string fillcolor = "";
+ node->setAttribute("label", label);
+ node->setAttribute("fillcolor", fillcolor);
- {
- // Display LowerInfo attributes
- std::string label = std::to_string(index.value());
- std::string fillcolor = "";
- if (_lowered_graph)
- {
- auto lower_info = _lowered_graph->lower_info().operand.getRawPtr(index);
- const auto &def_factors = lower_info->def_factors();
- if (def_factors.size() > 0)
- {
- label += "\\n[";
- label += def_factors.getOnlyElement().backend()->config()->id();
- label += "]";
-
- fillcolor = backend_to_fillcolor(lower_info->def_factors().getOnlyElement().backend());
- }
- }
- node->setAttribute("label", label);
- node->setAttribute("fillcolor", fillcolor);
- }
-
- operand_nodes.emplace(index, std::move(node));
+ dot_operands.emplace(index, std::move(node));
}
});
+ return dot_operands;
+}
+
+ir::OperationIndexMap<std::unique_ptr<Operation>>
+generate_dot_operations(const ir::Graph &graph,
+ const ir::OperandIndexMap<std::unique_ptr<Operand>> &dot_operands)
+{
+ ir::OperationIndexMap<std::unique_ptr<Operation>> dot_operations;
+ const auto &operations = graph.operations();
operations.iterate([&](const ir::OperationIndex &index, const ir::Operation &op) {
auto node = std::make_unique<Operation>(index, op);
@@ -130,42 +106,79 @@ void DotDumper::dump(const std::string &tag)
using onert::dumper::dot::Operand;
// Constant input and dump level is ALL_BUT_CONSTANTS
- if (operand_nodes.find(input) == operand_nodes.end())
+ if (dot_operands.find(input) == dot_operands.end())
continue;
- auto &input_node = operand_nodes.at(input);
+ auto &input_node = dot_operands.at(input);
input_node->addOutEdge(node.get());
}
for (auto output : op.getOutputs() | ir::Remove::UNDEFINED)
{
using onert::dumper::dot::Operand;
- auto &output_node = operand_nodes.at(output);
+ auto &output_node = dot_operands.at(output);
node->addOutEdge(output_node.get());
}
- operation_nodes.emplace(index, std::move(node));
+ dot_operations.emplace(index, std::move(node));
});
- if (_lowered_graph)
- {
- _graph.operations().iterate([&](const ir::OperationIndex &index, const ir::Operation &) {
- const auto lower_info = _lowered_graph->lower_info().operation.getRawPtr(index);
- if (lower_info)
+ return dot_operations;
+}
+
+void update_lower_info(const compiler::LoweredGraph &lowered_graph,
+ ir::OperandIndexMap<std::unique_ptr<Operand>> *dot_operands)
+{
+ const auto &operands = lowered_graph.graph().operands();
+ operands.iterate([&](const ir::OperandIndex &index, const ir::Operand &) {
+ auto itr = dot_operands->find(index);
+ if (itr != dot_operands->end())
+ {
+ auto &node = itr->second;
+ // Display LowerInfo attributes
+ std::string label = node->getAttribute("label");
+ std::string fillcolor = node->getAttribute("fillcolor");
+ auto lower_info = lowered_graph.lower_info().operand.getRawPtr(index);
+ const auto &def_factors = lower_info->def_factors();
+ if (def_factors.size() > 0)
{
- auto fillcolor = backend_to_fillcolor(lower_info->backend());
- std::string backend_label = "[" + lower_info->backend()->config()->id() + "]";
- auto itr = operation_nodes.find(index);
- if (itr != operation_nodes.end())
- {
- auto &node = itr->second;
- node->setAttribute("label", node->getAttribute("label") + "\n" + backend_label);
- node->setAttribute("fillcolor", fillcolor);
- }
+ label += "\\n[";
+ label += def_factors.getOnlyElement().backend()->config()->id();
+ label += "]";
+ fillcolor = backend_to_fillcolor(lower_info->def_factors().getOnlyElement().backend());
}
- });
- }
+ node->setAttribute("label", label);
+ node->setAttribute("fillcolor", fillcolor);
+ }
+ });
+}
+void update_lower_info(const compiler::LoweredGraph &lowered_graph,
+ ir::OperationIndexMap<std::unique_ptr<Operation>> *dot_operations)
+{
+ const auto &operations = lowered_graph.graph().operations();
+ operations.iterate([&](const ir::OperationIndex &index, const ir::Operation &) {
+ const auto lower_info = lowered_graph.lower_info().operation.getRawPtr(index);
+ if (lower_info)
+ {
+ auto fillcolor = backend_to_fillcolor(lower_info->backend());
+ std::string backend_label = "[" + lower_info->backend()->config()->id() + "]";
+ auto itr = dot_operations->find(index);
+ if (itr != dot_operations->end())
+ {
+ auto &node = itr->second;
+ node->setAttribute("label", node->getAttribute("label") + "\n" + backend_label);
+ node->setAttribute("fillcolor", fillcolor);
+ }
+ }
+ });
+}
+
+void dump_to_file(const ir::OperandIndexMap<std::unique_ptr<Operand>> &operand_nodes,
+ const ir::OperationIndexMap<std::unique_ptr<Operation>> &operation_nodes,
+ const std::string &tag)
+{
+ onert::dumper::dot::DotBuilder dot_builder;
for (const auto &e : operation_nodes)
dot_builder.update(*e.second);
for (const auto &e : operand_nodes)
@@ -186,6 +199,33 @@ void DotDumper::dump(const std::string &tag)
fb.close();
}
}
+} // namespace
+
+void DotDumper::dump(const ir::Graph &graph, const std::string &tag)
+{
+ if (_level == Level::OFF)
+ {
+ return;
+ }
+
+ const auto dot_operands = generate_dot_operands(graph, _level);
+ const auto dot_operations = generate_dot_operations(graph, dot_operands);
+ dump_to_file(dot_operands, dot_operations, tag);
+}
+
+void DotDumper::dump(const compiler::LoweredGraph &lowered_graph, const std::string &tag)
+{
+ if (_level == Level::OFF)
+ {
+ return;
+ }
+
+ auto dot_operands = generate_dot_operands(lowered_graph.graph(), _level);
+ auto dot_operations = generate_dot_operations(lowered_graph.graph(), dot_operands);
+ update_lower_info(lowered_graph, &dot_operands);
+ update_lower_info(lowered_graph, &dot_operations);
+ dump_to_file(dot_operands, dot_operations, tag);
+}
} // namespace dot
} // namespace dumper
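As a usage sketch of the refactored, stateless dumper: the same instance can now dump both the plain graph and the lowered graph, since it no longer holds a graph reference. The include path and the wrapper function name below are assumptions made only for illustration.

#include "dumper/dot/DotDumper.h" // internal header; path is an assumption

// Hypothetical helper showing the intended call sites of the two dump() overloads.
void dump_compile_stages(const onert::ir::Graph &graph,
                         const onert::compiler::LoweredGraph &lowered_graph)
{
  onert::dumper::dot::DotDumper dumper(onert::dumper::dot::DotDumper::Level::ALL);
  dumper.dump(graph, "before_lowering");        // operands/operations only
  dumper.dump(lowered_graph, "after_lowering"); // adds backend lower-info labels and colors
}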
diff --git a/runtime/onert/core/src/dumper/dot/DotDumper.h b/runtime/onert/core/src/dumper/dot/DotDumper.h
index f300c3432..6249010d3 100644
--- a/runtime/onert/core/src/dumper/dot/DotDumper.h
+++ b/runtime/onert/core/src/dumper/dot/DotDumper.h
@@ -38,27 +38,28 @@ public:
};
public:
- DotDumper(const ir::Graph &graph, Level level)
- : _lowered_graph{nullptr}, _graph(graph), _level{level}
- {
- }
- DotDumper(const compiler::LoweredGraph *lowered_graph, Level level)
- : _lowered_graph{lowered_graph}, _graph(_lowered_graph->graph()), _level{level}
- {
- }
+ DotDumper(Level level) : _level{level} {}
public:
/**
- * @brief Dump to dot file as tag name if "GRAPH_DOT_DUMP" is set
+ * @brief Dump graph information to dot file as tag name if "GRAPH_DOT_DUMP" is set
+ *
+ * @param[in] graph The graph that would be used to get operations and operands
+ * @param[in] tag The name of dot file that would be created
+ * @return N/A
+ */
+ void dump(const ir::Graph &graph, const std::string &tag);
+
+ /**
+ * @brief Dump lowered graph information to dot file as tag name if "GRAPH_DOT_DUMP" is set
*
+   * @param[in] lowered_graph The lowered graph that would be used to get operations and operands
* @param[in] tag The name of dot file that would be created
* @return N/A
*/
- void dump(const std::string &tag);
+ void dump(const compiler::LoweredGraph &lowered_graph, const std::string &tag);
private:
- const compiler::LoweredGraph *_lowered_graph;
- const ir::Graph &_graph;
Level _level;
};
diff --git a/runtime/onert/core/src/exec/DataflowExecutor.h b/runtime/onert/core/src/exec/DataflowExecutor.h
index bcac19d2e..1649be733 100644
--- a/runtime/onert/core/src/exec/DataflowExecutor.h
+++ b/runtime/onert/core/src/exec/DataflowExecutor.h
@@ -17,19 +17,18 @@
#ifndef __ONERT_EXEC_DATAFLOW_EXECUTOR_H__
#define __ONERT_EXEC_DATAFLOW_EXECUTOR_H__
-#include <list>
-#include <map>
-#include <unordered_map>
-
-#include "exec/FunctionSequence.h"
+#include "ExecutorBase.h"
#include "Job.h"
-#include "ir/OperandIndexSequence.h"
-#include "ir/Index.h"
-#include <memory>
-#include "exec/ExecutorBase.h"
+
#include "compiler/CodeMap.h"
+#include "ir/OperandIndexSequence.h"
#include "util/TracingCtx.h"
+#include <list>
+#include <map>
+#include <memory>
+#include <unordered_map>
+
namespace onert
{
namespace exec
diff --git a/runtime/onert/core/src/exec/ExecTime.cc b/runtime/onert/core/src/exec/ExecTime.cc
index 6bf2744a9..4b82655b9 100644
--- a/runtime/onert/core/src/exec/ExecTime.cc
+++ b/runtime/onert/core/src/exec/ExecTime.cc
@@ -14,12 +14,10 @@
* limitations under the License.
*/
-#include "exec/ExecTime.h"
+#include "ExecTime.h"
-#include <fstream>
-#include <cassert>
-#include <limits>
#include <algorithm>
+#include <cassert>
namespace onert
{
diff --git a/runtime/onert/test/core/exec/ExecTime.test.cc b/runtime/onert/core/src/exec/ExecTime.test.cc
index 178b61ea5..1f7152e7b 100644
--- a/runtime/onert/test/core/exec/ExecTime.test.cc
+++ b/runtime/onert/core/src/exec/ExecTime.test.cc
@@ -14,10 +14,13 @@
* limitations under the License.
*/
-#include "exec/ExecTime.h"
+#include "ExecTime.h"
+
#include "backend/IConfig.h"
#include "backend/Backend.h"
+
#include <gtest/gtest.h>
+
#include <string>
namespace
diff --git a/runtime/onert/core/src/exec/Execution.cc b/runtime/onert/core/src/exec/Execution.cc
index 8eff73bac..9d1e06d6c 100644
--- a/runtime/onert/core/src/exec/Execution.cc
+++ b/runtime/onert/core/src/exec/Execution.cc
@@ -23,13 +23,12 @@ namespace onert
namespace exec
{
-Execution::Execution(const std::shared_ptr<ExecutorMap> &executors) : _executors{executors}
+Execution::Execution(const std::shared_ptr<Executors> &executors) : _executors{executors}
{
assert(executors != nullptr);
assert(executors->at(ir::SubgraphIndex{0}) != nullptr);
- const auto &primary_subg = primary_subgraph();
- _io_desc.inputs.resize(primary_subg.getInputs().size());
- _io_desc.outputs.resize(primary_subg.getOutputs().size());
+ _io_desc.inputs.resize(_executors->inputSize());
+ _io_desc.outputs.resize(_executors->outputSize());
sem_init(&_async_io_descs_sem, 0, 1);
}
@@ -48,8 +47,7 @@ void Execution::changeInputShape(const ir::IOIndex &index, const ir::Shape &new_
void Execution::setInput(const ir::IOIndex &index, const void *buffer, size_t length,
ir::Layout layout)
{
- const auto input_index = primary_subgraph().getInputs().at(index);
- const auto info = primary_subgraph().operands().at(input_index).info();
+ const auto info = _executors->inputInfo(index);
// TODO handle when (!buffer && length != 0) : setting the input as an optional tensor
@@ -105,8 +103,7 @@ bool Execution::isEmptyQueue()
void Execution::executeAsyncInput(const ir::IOIndex &index, const void *buffer, size_t length,
ir::Layout layout)
{
- const auto input_index = primary_subgraph().getInputs().at(index);
- const auto info = primary_subgraph().operands().at(input_index).info();
+ const auto info = _executors->inputInfo(index);
IODescription *_async_io_desc = _async_io_descs.back().first;
{
@@ -135,8 +132,7 @@ void Execution::executeAsyncInput(const ir::IOIndex &index, const void *buffer,
void Execution::executeAsyncOutput(const ir::IOIndex &index, void *buffer, size_t length,
ir::Layout layout)
{
- const auto output_index = primary_subgraph().getOutputs().at(index);
- const auto info = primary_subgraph().operands().at(output_index).info();
+ const auto info = _executors->outputInfo(index);
IODescription *_async_io_desc = _async_io_descs.front().first;
if (length < info.total_size())
@@ -165,8 +161,7 @@ void Execution::setInput(const ir::IOIndex &index, const ir::TypeInfo &type, con
// TODO Remove default parameter
void Execution::setOutput(const ir::IOIndex &index, void *buffer, size_t length, ir::Layout layout)
{
- const auto output_index = primary_subgraph().getOutputs().at(index);
- const auto info = primary_subgraph().operands().at(output_index).info();
+ const auto info = _executors->outputInfo(index);
if (length < info.total_size())
{
@@ -208,7 +203,7 @@ void Execution::execute()
{
VERBOSE(Execution) << "Start execution" << std::endl;
- primary_executor()->execute(_io_desc);
+ _executors->execute(_io_desc);
finished = true;
VERBOSE(Execution) << "Execution finished" << std::endl;
@@ -248,8 +243,7 @@ ir::Shape Execution::getInputShape(ir::IOIndex ind) const
auto itr = _io_desc.dynamic_input_shapes.find(ind);
if (itr == _io_desc.dynamic_input_shapes.end())
{
- auto operand_idx = primary_subgraph().getInputs().at(ind);
- return primary_subgraph().operands().at(operand_idx).shape();
+ return _executors->inputInfo(ind).shape();
}
else
{
diff --git a/runtime/onert/test/core/exec/ExecInstance.cc b/runtime/onert/core/src/exec/Execution.test.cc
index 0183b6276..e3ea49470 100644
--- a/runtime/onert/test/core/exec/ExecInstance.cc
+++ b/runtime/onert/core/src/exec/Execution.test.cc
@@ -14,15 +14,16 @@
* limitations under the License.
*/
-#include <gtest/gtest.h>
-#include <thread>
+#include "exec/Execution.h"
-#include "ir/Graph.h"
#include "compiler/Compiler.h"
-#include "exec/Execution.h"
+#include "ir/Graph.h"
#include "ir/operation/BinaryArithmetic.h"
#include "util/TracingCtx.h"
+#include <gtest/gtest.h>
+#include <thread>
+
namespace
{
@@ -76,24 +77,24 @@ public:
graph->verify();
// Compile
- auto subgs = std::make_shared<onert::ir::Subgraphs>();
- subgs->push(onert::ir::SubgraphIndex{0}, graph);
- tracing_ctx = std::make_unique<onert::util::TracingCtx>(subgs.get());
- onert::compiler::Compiler compiler{subgs, tracing_ctx.get()};
- executors = compiler.compile();
+ auto model = std::make_shared<onert::ir::Model>();
+ model->push(onert::ir::SubgraphIndex{0}, graph);
+ coptions = onert::compiler::CompilerOptions::fromGlobalConfig();
+ onert::compiler::Compiler compiler{model, *coptions};
+ artifact = compiler.compile();
}
public:
std::shared_ptr<Graph> graph;
- std::shared_ptr<onert::exec::ExecutorMap> executors;
- std::unique_ptr<onert::util::TracingCtx> tracing_ctx;
+ std::unique_ptr<onert::compiler::CompilerOptions> coptions;
+ std::shared_ptr<onert::compiler::CompilerArtifact> artifact;
};
TEST(ExecInstance, simple)
{
auto mockup = CompiledMockUpModel();
auto graph = mockup.graph;
- auto executors = mockup.executors;
+ auto executors = mockup.artifact->_executors;
auto input1 = IOIndex{0};
auto input2 = IOIndex{1};
@@ -121,7 +122,7 @@ TEST(ExecInstance, twoCompile)
{
auto mockup = CompiledMockUpModel();
auto graph = mockup.graph;
- auto executors1 = mockup.executors;
+ auto executors1 = mockup.artifact->_executors;
onert::exec::Execution execution1{executors1};
auto input1 = IOIndex{0};
@@ -138,12 +139,12 @@ TEST(ExecInstance, twoCompile)
execution1.setOutput(output, reinterpret_cast<void *>(exe1_output_buffer), 16);
// Make new executor: compile again
- auto subgs = std::make_shared<onert::ir::Subgraphs>();
- subgs->push(onert::ir::SubgraphIndex{0}, graph);
- auto tracing_ctx = std::make_unique<onert::util::TracingCtx>(subgs.get());
- onert::compiler::Compiler compiler{subgs, tracing_ctx.get()};
- std::shared_ptr<onert::exec::ExecutorMap> executors2 = compiler.compile();
- onert::exec::Execution execution2{executors2};
+ auto model = std::make_shared<onert::ir::Model>();
+ model->push(onert::ir::SubgraphIndex{0}, graph);
+ auto coptions = onert::compiler::CompilerOptions::fromGlobalConfig();
+ onert::compiler::Compiler compiler{model, *coptions};
+ std::shared_ptr<onert::compiler::CompilerArtifact> artifact = compiler.compile();
+ onert::exec::Execution execution2{artifact->_executors};
const float exe2_input1_buffer[4] = {2, 1, -2, 0};
const float exe2_input2_buffer[4] = {-3, 3, 1, 2};
@@ -168,7 +169,7 @@ TEST(ExecInstance, twoCompile)
TEST(ExecInstance, twoExecution)
{
auto mockup = CompiledMockUpModel();
- auto executors = mockup.executors;
+ auto executors = mockup.artifact->_executors;
auto input1 = IOIndex{0};
auto input2 = IOIndex{1};
auto output1 = IOIndex{0};
@@ -208,7 +209,7 @@ class Inference
{
public:
Inference(const float (&input1)[4], const float (&input2)[4], float (&output)[4],
- std::shared_ptr<onert::exec::ExecutorMap> &executors)
+ std::shared_ptr<onert::exec::Executors> &executors)
: _input1{input1}, _input2{input2}, _output{output}, _executors{executors}
{
// DO NOTHING
@@ -232,14 +233,14 @@ private:
const float (&_input1)[4];
const float (&_input2)[4];
float (&_output)[4];
- std::shared_ptr<onert::exec::ExecutorMap> &_executors;
+ std::shared_ptr<onert::exec::Executors> &_executors;
};
// Support multi-thread execution
TEST(ExecInstance, twoThreads)
{
auto mockup = CompiledMockUpModel();
- auto executors = mockup.executors;
+ auto executors = mockup.artifact->_executors;
const float exe1_input1_buffer[4] = {1, 0, -1, -2};
const float exe1_input2_buffer[4] = {1, -3, 2, -4};
@@ -273,7 +274,7 @@ TEST(ExecInstance, async)
{
auto mockup = CompiledMockUpModel();
auto graph = mockup.graph;
- auto executors = mockup.executors;
+ auto executors = mockup.artifact->_executors;
auto input1 = IOIndex{0};
auto input2 = IOIndex{1};
diff --git a/runtime/onert/core/src/exec/ExecutionObservee.h b/runtime/onert/core/src/exec/ExecutionObservee.h
index 423b5026b..3ee1754c9 100644
--- a/runtime/onert/core/src/exec/ExecutionObservee.h
+++ b/runtime/onert/core/src/exec/ExecutionObservee.h
@@ -17,11 +17,12 @@
#ifndef __ONERT_EXEC_EXECUTION_OBSERVEE_H__
#define __ONERT_EXEC_EXECUTION_OBSERVEE_H__
-#include <list>
+#include "ExecutionObservers.h"
-#include "exec/ExecutionObservers.h"
#include "ir/Index.h"
+#include <list>
+
namespace onert
{
namespace exec
diff --git a/runtime/onert/core/src/exec/ExecutionObservers.cc b/runtime/onert/core/src/exec/ExecutionObservers.cc
index 386178ae6..9abde7ba4 100644
--- a/runtime/onert/core/src/exec/ExecutionObservers.cc
+++ b/runtime/onert/core/src/exec/ExecutionObservers.cc
@@ -14,16 +14,16 @@
* limitations under the License.
*/
-#include "exec/ExecutionObservers.h"
+#include "ExecutionObservers.h"
-#include <string>
-#include <sstream>
+#include "../util/EventWriter.h"
#include "util/logging.h"
-#include "exec/IExecutor.h"
-#include "misc/polymorphic_downcast.h"
-#include "ir/Operation.h"
-#include "util/EventWriter.h"
+
+#include <misc/polymorphic_downcast.h>
+
+#include <string>
+#include <sstream>
namespace
{
diff --git a/runtime/onert/core/src/exec/ExecutionObservers.h b/runtime/onert/core/src/exec/ExecutionObservers.h
index 4c6c7b18e..1aadac2f5 100644
--- a/runtime/onert/core/src/exec/ExecutionObservers.h
+++ b/runtime/onert/core/src/exec/ExecutionObservers.h
@@ -17,17 +17,16 @@
#ifndef __ONERT_EXEC_OBSREVERS_H__
#define __ONERT_EXEC_OBSREVERS_H__
-#include "exec/IFunction.h"
+#include "ExecTime.h"
+#include "../util/EventCollector.h"
+#include "../util/EventRecorder.h"
+#include "../util/EventWriter.h"
+
+#include "exec/Executors.h"
#include "ir/Index.h"
#include "ir/Operation.h"
-#include "ExecTime.h"
#include "util/ITimer.h"
-#include "exec/IExecutor.h"
-#include "util/EventCollector.h"
-#include "util/EventRecorder.h"
-#include "util/EventWriter.h"
#include "util/TracingCtx.h"
-#include "util/EventWriter.h"
namespace onert
{
diff --git a/runtime/onert/core/src/exec/ExecutorBase.cc b/runtime/onert/core/src/exec/ExecutorBase.cc
index efc22cfa5..d2d204a0b 100644
--- a/runtime/onert/core/src/exec/ExecutorBase.cc
+++ b/runtime/onert/core/src/exec/ExecutorBase.cc
@@ -15,11 +15,10 @@
*/
#include "ExecutorBase.h"
+
#include "ShapeConverter.h"
-#include "backend/builtin/UserTensor.h"
-#include "util/logging.h"
-#include "misc/polymorphic_downcast.h"
+#include <misc/polymorphic_downcast.h>
namespace onert
{
diff --git a/runtime/onert/core/src/exec/ExecutorBase.h b/runtime/onert/core/src/exec/ExecutorBase.h
index c0f609d11..e4f914546 100644
--- a/runtime/onert/core/src/exec/ExecutorBase.h
+++ b/runtime/onert/core/src/exec/ExecutorBase.h
@@ -17,22 +17,17 @@
#ifndef __ONERT_EXEC_EXECUTOR_BASE_H__
#define __ONERT_EXEC_EXECUTOR_BASE_H__
-#include "IPermuteFunction.h"
+#include "ExecutionObservee.h"
+#include "../backend/builtin/IOTensor.h"
+#include "../compiler/TensorRegistries.h"
+
+#include "compiler/LoweredGraph.h"
#include "exec/IExecutor.h"
-#include "exec/ExecTime.h"
-#include "exec/ExecutionObservee.h"
-#include "exec/IFunction.h"
#include "exec/IODescription.h"
#include "ir/Graph.h"
-#include "ir/Index.h"
-#include "compiler/GraphLowerInfo.h"
#include "ir/OperationIndexMap.h"
-#include "compiler/LoweredGraph.h"
-#include "compiler/TensorRegistries.h"
-#include "backend/builtin/IOTensor.h"
#include "util/TracingCtx.h"
-#include <cstdint>
#include <memory>
#include <mutex>
#include <vector>
diff --git a/runtime/onert/core/src/exec/Executors.cc b/runtime/onert/core/src/exec/Executors.cc
new file mode 100644
index 000000000..e0ee24fea
--- /dev/null
+++ b/runtime/onert/core/src/exec/Executors.cc
@@ -0,0 +1,183 @@
+/*
+ * Copyright (c) 2022 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "exec/Executors.h"
+
+namespace onert
+{
+namespace exec
+{
+
+uint32_t Executors::inputSize() const
+{
+ return _model_edges ? _model_edges->pkg_inputs.size()
+ : _executors.at(ir::SubgraphIndex{0})->graph().getInputs().size();
+}
+
+uint32_t Executors::outputSize() const
+{
+ return _model_edges ? _model_edges->pkg_outputs.size()
+ : _executors.at(ir::SubgraphIndex{0})->graph().getOutputs().size();
+}
+
+const ir::OperandInfo Executors::inputInfo(const ir::IOIndex &index)
+{
+ if (_model_edges)
+ {
+ // Assume that each model may have only one subgraph
+ // TODO handle general case
+ const auto desc = _model_edges->pkg_inputs[index.value()];
+ const auto model_idx = std::get<0>(desc);
+ const auto executor_idx = ir::SubgraphIndex{model_idx.value()};
+ const auto input_index = _executors.at(executor_idx)->graph().getInputs().at(std::get<2>(desc));
+ return _executors.at(executor_idx)->graph().operands().at(input_index).info();
+ }
+
+ const auto input_index = _executors.at(ir::SubgraphIndex{0})->graph().getInputs().at(index);
+ return _executors.at(ir::SubgraphIndex{0})->graph().operands().at(input_index).info();
+}
+
+const ir::OperandInfo Executors::outputInfo(const ir::IOIndex &index)
+{
+ if (_model_edges)
+ {
+ // Assume that each model may have only one subgraph
+ // TODO handle general case
+ auto desc = _model_edges->pkg_outputs[index.value()];
+ auto model_idx = std::get<0>(desc);
+ auto executor_idx = ir::SubgraphIndex{model_idx.value()};
+ auto output_index = _executors.at(executor_idx)->graph().getOutputs().at(std::get<2>(desc));
+ return _executors.at(executor_idx)->graph().operands().at(output_index).info();
+ }
+
+ auto output_index = _executors.at(ir::SubgraphIndex{0})->graph().getOutputs().at(index);
+ return _executors.at(ir::SubgraphIndex{0})->graph().operands().at(output_index).info();
+}
+
+void Executors::execute(const IODescription &desc)
+{
+ if (_model_edges)
+ return executeEntries(desc);
+
+ _executors.at(ir::SubgraphIndex{0})->execute(desc);
+}
+
+void Executors::executeEntries(const IODescription &desc)
+{
+ // Assume 2 executors only
+ // Assume that each model may have only one subgraph
+ // TODO Support general case
+ if (_executors.size() != 2)
+ throw std::runtime_error{"NYI: Multi model execution for this package is not supported yet"};
+
+ // Assume all edges are 0:0:x -> 1:0:x
+ for (auto edge : _model_edges->edges)
+ {
+ if ((std::get<ir::ModelIndex>(edge.from) != ir::ModelIndex{0}) ||
+ (std::get<ir::ModelIndex>(edge.to) != ir::ModelIndex{1}) ||
+ (std::get<ir::SubgraphIndex>(edge.from) != ir::SubgraphIndex{0}) ||
+ (std::get<ir::SubgraphIndex>(edge.to) != ir::SubgraphIndex{0}) ||
+ (std::get<ir::IOIndex>(edge.from) != std::get<ir::IOIndex>(edge.to)))
+ throw std::runtime_error{"NYI: Multi model execution for this edge is not supported yet"};
+ }
+
+ // Assume all package inputs are 0:0:x
+ for (uint32_t i = 0; i < _model_edges->pkg_inputs.size(); i++)
+ {
+ auto input = _model_edges->pkg_inputs[i];
+ if ((std::get<ir::ModelIndex>(input) != ir::ModelIndex{0}) ||
+ (std::get<ir::SubgraphIndex>(input) != ir::SubgraphIndex{0}) ||
+ (std::get<ir::IOIndex>(input) != ir::IOIndex{i}))
+ {
+ throw std::runtime_error{"NYI: Support package input to 1st model with same order"};
+ }
+ }
+
+ // Assume all package outputs are 1:0:x
+ for (uint32_t i = 0; i < _model_edges->pkg_outputs.size(); i++)
+ {
+ auto output = _model_edges->pkg_outputs[i];
+ if ((std::get<ir::ModelIndex>(output) != ir::ModelIndex{1}) ||
+ (std::get<ir::SubgraphIndex>(output) != ir::SubgraphIndex{0}) ||
+ (std::get<ir::IOIndex>(output) != ir::IOIndex{i}))
+ {
+ throw std::runtime_error{"NYI: Support package output from 2nd model with same order"};
+ }
+ }
+
+ const auto &executor1 = _executors.at(ir::SubgraphIndex{0});
+ const auto &graph1 = executor1->graph();
+ const auto &executor2 = _executors.at(ir::SubgraphIndex{1});
+ const auto &graph2 = executor2->graph();
+
+ if ((graph1.getInputs().size() != _model_edges->pkg_inputs.size()) ||
+ (graph2.getOutputs().size() != _model_edges->pkg_outputs.size()) ||
+ (graph1.getOutputs().size() != graph2.getInputs().size()) ||
+ (graph1.getOutputs().size() != _model_edges->edges.size()))
+ {
+ throw std::runtime_error{"NYI: Unsupported model edge pattern"};
+ }
+
+ // Prepare buffer
+ // Assume buffer layout is NHWC
+ std::vector<std::unique_ptr<uint8_t[]>> bufs(_model_edges->edges.size());
+ std::vector<const ir::OperandInfo *> buf_infos(_model_edges->edges.size());
+ const auto layout = ir::Layout::NHWC;
+
+ for (uint32_t i = 0; i < graph1.getOutputs().size(); i++)
+ {
+ const auto buf_index =
+ _executors.at(ir::SubgraphIndex{0})->graph().getOutputs().at(ir::IOIndex{i});
+ buf_infos[i] = &_executors.at(ir::SubgraphIndex{0})->graph().operands().at(buf_index).info();
+ const auto buf_size = buf_infos[i]->total_size();
+ bufs[i] = std::make_unique<uint8_t[]>(buf_size);
+ }
+
+ // 1st executor
+ {
+ IODescription desc1;
+ const auto input_size = graph1.getInputs().size();
+ const auto output_size = graph1.getOutputs().size();
+ desc1.inputs.resize(input_size);
+ desc1.outputs.resize(output_size);
+ for (uint32_t i = 0; i < input_size; i++)
+ desc1.inputs[i] = std::make_unique<InputDesc>(*desc.inputs[i].get());
+ for (uint32_t i = 0; i < output_size; i++)
+ desc1.outputs[i] = std::make_unique<OutputDesc>(*buf_infos[i], bufs[i].get(),
+ buf_infos[i]->total_size(), layout);
+
+ executor1->execute(desc1);
+ }
+
+ // 2nd executor
+ {
+ IODescription desc2;
+ const auto input_size = graph2.getInputs().size();
+ const auto output_size = graph2.getOutputs().size();
+ desc2.inputs.resize(input_size);
+ desc2.outputs.resize(output_size);
+ for (uint32_t i = 0; i < input_size; i++)
+ desc2.inputs[i] = std::make_unique<InputDesc>(*buf_infos[i], bufs[i].get(),
+ buf_infos[i]->total_size(), layout);
+ for (uint32_t i = 0; i < output_size; i++)
+ desc2.outputs[i] = std::make_unique<OutputDesc>(*desc.outputs[i].get());
+
+ executor2->execute(desc2);
+ }
+}
+
+} // namespace exec
+} // namespace onert
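To make the supported multi-model wiring concrete, here is a standalone sketch of the edge pattern that executeEntries currently accepts: every edge connects output i of model 0 / subgraph 0 to input i of model 1 / subgraph 0. The tuple layout and integer types below are simplified stand-ins for ir::ModelIndex, ir::SubgraphIndex, and ir::IOIndex, chosen only for illustration.

#include <cstdint>
#include <iostream>
#include <tuple>
#include <vector>

using IODesc = std::tuple<uint16_t /*model*/, uint16_t /*subgraph*/, uint32_t /*io index*/>;

struct Edge
{
  IODesc from;
  IODesc to;
};

bool is_supported_edge(const Edge &e)
{
  return std::get<0>(e.from) == 0 && std::get<1>(e.from) == 0 && // from model 0, subgraph 0
         std::get<0>(e.to) == 1 && std::get<1>(e.to) == 0 &&     // to model 1, subgraph 0
         std::get<2>(e.from) == std::get<2>(e.to);               // same IO index on both sides
}

int main()
{
  std::vector<Edge> edges{{{0, 0, 0}, {1, 0, 0}}, {{0, 0, 1}, {1, 0, 1}}};
  for (const auto &e : edges)
    std::cout << (is_supported_edge(e) ? "supported" : "NYI") << '\n';
  return 0;
}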
diff --git a/runtime/onert/core/src/exec/FunctionSequence.cc b/runtime/onert/core/src/exec/FunctionSequence.cc
index df68b1b64..f87c271f7 100644
--- a/runtime/onert/core/src/exec/FunctionSequence.cc
+++ b/runtime/onert/core/src/exec/FunctionSequence.cc
@@ -34,9 +34,7 @@ void FunctionSequence::run()
  // Thus, those two backends cannot reach here.
// Do dynamic shape inference
- auto op_ind = _dynamic_tensor_ctx->op_ind;
- auto &op = _dynamic_tensor_ctx->operations->at(op_ind);
- op.accept(*_dynamic_tensor_ctx->dynamic_shape_inferer);
+ _dynamic_tensor_ctx->op->accept(*_dynamic_tensor_ctx->dynamic_shape_inferer);
for (const auto &function : _functions)
{
diff --git a/runtime/onert/core/src/exec/JSONExecTime.cc b/runtime/onert/core/src/exec/JSONExecTime.cc
index b29216a2f..d149345fd 100644
--- a/runtime/onert/core/src/exec/JSONExecTime.cc
+++ b/runtime/onert/core/src/exec/JSONExecTime.cc
@@ -14,8 +14,8 @@
* limitations under the License.
*/
-#include "exec/JSONExecTime.h"
-#include "backend/IConfig.h"
+#include "JSONExecTime.h"
+
#include <fstream>
namespace onert
diff --git a/runtime/onert/core/src/exec/LinearExecutor.h b/runtime/onert/core/src/exec/LinearExecutor.h
index 39d653154..a833466da 100644
--- a/runtime/onert/core/src/exec/LinearExecutor.h
+++ b/runtime/onert/core/src/exec/LinearExecutor.h
@@ -22,11 +22,10 @@
#ifndef __ONERT_EXEC_EXECUTOR_H_
#define __ONERT_EXEC_EXECUTOR_H_
-#include "ir/Index.h"
#include "ExecutorBase.h"
-#include "compiler/Linear.h"
-#include "exec/FunctionSequence.h"
+
#include "compiler/CodeMap.h"
+#include "ir/Index.h"
#include "util/TracingCtx.h"
namespace onert
diff --git a/runtime/onert/core/src/exec/ParallelExecutor.h b/runtime/onert/core/src/exec/ParallelExecutor.h
index 7f107fa22..7d459b0b4 100644
--- a/runtime/onert/core/src/exec/ParallelExecutor.h
+++ b/runtime/onert/core/src/exec/ParallelExecutor.h
@@ -17,19 +17,13 @@
#ifndef __ONERT_EXEC_PARALLEL_EXECUTOR_H__
#define __ONERT_EXEC_PARALLEL_EXECUTOR_H__
-#include <list>
-#include <queue>
-#include <unordered_map>
-
-#include "exec/FunctionSequence.h"
-#include "Job.h"
-#include "ir/OperandIndexSequence.h"
-#include "ir/Index.h"
-#include <memory>
-#include "exec/DataflowExecutor.h"
+#include "DataflowExecutor.h"
#include "ParallelScheduler.h"
+
#include "util/TracingCtx.h"
+#include <memory>
+
namespace onert
{
namespace exec
diff --git a/runtime/onert/core/src/exec/feature/MockTensor.h b/runtime/onert/core/src/exec/feature/MockTensor.h
new file mode 100644
index 000000000..1d2d375e2
--- /dev/null
+++ b/runtime/onert/core/src/exec/feature/MockTensor.h
@@ -0,0 +1,66 @@
+
+/*
+ * Copyright (c) 2022 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "backend/ITensor.h"
+
+template <typename T> class MockTensor : public onert::backend::ITensor
+{
+public:
+ MockTensor<T>(onert::ir::Shape &shape, T *buf, onert::ir::Layout layout)
+ : _buf(reinterpret_cast<uint8_t *>(buf)), _shape(shape), _layout(layout)
+ {
+ }
+
+public:
+ uint8_t *buffer() const override { return _buf; }
+
+ size_t calcOffset(const onert::ir::Coordinates &coords) const override
+ {
+ size_t rank = _shape.rank();
+ rank = rank == 0 ? 1 : rank;
+ size_t offset = 0;
+ for (size_t i = 0; i < rank; ++i)
+ {
+ auto dim = _shape.rank() == 0 ? 1 : _shape.dim(i);
+ offset = offset * dim + coords[i];
+ }
+ offset *= sizeof(T);
+
+ return offset;
+ }
+
+ onert::ir::Shape getShape() const override { return _shape; }
+
+public: // DUMMY methods
+ size_t total_size() const override { return 0; }
+ onert::ir::Layout layout() const override { return _layout; }
+ onert::ir::DataType data_type() const override { return onert::ir::DataType::UINT8; }
+ float data_scale() const override { return 0; }
+ int32_t data_zero_point() const override { return 0; }
+ const std::vector<float> &data_scales() const override { return _dummy_scales; }
+ const std::vector<int32_t> &data_zero_points() const override { return _dummy_zerops; }
+ bool has_padding() const override { return false; }
+ void access(const std::function<void(ITensor &tensor)> &fn) override {}
+ bool is_dynamic() const override { return false; }
+
+private:
+ uint8_t *_buf = nullptr;
+ onert::ir::Shape _shape;
+ onert::ir::Layout _layout = onert::ir::Layout::UNKNOWN;
+ std::vector<float> _dummy_scales;
+ std::vector<int32_t> _dummy_zerops;
+};
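MockTensor::calcOffset() above folds the coordinates into a flat byte offset with a Horner-style row-major accumulation. A minimal standalone check of that arithmetic, written against plain vectors so it does not depend on the onert headers (names are illustrative only):

    #include <cstddef>
    #include <cstdint>
    #include <vector>

    // Same accumulation as MockTensor::calcOffset, over plain vectors.
    std::size_t rowMajorByteOffset(const std::vector<int32_t> &dims,
                                   const std::vector<int32_t> &coords,
                                   std::size_t elem_size)
    {
      std::size_t offset = 0;
      for (std::size_t i = 0; i < dims.size(); ++i)
        offset = offset * dims[i] + coords[i]; // ((c0 * d1 + c1) * d2 + c2) ...
      return offset * elem_size;
    }

    // dims {1, 2, 3, 2} with coords {0, 1, 1, 0} gives element index 8
    // (byte offset 8 * sizeof(float) for float data), the same element the
    // stride-based NCHW reader test below lands on.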
diff --git a/runtime/onert/core/src/exec/feature/nchw/Reader.test.cc b/runtime/onert/core/src/exec/feature/nchw/Reader.test.cc
new file mode 100644
index 000000000..f439cafb5
--- /dev/null
+++ b/runtime/onert/core/src/exec/feature/nchw/Reader.test.cc
@@ -0,0 +1,85 @@
+/*
+ * Copyright (c) 2022 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "Reader.h"
+
+#include "../MockTensor.h"
+
+#include <gtest/gtest.h>
+
+using namespace onert::exec::feature;
+
+template <typename T> class Reader_nchw : public testing::Test
+{
+public:
+ void setData(std::initializer_list<T> list) { _data = std::make_shared<std::vector<T>>(list); }
+
+ void setShape(int32_t batch, int32_t depth, int32_t height, int32_t width)
+ {
+ _shape = onert::ir::FeatureShape(batch, depth, height, width);
+ }
+
+ void setStride(int32_t batch, int32_t depth, int32_t height, int32_t width)
+ {
+ auto elem_size = sizeof(T);
+ _stride = onert::ir::FeatureShape(batch * elem_size, depth * elem_size, height * elem_size,
+ width * elem_size);
+ }
+
+ void createReader()
+ {
+ _reader =
+ std::make_shared<nchw::Reader<T>>(_shape, _stride, _data->data(), _data->size() * sizeof(T));
+ }
+
+ void createUsingMockTensor()
+ {
+ onert::ir::Shape shape = {_shape.N, _shape.H, _shape.W, _shape.C};
+ _tensor = std::make_shared<MockTensor<T>>(shape, _data->data(), onert::ir::Layout::NCHW);
+ _reader = std::make_shared<nchw::Reader<T>>(_tensor.get());
+ }
+
+ std::shared_ptr<Reader<T>> _reader = nullptr;
+
+private:
+ std::shared_ptr<std::vector<T>> _data = nullptr;
+ onert::ir::FeatureShape _shape;
+ onert::ir::FeatureShape _stride;
+ std::shared_ptr<MockTensor<T>> _tensor = nullptr;
+};
+
+using ReaderTypes = ::testing::Types<float, int32_t, uint8_t, int8_t, int16_t>;
+TYPED_TEST_SUITE(Reader_nchw, ReaderTypes);
+
+TYPED_TEST(Reader_nchw, basic_reader)
+{
+ this->setData({0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11});
+ this->setShape(1, 2, 3, 2);
+ this->setStride(12, 6, 2, 1);
+ this->createReader();
+
+ // Data: NCHW
+ // Shape: NCHW
+ ASSERT_EQ(this->_reader->at(0, 1, 1, 0), 8);
+ ASSERT_EQ(this->_reader->at(1, 1, 0), 8);
+
+ // Data: NCHW
+ // Shape: NCHW
+ this->createUsingMockTensor();
+
+ ASSERT_EQ(this->_reader->at(0, 1, 1, 0), 6);
+ ASSERT_EQ(this->_reader->at(1, 1, 0), 6);
+}
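For the stride-constructed reader above, the expected value follows from a plain coordinate-times-stride sum; the strides are passed per axis and scaled by sizeof(T). A sketch of the first assertion under that assumption, kept in element units for readability:

    // at(batch=0, ch=1, row=1, col=0) with element strides N=12, C=6, H=2, W=1
    constexpr int kStrideN = 12, kStrideC = 6, kStrideH = 2, kStrideW = 1;
    constexpr int kIndex = 0 * kStrideN + 1 * kStrideC + 1 * kStrideH + 0 * kStrideW;
    static_assert(kIndex == 8, "element 8 of the 0..11 test data, as asserted above");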
diff --git a/runtime/onert/core/src/exec/feature/nchw/View.test.cc b/runtime/onert/core/src/exec/feature/nchw/View.test.cc
new file mode 100644
index 000000000..c6dcda710
--- /dev/null
+++ b/runtime/onert/core/src/exec/feature/nchw/View.test.cc
@@ -0,0 +1,85 @@
+/*
+ * Copyright (c) 2022 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "View.h"
+
+#include "../MockTensor.h"
+
+#include <gtest/gtest.h>
+
+using namespace onert::exec::feature;
+
+template <typename T> class View_nchw : public testing::Test
+{
+public:
+ void setData(std::initializer_list<T> list) { _data = std::make_shared<std::vector<T>>(list); }
+
+ void setShape(int32_t batch, int32_t depth, int32_t height, int32_t width)
+ {
+ _shape = onert::ir::FeatureShape(batch, depth, height, width);
+ }
+
+ void setStride(int32_t batch, int32_t depth, int32_t height, int32_t width)
+ {
+ auto elem_size = sizeof(T);
+ _stride = onert::ir::FeatureShape(batch * elem_size, depth * elem_size, height * elem_size,
+ width * elem_size);
+ }
+
+ void createView()
+ {
+ _view =
+ std::make_shared<nchw::View<T>>(_shape, _stride, _data->data(), _data->size() * sizeof(T));
+ }
+
+ void createUsingMockTensor()
+ {
+ onert::ir::Shape shape = {_shape.N, _shape.H, _shape.W, _shape.C};
+ _tensor = std::make_shared<MockTensor<T>>(shape, _data->data(), onert::ir::Layout::NCHW);
+ _view = std::make_shared<nchw::View<T>>(_tensor.get());
+ }
+
+ std::shared_ptr<nchw::View<T>> _view = nullptr;
+
+private:
+ std::shared_ptr<std::vector<T>> _data = nullptr;
+ onert::ir::FeatureShape _shape;
+ onert::ir::FeatureShape _stride;
+ std::shared_ptr<MockTensor<T>> _tensor = nullptr;
+};
+
+using ViewTypes = ::testing::Types<float, int32_t, uint8_t, int8_t, int16_t>;
+TYPED_TEST_SUITE(View_nchw, ViewTypes);
+
+TYPED_TEST(View_nchw, basic_view)
+{
+ this->setData({0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11});
+ this->setShape(1, 2, 3, 2);
+ this->setStride(12, 6, 2, 1);
+ this->createView();
+
+ // Data: NCHW
+ // Shape: NCHW
+ ASSERT_EQ(this->_view->at(0, 1, 1, 0), 8);
+ ASSERT_EQ(this->_view->at(1, 1, 0), 8);
+
+ // Data: NCHW
+ // Shape: NCHW
+ this->createUsingMockTensor();
+
+ ASSERT_EQ(this->_view->at(0, 1, 1, 0), 6);
+ ASSERT_EQ(this->_view->at(1, 1, 0), 6);
+}
diff --git a/runtime/onert/core/src/exec/feature/nhwc/Reader.test.cc b/runtime/onert/core/src/exec/feature/nhwc/Reader.test.cc
new file mode 100644
index 000000000..773199042
--- /dev/null
+++ b/runtime/onert/core/src/exec/feature/nhwc/Reader.test.cc
@@ -0,0 +1,86 @@
+/*
+ * Copyright (c) 2022 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "Reader.h"
+
+#include "../MockTensor.h"
+
+#include <gtest/gtest.h>
+
+using namespace onert::exec::feature;
+
+template <typename T> class Reader_nhwc : public testing::Test
+{
+public:
+ void setData(std::initializer_list<T> list) { _data = std::make_shared<std::vector<T>>(list); }
+
+ void setShape(int32_t batch, int32_t depth, int32_t height, int32_t width)
+ {
+ _shape = onert::ir::FeatureShape(batch, depth, height, width);
+ }
+
+ void setStride(int32_t batch, int32_t depth, int32_t height, int32_t width)
+ {
+ auto elem_size = sizeof(T);
+ _stride = onert::ir::FeatureShape(batch * elem_size, depth * elem_size, height * elem_size,
+ width * elem_size);
+ }
+
+ void createReader()
+ {
+ _reader =
+ std::make_shared<nhwc::Reader<T>>(_shape, _stride, _data->data(), _data->size() * sizeof(T));
+ }
+
+ void createUsingMockTensor()
+ {
+ onert::ir::Shape shape = {_shape.N, _shape.H, _shape.W, _shape.C};
+ _tensor = std::make_shared<MockTensor<T>>(shape, _data->data(), onert::ir::Layout::NHWC);
+ _reader = std::make_shared<nhwc::Reader<T>>(_tensor.get());
+ }
+
+ std::shared_ptr<nhwc::Reader<T>> _reader = nullptr;
+
+private:
+ std::shared_ptr<std::vector<T>> _data = nullptr;
+ onert::ir::FeatureShape _shape;
+ onert::ir::FeatureShape _stride;
+ std::shared_ptr<MockTensor<T>> _tensor = nullptr;
+};
+
+using ReaderTypes = ::testing::Types<float, int32_t, uint8_t, int8_t, int16_t>;
+TYPED_TEST_SUITE(Reader_nhwc, ReaderTypes);
+
+TYPED_TEST(Reader_nhwc, basic_reader)
+{
+ this->setData({0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11});
+ this->setShape(1, 2, 3, 2);
+ this->setStride(12, 1, 6, 2);
+ this->createReader();
+
+ // Data: NCHW
+ // Shape: NHWC
+ ASSERT_EQ(this->_reader->at(0, 1, 1, 0), 8);
+ ASSERT_EQ(this->_reader->at(1, 1, 0), 8);
+
+ // Data: NHWC
+ // Shape: NHWC
+ this->createUsingMockTensor();
+
+ ASSERT_EQ(this->_reader->at(0, 1, 1, 0), 6);
+ ASSERT_EQ(this->_reader->at(1, 1, 0), 6);
+}
diff --git a/runtime/onert/core/src/exec/feature/nhwc/View.h b/runtime/onert/core/src/exec/feature/nhwc/View.h
index 40d1d237c..c98d050c3 100644
--- a/runtime/onert/core/src/exec/feature/nhwc/View.h
+++ b/runtime/onert/core/src/exec/feature/nhwc/View.h
@@ -17,7 +17,7 @@
#ifndef __ONERT_EXEC_FEATURE_NHWC_VIEW_H__
#define __ONERT_EXEC_FEATURE_NHWC_VIEW_H__
-#include "../Reader.h"
+#include "Reader.h"
#include <cassert>
#include <cstddef>
diff --git a/runtime/onert/core/src/exec/feature/nhwc/View.test.cc b/runtime/onert/core/src/exec/feature/nhwc/View.test.cc
new file mode 100644
index 000000000..bdd73d5a7
--- /dev/null
+++ b/runtime/onert/core/src/exec/feature/nhwc/View.test.cc
@@ -0,0 +1,86 @@
+/*
+ * Copyright (c) 2022 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "View.h"
+
+#include "../MockTensor.h"
+
+#include <gtest/gtest.h>
+
+using namespace onert::exec::feature;
+
+template <typename T> class View_nhwc : public testing::Test
+{
+public:
+ void setData(std::initializer_list<T> list) { _data = std::make_shared<std::vector<T>>(list); }
+
+ void setShape(int32_t batch, int32_t depth, int32_t height, int32_t width)
+ {
+ _shape = onert::ir::FeatureShape(batch, depth, height, width);
+ }
+
+ void setStride(int32_t batch, int32_t depth, int32_t height, int32_t width)
+ {
+ auto elem_size = sizeof(T);
+ _stride = onert::ir::FeatureShape(batch * elem_size, depth * elem_size, height * elem_size,
+ width * elem_size);
+ }
+
+ void createView()
+ {
+ _view =
+ std::make_shared<nhwc::View<T>>(_shape, _stride, _data->data(), _data->size() * sizeof(T));
+ }
+
+ void createUsingMockTensor()
+ {
+ onert::ir::Shape shape = {_shape.N, _shape.H, _shape.W, _shape.C};
+ _tensor = std::make_shared<MockTensor<T>>(shape, _data->data(), onert::ir::Layout::NHWC);
+ _view = std::make_shared<nhwc::View<T>>(_tensor.get());
+ }
+
+ std::shared_ptr<nhwc::View<T>> _view = nullptr;
+
+private:
+ std::shared_ptr<std::vector<T>> _data = nullptr;
+ onert::ir::FeatureShape _shape;
+ onert::ir::FeatureShape _stride;
+ std::shared_ptr<MockTensor<T>> _tensor = nullptr;
+};
+
+using ViewTypes = ::testing::Types<float, int32_t, uint8_t, int8_t, int16_t>;
+TYPED_TEST_SUITE(View_nhwc, ViewTypes);
+
+TYPED_TEST(View_nhwc, basic_view)
+{
+ this->setData({0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11});
+ this->setShape(1, 2, 3, 2);
+ this->setStride(12, 1, 6, 2);
+ this->createView();
+
+ // Data: NCHW
+ // Shape: NHWC
+ ASSERT_EQ(this->_view->at(0, 1, 1, 0), 8);
+ ASSERT_EQ(this->_view->at(1, 1, 0), 8);
+
+ // Data: NHWC
+ // Shape: NHWC
+ this->createUsingMockTensor();
+
+ ASSERT_EQ(this->_view->at(0, 1, 1, 0), 6);
+ ASSERT_EQ(this->_view->at(1, 1, 0), 6);
+}
diff --git a/runtime/onert/core/src/interp/InterpExecutor.cc b/runtime/onert/core/src/interp/InterpExecutor.cc
index 44d1575d7..f04777174 100644
--- a/runtime/onert/core/src/interp/InterpExecutor.cc
+++ b/runtime/onert/core/src/interp/InterpExecutor.cc
@@ -14,9 +14,10 @@
* limitations under the License.
*/
-#include "interp/InterpExecutor.h"
-#include "interp/ExecEnv.h"
-#include "interp/Interpreter.h"
+#include "InterpExecutor.h"
+
+#include "ExecEnv.h"
+#include "Interpreter.h"
#include "util/logging.h"
diff --git a/runtime/onert/core/src/interp/InterpExecutor.h b/runtime/onert/core/src/interp/InterpExecutor.h
index df6153d09..d6d5dd0a3 100644
--- a/runtime/onert/core/src/interp/InterpExecutor.h
+++ b/runtime/onert/core/src/interp/InterpExecutor.h
@@ -74,7 +74,12 @@ public:
}
private:
- const ir::Graph &_graph;
+ /**
+ * @brief Copy of target graph for lowering
+ * @note It uses a copy of the graph, not a reference.
+ * The original graph may be deallocated by the frontend.
+ */
+ const ir::Graph _graph;
ir::OperandIndexMap<std::shared_ptr<ITensor>> _tensor_map;
};
diff --git a/runtime/onert/test/core/interp/ExecManager.cc b/runtime/onert/core/src/interp/InterpExecutor.test.cc
index a9f7cd46a..9f95ffee0 100644
--- a/runtime/onert/test/core/interp/ExecManager.cc
+++ b/runtime/onert/core/src/interp/InterpExecutor.test.cc
@@ -14,22 +14,23 @@
* limitations under the License.
*/
-#include <gtest/gtest.h>
-
-#include <memory>
+#include "InterpExecutor.h"
-#include "ir/Graph.h"
-#include "interp/InterpExecutor.h"
#include "exec/Execution.h"
+#include "ir/Graph.h"
#include "ir/operation/BinaryArithmetic.h"
+#include <gtest/gtest.h>
+
+#include <memory>
+
namespace
{
using namespace onert::ir;
using InterpExecutor = onert::interp::InterpExecutor;
using Execution = onert::exec::Execution;
-using ExecutorMap = onert::exec::ExecutorMap;
+using Executors = onert::exec::Executors;
class InterpExecutorTest : public ::testing::Test
{
@@ -73,13 +74,11 @@ protected:
_graph->verify();
- auto subgs = std::make_shared<onert::ir::Subgraphs>();
- subgs->push(onert::ir::SubgraphIndex{0}, _graph);
- _graph->setSubgraphs(subgs);
+ auto model = std::make_shared<onert::ir::Model>();
+ model->push(onert::ir::SubgraphIndex{0}, _graph);
- _executors = std::make_shared<ExecutorMap>();
- _executors->insert(
- std::make_pair(onert::ir::SubgraphIndex{0}, std::make_unique<InterpExecutor>(*_graph)));
+ _executors = std::make_shared<Executors>();
+ _executors->emplace(onert::ir::SubgraphIndex{0}, std::make_unique<InterpExecutor>(*_graph));
}
void CreateTwoStepModel()
@@ -138,13 +137,11 @@ protected:
_graph->verify();
- auto subgs = std::make_shared<onert::ir::Subgraphs>();
- subgs->push(onert::ir::SubgraphIndex{0}, _graph);
- _graph->setSubgraphs(subgs);
+ auto model = std::make_shared<onert::ir::Model>();
+ model->push(onert::ir::SubgraphIndex{0}, _graph);
- _executors = std::make_shared<ExecutorMap>();
- _executors->insert(
- std::make_pair(onert::ir::SubgraphIndex{0}, std::make_unique<InterpExecutor>(*_graph)));
+ _executors = std::make_shared<Executors>();
+ _executors->emplace(onert::ir::SubgraphIndex{0}, std::make_unique<InterpExecutor>(*_graph));
}
void CreateUnspecifiedDimensionsModel()
@@ -191,13 +188,11 @@ protected:
_graph->verify();
- auto subgs = std::make_shared<onert::ir::Subgraphs>();
- subgs->push(onert::ir::SubgraphIndex{0}, _graph);
- _graph->setSubgraphs(subgs);
+ auto model = std::make_shared<onert::ir::Model>();
+ model->push(onert::ir::SubgraphIndex{0}, _graph);
- _executors = std::make_shared<ExecutorMap>();
- _executors->insert(
- std::make_pair(onert::ir::SubgraphIndex{0}, std::make_unique<InterpExecutor>(*_graph)));
+ _executors = std::make_shared<Executors>();
+ _executors->emplace(onert::ir::SubgraphIndex{0}, std::make_unique<InterpExecutor>(*_graph));
}
void createExecution() { _execution = std::make_unique<Execution>(_executors); }
@@ -205,7 +200,7 @@ protected:
virtual void TearDown() { _executors = nullptr; }
std::shared_ptr<Graph> _graph{nullptr};
- std::shared_ptr<ExecutorMap> _executors{nullptr};
+ std::shared_ptr<Executors> _executors{nullptr};
std::unique_ptr<Execution> _execution{nullptr};
const int32_t _activation_value{0};
};
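The fixture now routes the interpreter through ir::Model and exec::Executors instead of ir::Subgraphs and ExecutorMap. A condensed sketch of that wiring, with the graph construction elided (the graph is assumed to be a valid single-subgraph model, as built in the fixtures above):

    auto model = std::make_shared<onert::ir::Model>();
    model->push(onert::ir::SubgraphIndex{0}, graph); // 'graph' built as in the fixtures

    auto executors = std::make_shared<onert::exec::Executors>();
    executors->emplace(onert::ir::SubgraphIndex{0},
                       std::make_unique<onert::interp::InterpExecutor>(*graph));

    auto execution = std::make_unique<onert::exec::Execution>(executors);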
diff --git a/runtime/onert/core/src/interp/operations/BinaryArithmeticOps.cc b/runtime/onert/core/src/interp/operations/BinaryArithmeticOps.cc
index 804e9fb51..fe4acd309 100644
--- a/runtime/onert/core/src/interp/operations/BinaryArithmeticOps.cc
+++ b/runtime/onert/core/src/interp/operations/BinaryArithmeticOps.cc
@@ -14,14 +14,14 @@
* limitations under the License.
*/
-#include <cker/operation/BinaryArithmeticOps.h>
-
#include "OperationUtil.h"
+#include "../Registration.h"
-#include "interp/Registration.h"
#include "ir/operation/BinaryArithmetic.h"
-#include "misc/polymorphic_downcast.h"
-#include "cker/Types.h"
+
+#include <cker/operation/BinaryArithmeticOps.h>
+#include <cker/Types.h>
+#include <misc/polymorphic_downcast.h>
namespace onert
{
diff --git a/runtime/onert/core/src/interp/operations/Concat.cc b/runtime/onert/core/src/interp/operations/Concat.cc
index a063ab14a..103604631 100644
--- a/runtime/onert/core/src/interp/operations/Concat.cc
+++ b/runtime/onert/core/src/interp/operations/Concat.cc
@@ -14,13 +14,13 @@
* limitations under the License.
*/
-#include <cker/operation/Concatenation.h>
-
#include "OperationUtil.h"
+#include "../Registration.h"
-#include "interp/Registration.h"
#include "ir/operation/Concat.h"
-#include "misc/polymorphic_downcast.h"
+
+#include <cker/operation/Concatenation.h>
+#include <misc/polymorphic_downcast.h>
namespace onert
{
diff --git a/runtime/onert/core/src/interp/operations/Conv2D.cc b/runtime/onert/core/src/interp/operations/Conv2D.cc
index 0b43a4799..72c2057c2 100644
--- a/runtime/onert/core/src/interp/operations/Conv2D.cc
+++ b/runtime/onert/core/src/interp/operations/Conv2D.cc
@@ -14,15 +14,15 @@
* limitations under the License.
*/
-#include <cker/operation/Conv.h>
-
#include "OperationUtil.h"
+#include "../Registration.h"
-#include "interp/Registration.h"
#include "ir/operation/Conv2D.h"
-#include "util/Utils.h"
#include "util/ShapeInference.h"
-#include "misc/polymorphic_downcast.h"
+#include "util/Utils.h"
+
+#include <cker/operation/Conv.h>
+#include <misc/polymorphic_downcast.h>
namespace onert
{
diff --git a/runtime/onert/core/src/interp/operations/DepthwiseConv2D.cc b/runtime/onert/core/src/interp/operations/DepthwiseConv2D.cc
index d1c62d73f..9f527440e 100644
--- a/runtime/onert/core/src/interp/operations/DepthwiseConv2D.cc
+++ b/runtime/onert/core/src/interp/operations/DepthwiseConv2D.cc
@@ -14,15 +14,15 @@
* limitations under the License.
*/
-#include <cker/operation/DepthwiseConv.h>
-#include <misc/polymorphic_downcast.h>
-
#include "OperationUtil.h"
+#include "../Registration.h"
-#include "interp/Registration.h"
#include "ir/operation/DepthwiseConv2D.h"
-#include "util/Utils.h"
#include "util/ShapeInference.h"
+#include "util/Utils.h"
+
+#include <cker/operation/DepthwiseConv.h>
+#include <misc/polymorphic_downcast.h>
namespace onert
{
diff --git a/runtime/onert/core/src/interp/operations/ElementwiseActivations.cc b/runtime/onert/core/src/interp/operations/ElementwiseActivations.cc
index 197855ff4..e13080e76 100644
--- a/runtime/onert/core/src/interp/operations/ElementwiseActivations.cc
+++ b/runtime/onert/core/src/interp/operations/ElementwiseActivations.cc
@@ -14,17 +14,16 @@
* limitations under the License.
*/
-#include <cmath>
-
#include "OperationUtil.h"
-
-#include "interp/Registration.h"
+#include "../Registration.h"
#include "ir/operation/ElementwiseActivation.h"
-#include <misc/polymorphic_downcast.h>
#include <cker/operation/Logistic.h>
#include <cker/operation/Tanh.h>
+#include <misc/polymorphic_downcast.h>
+
+#include <cmath>
namespace onert
{
diff --git a/runtime/onert/core/src/interp/operations/FullyConnected.cc b/runtime/onert/core/src/interp/operations/FullyConnected.cc
index ef827605b..2bc9f517f 100644
--- a/runtime/onert/core/src/interp/operations/FullyConnected.cc
+++ b/runtime/onert/core/src/interp/operations/FullyConnected.cc
@@ -14,13 +14,13 @@
* limitations under the License.
*/
-#include <cker/operation/FullyConnected.h>
-
#include "OperationUtil.h"
+#include "../Registration.h"
-#include "interp/Registration.h"
#include "ir/operation/FullyConnected.h"
-#include "misc/polymorphic_downcast.h"
+
+#include <cker/operation/FullyConnected.h>
+#include <misc/polymorphic_downcast.h>
namespace onert
{
diff --git a/runtime/onert/core/src/interp/operations/Gather.cc b/runtime/onert/core/src/interp/operations/Gather.cc
index 0ea60875c..d686cfcf6 100644
--- a/runtime/onert/core/src/interp/operations/Gather.cc
+++ b/runtime/onert/core/src/interp/operations/Gather.cc
@@ -14,13 +14,13 @@
* limitations under the License.
*/
-#include <cker/operation/Gather.h>
-
#include "OperationUtil.h"
+#include "../Registration.h"
-#include "interp/Registration.h"
#include "ir/operation/Gather.h"
-#include "misc/polymorphic_downcast.h"
+
+#include <cker/operation/Gather.h>
+#include <misc/polymorphic_downcast.h>
namespace onert
{
diff --git a/runtime/onert/core/src/interp/operations/InstanceNorm.cc b/runtime/onert/core/src/interp/operations/InstanceNorm.cc
index b5c38819d..318088457 100644
--- a/runtime/onert/core/src/interp/operations/InstanceNorm.cc
+++ b/runtime/onert/core/src/interp/operations/InstanceNorm.cc
@@ -14,13 +14,13 @@
* limitations under the License.
*/
-#include <cker/operation/InstanceNorm.h>
-
#include "OperationUtil.h"
+#include "../Registration.h"
-#include "interp/Registration.h"
#include "ir/operation/InstanceNorm.h"
-#include "misc/polymorphic_downcast.h"
+
+#include <cker/operation/InstanceNorm.h>
+#include <misc/polymorphic_downcast.h>
namespace onert
{
diff --git a/runtime/onert/core/src/interp/operations/Pad.cc b/runtime/onert/core/src/interp/operations/Pad.cc
index 0eec7fe9a..3db0828eb 100644
--- a/runtime/onert/core/src/interp/operations/Pad.cc
+++ b/runtime/onert/core/src/interp/operations/Pad.cc
@@ -14,13 +14,13 @@
* limitations under the License.
*/
-#include <cker/operation/Pad.h>
-
#include "OperationUtil.h"
+#include "../Registration.h"
-#include "interp/Registration.h"
#include "ir/operation/Pad.h"
+#include <cker/operation/Pad.h>
+
namespace onert
{
namespace interp
diff --git a/runtime/onert/core/src/interp/operations/Pool2D.cc b/runtime/onert/core/src/interp/operations/Pool2D.cc
index 2f3b71655..3935d4756 100644
--- a/runtime/onert/core/src/interp/operations/Pool2D.cc
+++ b/runtime/onert/core/src/interp/operations/Pool2D.cc
@@ -14,16 +14,16 @@
* limitations under the License.
*/
-#include <cker/operation/AveragePool.h>
-#include <cker/operation/MaxPool.h>
-
#include "OperationUtil.h"
+#include "../Registration.h"
-#include "interp/Registration.h"
#include "ir/operation/Pool2D.h"
-#include "util/Utils.h"
#include "util/ShapeInference.h"
-#include "misc/polymorphic_downcast.h"
+#include "util/Utils.h"
+
+#include <cker/operation/AveragePool.h>
+#include <cker/operation/MaxPool.h>
+#include <misc/polymorphic_downcast.h>
namespace onert
{
diff --git a/runtime/onert/core/src/interp/operations/Reshape.cc b/runtime/onert/core/src/interp/operations/Reshape.cc
index 3a118456b..1de5a5762 100644
--- a/runtime/onert/core/src/interp/operations/Reshape.cc
+++ b/runtime/onert/core/src/interp/operations/Reshape.cc
@@ -14,7 +14,7 @@
* limitations under the License.
*/
-#include "interp/Registration.h"
+#include "../Registration.h"
namespace onert
{
diff --git a/runtime/onert/core/src/interp/operations/Softmax.cc b/runtime/onert/core/src/interp/operations/Softmax.cc
index 1fc303117..8be2f2210 100644
--- a/runtime/onert/core/src/interp/operations/Softmax.cc
+++ b/runtime/onert/core/src/interp/operations/Softmax.cc
@@ -14,13 +14,13 @@
* limitations under the License.
*/
-#include <cker/operation/SoftMax.h>
-
#include "OperationUtil.h"
+#include "../Registration.h"
-#include "interp/Registration.h"
#include "ir/operation/Softmax.h"
-#include "misc/polymorphic_downcast.h"
+
+#include <cker/operation/SoftMax.h>
+#include <misc/polymorphic_downcast.h>
namespace onert
{
diff --git a/runtime/onert/core/src/interp/operations/TransposeConv.cc b/runtime/onert/core/src/interp/operations/TransposeConv.cc
index 755103dc2..59c8e8cdf 100644
--- a/runtime/onert/core/src/interp/operations/TransposeConv.cc
+++ b/runtime/onert/core/src/interp/operations/TransposeConv.cc
@@ -14,14 +14,14 @@
* limitations under the License.
*/
-#include <cker/operation/TransposeConv.h>
-#include <misc/polymorphic_downcast.h>
-
#include "OperationUtil.h"
+#include "../Registration.h"
-#include "interp/Registration.h"
#include "ir/operation/TransposeConv.h"
+#include <cker/operation/TransposeConv.h>
+#include <misc/polymorphic_downcast.h>
+
namespace onert
{
namespace interp
diff --git a/runtime/onert/core/src/ir/Graph.cc b/runtime/onert/core/src/ir/Graph.cc
index df30bbdbe..28cf4137d 100644
--- a/runtime/onert/core/src/ir/Graph.cc
+++ b/runtime/onert/core/src/ir/Graph.cc
@@ -17,19 +17,9 @@
#include "ir/Graph.h"
#include "OperationValidator.h"
+#include "verifier/Verifier.h"
-#include <algorithm>
-
-#include <bitset>
-#include <sstream>
-
-#include "util/logging.h"
#include "util/Set.h"
-#include "verifier/Verifier.h"
-#include "ir/OperandIndexMap.h"
-#include "ir/OperationIndexMap.h"
-#include "dumper/text/GraphDumper.h"
-#include "backend/IConfig.h"
namespace onert
{
@@ -38,6 +28,8 @@ namespace ir
Graph::Graph() = default;
+Graph::Graph(const Graph &) = default;
+
Graph::~Graph(void) = default;
OperandIndex Graph::addOperand(const Shape &shape, const TypeInfo &type)
diff --git a/runtime/onert/test/core/ir/Graph.cc b/runtime/onert/core/src/ir/Graph.test.cc
index d6de7c0cc..144500745 100644
--- a/runtime/onert/test/core/ir/Graph.cc
+++ b/runtime/onert/core/src/ir/Graph.test.cc
@@ -14,11 +14,10 @@
* limitations under the License.
*/
-#include <gtest/gtest.h>
-
#include "ir/Graph.h"
#include "ir/operation/BinaryArithmetic.h"
-#include "ir/verifier/Verifier.h"
+
+#include <gtest/gtest.h>
TEST(Graph, neg_inputs_and_outputs)
{
diff --git a/runtime/onert/test/core/ir/LayoutSet.cc b/runtime/onert/core/src/ir/LayoutSet.test.cc
index 591710a4d..fc956abe8 100644
--- a/runtime/onert/test/core/ir/LayoutSet.cc
+++ b/runtime/onert/core/src/ir/LayoutSet.test.cc
@@ -14,9 +14,9 @@
* limitations under the License.
*/
-#include <gtest/gtest.h>
+#include "LayoutSet.h"
-#include "ir/LayoutSet.h"
+#include <gtest/gtest.h>
using onert::ir::Layout;
using onert::ir::LayoutSet;
diff --git a/runtime/onert/test/core/ir/MockNode.h b/runtime/onert/core/src/ir/MockNode.h
index 0e7ed977b..0e7ed977b 100644
--- a/runtime/onert/test/core/ir/MockNode.h
+++ b/runtime/onert/core/src/ir/MockNode.h
diff --git a/runtime/onert/test/core/ir/UseDef.cc b/runtime/onert/core/src/ir/Operand.test.cc
index 47c98f939..0b858792a 100644
--- a/runtime/onert/test/core/ir/UseDef.cc
+++ b/runtime/onert/core/src/ir/Operand.test.cc
@@ -14,13 +14,14 @@
* limitations under the License.
*/
-#include <gtest/gtest.h>
-
#include "ir/Graph.h"
-#include "ir/verifier/Verifier.h"
-#include <memory>
+
#include "MockNode.h"
+#include "verifier/Verifier.h"
+#include <gtest/gtest.h>
+
+#include <memory>
#include <typeindex>
namespace
diff --git a/runtime/onert/test/core/ir/OperandIndexSet.cc b/runtime/onert/core/src/ir/OperandIndexSequence.test.cc
index c363e5472..588c4e419 100644
--- a/runtime/onert/test/core/ir/OperandIndexSet.cc
+++ b/runtime/onert/core/src/ir/OperandIndexSequence.test.cc
@@ -14,10 +14,10 @@
* limitations under the License.
*/
-#include <gtest/gtest.h>
-
#include "ir/OperandIndexSequence.h"
+#include <gtest/gtest.h>
+
using onert::ir::OperandIndex;
using onert::ir::OperandIndexSequence;
diff --git a/runtime/onert/test/core/ir/OperandSet.cc b/runtime/onert/core/src/ir/Operands.test.cc
index 6cf9c8842..aff228b10 100644
--- a/runtime/onert/test/core/ir/OperandSet.cc
+++ b/runtime/onert/core/src/ir/Operands.test.cc
@@ -14,10 +14,10 @@
* limitations under the License.
*/
-#include <gtest/gtest.h>
-
#include "ir/Operands.h"
+#include <gtest/gtest.h>
+
TEST(ir_Operands, neg_set_test)
{
onert::ir::Operands set;
diff --git a/runtime/onert/test/core/ir/SetIO.cc b/runtime/onert/core/src/ir/Operation.test.cc
index 68b477347..b3c4e852d 100644
--- a/runtime/onert/test/core/ir/SetIO.cc
+++ b/runtime/onert/core/src/ir/Operation.test.cc
@@ -14,16 +14,15 @@
* limitations under the License.
*/
-#include <gtest/gtest.h>
-
#include "ir/Graph.h"
#include "ir/Index.h"
#include "ir/OperandIndexSequence.h"
-#include "ir/operation/Conv2D.h"
#include "ir/operation/Concat.h"
+#include "ir/operation/Conv2D.h"
-#include <memory>
+#include <gtest/gtest.h>
+#include <memory>
#include <stdexcept>
using Index = onert::ir::IOIndex;
diff --git a/runtime/onert/test/core/ir/OperationSet.cc b/runtime/onert/core/src/ir/Operations.test.cc
index 4a17eeb33..e57872689 100644
--- a/runtime/onert/test/core/ir/OperationSet.cc
+++ b/runtime/onert/core/src/ir/Operations.test.cc
@@ -14,10 +14,11 @@
* limitations under the License.
*/
-#include <gtest/gtest.h>
+#include "ir/Operations.h"
#include "MockNode.h"
-#include "ir/Operations.h"
+
+#include <gtest/gtest.h>
using onert::ir::Operation;
using onert::ir::OperationIndex;
diff --git a/runtime/onert/test/core/ir/Shape.cc b/runtime/onert/core/src/ir/Shape.test.cc
index c24aeda8d..afdb29254 100644
--- a/runtime/onert/test/core/ir/Shape.cc
+++ b/runtime/onert/core/src/ir/Shape.test.cc
@@ -14,7 +14,7 @@
* limitations under the License.
*/
-#include <ir/Shape.h>
+#include "ir/Shape.h"
#include <gtest/gtest.h>
diff --git a/runtime/onert/test/core/ir/Verifier.cc b/runtime/onert/core/src/ir/verifier/Verifier.test.cc
index b4be2d9cd..1ec71cd55 100644
--- a/runtime/onert/test/core/ir/Verifier.cc
+++ b/runtime/onert/core/src/ir/verifier/Verifier.test.cc
@@ -14,14 +14,15 @@
* limitations under the License.
*/
-#include <gtest/gtest.h>
+#include "Verifier.h"
+
+#include "../MockNode.h"
-#include "ir/Operation.h"
#include "ir/Graph.h"
-#include "ir/verifier/Verifier.h"
+
+#include <gtest/gtest.h>
+
#include <memory>
-#include "ir/Operand.h"
-#include "MockNode.h"
using IndexSet = onert::ir::OperandIndexSequence;
using Mock = onert_test::ir::SimpleMock;
diff --git a/runtime/onert/core/src/util/ChromeTracingEventWriter.cc b/runtime/onert/core/src/util/ChromeTracingEventWriter.cc
index 3fc0c8ece..d868efedf 100644
--- a/runtime/onert/core/src/util/ChromeTracingEventWriter.cc
+++ b/runtime/onert/core/src/util/ChromeTracingEventWriter.cc
@@ -14,12 +14,12 @@
* limitations under the License.
*/
-#include "util/EventWriter.h"
+#include "EventWriter.h"
-#include <sstream>
-#include <vector>
#include <cassert>
+#include <sstream>
#include <utility>
+#include <vector>
// json type for ChromeTracingWriter
namespace
diff --git a/runtime/onert/core/src/util/ConfigSource.cc b/runtime/onert/core/src/util/ConfigSource.cc
index 9da93f68a..b7fcefc7a 100644
--- a/runtime/onert/core/src/util/ConfigSource.cc
+++ b/runtime/onert/core/src/util/ConfigSource.cc
@@ -15,13 +15,15 @@
*/
#include "util/ConfigSource.h"
-#include "util/GeneralConfigSource.h"
-#include "util/EnvConfigSource.h"
+#include "util/logging.h"
+
+#include <misc/EnvConfigSource.h>
+#include <misc/GeneralConfigSource.h>
+#include <misc/IConfigSource.h>
-#include <array>
#include <algorithm>
+#include <array>
#include <cassert>
-
#include <memory>
namespace onert
@@ -29,12 +31,27 @@ namespace onert
namespace util
{
+using namespace nnfw::misc;
+
static std::unique_ptr<IConfigSource> _source;
static std::unique_ptr<IConfigSource> _source_ext;
void config_source(std::unique_ptr<IConfigSource> &&source) { _source = std::move(source); }
void config_source_ext(std::unique_ptr<IConfigSource> &&source) { _source_ext = std::move(source); }
+void setConfigKeyValues(const CfgKeyValues &keyValues)
+{
+ auto configsrc = std::make_unique<GeneralConfigSource>();
+
+ for (auto it = keyValues.begin(); it != keyValues.end(); ++it)
+ {
+ VERBOSE(NNPKG_CONFIGS) << "(" << it->first << ") = (" << it->second << ")" << std::endl;
+ configsrc->set(it->first, it->second);
+ }
+
+ onert::util::config_source_ext(std::move(configsrc));
+}
+
static IConfigSource *config_source()
{
if (!_source)
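setConfigKeyValues() copies every pair into a GeneralConfigSource and installs it through config_source_ext(). A hedged usage sketch, assuming CfgKeyValues is a string-to-string map as the iteration above implies; the key and value are illustrative, not taken from this patch:

    onert::util::CfgKeyValues values;
    values["BACKENDS"] = "cpu";               // illustrative key/value
    onert::util::setConfigKeyValues(values);  // becomes the external config source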
diff --git a/runtime/onert/core/src/util/EventCollector.cc b/runtime/onert/core/src/util/EventCollector.cc
index 83c2649d1..c1b9c4315 100644
--- a/runtime/onert/core/src/util/EventCollector.cc
+++ b/runtime/onert/core/src/util/EventCollector.cc
@@ -14,7 +14,7 @@
* limitations under the License.
*/
-#include "util/EventCollector.h"
+#include "EventCollector.h"
// C++ standard libraries
#include <chrono>
diff --git a/runtime/onert/core/src/util/EventCollector.h b/runtime/onert/core/src/util/EventCollector.h
index 774fe05ef..effb72373 100644
--- a/runtime/onert/core/src/util/EventCollector.h
+++ b/runtime/onert/core/src/util/EventCollector.h
@@ -17,12 +17,13 @@
#ifndef __ONERT_UTIL_EVENT_COLLECTOR_H__
#define __ONERT_UTIL_EVENT_COLLECTOR_H__
-#include "util/EventRecorder.h"
+#include "EventRecorder.h"
+
#include "util/TracingCtx.h"
-#include <vector>
-#include <utility>
#include <string>
+#include <utility>
+#include <vector>
class EventCollector
{
diff --git a/runtime/onert/core/src/util/EventRecorder.cc b/runtime/onert/core/src/util/EventRecorder.cc
index 5d3d5f5c6..85a588d38 100644
--- a/runtime/onert/core/src/util/EventRecorder.cc
+++ b/runtime/onert/core/src/util/EventRecorder.cc
@@ -14,7 +14,7 @@
* limitations under the License.
*/
-#include "util/EventRecorder.h"
+#include "EventRecorder.h"
void EventRecorder::emit(std::unique_ptr<DurationEvent> &&evt)
{
diff --git a/runtime/onert/core/src/util/EventWriter.cc b/runtime/onert/core/src/util/EventWriter.cc
index c42c53730..ca4bd302e 100644
--- a/runtime/onert/core/src/util/EventWriter.cc
+++ b/runtime/onert/core/src/util/EventWriter.cc
@@ -14,7 +14,7 @@
* limitations under the License.
*/
-#include "util/EventWriter.h"
+#include "EventWriter.h"
#include <cassert>
diff --git a/runtime/onert/test/core/util/Index.cc b/runtime/onert/core/src/util/Index.test.cc
index 2d110e326..ff73e5e59 100644
--- a/runtime/onert/test/core/util/Index.cc
+++ b/runtime/onert/core/src/util/Index.test.cc
@@ -14,10 +14,10 @@
* limitations under the License.
*/
-#include <gtest/gtest.h>
-
#include "util/Index.h"
+#include <gtest/gtest.h>
+
using Index = ::onert::util::Index<uint32_t, struct TestTag>;
TEST(Index, neg_index_test)
diff --git a/runtime/onert/core/src/util/MDTableEventWriter.cc b/runtime/onert/core/src/util/MDTableEventWriter.cc
index b7fbac5e2..7a8b9f234 100644
--- a/runtime/onert/core/src/util/MDTableEventWriter.cc
+++ b/runtime/onert/core/src/util/MDTableEventWriter.cc
@@ -14,16 +14,16 @@
* limitations under the License.
*/
-#include "util/EventWriter.h"
+#include "EventWriter.h"
-#include <sstream>
-#include <vector>
-#include <unordered_map>
#include <cassert>
-#include <utility>
#include <map>
#include <set>
+#include <sstream>
#include <stdint.h>
+#include <unordered_map>
+#include <utility>
+#include <vector>
// md table type
namespace
diff --git a/runtime/onert/test/core/util/ObjectManager.cc b/runtime/onert/core/src/util/ObjectManager.test.cc
index 78f044e56..3fe735732 100644
--- a/runtime/onert/test/core/util/ObjectManager.cc
+++ b/runtime/onert/core/src/util/ObjectManager.test.cc
@@ -14,10 +14,10 @@
* limitations under the License.
*/
-#include <gtest/gtest.h>
-
-#include "util/ObjectManager.h"
#include "util/Index.h"
+#include "util/ObjectManager.h"
+
+#include <gtest/gtest.h>
using namespace onert;
diff --git a/runtime/onert/core/src/util/SNPEEventWriter.cc b/runtime/onert/core/src/util/SNPEEventWriter.cc
index 6f03cfccf..4dea6d16c 100644
--- a/runtime/onert/core/src/util/SNPEEventWriter.cc
+++ b/runtime/onert/core/src/util/SNPEEventWriter.cc
@@ -14,11 +14,12 @@
* limitations under the License.
*/
-#include "util/EventWriter.h"
+#include "EventWriter.h"
-#include <unordered_map>
#include <json/json.h>
+
#include <cassert>
+#include <unordered_map>
#include <utility>
/**
diff --git a/runtime/onert/test/core/util/ShapeInference.cc b/runtime/onert/core/src/util/ShapeInference.test.cc
index 2ecaa2885..96579bfa2 100644
--- a/runtime/onert/test/core/util/ShapeInference.cc
+++ b/runtime/onert/core/src/util/ShapeInference.test.cc
@@ -14,11 +14,10 @@
* limitations under the License.
*/
-#include <gtest/gtest.h>
-
-#include "ir/Layout.h"
#include "util/ShapeInference.h"
+#include <gtest/gtest.h>
+
using namespace onert::ir;
TEST(ShapeInference, Elementwise)
diff --git a/runtime/onert/frontend/base_loader/include/base_loader.h b/runtime/onert/frontend/base_loader/include/base_loader.h
index 5649f286d..cf080abbc 100644
--- a/runtime/onert/frontend/base_loader/include/base_loader.h
+++ b/runtime/onert/frontend/base_loader/include/base_loader.h
@@ -65,10 +65,10 @@ public:
/**
* @brief Construct a new Loader object
*
- * @param graph reference on subgraphs
+ * @param model reference to model
*/
- explicit BaseLoader(std::unique_ptr<ir::Subgraphs> &subgs)
- : _base{nullptr}, _pagesize(getpagesize()), _fd(-1), _subgraphs(subgs), _model{nullptr},
+ explicit BaseLoader(std::unique_ptr<ir::Model> &model)
+ : _base{nullptr}, _pagesize(getpagesize()), _fd(-1), _model(model), _domain_model{nullptr},
_tensor_names(std::make_shared<std::unordered_map<ir::OperandIndex, std::string>>())
{
_use_mmaped_data = util::getConfigBool(util::config::USE_MMAPED_DATA);
@@ -114,7 +114,7 @@ protected:
// Get BuiltinOperator
BuiltinOperator getBuiltinOperator(const Operator *op)
{
- auto const builtin_opcode = _model->operator_codes()->Get(op->opcode_index());
+ auto const builtin_opcode = _domain_model->operator_codes()->Get(op->opcode_index());
auto builtin_op = builtin_opcode->builtin_code();
if (builtin_op < BuiltinOperator::BuiltinOperator_PLACEHOLDER_FOR_GREATER_OP_CODES)
builtin_op = static_cast<BuiltinOperator>(builtin_opcode->deprecated_builtin_code());
@@ -176,7 +176,7 @@ private:
void verifySubgraphIndex(int subg_index)
{
- const auto num_subgraphs = _model->subgraphs()->size();
+ const auto num_subgraphs = _domain_model->subgraphs()->size();
if (subg_index < 0 || subg_index >= static_cast<int32_t>(num_subgraphs))
throw std::runtime_error{std::string{"Invalid subgraph index - "} +
std::to_string(subg_index)};
@@ -189,9 +189,9 @@ protected:
int32_t _pagesize;
// loaded file description
int _fd;
- // Reference on loadable subgraphs
- std::unique_ptr<ir::Subgraphs> &_subgraphs;
- const Model *_model;
+ // Reference to ir::Model (to be loaded from _domain_model)
+ std::unique_ptr<ir::Model> &_model;
+ const Model *_domain_model;
// Maps Tensor indices to onert Operands.
std::vector<ir::OperandIndex> _tensor_to_operand;
std::shared_ptr<std::unordered_map<ir::OperandIndex, std::string>> _tensor_names;
@@ -290,6 +290,8 @@ ir::DataType BaseLoader<LoaderDomain>::BaseLoader::tensorTypeToDataType(const Te
case TensorType::TensorType_INT8:
return ir::DataType::QUANT_INT8_ASYMM;
// case TensorType::TensorType_FLOAT64
+ case TensorType::TensorType_UINT32:
+ return ir::DataType::UINT32;
default:
throw std::runtime_error(
std::string("Unsupported tensor type: ").append(EnumNameTensorType(type)));
@@ -358,7 +360,7 @@ ir::OperandIndex BaseLoader<LoaderDomain>::loadOperand(const Tensor *tensor, ir:
const auto operand_index = subg.addOperand(shape, type_info);
// Constant tensors are indicated by non-empty data.
- const auto *data = _model->buffers()->Get(tensor->buffer())->data();
+ const auto *data = _domain_model->buffers()->Get(tensor->buffer())->data();
if (data != nullptr)
{
using std::ptrdiff_t;
@@ -1037,7 +1039,7 @@ void BaseLoader<LoaderDomain>::loadCustom(const Operator *op, ir::Graph &subg)
assert(op->custom_options_format() == CustomOptionsFormat::CustomOptionsFormat_FLEXBUFFERS &&
"Unsupported custom operation options format");
- auto *op_code = _model->operator_codes()->Get(op->opcode_index());
+ auto *op_code = _domain_model->operator_codes()->Get(op->opcode_index());
auto custom_op_name = op_code->custom_code()->str();
enum class BuiltinOP
@@ -1670,7 +1672,7 @@ void BaseLoader<LoaderDomain>::loadOperation(const Operator *op, ir::Graph &subg
template <typename LoaderDomain> void BaseLoader<LoaderDomain>::loadModel()
{
LoaderDomain::VerifyModelBuffer(*_verifier.get());
- _model = LoaderDomain::GetModel(_base);
+ _domain_model = LoaderDomain::GetModel(_base);
// Version unused
// const auto version = _model->version();
// Description unused
@@ -1678,14 +1680,14 @@ template <typename LoaderDomain> void BaseLoader<LoaderDomain>::loadModel()
// Metabuffer unused
// const auto *metadata_buffer = _model->metadata_buffer();
// Load subgraphs and map operations on subgraph
- const auto domain_subgraphs = _model->subgraphs();
- auto subgraphs = std::make_unique<ir::Subgraphs>();
- for (uint32_t subgraph_index = 0; subgraph_index < domain_subgraphs->size(); ++subgraph_index)
+ const auto subgraphs = _domain_model->subgraphs();
+ auto model = std::make_unique<ir::Model>();
+ for (uint32_t subgraph_index = 0; subgraph_index < subgraphs->size(); ++subgraph_index)
{
- auto subg = loadSubgraph((*_model->subgraphs())[subgraph_index]);
- subgraphs->push(ir::SubgraphIndex{subgraph_index}, std::move(subg));
+ auto subg = loadSubgraph((*_domain_model->subgraphs())[subgraph_index]);
+ model->push(ir::SubgraphIndex{subgraph_index}, std::move(subg));
}
- _subgraphs = std::move(subgraphs);
+ _model = std::move(model);
}
} // namespace base_loader
diff --git a/runtime/onert/frontend/circle/include/circle_loader.h b/runtime/onert/frontend/circle/include/circle_loader.h
index 44bf28056..87e5d70ae 100644
--- a/runtime/onert/frontend/circle/include/circle_loader.h
+++ b/runtime/onert/frontend/circle/include/circle_loader.h
@@ -25,8 +25,8 @@ namespace onert
{
namespace circle_loader
{
-std::unique_ptr<ir::Subgraphs> loadModel(const std::string &filename);
-std::unique_ptr<ir::Subgraphs> loadModel(uint8_t *buffer, size_t size);
+std::unique_ptr<ir::Model> loadModel(const std::string &filename);
+std::unique_ptr<ir::Model> loadModel(uint8_t *buffer, size_t size);
} // namespace circle_loader
} // namespace onert
diff --git a/runtime/onert/frontend/circle/src/circle_loader.cc b/runtime/onert/frontend/circle/src/circle_loader.cc
index aae831d61..5abcc9cd0 100644
--- a/runtime/onert/frontend/circle/src/circle_loader.cc
+++ b/runtime/onert/frontend/circle/src/circle_loader.cc
@@ -228,20 +228,20 @@ void CircleLoader::loadBCQFullyConnected(const Operator *op, ir::Graph &subg)
} // namespace
-std::unique_ptr<ir::Subgraphs> loadModel(const std::string &filename)
+std::unique_ptr<ir::Model> loadModel(const std::string &filename)
{
- auto subgraphs = std::make_unique<ir::Subgraphs>();
- CircleLoader loader(subgraphs);
+ auto model = std::make_unique<ir::Model>();
+ CircleLoader loader(model);
loader.loadFromFile(filename);
- return subgraphs;
+ return model;
}
-std::unique_ptr<ir::Subgraphs> loadModel(uint8_t *buffer, size_t size)
+std::unique_ptr<ir::Model> loadModel(uint8_t *buffer, size_t size)
{
- auto subgraphs = std::make_unique<ir::Subgraphs>();
- CircleLoader loader(subgraphs);
+ auto model = std::make_unique<ir::Model>();
+ CircleLoader loader(model);
loader.loadFromBuffer(buffer, size);
- return subgraphs;
+ return model;
}
} // namespace circle_loader
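With this change the loader entry points hand back an ir::Model rather than ir::Subgraphs; caller-side usage stays a one-liner. A sketch (the file name is hypothetical):

    std::unique_ptr<onert::ir::Model> model = onert::circle_loader::loadModel("add.circle");
    // or, from an in-memory flatbuffer:
    // auto model2 = onert::circle_loader::loadModel(buffer, size);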
diff --git a/runtime/onert/frontend/nnapi/execution.cc b/runtime/onert/frontend/nnapi/execution.cc
index 56ca5ef00..19636a84d 100644
--- a/runtime/onert/frontend/nnapi/execution.cc
+++ b/runtime/onert/frontend/nnapi/execution.cc
@@ -37,7 +37,7 @@ int ANeuralNetworksExecution_create(ANeuralNetworksCompilation *compilation,
return ANEURALNETWORKS_UNEXPECTED_NULL;
}
- std::shared_ptr<onert::exec::ExecutorMap> executors;
+ std::shared_ptr<onert::exec::Executors> executors;
compilation->publish(executors);
diff --git a/runtime/onert/frontend/nnapi/wrapper/ANeuralNetworksCompilation.cc b/runtime/onert/frontend/nnapi/wrapper/ANeuralNetworksCompilation.cc
index 63036a398..bb247b97f 100644
--- a/runtime/onert/frontend/nnapi/wrapper/ANeuralNetworksCompilation.cc
+++ b/runtime/onert/frontend/nnapi/wrapper/ANeuralNetworksCompilation.cc
@@ -18,11 +18,12 @@
#include "util/logging.h"
+using namespace onert;
+
// TODO Support multiple subgraphs
ANeuralNetworksCompilation::ANeuralNetworksCompilation(const ANeuralNetworksModel *model) noexcept
- : _subgraphs{model->getSubGraphs()}, _tracing_ctx{std::make_unique<onert::util::TracingCtx>(
- _subgraphs.get())},
- _compiler{new onert::compiler::Compiler{_subgraphs, _tracing_ctx.get()}}
+ : _model{model->getModel()}, _coptions{compiler::CompilerOptions::fromGlobalConfig()},
+ _compiler{std::make_shared<compiler::Compiler>(_model, *_coptions)}
{
if (model->allowedToFp16())
{
@@ -34,7 +35,7 @@ bool ANeuralNetworksCompilation::finish() noexcept
{
try
{
- _executors = _compiler->compile();
+ _artifact = _compiler->compile();
}
catch (const std::exception &e)
{
diff --git a/runtime/onert/frontend/nnapi/wrapper/ANeuralNetworksCompilation.h b/runtime/onert/frontend/nnapi/wrapper/ANeuralNetworksCompilation.h
index bd61f9d86..dff5c6dc6 100644
--- a/runtime/onert/frontend/nnapi/wrapper/ANeuralNetworksCompilation.h
+++ b/runtime/onert/frontend/nnapi/wrapper/ANeuralNetworksCompilation.h
@@ -21,8 +21,8 @@
#include "compiler/Compiler.h"
#include "ir/Graph.h"
-#include "ir/Subgraphs.h"
-#include "exec/IExecutor.h"
+#include "ir/Model.h"
+#include "exec/Executors.h"
#include "util/TracingCtx.h"
struct ANeuralNetworksCompilation
@@ -34,23 +34,16 @@ public:
bool finish() noexcept;
onert::compiler::State state(void) noexcept { return _compiler->state(); }
- void publish(std::shared_ptr<onert::exec::ExecutorMap> &executors) noexcept
+ void publish(std::shared_ptr<onert::exec::Executors> &executors) noexcept
{
- executors = _executors;
+ executors = _artifact ? _artifact->_executors : nullptr;
}
private:
- std::shared_ptr<onert::ir::Subgraphs> _subgraphs;
- // TODO Refine the ownership of TracingCtx
- // In case of nnfw API, nnfw_session has ownership of TracingCtx.
- // In case of nnapi, there is no concept of session and primary model might have the ownership
- // of TracingCtx.
- // Since we don't support multiple models yet with nnapi in ONE, let's implement this later
- // and let's make it work with one model for now.
- std::unique_ptr<onert::util::TracingCtx> _tracing_ctx;
-
+ std::shared_ptr<onert::ir::Model> _model;
+ std::unique_ptr<onert::compiler::CompilerOptions> _coptions;
std::shared_ptr<onert::compiler::Compiler> _compiler;
- std::shared_ptr<onert::exec::ExecutorMap> _executors;
+ std::shared_ptr<onert::compiler::CompilerArtifact> _artifact;
};
#endif
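publish() now hands out the executors stored in the CompilerArtifact produced by compile(), and yields nullptr before finish() has succeeded. A caller-side sketch matching ANeuralNetworksExecution_create above; error handling is elided:

    std::shared_ptr<onert::exec::Executors> executors;
    compilation->publish(executors); // nullptr until _compiler->compile() produced an artifact
    if (!executors)
    {
      // compilation has not finished successfully yet
    }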
diff --git a/runtime/onert/frontend/nnapi/wrapper/ANeuralNetworksExecution.h b/runtime/onert/frontend/nnapi/wrapper/ANeuralNetworksExecution.h
index 70c5d2a4b..110c7cd55 100644
--- a/runtime/onert/frontend/nnapi/wrapper/ANeuralNetworksExecution.h
+++ b/runtime/onert/frontend/nnapi/wrapper/ANeuralNetworksExecution.h
@@ -26,7 +26,7 @@
struct ANeuralNetworksExecution
{
public:
- ANeuralNetworksExecution(const std::shared_ptr<onert::exec::ExecutorMap> &executors)
+ ANeuralNetworksExecution(const std::shared_ptr<onert::exec::Executors> &executors)
: _execution{std::make_shared<onert::exec::Execution>(executors)}
{
// DO NOTHING
diff --git a/runtime/onert/frontend/nnapi/wrapper/ANeuralNetworksModel.cc b/runtime/onert/frontend/nnapi/wrapper/ANeuralNetworksModel.cc
index 81ffa26f3..a641368ec 100644
--- a/runtime/onert/frontend/nnapi/wrapper/ANeuralNetworksModel.cc
+++ b/runtime/onert/frontend/nnapi/wrapper/ANeuralNetworksModel.cc
@@ -273,16 +273,16 @@ void ANeuralNetworksModel::fillOptionalOperand(void)
});
}
-std::shared_ptr<onert::ir::Subgraphs> ANeuralNetworksModel::getSubGraphs() const
+std::shared_ptr<onert::ir::Model> ANeuralNetworksModel::getModel() const
{
- auto all_subgs = std::make_shared<onert::ir::Subgraphs>();
+ auto model = std::make_shared<onert::ir::Model>();
- all_subgs->push(onert::ir::SubgraphIndex{0}, _graph);
+ model->push(onert::ir::SubgraphIndex{0}, _graph);
// TODO Find all child subgraphs and copy them to all_subgs
// Must find the same subgraph by using to compare pointer of subgraphs and set subgraph's index
// to operands of control flow operations
// Must clean all child subgraphs's pointer to prevent memory leak in case of that graph has
// subgraph itself recursively
- return all_subgs;
+ return model;
}
diff --git a/runtime/onert/frontend/nnapi/wrapper/ANeuralNetworksModel.h b/runtime/onert/frontend/nnapi/wrapper/ANeuralNetworksModel.h
index 4301193d6..04f4cf0f2 100644
--- a/runtime/onert/frontend/nnapi/wrapper/ANeuralNetworksModel.h
+++ b/runtime/onert/frontend/nnapi/wrapper/ANeuralNetworksModel.h
@@ -22,7 +22,7 @@
#include <NeuralNetworksEx.h>
#include "ir/Graph.h"
-#include "ir/Subgraphs.h"
+#include "ir/Model.h"
struct ANeuralNetworksModel
{
@@ -59,7 +59,7 @@ public:
size_t operandSize(uint32_t index) noexcept;
bool isUsageSet(uint32_t index) noexcept;
bool isOperationOutput(uint32_t index) noexcept;
- std::shared_ptr<onert::ir::Subgraphs> getSubGraphs() const;
+ std::shared_ptr<onert::ir::Model> getModel() const;
private:
void setOptionalOperand(const onert::ir::OperandIndex idx);
diff --git a/runtime/onert/frontend/tflite/include/tflite_loader.h b/runtime/onert/frontend/tflite/include/tflite_loader.h
index dda34cc6a..cf17863f5 100644
--- a/runtime/onert/frontend/tflite/include/tflite_loader.h
+++ b/runtime/onert/frontend/tflite/include/tflite_loader.h
@@ -26,7 +26,7 @@ namespace onert
namespace tflite_loader
{
-std::unique_ptr<ir::Subgraphs> loadModel(const std::string &filename);
+std::unique_ptr<ir::Model> loadModel(const std::string &filename);
} // namespace tflite_loader
} // namespace onert
diff --git a/runtime/onert/frontend/tflite/src/tflite_loader.cc b/runtime/onert/frontend/tflite/src/tflite_loader.cc
index 3b160473d..fe69e4e2a 100644
--- a/runtime/onert/frontend/tflite/src/tflite_loader.cc
+++ b/runtime/onert/frontend/tflite/src/tflite_loader.cc
@@ -154,12 +154,12 @@ void TFLiteLoader::loadBatchMatMul(const Operator *op, ir::Graph &subg)
} // namespace
-std::unique_ptr<ir::Subgraphs> loadModel(const std::string &filename)
+std::unique_ptr<ir::Model> loadModel(const std::string &filename)
{
- auto subgraphs = std::make_unique<ir::Subgraphs>();
- TFLiteLoader loader(subgraphs);
+ auto model = std::make_unique<ir::Model>();
+ TFLiteLoader loader(model);
loader.loadFromFile(filename);
- return subgraphs;
+ return model;
}
} // namespace tflite_loader
diff --git a/runtime/onert/frontend/trix/CMakeLists.txt b/runtime/onert/frontend/trix/CMakeLists.txt
index 7a0df4eaa..8d9063f6c 100644
--- a/runtime/onert/frontend/trix/CMakeLists.txt
+++ b/runtime/onert/frontend/trix/CMakeLists.txt
@@ -2,7 +2,7 @@ if (NOT BUILD_TRIX_LOADER)
return()
endif ()
-nnfw_find_package(TRIXEngine EXACT 2.5.0 QUIET)
+nnfw_find_package(TRIXEngine QUIET 2.5.0)
if(TRIXEngine_FOUND)
list(APPEND SOURCES src/trix_loader.cc)
else()
diff --git a/runtime/onert/frontend/trix/include/trix_loader.h b/runtime/onert/frontend/trix/include/trix_loader.h
index 297d5ec28..26d6a3c56 100644
--- a/runtime/onert/frontend/trix/include/trix_loader.h
+++ b/runtime/onert/frontend/trix/include/trix_loader.h
@@ -27,7 +27,7 @@ namespace trix_loader
/**
* @throw runtime_error when tvn path is wrong or tvn is invalid
*/
-std::unique_ptr<ir::Subgraphs> loadModel(const std::string &filename);
+std::unique_ptr<ir::Model> loadModel(const std::string &filename);
} // namespace trix_loader
} // namespace onert
diff --git a/runtime/onert/frontend/trix/src/trix_loader.cc b/runtime/onert/frontend/trix/src/trix_loader.cc
index e2995bbd1..cdf239648 100644
--- a/runtime/onert/frontend/trix/src/trix_loader.cc
+++ b/runtime/onert/frontend/trix/src/trix_loader.cc
@@ -67,11 +67,11 @@ void TrixMetaReader::init(const char *path)
_meta = getNPUmodel_metadata(path, false);
if (_meta == nullptr)
{
- throw std::runtime_error("Failed to get TRIV2 model metadata");
+ throw std::runtime_error("Failed to get TRIX model metadata");
}
if (NPUBIN_VERSION(_meta->magiccode) != 3)
{
- throw std::runtime_error("TRIV2 model metadata version mismatched.");
+ throw std::runtime_error("TRIX model metadata version mismatched.");
}
}
@@ -81,9 +81,9 @@ public:
/**
* @brief Construct a new Loader object
*
- * @param graph reference on subgraphs
+ * @param model reference to model
*/
- explicit TrixLoader(std::unique_ptr<ir::Subgraphs> &subgs) : _subgraphs(subgs) {}
+ explicit TrixLoader(std::unique_ptr<ir::Model> &model) : _model(model) {}
/**
* @brief Load a model from file
@@ -97,7 +97,6 @@ private:
* @throw runtime_error when tvn path is wrong or tvn is invalid
*/
void loadModel();
- void loadSubgraphs();
std::unique_ptr<ir::Graph> loadSubgraph();
void loadOperands(ir::Graph &subg);
ir::OperandIndex loadOperandFromInput(uint32_t i, ir::Graph &subg);
@@ -112,8 +111,11 @@ private:
protected:
/** path to model (e.g. tvn) */
std::string _model_path;
+ /** original IO shapes */
+ std::vector<ir::Shape> _origin_input_shapes;
+ std::vector<ir::Shape> _origin_output_shapes;
/** Reference on loadable subgraphs */
- std::unique_ptr<ir::Subgraphs> &_subgraphs;
+ std::unique_ptr<ir::Model> &_model;
TrixMetaReader _meta;
};
@@ -154,6 +156,8 @@ void TrixLoader::loadBulk(ir::Graph &subg)
{
ir::operation::Bulk::Param param;
param.binary_path = _model_path;
+ param.origin_input_shapes = _origin_input_shapes;
+ param.origin_output_shapes = _origin_output_shapes;
ir::OperandIndexSequence inputs;
ir::OperandIndexSequence outputs;
@@ -175,6 +179,7 @@ ir::OperandIndex TrixLoader::loadOperandFromInput(uint32_t idx, ir::Graph &subg)
ir::TypeInfo type_info(toDataType(_meta.input_seg_quant_type(idx)),
_meta.input_seg_quant_scale(idx), _meta.input_seg_quant_zp(idx));
+ _origin_input_shapes.push_back(shape);
// Create operand
const auto operand_index = subg.addOperand(shape, type_info);
return operand_index;
@@ -191,6 +196,7 @@ ir::OperandIndex TrixLoader::loadOperandFromOutput(uint32_t idx, ir::Graph &subg
ir::TypeInfo type_info(toDataType(_meta.output_seg_quant_type(idx)),
_meta.output_seg_quant_scale(idx), _meta.output_seg_quant_zp(idx));
+ _origin_output_shapes.push_back(shape);
// Create operand
const auto operand_index = subg.addOperand(shape, type_info);
return operand_index;
@@ -237,15 +243,13 @@ std::unique_ptr<ir::Graph> TrixLoader::loadSubgraph()
return subg;
}
-void TrixLoader::loadSubgraphs()
+void TrixLoader::loadModel()
{
// one subgraph only
auto subg = loadSubgraph();
- _subgraphs->push(ir::SubgraphIndex(0), std::move(subg));
+ _model->push(ir::SubgraphIndex(0), std::move(subg));
}
-void TrixLoader::loadModel() { loadSubgraphs(); }
-
void TrixLoader::loadFromFile(const std::string &file_path)
{
// model path will be used to set Bulk param
@@ -255,12 +259,12 @@ void TrixLoader::loadFromFile(const std::string &file_path)
loadModel();
}
-std::unique_ptr<ir::Subgraphs> loadModel(const std::string &filename)
+std::unique_ptr<ir::Model> loadModel(const std::string &filename)
{
- auto subgraphs = std::make_unique<ir::Subgraphs>();
- TrixLoader loader(subgraphs);
+ auto model = std::make_unique<ir::Model>();
+ TrixLoader loader(model);
loader.loadFromFile(filename);
- return subgraphs;
+ return model;
}
} // namespace trix_loader
} // namespace onert
diff --git a/runtime/onert/frontend/trix/src/trix_loader_dummy.cc b/runtime/onert/frontend/trix/src/trix_loader_dummy.cc
index 9fc8e1ff2..eecbd2217 100644
--- a/runtime/onert/frontend/trix/src/trix_loader_dummy.cc
+++ b/runtime/onert/frontend/trix/src/trix_loader_dummy.cc
@@ -22,10 +22,10 @@ namespace onert
{
namespace trix_loader
{
-std::unique_ptr<ir::Subgraphs> loadModel(const std::string &)
+std::unique_ptr<ir::Model> loadModel(const std::string &)
{
- auto subgraphs = std::make_unique<ir::Subgraphs>();
- return subgraphs;
+ auto model = std::make_unique<ir::Model>();
+ return model;
}
} // namespace trix_loader
} // namespace onert
diff --git a/runtime/onert/test/CMakeLists.txt b/runtime/onert/test/CMakeLists.txt
deleted file mode 100644
index 38899976d..000000000
--- a/runtime/onert/test/CMakeLists.txt
+++ /dev/null
@@ -1,15 +0,0 @@
-set(TEST_ONERT test_onert)
-
-file(GLOB_RECURSE TESTS "*.cc")
-
-add_executable(${TEST_ONERT} ${TESTS})
-
-target_include_directories(${TEST_ONERT} PRIVATE ${CMAKE_CURRENT_SOURCE_DIR}/../core/src)
-
-target_link_libraries(${TEST_ONERT} onert_core)
-target_link_libraries(${TEST_ONERT} gtest)
-target_link_libraries(${TEST_ONERT} gtest_main)
-target_link_libraries(${TEST_ONERT} ${LIB_PTHREAD} dl)
-add_test(${TEST_ONERT} ${TEST_ONERT})
-
-install(TARGETS ${TEST_ONERT} DESTINATION unittest_standalone)
diff --git a/runtime/service/CMakeLists.txt b/runtime/service/CMakeLists.txt
new file mode 100644
index 000000000..5ea6cdadd
--- /dev/null
+++ b/runtime/service/CMakeLists.txt
@@ -0,0 +1 @@
+add_subdirectories()
diff --git a/runtime/service/npud/CMakeLists.txt b/runtime/service/npud/CMakeLists.txt
new file mode 100644
index 000000000..8cf51649c
--- /dev/null
+++ b/runtime/service/npud/CMakeLists.txt
@@ -0,0 +1,21 @@
+if(NOT BUILD_NPUD)
+ return()
+endif(NOT BUILD_NPUD)
+
+nnfw_find_package(GLib2.0 REQUIRED)
+
+file(GLOB_RECURSE SOURCES "*.cc")
+
+add_executable(npud ${SOURCES})
+set_target_properties(npud PROPERTIES LINKER_LANGUAGE CXX)
+target_include_directories(npud PUBLIC ${CMAKE_CURRENT_SOURCE_DIR})
+target_include_directories(npud PUBLIC ${GLIB2.0_INCLUDE_DIRS})
+target_link_libraries(npud PRIVATE nnfw_lib_misc)
+target_link_libraries(npud PRIVATE ${GLIB2.0_LIBRARIES})
+target_link_libraries(npud PRIVATE ${LIB_PTHREAD})
+
+if(ENVVAR_NPUD_CONFIG)
+ target_compile_definitions(npud PRIVATE ENVVAR_FOR_DEFAULT_CONFIG)
+endif(ENVVAR_NPUD_CONFIG)
+
+install(TARGETS npud DESTINATION bin)
diff --git a/runtime/service/npud/core/Server.cc b/runtime/service/npud/core/Server.cc
new file mode 100644
index 000000000..5b15388dc
--- /dev/null
+++ b/runtime/service/npud/core/Server.cc
@@ -0,0 +1,65 @@
+/*
+ * Copyright (c) 2022 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "Server.h"
+
+#include <thread>
+#include <util/Logging.h>
+
+namespace npud
+{
+namespace core
+{
+
+std::atomic_bool Server::_isRunning(false);
+
+Server::Server() noexcept
+ : _mainloop(g_main_loop_new(NULL, FALSE), g_main_loop_unref), _signal(std::make_unique<Signal>())
+{
+}
+
+void Server::run(void)
+{
+ VERBOSE(Server) << "Starting Server\n";
+
+ if (_isRunning.exchange(true))
+ {
+ throw std::runtime_error("Mainloop is already running.");
+ }
+
+ g_main_loop_run(_mainloop.get());
+}
+
+void Server::stop(void)
+{
+ VERBOSE(Server) << "Stop Server\n";
+
+ if (!_isRunning.load())
+ {
+ throw std::runtime_error("Mainloop is not running");
+ }
+
+ while (!g_main_loop_is_running(_mainloop.get()))
+ {
+ std::this_thread::yield();
+ }
+
+ g_main_loop_quit(_mainloop.get());
+ _isRunning = false;
+}
+
+} // namespace core
+} // namespace npud
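
A minimal sketch of the run/stop contract implemented above, assuming the npud source root is on the include path so the header is reachable as core/Server.h. In npud itself the stop request normally comes from the signal handler rather than a helper thread.

#include <chrono>
#include <thread>

#include "core/Server.h" // assumed include path; npud adds its source root to the include dirs

int main()
{
  auto &server = npud::core::Server::instance();

  // stop() throws unless _isRunning is already set, and it spins with
  // std::this_thread::yield() until the GMainLoop is actually running,
  // so the helper thread waits a moment before asking for shutdown.
  std::thread stopper([&server] {
    std::this_thread::sleep_for(std::chrono::seconds(1));
    server.stop();
  });

  server.run(); // blocks in g_main_loop_run() until stop() quits the loop
  stopper.join();
  return 0;
}
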
diff --git a/runtime/service/npud/core/Server.h b/runtime/service/npud/core/Server.h
new file mode 100644
index 000000000..e2f37f8fe
--- /dev/null
+++ b/runtime/service/npud/core/Server.h
@@ -0,0 +1,55 @@
+/*
+ * Copyright (c) 2022 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __ONE_SERVICE_NPUD_CORE_SERVER_H__
+#define __ONE_SERVICE_NPUD_CORE_SERVER_H__
+
+#include "Signal.h"
+
+#include <glib.h>
+#include <memory>
+#include <atomic>
+
+namespace npud
+{
+namespace core
+{
+
+class Server
+{
+public:
+ void run(void);
+ void stop(void);
+
+ static Server &instance(void)
+ {
+ static Server server;
+ return server;
+ }
+
+private:
+ Server() noexcept;
+
+ static std::atomic_bool _isRunning;
+
+ std::unique_ptr<GMainLoop, void (*)(GMainLoop *)> _mainloop;
+ std::unique_ptr<Signal> _signal;
+};
+
+} // namespace core
+} // namespace npud
+
+#endif // __ONE_SERVICE_NPUD_CORE_SERVER_H__
diff --git a/runtime/service/npud/core/Signal.cc b/runtime/service/npud/core/Signal.cc
new file mode 100644
index 000000000..085535a6a
--- /dev/null
+++ b/runtime/service/npud/core/Signal.cc
@@ -0,0 +1,56 @@
+/*
+ * Copyright (c) 2022 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "Signal.h"
+
+#include "Server.h"
+#include <util/Logging.h>
+
+#include <csignal>
+
+namespace npud
+{
+namespace core
+{
+
+Signal::Signal(void) noexcept { init(); }
+
+void Signal::init(void)
+{
+ // NOTE Types of signals
+ // SIGTERM: termination request, sent to the program
+ // SIGSEGV: invalid memory access (segmentation fault)
+ // SIGINT: external interrupt, usually initiated by the user
+ // SIGILL: invalid program image, such as invalid instruction
+ // SIGABRT: abnormal termination condition, as is e.g. initiated by std::abort()
+ // SIGFPE: erroneous arithmetic operation such as divide by zero
+ // from https://en.cppreference.com/w/cpp/utility/program/SIG_types
+ std::signal(SIGTERM, handleSignal);
+ std::signal(SIGSEGV, handleSignal);
+ std::signal(SIGINT, handleSignal);
+ std::signal(SIGILL, handleSignal);
+ std::signal(SIGABRT, handleSignal);
+ std::signal(SIGFPE, handleSignal);
+}
+
+void Signal::handleSignal(int signum)
+{
+ VERBOSE(signal) << "Signal received: " << strsignal(signum) << "(" << signum << ")\n";
+ Server::instance().stop();
+}
+
+} // namespace core
+} // namespace npud
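
The same shutdown path can be exercised through a signal instead of a direct stop() call; a small sketch follows (assumed include path). Note that Server::instance() already owns a Signal member, so the handlers above are installed as soon as the singleton is created.

#include <chrono>
#include <csignal>
#include <thread>

#include "core/Server.h" // assumed include path; constructing the Server installs the Signal handlers

int main()
{
  auto &server = npud::core::Server::instance();

  std::thread raiser([] {
    std::this_thread::sleep_for(std::chrono::seconds(1));
    std::raise(SIGTERM); // handleSignal() logs the signal name and calls Server::instance().stop()
  });

  server.run(); // returns once the raised SIGTERM has quit the GMainLoop
  raiser.join();
  return 0;
}
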
diff --git a/runtime/service/npud/core/Signal.h b/runtime/service/npud/core/Signal.h
new file mode 100644
index 000000000..ffddc7255
--- /dev/null
+++ b/runtime/service/npud/core/Signal.h
@@ -0,0 +1,37 @@
+/*
+ * Copyright (c) 2022 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __ONE_SERVICE_NPUD_CORE_SIGNAL_H__
+#define __ONE_SERVICE_NPUD_CORE_SIGNAL_H__
+
+namespace npud
+{
+namespace core
+{
+
+class Signal
+{
+public:
+ Signal() noexcept;
+
+ void init(void);
+ static void handleSignal(int signum);
+};
+
+} // namespace core
+} // namespace npud
+
+#endif // __ONE_SERVICE_NPUD_CORE_SIGNAL_H__
diff --git a/runtime/service/npud/core/main.cc b/runtime/service/npud/core/main.cc
new file mode 100644
index 000000000..bd885b207
--- /dev/null
+++ b/runtime/service/npud/core/main.cc
@@ -0,0 +1,40 @@
+/*
+ * Copyright (c) 2022 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "Server.h"
+
+#include <util/Logging.h>
+
+using namespace npud;
+
+int main(int argc, const char *argv[])
+{
+ auto &server = core::Server::instance();
+
+ VERBOSE(main) << "Starting npud\n";
+ try
+ {
+ server.run();
+ }
+ catch (const std::runtime_error &err)
+ {
+ std::cerr << err.what() << std::endl;
+ return 1;
+ }
+
+ VERBOSE(main) << "Finished npud\n";
+ return 0;
+}
diff --git a/runtime/service/npud/util/Config.lst b/runtime/service/npud/util/Config.lst
new file mode 100644
index 000000000..d45b37352
--- /dev/null
+++ b/runtime/service/npud/util/Config.lst
@@ -0,0 +1,22 @@
+/*
+ * Copyright (c) 2022 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef CONFIG
+#error Define CONFIG before including this file
+#endif
+
+// Name | Type | Default
+CONFIG(NPUD_LOG_ENABLE , bool , "0")
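
Config.lst is an X-macro list: each consumer defines CONFIG(Name, Type, Default), includes the file, and undefines the macro again. A standalone illustration of that expansion follows; the MY_EXTRA_OPTION entry is purely hypothetical and only shows how an additional key would be declared.

#include <iostream>
#include <string>
#include <unordered_map>

// Expand each CONFIG(...) entry into a name/default pair. ConfigSource.cc does the
// same via #include "Config.lst"; the entries are inlined here to stay standalone.
#define CONFIG(Name, Type, Default) {#Name, Default},
static const std::unordered_map<std::string, std::string> defaults = {
  CONFIG(NPUD_LOG_ENABLE, bool, "0") // the real entry from Config.lst above
  CONFIG(MY_EXTRA_OPTION, int, "42") // hypothetical entry, for illustration only
};
#undef CONFIG

int main()
{
  for (const auto &kv : defaults)
    std::cout << kv.first << " -> " << kv.second << '\n';
  return 0;
}
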
diff --git a/runtime/service/npud/util/ConfigSource.cc b/runtime/service/npud/util/ConfigSource.cc
new file mode 100644
index 000000000..7a14b0200
--- /dev/null
+++ b/runtime/service/npud/util/ConfigSource.cc
@@ -0,0 +1,126 @@
+/*
+ * Copyright (c) 2022 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "ConfigSource.h"
+
+#include <misc/EnvConfigSource.h>
+#include <misc/GeneralConfigSource.h>
+#include <misc/IConfigSource.h>
+
+#include <algorithm>
+#include <array>
+#include <cassert>
+#include <memory>
+
+namespace npud
+{
+namespace util
+{
+
+using namespace nnfw::misc;
+
+static std::unique_ptr<IConfigSource> _source;
+
+void config_source(std::unique_ptr<IConfigSource> &&source) { _source = std::move(source); }
+
+static IConfigSource *config_source()
+{
+ if (!_source)
+ {
+#ifdef ENVVAR_FOR_DEFAULT_CONFIG
+ // Default ConfigSource is EnvConfigSource
+ _source = std::make_unique<EnvConfigSource>();
+#else
+ _source = std::make_unique<GeneralConfigSource>();
+#endif // ENVVAR_FOR_DEFAULT_CONFIG
+ }
+ return _source.get();
+}
+
+static std::string getConfigOrDefault(const std::string &key)
+{
+ static std::unordered_map<std::string, std::string> defaults;
+ if (defaults.empty())
+ {
+#define CONFIG(Name, Type, Default) \
+ { \
+ auto name = std::string{#Name}; \
+ defaults.emplace(name, std::string{Default}); \
+ }
+
+#include "Config.lst"
+
+#undef CONFIG
+ }
+
+ // Treat empty string and absence of the value to be the same
+ auto ret = config_source()->get(key);
+ // if not found search from defaults
+ if (ret.empty())
+ {
+ auto itr = defaults.find(key);
+ if (itr != defaults.end())
+ {
+ // Return the default value if exists
+ ret = itr->second;
+ }
+ }
+
+ return ret;
+}
+
+bool toBool(const std::string &val)
+{
+ static const std::array<std::string, 5> false_list{"0", "OFF", "FALSE", "N", "NO"};
+ auto false_found = std::find(false_list.begin(), false_list.end(), val);
+ return false_found == false_list.end();
+}
+
+int toInt(const std::string &val) { return std::stoi(val); }
+
+bool getConfigBool(const std::string &key)
+{
+ auto raw = getConfigOrDefault(key);
+ return toBool(raw);
+}
+
+int getConfigInt(const std::string &key)
+{
+ auto raw = getConfigOrDefault(key);
+ return toInt(raw);
+}
+
+std::string getConfigString(const std::string &key) { return getConfigOrDefault(key); }
+
+} // namespace util
+} // namespace npud
+
+namespace npud
+{
+namespace util
+{
+namespace config
+{
+
+#define CONFIG(Name, Type, Default) const char *Name = #Name;
+
+#include "Config.lst"
+
+#undef CONFIG
+
+} // namespace config
+} // namespace util
+} // namespace npud
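
A minimal usage sketch for the helpers above. The include path is an assumption, and the sketch assumes a build with ENVVAR_NPUD_CONFIG enabled so the default source is EnvConfigSource and plain environment variables are honoured; otherwise only the Config.lst defaults apply.

#include <cstdlib>
#include <iostream>

#include "util/ConfigSource.h" // assumed include path inside runtime/service/npud

int main()
{
  // Lookup order with the env-based default source:
  // environment variable first, then the Config.lst default ("0" here).
  setenv("NPUD_LOG_ENABLE", "1", /*overwrite=*/1);

  std::cout << std::boolalpha << "NPUD_LOG_ENABLE = "
            << npud::util::getConfigBool("NPUD_LOG_ENABLE") << std::endl;
  return 0;
}
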
diff --git a/runtime/service/npud/util/ConfigSource.h b/runtime/service/npud/util/ConfigSource.h
new file mode 100644
index 000000000..f4ecc79a5
--- /dev/null
+++ b/runtime/service/npud/util/ConfigSource.h
@@ -0,0 +1,51 @@
+/*
+ * Copyright (c) 2022 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __ONE_SERVICE_NPUD_UTIL_CONFIG_SOURCE_H__
+#define __ONE_SERVICE_NPUD_UTIL_CONFIG_SOURCE_H__
+
+#include <string>
+
+namespace npud
+{
+namespace util
+{
+
+bool getConfigBool(const std::string &key);
+int getConfigInt(const std::string &key);
+std::string getConfigString(const std::string &key);
+
+} // namespace util
+} // namespace npud
+
+namespace npud
+{
+namespace util
+{
+namespace config
+{
+
+#define CONFIG(Name, Type, Default) extern const char *Name;
+
+#include "Config.lst"
+
+#undef CONFIG
+
+} // namespace config
+} // namespace util
+} // namespace npud
+
+#endif // __ONE_SERVICE_NPUD_UTIL_CONFIG_SOURCE_H__
diff --git a/runtime/service/npud/util/Logging.h b/runtime/service/npud/util/Logging.h
new file mode 100644
index 000000000..0b75b3966
--- /dev/null
+++ b/runtime/service/npud/util/Logging.h
@@ -0,0 +1,88 @@
+/*
+ * Copyright (c) 2022 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __ONE_SERVICE_NPUD_UTIL_LOGGING_H__
+#define __ONE_SERVICE_NPUD_UTIL_LOGGING_H__
+
+#include <iostream>
+#include <cstring>
+
+#include "ConfigSource.h"
+
+namespace npud
+{
+namespace util
+{
+namespace logging
+{
+class Context
+{
+public:
+ Context() noexcept : _enabled{false}
+ {
+ const auto env = util::getConfigBool(util::config::NPUD_LOG_ENABLE);
+
+ if (env)
+ {
+ _enabled = true;
+ }
+ }
+
+ static Context &get() noexcept
+ {
+ static Context ctx;
+ return ctx;
+ }
+
+public:
+ bool enabled(void) const { return _enabled; }
+
+private:
+ bool _enabled;
+};
+
+static Context &ctx = Context::get();
+
+inline std::string decorated_name(const char *input)
+{
+ const int min_prefix = 16;
+ std::string prefix(input);
+ auto len_prefix = prefix.size();
+ if (len_prefix > min_prefix)
+ return "[" + prefix + "] ";
+ std::string spaces((min_prefix - len_prefix) / 2, ' ');
+ return (len_prefix % 2 ? "[ " : "[") + spaces + prefix + spaces + "] ";
+}
+} // namespace logging
+} // namespace util
+} // namespace npud
+
+#define VERBOSE(name) \
+ if (::npud::util::logging::ctx.enabled()) \
+ std::cout << ::npud::util::logging::decorated_name(#name)
+
+#define VERBOSE_F() \
+ if (::npud::util::logging::ctx.enabled()) \
+ std::cout << ::npud::util::logging::decorated_name(__func__)
+
+#define WHEN_LOG_ENABLED(METHOD) \
+ if (::npud::util::logging::ctx.enabled()) \
+ do \
+ { \
+ METHOD; \
+ } while (0)
+
+#endif // __ONE_SERVICE_NPUD_UTIL_LOGGING_H__
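
A short usage sketch of the macros above (assumed include path). Because the logging Context singleton reads NPUD_LOG_ENABLE during static initialization, the variable has to be set in the environment before the process starts, e.g. NPUD_LOG_ENABLE=1 ./npud_log_demo.

#include "util/Logging.h" // assumed include path inside runtime/service/npud

int main()
{
  // Tags shorter than 16 characters are centre-padded by decorated_name(),
  // so the bracketed prefixes line up in the log output.
  VERBOSE(Demo) << "verbose logging is enabled\n";
  VERBOSE_F() << "tag taken from __func__\n";
  return 0;
}
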