author     Hyeongseok Oh <hseok82.oh@samsung.com>  2023-04-12 15:42:02 +0900
committer  Hyeongseok Oh <hseok82.oh@samsung.com>  2023-04-12 15:42:02 +0900
commit     323663bb115ef625642391a5a8e9b35fee8b2ae3 (patch)
tree       17e2a6b91535e6f53f4cacda5e4db6aa0303dd22 /runtime/onert
parent     c690d52bdd137ed6a17353aa7af35e8141ece77b (diff)
download   nnfw-323663bb115ef625642391a5a8e9b35fee8b2ae3.tar.gz
           nnfw-323663bb115ef625642391a5a8e9b35fee8b2ae3.tar.bz2
           nnfw-323663bb115ef625642391a5a8e9b35fee8b2ae3.zip

Imported Upstream version 1.22.0 (upstream/1.22.0)
Diffstat (limited to 'runtime/onert')
-rw-r--r--  runtime/onert/api/include/nnfw_version.h  2
-rw-r--r--  runtime/onert/api/src/nnfw_api_internal.cc  370
-rw-r--r--  runtime/onert/api/src/nnfw_api_internal.h  9
-rw-r--r--  runtime/onert/backend/acl_cl/KernelGenerator.cc  2
-rw-r--r--  runtime/onert/backend/acl_common/AclTensorBuilder.h  20
-rw-r--r--  runtime/onert/backend/cl_common/include/cl_common/BackendContext.h  2
-rw-r--r--  runtime/onert/backend/cpu/BackendContext.cc  2
-rw-r--r--  runtime/onert/backend/cpu/KernelGenerator.cc  18
-rw-r--r--  runtime/onert/backend/gpu_cl/Backend.h  16
-rw-r--r--  runtime/onert/backend/gpu_cl/BackendContext.cc  26
-rw-r--r--  runtime/onert/backend/gpu_cl/BackendContext.h  2
-rw-r--r--  runtime/onert/backend/gpu_cl/CMakeLists.txt  43
-rw-r--r--  runtime/onert/backend/gpu_cl/ClFunction.h  32
-rw-r--r--  runtime/onert/backend/gpu_cl/Config.h  3
-rw-r--r--  runtime/onert/backend/gpu_cl/KernelGenerator.cc  612
-rw-r--r--  runtime/onert/backend/gpu_cl/KernelGenerator.h  15
-rw-r--r--  runtime/onert/backend/gpu_cl/MemoryManager.h  115
-rw-r--r--  runtime/onert/backend/gpu_cl/TensorBuilder.cc  24
-rw-r--r--  runtime/onert/backend/gpu_cl/TensorBuilder.h  7
-rw-r--r--  runtime/onert/backend/gpu_cl/TensorManager.cc  38
-rw-r--r--  runtime/onert/backend/gpu_cl/TensorManager.h  22
-rw-r--r--  runtime/onert/backend/gpu_cl/TensorRegistry.h  2
-rw-r--r--  runtime/onert/backend/gpu_cl/Utils.h  155
-rw-r--r--  runtime/onert/backend/gpu_cl/ex/InferenceContextEx.h  108
-rw-r--r--  runtime/onert/backend/gpu_cl/operand/CLTensor.cc  8
-rw-r--r--  runtime/onert/backend/gpu_cl/operand/CLTensor.h  4
-rw-r--r--  runtime/onert/backend/gpu_cl/operand/ICLTensor.cc  40
-rw-r--r--  runtime/onert/backend/gpu_cl/operand/ICLTensor.h  49
-rw-r--r--  runtime/onert/backend/ruy/BackendContext.cc  2
-rw-r--r--  runtime/onert/backend/trix/BackendContext.cc  2
-rw-r--r--  runtime/onert/backend/trix/BatchThreadPool.cc  69
-rw-r--r--  runtime/onert/backend/trix/BatchThreadPool.h  130
-rw-r--r--  runtime/onert/backend/trix/Convert.cc  54
-rw-r--r--  runtime/onert/backend/trix/Convert.h  93
-rw-r--r--  runtime/onert/backend/trix/DevContext.cc  307
-rw-r--r--  runtime/onert/backend/trix/DevContext.h  197
-rw-r--r--  runtime/onert/backend/trix/KernelGenerator.cc  4
-rw-r--r--  runtime/onert/backend/trix/ops/BulkLayer.cc  137
-rw-r--r--  runtime/onert/backend/trix/ops/BulkLayer.h  3
-rw-r--r--  runtime/onert/backend/xnnpack/BackendContext.cc  2
-rw-r--r--  runtime/onert/core/CMakeLists.txt  2
-rw-r--r--  runtime/onert/core/include/backend/basic/BackendContextHelpers.h  18
-rw-r--r--  runtime/onert/core/include/compiler/Compiler.h  107
-rw-r--r--  runtime/onert/core/include/compiler/CompilerFactory.h  47
-rw-r--r--  runtime/onert/core/include/compiler/CompilerOptions.h  91
-rw-r--r--  runtime/onert/core/include/compiler/ICompiler.h  63
-rw-r--r--  runtime/onert/core/include/compiler/LoweredGraph.h  5
-rw-r--r--  runtime/onert/core/include/compiler/StaticShapeInferer.h  9
-rw-r--r--  runtime/onert/core/include/exec/Execution.h  119
-rw-r--r--  runtime/onert/core/include/exec/Executors.h  71
-rw-r--r--  runtime/onert/core/include/exec/FunctionSequence.h  2
-rw-r--r--  runtime/onert/core/include/exec/IExecutor.h  17
-rw-r--r--  runtime/onert/core/include/exec/IExecutors.h  98
-rw-r--r--  runtime/onert/core/include/ir/Graph.h  36
-rw-r--r--  runtime/onert/core/include/ir/Index.h  4
-rw-r--r--  runtime/onert/core/include/ir/NNPkg.h  102
-rw-r--r--  runtime/onert/core/include/ir/OperandIndexSequence.h  7
-rw-r--r--  runtime/onert/core/include/ir/Shape.h  6
-rw-r--r--  runtime/onert/core/include/util/Config.lst  1
-rw-r--r--  runtime/onert/core/include/util/Index.h  7
-rw-r--r--  runtime/onert/core/include/util/ObjectManager.h  4
-rw-r--r--  runtime/onert/core/include/util/Utils.h  37
-rw-r--r--  runtime/onert/core/src/backend/basic/MemoryManager.cc  2
-rw-r--r--  runtime/onert/core/src/backend/basic/MemoryPlanner.cc  2
-rw-r--r--  runtime/onert/core/src/backend/basic/StaticTensorManager.cc  2
-rw-r--r--  runtime/onert/core/src/backend/builtin/BackendContext.cc  2
-rw-r--r--  runtime/onert/core/src/backend/builtin/IOTensor.h  2
-rw-r--r--  runtime/onert/core/src/backend/builtin/KernelGenerator.cc  8
-rw-r--r--  runtime/onert/core/src/backend/builtin/KernelGenerator.h  9
-rw-r--r--  runtime/onert/core/src/backend/builtin/kernel/IfLayer.cc  8
-rw-r--r--  runtime/onert/core/src/backend/builtin/kernel/IfLayer.h  8
-rw-r--r--  runtime/onert/core/src/backend/builtin/kernel/PermuteLayer.cc  8
-rw-r--r--  runtime/onert/core/src/backend/builtin/kernel/WhileLayer.cc  10
-rw-r--r--  runtime/onert/core/src/backend/builtin/kernel/WhileLayer.h  8
-rw-r--r--  runtime/onert/core/src/compiler/Compiler.cc  772
-rw-r--r--  runtime/onert/core/src/compiler/CompilerFactory.cc (renamed from runtime/onert/backend/gpu_cl/TensorBuilderHelper.h)  37
-rw-r--r--  runtime/onert/core/src/compiler/CompilerOptions.cc  145
-rw-r--r--  runtime/onert/core/src/compiler/ExecutorFactory.cc  51
-rw-r--r--  runtime/onert/core/src/compiler/ExecutorFactory.h  25
-rw-r--r--  runtime/onert/core/src/compiler/Fp32ToFp16Converter.cc  24
-rw-r--r--  runtime/onert/core/src/compiler/HEScheduler.cc  2
-rw-r--r--  runtime/onert/core/src/compiler/HEScheduler.test.cc  4
-rw-r--r--  runtime/onert/core/src/compiler/LoweredGraph.cc  8
-rw-r--r--  runtime/onert/core/src/compiler/ManualScheduler.cc  4
-rw-r--r--  runtime/onert/core/src/compiler/MultiModelCompiler.cc  214
-rw-r--r--  runtime/onert/core/src/compiler/MultiModelCompiler.h  75
-rw-r--r--  runtime/onert/core/src/compiler/StaticShapeInferer.cc  98
-rw-r--r--  runtime/onert/core/src/compiler/TensorRegistries.h  2
-rw-r--r--  runtime/onert/core/src/compiler/pass/OddOutputPass.cc  4
-rw-r--r--  runtime/onert/core/src/compiler/pass/PassRunner.cc  2
-rw-r--r--  runtime/onert/core/src/compiler/pass/PermutationInsertionPass.cc  4
-rw-r--r--  runtime/onert/core/src/exec/Execution.cc  247
-rw-r--r--  runtime/onert/core/src/exec/Execution.test.cc  337
-rw-r--r--  runtime/onert/core/src/exec/ExecutionObservee.cc  8
-rw-r--r--  runtime/onert/core/src/exec/ExecutionObservers.h  2
-rw-r--r--  runtime/onert/core/src/exec/ExecutorBase.cc  57
-rw-r--r--  runtime/onert/core/src/exec/ExecutorBase.h  13
-rw-r--r--  runtime/onert/core/src/exec/Executors.cc  672
-rw-r--r--  runtime/onert/core/src/exec/Executors.h  169
-rw-r--r--  runtime/onert/core/src/exec/IPermuteFunction.cc  320
-rw-r--r--  runtime/onert/core/src/exec/IPermuteFunction.h  99
-rw-r--r--  runtime/onert/core/src/exec/IPermuteFunction.test.cc  902
-rw-r--r--  runtime/onert/core/src/exec/ParallelScheduler.cc  2
-rw-r--r--  runtime/onert/core/src/exec/SingleModelExecutors.cc  61
-rw-r--r--  runtime/onert/core/src/exec/SingleModelExecutors.h  70
-rw-r--r--  runtime/onert/core/src/exec/ThreadPool.cc  2
-rw-r--r--  runtime/onert/core/src/interp/Buffer.h  91
-rw-r--r--  runtime/onert/core/src/interp/ExecEnv.h  212
-rw-r--r--  runtime/onert/core/src/interp/InterpExecutor.cc  127
-rw-r--r--  runtime/onert/core/src/interp/InterpExecutor.h  89
-rw-r--r--  runtime/onert/core/src/interp/InterpExecutor.test.cc  355
-rw-r--r--  runtime/onert/core/src/interp/InterpOps.lst  73
-rw-r--r--  runtime/onert/core/src/interp/Interpreter.cc  184
-rw-r--r--  runtime/onert/core/src/interp/Interpreter.h  64
-rw-r--r--  runtime/onert/core/src/interp/Registration.h  43
-rw-r--r--  runtime/onert/core/src/interp/Tensor.cc  57
-rw-r--r--  runtime/onert/core/src/interp/Tensor.h  189
-rw-r--r--  runtime/onert/core/src/interp/operations/BinaryArithmeticOps.cc  204
-rw-r--r--  runtime/onert/core/src/interp/operations/Concat.cc  147
-rw-r--r--  runtime/onert/core/src/interp/operations/Conv2D.cc  151
-rw-r--r--  runtime/onert/core/src/interp/operations/DepthwiseConv2D.cc  156
-rw-r--r--  runtime/onert/core/src/interp/operations/ElementwiseActivations.cc  160
-rw-r--r--  runtime/onert/core/src/interp/operations/FullyConnected.cc  134
-rw-r--r--  runtime/onert/core/src/interp/operations/Gather.cc  138
-rw-r--r--  runtime/onert/core/src/interp/operations/InstanceNorm.cc  121
-rw-r--r--  runtime/onert/core/src/interp/operations/OperationUtil.h  203
-rw-r--r--  runtime/onert/core/src/interp/operations/Pad.cc  106
-rw-r--r--  runtime/onert/core/src/interp/operations/Pool2D.cc  140
-rw-r--r--  runtime/onert/core/src/interp/operations/Reshape.cc  63
-rw-r--r--  runtime/onert/core/src/interp/operations/Softmax.cc  123
-rw-r--r--  runtime/onert/core/src/interp/operations/TransposeConv.cc  141
-rw-r--r--  runtime/onert/core/src/ir/Shape.cc  8
-rw-r--r--  runtime/onert/core/src/ir/Shape.test.cc  2
-rw-r--r--  runtime/onert/core/src/util/ChromeTracingEventWriter.cc  6
-rw-r--r--  runtime/onert/core/src/util/MDTableEventWriter.cc  8
-rw-r--r--  runtime/onert/core/src/util/SNPEEventWriter.cc  22
-rw-r--r--  runtime/onert/core/src/util/ShapeInference.cc  6
-rw-r--r--  runtime/onert/frontend/base_loader/include/base_loader.h  30
-rw-r--r--  runtime/onert/frontend/circle/src/circle_loader.cc  4
-rw-r--r--  runtime/onert/frontend/nnapi/CMakeLists.txt  2
-rw-r--r--  runtime/onert/frontend/nnapi/compilation.cc  4
-rw-r--r--  runtime/onert/frontend/nnapi/execution.cc  2
-rw-r--r--  runtime/onert/frontend/nnapi/wrapper/ANeuralNetworksCompilation.cc  5
-rw-r--r--  runtime/onert/frontend/nnapi/wrapper/ANeuralNetworksCompilation.h  6
-rw-r--r--  runtime/onert/frontend/nnapi/wrapper/ANeuralNetworksExecution.h  2
-rw-r--r--  runtime/onert/frontend/tflite/src/tflite_loader.cc  5
146 files changed, 5422 insertions, 6261 deletions
diff --git a/runtime/onert/api/include/nnfw_version.h b/runtime/onert/api/include/nnfw_version.h
index 2fbb96f31..be30ee296 100644
--- a/runtime/onert/api/include/nnfw_version.h
+++ b/runtime/onert/api/include/nnfw_version.h
@@ -21,6 +21,6 @@
* NNFW_VERSION is a uint32 value representing nnfw runtime version
* in 0xMMmmmmPP, where MM = major, mmmm = minor, PP = patch
*/
-#define NNFW_VERSION 0x01001500
+#define NNFW_VERSION 0x01001600
#endif // __NNFW_VERSION_H__
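
Note on the hunk above: per the 0xMMmmmmPP packing described in the header comment, the new value 0x01001600 encodes major 0x01 = 1, minor 0x0016 = 22, patch 0x00 = 0, i.e. the 1.22.0 release named in the commit message (the previous value 0x01001500 was 1.21.0). A minimal decoding sketch, not part of the patch, with the shifts and masks taken from that comment:

#include <cstdint>
#include <cstdio>

int main()
{
  const uint32_t version = 0x01001600;            // value introduced by this patch
  const unsigned major = (version >> 24) & 0xFF;  // MM
  const unsigned minor = (version >> 8) & 0xFFFF; // mmmm
  const unsigned patch = version & 0xFF;          // PP
  std::printf("%u.%u.%u\n", major, minor, patch); // prints "1.22.0"
  return 0;
}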
diff --git a/runtime/onert/api/src/nnfw_api_internal.cc b/runtime/onert/api/src/nnfw_api_internal.cc
index 9b43dd381..8eedb5314 100644
--- a/runtime/onert/api/src/nnfw_api_internal.cc
+++ b/runtime/onert/api/src/nnfw_api_internal.cc
@@ -16,7 +16,7 @@
#include "nnfw_api_internal.h"
#include "CustomKernelRegistry.h"
-#include "compiler/Compiler.h"
+#include "compiler/CompilerFactory.h"
#include "util/ConfigSource.h"
#include "util/Exceptions.h"
#include "util/logging.h"
@@ -208,29 +208,24 @@ NNFW_STATUS nnfw_session::create(nnfw_session **session)
{
if (session == nullptr)
return NNFW_STATUS_UNEXPECTED_NULL;
-
- // Create session
- *session = new (std::nothrow) nnfw_session();
- if (*session == nullptr)
+ try
{
- std::cerr << "Error during session creation" << std::endl;
- return NNFW_STATUS_OUT_OF_MEMORY;
+ auto new_session = std::unique_ptr<nnfw_session>(new nnfw_session());
+ new_session->_kernel_registry = std::make_shared<onert::api::CustomKernelRegistry>();
+ *session = new_session.release();
}
-
- // Initialize fields
- try
+ catch (const std::bad_alloc &e)
{
- (*session)->_kernel_registry = std::make_shared<onert::api::CustomKernelRegistry>();
+ std::cerr << "Error during session creation" << std::endl;
+ *session = nullptr; // Set nullptr on error to keep the old behavior
+ return NNFW_STATUS_OUT_OF_MEMORY;
}
catch (const std::exception &e)
{
std::cerr << "Error during session initialization : " << e.what() << std::endl;
- delete *session;
- *session = nullptr;
-
+ *session = nullptr; // Set nullptr on error to keep the old behavior
return NNFW_STATUS_ERROR;
}
-
return NNFW_STATUS_NO_ERROR;
}
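
The create() rewrite above replaces the nothrow new/delete bookkeeping with a scoped std::unique_ptr that is released to the caller only after initialization succeeds, so any exception thrown mid-setup frees the partially built session automatically. A minimal sketch of that pattern in isolation (type and function names here are illustrative, not from the patch):

#include <memory>

struct Session
{
  // stand-in for nnfw_session; construction or later setup may throw
};

Session *create_session()
{
  auto scoped = std::make_unique<Session>(); // owns the object during setup
  // ... further initialization that may throw (e.g. building a kernel registry) ...
  return scoped.release(); // hand ownership to the caller only on success
}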
@@ -331,7 +326,6 @@ NNFW_STATUS nnfw_session::load_model_from_nnpackage(const char *package_dir)
std::string manifest_file_name = package_path + "/metadata/MANIFEST";
std::ifstream mfs(manifest_file_name);
- _package_file_path = package_path;
// extract the filename of the first(index 0) model
// e.g. In MANIFEST file, { "models" : [ "firstmodel.tflite", "2nd.tflite" ] }
Json::Value root;
@@ -351,7 +345,14 @@ NNFW_STATUS nnfw_session::load_model_from_nnpackage(const char *package_dir)
}
}
_nnpkg = std::make_shared<onert::ir::NNPkg>();
- for (uint32_t i = 0; i < models.size(); ++i)
+ auto num_models = models.size();
+ if (num_models == 0 || (num_models - 1) > onert::ir::ModelIndex::max())
+ {
+ std::cerr << "Invalid model size - " << std::to_string(num_models) << std::endl;
+ return NNFW_STATUS_ERROR;
+ }
+
+ for (uint16_t i = 0; i < num_models; ++i)
{
auto model_file_path = package_path + std::string("/") + models[i].asString();
auto model_type = model_types[i].asString();
@@ -390,6 +391,8 @@ NNFW_STATUS nnfw_session::load_model_from_nnpackage(const char *package_dir)
for (uint32_t j = 0; j < tos.size(); ++j)
_nnpkg->addEdge(toIODesc(fromtos[i]["from"].asString()), toIODesc(tos[j].asString()));
}
+
+ _nnpkg->verify();
_state = State::MODEL_LOADED;
}
catch (const std::exception &e)
@@ -420,14 +423,7 @@ NNFW_STATUS nnfw_session::prepare()
try
{
- // TODO: Compile all models in case of multiple models
- if (_nnpkg->model_count() > 2)
- {
- std::cerr << "Error during model prepare : more than 3 multiple models are not supported yet."
- << std::endl;
- return NNFW_STATUS_ERROR;
- }
- auto compiler = std::make_unique<onert::compiler::Compiler>(_nnpkg, _coptions);
+ auto compiler = onert::compiler::CompilerFactory::get().create(_nnpkg, _coptions);
_nnpkg.reset();
_compiler_artifact = compiler->compile();
_execution = std::make_unique<onert::exec::Execution>(_compiler_artifact->_executors);
@@ -442,50 +438,10 @@ NNFW_STATUS nnfw_session::prepare()
return NNFW_STATUS_NO_ERROR;
}
-NNFW_STATUS nnfw_session::prepare_pipeline(const char *map_file_path)
+NNFW_STATUS nnfw_session::prepare_pipeline(const char *)
{
- // NOTE. If users want to run prepare_pipeline() more than one time, this could be removed.
- if (!isStateModelLoaded())
- {
- std::cerr << "Error during model prepare pipeline : ";
- if (isStateInitialized())
- {
- std::cerr << "prepare_pipeline should be run once";
- }
- else
- {
- std::cerr << "invalid state";
- }
- std::cerr << std::endl;
- return NNFW_STATUS_INVALID_STATE;
- }
-
- try
- {
- auto model = _nnpkg->primary_model();
- auto compiler = std::make_unique<onert::compiler::Compiler>(model, *_coptions[0]);
- _nnpkg.reset();
- auto artifacts = compiler->compile(_package_file_path.c_str(), map_file_path);
-
- for (auto it = artifacts.begin(); it != artifacts.end(); ++it)
- {
- _executions.push_back(std::make_shared<onert::exec::Execution>(it->get()->_executors));
- }
- make_dependency();
- _threads.resize(_executions.size());
- for (uint32_t i = 0; i < _threads.size(); i++)
- {
- _threads[i] = std::thread(&onert::exec::Execution::runInference, _executions[i].get());
- }
- }
- catch (const std::exception &e)
- {
- std::cerr << "Error during model prepare : " << e.what() << std::endl;
- return NNFW_STATUS_ERROR;
- }
-
- _state = State::PREPARED;
- return NNFW_STATUS_NO_ERROR;
+ std::cerr << "Pipeline prepare_pipeline: deprecated feature " << std::endl;
+ return NNFW_STATUS_ERROR;
}
NNFW_STATUS nnfw_session::run()
@@ -497,12 +453,6 @@ NNFW_STATUS nnfw_session::run()
return NNFW_STATUS_INVALID_STATE;
}
- if (!_executions.empty())
- {
- std::cerr << "Error during nnfw_session::run : not supported for pipeline run" << std::endl;
- return NNFW_STATUS_ERROR;
- }
-
try
{
_execution->execute();
@@ -532,13 +482,6 @@ NNFW_STATUS nnfw_session::run_async()
return NNFW_STATUS_INVALID_STATE;
}
- if (!_executions.empty())
- {
- std::cerr << "Error during nnfw_session::run_async : not supported for pipeline run"
- << std::endl;
- return NNFW_STATUS_ERROR;
- }
-
_execution->startExecute();
_state = State::RUNNING;
@@ -554,12 +497,6 @@ NNFW_STATUS nnfw_session::await()
return NNFW_STATUS_ERROR;
}
- if (!_executions.empty())
- {
- std::cerr << "Error during nnfw_session::await : not supported for pipeline run" << std::endl;
- return NNFW_STATUS_ERROR;
- }
-
_execution->waitFinish();
_state = State::FINISHED_RUN;
@@ -583,13 +520,6 @@ NNFW_STATUS nnfw_session::set_input(uint32_t index, NNFW_TYPE /*type*/, const vo
return NNFW_STATUS_ERROR;
}
- if (!_executions.empty())
- {
- std::cerr << "Error during nnfw_session::set_input : not supported for pipeline run"
- << std::endl;
- return NNFW_STATUS_ERROR;
- }
-
try
{
_execution->setInput(onert::ir::IOIndex(index), buffer, length);
@@ -619,13 +549,6 @@ NNFW_STATUS nnfw_session::set_output(uint32_t index, NNFW_TYPE /*type*/, void *b
return NNFW_STATUS_ERROR;
}
- if (!_executions.empty())
- {
- std::cerr << "Error during nnfw_session::set_output : not supported for pipeline run"
- << std::endl;
- return NNFW_STATUS_ERROR;
- }
-
try
{
_execution->setOutput(onert::ir::IOIndex(index), buffer, length);
@@ -650,7 +573,7 @@ NNFW_STATUS nnfw_session::input_size(uint32_t *number)
std::cerr << "Error during nnfw_session::input_size, number is null pointer." << std::endl;
return NNFW_STATUS_UNEXPECTED_NULL;
}
- *number = primary_subgraph()->getInputs().size();
+ *number = getInputSize();
}
catch (const std::exception &e)
{
@@ -672,7 +595,7 @@ NNFW_STATUS nnfw_session::output_size(uint32_t *number)
std::cerr << "Error during nnfw_session::output_size, number is null pointer." << std::endl;
return NNFW_STATUS_UNEXPECTED_NULL;
}
- *number = primary_subgraph()->getOutputs().size();
+ *number = getOutputSize();
}
catch (const std::exception &e)
{
@@ -684,6 +607,13 @@ NNFW_STATUS nnfw_session::output_size(uint32_t *number)
NNFW_STATUS nnfw_session::set_input_layout(uint32_t index, NNFW_LAYOUT layout)
{
+ if (!isStatePreparedOrFinishedRun())
+ {
+ std::cerr << "Error during nnfw_session::set_input_layout : "
+ << "run should be run after prepare" << std::endl;
+ return NNFW_STATUS_INVALID_STATE;
+ }
+
try
{
if (layout != NNFW_LAYOUT_NONE && layout != NNFW_LAYOUT_CHANNELS_FIRST &&
@@ -692,14 +622,8 @@ NNFW_STATUS nnfw_session::set_input_layout(uint32_t index, NNFW_LAYOUT layout)
std::cerr << "Error during nnfw_session::set_input_layout, not supported layout" << std::endl;
return NNFW_STATUS_ERROR;
}
- if (_execution)
- {
- _execution->setInputLayout(onert::ir::IOIndex(index), convertLayout(layout));
- }
- else
- {
- _executions.at(0)->setInputLayout(onert::ir::IOIndex(index), convertLayout(layout));
- }
+
+ _execution->setInputLayout(onert::ir::IOIndex(index), convertLayout(layout));
}
catch (const std::exception &e)
{
@@ -711,6 +635,13 @@ NNFW_STATUS nnfw_session::set_input_layout(uint32_t index, NNFW_LAYOUT layout)
NNFW_STATUS nnfw_session::set_output_layout(uint32_t index, NNFW_LAYOUT layout)
{
+ if (!isStatePreparedOrFinishedRun())
+ {
+ std::cerr << "Error during nnfw_session::set_output_layout : "
+ << "run should be run after prepare" << std::endl;
+ return NNFW_STATUS_INVALID_STATE;
+ }
+
try
{
if (layout != NNFW_LAYOUT_NONE && layout != NNFW_LAYOUT_CHANNELS_FIRST &&
@@ -720,15 +651,8 @@ NNFW_STATUS nnfw_session::set_output_layout(uint32_t index, NNFW_LAYOUT layout)
<< std::endl;
return NNFW_STATUS_ERROR;
}
- if (_execution)
- {
- _execution->setOutputLayout(onert::ir::IOIndex(index), convertLayout(layout));
- }
- else
- {
- _executions.at(_executions.size() - 1)
- ->setOutputLayout(onert::ir::IOIndex(index), convertLayout(layout));
- }
+
+ _execution->setOutputLayout(onert::ir::IOIndex(index), convertLayout(layout));
}
catch (const std::exception &e)
{
@@ -771,27 +695,13 @@ NNFW_STATUS nnfw_session::apply_tensorinfo(uint32_t index, nnfw_tensorinfo ti)
if (!isStatePreparedOrFinishedRun())
{
- // In this case, if we apply input shape in primary_subgraph, it will propagate after
- // compilation and excution
- auto model = _nnpkg->primary_model();
- auto primary_subgraph = model->primary_subgraph();
- auto ind = primary_subgraph->getInputs().at(index);
- auto &input = primary_subgraph->operands().at(ind);
- // overwrite input shape with the shape from ti
- input.info().shape(new_shape);
+ // In this case, if we apply input shape, it will propagate after compilation and excution
+ auto &info = _nnpkg->inputInfo(index);
+ info.shape(new_shape);
}
else // when called after nnfw_session::prepare()
- {
- if (_execution)
- {
- _execution->changeInputShape(onert::ir::IOIndex(index), new_shape);
- }
- else
- {
- _executions.at(0)->changeInputShape(onert::ir::IOIndex(index), new_shape);
- }
- }
+ _execution->changeInputShape(onert::ir::IOIndex(index), new_shape);
return NNFW_STATUS_NO_ERROR;
}
@@ -815,21 +725,26 @@ NNFW_STATUS nnfw_session::input_tensorinfo(uint32_t index, nnfw_tensorinfo *ti)
<< std::endl;
return NNFW_STATUS_UNEXPECTED_NULL;
}
- if (index >= primary_subgraph()->getInputs().size())
+
+ if (index >= getInputSize())
{
std::cerr << "Error during nnfw_session::input_tensorinfo, index is out of range."
<< std::endl;
return NNFW_STATUS_ERROR;
}
- auto opidx = primary_subgraph()->getInputs().at(index);
- auto shape = primary_subgraph()->operands().at(opidx).shape();
- if (isStatePreparedOrFinishedRun())
+
+ if (isStateModelLoaded())
+ {
+ auto info = _nnpkg->inputInfo(index);
+ fillTensorInfo(ti, info.shape(), info.typeInfo().type());
+ }
+ else
{
- shape = _execution ? _execution->getInputShape(onert::ir::IOIndex{index})
- : _executions.at(0)->getInputShape(onert::ir::IOIndex{index});
+ auto io_index = onert::ir::IOIndex{index};
+ auto shape = _execution->getInputShape(io_index);
+ auto dtype = _compiler_artifact->_executors->inputInfo(io_index).typeInfo().type();
+ fillTensorInfo(ti, shape, dtype);
}
- auto dtype = primary_subgraph()->operands().at(opidx).typeInfo().type();
- fillTensorInfo(ti, shape, dtype);
}
catch (const std::exception &e)
{
@@ -851,26 +766,27 @@ NNFW_STATUS nnfw_session::output_tensorinfo(uint32_t index, nnfw_tensorinfo *ti)
return NNFW_STATUS_UNEXPECTED_NULL;
}
- if (index >= primary_subgraph()->getOutputs().size())
- {
- std::cerr << "Error during nnfw_session::output_tensorinfo, index is out of range."
- << std::endl;
- return NNFW_STATUS_ERROR;
- }
-
try
{
- auto opidx = primary_subgraph()->getOutputs().at(index);
- auto shape = primary_subgraph()->operands().at(opidx).shape();
- // If it is called after `nnfw_run` then get the shape from Execution, not from the graph
- if (isStateFinishedRun())
+ if (index >= getOutputSize())
+ {
+ std::cerr << "Error during nnfw_session::output_tensorinfo, index is out of range."
+ << std::endl;
+ return NNFW_STATUS_ERROR;
+ }
+
+ if (isStateModelLoaded())
{
- shape = _execution
- ? _execution->getOutputShape(onert::ir::IOIndex{index})
- : _executions.at(_executions.size() - 1)->getOutputShape(onert::ir::IOIndex{index});
+ auto info = _nnpkg->outputInfo(index);
+ fillTensorInfo(ti, info.shape(), info.typeInfo().type());
+ }
+ else
+ {
+ auto io_index = onert::ir::IOIndex{index};
+ auto shape = _execution->getOutputShape(io_index);
+ auto dtype = _compiler_artifact->_executors->outputInfo(io_index).typeInfo().type();
+ fillTensorInfo(ti, shape, dtype);
}
- auto dtype = primary_subgraph()->operands().at(opidx).typeInfo().type();
- fillTensorInfo(ti, shape, dtype);
}
catch (const std::exception &e)
{
@@ -881,86 +797,16 @@ NNFW_STATUS nnfw_session::output_tensorinfo(uint32_t index, nnfw_tensorinfo *ti)
return NNFW_STATUS_NO_ERROR;
}
-void nnfw_session::make_dependency()
+NNFW_STATUS nnfw_session::push_pipeline_input(std::vector<void *> *, std::vector<uint32_t> *)
{
- for (uint32_t out_exe = 0; out_exe < _executions.size(); out_exe++)
- {
- auto &out_graph = _executions[out_exe]->primary_subgraph();
- for (uint32_t in_exe = 0; in_exe < _executions.size(); in_exe++)
- {
- if (out_exe == in_exe)
- continue;
- auto &in_graph = _executions[in_exe]->primary_subgraph();
- for (auto out = out_graph._name_to_output_begin(); out != out_graph._name_to_output_end();
- out++)
- {
- auto out_opidx = out_graph.getOutputs().at(out->second);
- auto out_shape = out_graph.operands().at(out_opidx).shape();
- for (auto in = in_graph._name_to_input_begin(); in != in_graph._name_to_input_end(); in++)
- {
- if (out->first != in->first)
- continue;
-
- auto in_opidx = in_graph.getInputs().at(in->second);
- auto in_shape = in_graph.operands().at(in_opidx).shape();
- if (out_shape.rank() != in_shape.rank())
- continue;
-
- bool is_same = true;
- for (int32_t i = 0; i < out_shape.rank(); i++)
- {
- if (out_shape.dim(i) != in_shape.dim(i))
- {
- is_same = false;
- break;
- }
- }
-
- if (is_same)
- _executions[out_exe]->pushNextExe(_executions[in_exe], out->second, in->second);
- }
- }
- }
- }
-}
-
-NNFW_STATUS nnfw_session::push_pipeline_input(std::vector<void *> *inputs,
- std::vector<uint32_t> *lengths)
-{
- static uint32_t count = 0;
- if (inputs->empty())
- {
- _executions[0]->setFinish();
- for (uint32_t i = 0; i < _threads.size(); i++)
- {
- _threads[i].join();
- }
- return NNFW_STATUS_NO_ERROR;
- }
- _executions[0]->asyncIoDescSemWait();
- _executions[0]->createNewAsyncDesc(count++);
- for (uint32_t i = 0; i < inputs->size(); i++)
- {
- _executions[0]->executeAsyncInput(onert::ir::IOIndex(i), inputs->at(i), lengths->at(i));
- }
- _executions[0]->asyncIoDescSemPost();
- return NNFW_STATUS_NO_ERROR;
+ std::cerr << "Pipeline push_pipeline_input: deprecated feature " << std::endl;
+ return NNFW_STATUS_ERROR;
}
-NNFW_STATUS nnfw_session::pop_pipeline_output(std::vector<void *> *outputs)
+NNFW_STATUS nnfw_session::pop_pipeline_output(std::vector<void *> *)
{
- auto results = _executions[_executions.size() - 1]->getAsyncResults();
- while (results->empty())
- {
- if (_executions[_executions.size() - 1]->stopWait())
- return NNFW_STATUS_ERROR;
- }
-
- auto result = results->front();
- results->pop_front();
- for (uint32_t i = 0; i < result.size(); i++)
- outputs->push_back(result[i]);
- return NNFW_STATUS_NO_ERROR;
+ std::cerr << "Pipeline pop_pipeline_output: deprecated feature " << std::endl;
+ return NNFW_STATUS_ERROR;
}
NNFW_STATUS nnfw_session::register_custom_operation(const std::string &id,
@@ -1088,10 +934,6 @@ NNFW_STATUS nnfw_session::set_config(const char *key, const char *value)
{
options.he_profiling_mode = toBool(value);
}
- else if (skey == config::DISABLE_COMPILE)
- {
- options.disable_compile = toBool(value);
- }
else
{
return NNFW_STATUS_ERROR;
@@ -1103,23 +945,41 @@ const onert::ir::Graph *nnfw_session::primary_subgraph()
{
if (_nnpkg != nullptr)
{
- assert(_execution == nullptr && _executions.empty());
+ assert(_execution == nullptr);
return _nnpkg->primary_model()->primary_subgraph().get();
}
else
{
- assert(_execution != nullptr || !_executions.empty());
- // TODO Remove const_cast
+ assert(_execution != nullptr);
// We assumed the graph will not change after compilation, but shape could change
- if (!_executions.empty())
- {
- return &_executions[0]->primary_parentgraph();
- }
-
return &_execution->primary_subgraph();
}
}
+uint32_t nnfw_session::getInputSize()
+{
+ if (isStateInitialized())
+ throw std::runtime_error{"Model is not loaded yet"};
+
+ if (isStateModelLoaded())
+ return _nnpkg->inputSize();
+
+ // Session is prepared (general inference)
+ return _compiler_artifact->_executors->inputSize();
+}
+
+uint32_t nnfw_session::getOutputSize()
+{
+ if (isStateInitialized())
+ throw std::runtime_error{"Model is not loaded yet"};
+
+ if (isStateModelLoaded())
+ return _nnpkg->outputSize();
+
+ // Session is prepared (general inference)
+ return _compiler_artifact->_executors->outputSize();
+}
+
NNFW_STATUS nnfw_session::get_config(const char *key, char *value, size_t value_size)
{
if (!isStateModelLoaded())
@@ -1174,7 +1034,7 @@ bool nnfw_session::isStateInitialized()
{
assert(_nnpkg == nullptr);
assert(_coptions.empty());
- assert(_execution == nullptr && _executions.empty());
+ assert(_execution == nullptr);
return true;
}
else
@@ -1189,7 +1049,7 @@ bool nnfw_session::isStateModelLoaded()
{
assert(_nnpkg != nullptr);
assert(!_coptions.empty());
- assert(_execution == nullptr && _executions.empty());
+ assert(_execution == nullptr);
return true;
}
else
@@ -1204,7 +1064,7 @@ bool nnfw_session::isStatePrepared()
{
assert(_nnpkg == nullptr);
assert(!_coptions.empty());
- assert(_execution != nullptr || !_executions.empty());
+ assert(_execution != nullptr);
return true;
}
else
@@ -1219,7 +1079,7 @@ bool nnfw_session::isStateRunning()
{
assert(_nnpkg == nullptr);
assert(!_coptions.empty());
- assert(_execution != nullptr || !_executions.empty());
+ assert(_execution != nullptr);
return true;
}
return false;
@@ -1231,7 +1091,7 @@ bool nnfw_session::isStateFinishedRun()
{
assert(_nnpkg == nullptr);
assert(!_coptions.empty());
- assert(_execution != nullptr || !_executions.empty());
+ assert(_execution != nullptr);
return true;
}
else
diff --git a/runtime/onert/api/src/nnfw_api_internal.h b/runtime/onert/api/src/nnfw_api_internal.h
index 9b729fd5f..8e2c2fba6 100644
--- a/runtime/onert/api/src/nnfw_api_internal.h
+++ b/runtime/onert/api/src/nnfw_api_internal.h
@@ -136,9 +136,6 @@ public:
NNFW_STATUS set_available_backends(const char *backends);
NNFW_STATUS set_op_backend(const char *op, const char *backend);
- // accessor
- std::vector<std::shared_ptr<onert::exec::Execution>> *get_executions() { return &_executions; }
-
//
// Internal-only API
//
@@ -151,7 +148,6 @@ public:
//
// Experimental API
//
- void make_dependency();
NNFW_STATUS push_pipeline_input(std::vector<void *> *inputs, std::vector<uint32_t> *lengths);
NNFW_STATUS pop_pipeline_output(std::vector<void *> *outputs);
@@ -166,6 +162,9 @@ public:
private:
const onert::ir::Graph *primary_subgraph();
+ uint32_t getInputSize();
+ uint32_t getOutputSize();
+
bool isStateInitialized();
bool isStateModelLoaded();
bool isStatePrepared();
@@ -181,8 +180,6 @@ private:
std::unique_ptr<onert::exec::Execution> _execution;
std::shared_ptr<onert::api::CustomKernelRegistry> _kernel_registry;
std::vector<std::thread> _threads;
- std::vector<std::shared_ptr<onert::exec::Execution>> _executions;
- std::string _package_file_path;
};
#endif // __API_NNFW_API_INTERNAL_H__
diff --git a/runtime/onert/backend/acl_cl/KernelGenerator.cc b/runtime/onert/backend/acl_cl/KernelGenerator.cc
index e709286df..5b0ec92b7 100644
--- a/runtime/onert/backend/acl_cl/KernelGenerator.cc
+++ b/runtime/onert/backend/acl_cl/KernelGenerator.cc
@@ -256,7 +256,7 @@ void KernelGenerator::visit(const ir::operation::Concat &node)
auto output_tensor = _tensor_reg->getAclTensor(ofm_index);
std::vector<const ::arm_compute::ICLTensor *> input_tensors;
- for (auto &ifm_ind : input_indexes)
+ for (const auto &ifm_ind : input_indexes)
input_tensors.emplace_back(_tensor_reg->getAclTensor(ifm_ind)->handle());
std::unique_ptr<::arm_compute::IFunction> fn;
diff --git a/runtime/onert/backend/acl_common/AclTensorBuilder.h b/runtime/onert/backend/acl_common/AclTensorBuilder.h
index e008fd6f5..b0b5ca612 100644
--- a/runtime/onert/backend/acl_common/AclTensorBuilder.h
+++ b/runtime/onert/backend/acl_common/AclTensorBuilder.h
@@ -162,7 +162,7 @@ void AclTensorBuilder<T_ITensor, T_Tensor, T_SubTensor>::registerTensorInfo(
auto &offset = parent_info.coordinates;
auto frontend_layout = parent_info.frontend_layout;
- assert(obj.shape().rank() <= ir::Shape::MAX_RANK);
+ assert(obj.shape().rank() <= ir::Shape::kMaxRank);
auto shape = obj.shape();
if (_operands.at(parent_index).shape().rank() >= 4 && frontend_layout == ir::Layout::NHWC &&
backend_layout == ir::Layout::NCHW)
@@ -218,11 +218,11 @@ void AclTensorBuilder<T_ITensor, T_Tensor, T_SubTensor>::allocate(void)
{
auto lifetime_map = cl_common::createLifetimeMap(_lifetime_seq, _parent_map);
- for (auto &entry : lifetime_map)
+ for (const auto &entry : lifetime_map)
{
- auto &use = entry.second;
- auto use_type = use.first;
- auto use_index = use.second;
+ const auto &use = entry.second;
+ const auto &use_type = use.first;
+ const auto &use_index = use.second;
assert(use_index.valid());
if (use_type == UsesType::FIRST)
_tensor_mgr->startLifetime(use_index);
@@ -255,9 +255,9 @@ void AclTensorBuilder<T_ITensor, T_Tensor, T_SubTensor>::buildTensors(void)
assert(_tensor_mgr->nonconstTensors().size() == 0);
// Normal tensors
- for (auto &entry : _tensor_info_map)
+ for (const auto &entry : _tensor_info_map)
{
- auto ind = entry.first;
+ const auto &ind = entry.first;
if (_parent_map.count(ind) > 0)
continue;
@@ -273,9 +273,9 @@ void AclTensorBuilder<T_ITensor, T_Tensor, T_SubTensor>::buildTensors(void)
assert(_tensor_mgr->nonconstSubtensors().size() == 0);
// TODO Iterate `_parent_map` instead, once the optimizer bug is fixed
// `Optimizer` iterates the entire Operations, so there is a bug if iterating _parent_map
- for (auto &entry : _tensor_info_map)
+ for (const auto &entry : _tensor_info_map)
{
- auto ind = entry.first;
+ const auto &ind = entry.first;
if (_parent_map.count(ind) == 0)
continue;
@@ -343,7 +343,7 @@ template <typename T_ITensor, typename T_Tensor, typename T_SubTensor>
bool AclTensorBuilder<T_ITensor, T_Tensor, T_SubTensor>::areSubTensorsOf(
const ir::OperandIndex &parent, const ir::OperandIndexSequence &seq)
{
- for (auto &cand : seq)
+ for (const auto &cand : seq)
{
if (!isSubTensorOf(parent, cand))
{
diff --git a/runtime/onert/backend/cl_common/include/cl_common/BackendContext.h b/runtime/onert/backend/cl_common/include/cl_common/BackendContext.h
index 7bb72d74e..5536d2780 100644
--- a/runtime/onert/backend/cl_common/include/cl_common/BackendContext.h
+++ b/runtime/onert/backend/cl_common/include/cl_common/BackendContext.h
@@ -65,7 +65,7 @@ public:
.operands()
.iterate([&](const ir::OperandIndex &, ir::Operand &obj) { obj.releaseData(); });
- for (auto &it : ret)
+ for (auto &&it : ret)
{
auto &fn_seq = it.second;
fn_seq->iterate([&](exec::IFunction &ifunc) {
diff --git a/runtime/onert/backend/cpu/BackendContext.cc b/runtime/onert/backend/cpu/BackendContext.cc
index e6f7b8470..da48a785d 100644
--- a/runtime/onert/backend/cpu/BackendContext.cc
+++ b/runtime/onert/backend/cpu/BackendContext.cc
@@ -50,7 +50,7 @@ FunctionMap BackendContext::genKernels()
.operands()
.iterate([&](const ir::OperandIndex &, ir::Operand &obj) { obj.releaseData(); });
- for (auto &it : ret)
+ for (auto &&it : ret)
{
auto &fn_seq = it.second;
fn_seq->iterate([&](exec::IFunction &ifunc) { ifunc.prepare(); });
diff --git a/runtime/onert/backend/cpu/KernelGenerator.cc b/runtime/onert/backend/cpu/KernelGenerator.cc
index 762ee7392..896883bc3 100644
--- a/runtime/onert/backend/cpu/KernelGenerator.cc
+++ b/runtime/onert/backend/cpu/KernelGenerator.cc
@@ -279,7 +279,7 @@ void KernelGenerator::visit(const ir::operation::AddN &node)
const auto output_index{node.getOutputs().at(0)};
std::vector<const IPortableTensor *> input_tensors;
- for (auto &input_idx : node.getInputs())
+ for (const auto &input_idx : node.getInputs())
input_tensors.emplace_back(_tensor_reg->getPortableTensor(input_idx));
auto output_tensor = _tensor_reg->getPortableTensor(output_index);
@@ -386,7 +386,7 @@ void KernelGenerator::visit(const ir::operation::Concat &node)
auto output_tensor = _tensor_reg->getPortableTensor(ofm_index);
std::vector<const IPortableTensor *> input_tensors;
- for (auto &ifm_idx : node.getInputs())
+ for (const auto &ifm_idx : node.getInputs())
input_tensors.emplace_back(_tensor_reg->getPortableTensor(ifm_idx));
auto fn = std::make_unique<ops::ConcatLayer>();
@@ -626,7 +626,7 @@ void KernelGenerator::visit(const ir::operation::Einsum &node)
auto output_tensor = _tensor_reg->getPortableTensor(ofm_index);
std::vector<const IPortableTensor *> input_tensors;
- for (auto &ifm_idx : node.getInputs())
+ for (const auto &ifm_idx : node.getInputs())
input_tensors.emplace_back(_tensor_reg->getPortableTensor(ifm_idx));
const auto equation = node.param().equation;
@@ -643,7 +643,7 @@ void KernelGenerator::visit(const ir::operation::Custom &node)
auto fill_op_info = [&](const ir::OperandIndexSequence &opSeq,
std::vector<custom::TypeInfo> &types,
std::vector<IPortableTensor *> &tensors) {
- for (auto &idx : opSeq)
+ for (const auto &idx : opSeq)
{
const auto &operand = _ctx.at(idx);
// TODO make sure using `_current_layout` is correct for custom operations
@@ -750,7 +750,7 @@ void KernelGenerator::visit(const ir::operation::Pack &node)
auto output_tensor = _tensor_reg->getPortableTensor(ofm_index);
std::vector<const IPortableTensor *> input_tensors;
- for (auto &ifm_idx : node.getInputs())
+ for (const auto &ifm_idx : node.getInputs())
input_tensors.emplace_back(_tensor_reg->getPortableTensor(ifm_idx));
auto fn = std::make_unique<ops::PackLayer>();
@@ -772,7 +772,7 @@ void KernelGenerator::visit(const ir::operation::Unpack &node)
auto input_tensor = _tensor_reg->getPortableTensor(input_index);
std::vector<IPortableTensor *> output_tensors;
- for (auto &output_idx : node.getOutputs())
+ for (const auto &output_idx : node.getOutputs())
output_tensors.emplace_back(_tensor_reg->getPortableTensor(output_idx));
auto fn = std::make_unique<ops::UnpackLayer>();
@@ -934,7 +934,7 @@ void KernelGenerator::visit(const ir::operation::Split &node)
auto axis_tensor = _tensor_reg->getPortableTensor(axis_idx);
std::vector<IPortableTensor *> out_tensors;
- for (auto &output_idx : node.getOutputs())
+ for (const auto &output_idx : node.getOutputs())
out_tensors.emplace_back(_tensor_reg->getPortableTensor(output_idx));
auto fn = std::make_unique<ops::SplitLayer>();
@@ -1261,7 +1261,7 @@ void KernelGenerator::visit(const ir::operation::FusedBatchNorm &node)
auto output_tensor = _tensor_reg->getPortableTensor(ofm_index);
std::vector<const IPortableTensor *> input_tensors;
- for (auto &ifm_idx : node.getInputs())
+ for (const auto &ifm_idx : node.getInputs())
input_tensors.emplace_back(_tensor_reg->getPortableTensor(ifm_idx));
const auto epsilon = node.param().epsilon;
@@ -1372,7 +1372,7 @@ void KernelGenerator::visit(const ir::operation::SplitV &node)
auto in_split_dim = _tensor_reg->getPortableTensor(split_dim);
std::vector<IPortableTensor *> out_tensors;
- for (auto &output_idx : node.getOutputs())
+ for (const auto &output_idx : node.getOutputs())
out_tensors.emplace_back(_tensor_reg->getPortableTensor(output_idx));
auto fn = std::make_unique<ops::SplitVLayer>();
diff --git a/runtime/onert/backend/gpu_cl/Backend.h b/runtime/onert/backend/gpu_cl/Backend.h
index d67ba1602..cdf965557 100644
--- a/runtime/onert/backend/gpu_cl/Backend.h
+++ b/runtime/onert/backend/gpu_cl/Backend.h
@@ -28,6 +28,7 @@
#include "TensorBuilder.h"
#include "tensorflow/lite/delegates/gpu/cl/environment.h"
+#include "tensorflow/lite/delegates/gpu/common/precision.h"
#include "tensorflow/lite/delegates/gpu/common/status.h"
namespace onert
@@ -55,15 +56,16 @@ public:
{
return nullptr;
}
- auto tm = createTensorManager(&environment->context());
- auto tr = std::make_shared<TensorRegistry>(tm);
-
- tflite::gpu::cl::InferenceContext::CreateInferenceInfo create_info;
- create_info.precision = tflite::gpu::cl::CalculationsPrecision::F32;
+ tflite::gpu::CreateGpuModelInfo create_info;
+ create_info.precision = tflite::gpu::CalculationsPrecision::F32;
create_info.storage_type =
tflite::gpu::cl::GetStorageTypeWithMinimalMemoryConsumption(environment->device().GetInfo());
- create_info.hints.Add(tflite::gpu::cl::ModelHints::kFastestInference);
+ create_info.hints.Add(tflite::gpu::ModelHints::kFastestInference);
+
+ auto tm = createTensorManager(&environment->context(), create_info, environment);
+
+ auto tr = std::make_shared<TensorRegistry>(tm);
auto cc = std::make_shared<tflite::gpu::cl::CreationContext>();
cc->device = environment->GetDevicePtr();
@@ -71,7 +73,7 @@ public:
cc->queue = environment->queue();
cc->cache = environment->program_cache();
- auto tb = std::make_shared<TensorBuilder>(operands, tm, create_info, environment);
+ auto tb = std::make_shared<TensorBuilder>(operands, tm);
context->tensor_registry = tr;
context->tensor_builder = tb;
diff --git a/runtime/onert/backend/gpu_cl/BackendContext.cc b/runtime/onert/backend/gpu_cl/BackendContext.cc
index ec9442155..b09319d98 100644
--- a/runtime/onert/backend/gpu_cl/BackendContext.cc
+++ b/runtime/onert/backend/gpu_cl/BackendContext.cc
@@ -86,6 +86,32 @@ ITensorRegistry *BackendContext::genTensors()
return tensor_registry.get();
}
+FunctionMap BackendContext::genKernels()
+{
+ FunctionMap fn_map;
+
+ for (auto op_ind : _data.op_order)
+ {
+ auto fn_seq = kernel_gen->generate(op_ind);
+ fn_map.emplace_back(op_ind, std::move(fn_seq));
+ }
+
+ kernel_gen->get_operation(fn_map);
+ tensor_builder->allocate();
+ // NOTE For memory optimization, we want to free some operand data
+ const_cast<ir::Graph &>(*_data.graph)
+ .operands()
+ .iterate([&](const ir::OperandIndex &, ir::Operand &obj) { obj.releaseData(); });
+
+ for (auto &&it : fn_map)
+ {
+ auto &fn_seq = it.second;
+ fn_seq->iterate([&](exec::IFunction &ifunc) { ifunc.prepare(); });
+ }
+
+ return fn_map;
+}
+
} // namespace gpu_cl
} // namespace backend
} // namespace onert
diff --git a/runtime/onert/backend/gpu_cl/BackendContext.h b/runtime/onert/backend/gpu_cl/BackendContext.h
index 7412d2bce..da5daae02 100644
--- a/runtime/onert/backend/gpu_cl/BackendContext.h
+++ b/runtime/onert/backend/gpu_cl/BackendContext.h
@@ -25,6 +25,7 @@
#include "ConstantInitializer.h"
#include "KernelGenerator.h"
#include "TensorBuilder.h"
+
#include "tensorflow/lite/delegates/gpu/cl/inference_context.h"
namespace onert
@@ -52,6 +53,7 @@ public:
}
ITensorRegistry *genTensors() override;
+ FunctionMap genKernels() override;
protected:
void registerTensorInfo(const ir::OperandIndex &ind, const ir::OperandInfo &info,
diff --git a/runtime/onert/backend/gpu_cl/CMakeLists.txt b/runtime/onert/backend/gpu_cl/CMakeLists.txt
index eb1964214..d62dbd84c 100644
--- a/runtime/onert/backend/gpu_cl/CMakeLists.txt
+++ b/runtime/onert/backend/gpu_cl/CMakeLists.txt
@@ -24,7 +24,26 @@ if(NOT Fp16_FOUND)
return()
endif(NOT Fp16_FOUND)
-nnas_find_package(TensorFlowGpu QUIET)
+nnas_find_package(VulkanSource QUIET)
+if(NOT VulkanSource_FOUND)
+ return()
+endif(NOT VulkanSource_FOUND)
+
+nnas_find_package(Opengl_HeadersSource QUIET)
+if(NOT Opengl_HeadersSource_FOUND)
+ return()
+endif(NOT Opengl_HeadersSource_FOUND)
+
+nnas_find_package(Egl_HeadersSource QUIET)
+if(NOT Egl_HeadersSource_FOUND)
+ return()
+endif(NOT Egl_HeadersSource_FOUND)
+
+if (NOT ${TARGET_OS} MATCHES "tizen")
+ nnas_find_package(FlatBuffers REQUIRED)
+endif ()
+
+nnfw_find_package(TensorFlowGpu QUIET)
if(NOT TensorFlowGpu_FOUND)
message(FATAL_ERROR 'TensorFlowGpu lib not found')
return()
@@ -35,18 +54,32 @@ file(GLOB_RECURSE SOURCES "*.cc")
add_library(${LIB_ONERT_BACKEND_GPU_CL} SHARED ${SOURCES})
target_include_directories(${LIB_ONERT_BACKEND_GPU_CL} PRIVATE ${CMAKE_CURRENT_SOURCE_DIR})
-target_include_directories(${LIB_ONERT_BACKEND_GPU_CL} PRIVATE ${TENSORFLOWGPU_SOURCE_DIR})
+target_include_directories(${LIB_ONERT_BACKEND_GPU_CL} PRIVATE ${TensorFlowSource_DIR})
+target_include_directories(${LIB_ONERT_BACKEND_GPU_CL} PRIVATE ${VulkanSource_DIR}/include)
+target_include_directories(${LIB_ONERT_BACKEND_GPU_CL} PRIVATE ${Opengl_HeadersSource_DIR}/api)
+target_include_directories(${LIB_ONERT_BACKEND_GPU_CL} PRIVATE ${Egl_HeadersSource_DIR}/api)
+
+if (${TARGET_OS} MATCHES "tizen")
+ target_compile_options(${LIB_ONERT_BACKEND_GPU_CL} PRIVATE "-Wno-error=deprecated-copy")
+endif ()
+
+target_compile_options(${LIB_ONERT_BACKEND_GPU_CL} PRIVATE "-DCL_TARGET_OPENCL_VERSION=220" "-DEGL_NO_X11")
target_link_libraries(${LIB_ONERT_BACKEND_GPU_CL} PRIVATE abseil)
target_link_libraries(${LIB_ONERT_BACKEND_GPU_CL} PRIVATE dl)
target_link_libraries(${LIB_ONERT_BACKEND_GPU_CL} PRIVATE farmhash)
-target_link_libraries(${LIB_ONERT_BACKEND_GPU_CL} INTERFACE Open_CL_Headers)
+target_link_libraries(${LIB_ONERT_BACKEND_GPU_CL} PRIVATE OpenCL_Headers)
target_link_libraries(${LIB_ONERT_BACKEND_GPU_CL} PRIVATE fp16)
target_link_libraries(${LIB_ONERT_BACKEND_GPU_CL} PRIVATE TensorFlowGpu)
target_link_libraries(${LIB_ONERT_BACKEND_GPU_CL} PRIVATE onert_core)
target_link_libraries(${LIB_ONERT_BACKEND_GPU_CL} PRIVATE ${LIB_ONERT_BACKEND_CL_COMMON})
target_link_libraries(${LIB_ONERT_BACKEND_GPU_CL} PRIVATE nnfw_common)
target_link_libraries(${LIB_ONERT_BACKEND_GPU_CL} PRIVATE nnfw_coverage)
+if (${TARGET_OS} MATCHES "tizen")
+ target_link_libraries(${LIB_ONERT_BACKEND_GPU_CL} PRIVATE flatbuffers)
+else()
+ target_link_libraries(${LIB_ONERT_BACKEND_GPU_CL} PRIVATE flatbuffers::flatbuffers)
+endif ()
set_target_properties(${LIB_ONERT_BACKEND_GPU_CL} PROPERTIES OUTPUT_NAME backend_gpu_cl)
@@ -55,4 +88,8 @@ if(CMAKE_BUILD_TYPE_LC STREQUAL "release")
COMMAND ${CMAKE_STRIP} "--strip-unneeded" $<TARGET_FILE_NAME:${LIB_ONERT_BACKEND_GPU_CL}>)
endif()
+add_library(tflite_ignore_warnings INTERFACE)
+target_compile_options(tflite_ignore_warnings INTERFACE -Wno-unused-parameter -Wno-sign-compare)
+target_link_libraries(${LIB_ONERT_BACKEND_GPU_CL} PRIVATE tflite_ignore_warnings)
+
install(TARGETS ${LIB_ONERT_BACKEND_GPU_CL} DESTINATION lib)
diff --git a/runtime/onert/backend/gpu_cl/ClFunction.h b/runtime/onert/backend/gpu_cl/ClFunction.h
index 5e8a11a84..6afbd4910 100644
--- a/runtime/onert/backend/gpu_cl/ClFunction.h
+++ b/runtime/onert/backend/gpu_cl/ClFunction.h
@@ -22,9 +22,9 @@
#include <vector>
#include <memory>
-#include "tensorflow/lite/delegates/gpu/cl/kernels/gpu_operation.h"
-#include "tensorflow/lite/delegates/gpu/cl/cl_command_queue.h"
#include "tensorflow/lite/delegates/gpu/common/status.h"
+#include "tensorflow/lite/delegates/gpu/cl/cl_command_queue.h"
+#include "tensorflow/lite/delegates/gpu/cl/cl_operation.h"
namespace onert
{
@@ -35,53 +35,51 @@ namespace gpu_cl
class ClFunction : public ::onert::exec::IFunction
{
public:
- ClFunction() : _gpu_operations(), _creation_context() {}
-
-public:
- void configure(std::shared_ptr<tflite::gpu::cl::CreationContext> creation_context)
+ ClFunction(std::shared_ptr<tflite::gpu::cl::CreationContext> creation_context)
+ : _creation_context(creation_context), _gpu_operations()
{
- _creation_context = creation_context;
}
- void add_operation(std::unique_ptr<tflite::gpu::cl::GPUOperation> gpu_operation)
+public:
+ void add_operation(tflite::gpu::cl::ClOperation *gpu_operation)
{
- _gpu_operations.push_back(std::move(gpu_operation));
+ _gpu_operations.push_back(gpu_operation);
}
void run() override
{
- for (const auto &gpu_operation : _gpu_operations)
+ for (const auto gpu_operation : _gpu_operations)
{
if (!gpu_operation->AddToQueue(_creation_context->queue).ok())
{
throw std::runtime_error("Failed to AddToQueue.");
}
- if (!_creation_context->queue->WaitForCompletion().ok())
- {
- throw std::runtime_error("Failed to WaitForCompletion.");
- }
}
}
void prepare() override
{
- for (const auto &gpu_operation : _gpu_operations)
+ for (const auto gpu_operation : _gpu_operations)
{
+ if (!gpu_operation->GetGpuOperation().AssembleCode(_creation_context->GetGpuInfo()).ok())
+ {
+ throw std::runtime_error("Failed to AssembleCode.");
+ }
if (!gpu_operation->Compile(*_creation_context).ok())
{
throw std::runtime_error("Failed to Compile.");
}
-
if (!gpu_operation->UpdateParams().ok())
{
throw std::runtime_error("Failed to UpdateParams.");
}
+ gpu_operation->GetGpuOperation().args_.ReleaseCPURepresentation();
}
}
private:
- std::vector<std::unique_ptr<tflite::gpu::cl::GPUOperation>> _gpu_operations;
std::shared_ptr<tflite::gpu::cl::CreationContext> _creation_context;
+ std::vector<tflite::gpu::cl::ClOperation *> _gpu_operations;
};
} // namespace gpu_cl
diff --git a/runtime/onert/backend/gpu_cl/Config.h b/runtime/onert/backend/gpu_cl/Config.h
index 6a455bbb5..f8f94aaf4 100644
--- a/runtime/onert/backend/gpu_cl/Config.h
+++ b/runtime/onert/backend/gpu_cl/Config.h
@@ -41,9 +41,6 @@ public:
bool supportDynamicTensor() override { return false; }
bool supportFP16() override { return true; }
std::unique_ptr<util::ITimer> timer() override { return std::make_unique<util::CPUTimer>(); }
-
-private:
- void *_handle = nullptr;
};
} // namespace gpu_cl
diff --git a/runtime/onert/backend/gpu_cl/KernelGenerator.cc b/runtime/onert/backend/gpu_cl/KernelGenerator.cc
index 04edc3928..a24c4f59c 100644
--- a/runtime/onert/backend/gpu_cl/KernelGenerator.cc
+++ b/runtime/onert/backend/gpu_cl/KernelGenerator.cc
@@ -23,10 +23,11 @@
#include "TensorManager.h"
#include "tensorflow/lite/delegates/gpu/common/shape.h"
-#include "tensorflow/lite/delegates/gpu/cl/tensor.h"
-#include "tensorflow/lite/delegates/gpu/cl/selectors/convolution_selector.h"
-#include "tensorflow/lite/delegates/gpu/cl/selectors/dw_convolution_selector.h"
-#include "tensorflow/lite/delegates/gpu/cl/selectors/simple_selectors.h"
+#include "tensorflow/lite/delegates/gpu/common/tensor.h"
+#include "tensorflow/lite/delegates/gpu/common/tasks/elementwise.h"
+#include "tensorflow/lite/delegates/gpu/common/selectors/convolution_selector.h"
+#include "tensorflow/lite/delegates/gpu/common/selectors/dw_convolution_selector.h"
+#include "tensorflow/lite/delegates/gpu/common/selectors/simple_selectors.h"
#include "ir/Operations.h"
#include "ir/Operations.Include.h"
@@ -38,9 +39,6 @@
#include "util/logging.h"
#include "util/Utils.h"
-using namespace tflite::gpu;
-using namespace tflite::gpu::cl;
-
namespace onert
{
namespace backend
@@ -48,39 +46,170 @@ namespace backend
namespace gpu_cl
{
-HW ToHW(int32_t h, int32_t w) { return HW(h > 0 ? h : 1, w > 0 ? w : 1); }
+void KernelGenerator::addClNode(const std::vector<ir::OperandIndex> &inputs,
+ const std::vector<ir::OperandIndex> &outputs,
+ std::unique_ptr<tflite::gpu::GPUOperation> gpu_op)
+{
+ tflite::gpu::cl::CLNode cl_node;
+ cl_node.cl_operation.Init(std::move(gpu_op));
+ cl_node.inputs.resize(inputs.size());
+ for (size_t i = 0; i < inputs.size(); ++i)
+ {
+ cl_node.inputs[i] = inputs[i].value();
+ }
+ cl_node.outputs.resize(outputs.size());
+ for (size_t i = 0; i < outputs.size(); ++i)
+ {
+ cl_node.outputs[i] = outputs[i].value();
+ }
+ _nodes.push_back(std::move(cl_node));
+ _operation_indexes.push_back(_operation_index);
+ return;
+}
+
+void KernelGenerator::get_operation(FunctionMap &Functions)
+{
+ size_t size = _nodes.size();
+ size_t i = 0;
+ for (auto &&it : Functions)
+ {
+ auto index = it.first;
+ auto node_index = _operation_indexes[i];
+ while (index == node_index)
+ {
+ auto &fn_seq = it.second;
+ auto &node = _nodes[i++];
+ for (size_t j = 0; j < node.inputs.size(); ++j)
+ {
+ uint32_t idx = node.inputs[j];
+ node.cl_operation.GetGpuOperation().SetSrc(
+ _tensor_reg->getClTensor(ir::OperandIndex{idx})->handle(), j);
+ }
+ for (size_t j = 0; j < node.outputs.size(); ++j)
+ {
+ uint32_t idx = node.outputs[j];
+ node.cl_operation.GetGpuOperation().SetDst(
+ _tensor_reg->getClTensor(ir::OperandIndex{idx})->handle(), j);
+ }
+ fn_seq->iterate([&](exec::IFunction &ifunc) {
+ static_cast<ClFunction &>(ifunc).add_operation(&node.cl_operation);
+ });
+ if (i == size)
+ {
+ break;
+ }
+ node_index = _operation_indexes[i];
+ }
+ if (i == size)
+ {
+ break;
+ }
+ }
+}
-template <typename AttrT>
-void UpdatePadding(const ir::PaddingType type, const BHWC &input_shape, AttrT *attr)
+absl::Status KernelGenerator::readConstTensor(const ir::OperandIndex &index,
+ tflite::gpu::TensorOrScalar *param)
{
- if (type == ir::PaddingType::SAME)
+ const auto shape = _ctx.at(index).shape();
+ if (shape.rank() == 0 && shape.num_elements() == 1)
{
- attr->padding = CalculateSamePadding(input_shape, *attr);
+ tflite::gpu::Tensor<tflite::gpu::Scalar, tflite::gpu::DataType::FLOAT32> tensor;
+ tensor.shape.v = 1;
+ tensor.data.resize(1);
+ std::memcpy(&tensor.data[0], _ctx.at(index).data()->base(), _ctx.at(index).operandSize());
+ *param = tensor.data[0];
}
else
{
- attr->padding.prepended = HW(0, 0);
- attr->padding.appended = HW(0, 0);
+ if (CheckIfLinearConvertible(&shape))
+ {
+ tflite::gpu::Tensor<tflite::gpu::Linear, tflite::gpu::DataType::FLOAT32> tensor;
+ tensor.shape.v = shape.dim(shape.rank() - 1);
+ tensor.data.resize(shape.num_elements());
+ std::memcpy(&tensor.data[0], _ctx.at(index).data()->base(), _ctx.at(index).operandSize());
+ *param = std::move(tensor);
+ }
+ else
+ {
+ tflite::gpu::Tensor<tflite::gpu::HWC, tflite::gpu::DataType::FLOAT32> tensor;
+ if (shape.rank() == 3)
+ {
+ tensor.shape.h = shape.dim(0);
+ tensor.shape.w = shape.dim(1);
+ tensor.shape.c = shape.dim(2);
+ }
+ else if (shape.rank() == 4)
+ {
+ if (shape.dim(0) != 1)
+ {
+ return absl::UnimplementedError("Batch size is not equal to 1.");
+ }
+ tensor.shape.h = shape.dim(1);
+ tensor.shape.w = shape.dim(2);
+ tensor.shape.c = shape.dim(3);
+ }
+ else
+ {
+ return absl::InvalidArgumentError(
+ "Expected a 3D tensor of shape HxWxC or a 4D tensor of shape 1xHxWxC.");
+ }
+ tensor.data.resize(shape.num_elements());
+ std::memcpy(&tensor.data[0], _ctx.at(index).data()->base(), _ctx.at(index).operandSize());
+ *param = std::move(tensor);
+ }
}
+ return absl::OkStatus();
}
-PoolingType convertPoolType(ir::operation::Pool2D::PoolType type_ir)
+absl::Status KernelGenerator::readConstTensor(
+ const ir::OperandIndex &index,
+ absl::variant<tflite::gpu::Tensor<tflite::gpu::Linear, tflite::gpu::DataType::FLOAT32>,
+ tflite::gpu::Tensor<tflite::gpu::HWC, tflite::gpu::DataType::FLOAT32>> *alpha)
{
- switch (type_ir)
+ const auto shape = _ctx.at(index).shape();
+ if (CheckIfLinearConvertible(&shape))
{
- case ir::operation::Pool2D::PoolType::AVG:
- return PoolingType::AVERAGE;
- case ir::operation::Pool2D::PoolType::MAX:
- return PoolingType::MAX;
- default:
- throw std::runtime_error("gpu_Cl KernelGenerator : Not supported operation yet");
+ tflite::gpu::Tensor<tflite::gpu::Linear, tflite::gpu::DataType::FLOAT32> tensor;
+ tensor.shape.v = shape.dim(shape.rank() - 1);
+ tensor.data.resize(shape.num_elements());
+ std::memcpy(&tensor.data[0], _ctx.at(index).data()->base(), _ctx.at(index).operandSize());
+ *alpha = std::move(tensor);
}
+ else
+ {
+ tflite::gpu::Tensor<tflite::gpu::HWC, tflite::gpu::DataType::FLOAT32> tensor;
+ if (shape.rank() == 3)
+ {
+ tensor.shape.h = shape.dim(0);
+ tensor.shape.w = shape.dim(1);
+ tensor.shape.c = shape.dim(2);
+ }
+ else if (shape.rank() == 4)
+ {
+ if (shape.dim(0) != 1)
+ {
+ return absl::UnimplementedError("Batch size is not equal to 1.");
+ }
+ tensor.shape.h = shape.dim(1);
+ tensor.shape.w = shape.dim(2);
+ tensor.shape.c = shape.dim(3);
+ }
+ else
+ {
+ return absl::InvalidArgumentError(
+ "Expected a 3D tensor of shape HxWxC or a 4D tensor of shape 1xHxWxC.");
+ }
+ tensor.data.resize(shape.num_elements());
+ std::memcpy(&tensor.data[0], _ctx.at(index).data()->base(), _ctx.at(index).operandSize());
+ *alpha = std::move(tensor);
+ }
+ return absl::OkStatus();
}
-KernelGenerator::KernelGenerator(const ir::Graph &graph,
- const std::shared_ptr<TensorBuilder> &tensor_builder,
- const std::shared_ptr<TensorRegistry> &tensor_reg,
- const std::shared_ptr<CreationContext> &creation_context)
+KernelGenerator::KernelGenerator(
+ const ir::Graph &graph, const std::shared_ptr<TensorBuilder> &tensor_builder,
+ const std::shared_ptr<TensorRegistry> &tensor_reg,
+ const std::shared_ptr<tflite::gpu::cl::CreationContext> &creation_context)
: basic::KernelGeneratorBase{graph}, _ctx(graph.operands()),
_operations_ctx(graph.operations()), _current_layout{graph.layout()},
_tensor_builder(tensor_builder), _tensor_reg(tensor_reg), _creation_context(creation_context)
@@ -89,13 +218,13 @@ KernelGenerator::KernelGenerator(const ir::Graph &graph,
std::unique_ptr<exec::FunctionSequence> KernelGenerator::generate(ir::OperationIndex ind)
{
- auto ret = std::make_unique<exec::FunctionSequence>();
- ret->enableDynamicShapeInferer(false);
-
+ auto fn_seq = std::make_unique<exec::FunctionSequence>();
+ fn_seq->enableDynamicShapeInferer(false);
+ _operation_index = ind;
const auto &op = _graph.operations().at(ind);
op.accept(*this);
- ret->append(releaseFunction());
- return ret;
+ fn_seq->append(releaseFunction());
+ return fn_seq;
}
void KernelGenerator::visit(const ir::operation::BinaryArithmetic &node)
@@ -104,63 +233,66 @@ void KernelGenerator::visit(const ir::operation::BinaryArithmetic &node)
const auto lhs_index{node.getInputs().at(ir::operation::BinaryArithmetic::Input::LHS)};
const auto rhs_index{node.getInputs().at(ir::operation::BinaryArithmetic::Input::RHS)};
- // const auto activation = node.param().activation;
+ tflite::gpu::OperationDef op_def;
+ op_def.precision = tflite::gpu::CalculationsPrecision::F32;
+
+ const bool lhs_const = _ctx.at(lhs_index).isConstant();
+ const bool rhs_const = _ctx.at(rhs_index).isConstant();
+
+ if (lhs_const && rhs_const)
+ {
+ throw std::runtime_error("No runtime input tensors for " + node.name());
+ }
+
+ auto fn = std::make_unique<ClFunction>(_creation_context);
+ std::unique_ptr<tflite::gpu::GPUOperation> gpu_op;
- OperationDef op_def;
- op_def.precision = CalculationsPrecision::F32;
+ tflite::gpu::OperationType op_type = convertArithmeticType(node.param().arithmetic_type);
- op_def.src_tensors.push_back(_tensor_reg->getClTensorReserver(lhs_index)->descriptor);
- auto lhs_shape = _tensor_reg->getClTensorReserver(lhs_index)->shape;
+ if (!lhs_const && !rhs_const)
+ {
+ auto lhs_shape = _tensor_reg->getClTensor(lhs_index)->get_info()._shape;
+ auto rhs_shape = _tensor_reg->getClTensor(rhs_index)->get_info()._shape;
+
+ bool swap =
+ (op_type == tflite::gpu::OperationType::MUL) &&
+ (lhs_shape.h <= rhs_shape.h && lhs_shape.w <= rhs_shape.w && lhs_shape.c <= rhs_shape.c);
- op_def.src_tensors.push_back(_tensor_reg->getClTensorReserver(rhs_index)->descriptor);
- auto rhs_shape = _tensor_reg->getClTensorReserver(rhs_index)->shape;
+ auto first_index = swap ? rhs_index : lhs_index;
+ auto second_index = swap ? lhs_index : rhs_index;
- op_def.dst_tensors.push_back(_tensor_reg->getClTensorReserver(ofm_index)->descriptor);
- auto out_shape = _tensor_reg->getClTensorReserver(ofm_index)->shape;
+ op_def.src_tensors.push_back(_tensor_reg->getClTensor(first_index)->get_info()._desc);
+ op_def.src_tensors.push_back(_tensor_reg->getClTensor(second_index)->get_info()._desc);
+ op_def.dst_tensors.push_back(_tensor_reg->getClTensor(ofm_index)->get_info()._desc);
- auto fn = std::make_unique<ClFunction>();
+ auto second_shape = _tensor_reg->getClTensor(second_index)->get_info()._shape;
- std::unique_ptr<GPUOperation> gpu_op;
- switch (node.param().arithmetic_type)
+ tflite::gpu::GPUOperation operation = CreateElementwiseTwoInput(op_def, op_type, second_shape);
+ gpu_op = std::make_unique<tflite::gpu::GPUOperation>(std::move(operation));
+
+ addClNode({first_index, second_index}, {ofm_index}, std::move(gpu_op));
+ }
+ else
{
- case ir::operation::BinaryArithmetic::ArithmeticType::ADD:
- {
- std::vector<int> channels(2);
- channels[0] = lhs_shape.c;
- channels[1] = rhs_shape.c;
- SelectAdd(op_def, channels, out_shape.c, &gpu_op);
-
- auto ofm_tensor = _tensor_reg->getClTensor(ofm_index);
- auto lhs_tensor = _tensor_reg->getClTensor(lhs_index);
- auto rhs_tensor = _tensor_reg->getClTensor(rhs_index);
- gpu_op->SetSrc(lhs_tensor->handle(), ir::operation::BinaryArithmetic::Input::LHS);
- gpu_op->SetSrc(rhs_tensor->handle(), ir::operation::BinaryArithmetic::Input::RHS);
- gpu_op->SetDst(ofm_tensor->handle(), 0);
-
- fn->configure(_creation_context);
- fn->add_operation(std::move(gpu_op));
- break;
- }
- case ir::operation::BinaryArithmetic::ArithmeticType::SUB:
- {
- // NYI
- break;
- }
- case ir::operation::BinaryArithmetic::ArithmeticType::MUL:
- {
- // NYI
- break;
- }
- case ir::operation::BinaryArithmetic::ArithmeticType::DIV:
+ auto non_const_index = rhs_const ? lhs_index : rhs_index;
+ auto const_index = rhs_const ? rhs_index : lhs_index;
+
+ op_def.dst_tensors.push_back(_tensor_reg->getClTensor(ofm_index)->get_info()._desc);
+ op_def.src_tensors.push_back(_tensor_reg->getClTensor(non_const_index)->get_info()._desc);
+
+ tflite::gpu::ElementwiseAttributes attr;
+
+ if (!readConstTensor(const_index, &attr.param).ok())
{
- // NYI
- break;
+      throw std::runtime_error("Unsupported constant tensor in BinaryArithmetic");
}
- default:
- assert(false && "The BinaryArithmetic operation supports only binary arithmetic operations");
- break;
- }
+ tflite::gpu::GPUOperation operation =
+ CreateElementwise(_creation_context->GetGpuInfo(), op_def, op_type, attr);
+ gpu_op = absl::make_unique<tflite::gpu::GPUOperation>(std::move(operation));
+
+ addClNode({non_const_index}, {ofm_index}, std::move(gpu_op));
+ }
_return_fn = std::move(fn);
}
@@ -174,30 +306,30 @@ void KernelGenerator::visit(const ir::operation::Conv2D &node)
const auto param = node.param();
- OperationDef op_def;
- op_def.precision = CalculationsPrecision::F32;
+ tflite::gpu::OperationDef op_def;
+ op_def.precision = tflite::gpu::CalculationsPrecision::F32;
- op_def.src_tensors.push_back(_tensor_reg->getClTensorReserver(input)->descriptor);
+ op_def.src_tensors.push_back(_tensor_reg->getClTensor(input)->get_info()._desc);
- auto input_shape = _tensor_reg->getClTensorReserver(input)->shape;
- auto kernel_shape = _tensor_reg->getClTensorReserver(kernel)->shape;
- auto output_shape = _tensor_reg->getClTensorReserver(output)->shape;
- auto bias_shape = _tensor_reg->getClTensorReserver(bias)->shape;
+ auto input_shape = _tensor_reg->getClTensor(input)->get_info()._shape;
+ auto kernel_shape = _tensor_reg->getClTensor(kernel)->get_info()._shape;
+ auto output_shape = _tensor_reg->getClTensor(output)->get_info()._shape;
+ auto bias_shape = _tensor_reg->getClTensor(bias)->get_info()._shape;
- op_def.dst_tensors.push_back(_tensor_reg->getClTensorReserver(output)->descriptor);
+ op_def.dst_tensors.push_back(_tensor_reg->getClTensor(output)->get_info()._desc);
- ModelHints hints;
- std::unique_ptr<GPUOperation> gpu_op; // = InitSingleOpSubgraph(inputs, outputs, gpu_subgraph);
+ tflite::gpu::ModelHints hints;
+ std::unique_ptr<tflite::gpu::GPUOperation>
+ gpu_op; // = InitSingleOpSubgraph(inputs, outputs, gpu_subgraph);
- auto input_tensor = _tensor_reg->getClTensor(input);
auto kernel_tensor = _tensor_reg->getClTensor(kernel);
auto bias_tensor = _tensor_reg->getClTensor(bias);
- auto output_tensor = _tensor_reg->getClTensor(output);
- Convolution2DAttributes attr;
+ tflite::gpu::Convolution2DAttributes attr;
attr.strides = ToHW(param.stride.vertical, param.stride.horizontal);
- attr.dilations = HW(std::max(static_cast<u_int32_t>(1), param.dilation.height_factor),
- std::max(static_cast<u_int32_t>(1), param.dilation.width_factor));
+ attr.dilations =
+ tflite::gpu::HW(std::max(static_cast<u_int32_t>(1), param.dilation.height_factor),
+ std::max(static_cast<u_int32_t>(1), param.dilation.width_factor));
bool is_weight = (_ctx.at(kernel).isConstant() ? true : false);
@@ -220,12 +352,14 @@ void KernelGenerator::visit(const ir::operation::Conv2D &node)
UpdatePadding(param.padding.type, input_shape, &attr);
- gpu_op = SelectConvolution(attr, output_shape, _creation_context->GetDeviceInfo(), op_def, hints);
- gpu_op->SetSrc(input_tensor->handle(), ir::operation::Conv2D::INPUT);
+ gpu_op = SelectConvolution(attr, output_shape, _creation_context->GetGpuInfo(), op_def, hints);
- auto fn = std::make_unique<ClFunction>();
+ tflite::gpu::cl::CLNode cl_node;
+ cl_node.inputs.resize(1);
+ cl_node.inputs[0] = input.value();
+ cl_node.outputs.resize(1);
- fn->configure(_creation_context);
+ auto fn = std::make_unique<ClFunction>(_creation_context);
const auto activation = node.param().activation;
@@ -233,47 +367,43 @@ void KernelGenerator::visit(const ir::operation::Conv2D &node)
{
case ir::Activation::NONE:
{
- gpu_op->SetDst(output_tensor->handle(), 0);
- fn->add_operation(std::move(gpu_op));
+ addClNode({input}, {output}, std::move(gpu_op));
break;
}
+ case ir::Activation::RELU:
case ir::Activation::RELU6:
{
- std::unique_ptr<GPUOperation> gpu_op_1;
- OperationDef op_def_1;
- std::shared_ptr<cl::Tensor> new_tensor = std::make_shared<cl::Tensor>();
-
- _new_tensors[output] = new_tensor;
- if (!CreateTensor(*_creation_context->context, output_shape,
- _tensor_reg->getClTensorReserver(output)->descriptor, new_tensor.get())
- .ok())
- {
- throw std::runtime_error("Error CreateTensor.");
- }
+ std::unique_ptr<tflite::gpu::GPUOperation> gpu_op_1;
+ tflite::gpu::OperationDef op_def_1;
+ const auto shape = _ctx.at(output).shape();
+ auto new_ind = _tensor_reg->addNewClTensor(shape);
+
+ addClNode({input}, {new_ind}, std::move(gpu_op));
- gpu_op->SetDst(new_tensor.get(), 0);
- fn->add_operation(std::move(gpu_op));
- op_def_1.precision = CalculationsPrecision::F32;
- op_def_1.src_tensors.push_back(_tensor_reg->getClTensorReserver(output)->descriptor);
- op_def_1.dst_tensors.push_back(_tensor_reg->getClTensorReserver(output)->descriptor);
+ op_def_1.precision = tflite::gpu::CalculationsPrecision::F32;
+ op_def_1.src_tensors.push_back(_tensor_reg->getClTensor(output)->get_info()._desc);
+ op_def_1.dst_tensors.push_back(_tensor_reg->getClTensor(output)->get_info()._desc);
- // - ReLU6: clip = 6, alpha = 0
- ReLUAttributes attr_1;
- attr_1.clip = 6;
+ tflite::gpu::ReLUAttributes attr_1;
+ if (activation == ir::Activation::RELU6)
+ {
+ attr_1.clip = 6;
+ }
+ else
+ {
+ attr_1.clip = 0;
+ }
attr_1.alpha = 0;
gpu_op_1 = SelectReLU(attr_1, op_def_1);
- gpu_op_1->SetSrc(new_tensor.get(), 0);
- gpu_op_1->SetDst(output_tensor->handle(), 0);
- fn->add_operation(std::move(gpu_op_1));
+ addClNode({new_ind}, {output}, std::move(gpu_op_1));
break;
}
default:
{
- throw std::runtime_error("gpu_cl KernelGenerator : Not supported operation yet");
+        throw std::runtime_error("gpu_cl KernelGenerator : Not supported Conv2D activation");
}
}
-
_return_fn = std::move(fn);
}
@@ -292,28 +422,23 @@ void KernelGenerator::visit(const ir::operation::DepthwiseConv2D &node)
const auto multiplier = node.param().multiplier;
- auto ofm_tensor = _tensor_reg->getClTensor(ofm_index);
- auto ifm_tensor = _tensor_reg->getClTensor(ifm_index);
- auto ker_tensor = _tensor_reg->getClTensor(ker_index);
- auto bias_tensor = _tensor_reg->getClTensor(bias_index);
-
bool is_weight = (_ctx.at(ker_index).isConstant() ? true : false);
- OperationDef op_def;
- op_def.precision = CalculationsPrecision::F32;
+ tflite::gpu::OperationDef op_def;
+ op_def.precision = tflite::gpu::CalculationsPrecision::F32;
- op_def.src_tensors.push_back(_tensor_reg->getClTensorReserver(ifm_index)->descriptor);
- auto input_shape = _tensor_reg->getClTensorReserver(ifm_index)->shape;
+ op_def.src_tensors.push_back(_tensor_reg->getClTensor(ifm_index)->get_info()._desc);
+ auto input_shape = _tensor_reg->getClTensor(ifm_index)->get_info()._shape;
- auto ker_shape = _tensor_reg->getClTensorReserver(ker_index)->shape;
+ auto ker_shape = _tensor_reg->getClTensor(ker_index)->get_info()._shape;
- op_def.dst_tensors.push_back(_tensor_reg->getClTensorReserver(ofm_index)->descriptor);
- auto out_shape = _tensor_reg->getClTensorReserver(ofm_index)->shape;
- auto bias_shape = _tensor_reg->getClTensorReserver(bias_index)->shape;
+ op_def.dst_tensors.push_back(_tensor_reg->getClTensor(ofm_index)->get_info()._desc);
+ auto out_shape = _tensor_reg->getClTensor(ofm_index)->get_info()._shape;
+ auto bias_shape = _tensor_reg->getClTensor(bias_index)->get_info()._shape;
- DepthwiseConvolution2DAttributes attr;
+ tflite::gpu::DepthwiseConvolution2DAttributes attr;
attr.strides = ToHW(stride.vertical, stride.horizontal);
- attr.dilations = HW(std::max(static_cast<u_int32_t>(1), dilation.height_factor),
- std::max(static_cast<u_int32_t>(1), dilation.width_factor));
+ attr.dilations = tflite::gpu::HW(std::max(static_cast<u_int32_t>(1), dilation.height_factor),
+ std::max(static_cast<u_int32_t>(1), dilation.width_factor));
if (is_weight)
{
@@ -323,12 +448,14 @@ void KernelGenerator::visit(const ir::operation::DepthwiseConv2D &node)
attr.weights.shape.w = ker_shape.w;
attr.weights.shape.i = ker_shape.c;
attr.weights.data.resize(ker_shape.DimensionsProduct());
- memcpy(attr.weights.data.data(), _ctx.at(ker_index).data()->base(), ker_tensor->total_size());
+ memcpy(attr.weights.data.data(), _ctx.at(ker_index).data()->base(),
+ _ctx.at(ker_index).operandSize());
}
attr.bias.id = bias_index.value();
attr.bias.shape.v = bias_shape.b != 1 ? bias_shape.b : bias_shape.c;
attr.bias.data.resize(bias_shape.DimensionsProduct());
- memcpy(attr.bias.data.data(), _ctx.at(bias_index).data()->base(), bias_tensor->total_size());
+ memcpy(attr.bias.data.data(), _ctx.at(bias_index).data()->base(),
+ _ctx.at(bias_index).operandSize());
UpdatePadding(padding.type, input_shape, &attr);
if (multiplier != 1)
@@ -338,7 +465,7 @@ void KernelGenerator::visit(const ir::operation::DepthwiseConv2D &node)
const int filter_width = ker_shape.w;
const int output_depth = out_shape.c;
- tflite::gpu::Tensor<OHWI, DataType::FLOAT32> weights;
+ tflite::gpu::Tensor<tflite::gpu::OHWI, tflite::gpu::DataType::FLOAT32> weights;
weights.id = attr.weights.id;
weights.shape = tflite::gpu::OHWI(output_depth, filter_height, filter_width, input_depth);
weights.data.resize(weights.shape.DimensionsProduct());
@@ -356,12 +483,12 @@ void KernelGenerator::visit(const ir::operation::DepthwiseConv2D &node)
attr.weights = std::move(weights);
}
- auto fn = std::make_unique<ClFunction>();
- std::unique_ptr<GPUOperation> gpu_op;
+ auto fn = std::make_unique<ClFunction>(_creation_context);
+ std::unique_ptr<tflite::gpu::GPUOperation> gpu_op;
if (is_weight)
{
- gpu_op = SelectDWConvolution(attr, _creation_context->GetDeviceInfo(), op_def);
+ gpu_op = SelectDWConvolution(attr, _creation_context->GetGpuInfo(), op_def);
}
else
{
@@ -370,57 +497,51 @@ void KernelGenerator::visit(const ir::operation::DepthwiseConv2D &node)
throw std::runtime_error(
"No support of depthwise runtime weights with channel multiplier != 1");
}
- gpu_op = SelectDWConvolutionDynamicWeights(attr, _creation_context->GetDeviceInfo(), op_def);
+ gpu_op = SelectDWConvolutionDynamicWeights(attr, _creation_context->GetGpuInfo(), op_def);
}
- gpu_op->SetSrc(ifm_tensor->handle(), ir::operation::DepthwiseConv2D::Input::INPUT);
-
- fn->configure(_creation_context);
-
const auto activation = node.param().activation;
switch (activation)
{
case ir::Activation::NONE:
{
- gpu_op->SetDst(ofm_tensor->handle(), 0);
- fn->add_operation(std::move(gpu_op));
+ addClNode({ifm_index}, {ofm_index}, std::move(gpu_op));
break;
}
+ case ir::Activation::RELU:
case ir::Activation::RELU6:
{
- std::unique_ptr<GPUOperation> gpu_op_1;
- OperationDef op_def_1;
- std::shared_ptr<cl::Tensor> new_tensor = std::make_shared<cl::Tensor>();
-
- _new_tensors[ofm_index] = new_tensor;
- if (!CreateTensor(*_creation_context->context, out_shape,
- _tensor_reg->getClTensorReserver(ofm_index)->descriptor, new_tensor.get())
- .ok())
- {
- throw std::runtime_error("Error CreateTensor.");
- }
+ std::unique_ptr<tflite::gpu::GPUOperation> gpu_op_1;
+ tflite::gpu::OperationDef op_def_1;
+ const auto shape = _ctx.at(ofm_index).shape();
+ auto new_ind = _tensor_reg->addNewClTensor(shape);
+
+ addClNode({ifm_index}, {new_ind}, std::move(gpu_op));
- gpu_op->SetDst(new_tensor.get(), 0);
- fn->add_operation(std::move(gpu_op));
- op_def_1.precision = CalculationsPrecision::F32;
- op_def_1.src_tensors.push_back(_tensor_reg->getClTensorReserver(ofm_index)->descriptor);
- op_def_1.dst_tensors.push_back(_tensor_reg->getClTensorReserver(ofm_index)->descriptor);
+ op_def_1.precision = tflite::gpu::CalculationsPrecision::F32;
- // - ReLU6: clip = 6, alpha = 0
- ReLUAttributes attr_1;
- attr_1.clip = 6;
+ op_def_1.src_tensors.push_back(_tensor_reg->getClTensor(ofm_index)->get_info()._desc);
+ op_def_1.dst_tensors.push_back(_tensor_reg->getClTensor(ofm_index)->get_info()._desc);
+
+ tflite::gpu::ReLUAttributes attr_1;
+ if (activation == ir::Activation::RELU6)
+ {
+ attr_1.clip = 6;
+ }
+ else
+ {
+ attr_1.clip = 0;
+ }
attr_1.alpha = 0;
gpu_op_1 = SelectReLU(attr_1, op_def_1);
- gpu_op_1->SetSrc(new_tensor.get(), 0);
- gpu_op_1->SetDst(ofm_tensor->handle(), 0);
- fn->add_operation(std::move(gpu_op_1));
+ addClNode({new_ind}, {ofm_index}, std::move(gpu_op_1));
break;
}
default:
{
- throw std::runtime_error("gpu_cl KernelGenerator : Not supported operation yet");
+      throw std::runtime_error("gpu_cl KernelGenerator : Not supported DepthwiseConv2D activation");
}
}
@@ -429,26 +550,23 @@ void KernelGenerator::visit(const ir::operation::DepthwiseConv2D &node)
void KernelGenerator::visit(const ir::operation::ElementwiseActivation &node)
{
- std::unique_ptr<GPUOperation> gpu_op;
- auto fn = std::make_unique<ClFunction>();
+ const auto output_index{node.getOutputs().at(0)};
+ const auto input_index{node.getInputs().at(ir::operation::ElementwiseActivation::Input::INPUT)};
+
+ tflite::gpu::OperationDef op_def;
+ op_def.precision = tflite::gpu::CalculationsPrecision::F32;
+ op_def.dst_tensors.push_back(_tensor_reg->getClTensor(output_index)->get_info()._desc);
+ op_def.src_tensors.push_back(_tensor_reg->getClTensor(input_index)->get_info()._desc);
+
+ std::unique_ptr<tflite::gpu::GPUOperation> gpu_op;
+ auto fn = std::make_unique<ClFunction>(_creation_context);
switch (node.param().op_type)
{
case ir::operation::ElementwiseActivation::Type::LEAKY_RELU:
case ir::operation::ElementwiseActivation::Type::RELU:
{
- const auto output_index{node.getOutputs().at(0)};
- const auto input_index{
- node.getInputs().at(ir::operation::ElementwiseActivation::Input::INPUT)};
-
- OperationDef op_def;
- op_def.precision = CalculationsPrecision::F32;
- auto output_tensor = _tensor_reg->getClTensor(output_index);
- auto input_tensor = _tensor_reg->getClTensor(input_index);
- op_def.dst_tensors.push_back(_tensor_reg->getClTensorReserver(output_index)->descriptor);
- op_def.src_tensors.push_back(_tensor_reg->getClTensorReserver(input_index)->descriptor);
-
- ReLUAttributes attr;
+ tflite::gpu::ReLUAttributes attr;
if (ir::operation::ElementwiseActivation::Type::LEAKY_RELU == node.param().op_type)
{
attr.alpha = node.param().alpha;
@@ -460,17 +578,33 @@ void KernelGenerator::visit(const ir::operation::ElementwiseActivation &node)
attr.clip = node.param().alpha;
}
gpu_op = SelectReLU(attr, op_def);
- gpu_op->SetSrc(input_tensor->handle(), ir::operation::ElementwiseActivation::Input::INPUT);
- gpu_op->SetDst(output_tensor->handle(), 0);
- fn->configure(_creation_context);
- fn->add_operation(std::move(gpu_op));
-
- _return_fn = std::move(fn);
+ break;
+ }
+ case ir::operation::ElementwiseActivation::Type::LOGISTIC:
+ {
+ if (_ctx.at(input_index).typeInfo().type() != ir::DataType::FLOAT32)
+ {
+ throw std::runtime_error{"Unsupported data type of LOGISTIC"};
+ }
+ tflite::gpu::GPUOperation operation =
+ CreateElementwiseOneInput(_creation_context->GetGpuInfo(), op_def,
+ convertElementwiseActivationType(node.param().op_type));
+ gpu_op = std::make_unique<tflite::gpu::GPUOperation>(std::move(operation));
+ break;
+ }
+ case ir::operation::ElementwiseActivation::Type::TANH:
+ {
+ tflite::gpu::GPUOperation operation = CreateElementwiseOneInput(
+ _creation_context->GetGpuInfo(), op_def, tflite::gpu::OperationType::TANH);
+ gpu_op = std::make_unique<tflite::gpu::GPUOperation>(std::move(operation));
break;
}
default:
- throw std::runtime_error("gpu_cl KernelGenerator : Not supported operation yet");
+      throw std::runtime_error(
+        "gpu_cl KernelGenerator : Not supported ElementwiseActivation type");
}
+ addClNode({input_index}, {output_index}, std::move(gpu_op));
+ _return_fn = std::move(fn);
}
void KernelGenerator::visit(const ir::operation::Pool2D &node)
@@ -478,24 +612,24 @@ void KernelGenerator::visit(const ir::operation::Pool2D &node)
const auto output_index{node.getOutputs().at(0)};
const auto input_index{node.getInputs().at(ir::operation::Pool2D::Input::INPUT)};
- OperationDef op_def;
- op_def.precision = CalculationsPrecision::F32;
+ tflite::gpu::OperationDef op_def;
+ op_def.precision = tflite::gpu::CalculationsPrecision::F32;
- op_def.src_tensors.push_back(_tensor_reg->getClTensorReserver(input_index)->descriptor);
- auto input_shape = _tensor_reg->getClTensorReserver(input_index)->shape;
+ op_def.src_tensors.push_back(_tensor_reg->getClTensor(input_index)->get_info()._desc);
+ auto input_shape = _tensor_reg->getClTensor(input_index)->get_info()._shape;
- op_def.dst_tensors.push_back(_tensor_reg->getClTensorReserver(output_index)->descriptor);
+ op_def.dst_tensors.push_back(_tensor_reg->getClTensor(output_index)->get_info()._desc);
const auto kh = node.param().kh;
const auto kw = node.param().kw;
const auto stride = node.param().stride;
const auto op_type = convertPoolType(node.param().op_type);
- Pooling2DAttributes attributes;
+ tflite::gpu::Pooling2DAttributes attributes;
attributes.type = op_type;
- attributes.kernel = HW(kh > 0 ? kh : 1, kw > 0 ? kw : 1);
- attributes.strides =
- HW(stride.vertical > 0 ? stride.vertical : 1, stride.horizontal > 0 ? stride.horizontal : 1);
+ attributes.kernel = tflite::gpu::HW(kh > 0 ? kh : 1, kw > 0 ? kw : 1);
+ attributes.strides = tflite::gpu::HW(stride.vertical > 0 ? stride.vertical : 1,
+ stride.horizontal > 0 ? stride.horizontal : 1);
if (node.param().padding.type == ir::PaddingType::SAME)
{
@@ -503,23 +637,15 @@ void KernelGenerator::visit(const ir::operation::Pool2D &node)
}
else
{
- attributes.padding.prepended = HW(0, 0);
- attributes.padding.appended = HW(0, 0);
+ attributes.padding.prepended = tflite::gpu::HW(0, 0);
+ attributes.padding.appended = tflite::gpu::HW(0, 0);
}
- auto fn = std::make_unique<ClFunction>();
- std::unique_ptr<GPUOperation> gpu_op;
+ auto fn = std::make_unique<ClFunction>(_creation_context);
+ std::unique_ptr<tflite::gpu::GPUOperation> gpu_op;
gpu_op = SelectPooling(attributes, op_def);
- auto input_tensor = _tensor_reg->getClTensor(input_index);
- auto output_tensor = _tensor_reg->getClTensor(output_index);
-
- gpu_op->SetSrc(input_tensor->handle(), ir::operation::Pool2D::Input::INPUT);
- gpu_op->SetDst(output_tensor->handle(), 0);
-
- fn->configure(_creation_context);
- fn->add_operation(std::move(gpu_op));
-
+ addClNode({input_index}, {output_index}, std::move(gpu_op));
_return_fn = std::move(fn);
}
@@ -528,31 +654,24 @@ void KernelGenerator::visit(const ir::operation::Reshape &node)
const auto output_index{node.getOutputs().at(0)};
const auto input_index{node.getInputs().at(ir::operation::Reshape::Input::INPUT)};
- OperationDef op_def;
- op_def.precision = CalculationsPrecision::F32;
+ tflite::gpu::OperationDef op_def;
+ op_def.precision = tflite::gpu::CalculationsPrecision::F32;
- op_def.src_tensors.push_back(_tensor_reg->getClTensorReserver(input_index)->descriptor);
- auto input_shape = _tensor_reg->getClTensorReserver(input_index)->shape;
+ op_def.src_tensors.push_back(_tensor_reg->getClTensor(input_index)->get_info()._desc);
+ auto input_shape = _tensor_reg->getClTensor(input_index)->get_info()._shape;
- op_def.dst_tensors.push_back(_tensor_reg->getClTensorReserver(output_index)->descriptor);
- auto output_shape = _tensor_reg->getClTensorReserver(output_index)->shape;
+ op_def.dst_tensors.push_back(_tensor_reg->getClTensor(output_index)->get_info()._desc);
+ auto output_shape = _tensor_reg->getClTensor(output_index)->get_info()._shape;
- ReshapeAttributes attr;
+ tflite::gpu::ReshapeAttributes attr;
attr.new_shape = output_shape;
- auto fn = std::make_unique<ClFunction>();
- std::unique_ptr<GPUOperation> gpu_op;
+ auto fn = std::make_unique<ClFunction>(_creation_context);
+ std::unique_ptr<tflite::gpu::GPUOperation> gpu_op;
const int src_channels = input_shape.c;
SelectReshape(src_channels, attr.new_shape.c, op_def, &gpu_op);
- auto input_tensor = _tensor_reg->getClTensor(input_index);
- auto output_tensor = _tensor_reg->getClTensor(output_index);
- gpu_op->SetSrc(input_tensor->handle(), ir::operation::Reshape::Input::INPUT);
- gpu_op->SetDst(output_tensor->handle(), 0);
-
- fn->configure(_creation_context);
- fn->add_operation(std::move(gpu_op));
-
+ addClNode({input_index}, {output_index}, std::move(gpu_op));
_return_fn = std::move(fn);
}
@@ -568,27 +687,20 @@ void KernelGenerator::visit(const ir::operation::Softmax &node)
throw std::runtime_error("Softmax.beta != 1 is not supported in gpu_cl");
}
- OperationDef op_def;
- op_def.precision = CalculationsPrecision::F32;
+ tflite::gpu::OperationDef op_def;
+ op_def.precision = tflite::gpu::CalculationsPrecision::F32;
- op_def.dst_tensors.push_back(_tensor_reg->getClTensorReserver(output_index)->descriptor);
+ op_def.dst_tensors.push_back(_tensor_reg->getClTensor(output_index)->get_info()._desc);
- op_def.src_tensors.push_back(_tensor_reg->getClTensorReserver(input_index)->descriptor);
- auto input_shape = _tensor_reg->getClTensorReserver(input_index)->shape;
+ op_def.src_tensors.push_back(_tensor_reg->getClTensor(input_index)->get_info()._desc);
+ auto input_shape = _tensor_reg->getClTensor(input_index)->get_info()._shape;
- auto fn = std::make_unique<ClFunction>();
+ auto fn = std::make_unique<ClFunction>(_creation_context);
- std::unique_ptr<GPUOperation> gpu_op;
+ std::unique_ptr<tflite::gpu::GPUOperation> gpu_op;
SelectSoftmax(input_shape, op_def, &gpu_op);
- auto output_tensor = _tensor_reg->getClTensor(output_index);
- auto input_tensor = _tensor_reg->getClTensor(input_index);
-
- gpu_op->SetSrc(input_tensor->handle(), ir::operation::Softmax::Input::INPUT);
- gpu_op->SetDst(output_tensor->handle(), 0);
-
- fn->configure(_creation_context);
- fn->add_operation(std::move(gpu_op));
+ addClNode({input_index}, {output_index}, std::move(gpu_op));
_return_fn = std::move(fn);
}
diff --git a/runtime/onert/backend/gpu_cl/KernelGenerator.h b/runtime/onert/backend/gpu_cl/KernelGenerator.h
index 91fd3cd9d..5e8c2621f 100644
--- a/runtime/onert/backend/gpu_cl/KernelGenerator.h
+++ b/runtime/onert/backend/gpu_cl/KernelGenerator.h
@@ -26,6 +26,7 @@
#include <backend/CustomKernelBuilder.h>
#include <backend/basic/KernelGeneratorBase.h>
+#include <backend/BackendContext.h>
#include <ir/Operands.h>
#include <ir/Operations.h>
#include <ir/Operations.Include.h>
@@ -46,6 +47,8 @@ public:
std::unique_ptr<exec::FunctionSequence> generate(ir::OperationIndex ind) override;
+ void get_operation(FunctionMap &Functions);
+
private:
void visit(const ir::operation::BinaryArithmetic &) override;
void visit(const ir::operation::Conv2D &) override;
@@ -54,6 +57,14 @@ private:
void visit(const ir::operation::Pool2D &) override;
void visit(const ir::operation::Reshape &) override;
void visit(const ir::operation::Softmax &) override;
+ absl::Status readConstTensor(const ir::OperandIndex &index, tflite::gpu::TensorOrScalar *param);
+ absl::Status readConstTensor(
+ const ir::OperandIndex &index,
+ absl::variant<tflite::gpu::Tensor<tflite::gpu::Linear, tflite::gpu::DataType::FLOAT32>,
+ tflite::gpu::Tensor<tflite::gpu::HWC, tflite::gpu::DataType::FLOAT32>> *alpha);
+ void addClNode(const std::vector<ir::OperandIndex> &inputs,
+ const std::vector<ir::OperandIndex> &outputs,
+ std::unique_ptr<tflite::gpu::GPUOperation> gpu_op);
private:
const ir::Operands &_ctx;
@@ -62,7 +73,9 @@ private:
std::shared_ptr<TensorBuilder> _tensor_builder;
std::shared_ptr<TensorRegistry> _tensor_reg;
std::shared_ptr<tflite::gpu::cl::CreationContext> _creation_context;
- ir::OperandIndexMap<std::shared_ptr<tflite::gpu::cl::Tensor>> _new_tensors;
+ std::vector<tflite::gpu::cl::CLNode> _nodes;
+ ir::OperationIndex _operation_index;
+ std::vector<ir::OperationIndex> _operation_indexes;
};
} // namespace gpu_cl
diff --git a/runtime/onert/backend/gpu_cl/MemoryManager.h b/runtime/onert/backend/gpu_cl/MemoryManager.h
index a3b9b39de..4b34c39b9 100644
--- a/runtime/onert/backend/gpu_cl/MemoryManager.h
+++ b/runtime/onert/backend/gpu_cl/MemoryManager.h
@@ -17,17 +17,18 @@
#ifndef __ONERT_BACKEND_GPU_CL_MEMORY_MANAGER_H__
#define __ONERT_BACKEND_GPU_CL_MEMORY_MANAGER_H__
-#include "ex/InferenceContextEx.h"
#include "operand/CLTensor.h"
#include "ir/OperandIndexMap.h"
#include "ir/OperandInfo.h"
#include "util/logging.h"
+#include "tensorflow/lite/delegates/gpu/spi.h"
#include "tensorflow/lite/delegates/gpu/cl/cl_context.h"
+#include "tensorflow/lite/delegates/gpu/cl/inference_context.h"
+#include "tensorflow/lite/delegates/gpu/cl/tensor_type_util.h"
#include "tensorflow/lite/delegates/gpu/common/status.h"
-#include "tensorflow/lite/delegates/gpu/cl/storage_type_util.h"
-#include "tensorflow/lite/delegates/gpu/cl/tensor_type.h"
+#include "tensorflow/lite/delegates/gpu/common/task/storage_type_util.h"
#include <cassert>
@@ -41,24 +42,31 @@ namespace gpu_cl
class MemoryManager
{
public:
- MemoryManager(tflite::gpu::cl::CLContext *context) : _context{context} {}
+ MemoryManager(tflite::gpu::cl::CLContext *context, tflite::gpu::CreateGpuModelInfo create_info,
+ const std::shared_ptr<tflite::gpu::cl::Environment> &environment)
+ : _context{context}, _create_info{create_info}, _environment{environment}
+ {
+ }
~MemoryManager() = default;
void allocate(void)
{
+ std::unique_ptr<tflite::gpu::TensorObjectConverterBuilder> converter_builder =
+ NewConverterBuilder(_environment.get());
for (const auto &tensor_entry : _tensors)
{
auto tensor = tensor_entry.second;
auto type = tensor->get_type();
- // if (type == TensorType::TENSOR_TYPE_DELETE) {
- // continue;
- // }
+ if (type == TensorType::TENSOR_TYPE_DELETE)
+ {
+ continue;
+ }
+
+ const auto &shape = tensor->get_info()._shape;
+ const auto &descriptor = tensor->get_info()._desc;
- const auto &t = tensor_reserver_.Get(tensor_entry.first.value());
- const auto &shape = t->shape;
- const auto &descriptor = t->descriptor;
if (!CreateTensor(*_context, shape, descriptor, tensor->handle()).ok())
{
        throw std::runtime_error("Failed to CreateTensor");
@@ -66,10 +74,10 @@ public:
switch (type)
{
case TensorType::TENSOR_TYPE_INPUT:
- tensor->writeConvertInit();
+ tensor->writeConvertInit(converter_builder.get(), _environment);
break;
case TensorType::TENSOR_TYPE_OUTPUT:
- tensor->readConvertInit();
+ tensor->readConvertInit(converter_builder.get(), _environment);
break;
default:
break;
@@ -89,65 +97,60 @@ public:
{ /* DO NOTHING */
}
- void buildTensor(const ir::OperandIndex &ind, const ir::OperandInfo &info,
- tflite::gpu::cl::InferenceContext::CreateInferenceInfo create_info,
- std::shared_ptr<tflite::gpu::cl::Environment> environment,
- tflite::gpu::cl::DeviceInfo &device_info, TensorType type)
+ void buildTensor(const ir::OperandIndex &ind, const ir::OperandInfo &info, TensorType type)
{
- tflite::gpu::ValueId max_id = 0;
- auto data_type = DeduceDataTypeFromPrecision(create_info.precision);
- const auto shape = info.shape();
+ auto data_type = DeduceDataTypeFromPrecision(_create_info.precision);
- auto tensor = std::make_shared<operand::CLTensor>(shape.rank(), shape, environment, type);
- _tensors[ind] = tensor;
- tflite::gpu::BHWC t_shape;
- switch (shape.rank())
+ tflite::gpu::BHWC BHWC_shape = ToBHWC(info.shape());
+
+ tflite::gpu::TensorStorageType storage_type = _create_info.storage_type;
+ tflite::gpu::Layout layout =
+ BHWC_shape.b == 1 ? tflite::gpu::Layout::HWC : tflite::gpu::Layout::BHWC;
+
+ if (!SelectBestStorageType(_environment->device().GetInfo(), BHWC_shape, storage_type,
+ data_type, layout, &storage_type)
+ .ok())
{
- case 1:
- // B layout
- t_shape = tflite::gpu::BHWC(shape.dim(0), 1, 1, 1);
- break;
- case 2:
- // BC layout
- t_shape = tflite::gpu::BHWC(shape.dim(0), 1, 1, shape.dim(1));
- break;
- case 3:
- // BWC layout
- t_shape = tflite::gpu::BHWC(shape.dim(0), 1, shape.dim(1), shape.dim(2));
- break;
- case 4:
- // BHWC layout
- t_shape = tflite::gpu::BHWC(shape.dim(0), shape.dim(1), shape.dim(2), shape.dim(3));
- break;
- default:
- break;
+ throw std::runtime_error("Failed to SelectBestStorageType");
}
+ auto tensor = std::make_shared<operand::CLTensor>(
+ info.shape().rank(), type, BHWC_shape,
+ tflite::gpu::TensorDescriptor{data_type, storage_type, layout});
+ _tensors[ind] = tensor;
+ }
- tflite::gpu::cl::TensorStorageType storage_type = create_info.storage_type;
- tflite::gpu::Layout layout =
- t_shape.b == 1 ? tflite::gpu::Layout::HWC : tflite::gpu::Layout::BHWC;
+ ir::OperandIndex addTensor(const ir::Shape &shape)
+ {
+ auto data_type = DeduceDataTypeFromPrecision(_create_info.precision);
- tflite::gpu::ValueId id = ind.value();
- storage_type =
- tflite::gpu::cl::SelectBestStorageType(device_info, t_shape, storage_type, data_type, layout);
- auto dummy = std::make_shared<InferenceContextEx::DummyTensor>();
- dummy->shape = t_shape;
- dummy->descriptor = tflite::gpu::cl::TensorDescriptor{data_type, storage_type, layout};
- tensor_reserver_.Add(id, dummy);
+ tflite::gpu::BHWC BHWC_shape = ToBHWC(shape);
- max_id = std::max(max_id, id);
+ tflite::gpu::TensorStorageType storage_type = _create_info.storage_type;
+ tflite::gpu::Layout layout =
+ BHWC_shape.b == 1 ? tflite::gpu::Layout::HWC : tflite::gpu::Layout::BHWC;
- tensor_reserver_.SetNext(max_id + 1);
+ if (!SelectBestStorageType(_environment->device().GetInfo(), BHWC_shape, storage_type,
+ data_type, layout, &storage_type)
+ .ok())
+ {
+ throw std::runtime_error("Failed to SelectBestStorageType");
+ }
+ auto ind = ir::OperandIndex(_new_id--);
+ auto tensor = std::make_shared<operand::CLTensor>(
+ shape.rank(), TensorType::TENSOR_TYPE_VALID, BHWC_shape,
+ tflite::gpu::TensorDescriptor{data_type, storage_type, layout});
+ _tensors[ind] = tensor;
+ return ind;
}
ir::OperandIndexMap<std::shared_ptr<operand::CLTensor>> &tensors(void) { return _tensors; }
- InferenceContextEx::TensorReserverEx &tensorReservers(void) { return tensor_reserver_; }
-
private:
ir::OperandIndexMap<std::shared_ptr<operand::CLTensor>> _tensors;
- InferenceContextEx::TensorReserverEx tensor_reserver_;
tflite::gpu::cl::CLContext *_context;
+ tflite::gpu::CreateGpuModelInfo _create_info;
+ std::shared_ptr<tflite::gpu::cl::Environment> _environment;
+ uint32_t _new_id = UINT32_MAX;
};
} // namespace gpu_cl
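
The reworked MemoryManager above now receives the CreateGpuModelInfo and Environment once, at construction time, and selects the tensor storage type itself inside buildTensor/addTensor. A minimal sketch of the resulting call sequence, assuming a valid CLContext/Environment pair; cl_context, create_info, environment, operand_index, operand_info and new_shape are placeholders, not names from this patch:

    // Hypothetical driver code for the reworked gpu_cl MemoryManager API.
    MemoryManager mgr(cl_context, create_info, environment);
    mgr.buildTensor(operand_index, operand_info, TensorType::TENSOR_TYPE_INPUT);
    ir::OperandIndex tmp = mgr.addTensor(new_shape); // e.g. the intermediate output of a fused activation
    mgr.allocate(); // creates the CL tensors and sets up the CPU<->GPU converters for input/output tensors
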
diff --git a/runtime/onert/backend/gpu_cl/TensorBuilder.cc b/runtime/onert/backend/gpu_cl/TensorBuilder.cc
index e71733427..318335471 100644
--- a/runtime/onert/backend/gpu_cl/TensorBuilder.cc
+++ b/runtime/onert/backend/gpu_cl/TensorBuilder.cc
@@ -21,7 +21,6 @@
#include "TensorManager.h"
-#include "tensorflow/lite/delegates/gpu/cl/tensor_type.h"
#include "tensorflow/lite/delegates/gpu/cl/tensor_type_util.h"
#include "tensorflow/lite/delegates/gpu/cl/cl_device.h"
#include "tensorflow/lite/delegates/gpu/cl/inference_context.h"
@@ -45,11 +44,8 @@ namespace gpu_cl
using UsesType = cl_common::UsesType;
-TensorBuilder::TensorBuilder(const ir::Operands &operands, TensorManager *tensor_mgr,
- tflite::gpu::cl::InferenceContext::CreateInferenceInfo create_info,
- const std::shared_ptr<tflite::gpu::cl::Environment> &environment)
- : _operands{operands}, _tensor_mgr{tensor_mgr}, _create_info{create_info}, _environment{
- environment}
+TensorBuilder::TensorBuilder(const ir::Operands &operands, TensorManager *tensor_mgr)
+ : _operands{operands}, _tensor_mgr{tensor_mgr}
{
assert(_tensor_mgr);
}
@@ -89,9 +85,9 @@ void TensorBuilder::allocate(void)
{
auto lifetime_map = cl_common::createLifetimeMap(_lifetime_seq, _parent_map);
- for (auto &entry : lifetime_map)
+ for (const auto &entry : lifetime_map)
{
- auto &use = entry.second;
+ const auto &use = entry.second;
auto use_type = use.first;
auto use_index = use.second;
assert(use_index.valid());
@@ -118,18 +114,22 @@ void TensorBuilder::buildTensors(void)
assert(_tensor_mgr->constTensors().size() == 0);
assert(_tensor_mgr->nonconstTensors().size() == 0);
// Normal tensors
- for (auto &entry : _tensor_info_map)
+ for (const auto &entry : _tensor_info_map)
{
- auto ind = entry.first;
+ const auto &ind = entry.first;
if (_parent_map.count(ind) > 0)
continue;
auto type = _tensor_type_map.at(ind);
const auto &info = entry.second;
- _tensor_mgr->buildTensor(ind, info, _create_info, _environment, _environment->device().info_,
- type);
+ _tensor_mgr->buildTensor(ind, info, type);
}
}
+ir::OperandIndex TensorBuilder::addTensor(const ir::Shape &shape)
+{
+ return _tensor_mgr->addTensor(shape);
+}
+
} // namespace gpu_cl
} // namespace backend
} // namespace onert
diff --git a/runtime/onert/backend/gpu_cl/TensorBuilder.h b/runtime/onert/backend/gpu_cl/TensorBuilder.h
index 2a5cb8b5e..e0333fef5 100644
--- a/runtime/onert/backend/gpu_cl/TensorBuilder.h
+++ b/runtime/onert/backend/gpu_cl/TensorBuilder.h
@@ -34,9 +34,7 @@ namespace gpu_cl
class TensorBuilder
{
public:
- TensorBuilder(const ir::Operands &operands, TensorManager *tensor_mgr,
- tflite::gpu::cl::InferenceContext::CreateInferenceInfo create_info,
- const std::shared_ptr<tflite::gpu::cl::Environment> &environment);
+ TensorBuilder(const ir::Operands &operands, TensorManager *tensor_mgr);
/**
* @brief Register tensor information to allocate on ACL-CL backend
@@ -83,6 +81,7 @@ public:
private:
void buildTensors(void);
ir::OperandIndex findRootParent(ir::OperandIndex index);
+ ir::OperandIndex addTensor(const ir::Shape &shape);
private:
const ir::Operands &_operands;
@@ -92,8 +91,6 @@ private:
ir::OperandIndexMap<size_t> _uses_count_map;
std::unique_ptr<TensorManager> _tensor_mgr;
- tflite::gpu::cl::InferenceContext::CreateInferenceInfo _create_info;
- std::shared_ptr<tflite::gpu::cl::Environment> _environment;
// for linear executor
cl_common::LifetimeSeq _lifetime_seq;
diff --git a/runtime/onert/backend/gpu_cl/TensorManager.cc b/runtime/onert/backend/gpu_cl/TensorManager.cc
index 9fe0605ac..02e26ed91 100644
--- a/runtime/onert/backend/gpu_cl/TensorManager.cc
+++ b/runtime/onert/backend/gpu_cl/TensorManager.cc
@@ -42,23 +42,28 @@ void TensorManager::deallocateConsts(void) { _const_mgr->deallocate(); }
void TensorManager::deallocateNonconsts(void) { _nonconst_mgr->deallocate(); }
void TensorManager::buildTensor(const ir::OperandIndex &ind, const ir::OperandInfo &info,
- tflite::gpu::cl::InferenceContext::CreateInferenceInfo create_info,
- std::shared_ptr<tflite::gpu::cl::Environment> environment,
- tflite::gpu::cl::DeviceInfo &device_info, TensorType type)
+ TensorType type)
{
assert(_ind_to_mgr.find(ind) == _ind_to_mgr.end());
if (info.isConstant())
{
- _const_mgr->buildTensor(ind, info, create_info, environment, device_info, type);
+ _const_mgr->buildTensor(ind, info, type);
_ind_to_mgr.insert({ind, *_const_mgr});
}
else
{
- _nonconst_mgr->buildTensor(ind, info, create_info, environment, device_info, type);
+ _nonconst_mgr->buildTensor(ind, info, type);
_ind_to_mgr.insert({ind, *_nonconst_mgr});
}
}
+ir::OperandIndex TensorManager::addTensor(const ir::Shape &shape)
+{
+ auto ind = _nonconst_mgr->addTensor(shape);
+ _ind_to_mgr.insert({ind, *_nonconst_mgr});
+
+ return ind;
+}
void TensorManager::startLifetime(const ir::OperandIndex &ind)
{
@@ -96,29 +101,6 @@ ir::OperandIndexMap<std::shared_ptr<operand::CLTensor>> &TensorManager::nonconst
return _nonconst_mgr->tensors();
}
-std::shared_ptr<InferenceContextEx::DummyTensor> TensorManager::atR(const ir::OperandIndex &ind)
-{
- if (_nonconst_mgr->tensorReservers().HaveTensor(ind.value()))
- {
- return _nonconst_mgr->tensorReservers().Get(ind.value());
- }
- else if (_const_mgr->tensorReservers().HaveTensor(ind.value()))
- {
- return _const_mgr->tensorReservers().Get(ind.value());
- }
- return nullptr;
-}
-
-InferenceContextEx::TensorReserverEx &TensorManager::constTensorReservers(void)
-{
- return _const_mgr->tensorReservers();
-}
-
-InferenceContextEx::TensorReserverEx &TensorManager::nonconstTensorReservers(void)
-{
- return _nonconst_mgr->tensorReservers();
-}
-
void TensorManager::iterate(const std::function<void(const ir::OperandIndex &)> &fn)
{
for (auto it : _nonconst_mgr->tensors())
diff --git a/runtime/onert/backend/gpu_cl/TensorManager.h b/runtime/onert/backend/gpu_cl/TensorManager.h
index 52abc579a..5b09ac130 100644
--- a/runtime/onert/backend/gpu_cl/TensorManager.h
+++ b/runtime/onert/backend/gpu_cl/TensorManager.h
@@ -19,8 +19,10 @@
#include "MemoryManager.h"
+#include "Utils.h"
+
#include "tensorflow/lite/delegates/gpu/cl/inference_context.h"
-#include "tensorflow/lite/delegates/gpu/cl/tensor_type.h"
+#include "tensorflow/lite/delegates/gpu/cl/tensor_type_util.h"
#include "ir/OperandInfo.h"
#include "ir/OperandIndexMap.h"
@@ -44,10 +46,8 @@ public:
void deallocateConsts(void);
void deallocateNonconsts(void);
- void buildTensor(const ir::OperandIndex &ind, const ir::OperandInfo &info,
- tflite::gpu::cl::InferenceContext::CreateInferenceInfo create_info,
- std::shared_ptr<tflite::gpu::cl::Environment> environment,
- tflite::gpu::cl::DeviceInfo &device_info, TensorType type);
+ void buildTensor(const ir::OperandIndex &ind, const ir::OperandInfo &info, TensorType type);
+ ir::OperandIndex addTensor(const ir::Shape &shape);
std::shared_ptr<operand::ICLTensor> findTensorAsParent(const ir::OperandIndex &ind);
@@ -55,10 +55,6 @@ public:
void finishLifetime(const ir::OperandIndex &ind);
std::shared_ptr<operand::ICLTensor> at(const ir::OperandIndex &ind);
- std::shared_ptr<InferenceContextEx::DummyTensor> atR(const ir::OperandIndex &ind);
-
- InferenceContextEx::TensorReserverEx &constTensorReservers(void);
- InferenceContextEx::TensorReserverEx &nonconstTensorReservers(void);
ir::OperandIndexMap<std::shared_ptr<operand::CLTensor>> &constTensors(void);
ir::OperandIndexMap<std::shared_ptr<operand::CLTensor>> &nonconstTensors(void);
@@ -73,10 +69,14 @@ private:
ir::OperandIndexMap<MemoryManager &> _ind_to_mgr;
};
-inline TensorManager *createTensorManager(tflite::gpu::cl::CLContext *context)
+inline TensorManager *
+createTensorManager(tflite::gpu::cl::CLContext *context,
+ tflite::gpu::CreateGpuModelInfo create_info,
+ const std::shared_ptr<tflite::gpu::cl::Environment> &environment)
{
VERBOSE(createTensorManager) << "GPU-CL TensorManager" << std::endl;
- return new TensorManager(new MemoryManager(context), new MemoryManager(context));
+ return new TensorManager(new MemoryManager(context, create_info, environment),
+ new MemoryManager(context, create_info, environment));
}
} // namespace gpu_cl
diff --git a/runtime/onert/backend/gpu_cl/TensorRegistry.h b/runtime/onert/backend/gpu_cl/TensorRegistry.h
index 6f17aff54..be342e9cb 100644
--- a/runtime/onert/backend/gpu_cl/TensorRegistry.h
+++ b/runtime/onert/backend/gpu_cl/TensorRegistry.h
@@ -44,7 +44,7 @@ public:
auto getClTensor(const ir::OperandIndex &ind) { return _tensor_mgr->at(ind).get(); }
- auto getClTensorReserver(const ir::OperandIndex &ind) { return _tensor_mgr->atR(ind); }
+ ir::OperandIndex addNewClTensor(const ir::Shape &shape) { return _tensor_mgr->addTensor(shape); }
private:
TensorManager *_tensor_mgr;
diff --git a/runtime/onert/backend/gpu_cl/Utils.h b/runtime/onert/backend/gpu_cl/Utils.h
new file mode 100644
index 000000000..1953c0e43
--- /dev/null
+++ b/runtime/onert/backend/gpu_cl/Utils.h
@@ -0,0 +1,155 @@
+/*
+ * Copyright (c) 2022 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __ONERT_BACKEND_GPU_CL_TENSOR_BUILDER_HELPER_H__
+#define __ONERT_BACKEND_GPU_CL_TENSOR_BUILDER_HELPER_H__
+
+#include "absl/status/status.h"
+
+#include "tensorflow/lite/delegates/gpu/common/shape.h"
+#include "tensorflow/lite/delegates/gpu/common/operations.h"
+
+#include "ir/operation/BinaryArithmetic.h"
+#include "ir/operation/ElementwiseActivation.h"
+#include "ir/operation/ElementwiseBinary.h"
+#include "ir/operation/ElementwiseUnary.h"
+#include "ir/operation/Pool2D.h"
+
+namespace onert
+{
+namespace backend
+{
+namespace gpu_cl
+{
+
+inline tflite::gpu::HW ToHW(int32_t h, int32_t w)
+{
+ return tflite::gpu::HW(h > 0 ? h : 1, w > 0 ? w : 1);
+}
+
+template <typename AttrT>
+inline void UpdatePadding(const ir::PaddingType type, const tflite::gpu::BHWC &input_shape,
+ AttrT *attr)
+{
+ if (type == ir::PaddingType::SAME)
+ {
+ attr->padding = CalculateSamePadding(input_shape, *attr);
+ }
+ else
+ {
+ attr->padding.prepended = tflite::gpu::HW(0, 0);
+ attr->padding.appended = tflite::gpu::HW(0, 0);
+ }
+}
+
+inline tflite::gpu::PoolingType convertPoolType(ir::operation::Pool2D::PoolType type_ir)
+{
+ switch (type_ir)
+ {
+ case ir::operation::Pool2D::PoolType::AVG:
+ return tflite::gpu::PoolingType::AVERAGE;
+ case ir::operation::Pool2D::PoolType::MAX:
+ return tflite::gpu::PoolingType::MAX;
+ default:
+      throw std::runtime_error("gpu_cl KernelGenerator : Not supported operation yet");
+ }
+}
+
+inline tflite::gpu::BHWC ToBHWC(ir::Shape shape)
+{
+ switch (shape.rank())
+ {
+ case 1:
+ // B layout
+ return tflite::gpu::BHWC(shape.dim(0), 1, 1, 1);
+ break;
+ case 2:
+ // BC layout
+ return tflite::gpu::BHWC(shape.dim(0), 1, 1, shape.dim(1));
+ break;
+ case 3:
+ // BWC layout
+ return tflite::gpu::BHWC(shape.dim(0), 1, shape.dim(1), shape.dim(2));
+ break;
+ case 4:
+ // BHWC layout
+ return tflite::gpu::BHWC(shape.dim(0), shape.dim(1), shape.dim(2), shape.dim(3));
+ break;
+ default:
+ break;
+ }
+ return tflite::gpu::BHWC();
+}
+
+inline bool CheckIfLinearConvertible(const ir::Shape *shape)
+{
+ if (shape->num_elements() <= 0)
+ {
+ return false;
+ }
+ for (int i = 0; i < shape->rank() - 1; ++i)
+ {
+ if (shape->dim(i) != 1)
+ {
+ return false;
+ }
+ }
+ return true;
+}
+
+inline tflite::gpu::OperationType
+convertArithmeticType(ir::operation::BinaryArithmetic::ArithmeticType arithmetic_type_ir)
+{
+ switch (arithmetic_type_ir)
+ {
+ case ir::operation::BinaryArithmetic::ArithmeticType::ADD:
+ return tflite::gpu::OperationType::ADD;
+ case ir::operation::BinaryArithmetic::ArithmeticType::SUB:
+ return tflite::gpu::OperationType::SUB;
+ case ir::operation::BinaryArithmetic::ArithmeticType::MUL:
+ return tflite::gpu::OperationType::MUL;
+ case ir::operation::BinaryArithmetic::ArithmeticType::DIV:
+ return tflite::gpu::OperationType::DIV;
+ default:
+ throw std::runtime_error("Unsupported ArithmeticType");
+ }
+}
+
+inline tflite::gpu::OperationType
+convertElementwiseActivationType(ir::operation::ElementwiseActivation::Type type_ir)
+{
+ switch (type_ir)
+ {
+ case ir::operation::ElementwiseActivation::Type::LOGISTIC:
+ return tflite::gpu::OperationType::SIGMOID;
+ default:
+ throw std::runtime_error("Unsupported ElementwiseActivationType");
+ }
+}
+
+enum TensorType
+{
+ TENSOR_TYPE_VALID = 0,
+ TENSOR_TYPE_INPUT = 1,
+ TENSOR_TYPE_OUTPUT = 2,
+ TENSOR_TYPE_DELETE = 3
+};
+
+} // namespace gpu_cl
+} // namespace backend
+} // namespace onert
+
+#endif // __ONERT_BACKEND_GPU_CL_TENSOR_BUILDER_HELPER_H__
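
For orientation, the new Utils.h collects the IR-to-delegate conversions that the kernel generator calls throughout this patch. A small sketch of how the helpers behave; pool_param and image_shape below are illustrative placeholders, not values taken from this patch:

    // CheckIfLinearConvertible(&shape) is true only when every dim except the innermost is 1,
    // e.g. {1, 1, 1, 64} -> true (Linear), {1, 8, 8, 64} -> false (HWC); this appears to be how
    // KernelGenerator::readConstTensor picks between its Linear and HWC branches.
    ir::Shape image_shape{1, 8, 8, 64};
    tflite::gpu::Pooling2DAttributes attributes;
    attributes.type = convertPoolType(pool_param.op_type);    // ir::operation::Pool2D -> tflite::gpu enum
    attributes.kernel = ToHW(pool_param.kh, pool_param.kw);   // non-positive dims clamp to 1
    UpdatePadding(pool_param.padding.type, ToBHWC(image_shape), &attributes);
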
diff --git a/runtime/onert/backend/gpu_cl/ex/InferenceContextEx.h b/runtime/onert/backend/gpu_cl/ex/InferenceContextEx.h
deleted file mode 100644
index f67387904..000000000
--- a/runtime/onert/backend/gpu_cl/ex/InferenceContextEx.h
+++ /dev/null
@@ -1,108 +0,0 @@
-/*
- * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
- * Copyright 2019 The TensorFlow Authors. All Rights Reserved.
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#ifndef __ONERT_BACKEND_GPU_CL_INFERENCE_CONTEXT_EX_H__
-#define __ONERT_BACKEND_GPU_CL_INFERENCE_CONTEXT_EX_H__
-
-#include "tensorflow/lite/delegates/gpu/cl/inference_context.h"
-#include "tensorflow/lite/delegates/gpu/common/model.h"
-#include "absl/strings/str_cat.h"
-
-namespace onert
-{
-namespace backend
-{
-namespace gpu_cl
-{
-
-class InferenceContextEx : public tflite::gpu::cl::InferenceContext
-{
-public:
- struct DummyTensor
- {
- tflite::gpu::BHWC shape;
- tflite::gpu::cl::TensorDescriptor descriptor;
-
- bool operator==(const DummyTensor &b) const
- {
- return shape == b.shape && descriptor == b.descriptor;
- }
- };
-
- class TensorReserverEx
- {
- public:
- tflite::gpu::ValueId Add(const std::shared_ptr<DummyTensor> &dummy)
- {
- reservations_[next_] = dummy;
- return next_++;
- }
- void Add(tflite::gpu::ValueId id, const std::shared_ptr<DummyTensor> &dummy)
- {
- reservations_[id] = dummy;
- }
- void SetNext(tflite::gpu::ValueId id) { next_ = id; }
- bool HaveTensor(tflite::gpu::ValueId id)
- {
- return reservations_.find(id) != reservations_.end();
- }
- std::shared_ptr<DummyTensor> Get(tflite::gpu::ValueId id) { return reservations_[id]; }
-
- std::vector<std::pair<tflite::gpu::ValueId, tflite::gpu::cl::TensorDescriptor>>
- GetTensorDescs() const
- {
- std::vector<std::pair<tflite::gpu::ValueId, tflite::gpu::cl::TensorDescriptor>> result;
- for (auto &v : reservations_)
- {
- tflite::gpu::cl::TensorDescriptor desc = v.second->descriptor;
- desc.shape.b = v.second->shape.b;
- desc.shape.h = v.second->shape.h;
- desc.shape.w = v.second->shape.w;
- desc.shape.d = 1;
- desc.shape.c = v.second->shape.c;
- result.push_back({v.first, desc});
- }
- return result;
- }
-
- void Add(const std::vector<std::pair<tflite::gpu::ValueId, tflite::gpu::cl::TensorDescriptor>>
- &tensors)
- {
- for (auto &v : tensors)
- {
- auto dummy = std::make_shared<DummyTensor>();
- dummy->descriptor = v.second;
- dummy->shape.b = v.second.shape.b;
- dummy->shape.h = v.second.shape.h;
- dummy->shape.w = v.second.shape.w;
- dummy->shape.c = v.second.shape.c;
- Add(v.first, dummy);
- }
- }
-
- private:
- // absl::flat_hash_map<ValueId, DummyTensor> reservations_;
- std::unordered_map<tflite::gpu::ValueId, std::shared_ptr<DummyTensor>> reservations_;
- tflite::gpu::ValueId next_ = 0;
- };
-};
-
-} // namespace gpu_cl
-} // namespace backend
-} // namespace onert
-
-#endif // __ONERT_BACKEND_GPU_CL_INFERENCE_CONTEXT_EX_H__
diff --git a/runtime/onert/backend/gpu_cl/operand/CLTensor.cc b/runtime/onert/backend/gpu_cl/operand/CLTensor.cc
index d3ed102a1..1b19b10f8 100644
--- a/runtime/onert/backend/gpu_cl/operand/CLTensor.cc
+++ b/runtime/onert/backend/gpu_cl/operand/CLTensor.cc
@@ -19,7 +19,7 @@
#include "tensorflow/lite/delegates/gpu/cl/buffer.h"
#include "tensorflow/lite/delegates/gpu/cl/cl_context.h"
#include "tensorflow/lite/delegates/gpu/cl/tensor.h"
-#include "tensorflow/lite/delegates/gpu/cl/tensor_type.h"
+#include "tensorflow/lite/delegates/gpu/cl/tensor_type_util.h"
using namespace tflite::gpu::cl;
@@ -32,9 +32,9 @@ namespace gpu_cl
namespace operand
{
-CLTensor::CLTensor(size_t rank, ir::Shape shape,
- std::shared_ptr<tflite::gpu::cl::Environment> environment, TensorType type)
- : ICLTensor{rank, shape, environment, type}, _tensor(std::make_shared<Tensor>())
+CLTensor::CLTensor(size_t rank, TensorType type, tflite::gpu::BHWC shape,
+ tflite::gpu::TensorDescriptor desc)
+ : ICLTensor{rank, type, shape, desc}, _tensor(std::make_shared<Tensor>())
{
}
diff --git a/runtime/onert/backend/gpu_cl/operand/CLTensor.h b/runtime/onert/backend/gpu_cl/operand/CLTensor.h
index f2153f430..269551d0c 100644
--- a/runtime/onert/backend/gpu_cl/operand/CLTensor.h
+++ b/runtime/onert/backend/gpu_cl/operand/CLTensor.h
@@ -38,8 +38,8 @@ public:
CLTensor() = delete;
public:
- CLTensor(size_t rank, ir::Shape shape, std::shared_ptr<tflite::gpu::cl::Environment> environment,
- TensorType type);
+ CLTensor(size_t rank, TensorType type, tflite::gpu::BHWC shape,
+ tflite::gpu::TensorDescriptor desc);
public:
const tflite::gpu::cl::Tensor *handle() const override;
diff --git a/runtime/onert/backend/gpu_cl/operand/ICLTensor.cc b/runtime/onert/backend/gpu_cl/operand/ICLTensor.cc
index a95f78056..ef71bbc13 100644
--- a/runtime/onert/backend/gpu_cl/operand/ICLTensor.cc
+++ b/runtime/onert/backend/gpu_cl/operand/ICLTensor.cc
@@ -43,8 +43,10 @@ void ICLTensor::access(const std::function<void(ITensor &tensor)> &fn)
fn(*this);
}
-void ICLTensor::writeConvertInit()
+void ICLTensor::writeConvertInit(tflite::gpu::TensorObjectConverterBuilder *converter_builder,
+ std::shared_ptr<tflite::gpu::cl::Environment> environment)
{
+ _environment = environment;
TensorObjectDef input_def;
input_def.dimensions.b = handle()->Batch();
input_def.dimensions.h = handle()->Height();
@@ -74,21 +76,20 @@ void ICLTensor::writeConvertInit()
output_def.object_def.data_type = handle()->GetDataType();
input_def.object_def.user_provided = false;
- _converter_builder = NewConverterBuilder(_environment.get());
- if (!_converter_builder->MakeConverter(input_def, permute_def, &_converter_to).ok())
+ if (!converter_builder->MakeConverter(input_def, permute_def, &_converter_to).ok())
{
throw std::runtime_error("Failed to make converter_to");
}
- if (!_converter_builder->MakeConverter(permute_def, output_def, &_converter_from).ok())
+ if (!converter_builder->MakeConverter(permute_def, output_def, &_converter_from).ok())
{
throw std::runtime_error("Failed to make converter_from");
}
}
-void ICLTensor::readConvertInit()
+void ICLTensor::readConvertInit(tflite::gpu::TensorObjectConverterBuilder *converter_builder,
+ std::shared_ptr<tflite::gpu::cl::Environment> environment)
{
- _converter_builder = NewConverterBuilder(_environment.get());
-
+ _environment = environment;
TensorObjectDef input_def;
input_def.dimensions.b = handle()->Batch();
input_def.dimensions.h = handle()->Height();
@@ -118,20 +119,20 @@ void ICLTensor::readConvertInit()
TensorObjectDef output_def = permute_def;
output_def.object_def.object_type = ObjectType::CPU_MEMORY;
- if (!_converter_builder->MakeConverter(input_def, permute_def, &_converter_from).ok())
+ if (!converter_builder->MakeConverter(input_def, permute_def, &_converter_from).ok())
{
throw std::runtime_error("Failed to make converter_from");
}
- if (!_converter_builder->MakeConverter(permute_def, output_def, &_converter_to).ok())
+ if (!converter_builder->MakeConverter(permute_def, output_def, &_converter_to).ok())
{
throw std::runtime_error("Failed to make converter_to");
}
}
-void ICLTensor::enqueueWriteBuffer(const void *ptr, bool)
+void ICLTensor::enqueueWriteBuffer(const void *ptr, bool blocking)
{
- TensorObject input_obj =
- MakeReadableCpuMemory(absl::MakeSpan(static_cast<const float *>(ptr), _shape.num_elements()));
+ TensorObject input_obj = MakeReadableCpuMemory(
+ absl::MakeSpan(static_cast<const float *>(ptr), _info._shape.DimensionsProduct()));
TensorObject output_obj;
@@ -162,13 +163,19 @@ void ICLTensor::enqueueWriteBuffer(const void *ptr, bool)
{
throw std::runtime_error("Failed to write cl buffer from cpu memory");
}
+
+ if (blocking && !_environment->queue()->WaitForCompletion().ok())
+ {
+ throw std::runtime_error("Failed to WaitForCompletion");
+ }
+
if (!_converter_from->Convert(permute_obj, output_obj).ok())
{
throw std::runtime_error("Failed to change layout");
}
}
-void ICLTensor::enqueueReadBuffer(void *ptr, bool)
+void ICLTensor::enqueueReadBuffer(void *ptr, bool blocking)
{
TensorObject input_obj;
@@ -196,7 +203,7 @@ void ICLTensor::enqueueReadBuffer(void *ptr, bool)
}
TensorObject output_obj =
- MakeCpuMemory(absl::MakeSpan(static_cast<float *>(ptr), _shape.num_elements()));
+ MakeCpuMemory(absl::MakeSpan(static_cast<float *>(ptr), _info._shape.DimensionsProduct()));
if (!_converter_from->Convert(input_obj, permute_obj).ok())
{
@@ -206,6 +213,11 @@ void ICLTensor::enqueueReadBuffer(void *ptr, bool)
{
throw std::runtime_error("Failed to read cl buffer");
}
+
+ if (blocking && !_environment->queue()->WaitForCompletion().ok())
+ {
+ throw std::runtime_error("Failed to WaitForCompletion");
+ }
}
} // namespace operand
diff --git a/runtime/onert/backend/gpu_cl/operand/ICLTensor.h b/runtime/onert/backend/gpu_cl/operand/ICLTensor.h
index b8ad4469f..47420a1c2 100644
--- a/runtime/onert/backend/gpu_cl/operand/ICLTensor.h
+++ b/runtime/onert/backend/gpu_cl/operand/ICLTensor.h
@@ -26,7 +26,7 @@
#include "tensorflow/lite/delegates/gpu/cl/tensor.h"
#include "tensorflow/lite/delegates/gpu/cl/environment.h"
-#include "TensorBuilderHelper.h"
+#include "Utils.h"
namespace onert
{
@@ -37,6 +37,12 @@ namespace gpu_cl
namespace operand
{
+struct TensorInfo
+{
+ tflite::gpu::BHWC _shape;
+ tflite::gpu::TensorDescriptor _desc;
+};
+
class ICLTensor : public ITensor
{
public:
@@ -46,15 +52,15 @@ public:
ICLTensor(ICLTensor &&) = default;
ICLTensor &operator=(ICLTensor &&) = default;
- ICLTensor(size_t rank, ir::Shape shape, std::shared_ptr<tflite::gpu::cl::Environment> environment,
- TensorType type)
- : _rank{rank}, _shape{shape}, _environment(environment), _type(type)
+ ICLTensor(size_t rank, TensorType type, tflite::gpu::BHWC shape,
+ tflite::gpu::TensorDescriptor desc)
+ : _rank{rank}, _type(type), _info{shape, desc}
{
}
public:
uint8_t *buffer() const final { return reinterpret_cast<uint8_t *>(handle()->GetMemoryPtr()); }
- size_t total_size() const final { return _shape.num_elements() * sizeof(float); }
+ size_t total_size() const final { return _info._shape.DimensionsProduct() * sizeof(float); }
size_t calcOffset(const ir::Coordinates &) const final
{
throw std::runtime_error("ICLTensor::calcOffset() is not supported.");
@@ -78,16 +84,38 @@ public:
throw std::runtime_error("ICLTensor::data_zero_points() is not supported.");
}
bool is_dynamic() const override { return false; }
- ir::Shape getShape() const override { return _shape; }
+ ir::Shape getShape() const override
+ {
+ tflite::gpu::BHWC shape = _info._shape;
+ switch (_rank)
+ {
+ case 1:
+ return ir::Shape{shape.b};
+ case 2:
+ return ir::Shape{shape.b, shape.c};
+ case 3:
+ return ir::Shape{shape.b, shape.w, shape.c};
+ case 4:
+ return ir::Shape{shape.b, shape.h, shape.w, shape.c};
+ default:
+ break;
+ }
+ return ir::Shape{};
+ }
bool has_padding() const override { return false; }
void access(const std::function<void(ITensor &tensor)> &fn) final;
bool needMemoryMap() const final { return true; }
void enqueueWriteBuffer(const void *ptr, bool blocking = true) final;
void enqueueReadBuffer(void *ptr, bool blocking = true) final;
- void writeConvertInit();
- void readConvertInit();
+ void writeConvertInit(tflite::gpu::TensorObjectConverterBuilder *converter_builder,
+ std::shared_ptr<tflite::gpu::cl::Environment> environment);
+ void readConvertInit(tflite::gpu::TensorObjectConverterBuilder *converter_builder,
+ std::shared_ptr<tflite::gpu::cl::Environment> environment);
+
TensorType get_type() { return _type; }
+ TensorType set_type(TensorType type) { return _type = type; }
+ const TensorInfo get_info() { return _info; }
public:
virtual const tflite::gpu::cl::Tensor *handle() const = 0;
@@ -96,11 +124,10 @@ public:
private:
protected:
size_t _rank; // Actual rank (reflects extended rank)
- ir::Shape _shape;
- std::shared_ptr<tflite::gpu::cl::Environment> _environment;
TensorType _type;
- std::unique_ptr<tflite::gpu::TensorObjectConverterBuilder> _converter_builder;
+ TensorInfo _info;
tflite::gpu::cl::CLMemory _cl_memory;
+ std::shared_ptr<tflite::gpu::cl::Environment> _environment;
std::unique_ptr<tflite::gpu::TensorObjectConverter> _converter_to;
std::unique_ptr<tflite::gpu::TensorObjectConverter> _converter_from;
};
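
To make the rank handling above concrete, a worked example (illustrative values, not taken from the patch) of what the new getShape() returns for the same internal BHWC storage at different ranks:

    // _rank == 4, BHWC{b=2, h=1, w=5, c=8}  ->  ir::Shape{2, 1, 5, 8}
    // _rank == 3, BHWC{b=2, h=1, w=5, c=8}  ->  ir::Shape{2, 5, 8}   (H is dropped)
    // _rank == 2, BHWC{b=2, h=1, w=1, c=8}  ->  ir::Shape{2, 8}
    // _rank == 1, BHWC{b=2, h=1, w=1, c=1}  ->  ir::Shape{2}
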
diff --git a/runtime/onert/backend/ruy/BackendContext.cc b/runtime/onert/backend/ruy/BackendContext.cc
index 877772619..48da91b50 100644
--- a/runtime/onert/backend/ruy/BackendContext.cc
+++ b/runtime/onert/backend/ruy/BackendContext.cc
@@ -50,7 +50,7 @@ FunctionMap BackendContext::genKernels()
.operands()
.iterate([&](const ir::OperandIndex &, ir::Operand &obj) { obj.releaseData(); });
- for (auto &it : ret)
+ for (auto &&it : ret)
{
auto &fn_seq = it.second;
fn_seq->iterate([&](exec::IFunction &ifunc) { ifunc.prepare(); });
diff --git a/runtime/onert/backend/trix/BackendContext.cc b/runtime/onert/backend/trix/BackendContext.cc
index e46b11d20..39048f2be 100644
--- a/runtime/onert/backend/trix/BackendContext.cc
+++ b/runtime/onert/backend/trix/BackendContext.cc
@@ -50,7 +50,7 @@ FunctionMap BackendContext::genKernels()
.operands()
.iterate([&](const ir::OperandIndex &, ir::Operand &obj) { obj.releaseData(); });
- for (auto &it : ret)
+ for (auto &&it : ret)
{
auto &fn_seq = it.second;
fn_seq->iterate([&](exec::IFunction &ifunc) { ifunc.prepare(); });
diff --git a/runtime/onert/backend/trix/BatchThreadPool.cc b/runtime/onert/backend/trix/BatchThreadPool.cc
new file mode 100644
index 000000000..3c2001d75
--- /dev/null
+++ b/runtime/onert/backend/trix/BatchThreadPool.cc
@@ -0,0 +1,69 @@
+/*
+ * Copyright (c) 2022 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "BatchThreadPool.h"
+
+namespace onert
+{
+namespace backend
+{
+namespace trix
+{
+
+BatchThreadPool::BatchThreadPool(size_t num_threads) : _num_threads(num_threads), _stop_all(false)
+{
+ _worker_threads.reserve(_num_threads);
+ for (uint32_t thread_num = 0; thread_num < _num_threads; ++thread_num)
+ {
+ _worker_threads.emplace_back([this, thread_num]() { this->worker(thread_num); });
+ }
+}
+
+void BatchThreadPool::worker(uint32_t thread_num)
+{
+ while (true)
+ {
+ std::unique_lock<std::mutex> lock(_m_job_queue);
+ _cv_job_queue.wait(lock, [this]() { return !this->_job_queue.empty() || _stop_all; });
+ if (_stop_all && this->_job_queue.empty())
+ {
+ return;
+ }
+
+ // Pop a job in front of queue
+ auto job = std::move(_job_queue.front());
+ _job_queue.pop();
+ lock.unlock();
+
+ // Run the job
+ job(thread_num);
+ }
+}
+
+BatchThreadPool::~BatchThreadPool()
+{
+ _stop_all = true;
+ _cv_job_queue.notify_all();
+
+ for (auto &&t : _worker_threads)
+ {
+ t.join();
+ }
+}
+
+} // namespace trix
+} // namespace backend
+} // namespace onert
diff --git a/runtime/onert/backend/trix/BatchThreadPool.h b/runtime/onert/backend/trix/BatchThreadPool.h
new file mode 100644
index 000000000..bc2936fb4
--- /dev/null
+++ b/runtime/onert/backend/trix/BatchThreadPool.h
@@ -0,0 +1,130 @@
+/*
+ * Copyright (c) 2022 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __ONERT_BACKEND_TRIX_BATCH_THREAD_POOL_H__
+#define __ONERT_BACKEND_TRIX_BATCH_THREAD_POOL_H__
+
+#include <condition_variable>
+#include <functional>
+#include <future>
+#include <memory>
+#include <mutex>
+#include <queue>
+#include <thread>
+#include <vector>
+
+namespace onert
+{
+namespace backend
+{
+namespace trix
+{
+
+/**
+ * @brief Class that has a threadpool for batch-by-batch multi-threading
+ *
+ */
+class BatchThreadPool
+{
+public:
+ BatchThreadPool(size_t num_threads);
+ ~BatchThreadPool();
+
+ /**
+ * @brief Enqueue a job to the worker thread pool and get a future for its result
+ *
+ * @tparam F Type of the function for job
+ * @tparam Args Type of arguments of job
+ * @param f Function for job
+ * @param args Arguments of job
+ * @return std::future<typename std::result_of<F(uint32_t, Args...)>::type>
+ */
+ template <class F, class... Args>
+ std::future<typename std::result_of<F(uint32_t, Args...)>::type> enqueueJob(F &&f,
+ Args &&... args)
+ {
+ if (_stop_all)
+ {
+ throw std::runtime_error("Stop all threads in BatchThreadPool");
+ }
+
+ using return_type = typename std::result_of<F(uint32_t, Args...)>::type;
+ auto job = std::make_shared<std::packaged_task<return_type(uint32_t)>>(
+ std::bind(std::forward<F>(f), std::placeholders::_1, std::forward<Args>(args)...));
+ std::future<return_type> job_result_future = job->get_future();
+ {
+ // Push job in the assigned queue
+ std::lock_guard<std::mutex> lock(_m_job_queue);
+
+ // Push job
+ _job_queue.push([job](uint32_t thread_num) { (*job)(thread_num); });
+ }
+ _cv_job_queue.notify_one();
+
+ return job_result_future;
+ }
+
+private:
+ /**
+ * @brief Worker to run jobs
+ *
+ * @param thread_num Thread number on which worker is running
+ */
+ void worker(uint32_t thread_num);
+
+private:
+ /**
+ * @brief The number of threads
+ *
+ */
+ size_t _num_threads;
+
+ /**
+ * @brief Worker threads that run the queued jobs
+ *
+ */
+ std::vector<std::thread> _worker_threads;
+
+ /**
+ * @brief Queue for jobs
+ *
+ */
+ std::queue<std::function<void(uint32_t)>> _job_queue;
+
+ /**
+ * @brief Condition variable for _job_queue and _worker_threads
+ *
+ */
+ std::condition_variable _cv_job_queue;
+
+ /**
+ * @brief Mutex for the queue _job_queue
+ *
+ */
+ std::mutex _m_job_queue;
+
+ /**
+ * @brief Whether all threads should be stopped
+ *
+ */
+ bool _stop_all;
+};
+
+} // namespace trix
+} // namespace backend
+} // namespace onert
+
+#endif // __ONERT_BACKEND_TRIX_BATCH_THREAD_POOL_H__
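
A minimal usage sketch of the pool above (illustrative only; the job body and argument are made up). enqueueJob() injects the worker's thread number as the first argument, forwards the rest, and returns a std::future for the job's result:

    #include "BatchThreadPool.h"

    int run_example()
    {
      onert::backend::trix::BatchThreadPool pool(2); // e.g. one worker per NPU device

      auto future = pool.enqueueJob(
        [](uint32_t thread_num, int batch_num) -> int {
          // thread_num comes from the pool, batch_num is the forwarded argument
          return batch_num * 10 + static_cast<int>(thread_num);
        },
        /*batch_num=*/3);

      return future.get(); // blocks until a worker has executed the job
    }
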
diff --git a/runtime/onert/backend/trix/Convert.cc b/runtime/onert/backend/trix/Convert.cc
new file mode 100644
index 000000000..fe003e7ea
--- /dev/null
+++ b/runtime/onert/backend/trix/Convert.cc
@@ -0,0 +1,54 @@
+/*
+ * Copyright (c) 2022 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "Convert.h"
+
+namespace onert
+{
+namespace backend
+{
+namespace trix
+{
+
+data_layout convertDataLayout(const ir::Layout layout)
+{
+ switch (layout)
+ {
+ case ir::Layout::NCHW:
+ return DATA_LAYOUT_NCHW;
+ case ir::Layout::NHWC:
+ return DATA_LAYOUT_NHWC;
+ default:
+ throw std::runtime_error("Unknown Layout");
+ }
+}
+
+data_type convertDataType(const ir::DataType type)
+{
+ switch (type)
+ {
+ case ir::DataType::QUANT_UINT8_ASYMM:
+ return DATA_TYPE_QASYMM8;
+ case ir::DataType::QUANT_INT16_SYMM:
+ return DATA_TYPE_QSYMM16;
+ default:
+ throw std::runtime_error("Unsupported data type");
+ }
+}
+
+} // namespace trix
+} // namespace backend
+} // namespace onert
diff --git a/runtime/onert/backend/trix/Convert.h b/runtime/onert/backend/trix/Convert.h
new file mode 100644
index 000000000..662ed44b6
--- /dev/null
+++ b/runtime/onert/backend/trix/Convert.h
@@ -0,0 +1,93 @@
+/*
+ * Copyright (c) 2022 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __ONERT_BACKEND_TRIX_CONVERT_H__
+#define __ONERT_BACKEND_TRIX_CONVERT_H__
+
+#include <backend/IPortableTensor.h>
+#include <ir/DataType.h>
+#include <ir/Layout.h>
+
+#include <libnpuhost.h>
+#include <type_traits>
+
+namespace onert
+{
+namespace backend
+{
+namespace trix
+{
+
+/**
+ * @brief Convert type of layout from onert type to npu type
+ *
+ * @param layout Layout type in onert
+ * @return data_layout Layout type in npu
+ */
+data_layout convertDataLayout(const ir::Layout layout);
+
+/**
+ * @brief Convert type of data from onert type to npu type
+ *
+ * @param type Data type in onert
+ * @return data_type Data type in npu
+ */
+data_type convertDataType(const ir::DataType type);
+
+/**
+ * @brief Set the tensors_data_info object
+ *
+ * @tparam T Type of tensor based on IPortableTensor
+ * @param tensors Tensors that have data information
+ * @param info tensors_data_info to be set
+ */
+template <typename T, std::enable_if_t<std::is_base_of<IPortableTensor, T>::value, bool> = true>
+void setDataInfo(const std::vector<T *> &tensors, tensors_data_info *info)
+{
+ info->num_info = static_cast<uint32_t>(tensors.size());
+
+ for (uint32_t idx = 0; idx < info->num_info; ++idx)
+ {
+ info->info[idx].layout = convertDataLayout(tensors[idx]->layout());
+ info->info[idx].type = convertDataType(tensors[idx]->data_type());
+ }
+}
+
+/**
+ * @brief Set the generic_buffers object
+ *
+ * @tparam T Type of tensor based on IPortableTensor
+ * @param tensors Tensors that have buffer information
+ * @param buf generic_buffers to be set
+ */
+template <typename T, std::enable_if_t<std::is_base_of<IPortableTensor, T>::value, bool> = true>
+void setBuffers(const std::vector<T *> &tensors, generic_buffers *buf)
+{
+ buf->num_buffers = static_cast<uint32_t>(tensors.size());
+
+ for (uint32_t idx = 0; idx < buf->num_buffers; ++idx)
+ {
+ buf->bufs[idx].addr = tensors[idx]->buffer();
+ buf->bufs[idx].size = static_cast<uint64_t>(tensors[idx]->total_size());
+ buf->bufs[idx].type = BUFFER_MAPPED;
+ }
+}
+
+} // namespace trix
+} // namespace backend
+} // namespace onert
+
+#endif // __ONERT_BACKEND_TRIX_CONVERT_H__
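
A sketch of how these helpers are meant to be used from a kernel (the tensor vectors are assumed to be ones a layer already holds, as BulkLayer does further down in this patch):

    #include "Convert.h"
    #include <vector>

    void fill_npu_structs(const std::vector<const onert::backend::IPortableTensor *> &inputs,
                          const std::vector<onert::backend::IPortableTensor *> &outputs)
    {
      using namespace onert::backend::trix;

      tensors_data_info in_info, out_info;
      setDataInfo(inputs, &in_info);   // layout and data type per tensor
      setDataInfo(outputs, &out_info);

      input_buffers in_bufs;
      output_buffers out_bufs;
      setBuffers(inputs, &in_bufs);    // addr/size/BUFFER_MAPPED per tensor
      setBuffers(outputs, &out_bufs);

      // in_info/out_info and in_bufs/out_bufs can now be handed to DevContext::requestRun()
    }
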
diff --git a/runtime/onert/backend/trix/DevContext.cc b/runtime/onert/backend/trix/DevContext.cc
new file mode 100644
index 000000000..059514878
--- /dev/null
+++ b/runtime/onert/backend/trix/DevContext.cc
@@ -0,0 +1,307 @@
+/*
+ * Copyright (c) 2022 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "DevContext.h"
+
+#include "Convert.h"
+
+#include <stdexcept>
+
+namespace onert
+{
+namespace backend
+{
+namespace trix
+{
+
+// All things related to the NPU device handle are gathered in this class. When an NPU
+// daemon is implemented, everything except the context role should be separated out.
+DevContext::DevContext() : _dev_handles{}, _model_ids{}, _meta_map{}
+{
+ auto dev_count = getnumNPUdeviceByType(NPUCOND_TRIV2_CONN_SOCIP);
+ if (dev_count <= 0)
+ {
+ throw std::runtime_error("Unable to find TRIX NPU device");
+ }
+
+ // Get NPU device handles
+ for (int i = 0; i < dev_count; ++i)
+ {
+ npudev_h handle;
+ if (getNPUdeviceByType(&handle, NPUCOND_TRIV2_CONN_SOCIP, i) < 0)
+ {
+ throw std::runtime_error("Failed to get TRIX NPU device handle");
+ }
+ _dev_handles.emplace_back(handle);
+ }
+
+ // NOTE Do not change the number of threads as long as the jobs running on them call
+ // synchronous APIs such as submitNPU_request()
+ _batch_thread_pool = std::make_unique<BatchThreadPool>(_dev_handles.size());
+ // We need to be careful not to create multiple `BatchThreadPool`s. With multiple models, the
+ // current implementation may end up with more than one `BatchThreadPool`. If thread pool
+ // creation is moved to the NPU daemon, this problem should be resolved naturally.
+}
+
+DevContext::~DevContext()
+{
+ // NOTE Must release _batch_thread_pool before releasing _dev_handles to wait for all threads to
+ // be terminated
+ _batch_thread_pool.reset(nullptr);
+
+ for (const auto &dev_handle : _dev_handles)
+ {
+ unregisterNPUmodel_all(dev_handle);
+ putNPUdevice(dev_handle);
+ }
+}
+
+ModelID DevContext::registerModel(const std::string &model_file_path)
+{
+ auto meta = getNPUmodel_metadata(model_file_path.c_str(), false);
+
+ if (meta == nullptr)
+ {
+ throw std::runtime_error("Unable to extract the model metadata");
+ }
+
+ generic_buffer file_info;
+ file_info.type = BUFFER_FILE;
+ file_info.filepath = model_file_path.c_str();
+ file_info.size = meta->size;
+
+ ModelID model_id;
+
+ for (uint32_t dev_num = 0; dev_num < _dev_handles.size(); ++dev_num)
+ {
+ // Register model for each device
+ uint32_t model_id_at_device;
+ if (registerNPUmodel(_dev_handles.at(dev_num), &file_info, &model_id_at_device) < 0)
+ {
+ throw std::runtime_error("Failed to register npu model");
+ }
+
+ if (dev_num == 0)
+ {
+ model_id = model_id_at_device;
+ _meta_map[model_id_at_device] = std::shared_ptr<npubin_meta>(meta);
+ }
+ else
+ {
+ _meta_map[model_id_at_device] = _meta_map[model_id];
+ }
+
+ _model_ids[model_id].resize(dev_num + 1);
+ _model_ids[model_id].at(dev_num) = model_id_at_device;
+ }
+
+ // Return the model id for device 0 only
+ return model_id;
+}
+
+void DevContext::unRegisterModel(ModelID model_id)
+{
+ for (uint32_t dev_num = 0; dev_num < _dev_handles.size(); ++dev_num)
+ {
+ const auto model_id_at_device = _model_ids.at(model_id).at(dev_num);
+ const auto &dev_handle = _dev_handles.at(dev_num);
+
+ // Remove meta data
+ _meta_map.erase(model_id_at_device);
+
+ // Unregister Model for each device
+ unregisterNPUmodel(dev_handle, model_id_at_device);
+ }
+ // Remove model IDs
+ _model_ids.erase(model_id);
+}
+
+void DevContext::requestRun(ModelID model_id, input_buffers *input_bufs, tensors_data_info *in_info,
+ output_buffers *output_bufs, tensors_data_info *out_info,
+ size_t batch_size)
+{
+ if (batch_size > 1)
+ {
+ if (in_info->num_info != 1)
+ {
+ throw std::runtime_error("Supported only an input that has batch now");
+ }
+ if (out_info->num_info != 1)
+ {
+ throw std::runtime_error("Supported only one output now");
+ }
+
+ if (input_bufs->bufs[0].size % batch_size != 0)
+ {
+ throw std::runtime_error("Invalid batch size. batch size :" + std::to_string(batch_size) +
+ ", input buffer size : " + std::to_string(input_bufs->bufs[0].size));
+ }
+
+ if (output_bufs->bufs[0].size % batch_size != 0)
+ {
+ throw std::runtime_error(
+ "Invalid batch size. batch size :" + std::to_string(batch_size) +
+ ", output tensor size : " + std::to_string(output_bufs->bufs[0].size));
+ }
+
+ // inputs/outputs for each batch
+ std::vector<input_buffers> in_buffers_vec(batch_size);
+ std::vector<output_buffers> out_buffers_vec(batch_size);
+
+ // Run on thread pool
+ std::vector<std::future<int32_t>> batch_futures;
+ for (uint32_t batch_num = 0; batch_num < batch_size; ++batch_num)
+ {
+ // Enqueue jobs
+ // The in_info and out_info are always the same even if they are divided by batch, so they are
+ // used as they are.
+ auto future = _batch_thread_pool->enqueueJob(
+ [batch_size, in_info, out_info,
+ this](uint32_t dev_num, ModelID model_id, const input_buffers *input_bufs,
+ const output_buffers *output_bufs, uint32_t batch_num) -> int32_t {
+ // Set buffers of inputs/outputs for each batch
+ // TODO Support multiple inputs/outputs
+ input_buffers in_batch_buffers;
+ in_batch_buffers.num_buffers = input_bufs->num_buffers;
+ const uint64_t in_batch_offset = input_bufs->bufs[0].size / batch_size;
+ setBufferByBatch(input_bufs->bufs[0], batch_num, in_batch_offset,
+ &in_batch_buffers.bufs[0]);
+
+ output_buffers out_batch_buffers;
+ out_batch_buffers.num_buffers = output_bufs->num_buffers;
+ const uint64_t out_batch_offset = output_bufs->bufs[0].size / batch_size;
+ setBufferByBatch(output_bufs->bufs[0], batch_num, out_batch_offset,
+ &out_batch_buffers.bufs[0]);
+
+ try
+ {
+ // dev_num is the same as the thread number in _batch_thread_pool
+ this->runOneBatch(dev_num, model_id, &in_batch_buffers, in_info, &out_batch_buffers,
+ out_info);
+ }
+ catch (...)
+ {
+ _eptr = std::current_exception();
+ }
+
+ return batch_num;
+ },
+ model_id, input_bufs, output_bufs, batch_num);
+ batch_futures.emplace_back(std::move(future));
+ }
+
+ for (auto &&future : batch_futures)
+ {
+ future.get();
+ }
+
+ if (_eptr)
+ {
+ std::exception_ptr eptr(nullptr);
+ _eptr.swap(eptr);
+ std::rethrow_exception(eptr);
+ }
+ }
+ else
+ {
+ runOneBatch(0, model_id, input_bufs, in_info, output_bufs, out_info);
+ }
+}
+
+void DevContext::runOneBatch(uint32_t dev_num, ModelID model_id, input_buffers *input_bufs,
+ tensors_data_info *in_info, output_buffers *output_bufs,
+ tensors_data_info *out_info)
+{
+ const auto &model_id_at_device = _model_ids.at(model_id).at(dev_num);
+
+ const auto meta = _meta_map.at(model_id_at_device);
+ if (meta->input_seg_num != in_info->num_info)
+ {
+ throw std::runtime_error("The number of inputs does not match to model input seg num");
+ }
+
+ if (meta->output_seg_num != out_info->num_info)
+ {
+ throw std::runtime_error("The number of outputs does not match to model output seg num");
+ }
+
+ const auto &dev_handle = _dev_handles.at(dev_num);
+ int req_id;
+
+ if (auto error_code = createNPU_request(dev_handle, model_id_at_device, &req_id))
+ {
+ throw std::runtime_error("Unable to create NPU request with model id (" +
+ std::to_string(model_id_at_device) + ")" +
+ " error code : " + std::to_string(error_code));
+ }
+
+ if (auto error_code =
+ setNPU_requestData(dev_handle, req_id, input_bufs, in_info, output_bufs, out_info))
+ {
+ removeNPU_request(dev_handle, req_id);
+ throw std::runtime_error("Unable to create NPU request for model id (" +
+ std::to_string(model_id_at_device) + ")" +
+ " error code : " + std::to_string(error_code));
+ }
+
+ // NOTE submitNPU_request does not appear to be thread-safe; it occasionally hangs (becomes unresponsive).
+ // Ultimately, to solve this problem, we have to either use another thread-safe API or
+ // make submitNPU_request thread-safe, but both approaches take time.
+ // As a workaround, tolerate the hanging thread for now.
+ // TODO Make submitNPU_request thread-safe or replace it with another thread-safe API
+ std::packaged_task<int(npudev_h, int)> task(submitNPU_request);
+ auto f = task.get_future();
+ std::thread thread_submit_request(std::move(task), dev_handle, req_id);
+ auto status = f.wait_until(std::chrono::system_clock::now() + std::chrono::seconds(60));
+ if (status == std::future_status::timeout)
+ {
+ // There is no way to terminate hanging submitNPU_request from the outside.
+ // If the hanging thread is detached, it will simply keep hanging. Even so, that is better
+ // than having the main thread hang.
+ thread_submit_request.detach();
+
+ // TODO Enable removeNPU_request after resolving hanging.
+ // removeNPU_request(dev_handle, req_id);
+ throw std::runtime_error("The npu API \"submitNPU_request\" timeout");
+ }
+
+ auto error_code = f.get();
+ thread_submit_request.join();
+ if (error_code != 0)
+ {
+ removeNPU_request(dev_handle, req_id);
+ throw std::runtime_error("Unable to submit NPU request with req id (" + std::to_string(req_id) +
+ ")" + " error code : " + std::to_string(error_code));
+ }
+
+ if (auto error_code = removeNPU_request(dev_handle, req_id))
+ {
+ throw std::runtime_error("Unable to remove NPU request with req id (" + std::to_string(req_id) +
+ ")" + " error code : " + std::to_string(error_code));
+ }
+}
+
+void DevContext::setBufferByBatch(const generic_buffer &origin_buf, uint32_t batch_num,
+ uint64_t batch_offset, generic_buffer *batch_buf)
+{
+ batch_buf->addr = reinterpret_cast<uint8_t *>(origin_buf.addr) + batch_num * batch_offset;
+ batch_buf->size = batch_offset;
+ batch_buf->type = BUFFER_MAPPED;
+}
+
+} // namespace trix
+} // namespace backend
+} // namespace onert
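
As a worked example of the per-batch slicing done by requestRun() and setBufferByBatch() above (all numbers hypothetical):

    // A 4096-byte batched input run with batch_size = 4:
    //   in_batch_offset = 4096 / 4 = 1024
    //   batch_num 0 -> {addr + 0,    size 1024, BUFFER_MAPPED}
    //   batch_num 1 -> {addr + 1024, size 1024, BUFFER_MAPPED}
    //   batch_num 2 -> {addr + 2048, size 1024, BUFFER_MAPPED}
    //   batch_num 3 -> {addr + 3072, size 1024, BUFFER_MAPPED}
    // Each slice is submitted as one job; the worker's thread number doubles as the device index.
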
diff --git a/runtime/onert/backend/trix/DevContext.h b/runtime/onert/backend/trix/DevContext.h
index a7dbd7a59..cd8de97e6 100644
--- a/runtime/onert/backend/trix/DevContext.h
+++ b/runtime/onert/backend/trix/DevContext.h
@@ -17,7 +17,12 @@
#ifndef __ONERT_BACKEND_TRIX_DEV_CONTEXT_H__
#define __ONERT_BACKEND_TRIX_DEV_CONTEXT_H__
+#include "BatchThreadPool.h"
+
#include <libnpuhost.h>
+#include <memory>
+#include <string>
+#include <unordered_map>
namespace onert
{
@@ -26,103 +31,117 @@ namespace backend
namespace trix
{
+using ModelID = uint32_t;
+
+/**
+ * @brief NPU device context of trix backend
+ *
+ */
class DevContext
{
public:
- DevContext()
- {
- auto device_count = getnumNPUdeviceByType(NPUCOND_TRIV2_CONN_SOCIP);
- // TODO: x64 platform has 3 cores. We do not support more that 2 cores for now.
- if (device_count > 2)
- {
- device_count = 2;
- }
-
- if (device_count <= 0)
- {
- throw std::runtime_error("Unable to find TRIX NPU device");
- }
-
- for (int i = 0; i < device_count; i++)
- {
- npudev_h h;
- if (getNPUdeviceByType(&h, NPUCOND_TRIV2_CONN_SOCIP, i) < 0)
- {
- throw std::runtime_error("Failed to get TRIX NPU device handle");
- }
- _dev_handles.push_back(h);
- }
- }
-
- ~DevContext()
- {
- for (auto h : _dev_handles)
- {
- if (h != nullptr)
- {
- unregisterNPUmodel_all(h);
- putNPUdevice(h);
- }
- }
- }
-
- npudev_h getDev(int i) { return _dev_handles[i]; }
- int getDevSize() { return _dev_handles.size(); }
-
- template <typename T> void setDataInfo(tensors_data_info *info, std::vector<T *> &tensors)
- {
- info->num_info = static_cast<uint32_t>(tensors.size());
-
- for (uint32_t idx = 0; idx < info->num_info; ++idx)
- {
- info->info[idx].layout = convertDataLayout(tensors[idx]->layout());
- info->info[idx].type = convertDataType(tensors[idx]->data_type());
- }
- }
-
- template <typename T>
- void setBuffer(generic_buffers *buf, std::vector<T *> &tensors, int batch_size, int batch_index)
- {
- buf->num_buffers = static_cast<uint32_t>(tensors.size());
-
- for (uint32_t idx = 0; idx < buf->num_buffers; ++idx)
- {
- buf->bufs[idx].size = static_cast<uint64_t>(tensors[idx]->total_size() / batch_size);
- buf->bufs[idx].addr = tensors[idx]->buffer() + (batch_index * buf->bufs[idx].size);
- buf->bufs[idx].type = BUFFER_MAPPED;
- }
- }
+ /**
+ * @brief Construct a new device Context object
+ *
+ */
+ DevContext();
+
+ /**
+ * @brief Destroy the device Context object
+ *
+ */
+ ~DevContext();
+
+ DevContext(const DevContext &) = delete;
+ DevContext &operator=(const DevContext &) = delete;
+
+ /**
+ * @brief Register a trix model for all NPU devices
+ *
+ * @param model_file_path File path of a trix model
+ * @return ModelID Internal ID of the trix model
+ */
+ ModelID registerModel(const std::string &model_file_path);
+
+ /**
+ * @brief Unregister a trix model
+ *
+ * @param model_id Internal ID of the trix model to be unregistered
+ */
+ void unRegisterModel(ModelID model_id);
+
+ /**
+ * @brief Request a trix model to be run on NPU
+ *
+ * @param model_id Internal ID of a trix model
+ * @param input_bufs Buffer data of inputs
+ * @param in_info Data info of inputs
+ * @param output_bufs Buffer data of outputs
+ * @param out_info Data info of outputs
+ * @param batch_size Batch size
+ */
+ void requestRun(ModelID model_id, input_buffers *input_bufs, tensors_data_info *in_info,
+ output_buffers *output_bufs, tensors_data_info *out_info, size_t batch_size);
private:
- data_layout convertDataLayout(const ir::Layout layout)
- {
- switch (layout)
- {
- case ir::Layout::NCHW:
- return DATA_LAYOUT_NCHW;
- case ir::Layout::NHWC:
- return DATA_LAYOUT_NHWC;
- default:
- throw std::runtime_error("Unknown Layout");
- }
- }
-
- data_type convertDataType(const ir::DataType type)
- {
- switch (type)
- {
- case ir::DataType::QUANT_UINT8_ASYMM:
- return DATA_TYPE_QASYMM8;
- case ir::DataType::QUANT_INT16_SYMM:
- return DATA_TYPE_QSYMM16;
- default:
- throw std::runtime_error("Unsupported data type");
- }
- }
+ /**
+ * @brief Request one batch of a trix model to be run on an NPU device
+ *
+ * @param dev_num Device number
+ * @param model_id Internal ID of a trix model
+ * @param input_bufs Buffer data of inputs
+ * @param in_info Data info of inputs
+ * @param output_bufs Buffer data of outputs
+ * @param out_info Data info of outputs
+ */
+ void runOneBatch(uint32_t dev_num, ModelID model_id, input_buffers *input_bufs,
+ tensors_data_info *in_info, output_buffers *output_bufs,
+ tensors_data_info *out_info);
+
+ /**
+ * @brief Set the buffer object by batch
+ *
+ * @param origin_buf Buffer object that has all batches
+ * @param batch_num Batch number
+ * @param batch_offset Size of a batch
+ * @param batch_buf One batch buffer object to be set
+ */
+ void setBufferByBatch(const generic_buffer &origin_buf, uint32_t batch_num, uint64_t batch_offset,
+ generic_buffer *batch_buf);
private:
- // NPU device handles
+ /**
+ * @brief NPU device handles
+ *
+ */
std::vector<npudev_h> _dev_handles;
+
+ /**
+ * @brief Threadpool for batch-by-batch multi-threading
+ *
+ */
+ std::unique_ptr<BatchThreadPool> _batch_thread_pool;
+
+ // TODO Change key to internal trix model context(?) if it is needed
+ /**
+ * @brief Map for ID of models
+ * Internal Model ID : Model ID array for each device
+ *
+ */
+ std::unordered_map<ModelID, std::vector<uint32_t>> _model_ids;
+
+ /**
+ * @brief Map for meta data
+ * Model ID at each device : meta data
+ *
+ */
+ std::unordered_map<uint32_t, std::shared_ptr<npubin_meta>> _meta_map;
+
+ /**
+ * @brief Exception pointer captured within threads
+ *
+ */
+ std::exception_ptr _eptr;
};
} // namespace trix
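
Putting the reworked DevContext API together, a hypothetical lifecycle sketch (the model path is a placeholder; the buffer and info structs are assumed to be filled via setBuffers()/setDataInfo() as shown earlier):

    auto dev_context = std::make_shared<onert::backend::trix::DevContext>();

    // Registers the model on every detected NPU device; returns the ID used on device 0
    auto model_id = dev_context->registerModel("/path/to/model.tvn"); // path is illustrative

    input_buffers in_bufs;        // assumed filled with setBuffers()
    output_buffers out_bufs;      // assumed filled with setBuffers()
    tensors_data_info in_info;    // assumed filled with setDataInfo()
    tensors_data_info out_info;   // assumed filled with setDataInfo()

    dev_context->requestRun(model_id, &in_bufs, &in_info, &out_bufs, &out_info, /*batch_size=*/1);

    dev_context->unRegisterModel(model_id);
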
diff --git a/runtime/onert/backend/trix/KernelGenerator.cc b/runtime/onert/backend/trix/KernelGenerator.cc
index 68e6840dd..2783bd75b 100644
--- a/runtime/onert/backend/trix/KernelGenerator.cc
+++ b/runtime/onert/backend/trix/KernelGenerator.cc
@@ -61,11 +61,11 @@ void KernelGenerator::visit(const ir::operation::Bulk &node)
using ir::operation::Bulk;
std::vector<IPortableTensor *> output_tensors;
- for (auto &ofm_idx : node.getOutputs())
+ for (const auto &ofm_idx : node.getOutputs())
output_tensors.emplace_back(_tensor_reg->getPortableTensor(ofm_idx));
std::vector<const IPortableTensor *> input_tensors;
- for (auto &ifm_idx : node.getInputs())
+ for (const auto &ifm_idx : node.getInputs())
input_tensors.emplace_back(_tensor_reg->getPortableTensor(ifm_idx));
// parameters
diff --git a/runtime/onert/backend/trix/ops/BulkLayer.cc b/runtime/onert/backend/trix/ops/BulkLayer.cc
index 3c49da9a3..db5c81ba7 100644
--- a/runtime/onert/backend/trix/ops/BulkLayer.cc
+++ b/runtime/onert/backend/trix/ops/BulkLayer.cc
@@ -15,10 +15,8 @@
*/
#include "BulkLayer.h"
-#include <util/logging.h>
-#include <libnpuhost.h>
-#include <future>
+#include "../Convert.h"
namespace onert
{
@@ -29,12 +27,12 @@ namespace trix
namespace ops
{
-BulkLayer::BulkLayer() : _inputs(), _outputs(), _model_id(0), _meta(nullptr), _dev_context(nullptr)
+BulkLayer::BulkLayer() : _inputs(), _outputs(), _model_id(0), _dev_context(nullptr)
{
// DO NOTHING
}
-BulkLayer::~BulkLayer() { free(_meta); }
+BulkLayer::~BulkLayer() { _dev_context->unRegisterModel(_model_id); }
void BulkLayer::configure(const std::vector<const IPortableTensor *> &inputs,
std::vector<IPortableTensor *> &outputs, std::string binary_path,
@@ -43,133 +41,28 @@ void BulkLayer::configure(const std::vector<const IPortableTensor *> &inputs,
_inputs = inputs;
_outputs = outputs;
_dev_context = dev_context;
-
- _meta = getNPUmodel_metadata(binary_path.c_str(), false);
- if (_meta == nullptr)
- {
- throw std::runtime_error("Unable to extract the model metadata");
- }
-
- _model_id.resize(_dev_context->getDevSize());
-
- generic_buffer model_file;
- model_file.type = BUFFER_FILE;
- model_file.filepath = binary_path.c_str();
- model_file.size = _meta->size;
-
- for (int i = 0; i < _dev_context->getDevSize(); i++)
- {
- if (registerNPUmodel(dev_context->getDev(i), &model_file, &_model_id[i]) < 0)
- {
- throw std::runtime_error("Failed to register npu model");
- }
- }
-}
-
-void single_job(npudev_h dev, int req_id, input_buffers *input_buf, tensors_data_info *in_info,
- output_buffers *output_buf, tensors_data_info *out_info)
-{
- if (setNPU_requestData(dev, req_id, input_buf, in_info, output_buf, out_info))
- {
- throw std::runtime_error("Unable to create NPU request for red_id (" + std::to_string(req_id) +
- ")");
- }
-
- if (submitNPU_request(dev, req_id))
- {
- throw std::runtime_error("Unable to submit NPU request with req id (" + std::to_string(req_id) +
- ")");
- }
+ _model_id = _dev_context->registerModel(binary_path);
}
void BulkLayer::run()
{
- // TODO: Remove too many assumption
- // We assume user wants batch execution if user's input size is multiples of model's input size
- int user_input_batch = (_inputs[0]->get_info().shape()).dim(0);
- int model_input_batch = _meta->input_seg_dims[0][0];
- int batch_size = user_input_batch / model_input_batch;
- bool is_batch_execution = (batch_size != 1 ? true : false);
-
- std::vector<int> req_id(_dev_context->getDevSize());
-
- for (int i = 0; i < _dev_context->getDevSize(); i++)
- {
- if (createNPU_request(_dev_context->getDev(i), _model_id[i], &req_id[i]))
- {
- throw std::runtime_error("Unable to create NPU request with model id (" +
- std::to_string(_model_id[i]) + ")");
- }
- }
-
- if (_meta->input_seg_num != _inputs.size())
- {
- throw std::runtime_error("input size does not match to model input seg num");
- }
-
- if (_meta->output_seg_num != _outputs.size())
- {
- throw std::runtime_error("output size does not match to model output seg num");
- }
-
tensors_data_info in_info;
tensors_data_info out_info;
- _dev_context->setDataInfo<const IPortableTensor>(&in_info, _inputs);
- _dev_context->setDataInfo<IPortableTensor>(&out_info, _outputs);
+ setDataInfo(_inputs, &in_info);
+ setDataInfo(_outputs, &out_info);
- std::vector<input_buffers> input_buf;
- std::vector<output_buffers> output_buf;
- input_buf.resize(_dev_context->getDevSize());
- output_buf.resize(_dev_context->getDevSize());
-
- std::vector<std::future<void>> f(_dev_context->getDevSize());
-
- const int num_cores = _dev_context->getDevSize();
- if (is_batch_execution)
- {
- // TODO: Support for general number of cores(>2)
- // Here we assume that 2 trix cores
- for (int i = 0; i < (batch_size); i = i + num_cores)
- {
- for (int core = 0; core < num_cores; core++)
- {
- _dev_context->setBuffer<const IPortableTensor>(&input_buf[core], _inputs, batch_size,
- i + core);
- _dev_context->setBuffer<IPortableTensor>(&output_buf[core], _outputs, batch_size, i + core);
- }
- for (int core = 0; core < num_cores; core++)
- {
-
- if (i + core < batch_size)
- {
- f[core] =
- std::async(std::launch::async, &single_job, _dev_context->getDev(core), req_id[core],
- &input_buf[core], &in_info, &output_buf[core], &out_info);
- }
- }
- for (int core = 0; core < num_cores; core++)
- {
- f[core].wait();
- }
- }
- }
- else
- {
- _dev_context->setBuffer<const IPortableTensor>(&input_buf[0], _inputs, batch_size, 0);
- _dev_context->setBuffer<IPortableTensor>(&output_buf[0], _outputs, batch_size, 0);
-
- single_job(_dev_context->getDev(0), req_id[0], &input_buf[0], &in_info, &output_buf[0],
- &out_info);
- }
+ input_buffers input_bufs;
+ output_buffers output_bufs;
+ setBuffers(_inputs, &input_bufs);
+ setBuffers(_outputs, &output_bufs);
- for (int i = 0; i < _dev_context->getDevSize(); i++)
+ size_t batch_size = 1;
+ // TODO Remove this assumption
+ if (_inputs.size() == 1 && _outputs.size() == 1 && _inputs.at(0)->getShape().dim(0) > 1)
{
- if (removeNPU_request(_dev_context->getDev(i), req_id[i]))
- {
- throw std::runtime_error("Unable to remove NPU request with req id (" +
- std::to_string(req_id[i]) + ")");
- }
+ batch_size = _inputs.at(0)->getShape().dim(0);
}
+ _dev_context->requestRun(_model_id, &input_bufs, &in_info, &output_bufs, &out_info, batch_size);
}
void BulkLayer::prepare()
diff --git a/runtime/onert/backend/trix/ops/BulkLayer.h b/runtime/onert/backend/trix/ops/BulkLayer.h
index 614c0f728..6590b6989 100644
--- a/runtime/onert/backend/trix/ops/BulkLayer.h
+++ b/runtime/onert/backend/trix/ops/BulkLayer.h
@@ -50,8 +50,7 @@ private:
std::vector<const IPortableTensor *> _inputs;
std::vector<IPortableTensor *> _outputs;
- std::vector<uint32_t> _model_id;
- npubin_meta *_meta;
+ ModelID _model_id;
std::shared_ptr<DevContext> _dev_context;
};
diff --git a/runtime/onert/backend/xnnpack/BackendContext.cc b/runtime/onert/backend/xnnpack/BackendContext.cc
index 42fffb608..c52e275aa 100644
--- a/runtime/onert/backend/xnnpack/BackendContext.cc
+++ b/runtime/onert/backend/xnnpack/BackendContext.cc
@@ -50,7 +50,7 @@ FunctionMap BackendContext::genKernels()
.operands()
.iterate([&](const ir::OperandIndex &, ir::Operand &obj) { obj.releaseData(); });
- for (auto &it : ret)
+ for (auto &&it : ret)
{
auto &fn_seq = it.second;
fn_seq->iterate([&](exec::IFunction &ifunc) { ifunc.prepare(); });
diff --git a/runtime/onert/core/CMakeLists.txt b/runtime/onert/core/CMakeLists.txt
index 87c7a13e4..8041ab5bc 100644
--- a/runtime/onert/core/CMakeLists.txt
+++ b/runtime/onert/core/CMakeLists.txt
@@ -57,4 +57,4 @@ target_link_libraries(${TEST_ONERT_CORE} nnfw_coverage)
target_link_libraries(${TEST_ONERT_CORE} gtest gtest_main dl ${LIB_PTHREAD})
add_test(${TEST_ONERT_CORE} ${TEST_ONERT_CORE})
-install(TARGETS ${TEST_ONERT_CORE} DESTINATION unittest_standalone)
+install(TARGETS ${TEST_ONERT_CORE} DESTINATION unittest)
diff --git a/runtime/onert/core/include/backend/basic/BackendContextHelpers.h b/runtime/onert/core/include/backend/basic/BackendContextHelpers.h
index cf2da4c34..970a9f71c 100644
--- a/runtime/onert/core/include/backend/basic/BackendContextHelpers.h
+++ b/runtime/onert/core/include/backend/basic/BackendContextHelpers.h
@@ -84,19 +84,23 @@ template <typename T_BackendContext> void planTensors(const T_BackendContext &ct
tensor_builder->notifyFirstUse(ind);
}
- for (auto &pair : def_map)
+ for (const auto &pair : def_map)
{
- if (pair.second == 0)
- tensor_builder->notifyFirstUse(pair.first);
+ const auto &ind = pair.first;
+ const auto def_count = pair.second;
+ if (def_count == 0)
+ tensor_builder->notifyFirstUse(ind);
}
// This is a workaround to keep the operands over the execution
// (the operands look like they are unused)
std::vector<ir::OperandIndex> operands_last_until_end;
- for (auto &pair : uses_map)
+ for (const auto &pair : uses_map)
{
- if (pair.second == 0)
- operands_last_until_end.push_back(pair.first);
+ const auto &ind = pair.first;
+ const auto use_count = pair.second;
+ if (use_count == 0)
+ operands_last_until_end.push_back(ind);
}
// At each operation,
@@ -161,7 +165,7 @@ template <typename T_BackendContext> void planTensors(const T_BackendContext &ct
}
}
- for (auto &ind : operands_last_until_end)
+ for (const auto &ind : operands_last_until_end)
{
tensor_builder->notifyLastUse(ind);
}
diff --git a/runtime/onert/core/include/compiler/Compiler.h b/runtime/onert/core/include/compiler/Compiler.h
index f05d63c66..9a86f407e 100644
--- a/runtime/onert/core/include/compiler/Compiler.h
+++ b/runtime/onert/core/include/compiler/Compiler.h
@@ -22,76 +22,19 @@
#ifndef __ONERT_COMPILER_COMPILE_H_
#define __ONERT_COMPILER_COMPILE_H_
+#include "CompilerOptions.h"
+#include "ICompiler.h"
#include "ir/NNPkg.h"
-#include "exec/Executors.h"
-#include "util/TracingCtx.h"
namespace onert
{
-
namespace compiler
{
-enum class State
-{
- CREATED, // Before compilation
- COMPILED // Success compilation
-};
-
-struct ManualSchedulerOptions
-{
-public:
- void setBackendMap(const std::string &str);
-
-public:
- std::string backend_for_all;
- std::unordered_map<ir::OpCode, std::string> opcode_to_backend;
- std::unordered_map<ir::OperationIndex, std::string> index_to_backend;
-};
-
-struct PartialGraphOptions
-{
- std::unordered_map<ir::OperationIndex, ir::SubgraphIndex> index_to_graph;
-};
-
-class CompilerOptions
-{
-public:
- // Set default values for CompilerOptions
- // All these default values should not be fetched from Env, when we stop supporting Android NNAPI.
- static std::unique_ptr<CompilerOptions> fromGlobalConfig();
-
-public:
- // GENERAL OPTIONS
- std::vector<std::string> backend_list;
-
- // OPTIONS ONLY FOR DEBUGGING/PROFILING
- std::string trace_filepath; //< File path to save trace records
- int graph_dump_level; //< Graph dump level, values between 0 and 2 are valid
- std::string executor; //< Executor name to use
- ManualSchedulerOptions manual_scheduler_options; //< Options for ManualScheduler
- bool he_scheduler; //< HEScheduler if true, ManualScheduler otherwise
- bool he_profiling_mode; //< Whether HEScheduler profiling mode ON/OFF
- bool disable_compile; //< Run with Interpreter if true, try compilation otherwise
- bool fp16_enable; //< Whether fp16 mode ON/OFF
- PartialGraphOptions partial_graph_options;
-};
-
-struct CompilerArtifact
-{
- CompilerArtifact(void) = delete;
- CompilerArtifact(std::shared_ptr<exec::Executors> executors,
- std::unique_ptr<const util::TracingCtx> tracing_ctx)
- : _executors{executors}, _tracing_ctx{std::move(tracing_ctx)} {};
-
- std::shared_ptr<exec::Executors> _executors;
- std::unique_ptr<const util::TracingCtx> _tracing_ctx;
-};
-
/**
* @brief Class to compile NN package
*/
-class Compiler
+class Compiler : public ICompiler
{
public:
/**
@@ -109,55 +52,25 @@ public:
Compiler(const std::shared_ptr<ir::NNPkg> &nnpkg,
std::vector<std::unique_ptr<CompilerOptions>> &copts);
-public:
/**
- * @brief Do compilation with the options
- *
- * @return std::shared_ptr<CompilerArtifact> Executors as a result of compilation
+ * @brief Destroy the Compiler object
*/
- std::shared_ptr<CompilerArtifact> compile(void);
+ ~Compiler() = default;
+public:
/**
* @brief Do compilation with the options
*
- * @return std::vector<std::shared_ptr<CompilerArtifact>> Executors as a result of compilation
- * for pipeline
- */
- std::vector<std::shared_ptr<CompilerArtifact>> compile(const char *package_file_path,
- const char *map_file_path);
-
- State state(void) const { return _state; }
-
- /**
- * @brief Allow to compute float32 using float16 data type
- */
- void enableToFp16();
-
- /**
- * @brief Build the partial graphs to compile with original graph
+ * @return std::shared_ptr<CompilerArtifact> Executors as a result of compilation
*/
- bool buildPartialGraph(uint32_t num_graphs);
-
-private:
- void checkProfilerConditions();
- std::shared_ptr<ir::Graph> &primary_subgraph()
- {
- return _nnpkg->primary_model()->at(ir::SubgraphIndex{0});
- }
+ std::shared_ptr<CompilerArtifact> compile(void);
private:
- std::shared_ptr<ir::NNPkg> _nnpkg;
- // NOTE These executors does not have duplicated subgraph. This mean they do not allow support
- // subgraphs being called recursively because data of non-constant tensor of parent executor will
- // be updated by child executor. If you want to support subgraphs being called recursively, you
- // have to add allocate non-constant tensor memory of executors in execution time when each
- // subgraph is called.
- State _state;
- std::vector<CompilerOptions *> _voptions;
+ std::shared_ptr<ir::Model> _model;
+ CompilerOptions *_options;
};
} // namespace compiler
-
} // namespace onert
#endif // __ONERT_COMPILER_COMPILE_H_
diff --git a/runtime/onert/core/include/compiler/CompilerFactory.h b/runtime/onert/core/include/compiler/CompilerFactory.h
new file mode 100644
index 000000000..4894366a2
--- /dev/null
+++ b/runtime/onert/core/include/compiler/CompilerFactory.h
@@ -0,0 +1,47 @@
+/*
+ * Copyright (c) 2022 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __ONERT_COMPILER_COMPILER_FACTORY_H__
+#define __ONERT_COMPILER_COMPILER_FACTORY_H__
+
+#include "ICompiler.h"
+#include "CompilerOptions.h"
+#include "ir/NNPkg.h"
+
+namespace onert
+{
+namespace compiler
+{
+
+// TODO Support register and use compiler plugin
+class CompilerFactory
+{
+public:
+ static CompilerFactory &get();
+
+public:
+ std::unique_ptr<ICompiler> create(const std::shared_ptr<ir::NNPkg> &nnpkg,
+ std::vector<std::unique_ptr<CompilerOptions>> &copts);
+
+private:
+ // It is not allowed to use CompilerFactory without get()
+ CompilerFactory() = default;
+};
+
+} // namespace compiler
+} // namespace onert
+
+#endif // __ONERT_COMPILER_COMPILER_FACTORY_H__
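
A sketch of the intended call path through the new factory (the nnpkg variable is assumed to be a package loaded elsewhere):

    std::shared_ptr<onert::ir::NNPkg> nnpkg; // assumed to be loaded elsewhere

    std::vector<std::unique_ptr<onert::compiler::CompilerOptions>> copts;
    copts.emplace_back(onert::compiler::CompilerOptions::fromGlobalConfig());

    std::unique_ptr<onert::compiler::ICompiler> compiler =
      onert::compiler::CompilerFactory::get().create(nnpkg, copts);

    // CompilerArtifact holds the executor set and the tracing context
    std::shared_ptr<onert::compiler::CompilerArtifact> artifact = compiler->compile();
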
diff --git a/runtime/onert/core/include/compiler/CompilerOptions.h b/runtime/onert/core/include/compiler/CompilerOptions.h
new file mode 100644
index 000000000..bbe15fc06
--- /dev/null
+++ b/runtime/onert/core/include/compiler/CompilerOptions.h
@@ -0,0 +1,91 @@
+/*
+ * Copyright (c) 2022 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __ONERT_COMPILER_COMPILER_OPTIONS_H_
+#define __ONERT_COMPILER_COMPILER_OPTIONS_H_
+
+#include "ir/OpCode.h"
+#include "ir/Index.h"
+
+#include <memory>
+#include <string>
+#include <unordered_map>
+#include <vector>
+
+namespace onert
+{
+namespace compiler
+{
+
+struct ManualSchedulerOptions
+{
+public:
+ void setBackendMap(const std::string &str);
+
+public:
+ std::string backend_for_all;
+ std::unordered_map<ir::OpCode, std::string> opcode_to_backend;
+ std::unordered_map<ir::OperationIndex, std::string> index_to_backend;
+};
+
+class CompilerOptions
+{
+public:
+ /**
+ * @brief Set default values for CompilerOptions
+ * @return Generated CompilerOptions
+ *
+ * @note None of these default values should be fetched from Env
+ * once we stop supporting Android NNAPI.
+ */
+ static std::unique_ptr<CompilerOptions> fromGlobalConfig();
+
+ /**
+ * @brief Allow to compute float32 using float16 data type
+ */
+ void enableToFp16() { fp16_enable = true; }
+
+ /**
+ * @brief Force default values of CompilerOptions for correct compilations
+ *
+ * @note This should be called after all CompilerOptions settings are finished,
+ * so that the forced values are not overwritten afterwards.
+ */
+ void forceInternalOptions();
+
+ /**
+ * @brief Print option value
+ */
+ void verboseOptions();
+
+public:
+ // GENERAL OPTIONS
+ std::vector<std::string> backend_list;
+
+ // OPTIONS ONLY FOR DEBUGGING/PROFILING
+ std::string trace_filepath; //< File path to save trace records
+ int graph_dump_level; //< Graph dump level, values between 0 and 2 are valid
+ std::string executor; //< Executor name to use
+ ManualSchedulerOptions manual_scheduler_options; //< Options for ManualScheduler
+ bool he_scheduler; //< HEScheduler if true, ManualScheduler otherwise
+ bool he_profiling_mode; //< Whether HEScheduler profiling mode ON/OFF
+ bool fp16_enable; //< Whether fp16 mode ON/OFF
+};
+
+} // namespace compiler
+} // namespace onert
+
+#endif // __ONERT_COMPILER_COMPILER_OPTIONS_H_
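
A configuration sketch for the extracted options class (the backend-map string format is an assumption, not taken from this patch):

    auto options = onert::compiler::CompilerOptions::fromGlobalConfig(); // env-backed defaults for now
    options->enableToFp16();                                             // fp16_enable = true
    options->manual_scheduler_options.setBackendMap("0=cpu;1=acl_cl");   // assumed "index=backend" format
    options->forceInternalOptions();                                     // call last; prevents overwriting
    options->verboseOptions();                                           // print the resulting values
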
diff --git a/runtime/onert/core/include/compiler/ICompiler.h b/runtime/onert/core/include/compiler/ICompiler.h
new file mode 100644
index 000000000..255e0509d
--- /dev/null
+++ b/runtime/onert/core/include/compiler/ICompiler.h
@@ -0,0 +1,63 @@
+/*
+ * Copyright (c) 2022 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+/**
+ * @file ICompiler.h
+ * @brief This file contains ICompiler class to define and run compilation phase
+ */
+
+#ifndef __ONERT_COMPILER_I_COMPILER_H_
+#define __ONERT_COMPILER_I_COMPILER_H_
+
+#include "exec/IExecutors.h"
+#include "util/TracingCtx.h"
+
+namespace onert
+{
+namespace compiler
+{
+
+struct CompilerArtifact
+{
+ CompilerArtifact(void) = delete;
+ CompilerArtifact(std::shared_ptr<exec::IExecutors> executors,
+ std::unique_ptr<const util::TracingCtx> tracing_ctx)
+ : _executors{executors}, _tracing_ctx{std::move(tracing_ctx)} {};
+
+ std::shared_ptr<exec::IExecutors> _executors;
+ std::unique_ptr<const util::TracingCtx> _tracing_ctx;
+};
+
+class ICompiler
+{
+public:
+ /**
+ * @brief Virtual ICompiler destructor
+ * @note Required so that derived class destructors are invoked
+ */
+ virtual ~ICompiler() = default;
+
+ /**
+ * @brief Do compilation
+ * @return std::shared_ptr<CompilerArtifact> Executors as a result of compilation
+ */
+ virtual std::shared_ptr<CompilerArtifact> compile(void) = 0;
+};
+
+} // namespace compiler
+} // namespace onert
+
+#endif // __ONERT_COMPILER_I_COMPILER_H_
diff --git a/runtime/onert/core/include/compiler/LoweredGraph.h b/runtime/onert/core/include/compiler/LoweredGraph.h
index 7264f2a10..e9f0ae0de 100644
--- a/runtime/onert/core/include/compiler/LoweredGraph.h
+++ b/runtime/onert/core/include/compiler/LoweredGraph.h
@@ -36,13 +36,9 @@ class LoweredGraph
{
public:
LoweredGraph(const ir::Graph &graph, const compiler::CompilerOptions &options);
- LoweredGraph(const ir::Graph &parent_graph, const ir::Graph &graph,
- const compiler::CompilerOptions &options);
ir::Graph &graph() { return _graph; }
const ir::Graph &graph() const { return _graph; }
- ir::Graph &parent_graph() { return _parent_graph; }
- const ir::Graph &parent_graph() const { return _parent_graph; }
const compiler::GraphLowerInfo &lower_info() const { return _lower_info_map; }
compiler::GraphLowerInfo &lower_info() { return _lower_info_map; }
std::shared_ptr<ir::OperationIndexMap<int64_t>> indexed_ranks() { return _indexed_ranks; }
@@ -69,7 +65,6 @@ private:
* It allows the original graph can be compiled multiple times.
*/
ir::Graph _graph;
- ir::Graph _parent_graph;
std::shared_ptr<ir::OperationIndexMap<int64_t>> _indexed_ranks;
compiler::GraphLowerInfo _lower_info_map;
ir::OperationIndexMap<bool> _has_dynamic_tensor_map;
diff --git a/runtime/onert/core/include/compiler/StaticShapeInferer.h b/runtime/onert/core/include/compiler/StaticShapeInferer.h
index f701dc207..94d6ba1a7 100644
--- a/runtime/onert/core/include/compiler/StaticShapeInferer.h
+++ b/runtime/onert/core/include/compiler/StaticShapeInferer.h
@@ -101,6 +101,15 @@ public:
void dump();
+ /**
+ * @brief Create a lowered model shape inferer map
+ * @param[in] lowered_subgs lowered model subgraph map
+ * @return Shape inferer map
+ */
+ static std::unordered_map<ir::SubgraphIndex, std::unique_ptr<StaticShapeInferer>>
+ createStaticShapeInferers(
+ const std::unordered_map<ir::SubgraphIndex, std::unique_ptr<LoweredGraph>> &lowered_subgs);
+
private:
bool checkDynamicInput(const ir::Operation &op);
bool checkDynamicOutput(const ir::Operation &op);
diff --git a/runtime/onert/core/include/exec/Execution.h b/runtime/onert/core/include/exec/Execution.h
index 1e8083c4c..ba3edcdd6 100644
--- a/runtime/onert/core/include/exec/Execution.h
+++ b/runtime/onert/core/include/exec/Execution.h
@@ -22,7 +22,7 @@
#define __ONERT_EXEC_EXECUTION_H__
#include "ir/Layout.h"
-#include "exec/Executors.h"
+#include "exec/IExecutors.h"
#include "IODescription.h"
#include <thread>
@@ -46,16 +46,15 @@ public:
* @brief Construct a new Execution object
* @param[in] executor Model executor
*/
- Execution(const std::shared_ptr<Executors> &executors);
+ Execution(const std::shared_ptr<IExecutors> &executors);
public:
/**
* @brief Returns primary graph object
* @return Graph object
*/
- const ir::Graph &primary_subgraph() const { return primary_executor()->graph(); }
+ const ir::Graph &primary_subgraph() const { return entryExecutor()->graph(); }
- const ir::Graph &primary_parentgraph() const { return primary_executor()->parent_graph(); }
/**
* @brief Change input shape
* @param[in] index Input index
@@ -146,121 +145,15 @@ public:
ir::Shape getInputShape(ir::IOIndex ind) const;
ir::Shape getOutputShape(ir::IOIndex ind) const;
- //
- // Experimental API
- //
-
- // accessor
- std::vector<
- std::tuple<std::shared_ptr<onert::exec::Execution>, onert::ir::IOIndex, onert::ir::IOIndex>>
- getNextExes()
- {
- return next_exes;
- }
- std::deque<std::pair<IODescription *, uint32_t>> *getAsyncIoDescs() { return &_async_io_descs; }
- std::deque<std::vector<void *>> *getAsyncResults() { return &_async_results; }
-
- /**
- * @brief Push IO information between related executions into next_exes
- * @param[in] next address of next execution
- * @param[in] o_index Output index of current execution (it will be the input of next execution)
- * @param[in] i_index Input index of next execution
- */
- void pushNextExe(std::shared_ptr<onert::exec::Execution> next, onert::ir::IOIndex o_index,
- onert::ir::IOIndex i_index)
- {
- next_exes.push_back({next, o_index, i_index});
- }
-
- /**
- * @brief Create New IODescription instance for new inputs outputs
- * @param[in] index instance count number
- */
- void createNewAsyncDesc(uint32_t count = 0);
-
- /**
- * @brief Set async input data's information
- * @param[in] index Input index
- * @param[in] buffer Input data's buffer pointer
- * @param[in] length Input data's length
- * @param[in] layout Input data's data format
- */
- void executeAsyncInput(const ir::IOIndex &index, const void *buffer, size_t length,
- ir::Layout layout = ir::Layout::NHWC);
-
- /**
- * @brief Set async output data's information
- * @param[in] index Output index
- * @param[in] buffer Output data's buffer pointer
- * @param[in] length Output data's length
- * @param[in] layout Output data's data format
- */
- void executeAsyncOutput(const ir::IOIndex &index, void *buffer, size_t length,
- ir::Layout layout = ir::Layout::NHWC);
-
- /**
- * @brief Async execution
- * @note It should be called after setting input and output buffer
- */
- void AsyncExecute();
-
- /**
- * @brief Set finish
- */
- void setFinish();
-
- /**
- * @brief Check if input queue is empty
- * @return @c true if queue is empty, otherwise @c false
- */
- bool isEmptyQueue();
-
- /**
- * @brief Wait semaphore to prevent race condition
- */
- void asyncIoDescSemWait();
-
- /**
- * @brief Post semaphore to prevent race condition
- */
- void asyncIoDescSemPost();
-
- /**
- * @brief Inference
- * @note this function provided to the thread for pipelining
- */
- void runInference();
-
- /**
- * @brief Check if stop_wait is true
- * @return @c true if stop_wait is true, otherwise @c false
- */
- bool stopWait(void) const;
-
- /**
- * @brief Set stop_wait to terminate consumer thread
- */
- void sholudStop();
-
private:
- const std::unique_ptr<IExecutor> &primary_executor() const
- {
- return _executors->at(ir::SubgraphIndex{0});
- };
- std::unique_ptr<IExecutor> &primary_executor() { return _executors->at(ir::SubgraphIndex{0}); };
+ const IExecutor *entryExecutor() const { return _executors->entryExecutor(); };
+ IExecutor *entryExecutor() { return _executors->entryExecutor(); };
private:
- const std::shared_ptr<Executors> _executors;
+ const std::shared_ptr<IExecutors> _executors;
IODescription _io_desc;
- std::deque<std::pair<IODescription *, uint32_t>> _async_io_descs;
- sem_t _async_io_descs_sem;
- std::deque<std::vector<void *>> _async_results;
- std::vector<
- std::tuple<std::shared_ptr<onert::exec::Execution>, onert::ir::IOIndex, onert::ir::IOIndex>>
- next_exes;
std::unique_ptr<std::thread> _exec_thread;
bool finished{false};
- bool stop_wait{false};
};
} // namespace exec
diff --git a/runtime/onert/core/include/exec/Executors.h b/runtime/onert/core/include/exec/Executors.h
deleted file mode 100644
index 5adb0eda4..000000000
--- a/runtime/onert/core/include/exec/Executors.h
+++ /dev/null
@@ -1,71 +0,0 @@
-/*
- * Copyright (c) 2022 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#ifndef __ONERT_EXEC_EXECUTORS_H__
-#define __ONERT_EXEC_EXECUTORS_H__
-
-#include "IExecutor.h"
-#include "ir/NNPkg.h"
-
-namespace onert
-{
-namespace exec
-{
-
-/**
- * @brief Class to gather executors
- */
-class Executors
-{
-public:
- Executors(void) = default;
- Executors(std::unique_ptr<ir::ModelEdges> model_edges) { _model_edges = std::move(model_edges); }
- Executors(const Executors &) = delete;
- Executors(Executors &&) = default;
-
- // TODO Use Executor index
- void emplace(ir::SubgraphIndex idx, std::unique_ptr<IExecutor> exec)
- {
- _executors.emplace(idx, std::move(exec));
- }
-
- std::unique_ptr<IExecutor> &at(ir::SubgraphIndex idx) { return _executors.at(idx); }
-
- uint32_t inputSize() const;
-
- uint32_t outputSize() const;
-
- const ir::OperandInfo inputInfo(const ir::IOIndex &index);
-
- const ir::OperandInfo outputInfo(const ir::IOIndex &index);
-
- void execute(const IODescription &desc);
-
-private:
- void executeEntries(const IODescription &desc);
-
-private:
- // TODO Use Executor index
- // Changing index will effect if/while compile and kernel implementation
- std::unordered_map<ir::SubgraphIndex, std::unique_ptr<IExecutor>> _executors;
- // NOTE _model_edges may use different struct type for executor implementation
- std::unique_ptr<ir::ModelEdges> _model_edges;
-};
-
-} // namespace exec
-} // namespace onert
-
-#endif // __ONERT_EXEC_EXECUTORS_H__
diff --git a/runtime/onert/core/include/exec/FunctionSequence.h b/runtime/onert/core/include/exec/FunctionSequence.h
index 7ff6d8b8c..a7020d425 100644
--- a/runtime/onert/core/include/exec/FunctionSequence.h
+++ b/runtime/onert/core/include/exec/FunctionSequence.h
@@ -66,7 +66,7 @@ public:
template <typename T, typename... Args> void wrap(Args &&... args)
{
- for (auto &function : _functions)
+ for (auto &&function : _functions)
{
function = std::make_unique<T>(std::move(function), args...);
}
diff --git a/runtime/onert/core/include/exec/IExecutor.h b/runtime/onert/core/include/exec/IExecutor.h
index bb5b5af98..46dbcd033 100644
--- a/runtime/onert/core/include/exec/IExecutor.h
+++ b/runtime/onert/core/include/exec/IExecutor.h
@@ -46,7 +46,6 @@ namespace onert
{
namespace exec
{
-class IExecutionObserver;
/**
* @brief Struct to define interface of Executor
*/
@@ -66,14 +65,7 @@ struct IExecutor
*
* @return Graph object
*/
- virtual const ir::Graph &graph() = 0;
-
- /**
- * @brief Returns parent graph object
- *
- * @return Graph object
- */
- virtual const ir::Graph &parent_graph() = 0;
+ virtual const ir::Graph &graph() const = 0;
/**
* @brief Set an ordering on operations
@@ -100,6 +92,13 @@ struct IExecutor
const std::vector<backend::IPortableTensor *> &outputs) = 0;
/**
+ * @brief Get input tensor objects
+ *
+ * @return Vector of @c IOTensor
+ */
+ virtual const std::vector<backend::builtin::IOTensor *> &getInputTensors() const = 0;
+
+ /**
* @brief Get output tensor objects
*
* @return Vector of @c IOTensor
diff --git a/runtime/onert/core/include/exec/IExecutors.h b/runtime/onert/core/include/exec/IExecutors.h
new file mode 100644
index 000000000..013da716b
--- /dev/null
+++ b/runtime/onert/core/include/exec/IExecutors.h
@@ -0,0 +1,98 @@
+/*
+ * Copyright (c) 2023 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __ONERT_EXEC_I_EXECUTORS_H__
+#define __ONERT_EXEC_I_EXECUTORS_H__
+
+#include "IExecutor.h"
+
+namespace onert
+{
+namespace exec
+{
+
+/**
+ * @brief Class to gather NN package's executor set
+ */
+class IExecutors
+{
+public:
+ /**
+ * @brief Virtual IExecutors destructor
+ * @note Required so that derived class destructors are invoked
+ */
+ virtual ~IExecutors() = default;
+
+public:
+ /**
+ * @brief Insert executor in executor set
+ * @param[in] model_index Model index
+ * @param[in] subg_index Subgraph index
+ * @param[in] exec Executor to insert
+ *
+ * @todo Use Executor index
+ */
+ virtual void emplace(const ir::ModelIndex &model_index, const ir::SubgraphIndex &subg_index,
+ std::unique_ptr<IExecutor> exec) = 0;
+
+ /**
+ * @brief Return executor of index
+ * @param[in] model_index Model index
+ * @param[in] subg_index Subgraph index
+ * @return Executor
+ */
+ virtual IExecutor *at(const ir::ModelIndex &model_index,
+ const ir::SubgraphIndex &subg_index) const = 0;
+
+ IExecutor *entryExecutor() const { return at(ir::ModelIndex{0}, ir::SubgraphIndex{0}); }
+
+ /**
+ * @brief Return executor set's number of inputs
+ * @return Number of inputs
+ */
+ virtual uint32_t inputSize() const = 0;
+
+ /**
+ * @brief Return executor set's number of outputs
+ * @return Number of outputs
+ */
+ virtual uint32_t outputSize() const = 0;
+
+ /**
+ * @brief Return NN package input tensor info
+ * @param[in] index Input index
+ * @return Tensor info
+ */
+ virtual const ir::OperandInfo &inputInfo(const ir::IOIndex &index) const = 0;
+
+ /**
+ * @brief Return NN package output tensor info
+ * @param[in] index Output index
+ * @return Tensor info
+ */
+ virtual const ir::OperandInfo &outputInfo(const ir::IOIndex &index) const = 0;
+
+ /**
+ * @brief Execute NN package executor set
+ * @param[in] desc Input and output buffer description
+ */
+ virtual void execute(const IODescription &desc) = 0;
+};
+
+} // namespace exec
+} // namespace onert
+
+#endif // __ONERT_EXEC_I_EXECUTORS_H__
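A minimal usage sketch of the IExecutors interface introduced above; runPackage and its parameters are illustrative placeholders, not part of this patch:

#include "exec/IExecutors.h"

namespace onert
{
namespace exec
{

// Hypothetical helper: runs a whole NN package through its executor set.
void runPackage(IExecutors &execs, const IODescription &desc)
{
  // The package entry point is model 0 / subgraph 0, per the non-virtual
  // entryExecutor() convenience defined in terms of at() above.
  IExecutor *entry = execs.entryExecutor();
  (void)entry; // e.g. inspect entry->graph() before running

  // Whole-package execution; the concrete IExecutors implementation is
  // responsible for resolving model-to-model edges.
  execs.execute(desc);
}

} // namespace exec
} // namespace onert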
diff --git a/runtime/onert/core/include/ir/Graph.h b/runtime/onert/core/include/ir/Graph.h
index 286caf72f..1783cdca0 100644
--- a/runtime/onert/core/include/ir/Graph.h
+++ b/runtime/onert/core/include/ir/Graph.h
@@ -89,15 +89,6 @@ public:
void verify(void);
void removeOperand(const OperandIndex &ind) { _operands.remove(ind); }
void setLayout(Layout layout) { _layout = layout; }
- void setPartialModel(const std::shared_ptr<Model> &partial_model)
- {
- _partialgraphs = partial_model;
- }
- void
- setTensorName(std::shared_ptr<std::unordered_map<ir::OperandIndex, std::string>> &tensor_names)
- {
- _tensor_names = tensor_names;
- }
private:
bool checkOperandsForOperation(const Operation &operation);
@@ -136,29 +127,6 @@ public:
const Operations &operations() const { return _operations; }
Operations &operations() { return _operations; }
Layout layout() const { return _layout; }
- std::shared_ptr<Model> &partialgraphs() { return _partialgraphs; }
- std::shared_ptr<std::unordered_map<ir::OperandIndex, std::string>> &tensor_names()
- {
- return _tensor_names;
- }
- std::unordered_map<std::string, IOIndex>::const_iterator _name_to_input_begin() const
- {
- return _name_to_input.begin();
- }
- std::unordered_map<std::string, IOIndex>::const_iterator _name_to_input_end() const
- {
- return _name_to_input.end();
- }
- std::unordered_map<std::string, IOIndex>::const_iterator _name_to_output_begin() const
- {
- return _name_to_output.begin();
- }
- std::unordered_map<std::string, IOIndex>::const_iterator _name_to_output_end() const
- {
- return _name_to_output.end();
- }
- void input_sort() { _inputs.sort(); }
- void output_sort() { _outputs.sort(); }
// Topological sort
public:
@@ -173,10 +141,6 @@ private:
std::unordered_map<std::string, IOIndex> _name_to_output;
// TFLite and circle's default layout is NHWC;
Layout _layout{Layout::NHWC};
-
- // model for partial graphs
- std::shared_ptr<ir::Model> _partialgraphs;
- std::shared_ptr<std::unordered_map<ir::OperandIndex, std::string>> _tensor_names;
};
} // namespace ir
diff --git a/runtime/onert/core/include/ir/Index.h b/runtime/onert/core/include/ir/Index.h
index f01a4c84d..1864c3bdb 100644
--- a/runtime/onert/core/include/ir/Index.h
+++ b/runtime/onert/core/include/ir/Index.h
@@ -36,10 +36,10 @@ struct IOIndexTag;
using IOIndex = ::onert::util::Index<uint32_t, IOIndexTag>;
struct SubgraphIndexTag;
-using SubgraphIndex = ::onert::util::Index<uint32_t, SubgraphIndexTag>;
+using SubgraphIndex = ::onert::util::Index<uint16_t, SubgraphIndexTag>;
struct ModelIndexTag;
-using ModelIndex = ::onert::util::Index<uint32_t, ModelIndexTag>;
+using ModelIndex = ::onert::util::Index<uint16_t, ModelIndexTag>;
template <typename IndexType>
std::ostream &_index_print_impl(std::ostream &o, const std::string &prefix, IndexType index)
diff --git a/runtime/onert/core/include/ir/NNPkg.h b/runtime/onert/core/include/ir/NNPkg.h
index d9f825e85..b23745d55 100644
--- a/runtime/onert/core/include/ir/NNPkg.h
+++ b/runtime/onert/core/include/ir/NNPkg.h
@@ -21,6 +21,7 @@
#include <unordered_set>
#include <vector>
+#include "ir/Graph.h"
#include "ir/Index.h"
#include "ir/Model.h"
@@ -89,7 +90,7 @@ public:
~NNPkg() = default;
NNPkg(std::shared_ptr<Model> model) { _models[ModelIndex{0}] = model; }
- std::shared_ptr<Model> primary_model() { return _models.at(onert::ir::ModelIndex{0}); }
+ std::shared_ptr<Model> primary_model() const { return _models.at(onert::ir::ModelIndex{0}); }
/**
* @brief Put model at index
@@ -180,6 +181,91 @@ public:
*/
const ModelEdges &model_edges() { return _edges; }
+ /**
+ * @brief Verify NNPkg
+ *
+ */
+ void verify(void)
+ {
+ // Verify edges information
+ //
+ // Only duplicates of nnpkg output and Edge `from` are possible.
+ // | Whether duplicates are possible | Edge `to` | Edge `from` |
+ // | nnpkg input (input of subgraph) | X (*1) | X (*2) |
+ // | nnpkg output (output of subgraph) | X (*2) | O |
+ // *1. The subjects who determine values of each buffer are different.
+ // - nnpkg input : user input
+ // - Edge `to` : output of another subgraph
+ // *2. `IOIndex` of inputs and outputs of subgraph is distinct.
+ //
+ for (const auto &edge : _edges.edges)
+ {
+ if (std::find(_edges.pkg_inputs.begin(), _edges.pkg_inputs.end(), edge.to) !=
+ _edges.pkg_inputs.end())
+ {
+ throw std::runtime_error{
+ "Invalid edge information. NNPkg inputs and Edge `to` cannot be duplicated"};
+ }
+ }
+ }
+
+ // TODO Find better way to handle single model NNPackage and multi model NNPackage on inputSize(),
+ // outputSize(), inputInfo(), outputInfo()
+
+ /**
+ * @brief Get model input size
+ */
+ uint32_t inputSize() const
+ {
+ return _models.size() == 1 ? primary_model()->primary_subgraph()->getInputs().size()
+ : _edges.pkg_inputs.size();
+ }
+
+ /**
+ * @brief Get model output size
+ */
+ uint32_t outputSize() const
+ {
+ return _models.size() == 1 ? primary_model()->primary_subgraph()->getOutputs().size()
+ : _edges.pkg_outputs.size();
+ }
+
+ /**
+ * @brief Get model input info
+ */
+ OperandInfo &inputInfo(uint32_t index) const
+ {
+ if (_models.size() == 1)
+ {
+ auto const graph = primary_model()->primary_subgraph();
+ auto const operand_index = graph->getInputs().at(index);
+ return graph->operands().at(operand_index).info();
+ }
+
+ auto const &desc = input(index);
+ auto const graph = model(std::get<ModelIndex>(desc))->primary_subgraph();
+ auto const operand_index = graph->getInputs().at(std::get<IOIndex>(desc).value());
+ return graph->operands().at(operand_index).info();
+ }
+
+ /**
+ * @brief Get model output info
+ */
+ OperandInfo &outputInfo(uint32_t index) const
+ {
+ if (_models.size() == 1)
+ {
+ auto const graph = primary_model()->primary_subgraph();
+ auto const operand_index = graph->getOutputs().at(index);
+ return graph->operands().at(operand_index).info();
+ }
+
+ auto const &desc = output(index);
+ auto const graph = model(std::get<ModelIndex>(desc))->primary_subgraph();
+ auto const operand_index = graph->getOutputs().at(std::get<IOIndex>(desc).value());
+ return graph->operands().at(operand_index).info();
+ }
+
// TODO: Add iterate() or getter for edges
private:
@@ -190,4 +276,18 @@ private:
} // namespace ir
} // namespace onert
+namespace std
+{
+
+template <> struct hash<onert::ir::IODesc>
+{
+ size_t operator()(const ::onert::ir::IODesc &iodesc) const noexcept
+ {
+ return (std::get<0>(iodesc).value() << 24) | (std::get<1>(iodesc).value() << 16) |
+ std::get<2>(iodesc).value();
+ }
+};
+
+} // namespace std
+
#endif // __ONERT_IR_NNPKG_H__
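A sketch of what the std::hash<IODesc> specialization above enables, assuming IODesc is the (model, subgraph, IO) index tuple used by NNPkg's edge lists; isPkgInput is a hypothetical helper, not part of this patch:

#include <unordered_set>
#include "ir/NNPkg.h"

// With the hash above, IODesc can key unordered containers directly.
// Note the packing (model << 24 | subgraph << 16 | io) stays collision-free
// only while model < 256, subgraph < 256 and the IO index < 65536.
bool isPkgInput(const std::unordered_set<onert::ir::IODesc> &pkg_inputs,
                const onert::ir::IODesc &desc)
{
  // O(1) membership test instead of the linear std::find used in verify()
  return pkg_inputs.count(desc) != 0;
}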
diff --git a/runtime/onert/core/include/ir/OperandIndexSequence.h b/runtime/onert/core/include/ir/OperandIndexSequence.h
index dd390748b..846c3f950 100644
--- a/runtime/onert/core/include/ir/OperandIndexSequence.h
+++ b/runtime/onert/core/include/ir/OperandIndexSequence.h
@@ -19,7 +19,6 @@
#include <initializer_list>
#include <vector>
-#include <algorithm>
#include "ir/Index.h"
@@ -46,12 +45,6 @@ public:
void append(const OperandIndex &index) { _vec.emplace_back(index); }
void append(const OperandIndexSequence &l) { _vec.insert(_vec.end(), l.begin(), l.end()); }
- void sort()
- {
- std::sort(_vec.begin(), _vec.end(),
- [](const auto &lhs, const auto &rhs) { return lhs.value() < rhs.value(); });
- }
-
public:
uint32_t size() const { return static_cast<uint32_t>(_vec.size()); }
const OperandIndex &at(IOIndex set_index) const { return _vec.at(set_index.value()); }
diff --git a/runtime/onert/core/include/ir/Shape.h b/runtime/onert/core/include/ir/Shape.h
index ec6dd07af..cf84e2626 100644
--- a/runtime/onert/core/include/ir/Shape.h
+++ b/runtime/onert/core/include/ir/Shape.h
@@ -70,8 +70,8 @@ struct FeatureShape
struct Shape
{
public:
- static int32_t const UNSPECIFIED_DIM;
- static int32_t const MAX_RANK;
+ static int32_t const kUnspecifiedDim;
+ static int32_t const kMaxRank;
Shape() = default;
@@ -126,7 +126,7 @@ public:
*/
bool hasUnspecifiedDims() const
{
- return (std::find(_dimensions.begin(), _dimensions.end(), UNSPECIFIED_DIM) !=
+ return (std::find(_dimensions.begin(), _dimensions.end(), kUnspecifiedDim) !=
_dimensions.end());
}
diff --git a/runtime/onert/core/include/util/Config.lst b/runtime/onert/core/include/util/Config.lst
index 4bbc02ac3..b9bad1b59 100644
--- a/runtime/onert/core/include/util/Config.lst
+++ b/runtime/onert/core/include/util/Config.lst
@@ -23,7 +23,6 @@ CONFIG(GRAPH_DOT_DUMP , int , "0")
CONFIG(BACKENDS , std::string , "cpu;acl_cl;acl_neon;ruy;xnnpack;gpu_cl;trix;bcq") // FIXME Remove bcq
CONFIG(OP_BACKEND_ALLOPS , std::string , "")
CONFIG(OP_BACKEND_MAP , std::string , "")
-CONFIG(DISABLE_COMPILE , bool , "0")
CONFIG(ONERT_LOG_ENABLE , bool , "0")
CONFIG(CPU_MEMORY_PLANNER , std::string , "WIC")
CONFIG(EXECUTOR , std::string , "Linear")
diff --git a/runtime/onert/core/include/util/Index.h b/runtime/onert/core/include/util/Index.h
index d3f3dcb46..49c5f4c6d 100644
--- a/runtime/onert/core/include/util/Index.h
+++ b/runtime/onert/core/include/util/Index.h
@@ -138,6 +138,13 @@ public:
*/
T value() const { return _index; }
+ /**
+ * @brief Return max index value
+ *
+ * @return Maximum valid index value
+ */
+ static T max() { return UNDEFINED - 1; }
+
private:
T _index;
};
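An illustration of the new max() contract with stand-in constants, assuming UNDEFINED is the all-ones sentinel of the underlying type (the sentinel itself is outside this hunk):

#include <cstdint>

int main()
{
  // Assumed sentinel: util::Index<uint16_t, Tag> reserves the all-ones value
  // (0xFFFF) as UNDEFINED, so max() would report 0xFFFE as the largest
  // assignable SubgraphIndex/ModelIndex value after the uint16_t change above.
  constexpr std::uint16_t kUndefined = 0xFFFF;
  constexpr std::uint16_t kMaxIndex = kUndefined - 1;
  static_assert(kMaxIndex == 0xFFFE, "largest valid index value");
  return 0;
}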
diff --git a/runtime/onert/core/include/util/ObjectManager.h b/runtime/onert/core/include/util/ObjectManager.h
index 36b6c85c8..077a4c2ef 100644
--- a/runtime/onert/core/include/util/ObjectManager.h
+++ b/runtime/onert/core/include/util/ObjectManager.h
@@ -202,12 +202,12 @@ public:
// This implementation is a workaround in case of adding operands while iteration
std::list<Index> l;
- for (auto &e : _objects)
+ for (const auto &e : _objects)
{
l.push_back(e.first);
}
- for (auto &index : l)
+ for (const auto &index : l)
{
fn(index, *_objects[index]);
}
diff --git a/runtime/onert/core/include/util/Utils.h b/runtime/onert/core/include/util/Utils.h
index 8a4eea32b..505f5a9b3 100644
--- a/runtime/onert/core/include/util/Utils.h
+++ b/runtime/onert/core/include/util/Utils.h
@@ -29,9 +29,9 @@
template <size_t from, size_t to, typename Enable = void> struct ForEachDimension
{
- template <typename L, typename... Args>
+ template <typename L>
static void unroll(const onert::ir::Shape &shape, onert::ir::Coordinates &coords,
- L &&lambda_function, Args &&... args)
+ L lambda_function)
{
static_assert(from < to, "from must not be less than to");
assert(static_cast<int>(to) <= shape.rank());
@@ -40,8 +40,7 @@ template <size_t from, size_t to, typename Enable = void> struct ForEachDimensio
for (auto v = 0; v < d; v++)
{
coords.set(from, v);
- ForEachDimension<from + 1, to>::unroll(shape, coords, std::forward<L>(lambda_function),
- std::forward<Args>(args)...);
+ ForEachDimension<from + 1, to>::unroll(shape, coords, lambda_function);
}
}
};
@@ -49,18 +48,17 @@ template <size_t from, size_t to, typename Enable = void> struct ForEachDimensio
template <size_t from, size_t to>
struct ForEachDimension<from, to, typename std::enable_if<from == to>::type>
{
- template <typename L, typename... Args>
+ template <typename L>
static void unroll(const onert::ir::Shape &shape, onert::ir::Coordinates &coords,
- L &&lambda_function, Args &&... args)
+ L lambda_function)
{
UNUSED_RELEASE(shape);
assert(static_cast<int>(to) <= shape.rank());
- lambda_function(coords, std::forward<Args>(args)...);
+ lambda_function(coords);
}
};
-template <typename L, typename... Args>
-inline void ShapeLoop(const onert::ir::Shape &shape, L &&lambda_function, Args &&... args)
+template <typename L> inline void ShapeLoop(const onert::ir::Shape &shape, L lambda_function)
{
assert(shape.rank() > 0);
for (auto i = 0; i < shape.rank(); ++i)
@@ -73,32 +71,25 @@ inline void ShapeLoop(const onert::ir::Shape &shape, L &&lambda_function, Args &
{
case 0:
coords.set(0, 0);
- ForEachDimension<0, 0>::unroll(shape, coords, std::forward<L>(lambda_function),
- std::forward<Args>(args)...);
+ ForEachDimension<0, 0>::unroll(shape, coords, lambda_function);
break;
case 1:
- ForEachDimension<0, 1>::unroll(shape, coords, std::forward<L>(lambda_function),
- std::forward<Args>(args)...);
+ ForEachDimension<0, 1>::unroll(shape, coords, lambda_function);
break;
case 2:
- ForEachDimension<0, 2>::unroll(shape, coords, std::forward<L>(lambda_function),
- std::forward<Args>(args)...);
+ ForEachDimension<0, 2>::unroll(shape, coords, lambda_function);
break;
case 3:
- ForEachDimension<0, 3>::unroll(shape, coords, std::forward<L>(lambda_function),
- std::forward<Args>(args)...);
+ ForEachDimension<0, 3>::unroll(shape, coords, lambda_function);
break;
case 4:
- ForEachDimension<0, 4>::unroll(shape, coords, std::forward<L>(lambda_function),
- std::forward<Args>(args)...);
+ ForEachDimension<0, 4>::unroll(shape, coords, lambda_function);
break;
case 5:
- ForEachDimension<0, 5>::unroll(shape, coords, std::forward<L>(lambda_function),
- std::forward<Args>(args)...);
+ ForEachDimension<0, 5>::unroll(shape, coords, lambda_function);
break;
case 6:
- ForEachDimension<0, 6>::unroll(shape, coords, std::forward<L>(lambda_function),
- std::forward<Args>(args)...);
+ ForEachDimension<0, 6>::unroll(shape, coords, lambda_function);
break;
default:
assert(false && "ShapeLoop, 1 <= Shape'rank <= 6");
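With the variadic arguments removed, extra state reaches the ShapeLoop callback through lambda capture; a sketch, where countElements is a hypothetical helper and not part of this patch:

#include <cstdint>
#include "ir/Shape.h"
#include "util/Utils.h"

// Counts how many coordinates ShapeLoop visits, i.e. the element count of
// the shape (rank 1..6 per the assertion above). The running total is
// captured by reference instead of being forwarded as an extra argument.
uint64_t countElements(const onert::ir::Shape &shape)
{
  uint64_t n = 0;
  ShapeLoop(shape, [&n](const onert::ir::Coordinates &) { ++n; });
  return n;
}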
diff --git a/runtime/onert/core/src/backend/basic/MemoryManager.cc b/runtime/onert/core/src/backend/basic/MemoryManager.cc
index c468ee458..05fd9cc77 100644
--- a/runtime/onert/core/src/backend/basic/MemoryManager.cc
+++ b/runtime/onert/core/src/backend/basic/MemoryManager.cc
@@ -94,7 +94,7 @@ void DynamicMemoryManager::deallocate(const ITensor *tensor)
void DynamicMemoryManager::deallocate(void)
{
- for (auto &mem_alloc : _mem_alloc_map)
+ for (auto &&mem_alloc : _mem_alloc_map)
{
// Release memory buffer of mem_alloc
mem_alloc.second->release();
diff --git a/runtime/onert/core/src/backend/basic/MemoryPlanner.cc b/runtime/onert/core/src/backend/basic/MemoryPlanner.cc
index 1fda57b3d..1c048043c 100644
--- a/runtime/onert/core/src/backend/basic/MemoryPlanner.cc
+++ b/runtime/onert/core/src/backend/basic/MemoryPlanner.cc
@@ -58,7 +58,7 @@ void FirstFitPlanner::claim(const ir::OperandIndex &ind, size_t size)
{
// Find the right position for claiming
uint32_t next_offset = 0;
- for (auto &mem_claim : _claim_table)
+ for (const auto &mem_claim : _claim_table)
{
auto claimed_base_offset = mem_claim.first;
auto claimed_size = _mem_plans[mem_claim.second].size;
diff --git a/runtime/onert/core/src/backend/basic/StaticTensorManager.cc b/runtime/onert/core/src/backend/basic/StaticTensorManager.cc
index d891814fa..b03eb607c 100644
--- a/runtime/onert/core/src/backend/basic/StaticTensorManager.cc
+++ b/runtime/onert/core/src/backend/basic/StaticTensorManager.cc
@@ -39,7 +39,7 @@ void StaticTensorManager::allocateNonconsts(void)
{
_nonconst_mgr->allocate();
- for (auto &pair : _tensors->native_tensors())
+ for (auto &&pair : _tensors->native_tensors())
{
const auto &ind = pair.first;
auto tensor = pair.second.get();
diff --git a/runtime/onert/core/src/backend/builtin/BackendContext.cc b/runtime/onert/core/src/backend/builtin/BackendContext.cc
index 8a6cddcfb..c1a2ed537 100644
--- a/runtime/onert/core/src/backend/builtin/BackendContext.cc
+++ b/runtime/onert/core/src/backend/builtin/BackendContext.cc
@@ -44,7 +44,7 @@ FunctionMap BackendContext::genKernels()
const_cast<ir::Graph *>(graph())->operands().iterate(
[&](const ir::OperandIndex &, ir::Operand &obj) { obj.releaseData(); });
- for (auto &it : ret)
+ for (auto &&it : ret)
{
auto &fn_seq = it.second;
fn_seq->iterate([&](exec::IFunction &ifunc) { ifunc.prepare(); });
diff --git a/runtime/onert/core/src/backend/builtin/IOTensor.h b/runtime/onert/core/src/backend/builtin/IOTensor.h
index a1b2064a1..d94ed0bca 100644
--- a/runtime/onert/core/src/backend/builtin/IOTensor.h
+++ b/runtime/onert/core/src/backend/builtin/IOTensor.h
@@ -47,7 +47,7 @@ public:
public:
void setTensor(IPortableTensor *tensor);
void setUserTensor(uint8_t *buffer, size_t size);
- ir::OperandInfo orig_info() const { return _orig_info; }
+ const ir::OperandInfo &orig_info() const { return _orig_info; }
ir::Layout orig_layout() const { return _orig_layout; }
public:
diff --git a/runtime/onert/core/src/backend/builtin/KernelGenerator.cc b/runtime/onert/core/src/backend/builtin/KernelGenerator.cc
index fa2fc0b94..4533703a6 100644
--- a/runtime/onert/core/src/backend/builtin/KernelGenerator.cc
+++ b/runtime/onert/core/src/backend/builtin/KernelGenerator.cc
@@ -33,8 +33,8 @@ KernelGenerator::KernelGenerator(const ir::Graph &graph, DynamicTensorManager *d
const std::shared_ptr<TensorRegistry> &tensor_reg,
const std::shared_ptr<ExternalContext> &external_context)
: basic::KernelGeneratorBase{graph}, _dyn_tensor_manager{dyn_tensor_manager},
- _tensor_reg{tensor_reg}, _tensor_registries{}, _executors{nullptr}, _external_context{
- external_context}
+ _tensor_reg{tensor_reg}, _tensor_registries{}, _executors{nullptr}, _model_index{},
+ _external_context{external_context}
{
UNUSED_RELEASE(_graph);
UNUSED_RELEASE(_tensor_registries);
@@ -90,7 +90,7 @@ void KernelGenerator::visit(const ir::operation::If &node)
input_tensors.erase(input_tensors.begin());
auto fn = std::make_unique<::onert::backend::builtin::kernel::IfLayer>(
cond_tensor, input_tensors, output_tensors, then_subg_index, else_subg_index, _executors,
- _external_context);
+ _model_index, _external_context);
_return_fn = std::move(fn);
}
@@ -133,7 +133,7 @@ void KernelGenerator::visit(const ir::operation::While &node)
// WhileLayer just sets Executors instead of cond and body executors to avoid the complexity of
// creating executors recursively
auto fn = std::make_unique<::onert::backend::builtin::kernel::WhileLayer>(
- input_tensors, output_tensors, cond_subg_index, body_subg_index, _executors,
+ input_tensors, output_tensors, cond_subg_index, body_subg_index, _executors, _model_index,
_dyn_tensor_manager->dynamic_mem_mgr().get(), _external_context);
_return_fn = std::move(fn);
diff --git a/runtime/onert/core/src/backend/builtin/KernelGenerator.h b/runtime/onert/core/src/backend/builtin/KernelGenerator.h
index d5931ca26..3c86fe306 100644
--- a/runtime/onert/core/src/backend/builtin/KernelGenerator.h
+++ b/runtime/onert/core/src/backend/builtin/KernelGenerator.h
@@ -23,7 +23,7 @@
#include "../../compiler/TensorRegistries.h"
#include "backend/basic/KernelGeneratorBase.h"
-#include "exec/Executors.h"
+#include "exec/IExecutors.h"
#include "ir/Graph.h"
namespace onert
@@ -44,12 +44,14 @@ public:
{
_tensor_registries = tensor_registries;
}
- void setExecutors(const std::shared_ptr<exec::Executors> &executors)
+ void setExecutors(const std::shared_ptr<exec::IExecutors> &executors)
{
// FIXME Using shared_ptr's raw pointer!
_executors = executors.get();
}
+ void setModelIndex(const ir::ModelIndex &index) { _model_index = index; }
+
std::unique_ptr<exec::FunctionSequence> generate(ir::OperationIndex ind) override;
private:
@@ -65,7 +67,8 @@ private:
DynamicTensorManager *_dyn_tensor_manager;
std::shared_ptr<TensorRegistry> _tensor_reg;
compiler::TensorRegistries _tensor_registries;
- exec::Executors *_executors;
+ exec::IExecutors *_executors;
+ ir::ModelIndex _model_index;
const std::shared_ptr<ExternalContext> _external_context;
};
diff --git a/runtime/onert/core/src/backend/builtin/kernel/IfLayer.cc b/runtime/onert/core/src/backend/builtin/kernel/IfLayer.cc
index cdb41960a..51bc5a8f2 100644
--- a/runtime/onert/core/src/backend/builtin/kernel/IfLayer.cc
+++ b/runtime/onert/core/src/backend/builtin/kernel/IfLayer.cc
@@ -29,11 +29,11 @@ IfLayer::IfLayer(backend::IPortableTensor *cond_tensor,
const std::vector<backend::IPortableTensor *> input_tensors,
const std::vector<backend::IPortableTensor *> output_tensors,
const ir::SubgraphIndex &then_subg_index, const ir::SubgraphIndex &else_subg_index,
- exec::Executors *executors,
+ exec::IExecutors *executors, const ir::ModelIndex &model_index,
const std::shared_ptr<ExternalContext> &external_context)
: _cond_tensor{cond_tensor}, _input_tensors{input_tensors}, _output_tensors{output_tensors},
_then_subg_index{then_subg_index}, _else_subg_index{else_subg_index}, _executors{executors},
- _external_context{external_context}
+ _model_index{model_index}, _external_context{external_context}
{
// At this point, executors may not have executors of then subg and else subg
}
@@ -61,12 +61,12 @@ void IfLayer::run()
if (cond_result)
{
VERBOSE(If) << "Call to $" << _then_subg_index << " (then)" << std::endl;
- subg_exec = _executors->at(_then_subg_index).get();
+ subg_exec = _executors->at(_model_index, _then_subg_index);
}
else
{
VERBOSE(If) << "Call to $" << _else_subg_index << " (else)" << std::endl;
- subg_exec = _executors->at(_else_subg_index).get();
+ subg_exec = _executors->at(_model_index, _else_subg_index);
}
subg_exec->execute(_input_tensors, _output_tensors);
diff --git a/runtime/onert/core/src/backend/builtin/kernel/IfLayer.h b/runtime/onert/core/src/backend/builtin/kernel/IfLayer.h
index fa5537a67..8f639ced9 100644
--- a/runtime/onert/core/src/backend/builtin/kernel/IfLayer.h
+++ b/runtime/onert/core/src/backend/builtin/kernel/IfLayer.h
@@ -18,7 +18,7 @@
#define __ONERT_BACKEND_BUILTIN_KERNEL_IF_LAYER_H__
#include <backend/IPortableTensor.h>
-#include <exec/Executors.h>
+#include <exec/IExecutors.h>
#include "../ExternalContext.h"
namespace onert
@@ -37,7 +37,8 @@ public:
const std::vector<backend::IPortableTensor *> input_tensors,
const std::vector<backend::IPortableTensor *> output_tensors,
const ir::SubgraphIndex &then_subg_index, const ir::SubgraphIndex &else_subg_index,
- exec::Executors *executors, const std::shared_ptr<ExternalContext> &external_context);
+ exec::IExecutors *executors, const ir::ModelIndex &model_index,
+ const std::shared_ptr<ExternalContext> &external_context);
public:
void run() override;
@@ -48,7 +49,8 @@ private:
const std::vector<backend::IPortableTensor *> _output_tensors;
const ir::SubgraphIndex _then_subg_index;
const ir::SubgraphIndex _else_subg_index;
- exec::Executors *_executors;
+ exec::IExecutors *_executors;
+ ir::ModelIndex _model_index;
const std::shared_ptr<ExternalContext> _external_context;
};
diff --git a/runtime/onert/core/src/backend/builtin/kernel/PermuteLayer.cc b/runtime/onert/core/src/backend/builtin/kernel/PermuteLayer.cc
index ddaecdf57..600180077 100644
--- a/runtime/onert/core/src/backend/builtin/kernel/PermuteLayer.cc
+++ b/runtime/onert/core/src/backend/builtin/kernel/PermuteLayer.cc
@@ -64,7 +64,7 @@ void PermuteLayer::optimize()
src_offsets_it->resize(0);
dst_offsets_it->resize(0);
if (underlying_type(src->data_type()) != underlying_type(dst->data_type()))
- throw std::runtime_error("data type does not match");
+ continue;
const auto permute_type = [&]() -> PermuteType {
if (src->getShape().rank() == 4 && src->layout() == ir::Layout::NHWC &&
dst->layout() == ir::Layout::NCHW)
@@ -81,6 +81,8 @@ void PermuteLayer::optimize()
return PermuteType::COPY;
}
}();
+
+ // TODO Support different types
auto fn = [&](backend::ITensor &src_tensor) {
dst->access([&](backend::ITensor &dst_tensor) {
// NOTE The buffer of both tensor can be nullptr in this step
@@ -260,8 +262,10 @@ void PermuteLayer::run()
// 1. The tasks for multithreading were created
// 2. The tasks' size > 1
// 3. Both tensors are not dynamic
+ // 4. Data types of both tensors are different
if (_tasks_map.find(src) == _tasks_map.end() || _tasks_map.at(src).size() == 1 ||
- src->is_dynamic() || dst->is_dynamic())
+ src->is_dynamic() || dst->is_dynamic() ||
+ underlying_type(src->data_type()) != underlying_type(dst->data_type()))
{
permute(src, dst, src->getShape().rank(), src_offsets, dst_offsets);
}
diff --git a/runtime/onert/core/src/backend/builtin/kernel/WhileLayer.cc b/runtime/onert/core/src/backend/builtin/kernel/WhileLayer.cc
index 8e006c5ea..c0ca4046c 100644
--- a/runtime/onert/core/src/backend/builtin/kernel/WhileLayer.cc
+++ b/runtime/onert/core/src/backend/builtin/kernel/WhileLayer.cc
@@ -35,12 +35,14 @@ namespace kernel
WhileLayer::WhileLayer(const std::vector<backend::IPortableTensor *> input_tensors,
const std::vector<backend::IPortableTensor *> output_tensors,
const ir::SubgraphIndex &cond_subg_index,
- const ir::SubgraphIndex &body_subg_index, exec::Executors *executors,
+ const ir::SubgraphIndex &body_subg_index, exec::IExecutors *executors,
+ const ir::ModelIndex &model_index,
basic::DynamicMemoryManager *dyn_memory_manager,
const std::shared_ptr<ExternalContext> &external_context)
: _cond_subg_index{cond_subg_index}, _body_subg_index{body_subg_index},
_input_tensors{input_tensors}, _output_tensors{output_tensors}, _executors{executors},
- _dyn_memory_manager{dyn_memory_manager}, _external_context{external_context}
+ _model_index{model_index}, _dyn_memory_manager{dyn_memory_manager}, _external_context{
+ external_context}
{
// At this point, executors may not have executors of cond subg and body subg
}
@@ -57,8 +59,8 @@ void WhileLayer::run()
// // Run cond subg
// If there is no loop copy "_input_tensors" -> "_dst_tensors", else copy "cond subg inputs" ->
// "_dst_tensors"
- auto cond_exec = _executors->at(_cond_subg_index).get();
- auto body_exec = _executors->at(_body_subg_index).get();
+ auto cond_exec = _executors->at(_model_index, _cond_subg_index);
+ auto body_exec = _executors->at(_model_index, _body_subg_index);
// Need a temp tensor to hold the cond subgraph output
assert(cond_exec->getOutputTensors().size() == 1);
diff --git a/runtime/onert/core/src/backend/builtin/kernel/WhileLayer.h b/runtime/onert/core/src/backend/builtin/kernel/WhileLayer.h
index 8551b3d09..40ca4fe23 100644
--- a/runtime/onert/core/src/backend/builtin/kernel/WhileLayer.h
+++ b/runtime/onert/core/src/backend/builtin/kernel/WhileLayer.h
@@ -18,7 +18,7 @@
#define __ONERT_BACKEND_BUILTIN_KERNEL_WHILE_LAYER_H__
#include <backend/IPortableTensor.h>
-#include <exec/Executors.h>
+#include <exec/IExecutors.h>
#include <exec/IFunction.h>
#include <ir/OperandIndexSequence.h>
#include <ir/Graph.h>
@@ -41,7 +41,8 @@ public:
WhileLayer(const std::vector<backend::IPortableTensor *> input_tensors,
const std::vector<backend::IPortableTensor *> output_tensors,
const ir::SubgraphIndex &cond_subg_index, const ir::SubgraphIndex &body_subg_index,
- exec::Executors *executors, basic::DynamicMemoryManager *dyn_memory_manager,
+ exec::IExecutors *executors, const ir::ModelIndex &model_index,
+ basic::DynamicMemoryManager *dyn_memory_manager,
const std::shared_ptr<ExternalContext> &external_context);
public:
@@ -52,7 +53,8 @@ private:
const ir::SubgraphIndex _body_subg_index;
const std::vector<backend::IPortableTensor *> _input_tensors;
const std::vector<backend::IPortableTensor *> _output_tensors;
- exec::Executors *_executors;
+ exec::IExecutors *_executors;
+ const ir::ModelIndex _model_index;
basic::DynamicMemoryManager *_dyn_memory_manager; // For generating temp tensors
const std::shared_ptr<ExternalContext> _external_context;
};
diff --git a/runtime/onert/core/src/compiler/Compiler.cc b/runtime/onert/core/src/compiler/Compiler.cc
index 7be9c1e3b..45124556b 100644
--- a/runtime/onert/core/src/compiler/Compiler.cc
+++ b/runtime/onert/core/src/compiler/Compiler.cc
@@ -22,543 +22,96 @@
#include "pass/OddOutputPass.h"
#include "pass/PassRunner.h"
#include "pass/UnusedOperandEliminationPass.h"
-#include "../backend/builtin/Config.h"
#include "../dumper/dot/DotDumper.h"
-#include "../interp/InterpExecutor.h"
-#include "../ir/OperationCloner.h"
+#include "../exec/SingleModelExecutors.h"
#include "../ir/OperationDumper.h"
#include "../ir/verifier/Verifier.h"
#include "compiler/StaticShapeInferer.h"
-#include "util/ConfigSource.h"
-#include "util/logging.h"
-#include <misc/polymorphic_downcast.h>
#include <misc/string_helpers.h>
-#include <json/json.h>
-
-// TODO Remove using fstream header
-#include <fstream>
-
-namespace
-{
-
-using namespace onert;
-
-std::string getOpBackends(std::unordered_map<ir::OpCode, std::string> &opcode_to_backend)
-{
- std::unordered_map<ir::OpCode, std::string>::iterator it;
- std::string opbackends;
-
- for (it = opcode_to_backend.begin(); it != opcode_to_backend.end(); ++it)
- {
- if (!opbackends.empty())
- opbackends = opbackends + ", ";
-
- auto opcode = it->first;
- const std::string opname = ir::toString(opcode);
- opbackends += opname + "=" + it->second;
- }
- return opbackends;
-}
-
-void verboseOptions(compiler::CompilerOptions &options)
-{
- VERBOSE(Compiler) << std::boolalpha << "==== Compiler Options ====" << std::endl;
- VERBOSE(Compiler) << "backend_list : "
- << nnfw::misc::join(options.backend_list.begin(), options.backend_list.end(),
- "/")
- << std::endl;
- VERBOSE(Compiler) << "trace_filepath : " << options.trace_filepath << std::endl;
- VERBOSE(Compiler) << "graph_dump_level : " << options.graph_dump_level << std::endl;
- VERBOSE(Compiler) << "executor : " << options.executor << std::endl;
- VERBOSE(Compiler) << "manual backend_for_all : "
- << options.manual_scheduler_options.backend_for_all << std::endl;
- VERBOSE(Compiler) << "manual_scheduler_options : "
- << getOpBackends(options.manual_scheduler_options.opcode_to_backend)
- << std::endl;
- VERBOSE(Compiler) << "he_scheduler : " << options.he_scheduler << std::endl;
- VERBOSE(Compiler) << "he_profiling_mode : " << options.he_profiling_mode << std::endl;
- VERBOSE(Compiler) << "disable_compile : " << options.disable_compile << std::endl;
- VERBOSE(Compiler) << "fp16_enable : " << options.fp16_enable << std::endl
- << std::noboolalpha;
-}
-
-std::unordered_map<ir::SubgraphIndex, std::unique_ptr<compiler::StaticShapeInferer>>
-createStaticShapeInferers(
- const std::unordered_map<ir::SubgraphIndex, std::unique_ptr<compiler::LoweredGraph>>
- &lowered_subgs)
-{
- // Allocate StaticShapeInferer per each subgraph
- std::unordered_map<ir::SubgraphIndex, std::unique_ptr<compiler::StaticShapeInferer>> inferers;
- for (auto &pair : lowered_subgs)
- {
- const auto &subg_index = pair.first;
- auto &lowered_subg = pair.second;
- inferers[subg_index] = std::make_unique<compiler::StaticShapeInferer>(lowered_subg.get());
- }
-
- // Append observers in all StaticShapeInferers
- for (auto &pair : lowered_subgs)
- {
- const auto &subg_index = pair.first;
- auto &lowered_subg = pair.second;
-
- // TODO: Change this iteration for all to controlflow iteration
- lowered_subg->graph().operations().iterate([&](const ir::OperationIndex &,
- const ir::Operation &op) {
- // A Function to append child inferers. These make it possible for a StaticShapeInferer to
- // call StaticShapeInferes of child subgraphs recursively
- auto appendChildInferer = [&](const ir::SubgraphIndex &child_subg_idx) {
- auto *child_inferer = inferers.at(child_subg_idx).get();
- inferers.at(subg_index)->appendChildInferer(child_subg_idx, child_inferer);
- };
-
- // A Function to appaend subg input observers. This makes it possible for a StaticShapeInferer
- // to update inputs of child subgraphs
- auto appendSubgraphInputObserver = [&](const ir::SubgraphIndex &child_subg_idx) {
- std::vector<ir::Operand *> child_subg_inputs;
- auto &child_subg = lowered_subgs.at(child_subg_idx)->graph();
- for (const auto &input_idx : child_subg.getInputs())
- {
- auto operand_ptr = child_subg.operands().getRawPtr(input_idx);
- child_subg_inputs.emplace_back(operand_ptr);
- }
- inferers.at(subg_index)
- ->appendSubgInputObserver(child_subg_idx,
- std::make_unique<compiler::OperandObserver>(child_subg_inputs));
- };
-
- // A Function to set controlflow output observers. This makes it possible for a
- // StaticShapeInferer to update outputs of parent controlflow opeerations
- auto setControlFlowOutputObserver = [&](const ir::SubgraphIndex &child_subg_idx) {
- std::vector<ir::Operand *> cf_outputs;
- auto &subg = lowered_subg->graph();
- for (const auto &output_idx : op.getOutputs())
- {
- auto operand_ptr = subg.operands().getRawPtr(output_idx);
- cf_outputs.emplace_back(operand_ptr);
- }
- inferers.at(child_subg_idx)
- ->setControlflowOutputObserver(std::make_unique<compiler::OperandObserver>(cf_outputs));
- };
-
- // Append Observers in a StaticShapeInferer
- if (op.opcode() == ir::OpCode::If)
- {
- const auto &if_op = nnfw::misc::polymorphic_downcast<const ir::operation::If &>(op);
-
- appendChildInferer(if_op.param().then_subg_index);
- appendChildInferer(if_op.param().else_subg_index);
-
- appendSubgraphInputObserver(if_op.param().then_subg_index);
- appendSubgraphInputObserver(if_op.param().else_subg_index);
-
- setControlFlowOutputObserver(if_op.param().then_subg_index);
- }
- else if (op.opcode() == ir::OpCode::While)
- {
- const auto &while_op = nnfw::misc::polymorphic_downcast<const ir::operation::While &>(op);
-
- appendChildInferer(while_op.param().cond_subg_index);
- appendChildInferer(while_op.param().body_subg_index);
-
- appendSubgraphInputObserver(while_op.param().cond_subg_index);
- appendSubgraphInputObserver(while_op.param().body_subg_index);
-
- setControlFlowOutputObserver(while_op.param().body_subg_index);
- }
- });
- }
-
- return inferers;
-}
-
-} // namespace
namespace onert
{
-
namespace compiler
{
-void ManualSchedulerOptions::setBackendMap(const std::string &str)
-{
- // TODO Support multiple subgraphs for manual scheduling
- auto key_val_list = nnfw::misc::split(str, ';');
- for (const auto &key_val_str : key_val_list)
- {
- if (key_val_str.empty())
- {
- continue;
- }
-
- auto key_val = nnfw::misc::split(key_val_str, '=');
- const auto &key_str = key_val.at(0);
- const auto &val = key_val.at(1);
- auto key = static_cast<uint32_t>(std::stoi(key_str));
- this->index_to_backend.emplace(ir::OperationIndex{key}, val);
- }
-}
-
-std::unique_ptr<CompilerOptions> CompilerOptions::fromGlobalConfig()
-{
- auto o = std::make_unique<CompilerOptions>();
- o->backend_list = nnfw::misc::split(util::getConfigString(util::config::BACKENDS), ';');
- o->trace_filepath = util::getConfigString(util::config::TRACE_FILEPATH);
- o->graph_dump_level = util::getConfigInt(util::config::GRAPH_DOT_DUMP);
- o->executor = util::getConfigString(util::config::EXECUTOR);
- o->he_scheduler = util::getConfigBool(util::config::USE_SCHEDULER);
- o->he_profiling_mode = util::getConfigBool(util::config::PROFILING_MODE);
- o->disable_compile = util::getConfigBool(util::config::DISABLE_COMPILE);
- o->fp16_enable = util::getConfigBool(util::config::FP16_ENABLE);
- {
- // Backend for all
- auto &ms_options = o->manual_scheduler_options;
-
- // Default value for op_backend_all is first element in the backend list
- ms_options.backend_for_all = util::getConfigString(util::config::OP_BACKEND_ALLOPS);
-
-// Opcode to Backend
-#define OP(OpName) \
- { \
- const auto &backend_str = util::getConfigString(util::config::OP_BACKEND_##OpName); \
- if (!backend_str.empty()) \
- { \
- ms_options.opcode_to_backend[ir::OpCode::OpName] = backend_str; \
- } \
- }
-#include "ir/Operations.lst"
-#undef OP
-
- // Index to Backend
- auto map_str = util::getConfigString(util::config::OP_BACKEND_MAP);
- ms_options.setBackendMap(map_str);
- }
- return o;
-}
Compiler::Compiler(const std::shared_ptr<ir::Model> &model, CompilerOptions &copt)
- : _nnpkg{std::make_shared<ir::NNPkg>(model)}, _state{State::CREATED}, _voptions{&copt}
+ : _model{model}, _options{&copt}
{
// DO NOTHING
}
Compiler::Compiler(const std::shared_ptr<ir::NNPkg> &nnpkg,
std::vector<std::unique_ptr<CompilerOptions>> &copts)
- : _nnpkg{nnpkg}, _state{State::CREATED}, _voptions{}
+ : _model{nnpkg->primary_model()}, _options{copts[0].get()}
{
- for (uint32_t i = 0; i < copts.size(); i++)
- {
- _voptions.push_back(copts[i].get());
- }
-}
-
-void Compiler::enableToFp16()
-{
- for (auto options : _voptions)
- options->fp16_enable = true;
-}
-
-void Compiler::checkProfilerConditions()
-{
- if (_nnpkg->model_count() != 1)
- throw std::runtime_error("NYI: Profiling mode for multiple model is not supported yet");
-
- auto &options = *_voptions[0];
-
- if (options.he_scheduler)
- throw std::runtime_error("Heterogeneous scheduler must be enabled during profiling.");
-
- if (options.executor != "Dataflow")
- throw std::runtime_error("Profiling mode works only with 'Dataflow' executor");
-}
-
-bool Compiler::buildPartialGraph(uint32_t num_graphs)
-{
- // Use 1st model and options only on partial graph (pipeline) compile
- assert(_nnpkg->model_count() == 1);
- assert(_voptions.size() == 1);
-
- auto model = _nnpkg->primary_model();
- auto &options = *_voptions[0];
-
- if (model->subgraphs_count() > 1)
- return false;
-
- auto partialgraphs = std::make_shared<ir::Model>();
-
- for (uint32_t idx = 0; idx < num_graphs; idx++)
- {
- auto partialgraph = std::make_unique<ir::Graph>();
- partialgraphs->push(ir::SubgraphIndex{idx}, std::move(partialgraph));
- }
- model->primary_subgraph()->setPartialModel(partialgraphs);
-
- auto partial_graph = primary_subgraph()->partialgraphs();
-
- primary_subgraph()->operands().iterate(
- [&](const ir::OperandIndex &operand_index, const ir::Operand &operand) {
- auto use_operations = operand.getUses();
-
- for (auto use_operation : use_operations)
- {
- auto graph_index = options.partial_graph_options.index_to_graph.find(use_operation);
- if (graph_index == options.partial_graph_options.index_to_graph.end())
- {
- throw std::runtime_error("Invalid Partition Map");
- }
- auto partition = partial_graph->at(graph_index->second);
-
- if (partition->operands().exist(operand_index))
- {
- continue;
- }
-
- auto new_operand = std::make_unique<ir::Operand>(operand);
- new_operand->clearDefUse();
- auto new_operand_ind = partition->addOperand(operand_index, std::move(new_operand));
- UNUSED_RELEASE(new_operand_ind);
- assert(new_operand_ind == operand_index);
- }
- });
-
- primary_subgraph()->operations().iterate(
- [&](const ir::OperationIndex &operation_index, const ir::Operation &operation) {
- auto graph_index = options.partial_graph_options.index_to_graph.find(operation_index);
- if (graph_index == options.partial_graph_options.index_to_graph.end())
- {
- throw std::runtime_error("Invalid Partition Map");
- }
- auto partition = partial_graph->at(graph_index->second);
-
- auto operand_io = (operation.getInputs() + operation.getOutputs()) | ir::Remove::DUPLICATED |
- ir::Remove::UNDEFINED;
- for (auto operand_index : operand_io)
- {
- if (partition->operands().exist(operand_index))
- continue;
-
- const auto &operand = primary_subgraph()->operands().at(operand_index);
-
- auto new_operand = std::make_unique<ir::Operand>(operand);
- new_operand->clearDefUse();
-
- auto new_operand_index = partition->addOperand(operand_index, std::move(new_operand));
- UNUSED_RELEASE(new_operand_index);
- assert(new_operand_index == operand_index);
- }
-
- auto new_operation_index = partition->addOperation(operation_index, clone(operation));
- UNUSED_RELEASE(new_operation_index);
- assert(new_operation_index == operation_index);
- });
-
- for (uint32_t idx = 0; idx < partial_graph->subgraphs_count(); idx++)
- {
- auto partition = partial_graph->at(ir::SubgraphIndex{idx});
-
- partition->operands().iterate([&](const ir::OperandIndex &operand_index,
- const ir::Operand &operand) {
- if (primary_subgraph()->getInputs().contains(operand_index) ||
- (!operand.getDef().valid() && !operand.isConstant()))
- {
- partition->addInput(operand_index, primary_subgraph()->tensor_names()->at(operand_index));
- }
- if (primary_subgraph()->getOutputs().contains(operand_index) || operand.getUses().size() == 0)
- {
- partition->addOutput(operand_index, primary_subgraph()->tensor_names()->at(operand_index));
- }
-
- if (primary_subgraph()->operands().at(operand_index).getUses().size() > 1 &&
- !primary_subgraph()->operands().at(operand_index).isConstant() &&
- !partition->getInputs().contains(operand_index))
- {
- auto use_operations = primary_subgraph()->operands().at(operand_index).getUses();
- auto iter = use_operations.begin();
- ir::SubgraphIndex graph_index =
- options.partial_graph_options.index_to_graph.find(*iter++)->second;
- while (iter != use_operations.end())
- {
- if (graph_index != options.partial_graph_options.index_to_graph.find(*iter)->second &&
- !partition->getOutputs().contains(operand_index))
- {
- partition->addOutput(operand_index,
- primary_subgraph()->tensor_names()->at(operand_index));
- }
- iter++;
- }
- }
- });
-
- partition->verify();
-
- bool same = true;
- if (partition->getInputs().size() == primary_subgraph()->getInputs().size())
- {
- for (auto iter = partition->getInputs().begin(); iter != partition->getInputs().end(); ++iter)
- {
- if (!primary_subgraph()->getInputs().contains(*iter))
- {
- same = false;
- break;
- }
- }
- if (same == true)
- {
- partition->getInputs() = primary_subgraph()->getInputs();
- }
- else
- {
- partition->input_sort();
- }
- }
-
- same = true;
- if (partition->getOutputs().size() == primary_subgraph()->getOutputs().size())
- {
- for (auto iter = partition->getOutputs().begin(); iter != partition->getOutputs().end();
- ++iter)
- {
- if (!primary_subgraph()->getOutputs().contains(*iter))
- {
- same = false;
- break;
- }
- }
- if (same == true)
- {
- partition->getOutputs() = primary_subgraph()->getOutputs();
- }
- else
- {
- partition->output_sort();
- }
- }
- }
- return true;
+ // Use for single model only
+ assert(nnpkg->model_count() == 1);
}
std::shared_ptr<CompilerArtifact> Compiler::compile(void)
{
- for (auto options : _voptions)
- {
- // Set control flow backend for control flow operators
- auto &builtin_id = backend::builtin::Config::ID;
- options->manual_scheduler_options.opcode_to_backend[ir::OpCode::If] = builtin_id;
- options->manual_scheduler_options.opcode_to_backend[ir::OpCode::While] = builtin_id;
- options->manual_scheduler_options.opcode_to_backend[ir::OpCode::Permute] = builtin_id;
-
- // FIXME This is a workaround for bcq operations, should remove it
- options->manual_scheduler_options.opcode_to_backend[ir::OpCode::BCQFullyConnected] = "bcq";
- options->manual_scheduler_options.opcode_to_backend[ir::OpCode::BCQGather] = "bcq";
-
- // FIXME This is a workaround for bulk operations, should remove it
- options->manual_scheduler_options.opcode_to_backend[ir::OpCode::Bulk] = "trix";
-
- verboseOptions(*options);
- }
-
- // NYI: allow one model compilation
- auto const model_count = _nnpkg->model_count();
- if (model_count != _voptions.size())
- throw std::runtime_error{"Model count and option vector size mismatch"};
-
- for (uint32_t i = 0; i < model_count; i++)
- {
- _nnpkg->model(ir::ModelIndex{i})->iterate([&](const ir::SubgraphIndex &, ir::Graph &subg) {
- // Mandatory passes
- pass::PassRunner{}
- .append(std::make_unique<pass::ConstantOutputPass>(subg))
- .append(std::make_unique<pass::OddOutputPass>(subg))
- .run();
-
- // Optimizations
- pass::PassRunner{}.append(std::make_unique<pass::UnusedOperandEliminationPass>(subg)).run();
- });
- }
-
/***************************************************
* Prepare compilation phase
***************************************************/
- // Compilable check
- // TODO: Support hybrid execution -
- // execution between interpreter and compiled executor (including control flow)
- if (_voptions[0]->disable_compile)
- {
- if (model_count > 1)
- throw std::runtime_error{"NYI: Disable compilation for multi model is not supported yet"};
+ if (!_options)
+ throw std::runtime_error{"Empty compile option"};
- auto executors = std::make_shared<exec::Executors>();
+ // Mode check
+ // TODO handle option for each model
+ if (_options->he_profiling_mode)
+ {
+ if (!_options->he_scheduler)
+ throw std::runtime_error("Heterogeneous scheduler must be enabled during profiling.");
- _nnpkg->primary_model()->iterate([&](const ir::SubgraphIndex &index, ir::Graph &subg) {
- executors->emplace(index, std::make_unique<interp::InterpExecutor>(subg));
- });
- _state = State::COMPILED;
- return std::make_shared<CompilerArtifact>(executors, nullptr);
+ if (_options->executor != "Dataflow")
+ throw std::runtime_error("Profiling mode works only with 'Dataflow' executor");
}
- // Mode check
- // TODO handle option for each model
- if (_voptions[0]->he_profiling_mode)
- checkProfilerConditions();
+ _options->forceInternalOptions();
+ _options->verboseOptions();
+
+ _model->iterate([&](const ir::SubgraphIndex &, ir::Graph &subg) {
+ // Mandatory passes
+ pass::PassRunner{}
+ .append(std::make_unique<pass::ConstantOutputPass>(subg))
+ .append(std::make_unique<pass::OddOutputPass>(subg))
+ .run();
+
+ // Optimizations
+ pass::PassRunner{}.append(std::make_unique<pass::UnusedOperandEliminationPass>(subg)).run();
+ });
/***************************************************
* Backend independent analysis & optimization phase
***************************************************/
// TODO Handle dump level for each model
- auto dump_level = static_cast<dumper::dot::DotDumper::Level>(_voptions[0]->graph_dump_level);
+ auto dump_level = static_cast<dumper::dot::DotDumper::Level>(_options->graph_dump_level);
onert::dumper::dot::DotDumper dot_dumper(dump_level);
// Tracing context
auto tracing_ctx = std::make_unique<util::TracingCtx>();
- // Model edge context
- std::unique_ptr<ir::ModelEdges> model_edges = nullptr;
-
// Lower: Assign backend
std::unordered_map<ir::SubgraphIndex, std::unique_ptr<compiler::LoweredGraph>> lowered_subgs;
-
- if (model_count == 1)
{
- _nnpkg->primary_model()->iterate([&](const ir::SubgraphIndex &index, ir::Graph &subg) {
- dot_dumper.dump(subg, nnfw::misc::str("before_lower_subg-", index.value()));
+ _model->iterate([&](const ir::SubgraphIndex &subg_index, ir::Graph &subg) {
// Lower: Assign backend
- lowered_subgs[index] = std::make_unique<compiler::LoweredGraph>(subg, *_voptions[0]);
+ lowered_subgs[subg_index] = std::make_unique<compiler::LoweredGraph>(subg, *_options);
// Set tracing_ctx for copied graph
- tracing_ctx->setSubgraphIndex(&(lowered_subgs[index]->graph()), index.value());
+ if (tracing_ctx != nullptr)
+ tracing_ctx->setSubgraphIndex(&(lowered_subgs[subg_index]->graph()), subg_index.value());
});
}
- else
- {
- // TODO Support tracing_ctx for multiple model
- tracing_ctx = nullptr;
- // Copy model edge context
- model_edges = std::make_unique<ir::ModelEdges>(_nnpkg->model_edges());
+ _model.reset();
- for (uint32_t i = 0; i < model_count; i++)
- {
- auto model = _nnpkg->model(ir::ModelIndex{i});
- if (model->subgraphs_count() != 1)
- throw std::runtime_error{"NYI: Lowering subgraphs for multiple model is not supported yet"};
- auto subg = model->primary_subgraph();
- dot_dumper.dump(*subg, nnfw::misc::str("before_lower_model-", i));
-
- // For multimodel, model index is used for lowered graph index in lowered graph map
- // and index type is SubgraphIndex
- // TODO Find better way to represent lowered graph index for multimodel's subgraph
- lowered_subgs[ir::SubgraphIndex{i}] =
- std::make_unique<compiler::LoweredGraph>(*model->primary_subgraph(), *_voptions[i]);
- }
- }
-
- _nnpkg.reset();
-
- for (auto &pair : lowered_subgs)
+ for (const auto &pair : lowered_subgs)
{
const auto &subg_index = pair.first;
- auto &lowered_subg = pair.second;
- dot_dumper.dump(*lowered_subg, "after_lower_subg-" + std::to_string(subg_index.value()));
+ const auto &lowered_subg = pair.second;
+ dot_dumper.dump(*lowered_subg, nnfw::misc::str("after_lower_subg-", subg_index.value()));
}
// Shape inference.
@@ -566,28 +119,15 @@ std::shared_ptr<CompilerArtifact> Compiler::compile(void)
// Run the StaticShapeInfer of primary subg. All child StaticShapeInferers are called
// recursively
std::unordered_map<ir::SubgraphIndex, std::unique_ptr<StaticShapeInferer>> inferers =
- createStaticShapeInferers(lowered_subgs);
+ StaticShapeInferer::createStaticShapeInferers(lowered_subgs);
- if (model_count == 1)
- {
- const auto primary_subg_idx = ir::SubgraphIndex{0};
- inferers.at(primary_subg_idx)->infer();
+ const auto primary_subg_idx = ir::SubgraphIndex{0};
+ inferers.at(primary_subg_idx)->infer();
- for (const auto &pair : inferers)
- {
- const auto inferer = pair.second.get();
- inferer->dump();
- }
- }
- else
+ for (const auto &pair_inferer : inferers)
{
- // Assume multi model has only one subgraph on each model
- for (const auto &pair : inferers)
- {
- const auto inferer = pair.second.get();
- inferer->infer();
- inferer->dump();
- }
+ const auto inferer = pair_inferer.second.get();
+ inferer->dump();
}
}
@@ -598,7 +138,7 @@ std::shared_ptr<CompilerArtifact> Compiler::compile(void)
// - Check parameter value validation which valid value is depend on input tensor shape
// - Output tensor shape validation check is needless because
// static/dynamic shape inferer will make valid output shape
- for (auto &pair : lowered_subgs)
+ for (const auto &pair : lowered_subgs)
{
auto &lowered_subg = pair.second;
compiler::ShapeValidator{lowered_subg->graph()}();
@@ -607,240 +147,30 @@ std::shared_ptr<CompilerArtifact> Compiler::compile(void)
/*************************************************************
* Backend independent analysis & optimization phase finished
*************************************************************/
- auto executors = std::make_shared<exec::Executors>(std::move(model_edges));
- for (auto &pair : lowered_subgs)
+ auto executors = std::make_shared<exec::SingleModelExecutors>();
+ for (auto &&pair : lowered_subgs)
{
- const auto &subg_index = pair.first;
+ auto const model_index = ir::ModelIndex{0};
+ auto const subg_index = pair.first;
auto &lowered_subg = pair.second;
- auto indexed_ranks = lowered_subg->indexed_ranks();
+ auto const indexed_ranks = lowered_subg->indexed_ranks();
ir::OperationDumper dumper("Executor generation of Subgraph " +
std::to_string(subg_index.value()));
lowered_subg->graph().operations().iterate(
[&](const ir::OperationIndex &, const ir::Operation &op) { op.accept(dumper); });
- auto &options = (model_count > 1) ? *_voptions[subg_index.value()] : *_voptions[0];
auto executor = std::unique_ptr<exec::IExecutor>{ExecutorFactory::get().create(
- std::move(lowered_subg), tracing_ctx.get(), options, executors)};
+ std::move(lowered_subg), tracing_ctx.get(), *_options, executors, model_index)};
executor->setIndexedRanks(indexed_ranks);
- executors->emplace(subg_index, std::move(executor));
+ executors->emplace(model_index, subg_index, std::move(executor));
}
/********************************
* Code generation phase finished
********************************/
- _state = State::COMPILED;
return std::make_shared<CompilerArtifact>(executors, std::move(tracing_ctx));
}
-std::vector<std::shared_ptr<CompilerArtifact>> Compiler::compile(const char *package_file_path,
- const char *map_file_path)
-{
- // Allow one model compilation for pipeline
- if (_nnpkg->model_count() != 1)
- throw std::runtime_error{"Multiple models compilation for pipeline is not supported yet."};
- assert(_voptions.size() == 1);
-
- auto model = _nnpkg->primary_model();
- auto &options = *_voptions[0];
-
- std::string package_path(package_file_path);
- std::string partition_map_file;
-
- if (map_file_path)
- {
- partition_map_file = map_file_path;
- }
- else
- {
- partition_map_file = package_path + "/partition_map.json";
- }
-
- std::ifstream pmfs(partition_map_file);
- Json::Value root;
- pmfs >> root;
- const Json::Value &map = root["partition_map"];
- const Json::Value &np = root["num_partitions"];
-
- uint32_t num_graphs = 1;
-
- if (pmfs.is_open())
- {
- num_graphs = np.asUInt();
- for (uint32_t i = 0; i < (uint32_t)map.size(); ++i)
- {
- options.partial_graph_options.index_to_graph[ir::OperationIndex{i}] =
- ir::SubgraphIndex{map[i].asUInt()};
- }
- }
- else
- {
- throw std::runtime_error("There is no partition map file");
- }
-
- if (!buildPartialGraph(num_graphs))
- {
- throw std::runtime_error("It doesn't support in case there are subgraphs");
- }
-
- // Set control flow backend for control flow operators
- {
- auto &builtin_id = backend::builtin::Config::ID;
- options.manual_scheduler_options.opcode_to_backend[ir::OpCode::If] = builtin_id;
- options.manual_scheduler_options.opcode_to_backend[ir::OpCode::While] = builtin_id;
- options.manual_scheduler_options.opcode_to_backend[ir::OpCode::Permute] = builtin_id;
- }
-
- // FIXME This is a workaround for bcq operations, should remove it
- {
- options.manual_scheduler_options.opcode_to_backend[ir::OpCode::BCQFullyConnected] = "bcq";
- options.manual_scheduler_options.opcode_to_backend[ir::OpCode::BCQGather] = "bcq";
- }
-
- // FIXME This is a workaround for bulk operations, should remove it
- {
- options.manual_scheduler_options.opcode_to_backend[ir::OpCode::Bulk] = "trix";
- }
-
- verboseOptions(options);
-
- model->iterate([&](const ir::SubgraphIndex &, ir::Graph &subg) {
- // Mandatory passes
- auto part = subg.partialgraphs();
- part->iterate([&](const ir::SubgraphIndex &, ir::Graph &partialgraph) {
- pass::PassRunner{}
- .append(std::make_unique<pass::ConstantOutputPass>(partialgraph))
- .append(std::make_unique<pass::OddOutputPass>(partialgraph))
- .run();
-
- // Optimizations
- pass::PassRunner{}
- .append(std::make_unique<pass::UnusedOperandEliminationPass>(partialgraph))
- .run();
- });
- });
-
- /***************************************************
- * Prepare compilation phase
- ***************************************************/
-
- // Compilable check
- // TODO: Support hybrid execution -
- // execution between interpreter and compiled executor (including control flow)
- if (options.disable_compile)
- {
- std::vector<std::shared_ptr<CompilerArtifact>> results;
- auto executors = std::make_shared<exec::Executors>();
-
- model->iterate([&](const ir::SubgraphIndex &index, ir::Graph &subg) {
- executors->emplace(index, std::make_unique<interp::InterpExecutor>(subg));
- });
- results.push_back(std::make_shared<CompilerArtifact>(executors, nullptr));
- _state = State::COMPILED;
- return results;
- }
-
- // Mode check
- if (options.he_profiling_mode)
- checkProfilerConditions();
-
- /***************************************************
- * Backend independent analysis & optimization phase
- ***************************************************/
- auto dump_level = static_cast<dumper::dot::DotDumper::Level>(options.graph_dump_level);
- onert::dumper::dot::DotDumper dot_dumper_part(dump_level);
-
- // Lower: Assign backend
- std::unordered_map<ir::SubgraphIndex, std::unique_ptr<compiler::LoweredGraph>>
- lowered_partialgraphs;
- model->iterate([&](const ir::SubgraphIndex &, ir::Graph &subg) {
- auto part = subg.partialgraphs();
- part->iterate([&](const ir::SubgraphIndex &pindex, ir::Graph &partialgraph) {
- dot_dumper_part.dump(partialgraph,
- nnfw::misc::str("before_lower_subg_partialgraph-", pindex.value()));
-
- // // Lower: Assign backend
- lowered_partialgraphs[pindex] =
- std::make_unique<compiler::LoweredGraph>(subg, partialgraph, options);
- });
- });
-
- for (auto &pair : lowered_partialgraphs)
- {
-
- const auto &partialgraph_index = pair.first;
- auto &lowered_partialgraph = pair.second;
- dot_dumper_part.dump(*lowered_partialgraph, "after_lower_subg_partialgraph-" +
- std::to_string(partialgraph_index.value()));
- }
-
- // Partial Graph shape inference
- std::unordered_map<ir::SubgraphIndex, std::unique_ptr<StaticShapeInferer>> inferers =
- createStaticShapeInferers(lowered_partialgraphs);
- // NOTE If partialgraph has subgraphs StaticShapeInferer may be called multiple times
- for (auto &pair : lowered_partialgraphs)
- {
- const auto &partialgraph_index = pair.first;
- const auto partial_inferer = inferers.at(partialgraph_index).get();
- partial_inferer->infer();
- partial_inferer->dump();
- }
-
- // Shape validation
- // TODO Move shape independent feature check from ShapeValidator to OperationValidator
- // TODO Move ShapeValidator into shape inference
- // - Check input tensor shape validation
- // - Check parameter value validation which valid value is depend on input tensor shape
- // - Output tensor shape validation check is needless because
- // static/dynamic shape inferer will make valid output shape
- for (auto &pair : lowered_partialgraphs)
- {
- auto &lowered_partialgraph = pair.second;
- compiler::ShapeValidator{lowered_partialgraph->graph()}();
- }
-
- /*************************************************************
- * Backend independent analysis & optimization phase finished
- *************************************************************/
- std::map<uint32_t, std::unique_ptr<compiler::LoweredGraph>> ordered;
- for (auto &pair : lowered_partialgraphs)
- {
- // const auto &partialgraph_index = pair.first;
- auto &lowered_partialgraph = pair.second;
-
- ordered.insert(make_pair(pair.first.value(), std::move(lowered_partialgraph)));
- }
-
- std::vector<std::shared_ptr<CompilerArtifact>> results;
- for (auto &pair : ordered)
- {
- auto executors = std::make_shared<exec::Executors>();
-
- const auto &partialgraph_index = ir::SubgraphIndex(pair.first);
- auto &lowered_partialgraph = pair.second;
- auto indexed_ranks = lowered_partialgraph->indexed_ranks();
- ir::OperationDumper dumper("Executor generation of Subgraph " +
- std::to_string(partialgraph_index.value()));
- lowered_partialgraph->graph().operations().iterate(
- [&](const ir::OperationIndex &, const ir::Operation &op) { op.accept(dumper); });
- auto executor = std::unique_ptr<exec::IExecutor>{
- ExecutorFactory::get().create(std::move(lowered_partialgraph), nullptr, options, executors)};
- executor->setIndexedRanks(indexed_ranks);
- executors->emplace(ir::SubgraphIndex{0}, std::move(executor));
-
- // It doesn't support tracing in case of partial graph
- results.push_back(std::make_shared<CompilerArtifact>(executors, nullptr));
- }
-
- _nnpkg.reset();
- /********************************
- * Code generation phase finished
- ********************************/
- _state = State::COMPILED;
-
- return results;
-}
-
} // namespace compiler
-
} // namespace onert
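For reference, the single-model path above now registers every executor under an explicit (model, subgraph) key, with the model index pinned to ir::ModelIndex{0}. The following standalone sketch illustrates only that keying scheme; the pair-keyed map and the plain integer index types are illustrative stand-ins, not the actual exec::SingleModelExecutors implementation.

    #include <cstdint>
    #include <map>
    #include <memory>
    #include <utility>

    // Illustrative stand-ins for ir::ModelIndex / ir::SubgraphIndex and exec::IExecutor.
    using ModelIndex = uint16_t;
    using SubgraphIndex = uint16_t;
    struct IExecutor { virtual ~IExecutor() = default; };

    // Executors keyed by (model, subgraph), matching emplace(model_index, subg_index, ...).
    struct ExecutorMapSketch
    {
      void emplace(ModelIndex m, SubgraphIndex s, std::unique_ptr<IExecutor> exec)
      {
        _map.emplace(std::make_pair(m, s), std::move(exec));
      }
      IExecutor *at(ModelIndex m, SubgraphIndex s) const { return _map.at({m, s}).get(); }

      std::map<std::pair<ModelIndex, SubgraphIndex>, std::unique_ptr<IExecutor>> _map;
    };

    int main()
    {
      ExecutorMapSketch executors;
      executors.emplace(0, 0, std::make_unique<IExecutor>()); // single-model path: model index is always 0
      return executors.at(0, 0) ? 0 : 1;
    }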
diff --git a/runtime/onert/backend/gpu_cl/TensorBuilderHelper.h b/runtime/onert/core/src/compiler/CompilerFactory.cc
index 7290ff5da..d8d4bb277 100644
--- a/runtime/onert/backend/gpu_cl/TensorBuilderHelper.h
+++ b/runtime/onert/core/src/compiler/CompilerFactory.cc
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ * Copyright (c) 2022 Samsung Electronics Co., Ltd. All Rights Reserved
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
@@ -14,31 +14,32 @@
* limitations under the License.
*/
-#ifndef __ONERT_BACKEND_GPU_CL_TENSOR_BUILDER_HELPER_H__
-#define __ONERT_BACKEND_GPU_CL_TENSOR_BUILDER_HELPER_H__
+#include "compiler/CompilerFactory.h"
-#include "absl/status/status.h"
-#include "tensorflow/lite/delegates/gpu/common/shape.h"
+#include "MultiModelCompiler.h"
+
+#include "compiler/Compiler.h"
namespace onert
{
-namespace backend
+namespace compiler
{
-namespace gpu_cl
+
+CompilerFactory &CompilerFactory::get()
{
+ static CompilerFactory singleton;
+ return singleton;
+}
-enum TensorType
+std::unique_ptr<ICompiler>
+CompilerFactory::create(const std::shared_ptr<ir::NNPkg> &nnpkg,
+ std::vector<std::unique_ptr<CompilerOptions>> &copts)
{
- TENSOR_TYPE_VALID = 0,
- TENSOR_TYPE_INPUT = 1,
- TENSOR_TYPE_OUTPUT = 2,
- TENSOR_TYPE_DELETE = 3
-};
+ if (nnpkg->model_count() == 1)
+ return std::make_unique<Compiler>(nnpkg, copts);
-absl::Status ExtractAxisFromIndex(int dims, int index, tflite::gpu::Axis *axis);
+ return std::make_unique<MultiModelCompiler>(nnpkg, copts);
+}
-} // namespace gpu_cl
-} // namespace backend
+} // namespace compiler
} // namespace onert
-
-#endif // __ONERT_BACKEND_GPU_CL_TENSOR_BUILDER_HELPER_H__
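As a usage sketch of the new factory: one CompilerOptions per model is collected and handed to CompilerFactory::get().create(), which returns a Compiler for a single-model package and a MultiModelCompiler otherwise. The snippet below mirrors the test fixture added later in this patch; the include set, the wrapper function name, and the assumption that the package is already populated are the only parts not taken from the diff.

    #include "compiler/CompilerFactory.h"
    #include "compiler/CompilerOptions.h"
    #include "ir/NNPkg.h"

    #include <cstdint>
    #include <memory>
    #include <vector>

    // Sketch: compile an already-populated package with one option set per model.
    std::shared_ptr<onert::compiler::CompilerArtifact>
    compilePackage(const std::shared_ptr<onert::ir::NNPkg> &nnpkg)
    {
      std::vector<std::unique_ptr<onert::compiler::CompilerOptions>> copts;
      for (uint16_t i = 0; i < nnpkg->model_count(); ++i)
        copts.emplace_back(onert::compiler::CompilerOptions::fromGlobalConfig());

      // Single-model packages get a Compiler, multi-model packages a MultiModelCompiler.
      auto compiler = onert::compiler::CompilerFactory::get().create(nnpkg, copts);
      return compiler->compile();
    }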
diff --git a/runtime/onert/core/src/compiler/CompilerOptions.cc b/runtime/onert/core/src/compiler/CompilerOptions.cc
new file mode 100644
index 000000000..b5fd392e0
--- /dev/null
+++ b/runtime/onert/core/src/compiler/CompilerOptions.cc
@@ -0,0 +1,145 @@
+/*
+ * Copyright (c) 2022 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "compiler/CompilerOptions.h"
+
+#include "../backend/builtin/Backend.h"
+
+#include "util/ConfigSource.h"
+#include "util/logging.h"
+
+#include <misc/string_helpers.h>
+
+namespace
+{
+
+using namespace onert;
+
+std::string getOpBackends(std::unordered_map<ir::OpCode, std::string> &opcode_to_backend)
+{
+ std::unordered_map<ir::OpCode, std::string>::iterator it;
+ std::string opbackends;
+
+ for (it = opcode_to_backend.begin(); it != opcode_to_backend.end(); ++it)
+ {
+ if (!opbackends.empty())
+ opbackends = opbackends + ", ";
+
+ auto opcode = it->first;
+ const std::string opname = ir::toString(opcode);
+ opbackends += opname + "=" + it->second;
+ }
+ return opbackends;
+}
+
+} // namespace
+
+namespace onert
+{
+namespace compiler
+{
+
+void ManualSchedulerOptions::setBackendMap(const std::string &str)
+{
+ // TODO Support multiple subgraphs for manual scheduling
+ auto key_val_list = nnfw::misc::split(str, ';');
+ for (const auto &key_val_str : key_val_list)
+ {
+ if (key_val_str.empty())
+ {
+ continue;
+ }
+
+ auto key_val = nnfw::misc::split(key_val_str, '=');
+ const auto &key_str = key_val.at(0);
+ const auto &val = key_val.at(1);
+ auto key = static_cast<uint32_t>(std::stoi(key_str));
+ this->index_to_backend.emplace(ir::OperationIndex{key}, val);
+ }
+}
+
+std::unique_ptr<CompilerOptions> CompilerOptions::fromGlobalConfig()
+{
+ auto o = std::make_unique<CompilerOptions>();
+ o->backend_list = nnfw::misc::split(util::getConfigString(util::config::BACKENDS), ';');
+ o->trace_filepath = util::getConfigString(util::config::TRACE_FILEPATH);
+ o->graph_dump_level = util::getConfigInt(util::config::GRAPH_DOT_DUMP);
+ o->executor = util::getConfigString(util::config::EXECUTOR);
+ o->he_scheduler = util::getConfigBool(util::config::USE_SCHEDULER);
+ o->he_profiling_mode = util::getConfigBool(util::config::PROFILING_MODE);
+ o->fp16_enable = util::getConfigBool(util::config::FP16_ENABLE);
+ {
+ // Backend for all
+ auto &ms_options = o->manual_scheduler_options;
+
+ // Default value for op_backend_all is first element in the backend list
+ ms_options.backend_for_all = util::getConfigString(util::config::OP_BACKEND_ALLOPS);
+
+// Opcode to Backend
+#define OP(OpName) \
+ { \
+ const auto &backend_str = util::getConfigString(util::config::OP_BACKEND_##OpName); \
+ if (!backend_str.empty()) \
+ { \
+ ms_options.opcode_to_backend[ir::OpCode::OpName] = backend_str; \
+ } \
+ }
+#include "ir/Operations.lst"
+#undef OP
+
+ // Index to Backend
+ auto map_str = util::getConfigString(util::config::OP_BACKEND_MAP);
+ ms_options.setBackendMap(map_str);
+ }
+ return o;
+}
+
+void CompilerOptions::forceInternalOptions()
+{
+ // Set control flow backend for control flow operators
+ auto &builtin_id = backend::builtin::Config::ID;
+ manual_scheduler_options.opcode_to_backend[ir::OpCode::If] = builtin_id;
+ manual_scheduler_options.opcode_to_backend[ir::OpCode::While] = builtin_id;
+ manual_scheduler_options.opcode_to_backend[ir::OpCode::Permute] = builtin_id;
+
+ // FIXME This is a workaround for bcq operations, should remove it
+ manual_scheduler_options.opcode_to_backend[ir::OpCode::BCQFullyConnected] = "bcq";
+ manual_scheduler_options.opcode_to_backend[ir::OpCode::BCQGather] = "bcq";
+
+ // FIXME This is a workaround for bulk operations, should remove it
+ manual_scheduler_options.opcode_to_backend[ir::OpCode::Bulk] = "trix";
+}
+
+void CompilerOptions::verboseOptions()
+{
+ VERBOSE(Compiler) << std::boolalpha << "==== Compiler Options ====" << std::endl;
+ VERBOSE(Compiler) << "backend_list : "
+ << nnfw::misc::join(backend_list.begin(), backend_list.end(), "/") << std::endl;
+ VERBOSE(Compiler) << "trace_filepath : " << trace_filepath << std::endl;
+ VERBOSE(Compiler) << "graph_dump_level : " << graph_dump_level << std::endl;
+ VERBOSE(Compiler) << "executor : " << executor << std::endl;
+ VERBOSE(Compiler) << "manual backend_for_all : " << manual_scheduler_options.backend_for_all
+ << std::endl;
+ VERBOSE(Compiler) << "manual_scheduler_options : "
+ << getOpBackends(manual_scheduler_options.opcode_to_backend) << std::endl;
+ VERBOSE(Compiler) << "he_scheduler : " << he_scheduler << std::endl;
+ VERBOSE(Compiler) << "he_profiling_mode : " << he_profiling_mode << std::endl;
+ VERBOSE(Compiler) << "fp16_enable : " << fp16_enable << std::endl
+ << std::noboolalpha;
+}
+
+} // namespace compiler
+} // namespace onert
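The OP_BACKEND_MAP value consumed by ManualSchedulerOptions::setBackendMap is a ';'-separated list of index=backend pairs, e.g. "0=cpu;2=acl_cl". Below is a small standalone re-implementation of that parsing, for illustration only; the runtime itself uses nnfw::misc::split and stores the result under ir::OperationIndex keys.

    #include <cstdint>
    #include <iostream>
    #include <map>
    #include <sstream>
    #include <string>

    // Parse "0=cpu;2=acl_cl" into {0: "cpu", 2: "acl_cl"}, the format setBackendMap() accepts.
    std::map<uint32_t, std::string> parseBackendMap(const std::string &str)
    {
      std::map<uint32_t, std::string> index_to_backend;
      std::stringstream ss(str);
      std::string key_val_str;
      while (std::getline(ss, key_val_str, ';'))
      {
        if (key_val_str.empty())
          continue;
        const auto pos = key_val_str.find('=');
        const auto key = static_cast<uint32_t>(std::stoi(key_val_str.substr(0, pos)));
        index_to_backend.emplace(key, key_val_str.substr(pos + 1));
      }
      return index_to_backend;
    }

    int main()
    {
      for (const auto &pair : parseBackendMap("0=cpu;2=acl_cl"))
        std::cout << pair.first << " -> " << pair.second << std::endl;
      return 0;
    }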
diff --git a/runtime/onert/core/src/compiler/ExecutorFactory.cc b/runtime/onert/core/src/compiler/ExecutorFactory.cc
index 024556e7e..b09d6b021 100644
--- a/runtime/onert/core/src/compiler/ExecutorFactory.cc
+++ b/runtime/onert/core/src/compiler/ExecutorFactory.cc
@@ -196,7 +196,7 @@ backend::BackendContexts createBackendContexts(compiler::LoweredGraph &lgraph, b
// Create contexts
auto whole_op_order = lgraph.graph().topolSortOperations();
- for (auto &pair : context_data_map)
+ for (auto &&pair : context_data_map)
{
auto backend = pair.first;
auto &data = pair.second;
@@ -240,18 +240,22 @@ ExecutorFactory &ExecutorFactory::get()
ExecutorFactory::ExecutorFactory()
{
_map["Linear"] = createLinearExecutor;
- _map["Dataflow"] = std::bind(createDataflowExecutor, std::placeholders::_1, std::placeholders::_2,
- std::placeholders::_3, std::placeholders::_4, false);
- _map["Parallel"] = std::bind(createDataflowExecutor, std::placeholders::_1, std::placeholders::_2,
- std::placeholders::_3, std::placeholders::_4, true);
+ _map["Dataflow"] =
+ std::bind(createDataflowExecutor, std::placeholders::_1, std::placeholders::_2,
+ std::placeholders::_3, std::placeholders::_4, std::placeholders::_5, false);
+ _map["Parallel"] =
+ std::bind(createDataflowExecutor, std::placeholders::_1, std::placeholders::_2,
+ std::placeholders::_3, std::placeholders::_4, std::placeholders::_5, true);
}
exec::IExecutor *ExecutorFactory::create(std::unique_ptr<compiler::LoweredGraph> lowered_graph,
const util::TracingCtx *tracing_ctx,
const compiler::CompilerOptions &options,
- const std::shared_ptr<exec::Executors> &executors)
+ const std::shared_ptr<exec::IExecutors> &executors,
+ const ir::ModelIndex &index)
{
- return _map.at(options.executor)(std::move(lowered_graph), tracing_ctx, options, executors);
+ return _map.at(options.executor)(std::move(lowered_graph), tracing_ctx, options, executors,
+ index);
}
void ExecutorFactory::prepareMigrantTensors(compiler::LoweredGraph &lowered_graph,
@@ -282,10 +286,11 @@ void ExecutorFactory::prepareMigrantTensors(compiler::LoweredGraph &lowered_grap
}
void ExecutorFactory::prepareBuiltinBackend(const TensorRegistries &tensor_regs,
- const std::shared_ptr<exec::Executors> &executors,
- const backend::BackendContexts &backend_contexts)
+ const std::shared_ptr<exec::IExecutors> &executors,
+ const backend::BackendContexts &backend_contexts,
+ const ir::ModelIndex &index)
{
- for (auto &pair : backend_contexts)
+ for (auto &&pair : backend_contexts)
{
auto builtin_context = dynamic_cast<backend::builtin::BackendContext *>(pair.second.get());
if (builtin_context != nullptr)
@@ -293,6 +298,7 @@ void ExecutorFactory::prepareBuiltinBackend(const TensorRegistries &tensor_regs,
auto builtin_kernel_gen = builtin_context->kernel_gen;
builtin_kernel_gen->setTensorRegistries(tensor_regs);
builtin_kernel_gen->setExecutors(executors);
+ builtin_kernel_gen->setModelIndex(index);
}
}
}
@@ -302,7 +308,7 @@ ExecutorFactory::orderBackendContext(const backend::BackendContexts &backend_con
{
std::deque<std::pair<const backend::Backend *, backend::BackendContext *>> ordered_contexts;
- for (auto &pair : backend_contexts)
+ for (auto &&pair : backend_contexts)
{
// NOTE builtin backend must be processed lastly.
// This is because of Permute layer's specialty which is the only operation that could have
@@ -319,7 +325,8 @@ ExecutorFactory::orderBackendContext(const backend::BackendContexts &backend_con
exec::IExecutor *ExecutorFactory::createLinearExecutor(
std::unique_ptr<compiler::LoweredGraph> lowered_graph, const util::TracingCtx *tracing_ctx,
- const compiler::CompilerOptions &options, const std::shared_ptr<exec::Executors> &executors)
+ const compiler::CompilerOptions &options, const std::shared_ptr<exec::IExecutors> &executors,
+ const ir::ModelIndex &index)
{
auto &graph = lowered_graph->graph();
@@ -337,7 +344,7 @@ exec::IExecutor *ExecutorFactory::createLinearExecutor(
auto order = Linear::linearize(*lowered_graph);
Linear::dump(*lowered_graph, order);
- for (auto &pair : backend_contexts)
+ for (auto &&pair : backend_contexts)
{
pair.second->genTensors();
}
@@ -345,7 +352,7 @@ exec::IExecutor *ExecutorFactory::createLinearExecutor(
prepareMigrantTensors(*lowered_graph, backend_contexts);
// Give some runtime objects to builtin KernelGenerator
- prepareBuiltinBackend(tensor_regs, executors, backend_contexts);
+ prepareBuiltinBackend(tensor_regs, executors, backend_contexts, index);
ExecutionBuilder builder;
@@ -406,10 +413,10 @@ exec::IExecutor *ExecutorFactory::createLinearExecutor(
}
// Generate kernels
- for (auto &pair : ordered_contexts)
+ for (auto &&pair : ordered_contexts)
{
auto codes = pair.second->genKernels();
- for (auto &pair : codes)
+ for (auto &&pair : codes)
{
auto &op_ind = pair.first;
auto &fn_seq = pair.second;
@@ -444,8 +451,8 @@ exec::IExecutor *ExecutorFactory::createLinearExecutor(
exec::IExecutor *ExecutorFactory::createDataflowExecutor(
std::unique_ptr<compiler::LoweredGraph> lowered_graph, const util::TracingCtx *tracing_ctx,
- const compiler::CompilerOptions &options, const std::shared_ptr<exec::Executors> &executors,
- bool parallel)
+ const compiler::CompilerOptions &options, const std::shared_ptr<exec::IExecutors> &executors,
+ const ir::ModelIndex &index, bool parallel)
{
backend::BackendContexts backend_contexts =
createBackendContexts(*lowered_graph, options.executor == "Linear");
@@ -457,7 +464,7 @@ exec::IExecutor *ExecutorFactory::createDataflowExecutor(
(lowered_graph->graph().getInputs() + lowered_graph->graph().getOutputs()) |
ir::Remove::DUPLICATED | ir::Remove::UNDEFINED);
- for (auto &pair : backend_contexts)
+ for (auto &&pair : backend_contexts)
{
pair.second->genTensors();
}
@@ -465,7 +472,7 @@ exec::IExecutor *ExecutorFactory::createDataflowExecutor(
prepareMigrantTensors(*lowered_graph, backend_contexts);
// Give some runtime objects to builtin KernelGenerator
- prepareBuiltinBackend(tensor_regs, executors, backend_contexts);
+ prepareBuiltinBackend(tensor_regs, executors, backend_contexts, index);
ExecutionBuilder builder;
@@ -473,10 +480,10 @@ exec::IExecutor *ExecutorFactory::createDataflowExecutor(
auto ordered_contexts = orderBackendContext(backend_contexts);
// Generate kernels
- for (auto &pair : ordered_contexts)
+ for (auto &&pair : ordered_contexts)
{
auto codes = pair.second->genKernels();
- for (auto &pair : codes)
+ for (auto &&pair : codes)
{
auto &op_ind = pair.first;
auto &fn_seq = pair.second;
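The added std::placeholders::_5 above exists only because createDataflowExecutor gained the ir::ModelIndex parameter: every caller-supplied argument needs its own placeholder ahead of the bound trailing parallel flag. A toy standalone illustration of the same std::bind pattern (nothing onert-specific, fewer parameters than the real factory):

    #include <functional>
    #include <iostream>
    #include <string>

    // Toy stand-in: a factory function that grew an extra 'index' parameter,
    // while the registry still stores callables with a fixed bool bound at the end.
    static int makeExecutor(const std::string &name, int index, bool parallel)
    {
      std::cout << name << " index=" << index << " parallel=" << parallel << std::endl;
      return index;
    }

    int main()
    {
      using namespace std::placeholders;
      // Each caller-supplied argument gets a placeholder; only 'parallel' is bound.
      auto dataflow = std::bind(makeExecutor, _1, _2, false);
      auto parallel = std::bind(makeExecutor, _1, _2, true);
      dataflow("Dataflow", 0);
      parallel("Parallel", 1);
      return 0;
    }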
diff --git a/runtime/onert/core/src/compiler/ExecutorFactory.h b/runtime/onert/core/src/compiler/ExecutorFactory.h
index 70c089f8c..f8f989043 100644
--- a/runtime/onert/core/src/compiler/ExecutorFactory.h
+++ b/runtime/onert/core/src/compiler/ExecutorFactory.h
@@ -21,7 +21,7 @@
#include "backend/ITensor.h"
#include "compiler/LoweredGraph.h"
-#include "exec/Executors.h"
+#include "exec/IExecutors.h"
#include <deque>
#include <unordered_map>
@@ -40,7 +40,8 @@ public:
exec::IExecutor *create(std::unique_ptr<compiler::LoweredGraph> lowered_graph,
const util::TracingCtx *tracing_ctx,
const compiler::CompilerOptions &options,
- const std::shared_ptr<exec::Executors> &executors);
+ const std::shared_ptr<exec::IExecutors> &executors,
+ const ir::ModelIndex &index);
private:
ExecutorFactory();
@@ -49,26 +50,28 @@ private:
static void prepareMigrantTensors(compiler::LoweredGraph &lowered_graph,
const backend::BackendContexts &backend_contexts);
static void prepareBuiltinBackend(const TensorRegistries &tensor_regs,
- const std::shared_ptr<exec::Executors> &executors,
- const backend::BackendContexts &backend_contexts);
+ const std::shared_ptr<exec::IExecutors> &executors,
+ const backend::BackendContexts &backend_contexts,
+ const ir::ModelIndex &index);
static std::deque<std::pair<const backend::Backend *, backend::BackendContext *>>
orderBackendContext(const backend::BackendContexts &backend_contexts);
static exec::IExecutor *createLinearExecutor(
std::unique_ptr<compiler::LoweredGraph> lowered_graph, const util::TracingCtx *tracing_ctx,
- const compiler::CompilerOptions &options, const std::shared_ptr<exec::Executors> &executors);
- static exec::IExecutor *
- createDataflowExecutor(std::unique_ptr<compiler::LoweredGraph> lowered_graph,
- const util::TracingCtx *tracing_ctx,
- const compiler::CompilerOptions &options,
- const std::shared_ptr<exec::Executors> &executors, bool parallel);
+ const compiler::CompilerOptions &options, const std::shared_ptr<exec::IExecutors> &executors,
+ const ir::ModelIndex &index);
+ static exec::IExecutor *createDataflowExecutor(
+ std::unique_ptr<compiler::LoweredGraph> lowered_graph, const util::TracingCtx *tracing_ctx,
+ const compiler::CompilerOptions &options, const std::shared_ptr<exec::IExecutors> &executors,
+ const ir::ModelIndex &index, bool parallel);
private:
std::unordered_map<
std::string,
std::function<exec::IExecutor *(
std::unique_ptr<compiler::LoweredGraph>, const util::TracingCtx *tracing_ctx,
- const compiler::CompilerOptions &options, const std::shared_ptr<exec::Executors> &executors)>>
+ const compiler::CompilerOptions &options, const std::shared_ptr<exec::IExecutors> &executors,
+ const ir::ModelIndex &index)>>
_map;
};
diff --git a/runtime/onert/core/src/compiler/Fp32ToFp16Converter.cc b/runtime/onert/core/src/compiler/Fp32ToFp16Converter.cc
index 98dc906e4..fdf4e24f0 100644
--- a/runtime/onert/core/src/compiler/Fp32ToFp16Converter.cc
+++ b/runtime/onert/core/src/compiler/Fp32ToFp16Converter.cc
@@ -393,10 +393,10 @@ void Fp32ToFp16Converter::convertOperandsOfOpSequence(ir::OpSequence &op_seq)
const auto &op_seq_inputs = _lowered_graph.graph().getInputs();
const auto &op_seq_outputs = _lowered_graph.graph().getOutputs();
- for (auto &op_idx : op_seq)
+ for (const auto &op_idx : op_seq)
{
const auto &node = operations.at(op_idx);
- for (auto &ind : node.getInputs() | ir::Remove::UNDEFINED)
+ for (const auto &ind : node.getInputs() | ir::Remove::UNDEFINED)
{
if (node.opcode() == ir::OpCode::ConvertFp32ToFp16 || op_seq_inputs.contains(ind))
continue;
@@ -410,7 +410,7 @@ void Fp32ToFp16Converter::convertOperandsOfOpSequence(ir::OpSequence &op_seq)
VERBOSE(Fp32ToFp16Converter) << "Input Operand " << ind << ": fp16" << std::endl;
}
- for (auto &ind : node.getOutputs())
+ for (const auto &ind : node.getOutputs())
{
if (node.opcode() == ir::OpCode::ConvertFp16ToFp32 || op_seq_outputs.contains(ind))
continue;
@@ -747,7 +747,7 @@ Fp32ToFp16Converter::findOpSequencesContiguous(const InputToOpSeqs &input_to_op_
// | |
// [OPERATION] [OPERATION]
//
- for (auto &op_seq_ind : found_input_in_op_seqs->second)
+ for (const auto &op_seq_ind : found_input_in_op_seqs->second)
{
auto found_in_fp32_to_fp16 = _list_fp32_to_fp16.find(op_seq_ind);
if (found_in_fp32_to_fp16 != _list_fp32_to_fp16.end())
@@ -799,13 +799,13 @@ Fp32ToFp16Converter::getListOpSequences(const OpSeqIndexToOpSeqIndexList &opseq_
OpSeqIndexList list;
for (const auto &it : opseq_map_to_delete)
{
- auto &opseq_ind_fp16_to_fp32 = it.first;
+ const auto &opseq_ind_fp16_to_fp32 = it.first;
if (list.find(opseq_ind_fp16_to_fp32) == list.end())
{
list.emplace(opseq_ind_fp16_to_fp32);
}
- for (auto &opseq_ind_fp32_to_fp16 : it.second)
+ for (const auto &opseq_ind_fp32_to_fp16 : it.second)
{
if (list.find(opseq_ind_fp32_to_fp16) == list.end())
{
@@ -869,7 +869,7 @@ void Fp32ToFp16Converter::manipulateContiguousOpSequences(
auto &op_seq_fp16_to_fp32 = op_seqs.at(op_seq_ind_fp16_to_fp32);
auto &input_ind_fp16_to_fp32 = op_seq_fp16_to_fp32.getInputs().at(0);
- for (auto &op_seq_ind_fp32_to_fp16 : it.second)
+ for (const auto &op_seq_ind_fp32_to_fp16 : it.second)
{
auto &op_seq_fp32_to_fp16 = op_seqs.at(op_seq_ind_fp32_to_fp16);
assert(op_seq_fp32_to_fp16.size() == 1);
@@ -879,7 +879,7 @@ void Fp32ToFp16Converter::manipulateContiguousOpSequences(
auto found_next_to_fp16 = input_to_op_seqs.find(output_ind_fp32_to_fp16);
assert(found_next_to_fp16 != input_to_op_seqs.end());
- for (auto &op_seq_ind_next_to_fp16 : found_next_to_fp16->second)
+ for (const auto &op_seq_ind_next_to_fp16 : found_next_to_fp16->second)
{
manipulateInput(op_seq_ind_next_to_fp16, output_ind_fp32_to_fp16, input_ind_fp16_to_fp32);
}
@@ -901,7 +901,7 @@ void Fp32ToFp16Converter::deleteContiguousOpSequences(
auto &operations = _lowered_graph.graph().operations();
auto &op_seqs = _lowered_graph.op_seqs();
- for (auto &op_seq_ind : list_to_delete_op_seqs)
+ for (const auto &op_seq_ind : list_to_delete_op_seqs)
{
auto &op_seq = op_seqs.at(op_seq_ind);
assert(op_seq.size() == 1);
@@ -914,7 +914,7 @@ void Fp32ToFp16Converter::deleteContiguousOpSequences(
VERBOSE(Fp32ToFp16Converter) << "Delete Node " << first_node_ind << std::endl;
// Uses
- for (auto &ind : first_node.getInputs() | ir::Remove::DUPLICATED | ir::Remove::UNDEFINED)
+ for (const auto &ind : first_node.getInputs() | ir::Remove::DUPLICATED | ir::Remove::UNDEFINED)
{
auto &obj = operands.at(ind);
obj.removeUse(first_node_ind);
@@ -923,7 +923,7 @@ void Fp32ToFp16Converter::deleteContiguousOpSequences(
}
// Def
- for (auto &ind : first_node.getOutputs() | ir::Remove::DUPLICATED | ir::Remove::UNDEFINED)
+ for (const auto &ind : first_node.getOutputs() | ir::Remove::DUPLICATED | ir::Remove::UNDEFINED)
{
auto &obj = operands.at(ind);
assert(obj.getDef() == first_node_ind);
@@ -942,7 +942,7 @@ void Fp32ToFp16Converter::deleteContiguousOpSequences(
}
// Operand
- for (auto &ind : list_to_delete_ops)
+ for (const auto &ind : list_to_delete_ops)
{
operands.remove(ind);
VERBOSE(Fp32ToFp16Converter) << "Operand " << ind << " is removed" << std::endl;
diff --git a/runtime/onert/core/src/compiler/HEScheduler.cc b/runtime/onert/core/src/compiler/HEScheduler.cc
index c4bfddb8f..65fd4cd77 100644
--- a/runtime/onert/core/src/compiler/HEScheduler.cc
+++ b/runtime/onert/core/src/compiler/HEScheduler.cc
@@ -512,7 +512,7 @@ HEScheduler::ESTAndExecTime(const backend::Backend *backend, const ir::Operation
// Find free time for data transferring and insert it into backend taskset. This is needed:
// 1. Time for multiple permutations for this node's input is found correctly
// 2. If backend==cpu, then free time for this node must come after permutations
- for (auto &it : transfer_st_exec_time)
+ for (auto &&it : transfer_st_exec_time)
{
if (_is_parallel_exec)
{
diff --git a/runtime/onert/core/src/compiler/HEScheduler.test.cc b/runtime/onert/core/src/compiler/HEScheduler.test.cc
index c4a2df025..589331b49 100644
--- a/runtime/onert/core/src/compiler/HEScheduler.test.cc
+++ b/runtime/onert/core/src/compiler/HEScheduler.test.cc
@@ -163,7 +163,7 @@ void setOperationsExecutionTime(const std::vector<const Backend *> &backends,
ExecTime et(backends);
for (int i = 0; i < op_names.size(); ++i)
{
- for (auto &backend : backends)
+ for (const auto backend : backends)
setOperationExecTime(et, backend, op_names[i], false, op_sizes[i], exec_time);
}
et.storeOperationsExecTime();
@@ -189,7 +189,7 @@ void setPermutationsExecutionTime(const std::vector<const Backend *> &backends,
ExecTime et(backends);
for (const auto &backend : backends)
{
- for (auto &other_backend : backends)
+ for (const auto other_backend : backends)
{
if (backend == other_backend)
continue;
diff --git a/runtime/onert/core/src/compiler/LoweredGraph.cc b/runtime/onert/core/src/compiler/LoweredGraph.cc
index 9e84753a7..d53d0ed00 100644
--- a/runtime/onert/core/src/compiler/LoweredGraph.cc
+++ b/runtime/onert/core/src/compiler/LoweredGraph.cc
@@ -44,14 +44,6 @@ LoweredGraph::LoweredGraph(const ir::Graph &graph, const CompilerOptions &option
lowerGraph(options);
}
-// TODO Design better class and constructor to represent parent_graph
-LoweredGraph::LoweredGraph(const ir::Graph &parent_graph, const ir::Graph &graph,
- const CompilerOptions &options)
- : _graph{graph}, _parent_graph{parent_graph}
-{
- lowerGraph(options);
-}
-
void LoweredGraph::lowerGraph(const CompilerOptions &options)
{
// Build backend contexts
diff --git a/runtime/onert/core/src/compiler/ManualScheduler.cc b/runtime/onert/core/src/compiler/ManualScheduler.cc
index af2d84cd9..621f0c7b7 100644
--- a/runtime/onert/core/src/compiler/ManualScheduler.cc
+++ b/runtime/onert/core/src/compiler/ManualScheduler.cc
@@ -64,7 +64,7 @@ std::unique_ptr<BackendResolver> ManualScheduler::schedule(const ir::Graph &grap
// 2. Backend per operation type
std::unordered_map<ir::OpCode, backend::Backend *> op_type_map;
- for (auto &pair : manual_options.opcode_to_backend)
+ for (const auto &pair : manual_options.opcode_to_backend)
{
op_type_map.emplace(pair.first, BackendManager::get().get(pair.second));
}
@@ -80,7 +80,7 @@ std::unique_ptr<BackendResolver> ManualScheduler::schedule(const ir::Graph &grap
});
// 3. Backend per operation
- for (auto &pair : manual_options.index_to_backend)
+ for (const auto &pair : manual_options.index_to_backend)
{
const auto &key = pair.first;
const auto &val = pair.second;
diff --git a/runtime/onert/core/src/compiler/MultiModelCompiler.cc b/runtime/onert/core/src/compiler/MultiModelCompiler.cc
new file mode 100644
index 000000000..fea6a7f25
--- /dev/null
+++ b/runtime/onert/core/src/compiler/MultiModelCompiler.cc
@@ -0,0 +1,214 @@
+/*
+ * Copyright (c) 2023 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "MultiModelCompiler.h"
+
+#include "ExecutorFactory.h"
+#include "ShapeValidator.h"
+#include "pass/ConstantOutputPass.h"
+#include "pass/OddOutputPass.h"
+#include "pass/PassRunner.h"
+#include "pass/UnusedOperandEliminationPass.h"
+#include "../dumper/dot/DotDumper.h"
+#include "../exec/Executors.h"
+#include "../ir/OperationDumper.h"
+#include "../ir/verifier/Verifier.h"
+
+#include "compiler/StaticShapeInferer.h"
+
+#include <misc/string_helpers.h>
+
+namespace onert
+{
+namespace compiler
+{
+
+MultiModelCompiler::MultiModelCompiler(const std::shared_ptr<ir::NNPkg> &nnpkg,
+ std::vector<std::unique_ptr<CompilerOptions>> &copts)
+ : _nnpkg{nnpkg}, _voptions{}
+{
+ assert(nnpkg->model_count() != 1);
+
+ for (uint32_t i = 0; i < copts.size(); i++)
+ {
+ _voptions.push_back(copts[i].get());
+ }
+}
+
+std::shared_ptr<CompilerArtifact> MultiModelCompiler::compile(void)
+{
+ /***************************************************
+ * Prepare compilation phase
+ ***************************************************/
+ for (auto options : _voptions)
+ {
+ if (!options)
+ throw std::runtime_error{"Empty compile option"};
+
+ // Mode check
+ // TODO handle option for each model
+ if (options->he_profiling_mode)
+ throw std::runtime_error("NYI: Profiling mode for multiple model is not supported yet");
+
+ options->forceInternalOptions();
+ options->verboseOptions();
+ }
+
+ // NYI: allow one model compilation
+ auto const model_count = _nnpkg->model_count();
+ if (model_count != _voptions.size())
+ throw std::runtime_error{"Model count and option vector size mismatch"};
+
+ for (uint16_t i = 0; i < model_count; i++)
+ {
+ _nnpkg->model(ir::ModelIndex{i})->iterate([&](const ir::SubgraphIndex &, ir::Graph &subg) {
+ // Mandatory passes
+ pass::PassRunner{}
+ .append(std::make_unique<pass::ConstantOutputPass>(subg))
+ .append(std::make_unique<pass::OddOutputPass>(subg))
+ .run();
+
+ // Optimizations
+ pass::PassRunner{}.append(std::make_unique<pass::UnusedOperandEliminationPass>(subg)).run();
+ });
+ }
+
+ /***************************************************
+ * Backend independent analysis & optimization phase
+ ***************************************************/
+ // TODO Handle dump level for each model
+ auto dump_level = static_cast<dumper::dot::DotDumper::Level>(_voptions[0]->graph_dump_level);
+ onert::dumper::dot::DotDumper dot_dumper(dump_level);
+
+ // Tracing context
+ // TODO Support tracing_ctx for multiple models
+ std::unique_ptr<util::TracingCtx> tracing_ctx = nullptr;
+
+ // Model edge context: copy model edge context
+ auto model_edges = std::make_unique<ir::ModelEdges>(_nnpkg->model_edges());
+
+ // Lower: Assign backend
+ std::unordered_map<ir::ModelIndex,
+ std::unordered_map<ir::SubgraphIndex, std::unique_ptr<compiler::LoweredGraph>>>
+ lowered_subgs;
+
+ for (uint16_t i = 0; i < model_count; i++)
+ {
+ auto const model_index = ir::ModelIndex{i};
+ auto model = _nnpkg->model(model_index);
+
+ model->iterate([&](const ir::SubgraphIndex &subg_index, ir::Graph &subg) {
+ dot_dumper.dump(subg,
+ nnfw::misc::str("before_lower_model-", i, "-subg-", subg_index.value()));
+ // Lower: Assign backend
+ lowered_subgs[model_index][subg_index] =
+ std::make_unique<compiler::LoweredGraph>(subg, *_voptions[i]);
+ // Set tracing_ctx for copied graph
+ if (tracing_ctx != nullptr)
+ tracing_ctx->setSubgraphIndex(&(lowered_subgs[model_index][subg_index]->graph()),
+ subg_index.value());
+ });
+ }
+
+ _nnpkg.reset();
+
+ for (const auto &pair : lowered_subgs)
+ {
+ const auto &model_index = pair.first;
+ const auto &model_lsubg = pair.second;
+
+ for (const auto &pair_inner : model_lsubg)
+ {
+ const auto &subg_index = pair_inner.first;
+ const auto &lowered_subg = pair_inner.second;
+ dot_dumper.dump(*lowered_subg, nnfw::misc::str("after_lower_model-", model_index.value(),
+ "-subg-", subg_index.value()));
+ }
+ }
+
+ // Shape inference.
+ for (auto &&pair : lowered_subgs)
+ {
+ auto &model_lsubgs = pair.second;
+ // Run the StaticShapeInferer of the primary subg. All child StaticShapeInferers are called
+ // recursively
+ std::unordered_map<ir::SubgraphIndex, std::unique_ptr<StaticShapeInferer>> inferers =
+ StaticShapeInferer::createStaticShapeInferers(model_lsubgs);
+
+ const auto primary_subg_idx = ir::SubgraphIndex{0};
+ inferers.at(primary_subg_idx)->infer();
+
+ for (const auto &pair_inferer : inferers)
+ {
+ const auto inferer = pair_inferer.second.get();
+ inferer->dump();
+ }
+ }
+
+ // Shape validation
+ // TODO Move shape independent feature check from ShapeValidator to OperationValidator
+ // TODO Move ShapeValidator into shape inference
+ // - Check input tensor shape validation
+ // - Check parameter value validation whose valid value depends on input tensor shape
+ // - Output tensor shape validation check is needless because
+ // static/dynamic shape inferer will make valid output shape
+ for (const auto &pair : lowered_subgs)
+ {
+ const auto &model_lsubgs = pair.second;
+
+ for (const auto &pair_inner : model_lsubgs)
+ {
+ const auto &lowered_subg = pair_inner.second;
+ compiler::ShapeValidator{lowered_subg->graph()}();
+ }
+ }
+
+ /*************************************************************
+ * Backend independent analysis & optimization phase finished
+ *************************************************************/
+ auto executors = std::make_shared<exec::Executors>(std::move(model_edges));
+ for (auto &&pair : lowered_subgs)
+ {
+ auto const &model_index = pair.first;
+ auto &model_lsubgs = pair.second;
+
+ for (auto &&pair_inner : model_lsubgs)
+ {
+ auto const subg_index = pair_inner.first;
+ auto &lowered_subg = pair_inner.second;
+ auto const indexed_ranks = lowered_subg->indexed_ranks();
+
+ ir::OperationDumper dumper("Executor generation of Subgraph " +
+ std::to_string(subg_index.value()));
+ lowered_subg->graph().operations().iterate(
+ [&](const ir::OperationIndex &, const ir::Operation &op) { op.accept(dumper); });
+
+ auto &options = *_voptions[model_index.value()];
+ auto executor = std::unique_ptr<exec::IExecutor>{ExecutorFactory::get().create(
+ std::move(lowered_subg), tracing_ctx.get(), options, executors, model_index)};
+ executor->setIndexedRanks(indexed_ranks);
+ executors->emplace(model_index, subg_index, std::move(executor));
+ }
+ }
+
+ /********************************
+ * Code generation phase finished
+ ********************************/
+ return std::make_shared<CompilerArtifact>(executors, std::move(tracing_ctx));
+}
+
+} // namespace compiler
+} // namespace onert
diff --git a/runtime/onert/core/src/compiler/MultiModelCompiler.h b/runtime/onert/core/src/compiler/MultiModelCompiler.h
new file mode 100644
index 000000000..89af664f8
--- /dev/null
+++ b/runtime/onert/core/src/compiler/MultiModelCompiler.h
@@ -0,0 +1,75 @@
+/*
+ * Copyright (c) 2023 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+/**
+ * @file MultiModelCompiler.h
+ * @brief This file contains MultiModelCompiler class to define and run compilation phase
+ */
+
+#ifndef __ONERT_COMPILER_MULTI_MODEL_COMPILER_H__
+#define __ONERT_COMPILER_MULTI_MODEL_COMPILER_H__
+
+#include "compiler/CompilerOptions.h"
+#include "compiler/ICompiler.h"
+#include "ir/NNPkg.h"
+
+namespace onert
+{
+namespace compiler
+{
+
+/**
+ * @brief Class to compile NN package
+ */
+class MultiModelCompiler final : public ICompiler
+{
+public:
+ /**
+ * @brief Construct a new MultiModelCompiler object for NN package
+ * @param[in] nnpkg NN package to compile
+ * @param[in] copts Compiler option vector for each model in package
+ */
+ MultiModelCompiler(const std::shared_ptr<ir::NNPkg> &nnpkg,
+ std::vector<std::unique_ptr<CompilerOptions>> &copts);
+
+ /**
+ * @brief Destroy the MultiModelCompiler object
+ */
+ ~MultiModelCompiler() = default;
+
+public:
+ /**
+ * @brief Do compilation with the options
+ *
+ * @return std::shared_ptr<CompilerArtifact> Executors as a result of compilation
+ */
+ std::shared_ptr<CompilerArtifact> compile(void);
+
+private:
+ std::shared_ptr<ir::Graph> &primary_subgraph()
+ {
+ return _nnpkg->primary_model()->at(ir::SubgraphIndex{0});
+ }
+
+private:
+ std::shared_ptr<ir::NNPkg> _nnpkg;
+ std::vector<CompilerOptions *> _voptions;
+};
+
+} // namespace compiler
+} // namespace onert
+
+#endif // __ONERT_COMPILER_MULTI_MODEL_COMPILER_H__
diff --git a/runtime/onert/core/src/compiler/StaticShapeInferer.cc b/runtime/onert/core/src/compiler/StaticShapeInferer.cc
index 485450560..25747d950 100644
--- a/runtime/onert/core/src/compiler/StaticShapeInferer.cc
+++ b/runtime/onert/core/src/compiler/StaticShapeInferer.cc
@@ -18,6 +18,8 @@
#include "util/ShapeInference.h"
#include "util/logging.h"
+#include <misc/polymorphic_downcast.h>
+
#include <sstream>
#include <stdexcept>
@@ -188,6 +190,95 @@ void StaticShapeInferer::dump()
});
}
+std::unordered_map<ir::SubgraphIndex, std::unique_ptr<StaticShapeInferer>>
+StaticShapeInferer::createStaticShapeInferers(
+ const std::unordered_map<ir::SubgraphIndex, std::unique_ptr<LoweredGraph>> &lowered_subgs)
+{
+ // Allocate StaticShapeInferer per each subgraph
+ std::unordered_map<ir::SubgraphIndex, std::unique_ptr<StaticShapeInferer>> inferers;
+ for (auto &&pair : lowered_subgs)
+ {
+ const auto &subg_index = pair.first;
+ auto &lowered_subg = pair.second;
+ inferers[subg_index] = std::make_unique<StaticShapeInferer>(lowered_subg.get());
+ }
+
+ // Append observers in all StaticShapeInferers
+ for (auto &&pair : lowered_subgs)
+ {
+ const auto &subg_index = pair.first;
+ auto &lowered_subg = pair.second;
+
+ // TODO: Change this iteration over all operations to iterate over controlflow operations only
+ lowered_subg->graph().operations().iterate(
+ [&](const ir::OperationIndex &, const ir::Operation &op) {
+ // A Function to append child inferers. These make it possible for a StaticShapeInferer to
+ // call StaticShapeInferers of child subgraphs recursively
+ auto appendChildInferer = [&](const ir::SubgraphIndex &child_subg_idx) {
+ auto *child_inferer = inferers.at(child_subg_idx).get();
+ inferers.at(subg_index)->appendChildInferer(child_subg_idx, child_inferer);
+ };
+
+ // A Function to append subg input observers. This makes it possible for a
+ // StaticShapeInferer to update inputs of child subgraphs
+ auto appendSubgraphInputObserver = [&](const ir::SubgraphIndex &child_subg_idx) {
+ std::vector<ir::Operand *> child_subg_inputs;
+ auto &child_subg = lowered_subgs.at(child_subg_idx)->graph();
+ for (const auto &input_idx : child_subg.getInputs())
+ {
+ auto operand_ptr = child_subg.operands().getRawPtr(input_idx);
+ child_subg_inputs.emplace_back(operand_ptr);
+ }
+ inferers.at(subg_index)
+ ->appendSubgInputObserver(child_subg_idx,
+ std::make_unique<OperandObserver>(child_subg_inputs));
+ };
+
+ // A Function to set controlflow output observers. This makes it possible for a
+ // StaticShapeInferer to update outputs of parent controlflow operations
+ auto setControlFlowOutputObserver = [&](const ir::SubgraphIndex &child_subg_idx) {
+ std::vector<ir::Operand *> cf_outputs;
+ auto &subg = lowered_subg->graph();
+ for (const auto &output_idx : op.getOutputs())
+ {
+ auto operand_ptr = subg.operands().getRawPtr(output_idx);
+ cf_outputs.emplace_back(operand_ptr);
+ }
+ inferers.at(child_subg_idx)
+ ->setControlflowOutputObserver(std::make_unique<OperandObserver>(cf_outputs));
+ };
+
+ // Append Observers in a StaticShapeInferer
+ if (op.opcode() == ir::OpCode::If)
+ {
+ const auto &if_op = nnfw::misc::polymorphic_downcast<const ir::operation::If &>(op);
+
+ appendChildInferer(if_op.param().then_subg_index);
+ appendChildInferer(if_op.param().else_subg_index);
+
+ appendSubgraphInputObserver(if_op.param().then_subg_index);
+ appendSubgraphInputObserver(if_op.param().else_subg_index);
+
+ setControlFlowOutputObserver(if_op.param().then_subg_index);
+ }
+ else if (op.opcode() == ir::OpCode::While)
+ {
+ const auto &while_op = nnfw::misc::polymorphic_downcast<const ir::operation::While &>(op);
+
+ appendChildInferer(while_op.param().cond_subg_index);
+ appendChildInferer(while_op.param().body_subg_index);
+
+ appendSubgraphInputObserver(while_op.param().cond_subg_index);
+ appendSubgraphInputObserver(while_op.param().body_subg_index);
+
+ setControlFlowOutputObserver(while_op.param().body_subg_index);
+ }
+ });
+ }
+
+ return inferers;
+}
+
void StaticShapeInferer::visit(const ir::operation::ArgMinMax &op)
{
auto &operands = _lowered_subg->graph().operands();
@@ -1306,8 +1397,11 @@ void StaticShapeInferer::visit(const ir::operation::Bulk &op)
auto origin_output_shape = op.param().origin_output_shapes[0];
// TODO: more check for valid batch request
- assert(cur_input_shape.dim(0) >= origin_output_shape.dim(0));
- assert(cur_input_shape.dim(0) % origin_output_shape.dim(0) == 0);
+ if ((cur_input_shape.dim(0) < origin_output_shape.dim(0)) ||
+ (cur_input_shape.dim(0) % origin_output_shape.dim(0) != 0))
+ {
+ throw std::runtime_error("StaticShapeInferer " + op.name() + ": Not supported batch size");
+ }
size_t batch_multiplier = cur_input_shape.dim(0) / origin_output_shape.dim(0);
ir::Shape new_shape;
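With the wiring above, a caller builds the inferer map once per model and only runs the primary subgraph; If/While child subgraphs are inferred recursively through the registered observers. A sketch of that call pattern, following its use in MultiModelCompiler.cc from this patch (the include set and the free-function wrapper are assumptions):

    #include "compiler/LoweredGraph.h"
    #include "compiler/StaticShapeInferer.h"

    #include <memory>
    #include <unordered_map>

    // Sketch: run static shape inference for one model's lowered subgraphs.
    void inferModelShapes(
      const std::unordered_map<onert::ir::SubgraphIndex,
                               std::unique_ptr<onert::compiler::LoweredGraph>> &lowered_subgs)
    {
      using namespace onert;

      // One StaticShapeInferer per subgraph, with If/While parent-child links already wired.
      auto inferers = compiler::StaticShapeInferer::createStaticShapeInferers(lowered_subgs);

      // Inferring the primary subgraph recursively infers all child subgraphs.
      inferers.at(ir::SubgraphIndex{0})->infer();

      for (const auto &pair : inferers)
        pair.second->dump();
    }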
diff --git a/runtime/onert/core/src/compiler/TensorRegistries.h b/runtime/onert/core/src/compiler/TensorRegistries.h
index b3cc0bbe3..c7e06e84c 100644
--- a/runtime/onert/core/src/compiler/TensorRegistries.h
+++ b/runtime/onert/core/src/compiler/TensorRegistries.h
@@ -71,7 +71,7 @@ public:
backend::ITensor *getITensor(ir::OperandIndex ind) const
{
- for (auto &tensor_reg : _tensor_regs)
+ for (auto &&tensor_reg : _tensor_regs)
{
auto tensor = tensor_reg->getITensor(ind);
if (tensor)
diff --git a/runtime/onert/core/src/compiler/pass/OddOutputPass.cc b/runtime/onert/core/src/compiler/pass/OddOutputPass.cc
index f50fae0d3..e2b3f6111 100644
--- a/runtime/onert/core/src/compiler/pass/OddOutputPass.cc
+++ b/runtime/onert/core/src/compiler/pass/OddOutputPass.cc
@@ -34,7 +34,7 @@ void OddOutputPass::run()
VERBOSE(OddOutputPass) << "Case 1 : An operand which is a model output and a model input"
<< std::endl;
- for (auto &ind : outputs)
+ for (const auto &ind : outputs)
{
if (_graph.getInputs().contains(ind))
{
@@ -46,7 +46,7 @@ void OddOutputPass::run()
VERBOSE(OddOutputPass) << "Case 2 : Two or more duplicated outputs" << std::endl;
std::unordered_set<ir::OperandIndex> occurence;
- for (auto &ind : outputs)
+ for (auto &&ind : outputs)
{
auto &obj = _graph.operands().at(ind);
if (occurence.count(ind) == 0)
diff --git a/runtime/onert/core/src/compiler/pass/PassRunner.cc b/runtime/onert/core/src/compiler/pass/PassRunner.cc
index 1be6d7794..2d11be201 100644
--- a/runtime/onert/core/src/compiler/pass/PassRunner.cc
+++ b/runtime/onert/core/src/compiler/pass/PassRunner.cc
@@ -31,7 +31,7 @@ PassRunner &PassRunner::append(std::unique_ptr<Pass> pass)
void PassRunner::run()
{
- for (auto &pass : _passes)
+ for (auto &&pass : _passes)
{
VERBOSE(PassRunner) << "Start running '" << pass->id() << "'" << std::endl;
pass->run();
diff --git a/runtime/onert/core/src/compiler/pass/PermutationInsertionPass.cc b/runtime/onert/core/src/compiler/pass/PermutationInsertionPass.cc
index 71efa1bb5..0da1e54df 100644
--- a/runtime/onert/core/src/compiler/pass/PermutationInsertionPass.cc
+++ b/runtime/onert/core/src/compiler/pass/PermutationInsertionPass.cc
@@ -105,9 +105,9 @@ void PermutationInsertionPass::callback(const ir::OperandIndex &index, ir::Opera
}
}
- for (auto &operation : remove_list)
+ for (const auto &operation_index : remove_list)
{
- object.removeUse(operation);
+ object.removeUse(operation_index);
}
}
}
diff --git a/runtime/onert/core/src/exec/Execution.cc b/runtime/onert/core/src/exec/Execution.cc
index 9d1e06d6c..7d5b406ef 100644
--- a/runtime/onert/core/src/exec/Execution.cc
+++ b/runtime/onert/core/src/exec/Execution.cc
@@ -23,13 +23,12 @@ namespace onert
namespace exec
{
-Execution::Execution(const std::shared_ptr<Executors> &executors) : _executors{executors}
+Execution::Execution(const std::shared_ptr<IExecutors> &executors) : _executors{executors}
{
assert(executors != nullptr);
- assert(executors->at(ir::SubgraphIndex{0}) != nullptr);
+ assert(executors->entryExecutor() != nullptr);
_io_desc.inputs.resize(_executors->inputSize());
_io_desc.outputs.resize(_executors->outputSize());
- sem_init(&_async_io_descs_sem, 0, 1);
}
void Execution::changeInputShape(const ir::IOIndex &index, const ir::Shape &new_shape)
@@ -70,80 +69,6 @@ void Execution::setInput(const ir::IOIndex &index, const void *buffer, size_t le
_io_desc.inputs.at(index.value()) = std::make_unique<InputDesc>(info, buffer, length, layout);
}
-void Execution::createNewAsyncDesc(uint32_t count)
-{
- IODescription *_async_io_desc = new IODescription;
- _async_io_desc->inputs.resize(primary_subgraph().getInputs().size());
- _async_io_desc->outputs.resize(primary_subgraph().getOutputs().size());
-
- _async_io_descs.push_back({_async_io_desc, count});
-}
-
-void Execution::setFinish() { finished = true; }
-
-bool Execution::isEmptyQueue()
-{
- asyncIoDescSemWait();
- bool ret = _async_io_descs.empty();
- if (!ret)
- {
- for (uint32_t idx = 0; idx < _async_io_descs.front().first->inputs.size(); idx++)
- {
- if (_async_io_descs.front().first->inputs.at(idx).get() == nullptr)
- {
- ret = true;
- break;
- }
- }
- }
- asyncIoDescSemPost();
- return ret;
-}
-
-void Execution::executeAsyncInput(const ir::IOIndex &index, const void *buffer, size_t length,
- ir::Layout layout)
-{
- const auto info = _executors->inputInfo(index);
- IODescription *_async_io_desc = _async_io_descs.back().first;
-
- {
- auto input_shape_sig = _async_io_desc->dynamic_input_shapes.find(index);
- auto size_required =
- (input_shape_sig != _async_io_desc->dynamic_input_shapes.end())
- ? input_shape_sig->second.num_elements() * onert::ir::sizeOfDataType(info.typeInfo().type())
- : info.total_size();
-
- if (length < size_required)
- {
- throw std::runtime_error{"Too small length"};
- }
- }
- void *_buffer = (void *)malloc(length);
- if (_buffer == NULL)
- {
- throw std::runtime_error{"malloc failed"};
- }
- memcpy(_buffer, buffer, length);
-
- _async_io_desc->inputs.at(index.value()) =
- std::make_unique<InputDesc>(info, _buffer, length, layout);
-}
-
-void Execution::executeAsyncOutput(const ir::IOIndex &index, void *buffer, size_t length,
- ir::Layout layout)
-{
- const auto info = _executors->outputInfo(index);
- IODescription *_async_io_desc = _async_io_descs.front().first;
-
- if (length < info.total_size())
- {
- throw std::runtime_error{"Too small length"};
- }
-
- _async_io_desc->outputs.at(index.value()) =
- std::make_unique<OutputDesc>(info, buffer, length, layout);
-}
-
// TODO Remove default parameter
void Execution::setInput(const ir::IOIndex &index, const ir::TypeInfo &type, const ir::Shape &shape,
const void *buffer, size_t length, ir::Layout layout)
@@ -209,18 +134,6 @@ void Execution::execute()
VERBOSE(Execution) << "Execution finished" << std::endl;
}
-void Execution::AsyncExecute()
-{
- VERBOSE(Execution) << "Start Async execution" << std::endl;
- if (_async_io_descs.empty())
- {
- VERBOSE(Execution) << "The input is not ready" << std::endl;
- return;
- }
-
- primary_executor()->execute(*_async_io_descs.front().first);
-}
-
void Execution::startExecute()
{
VERBOSE(Execution) << "Create asynchronous execution thread" << std::endl;
@@ -251,163 +164,21 @@ ir::Shape Execution::getInputShape(ir::IOIndex ind) const
}
}
+// NNAPI returns failure if ANeuralNetworksExecution_getOutputOperandRank or
+// ANeuralNetworksExecution_getOutputOperandDimensions is called before execution.
+// On the other hand, NNFW API returns the static shape inference result if nnfw_output_tensorinfo
+// is called before execution.
+// To handle both cases, this method returns the static shape inference result and the failure is
+// handled on the NNAPI frontend.
ir::Shape Execution::getOutputShape(ir::IOIndex ind) const
{
if (!isFinished())
- throw std::runtime_error("Cannot get output shape before execution is finished");
+ return _executors->outputInfo(ind).shape();
const auto &output_desc = _io_desc.outputs.at(ind.value());
return output_desc->info.shape();
}
-void Execution::asyncIoDescSemWait() { sem_wait(&_async_io_descs_sem); }
-
-void Execution::asyncIoDescSemPost() { sem_post(&_async_io_descs_sem); }
-
-void Execution::runInference()
-{
- uint32_t inference_cnt;
- uint32_t output_sz = primary_subgraph().getOutputs().size();
- while (true)
- {
- if (isEmptyQueue())
- {
- if (isFinished())
- {
- if (!next_exes.empty())
- {
- for (uint32_t i = 0; i < next_exes.size(); i++)
- {
- std::get<0>(next_exes[i])->setFinish();
- }
- }
- else
- {
- sholudStop();
- }
- break;
- }
- }
- else
- {
- for (uint32_t i = 0; i < output_sz; i++)
- {
- auto opidx = primary_subgraph().getOutputs().at(i);
- auto shape = primary_subgraph().operands().at(opidx).shape();
- auto dtype = primary_subgraph().operands().at(opidx).typeInfo().type();
- auto rank = shape.rank();
- uint32_t tensor_size = 1;
- for (int32_t j = 0; j < rank; j++)
- {
- tensor_size *= shape.dim(j);
- }
- if (dtype == onert::ir::DataType::FLOAT32 || dtype == onert::ir::DataType::INT32 ||
- dtype == onert::ir::DataType::UINT32)
- tensor_size *= 4;
- else if (dtype == onert::ir::DataType::INT64)
- tensor_size *= 8;
- void *_buffer = (void *)malloc(tensor_size);
- if (_buffer == NULL)
- {
- throw std::runtime_error{"malloc failed"};
- }
- executeAsyncOutput(onert::ir::IOIndex(i), _buffer, tensor_size);
- }
- AsyncExecute();
-
- // set inputs of next execution
- auto _io_desc = getAsyncIoDescs()->front().first;
- inference_cnt = getAsyncIoDescs()->front().second;
- getAsyncIoDescs()->pop_front();
-
- for (uint32_t i = 0; i < next_exes.size(); i++)
- {
- auto next_exe = std::get<0>(next_exes[i]);
- auto o_index = std::get<1>(next_exes[i]);
- auto i_index = std::get<2>(next_exes[i]);
-
- next_exe->asyncIoDescSemWait();
- auto next_io_descs = next_exe->getAsyncIoDescs();
- bool exist = false;
- for (auto iter = next_io_descs->begin(); iter != next_io_descs->end(); iter++)
- {
- if (inference_cnt == iter->second)
- {
- exist = true;
- }
- }
-
- if (!exist)
- {
- next_exe->createNewAsyncDesc(inference_cnt);
- }
- for (auto iter = next_io_descs->begin(); iter != next_io_descs->end(); iter++)
- {
- if (inference_cnt == iter->second)
- {
- const auto input_index = next_exe->primary_subgraph().getInputs().at(i_index.value());
- const auto info = next_exe->primary_subgraph().operands().at(input_index).info();
-
- size_t length = _io_desc->outputs[o_index.value()]->size;
- void *_buffer = (void *)malloc(length);
- if (_buffer == NULL)
- {
- throw std::runtime_error{"malloc failed"};
- }
- memcpy(_buffer, _io_desc->outputs[o_index.value()]->buffer, length);
-
- iter->first->inputs.at(i_index.value()) = std::make_unique<onert::exec::InputDesc>(
- info, _buffer, length, onert::ir::Layout::NHWC);
- break;
- }
- }
- next_exe->asyncIoDescSemPost();
- }
-
- if (next_exes.empty())
- {
- std::vector<void *> results;
- for (uint32_t i = 0; i < _io_desc->outputs.size(); i++)
- {
- size_t length = _io_desc->outputs[i]->size;
- void *_buffer = (void *)malloc(length);
- if (_buffer == NULL)
- {
- throw std::runtime_error{"malloc failed"};
- }
- memcpy(_buffer, _io_desc->outputs[i]->buffer, length);
- results.push_back(_buffer);
- }
- _async_results.push_back(results);
- }
-
- for (uint32_t i = 0; i < _io_desc->inputs.size(); i++)
- {
- auto p = _io_desc->inputs.at(i).release();
- if (p)
- {
- free((void *)p->buffer);
- delete p;
- }
- }
- for (uint32_t i = 0; i < _io_desc->outputs.size(); i++)
- {
- auto p = _io_desc->outputs.at(i).release();
- if (p)
- {
- free(p->buffer);
- delete p;
- }
- }
- delete _io_desc;
- }
- }
-}
-
-bool Execution::stopWait(void) const { return stop_wait; }
-
-void Execution::sholudStop() { stop_wait = true; }
-
} // namespace exec
} // namespace onert
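One user-visible effect of the getOutputShape() change above: querying an output shape before execute() no longer throws and instead returns the static shape inference result, while after execution it reflects the shape recorded in the output descriptor. A usage sketch, where the executors pointer is assumed to come from a CompilerArtifact and the setOutput overload (mirroring the setInput signature shown above) is assumed from the existing Execution API:

    #include "exec/Execution.h"

    #include <cstddef>
    #include <memory>

    // Sketch: query the output shape before and after running a compiled model.
    void runOnce(const std::shared_ptr<onert::exec::IExecutors> &executors,
                 const void *in, size_t in_size, void *out, size_t out_size)
    {
      onert::exec::Execution execution{executors};

      // Before execute(): no exception anymore, the static shape inference result is returned.
      const auto static_shape = execution.getOutputShape(onert::ir::IOIndex{0});
      (void)static_shape;

      execution.setInput(onert::ir::IOIndex{0}, in, in_size, onert::ir::Layout::NHWC);
      execution.setOutput(onert::ir::IOIndex{0}, out, out_size, onert::ir::Layout::NHWC); // assumed overload
      execution.execute();

      // After execute(): the shape taken from the output descriptor filled during execution.
      const auto actual_shape = execution.getOutputShape(onert::ir::IOIndex{0});
      (void)actual_shape;
    }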
diff --git a/runtime/onert/core/src/exec/Execution.test.cc b/runtime/onert/core/src/exec/Execution.test.cc
index e3ea49470..fefe8a332 100644
--- a/runtime/onert/core/src/exec/Execution.test.cc
+++ b/runtime/onert/core/src/exec/Execution.test.cc
@@ -17,6 +17,7 @@
#include "exec/Execution.h"
#include "compiler/Compiler.h"
+#include "compiler/CompilerFactory.h"
#include "ir/Graph.h"
#include "ir/operation/BinaryArithmetic.h"
#include "util/TracingCtx.h"
@@ -90,6 +91,161 @@ public:
std::shared_ptr<onert::compiler::CompilerArtifact> artifact;
};
+class CompiledMockUpMultiModel
+{
+public:
+ CompiledMockUpMultiModel()
+ {
+ // Model0: a float elementwise add operation
+ // Model0 input: lhs0, rhs0
+ // Model0 output: add result (result0)
+
+ // Model1: a qasymm8 elementwise add operation
+ // Model1 input: result0, rhs1
+ // Model1 output: add result (result1)
+
+ // Model2: a float elementwise add operation
+ // Model2 input: result0, result1
+ // Model2 output: add result (result2)
+
+ // constant: rhs2
+ // result0 <= (lhs0 + rhs0)
+ // result1 <= (result0 + rhs1)
+ // result2 <= (result0 + result1)
+ // lhs0, rhs0, rhs1, result0, result1, result2 shape: {1, 2, 2, 1}
+ // activation: none (constant)
+
+ // Update edge information
+ edges.pkg_inputs.emplace_back(ModelIndex{0}, SubgraphIndex{0}, IOIndex{0});
+ edges.pkg_inputs.emplace_back(ModelIndex{0}, SubgraphIndex{0}, IOIndex{1});
+ edges.pkg_outputs.emplace_back(ModelIndex{2}, SubgraphIndex{0}, IOIndex{0});
+ // From
+ const auto result0 = IODesc{ModelIndex{0}, SubgraphIndex{0}, IOIndex{0}};
+ const auto result1 = IODesc{ModelIndex{1}, SubgraphIndex{0}, IOIndex{0}};
+ // To
+ const auto lhs1 = IODesc{ModelIndex{1}, SubgraphIndex{0}, IOIndex{0}};
+ const auto lhs2 = IODesc{ModelIndex{2}, SubgraphIndex{0}, IOIndex{0}};
+ const auto rhs2 = IODesc{ModelIndex{2}, SubgraphIndex{0}, IOIndex{1}};
+ edges.edges.insert({result0, lhs1});
+ edges.edges.insert({result0, lhs2});
+ edges.edges.insert({result1, rhs2});
+
+ for (size_t i = 0; i < 3; ++i)
+ {
+ graphs.emplace_back(std::make_shared<Graph>());
+ }
+ Shape shape{1, 2, 2, 1};
+
+ // Model0's add operands (result0 <= lhs0 + rhs0)
+ DataType types[3] = {DataType::FLOAT32, DataType::QUANT_UINT8_ASYMM, DataType::FLOAT32};
+ auto operand_lhs0 = graphs[0]->addOperand(shape, TypeInfo{types[0]});
+ auto operand_rhs0 = graphs[0]->addOperand(shape, TypeInfo{types[0]});
+ auto operand_result0 = graphs[0]->addOperand(shape, TypeInfo{types[0]});
+
+ // Model0's add operation
+ operation::BinaryArithmetic::Param param0;
+ param0.arithmetic_type = operation::BinaryArithmetic::ArithmeticType::ADD;
+ param0.activation = Activation::NONE;
+ auto input_set0 = OperandIndexSequence{operand_lhs0, operand_rhs0};
+ auto output_set0 = OperandIndexSequence{operand_result0};
+ graphs[0]->addOperation(
+ std::make_unique<operation::BinaryArithmetic>(input_set0, output_set0, param0));
+
+ // Model0's inputs/outputs
+ graphs[0]->addInput(operand_lhs0);
+ graphs[0]->addInput(operand_rhs0);
+ graphs[0]->addOutput(operand_result0);
+ graphs[0]->verify();
+
+ // Model1's add operands (result1 <= Model0 result + rhs1)
+ // static float rhs1_data[4] = {3, 1, -1, 5};
+ static uint8_t rhs1_data[4] = {131, 129, 127, 133};
+ const float scale = 1;
+ const int32_t zero_point = 128;
+ auto operand_lhs1 = graphs[1]->addOperand(shape, TypeInfo{types[1], scale, zero_point});
+ auto operand_rhs1 = graphs[1]->addOperand(shape, TypeInfo{types[1], scale, zero_point});
+ auto operand_result1 = graphs[1]->addOperand(shape, TypeInfo{types[1], scale, zero_point});
+ graphs[1]
+ ->operands()
+ .at(operand_rhs1)
+ .data(std::make_unique<CachedData>(reinterpret_cast<const uint8_t *>(&rhs1_data), 4));
+
+ // Model1's add operation
+ operation::BinaryArithmetic::Param param1;
+ param1.arithmetic_type = operation::BinaryArithmetic::ArithmeticType::ADD;
+ param1.activation = Activation::NONE;
+ auto input_set1 = OperandIndexSequence{operand_lhs1, operand_rhs1};
+ auto output_set1 = OperandIndexSequence{operand_result1};
+ graphs[1]->addOperation(
+ std::make_unique<operation::BinaryArithmetic>(input_set1, output_set1, param1));
+
+ // Model1's inputs/outputs
+ graphs[1]->addInput(operand_lhs1);
+ graphs[1]->addOutput(operand_result1);
+ graphs[1]->verify();
+
+ // Model2's add operands (result2 <= Model0 result + Model1 result)
+ auto operand_lhs2 = graphs[2]->addOperand(shape, TypeInfo{types[2]});
+ auto operand_rhs2 = graphs[2]->addOperand(shape, TypeInfo{types[2]});
+ auto operand_result2 = graphs[2]->addOperand(shape, TypeInfo{types[2]});
+
+ // Model2's add operation
+ operation::BinaryArithmetic::Param param2;
+ param2.arithmetic_type = operation::BinaryArithmetic::ArithmeticType::ADD;
+ param2.activation = Activation::NONE;
+ auto input_set2 = OperandIndexSequence{operand_lhs2, operand_rhs2};
+ auto output_set2 = OperandIndexSequence{operand_result2};
+ graphs[2]->addOperation(
+ std::make_unique<operation::BinaryArithmetic>(input_set2, output_set2, param2));
+
+ // Model2's inputs/outputs
+ graphs[2]->addInput(operand_lhs2);
+ graphs[2]->addInput(operand_rhs2);
+ graphs[2]->addOutput(operand_result2);
+ graphs[2]->verify();
+
+ // Compile
+ compile();
+ }
+
+public:
+ void compile()
+ {
+ auto nnpkg = std::make_shared<onert::ir::NNPkg>();
+ coptions.clear();
+ for (uint16_t i = 0; i < graphs.size(); ++i)
+ {
+ coptions.emplace_back(onert::compiler::CompilerOptions::fromGlobalConfig());
+
+ auto model = std::make_shared<onert::ir::Model>();
+ model->push(SubgraphIndex{0}, graphs[i]);
+
+ nnpkg->push(onert::ir::ModelIndex{i}, std::move(model));
+ }
+ for (const auto &pkg_input : edges.pkg_inputs)
+ {
+ nnpkg->addInput(pkg_input);
+ }
+ for (const auto &pkg_output : edges.pkg_outputs)
+ {
+ nnpkg->addOutput(pkg_output);
+ }
+ for (const auto &edge : edges.edges)
+ {
+ nnpkg->addEdge(edge.from, edge.to);
+ }
+ auto compiler = onert::compiler::CompilerFactory::get().create(nnpkg, coptions);
+ nnpkg.reset();
+ artifact = compiler->compile();
+ }
+
+public:
+ std::vector<std::shared_ptr<Graph>> graphs;
+ std::vector<std::unique_ptr<onert::compiler::CompilerOptions>> coptions;
+ std::shared_ptr<onert::compiler::CompilerArtifact> artifact;
+ ModelEdges edges;
+};
+
TEST(ExecInstance, simple)
{
auto mockup = CompiledMockUpModel();
@@ -209,7 +365,7 @@ class Inference
{
public:
Inference(const float (&input1)[4], const float (&input2)[4], float (&output)[4],
- std::shared_ptr<onert::exec::Executors> &executors)
+ std::shared_ptr<onert::exec::IExecutors> &executors)
: _input1{input1}, _input2{input2}, _output{output}, _executors{executors}
{
// DO NOTHING
@@ -233,7 +389,7 @@ private:
const float (&_input1)[4];
const float (&_input2)[4];
float (&_output)[4];
- std::shared_ptr<onert::exec::Executors> &_executors;
+ std::shared_ptr<onert::exec::IExecutors> &_executors;
};
// Support multi-thread execution
@@ -299,4 +455,181 @@ TEST(ExecInstance, async)
}
}
+TEST(ExecInstance, multi_model_simple)
+{
+ auto mockup = CompiledMockUpMultiModel();
+ auto executors = mockup.artifact->_executors;
+
+ auto input1 = IOIndex{0};
+ auto input2 = IOIndex{1};
+ auto output = IOIndex{0};
+
+ const float input1_buffer[4] = {1, 0, -1, -2};
+ const float input2_buffer[4] = {1, -3, 2, -4};
+ float output_buffer[4] = {};
+ const float output_expected[4] = {7, -5, 1, -7};
+
+ onert::exec::Execution execution{executors};
+
+ execution.setInput(input1, reinterpret_cast<const void *>(input1_buffer), 16);
+ execution.setInput(input2, reinterpret_cast<const void *>(input2_buffer), 16);
+ execution.setOutput(output, reinterpret_cast<void *>(output_buffer), 16);
+ execution.execute();
+
+ for (auto i = 0; i < 4; i++)
+ {
+ EXPECT_EQ(output_buffer[i], output_expected[i]);
+ }
+}
+
+TEST(ExecInstance, multi_model_twoCompile)
+{
+ auto mockup = CompiledMockUpMultiModel();
+ auto executors1 = mockup.artifact->_executors;
+ onert::exec::Execution execution1{executors1};
+
+ auto input1 = IOIndex{0};
+ auto input2 = IOIndex{1};
+ auto output = IOIndex{0};
+
+ const float exe1_input1_buffer[4] = {1, 0, -1, -2};
+ const float exe1_input2_buffer[4] = {1, -3, 2, -4};
+ float exe1_output_buffer[4] = {};
+ const float exe1_output_expected[4] = {7, -5, 1, -7};
+
+ execution1.setInput(input1, reinterpret_cast<const void *>(exe1_input1_buffer), 16);
+ execution1.setInput(input2, reinterpret_cast<const void *>(exe1_input2_buffer), 16);
+ execution1.setOutput(output, reinterpret_cast<void *>(exe1_output_buffer), 16);
+
+ // Make new executor: compile again
+ mockup.compile();
+ onert::exec::Execution execution2{mockup.artifact->_executors};
+
+ const float exe2_input1_buffer[4] = {2, 1, -2, 0};
+ const float exe2_input2_buffer[4] = {-3, 3, 1, 2};
+ float exe2_output_buffer[4] = {};
+ const float exe2_output_expected[4] = {1, 9, -3, 9};
+
+ execution2.setInput(input1, reinterpret_cast<const void *>(exe2_input1_buffer), 16);
+ execution2.setInput(input2, reinterpret_cast<const void *>(exe2_input2_buffer), 16);
+ execution2.setOutput(output, reinterpret_cast<void *>(exe2_output_buffer), 16);
+
+ execution1.execute();
+ execution2.execute();
+
+ for (auto i = 0; i < 4; i++)
+ {
+ EXPECT_EQ(exe1_output_buffer[i], exe1_output_expected[i]);
+ EXPECT_EQ(exe2_output_buffer[i], exe2_output_expected[i]);
+ }
+}
+
+// Support two initialized execution instance then ordered execution
+TEST(ExecInstance, multi_model_twoExecution)
+{
+ auto mockup = CompiledMockUpMultiModel();
+ auto executors = mockup.artifact->_executors;
+ auto input1 = IOIndex{0};
+ auto input2 = IOIndex{1};
+ auto output1 = IOIndex{0};
+
+ const float exe1_input1_buffer[4] = {1, 0, -1, -2};
+ const float exe1_input2_buffer[4] = {1, -3, 2, -4};
+ float exe1_output_buffer[4] = {};
+ const float exe1_output_expected[4] = {7, -5, 1, -7};
+ const float exe2_output_expected[4] = {1, 9, -3, 9};
+
+ onert::exec::Execution execution1{executors};
+ execution1.setInput(input1, reinterpret_cast<const void *>(exe1_input1_buffer), 16);
+ execution1.setInput(input2, reinterpret_cast<const void *>(exe1_input2_buffer), 16);
+ execution1.setOutput(output1, reinterpret_cast<void *>(exe1_output_buffer), 16);
+
+ const float exe2_input1_buffer[4] = {2, 1, -2, 0};
+ const float exe2_input2_buffer[4] = {-3, 3, 1, 2};
+ float exe2_output_buffer[4] = {};
+
+ // Make new execution
+ onert::exec::Execution execution2{executors};
+ execution2.setInput(input1, reinterpret_cast<const void *>(exe2_input1_buffer), 16);
+ execution2.setInput(input2, reinterpret_cast<const void *>(exe2_input2_buffer), 16);
+ execution2.setOutput(output1, reinterpret_cast<void *>(exe2_output_buffer), 16);
+
+ execution1.execute();
+ execution1.execute();
+ execution2.execute();
+ execution2.execute();
+
+ for (auto i = 0; i < 4; i++)
+ {
+ EXPECT_EQ(exe1_output_buffer[i], exe1_output_expected[i]);
+ EXPECT_EQ(exe2_output_buffer[i], exe2_output_expected[i]);
+ }
+}
+
+// Multi-model is not thread-safe yet
+
+// Support asynchronous execution
+TEST(ExecInstance, multi_model_async)
+{
+ auto mockup = CompiledMockUpMultiModel();
+ auto executors = mockup.artifact->_executors;
+
+ auto input1 = IOIndex{0};
+ auto input2 = IOIndex{1};
+ auto output = IOIndex{0};
+
+ const float input1_buffer[4] = {1, 0, -1, -2};
+ const float input2_buffer[4] = {1, -3, 2, -4};
+ float output_buffer[4] = {};
+ const float output_expected[4] = {7, -5, 1, -7};
+
+ onert::exec::Execution execution{executors};
+
+ execution.setInput(input1, reinterpret_cast<const void *>(input1_buffer), 16);
+ execution.setInput(input2, reinterpret_cast<const void *>(input2_buffer), 16);
+ execution.setOutput(output, reinterpret_cast<void *>(output_buffer), 16);
+ execution.startExecute();
+ execution.waitFinish();
+
+ for (auto i = 0; i < 4; i++)
+ {
+ EXPECT_EQ(output_buffer[i], output_expected[i]);
+ }
+}
+
+TEST(ExecInstance, multi_model_dequant_input_quant_output)
+{
+ auto mockup = CompiledMockUpMultiModel();
+ auto executors = mockup.artifact->_executors;
+
+ auto input1 = IOIndex{0};
+ auto input2 = IOIndex{1};
+ auto output = IOIndex{0};
+
+ const uint8_t input1_buffer[4] = {138, 128, 118, 108}; // {1, 0, -1, -2}
+ const uint8_t input2_buffer[4] = {138, 98, 148, 88}; // {1, -3, 2, -4}
+ uint8_t output_buffer[4] = {};
+ const uint8_t output_expected[4] = {198, 78, 138, 58}; // {7, -5, 1, -7}
+ float scale = 0.1;
+ int32_t zero_point = 128;
+
+ onert::exec::Execution execution{executors};
+
+ onert::ir::TypeInfo type_info{onert::ir::DataType::QUANT_UINT8_ASYMM, scale, zero_point};
+ execution.setInput(input1, type_info, execution.getInputShape(input1),
+ reinterpret_cast<const void *>(input1_buffer), 4, onert::ir::Layout::NHWC);
+ execution.setInput(input2, type_info, execution.getInputShape(input2),
+ reinterpret_cast<const void *>(input2_buffer), 4, onert::ir::Layout::NHWC);
+ execution.setOutput(output, type_info, execution.getOutputShape(output),
+ reinterpret_cast<void *>(output_buffer), 4, onert::ir::Layout::NHWC);
+ execution.execute();
+
+ for (auto i = 0; i < 4; i++)
+ {
+ EXPECT_EQ(output_buffer[i], output_expected[i]);
+ }
+}
+
+// TODO Add a unit test multi_model_quant_input_dequant_output
+
} // namespace
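Note: the expected outputs in the multi-model tests above follow directly from the topology built in CompiledMockUpMultiModel. The standalone sketch below (not part of the patch) reproduces the arithmetic, assuming rhs1 dequantizes to {3, 1, -1, 5} (stored as {131, 129, 127, 133} with scale 1 and zero point 128); the quantized variant of the test maps the same values through round(x / scale) + zero_point with scale 0.1 and zero point 128, e.g. 7.0 -> 198.

#include <cassert>

int main()
{
  // Inputs of multi_model_simple and the dequantized constant of Model1
  const float lhs0[4] = {1, 0, -1, -2};
  const float rhs0[4] = {1, -3, 2, -4};
  const float rhs1[4] = {3, 1, -1, 5};
  const float expected[4] = {7, -5, 1, -7};
  for (int i = 0; i < 4; ++i)
  {
    const float result0 = lhs0[i] + rhs0[i]; // Model0: float add
    const float result1 = result0 + rhs1[i]; // Model1: qasymm8 add (exact for these values)
    const float result2 = result0 + result1; // Model2: float add
    assert(result2 == expected[i]);
  }
  return 0;
}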
diff --git a/runtime/onert/core/src/exec/ExecutionObservee.cc b/runtime/onert/core/src/exec/ExecutionObservee.cc
index d6a2bfd17..66610f0e0 100644
--- a/runtime/onert/core/src/exec/ExecutionObservee.cc
+++ b/runtime/onert/core/src/exec/ExecutionObservee.cc
@@ -28,7 +28,7 @@ void ExecutionObservee::add(std::unique_ptr<IExecutionObserver> observer)
void ExecutionObservee::notifySubgraphBegin(ir::SubgraphIndex ind)
{
- for (auto &o : _observers)
+ for (auto &&o : _observers)
{
o->handleSubgraphBegin(ind);
}
@@ -36,7 +36,7 @@ void ExecutionObservee::notifySubgraphBegin(ir::SubgraphIndex ind)
void ExecutionObservee::notifySubgraphEnd(ir::SubgraphIndex ind)
{
- for (auto &o : _observers)
+ for (auto &&o : _observers)
{
o->handleSubgraphEnd(ind);
}
@@ -45,7 +45,7 @@ void ExecutionObservee::notifySubgraphEnd(ir::SubgraphIndex ind)
void ExecutionObservee::notifyJobBegin(IExecutor *executor, ir::SubgraphIndex subg_ind,
ir::OperationIndex op_ind, const backend::Backend *backend)
{
- for (auto &o : _observers)
+ for (auto &&o : _observers)
{
o->handleJobBegin(executor, subg_ind, op_ind, backend);
}
@@ -54,7 +54,7 @@ void ExecutionObservee::notifyJobBegin(IExecutor *executor, ir::SubgraphIndex su
void ExecutionObservee::notifyJobEnd(IExecutor *executor, ir::SubgraphIndex subg_ind,
ir::OperationIndex op_ind, const backend::Backend *backend)
{
- for (auto &o : _observers)
+ for (auto &&o : _observers)
{
o->handleJobEnd(executor, subg_ind, op_ind, backend);
}
diff --git a/runtime/onert/core/src/exec/ExecutionObservers.h b/runtime/onert/core/src/exec/ExecutionObservers.h
index 1aadac2f5..91fbac323 100644
--- a/runtime/onert/core/src/exec/ExecutionObservers.h
+++ b/runtime/onert/core/src/exec/ExecutionObservers.h
@@ -22,7 +22,7 @@
#include "../util/EventRecorder.h"
#include "../util/EventWriter.h"
-#include "exec/Executors.h"
+#include "exec/IExecutor.h"
#include "ir/Index.h"
#include "ir/Operation.h"
#include "util/ITimer.h"
diff --git a/runtime/onert/core/src/exec/ExecutorBase.cc b/runtime/onert/core/src/exec/ExecutorBase.cc
index d2d204a0b..515cf8e48 100644
--- a/runtime/onert/core/src/exec/ExecutorBase.cc
+++ b/runtime/onert/core/src/exec/ExecutorBase.cc
@@ -29,8 +29,8 @@ ExecutorBase::ExecutorBase(std::unique_ptr<compiler::LoweredGraph> &&lowered_gra
backend::BackendContexts &&backend_contexts,
const compiler::TensorRegistries &tensor_regs,
const util::TracingCtx *tracing_ctx)
- : _lowered_graph{std::move(lowered_graph)}, _backend_contexts{std::move(backend_contexts)},
- _graph{_lowered_graph->graph()}, _parent_graph{_lowered_graph->parent_graph()}, _mutex(),
+ : _lowered_graph{std::move(lowered_graph)},
+ _backend_contexts{std::move(backend_contexts)}, _graph{_lowered_graph->graph()}, _mutex(),
_tracing_ctx(tracing_ctx)
{
auto build_tensor_list = [&](const auto &ind_seq, auto &tensors) {
@@ -120,9 +120,27 @@ void ExecutorBase::execute(const IODescription &desc)
{
tensor->set_dynamic();
tensor->setShape(input_shape->second);
+ /*
+ * Change the tensor shape and allocate memory, since its shape may have been
+ * changed by nnfw_set_input_tensorinfo()
+ *
+ * Cases are:
+ * 1) static operand -> nnfw_set_input_tensorinfo() -> execute() -> execute()
+ * (a) (b)
+ *
+ * at (a), operand is static, tensor is static - memory dealloc is not needed
+ * (DynamicTensorManager cannot dealloc memory allocated by StaticTensorManager)
+ * at (b), operand is static, tensor is dynamic - memory dealloc is needed
+ *
+ * 2) dynamic operand -> nnfw_set_input_tensorinfo() -> execute() -> execute()
+ * (a) (b)
+ *
+ * at (a), operand is dynamic, tensor is dynamic - memory dealloc is not needed
+ * since it has not been allocated yet
+ * at (b), operand is dynamic, tensor is dynamic - memory dealloc is needed
+ */
+ tensor->applyShape(input_shape->second);
}
-
- handleDynamicInputTensor(ir::IOIndex{i}, desc);
}
assert(_output_tensors.size() == desc.outputs.size());
@@ -156,38 +174,9 @@ void ExecutorBase::execute(const IODescription &desc)
}
}
-/**
- * @brief Changes tensor shape and allocate memory
- * if input shape was changed by nnfw_set_input_tensorinfo()
- *
- * @note Cases are:
- * 1) static operand -> nnfw_set_input_tensorinfo() -> execute() -> execute()
- * (a) (b)
- *
- * at (a), operand is static, tensor is static - memory dealloc is not needed
- * (DynamicTensorManager cannot dealloc memory allocated by StaticTensorManager)
- * at (b), operand is static, tensor is dynamic - memory dealloc is needed
- *
- * 2) dynamic operand -> nnfw_set_input_tensorinfo() -> execute() -> execute()
- * (a) (b)
- *
- * at (a), operand is dynamic, tensor is dynamic - memory dealloc is not needed
- * since it has not been allocated yet
- * at (b), operand is dynamic, tensor is dynamic - memory dealloc is needed
- */
-void ExecutorBase::handleDynamicInputTensor(ir::IOIndex io_ind, const IODescription &desc)
-{
- auto shape_sig_found = desc.dynamic_input_shapes.find(io_ind);
- if (shape_sig_found != desc.dynamic_input_shapes.end())
- {
- auto changed_input_shape = shape_sig_found->second;
- _input_tensors[io_ind.value()]->applyShape(changed_input_shape);
- }
-}
-
bool ExecutorBase::hasDynamicInput()
{
- for (auto &tensor : _input_tensors)
+ for (auto &&tensor : _input_tensors)
{
if (tensor->is_dynamic())
return true;
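Note: the comment above summarizes the cases handled when an input shape is overridden at runtime. A minimal sketch of case 1), assuming the nnfw C API declared in nnfw.h; the package path and shapes here are hypothetical:

#include <nnfw.h>
#include <vector>

void run_twice_with_resized_input(const char *pkg_path) // hypothetical nnpackage path
{
  nnfw_session *session = nullptr;
  nnfw_create_session(&session);
  nnfw_load_model_from_file(session, pkg_path);
  nnfw_prepare(session);

  // Override the shape of input 0; the corresponding input tensor becomes dynamic
  nnfw_tensorinfo ti{NNFW_TYPE_TENSOR_FLOAT32, 4, {1, 2, 2, 1}};
  nnfw_set_input_tensorinfo(session, 0, &ti);

  std::vector<float> input(4, 0.f), output(4, 0.f);
  nnfw_set_input(session, 0, NNFW_TYPE_TENSOR_FLOAT32, input.data(), input.size() * sizeof(float));
  nnfw_set_output(session, 0, NNFW_TYPE_TENSOR_FLOAT32, output.data(), output.size() * sizeof(float));
  nnfw_run(session); // (a) static operand, tensor turns dynamic; no dealloc needed
  nnfw_run(session); // (b) tensor is already dynamic; its old buffer is released first
  nnfw_close_session(session);
}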
diff --git a/runtime/onert/core/src/exec/ExecutorBase.h b/runtime/onert/core/src/exec/ExecutorBase.h
index e4f914546..7aee3d9ee 100644
--- a/runtime/onert/core/src/exec/ExecutorBase.h
+++ b/runtime/onert/core/src/exec/ExecutorBase.h
@@ -51,9 +51,7 @@ public:
virtual ~ExecutorBase() = default;
- const ir::Graph &graph() final { return _graph; }
-
- const ir::Graph &parent_graph() final { return _parent_graph; }
+ const ir::Graph &graph() const final { return _graph; }
void execute(const IODescription &desc) final;
@@ -70,6 +68,11 @@ public:
void addObserver(std::unique_ptr<IExecutionObserver> ref) { _subject.add(std::move(ref)); };
+ const std::vector<backend::builtin::IOTensor *> &getInputTensors() const override
+ {
+ return _input_tensors;
+ }
+
const std::vector<backend::builtin::IOTensor *> &getOutputTensors() const override
{
return _output_tensors;
@@ -87,14 +90,10 @@ protected:
std::unique_ptr<compiler::LoweredGraph> _lowered_graph;
backend::BackendContexts _backend_contexts;
const ir::Graph &_graph;
- const ir::Graph &_parent_graph;
std::vector<backend::builtin::IOTensor *> _input_tensors;
std::vector<backend::builtin::IOTensor *> _output_tensors;
std::mutex _mutex;
const util::TracingCtx *_tracing_ctx;
-
-private:
- void handleDynamicInputTensor(ir::IOIndex input_index, const IODescription &desc);
};
} // namespace exec
diff --git a/runtime/onert/core/src/exec/Executors.cc b/runtime/onert/core/src/exec/Executors.cc
index e0ee24fea..3f4b3cc7f 100644
--- a/runtime/onert/core/src/exec/Executors.cc
+++ b/runtime/onert/core/src/exec/Executors.cc
@@ -14,170 +14,628 @@
* limitations under the License.
*/
-#include "exec/Executors.h"
+#include "Executors.h"
-namespace onert
-{
-namespace exec
+#include "../backend/builtin/IOTensor.h"
+
+namespace
{
-uint32_t Executors::inputSize() const
+using namespace onert;
+
+int32_t find_input_index(const std::vector<ir::IODesc> &pkg_inputs,
+ const ir::ModelIndex &model_index, const ir::SubgraphIndex &subg_index,
+ const ir::IOIndex &io_index)
{
- return _model_edges ? _model_edges->pkg_inputs.size()
- : _executors.at(ir::SubgraphIndex{0})->graph().getInputs().size();
+ for (size_t i = 0; i < pkg_inputs.size(); i++)
+ {
+ auto &input_desc = pkg_inputs[i];
+ if ((std::get<ir::ModelIndex>(input_desc) == model_index) &&
+ (std::get<ir::SubgraphIndex>(input_desc) == subg_index) &&
+ (std::get<ir::IOIndex>(input_desc) == io_index))
+ return static_cast<int32_t>(i);
+ }
+ return -1;
}
-uint32_t Executors::outputSize() const
+int32_t find_output_index(const std::vector<ir::IODesc> &pkg_outputs,
+ const ir::ModelIndex &model_index, const ir::SubgraphIndex &subg_index,
+ const ir::IOIndex &io_index)
{
- return _model_edges ? _model_edges->pkg_outputs.size()
- : _executors.at(ir::SubgraphIndex{0})->graph().getOutputs().size();
+ for (size_t i = 0; i < pkg_outputs.size(); i++)
+ {
+ auto &input_desc = pkg_outputs[i];
+ if ((std::get<ir::ModelIndex>(input_desc) == model_index) &&
+ (std::get<ir::SubgraphIndex>(input_desc) == subg_index) &&
+ (std::get<ir::IOIndex>(input_desc) == io_index))
+ return static_cast<int32_t>(i);
+ }
+ return -1;
}
-const ir::OperandInfo Executors::inputInfo(const ir::IOIndex &index)
+} // namespace
+
+namespace onert
+{
+namespace exec
+{
+
+class Executors::EdgeTensor : public backend::builtin::IOTensor
{
- if (_model_edges)
+public:
+ EdgeTensor(const ir::OperandInfo &info, ir::Layout layout)
+ : backend::builtin::IOTensor(info, layout), _buffer{nullptr}, _ref_count{0}
{
- // Assume that each model may have only one subgraph
- // TODO handle general case
- const auto desc = _model_edges->pkg_inputs[index.value()];
- const auto model_idx = std::get<0>(desc);
- const auto executor_idx = ir::SubgraphIndex{model_idx.value()};
- const auto input_index = _executors.at(executor_idx)->graph().getInputs().at(std::get<2>(desc));
- return _executors.at(executor_idx)->graph().operands().at(input_index).info();
}
+ ~EdgeTensor() = default;
- const auto input_index = _executors.at(ir::SubgraphIndex{0})->graph().getInputs().at(index);
- return _executors.at(ir::SubgraphIndex{0})->graph().operands().at(input_index).info();
-}
+ void allocate_buffer()
+ {
+ const auto total_size = orig_info().total_size();
+ _buffer = std::make_unique<uint8_t[]>(total_size);
+ _ref_count = 1;
-const ir::OperandInfo Executors::outputInfo(const ir::IOIndex &index)
-{
- if (_model_edges)
+ // NOTE An executor's inputs/outputs are always IPortableTensor. If the backend of an
+ // input/output uses a tensor that does not inherit IPortableTensor, a Permute operation
+ // is added and all inputs/outputs become IPortableTensor at compile stage.
+ // This allows user buffers to be set as the inputs/outputs of executors.
+ setUserTensor(_buffer.get(), total_size);
+ }
+
+ void increase_ref() { _ref_count++; }
+
+ void decrease_ref()
{
- // Assume that each model may have only one subgraph
- // TODO handle general case
- auto desc = _model_edges->pkg_outputs[index.value()];
- auto model_idx = std::get<0>(desc);
- auto executor_idx = ir::SubgraphIndex{model_idx.value()};
- auto output_index = _executors.at(executor_idx)->graph().getOutputs().at(std::get<2>(desc));
- return _executors.at(executor_idx)->graph().operands().at(output_index).info();
+ assert(_ref_count > 0);
+ _ref_count--;
+ if (_ref_count == 0)
+ {
+ _buffer.reset();
+ setUserTensor(nullptr, orig_info().total_size());
+ }
}
- auto output_index = _executors.at(ir::SubgraphIndex{0})->graph().getOutputs().at(index);
- return _executors.at(ir::SubgraphIndex{0})->graph().operands().at(output_index).info();
+private:
+ std::unique_ptr<uint8_t[]> _buffer;
+ int32_t _ref_count;
+};
+
+void Executors::emplace(const ir::ModelIndex &model_index, const ir::SubgraphIndex &subg_index,
+ std::unique_ptr<IExecutor> exec)
+{
+ _executors.emplace(std::make_pair(model_index, subg_index), std::move(exec));
}
-void Executors::execute(const IODescription &desc)
+IExecutor *Executors::at(const ir::ModelIndex &model_index,
+ const ir::SubgraphIndex &subg_index) const
+{
+ return _executors.at(std::make_pair(model_index, subg_index)).get();
+}
+
+uint32_t Executors::inputSize() const { return _model_edges->pkg_inputs.size(); }
+
+uint32_t Executors::outputSize() const { return _model_edges->pkg_outputs.size(); }
+
+const ir::OperandInfo &Executors::inputInfo(const ir::IOIndex &index) const
{
- if (_model_edges)
- return executeEntries(desc);
+ auto const desc = _model_edges->pkg_inputs[index.value()];
+ auto const model_index = std::get<0>(desc);
+ auto const subg_index = std::get<1>(desc);
+ auto const io_index = std::get<2>(desc);
+ auto const executor = at(model_index, subg_index);
+ return executor->getInputTensors().at(io_index.value())->orig_info();
+}
- _executors.at(ir::SubgraphIndex{0})->execute(desc);
+const ir::OperandInfo &Executors::outputInfo(const ir::IOIndex &index) const
+{
+ auto const desc = _model_edges->pkg_outputs[index.value()];
+ auto const model_index = std::get<0>(desc);
+ auto const subg_index = std::get<1>(desc);
+ auto const io_index = std::get<2>(desc);
+ auto const executor = at(model_index, subg_index);
+ return executor->getOutputTensors().at(io_index.value())->orig_info();
}
-void Executors::executeEntries(const IODescription &desc)
+// Allow only the edges below:
+// m1 < m2, s1 == 0 and s2 == 0 for edge 'm1:s1:o1 -> m2:s2:o2'
+void Executors::checkSupportedMultimodel() const
{
- // Assume 2 executors only
- // Assume that each model may have only one subgraph
- // TODO Support general case
- if (_executors.size() != 2)
- throw std::runtime_error{"NYI: Multi model execution for this package is not supported yet"};
+ // If the package includes an unconnected model, model_count is less than the real model
+ // count in the package. Then this method throws an exception based on the model index:
+ // 1st model: input assumption
+ // Otherwise: edge assumption
- // Assume all edges are 0:0:x -> 1:0:x
+ // Assumption: edges
+ // m1 < m2, s1 == 0 and s2 == 0 if edge 'm1:s1:o1 -> m2:s2:o2'
for (auto edge : _model_edges->edges)
{
- if ((std::get<ir::ModelIndex>(edge.from) != ir::ModelIndex{0}) ||
- (std::get<ir::ModelIndex>(edge.to) != ir::ModelIndex{1}) ||
- (std::get<ir::SubgraphIndex>(edge.from) != ir::SubgraphIndex{0}) ||
- (std::get<ir::SubgraphIndex>(edge.to) != ir::SubgraphIndex{0}) ||
- (std::get<ir::IOIndex>(edge.from) != std::get<ir::IOIndex>(edge.to)))
- throw std::runtime_error{"NYI: Multi model execution for this edge is not supported yet"};
+ auto const model_from = std::get<ir::ModelIndex>(edge.from);
+ auto const model_to = std::get<ir::ModelIndex>(edge.to);
+ auto const subg_from = std::get<ir::SubgraphIndex>(edge.from);
+ auto const subg_to = std::get<ir::SubgraphIndex>(edge.to);
+
+ if (model_from.value() == model_to.value())
+ {
+ throw std::runtime_error{"Multi model's edge set has invalid edge"};
+ }
+
+ if ((model_from.value() > model_to.value()) || (subg_from != ir::SubgraphIndex{0}) ||
+ (subg_to != ir::SubgraphIndex{0}))
+ throw std::runtime_error{"NYI: Multi model execution for this edge set is not supported yet"};
}
- // Assume all package inputs are 0:0:x
- for (uint32_t i = 0; i < _model_edges->pkg_inputs.size(); i++)
+ // Assumption: package inputs
+ // Since m1 < m2 always holds, every input of the 1st model must come from a package input
{
- auto input = _model_edges->pkg_inputs[i];
- if ((std::get<ir::ModelIndex>(input) != ir::ModelIndex{0}) ||
- (std::get<ir::SubgraphIndex>(input) != ir::SubgraphIndex{0}) ||
- (std::get<ir::IOIndex>(input) != ir::IOIndex{i}))
+ auto first_executor = at(ir::ModelIndex{0}, ir::SubgraphIndex{0});
+ auto search_first_model = [&](const ir::IOIndex &input_index) {
+ for (const auto &input : _model_edges->pkg_inputs)
+ {
+ if ((std::get<ir::ModelIndex>(input) == ir::ModelIndex{0}) &&
+ (std::get<ir::SubgraphIndex>(input) == ir::SubgraphIndex{0}) &&
+ (std::get<ir::IOIndex>(input) == input_index))
+ return true;
+ }
+
+ return false;
+ };
+
+ for (uint32_t i = 0; i < first_executor->getInputTensors().size(); i++)
{
- throw std::runtime_error{"NYI: Support package input to 1st model with same order"};
+ if (!search_first_model(ir::IOIndex{i}))
+ throw std::runtime_error{"Cannot find 1st model's input buffer"};
}
}
- // Assume all package outputs are 1:0:x
- for (uint32_t i = 0; i < _model_edges->pkg_outputs.size(); i++)
+ // Check whether nnpkg outputs and Edge `from` are duplicated
+ for (const auto &edge : _model_edges->edges)
{
- auto output = _model_edges->pkg_outputs[i];
- if ((std::get<ir::ModelIndex>(output) != ir::ModelIndex{1}) ||
- (std::get<ir::SubgraphIndex>(output) != ir::SubgraphIndex{0}) ||
- (std::get<ir::IOIndex>(output) != ir::IOIndex{i}))
+ if (std::find(_model_edges->pkg_outputs.begin(), _model_edges->pkg_outputs.end(), edge.from) !=
+ _model_edges->pkg_outputs.end())
{
- throw std::runtime_error{"NYI: Support package output from 2nd model with same order"};
+ throw std::runtime_error{"Multi model execution does not support duplicating nnpkg outputs "
+ "with `from` of edges yet"};
}
}
+}
+
+void Executors::createEdgeQuantLayers()
+{
+ if (_is_created_edge_quant_layers)
+ {
+ return;
+ }
- const auto &executor1 = _executors.at(ir::SubgraphIndex{0});
- const auto &graph1 = executor1->graph();
- const auto &executor2 = _executors.at(ir::SubgraphIndex{1});
- const auto &graph2 = executor2->graph();
+ // Create EdgeTensor for edges between executors
+ for (const auto &pair : _edge_map)
+ {
+ const auto &from_iodesc = pair.first;
+ const auto &from_model_index = std::get<ir::ModelIndex>(from_iodesc);
+ const auto &from_subg_index = std::get<ir::SubgraphIndex>(from_iodesc);
+ const auto &from_io_index = std::get<ir::IOIndex>(from_iodesc);
+
+ const auto from_executor = _executors.at({from_model_index, from_subg_index}).get();
+ const auto from_tensor = from_executor->getOutputTensors().at(from_io_index.value());
+
+ const auto &from_info = from_tensor->orig_info();
+ const auto from_layout = from_tensor->orig_layout();
+ _edge_tensors[from_iodesc] = std::make_unique<EdgeTensor>(from_info, from_layout);
+ }
- if ((graph1.getInputs().size() != _model_edges->pkg_inputs.size()) ||
- (graph2.getOutputs().size() != _model_edges->pkg_outputs.size()) ||
- (graph1.getOutputs().size() != graph2.getInputs().size()) ||
- (graph1.getOutputs().size() != _model_edges->edges.size()))
+ // Append type-aware quantization layer for edges between executors
+ for (const auto &executor_pair : _executors)
{
- throw std::runtime_error{"NYI: Unsupported model edge pattern"};
+ const auto &executor_index = executor_pair.first;
+ const auto &model_index = executor_index.first;
+ const auto &subg_index = executor_index.second;
+
+ std::vector<backend::ITensor *> inputs;
+ std::vector<backend::ITensor *> outputs;
+ for (const auto &pair : _edge_map)
+ {
+ const auto &from_iodesc = pair.first;
+ if (std::get<ir::ModelIndex>(from_iodesc) == model_index &&
+ std::get<ir::SubgraphIndex>(from_iodesc) == subg_index)
+ {
+ const auto from_tensor = _edge_tensors[from_iodesc].get();
+ const auto &to_list = pair.second;
+
+ for (const auto &to_iodesc : to_list)
+ {
+ const auto &to_model_index = std::get<ir::ModelIndex>(to_iodesc);
+ const auto &to_subg_index = std::get<ir::SubgraphIndex>(to_iodesc);
+ const auto &to_io_index = std::get<ir::IOIndex>(to_iodesc);
+
+ const auto to_executor = _executors.at({to_model_index, to_subg_index}).get();
+ const auto to_tensor = to_executor->getInputTensors().at(to_io_index.value());
+
+ // TODO Unify tensors with the same `from` tensor and same type
+ if (from_tensor->data_type() != to_tensor->data_type())
+ {
+ assert(inputs.size() == outputs.size());
+ const auto &to_info =
+ to_executor->getInputTensors().at(to_io_index.value())->orig_info();
+ const auto to_layout = to_tensor->orig_layout();
+ inputs.emplace_back(from_tensor);
+
+ auto type_aware_quant_tensor = std::make_unique<EdgeTensor>(to_info, to_layout);
+ outputs.emplace_back(type_aware_quant_tensor.get());
+
+ _edge_quant_tensors[to_iodesc] = std::move(type_aware_quant_tensor);
+ }
+ }
+ }
+ }
+
+ auto layer = std::make_unique<PermuteLayer>(inputs, outputs);
+ layer->prepare();
+ _edge_quant_layers[{model_index, subg_index}] = std::move(layer);
}
- // Prepare buffer
- // Assume buffer layout is NHWC
- std::vector<std::unique_ptr<uint8_t[]>> bufs(_model_edges->edges.size());
- std::vector<const ir::OperandInfo *> buf_infos(_model_edges->edges.size());
- const auto layout = ir::Layout::NHWC;
+ _is_created_edge_quant_layers = true;
+}
- for (uint32_t i = 0; i < graph1.getOutputs().size(); i++)
+void Executors::CreatePkgIOTensors(const IODescription &desc)
+{
+ for (const auto &pkg_input : _model_edges->pkg_inputs)
{
- const auto buf_index =
- _executors.at(ir::SubgraphIndex{0})->graph().getOutputs().at(ir::IOIndex{i});
- buf_infos[i] = &_executors.at(ir::SubgraphIndex{0})->graph().operands().at(buf_index).info();
- const auto buf_size = buf_infos[i]->total_size();
- bufs[i] = std::make_unique<uint8_t[]>(buf_size);
+ // Create IOTensor for nnpkg inputs
+ const auto &model_index = std::get<ir::ModelIndex>(pkg_input);
+ const auto &subg_index = std::get<ir::SubgraphIndex>(pkg_input);
+ const auto &io_index = std::get<ir::IOIndex>(pkg_input);
+ const auto input_pkg_index =
+ find_input_index(_model_edges->pkg_inputs, model_index, subg_index, io_index);
+ auto input_desc = desc.inputs[input_pkg_index].get();
+ _pkg_input_tensors[pkg_input] =
+ std::make_unique<backend::builtin::IOTensor>(input_desc->info, input_desc->layout);
}
- // 1st executor
+ for (const auto &pkg_output : _model_edges->pkg_outputs)
{
- IODescription desc1;
- const auto input_size = graph1.getInputs().size();
- const auto output_size = graph1.getOutputs().size();
- desc1.inputs.resize(input_size);
- desc1.outputs.resize(output_size);
- for (uint32_t i = 0; i < input_size; i++)
- desc1.inputs[i] = std::make_unique<InputDesc>(*desc.inputs[i].get());
- for (uint32_t i = 0; i < output_size; i++)
- desc1.outputs[i] = std::make_unique<OutputDesc>(*buf_infos[i], bufs[i].get(),
- buf_infos[i]->total_size(), layout);
+ // Create IOTensor for nnpkg outputs
+ const auto &model_index = std::get<ir::ModelIndex>(pkg_output);
+ const auto &subg_index = std::get<ir::SubgraphIndex>(pkg_output);
+ const auto &io_index = std::get<ir::IOIndex>(pkg_output);
+ const auto output_pkg_index =
+ find_output_index(_model_edges->pkg_outputs, model_index, subg_index, io_index);
+ auto output_desc = desc.outputs[output_pkg_index].get();
+ _pkg_output_tensors[pkg_output] =
+ std::make_unique<backend::builtin::IOTensor>(output_desc->info, output_desc->layout);
+ }
+}
- executor1->execute(desc1);
+void Executors::createPkgIOQuantLayers(const IODescription &desc)
+{
+ // Append type-aware quantization layer for nnpkg inputs/outputs between executors
+ for (const auto &pair : _executors)
+ {
+ const auto &executor_index = pair.first;
+ const auto &model_index = executor_index.first;
+ const auto &subg_index = executor_index.second;
+ const auto executor = pair.second.get();
+
+ // Find pkg inputs of current executor
+ std::vector<ir::IODesc> pkg_inputs;
+ for (const auto &pkg_input : _model_edges->pkg_inputs)
+ {
+ if (std::get<ir::ModelIndex>(pkg_input) == model_index &&
+ std::get<ir::SubgraphIndex>(pkg_input) == subg_index)
+ {
+ pkg_inputs.emplace_back(pkg_input);
+ }
+ }
+ std::vector<backend::ITensor *> src_tensors;
+ std::vector<backend::ITensor *> dst_tensors;
+ for (const auto &pkg_input : pkg_inputs)
+ {
+ const auto &io_index = std::get<ir::IOIndex>(pkg_input);
+ const auto input_pkg_index =
+ find_input_index(_model_edges->pkg_inputs, model_index, subg_index, io_index);
+ auto input_desc = desc.inputs[input_pkg_index].get();
+
+ // Create EdgeTensor for nnpkg input if type is different
+ const auto input_tensor =
+ executor->getInputTensors().at(std::get<ir::IOIndex>(pkg_input).value());
+ const auto &orig_info = input_tensor->orig_info();
+ if (input_desc->info.typeInfo().type() != input_tensor->orig_info().typeInfo().type())
+ {
+ const auto orig_layout = input_tensor->orig_layout();
+ auto pkg_input_edge_tensor = std::make_unique<EdgeTensor>(orig_info, orig_layout);
+ _pkg_input_quant_tensors[pkg_input] = std::move(pkg_input_edge_tensor);
+
+ // Append type-aware quantization layer's inputs/outputs
+ src_tensors.emplace_back(_pkg_input_tensors[pkg_input].get());
+ dst_tensors.emplace_back(_pkg_input_quant_tensors[pkg_input].get());
+ }
+ }
+
+ // Create type-aware quantization layer for nnpkg inputs
+ auto pkg_input_layer = std::make_unique<PermuteLayer>(src_tensors, dst_tensors);
+ pkg_input_layer->prepare();
+ _pkg_input_quant_layers[{model_index, subg_index}] = std::move(pkg_input_layer);
+
+ // Find pkg outputs of current executor
+ std::vector<ir::IODesc> pkg_outputs;
+ for (const auto &pkg_output : _model_edges->pkg_outputs)
+ {
+ if (std::get<ir::ModelIndex>(pkg_output) == model_index &&
+ std::get<ir::SubgraphIndex>(pkg_output) == subg_index)
+ {
+ pkg_outputs.emplace_back(pkg_output);
+ }
+ }
+ src_tensors.clear();
+ dst_tensors.clear();
+ // Create Tensors of nnpkg outputs for type-aware quantization
+ for (const auto &pkg_output : pkg_outputs)
+ {
+ const auto &io_index = std::get<ir::IOIndex>(pkg_output);
+ const auto output_pkg_index =
+ find_output_index(_model_edges->pkg_outputs, model_index, subg_index, io_index);
+ auto output_desc = desc.outputs[output_pkg_index].get();
+
+ // Create EdgeTensor for nnpkg output if type is different
+ const auto output_tensor =
+ executor->getOutputTensors().at(std::get<ir::IOIndex>(pkg_output).value());
+ const auto &orig_info = output_tensor->orig_info();
+ if (output_desc->info.typeInfo().type() != output_tensor->orig_info().typeInfo().type())
+ {
+ const auto orig_layout = output_tensor->orig_layout();
+ auto pkg_output_edge_tensor = std::make_unique<EdgeTensor>(orig_info, orig_layout);
+ _pkg_output_quant_tensors[pkg_output] = std::move(pkg_output_edge_tensor);
+
+ // Append type-aware quantization layer's inputs/outputs
+ src_tensors.emplace_back(_pkg_output_quant_tensors[pkg_output].get());
+ dst_tensors.emplace_back(_pkg_output_tensors[pkg_output].get());
+ }
+ }
+
+ // Create type-aware quantization layer for nnpkg outputs
+ auto pkg_output_layer = std::make_unique<PermuteLayer>(src_tensors, dst_tensors);
+ pkg_output_layer->prepare();
+ _pkg_output_quant_layers[{model_index, subg_index}] = std::move(pkg_output_layer);
}
+}
+
+void Executors::execute(const IODescription &desc)
+{
+ // Check supported multi model package
+ checkSupportedMultimodel();
+
+ // TODO Move creating type-aware quantization layers for edges in compilation stage
+ createEdgeQuantLayers();
+
+ // TODO Create IOTensors only once and recreate them only if nnpkg info changes
+ CreatePkgIOTensors(desc);
+
+ // TODO Create type-aware quantization layers only once and recreate them only if type changes
+ createPkgIOQuantLayers(desc);
- // 2nd executor
+ // TODO Find better way to schedule order of executors
+ auto const model_count = modelCount();
+
+ auto find_from = [&](const ir::ModelIndex &model_index, const ir::SubgraphIndex &subg_index,
+ const ir::IOIndex &io_index) {
+ for (const auto &edge : _model_edges->edges)
+ {
+ if ((std::get<ir::ModelIndex>(edge.to) == model_index) &&
+ (std::get<ir::SubgraphIndex>(edge.to) == subg_index) &&
+ (std::get<ir::IOIndex>(edge.to) == io_index))
+ return edge.from;
+ }
+
+ throw std::runtime_error{"Cannot find edge for model input"};
+ };
+
+ // Execute each model
+ // NOTE May be better to use vector instead of unordered_map for _executors
+ for (auto model_index = ir::ModelIndex{0}; model_index.value() < model_count; model_index++)
{
- IODescription desc2;
- const auto input_size = graph2.getInputs().size();
- const auto output_size = graph2.getOutputs().size();
- desc2.inputs.resize(input_size);
- desc2.outputs.resize(output_size);
+ // Find executor
+ auto executor = at(model_index, ir::SubgraphIndex{0});
+
+ // Set IOTensors
+ // TODO Set internal IOTensors only once
+ std::vector<backend::IPortableTensor *> inputs_inter;
+ std::vector<backend::IPortableTensor *> outputs_inter;
+ const auto &input_tensors = executor->getInputTensors();
+ const auto &output_tensors = executor->getOutputTensors();
+ auto const input_size = input_tensors.size();
+ auto const output_size = output_tensors.size();
+ inputs_inter.resize(input_size);
+ outputs_inter.resize(output_size);
+
+ // Set inputs of executor
+ // TODO Create layer to allocate/deallocate buffers of EdgeTensor for each executor
for (uint32_t i = 0; i < input_size; i++)
- desc2.inputs[i] = std::make_unique<InputDesc>(*buf_infos[i], bufs[i].get(),
- buf_infos[i]->total_size(), layout);
+ {
+ const auto input_pkg_index = find_input_index(_model_edges->pkg_inputs, model_index,
+ ir::SubgraphIndex{0}, ir::IOIndex{i});
+ const auto input_io_desc = ir::IODesc{model_index, ir::SubgraphIndex{0}, ir::IOIndex{i}};
+ if (input_pkg_index != -1)
+ {
+ // Allocate type-aware quantization tensors for nnpkg inputs and set internal tensors
+ if (_pkg_input_quant_tensors.find(input_io_desc) != _pkg_input_quant_tensors.end())
+ {
+ _pkg_input_quant_tensors[input_io_desc]->allocate_buffer();
+
+ inputs_inter[i] = _pkg_input_quant_tensors[input_io_desc].get();
+ }
+ else
+ {
+ inputs_inter[i] = _pkg_input_tensors[input_io_desc].get();
+ }
+
+ // Set buffer of IOTensor
+ auto input_desc = desc.inputs[input_pkg_index].get();
+ // TODO Remove const_cast (we need const_cast as ITensor is writable)
+ _pkg_input_tensors[input_io_desc]->setUserTensor(
+ reinterpret_cast<uint8_t *>(const_cast<void *>(input_desc->buffer)), input_desc->size);
+ }
+ else
+ {
+ auto from_iodesc = find_from(model_index, ir::SubgraphIndex{0}, ir::IOIndex{i});
+ const auto &from_model_index = std::get<ir::ModelIndex>(from_iodesc);
+ const auto &from_subg_index = std::get<ir::SubgraphIndex>(from_iodesc);
+ const auto &from_ioindex = std::get<ir::IOIndex>(from_iodesc).value();
+
+ // Only sequential execution of models is supported
+ assert(from_model_index.value() < model_index.value());
+ assert(from_subg_index.value() == 0);
+ const auto from_executor = _executors.at({from_model_index, from_subg_index}).get();
+ const auto to_iodesc = ir::IODesc{model_index, ir::SubgraphIndex{0}, ir::IOIndex{i}};
+ if (_edge_quant_tensors.find(to_iodesc) == _edge_quant_tensors.end())
+ {
+ inputs_inter[i] = from_executor->getOutputTensors().at(from_ioindex);
+ }
+ else
+ {
+ inputs_inter[i] = _edge_quant_tensors.at(to_iodesc).get();
+ }
+ assert(inputs_inter[i]->buffer() != nullptr);
+ }
+ }
+
+ // Set outputs of executor
for (uint32_t i = 0; i < output_size; i++)
- desc2.outputs[i] = std::make_unique<OutputDesc>(*desc.outputs[i].get());
+ {
+ const auto output_pkg_index = find_output_index(_model_edges->pkg_outputs, model_index,
+ ir::SubgraphIndex{0}, ir::IOIndex{i});
+ const auto output_io_desc = ir::IODesc{model_index, ir::SubgraphIndex{0}, ir::IOIndex{i}};
+ if (output_pkg_index != -1)
+ {
+ // Allocate type-aware quantization tensors for nnpkg outputs and set internal tensors
+ if (_pkg_output_quant_tensors.find(output_io_desc) != _pkg_output_quant_tensors.end())
+ {
+ _pkg_output_quant_tensors[output_io_desc]->allocate_buffer();
+
+ outputs_inter[i] = _pkg_output_quant_tensors[output_io_desc].get();
+ }
+ else
+ {
+ outputs_inter[i] = _pkg_output_tensors[output_io_desc].get();
+ }
+
+ // Set buffer of IOTensor
+ auto output_desc = desc.outputs[output_pkg_index].get();
+ _pkg_output_tensors[output_io_desc]->setUserTensor(
+ reinterpret_cast<uint8_t *>(output_desc->buffer), output_desc->size);
+ }
+ else
+ {
+ // Allocate buffer of `from` tensors
+ const auto from_iodesc = ir::IODesc{model_index, ir::SubgraphIndex{0}, ir::IOIndex{i}};
+ _edge_tensors[from_iodesc]->allocate_buffer();
+ outputs_inter[i] = _edge_tensors[from_iodesc].get();
- executor2->execute(desc2);
+ // Allocate buffer of tensors for type-aware quantization
+ for (const auto &to_iodesc : _edge_map[from_iodesc])
+ {
+ _edge_tensors[from_iodesc]->increase_ref();
+ if (_edge_quant_tensors.find(to_iodesc) != _edge_quant_tensors.end())
+ {
+ auto type_aware_quant_tensor = _edge_quant_tensors.at(to_iodesc).get();
+ type_aware_quant_tensor->allocate_buffer();
+
+ _edge_tensors[from_iodesc]->decrease_ref();
+ }
+ }
+ }
+ }
+
+ _pkg_input_quant_layers[{model_index, ir::SubgraphIndex{0}}]->run();
+
+ executor->execute(inputs_inter, outputs_inter);
+
+ _edge_quant_layers[{model_index, ir::SubgraphIndex{0}}]->run();
+ _pkg_output_quant_layers[{model_index, ir::SubgraphIndex{0}}]->run();
+
+ // Release input buffers that are no longer needed
+ for (uint32_t i = 0; i < input_size; i++)
+ {
+ const auto input_pkg_index = find_input_index(_model_edges->pkg_inputs, model_index,
+ ir::SubgraphIndex{0}, ir::IOIndex{i});
+
+ const auto to_iodesc = ir::IODesc{model_index, ir::SubgraphIndex{0}, ir::IOIndex{i}};
+ if (input_pkg_index == -1)
+ {
+ if (_edge_quant_tensors.find(to_iodesc) != _edge_quant_tensors.end())
+ {
+ // Decrease reference count of the tensor for type-aware quantization
+ // if the input tensor is that tensor
+ _edge_quant_tensors[to_iodesc]->decrease_ref();
+ }
+ else
+ {
+ // Decrease reference count of `from` tensor if input tensor is the `from` tensor
+ const auto from_iodesc = find_from(model_index, ir::SubgraphIndex{0}, ir::IOIndex{i});
+ _edge_tensors[from_iodesc]->decrease_ref();
+
+ // Decrease reference count of nnpkg inputs
+ if (_pkg_input_quant_tensors.find(to_iodesc) != _pkg_input_quant_tensors.end())
+ {
+ _pkg_input_quant_tensors[to_iodesc]->decrease_ref();
+ }
+ }
+ }
+ }
+
+ // Release output buffers if they are no longer used by other executors because of
+ // type-aware quantization
+ // FIXME Remove this when tensors for type-aware quantization are unified for the same `from` tensor and type
+ for (uint32_t i = 0; i < output_size; i++)
+ {
+ auto from_iodesc = ir::IODesc{model_index, ir::SubgraphIndex{0}, ir::IOIndex{i}};
+
+ // Check if other executors will use the buffer of edge tensor
+ const auto &to_list = _edge_map[from_iodesc];
+ if (to_list.size() == 0)
+ {
+ // This condition means `from_iodesc` tensor is an output of nnpkg
+ continue;
+ }
+
+ bool to_be_release =
+ !std::any_of(to_list.begin(), to_list.end(), [&](const ir::IODesc &to_iodesc) {
+ // This condition means another executor uses the buffer of edge tensor
+ return _edge_quant_tensors.find(to_iodesc) == _edge_quant_tensors.end();
+ });
+
+ if (to_be_release)
+ {
+ // This edge tensor's buffer won't be used in other executors
+ // Tensors for type-aware quantization take over the role of this edge tensor instead
+ _edge_tensors[from_iodesc]->decrease_ref();
+ }
+
+ // Decrease reference count of nnpkg outputs
+ if (_pkg_output_quant_tensors.find(from_iodesc) != _pkg_output_quant_tensors.end())
+ {
+ _pkg_output_quant_tensors[from_iodesc]->decrease_ref();
+ }
+ }
}
}
+// modelCount() iterates over _executors.
+// It assumes that the Compiler generates an Executor for every model and that _executors
+// contains all generated Executors.
+// If the nnpackage includes unconnected model(s) for which the Compiler does not generate
+// an Executor, modelCount() returns a smaller value than the real model count.
+uint16_t Executors::modelCount() const
+{
+ uint16_t model_count = 0;
+ for (; _executors.find(std::make_pair(ir::ModelIndex{model_count}, ir::SubgraphIndex{0})) !=
+ _executors.end();
+ model_count++)
+ ;
+
+ return model_count;
+}
+
} // namespace exec
} // namespace onert
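Note: the buffer handoff between executors above relies on EdgeTensor's manual reference counting: the producing executor allocates the buffer, each consumer holds a reference, and the buffer is freed once the last reference is dropped. A distilled standalone sketch of that scheme (simplified, not the class from this patch):

#include <cassert>
#include <cstdint>
#include <memory>

class RefCountedEdgeBuffer
{
public:
  void allocate(size_t size)
  {
    _data = std::make_unique<uint8_t[]>(size);
    _ref_count = 1; // producer's reference, taken when the buffer is allocated
  }
  void increase_ref() { ++_ref_count; } // one per consuming executor
  void decrease_ref()
  {
    assert(_ref_count > 0);
    if (--_ref_count == 0)
      _data.reset(); // last consumer done: release the edge buffer
  }
  uint8_t *data() { return _data.get(); }

private:
  std::unique_ptr<uint8_t[]> _data;
  int32_t _ref_count = 0;
};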
diff --git a/runtime/onert/core/src/exec/Executors.h b/runtime/onert/core/src/exec/Executors.h
new file mode 100644
index 000000000..ac7489186
--- /dev/null
+++ b/runtime/onert/core/src/exec/Executors.h
@@ -0,0 +1,169 @@
+/*
+ * Copyright (c) 2022 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __ONERT_EXEC_EXECUTORS_H__
+#define __ONERT_EXEC_EXECUTORS_H__
+
+#include "exec/IExecutors.h"
+#include "ir/NNPkg.h"
+#include "IPermuteFunction.h"
+
+namespace std
+{
+
+template <> struct hash<std::pair<::onert::ir::ModelIndex, ::onert::ir::SubgraphIndex>>
+{
+ size_t
+ operator()(const std::pair<::onert::ir::ModelIndex, ::onert::ir::SubgraphIndex> &pair) const
+ noexcept
+ {
+ return (hash<uint32_t>()(pair.first.value()) << 16) ^ hash<uint32_t>()(pair.second.value());
+ }
+};
+
+} // namespace std
+
+namespace onert
+{
+namespace exec
+{
+
+/**
+ * @brief Class to gather executors
+ */
+class Executors : public IExecutors
+{
+public:
+ Executors(void) = delete;
+ Executors(std::unique_ptr<ir::ModelEdges> model_edges)
+ : _executors{}, _model_edges{std::move(model_edges)}, _edge_quant_layers{},
+ _edge_quant_tensors{}, _edge_tensors{}, _is_created_edge_quant_layers{false},
+ _pkg_input_quant_layers{}, _pkg_output_quant_layers{}, _pkg_input_quant_tensors{},
+ _pkg_output_quant_tensors{}, _pkg_input_tensors{}, _pkg_output_tensors{}
+ {
+ for (const auto &edge : _model_edges->edges)
+ {
+ _edge_map[edge.from].emplace_back(edge.to);
+ }
+ }
+ Executors(const Executors &) = delete;
+ Executors(Executors &&) = default;
+ ~Executors() = default;
+
+ // TODO Use Executor index
+ void emplace(const ir::ModelIndex &model_index, const ir::SubgraphIndex &subg_index,
+ std::unique_ptr<IExecutor> exec) override;
+
+ IExecutor *at(const ir::ModelIndex &model_index,
+ const ir::SubgraphIndex &subg_index) const override;
+
+ uint32_t inputSize() const override;
+
+ uint32_t outputSize() const override;
+
+ const ir::OperandInfo &inputInfo(const ir::IOIndex &index) const override;
+
+ const ir::OperandInfo &outputInfo(const ir::IOIndex &index) const override;
+
+ void execute(const IODescription &desc) override;
+
+private:
+ void checkSupportedMultimodel() const;
+ void createEdgeQuantLayers();
+ void CreatePkgIOTensors(const IODescription &desc);
+ void createPkgIOQuantLayers(const IODescription &desc);
+ uint16_t modelCount() const;
+
+private:
+ // TODO Remove this class
+ class PermuteLayer : public exec::IPermuteFunction
+ {
+ public:
+ PermuteLayer(const std::vector<backend::ITensor *> &inputs,
+ const std::vector<backend::ITensor *> &outputs)
+ {
+ assert(inputs.size() == outputs.size());
+ _src_tensors = inputs;
+ _dst_tensors = outputs;
+ }
+ virtual ~PermuteLayer() {}
+ void optimize() override {}
+ };
+
+ class EdgeTensor;
+
+private:
+ std::unordered_map<std::pair<ir::ModelIndex, ir::SubgraphIndex>, std::unique_ptr<IExecutor>>
+ _executors;
+
+ // NOTE _model_edges may use different struct type for executor implementation
+ std::unique_ptr<ir::ModelEdges> _model_edges;
+ std::unordered_map<ir::IODesc, std::vector<ir::IODesc>> _edge_map;
+
+ /**
+ * @brief Type-aware quantization layers for edges between executors
+ *
+ */
+ // TODO Move variables related to type-aware quantization for edges into compilation stage
+ // TODO Replace PermuteLayer with backend::builtin::kernel::PermuteLayer
+ std::unordered_map<std::pair<ir::ModelIndex, ir::SubgraphIndex>, std::unique_ptr<PermuteLayer>>
+ _edge_quant_layers;
+
+ /**
+ * @brief Tensors for type-aware quantization of edges
+ * Key: `to` IODesc, Value: EdgeTensor
+ */
+ // Q: Why is the key the `to` IODesc?
+ // A: These tensors are currently created depending on the type of `to`.
+ // TODO Unify tensors with the same `from` tensor and same type
+ // NOTE The incomplete type 'EdgeTensor' cannot be declared as unique_ptr.
+ std::unordered_map<ir::IODesc, std::shared_ptr<EdgeTensor>> _edge_quant_tensors;
+
+ /**
+ * @brief Tensors for edges between executors that are not related to type-aware quantization
+ * Key: `from` IODesc, Value: EdgeTensor
+ */
+ // Q: Why is the key the `from` IODesc?
+ // A: A `from` tensor can be connected to multiple `to` tensors.
+ // NOTE The incomplete type 'EdgeTensor' cannot be declared as unique_ptr.
+ std::unordered_map<ir::IODesc, std::shared_ptr<EdgeTensor>> _edge_tensors;
+ /**
+ * @brief Whether type-aware quantization layers for edges between executors are created
+ *
+ */
+ // TODO Remove this member after the creation of type-aware quantization layers for edges
+ // is moved into compilation stage
+ bool _is_created_edge_quant_layers;
+
+ // TODO Replace PermuteLayer with backend::builtin::kernel::PermuteLayer
+ std::unordered_map<std::pair<ir::ModelIndex, ir::SubgraphIndex>, std::unique_ptr<PermuteLayer>>
+ _pkg_input_quant_layers;
+ // TODO Replace PermuteLayer with backend::builtin::kernel::PermuteLayer
+ std::unordered_map<std::pair<ir::ModelIndex, ir::SubgraphIndex>, std::unique_ptr<PermuteLayer>>
+ _pkg_output_quant_layers;
+ // Edge tensors of nnpkg inputs/outputs for type-aware quantization
+ std::unordered_map<ir::IODesc, std::shared_ptr<EdgeTensor>> _pkg_input_quant_tensors;
+ std::unordered_map<ir::IODesc, std::shared_ptr<EdgeTensor>> _pkg_output_quant_tensors;
+ // IOTensors for user buffer
+ std::unordered_map<ir::IODesc, std::unique_ptr<backend::builtin::IOTensor>> _pkg_input_tensors;
+ std::unordered_map<ir::IODesc, std::unique_ptr<backend::builtin::IOTensor>> _pkg_output_tensors;
+};
+
+} // namespace exec
+} // namespace onert
+
+#endif // __ONERT_EXEC_EXECUTORS_H__
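Note: the std::hash specialization above packs the two executor indices into one key by shifting the model index and XOR-ing the subgraph index. A standalone illustration, assuming both indices fit in 16 bits (and that std::hash<uint32_t> maps small integers to themselves, which is typical but not guaranteed by the standard):

#include <cstdint>
#include <functional>
#include <iostream>

int main()
{
  const uint16_t model_index = 1;
  const uint16_t subg_index = 2;
  const size_t key =
    (std::hash<uint32_t>()(model_index) << 16) ^ std::hash<uint32_t>()(subg_index);
  std::cout << std::hex << key << '\n'; // prints 10002 with an identity-like hash
  return 0;
}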
diff --git a/runtime/onert/core/src/exec/IPermuteFunction.cc b/runtime/onert/core/src/exec/IPermuteFunction.cc
new file mode 100644
index 000000000..9d548e6dc
--- /dev/null
+++ b/runtime/onert/core/src/exec/IPermuteFunction.cc
@@ -0,0 +1,320 @@
+/*
+ * Copyright (c) 2023 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "IPermuteFunction.h"
+
+#include <cker/operation/Quantize.h>
+#include <cker/operation/Dequantize.h>
+#include "backend/IPortableTensor.h"
+#include "exec/IFunction.h"
+#include "ir/Index.h"
+#include "ir/Shape.h"
+#include <memory>
+#include <misc/polymorphic_downcast.h>
+#include <typeinfo>
+#include "util/Utils.h"
+#include <vector>
+#include <unordered_map>
+
+namespace
+{
+using namespace onert;
+
+inline nnfw::cker::Shape getShape(const backend::ITensor *tensor)
+{
+ const ir::Shape shape = tensor->getShape();
+
+ assert(tensor->layout() == ir::Layout::NHWC);
+
+ auto rank = shape.rank();
+ nnfw::cker::Shape ret(rank);
+ auto data = ret.DimsData();
+ for (int i = 0; i < rank; ++i)
+ {
+ data[i] = shape.dim(i);
+ }
+ return ret;
+}
+
+// Quantize per element
+template <typename InputT, typename OutputT>
+void elementwiseQuantize(const backend::ITensor *src_tensor, backend::ITensor *dst_tensor)
+{
+ const auto scale = dst_tensor->data_scale();
+ const auto zero_point = dst_tensor->data_zero_point();
+
+ int min_val = std::numeric_limits<OutputT>::min();
+ int max_val = std::numeric_limits<OutputT>::max();
+
+ auto loop_shape = src_tensor->getShape();
+ const auto src_layout = src_tensor->layout();
+ const auto dst_layout = dst_tensor->layout();
+ const bool is_permutation = src_layout != dst_layout && loop_shape.rank() == 4;
+ ShapeLoop(loop_shape, [&](const onert::ir::Coordinates &coords) {
+ const InputT *input_data =
+ reinterpret_cast<const InputT *>(src_tensor->buffer() + src_tensor->calcOffset(coords));
+ int32_t unclamped = static_cast<int32_t>(round(*input_data / scale)) + zero_point;
+ int32_t clamped = std::min(std::max(unclamped, min_val), max_val);
+
+ ir::Coordinates dst_coords =
+ is_permutation ? ir::convertCoordinates(coords, src_layout, dst_layout) : coords;
+ OutputT *output_data =
+ reinterpret_cast<OutputT *>(dst_tensor->buffer() + dst_tensor->calcOffset(dst_coords));
+ *output_data = clamped;
+ });
+}
+
+// TODO Optimize the case where tensors have the same layout
+template <typename InputT, typename OutputT>
+void quantize(const backend::ITensor *src_tensor, backend::ITensor *dst_tensor)
+{
+ if (!src_tensor->has_padding() && !dst_tensor->has_padding() &&
+ src_tensor->layout() == dst_tensor->layout() && !src_tensor->is_dynamic())
+ {
+ assert(!dst_tensor->is_dynamic());
+
+ // Call optimized neon kernel
+ nnfw::cker::Quantize(getShape(src_tensor),
+ reinterpret_cast<const InputT *>(src_tensor->buffer()),
+ getShape(dst_tensor), reinterpret_cast<OutputT *>(dst_tensor->buffer()),
+ dst_tensor->data_scale(), dst_tensor->data_zero_point());
+ }
+ else
+ {
+ elementwiseQuantize<InputT, OutputT>(src_tensor, dst_tensor);
+ }
+}
+
+// Dequantize per element
+template <typename InputT, typename OutputT>
+void elementwiseDequantize(const backend::ITensor *src_tensor, backend::ITensor *dst_tensor)
+{
+ const auto scale = src_tensor->data_scale();
+ const auto zero_point = src_tensor->data_zero_point();
+
+ auto loop_shape = src_tensor->getShape();
+ const auto src_layout = src_tensor->layout();
+ const auto dst_layout = dst_tensor->layout();
+ const bool is_permutation = src_layout != dst_layout && loop_shape.rank() == 4;
+ ShapeLoop(loop_shape, [&](const onert::ir::Coordinates &coords) {
+ const InputT *input_data =
+ reinterpret_cast<const InputT *>(src_tensor->buffer() + src_tensor->calcOffset(coords));
+ const OutputT result = static_cast<OutputT>(scale * (*input_data - zero_point));
+
+ ir::Coordinates dst_coords =
+ is_permutation ? ir::convertCoordinates(coords, src_layout, dst_layout) : coords;
+ OutputT *output_data =
+ reinterpret_cast<OutputT *>(dst_tensor->buffer() + dst_tensor->calcOffset(dst_coords));
+ *output_data = result;
+ });
+}
+
+// TODO Optimize the case where tensors have the same layout
+template <typename InputT, typename OutputT>
+void dequantize(const backend::ITensor *src_tensor, backend::ITensor *dst_tensor)
+{
+ if (!src_tensor->has_padding() && !dst_tensor->has_padding() &&
+ src_tensor->layout() == dst_tensor->layout() && !src_tensor->is_dynamic())
+ {
+ assert(!dst_tensor->is_dynamic());
+
+ // Call optimized neon kernel
+ nnfw::cker::Dequantize(getShape(src_tensor),
+ reinterpret_cast<const InputT *>(src_tensor->buffer()),
+ getShape(dst_tensor), reinterpret_cast<OutputT *>(dst_tensor->buffer()),
+ src_tensor->data_scale(), src_tensor->data_zero_point());
+ }
+ else
+ {
+ elementwiseDequantize<InputT, OutputT>(src_tensor, dst_tensor);
+ }
+}
+
+template <typename SRC_T, typename DST_T,
+ std::enable_if_t<std::is_base_of<backend::ITensor, SRC_T>::value &&
+ std::is_base_of<backend::ITensor, DST_T>::value,
+ bool> = true>
+void typeAwareQuantize(const SRC_T *src_tensor, DST_T *dst_tensor)
+{
+ // TODO Support other types
+ if (src_tensor->data_type() == ir::DataType::FLOAT32)
+ {
+ switch (dst_tensor->data_type())
+ {
+ case ir::DataType::QUANT_UINT8_ASYMM:
+ {
+ quantize<float, uint8_t>(src_tensor, dst_tensor);
+ break;
+ }
+ case ir::DataType::QUANT_INT8_SYMM:
+ {
+ quantize<float, int8_t>(src_tensor, dst_tensor);
+ break;
+ }
+ case ir::DataType::QUANT_INT16_SYMM:
+ {
+ quantize<float, int16_t>(src_tensor, dst_tensor);
+ break;
+ }
+ default:
+ {
+ throw std::runtime_error("IPermuteFunction: Unsupported quantization type");
+ break;
+ }
+ }
+ }
+ else if (dst_tensor->data_type() == ir::DataType::FLOAT32)
+ {
+ switch (src_tensor->data_type())
+ {
+ case ir::DataType::QUANT_UINT8_ASYMM:
+ {
+ dequantize<uint8_t, float>(src_tensor, dst_tensor);
+ break;
+ }
+ case ir::DataType::QUANT_INT8_SYMM:
+ {
+ dequantize<int8_t, float>(src_tensor, dst_tensor);
+ break;
+ }
+ case ir::DataType::QUANT_INT16_SYMM:
+ {
+ dequantize<int16_t, float>(src_tensor, dst_tensor);
+ break;
+ }
+ default:
+ {
+ throw std::runtime_error("IPermuteFunction: Unsupported dequantization type");
+ break;
+ }
+ }
+ }
+ else
+ {
+ throw std::runtime_error("IPermuteFunction: Unsupported type for type-aware quantization yet");
+ }
+}
+
+} // namespace
+
+namespace onert
+{
+namespace exec
+{
+
+void IPermuteFunction::run()
+{
+ // TODO Optimization: Ensure control does not reach here when (_src_tensors.size() == 0)
+ assert(_src_tensors.size() == _dst_tensors.size());
+ if (_src_tensors_offsets.size() == 0)
+ {
+ _src_tensors_offsets.resize(_src_tensors.size());
+ _dst_tensors_offsets.resize(_dst_tensors.size());
+ }
+ assert(_src_tensors.size() == _src_tensors_offsets.size());
+ assert(_src_tensors_offsets.size() == _dst_tensors_offsets.size());
+
+ for (size_t i = 0; i < _src_tensors.size(); ++i)
+ {
+ auto src_tensor = _src_tensors.at(i);
+ auto dst_tensor = _dst_tensors.at(i);
+ auto &src_offsets = _src_tensors_offsets.at(i);
+ auto &dst_offsets = _dst_tensors_offsets.at(i);
+ if (src_tensor != dst_tensor)
+ {
+ const auto rank = src_tensor->getShape().rank();
+ permute(src_tensor, dst_tensor, rank, src_offsets, dst_offsets);
+ }
+ }
+}
+
+void IPermuteFunction::permute(backend::ITensor *src_tensor, backend::ITensor *dst_tensor,
+ size_t rank, std::vector<size_t> &src_offsets,
+ std::vector<size_t> &dst_offsets)
+{
+ if (src_tensor->total_size() == 0)
+ {
+ assert(dst_tensor->total_size() == 0);
+ return;
+ }
+
+ assert(src_tensor != dst_tensor);
+ if (underlying_type(src_tensor->data_type()) != underlying_type(dst_tensor->data_type()))
+ {
+ typeAwareQuantize(src_tensor, dst_tensor);
+ return;
+ }
+
+ switch (src_tensor->data_type())
+ {
+ case ir::DataType::FLOAT32:
+ permute<float>(src_tensor, dst_tensor, rank, src_offsets, dst_offsets);
+ break;
+ case ir::DataType::INT32:
+ permute<int32_t>(src_tensor, dst_tensor, rank, src_offsets, dst_offsets);
+ break;
+ case ir::DataType::UINT32:
+ permute<uint32_t>(src_tensor, dst_tensor, rank, src_offsets, dst_offsets);
+ break;
+ case ir::DataType::BOOL8:
+ case ir::DataType::QUANT_UINT8_ASYMM:
+ case ir::DataType::UINT8:
+ permute<uint8_t>(src_tensor, dst_tensor, rank, src_offsets, dst_offsets);
+ break;
+ case ir::DataType::QUANT_INT8_ASYMM:
+ case ir::DataType::QUANT_INT8_SYMM:
+ permute<int8_t>(src_tensor, dst_tensor, rank, src_offsets, dst_offsets);
+ break;
+ case ir::DataType::INT64:
+ permute<int64_t>(src_tensor, dst_tensor, rank, src_offsets, dst_offsets);
+ break;
+ case ir::DataType::QUANT_INT16_SYMM:
+ permute<int16_t>(src_tensor, dst_tensor, rank, src_offsets, dst_offsets);
+ break;
+ default:
+ throw std::runtime_error("IPermuteFunction: Not supported data type");
+ break;
+ }
+}
+
+const std::type_info &IPermuteFunction::underlying_type(ir::DataType type) const
+{
+ switch (type)
+ {
+ case ir::DataType::FLOAT32:
+ return typeid(float);
+ case ir::DataType::INT32:
+ return typeid(int32_t);
+ case ir::DataType::UINT32:
+ return typeid(uint32_t);
+ case ir::DataType::INT64:
+ return typeid(int64_t);
+ case ir::DataType::BOOL8:
+ case ir::DataType::QUANT_UINT8_ASYMM:
+ case ir::DataType::UINT8:
+ return typeid(uint8_t);
+ case ir::DataType::QUANT_INT8_ASYMM:
+ case ir::DataType::QUANT_INT8_SYMM:
+ return typeid(int8_t);
+ case ir::DataType::QUANT_INT16_SYMM:
+ return typeid(int16_t);
+ default:
+ throw std::runtime_error("IPermuteFunction: Not supported data type");
+ }
+}
+
+} // namespace exec
+} // namespace onert
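For reference, the quantize/dequantize helpers added above implement the usual affine mapping q = clamp(round(x / scale) + zero_point, qmin, qmax) and x = scale * (q - zero_point). A minimal stand-alone sketch of that arithmetic follows; the scale and zero-point values are made up for illustration and are not taken from this patch.

#include <algorithm>
#include <cmath>
#include <cstdint>
#include <cstdio>

int main()
{
  const float scale = 10.0f;      // hypothetical quantization scale
  const int32_t zero_point = 128; // hypothetical zero point (asymmetric uint8)
  const float x = -20.0f;

  // float -> quantized, mirroring elementwiseQuantize
  const int32_t qmin = 0, qmax = 255;
  int32_t unclamped = static_cast<int32_t>(std::round(x / scale)) + zero_point;
  uint8_t q = static_cast<uint8_t>(std::min(std::max(unclamped, qmin), qmax));

  // quantized -> float, mirroring elementwiseDequantize
  float x_back = scale * (q - zero_point);

  std::printf("q = %u, dequantized = %.1f\n", static_cast<unsigned>(q), x_back);
  return 0;
}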
diff --git a/runtime/onert/core/src/exec/IPermuteFunction.h b/runtime/onert/core/src/exec/IPermuteFunction.h
index eb54b67ae..e790f3290 100644
--- a/runtime/onert/core/src/exec/IPermuteFunction.h
+++ b/runtime/onert/core/src/exec/IPermuteFunction.h
@@ -25,11 +25,7 @@
#include "backend/ITensor.h"
#include "exec/IFunction.h"
-#include "ir/Index.h"
-#include "ir/Shape.h"
#include <memory>
-#include <typeinfo>
-#include "util/Utils.h"
#include <vector>
#include <unordered_map>
@@ -79,31 +75,7 @@ protected:
};
public:
- virtual void run() override
- {
- // TODO Optimization : Make control does not reach here? when (_src_tensors.size() == 0)
- assert(_src_tensors.size() == _dst_tensors.size());
- if (_src_tensors_offsets.size() == 0)
- {
- _src_tensors_offsets.resize(_src_tensors.size());
- _dst_tensors_offsets.resize(_dst_tensors.size());
- }
- assert(_src_tensors.size() == _src_tensors_offsets.size());
- assert(_src_tensors_offsets.size() == _dst_tensors_offsets.size());
-
- for (size_t i = 0; i < _src_tensors.size(); ++i)
- {
- auto src_tensor = _src_tensors.at(i);
- auto dst_tensor = _dst_tensors.at(i);
- auto &src_offsets = _src_tensors_offsets.at(i);
- auto &dst_offsets = _dst_tensors_offsets.at(i);
- if (src_tensor != dst_tensor)
- {
- const auto rank = src_tensor->getShape().rank();
- permute(src_tensor, dst_tensor, rank, src_offsets, dst_offsets);
- }
- }
- }
+ virtual void run() override;
virtual void prepare() override { optimize(); }
@@ -111,48 +83,7 @@ public:
protected:
void permute(backend::ITensor *src_tensor, backend::ITensor *dst_tensor, size_t rank,
- std::vector<size_t> &src_offsets, std::vector<size_t> &dst_offsets)
- {
- if (src_tensor->total_size() == 0)
- {
- assert(dst_tensor->total_size() == 0);
- return;
- }
-
- assert(src_tensor != dst_tensor);
- if (underlying_type(src_tensor->data_type()) != underlying_type(dst_tensor->data_type()))
- throw std::runtime_error("data type does not match");
- switch (src_tensor->data_type())
- {
- case ir::DataType::FLOAT32:
- permute<float>(src_tensor, dst_tensor, rank, src_offsets, dst_offsets);
- break;
- case ir::DataType::INT32:
- permute<int32_t>(src_tensor, dst_tensor, rank, src_offsets, dst_offsets);
- break;
- case ir::DataType::UINT32:
- permute<uint32_t>(src_tensor, dst_tensor, rank, src_offsets, dst_offsets);
- break;
- case ir::DataType::BOOL8:
- case ir::DataType::QUANT_UINT8_ASYMM:
- case ir::DataType::UINT8:
- permute<uint8_t>(src_tensor, dst_tensor, rank, src_offsets, dst_offsets);
- break;
- case ir::DataType::QUANT_INT8_ASYMM:
- case ir::DataType::QUANT_INT8_SYMM:
- permute<int8_t>(src_tensor, dst_tensor, rank, src_offsets, dst_offsets);
- break;
- case ir::DataType::INT64:
- permute<int64_t>(src_tensor, dst_tensor, rank, src_offsets, dst_offsets);
- break;
- case ir::DataType::QUANT_INT16_SYMM:
- permute<int16_t>(src_tensor, dst_tensor, rank, src_offsets, dst_offsets);
- break;
- default:
- throw std::runtime_error("IPermuteFunction: Not supported data type");
- break;
- }
- }
+ std::vector<size_t> &src_offsets, std::vector<size_t> &dst_offsets);
private:
// TODO make src const by proving const access()
@@ -322,31 +253,7 @@ protected:
// NOTE The typeid expression is lvalue expression which refers to an object with static storage
// duration, of the polymorphic type const std::type_info or of some type derived from it.
// So std::type_info is non-copyable
- const std::type_info &underlying_type(ir::DataType type) const
- {
- switch (type)
- {
- case ir::DataType::FLOAT32:
- return typeid(float);
- case ir::DataType::INT32:
- return typeid(int32_t);
- case ir::DataType::UINT32:
- return typeid(uint32_t);
- case ir::DataType::INT64:
- return typeid(int64_t);
- case ir::DataType::BOOL8:
- case ir::DataType::QUANT_UINT8_ASYMM:
- case ir::DataType::UINT8:
- return typeid(uint8_t);
- case ir::DataType::QUANT_INT8_ASYMM:
- case ir::DataType::QUANT_INT8_SYMM:
- return typeid(int8_t);
- case ir::DataType::QUANT_INT16_SYMM:
- return typeid(int16_t);
- default:
- throw std::runtime_error("IPermuteFunction: Not supported data type");
- }
- }
+ const std::type_info &underlying_type(ir::DataType type) const;
protected:
std::vector<backend::ITensor *> _src_tensors;
diff --git a/runtime/onert/core/src/exec/IPermuteFunction.test.cc b/runtime/onert/core/src/exec/IPermuteFunction.test.cc
new file mode 100644
index 000000000..1009f194d
--- /dev/null
+++ b/runtime/onert/core/src/exec/IPermuteFunction.test.cc
@@ -0,0 +1,902 @@
+/*
+ * Copyright (c) 2023 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "IPermuteFunction.h"
+
+#include <ir/Layout.h>
+#include <ir/Shape.h>
+#include <ir/TypeInfo.h>
+
+#include <cmath>
+#include <gtest/gtest.h>
+
+namespace
+{
+using namespace onert;
+using namespace ir;
+using namespace backend;
+using namespace exec;
+
+class MockUpTensor : public ITensor
+{
+public:
+ MockUpTensor(const Shape &shape, const TypeInfo &type_info, Layout layout, size_t pad)
+ : _shape(shape), _type_info(type_info), _layout(layout), _data(nullptr)
+ {
+ _strides.resize(shape.rank());
+
+ std::vector<size_t> pads(shape.rank(), 0);
+ pads[shape.rank() - 1] = pad;
+ size_t stride = 1;
+ for (int32_t i = _shape.rank() - 1; i >= 0; --i)
+ {
+ _strides.at(i) = stride;
+ stride = stride * (_shape.dim(i) + pads.at(i));
+ }
+ }
+ virtual ~MockUpTensor() {}
+
+ void setBuffer(uint8_t *data) { _data = data; }
+
+ size_t total_size() const override
+ {
+ size_t total_size = _strides[0] * _shape.dim(0);
+ total_size *= sizeOfDataType(data_type());
+ return total_size;
+ }
+
+ size_t calcOffset(const ir::Coordinates &coords) const override
+ {
+ size_t offset = 0;
+ for (size_t i = 0; i < _shape.rank(); ++i)
+ {
+ offset += (_strides[i] * coords[i]);
+ }
+ offset *= sizeOfDataType(data_type());
+ return offset;
+ }
+
+ uint8_t *buffer() const override { return _data; }
+
+ ir::Layout layout() const override { return _layout; }
+ ir::DataType data_type() const override { return _type_info.type(); }
+ float data_scale() const override { return _type_info.scale(); }
+ int32_t data_zero_point() const override { return _type_info.zero_point(); }
+ const std::vector<float> &data_scales() const override { return _type_info.scales(); }
+ const std::vector<int32_t> &data_zero_points() const override { return _type_info.zero_points(); }
+ bool has_padding() const override
+ {
+ return total_size() / sizeOfDataType(data_type()) != _shape.num_elements();
+ }
+ void access(const std::function<void(ITensor &tensor)> &fn) final { fn(*this); }
+
+ bool is_dynamic() const override { return false; }
+ Shape getShape() const override { return _shape; }
+
+private:
+ Shape _shape;
+ TypeInfo _type_info;
+ Layout _layout;
+ uint8_t *_data;
+ std::vector<size_t> _strides;
+};
+
+class MockUpLayer : public IPermuteFunction
+{
+public:
+ MockUpLayer(const std::vector<ITensor *> &inputs, const std::vector<ITensor *> &outputs)
+ {
+ assert(inputs.size() == outputs.size());
+ _src_tensors = inputs;
+ _dst_tensors = outputs;
+ }
+ virtual ~MockUpLayer() {}
+ void optimize() override {}
+};
+
+TEST(IPermuteFunction, float_rank1)
+{
+ const size_t input_pads[4] = {0, 1, 0, 2};
+ const size_t output_pads[4] = {0, 0, 2, 1};
+ const std::vector<Shape> shapes{{1}, {4}, {5}, {2}};
+ float expected_buffer[] = {1, 0, -1, -2, 3};
+ const auto type_info = TypeInfo(DataType::FLOAT32);
+
+ std::vector<std::unique_ptr<MockUpTensor>> inputs(4);
+ std::vector<std::unique_ptr<MockUpTensor>> outputs(4);
+
+ std::vector<std::unique_ptr<uint8_t[]>> output_buffers(4);
+ for (size_t i = 0; i < 4; ++i)
+ {
+ inputs[i] = std::make_unique<MockUpTensor>(shapes[i], type_info, Layout::NHWC, input_pads[i]);
+ inputs[i]->setBuffer(reinterpret_cast<uint8_t *>(expected_buffer));
+
+ outputs[i] = std::make_unique<MockUpTensor>(shapes[i], type_info, Layout::NHWC, output_pads[i]);
+ output_buffers[i] = std::make_unique<uint8_t[]>(outputs[i]->total_size());
+ outputs[i]->setBuffer(output_buffers[i].get());
+ }
+
+ auto mockup_layer = std::make_unique<MockUpLayer>(
+ std::vector<ITensor *>{inputs[0].get(), inputs[1].get(), inputs[2].get(), inputs[3].get()},
+ std::vector<ITensor *>{outputs[0].get(), outputs[1].get(), outputs[2].get(), outputs[3].get()});
+ mockup_layer->run();
+
+ for (size_t i = 0; i < 4; ++i)
+ {
+ for (int32_t j = 0; j < shapes[i].dim(0); ++j)
+ {
+ Coordinates coords{j};
+ float result =
+ *reinterpret_cast<float *>(outputs[i]->buffer() + outputs[i]->calcOffset(coords));
+ float expected =
+ *reinterpret_cast<float *>(inputs[i]->buffer() + inputs[i]->calcOffset(coords));
+ EXPECT_EQ(result, expected);
+ }
+ }
+}
+
+TEST(IPermuteFunction, float_rank2)
+{
+ const size_t input_pads[4] = {0, 1, 0, 2};
+ const size_t output_pads[4] = {0, 0, 2, 1};
+ const std::vector<Shape> shapes{{1, 4}, {2, 2}, {1, 5}, {2, 3}};
+ float expected_buffer[] = {1, 0, -1, -2, 3, -4, 5, -6, 7, -8};
+ const auto type_info = TypeInfo(DataType::FLOAT32);
+
+ std::vector<std::unique_ptr<MockUpTensor>> inputs(4);
+ std::vector<std::unique_ptr<MockUpTensor>> outputs(4);
+ std::vector<std::unique_ptr<uint8_t[]>> output_buffers(4);
+ for (size_t i = 0; i < 4; ++i)
+ {
+ inputs[i] = std::make_unique<MockUpTensor>(shapes[i], type_info, Layout::NHWC, input_pads[i]);
+ inputs[i]->setBuffer(reinterpret_cast<uint8_t *>(expected_buffer));
+
+ outputs[i] = std::make_unique<MockUpTensor>(shapes[i], type_info, Layout::NHWC, output_pads[i]);
+ output_buffers[i] = std::make_unique<uint8_t[]>(outputs[i]->total_size());
+ outputs[i]->setBuffer(output_buffers[i].get());
+ }
+
+ auto mockup_layer = std::make_unique<MockUpLayer>(
+ std::vector<ITensor *>{inputs[0].get(), inputs[1].get(), inputs[2].get(), inputs[3].get()},
+ std::vector<ITensor *>{outputs[0].get(), outputs[1].get(), outputs[2].get(), outputs[3].get()});
+ mockup_layer->run();
+
+ for (size_t i = 0; i < 4; ++i)
+ {
+ for (int32_t j = 0; j < shapes[i].dim(0); ++j)
+ {
+ for (int32_t k = 0; k < shapes[i].dim(1); ++k)
+ {
+ Coordinates coords{j, k};
+ float result =
+ *reinterpret_cast<float *>(outputs[i]->buffer() + outputs[i]->calcOffset(coords));
+ float expected =
+ *reinterpret_cast<float *>(inputs[i]->buffer() + inputs[i]->calcOffset(coords));
+ EXPECT_EQ(result, expected);
+ }
+ }
+ }
+}
+
+TEST(IPermuteFunction, float_rank3)
+{
+ const size_t input_pads[4] = {0, 5, 0, 2};
+ const size_t output_pads[4] = {0, 3, 2, 1};
+ const std::vector<Shape> shapes{{1, 4, 1}, {1, 2, 1}, {2, 1, 5}, {1, 2, 3}};
+ float expected_buffer[] = {1, 0, -1, -2, 3, -4, 5, -6, 7, -8, 9, -10};
+ const auto type_info = TypeInfo(DataType::FLOAT32);
+
+ std::vector<std::unique_ptr<MockUpTensor>> inputs(4);
+ std::vector<std::unique_ptr<MockUpTensor>> outputs(4);
+ std::vector<std::unique_ptr<uint8_t[]>> output_buffers(4);
+ for (size_t i = 0; i < 4; ++i)
+ {
+ inputs[i] = std::make_unique<MockUpTensor>(shapes[i], type_info, Layout::NHWC, input_pads[i]);
+ inputs[i]->setBuffer(reinterpret_cast<uint8_t *>(expected_buffer));
+
+ outputs[i] = std::make_unique<MockUpTensor>(shapes[i], type_info, Layout::NHWC, output_pads[i]);
+ output_buffers[i] = std::make_unique<uint8_t[]>(outputs[i]->total_size());
+ outputs[i]->setBuffer(output_buffers[i].get());
+ }
+
+ auto mockup_layer = std::make_unique<MockUpLayer>(
+ std::vector<ITensor *>{inputs[0].get(), inputs[1].get(), inputs[2].get(), inputs[3].get()},
+ std::vector<ITensor *>{outputs[0].get(), outputs[1].get(), outputs[2].get(), outputs[3].get()});
+ mockup_layer->run();
+
+ for (size_t i = 0; i < 4; ++i)
+ {
+ for (int32_t j = 0; j < shapes[i].dim(0); ++j)
+ {
+ for (int32_t k = 0; k < shapes[i].dim(1); ++k)
+ {
+ for (int32_t l = 0; l < shapes[i].dim(2); ++l)
+ {
+ Coordinates coords{j, k, l};
+ float result =
+ *reinterpret_cast<float *>(outputs[i]->buffer() + outputs[i]->calcOffset(coords));
+ float expected =
+ *reinterpret_cast<float *>(inputs[i]->buffer() + inputs[i]->calcOffset(coords));
+ EXPECT_EQ(result, expected);
+ }
+ }
+ }
+ }
+}
+
+TEST(IPermuteFunction, float_rank4)
+{
+ const size_t input_pads[4] = {0, 0, 1, 2};
+ const size_t output_pads[4] = {0, 3, 2, 1};
+ const std::vector<Shape> shapes{{1, 1, 4, 1}, {2, 1, 2, 3}, {1, 2, 1, 2}, {1, 1, 2, 3}};
+ float expected_buffer[] = {1, 0, -1, -2, 3, -4, 5, -6, 7, -8, 9, -10};
+ const auto type_info = TypeInfo(DataType::FLOAT32);
+
+ std::vector<std::unique_ptr<MockUpTensor>> inputs(4);
+ std::vector<std::unique_ptr<MockUpTensor>> outputs(4);
+ std::vector<std::unique_ptr<uint8_t[]>> output_buffers(4);
+ for (size_t i = 0; i < 4; ++i)
+ {
+ inputs[i] = std::make_unique<MockUpTensor>(shapes[i], type_info, Layout::NHWC, input_pads[i]);
+ inputs[i]->setBuffer(reinterpret_cast<uint8_t *>(expected_buffer));
+
+ outputs[i] = std::make_unique<MockUpTensor>(shapes[i], type_info, Layout::NHWC, output_pads[i]);
+ output_buffers[i] = std::make_unique<uint8_t[]>(outputs[i]->total_size());
+ outputs[i]->setBuffer(output_buffers[i].get());
+ }
+
+ auto mockup_layer = std::make_unique<MockUpLayer>(
+ std::vector<ITensor *>{inputs[0].get(), inputs[1].get(), inputs[2].get(), inputs[3].get()},
+ std::vector<ITensor *>{outputs[0].get(), outputs[1].get(), outputs[2].get(), outputs[3].get()});
+ mockup_layer->run();
+
+ for (size_t i = 0; i < 4; ++i)
+ {
+ for (int32_t j = 0; j < shapes[i].dim(0); ++j)
+ {
+ for (int32_t k = 0; k < shapes[i].dim(1); ++k)
+ {
+ for (int32_t l = 0; l < shapes[i].dim(2); ++l)
+ {
+ for (int32_t m = 0; m < shapes[i].dim(3); ++m)
+ {
+ Coordinates coords{j, k, l, m};
+ float result =
+ *reinterpret_cast<float *>(outputs[i]->buffer() + outputs[i]->calcOffset(coords));
+ float expected =
+ *reinterpret_cast<float *>(inputs[i]->buffer() + inputs[i]->calcOffset(coords));
+ EXPECT_EQ(result, expected);
+ }
+ }
+ }
+ }
+ }
+}
+
+TEST(IPermuteFunction, float_rank4_layout)
+{
+ const size_t input_pads[4] = {0, 0, 1, 2};
+ const size_t output_pads[4] = {0, 3, 2, 1};
+ const std::vector<Shape> shapes{{1, 1, 4, 1}, {2, 1, 2, 3}, {1, 2, 1, 2}, {1, 1, 2, 3}};
+ float expected_buffer[] = {1, 0, -1, -2, 3, -4, 5, -6, 7, -8, 9, -10, 11, -12, 13, -14, 15, -16};
+ const auto type_info = TypeInfo(DataType::FLOAT32);
+
+ std::vector<std::unique_ptr<MockUpTensor>> inputs(4);
+ std::vector<std::unique_ptr<MockUpTensor>> outputs(4);
+ std::vector<std::unique_ptr<uint8_t[]>> output_buffers(4);
+ for (size_t i = 0; i < 4; ++i)
+ {
+ Layout layout = Layout::NHWC;
+ Shape shape = shapes[i];
+ if (i % 2 == 1)
+ {
+ layout = Layout::NCHW;
+ shape = Shape{shapes[i].dim(0), shapes[i].dim(3), shapes[i].dim(1), shapes[i].dim(2)};
+ }
+ inputs[i] = std::make_unique<MockUpTensor>(shape, type_info, layout, input_pads[i]);
+ inputs[i]->setBuffer(reinterpret_cast<uint8_t *>(expected_buffer));
+
+ if (layout == Layout::NHWC)
+ {
+ layout = Layout::NCHW;
+ shape = Shape{shapes[i].dim(0), shapes[i].dim(3), shapes[i].dim(1), shapes[i].dim(2)};
+ }
+ else
+ {
+ layout = Layout::NHWC;
+ shape = shapes[i];
+ }
+ outputs[i] = std::make_unique<MockUpTensor>(shape, type_info, layout, output_pads[i]);
+ output_buffers[i] = std::make_unique<uint8_t[]>(outputs[i]->total_size());
+ outputs[i]->setBuffer(output_buffers[i].get());
+ }
+
+ auto mockup_layer = std::make_unique<MockUpLayer>(
+ std::vector<ITensor *>{inputs[0].get(), inputs[1].get(), inputs[2].get(), inputs[3].get()},
+ std::vector<ITensor *>{outputs[0].get(), outputs[1].get(), outputs[2].get(), outputs[3].get()});
+ mockup_layer->run();
+
+ for (size_t i = 0; i < 4; ++i)
+ {
+ for (int32_t j = 0; j < shapes[i].dim(0); ++j)
+ {
+ for (int32_t k = 0; k < shapes[i].dim(1); ++k)
+ {
+ for (int32_t l = 0; l < shapes[i].dim(2); ++l)
+ {
+ for (int32_t m = 0; m < shapes[i].dim(3); ++m)
+ {
+ Coordinates input_coords;
+ Coordinates output_coords;
+ if (inputs[i]->layout() == Layout::NHWC)
+ {
+ input_coords = Coordinates{j, k, l, m};
+ }
+ else
+ {
+ input_coords = Coordinates{j, m, k, l};
+ }
+ if (outputs[i]->layout() == Layout::NHWC)
+ {
+ output_coords = Coordinates{j, k, l, m};
+ }
+ else
+ {
+ output_coords = Coordinates{j, m, k, l};
+ }
+ float result = *reinterpret_cast<float *>(outputs[i]->buffer() +
+ outputs[i]->calcOffset(output_coords));
+ float expected =
+ *reinterpret_cast<float *>(inputs[i]->buffer() + inputs[i]->calcOffset(input_coords));
+ EXPECT_EQ(result, expected);
+ }
+ }
+ }
+ }
+ }
+}
+
+TEST(IPermuteFunction, float_to_qasymm8)
+{
+ const size_t input_pads[4] = {0, 0, 1, 2};
+ const size_t output_pads[4] = {0, 3, 2, 1};
+ const std::vector<Shape> shapes{{1, 1, 4, 1}, {2, 1, 2, 3}, {1, 2, 1, 2}, {1, 1, 2, 3}};
+ float expected_buffer[] = {10, 0, -10, -20, 30, -40, 50, -60, 70, -80, 90, -100};
+ float scale = 10;
+ int32_t zero_point = 128;
+
+ std::vector<std::unique_ptr<MockUpTensor>> inputs(4);
+ std::vector<std::unique_ptr<MockUpTensor>> outputs(4);
+ std::vector<std::unique_ptr<uint8_t[]>> output_buffers(4);
+ for (size_t i = 0; i < 4; ++i)
+ {
+ inputs[i] = std::make_unique<MockUpTensor>(shapes[i], TypeInfo(DataType::FLOAT32), Layout::NHWC,
+ input_pads[i]);
+ inputs[i]->setBuffer(reinterpret_cast<uint8_t *>(expected_buffer));
+
+ TypeInfo type_info{DataType::QUANT_UINT8_ASYMM, scale, zero_point};
+ outputs[i] = std::make_unique<MockUpTensor>(shapes[i], type_info, Layout::NHWC, output_pads[i]);
+ output_buffers[i] = std::make_unique<uint8_t[]>(outputs[i]->total_size());
+ outputs[i]->setBuffer(output_buffers[i].get());
+ }
+
+ auto mockup_layer = std::make_unique<MockUpLayer>(
+ std::vector<ITensor *>{inputs[0].get(), inputs[1].get(), inputs[2].get(), inputs[3].get()},
+ std::vector<ITensor *>{outputs[0].get(), outputs[1].get(), outputs[2].get(), outputs[3].get()});
+ mockup_layer->run();
+
+ for (size_t i = 0; i < 4; ++i)
+ {
+ for (int32_t j = 0; j < shapes[i].dim(0); ++j)
+ {
+ for (int32_t k = 0; k < shapes[i].dim(1); ++k)
+ {
+ for (int32_t l = 0; l < shapes[i].dim(2); ++l)
+ {
+ for (int32_t m = 0; m < shapes[i].dim(3); ++m)
+ {
+ Coordinates coords{j, k, l, m};
+ uint8_t qasymm8 =
+ *reinterpret_cast<uint8_t *>(outputs[i]->buffer() + outputs[i]->calcOffset(coords));
+ float result = (qasymm8 - zero_point) * scale;
+ float expected =
+ *reinterpret_cast<float *>(inputs[i]->buffer() + inputs[i]->calcOffset(coords));
+ EXPECT_EQ(result, expected);
+ }
+ }
+ }
+ }
+ }
+}
+
+TEST(IPermuteFunction, float_to_qsymm8)
+{
+ const size_t input_pads[4] = {0, 0, 1, 2};
+ const size_t output_pads[4] = {0, 3, 2, 1};
+ const std::vector<Shape> shapes{{1, 1, 4, 1}, {2, 1, 2, 3}, {1, 2, 1, 2}, {1, 1, 2, 3}};
+ float expected_buffer[] = {10, 0, -10, -20, 30, -40, 50, -60, 70, -80, 90, -100};
+ float scale = 10;
+ int32_t zero_point = 0;
+
+ std::vector<std::unique_ptr<MockUpTensor>> inputs(4);
+ std::vector<std::unique_ptr<MockUpTensor>> outputs(4);
+ std::vector<std::unique_ptr<uint8_t[]>> output_buffers(4);
+ for (size_t i = 0; i < 4; ++i)
+ {
+ inputs[i] = std::make_unique<MockUpTensor>(shapes[i], TypeInfo(DataType::FLOAT32), Layout::NHWC,
+ input_pads[i]);
+ inputs[i]->setBuffer(reinterpret_cast<uint8_t *>(expected_buffer));
+
+ TypeInfo type_info{DataType::QUANT_INT8_SYMM, scale, zero_point};
+ outputs[i] = std::make_unique<MockUpTensor>(shapes[i], type_info, Layout::NHWC, output_pads[i]);
+ output_buffers[i] = std::make_unique<uint8_t[]>(outputs[i]->total_size());
+ outputs[i]->setBuffer(output_buffers[i].get());
+ }
+
+ auto mockup_layer = std::make_unique<MockUpLayer>(
+ std::vector<ITensor *>{inputs[0].get(), inputs[1].get(), inputs[2].get(), inputs[3].get()},
+ std::vector<ITensor *>{outputs[0].get(), outputs[1].get(), outputs[2].get(), outputs[3].get()});
+ mockup_layer->run();
+
+ for (size_t i = 0; i < 4; ++i)
+ {
+ for (int32_t j = 0; j < shapes[i].dim(0); ++j)
+ {
+ for (int32_t k = 0; k < shapes[i].dim(1); ++k)
+ {
+ for (int32_t l = 0; l < shapes[i].dim(2); ++l)
+ {
+ for (int32_t m = 0; m < shapes[i].dim(3); ++m)
+ {
+ Coordinates coords{j, k, l, m};
+ int8_t qsymm8 =
+ *reinterpret_cast<int8_t *>(outputs[i]->buffer() + outputs[i]->calcOffset(coords));
+ float result = (qsymm8 - zero_point) * scale;
+ float expected =
+ *reinterpret_cast<float *>(inputs[i]->buffer() + inputs[i]->calcOffset(coords));
+ EXPECT_EQ(result, expected);
+ }
+ }
+ }
+ }
+ }
+}
+
+TEST(IPermuteFunction, float_to_qsymm16)
+{
+ const size_t input_pads[4] = {0, 0, 1, 2};
+ const size_t output_pads[4] = {0, 3, 2, 1};
+ const std::vector<Shape> shapes{{1, 1, 4, 1}, {2, 1, 2, 3}, {1, 2, 1, 2}, {1, 1, 2, 3}};
+ float expected_buffer[] = {10, 0, -10, -20, 30, -40, 50, -60, 70, -80, 90, -100};
+ float scale = 10;
+ int32_t zero_point = 0;
+
+ std::vector<std::unique_ptr<MockUpTensor>> inputs(4);
+ std::vector<std::unique_ptr<MockUpTensor>> outputs(4);
+ std::vector<std::unique_ptr<uint8_t[]>> output_buffers(4);
+ for (size_t i = 0; i < 4; ++i)
+ {
+ inputs[i] = std::make_unique<MockUpTensor>(shapes[i], TypeInfo(DataType::FLOAT32), Layout::NHWC,
+ input_pads[i]);
+ inputs[i]->setBuffer(reinterpret_cast<uint8_t *>(expected_buffer));
+
+ TypeInfo type_info{DataType::QUANT_INT16_SYMM, scale, zero_point};
+ outputs[i] = std::make_unique<MockUpTensor>(shapes[i], type_info, Layout::NHWC, output_pads[i]);
+ output_buffers[i] = std::make_unique<uint8_t[]>(outputs[i]->total_size());
+ outputs[i]->setBuffer(output_buffers[i].get());
+ }
+
+ auto mockup_layer = std::make_unique<MockUpLayer>(
+ std::vector<ITensor *>{inputs[0].get(), inputs[1].get(), inputs[2].get(), inputs[3].get()},
+ std::vector<ITensor *>{outputs[0].get(), outputs[1].get(), outputs[2].get(), outputs[3].get()});
+ mockup_layer->run();
+
+ for (size_t i = 0; i < 4; ++i)
+ {
+ for (int32_t j = 0; j < shapes[i].dim(0); ++j)
+ {
+ for (int32_t k = 0; k < shapes[i].dim(1); ++k)
+ {
+ for (int32_t l = 0; l < shapes[i].dim(2); ++l)
+ {
+ for (int32_t m = 0; m < shapes[i].dim(3); ++m)
+ {
+ Coordinates coords{j, k, l, m};
+ int16_t qsymm16 =
+ *reinterpret_cast<int16_t *>(outputs[i]->buffer() + outputs[i]->calcOffset(coords));
+ float result = (qsymm16 - zero_point) * scale;
+ float expected =
+ *reinterpret_cast<float *>(inputs[i]->buffer() + inputs[i]->calcOffset(coords));
+ EXPECT_EQ(result, expected);
+ }
+ }
+ }
+ }
+ }
+}
+
+TEST(IPermuteFunction, qasymm8_to_float)
+{
+ const size_t input_pads[4] = {0, 0, 1, 2};
+ const size_t output_pads[4] = {0, 3, 2, 1};
+ const std::vector<Shape> shapes{{1, 1, 4, 1}, {2, 1, 2, 3}, {1, 2, 1, 2}, {1, 1, 2, 3}};
+ float expected_buffer[] = {10, 0, -10, -20, 30, -40, 50, -60, 70, -80, 90, -100};
+ float scale = 10;
+ int32_t zero_point = 128;
+ uint8_t input_buffer[12];
+
+ int32_t min_val = std::numeric_limits<uint8_t>::min();
+ int32_t max_val = std::numeric_limits<uint8_t>::max();
+ for (size_t i = 0; i < sizeof(expected_buffer) / sizeof(float); ++i)
+ {
+ int32_t unclamped = static_cast<int32_t>(std::round(expected_buffer[i] / scale)) + zero_point;
+ input_buffer[i] = std::min(std::max(unclamped, min_val), max_val);
+ }
+
+ std::vector<std::unique_ptr<MockUpTensor>> inputs(4);
+ std::vector<std::unique_ptr<MockUpTensor>> outputs(4);
+ std::vector<std::unique_ptr<uint8_t[]>> output_buffers(4);
+ for (size_t i = 0; i < 4; ++i)
+ {
+ TypeInfo type_info{DataType::QUANT_UINT8_ASYMM, scale, zero_point};
+ inputs[i] = std::make_unique<MockUpTensor>(shapes[i], type_info, Layout::NHWC, input_pads[i]);
+ inputs[i]->setBuffer(reinterpret_cast<uint8_t *>(input_buffer));
+
+ outputs[i] = std::make_unique<MockUpTensor>(shapes[i], TypeInfo(DataType::FLOAT32),
+ Layout::NHWC, output_pads[i]);
+ output_buffers[i] = std::make_unique<uint8_t[]>(outputs[i]->total_size());
+ outputs[i]->setBuffer(output_buffers[i].get());
+ }
+
+ auto mockup_layer = std::make_unique<MockUpLayer>(
+ std::vector<ITensor *>{inputs[0].get(), inputs[1].get(), inputs[2].get(), inputs[3].get()},
+ std::vector<ITensor *>{outputs[0].get(), outputs[1].get(), outputs[2].get(), outputs[3].get()});
+ mockup_layer->run();
+
+ for (size_t i = 0; i < 4; ++i)
+ {
+ for (int32_t j = 0; j < shapes[i].dim(0); ++j)
+ {
+ for (int32_t k = 0; k < shapes[i].dim(1); ++k)
+ {
+ for (int32_t l = 0; l < shapes[i].dim(2); ++l)
+ {
+ for (int32_t m = 0; m < shapes[i].dim(3); ++m)
+ {
+ Coordinates coords{j, k, l, m};
+ float result =
+ *reinterpret_cast<float *>(outputs[i]->buffer() + outputs[i]->calcOffset(coords));
+ uint8_t qasymm8 =
+ *reinterpret_cast<uint8_t *>(inputs[i]->buffer() + inputs[i]->calcOffset(coords));
+ float expected = (qasymm8 - zero_point) * scale;
+ EXPECT_EQ(result, expected);
+ }
+ }
+ }
+ }
+ }
+}
+
+TEST(IPermuteFunction, qsymm8_to_float)
+{
+ const size_t input_pads[4] = {0, 0, 1, 2};
+ const size_t output_pads[4] = {0, 3, 2, 1};
+ const std::vector<Shape> shapes{{1, 1, 4, 1}, {2, 1, 2, 3}, {1, 2, 1, 2}, {1, 1, 2, 3}};
+ float expected_buffer[] = {10, 0, -10, -20, 30, -40, 50, -60, 70, -80, 90, -100};
+ float scale = 10;
+ int32_t zero_point = 0;
+ int8_t input_buffer[12];
+
+ int32_t min_val = std::numeric_limits<int8_t>::min();
+ int32_t max_val = std::numeric_limits<int8_t>::max();
+ for (size_t i = 0; i < sizeof(expected_buffer) / sizeof(float); ++i)
+ {
+ int32_t unclamped = static_cast<int32_t>(std::round(expected_buffer[i] / scale)) + zero_point;
+ input_buffer[i] = std::min(std::max(unclamped, min_val), max_val);
+ }
+
+ std::vector<std::unique_ptr<MockUpTensor>> inputs(4);
+ std::vector<std::unique_ptr<MockUpTensor>> outputs(4);
+ std::vector<std::unique_ptr<uint8_t[]>> output_buffers(4);
+ for (size_t i = 0; i < 4; ++i)
+ {
+ TypeInfo type_info{DataType::QUANT_INT8_SYMM, scale, zero_point};
+ inputs[i] = std::make_unique<MockUpTensor>(shapes[i], type_info, Layout::NHWC, input_pads[i]);
+ inputs[i]->setBuffer(reinterpret_cast<uint8_t *>(input_buffer));
+
+ outputs[i] = std::make_unique<MockUpTensor>(shapes[i], TypeInfo(DataType::FLOAT32),
+ Layout::NHWC, output_pads[i]);
+ output_buffers[i] = std::make_unique<uint8_t[]>(outputs[i]->total_size());
+ outputs[i]->setBuffer(output_buffers[i].get());
+ }
+
+ auto mockup_layer = std::make_unique<MockUpLayer>(
+ std::vector<ITensor *>{inputs[0].get(), inputs[1].get(), inputs[2].get(), inputs[3].get()},
+ std::vector<ITensor *>{outputs[0].get(), outputs[1].get(), outputs[2].get(), outputs[3].get()});
+ mockup_layer->run();
+
+ for (size_t i = 0; i < 4; ++i)
+ {
+ for (int32_t j = 0; j < shapes[i].dim(0); ++j)
+ {
+ for (int32_t k = 0; k < shapes[i].dim(1); ++k)
+ {
+ for (int32_t l = 0; l < shapes[i].dim(2); ++l)
+ {
+ for (int32_t m = 0; m < shapes[i].dim(3); ++m)
+ {
+ Coordinates coords{j, k, l, m};
+ float result =
+ *reinterpret_cast<float *>(outputs[i]->buffer() + outputs[i]->calcOffset(coords));
+ int8_t qsymm8 =
+ *reinterpret_cast<int8_t *>(inputs[i]->buffer() + inputs[i]->calcOffset(coords));
+ float expected = (qsymm8 - zero_point) * scale;
+ EXPECT_EQ(result, expected);
+ }
+ }
+ }
+ }
+ }
+}
+
+TEST(IPermuteFunction, qsymm16_to_float)
+{
+ const size_t input_pads[4] = {0, 0, 1, 2};
+ const size_t output_pads[4] = {0, 3, 2, 1};
+ const std::vector<Shape> shapes{{1, 1, 4, 1}, {2, 1, 2, 3}, {1, 2, 1, 2}, {1, 1, 2, 3}};
+ float expected_buffer[] = {10, 0, -10, -20, 30, -40, 50, -60, 70, -80, 90, -100};
+ float scale = 10;
+ int32_t zero_point = 0;
+ int16_t input_buffer[12];
+
+ int32_t min_val = std::numeric_limits<int16_t>::min();
+ int32_t max_val = std::numeric_limits<int16_t>::max();
+ for (size_t i = 0; i < sizeof(expected_buffer) / sizeof(float); ++i)
+ {
+ int32_t unclamped = static_cast<int32_t>(std::round(expected_buffer[i] / scale)) + zero_point;
+ input_buffer[i] = std::min(std::max(unclamped, min_val), max_val);
+ }
+
+ std::vector<std::unique_ptr<MockUpTensor>> inputs(4);
+ std::vector<std::unique_ptr<MockUpTensor>> outputs(4);
+ std::vector<std::unique_ptr<uint8_t[]>> output_buffers(4);
+ for (size_t i = 0; i < 4; ++i)
+ {
+ TypeInfo type_info{DataType::QUANT_INT16_SYMM, scale, zero_point};
+ inputs[i] = std::make_unique<MockUpTensor>(shapes[i], type_info, Layout::NHWC, input_pads[i]);
+ inputs[i]->setBuffer(reinterpret_cast<uint8_t *>(input_buffer));
+
+ outputs[i] = std::make_unique<MockUpTensor>(shapes[i], TypeInfo(DataType::FLOAT32),
+ Layout::NHWC, output_pads[i]);
+ output_buffers[i] = std::make_unique<uint8_t[]>(outputs[i]->total_size());
+ outputs[i]->setBuffer(output_buffers[i].get());
+ }
+
+ auto mockup_layer = std::make_unique<MockUpLayer>(
+ std::vector<ITensor *>{inputs[0].get(), inputs[1].get(), inputs[2].get(), inputs[3].get()},
+ std::vector<ITensor *>{outputs[0].get(), outputs[1].get(), outputs[2].get(), outputs[3].get()});
+ mockup_layer->run();
+
+ for (size_t i = 0; i < 4; ++i)
+ {
+ for (int32_t j = 0; j < shapes[i].dim(0); ++j)
+ {
+ for (int32_t k = 0; k < shapes[i].dim(1); ++k)
+ {
+ for (int32_t l = 0; l < shapes[i].dim(2); ++l)
+ {
+ for (int32_t m = 0; m < shapes[i].dim(3); ++m)
+ {
+ Coordinates coords{j, k, l, m};
+ float result =
+ *reinterpret_cast<float *>(outputs[i]->buffer() + outputs[i]->calcOffset(coords));
+ int16_t qsymm16 =
+ *reinterpret_cast<int16_t *>(inputs[i]->buffer() + inputs[i]->calcOffset(coords));
+ float expected = (qsymm16 - zero_point) * scale;
+ EXPECT_EQ(result, expected);
+ }
+ }
+ }
+ }
+ }
+}
+
+TEST(IPermuteFunction, float_to_qasymm8_layout)
+{
+ const size_t input_pads[4] = {0, 0, 1, 2};
+ const size_t output_pads[4] = {0, 3, 2, 1};
+ const std::vector<Shape> shapes{{1, 1, 4, 1}, {2, 1, 2, 3}, {1, 2, 1, 2}, {1, 1, 2, 3}};
+ float expected_buffer[] = {10, 0, -10, -20, 30, -40, 50, -60, 70,
+ -80, 90, -100, 110, -120, 130, -140, 150, -160};
+ float scale = 10;
+ int32_t zero_point = 128;
+
+ std::vector<std::unique_ptr<MockUpTensor>> inputs(4);
+ std::vector<std::unique_ptr<MockUpTensor>> outputs(4);
+ std::vector<std::unique_ptr<uint8_t[]>> output_buffers(4);
+ for (size_t i = 0; i < 4; ++i)
+ {
+ Layout layout = Layout::NHWC;
+ Shape shape = shapes[i];
+ if (i % 2 == 1)
+ {
+ layout = Layout::NCHW;
+ shape = Shape{shapes[i].dim(0), shapes[i].dim(3), shapes[i].dim(1), shapes[i].dim(2)};
+ }
+ inputs[i] =
+ std::make_unique<MockUpTensor>(shape, TypeInfo(DataType::FLOAT32), layout, input_pads[i]);
+ inputs[i]->setBuffer(reinterpret_cast<uint8_t *>(expected_buffer));
+
+ if (layout == Layout::NHWC)
+ {
+ layout = Layout::NCHW;
+ shape = Shape{shapes[i].dim(0), shapes[i].dim(3), shapes[i].dim(1), shapes[i].dim(2)};
+ }
+ else
+ {
+ layout = Layout::NHWC;
+ shape = shapes[i];
+ }
+ TypeInfo type_info{DataType::QUANT_UINT8_ASYMM, scale, zero_point};
+ outputs[i] = std::make_unique<MockUpTensor>(shape, type_info, layout, output_pads[i]);
+ output_buffers[i] = std::make_unique<uint8_t[]>(outputs[i]->total_size());
+ outputs[i]->setBuffer(output_buffers[i].get());
+ }
+
+ auto mockup_layer = std::make_unique<MockUpLayer>(
+ std::vector<ITensor *>{inputs[0].get(), inputs[1].get(), inputs[2].get(), inputs[3].get()},
+ std::vector<ITensor *>{outputs[0].get(), outputs[1].get(), outputs[2].get(), outputs[3].get()});
+ mockup_layer->run();
+
+ for (size_t i = 0; i < 4; ++i)
+ {
+ for (int32_t j = 0; j < shapes[i].dim(0); ++j)
+ {
+ for (int32_t k = 0; k < shapes[i].dim(1); ++k)
+ {
+ for (int32_t l = 0; l < shapes[i].dim(2); ++l)
+ {
+ for (int32_t m = 0; m < shapes[i].dim(3); ++m)
+ {
+ Coordinates input_coords;
+ Coordinates output_coords;
+ if (inputs[i]->layout() == Layout::NHWC)
+ {
+ input_coords = Coordinates{j, k, l, m};
+ }
+ else
+ {
+ input_coords = Coordinates{j, m, k, l};
+ }
+ if (outputs[i]->layout() == Layout::NHWC)
+ {
+ output_coords = Coordinates{j, k, l, m};
+ }
+ else
+ {
+ output_coords = Coordinates{j, m, k, l};
+ }
+ uint8_t qasymm8 = *reinterpret_cast<uint8_t *>(outputs[i]->buffer() +
+ outputs[i]->calcOffset(output_coords));
+ float result = (qasymm8 - zero_point) * scale;
+ float expected =
+ *reinterpret_cast<float *>(inputs[i]->buffer() + inputs[i]->calcOffset(input_coords));
+ EXPECT_EQ(result, expected);
+ }
+ }
+ }
+ }
+ }
+}
+
+TEST(IPermuteFunction, asymm8_to_float_layout)
+{
+ const size_t input_pads[4] = {0, 0, 1, 2};
+ const size_t output_pads[4] = {0, 3, 2, 1};
+ const std::vector<Shape> shapes{{1, 1, 4, 1}, {2, 1, 2, 3}, {1, 2, 1, 2}, {1, 1, 2, 3}};
+ float expected_buffer[] = {10, 0, -10, -20, 30, -40, 50, -60, 70,
+ -80, 90, -100, 110, -120, 130, -140, 150, -160};
+ float scale = 10;
+ int32_t zero_point = 128;
+ uint8_t input_buffer[18];
+
+ int32_t min_val = std::numeric_limits<uint8_t>::min();
+ int32_t max_val = std::numeric_limits<uint8_t>::max();
+ for (size_t i = 0; i < sizeof(expected_buffer) / sizeof(float); ++i)
+ {
+ int32_t unclamped = static_cast<int32_t>(std::round(expected_buffer[i] / scale)) + zero_point;
+ input_buffer[i] = std::min(std::max(unclamped, min_val), max_val);
+ }
+
+ std::vector<std::unique_ptr<MockUpTensor>> inputs(4);
+ std::vector<std::unique_ptr<MockUpTensor>> outputs(4);
+ std::vector<std::unique_ptr<uint8_t[]>> output_buffers(4);
+ for (size_t i = 0; i < 4; ++i)
+ {
+ Layout layout = Layout::NHWC;
+ Shape shape = shapes[i];
+ if (i % 2 == 1)
+ {
+ layout = Layout::NCHW;
+ shape = Shape{shapes[i].dim(0), shapes[i].dim(3), shapes[i].dim(1), shapes[i].dim(2)};
+ }
+ TypeInfo type_info{DataType::QUANT_UINT8_ASYMM, scale, zero_point};
+ inputs[i] = std::make_unique<MockUpTensor>(shape, type_info, layout, input_pads[i]);
+ inputs[i]->setBuffer(reinterpret_cast<uint8_t *>(input_buffer));
+
+ if (layout == Layout::NHWC)
+ {
+ layout = Layout::NCHW;
+ shape = Shape{shapes[i].dim(0), shapes[i].dim(3), shapes[i].dim(1), shapes[i].dim(2)};
+ }
+ else
+ {
+ layout = Layout::NHWC;
+ shape = shapes[i];
+ }
+ outputs[i] =
+ std::make_unique<MockUpTensor>(shape, TypeInfo(DataType::FLOAT32), layout, output_pads[i]);
+ output_buffers[i] = std::make_unique<uint8_t[]>(outputs[i]->total_size());
+ outputs[i]->setBuffer(output_buffers[i].get());
+ }
+
+ auto mockup_layer = std::make_unique<MockUpLayer>(
+ std::vector<ITensor *>{inputs[0].get(), inputs[1].get(), inputs[2].get(), inputs[3].get()},
+ std::vector<ITensor *>{outputs[0].get(), outputs[1].get(), outputs[2].get(), outputs[3].get()});
+ mockup_layer->run();
+
+ for (size_t i = 0; i < 4; ++i)
+ {
+ for (int32_t j = 0; j < shapes[i].dim(0); ++j)
+ {
+ for (int32_t k = 0; k < shapes[i].dim(1); ++k)
+ {
+ for (int32_t l = 0; l < shapes[i].dim(2); ++l)
+ {
+ for (int32_t m = 0; m < shapes[i].dim(3); ++m)
+ {
+ Coordinates input_coords;
+ Coordinates output_coords;
+ if (inputs[i]->layout() == Layout::NHWC)
+ {
+ input_coords = Coordinates{j, k, l, m};
+ }
+ else
+ {
+ input_coords = Coordinates{j, m, k, l};
+ }
+ if (outputs[i]->layout() == Layout::NHWC)
+ {
+ output_coords = Coordinates{j, k, l, m};
+ }
+ else
+ {
+ output_coords = Coordinates{j, m, k, l};
+ }
+ float result = *reinterpret_cast<float *>(outputs[i]->buffer() +
+ outputs[i]->calcOffset(output_coords));
+ uint8_t qasymm8 = *reinterpret_cast<uint8_t *>(inputs[i]->buffer() +
+ inputs[i]->calcOffset(input_coords));
+ float expected = (qasymm8 - zero_point) * scale;
+ EXPECT_EQ(result, expected);
+ }
+ }
+ }
+ }
+ }
+}
+
+} // namespace
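As an aside, the MockUpTensor used by these tests pads only the innermost dimension and derives row-major strides from the padded extents, which is what makes the has_padding() code paths exercisable. A rough stand-alone sketch of that stride and offset computation follows; the shape, pad, and coordinates are arbitrary example values, not taken from the tests above.

#include <cstddef>
#include <cstdio>
#include <vector>

int main()
{
  const std::vector<int> shape = {2, 3, 4}; // arbitrary example shape
  const size_t pad = 2;                     // extra elements after the innermost dim

  // Row-major strides computed from padded extents, as MockUpTensor does.
  std::vector<size_t> pads(shape.size(), 0);
  pads.back() = pad;
  std::vector<size_t> strides(shape.size());
  size_t stride = 1;
  for (int i = static_cast<int>(shape.size()) - 1; i >= 0; --i)
  {
    strides[i] = stride;
    stride *= shape[i] + pads[i];
  }

  // Offset (in elements) of coordinate {1, 2, 3}.
  const std::vector<int> coords = {1, 2, 3};
  size_t offset = 0;
  for (size_t i = 0; i < shape.size(); ++i)
    offset += strides[i] * coords[i];

  std::printf("strides = {%zu, %zu, %zu}, offset = %zu elements\n", strides[0], strides[1],
              strides[2], offset);
  return 0;
}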
diff --git a/runtime/onert/core/src/exec/ParallelScheduler.cc b/runtime/onert/core/src/exec/ParallelScheduler.cc
index 70c9c3dd6..456663f91 100644
--- a/runtime/onert/core/src/exec/ParallelScheduler.cc
+++ b/runtime/onert/core/src/exec/ParallelScheduler.cc
@@ -45,7 +45,7 @@ void ParallelScheduler::assign(std::unique_ptr<IFunction> &&fn, const backend::B
void ParallelScheduler::finish()
{
- for (auto &itr : _thread_pools)
+ for (auto &&itr : _thread_pools)
{
itr.second->finish();
}
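The `auto &` to `auto &&` changes in this hunk and the ThreadPool hunk below are behavior-preserving: in a range-for over an lvalue container, the forwarding reference binds to the same lvalue elements a plain reference would. A small sketch illustrating this, using a made-up pool type rather than onert's thread pool:

#include <map>
#include <memory>
#include <string>

struct FakePool
{
  void finish() {}
};

int main()
{
  std::map<std::string, std::unique_ptr<FakePool>> pools;
  pools.emplace("worker", std::make_unique<FakePool>());

  // auto && deduces to std::pair<const std::string, std::unique_ptr<FakePool>> &
  // here, so it is equivalent to auto & for this lvalue container.
  for (auto &&itr : pools)
    itr.second->finish();

  return 0;
}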
diff --git a/runtime/onert/core/src/exec/SingleModelExecutors.cc b/runtime/onert/core/src/exec/SingleModelExecutors.cc
new file mode 100644
index 000000000..4b954bab2
--- /dev/null
+++ b/runtime/onert/core/src/exec/SingleModelExecutors.cc
@@ -0,0 +1,61 @@
+/*
+ * Copyright (c) 2023 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "SingleModelExecutors.h"
+
+#include "../backend/builtin/IOTensor.h"
+
+namespace onert
+{
+namespace exec
+{
+
+void SingleModelExecutors::emplace(const ir::ModelIndex &, const ir::SubgraphIndex &subg_index,
+ std::unique_ptr<IExecutor> exec)
+{
+ _executors.emplace(subg_index, std::move(exec));
+}
+
+IExecutor *SingleModelExecutors::at(const ir::ModelIndex &,
+ const ir::SubgraphIndex &subg_index) const
+{
+ return _executors.at(subg_index).get();
+}
+
+uint32_t SingleModelExecutors::inputSize() const
+{
+ return entryExecutor()->getInputTensors().size();
+}
+
+uint32_t SingleModelExecutors::outputSize() const
+{
+ return entryExecutor()->getOutputTensors().size();
+}
+
+const ir::OperandInfo &SingleModelExecutors::inputInfo(const ir::IOIndex &index) const
+{
+ return entryExecutor()->getInputTensors().at(index.value())->orig_info();
+}
+
+const ir::OperandInfo &SingleModelExecutors::outputInfo(const ir::IOIndex &index) const
+{
+ return entryExecutor()->getOutputTensors().at(index.value())->orig_info();
+}
+
+void SingleModelExecutors::execute(const IODescription &desc) { entryExecutor()->execute(desc); }
+
+} // namespace exec
+} // namespace onert
diff --git a/runtime/onert/core/src/exec/SingleModelExecutors.h b/runtime/onert/core/src/exec/SingleModelExecutors.h
new file mode 100644
index 000000000..98d629eae
--- /dev/null
+++ b/runtime/onert/core/src/exec/SingleModelExecutors.h
@@ -0,0 +1,70 @@
+/*
+ * Copyright (c) 2023 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __ONERT_EXEC_SINGLE_MODEL_EXECUTORS_H__
+#define __ONERT_EXEC_SINGLE_MODEL_EXECUTORS_H__
+
+#include "exec/IExecutors.h"
+#include "ir/NNPkg.h"
+
+namespace onert
+{
+namespace exec
+{
+
+/**
+ * @brief Class to gather executor set for single model NN package
+ */
+class SingleModelExecutors : public IExecutors
+{
+public:
+ /**
+ * @brief Construct a new SingleModelExecutors object
+ */
+ SingleModelExecutors(void) = default;
+ SingleModelExecutors(const SingleModelExecutors &) = delete;
+ SingleModelExecutors(SingleModelExecutors &&) = default;
+
+ /**
+ * @brief Destroy the SingleModelExecutors object
+ */
+ ~SingleModelExecutors() = default;
+
+public:
+ void emplace(const ir::ModelIndex &model_index, const ir::SubgraphIndex &subg_index,
+ std::unique_ptr<IExecutor> exec) override;
+
+ IExecutor *at(const ir::ModelIndex &model_index,
+ const ir::SubgraphIndex &subg_index) const override;
+
+ uint32_t inputSize() const override;
+
+ uint32_t outputSize() const override;
+
+ const ir::OperandInfo &inputInfo(const ir::IOIndex &index) const override;
+
+ const ir::OperandInfo &outputInfo(const ir::IOIndex &index) const override;
+
+ void execute(const IODescription &desc) override;
+
+private:
+ std::unordered_map<ir::SubgraphIndex, std::unique_ptr<IExecutor>> _executors;
+};
+
+} // namespace exec
+} // namespace onert
+
+#endif // __ONERT_EXEC_SINGLE_MODEL_EXECUTORS_H__
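The header above is a thin facade: executors are keyed by subgraph index only and the model index is ignored, which is sufficient for a single-model package. A toy sketch of that pattern follows; the types are invented stand-ins for illustration and are not onert's actual classes.

#include <cstdint>
#include <memory>
#include <unordered_map>

struct Executor { void execute() {} }; // stand-in for IExecutor
using SubgraphIndex = uint32_t;        // stand-in for ir::SubgraphIndex
using ModelIndex = uint32_t;           // stand-in for ir::ModelIndex

class SingleModelMap
{
public:
  void emplace(ModelIndex, SubgraphIndex subg, std::unique_ptr<Executor> exec)
  {
    _executors.emplace(subg, std::move(exec)); // model index is intentionally ignored
  }
  Executor *at(ModelIndex, SubgraphIndex subg) const { return _executors.at(subg).get(); }

private:
  std::unordered_map<SubgraphIndex, std::unique_ptr<Executor>> _executors;
};

int main()
{
  SingleModelMap map;
  map.emplace(0, 0, std::make_unique<Executor>());
  map.at(0, 0)->execute();
  return 0;
}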
diff --git a/runtime/onert/core/src/exec/ThreadPool.cc b/runtime/onert/core/src/exec/ThreadPool.cc
index c8e0e3265..bf85e59f6 100644
--- a/runtime/onert/core/src/exec/ThreadPool.cc
+++ b/runtime/onert/core/src/exec/ThreadPool.cc
@@ -48,7 +48,7 @@ uint32_t ThreadPool::numJobsInQueue() { return _worker.numJobsInQueue(); }
void ThreadPool::join()
{
- for (auto &thread : _threads)
+ for (auto &&thread : _threads)
{
thread.join();
}
diff --git a/runtime/onert/core/src/interp/Buffer.h b/runtime/onert/core/src/interp/Buffer.h
deleted file mode 100644
index 24938f74f..000000000
--- a/runtime/onert/core/src/interp/Buffer.h
+++ /dev/null
@@ -1,91 +0,0 @@
-/*
- * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-/**
- * @file Buffer.h
- * @brief This file contains Buffer interface and InternalBuffer, ExternalBuffer class
- */
-#ifndef __ONERT_INTERP_BUFFER_H__
-#define __ONERT_INTERP_BUFFER_H__
-
-#include <memory>
-
-#include "ir/Data.h"
-
-namespace onert
-{
-namespace interp
-{
-
-/**
- * @brief Interface for writable data area
- */
-class Buffer : public ir::Data
-{
-public:
- /**
- * @brief Return writable pointer for data area
- * @return Writable pointer
- */
- virtual uint8_t *baseWritable(void) const = 0;
-};
-
-/**
- * @brief Class for internally allocated data area
- */
-class InternalBuffer final : public Buffer
-{
-public:
- InternalBuffer(size_t size) : _base{std::make_unique<uint8_t[]>(size)}, _size{size}
- {
- // DO NOTHING
- }
-
-public:
- size_t size(void) const override { return _size; }
- const uint8_t *base(void) const override { return _base.get(); }
- uint8_t *baseWritable(void) const override { return _base.get(); }
-
-private:
- std::unique_ptr<uint8_t[]> _base;
- size_t _size;
-};
-
-/**
- * @brief Class for data area from outside
- */
-class ExternalBuffer final : public Buffer
-{
-public:
- ExternalBuffer(uint8_t *base, size_t size) : _base{base}, _size{size}
- {
- // DO NOTHING
- }
-
-public:
- size_t size(void) const override { return _size; }
- const uint8_t *base(void) const override { return _base; }
- uint8_t *baseWritable(void) const override { return _base; }
-
-private:
- uint8_t *_base;
- size_t _size;
-};
-
-} // namespace interp
-} // namespace onert
-
-#endif // __ONERT_INTERP_BUFFER_H__
diff --git a/runtime/onert/core/src/interp/ExecEnv.h b/runtime/onert/core/src/interp/ExecEnv.h
deleted file mode 100644
index 7f577ea6e..000000000
--- a/runtime/onert/core/src/interp/ExecEnv.h
+++ /dev/null
@@ -1,212 +0,0 @@
-/*
- * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-/**
- * @file ExecEnv.h
- * @brief This file contains ExecEnv to access interpreter tensor and execution status
- */
-#ifndef __ONERT_INTERP_EXEC_ENV_H_
-#define __ONERT_INTERP_EXEC_ENV_H_
-
-#include <unordered_set>
-
-#include "ir/Graph.h"
-#include "Tensor.h"
-
-namespace onert
-{
-namespace interp
-{
-
-/**
- * @brief Class to gather interpreter execution environment
- * Each interpreter instance own execution environment
- */
-class ExecEnv
-{
-public:
- /**
- * @brief Construct a new Exec Env object (deleted)
- */
- ExecEnv(void) = delete;
- /**
- * @brief Construct a new ExecEnv object
- * @param[in] graph Graph to execute by interpreter
- */
- explicit ExecEnv(const ir::Graph &graph) : _graph(graph)
- {
- // DO NOTHING
- }
-
-public:
- /**
- * @brief Return graph to execute
- * @return Graph
- */
- const ir::Graph &graph(void) const { return _graph; }
- /**
- * @brief Assign tensor to environment which have allocated or assigned buffer
- * @param[in] index Tensor index
- * @param[in] tensor Tensor
- */
- void assignTensor(const ir::OperandIndex index, std::shared_ptr<ITensor> tensor)
- {
- assert(tensor->bufferRO() != nullptr);
- _tensors.emplace(index, tensor);
- }
-
- /**
- * @brief Return tensor pointer in environment
- * @param[in] index Tensor index
- * can_optional @c True if tensor can be optional input, otherwise @c false
- * @return Tensor pointer
- */
- const ITensor *tensorAt(const ir::OperandIndex index, bool can_optional = false) const
- {
- if (_tensors.find(index) == _tensors.end())
- {
- // It may optional input,
- // otherwise input is not set by runtime user
- if (can_optional)
- {
- return nullptr;
- }
-
- throw std::runtime_error{"ExecEnv: Input is not set"};
- }
-
- return _tensors.at(index).get();
- }
-
- /**
- * @brief Check environment contains tensor
- * @param[in] index Tensor index
- * @return @c true if environment contain tensor, otherwise @c false
- */
- bool contains(const ir::OperandIndex index) const
- {
- return (_tensors.find(index) != _tensors.end());
- }
-
- /**
- * @brief Allocate tensor using operand info
- * @param[in] index Tensor index
- * @param[in] info Operand info
- * @note If already allocated, just return
- * @TODO More smart allocation policy
- */
- void allocateIfNeeded(const ir::OperandIndex index, const ir::OperandInfo &info)
- {
- // already allocated, or constant
- if (contains(index))
- {
- return;
- }
-
- // Buffer from external (ex. model output)
- auto tensor = std::make_shared<Tensor>(info);
- if (isExtBuffer(index))
- {
- tensor->setBuffer(_external_buffers.at(index));
- assignTensor(index, tensor);
-
- return;
- }
-
- tensor->setBuffer(std::make_shared<InternalBuffer>(tensor->total_size()));
- assignTensor(index, tensor);
- _buffers.insert(index);
- }
-
- /**
- * @brief Allocate read-only tensor and share data with other tensor
- * @param[in] index Tensor index
- * @param[in] info Operand info
- * @param[in] index_to_share Tensor index that have data to share
- */
- void allocateAndShareIfNeeded(const ir::OperandIndex index, const ir::OperandInfo &info,
- const ir::OperandIndex index_to_share)
- {
- if (!contains(index_to_share))
- {
- throw std::runtime_error{"Cannot find tensor to share data"};
- }
-
- // already allocated
- if (contains(index))
- {
- return;
- }
-
- if (isExtBuffer(index))
- {
- auto tensor = std::make_shared<Tensor>(info);
- tensor->setBuffer(_external_buffers.at(index));
- assignTensor(index, tensor);
- }
- else
- {
- auto tensor = std::make_shared<ROTensor>(info);
- tensor->setData(tensorAt(index_to_share)->shareData());
- assignTensor(index, tensor);
- _buffers.insert(index);
- }
- }
-
- /**
- * @brief Free buffer if allocated by allocateIfNeed
- * @param[in] index Tensor index
- * @note If allocated by outside, just return
- */
- void freeIfAllocated(const ir::OperandIndex index)
- {
- if (_buffers.find(index) != _buffers.end())
- {
- _tensors.at(index)->releaseData();
- }
- }
-
- /**
- * @brief Assign ExternalBuffer into external buffer map
- * @param[in] index Tensor index
- * @param[in] buffer External buffer
- */
- void assignExternalBuffer(const ir::OperandIndex index, std::shared_ptr<ExternalBuffer> buffer)
- {
- _external_buffers.emplace(index, buffer);
- }
-
-private:
- bool isExtBuffer(const ir::OperandIndex index)
- {
- return (_external_buffers.find(index) != _external_buffers.end());
- }
-
-private:
- const ir::Graph &_graph;
- // Tensor map to use in interpreter
- // It should map tensors that have allocated or assigned buffer pointer
- std::unordered_map<ir::OperandIndex, std::shared_ptr<ITensor>> _tensors;
- // Tensors allocated by allocateIfNeed (buffer)
- std::unordered_set<ir::OperandIndex> _buffers;
- // Tensor buffer from external
- std::unordered_map<ir::OperandIndex, std::shared_ptr<ExternalBuffer>> _external_buffers;
-};
-
-} // namespace interp
-} // namespace onert
-
-#endif // __ONERT_INTERP_EXEC_ENV_H_
diff --git a/runtime/onert/core/src/interp/InterpExecutor.cc b/runtime/onert/core/src/interp/InterpExecutor.cc
deleted file mode 100644
index f04777174..000000000
--- a/runtime/onert/core/src/interp/InterpExecutor.cc
+++ /dev/null
@@ -1,127 +0,0 @@
-/*
- * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#include "InterpExecutor.h"
-
-#include "ExecEnv.h"
-#include "Interpreter.h"
-
-#include "util/logging.h"
-
-#include <memory>
-
-namespace onert
-{
-namespace interp
-{
-
-void InterpExecutor::execute(const exec::IODescription &desc)
-{
- /************************************************************************
- * Prepare execution model (submodel)
- It may execute divided model
- but now consider model inference is done at interpreter
- ***********************************************************************/
- ir::OperandIndexMap<std::shared_ptr<ITensor>> tensor_map;
-
- for (uint32_t n = 0; n < _graph.getInputs().size(); n++)
- {
- ir::IOIndex index{n};
- const auto input_index = _graph.getInputs().at(index);
-
- const auto input = desc.inputs.at(n).get();
- if (input == nullptr)
- {
- // Optional input
- continue;
- }
-
- auto input_tensor = std::make_shared<ROTensor>(input->info);
- input_tensor->setData(std::make_shared<const ir::ExternalData>(
- reinterpret_cast<const uint8_t *>(input->buffer), input->size));
- tensor_map[input_index] = input_tensor;
- }
-
- /************************************************************************
- * Prepare execution environment
- Execution environment will be assigned to invoked interpreter instance
- ***********************************************************************/
-
- std::unique_ptr<ExecEnv> interp_env = std::make_unique<ExecEnv>(_graph);
-
- // Assign input/output tensor into interpreter execution environment
- for (auto index : _graph.getInputs())
- {
- if (tensor_map.find(index) != tensor_map.end())
- {
- VERBOSE(INTERPRETER) << "Assign input tensor. operand index:" << index << std::endl;
- interp_env->assignTensor(index, tensor_map.at(index));
- }
- }
-
- for (uint32_t n = 0; n < _graph.getOutputs().size(); n++)
- {
- ir::IOIndex index{n};
- const auto output_index = _graph.getOutputs().at(index);
- const auto output = desc.outputs.at(n).get();
- if (output == nullptr)
- {
- // Optional output
- continue;
- }
-
- VERBOSE(INTERPRETER) << "Set out buffer to ExecEnv. operand index:" << output_index.value()
- << std::endl;
-
- interp_env->assignExternalBuffer(
- output_index,
- std::make_shared<ExternalBuffer>(reinterpret_cast<uint8_t *>(output->buffer), output->size));
- }
-
- // Allocate constant tensor
- _graph.operands().iterate([&](const ir::OperandIndex &ind, const ir::Operand &obj) {
- if (obj.isConstant())
- {
- VERBOSE(INTERPRETER) << "Allocate and assign constant tensor. operand index:" << ind
- << std::endl;
-
- assert(obj.data());
- auto const_tensor = std::make_shared<ROTensor>(obj.info());
- // Assume that the interpreter's tensor layout is the same as the model's (NHWC)
- const_tensor->setData(
- std::make_shared<ir::ExternalData>(obj.data()->base(), obj.info().total_size()));
- interp_env->assignTensor(ind, const_tensor);
- }
- });
-
- /*****************************************************************************
- * Invoke interpreter
- ****************************************************************************/
-
- interp::Interpreter interp(std::move(interp_env));
- interp.run();
-
- /*****************************************************************************
- * Invoked interpreter run is finished
- ****************************************************************************/
-
- // If the interpreter executes a submodel
- // 1. Get the submodel's output tensors into tensor_map to save the result
- // 2. Generate a new ExecEnv for the next interpretation
-}
-
-} // namespace interp
-} // namespace onert
diff --git a/runtime/onert/core/src/interp/InterpExecutor.h b/runtime/onert/core/src/interp/InterpExecutor.h
deleted file mode 100644
index d6d5dd0a3..000000000
--- a/runtime/onert/core/src/interp/InterpExecutor.h
+++ /dev/null
@@ -1,89 +0,0 @@
-/*
- * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-/**
- * @file InterpExecutor.h
- * @brief This file contains InterpExecutor class\n
- * to manage interpreter execution and environment
- */
-#ifndef __ONERT_INTERP_INTERP_EXECUTOR_H__
-#define __ONERT_INTERP_INTERP_EXECUTOR_H__
-
-#include "ir/OperandIndexMap.h"
-#include "ir/Graph.h"
-#include "exec/IExecutor.h"
-
-namespace onert
-{
-namespace interp
-{
-
-class ITensor;
-
-/**
- * @brief Class to execute model using interpreter
- */
-class InterpExecutor final : public exec::IExecutor
-{
-public:
- explicit InterpExecutor(const ir::Graph &graph) : _graph(graph)
- {
- // DO NOTHING
- }
-
-public:
- /**
- * @brief Return graph object
- * @return Graph object
- */
- const ir::Graph &graph() final { return _graph; }
-
- const ir::Graph &parent_graph() final
- {
- throw std::runtime_error{"Interpreter does not support this function."};
- }
- void setIndexedRanks(std::shared_ptr<ir::OperationIndexMap<int64_t>>) override{
- // Not implemented
- };
- /**
- * @brief Start execution
- * @note It should be called after setting input and output buffer
- */
- void execute(const exec::IODescription &desc) final;
- void execute(const std::vector<backend::IPortableTensor *> &,
- const std::vector<backend::IPortableTensor *> &) final
- {
- throw std::runtime_error{"Interpreter does not support subgraph calls (control flow ops)"};
- }
- const std::vector<backend::builtin::IOTensor *> &getOutputTensors() const final
- {
- throw std::runtime_error{"Interpreter does not support this function."};
- }
-
-private:
- /**
- * @brief Copy of target graph for lowering
- * @note It uses a copy of the graph, not a reference,
- * because the original graph may be deallocated by the frontend.
- */
- const ir::Graph _graph;
- ir::OperandIndexMap<std::shared_ptr<ITensor>> _tensor_map;
-};
-
-} // namespace interp
-} // namespace onert
-
-#endif // __ONERT_INTERP_INTERP_EXECUTOR_H__
diff --git a/runtime/onert/core/src/interp/InterpExecutor.test.cc b/runtime/onert/core/src/interp/InterpExecutor.test.cc
deleted file mode 100644
index 9f95ffee0..000000000
--- a/runtime/onert/core/src/interp/InterpExecutor.test.cc
+++ /dev/null
@@ -1,355 +0,0 @@
-/*
- * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#include "InterpExecutor.h"
-
-#include "exec/Execution.h"
-#include "ir/Graph.h"
-#include "ir/operation/BinaryArithmetic.h"
-
-#include <gtest/gtest.h>
-
-#include <memory>
-
-namespace
-{
-
-using namespace onert::ir;
-using InterpExecutor = onert::interp::InterpExecutor;
-using Execution = onert::exec::Execution;
-using Executors = onert::exec::Executors;
-
-class InterpExecutorTest : public ::testing::Test
-{
-protected:
- virtual void SetUp() {}
- void CreateSimpleModel()
- {
- // Model: one elementwise add operation
- // model input: lhs, rhs
- // model output: add result
- // lhs, rhs, result shape: {1, 2, 2, 1}
- // activation: none (constant)
- _graph = std::make_unique<Graph>();
-
- // Add operands
-
- Shape shape{1, 2, 2, 1};
- TypeInfo type{DataType::INT32};
- Shape shape_scalar(0);
- TypeInfo type_scalar{DataType::INT32};
-
- auto operand_lhs = _graph->addOperand(shape, type);
- auto operand_rhs = _graph->addOperand(shape, type);
- auto operand_result = _graph->addOperand(shape, type);
-
- // Add operations
-
- operation::BinaryArithmetic::Param param;
- param.arithmetic_type = operation::BinaryArithmetic::ArithmeticType::ADD;
- param.activation = Activation::NONE;
- auto input_set = OperandIndexSequence{operand_lhs, operand_rhs};
- auto output_set = OperandIndexSequence{operand_result};
- _graph->addOperation(
- std::make_unique<operation::BinaryArithmetic>(input_set, output_set, param));
-
- // Identify model inputs and outputs
-
- _graph->getInputs().append(operand_lhs);
- _graph->getInputs().append(operand_rhs);
- _graph->getOutputs().append(operand_result);
-
- _graph->verify();
-
- auto model = std::make_shared<onert::ir::Model>();
- model->push(onert::ir::SubgraphIndex{0}, _graph);
-
- _executors = std::make_shared<Executors>();
- _executors->emplace(onert::ir::SubgraphIndex{0}, std::make_unique<InterpExecutor>(*_graph));
- }
-
- void CreateTwoStepModel()
- {
- // Model: two elementwise add operations
- // model input: lhs, rhs1
- // model output: second add result (result2)
- // constant: rhs2
- // result1 <= (lhs + rhs1)
- // result2 <= (result1 + rhs2)
- // lhs, rhs1, rhs2, result1, result2 shape: {1, 2, 2, 1}
- // activation: none (constant)
- _graph = std::make_unique<Graph>();
-
- // 1st add operands (result1 <= lhs + rhs1)
-
- Shape shape{1, 2, 2, 1};
- TypeInfo type{DataType::INT32};
- Shape shape_scalar(0);
- TypeInfo type_scalar{DataType::INT32};
-
- static int32_t rhs2_data[4] = {3, 1, -1, 5};
-
- auto operand_lhs = _graph->addOperand(shape, type);
- auto operand_rhs1 = _graph->addOperand(shape, type);
- auto operand_result1 = _graph->addOperand(shape, type);
- auto operand_rhs2 = _graph->addOperand(shape, type);
- auto operand_result2 = _graph->addOperand(shape, type);
- _graph->operands()
- .at(operand_rhs2)
- .data(std::make_unique<CachedData>(reinterpret_cast<const uint8_t *>(&rhs2_data), 16));
-
- // Add operations (result1 <= lhs + rhs1, result2 <= result1 + rhs2)
-
- operation::BinaryArithmetic::Param param1;
- param1.arithmetic_type = operation::BinaryArithmetic::ArithmeticType::ADD;
- param1.activation = Activation::NONE;
- auto input_set1 = OperandIndexSequence{operand_lhs, operand_rhs1};
- auto output_set1 = OperandIndexSequence{operand_result1};
- _graph->addOperation(
- std::make_unique<operation::BinaryArithmetic>(input_set1, output_set1, param1));
-
- operation::BinaryArithmetic::Param param2;
- param2.arithmetic_type = operation::BinaryArithmetic::ArithmeticType::ADD;
- param2.activation = Activation::NONE;
- auto input_set2 = OperandIndexSequence{operand_result1, operand_rhs2};
- auto output_set2 = OperandIndexSequence{operand_result2};
- _graph->addOperation(
- std::make_unique<operation::BinaryArithmetic>(input_set2, output_set2, param2));
-
- // Identify model inputs and outputs
-
- _graph->getInputs().append(operand_lhs);
- _graph->getInputs().append(operand_rhs1);
- _graph->getOutputs().append(operand_result2);
-
- _graph->verify();
-
- auto model = std::make_shared<onert::ir::Model>();
- model->push(onert::ir::SubgraphIndex{0}, _graph);
-
- _executors = std::make_shared<Executors>();
- _executors->emplace(onert::ir::SubgraphIndex{0}, std::make_unique<InterpExecutor>(*_graph));
- }
-
- void CreateUnspecifiedDimensionsModel()
- {
- // Model: one elementwise add operation
- // model input: lhs, rhs
- // model output: add result
- // lhs, rhs, result shape: {1, unknown, 2, 1}
- // activation: none (constant)
- _graph = std::make_unique<Graph>();
-
- // Add operands
-
- Shape shape{1, 0, 2, 1};
- TypeInfo type{DataType::INT32};
- Shape shape_scalar(0);
- TypeInfo type_scalar{DataType::INT32};
-
- auto operand_lhs = _graph->addOperand(shape, type);
- auto operand_rhs = _graph->addOperand(shape, type);
-
- auto operand_activation = _graph->addOperand(shape_scalar, type_scalar);
- _graph->operands()
- .at(operand_activation)
- .data(std::make_unique<CachedData>(reinterpret_cast<const uint8_t *>(&_activation_value), 4));
-
- auto operand_result = _graph->addOperand(shape, type);
-
- // Add operations
-
- operation::BinaryArithmetic::Param param;
- param.arithmetic_type = operation::BinaryArithmetic::ArithmeticType::ADD;
- param.activation = Activation::NONE;
- auto input_set = OperandIndexSequence{operand_lhs, operand_rhs};
- auto output_set = OperandIndexSequence{operand_result};
- _graph->addOperation(
- std::make_unique<operation::BinaryArithmetic>(input_set, output_set, param));
-
- // Identify model inputs and outputs
-
- _graph->getInputs().append(operand_lhs);
- _graph->getInputs().append(operand_rhs);
- _graph->getOutputs().append(operand_result);
-
- _graph->verify();
-
- auto model = std::make_shared<onert::ir::Model>();
- model->push(onert::ir::SubgraphIndex{0}, _graph);
-
- _executors = std::make_shared<Executors>();
- _executors->emplace(onert::ir::SubgraphIndex{0}, std::make_unique<InterpExecutor>(*_graph));
- }
-
- void createExecution() { _execution = std::make_unique<Execution>(_executors); }
-
- virtual void TearDown() { _executors = nullptr; }
-
- std::shared_ptr<Graph> _graph{nullptr};
- std::shared_ptr<Executors> _executors{nullptr};
- std::unique_ptr<Execution> _execution{nullptr};
- const int32_t _activation_value{0};
-};
-
-TEST_F(InterpExecutorTest, create_empty)
-{
- Graph graph;
- graph.verify();
- auto executor = std::make_unique<InterpExecutor>(graph);
- ASSERT_NE(executor, nullptr);
-}
-
-TEST_F(InterpExecutorTest, create_simple)
-{
- CreateSimpleModel();
- ASSERT_NE(_executors, nullptr);
- ASSERT_NE(_executors->at(onert::ir::SubgraphIndex{0}), nullptr);
-}
-
-TEST_F(InterpExecutorTest, neg_setInput)
-{
- CreateSimpleModel();
- createExecution();
-
- auto input1 = IOIndex{0};
- const int32_t input1_buffer[4] = {1, 0, -1, -2};
-
- EXPECT_THROW(_execution->setInput(input1, reinterpret_cast<const void *>(input1_buffer), 4),
- std::runtime_error);
- EXPECT_THROW(_execution->setInput(input1, reinterpret_cast<const void *>(input1_buffer), 12),
- std::runtime_error);
- EXPECT_NO_THROW(_execution->setInput(input1, reinterpret_cast<const void *>(input1_buffer), 16));
-}
-
-TEST_F(InterpExecutorTest, neg_setOutput)
-{
- CreateSimpleModel();
- createExecution();
-
- auto output = IOIndex{0};
- auto output_idx = _graph->getOutputs().at(output);
-
- int32_t output_buffer[4] = {};
-
- EXPECT_THROW(_execution->setOutput(output, reinterpret_cast<void *>(output_buffer), 4),
- std::runtime_error);
- EXPECT_THROW(_execution->setOutput(output, reinterpret_cast<void *>(output_buffer), 12),
- std::runtime_error);
- EXPECT_NO_THROW(_execution->setOutput(output, reinterpret_cast<void *>(output_buffer), 16));
-}
-
-TEST_F(InterpExecutorTest, neg_setInputForUnspecifiedDimensions)
-{
- CreateUnspecifiedDimensionsModel();
- createExecution();
-
- auto input1 = IOIndex{0};
- const int32_t input1_buffer[4] = {1, 0, -1, -2};
-
- TypeInfo operand_type{DataType::INT32};
- Shape operand_shape{1, 2, 2, 1};
-
- EXPECT_THROW(_execution->setInput(input1, operand_type, operand_shape,
- reinterpret_cast<const void *>(input1_buffer), 4),
- std::runtime_error);
- EXPECT_THROW(_execution->setInput(input1, operand_type, operand_shape,
- reinterpret_cast<const void *>(input1_buffer), 12),
- std::runtime_error);
- EXPECT_NO_THROW(_execution->setInput(input1, operand_type, operand_shape,
- reinterpret_cast<const void *>(input1_buffer), 16));
-}
-
-TEST_F(InterpExecutorTest, neg_setOutputForUnspecifiedDimensions)
-{
- CreateUnspecifiedDimensionsModel();
- createExecution();
-
- auto output = IOIndex{0};
- auto output_idx = _graph->getOutputs().at(output);
-
- TypeInfo operand_type{DataType::INT32};
- Shape operand_shape{1, 2, 2, 1};
-
- int32_t output_buffer[4] = {};
-
- EXPECT_THROW(_execution->setOutput(output, operand_type, operand_shape,
- reinterpret_cast<void *>(output_buffer), 4),
- std::runtime_error);
- EXPECT_THROW(_execution->setOutput(output, operand_type, operand_shape,
- reinterpret_cast<void *>(output_buffer), 12),
- std::runtime_error);
- EXPECT_NO_THROW(_execution->setOutput(output, operand_type, operand_shape,
- reinterpret_cast<void *>(output_buffer), 16));
-}
-
-TEST_F(InterpExecutorTest, execute)
-{
- CreateSimpleModel();
- createExecution();
-
- auto input1 = IOIndex{0};
- auto input2 = IOIndex{1};
- auto input1_idx = _graph->getInputs().at(input1);
- auto input2_idx = _graph->getInputs().at(input2);
-
- const int32_t input1_buffer[4] = {1, 0, -1, -2};
- const int32_t input2_buffer[4] = {1, -3, 2, -4};
-
- auto output = IOIndex{0};
- auto output_idx = _graph->getOutputs().at(output);
-
- int32_t output_buffer[4] = {};
-
- EXPECT_NO_THROW(_execution->setInput(input1, reinterpret_cast<const void *>(input1_buffer), 16));
- EXPECT_NO_THROW(_execution->setInput(input2, reinterpret_cast<const void *>(input2_buffer), 16));
- EXPECT_NO_THROW(_execution->setOutput(output, reinterpret_cast<void *>(output_buffer), 16));
- EXPECT_NO_THROW(_execution->execute());
- EXPECT_EQ(output_buffer[0], 2);
- EXPECT_EQ(output_buffer[1], -3);
- EXPECT_EQ(output_buffer[2], 1);
- EXPECT_EQ(output_buffer[3], -6);
-}
-
-TEST_F(InterpExecutorTest, executeTwoStep)
-{
- CreateTwoStepModel();
- createExecution();
-
- auto input1 = IOIndex{0};
- auto input2 = IOIndex{1};
- auto input1_idx = _graph->getInputs().at(input1);
- auto input2_idx = _graph->getInputs().at(input2);
-
- const int32_t input1_buffer[4] = {1, 0, -1, -2};
- const int32_t input2_buffer[4] = {1, -3, 2, -4};
-
- auto output = IOIndex{0};
- auto output_idx = _graph->getOutputs().at(output);
-
- int32_t output_buffer[4] = {};
-
- EXPECT_NO_THROW(_execution->setInput(input1, reinterpret_cast<const void *>(input1_buffer), 16));
- EXPECT_NO_THROW(_execution->setInput(input2, reinterpret_cast<const void *>(input2_buffer), 16));
- EXPECT_NO_THROW(_execution->setOutput(output, reinterpret_cast<void *>(output_buffer), 16));
- EXPECT_NO_THROW(_execution->execute());
- EXPECT_EQ(output_buffer[0], 5);
- EXPECT_EQ(output_buffer[1], -2);
- EXPECT_EQ(output_buffer[2], 0);
- EXPECT_EQ(output_buffer[3], -1);
-}
-
-} // namespace
diff --git a/runtime/onert/core/src/interp/InterpOps.lst b/runtime/onert/core/src/interp/InterpOps.lst
deleted file mode 100644
index 0714df38a..000000000
--- a/runtime/onert/core/src/interp/InterpOps.lst
+++ /dev/null
@@ -1,73 +0,0 @@
-/*
- * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#ifndef INTERP_OP
-#error Define INTERP_OP before including this file
-#endif
-
- // Supported operation names in the interpreter
- //
- // Same list as Operations.lst
- // Comment out an operation if it is not supported by the interpreter
-INTERP_OP(BinaryArithmetic)
-//INTERP_OP(BatchToSpaceND)
-//INTERP_OP(Cast)
-INTERP_OP(Conv2D)
-INTERP_OP(DepthwiseConv2D)
-INTERP_OP(Pool2D)
-INTERP_OP(Concat)
-INTERP_OP(FullyConnected)
-//INTERP_OP(Reduce)
-INTERP_OP(Reshape)
-INTERP_OP(Softmax)
-//INTERP_OP(Squeeze)
-//INTERP_OP(Slice)
-//INTERP_OP(StridedSlice)
-INTERP_OP(ElementwiseActivation)
-//INTERP_OP(Transpose)
-//INTERP_OP(Exp)
-//INTERP_OP(Comparison)
-//INTERP_OP(LogicalNot)
-//INTERP_OP(LSTM)
-//INTERP_OP(RSQRT)
-//INTERP_OP(ResizeBilinear)
-//INTERP_OP(RNN)
-//INTERP_OP(Floor)
-//INTERP_OP(SpaceToBatchND)
-//INTERP_OP(SpaceToDepth)
-//INTERP_OP(EmbeddingLookup)
-//INTERP_OP(L2Normalization)
-//INTERP_OP(HashtableLookup)
-INTERP_OP(InstanceNorm)
-//INTERP_OP(PReLU)
-INTERP_OP(TransposeConv)
-//INTERP_OP(SQRT)
-//INTERP_OP(SquaredDifference)
-//INTERP_OP(TopKV2)
-INTERP_OP(Gather)
-//INTERP_OP(Neg)
-//INTERP_OP(Abs)
-//INTERP_OP(ArgMax)
-//INTERP_OP(Dequantize)
-//INTERP_OP(LocalResponseNormalization)
-//INTERP_OP(DepthToSpace)
-//INTERP_OP(Pack)
-//INTERP_OP(Split)
-//INTERP_OP(Unpack)
-INTERP_OP(Pad)
-//INTERP_OP(Custom)
-//INTERP_OP(Permute)
-//INTERP_OP(OneHot)
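The deleted InterpOps.lst is an X-macro list: each consumer defines INTERP_OP before including the file and gets one generated line per listed operation, while the commented-out entries expand to nothing and fall through to the "not yet implemented" error at dispatch time. A rough sketch of the two expansions for the Conv2D entry, using only names that appear in Registration.h and Interpreter.cc below:

  // Registration.h: #define INTERP_OP(InternalName) OpKernel *get##InternalName();
  OpKernel *getConv2D(); // one kernel-factory declaration per supported operation

  // Interpreter.cc, OperationExecutor constructor:
  // #define INTERP_OP(InternalName) _kernels[ir::OpCode::InternalName] = get##InternalName();
  _kernels[ir::OpCode::Conv2D] = getConv2D(); // fills the OpCode-to-kernel dispatch table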
diff --git a/runtime/onert/core/src/interp/Interpreter.cc b/runtime/onert/core/src/interp/Interpreter.cc
deleted file mode 100644
index e01afb8a6..000000000
--- a/runtime/onert/core/src/interp/Interpreter.cc
+++ /dev/null
@@ -1,184 +0,0 @@
-/*
- * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#include "Interpreter.h"
-
-#include <stack>
-#include <unordered_set>
-
-#include "Registration.h"
-
-#include "ir/OperandIndexMap.h"
-#include "util/logging.h"
-#include "ir/OperationVisitor.h"
-
-namespace onert
-{
-namespace interp
-{
-
-// TODO more structured execution kernel implementation
-// TODO use cker for execution
-// TODO divide tensor prepare and execution
-// TODO introduce memory manager (buffer allocate and free)
-class OperationExecutor
-{
-public:
- OperationExecutor(ExecEnv *env) : _env{env}
- {
-#define INTERP_OP(InternalName) _kernels[ir::OpCode::InternalName] = get##InternalName();
-#include "InterpOps.lst"
-#undef INTERP_OP
- }
-
- void execute(const ir::OperationIndex &idx)
- {
- const ir::Operation &node = _env->graph().operations().at(idx);
- const auto nodeName = node.name();
- VERBOSE(INTERPRETER) << "Prepare output operands and execute " << nodeName
- << " operation (id: " << idx << ")" << std::endl;
-
- const auto nodeOpCode = node.opcode();
- if (_kernels.find(nodeOpCode) == _kernels.end())
- {
- throw std::runtime_error{"Interpreter: Operation " + nodeName + " is not yet implemented"};
- }
-
- if (_kernels[nodeOpCode]->prepare != nullptr)
- {
- _kernels[nodeOpCode]->prepare(_env, node);
- }
- _kernels[nodeOpCode]->invoke(_env, node);
- }
-
-private:
- ExecEnv *_env;
- std::unordered_map<ir::OpCode, OpKernel *> _kernels;
-};
-
-void Interpreter::run()
-{
- VERBOSE(INTERPRETER) << "Interpreter is invoked " << std::endl;
-
- // operand_stack: save operands prepared to use
- std::stack<ir::OperandIndex> operand_stack;
-
- // Note: We should push inputs first, then constants.
- // We use use-def chains to find operators that are ready for execution,
- // but use-def cannot handle parameters (usually constants, but not always).
- // Note: If all model inputs are constant, this may not work (depending on tensor order),
- // but that scenario is unlikely to exist.
- for (auto ind : _env->graph().getInputs())
- {
- VERBOSE(INTERPRETER) << "Input: Push to operand stack " << ind << std::endl;
-
- operand_stack.push(ind);
- }
-
- _env->graph().operands().iterate([&](const ir::OperandIndex &ind, const ir::Operand &obj) {
- if (obj.isConstant())
- {
- VERBOSE(INTERPRETER) << "Constant: Push to operand stack " << ind << std::endl;
-
- operand_stack.push(ind);
- }
- });
-
- // Execution
- std::unordered_set<ir::OperandIndex> ready_check;
- std::unordered_set<ir::OperationIndex> executed;
- OperationExecutor executor{_env.get()};
- while (!operand_stack.empty())
- {
- const auto current_operand_index = operand_stack.top();
- operand_stack.pop();
- VERBOSE(INTERPRETER) << "Poped operand " << current_operand_index.value()
- << " is checked ready to use" << std::endl;
-
- assert(ready_check.find(current_operand_index) == ready_check.end());
- ready_check.insert(current_operand_index);
-
- // Find prepared operations by scan use of current operand
- std::stack<ir::OperationIndex> operation_stack;
- const auto use_operators = _env->graph().operands().at(current_operand_index).getUses();
- for (const auto &use_operator : use_operators)
- {
- // Assumption: all parameters are ready to use
- bool operator_ready = true;
- for (auto input_index : _env->graph().operations().at(use_operator).getInputs())
- {
- if (ready_check.find(input_index) == ready_check.end())
- {
- operator_ready = false;
- break;
- }
- }
-
- if (operator_ready)
- {
- VERBOSE(INTERPRETER) << "Ready to execute operation " << use_operator << std::endl;
- operation_stack.push(use_operator);
- }
- }
-
- while (!operation_stack.empty())
- {
- const auto current_operation_index = operation_stack.top();
- operation_stack.pop();
- VERBOSE(INTERPRETER) << "Poped operation: " << current_operation_index << "("
- << _env->graph().operations().at(current_operation_index).name() << ")"
- << std::endl;
-
- // execution
- // 1. Prepare output tensor
- // 2. Call operation kernel
- executor.execute(current_operation_index);
- executed.insert(current_operation_index);
-
- // 3. Push each output into operand stack
- const auto def_operands = _env->graph().operations().at(current_operation_index).getOutputs();
- for (auto def_operand : def_operands)
- {
- VERBOSE(INTERPRETER) << "Buffer: Push to operand stack " << def_operand.value()
- << std::endl;
- operand_stack.push(def_operand);
- }
-
- // 4. Free if lifetime of buffer operands used by input is finished
- for (auto input_index : _env->graph().operations().at(current_operation_index).getInputs())
- {
- const auto use_operators = _env->graph().operands().at(input_index).getUses();
- bool dead_buffer = true;
- for (const auto &use_operator : use_operators)
- {
- if (executed.find(use_operator) == executed.end())
- {
- dead_buffer = false;
- break;
- }
- }
-
- if (dead_buffer)
- {
- _env->freeIfAllocated(input_index);
- }
- }
- }
- }
-}
-
-} // namespace interp
-} // namespace onert
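Interpreter::run() above is a readiness-driven data-flow scheduler: operands that become available are pushed on a stack, an operation fires once all of its inputs are ready, its outputs are pushed in turn, and input buffers whose last user has executed are freed. A minimal standalone sketch of the same scheduling idea using only the C++ standard library; ToyOp and runToyGraph are hypothetical illustration names, and kernel dispatch and buffer freeing are left out:

  #include <cstdio>
  #include <map>
  #include <set>
  #include <stack>
  #include <vector>

  struct ToyOp // one node of a toy data-flow graph
  {
    std::vector<int> inputs;  // operand ids consumed
    std::vector<int> outputs; // operand ids produced
  };

  // Fire every op whose inputs are all ready, starting from the given ready operands.
  void runToyGraph(const std::vector<ToyOp> &ops, const std::vector<int> &initially_ready)
  {
    // operand id -> ops that consume it (the use side of use-def)
    std::map<int, std::vector<size_t>> uses;
    for (size_t i = 0; i < ops.size(); ++i)
      for (int in : ops[i].inputs)
        uses[in].push_back(i);

    std::set<int> ready;
    std::set<size_t> executed;
    std::stack<int> operand_stack;
    for (int id : initially_ready)
      operand_stack.push(id);

    while (!operand_stack.empty())
    {
      const int cur = operand_stack.top();
      operand_stack.pop();
      ready.insert(cur);

      for (size_t op : uses[cur])
      {
        if (executed.count(op))
          continue;
        bool all_ready = true;
        for (int in : ops[op].inputs)
          if (!ready.count(in))
          {
            all_ready = false;
            break;
          }
        if (!all_ready)
          continue;

        std::printf("execute op %zu\n", op); // kernel prepare/invoke would go here
        executed.insert(op);
        for (int out : ops[op].outputs)
          operand_stack.push(out); // outputs become ready operands for downstream ops
      }
    }
  }

  int main()
  {
    // a(0) -> op0 -> b(1) -> op1 -> c(2)
    std::vector<ToyOp> ops = {{{0}, {1}}, {{1}, {2}}};
    runToyGraph(ops, {0}); // prints "execute op 0" then "execute op 1"
    return 0;
  }

Seeding the stack with the graph inputs (and constants, in the removed code) is what starts the loop; every later operand becomes ready exactly when the operation that defines it has executed.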
diff --git a/runtime/onert/core/src/interp/Interpreter.h b/runtime/onert/core/src/interp/Interpreter.h
deleted file mode 100644
index d2165f538..000000000
--- a/runtime/onert/core/src/interp/Interpreter.h
+++ /dev/null
@@ -1,64 +0,0 @@
-/*
- * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-/**
- * @file Interpreter.h
- * @brief This file contains Interpreter class for interpretation
- */
-#ifndef __ONERT_INTERP_INTERPRETER_H__
-#define __ONERT_INTERP_INTERPRETER_H__
-
-#include "ExecEnv.h"
-
-namespace onert
-{
-namespace interp
-{
-
-/**
- * @brief Class for interpretation
- */
-class Interpreter
-{
-
-public:
- /**
- * @brief Construct a new Interpreter object (deleted)
- */
- Interpreter() = delete;
- /**
- * @brief Construct a new Interpreter object
- * @param[in] env Execution environment variable for interpreter object
- */
- Interpreter(std::unique_ptr<ExecEnv> env) : _env{std::move(env)}
- {
- // DO NOTHING
- }
-
-public:
- /**
- * @brief Run interpreter until there is no operation to execute
- */
- void run();
-
-private:
- std::unique_ptr<ExecEnv> _env;
-};
-
-} // namespace interp
-} // namespace onert
-
-#endif // __ONERT_INTERP_INTERPRETER_H__
diff --git a/runtime/onert/core/src/interp/Registration.h b/runtime/onert/core/src/interp/Registration.h
deleted file mode 100644
index 956b92a53..000000000
--- a/runtime/onert/core/src/interp/Registration.h
+++ /dev/null
@@ -1,43 +0,0 @@
-/*
- * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#ifndef __ONERT_INTERP_REGISTRATION_H__
-#define __ONERT_INTERP_REGISTRATION_H__
-
-#include "ExecEnv.h"
-
-#include "ir/Operation.h"
-
-namespace onert
-{
-namespace interp
-{
-
-struct OpKernel
-{
- std::function<void(ExecEnv *, const ir::Operation &)> prepare;
- std::function<void(const ExecEnv *, const ir::Operation &)> invoke;
-};
-
-// Defined in operations/ directory
-#define INTERP_OP(InternalName) OpKernel *get##InternalName();
-#include "InterpOps.lst"
-#undef INTERP_OP
-
-} // namespace interp
-} // namespace onert
-
-#endif // __ONERT_INTERP_REGISTRATION_H__
diff --git a/runtime/onert/core/src/interp/Tensor.cc b/runtime/onert/core/src/interp/Tensor.cc
deleted file mode 100644
index de095c9e4..000000000
--- a/runtime/onert/core/src/interp/Tensor.cc
+++ /dev/null
@@ -1,57 +0,0 @@
-/*
- * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#include "Tensor.h"
-
-#define NO_USE(a) (void)(a)
-
-namespace onert
-{
-namespace interp
-{
-
-void ITensor::access(const std::function<void(backend::ITensor &tensor)> &fn) { fn(*this); }
-
-size_t ROTensor::calcOffset(const ir::Coordinates &coords) const
-{
- NO_USE(coords);
- throw std::runtime_error("offset_element_in_bytes is not supported for cpu::Tensor now.");
-}
-
-size_t Tensor::calcOffset(const ir::Coordinates &coords) const
-{
- NO_USE(coords);
- throw std::runtime_error("offset_element_in_bytes is not supported for cpu::Tensor now.");
-}
-
-ir::Layout ROTensor::layout() const
-{
- // TODO Change to return the frontend layout
- return ir::Layout::NHWC;
-}
-
-ir::Layout Tensor::layout() const
-{
- // TODO Change to return the frontend layout
- return ir::Layout::NHWC;
-}
-
-ir::Shape Tensor::getShape() const { return _info.shape(); }
-
-ir::Shape ROTensor::getShape() const { return _info.shape(); }
-
-} // namespace interp
-} // namespace onert
diff --git a/runtime/onert/core/src/interp/Tensor.h b/runtime/onert/core/src/interp/Tensor.h
deleted file mode 100644
index 642fdc164..000000000
--- a/runtime/onert/core/src/interp/Tensor.h
+++ /dev/null
@@ -1,189 +0,0 @@
-/*
- * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-/**
- * @file Tensor.h
- * @brief This file contains ITensor interface, ROTensor class, and Tensor class
- */
-#ifndef __ONERT_INTERP_TENSOR_H__
-#define __ONERT_INTERP_TENSOR_H__
-
-#include "Buffer.h"
-
-#include "ir/OperandInfo.h"
-#include "backend/ITensor.h"
-#include "ir/Layout.h"
-
-namespace onert
-{
-namespace interp
-{
-
-/**
- * @brief Interface to handle Tensor in interpreter
- */
-class ITensor : public backend::ITensor
-{
-public:
- virtual ~ITensor() = default;
-
-public:
- virtual uint8_t *buffer() const = 0;
- /**
- * @brief Return shared pointer for buffer
- * @return Buffer shared pointer
- */
- virtual std::shared_ptr<const Buffer> shareBuffer() const = 0;
- /**
- * @brief Return read-only buffer pointer
- * @return Read-only buffer pointer
- */
- virtual const uint8_t *bufferRO() const = 0;
- /**
- * @brief Return shared pointer for data
- * @return Data shared pointer
- */
- virtual std::shared_ptr<const ir::Data> shareData() const = 0;
- /**
- * @brief Set internal/external buffer
- * @param[in] buffer Buffer pointer
- */
- virtual void setBuffer(std::shared_ptr<const Buffer> buffer) = 0;
- /**
- * @brief Set data reference (including constant, input)
- * @param[in] data Data pointer
- */
- virtual void setData(std::shared_ptr<const ir::Data> data) = 0;
- virtual void releaseData() = 0;
-
- virtual size_t total_size() const = 0;
- virtual size_t calcOffset(const ir::Coordinates &coords) const = 0;
-
- virtual bool has_padding() const = 0;
- /**
- * @brief Return data type of tensor
- * @return Data type of tensor
- */
- virtual ir::DataType data_type() const = 0;
- /**
- * @brief Return TensorInfo
- * @return TensorInfo
- */
- virtual const ir::OperandInfo &tensorInfo() const = 0;
- /**
- * @brief Return number of elements
- * @return Number of elements
- */
- virtual uint64_t num_elements() const = 0;
- void access(const std::function<void(backend::ITensor &tensor)> &fn) final;
-};
-
-/**
- * @brief Class to handle tensor in interpreter as read-only
- */
-class ROTensor final : public ITensor
-{
-public:
- ROTensor() = delete;
- ROTensor(const ir::OperandInfo &info) : _info(info)
- {
- // DO NOTHING
- }
-
-public:
- uint8_t *buffer() const override { throw std::runtime_error{"Read only tensor"}; }
- std::shared_ptr<const Buffer> shareBuffer() const override
- {
- throw std::runtime_error{"Read only tensor"};
- }
- const uint8_t *bufferRO() const override { return _data->base(); }
- std::shared_ptr<const ir::Data> shareData() const override { return _data; }
- void setBuffer(std::shared_ptr<const Buffer> buffer) override { _data = buffer; }
- void setData(std::shared_ptr<const ir::Data> data) override { _data = data; }
- void releaseData() override { _data = nullptr; }
-
- size_t total_size() const override { return _info.total_size(); }
- size_t calcOffset(const ir::Coordinates &coords) const override;
- ir::Layout layout() const override;
- bool is_dynamic() const override { return false; }
- bool has_padding() const override { return false; }
- ir::DataType data_type() const override { return _info.typeInfo().type(); }
- float data_scale() const override { return _info.typeInfo().scale(); }
- int32_t data_zero_point() const override { return _info.typeInfo().zero_point(); }
- const std::vector<float> &data_scales() const override { return _info.typeInfo().scales(); }
- const std::vector<int32_t> &data_zero_points() const override
- {
- return _info.typeInfo().zero_points();
- }
- const ir::OperandInfo &tensorInfo() const override { return _info; }
- uint64_t num_elements() const override { return _info.shape().num_elements(); };
- ir::Shape getShape() const override;
-
-private:
- const ir::OperandInfo _info;
- std::shared_ptr<const ir::Data> _data{nullptr};
-};
-
-/**
- * @brief Class to handle tensor in interpreter as writable
- */
-class Tensor final : public ITensor
-{
-public:
- Tensor() = delete;
- Tensor(const ir::OperandInfo &info) : _info(info)
- {
- // DO NOTHING
- }
-
-public:
- uint8_t *buffer() const override { return _buffer->baseWritable(); }
- std::shared_ptr<const Buffer> shareBuffer() const override { return _buffer; };
- const uint8_t *bufferRO() const override { return _buffer->base(); }
- std::shared_ptr<const ir::Data> shareData() const override { return _buffer; }
- void setBuffer(std::shared_ptr<const Buffer> buffer) override { _buffer = buffer; }
- void setData(std::shared_ptr<const ir::Data>) override
- {
- throw std::runtime_error{"Passed data may read-only"};
- }
- void releaseData() override { _buffer = nullptr; }
-
- size_t total_size() const override { return _info.total_size(); }
- size_t calcOffset(const ir::Coordinates &coords) const override;
- ir::Layout layout() const override;
- bool is_dynamic() const override { return false; }
- bool has_padding() const override { return false; }
- ir::DataType data_type() const override { return _info.typeInfo().type(); }
- float data_scale() const override { return _info.typeInfo().scale(); }
- int32_t data_zero_point() const override { return _info.typeInfo().zero_point(); }
- const std::vector<float> &data_scales() const override { return _info.typeInfo().scales(); }
- const std::vector<int32_t> &data_zero_points() const override
- {
- return _info.typeInfo().zero_points();
- }
- const ir::OperandInfo &tensorInfo() const override { return _info; }
- uint64_t num_elements() const override { return _info.shape().num_elements(); };
- ir::Shape getShape() const override;
-
-private:
- const ir::OperandInfo _info;
- std::shared_ptr<const Buffer> _buffer{nullptr};
-};
-
-} // namespace interp
-} // namespace onert
-
-#endif // __ONERT_INTERP_TENSOR_H__
diff --git a/runtime/onert/core/src/interp/operations/BinaryArithmeticOps.cc b/runtime/onert/core/src/interp/operations/BinaryArithmeticOps.cc
deleted file mode 100644
index fe4acd309..000000000
--- a/runtime/onert/core/src/interp/operations/BinaryArithmeticOps.cc
+++ /dev/null
@@ -1,204 +0,0 @@
-/*
- * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#include "OperationUtil.h"
-#include "../Registration.h"
-
-#include "ir/operation/BinaryArithmetic.h"
-
-#include <cker/operation/BinaryArithmeticOps.h>
-#include <cker/Types.h>
-#include <misc/polymorphic_downcast.h>
-
-namespace onert
-{
-namespace interp
-{
-namespace
-{
-
-enum class OpType
-{
- ADD,
- SUB,
- MUL
-};
-
-void prepare(ExecEnv *env, const ir::Operation &node)
-{
- const auto &arithmetic_node =
- nnfw::misc::polymorphic_downcast<const ir::operation::BinaryArithmetic &>(node);
-
- const auto lhs_index = node.getInputs().at(arithmetic_node.LHS);
- const auto rhs_index = node.getInputs().at(arithmetic_node.RHS);
- const auto out_index = node.getOutputs().at(0);
-
- const auto lhs_tensor = env->tensorAt(lhs_index);
- const auto rhs_tensor = env->tensorAt(rhs_index);
-
- // Check that the lhs shape and type are the same as the rhs
- // TODO Util function to compare TensorInfo
- if (lhs_tensor->data_type() != rhs_tensor->data_type())
- {
- throw std::runtime_error{"Interp(" + arithmetic_node.name() + "): Different input types"};
- }
-
- bool try_broadcast = (lhs_tensor->tensorInfo().shape() != rhs_tensor->tensorInfo().shape());
- if (try_broadcast)
- {
- bool success = true;
- auto out_shape = calcBroadcastShape(lhs_tensor->tensorInfo().shape(),
- rhs_tensor->tensorInfo().shape(), success);
- if (!success)
- {
- throw std::runtime_error{"Interp(" + arithmetic_node.name() + "): Fail to brodcasting"};
- }
-
- auto output_info =
- ir::OperandInfo::createStaticInfo(out_shape, lhs_tensor->tensorInfo().typeInfo());
- // We can handle an already-allocated output (e.g. model output)
- env->allocateIfNeeded(out_index, output_info);
- }
- else
- {
- // Output shape and type are the same as the input's
- auto output_info = lhs_tensor->tensorInfo();
- // We can handle an already-allocated output (e.g. model output)
- env->allocateIfNeeded(out_index, output_info);
- }
-
- auto out_tensor = env->tensorAt(out_index);
- // Check that the lhs shape and type are the same as the output
- // TODO Util function to compare TensorInfo
- if (lhs_tensor->data_type() != out_tensor->data_type())
- {
- throw std::runtime_error{"Interp(" + arithmetic_node.name() + "): Invalid output type"};
- }
-}
-
-inline void setActivationParams(float min, float max, nnfw::cker::BinaryArithmeticOpParam *params)
-{
- params->float_activation_min = min;
- params->float_activation_max = max;
-}
-
-inline void setActivationParams(int32_t min, int32_t max,
- nnfw::cker::BinaryArithmeticOpParam *params)
-{
- params->quantized_activation_min = min;
- params->quantized_activation_max = max;
-}
-
-template <typename raw_type, OpType op_type>
-void invoke(const ITensor *lhs_tensor, const ITensor *rhs_tensor, const ITensor *out_tensor,
- const ir::operation::BinaryArithmetic::Param &param)
-{
- const auto lhs_buffer = lhs_tensor->bufferRO();
- const auto rhs_buffer = rhs_tensor->bufferRO();
- auto out_buffer = out_tensor->buffer();
-
- nnfw::cker::BinaryArithmeticOpParam cker_param;
- raw_type activation_min, activation_max;
- calculateActivationRange(param.activation, &activation_min, &activation_max);
- setActivationParams(activation_min, activation_max, &cker_param);
- const raw_type *lhs_ptr = reinterpret_cast<const raw_type *>(lhs_buffer);
- const raw_type *rhs_ptr = reinterpret_cast<const raw_type *>(rhs_buffer);
- raw_type *out_ptr = reinterpret_cast<raw_type *>(out_buffer);
-
- const auto cker_op_type =
- (op_type == OpType::ADD) ? nnfw::cker::BinaryArithmeticOpType::ADD
- : ((op_type == OpType::SUB) ? nnfw::cker::BinaryArithmeticOpType::SUB
- : nnfw::cker::BinaryArithmeticOpType::MUL);
-
- const bool need_broadcast =
- nnfw::cker::ProcessBroadcastShapes(convertShape(lhs_tensor->tensorInfo().shape()),
- convertShape(rhs_tensor->tensorInfo().shape()), &cker_param);
-
- if (need_broadcast)
- {
- const auto lhs_shape = convertShape(lhs_tensor->tensorInfo().shape());
- const auto rhs_shape = convertShape(rhs_tensor->tensorInfo().shape());
- const auto out_shape = convertShape(out_tensor->tensorInfo().shape());
- nnfw::cker::BroadcastBinaryArithmeticOp<cker_op_type>(cker_param, lhs_shape, lhs_ptr, rhs_shape,
- rhs_ptr, out_shape, out_ptr);
- return;
- }
-
- const auto lhs_shape = convertShape(lhs_tensor->tensorInfo().shape());
- const auto rhs_shape = convertShape(rhs_tensor->tensorInfo().shape());
- const auto out_shape = convertShape(out_tensor->tensorInfo().shape());
- nnfw::cker::BinaryArithmeticOp<cker_op_type>(cker_param, lhs_shape, lhs_ptr, rhs_shape, rhs_ptr,
- out_shape, out_ptr);
-}
-
-template <OpType op_type>
-void invokeBinaryArithmetic(const ExecEnv *env, const ir::operation::BinaryArithmetic &node)
-{
- const auto lhs_index = node.getInputs().at(node.LHS);
- const auto rhs_index = node.getInputs().at(node.RHS);
- const auto out_index = node.getOutputs().at(0);
- const auto lhs_tensor = env->tensorAt(lhs_index);
- const auto rhs_tensor = env->tensorAt(rhs_index);
- const auto out_tensor = env->tensorAt(out_index);
- const auto data_type = lhs_tensor->data_type();
-
- if (data_type == ir::DataType::INT32)
- {
- invoke<int32_t, op_type>(lhs_tensor, rhs_tensor, out_tensor, node.param());
- }
- else if (data_type == ir::DataType::FLOAT32)
- {
- invoke<float, op_type>(lhs_tensor, rhs_tensor, out_tensor, node.param());
- }
- else
- {
- throw std::runtime_error{"NYI: Unsupported data type"};
- }
-}
-
-void invokeBinaryArithmeticOps(const ExecEnv *env, const ir::Operation &node)
-{
- const auto &arithmetic_node =
- nnfw::misc::polymorphic_downcast<const ir::operation::BinaryArithmetic &>(node);
-
- switch (arithmetic_node.param().arithmetic_type)
- {
- case ir::operation::BinaryArithmetic::ArithmeticType::ADD:
- invokeBinaryArithmetic<OpType::ADD>(env, arithmetic_node);
- break;
- case ir::operation::BinaryArithmetic::ArithmeticType::SUB:
- invokeBinaryArithmetic<OpType::SUB>(env, arithmetic_node);
- break;
- case ir::operation::BinaryArithmetic::ArithmeticType::MUL:
- invokeBinaryArithmetic<OpType::MUL>(env, arithmetic_node);
- break;
- default:
- throw std::runtime_error{"Interp(BinaryArithmetic): NYI unsupported operation " +
- arithmetic_node.name()};
- break;
- }
-}
-
-} // namespace
-
-OpKernel *getBinaryArithmetic()
-{
- static OpKernel kernel = {prepare, invokeBinaryArithmeticOps};
- return &kernel;
-}
-
-} // namespace interp
-} // namespace onert
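prepare() above has to agree with the kernel on the output shape: when the lhs and rhs shapes differ it calls calcBroadcastShape (its definition is not shown in this hunk) and allocates the broadcast result; otherwise the output simply reuses the lhs info. A minimal sketch of the usual right-aligned broadcasting rule such a helper is expected to follow; broadcastShape below is a hypothetical stand-in, not the onert utility:

  #include <algorithm>
  #include <vector>

  // Right-aligned broadcast of two shapes, e.g. {1, 2, 2, 1} vs {2, 1} -> {1, 2, 2, 1}.
  // Returns an empty vector when the shapes are not broadcastable.
  std::vector<int> broadcastShape(const std::vector<int> &lhs, const std::vector<int> &rhs)
  {
    const size_t rank = std::max(lhs.size(), rhs.size());
    std::vector<int> out(rank, 1);
    for (size_t i = 0; i < rank; ++i)
    {
      // Read dimensions from the back; a missing dimension counts as 1.
      const int l = i < lhs.size() ? lhs[lhs.size() - 1 - i] : 1;
      const int r = i < rhs.size() ? rhs[rhs.size() - 1 - i] : 1;
      if (l != r && l != 1 && r != 1)
        return {}; // incompatible, mirrors the "Failed to broadcast" error path
      out[rank - 1 - i] = std::max(l, r);
    }
    return out;
  }

For example, {1, 2, 2, 1} against {2, 1} yields {1, 2, 2, 1}, while {2, 2} against {3, 2} is rejected.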
diff --git a/runtime/onert/core/src/interp/operations/Concat.cc b/runtime/onert/core/src/interp/operations/Concat.cc
deleted file mode 100644
index 103604631..000000000
--- a/runtime/onert/core/src/interp/operations/Concat.cc
+++ /dev/null
@@ -1,147 +0,0 @@
-/*
- * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#include "OperationUtil.h"
-#include "../Registration.h"
-
-#include "ir/operation/Concat.h"
-
-#include <cker/operation/Concatenation.h>
-#include <misc/polymorphic_downcast.h>
-
-namespace onert
-{
-namespace interp
-{
-namespace concat
-{
-
-void prepareConcat(ExecEnv *env, const ir::Operation &node)
-{
- const auto &concat_node = nnfw::misc::polymorphic_downcast<const ir::operation::Concat &>(node);
-
- const auto first_index = node.getInputs().at(0);
- const auto out_index = node.getOutputs().at(0);
-
- const auto first_tensor = env->tensorAt(first_index);
- uint32_t out_axis_dimension = 0;
- const int32_t axis_raw = concat_node.param().axis;
- const int32_t axis = (axis_raw < 0) ? (axis_raw + first_tensor->getShape().rank()) : axis_raw;
-
- // All input shapes should be the same except for the axis dimension
- // All input types should be the same
- for (auto input : node.getInputs())
- {
- assert(first_tensor->getShape().rank() == env->tensorAt(input)->getShape().rank());
- assert(first_tensor->data_type() == env->tensorAt(input)->data_type());
- for (int i = 0; i < first_tensor->getShape().rank(); i++)
- {
- if (i == axis)
- {
- out_axis_dimension += env->tensorAt(input)->getShape().dim(i);
- continue;
- }
- assert(first_tensor->getShape().dim(i) == env->tensorAt(input)->getShape().dim(i));
- }
- }
-
- // Build the output tensor info from the first input's tensor info and the accumulated axis dimension
- auto out_shape = first_tensor->tensorInfo().shape();
- out_shape.dim(axis) = out_axis_dimension;
- env->allocateIfNeeded(
- out_index, ir::OperandInfo::createStaticInfo(out_shape, first_tensor->tensorInfo().typeInfo()));
-
- auto out_tensor = env->tensorAt(out_index);
- UNUSED_RELEASE(out_tensor);
-
- // Output shape should be the same as the input except for the axis dimension
- // Output type should be the same as the input
- assert(first_tensor->data_type() == out_tensor->data_type());
- for (int i = 0; i < first_tensor->getShape().rank(); i++)
- {
- if (i == axis)
- {
- continue;
- }
- assert(first_tensor->getShape().dim(i) == out_tensor->getShape().dim(i));
- }
-}
-
-void invoke(const std::vector<const ITensor *> in_tensors, const ITensor *out_tensor, uint32_t axis)
-{
- const uint32_t count = in_tensors.size();
-
- // Calculate
- nnfw::cker::ConcatenationParams cker_param;
- cker_param.axis = (int8_t)axis;
- cker_param.inputs_count = count;
-
- const auto out_shape = convertShape(out_tensor->tensorInfo().shape());
-
- std::vector<nnfw::cker::Shape> in_shapes;
- std::vector<const nnfw::cker::Shape *> in_shape_ptrs;
- in_shapes.reserve(count);
- in_shape_ptrs.reserve(count);
- std::vector<const float *> in_ptrs;
- for (uint32_t i = 0; i < count; i++)
- {
- in_shapes.push_back(convertShape(in_tensors[i]->tensorInfo().shape()));
- in_shape_ptrs.push_back(&in_shapes[i]);
- in_ptrs.push_back(reinterpret_cast<const float *>(in_tensors[i]->bufferRO()));
- }
-
- auto out_buffer = out_tensor->buffer();
- float *out_ptr = reinterpret_cast<float *>(out_buffer);
-
- nnfw::cker::Concatenation<float>(cker_param, in_shape_ptrs.data(), in_ptrs.data(), out_shape,
- out_ptr);
-}
-
-void invokeConcat(const ExecEnv *env, const ir::Operation &node)
-{
- const auto &concat_node = nnfw::misc::polymorphic_downcast<const ir::operation::Concat &>(node);
- const int32_t axis_raw = concat_node.param().axis;
-
- std::vector<const ITensor *> in_tensors;
- for (const auto &e : concat_node.getInputs())
- {
- in_tensors.emplace_back(env->tensorAt(e));
- }
-
- const auto out_index = node.getOutputs().at(0);
- const auto out_tensor = env->tensorAt(out_index);
- const uint32_t axis = (axis_raw < 0) ? (axis_raw + out_tensor->getShape().rank()) : axis_raw;
-
- const auto data_type = in_tensors[0]->data_type();
- if (data_type == ir::DataType::FLOAT32)
- {
- invoke(in_tensors, out_tensor, axis);
- }
- else
- {
- throw std::runtime_error{"NYI: Support float32 only"};
- }
-}
-} // namespace concat
-
-OpKernel *getConcat()
-{
- static OpKernel kernel = {concat::prepareConcat, concat::invokeConcat};
- return &kernel;
-}
-
-} // namespace interp
-} // namespace onert
diff --git a/runtime/onert/core/src/interp/operations/Conv2D.cc b/runtime/onert/core/src/interp/operations/Conv2D.cc
deleted file mode 100644
index 72c2057c2..000000000
--- a/runtime/onert/core/src/interp/operations/Conv2D.cc
+++ /dev/null
@@ -1,151 +0,0 @@
-/*
- * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#include "OperationUtil.h"
-#include "../Registration.h"
-
-#include "ir/operation/Conv2D.h"
-#include "util/ShapeInference.h"
-#include "util/Utils.h"
-
-#include <cker/operation/Conv.h>
-#include <misc/polymorphic_downcast.h>
-
-namespace onert
-{
-namespace interp
-{
-namespace conv2d
-{
-
-void prepareConv2D(ExecEnv *env, const ir::Operation &node)
-{
- const auto in_index = node.getInputs().at(ir::operation::Conv2D::INPUT);
- const auto kernel_index = node.getInputs().at(ir::operation::Conv2D::KERNEL);
- const auto bias_index = node.getInputs().at(ir::operation::Conv2D::BIAS);
- const auto out_index = node.getOutputs().at(0);
-
- const auto in_tensor = env->tensorAt(in_index);
- const auto kernel_tensor = env->tensorAt(kernel_index);
- const auto bias_tensor = env->tensorAt(bias_index);
-
- assert(in_tensor->getShape().rank() == 4);
- assert(kernel_tensor->getShape().rank() == 4);
- assert(bias_tensor->getShape().rank() == 1);
-
- UNUSED_RELEASE(in_tensor);
- UNUSED_RELEASE(kernel_tensor);
- UNUSED_RELEASE(bias_tensor);
-
- const auto output_info = env->graph().operands().at(out_index).info();
- if (output_info.total_size() == 0)
- {
- // Handle unspecified output shape
- const auto &conv_node = nnfw::misc::polymorphic_downcast<const ir::operation::Conv2D &>(node);
- const auto infered_output_shape = shape_inference::inferConv2DShape(
- in_tensor->tensorInfo().shape(), kernel_tensor->tensorInfo().shape(), conv_node.param());
- env->allocateIfNeeded(
- out_index, ir::OperandInfo::createStaticInfo(infered_output_shape, output_info.typeInfo()));
- }
- else
- {
- env->allocateIfNeeded(out_index, output_info);
- }
-
- auto out_tensor = env->tensorAt(out_index);
- UNUSED_RELEASE(out_tensor);
-
- // Handle same ifm & ofm data type only
- assert(in_tensor->data_type() == out_tensor->data_type());
- assert(out_tensor->getShape().rank() == 4);
-}
-
-void invoke(const ITensor *ifm_tensor, const ITensor *ker_tensor, const ITensor *bias_tensor,
- const ITensor *ofm_tensor, const ir::operation::Conv2D::Param &param)
-{
- // TODO Support NCHW frontend
- const auto ifm_shape = ifm_tensor->tensorInfo().shape().asFeature(ir::Layout::NHWC);
- const auto ofm_shape = ofm_tensor->tensorInfo().shape().asFeature(ir::Layout::NHWC);
- // Kernel format is [depth_out, kernel_height, kernel_width, depth_in].
- const auto &ker_shape = ker_tensor->tensorInfo().shape();
- const auto ker_height = ker_shape.dim(1);
- const auto ker_width = ker_shape.dim(2);
- const auto padding =
- ir::calculatePadding(param.padding, ifm_shape, ofm_shape, param.stride, ker_width, ker_height);
-
- // Calculate
- float activation_min, activation_max;
- calculateActivationRange(param.activation, &activation_min, &activation_max);
-
- nnfw::cker::ConvParams cker_param;
- cker_param.padding_type = convertPaddingType(param.padding.type);
- cker_param.padding_values.width = padding.left;
- cker_param.padding_values.height = padding.top;
- cker_param.stride_width = param.stride.horizontal;
- cker_param.stride_height = param.stride.vertical;
- cker_param.dilation_width_factor = 1;
- cker_param.dilation_height_factor = 1;
- cker_param.float_activation_min = activation_min;
- cker_param.float_activation_max = activation_max;
-
- const auto cker_ifm_shape = convertShape(ifm_tensor->tensorInfo().shape());
- const auto cker_ker_shape = convertShape(ker_tensor->tensorInfo().shape());
- const auto cker_bias_shape = convertShape(bias_tensor->tensorInfo().shape());
- const auto cker_ofm_shape = convertShape(ofm_tensor->tensorInfo().shape());
- const float *ifm_ptr = reinterpret_cast<const float *>(ifm_tensor->bufferRO());
- const float *ker_ptr = reinterpret_cast<const float *>(ker_tensor->bufferRO());
- const float *bias_ptr = reinterpret_cast<const float *>(bias_tensor->bufferRO());
- float *ofm_ptr = reinterpret_cast<float *>(ofm_tensor->buffer());
-
- nnfw::cker::Conv conv_kernel;
- conv_kernel(cker_param, cker_ifm_shape, ifm_ptr, cker_ker_shape, ker_ptr, cker_bias_shape,
- bias_ptr, cker_ofm_shape, ofm_ptr);
-}
-
-void invokeConv2D(const ExecEnv *env, const ir::Operation &node)
-{
- const auto &conv_node = nnfw::misc::polymorphic_downcast<const ir::operation::Conv2D &>(node);
-
- const auto ifm_index = node.getInputs().at(ir::operation::Conv2D::INPUT);
- const auto ker_index = node.getInputs().at(ir::operation::Conv2D::KERNEL);
- const auto bias_index = node.getInputs().at(ir::operation::Conv2D::BIAS);
- const auto ofm_index = node.getOutputs().at(0);
-
- const auto ifm_tensor = env->tensorAt(ifm_index);
- const auto ker_tensor = env->tensorAt(ker_index);
- const auto bias_tensor = env->tensorAt(bias_index);
- const auto ofm_tensor = env->tensorAt(ofm_index);
-
- const auto data_type = ifm_tensor->data_type();
- if (data_type == ir::DataType::FLOAT32)
- {
- invoke(ifm_tensor, ker_tensor, bias_tensor, ofm_tensor, conv_node.param());
- }
- else
- {
- throw std::runtime_error{"NYI: Support float32 only"};
- }
-}
-} // namespace conv2d
-
-OpKernel *getConv2D()
-{
- static OpKernel kernel = {conv2d::prepareConv2D, conv2d::invokeConv2D};
- return &kernel;
-}
-
-} // namespace interp
-} // namespace onert
diff --git a/runtime/onert/core/src/interp/operations/DepthwiseConv2D.cc b/runtime/onert/core/src/interp/operations/DepthwiseConv2D.cc
deleted file mode 100644
index 9f527440e..000000000
--- a/runtime/onert/core/src/interp/operations/DepthwiseConv2D.cc
+++ /dev/null
@@ -1,156 +0,0 @@
-/*
- * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#include "OperationUtil.h"
-#include "../Registration.h"
-
-#include "ir/operation/DepthwiseConv2D.h"
-#include "util/ShapeInference.h"
-#include "util/Utils.h"
-
-#include <cker/operation/DepthwiseConv.h>
-#include <misc/polymorphic_downcast.h>
-
-namespace onert
-{
-namespace interp
-{
-
-namespace
-{
-
-void prepareDepthwiseConv(ExecEnv *env, const ir::Operation &node)
-{
- const auto in_index = node.getInputs().at(ir::operation::DepthwiseConv2D::INPUT);
- const auto kernel_index = node.getInputs().at(ir::operation::DepthwiseConv2D::KERNEL);
- const auto bias_index = node.getInputs().at(ir::operation::DepthwiseConv2D::BIAS);
- const auto out_index = node.getOutputs().at(0);
-
- const auto in_tensor = env->tensorAt(in_index);
- const auto kernel_tensor = env->tensorAt(kernel_index);
- const auto bias_tensor = env->tensorAt(bias_index);
-
- assert(in_tensor->getShape().rank() == 4);
- assert(kernel_tensor->getShape().rank() == 4);
- assert(bias_tensor->getShape().rank() == 1);
-
- UNUSED_RELEASE(in_tensor);
- UNUSED_RELEASE(kernel_tensor);
- UNUSED_RELEASE(bias_tensor);
-
- // TODO handle unspecified output shape:
- // calculate output shape using ifm shape, kernel shape, padding, stride
- const auto output_info = env->graph().operands().at(out_index).info();
- if (output_info.total_size() == 0)
- {
- // Handle unspecified output shape
- const auto &depth_conv_node =
- nnfw::misc::polymorphic_downcast<const ir::operation::DepthwiseConv2D &>(node);
- const auto infered_output_shape = shape_inference::inferDepthwiseConv2DShape(
- in_tensor->tensorInfo().shape(), kernel_tensor->tensorInfo().shape(),
- depth_conv_node.param());
- env->allocateIfNeeded(
- out_index, ir::OperandInfo::createStaticInfo(infered_output_shape, output_info.typeInfo()));
- }
- else
- {
- env->allocateIfNeeded(out_index, output_info);
- }
-
- auto out_tensor = env->tensorAt(out_index);
- UNUSED_RELEASE(out_tensor);
-
- // Handle same ifm & ofm data type only
- assert(in_tensor->data_type() == out_tensor->data_type());
- assert(out_tensor->getShape().rank() == 4);
-}
-
-void invoke(const ITensor *ifm_tensor, const ITensor *ker_tensor, const ITensor *bias_tensor,
- const ITensor *ofm_tensor, const ir::operation::DepthwiseConv2D::Param &param)
-{
- // TODO Support NCHW frontend
- const auto ifm_shape = ifm_tensor->tensorInfo().shape().asFeature(ir::Layout::NHWC);
- const auto ofm_shape = ofm_tensor->tensorInfo().shape().asFeature(ir::Layout::NHWC);
- // Kernel format is [1, kernel_height, kernel_width, depth_out].
- const auto &ker_shape = ker_tensor->tensorInfo().shape();
- const auto ker_height = ker_shape.dim(1);
- const auto ker_width = ker_shape.dim(2);
- const auto padding =
- ir::calculatePadding(param.padding, ifm_shape, ofm_shape, param.stride, ker_width, ker_height);
-
- // Calculate
- float activation_min, activation_max;
- calculateActivationRange(param.activation, &activation_min, &activation_max);
-
- nnfw::cker::DepthwiseConvParams cker_param;
- cker_param.padding_values.width = padding.left;
- cker_param.padding_values.height = padding.top;
- cker_param.depth_multiplier = param.multiplier;
- cker_param.stride_width = param.stride.horizontal;
- cker_param.stride_height = param.stride.vertical;
- cker_param.dilation_width_factor = 1;
- cker_param.dilation_height_factor = 1;
- cker_param.float_activation_min = activation_min;
- cker_param.float_activation_max = activation_max;
-
- const auto cker_ifm_shape = convertShape(ifm_tensor->tensorInfo().shape());
- const auto cker_ker_shape = convertShape(ker_tensor->tensorInfo().shape());
- const auto cker_bias_shape = convertShape(bias_tensor->tensorInfo().shape());
- const auto cker_ofm_shape = convertShape(ofm_tensor->tensorInfo().shape());
- const float *ifm_ptr = reinterpret_cast<const float *>(ifm_tensor->bufferRO());
- const float *ker_ptr = reinterpret_cast<const float *>(ker_tensor->bufferRO());
- const float *bias_ptr = reinterpret_cast<const float *>(bias_tensor->bufferRO());
- float *ofm_ptr = reinterpret_cast<float *>(ofm_tensor->buffer());
-
- nnfw::cker::DepthwiseConv(cker_param, cker_ifm_shape, ifm_ptr, cker_ker_shape, ker_ptr,
- cker_bias_shape, bias_ptr, cker_ofm_shape, ofm_ptr, nullptr);
-}
-
-void invokeDepthwiseConv(const ExecEnv *env, const ir::Operation &node)
-{
- const auto &conv_node = static_cast<const ir::operation::DepthwiseConv2D &>(node);
-
- const auto ifm_index = node.getInputs().at(ir::operation::DepthwiseConv2D::INPUT);
- const auto ker_index = node.getInputs().at(ir::operation::DepthwiseConv2D::KERNEL);
- const auto bias_index = node.getInputs().at(ir::operation::DepthwiseConv2D::BIAS);
- const auto ofm_index = node.getOutputs().at(0);
-
- const auto ifm_tensor = env->tensorAt(ifm_index);
- const auto ker_tensor = env->tensorAt(ker_index);
- const auto bias_tensor = env->tensorAt(bias_index);
- const auto ofm_tensor = env->tensorAt(ofm_index);
-
- const auto data_type = ifm_tensor->data_type();
- if (data_type == ir::DataType::FLOAT32)
- {
- invoke(ifm_tensor, ker_tensor, bias_tensor, ofm_tensor, conv_node.param());
- }
- else
- {
- throw std::runtime_error{"NYI: Support float32 only"};
- }
-}
-
-} // namespace
-
-OpKernel *getDepthwiseConv2D()
-{
- static OpKernel kernel = {prepareDepthwiseConv, invokeDepthwiseConv};
- return &kernel;
-}
-
-} // namespace interp
-} // namespace onert
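For reference, each interpreter kernel deleted in this patch follows the same shape: a prepare() pass that validates shapes and allocates the output, an invoke() pass that runs the computation, and a getter returning a function-local static OpKernel. The sketch below reproduces only that registration pattern; ExecEnv, Operation and OpKernel here are simplified stand-ins, not the actual onert types.

// Simplified sketch of the prepare/invoke kernel pattern used by the removed
// interpreter operations (stand-in types, not onert's actual API).
#include <iostream>

struct ExecEnv {};    // placeholder for the interpreter execution environment
struct Operation {};  // placeholder for ir::Operation

struct OpKernel
{
  void (*prepare)(ExecEnv *, const Operation &);      // shape checks / output allocation
  void (*invoke)(const ExecEnv *, const Operation &); // actual computation
};

namespace conv2d
{
void prepareConv2D(ExecEnv *, const Operation &) { std::cout << "prepare Conv2D\n"; }
void invokeConv2D(const ExecEnv *, const Operation &) { std::cout << "invoke Conv2D\n"; }
} // namespace conv2d

// Function-local static kernel, mirroring getConv2D()/getDepthwiseConv2D() above.
OpKernel *getConv2D()
{
  static OpKernel kernel = {conv2d::prepareConv2D, conv2d::invokeConv2D};
  return &kernel;
}

int main()
{
  ExecEnv env;
  Operation node;
  OpKernel *kernel = getConv2D();
  kernel->prepare(&env, node); // first pass: validate and allocate
  kernel->invoke(&env, node);  // second pass: execute
  return 0;
}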
diff --git a/runtime/onert/core/src/interp/operations/ElementwiseActivations.cc b/runtime/onert/core/src/interp/operations/ElementwiseActivations.cc
deleted file mode 100644
index e13080e76..000000000
--- a/runtime/onert/core/src/interp/operations/ElementwiseActivations.cc
+++ /dev/null
@@ -1,160 +0,0 @@
-/*
- * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#include "OperationUtil.h"
-#include "../Registration.h"
-
-#include "ir/operation/ElementwiseActivation.h"
-
-#include <cker/operation/Logistic.h>
-#include <cker/operation/Tanh.h>
-#include <misc/polymorphic_downcast.h>
-
-#include <cmath>
-
-namespace onert
-{
-namespace interp
-{
-namespace
-{
-
-enum class ActivationType
-{
- Logistic,
- ReLU,
- Tanh
-};
-
-void prepare(ExecEnv *env, const ir::Operation &node)
-{
- const auto input_index = node.getInputs().at(0);
- const auto output_index = node.getOutputs().at(0);
-
- const auto input_tensor = env->tensorAt(input_index);
-
- const auto output_info = env->graph().operands().at(output_index).info();
- if (output_info.total_size() == 0)
- {
- // Output's shape and type is same with input
- auto input_info = input_tensor->tensorInfo();
- // We can handle already allocated (ex. model output)
- env->allocateIfNeeded(output_index, input_info);
- }
- else
- {
- env->allocateIfNeeded(output_index, output_info);
- }
-
- const auto output_tensor = env->tensorAt(output_index);
- // Check that the input data type matches the output
- // TODO Util function to compare TensorInfo
- if (input_tensor->data_type() != output_tensor->data_type())
- {
- throw std::runtime_error{"Interp(ElementwiseActivation): Invalid output type"};
- }
-}
-
-template <ActivationType act_type>
-void evalFloat(const float *input_ptr, float *output_ptr, uint64_t num_elements, float alpha,
- float beta)
-{
- std::function<float(const float &)> fn = [](const float &) { return std::nanf(""); };
- switch (act_type)
- {
- case ActivationType::ReLU:
- fn = [alpha, beta](const float &in) { return std::min(std::max(beta, in), alpha); };
- break;
- case ActivationType::Tanh:
- fn = [](const float &in) { return std::tanh(in); };
- break;
- default:
- throw std::runtime_error{"Interp(ElementwiseActivation): NYI - Unsupported activation"};
- break;
- }
-
- const float *input_end = input_ptr + num_elements;
- for (; input_ptr < input_end; input_ptr++, output_ptr++)
- {
- *output_ptr = fn(*input_ptr);
- }
-}
-
-template <ActivationType act_type> void invoke(const ExecEnv *env, const ir::Operation &node)
-{
- const auto input_index = node.getInputs().at(0);
- const auto output_index = node.getOutputs().at(0);
-
- // Lookup input and output tensors
- const auto input_tensor = env->tensorAt(input_index);
- const auto output_tensor = env->tensorAt(output_index);
-
- const auto data_type = input_tensor->data_type();
- if (data_type == ir::DataType::FLOAT32)
- {
- uint64_t elements = input_tensor->num_elements();
- const float *input_start = reinterpret_cast<const float *>(input_tensor->bufferRO());
- float *out = reinterpret_cast<float *>(output_tensor->buffer());
- if (act_type == ActivationType::Logistic)
- {
- const auto cker_input_shape = convertShape(input_tensor->tensorInfo().shape());
- const auto cker_output_shape = convertShape(output_tensor->tensorInfo().shape());
- nnfw::cker::Logistic(cker_input_shape, input_start, cker_output_shape, out);
- }
- else
- {
- const auto &act_node =
- nnfw::misc::polymorphic_downcast<const ir::operation::ElementwiseActivation &>(node);
- evalFloat<act_type>(input_start, out, elements, act_node.param().alpha,
- act_node.param().beta);
- }
- }
- else
- {
- throw std::runtime_error{"Interp(" + node.name() + "): NYI - Support float only"};
- }
-}
-
-void invokeElementwiseActivation(const ExecEnv *env, const ir::Operation &node)
-{
- const auto &act_node =
- nnfw::misc::polymorphic_downcast<const ir::operation::ElementwiseActivation &>(node);
- switch (act_node.param().op_type)
- {
- case ir::operation::ElementwiseActivation::Type::LOGISTIC:
- invoke<ActivationType::Logistic>(env, node);
- break;
- case ir::operation::ElementwiseActivation::Type::RELU:
- invoke<ActivationType::ReLU>(env, node);
- break;
- case ir::operation::ElementwiseActivation::Type::TANH:
- invoke<ActivationType::Tanh>(env, node);
- break;
- default:
- throw std::runtime_error("Interp(" + node.name() + "): NYI - Unsupported activation");
- }
-}
-
-} // namespace
-
-OpKernel *getElementwiseActivation()
-{
- static OpKernel kernel = {prepare, invokeElementwiseActivation};
- return &kernel;
-}
-
-} // namespace interp
-} // namespace onert
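The deleted ElementwiseActivations kernel picks the per-element function at compile time through a template parameter. A standalone sketch of that dispatch, assuming plain float buffers instead of interpreter tensors:

// Compile-time activation selection, as in the deleted evalFloat<act_type>():
// the branch on act_type is resolved when the template is instantiated.
#include <algorithm>
#include <cmath>
#include <cstdint>
#include <iostream>
#include <vector>

enum class ActivationType
{
  ReLU,
  Tanh
};

template <ActivationType act_type>
void evalFloat(const float *input_ptr, float *output_ptr, uint64_t num_elements, float alpha,
               float beta)
{
  for (uint64_t i = 0; i < num_elements; ++i)
  {
    if (act_type == ActivationType::ReLU)
      output_ptr[i] = std::min(std::max(beta, input_ptr[i]), alpha); // clamp to [beta, alpha]
    else
      output_ptr[i] = std::tanh(input_ptr[i]);
  }
}

int main()
{
  std::vector<float> in{-2.0f, 0.5f, 3.0f};
  std::vector<float> out(in.size());
  // ReLU6-style bounds: alpha is the upper bound, beta the lower bound, as in the original lambda.
  evalFloat<ActivationType::ReLU>(in.data(), out.data(), in.size(), 6.0f, 0.0f);
  for (float v : out)
    std::cout << v << ' '; // prints: 0 0.5 3
  std::cout << '\n';
  return 0;
}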
diff --git a/runtime/onert/core/src/interp/operations/FullyConnected.cc b/runtime/onert/core/src/interp/operations/FullyConnected.cc
deleted file mode 100644
index 2bc9f517f..000000000
--- a/runtime/onert/core/src/interp/operations/FullyConnected.cc
+++ /dev/null
@@ -1,134 +0,0 @@
-/*
- * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#include "OperationUtil.h"
-#include "../Registration.h"
-
-#include "ir/operation/FullyConnected.h"
-
-#include <cker/operation/FullyConnected.h>
-#include <misc/polymorphic_downcast.h>
-
-namespace onert
-{
-namespace interp
-{
-namespace fc
-{
-
-void prepareFC(ExecEnv *env, const ir::Operation &node)
-{
- const auto in_index = node.getInputs().at(ir::operation::FullyConnected::INPUT);
- const auto kernel_index = node.getInputs().at(ir::operation::FullyConnected::WEIGHT);
- const auto bias_index = node.getInputs().at(ir::operation::FullyConnected::BIAS);
- const auto out_index = node.getOutputs().at(0);
-
- const auto in_tensor = env->tensorAt(in_index);
- const auto kernel_tensor = env->tensorAt(kernel_index);
- const auto bias_tensor = env->tensorAt(bias_index);
-
- UNUSED_RELEASE(in_tensor);
- UNUSED_RELEASE(kernel_tensor);
- UNUSED_RELEASE(bias_tensor);
-
- assert(in_tensor->getShape().rank() >= 2);
- assert(kernel_tensor->getShape().rank() == 2);
- assert(bias_tensor->getShape().rank() == 1);
-
- const auto input_size_with_batch = in_tensor->num_elements();
- const auto num_units = kernel_tensor->getShape().dim(0);
- const auto input_size = kernel_tensor->getShape().dim(1);
- const int32_t batch_size = input_size_with_batch / input_size;
- assert(input_size_with_batch % input_size == 0);
- assert(num_units == bias_tensor->getShape().dim(0));
-
- // Make output tensor info
- ir::Shape output_shape(2);
- output_shape.dim(0) = batch_size;
- output_shape.dim(1) = num_units;
- const auto out_info =
- ir::OperandInfo::createStaticInfo(output_shape, in_tensor->tensorInfo().typeInfo());
- env->allocateIfNeeded(out_index, out_info);
-
- auto out_tensor = env->tensorAt(out_index);
- UNUSED_RELEASE(out_tensor);
-
- // Handle same ifm & ofm data type only
- assert(in_tensor->data_type() == out_tensor->data_type());
- assert(out_tensor->getShape().rank() == 2);
- assert(out_tensor->getShape().dim(0) == batch_size);
- assert(out_tensor->getShape().dim(1) == num_units);
-}
-
-void invoke(const ITensor *ifm_tensor, const ITensor *ker_tensor, const ITensor *bias_tensor,
- const ITensor *ofm_tensor, const ir::operation::FullyConnected::Param &param)
-{
- const auto ifm_buffer = ifm_tensor->bufferRO();
- const auto ker_buffer = ker_tensor->bufferRO();
- const auto bias_buffer = bias_tensor->bufferRO();
- auto ofm_buffer = ofm_tensor->buffer();
-
- // Calculate
- nnfw::cker::FullyConnectedParams cker_param;
- cker_param.activation = convertActivationType(param.activation);
- const auto cker_ifm_shape = convertShape(ifm_tensor->tensorInfo().shape());
- const auto cker_ker_shape = convertShape(ker_tensor->tensorInfo().shape());
- const auto cker_bias_shape = convertShape(bias_tensor->tensorInfo().shape());
- const auto cker_ofm_shape = convertShape(ofm_tensor->tensorInfo().shape());
- const float *ifm_ptr = reinterpret_cast<const float *>(ifm_buffer);
- const float *ker_ptr = reinterpret_cast<const float *>(ker_buffer);
- const float *bias_ptr = reinterpret_cast<const float *>(bias_buffer);
- float *ofm_ptr = reinterpret_cast<float *>(ofm_buffer);
-
- nnfw::cker::FullyConnected(cker_param, cker_ifm_shape, ifm_ptr, cker_ker_shape, ker_ptr,
- cker_bias_shape, bias_ptr, cker_ofm_shape, ofm_ptr);
-}
-
-void invokeFC(const ExecEnv *env, const ir::Operation &node)
-{
- const auto &conv_node =
- nnfw::misc::polymorphic_downcast<const ir::operation::FullyConnected &>(node);
-
- const auto ifm_index = node.getInputs().at(ir::operation::FullyConnected::INPUT);
- const auto ker_index = node.getInputs().at(ir::operation::FullyConnected::WEIGHT);
- const auto bias_index = node.getInputs().at(ir::operation::FullyConnected::BIAS);
- const auto ofm_index = node.getOutputs().at(0);
-
- const auto ifm_tensor = env->tensorAt(ifm_index);
- const auto ker_tensor = env->tensorAt(ker_index);
- const auto bias_tensor = env->tensorAt(bias_index);
- const auto ofm_tensor = env->tensorAt(ofm_index);
-
- const auto data_type = ifm_tensor->data_type();
- if (data_type == ir::DataType::FLOAT32)
- {
- invoke(ifm_tensor, ker_tensor, bias_tensor, ofm_tensor, conv_node.param());
- }
- else
- {
- throw std::runtime_error{"NYI: Support float only"};
- }
-}
-} // namespace fc
-
-OpKernel *getFullyConnected()
-{
- static OpKernel kernel = {fc::prepareFC, fc::invokeFC};
- return &kernel;
-}
-
-} // namespace interp
-} // namespace onert
diff --git a/runtime/onert/core/src/interp/operations/Gather.cc b/runtime/onert/core/src/interp/operations/Gather.cc
deleted file mode 100644
index d686cfcf6..000000000
--- a/runtime/onert/core/src/interp/operations/Gather.cc
+++ /dev/null
@@ -1,138 +0,0 @@
-/*
- * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#include "OperationUtil.h"
-#include "../Registration.h"
-
-#include "ir/operation/Gather.h"
-
-#include <cker/operation/Gather.h>
-#include <misc/polymorphic_downcast.h>
-
-namespace onert
-{
-namespace interp
-{
-namespace
-{
-
-void prepareGather(ExecEnv *env, const ir::Operation &node)
-{
- const auto input_index = node.getInputs().at(ir::operation::Gather::INPUT);
- const auto indices_index = node.getInputs().at(ir::operation::Gather::INDICES);
- const auto output_index = node.getOutputs().at(0);
-
- const auto input_tensor = env->tensorAt(input_index);
- const auto indices_tensor = env->tensorAt(indices_index);
-
- // TODO handle unspecified output shape:
- // calculate output shape from the input shape, indices shape and axis
- const auto output_info = env->graph().operands().at(output_index).info();
- if (output_info.total_size() == 0)
- {
- throw std::runtime_error{"Interp(Gather): NYI for unspecified output shape"};
- }
- else
- {
- env->allocateIfNeeded(output_index, output_info);
- }
-
- if (indices_tensor->data_type() != ir::DataType::INT32)
- {
- throw std::runtime_error{"Interp(Gather): Invalid indices data type"};
- }
-
- auto output_tensor = env->tensorAt(output_index);
- auto output_rank = input_tensor->getShape().rank() + indices_tensor->getShape().rank() - 1;
-
- if (output_rank != output_tensor->getShape().rank())
- {
- throw std::runtime_error{"Interp(Gather): Invalid output rank"};
- }
- if (output_tensor->data_type() != input_tensor->data_type())
- {
- throw std::runtime_error{"Interp(Gather): Invalid output data type"};
- }
-
- if (input_tensor->data_type() == ir::DataType::QUANT_UINT8_ASYMM &&
- input_tensor->tensorInfo().typeInfo() != output_tensor->tensorInfo().typeInfo())
- {
- throw std::runtime_error{
- "Interp(Gather): Cannot handle different I/O QUANT_UINT8_ASYMM scale/offset"};
- }
-}
-
-template <typename raw_type>
-void invoke(const ITensor *input_tensors, const ITensor *indices_tensors,
- const ITensor *output_tensor, uint32_t axis)
-{
- // Calculate
- nnfw::cker::GatherParams cker_param;
- cker_param.axis = (int8_t)axis;
-
- const auto cker_input_shapes = convertShape(input_tensors->tensorInfo().shape());
- const auto cker_indices_shape = convertShape(indices_tensors->tensorInfo().shape());
- const auto cker_output_shape = convertShape(output_tensor->tensorInfo().shape());
- const raw_type *input_ptr = reinterpret_cast<const raw_type *>(input_tensors->bufferRO());
- const int32_t *indices_ptr = reinterpret_cast<const int32_t *>(indices_tensors->bufferRO());
- raw_type *output_ptr = reinterpret_cast<raw_type *>(output_tensor->buffer());
-
- nnfw::cker::Gather<raw_type>(cker_param, cker_input_shapes, input_ptr, cker_indices_shape,
- indices_ptr, cker_output_shape, output_ptr);
-}
-
-void invokeGather(const ExecEnv *env, const ir::Operation &node)
-{
- const auto &gather_node = nnfw::misc::polymorphic_downcast<const ir::operation::Gather &>(node);
- const int32_t axis_raw = gather_node.param().axis;
-
- const auto input_index = node.getInputs().at(ir::operation::Gather::INPUT);
- const auto indices_index = node.getInputs().at(ir::operation::Gather::INDICES);
- const auto output_index = node.getOutputs().at(0);
-
- const auto input_tensor = env->tensorAt(input_index);
- const auto indices_tensor = env->tensorAt(indices_index);
- const auto output_tensor = env->tensorAt(output_index);
- const uint32_t axis = (axis_raw < 0) ? (axis_raw + input_tensor->getShape().rank()) : axis_raw;
-
- const auto data_type = input_tensor->data_type();
-
- switch (data_type)
- {
- case ir::DataType::FLOAT32:
- invoke<float>(input_tensor, indices_tensor, output_tensor, axis);
- break;
- case ir::DataType::INT32:
- invoke<int32_t>(input_tensor, indices_tensor, output_tensor, axis);
- break;
- case ir::DataType::QUANT_UINT8_ASYMM:
- invoke<uint8_t>(input_tensor, indices_tensor, output_tensor, axis);
- break;
- default:
- throw std::runtime_error{"Interp(Gather): NYI - Not supported type"};
- }
-}
-
-} // namespace
-
-OpKernel *getGather()
-{
- static OpKernel kernel = {prepareGather, invokeGather};
- return &kernel;
-}
-
-} // namespace interp
-} // namespace onert
diff --git a/runtime/onert/core/src/interp/operations/InstanceNorm.cc b/runtime/onert/core/src/interp/operations/InstanceNorm.cc
deleted file mode 100644
index 318088457..000000000
--- a/runtime/onert/core/src/interp/operations/InstanceNorm.cc
+++ /dev/null
@@ -1,121 +0,0 @@
-/*
- * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#include "OperationUtil.h"
-#include "../Registration.h"
-
-#include "ir/operation/InstanceNorm.h"
-
-#include <cker/operation/InstanceNorm.h>
-#include <misc/polymorphic_downcast.h>
-
-namespace onert
-{
-namespace interp
-{
-namespace instancenorm
-{
-
-void prepareInstanceNorm(ExecEnv *env, const ir::Operation &node)
-{
- const auto &instancenorm_node =
- nnfw::misc::polymorphic_downcast<const ir::operation::InstanceNorm &>(node);
-
- const auto input_index = node.getInputs().at(instancenorm_node.INPUT);
- const auto output_index = node.getOutputs().at(0);
- const auto input_tensor = env->tensorAt(input_index);
-
- if (input_tensor->getShape().rank() != 4)
- {
- throw std::runtime_error{"Interp(InstanceNorm): Input should be 4D-tensor"};
- }
-
- // Output shape should be same with input
- env->allocateIfNeeded(output_index, input_tensor->tensorInfo());
-
- auto output_tensor = env->tensorAt(output_index);
- UNUSED_RELEASE(output_tensor);
-
- // Handle same ifm & ofm data type only
- assert(input_tensor->data_type() == output_tensor->data_type());
- assert(input_tensor->tensorInfo().shape() == output_tensor->tensorInfo().shape());
-}
-
-inline void setActivationParams(float min, float max, nnfw::cker::InstanceNormParams *params)
-{
- params->float_activation_min = min;
- params->float_activation_max = max;
-}
-
-void invoke(const ITensor *input_tensor, const ITensor *gamma_tensor, const ITensor *beta_tensor,
- const ITensor *output_tensor, const ir::operation::InstanceNorm::Param &param)
-{
- // Calculate
- float activation_min, activation_max;
- calculateActivationRange(param.activation, &activation_min, &activation_max);
-
- nnfw::cker::InstanceNormParams cker_param;
- cker_param.epsilon = param.epsilon;
- cker_param.float_activation_min = activation_min;
- cker_param.float_activation_max = activation_max;
-
- const auto cker_input_shape = convertShape(input_tensor->tensorInfo().shape());
- const auto cker_gamma_shape = convertShape(gamma_tensor->tensorInfo().shape());
- const auto cker_beta_shape = convertShape(beta_tensor->tensorInfo().shape());
- const auto cker_output_shape = convertShape(output_tensor->tensorInfo().shape());
- const float *input_ptr = reinterpret_cast<const float *>(input_tensor->bufferRO());
- const float *gamma_ptr = reinterpret_cast<const float *>(gamma_tensor->bufferRO());
- const float *beta_ptr = reinterpret_cast<const float *>(beta_tensor->bufferRO());
- float *output_ptr = reinterpret_cast<float *>(output_tensor->buffer());
-
- nnfw::cker::InstanceNorm(cker_param, cker_input_shape, input_ptr, cker_gamma_shape, gamma_ptr,
- cker_beta_shape, beta_ptr, cker_output_shape, output_ptr);
-}
-
-void invokeInstanceNorm(const ExecEnv *env, const ir::Operation &node)
-{
- const auto &instancenorm_node =
- nnfw::misc::polymorphic_downcast<const ir::operation::InstanceNorm &>(node);
-
- const auto input_index = node.getInputs().at(instancenorm_node.INPUT);
- const auto gamma_index = node.getInputs().at(instancenorm_node.GAMMA);
- const auto beta_index = node.getInputs().at(instancenorm_node.BETA);
- const auto out_index = node.getOutputs().at(0);
- const auto input_tensor = env->tensorAt(input_index);
- const auto gamma_tensor = env->tensorAt(gamma_index);
- const auto beta_tensor = env->tensorAt(beta_index);
- const auto out_tensor = env->tensorAt(out_index);
- const auto data_type = input_tensor->data_type();
-
- if (data_type == ir::DataType::FLOAT32)
- {
- invoke(input_tensor, gamma_tensor, beta_tensor, out_tensor, instancenorm_node.param());
- }
- else
- {
- throw std::runtime_error{"NYI: Unsupported data type"};
- }
-}
-} // namespace instancenorm
-
-OpKernel *getInstanceNorm()
-{
- static OpKernel kernel = {instancenorm::prepareInstanceNorm, instancenorm::invokeInstanceNorm};
- return &kernel;
-}
-
-} // namespace interp
-} // namespace onert
diff --git a/runtime/onert/core/src/interp/operations/OperationUtil.h b/runtime/onert/core/src/interp/operations/OperationUtil.h
deleted file mode 100644
index 2fdf098f0..000000000
--- a/runtime/onert/core/src/interp/operations/OperationUtil.h
+++ /dev/null
@@ -1,203 +0,0 @@
-/*
- * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#ifndef __ONERT_INTERP_OPERATIONS_OPERATION_UTILS_H_
-#define __ONERT_INTERP_OPERATIONS_OPERATION_UTILS_H_
-
-#include "ir/Shape.h"
-#include "ir/InternalType.h"
-#include "ir/Padding.h"
-
-#include <cker/Shape.h>
-#include <cker/Types.h>
-
-namespace onert
-{
-namespace interp
-{
-
-inline nnfw::cker::Shape convertShape(const ir::Shape &shape)
-{
- auto dimensions = std::vector<uint32_t>(shape.dims().begin(), shape.dims().end());
-
- std::vector<int32_t> raw_shape;
- raw_shape.resize(dimensions.size());
-
- for (uint32_t i = 0; i < dimensions.size(); ++i)
- {
- raw_shape[i] = dimensions[i];
- }
-
- return nnfw::cker::GetShape(raw_shape);
-}
-
-inline nnfw::cker::Shape convertExtendShape(const ir::Shape &shape)
-{
- auto dimensions = std::vector<uint32_t>(shape.dims().begin(), shape.dims().end());
-
- const int32_t extended_rank = 4;
- int32_t raw_shape[extended_rank];
- uint32_t start = extended_rank - dimensions.size();
-
- for (uint32_t i = 0; i < extended_rank; ++i)
- {
- if (i < start)
- {
- raw_shape[i] = 1;
- }
- else
- {
- raw_shape[i] = dimensions[i - start];
- }
- }
-
- return nnfw::cker::Shape(extended_rank, raw_shape);
-}
-
-inline nnfw::cker::FusedActivationFunctionType
-convertActivationType(const ir::Activation activation)
-{
- switch (activation)
- {
- case ir::Activation::NONE:
- return nnfw::cker::FusedActivationFunctionType::kNone;
- case ir::Activation::RELU:
- return nnfw::cker::FusedActivationFunctionType::kRelu;
- case ir::Activation::RELU1:
- return nnfw::cker::FusedActivationFunctionType::kRelu1;
- case ir::Activation::RELU6:
- return nnfw::cker::FusedActivationFunctionType::kRelu6;
- default:
- throw std::runtime_error{"CPU backend: Cannot convert activation type"};
- }
-}
-
-template <typename T>
-void calculateActivationRange(ir::Activation activation, T *activation_min, T *activation_max)
-{
- if (activation == ir::Activation::RELU)
- {
- *activation_min = 0;
- *activation_max = std::numeric_limits<T>::max();
- }
- else if (activation == ir::Activation::RELU6)
- {
- *activation_min = 0;
- *activation_max = 6;
- }
- else if (activation == ir::Activation::RELU1)
- {
- *activation_min = -1;
- *activation_max = 1;
- }
- else if (activation == ir::Activation::NONE)
- {
- *activation_min = std::numeric_limits<T>::lowest();
- *activation_max = std::numeric_limits<T>::max();
- }
- else
- {
- throw std::runtime_error{"Unsupported activation type"};
- }
-}
-
-inline ir::Shape calcBroadcastShape(const ir::Shape &lhs, const ir::Shape &rhs, bool &success)
-{
- int lhs_rank = lhs.rank();
- int rhs_rank = rhs.rank();
-
- int out_rank = (lhs_rank > rhs_rank ? lhs_rank : rhs_rank);
- ir::Shape out_shape(out_rank);
-
- int lhs_idim = lhs_rank - 1;
- int rhs_idim = rhs_rank - 1;
- success = true;
- for (int out_idim = out_rank - 1; out_idim >= 0; out_idim--)
- {
- if (lhs_idim == -1 && rhs_idim == -1)
- {
- // invalid result
- success = false;
- break;
- }
-
- if (lhs_idim == -1)
- {
- out_shape.dim(out_idim) = rhs.dim(rhs_idim);
- rhs_idim--;
- }
- else if (rhs_idim == -1)
- {
- out_shape.dim(out_idim) = lhs.dim(lhs_idim);
- lhs_idim--;
- }
- else
- {
- if (lhs.dim(lhs_idim) == rhs.dim(rhs_idim))
- {
- out_shape.dim(out_idim) = lhs.dim(lhs_idim);
- lhs_idim--;
- rhs_idim--;
- }
- else if (lhs.dim(lhs_idim) == 1)
- {
- out_shape.dim(out_idim) = rhs.dim(rhs_idim);
- lhs_idim--;
- rhs_idim--;
- }
- else if (rhs.dim(rhs_idim) == 1)
- {
- out_shape.dim(out_idim) = lhs.dim(lhs_idim);
- lhs_idim--;
- rhs_idim--;
- }
- else
- {
- // invalid result
- success = false;
- break;
- }
- }
- }
-
- if (lhs_idim != -1 || rhs_idim != -1)
- {
- // invalid result
- success = false;
- }
- return out_shape;
-}
-
-inline nnfw::cker::PaddingType convertPaddingType(ir::PaddingType ir_padding_type)
-{
- switch (ir_padding_type)
- {
- case ir::PaddingType::EXPLICIT:
- return nnfw::cker::PaddingType::kNone;
- case ir::PaddingType::SAME:
- return nnfw::cker::PaddingType::kSame;
- case ir::PaddingType::VALID:
- return nnfw::cker::PaddingType::kValid;
- default:
- throw std::runtime_error("Wrong padding type.");
- break;
- }
-}
-
-} // namespace interp
-} // namespace onert
-
-#endif // __ONERT_INTERP_OPERATIONS_OPERATION_UTILS_H_
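Among the removed helpers, calcBroadcastShape() applies the usual right-aligned broadcasting rule: equal dimensions match, and a dimension of 1 stretches to the other operand's. A self-contained sketch of the same rule on std::vector shapes rather than ir::Shape, with a usage example:

// Right-aligned broadcast of two shapes; dimensions of 1 stretch to match.
#include <algorithm>
#include <iostream>
#include <vector>

std::vector<int> calcBroadcastShape(const std::vector<int> &lhs, const std::vector<int> &rhs,
                                    bool &success)
{
  success = true;
  const int out_rank = static_cast<int>(std::max(lhs.size(), rhs.size()));
  std::vector<int> out(out_rank, 1);
  for (int i = 0; i < out_rank; ++i)
  {
    // Read dimensions right-aligned; missing leading dimensions count as 1.
    const int l = (i < static_cast<int>(lhs.size())) ? lhs[lhs.size() - 1 - i] : 1;
    const int r = (i < static_cast<int>(rhs.size())) ? rhs[rhs.size() - 1 - i] : 1;
    if (l == r || l == 1 || r == 1)
    {
      out[out_rank - 1 - i] = std::max(l, r);
    }
    else
    {
      success = false; // incompatible dimensions, e.g. 3 vs. 4
      break;
    }
  }
  return out;
}

int main()
{
  bool ok = false;
  const auto shape = calcBroadcastShape({8, 1, 6, 1}, {7, 1, 5}, ok); // -> {8, 7, 6, 5}
  std::cout << (ok ? "ok:" : "fail:");
  for (int d : shape)
    std::cout << ' ' << d;
  std::cout << '\n';
  return 0;
}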
diff --git a/runtime/onert/core/src/interp/operations/Pad.cc b/runtime/onert/core/src/interp/operations/Pad.cc
deleted file mode 100644
index 3db0828eb..000000000
--- a/runtime/onert/core/src/interp/operations/Pad.cc
+++ /dev/null
@@ -1,106 +0,0 @@
-/*
- * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#include "OperationUtil.h"
-#include "../Registration.h"
-
-#include "ir/operation/Pad.h"
-
-#include <cker/operation/Pad.h>
-
-namespace onert
-{
-namespace interp
-{
-namespace
-{
-
-void preparePad(ExecEnv *env, const ir::Operation &node)
-{
- const auto input_index = node.getInputs().at(ir::operation::Pad::INPUT);
- const auto output_index = node.getOutputs().at(0);
-
- const auto input_tensor = env->tensorAt(input_index);
-
- const auto output_info = env->graph().operands().at(output_index).info();
-
- // Check that the output shape is specified and the input/output types match
- // TODO Util function to compare TensorInfo
- if (output_info.total_size() == 0)
- {
- throw std::runtime_error{"Interp(Pad): NYI unspecified output shape"};
- }
- else
- {
- env->allocateIfNeeded(output_index, output_info);
- }
-
- const auto output_tensor = env->tensorAt(output_index);
- if (input_tensor->data_type() != output_tensor->data_type())
- {
- throw std::runtime_error{"Interp(Pad): Invalid output type"};
- }
-}
-
-void invoke(const ITensor *input_tensor, const ITensor *pad_tensor, const ITensor *output_tensor)
-{
- const auto input_buffer = input_tensor->bufferRO();
- const auto pad_buffer = pad_tensor->bufferRO();
- auto output_buffer = output_tensor->buffer();
-
- int32_t pad_rank = pad_tensor->getShape().dim(0);
-
- const auto cker_input_shape = convertShape(input_tensor->tensorInfo().shape());
- const auto cker_output_shape = convertShape(output_tensor->tensorInfo().shape());
- const float *input_ptr = reinterpret_cast<const float *>(input_buffer);
- const int32_t *pad_ptr = reinterpret_cast<const int32_t *>(pad_buffer);
- float *output_ptr = reinterpret_cast<float *>(output_buffer);
-
- nnfw::cker::Pad<float>(pad_ptr, pad_rank, cker_input_shape, input_ptr, cker_output_shape,
- output_ptr, nullptr);
-}
-
-void invokePad(const ExecEnv *env, const ir::Operation &node)
-{
- const auto input_index = node.getInputs().at(ir::operation::Pad::INPUT);
- const auto pad_index = node.getInputs().at(ir::operation::Pad::PAD);
- const auto output_index = node.getOutputs().at(0);
-
- const auto input_tensor = env->tensorAt(input_index);
- const auto pad_tensor = env->tensorAt(pad_index);
- const auto output_tensor = env->tensorAt(output_index);
-
- const auto data_type = input_tensor->data_type();
-
- if (data_type == ir::DataType::FLOAT32)
- {
- invoke(input_tensor, pad_tensor, output_tensor);
- }
- else
- {
- throw std::runtime_error{"Interp(Pad): NYI - Unsupported data type"};
- }
-}
-} // namespace
-
-OpKernel *getPad()
-{
- static OpKernel kernel = {preparePad, invokePad};
- return &kernel;
-}
-
-} // namespace interp
-} // namespace onert
diff --git a/runtime/onert/core/src/interp/operations/Pool2D.cc b/runtime/onert/core/src/interp/operations/Pool2D.cc
deleted file mode 100644
index 3935d4756..000000000
--- a/runtime/onert/core/src/interp/operations/Pool2D.cc
+++ /dev/null
@@ -1,140 +0,0 @@
-/*
- * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#include "OperationUtil.h"
-#include "../Registration.h"
-
-#include "ir/operation/Pool2D.h"
-#include "util/ShapeInference.h"
-#include "util/Utils.h"
-
-#include <cker/operation/AveragePool.h>
-#include <cker/operation/MaxPool.h>
-#include <misc/polymorphic_downcast.h>
-
-namespace onert
-{
-namespace interp
-{
-namespace pool2d
-{
-
-void preparePool2D(ExecEnv *env, const ir::Operation &node)
-{
- const auto &pool_node = nnfw::misc::polymorphic_downcast<const ir::operation::Pool2D &>(node);
- const auto in_index = node.getInputs().at(pool_node.INPUT);
- const auto out_index = node.getOutputs().at(0);
-
- const auto in_tensor = env->tensorAt(in_index);
- UNUSED_RELEASE(in_tensor);
-
- assert(in_tensor->getShape().rank() == 4);
-
- const auto output_info = env->graph().operands().at(out_index).info();
- if (output_info.total_size() == 0)
- {
- // Handle unspecified output shape
- const auto infered_output_shape =
- shape_inference::inferPoolShape(in_tensor->tensorInfo().shape(), pool_node.param());
- env->allocateIfNeeded(
- out_index, ir::OperandInfo::createStaticInfo(infered_output_shape, output_info.typeInfo()));
- }
- else
- {
- env->allocateIfNeeded(out_index, output_info);
- }
-
- auto out_tensor = env->tensorAt(out_index);
- UNUSED_RELEASE(out_tensor);
-
- // Handle same ifm & ofm data type only
- assert(in_tensor->data_type() == out_tensor->data_type());
- assert(out_tensor->getShape().rank() == 4);
-}
-
-template <typename T>
-void invoke(const nnfw::cker::PoolParams &params, const nnfw::cker::Shape &in_shape,
- const T *in_ptr, const nnfw::cker::Shape &out_shape, T *out_ptr,
- ir::operation::Pool2D::PoolType op_type)
-{
- switch (op_type)
- {
- case ir::operation::Pool2D::PoolType::AVG:
- nnfw::cker::AveragePool<T>(params, in_shape, in_ptr, out_shape, out_ptr);
- break;
- case ir::operation::Pool2D::PoolType::MAX:
- nnfw::cker::MaxPool<T>(params, in_shape, in_ptr, out_shape, out_ptr);
- break;
- default:
- throw std::runtime_error{"Interp(Pool2D): NYI unsupported operation"};
- break;
- }
-}
-
-void invokePool2DOps(const ExecEnv *env, const ir::Operation &node)
-{
- const auto &pool_node = nnfw::misc::polymorphic_downcast<const ir::operation::Pool2D &>(node);
-
- const auto in_index = node.getInputs().at(0);
- const auto out_index = node.getOutputs().at(0);
-
- // Lookup input and output tensors
- const auto in_tensor = env->tensorAt(in_index);
- const auto out_tensor = env->tensorAt(out_index);
-
- // TODO support NCHW frontend
- const auto ifm_shape = in_tensor->tensorInfo().shape().asFeature(ir::Layout::NHWC);
- const auto ofm_shape = out_tensor->tensorInfo().shape().asFeature(ir::Layout::NHWC);
- const auto param = pool_node.param();
- const auto padding =
- ir::calculatePadding(param.padding, ifm_shape, ofm_shape, param.stride, param.kw, param.kh);
- // Calculate
- nnfw::cker::PoolParams cker_param;
- cker_param.filter_width = param.kw;
- cker_param.filter_height = param.kh;
- cker_param.padding_values.width = padding.left;
- cker_param.padding_values.height = padding.top;
- cker_param.stride_width = param.stride.horizontal;
- cker_param.stride_height = param.stride.vertical;
-
- const auto data_type = in_tensor->data_type();
- if (data_type == ir::DataType::FLOAT32)
- {
- calculateActivationRange(param.activation, &cker_param.float_activation_min,
- &cker_param.float_activation_max);
-
- const auto in_shape = convertShape(in_tensor->tensorInfo().shape());
- const auto out_shape = convertShape(out_tensor->tensorInfo().shape());
- const float *in_ptr = reinterpret_cast<const float *>(in_tensor->bufferRO());
- float *out_ptr = reinterpret_cast<float *>(out_tensor->buffer());
- // Now, invoke() supports only Pool2D in float
- invoke<float>(cker_param, in_shape, in_ptr, out_shape, out_ptr, param.op_type);
- }
- else
- {
- throw std::runtime_error{"NYI: Support float only"};
- }
-}
-} // namespace pool2d
-
-OpKernel *getPool2D()
-{
- static OpKernel kernel = {pool2d::preparePool2D, pool2d::invokePool2DOps};
- return &kernel;
-}
-
-} // namespace interp
-} // namespace onert
diff --git a/runtime/onert/core/src/interp/operations/Reshape.cc b/runtime/onert/core/src/interp/operations/Reshape.cc
deleted file mode 100644
index 1de5a5762..000000000
--- a/runtime/onert/core/src/interp/operations/Reshape.cc
+++ /dev/null
@@ -1,63 +0,0 @@
-/*
- * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#include "../Registration.h"
-
-namespace onert
-{
-namespace interp
-{
-namespace
-{
-
-void prepare(ExecEnv *env, const ir::Operation &node)
-{
- const auto in_index = node.getInputs().at(0);
- const auto out_index = node.getOutputs().at(0);
-
- // Unspecified shape is not supported in operation node spec now
- const auto output_info = env->graph().operands().at(out_index).info();
- env->allocateAndShareIfNeeded(out_index, output_info, in_index);
-
- assert(output_info.total_size() == env->graph().operands().at(in_index).info().total_size());
-}
-
-void invoke(const ExecEnv *env, const ir::Operation &node)
-{
- const auto in_index = node.getInputs().at(0);
- const auto out_index = node.getOutputs().at(0);
-
- if (env->tensorAt(in_index)->bufferRO() == env->tensorAt(out_index)->bufferRO())
- {
- // Same data
- return;
- }
-
- const auto output_info = env->graph().operands().at(out_index).info();
- memcpy(env->tensorAt(out_index)->buffer(), env->tensorAt(in_index)->bufferRO(),
- output_info.total_size());
-}
-
-} // namespace
-
-OpKernel *getReshape()
-{
- static OpKernel kernel = {prepare, invoke};
- return &kernel;
-}
-
-} // namespace interp
-} // namespace onert
diff --git a/runtime/onert/core/src/interp/operations/Softmax.cc b/runtime/onert/core/src/interp/operations/Softmax.cc
deleted file mode 100644
index 8be2f2210..000000000
--- a/runtime/onert/core/src/interp/operations/Softmax.cc
+++ /dev/null
@@ -1,123 +0,0 @@
-/*
- * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#include "OperationUtil.h"
-#include "../Registration.h"
-
-#include "ir/operation/Softmax.h"
-
-#include <cker/operation/SoftMax.h>
-#include <misc/polymorphic_downcast.h>
-
-namespace onert
-{
-namespace interp
-{
-namespace
-{
-
-void prepareSoftMax(ExecEnv *env, const ir::Operation &node)
-{
- const auto in_index = node.getInputs().at(0);
- const auto out_index = node.getOutputs().at(0);
-
- const auto in_tensor = env->tensorAt(in_index);
- UNUSED_RELEASE(in_tensor);
-
- assert((in_tensor->getShape().rank() == 4) || (in_tensor->getShape().rank() == 2));
-
- // Output shape should be same with input
- // Output type is pre-defined in model
- const auto output_shape = env->graph().operands().at(in_index).info().shape();
- const auto output_type = env->graph().operands().at(out_index).info().typeInfo();
-
- const auto output_info = ir::OperandInfo::createStaticInfo(output_shape, output_type);
- env->allocateIfNeeded(out_index, output_info);
-
- auto out_tensor = env->tensorAt(out_index);
- UNUSED_RELEASE(out_tensor);
-
- // Check output shape is same with input
- assert(in_tensor->getShape().rank() == out_tensor->getShape().rank());
- for (int32_t i = 0; i < in_tensor->getShape().rank(); i++)
- {
- assert(in_tensor->getShape().dim(i) == out_tensor->getShape().dim(i));
- }
-}
-
-void invoke(const ITensor *in_tensor, const ITensor *out_tensor,
- const ir::operation::Softmax::Param &param)
-{
- const float *in_ptr = reinterpret_cast<const float *>(in_tensor->bufferRO());
- float *out_ptr = reinterpret_cast<float *>(out_tensor->buffer());
-
- float beta = param.beta;
-
- if (in_tensor->getShape().rank() == 2)
- {
- uint32_t batch_size = in_tensor->getShape().dim(0);
- uint32_t input_size = in_tensor->getShape().dim(1);
-
- nnfw::cker::Softmax(in_ptr, input_size, batch_size, beta, out_ptr);
- }
- else if (in_tensor->getShape().rank() == 4)
- {
- const auto in_shape = convertShape(in_tensor->tensorInfo().shape());
- const auto out_shape = convertShape(out_tensor->tensorInfo().shape());
-
- nnfw::cker::SoftmaxParams cker_param;
- cker_param.beta = beta;
-
- nnfw::cker::Softmax(cker_param, in_shape, in_ptr, out_shape, out_ptr);
- }
- else
- {
- throw std::runtime_error{"Unsuported input dimension: support 2D or 4D"};
- }
-}
-
-void invokeSoftMax(const ExecEnv *env, const ir::Operation &node)
-{
- const auto &softmax_node = nnfw::misc::polymorphic_downcast<const ir::operation::Softmax &>(node);
-
- const auto in_index = node.getInputs().at(0);
- const auto out_index = node.getOutputs().at(0);
-
- const auto in_tensor = env->tensorAt(in_index);
- const auto out_tensor = env->tensorAt(out_index);
-
- const auto in_data_type = in_tensor->data_type();
- const auto out_data_type = out_tensor->data_type();
- if ((in_data_type == ir::DataType::FLOAT32) && (out_data_type == ir::DataType::FLOAT32))
- {
- invoke(in_tensor, out_tensor, softmax_node.param());
- }
- else
- {
- throw std::runtime_error{"NYI: Support float32 only"};
- }
-}
-
-} // namespace
-
-OpKernel *getSoftmax()
-{
- static OpKernel kernel = {prepareSoftMax, invokeSoftMax};
- return &kernel;
-}
-
-} // namespace interp
-} // namespace onert
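For the rank-2 case above, nnfw::cker::Softmax(in, input_size, batch_size, beta, out) normalizes each row of length input_size independently. The sketch below is a plain reference implementation of that math, not the cker kernel; the max subtraction is the standard numerical-stability step and may differ from cker's internals.

// Row-wise softmax with a beta factor (rank-2 case):
// out[b][i] = exp(beta * in[b][i]) / sum_j exp(beta * in[b][j]).
#include <algorithm>
#include <cmath>
#include <cstdint>
#include <iostream>
#include <vector>

void softmax2D(const float *in, uint32_t batch_size, uint32_t input_size, float beta, float *out)
{
  for (uint32_t b = 0; b < batch_size; ++b)
  {
    const float *row_in = in + b * input_size;
    float *row_out = out + b * input_size;
    const float max_v = *std::max_element(row_in, row_in + input_size);
    float sum = 0.0f;
    for (uint32_t i = 0; i < input_size; ++i)
    {
      row_out[i] = std::exp((row_in[i] - max_v) * beta); // subtract the row max for stability
      sum += row_out[i];
    }
    for (uint32_t i = 0; i < input_size; ++i)
      row_out[i] /= sum;
  }
}

int main()
{
  const std::vector<float> in{1.0f, 2.0f, 3.0f, 0.0f, 0.0f, 0.0f}; // two rows of three
  std::vector<float> out(in.size());
  softmax2D(in.data(), /*batch_size=*/2, /*input_size=*/3, /*beta=*/1.0f, out.data());
  for (float v : out)
    std::cout << v << ' '; // each row sums to 1
  std::cout << '\n';
  return 0;
}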
diff --git a/runtime/onert/core/src/interp/operations/TransposeConv.cc b/runtime/onert/core/src/interp/operations/TransposeConv.cc
deleted file mode 100644
index 59c8e8cdf..000000000
--- a/runtime/onert/core/src/interp/operations/TransposeConv.cc
+++ /dev/null
@@ -1,141 +0,0 @@
-/*
- * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#include "OperationUtil.h"
-#include "../Registration.h"
-
-#include "ir/operation/TransposeConv.h"
-
-#include <cker/operation/TransposeConv.h>
-#include <misc/polymorphic_downcast.h>
-
-namespace onert
-{
-namespace interp
-{
-namespace
-{
-
-void prepareTransposeConv(ExecEnv *env, const ir::Operation &node)
-{
- const auto ifm_index = node.getInputs().at(ir::operation::TransposeConv::INPUT);
- const auto ker_index = node.getInputs().at(ir::operation::TransposeConv::KERNEL);
- const auto ofm_shape_index = node.getInputs().at(ir::operation::TransposeConv::OUTPUT_SHAPE);
- const auto ofm_index = node.getOutputs().at(0);
-
- const auto ifm_tensor = env->tensorAt(ifm_index);
- const auto ker_tensor = env->tensorAt(ker_index);
- const auto ofm_shape_tensor = env->tensorAt(ofm_shape_index);
-
- assert(ifm_tensor->getShape().rank() == 4);
- assert(ker_tensor->getShape().rank() == 4);
- assert(ofm_shape_tensor->getShape().rank() == 1);
-
- UNUSED_RELEASE(ifm_tensor);
- UNUSED_RELEASE(ker_tensor);
- UNUSED_RELEASE(ofm_shape_tensor);
-
- const auto output_info = env->graph().operands().at(ofm_index).info();
- if (output_info.total_size() == 0)
- {
- // TODO: Handle unspecified output shape
- throw std::runtime_error{"Interp(TConv): NYI unspecified output shape"};
- }
- else
- {
- env->allocateIfNeeded(ofm_index, output_info);
- }
-
- auto ofm_tensor = env->tensorAt(ofm_index);
- UNUSED_RELEASE(ofm_tensor);
-
- // Handle same ifm & ofm data type only
- if (ifm_tensor->data_type() != ofm_tensor->data_type())
- {
- throw std::runtime_error{"Interp(TConv): Different I/O data dype"};
- }
-
- if (ofm_tensor->getShape().rank() != 4)
- {
- throw std::runtime_error{"Interp(TConv): Invalid output rank"};
- }
-}
-
-void invoke(const ITensor *ifm_tensor, const ITensor *ker_tensor, const ITensor *ofm_tensor,
- const ir::operation::TransposeConv::Param &param)
-{
- const auto ifm_shape = ifm_tensor->tensorInfo().shape().asFeature(ir::Layout::NHWC);
- const auto ofm_shape = ofm_tensor->tensorInfo().shape().asFeature(ir::Layout::NHWC);
- // Kernel format is [depth_out, kernel_height, kernel_width, depth_in].
- const auto ker_shape = ker_tensor->tensorInfo().shape();
- const auto ker_height = ker_shape.dim(1);
- const auto ker_width = ker_shape.dim(2);
- const auto padding =
- ir::calculatePadding(param.padding, ofm_shape, ifm_shape, param.stride, ker_width, ker_height);
-
- nnfw::cker::TransposeConvParams cker_param;
- cker_param.padding_values.width = padding.left;
- cker_param.padding_values.height = padding.top;
- cker_param.stride_width = param.stride.horizontal;
- cker_param.stride_height = param.stride.vertical;
- cker_param.dilation_width_factor = 1;
- cker_param.dilation_height_factor = 1;
-
- const auto cker_ifm_shape = convertShape(ifm_tensor->tensorInfo().shape());
- const auto cker_ker_shape = convertShape(ker_tensor->tensorInfo().shape());
- const auto cker_ofm_shape = convertShape(ofm_tensor->tensorInfo().shape());
- const float *ifm_ptr = reinterpret_cast<const float *>(ifm_tensor->bufferRO());
- const float *ker_ptr = reinterpret_cast<const float *>(ker_tensor->bufferRO());
- float *ofm_ptr = reinterpret_cast<float *>(ofm_tensor->buffer());
-
- nnfw::cker::TransposeConv(cker_param, cker_ifm_shape, ifm_ptr, cker_ker_shape, ker_ptr,
- cker_ofm_shape, ofm_ptr);
-}
-
-void invokeTransposeConv(const ExecEnv *env, const ir::Operation &node)
-{
- const auto &tconv_node =
- nnfw::misc::polymorphic_downcast<const ir::operation::TransposeConv &>(node);
-
- const auto ifm_index = node.getInputs().at(ir::operation::TransposeConv::INPUT);
- const auto ker_index = node.getInputs().at(ir::operation::TransposeConv::KERNEL);
- const auto ofm_index = node.getOutputs().at(0);
-
- const auto ifm_tensor = env->tensorAt(ifm_index);
- const auto ker_tensor = env->tensorAt(ker_index);
- const auto ofm_tensor = env->tensorAt(ofm_index);
-
- const auto data_type = ifm_tensor->data_type();
- if (data_type == ir::DataType::FLOAT32)
- {
- invoke(ifm_tensor, ker_tensor, ofm_tensor, tconv_node.param());
- }
- else
- {
- throw std::runtime_error{"Interp(TConv): Support float32 only"};
- }
-}
-
-} // namespace
-
-OpKernel *getTransposeConv()
-{
- static OpKernel kernel = {prepareTransposeConv, invokeTransposeConv};
- return &kernel;
-}
-
-} // namespace interp
-} // namespace onert
diff --git a/runtime/onert/core/src/ir/Shape.cc b/runtime/onert/core/src/ir/Shape.cc
index a7c50a266..e4e4c154b 100644
--- a/runtime/onert/core/src/ir/Shape.cc
+++ b/runtime/onert/core/src/ir/Shape.cc
@@ -26,10 +26,10 @@ namespace onert
namespace ir
{
-int32_t const Shape::UNSPECIFIED_DIM = -1;
+int32_t const Shape::kUnspecifiedDim = -1;
// NNFW_MAX_RANK is 6
-int32_t const Shape::MAX_RANK = 6;
+int32_t const Shape::kMaxRank = 6;
FeatureShape Shape::asFeature(Layout layout) const
{
@@ -80,7 +80,7 @@ uint64_t Shape::num_elements() const
{
// if any dimension is unspecified, the total number of elements cannot be calculated
if (std::any_of(_dimensions.begin(), _dimensions.end(),
- [](const int32_t &v) { return v == UNSPECIFIED_DIM; }))
+ [](const int32_t &v) { return v == kUnspecifiedDim; }))
throw std::runtime_error("num_elements() cannot calculate when any dimension is unspecified");
return std::accumulate(_dimensions.cbegin(), _dimensions.cend(), UINT64_C(1),
@@ -89,7 +89,7 @@ uint64_t Shape::num_elements() const
Shape permuteShape(const Shape &shape, Layout from, Layout to)
{
- assert(shape.rank() <= Shape::MAX_RANK);
+ assert(shape.rank() <= Shape::kMaxRank);
Shape ret{shape};
if (from == to)
return ret;
diff --git a/runtime/onert/core/src/ir/Shape.test.cc b/runtime/onert/core/src/ir/Shape.test.cc
index afdb29254..4788522d3 100644
--- a/runtime/onert/core/src/ir/Shape.test.cc
+++ b/runtime/onert/core/src/ir/Shape.test.cc
@@ -48,7 +48,7 @@ TEST(ShapeTest, neg_basic_test)
onert::ir::Shape shape(2);
shape.dim(0) = 1;
- shape.dim(1) = onert::ir::Shape::UNSPECIFIED_DIM;
+ shape.dim(1) = onert::ir::Shape::kUnspecifiedDim;
ASSERT_EQ(shape.rank(), 2);
ASSERT_EQ(onert::ir::rankMaybeUnspecified(shape), false);
diff --git a/runtime/onert/core/src/util/ChromeTracingEventWriter.cc b/runtime/onert/core/src/util/ChromeTracingEventWriter.cc
index d868efedf..c3f5179df 100644
--- a/runtime/onert/core/src/util/ChromeTracingEventWriter.cc
+++ b/runtime/onert/core/src/util/ChromeTracingEventWriter.cc
@@ -168,7 +168,7 @@ void ChromeTracingWriter::flush(const std::vector<std::unique_ptr<EventRecorder>
_os << "{\n";
_os << " " << quote("traceEvents") << ": [\n";
- for (auto &recorder : recorders)
+ for (const auto &recorder : recorders)
{
flushOneRecord(*recorder);
}
@@ -180,7 +180,7 @@ void ChromeTracingWriter::flush(const std::vector<std::unique_ptr<EventRecorder>
void ChromeTracingWriter::flushOneRecord(const EventRecorder &recorder)
{
- for (auto &evt : recorder.duration_events())
+ for (const auto &evt : recorder.duration_events())
{
const std::string name = getLabel(*evt);
const std::string tid = getTid(*evt);
@@ -188,7 +188,7 @@ void ChromeTracingWriter::flushOneRecord(const EventRecorder &recorder)
_os << " " << object(*evt, name, tid) << ",\n";
}
- for (auto &evt : recorder.counter_events())
+ for (const auto &evt : recorder.counter_events())
{
_os << " " << object(evt) << ",\n";
}
diff --git a/runtime/onert/core/src/util/MDTableEventWriter.cc b/runtime/onert/core/src/util/MDTableEventWriter.cc
index 7a8b9f234..13dab5b77 100644
--- a/runtime/onert/core/src/util/MDTableEventWriter.cc
+++ b/runtime/onert/core/src/util/MDTableEventWriter.cc
@@ -32,7 +32,7 @@ namespace
void writeMDTableRow(std::ostream &os, const std::vector<std::string> &list)
{
os << "| ";
- for (auto &key : list)
+ for (const auto &key : list)
{
os << key << " | ";
}
@@ -227,7 +227,7 @@ struct MDTableBuilder
MDTableBuilder &build()
{
- for (auto &it : divideGraph())
+ for (const auto &it : divideGraph())
{
size_t begin_idx = it.first;
size_t end_idx = it.second;
@@ -314,7 +314,7 @@ struct MDTableBuilder
graph.end_ts = std::stoull(_duration_events[end_idx]->ts);
graph.setOperations(name_to_op);
- for (auto &arg : _duration_events[end_idx]->args)
+ for (const auto &arg : _duration_events[end_idx]->args)
{
if (arg.first == "session")
graph.session_index = arg.second;
@@ -358,7 +358,7 @@ struct MDTableBuilder
void MDTableWriter::flush(const std::vector<std::unique_ptr<EventRecorder>> &records)
{
- for (auto &recorder : records)
+ for (const auto &recorder : records)
{
MDTableBuilder(recorder->duration_events(), recorder->counter_events()).build().write(_os);
}
diff --git a/runtime/onert/core/src/util/SNPEEventWriter.cc b/runtime/onert/core/src/util/SNPEEventWriter.cc
index 4dea6d16c..87bbfc662 100644
--- a/runtime/onert/core/src/util/SNPEEventWriter.cc
+++ b/runtime/onert/core/src/util/SNPEEventWriter.cc
@@ -103,9 +103,9 @@ void SNPEWriter::flush(const std::vector<std::unique_ptr<EventRecorder>> &record
// Memory
{
std::unordered_map<std::string, Stat> mem_stats;
- for (auto &recorder : recorders)
+ for (const auto &recorder : recorders)
{
- for (auto &evt : recorder->counter_events())
+ for (const auto &evt : recorder->counter_events())
{
auto &mem_stat = mem_stats[evt.name];
uint64_t val = std::stoull(evt.values.at("value"));
@@ -114,7 +114,7 @@ void SNPEWriter::flush(const std::vector<std::unique_ptr<EventRecorder>> &record
}
auto &mem = exec_data["memory"] = Json::Value{Json::objectValue};
- for (auto &kv : mem_stats)
+ for (const auto &kv : mem_stats)
{
auto &key = kv.first;
auto &val = kv.second;
@@ -132,9 +132,9 @@ void SNPEWriter::flush(const std::vector<std::unique_ptr<EventRecorder>> &record
// 2D keys : stats[tid][name]
std::unordered_map<std::string, std::unordered_map<std::string, Stat>> stats;
std::unordered_map<std::string, std::unordered_map<std::string, uint64_t>> begin_timestamps;
- for (auto &recorder : recorders)
+ for (const auto &recorder : recorders)
{
- for (auto &evt : recorder->duration_events())
+ for (const auto &evt : recorder->duration_events())
{
std::string evt_name = getLabel(*evt);
std::string evt_tid = getBackend(*evt);
@@ -160,17 +160,17 @@ void SNPEWriter::flush(const std::vector<std::unique_ptr<EventRecorder>> &record
}
}
- for (auto &kv : begin_timestamps)
- for (auto &kv2 : kv.second)
+ for (const auto &kv : begin_timestamps)
+ for (const auto &kv2 : kv.second)
if (kv2.second != 0)
throw std::runtime_error{"Invalid Data - B and E pair does not match."};
- for (auto &kv : stats)
+ for (const auto &kv : stats)
{
- auto &tid = kv.first;
- auto &map = kv.second;
+ const auto &tid = kv.first;
+ const auto &map = kv.second;
auto &json_tid = exec_data[tid] = Json::Value{Json::objectValue};
- for (auto &kv : map)
+ for (const auto &kv : map)
{
auto &name = kv.first;
auto &val = kv.second;
diff --git a/runtime/onert/core/src/util/ShapeInference.cc b/runtime/onert/core/src/util/ShapeInference.cc
index 173de29c7..862d6f725 100644
--- a/runtime/onert/core/src/util/ShapeInference.cc
+++ b/runtime/onert/core/src/util/ShapeInference.cc
@@ -608,12 +608,12 @@ ir::Shape inferReshapeShape(const int32_t *shape_buf, const int32_t shape_num_el
const size_t total_num_elements)
{
ir::Shape ret(shape_num_elements);
- int32_t flatten_dim = ir::Shape::UNSPECIFIED_DIM;
+ int32_t flatten_dim = ir::Shape::kUnspecifiedDim;
for (int32_t i = 0; i < shape_num_elements; ++i)
{
if (shape_buf[i] < 0)
{
- if (flatten_dim != ir::Shape::UNSPECIFIED_DIM)
+ if (flatten_dim != ir::Shape::kUnspecifiedDim)
throw std::runtime_error("Reshape: 2nd param has special dim(for flatten) more than twice");
flatten_dim = i;
ret.dim(i) = 1;
@@ -623,7 +623,7 @@ ir::Shape inferReshapeShape(const int32_t *shape_buf, const int32_t shape_num_el
ret.dim(i) = shape_buf[i];
}
}
- if (flatten_dim != ir::Shape::UNSPECIFIED_DIM)
+ if (flatten_dim != ir::Shape::kUnspecifiedDim)
ret.dim(flatten_dim) = total_num_elements / ret.num_elements();
// Check reshapable
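A worked example of the flatten rule in this hunk: a single negative entry in the requested shape is resolved to total_num_elements divided by the product of the specified dimensions. This is a simplified sketch; the real inferReshapeShape() additionally verifies that the result is actually reshapable.

// Sketch of resolving a -1 ("flatten") dimension during reshape inference.
#include <cstddef>
#include <cstdint>
#include <iostream>
#include <stdexcept>
#include <vector>

std::vector<int32_t> inferReshape(const std::vector<int32_t> &req, std::size_t total_num_elements)
{
  std::vector<int32_t> ret(req.size(), 1);
  int flatten_dim = -1;  // index of the -1 entry, if any
  std::size_t known = 1; // product of the explicitly given dimensions
  for (std::size_t i = 0; i < req.size(); ++i)
  {
    if (req[i] < 0)
    {
      if (flatten_dim != -1)
        throw std::runtime_error("Reshape: more than one special dim (for flatten)");
      flatten_dim = static_cast<int>(i);
    }
    else
    {
      ret[i] = req[i];
      known *= static_cast<std::size_t>(req[i]);
    }
  }
  if (flatten_dim != -1)
    ret[flatten_dim] = static_cast<int32_t>(total_num_elements / known);
  return ret;
}

int main()
{
  // 24 elements reshaped with {2, -1, 3} resolve to {2, 4, 3}
  for (int32_t d : inferReshape({2, -1, 3}, 24))
    std::cout << d << ' ';
  std::cout << '\n';
  return 0;
}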
diff --git a/runtime/onert/frontend/base_loader/include/base_loader.h b/runtime/onert/frontend/base_loader/include/base_loader.h
index cf080abbc..878a594cc 100644
--- a/runtime/onert/frontend/base_loader/include/base_loader.h
+++ b/runtime/onert/frontend/base_loader/include/base_loader.h
@@ -68,8 +68,7 @@ public:
* @param model reference to model
*/
explicit BaseLoader(std::unique_ptr<ir::Model> &model)
- : _base{nullptr}, _pagesize(getpagesize()), _fd(-1), _model(model), _domain_model{nullptr},
- _tensor_names(std::make_shared<std::unordered_map<ir::OperandIndex, std::string>>())
+ : _base{nullptr}, _pagesize(getpagesize()), _fd(-1), _model(model), _domain_model{nullptr}
{
_use_mmaped_data = util::getConfigBool(util::config::USE_MMAPED_DATA);
}
@@ -194,7 +193,7 @@ protected:
const Model *_domain_model;
// Maps Tensor indices to onert Operands.
std::vector<ir::OperandIndex> _tensor_to_operand;
- std::shared_ptr<std::unordered_map<ir::OperandIndex, std::string>> _tensor_names;
+ std::unordered_map<ir::OperandIndex, std::string> _tensor_names;
// Verifier
std::unique_ptr<Verifier> _verifier;
// Boolean flag to use MMAPED_DATA
@@ -411,7 +410,7 @@ ir::OperandIndex BaseLoader<LoaderDomain>::loadOperand(const Tensor *tensor, ir:
subg.setOperandValue(operand_index, std::move(data_obj));
}
- _tensor_names->emplace(operand_index, tensor->name()->str());
+ _tensor_names.emplace(operand_index, tensor->name()->str());
// Variable
if (tensor->is_variable())
@@ -1297,8 +1296,8 @@ void BaseLoader<LoaderDomain>::loadIf(const Operator *op, ir::Graph &subg)
verifySubgraphIndex(else_index);
ir::operation::If::Param param;
- param.then_subg_index = ir::SubgraphIndex{static_cast<uint32_t>(then_index)};
- param.else_subg_index = ir::SubgraphIndex{static_cast<uint32_t>(else_index)};
+ param.then_subg_index = ir::SubgraphIndex{static_cast<uint16_t>(then_index)};
+ param.else_subg_index = ir::SubgraphIndex{static_cast<uint16_t>(else_index)};
loadOperationTo<ir::operation::If>(op, subg, param);
}
@@ -1314,8 +1313,8 @@ void BaseLoader<LoaderDomain>::loadWhile(const Operator *op, ir::Graph &subg)
verifySubgraphIndex(body_index);
ir::operation::While::Param param;
- param.cond_subg_index = ir::SubgraphIndex{static_cast<uint32_t>(cond_index)};
- param.body_subg_index = ir::SubgraphIndex{static_cast<uint32_t>(body_index)};
+ param.cond_subg_index = ir::SubgraphIndex{static_cast<uint16_t>(cond_index)};
+ param.body_subg_index = ir::SubgraphIndex{static_cast<uint16_t>(body_index)};
loadOperationTo<ir::operation::While>(op, subg, param);
}
@@ -1663,6 +1662,12 @@ void BaseLoader<LoaderDomain>::loadOperation(const Operator *op, ir::Graph &subg
case BuiltinOperator::BuiltinOperator_DEPTH_TO_SPACE:
loadDepthToSpace(op, subg);
return;
+ case BuiltinOperator::BuiltinOperator_EMBEDDING_LOOKUP:
+ loadOperationTo<ir::operation::EmbeddingLookup>(op, subg);
+ return;
+ case BuiltinOperator::BuiltinOperator_HASHTABLE_LOOKUP:
+ loadOperationTo<ir::operation::HashtableLookup>(op, subg);
+ return;
default:
throw std::runtime_error(
std::string("Unsupported operation: ").append(EnumNameBuiltinOperator(builtin_op)));
@@ -1682,10 +1687,15 @@ template <typename LoaderDomain> void BaseLoader<LoaderDomain>::loadModel()
// Load subgraphs and map operations on subgraph
const auto subgraphs = _domain_model->subgraphs();
auto model = std::make_unique<ir::Model>();
- for (uint32_t subgraph_index = 0; subgraph_index < subgraphs->size(); ++subgraph_index)
+ if (subgraphs->size() - 1 > ir::SubgraphIndex::max())
+ throw std::runtime_error{"The number of subgraphs cannot exceed " +
+ std::to_string(ir::SubgraphIndex::max() + 1)};
+ for (uint16_t subgraph_index = 0; subgraph_index < subgraphs->size(); ++subgraph_index)
{
auto subg = loadSubgraph((*_domain_model->subgraphs())[subgraph_index]);
- model->push(ir::SubgraphIndex{subgraph_index}, std::move(subg));
+    // NOTE: () is used instead of {} because () does not check narrowing.
+    // This is okay since overflow is checked by the above if-statement.
+ model->push(ir::SubgraphIndex(subgraph_index), std::move(subg));
}
_model = std::move(model);
}
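
The loadModel() hunk above guards the subgraph count against the 16-bit range of ir::SubgraphIndex before constructing indices with uint16_t. A standalone sketch of that guard-then-narrow pattern — SubgraphIndex16 is a hypothetical stand-in, not the onert index type:

#include <cstdint>
#include <limits>
#include <stdexcept>
#include <string>
#include <vector>

// Hypothetical 16-bit index wrapper standing in for ir::SubgraphIndex.
struct SubgraphIndex16
{
  explicit SubgraphIndex16(uint16_t v) : value(v) {}
  uint16_t value;
};

std::vector<SubgraphIndex16> buildIndices(uint32_t num_subgraphs)
{
  constexpr uint32_t kMax = std::numeric_limits<uint16_t>::max(); // 65535
  if (num_subgraphs > kMax + 1u) // indices 0..kMax are representable
    throw std::runtime_error{"The number of subgraphs cannot exceed " +
                             std::to_string(kMax + 1u)};

  std::vector<SubgraphIndex16> indices;
  indices.reserve(num_subgraphs);
  for (uint32_t i = 0; i < num_subgraphs; ++i)
    indices.emplace_back(static_cast<uint16_t>(i)); // narrowing is safe: i <= kMax here
  return indices;
}
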
diff --git a/runtime/onert/frontend/circle/src/circle_loader.cc b/runtime/onert/frontend/circle/src/circle_loader.cc
index 5abcc9cd0..5bf626d6c 100644
--- a/runtime/onert/frontend/circle/src/circle_loader.cc
+++ b/runtime/onert/frontend/circle/src/circle_loader.cc
@@ -112,13 +112,13 @@ private:
for (const std::int32_t input_ind : *circle_subg->inputs())
{
subg->addInput(tensorIdxToOperandIdx(input_ind),
- _tensor_names->at(_tensor_to_operand[input_ind]));
+ _tensor_names.at(_tensor_to_operand[input_ind]));
}
// Set outputs
for (const std::int32_t output_ind : *circle_subg->outputs())
{
subg->addOutput(tensorIdxToOperandIdx(output_ind),
- _tensor_names->at(_tensor_to_operand[output_ind]));
+ _tensor_names.at(_tensor_to_operand[output_ind]));
}
// Create operations
for (const auto *op : *circle_subg->operators())
diff --git a/runtime/onert/frontend/nnapi/CMakeLists.txt b/runtime/onert/frontend/nnapi/CMakeLists.txt
index dafd84ccf..b66b32e89 100644
--- a/runtime/onert/frontend/nnapi/CMakeLists.txt
+++ b/runtime/onert/frontend/nnapi/CMakeLists.txt
@@ -24,4 +24,4 @@ target_link_libraries(test_onert_frontend_nnapi PRIVATE ${LIB_ONERT} dl)
target_link_libraries(test_onert_frontend_nnapi PRIVATE gtest)
target_link_libraries(test_onert_frontend_nnapi PRIVATE gtest_main)
-install(TARGETS test_onert_frontend_nnapi DESTINATION unittest_standalone)
+install(TARGETS test_onert_frontend_nnapi DESTINATION unittest)
diff --git a/runtime/onert/frontend/nnapi/compilation.cc b/runtime/onert/frontend/nnapi/compilation.cc
index 871c040ef..2c56f061a 100644
--- a/runtime/onert/frontend/nnapi/compilation.cc
+++ b/runtime/onert/frontend/nnapi/compilation.cc
@@ -58,7 +58,7 @@ int ANeuralNetworksCompilation_finish(ANeuralNetworksCompilation *compilation)
return ANEURALNETWORKS_UNEXPECTED_NULL;
}
- if (compilation->state() != ::onert::compiler::State::CREATED)
+ if (compilation->isFinished())
{
VERBOSE(NNAPI::Compilation) << "finish: Already finished" << std::endl;
return ANEURALNETWORKS_BAD_STATE;
@@ -87,7 +87,7 @@ int ANeuralNetworksCompilation_setPreference(ANeuralNetworksCompilation *compila
return ANEURALNETWORKS_UNEXPECTED_NULL;
}
- if (compilation->state() != ::onert::compiler::State::CREATED)
+ if (compilation->isFinished())
{
VERBOSE(NNAPI::Compilation) << "setPreference: Already finished" << std::endl;
return ANEURALNETWORKS_BAD_STATE;
diff --git a/runtime/onert/frontend/nnapi/execution.cc b/runtime/onert/frontend/nnapi/execution.cc
index 19636a84d..4e1a985f3 100644
--- a/runtime/onert/frontend/nnapi/execution.cc
+++ b/runtime/onert/frontend/nnapi/execution.cc
@@ -37,7 +37,7 @@ int ANeuralNetworksExecution_create(ANeuralNetworksCompilation *compilation,
return ANEURALNETWORKS_UNEXPECTED_NULL;
}
- std::shared_ptr<onert::exec::Executors> executors;
+ std::shared_ptr<onert::exec::IExecutors> executors;
compilation->publish(executors);
diff --git a/runtime/onert/frontend/nnapi/wrapper/ANeuralNetworksCompilation.cc b/runtime/onert/frontend/nnapi/wrapper/ANeuralNetworksCompilation.cc
index bb247b97f..3b5edc180 100644
--- a/runtime/onert/frontend/nnapi/wrapper/ANeuralNetworksCompilation.cc
+++ b/runtime/onert/frontend/nnapi/wrapper/ANeuralNetworksCompilation.cc
@@ -26,9 +26,7 @@ ANeuralNetworksCompilation::ANeuralNetworksCompilation(const ANeuralNetworksMode
_compiler{std::make_shared<compiler::Compiler>(_model, *_coptions)}
{
if (model->allowedToFp16())
- {
- _compiler->enableToFp16();
- }
+ _coptions->enableToFp16();
}
bool ANeuralNetworksCompilation::finish() noexcept
@@ -36,6 +34,7 @@ bool ANeuralNetworksCompilation::finish() noexcept
try
{
_artifact = _compiler->compile();
+ _compiler = nullptr;
}
catch (const std::exception &e)
{
diff --git a/runtime/onert/frontend/nnapi/wrapper/ANeuralNetworksCompilation.h b/runtime/onert/frontend/nnapi/wrapper/ANeuralNetworksCompilation.h
index dff5c6dc6..3898f1d5e 100644
--- a/runtime/onert/frontend/nnapi/wrapper/ANeuralNetworksCompilation.h
+++ b/runtime/onert/frontend/nnapi/wrapper/ANeuralNetworksCompilation.h
@@ -22,7 +22,7 @@
#include "compiler/Compiler.h"
#include "ir/Graph.h"
#include "ir/Model.h"
-#include "exec/Executors.h"
+#include "exec/IExecutors.h"
#include "util/TracingCtx.h"
struct ANeuralNetworksCompilation
@@ -32,9 +32,9 @@ public:
public:
bool finish() noexcept;
+ bool isFinished() noexcept { return _compiler == nullptr; }
- onert::compiler::State state(void) noexcept { return _compiler->state(); }
- void publish(std::shared_ptr<onert::exec::Executors> &executors) noexcept
+ void publish(std::shared_ptr<onert::exec::IExecutors> &executors) noexcept
{
executors = _artifact ? _artifact->_executors : nullptr;
}
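
The compilation hunks above replace the compiler::State query with isFinished(), which reports whether the wrapper has already released its Compiler after a successful compile(). A minimal sketch of that pattern with placeholder types — FakeCompiler and CompilationWrapper are assumptions for illustration, not onert classes:

#include <memory>

struct FakeCompiler // stand-in for onert::compiler::Compiler
{
  int compile() { return 42; } // stand-in for the compiled artifact
};

class CompilationWrapper
{
public:
  CompilationWrapper() : _compiler{std::make_shared<FakeCompiler>()} {}

  bool finish() noexcept
  {
    if (isFinished())
      return false;          // already compiled; NNAPI reports BAD_STATE here
    _artifact = _compiler->compile();
    _compiler = nullptr;     // drop the compiler; its nullness now marks "finished"
    return true;
  }

  bool isFinished() const noexcept { return _compiler == nullptr; }

private:
  std::shared_ptr<FakeCompiler> _compiler;
  int _artifact = 0;
};
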
diff --git a/runtime/onert/frontend/nnapi/wrapper/ANeuralNetworksExecution.h b/runtime/onert/frontend/nnapi/wrapper/ANeuralNetworksExecution.h
index 110c7cd55..6fbc4c2e0 100644
--- a/runtime/onert/frontend/nnapi/wrapper/ANeuralNetworksExecution.h
+++ b/runtime/onert/frontend/nnapi/wrapper/ANeuralNetworksExecution.h
@@ -26,7 +26,7 @@
struct ANeuralNetworksExecution
{
public:
- ANeuralNetworksExecution(const std::shared_ptr<onert::exec::Executors> &executors)
+ ANeuralNetworksExecution(const std::shared_ptr<onert::exec::IExecutors> &executors)
: _execution{std::make_shared<onert::exec::Execution>(executors)}
{
// DO NOTHING
diff --git a/runtime/onert/frontend/tflite/src/tflite_loader.cc b/runtime/onert/frontend/tflite/src/tflite_loader.cc
index fe69e4e2a..dc8564632 100644
--- a/runtime/onert/frontend/tflite/src/tflite_loader.cc
+++ b/runtime/onert/frontend/tflite/src/tflite_loader.cc
@@ -99,13 +99,13 @@ private:
for (const std::int32_t input_ind : *tflite_subg->inputs())
{
subg->addInput(tensorIdxToOperandIdx(input_ind),
- _tensor_names->at(_tensor_to_operand[input_ind]));
+ _tensor_names.at(_tensor_to_operand[input_ind]));
}
// Set outputs
for (const std::int32_t output_ind : *tflite_subg->outputs())
{
subg->addOutput(tensorIdxToOperandIdx(output_ind),
- _tensor_names->at(_tensor_to_operand[output_ind]));
+ _tensor_names.at(_tensor_to_operand[output_ind]));
}
// Create operations
for (const auto *op : *tflite_subg->operators())
@@ -113,7 +113,6 @@ private:
loadOperation(op, *subg);
}
- subg->setTensorName(_tensor_names);
subg->verify();
return subg;