Diffstat (limited to 'runtime/onert/core/src/compiler/ExecutorFactory.cc')
-rw-r--r--  runtime/onert/core/src/compiler/ExecutorFactory.cc | 501
1 file changed, 501 insertions(+), 0 deletions(-)
diff --git a/runtime/onert/core/src/compiler/ExecutorFactory.cc b/runtime/onert/core/src/compiler/ExecutorFactory.cc
new file mode 100644
index 000000000..bb325ffbc
--- /dev/null
+++ b/runtime/onert/core/src/compiler/ExecutorFactory.cc
@@ -0,0 +1,501 @@
+/*
+ * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "ExecutorFactory.h"
+
+#include <functional>
+#include "exec/ExecutionObservers.h"
+#include "exec/LinearExecutor.h"
+#include "exec/DataflowExecutor.h"
+#include "exec/ParallelExecutor.h"
+#include "compiler/BackendManager.h"
+#include "compiler/ExecutionBuilder.h"
+#include "exec/ExecTime.h"
+#include "compiler/Linear.h"
+#include "compiler/TensorBuilders.h"
+#include "backend/IConstantInitializer.h"
+#include "backend/IKernelGenerator.h"
+#include "backend/IOptimizer.h"
+#include "backend/IPortableTensor.h"
+#include "backend/ITensorRegister.h"
+#include "backend/controlflow/Config.h"
+#include "backend/controlflow/KernelGenerator.h"
+#include "backend/controlflow/UserTensor.h"
+#include "backend/controlflow/TensorBuilder.h"
+#include <memory>
+
+namespace onert
+{
+namespace
+{
+
+class SyncFunction final : public exec::IFunction
+{
+public:
+ virtual ~SyncFunction() = default;
+ SyncFunction(std::unique_ptr<exec::IFunction> fn, const std::shared_ptr<backend::IConfig> config)
+ : _fn{std::move(fn)}, _config{config}
+ {
+ assert(_fn);
+ assert(_config);
+ }
+
+ void run() override
+ {
+ _fn->run();
+ _config->sync();
+ }
+
+ void prepare() override { _fn->prepare(); }
+
+private:
+ std::unique_ptr<exec::IFunction> _fn;
+ std::shared_ptr<backend::IConfig> _config;
+};
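+
+// A usage sketch (hypothetical; `make_kernel_fn` and `backend_config` are
+// placeholders, not names from this file). SyncFunction decorates another
+// IFunction so that every run() is followed by a blocking backend sync, which
+// makes per-kernel wall-clock timing meaningful in profiling mode; it is
+// installed below via fn_seq->wrap<SyncFunction>(...):
+//
+//   std::unique_ptr<exec::IFunction> fn = make_kernel_fn();
+//   SyncFunction synced{std::move(fn), backend_config};
+//   synced.run(); // runs fn, then _config->sync() waits for the backend to finish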
+
+} // namespace
+} // namespace onert
+
+namespace onert
+{
+namespace compiler
+{
+
+ExecutorFactory &ExecutorFactory::get()
+{
+ static ExecutorFactory singleton;
+ return singleton;
+}
+
+ExecutorFactory::ExecutorFactory()
+{
+ _map["Linear"] = createLinearExecutor;
+ _map["Dataflow"] = std::bind(createDataflowExecutor, std::placeholders::_1, std::placeholders::_2,
+ std::placeholders::_3, false);
+ _map["Parallel"] = std::bind(createDataflowExecutor, std::placeholders::_1, std::placeholders::_2,
+ std::placeholders::_3, true);
+}
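+
+// The std::bind expressions above partially apply the trailing `parallel` flag
+// of createDataflowExecutor. A behavior-equivalent lambda sketch (for clarity
+// only, not a change):
+//
+//   _map["Parallel"] = [](std::unique_ptr<compiler::LoweredGraph> lg,
+//                         const compiler::CompilerOptions &opts,
+//                         const std::shared_ptr<exec::ExecutorMap> &em) {
+//     return createDataflowExecutor(std::move(lg), opts, em, /*parallel=*/true);
+//   };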
+
+exec::IExecutor *ExecutorFactory::create(std::unique_ptr<compiler::LoweredGraph> lowered_graph,
+ const compiler::CompilerOptions &options,
+ const std::shared_ptr<exec::ExecutorMap> &executor_map)
+{
+ return _map.at(options.executor)(std::move(lowered_graph), options, executor_map);
+}
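+
+// Note: _map.at() throws std::out_of_range when options.executor does not name
+// a registered executor ("Linear", "Dataflow" or "Parallel"). A minimal
+// call-site sketch, using only names from the signature above:
+//
+//   auto *exec = ExecutorFactory::get().create(std::move(lowered_graph), options, executor_map);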
+
+void ExecutorFactory::initializeBackendContext(compiler::LoweredGraph *lowered_graph)
+{
+ struct Entry
+ {
+ std::vector<backend::BackendContext::OperationInfo> operation_list;
+ std::vector<ir::OperandIndex> operand_list;
+ };
+ std::unordered_map<const backend::Backend *, Entry> backend_assets;
+
+ // Build lists for operations
+ lowered_graph->op_seqs().iterate(
+ [&](const ir::OpSequenceIndex &op_seq_index, const ir::OpSequence &op_seq) {
+ auto &op_seq_li = lowered_graph->getLowerInfo()->op_seq;
+ auto backend = op_seq_li.at(op_seq_index)->backend();
+ for (auto &operation_idx : op_seq.operations())
+ {
+ backend_assets[backend].operation_list.emplace_back(operation_idx, op_seq.getLayout());
+ }
+ });
+
+ // Build lists for operands
+ lowered_graph->graph().operands().iterate([&](const ir::OperandIndex &ind, const ir::Operand &) {
+ const auto lower_info = lowered_graph->getLowerInfo(ind);
+ for (auto factor : lower_info->def_factors())
+ {
+ auto backend = factor.backend();
+ backend_assets[backend].operand_list.emplace_back(ind);
+ }
+ });
+
+ for (auto &pair : backend_assets)
+ {
+ auto backend = pair.first;
+ auto &arg = pair.second;
+ lowered_graph->backend_contexts().at(backend)->initialize(arg.operation_list, arg.operand_list);
+ }
+}
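+
+// A small worked example of the grouping above (backend names assumed for
+// illustration): for a graph lowered as Conv2D -> acl_cl and Softmax -> cpu,
+// backend_assets ends up with two entries, and the acl_cl BackendContext is
+// initialized only with Conv2D plus the operands whose def factor is acl_cl.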
+
+void ExecutorFactory::runTensorRegistration(compiler::LoweredGraph *lowered_graph,
+ const std::vector<ir::OpSequenceIndex> &order)
+{
+ for (const auto index : order)
+ {
+ const auto &op_seq = lowered_graph->op_seqs().at(index);
+ const auto backend = lowered_graph->getLowerInfo(index)->backend();
+ const auto tensor_register = lowered_graph->backend_contexts().at(backend)->tensor_register;
+ auto tensor_builder = lowered_graph->backend_contexts().at(backend)->tensor_builder;
+ auto model_io = lowered_graph->graph().getInputs() + lowered_graph->graph().getOutputs();
+
+ if (tensor_register)
+ {
+ // Custom registration
+ tensor_register->registerTensors(op_seq, lowered_graph->getLowerInfo());
+ }
+ else
+ {
+ // Default registration
+ for (const auto op_idx : op_seq)
+ {
+ const auto &op = lowered_graph->graph().operations().at(op_idx);
+ for (const auto &index :
+ (op.getInputs() | ir::Remove::UNDEFINED) + (op.getOutputs() | ir::Remove::UNDEFINED))
+ {
+ if (!tensor_builder->isRegistered(index) && !model_io.contains(index))
+ {
+ const auto &operand_lower_info =
+ lowered_graph->getLowerInfo(index)->def_factors().getOnlyElement();
+
+          // E.g., permute (CPU) -> tensor A -> MaxPool2D (acl_cl):
+          // op.getOutputs() of permute (CPU) returns tensor A,
+          // but tensor A belongs to the acl_cl backend.
+          // So this tensor must NOT be registered for CPU.
+ if (operand_lower_info.backend() != backend)
+ continue;
+
+ const auto &obj = lowered_graph->graph().operands().at(index);
+ const auto frontend_layout = op_seq.getLayout();
+ const auto backend_layout = operand_lower_info.layout();
+ ir::OperandInfo backend_info{permuteShape(obj.shape(), frontend_layout, backend_layout),
+ obj.typeInfo(), obj.info().memAllocType(),
+ obj.isConstant()};
+ tensor_builder->registerTensorInfo(index, backend_info, backend_layout);
+ }
+ }
+ }
+ }
+ }
+}
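+
+// A worked example of the default registration (values assumed for
+// illustration): if an operand has frontend shape {1, 224, 224, 3} in NHWC and
+// its backend prefers NCHW, permuteShape() yields {1, 3, 224, 224}, and that
+// permuted OperandInfo is what gets registered with the backend's TensorBuilder.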
+
+std::vector<backend::ITensor *>
+ExecutorFactory::initializeModelIOTensors(compiler::LoweredGraph &lowered_graph,
+ const ir::OperandIndexSequence &indices)
+{
+ std::vector<backend::ITensor *> ret;
+
+ // TODO Store controlflow backend in BackendContext
+ std::shared_ptr<backend::controlflow::TensorBuilder> cf_tensor_builder;
+ std::shared_ptr<backend::controlflow::TensorRegistry> cf_tensor_reg;
+ for (const auto &e : lowered_graph.backend_contexts())
+ {
+ auto backend = e.first;
+ auto &context = e.second;
+ if (backend->config()->id() == backend::controlflow::Config::ID)
+ {
+ cf_tensor_builder =
+ std::dynamic_pointer_cast<backend::controlflow::TensorBuilder>(context->tensor_builder);
+ cf_tensor_reg =
+ std::dynamic_pointer_cast<backend::controlflow::TensorRegistry>(context->tensor_registry);
+ }
+ }
+ assert(cf_tensor_builder);
+ assert(cf_tensor_reg);
+
+ for (auto ind : indices)
+ {
+ const auto &operand = lowered_graph.graph().operands().at(ind);
+ auto tensor = std::make_unique<backend::controlflow::UserTensor>(
+ operand.info(),
+ ir::Layout::NHWC /* FIXME find op_seq for this operand and use frontend_layout */
+ );
+
+ // Add tensor to controlflow TensorRegistry.
+ cf_tensor_reg->setNativeUserTensor(ind, std::move(tensor));
+ auto *itensor = cf_tensor_reg->getITensor(ind);
+ ret.push_back(itensor);
+ }
+ return ret;
+}
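+
+// The UserTensor objects registered here carry no backing buffer yet; the
+// user's buffer is attached at execution time (e.g. when the caller supplies
+// input/output buffers through the execution API). That is why model I/O goes
+// through the controlflow backend's registry instead of an ordinary
+// TensorBuilder allocation.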
+
+void ExecutorFactory::prepareMigrantTensors(compiler::LoweredGraph &lowered_graph)
+{
+ TensorRegistries tensor_regs{lowered_graph.backend_contexts(), true};
+
+ lowered_graph.op_seqs().iterate(
+ [&](const ir::OpSequenceIndex &op_seq_index, const ir::OpSequence &op_seq) {
+ auto lower_info = lowered_graph.getLowerInfo(op_seq_index);
+ auto &backend_ctx = lowered_graph.backend_contexts().at(lower_info->backend());
+ for (auto ind : (op_seq.getInputs() + op_seq.getOutputs()) | ir::Remove::DUPLICATED |
+ ir::Remove::UNDEFINED)
+ {
+        // If an OpSequence input/output tensor does not have its own tensor object,
+        // it must be using a migrant tensor, so find the tensor in the other backends'
+        // registries and register it with this backend's registry if it is portable
+ if (!backend_ctx->tensor_registry->getITensor(ind))
+ {
+ auto tensor = tensor_regs.getITensor(ind);
+ assert(tensor); // The tensor must have been registered
+ auto ptensor = dynamic_cast<backend::IPortableTensor *>(tensor);
+ if (ptensor)
+ backend_ctx->tensor_registry->setMigrantTensor(ind, ptensor);
+ }
+ }
+ });
+}
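+
+// Example (backend names assumed for illustration): if an op_seq lowered to
+// the controlflow backend reads tensor A defined by a cpu op_seq, controlflow
+// has no native registration for A. The loop above then finds A in cpu's
+// registry and, since cpu tensors are IPortableTensor, registers A with the
+// controlflow registry as a migrant tensor, so its kernels can use A directly.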
+
+exec::IExecutor *
+ExecutorFactory::createLinearExecutor(std::unique_ptr<compiler::LoweredGraph> lowered_graph,
+ const compiler::CompilerOptions &options,
+ const std::shared_ptr<exec::ExecutorMap> &executor_map)
+{
+ const auto &backend_contexts = lowered_graph->backend_contexts();
+
+ initializeBackendContext(lowered_graph.get());
+
+  // Linearization requires a fully-built graph
+  assert(!lowered_graph->graph().isBuildingPhase());
+
+ /*************************************************
+ * Backend dependent analysis & optimization phase
+ *************************************************/
+
+ for (auto &pair : backend_contexts)
+ {
+ auto &optimizer = pair.second->optimizer;
+ if (optimizer)
+ optimizer->optimize();
+ }
+
+ /**********************************************************
+ * Backend dependent analysis & optimization phase finished
+ **********************************************************/
+
+ /***********************
+ * Code generation phase
+ ***********************/
+
+ auto order = Linear::linearize(*lowered_graph);
+ runTensorRegistration(lowered_graph.get(), order);
+
+ std::vector<backend::ITensor *> input_tensors;
+ std::vector<backend::ITensor *> output_tensors;
+ if (options.is_primary_subgraph)
+ {
+ input_tensors = initializeModelIOTensors(*lowered_graph, lowered_graph->graph().getInputs());
+ output_tensors = initializeModelIOTensors(*lowered_graph, lowered_graph->graph().getOutputs());
+ }
+
+ Linear::dump(*lowered_graph, order);
+ Linear::planTensors(*lowered_graph, order);
+
+ TensorBuilders tensor_builders{lowered_graph->backend_contexts(), true};
+ TensorRegistries tensor_regs{lowered_graph->backend_contexts(), true};
+
+ for (auto &tensor_builder : tensor_builders)
+ {
+ tensor_builder->prepare();
+ }
+
+ prepareMigrantTensors(*lowered_graph);
+
+ ExecutionBuilder builder;
+
+ // Generate kernels
+ lowered_graph->iterateTopolOpSeqs([&](const ir::OpSequenceIndex &op_seq_index,
+ const ir::OpSequence &op_seq) {
+ auto lower_info = lowered_graph->getLowerInfo(op_seq_index);
+ auto kernel_gen = lowered_graph->backend_contexts().at(lower_info->backend())->kernel_gen;
+    // Set TensorRegistries and ExecutorMap on the controlflow KernelGenerator
+ auto cf_kernel_gen = dynamic_cast<backend::controlflow::KernelGenerator *>(kernel_gen.get());
+ if (cf_kernel_gen != nullptr)
+ {
+ cf_kernel_gen->setTensorRegistries(tensor_regs);
+ cf_kernel_gen->setExecutorMap(executor_map);
+ }
+ auto fn_seq = kernel_gen->generate(op_seq);
+ if (options.he_profiling_mode)
+ {
+ fn_seq->wrap<SyncFunction>(lower_info->backend()->config());
+ }
+ builder.append(op_seq_index, {&op_seq, lower_info, std::move(fn_seq)});
+ });
+
+ for (auto &tensor_builder : tensor_builders)
+ {
+ tensor_builder->allocate();
+ }
+
+ for (auto &pair : backend_contexts)
+ {
+ pair.second->initConsts();
+ }
+
+ lowered_graph->graph().operands().iterate(
+ [](const ir::OperandIndex &, ir::Operand &obj) { obj.releaseData(); });
+
+ auto code_map = builder.releaseCodeMap();
+
+ for (auto &it : code_map)
+ {
+ auto op_seq_index = it.first;
+ auto &fn_seq = it.second.fn_seq;
+
+ fn_seq->iterate([&](exec::IFunction &ifunc) {
+ ifunc.prepare();
+ auto backend = lowered_graph->getLowerInfo(op_seq_index)->backend();
+ auto tensor_builder = lowered_graph->backend_contexts().at(backend)->tensor_builder;
+ tensor_builder->postFunctionPrepare();
+ });
+ }
+
+ auto exec =
+ new exec::LinearExecutor{std::move(lowered_graph), input_tensors, output_tensors, tensor_regs,
+ std::move(code_map), order};
+
+ if (!options.trace_filepath.empty())
+ {
+ std::unique_ptr<exec::IExecutionObserver> ctp =
+ std::make_unique<exec::ChromeTracingObserver>(options.trace_filepath, exec->graph());
+ exec->addObserver(std::move(ctp));
+ }
+
+ return exec;
+}
+
+exec::IExecutor *ExecutorFactory::createDataflowExecutor(
+ std::unique_ptr<compiler::LoweredGraph> lowered_graph, const compiler::CompilerOptions &options,
+ const std::shared_ptr<exec::ExecutorMap> &executor_map, bool parallel)
+{
+ const auto &backend_contexts = lowered_graph->backend_contexts();
+
+ initializeBackendContext(lowered_graph.get());
+
+ auto order = Linear::linearize(*lowered_graph);
+ runTensorRegistration(lowered_graph.get(), order);
+
+ std::vector<backend::ITensor *> input_tensors;
+ std::vector<backend::ITensor *> output_tensors;
+ if (options.is_primary_subgraph)
+ {
+ input_tensors = initializeModelIOTensors(*lowered_graph, lowered_graph->graph().getInputs());
+ output_tensors = initializeModelIOTensors(*lowered_graph, lowered_graph->graph().getOutputs());
+ }
+
+ TensorBuilders tensor_builders{lowered_graph->backend_contexts(), true};
+ TensorRegistries tensor_regs{lowered_graph->backend_contexts(), true};
+
+  // Workaround for using the static memory planner: mark every registered tensor
+  // as in use so that none of them is ever deallocated
+ for (auto &tensor_builder : tensor_builders)
+ {
+ lowered_graph->graph().operands().iterate(
+ [&](const ir::OperandIndex &ind, const ir::Operand &) {
+ if (tensor_builder->isRegistered(ind))
+ {
+ tensor_builder->notifyFirstUse(ind);
+ }
+ });
+ }
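+
+  // Rationale (sketch): notifyFirstUse() opens each tensor's lifetime and no
+  // matching notifyLastUse() is ever issued, so the planner treats every
+  // registered tensor as live for the whole execution. Dataflow/parallel
+  // executors do not fix an execution order at compile time, so per-operation
+  // deallocation points cannot be planned statically.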
+
+ for (auto &tensor_builder : tensor_builders)
+ {
+ tensor_builder->prepare();
+ }
+
+ prepareMigrantTensors(*lowered_graph);
+
+ ExecutionBuilder builder;
+
+ // Generate kernels
+ lowered_graph->iterateTopolOpSeqs([&](const ir::OpSequenceIndex &op_seq_index,
+ const ir::OpSequence &op_seq) {
+ auto lower_info = lowered_graph->getLowerInfo(op_seq_index);
+ auto kernel_gen = lowered_graph->backend_contexts().at(lower_info->backend())->kernel_gen;
+    // Set TensorRegistries and ExecutorMap on the controlflow KernelGenerator
+ auto cf_kernel_gen = dynamic_cast<backend::controlflow::KernelGenerator *>(kernel_gen.get());
+ if (cf_kernel_gen != nullptr)
+ {
+ cf_kernel_gen->setTensorRegistries(tensor_regs);
+ cf_kernel_gen->setExecutorMap(executor_map);
+ }
+ auto fn_seq = kernel_gen->generate(op_seq);
+ if (options.he_profiling_mode)
+ {
+ fn_seq->wrap<SyncFunction>(lower_info->backend()->config());
+ }
+ builder.append(op_seq_index, {&op_seq, lower_info, std::move(fn_seq)});
+ });
+
+ for (const auto &tensor_builder : tensor_builders)
+ {
+ tensor_builder->allocate();
+ }
+
+ for (auto &pair : backend_contexts)
+ {
+ pair.second->initConsts();
+ }
+
+ lowered_graph->graph().operands().iterate(
+ [](const ir::OperandIndex &, ir::Operand &obj) { obj.releaseData(); });
+
+ auto code_map = builder.releaseCodeMap();
+
+ for (auto &it : code_map)
+ {
+ auto op_seq_index = it.first;
+ auto &fn_seq = it.second.fn_seq;
+
+ fn_seq->iterate([&](exec::IFunction &ifunc) {
+ ifunc.prepare();
+ auto backend = lowered_graph->getLowerInfo(op_seq_index)->backend();
+ auto tensor_builder = lowered_graph->backend_contexts().at(backend)->tensor_builder;
+ tensor_builder->postFunctionPrepare();
+ });
+ }
+
+ exec::ExecutorBase *exec = nullptr;
+ if (parallel)
+ {
+ exec = new exec::ParallelExecutor{std::move(lowered_graph), input_tensors, output_tensors,
+ tensor_regs, std::move(code_map)};
+ }
+ else
+ {
+ auto dataflow_exec = new exec::DataflowExecutor{
+ std::move(lowered_graph), input_tensors, output_tensors, tensor_regs, std::move(code_map)};
+ if (options.he_profiling_mode)
+ {
+ std::vector<const backend::Backend *> backends;
+ for (const auto &pair : backend_contexts)
+ {
+ backends.push_back(pair.first);
+ }
+ auto et = std::make_shared<exec::ExecTime>(backends);
+ std::unique_ptr<exec::IExecutionObserver> obs =
+ std::make_unique<exec::ProfileObserver>(et, dataflow_exec->graph());
+ dataflow_exec->addObserver(std::move(obs));
+ }
+ exec = dataflow_exec;
+ }
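+
+  // Note: ProfileObserver records per-operation execution times into ExecTime
+  // storage keyed by backend, which profiling-aware scheduling can consume on
+  // subsequent compilations (this pairs with he_profiling_mode above).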
+
+ if (!options.trace_filepath.empty())
+ {
+ std::unique_ptr<exec::IExecutionObserver> ctp =
+ std::make_unique<exec::ChromeTracingObserver>(options.trace_filepath, exec->graph());
+ exec->addObserver(std::move(ctp));
+ }
+
+ return exec;
+}
+
+} // namespace compiler
+} // namespace onert