Diffstat (limited to 'runtimes/neurun/core/src/compiler/ExecutorFactory.cc')
-rw-r--r--  runtimes/neurun/core/src/compiler/ExecutorFactory.cc | 351
1 file changed, 351 insertions, 0 deletions
diff --git a/runtimes/neurun/core/src/compiler/ExecutorFactory.cc b/runtimes/neurun/core/src/compiler/ExecutorFactory.cc
new file mode 100644
index 000000000..2ff32a57e
--- /dev/null
+++ b/runtimes/neurun/core/src/compiler/ExecutorFactory.cc
@@ -0,0 +1,351 @@
+/*
+ * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "ExecutorFactory.h"
+
+#include <functional>
+#include "exec/ExecutionObservers.h"
+#include "exec/LinearExecutor.h"
+#include "exec/DataflowExecutor.h"
+#include "exec/ParallelExecutor.h"
+#include "compiler/BackendResolver.h"
+#include "backend/ExecTime.h"
+#include "compiler/Linear.h"
+#include "graph/dumper/Dumper.h"
+#include "OperationValidator.h"
+#include "SubTensorAnalyzer.h"
+#include "backend/IConstantInitializer.h"
+#include "backend/IKernelGenerator.h"
+#include "backend/IShapeFixer.h"
+#include "cpp14/memory.h"
+
+namespace neurun
+{
+namespace compiler
+{
+
+ExecutorFactory &ExecutorFactory::instance()
+{
+ static ExecutorFactory singleton;
+ return singleton;
+}
+
+ExecutorFactory::ExecutorFactory()
+{
+ _map["Linear"] = createLinearExecutor;
+ _map["Dataflow"] = std::bind(createDataflowExecutor, std::placeholders::_1, false);
+ _map["Parallel"] = std::bind(createDataflowExecutor, std::placeholders::_1, true);
+}
+
+exec::IExecutor *ExecutorFactory::create(const std::string &id, graph::Graph &graph)
+{
+ return _map.at(id)(graph);
+}
+
+exec::IExecutor *ExecutorFactory::createLinearExecutor(graph::Graph &graph)
+{
+ auto operand_context = std::make_shared<OperandContext>();
+ const auto &operands = graph.operands();
+
+ // Compilation result will be filled into operand_context and function_sequence
+ auto function_sequence = std::make_shared<exec::FunctionSequence>();
+
+ // linearize
+ auto linear = graph.linearize();
+
+ // Dump ops
+ linear->accept(neurun::graph::dumper::Dumper{});
+
+ linear->accept(OperationValidator{operands});
+
+ /*************************************************
+ * Backend dependent analysis & optimization phase
+ *************************************************/
+
+ // SubTensorInfo should be generated after lowering and before shape fixing and finalization,
+ // because SubTensorAnalyzer assumes that permutation insertion has already finished.
+ // lower: decide backends and insert permutations
+ // fix shapes: prepare for code generation and optimization
+ // generate tensor objects: generate tensors using subtensor info
+ // generate kernels
+ // allocate tensor memory
+ // constant initialization: fill the constants with values
+ // The generated SubTensorInfo is stored in the operand (Object)
+ // so that it can easily be passed to the plan builder and the tensor builder.
+ linear->accept(SubTensorAnalyzer{graph.operands()});
+
+ /**********************************************************
+ * Backend dependent analysis & optimization phase finished
+ **********************************************************/
+
+ /***********************
+ * Code generation phase
+ ***********************/
+
+ // Fix shapes
+ linear->iterate([&](const compiler::Linear::Element &element) {
+ auto backend = element.lower_info->backend();
+ auto shape_fixer = linear->getBackendContext(backend)->shape_fixer;
+ shape_fixer->fix(*element.subgraph);
+ });
+
+ linear->planTensors();
+
+ auto tensor_builders = linear->backend_resolver()->tensor_builders();
+
+ // Prepare tensors
+ for (auto &tensor_builder : tensor_builders)
+ {
+ tensor_builder->prepare();
+ }
+
+ // Generate initializers
+ linear->generateConstantInitializers();
+
+ class ExecutionBuilder final : public IExecutionBuilder
+ {
+ public:
+ ExecutionBuilder(exec::FunctionSequence &functions) : _functions{functions}
+ {
+ // DO NOTHING
+ }
+
+ public:
+ void append(std::unique_ptr<::neurun::exec::IFunction> &&f) override
+ {
+ _functions.append(std::move(f));
+ }
+
+ private:
+ exec::FunctionSequence &_functions;
+ };
+
+ auto execution_builder = nnfw::cpp14::make_unique<ExecutionBuilder>(*function_sequence);
+
+ // Generate kernels
+ linear->iterate([&](const compiler::Linear::Element &element) {
+ auto backend = element.lower_info->backend();
+ auto kernel_gen = linear->getBackendContext(backend)->kernel_gen;
+ kernel_gen->generate(*element.subgraph, execution_builder.get());
+ });
+
+ // Allocate Tensor Memory
+ for (auto &tensor_builder : tensor_builders)
+ {
+ tensor_builder->allocate();
+ }
+
+ // TODO Add optimization passes
+
+ // Initialize constant tensors
+ for (const auto backend : backend::BackendManager::instance().getAll())
+ {
+ linear->getBackendContext(backend)->constant_initializer->run();
+ }
+
+ for (auto &tensor_builder : tensor_builders)
+ {
+ tensor_builder->finalize();
+ }
+
+ // Wrap tensors as Object and store them in the plan
+ for (auto &tensor_builder : tensor_builders)
+ {
+ tensor_builder->iterate([&](const model::OperandIndex &index) {
+ auto object = tensor_builder->wrapTensor(index);
+ operand_context->set(index, object);
+ });
+ }
+
+ // Prepare each TensorManager on each backend
+ auto tensor_mgrs = nnfw::cpp14::make_unique<backend::TensorManagerSet>();
+ for (auto &tensor_builder : tensor_builders)
+ {
+ tensor_mgrs->insert(tensor_builder->releaseTensorManager());
+ }
+
+ return new exec::LinearExecutor{graph.shareModel(), linear->releaseSubgraphs(),
+ operand_context, linear->releaseLowerInfo(),
+ std::move(tensor_mgrs), linear->releaseElements(),
+ function_sequence};
+}
+
+exec::IExecutor *ExecutorFactory::createDataflowExecutor(graph::Graph &graph, bool parallel)
+{
+ auto operand_context = std::make_shared<OperandContext>();
+
+ graph.subgraphs().iterate([&](const model::SubgraphIndex &, const model::Subgraph &subg) {
+ auto subtensor_analyzer = SubTensorAnalyzer{graph.operands()};
+ subg.accept(subtensor_analyzer);
+ });
+
+ // Fix shapes
+ graph.subgraphs().iterate(
+ [&](const model::SubgraphIndex &subg_index, const model::Subgraph &subg) {
+ auto backend = graph.getLowerInfo(subg_index)->backend();
+ auto shape_fixer = graph.backend_resolver()->getBackendContext(backend)->shape_fixer;
+ shape_fixer->fix(subg);
+ });
+
+ graph.operands().iterate([&](const model::OperandIndex &ind, const model::Operand &obj) {
+ const auto lower_info = graph.getLowerInfo(ind);
+ for (auto factor : lower_info->def_factors())
+ {
+ bool isSubTensor = false;
+ auto backend = factor.backend();
+ auto tensor_builder = graph.backend_resolver()->getBackendContext(backend)->tensor_builder;
+
+ if (backend->config()->SupportSubTensorAlloc())
+ {
+ const auto parentInfo = obj.parent_info();
+ if (parentInfo != nullptr)
+ {
+ isSubTensor = true;
+ }
+ }
+
+ if (isSubTensor)
+ {
+ const compiler::SubTensorInfo info(obj);
+ tensor_builder->registerSubTensorInfo(ind, info);
+ }
+ else
+ {
+ const auto info = obj.info();
+ // NOTE This assumes that an operand has a single layout, and that only PermutateNode can
+ // have different layouts for its input and output
+ const auto &def = *obj.getDef().list().cbegin();
+ auto frontend_layout =
+ graph.subgraphs().at(graph.subgraphs().getOperation(def)).getLayout();
+ if (frontend_layout == model::Layout::UNKNOWN)
+ {
+ const auto &use = *obj.getUses().list().cbegin();
+ frontend_layout = graph.subgraphs().at(graph.subgraphs().getOperation(use)).getLayout();
+ }
+ const auto backend_layout = lower_info->def_factors().getOnlyElement().layout();
+ tensor_builder->registerTensorInfo(ind, info, frontend_layout, backend_layout,
+ obj.isConstant());
+ // Workaround to keep this tensor from ever being deallocated when the static memory planner is used
+ tensor_builder->notifyFirstUse(ind);
+ }
+ }
+ });
+
+ auto tensor_builders = graph.backend_resolver()->tensor_builders();
+
+ for (auto &tensor_builder : tensor_builders)
+ {
+ tensor_builder->prepare();
+ }
+
+ class ExecutionBuilder : public IExecutionBuilder
+ {
+ public:
+ void append(std::unique_ptr<exec::IFunction> &&fn) override
+ {
+ auto itr = _code_map.find(_next_index);
+ if (itr == _code_map.end())
+ {
+ _code_map[_next_index] = nnfw::cpp14::make_unique<exec::FunctionSequence>();
+ }
+ _code_map[_next_index]->append(std::move(fn));
+ };
+
+ // TODO Remove this method and make `append` take the index value as an argument
+ void setNextIndex(const model::SubgraphIndex next_index) { _next_index = next_index; }
+
+ exec::DataflowExecutor::CodeMap &&releaseCodeMap() { return std::move(_code_map); }
+
+ private:
+ model::SubgraphIndex _next_index;
+ exec::DataflowExecutor::CodeMap _code_map;
+ };
+
+ auto execution_builder = nnfw::cpp14::make_unique<ExecutionBuilder>();
+
+ // Generate kernels
+ graph.subgraphs().iterate(
+ [&](const model::SubgraphIndex &subg_index, const model::Subgraph &subg) {
+ auto backend = graph.getLowerInfo(subg_index)->backend();
+ auto constant_initializer =
+ graph.backend_resolver()->getBackendContext(backend)->constant_initializer;
+ constant_initializer->generate(subg, graph.operands());
+ // TODO This approach is temporary. See the declaration of `setNextIndex`.
+ execution_builder->setNextIndex(subg_index);
+ auto kernel_gen = graph.backend_resolver()->getBackendContext(backend)->kernel_gen;
+ kernel_gen->generate(subg, execution_builder.get());
+ });
+
+ for (const auto &tensor_builder : tensor_builders)
+ {
+ tensor_builder->allocate();
+ }
+
+ // Initialize constant tensors
+ for (const auto backend : backend::BackendManager::instance().getAll())
+ {
+ graph.backend_resolver()->getBackendContext(backend)->constant_initializer->run();
+ }
+
+ auto lower_info = graph.releaseLowerInfo();
+
+ for (auto &tensor_builder : tensor_builders)
+ {
+ tensor_builder->finalize();
+ }
+
+ // Wrap tensors as Object and store them in the plan
+ for (auto &tensor_builder : tensor_builders)
+ {
+ tensor_builder->iterate([&](const model::OperandIndex &index) {
+ auto object = tensor_builder->wrapTensor(index);
+ operand_context->set(index, object);
+ });
+ }
+
+ // Prepare each TensorManager on each backend
+ auto tensor_mgrs = nnfw::cpp14::make_unique<backend::TensorManagerSet>();
+ for (auto &tensor_builder : tensor_builders)
+ {
+ tensor_mgrs->insert(tensor_builder->releaseTensorManager());
+ }
+
+ if (parallel)
+ {
+ return new exec::ParallelExecutor{
+ graph.shareModel(), graph.releaseSubgraphs(),
+ operand_context, std::move(lower_info),
+ std::move(tensor_mgrs), std::move(execution_builder->releaseCodeMap())};
+ }
+ else
+ {
+ auto exec = new exec::DataflowExecutor{
+ graph.shareModel(), graph.releaseSubgraphs(),
+ operand_context, std::move(lower_info),
+ std::move(tensor_mgrs), std::move(execution_builder->releaseCodeMap())};
+ if (util::getConfigBool(util::config::PROFILING_MODE))
+ {
+ auto et = std::make_shared<backend::ExecTime>(backend::BackendManager::instance().getAll());
+ std::unique_ptr<exec::IExecutionObserver> obs =
+ nnfw::cpp14::make_unique<exec::ProfileObserver>(et);
+ exec->addObserver(std::move(obs));
+ }
+ return exec;
+ }
+}
+
+} // namespace compiler
+} // namespace neurun
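
For reference, a minimal usage sketch of the factory added by this commit (not part of the diff itself): the caller is expected to hold an already-lowered graph::Graph and to pick one of the executor ids registered in the constructor ("Linear", "Dataflow" or "Parallel"). The helper name makeExecutor and the shared_ptr ownership wrapper below are illustrative assumptions, not code from this change.

// Illustrative sketch only. Assumes `graph` has already been lowered
// (backends resolved, permutations inserted), as ExecutorFactory expects.
#include "ExecutorFactory.h"
#include "exec/IExecutor.h"
#include <memory>
#include <string>

std::shared_ptr<neurun::exec::IExecutor> makeExecutor(neurun::graph::Graph &graph,
                                                      const std::string &executor_id)
{
  // executor_id must be one of the keys registered in ExecutorFactory's
  // constructor: "Linear", "Dataflow" or "Parallel". An unknown id throws
  // std::out_of_range, since the factory looks it up with _map.at(id).
  auto &factory = neurun::compiler::ExecutorFactory::instance();

  // The factory returns a raw exec::IExecutor*; wrap it so the caller owns it.
  return std::shared_ptr<neurun::exec::IExecutor>{factory.create(executor_id, graph)};
}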