diff options
Diffstat (limited to 'runtimes/neurun/core/src/compiler/ExecutorFactory.cc')
-rw-r--r-- | runtimes/neurun/core/src/compiler/ExecutorFactory.cc | 351 |
1 files changed, 351 insertions, 0 deletions
diff --git a/runtimes/neurun/core/src/compiler/ExecutorFactory.cc b/runtimes/neurun/core/src/compiler/ExecutorFactory.cc new file mode 100644 index 000000000..2ff32a57e --- /dev/null +++ b/runtimes/neurun/core/src/compiler/ExecutorFactory.cc @@ -0,0 +1,351 @@ +/* + * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "ExecutorFactory.h" + +#include <functional> +#include "exec/ExecutionObservers.h" +#include "exec/LinearExecutor.h" +#include "exec/DataflowExecutor.h" +#include "exec/ParallelExecutor.h" +#include "compiler/BackendResolver.h" +#include "backend/ExecTime.h" +#include "compiler/Linear.h" +#include "graph/dumper/Dumper.h" +#include "OperationValidator.h" +#include "SubTensorAnalyzer.h" +#include "backend/IConstantInitializer.h" +#include "backend/IKernelGenerator.h" +#include "backend/IShapeFixer.h" +#include "cpp14/memory.h" + +namespace neurun +{ +namespace compiler +{ + +ExecutorFactory &ExecutorFactory::instance() +{ + static ExecutorFactory singleton; + return singleton; +} + +ExecutorFactory::ExecutorFactory() +{ + _map["Linear"] = createLinearExecutor; + _map["Dataflow"] = std::bind(createDataflowExecutor, std::placeholders::_1, false); + _map["Parallel"] = std::bind(createDataflowExecutor, std::placeholders::_1, true); +} + +exec::IExecutor *ExecutorFactory::create(const std::string &id, graph::Graph &graph) +{ + return _map.at(id)(graph); +} + +exec::IExecutor *ExecutorFactory::createLinearExecutor(graph::Graph &graph) +{ + auto operand_context = std::make_shared<OperandContext>(); + const auto &operands = graph.operands(); + + // Compilation result will be filled in operand_context and operation_sequence + auto function_sequence = std::make_shared<exec::FunctionSequence>(); + + // linearize + auto linear = graph.linearize(); + + // Dump ops + linear->accept(neurun::graph::dumper::Dumper{}); + + linear->accept(OperationValidator{operands}); + + /************************************************* + * Backend dependent analysis & optimization phase + *************************************************/ + + // SubTensorInfo should be generated after lower, before shape correction and finalize + // because SubTensorAnalyzer assume that insert permutation is already finished + // lower: decide backend and insert permutation + // fix shapes: prepare codegen to optimization + // generate tensor objects: generate tensor using subtensor info + // generate kernels + // allocate tesor memory + // constant intialization: fill the constants with values + // Generated SubTensorInfo is in operand(Object) + // for easy pass SubTensorInfo to plan builder and tensor builder + linear->accept(SubTensorAnalyzer{graph.operands()}); + + /********************************************************** + * Backend dependent analysis & optimization phase finished + **********************************************************/ + + /*********************** + * Code generation phase + ***********************/ + + // Fix shapes + linear->iterate([&](const compiler::Linear::Element &element) { + auto backend = element.lower_info->backend(); + auto shape_fixer = linear->getBackendContext(backend)->shape_fixer; + shape_fixer->fix(*element.subgraph); + }); + + linear->planTensors(); + + auto tensor_builders = linear->backend_resolver()->tensor_builders(); + + // Prepare tensors + for (auto &tensor_builder : tensor_builders) + { + tensor_builder->prepare(); + } + + // Generate initializers + linear->generateConstantInitializers(); + + class ExecutionBuilder final : public IExecutionBuilder + { + public: + ExecutionBuilder(exec::FunctionSequence &functions) : _functions{functions} + { + // DO NOTHING + } + + public: + void append(std::unique_ptr<::neurun::exec::IFunction> &&f) override + { + _functions.append(std::move(f)); + } + + private: + exec::FunctionSequence &_functions; + }; + + auto execution_builder = nnfw::cpp14::make_unique<ExecutionBuilder>(*function_sequence); + + // Generate kernels + linear->iterate([&](const compiler::Linear::Element &element) { + auto backend = element.lower_info->backend(); + auto kernel_gen = linear->getBackendContext(backend)->kernel_gen; + kernel_gen->generate(*element.subgraph, execution_builder.get()); + }); + + // Allocate Tensor Memory + for (auto &tensor_builder : tensor_builders) + { + tensor_builder->allocate(); + } + + // TODO Add optimization passes + + // Initialize constant tensors + for (const auto backend : backend::BackendManager::instance().getAll()) + { + linear->getBackendContext(backend)->constant_initializer->run(); + } + + for (auto &tensor_builder : tensor_builders) + { + tensor_builder->finalize(); + } + + // Wrap tensors as Object and store them to plan + for (auto &tensor_builder : tensor_builders) + { + tensor_builder->iterate([&](const model::OperandIndex &index) { + auto object = tensor_builder->wrapTensor(index); + operand_context->set(index, object); + }); + } + + // Prepare each TensorManager on each backend + auto tensor_mgrs = nnfw::cpp14::make_unique<backend::TensorManagerSet>(); + for (auto &tensor_builder : tensor_builders) + { + tensor_mgrs->insert(tensor_builder->releaseTensorManager()); + } + + return new exec::LinearExecutor{graph.shareModel(), linear->releaseSubgraphs(), + operand_context, linear->releaseLowerInfo(), + std::move(tensor_mgrs), linear->releaseElements(), + function_sequence}; +} + +exec::IExecutor *ExecutorFactory::createDataflowExecutor(graph::Graph &graph, bool parallel) +{ + auto operand_context = std::make_shared<OperandContext>(); + + graph.subgraphs().iterate([&](const model::SubgraphIndex &, const model::Subgraph &subg) { + auto subtensor_analyzer = SubTensorAnalyzer{graph.operands()}; + subg.accept(subtensor_analyzer); + }); + + // Fix shapes + graph.subgraphs().iterate( + [&](const model::SubgraphIndex &subg_index, const model::Subgraph &subg) { + auto backend = graph.getLowerInfo(subg_index)->backend(); + auto shape_fixer = graph.backend_resolver()->getBackendContext(backend)->shape_fixer; + shape_fixer->fix(subg); + }); + + graph.operands().iterate([&](const model::OperandIndex &ind, const model::Operand &obj) { + const auto lower_info = graph.getLowerInfo(ind); + for (auto factor : lower_info->def_factors()) + { + bool isSubTensor = false; + auto backend = factor.backend(); + auto tensor_builder = graph.backend_resolver()->getBackendContext(backend)->tensor_builder; + + if (backend->config()->SupportSubTensorAlloc()) + { + const auto parentInfo = obj.parent_info(); + if (parentInfo != nullptr) + { + isSubTensor = true; + } + } + + if (isSubTensor) + { + const compiler::SubTensorInfo info(obj); + tensor_builder->registerSubTensorInfo(ind, info); + } + else + { + const auto info = obj.info(); + // NOTE This assumes an operand can have one layout, and only PermutateNode can have + // different layouts for input and output + const auto &def = *obj.getDef().list().cbegin(); + auto frontend_layout = + graph.subgraphs().at(graph.subgraphs().getOperation(def)).getLayout(); + if (frontend_layout == model::Layout::UNKNOWN) + { + const auto &use = *obj.getUses().list().cbegin(); + frontend_layout = graph.subgraphs().at(graph.subgraphs().getOperation(use)).getLayout(); + } + const auto backend_layout = lower_info->def_factors().getOnlyElement().layout(); + tensor_builder->registerTensorInfo(ind, info, frontend_layout, backend_layout, + obj.isConstant()); + // To make this never be deallocated, this is a workaround to use static memory planner + tensor_builder->notifyFirstUse(ind); + } + } + }); + + auto tensor_builders = graph.backend_resolver()->tensor_builders(); + + for (auto &tensor_builder : tensor_builders) + { + tensor_builder->prepare(); + } + + class ExecutionBuilder : public IExecutionBuilder + { + public: + void append(std::unique_ptr<exec::IFunction> &&fn) override + { + auto itr = _code_map.find(_next_index); + if (itr == _code_map.end()) + { + _code_map[_next_index] = nnfw::cpp14::make_unique<exec::FunctionSequence>(); + } + _code_map[_next_index]->append(std::move(fn)); + }; + + // TODO Remove this method and make `append` to get index value as an argument + void setNextIndex(const model::SubgraphIndex next_index) { _next_index = next_index; } + + exec::DataflowExecutor::CodeMap &&releaseCodeMap() { return std::move(_code_map); } + + private: + model::SubgraphIndex _next_index; + exec::DataflowExecutor::CodeMap _code_map; + }; + + auto execution_builder = nnfw::cpp14::make_unique<ExecutionBuilder>(); + + // Generate kernels + graph.subgraphs().iterate( + [&](const model::SubgraphIndex &subg_index, const model::Subgraph &subg) { + auto backend = graph.getLowerInfo(subg_index)->backend(); + auto constant_initializer = + graph.backend_resolver()->getBackendContext(backend)->constant_initializer; + constant_initializer->generate(subg, graph.operands()); + // TODO This approach is temporal. See declaration of `setNextIndex`. + execution_builder->setNextIndex(subg_index); + auto kernel_gen = graph.backend_resolver()->getBackendContext(backend)->kernel_gen; + kernel_gen->generate(subg, execution_builder.get()); + }); + + for (const auto &tensor_builder : tensor_builders) + { + tensor_builder->allocate(); + } + + // Initialize constant tensors + for (const auto backend : backend::BackendManager::instance().getAll()) + { + graph.backend_resolver()->getBackendContext(backend)->constant_initializer->run(); + } + + auto lower_info = graph.releaseLowerInfo(); + + for (auto &tensor_builder : tensor_builders) + { + tensor_builder->finalize(); + } + + // Wrap tensors as Object and store them to plan + for (auto &tensor_builder : tensor_builders) + { + tensor_builder->iterate([&](const model::OperandIndex &index) { + auto object = tensor_builder->wrapTensor(index); + operand_context->set(index, object); + }); + } + + // Prepare each TensorManager on each backend + auto tensor_mgrs = nnfw::cpp14::make_unique<backend::TensorManagerSet>(); + for (auto &tensor_builder : tensor_builders) + { + tensor_mgrs->insert(tensor_builder->releaseTensorManager()); + } + + if (parallel) + { + return new exec::ParallelExecutor{ + graph.shareModel(), graph.releaseSubgraphs(), + operand_context, std::move(lower_info), + std::move(tensor_mgrs), std::move(execution_builder->releaseCodeMap())}; + } + else + { + auto exec = new exec::DataflowExecutor{ + graph.shareModel(), graph.releaseSubgraphs(), + operand_context, std::move(lower_info), + std::move(tensor_mgrs), std::move(execution_builder->releaseCodeMap())}; + if (util::getConfigBool(util::config::PROFILING_MODE)) + { + auto et = std::make_shared<backend::ExecTime>(backend::BackendManager::instance().getAll()); + std::unique_ptr<exec::IExecutionObserver> obs = + nnfw::cpp14::make_unique<exec::ProfileObserver>(et); + exec->addObserver(std::move(obs)); + } + return exec; + } +} + +} // namespace compiler +} // namespace neurun |