Diffstat (limited to 'runtime/onert/core/src/compiler/ExecutorFactory.cc')
-rw-r--r--  runtime/onert/core/src/compiler/ExecutorFactory.cc  1037
1 file changed, 709 insertions, 328 deletions
diff --git a/runtime/onert/core/src/compiler/ExecutorFactory.cc b/runtime/onert/core/src/compiler/ExecutorFactory.cc
index 062c6c9c3..6a08524cc 100644
--- a/runtime/onert/core/src/compiler/ExecutorFactory.cc
+++ b/runtime/onert/core/src/compiler/ExecutorFactory.cc
@@ -16,26 +16,37 @@
#include "ExecutorFactory.h"
+#include "Linear.h"
+#include "../backend/builtin/BackendContext.h"
+#include "../backend/builtin/Config.h"
+#include "../backend/builtin/UserTensor.h"
+#include "../dumper/text/GraphDumper.h"
+#include "../exec/DataflowExecutor.h"
+#include "../exec/ExecTime.h"
+#include "../exec/ExecutionObservers.h"
+#include "../exec/LinearExecutor.h"
+#ifdef MINMAX_H5DUMPER
+#include "../exec/MinMaxRecorder.h"
+#endif
+#include "../exec/ParallelExecutor.h"
+#include "../ir/OperationCloner.h"
+
+#include <backend/IPortableTensor.h>
+#include <compiler/BackendManager.h>
+#include <compiler/ExecutionBuilder.h>
+#include <util/TracingCtx.h>
+
#include <functional>
-#include "exec/ExecutionObservers.h"
-#include "exec/LinearExecutor.h"
-#include "exec/DataflowExecutor.h"
-#include "exec/ParallelExecutor.h"
-#include "compiler/BackendManager.h"
-#include "compiler/ExecutionBuilder.h"
-#include "exec/ExecTime.h"
-#include "compiler/Linear.h"
-#include "compiler/TensorBuilders.h"
-#include "backend/IConstantInitializer.h"
-#include "backend/IKernelGenerator.h"
-#include "backend/IOptimizer.h"
-#include "backend/ITensorRegister.h"
-#include "backend/controlflow/Config.h"
-#include "backend/controlflow/KernelGenerator.h"
-#include "backend/controlflow/UserTensor.h"
-#include "backend/controlflow/TensorBuilder.h"
#include <memory>
+#ifdef ONERT_TRAIN
+#include "../backend/builtin/train/BackendContext.h"
+#include "../exec/train/TrainableExecutor.h"
+
+#include <backend/train/TrainableBackendContext.h>
+#include <backend/train/ITrainableBackend.h>
+#endif // ONERT_TRAIN
+
namespace onert
{
namespace
@@ -46,7 +57,7 @@ class SyncFunction final : public exec::IFunction
public:
virtual ~SyncFunction() = default;
SyncFunction(std::unique_ptr<exec::IFunction> fn, const std::shared_ptr<backend::IConfig> config)
- : _fn{std::move(fn)}, _config{config}
+ : _fn{std::move(fn)}, _config{config}
{
assert(_fn);
assert(_config);
@@ -65,21 +76,218 @@ private:
std::shared_ptr<backend::IConfig> _config;
};
-// TODO Think of a better way to manage TensorManagers
-backend::TensorManagerSet createTensorManagerSet(const compiler::TensorBuilders &tensor_builders)
+using DeallocList = std::vector<backend::ITensor *>;
+// Deallocates dynamic tensors after execution of an operation; used by the Linear Executor
+class DeallocFunction final : public exec::IFunction
+{
+public:
+ DeallocFunction(const DeallocList &tensors) : _dealloc_list{tensors} {}
+
+ void run() override
+ {
+ for (auto &&tensor : _dealloc_list)
+ {
+ if (!tensor->is_dynamic())
+ continue;
+ tensor->deallocBuffer();
+ }
+ }
+
+private:
+ DeallocList _dealloc_list;
+};
+
+// TODO Unify initializeSubgraphIOTensors
+void initializeSubgraphIOTensors(compiler::ILoweredGraph &lowered_graph,
+ const backend::BackendContexts &backend_contexts,
+ const ir::OperandIndexSequence &indices)
+{
+ // TODO Store builtin backend in BackendContext
+ std::shared_ptr<backend::builtin::TensorRegistry> builtin_tensor_reg;
+ for (const auto &e : backend_contexts)
+ {
+ auto backend = e.first;
+ auto &context = e.second;
+ if (backend->config()->id() == backend::builtin::Config::ID)
+ {
+ builtin_tensor_reg =
+ std::dynamic_pointer_cast<backend::builtin::TensorRegistry>(context->tensor_registry);
+ }
+ }
+ assert(builtin_tensor_reg);
+
+ for (auto &&ind : indices)
+ {
+ const auto &operand = lowered_graph.graph().operands().at(ind);
+ auto tensor = std::make_unique<backend::builtin::IOTensor>(
+ operand.info(),
+ ir::Layout::NHWC /* FIXME find operation for this operand and use frontend_layout */
+ );
+
+ // Add tensor to builtin TensorRegistry.
+ builtin_tensor_reg->setNativeIOTensor(ind, std::move(tensor));
+ }
+}
+
+#ifdef ONERT_TRAIN
+void initializeSubgraphIOTensors(compiler::ILoweredGraph &lowered_graph,
+ const backend::train::TrainableBackendContexts &backend_contexts,
+ const ir::OperandIndexSequence &indices)
{
- backend::TensorManagerSet tensor_mgrs;
- for (auto &tensor_builder : tensor_builders)
+ std::shared_ptr<backend::builtin::train::TensorRegistry> builtin_tensor_reg;
+ for (const auto &e : backend_contexts)
{
- auto s_tensor_manager = tensor_builder->releaseStaticTensorManager();
- if (s_tensor_manager != nullptr)
- tensor_mgrs.insert(std::move(s_tensor_manager));
+ auto backend = e.first;
+ auto &context = e.second;
+ if (backend->config()->id() == backend::builtin::Config::ID)
+ {
+ builtin_tensor_reg = std::dynamic_pointer_cast<backend::builtin::train::TensorRegistry>(
+ context->tensor_registry());
+ }
+ }
+ assert(builtin_tensor_reg);
+
+ for (auto &&ind : indices)
+ {
+ const auto &operand = lowered_graph.graph().operands().at(ind);
+ auto tensor = std::make_unique<backend::builtin::IOTensor>(
+ operand.info(),
+ ir::Layout::NHWC /* FIXME find operation for this operand and use frontend_layout */
+ );
+
+ // Add tensor to builtin TensorRegistry.
+ builtin_tensor_reg->setNativeIOTensor(ind, std::move(tensor));
+ }
+}
+#endif // ONERT_TRAIN
+
+backend::BackendContexts
+createBackendContexts(compiler::ILoweredGraph &lgraph, bool linear_executor,
+ std::shared_ptr<backend::custom::IKernelBuilder> custom_kernel_builder)
+{
+ backend::BackendContexts contexts;
+ auto &backend_manager = compiler::BackendManager::get();
+
+ std::unordered_map<const backend::Backend *, backend::ContextData> context_data_map;
+
+ // Generate partial graphs for each backend
+ for (auto &&backend : backend_manager.getAll())
+ {
+ auto &data = context_data_map[backend];
+ auto graph = std::make_unique<ir::Graph>();
+ graph->setLayout(lgraph.graph().layout());
+ data.graph = std::move(graph);
+ }
+
+ auto &whole_graph = lgraph.graph();
+ // Separate operands into partial graphs
+ whole_graph.operands().iterate([&](const ir::OperandIndex &operand_ind, ir::Operand &operand) {
+ auto &operand_li = lgraph.lower_info().operand;
+ const auto &def_factors = operand_li.at(operand_ind).def_factors();
+ if (def_factors.size() == 0) // Ignore unused tensor
+ return;
+ const auto &def_factor = def_factors.getOnlyElement();
+ const auto backend = def_factor.backend();
+ auto &partial_graph = *context_data_map[backend].graph;
+ auto &operand_layouts = context_data_map[backend].operand_layouts;
+ assert(operand_layouts.find(operand_ind) == operand_layouts.end());
+ operand_layouts[operand_ind] = def_factor.layout();
+
+    // Copy the operand and insert it into the partial graph
+ auto new_operand = std::make_unique<ir::Operand>(operand);
+ new_operand->clearDefUse();
+ operand.releaseData(); // Deref data of LoweredGraph
+ auto new_operand_ind = partial_graph.addOperand(operand_ind, std::move(new_operand));
+ UNUSED_RELEASE(new_operand_ind);
+ assert(new_operand_ind == operand_ind);
+ });
+ // Separate operations into partial graphs
+ whole_graph.operations().iterate(
+ [&](const ir::OperationIndex &op_ind, const ir::IOperation &operation) {
+ auto &op_li = lgraph.lower_info().operation;
+ auto backend = op_li.at(op_ind).backend();
+ auto &partial_graph = *context_data_map[backend].graph;
+ auto &external_operands = context_data_map[backend].external_operands;
+ auto &operand_layouts = context_data_map[backend].operand_layouts;
+
+ {
+ // Add missing operands (externals)
+ auto io_list = (operation.getInputs() + operation.getOutputs()) | ir::Remove::DUPLICATED |
+ ir::Remove::UNDEFINED;
+ for (auto &&operand_ind : io_list)
+ {
+ if (partial_graph.operands().exist(operand_ind))
+ continue;
+
+          // Copy the operand and insert it into the partial graph
+ const auto &operand = whole_graph.operands().at(operand_ind);
+ auto new_operand = std::make_unique<ir::Operand>(operand);
+ new_operand->clearDefUse();
+ auto new_operand_ind = partial_graph.addOperand(operand_ind, std::move(new_operand));
+ UNUSED_RELEASE(new_operand_ind);
+ assert(new_operand_ind == operand_ind);
+
+ auto layout =
+ lgraph.lower_info().operand.at(operand_ind).def_factors().getOnlyElement().layout();
+ assert(operand_layouts.find(operand_ind) == operand_layouts.end());
+ operand_layouts[operand_ind] = layout;
+ external_operands.add(operand_ind);
+ }
+
+ auto new_op_ind = partial_graph.addOperation(op_ind, clone(operation));
+ UNUSED_RELEASE(new_op_ind);
+ assert(new_op_ind == op_ind);
+ }
+ });
+
+ // Create contexts
+ auto whole_op_order = lgraph.graph().topolSortOperations();
+ for (auto &&pair : context_data_map)
+ {
+ auto backend = pair.first;
+ auto &data = pair.second;
+ // Handle graph input/outputs or external tensors
+ data.graph->operands().iterate([&](const ir::OperandIndex &ind, const ir::Operand &operand) {
+ if (whole_graph.getInputs().contains(ind) || whole_graph.getOutputs().contains(ind))
+ data.external_operands.add(ind);
+ // Inputs are either "graph input" or "no def op and non-constant"
+ if (whole_graph.getInputs().contains(ind) ||
+ (!operand.getDef().valid() && !operand.isConstant()))
+        data.graph->addInput(ind);
+      // Outputs are either "graph output" or "no uses"
+ if (whole_graph.getOutputs().contains(ind) || operand.getUses().size() == 0)
+ data.graph->addOutput(ind);
+ });
+ dumper::text::dumpGraph(*data.graph);
+
+ std::copy_if(whole_op_order.begin(), whole_op_order.end(), std::back_inserter(data.op_order),
+ [&](const auto &ind) { return data.graph->operations().exist(ind); });
+ data.is_linear_executor = linear_executor;
+ data.custom_kernel_builder = custom_kernel_builder;
+ contexts.emplace(backend, backend->newContext(std::move(data)));
+ }
+ return contexts;
+}
+
+template <typename Context>
+std::deque<std::pair<const backend::Backend *, Context *>> orderBackendContext(
+ const std::unordered_map<const backend::Backend *, std::unique_ptr<Context>> &tbackend_contexts)
+{
+ std::deque<std::pair<const backend::Backend *, Context *>> ordered_contexts;
- auto d_tensor_manager = tensor_builder->releaseDynamicTensorManager();
- if (d_tensor_manager != nullptr)
- tensor_mgrs.insert(std::move(d_tensor_manager));
+ for (auto &&pair : tbackend_contexts)
+ {
+    // NOTE The builtin backend must be processed last.
+    // This is because the Permute layer is the only operation that can have different ITensor
+    // objects for its input and output, and it requires all other backends' tensors to be
+    // ready to use.
+ if (pair.first->config()->id() == "builtin")
+ ordered_contexts.emplace_back(pair.first, pair.second.get());
+ else
+ ordered_contexts.emplace_front(pair.first, pair.second.get());
}
- return tensor_mgrs;
+
+ return ordered_contexts;
}
} // namespace
@@ -106,415 +314,588 @@ ExecutorFactory::ExecutorFactory()
}
exec::IExecutor *ExecutorFactory::create(std::unique_ptr<compiler::LoweredGraph> lowered_graph,
- const compiler::CompilerOptions &options,
- const std::shared_ptr<exec::ExecutorMap> &executor_map)
+ const std::shared_ptr<exec::IExecutors> &executors,
+ const ExecutorFactoryArgs &args)
{
- return _map.at(options.executor)(std::move(lowered_graph), options, executor_map);
+ assert(args.options != nullptr);
+ return _map.at(args.options->executor)(std::move(lowered_graph), executors, args);
}
-void ExecutorFactory::initializeBackendContext(compiler::LoweredGraph *lowered_graph)
+void ExecutorFactory::prepareMigrantTensors(compiler::ILoweredGraph &lowered_graph,
+ const backend::BackendContexts &backend_contexts)
{
- struct Entry
- {
- std::vector<backend::BackendContext::OperationInfo> operation_list;
- std::vector<ir::OperandIndex> operand_list;
- };
- std::unordered_map<const backend::Backend *, Entry> backend_assets;
-
- // Build lists for operations
- lowered_graph->op_seqs().iterate(
- [&](const ir::OpSequenceIndex &op_seq_index, const ir::OpSequence &op_seq) {
- auto &op_seq_li = lowered_graph->getLowerInfo()->op_seq;
- auto backend = op_seq_li.at(op_seq_index)->backend();
- for (auto &operation_idx : op_seq.operations())
+ TensorRegistries tensor_regs{backend_contexts, true};
+
+ lowered_graph.graph().operations().iterate(
+ [&](const ir::OperationIndex &op_ind, const ir::IOperation &op) {
+ auto lower_info = lowered_graph.lower_info().operation.getRawPtr(op_ind);
+ auto &backend_ctx = backend_contexts.at(lower_info->backend());
+ for (auto &&ind :
+ (op.getInputs() + op.getOutputs()) | ir::Remove::DUPLICATED | ir::Remove::UNDEFINED)
+ {
+        // If an Operation's input/output tensor does not have its own tensor object,
+ // it must be using migrant tensors, so find the tensor from other tensor registries and
+ // register it to the current tensor registry if it is portable
+ if (!backend_ctx->tensor_registry->getITensor(ind))
{
- backend_assets[backend].operation_list.emplace_back(operation_idx, op_seq.getLayout());
+ auto tensor = tensor_regs.getITensor(ind);
+ assert(tensor); // The tensor must have been registered
+ auto ptensor = dynamic_cast<backend::IPortableTensor *>(tensor);
+ if (ptensor)
+ backend_ctx->tensor_registry->setMigrantTensor(ind, ptensor);
}
- });
+ }
+ });
+}
- // Build lists for operands
- lowered_graph->graph().operands().iterate([&](const ir::OperandIndex &ind, const ir::Operand &) {
- const auto lower_info = lowered_graph->getLowerInfo(ind);
- for (auto factor : lower_info->def_factors())
+void ExecutorFactory::prepareBuiltinBackend(const TensorRegistries &tensor_regs,
+ const std::shared_ptr<exec::IExecutors> &executors,
+ const backend::BackendContexts &backend_contexts,
+ const ir::ModelIndex &index)
+{
+ for (auto &&pair : backend_contexts)
+ {
+ auto builtin_context = dynamic_cast<backend::builtin::BackendContext *>(pair.second.get());
+ if (builtin_context != nullptr)
{
- auto backend = factor.backend();
- backend_assets[backend].operand_list.emplace_back(ind);
+ auto builtin_kernel_gen = builtin_context->kernel_gen;
+ builtin_kernel_gen->setTensorRegistries(tensor_regs);
+ builtin_kernel_gen->setExecutors(executors);
+ builtin_kernel_gen->setModelIndex(index);
}
- });
+ }
+}
- for (auto &pair : backend_assets)
+std::deque<std::pair<const backend::Backend *, backend::BackendContext *>>
+ExecutorFactory::orderBackendContext(const backend::BackendContexts &backend_contexts)
+{
+ std::deque<std::pair<const backend::Backend *, backend::BackendContext *>> ordered_contexts;
+ for (auto &&pair : backend_contexts)
{
- auto backend = pair.first;
- auto &arg = pair.second;
- lowered_graph->backend_contexts().at(backend)->initialize(arg.operation_list, arg.operand_list);
+    // NOTE The builtin backend must be processed last.
+    // This is because the Permute layer is the only operation that can have different ITensor
+    // objects for its input and output, and it requires all other backends' tensors to be
+    // ready to use.
+ if (pair.first->config()->id() == "builtin")
+ ordered_contexts.emplace_back(pair.first, pair.second.get());
+ else
+ ordered_contexts.emplace_front(pair.first, pair.second.get());
}
+ return ordered_contexts;
}
-void ExecutorFactory::runTensorRegistration(compiler::LoweredGraph *lowered_graph,
- const std::vector<ir::OpSequenceIndex> &order)
+exec::IExecutor *
+ExecutorFactory::createLinearExecutor(std::unique_ptr<compiler::LoweredGraph> lowered_graph,
+ const std::shared_ptr<exec::IExecutors> &executors,
+ const ExecutorFactoryArgs &args)
{
- for (const auto index : order)
+ const auto options = args.options;
+ const auto &model_index = args.model_index;
+ const auto tracing_ctx = args.tracing_ctx;
+ auto custom_kernel_builder = args.custom_kernel_builder;
+ auto &graph = lowered_graph->graph();
+
+ backend::BackendContexts backend_contexts =
+ createBackendContexts(*lowered_graph, options->executor == "Linear", custom_kernel_builder);
+
+ TensorRegistries tensor_regs{backend_contexts, true};
+
+ initializeSubgraphIOTensors(
+ *lowered_graph, backend_contexts,
+ (lowered_graph->graph().getInputs() + lowered_graph->graph().getOutputs()) |
+ ir::Remove::DUPLICATED | ir::Remove::UNDEFINED);
+
+ // linearize
+ auto order = Linear::linearize(*lowered_graph);
+ Linear::dump(*lowered_graph, order);
+
+ for (auto &&pair : backend_contexts)
{
- const auto &op_seq = lowered_graph->op_seqs().at(index);
- const auto backend = lowered_graph->getLowerInfo(index)->backend();
- const auto tensor_register = lowered_graph->backend_contexts().at(backend)->tensor_register;
- auto tensor_builder = lowered_graph->backend_contexts().at(backend)->tensor_builder;
- auto model_io = lowered_graph->graph().getInputs() + lowered_graph->graph().getOutputs();
+ pair.second->genTensors();
+ }
+
+ prepareMigrantTensors(*lowered_graph, backend_contexts);
- if (tensor_register)
+ // Give some runtime objects to builtin KernelGenerator
+ prepareBuiltinBackend(tensor_regs, executors, backend_contexts, model_index);
+
+ ExecutionBuilder builder;
+
+ // Adjust the order of backends for the upcoming iteration
+ auto ordered_contexts = orderBackendContext(backend_contexts);
+
+ // Simulate the execution for deallocation of tensors
+ std::unordered_map<ir::OperationIndex, DeallocList> dealloc_list_map;
+ {
+ ir::OperandIndexMap<uint32_t> uses_map;
+ ir::OperandIndexSequence constants;
+
+ auto model_io =
+ (graph.getInputs() + graph.getOutputs()) | ir::Remove::UNDEFINED | ir::Remove::DUPLICATED;
+
+ // Prepare scanning
+ graph.operands().iterate([&](const ir::OperandIndex &ind, const ir::Operand &obj) {
+ uses_map[ind] = obj.getUses().size();
+
+ if (obj.isConstant())
+ constants.append(ind);
+ });
+
+    // A trick to consider constants as an exception
+ for (const auto &ind : constants)
{
- // Custom registration
- tensor_register->registerTensors(op_seq, lowered_graph->getLowerInfo());
+ uses_map[ind]++;
}
- else
+
+ for (const auto &op_ind : order)
{
- // Default registration
- for (const auto op_idx : op_seq)
+ const auto &op = graph.operations().at(op_ind);
+ auto op_inputs = op.getInputs() | ir::Remove::DUPLICATED | ir::Remove::UNDEFINED;
+ auto op_outputs = op.getOutputs() | ir::Remove::DUPLICATED | ir::Remove::UNDEFINED;
+
+ for (const auto &ind : op_inputs)
{
- const auto &op = lowered_graph->graph().operations().at(op_idx);
- for (const auto &index : (op.getInputs() | ir::Remove::UNDEFINED) + op.getOutputs())
+ const auto &operand = graph.operands().at(ind);
+ assert(uses_map.find(ind) != uses_map.end());
+ assert(uses_map[ind] > 0);
+ uses_map[ind]--;
+ if (uses_map[ind] == 0 && !operand.info().isVariable() && !model_io.contains(ind))
{
- if (!tensor_builder->isRegistered(index) && !model_io.contains(index))
- {
- const auto &operand_lower_info =
- lowered_graph->getLowerInfo(index)->def_factors().getOnlyElement();
-
- // E.g., permute (CPU) -> tensor A -> MaxPool2D(acl_cl)
- // op.getOutputs() of permute (CPU) returns tensor A
- // but tensor A belongs to the backend of acl_cl.
- // So, we have to make this tensor NOT registered for CPU.
- if (operand_lower_info.backend() != backend)
- continue;
-
- const auto &obj = lowered_graph->graph().operands().at(index);
- const auto frontend_layout = op_seq.getLayout();
- const auto backend_layout = operand_lower_info.layout();
- ir::OperandInfo backend_info{permuteShape(obj.shape(), frontend_layout, backend_layout),
- obj.typeInfo(), obj.info().memAllocType(),
- obj.isConstant()};
- tensor_builder->registerTensorInfo(index, backend_info, backend_layout);
- }
+ dealloc_list_map[op_ind].emplace_back(tensor_regs.getITensor(ind));
}
}
}
- }
-}
-std::vector<std::shared_ptr<backend::ITensor>>
-ExecutorFactory::initializeModelIOTensors(compiler::LoweredGraph &lowered_graph,
- const ir::OperandIndexSequence &indices)
-{
- std::vector<std::shared_ptr<backend::ITensor>> ret;
+ // Dispose and validate
+ for (const auto &ind : constants)
+ {
+ --uses_map[ind];
+ }
- // TODO Store controlflow backend in BackendContext
- std::shared_ptr<backend::controlflow::TensorBuilder> cf_tensor_builder;
- std::shared_ptr<backend::controlflow::TensorRegistry> cf_tensor_reg;
- for (const auto &e : lowered_graph.backend_contexts())
+ assert(
+ std::all_of(uses_map.begin(), uses_map.end(),
+ [](std::pair<const ir::OperandIndex, uint32_t> it) { return it.second == 0; }));
+ }
+
+ // Generate kernels
+ for (auto &&pair : ordered_contexts)
{
- auto backend = e.first;
- auto &context = e.second;
- if (backend->config()->id() == backend::controlflow::Config::ID)
+ auto codes = pair.second->genKernels();
+ for (auto &&pair : codes)
{
- cf_tensor_builder =
- std::dynamic_pointer_cast<backend::controlflow::TensorBuilder>(context->tensor_builder);
- cf_tensor_reg =
- std::dynamic_pointer_cast<backend::controlflow::TensorRegistry>(context->tensor_registry);
+ auto &op_ind = pair.first;
+ auto &fn_seq = pair.second;
+ auto &op = lowered_graph->graph().operations().at(op_ind);
+ auto lower_info = lowered_graph->lower_info().operation.getRawPtr(op_ind);
+ if (options->he_profiling_mode)
+ fn_seq->wrap<SyncFunction>(lower_info->backend()->config());
+ if (!dealloc_list_map[op_ind].empty())
+ fn_seq->append(std::make_unique<DeallocFunction>(dealloc_list_map[op_ind]));
+ builder.append(op_ind, {op_ind, &op, lower_info, std::move(fn_seq)});
}
}
- assert(cf_tensor_builder);
- assert(cf_tensor_reg);
- for (auto ind : indices)
+ auto code_map = builder.releaseCodeMap();
+
+ auto exec = new exec::LinearExecutor{std::move(lowered_graph),
+ std::move(backend_contexts),
+ tensor_regs,
+ std::move(code_map),
+ order,
+ tracing_ctx};
+
+ if (!options->trace_filepath.empty())
{
- const auto &operand = lowered_graph.graph().operands().at(ind);
- auto tensor = std::make_shared<backend::controlflow::UserTensor>(
- operand.info(),
- ir::Layout::NHWC, /* FIXME find op_seq for this operand and use frontend_layout */
- cf_tensor_builder->dynamicTensorManager());
-
- // Add tensor to controlflow TensorRegistry.
- cf_tensor_reg->setNativeUserTensor(ind, tensor);
- ret.push_back(tensor);
+ std::unique_ptr<exec::IExecutionObserver> ctp =
+ std::make_unique<exec::TracingObserver>(options->trace_filepath, exec->graph(), tracing_ctx);
+ exec->addObserver(std::move(ctp));
}
- return ret;
-}
+#ifdef MINMAX_H5DUMPER
+ if (!options->minmax_filepath.empty())
+ exec->addObserver(std::make_unique<exec::MinMaxRecorder>(
+ options->minmax_filepath, exec->graph(), exec->getBackendContexts()));
+#endif
-void ExecutorFactory::prepareExternalTensors(compiler::LoweredGraph &lowered_graph)
-{
- TensorRegistries tensor_regs{lowered_graph.backend_contexts(), true};
-
- lowered_graph.op_seqs().iterate(
- [&](const ir::OpSequenceIndex &op_seq_index, const ir::OpSequence &op_seq) {
- auto lower_info = lowered_graph.getLowerInfo(op_seq_index);
- auto &backend_ctx = lowered_graph.backend_contexts().at(lower_info->backend());
- for (auto ind : (op_seq.getInputs() + op_seq.getOutputs()) | ir::Remove::DUPLICATED |
- ir::Remove::UNDEFINED)
- {
- // If an OpSequence input/output tensor does not have a own tensor object,
- // it must be using external tensors, so find the tensor from other tensor builders and
- // set the tensor to this tensor builder if portable
- if (!backend_ctx->tensor_registry->getITensor(ind))
- {
- auto tensor = tensor_regs.getITensor(ind);
- assert(tensor); // The tensor must have been registered
- auto ptensor = std::dynamic_pointer_cast<backend::IPortableTensor>(tensor);
- if (ptensor)
- backend_ctx->tensor_registry->setMigrantTensor(ind, ptensor);
- }
- }
- });
+ return exec;
}
exec::IExecutor *
-ExecutorFactory::createLinearExecutor(std::unique_ptr<compiler::LoweredGraph> lowered_graph,
- const compiler::CompilerOptions &options,
- const std::shared_ptr<exec::ExecutorMap> &executor_map)
+ExecutorFactory::createDataflowExecutor(std::unique_ptr<compiler::LoweredGraph> lowered_graph,
+ const std::shared_ptr<exec::IExecutors> &executors,
+ const ExecutorFactoryArgs &args, bool parallel)
{
- const auto &backend_contexts = lowered_graph->backend_contexts();
+ const auto options = args.options;
+ const auto &model_index = args.model_index;
+ const auto tracing_ctx = args.tracing_ctx;
+ auto custom_kernel_builder = args.custom_kernel_builder;
- initializeBackendContext(lowered_graph.get());
+ backend::BackendContexts backend_contexts =
+ createBackendContexts(*lowered_graph, options->executor == "Linear", custom_kernel_builder);
- // linearize
- assert(!lowered_graph->graph().isBuildingPhase());
+ TensorRegistries tensor_regs{backend_contexts, true};
- /*************************************************
- * Backend dependent analysis & optimization phase
- *************************************************/
+ initializeSubgraphIOTensors(
+ *lowered_graph, backend_contexts,
+ (lowered_graph->graph().getInputs() + lowered_graph->graph().getOutputs()) |
+ ir::Remove::DUPLICATED | ir::Remove::UNDEFINED);
- for (auto &pair : backend_contexts)
+ for (auto &&pair : backend_contexts)
{
- auto &optimizer = pair.second->optimizer;
- if (optimizer)
- optimizer->optimize();
+ pair.second->genTensors();
}
- /**********************************************************
- * Backend dependent analysis & optimization phase finished
- **********************************************************/
+ prepareMigrantTensors(*lowered_graph, backend_contexts);
- /***********************
- * Code generation phase
- ***********************/
+ // Give some runtime objects to builtin KernelGenerator
+ prepareBuiltinBackend(tensor_regs, executors, backend_contexts, model_index);
- auto order = Linear::linearize(*lowered_graph);
- runTensorRegistration(lowered_graph.get(), order);
+ ExecutionBuilder builder;
- std::vector<std::shared_ptr<backend::ITensor>> input_tensors;
- std::vector<std::shared_ptr<backend::ITensor>> output_tensors;
- if (options.is_primary_subgraph)
+ // Adjust the order of backends for the upcoming iteration
+ auto ordered_contexts = orderBackendContext(backend_contexts);
+
+ // Generate kernels
+ for (auto &&pair : ordered_contexts)
{
- input_tensors = initializeModelIOTensors(*lowered_graph, lowered_graph->graph().getInputs());
- output_tensors = initializeModelIOTensors(*lowered_graph, lowered_graph->graph().getOutputs());
+ auto codes = pair.second->genKernels();
+ for (auto &&pair : codes)
+ {
+ auto &op_ind = pair.first;
+ auto &fn_seq = pair.second;
+ auto &op = lowered_graph->graph().operations().at(op_ind);
+ auto lower_info = lowered_graph->lower_info().operation.getRawPtr(op_ind);
+ if (options->he_profiling_mode)
+ fn_seq->wrap<SyncFunction>(lower_info->backend()->config());
+ builder.append(op_ind, {op_ind, &op, lower_info, std::move(fn_seq)});
+ }
}
- Linear::dump(*lowered_graph, order);
- Linear::planTensors(*lowered_graph, order);
+ auto code_map = builder.releaseCodeMap();
- TensorBuilders tensor_builders{lowered_graph->backend_contexts(), true};
- TensorRegistries tensor_regs{lowered_graph->backend_contexts(), true};
+ exec::ExecutorBase *exec = nullptr;
+ if (parallel)
+ {
+ exec = new exec::ParallelExecutor{std::move(lowered_graph), std::move(backend_contexts),
+ tensor_regs, std::move(code_map), tracing_ctx};
+ }
+ else
+ {
+ auto dataflow_exec =
+ new exec::DataflowExecutor{std::move(lowered_graph), std::move(backend_contexts), tensor_regs,
+ std::move(code_map), tracing_ctx};
+ if (options->he_profiling_mode)
+ {
+ std::vector<const backend::Backend *> backends;
+ for (const auto &pair : backend_contexts)
+ {
+ backends.push_back(pair.first);
+ }
+ auto et = std::make_shared<exec::ExecTime>(backends);
+ std::unique_ptr<exec::IExecutionObserver> obs =
+ std::make_unique<exec::ProfileObserver>(et, dataflow_exec->graph());
+ dataflow_exec->addObserver(std::move(obs));
+ }
+ exec = dataflow_exec;
+ }
- for (auto &tensor_builder : tensor_builders)
+ if (!options->trace_filepath.empty())
{
- tensor_builder->prepare();
+ std::unique_ptr<exec::IExecutionObserver> ctp =
+ std::make_unique<exec::TracingObserver>(options->trace_filepath, exec->graph(), tracing_ctx);
+ exec->addObserver(std::move(ctp));
}
- prepareExternalTensors(*lowered_graph);
+ return exec;
+}
- ExecutionBuilder builder;
+#ifdef ONERT_TRAIN
+exec::IExecutor *
+ExecutorFactory::create(std::unique_ptr<compiler::train::LoweredTrainableGraph> lowered_graph,
+ const std::shared_ptr<exec::IExecutors> &executors,
+ const ExecutorFactoryArgs &args,
+ const std::shared_ptr<exec::train::optimizer::Optimizer> &optimizer)
+{
+ assert(args.options != nullptr);
- // Generate kernels
- lowered_graph->iterateTopolOpSeqs([&](const ir::OpSequenceIndex &op_seq_index,
- const ir::OpSequence &op_seq) {
- auto lower_info = lowered_graph->getLowerInfo(op_seq_index);
- auto kernel_gen = lowered_graph->backend_contexts().at(lower_info->backend())->kernel_gen;
- // Set TensorBuilderSet and ExecutorMap to kernel_gen of control flow
- auto cf_kernel_gen = dynamic_cast<backend::controlflow::KernelGenerator *>(kernel_gen.get());
- if (cf_kernel_gen != nullptr)
+ if (args.options->executor != "Linear")
+ throw std::runtime_error("ExecutorFactory: TrainableExecutor supports only 'Linear' now");
+
+ return createTrainableExecutor(std::move(lowered_graph), executors, args, optimizer);
+}
+
+void ExecutorFactory::prepareMigrantTensors(
+ compiler::ILoweredGraph &lowered_graph,
+ const backend::train::TrainableBackendContexts &backend_contexts)
+{
+ train::TensorRegistries tensor_regs{backend_contexts, true};
+
+ lowered_graph.graph().operations().iterate(
+ [&](const ir::OperationIndex &op_ind, const ir::IOperation &op) {
+ auto lower_info = lowered_graph.lower_info().operation.getRawPtr(op_ind);
+ auto &backend_ctx = backend_contexts.at(lower_info->backend());
+ for (auto &&ind :
+ (op.getInputs() + op.getOutputs()) | ir::Remove::DUPLICATED | ir::Remove::UNDEFINED)
+ {
+        // If an Operation's input/output tensor does not have its own tensor object,
+ // it must be using migrant tensors, so find the tensor from other tensor registries and
+ // register it to the current tensor registry if it is portable
+ if (!backend_ctx->tensor_registry()->getITensor(ind))
+ {
+ auto tensor = tensor_regs.getITensor(ind);
+ assert(tensor); // The tensor must have been registered
+ auto ptensor = dynamic_cast<backend::IPortableTensor *>(tensor);
+ if (ptensor)
+ backend_ctx->tensor_registry()->setMigrantTensor(ind, ptensor);
+ }
+ }
+ });
+}
+
+exec::IExecutor *ExecutorFactory::createTrainableExecutor(
+ std::unique_ptr<compiler::train::LoweredTrainableGraph> lowered_graph,
+ const std::shared_ptr<exec::IExecutors> &, const ExecutorFactoryArgs &args,
+ const std::shared_ptr<exec::train::optimizer::Optimizer> &optimizer)
+{
+ const auto options = args.options;
+ const auto tracing_ctx = args.tracing_ctx;
+ auto custom_kernel_builder = args.custom_kernel_builder;
+
+ auto &graph = lowered_graph->graph();
+
+ lowered_graph->trainable_graph().operations().iterate([](const onert::ir::OperationIndex &,
+ const onert::ir::IOperation &op) {
+ try
{
- cf_kernel_gen->setTensorRegistries(tensor_regs);
- cf_kernel_gen->setExecutorMap(executor_map);
+ UNUSED_RELEASE(dynamic_cast<const ir::train::ITrainableOperation &>(op));
}
- auto fn_seq = kernel_gen->generate(op_seq);
- if (options.he_profiling_mode)
+ catch (std::bad_cast &)
{
- fn_seq->wrap<SyncFunction>(lower_info->backend()->config());
+      throw std::runtime_error("ExecutorFactory: " + op.name() + " is not a trainable operation yet");
}
- builder.append(op_seq_index, {&op_seq, lower_info, std::move(fn_seq)});
});
- for (auto &tensor_builder : tensor_builders)
- {
- tensor_builder->allocate();
- }
+ // TODO Create context only once instead of replacing
+ backend::train::TrainableBackendContexts tbackend_contexts;
+ backend::BackendContexts base_backend_contexts =
+ createBackendContexts(*lowered_graph, true, custom_kernel_builder);
- for (auto &pair : backend_contexts)
+  // Replace BackendContext with TrainableBackendContext
+ for (auto &&pair : base_backend_contexts)
{
- pair.second->initConsts();
- }
-
- lowered_graph->graph().operands().iterate(
- [](const ir::OperandIndex &, ir::Operand &obj) { obj.releaseData(); });
-
- auto code_map = builder.releaseCodeMap();
-
- for (auto &it : code_map)
- {
- auto op_seq_index = it.first;
- auto &fn_seq = it.second.fn_seq;
-
- fn_seq->iterate([&](exec::IFunction &ifunc) {
- ifunc.prepare();
- auto backend = lowered_graph->getLowerInfo(op_seq_index)->backend();
- auto tensor_builder = lowered_graph->backend_contexts().at(backend)->tensor_builder;
- tensor_builder->postFunctionPrepare();
+ auto ctx = pair.second.get();
+ const auto &data = ctx->data();
+
+ // Create partial and trainable graphs
+ auto tgraph = std::make_unique<ir::train::TrainableGraph>(*data.graph);
+ data.graph->operations().iterate(
+ [&](const onert::ir::OperationIndex &op_index, const onert::ir::IOperation &) {
+ const auto &orig_tgraph = lowered_graph->trainable_graph();
+ const auto &trainable_op = orig_tgraph.operation(op_index);
+ auto gen_index = tgraph->replaceOperation(op_index, trainable_op.clone());
+ UNUSED_RELEASE(gen_index);
+ assert(gen_index == op_index);
+ });
+ data.graph->operands().iterate([&](const ir::OperandIndex &index, const ir::Operand &) {
+ const auto &orig_tgraph = lowered_graph->trainable_graph();
+ if (orig_tgraph.derivatives().exist(index))
+ {
+ const auto &deriv = orig_tgraph.derivatives().at(index);
+ auto new_deriv = std::make_unique<ir::Operand>(deriv);
+ auto gen_index = tgraph->addDerivative(index, std::move(new_deriv));
+ UNUSED_RELEASE(gen_index);
+ assert(gen_index == index);
+ }
});
- }
- backend::TensorManagerSet tensor_mgrs = createTensorManagerSet(tensor_builders);
- auto exec = new exec::LinearExecutor{
- std::move(lowered_graph), input_tensors, output_tensors, tensor_regs,
- std::move(tensor_mgrs), std::move(code_map), order};
+ // Remove outputs of whole graph from external_operands
+ auto external_operands = data.external_operands;
+ for (const auto &index : lowered_graph->trainable_graph().getOutputs())
+ {
+ if (external_operands.contains(index))
+ external_operands.remove(index);
+ }
- if (!options.trace_filepath.empty())
- {
- std::unique_ptr<exec::IExecutionObserver> ctp =
- std::make_unique<exec::ChromeTracingObserver>(options.trace_filepath, exec->graph());
- exec->addObserver(std::move(ctp));
+ // Set trainable context data
+ backend::train::TrainableContextData tdata;
+ tdata.tgraph = std::move(tgraph);
+ tdata.op_order = std::move(data.op_order);
+ tdata.external_operands = std::move(external_operands);
+ tdata.operand_layouts = std::move(data.operand_layouts);
+ tdata.custom_kernel_builder = std::move(data.custom_kernel_builder);
+ tdata.is_linear_executor = data.is_linear_executor;
+ tdata.optimizer = optimizer;
+
+ // TODO Remove dynamic_cast
+ try
+ {
+ const auto backend = pair.first;
+ const auto tbackend = dynamic_cast<const backend::train::ITrainableBackend *>(backend);
+ tbackend_contexts.emplace(backend, tbackend->newContext(std::move(tdata)));
+ }
+ catch (const std::bad_cast &)
+ {
+      throw std::runtime_error("ExecutorFactory: Invalid backend - TrainableExecutor does not "
+                               "support non-trainable backends");
+ }
}
+ base_backend_contexts.clear();
- return exec;
-}
-
-exec::IExecutor *ExecutorFactory::createDataflowExecutor(
- std::unique_ptr<compiler::LoweredGraph> lowered_graph, const compiler::CompilerOptions &options,
- const std::shared_ptr<exec::ExecutorMap> &executor_map, bool parallel)
-{
- const auto &backend_contexts = lowered_graph->backend_contexts();
+ train::TensorRegistries tensor_regs{tbackend_contexts, true};
- initializeBackendContext(lowered_graph.get());
+ initializeSubgraphIOTensors(
+ *lowered_graph, tbackend_contexts,
+ (lowered_graph->graph().getInputs() + lowered_graph->graph().getOutputs()) |
+ ir::Remove::DUPLICATED | ir::Remove::UNDEFINED);
+ // linearize
auto order = Linear::linearize(*lowered_graph);
- runTensorRegistration(lowered_graph.get(), order);
+ Linear::dump(*lowered_graph, order);
- std::vector<std::shared_ptr<backend::ITensor>> input_tensors;
- std::vector<std::shared_ptr<backend::ITensor>> output_tensors;
- if (options.is_primary_subgraph)
+ for (auto &&pair : tbackend_contexts)
{
- input_tensors = initializeModelIOTensors(*lowered_graph, lowered_graph->graph().getInputs());
- output_tensors = initializeModelIOTensors(*lowered_graph, lowered_graph->graph().getOutputs());
+ pair.second->genTensors();
}
- TensorBuilders tensor_builders{lowered_graph->backend_contexts(), true};
- TensorRegistries tensor_regs{lowered_graph->backend_contexts(), true};
-
- // To make tensors never be deallocated, this is a workaround to use static memory planner
- for (auto &tensor_builder : tensor_builders)
+ for (auto &&pair : tbackend_contexts)
{
- lowered_graph->graph().operands().iterate(
- [&](const ir::OperandIndex &ind, const ir::Operand &) {
- if (tensor_builder->isRegistered(ind))
- {
- tensor_builder->notifyFirstUse(ind);
- }
- });
+ auto tctx = pair.second.get();
+ tctx->genTrainingTensors();
}
- for (auto &tensor_builder : tensor_builders)
+ prepareMigrantTensors(*lowered_graph, tbackend_contexts);
+
+ // Give some runtime objects to builtin KernelGenerator
+ for (auto &&pair : tbackend_contexts)
{
- tensor_builder->prepare();
+ auto builtin_context =
+ dynamic_cast<backend::builtin::train::BackendContext *>(pair.second.get());
+ if (builtin_context != nullptr)
+ {
+ auto builtin_kernel_gen = builtin_context->kernel_gen;
+ builtin_kernel_gen->setTensorRegistries(tensor_regs);
+ builtin_kernel_gen->setWholeGraphOutputs(lowered_graph->trainable_graph().getOutputs());
+ }
}
- prepareExternalTensors(*lowered_graph);
+ // Adjust the order of backends for the upcoming iteration
+ auto ordered_contexts =
+ onert::orderBackendContext<backend::train::TrainableBackendContext>(tbackend_contexts);
- ExecutionBuilder builder;
+ // TODO Remove this simulation
+ // Simulate the execution for deallocation of tensors
+ std::unordered_map<ir::OperationIndex, DeallocList> dealloc_list_map;
+ {
+ ir::OperandIndexMap<uint32_t> uses_map;
+ ir::OperandIndexSequence constants;
- // Generate kernels
- lowered_graph->iterateTopolOpSeqs([&](const ir::OpSequenceIndex &op_seq_index,
- const ir::OpSequence &op_seq) {
- auto lower_info = lowered_graph->getLowerInfo(op_seq_index);
- auto kernel_gen = lowered_graph->backend_contexts().at(lower_info->backend())->kernel_gen;
- // Set TensorBuilderSet and ExecutorMap to kernel_gen of control flow
- auto cf_kernel_gen = dynamic_cast<backend::controlflow::KernelGenerator *>(kernel_gen.get());
- if (cf_kernel_gen != nullptr)
+ auto model_io =
+ (graph.getInputs() + graph.getOutputs()) | ir::Remove::UNDEFINED | ir::Remove::DUPLICATED;
+
+ // Prepare scanning
+ graph.operands().iterate([&](const ir::OperandIndex &ind, const ir::Operand &obj) {
+ uses_map[ind] = obj.getUses().size();
+
+ if (obj.isConstant())
+ constants.append(ind);
+ });
+
+    // A trick to consider constants as an exception
+ for (const auto &ind : constants)
{
- assert(cf_kernel_gen != nullptr);
- cf_kernel_gen->setTensorRegistries(tensor_regs);
- cf_kernel_gen->setExecutorMap(executor_map);
+ uses_map[ind]++;
}
- auto fn_seq = kernel_gen->generate(op_seq);
- if (options.he_profiling_mode)
+
+ for (const auto op_ind : order)
{
- fn_seq->wrap<SyncFunction>(lower_info->backend()->config());
+ const auto &op = graph.operations().at(op_ind);
+ auto op_inputs = op.getInputs() | ir::Remove::DUPLICATED | ir::Remove::UNDEFINED;
+ auto op_outputs = op.getOutputs() | ir::Remove::DUPLICATED | ir::Remove::UNDEFINED;
+
+ for (const auto &ind : op_inputs)
+ {
+ const auto &operand = graph.operands().at(ind);
+ assert(uses_map.find(ind) != uses_map.end());
+ assert(uses_map[ind] > 0);
+ uses_map[ind]--;
+ if (uses_map[ind] == 0 && !operand.info().isVariable() && !model_io.contains(ind))
+ {
+ dealloc_list_map[op_ind].emplace_back(tensor_regs.getITensor(ind));
+ }
+ }
}
- builder.append(op_seq_index, {&op_seq, lower_info, std::move(fn_seq)});
- });
- for (const auto &tensor_builder : tensor_builders)
- {
- tensor_builder->allocate();
- }
+ // Dispose and validate
+ for (const auto &ind : constants)
+ {
+ --uses_map[ind];
+ }
- for (auto &pair : backend_contexts)
- {
- pair.second->initConsts();
+ assert(
+ std::all_of(uses_map.begin(), uses_map.end(),
+ [](std::pair<const ir::OperandIndex, uint32_t> it) { return it.second == 0; }));
}
- lowered_graph->graph().operands().iterate(
- [](const ir::OperandIndex &, ir::Operand &obj) { obj.releaseData(); });
-
- auto code_map = builder.releaseCodeMap();
-
- for (auto &it : code_map)
+ // Check derivative tensors
{
- auto op_seq_index = it.first;
- auto &fn_seq = it.second.fn_seq;
-
- fn_seq->iterate([&](exec::IFunction &ifunc) {
- ifunc.prepare();
- auto backend = lowered_graph->getLowerInfo(op_seq_index)->backend();
- auto tensor_builder = lowered_graph->backend_contexts().at(backend)->tensor_builder;
- tensor_builder->postFunctionPrepare();
- });
+ // TODO Support multiple subgraphs
+ // Check if the derivative tensors corresponding to inputs of model are nullptr
+ // NOTE The derivative tensors corresponding to inputs of model are for inputs of PermuteLayers
+    // and they are nullptr because they are meaningless.
+ assert(std::all_of(lowered_graph->trainable_graph().getInputs().begin(),
+ lowered_graph->trainable_graph().getInputs().end(),
+ [&](const auto &input_idx) {
+ return tensor_regs.getDerivativeITensor(input_idx) == nullptr;
+ }));
+
+    // Check if the derivative tensors corresponding to outputs of model exist
+    assert(std::all_of(lowered_graph->trainable_graph().getOutputs().begin(),
+                       lowered_graph->trainable_graph().getOutputs().end(),
+                       [&](const auto &output_idx) {
+                         return tensor_regs.getDerivativeITensor(output_idx) != nullptr;
+                       }));
}
- backend::TensorManagerSet tensor_mgrs = createTensorManagerSet(tensor_builders);
-
- exec::ExecutorBase *exec = nullptr;
- if (parallel)
- {
- exec = new exec::ParallelExecutor{std::move(lowered_graph), input_tensors,
- output_tensors, tensor_regs,
- std::move(tensor_mgrs), std::move(code_map)};
- }
- else
+ train::TrainableCodeMap code_map;
+ // Generate kernels
+ for (auto &&pair : ordered_contexts)
{
- auto dataflow_exec = new exec::DataflowExecutor{std::move(lowered_graph), input_tensors,
- output_tensors, tensor_regs,
- std::move(tensor_mgrs), std::move(code_map)};
- if (options.he_profiling_mode)
+ auto codes = pair.second->genKernels();
+ for (auto &&pair : codes)
{
- std::vector<const backend::Backend *> backends;
- for (const auto &pair : backend_contexts)
- {
- backends.push_back(pair.first);
- }
- auto et = std::make_shared<exec::ExecTime>(backends);
- std::unique_ptr<exec::IExecutionObserver> obs =
- std::make_unique<exec::ProfileObserver>(et, dataflow_exec->graph());
- dataflow_exec->addObserver(std::move(obs));
+ auto &op_ind = pair.first;
+ auto &tn_seq = pair.second;
+ auto &op = lowered_graph->trainable_graph().operation(op_ind);
+ auto lower_info = lowered_graph->lower_info().operation.getRawPtr(op_ind);
+
+ assert(code_map.find(op_ind) == code_map.end());
+ code_map.insert(
+ {op_ind, train::TrainableCodeAndInfo{op_ind, &op, lower_info, std::move(tn_seq)}});
}
- exec = dataflow_exec;
}
- if (!options.trace_filepath.empty())
+ if (order.size() != code_map.size())
+ {
+ throw std::runtime_error("ExecutorFactory: Some kernels are not generated");
+ }
+
+ auto exec = new exec::train::TrainableExecutor{std::move(lowered_graph),
+ std::move(tbackend_contexts),
+ tensor_regs,
+ std::move(code_map),
+ order,
+ tracing_ctx};
+
+ if (!options->trace_filepath.empty())
{
std::unique_ptr<exec::IExecutionObserver> ctp =
- std::make_unique<exec::ChromeTracingObserver>(options.trace_filepath, exec->graph());
+ std::make_unique<exec::TracingObserver>(options->trace_filepath, exec->graph(), tracing_ctx);
exec->addObserver(std::move(ctp));
}
+ // TODO Support MINMAX_H5DUMPER
return exec;
}
+#endif // ONERT_TRAIN
} // namespace compiler
} // namespace onert