Diffstat (limited to 'compiler/luci-interpreter/src/core/RuntimeGraph.cpp')
-rw-r--r-- | compiler/luci-interpreter/src/core/RuntimeGraph.cpp | 114
1 files changed, 111 insertions, 3 deletions
diff --git a/compiler/luci-interpreter/src/core/RuntimeGraph.cpp b/compiler/luci-interpreter/src/core/RuntimeGraph.cpp
index 06f0fed15..c2f8d2ea8 100644
--- a/compiler/luci-interpreter/src/core/RuntimeGraph.cpp
+++ b/compiler/luci-interpreter/src/core/RuntimeGraph.cpp
@@ -19,10 +19,102 @@
 #include "core/RuntimeModule.h"
 
 #include <algorithm>
+#include <unordered_map>
 
 namespace luci_interpreter
 {
 
+class RuntimeGraph::TensorAllocPlan
+{
+  std::vector<std::vector<Tensor *>> _alloc_plan;
+  std::vector<std::vector<Tensor *>> _dealloc_plan;
+  bool _valid = false;
+  IMemoryManager *_memory_manager;
+
+public:
+  explicit TensorAllocPlan(IMemoryManager *memory_manager);
+  void invalidate() { _valid = false; }
+  bool isValid() const { return _valid; }
+  void build(const RuntimeGraph &graph);
+  void allocate(size_t kernel_index) const;
+  void deallocate(size_t kernel_index) const;
+};
+
+RuntimeGraph::TensorAllocPlan::TensorAllocPlan(IMemoryManager *memory_manager)
+  : _memory_manager(memory_manager)
+{
+}
+
+void RuntimeGraph::TensorAllocPlan::build(const RuntimeGraph &graph)
+{
+  invalidate();
+  using Lifetime = std::pair<size_t, size_t>;
+  std::unordered_map<Tensor *, Lifetime> lifetimes;
+  const size_t num_kernels = graph._kernels.size();
+  for (size_t index = 0; index < num_kernels; ++index)
+  {
+    const auto &kernel = graph._kernels[index];
+    for (const Tensor *tensor : kernel->getInputTensors())
+    {
+      auto nc_tensor = const_cast<Tensor *>(tensor);
+      if (lifetimes.count(nc_tensor) > 0)
+        lifetimes.at(nc_tensor).second = index;
+    }
+    for (Tensor *tensor : kernel->getOutputTensors())
+    {
+      assert(lifetimes.count(tensor) == 0);
+      lifetimes[tensor] = Lifetime(index, index);
+    }
+  }
+  for (const Tensor *tensor : graph.getOutputTensors())
+  {
+    auto nc_tensor = const_cast<Tensor *>(tensor);
+    if (lifetimes.count(nc_tensor) > 0)
+      lifetimes.at(nc_tensor).second = num_kernels;
+  }
+  _alloc_plan.assign(num_kernels, std::vector<Tensor *>());
+  _dealloc_plan.assign(num_kernels + 1, std::vector<Tensor *>());
+  for (const auto &item : lifetimes)
+  {
+    _alloc_plan[item.second.first].push_back(item.first);
+    _dealloc_plan[item.second.second].push_back(item.first);
+  }
+  _valid = true;
+}
+
+void RuntimeGraph::TensorAllocPlan::allocate(size_t kernel_index) const
+{
+  assert(_valid && kernel_index < _alloc_plan.size());
+  for (Tensor *tensor : _alloc_plan[kernel_index])
+  {
+    _memory_manager->allocate_memory(*tensor);
+  }
+}
+
+void RuntimeGraph::TensorAllocPlan::deallocate(size_t kernel_index) const
+{
+  assert(_valid && kernel_index < _dealloc_plan.size());
+  for (Tensor *tensor : _dealloc_plan[kernel_index])
+  {
+    _memory_manager->release_memory(*tensor);
+  }
+}
+
+RuntimeGraph::RuntimeGraph(RuntimeModule *owning_module, IMemoryManager *memory_manager)
+  : _owning_module(owning_module), _memory_manager(memory_manager),
+    _tensor_alloc_plan(std::make_unique<TensorAllocPlan>(memory_manager))
+{
+}
+
+RuntimeGraph::~RuntimeGraph()
+{
+  for (auto &tensor : _tensors)
+  {
+    if (tensor->is_data_allocated())
+      _memory_manager->release_memory(*tensor);
+  }
+}
+
 Tensor *RuntimeGraph::addTensor(std::unique_ptr<Tensor> &&tensor)
 {
   assert(tensor != nullptr);
@@ -44,14 +136,23 @@ void RuntimeGraph::setOutputTensors(const std::vector<Tensor *> &output_tensors)
   _output_tensors = output_tensors;
 }
 
+void RuntimeGraph::configureAllocations(Tensor *tensor)
+{
+  _memory_manager->allocate_memory(*tensor);
+}
+
 void RuntimeGraph::addKernel(std::unique_ptr<Kernel> &&kernel)
 {
   assert(kernel != nullptr);
   _kernels.push_back(std::move(kernel));
+  _tensor_alloc_plan->invalidate();
 }
 
 void RuntimeGraph::execute() const
 {
+  if (!_tensor_alloc_plan->isValid())
+    _tensor_alloc_plan->build(*this);
+
   EventNotifier *event_notifier = _owning_module->getEventNotifier();
 
   // Notify the observers that the input tensors have changed.
@@ -59,12 +160,14 @@ void RuntimeGraph::execute() const
   {
     for (const Tensor *input_tensor : getInputTensors())
     {
-      event_notifier->postTensorWrite(input_tensor);
+      if (input_tensor->is_observable())
+        event_notifier->postTensorWrite(input_tensor);
     }
   }
 
-  for (const auto &kernel : _kernels)
+  for (size_t index = 0; index < _kernels.size(); ++index)
   {
+    const auto &kernel = _kernels[index];
     if (event_notifier != nullptr)
     {
       event_notifier->preOperatorExecute(kernel.get());
@@ -73,6 +176,10 @@ void RuntimeGraph::execute() const
     // TODO The `configure` method should only be called if the outputs of an operator need to be
     //      resized.
     kernel->configure();
+
+    // Preallocate outputs in advance instead of relying on automatic allocation
+    _tensor_alloc_plan->allocate(index);
+
     kernel->execute();
 
     if (event_notifier != nullptr)
@@ -82,11 +189,12 @@ void RuntimeGraph::execute() const
 
     for (const Tensor *tensor : kernel->getOutputTensors())
     {
-      if (event_notifier != nullptr)
+      if (event_notifier != nullptr && tensor->is_observable())
       {
         event_notifier->postTensorWrite(tensor);
       }
     }
+    _tensor_alloc_plan->deallocate(index);
   }
 }
 
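For context, a minimal stand-alone sketch of the lifetime-planning idea that TensorAllocPlan::build implements in the diff above: record for each tensor the index of the kernel that produces it and of the last kernel that reads it, then bucket tensors into per-kernel allocation and deallocation lists. The Tensor/Kernel structs and the three-kernel chain below are hypothetical stand-ins, not the real luci-interpreter types; only the planning loops mirror the patch.

// Illustrative sketch only (not part of the commit). The simplified Tensor
// and Kernel types are hypothetical stand-ins for the luci-interpreter classes.
#include <cassert>
#include <cstddef>
#include <iostream>
#include <string>
#include <unordered_map>
#include <utility>
#include <vector>

struct Tensor
{
  std::string name;
};

struct Kernel
{
  std::vector<Tensor *> inputs;  // tensors read by this kernel
  std::vector<Tensor *> outputs; // tensors produced by this kernel
};

int main()
{
  Tensor a{"a"}, b{"b"}, c{"c"};
  std::vector<Kernel> kernels;
  kernels.push_back(Kernel{{}, {&a}});   // kernel 0: produces a
  kernels.push_back(Kernel{{&a}, {&b}}); // kernel 1: reads a, produces b
  kernels.push_back(Kernel{{&b}, {&c}}); // kernel 2: reads b, produces c
  std::vector<Tensor *> graph_outputs = {&c};

  // Lifetime = [index of producing kernel, index after which the tensor may be freed].
  using Lifetime = std::pair<size_t, size_t>;
  std::unordered_map<Tensor *, Lifetime> lifetimes;

  for (size_t index = 0; index < kernels.size(); ++index)
  {
    for (Tensor *t : kernels[index].inputs)
      if (lifetimes.count(t) > 0)
        lifetimes.at(t).second = index; // extend last use to this kernel
    for (Tensor *t : kernels[index].outputs)
    {
      assert(lifetimes.count(t) == 0); // each tensor has a single producer
      lifetimes[t] = Lifetime(index, index);
    }
  }
  // Graph outputs must outlive the last kernel, so they are never freed mid-run.
  for (Tensor *t : graph_outputs)
    if (lifetimes.count(t) > 0)
      lifetimes.at(t).second = kernels.size();

  // Bucket tensors by the kernel index at which they are allocated/released,
  // mirroring _alloc_plan / _dealloc_plan in the patch.
  std::vector<std::vector<Tensor *>> alloc_plan(kernels.size());
  std::vector<std::vector<Tensor *>> dealloc_plan(kernels.size() + 1);
  for (const auto &item : lifetimes)
  {
    alloc_plan[item.second.first].push_back(item.first);
    dealloc_plan[item.second.second].push_back(item.first);
  }

  for (size_t i = 0; i < kernels.size(); ++i)
  {
    for (Tensor *t : alloc_plan[i])
      std::cout << "kernel " << i << ": allocate " << t->name << "\n";
    for (Tensor *t : dealloc_plan[i])
      std::cout << "kernel " << i << ": release  " << t->name << "\n";
  }
  return 0;
}

Running this prints that "a" is allocated at kernel 0 and released after kernel 1, "b" is allocated at kernel 1 and released after kernel 2, and "c" (a graph output) is never released during execution, which is the behaviour the allocate(index)/deallocate(index) calls in RuntimeGraph::execute rely on.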