summaryrefslogtreecommitdiff
path: root/compiler/luci-interpreter/src/core/RuntimeGraph.cpp
diff options
context:
space:
mode:
Diffstat (limited to 'compiler/luci-interpreter/src/core/RuntimeGraph.cpp')
-rw-r--r-- compiler/luci-interpreter/src/core/RuntimeGraph.cpp 114
1 file changed, 111 insertions(+), 3 deletions(-)
diff --git a/compiler/luci-interpreter/src/core/RuntimeGraph.cpp b/compiler/luci-interpreter/src/core/RuntimeGraph.cpp
index 06f0fed15..c2f8d2ea8 100644
--- a/compiler/luci-interpreter/src/core/RuntimeGraph.cpp
+++ b/compiler/luci-interpreter/src/core/RuntimeGraph.cpp
@@ -19,10 +19,102 @@
#include "core/RuntimeModule.h"
#include <algorithm>
+#include <unordered_map>
namespace luci_interpreter
{
+// Execution-ordered tensor memory plan for a RuntimeGraph.
+//
+// For each kernel index i, _alloc_plan[i] lists the tensors to allocate right
+// before kernel i executes, and _dealloc_plan[i] lists the tensors to release
+// right after it finishes. _dealloc_plan has one extra slot (index == number
+// of kernels) that build() assigns to graph output tensors, so they survive
+// the run. The plan is built lazily and invalidated whenever the kernel list
+// changes (see addKernel).
+class RuntimeGraph::TensorAllocPlan
+{
+ std::vector<std::vector<Tensor *>> _alloc_plan;
+ std::vector<std::vector<Tensor *>> _dealloc_plan;
+ bool _valid = false;
+ // Non-owning; the memory manager must outlive the plan.
+ IMemoryManager *_memory_manager;
+
+public:
+ explicit TensorAllocPlan(IMemoryManager *memory_manager);
+ // Mark the plan stale; the next execute() rebuilds it.
+ void invalidate() { _valid = false; }
+ bool isValid() const { return _valid; }
+ // Recompute tensor lifetimes from the graph's current kernel order.
+ void build(const RuntimeGraph &graph);
+ // Allocate all tensors first produced by the kernel at `kernel_index`.
+ void allocate(size_t kernel_index) const;
+ // Release all tensors last consumed by the kernel at `kernel_index`.
+ void deallocate(size_t kernel_index) const;
+};
+
+// Stores the (non-owning) memory manager used for all planned
+// allocations/releases; the plan starts invalid until build() is called.
+RuntimeGraph::TensorAllocPlan::TensorAllocPlan(IMemoryManager *memory_manager)
+ : _memory_manager(memory_manager)
+{
+}
+
+// Computes, for every tensor produced by a kernel of `graph`, an inclusive
+// lifetime [index of producing kernel, index of last consuming kernel] and
+// converts those lifetimes into the per-kernel alloc/dealloc lists.
+void RuntimeGraph::TensorAllocPlan::build(const RuntimeGraph &graph)
+{
+ invalidate();
+ using Lifetime = std::pair<size_t, size_t>;
+ // Tensor -> (kernel index that defines it, kernel index of its last use).
+ std::unordered_map<Tensor *, Lifetime> lifetimes;
+ const size_t num_kernels = graph._kernels.size();
+ for (size_t index = 0; index < num_kernels; ++index)
+ {
+ const auto &kernel = graph._kernels[index];
+ for (const Tensor *tensor : kernel->getInputTensors())
+ {
+ auto nc_tensor = const_cast<Tensor *>(tensor);
+ // Inputs never produced by a kernel (no lifetimes entry — presumably
+ // constants or graph inputs) are deliberately left out of the plan.
+ if (lifetimes.count(nc_tensor) > 0)
+ lifetimes.at(nc_tensor).second = index;
+ }
+ for (Tensor *tensor : kernel->getOutputTensors())
+ {
+ // Each tensor must be produced by exactly one kernel.
+ assert(lifetimes.count(tensor) == 0);
+ lifetimes[tensor] = Lifetime(index, index);
+ }
+ }
+ // Extend graph outputs to the sentinel index num_kernels: execute() only
+ // deallocates indices < num_kernels, so outputs are never freed by the run.
+ for (const Tensor *tensor : graph.getOutputTensors())
+ {
+ auto nc_tensor = const_cast<Tensor *>(tensor);
+ if (lifetimes.count(nc_tensor) > 0)
+ lifetimes.at(nc_tensor).second = num_kernels;
+ }
+ _alloc_plan.assign(num_kernels, std::vector<Tensor *>());
+ _dealloc_plan.assign(num_kernels + 1, std::vector<Tensor *>());
+ for (const auto &item : lifetimes)
+ {
+ _alloc_plan[item.second.first].push_back(item.first);
+ _dealloc_plan[item.second.second].push_back(item.first);
+ }
+ _valid = true;
+}
+
+// Allocates memory for every tensor whose lifetime starts at `kernel_index`.
+// Requires a previously built (valid) plan and an in-range index.
+void RuntimeGraph::TensorAllocPlan::allocate(size_t kernel_index) const
+{
+ assert(_valid && kernel_index < _alloc_plan.size());
+ for (Tensor *tensor : _alloc_plan[kernel_index])
+ {
+ _memory_manager->allocate_memory(*tensor);
+ }
+}
+
+// Releases memory of every tensor whose lifetime ends at `kernel_index`.
+// Requires a previously built (valid) plan and an in-range index.
+void RuntimeGraph::TensorAllocPlan::deallocate(size_t kernel_index) const
+{
+ assert(_valid && kernel_index < _dealloc_plan.size());
+ for (Tensor *tensor : _dealloc_plan[kernel_index])
+ {
+ _memory_manager->release_memory(*tensor);
+ }
+}
+
+// Both pointer parameters are stored non-owning; the owning module and memory
+// manager must outlive this graph. The allocation plan starts invalid and is
+// built lazily on the first execute().
+RuntimeGraph::RuntimeGraph(RuntimeModule *owning_module, IMemoryManager *memory_manager)
+ : _owning_module(owning_module), _memory_manager(memory_manager),
+ _tensor_alloc_plan(std::make_unique<TensorAllocPlan>(memory_manager))
+{
+}
+
+// Releases the data of any tensor still allocated at destruction time — e.g.
+// graph outputs, which the execution plan intentionally never frees.
+RuntimeGraph::~RuntimeGraph()
+{
+ for (auto &tensor : _tensors)
+ {
+ if (tensor->is_data_allocated())
+ _memory_manager->release_memory(*tensor);
+ }
+}
+
Tensor *RuntimeGraph::addTensor(std::unique_ptr<Tensor> &&tensor)
{
assert(tensor != nullptr);
@@ -44,14 +136,23 @@ void RuntimeGraph::setOutputTensors(const std::vector<Tensor *> &output_tensors)
_output_tensors = output_tensors;
}
+// Eagerly allocates memory for `tensor` via the memory manager, outside the
+// lazy per-kernel plan — presumably for graph input/constant tensors; confirm
+// against callers.
+void RuntimeGraph::configureAllocations(Tensor *tensor)
+{
+ _memory_manager->allocate_memory(*tensor);
+}
+
// Appends a kernel to the execution sequence, taking ownership of it.
void RuntimeGraph::addKernel(std::unique_ptr<Kernel> &&kernel)
{
assert(kernel != nullptr);
_kernels.push_back(std::move(kernel));
+ // The kernel order changed, so tensor lifetimes must be recomputed before
+ // the next execute().
+ _tensor_alloc_plan->invalidate();
}
// Runs all kernels in order, allocating each tensor just before its producing
// kernel and releasing it right after its last consumer, per the alloc plan.
void RuntimeGraph::execute() const
{
+ // Rebuild the plan if it was invalidated (e.g. by addKernel) or never built.
+ if (!_tensor_alloc_plan->isValid())
+ _tensor_alloc_plan->build(*this);
+
EventNotifier *event_notifier = _owning_module->getEventNotifier();
// Notify the observers that the input tensors have changed.
@@ -59,12 +160,14 @@ void RuntimeGraph::execute() const
{
for (const Tensor *input_tensor : getInputTensors())
{
- event_notifier->postTensorWrite(input_tensor);
+ // Only tensors marked observable are reported to the event notifier.
+ if (input_tensor->is_observable())
+ event_notifier->postTensorWrite(input_tensor);
}
}
- for (const auto &kernel : _kernels)
+ // Indexed loop: `index` keys into the alloc/dealloc plan for this kernel.
+ for (size_t index = 0; index < _kernels.size(); ++index)
{
+ const auto &kernel = _kernels[index];
if (event_notifier != nullptr)
{
event_notifier->preOperatorExecute(kernel.get());
@@ -73,6 +176,10 @@ void RuntimeGraph::execute() const
// TODO The `configure` method should only be called if the outputs of an operator need to be
// resized.
kernel->configure();
+
+ // Preallocate outputs in advance instead of relying on automatic allocation
+ _tensor_alloc_plan->allocate(index);
+
kernel->execute();
if (event_notifier != nullptr)
@@ -82,11 +189,12 @@ void RuntimeGraph::execute() const
for (const Tensor *tensor : kernel->getOutputTensors())
{
- if (event_notifier != nullptr)
+ if (event_notifier != nullptr && tensor->is_observable())
{
event_notifier->postTensorWrite(tensor);
}
}
+ // Free tensors whose last consumer was this kernel; graph outputs are
+ // planned at index num_kernels and therefore never released here.
+ _tensor_alloc_plan->deallocate(index);
}
}