 runtime/neurun/backend/acl_cl/TensorRegister.h         | 19 ++++++++++++++++++-
 runtime/neurun/backend/acl_cl/operand/CLTensor.cc      |  4 ++--
 runtime/neurun/backend/acl_cl/operand/CLTensor.h       |  4 +++-
 runtime/neurun/backend/acl_common/AclMemoryManager.h   |  4 ++--
 runtime/neurun/backend/acl_common/AclTensorManager.h   | 12 +++++++-----
 runtime/neurun/backend/acl_common/AclTensorRegister.cc | 14 ++++++++++++++
 runtime/neurun/backend/acl_common/AclTensorRegister.h  | 11 +++++++++--
 runtime/neurun/backend/acl_common/TemplTensorBuilder.h | 11 ++++++++++-
 runtime/neurun/backend/acl_neon/TensorRegister.h       | 19 ++++++++++++++++++-
 runtime/neurun/backend/acl_neon/operand/NETensor.cc    |  4 ++--
 runtime/neurun/backend/acl_neon/operand/NETensor.h     |  4 +++-
 11 files changed, 88 insertions(+), 18 deletions(-)
diff --git a/runtime/neurun/backend/acl_cl/TensorRegister.h b/runtime/neurun/backend/acl_cl/TensorRegister.h
index b78c9b547..a523f2d47 100644
--- a/runtime/neurun/backend/acl_cl/TensorRegister.h
+++ b/runtime/neurun/backend/acl_cl/TensorRegister.h
@@ -18,6 +18,8 @@
#define __NEURUN_BACKEND_ACL_CL_TENSOR_REGISTER_H__
#include <AclTensorRegister.h>
+#include <misc/polymorphic_downcast.h>
+#include "TensorBuilder.h"
namespace neurun
{
@@ -26,7 +28,22 @@ namespace backend
namespace acl_cl
{
-using TensorRegister = acl_common::AclTensorRegister;
+class TensorRegister : public acl_common::AclTensorRegister
+{
+public:
+ TensorRegister(const model::Operands &operands,
+ const std::shared_ptr<TensorBuilder> &tensor_builder)
+ : acl_common::AclTensorRegister{operands, tensor_builder}
+ {
+ // DO NOTHING
+ }
+
+ void setUsesCount(const model::OperandIndex &ind, size_t num_uses) const override
+ {
+ nnfw::misc::polymorphic_downcast<TensorBuilder *>(tensor_builder().get())
+ ->setUsesCount(ind, num_uses);
+ }
+};
} // namespace acl_cl
} // namespace backend
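
For reference, nnfw::misc::polymorphic_downcast is presumably modeled on boost::polymorphic_downcast: a static_cast that is checked against the matching dynamic_cast in debug builds. Below is a minimal standalone sketch of the forwarding pattern this class uses, with hypothetical stand-in types (IBuilder, ConcreteBuilder) in place of the project's headers:

#include <cassert>
#include <cstddef>
#include <memory>

struct IBuilder
{
  virtual ~IBuilder() = default;
};

struct ConcreteBuilder : IBuilder
{
  // Stand-in for TensorBuilder::setUsesCount
  void setUsesCount(int ind, std::size_t num_uses) { (void)ind; (void)num_uses; }
};

class Register
{
public:
  explicit Register(const std::shared_ptr<IBuilder> &builder) : _builder{builder} {}

  void setUsesCount(int ind, std::size_t num_uses) const
  {
    // Checked downcast: static_cast verified against dynamic_cast in debug
    auto *concrete = static_cast<ConcreteBuilder *>(_builder.get());
    assert(dynamic_cast<ConcreteBuilder *>(_builder.get()) == concrete);
    concrete->setUsesCount(ind, num_uses);
  }

private:
  std::shared_ptr<IBuilder> _builder;
};
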
diff --git a/runtime/neurun/backend/acl_cl/operand/CLTensor.cc b/runtime/neurun/backend/acl_cl/operand/CLTensor.cc
index 6153fc2e4..dab74e65f 100644
--- a/runtime/neurun/backend/acl_cl/operand/CLTensor.cc
+++ b/runtime/neurun/backend/acl_cl/operand/CLTensor.cc
@@ -31,8 +31,8 @@ namespace acl_cl
namespace operand
{
-CLTensor::CLTensor(const arm_compute::TensorInfo &info, size_t rank)
- : _cl_tensor(std::make_shared<arm_compute::CLTensor>()), _rank{rank}
+CLTensor::CLTensor(const arm_compute::TensorInfo &info, size_t rank, size_t num_uses)
+ : _cl_tensor(std::make_shared<arm_compute::CLTensor>()), _rank{rank}, _num_uses{num_uses}
{
allocator()->init(info);
}
diff --git a/runtime/neurun/backend/acl_cl/operand/CLTensor.h b/runtime/neurun/backend/acl_cl/operand/CLTensor.h
index 952851623..8518bf0c3 100644
--- a/runtime/neurun/backend/acl_cl/operand/CLTensor.h
+++ b/runtime/neurun/backend/acl_cl/operand/CLTensor.h
@@ -38,7 +38,7 @@ public:
CLTensor() = delete;
public:
- CLTensor(const arm_compute::TensorInfo &info, size_t rank);
+ CLTensor(const arm_compute::TensorInfo &info, size_t rank, size_t num_uses);
public:
size_t num_dimensions() const final { return _rank; }
@@ -46,6 +46,7 @@ public:
public:
const arm_compute::CLTensor *handle() const override;
arm_compute::CLTensor *handle() override;
+ size_t num_uses() const { return _num_uses; }
public:
arm_compute::CLTensorAllocator *allocator();
@@ -63,6 +64,7 @@ public:
private:
std::shared_ptr<arm_compute::CLTensor> _cl_tensor;
size_t _rank;
+ size_t _num_uses;
};
} // namespace operand
diff --git a/runtime/neurun/backend/acl_common/AclMemoryManager.h b/runtime/neurun/backend/acl_common/AclMemoryManager.h
index 60d2c5fe1..39662fc08 100644
--- a/runtime/neurun/backend/acl_common/AclMemoryManager.h
+++ b/runtime/neurun/backend/acl_common/AclMemoryManager.h
@@ -66,9 +66,9 @@ public:
virtual void finishLifetime(const model::OperandIndex &) { /* DO NOTHING */}
void buildTensor(const model::OperandIndex &ind, const ::arm_compute::TensorInfo &info,
- size_t rank)
+ size_t rank, size_t num_uses)
{
- auto tensor = std::make_shared<T_Tensor>(info, rank);
+ auto tensor = std::make_shared<T_Tensor>(info, rank, num_uses);
_tensors[ind] = tensor;
}
diff --git a/runtime/neurun/backend/acl_common/AclTensorManager.h b/runtime/neurun/backend/acl_common/AclTensorManager.h
index 872966365..3849f18dd 100644
--- a/runtime/neurun/backend/acl_common/AclTensorManager.h
+++ b/runtime/neurun/backend/acl_common/AclTensorManager.h
@@ -51,7 +51,7 @@ public:
void deallocateInternalBufferManager(void);
void buildTensor(const model::OperandIndex &ind, const ::arm_compute::TensorInfo &info,
- size_t rank, bool as_const);
+ size_t rank, bool as_const, size_t num_uses);
void buildSubtensor(const model::OperandIndex &parent, const model::OperandIndex &child,
const ::arm_compute::TensorShape &shape,
const ::arm_compute::Coordinates &coordinates, size_t rank,
@@ -145,17 +145,17 @@ void AclTensorManager<T_ITensor, T_Tensor, T_SubTensor, T_Object>::deallocateInt
template <typename T_ITensor, typename T_Tensor, typename T_SubTensor, typename T_Object>
void AclTensorManager<T_ITensor, T_Tensor, T_SubTensor, T_Object>::buildTensor(
const model::OperandIndex &ind, const ::arm_compute::TensorInfo &info, size_t rank,
- bool as_const)
+ bool as_const, size_t num_uses)
{
assert(_ind_to_mgr.find(ind) == _ind_to_mgr.end());
if (as_const)
{
- _const_mgr->buildTensor(ind, info, rank);
+ _const_mgr->buildTensor(ind, info, rank, num_uses);
_ind_to_mgr.insert({ind, *_const_mgr});
}
else
{
- _nonconst_mgr->buildTensor(ind, info, rank);
+ _nonconst_mgr->buildTensor(ind, info, rank, num_uses);
_ind_to_mgr.insert({ind, *_nonconst_mgr});
}
}
@@ -281,7 +281,9 @@ void AclTensorManager<T_ITensor, T_Tensor, T_SubTensor, T_Object>::tryDeallocCon
{
const auto &ind = it->first;
auto tensor = it->second;
- if (tensor->handle() && !tensor->handle()->is_used())
+ // NOTE The condition "tensor->num_uses() < 2" is used to prevent deallocating a constant tensor
+ // used in several nodes.
+ if (tensor->handle() && !tensor->handle()->is_used() && tensor->num_uses() < 2)
{
VERBOSE(AclTensorManager) << "Tensor #" << ind.value()
<< " will be deallocated as an unused constant tensor" << std::endl;
diff --git a/runtime/neurun/backend/acl_common/AclTensorRegister.cc b/runtime/neurun/backend/acl_common/AclTensorRegister.cc
index 14691a6fb..b85cfe72a 100644
--- a/runtime/neurun/backend/acl_common/AclTensorRegister.cc
+++ b/runtime/neurun/backend/acl_common/AclTensorRegister.cc
@@ -30,6 +30,20 @@ AclTensorRegister::AclTensorRegister(const model::Operands &operands,
assert(tensor_builder != nullptr);
}
+void AclTensorRegister::visit(const model::Subgraph &subgraph)
+{
+ for (const auto &e : subgraph.operations())
+ {
+ const auto &node = *(e.node);
+ node.accept(*this);
+ // Record the number of nodes that use each input operand
+ for (const auto &input : node.getInputs())
+ {
+ setUsesCount(input, _operands.at(input).getUses().size());
+ }
+ }
+}
+
} // namespace acl_common
} // namespace backend
} // namespace neurun
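
The new visit() walks a subgraph, lets each node register its tensors, and then records, for every input operand, the total use count reported by the operand itself. A standalone sketch of that counting step, with hypothetical stand-in types (Node, a uses map) replacing model::Subgraph and model::Operands:

#include <cstddef>
#include <map>
#include <vector>

using OperandIndex = int;

struct Node
{
  std::vector<OperandIndex> inputs;
  const std::vector<OperandIndex> &getInputs() const { return inputs; }
};

int main()
{
  // uses[i] lists the nodes consuming operand i (stand-in for getUses())
  std::map<OperandIndex, std::vector<int>> uses{{0, {0, 1}}, {1, {1}}};
  std::map<OperandIndex, std::size_t> uses_count;

  std::vector<Node> subgraph{{{0}}, {{0, 1}}};
  for (const auto &node : subgraph)
  {
    // node.accept(*this) would run here; afterwards the counts are recorded
    for (const auto &input : node.getInputs())
      uses_count[input] = uses.at(input).size();
  }

  // Operand 0 feeds two nodes, so its count is 2 and tryDeallocConstants
  // will skip it (num_uses() < 2 is false)
  return uses_count[0] == 2 ? 0 : 1;
}
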
diff --git a/runtime/neurun/backend/acl_common/AclTensorRegister.h b/runtime/neurun/backend/acl_common/AclTensorRegister.h
index ff215072f..1c316255c 100644
--- a/runtime/neurun/backend/acl_common/AclTensorRegister.h
+++ b/runtime/neurun/backend/acl_common/AclTensorRegister.h
@@ -28,11 +28,18 @@ namespace acl_common
class AclTensorRegister : public ITensorRegister
{
-public:
+protected:
AclTensorRegister(const model::Operands &operands,
const std::shared_ptr<ITensorBuilder> &tensor_builder);
-private:
+public:
+ virtual ~AclTensorRegister() = default;
+
+protected:
+ void visit(const model::Subgraph &subgraph);
+ virtual void setUsesCount(const model::OperandIndex &ind, size_t num_uses) const = 0;
+
+protected:
const model::Operands &operands() const override { return _operands; }
std::shared_ptr<ITensorBuilder> tensor_builder() const override { return _tensor_builder; }
bool supportSubTensor() const final { return true; }
diff --git a/runtime/neurun/backend/acl_common/TemplTensorBuilder.h b/runtime/neurun/backend/acl_common/TemplTensorBuilder.h
index ef70ee2d6..c770b52e5 100644
--- a/runtime/neurun/backend/acl_common/TemplTensorBuilder.h
+++ b/runtime/neurun/backend/acl_common/TemplTensorBuilder.h
@@ -97,6 +97,13 @@ public:
T_AclTensorManager *acl_tensor_manager(void) { return _tensor_mgr.get(); }
+ void setUsesCount(const model::OperandIndex &index, size_t num_uses)
+ {
+ assert(_uses_count_map.find(index) != _uses_count_map.end() ? _uses_count_map[index] == num_uses
+ : true);
+ _uses_count_map[index] = num_uses;
+ }
+
private:
void buildTensors(void);
void buildSubtensors(void);
@@ -108,6 +115,7 @@ private:
model::OperandIndexMap<compiler::SubTensorInfo> _subtensor_info_map;
model::OperandIndexMap<bool> _apply_dim_correction_map;
model::OperandIndexMap<model::Layout> _tensor_layout_map;
+ model::OperandIndexMap<size_t> _uses_count_map;
std::unique_ptr<T_AclTensorManager> _tensor_mgr;
model::OperandIndexSequence _constants;
@@ -325,7 +333,8 @@ void TemplTensorBuilder<T_ITensor, T_Tensor, T_SubTensor, T_Object>::buildTensor
const auto &backend_layout = _tensor_layout_map[root_parent];
auto tensor_info = asTensorInfo(info.shape(), info.typeInfo(), model::Layout::UNKNOWN,
backend_layout, _apply_dim_correction_map[ind]);
- _tensor_mgr->buildTensor(ind, tensor_info, info.shape().rank(), _constants.contains(ind));
+ _tensor_mgr->buildTensor(ind, tensor_info, info.shape().rank(), _constants.contains(ind),
+ _uses_count_map[ind]);
}
}
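
The assert in setUsesCount() above encodes a contract: the count for an operand may be written repeatedly, but every write must agree with the value already stored. A minimal sketch of the same check written without the ternary, using a plain std::unordered_map:

#include <cassert>
#include <cstddef>
#include <unordered_map>

std::unordered_map<int, std::size_t> uses_count_map;

void setUsesCount(int index, std::size_t num_uses)
{
  // Either the index is new, or the stored count matches the incoming one
  auto it = uses_count_map.find(index);
  assert(it == uses_count_map.end() || it->second == num_uses);
  uses_count_map[index] = num_uses;
}

buildTensors() then reads _uses_count_map[ind] when constructing each tensor; operands with no recorded count get a default of 0 from operator[], which leaves them eligible for deallocation.
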
diff --git a/runtime/neurun/backend/acl_neon/TensorRegister.h b/runtime/neurun/backend/acl_neon/TensorRegister.h
index 388cb9172..708beb46e 100644
--- a/runtime/neurun/backend/acl_neon/TensorRegister.h
+++ b/runtime/neurun/backend/acl_neon/TensorRegister.h
@@ -18,6 +18,8 @@
#define __NEURUN_BACKEND_ACL_NEON_TENSOR_REGISTER_H__
#include <AclTensorRegister.h>
+#include <misc/polymorphic_downcast.h>
+#include "TensorBuilder.h"
namespace neurun
{
@@ -26,7 +28,22 @@ namespace backend
namespace acl_neon
{
-using TensorRegister = acl_common::AclTensorRegister;
+class TensorRegister : public acl_common::AclTensorRegister
+{
+public:
+ TensorRegister(const model::Operands &operands,
+ const std::shared_ptr<TensorBuilder> &tensor_builder)
+ : acl_common::AclTensorRegister{operands, tensor_builder}
+ {
+ // DO NOTHING
+ }
+
+ void setUsesCount(const model::OperandIndex &ind, size_t num_uses) const override
+ {
+ nnfw::misc::polymorphic_downcast<TensorBuilder *>(tensor_builder().get())
+ ->setUsesCount(ind, num_uses);
+ }
+};
} // namespace acl_neon
} // namespace backend
diff --git a/runtime/neurun/backend/acl_neon/operand/NETensor.cc b/runtime/neurun/backend/acl_neon/operand/NETensor.cc
index 756403ef1..8a9ece88f 100644
--- a/runtime/neurun/backend/acl_neon/operand/NETensor.cc
+++ b/runtime/neurun/backend/acl_neon/operand/NETensor.cc
@@ -27,8 +27,8 @@ namespace acl_neon
namespace operand
{
-NETensor::NETensor(const arm_compute::TensorInfo &info, size_t rank)
- : _ne_tensor(std::make_shared<arm_compute::Tensor>()), _rank{rank}
+NETensor::NETensor(const arm_compute::TensorInfo &info, size_t rank, size_t num_uses)
+ : _ne_tensor(std::make_shared<arm_compute::Tensor>()), _rank{rank}, _num_uses{num_uses}
{
allocator()->init(info);
}
diff --git a/runtime/neurun/backend/acl_neon/operand/NETensor.h b/runtime/neurun/backend/acl_neon/operand/NETensor.h
index 298a82054..3de4695e9 100644
--- a/runtime/neurun/backend/acl_neon/operand/NETensor.h
+++ b/runtime/neurun/backend/acl_neon/operand/NETensor.h
@@ -37,7 +37,7 @@ public:
NETensor() = delete;
public:
- NETensor(const arm_compute::TensorInfo &info, size_t rank);
+ NETensor(const arm_compute::TensorInfo &info, size_t rank, size_t num_uses);
public:
size_t num_dimensions() const final { return _rank; }
@@ -45,6 +45,7 @@ public:
public:
const arm_compute::Tensor *handle() const override;
arm_compute::Tensor *handle() override;
+ size_t num_uses() const { return _num_uses; }
public:
arm_compute::TensorAllocator *allocator();
@@ -52,6 +53,7 @@ public:
private:
std::shared_ptr<arm_compute::Tensor> _ne_tensor;
size_t _rank;
+ size_t _num_uses;
};
} // namespace operand