Diffstat (limited to 'runtimes/neurun/src/backend/cpu')
18 files changed, 689 insertions, 653 deletions
diff --git a/runtimes/neurun/src/backend/cpu/CMakeLists.txt b/runtimes/neurun/src/backend/cpu/CMakeLists.txt index 95e9af687..dc4406a65 100644 --- a/runtimes/neurun/src/backend/cpu/CMakeLists.txt +++ b/runtimes/neurun/src/backend/cpu/CMakeLists.txt @@ -1,19 +1,18 @@ file(GLOB_RECURSE SOURCES "*.cc") -add_library(${LIB_NEURUN_BACKEND_CPU} STATIC ${SOURCES}) +add_library(${LIB_NEURUN_BACKEND_CPU} SHARED ${SOURCES}) target_include_directories(${LIB_NEURUN_BACKEND_CPU} PUBLIC ${NNFW_INCLUDE_DIR}) target_include_directories(${LIB_NEURUN_BACKEND_CPU} PUBLIC ${NEURUN_INCLUDE_DIR}) target_include_directories(${LIB_NEURUN_BACKEND_CPU} PUBLIC ${CMAKE_SOURCE_DIR}/externals/tensorflow) -target_link_libraries(${LIB_NEURUN_BACKEND_CPU} arm_compute) # TODO We should not need this target_link_libraries(${LIB_NEURUN_BACKEND_CPU} tensorflow-lite) -target_link_libraries(${LIB_NEURUN_BACKEND_CPU} nnfw_util) -target_link_libraries(${LIB_NEURUN_BACKEND_CPU} nnfw_support_nnapi) +target_link_libraries(${LIB_NEURUN_BACKEND_CPU} nnfw_lib_misc) +target_link_libraries(${LIB_NEURUN_BACKEND_CPU} nnfw_lib_cpp14) target_link_libraries(${LIB_NEURUN_BACKEND_CPU} ${LIB_NEURUN_KERNEL_CPU}) +target_link_libraries(${LIB_NEURUN_BACKEND_CPU} ${LIB_NEURUN}) target_compile_options(${LIB_NEURUN_BACKEND_CPU} PRIVATE -Wall -Wextra -Werror) -set_target_properties(${LIB_NEURUN_BACKEND_CPU} PROPERTIES POSITION_INDEPENDENT_CODE ON) set_target_properties(${LIB_NEURUN_BACKEND_CPU} PROPERTIES OUTPUT_NAME backend_cpu) install(TARGETS ${LIB_NEURUN_BACKEND_CPU} DESTINATION lib/neurun) diff --git a/runtimes/neurun/src/backend/cpu/BackendConfig.cc b/runtimes/neurun/src/backend/cpu/Config.cc index 34fc3491a..001ba9d02 100644 --- a/runtimes/neurun/src/backend/cpu/BackendConfig.cc +++ b/runtimes/neurun/src/backend/cpu/Config.cc @@ -14,7 +14,7 @@ * limitations under the License. */ -#include "backend/cpu/BackendConfig.h" +#include "backend/cpu/Config.h" namespace neurun { @@ -23,7 +23,7 @@ namespace backend namespace cpu { -void BackendConfig::initialize() +void Config::initialize() { // DO NOTHING } diff --git a/runtimes/neurun/src/backend/cpu/BackendConfig.h b/runtimes/neurun/src/backend/cpu/Config.h index 109235bb1..ad9ca0ee8 100644 --- a/runtimes/neurun/src/backend/cpu/BackendConfig.h +++ b/runtimes/neurun/src/backend/cpu/Config.h @@ -14,10 +14,10 @@ * limitations under the License. 
*/ -#ifndef __NEURUN_BACKEND_CPU_BACKEND_CONFIG_H__ -#define __NEURUN_BACKEND_CPU_BACKEND_CONFIG_H__ +#ifndef __NEURUN_BACKEND_CPU_CONFIG_H__ +#define __NEURUN_BACKEND_CPU_CONFIG_H__ -#include "backend/IBackendConfig.h" +#include "backend/interface/IConfig.h" namespace neurun { @@ -26,20 +26,26 @@ namespace backend namespace cpu { -class BackendConfig : public IBackendConfig +class Config : public IConfig { public: - BackendConfig() + Config() { // DO NOTHING } + virtual std::string id() override { return "cpu"; } virtual void initialize() override; virtual graph::operand::Layout getOperandLayout() { return graph::operand::Layout::NHWC; } + virtual bool SupportSubTensorAlloc() override + { + // NOTE CPU allocator cannot support subtensor allocation yet + return false; + } }; } // namespace cpu } // namespace backend } // namespace neurun -#endif // __NEURUN_BACKEND_CPU_BACKEND_CONFIG_H__ +#endif // __NEURUN_BACKEND_CPU_CONFIG_H__ diff --git a/runtimes/neurun/src/backend/cpu/InitializerGenerator.cc b/runtimes/neurun/src/backend/cpu/InitializerGenerator.cc deleted file mode 100644 index 7b08c7131..000000000 --- a/runtimes/neurun/src/backend/cpu/InitializerGenerator.cc +++ /dev/null @@ -1,208 +0,0 @@ -/* - * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ - -#include "InitializerGenerator.h" - -#include "internal/nnapi/kernel/Reader.h" -#include "internal/nnapi/kernel/View.h" -#include "util/kernel/IndexIterator.h" - -#include "NeuralNetworks.h" - -namespace neurun -{ -namespace backend -{ -namespace cpu -{ - -InitializerGenerator::InitializerGenerator(const neurun::graph::operand::Set &ctx) : _ctx(ctx) -{ - // DO NOTHING -} - -Initializer -InitializerGenerator::generateWeight(const graph::operation::Conv2D::Implicit::Node &node) -{ - const ::neurun::graph::operand::Index ker_index{node.getInputs().at(1)}; - - const auto ker_shape = _ctx.at(ker_index).shape().asKernel(); - auto ker_base = _ctx.at(ker_index).data().base(); - auto ker_size = _ctx.at(ker_index).data().size(); - - return [ker_shape, ker_base, ker_size](::arm_compute::ITensor &tensor) { - const ::internal::nnapi::kernel::Reader<float> from{ker_shape, ker_base, ker_size}; - ::internal::nnapi::kernel::View<float> into{&tensor}; - - ::nnfw::util::kernel::iterate(ker_shape) - << [&](uint32_t nth, uint32_t ch, uint32_t row, uint32_t col) { - const auto value = from.at(nth, ch, row, col); - into.at(nth, row, col, ch) = value; - }; - }; -} - -Initializer InitializerGenerator::generateWeight(const graph::operation::FullyConnected::Node &node) -{ - const ::neurun::graph::operand::Index weight_index{node.getInputs().at(1)}; - const ::neurun::graph::operand::Index input_index{node.getInputs().at(0)}; - - const auto num_output = _ctx.at(weight_index).shape().dim(0); - auto weight_base = _ctx.at(weight_index).data().base(); - auto weight_size = _ctx.at(weight_index).data().size(); - auto weight_type = _ctx.at(weight_index).typeInfo().type(); - - // NOTE We assume that input is a feature map - // TODO Remove this restriction! - const auto ifm_shape = _ctx.at(input_index).shape().asFeature(); - - switch (weight_type) - { - case ::neurun::graph::operand::DataType::TENSOR_FLOAT32: - { - return [num_output, ifm_shape, weight_base, weight_size](::arm_compute::ITensor &tensor) { - const ::nnfw::util::kernel::Shape ker_shape{num_output, ifm_shape.C, ifm_shape.H, - ifm_shape.W}; - const ::internal::nnapi::kernel::Reader<float> from{ker_shape, weight_base, weight_size}; - - ::nnfw::util::kernel::iterate(ker_shape) - << [&](uint32_t nth, uint32_t ch, uint32_t row, uint32_t col) { - const auto value = from.at(nth, ch, row, col); - - uint32_t offset = 0; - - // NNAPI uses NHWC ordering - offset += nth * ifm_shape.H * ifm_shape.W * ifm_shape.C; - offset += row * ifm_shape.W * ifm_shape.C; - offset += col * ifm_shape.C; - offset += ch; - - const ::arm_compute::Coordinates coordinate{offset}; - - auto into = reinterpret_cast<float *>(tensor.ptr_to_element(coordinate)); - - *into = value; - }; - }; - } - case ::neurun::graph::operand::DataType::TENSOR_QUANT8_ASYMM: - { - return [num_output, ifm_shape, weight_base, weight_size](::arm_compute::ITensor &tensor) { - const ::nnfw::util::kernel::Shape ker_shape{num_output, ifm_shape.C, ifm_shape.H, - ifm_shape.W}; - const ::internal::nnapi::kernel::Reader<uint8_t> from{ker_shape, weight_base, weight_size}; - ::nnfw::util::kernel::iterate(ker_shape) - << [&](uint32_t nth, uint32_t ch, uint32_t row, uint32_t col) { - const auto value = from.at(nth, ch, row, col); - uint32_t offset = 0; - - // NNAPI uses NHWC ordering - offset += nth * ifm_shape.H * ifm_shape.W * ifm_shape.C; - offset += row * ifm_shape.W * ifm_shape.C; - offset += col * ifm_shape.C; - offset += ch; - - const ::arm_compute::Coordinates coordinate{offset}; - - auto into = reinterpret_cast<uint8_t 
*>(tensor.ptr_to_element(coordinate)); - - *into = value; - }; - }; - } - default: - { - throw std::runtime_error("Not supported weight type"); - } - } -} - -Initializer InitializerGenerator::generateBias(const graph::operation::Conv2D::Implicit::Node &node) -{ - // TODO Refactor so we can reuse the common code - - const ::neurun::graph::operand::Index bias_index{node.getInputs().at(2)}; - - auto bias_base = _ctx.at(bias_index).data().base(); - const auto bias_size = _ctx.at(bias_index).shape().asVector(); - - return [bias_base, bias_size](::arm_compute::ITensor &tensor) { - for (int32_t n = 0; n < bias_size; ++n) - { - const ::arm_compute::Coordinates coordinate{n}; - - float *into = reinterpret_cast<float *>(tensor.ptr_to_element(coordinate)); - - const float *from = reinterpret_cast<const float *>(bias_base) + n; - const auto value = *from; - - *into = value; - } - }; -} - -Initializer InitializerGenerator::generateBias(const graph::operation::FullyConnected::Node &node) -{ - const ::neurun::graph::operand::Index bias_index{node.getInputs().at(2)}; - - auto bias_base = _ctx.at(bias_index).data().base(); - auto bias_type = _ctx.at(bias_index).typeInfo().type(); - const auto bias_size = _ctx.at(bias_index).shape().asVector(); - - switch (bias_type) - { - case ::neurun::graph::operand::DataType::TENSOR_FLOAT32: - { - return [bias_base, bias_size](::arm_compute::ITensor &tensor) { - for (int32_t n = 0; n < bias_size; ++n) - { - const ::arm_compute::Coordinates coordinate{n}; - - float *into = reinterpret_cast<float *>(tensor.ptr_to_element(coordinate)); - - const float *from = reinterpret_cast<const float *>(bias_base) + n; - const auto value = *from; - - *into = value; - } - }; - } - case ::neurun::graph::operand::DataType::TENSOR_QUANT8_ASYMM: - { - return [bias_base, bias_size](::arm_compute::ITensor &tensor) { - for (int32_t n = 0; n < bias_size; ++n) - { - const ::arm_compute::Coordinates coordinate{n}; - - uint8_t *into = reinterpret_cast<uint8_t *>(tensor.ptr_to_element(coordinate)); - - const uint8_t *from = reinterpret_cast<const uint8_t *>(bias_base) + n; - const auto value = *from; - - *into = value; - } - }; - } - default: - { - throw std::runtime_error("Not supported bias type"); - } - } -} - -} // namespace cpu -} // namespace backend -} // namespace neurun diff --git a/runtimes/neurun/src/backend/cpu/InitializerGenerator.h b/runtimes/neurun/src/backend/cpu/InitializerGenerator.h deleted file mode 100644 index 42d37f48b..000000000 --- a/runtimes/neurun/src/backend/cpu/InitializerGenerator.h +++ /dev/null @@ -1,50 +0,0 @@ -/* - * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ - -#ifndef __NEURUN_BACKEND_CPU_INITIALIZER_GENERATOR_H__ -#define __NEURUN_BACKEND_CPU_INITIALIZER_GENERATOR_H__ - -#include "backend/IInitializerGenerator.h" - -#include "graph/operand/Set.h" - -namespace neurun -{ -namespace backend -{ -namespace cpu -{ - -class InitializerGenerator : public IInitializerGenerator -{ -public: - InitializerGenerator(const neurun::graph::operand::Set &ctx); - - Initializer generateWeight(const graph::operation::Conv2D::Implicit::Node &node) override; - Initializer generateWeight(const graph::operation::FullyConnected::Node &node) override; - - Initializer generateBias(const graph::operation::Conv2D::Implicit::Node &node) override; - Initializer generateBias(const graph::operation::FullyConnected::Node &node) override; - -private: - const neurun::graph::operand::Set &_ctx; -}; - -} // namespace cpu -} // namespace backend -} // namespace neurun - -#endif // __NEURUN_BACKEND_CPU_INITIALIZER_GENERATOR_H__ diff --git a/runtimes/neurun/src/backend/cpu/MemoryAllocator.cc b/runtimes/neurun/src/backend/cpu/MemoryAllocator.cc deleted file mode 100644 index 13d2a7ffc..000000000 --- a/runtimes/neurun/src/backend/cpu/MemoryAllocator.cc +++ /dev/null @@ -1,17 +0,0 @@ -/* - * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -//#include "internal/cpu/MemoryAllocator.h" diff --git a/runtimes/neurun/src/backend/cpu/MemoryAllocator.h b/runtimes/neurun/src/backend/cpu/MemoryAllocator.h deleted file mode 100644 index e3550ac07..000000000 --- a/runtimes/neurun/src/backend/cpu/MemoryAllocator.h +++ /dev/null @@ -1,123 +0,0 @@ -/* - * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#ifndef __INTERNAL_CPU_MEMORY_ALLOCATOR_H__ -#define __INTERNAL_CPU_MEMORY_ALLOCATOR_H__ - -#include "arm_compute/runtime/ITensorAllocator.h" -#include "arm_compute/runtime/Memory.h" - -#include <cstdint> -#include <memory> -#include <vector> - -namespace arm_compute -{ -class Coordinates; -class TensorInfo; -class Tensor; -}; - -/** Basic implementation of a CPU memory tensor allocator. */ -class TensorAllocator : public ITensorAllocator -{ -public: - /** Default constructor. 
*/ - TensorAllocator(Tensor *owner = nullptr); - /** Default destructor */ - ~TensorAllocator(); - - /** Make ITensorAllocator's init methods available */ - using ITensorAllocator::init; - - /** Shares the same backing memory with another tensor allocator, while the tensor info might be - * different. - * In other words this can be used to create a sub-tensor from another tensor while sharing the - * same memory. - * - * @note TensorAllocator have to be of the same specialized type. - * - * @param[in] allocator The allocator that owns the backing memory to be shared. Ownership becomes - * shared afterwards. - * @param[in] coords The starting coordinates of the new tensor inside the parent tensor. - * @param[in] sub_info The new tensor information (e.g. shape etc) - */ - void init(const TensorAllocator &allocator, const Coordinates &coords, TensorInfo sub_info); - - /** Returns the pointer to the allocated data. */ - uint8_t *data() const; - - /** Allocate size specified by TensorInfo of CPU memory. - * - * @note The tensor must not already be allocated when calling this function. - * - */ - void allocate() override; - - /** Free allocated CPU memory. - * - * @note The tensor must have been allocated when calling this function. - * - */ - void free() override; - /** Import an existing memory as a tensor's backing memory - * - * @warning If the tensor is flagged to be managed by a memory manager, - * this call will lead to an error. - * @warning Ownership of memory depends on the way the @ref Memory object was constructed - * @note Calling free on a tensor with imported memory will just clear - * the internal pointer value. - * - * @param[in] memory Memory to import - * - * @return error status - */ - arm_compute::Status import_memory(Memory memory); - /** Associates the tensor with a memory group - * - * @param[in] associated_memory_group Memory group to associate the tensor with - */ - void set_associated_memory_group(MemoryGroup *associated_memory_group); - -protected: - /** No-op for CPU memory - * - * @return A pointer to the beginning of the tensor's allocation. - */ - uint8_t *lock() override; - - /** No-op for CPU memory. */ - void unlock() override; - -private: - MemoryGroup *_associated_memory_group; /**< Registered memory manager */ - Memory _memory; /**< CPU memory */ - Tensor *_owner; /**< Owner of the allocator */ -}; - -namespace internal -{ -namespace cpu -{ - -class MemoryAllocator : public -{ -}; - -} // namespace cpu -} // namespace internal - -#endif // __INTERNAL_CPU_MEMORY_ALLOCATOR_H__ diff --git a/runtimes/neurun/src/backend/cpu/MemoryPlanner.cc b/runtimes/neurun/src/backend/cpu/MemoryPlanner.cc new file mode 100644 index 000000000..2d0995b8a --- /dev/null +++ b/runtimes/neurun/src/backend/cpu/MemoryPlanner.cc @@ -0,0 +1,127 @@ +/* + * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include "MemoryPlanner.h" +#include "util/logging.h" +#include <cassert> + +namespace neurun +{ +namespace backend +{ +namespace cpu +{ + +Allocator::Allocator(uint32_t capacity) +{ + assert(!_base && capacity != 0); + + _base = new uint8_t[capacity]; + + VERBOSE(ALLOC) << "allocation capacity: " << capacity << std::endl; + VERBOSE(ALLOC) << "base pointer: " << static_cast<void *>(_base) << std::endl; +} + +Allocator::~Allocator() { delete[] _base; } + +void BumpPlanner::claim(const model::operand::Index &ind, size_t size) +{ + assert(size != 0); + + Block blk{_capacity, size}; + _mem_plans[ind] = blk; + _capacity += size; + + VERBOSE(BP_PLANNER) << "CLAIM(#" << ind.value() << "): " << blk.offset << ", " << blk.size + << std::endl; +} + +void BumpPlanner::release(const model::operand::Index &ind) +{ + VERBOSE(BP_PLANNER) << "RELEASE(#" << ind.value() << "): " + << "NOTHING does" << std::endl; +} + +// There are some assumptions for claiming memory(== making a reservation for memory). +// 1. About _claim_table(std::map). +// - The table's data structure is std::map so that it always sorts +// value(model::operand::Index) by key(base_offset). +// - This claim() inserts key/value into _claim_table and the release() removes the key/value from +// _claim_table. +// - _claim_table shows the memory status at a certain point in time. Therefore, +// - If _claim_table has an offset and a certain size at a certain point in time, +// it means the place at the offset has been already claimed(== can't claim now. need to find +// someplace new). +// - If _claim_table doesn't have any element for an offset and a certain size at a certain +// point in time, it means the place at the offset can be claimed. +// 2. In the loop for _claim_table, we can assume the current claim_base_offset value is bigger than +// the previous claim_base_offset. +void FirstFitPlanner::claim(const model::operand::Index &ind, size_t size) +{ + assert(size != 0); + + // Find the right position for claiming + uint32_t next_offset = 0; + for (auto &mem_claim : _claim_table) + { + auto claimed_base_offset = mem_claim.first; + auto claimed_size = _mem_plans[mem_claim.second].size; + if (next_offset + size <= claimed_base_offset) + { + break; + } + else + { + next_offset = claimed_base_offset + claimed_size; + } + } + + // Now next_offset is set to the proper offset + _claim_table[next_offset] = ind; + _mem_plans[ind] = {next_offset, size}; + + VERBOSE(FF_PLANNER) << "claim(#" << ind.value() << "): [+" << next_offset << ", " << size << "sz]" + << std::endl; + + if (_capacity < next_offset + size) + { + _capacity = next_offset + size; + } +} + +void FirstFitPlanner::release(const model::operand::Index &ind) +{ + for (auto it = _claim_table.cbegin(); it != _claim_table.cend(); ++it) + { + if (it->second == ind) + { + uint32_t offset = it->first; + uint32_t index = ind.value(); + uint32_t size = _mem_plans[ind].size; + + _claim_table.erase(it); + + VERBOSE(FF_PLANNER) << "release(#" << index << "): [+" << offset << ", " << size << "sz]" + << std::endl; + return; + } + } + assert(!"Cannot release for given index. It has been not claimed or released already."); +} + +} // namespace cpu +} // namespace backend +} // namespace neurun diff --git a/runtimes/neurun/src/backend/cpu/MemoryPlanner.h b/runtimes/neurun/src/backend/cpu/MemoryPlanner.h new file mode 100644 index 000000000..4b2661223 --- /dev/null +++ b/runtimes/neurun/src/backend/cpu/MemoryPlanner.h @@ -0,0 +1,166 @@ +/* + * Copyright (c) 2018 Samsung Electronics Co., Ltd. 
All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +/** + * @file MemoryPlanner.h + * @brief This file contains Memory Planning related classes + */ + +#ifndef __NEURUN_BACKEND_CPU_MEMORY_PLANNER_H__ +#define __NEURUN_BACKEND_CPU_MEMORY_PLANNER_H__ + +#include <map> +#include <unordered_map> + +#include "model/operand/Index.h" + +namespace neurun +{ +namespace backend +{ +namespace cpu +{ + +/** + * @brief Structure to have memory offset and size + */ +struct Block +{ + uint32_t offset; + uint32_t size; +}; + +/** + * @brief Class to allocate memory + */ +class Allocator +{ +public: + Allocator(uint32_t capacity); + ~Allocator(); + /** + * @brief Get memory base pointer + * @return base pointer + */ + uint8_t *base() const { return _base; } + +private: + uint8_t *_base = nullptr; +}; + +/** + * @brief Interface to plan memory + */ +struct IMemoryPlanner +{ + using MemoryPlans = std::unordered_map<model::operand::Index, Block>; + + /** + * @brief Claim memory for operand + * @param[in] index The operand index + * @param[in] size The size of the memory + */ + virtual void claim(const model::operand::Index &, size_t) = 0; + /** + * @brief Release memory for operand + * @param[in] index The operand index + */ + virtual void release(const model::operand::Index &) = 0; + /** + * @brief Get capacity for memory planning + * @return The value of capacity + */ + virtual uint32_t capacity() = 0; + /** + * @brief Get MemoryPlans + * @return MemoryPlans + */ + virtual MemoryPlans &memory_plans() = 0; +}; + +/** + * @brief Class to plan memory by bump way + */ +class BumpPlanner : public IMemoryPlanner +{ +public: + /** + * @brief Claim memory for operand by bump way + * @param[in] index The operand index + * @param[in] size The size of the memory + */ + virtual void claim(const model::operand::Index &, size_t) override; + /** + * @brief Release memory for operand by bump way + * @param[in] index The operand index + */ + virtual void release(const model::operand::Index &) override; + /** + * @brief Get capacity for memory planning + * @return The value of capacity + */ + virtual uint32_t capacity() override { return _capacity; } + /** + * @brief Get MemoryPlans + * @return MemoryPlans + */ + virtual MemoryPlans &memory_plans() override { return _mem_plans; } + +private: + uint32_t _capacity = 0; + MemoryPlans _mem_plans; +}; + +/** + * @brief Class to plan memory by firstfit way + */ +class FirstFitPlanner : public IMemoryPlanner +{ +public: + /** + * @brief Claim memory for operand by firstfit way + * @param[in] index The operand index + * @param[in] size The size of the memory + */ + virtual void claim(const model::operand::Index &, size_t) override; + /** + * @brief Release memory for operand by firstfit way + * @param[in] index The operand index + */ + virtual void release(const model::operand::Index &) override; + /** + * @brief Get capacity for memory planning + * @return The value of capacity + */ + virtual uint32_t capacity() override { return _capacity; } + /** + 
* @brief Get MemoryPlans + * @return MemoryPlans + */ + virtual MemoryPlans &memory_plans() override { return _mem_plans; } + +private: + uint32_t _capacity = 0; + MemoryPlans _mem_plans; + // Use std::map because claim() assumes that _claim_table is sorted by uint32_t(base_offset) + std::map<uint32_t, model::operand::Index> _claim_table; +}; + +} // namespace cpu +} // namespace backend +} // namespace neurun + +#endif // __NEURUN_BACKEND_CPU_MEMORY_PLANNER_H__ diff --git a/runtimes/neurun/src/backend/cpu/PluginClassesAllocator.cc b/runtimes/neurun/src/backend/cpu/PluginClassesAllocator.cc new file mode 100644 index 000000000..26d4d8858 --- /dev/null +++ b/runtimes/neurun/src/backend/cpu/PluginClassesAllocator.cc @@ -0,0 +1,43 @@ +/* + * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include <memory> +#include "TensorBuilder.h" +#include "StageGenerator.h" +#include "Config.h" +#include "util/logging.h" + +extern "C" { +neurun::backend::cpu::TensorBuilder *allocate_TensorBuilder() +{ + VERBOSE(allocate_TensorBuilder) << "loaded from CPU\n"; + return new neurun::backend::cpu::TensorBuilder; +} + +neurun::backend::cpu::StageGenerator * +allocate_StageGenerator(const neurun::model::operand::Set &operand_ctx, + const std::shared_ptr<neurun::backend::cpu::TensorBuilder> &tensor_builder) +{ + VERBOSE(allocate_StageGenerator) << "loaded from CPU\n"; + return new neurun::backend::cpu::StageGenerator(operand_ctx, tensor_builder); +} + +neurun::backend::cpu::Config *allocate_Config() +{ + VERBOSE(allocate_Config) << "loaded from CPU\n"; + return new neurun::backend::cpu::Config; +} +} diff --git a/runtimes/neurun/src/backend/cpu/StageGenerator.cc b/runtimes/neurun/src/backend/cpu/StageGenerator.cc index b7a3fa24a..c53b320a4 100644 --- a/runtimes/neurun/src/backend/cpu/StageGenerator.cc +++ b/runtimes/neurun/src/backend/cpu/StageGenerator.cc @@ -18,7 +18,8 @@ #include <stdexcept> -#include "internal/Padding.h" +#include "cpp14/memory.h" +#include "util/Padding.h" #include "kernel/cpu/OperationUtils.h" #include "kernel/cpu/ConvolutionLayer.h" #include "kernel/cpu/AvgPoolLayer.h" @@ -27,12 +28,13 @@ #include "kernel/cpu/FullyConnectedLayer.h" #include "kernel/cpu/ReshapeLayer.h" #include "kernel/cpu/SoftMaxLayer.h" +#include "kernel/cpu/PermuteLayer.h" +#include "backend/BackendManager.h" +#include "backend/interface/IConfig.h" -#include "logging.h" +#include "util/logging.h" -#include "support/nnapi/Utils.h" - -#include "logging.h" +#include "util/Utils.h" namespace neurun { @@ -41,25 +43,27 @@ namespace backend namespace cpu { -StageGenerator::StageGenerator(const neurun::graph::operand::Set &operand_ctx, +StageGenerator::StageGenerator(const neurun::model::operand::Set &operand_ctx, const std::shared_ptr<TensorBuilder> &tensor_builder) : _ctx(operand_ctx), _tensor_builder(tensor_builder) { // DO NOTHING } -Stage StageGenerator::generate(const graph::operation::Conv2D::Implicit::Node &node) +void 
StageGenerator::visit(const model::operation::Conv2DNode &node) { - const ::neurun::graph::operand::Index ofm_index{node.getOutputs().at(0)}; - const ::neurun::graph::operand::Index ifm_index{node.getInputs().at(0)}; - const ::neurun::graph::operand::Index ker_index{node.getInputs().at(1)}; - const ::neurun::graph::operand::Index bias_index{node.getInputs().at(2)}; + using model::operation::Conv2DNode; + + const auto ofm_index{node.getOutputs().at(0)}; + const auto ifm_index{node.getInputs().at(Conv2DNode::Input::INPUT)}; + const auto ker_index{node.getInputs().at(Conv2DNode::Input::KERNEL)}; + const auto bias_index{node.getInputs().at(Conv2DNode::Input::BIAS)}; - const ::neurun::graph::operand::Index vstride_index{node.param().vstride_index}; - const ::neurun::graph::operand::Index hstride_index{node.param().hstride_index}; + const auto vstride_index{node.param().vstride_index}; + const auto hstride_index{node.param().hstride_index}; - const ::neurun::graph::operand::Index padding_index{node.param().padding_index}; - const ::neurun::graph::operand::Index activation_index{node.param().activation_index}; + const auto padding_index{node.param().padding_index}; + const auto activation_index{node.param().activation_index}; const PaddingCode padding_type = static_cast<PaddingCode>(_ctx.at(padding_index).asScalar<int32_t>()); @@ -67,7 +71,7 @@ Stage StageGenerator::generate(const graph::operation::Conv2D::Implicit::Node &n assert((ANEURALNETWORKS_PADDING_SAME == padding_type) || (ANEURALNETWORKS_PADDING_VALID == padding_type)); - ::internal::Stride stride; + util::Stride stride; stride.vertical = _ctx.at(vstride_index).asScalar<int32_t>(); stride.horizontal = _ctx.at(hstride_index).asScalar<int32_t>(); @@ -75,28 +79,28 @@ Stage StageGenerator::generate(const graph::operation::Conv2D::Implicit::Node &n // Construct operation parameters struct Param { - int ofm_index; - int ifm_index; - int ker_index; - int bias_index; + model::operand::Index ofm_index; + model::operand::Index ifm_index; + model::operand::Index ker_index; + model::operand::Index bias_index; ::neurun::kernel::cpu::Shape ofm_shape; ::neurun::kernel::cpu::Shape ifm_shape; ::neurun::kernel::cpu::Shape ker_shape; ::neurun::kernel::cpu::Shape bias_shape; - ::internal::Padding padding; - ::internal::Stride stride; + util::Padding padding; + util::Stride stride; FuseCode activation; }; Param param; - param.ofm_index = ofm_index.asInt(); - param.ifm_index = ifm_index.asInt(); - param.ker_index = ker_index.asInt(); - param.bias_index = bias_index.asInt(); + param.ofm_index = ofm_index; + param.ifm_index = ifm_index; + param.ker_index = ker_index; + param.bias_index = bias_index; param.ofm_shape = ::neurun::kernel::cpu::getShape(_ctx.at(ofm_index)); param.ifm_shape = ::neurun::kernel::cpu::getShape(_ctx.at(ifm_index)); @@ -105,21 +109,21 @@ Stage StageGenerator::generate(const graph::operation::Conv2D::Implicit::Node &n param.stride = stride; param.padding = (padding_type == ANEURALNETWORKS_PADDING_SAME) - ? ::internal::same_padding(_ctx.at(ifm_index).shape().asFeature(), - _ctx.at(ofm_index).shape().asFeature(), stride, - _ctx.at(ker_index).shape().asKernel().W, - _ctx.at(ker_index).shape().asKernel().H) - : ::internal::valid_padding(); + ? 
util::same_padding(_ctx.at(ifm_index).shape().asFeature(), + _ctx.at(ofm_index).shape().asFeature(), stride, + _ctx.at(ker_index).shape().asKernel().W, + _ctx.at(ker_index).shape().asKernel().H) + : util::valid_padding(); param.activation = static_cast<FuseCode>(_ctx.at(activation_index).asScalar<int32_t>()); auto tensors = _tensor_builder; - return [tensors, param](IExecutionBuilder &builder) { - auto ofm_alloc = tensors->at(::neurun::graph::operand::Index{param.ofm_index}); - auto ifm_alloc = tensors->at(::neurun::graph::operand::Index{param.ifm_index}); - auto ker_alloc = tensors->at(::neurun::graph::operand::Index{param.ker_index}); - auto bias_alloc = tensors->at(::neurun::graph::operand::Index{param.bias_index}); + returnStage([tensors, param](IExecutionBuilder &builder) { + auto ofm_alloc = tensors->at(param.ofm_index); + auto ifm_alloc = tensors->at(param.ifm_index); + auto ker_alloc = tensors->at(param.ker_index); + auto bias_alloc = tensors->at(param.bias_index); std::unique_ptr<::neurun::kernel::cpu::ConvolutionLayer> fn{ new ::neurun::kernel::cpu::ConvolutionLayer}; @@ -130,24 +134,22 @@ Stage StageGenerator::generate(const graph::operation::Conv2D::Implicit::Node &n param.stride.vertical, param.activation, ofm_alloc->buffer(), param.ofm_shape); builder.append(std::move(fn)); - }; + }); } -Stage StageGenerator::generate(const graph::operation::MaxPool2D::Implicit::Node &node) +void StageGenerator::visit(const model::operation::MaxPool2DNode &node) { - VERBOSE(MaxPool2D) << "generate CPU MaxPool2D" << std::endl; + const auto ofm_index{node.getOutputs().at(0)}; + const auto ifm_index{node.getInputs().at(model::operation::MaxPool2DNode::Input::INPUT)}; - const ::neurun::graph::operand::Index ofm_index{node.getOutputs().at(0)}; - const ::neurun::graph::operand::Index ifm_index{node.getInputs().at(0)}; + const auto kh_index{node.param().kh_index}; + const auto kw_index{node.param().kw_index}; - const ::neurun::graph::operand::Index kh_index{node.param().kh_index}; - const ::neurun::graph::operand::Index kw_index{node.param().kw_index}; + const auto vstride_index{node.param().vstride_index}; + const auto hstride_index{node.param().hstride_index}; - const ::neurun::graph::operand::Index vstride_index{node.param().vstride_index}; - const ::neurun::graph::operand::Index hstride_index{node.param().hstride_index}; - - const ::neurun::graph::operand::Index padding_index{node.param().padding_index}; - const ::neurun::graph::operand::Index activation_index{node.param().activation_index}; + const auto padding_index{node.param().padding_index}; + const auto activation_index{node.param().activation_index}; const int32_t kh = _ctx.at(kh_index).asScalar<int32_t>(); const int32_t kw = _ctx.at(kw_index).asScalar<int32_t>(); @@ -161,8 +163,8 @@ Stage StageGenerator::generate(const graph::operation::MaxPool2D::Implicit::Node // Construct operation parameters struct Param { - int ofm_index; - int ifm_index; + model::operand::Index ofm_index; + model::operand::Index ifm_index; uint32_t kw; uint32_t kh; @@ -170,16 +172,16 @@ Stage StageGenerator::generate(const graph::operation::MaxPool2D::Implicit::Node ::neurun::kernel::cpu::Shape ofm_shape; ::neurun::kernel::cpu::Shape ifm_shape; - ::internal::Padding padding; - ::internal::Stride stride; + util::Padding padding; + util::Stride stride; FuseCode activation; }; Param param; - param.ofm_index = ofm_index.asInt(); - param.ifm_index = ifm_index.asInt(); + param.ofm_index = ofm_index; + param.ifm_index = ifm_index; param.kh = kh; param.kw = kw; @@ -192,30 
+194,17 @@ Stage StageGenerator::generate(const graph::operation::MaxPool2D::Implicit::Node param.padding = (padding_type == ANEURALNETWORKS_PADDING_SAME) - ? ::internal::same_padding(_ctx.at(ifm_index).shape().asFeature(), - _ctx.at(ofm_index).shape().asFeature(), param.stride, kw, kh) - : ::internal::valid_padding(); + ? util::same_padding(_ctx.at(ifm_index).shape().asFeature(), + _ctx.at(ofm_index).shape().asFeature(), param.stride, kw, kh) + : util::valid_padding(); param.activation = static_cast<FuseCode>(_ctx.at(activation_index).asScalar<int32_t>()); - VERBOSE(MaxPool2D) << "IFM_H: " << _ctx.at(ifm_index).shape().asFeature().H << std::endl; - VERBOSE(MaxPool2D) << "IFM_W: " << _ctx.at(ifm_index).shape().asFeature().W << std::endl; - VERBOSE(MaxPool2D) << "OFM_H: " << _ctx.at(ofm_index).shape().asFeature().H << std::endl; - VERBOSE(MaxPool2D) << "OFM_W: " << _ctx.at(ofm_index).shape().asFeature().W << std::endl; - VERBOSE(MaxPool2D) << "KER_H: " << kh << std::endl; - VERBOSE(MaxPool2D) << "KER_W: " << kw << std::endl; - VERBOSE(MaxPool2D) << "STRIDE_H: " << vstride << std::endl; - VERBOSE(MaxPool2D) << "STRIDE_W: " << hstride << std::endl; - VERBOSE(MaxPool2D) << "PAD(T): " << param.padding.top << std::endl; - VERBOSE(MaxPool2D) << "PAD(B): " << param.padding.bottom << std::endl; - VERBOSE(MaxPool2D) << "PAD(L): " << param.padding.left << std::endl; - VERBOSE(MaxPool2D) << "PAD(R): " << param.padding.right << std::endl; - auto tensors = _tensor_builder; - return [tensors, param](IExecutionBuilder &builder) { - auto ofm_alloc = tensors->at(::neurun::graph::operand::Index{param.ofm_index}).get(); - auto ifm_alloc = tensors->at(::neurun::graph::operand::Index{param.ifm_index}).get(); + returnStage([tensors, param](IExecutionBuilder &builder) { + auto ofm_alloc = tensors->at(param.ofm_index).get(); + auto ifm_alloc = tensors->at(param.ifm_index).get(); std::unique_ptr<::neurun::kernel::cpu::MaxPoolLayer> fn{ new ::neurun::kernel::cpu::MaxPoolLayer}; @@ -226,24 +215,22 @@ Stage StageGenerator::generate(const graph::operation::MaxPool2D::Implicit::Node param.ofm_shape); builder.append(std::move(fn)); - }; + }); } -Stage StageGenerator::generate(const graph::operation::AvgPool2D::Implicit::Node &node) +void StageGenerator::visit(const model::operation::AvgPool2DNode &node) { - VERBOSE(AvgPool2D) << "generate CPU AvgPool2D" << std::endl; - - const ::neurun::graph::operand::Index ofm_index{node.getOutputs().at(0)}; - const ::neurun::graph::operand::Index ifm_index{node.getInputs().at(0)}; + const auto ofm_index{node.getOutputs().at(0)}; + const auto ifm_index{node.getInputs().at(model::operation::AvgPool2DNode::Input::INPUT)}; - const ::neurun::graph::operand::Index kh_index{node.param().kh_index}; - const ::neurun::graph::operand::Index kw_index{node.param().kw_index}; + const auto kh_index{node.param().kh_index}; + const auto kw_index{node.param().kw_index}; - const ::neurun::graph::operand::Index vstride_index{node.param().vstride_index}; - const ::neurun::graph::operand::Index hstride_index{node.param().hstride_index}; + const auto vstride_index{node.param().vstride_index}; + const auto hstride_index{node.param().hstride_index}; - const ::neurun::graph::operand::Index padding_index{node.param().padding_index}; - const ::neurun::graph::operand::Index activation_index{node.param().activation_index}; + const auto padding_index{node.param().padding_index}; + const auto activation_index{node.param().activation_index}; const int32_t kh = _ctx.at(kh_index).asScalar<int32_t>(); const int32_t kw = 
_ctx.at(kw_index).asScalar<int32_t>(); @@ -260,8 +247,8 @@ Stage StageGenerator::generate(const graph::operation::AvgPool2D::Implicit::Node // Construct operation parameters struct Param { - int ofm_index; - int ifm_index; + model::operand::Index ofm_index; + model::operand::Index ifm_index; uint32_t kw; uint32_t kh; @@ -269,16 +256,16 @@ Stage StageGenerator::generate(const graph::operation::AvgPool2D::Implicit::Node ::neurun::kernel::cpu::Shape ofm_shape; ::neurun::kernel::cpu::Shape ifm_shape; - ::internal::Padding padding; - ::internal::Stride stride; + util::Padding padding; + util::Stride stride; FuseCode activation; }; Param param; - param.ofm_index = ofm_index.asInt(); - param.ifm_index = ifm_index.asInt(); + param.ofm_index = ofm_index; + param.ifm_index = ifm_index; param.kh = kh; param.kw = kw; @@ -291,31 +278,17 @@ Stage StageGenerator::generate(const graph::operation::AvgPool2D::Implicit::Node param.padding = (padding_type == ANEURALNETWORKS_PADDING_SAME) - ? ::internal::same_padding(_ctx.at(ifm_index).shape().asFeature(), - _ctx.at(ofm_index).shape().asFeature(), param.stride, kw, kh) - : ::internal::valid_padding(); + ? util::same_padding(_ctx.at(ifm_index).shape().asFeature(), + _ctx.at(ofm_index).shape().asFeature(), param.stride, kw, kh) + : util::valid_padding(); param.activation = static_cast<FuseCode>(_ctx.at(activation_index).asScalar<int32_t>()); - VERBOSE(AvgPool2D) << "IFM_H: " << _ctx.at(ifm_index).shape().asFeature().H << std::endl; - VERBOSE(AvgPool2D) << "IFM_W: " << _ctx.at(ifm_index).shape().asFeature().W << std::endl; - VERBOSE(AvgPool2D) << "OFM_H: " << _ctx.at(ofm_index).shape().asFeature().H << std::endl; - VERBOSE(AvgPool2D) << "OFM_W: " << _ctx.at(ofm_index).shape().asFeature().W << std::endl; - VERBOSE(AvgPool2D) << "KER_H: " << kh << std::endl; - VERBOSE(AvgPool2D) << "KER_W: " << kw << std::endl; - VERBOSE(AvgPool2D) << "STRIDE_H: " << vstride << std::endl; - VERBOSE(AvgPool2D) << "STRIDE_W: " << hstride << std::endl; - VERBOSE(AvgPool2D) << "PAD: " << ::nnfw::support::nnapi::to_string(padding_type) << std::endl; - VERBOSE(AvgPool2D) << "PAD(T): " << param.padding.top << std::endl; - VERBOSE(AvgPool2D) << "PAD(B): " << param.padding.bottom << std::endl; - VERBOSE(AvgPool2D) << "PAD(L): " << param.padding.left << std::endl; - VERBOSE(AvgPool2D) << "PAD(R): " << param.padding.right << std::endl; - auto tensors = _tensor_builder; - return [tensors, param](IExecutionBuilder &builder) { - auto ofm_alloc = tensors->at(::neurun::graph::operand::Index{param.ofm_index}).get(); - auto ifm_alloc = tensors->at(::neurun::graph::operand::Index{param.ifm_index}).get(); + returnStage([tensors, param](IExecutionBuilder &builder) { + auto ofm_alloc = tensors->at(param.ofm_index).get(); + auto ifm_alloc = tensors->at(param.ifm_index).get(); std::unique_ptr<::neurun::kernel::cpu::AvgPoolLayer> fn{ new ::neurun::kernel::cpu::AvgPoolLayer}; @@ -326,20 +299,18 @@ Stage StageGenerator::generate(const graph::operation::AvgPool2D::Implicit::Node param.ofm_shape); builder.append(std::move(fn)); - }; + }); } -Stage StageGenerator::generate(const graph::operation::Concat::Node &node) +void StageGenerator::visit(const model::operation::ConcatNode &node) { - VERBOSE(Concat) << "generate CPU Concat" << std::endl; - - const ::neurun::graph::operand::Index ofm_index{node.getOutputs().at(0)}; - const ::neurun::graph::operand::Index axis_index{node.param().axis_index}; + const auto ofm_index{node.getOutputs().at(0)}; + const auto axis_index{node.param().axis_index}; struct Param { - 
int32_t output_index; - std::vector<int32_t> input_indexes; + model::operand::Index output_index; + std::vector<model::operand::Index> input_indexes; int32_t axis; @@ -349,10 +320,10 @@ Stage StageGenerator::generate(const graph::operation::Concat::Node &node) Param param; - param.output_index = ofm_index.asInt(); + param.output_index = ofm_index; for (const auto &e : node.getInputs()) { - param.input_indexes.emplace_back(e.asInt()); + param.input_indexes.emplace_back(e); } param.axis = _ctx.at(axis_index).asScalar<int32_t>(); @@ -365,14 +336,13 @@ Stage StageGenerator::generate(const graph::operation::Concat::Node &node) auto tensors = _tensor_builder; - return [tensors, param](IExecutionBuilder &builder) { - auto output_alloc = tensors->at(::neurun::graph::operand::Index{param.output_index}).get(); + returnStage([tensors, param](IExecutionBuilder &builder) { + auto output_alloc = tensors->at(param.output_index).get(); std::vector<const uint8_t *> input_buffers; for (auto ifm_ind : param.input_indexes) { - input_buffers.emplace_back( - tensors->at(::neurun::graph::operand::Index{ifm_ind}).get()->buffer()); + input_buffers.emplace_back(tensors->at(ifm_ind).get()->buffer()); } std::unique_ptr<::neurun::kernel::cpu::ConcatLayer> fn{new ::neurun::kernel::cpu::ConcatLayer}; @@ -381,26 +351,26 @@ Stage StageGenerator::generate(const graph::operation::Concat::Node &node) param.ofm_shape); builder.append(std::move(fn)); - }; + }); } -Stage StageGenerator::generate(const graph::operation::FullyConnected::Node &node) +void StageGenerator::visit(const model::operation::FullyConnectedNode &node) { - VERBOSE(FullyConnected) << "generate CPU FullyConnected" << std::endl; + using model::operation::FullyConnectedNode; - const ::neurun::graph::operand::Index output_index{node.getOutputs().at(0)}; - const ::neurun::graph::operand::Index input_index{node.getInputs().at(0)}; - const ::neurun::graph::operand::Index weight_index{node.getInputs().at(1)}; - const ::neurun::graph::operand::Index bias_index{node.getInputs().at(2)}; - const ::neurun::graph::operand::Index activation_index{node.param().activation_index}; + const auto output_index{node.getOutputs().at(0)}; + const auto input_index{node.getInputs().at(FullyConnectedNode::Input::INPUT)}; + const auto weight_index{node.getInputs().at(FullyConnectedNode::Input::WEIGHT)}; + const auto bias_index{node.getInputs().at(FullyConnectedNode::Input::BIAS)}; + const auto activation_index{node.param().activation_index}; // Construct operation parameters struct Param { - int output_index; - int input_index; - int weight_index; - int bias_index; + model::operand::Index output_index; + model::operand::Index input_index; + model::operand::Index weight_index; + model::operand::Index bias_index; ::neurun::kernel::cpu::Shape ofm_shape; ::neurun::kernel::cpu::Shape ifm_shape; @@ -412,10 +382,10 @@ Stage StageGenerator::generate(const graph::operation::FullyConnected::Node &nod Param param; - param.output_index = output_index.asInt(); - param.input_index = input_index.asInt(); - param.weight_index = weight_index.asInt(); - param.bias_index = bias_index.asInt(); + param.output_index = output_index; + param.input_index = input_index; + param.weight_index = weight_index; + param.bias_index = bias_index; param.ofm_shape = ::neurun::kernel::cpu::getShape(_ctx.at(output_index)); param.ifm_shape = ::neurun::kernel::cpu::getShape(_ctx.at(input_index)); @@ -426,11 +396,11 @@ Stage StageGenerator::generate(const graph::operation::FullyConnected::Node &nod auto tensors = 
_tensor_builder; - return [tensors, param](IExecutionBuilder &builder) { - auto output_alloc = tensors->at(::neurun::graph::operand::Index{param.output_index}).get(); - auto input_alloc = tensors->at(::neurun::graph::operand::Index{param.input_index}).get(); - auto weight_alloc = tensors->at(::neurun::graph::operand::Index{param.weight_index}).get(); - auto bias_alloc = tensors->at(::neurun::graph::operand::Index{param.bias_index}).get(); + returnStage([tensors, param](IExecutionBuilder &builder) { + auto output_alloc = tensors->at(param.output_index).get(); + auto input_alloc = tensors->at(param.input_index).get(); + auto weight_alloc = tensors->at(param.weight_index).get(); + auto bias_alloc = tensors->at(param.bias_index).get(); std::unique_ptr<::neurun::kernel::cpu::FullyConnectedLayer> fn{ new ::neurun::kernel::cpu::FullyConnectedLayer}; @@ -440,18 +410,18 @@ Stage StageGenerator::generate(const graph::operation::FullyConnected::Node &nod output_alloc->buffer(), param.ofm_shape); builder.append(std::move(fn)); - }; + }); } -Stage StageGenerator::generate(const graph::operation::Reshape::Node &node) +void StageGenerator::visit(const model::operation::ReshapeNode &node) { - const ::neurun::graph::operand::Index output_index{node.getOutputs().at(0)}; - const ::neurun::graph::operand::Index input_index{node.getInputs().at(0)}; + const auto output_index{node.getOutputs().at(0)}; + const auto input_index{node.getInputs().at(model::operation::ReshapeNode::Input::INPUT)}; struct Param { - int output_index; - int input_index; + model::operand::Index output_index; + model::operand::Index input_index; ::neurun::kernel::cpu::Shape ofm_shape; ::neurun::kernel::cpu::Shape ifm_shape; @@ -459,17 +429,17 @@ Stage StageGenerator::generate(const graph::operation::Reshape::Node &node) Param param; - param.output_index = output_index.asInt(); - param.input_index = input_index.asInt(); + param.output_index = output_index; + param.input_index = input_index; param.ofm_shape = ::neurun::kernel::cpu::getShape(_ctx.at(output_index)); param.ifm_shape = ::neurun::kernel::cpu::getShape(_ctx.at(input_index)); auto tensors = _tensor_builder; - return [tensors, param](IExecutionBuilder &builder) { - auto output_alloc = tensors->at(::neurun::graph::operand::Index{param.output_index}).get(); - auto input_alloc = tensors->at(::neurun::graph::operand::Index{param.input_index}).get(); + returnStage([tensors, param](IExecutionBuilder &builder) { + auto output_alloc = tensors->at(param.output_index).get(); + auto input_alloc = tensors->at(param.input_index).get(); std::unique_ptr<::neurun::kernel::cpu::ReshapeLayer> fn{ new ::neurun::kernel::cpu::ReshapeLayer}; @@ -477,21 +447,19 @@ Stage StageGenerator::generate(const graph::operation::Reshape::Node &node) fn->configure(input_alloc->buffer(), param.ifm_shape, output_alloc->buffer(), param.ofm_shape); builder.append(std::move(fn)); - }; + }); } -Stage StageGenerator::generate(const graph::operation::Softmax::Node &node) +void StageGenerator::visit(const model::operation::SoftmaxNode &node) { - VERBOSE(Softmax) << "generate CPU Softmax" << std::endl; - - const ::neurun::graph::operand::Index output_index{node.getOutputs().at(0)}; - const ::neurun::graph::operand::Index input_index{node.getInputs().at(0)}; - const ::neurun::graph::operand::Index scale_index{node.param().scale_index}; + const auto output_index{node.getOutputs().at(0)}; + const auto input_index{node.getInputs().at(model::operation::SoftmaxNode::Input::INPUT)}; + const auto scale_index{node.param().scale_index}; 
struct Param { - int output_index; - int input_index; + model::operand::Index output_index; + model::operand::Index input_index; ::neurun::kernel::cpu::Shape ofm_shape; ::neurun::kernel::cpu::Shape ifm_shape; @@ -501,8 +469,8 @@ Stage StageGenerator::generate(const graph::operation::Softmax::Node &node) Param param; - param.output_index = output_index.asInt(); - param.input_index = input_index.asInt(); + param.output_index = output_index; + param.input_index = input_index; param.ofm_shape = ::neurun::kernel::cpu::getShape(_ctx.at(output_index)); param.ifm_shape = ::neurun::kernel::cpu::getShape(_ctx.at(input_index)); @@ -511,9 +479,9 @@ Stage StageGenerator::generate(const graph::operation::Softmax::Node &node) auto tensors = _tensor_builder; - return [tensors, param](IExecutionBuilder &builder) { - auto output_alloc = tensors->at(::neurun::graph::operand::Index{param.output_index}).get(); - auto input_alloc = tensors->at(::neurun::graph::operand::Index{param.input_index}).get(); + returnStage([tensors, param](IExecutionBuilder &builder) { + auto output_alloc = tensors->at(param.output_index).get(); + auto input_alloc = tensors->at(param.input_index).get(); std::unique_ptr<::neurun::kernel::cpu::SoftMaxLayer> fn{ new ::neurun::kernel::cpu::SoftMaxLayer}; @@ -522,15 +490,58 @@ Stage StageGenerator::generate(const graph::operation::Softmax::Node &node) param.ofm_shape); builder.append(std::move(fn)); - }; + }); } -Stage StageGenerator::generate(const graph::operation::NOP::Node & /* node */) +void StageGenerator::visit(const model::operation::PermuteNode &node) { - // DO NOTHING - return nullptr; + const auto output_index{node.getOutputs().at(0)}; + const auto input_index{node.getInputs().at(0)}; + + using PermuteType = model::operation::PermuteNode::Type; + + struct Param + { + model::operand::Index output_index; + model::operand::Index input_index; + + model::operand::Shape shape; + + PermuteType type{PermuteType::COPY}; + }; + + Param param; + + param.output_index = output_index; + param.input_index = input_index; + + param.shape = _ctx.at(output_index).shape(); + param.type = node.param().type; + + // assert(param.shape == _ctx.at(input_index)); + + const auto &input_li = _ctx.at(input_index).lower_info(); + const auto &output_li = _ctx.at(output_index).lower_info(); + const auto input_backend = input_li->def_backends().getOnlyElement(); + const auto output_backend = output_li->def_backends().getOnlyElement(); + + const auto input_tensors = input_backend->tensor_builder(); + const auto output_tensors = output_backend->tensor_builder(); + + returnStage([input_tensors, output_tensors, param](IExecutionBuilder &builder) { + auto output_object = output_tensors->wrapTensor(param.output_index); + auto input_object = input_tensors->wrapTensor(param.input_index); + + auto fn = nnfw::cpp14::make_unique<::neurun::kernel::cpu::PermuteLayer>(); + + fn->configure(input_object, output_object, param.shape, param.type); + + builder.append(std::move(fn)); + }); } +void StageGenerator::visit(const model::operation::AddNode &) { throw std::runtime_error("NYI"); } + } // namespace neurun } // namespace backend } // namespace cpu diff --git a/runtimes/neurun/src/backend/cpu/StageGenerator.h b/runtimes/neurun/src/backend/cpu/StageGenerator.h index acdd2c8b2..6a0e387da 100644 --- a/runtimes/neurun/src/backend/cpu/StageGenerator.h +++ b/runtimes/neurun/src/backend/cpu/StageGenerator.h @@ -17,9 +17,9 @@ #ifndef __NEURUN_BACKEND_CPU_STAGE_GENERATOR_H__ #define __NEURUN_BACKEND_CPU_STAGE_GENERATOR_H__ -#include 
"backend/IStageGenerator.h" +#include "backend/interface/IStageGenerator.h" -#include "graph/operand/Set.h" +#include "model/operand/Set.h" #include "backend/cpu/operand/Tensor.h" #include "TensorBuilder.h" @@ -33,22 +33,18 @@ namespace cpu class StageGenerator : public IStageGenerator { public: - StageGenerator(const neurun::graph::operand::Set &ctx, + StageGenerator(const neurun::model::operand::Set &ctx, const std::shared_ptr<TensorBuilder> &tensor_builder); virtual std::shared_ptr<ITensorBuilder> tensor_builder() override { return _tensor_builder; } - virtual Stage generate(const graph::operation::Conv2D::Implicit::Node &node) override; - virtual Stage generate(const graph::operation::MaxPool2D::Implicit::Node &node) override; - virtual Stage generate(const graph::operation::AvgPool2D::Implicit::Node &node) override; - virtual Stage generate(const graph::operation::Concat::Node &node) override; - virtual Stage generate(const graph::operation::FullyConnected::Node &node) override; - virtual Stage generate(const graph::operation::Reshape::Node &node) override; - virtual Stage generate(const graph::operation::Softmax::Node &node) override; - virtual Stage generate(const graph::operation::NOP::Node &node) override; +#define OP(InternalName, IsNnApi, NnApiName) \ + virtual void visit(const model::operation::InternalName &) override; +#include "model/operation/Op.lst" +#undef OP private: - const neurun::graph::operand::Set &_ctx; + const neurun::model::operand::Set &_ctx; std::shared_ptr<TensorBuilder> _tensor_builder; }; diff --git a/runtimes/neurun/src/backend/cpu/TensorBuilder.cc b/runtimes/neurun/src/backend/cpu/TensorBuilder.cc index 1b972a830..9c39b9c00 100644 --- a/runtimes/neurun/src/backend/cpu/TensorBuilder.cc +++ b/runtimes/neurun/src/backend/cpu/TensorBuilder.cc @@ -19,6 +19,7 @@ #include <cassert> #include "operand/Object.h" +#include "util/logging.h" namespace neurun { @@ -27,43 +28,93 @@ namespace backend namespace cpu { -TensorBuilder::TensorBuilder() +TensorBuilder::TensorBuilder() : _mem_planner(std::make_shared<FirstFitPlanner>()) { // DO NOTHING } -void TensorBuilder::mark(const ::neurun::graph::operand::Index &ind) +void TensorBuilder::registerTensorInfo(const model::operand::Index &ind, + const compiler::TensorInfo &info) { - assert(_tensors.size() == 0); + _tensor_info_map.insert({ind, info}); +} + +void TensorBuilder::registerSubTensorInfo(const model::operand::Index &, + const compiler::SubTensorInfo &) +{ + // Not supported yet + assert(false); +} + +void TensorBuilder::notifyFirstUse(const model::operand::Index &ind) +{ + assert(_tensor_info_map.find(ind) != _tensor_info_map.end()); + const auto &info = _tensor_info_map.at(ind); - _inds.insert(ind); + const auto size = info.total_size(); + _mem_planner->claim(ind, size); } -void TensorBuilder::prepare(codegen::Plan &plan, - const std::map<int, ::arm_compute::TensorInfo> &tensor_info_ctx) +void TensorBuilder::notifyLastUse(const model::operand::Index &ind) { _mem_planner->release(ind); } + +void TensorBuilder::prepare(void) { assert(_tensors.size() == 0); - for (auto ind_int : _inds) + _mem_alloc = std::make_shared<Allocator>(_mem_planner->capacity()); + assert(_mem_alloc->base()); + + for (auto &mem_plan : _mem_planner->memory_plans()) { - ::neurun::graph::operand::Index ind{ind_int}; - auto tensor = std::make_shared<operand::Tensor>(tensor_info_ctx.at(ind.asInt())); - // TODO Fix allocation here. When Tensor object is created the memory for tensor is also - // allocated, and this must be fixed. 
- plan.operands().set(ind, std::make_shared<operand::Object>(tensor)); + auto ind = mem_plan.first; + auto mem_blk = mem_plan.second; + const auto &info = _tensor_info_map[ind]; + + uint8_t *buffer = _mem_alloc->base() + mem_blk.offset; + auto tensor = std::make_shared<operand::Tensor>(info); + tensor->setBuffer(buffer); _tensors[ind] = tensor; + + VERBOSE(CPU_TENSORBUILDER) << "TENSOR(#" << ind.value() << "): " << static_cast<void *>(buffer) + << std::endl; + + // If we do not make tensor here currently, stages would cause segment fault } } void TensorBuilder::allocate(void) { - assert(_inds.size() == _tensors.size()); - // NOTE For now nothing to do. Allocation is done in prepare stage, which is wrong - // See also: comment in `prepare()` } -std::shared_ptr<operand::Tensor> TensorBuilder::at(const ::neurun::graph::operand::Index &ind) +std::shared_ptr<::neurun::backend::operand::ITensor> +TensorBuilder::tensorAt(const model::operand::Index &ind) +{ + return _tensors.at(ind); +} + +std::shared_ptr<backend::operand::IObject> +TensorBuilder::wrapTensor(const model::operand::Index &ind) +{ + if (_objects.find(ind) != _objects.end()) + { + return _objects.at(ind); + } + else + { + return _objects[ind] = std::make_shared<operand::Object>(_tensors.at(ind)); + } +} + +void TensorBuilder::iterate(const IterateFunction &fn) +{ + for (auto it : _tensors) + { + fn(it.first); + } +} + +std::shared_ptr<operand::Tensor> TensorBuilder::at(const ::neurun::model::operand::Index &ind) { return _tensors.at(ind); } diff --git a/runtimes/neurun/src/backend/cpu/TensorBuilder.h b/runtimes/neurun/src/backend/cpu/TensorBuilder.h index f61a930fe..2715d57f0 100644 --- a/runtimes/neurun/src/backend/cpu/TensorBuilder.h +++ b/runtimes/neurun/src/backend/cpu/TensorBuilder.h @@ -18,11 +18,12 @@ #define __NEURUN_BACKEND_CPU_TENSOR_BUILDER_H__ #include <unordered_map> -#include <unordered_set> -#include "backend/ITensorBuilder.h" +#include "backend/interface/ITensorBuilder.h" #include "backend/cpu/operand/Tensor.h" -#include "graph/operand/Index.h" +#include "backend/cpu/operand/Object.h" +#include "model/operand/Index.h" +#include "MemoryPlanner.h" namespace neurun { @@ -31,23 +32,47 @@ namespace backend namespace cpu { -class Plan; - class TensorBuilder : public ITensorBuilder { public: TensorBuilder(); - virtual void mark(const ::neurun::graph::operand::Index &ind) override; - virtual void prepare(codegen::Plan &plan, - const std::map<int, ::arm_compute::TensorInfo> &tensor_info_ctx) override; + /** + * @brief Register tensor information to allocate on CPU backend + * @param[in] ind Operand index + * @param[in] info Tensor information + */ + virtual void registerTensorInfo(const model::operand::Index &ind, + const compiler::TensorInfo &info) override; + /** + * @brief Register subtensor information to allocate on CPU backend + * @param[in] ind Operand index + * @param[in] info Tensor information + */ + virtual void registerSubTensorInfo(const model::operand::Index &ind, + const compiler::SubTensorInfo &info) override; + + virtual void notifyFirstUse(const model::operand::Index &) override; + virtual void notifyLastUse(const model::operand::Index &) override; + + virtual void prepare(void) override; virtual void allocate(void) override; - std::shared_ptr<operand::Tensor> at(const ::neurun::graph::operand::Index &ind); + virtual std::shared_ptr<::neurun::backend::operand::ITensor> + tensorAt(const model::operand::Index &ind) override; + virtual std::shared_ptr<backend::operand::IObject> + wrapTensor(const 
diff --git a/runtimes/neurun/src/backend/cpu/operand/Object.cc b/runtimes/neurun/src/backend/cpu/operand/Object.cc
index 52b63fba7..011747a8c 100644
--- a/runtimes/neurun/src/backend/cpu/operand/Object.cc
+++ b/runtimes/neurun/src/backend/cpu/operand/Object.cc
@@ -25,7 +25,8 @@ namespace cpu
 namespace operand
 {
 
-void Object::access(const std::function<void(::arm_compute::ITensor &tensor)> &fn) const
+void Object::access(
+    const std::function<void(::neurun::backend::operand::ITensor &tensor)> &fn) const
 {
   fn(*_tensor);
 }
diff --git a/runtimes/neurun/src/backend/cpu/operand/Object.h b/runtimes/neurun/src/backend/cpu/operand/Object.h
index 08f63f3dc..5ef7c4fbf 100644
--- a/runtimes/neurun/src/backend/cpu/operand/Object.h
+++ b/runtimes/neurun/src/backend/cpu/operand/Object.h
@@ -18,9 +18,9 @@
 #define __NEURUN_BACKEND_CPU_OPERAND_OBJECT_H__
 
 #include <memory>
-#include <arm_compute/core/ITensor.h>
+#include "backend/interface/operand/ITensor.h"
 
-#include "backend/IObject.h"
+#include "backend/interface/operand/IObject.h"
 
 namespace neurun
 {
@@ -37,19 +37,20 @@ public:
   Object() = default;
 
 public:
-  Object(const std::shared_ptr<::arm_compute::ITensor> &tensor) : _tensor{tensor}
+  Object(const std::shared_ptr<::neurun::backend::operand::ITensor> &tensor) : _tensor{tensor}
   {
     // DO NOTHING
   }
 
 public:
-  ::arm_compute::ITensor *ptr(void) const override { return _tensor.get(); }
+  ::neurun::backend::operand::ITensor *ptr(void) const override { return _tensor.get(); }
 
 private:
-  std::shared_ptr<::arm_compute::ITensor> _tensor;
+  std::shared_ptr<::neurun::backend::operand::ITensor> _tensor;
 
 public:
-  void access(const std::function<void(::arm_compute::ITensor &tensor)> &fn) const override;
+  void
+  access(const std::function<void(::neurun::backend::operand::ITensor &tensor)> &fn) const override;
 };
 
 } // namespace operand
diff --git a/runtimes/neurun/src/backend/cpu/operand/Tensor.cc b/runtimes/neurun/src/backend/cpu/operand/Tensor.cc
index 0e4f34aac..a5251292e 100644
--- a/runtimes/neurun/src/backend/cpu/operand/Tensor.cc
+++ b/runtimes/neurun/src/backend/cpu/operand/Tensor.cc
@@ -16,6 +16,8 @@
 
 #include "Tensor.h"
 
+#define NO_USE(a) (void)(a)
+
 namespace neurun
 {
 namespace backend
@@ -25,7 +27,11 @@ namespace cpu
 namespace operand
 {
 
-// NO IMPLEMENTATION YET
+size_t Tensor::calcOffset(const neurun::util::feature::Coordinate4D &coords)
+{
+  NO_USE(coords);
+  throw std::runtime_error("offset_element_in_bytes is not supported for cpu::Tensor now.");
+}
 
 } // namespace operand
 } // namespace cpu
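With Object::access now taking the backend-neutral ITensor, callers no longer touch ACL types at all. A small illustrative example of reading a tensor through the new signature; `object` and the surrounding setup are assumed, not shown in this diff, and only access()/buffer()/total_size() are confirmed by the patch:

    #include <cstring>
    #include <vector>

    // `object` is assumed to be a cpu::operand::Object wrapping a prepared Tensor.
    std::vector<uint8_t> host_copy;
    object.access([&host_copy](::neurun::backend::operand::ITensor &tensor) {
      // Copy the planned arena region out through the generic interface.
      host_copy.resize(tensor.total_size());
      std::memcpy(host_copy.data(), tensor.buffer(), tensor.total_size());
    });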
diff --git a/runtimes/neurun/src/backend/cpu/operand/Tensor.h b/runtimes/neurun/src/backend/cpu/operand/Tensor.h
index 83a99acf2..7500f890f 100644
--- a/runtimes/neurun/src/backend/cpu/operand/Tensor.h
+++ b/runtimes/neurun/src/backend/cpu/operand/Tensor.h
@@ -17,8 +17,8 @@
 #ifndef __NEURUN_BACKEND_CPU_OPERAND_TENSOR_H__
 #define __NEURUN_BACKEND_CPU_OPERAND_TENSOR_H__
 
-#include <arm_compute/core/ITensor.h>
-#include <arm_compute/core/TensorInfo.h>
+#include "backend/interface/operand/ITensor.h"
+#include "compiler/TensorInfo.h"
 
 namespace neurun
 {
@@ -29,38 +29,40 @@ namespace cpu
 namespace operand
 {
 
-class Tensor : public ::arm_compute::ITensor
+class Tensor : public ::neurun::backend::operand::ITensor
 {
 public:
-  Tensor() = default;
+  Tensor() = delete;
 
-  Tensor(::arm_compute::TensorInfo info) : _info(info)
-  {
-    // TODO Do not allocate buffer here. This tensor is just an abstract Tensor object for cpu.
-    uint32_t size = _info.total_size(); // NOTE This size may not be accurate
-    _buffer = new uint8_t[size];        // NOTE The allocated buffer is never deallocated.
-  }
-
-  Tensor(uint8_t *buffer) : _buffer(buffer)
+public:
+  Tensor(const compiler::TensorInfo &info) : _info(info)
   {
     // DO NOTHING
   }
 
 public:
   void setBuffer(uint8_t *buffer) { _buffer = buffer; }
+  ::neurun::model::operand::DataType data_type() const { return _info.typeInfo().type(); }
 
 public:
-  ::arm_compute::TensorInfo *info() const override
-  {
-    return const_cast<::arm_compute::TensorInfo *>(&_info);
-  }
-
-  ::arm_compute::TensorInfo *info() override { return &_info; }
-
   uint8_t *buffer() const override { return _buffer; }
+  /**
+   * @brief Get dimension by index
+   *
+   * @param index Index to get dimension
+   * @return size_t Dimension at index
+   * @note  N : dimension(0)
+   *        H : dimension(1)
+   *        W : dimension(2)
+   *        C : dimension(3)
+   */
+  size_t dimension(size_t index) const override { return _info.shape().dim(index); }
+  size_t num_dimensions() const override { return _info.shape().dims().size(); }
+  size_t total_size() const override { return _info.total_size(); }
+  size_t calcOffset(const neurun::util::feature::Coordinate4D &coords) override;
 
 private:
-  ::arm_compute::TensorInfo _info;
+  compiler::TensorInfo _info;
   uint8_t *_buffer = nullptr;
 };
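calcOffset() is declared here but, per Tensor.cc above, still throws. Given the N/H/W/C mapping documented on dimension(), a future NHWC implementation could plausibly look like the sketch below; it assumes Coordinate4D exposes n()/h()/w()/c() accessors and that the element size can be recovered from total_size(), neither of which this diff shows:

    // Hypothetical NHWC row-major byte offset; not the actual implementation.
    size_t Tensor::calcOffset(const neurun::util::feature::Coordinate4D &coords)
    {
      const size_t H = dimension(1), W = dimension(2), C = dimension(3);
      // Assumed recoverable element size: total bytes divided by element count.
      const size_t elem_size = total_size() / (dimension(0) * H * W * C);
      const size_t index = ((coords.n() * H + coords.h()) * W + coords.w()) * C + coords.c();
      return index * elem_size;
    }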