author     Chunseok Lee <chunseok.lee@samsung.com>    2020-12-14 14:43:43 +0900
committer  Chunseok Lee <chunseok.lee@samsung.com>    2020-12-14 14:43:43 +0900
commit     62529acabbafce7730601ed01d5709d7bc0d378a (patch)
tree       bf6912cfa8fac4a2997292bfcb3c82055734c97e /runtime/onert/backend
parent     6ea13af5257155ff993c205cf997b870cc627f73 (diff)
Imported Upstream version 1.12.0 (upstream/1.12.0)
Diffstat (limited to 'runtime/onert/backend')
98 files changed, 4947 insertions, 686 deletions
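Editor's note: much of this import moves tensor planning and kernel generation into per-backend `BackendContext` classes (`genTensors`/`genKernels`, seen below for acl_cl and acl_neon). The new `planTensors` walks operation sequences in execution order and drives the tensor builder with use/def counts: constants get one extra use so they are claimed first and released last, a tensor's definition triggers `notifyFirstUse`, and its last remaining use triggers `notifyLastUse`. The sketch below illustrates only that counting scheme; the `Op` struct, `plan_tensors`, and the `TensorBuilderStub` are hypothetical stand-ins, not the actual onert classes, which operate on graph operands and `AclTensorBuilder`.

```cpp
// Minimal sketch of use/def-count tensor lifetime planning (assumed shape of
// the algorithm in the new BackendContext::planTensors; names are illustrative).
#include <cstdint>
#include <iostream>
#include <map>
#include <set>
#include <string>
#include <vector>

struct Op
{
  std::vector<std::string> inputs;
  std::vector<std::string> outputs;
};

// Stand-in for the backend tensor builder: it only records the planned order.
struct TensorBuilderStub
{
  void notifyFirstUse(const std::string &t) { std::cout << "claim   " << t << "\n"; }
  void notifyLastUse(const std::string &t) { std::cout << "release " << t << "\n"; }
};

void plan_tensors(const std::vector<Op> &order, const std::set<std::string> &constants,
                  TensorBuilderStub &tb)
{
  std::map<std::string, uint32_t> uses; // remaining uses per tensor
  std::map<std::string, uint32_t> defs; // 1 until the defining op is visited

  for (const auto &op : order)
  {
    for (const auto &t : op.outputs)
      defs[t] = 1;
    for (const auto &t : op.inputs)
      uses[t]++;
  }

  // Constants get one extra use so they are claimed up front and released last.
  for (const auto &t : constants)
  {
    uses[t]++;
    tb.notifyFirstUse(t);
  }

  for (const auto &op : order)
  {
    // 1. Claim each output when its defining operation is reached
    for (const auto &t : op.outputs)
      if (defs[t])
      {
        defs[t] = 0;
        tb.notifyFirstUse(t);
      }
    // 2. Release each input once its last use has been consumed
    for (const auto &t : op.inputs)
      if (--uses[t] == 0)
        tb.notifyLastUse(t);
  }

  // Drop the artificial extra use on constants at the very end
  for (const auto &t : constants)
    if (--uses[t] == 0)
      tb.notifyLastUse(t);
}

int main()
{
  TensorBuilderStub tb;
  // Two chained ops: {"w"} -> "h" -> "out", with "w" a constant weight tensor.
  plan_tensors({{{"w"}, {"h"}}, {{"h"}, {"out"}}}, {"w"}, tb);
}
```

In the executors without a fixed linear order, the diff instead marks every registered operand with `notifyFirstUse` and never releases it, as a workaround for the static memory planner.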
diff --git a/runtime/onert/backend/CMakeLists.txt b/runtime/onert/backend/CMakeLists.txt
index 42d622aa8..dc038c975 100644
--- a/runtime/onert/backend/CMakeLists.txt
+++ b/runtime/onert/backend/CMakeLists.txt
@@ -4,3 +4,5 @@ add_subdirectory(cpu)
 add_subdirectory(acl_cl)
 add_subdirectory(acl_neon)
 add_subdirectory(acl_common)
+add_subdirectory(ruy)
+add_subdirectory(xnnpack)
diff --git a/runtime/onert/backend/acl_cl/Backend.h b/runtime/onert/backend/acl_cl/Backend.h
index 5c5041378..4f48314c1 100644
--- a/runtime/onert/backend/acl_cl/Backend.h
+++ b/runtime/onert/backend/acl_cl/Backend.h
@@ -20,6 +20,7 @@
 #include <memory>
 #include <backend/Backend.h>
 
+#include "BackendContext.h"
 #include "Config.h"
 #include "ConstantInitializer.h"
 #include "KernelGenerator.h"
@@ -41,21 +42,20 @@ public:
 
   std::shared_ptr<IConfig> config() const override { return _config; }
 
-  std::unique_ptr<BackendContext> newContext(const ir::Graph &graph,
-                                             const std::shared_ptr<custom::IKernelBuilder> &,
-                                             bool is_linear_executor) const override
+  std::unique_ptr<backend::BackendContext>
+  newContext(const ir::Graph &graph, const std::shared_ptr<custom::IKernelBuilder> &,
+             bool is_linear_executor) const override
   {
     const auto &operands = graph.operands();
     const auto &operations = graph.operations();
-    auto context = std::make_unique<BackendContext>(this, &graph);
+    auto context = std::make_unique<acl_cl::BackendContext>(this, &graph);
     auto tm = createTensorManager(is_linear_executor);
     auto tr = std::make_shared<acl_common::AclTensorRegistry<TensorManager>>(tm);
-    auto tb = std::make_shared<TensorBuilder>(operands, tm, tr);
+    auto tb = std::make_shared<TensorBuilder>(operands, tm);
     context->tensor_registry = tr;
     context->tensor_builder = tb;
     context->constant_initializer = std::make_shared<ConstantInitializer>(operands, tr);
     context->kernel_gen = std::make_shared<KernelGenerator>(operands, operations, tb, tr);
-    context->tensor_register = nullptr;
     context->optimizer = std::make_shared<Optimizer>(context.get());
     return context;
   }
diff --git a/runtime/onert/backend/acl_cl/BackendContext.cc b/runtime/onert/backend/acl_cl/BackendContext.cc
new file mode 100644
index 000000000..a6f228a4f
--- /dev/null
+++ b/runtime/onert/backend/acl_cl/BackendContext.cc
@@ -0,0 +1,302 @@
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */ + +#include "BackendContext.h" + +#include "TensorBuilder.h" +#include "KernelGenerator.h" +#include "Optimizer.h" +#include "util/logging.h" +#include "ir/Index.h" +#include "ir/OperandIndexMap.h" +#include "ir/OperandIndexSequence.h" + +namespace onert +{ +namespace backend +{ +namespace acl_cl +{ + +void BackendContext::initConsts() +{ + for (auto &op : operation_list()) + { + constant_initializer->setLayout(op.layout); + graph()->operations().at(op.index).accept(*constant_initializer); + } + + for (auto ind : operand_list()) + { + const auto &obj = graph()->operands().at(ind); + if (obj.isConstant() && !constant_initializer->exist(ind)) + { + constant_initializer->registerDefaultInitializer(ind, obj); + } + } + + constant_initializer->run(); +} + +void BackendContext::planTensors(const std::vector<onert::ir::OpSequenceIndex> &order, + const ir::OpSequences &op_seqs, const ir::LowerInfoMap &lower_info) +{ + ir::OperandIndexMap<uint32_t> uses_map; + ir::OperandIndexMap<uint32_t> def_map; + ir::OperandIndexSequence constants; + + // Prepare scanning + for (auto ind : operand_list()) + { + const auto &obj = graph()->operands().at(ind); + const auto &li = lower_info.operand.at(ind); + if (li->def_factors().getOnlyElement().backend() != backend()) + continue; + + // Ignore unused tensor + if (li->def_factors().size() == 0 && li->use_factors().size() == 0) + { + VERBOSE(planTensors) << "Operand #" << ind.value() << " will not be used. no more process." + << std::endl; + return; + } + + uses_map[ind] = obj.getUses().size(); + def_map[ind] = obj.getDef().valid() ? 1 : 0; + + if (obj.isConstant()) + constants.append(ind); + + auto factor = li->def_factors().getOnlyElement(); + if (!tensor_builder->isRegistered(ind)) + { + // These tensors do not exist in any op_seq (No use and def) + const auto info = obj.info(); + const auto backend_layout = factor.layout(); + // TODO Change tensor info to have permuted shape + tensor_builder->registerTensorInfo(ind, info, backend_layout); + } + } + + // Start scanning to do notify{First|Last}Use for each tensor + + // If a tensor is a constant, increase the use of the tensor and allocate it first. + // Increasing use count here makes the tensor never be deallocated, i.e it they will be + // deallocated last. + VERBOSE(planTensors) << "TENSORS as CONSTANT" << std::endl; + for (const auto &ind : constants) + { + uses_map[ind]++; + tensor_builder->notifyFirstUse(ind); + } + + // At each operation, + // 1. Scan DEF of outputs. If the DEF, allocate it + // 2. Scan DEF of inputs. If variable tensor, allocate it + // 3. Scan USE of inputs. Decrease the USE and deallocate if the USE is 0 + for (const auto op_seq_ind : order) + { + const auto &op_seq = op_seqs.at(op_seq_ind); + for (const auto &op_idx : op_seq.operations()) + { + auto &op = graph()->operations().at(op_idx); + auto op_inputs = op.getInputs() | ir::Remove::DUPLICATED | ir::Remove::UNDEFINED; + auto op_outputs = op.getOutputs() | ir::Remove::DUPLICATED | ir::Remove::UNDEFINED; + + // Define outputs + for (const auto &ind : op_outputs) + { + if (!tensor_builder->isRegistered(ind)) + continue; + assert(def_map.find(ind) != def_map.end()); + if (def_map[ind]) + { + def_map[ind] = 0; + tensor_builder->notifyFirstUse(ind); + } + } + + // Scan variable tensors + // This tensor has features like constant. 
But OperandInfo and LowerInfo treat them as + // non-constant because of less memory usage by memory planning in here + for (const auto &ind : op_inputs) + { + if (!tensor_builder->isRegistered(ind)) + continue; + const auto &operand = graph()->operands().at(ind); + if (operand.info().isVariable()) + { + // The variable tensor with buffer is not supported yet + assert(operand.data() == nullptr); + assert(operand.getUses().size() == 1 && !operand.getDef().valid()); + assert(lower_info.operand.at(ind)->def_factors().size() == 1 && + lower_info.operand.at(ind)->use_factors().size() == 1); + assert(uses_map[ind] == 1 && def_map[ind] == 0); + tensor_builder->notifyFirstUse(ind); + } + } + + for (const auto &ind : op_inputs) + { + if (!tensor_builder->isRegistered(ind)) + continue; + assert(uses_map.find(ind) != uses_map.end()); + assert(uses_map[ind] > 0); + uses_map[ind]--; + if (uses_map[ind] == 0) + { + // plan for deallocation of static tensornode + tensor_builder->notifyLastUse(ind); + } + } + } + } + + // Dispose and validate + for (const auto &ind : constants) + { + --uses_map[ind]; + if (uses_map[ind] == 0) // To prevent notifyLastUse from being called twice + { + tensor_builder->notifyLastUse(ind); + } + } + + assert( + std::all_of(uses_map.begin(), uses_map.end(), + [](std::pair<const ir::OperandIndex, uint32_t> it) { return it.second == 0; })); + + assert( + std::all_of(def_map.begin(), def_map.end(), + [](std::pair<const ir::OperandIndex, uint32_t> it) { return it.second == 0; })); +} + +ITensorRegistry *BackendContext::genTensors(const std::vector<onert::ir::OpSequenceIndex> &order, + const ir::OpSequences &op_seqs, + const ir::LowerInfoMap &lower_info) +{ + optimizer->optimize(); + + for (const auto op_seq_ind : order) + { + const auto &op_seq = op_seqs.at(op_seq_ind); + auto model_io = (graph()->getInputs() + graph()->getOutputs()) | ir::Remove::UNDEFINED | + ir::Remove::DUPLICATED; + for (const auto op_ind : op_seq) + { + bool op_assigned = [&]() { + for (auto &op_info : operation_list()) + if (op_info.index == op_ind) + return true; + return false; + }(); + if (!op_assigned) + continue; + + const auto &op = graph()->operations().at(op_ind); + for (const auto &index : (op.getInputs() + op.getOutputs()) | ir::Remove::UNDEFINED) + { + if (!tensor_builder->isRegistered(index) && !model_io.contains(index) && + find(operand_list().begin(), operand_list().end(), index) != operand_list().end()) + { + const auto &operand_lower_info = + lower_info.operand.at(index)->def_factors().getOnlyElement(); + + // E.g., permute (CPU) -> tensor A -> MaxPool2D(acl_cl) + // op.getOutputs() of permute (CPU) returns tensor A + // but tensor A belongs to the backend of acl_cl. + // So, we have to make this tensor NOT registered for CPU. 
+ if (operand_lower_info.backend() != backend()) + continue; + + const auto &obj = graph()->operands().at(index); + const auto frontend_layout = op_seq.getLayout(); + const auto backend_layout = operand_lower_info.layout(); + ir::OperandInfo backend_info{permuteShape(obj.shape(), frontend_layout, backend_layout), + obj.typeInfo(), obj.info().memAllocType(), obj.isConstant()}; + tensor_builder->registerTensorInfo(index, backend_info, backend_layout); + } + } + } + } + + // TODO Get compiler options from compiler, and use it rather than getting it from Env + if (util::getConfigString(util::config::EXECUTOR) == "Linear") + { + planTensors(order, op_seqs, lower_info); + } + else + { + // For the executors that does not have fixed linear execution order: + // To make tensors never be deallocated, this is a workaround to use static memory planner + for (auto ind : operand_list()) + { + if (tensor_builder->isRegistered(ind)) + tensor_builder->notifyFirstUse(ind); + } + } + + tensor_builder->prepare(); + + return tensor_registry.get(); +} + +FunctionMap BackendContext::genKernels(const std::vector<onert::ir::OpSequenceIndex> &order, + const ir::OpSequences &op_seqs) +{ + FunctionMap ret; + + for (auto op_seq_ind : order) + { + const auto &op_seq = op_seqs.at(op_seq_ind); + bool assigned = [&]() { + for (auto op_info : operation_list()) + if (op_seq.exist(op_info.index)) + return true; + return false; + }(); + if (!assigned) + continue; + auto fn_seq = kernel_gen->generate(op_seqs.at(op_seq_ind)); + ret.emplace_back(op_seq_ind, std::move(fn_seq)); + } + + tensor_builder->allocate(); + initConsts(); + + // NOTE For memory optimization, we want to free some operand data + for (auto ind : operand_list()) + { + // TODO Remove const_cast + auto &obj = const_cast<ir::Graph *>(graph())->operands().at(ind); + obj.releaseData(); + } + + for (auto &it : ret) + { + auto &fn_seq = it.second; + fn_seq->iterate([&](exec::IFunction &ifunc) { + ifunc.prepare(); + tensor_builder->postFunctionPrepare(); + }); + } + + return ret; +} + +} // namespace acl_cl +} // namespace backend +} // namespace onert diff --git a/runtime/onert/backend/acl_cl/BackendContext.h b/runtime/onert/backend/acl_cl/BackendContext.h new file mode 100644 index 000000000..662d767d0 --- /dev/null +++ b/runtime/onert/backend/acl_cl/BackendContext.h @@ -0,0 +1,70 @@ +/* + * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#ifndef __ONERT_BACKEND_ACL_CL_BACKEND_CONTEXT_H__ +#define __ONERT_BACKEND_ACL_CL_BACKEND_CONTEXT_H__ + +#include <backend/BackendContext.h> +#include "TensorBuilder.h" +#include "ConstantInitializer.h" +#include "KernelGenerator.h" + +namespace onert +{ +namespace backend +{ +namespace acl_cl +{ + +class Optimizer; + +class BackendContext : public onert::backend::BackendContext +{ +public: + BackendContext(const Backend *backend, const ir::Graph *graph, + std::shared_ptr<ITensorRegistry> tensor_registry = nullptr, + std::shared_ptr<TensorBuilder> tensor_builder = nullptr, + std::shared_ptr<ConstantInitializer> constant_initializer = nullptr, + std::shared_ptr<KernelGenerator> kernel_gen = nullptr) + : onert::backend::BackendContext(backend, graph, tensor_registry), + tensor_builder{tensor_builder}, constant_initializer{constant_initializer}, + kernel_gen{kernel_gen} + { + } + + ITensorRegistry *genTensors(const std::vector<onert::ir::OpSequenceIndex> &order, + const ir::OpSequences &op_seqs, + const ir::LowerInfoMap &lower_info) override; + FunctionMap genKernels(const std::vector<onert::ir::OpSequenceIndex> &order, + const ir::OpSequences &op_seqs) override; + +private: + void initConsts(); + void planTensors(const std::vector<onert::ir::OpSequenceIndex> &order, + const ir::OpSequences &op_seqs, const ir::LowerInfoMap &lower_info); + +public: + std::shared_ptr<TensorBuilder> tensor_builder; + std::shared_ptr<ConstantInitializer> constant_initializer; + std::shared_ptr<KernelGenerator> kernel_gen; + std::shared_ptr<Optimizer> optimizer; +}; + +} // namespace acl_cl +} // namespace backend +} // namespace onert + +#endif // __ONERT_BACKEND_ACL_CL_BACKEND_CONTEXT_H__ diff --git a/runtime/onert/backend/acl_cl/ConstantInitializer.cc b/runtime/onert/backend/acl_cl/ConstantInitializer.cc index b45b91058..413a7ccc3 100644 --- a/runtime/onert/backend/acl_cl/ConstantInitializer.cc +++ b/runtime/onert/backend/acl_cl/ConstantInitializer.cc @@ -112,7 +112,7 @@ void ConstantInitializer::visit(const ir::operation::Reverse &node) const auto &axis_obj = _operands.at(axis_index); const auto ifm_rank = input_obj.shape().rank(); - const auto frontend_layout = this->_current_op_seq_layout; + const auto frontend_layout = this->_current_layout; auto output_tensor = this->_tensor_reg->getITensor(output_index); const auto backend_layout = output_tensor->layout(); diff --git a/runtime/onert/backend/acl_cl/ConstantInitializer.h b/runtime/onert/backend/acl_cl/ConstantInitializer.h index 9f3acb461..fc0eca84f 100644 --- a/runtime/onert/backend/acl_cl/ConstantInitializer.h +++ b/runtime/onert/backend/acl_cl/ConstantInitializer.h @@ -14,8 +14,8 @@ * limitations under the License. 
*/ -#ifndef __ONERT_COMPILER_ACL_CL_CONSTANT_INITIALIZER_H__ -#define __ONERT_COMPILER_ACL_CL_CONSTANT_INITIALIZER_H__ +#ifndef __ONERT_BACKEND_ACL_CL_CONSTANT_INITIALIZER_H__ +#define __ONERT_BACKEND_ACL_CL_CONSTANT_INITIALIZER_H__ #include "AclConstantInitializer.h" @@ -45,4 +45,4 @@ public: } // namespace backend } // namespace onert -#endif // __ONERT_COMPILER_ACL_CL_CONSTANT_INITIALIZER_H__ +#endif // __ONERT_BACKEND_ACL_CL_CONSTANT_INITIALIZER_H__ diff --git a/runtime/onert/backend/acl_cl/KernelGenerator.cc b/runtime/onert/backend/acl_cl/KernelGenerator.cc index e7690af2e..3a5ea5a0f 100644 --- a/runtime/onert/backend/acl_cl/KernelGenerator.cc +++ b/runtime/onert/backend/acl_cl/KernelGenerator.cc @@ -49,7 +49,7 @@ KernelGenerator::KernelGenerator( const std::shared_ptr<TensorBuilder> &tensor_builder, const std::shared_ptr<acl_common::AclTensorRegistry<TensorManager>> &tensor_reg) : _ctx(operands_ctx), _operations_ctx(operations_ctx), _tensor_builder(tensor_builder), - _tensor_reg(tensor_reg), _current_op_seq_layout(ir::Layout::UNKNOWN) + _tensor_reg(tensor_reg), _current_layout(ir::Layout::UNKNOWN) { // DO NOTHING } @@ -62,7 +62,7 @@ void KernelGenerator::visit(const ir::OpSequence &op_seq) _return_fn_seq = std::make_unique<exec::FunctionSequence>(); _return_fn_seq->enableDynamicShapeInferer(false); - _current_op_seq_layout = op_seq.getLayout(); + _current_layout = op_seq.getLayout(); for (const auto &operation_idx : op_seq.operations()) { const auto &node = _operations_ctx.at(operation_idx); @@ -78,6 +78,25 @@ void KernelGenerator::visit(const ir::operation::BatchToSpaceND &node) const auto block_size_index{ node.getInputs().at(ir::operation::BatchToSpaceND::Input::BLOCK_SIZE)}; + const auto NNApiInputs = 2; + if (node.getInputs().size() != NNApiInputs) + { + const auto crops_index{node.getInputs().at(ir::operation::BatchToSpaceND::Input::CROPS_DATA)}; + if (!_ctx.at(crops_index).isConstant()) + { + throw std::runtime_error("Non-constant crops NYI for acl_cl backend BatchToSpaceND"); + } + + auto crops = _ctx.at(crops_index).asVector<int32_t>(); + for (auto crop : crops) + { + if (crop != 0) + { + throw std::runtime_error("Non-zero crops NYI for acl_cl backend BatchToSpaceND"); + } + } + } + auto ofm_tensor = _tensor_reg->getAclTensor(ofm_index); auto ifm_tensor = _tensor_reg->getAclTensor(ifm_index); auto block_size_tensor = _tensor_reg->getAclTensor(block_size_index); @@ -152,8 +171,8 @@ void KernelGenerator::visit(const ir::operation::Conv2D &node) const auto ker_index{node.getInputs().at(Conv2D::Input::KERNEL)}; const auto bias_index{node.getInputs().at(Conv2D::Input::BIAS)}; - const auto ifm_shape = _ctx.at(ifm_index).shape().asFeature(_current_op_seq_layout); - const auto ofm_shape = _ctx.at(ofm_index).shape().asFeature(_current_op_seq_layout); + const auto ifm_shape = _ctx.at(ifm_index).shape().asFeature(_current_layout); + const auto ofm_shape = _ctx.at(ofm_index).shape().asFeature(_current_layout); // Kernel format is [depth_out, kernel_height, kernel_width, depth_in]. 
const auto &ker_shape = _ctx.at(ker_index).shape(); const auto ker_height = ker_shape.dim(1); @@ -189,8 +208,8 @@ void KernelGenerator::visit(const ir::operation::DepthwiseConv2D &node) const auto ker_index{node.getInputs().at(DepthwiseConv2D::Input::KERNEL)}; const auto bias_index{node.getInputs().at(DepthwiseConv2D::Input::BIAS)}; - const auto ifm_shape = _ctx.at(ifm_index).shape().asFeature(_current_op_seq_layout); - const auto ofm_shape = _ctx.at(ofm_index).shape().asFeature(_current_op_seq_layout); + const auto ifm_shape = _ctx.at(ifm_index).shape().asFeature(_current_layout); + const auto ofm_shape = _ctx.at(ofm_index).shape().asFeature(_current_layout); // Kernel format is [1, kernel_height, kernel_width, depth_out]. const auto &ker_shape = _ctx.at(ker_index).shape(); const auto ker_height = ker_shape.dim(1); @@ -255,7 +274,7 @@ void KernelGenerator::visit(const ir::operation::Concat &node) else { const auto rank = _ctx.at(ofm_index).shape().rank(); - const auto frontend_layout = _current_op_seq_layout; + const auto frontend_layout = _current_layout; const auto backend_layout = output_tensor->layout(); const auto fixed_axis = acl_common::ToARMComputeAxis(rank, axis, frontend_layout, backend_layout).value(); @@ -277,7 +296,7 @@ void KernelGenerator::visit(const ir::operation::FullyConnected &node) auto fn = acl_common::kernelGenFullyConnected<acl_common::AclFunction, ::arm_compute::ICLTensor, ::arm_compute::CLFullyConnectedReshapingLayer>( - node, _ctx, _tensor_builder, _tensor_reg, _current_op_seq_layout); + node, _ctx, _tensor_builder, _tensor_reg, _current_layout); _return_fn = std::make_unique<exec::FunctionSequence>( std::move(fn), ActivationBuilder::generate(activation, output_tensor->handle())); } @@ -296,7 +315,7 @@ void KernelGenerator::visit(const ir::operation::Reduce &node) // Convert to ACL axes taking into account negative values and possible duplicates. const auto &axes = _ctx.at(axes_index); const auto input_rank = _ctx.at(input_index).shape().rank(); - const auto frontend_layout = _current_op_seq_layout; + const auto frontend_layout = _current_layout; const auto backend_layout = input_tensor->layout(); std::unique_ptr<arm_compute::IFunction> fn; @@ -329,7 +348,7 @@ void KernelGenerator::visit(const ir::operation::Reshape &node) // NOTE This operation must not be changed the layout from frontend to backend // So, PermutationOperationPass makes layouts of frontend and backend the same. 
- const auto frontend_layout = _current_op_seq_layout; + const auto frontend_layout = _current_layout; const auto backend_layout = output_tensor->layout(); assert((_ctx.at(input_index).shape().rank() < 4 && _ctx.at(output_index).shape().rank() < 4) || frontend_layout == backend_layout); @@ -388,7 +407,7 @@ void KernelGenerator::visit(const ir::operation::Slice &node) auto outputData_tensor = _tensor_reg->getAclTensor(output_index); auto inputData_tensor = _tensor_reg->getAclTensor(input_index); - const auto frontend_layout = _current_op_seq_layout; + const auto frontend_layout = _current_layout; const auto backend_layout = inputData_tensor->layout(); // Set initializers for indices data such as order of inputData @@ -455,7 +474,7 @@ void KernelGenerator::visit(const ir::operation::StridedSlice &node) auto outputData_tensor = _tensor_reg->getAclTensor(output_index); auto inputData_tensor = _tensor_reg->getAclTensor(input_index); - const auto frontend_layout = _current_op_seq_layout; + const auto frontend_layout = _current_layout; const auto backend_layout = inputData_tensor->layout(); // Set initializers for indices data such as order of inputData @@ -557,7 +576,7 @@ void KernelGenerator::visit(const ir::operation::Transpose &node) auto ofm_tensor = _tensor_reg->getAclTensor(ofm_idx); auto ifm_tensor = _tensor_reg->getAclTensor(ifm_idx); - const auto frontend_layout = _current_op_seq_layout; + const auto frontend_layout = _current_layout; const auto backend_layout = ifm_tensor->layout(); const auto &perms = _ctx.at(perm_idx); @@ -836,7 +855,7 @@ void KernelGenerator::visit(const ir::operation::OneHot &node) auto onvalue_tensor = _tensor_reg->getAclTensor(onvalue_idx); const size_t output_rank = _ctx.at(output_idx).shape().rank(); - const auto frontend_layout = _current_op_seq_layout; + const auto frontend_layout = _current_layout; const auto backend_layout = output_tensor->layout(); int32_t axis = node.param().axis == -1 ? 
output_rank - 1 : node.param().axis; axis = acl_common::ToARMComputeAxis(output_rank, axis, frontend_layout, backend_layout).value(); @@ -887,7 +906,7 @@ void KernelGenerator::visit(const ir::operation::Pack &node) for (const auto &input_index : input_indexes) inputs.emplace_back(_tensor_reg->getAclTensor(input_index)->handle()); - const auto frontend_layout = _current_op_seq_layout; + const auto frontend_layout = _current_layout; const auto backend_layout = _tensor_reg->getAclTensor(output_index)->layout(); if (axis < 0) @@ -923,8 +942,7 @@ void KernelGenerator::visit(const ir::operation::Pack &node) void KernelGenerator::visit(const ir::operation::Pool2D &node) { auto raw_fn = acl_common::kernelGenPool2D<::arm_compute::CLPoolingLayer>( - node, _ctx, _tensor_reg, _current_op_seq_layout, - acl_common::convertPoolType(node.param().op_type)); + node, _ctx, _tensor_reg, _current_layout, acl_common::convertPoolType(node.param().op_type)); const auto ofm_index{node.getOutputs().at(0)}; auto ofm_tensor = _tensor_reg->getAclTensor(ofm_index); @@ -1169,9 +1187,9 @@ void KernelGenerator::visit(const ir::operation::TransposeConv &node) const auto ker_index{node.getInputs().at(ir::operation::TransposeConv::Input::KERNEL)}; const auto ifm_index{node.getInputs().at(ir::operation::TransposeConv::Input::INPUT)}; - const auto ofm_shape = _ctx.at(ofm_index).shape().asFeature(_current_op_seq_layout); - const auto ifm_shape = _ctx.at(ifm_index).shape().asFeature(_current_op_seq_layout); - const auto ker_shape = _ctx.at(ker_index).shape().asFeature(_current_op_seq_layout); + const auto ofm_shape = _ctx.at(ofm_index).shape().asFeature(_current_layout); + const auto ifm_shape = _ctx.at(ifm_index).shape().asFeature(_current_layout); + const auto ker_shape = _ctx.at(ker_index).shape().asFeature(_current_layout); const auto stride = node.param().stride; @@ -1270,7 +1288,7 @@ void KernelGenerator::visit(const ir::operation::Gather &node) UNUSED_RELEASE(backend_layout); assert(backend_layout == ifm_tensor->layout()); assert(backend_layout == indices_tensor->layout()); - assert(ifm_rank < 4 || _current_op_seq_layout == backend_layout); + assert(ifm_rank < 4 || _current_layout == backend_layout); // input is n-D, indices k-D, output is (n + k - 1)-D size_t n = ifm_rank; @@ -1306,11 +1324,11 @@ void KernelGenerator::visit(const ir::operation::Gather &node) _return_fn = asAclFunction(std::move(fn)); } -void KernelGenerator::visit(const ir::operation::ArgMax &node) +void KernelGenerator::visit(const ir::operation::ArgMinMax &node) { const auto ofm_index{node.getOutputs().at(0)}; - const auto ifm_index{node.getInputs().at(ir::operation::ArgMax::Input::INPUT)}; - const auto axis_index{node.getInputs().at(ir::operation::ArgMax::Input::AXIS)}; + const auto ifm_index{node.getInputs().at(ir::operation::ArgMinMax::Input::INPUT)}; + const auto axis_index{node.getInputs().at(ir::operation::ArgMinMax::Input::AXIS)}; auto ifm_shape = _ctx.at(ifm_index).shape(); auto ofm_shape = _ctx.at(ofm_index).shape(); @@ -1320,7 +1338,7 @@ void KernelGenerator::visit(const ir::operation::ArgMax &node) auto ofm_tensor = _tensor_reg->getAclTensor(ofm_index); auto ifm_tensor = _tensor_reg->getAclTensor(ifm_index); const auto ifm_rank = _ctx.at(ifm_index).shape().rank(); - auto frontend_layout = _current_op_seq_layout; + auto frontend_layout = _current_layout; auto backend_layout = ifm_tensor->layout(); int axis_value = _ctx.at(axis_index).asScalar<int32_t>(); @@ -1331,10 +1349,10 @@ void KernelGenerator::visit(const ir::operation::ArgMax &node) 
auto acl_axis = acl_common::ToARMComputeAxis(ifm_rank, axis_value, frontend_layout, backend_layout).value(); - + auto reduce_type = node.param().is_arg_max ? ::arm_compute::ReductionOperation::ARG_IDX_MAX + : ::arm_compute::ReductionOperation::ARG_IDX_MIN; auto fn = acl_common::generateLayer<arm_compute::CLArgMinMaxLayerEx>( - ifm_tensor->handle(), acl_axis, ofm_tensor->handle(), - ::arm_compute::ReductionOperation::ARG_IDX_MAX); + ifm_tensor->handle(), acl_axis, ofm_tensor->handle(), reduce_type); _return_fn = asAclFunction(std::move(fn)); } @@ -1400,7 +1418,7 @@ void KernelGenerator::visit(const ir::operation::Split &node) for (const auto &ofm_ind : output_indexes) output_tensors.emplace_back(_tensor_reg->getAclTensor(ofm_ind)->handle()); - const auto frontend_layout = _current_op_seq_layout; + const auto frontend_layout = _current_layout; const auto backend_layout = ifm_tensor->layout(); auto axis = _ctx.at(axis_index).asScalar<int32_t>(); if (axis < 0) @@ -1439,7 +1457,7 @@ void KernelGenerator::visit(const ir::operation::SplitV &node) { int32_t split_dim = split_dim_op.asScalar<int32_t>(); uint32_t split_dim_revised = (split_dim < 0) ? (split_dim + ifm_rank) : split_dim; - const auto frontend_layout = _current_op_seq_layout; + const auto frontend_layout = _current_layout; const auto backend_layout = ifm_tensor->layout(); if (ifm_tensor->num_dimensions() != ifm_tensor->info()->num_dimensions()) @@ -1483,7 +1501,7 @@ void KernelGenerator::visit(const ir::operation::Unpack &node) for (const auto &output_index : output_indexes) outputs.emplace_back(_tensor_reg->getAclTensor(output_index)->handle()); - const auto frontend_layout = _current_op_seq_layout; + const auto frontend_layout = _current_layout; const auto backend_layout = _tensor_reg->getAclTensor(input_index)->layout(); if (axis < 0) axis += input_rank; @@ -1526,7 +1544,7 @@ void KernelGenerator::visit(const ir::operation::Pad &node) auto input = _tensor_reg->getAclTensor(input_index)->handle(); auto output = _tensor_reg->getAclTensor(output_index)->handle(); - const auto frontend_layout = _current_op_seq_layout; + const auto frontend_layout = _current_layout; const auto backend_layout = _tensor_reg->getAclTensor(input_index)->layout(); ::arm_compute::PaddingList padding_list; diff --git a/runtime/onert/backend/acl_cl/KernelGenerator.h b/runtime/onert/backend/acl_cl/KernelGenerator.h index e8a922677..22a7c18a3 100644 --- a/runtime/onert/backend/acl_cl/KernelGenerator.h +++ b/runtime/onert/backend/acl_cl/KernelGenerator.h @@ -17,7 +17,7 @@ #ifndef __ONERT_BACKEND_ACL_CL_KERNEL_GENERATOR_H__ #define __ONERT_BACKEND_ACL_CL_KERNEL_GENERATOR_H__ -#include <backend/IKernelGenerator.h> +#include <backend/cpu_common/KernelGeneratorBase.h> #include "ir/Operands.h" #include "TensorBuilder.h" @@ -31,7 +31,7 @@ namespace backend namespace acl_cl { -class KernelGenerator : public IKernelGenerator +class KernelGenerator : public cpu_common::KernelGeneratorBase { public: KernelGenerator(const ir::Operands &operands_ctx, const ir::Operations &operations_ctx, @@ -39,60 +39,61 @@ public: const std::shared_ptr<acl_common::AclTensorRegistry<TensorManager>> &_tensor_reg); void visit(const ir::OpSequence &) override; + + void visit(const ir::operation::ArgMinMax &) override; void visit(const ir::operation::BatchToSpaceND &) override; void visit(const ir::operation::BinaryArithmetic &) override; + void visit(const ir::operation::Comparison &) override; + void visit(const ir::operation::Concat &) override; void visit(const ir::operation::Conv2D &) 
override; + void visit(const ir::operation::ConvertFp16ToFp32 &) override; + void visit(const ir::operation::ConvertFp32ToFp16 &) override; + void visit(const ir::operation::DepthToSpace &) override; void visit(const ir::operation::DepthwiseConv2D &) override; - void visit(const ir::operation::Concat &) override; - void visit(const ir::operation::FullyConnected &) override; - void visit(const ir::operation::Reduce &) override; - void visit(const ir::operation::Reshape &) override; - void visit(const ir::operation::Squeeze &) override; - void visit(const ir::operation::Softmax &) override; - void visit(const ir::operation::Slice &) override; - void visit(const ir::operation::StridedSlice &) override; - void visit(const ir::operation::Transpose &) override; void visit(const ir::operation::ElementwiseActivation &) override; void visit(const ir::operation::ElementwiseBinary &) override; void visit(const ir::operation::ElementwiseUnary &) override; + void visit(const ir::operation::EmbeddingLookup &) override; void visit(const ir::operation::ExpandDims &) override; + void visit(const ir::operation::FullyConnected &) override; + void visit(const ir::operation::Gather &) override; + void visit(const ir::operation::HashtableLookup &) override; void visit(const ir::operation::InstanceNorm &) override; - void visit(const ir::operation::Comparison &) override; + void visit(const ir::operation::L2Normalization &) override; + void visit(const ir::operation::LocalResponseNormalization &) override; void visit(const ir::operation::LSTM &) override; void visit(const ir::operation::OneHot &) override; void visit(const ir::operation::Pack &) override; - void visit(const ir::operation::Pool2D &) override; + void visit(const ir::operation::Pad &) override; void visit(const ir::operation::Permute &) override; + void visit(const ir::operation::Pool2D &) override; + void visit(const ir::operation::PReLU &) override; + void visit(const ir::operation::Reduce &) override; + void visit(const ir::operation::Reshape &) override; void visit(const ir::operation::ResizeBilinear &) override; void visit(const ir::operation::ResizeNearestNeighbor &) override; + void visit(const ir::operation::Reverse &) override; void visit(const ir::operation::RNN &) override; + void visit(const ir::operation::Slice &) override; + void visit(const ir::operation::Softmax &) override; void visit(const ir::operation::SpaceToBatchND &) override; void visit(const ir::operation::SpaceToDepth &) override; - void visit(const ir::operation::EmbeddingLookup &) override; - void visit(const ir::operation::L2Normalization &) override; - void visit(const ir::operation::HashtableLookup &) override; - void visit(const ir::operation::PReLU &) override; - void visit(const ir::operation::TransposeConv &) override; - void visit(const ir::operation::SquaredDifference &) override; - void visit(const ir::operation::TopKV2 &) override; - void visit(const ir::operation::Gather &) override; - void visit(const ir::operation::ArgMax &) override; - void visit(const ir::operation::LocalResponseNormalization &) override; - void visit(const ir::operation::DepthToSpace &) override; void visit(const ir::operation::Split &) override; void visit(const ir::operation::SplitV &) override; + void visit(const ir::operation::SquaredDifference &) override; + void visit(const ir::operation::Squeeze &) override; + void visit(const ir::operation::StridedSlice &) override; + void visit(const ir::operation::TopKV2 &) override; + void visit(const ir::operation::Transpose &) override; + 
void visit(const ir::operation::TransposeConv &) override; void visit(const ir::operation::Unpack &) override; - void visit(const ir::operation::Pad &) override; - void visit(const ir::operation::ConvertFp32ToFp16 &) override; - void visit(const ir::operation::ConvertFp16ToFp32 &) override; - void visit(const ir::operation::Reverse &) override; private: const ir::Operands &_ctx; const ir::Operations &_operations_ctx; std::shared_ptr<TensorBuilder> _tensor_builder; std::shared_ptr<acl_common::AclTensorRegistry<TensorManager>> _tensor_reg; - ir::Layout _current_op_seq_layout; + ir::Layout _current_layout; }; } // namespace acl_cl diff --git a/runtime/onert/backend/acl_cl/Optimizer.h b/runtime/onert/backend/acl_cl/Optimizer.h index 18d38ec1b..ad5154860 100644 --- a/runtime/onert/backend/acl_cl/Optimizer.h +++ b/runtime/onert/backend/acl_cl/Optimizer.h @@ -17,8 +17,7 @@ #ifndef __ONERT_BACKEND_ACL_CL_OPTIMIZER_H__ #define __ONERT_BACKEND_ACL_CL_OPTIMIZER_H__ -#include <backend/IOptimizer.h> -#include <backend/BackendContext.h> +#include "BackendContext.h" #include "TensorBuilder.h" namespace onert @@ -28,12 +27,12 @@ namespace backend namespace acl_cl { -class Optimizer : public IOptimizer +class Optimizer { public: Optimizer(BackendContext *context); - void optimize() override; + void optimize(); private: BackendContext *_context; diff --git a/runtime/onert/backend/acl_cl/acl_cl.cc b/runtime/onert/backend/acl_cl/acl_cl.cc index 88378b13a..82cbde02f 100644 --- a/runtime/onert/backend/acl_cl/acl_cl.cc +++ b/runtime/onert/backend/acl_cl/acl_cl.cc @@ -14,20 +14,11 @@ * limitations under the License. */ -#include <util/logging.h> - #include "Backend.h" extern "C" { -onert::backend::Backend *onert_backend_create() -{ - VERBOSE(onert_backend_create) << "'acl_cl' loaded\n"; - return new onert::backend::acl_cl::Backend; -} -void onert_backend_destroy(onert::backend::Backend *backend) -{ - VERBOSE(onert_backend_create) << "'acl_cl' unloaded\n"; - delete backend; -} +onert::backend::Backend *onert_backend_create() { return new onert::backend::acl_cl::Backend; } + +void onert_backend_destroy(onert::backend::Backend *backend) { delete backend; } } diff --git a/runtime/onert/backend/acl_common/AclConstantInitializer.cc b/runtime/onert/backend/acl_common/AclConstantInitializer.cc index 21f41a3e6..921d107d9 100644 --- a/runtime/onert/backend/acl_common/AclConstantInitializer.cc +++ b/runtime/onert/backend/acl_common/AclConstantInitializer.cc @@ -25,7 +25,7 @@ namespace acl_common AclConstantInitializer::AclConstantInitializer(const ir::Operands &operands, const std::shared_ptr<ITensorRegistry> &tensor_reg) - : IConstantInitializer{operands}, _tensor_reg{tensor_reg} + : cpu_common::ConstantInitializerBase{operands}, _tensor_reg{tensor_reg} { // DO NOTHING } diff --git a/runtime/onert/backend/acl_common/AclConstantInitializer.h b/runtime/onert/backend/acl_common/AclConstantInitializer.h index 52f4c54cf..894e2e7d1 100644 --- a/runtime/onert/backend/acl_common/AclConstantInitializer.h +++ b/runtime/onert/backend/acl_common/AclConstantInitializer.h @@ -17,7 +17,7 @@ #ifndef __ONERT_COMPILER_ACL_COMMON_ACLCONSTANT_INITIALIZER_H__ #define __ONERT_COMPILER_ACL_COMMON_ACLCONSTANT_INITIALIZER_H__ -#include <backend/IConstantInitializer.h> +#include <backend/cpu_common/ConstantInitializerBase.h> #include <ir/Operands.h> #include "AclTensorRegistry.h" @@ -28,7 +28,7 @@ namespace backend namespace acl_common { -class AclConstantInitializer : public IConstantInitializer +class AclConstantInitializer : public 
cpu_common::ConstantInitializerBase { public: AclConstantInitializer(const ir::Operands &operands, diff --git a/runtime/onert/backend/acl_common/AclTensorBuilder.h b/runtime/onert/backend/acl_common/AclTensorBuilder.h index bb7abc95d..12e9ab894 100644 --- a/runtime/onert/backend/acl_common/AclTensorBuilder.h +++ b/runtime/onert/backend/acl_common/AclTensorBuilder.h @@ -21,7 +21,6 @@ #include <queue> #include <arm_compute/core/Types.h> -#include <backend/ITensorBuilder.h> #include "ir/OperandIndexMap.h" #include <ir/Operands.h> #include "AclTensorManager.h" @@ -43,14 +42,12 @@ enum class UsesType LAST }; -template <typename T_ITensor, typename T_Tensor, typename T_SubTensor> -class AclTensorBuilder : public ITensorBuilder +template <typename T_ITensor, typename T_Tensor, typename T_SubTensor> class AclTensorBuilder { public: using T_AclTensorManager = AclTensorManager<T_ITensor, T_Tensor, T_SubTensor>; - AclTensorBuilder(const ir::Operands &operands, T_AclTensorManager *tensor_mgr, - const std::shared_ptr<AclTensorRegistry<T_AclTensorManager>> &tensor_reg); + AclTensorBuilder(const ir::Operands &operands, T_AclTensorManager *tensor_mgr); /** * @brief Register tensor information to allocate on ACL-CL backend @@ -59,16 +56,16 @@ public: * @param[in] layout Tensor data layout */ void registerTensorInfo(const ir::OperandIndex &ind, const ir::OperandInfo &info, - ir::Layout backend_layout) override; + ir::Layout backend_layout); - void notifyFirstUse(const ir::OperandIndex &) override; - void notifyLastUse(const ir::OperandIndex &) override; + void notifyFirstUse(const ir::OperandIndex &); + void notifyLastUse(const ir::OperandIndex &); - bool isRegistered(const ir::OperandIndex &) const override; + bool isRegistered(const ir::OperandIndex &) const; - void prepare(void) override; - void allocate() override; - void postFunctionPrepare() override; + void prepare(void); + void allocate(); + void postFunctionPrepare(); T_AclTensorManager *acl_tensor_manager(void) { return _tensor_mgr.get(); } @@ -105,7 +102,6 @@ private: ir::OperandIndexMap<size_t> _uses_count_map; std::unique_ptr<T_AclTensorManager> _tensor_mgr; - std::shared_ptr<AclTensorRegistry<T_AclTensorManager>> _tensor_reg; // for linear executor std::vector<std::pair<UsesType, ir::OperandIndex>> _lifetime_seq; @@ -133,10 +129,9 @@ namespace acl_common { template <typename T_ITensor, typename T_Tensor, typename T_SubTensor> -AclTensorBuilder<T_ITensor, T_Tensor, T_SubTensor>::AclTensorBuilder( - const ir::Operands &operands, T_AclTensorManager *tensor_mgr, - const std::shared_ptr<AclTensorRegistry<T_AclTensorManager>> &tensor_reg) - : _operands{operands}, _tensor_mgr{tensor_mgr}, _tensor_reg{tensor_reg} +AclTensorBuilder<T_ITensor, T_Tensor, T_SubTensor>::AclTensorBuilder(const ir::Operands &operands, + T_AclTensorManager *tensor_mgr) + : _operands{operands}, _tensor_mgr{tensor_mgr} { assert(_tensor_mgr); } diff --git a/runtime/onert/backend/acl_common/Convert.cc b/runtime/onert/backend/acl_common/Convert.cc index 67d9d7176..7d3a69032 100644 --- a/runtime/onert/backend/acl_common/Convert.cc +++ b/runtime/onert/backend/acl_common/Convert.cc @@ -109,13 +109,19 @@ namespace acl_common case ir::DataType::UINT8: return ::arm_compute::DataType::U8; case ir::DataType::QUANT_INT8_SYMM: - return ::arm_compute::DataType::S8; + return ::arm_compute::DataType::QSYMM8; + case ir::DataType::QUANT_INT8_ASYMM: + return ::arm_compute::DataType::QASYMM8_SIGNED; case ir::DataType::FLOAT16: return ::arm_compute::DataType::F16; case ir::DataType::INT64: return 
::arm_compute::DataType::S64; + case ir::DataType::QUANT_INT16_ASYMM: + return ::arm_compute::DataType::QASYMM16; + case ir::DataType::QUANT_INT8_SYMM_PER_CHANNEL: + return ::arm_compute::DataType::QSYMM8_PER_CHANNEL; default: - throw std::runtime_error("Not supported, yet"); + throw std::runtime_error("Not supported internal data type, yet"); break; } } @@ -175,7 +181,7 @@ namespace acl_common return ::arm_compute::ActivationLayerInfo{ ::arm_compute::ActivationLayerInfo::ActivationFunction::LOGISTIC, 0.0f, 0.0f}; default: - throw std::runtime_error{"Not supported, yet"}; + throw std::runtime_error{"Not supported internal activation, yet"}; break; } } @@ -219,7 +225,7 @@ asActivationLayerInfo(const ir::operation::ElementwiseActivation::Type op_type, return ::arm_compute::ActivationLayerInfo{ ::arm_compute::ActivationLayerInfo::ActivationFunction::LEAKY_RELU, alpha}; default: - throw std::runtime_error{"Not supported, yet"}; + throw std::runtime_error{"Not supported internal elementwise activation, yet"}; break; } } @@ -295,6 +301,8 @@ ir::DataType asRuntimeDataType(::arm_compute::DataType data_type) return ir::DataType::UINT32; case ::arm_compute::DataType::QASYMM8: return ir::DataType::QUANT_UINT8_ASYMM; + case ::arm_compute::DataType::QASYMM8_SIGNED: + return ir::DataType::QUANT_INT8_ASYMM; case ::arm_compute::DataType::U8: return ir::DataType::UINT8; case ::arm_compute::DataType::QSYMM8: @@ -304,7 +312,7 @@ ir::DataType asRuntimeDataType(::arm_compute::DataType data_type) case ::arm_compute::DataType::S64: return ir::DataType::INT64; default: - throw std::runtime_error{"Not supported, yet"}; + throw std::runtime_error{"Not supported acl data type, yet"}; break; } } diff --git a/runtime/onert/backend/acl_neon/Backend.h b/runtime/onert/backend/acl_neon/Backend.h index 35d6e4e8e..b11c19733 100644 --- a/runtime/onert/backend/acl_neon/Backend.h +++ b/runtime/onert/backend/acl_neon/Backend.h @@ -21,6 +21,7 @@ #include <backend/Backend.h> #include <ir/Operands.h> +#include "BackendContext.h" #include "Config.h" #include "ConstantInitializer.h" #include "KernelGenerator.h" @@ -41,21 +42,20 @@ public: std::shared_ptr<IConfig> config() const override { return _config; } - std::unique_ptr<BackendContext> newContext(const ir::Graph &graph, - const std::shared_ptr<custom::IKernelBuilder> &, - bool is_linear_executor) const override + std::unique_ptr<backend::BackendContext> + newContext(const ir::Graph &graph, const std::shared_ptr<custom::IKernelBuilder> &, + bool is_linear_executor) const override { const auto &operands = graph.operands(); const auto &operations = graph.operations(); - auto context = std::make_unique<BackendContext>(this, &graph); + auto context = std::make_unique<acl_neon::BackendContext>(this, &graph); auto tm = createTensorManager(is_linear_executor); auto tr = std::make_shared<acl_common::AclTensorRegistry<TensorManager>>(tm); - auto tb = std::make_shared<TensorBuilder>(operands, tm, tr); + auto tb = std::make_shared<TensorBuilder>(operands, tm); context->tensor_registry = tr; context->tensor_builder = tb; context->constant_initializer = std::make_shared<ConstantInitializer>(operands, tr); context->kernel_gen = std::make_shared<KernelGenerator>(operands, operations, tb, tr); - context->tensor_register = nullptr; context->optimizer = std::make_shared<Optimizer>(context.get()); return context; } diff --git a/runtime/onert/backend/acl_neon/BackendContext.cc b/runtime/onert/backend/acl_neon/BackendContext.cc new file mode 100644 index 000000000..8b53171f7 --- /dev/null +++ 
b/runtime/onert/backend/acl_neon/BackendContext.cc @@ -0,0 +1,302 @@ +/* + * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "BackendContext.h" + +#include "TensorBuilder.h" +#include "KernelGenerator.h" +#include "Optimizer.h" +#include "util/logging.h" +#include "ir/Index.h" +#include "ir/OperandIndexMap.h" +#include "ir/OperandIndexSequence.h" + +namespace onert +{ +namespace backend +{ +namespace acl_neon +{ + +void BackendContext::initConsts() +{ + for (auto &op : operation_list()) + { + constant_initializer->setLayout(op.layout); + graph()->operations().at(op.index).accept(*constant_initializer); + } + + for (auto ind : operand_list()) + { + const auto &obj = graph()->operands().at(ind); + if (obj.isConstant() && !constant_initializer->exist(ind)) + { + constant_initializer->registerDefaultInitializer(ind, obj); + } + } + + constant_initializer->run(); +} + +void BackendContext::planTensors(const std::vector<onert::ir::OpSequenceIndex> &order, + const ir::OpSequences &op_seqs, const ir::LowerInfoMap &lower_info) +{ + ir::OperandIndexMap<uint32_t> uses_map; + ir::OperandIndexMap<uint32_t> def_map; + ir::OperandIndexSequence constants; + + // Prepare scanning + for (auto ind : operand_list()) + { + const auto &obj = graph()->operands().at(ind); + const auto &li = lower_info.operand.at(ind); + if (li->def_factors().getOnlyElement().backend() != backend()) + continue; + + // Ignore unused tensor + if (li->def_factors().size() == 0 && li->use_factors().size() == 0) + { + VERBOSE(planTensors) << "Operand #" << ind.value() << " will not be used. no more process." + << std::endl; + return; + } + + uses_map[ind] = obj.getUses().size(); + def_map[ind] = obj.getDef().valid() ? 1 : 0; + + if (obj.isConstant()) + constants.append(ind); + + auto factor = li->def_factors().getOnlyElement(); + if (!tensor_builder->isRegistered(ind)) + { + // These tensors do not exist in any op_seq (No use and def) + const auto info = obj.info(); + const auto backend_layout = factor.layout(); + // TODO Change tensor info to have permuted shape + tensor_builder->registerTensorInfo(ind, info, backend_layout); + } + } + + // Start scanning to do notify{First|Last}Use for each tensor + + // If a tensor is a constant, increase the use of the tensor and allocate it first. + // Increasing use count here makes the tensor never be deallocated, i.e it they will be + // deallocated last. + VERBOSE(planTensors) << "TENSORS as CONSTANT" << std::endl; + for (const auto &ind : constants) + { + uses_map[ind]++; + tensor_builder->notifyFirstUse(ind); + } + + // At each operation, + // 1. Scan DEF of outputs. If the DEF, allocate it + // 2. Scan DEF of inputs. If variable tensor, allocate it + // 3. Scan USE of inputs. 
Decrease the USE and deallocate if the USE is 0 + for (const auto op_seq_ind : order) + { + const auto &op_seq = op_seqs.at(op_seq_ind); + for (const auto &op_idx : op_seq.operations()) + { + auto &op = graph()->operations().at(op_idx); + auto op_inputs = op.getInputs() | ir::Remove::DUPLICATED | ir::Remove::UNDEFINED; + auto op_outputs = op.getOutputs() | ir::Remove::DUPLICATED | ir::Remove::UNDEFINED; + + // Define outputs + for (const auto &ind : op_outputs) + { + if (!tensor_builder->isRegistered(ind)) + continue; + assert(def_map.find(ind) != def_map.end()); + if (def_map[ind]) + { + def_map[ind] = 0; + tensor_builder->notifyFirstUse(ind); + } + } + + // Scan variable tensors + // This tensor has features like constant. But OperandInfo and LowerInfo treat them as + // non-constant because of less memory usage by memory planning in here + for (const auto &ind : op_inputs) + { + if (!tensor_builder->isRegistered(ind)) + continue; + const auto &operand = graph()->operands().at(ind); + if (operand.info().isVariable()) + { + // The variable tensor with buffer is not supported yet + assert(operand.data() == nullptr); + assert(operand.getUses().size() == 1 && !operand.getDef().valid()); + assert(lower_info.operand.at(ind)->def_factors().size() == 1 && + lower_info.operand.at(ind)->use_factors().size() == 1); + assert(uses_map[ind] == 1 && def_map[ind] == 0); + tensor_builder->notifyFirstUse(ind); + } + } + + for (const auto &ind : op_inputs) + { + if (!tensor_builder->isRegistered(ind)) + continue; + assert(uses_map.find(ind) != uses_map.end()); + assert(uses_map[ind] > 0); + uses_map[ind]--; + if (uses_map[ind] == 0) + { + // plan for deallocation of static tensornode + tensor_builder->notifyLastUse(ind); + } + } + } + } + + // Dispose and validate + for (const auto &ind : constants) + { + --uses_map[ind]; + if (uses_map[ind] == 0) // To prevent notifyLastUse from being called twice + { + tensor_builder->notifyLastUse(ind); + } + } + + assert( + std::all_of(uses_map.begin(), uses_map.end(), + [](std::pair<const ir::OperandIndex, uint32_t> it) { return it.second == 0; })); + + assert( + std::all_of(def_map.begin(), def_map.end(), + [](std::pair<const ir::OperandIndex, uint32_t> it) { return it.second == 0; })); +} + +ITensorRegistry *BackendContext::genTensors(const std::vector<onert::ir::OpSequenceIndex> &order, + const ir::OpSequences &op_seqs, + const ir::LowerInfoMap &lower_info) +{ + optimizer->optimize(); + + for (const auto op_seq_ind : order) + { + const auto &op_seq = op_seqs.at(op_seq_ind); + auto model_io = (graph()->getInputs() + graph()->getOutputs()) | ir::Remove::UNDEFINED | + ir::Remove::DUPLICATED; + for (const auto op_ind : op_seq) + { + bool op_assigned = [&]() { + for (auto &op_info : operation_list()) + if (op_info.index == op_ind) + return true; + return false; + }(); + if (!op_assigned) + continue; + + const auto &op = graph()->operations().at(op_ind); + for (const auto &index : (op.getInputs() + op.getOutputs()) | ir::Remove::UNDEFINED) + { + if (!tensor_builder->isRegistered(index) && !model_io.contains(index) && + find(operand_list().begin(), operand_list().end(), index) != operand_list().end()) + { + const auto &operand_lower_info = + lower_info.operand.at(index)->def_factors().getOnlyElement(); + + // E.g., permute (CPU) -> tensor A -> MaxPool2D(acl_cl) + // op.getOutputs() of permute (CPU) returns tensor A + // but tensor A belongs to the backend of acl_cl. + // So, we have to make this tensor NOT registered for CPU. 
+ if (operand_lower_info.backend() != backend()) + continue; + + const auto &obj = graph()->operands().at(index); + const auto frontend_layout = op_seq.getLayout(); + const auto backend_layout = operand_lower_info.layout(); + ir::OperandInfo backend_info{permuteShape(obj.shape(), frontend_layout, backend_layout), + obj.typeInfo(), obj.info().memAllocType(), obj.isConstant()}; + tensor_builder->registerTensorInfo(index, backend_info, backend_layout); + } + } + } + } + + // TODO Get compiler options from compiler, and use it rather than getting it from Env + if (util::getConfigString(util::config::EXECUTOR) == "Linear") + { + planTensors(order, op_seqs, lower_info); + } + else + { + // For the executors that does not have fixed linear execution order: + // To make tensors never be deallocated, this is a workaround to use static memory planner + for (auto ind : operand_list()) + { + if (tensor_builder->isRegistered(ind)) + tensor_builder->notifyFirstUse(ind); + } + } + + tensor_builder->prepare(); + + return tensor_registry.get(); +} + +FunctionMap BackendContext::genKernels(const std::vector<onert::ir::OpSequenceIndex> &order, + const ir::OpSequences &op_seqs) +{ + FunctionMap ret; + + for (auto op_seq_ind : order) + { + const auto &op_seq = op_seqs.at(op_seq_ind); + bool assigned = [&]() { + for (auto op_info : operation_list()) + if (op_seq.exist(op_info.index)) + return true; + return false; + }(); + if (!assigned) + continue; + auto fn_seq = kernel_gen->generate(op_seqs.at(op_seq_ind)); + ret.emplace_back(op_seq_ind, std::move(fn_seq)); + } + + tensor_builder->allocate(); + initConsts(); + + // NOTE For memory optimization, we want to free some operand data + for (auto ind : operand_list()) + { + // TODO Remove const_cast + auto &obj = const_cast<ir::Graph *>(graph())->operands().at(ind); + obj.releaseData(); + } + + for (auto &it : ret) + { + auto &fn_seq = it.second; + fn_seq->iterate([&](exec::IFunction &ifunc) { + ifunc.prepare(); + tensor_builder->postFunctionPrepare(); + }); + } + + return ret; +} + +} // namespace neon +} // namespace backend +} // namespace onert diff --git a/runtime/onert/backend/acl_neon/BackendContext.h b/runtime/onert/backend/acl_neon/BackendContext.h new file mode 100644 index 000000000..dd764c091 --- /dev/null +++ b/runtime/onert/backend/acl_neon/BackendContext.h @@ -0,0 +1,71 @@ +/* + * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#ifndef __ONERT_BACKEND_ACL_NEON_BACKEND_CONTEXT_H__ +#define __ONERT_BACKEND_ACL_NEON_BACKEND_CONTEXT_H__ + +#include <backend/BackendContext.h> +#include "TensorBuilder.h" +#include "ConstantInitializer.h" +#include "KernelGenerator.h" + +namespace onert +{ +namespace backend +{ +namespace acl_neon +{ + +class Optimizer; + +class BackendContext : public onert::backend::BackendContext +{ +public: + BackendContext(const Backend *backend, const ir::Graph *graph, + std::shared_ptr<ITensorRegistry> tensor_registry = nullptr, + std::shared_ptr<TensorBuilder> tensor_builder = nullptr, + std::shared_ptr<ConstantInitializer> constant_initializer = nullptr, + std::shared_ptr<KernelGenerator> kernel_gen = nullptr) + : onert::backend::BackendContext(backend, graph, tensor_registry), + tensor_builder{tensor_builder}, constant_initializer{constant_initializer}, + kernel_gen{kernel_gen} + { + } + + ITensorRegistry *genTensors(const std::vector<onert::ir::OpSequenceIndex> &order, + const ir::OpSequences &op_seqs, + const ir::LowerInfoMap &lower_info) override; + FunctionMap genKernels(const std::vector<onert::ir::OpSequenceIndex> &order, + const ir::OpSequences &op_seqs) override; + +private: + void initConsts(); + void planTensors(const std::vector<onert::ir::OpSequenceIndex> &order, + const ir::OpSequences &op_seqs, const ir::LowerInfoMap &lower_info); + +public: + // TODO Make it private + std::shared_ptr<TensorBuilder> tensor_builder; + std::shared_ptr<ConstantInitializer> constant_initializer; + std::shared_ptr<KernelGenerator> kernel_gen; + std::shared_ptr<Optimizer> optimizer; +}; + +} // namespace acl_neon +} // namespace backend +} // namespace onert + +#endif // __ONERT_BACKEND_ACL_NEON_BACKEND_CONTEXT_H__ diff --git a/runtime/onert/backend/acl_neon/ConstantInitializer.h b/runtime/onert/backend/acl_neon/ConstantInitializer.h index c7d71cdcf..9723ba012 100644 --- a/runtime/onert/backend/acl_neon/ConstantInitializer.h +++ b/runtime/onert/backend/acl_neon/ConstantInitializer.h @@ -14,8 +14,8 @@ * limitations under the License. 
*/ -#ifndef __ONERT_COMPILER_ACL_NEON_CONSTANT_INITIALIZER_H__ -#define __ONERT_COMPILER_ACL_NEON_CONSTANT_INITIALIZER_H__ +#ifndef __ONERT_BACKEND_ACL_NEON_CONSTANT_INITIALIZER_H__ +#define __ONERT_BACKEND_ACL_NEON_CONSTANT_INITIALIZER_H__ #include "AclConstantInitializer.h" @@ -41,4 +41,4 @@ public: } // namespace backend } // namespace onert -#endif // __ONERT_COMPILER_ACL_NEON_CONSTANT_INITIALIZER_H__ +#endif // __ONERT_BACKEND_ACL_NEON_CONSTANT_INITIALIZER_H__ diff --git a/runtime/onert/backend/acl_neon/KernelGenerator.cc b/runtime/onert/backend/acl_neon/KernelGenerator.cc index ffaee3b3e..e712dfa81 100644 --- a/runtime/onert/backend/acl_neon/KernelGenerator.cc +++ b/runtime/onert/backend/acl_neon/KernelGenerator.cc @@ -48,7 +48,7 @@ KernelGenerator::KernelGenerator( const std::shared_ptr<TensorBuilder> &tensor_builder, const std::shared_ptr<acl_common::AclTensorRegistry<TensorManager>> &tensor_reg) : _ctx(operands_ctx), _operations_ctx(operations_ctx), _tensor_builder(tensor_builder), - _tensor_reg(tensor_reg), _current_op_seq_layout(ir::Layout::UNKNOWN) + _tensor_reg(tensor_reg), _current_layout(ir::Layout::UNKNOWN) { // DO NOTHING } @@ -61,7 +61,7 @@ void KernelGenerator::visit(const ir::OpSequence &op_seq) _return_fn_seq = std::make_unique<exec::FunctionSequence>(); _return_fn_seq->enableDynamicShapeInferer(false); - _current_op_seq_layout = op_seq.getLayout(); + _current_layout = op_seq.getLayout(); for (const auto &operation_idx : op_seq.operations()) { const auto &node = _operations_ctx.at(operation_idx); @@ -70,17 +70,17 @@ void KernelGenerator::visit(const ir::OpSequence &op_seq) } } -void KernelGenerator::visit(const ir::operation::ArgMax &node) +void KernelGenerator::visit(const ir::operation::ArgMinMax &node) { const auto ofm_index{node.getOutputs().at(0)}; - const auto ifm_index{node.getInputs().at(ir::operation::ArgMax::Input::INPUT)}; - const auto axis_index{node.getInputs().at(ir::operation::ArgMax::Input::AXIS)}; + const auto ifm_index{node.getInputs().at(ir::operation::ArgMinMax::Input::INPUT)}; + const auto axis_index{node.getInputs().at(ir::operation::ArgMinMax::Input::AXIS)}; const auto ifm_rank = _ctx.at(ifm_index).shape().rank(); auto ofm_tensor = _tensor_reg->getAclTensor(ofm_index); auto ifm_tensor = _tensor_reg->getAclTensor(ifm_index); - auto frontend_layout = _current_op_seq_layout; + auto frontend_layout = _current_layout; auto backend_layout = ifm_tensor->layout(); int axis_value = _ctx.at(axis_index).asScalar<int32_t>(); @@ -91,10 +91,11 @@ void KernelGenerator::visit(const ir::operation::ArgMax &node) assert(axis_value >= 0 && axis_value < ifm_rank); const auto fixed_axis = acl_common::ToARMComputeAxis(ifm_rank, axis_value, frontend_layout, backend_layout).value(); + auto reduce_type = node.param().is_arg_max ? 
::arm_compute::ReductionOperation::ARG_IDX_MAX + : ::arm_compute::ReductionOperation::ARG_IDX_MIN; auto fn = acl_common::generateLayer<arm_compute::NEArgMinMaxLayer>( - ifm_tensor->handle(), fixed_axis, ofm_tensor->handle(), - arm_compute::ReductionOperation::ARG_IDX_MAX); + ifm_tensor->handle(), fixed_axis, ofm_tensor->handle(), reduce_type); _return_fn = asAclFunction(std::move(fn)); } @@ -106,6 +107,25 @@ void KernelGenerator::visit(const ir::operation::BatchToSpaceND &node) const auto block_size_index{ node.getInputs().at(ir::operation::BatchToSpaceND::Input::BLOCK_SIZE)}; + const auto NNApiInputs = 2; + if (node.getInputs().size() != NNApiInputs) + { + const auto crops_index{node.getInputs().at(ir::operation::BatchToSpaceND::Input::CROPS_DATA)}; + if (!_ctx.at(crops_index).isConstant()) + { + throw std::runtime_error("Non-constant crops NYI for acl_neon backend BatchToSpaceND"); + } + + auto crops = _ctx.at(crops_index).asVector<int32_t>(); + for (auto crop : crops) + { + if (crop != 0) + { + throw std::runtime_error("Non-zero crops NYI for acl_neon backend BatchToSpaceND"); + } + } + } + auto ofm_tensor = _tensor_reg->getAclTensor(ofm_index); auto ifm_tensor = _tensor_reg->getAclTensor(ifm_index); auto block_size_tensor = _tensor_reg->getAclTensor(block_size_index); @@ -178,8 +198,8 @@ void KernelGenerator::visit(const ir::operation::Conv2D &node) const auto ker_index{node.getInputs().at(Conv2D::Input::KERNEL)}; const auto bias_index{node.getInputs().at(Conv2D::Input::BIAS)}; - const auto ofm_shape = _ctx.at(ofm_index).shape().asFeature(_current_op_seq_layout); - const auto ifm_shape = _ctx.at(ifm_index).shape().asFeature(_current_op_seq_layout); + const auto ofm_shape = _ctx.at(ofm_index).shape().asFeature(_current_layout); + const auto ifm_shape = _ctx.at(ifm_index).shape().asFeature(_current_layout); // Kernel format is [depth_out, kernel_height, kernel_width, depth_in]. const auto &ker_shape = _ctx.at(ker_index).shape(); const auto ker_height = ker_shape.dim(1); @@ -232,8 +252,8 @@ void KernelGenerator::visit(const ir::operation::DepthwiseConv2D &node) const auto ker_index{node.getInputs().at(DepthwiseConv2D::Input::KERNEL)}; const auto bias_index{node.getInputs().at(DepthwiseConv2D::Input::BIAS)}; - const auto ifm_shape = _ctx.at(ifm_index).shape().asFeature(_current_op_seq_layout); - const auto ofm_shape = _ctx.at(ofm_index).shape().asFeature(_current_op_seq_layout); + const auto ifm_shape = _ctx.at(ifm_index).shape().asFeature(_current_layout); + const auto ofm_shape = _ctx.at(ofm_index).shape().asFeature(_current_layout); // Kernel format is [1, kernel_height, kernel_width, depth_out]. 
const auto &ker_shape = _ctx.at(ker_index).shape(); const auto ker_height = ker_shape.dim(1); @@ -297,7 +317,7 @@ void KernelGenerator::visit(const ir::operation::Concat &node) else { const auto rank = _ctx.at(ofm_index).shape().rank(); - const auto frontend_layout = _current_op_seq_layout; + const auto frontend_layout = _current_layout; const auto backend_layout = output_tensor->layout(); const auto fixed_axis = acl_common::ToARMComputeAxis(rank, axis, frontend_layout, backend_layout).value(); @@ -495,7 +515,7 @@ void KernelGenerator::visit(const ir::operation::FullyConnected &node) auto fn = acl_common::kernelGenFullyConnected<acl_common::AclFunction, ::arm_compute::ITensor, ::arm_compute::NEFullyConnectedReshapingLayer>( - node, _ctx, _tensor_builder, _tensor_reg, _current_op_seq_layout); + node, _ctx, _tensor_builder, _tensor_reg, _current_layout); _return_fn = std::make_unique<exec::FunctionSequence>( std::move(fn), ActivationBuilder::generate(activation, output_tensor->handle())); } @@ -552,7 +572,7 @@ void KernelGenerator::visit(const ir::operation::Gather &node) // and C are not sequential in NCHW. So the backend in NCHW cannot handle this case. assert(backend_layout == ifm_tensor->layout()); assert(backend_layout == indices_tensor->layout()); - assert(ifm_rank < 4 || _current_op_seq_layout == backend_layout); + assert(ifm_rank < 4 || _current_layout == backend_layout); // input is n-D, indices k-D, output is (n + k - 1)-D size_t n = ifm_rank; @@ -686,7 +706,7 @@ void KernelGenerator::visit(const ir::operation::Pack &node) for (const auto &input_index : input_indexes) inputs.emplace_back(_tensor_reg->getAclTensor(input_index)->handle()); - const auto frontend_layout = _current_op_seq_layout; + const auto frontend_layout = _current_layout; const auto backend_layout = _tensor_reg->getAclTensor(output_index)->layout(); if (axis < 0) @@ -738,7 +758,7 @@ void KernelGenerator::visit(const ir::operation::Pad &node) { const int32_t *from = reinterpret_cast<const int32_t *>(pad_base) + (n * 2); - const auto frontend_layout = _current_op_seq_layout; + const auto frontend_layout = _current_layout; const auto backend_layout = _tensor_reg->getAclTensor(input_index)->layout(); const auto axis = acl_common::ToARMComputeAxis(rank, n, frontend_layout, backend_layout).value(); @@ -762,8 +782,7 @@ void KernelGenerator::visit(const ir::operation::Pad &node) void KernelGenerator::visit(const ir::operation::Pool2D &node) { auto raw_fn = acl_common::kernelGenPool2D<::arm_compute::NEPoolingLayer>( - node, _ctx, _tensor_reg, _current_op_seq_layout, - acl_common::convertPoolType(node.param().op_type)); + node, _ctx, _tensor_reg, _current_layout, acl_common::convertPoolType(node.param().op_type)); const auto ofm_index{node.getOutputs().at(0)}; auto ofm_tensor = _tensor_reg->getAclTensor(ofm_index); @@ -836,7 +855,7 @@ void KernelGenerator::visit(const ir::operation::Reduce &node) // Convert to ACL axes taking into account negative values and possible duplicates. 
const auto &axes = _ctx.at(axes_index); const auto input_rank = _ctx.at(input_index).shape().rank(); - const auto frontend_layout = _current_op_seq_layout; + const auto frontend_layout = _current_layout; const auto backend_layout = input_tensor->layout(); const auto reduce_axes = acl_common::asCoordinates(axes, input_rank, frontend_layout, backend_layout); @@ -873,7 +892,7 @@ void KernelGenerator::visit(const ir::operation::Reshape &node) // NOTE This operation must not be changed the layout from frontend to backend // So, PermutationOperationPass makes layouts of frontend and backend the same. - const auto frontend_layout = _current_op_seq_layout; + const auto frontend_layout = _current_layout; const auto backend_layout = output_tensor->layout(); assert((_ctx.at(input_index).shape().rank() < 4 && _ctx.at(output_index).shape().rank() < 4) || frontend_layout == backend_layout); @@ -1047,7 +1066,7 @@ void KernelGenerator::visit(const ir::operation::Split &node) for (const auto &ofm_ind : output_indexes) output_tensors.emplace_back(_tensor_reg->getAclTensor(ofm_ind)->handle()); - const auto frontend_layout = _current_op_seq_layout; + const auto frontend_layout = _current_layout; const auto backend_layout = ifm_tensor->layout(); auto axis = _ctx.at(axis_index).asScalar<int32_t>(); if (axis < 0) @@ -1085,7 +1104,7 @@ void KernelGenerator::visit(const ir::operation::Slice &node) auto outputData_tensor = _tensor_reg->getAclTensor(output_index); auto inputData_tensor = _tensor_reg->getAclTensor(input_index); - const auto frontend_layout = _current_op_seq_layout; + const auto frontend_layout = _current_layout; const auto backend_layout = inputData_tensor->layout(); // Set initializers for indices data such as order of inputData @@ -1150,7 +1169,7 @@ void KernelGenerator::visit(const ir::operation::StridedSlice &node) auto outputData_tensor = _tensor_reg->getAclTensor(output_index); auto inputData_tensor = _tensor_reg->getAclTensor(input_index); - const auto frontend_layout = _current_op_seq_layout; + const auto frontend_layout = _current_layout; const auto backend_layout = inputData_tensor->layout(); // Set initializers for indices data such as order of inputData @@ -1244,9 +1263,9 @@ void KernelGenerator::visit(const ir::operation::TransposeConv &node) const auto ker_index{node.getInputs().at(ir::operation::TransposeConv::Input::KERNEL)}; const auto ifm_index{node.getInputs().at(ir::operation::TransposeConv::Input::INPUT)}; - const auto ofm_shape = _ctx.at(ofm_index).shape().asFeature(_current_op_seq_layout); - const auto ifm_shape = _ctx.at(ifm_index).shape().asFeature(_current_op_seq_layout); - const auto ker_shape = _ctx.at(ker_index).shape().asFeature(_current_op_seq_layout); + const auto ofm_shape = _ctx.at(ofm_index).shape().asFeature(_current_layout); + const auto ifm_shape = _ctx.at(ifm_index).shape().asFeature(_current_layout); + const auto ker_shape = _ctx.at(ker_index).shape().asFeature(_current_layout); const auto stride = node.param().stride; @@ -1285,7 +1304,7 @@ void KernelGenerator::visit(const ir::operation::Transpose &node) auto ofm_tensor = _tensor_reg->getAclTensor(ofm_idx); const auto ifm_tensor = _tensor_reg->getAclTensor(ifm_idx); - const auto frontend_layout = _current_op_seq_layout; + const auto frontend_layout = _current_layout; const auto backend_layout = ifm_tensor->layout(); const auto rank = _ctx.at(ifm_idx).shape().rank(); @@ -1340,7 +1359,7 @@ void KernelGenerator::visit(const ir::operation::Unpack &node) for (const auto &output_index : output_indexes) 
outputs.emplace_back(_tensor_reg->getAclTensor(output_index)->handle()); - const auto frontend_layout = _current_op_seq_layout; + const auto frontend_layout = _current_layout; const auto backend_layout = _tensor_reg->getAclTensor(input_index)->layout(); if (axis < 0) axis += input_rank; @@ -1413,7 +1432,7 @@ void KernelGenerator::visit(const ir::operation::OneHot &node) auto offvalue_tensor = _tensor_reg->getAclTensor(offvalue_idx); const size_t output_rank = _ctx.at(out_idx).shape().rank(); - const auto frontend_layout = _current_op_seq_layout; + const auto frontend_layout = _current_layout; const auto backend_layout = output_tensor->layout(); int32_t axis = node.param().axis == -1 ? output_rank - 1 : node.param().axis; axis = acl_common::ToARMComputeAxis(output_rank, axis, frontend_layout, backend_layout).value(); diff --git a/runtime/onert/backend/acl_neon/KernelGenerator.h b/runtime/onert/backend/acl_neon/KernelGenerator.h index 4d269cde5..2a4b307b8 100644 --- a/runtime/onert/backend/acl_neon/KernelGenerator.h +++ b/runtime/onert/backend/acl_neon/KernelGenerator.h @@ -17,7 +17,7 @@ #ifndef __ONERT_BACKEND_ACL_NEON_KERNEL_GENERATOR_H__ #define __ONERT_BACKEND_ACL_NEON_KERNEL_GENERATOR_H__ -#include <backend/IKernelGenerator.h> +#include <backend/cpu_common/KernelGeneratorBase.h> #include "ir/Operands.h" #include "TensorBuilder.h" @@ -31,7 +31,7 @@ namespace backend namespace acl_neon { -class KernelGenerator : public IKernelGenerator +class KernelGenerator : public cpu_common::KernelGeneratorBase { public: KernelGenerator(const ir::Operands &operands_ctx, const ir::Operations &operations_ctx, @@ -39,17 +39,20 @@ public: const std::shared_ptr<acl_common::AclTensorRegistry<TensorManager>> &_tensor_reg); void visit(const ir::OpSequence &) override; - void visit(const ir::operation::ArgMax &) override; + + void visit(const ir::operation::ArgMinMax &) override; void visit(const ir::operation::BatchToSpaceND &) override; void visit(const ir::operation::BinaryArithmetic &) override; + void visit(const ir::operation::Comparison &) override; + void visit(const ir::operation::Concat &) override; void visit(const ir::operation::Conv2D &) override; void visit(const ir::operation::DepthToSpace &) override; void visit(const ir::operation::DepthwiseConv2D &) override; - void visit(const ir::operation::Concat &) override; void visit(const ir::operation::ElementwiseActivation &) override; void visit(const ir::operation::ElementwiseBinary &) override; void visit(const ir::operation::ElementwiseUnary &) override; void visit(const ir::operation::EmbeddingLookup &) override; + void visit(const ir::operation::ExpandDims &) override; void visit(const ir::operation::FullyConnected &) override; void visit(const ir::operation::Gather &) override; void visit(const ir::operation::HashtableLookup &) override; @@ -57,36 +60,34 @@ public: void visit(const ir::operation::L2Normalization &) override; void visit(const ir::operation::LocalResponseNormalization &) override; void visit(const ir::operation::LSTM &) override; + void visit(const ir::operation::OneHot &) override; void visit(const ir::operation::Pack &) override; void visit(const ir::operation::Pad &) override; - void visit(const ir::operation::Pool2D &) override; void visit(const ir::operation::Permute &) override; + void visit(const ir::operation::Pool2D &) override; void visit(const ir::operation::PReLU &) override; void visit(const ir::operation::Reduce &) override; void visit(const ir::operation::Reshape &) override; void visit(const 
ir::operation::ResizeBilinear &) override; void visit(const ir::operation::RNN &) override; - void visit(const ir::operation::Squeeze &) override; + void visit(const ir::operation::Slice &) override; void visit(const ir::operation::Softmax &) override; void visit(const ir::operation::SpaceToBatchND &) override; void visit(const ir::operation::SpaceToDepth &) override; void visit(const ir::operation::Split &) override; void visit(const ir::operation::SquaredDifference &) override; - void visit(const ir::operation::Slice &) override; + void visit(const ir::operation::Squeeze &) override; void visit(const ir::operation::StridedSlice &) override; - void visit(const ir::operation::TransposeConv &) override; void visit(const ir::operation::Transpose &) override; + void visit(const ir::operation::TransposeConv &) override; void visit(const ir::operation::Unpack &) override; - void visit(const ir::operation::ExpandDims &) override; - void visit(const ir::operation::Comparison &) override; - void visit(const ir::operation::OneHot &) override; private: const ir::Operands &_ctx; const ir::Operations &_operations_ctx; std::shared_ptr<TensorBuilder> _tensor_builder; std::shared_ptr<acl_common::AclTensorRegistry<TensorManager>> _tensor_reg; - ir::Layout _current_op_seq_layout; + ir::Layout _current_layout; }; } // namespace acl_neon diff --git a/runtime/onert/backend/acl_neon/Optimizer.h b/runtime/onert/backend/acl_neon/Optimizer.h index 5fe0d519c..b8fb343e9 100644 --- a/runtime/onert/backend/acl_neon/Optimizer.h +++ b/runtime/onert/backend/acl_neon/Optimizer.h @@ -17,8 +17,7 @@ #ifndef __ONERT_BACKEND_ACL_NEON_OPTIMIZER_H__ #define __ONERT_BACKEND_ACL_NEON_OPTIMIZER_H__ -#include <backend/IOptimizer.h> -#include <backend/BackendContext.h> +#include "BackendContext.h" #include "TensorBuilder.h" namespace onert @@ -28,12 +27,12 @@ namespace backend namespace acl_neon { -class Optimizer : public IOptimizer +class Optimizer { public: Optimizer(BackendContext *context); - void optimize() override; + void optimize(); private: BackendContext *_context; diff --git a/runtime/onert/backend/acl_neon/acl_neon.cc b/runtime/onert/backend/acl_neon/acl_neon.cc index f490d132d..6535fb291 100644 --- a/runtime/onert/backend/acl_neon/acl_neon.cc +++ b/runtime/onert/backend/acl_neon/acl_neon.cc @@ -14,20 +14,11 @@ * limitations under the License. 
*/ -#include <util/logging.h> - #include "Backend.h" extern "C" { -onert::backend::Backend *onert_backend_create() -{ - VERBOSE(onert_backend_create) << "'acl_neon' loaded\n"; - return new onert::backend::acl_neon::Backend; -} -void onert_backend_destroy(onert::backend::Backend *backend) -{ - VERBOSE(onert_backend_create) << "'acl_neon' unloaded\n"; - delete backend; -} +onert::backend::Backend *onert_backend_create() { return new onert::backend::acl_neon::Backend; } + +void onert_backend_destroy(onert::backend::Backend *backend) { delete backend; } } diff --git a/runtime/onert/backend/cpu/Backend.h b/runtime/onert/backend/cpu/Backend.h index fc8574b26..0b416a7e9 100644 --- a/runtime/onert/backend/cpu/Backend.h +++ b/runtime/onert/backend/cpu/Backend.h @@ -54,8 +54,6 @@ public: context->constant_initializer = std::make_shared<ConstantInitializer>(operands, tr); context->kernel_gen = std::make_shared<KernelGenerator>(operands, operations, tb, tr, kb, context->external_context()); - context->tensor_register = nullptr; - context->optimizer = nullptr; return context; } diff --git a/runtime/onert/backend/cpu/BackendContext.cc b/runtime/onert/backend/cpu/BackendContext.cc new file mode 100644 index 000000000..6b958c1b7 --- /dev/null +++ b/runtime/onert/backend/cpu/BackendContext.cc @@ -0,0 +1,147 @@ +/* + * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "BackendContext.h" + +#include "TensorBuilder.h" +#include "KernelGenerator.h" +#include "util/logging.h" +#include "ir/Index.h" +#include "ir/OperandIndexMap.h" +#include "ir/OperandIndexSequence.h" +#include "backend/cpu_common/BackendContextHelpers.h" + +namespace onert +{ +namespace backend +{ +namespace cpu +{ + +void BackendContext::initConsts() +{ + for (auto &op : operation_list()) + { + constant_initializer->setLayout(op.layout); + graph()->operations().at(op.index).accept(*constant_initializer); + } + + for (auto ind : operand_list()) + { + const auto &obj = graph()->operands().at(ind); + if (obj.isConstant() && !constant_initializer->exist(ind)) + { + constant_initializer->registerDefaultInitializer(ind, obj); + } + } + + constant_initializer->run(); +} + +ITensorRegistry *BackendContext::genTensors(const std::vector<onert::ir::OpSequenceIndex> &order, + const ir::OpSequences &op_seqs, + const ir::LowerInfoMap &lower_info) +{ + auto model_io = (graph()->getInputs() + graph()->getOutputs()) | ir::Remove::UNDEFINED | + ir::Remove::DUPLICATED; + for (auto index : operand_list()) + { + if (model_io.contains(index)) + continue; + const auto &obj = graph()->operands().at(index); + const auto frontend_layout = [&]() { + if (obj.getUses().size() == 0) + return ir::Layout::UNKNOWN; + auto use_op_ind = *obj.getUses().begin(); // FIXME What if it has two or more uses? 
+ for (auto &operation_info : operation_list()) + { + if (operation_info.index == use_op_ind) + return operation_info.layout; + } + return ir::Layout::UNKNOWN; + }(); + const auto &permute_factor = lower_info.operand.at(index)->def_factors().getOnlyElement(); + if (permute_factor.backend() != backend()) + continue; + const auto backend_layout = permute_factor.layout(); + ir::OperandInfo backend_info{permuteShape(obj.shape(), frontend_layout, backend_layout), + obj.typeInfo(), obj.info().memAllocType(), obj.isConstant()}; + tensor_builder->registerTensorInfo(index, backend_info, backend_layout); + } + + // TODO Get compiler options from compiler, and use it rather than getting it from Env + if (util::getConfigString(util::config::EXECUTOR) == "Linear") + { + cpu_common::planTensors(*this, order, op_seqs, lower_info); + } + else + { + // For the executors that does not have fixed linear execution order: + // To make tensors never be deallocated, this is a workaround to use static memory planner + for (auto ind : operand_list()) + { + if (tensor_builder->isRegistered(ind)) + tensor_builder->notifyFirstUse(ind); + } + } + + tensor_builder->prepare(); + + return tensor_registry.get(); +} + +FunctionMap BackendContext::genKernels(const std::vector<onert::ir::OpSequenceIndex> &order, + const ir::OpSequences &op_seqs) +{ + FunctionMap ret; + + for (auto op_seq_ind : order) + { + const auto &op_seq = op_seqs.at(op_seq_ind); + bool assigned = [&]() { + for (auto op_info : operation_list()) + if (op_seq.exist(op_info.index)) + return true; + return false; + }(); + if (!assigned) + continue; + auto fn_seq = kernel_gen->generate(op_seqs.at(op_seq_ind)); + ret.emplace_back(op_seq_ind, std::move(fn_seq)); + } + + initConsts(); + + // NOTE For memory optimization, we want to free some operand data + for (auto ind : operand_list()) + { + // TODO Remove const_cast + auto &obj = const_cast<ir::Graph *>(graph())->operands().at(ind); + obj.releaseData(); + } + + for (auto &it : ret) + { + auto &fn_seq = it.second; + fn_seq->iterate([&](exec::IFunction &ifunc) { ifunc.prepare(); }); + } + + return ret; +} + +} // namespace cpu +} // namespace backend +} // namespace onert diff --git a/runtime/onert/backend/cpu/BackendContext.h b/runtime/onert/backend/cpu/BackendContext.h index e90b21054..0a4106d33 100644 --- a/runtime/onert/backend/cpu/BackendContext.h +++ b/runtime/onert/backend/cpu/BackendContext.h @@ -18,6 +18,9 @@ #define __ONERT_BACKEND_CPU_BACKEND_CONTEXT_H__ #include <backend/BackendContext.h> +#include "TensorBuilder.h" +#include "ConstantInitializer.h" +#include "KernelGenerator.h" #include "ExternalContext.h" namespace onert @@ -32,21 +35,35 @@ class BackendContext : public onert::backend::BackendContext public: BackendContext(const Backend *backend, const ir::Graph *graph, std::shared_ptr<ITensorRegistry> tensor_registry = nullptr, - std::shared_ptr<ITensorBuilder> tensor_builder = nullptr, - std::shared_ptr<IConstantInitializer> constant_initializer = nullptr, - std::shared_ptr<IKernelGenerator> kernel_gen = nullptr, - std::shared_ptr<ITensorRegister> tensor_register = nullptr, - std::shared_ptr<IOptimizer> optimizer = nullptr) - : onert::backend::BackendContext(backend, graph, tensor_registry, tensor_builder, - constant_initializer, kernel_gen, tensor_register, - optimizer), - _external_context(new ExternalContext) + std::shared_ptr<TensorBuilder> tensor_builder = nullptr, + std::shared_ptr<ConstantInitializer> constant_initializer = nullptr, + std::shared_ptr<KernelGenerator> kernel_gen = nullptr) 
+ : onert::backend::BackendContext(backend, graph, tensor_registry), + tensor_builder{tensor_builder}, constant_initializer{constant_initializer}, + kernel_gen{kernel_gen}, _external_context(new ExternalContext) { } + ITensorRegistry *genTensors(const std::vector<onert::ir::OpSequenceIndex> &order, + const ir::OpSequences &op_seqs, + const ir::LowerInfoMap &lower_info) override; + FunctionMap genKernels(const std::vector<onert::ir::OpSequenceIndex> &order, + const ir::OpSequences &op_seqs) override; + std::shared_ptr<ExternalContext> external_context() { return _external_context; } private: + void initConsts(); + void planTensors(const std::vector<onert::ir::OpSequenceIndex> &order, + const ir::OpSequences &op_seqs, const ir::LowerInfoMap &lower_info); + +public: + // TODO Make it private + std::shared_ptr<TensorBuilder> tensor_builder; + std::shared_ptr<ConstantInitializer> constant_initializer; + std::shared_ptr<KernelGenerator> kernel_gen; + +private: // NOTE ruy context has a thread pool, and when multiple ruy contexts are created, // the thread pool is also created in duplicate // TODO Create one ruy context for session diff --git a/runtime/onert/backend/cpu/ConstantInitializer.cc b/runtime/onert/backend/cpu/ConstantInitializer.cc deleted file mode 100644 index 6f6eb77bc..000000000 --- a/runtime/onert/backend/cpu/ConstantInitializer.cc +++ /dev/null @@ -1,94 +0,0 @@ -/* - * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ - -#include "ConstantInitializer.h" -#include "Tensor.h" - -namespace onert -{ -namespace backend -{ -namespace cpu -{ - -ConstantInitializer::ConstantInitializer(const ir::Operands &operands, - const std::shared_ptr<ITensorRegistry> &tensor_reg) - : IConstantInitializer{operands}, _tensor_reg{tensor_reg} -{ - // DO NOTHING -} - -void ConstantInitializer::registerDefaultInitializer(const ir::OperandIndex &index, - const ir::Operand &obj) -{ - registerExternalInitializer(index, obj); -} - -void ConstantInitializer::registerExternalInitializer(const ir::OperandIndex &index, - const ir::Operand &obj) -{ - // For only CONSTANTS - // TODO Add to check if tensor has been allocated - if (!obj.isConstant()) - return; - - _init_map[index] = [](const onert::ir::Operand &model_obj, onert::backend::ITensor &itensor) { - auto data = model_obj.shareData(); - assert(data && data->base()); - ExternalTensor &tensor = dynamic_cast<ExternalTensor &>(itensor); - tensor.setData(data); - }; -} - -void ConstantInitializer::visit(const ir::operation::Conv2D &node) -{ - const auto &kernel_index = node.getInputs().at(ir::operation::Conv2D::KERNEL); - const auto &kernel_obj = _operands.at(kernel_index); - registerExternalInitializer(kernel_index, kernel_obj); - - const auto &bias_index = node.getInputs().at(ir::operation::Conv2D::BIAS); - const auto &bias_obj = _operands.at(bias_index); - registerExternalInitializer(bias_index, bias_obj); -} - -void ConstantInitializer::visit(const ir::operation::DepthwiseConv2D &node) -{ - const auto &kernel_index = node.getInputs().at(ir::operation::DepthwiseConv2D::KERNEL); - const auto &kernel_obj = _operands.at(kernel_index); - registerExternalInitializer(kernel_index, kernel_obj); - - const auto &bias_index = node.getInputs().at(ir::operation::DepthwiseConv2D::BIAS); - const auto &bias_obj = _operands.at(bias_index); - registerExternalInitializer(bias_index, bias_obj); -} - -void ConstantInitializer::visit(const ir::operation::FullyConnected &node) -{ - const auto &weight_index = node.getInputs().at(ir::operation::FullyConnected::WEIGHT); - const auto &weight_obj = _operands.at(weight_index); - registerExternalInitializer(weight_index, weight_obj); - - const auto &bias_index = node.getInputs().at(ir::operation::FullyConnected::BIAS); - if (!bias_index.undefined()) - { - const auto &bias_obj = _operands.at(bias_index); - registerExternalInitializer(bias_index, bias_obj); - } -} - -} // namespace cpu -} // namespace backend -} // namespace onert diff --git a/runtime/onert/backend/cpu/ConstantInitializer.h b/runtime/onert/backend/cpu/ConstantInitializer.h index c016c83bc..d7858c0f6 100644 --- a/runtime/onert/backend/cpu/ConstantInitializer.h +++ b/runtime/onert/backend/cpu/ConstantInitializer.h @@ -14,13 +14,10 @@ * limitations under the License. 
*/ -#ifndef __ONERT_COMPILER_CPU_CONSTANT_INITIALIZER_H__ -#define __ONERT_COMPILER_CPU_CONSTANT_INITIALIZER_H__ +#ifndef __ONERT_BACKEND_CPU_CONSTANT_INITIALIZER_H__ +#define __ONERT_BACKEND_CPU_CONSTANT_INITIALIZER_H__ -#include "backend/cpu_common/TensorRegistry.h" - -#include <backend/IConstantInitializer.h> -#include <ir/Operands.h> +#include <backend/cpu_common/ConstantInitializer.h> namespace onert { @@ -29,35 +26,10 @@ namespace backend namespace cpu { -class ConstantInitializer : public IConstantInitializer -{ -public: - ConstantInitializer(const ir::Operands &operands, - const std::shared_ptr<ITensorRegistry> &tensor_reg); - -public: - void registerDefaultInitializer(const ir::OperandIndex &index, const ir::Operand &obj) override; - - // TODO: For now the only cpu backend supports constant tensor to use data from external - // If the other backend supports (to do this, - // ExternalTensor should be abstract such as IExternal, maybe), - // this can be an interface of IConstantInitializer - void registerExternalInitializer(const ir::OperandIndex &, const ir::Operand &); - -public: - void visit(const ir::operation::Conv2D &) override; - void visit(const ir::operation::DepthwiseConv2D &) override; - void visit(const ir::operation::FullyConnected &) override; - -private: - std::shared_ptr<ITensorRegistry> tensor_registry() const override { return _tensor_reg; } - -private: - std::shared_ptr<ITensorRegistry> _tensor_reg; -}; +using ConstantInitializer = cpu_common::ConstantInitializer; } // namespace cpu } // namespace backend } // namespace onert -#endif // __ONERT_COMPILER_CPU_CONSTANT_INITIALIZER_H__ +#endif // __ONERT_BACKEND_CPU_CONSTANT_INITIALIZER_H__ diff --git a/runtime/onert/backend/cpu/ExternalContext.h b/runtime/onert/backend/cpu/ExternalContext.h index 32e249f5a..f5d11f4f1 100644 --- a/runtime/onert/backend/cpu/ExternalContext.h +++ b/runtime/onert/backend/cpu/ExternalContext.h @@ -17,7 +17,6 @@ #ifndef __ONERT_BACKEND_CPU_EXTERNAL_CONTEXT_H__ #define __ONERT_BACKEND_CPU_EXTERNAL_CONTEXT_H__ -#include <backend/IExternalContext.h> #include <util/ConfigSource.h> #include <ruy/context.h> @@ -33,7 +32,7 @@ namespace backend namespace cpu { -class ExternalContext : public IExternalContext +class ExternalContext { public: ExternalContext() : _ruy_context(new ruy::Context) diff --git a/runtime/onert/backend/cpu/KernelGenerator.cc b/runtime/onert/backend/cpu/KernelGenerator.cc index 451815b65..25756eced 100644 --- a/runtime/onert/backend/cpu/KernelGenerator.cc +++ b/runtime/onert/backend/cpu/KernelGenerator.cc @@ -23,6 +23,7 @@ #include "ops/CompareLayer.h" #include "ops/ConcatLayer.h" #include "ops/ConvolutionLayer.h" +#include "ops/DepthToSpaceLayer.h" #include "ops/DepthwiseConvolutionLayer.h" #include "ops/EinsumLayer.h" #include "ops/ElementwiseActivationLayer.h" @@ -108,12 +109,16 @@ convertElementwiseActivationType(ir::operation::ElementwiseActivation::Type type { switch (type_ir) { + case ir::operation::ElementwiseActivation::Type::ELU: + return ops::ElementwiseActivationType::kElu; case ir::operation::ElementwiseActivation::Type::LOGISTIC: return ops::ElementwiseActivationType::kLogistic; case ir::operation::ElementwiseActivation::Type::RELU: return ops::ElementwiseActivationType::kReLU; case ir::operation::ElementwiseActivation::Type::TANH: return ops::ElementwiseActivationType::kTanh; + case ir::operation::ElementwiseActivation::Type::LEAKY_RELU: + return ops::ElementwiseActivationType::kLeakyReLU; default: throw std::runtime_error("cpu KernelGenerator : Not supported 
operation yet"); } @@ -124,6 +129,8 @@ convertElementwiseBinaryType(ir::operation::ElementwiseBinary::ElementwiseBinary { switch (type_ir) { + case ir::operation::ElementwiseBinary::ElementwiseBinaryType::LOGICAL_AND: + return ops::ElementwiseBinaryType::kLogicalAnd; case ir::operation::ElementwiseBinary::ElementwiseBinaryType::LOGICAL_OR: return ops::ElementwiseBinaryType::kLogicalOr; case ir::operation::ElementwiseBinary::ElementwiseBinaryType::MAX: @@ -167,6 +174,10 @@ ops::ElementwiseUnaryType convertElementwiseUnaryType(ir::operation::Elementwise return ops::ElementwiseUnaryType::kRSqrt; case ir::operation::ElementwiseUnary::Type::SIN: return ops::ElementwiseUnaryType::kSin; + case ir::operation::ElementwiseUnary::Type::SQRT: + return ops::ElementwiseUnaryType::kSqrt; + case ir::operation::ElementwiseUnary::Type::SQUARE: + return ops::ElementwiseUnaryType::kSquare; case ir::operation::ElementwiseUnary::Type::ZEROS_LIKE: return ops::ElementwiseUnaryType::kZerosLike; default: @@ -217,7 +228,7 @@ KernelGenerator::KernelGenerator( const std::shared_ptr<ExternalContext> &external_context) : _ctx(operands_ctx), _operations_ctx{operations_ctx}, _tensor_builder(tensor_builder), _tensor_reg{tensor_reg}, _kernel_builder(kernel_builder), - _current_op_seq_layout(ir::Layout::UNKNOWN), _external_context(external_context) + _current_layout(ir::Layout::UNKNOWN), _external_context(external_context) { // DO NOTHING } @@ -260,7 +271,7 @@ void KernelGenerator::visit(const ir::OpSequence &op_seq) _return_fn_seq->dynamic_tensor_ctx(dyn_ctx); } - _current_op_seq_layout = op_seq.getLayout(); + _current_layout = op_seq.getLayout(); for (const auto &operation_idx : op_seq.operations()) { const auto &node = _operations_ctx.at(operation_idx); @@ -314,8 +325,8 @@ void KernelGenerator::visit(const ir::operation::Conv2D &node) _return_fn = std::move(fn); return; } - const auto ifm_shape = _ctx.at(ifm_index).shape().asFeature(_current_op_seq_layout); - const auto ofm_shape = _ctx.at(ofm_index).shape().asFeature(_current_op_seq_layout); + const auto ifm_shape = _ctx.at(ifm_index).shape().asFeature(_current_layout); + const auto ofm_shape = _ctx.at(ofm_index).shape().asFeature(_current_layout); // Kernel format is [depth_out, kernel_height, kernel_width, depth_in]. const auto &ker_shape = _ctx.at(ker_index).shape(); const auto ker_height = ker_shape.dim(1); @@ -342,8 +353,8 @@ void KernelGenerator::visit(const ir::operation::DepthwiseConv2D &node) const auto bias_index{node.getInputs().at(DepthwiseConv2D::Input::BIAS)}; const auto stride = node.param().stride; - const auto ifm_shape = _ctx.at(ifm_index).shape().asFeature(_current_op_seq_layout); - const auto ofm_shape = _ctx.at(ofm_index).shape().asFeature(_current_op_seq_layout); + const auto ifm_shape = _ctx.at(ifm_index).shape().asFeature(_current_layout); + const auto ofm_shape = _ctx.at(ofm_index).shape().asFeature(_current_layout); // Kernel format is [1, kernel_height, kernel_width, depth_out]. 
const auto &ker_shape = _ctx.at(ker_index).shape(); const auto ker_height = ker_shape.dim(1); @@ -364,7 +375,7 @@ void KernelGenerator::visit(const ir::operation::DepthwiseConv2D &node) fn->configure(ifm_tensor, ker_tensor, bias_tensor, padding.left, padding.right, padding.top, padding.bottom, stride.horizontal, stride.vertical, multiplier, dilation_width, - dilation_height, activation, ofm_tensor); + dilation_height, activation, ofm_tensor, _external_context); _return_fn = std::move(fn); } @@ -374,7 +385,7 @@ void KernelGenerator::visit(const ir::operation::Concat &node) const auto ofm_index{node.getOutputs().at(0)}; const auto rank = _ctx.at(ofm_index).shape().rank(); - const auto axis = ops::getAxis(rank, node.param().axis, _current_op_seq_layout); + const auto axis = ops::getAxis(rank, node.param().axis, _current_layout); auto output_tensor = _tensor_reg->getPortableTensor(ofm_index); @@ -418,16 +429,15 @@ void KernelGenerator::visit(const ir::operation::BatchToSpaceND &node) void KernelGenerator::visit(const ir::operation::Fill &node) { const auto output_index{node.getOutputs().at(0)}; - const auto input_index{node.getInputs().at(ir::operation::Fill::Input::INPUT)}; + // SHAPE input is used for shape inference const auto value_index{node.getInputs().at(ir::operation::Fill::Input::VALUE)}; auto output_tensor = _tensor_reg->getPortableTensor(output_index); - auto input_tensor = _tensor_reg->getPortableTensor(input_index); auto value_tensor = _tensor_reg->getPortableTensor(value_index); auto fn = std::make_unique<ops::FillLayer>(); - fn->configure(input_tensor, value_tensor, output_tensor); + fn->configure(value_tensor, output_tensor); _return_fn = std::move(fn); } @@ -576,7 +586,7 @@ void KernelGenerator::visit(const ir::operation::Gather &node) assert(backend_layout == indices_tensor->layout()); const auto &input_shape = _ctx.at(input_index).shape(); UNUSED_RELEASE(input_shape); - assert(input_shape.rank() < 4 || _current_op_seq_layout == backend_layout); + assert(input_shape.rank() < 4 || _current_layout == backend_layout); const auto axis_raw = node.param().axis; const auto axis_value = (axis_raw < 0 ? 
(input_shape.rank() + axis_raw) : axis_raw); @@ -640,7 +650,7 @@ void KernelGenerator::visit(const ir::operation::Custom &node) for (auto &idx : opSeq) { const auto &operand = _ctx.at(idx); - // TODO make sure using `_current_op_seq_layout` is correct for custom operations + // TODO make sure using `_current_layout` is correct for custom operations types.emplace_back(custom::TypeInfo{operand.shape(), operand.typeInfo().type()}); auto in_tensor = _tensor_reg->getPortableTensor(idx); tensors.emplace_back(in_tensor); @@ -713,15 +723,14 @@ void KernelGenerator::visit(const ir::operation::ExpandDims &node) { const auto output_index{node.getOutputs().at(0)}; const auto input_index{node.getInputs().at(ir::operation::ExpandDims::Input::INPUT)}; - const auto axis_index{node.getInputs().at(ir::operation::ExpandDims::Input::AXIS)}; + // AXIS input is used for output shape inference auto output_tensor = _tensor_reg->getPortableTensor(output_index); auto input_tensor = _tensor_reg->getPortableTensor(input_index); - auto axis_tensor = _tensor_reg->getPortableTensor(axis_index); auto fn = std::make_unique<ops::ExpandDimsLayer>(); - fn->configure(input_tensor, axis_tensor, output_tensor); + fn->configure(input_tensor, output_tensor); _return_fn = std::move(fn); } @@ -731,7 +740,7 @@ void KernelGenerator::visit(const ir::operation::Pack &node) const auto ofm_index{node.getOutputs().at(0)}; const auto rank = _ctx.at(ofm_index).shape().rank(); - const auto axis = ops::getAxis(rank, node.param().axis, _current_op_seq_layout); + const auto axis = ops::getAxis(rank, node.param().axis, _current_layout); assert(-rank <= axis && axis < rank); @@ -753,7 +762,7 @@ void KernelGenerator::visit(const ir::operation::Unpack &node) const auto input_index{node.getInputs().at(0)}; const auto rank = _ctx.at(input_index).shape().rank(); - const auto axis = ops::getAxis(rank, node.param().axis, _current_op_seq_layout); + const auto axis = ops::getAxis(rank, node.param().axis, _current_layout); assert(rank == 0 || (-rank <= axis && axis < rank)); @@ -1004,11 +1013,11 @@ void KernelGenerator::visit(const ir::operation::Reverse &node) _return_fn = std::move(fn); } -void KernelGenerator::visit(const ir::operation::ArgMax &node) +void KernelGenerator::visit(const ir::operation::ArgMinMax &node) { const auto output_index{node.getOutputs().at(0)}; - const auto input_index{node.getInputs().at(ir::operation::ArgMax::INPUT)}; - const auto axis_index{node.getInputs().at(ir::operation::ArgMax::AXIS)}; + const auto input_index{node.getInputs().at(ir::operation::ArgMinMax::INPUT)}; + const auto axis_index{node.getInputs().at(ir::operation::ArgMinMax::AXIS)}; auto output_tensor = _tensor_reg->getPortableTensor(output_index); auto input_tensor = _tensor_reg->getPortableTensor(input_index); @@ -1016,7 +1025,7 @@ void KernelGenerator::visit(const ir::operation::ArgMax &node) auto fn = std::make_unique<ops::ArgMinMaxLayer>(); - fn->configure(input_tensor, output_tensor, axis_tensor, /* is_arg_max */ true); + fn->configure(input_tensor, output_tensor, axis_tensor, node.param().is_arg_max); _return_fn = std::move(fn); } @@ -1029,8 +1038,8 @@ void KernelGenerator::visit(const ir::operation::Pool2D &node) const auto kh = node.param().kh; const auto kw = node.param().kw; const auto stride = node.param().stride; - const auto ifm_shape = _ctx.at(ifm_index).shape().asFeature(_current_op_seq_layout); - const auto ofm_shape = _ctx.at(ofm_index).shape().asFeature(_current_op_seq_layout); + const auto ifm_shape = 
_ctx.at(ifm_index).shape().asFeature(_current_layout); + const auto ofm_shape = _ctx.at(ofm_index).shape().asFeature(_current_layout); const auto padding = ir::calculatePadding(node.param().padding, ifm_shape, ofm_shape, stride, kw, kh); const auto activation = node.param().activation; @@ -1255,6 +1264,21 @@ void KernelGenerator::visit(const ir::operation::SpaceToBatchND &node) _return_fn = std::move(fn); } +void KernelGenerator::visit(const ir::operation::DepthToSpace &node) +{ + const auto input_index{node.getInputs().at(ir::operation::DepthToSpace::Input::INPUT)}; + const auto output_index{node.getOutputs().at(0)}; + auto block_size = node.param().block_size; + + auto input_tensor = _tensor_reg->getPortableTensor(input_index); + auto output_tensor = _tensor_reg->getPortableTensor(output_index); + + auto fn = std::make_unique<ops::DepthToSpaceLayer>(); + + fn->configure(input_tensor, block_size, output_tensor); + _return_fn = std::move(fn); +} + void KernelGenerator::visit(const ir::operation::SpaceToDepth &node) { const auto input_index{node.getInputs().at(ir::operation::SpaceToDepth::Input::INPUT)}; diff --git a/runtime/onert/backend/cpu/KernelGenerator.h b/runtime/onert/backend/cpu/KernelGenerator.h index 5df77607f..3a4cfbffa 100644 --- a/runtime/onert/backend/cpu/KernelGenerator.h +++ b/runtime/onert/backend/cpu/KernelGenerator.h @@ -23,7 +23,7 @@ #include "Tensor.h" #include <backend/CustomKernelBuilder.h> -#include <backend/IKernelGenerator.h> +#include <backend/cpu_common/KernelGeneratorBase.h> #include <ir/Operands.h> #include <ir/Operations.h> @@ -34,7 +34,7 @@ namespace backend namespace cpu { -class KernelGenerator : public IKernelGenerator +class KernelGenerator : public cpu_common::KernelGeneratorBase { public: KernelGenerator(const ir::Operands &operands_ctx, const ir::Operations &operations_ctx, @@ -43,59 +43,59 @@ public: const std::shared_ptr<custom::IKernelBuilder> &kernel_builder, const std::shared_ptr<ExternalContext> &external_context); - using IKernelGenerator::visit; + void visit(const ir::OpSequence &) override; void visit(const ir::operation::AddN &) override; - void visit(const ir::OpSequence &) override; + void visit(const ir::operation::ArgMinMax &) override; + void visit(const ir::operation::BatchMatMul &) override; + void visit(const ir::operation::BatchToSpaceND &) override; + void visit(const ir::operation::BinaryArithmetic &) override; + void visit(const ir::operation::BroadcastTo &) override; + void visit(const ir::operation::Comparison &) override; + void visit(const ir::operation::Concat &) override; void visit(const ir::operation::Conv2D &) override; + void visit(const ir::operation::Custom &node) override; + void visit(const ir::operation::DepthToSpace &) override; void visit(const ir::operation::DepthwiseConv2D &) override; - void visit(const ir::operation::Concat &) override; - void visit(const ir::operation::Fill &) override; - void visit(const ir::operation::FullyConnected &) override; - void visit(const ir::operation::Reshape &) override; - void visit(const ir::operation::Squeeze &) override; - void visit(const ir::operation::Softmax &) override; - void visit(const ir::operation::Comparison &) override; - void visit(const ir::operation::BinaryArithmetic &) override; void visit(const ir::operation::Einsum &) override; - void visit(const ir::operation::Gather &) override; - void visit(const ir::operation::Custom &node) override; void visit(const ir::operation::ElementwiseActivation &) override; void visit(const ir::operation::ElementwiseBinary &) 
override; void visit(const ir::operation::ElementwiseUnary &) override; void visit(const ir::operation::ExpandDims &) override; + void visit(const ir::operation::Fill &) override; + void visit(const ir::operation::FullyConnected &) override; + void visit(const ir::operation::FusedBatchNorm &) override; + void visit(const ir::operation::Gather &) override; + void visit(const ir::operation::L2Normalization &) override; + void visit(const ir::operation::LogSoftmax &) override; void visit(const ir::operation::LSTM &) override; - void visit(const ir::operation::Pad &) override; - void visit(const ir::operation::Pack &) override; - void visit(const ir::operation::Unpack &) override; + void visit(const ir::operation::MatrixBandPart &) override; void visit(const ir::operation::OneHot &) override; - void visit(const ir::operation::Transpose &) override; - void visit(const ir::operation::Reduce &) override; - void visit(const ir::operation::Select &) override; - void visit(const ir::operation::Slice &) override; - void visit(const ir::operation::StridedSlice &) override; - void visit(const ir::operation::Split &) override; - void visit(const ir::operation::Shape &) override; - void visit(const ir::operation::ResizeBilinear &node) override; - void visit(const ir::operation::Reverse &) override; - void visit(const ir::operation::ArgMax &) override; + void visit(const ir::operation::Pack &) override; + void visit(const ir::operation::Pad &) override; void visit(const ir::operation::Pool2D &) override; void visit(const ir::operation::Pow &) override; - void visit(const ir::operation::SquaredDifference &) override; - void visit(const ir::operation::Tile &) override; - void visit(const ir::operation::L2Normalization &) override; void visit(const ir::operation::Range &) override; void visit(const ir::operation::Rank &) override; - void visit(const ir::operation::MatrixBandPart &) override; - void visit(const ir::operation::BatchMatMul &) override; - void visit(const ir::operation::BatchToSpaceND &) override; - void visit(const ir::operation::BroadcastTo &) override; - void visit(const ir::operation::FusedBatchNorm &) override; - void visit(const ir::operation::LogSoftmax &) override; + void visit(const ir::operation::Reduce &) override; + void visit(const ir::operation::Reshape &) override; + void visit(const ir::operation::ResizeBilinear &node) override; + void visit(const ir::operation::Reverse &) override; + void visit(const ir::operation::Select &) override; + void visit(const ir::operation::Shape &) override; + void visit(const ir::operation::Slice &) override; + void visit(const ir::operation::Softmax &) override; void visit(const ir::operation::SpaceToBatchND &) override; void visit(const ir::operation::SpaceToDepth &) override; - void visit(const ir::operation::StatelessRandomUniform &) override; + void visit(const ir::operation::Split &) override; void visit(const ir::operation::SplitV &) override; + void visit(const ir::operation::SquaredDifference &) override; + void visit(const ir::operation::Squeeze &) override; + void visit(const ir::operation::StatelessRandomUniform &) override; + void visit(const ir::operation::StridedSlice &) override; + void visit(const ir::operation::Tile &) override; + void visit(const ir::operation::Transpose &) override; + void visit(const ir::operation::Unpack &) override; private: const ir::Operands &_ctx; @@ -103,7 +103,7 @@ private: std::shared_ptr<TensorBuilder> _tensor_builder; std::shared_ptr<cpu_common::TensorRegistry> _tensor_reg; 
std::shared_ptr<backend::custom::IKernelBuilder> _kernel_builder; - ir::Layout _current_op_seq_layout; + ir::Layout _current_layout; const std::shared_ptr<ExternalContext> _external_context; }; diff --git a/runtime/onert/backend/cpu/StaticTensorManager.cc b/runtime/onert/backend/cpu/StaticTensorManager.cc deleted file mode 100644 index 3edac897c..000000000 --- a/runtime/onert/backend/cpu/StaticTensorManager.cc +++ /dev/null @@ -1,107 +0,0 @@ -/* - * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#include "StaticTensorManager.h" -#include "Tensor.h" - -#include <util/logging.h> - -namespace onert -{ -namespace backend -{ -namespace cpu -{ - -StaticTensorManager::StaticTensorManager(const std::shared_ptr<cpu_common::TensorRegistry> ®, - cpu_common::DynamicTensorManager *dynamic_tensor_manager) - : _nonconst_mgr{new cpu_common::MemoryManager()}, _tensors{reg}, - _dynamic_tensor_manager{dynamic_tensor_manager} -{ - // DO NOTHING -} - -void StaticTensorManager::allocateNonconsts(void) -{ - _nonconst_mgr->allocate(); - - for (auto &pair : _tensors->native_tensors()) - { - const auto &ind = pair.first; - auto tensor = pair.second.get(); - if (!_as_constants[ind] && !tensor->is_dynamic()) - { - auto *buffer = _nonconst_mgr->getBuffer(ind); - tensor->setBuffer(buffer); - - VERBOSE(CPU_StaticTensorManager) << "TENSOR(#" << ind.value() - << "): " << static_cast<void *>(buffer) << std::endl; - } - } -} - -void StaticTensorManager::deallocateNonconsts(void) { _nonconst_mgr->deallocate(); } - -void StaticTensorManager::buildTensor(const ir::OperandIndex &ind, - const ir::OperandInfo &tensor_info, ir::Layout backend_layout, - bool as_const) -{ - assert(!_tensors->getITensor(ind)); - if (as_const) - { - auto tensor = std::make_unique<ExternalTensor>(tensor_info, backend_layout); - _tensors->setNativeTensor(ind, std::move(tensor)); - } - else - { - auto tensor = std::make_unique<Tensor>(tensor_info, backend_layout, - _dynamic_tensor_manager->dynamic_mem_mgr().get()); - _tensors->setNativeTensor(ind, std::move(tensor)); - } - _as_constants[ind] = as_const; -} - -void StaticTensorManager::claimPlan(const ir::OperandIndex &ind, uint32_t size) -{ - assert(_tensors->getITensor(ind)); - - // This method is called only when a tensor has proper shape - assert(!_tensors->getITensor(ind)->is_dynamic()); - - if (!_as_constants[ind]) - _nonconst_mgr->claimPlan(ind, size); -} - -void StaticTensorManager::releasePlan(const ir::OperandIndex &ind) -{ - assert(_tensors->getITensor(ind)); - - // This method is called only when a tensor has proper shape - assert(!_tensors->getITensor(ind)->is_dynamic()); - - if (!_as_constants[ind]) - _nonconst_mgr->releasePlan(ind); -} - -void StaticTensorManager::iterate(const std::function<void(const ir::OperandIndex &)> &fn) -{ - for (const auto &it : _tensors->native_tensors()) - fn(it.first); -} - -} // namespace cpu -} // namespace backend -} // namespace onert diff --git 
a/runtime/onert/backend/cpu/StaticTensorManager.h b/runtime/onert/backend/cpu/StaticTensorManager.h index 2af61e4e7..d07f0c814 100644 --- a/runtime/onert/backend/cpu/StaticTensorManager.h +++ b/runtime/onert/backend/cpu/StaticTensorManager.h @@ -17,13 +17,7 @@ #ifndef __ONERT_BACKEND_CPU_STATICTENSOR_MANAGER_H__ #define __ONERT_BACKEND_CPU_STATICTENSOR_MANAGER_H__ -#include "backend/IStaticTensorManager.h" -#include "backend/cpu_common/DynamicTensorManager.h" -#include "backend/cpu_common/MemoryManager.h" -#include "backend/cpu_common/TensorRegistry.h" -#include "backend/ITensorManager.h" -#include "ir/OperandIndexMap.h" -#include "ir/OperandInfo.h" +#include "backend/cpu_common/StaticTensorManager.h" namespace onert { @@ -32,30 +26,7 @@ namespace backend namespace cpu { -class StaticTensorManager : public backend::IStaticTensorManager -{ -public: - StaticTensorManager(const std::shared_ptr<cpu_common::TensorRegistry> ®, - cpu_common::DynamicTensorManager *dynamic_tensor_manager); - virtual ~StaticTensorManager() = default; - - void allocateNonconsts(void); - void deallocateNonconsts(void); - - void buildTensor(const ir::OperandIndex &ind, const ir::OperandInfo &tensor_info, - ir::Layout backend_layout, bool as_const); - - void claimPlan(const ir::OperandIndex &ind, uint32_t size); - void releasePlan(const ir::OperandIndex &ind); - - void iterate(const std::function<void(const ir::OperandIndex &)> &fn); - -private: - std::unique_ptr<cpu_common::MemoryManager> _nonconst_mgr; - const std::shared_ptr<cpu_common::TensorRegistry> _tensors; - ir::OperandIndexMap<bool> _as_constants; - cpu_common::DynamicTensorManager *_dynamic_tensor_manager; -}; +using StaticTensorManager = cpu_common::StaticTensorManager; } // namespace cpu } // namespace backend diff --git a/runtime/onert/backend/cpu/Tensor.h b/runtime/onert/backend/cpu/Tensor.h index 2ad2ad0fb..d663c3f50 100644 --- a/runtime/onert/backend/cpu/Tensor.h +++ b/runtime/onert/backend/cpu/Tensor.h @@ -28,92 +28,7 @@ namespace cpu { using Tensor = cpu_common::Tensor; - -/** - * @brief Class that uses data from external memory that is not managed by a backend - * instead of allocating and copying the data. ExternalTensor's data pointer points to - * an address of memory such as where memory is already allocated, or mmapped area. - * This is meaning that ExternalTensor can take all of types' ir::Data. - * To support this, assume below things no padding, always NHWC layout, - * constant tensor and not dynamic. - */ -class ExternalTensor : public Tensor -{ -public: - ExternalTensor() = delete; - virtual ~ExternalTensor(); - -public: - ExternalTensor(const ir::OperandInfo &info, const ir::Layout layout) - : Tensor(info, layout, nullptr) - { - assert(_layout == ir::Layout::NHWC); - assert(_info.isConstant()); - assert(_info.isDynamic() == false); - } - -public: - /** - * @brief set Data to be shared from external so that this ExternalTensor will not be - * allocated on CPU backend - * @param[in] data data of Operand to be set - */ - void setData(const std::shared_ptr<ir::Data> data) - { - assert(data != nullptr); - _data = data; - // Note. Some op such as cker::Conv could take buffer as nullptr. 
- // That's why _buffer also would be used - _buffer = const_cast<uint8_t *>(_data->base()); - } - -public: - uint8_t *buffer() const override { return _buffer; } - - bool is_constant() const override { return true; } - bool is_dynamic() const override { return false; } - void set_dynamic() override - { - throw std::runtime_error("This tensor does not support changing dynamic"); - } - - void setShape(const ir::Shape &) override - { - throw std::runtime_error("This tensor does not support changing shape"); - } - - void increase_ref() override { ++_num_references; } - - void decrease_ref() override - { - assert(_data != nullptr); - assert(_num_references > 0); - --_num_references; - if (_num_references == 0) - { - _data.reset(); - _buffer = nullptr; - } - } - - /** - * @brief Reset reference count to zero and release data - */ - void reset_ref() override - { - assert(_data != nullptr); - assert(_num_references > 0); - _num_references = 0; - - _data.reset(); - _buffer = nullptr; - } - - int32_t num_references() override { return _num_references; } - -private: - std::shared_ptr<const ir::Data> _data; -}; +using ExternalTensor = cpu_common::ExternalTensor; } // namespace cpu } // namespace backend diff --git a/runtime/onert/backend/cpu/TensorBuilder.h b/runtime/onert/backend/cpu/TensorBuilder.h index 448abc229..9d8a5deb5 100644 --- a/runtime/onert/backend/cpu/TensorBuilder.h +++ b/runtime/onert/backend/cpu/TensorBuilder.h @@ -20,7 +20,6 @@ #include <backend/cpu_common/DynamicTensorManager.h> #include <backend/cpu_common/TensorRegistry.h> -#include <backend/ITensorBuilder.h> #include <ir/OperandIndexMap.h> #include "StaticTensorManager.h" @@ -35,7 +34,7 @@ namespace backend namespace cpu { -class TensorBuilder : public ITensorBuilder +class TensorBuilder { public: TensorBuilder(const std::shared_ptr<cpu_common::TensorRegistry> &tensor_reg); @@ -47,18 +46,18 @@ public: * @param[in] layout Operand data layout */ void registerTensorInfo(const ir::OperandIndex &ind, const ir::OperandInfo &info, - ir::Layout backend_layout) override; + ir::Layout backend_layout); - void notifyFirstUse(const ir::OperandIndex &) override; - void notifyLastUse(const ir::OperandIndex &) override; + void notifyFirstUse(const ir::OperandIndex &); + void notifyLastUse(const ir::OperandIndex &); - bool isRegistered(const ir::OperandIndex &) const override; + bool isRegistered(const ir::OperandIndex &) const; - void prepare(void) override; - void allocate() override; - void postFunctionPrepare() override { /* DO NOTHING */} + void prepare(void); + void allocate(); + void postFunctionPrepare() { /* DO NOTHING */} - IDynamicTensorManager *dynamicTensorManager(void) override { return _dynamic_tensor_mgr.get(); } + IDynamicTensorManager *dynamicTensorManager(void) { return _dynamic_tensor_mgr.get(); } private: const std::shared_ptr<cpu_common::TensorRegistry> _tensor_reg; diff --git a/runtime/onert/backend/cpu/cpu.cc b/runtime/onert/backend/cpu/cpu.cc index 5385bb2a3..55538e2a6 100644 --- a/runtime/onert/backend/cpu/cpu.cc +++ b/runtime/onert/backend/cpu/cpu.cc @@ -16,18 +16,9 @@ #include "Backend.h" -#include <util/logging.h> - extern "C" { -onert::backend::Backend *onert_backend_create() -{ - VERBOSE(onert_backend_create) << "'cpu' loaded\n"; - return new onert::backend::cpu::Backend; -} -void onert_backend_destroy(onert::backend::Backend *backend) -{ - VERBOSE(onert_backend_create) << "'cpu' unloaded\n"; - delete backend; -} +onert::backend::Backend *onert_backend_create() { return new onert::backend::cpu::Backend; } + +void 
onert_backend_destroy(onert::backend::Backend *backend) { delete backend; } } diff --git a/runtime/onert/backend/cpu/ops/ArgMinMaxLayer.cc b/runtime/onert/backend/cpu/ops/ArgMinMaxLayer.cc index 2fd284c91..d5ffdef0b 100644 --- a/runtime/onert/backend/cpu/ops/ArgMinMaxLayer.cc +++ b/runtime/onert/backend/cpu/ops/ArgMinMaxLayer.cc @@ -79,6 +79,9 @@ void ArgMinMaxLayer::run() case ir::DataType::UINT8: TF_LITE_ARG_MIN_MAX(uint8_t, int32_t, int32_t); break; + case ir::DataType::QUANT_INT8_ASYMM: + TF_LITE_ARG_MIN_MAX(uint8_t, int32_t, int32_t); + break; case ir::DataType::INT32: TF_LITE_ARG_MIN_MAX(int32_t, int32_t, int32_t); break; @@ -97,6 +100,9 @@ void ArgMinMaxLayer::run() case ir::DataType::UINT8: TF_LITE_ARG_MIN_MAX(uint8_t, int32_t, int64_t); break; + case ir::DataType::QUANT_INT8_ASYMM: + TF_LITE_ARG_MIN_MAX(uint8_t, int32_t, int64_t); + break; case ir::DataType::INT32: TF_LITE_ARG_MIN_MAX(int32_t, int32_t, int64_t); break; diff --git a/runtime/onert/backend/cpu/ops/BatchMatMulLayer.cc b/runtime/onert/backend/cpu/ops/BatchMatMulLayer.cc index 7ef023788..ba9655924 100644 --- a/runtime/onert/backend/cpu/ops/BatchMatMulLayer.cc +++ b/runtime/onert/backend/cpu/ops/BatchMatMulLayer.cc @@ -67,7 +67,7 @@ void BatchMatMulLayer::configure(const IPortableTensor *lhs, const IPortableTens void BatchMatMulLayer::run() { - if (_lhs->data_type() == OperandType::FLOAT32) + if ((_lhs->data_type() == OperandType::FLOAT32) && (_rhs->data_type() == OperandType::FLOAT32)) { batchMatMulFloat32(); } diff --git a/runtime/onert/backend/cpu/ops/ConcatLayer.cc b/runtime/onert/backend/cpu/ops/ConcatLayer.cc index d26ed7378..edfdfc1a6 100644 --- a/runtime/onert/backend/cpu/ops/ConcatLayer.cc +++ b/runtime/onert/backend/cpu/ops/ConcatLayer.cc @@ -117,24 +117,26 @@ void ConcatLayer::configure(const std::vector<const IPortableTensor *> &inputs, void ConcatLayer::run() { - if (_output->data_type() == OperandType::FLOAT32) + switch (_output->data_type()) { - concatenationGeneral<float>(); + case OperandType::FLOAT32: + concatenationGeneral<float>(); + break; + case OperandType::QUANT_UINT8_ASYMM: + concatenationQuant8(); + break; + case OperandType::QUANT_INT8_ASYMM: + concatenationGeneral<int8_t>(); + break; + case OperandType::INT32: + concatenationGeneral<int32_t>(); + break; + case OperandType::INT64: + concatenationGeneral<int64_t>(); + break; + default: + throw std::runtime_error("Concat: unsupported data type"); } - else if (_output->data_type() == OperandType::QUANT_UINT8_ASYMM) - { - concatenationQuant8(); - } - else if (_output->data_type() == OperandType::INT32) - { - concatenationGeneral<int32_t>(); - } - else if (_output->data_type() == OperandType::INT64) - { - concatenationGeneral<int64_t>(); - } - else - throw std::runtime_error("Concat: unsupported data type"); } } // namespace ops diff --git a/runtime/onert/backend/cpu/ops/ConvolutionLayer.cc b/runtime/onert/backend/cpu/ops/ConvolutionLayer.cc index 799e9e2d0..c964e38f9 100644 --- a/runtime/onert/backend/cpu/ops/ConvolutionLayer.cc +++ b/runtime/onert/backend/cpu/ops/ConvolutionLayer.cc @@ -203,8 +203,6 @@ void ConvolutionLayer::prepare() _prepare = true; } -#undef ANDROID_NN_CONV_PARAMETERS - } // namespace ops } // namespace cpu } // namespace backend diff --git a/runtime/onert/backend/cpu/ops/DepthToSpaceLayer.cc b/runtime/onert/backend/cpu/ops/DepthToSpaceLayer.cc new file mode 100644 index 000000000..d265d0ac2 --- /dev/null +++ b/runtime/onert/backend/cpu/ops/DepthToSpaceLayer.cc @@ -0,0 +1,78 @@ +/* + * Copyright (c) 2020 Samsung Electronics 
Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "DepthToSpaceLayer.h" + +#include "OperationUtils.h" + +#include <cker/operation/DepthToSpace.h> + +namespace onert +{ +namespace backend +{ +namespace cpu +{ +namespace ops +{ +DepthToSpaceLayer::DepthToSpaceLayer() : _input(nullptr), _block_size(0), _output(nullptr) +{ + // DO NOTHING +} + +template <typename T> void DepthToSpaceLayer::depthToSpace() +{ + nnfw::cker::DepthToSpace(getTensorShape(_input), reinterpret_cast<const T *>(_input->buffer()), + getTensorShape(_output), reinterpret_cast<T *>(_output->buffer()), + _block_size); +} + +void DepthToSpaceLayer::configure(const IPortableTensor *input, const int32_t block_size, + IPortableTensor *output) +{ + _input = input; + _block_size = block_size; + _output = output; +} + +void DepthToSpaceLayer::run() +{ + switch (_input->data_type()) + { + case OperandType::FLOAT32: + depthToSpace<float>(); + break; + case OperandType::INT32: + depthToSpace<int32_t>(); + break; + case OperandType::INT64: + depthToSpace<int64_t>(); + break; + case OperandType::QUANT_UINT8_ASYMM: + depthToSpace<uint8_t>(); + break; + case OperandType::QUANT_INT8_ASYMM: + depthToSpace<int8_t>(); + break; + default: + throw std::runtime_error{"DepthToSpace: unsupported data type"}; + } +} + +} // namespace ops +} // namespace cpu +} // namespace backend +} // namespace onert diff --git a/runtime/onert/backend/cpu/ops/DepthToSpaceLayer.h b/runtime/onert/backend/cpu/ops/DepthToSpaceLayer.h new file mode 100644 index 000000000..32e0171ce --- /dev/null +++ b/runtime/onert/backend/cpu/ops/DepthToSpaceLayer.h @@ -0,0 +1,54 @@ +/* + * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#ifndef __ONERT_BACKEND_CPU_OPS_DEPTH_TO_SPACE_LAYER_H__ +#define __ONERT_BACKEND_CPU_OPS_DEPTH_TO_SPACE_LAYER_H__ + +#include <backend/IPortableTensor.h> + +#include <exec/IFunction.h> + +namespace onert +{ +namespace backend +{ +namespace cpu +{ +namespace ops +{ +class DepthToSpaceLayer : public ::onert::exec::IFunction +{ +public: + DepthToSpaceLayer(); + + void configure(const IPortableTensor *input, const int32_t block_size, IPortableTensor *output); + + void run() override; + +private: + template <typename T> void depthToSpace(); + + const IPortableTensor *_input; + int32_t _block_size; + IPortableTensor *_output; +}; + +} // namespace ops +} // namespace cpu +} // namespace backend +} // namespace onert + +#endif // __ONERT_BACKEND_CPU_OPS_DEPTH_TO_SPACE_LAYER_H__ diff --git a/runtime/onert/backend/cpu/ops/DepthwiseConvolutionLayer.cc b/runtime/onert/backend/cpu/ops/DepthwiseConvolutionLayer.cc index f1dc1103a..85553d14d 100644 --- a/runtime/onert/backend/cpu/ops/DepthwiseConvolutionLayer.cc +++ b/runtime/onert/backend/cpu/ops/DepthwiseConvolutionLayer.cc @@ -43,11 +43,12 @@ void DepthwiseConvolutionLayer::convFloat32() op_params.float_activation_min = output_activation_min; op_params.float_activation_max = output_activation_max; - nnfw::cker::DepthwiseConv( + nnfw::cker::DepthwiseConv<float, float>( op_params, getTensorShape(_input), reinterpret_cast<const float *>(_input->buffer()), getTensorShape(_kernel), reinterpret_cast<const float *>(_kernel->buffer()), getTensorShape(_bias), reinterpret_cast<const float *>(_bias->buffer()), - getTensorShape(_output), reinterpret_cast<float *>(_output->buffer())); + getTensorShape(_output), reinterpret_cast<float *>(_output->buffer()), + _external_context->ruy_context()); } void DepthwiseConvolutionLayer::convQuant8() @@ -79,11 +80,12 @@ void DepthwiseConvolutionLayer::convQuant8() op_params.quantized_activation_min = output_activation_min; op_params.quantized_activation_max = output_activation_max; - nnfw::cker::DepthwiseConv( + nnfw::cker::DepthwiseConv<uint8_t, int32_t>( op_params, getTensorShape(_input), reinterpret_cast<const uint8_t *>(_input->buffer()), getTensorShape(_kernel), reinterpret_cast<const uint8_t *>(_kernel->buffer()), getTensorShape(_bias), reinterpret_cast<const int32_t *>(_bias->buffer()), - getTensorShape(_output), reinterpret_cast<uint8_t *>(_output->buffer())); + getTensorShape(_output), reinterpret_cast<uint8_t *>(_output->buffer()), + _external_context->ruy_context()); } void DepthwiseConvolutionLayer::configure( @@ -91,7 +93,8 @@ void DepthwiseConvolutionLayer::configure( const uint32_t paddingLeft, const uint32_t paddingRight, const uint32_t paddingTop, const uint32_t paddingBottom, const uint32_t strideWidth, const uint32_t strideHeight, const uint32_t multiplier, const uint32_t dilationWidth, const uint32_t dilationHeight, - const ir::Activation activation, IPortableTensor *output) + const ir::Activation activation, IPortableTensor *output, + const std::shared_ptr<ExternalContext> &external_context) { _input = input; _kernel = kernel; @@ -107,6 +110,7 @@ void DepthwiseConvolutionLayer::configure( _dilationHeight = dilationHeight; _activation = activation; _output = output; + _external_context = external_context; } void DepthwiseConvolutionLayer::run() diff --git a/runtime/onert/backend/cpu/ops/DepthwiseConvolutionLayer.h b/runtime/onert/backend/cpu/ops/DepthwiseConvolutionLayer.h index fb032ecbf..fe1fcc182 100644 --- a/runtime/onert/backend/cpu/ops/DepthwiseConvolutionLayer.h +++ 
b/runtime/onert/backend/cpu/ops/DepthwiseConvolutionLayer.h @@ -19,6 +19,7 @@ #include <backend/IPortableTensor.h> #include "OperationUtils.h" +#include "../ExternalContext.h" #include <exec/IFunction.h> @@ -47,7 +48,7 @@ public: const uint32_t paddingBottom, const uint32_t strideW, const uint32_t strideH, const uint32_t multiplier, const uint32_t dilationWidth, const uint32_t dilationHeight, const ir::Activation activation, - IPortableTensor *output); + IPortableTensor *output, const std::shared_ptr<ExternalContext> &external_context); void run() override; @@ -71,6 +72,8 @@ private: uint32_t _dilationHeight{1}; ir::Activation _activation{ir::Activation::NONE}; + + std::shared_ptr<ExternalContext> _external_context; }; } // namespace ops diff --git a/runtime/onert/backend/cpu/ops/ElementwiseActivationLayer.cc b/runtime/onert/backend/cpu/ops/ElementwiseActivationLayer.cc index c1d63172b..3e1da5ec0 100644 --- a/runtime/onert/backend/cpu/ops/ElementwiseActivationLayer.cc +++ b/runtime/onert/backend/cpu/ops/ElementwiseActivationLayer.cc @@ -18,6 +18,8 @@ #include "OperationUtils.h" +#include <cker/operation/ELU.h> +#include <cker/operation/LeakyReLU.h> #include <cker/operation/Logistic.h> #include <cker/operation/ReLU.h> #include <cker/operation/ReLU6.h> @@ -91,6 +93,19 @@ void ElementwiseActivationLayer::configure(const IPortableTensor *input, IPortab switch (op_type) { + case ElementwiseActivationType::kElu: + if (input->data_type() == OperandType::FLOAT32) + { + _kernel = [](const IPortableTensor *input, IPortableTensor *output) { + nnfw::cker::ELU(getTensorShape(input), reinterpret_cast<const float *>(input->buffer()), + getTensorShape(output), reinterpret_cast<float *>(output->buffer())); + }; + } + else + { + throw std::runtime_error{"ElementwiseActivationLayer(Elu): unsupported data type"}; + } + break; case ElementwiseActivationType::kLogistic: if (_input->data_type() == OperandType::QUANT_UINT8_ASYMM) { @@ -160,6 +175,21 @@ void ElementwiseActivationLayer::configure(const IPortableTensor *input, IPortab throw std::runtime_error{"ElementwiseActivationLayer(Logistic): unsupported data type"}; } break; + case ElementwiseActivationType::kLeakyReLU: + if (_input->data_type() == OperandType::FLOAT32) + { + _kernel = [alpha](const IPortableTensor *input, IPortableTensor *output) { + nnfw::cker::LeakyReLU(nnfw::cker::LeakyReluParams{alpha}, getTensorShape(input), + reinterpret_cast<const float *>(input->buffer()), + getTensorShape(output), + reinterpret_cast<float *>(output->buffer())); + }; + } + else + { + throw std::runtime_error{"ElementwiseActivationLayer(LeakyReLU): unsupported data type"}; + } + break; default: throw std::runtime_error("ElementwiseActivationLayer: unsupported op type"); } diff --git a/runtime/onert/backend/cpu/ops/ElementwiseActivationLayer.h b/runtime/onert/backend/cpu/ops/ElementwiseActivationLayer.h index 3ef580041..948ab3b57 100644 --- a/runtime/onert/backend/cpu/ops/ElementwiseActivationLayer.h +++ b/runtime/onert/backend/cpu/ops/ElementwiseActivationLayer.h @@ -32,9 +32,11 @@ namespace ops enum class ElementwiseActivationType { + kElu, kLogistic, kReLU, - kTanh + kTanh, + kLeakyReLU }; class ElementwiseActivationLayer : public ::onert::exec::IFunction diff --git a/runtime/onert/backend/cpu/ops/ElementwiseBinaryLayer.cc b/runtime/onert/backend/cpu/ops/ElementwiseBinaryLayer.cc index ea3c1e7cd..1e17a0828 100644 --- a/runtime/onert/backend/cpu/ops/ElementwiseBinaryLayer.cc +++ b/runtime/onert/backend/cpu/ops/ElementwiseBinaryLayer.cc @@ -18,6 +18,7 @@ #include 
"OperationUtils.h" +#include <cker/operation/LogicalAnd.h> #include <cker/operation/LogicalOr.h> #include <cker/operation/MaxMin.h> @@ -33,6 +34,25 @@ namespace ops namespace { template <typename T> +void logicalAndGeneric(const IPortableTensor *lhs, const IPortableTensor *rhs, + IPortableTensor *output) +{ + if (!HaveSameShapes(lhs, rhs)) + { + nnfw::cker::LogicalAndBroadcast<T>( + getTensorShape(lhs), reinterpret_cast<const T *>(lhs->buffer()), getTensorShape(rhs), + reinterpret_cast<const T *>(rhs->buffer()), getTensorShape(output), + reinterpret_cast<T *>(output->buffer())); + } + else + { + nnfw::cker::LogicalAndElementwise<T>( + getTensorShape(lhs), reinterpret_cast<const T *>(lhs->buffer()), + reinterpret_cast<const T *>(rhs->buffer()), reinterpret_cast<T *>(output->buffer())); + } +} + +template <typename T> void logicalOrGeneric(const IPortableTensor *lhs, const IPortableTensor *rhs, IPortableTensor *output) { @@ -88,6 +108,16 @@ void ElementwiseBinaryLayer::configure(const IPortableTensor *lhs, const IPortab switch (op_type) { + case ElementwiseBinaryType::kLogicalAnd: + if ((_lhs->data_type() == OperandType::BOOL8) && (_rhs->data_type() == OperandType::BOOL8)) + { + _kernel = logicalAndGeneric<bool>; + } + else + { + throw std::runtime_error{"LogicalOr: Unsupported data type"}; + } + break; case ElementwiseBinaryType::kLogicalOr: if ((_lhs->data_type() == OperandType::BOOL8) && (_rhs->data_type() == OperandType::BOOL8)) { diff --git a/runtime/onert/backend/cpu/ops/ElementwiseUnaryLayer.cc b/runtime/onert/backend/cpu/ops/ElementwiseUnaryLayer.cc index 066455e72..15d7f3049 100644 --- a/runtime/onert/backend/cpu/ops/ElementwiseUnaryLayer.cc +++ b/runtime/onert/backend/cpu/ops/ElementwiseUnaryLayer.cc @@ -195,6 +195,18 @@ void sinFloat32(const IPortableTensor *input, IPortableTensor *output) getTensorShape(output), reinterpret_cast<float *>(output->buffer())); } +void sqrtFloat32(const IPortableTensor *input, IPortableTensor *output) +{ + nnfw::cker::Sqrt(getTensorShape(input), reinterpret_cast<const float *>(input->buffer()), + getTensorShape(output), reinterpret_cast<float *>(output->buffer())); +} + +void squareFloat32(const IPortableTensor *input, IPortableTensor *output) +{ + nnfw::cker::Square(getTensorShape(input), reinterpret_cast<const float *>(input->buffer()), + getTensorShape(output), reinterpret_cast<float *>(output->buffer())); +} + template <typename T> void zerosLikeFloat32(const IPortableTensor *input, IPortableTensor *output) { if (!HaveSameShapes(input, output)) @@ -363,6 +375,26 @@ void ElementwiseUnaryLayer::configure(const IPortableTensor *input, IPortableTen throw std::runtime_error{"Sin: Unsupported data type"}; } break; + case ElementwiseUnaryType::kSqrt: + if ((input->data_type() == OperandType::FLOAT32)) + { + _kernel = sqrtFloat32; + } + else + { + throw std::runtime_error{"Sqrt: Unsupported data type"}; + } + break; + case ElementwiseUnaryType::kSquare: + if ((input->data_type() == OperandType::FLOAT32)) + { + _kernel = squareFloat32; + } + else + { + throw std::runtime_error{"Square: Unsupported data type"}; + } + break; case ElementwiseUnaryType::kZerosLike: if (input->data_type() == OperandType::FLOAT32) { diff --git a/runtime/onert/backend/cpu/ops/ElementwiseUnaryLayer.h b/runtime/onert/backend/cpu/ops/ElementwiseUnaryLayer.h index c1765b5b7..54a6fc02a 100644 --- a/runtime/onert/backend/cpu/ops/ElementwiseUnaryLayer.h +++ b/runtime/onert/backend/cpu/ops/ElementwiseUnaryLayer.h @@ -46,6 +46,8 @@ enum class ElementwiseUnaryType kRound, kRSqrt, kSin, + 
kSqrt, + kSquare, kZerosLike }; diff --git a/runtime/onert/backend/cpu/ops/ExpandDimsLayer.cc b/runtime/onert/backend/cpu/ops/ExpandDimsLayer.cc index b545e6743..5ea0ea893 100644 --- a/runtime/onert/backend/cpu/ops/ExpandDimsLayer.cc +++ b/runtime/onert/backend/cpu/ops/ExpandDimsLayer.cc @@ -25,22 +25,19 @@ namespace cpu namespace ops { -ExpandDimsLayer::ExpandDimsLayer() : _input(nullptr), _axis(nullptr), _output(nullptr) +ExpandDimsLayer::ExpandDimsLayer() : _input(nullptr), _output(nullptr) { // DO NOTHING } -void ExpandDimsLayer::configure(const IPortableTensor *input, const IPortableTensor *axis, - IPortableTensor *output) +void ExpandDimsLayer::configure(const IPortableTensor *input, IPortableTensor *output) { _input = input; - _axis = axis; _output = output; } void ExpandDimsLayer::run() { - // TODO use _axis to calculate shape of output when _axis is not constant size_t count = _input->total_size(); memcpy(_output->buffer(), _input->buffer(), count); } diff --git a/runtime/onert/backend/cpu/ops/ExpandDimsLayer.h b/runtime/onert/backend/cpu/ops/ExpandDimsLayer.h index b5d4938b5..1b7ead0c3 100644 --- a/runtime/onert/backend/cpu/ops/ExpandDimsLayer.h +++ b/runtime/onert/backend/cpu/ops/ExpandDimsLayer.h @@ -36,14 +36,12 @@ public: ExpandDimsLayer(); public: - void configure(const IPortableTensor *input, const IPortableTensor *axis, - IPortableTensor *output); + void configure(const IPortableTensor *input, IPortableTensor *output); void run() override; private: const IPortableTensor *_input; - const IPortableTensor *_axis; IPortableTensor *_output; }; diff --git a/runtime/onert/backend/cpu/ops/FillLayer.cc b/runtime/onert/backend/cpu/ops/FillLayer.cc index df3f8b7cd..5b7c17907 100644 --- a/runtime/onert/backend/cpu/ops/FillLayer.cc +++ b/runtime/onert/backend/cpu/ops/FillLayer.cc @@ -29,15 +29,13 @@ namespace cpu namespace ops { -FillLayer::FillLayer() : _input(nullptr), _value(nullptr), _output(nullptr) +FillLayer::FillLayer() : _value(nullptr), _output(nullptr) { // DO NOTHING } -void FillLayer::configure(const IPortableTensor *input, const IPortableTensor *value, - IPortableTensor *output) +void FillLayer::configure(const IPortableTensor *value, IPortableTensor *output) { - _input = input; _value = value; _output = output; } @@ -47,28 +45,24 @@ void FillLayer::run() switch (_output->data_type()) { case OperandType::FLOAT32: - nnfw::cker::Fill<float *>(getTensorShape(_input), reinterpret_cast<int *>(_input->buffer()), - reinterpret_cast<float *>(_value->buffer()), + nnfw::cker::Fill<float *>(reinterpret_cast<float *>(_value->buffer()), getTensorShape(_output), reinterpret_cast<float *>(_output->buffer())); break; case OperandType::INT32: - nnfw::cker::Fill<int32_t *>(getTensorShape(_input), reinterpret_cast<int *>(_input->buffer()), - reinterpret_cast<int32_t *>(_value->buffer()), + nnfw::cker::Fill<int32_t *>(reinterpret_cast<int32_t *>(_value->buffer()), getTensorShape(_output), reinterpret_cast<int32_t *>(_output->buffer())); break; case OperandType::INT64: - nnfw::cker::Fill<int64_t *>(getTensorShape(_input), reinterpret_cast<int *>(_input->buffer()), - reinterpret_cast<int64_t *>(_value->buffer()), + nnfw::cker::Fill<int64_t *>(reinterpret_cast<int64_t *>(_value->buffer()), getTensorShape(_output), reinterpret_cast<int64_t *>(_output->buffer())); break; case OperandType::UINT32: - nnfw::cker::Fill<uint32_t *>( - getTensorShape(_input), reinterpret_cast<int *>(_input->buffer()), - reinterpret_cast<uint32_t *>(_value->buffer()), getTensorShape(_output), - reinterpret_cast<uint32_t 
*>(_output->buffer())); + nnfw::cker::Fill<uint32_t *>(reinterpret_cast<uint32_t *>(_value->buffer()), + getTensorShape(_output), + reinterpret_cast<uint32_t *>(_output->buffer())); break; default: throw std::runtime_error{"Fill: unsupported data type"}; diff --git a/runtime/onert/backend/cpu/ops/FillLayer.h b/runtime/onert/backend/cpu/ops/FillLayer.h index 1f17d6b68..ce843654a 100644 --- a/runtime/onert/backend/cpu/ops/FillLayer.h +++ b/runtime/onert/backend/cpu/ops/FillLayer.h @@ -35,13 +35,11 @@ class FillLayer : public ::onert::exec::IFunction public: FillLayer(); - void configure(const IPortableTensor *input, const IPortableTensor *value, - IPortableTensor *output); + void configure(const IPortableTensor *value, IPortableTensor *output); void run() override; private: - const IPortableTensor *_input; const IPortableTensor *_value; IPortableTensor *_output; }; diff --git a/runtime/onert/backend/cpu/ops/MeanLayer.cc b/runtime/onert/backend/cpu/ops/MeanLayer.cc index 4921ac748..f130692ee 100644 --- a/runtime/onert/backend/cpu/ops/MeanLayer.cc +++ b/runtime/onert/backend/cpu/ops/MeanLayer.cc @@ -36,9 +36,24 @@ MeanLayer::MeanLayer() : _input(nullptr), _axes(nullptr), _output(nullptr), _kee void MeanLayer::MeanFloat32() { - nnfw::cker::Mean(getTensorShape(_input), reinterpret_cast<const float *>(_input->buffer()), - getTensorShape(_output), reinterpret_cast<float *>(_output->buffer()), - getReducerAxes(_axes)); + const auto inputShape = getTensorShape(_input); + const auto axisVec = getReducerAxes(_axes); + bool axis_is_1_and_2 = + _keep_dims && inputShape.DimensionsCount() == 4 && axisVec.size() == 2 && + ((axisVec[0] == 1 && axisVec[1] == 2) || (axisVec[0] == 2 && axisVec[1] == 1)); + + if (axis_is_1_and_2) + { + nnfw::cker::MeanAxis1And2(inputShape, reinterpret_cast<const float *>(_input->buffer()), + getTensorShape(_output), + reinterpret_cast<float *>(_output->buffer())); + } + else + { + nnfw::cker::Mean(inputShape, reinterpret_cast<const float *>(_input->buffer()), + getTensorShape(_output), reinterpret_cast<float *>(_output->buffer()), + axisVec); + } } void MeanLayer::MeanQuant8() @@ -57,6 +72,10 @@ void MeanLayer::configure(const IPortableTensor *input, const IPortableTensor *a _axes = axes; _output = output; _keep_dims = keep_dims; + + if (_input->data_type() != OperandType::FLOAT32 && + _input->data_type() != OperandType::QUANT_UINT8_ASYMM) + throw std::runtime_error{"Mean: unsupported data type"}; } void MeanLayer::run() diff --git a/runtime/onert/backend/ruy/Backend.h b/runtime/onert/backend/ruy/Backend.h new file mode 100644 index 000000000..bc8a024d8 --- /dev/null +++ b/runtime/onert/backend/ruy/Backend.h @@ -0,0 +1,68 @@ +/* + * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#ifndef __ONERT_BACKEND_RUY_BACKEND_H__ +#define __ONERT_BACKEND_RUY_BACKEND_H__ + +#include "BackendContext.h" +#include "Config.h" +#include "ConstantInitializer.h" +#include "KernelGenerator.h" + +#include <backend/Backend.h> + +#include <memory> + +namespace onert +{ +namespace backend +{ +namespace ruy +{ + +class Backend : public ::onert::backend::Backend +{ +public: + Backend() : _config{std::make_shared<Config>()} {} + + std::shared_ptr<IConfig> config() const override { return _config; } + + std::unique_ptr<onert::backend::BackendContext> + newContext(const ir::Graph &graph, const std::shared_ptr<custom::IKernelBuilder> &kb, + bool) const override + { + const auto &operands = graph.operands(); + const auto &operations = graph.operations(); + auto context = std::make_unique<BackendContext>(this, &graph); + auto tr = std::make_shared<cpu_common::TensorRegistry>(); + auto tb = std::make_shared<TensorBuilder>(tr); + context->tensor_registry = tr; + context->tensor_builder = tb; + context->constant_initializer = std::make_shared<ConstantInitializer>(operands, tr); + context->kernel_gen = std::make_shared<KernelGenerator>(operands, operations, tb, tr, kb, + context->external_context()); + return context; + } + +private: + std::shared_ptr<IConfig> _config; +}; + +} // namespace ruy +} // namespace backend +} // namespace onert + +#endif // __ONERT_BACKEND_RUY_BACKEND_H__ diff --git a/runtime/onert/backend/ruy/BackendContext.cc b/runtime/onert/backend/ruy/BackendContext.cc new file mode 100644 index 000000000..ef686f480 --- /dev/null +++ b/runtime/onert/backend/ruy/BackendContext.cc @@ -0,0 +1,147 @@ +/* + * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include "BackendContext.h" + +#include "TensorBuilder.h" +#include "KernelGenerator.h" +#include "util/logging.h" +#include "ir/Index.h" +#include "ir/OperandIndexMap.h" +#include "ir/OperandIndexSequence.h" +#include "backend/cpu_common/BackendContextHelpers.h" + +namespace onert +{ +namespace backend +{ +namespace ruy +{ + +void BackendContext::initConsts() +{ + for (auto &op : operation_list()) + { + constant_initializer->setLayout(op.layout); + graph()->operations().at(op.index).accept(*constant_initializer); + } + + for (auto ind : operand_list()) + { + const auto &obj = graph()->operands().at(ind); + if (obj.isConstant() && !constant_initializer->exist(ind)) + { + constant_initializer->registerDefaultInitializer(ind, obj); + } + } + + constant_initializer->run(); +} + +ITensorRegistry *BackendContext::genTensors(const std::vector<onert::ir::OpSequenceIndex> &order, + const ir::OpSequences &op_seqs, + const ir::LowerInfoMap &lower_info) +{ + auto model_io = (graph()->getInputs() + graph()->getOutputs()) | ir::Remove::UNDEFINED | + ir::Remove::DUPLICATED; + for (auto index : operand_list()) + { + if (model_io.contains(index)) + continue; + const auto &obj = graph()->operands().at(index); + const auto frontend_layout = [&]() { + if (obj.getUses().size() == 0) + return ir::Layout::UNKNOWN; + auto use_op_ind = *obj.getUses().begin(); // FIXME What if it has two or more uses? + for (auto &operation_info : operation_list()) + { + if (operation_info.index == use_op_ind) + return operation_info.layout; + } + return ir::Layout::UNKNOWN; + }(); + const auto &permute_factor = lower_info.operand.at(index)->def_factors().getOnlyElement(); + if (permute_factor.backend() != backend()) + continue; + const auto backend_layout = permute_factor.layout(); + ir::OperandInfo backend_info{permuteShape(obj.shape(), frontend_layout, backend_layout), + obj.typeInfo(), obj.info().memAllocType(), obj.isConstant()}; + tensor_builder->registerTensorInfo(index, backend_info, backend_layout); + } + + // TODO Get compiler options from compiler, and use it rather than getting it from Env + if (util::getConfigString(util::config::EXECUTOR) == "Linear") + { + cpu_common::planTensors(*this, order, op_seqs, lower_info); + } + else + { + // For the executors that does not have fixed linear execution order: + // To make tensors never be deallocated, this is a workaround to use static memory planner + for (auto ind : operand_list()) + { + if (tensor_builder->isRegistered(ind)) + tensor_builder->notifyFirstUse(ind); + } + } + + tensor_builder->prepare(); + + return tensor_registry.get(); +} + +FunctionMap BackendContext::genKernels(const std::vector<onert::ir::OpSequenceIndex> &order, + const ir::OpSequences &op_seqs) +{ + FunctionMap ret; + + for (auto op_seq_ind : order) + { + const auto &op_seq = op_seqs.at(op_seq_ind); + bool assigned = [&]() { + for (auto op_info : operation_list()) + if (op_seq.exist(op_info.index)) + return true; + return false; + }(); + if (!assigned) + continue; + auto fn_seq = kernel_gen->generate(op_seqs.at(op_seq_ind)); + ret.emplace_back(op_seq_ind, std::move(fn_seq)); + } + + initConsts(); + + // NOTE For memory optimization, we want to free some operand data + for (auto ind : operand_list()) + { + // TODO Remove const_cast + auto &obj = const_cast<ir::Graph *>(graph())->operands().at(ind); + obj.releaseData(); + } + + for (auto &it : ret) + { + auto &fn_seq = it.second; + fn_seq->iterate([&](exec::IFunction &ifunc) { ifunc.prepare(); }); + } + + return ret; +} + +} // namespace ruy +} 
// namespace backend +} // namespace onert diff --git a/runtime/onert/backend/ruy/BackendContext.h b/runtime/onert/backend/ruy/BackendContext.h new file mode 100644 index 000000000..b965c9a9d --- /dev/null +++ b/runtime/onert/backend/ruy/BackendContext.h @@ -0,0 +1,78 @@ +/* + * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef __ONERT_BACKEND_RUY_BACKEND_CONTEXT_H__ +#define __ONERT_BACKEND_RUY_BACKEND_CONTEXT_H__ + +#include <backend/BackendContext.h> +#include "TensorBuilder.h" +#include "ConstantInitializer.h" +#include "KernelGenerator.h" +#include "ExternalContext.h" + +namespace onert +{ +namespace backend +{ +namespace ruy +{ + +class BackendContext : public onert::backend::BackendContext +{ +public: + BackendContext(const Backend *backend, const ir::Graph *graph, + std::shared_ptr<ITensorRegistry> tensor_registry = nullptr, + std::shared_ptr<TensorBuilder> tensor_builder = nullptr, + std::shared_ptr<ConstantInitializer> constant_initializer = nullptr, + std::shared_ptr<KernelGenerator> kernel_gen = nullptr) + : onert::backend::BackendContext(backend, graph, tensor_registry), + tensor_builder{tensor_builder}, constant_initializer{constant_initializer}, + kernel_gen{kernel_gen}, _external_context(new ExternalContext) + { + } + + ITensorRegistry *genTensors(const std::vector<onert::ir::OpSequenceIndex> &order, + const ir::OpSequences &op_seqs, + const ir::LowerInfoMap &lower_info) override; + + FunctionMap genKernels(const std::vector<ir::OpSequenceIndex> &order, + const ir::OpSequences &op_seqs) override; + + std::shared_ptr<ExternalContext> external_context() { return _external_context; } + +private: + void initConsts(); + void planTensors(const std::vector<onert::ir::OpSequenceIndex> &order, + const ir::OpSequences &op_seqs, const ir::LowerInfoMap &lower_info); + +public: + // TODO Make it private + std::shared_ptr<TensorBuilder> tensor_builder; + std::shared_ptr<ConstantInitializer> constant_initializer; + std::shared_ptr<KernelGenerator> kernel_gen; + +private: + // NOTE ruy context has a thread pool, and when multiple ruy contexts are created, + // the thread pool is also created in duplicate + // TODO Create one ruy context for session + std::shared_ptr<ExternalContext> _external_context; +}; + +} // namespace ruy +} // namespace backend +} // namespace onert + +#endif // __ONERT_BACKEND_RUY_BACKEND_CONTEXT_H__ diff --git a/runtime/onert/backend/ruy/CMakeLists.txt b/runtime/onert/backend/ruy/CMakeLists.txt new file mode 100644 index 000000000..206acbfbf --- /dev/null +++ b/runtime/onert/backend/ruy/CMakeLists.txt @@ -0,0 +1,22 @@ +set(LIB_ONERT_BACKEND_RUY onert_backend_ruy) + +nnfw_find_package(Ruy REQUIRED) + +file(GLOB_RECURSE SOURCES "*.cc") + +add_library(${LIB_ONERT_BACKEND_RUY} SHARED ${SOURCES}) + +target_link_libraries(${LIB_ONERT_BACKEND_RUY} PRIVATE nnfw_lib_ruy) +target_link_libraries(${LIB_ONERT_BACKEND_RUY} PRIVATE onert_core) +target_link_libraries(${LIB_ONERT_BACKEND_RUY} 
PRIVATE nnfw_common) +target_link_libraries(${LIB_ONERT_BACKEND_RUY} PRIVATE nnfw_coverage) +target_link_libraries(${LIB_ONERT_BACKEND_RUY} PRIVATE ruy) + +set_target_properties(${LIB_ONERT_BACKEND_RUY} PROPERTIES OUTPUT_NAME backend_ruy) + +if(CMAKE_BUILD_TYPE_LC STREQUAL "release") + add_custom_command(TARGET ${LIB_ONERT_BACKEND_RUY} POST_BUILD + COMMAND ${CMAKE_STRIP} "--strip-unneeded" $<TARGET_FILE_NAME:${LIB_ONERT_BACKEND_RUY}>) +endif() + +install(TARGETS ${LIB_ONERT_BACKEND_RUY} DESTINATION lib) diff --git a/runtime/onert/backend/cpu/Tensor.cc b/runtime/onert/backend/ruy/Config.cc index dac8f898b..179caa9a6 100644 --- a/runtime/onert/backend/cpu/Tensor.cc +++ b/runtime/onert/backend/ruy/Config.cc @@ -14,18 +14,18 @@ * limitations under the License. */ -#include "Tensor.h" +#include "Config.h" namespace onert { namespace backend { -namespace cpu +namespace ruy { -// `dynamic_cast` not working across library boundaries on NDK -// With this as a key function, `dynamic_cast` works across dl -ExternalTensor::~ExternalTensor() {} +bool Config::initialize() { return true; } + +ir::Layout Config::supportLayout(const ir::Operation &, ir::Layout) { return ir::Layout::NHWC; } } // namespace cpu } // namespace backend diff --git a/runtime/onert/backend/ruy/Config.h b/runtime/onert/backend/ruy/Config.h new file mode 100644 index 000000000..9160dd5b1 --- /dev/null +++ b/runtime/onert/backend/ruy/Config.h @@ -0,0 +1,48 @@ +/* + * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef __ONERT_BACKEND_RUY_CONFIG_H__ +#define __ONERT_BACKEND_RUY_CONFIG_H__ + +#include <backend/IConfig.h> +#include <memory> +#include <util/ITimer.h> + +namespace onert +{ +namespace backend +{ +namespace ruy +{ + +class Config : public IConfig +{ +public: + std::string id() override { return "ruy"; } + bool initialize() override; + ir::Layout supportLayout(const ir::Operation &node, ir::Layout frontend_layout) override; + bool supportPermutation() override { return true; } + bool supportDynamicTensor() override { return true; } + bool supportFP16() override { return false; } + + std::unique_ptr<util::ITimer> timer() override { return std::make_unique<util::CPUTimer>(); } +}; + +} // namespace ruy +} // namespace backend +} // namespace onert + +#endif // __ONERT_BACKEND_RUY_CONFIG_H__ diff --git a/runtime/onert/backend/ruy/ConstantInitializer.h b/runtime/onert/backend/ruy/ConstantInitializer.h new file mode 100644 index 000000000..24b4d924d --- /dev/null +++ b/runtime/onert/backend/ruy/ConstantInitializer.h @@ -0,0 +1,35 @@ +/* + * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef __ONERT_BACKEND_RUY_CONSTANT_INITIALIZER_H__ +#define __ONERT_BACKEND_RUY_CONSTANT_INITIALIZER_H__ + +#include <backend/cpu_common/ConstantInitializer.h> + +namespace onert +{ +namespace backend +{ +namespace ruy +{ + +using ConstantInitializer = cpu_common::ConstantInitializer; + +} // namespace ruy +} // namespace backend +} // namespace onert + +#endif // __ONERT_BACKEND_RUY_CONSTANT_INITIALIZER_H__ diff --git a/runtime/onert/backend/ruy/ExternalContext.h b/runtime/onert/backend/ruy/ExternalContext.h new file mode 100644 index 000000000..f51faccb8 --- /dev/null +++ b/runtime/onert/backend/ruy/ExternalContext.h @@ -0,0 +1,60 @@ +/* + * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef __ONERT_BACKEND_RUY_EXTERNAL_CONTEXT_H__ +#define __ONERT_BACKEND_RUY_EXTERNAL_CONTEXT_H__ + +#include <util/ConfigSource.h> +#include <ruy/context.h> + +namespace +{ +const int kDefaultNumThreadpoolThreads = 4; +} + +namespace onert +{ +namespace backend +{ +namespace ruy +{ + +class ExternalContext +{ +public: + ExternalContext() : _ruy_context(new ::ruy::Context) + { + setMaxNumThreads(onert::util::getConfigInt(onert::util::config::RUY_THREADS)); + } + + void setMaxNumThreads(int max_num_threads) + { + const int target_num_threads = + max_num_threads > -1 ? max_num_threads : kDefaultNumThreadpoolThreads; + _ruy_context->set_max_num_threads(target_num_threads); + } + + ::ruy::Context *ruy_context() const { return _ruy_context.get(); } + +private: + const std::unique_ptr<::ruy::Context> _ruy_context; +}; + +} // namespace ruy +} // namespace backend +} // namespace onert + +#endif // __ONERT_BACKEND_RUY_EXTERNAL_CONTEXT_H__ diff --git a/runtime/onert/backend/ruy/KernelGenerator.cc b/runtime/onert/backend/ruy/KernelGenerator.cc new file mode 100644 index 000000000..cd2825068 --- /dev/null +++ b/runtime/onert/backend/ruy/KernelGenerator.cc @@ -0,0 +1,171 @@ +/* + * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+ * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "KernelGenerator.h" + +#include "ops/ConvolutionLayer.h" +#include "ops/FullyConnectedLayer.h" + +#include <backend/Backend.h> +#include <backend/IConfig.h> +#include <memory> +#include <util/Utils.h> +#include <util/logging.h> +#include <exec/DynamicShapeInferer.h> + +#include <stdexcept> + +namespace onert +{ +namespace backend +{ +namespace ruy +{ + +KernelGenerator::KernelGenerator( + const ir::Operands &operands_ctx, const ir::Operations &operations_ctx, + const std::shared_ptr<TensorBuilder> &tensor_builder, + const std::shared_ptr<cpu_common::TensorRegistry> &tensor_reg, + const std::shared_ptr<backend::custom::IKernelBuilder> &kernel_builder, + const std::shared_ptr<ExternalContext> &external_context) + : _ctx(operands_ctx), _operations_ctx{operations_ctx}, _tensor_builder(tensor_builder), + _tensor_reg{tensor_reg}, _kernel_builder(kernel_builder), + _current_layout(ir::Layout::UNKNOWN), _external_context(external_context) +{ + // DO NOTHING +} + +void KernelGenerator::visit(const ir::OpSequence &op_seq) +{ + assert(!_return_fn_seq); + assert(_tensor_builder->dynamicTensorManager()); + assert(_tensor_reg); + + auto dyn_shape_inferer = std::make_shared<exec::DynamicShapeInferer>(_ctx, _tensor_reg); + + _return_fn_seq = std::make_unique<exec::FunctionSequence>(); + + // Prepare to handle dynamic tensors later + auto dyn_ctx = std::make_shared<exec::FunctionSequence::DynamicTensorCtx>(); + { + dyn_ctx->op_seq = &op_seq; + dyn_ctx->operations = &_operations_ctx; + dyn_ctx->dynamic_shape_inferer = std::move(dyn_shape_inferer); + dyn_ctx->dynamic_tensor_manager = _tensor_builder->dynamicTensorManager(); + + _return_fn_seq->dynamic_tensor_ctx(dyn_ctx); + } + + _current_layout = op_seq.getLayout(); + for (const auto &operation_idx : op_seq.operations()) + { + const auto &node = _operations_ctx.at(operation_idx); + node.accept(*this); + _return_fn_seq->append(releaseFunction()); + + for (const auto &ind : (node.getInputs() | ir::Remove::UNDEFINED) + node.getOutputs()) + { + auto portable_tensor = _tensor_reg->getPortableTensor(ind); + if (portable_tensor) + { + assert(portable_tensor->layout() == ir::Layout::NHWC); + } + + auto tensor = _tensor_reg->getNativeTensor(ind); + if (tensor) + { + tensor->increase_ref(); + } + } + } +} + +void KernelGenerator::visit(const ir::operation::Conv2D &node) +{ + using ir::operation::Conv2D; + + const auto ofm_index{node.getOutputs().at(0)}; + const auto ifm_index{node.getInputs().at(Conv2D::Input::INPUT)}; + const auto ker_index{node.getInputs().at(Conv2D::Input::KERNEL)}; + const auto bias_index{node.getInputs().at(Conv2D::Input::BIAS)}; + + auto ofm_tensor = _tensor_reg->getPortableTensor(ofm_index); + auto ifm_tensor = _tensor_reg->getPortableTensor(ifm_index); + auto ker_tensor = _tensor_reg->getPortableTensor(ker_index); + auto bias_tensor = _tensor_reg->getPortableTensor(bias_index); + + const auto stride = node.param().stride; + const auto activation = node.param().activation; + const auto param_padding = node.param().padding; + const auto dilation = node.param().dilation; + auto fn = std::make_unique<ops::ConvolutionLayer>(); + + if (_ctx.at(ifm_index).info().isDynamic() || _ctx.at(ker_index).info().isDynamic()) + { + fn->configure(ifm_tensor, ker_tensor, bias_tensor, param_padding.type, param_padding.param.left, + param_padding.param.right, param_padding.param.top, param_padding.param.bottom, + stride.horizontal, 
stride.vertical, dilation.width_factor, dilation.height_factor, + activation, ofm_tensor, _external_context); + + _return_fn = std::move(fn); + return; + } + const auto ifm_shape = _ctx.at(ifm_index).shape().asFeature(_current_layout); + const auto ofm_shape = _ctx.at(ofm_index).shape().asFeature(_current_layout); + // Kernel format is [depth_out, kernel_height, kernel_width, depth_in]. + const auto &ker_shape = _ctx.at(ker_index).shape(); + const auto ker_height = ker_shape.dim(1); + const auto ker_width = ker_shape.dim(2); + + const auto padding = + ir::calculatePadding(param_padding, ifm_shape, ofm_shape, stride, ker_width, ker_height, + dilation.width_factor, dilation.height_factor); + + fn->configure(ifm_tensor, ker_tensor, bias_tensor, param_padding.type, padding.left, + padding.right, padding.top, padding.bottom, stride.horizontal, stride.vertical, + dilation.width_factor, dilation.height_factor, activation, ofm_tensor, + _external_context); + + _return_fn = std::move(fn); +} + +void KernelGenerator::visit(const ir::operation::FullyConnected &node) +{ + using ir::operation::FullyConnected; + + const auto output_index{node.getOutputs().at(0)}; + const auto input_index{node.getInputs().at(FullyConnected::Input::INPUT)}; + const auto weight_index{node.getInputs().at(FullyConnected::Input::WEIGHT)}; + const auto bias_index{node.getInputs().at(FullyConnected::Input::BIAS)}; + const auto activation = node.param().activation; + const auto weights_format = node.param().weights_format; + + auto output_tensor = _tensor_reg->getPortableTensor(output_index); + auto input_tensor = _tensor_reg->getPortableTensor(input_index); + auto weight_tensor = _tensor_reg->getPortableTensor(weight_index); + auto bias_tensor = bias_index.undefined() ? nullptr : _tensor_reg->getPortableTensor(bias_index); + + auto fn = std::make_unique<ops::FullyConnectedLayer>(); + + fn->configure(input_tensor, weight_tensor, bias_tensor, activation, weights_format, output_tensor, + _external_context); + + _return_fn = std::move(fn); +} + +} // namespace ruy +} // namespace backend +} // namespace onert diff --git a/runtime/onert/backend/ruy/KernelGenerator.h b/runtime/onert/backend/ruy/KernelGenerator.h new file mode 100644 index 000000000..0f6bd590a --- /dev/null +++ b/runtime/onert/backend/ruy/KernelGenerator.h @@ -0,0 +1,64 @@ +/* + * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#ifndef __ONERT_BACKEND_RUY_KERNEL_GENERATOR_H__ +#define __ONERT_BACKEND_RUY_KERNEL_GENERATOR_H__ + +#include "ExternalContext.h" +#include "TensorBuilder.h" +#include "backend/cpu_common/TensorRegistry.h" +#include "Tensor.h" + +#include <backend/CustomKernelBuilder.h> +#include <backend/cpu_common/KernelGeneratorBase.h> +#include <ir/Operands.h> +#include <ir/Operations.h> + +namespace onert +{ +namespace backend +{ +namespace ruy +{ + +class KernelGenerator : public cpu_common::KernelGeneratorBase +{ +public: + KernelGenerator(const ir::Operands &operands_ctx, const ir::Operations &operations_ctx, + const std::shared_ptr<TensorBuilder> &tensor_builder, + const std::shared_ptr<cpu_common::TensorRegistry> &tensor_reg, + const std::shared_ptr<custom::IKernelBuilder> &kernel_builder, + const std::shared_ptr<ExternalContext> &external_context); + + void visit(const ir::OpSequence &) override; + void visit(const ir::operation::Conv2D &) override; + void visit(const ir::operation::FullyConnected &) override; + +private: + const ir::Operands &_ctx; + const ir::Operations &_operations_ctx; + std::shared_ptr<TensorBuilder> _tensor_builder; + std::shared_ptr<cpu_common::TensorRegistry> _tensor_reg; + std::shared_ptr<backend::custom::IKernelBuilder> _kernel_builder; + ir::Layout _current_layout; + const std::shared_ptr<ExternalContext> _external_context; +}; + +} // namespace ruy +} // namespace backend +} // namespace onert + +#endif // __ONERT_BACKEND_RUY_KERNEL_GENERATOR_H__ diff --git a/runtime/onert/backend/ruy/StaticTensorManager.h b/runtime/onert/backend/ruy/StaticTensorManager.h new file mode 100644 index 000000000..af2d25241 --- /dev/null +++ b/runtime/onert/backend/ruy/StaticTensorManager.h @@ -0,0 +1,35 @@ +/* + * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef __ONERT_BACKEND_RUY_STATICTENSOR_MANAGER_H__ +#define __ONERT_BACKEND_RUY_STATICTENSOR_MANAGER_H__ + +#include "backend/cpu_common/StaticTensorManager.h" + +namespace onert +{ +namespace backend +{ +namespace ruy +{ + +using StaticTensorManager = cpu_common::StaticTensorManager; + +} // namespace ruy +} // namespace backend +} // namespace onert + +#endif // __ONERT_BACKEND_RUY_STATICTENSOR_MANAGER_H__ diff --git a/runtime/onert/backend/ruy/Tensor.h b/runtime/onert/backend/ruy/Tensor.h new file mode 100644 index 000000000..60d0fbf77 --- /dev/null +++ b/runtime/onert/backend/ruy/Tensor.h @@ -0,0 +1,37 @@ +/* + * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+ * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef __ONERT_BACKEND_RUY_TENSOR_H__ +#define __ONERT_BACKEND_RUY_TENSOR_H__ + +#include <backend/cpu_common/Tensor.h> +#include <ir/Data.h> + +namespace onert +{ +namespace backend +{ +namespace ruy +{ + +using Tensor = cpu_common::Tensor; +using ExternalTensor = cpu_common::ExternalTensor; + +} // namespace ruy +} // namespace backend +} // namespace onert + +#endif // __ONERT_BACKEND_RUY_TENSOR_H__ diff --git a/runtime/onert/backend/ruy/TensorBuilder.cc b/runtime/onert/backend/ruy/TensorBuilder.cc new file mode 100644 index 000000000..c77defc30 --- /dev/null +++ b/runtime/onert/backend/ruy/TensorBuilder.cc @@ -0,0 +1,90 @@ +/* + * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "TensorBuilder.h" + +#include <util/logging.h> + +#include <cassert> + +namespace onert +{ +namespace backend +{ +namespace ruy +{ + +TensorBuilder::TensorBuilder(const std::shared_ptr<cpu_common::TensorRegistry> &tensor_reg) + : _tensor_reg{tensor_reg}, + _dynamic_tensor_mgr{new cpu_common::DynamicTensorManager(_tensor_reg)}, + _static_tensor_mgr{new StaticTensorManager(_tensor_reg, _dynamic_tensor_mgr.get())} +{ + /* empty */ +} + +void TensorBuilder::registerTensorInfo(const ir::OperandIndex &ind, const ir::OperandInfo &info, + ir::Layout layout) +{ + _tensor_info_map.emplace(ind, info); + + // CPU backend supports only one layout as NHWC + assert(layout == ir::Layout::NHWC); + if (info.isDynamic()) + { + _dynamic_tensor_mgr->buildTensor(ind, info, layout); + } + else + { + _static_tensor_mgr->buildTensor(ind, info, layout, info.isConstant()); + } +} + +void TensorBuilder::notifyFirstUse(const ir::OperandIndex &ind) +{ + assert(_tensor_info_map.find(ind) != _tensor_info_map.end()); + const auto tensor_info = _tensor_info_map.at(ind); + + if (!_tensor_reg->getNativeTensor(ind)->is_dynamic()) + { + const auto size = tensor_info.total_size(); + _static_tensor_mgr->claimPlan(ind, size); + } +} + +void TensorBuilder::notifyLastUse(const ir::OperandIndex &ind) +{ + if (!_tensor_reg->getNativeTensor(ind)->is_dynamic()) + { + _static_tensor_mgr->releasePlan(ind); + } +} + +bool TensorBuilder::isRegistered(const ir::OperandIndex &ind) const +{ + return _tensor_info_map.find(ind) != _tensor_info_map.end(); +} + +void TensorBuilder::prepare(void) { _static_tensor_mgr->allocateNonconsts(); } + +void TensorBuilder::allocate() +{ + // NOTE For now nothing to do. Allocation is done in prepare stage, which is not appropriate + // This is because CPU kernels require `ITensor`s to be allocated before Kernel Generation. 
+} + +} // namespace ruy +} // namespace backend +} // namespace onert diff --git a/runtime/onert/backend/ruy/TensorBuilder.h b/runtime/onert/backend/ruy/TensorBuilder.h new file mode 100644 index 000000000..91c07bd82 --- /dev/null +++ b/runtime/onert/backend/ruy/TensorBuilder.h @@ -0,0 +1,73 @@ +/* + * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef __ONERT_BACKEND_RUY_TENSOR_BUILDER_H__ +#define __ONERT_BACKEND_RUY_TENSOR_BUILDER_H__ + +#include <backend/cpu_common/DynamicTensorManager.h> +#include <backend/cpu_common/TensorRegistry.h> + +#include <ir/OperandIndexMap.h> + +#include "StaticTensorManager.h" +#include "Tensor.h" + +#include <unordered_map> + +namespace onert +{ +namespace backend +{ +namespace ruy +{ + +class TensorBuilder +{ +public: + TensorBuilder(const std::shared_ptr<cpu_common::TensorRegistry> &tensor_reg); + + /** + * @brief Register tensor information to allocate on CPU backend + * @param[in] ind Operand index + * @param[in] info Operand information + * @param[in] layout Operand data layout + */ + void registerTensorInfo(const ir::OperandIndex &ind, const ir::OperandInfo &info, + ir::Layout backend_layout); + + void notifyFirstUse(const ir::OperandIndex &); + void notifyLastUse(const ir::OperandIndex &); + + bool isRegistered(const ir::OperandIndex &) const; + + void prepare(void); + void allocate(); + void postFunctionPrepare() { /* DO NOTHING */} + + IDynamicTensorManager *dynamicTensorManager(void) { return _dynamic_tensor_mgr.get(); } + +private: + const std::shared_ptr<cpu_common::TensorRegistry> _tensor_reg; + std::unique_ptr<cpu_common::DynamicTensorManager> _dynamic_tensor_mgr; + std::unique_ptr<StaticTensorManager> _static_tensor_mgr; + ir::OperandIndexMap<ir::OperandInfo> _tensor_info_map; +}; + +} // namespace ruy +} // namespace backend +} // namespace onert + +#endif // __ONERT_BACKEND_RUY_TENSOR_BUILDER_H__ diff --git a/runtime/onert/backend/ruy/ops/ConvolutionLayer.cc b/runtime/onert/backend/ruy/ops/ConvolutionLayer.cc new file mode 100644 index 000000000..d249b2ce3 --- /dev/null +++ b/runtime/onert/backend/ruy/ops/ConvolutionLayer.cc @@ -0,0 +1,153 @@ +/* + * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include "ConvolutionLayer.h" + +#include "../Tensor.h" +#include "ir/Padding.h" + +namespace onert +{ +namespace backend +{ +namespace ruy +{ +namespace ops +{ +ConvolutionLayer::ConvolutionLayer() + : _input(nullptr), _kernel(nullptr), _bias(nullptr), _output(nullptr), + _paddingType(ir::PaddingType::EXPLICIT), _paddingLeft(0), _paddingTop(0), _paddingRight(0), + _paddingBottom(0), _strideWidth(0), _strideHeight(0), _dilationWidthFactor(1), + _dilationHeightFactor(1), _activation(ir::Activation::NONE), + _conv_kernel(new nnfw::ruy::Conv()), _prepare(false) +{ + // DO NOTHING +} + +ConvolutionLayer::~ConvolutionLayer() = default; + +void ConvolutionLayer::convFloat32() +{ + float output_activation_min = 0, output_activation_max = 0; + CalculateActivationRange(_activation, &output_activation_min, &output_activation_max); + + nnfw::ruy::ConvParams op_params; + op_params.padding_type = getPaddingType(_paddingType); + op_params.padding_values.width = _paddingLeft; + op_params.padding_values.height = _paddingTop; + op_params.stride_width = _strideWidth; + op_params.stride_height = _strideHeight; + op_params.dilation_width_factor = _dilationWidthFactor; + op_params.dilation_height_factor = _dilationHeightFactor; + op_params.float_activation_min = output_activation_min; + op_params.float_activation_max = output_activation_max; + + nnfw::ruy::Conv &kernel = *_conv_kernel; + kernel(op_params, getTensorShape(_input), reinterpret_cast<const float *>(_input->buffer()), + getTensorShape(_kernel), reinterpret_cast<const float *>(_kernel->buffer()), + getTensorShape(_bias), reinterpret_cast<const float *>(_bias->buffer()), + getTensorShape(_output), reinterpret_cast<float *>(_output->buffer()), + _external_context->ruy_context()); +} + +void ConvolutionLayer::configure(const IPortableTensor *input, const IPortableTensor *kernel, + const IPortableTensor *bias, const ir::PaddingType paddingType, + const uint32_t paddingLeft, const uint32_t paddingRight, + const uint32_t paddingTop, const uint32_t paddingBottom, + const uint32_t strideWidth, const uint32_t strideHeight, + const uint32_t dilationWidthFactor, + const uint32_t dilationHeightFactor, + const ir::Activation activation, IPortableTensor *output, + const std::shared_ptr<ExternalContext> &external_context) +{ + _input = input; + _kernel = kernel; + _bias = bias; + _paddingType = paddingType; + _paddingLeft = paddingLeft; + _paddingRight = paddingRight; + _paddingTop = paddingTop; + _paddingBottom = paddingBottom; + _strideWidth = strideWidth; + _strideHeight = strideHeight; + _dilationWidthFactor = dilationWidthFactor; + _dilationHeightFactor = dilationHeightFactor; + _activation = activation; + _output = output; + _external_context = external_context; +} + +void ConvolutionLayer::run() +{ + prepare(); + + if (_input->is_dynamic() || _kernel->is_dynamic()) + { + const auto ifm_shape = _input->getShape().asFeature(_input->layout()); + const auto ofm_shape = _output->getShape().asFeature(_input->layout()); + // Kernel format is [depth_out, kernel_height, kernel_width, depth_in]. 
+ const auto ker_shape = _kernel->getShape(); + const auto ker_height = ker_shape.dim(1); + const auto ker_width = ker_shape.dim(2); + + ir::Stride stride; + stride.vertical = _strideWidth; + stride.horizontal = _strideWidth; + + ir::Padding param_padding; + param_padding.type = _paddingType; + param_padding.param.left = _paddingLeft; + param_padding.param.right = _paddingRight; + param_padding.param.top = _paddingTop; + param_padding.param.bottom = _paddingBottom; + + const auto padding = + ir::calculatePadding(param_padding, ifm_shape, ofm_shape, stride, ker_width, ker_height, + _dilationWidthFactor, _dilationHeightFactor); + + _paddingLeft = padding.left; + _paddingRight = padding.right; + _paddingTop = padding.top; + _paddingBottom = padding.bottom; + } + if (_input->data_type() == OperandType::FLOAT32) + { + convFloat32(); + } + else + { + throw std::runtime_error{"Conv: unsupported data type"}; + } +} + +void ConvolutionLayer::prepare() +{ + if (_prepare) + return; + + nnfw::ruy::Conv &kernel = *_conv_kernel; + if (_input->data_type() == OperandType::FLOAT32 && _kernel->is_constant()) + { + kernel.prepare(getTensorShape(_input), getTensorShape(_kernel), getTensorShape(_output), + _strideWidth, _strideHeight, _dilationWidthFactor, _dilationHeightFactor); + } + _prepare = true; +} + +} // namespace ops +} // namespace ruy +} // namespace backend +} // namespace onert diff --git a/runtime/onert/backend/ruy/ops/ConvolutionLayer.h b/runtime/onert/backend/ruy/ops/ConvolutionLayer.h new file mode 100644 index 000000000..a55387b93 --- /dev/null +++ b/runtime/onert/backend/ruy/ops/ConvolutionLayer.h @@ -0,0 +1,90 @@ +/* + * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#ifndef __ONERT_BACKEND_RUY_OPS_CONVOLUTIONLAYER_H__ +#define __ONERT_BACKEND_RUY_OPS_CONVOLUTIONLAYER_H__ + +#include <backend/IPortableTensor.h> +#include "../ExternalContext.h" +#include "OperationUtils.h" + +#include <ruy/operation/Conv.h> +#include <exec/IFunction.h> +#include <functional> +#include <memory> + +namespace onert +{ +namespace backend +{ +namespace ruy +{ +namespace ops +{ + +class ConvolutionLayer : public ::onert::exec::IFunction +{ +public: + ConvolutionLayer(); + ~ConvolutionLayer(); + +public: + void convFloat32(); + + void configure(const IPortableTensor *input, const IPortableTensor *kernel, + const IPortableTensor *bias, ir::PaddingType _paddingType, + const uint32_t paddingLeft, const uint32_t paddingRight, const uint32_t paddingTop, + const uint32_t paddingBottom, const uint32_t strideWidth, + const uint32_t strideHeight, const uint32_t dilationWidthFactor, + const uint32_t dilationHeightFactor, const ir::Activation activation, + IPortableTensor *output, const std::shared_ptr<ExternalContext> &external_context); + + void run() override; + + void prepare() override; + +private: + const IPortableTensor *_input; + const IPortableTensor *_kernel; + const IPortableTensor *_bias; + IPortableTensor *_output; + + ir::PaddingType _paddingType; + uint32_t _paddingLeft; + uint32_t _paddingTop; + uint32_t _paddingRight; + uint32_t _paddingBottom; + + uint32_t _strideWidth; + uint32_t _strideHeight; + uint32_t _dilationWidthFactor; + uint32_t _dilationHeightFactor; + + ir::Activation _activation; + + std::unique_ptr<nnfw::ruy::Conv> _conv_kernel; + + bool _prepare; + + std::shared_ptr<ExternalContext> _external_context; +}; + +} // namespace ops +} // namespace ruy +} // namespace backend +} // namespace onert + +#endif // __ONERT_BACKEND_RUY_OPS_CONVOLUTIONLAYER_H__ diff --git a/runtime/onert/backend/ruy/ops/FullyConnectedLayer.cc b/runtime/onert/backend/ruy/ops/FullyConnectedLayer.cc new file mode 100644 index 000000000..af693e3b4 --- /dev/null +++ b/runtime/onert/backend/ruy/ops/FullyConnectedLayer.cc @@ -0,0 +1,103 @@ +/* + * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include "FullyConnectedLayer.h" + +#include "../Tensor.h" +#include <ruy/operation/FullyConnected.h> +#include <ruy/TensorUtils.h> + +namespace onert +{ +namespace backend +{ +namespace ruy +{ +namespace ops +{ + +FullyConnectedLayer::FullyConnectedLayer() + : _input(nullptr), _weights(nullptr), _bias(nullptr), _output(nullptr), + _activation(ir::Activation::NONE), _external_context(nullptr) +{ + // DO NOTHING +} + +FullyConnectedLayer::~FullyConnectedLayer() = default; + +void FullyConnectedLayer::fullyConnectedFloat32() +{ + float output_activation_min = 0, output_activation_max = 0; + CalculateActivationRange(_activation, &output_activation_min, &output_activation_max); + nnfw::ruy::FullyConnectedParams op_params; + + op_params.float_activation_min = output_activation_min; + op_params.float_activation_max = output_activation_max; + op_params.activation = convertActivationType(_activation); + op_params.lhs_cacheable = _weights->is_constant(); + op_params.rhs_cacheable = _input->is_constant(); + + nnfw::ruy::FullyConnected( + op_params, getTensorShape(_input), reinterpret_cast<const float *>(_input->buffer()), + getTensorShape(_weights), reinterpret_cast<const float *>(_weights->buffer()), + getTensorShape(_bias), reinterpret_cast<const float *>(_bias ? _bias->buffer() : nullptr), + getTensorShape(_output), reinterpret_cast<float *>(_output->buffer()), + _external_context->ruy_context()); +} + +void FullyConnectedLayer::configure(const IPortableTensor *input, const IPortableTensor *weights, + const IPortableTensor *bias, ir::Activation activation, + ir::FullyConnectedWeightsFormat weights_format, + IPortableTensor *output, + const std::shared_ptr<ExternalContext> &external_context) +{ + UNUSED_RELEASE(weights_format); + _input = input; + _weights = weights; + _bias = bias; + _activation = activation; + _output = output; + _external_context = external_context; +} + +void FullyConnectedLayer::run() +{ + if (_input->data_type() == OperandType::FLOAT32) + { + fullyConnectedFloat32(); + } + else + { + throw std::runtime_error{"FullyConnected: unsupported data type"}; + } +} + +void FullyConnectedLayer::prepare() +{ + if (_bias && _bias->is_constant()) + { + const int bias_size = getTensorShape(_bias).FlatSize(); + if (nnfw::ruy::IsZeroVector(reinterpret_cast<float *>(_bias->buffer()), bias_size)) + { + _bias = nullptr; + } + } +} + +} // namespace ops +} // namespace ruy +} // namespace backend +} // namespace onert diff --git a/runtime/onert/backend/ruy/ops/FullyConnectedLayer.h b/runtime/onert/backend/ruy/ops/FullyConnectedLayer.h new file mode 100644 index 000000000..33d560f0b --- /dev/null +++ b/runtime/onert/backend/ruy/ops/FullyConnectedLayer.h @@ -0,0 +1,69 @@ +/* + * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#ifndef __ONERT_BACKEND_RUY_OPS_FULLYCONNECTEDLAYER_H__ +#define __ONERT_BACKEND_RUY_OPS_FULLYCONNECTEDLAYER_H__ + +#include <backend/IPortableTensor.h> +#include "../ExternalContext.h" +#include "OperationUtils.h" + +#include <exec/IFunction.h> + +namespace onert +{ +namespace backend +{ +namespace ruy +{ +namespace ops +{ + +class FullyConnectedLayer : public ::onert::exec::IFunction +{ +public: + FullyConnectedLayer(); + ~FullyConnectedLayer(); + +public: + void fullyConnectedFloat32(); + + void configure(const IPortableTensor *input, const IPortableTensor *weights, + const IPortableTensor *bias, ir::Activation activation, + ir::FullyConnectedWeightsFormat weights_format, IPortableTensor *output, + const std::shared_ptr<ExternalContext> &external_context); + + void run() override; + + void prepare() override; + +private: + const IPortableTensor *_input; + const IPortableTensor *_weights; + const IPortableTensor *_bias; + IPortableTensor *_output; + + ir::Activation _activation; + + std::shared_ptr<ExternalContext> _external_context; +}; + +} // namespace ops +} // namespace ruy +} // namespace backend +} // namespace onert + +#endif // __ONERT_BACKEND_RUY_OPS_FULLYCONNECTEDLAYER_H__ diff --git a/runtime/onert/backend/ruy/ops/OperationUtils.cc b/runtime/onert/backend/ruy/ops/OperationUtils.cc new file mode 100644 index 000000000..929107b1a --- /dev/null +++ b/runtime/onert/backend/ruy/ops/OperationUtils.cc @@ -0,0 +1,47 @@ +/* + * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "OperationUtils.h" + +namespace onert +{ +namespace backend +{ +namespace ruy +{ +namespace ops +{ + +nnfw::ruy::PaddingType getPaddingType(ir::PaddingType ir_padding_type) +{ + switch (ir_padding_type) + { + case ir::PaddingType::EXPLICIT: + return nnfw::ruy::PaddingType::kNone; + case ir::PaddingType::SAME: + return nnfw::ruy::PaddingType::kSame; + case ir::PaddingType::VALID: + return nnfw::ruy::PaddingType::kValid; + default: + throw std::runtime_error("Wrong padding type."); + break; + } +} + +} // namespace ops +} // namespace ruy +} // namespace backend +} // namespace onert diff --git a/runtime/onert/backend/ruy/ops/OperationUtils.h b/runtime/onert/backend/ruy/ops/OperationUtils.h new file mode 100644 index 000000000..5dfdc7ec5 --- /dev/null +++ b/runtime/onert/backend/ruy/ops/OperationUtils.h @@ -0,0 +1,123 @@ +/* + * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+ * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef __ONERT_BACKEND_RUY_OPS_OPERATION_UTILS_H__ +#define __ONERT_BACKEND_RUY_OPS_OPERATION_UTILS_H__ + +#include <backend/IPortableTensor.h> + +#include <ruy/Shape.h> +#include <ruy/Types.h> +#include <iostream> +#include <ir/DataType.h> +#include <ir/InternalType.h> +#include <ir/Padding.h> + +#include <limits> + +using OperandType = onert::ir::DataType; + +namespace onert +{ +namespace backend +{ +namespace ruy +{ +namespace ops +{ + +inline nnfw::ruy::Shape getTensorShape(const IPortableTensor *tensor) +{ + if (tensor == nullptr) + return nnfw::ruy::Shape(); + + const ir::Shape &shape = tensor->get_info().shape(); + + assert(tensor->layout() == ir::Layout::NHWC); + + auto rank = shape.rank(); + nnfw::ruy::Shape ret(rank); + auto data = ret.DimsData(); + for (int i = 0; i < rank; ++i) + { + data[i] = shape.dim(i); + } + return ret; +} + +inline nnfw::ruy::FusedActivationFunctionType convertActivationType(const ir::Activation activation) +{ + switch (activation) + { + case ir::Activation::NONE: + return nnfw::ruy::FusedActivationFunctionType::kNone; + case ir::Activation::RELU: + return nnfw::ruy::FusedActivationFunctionType::kRelu; + case ir::Activation::RELU1: + return nnfw::ruy::FusedActivationFunctionType::kRelu1; + case ir::Activation::RELU6: + return nnfw::ruy::FusedActivationFunctionType::kRelu6; + case ir::Activation::TANH: + return nnfw::ruy::FusedActivationFunctionType::kTanh; + case ir::Activation::SIGMOID: + return nnfw::ruy::FusedActivationFunctionType::kSigmoid; + default: + throw std::runtime_error{"RUY backend: Cannot convert activation type"}; + } +} + +template <typename T> +void CalculateActivationRange(ir::Activation activation, T *activation_min, T *activation_max) +{ + if (activation == ir::Activation::RELU) + { + *activation_min = 0; + *activation_max = std::numeric_limits<T>::max(); + } + else if (activation == ir::Activation::RELU6) + { + *activation_min = 0; + *activation_max = 6; + } + else if (activation == ir::Activation::RELU1) + { + *activation_min = -1; + *activation_max = 1; + } + else if (activation == ir::Activation::SIGMOID) + { + *activation_min = 0; + *activation_max = 1; + } + else if (activation == ir::Activation::NONE) + { + *activation_min = std::numeric_limits<T>::lowest(); + *activation_max = std::numeric_limits<T>::max(); + } + else + { + std::cout << "Unsupported fused activation function." << std::endl; + } +} + +nnfw::ruy::PaddingType getPaddingType(ir::PaddingType ir_padding_type); + +} // namespace ops +} // namespace ruy +} // namespace backend +} // namespace onert + +#endif // __ONERT_BACKEND_RUY_OPS_OPERATION_UTILS_H__ diff --git a/runtime/onert/backend/ruy/ruy.cc b/runtime/onert/backend/ruy/ruy.cc new file mode 100644 index 000000000..4f33590e9 --- /dev/null +++ b/runtime/onert/backend/ruy/ruy.cc @@ -0,0 +1,24 @@ +/* + * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+ * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "Backend.h" + +extern "C" { + +onert::backend::Backend *onert_backend_create() { return new onert::backend::ruy::Backend; } + +void onert_backend_destroy(onert::backend::Backend *backend) { delete backend; } +} diff --git a/runtime/onert/backend/xnnpack/Backend.h b/runtime/onert/backend/xnnpack/Backend.h new file mode 100644 index 000000000..b7aef1625 --- /dev/null +++ b/runtime/onert/backend/xnnpack/Backend.h @@ -0,0 +1,68 @@ +/* + * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef __ONERT_BACKEND_XNNPACK_BACKEND_H__ +#define __ONERT_BACKEND_XNNPACK_BACKEND_H__ + +#include "BackendContext.h" +#include "Config.h" +#include "ConstantInitializer.h" +#include "KernelGenerator.h" + +#include <backend/Backend.h> + +#include <memory> + +namespace onert +{ +namespace backend +{ +namespace xnnpack +{ + +class Backend : public ::onert::backend::Backend +{ +public: + Backend() : _config{std::make_shared<Config>()} {} + + std::shared_ptr<IConfig> config() const override { return _config; } + + std::unique_ptr<onert::backend::BackendContext> + newContext(const ir::Graph &graph, const std::shared_ptr<custom::IKernelBuilder> &kb, + bool) const override + { + const auto &operands = graph.operands(); + const auto &operations = graph.operations(); + auto context = std::make_unique<BackendContext>(this, &graph); + auto tr = std::make_shared<cpu_common::TensorRegistry>(); + auto tb = std::make_shared<TensorBuilder>(tr); + context->tensor_registry = tr; + context->tensor_builder = tb; + context->constant_initializer = std::make_shared<ConstantInitializer>(operands, tr); + context->kernel_gen = std::make_shared<KernelGenerator>(operands, operations, tb, tr, kb, + context->external_context()); + return context; + } + +private: + std::shared_ptr<IConfig> _config; +}; + +} // namespace xnnpack +} // namespace backend +} // namespace onert + +#endif // __ONERT_BACKEND_XNNPACK_BACKEND_H__ diff --git a/runtime/onert/backend/xnnpack/BackendContext.cc b/runtime/onert/backend/xnnpack/BackendContext.cc new file mode 100644 index 000000000..503d088aa --- /dev/null +++ b/runtime/onert/backend/xnnpack/BackendContext.cc @@ -0,0 +1,147 @@ +/* + * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include "BackendContext.h" + +#include "TensorBuilder.h" +#include "KernelGenerator.h" +#include "util/logging.h" +#include "ir/Index.h" +#include "ir/OperandIndexMap.h" +#include "ir/OperandIndexSequence.h" +#include "backend/cpu_common/BackendContextHelpers.h" + +namespace onert +{ +namespace backend +{ +namespace xnnpack +{ + +void BackendContext::initConsts() +{ + for (auto &op : operation_list()) + { + constant_initializer->setLayout(op.layout); + graph()->operations().at(op.index).accept(*constant_initializer); + } + + for (auto ind : operand_list()) + { + const auto &obj = graph()->operands().at(ind); + if (obj.isConstant() && !constant_initializer->exist(ind)) + { + constant_initializer->registerDefaultInitializer(ind, obj); + } + } + + constant_initializer->run(); +} + +ITensorRegistry *BackendContext::genTensors(const std::vector<onert::ir::OpSequenceIndex> &order, + const ir::OpSequences &op_seqs, + const ir::LowerInfoMap &lower_info) +{ + auto model_io = (graph()->getInputs() + graph()->getOutputs()) | ir::Remove::UNDEFINED | + ir::Remove::DUPLICATED; + for (auto index : operand_list()) + { + if (model_io.contains(index)) + continue; + const auto &obj = graph()->operands().at(index); + const auto frontend_layout = [&]() { + if (obj.getUses().size() == 0) + return ir::Layout::UNKNOWN; + auto use_op_ind = *obj.getUses().begin(); // FIXME What if it has two or more uses? + for (auto &operation_info : operation_list()) + { + if (operation_info.index == use_op_ind) + return operation_info.layout; + } + return ir::Layout::UNKNOWN; + }(); + const auto &permute_factor = lower_info.operand.at(index)->def_factors().getOnlyElement(); + if (permute_factor.backend() != backend()) + continue; + const auto backend_layout = permute_factor.layout(); + ir::OperandInfo backend_info{permuteShape(obj.shape(), frontend_layout, backend_layout), + obj.typeInfo(), obj.info().memAllocType(), obj.isConstant()}; + tensor_builder->registerTensorInfo(index, backend_info, backend_layout); + } + + // TODO Get compiler options from compiler, and use it rather than getting it from Env + if (util::getConfigString(util::config::EXECUTOR) == "Linear") + { + cpu_common::planTensors(*this, order, op_seqs, lower_info); + } + else + { + // For the executors that does not have fixed linear execution order: + // To make tensors never be deallocated, this is a workaround to use static memory planner + for (auto ind : operand_list()) + { + if (tensor_builder->isRegistered(ind)) + tensor_builder->notifyFirstUse(ind); + } + } + + tensor_builder->prepare(); + + return tensor_registry.get(); +} + +FunctionMap BackendContext::genKernels(const std::vector<onert::ir::OpSequenceIndex> &order, + const ir::OpSequences &op_seqs) +{ + FunctionMap ret; + + for (auto op_seq_ind : order) + { + const auto &op_seq = op_seqs.at(op_seq_ind); + bool assigned = [&]() { + for (auto op_info : operation_list()) + if (op_seq.exist(op_info.index)) + return true; + return false; + }(); + if (!assigned) + continue; + auto fn_seq = kernel_gen->generate(op_seqs.at(op_seq_ind)); + ret.emplace_back(op_seq_ind, std::move(fn_seq)); + } + + initConsts(); + + // NOTE For memory optimization, we want to free some operand data + for (auto ind : operand_list()) + { + // TODO Remove const_cast + auto &obj = const_cast<ir::Graph *>(graph())->operands().at(ind); + obj.releaseData(); + } + + for (auto &it : ret) + { + auto &fn_seq = it.second; + fn_seq->iterate([&](exec::IFunction &ifunc) { ifunc.prepare(); }); + } + + return ret; +} + +} // namespace 
xnnpack +} // namespace backend +} // namespace onert diff --git a/runtime/onert/backend/xnnpack/BackendContext.h b/runtime/onert/backend/xnnpack/BackendContext.h new file mode 100644 index 000000000..f81175b9e --- /dev/null +++ b/runtime/onert/backend/xnnpack/BackendContext.h @@ -0,0 +1,85 @@ +/* + * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef __ONERT_BACKEND_XNNPACK_BACKEND_CONTEXT_H__ +#define __ONERT_BACKEND_XNNPACK_BACKEND_CONTEXT_H__ + +#include <backend/BackendContext.h> +#include <util/ConfigSource.h> +#include "TensorBuilder.h" +#include "ConstantInitializer.h" +#include "KernelGenerator.h" +#include "ExternalContext.h" + +namespace +{ +const int kDefaultNumThreadpoolThreads = 1; +} + +namespace onert +{ +namespace backend +{ +namespace xnnpack +{ + +class BackendContext : public onert::backend::BackendContext +{ +public: + BackendContext(const Backend *backend, const ir::Graph *graph, + std::shared_ptr<ITensorRegistry> tensor_registry = nullptr, + std::shared_ptr<TensorBuilder> tensor_builder = nullptr, + std::shared_ptr<ConstantInitializer> constant_initializer = nullptr, + std::shared_ptr<KernelGenerator> kernel_gen = nullptr) + : onert::backend::BackendContext(backend, graph, tensor_registry), + tensor_builder{tensor_builder}, constant_initializer{constant_initializer}, + kernel_gen{kernel_gen}, _external_context(nullptr) + { + int num_threads = util::getConfigInt(util::config::XNNPACK_THREADS); + if (num_threads < 1) + num_threads = kDefaultNumThreadpoolThreads; // default num of threads + _external_context.reset(new ExternalContext(static_cast<size_t>(num_threads))); + } + + ITensorRegistry *genTensors(const std::vector<onert::ir::OpSequenceIndex> &order, + const ir::OpSequences &op_seqs, + const ir::LowerInfoMap &lower_info) override; + + FunctionMap genKernels(const std::vector<ir::OpSequenceIndex> &order, + const ir::OpSequences &op_seqs) override; + + std::shared_ptr<ExternalContext> external_context() { return _external_context; } + +private: + void initConsts(); + void planTensors(const std::vector<onert::ir::OpSequenceIndex> &order, + const ir::OpSequences &op_seqs, const ir::LowerInfoMap &lower_info); + +public: + // TODO Make it private + std::shared_ptr<TensorBuilder> tensor_builder; + std::shared_ptr<ConstantInitializer> constant_initializer; + std::shared_ptr<KernelGenerator> kernel_gen; + +private: + std::shared_ptr<ExternalContext> _external_context; +}; + +} // namespace xnnpack +} // namespace backend +} // namespace onert + +#endif // __ONERT_BACKEND_XNNPACK_BACKEND_CONTEXT_H__ diff --git a/runtime/onert/backend/xnnpack/CMakeLists.txt b/runtime/onert/backend/xnnpack/CMakeLists.txt new file mode 100644 index 000000000..e3de31e6f --- /dev/null +++ b/runtime/onert/backend/xnnpack/CMakeLists.txt @@ -0,0 +1,26 @@ +set(LIB_ONERT_BACKEND_XNNPACK onert_backend_xnnpack) + +# Unsupported architecture +nnfw_find_package(Xnnpack QUIET) +if(NOT Xnnpack_FOUND) + return() 
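+  # XNNPACK is not available for this target, so the backend library is not built.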
+endif(NOT Xnnpack_FOUND) + +file(GLOB_RECURSE SOURCES "*.cc") + +add_library(${LIB_ONERT_BACKEND_XNNPACK} SHARED ${SOURCES}) + +target_link_libraries(${LIB_ONERT_BACKEND_XNNPACK} PRIVATE onert_core) +target_link_libraries(${LIB_ONERT_BACKEND_XNNPACK} PRIVATE nnfw_common) +target_link_libraries(${LIB_ONERT_BACKEND_XNNPACK} PRIVATE nnfw_coverage) +target_link_libraries(${LIB_ONERT_BACKEND_XNNPACK} PRIVATE pthreadpool) +target_link_libraries(${LIB_ONERT_BACKEND_XNNPACK} PRIVATE XNNPACK) + +set_target_properties(${LIB_ONERT_BACKEND_XNNPACK} PROPERTIES OUTPUT_NAME backend_xnnpack) + +if(CMAKE_BUILD_TYPE_LC STREQUAL "release") + add_custom_command(TARGET ${LIB_ONERT_BACKEND_XNNPACK} POST_BUILD + COMMAND ${CMAKE_STRIP} "--strip-unneeded" $<TARGET_FILE_NAME:${LIB_ONERT_BACKEND_XNNPACK}>) +endif() + +install(TARGETS ${LIB_ONERT_BACKEND_XNNPACK} DESTINATION lib) diff --git a/runtime/onert/backend/xnnpack/Config.cc b/runtime/onert/backend/xnnpack/Config.cc new file mode 100644 index 000000000..4d42a3f18 --- /dev/null +++ b/runtime/onert/backend/xnnpack/Config.cc @@ -0,0 +1,44 @@ +/* + * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "Config.h" + +#include <xnnpack.h> + +namespace onert +{ +namespace backend +{ +namespace xnnpack +{ + +Config::~Config() { xnn_deinitialize(); } + +bool Config::initialize() +{ + xnn_status status = xnn_initialize(nullptr /* allocator */); + if (status != xnn_status_success) + { + throw std::runtime_error{"failed to initialize XNNPACK"}; + } + return true; +} + +ir::Layout Config::supportLayout(const ir::Operation &, ir::Layout) { return ir::Layout::NHWC; } + +} // namespace cpu +} // namespace backend +} // namespace onert diff --git a/runtime/onert/backend/xnnpack/Config.h b/runtime/onert/backend/xnnpack/Config.h new file mode 100644 index 000000000..2cf7406e5 --- /dev/null +++ b/runtime/onert/backend/xnnpack/Config.h @@ -0,0 +1,51 @@ +/* + * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#ifndef __ONERT_BACKEND_XNNPACK_CONFIG_H__ +#define __ONERT_BACKEND_XNNPACK_CONFIG_H__ + +#include <backend/IConfig.h> +#include <memory> +#include <util/ITimer.h> + +namespace onert +{ +namespace backend +{ +namespace xnnpack +{ + +class Config : public IConfig +{ +public: + virtual ~Config(); + +public: + std::string id() override { return "xnnpack"; } + bool initialize() override; + ir::Layout supportLayout(const ir::Operation &node, ir::Layout frontend_layout) override; + bool supportPermutation() override { return true; } + bool supportDynamicTensor() override { return true; } + bool supportFP16() override { return false; } + + std::unique_ptr<util::ITimer> timer() override { return std::make_unique<util::CPUTimer>(); } +}; + +} // namespace xnnpack +} // namespace backend +} // namespace onert + +#endif // __ONERT_BACKEND_XNNPACK_CONFIG_H__ diff --git a/runtime/onert/backend/xnnpack/ConstantInitializer.h b/runtime/onert/backend/xnnpack/ConstantInitializer.h new file mode 100644 index 000000000..45cdd8cd9 --- /dev/null +++ b/runtime/onert/backend/xnnpack/ConstantInitializer.h @@ -0,0 +1,35 @@ +/* + * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef __ONERT_BACKEND_XNNPACK_CONSTANT_INITIALIZER_H__ +#define __ONERT_BACKEND_XNNPACK_CONSTANT_INITIALIZER_H__ + +#include <backend/cpu_common/ConstantInitializer.h> + +namespace onert +{ +namespace backend +{ +namespace xnnpack +{ + +using ConstantInitializer = cpu_common::ConstantInitializer; + +} // namespace xnnpack +} // namespace backend +} // namespace onert + +#endif // __ONERT_BACKEND_XNNPACK_CONSTANT_INITIALIZER_H__ diff --git a/runtime/onert/backend/xnnpack/ExternalContext.cc b/runtime/onert/backend/xnnpack/ExternalContext.cc new file mode 100644 index 000000000..3a9fe1b55 --- /dev/null +++ b/runtime/onert/backend/xnnpack/ExternalContext.cc @@ -0,0 +1,36 @@ +/* + * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include "ExternalContext.h" + +#include <cassert> + +namespace onert +{ +namespace backend +{ +namespace xnnpack +{ + +ExternalContext::ExternalContext(size_t num_threads) + : _threadpool(pthreadpool_create(num_threads), pthreadpool_destroy) +{ + assert(_threadpool); +} + +} // namespace xnnpack +} // namespace backend +} // namespace onert diff --git a/runtime/onert/backend/xnnpack/ExternalContext.h b/runtime/onert/backend/xnnpack/ExternalContext.h new file mode 100644 index 000000000..682fd2e4e --- /dev/null +++ b/runtime/onert/backend/xnnpack/ExternalContext.h @@ -0,0 +1,46 @@ +/* + * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef __ONERT_BACKEND_XNNPACK_EXTERNAL_CONTEXT_H__ +#define __ONERT_BACKEND_XNNPACK_EXTERNAL_CONTEXT_H__ + +#include <memory> +#include <xnnpack.h> + +namespace onert +{ +namespace backend +{ +namespace xnnpack +{ + +class ExternalContext +{ +public: + ExternalContext(size_t num_threads); + +public: + pthreadpool *getThreadPool() { return _threadpool.get(); } + +private: + std::unique_ptr<pthreadpool, decltype(&pthreadpool_destroy)> _threadpool; +}; + +} // namespace xnnpack +} // namespace backend +} // namespace onert + +#endif // __ONERT_BACKEND_XNNPACK_EXTERNAL_CONTEXT_H__ diff --git a/runtime/onert/backend/xnnpack/KernelGenerator.cc b/runtime/onert/backend/xnnpack/KernelGenerator.cc new file mode 100644 index 000000000..b7d3f60fb --- /dev/null +++ b/runtime/onert/backend/xnnpack/KernelGenerator.cc @@ -0,0 +1,197 @@ +/* + * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include "KernelGenerator.h" + +#include "ops/ConvolutionLayer.h" +#include "ops/DepthwiseConvolutionLayer.h" +#include "ops/FullyConnectedLayer.h" + +#include <backend/Backend.h> +#include <backend/IConfig.h> +#include <memory> +#include <util/Utils.h> +#include <util/logging.h> +#include <exec/DynamicShapeInferer.h> + +#include <stdexcept> + +namespace onert +{ +namespace backend +{ +namespace xnnpack +{ + +KernelGenerator::KernelGenerator( + const ir::Operands &operands_ctx, const ir::Operations &operations_ctx, + const std::shared_ptr<TensorBuilder> &tensor_builder, + const std::shared_ptr<cpu_common::TensorRegistry> &tensor_reg, + const std::shared_ptr<backend::custom::IKernelBuilder> &kernel_builder, + const std::shared_ptr<ExternalContext> &external_context) + : _ctx(operands_ctx), _operations_ctx{operations_ctx}, _tensor_builder(tensor_builder), + _tensor_reg{tensor_reg}, _kernel_builder(kernel_builder), + _current_layout(ir::Layout::UNKNOWN), _external_context(external_context) +{ + // DO NOTHING +} + +void KernelGenerator::visit(const ir::OpSequence &op_seq) +{ + assert(!_return_fn_seq); + assert(_tensor_builder->dynamicTensorManager()); + assert(_tensor_reg); + + auto dyn_shape_inferer = std::make_shared<exec::DynamicShapeInferer>(_ctx, _tensor_reg); + + _return_fn_seq = std::make_unique<exec::FunctionSequence>(); + + // Prepare to handle dynamic tensors later + auto dyn_ctx = std::make_shared<exec::FunctionSequence::DynamicTensorCtx>(); + { + dyn_ctx->op_seq = &op_seq; + dyn_ctx->operations = &_operations_ctx; + dyn_ctx->dynamic_shape_inferer = std::move(dyn_shape_inferer); + dyn_ctx->dynamic_tensor_manager = _tensor_builder->dynamicTensorManager(); + + _return_fn_seq->dynamic_tensor_ctx(dyn_ctx); + } + + _current_layout = op_seq.getLayout(); + for (const auto &operation_idx : op_seq.operations()) + { + const auto &node = _operations_ctx.at(operation_idx); + node.accept(*this); + _return_fn_seq->append(releaseFunction()); + + for (const auto &ind : (node.getInputs() | ir::Remove::UNDEFINED) + node.getOutputs()) + { + auto portable_tensor = _tensor_reg->getPortableTensor(ind); + if (portable_tensor) + { + assert(portable_tensor->layout() == ir::Layout::NHWC); + } + + auto tensor = _tensor_reg->getNativeTensor(ind); + if (tensor) + { + tensor->increase_ref(); + } + } + } +} + +void KernelGenerator::visit(const ir::operation::Conv2D &node) +{ + using ir::operation::Conv2D; + + const auto ofm_index{node.getOutputs().at(0)}; + const auto ifm_index{node.getInputs().at(Conv2D::Input::INPUT)}; + const auto ker_index{node.getInputs().at(Conv2D::Input::KERNEL)}; + const auto bias_index{node.getInputs().at(Conv2D::Input::BIAS)}; + + auto ofm_tensor = _tensor_reg->getPortableTensor(ofm_index); + auto ifm_tensor = _tensor_reg->getPortableTensor(ifm_index); + auto ker_tensor = _tensor_reg->getPortableTensor(ker_index); + auto bias_tensor = _tensor_reg->getPortableTensor(bias_index); + + const auto stride = node.param().stride; + const auto activation = node.param().activation; + const auto param_padding = node.param().padding; + const auto dilation = node.param().dilation; + auto fn = std::make_unique<ops::ConvolutionLayer>(_external_context); + + const auto ifm_shape = _ctx.at(ifm_index).shape().asFeature(_current_layout); + const auto ofm_shape = _ctx.at(ofm_index).shape().asFeature(_current_layout); + // Kernel format is [depth_out, kernel_height, kernel_width, depth_in]. 
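+ // The IR-level padding (SAME/VALID/EXPLICIT) is resolved to explicit pixel
+ // values here at kernel-generation time and handed to the layer together with
+ // the stride, dilation and fused activation.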
+ const auto &ker_shape = _ctx.at(ker_index).shape(); + const auto ker_height = ker_shape.dim(1); + const auto ker_width = ker_shape.dim(2); + + const auto padding = + ir::calculatePadding(param_padding, ifm_shape, ofm_shape, stride, ker_width, ker_height, + dilation.width_factor, dilation.height_factor); + + fn->configure(ifm_tensor, ker_tensor, bias_tensor, param_padding.type, padding.left, + padding.right, padding.top, padding.bottom, stride.horizontal, stride.vertical, + dilation.width_factor, dilation.height_factor, activation, ofm_tensor); + + _return_fn = std::move(fn); +} + +void KernelGenerator::visit(const ir::operation::DepthwiseConv2D &node) +{ + using ir::operation::DepthwiseConv2D; + + const auto ofm_index{node.getOutputs().at(0)}; + const auto ifm_index{node.getInputs().at(DepthwiseConv2D::Input::INPUT)}; + const auto ker_index{node.getInputs().at(DepthwiseConv2D::Input::KERNEL)}; + const auto bias_index{node.getInputs().at(DepthwiseConv2D::Input::BIAS)}; + + const auto stride = node.param().stride; + const auto ifm_shape = _ctx.at(ifm_index).shape().asFeature(_current_layout); + const auto ofm_shape = _ctx.at(ofm_index).shape().asFeature(_current_layout); + // Kernel format is [1, kernel_height, kernel_width, depth_out]. + const auto &ker_shape = _ctx.at(ker_index).shape(); + const auto ker_height = ker_shape.dim(1); + const auto ker_width = ker_shape.dim(2); + const auto dilation_width = node.param().dilation.width_factor; + const auto dilation_height = node.param().dilation.height_factor; + const auto param_padding = node.param().padding; + const auto padding = ir::calculatePadding(param_padding, ifm_shape, ofm_shape, stride, ker_width, + ker_height, dilation_width, dilation_height); + const auto multiplier = node.param().multiplier; + const auto activation = node.param().activation; + + auto ofm_tensor = _tensor_reg->getPortableTensor(ofm_index); + auto ifm_tensor = _tensor_reg->getPortableTensor(ifm_index); + auto ker_tensor = _tensor_reg->getPortableTensor(ker_index); + auto bias_tensor = _tensor_reg->getPortableTensor(bias_index); + + auto fn = std::make_unique<ops::DepthwiseConvolutionLayer>(_external_context); + + fn->configure(ifm_tensor, ker_tensor, bias_tensor, param_padding.type, padding.left, + padding.right, padding.top, padding.bottom, stride.horizontal, stride.vertical, + multiplier, dilation_width, dilation_height, activation, ofm_tensor); + + _return_fn = std::move(fn); +} + +void KernelGenerator::visit(const ir::operation::FullyConnected &node) +{ + using ir::operation::FullyConnected; + + const auto output_index{node.getOutputs().at(0)}; + const auto input_index{node.getInputs().at(FullyConnected::Input::INPUT)}; + const auto weight_index{node.getInputs().at(FullyConnected::Input::WEIGHT)}; + const auto bias_index{node.getInputs().at(FullyConnected::Input::BIAS)}; + const auto activation = node.param().activation; + + auto output_tensor = _tensor_reg->getPortableTensor(output_index); + auto input_tensor = _tensor_reg->getPortableTensor(input_index); + auto weight_tensor = _tensor_reg->getPortableTensor(weight_index); + auto bias_tensor = bias_index.undefined() ? 
nullptr : _tensor_reg->getPortableTensor(bias_index); + + auto fn = std::make_unique<ops::FullyConnectedLayer>(_external_context); + + fn->configure(input_tensor, weight_tensor, bias_tensor, activation, output_tensor); + + _return_fn = std::move(fn); +} + +} // namespace xnnpack +} // namespace backend +} // namespace onert diff --git a/runtime/onert/backend/xnnpack/KernelGenerator.h b/runtime/onert/backend/xnnpack/KernelGenerator.h new file mode 100644 index 000000000..265824204 --- /dev/null +++ b/runtime/onert/backend/xnnpack/KernelGenerator.h @@ -0,0 +1,65 @@ +/* + * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef __ONERT_BACKEND_XNNPACK_KERNEL_GENERATOR_H__ +#define __ONERT_BACKEND_XNNPACK_KERNEL_GENERATOR_H__ + +#include "ExternalContext.h" +#include "TensorBuilder.h" +#include "backend/cpu_common/TensorRegistry.h" +#include "Tensor.h" + +#include <backend/CustomKernelBuilder.h> +#include <backend/cpu_common/KernelGeneratorBase.h> +#include <ir/Operands.h> +#include <ir/Operations.h> + +namespace onert +{ +namespace backend +{ +namespace xnnpack +{ + +class KernelGenerator : public cpu_common::KernelGeneratorBase +{ +public: + KernelGenerator(const ir::Operands &operands_ctx, const ir::Operations &operations_ctx, + const std::shared_ptr<TensorBuilder> &tensor_builder, + const std::shared_ptr<cpu_common::TensorRegistry> &tensor_reg, + const std::shared_ptr<custom::IKernelBuilder> &kernel_builder, + const std::shared_ptr<ExternalContext> &external_context); + + void visit(const ir::OpSequence &) override; + void visit(const ir::operation::Conv2D &) override; + void visit(const ir::operation::DepthwiseConv2D &) override; + void visit(const ir::operation::FullyConnected &) override; + +private: + const ir::Operands &_ctx; + const ir::Operations &_operations_ctx; + std::shared_ptr<TensorBuilder> _tensor_builder; + std::shared_ptr<cpu_common::TensorRegistry> _tensor_reg; + std::shared_ptr<backend::custom::IKernelBuilder> _kernel_builder; + ir::Layout _current_layout; + const std::shared_ptr<ExternalContext> _external_context; +}; + +} // namespace xnnpack +} // namespace backend +} // namespace onert + +#endif // __ONERT_BACKEND_XNNPACK_KERNEL_GENERATOR_H__ diff --git a/runtime/onert/backend/xnnpack/StaticTensorManager.h b/runtime/onert/backend/xnnpack/StaticTensorManager.h new file mode 100644 index 000000000..f7344e8d8 --- /dev/null +++ b/runtime/onert/backend/xnnpack/StaticTensorManager.h @@ -0,0 +1,35 @@ +/* + * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef __ONERT_BACKEND_XNNPACK_STATICTENSOR_MANAGER_H__ +#define __ONERT_BACKEND_XNNPACK_STATICTENSOR_MANAGER_H__ + +#include "backend/cpu_common/StaticTensorManager.h" + +namespace onert +{ +namespace backend +{ +namespace xnnpack +{ + +using StaticTensorManager = cpu_common::StaticTensorManager; + +} // namespace xnnpack +} // namespace backend +} // namespace onert + +#endif // __ONERT_BACKEND_XNNPACK_STATICTENSOR_MANAGER_H__ diff --git a/runtime/onert/backend/xnnpack/Tensor.h b/runtime/onert/backend/xnnpack/Tensor.h new file mode 100644 index 000000000..b39cbd266 --- /dev/null +++ b/runtime/onert/backend/xnnpack/Tensor.h @@ -0,0 +1,37 @@ +/* + * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef __ONERT_BACKEND_XNNPACK_TENSOR_H__ +#define __ONERT_BACKEND_XNNPACK_TENSOR_H__ + +#include <backend/cpu_common/Tensor.h> +#include <ir/Data.h> + +namespace onert +{ +namespace backend +{ +namespace xnnpack +{ + +using Tensor = cpu_common::Tensor; +using ExternalTensor = cpu_common::ExternalTensor; + +} // namespace xnnpack +} // namespace backend +} // namespace onert + +#endif // __ONERT_BACKEND_XNNPACK_TENSOR_H__ diff --git a/runtime/onert/backend/xnnpack/TensorBuilder.cc b/runtime/onert/backend/xnnpack/TensorBuilder.cc new file mode 100644 index 000000000..b570144ce --- /dev/null +++ b/runtime/onert/backend/xnnpack/TensorBuilder.cc @@ -0,0 +1,90 @@ +/* + * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include "TensorBuilder.h" + +#include <util/logging.h> + +#include <cassert> + +namespace onert +{ +namespace backend +{ +namespace xnnpack +{ + +TensorBuilder::TensorBuilder(const std::shared_ptr<cpu_common::TensorRegistry> &tensor_reg) + : _tensor_reg{tensor_reg}, + _dynamic_tensor_mgr{new cpu_common::DynamicTensorManager(_tensor_reg)}, + _static_tensor_mgr{new StaticTensorManager(_tensor_reg, _dynamic_tensor_mgr.get())} +{ + /* empty */ +} + +void TensorBuilder::registerTensorInfo(const ir::OperandIndex &ind, const ir::OperandInfo &info, + ir::Layout layout) +{ + _tensor_info_map.emplace(ind, info); + + // XNNPACK backend supports only one layout as NHWC + assert(layout == ir::Layout::NHWC); + if (info.isDynamic()) + { + _dynamic_tensor_mgr->buildTensor(ind, info, layout); + } + else + { + _static_tensor_mgr->buildTensor(ind, info, layout, info.isConstant()); + } +} + +void TensorBuilder::notifyFirstUse(const ir::OperandIndex &ind) +{ + assert(_tensor_info_map.find(ind) != _tensor_info_map.end()); + const auto tensor_info = _tensor_info_map.at(ind); + + if (!_tensor_reg->getNativeTensor(ind)->is_dynamic()) + { + const auto size = tensor_info.total_size(); + _static_tensor_mgr->claimPlan(ind, size); + } +} + +void TensorBuilder::notifyLastUse(const ir::OperandIndex &ind) +{ + if (!_tensor_reg->getNativeTensor(ind)->is_dynamic()) + { + _static_tensor_mgr->releasePlan(ind); + } +} + +bool TensorBuilder::isRegistered(const ir::OperandIndex &ind) const +{ + return _tensor_info_map.find(ind) != _tensor_info_map.end(); +} + +void TensorBuilder::prepare(void) { _static_tensor_mgr->allocateNonconsts(); } + +void TensorBuilder::allocate() +{ + // NOTE For now nothing to do. Allocation is done in prepare stage, which is not appropriate + // This is because CPU kernels require `ITensor`s to be allocated before Kernel Generation. +} + +} // namespace xnnpack +} // namespace backend +} // namespace onert diff --git a/runtime/onert/backend/xnnpack/TensorBuilder.h b/runtime/onert/backend/xnnpack/TensorBuilder.h new file mode 100644 index 000000000..dddfedbf9 --- /dev/null +++ b/runtime/onert/backend/xnnpack/TensorBuilder.h @@ -0,0 +1,73 @@ +/* + * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#ifndef __ONERT_BACKEND_XNNPACK_TENSOR_BUILDER_H__ +#define __ONERT_BACKEND_XNNPACK_TENSOR_BUILDER_H__ + +#include <backend/cpu_common/DynamicTensorManager.h> +#include <backend/cpu_common/TensorRegistry.h> + +#include <ir/OperandIndexMap.h> + +#include "StaticTensorManager.h" +#include "Tensor.h" + +#include <unordered_map> + +namespace onert +{ +namespace backend +{ +namespace xnnpack +{ + +class TensorBuilder +{ +public: + TensorBuilder(const std::shared_ptr<cpu_common::TensorRegistry> &tensor_reg); + + /** + * @brief Register tensor information to allocate on XNNPACK backend + * @param[in] ind Operand index + * @param[in] info Operand information + * @param[in] layout Operand data layout + */ + void registerTensorInfo(const ir::OperandIndex &ind, const ir::OperandInfo &info, + ir::Layout backend_layout); + + void notifyFirstUse(const ir::OperandIndex &); + void notifyLastUse(const ir::OperandIndex &); + + bool isRegistered(const ir::OperandIndex &) const; + + void prepare(void); + void allocate(); + void postFunctionPrepare() { /* DO NOTHING */} + + IDynamicTensorManager *dynamicTensorManager(void) { return _dynamic_tensor_mgr.get(); } + +private: + const std::shared_ptr<cpu_common::TensorRegistry> _tensor_reg; + std::unique_ptr<cpu_common::DynamicTensorManager> _dynamic_tensor_mgr; + std::unique_ptr<StaticTensorManager> _static_tensor_mgr; + ir::OperandIndexMap<ir::OperandInfo> _tensor_info_map; +}; + +} // namespace xnnpack +} // namespace backend +} // namespace onert + +#endif // __ONERT_BACKEND_XNNPACK_TENSOR_BUILDER_H__ diff --git a/runtime/onert/backend/xnnpack/ops/ConvolutionLayer.cc b/runtime/onert/backend/xnnpack/ops/ConvolutionLayer.cc new file mode 100644 index 000000000..0612995c2 --- /dev/null +++ b/runtime/onert/backend/xnnpack/ops/ConvolutionLayer.cc @@ -0,0 +1,149 @@ +/* + * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include "ConvolutionLayer.h" + +#include "ir/Padding.h" + +namespace onert +{ +namespace backend +{ +namespace xnnpack +{ +namespace ops +{ +ConvolutionLayer::ConvolutionLayer(const std::shared_ptr<ExternalContext> external_context) + : Layer(external_context), _input(nullptr), _kernel(nullptr), _bias(nullptr), _output(nullptr), + _padding_type(ir::PaddingType::EXPLICIT), _padding_left(0), _padding_top(0), + _padding_right(0), _padding_bottom(0), _stride_width(0), _stride_height(0), + _dilation_width_factor(1), _dilation_height_factor(1), _activation(ir::Activation::NONE) +{ + // DO NOTHING +} + +void ConvolutionLayer::configure(const IPortableTensor *input, const IPortableTensor *kernel, + const IPortableTensor *bias, ir::PaddingType padding_type, + const uint32_t padding_left, const uint32_t padding_right, + const uint32_t padding_top, const uint32_t padding_bottom, + const uint32_t stride_width, const uint32_t stride_height, + const uint32_t dilation_width_factor, + const uint32_t dilation_height_factor, + const ir::Activation activation, IPortableTensor *output) +{ + _input = input; + _kernel = kernel; + _bias = bias; + _padding_type = padding_type; + _padding_left = padding_left; + _padding_right = padding_right; + _padding_top = padding_top; + _padding_bottom = padding_bottom; + _stride_width = stride_width; + _stride_height = stride_height; + _dilation_width_factor = dilation_width_factor; + _dilation_height_factor = dilation_height_factor; + _activation = activation; + _output = output; + + // TODO Support not nhwc layer + assert(_input->layout() == ir::Layout::NHWC); + + assert(_activation == ir::Activation::NONE || _activation == ir::Activation::RELU || + _activation == ir::Activation::RELU1 || _activation == ir::Activation::RELU6); +} + +void ConvolutionLayer::run() +{ + assert(_external_context && _external_context->getThreadPool()); + if (!_setup) + { + _setup = setup(); + assert(_setup); + } + + if (_input->data_type() == OperandType::FLOAT32) + { + enum xnn_status status = xnn_run_operator(_kernel_op, _external_context->getThreadPool()); + if (status != xnn_status_success) + { + throw std::runtime_error{"failed to run FP32 Convolution operator"}; + } + } + else + { + throw std::runtime_error{"XNNPACK Conv: unsupported data type"}; + } +} + +bool ConvolutionLayer::create() +{ + float output_activation_min = 0.f, output_activation_max = 0.f; + CalculateActivationRange<float>(_activation, &output_activation_min, &output_activation_max); + + // NHWC + // Kernel format is [depth_out, kernel_height, kernel_width, depth_in]. 
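+ // A regular convolution is expressed to XNNPACK as a grouped convolution with a
+ // single group: groups = 1, group_input_channels = depth_in and
+ // group_output_channels = depth_out.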
+ const auto &kernel_shape = _kernel->getShape(); + uint32_t kernel_height = kernel_shape.dim(1); + uint32_t kernel_width = kernel_shape.dim(2); + uint32_t output_channels = kernel_shape.dim(0); + uint32_t input_channels = kernel_shape.dim(3); + assert(static_cast<uint32_t>(_input->getShape().dim(3)) == input_channels); + assert(static_cast<uint32_t>(_output->getShape().dim(3)) == output_channels); + + enum xnn_status status = xnn_create_convolution2d_nhwc_f32( + _padding_top, _padding_right, _padding_bottom, _padding_left, kernel_height, kernel_width, + _stride_height, _stride_width, _dilation_height_factor, _dilation_width_factor, + 1 /* groups */, input_channels /* group_input_channels */, + output_channels /* group_output_channels */, input_channels /* input_channel_stride */, + output_channels /* output_channel_stride */, + reinterpret_cast<const float *>(_kernel->buffer()), + reinterpret_cast<const float *>(_bias->buffer()), output_activation_min, + output_activation_max, 0, &_kernel_op); + if (status != xnn_status_success) + { + throw std::runtime_error{"failed to create FP32 Convolution operator"}; + } + assert(_kernel_op != nullptr); + return true; +} + +bool ConvolutionLayer::setup() +{ + if (_input->buffer() == nullptr || _output->buffer() == nullptr) + { + // it could be models's input or output + return false; + } + + uint32_t input_width = _input->getShape().dim(2); + uint32_t input_height = _input->getShape().dim(1); + uint32_t batch_size = _input->getShape().dim(0); + enum xnn_status status = xnn_setup_convolution2d_nhwc_f32( + _kernel_op, batch_size, input_height, input_width, + reinterpret_cast<const float *>(_input->buffer()), + reinterpret_cast<float *>(_output->buffer()), _external_context->getThreadPool()); + if (status != xnn_status_success) + { + throw std::runtime_error{"failed to create FP32 Convolution operator"}; + } + return true; +} + +} // namespace ops +} // namespace xnnpack +} // namespace backend +} // namespace onert diff --git a/runtime/onert/backend/xnnpack/ops/ConvolutionLayer.h b/runtime/onert/backend/xnnpack/ops/ConvolutionLayer.h new file mode 100644 index 000000000..6cbaa9f3a --- /dev/null +++ b/runtime/onert/backend/xnnpack/ops/ConvolutionLayer.h @@ -0,0 +1,77 @@ +/* + * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#ifndef __ONERT_BACKEND_XNNPACK_OPS_CONVOLUTION_LAYER_H__ +#define __ONERT_BACKEND_XNNPACK_OPS_CONVOLUTION_LAYER_H__ + +#include "Layer.h" + +#include <xnnpack.h> + +namespace onert +{ +namespace backend +{ +namespace xnnpack +{ +namespace ops +{ + +class ConvolutionLayer : public Layer +{ +public: + ConvolutionLayer(const std::shared_ptr<ExternalContext> external_context); + +public: + void configure(const IPortableTensor *input, const IPortableTensor *kernel, + const IPortableTensor *bias, ir::PaddingType padding_type, + const uint32_t padding_left, const uint32_t padding_right, + const uint32_t padding_top, const uint32_t padding_bottom, + const uint32_t stride_width, const uint32_t stride_height, + const uint32_t dilation_width_factor, const uint32_t dilation_height_factor, + const ir::Activation activation, IPortableTensor *output); + + void run() override; + + bool create() override; + bool setup() override; + +private: + const IPortableTensor *_input; + const IPortableTensor *_kernel; + const IPortableTensor *_bias; + IPortableTensor *_output; + + ir::PaddingType _padding_type; + uint32_t _padding_left; + uint32_t _padding_top; + uint32_t _padding_right; + uint32_t _padding_bottom; + + uint32_t _stride_width; + uint32_t _stride_height; + uint32_t _dilation_width_factor; + uint32_t _dilation_height_factor; + + ir::Activation _activation; +}; + +} // namespace ops +} // namespace xnnpack +} // namespace backend +} // namespace onert + +#endif // __ONERT_BACKEND_XNNPACK_OPS_CONVOLUTION_LAYER_H__ diff --git a/runtime/onert/backend/xnnpack/ops/DepthwiseConvolutionLayer.cc b/runtime/onert/backend/xnnpack/ops/DepthwiseConvolutionLayer.cc new file mode 100644 index 000000000..947f04194 --- /dev/null +++ b/runtime/onert/backend/xnnpack/ops/DepthwiseConvolutionLayer.cc @@ -0,0 +1,150 @@ +/* + * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */
+
+#include "DepthwiseConvolutionLayer.h"
+
+#include "ir/Padding.h"
+
+namespace onert
+{
+namespace backend
+{
+namespace xnnpack
+{
+namespace ops
+{
+
+DepthwiseConvolutionLayer::DepthwiseConvolutionLayer(
+  const std::shared_ptr<ExternalContext> external_context)
+  : Layer(external_context), _input(nullptr), _kernel(nullptr), _bias(nullptr), _output(nullptr),
+    _padding_type(ir::PaddingType::EXPLICIT), _padding_left(0), _padding_top(0),
+    _padding_right(0), _padding_bottom(0), _stride_width(0), _stride_height(0), _multiplier(1),
+    _dilation_width_factor(1), _dilation_height_factor(1), _activation(ir::Activation::NONE)
+{
+  // DO NOTHING
+}
+
+void DepthwiseConvolutionLayer::configure(
+  const IPortableTensor *input, const IPortableTensor *kernel, const IPortableTensor *bias,
+  ir::PaddingType padding_type, const uint32_t padding_left, const uint32_t padding_right,
+  const uint32_t padding_top, const uint32_t padding_bottom, const uint32_t stride_width,
+  const uint32_t stride_height, const uint32_t multiplier, const uint32_t dilation_width_factor,
+  const uint32_t dilation_height_factor, const ir::Activation activation, IPortableTensor *output)
+{
+  _input = input;
+  _kernel = kernel;
+  _bias = bias;
+  _padding_type = padding_type;
+  _padding_left = padding_left;
+  _padding_right = padding_right;
+  _padding_top = padding_top;
+  _padding_bottom = padding_bottom;
+  _stride_width = stride_width;
+  _stride_height = stride_height;
+  _multiplier = multiplier;
+  _dilation_width_factor = dilation_width_factor;
+  _dilation_height_factor = dilation_height_factor;
+  _activation = activation;
+  _output = output;
+
+  // TODO Support layouts other than NHWC
+  assert(_input->layout() == ir::Layout::NHWC);
+
+  assert(_activation == ir::Activation::NONE || _activation == ir::Activation::RELU ||
+         _activation == ir::Activation::RELU1 || _activation == ir::Activation::RELU6);
+}
+
+void DepthwiseConvolutionLayer::run()
+{
+  assert(_external_context && _external_context->getThreadPool());
+  if (!_setup)
+  {
+    _setup = setup();
+    assert(_setup);
+  }
+
+  if (_input->data_type() == OperandType::FLOAT32)
+  {
+    enum xnn_status status = xnn_run_operator(_kernel_op, _external_context->getThreadPool());
+    if (status != xnn_status_success)
+    {
+      throw std::runtime_error{"failed to run FP32 DepthwiseConvolution operator"};
+    }
+  }
+  else
+  {
+    throw std::runtime_error{"XNNPACK DepthwiseConv: unsupported data type"};
+  }
+}
+
+bool DepthwiseConvolutionLayer::create()
+{
+  float output_activation_min = 0.f, output_activation_max = 0.f;
+  CalculateActivationRange<float>(_activation, &output_activation_min, &output_activation_max);
+
+  // NHWC
+  // Kernel format is [1, kernel_height, kernel_width, depth_out].
+  const auto &kernel_shape = _kernel->getShape();
+  uint32_t kernel_height = kernel_shape.dim(1);
+  uint32_t kernel_width = kernel_shape.dim(2);
+  uint32_t output_channels = kernel_shape.dim(3);
+  uint32_t input_channels = _input->getShape().dim(3);
+  assert(static_cast<uint32_t>(_output->getShape().dim(3)) == output_channels);
+  assert(output_channels == input_channels * _multiplier);
+
+  enum xnn_status status = xnn_create_convolution2d_nhwc_f32(
+    _padding_top, _padding_right, _padding_bottom, _padding_left, kernel_height, kernel_width,
+    _stride_height, _stride_width, _dilation_height_factor, _dilation_width_factor,
+    input_channels /* groups */, 1 /* group_input_channels */,
+    _multiplier /* group_output_channels */, input_channels /* input_channel_stride */,
+    output_channels /* output_channel_stride */,
+    reinterpret_cast<const float *>(_kernel->buffer()),
+    reinterpret_cast<const float *>(_bias->buffer()), output_activation_min,
+    output_activation_max, XNN_FLAG_DEPTHWISE_CONVOLUTION, &_kernel_op);
+  if (status != xnn_status_success)
+  {
+    throw std::runtime_error{"failed to create FP32 DepthwiseConvolution operator"};
+  }
+  assert(_kernel_op != nullptr);
+  return true;
+}
+
+bool DepthwiseConvolutionLayer::setup()
+{
+  if (_input->buffer() == nullptr || _output->buffer() == nullptr)
+  {
+    // it could be the model's input or output
+    return false;
+  }
+
+  uint32_t input_width = _input->getShape().dim(2);
+  uint32_t input_height = _input->getShape().dim(1);
+  uint32_t batch_size = _input->getShape().dim(0);
+  enum xnn_status status = xnn_setup_convolution2d_nhwc_f32(
+    _kernel_op, batch_size, input_height, input_width,
+    reinterpret_cast<const float *>(_input->buffer()),
+    reinterpret_cast<float *>(_output->buffer()), _external_context->getThreadPool());
+  if (status != xnn_status_success)
+  {
+    throw std::runtime_error{"failed to setup FP32 DepthwiseConvolution operator"};
+  }
+  return true;
+}
+
+} // namespace ops
+} // namespace xnnpack
+} // namespace backend
+} // namespace onert
diff --git a/runtime/onert/backend/xnnpack/ops/DepthwiseConvolutionLayer.h b/runtime/onert/backend/xnnpack/ops/DepthwiseConvolutionLayer.h
new file mode 100644
index 000000000..10f840ae7
--- /dev/null
+++ b/runtime/onert/backend/xnnpack/ops/DepthwiseConvolutionLayer.h
@@ -0,0 +1,77 @@
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */ + +#ifndef __ONERT_BACKEND_XNNPACK_OPS_DEPTHWISE_CONVOLUTION_LAYER_H__ +#define __ONERT_BACKEND_XNNPACK_OPS_DEPTHWISE_CONVOLUTION_LAYER_H__ + +#include "Layer.h" + +namespace onert +{ +namespace backend +{ +namespace xnnpack +{ +namespace ops +{ + +class DepthwiseConvolutionLayer : public Layer +{ +public: + DepthwiseConvolutionLayer(const std::shared_ptr<ExternalContext> external_context); + +public: + void configure(const IPortableTensor *input, const IPortableTensor *kernel, + const IPortableTensor *bias, ir::PaddingType padding_type, + const uint32_t padding_left, const uint32_t padding_right, + const uint32_t padding_top, const uint32_t padding_bottom, + const uint32_t stride_width, const uint32_t stride_height, + const uint32_t multiplier, const uint32_t dilation_width_factor, + const uint32_t dilation_height_factor, const ir::Activation activation, + IPortableTensor *output); + + void run() override; + + bool create() override; + bool setup() override; + +private: + const IPortableTensor *_input; + const IPortableTensor *_kernel; + const IPortableTensor *_bias; + IPortableTensor *_output; + + ir::PaddingType _padding_type; + uint32_t _padding_left; + uint32_t _padding_top; + uint32_t _padding_right; + uint32_t _padding_bottom; + + uint32_t _stride_width; + uint32_t _stride_height; + uint32_t _multiplier; + uint32_t _dilation_width_factor; + uint32_t _dilation_height_factor; + + ir::Activation _activation; +}; + +} // namespace ops +} // namespace xnnpack +} // namespace backend +} // namespace onert + +#endif // __ONERT_BACKEND_XNNPACK_OPS_DEPTHWISE_CONVOLUTION_LAYER_H__ diff --git a/runtime/onert/backend/xnnpack/ops/FullyConnectedLayer.cc b/runtime/onert/backend/xnnpack/ops/FullyConnectedLayer.cc new file mode 100644 index 000000000..d595fda36 --- /dev/null +++ b/runtime/onert/backend/xnnpack/ops/FullyConnectedLayer.cc @@ -0,0 +1,138 @@ +/* + * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */
+
+#include "FullyConnectedLayer.h"
+
+#include "ir/Padding.h"
+
+namespace onert
+{
+namespace backend
+{
+namespace xnnpack
+{
+namespace ops
+{
+
+FullyConnectedLayer::FullyConnectedLayer(const std::shared_ptr<ExternalContext> external_context)
+  : Layer(external_context), _input(nullptr), _kernel(nullptr), _bias(nullptr), _output(nullptr),
+    _activation(ir::Activation::NONE)
+{
+  // DO NOTHING
+}
+
+void FullyConnectedLayer::configure(const IPortableTensor *input, const IPortableTensor *weights,
+                                    const IPortableTensor *bias, ir::Activation activation,
+                                    IPortableTensor *output)
+{
+  _input = input;
+  _kernel = weights;
+  _bias = bias;
+  _activation = activation;
+  _output = output;
+
+  // TODO Support layouts other than NHWC
+  assert(_input->layout() == ir::Layout::NHWC);
+
+  assert(_activation == ir::Activation::NONE || _activation == ir::Activation::RELU ||
+         _activation == ir::Activation::RELU1 || _activation == ir::Activation::RELU6);
+}
+
+void FullyConnectedLayer::run()
+{
+  assert(_external_context && _external_context->getThreadPool());
+  if (!_setup)
+  {
+    _setup = setup();
+    assert(_setup);
+  }
+
+  if (_input->data_type() == OperandType::FLOAT32)
+  {
+    enum xnn_status status = xnn_run_operator(_kernel_op, _external_context->getThreadPool());
+    if (status != xnn_status_success)
+    {
+      throw std::runtime_error{"failed to run FP32 FullyConnected operator"};
+    }
+  }
+  else
+  {
+    throw std::runtime_error{"XNNPACK FC: unsupported data type"};
+  }
+}
+
+bool FullyConnectedLayer::create()
+{
+  float output_activation_min = 0.f, output_activation_max = 0.f;
+  CalculateActivationRange<float>(_activation, &output_activation_min, &output_activation_max);
+
+  const auto &kernel_shape = _kernel->getShape();
+  assert(kernel_shape.rank() == 2);
+  uint32_t output_channels = kernel_shape.dim(0);
+  uint32_t input_channels = kernel_shape.dim(1);
+
+  const auto &input_shape = _input->getShape();
+  const auto &output_shape = _output->getShape();
+  uint32_t flag = 0;
+  if (input_shape.rank() != output_shape.rank())
+  {
+    flag |= XNN_FLAG_TENSORFLOW_RESHAPE_2D;
+    assert(input_shape.num_elements() % input_channels == 0);
+  }
+  else
+  {
+    assert(static_cast<uint32_t>(input_shape.dim(input_shape.rank() - 1)) == input_channels);
+  }
+
+  assert(_kernel && _kernel->buffer());
+  const float *kernel_buffer = reinterpret_cast<const float *>(_kernel->buffer());
+  const float *bias_buffer = (_bias) ?
reinterpret_cast<const float *>(_bias->buffer()) : nullptr;
+
+  enum xnn_status status = xnn_create_fully_connected_nc_f32(
+    input_channels, output_channels, input_channels /* input stride */,
+    output_channels /* output stride */, kernel_buffer, bias_buffer, output_activation_min,
+    output_activation_max, flag, &_kernel_op);
+  if (status != xnn_status_success)
+  {
+    throw std::runtime_error{"failed to create FP32 FullyConnected operator"};
+  }
+  assert(_kernel_op != nullptr);
+  return true;
+}
+
+bool FullyConnectedLayer::setup()
+{
+  if (_input->buffer() == nullptr || _output->buffer() == nullptr)
+  {
+    // it could be the model's input or output
+    return false;
+  }
+
+  uint32_t batch_size = _input->getShape().num_elements() / _kernel->getShape().dim(1);
+  enum xnn_status status = xnn_setup_fully_connected_nc_f32(
+    _kernel_op, batch_size, reinterpret_cast<const float *>(_input->buffer()),
+    reinterpret_cast<float *>(_output->buffer()), _external_context->getThreadPool());
+  if (status != xnn_status_success)
+  {
+    throw std::runtime_error{"failed to setup FP32 FullyConnected operator"};
+  }
+  return true;
+}
+
+} // namespace ops
+} // namespace xnnpack
+} // namespace backend
+} // namespace onert
diff --git a/runtime/onert/backend/xnnpack/ops/FullyConnectedLayer.h b/runtime/onert/backend/xnnpack/ops/FullyConnectedLayer.h
new file mode 100644
index 000000000..883607ef9
--- /dev/null
+++ b/runtime/onert/backend/xnnpack/ops/FullyConnectedLayer.h
@@ -0,0 +1,61 @@
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __ONERT_BACKEND_XNNPACK_OPS_FULLY_CONNECTED_LAYER_H__
+#define __ONERT_BACKEND_XNNPACK_OPS_FULLY_CONNECTED_LAYER_H__
+
+#include "Layer.h"
+
+#include <xnnpack.h>
+
+namespace onert
+{
+namespace backend
+{
+namespace xnnpack
+{
+namespace ops
+{
+
+class FullyConnectedLayer : public Layer
+{
+public:
+  FullyConnectedLayer(const std::shared_ptr<ExternalContext> external_context);
+
+public:
+  void configure(const IPortableTensor *input, const IPortableTensor *weights,
+                 const IPortableTensor *bias, ir::Activation activation, IPortableTensor *output);
+
+  void run() override;
+
+  bool create() override;
+  bool setup() override;
+
+private:
+  const IPortableTensor *_input;
+  const IPortableTensor *_kernel;
+  const IPortableTensor *_bias;
+  IPortableTensor *_output;
+
+  ir::Activation _activation;
+};
+
+} // namespace ops
+} // namespace xnnpack
+} // namespace backend
+} // namespace onert
+
+#endif // __ONERT_BACKEND_XNNPACK_OPS_FULLY_CONNECTED_LAYER_H__
diff --git a/runtime/onert/backend/xnnpack/ops/Layer.h b/runtime/onert/backend/xnnpack/ops/Layer.h
new file mode 100644
index 000000000..68b610f33
--- /dev/null
+++ b/runtime/onert/backend/xnnpack/ops/Layer.h
@@ -0,0 +1,81 @@
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd.
All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef __ONERT_BACKEND_XNNPACK_OPS_LAYER_H__ +#define __ONERT_BACKEND_XNNPACK_OPS_LAYER_H__ + +#include <exec/IFunction.h> +#include <backend/IPortableTensor.h> +#include "OperationUtils.h" +#include "../ExternalContext.h" +#include "../Tensor.h" + +#include <cassert> +#include <memory> + +#include <xnnpack.h> + +namespace onert +{ +namespace backend +{ +namespace xnnpack +{ +namespace ops +{ + +class Layer : public ::onert::exec::IFunction +{ +public: + Layer(const std::shared_ptr<ExternalContext> external_context) + : _kernel_op{nullptr}, _create{false}, _setup{false}, _external_context{external_context} + { + // DO NOTHING + } + + ~Layer() + { + if (_kernel_op) + xnn_delete_operator(_kernel_op); + } + +public: + void prepare() override + { + if (_create) + return; + + _create = create(); + assert(_create); + + _setup = setup(); + } + virtual bool create() = 0; + virtual bool setup() = 0; + +protected: + xnn_operator_t _kernel_op; + bool _create; + bool _setup; + const std::shared_ptr<ExternalContext> _external_context; +}; + +} // namespace ops +} // namespace xnnpack +} // namespace backend +} // namespace onert + +#endif // __ONERT_BACKEND_XNNPACK_OPS_LAYER_H__ diff --git a/runtime/onert/backend/xnnpack/ops/OperationUtils.h b/runtime/onert/backend/xnnpack/ops/OperationUtils.h new file mode 100644 index 000000000..5102e32dd --- /dev/null +++ b/runtime/onert/backend/xnnpack/ops/OperationUtils.h @@ -0,0 +1,75 @@ +/* + * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */
+
+#ifndef __ONERT_BACKEND_XNNPACK_OPS_OPERATION_UTILS_H__
+#define __ONERT_BACKEND_XNNPACK_OPS_OPERATION_UTILS_H__
+
+// duplicated from cpu/ops/OperationUtils.h
+#include <ir/InternalType.h>
+#include <ir/Padding.h>
+#include <ir/DataType.h>
+
+#include <limits>
+#include <stdexcept>
+
+namespace onert
+{
+namespace backend
+{
+namespace xnnpack
+{
+namespace ops
+{
+
+using OperandType = ir::DataType;
+
+template <typename T>
+void CalculateActivationRange(ir::Activation activation, T *activation_min, T *activation_max)
+{
+  if (activation == ir::Activation::RELU)
+  {
+    *activation_min = 0;
+    *activation_max = std::numeric_limits<T>::max();
+  }
+  else if (activation == ir::Activation::RELU6)
+  {
+    *activation_min = 0;
+    *activation_max = 6;
+  }
+  else if (activation == ir::Activation::RELU1)
+  {
+    *activation_min = -1;
+    *activation_max = 1;
+  }
+  else if (activation == ir::Activation::SIGMOID)
+  {
+    *activation_min = 0;
+    *activation_max = 1;
+  }
+  else if (activation == ir::Activation::NONE)
+  {
+    *activation_min = std::numeric_limits<T>::lowest();
+    *activation_max = std::numeric_limits<T>::max();
+  }
+  else
+  {
+    throw std::runtime_error{"Unsupported fused activation function"};
+  }
+}
+
+} // namespace ops
+} // namespace xnnpack
+} // namespace backend
+} // namespace onert
+
+#endif // __ONERT_BACKEND_XNNPACK_OPS_OPERATION_UTILS_H__
diff --git a/runtime/onert/backend/xnnpack/xnnpack.cc b/runtime/onert/backend/xnnpack/xnnpack.cc
new file mode 100644
index 000000000..38a6c5572
--- /dev/null
+++ b/runtime/onert/backend/xnnpack/xnnpack.cc
@@ -0,0 +1,33 @@
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "Backend.h"
+
+#include <util/logging.h>
+
+extern "C" {
+onert::backend::Backend *onert_backend_create()
+{
+  VERBOSE(onert_backend_create) << "'xnnpack' loaded\n";
+  return new onert::backend::xnnpack::Backend;
+}
+
+void onert_backend_destroy(onert::backend::Backend *backend)
+{
+  VERBOSE(onert_backend_destroy) << "'xnnpack' unloaded\n";
+  delete backend;
+}
+}
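The xnnpack ops layers above all share the same two-phase lifecycle: create() builds the xnn_operator_t once the weights are available, while setup() binds the input/output buffers and is retried from run() when prepare() was called before the buffers existed. A minimal sketch of how one of these layers might be driven is shown below; the free function run_conv_once and the simplified tensor/context wiring are illustrative assumptions, not the runtime's actual KernelGenerator flow, and the sketch assumes the backend has already initialized XNNPACK.

// Hypothetical driver sketch for onert::backend::xnnpack::ops::ConvolutionLayer.
// Assumes xnn_initialize() has already been performed by the backend and that
// all tensors are NHWC FLOAT32 with allocated buffers.
#include <memory>

#include "ConvolutionLayer.h"

namespace xnn_ops = onert::backend::xnnpack::ops;

void run_conv_once(const onert::backend::IPortableTensor *input,
                   const onert::backend::IPortableTensor *kernel,
                   const onert::backend::IPortableTensor *bias,
                   onert::backend::IPortableTensor *output,
                   std::shared_ptr<onert::backend::xnnpack::ExternalContext> ctx)
{
  xnn_ops::ConvolutionLayer conv{ctx};
  // Explicit zero padding, unit stride and unit dilation, no fused activation.
  conv.configure(input, kernel, bias, onert::ir::PaddingType::EXPLICIT,
                 /*padding_left=*/0, /*padding_right=*/0,
                 /*padding_top=*/0, /*padding_bottom=*/0,
                 /*stride_width=*/1, /*stride_height=*/1,
                 /*dilation_width_factor=*/1, /*dilation_height_factor=*/1,
                 onert::ir::Activation::NONE, output);

  conv.prepare(); // create() builds the xnn_operator_t; setup() may still fail here
  conv.run();     // retries setup() once buffers are bound, then calls xnn_run_operator
}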