commit    74476a2d0296bdad70a2f7f90bc7419a8b05bffd
tree      3f991636c1e9423d38eb16a384c20b569b0d678e  (runtime/onert/backend)
parent    042b262b3633b6c0f577aed6cb4b980ad0c1dcf3
author    Chunseok Lee <chunseok.lee@samsung.com>  2020-09-05 21:49:46 +0900
committer Chunseok Lee <chunseok.lee@samsung.com>  2020-09-05 21:49:46 +0900
Imported Upstream version 1.9.0
(tags: upstream/1.9.0, submit/tizen/20200905.125700, accepted/tizen/unified/20200906.032650)
Diffstat (limited to 'runtime/onert/backend')
92 files changed, 2926 insertions, 6106 deletions
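Two changes recur throughout the hunks below: tensors are now resolved through a shared acl_common::AclTensorRegistry (wired up as context->tensor_registry and held as _tensor_reg) instead of through the TensorBuilder, and ACL kernels are now built with the acl_common::generateLayer helper instead of a make_unique + configure pair, then wrapped with the backend-independent asAclFunction rather than asAclClFunction. A condensed before/after illustration of the pattern follows; the identifiers come from the diff, but the snippet itself is not verbatim commit code.

    // Before (1.8.x): tensors via TensorBuilder, kernels configured by hand.
    auto ofm_tensor = _tensor_builder->at(ofm_index).get();
    auto fn = std::make_unique<::arm_compute::CLReshapeLayer>();
    fn->configure(ifm_tensor->handle(), ofm_tensor->handle());
    _return_fn = asAclClFunction(std::move(fn));

    // After (1.9.0): tensors via the shared tensor registry, kernels built in
    // one expression by acl_common::generateLayer, wrapped by asAclFunction.
    auto ofm_tensor = _tensor_reg->getAclTensor(ofm_index).get();
    auto fn = acl_common::generateLayer<arm_compute::CLReshapeLayer>(ifm_tensor->handle(),
                                                                     ofm_tensor->handle());
    _return_fn = asAclFunction(std::move(fn));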
diff --git a/runtime/onert/backend/acl_cl/Backend.h b/runtime/onert/backend/acl_cl/Backend.h
index 8aaf516cd..5c5041378 100644
--- a/runtime/onert/backend/acl_cl/Backend.h
+++ b/runtime/onert/backend/acl_cl/Backend.h
@@ -25,6 +25,7 @@
 #include "KernelGenerator.h"
 #include "TensorManager.h"
 #include "Optimizer.h"
+#include "AclTensorRegistry.h"
 
 namespace onert
 {
@@ -47,10 +48,13 @@ public:
     const auto &operands = graph.operands();
     const auto &operations = graph.operations();
     auto context = std::make_unique<BackendContext>(this, &graph);
-    auto tb = std::make_shared<TensorBuilder>(operands, createTensorManager(is_linear_executor));
+    auto tm = createTensorManager(is_linear_executor);
+    auto tr = std::make_shared<acl_common::AclTensorRegistry<TensorManager>>(tm);
+    auto tb = std::make_shared<TensorBuilder>(operands, tm, tr);
+    context->tensor_registry = tr;
     context->tensor_builder = tb;
-    context->constant_initializer = std::make_shared<ConstantInitializer>(operands, tb);
-    context->kernel_gen = std::make_shared<KernelGenerator>(operands, operations, tb);
+    context->constant_initializer = std::make_shared<ConstantInitializer>(operands, tr);
+    context->kernel_gen = std::make_shared<KernelGenerator>(operands, operations, tb, tr);
     context->tensor_register = nullptr;
     context->optimizer = std::make_shared<Optimizer>(context.get());
     return context;
diff --git a/runtime/onert/backend/acl_cl/ConstantInitializer.cc b/runtime/onert/backend/acl_cl/ConstantInitializer.cc
index d7f5f8031..31f1c10eb 100644
--- a/runtime/onert/backend/acl_cl/ConstantInitializer.cc
+++ b/runtime/onert/backend/acl_cl/ConstantInitializer.cc
@@ -24,78 +24,17 @@ namespace acl_cl
 {
 
 ConstantInitializer::ConstantInitializer(const ir::Operands &operands,
-                                         const std::shared_ptr<TensorBuilder> &tensor_builder)
-    : IConstantInitializer{operands}, _tensor_builder{tensor_builder}
+                                         const std::shared_ptr<ITensorRegistry> &tensor_reg)
+    : acl_common::AclConstantInitializer{operands, tensor_reg}
 {
   // DO NOTHING
 }
 
-void ConstantInitializer::copyInputInitialize(const ir::Operation &node, uint32_t index)
-{
-  assert(node.getInputs().size() > index);
-
-  const auto &input_index = node.getInputs().at(index);
-  const auto &input_obj = _operands.at(input_index);
-  registerCopyInitializer(input_index, input_obj);
-}
-
-void ConstantInitializer::permuteInputInitialize(const ir::Operation &node, uint32_t index)
-{
-  assert(node.getInputs().size() > index);
-
-  const auto &input_index = node.getInputs().at(index);
-  const auto &input_obj = _operands.at(input_index);
-  registerPermuteInitializer(input_index, input_obj);
-}
-
-void ConstantInitializer::visit(const ir::operation::BatchToSpaceND &node)
-{
-  const auto &block_size_index = node.getInputs().at(ir::operation::BatchToSpaceND::BLOCK_SIZE);
-  const auto &block_size_obj = _operands.at(block_size_index);
-
-  if (block_size_obj.isConstant())
-  {
-    _init_map[block_size_index] = [](const ir::Operand &model_obj, backend::ITensor &obj) {
-      assert(model_obj.data());
-      const auto &shape = model_obj.shape();
-      const auto base = reinterpret_cast<const int32_t *>(model_obj.data()->base());
-      assert(model_obj.shape().rank() == 1);
-      obj.access([&](ITensor &tensor) {
-        for (size_t i = 0; i < shape.num_elements(); ++i)
-        {
-          const int32_t value = base[shape.num_elements() - i - 1];
-          int32_t *into = reinterpret_cast<int32_t *>(tensor.buffer() +
-                                                      tensor.calcOffset({static_cast<int32_t>(i)}));
-          *into = value;
-        }
-      });
-    };
-  }
-}
-
-void ConstantInitializer::visit(const ir::operation::Conv2D &node)
-{
-  permuteInputInitialize(node, ir::operation::Conv2D::KERNEL);
-  copyInputInitialize(node, ir::operation::Conv2D::BIAS);
-}
-
-void ConstantInitializer::visit(const ir::operation::DepthwiseConv2D &node)
-{
-  permuteInputInitialize(node, ir::operation::DepthwiseConv2D::KERNEL);
-  copyInputInitialize(node, ir::operation::DepthwiseConv2D::BIAS);
-}
-
 void ConstantInitializer::visit(const ir::operation::EmbeddingLookup &node)
 {
   copyInputInitialize(node, ir::operation::EmbeddingLookup::LOOKUPS);
 }
 
-void ConstantInitializer::visit(const ir::operation::FullyConnected &node)
-{
-  copyInputInitialize(node, ir::operation::FullyConnected::WEIGHT);
-  copyInputInitialize(node, ir::operation::FullyConnected::BIAS);
-}
-
 void ConstantInitializer::visit(const ir::operation::Gather &node)
 {
   copyInputInitialize(node, ir::operation::Gather::INDICES);
@@ -107,33 +46,6 @@ void ConstantInitializer::visit(const ir::operation::HashtableLookup &node)
   copyInputInitialize(node, ir::operation::HashtableLookup::KEYS);
 }
 
-void ConstantInitializer::visit(const ir::operation::LSTM &node)
-{
-  copyInputInitialize(node, ir::operation::LSTM::INPUT_TO_INPUT_WEIGHTS);
-  copyInputInitialize(node, ir::operation::LSTM::INPUT_TO_FORGET_WEIGHTS);
-  copyInputInitialize(node, ir::operation::LSTM::INPUT_TO_CELL_WEIGHTS);
-  copyInputInitialize(node, ir::operation::LSTM::INPUT_TO_OUTPUT_WEIGHTS);
-  copyInputInitialize(node, ir::operation::LSTM::RECURRENT_TO_INPUT_WEIGHTS);
-  copyInputInitialize(node, ir::operation::LSTM::RECURRENT_TO_FORGET_WEIGHTS);
-  copyInputInitialize(node, ir::operation::LSTM::RECURRENT_TO_CELL_WEIGHTS);
-  copyInputInitialize(node, ir::operation::LSTM::RECURRENT_TO_OUTPUT_WEIGHTS);
-  copyInputInitialize(node, ir::operation::LSTM::CELL_TO_INPUT_WEIGHTS);
-  copyInputInitialize(node, ir::operation::LSTM::CELL_TO_FORGET_WEIGHTS);
-  copyInputInitialize(node, ir::operation::LSTM::CELL_TO_OUTPUT_WEIGHTS);
-  copyInputInitialize(node, ir::operation::LSTM::INPUT_GATE_BIAS);
-  copyInputInitialize(node, ir::operation::LSTM::FORGET_GATE_BIAS);
-  copyInputInitialize(node, ir::operation::LSTM::OUTPUT_GATE_BIAS);
-  copyInputInitialize(node, ir::operation::LSTM::PROJECTION_WEIGHTS);
-  copyInputInitialize(node, ir::operation::LSTM::PROJECTION_BIAS);
-}
-
-void ConstantInitializer::visit(const ir::operation::RNN &node)
-{
-  copyInputInitialize(node, ir::operation::RNN::WEIGHTS);
-  copyInputInitialize(node, ir::operation::RNN::RECURRENT_WEIGHTS);
-  copyInputInitialize(node, ir::operation::RNN::BIAS);
-}
-
 void ConstantInitializer::visit(const ir::operation::SpaceToBatchND &node)
 {
   const auto &block_size_index = node.getInputs().at(ir::operation::SpaceToBatchND::BLOCK_SIZE);
@@ -184,13 +96,6 @@ void ConstantInitializer::visit(const ir::operation::SpaceToBatchND &node)
   }
 }
 
-void ConstantInitializer::visit(const ir::operation::TransposeConv &node)
-{
-  const auto &kernel_index = node.getInputs().at(ir::operation::TransposeConv::KERNEL);
-  const auto &kernel_obj = _operands.at(kernel_index);
-  registerPermuteInitializer(kernel_index, kernel_obj);
-}
-
 } // namespace acl_cl
 } // namespace backend
 } // namespace onert
diff --git a/runtime/onert/backend/acl_cl/ConstantInitializer.h b/runtime/onert/backend/acl_cl/ConstantInitializer.h
index c51f72b11..4f894fd31 100644
--- a/runtime/onert/backend/acl_cl/ConstantInitializer.h
+++ b/runtime/onert/backend/acl_cl/ConstantInitializer.h
@@ -17,9 +17,7 @@
 #ifndef __ONERT_COMPILER_ACL_CL_CONSTANT_INITIALIZER_H__
 #define __ONERT_COMPILER_ACL_CL_CONSTANT_INITIALIZER_H__
 
-#include <backend/IConstantInitializer.h>
-#include <ir/Operands.h>
-#include "TensorBuilder.h"
+#include "AclConstantInitializer.h"
 
 namespace onert
 {
@@ -28,32 +26,18 @@ namespace backend
 namespace acl_cl
 {
 
-class ConstantInitializer : public IConstantInitializer
+class ConstantInitializer : public acl_common::AclConstantInitializer
 {
 public:
   ConstantInitializer(const ir::Operands &operands,
-                      const std::shared_ptr<TensorBuilder> &tensor_builder);
+                      const std::shared_ptr<ITensorRegistry> &tensor_reg);
 
 public:
-  void visit(const ir::operation::BatchToSpaceND &) override;
-  void visit(const ir::operation::Conv2D &) override;
-  void visit(const ir::operation::DepthwiseConv2D &) override;
-  void visit(const ir::operation::EmbeddingLookup &) override;
-  void visit(const ir::operation::FullyConnected &) override;
-  void visit(const ir::operation::Gather &) override;
-  void visit(const ir::operation::HashtableLookup &) override;
-  void visit(const ir::operation::LSTM &) override;
-  void visit(const ir::operation::RNN &) override;
-  void visit(const ir::operation::SpaceToBatchND &) override;
-  void visit(const ir::operation::TransposeConv &) override;
-
-private:
-  std::shared_ptr<ITensorBuilder> tensor_builder() const override { return _tensor_builder; }
-  void copyInputInitialize(const ir::Operation &node, uint32_t index);
-  void permuteInputInitialize(const ir::Operation &node, uint32_t index);
-
-private:
-  std::shared_ptr<TensorBuilder> _tensor_builder;
+  using acl_common::AclConstantInitializer::visit;
+  void visit(const ir::operation::EmbeddingLookup &) final;
+  void visit(const ir::operation::Gather &) final;
+  void visit(const ir::operation::HashtableLookup &) final;
+  void visit(const ir::operation::SpaceToBatchND &) final;
 };
 
 } // namespace acl_cl
diff --git a/runtime/onert/backend/acl_cl/KernelGenerator.cc b/runtime/onert/backend/acl_cl/KernelGenerator.cc
index a84f983b4..94489253d 100644
--- a/runtime/onert/backend/acl_cl/KernelGenerator.cc
+++ b/runtime/onert/backend/acl_cl/KernelGenerator.cc
@@ -40,15 +40,16 @@ namespace backend
 namespace acl_cl
 {
 
-using ::onert::backend::acl_common::asAclClFunction;
+using ::onert::backend::acl_common::asAclFunction;
 
 using ActivationBuilder = ::onert::backend::acl_common::AclActivationBuilder<
-    ::arm_compute::ICLTensor, ::arm_compute::CLActivationLayer, acl_common::AclClFunction>;
+    ::arm_compute::ICLTensor, ::arm_compute::CLActivationLayer, acl_common::AclFunction>;
 
-KernelGenerator::KernelGenerator(const ir::Operands &operands_ctx,
-                                 const ir::Operations &operations_ctx,
-                                 const std::shared_ptr<TensorBuilder> &tensor_builder)
+KernelGenerator::KernelGenerator(
+    const ir::Operands &operands_ctx, const ir::Operations &operations_ctx,
+    const std::shared_ptr<TensorBuilder> &tensor_builder,
+    const std::shared_ptr<acl_common::AclTensorRegistry<TensorManager>> &tensor_reg)
     : _ctx(operands_ctx), _operations_ctx(operations_ctx), _tensor_builder(tensor_builder),
-      _current_op_seq_layout(ir::Layout::UNKNOWN)
+      _tensor_reg(tensor_reg), _current_op_seq_layout(ir::Layout::UNKNOWN)
 {
   // DO NOTHING
 }
@@ -77,51 +78,69 @@ void KernelGenerator::visit(const ir::operation::BatchToSpaceND &node)
   const auto block_size_index{
       node.getInputs().at(ir::operation::BatchToSpaceND::Input::BLOCK_SIZE)};
 
-  auto ofm_tensor = _tensor_builder->at(ofm_index).get();
-  auto ifm_tensor = _tensor_builder->at(ifm_index).get();
-  auto block_size_tensor = _tensor_builder->at(block_size_index).get();
+  auto ofm_tensor = _tensor_reg->getAclTensor(ofm_index).get();
+  auto ifm_tensor = _tensor_reg->getAclTensor(ifm_index).get();
+  auto block_size_tensor = _tensor_reg->getAclTensor(block_size_index).get();
 
   assert(_ctx.at(block_size_index).data());
 
-  auto fn = std::make_unique<::arm_compute::CLBatchToSpaceLayer>();
+  auto fn = acl_common::generateLayer<arm_compute::CLBatchToSpaceLayer>(
+      ifm_tensor->handle(), block_size_tensor->handle(), ofm_tensor->handle());
 
-  fn->configure(ifm_tensor->handle(), block_size_tensor->handle(), ofm_tensor->handle());
-
-  auto acl_fn = asAclClFunction(std::move(fn));
-
-  _return_fn = std::move(acl_fn);
+  _return_fn = asAclFunction(std::move(fn));
 }
 
-void KernelGenerator::visit(const ir::operation::Cast &node)
+void KernelGenerator::visit(const ir::operation::BinaryArithmetic &node)
 {
   const auto ofm_index{node.getOutputs().at(0)};
-  const auto ifm_index{node.getInputs().at(ir::operation::Cast::Input::INPUT)};
+  const auto lhs_index{node.getInputs().at(ir::operation::BinaryArithmetic::Input::LHS)};
+  const auto rhs_index{node.getInputs().at(ir::operation::BinaryArithmetic::Input::RHS)};
 
-  auto ofm_tensor = _tensor_builder->at(ofm_index).get();
-  auto ifm_tensor = _tensor_builder->at(ifm_index).get();
+  const auto activation = node.param().activation;
 
-  std::unique_ptr<::arm_compute::IFunction> fn;
-  if (ifm_tensor->data_type() == ofm_tensor->data_type())
-  {
-    auto l = std::make_unique<::arm_compute::CLCopy>();
+  auto ofm_tensor = _tensor_reg->getAclTensor(ofm_index).get();
+  auto lhs_tensor = _tensor_reg->getAclTensor(lhs_index).get();
+  auto rhs_tensor = _tensor_reg->getAclTensor(rhs_index).get();
 
-    l->configure(ifm_tensor->handle(), ofm_tensor->handle());
+  const auto act_info = acl_common::asActivationLayerInfo(activation);
 
-    fn = std::move(l);
-  }
-  else
+  std::unique_ptr<arm_compute::IFunction> fn;
+  switch (node.param().arithmetic_type)
   {
-    auto l = std::make_unique<::arm_compute::CLCast>();
-
-    // TODO Support converting float to int32 as round down
-    l->configure(ifm_tensor->handle(), ofm_tensor->handle(), arm_compute::ConvertPolicy::SATURATE);
-
-    fn = std::move(l);
+    case ir::operation::BinaryArithmetic::ArithmeticType::ADD:
+    {
+      fn = acl_common::generateLayer<arm_compute::CLArithmeticAddition>(
+          lhs_tensor->handle(), rhs_tensor->handle(), ofm_tensor->handle(),
+          arm_compute::ConvertPolicy::SATURATE, act_info);
+      break;
+    }
+    case ir::operation::BinaryArithmetic::ArithmeticType::SUB:
+    {
+      fn = acl_common::generateLayer<arm_compute::CLArithmeticSubtraction>(
+          lhs_tensor->handle(), rhs_tensor->handle(), ofm_tensor->handle(),
+          arm_compute::ConvertPolicy::SATURATE, act_info);
+      break;
+    }
+    case ir::operation::BinaryArithmetic::ArithmeticType::MUL:
+    {
+      fn = acl_common::generateLayer<arm_compute::CLPixelWiseMultiplication>(
+          lhs_tensor->handle(), rhs_tensor->handle(), ofm_tensor->handle(), 1.0, // scale
+          arm_compute::ConvertPolicy::SATURATE, arm_compute::RoundingPolicy::TO_NEAREST_EVEN,
+          act_info);
+      break;
+    }
+    case ir::operation::BinaryArithmetic::ArithmeticType::DIV:
+    {
+      fn = acl_common::generateLayer<arm_compute::CLArithmeticDivision>(
+          lhs_tensor->handle(), rhs_tensor->handle(), ofm_tensor->handle(), act_info);
+      break;
+    }
+    default:
+      assert(false && "The BinaryArithmetic operation supports only binary arithmetic operations");
+      break;
   }
 
-  auto acl_fn = asAclClFunction(std::move(fn));
-
-  _return_fn = std::move(acl_fn);
+  _return_fn = asAclFunction(std::move(fn));
 }
 
 void KernelGenerator::visit(const ir::operation::Conv2D &node)
@@ -145,22 +164,20 @@
                                             ker_width, ker_height);
   const auto activation = node.param().activation;
 
-  auto ofm_tensor = _tensor_builder->at(ofm_index).get();
-  auto ifm_tensor = _tensor_builder->at(ifm_index).get();
-  auto ker_tensor = _tensor_builder->at(ker_index).get();
-  auto bias_tensor = _tensor_builder->at(bias_index).get();
+  auto ofm_tensor = _tensor_reg->getAclTensor(ofm_index).get();
+  auto ifm_tensor = _tensor_reg->getAclTensor(ifm_index).get();
+  auto ker_tensor = _tensor_reg->getAclTensor(ker_index).get();
+  auto bias_tensor = _tensor_reg->getAclTensor(bias_index).get();
 
   const auto conv_info = acl_common::asPadStrideInfo(padding, stride);
   const auto act_info = acl_common::asActivationLayerInfo(activation);
 
-  auto fn = std::make_unique<::arm_compute::CLConvolutionLayer>(
-      _tensor_builder->acl_tensor_manager()->internal_buffer_manager());
-
-  fn->configure(ifm_tensor->handle(), ker_tensor->handle(), bias_tensor->handle(),
-                ofm_tensor->handle(), conv_info, ::arm_compute::WeightsInfo(),
-                ::arm_compute::Size2D(1U, 1U), act_info);
+  auto fn = acl_common::generateLayer<arm_compute::CLConvolutionLayer>(
+      _tensor_builder->acl_tensor_manager()->internal_buffer_manager(), ifm_tensor->handle(),
+      ker_tensor->handle(), bias_tensor->handle(), ofm_tensor->handle(), conv_info,
+      ::arm_compute::WeightsInfo(), ::arm_compute::Size2D(1U, 1U), act_info);
 
-  _return_fn = asAclClFunction(std::move(fn));
+  _return_fn = asAclFunction(std::move(fn));
 }
 
 void KernelGenerator::visit(const ir::operation::DepthwiseConv2D &node)
@@ -185,50 +202,23 @@
   const auto multiplier = node.param().multiplier;
   const auto activation = node.param().activation;
 
-  auto ofm_tensor = _tensor_builder->at(ofm_index).get();
-  auto ifm_tensor = _tensor_builder->at(ifm_index).get();
-  auto ker_tensor = _tensor_builder->at(ker_index).get();
-  auto bias_tensor = _tensor_builder->at(bias_index).get();
+  auto ofm_tensor = _tensor_reg->getAclTensor(ofm_index).get();
+  auto ifm_tensor = _tensor_reg->getAclTensor(ifm_index).get();
+  auto ker_tensor = _tensor_reg->getAclTensor(ker_index).get();
+  auto bias_tensor = _tensor_reg->getAclTensor(bias_index).get();
 
   const auto conv_info = acl_common::asPadStrideInfo(padding, stride);
   const auto act_info = acl_common::asActivationLayerInfo(activation);
 
   {
-    auto fn = std::make_unique<::arm_compute::CLDepthwiseConvolutionLayer>();
-
-    fn->configure(ifm_tensor->handle(), ker_tensor->handle(), bias_tensor->handle(),
-                  ofm_tensor->handle(), conv_info, multiplier, act_info);
+    auto fn = acl_common::generateLayer<arm_compute::CLDepthwiseConvolutionLayer>(
+        ifm_tensor->handle(), ker_tensor->handle(), bias_tensor->handle(), ofm_tensor->handle(),
+        conv_info, multiplier, act_info);
 
-    _return_fn = asAclClFunction(std::move(fn));
+    _return_fn = asAclFunction(std::move(fn));
   }
 }
 
-void KernelGenerator::visit(const ir::operation::MaxPool2D &node)
-{
-  auto raw_fn = acl_common::kernelGenPool2D<::arm_compute::CLPoolingLayer>(
-      node, _ctx, _tensor_builder, _current_op_seq_layout, ::arm_compute::PoolingType::MAX);
-
-  const auto ofm_index{node.getOutputs().at(0)};
-  auto ofm_tensor = _tensor_builder->at(ofm_index).get();
-  const auto activation = node.param().activation;
-  _return_fn = std::make_unique<exec::FunctionSequence>(
-      asAclClFunction(std::move(raw_fn)),
-      ActivationBuilder::generate(activation, ofm_tensor->handle()));
-}
-
-void KernelGenerator::visit(const ir::operation::AvgPool2D &node)
-{
-  auto raw_fn = acl_common::kernelGenPool2D<::arm_compute::CLPoolingLayer>(
-      node, _ctx, _tensor_builder, _current_op_seq_layout, ::arm_compute::PoolingType::AVG);
-
-  const auto ofm_index{node.getOutputs().at(0)};
-  auto ofm_tensor = _tensor_builder->at(ofm_index).get();
-  const auto activation = node.param().activation;
-  _return_fn = std::make_unique<exec::FunctionSequence>(
-      asAclClFunction(std::move(raw_fn)),
-      ActivationBuilder::generate(activation, ofm_tensor->handle()));
-}
-
 void KernelGenerator::visit(const ir::operation::Concat &node)
 {
   const auto ofm_index{node.getOutputs().at(0)};
@@ -250,70 +240,44 @@
     return;
   }
 
-  auto output_tensor = _tensor_builder->at(ofm_index).get();
+  auto output_tensor = _tensor_reg->getAclTensor(ofm_index).get();
   std::vector<::arm_compute::ICLTensor *> input_tensors;
   for (auto &ifm_ind : input_indexes)
-    input_tensors.emplace_back(_tensor_builder->at(ifm_ind)->handle());
+    input_tensors.emplace_back(_tensor_reg->getAclTensor(ifm_ind)->handle());
 
   std::unique_ptr<::arm_compute::IFunction> fn;
   if (input_indexes.size() < 2)
   {
-    auto l = std::make_unique<::arm_compute::CLCopy>();
-    l->configure(input_tensors.at(0), output_tensor->handle());
-    fn = std::move(l);
+    fn = acl_common::generateLayer<arm_compute::CLCopy>(input_tensors.at(0),
+                                                        output_tensor->handle());
  }
   else
   {
-    auto l = std::make_unique<::arm_compute::CLConcatenateLayer>();
     const auto rank = _ctx.at(ofm_index).shape().rank();
     const auto frontend_layout = _current_op_seq_layout;
     const auto backend_layout = output_tensor->layout();
     const auto fixed_axis =
         acl_common::ToARMComputeAxis(rank, axis, frontend_layout, backend_layout).value();
-    l->configure(input_tensors, output_tensor->handle(), fixed_axis);
-    fn = std::move(l);
+    fn = acl_common::generateLayer<::arm_compute::CLConcatenateLayer>(
+        input_tensors, output_tensor->handle(), fixed_axis);
   }
 
-  auto acl_fn = asAclClFunction(std::move(fn));
-
-  _return_fn = std::move(acl_fn);
+  _return_fn = asAclFunction(std::move(fn));
 }
 
 void KernelGenerator::visit(const ir::operation::FullyConnected &node)
 {
   const auto output_index{node.getOutputs().at(0)};
-  auto output_tensor = _tensor_builder->at(output_index).get();
+  auto output_tensor = _tensor_reg->getAclTensor(output_index).get();
   const auto activation = node.param().activation;
 
-  auto fn = acl_common::kernelGenFullyConnected<acl_common::AclClFunction, ::arm_compute::ICLTensor,
+  auto fn = acl_common::kernelGenFullyConnected<acl_common::AclFunction, ::arm_compute::ICLTensor,
                                                 ::arm_compute::CLFullyConnectedReshapingLayer>(
-      node, _ctx, _tensor_builder, _current_op_seq_layout);
+      node, _ctx, _tensor_builder, _tensor_reg, _current_op_seq_layout);
   _return_fn = std::make_unique<exec::FunctionSequence>(
       std::move(fn), ActivationBuilder::generate(activation, output_tensor->handle()));
 }
 
-void KernelGenerator::visit(const ir::operation::Mul &node)
-{
-  const auto ofm_index{node.getOutputs().at(0)};
-  const auto lhs_index{node.getInputs().at(ir::operation::Mul::Input::LHS)};
-  const auto rhs_index{node.getInputs().at(ir::operation::Mul::Input::RHS)};
-
-  const auto activation = node.param().activation;
-
-  auto ofm_tensor = _tensor_builder->at(ofm_index).get();
-  auto lhs_tensor = _tensor_builder->at(lhs_index).get();
-  auto rhs_tensor = _tensor_builder->at(rhs_index).get();
-
-  auto fn = std::make_unique<::arm_compute::CLPixelWiseMultiplication>();
-
-  fn->configure(lhs_tensor->handle(), rhs_tensor->handle(), ofm_tensor->handle(), 1.0, // scale
-                arm_compute::ConvertPolicy::SATURATE, arm_compute::RoundingPolicy::TO_NEAREST_EVEN);
-
-  _return_fn = std::make_unique<exec::FunctionSequence>(
-      asAclClFunction(std::move(fn)),
-      ActivationBuilder::generate(activation, ofm_tensor->handle()));
-}
-
 void KernelGenerator::visit(const ir::operation::Reduce &node)
 {
   const auto output_index{node.getOutputs().at(0)};
@@ -322,8 +286,8 @@
   const auto keep_dims{node.param().keep_dims};
   const auto reduce_type = node.param().reduce_type;
 
-  auto output_tensor = _tensor_builder->at(output_index).get();
-  auto input_tensor = _tensor_builder->at(input_index).get();
+  auto output_tensor = _tensor_reg->getAclTensor(output_index).get();
+  auto input_tensor = _tensor_reg->getAclTensor(input_index).get();
 
   // Convert to ACL axes taking into account negative values and possible duplicates.
   const auto &axes = _ctx.at(axes_index);
@@ -334,29 +298,21 @@
   std::unique_ptr<arm_compute::IFunction> fn;
   if (reduce_type == ir::operation::Reduce::ReduceType::MEAN)
   {
-    auto l = std::make_unique<::arm_compute::CLReduceMean>();
-
     const auto acl_axes =
         acl_common::asCoordinates(axes, input_rank, frontend_layout, backend_layout);
-    l->configure(input_tensor->handle(), acl_axes, keep_dims, output_tensor->handle());
-
-    fn = std::move(l);
+    fn = acl_common::generateLayer<arm_compute::CLReduceMean>(input_tensor->handle(), acl_axes,
+                                                              keep_dims, output_tensor->handle());
   }
   else
   {
-    auto l = std::make_unique<::arm_compute::CLReduceOperation>(
-        _tensor_builder->acl_tensor_manager()->internal_buffer_manager());
-
     const auto acl_axes = acl_common::asSet(axes, input_rank, frontend_layout, backend_layout);
-
-    l->configure(input_tensor->handle(), output_tensor->handle(), acl_axes, keep_dims,
-                 acl_common::convertReduceType(reduce_type));
-    fn = std::move(l);
+    fn = acl_common::generateLayer<arm_compute::CLReduceOperation>(
+        _tensor_builder->acl_tensor_manager()->internal_buffer_manager(), input_tensor->handle(),
+        output_tensor->handle(), acl_axes, keep_dims, acl_common::convertReduceType(reduce_type));
   }
 
-  auto acl_fn = asAclClFunction(std::move(fn));
-
-  _return_fn = std::move(acl_fn);
+  _return_fn = asAclFunction(std::move(fn));
 }
 
 void KernelGenerator::visit(const ir::operation::Reshape &node)
@@ -364,8 +320,8 @@
   const auto output_index{node.getOutputs().at(0)};
   const auto input_index{node.getInputs().at(ir::operation::Reshape::Input::INPUT)};
 
-  auto output_tensor = _tensor_builder->at(output_index).get();
-  auto input_tensor = _tensor_builder->at(input_index).get();
+  auto output_tensor = _tensor_reg->getAclTensor(output_index).get();
+  auto input_tensor = _tensor_reg->getAclTensor(input_index).get();
 
   // NOTE This operation must not be changed the layout from frontend to backend
   //      So, PermutationOperationPass makes layouts of frontend and backend the same.
@@ -376,13 +332,10 @@ void KernelGenerator::visit(const ir::operation::Reshape &node)
   UNUSED_RELEASE(frontend_layout);
   UNUSED_RELEASE(backend_layout);
 
-  auto fn = std::make_unique<::arm_compute::CLReshapeLayer>();
-
-  fn->configure(input_tensor->handle(), output_tensor->handle());
-
-  auto acl_fn = asAclClFunction(std::move(fn));
+  auto fn = acl_common::generateLayer<arm_compute::CLReshapeLayer>(input_tensor->handle(),
+                                                                   output_tensor->handle());
 
-  _return_fn = std::move(acl_fn);
+  _return_fn = asAclFunction(std::move(fn));
 }
 
 void KernelGenerator::visit(const ir::operation::Squeeze &node)
@@ -398,32 +351,11 @@
   (void)dims;
   (void)ndim;
 
-  auto output_tensor = _tensor_builder->at(output_index).get();
-  auto input_tensor = _tensor_builder->at(input_index).get();
-  auto fn = std::make_unique<arm_compute::CLReshapeLayer>();
-  fn->configure(input_tensor->handle(), output_tensor->handle());
-  auto acl_fn = asAclClFunction(std::move(fn));
-  _return_fn = std::move(acl_fn);
-}
-
-void KernelGenerator::visit(const ir::operation::Tanh &node)
-{
-  const auto output_index{node.getOutputs().at(0)};
-  const auto input_index{node.getInputs().at(ir::operation::Tanh::Input::INPUT)};
-
-  auto output_tensor = _tensor_builder->at(output_index).get();
-  auto input_tensor = _tensor_builder->at(input_index).get();
-
-  auto fn = std::make_unique<arm_compute::CLActivationLayer>();
-
-  const ::arm_compute::ActivationLayerInfo act_info{
-      ::arm_compute::ActivationLayerInfo::ActivationFunction::TANH, 1.0f, 1.0f};
-
-  fn->configure(input_tensor->handle(), output_tensor->handle(), act_info);
-
-  auto acl_fn = asAclClFunction(std::move(fn));
-
-  _return_fn = std::move(acl_fn);
+  auto output_tensor = _tensor_reg->getAclTensor(output_index).get();
+  auto input_tensor = _tensor_reg->getAclTensor(input_index).get();
+  auto fn = acl_common::generateLayer<arm_compute::CLReshapeLayer>(input_tensor->handle(),
+                                                                   output_tensor->handle());
+  _return_fn = asAclFunction(std::move(fn));
 }
 
 void KernelGenerator::visit(const ir::operation::Softmax &node)
@@ -433,17 +365,14 @@
 
   const auto beta = node.param().beta;
 
-  auto output_tensor = _tensor_builder->at(output_index).get();
-  auto input_tensor = _tensor_builder->at(input_index).get();
-
-  auto fn = std::make_unique<::arm_compute::CLSoftmaxLayer>(
-      _tensor_builder->acl_tensor_manager()->internal_buffer_manager());
-
-  fn->configure(input_tensor->handle(), output_tensor->handle(), beta);
+  auto output_tensor = _tensor_reg->getAclTensor(output_index).get();
+  auto input_tensor = _tensor_reg->getAclTensor(input_index).get();
 
-  auto acl_fn = asAclClFunction(std::move(fn));
+  auto fn = acl_common::generateLayer<arm_compute::CLSoftmaxLayer>(
+      _tensor_builder->acl_tensor_manager()->internal_buffer_manager(), input_tensor->handle(),
+      output_tensor->handle(), beta);
 
-  _return_fn = std::move(acl_fn);
+  _return_fn = asAclFunction(std::move(fn));
 }
 
 void KernelGenerator::visit(const ir::operation::Slice &node)
@@ -453,8 +382,8 @@
   const auto begins_index{node.getInputs().at(ir::operation::Slice::Input::BEGINS)};
   const auto sizes_index{node.getInputs().at(ir::operation::Slice::Input::SIZES)};
 
-  auto outputData_tensor = _tensor_builder->at(output_index).get();
-  auto inputData_tensor = _tensor_builder->at(input_index).get();
+  auto outputData_tensor = _tensor_reg->getAclTensor(output_index).get();
+  auto inputData_tensor = _tensor_reg->getAclTensor(input_index).get();
   const auto frontend_layout = _current_op_seq_layout;
   const auto backend_layout = inputData_tensor->layout();
@@ -506,13 +435,10 @@
     ends_set.set(i, ends[i]);
   }
 
-  auto fn = std::make_unique<::arm_compute::CLSlice>();
-
-  fn->configure(inputData_tensor->handle(), outputData_tensor->handle(), starts_set, ends_set);
-
-  auto acl_fn = asAclClFunction(std::move(fn));
+  auto fn = acl_common::generateLayer<arm_compute::CLSlice>(
+      inputData_tensor->handle(), outputData_tensor->handle(), starts_set, ends_set);
 
-  _return_fn = std::move(acl_fn);
+  _return_fn = asAclFunction(std::move(fn));
 }
 
 void KernelGenerator::visit(const ir::operation::StridedSlice &node)
@@ -523,8 +449,8 @@
   const auto ends_index{node.getInputs().at(ir::operation::StridedSlice::Input::ENDS)};
   const auto strides_index{node.getInputs().at(ir::operation::StridedSlice::Input::STRIDES)};
 
-  auto outputData_tensor = _tensor_builder->at(output_index).get();
-  auto inputData_tensor = _tensor_builder->at(input_index).get();
+  auto outputData_tensor = _tensor_reg->getAclTensor(output_index).get();
+  auto inputData_tensor = _tensor_reg->getAclTensor(input_index).get();
   const auto frontend_layout = _current_op_seq_layout;
   const auto backend_layout = inputData_tensor->layout();
@@ -597,14 +523,11 @@
     strides_set.set(i, strides[i]);
   }
 
-  auto fn = std::make_unique<::arm_compute::CLStridedSlice>();
-
-  fn->configure(inputData_tensor->handle(), outputData_tensor->handle(), starts_set, ends_set,
-                strides_set, begin_mask, end_mask, shrink_axis_mask);
-
-  auto acl_fn = asAclClFunction(std::move(fn));
+  auto fn = acl_common::generateLayer<arm_compute::CLStridedSlice>(
+      inputData_tensor->handle(), outputData_tensor->handle(), starts_set, ends_set, strides_set,
+      begin_mask, end_mask, shrink_axis_mask);
 
-  _return_fn = std::move(acl_fn);
+  _return_fn = asAclFunction(std::move(fn));
 }
 
 void KernelGenerator::visit(const ir::operation::Transpose &node)
@@ -615,8 +538,8 @@
 
   const auto rank = _ctx.at(ifm_idx).shape().rank();
 
-  auto ofm_tensor = _tensor_builder->at(ofm_idx).get();
-  auto ifm_tensor = _tensor_builder->at(ifm_idx).get();
+  auto ofm_tensor = _tensor_reg->getAclTensor(ofm_idx).get();
+  auto ifm_tensor = _tensor_reg->getAclTensor(ifm_idx).get();
 
   const auto frontend_layout = _current_op_seq_layout;
   const auto backend_layout = ifm_tensor->layout();
@@ -625,93 +548,168 @@
   auto backend_pv = ::onert::backend::acl_common::getARMComputePermutationVector(
       rank, pv, frontend_layout, backend_layout);
 
-  auto fn = std::make_unique<::arm_compute::CLPermute>();
-
-  fn->configure(ifm_tensor->handle(), ofm_tensor->handle(), backend_pv);
-
-  auto acl_fn = asAclClFunction(std::move(fn));
-
-  _return_fn = std::move(acl_fn);
-}
-
-void KernelGenerator::visit(const ir::operation::Add &node)
-{
-  const auto ofm_index{node.getOutputs().at(0)};
-  const auto lhs_index{node.getInputs().at(ir::operation::Add::Input::LHS)};
-  const auto rhs_index{node.getInputs().at(ir::operation::Add::Input::RHS)};
-
-  const auto activation = node.param().activation;
-
-  auto ofm_tensor = _tensor_builder->at(ofm_index).get();
-  auto lhs_tensor = _tensor_builder->at(lhs_index).get();
-  auto rhs_tensor = _tensor_builder->at(rhs_index).get();
-
-  auto fn = std::make_unique<::arm_compute::CLArithmeticAddition>();
-
-  fn->configure(lhs_tensor->handle(), rhs_tensor->handle(), ofm_tensor->handle(),
-                arm_compute::ConvertPolicy::SATURATE);
+  auto fn = acl_common::generateLayer<::arm_compute::CLPermute>(ifm_tensor->handle(),
                                                                ofm_tensor->handle(), backend_pv);
 
-  _return_fn = std::make_unique<exec::FunctionSequence>(
-      asAclClFunction(std::move(fn)),
-      ActivationBuilder::generate(activation, ofm_tensor->handle()));
+  _return_fn = asAclFunction(std::move(fn));
 }
 
-void KernelGenerator::visit(const ir::operation::Sub &node)
+void KernelGenerator::visit(const ir::operation::ElementwiseActivation &node)
 {
   const auto ofm_index{node.getOutputs().at(0)};
-  const auto lhs_index{node.getInputs().at(ir::operation::Sub::Input::LHS)};
-  const auto rhs_index{node.getInputs().at(ir::operation::Sub::Input::RHS)};
-
-  const auto activation = node.param().activation;
+  const auto ifm_index{node.getInputs().at(ir::operation::ElementwiseActivation::Input::INPUT)};
 
-  auto ofm_tensor = _tensor_builder->at(ofm_index).get();
-  auto lhs_tensor = _tensor_builder->at(lhs_index).get();
-  auto rhs_tensor = _tensor_builder->at(rhs_index).get();
+  auto ofm_tensor = _tensor_reg->getAclTensor(ofm_index).get();
+  auto ifm_tensor = _tensor_reg->getAclTensor(ifm_index).get();
 
-  auto fn = std::make_unique<::arm_compute::CLArithmeticSubtraction>();
+  const ::arm_compute::ActivationLayerInfo act_info = acl_common::asActivationLayerInfo(
+      node.param().op_type, node.param().alpha, node.param().beta);
 
-  fn->configure(lhs_tensor->handle(), rhs_tensor->handle(), ofm_tensor->handle(),
-                arm_compute::ConvertPolicy::SATURATE);
+  auto fn = acl_common::generateLayer<arm_compute::CLActivationLayer>(
+      ifm_tensor->handle(), ofm_tensor->handle(), act_info);
 
-  _return_fn = std::make_unique<exec::FunctionSequence>(
-      asAclClFunction(std::move(fn)),
-      ActivationBuilder::generate(activation, ofm_tensor->handle()));
+  _return_fn = asAclFunction(std::move(fn));
 }
 
-void KernelGenerator::visit(const ir::operation::Div &node)
+void KernelGenerator::visit(const ir::operation::ElementwiseBinary &node)
 {
-  const auto ofm_index{node.getOutputs().at(0)};
-  const auto lhs_index{node.getInputs().at(ir::operation::Div::Input::LHS)};
-  const auto rhs_index{node.getInputs().at(ir::operation::Div::Input::RHS)};
-
-  const auto activation = node.param().activation;
-
-  auto ofm_tensor = _tensor_builder->at(ofm_index).get();
-  auto lhs_tensor = _tensor_builder->at(lhs_index).get();
-  auto rhs_tensor = _tensor_builder->at(rhs_index).get();
+  const auto output_index{node.getOutputs().at(0)};
+  const auto lhs_index{node.getInputs().at(ir::operation::ElementwiseBinary::Input::LHS)};
+  const auto rhs_index{node.getInputs().at(ir::operation::ElementwiseBinary::Input::RHS)};
 
-  auto fn = std::make_unique<::arm_compute::CLArithmeticDivision>();
+  auto output_tensor = _tensor_reg->getAclTensor(output_index).get();
+  auto lhs_tensor = _tensor_reg->getAclTensor(lhs_index).get();
+  auto rhs_tensor = _tensor_reg->getAclTensor(rhs_index).get();
 
-  fn->configure(lhs_tensor->handle(), rhs_tensor->handle(), ofm_tensor->handle());
+  std::unique_ptr<arm_compute::IFunction> fn;
+  switch (node.param().op_type)
+  {
+    case ir::operation::ElementwiseBinary::ElementwiseBinaryType::LOGICAL_AND:
+    {
+      fn = acl_common::generateLayer<arm_compute::CLBinaryLogicalOp>(
+          lhs_tensor->handle(), rhs_tensor->handle(), output_tensor->handle(),
+          arm_compute::BinaryLogicalOperation::AND);
+      break;
+    }
+    case ir::operation::ElementwiseBinary::ElementwiseBinaryType::LOGICAL_OR:
+    {
+      fn = acl_common::generateLayer<arm_compute::CLBitwiseOr>(
+          lhs_tensor->handle(), rhs_tensor->handle(), output_tensor->handle());
+      break;
+    }
+    case ir::operation::ElementwiseBinary::ElementwiseBinaryType::MAX:
+    {
+      fn = acl_common::generateLayer<arm_compute::CLElementwiseMax>(
+          lhs_tensor->handle(), rhs_tensor->handle(), output_tensor->handle());
+      break;
+    }
+    case ir::operation::ElementwiseBinary::ElementwiseBinaryType::MIN:
+    {
+      fn = acl_common::generateLayer<arm_compute::CLElementwiseMin>(
+          lhs_tensor->handle(), rhs_tensor->handle(), output_tensor->handle());
+      break;
+    }
+    default:
+    {
+      std::string err_msg("acl_cl KernelGenerator : " + node.name() +
+                          "is not elementwise-binary operations");
+      assert(false && err_msg.c_str());
+      break;
+    }
+  }
 
-  _return_fn = std::make_unique<exec::FunctionSequence>(
-      asAclClFunction(std::move(fn)),
-      ActivationBuilder::generate(activation, ofm_tensor->handle()));
+  _return_fn = asAclFunction(std::move(fn));
 }
 
-void KernelGenerator::visit(const ir::operation::Exp &node)
+void KernelGenerator::visit(const ir::operation::ElementwiseUnary &node)
 {
   const auto output_index{node.getOutputs().at(0)};
-  const auto input_index{node.getInputs().at(ir::operation::Exp::Input::INPUT)};
+  const auto input_index{node.getInputs().at(ir::operation::ElementwiseUnary::Input::INPUT)};
 
-  auto output_tensor = _tensor_builder->at(output_index).get();
-  auto input_tensor = _tensor_builder->at(input_index).get();
+  auto output_tensor = _tensor_reg->getAclTensor(output_index).get();
+  auto input_tensor = _tensor_reg->getAclTensor(input_index).get();
 
-  auto fn = std::make_unique<::arm_compute::CLExpLayer>();
+  std::unique_ptr<arm_compute::IFunction> fn;
+  switch (node.param().op_type)
+  {
+    case ir::operation::ElementwiseUnary::Type::ABS:
+    {
+      const ::arm_compute::ActivationLayerInfo act_info{
+          ::arm_compute::ActivationLayerInfo::ActivationFunction::ABS};
 
-  fn->configure(input_tensor->handle(), output_tensor->handle());
+      fn = acl_common::generateLayer<arm_compute::CLActivationLayer>(
+          input_tensor->handle(), output_tensor->handle(), act_info);
+      break;
+    }
+    case ir::operation::ElementwiseUnary::Type::CAST:
+    {
+      if (input_tensor->data_type() == output_tensor->data_type())
+      {
+        fn = acl_common::generateLayer<arm_compute::CLCopy>(input_tensor->handle(),
+                                                            output_tensor->handle());
+        ;
+      }
+      else
+      {
+        // TODO Support converting float to int32 as round down
+        fn = acl_common::generateLayer<arm_compute::CLCast>(
+            input_tensor->handle(), output_tensor->handle(), arm_compute::ConvertPolicy::SATURATE);
+      }
+      break;
+    }
+    case ir::operation::ElementwiseUnary::Type::DEQUANTIZE:
+    {
+      fn = acl_common::generateLayer<arm_compute::CLDequantizationLayer>(input_tensor->handle(),
+                                                                         output_tensor->handle());
+      break;
+    }
+    case ir::operation::ElementwiseUnary::Type::EXP:
+    {
+      fn = acl_common::generateLayer<arm_compute::CLExpLayer>(input_tensor->handle(),
+                                                              output_tensor->handle());
+      break;
+    }
+    case ir::operation::ElementwiseUnary::Type::FLOOR:
+    {
+      fn = acl_common::generateLayer<arm_compute::CLFloor>(input_tensor->handle(),
+                                                           output_tensor->handle());
+      break;
+    }
+    case ir::operation::ElementwiseUnary::Type::LOGICAL_NOT:
+    {
+      fn = acl_common::generateLayer<arm_compute::CLBitwiseNot>(input_tensor->handle(),
+                                                                output_tensor->handle());
+      break;
+    }
+    case ir::operation::ElementwiseUnary::Type::NEG:
+    {
+      fn = acl_common::generateLayer<arm_compute::CLNeg>(input_tensor->handle(),
+                                                         output_tensor->handle());
+      break;
+    }
+    case ir::operation::ElementwiseUnary::Type::RSQRT:
+    {
+      fn = acl_common::generateLayer<arm_compute::CLRsqrtLayer>(input_tensor->handle(),
                                                                output_tensor->handle());
+      break;
+    }
+    case ir::operation::ElementwiseUnary::Type::SQRT:
+    {
+      const ::arm_compute::ActivationLayerInfo act_info{
+          ::arm_compute::ActivationLayerInfo::ActivationFunction::SQRT};
 
-  auto acl_fn = asAclClFunction(std::move(fn));
+      fn = acl_common::generateLayer<arm_compute::CLActivationLayer>(
+          input_tensor->handle(), output_tensor->handle(), act_info);
+      break;
+    }
+    default:
+    {
+      throw std::runtime_error("acl_cl KernelGenerator : " + node.name() + "is not supported yet");
+      break;
+    }
+  }
+
+  auto acl_fn = asAclFunction(std::move(fn));
 
   _return_fn = std::move(acl_fn);
 }
@@ -721,16 +719,13 @@ void KernelGenerator::visit(const ir::operation::ExpandDims &node)
   const auto output_index{node.getOutputs().at(0)};
   const auto input_index{node.getInputs().at(ir::operation::ExpandDims::Input::INPUT)};
 
-  auto output_tensor = _tensor_builder->at(output_index).get();
-  auto input_tensor = _tensor_builder->at(input_index).get();
-
-  auto fn = std::make_unique<::arm_compute::CLReshapeLayer>();
+  auto output_tensor = _tensor_reg->getAclTensor(output_index).get();
+  auto input_tensor = _tensor_reg->getAclTensor(input_index).get();
 
-  fn->configure(input_tensor->handle(), output_tensor->handle());
+  auto fn = acl_common::generateLayer<arm_compute::CLReshapeLayer>(input_tensor->handle(),
+                                                                   output_tensor->handle());
 
-  auto acl_fn = asAclClFunction(std::move(fn));
-
-  _return_fn = std::move(acl_fn);
+  _return_fn = asAclFunction(std::move(fn));
 }
 
 void KernelGenerator::visit(const ir::operation::InstanceNorm &node)
@@ -740,67 +735,25 @@
   const auto gamma_index{node.getInputs().at(ir::operation::InstanceNorm::Input::GAMMA)};
   const auto beta_index{node.getInputs().at(ir::operation::InstanceNorm::Input::BETA)};
 
-  auto ofm_tensor = _tensor_builder->at(ofm_index).get();
-  auto ifm_tensor = _tensor_builder->at(ifm_index).get();
-  auto gamma_tensor = _tensor_builder->at(gamma_index).get();
-  auto beta_tensor = _tensor_builder->at(beta_index).get();
+  auto ofm_tensor = _tensor_reg->getAclTensor(ofm_index).get();
+  auto ifm_tensor = _tensor_reg->getAclTensor(ifm_index).get();
+  auto gamma_tensor = _tensor_reg->getAclTensor(gamma_index).get();
+  auto beta_tensor = _tensor_reg->getAclTensor(beta_index).get();
   auto epsilon = node.param().epsilon;
   auto activation = node.param().activation;
 
-  auto fn = std::make_unique<::arm_compute::CLInstanceNormalizationLayerEx>();
-
-  fn->configure(ifm_tensor->handle(), ofm_tensor->handle(), gamma_tensor->handle(),
-                beta_tensor->handle(), epsilon);
+  auto fn = acl_common::generateLayer<arm_compute::CLInstanceNormalizationLayerEx>(
+      ifm_tensor->handle(), ofm_tensor->handle(), gamma_tensor->handle(), beta_tensor->handle(),
+      epsilon);
 
   _return_fn = std::make_unique<exec::FunctionSequence>(
-      asAclClFunction(std::move(fn)),
-      ActivationBuilder::generate(activation, ofm_tensor->handle()));
-}
-
-void KernelGenerator::visit(const ir::operation::Logistic &node)
-{
-  const auto ofm_index{node.getOutputs().at(0)};
-  const auto ifm_index{node.getInputs().at(ir::operation::Logistic::Input::INPUT)};
-
-  auto ofm_tensor = _tensor_builder->at(ofm_index).get();
-  auto ifm_tensor = _tensor_builder->at(ifm_index).get();
-
-  const ::arm_compute::ActivationLayerInfo act_info{
-      ::arm_compute::ActivationLayerInfo::ActivationFunction::LOGISTIC};
-
-  auto fn = std::make_unique<::arm_compute::CLActivationLayer>();
-
-  fn->configure(ifm_tensor->handle(), ofm_tensor->handle(), act_info);
-
-  auto acl_fn = asAclClFunction(std::move(fn));
-
-  _return_fn = std::move(acl_fn);
-}
-
-void KernelGenerator::visit(const ir::operation::LogicalAnd &node)
-{
-  const auto output_index{node.getOutputs().at(0)};
-  const auto input0_index{node.getInputs().at(ir::operation::LogicalAnd::Input::INPUT0)};
-  const auto input1_index{node.getInputs().at(ir::operation::LogicalAnd::Input::INPUT1)};
-
-  auto output_tensor = _tensor_builder->at(output_index).get();
-  auto input0_tensor = _tensor_builder->at(input0_index).get();
-  auto input1_tensor = _tensor_builder->at(input1_index).get();
-
-  auto fn = std::make_unique<::arm_compute::CLBinaryLogicalOp>();
-
-  fn->configure(input0_tensor->handle(), input1_tensor->handle(), output_tensor->handle(),
-                ::arm_compute::BinaryLogicalOperation::AND);
-
-  auto acl_fn = asAclClFunction(std::move(fn));
-
-  _return_fn = std::move(acl_fn);
+      asAclFunction(std::move(fn)), ActivationBuilder::generate(activation, ofm_tensor->handle()));
 }
 
 void KernelGenerator::visit(const ir::operation::LSTM &node)
 {
-  _return_fn = acl_common::kernelGenLSTM<acl_common::AclClFunction, ::arm_compute::ICLTensor,
-                                         ::arm_compute::CLLSTMLayer>(node, _ctx, _tensor_builder);
+  _return_fn = acl_common::kernelGenLSTM<acl_common::AclFunction, ::arm_compute::ICLTensor,
                                         ::arm_compute::CLLSTMLayer>(node, _ctx, _tensor_reg);
 }
 
 void KernelGenerator::visit(const ir::operation::Comparison &node)
@@ -811,18 +764,15 @@
 
   const auto comparison_type = node.param().comparison_type;
 
-  auto output_tensor = _tensor_builder->at(output_index).get();
-  auto input0_tensor = _tensor_builder->at(input0_index).get();
-  auto input1_tensor = _tensor_builder->at(input1_index).get();
-
-  auto fn = std::make_unique<::arm_compute::CLComparison>();
-
-  fn->configure(input0_tensor->handle(), input1_tensor->handle(), output_tensor->handle(),
-                (arm_compute::ComparisonOperation)comparison_type);
+  auto output_tensor = _tensor_reg->getAclTensor(output_index).get();
+  auto input0_tensor = _tensor_reg->getAclTensor(input0_index).get();
+  auto input1_tensor = _tensor_reg->getAclTensor(input1_index).get();
 
-  auto acl_fn = asAclClFunction(std::move(fn));
+  auto fn = acl_common::generateLayer<arm_compute::CLComparison>(
+      input0_tensor->handle(), input1_tensor->handle(), output_tensor->handle(),
+      (arm_compute::ComparisonOperation)comparison_type);
 
-  _return_fn = std::move(acl_fn);
+  _return_fn = asAclFunction(std::move(fn));
 }
 
 void KernelGenerator::visit(const ir::operation::Pack &node)
@@ -836,26 +786,24 @@
   for (const auto &input_index : node.getInputs())
     input_indexes.emplace_back(input_index);
 
-  auto output = _tensor_builder->at(output_index).get()->handle();
+  auto output = _tensor_reg->getAclTensor(output_index).get()->handle();
   std::vector<arm_compute::ICLTensor *> inputs;
   for (const auto &input_index : input_indexes)
-    inputs.emplace_back(_tensor_builder->at(input_index)->handle());
+    inputs.emplace_back(_tensor_reg->getAclTensor(input_index)->handle());
 
   const auto frontend_layout = _current_op_seq_layout;
-  const auto backend_layout = _tensor_builder->at(output_index).get()->layout();
+  const auto backend_layout = _tensor_reg->getAclTensor(output_index).get()->layout();
 
   if (axis < 0)
     axis += output_rank;
   axis = acl_common::ToARMComputeAxis(output_rank, axis, frontend_layout, backend_layout).value();
 
-  auto fn = std::make_unique<::arm_compute::CLStackLayer>();
-
   // Disable applied dim_correction
   std::vector<arm_compute::TensorShape> orig_inputs_acl_tensor_shapes;
   for (const auto &input_index : input_indexes)
   {
     size_t input_rank = _ctx.at(input_index).shape().rank();
-    const auto &input_tensor = _tensor_builder->at(input_index);
+    const auto &input_tensor = _tensor_reg->getAclTensor(input_index);
     orig_inputs_acl_tensor_shapes.emplace_back(input_tensor->info()->tensor_shape());
     assert(input_rank == input_tensor->num_dimensions());
     if (input_rank != input_tensor->info()->num_dimensions())
@@ -866,7 +814,7 @@ void KernelGenerator::visit(const ir::operation::Pack &node)
     }
   }
 
-  fn->configure(inputs, axis, output);
+  auto fn = acl_common::generateLayer<arm_compute::CLStackLayer>(inputs, axis, output);
 
   // Revert disabling applied dim_correction
   assert(inputs.size() == orig_inputs_acl_tensor_shapes.size());
@@ -875,7 +823,21 @@ void KernelGenerator::visit(const ir::operation::Pack &node)
     inputs.at(i)->info()->set_tensor_shape(orig_inputs_acl_tensor_shapes.at(i));
   }
 
-  _return_fn = asAclClFunction(std::move(fn));
+  _return_fn = asAclFunction(std::move(fn));
+}
+
+void KernelGenerator::visit(const ir::operation::Pool2D &node)
+{
+  auto raw_fn = acl_common::kernelGenPool2D<::arm_compute::CLPoolingLayer>(
+      node, _ctx, _tensor_reg, _current_op_seq_layout,
+      acl_common::convertPoolType(node.param().op_type));
+
+  const auto ofm_index{node.getOutputs().at(0)};
+  auto ofm_tensor = _tensor_reg->getAclTensor(ofm_index).get();
+  const auto activation = node.param().activation;
+  _return_fn = std::make_unique<exec::FunctionSequence>(
+      asAclFunction(std::move(raw_fn)),
+      ActivationBuilder::generate(activation, ofm_tensor->handle()));
 }
 
 void KernelGenerator::visit(const ir::operation::Permute &node)
@@ -883,8 +845,8 @@
   const auto ofm_idx{node.getOutputs().at(0)};
   const auto ifm_idx{node.getInputs().at(0)};
   const auto permute_type = node.getPermuteType();
-  auto ofm_tensor = _tensor_builder->at(ofm_idx).get();
-  auto ifm_tensor = _tensor_builder->at(ifm_idx).get();
+  auto ofm_tensor = _tensor_reg->getAclTensor(ofm_idx).get();
+  auto ifm_tensor = _tensor_reg->getAclTensor(ifm_idx).get();
   const auto rank = _ctx.at(ofm_idx).shape().rank();
   assert(_ctx.at(ifm_idx).shape().rank() == _ctx.at(ofm_idx).shape().rank());
@@ -895,70 +857,23 @@
     // WHCN -> CWHN
     pv = arm_compute::PermutationVector{2, 0, 1};
 
-    auto l = std::make_unique<::arm_compute::CLPermute>();
-
-    l->configure(ifm_tensor->handle(), ofm_tensor->handle(), pv);
-
-    fn = std::move(l);
+    fn = acl_common::generateLayer<arm_compute::CLPermute>(ifm_tensor->handle(),
+                                                           ofm_tensor->handle(), pv);
   }
   else if (permute_type == ir::operation::Permute::Type::NHWC_TO_NCHW && rank == 4)
   {
     // CWHN -> WHCN
     pv = arm_compute::PermutationVector{1, 2, 0};
 
-    auto l = std::make_unique<::arm_compute::CLPermute>();
-
-    l->configure(ifm_tensor->handle(), ofm_tensor->handle(), pv);
-
-    fn = std::move(l);
+    fn = acl_common::generateLayer<::arm_compute::CLPermute>(ifm_tensor->handle(),
                                                             ofm_tensor->handle(), pv);
   }
   else
   {
-    auto l = std::make_unique<::arm_compute::CLCopy>();
-
-    l->configure(ifm_tensor->handle(), ofm_tensor->handle());
-
-    fn = std::move(l);
+    fn = acl_common::generateLayer<arm_compute::CLCopy>(ifm_tensor->handle(), ofm_tensor->handle());
  }
 
-  auto acl_fn = asAclClFunction(std::move(fn));
-
-  _return_fn = std::move(acl_fn);
-}
-
-void KernelGenerator::visit(const ir::operation::RSQRT &node)
-{
-  const auto ofm_index{node.getOutputs().at(0)};
-  const auto ifm_index{node.getInputs().at(ir::operation::RSQRT::Input::INPUT)};
-
-  auto ofm_tensor = _tensor_builder->at(ofm_index).get();
-  auto ifm_tensor = _tensor_builder->at(ifm_index).get();
-
-  auto fn = std::make_unique<::arm_compute::CLRsqrtLayer>();
-
-  fn->configure(ifm_tensor->handle(), ofm_tensor->handle());
-
-  _return_fn = asAclClFunction(std::move(fn));
-}
-
-void KernelGenerator::visit(const ir::operation::ReLU &node)
-{
-  const auto output_index{node.getOutputs().at(0)};
-  const auto input_index{node.getInputs().at(ir::operation::ReLU::Input::INPUT)};
-
-  auto output_tensor = _tensor_builder->at(output_index).get();
-  auto input_tensor = _tensor_builder->at(input_index).get();
-
-  auto fn = std::make_unique<arm_compute::CLActivationLayer>();
-
-  const ::arm_compute::ActivationLayerInfo act_info{
-      ::arm_compute::ActivationLayerInfo::ActivationFunction::RELU};
-
-  fn->configure(input_tensor->handle(), output_tensor->handle(), act_info);
-
-  auto acl_fn = asAclClFunction(std::move(fn));
-
-  _return_fn = std::move(acl_fn);
+  _return_fn = asAclFunction(std::move(fn));
 }
 
 void KernelGenerator::visit(const ir::operation::ResizeBilinear &node)
@@ -967,58 +882,32 @@
 
   const auto ifm_index{node.getInputs().at(ir::operation::ResizeBilinear::Input::INPUT)};
 
-  auto ofm_tensor = _tensor_builder->at(ofm_index).get();
-  auto ifm_tensor = _tensor_builder->at(ifm_index).get();
+  auto ofm_tensor = _tensor_reg->getAclTensor(ofm_index).get();
+  auto ifm_tensor = _tensor_reg->getAclTensor(ifm_index).get();
 
-  auto fn = std::make_unique<::arm_compute::CLScale>();
+  auto fn = acl_common::generateLayer<arm_compute::CLScale>(
+      ifm_tensor->handle(), ofm_tensor->handle(), ::arm_compute::InterpolationPolicy::BILINEAR,
+      ::arm_compute::BorderMode::REPLICATE, ::arm_compute::PixelValue(0.f),
+      ::arm_compute::SamplingPolicy::TOP_LEFT);
 
-  fn->configure(ifm_tensor->handle(), ofm_tensor->handle(),
-                ::arm_compute::InterpolationPolicy::BILINEAR, ::arm_compute::BorderMode::REPLICATE,
-                ::arm_compute::PixelValue(0.f), ::arm_compute::SamplingPolicy::TOP_LEFT);
-
-  auto acl_fn = asAclClFunction(std::move(fn));
-
-  _return_fn = std::move(acl_fn);
-}
-
-void KernelGenerator::visit(const ir::operation::ReLU1 &node)
-{
-  const auto ofm_index{node.getOutputs().at(0)};
-  const auto ifm_index{node.getInputs().at(ir::operation::ReLU1::Input::INPUT)};
-
-  auto ofm_tensor = _tensor_builder->at(ofm_index).get();
-  auto ifm_tensor = _tensor_builder->at(ifm_index).get();
-
-  const ::arm_compute::ActivationLayerInfo act_info{
-      ::arm_compute::ActivationLayerInfo::ActivationFunction::LU_BOUNDED_RELU, 1.0f, -1.0f};
-
-  auto fn = std::make_unique<::arm_compute::CLActivationLayer>();
-
-  fn->configure(ifm_tensor->handle(), ofm_tensor->handle(), act_info);
-
-  auto acl_fn = asAclClFunction(std::move(fn));
-
-  _return_fn = std::move(acl_fn);
+  _return_fn = asAclFunction(std::move(fn));
 }
 
-void KernelGenerator::visit(const ir::operation::ReLU6 &node)
+void KernelGenerator::visit(const ir::operation::ResizeNearestNeighbor &node)
 {
   const auto ofm_index{node.getOutputs().at(0)};
-  const auto ifm_index{node.getInputs().at(ir::operation::ReLU6::Input::INPUT)};
-
-  auto ofm_tensor = _tensor_builder->at(ofm_index).get();
-  auto ifm_tensor = _tensor_builder->at(ifm_index).get();
-
-  const ::arm_compute::ActivationLayerInfo act_info{
-      ::arm_compute::ActivationLayerInfo::ActivationFunction::BOUNDED_RELU, 6.0f};
+  const auto ifm_index{node.getInputs().at(ir::operation::ResizeNearestNeighbor::Input::INPUT)};
 
-  auto fn = std::make_unique<::arm_compute::CLActivationLayer>();
+  auto ofm_tensor = _tensor_reg->getAclTensor(ofm_index).get();
+  auto ifm_tensor = _tensor_reg->getAclTensor(ifm_index).get();
 
-  fn->configure(ifm_tensor->handle(), ofm_tensor->handle(), act_info);
+  auto fn = acl_common::generateLayer<arm_compute::CLScale>(
+      ifm_tensor->handle(), ofm_tensor->handle(),
+      ::arm_compute::InterpolationPolicy::NEAREST_NEIGHBOR, ::arm_compute::BorderMode::REPLICATE,
+      ::arm_compute::PixelValue(0.f), ::arm_compute::SamplingPolicy::TOP_LEFT);
 
-  auto acl_fn = asAclClFunction(std::move(fn));
-
-  _return_fn = std::move(acl_fn);
+  _return_fn = asAclFunction(std::move(fn));
 }
 
 void KernelGenerator::visit(const ir::operation::RNN &node)
@@ -1036,43 +925,25 @@
 
   const auto activation = node.param().activation;
 
-  auto output_tensor = _tensor_builder->at(output_index).get();
-  auto hidden_state_out_tensor = _tensor_builder->at(hidden_state_out_index).get();
+  auto output_tensor = _tensor_reg->getAclTensor(output_index).get();
+  auto hidden_state_out_tensor = _tensor_reg->getAclTensor(hidden_state_out_index).get();
 
-  auto input_tensor = _tensor_builder->at(input_index).get();
-  auto weights_tensor = _tensor_builder->at(weights_index).get();
-  auto recurrent_weights_tensor = _tensor_builder->at(recurrent_weights_index).get();
-  auto bias_tensor = _tensor_builder->at(bias_index).get();
-  auto hidden_state_in_tensor = _tensor_builder->at(hidden_state_in_index).get();
+  auto input_tensor = _tensor_reg->getAclTensor(input_index).get();
+  auto weights_tensor = _tensor_reg->getAclTensor(weights_index).get();
+  auto recurrent_weights_tensor = _tensor_reg->getAclTensor(recurrent_weights_index).get();
+  auto bias_tensor = _tensor_reg->getAclTensor(bias_index).get();
+  auto hidden_state_in_tensor = _tensor_reg->getAclTensor(hidden_state_in_index).get();
 
   auto act_info = ::onert::backend::acl_common::asActivationLayerInfo(activation);
 
-  auto copy_layer = std::make_unique<::arm_compute::CLCopy>();
-  copy_layer->configure(hidden_state_in_tensor->handle(), hidden_state_out_tensor->handle());
-  _return_fn = asAclClFunction(std::move(copy_layer));
+  auto copy_layer = acl_common::generateLayer<arm_compute::CLCopy>(
+      hidden_state_in_tensor->handle(), hidden_state_out_tensor->handle());
+  _return_fn = asAclFunction(std::move(copy_layer));
 
-  auto fn = std::make_unique<::arm_compute::CLRNNLayer>(
-      _tensor_builder->acl_tensor_manager()->internal_buffer_manager());
-  fn->configure(input_tensor->handle(), weights_tensor->handle(),
-                recurrent_weights_tensor->handle(), bias_tensor->handle(),
-                hidden_state_out_tensor->handle(), output_tensor->handle(), act_info);
-  _return_fn = asAclClFunction(std::move(fn));
-}
-
-void KernelGenerator::visit(const ir::operation::Floor &node)
-{
-  const auto ofm_index{node.getOutputs().at(0)};
-  const auto ifm_index{node.getInputs().at(ir::operation::Floor::Input::INPUT)};
-
-  auto ofm_tensor = _tensor_builder->at(ofm_index).get();
-  auto ifm_tensor = _tensor_builder->at(ifm_index).get();
-
-  auto fn = std::make_unique<::arm_compute::CLFloor>();
-
-  fn->configure(ifm_tensor->handle(), ofm_tensor->handle());
-
-  auto acl_fn = asAclClFunction(std::move(fn));
-
-  _return_fn = std::move(acl_fn);
+  auto fn = acl_common::generateLayer<arm_compute::CLRNNLayer>(
+      _tensor_builder->acl_tensor_manager()->internal_buffer_manager(), input_tensor->handle(),
+      weights_tensor->handle(), recurrent_weights_tensor->handle(), bias_tensor->handle(),
+      hidden_state_out_tensor->handle(), output_tensor->handle(), act_info);
+  _return_fn = asAclFunction(std::move(fn));
 }
 
 void KernelGenerator::visit(const ir::operation::SpaceToBatchND &node)
@@ -1083,24 +954,19 @@
       node.getInputs().at(ir::operation::SpaceToBatchND::Input::BLOCK_SIZE)};
   const auto paddings_index{node.getInputs().at(ir::operation::SpaceToBatchND::Input::PADDINGS)};
 
-  auto ofm_tensor = _tensor_builder->at(ofm_index).get();
-  auto ifm_tensor = _tensor_builder->at(ifm_index).get();
-  auto block_size_tensor = _tensor_builder->at(block_size_index).get();
-  auto paddings_tensor = _tensor_builder->at(paddings_index).get();
+  auto ofm_tensor = _tensor_reg->getAclTensor(ofm_index).get();
+  auto ifm_tensor = _tensor_reg->getAclTensor(ifm_index).get();
+  auto block_size_tensor = _tensor_reg->getAclTensor(block_size_index).get();
+  auto paddings_tensor = _tensor_reg->getAclTensor(paddings_index).get();
 
   assert(_ctx.at(block_size_index).data());
   assert(_ctx.at(paddings_index).data());
 
-  std::unique_ptr<::arm_compute::IFunction> fn;
-
-  auto l = std::make_unique<::arm_compute::CLSpaceToBatchLayer>();
-  l->configure(ifm_tensor->handle(), block_size_tensor->handle(), paddings_tensor->handle(),
-               ofm_tensor->handle());
-  fn = std::move(l);
-
-  auto acl_fn = asAclClFunction(std::move(fn));
+  auto fn = acl_common::generateLayer<arm_compute::CLSpaceToBatchLayer>(
+      ifm_tensor->handle(), block_size_tensor->handle(), paddings_tensor->handle(),
+      ofm_tensor->handle());
 
-  _return_fn = std::move(acl_fn);
+  _return_fn = asAclFunction(std::move(fn));
 }
 
 void KernelGenerator::visit(const ir::operation::SpaceToDepth &node)
@@ -1110,29 +976,13 @@
 
   auto block_size = node.param().block_size;
 
-  auto ofm_tensor = _tensor_builder->at(ofm_index).get();
-  auto ifm_tensor = _tensor_builder->at(ifm_index).get();
-
-  auto fn = std::make_unique<::arm_compute::CLSpaceToDepthLayer>();
-
-  fn->configure(ifm_tensor->handle(), ofm_tensor->handle(), block_size);
-
-  auto acl_fn = asAclClFunction(std::move(fn));
-
-  _return_fn = std::move(acl_fn);
-}
+  auto ofm_tensor = _tensor_reg->getAclTensor(ofm_index).get();
+  auto ifm_tensor = _tensor_reg->getAclTensor(ifm_index).get();
 
-void KernelGenerator::visit(const ir::operation::L2Pool2D &node)
-{
-  auto raw_fn = acl_common::kernelGenPool2D<::arm_compute::CLPoolingLayer>(
-      node, _ctx, _tensor_builder, _current_op_seq_layout, ::arm_compute::PoolingType::L2);
+  auto fn = acl_common::generateLayer<arm_compute::CLSpaceToDepthLayer>(
+      ifm_tensor->handle(), ofm_tensor->handle(), block_size);
 
-  const auto ofm_index{node.getOutputs().at(0)};
-  auto ofm_tensor = _tensor_builder->at(ofm_index).get();
-  const auto activation = node.param().activation;
-  _return_fn = std::make_unique<exec::FunctionSequence>(
-      asAclClFunction(std::move(raw_fn)),
-      ActivationBuilder::generate(activation, ofm_tensor->handle()));
+  _return_fn = asAclFunction(std::move(fn));
 }
 
 void KernelGenerator::visit(const ir::operation::EmbeddingLookup &node)
@@ -1141,17 +991,14 @@
   const auto lookups_index{node.getInputs().at(ir::operation::EmbeddingLookup::Input::LOOKUPS)};
   const auto values_index{node.getInputs().at(ir::operation::EmbeddingLookup::Input::VALUES)};
 
-  auto output_tensor = _tensor_builder->at(output_index).get();
-  auto lookups_tensor = _tensor_builder->at(lookups_index).get();
-  auto values_tensor = _tensor_builder->at(values_index).get();
-
-  auto fn = std::make_unique<::arm_compute::CLEmbeddingLookup>();
-
-  fn->configure(values_tensor->handle(), output_tensor->handle(), lookups_tensor->handle());
+  auto output_tensor = _tensor_reg->getAclTensor(output_index).get();
+  auto lookups_tensor = _tensor_reg->getAclTensor(lookups_index).get();
+  auto values_tensor = _tensor_reg->getAclTensor(values_index).get();
 
-  auto acl_fn = asAclClFunction(std::move(fn));
+  auto fn = acl_common::generateLayer<arm_compute::CLEmbeddingLookup>(
+      values_tensor->handle(), output_tensor->handle(), lookups_tensor->handle());
 
-  _return_fn = std::move(acl_fn);
+  _return_fn = asAclFunction(std::move(fn));
 }
 
 void KernelGenerator::visit(const ir::operation::L2Normalization &node)
@@ -1173,19 +1020,16 @@
   float beta = 0.5f;  // pow(reduction, -0.5) = 1 / sqrt(reduction)
   float bias = 0.0f;  // Don't offset the reduction.
 
-  auto ofm_tensor = _tensor_builder->at(ofm_index).get();
-  auto ifm_tensor = _tensor_builder->at(ifm_index).get();
+  auto ofm_tensor = _tensor_reg->getAclTensor(ofm_index).get();
+  auto ifm_tensor = _tensor_reg->getAclTensor(ifm_index).get();
 
   const auto norm_info = ::arm_compute::NormalizationLayerInfo(::arm_compute::NormType::CROSS_MAP,
                                                                radius, alpha, beta, bias, false);
 
-  auto fn = std::make_unique<::arm_compute::CLNormalizationLayer>();
-
-  fn->configure(ifm_tensor->handle(), ofm_tensor->handle(), norm_info);
-
-  auto acl_fn = asAclClFunction(std::move(fn));
+  auto fn = acl_common::generateLayer<arm_compute::CLNormalizationLayer>(
+      ifm_tensor->handle(), ofm_tensor->handle(), norm_info);
 
-  _return_fn = std::move(acl_fn);
+  _return_fn = asAclFunction(std::move(fn));
 }
 
 void KernelGenerator::visit(const ir::operation::HashtableLookup &node)
@@ -1197,21 +1041,18 @@
   const auto keys_index{node.getInputs().at(ir::operation::HashtableLookup::Input::KEYS)};
   const auto values_index{node.getInputs().at(ir::operation::HashtableLookup::Input::VALUES)};
 
-  auto output_tensor = _tensor_builder->at(output_index).get();
-  auto hits_tensor = _tensor_builder->at(hits_index).get();
-
-  auto lookups_tensor = _tensor_builder->at(lookups_index).get();
-  auto keys_tensor = _tensor_builder->at(keys_index).get();
-  auto values_tensor = _tensor_builder->at(values_index).get();
-
-  auto fn = std::make_unique<::arm_compute::CLHashtableLookup>();
+  auto output_tensor = _tensor_reg->getAclTensor(output_index).get();
+  auto hits_tensor = _tensor_reg->getAclTensor(hits_index).get();
 
-  fn->configure(lookups_tensor->handle(), keys_tensor->handle(), values_tensor->handle(),
-                output_tensor->handle(), hits_tensor->handle());
+  auto lookups_tensor = _tensor_reg->getAclTensor(lookups_index).get();
+  auto keys_tensor = _tensor_reg->getAclTensor(keys_index).get();
+  auto values_tensor = _tensor_reg->getAclTensor(values_index).get();
 
-  auto acl_fn = asAclClFunction(std::move(fn));
+  auto fn = acl_common::generateLayer<arm_compute::CLHashtableLookup>(
+      lookups_tensor->handle(), keys_tensor->handle(), values_tensor->handle(),
+      output_tensor->handle(), hits_tensor->handle());
- _return_fn = std::move(acl_fn); + _return_fn = asAclFunction(std::move(fn)); } void KernelGenerator::visit(const ir::operation::PReLU &node) @@ -1220,17 +1061,14 @@ void KernelGenerator::visit(const ir::operation::PReLU &node) const auto ifm_index{node.getInputs().at(ir::operation::PReLU::Input::INPUT)}; const auto alpha_index{node.getInputs().at(ir::operation::PReLU::Input::ALPHA)}; - auto ofm_tensor = _tensor_builder->at(ofm_index).get(); - auto ifm_tensor = _tensor_builder->at(ifm_index).get(); - auto alpha_tensor = _tensor_builder->at(alpha_index).get(); - - auto fn = std::make_unique<::arm_compute::CLPReluLayer>(); - - fn->configure(ifm_tensor->handle(), alpha_tensor->handle(), ofm_tensor->handle()); + auto ofm_tensor = _tensor_reg->getAclTensor(ofm_index).get(); + auto ifm_tensor = _tensor_reg->getAclTensor(ifm_index).get(); + auto alpha_tensor = _tensor_reg->getAclTensor(alpha_index).get(); - auto acl_fn = asAclClFunction(std::move(fn)); + auto fn = acl_common::generateLayer<arm_compute::CLPReluLayer>( + ifm_tensor->handle(), alpha_tensor->handle(), ofm_tensor->handle()); - _return_fn = std::move(acl_fn); + _return_fn = asAclFunction(std::move(fn)); } void KernelGenerator::visit(const ir::operation::TransposeConv &node) @@ -1258,77 +1096,18 @@ void KernelGenerator::visit(const ir::operation::TransposeConv &node) invalid_vertical = ofm_shape.H - (1 + (ifm_shape.H - 1) * stride.vertical) - (ker_shape.H - 1); } - auto ofm_tensor = _tensor_builder->at(ofm_index).get(); - auto ifm_tensor = _tensor_builder->at(ifm_index).get(); - auto ker_tensor = _tensor_builder->at(ker_index).get(); + auto ofm_tensor = _tensor_reg->getAclTensor(ofm_index).get(); + auto ifm_tensor = _tensor_reg->getAclTensor(ifm_index).get(); + auto ker_tensor = _tensor_reg->getAclTensor(ker_index).get(); const auto tconv_info = acl_common::asPadStrideInfo(padding, stride); - auto fn = std::make_unique<::arm_compute::CLTransposeConvLayer>( - _tensor_builder->acl_tensor_manager()->internal_buffer_manager()); - - fn->configure(ifm_tensor->handle(), ker_tensor->handle(), nullptr, ofm_tensor->handle(), - tconv_info, invalid_horizontal, invalid_vertical); - - auto acl_fn = asAclClFunction(std::move(fn)); - - _return_fn = std::move(acl_fn); -} - -void KernelGenerator::visit(const ir::operation::SQRT &node) -{ - const auto output_index{node.getOutputs().at(0)}; - const auto input_index{node.getInputs().at(ir::operation::SQRT::Input::INPUT)}; - - auto output_tensor = _tensor_builder->at(output_index).get(); - auto input_tensor = _tensor_builder->at(input_index).get(); - - const ::arm_compute::ActivationLayerInfo act_info{ - ::arm_compute::ActivationLayerInfo::ActivationFunction::SQRT}; - - auto fn = std::make_unique<::arm_compute::CLActivationLayer>(); - - fn->configure(input_tensor->handle(), output_tensor->handle(), act_info); - - auto acl_fn = asAclClFunction(std::move(fn)); - - _return_fn = std::move(acl_fn); -} - -void KernelGenerator::visit(const ir::operation::LogicalOr &node) -{ - const auto output_index{node.getOutputs().at(0)}; - const auto input0_index{node.getInputs().at(ir::operation::LogicalOr::Input::INPUT0)}; - const auto input1_index{node.getInputs().at(ir::operation::LogicalOr::Input::INPUT1)}; - - auto output_tensor = _tensor_builder->at(output_index).get(); - auto input0_tensor = _tensor_builder->at(input0_index).get(); - auto input1_tensor = _tensor_builder->at(input1_index).get(); - - auto fn = std::make_unique<::arm_compute::CLBitwiseOr>(); - - fn->configure(input0_tensor->handle(), 
input1_tensor->handle(), output_tensor->handle()); - - auto acl_fn = asAclClFunction(std::move(fn)); - - _return_fn = std::move(acl_fn); -} - -void KernelGenerator::visit(const ir::operation::LogicalNot &node) -{ - const auto output_index{node.getOutputs().at(0)}; - const auto input_index{node.getInputs().at(ir::operation::LogicalNot::Input::INPUT)}; - - auto output_tensor = _tensor_builder->at(output_index).get(); - auto input_tensor = _tensor_builder->at(input_index).get(); - - auto fn = std::make_unique<::arm_compute::CLBitwiseNot>(); + auto fn = acl_common::generateLayer<arm_compute::CLTransposeConvLayer>( + _tensor_builder->acl_tensor_manager()->internal_buffer_manager(), ifm_tensor->handle(), + ker_tensor->handle(), nullptr, ofm_tensor->handle(), tconv_info, invalid_horizontal, + invalid_vertical); - fn->configure(input_tensor->handle(), output_tensor->handle()); - - auto acl_fn = asAclClFunction(std::move(fn)); - - _return_fn = std::move(acl_fn); + _return_fn = asAclFunction(std::move(fn)); } void KernelGenerator::visit(const ir::operation::SquaredDifference &node) @@ -1337,17 +1116,14 @@ void KernelGenerator::visit(const ir::operation::SquaredDifference &node) const auto lhs_index{node.getInputs().at(ir::operation::SquaredDifference::Input::LHS)}; const auto rhs_index{node.getInputs().at(ir::operation::SquaredDifference::Input::RHS)}; - auto ofm_tensor = _tensor_builder->at(ofm_index).get(); - auto lhs_tensor = _tensor_builder->at(lhs_index).get(); - auto rhs_tensor = _tensor_builder->at(rhs_index).get(); + auto ofm_tensor = _tensor_reg->getAclTensor(ofm_index).get(); + auto lhs_tensor = _tensor_reg->getAclTensor(lhs_index).get(); + auto rhs_tensor = _tensor_reg->getAclTensor(rhs_index).get(); - auto fn = std::make_unique<::arm_compute::CLElementwiseSquaredDiff>(); + auto fn = acl_common::generateLayer<arm_compute::CLElementwiseSquaredDiff>( + lhs_tensor->handle(), rhs_tensor->handle(), ofm_tensor->handle()); - fn->configure(lhs_tensor->handle(), rhs_tensor->handle(), ofm_tensor->handle()); - - auto acl_fn = asAclClFunction(std::move(fn)); - - _return_fn = std::move(acl_fn); + _return_fn = asAclFunction(std::move(fn)); } void KernelGenerator::visit(const ir::operation::TopKV2 &node) @@ -1364,17 +1140,14 @@ void KernelGenerator::visit(const ir::operation::TopKV2 &node) const auto k = node.param().k; - auto values_tensor = _tensor_builder->at(outputValues_index).get(); - auto indices_tensor = _tensor_builder->at(outputIndices_index).get(); - auto input_tensor = _tensor_builder->at(inputData_index).get(); + auto values_tensor = _tensor_reg->getAclTensor(outputValues_index).get(); + auto indices_tensor = _tensor_reg->getAclTensor(outputIndices_index).get(); + auto input_tensor = _tensor_reg->getAclTensor(inputData_index).get(); - auto fn = std::make_unique<::arm_compute::CLTopKV2>(); + auto fn = acl_common::generateLayer<arm_compute::CLTopKV2>( + input_tensor->handle(), k, values_tensor->handle(), indices_tensor->handle()); - fn->configure(input_tensor->handle(), k, values_tensor->handle(), indices_tensor->handle()); - - auto acl_fn = asAclClFunction(std::move(fn)); - - _return_fn = std::move(acl_fn); + _return_fn = asAclFunction(std::move(fn)); } void KernelGenerator::visit(const ir::operation::Gather &node) @@ -1389,9 +1162,9 @@ void KernelGenerator::visit(const ir::operation::Gather &node) const auto axis_value = (axis_raw < 0 ? 
(ifm_rank + axis_raw) : axis_raw); const int axis = ::onert::backend::acl_common::ToARMComputeAxis(ifm_rank, axis_value).value(); - auto ofm_tensor = _tensor_builder->at(ofm_index).get(); - auto ifm_tensor = _tensor_builder->at(ifm_index).get(); - auto indices_tensor = _tensor_builder->at(indices_index).get(); + auto ofm_tensor = _tensor_reg->getAclTensor(ofm_index).get(); + auto ifm_tensor = _tensor_reg->getAclTensor(ifm_index).get(); + auto indices_tensor = _tensor_reg->getAclTensor(indices_index).get(); // NOTE The frontend layout and backend layout must be the same for this operation. // If not the same, we have to add a stage(?) to perform permutation of output tensor. It @@ -1407,8 +1180,6 @@ void KernelGenerator::visit(const ir::operation::Gather &node) assert(backend_layout == indices_tensor->layout()); assert(ifm_rank < 4 || _current_op_seq_layout == backend_layout); - auto fn = std::make_unique<::arm_compute::CLGatherEx>(); - // input is n-D, indices k-D, output is (n + k - 1)-D size_t n = ifm_rank; assert(n == ifm_tensor->num_dimensions()); @@ -1433,52 +1204,14 @@ void KernelGenerator::visit(const ir::operation::Gather &node) acl_common::asTensorShape(indices.shape(), _current_op_seq_layout, backend_layout, false)); } - fn->configure(ifm_tensor->handle(), indices_tensor->handle(), ofm_tensor->handle(), axis); + auto fn = acl_common::generateLayer<arm_compute::CLGatherEx>( + ifm_tensor->handle(), indices_tensor->handle(), ofm_tensor->handle(), axis); // Revert disabling applied dim_correction ifm_tensor->info()->set_tensor_shape(orig_ifm_acl_tensor_shape); indices_tensor->info()->set_tensor_shape(orig_indice_acl_tensor_shape); - auto acl_fn = asAclClFunction(std::move(fn)); - - _return_fn = std::move(acl_fn); -} - -void KernelGenerator::visit(const ir::operation::Neg &node) -{ - const auto ofm_index{node.getOutputs().at(0)}; - const auto ifm_index{node.getInputs().at(ir::operation::Neg::Input::INPUT)}; - - auto ofm_tensor = _tensor_builder->at(ofm_index).get(); - auto ifm_tensor = _tensor_builder->at(ifm_index).get(); - - auto fn = std::make_unique<::arm_compute::CLNeg>(); - - fn->configure(ifm_tensor->handle(), ofm_tensor->handle()); - - auto acl_fn = asAclClFunction(std::move(fn)); - - _return_fn = std::move(acl_fn); -} - -void KernelGenerator::visit(const ir::operation::Abs &node) -{ - const auto output_index{node.getOutputs().at(0)}; - const auto input_index{node.getInputs().at(ir::operation::Abs::Input::INPUT)}; - - auto output_tensor = _tensor_builder->at(output_index).get(); - auto input_tensor = _tensor_builder->at(input_index).get(); - - const ::arm_compute::ActivationLayerInfo act_info{ - ::arm_compute::ActivationLayerInfo::ActivationFunction::ABS}; - - auto fn = std::make_unique<::arm_compute::CLActivationLayer>(); - - fn->configure(input_tensor->handle(), output_tensor->handle(), act_info); - - auto acl_fn = asAclClFunction(std::move(fn)); - - _return_fn = std::move(acl_fn); + _return_fn = asAclFunction(std::move(fn)); } void KernelGenerator::visit(const ir::operation::ArgMax &node) @@ -1491,8 +1224,8 @@ void KernelGenerator::visit(const ir::operation::ArgMax &node) assert((ifm_shape.rank() - 1) == ofm_shape.rank()); - auto ofm_tensor = _tensor_builder->at(ofm_index).get(); - auto ifm_tensor = _tensor_builder->at(ifm_index).get(); + auto ofm_tensor = _tensor_reg->getAclTensor(ofm_index).get(); + auto ifm_tensor = _tensor_reg->getAclTensor(ifm_index).get(); const auto ifm_rank = _ctx.at(ifm_index).shape().rank(); auto frontend_layout = _current_op_seq_layout; auto 
backend_layout = ifm_tensor->layout(); @@ -1506,31 +1239,11 @@ void KernelGenerator::visit(const ir::operation::ArgMax &node) auto acl_axis = acl_common::ToARMComputeAxis(ifm_rank, axis_value, frontend_layout, backend_layout).value(); - auto fn = std::make_unique<::arm_compute::CLArgMinMaxLayer>(); + auto fn = acl_common::generateLayer<arm_compute::CLArgMinMaxLayer>( + ifm_tensor->handle(), acl_axis, ofm_tensor->handle(), + ::arm_compute::ReductionOperation::ARG_IDX_MAX); - fn->configure(ifm_tensor->handle(), acl_axis, ofm_tensor->handle(), - ::arm_compute::ReductionOperation::ARG_IDX_MAX); - - auto acl_fn = asAclClFunction(std::move(fn)); - - _return_fn = std::move(acl_fn); -} - -void KernelGenerator::visit(const ir::operation::Dequantize &node) -{ - const auto output_index{node.getOutputs().at(0)}; - const auto input_index{node.getInputs().at(ir::operation::Dequantize::Input::INPUT)}; - - auto output_tensor = _tensor_builder->at(output_index).get(); - auto input_tensor = _tensor_builder->at(input_index).get(); - - auto fn = std::make_unique<::arm_compute::CLDequantizationLayer>(); - - fn->configure(input_tensor->handle(), output_tensor->handle()); - - auto acl_fn = asAclClFunction(std::move(fn)); - - _return_fn = std::move(acl_fn); + _return_fn = asAclFunction(std::move(fn)); } void KernelGenerator::visit(const ir::operation::LocalResponseNormalization &node) @@ -1544,19 +1257,16 @@ void KernelGenerator::visit(const ir::operation::LocalResponseNormalization &nod auto beta = node.param().beta; auto bias = node.param().bias; - auto ofm_tensor = _tensor_builder->at(ofm_index).get(); - auto ifm_tensor = _tensor_builder->at(ifm_index).get(); + auto ofm_tensor = _tensor_reg->getAclTensor(ofm_index).get(); + auto ifm_tensor = _tensor_reg->getAclTensor(ifm_index).get(); const auto norm_info = ::arm_compute::NormalizationLayerInfo( ::arm_compute::NormType::CROSS_MAP, radius * 2 + 1, alpha, beta, bias, false); - auto fn = std::make_unique<::arm_compute::CLNormalizationLayer>(); - - fn->configure(ifm_tensor->handle(), ofm_tensor->handle(), norm_info); - - auto acl_fn = asAclClFunction(std::move(fn)); + auto fn = acl_common::generateLayer<arm_compute::CLNormalizationLayer>( + ifm_tensor->handle(), ofm_tensor->handle(), norm_info); - _return_fn = std::move(acl_fn); + _return_fn = asAclFunction(std::move(fn)); } void KernelGenerator::visit(const ir::operation::DepthToSpace &node) @@ -1567,16 +1277,13 @@ void KernelGenerator::visit(const ir::operation::DepthToSpace &node) auto block_size = node.param().block_size; assert(block_size > 0); - auto output_tensor = _tensor_builder->at(output_index).get(); - auto input_tensor = _tensor_builder->at(input_index).get(); - - auto fn = std::make_unique<::arm_compute::CLDepthToSpaceLayer>(); - - fn->configure(input_tensor->handle(), output_tensor->handle(), block_size); + auto output_tensor = _tensor_reg->getAclTensor(output_index).get(); + auto input_tensor = _tensor_reg->getAclTensor(input_index).get(); - auto acl_fn = asAclClFunction(std::move(fn)); + auto fn = acl_common::generateLayer<arm_compute::CLDepthToSpaceLayer>( + input_tensor->handle(), output_tensor->handle(), block_size); - _return_fn = std::move(acl_fn); + _return_fn = asAclFunction(std::move(fn)); } void KernelGenerator::visit(const ir::operation::Split &node) @@ -1590,10 +1297,10 @@ void KernelGenerator::visit(const ir::operation::Split &node) for (const auto &output : node.getOutputs()) output_indexes.emplace_back(output); - auto ifm_tensor = _tensor_builder->at(ifm_index).get(); + auto 
ifm_tensor = _tensor_reg->getAclTensor(ifm_index).get(); std::vector<arm_compute::ICLTensor *> output_tensors; for (const auto &ofm_ind : output_indexes) - output_tensors.emplace_back(_tensor_builder->at(ofm_ind).get()->handle()); + output_tensors.emplace_back(_tensor_reg->getAclTensor(ofm_ind).get()->handle()); const auto frontend_layout = _current_op_seq_layout; const auto backend_layout = ifm_tensor->layout(); @@ -1602,11 +1309,10 @@ void KernelGenerator::visit(const ir::operation::Split &node) axis += ifm_rank; axis = acl_common::ToARMComputeAxis(ifm_rank, axis, frontend_layout, backend_layout).value(); - auto fn = std::make_unique<::arm_compute::CLSplit>(); - - fn->configure(ifm_tensor->handle(), output_tensors, axis); + auto fn = + acl_common::generateLayer<arm_compute::CLSplit>(ifm_tensor->handle(), output_tensors, axis); - _return_fn = asAclClFunction(std::move(fn)); + _return_fn = asAclFunction(std::move(fn)); } void KernelGenerator::visit(const ir::operation::Unpack &node) @@ -1620,13 +1326,13 @@ void KernelGenerator::visit(const ir::operation::Unpack &node) for (const auto &output_index : node.getOutputs()) output_indexes.emplace_back(output_index); - auto input = _tensor_builder->at(input_index).get()->handle(); + auto input = _tensor_reg->getAclTensor(input_index).get()->handle(); std::vector<arm_compute::ICLTensor *> outputs; for (const auto &output_index : output_indexes) - outputs.emplace_back(_tensor_builder->at(output_index)->handle()); + outputs.emplace_back(_tensor_reg->getAclTensor(output_index)->handle()); const auto frontend_layout = _current_op_seq_layout; - const auto backend_layout = _tensor_builder->at(input_index).get()->layout(); + const auto backend_layout = _tensor_reg->getAclTensor(input_index).get()->layout(); if (axis < 0) axis += input_rank; axis = acl_common::ToARMComputeAxis(input_rank, axis, frontend_layout, backend_layout).value(); @@ -1636,7 +1342,7 @@ void KernelGenerator::visit(const ir::operation::Unpack &node) for (const auto &output_index : output_indexes) { size_t output_rank = _ctx.at(output_index).shape().rank(); - const auto &output_tensor = _tensor_builder->at(output_index); + const auto &output_tensor = _tensor_reg->getAclTensor(output_index); orig_outputs_acl_tensor_shapes.emplace_back(output_tensor->info()->tensor_shape()); assert(output_rank == output_tensor->num_dimensions()); if (output_rank != output_tensor->info()->num_dimensions()) @@ -1647,11 +1353,9 @@ void KernelGenerator::visit(const ir::operation::Unpack &node) } } - auto fn = std::make_unique<::arm_compute::CLUnstack>(); - - fn->configure(input, outputs, axis); + auto fn = acl_common::generateLayer<arm_compute::CLUnstack>(input, outputs, axis); - _return_fn = asAclClFunction(std::move(fn)); + _return_fn = asAclFunction(std::move(fn)); } void KernelGenerator::visit(const ir::operation::Pad &node) @@ -1669,11 +1373,11 @@ void KernelGenerator::visit(const ir::operation::Pad &node) auto quant_info = ::arm_compute::QuantizationInfo(input_type.scale(), input_type.offset()); const auto pixel_value = ::arm_compute::PixelValue(0, data_type, quant_info); - auto input = _tensor_builder->at(input_index).get()->handle(); - auto output = _tensor_builder->at(output_index).get()->handle(); + auto input = _tensor_reg->getAclTensor(input_index).get()->handle(); + auto output = _tensor_reg->getAclTensor(output_index).get()->handle(); const auto frontend_layout = _current_op_seq_layout; - const auto backend_layout = _tensor_builder->at(input_index).get()->layout(); + const auto backend_layout = 
_tensor_reg->getAclTensor(input_index).get()->layout(); ::arm_compute::PaddingList padding_list; padding_list.resize(rank); @@ -1685,11 +1389,10 @@ acl_common::ToARMComputeAxis(rank, n, frontend_layout, backend_layout).value(); padding_list[axis] = ::arm_compute::PaddingInfo{from[0], from[1]}; } - auto fn = std::make_unique<::arm_compute::CLPadLayer>(); // Disable applied dim_correction size_t input_rank = _ctx.at(input_index).shape().rank(); - const auto &input_tensor = _tensor_builder->at(input_index); + const auto &input_tensor = _tensor_reg->getAclTensor(input_index); assert(input_rank == input_tensor->num_dimensions()); if (input_rank != input_tensor->info()->num_dimensions()) { @@ -1698,50 +1401,13 @@ _ctx.at(input_index).shape(), frontend_layout, backend_layout, false)); } - fn->configure(input, output, padding_list, pixel_value); + auto fn = + acl_common::generateLayer<arm_compute::CLPadLayer>(input, output, padding_list, pixel_value); // Do not revert disabling applied dim_correction; CLPadKernel has a CL kernel for 4 dimensions // It would produce a mismatch of results - _return_fn = asAclClFunction(std::move(fn)); -} - -void KernelGenerator::visit(const ir::operation::Min &node) -{ - const auto ofm_index{node.getOutputs().at(0)}; - const auto lhs_index{node.getInputs().at(ir::operation::Min::Input::LHS)}; - const auto rhs_index{node.getInputs().at(ir::operation::Min::Input::RHS)}; - - auto ofm_tensor = _tensor_builder->at(ofm_index).get(); - auto lhs_tensor = _tensor_builder->at(lhs_index).get(); - auto rhs_tensor = _tensor_builder->at(rhs_index).get(); - - auto fn = std::make_unique<::arm_compute::CLElementwiseMin>(); - - fn->configure(lhs_tensor->handle(), rhs_tensor->handle(), ofm_tensor->handle()); - - auto acl_fn = asAclClFunction(std::move(fn)); - - _return_fn = std::move(acl_fn); -} - -void KernelGenerator::visit(const ir::operation::Max &node) -{ - const auto ofm_index{node.getOutputs().at(0)}; - const auto lhs_index{node.getInputs().at(ir::operation::Max::Input::LHS)}; - const auto rhs_index{node.getInputs().at(ir::operation::Max::Input::RHS)}; - - auto ofm_tensor = _tensor_builder->at(ofm_index).get(); - auto lhs_tensor = _tensor_builder->at(lhs_index).get(); - auto rhs_tensor = _tensor_builder->at(rhs_index).get(); - - auto fn = std::make_unique<::arm_compute::CLElementwiseMax>(); - - fn->configure(lhs_tensor->handle(), rhs_tensor->handle(), ofm_tensor->handle()); - - auto acl_fn = asAclClFunction(std::move(fn)); - - _return_fn = std::move(acl_fn); + _return_fn = asAclFunction(std::move(fn)); } void KernelGenerator::visit(const ir::operation::ConvertFp32ToFp16 &node) @@ -1749,17 +1415,13 @@ const auto ofm_index{node.getOutputs().at(0)}; const auto ifm_index{node.getInputs().at(ir::operation::ConvertFp32ToFp16::Input::INPUT)}; - auto ofm_tensor = _tensor_builder->at(ofm_index).get(); - auto ifm_tensor = _tensor_builder->at(ifm_index).get(); - - auto fn = std::make_unique<::arm_compute::CLDepthConvertLayer>(); + auto ofm_tensor = _tensor_reg->getAclTensor(ofm_index).get(); + auto ifm_tensor = _tensor_reg->getAclTensor(ifm_index).get(); - fn->configure(ifm_tensor->handle(), ofm_tensor->handle(), ::arm_compute::ConvertPolicy::SATURATE, - 0); + auto fn = acl_common::generateLayer<arm_compute::CLDepthConvertLayer>( + ifm_tensor->handle(), ofm_tensor->handle(), 
::arm_compute::ConvertPolicy::SATURATE, 0); - auto acl_fn = asAclClFunction(std::move(fn)); - - _return_fn = std::move(acl_fn); + _return_fn = asAclFunction(std::move(fn)); } void KernelGenerator::visit(const ir::operation::ConvertFp16ToFp32 &node) @@ -1767,17 +1429,13 @@ void KernelGenerator::visit(const ir::operation::ConvertFp16ToFp32 &node) const auto ofm_index{node.getOutputs().at(0)}; const auto ifm_index{node.getInputs().at(ir::operation::ConvertFp16ToFp32::Input::INPUT)}; - auto ofm_tensor = _tensor_builder->at(ofm_index).get(); - auto ifm_tensor = _tensor_builder->at(ifm_index).get(); - - auto fn = std::make_unique<::arm_compute::CLDepthConvertLayer>(); + auto ofm_tensor = _tensor_reg->getAclTensor(ofm_index).get(); + auto ifm_tensor = _tensor_reg->getAclTensor(ifm_index).get(); - fn->configure(ifm_tensor->handle(), ofm_tensor->handle(), ::arm_compute::ConvertPolicy::SATURATE, - 0); + auto fn = acl_common::generateLayer<arm_compute::CLDepthConvertLayer>( + ifm_tensor->handle(), ofm_tensor->handle(), ::arm_compute::ConvertPolicy::SATURATE, 0); - auto acl_fn = asAclClFunction(std::move(fn)); - - _return_fn = std::move(acl_fn); + _return_fn = asAclFunction(std::move(fn)); } } // namespace acl_cl diff --git a/runtime/onert/backend/acl_cl/KernelGenerator.h b/runtime/onert/backend/acl_cl/KernelGenerator.h index 1e3b06489..d188d6d83 100644 --- a/runtime/onert/backend/acl_cl/KernelGenerator.h +++ b/runtime/onert/backend/acl_cl/KernelGenerator.h @@ -21,6 +21,8 @@ #include "ir/Operands.h" #include "TensorBuilder.h" +#include "AclTensorRegistry.h" +#include "TensorManager.h" namespace onert { @@ -33,70 +35,52 @@ class KernelGenerator : public IKernelGenerator { public: KernelGenerator(const ir::Operands &operands_ctx, const ir::Operations &operations_ctx, - const std::shared_ptr<TensorBuilder> &tensor_builder); + const std::shared_ptr<TensorBuilder> &tensor_builder, + const std::shared_ptr<acl_common::AclTensorRegistry<TensorManager>> &_tensor_reg); void visit(const ir::OpSequence &) override; void visit(const ir::operation::BatchToSpaceND &) override; + void visit(const ir::operation::BinaryArithmetic &) override; void visit(const ir::operation::Conv2D &) override; void visit(const ir::operation::DepthwiseConv2D &) override; - void visit(const ir::operation::MaxPool2D &) override; - void visit(const ir::operation::AvgPool2D &) override; void visit(const ir::operation::Concat &) override; void visit(const ir::operation::FullyConnected &) override; - void visit(const ir::operation::Mul &) override; void visit(const ir::operation::Reduce &) override; void visit(const ir::operation::Reshape &) override; void visit(const ir::operation::Squeeze &) override; - void visit(const ir::operation::Tanh &) override; void visit(const ir::operation::Softmax &) override; void visit(const ir::operation::Slice &) override; void visit(const ir::operation::StridedSlice &) override; void visit(const ir::operation::Transpose &) override; - void visit(const ir::operation::Add &) override; - void visit(const ir::operation::Sub &) override; - void visit(const ir::operation::Cast &) override; - void visit(const ir::operation::Div &) override; - void visit(const ir::operation::Exp &) override; + void visit(const ir::operation::ElementwiseActivation &) override; + void visit(const ir::operation::ElementwiseBinary &) override; + void visit(const ir::operation::ElementwiseUnary &) override; void visit(const ir::operation::ExpandDims &) override; void visit(const ir::operation::InstanceNorm &) override; - void visit(const 
ir::operation::Logistic &) override; void visit(const ir::operation::Comparison &) override; - void visit(const ir::operation::LogicalAnd &) override; void visit(const ir::operation::LSTM &) override; void visit(const ir::operation::Pack &) override; + void visit(const ir::operation::Pool2D &) override; void visit(const ir::operation::Permute &) override; - void visit(const ir::operation::RSQRT &) override; - void visit(const ir::operation::ReLU &) override; void visit(const ir::operation::ResizeBilinear &) override; - void visit(const ir::operation::ReLU1 &) override; - void visit(const ir::operation::ReLU6 &) override; + void visit(const ir::operation::ResizeNearestNeighbor &) override; void visit(const ir::operation::RNN &) override; - void visit(const ir::operation::Floor &) override; void visit(const ir::operation::SpaceToBatchND &) override; void visit(const ir::operation::SpaceToDepth &) override; - void visit(const ir::operation::L2Pool2D &) override; void visit(const ir::operation::EmbeddingLookup &) override; void visit(const ir::operation::L2Normalization &) override; void visit(const ir::operation::HashtableLookup &) override; void visit(const ir::operation::PReLU &) override; void visit(const ir::operation::TransposeConv &) override; - void visit(const ir::operation::SQRT &) override; - void visit(const ir::operation::LogicalOr &) override; - void visit(const ir::operation::LogicalNot &) override; void visit(const ir::operation::SquaredDifference &) override; void visit(const ir::operation::TopKV2 &) override; void visit(const ir::operation::Gather &) override; - void visit(const ir::operation::Neg &) override; - void visit(const ir::operation::Abs &) override; void visit(const ir::operation::ArgMax &) override; - void visit(const ir::operation::Dequantize &) override; void visit(const ir::operation::LocalResponseNormalization &) override; void visit(const ir::operation::DepthToSpace &) override; void visit(const ir::operation::Split &) override; void visit(const ir::operation::Unpack &) override; void visit(const ir::operation::Pad &) override; - void visit(const ir::operation::Min &) override; - void visit(const ir::operation::Max &) override; void visit(const ir::operation::ConvertFp32ToFp16 &) override; void visit(const ir::operation::ConvertFp16ToFp32 &) override; @@ -104,6 +88,7 @@ private: const ir::Operands &_ctx; const ir::Operations &_operations_ctx; std::shared_ptr<TensorBuilder> _tensor_builder; + std::shared_ptr<acl_common::AclTensorRegistry<TensorManager>> _tensor_reg; ir::Layout _current_op_seq_layout; }; diff --git a/runtime/onert/backend/acl_cl/Optimizer.cc b/runtime/onert/backend/acl_cl/Optimizer.cc index 6ba3143e8..9134d3fb8 100644 --- a/runtime/onert/backend/acl_cl/Optimizer.cc +++ b/runtime/onert/backend/acl_cl/Optimizer.cc @@ -19,7 +19,7 @@ #include "ParentInfo.h" #include <cassert> -#include <ir/LoweredGraph.h> +#include <compiler/LoweredGraph.h> #include <util/logging.h> #include "AclSubTensorAnalyzer.h" diff --git a/runtime/onert/backend/acl_cl/TensorManager.h b/runtime/onert/backend/acl_cl/TensorManager.h index bdbd0364e..ab295dbec 100644 --- a/runtime/onert/backend/acl_cl/TensorManager.h +++ b/runtime/onert/backend/acl_cl/TensorManager.h @@ -56,7 +56,7 @@ using InternalBufferManager = acl_common::AclInternalBufferManager< using TensorManager = acl_common::AclTensorManager<operand::ICLTensor, operand::CLTensor, operand::CLSubTensor>; -TensorManager *createTensorManager(bool is_linear_executor) +inline TensorManager *createTensorManager(bool 
is_linear_executor) { if (is_linear_executor) { diff --git a/runtime/onert/backend/acl_common/AclConstantInitializer.cc b/runtime/onert/backend/acl_common/AclConstantInitializer.cc new file mode 100644 index 000000000..6ad5b7b69 --- /dev/null +++ b/runtime/onert/backend/acl_common/AclConstantInitializer.cc @@ -0,0 +1,128 @@ +/* + * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "AclConstantInitializer.h" + +namespace onert +{ +namespace backend +{ +namespace acl_common +{ + +AclConstantInitializer::AclConstantInitializer(const ir::Operands &operands, + const std::shared_ptr<ITensorRegistry> &tensor_reg) + : IConstantInitializer{operands}, _tensor_reg{tensor_reg} +{ + // DO NOTHING +} + +void AclConstantInitializer::copyInputInitialize(const ir::Operation &node, uint32_t index) +{ + assert(node.getInputs().size() > index); + + const auto &input_index = node.getInputs().at(index); + const auto &input_obj = _operands.at(input_index); + registerCopyInitializer(input_index, input_obj); +} + +void AclConstantInitializer::permuteInputInitialize(const ir::Operation &node, uint32_t index) +{ + assert(node.getInputs().size() > index); + + const auto &input_index = node.getInputs().at(index); + const auto &input_obj = _operands.at(input_index); + registerPermuteInitializer(input_index, input_obj); +} + +void AclConstantInitializer::visit(const ir::operation::BatchToSpaceND &node) +{ + const auto &block_size_index = node.getInputs().at(ir::operation::BatchToSpaceND::BLOCK_SIZE); + const auto &block_size_obj = _operands.at(block_size_index); + + if (block_size_obj.isConstant()) + { + _init_map[block_size_index] = [](const ir::Operand &model_obj, backend::ITensor &obj) { + assert(model_obj.data()); + const auto &shape = model_obj.shape(); + const auto base = reinterpret_cast<const int32_t *>(model_obj.data()->base()); + assert(model_obj.shape().rank() == 1); + obj.access([&](ITensor &tensor) { + for (size_t i = 0; i < shape.num_elements(); ++i) + { + const int32_t value = base[shape.num_elements() - i - 1]; + int32_t *into = reinterpret_cast<int32_t *>(tensor.buffer() + + tensor.calcOffset({static_cast<int32_t>(i)})); + *into = value; + } + }); + }; + } +} + +void AclConstantInitializer::visit(const ir::operation::Conv2D &node) +{ + permuteInputInitialize(node, ir::operation::Conv2D::KERNEL); + copyInputInitialize(node, ir::operation::Conv2D::BIAS); +} + +void AclConstantInitializer::visit(const ir::operation::DepthwiseConv2D &node) +{ + permuteInputInitialize(node, ir::operation::DepthwiseConv2D::KERNEL); + copyInputInitialize(node, ir::operation::DepthwiseConv2D::BIAS); +} + +void AclConstantInitializer::visit(const ir::operation::FullyConnected &node) +{ + copyInputInitialize(node, ir::operation::FullyConnected::WEIGHT); + copyInputInitialize(node, ir::operation::FullyConnected::BIAS); +} + +void AclConstantInitializer::visit(const ir::operation::LSTM &node) +{ + copyInputInitialize(node, 
ir::operation::LSTM::INPUT_TO_INPUT_WEIGHTS); + copyInputInitialize(node, ir::operation::LSTM::INPUT_TO_FORGET_WEIGHTS); + copyInputInitialize(node, ir::operation::LSTM::INPUT_TO_CELL_WEIGHTS); + copyInputInitialize(node, ir::operation::LSTM::INPUT_TO_OUTPUT_WEIGHTS); + copyInputInitialize(node, ir::operation::LSTM::RECURRENT_TO_INPUT_WEIGHTS); + copyInputInitialize(node, ir::operation::LSTM::RECURRENT_TO_FORGET_WEIGHTS); + copyInputInitialize(node, ir::operation::LSTM::RECURRENT_TO_CELL_WEIGHTS); + copyInputInitialize(node, ir::operation::LSTM::RECURRENT_TO_OUTPUT_WEIGHTS); + copyInputInitialize(node, ir::operation::LSTM::CELL_TO_INPUT_WEIGHTS); + copyInputInitialize(node, ir::operation::LSTM::CELL_TO_FORGET_WEIGHTS); + copyInputInitialize(node, ir::operation::LSTM::CELL_TO_OUTPUT_WEIGHTS); + copyInputInitialize(node, ir::operation::LSTM::INPUT_GATE_BIAS); + copyInputInitialize(node, ir::operation::LSTM::FORGET_GATE_BIAS); + copyInputInitialize(node, ir::operation::LSTM::OUTPUT_GATE_BIAS); + copyInputInitialize(node, ir::operation::LSTM::PROJECTION_WEIGHTS); + copyInputInitialize(node, ir::operation::LSTM::PROJECTION_BIAS); +} + +void AclConstantInitializer::visit(const ir::operation::RNN &node) +{ + copyInputInitialize(node, ir::operation::RNN::WEIGHTS); + copyInputInitialize(node, ir::operation::RNN::RECURRENT_WEIGHTS); + copyInputInitialize(node, ir::operation::RNN::BIAS); +} + +void AclConstantInitializer::visit(const ir::operation::TransposeConv &node) +{ + permuteInputInitialize(node, ir::operation::TransposeConv::KERNEL); +} + +} // namespace acl_common +} // namespace backend +} // namespace onert diff --git a/runtime/onert/backend/acl_common/AclConstantInitializer.h b/runtime/onert/backend/acl_common/AclConstantInitializer.h new file mode 100644 index 000000000..52f4c54cf --- /dev/null +++ b/runtime/onert/backend/acl_common/AclConstantInitializer.h @@ -0,0 +1,61 @@ +/* + * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#ifndef __ONERT_COMPILER_ACL_COMMON_ACLCONSTANT_INITIALIZER_H__ +#define __ONERT_COMPILER_ACL_COMMON_ACLCONSTANT_INITIALIZER_H__ + +#include <backend/IConstantInitializer.h> +#include <ir/Operands.h> +#include "AclTensorRegistry.h" + +namespace onert +{ +namespace backend +{ +namespace acl_common +{ + +class AclConstantInitializer : public IConstantInitializer +{ +public: + AclConstantInitializer(const ir::Operands &operands, + const std::shared_ptr<ITensorRegistry> &tensor_reg); + +public: + void visit(const ir::operation::BatchToSpaceND &) override; + void visit(const ir::operation::Conv2D &) override; + void visit(const ir::operation::DepthwiseConv2D &) override; + void visit(const ir::operation::FullyConnected &) override; + void visit(const ir::operation::LSTM &) override; + void visit(const ir::operation::RNN &) override; + void visit(const ir::operation::TransposeConv &) override; + +protected: + void copyInputInitialize(const ir::Operation &node, uint32_t index); + void permuteInputInitialize(const ir::Operation &node, uint32_t index); + +private: + std::shared_ptr<ITensorRegistry> tensor_registry() const final { return _tensor_reg; } + +protected: + std::shared_ptr<ITensorRegistry> _tensor_reg; +}; + +} // namespace acl_common +} // namespace backend +} // namespace onert + +#endif // __ONERT_COMPILER_ACL_COMMON_ACLCONSTANT_INITIALIZER_H__ diff --git a/runtime/onert/backend/acl_common/AclFunction.h b/runtime/onert/backend/acl_common/AclFunction.h index 85b18e847..94b65863a 100644 --- a/runtime/onert/backend/acl_common/AclFunction.h +++ b/runtime/onert/backend/acl_common/AclFunction.h @@ -47,12 +47,6 @@ private: std::unique_ptr<::arm_compute::IFunction> _func; }; -class AclClFunction : public AclFunction -{ -public: - using AclFunction::AclFunction; -}; - } // namespace acl_common } // namespace backend } // namespace onert diff --git a/runtime/onert/backend/acl_common/AclKernelGen.h b/runtime/onert/backend/acl_common/AclKernelGen.h index 9f7ce3764..372ce689e 100644 --- a/runtime/onert/backend/acl_common/AclKernelGen.h +++ b/runtime/onert/backend/acl_common/AclKernelGen.h @@ -30,11 +30,32 @@ namespace backend namespace acl_common { +template <typename Layer, typename... Args> +std::unique_ptr<arm_compute::IFunction> generateLayer(Args &&... args) +{ + auto l = std::make_unique<Layer>(); + + l->configure(std::forward<Args>(args)...); + + return l; +} + +template <typename Layer, typename... Args> +std::unique_ptr<arm_compute::IFunction> +generateLayer(std::shared_ptr<arm_compute::IMemoryManager> memory_manager, Args &&... args) +{ + auto l = std::make_unique<Layer>(memory_manager); + + l->configure(std::forward<Args>(args)...); + + return l; +} + template <typename T_FunctionWrapper, typename T_Tensor, typename T_ACLLayer, - typename T_TensorBuilder> -std::unique_ptr<exec::IFunction> -kernelGenLSTM(const ir::operation::LSTM &node, const ir::Operands &operands, - const std::shared_ptr<T_TensorBuilder> &tensor_builder) + typename T_TensorRegistry> +std::unique_ptr<exec::IFunction> kernelGenLSTM(const ir::operation::LSTM &node, + const ir::Operands &operands, + const std::shared_ptr<T_TensorRegistry> &tensor_reg) { // TODO Support dynamic rnn // TODO Fix subtle error in the case of non-CIFG, non-peephole and No Projection. 
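[Editor's sketch] The two generateLayer overloads just added to AclKernelGen.h are the core of this refactor: they collapse the repeated make_unique-then-configure idiom into one variadic call that forwards its arguments to the layer's configure(), and the second overload covers ACL layers whose constructor takes an arm_compute::IMemoryManager. A minimal sketch of the before/after pattern, using layers that appear elsewhere in this patch (variable names are illustrative, not part of the diff):

// Old idiom, repeated in every visitor: construct, then configure.
auto floor_old = std::make_unique<::arm_compute::CLFloor>();
floor_old->configure(ifm_tensor->handle(), ofm_tensor->handle());

// New idiom: one call constructs the layer, forwards the arguments to
// configure(), and returns it as std::unique_ptr<arm_compute::IFunction>.
auto floor_fn = acl_common::generateLayer<arm_compute::CLFloor>(ifm_tensor->handle(),
                                                                ofm_tensor->handle());

// Layers that allocate internal buffers use the overload taking the memory
// manager first, which hands it to the constructor before calling configure():
auto rnn_fn = acl_common::generateLayer<arm_compute::CLRNNLayer>(
    _tensor_builder->acl_tensor_manager()->internal_buffer_manager(), input_tensor->handle(),
    weights_tensor->handle(), recurrent_weights_tensor->handle(), bias_tensor->handle(),
    hidden_state_out_tensor->handle(), output_tensor->handle(), act_info);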
@@ -117,43 +138,44 @@ kernelGenLSTM(const ir::operation::LSTM &node, const ir::Operands &operands, const auto projection_clip = projection_threshold; assert(cell_clip >= 0.f && projection_clip >= 0.f); - auto scratch_buffer_tensor = tensor_builder->at(scratch_buffer_index).get(); - auto output_state_out_tensor = tensor_builder->at(output_state_out_index).get(); - auto cell_state_out_tensor = tensor_builder->at(cell_state_out_index).get(); - auto output_tensor = tensor_builder->at(output_index).get(); + auto scratch_buffer_tensor = tensor_reg->getAclTensor(scratch_buffer_index).get(); + auto output_state_out_tensor = tensor_reg->getAclTensor(output_state_out_index).get(); + auto cell_state_out_tensor = tensor_reg->getAclTensor(cell_state_out_index).get(); + auto output_tensor = tensor_reg->getAclTensor(output_index).get(); - auto input_tensor = tensor_builder->at(input_index).get(); + auto input_tensor = tensor_reg->getAclTensor(input_index).get(); - auto input_to_forget_weights_tensor = tensor_builder->at(input_to_forget_weights_index).get(); - auto input_to_cell_weights_tensor = tensor_builder->at(input_to_cell_weights_index).get(); - auto input_to_output_weights_tensor = tensor_builder->at(input_to_output_weights_index).get(); + auto input_to_forget_weights_tensor = + tensor_reg->getAclTensor(input_to_forget_weights_index).get(); + auto input_to_cell_weights_tensor = tensor_reg->getAclTensor(input_to_cell_weights_index).get(); + auto input_to_output_weights_tensor = + tensor_reg->getAclTensor(input_to_output_weights_index).get(); auto recurrent_to_forget_weights_tensor = - tensor_builder->at(recurrent_to_forget_weights_index).get(); - auto recurrent_to_cell_weights_tensor = tensor_builder->at(recurrent_to_cell_weights_index).get(); + tensor_reg->getAclTensor(recurrent_to_forget_weights_index).get(); + auto recurrent_to_cell_weights_tensor = + tensor_reg->getAclTensor(recurrent_to_cell_weights_index).get(); auto recurrent_to_output_weights_tensor = - tensor_builder->at(recurrent_to_output_weights_index).get(); + tensor_reg->getAclTensor(recurrent_to_output_weights_index).get(); - auto forget_gate_bias_tensor = tensor_builder->at(forget_gate_bias_index).get(); - auto cell_bias_tensor = tensor_builder->at(cell_bias_index).get(); - auto output_gate_bias_tensor = tensor_builder->at(output_gate_bias_index).get(); - auto output_state_in_tensor = tensor_builder->at(output_state_in_index).get(); - auto cell_state_in_tensor = tensor_builder->at(cell_state_in_index).get(); + auto forget_gate_bias_tensor = tensor_reg->getAclTensor(forget_gate_bias_index).get(); + auto cell_bias_tensor = tensor_reg->getAclTensor(cell_bias_index).get(); + auto output_gate_bias_tensor = tensor_reg->getAclTensor(output_gate_bias_index).get(); + auto output_state_in_tensor = tensor_reg->getAclTensor(output_state_in_index).get(); + auto cell_state_in_tensor = tensor_reg->getAclTensor(cell_state_in_index).get(); - auto act_info = ::onert::backend::acl_common::asActivationLayerInfo(activation); - - auto fn = std::make_unique<T_ACLLayer>(); + auto act_info = asActivationLayerInfo(activation); ::arm_compute::LSTMParams<T_Tensor> lstm_params{}; if (has_cifg_param) { auto input_to_input_weights_tensor = - tensor_builder->at(input_to_input_weights_index).get(); // optional + tensor_reg->getAclTensor(input_to_input_weights_index).get(); // optional auto recurrent_to_input_weights_tensor = - tensor_builder->at(recurrent_to_input_weights_index).get(); // optional + tensor_reg->getAclTensor(recurrent_to_input_weights_index).get(); 
// optional auto cell_to_input_weights_handle = - has_peephole_param ? tensor_builder->at(cell_to_input_weights_index).get()->handle() + has_peephole_param ? tensor_reg->getAclTensor(cell_to_input_weights_index).get()->handle() : nullptr; // optional (non-cifg && peephole) - auto input_gate_bias_tensor = tensor_builder->at(input_gate_bias_index).get(); // optional + auto input_gate_bias_tensor = tensor_reg->getAclTensor(input_gate_bias_index).get(); // optional lstm_params.set_cifg_params(input_to_input_weights_tensor->handle(), recurrent_to_input_weights_tensor->handle(), cell_to_input_weights_handle, input_gate_bias_tensor->handle()); @@ -161,40 +183,42 @@ kernelGenLSTM(const ir::operation::LSTM &node, const ir::Operands &operands, if (has_peephole_param) { auto cell_to_forget_weights_tensor = - tensor_builder->at(cell_to_forget_weights_index).get(); // optional + tensor_reg->getAclTensor(cell_to_forget_weights_index).get(); // optional auto cell_to_output_weights_tensor = - tensor_builder->at(cell_to_output_weights_index).get(); // optional + tensor_reg->getAclTensor(cell_to_output_weights_index).get(); // optional lstm_params.set_peephole_params(cell_to_forget_weights_tensor->handle(), cell_to_output_weights_tensor->handle()); } if (has_projection_param) { - auto projection_weights_tensor = tensor_builder->at(projection_weights_index).get(); // optional - auto projection_bias_handle = has_projection_bias - ? tensor_builder->at(projection_bias_index).get()->handle() - : nullptr; // optional + auto projection_weights_tensor = + tensor_reg->getAclTensor(projection_weights_index).get(); // optional + auto projection_bias_handle = + has_projection_bias ? tensor_reg->getAclTensor(projection_bias_index).get()->handle() + : nullptr; // optional lstm_params.set_projection_params(projection_weights_tensor->handle(), projection_bias_handle); } - fn->configure(input_tensor->handle(), input_to_forget_weights_tensor->handle(), - input_to_cell_weights_tensor->handle(), input_to_output_weights_tensor->handle(), - recurrent_to_forget_weights_tensor->handle(), - recurrent_to_cell_weights_tensor->handle(), - recurrent_to_output_weights_tensor->handle(), forget_gate_bias_tensor->handle(), - cell_bias_tensor->handle(), output_gate_bias_tensor->handle(), - output_state_in_tensor->handle(), cell_state_in_tensor->handle(), - scratch_buffer_tensor->handle(), output_state_out_tensor->handle(), - cell_state_out_tensor->handle(), output_tensor->handle(), lstm_params, act_info, - cell_clip, projection_clip); + auto fn = generateLayer<T_ACLLayer>( + input_tensor->handle(), input_to_forget_weights_tensor->handle(), + input_to_cell_weights_tensor->handle(), input_to_output_weights_tensor->handle(), + recurrent_to_forget_weights_tensor->handle(), recurrent_to_cell_weights_tensor->handle(), + recurrent_to_output_weights_tensor->handle(), forget_gate_bias_tensor->handle(), + cell_bias_tensor->handle(), output_gate_bias_tensor->handle(), + output_state_in_tensor->handle(), cell_state_in_tensor->handle(), + scratch_buffer_tensor->handle(), output_state_out_tensor->handle(), + cell_state_out_tensor->handle(), output_tensor->handle(), lstm_params, act_info, cell_clip, + projection_clip); return std::make_unique<T_FunctionWrapper>(std::move(fn)); } template <typename T_FunctionWrapper, typename T_Tensor, typename T_ACLLayer, - typename T_TensorBuilder> + typename T_TensorBuilder, typename T_TensorRegistry> std::unique_ptr<exec::IFunction> kernelGenFullyConnected(const ir::operation::FullyConnected &node, const ir::Operands 
&operands, - const std::shared_ptr<T_TensorBuilder> &tensor_builder, ir::Layout layout) + const std::shared_ptr<T_TensorBuilder> &tensor_builder, + const std::shared_ptr<T_TensorRegistry> &tensor_reg, ir::Layout layout) { using ir::operation::FullyConnected; @@ -236,16 +260,13 @@ kernelGenFullyConnected(const ir::operation::FullyConnected &node, const ir::Ope reshape.dim(1) = input_size; /* W */ } - auto output_tensor = tensor_builder->at(output_index).get(); - const auto input_tensor = tensor_builder->at(input_index).get(); - const auto weight_tensor = tensor_builder->at(weight_index).get(); - const auto bias_tensor = tensor_builder->at(bias_index).get(); + auto output_tensor = tensor_reg->getAclTensor(output_index).get(); + const auto input_tensor = tensor_reg->getAclTensor(input_index).get(); + const auto weight_tensor = tensor_reg->getAclTensor(weight_index).get(); + const auto bias_tensor = tensor_reg->getAclTensor(bias_index).get(); const auto frontend_layout = layout; const auto acl_layout = output_tensor->handle()->info()->data_layout(); - auto fn = - std::make_unique<T_ACLLayer>(tensor_builder->acl_tensor_manager()->internal_buffer_manager()); - typename T_ACLLayer::KernelType kernel_type = T_ACLLayer::KernelType::GENERAL; if (operands.at(weight_index).isConstant()) { @@ -253,20 +274,18 @@ kernelGenFullyConnected(const ir::operation::FullyConnected &node, const ir::Ope assert(operands.at(weight_index).data()); } - fn->configure( - input_tensor->handle(), weight_tensor->handle(), bias_tensor->handle(), - output_tensor->handle(), needs_reshape, - ::onert::backend::acl_common::asTensorShape( - reshape, frontend_layout, ::onert::backend::acl_common::asRuntimeLayout(acl_layout)), - kernel_type); + auto fn = generateLayer<T_ACLLayer>( + tensor_builder->acl_tensor_manager()->internal_buffer_manager(), input_tensor->handle(), + weight_tensor->handle(), bias_tensor->handle(), output_tensor->handle(), needs_reshape, + asTensorShape(reshape, frontend_layout, asRuntimeLayout(acl_layout)), kernel_type); return std::make_unique<T_FunctionWrapper>(std::move(fn)); } -template <typename T_ACLLayer, typename T_PoolOp, typename T_TensorBuilder> +template <typename T_ACLLayer, typename T_PoolOp, typename T_AclTensorRegistry> std::unique_ptr<::arm_compute::IFunction> kernelGenPool2D(const T_PoolOp &node, const ir::Operands &operands, - const std::shared_ptr<T_TensorBuilder> &tensor_builder, ir::Layout layout, + const std::shared_ptr<T_AclTensorRegistry> &tensor_reg, ir::Layout layout, ::arm_compute::PoolingType pooling_type) { const auto ofm_index{node.getOutputs().at(0)}; @@ -294,16 +313,14 @@ kernelGenPool2D(const T_PoolOp &node, const ir::Operands &operands, VERBOSE(Pool2DParam) << "PAD(L): " << padding.left << std::endl; VERBOSE(Pool2DParam) << "PAD(R): " << padding.right << std::endl; - auto ofm_tensor = tensor_builder->at(ofm_index).get(); - auto ifm_tensor = tensor_builder->at(ifm_index).get(); + auto ofm_tensor = tensor_reg->getAclTensor(ofm_index).get(); + auto ifm_tensor = tensor_reg->getAclTensor(ifm_index).get(); ::arm_compute::PoolingLayerInfo info{ pooling_type, ::arm_compute::Size2D{kw, kh}, ifm_tensor->info()->data_layout(), - acl_common::asPadStrideInfo(padding, stride), true /* exclude_padding */}; - - auto fn = std::make_unique<T_ACLLayer>(); + asPadStrideInfo(padding, stride), true /* exclude_padding */}; - fn->configure(ifm_tensor->handle(), ofm_tensor->handle(), info); + auto fn = generateLayer<T_ACLLayer>(ifm_tensor->handle(), ofm_tensor->handle(), info); return fn; } diff --git 
a/runtime/onert/backend/acl_common/AclTensorBuilder.h b/runtime/onert/backend/acl_common/AclTensorBuilder.h index 6b03fdf7f..91452014b 100644 --- a/runtime/onert/backend/acl_common/AclTensorBuilder.h +++ b/runtime/onert/backend/acl_common/AclTensorBuilder.h @@ -25,6 +25,7 @@ #include "ir/OperandIndexMap.h" #include <ir/Operands.h> #include "AclTensorManager.h" +#include "AclTensorRegistry.h" #include <memory> #include "ParentInfo.h" #include <util/Utils.h> @@ -48,7 +49,8 @@ class AclTensorBuilder : public ITensorBuilder public: using T_AclTensorManager = AclTensorManager<T_ITensor, T_Tensor, T_SubTensor>; - AclTensorBuilder(const ir::Operands &operands, T_AclTensorManager *tensor_mgr); + AclTensorBuilder(const ir::Operands &operands, T_AclTensorManager *tensor_mgr, + const std::shared_ptr<AclTensorRegistry<T_AclTensorManager>> &tensor_reg); /** * @brief Register tensor information to allocate on ACL-CL backend @@ -63,19 +65,13 @@ public: void notifyLastUse(const ir::OperandIndex &) override; bool isRegistered(const ir::OperandIndex &) const override; - std::shared_ptr<backend::ITensorRegistry> tensorRegistry() override { return nullptr; } void prepare(void) override; void allocate() override; void postFunctionPrepare() override; - std::shared_ptr<ITensor> tensorAt(const ir::OperandIndex &ind) override; - void iterate(const IterateFunction &fn) override; - std::unique_ptr<ITensorManager> releaseStaticTensorManager(void) override; - std::shared_ptr<T_ITensor> at(const ir::OperandIndex &ind); - T_AclTensorManager *acl_tensor_manager(void) { return _tensor_mgr.get(); } void setUsesCount(const ir::OperandIndex &index, size_t num_uses) @@ -100,8 +96,6 @@ public: */ bool isSubTensorOf(const ir::OperandIndex &parent, const ir::OperandIndex &child); - bool supportDynamicTensor() override { return false; } - private: void buildTensors(void); ir::OperandIndex findRootParent(ir::OperandIndex index); @@ -113,6 +107,7 @@ private: ir::OperandIndexMap<size_t> _uses_count_map; std::unique_ptr<T_AclTensorManager> _tensor_mgr; + std::shared_ptr<AclTensorRegistry<T_AclTensorManager>> _tensor_reg; // for linear executor std::vector<std::pair<UsesType, ir::OperandIndex>> _lifetime_seq; @@ -140,9 +135,10 @@ namespace acl_common { template <typename T_ITensor, typename T_Tensor, typename T_SubTensor> -AclTensorBuilder<T_ITensor, T_Tensor, T_SubTensor>::AclTensorBuilder(const ir::Operands &operands, - T_AclTensorManager *tensor_mgr) - : _operands{operands}, _tensor_mgr{tensor_mgr} +AclTensorBuilder<T_ITensor, T_Tensor, T_SubTensor>::AclTensorBuilder( + const ir::Operands &operands, T_AclTensorManager *tensor_mgr, + const std::shared_ptr<AclTensorRegistry<T_AclTensorManager>> &tensor_reg) + : _operands{operands}, _tensor_mgr{tensor_mgr}, _tensor_reg{tensor_reg} { assert(_tensor_mgr); } @@ -310,28 +306,6 @@ void AclTensorBuilder<T_ITensor, T_Tensor, T_SubTensor>::postFunctionPrepare(voi } template <typename T_ITensor, typename T_Tensor, typename T_SubTensor> -std::shared_ptr<ITensor> -AclTensorBuilder<T_ITensor, T_Tensor, T_SubTensor>::tensorAt(const ir::OperandIndex &ind) -{ - return _tensor_mgr->at(ind); -} - -template <typename T_ITensor, typename T_Tensor, typename T_SubTensor> -void AclTensorBuilder<T_ITensor, T_Tensor, T_SubTensor>::iterate(const IterateFunction &fn) -{ - _tensor_mgr->iterate(fn); -} - -template <typename T_ITensor, typename T_Tensor, typename T_SubTensor> -std::shared_ptr<T_ITensor> -AclTensorBuilder<T_ITensor, T_Tensor, T_SubTensor>::at(const ir::OperandIndex &ind) -{ - auto ret = 
_tensor_mgr->at(ind); - assert(ret != nullptr); - return ret; -} - -template <typename T_ITensor, typename T_Tensor, typename T_SubTensor> std::unique_ptr<ITensorManager> AclTensorBuilder<T_ITensor, T_Tensor, T_SubTensor>::releaseStaticTensorManager(void) { diff --git a/runtime/onert/backend/acl_common/AclTensorRegistry.h b/runtime/onert/backend/acl_common/AclTensorRegistry.h new file mode 100644 index 000000000..1ef9f4b35 --- /dev/null +++ b/runtime/onert/backend/acl_common/AclTensorRegistry.h @@ -0,0 +1,59 @@ +/* + * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef __ONERT_BACKEND_ACL_COMMON_ACL_TENSOR_REGISTRY_H__ +#define __ONERT_BACKEND_ACL_COMMON_ACL_TENSOR_REGISTRY_H__ + +#include "backend/ITensorRegistry.h" + +namespace onert +{ +namespace backend +{ +namespace acl_common +{ + +/** + * @brief Tensor registry class for acl backends + * + * This is implemented as a wrapper of AclTensorManager. + */ +template <typename T_AclTensorManager> class AclTensorRegistry : public ITensorRegistry +{ +public: + AclTensorRegistry(T_AclTensorManager *tensor_mgr) : _tensor_mgr{tensor_mgr} {} + + std::shared_ptr<ITensor> getITensor(const ir::OperandIndex &ind) override + { + return _tensor_mgr->at(ind); + } + + std::shared_ptr<ITensor> getNativeITensor(const ir::OperandIndex &ind) override + { + return getITensor(ind); + } + + auto getAclTensor(const ir::OperandIndex &ind) { return _tensor_mgr->at(ind); } + +private: + T_AclTensorManager *_tensor_mgr; +}; + +} // namespace acl_common +} // namespace backend +} // namespace onert + +#endif // __ONERT_BACKEND_ACL_COMMON_ACL_TENSOR_REGISTRY_H__ diff --git a/runtime/onert/backend/acl_common/Convert.cc b/runtime/onert/backend/acl_common/Convert.cc index a5bbe1691..67dcc8192 100644 --- a/runtime/onert/backend/acl_common/Convert.cc +++ b/runtime/onert/backend/acl_common/Convert.cc @@ -18,6 +18,7 @@ #include "Swizzle.h" #include "ir/DataType.h" +#include "ir/operation/ElementwiseActivation.h" #include <memory> namespace @@ -177,6 +178,50 @@ namespace acl_common } } +::arm_compute::ActivationLayerInfo +asActivationLayerInfo(const ir::operation::ElementwiseActivation::Type op_type, float alpha, + float beta) +{ + switch (op_type) + { + case ir::operation::ElementwiseActivation::Type::RELU: + if (beta == 0.f) + { + if (alpha == ir::operation::ElementwiseActivation::infinity) + { + return ::arm_compute::ActivationLayerInfo{ + ::arm_compute::ActivationLayerInfo::ActivationFunction::RELU}; + } + else + { + return ::arm_compute::ActivationLayerInfo{ + ::arm_compute::ActivationLayerInfo::ActivationFunction::BOUNDED_RELU, alpha}; + } + } + else + { + return ::arm_compute::ActivationLayerInfo{ + ::arm_compute::ActivationLayerInfo::ActivationFunction::LU_BOUNDED_RELU, alpha, beta}; + } + case ir::operation::ElementwiseActivation::Type::TANH: + return ::arm_compute::ActivationLayerInfo{ + ::arm_compute::ActivationLayerInfo::ActivationFunction::TANH, alpha, beta}; + case 
ir::operation::ElementwiseActivation::Type::LOGISTIC:
+      // NOTE The sigmoid function is a special case of the logistic function
+      // f(x) = L / (1 + exp(-k * (x - x0))) when L=1, k=1, x0=0.
+      // TODO In the ACL and NNAPI specs, Logistic currently always uses L=1, k=1, and x0=0
+      // (i.e. always sigmoid) regardless of the parameter values.
+      // If ACL ever supports a non-sigmoid logistic, these parameter values should be fixed.
+      return ::arm_compute::ActivationLayerInfo{
+          ::arm_compute::ActivationLayerInfo::ActivationFunction::LOGISTIC};
+    case ir::operation::ElementwiseActivation::Type::LEAKY_RELU:
+      return ::arm_compute::ActivationLayerInfo{
+          ::arm_compute::ActivationLayerInfo::ActivationFunction::LEAKY_RELU, alpha};
+    default:
+      throw std::runtime_error{"Not supported yet"};
+      break;
+  }
+}
+
 arm_compute::Coordinates asCoordinates(const ir::Operand &operand, int32_t rank,
                                        ir::Layout frontend_layout, ir::Layout backend_layout)
 {
@@ -223,11 +268,6 @@ std::unique_ptr<AclFunction> asAclFunction(std::unique_ptr<::arm_compute::IFunct
   return std::make_unique<AclFunction>(std::move(layer));
 }
-std::unique_ptr<AclClFunction> asAclClFunction(std::unique_ptr<::arm_compute::IFunction> &&layer)
-{
-  return std::make_unique<AclClFunction>(std::move(layer));
-}
-
 ir::Layout asRuntimeLayout(::arm_compute::DataLayout data_layout)
 {
   switch (data_layout)
@@ -265,6 +305,21 @@ ir::DataType asRuntimeDataType(::arm_compute::DataType data_type)
   }
 }
+arm_compute::PoolingType convertPoolType(ir::operation::Pool2D::PoolType pool_type_ir)
+{
+  switch (pool_type_ir)
+  {
+    case ir::operation::Pool2D::PoolType::AVG:
+      return arm_compute::PoolingType::AVG;
+    case ir::operation::Pool2D::PoolType::L2:
+      return arm_compute::PoolingType::L2;
+    case ir::operation::Pool2D::PoolType::MAX:
+      return arm_compute::PoolingType::MAX;
+    default:
+      throw std::runtime_error("convertPoolType: Operation not supported yet");
+  }
+}
+
 arm_compute::ReduceOperation convertReduceType(ir::operation::Reduce::ReduceType reduce_type_ir)
 {
   switch (reduce_type_ir)
diff --git a/runtime/onert/backend/acl_common/Convert.h b/runtime/onert/backend/acl_common/Convert.h
index 9362098a5..380321c07 100644
--- a/runtime/onert/backend/acl_common/Convert.h
+++ b/runtime/onert/backend/acl_common/Convert.h
@@ -25,7 +25,9 @@
 #include "ir/Layout.h"
 #include "ir/InternalType.h"
 #include "ir/Operand.h"
+#include "ir/operation/Pool2D.h"
 #include "ir/operation/Reduce.h"
+#include "ir/operation/ElementwiseActivation.h"
 #include "ir/Shape.h"
 #include "ir/TypeInfo.h"
 #include "ir/Coordinates.h"
@@ -59,6 +61,9 @@ namespace acl_common
                                              const ir::Stride &stride);
 ::arm_compute::ActivationLayerInfo asActivationLayerInfo(ir::Activation act_code);
+::arm_compute::ActivationLayerInfo
+asActivationLayerInfo(const ir::operation::ElementwiseActivation::Type op_type, float alpha,
+                      float beta);
 arm_compute::Coordinates asCoordinates(const ir::Operand &operand, int32_t rank,
                                        ir::Layout frontend_layout, ir::Layout backend_layout);
@@ -67,7 +72,6 @@ std::set<uint32_t> asSet(const ir::Operand &operand, int32_t rank, ir::Layout fr
                          ir::Layout backend_layout);
 std::unique_ptr<AclFunction> asAclFunction(std::unique_ptr<::arm_compute::IFunction> &&layer);
-std::unique_ptr<AclClFunction> asAclClFunction(std::unique_ptr<::arm_compute::IFunction> &&layer);
 template <typename T_Function>
 std::unique_ptr<T_Function> asFunction(std::unique_ptr<::arm_compute::IFunction> &&fn)
@@ -78,6 +82,7 @@ std::unique_ptr<T_Function> asFunction(std::unique_ptr<::arm_compute::IFunction>
 ir::Layout asRuntimeLayout(::arm_compute::DataLayout data_layout);
ir::DataType asRuntimeDataType(::arm_compute::DataType data_type); +arm_compute::PoolingType convertPoolType(ir::operation::Pool2D::PoolType pool_type_ir); arm_compute::ReduceOperation convertReduceType(ir::operation::Reduce::ReduceType reduce_type_ir); } // namespace acl_common diff --git a/runtime/onert/backend/acl_neon/Backend.h b/runtime/onert/backend/acl_neon/Backend.h index a0b145e19..35d6e4e8e 100644 --- a/runtime/onert/backend/acl_neon/Backend.h +++ b/runtime/onert/backend/acl_neon/Backend.h @@ -48,10 +48,13 @@ public: const auto &operands = graph.operands(); const auto &operations = graph.operations(); auto context = std::make_unique<BackendContext>(this, &graph); - auto tb = std::make_shared<TensorBuilder>(operands, createTensorManager(is_linear_executor)); + auto tm = createTensorManager(is_linear_executor); + auto tr = std::make_shared<acl_common::AclTensorRegistry<TensorManager>>(tm); + auto tb = std::make_shared<TensorBuilder>(operands, tm, tr); + context->tensor_registry = tr; context->tensor_builder = tb; - context->constant_initializer = std::make_shared<ConstantInitializer>(operands, tb); - context->kernel_gen = std::make_shared<KernelGenerator>(operands, operations, tb); + context->constant_initializer = std::make_shared<ConstantInitializer>(operands, tr); + context->kernel_gen = std::make_shared<KernelGenerator>(operands, operations, tb, tr); context->tensor_register = nullptr; context->optimizer = std::make_shared<Optimizer>(context.get()); return context; diff --git a/runtime/onert/backend/acl_neon/ConstantInitializer.cc b/runtime/onert/backend/acl_neon/ConstantInitializer.cc index 4191b277f..79edb9ded 100644 --- a/runtime/onert/backend/acl_neon/ConstantInitializer.cc +++ b/runtime/onert/backend/acl_neon/ConstantInitializer.cc @@ -24,100 +24,12 @@ namespace acl_neon { ConstantInitializer::ConstantInitializer(const ir::Operands &operands, - const std::shared_ptr<TensorBuilder> &tensor_builder) - : IConstantInitializer{operands}, _tensor_builder{tensor_builder} + const std::shared_ptr<ITensorRegistry> &tensor_reg) + : acl_common::AclConstantInitializer{operands, tensor_reg} { // DO NOTHING } -void ConstantInitializer::copyInputInitialize(const ir::Operation &node, uint32_t index) -{ - assert(node.getInputs().size() > index); - - const auto &input_index = node.getInputs().at(index); - const auto &input_obj = _operands.at(input_index); - registerCopyInitializer(input_index, input_obj); -} - -void ConstantInitializer::permuteInputInitialize(const ir::Operation &node, uint32_t index) -{ - assert(node.getInputs().size() > index); - - const auto &input_index = node.getInputs().at(index); - const auto &input_obj = _operands.at(input_index); - registerPermuteInitializer(input_index, input_obj); -} - -void ConstantInitializer::visit(const ir::operation::BatchToSpaceND &node) -{ - const auto &block_size_index = node.getInputs().at(ir::operation::BatchToSpaceND::BLOCK_SIZE); - const auto &block_size_obj = _operands.at(block_size_index); - - if (block_size_obj.isConstant()) - { - _init_map[block_size_index] = [](const ir::Operand &model_obj, backend::ITensor &obj) { - assert(model_obj.data()); - const auto &shape = model_obj.shape(); - const auto base = reinterpret_cast<const int32_t *>(model_obj.data()->base()); - assert(model_obj.shape().rank() == 1); - obj.access([&](ITensor &tensor) { - for (size_t i = 0; i < shape.num_elements(); ++i) - { - const int32_t value = base[shape.num_elements() - i - 1]; - int32_t *into = reinterpret_cast<int32_t *>(tensor.buffer() + - 
tensor.calcOffset({static_cast<int32_t>(i)})); - *into = value; - } - }); - }; - } -} - -void ConstantInitializer::visit(const ir::operation::Conv2D &node) -{ - permuteInputInitialize(node, ir::operation::Conv2D::KERNEL); - copyInputInitialize(node, ir::operation::Conv2D::BIAS); -} - -void ConstantInitializer::visit(const ir::operation::DepthwiseConv2D &node) -{ - permuteInputInitialize(node, ir::operation::DepthwiseConv2D::KERNEL); - copyInputInitialize(node, ir::operation::DepthwiseConv2D::BIAS); -} - -void ConstantInitializer::visit(const ir::operation::FullyConnected &node) -{ - copyInputInitialize(node, ir::operation::FullyConnected::WEIGHT); - copyInputInitialize(node, ir::operation::FullyConnected::BIAS); -} - -void ConstantInitializer::visit(const ir::operation::LSTM &node) -{ - copyInputInitialize(node, ir::operation::LSTM::INPUT_TO_INPUT_WEIGHTS); - copyInputInitialize(node, ir::operation::LSTM::INPUT_TO_FORGET_WEIGHTS); - copyInputInitialize(node, ir::operation::LSTM::INPUT_TO_CELL_WEIGHTS); - copyInputInitialize(node, ir::operation::LSTM::INPUT_TO_OUTPUT_WEIGHTS); - copyInputInitialize(node, ir::operation::LSTM::RECURRENT_TO_INPUT_WEIGHTS); - copyInputInitialize(node, ir::operation::LSTM::RECURRENT_TO_FORGET_WEIGHTS); - copyInputInitialize(node, ir::operation::LSTM::RECURRENT_TO_CELL_WEIGHTS); - copyInputInitialize(node, ir::operation::LSTM::RECURRENT_TO_OUTPUT_WEIGHTS); - copyInputInitialize(node, ir::operation::LSTM::CELL_TO_INPUT_WEIGHTS); - copyInputInitialize(node, ir::operation::LSTM::CELL_TO_FORGET_WEIGHTS); - copyInputInitialize(node, ir::operation::LSTM::CELL_TO_OUTPUT_WEIGHTS); - copyInputInitialize(node, ir::operation::LSTM::INPUT_GATE_BIAS); - copyInputInitialize(node, ir::operation::LSTM::FORGET_GATE_BIAS); - copyInputInitialize(node, ir::operation::LSTM::OUTPUT_GATE_BIAS); - copyInputInitialize(node, ir::operation::LSTM::PROJECTION_WEIGHTS); - copyInputInitialize(node, ir::operation::LSTM::PROJECTION_BIAS); -} - -void ConstantInitializer::visit(const ir::operation::RNN &node) -{ - copyInputInitialize(node, ir::operation::RNN::WEIGHTS); - copyInputInitialize(node, ir::operation::RNN::RECURRENT_WEIGHTS); - copyInputInitialize(node, ir::operation::RNN::BIAS); -} - void ConstantInitializer::visit(const ir::operation::SpaceToBatchND &node) { const auto &block_size_index = node.getInputs().at(ir::operation::SpaceToBatchND::BLOCK_SIZE); @@ -173,11 +85,6 @@ void ConstantInitializer::visit(const ir::operation::SpaceToBatchND &node) } } -void ConstantInitializer::visit(const ir::operation::TransposeConv &node) -{ - permuteInputInitialize(node, ir::operation::TransposeConv::KERNEL); -} - } // namespace acl_neon } // namespace backend } // namespace onert diff --git a/runtime/onert/backend/acl_neon/ConstantInitializer.h b/runtime/onert/backend/acl_neon/ConstantInitializer.h index 6b4c1f145..c7d71cdcf 100644 --- a/runtime/onert/backend/acl_neon/ConstantInitializer.h +++ b/runtime/onert/backend/acl_neon/ConstantInitializer.h @@ -17,9 +17,7 @@ #ifndef __ONERT_COMPILER_ACL_NEON_CONSTANT_INITIALIZER_H__ #define __ONERT_COMPILER_ACL_NEON_CONSTANT_INITIALIZER_H__ -#include <backend/IConstantInitializer.h> -#include <ir/Operands.h> -#include "TensorBuilder.h" +#include "AclConstantInitializer.h" namespace onert { @@ -28,29 +26,15 @@ namespace backend namespace acl_neon { -class ConstantInitializer : public IConstantInitializer +class ConstantInitializer : public acl_common::AclConstantInitializer { public: ConstantInitializer(const ir::Operands &operands, - const 
std::shared_ptr<TensorBuilder> &tensor_builder); + const std::shared_ptr<ITensorRegistry> &tensor_reg); public: - void visit(const ir::operation::BatchToSpaceND &) override; - void visit(const ir::operation::Conv2D &) override; - void visit(const ir::operation::DepthwiseConv2D &) override; - void visit(const ir::operation::FullyConnected &) override; - void visit(const ir::operation::LSTM &) override; - void visit(const ir::operation::RNN &) override; - void visit(const ir::operation::SpaceToBatchND &) override; - void visit(const ir::operation::TransposeConv &) override; - -private: - std::shared_ptr<ITensorBuilder> tensor_builder() const override { return _tensor_builder; } - void copyInputInitialize(const ir::Operation &node, uint32_t index); - void permuteInputInitialize(const ir::Operation &node, uint32_t index); - -private: - std::shared_ptr<TensorBuilder> _tensor_builder; + using acl_common::AclConstantInitializer::visit; + void visit(const ir::operation::SpaceToBatchND &node) final; }; } // namespace acl_neon diff --git a/runtime/onert/backend/acl_neon/KernelGenerator.cc b/runtime/onert/backend/acl_neon/KernelGenerator.cc index 1195b83cc..6d53c1245 100644 --- a/runtime/onert/backend/acl_neon/KernelGenerator.cc +++ b/runtime/onert/backend/acl_neon/KernelGenerator.cc @@ -44,11 +44,12 @@ using ::onert::backend::acl_common::asAclFunction; using ActivationBuilder = ::onert::backend::acl_common::AclActivationBuilder< ::arm_compute::ITensor, ::arm_compute::NEActivationLayer, acl_common::AclFunction>; -KernelGenerator::KernelGenerator(const ir::Operands &operands_ctx, - const ir::Operations &operations_ctx, - const std::shared_ptr<TensorBuilder> &tensor_builder) +KernelGenerator::KernelGenerator( + const ir::Operands &operands_ctx, const ir::Operations &operations_ctx, + const std::shared_ptr<TensorBuilder> &tensor_builder, + const std::shared_ptr<acl_common::AclTensorRegistry<TensorManager>> &tensor_reg) : _ctx(operands_ctx), _operations_ctx(operations_ctx), _tensor_builder(tensor_builder), - _current_op_seq_layout(ir::Layout::UNKNOWN) + _tensor_reg(tensor_reg), _current_op_seq_layout(ir::Layout::UNKNOWN) { // DO NOTHING } @@ -70,26 +71,6 @@ void KernelGenerator::visit(const ir::OpSequence &op_seq) } } -void KernelGenerator::visit(const ir::operation::Abs &node) -{ - const auto output_index{node.getOutputs().at(0)}; - const auto input_index{node.getInputs().at(ir::operation::Abs::Input::INPUT)}; - - auto output_tensor = _tensor_builder->at(output_index).get(); - auto input_tensor = _tensor_builder->at(input_index).get(); - - const ::arm_compute::ActivationLayerInfo act_info{ - ::arm_compute::ActivationLayerInfo::ActivationFunction::ABS}; - - auto fn = std::make_unique<::arm_compute::NEActivationLayer>(); - - fn->configure(input_tensor->handle(), output_tensor->handle(), act_info); - - auto acl_fn = asAclFunction(std::move(fn)); - - _return_fn = std::move(acl_fn); -} - void KernelGenerator::visit(const ir::operation::ArgMax &node) { const auto ofm_index{node.getOutputs().at(0)}; @@ -97,8 +78,8 @@ void KernelGenerator::visit(const ir::operation::ArgMax &node) const auto ifm_rank = _ctx.at(ifm_index).shape().rank(); - auto ofm_tensor = _tensor_builder->at(ofm_index).get(); - auto ifm_tensor = _tensor_builder->at(ifm_index).get(); + auto ofm_tensor = _tensor_reg->getAclTensor(ofm_index).get(); + auto ifm_tensor = _tensor_reg->getAclTensor(ifm_index).get(); auto frontend_layout = _current_op_seq_layout; auto backend_layout = ifm_tensor->layout(); @@ -111,14 +92,11 @@ void 
KernelGenerator::visit(const ir::operation::ArgMax &node) const auto fixed_axis = acl_common::ToARMComputeAxis(ifm_rank, axis_value, frontend_layout, backend_layout).value(); - auto fn = std::make_unique<::arm_compute::NEArgMinMaxLayer>(); - - fn->configure(ifm_tensor->handle(), fixed_axis, ofm_tensor->handle(), - arm_compute::ReductionOperation::ARG_IDX_MAX); + auto fn = acl_common::generateLayer<arm_compute::NEArgMinMaxLayer>( + ifm_tensor->handle(), fixed_axis, ofm_tensor->handle(), + arm_compute::ReductionOperation::ARG_IDX_MAX); - auto acl_fn = asAclFunction(std::move(fn)); - - _return_fn = std::move(acl_fn); + _return_fn = asAclFunction(std::move(fn)); } void KernelGenerator::visit(const ir::operation::BatchToSpaceND &node) @@ -128,50 +106,67 @@ void KernelGenerator::visit(const ir::operation::BatchToSpaceND &node) const auto block_size_index{ node.getInputs().at(ir::operation::BatchToSpaceND::Input::BLOCK_SIZE)}; - auto ofm_tensor = _tensor_builder->at(ofm_index).get(); - auto ifm_tensor = _tensor_builder->at(ifm_index).get(); - auto block_size_tensor = _tensor_builder->at(block_size_index).get(); + auto ofm_tensor = _tensor_reg->getAclTensor(ofm_index).get(); + auto ifm_tensor = _tensor_reg->getAclTensor(ifm_index).get(); + auto block_size_tensor = _tensor_reg->getAclTensor(block_size_index).get(); assert(_ctx.at(block_size_index).data()); - auto fn = std::make_unique<::arm_compute::NEBatchToSpaceLayer>(); - - fn->configure(ifm_tensor->handle(), block_size_tensor->handle(), ofm_tensor->handle()); + auto fn = acl_common::generateLayer<arm_compute::NEBatchToSpaceLayer>( + ifm_tensor->handle(), block_size_tensor->handle(), ofm_tensor->handle()); - auto acl_fn = asAclFunction(std::move(fn)); - - _return_fn = std::move(acl_fn); + _return_fn = asAclFunction(std::move(fn)); } -void KernelGenerator::visit(const ir::operation::Cast &node) +void KernelGenerator::visit(const ir::operation::BinaryArithmetic &node) { const auto ofm_index{node.getOutputs().at(0)}; - const auto ifm_index{node.getInputs().at(ir::operation::Cast::Input::INPUT)}; + const auto lhs_index{node.getInputs().at(ir::operation::BinaryArithmetic::Input::LHS)}; + const auto rhs_index{node.getInputs().at(ir::operation::BinaryArithmetic::Input::RHS)}; - auto ofm_tensor = _tensor_builder->at(ofm_index).get(); - auto ifm_tensor = _tensor_builder->at(ifm_index).get(); - - std::unique_ptr<::arm_compute::IFunction> fn; - if (ifm_tensor->data_type() == ofm_tensor->data_type()) - { - auto l = std::make_unique<::arm_compute::NECopy>(); + const auto activation = node.param().activation; - l->configure(ifm_tensor->handle(), ofm_tensor->handle()); + auto ofm_tensor = _tensor_reg->getAclTensor(ofm_index).get(); + auto lhs_tensor = _tensor_reg->getAclTensor(lhs_index).get(); + auto rhs_tensor = _tensor_reg->getAclTensor(rhs_index).get(); - fn = std::move(l); - } - else + std::unique_ptr<arm_compute::IFunction> fn; + switch (node.param().arithmetic_type) { - auto l = std::make_unique<::arm_compute::NECast>(); - - l->configure(ifm_tensor->handle(), ofm_tensor->handle(), arm_compute::ConvertPolicy::SATURATE); - - fn = std::move(l); + case ir::operation::BinaryArithmetic::ArithmeticType::ADD: + { + fn = acl_common::generateLayer<arm_compute::NEArithmeticAddition>( + lhs_tensor->handle(), rhs_tensor->handle(), ofm_tensor->handle(), + arm_compute::ConvertPolicy::SATURATE); + break; + } + case ir::operation::BinaryArithmetic::ArithmeticType::SUB: + { + fn = acl_common::generateLayer<arm_compute::NEArithmeticSubtraction>( + lhs_tensor->handle(), 
rhs_tensor->handle(), ofm_tensor->handle(), + arm_compute::ConvertPolicy::SATURATE); + break; + } + case ir::operation::BinaryArithmetic::ArithmeticType::MUL: + { + // RoundingPolicy for scale:1.0 is only allowed RoundingPolicy::TO_ZERO + fn = acl_common::generateLayer<arm_compute::NEPixelWiseMultiplication>( + lhs_tensor->handle(), rhs_tensor->handle(), ofm_tensor->handle(), 1.0, // scale + arm_compute::ConvertPolicy::SATURATE, arm_compute::RoundingPolicy::TO_ZERO); + break; + } + case ir::operation::BinaryArithmetic::ArithmeticType::DIV: + { + fn = acl_common::generateLayer<arm_compute::NEElementwiseDivision>( + lhs_tensor->handle(), rhs_tensor->handle(), ofm_tensor->handle()); + break; + } + default: + assert(false && "The BinaryArithmetic operation supports only binary arithmetic operations"); + break; } - - auto acl_fn = asAclFunction(std::move(fn)); - - _return_fn = std::move(acl_fn); + _return_fn = std::make_unique<exec::FunctionSequence>( + asAclFunction(std::move(fn)), ActivationBuilder::generate(activation, ofm_tensor->handle())); } void KernelGenerator::visit(const ir::operation::Conv2D &node) @@ -195,20 +190,18 @@ void KernelGenerator::visit(const ir::operation::Conv2D &node) ker_width, ker_height); const auto activation = node.param().activation; - auto ofm_tensor = _tensor_builder->at(ofm_index).get(); - auto ifm_tensor = _tensor_builder->at(ifm_index).get(); - auto ker_tensor = _tensor_builder->at(ker_index).get(); - auto bias_tensor = _tensor_builder->at(bias_index).get(); + auto ofm_tensor = _tensor_reg->getAclTensor(ofm_index).get(); + auto ifm_tensor = _tensor_reg->getAclTensor(ifm_index).get(); + auto ker_tensor = _tensor_reg->getAclTensor(ker_index).get(); + auto bias_tensor = _tensor_reg->getAclTensor(bias_index).get(); const auto conv_info = acl_common::asPadStrideInfo(padding, stride); const auto act_info = acl_common::asActivationLayerInfo(activation); - auto fn = std::make_unique<::arm_compute::NEConvolutionLayer>( - _tensor_builder->acl_tensor_manager()->internal_buffer_manager()); - - fn->configure(ifm_tensor->handle(), ker_tensor->handle(), bias_tensor->handle(), - ofm_tensor->handle(), conv_info, ::arm_compute::WeightsInfo(), - ::arm_compute::Size2D(1U, 1U), act_info); + auto fn = acl_common::generateLayer<arm_compute::NEConvolutionLayer>( + _tensor_builder->acl_tensor_manager()->internal_buffer_manager(), ifm_tensor->handle(), + ker_tensor->handle(), bias_tensor->handle(), ofm_tensor->handle(), conv_info, + ::arm_compute::WeightsInfo(), ::arm_compute::Size2D(1U, 1U), act_info); _return_fn = asAclFunction(std::move(fn)); } @@ -221,16 +214,13 @@ void KernelGenerator::visit(const ir::operation::DepthToSpace &node) auto block_size = node.param().block_size; assert(block_size > 0); - auto output_tensor = _tensor_builder->at(output_index).get(); - auto input_tensor = _tensor_builder->at(input_index).get(); + auto output_tensor = _tensor_reg->getAclTensor(output_index).get(); + auto input_tensor = _tensor_reg->getAclTensor(input_index).get(); - auto fn = std::make_unique<::arm_compute::NEDepthToSpaceLayer>(); + auto fn = acl_common::generateLayer<arm_compute::NEDepthToSpaceLayer>( + input_tensor->handle(), output_tensor->handle(), block_size); - fn->configure(input_tensor->handle(), output_tensor->handle(), block_size); - - auto acl_fn = asAclFunction(std::move(fn)); - - _return_fn = std::move(acl_fn); + _return_fn = asAclFunction(std::move(fn)); } void KernelGenerator::visit(const ir::operation::DepthwiseConv2D &node) @@ -255,67 +245,23 @@ void 
KernelGenerator::visit(const ir::operation::DepthwiseConv2D &node) const auto multiplier = node.param().multiplier; const auto activation = node.param().activation; - auto ofm_tensor = _tensor_builder->at(ofm_index).get(); - auto ifm_tensor = _tensor_builder->at(ifm_index).get(); - auto ker_tensor = _tensor_builder->at(ker_index).get(); - auto bias_tensor = _tensor_builder->at(bias_index).get(); + auto ofm_tensor = _tensor_reg->getAclTensor(ofm_index).get(); + auto ifm_tensor = _tensor_reg->getAclTensor(ifm_index).get(); + auto ker_tensor = _tensor_reg->getAclTensor(ker_index).get(); + auto bias_tensor = _tensor_reg->getAclTensor(bias_index).get(); const auto conv_info = acl_common::asPadStrideInfo(padding, stride); const auto act_info = acl_common::asActivationLayerInfo(activation); { - auto fn = std::make_unique<::arm_compute::NEDepthwiseConvolutionLayer>(); - - fn->configure(ifm_tensor->handle(), ker_tensor->handle(), bias_tensor->handle(), - ofm_tensor->handle(), conv_info, multiplier, act_info); + auto fn = acl_common::generateLayer<arm_compute::NEDepthwiseConvolutionLayer>( + ifm_tensor->handle(), ker_tensor->handle(), bias_tensor->handle(), ofm_tensor->handle(), + conv_info, multiplier, act_info); _return_fn = asAclFunction(std::move(fn)); } } -void KernelGenerator::visit(const ir::operation::Dequantize &node) -{ - const auto output_index{node.getOutputs().at(0)}; - const auto input_index{node.getInputs().at(ir::operation::Dequantize::Input::INPUT)}; - - auto output_tensor = _tensor_builder->at(output_index).get(); - auto input_tensor = _tensor_builder->at(input_index).get(); - - auto fn = std::make_unique<::arm_compute::NEDequantizationLayer>(); - - fn->configure(input_tensor->handle(), output_tensor->handle()); - - auto acl_fn = asAclFunction(std::move(fn)); - - _return_fn = std::move(acl_fn); -} - -void KernelGenerator::visit(const ir::operation::MaxPool2D &node) -{ - auto raw_fn = acl_common::kernelGenPool2D<::arm_compute::NEPoolingLayer>( - node, _ctx, _tensor_builder, _current_op_seq_layout, ::arm_compute::PoolingType::MAX); - - const auto ofm_index{node.getOutputs().at(0)}; - auto ofm_tensor = _tensor_builder->at(ofm_index).get(); - const auto activation = node.param().activation; - _return_fn = std::make_unique<exec::FunctionSequence>( - asAclFunction(std::move(raw_fn)), - ActivationBuilder::generate(activation, ofm_tensor->handle())); -} - -void KernelGenerator::visit(const ir::operation::AvgPool2D &node) -{ - auto raw_fn = acl_common::kernelGenPool2D<::arm_compute::NEPoolingLayer>( - node, _ctx, _tensor_builder, _current_op_seq_layout, ::arm_compute::PoolingType::AVG); - - const auto ofm_index{node.getOutputs().at(0)}; - auto ofm_tensor = _tensor_builder->at(ofm_index).get(); - const auto activation = node.param().activation; - _return_fn = std::make_unique<exec::FunctionSequence>( - asAclFunction(std::move(raw_fn)), - ActivationBuilder::generate(activation, ofm_tensor->handle())); -} - void KernelGenerator::visit(const ir::operation::Concat &node) { const auto ofm_index{node.getOutputs().at(0)}; @@ -336,80 +282,223 @@ void KernelGenerator::visit(const ir::operation::Concat &node) return; } - auto output_tensor = _tensor_builder->at(ofm_index).get(); + auto output_tensor = _tensor_reg->getAclTensor(ofm_index).get(); std::vector<::arm_compute::ITensor *> input_tensors; for (const auto &ifm_ind : input_indexes) - input_tensors.emplace_back(_tensor_builder->at(ifm_ind)->handle()); + input_tensors.emplace_back(_tensor_reg->getAclTensor(ifm_ind)->handle()); 
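+  // With a single input there is nothing to join, so the branch below degenerates to a
+  // plain element-wise copy (NECopy) instead of NEConcatenateLayer.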
std::unique_ptr<::arm_compute::IFunction> fn;
   if (input_indexes.size() < 2)
   {
-    auto l = std::make_unique<::arm_compute::NECopy>();
-    l->configure(input_tensors.at(0), output_tensor->handle());
-    fn = std::move(l);
+    fn = acl_common::generateLayer<arm_compute::NECopy>(input_tensors.at(0),
+                                                        output_tensor->handle());
   }
   else
   {
-    auto l = std::make_unique<::arm_compute::NEConcatenateLayer>();
     const auto rank = _ctx.at(ofm_index).shape().rank();
     const auto frontend_layout = _current_op_seq_layout;
     const auto backend_layout = output_tensor->layout();
     const auto fixed_axis =
         acl_common::ToARMComputeAxis(rank, axis, frontend_layout, backend_layout).value();
-    l->configure(input_tensors, output_tensor->handle(), fixed_axis);
-    fn = std::move(l);
+    fn = acl_common::generateLayer<arm_compute::NEConcatenateLayer>(
+        input_tensors, output_tensor->handle(), fixed_axis);
   }
-
-  auto acl_fn = asAclFunction(std::move(fn));
-
-  _return_fn = std::move(acl_fn);
+  _return_fn = asAclFunction(std::move(fn));
 }
-void KernelGenerator::visit(const ir::operation::EmbeddingLookup &node)
+void KernelGenerator::visit(const ir::operation::ElementwiseActivation &node)
 {
-  const auto output_index{node.getOutputs().at(0)};
-  const auto lookups_index{node.getInputs().at(ir::operation::EmbeddingLookup::Input::LOOKUPS)};
-  const auto values_index{node.getInputs().at(ir::operation::EmbeddingLookup::Input::VALUES)};
+  const auto ofm_index{node.getOutputs().at(0)};
+  const auto ifm_index{node.getInputs().at(ir::operation::ElementwiseActivation::Input::INPUT)};
+
+  auto ofm_tensor = _tensor_reg->getAclTensor(ofm_index).get();
+  auto ifm_tensor = _tensor_reg->getAclTensor(ifm_index).get();
+
+  const ::arm_compute::ActivationLayerInfo act_info = acl_common::asActivationLayerInfo(
+      node.param().op_type, node.param().alpha, node.param().beta);
-  auto output_tensor = _tensor_builder->at(output_index).get();
-  auto lookups_tensor = _tensor_builder->at(lookups_index).get();
-  auto values_tensor = _tensor_builder->at(values_index).get();
+  std::unique_ptr<arm_compute::IFunction> fn;
+  if (node.param().op_type == ir::operation::ElementwiseActivation::Type::LOGISTIC)
+  {
+    // NOTE NEActivationLayer can produce erroneous results; this is caused by
+    // 'vexpq_f32()'.
+    // The NEON function returns 'NaN' instead of 'INF' for a value outside the range
+    // representable in float, and the 'NaN' then corrupts the result of this op.
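+    // e.g. logistic(x) = 1 / (1 + exp(-x)): for a large negative x, exp(-x) overflows
+    // float, so a correct 'INF' would still give 1 / (1 + INF) = 0.0f, while a 'NaN'
+    // from 'vexpq_f32()' propagates and makes the output 'NaN' as well.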
+    fn = acl_common::generateLayer<arm_compute::NEActivationLayerEx>(
+        ifm_tensor->handle(), ofm_tensor->handle(), act_info);
+  }
+  else
+  {
+    fn = acl_common::generateLayer<arm_compute::NEActivationLayer>(ifm_tensor->handle(),
+                                                                   ofm_tensor->handle(), act_info);
+  }
-  auto fn = std::make_unique<::arm_compute::NEEmbeddingLookup>();
+  _return_fn = asAclFunction(std::move(fn));
+}
-  fn->configure(values_tensor->handle(), output_tensor->handle(), lookups_tensor->handle());
+void KernelGenerator::visit(const ir::operation::ElementwiseBinary &node)
+{
+  const auto output_index{node.getOutputs().at(0)};
+  const auto lhs_index{node.getInputs().at(ir::operation::ElementwiseBinary::Input::LHS)};
+  const auto rhs_index{node.getInputs().at(ir::operation::ElementwiseBinary::Input::RHS)};
-  auto acl_fn = asAclFunction(std::move(fn));
+  auto output_tensor = _tensor_reg->getAclTensor(output_index).get();
+  auto lhs_tensor = _tensor_reg->getAclTensor(lhs_index).get();
+  auto rhs_tensor = _tensor_reg->getAclTensor(rhs_index).get();
-  _return_fn = std::move(acl_fn);
+  std::unique_ptr<arm_compute::IFunction> fn;
+  switch (node.param().op_type)
+  {
+    case ir::operation::ElementwiseBinary::ElementwiseBinaryType::LOGICAL_AND:
+    {
+      fn = acl_common::generateLayer<arm_compute::NELogicalAnd>(
+          lhs_tensor->handle(), rhs_tensor->handle(), output_tensor->handle());
+      break;
+    }
+    case ir::operation::ElementwiseBinary::ElementwiseBinaryType::LOGICAL_OR:
+    {
+      fn = acl_common::generateLayer<arm_compute::NELogicalOr>(
+          lhs_tensor->handle(), rhs_tensor->handle(), output_tensor->handle());
+      break;
+    }
+    case ir::operation::ElementwiseBinary::ElementwiseBinaryType::MAX:
+    {
+      fn = acl_common::generateLayer<arm_compute::NEElementwiseMax>(
+          lhs_tensor->handle(), rhs_tensor->handle(), output_tensor->handle());
+      break;
+    }
+    case ir::operation::ElementwiseBinary::ElementwiseBinaryType::MIN:
+    {
+      fn = acl_common::generateLayer<arm_compute::NEElementwiseMin>(
+          lhs_tensor->handle(), rhs_tensor->handle(), output_tensor->handle());
+      break;
+    }
+    default:
+    {
+      std::string err_msg("acl_neon KernelGenerator : " + node.name() +
+                          " is not an elementwise-binary operation");
+      assert(false && err_msg.c_str());
+      break;
+    }
+  }
+  _return_fn = asAclFunction(std::move(fn));
 }
-void KernelGenerator::visit(const ir::operation::Floor &node)
+void KernelGenerator::visit(const ir::operation::ElementwiseUnary &node)
 {
-  const auto ofm_index{node.getOutputs().at(0)};
-  const auto ifm_index{node.getInputs().at(ir::operation::Floor::Input::INPUT)};
+  const auto output_index{node.getOutputs().at(0)};
+  const auto input_index{node.getInputs().at(ir::operation::ElementwiseUnary::Input::INPUT)};
+
+  auto output_tensor = _tensor_reg->getAclTensor(output_index).get();
+  auto input_tensor = _tensor_reg->getAclTensor(input_index).get();
+
+  std::unique_ptr<arm_compute::IFunction> fn;
+  switch (node.param().op_type)
+  {
+    case ir::operation::ElementwiseUnary::Type::ABS:
+    {
+      const ::arm_compute::ActivationLayerInfo act_info{
+          ::arm_compute::ActivationLayerInfo::ActivationFunction::ABS};
-  auto ofm_tensor = _tensor_builder->at(ofm_index).get();
-  auto ifm_tensor = _tensor_builder->at(ifm_index).get();
+      fn = acl_common::generateLayer<arm_compute::NEActivationLayer>(
+          input_tensor->handle(), output_tensor->handle(), act_info);
+      break;
+    }
+    case ir::operation::ElementwiseUnary::Type::CAST:
+    {
+      if (input_tensor->data_type() == output_tensor->data_type())
+      {
+        fn = acl_common::generateLayer<arm_compute::NECopy>(input_tensor->handle(),
+                                                            output_tensor->handle());
+      }
+      else
+      {
+        fn = acl_common::generateLayer<arm_compute::NECast>(
+            input_tensor->handle(), output_tensor->handle(), arm_compute::ConvertPolicy::SATURATE);
+      }
+      break;
+    }
+    case ir::operation::ElementwiseUnary::Type::DEQUANTIZE:
+    {
+      fn = acl_common::generateLayer<arm_compute::NEDequantizationLayer>(input_tensor->handle(),
+                                                                         output_tensor->handle());
+      break;
+    }
+    case ir::operation::ElementwiseUnary::Type::EXP:
+    {
+      fn = acl_common::generateLayer<arm_compute::NEExpLayer>(input_tensor->handle(),
+                                                              output_tensor->handle());
+      break;
+    }
+    case ir::operation::ElementwiseUnary::Type::FLOOR:
+    {
+      fn = acl_common::generateLayer<arm_compute::NEFloor>(input_tensor->handle(),
+                                                           output_tensor->handle());
+      break;
+    }
+    case ir::operation::ElementwiseUnary::Type::LOGICAL_NOT:
+    {
+      fn = acl_common::generateLayer<arm_compute::NEBitwiseNot>(input_tensor->handle(),
+                                                                output_tensor->handle());
+      break;
+    }
+    case ir::operation::ElementwiseUnary::Type::NEG:
+    {
+      fn = acl_common::generateLayer<arm_compute::NENegLayer>(input_tensor->handle(),
+                                                              output_tensor->handle());
+      break;
+    }
+    case ir::operation::ElementwiseUnary::Type::RSQRT:
+    {
+      fn = acl_common::generateLayer<arm_compute::NERsqrtLayer>(input_tensor->handle(),
+                                                                output_tensor->handle());
+      break;
+    }
+    case ir::operation::ElementwiseUnary::Type::SQRT:
+    {
+      const ::arm_compute::ActivationLayerInfo act_info{
+          ::arm_compute::ActivationLayerInfo::ActivationFunction::SQRT};
-  auto fn = std::make_unique<::arm_compute::NEFloor>();
+      fn = acl_common::generateLayer<arm_compute::NEActivationLayer>(
+          input_tensor->handle(), output_tensor->handle(), act_info);
+      break;
+    }
+    default:
+    {
+      throw std::runtime_error("acl_neon KernelGenerator : " + node.name() +
+                               " is not supported yet");
+      break;
+    }
+  }
+  _return_fn = asAclFunction(std::move(fn));
+}
-  fn->configure(ifm_tensor->handle(), ofm_tensor->handle());
+void KernelGenerator::visit(const ir::operation::EmbeddingLookup &node)
+{
+  const auto output_index{node.getOutputs().at(0)};
+  const auto lookups_index{node.getInputs().at(ir::operation::EmbeddingLookup::Input::LOOKUPS)};
+  const auto values_index{node.getInputs().at(ir::operation::EmbeddingLookup::Input::VALUES)};
-  auto acl_fn = asAclFunction(std::move(fn));
+  auto output_tensor = _tensor_reg->getAclTensor(output_index).get();
+  auto lookups_tensor = _tensor_reg->getAclTensor(lookups_index).get();
+  auto values_tensor = _tensor_reg->getAclTensor(values_index).get();
-  _return_fn = std::move(acl_fn);
+  auto fn = acl_common::generateLayer<arm_compute::NEEmbeddingLookup>(
+      values_tensor->handle(), output_tensor->handle(), lookups_tensor->handle());
+
+  _return_fn = asAclFunction(std::move(fn));
 }
 void KernelGenerator::visit(const ir::operation::FullyConnected &node)
 {
   const auto output_index{node.getOutputs().at(0)};
-  auto output_tensor = _tensor_builder->at(output_index).get();
+  auto output_tensor = _tensor_reg->getAclTensor(output_index).get();
   const auto activation = node.param().activation;
   auto fn = acl_common::kernelGenFullyConnected<acl_common::AclFunction, ::arm_compute::ITensor,
                                                 ::arm_compute::NEFullyConnectedReshapingLayer>(
-      node, _ctx, _tensor_builder, _current_op_seq_layout);
+      node, _ctx, _tensor_builder, _tensor_reg, _current_op_seq_layout);
   _return_fn = std::make_unique<exec::FunctionSequence>(
       std::move(fn), ActivationBuilder::generate(activation, output_tensor->handle()));
 }
@@ -423,21 +512,18 @@ void KernelGenerator::visit(const ir::operation::HashtableLookup &node)
   const auto
keys_index{node.getInputs().at(ir::operation::HashtableLookup::Input::KEYS)};
   const auto values_index{node.getInputs().at(ir::operation::HashtableLookup::Input::VALUES)};
-  auto output_tensor = _tensor_builder->at(output_index).get();
-  auto hits_tensor = _tensor_builder->at(hits_index).get();
+  auto output_tensor = _tensor_reg->getAclTensor(output_index).get();
+  auto hits_tensor = _tensor_reg->getAclTensor(hits_index).get();
-  auto lookups_tensor = _tensor_builder->at(lookups_index).get();
-  auto keys_tensor = _tensor_builder->at(keys_index).get();
-  auto values_tensor = _tensor_builder->at(values_index).get();
+  auto lookups_tensor = _tensor_reg->getAclTensor(lookups_index).get();
+  auto keys_tensor = _tensor_reg->getAclTensor(keys_index).get();
+  auto values_tensor = _tensor_reg->getAclTensor(values_index).get();
-  auto fn = std::make_unique<::arm_compute::NEHashtableLookup>();
+  auto fn = acl_common::generateLayer<arm_compute::NEHashtableLookup>(
+      lookups_tensor->handle(), keys_tensor->handle(), values_tensor->handle(),
+      output_tensor->handle(), hits_tensor->handle());
-  fn->configure(lookups_tensor->handle(), keys_tensor->handle(), values_tensor->handle(),
-                output_tensor->handle(), hits_tensor->handle());
-
-  auto acl_fn = asAclFunction(std::move(fn));
-
-  _return_fn = std::move(acl_fn);
+  _return_fn = asAclFunction(std::move(fn));
 }
 void KernelGenerator::visit(const ir::operation::Gather &node)
@@ -453,9 +539,9 @@ void KernelGenerator::visit(const ir::operation::Gather &node)
   // Converting in reverse order
   const int axis = ::onert::backend::acl_common::ToARMComputeAxis(ifm_rank, axis_value).value();
-  auto ofm_tensor = _tensor_builder->at(ofm_index).get();
-  auto ifm_tensor = _tensor_builder->at(ifm_index).get();
-  auto indices_tensor = _tensor_builder->at(indices_index).get();
+  auto ofm_tensor = _tensor_reg->getAclTensor(ofm_index).get();
+  auto ifm_tensor = _tensor_reg->getAclTensor(ifm_index).get();
+  auto indices_tensor = _tensor_reg->getAclTensor(indices_index).get();
   const auto backend_layout = ofm_tensor->layout();
   UNUSED_RELEASE(backend_layout);
@@ -471,8 +557,6 @@ void KernelGenerator::visit(const ir::operation::Gather &node)
   assert(backend_layout == indices_tensor->layout());
   assert(ifm_rank < 4 || _current_op_seq_layout == backend_layout);
-  auto fn = std::make_unique<::arm_compute::NEGatherEx>();
-
   // input is n-D, indices k-D, output is (n + k - 1)-D
   size_t n = ifm_rank;
   assert(n == ifm_tensor->num_dimensions());
@@ -495,15 +579,14 @@ void KernelGenerator::visit(const ir::operation::Gather &node)
         acl_common::asTensorShape(indices.shape(), _current_op_seq_layout, backend_layout, false));
   }
-  fn->configure(ifm_tensor->handle(), indices_tensor->handle(), ofm_tensor->handle(), axis);
+  auto fn = acl_common::generateLayer<arm_compute::NEGatherEx>(
+      ifm_tensor->handle(), indices_tensor->handle(), ofm_tensor->handle(), axis);
   // acl_neon does not revert the disabled dim_correction because acl_neon's kernels
   // use arm_compute::TensorInfo::offset_element_in_bytes(),
   // which would cause an error when a kernel accesses a higher dimension whose size is 1
-  auto acl_fn = asAclFunction(std::move(fn));
-
-  _return_fn = std::move(acl_fn);
+  _return_fn = asAclFunction(std::move(fn));
 }
 void KernelGenerator::visit(const ir::operation::InstanceNorm &node)
@@ -513,17 +596,16 @@
   const auto ofm_index{node.getOutputs().at(0)};
   const auto ifm_index{node.getInputs().at(ir::operation::InstanceNorm::Input::INPUT)};
   const auto gamma_index{node.getInputs().at(ir::operation::InstanceNorm::Input::GAMMA)};
   const auto
beta_index{node.getInputs().at(ir::operation::InstanceNorm::Input::BETA)}; - auto ofm_tensor = _tensor_builder->at(ofm_index).get(); - auto ifm_tensor = _tensor_builder->at(ifm_index).get(); - auto gamma_tensor = _tensor_builder->at(gamma_index).get(); - auto beta_tensor = _tensor_builder->at(beta_index).get(); + auto ofm_tensor = _tensor_reg->getAclTensor(ofm_index).get(); + auto ifm_tensor = _tensor_reg->getAclTensor(ifm_index).get(); + auto gamma_tensor = _tensor_reg->getAclTensor(gamma_index).get(); + auto beta_tensor = _tensor_reg->getAclTensor(beta_index).get(); auto epsilon = node.param().epsilon; auto activation = node.param().activation; - auto fn = std::make_unique<::arm_compute::NEInstanceNormalizationLayerEx>(); - - fn->configure(ifm_tensor->handle(), ofm_tensor->handle(), gamma_tensor->handle(), - beta_tensor->handle(), epsilon); + auto fn = acl_common::generateLayer<arm_compute::NEInstanceNormalizationLayerEx>( + ifm_tensor->handle(), ofm_tensor->handle(), gamma_tensor->handle(), beta_tensor->handle(), + epsilon); _return_fn = std::make_unique<exec::FunctionSequence>( asAclFunction(std::move(fn)), ActivationBuilder::generate(activation, ofm_tensor->handle())); @@ -548,32 +630,16 @@ void KernelGenerator::visit(const ir::operation::L2Normalization &node) float beta = 0.5f; // pow(reduction, -0.5) = 1 / sqrt(reduction) float bias = 0.0f; // Don't offset the reduction. - auto ofm_tensor = _tensor_builder->at(ofm_index).get(); - auto ifm_tensor = _tensor_builder->at(ifm_index).get(); + auto ofm_tensor = _tensor_reg->getAclTensor(ofm_index).get(); + auto ifm_tensor = _tensor_reg->getAclTensor(ifm_index).get(); const auto norm_info = ::arm_compute::NormalizationLayerInfo(::arm_compute::NormType::CROSS_MAP, radius, alpha, beta, bias, false); - auto fn = std::make_unique<::arm_compute::NENormalizationLayer>(); - - fn->configure(ifm_tensor->handle(), ofm_tensor->handle(), norm_info); - - auto acl_fn = asAclFunction(std::move(fn)); + auto fn = acl_common::generateLayer<arm_compute::NENormalizationLayer>( + ifm_tensor->handle(), ofm_tensor->handle(), norm_info); - _return_fn = std::move(acl_fn); -} - -void KernelGenerator::visit(const ir::operation::L2Pool2D &node) -{ - auto raw_fn = acl_common::kernelGenPool2D<::arm_compute::NEPoolingLayer>( - node, _ctx, _tensor_builder, _current_op_seq_layout, ::arm_compute::PoolingType::L2); - - const auto ofm_index{node.getOutputs().at(0)}; - auto ofm_tensor = _tensor_builder->at(ofm_index).get(); - const auto activation = node.param().activation; - _return_fn = std::make_unique<exec::FunctionSequence>( - asAclFunction(std::move(raw_fn)), - ActivationBuilder::generate(activation, ofm_tensor->handle())); + _return_fn = asAclFunction(std::move(fn)); } void KernelGenerator::visit(const ir::operation::LocalResponseNormalization &node) @@ -587,142 +653,22 @@ void KernelGenerator::visit(const ir::operation::LocalResponseNormalization &nod auto beta = node.param().beta; auto bias = node.param().bias; - auto ofm_tensor = _tensor_builder->at(ofm_index).get(); - auto ifm_tensor = _tensor_builder->at(ifm_index).get(); + auto ofm_tensor = _tensor_reg->getAclTensor(ofm_index).get(); + auto ifm_tensor = _tensor_reg->getAclTensor(ifm_index).get(); const auto norm_info = ::arm_compute::NormalizationLayerInfo( ::arm_compute::NormType::CROSS_MAP, radius * 2 + 1, alpha, beta, bias, false); - auto fn = std::make_unique<::arm_compute::NENormalizationLayer>(); - - fn->configure(ifm_tensor->handle(), ofm_tensor->handle(), norm_info); - - auto acl_fn = 
asAclFunction(std::move(fn)); + auto fn = acl_common::generateLayer<arm_compute::NENormalizationLayer>( + ifm_tensor->handle(), ofm_tensor->handle(), norm_info); - _return_fn = std::move(acl_fn); -} - -void KernelGenerator::visit(const ir::operation::LogicalAnd &node) -{ - const auto output_index{node.getOutputs().at(0)}; - const auto input0_index{node.getInputs().at(ir::operation::LogicalAnd::Input::INPUT0)}; - const auto input1_index{node.getInputs().at(ir::operation::LogicalAnd::Input::INPUT1)}; - - auto output_tensor = _tensor_builder->at(output_index).get(); - auto input0_tensor = _tensor_builder->at(input0_index).get(); - auto input1_tensor = _tensor_builder->at(input1_index).get(); - - auto fn = std::make_unique<::arm_compute::NELogicalAnd>(); - - fn->configure(input0_tensor->handle(), input1_tensor->handle(), output_tensor->handle()); - - auto acl_fn = asAclFunction(std::move(fn)); - - _return_fn = std::move(acl_fn); -} - -void KernelGenerator::visit(const ir::operation::LogicalNot &node) -{ - const auto output_index{node.getOutputs().at(0)}; - const auto input_index{node.getInputs().at(ir::operation::LogicalNot::Input::INPUT)}; - - auto output_tensor = _tensor_builder->at(output_index).get(); - auto input_tensor = _tensor_builder->at(input_index).get(); - - auto fn = std::make_unique<::arm_compute::NEBitwiseNot>(); - - fn->configure(input_tensor->handle(), output_tensor->handle()); - - auto acl_fn = asAclFunction(std::move(fn)); - - _return_fn = std::move(acl_fn); -} - -void KernelGenerator::visit(const ir::operation::LogicalOr &node) -{ - const auto output_index{node.getOutputs().at(0)}; - const auto input0_index{node.getInputs().at(ir::operation::LogicalOr::Input::INPUT0)}; - const auto input1_index{node.getInputs().at(ir::operation::LogicalOr::Input::INPUT1)}; - - auto output_tensor = _tensor_builder->at(output_index).get(); - auto input0_tensor = _tensor_builder->at(input0_index).get(); - auto input1_tensor = _tensor_builder->at(input1_index).get(); - - auto fn = std::make_unique<::arm_compute::NELogicalOr>(); - - fn->configure(input0_tensor->handle(), input1_tensor->handle(), output_tensor->handle()); - - auto acl_fn = asAclFunction(std::move(fn)); - - _return_fn = std::move(acl_fn); -} - -void KernelGenerator::visit(const ir::operation::Logistic &node) -{ - const auto ofm_index{node.getOutputs().at(0)}; - const auto ifm_index{node.getInputs().at(ir::operation::Logistic::Input::INPUT)}; - - auto ofm_tensor = _tensor_builder->at(ofm_index).get(); - auto ifm_tensor = _tensor_builder->at(ifm_index).get(); - - const ::arm_compute::ActivationLayerInfo act_info{ - ::arm_compute::ActivationLayerInfo::ActivationFunction::LOGISTIC}; - - // NOTE NEActivationLayer can generate produce erroneous results. it were caused by 'vexpq_f32()'. - // The neon function returns a value outside of the limit of representation in float as 'NaN' - // instead of 'INF', and then the result of this op will be errors due to the 'NaN'. 
- auto fn = std::make_unique<::arm_compute::NEActivationLayerEx>(); - - fn->configure(ifm_tensor->handle(), ofm_tensor->handle(), act_info); - - auto acl_fn = asAclFunction(std::move(fn)); - - _return_fn = std::move(acl_fn); + _return_fn = asAclFunction(std::move(fn)); } void KernelGenerator::visit(const ir::operation::LSTM &node) { _return_fn = acl_common::kernelGenLSTM<acl_common::AclFunction, ::arm_compute::ITensor, - ::arm_compute::NELSTMLayer>(node, _ctx, _tensor_builder); -} - -void KernelGenerator::visit(const ir::operation::Mul &node) -{ - const auto ofm_index{node.getOutputs().at(0)}; - const auto lhs_index{node.getInputs().at(ir::operation::Mul::Input::LHS)}; - const auto rhs_index{node.getInputs().at(ir::operation::Mul::Input::RHS)}; - - const auto activation = node.param().activation; - - auto ofm_tensor = _tensor_builder->at(ofm_index).get(); - auto lhs_tensor = _tensor_builder->at(lhs_index).get(); - auto rhs_tensor = _tensor_builder->at(rhs_index).get(); - - auto fn = std::make_unique<::arm_compute::NEPixelWiseMultiplication>(); - - // RoundingPolicy for scale:1.0 is only allowed RoundingPolicy::TO_ZERO - fn->configure(lhs_tensor->handle(), rhs_tensor->handle(), ofm_tensor->handle(), 1.0, // scale - arm_compute::ConvertPolicy::SATURATE, arm_compute::RoundingPolicy::TO_ZERO); - - _return_fn = std::make_unique<exec::FunctionSequence>( - asAclFunction(std::move(fn)), ActivationBuilder::generate(activation, ofm_tensor->handle())); -} - -void KernelGenerator::visit(const ir::operation::Neg &node) -{ - const auto ofm_index{node.getOutputs().at(0)}; - const auto ifm_index{node.getInputs().at(ir::operation::Neg::Input::INPUT)}; - - auto ofm_tensor = _tensor_builder->at(ofm_index).get(); - auto ifm_tensor = _tensor_builder->at(ifm_index).get(); - - auto fn = std::make_unique<::arm_compute::NENegLayer>(); - - fn->configure(ifm_tensor->handle(), ofm_tensor->handle()); - - auto acl_fn = asAclFunction(std::move(fn)); - - _return_fn = std::move(acl_fn); + ::arm_compute::NELSTMLayer>(node, _ctx, _tensor_reg); } void KernelGenerator::visit(const ir::operation::Pack &node) @@ -736,25 +682,23 @@ void KernelGenerator::visit(const ir::operation::Pack &node) for (const auto &input_index : node.getInputs()) input_indexes.emplace_back(input_index); - auto output = _tensor_builder->at(output_index).get()->handle(); + auto output = _tensor_reg->getAclTensor(output_index).get()->handle(); std::vector<arm_compute::ITensor *> inputs; for (const auto &input_index : input_indexes) - inputs.emplace_back(_tensor_builder->at(input_index)->handle()); + inputs.emplace_back(_tensor_reg->getAclTensor(input_index)->handle()); const auto frontend_layout = _current_op_seq_layout; - const auto backend_layout = _tensor_builder->at(output_index).get()->layout(); + const auto backend_layout = _tensor_reg->getAclTensor(output_index).get()->layout(); if (axis < 0) axis += output_rank; axis = acl_common::ToARMComputeAxis(output_rank, axis, frontend_layout, backend_layout).value(); - auto fn = std::make_unique<::arm_compute::NEStackLayer>(); - // Disable applied dim_correction for (const auto &input_index : input_indexes) { size_t input_rank = _ctx.at(input_index).shape().rank(); - const auto &input_tensor = _tensor_builder->at(input_index); + const auto &input_tensor = _tensor_reg->getAclTensor(input_index); assert(input_rank == input_tensor->num_dimensions()); if (input_rank != input_tensor->info()->num_dimensions()) { @@ -764,7 +708,7 @@ void KernelGenerator::visit(const ir::operation::Pack &node) } } - 
fn->configure(inputs, axis, output);
+  auto fn = acl_common::generateLayer<arm_compute::NEStackLayer>(inputs, axis, output);
   // acl_neon does not revert the disabled dim_correction because acl_neon's kernels
   // use arm_compute::TensorInfo::offset_element_in_bytes()
@@ -783,8 +727,8 @@ void KernelGenerator::visit(const ir::operation::Pad &node)
   auto rank = _ctx.at(input_index).shape().rank();
   auto pad_base = _ctx.at(pad_index).data()->base();
-  auto input = _tensor_builder->at(input_index).get()->handle();
-  auto output = _tensor_builder->at(output_index).get()->handle();
+  auto input = _tensor_reg->getAclTensor(input_index).get()->handle();
+  auto output = _tensor_reg->getAclTensor(output_index).get()->handle();
   ::arm_compute::PaddingList padding_list;
   padding_list.resize(rank);
@@ -793,7 +737,7 @@
     const int32_t *from = reinterpret_cast<const int32_t *>(pad_base) + (n * 2);
     const auto frontend_layout = _current_op_seq_layout;
-    const auto backend_layout = _tensor_builder->at(input_index).get()->layout();
+    const auto backend_layout = _tensor_reg->getAclTensor(input_index).get()->layout();
     const auto axis =
         acl_common::ToARMComputeAxis(rank, n, frontend_layout, backend_layout).value();
     padding_list[axis] = ::arm_compute::PaddingInfo{from[0], from[1]};
@@ -807,19 +751,33 @@
   const auto pixel_value =
       ::arm_compute::PixelValue(0, input->info()->data_type(), input->info()->quantization_info());
-  auto fn = std::make_unique<::arm_compute::NEPadLayer>();
-  fn->configure(input, output, padding_list, pixel_value);
+  auto fn =
+      acl_common::generateLayer<arm_compute::NEPadLayer>(input, output, padding_list, pixel_value);
   _return_fn = asAclFunction(std::move(fn));
 }
+void KernelGenerator::visit(const ir::operation::Pool2D &node)
+{
+  auto raw_fn = acl_common::kernelGenPool2D<::arm_compute::NEPoolingLayer>(
+      node, _ctx, _tensor_reg, _current_op_seq_layout,
+      acl_common::convertPoolType(node.param().op_type));
+
+  const auto ofm_index{node.getOutputs().at(0)};
+  auto ofm_tensor = _tensor_reg->getAclTensor(ofm_index).get();
+  const auto activation = node.param().activation;
+  _return_fn = std::make_unique<exec::FunctionSequence>(
+      asAclFunction(std::move(raw_fn)),
+      ActivationBuilder::generate(activation, ofm_tensor->handle()));
+}
+
 void KernelGenerator::visit(const ir::operation::Permute &node)
 {
   const auto ofm_idx{node.getOutputs().at(0)};
   const auto ifm_idx{node.getInputs().at(0)};
   const auto permute_type = node.getPermuteType();
-  auto ofm_tensor = _tensor_builder->at(ofm_idx).get();
-  auto ifm_tensor = _tensor_builder->at(ifm_idx).get();
+  auto ofm_tensor = _tensor_reg->getAclTensor(ofm_idx).get();
+  auto ifm_tensor = _tensor_reg->getAclTensor(ifm_idx).get();
   const auto rank = _ctx.at(ofm_idx).shape().rank();
   assert(_ctx.at(ifm_idx).shape().rank() == _ctx.at(ofm_idx).shape().rank());
@@ -830,35 +788,22 @@ void KernelGenerator::visit(const ir::operation::Permute &node)
     // WHCN -> CWHN
     pv = arm_compute::PermutationVector{2, 0, 1};
-    auto l = std::make_unique<::arm_compute::NEPermute>();
-
-    l->configure(ifm_tensor->handle(), ofm_tensor->handle(), pv);
-
-    fn = std::move(l);
+    fn = acl_common::generateLayer<arm_compute::NEPermute>(ifm_tensor->handle(),
+                                                           ofm_tensor->handle(), pv);
   }
   else if (permute_type == ir::operation::Permute::Type::NHWC_TO_NCHW && rank == 4)
   {
     // CWHN -> WHCN
     pv = arm_compute::PermutationVector{1, 2, 0};
-    auto l =
std::make_unique<::arm_compute::NEPermute>(); - - l->configure(ifm_tensor->handle(), ofm_tensor->handle(), pv); - - fn = std::move(l); + fn = acl_common::generateLayer<arm_compute::NEPermute>(ifm_tensor->handle(), + ofm_tensor->handle(), pv); } else { - auto l = std::make_unique<::arm_compute::NECopy>(); - - l->configure(ifm_tensor->handle(), ofm_tensor->handle()); - - fn = std::move(l); + fn = acl_common::generateLayer<arm_compute::NECopy>(ifm_tensor->handle(), ofm_tensor->handle()); } - - auto acl_fn = asAclFunction(std::move(fn)); - - _return_fn = std::move(acl_fn); + _return_fn = asAclFunction(std::move(fn)); } void KernelGenerator::visit(const ir::operation::PReLU &node) @@ -867,21 +812,14 @@ void KernelGenerator::visit(const ir::operation::PReLU &node) const auto ifm_index{node.getInputs().at(ir::operation::PReLU::Input::INPUT)}; const auto alpha_index{node.getInputs().at(ir::operation::PReLU::Input::ALPHA)}; - auto ofm_tensor = _tensor_builder->at(ofm_index).get(); - auto ifm_tensor = _tensor_builder->at(ifm_index).get(); - auto alpha_tensor = _tensor_builder->at(alpha_index).get(); + auto ofm_tensor = _tensor_reg->getAclTensor(ofm_index).get(); + auto ifm_tensor = _tensor_reg->getAclTensor(ifm_index).get(); + auto alpha_tensor = _tensor_reg->getAclTensor(alpha_index).get(); - std::unique_ptr<::arm_compute::IFunction> fn; - - auto l = std::make_unique<::arm_compute::NEPReluLayer>(); - - l->configure(ifm_tensor->handle(), alpha_tensor->handle(), ofm_tensor->handle()); - - fn = std::move(l); + auto fn = acl_common::generateLayer<arm_compute::NEPReluLayer>( + ifm_tensor->handle(), alpha_tensor->handle(), ofm_tensor->handle()); - auto acl_fn = asAclFunction(std::move(fn)); - - _return_fn = std::move(acl_fn); + _return_fn = asAclFunction(std::move(fn)); } void KernelGenerator::visit(const ir::operation::Reduce &node) @@ -890,8 +828,8 @@ void KernelGenerator::visit(const ir::operation::Reduce &node) const auto input_index{node.getInputs().at(ir::operation::Reduce::Input::INPUT)}; const auto axes_index{node.getInputs().at(ir::operation::Reduce::Input::AXES)}; - auto output_tensor = _tensor_builder->at(output_index).get(); - auto input_tensor = _tensor_builder->at(input_index).get(); + auto output_tensor = _tensor_reg->getAclTensor(output_index).get(); + auto input_tensor = _tensor_reg->getAclTensor(input_index).get(); // Convert to ACL axes taking into account negative values and possible duplicates. 
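  // e.g. for a rank-4 input, frontend axes {-1, 3} both normalize to axis 3 and then map to
  // a single ACL axis (axis 0 when the frontend and backend layouts match, since ACL stores
  // dimensions in reverse order)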
const auto &axes = _ctx.at(axes_index); @@ -906,93 +844,21 @@ void KernelGenerator::visit(const ir::operation::Reduce &node) std::unique_ptr<::arm_compute::IFunction> fn; if (reduce_type == ir::operation::Reduce::ReduceType::MEAN) { - auto l = std::make_unique<::arm_compute::NEReduceMean>(); - - l->configure(input_tensor->handle(), reduce_axes, keep_dims, output_tensor->handle()); - - fn = std::move(l); + fn = acl_common::generateLayer<arm_compute::NEReduceMean>(input_tensor->handle(), reduce_axes, + keep_dims, output_tensor->handle()); } else if (reduce_type == ir::operation::Reduce::ReduceType::SUM) { - auto l = std::make_unique<::arm_compute::NEReduceSum>(); - - l->configure(input_tensor->handle(), reduce_axes, keep_dims, output_tensor->handle()); - - fn = std::move(l); + fn = acl_common::generateLayer<arm_compute::NEReduceSum>(input_tensor->handle(), reduce_axes, + keep_dims, output_tensor->handle()); } else { - auto l = std::make_unique<::arm_compute::NEReduceOperation>(); - - l->configure(input_tensor->handle(), reduce_axes, keep_dims, output_tensor->handle(), - acl_common::convertReduceType(reduce_type)); - - fn = std::move(l); + fn = acl_common::generateLayer<arm_compute::NEReduceOperation>( + input_tensor->handle(), reduce_axes, keep_dims, output_tensor->handle(), + acl_common::convertReduceType(reduce_type)); } - - auto acl_fn = asAclFunction(std::move(fn)); - - _return_fn = std::move(acl_fn); -} - -void KernelGenerator::visit(const ir::operation::ReLU &node) -{ - const auto output_index{node.getOutputs().at(0)}; - const auto input_index{node.getInputs().at(ir::operation::ReLU::Input::INPUT)}; - - auto output_tensor = _tensor_builder->at(output_index).get(); - auto input_tensor = _tensor_builder->at(input_index).get(); - - auto fn = std::make_unique<arm_compute::NEActivationLayer>(); - - const ::arm_compute::ActivationLayerInfo act_info{ - ::arm_compute::ActivationLayerInfo::ActivationFunction::RELU}; - - fn->configure(input_tensor->handle(), output_tensor->handle(), act_info); - - auto acl_fn = asAclFunction(std::move(fn)); - - _return_fn = std::move(acl_fn); -} - -void KernelGenerator::visit(const ir::operation::ReLU1 &node) -{ - const auto ofm_index{node.getOutputs().at(0)}; - const auto ifm_index{node.getInputs().at(ir::operation::ReLU1::Input::INPUT)}; - - auto ofm_tensor = _tensor_builder->at(ofm_index).get(); - auto ifm_tensor = _tensor_builder->at(ifm_index).get(); - - const ::arm_compute::ActivationLayerInfo act_info{ - ::arm_compute::ActivationLayerInfo::ActivationFunction::LU_BOUNDED_RELU, 1.0f, -1.0f}; - - auto fn = std::make_unique<::arm_compute::NEActivationLayer>(); - - fn->configure(ifm_tensor->handle(), ofm_tensor->handle(), act_info); - - auto acl_fn = asAclFunction(std::move(fn)); - - _return_fn = std::move(acl_fn); -} - -void KernelGenerator::visit(const ir::operation::ReLU6 &node) -{ - const auto ofm_index{node.getOutputs().at(0)}; - const auto ifm_index{node.getInputs().at(ir::operation::ReLU6::Input::INPUT)}; - - auto ofm_tensor = _tensor_builder->at(ofm_index).get(); - auto ifm_tensor = _tensor_builder->at(ifm_index).get(); - - const ::arm_compute::ActivationLayerInfo act_info{ - ::arm_compute::ActivationLayerInfo::ActivationFunction::BOUNDED_RELU, 6.0f}; - - auto fn = std::make_unique<::arm_compute::NEActivationLayer>(); - - fn->configure(ifm_tensor->handle(), ofm_tensor->handle(), act_info); - - auto acl_fn = asAclFunction(std::move(fn)); - - _return_fn = std::move(acl_fn); + _return_fn = asAclFunction(std::move(fn)); } void KernelGenerator::visit(const 
 void KernelGenerator::visit(const ir::operation::Reshape &node)
@@ -1000,8 +866,8 @@ void KernelGenerator::visit(const ir::operation::Reshape &node)
   const auto output_index{node.getOutputs().at(0)};
   const auto input_index{node.getInputs().at(ir::operation::Reshape::Input::INPUT)};
 
-  auto output_tensor = _tensor_builder->at(output_index).get();
-  auto input_tensor = _tensor_builder->at(input_index).get();
+  auto output_tensor = _tensor_reg->getAclTensor(output_index).get();
+  auto input_tensor = _tensor_reg->getAclTensor(input_index).get();
 
   // NOTE This operation must not change the layout from frontend to backend.
   //      So, PermutationOperationPass makes the layouts of frontend and backend the same.
@@ -1012,13 +878,10 @@ void KernelGenerator::visit(const ir::operation::Reshape &node)
   UNUSED_RELEASE(frontend_layout);
   UNUSED_RELEASE(backend_layout);
 
-  auto fn = std::make_unique<arm_compute::NEReshapeLayer>();
-
-  fn->configure(input_tensor->handle(), output_tensor->handle());
+  auto fn = acl_common::generateLayer<arm_compute::NEReshapeLayer>(input_tensor->handle(),
+                                                                   output_tensor->handle());
 
-  auto acl_fn = asAclFunction(std::move(fn));
-
-  _return_fn = std::move(acl_fn);
+  _return_fn = asAclFunction(std::move(fn));
 }
 
 void KernelGenerator::visit(const ir::operation::ResizeBilinear &node)
@@ -1027,18 +890,15 @@
   const auto ifm_index{node.getInputs().at(ir::operation::ResizeBilinear::Input::INPUT)};
 
-  auto ofm_tensor = _tensor_builder->at(ofm_index).get();
-  auto ifm_tensor = _tensor_builder->at(ifm_index).get();
-
-  auto fn = std::make_unique<::arm_compute::NEScale>();
+  auto ofm_tensor = _tensor_reg->getAclTensor(ofm_index).get();
+  auto ifm_tensor = _tensor_reg->getAclTensor(ifm_index).get();
 
-  fn->configure(ifm_tensor->handle(), ofm_tensor->handle(),
-                ::arm_compute::InterpolationPolicy::BILINEAR, ::arm_compute::BorderMode::REPLICATE,
-                ::arm_compute::PixelValue(0.f), ::arm_compute::SamplingPolicy::TOP_LEFT);
+  auto fn = acl_common::generateLayer<arm_compute::NEScale>(
+      ifm_tensor->handle(), ofm_tensor->handle(), ::arm_compute::InterpolationPolicy::BILINEAR,
+      ::arm_compute::BorderMode::REPLICATE, ::arm_compute::PixelValue(0.f),
+      ::arm_compute::SamplingPolicy::TOP_LEFT);
 
-  auto acl_fn = asAclFunction(std::move(fn));
-
-  _return_fn = std::move(acl_fn);
+  _return_fn = asAclFunction(std::move(fn));
 }
 
 void KernelGenerator::visit(const ir::operation::RNN &node)
@@ -1056,40 +916,24 @@
   const auto activation = node.param().activation;
 
-  auto output_tensor = _tensor_builder->at(output_index).get();
-  auto hidden_state_out_tensor = _tensor_builder->at(hidden_state_out_index).get();
+  auto output_tensor = _tensor_reg->getAclTensor(output_index).get();
+  auto hidden_state_out_tensor = _tensor_reg->getAclTensor(hidden_state_out_index).get();
 
-  auto input_tensor = _tensor_builder->at(input_index).get();
-  auto weights_tensor = _tensor_builder->at(weights_index).get();
-  auto recurrent_weights_tensor = _tensor_builder->at(recurrent_weights_index).get();
-  auto bias_tensor = _tensor_builder->at(bias_index).get();
-  auto hidden_state_in_tensor = _tensor_builder->at(hidden_state_in_index).get();
+  auto input_tensor = _tensor_reg->getAclTensor(input_index).get();
+  auto weights_tensor = _tensor_reg->getAclTensor(weights_index).get();
+  auto recurrent_weights_tensor = _tensor_reg->getAclTensor(recurrent_weights_index).get();
+  auto bias_tensor = _tensor_reg->getAclTensor(bias_index).get();
+  auto hidden_state_in_tensor = _tensor_reg->getAclTensor(hidden_state_in_index).get();
 
   auto act_info = ::onert::backend::acl_common::asActivationLayerInfo(activation);
 
-  auto copy_layer = std::make_unique<::arm_compute::NECopy>();
-  copy_layer->configure(hidden_state_in_tensor->handle(), hidden_state_out_tensor->handle());
+  auto copy_layer = acl_common::generateLayer<arm_compute::NECopy>(
+      hidden_state_in_tensor->handle(), hidden_state_out_tensor->handle());
   _return_fn = asAclFunction(std::move(copy_layer));
 
-  auto fn = std::make_unique<::arm_compute::NERNNLayer>(
-      _tensor_builder->acl_tensor_manager()->internal_buffer_manager());
-  fn->configure(input_tensor->handle(), weights_tensor->handle(),
-                recurrent_weights_tensor->handle(), bias_tensor->handle(),
-                hidden_state_out_tensor->handle(), output_tensor->handle(), act_info);
-  _return_fn = asAclFunction(std::move(fn));
-}
-
-void KernelGenerator::visit(const ir::operation::RSQRT &node)
-{
-  const auto ofm_index{node.getOutputs().at(0)};
-  const auto ifm_index{node.getInputs().at(ir::operation::RSQRT::Input::INPUT)};
-
-  auto ofm_tensor = _tensor_builder->at(ofm_index).get();
-  auto ifm_tensor = _tensor_builder->at(ifm_index).get();
-
-  auto fn = std::make_unique<::arm_compute::NERsqrtLayer>();
-
-  fn->configure(ifm_tensor->handle(), ofm_tensor->handle());
-
+  auto fn = acl_common::generateLayer<arm_compute::NERNNLayer>(
+      _tensor_builder->acl_tensor_manager()->internal_buffer_manager(), input_tensor->handle(),
+      weights_tensor->handle(), recurrent_weights_tensor->handle(), bias_tensor->handle(),
+      hidden_state_out_tensor->handle(), output_tensor->handle(), act_info);
   _return_fn = asAclFunction(std::move(fn));
 }
 
@@ -1105,32 +949,11 @@ void KernelGenerator::visit(const ir::operation::Squeeze &node)
   (void)dims;
   (void)ndim;
 
-  auto output_tensor = _tensor_builder->at(output_index).get();
-  auto input_tensor = _tensor_builder->at(input_index).get();
-  auto fn = std::make_unique<arm_compute::NEReshapeLayer>();
-  fn->configure(input_tensor->handle(), output_tensor->handle());
-  auto acl_fn = asAclFunction(std::move(fn));
-  _return_fn = std::move(acl_fn);
-}
-
-void KernelGenerator::visit(const ir::operation::Tanh &node)
-{
-  const auto output_index{node.getOutputs().at(0)};
-  const auto input_index{node.getInputs().at(ir::operation::Tanh::Input::INPUT)};
-
-  auto output_tensor = _tensor_builder->at(output_index).get();
-  auto input_tensor = _tensor_builder->at(input_index).get();
-
-  auto fn = std::make_unique<arm_compute::NEActivationLayer>();
-
-  const ::arm_compute::ActivationLayerInfo act_info{
-      ::arm_compute::ActivationLayerInfo::ActivationFunction::TANH, 1.0f, 1.0f};
-
-  fn->configure(input_tensor->handle(), output_tensor->handle(), act_info);
-
-  auto acl_fn = asAclFunction(std::move(fn));
-
-  _return_fn = std::move(acl_fn);
+  auto output_tensor = _tensor_reg->getAclTensor(output_index).get();
+  auto input_tensor = _tensor_reg->getAclTensor(input_index).get();
+  auto fn = acl_common::generateLayer<arm_compute::NEReshapeLayer>(input_tensor->handle(),
+                                                                   output_tensor->handle());
+  _return_fn = asAclFunction(std::move(fn));
 }
 
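The RNN hunk above passes the internal buffer manager as an extra first argument to generateLayer<arm_compute::NERNNLayer>. Layers such as NERNNLayer and NESoftmaxLayer accept a memory manager in their constructor rather than in configure(), so the helper presumably provides an overload along these lines (a hedged sketch, not the verbatim acl_common code):

template <typename Layer, typename... Args>
std::unique_ptr<arm_compute::IFunction>
generateLayer(std::shared_ptr<arm_compute::IMemoryManager> memory_manager, Args &&... args)
{
  // The memory manager feeds the constructor; everything else goes to configure().
  auto l = std::make_unique<Layer>(memory_manager);
  l->configure(std::forward<Args>(args)...);
  return l;
}
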
 void KernelGenerator::visit(const ir::operation::Softmax &node)
@@ -1139,8 +962,8 @@
   const auto input_index{node.getInputs().at(ir::operation::Softmax::Input::INPUT)};
   const auto beta = node.param().beta;
 
-  auto output_tensor = _tensor_builder->at(output_index).get();
-  auto input_tensor = _tensor_builder->at(input_index).get();
+  auto output_tensor = _tensor_reg->getAclTensor(output_index).get();
+  auto input_tensor = _tensor_reg->getAclTensor(input_index).get();
 
   const auto frontend_layout = _current_op_seq_layout;
   const auto backend_layout = input_tensor->layout();
@@ -1154,14 +977,11 @@ void KernelGenerator::visit(const ir::operation::Softmax &node)
         acl_common::asTensorShape(input.shape(), frontend_layout, backend_layout, false));
   }
 
-  auto fn = std::make_unique<::arm_compute::NESoftmaxLayer>(
-      _tensor_builder->acl_tensor_manager()->internal_buffer_manager());
-
-  fn->configure(input_tensor->handle(), output_tensor->handle(), beta);
+  auto fn = acl_common::generateLayer<arm_compute::NESoftmaxLayer>(
+      _tensor_builder->acl_tensor_manager()->internal_buffer_manager(), input_tensor->handle(),
+      output_tensor->handle(), beta);
 
-  auto acl_fn = asAclFunction(std::move(fn));
-
-  _return_fn = std::move(acl_fn);
+  _return_fn = asAclFunction(std::move(fn));
 }
 
 void KernelGenerator::visit(const ir::operation::SpaceToBatchND &node)
@@ -1172,22 +992,19 @@ void KernelGenerator::visit(const ir::operation::SpaceToBatchND &node)
       node.getInputs().at(ir::operation::SpaceToBatchND::Input::BLOCK_SIZE)};
   const auto paddings_index{node.getInputs().at(ir::operation::SpaceToBatchND::Input::PADDINGS)};
 
-  auto ofm_tensor = _tensor_builder->at(ofm_index).get();
-  auto ifm_tensor = _tensor_builder->at(ifm_index).get();
-  auto block_size_tensor = _tensor_builder->at(block_size_index).get();
-  auto paddings_tensor = _tensor_builder->at(paddings_index).get();
+  auto ofm_tensor = _tensor_reg->getAclTensor(ofm_index).get();
+  auto ifm_tensor = _tensor_reg->getAclTensor(ifm_index).get();
+  auto block_size_tensor = _tensor_reg->getAclTensor(block_size_index).get();
+  auto paddings_tensor = _tensor_reg->getAclTensor(paddings_index).get();
 
   assert(_ctx.at(block_size_index).data());
   assert(_ctx.at(paddings_index).data());
 
-  auto fn = std::make_unique<::arm_compute::NESpaceToBatchLayer>();
-
-  fn->configure(ifm_tensor->handle(), block_size_tensor->handle(), paddings_tensor->handle(),
-                ofm_tensor->handle());
+  auto fn = acl_common::generateLayer<arm_compute::NESpaceToBatchLayer>(
+      ifm_tensor->handle(), block_size_tensor->handle(), paddings_tensor->handle(),
+      ofm_tensor->handle());
 
-  auto acl_fn = asAclFunction(std::move(fn));
-
-  _return_fn = std::move(acl_fn);
+  _return_fn = asAclFunction(std::move(fn));
 }
 
 void KernelGenerator::visit(const ir::operation::SpaceToDepth &node)
@@ -1197,16 +1014,13 @@ void KernelGenerator::visit(const ir::operation::SpaceToDepth &node)
 
   auto block_size = node.param().block_size;
 
-  auto ofm_tensor = _tensor_builder->at(ofm_index).get();
-  auto ifm_tensor = _tensor_builder->at(ifm_index).get();
-
-  auto fn = std::make_unique<::arm_compute::NESpaceToDepthLayer>();
+  auto ofm_tensor = _tensor_reg->getAclTensor(ofm_index).get();
+  auto ifm_tensor = _tensor_reg->getAclTensor(ifm_index).get();
 
-  fn->configure(ifm_tensor->handle(), ofm_tensor->handle(), block_size);
+  auto fn = acl_common::generateLayer<arm_compute::NESpaceToDepthLayer>(
+      ifm_tensor->handle(), ofm_tensor->handle(), block_size);
 
-  auto acl_fn = asAclFunction(std::move(fn));
-
-  _return_fn = std::move(acl_fn);
+  _return_fn = asAclFunction(std::move(fn));
 }
 
 void KernelGenerator::visit(const ir::operation::Split &node)
@@ -1221,10 +1035,10 @@ void KernelGenerator::visit(const ir::operation::Split &node)
   for (const auto &output : node.getOutputs())
     output_indexes.emplace_back(output);
 
-  auto ifm_tensor = _tensor_builder->at(ifm_index).get();
+  auto ifm_tensor = _tensor_reg->getAclTensor(ifm_index).get();
   std::vector<arm_compute::ITensor *> output_tensors;
   for (const auto &ofm_ind : output_indexes)
-    output_tensors.emplace_back(_tensor_builder->at(ofm_ind).get()->handle());
+    output_tensors.emplace_back(_tensor_reg->getAclTensor(ofm_ind).get()->handle());
 
   const auto frontend_layout = _current_op_seq_layout;
   const auto backend_layout = ifm_tensor->layout();
@@ -1233,71 +1047,26 @@ void KernelGenerator::visit(const ir::operation::Split &node)
     axis += ifm_rank;
   axis = acl_common::ToARMComputeAxis(ifm_rank, axis, frontend_layout, backend_layout).value();
 
-  auto fn = std::make_unique<::arm_compute::NESplit>();
-
-  fn->configure(ifm_tensor->handle(), output_tensors, axis);
+  auto fn =
+      acl_common::generateLayer<arm_compute::NESplit>(ifm_tensor->handle(), output_tensors, axis);
 
   _return_fn = asAclFunction(std::move(fn));
 }
 
-void KernelGenerator::visit(const ir::operation::SQRT &node)
-{
-  const auto output_index{node.getOutputs().at(0)};
-  const auto input_index{node.getInputs().at(ir::operation::SQRT::Input::INPUT)};
-
-  auto output_tensor = _tensor_builder->at(output_index).get();
-  auto input_tensor = _tensor_builder->at(input_index).get();
-
-  const ::arm_compute::ActivationLayerInfo act_info{
-      ::arm_compute::ActivationLayerInfo::ActivationFunction::SQRT};
-
-  auto fn = std::make_unique<::arm_compute::NEActivationLayer>();
-
-  fn->configure(input_tensor->handle(), output_tensor->handle(), act_info);
-
-  auto acl_fn = asAclFunction(std::move(fn));
-
-  _return_fn = std::move(acl_fn);
-}
-
 void KernelGenerator::visit(const ir::operation::SquaredDifference &node)
 {
   const auto ofm_index{node.getOutputs().at(0)};
   const auto lhs_index{node.getInputs().at(ir::operation::SquaredDifference::Input::LHS)};
   const auto rhs_index{node.getInputs().at(ir::operation::SquaredDifference::Input::RHS)};
 
-  auto ofm_tensor = _tensor_builder->at(ofm_index).get();
-  auto lhs_tensor = _tensor_builder->at(lhs_index).get();
-  auto rhs_tensor = _tensor_builder->at(rhs_index).get();
-
-  auto fn = std::make_unique<::arm_compute::NEElementwiseSquaredDiff>();
+  auto ofm_tensor = _tensor_reg->getAclTensor(ofm_index).get();
+  auto lhs_tensor = _tensor_reg->getAclTensor(lhs_index).get();
+  auto rhs_tensor = _tensor_reg->getAclTensor(rhs_index).get();
 
-  fn->configure(lhs_tensor->handle(), rhs_tensor->handle(), ofm_tensor->handle());
+  auto fn = acl_common::generateLayer<arm_compute::NEElementwiseSquaredDiff>(
+      lhs_tensor->handle(), rhs_tensor->handle(), ofm_tensor->handle());
 
-  auto acl_fn = asAclFunction(std::move(fn));
-
-  _return_fn = std::move(acl_fn);
-}
-
-void KernelGenerator::visit(const ir::operation::Sub &node)
-{
-  const auto ofm_index{node.getOutputs().at(0)};
-  const auto lhs_index{node.getInputs().at(ir::operation::Sub::Input::LHS)};
-  const auto rhs_index{node.getInputs().at(ir::operation::Sub::Input::RHS)};
-
-  const auto activation = node.param().activation;
-
-  auto ofm_tensor = _tensor_builder->at(ofm_index).get();
-  auto lhs_tensor = _tensor_builder->at(lhs_index).get();
-  auto rhs_tensor = _tensor_builder->at(rhs_index).get();
-
-  auto fn = std::make_unique<::arm_compute::NEArithmeticSubtraction>();
-
-  fn->configure(lhs_tensor->handle(), rhs_tensor->handle(), ofm_tensor->handle(),
-                arm_compute::ConvertPolicy::SATURATE);
-
-  _return_fn = std::make_unique<exec::FunctionSequence>(
-      asAclFunction(std::move(fn)), ActivationBuilder::generate(activation, ofm_tensor->handle()));
+  _return_fn = asAclFunction(std::move(fn));
 }
 
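The removed Sub visitor above shows the pattern used for operations with a fused activation: the arithmetic kernel and the activation run as one exec::FunctionSequence. The consolidated BinaryArithmetic visitor that supersedes Add/Sub/Div (declared in the header diff further down) can reuse the same chaining, copied here from the removed lines:

_return_fn = std::make_unique<exec::FunctionSequence>(
    asAclFunction(std::move(fn)), ActivationBuilder::generate(activation, ofm_tensor->handle()));
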
 void KernelGenerator::visit(const ir::operation::Slice &node)
@@ -1307,8 +1076,8 @@ void KernelGenerator::visit(const ir::operation::Slice &node)
   const auto begins_index{node.getInputs().at(ir::operation::Slice::Input::BEGINS)};
   const auto sizes_index{node.getInputs().at(ir::operation::Slice::Input::SIZES)};
 
-  auto outputData_tensor = _tensor_builder->at(output_index).get();
-  auto inputData_tensor = _tensor_builder->at(input_index).get();
+  auto outputData_tensor = _tensor_reg->getAclTensor(output_index).get();
+  auto inputData_tensor = _tensor_reg->getAclTensor(input_index).get();
 
   const auto frontend_layout = _current_op_seq_layout;
   const auto backend_layout = inputData_tensor->layout();
@@ -1358,13 +1127,10 @@ void KernelGenerator::visit(const ir::operation::Slice &node)
     ends_set.set(i, ends[i]);
   }
 
-  auto fn = std::make_unique<::arm_compute::NESlice>();
-
-  fn->configure(inputData_tensor->handle(), outputData_tensor->handle(), starts_set, ends_set);
+  auto fn = acl_common::generateLayer<arm_compute::NESlice>(
+      inputData_tensor->handle(), outputData_tensor->handle(), starts_set, ends_set);
 
-  auto acl_fn = asAclFunction(std::move(fn));
-
-  _return_fn = std::move(acl_fn);
+  _return_fn = asAclFunction(std::move(fn));
 }
 
 void KernelGenerator::visit(const ir::operation::StridedSlice &node)
@@ -1375,8 +1141,8 @@
   const auto ends_index{node.getInputs().at(ir::operation::StridedSlice::Input::ENDS)};
   const auto strides_index{node.getInputs().at(ir::operation::StridedSlice::Input::STRIDES)};
 
-  auto outputData_tensor = _tensor_builder->at(output_index).get();
-  auto inputData_tensor = _tensor_builder->at(input_index).get();
+  auto outputData_tensor = _tensor_reg->getAclTensor(output_index).get();
+  auto inputData_tensor = _tensor_reg->getAclTensor(input_index).get();
 
   const auto frontend_layout = _current_op_seq_layout;
   const auto backend_layout = inputData_tensor->layout();
@@ -1445,14 +1211,11 @@ void KernelGenerator::visit(const ir::operation::StridedSlice &node)
     strides_set.set(i, strides[i]);
   }
 
-  auto fn = std::make_unique<::arm_compute::NEStridedSlice>();
+  auto fn = acl_common::generateLayer<arm_compute::NEStridedSlice>(
+      inputData_tensor->handle(), outputData_tensor->handle(), starts_set, ends_set, strides_set,
+      begin_mask, end_mask, shrink_axis_mask);
 
-  fn->configure(inputData_tensor->handle(), outputData_tensor->handle(), starts_set, ends_set,
-                strides_set, begin_mask, end_mask, shrink_axis_mask);
-
-  auto acl_fn = asAclFunction(std::move(fn));
-
-  _return_fn = std::move(acl_fn);
+  _return_fn = asAclFunction(std::move(fn));
 }
 
 void KernelGenerator::visit(const ir::operation::TransposeConv &node)
@@ -1481,20 +1244,17 @@ void KernelGenerator::visit(const ir::operation::TransposeConv &node)
     invalid_vertical = ofm_shape.H - (1 + (ifm_shape.H - 1) * stride.vertical) - (ker_shape.H - 1);
   }
 
-  auto ofm_tensor = _tensor_builder->at(ofm_index).get();
-  auto ifm_tensor = _tensor_builder->at(ifm_index).get();
-  auto ker_tensor = _tensor_builder->at(ker_index).get();
+  auto ofm_tensor = _tensor_reg->getAclTensor(ofm_index).get();
+  auto ifm_tensor = _tensor_reg->getAclTensor(ifm_index).get();
+  auto ker_tensor = _tensor_reg->getAclTensor(ker_index).get();
 
   const auto tconv_info = acl_common::asPadStrideInfo(padding, stride);
 
-  auto fn = std::make_unique<::arm_compute::NETransposeConvLayer>();
+  auto fn = acl_common::generateLayer<arm_compute::NETransposeConvLayer>(
+      ifm_tensor->handle(), ker_tensor->handle(), nullptr, ofm_tensor->handle(), tconv_info,
+      invalid_horizontal, invalid_vertical);
 
-  fn->configure(ifm_tensor->handle(), ker_tensor->handle(), nullptr, ofm_tensor->handle(),
-                tconv_info, invalid_horizontal, invalid_vertical);
-
-  auto acl_fn = asAclFunction(std::move(fn));
-
-  _return_fn = std::move(acl_fn);
+  _return_fn = asAclFunction(std::move(fn));
 }
 
 void KernelGenerator::visit(const ir::operation::Transpose &node)
@@ -1503,8 +1263,8 @@ void KernelGenerator::visit(const ir::operation::Transpose &node)
   const auto ifm_idx{node.getInputs().at(ir::operation::Transpose::Input::INPUT)};
   const auto &perm{node.param().perm};
 
-  auto ofm_tensor = _tensor_builder->at(ofm_idx).get();
-  const auto ifm_tensor = _tensor_builder->at(ifm_idx).get();
+  auto ofm_tensor = _tensor_reg->getAclTensor(ofm_idx).get();
+  const auto ifm_tensor = _tensor_reg->getAclTensor(ifm_idx).get();
 
   const auto frontend_layout = _current_op_seq_layout;
   const auto backend_layout = ifm_tensor->layout();
@@ -1514,27 +1274,17 @@ void KernelGenerator::visit(const ir::operation::Transpose &node)
       rank, pv, frontend_layout, backend_layout);
 
   std::unique_ptr<::arm_compute::IFunction> fn;
-
   if (ifm_tensor->num_dimensions() <= 2 && ofm_tensor->num_dimensions() <= 2)
   {
-    auto l = std::make_unique<::arm_compute::NETranspose>();
-
-    l->configure(ifm_tensor->handle(), ofm_tensor->handle());
-
-    fn = std::move(l);
+    fn = acl_common::generateLayer<arm_compute::NETranspose>(ifm_tensor->handle(),
+                                                             ofm_tensor->handle());
   }
   else
   {
-    auto l = std::make_unique<::arm_compute::NEPermute>();
-
-    l->configure(ifm_tensor->handle(), ofm_tensor->handle(), backend_pv);
-
-    fn = std::move(l);
+    fn = acl_common::generateLayer<arm_compute::NEPermute>(ifm_tensor->handle(),
+                                                           ofm_tensor->handle(), backend_pv);
   }
-
-  auto acl_fn = asAclFunction(std::move(fn));
-
-  _return_fn = std::move(acl_fn);
+  _return_fn = asAclFunction(std::move(fn));
 }
 
 void KernelGenerator::visit(const ir::operation::Unpack &node)
@@ -1548,25 +1298,23 @@ void KernelGenerator::visit(const ir::operation::Unpack &node)
   for (const auto &output_index : node.getOutputs())
     output_indexes.emplace_back(output_index);
 
-  auto input = _tensor_builder->at(input_index).get()->handle();
+  auto input = _tensor_reg->getAclTensor(input_index).get()->handle();
   std::vector<arm_compute::ITensor *> outputs;
   for (const auto &output_index : output_indexes)
-    outputs.emplace_back(_tensor_builder->at(output_index)->handle());
+    outputs.emplace_back(_tensor_reg->getAclTensor(output_index)->handle());
 
   const auto frontend_layout = _current_op_seq_layout;
-  const auto backend_layout = _tensor_builder->at(input_index).get()->layout();
+  const auto backend_layout = _tensor_reg->getAclTensor(input_index).get()->layout();
   if (axis < 0)
     axis += input_rank;
   axis = acl_common::ToARMComputeAxis(input_rank, axis, frontend_layout, backend_layout).value();
 
-  auto fn = std::make_unique<::arm_compute::NEUnstack>();
-
   // Disable applied dim_correction
   std::vector<arm_compute::TensorShape> orig_outputs_acl_tensor_shapes;
   for (const auto &output_index : output_indexes)
   {
     size_t output_rank = _ctx.at(output_index).shape().rank();
-    const auto &output_tensor = _tensor_builder->at(output_index);
+    const auto &output_tensor = _tensor_reg->getAclTensor(output_index);
     orig_outputs_acl_tensor_shapes.emplace_back(output_tensor->info()->tensor_shape());
     assert(output_rank == output_tensor->num_dimensions());
     if (output_rank != output_tensor->info()->num_dimensions())
@@ -1577,84 +1325,23 @@ void KernelGenerator::visit(const ir::operation::Unpack &node)
     }
   }
 
-  fn->configure(input, outputs, axis);
+  auto fn = acl_common::generateLayer<arm_compute::NEUnstack>(input, outputs, axis);
 
   _return_fn = asAclFunction(std::move(fn));
 }
 
-void KernelGenerator::visit(const ir::operation::Add &node)
-{
-  const auto ofm_index{node.getOutputs().at(0)};
-  const auto lhs_index{node.getInputs().at(ir::operation::Add::Input::LHS)};
-  const auto rhs_index{node.getInputs().at(ir::operation::Add::Input::RHS)};
-
-  const auto activation = node.param().activation;
-
-  auto ofm_tensor = _tensor_builder->at(ofm_index).get();
-  auto lhs_tensor = _tensor_builder->at(lhs_index).get();
-  auto rhs_tensor = _tensor_builder->at(rhs_index).get();
-
-  auto fn = std::make_unique<::arm_compute::NEArithmeticAddition>();
-
-  fn->configure(lhs_tensor->handle(), rhs_tensor->handle(), ofm_tensor->handle(),
-                arm_compute::ConvertPolicy::SATURATE);
-
-  _return_fn = std::make_unique<exec::FunctionSequence>(
-      asAclFunction(std::move(fn)), ActivationBuilder::generate(activation, ofm_tensor->handle()));
-}
-
-void KernelGenerator::visit(const ir::operation::Div &node)
-{
-  const auto ofm_index{node.getOutputs().at(0)};
-  const auto lhs_index{node.getInputs().at(ir::operation::Div::Input::LHS)};
-  const auto rhs_index{node.getInputs().at(ir::operation::Div::Input::RHS)};
-
-  const auto activation = node.param().activation;
-
-  auto ofm_tensor = _tensor_builder->at(ofm_index).get();
-  auto lhs_tensor = _tensor_builder->at(lhs_index).get();
-  auto rhs_tensor = _tensor_builder->at(rhs_index).get();
-
-  auto fn = std::make_unique<::arm_compute::NEElementwiseDivision>();
-
-  fn->configure(lhs_tensor->handle(), rhs_tensor->handle(), ofm_tensor->handle());
-
-  _return_fn = std::make_unique<exec::FunctionSequence>(
-      asAclFunction(std::move(fn)), ActivationBuilder::generate(activation, ofm_tensor->handle()));
-}
-
-void KernelGenerator::visit(const ir::operation::Exp &node)
-{
-  const auto output_index{node.getOutputs().at(0)};
-  const auto input_index{node.getInputs().at(ir::operation::Exp::Input::INPUT)};
-
-  auto output_tensor = _tensor_builder->at(output_index).get();
-  auto input_tensor = _tensor_builder->at(input_index).get();
-
-  auto fn = std::make_unique<::arm_compute::NEExpLayer>();
-
-  fn->configure(input_tensor->handle(), output_tensor->handle());
-
-  auto acl_fn = asAclFunction(std::move(fn));
-
-  _return_fn = std::move(acl_fn);
-}
-
 void KernelGenerator::visit(const ir::operation::ExpandDims &node)
 {
   const auto output_index{node.getOutputs().at(0)};
   const auto input_index{node.getInputs().at(ir::operation::ExpandDims::Input::INPUT)};
 
-  auto output_tensor = _tensor_builder->at(output_index).get();
-  auto input_tensor = _tensor_builder->at(input_index).get();
+  auto output_tensor = _tensor_reg->getAclTensor(output_index).get();
+  auto input_tensor = _tensor_reg->getAclTensor(input_index).get();
 
-  auto fn = std::make_unique<::arm_compute::NEReshapeLayer>();
+  auto fn = acl_common::generateLayer<arm_compute::NEReshapeLayer>(input_tensor->handle(),
+                                                                   output_tensor->handle());
 
-  fn->configure(input_tensor->handle(), output_tensor->handle());
-
-  auto acl_fn = asAclFunction(std::move(fn));
-
-  _return_fn = std::move(acl_fn);
+  _return_fn = asAclFunction(std::move(fn));
 }
 
 void KernelGenerator::visit(const ir::operation::Comparison &node)
@@ -1665,56 +1352,15 @@ void KernelGenerator::visit(const ir::operation::Comparison &node)
 
   const auto comparison_type = node.param().comparison_type;
 
-  auto output_tensor = _tensor_builder->at(output_index).get();
-  auto input0_tensor = _tensor_builder->at(input0_index).get();
-  auto input1_tensor = _tensor_builder->at(input1_index).get();
-
-  auto fn = std::make_unique<::arm_compute::NEElementwiseComparison>();
-
-  fn->configure(input0_tensor->handle(), input1_tensor->handle(), output_tensor->handle(),
-                (arm_compute::ComparisonOperation)comparison_type);
-
-  auto acl_fn = asAclFunction(std::move(fn));
-
-  _return_fn = std::move(acl_fn);
-}
-
-void KernelGenerator::visit(const ir::operation::Min &node)
-{
-  const auto ofm_index{node.getOutputs().at(0)};
-  const auto lhs_index{node.getInputs().at(ir::operation::Min::Input::LHS)};
-  const auto rhs_index{node.getInputs().at(ir::operation::Min::Input::RHS)};
-
-  auto ofm_tensor = _tensor_builder->at(ofm_index).get();
-  auto lhs_tensor = _tensor_builder->at(lhs_index).get();
-  auto rhs_tensor = _tensor_builder->at(rhs_index).get();
-
-  auto fn = std::make_unique<::arm_compute::NEElementwiseMin>();
+  auto output_tensor = _tensor_reg->getAclTensor(output_index).get();
+  auto input0_tensor = _tensor_reg->getAclTensor(input0_index).get();
+  auto input1_tensor = _tensor_reg->getAclTensor(input1_index).get();
 
-  fn->configure(lhs_tensor->handle(), rhs_tensor->handle(), ofm_tensor->handle());
+  auto fn = acl_common::generateLayer<arm_compute::NEElementwiseComparison>(
+      input0_tensor->handle(), input1_tensor->handle(), output_tensor->handle(),
+      (arm_compute::ComparisonOperation)comparison_type);
 
-  auto acl_fn = asAclFunction(std::move(fn));
-
-  _return_fn = std::move(acl_fn);
-}
-
-void KernelGenerator::visit(const ir::operation::Max &node)
-{
-  const auto ofm_index{node.getOutputs().at(0)};
-  const auto lhs_index{node.getInputs().at(ir::operation::Max::Input::LHS)};
-  const auto rhs_index{node.getInputs().at(ir::operation::Max::Input::RHS)};
-
-  auto ofm_tensor = _tensor_builder->at(ofm_index).get();
-  auto lhs_tensor = _tensor_builder->at(lhs_index).get();
-  auto rhs_tensor = _tensor_builder->at(rhs_index).get();
-
-  auto fn = std::make_unique<::arm_compute::NEElementwiseMax>();
-
-  fn->configure(lhs_tensor->handle(), rhs_tensor->handle(), ofm_tensor->handle());
-
-  auto acl_fn = asAclFunction(std::move(fn));
-
-  _return_fn = std::move(acl_fn);
+  _return_fn = asAclFunction(std::move(fn));
 }
 
 void KernelGenerator::visit(const ir::operation::OneHot &node)
@@ -1726,17 +1372,16 @@ void KernelGenerator::visit(const ir::operation::OneHot &node)
   const auto offvalue_idx{node.getInputs().at(ir::operation::OneHot::Input::OFF_VALUE)};
   const auto axis = node.param().axis;
 
-  auto output_tensor = _tensor_builder->at(out_idx).get();
-  auto indices_tensor = _tensor_builder->at(indices_idx).get();
-  auto depth_tensor = _tensor_builder->at(depth_idx).get();
-  auto onvalue_tensor = _tensor_builder->at(onvalue_idx).get();
-  auto offvalue_tensor = _tensor_builder->at(offvalue_idx).get();
-
-  auto fn = std::make_unique<::arm_compute::CPPOneHotEx>();
-  fn->configure(indices_tensor->handle(), depth_tensor->handle(), onvalue_tensor->handle(),
-                offvalue_tensor->handle(), output_tensor->handle(), axis);
-  auto acl_fn = asAclFunction(std::move(fn));
-  _return_fn = std::move(acl_fn);
+  auto output_tensor = _tensor_reg->getAclTensor(out_idx).get();
+  auto indices_tensor = _tensor_reg->getAclTensor(indices_idx).get();
+  auto depth_tensor = _tensor_reg->getAclTensor(depth_idx).get();
+  auto onvalue_tensor = _tensor_reg->getAclTensor(onvalue_idx).get();
+  auto offvalue_tensor = _tensor_reg->getAclTensor(offvalue_idx).get();
+
+  auto fn = acl_common::generateLayer<arm_compute::CPPOneHotEx>(
+      indices_tensor->handle(), depth_tensor->handle(), onvalue_tensor->handle(),
+      offvalue_tensor->handle(), output_tensor->handle(), axis);
+  _return_fn = asAclFunction(std::move(fn));
 }
 
 } // namespace acl_neon
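The header below now threads a std::shared_ptr<acl_common::AclTensorRegistry<TensorManager>> into the kernel generator, and the .cc hunks above resolve tensors through _tensor_reg->getAclTensor(...) instead of _tensor_builder->at(...). A plausible minimal shape for that wrapper, assuming it merely adapts the ACL tensor manager to the backend-agnostic ITensorRegistry lookup interface (the real acl_common header may differ):

template <typename T_AclTensorManager> class AclTensorRegistry : public ITensorRegistry
{
public:
  AclTensorRegistry(T_AclTensorManager *tensor_mgr) : _tensor_mgr{tensor_mgr} {}

  // Generic lookups used by backend-independent code.
  std::shared_ptr<ITensor> getITensor(const ir::OperandIndex &ind) override
  {
    return _tensor_mgr->at(ind);
  }
  std::shared_ptr<ITensor> getNativeITensor(const ir::OperandIndex &ind) override
  {
    return getITensor(ind);
  }

  // Backend-typed lookup used by the ACL kernel generators.
  auto getAclTensor(const ir::OperandIndex &ind) { return _tensor_mgr->at(ind); }

private:
  T_AclTensorManager *_tensor_mgr;
};
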
diff --git a/runtime/onert/backend/acl_neon/KernelGenerator.h b/runtime/onert/backend/acl_neon/KernelGenerator.h
index d6f7932b7..4d269cde5 100644
--- a/runtime/onert/backend/acl_neon/KernelGenerator.h
+++ b/runtime/onert/backend/acl_neon/KernelGenerator.h
@@ -21,6 +21,8 @@
 #include "ir/Operands.h"
 #include "TensorBuilder.h"
+#include "AclTensorRegistry.h"
+#include "TensorManager.h"
 
 namespace onert
 {
@@ -33,75 +35,57 @@ class KernelGenerator : public IKernelGenerator
 {
 public:
   KernelGenerator(const ir::Operands &operands_ctx, const ir::Operations &operations_ctx,
-                  const std::shared_ptr<TensorBuilder> &tensor_builder);
+                  const std::shared_ptr<TensorBuilder> &tensor_builder,
+                  const std::shared_ptr<acl_common::AclTensorRegistry<TensorManager>> &_tensor_reg);
 
   void visit(const ir::OpSequence &) override;
-  void visit(const ir::operation::Abs &) override;
   void visit(const ir::operation::ArgMax &) override;
   void visit(const ir::operation::BatchToSpaceND &) override;
-  void visit(const ir::operation::Cast &) override;
+  void visit(const ir::operation::BinaryArithmetic &) override;
   void visit(const ir::operation::Conv2D &) override;
   void visit(const ir::operation::DepthToSpace &) override;
   void visit(const ir::operation::DepthwiseConv2D &) override;
-  void visit(const ir::operation::Dequantize &) override;
-  void visit(const ir::operation::MaxPool2D &) override;
-  void visit(const ir::operation::AvgPool2D &) override;
   void visit(const ir::operation::Concat &) override;
+  void visit(const ir::operation::ElementwiseActivation &) override;
+  void visit(const ir::operation::ElementwiseBinary &) override;
+  void visit(const ir::operation::ElementwiseUnary &) override;
   void visit(const ir::operation::EmbeddingLookup &) override;
-  void visit(const ir::operation::Floor &) override;
   void visit(const ir::operation::FullyConnected &) override;
   void visit(const ir::operation::Gather &) override;
   void visit(const ir::operation::HashtableLookup &) override;
   void visit(const ir::operation::InstanceNorm &) override;
   void visit(const ir::operation::L2Normalization &) override;
-  void visit(const ir::operation::L2Pool2D &) override;
   void visit(const ir::operation::LocalResponseNormalization &) override;
-  void visit(const ir::operation::LogicalAnd &) override;
-  void visit(const ir::operation::LogicalNot &) override;
-  void visit(const ir::operation::LogicalOr &) override;
-  void visit(const ir::operation::Logistic &) override;
   void visit(const ir::operation::LSTM &) override;
-  void visit(const ir::operation::Mul &) override;
-  void visit(const ir::operation::Neg &) override;
   void visit(const ir::operation::Pack &) override;
   void visit(const ir::operation::Pad &) override;
+  void visit(const ir::operation::Pool2D &) override;
   void visit(const ir::operation::Permute &) override;
   void visit(const ir::operation::PReLU &) override;
   void visit(const ir::operation::Reduce &) override;
-  void visit(const ir::operation::ReLU &) override;
-  void visit(const ir::operation::ReLU1 &) override;
-  void visit(const ir::operation::ReLU6 &) override;
   void visit(const ir::operation::Reshape &) override;
   void visit(const ir::operation::ResizeBilinear &) override;
   void visit(const ir::operation::RNN &) override;
-  void visit(const ir::operation::RSQRT &) override;
   void visit(const ir::operation::Squeeze &) override;
-  void visit(const ir::operation::Tanh &) override;
   void visit(const ir::operation::Softmax &) override;
   void visit(const ir::operation::SpaceToBatchND &) override;
   void visit(const ir::operation::SpaceToDepth &) override;
   void visit(const ir::operation::Split &) override;
-  void visit(const ir::operation::SQRT &) override;
   void visit(const ir::operation::SquaredDifference &) override;
-  void visit(const ir::operation::Sub &) override;
   void visit(const ir::operation::Slice &) override;
   void visit(const ir::operation::StridedSlice &) override;
   void visit(const ir::operation::TransposeConv &) override;
   void visit(const ir::operation::Transpose &) override;
   void visit(const ir::operation::Unpack &) override;
-  void visit(const ir::operation::Add &) override;
-  void visit(const ir::operation::Div &) override;
-  void visit(const ir::operation::Exp &) override;
   void visit(const ir::operation::ExpandDims &) override;
   void visit(const ir::operation::Comparison &) override;
-  void visit(const ir::operation::Min &) override;
-  void visit(const ir::operation::Max &) override;
   void visit(const ir::operation::OneHot &) override;
 
 private:
   const ir::Operands &_ctx;
   const ir::Operations &_operations_ctx;
   std::shared_ptr<TensorBuilder> _tensor_builder;
+  std::shared_ptr<acl_common::AclTensorRegistry<TensorManager>> _tensor_reg;
   ir::Layout _current_op_seq_layout;
 };
 
diff --git a/runtime/onert/backend/acl_neon/Optimizer.cc b/runtime/onert/backend/acl_neon/Optimizer.cc
index 2948cab09..ac80901cc 100644
--- a/runtime/onert/backend/acl_neon/Optimizer.cc
+++ b/runtime/onert/backend/acl_neon/Optimizer.cc
@@ -19,7 +19,7 @@
 #include "ParentInfo.h"
 
 #include <cassert>
-#include <ir/LoweredGraph.h>
+#include <compiler/LoweredGraph.h>
 #include <util/logging.h>
 #include "AclSubTensorAnalyzer.h"
 
diff --git a/runtime/onert/backend/acl_neon/TensorManager.h b/runtime/onert/backend/acl_neon/TensorManager.h
index 3ec9efa8f..3b7cfbcfd 100644
--- a/runtime/onert/backend/acl_neon/TensorManager.h
+++ b/runtime/onert/backend/acl_neon/TensorManager.h
@@ -55,7 +55,7 @@ using InternalBufferManager = acl_common::AclInternalBufferManager<
 using TensorManager =
     acl_common::AclTensorManager<acl_neon::operand::INETensor, operand::NETensor,
                                  operand::NESubTensor>;
 
-TensorManager *createTensorManager(bool is_linear_executor)
+inline TensorManager *createTensorManager(bool is_linear_executor)
 {
   if (is_linear_executor)
   {
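The TensorManager.h change just above is a correctness fix rather than a refactor: createTensorManager is defined in a header, so every translation unit that includes it emits a definition, and without inline the link step fails with a duplicate symbol. A self-contained illustration with hypothetical file names (not from this patch):

// util.h
#pragma once
inline int make_id() { return 42; } // 'inline' marks the definition as mergeable across TUs

// a.cc and b.cc may both '#include "util.h"'. Without 'inline' on make_id(),
// linking a.o and b.o would fail with "multiple definition of `make_id()'".
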
diff --git a/runtime/onert/backend/cpu/Backend.h b/runtime/onert/backend/cpu/Backend.h
index 56bd352e0..fc8574b26 100644
--- a/runtime/onert/backend/cpu/Backend.h
+++ b/runtime/onert/backend/cpu/Backend.h
@@ -47,10 +47,12 @@ public:
     const auto &operands = graph.operands();
     const auto &operations = graph.operations();
     auto context = std::make_unique<BackendContext>(this, &graph);
-    auto tb = std::make_shared<TensorBuilder>();
+    auto tr = std::make_shared<cpu_common::TensorRegistry>();
+    auto tb = std::make_shared<TensorBuilder>(tr);
+    context->tensor_registry = tr;
     context->tensor_builder = tb;
-    context->constant_initializer = std::make_shared<ConstantInitializer>(operands, tb);
-    context->kernel_gen = std::make_shared<KernelGenerator>(operands, operations, tb, kb,
+    context->constant_initializer = std::make_shared<ConstantInitializer>(operands, tr);
+    context->kernel_gen = std::make_shared<KernelGenerator>(operands, operations, tb, tr, kb,
                                                             context->external_context());
     context->tensor_register = nullptr;
     context->optimizer = nullptr;
 
diff --git a/runtime/onert/backend/cpu/BackendContext.h b/runtime/onert/backend/cpu/BackendContext.h
index f314a8e39..e90b21054 100644
--- a/runtime/onert/backend/cpu/BackendContext.h
+++ b/runtime/onert/backend/cpu/BackendContext.h
@@ -31,13 +31,15 @@ class BackendContext : public onert::backend::BackendContext
 {
 public:
   BackendContext(const Backend *backend, const ir::Graph *graph,
+                 std::shared_ptr<ITensorRegistry> tensor_registry = nullptr,
                  std::shared_ptr<ITensorBuilder> tensor_builder = nullptr,
                  std::shared_ptr<IConstantInitializer> constant_initializer = nullptr,
                  std::shared_ptr<IKernelGenerator> kernel_gen = nullptr,
                  std::shared_ptr<ITensorRegister> tensor_register = nullptr,
                  std::shared_ptr<IOptimizer> optimizer = nullptr)
-      : onert::backend::BackendContext(backend, graph, tensor_builder, constant_initializer,
-                                       kernel_gen, tensor_register, optimizer),
+      : onert::backend::BackendContext(backend, graph, tensor_registry, tensor_builder,
+                                       constant_initializer, kernel_gen, tensor_register,
+                                       optimizer),
         _external_context(new ExternalContext)
   {
   }
 
diff --git a/runtime/onert/backend/cpu/ConstantInitializer.cc b/runtime/onert/backend/cpu/ConstantInitializer.cc
index deb27f0fe..6f6eb77bc 100644
--- a/runtime/onert/backend/cpu/ConstantInitializer.cc
+++ b/runtime/onert/backend/cpu/ConstantInitializer.cc
@@ -25,8 +25,8 @@ namespace cpu
 {
 
 ConstantInitializer::ConstantInitializer(const ir::Operands &operands,
-                                         const std::shared_ptr<TensorBuilder> &tensor_builder)
-    : IConstantInitializer{operands}, _tensor_builder{tensor_builder}
+                                         const std::shared_ptr<ITensorRegistry> &tensor_reg)
+    : IConstantInitializer{operands}, _tensor_reg{tensor_reg}
 {
   // DO NOTHING
 }
 
diff --git a/runtime/onert/backend/cpu/ConstantInitializer.h b/runtime/onert/backend/cpu/ConstantInitializer.h
index de03a693a..c016c83bc 100644
--- a/runtime/onert/backend/cpu/ConstantInitializer.h
+++ b/runtime/onert/backend/cpu/ConstantInitializer.h
@@ -17,7 +17,7 @@
 #ifndef __ONERT_COMPILER_CPU_CONSTANT_INITIALIZER_H__
 #define __ONERT_COMPILER_CPU_CONSTANT_INITIALIZER_H__
 
-#include "TensorBuilder.h"
+#include "backend/cpu_common/TensorRegistry.h"
 
 #include <backend/IConstantInitializer.h>
 #include <ir/Operands.h>
@@ -33,7 +33,7 @@ class ConstantInitializer : public IConstantInitializer
 {
 public:
   ConstantInitializer(const ir::Operands &operands,
-                      const std::shared_ptr<TensorBuilder> &tensor_builder);
+                      const std::shared_ptr<ITensorRegistry> &tensor_reg);
 
 public:
   void registerDefaultInitializer(const ir::OperandIndex &index, const ir::Operand &obj) override;
@@ -50,10 +50,10 @@ public:
   void visit(const ir::operation::FullyConnected &) override;
 
 private:
-  std::shared_ptr<ITensorBuilder> tensor_builder() const override { return _tensor_builder; }
+  std::shared_ptr<ITensorRegistry> tensor_registry() const override { return _tensor_reg; }
 
 private:
-  std::shared_ptr<TensorBuilder> _tensor_builder;
+  std::shared_ptr<ITensorRegistry> _tensor_reg;
 };
 
 } // namespace cpu
 
diff --git a/runtime/onert/backend/cpu/KernelGenerator.cc b/runtime/onert/backend/cpu/KernelGenerator.cc
index 7939fe894..74b6f0c6b 100644
--- a/runtime/onert/backend/cpu/KernelGenerator.cc
+++ b/runtime/onert/backend/cpu/KernelGenerator.cc
@@ -16,49 +16,36 @@
 #include "KernelGenerator.h"
 
-#include "ops/AbsLayer.h"
-#include "ops/AddLayer.h"
 #include "ops/ArgMinMaxLayer.h"
-#include "ops/AvgPoolLayer.h"
 #include "ops/BatchToSpaceNDLayer.h"
-#include "ops/CastLayer.h"
+#include "ops/BinaryArithmeticLayer.h"
 #include "ops/CompareLayer.h"
 #include "ops/ConcatLayer.h"
 #include "ops/ConvolutionLayer.h"
-#include "ops/CosLayer.h"
 #include "ops/DepthwiseConvolutionLayer.h"
-#include "ops/DivLayer.h"
 #include "ops/EinsumLayer.h"
"ops/EinsumLayer.h" -#include "ops/ExpLayer.h" +#include "ops/ElementwiseActivationLayer.h" +#include "ops/ElementwiseBinaryLayer.h" +#include "ops/ElementwiseUnaryLayer.h" #include "ops/ExpandDimsLayer.h" #include "ops/FillLayer.h" #include "ops/FullyConnectedLayer.h" #include "ops/GatherLayer.h" -#include "ops/LogLayer.h" -#include "ops/LogisticLayer.h" -#include "ops/MaxLayer.h" -#include "ops/MaxPoolLayer.h" #include "ops/MeanLayer.h" -#include "ops/MinLayer.h" -#include "ops/MulLayer.h" -#include "ops/NegLayer.h" #include "ops/OneHotLayer.h" #include "ops/OperationUtils.h" #include "ops/PackLayer.h" #include "ops/PadLayer.h" +#include "ops/PoolLayer.h" #include "ops/PowLayer.h" #include "ops/RangeLayer.h" +#include "ops/RankLayer.h" #include "ops/ReduceLayer.h" -#include "ops/ReLULayer.h" -#include "ops/ReLU6Layer.h" #include "ops/ReshapeLayer.h" #include "ops/ResizeBilinearLayer.h" #include "ops/ReverseLayer.h" -#include "ops/RoundLayer.h" -#include "ops/RsqrtLayer.h" #include "ops/SelectLayer.h" #include "ops/ShapeLayer.h" -#include "ops/SinLayer.h" #include "ops/SliceLayer.h" #include "ops/SoftMaxLayer.h" #include "ops/StridedSliceLayer.h" @@ -66,22 +53,16 @@ #include "ops/SpaceToDepthLayer.h" #include "ops/SplitLayer.h" #include "ops/SplitVLayer.h" -#include "ops/SubLayer.h" -#include "ops/TanhLayer.h" #include "ops/TileLayer.h" #include "ops/TransposeLayer.h" #include "ops/UnpackLayer.h" -#include "ops/LogicalNotLayer.h" -#include "ops/ZerosLikeLayer.h" #include "ops/SquaredDiffLayer.h" -#include "ops/LogicalOrLayer.h" #include "ops/L2NormLayer.h" #include "ops/MatrixBandPartLayer.h" #include "ops/BatchMatMulLayer.h" #include "ops/BroadcastToLayer.h" #include "ops/FusedBatchNormLayer.h" #include "ops/LogSoftMaxLayer.h" -#include "ops/QuantizeLayer.h" #include "ops/StatelessRandomUniformLayer.h" #include <backend/Backend.h> @@ -102,6 +83,104 @@ namespace cpu namespace { +ops::ArithmeticType +convertArithmeticType(ir::operation::BinaryArithmetic::ArithmeticType arithmetic_type_ir) +{ + switch (arithmetic_type_ir) + { + case ir::operation::BinaryArithmetic::ArithmeticType::ADD: + return ops::ArithmeticType::kAdd; + case ir::operation::BinaryArithmetic::ArithmeticType::SUB: + return ops::ArithmeticType::kSub; + case ir::operation::BinaryArithmetic::ArithmeticType::MUL: + return ops::ArithmeticType::kMul; + case ir::operation::BinaryArithmetic::ArithmeticType::DIV: + return ops::ArithmeticType::kDiv; + default: + throw std::runtime_error("cpu KernelGenerator : Not supported operation yet"); + } +} + +ops::ElementwiseActivationType +convertElementwiseActivationType(ir::operation::ElementwiseActivation::Type type_ir) +{ + switch (type_ir) + { + case ir::operation::ElementwiseActivation::Type::LOGISTIC: + return ops::ElementwiseActivationType::kLogistic; + case ir::operation::ElementwiseActivation::Type::RELU: + return ops::ElementwiseActivationType::kReLU; + case ir::operation::ElementwiseActivation::Type::TANH: + return ops::ElementwiseActivationType::kTanh; + default: + throw std::runtime_error("cpu KernelGenerator : Not supported operation yet"); + } +} + +ops::ElementwiseBinaryType +convertElementwiseBinaryType(ir::operation::ElementwiseBinary::ElementwiseBinaryType type_ir) +{ + switch (type_ir) + { + case ir::operation::ElementwiseBinary::ElementwiseBinaryType::LOGICAL_OR: + return ops::ElementwiseBinaryType::kLogicalOr; + case ir::operation::ElementwiseBinary::ElementwiseBinaryType::MAX: + return ops::ElementwiseBinaryType::kMax; + case 
+      return ops::ElementwiseBinaryType::kMin;
+    default:
+      throw std::runtime_error("cpu KernelGenerator : Not supported operation yet");
+  }
+}
+
+ops::ElementwiseUnaryType convertElementwiseUnaryType(ir::operation::ElementwiseUnary::Type type_ir)
+{
+  switch (type_ir)
+  {
+    case ir::operation::ElementwiseUnary::Type::ABS:
+      return ops::ElementwiseUnaryType::kAbs;
+    case ir::operation::ElementwiseUnary::Type::CAST:
+      return ops::ElementwiseUnaryType::kCast;
+    case ir::operation::ElementwiseUnary::Type::COS:
+      return ops::ElementwiseUnaryType::kCos;
+    case ir::operation::ElementwiseUnary::Type::ERF:
+      return ops::ElementwiseUnaryType::kErf;
+    case ir::operation::ElementwiseUnary::Type::EXP:
+      return ops::ElementwiseUnaryType::kExp;
+    case ir::operation::ElementwiseUnary::Type::LOG:
+      return ops::ElementwiseUnaryType::kLog;
+    case ir::operation::ElementwiseUnary::Type::LOGICAL_NOT:
+      return ops::ElementwiseUnaryType::kLogicalNot;
+    case ir::operation::ElementwiseUnary::Type::NEG:
+      return ops::ElementwiseUnaryType::kNeg;
+    case ir::operation::ElementwiseUnary::Type::QUANTIZE:
+      return ops::ElementwiseUnaryType::kQuantize;
+    case ir::operation::ElementwiseUnary::Type::ROUND:
+      return ops::ElementwiseUnaryType::kRound;
+    case ir::operation::ElementwiseUnary::Type::RSQRT:
+      return ops::ElementwiseUnaryType::kRSqrt;
+    case ir::operation::ElementwiseUnary::Type::SIN:
+      return ops::ElementwiseUnaryType::kSin;
+    case ir::operation::ElementwiseUnary::Type::ZEROS_LIKE:
+      return ops::ElementwiseUnaryType::kZerosLike;
+    default:
+      throw std::runtime_error("cpu KernelGenerator : Not supported operation yet");
+  }
+}
+
+ops::PoolType convertPoolType(ir::operation::Pool2D::PoolType type_ir)
+{
+  switch (type_ir)
+  {
+    case ir::operation::Pool2D::PoolType::AVG:
+      return ops::PoolType::kAvg;
+    case ir::operation::Pool2D::PoolType::MAX:
+      return ops::PoolType::kMax;
+    default:
+      throw std::runtime_error("cpu KernelGenerator : Not supported operation yet");
+  }
+}
+
 ops::ReduceType convertReduceType(ir::operation::Reduce::ReduceType reduce_type_ir)
 {
   switch (reduce_type_ir)
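These convert* helpers translate the IR-level enums into the cpu ops enums once, at kernel-generation time, so a single fused layer class can serve several former single-purpose operations; each switch throws on values it cannot map yet. A hedged usage sketch in the style of the visitors below (the actual ElementwiseUnary visitor is not part of the excerpted hunks):

// Inside a hypothetical visit(ir::operation::ElementwiseUnary) body:
auto fn = std::make_unique<ops::ElementwiseUnaryLayer>();
fn->configure(input_tensor, output_tensor,
              convertElementwiseUnaryType(node.param().op_type));
_return_fn = std::move(fn);
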
@@ -127,11 +206,12 @@ KernelGenerator::KernelGenerator(
     const ir::Operands &operands_ctx, const ir::Operations &operations_ctx,
     const std::shared_ptr<TensorBuilder> &tensor_builder,
+    const std::shared_ptr<cpu_common::TensorRegistry> &tensor_reg,
     const std::shared_ptr<backend::custom::IKernelBuilder> &kernel_builder,
     const std::shared_ptr<ExternalContext> &external_context)
     : _ctx(operands_ctx), _operations_ctx{operations_ctx}, _tensor_builder(tensor_builder),
-      _kernel_builder(kernel_builder), _current_op_seq_layout(ir::Layout::UNKNOWN),
-      _external_context(external_context)
+      _tensor_reg{tensor_reg}, _kernel_builder(kernel_builder),
+      _current_op_seq_layout(ir::Layout::UNKNOWN), _external_context(external_context)
 {
   // DO NOTHING
 }
@@ -140,11 +220,9 @@ void KernelGenerator::visit(const ir::OpSequence &op_seq)
 {
   assert(!_return_fn_seq);
   assert(_tensor_builder->dynamicTensorManager());
-  assert(_tensor_builder->tensorRegistry());
+  assert(_tensor_reg);
 
-  auto dyn_tensor_manager = _tensor_builder->dynamicTensorManager();
-  auto dyn_shape_inferer = std::make_shared<exec::DynamicShapeInferer>(
-      _ctx, dyn_tensor_manager, _tensor_builder->tensorRegistry());
+  auto dyn_shape_inferer = std::make_shared<exec::DynamicShapeInferer>(_ctx, _tensor_reg);
 
   _return_fn_seq = std::make_unique<exec::FunctionSequence>();
 
@@ -154,7 +232,7 @@ void KernelGenerator::visit(const ir::OpSequence &op_seq)
     dyn_ctx->op_seq = &op_seq;
     dyn_ctx->operations = &_operations_ctx;
     dyn_ctx->dynamic_shape_inferer = std::move(dyn_shape_inferer);
-    dyn_ctx->tensor_registry = _tensor_builder->tensorRegistry();
+    dyn_ctx->tensor_registry = _tensor_reg;
     dyn_ctx->dynamic_tensor_manager = _tensor_builder->dynamicTensorManager();
 
     _return_fn_seq->dynamic_tensor_ctx(dyn_ctx);
@@ -170,13 +248,13 @@ void KernelGenerator::visit(const ir::OpSequence &op_seq)
 
     for (const auto &ind : (node.getInputs() | ir::Remove::UNDEFINED) + node.getOutputs())
     {
-      auto portable_tensor = _tensor_builder->portableAt(ind);
+      auto portable_tensor = _tensor_reg->getPortableTensor(ind);
       if (portable_tensor)
      {
         assert(portable_tensor->layout() == ir::Layout::NHWC);
       }
-      auto tensor = _tensor_builder->at(ind);
+      auto tensor = _tensor_reg->getNativeTensor(ind);
       if (tensor)
       {
         tensor->increase_ref();
@@ -194,21 +272,23 @@ void KernelGenerator::visit(const ir::operation::Conv2D &node)
   const auto ker_index{node.getInputs().at(Conv2D::Input::KERNEL)};
   const auto bias_index{node.getInputs().at(Conv2D::Input::BIAS)};
 
-  auto ofm_tensor = _tensor_builder->portableAt(ofm_index).get();
-  auto ifm_tensor = _tensor_builder->portableAt(ifm_index).get();
-  auto ker_tensor = _tensor_builder->portableAt(ker_index).get();
-  auto bias_tensor = _tensor_builder->portableAt(bias_index).get();
+  auto ofm_tensor = _tensor_reg->getPortableTensor(ofm_index).get();
+  auto ifm_tensor = _tensor_reg->getPortableTensor(ifm_index).get();
+  auto ker_tensor = _tensor_reg->getPortableTensor(ker_index).get();
+  auto bias_tensor = _tensor_reg->getPortableTensor(bias_index).get();
 
   const auto stride = node.param().stride;
   const auto activation = node.param().activation;
   const auto param_padding = node.param().padding;
+  const auto dilation = node.param().dilation;
   auto fn = std::make_unique<ops::ConvolutionLayer>();
 
   if (_ctx.at(ifm_index).info().isDynamic() || _ctx.at(ker_index).info().isDynamic())
   {
     fn->configure(ifm_tensor, ker_tensor, bias_tensor, param_padding.type,
                   param_padding.param.left, param_padding.param.right, param_padding.param.top,
                   param_padding.param.bottom,
-                  stride.horizontal, stride.vertical, activation, ofm_tensor);
+                  stride.horizontal, stride.vertical, dilation.width_factor, dilation.height_factor,
+                  activation, ofm_tensor);
 
     _return_fn = std::move(fn);
     return;
@@ -221,11 +301,12 @@ void KernelGenerator::visit(const ir::operation::Conv2D &node)
   const auto ker_width = ker_shape.dim(2);
 
   const auto padding =
-      ir::calculatePadding(param_padding, ifm_shape, ofm_shape, stride, ker_width, ker_height);
+      ir::calculatePadding(param_padding, ifm_shape, ofm_shape, stride, ker_width, ker_height,
+                           dilation.width_factor, dilation.height_factor);
 
   fn->configure(ifm_tensor, ker_tensor, bias_tensor, param_padding.type, padding.left,
                 padding.right, padding.top, padding.bottom, stride.horizontal, stride.vertical,
-                activation, ofm_tensor);
+                dilation.width_factor, dilation.height_factor, activation, ofm_tensor);
 
   _return_fn = std::move(fn);
 }
 
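Dilation enlarges a filter's receptive field without adding weights, and the padding computation has to account for it: a k-tap filter with dilation d spans an effective window of (k - 1) * d + 1 input elements. A hedged sketch of the arithmetic a helper like ir::calculatePadding presumably performs for SAME padding (standard formulas, not the verbatim onert implementation):

// Effective kernel extent once dilation is applied.
int effective_filter_size(int kernel, int dilation) { return (kernel - 1) * dilation + 1; }

// Total SAME padding along one axis, clamped at zero; callers split it
// between the two sides.
int same_padding_total(int in, int out, int stride, int kernel, int dilation)
{
  int total = (out - 1) * stride + effective_filter_size(kernel, dilation) - in;
  return total > 0 ? total : 0;
}
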
@@ -251,10 +332,10 @@ void KernelGenerator::visit(const ir::operation::DepthwiseConv2D &node)
   const auto multiplier = node.param().multiplier;
   const auto activation = node.param().activation;
 
-  auto ofm_tensor = _tensor_builder->portableAt(ofm_index).get();
-  auto ifm_tensor = _tensor_builder->portableAt(ifm_index).get();
-  auto ker_tensor = _tensor_builder->portableAt(ker_index).get();
-  auto bias_tensor = _tensor_builder->portableAt(bias_index).get();
+  auto ofm_tensor = _tensor_reg->getPortableTensor(ofm_index).get();
+  auto ifm_tensor = _tensor_reg->getPortableTensor(ifm_index).get();
+  auto ker_tensor = _tensor_reg->getPortableTensor(ker_index).get();
+  auto bias_tensor = _tensor_reg->getPortableTensor(bias_index).get();
 
   auto fn = std::make_unique<ops::DepthwiseConvolutionLayer>();
 
@@ -265,57 +346,6 @@ void KernelGenerator::visit(const ir::operation::DepthwiseConv2D &node)
   _return_fn = std::move(fn);
 }
 
-void KernelGenerator::visit(const ir::operation::MaxPool2D &node)
-{
-  const auto ofm_index{node.getOutputs().at(0)};
-  const auto ifm_index{node.getInputs().at(ir::operation::MaxPool2D::Input::INPUT)};
-
-  const auto kh = node.param().kh;
-  const auto kw = node.param().kw;
-
-  const auto stride = node.param().stride;
-  const auto ifm_shape = _ctx.at(ifm_index).shape().asFeature(_current_op_seq_layout);
-  const auto ofm_shape = _ctx.at(ofm_index).shape().asFeature(_current_op_seq_layout);
-  const auto padding =
-      ir::calculatePadding(node.param().padding, ifm_shape, ofm_shape, stride, kw, kh);
-  const auto activation = node.param().activation;
-
-  auto ofm_tensor = _tensor_builder->portableAt(ofm_index).get();
-  auto ifm_tensor = _tensor_builder->portableAt(ifm_index).get();
-
-  auto fn = std::make_unique<ops::MaxPoolLayer>();
-
-  fn->configure(ifm_tensor, padding.left, padding.right, padding.top, padding.bottom,
-                stride.horizontal, stride.vertical, kw, kh, activation, ofm_tensor);
-
-  _return_fn = std::move(fn);
-}
-
-void KernelGenerator::visit(const ir::operation::AvgPool2D &node)
-{
-  const auto ofm_index{node.getOutputs().at(0)};
-  const auto ifm_index{node.getInputs().at(ir::operation::AvgPool2D::Input::INPUT)};
-
-  const auto kh = node.param().kh;
-  const auto kw = node.param().kw;
-  const auto stride = node.param().stride;
-  const auto ifm_shape = _ctx.at(ifm_index).shape().asFeature(_current_op_seq_layout);
-  const auto ofm_shape = _ctx.at(ofm_index).shape().asFeature(_current_op_seq_layout);
-  const auto padding =
-      ir::calculatePadding(node.param().padding, ifm_shape, ofm_shape, stride, kw, kh);
-  const auto activation = node.param().activation;
-
-  auto ofm_tensor = _tensor_builder->portableAt(ofm_index).get();
-  auto ifm_tensor = _tensor_builder->portableAt(ifm_index).get();
-
-  auto fn = std::make_unique<ops::AvgPoolLayer>();
-
-  fn->configure(ifm_tensor, padding.left, padding.right, padding.top, padding.bottom,
-                stride.horizontal, stride.vertical, kw, kh, activation, ofm_tensor);
-
-  _return_fn = std::move(fn);
-}
-
 void KernelGenerator::visit(const ir::operation::Concat &node)
 {
   const auto ofm_index{node.getOutputs().at(0)};
@@ -323,11 +353,11 @@ void KernelGenerator::visit(const ir::operation::Concat &node)
   const auto rank = _ctx.at(ofm_index).shape().rank();
   const auto axis = ops::getAxis(rank, node.param().axis, _current_op_seq_layout);
 
-  auto output_tensor = _tensor_builder->portableAt(ofm_index).get();
+  auto output_tensor = _tensor_reg->getPortableTensor(ofm_index).get();
 
   std::vector<const IPortableTensor *> input_tensors;
   for (auto &ifm_idx : node.getInputs())
-    input_tensors.emplace_back(_tensor_builder->portableAt(ifm_idx).get());
+    input_tensors.emplace_back(_tensor_reg->getPortableTensor(ifm_idx).get());
 
   auto fn = std::make_unique<ops::ConcatLayer>();
 
@@ -342,9 +372,9 @@ void KernelGenerator::visit(const ir::operation::BatchToSpaceND &node)
   const auto input_index{node.getInputs().at(ir::operation::BatchToSpaceND::INPUT)};
   const auto block_size_index{node.getInputs().at(ir::operation::BatchToSpaceND::BLOCK_SIZE)};
 
-  auto output_alloc = _tensor_builder->portableAt(output_index).get();
-  auto input_alloc = _tensor_builder->portableAt(input_index).get();
-  auto block_size_alloc = _tensor_builder->portableAt(block_size_index).get();
+  auto output_alloc = _tensor_reg->getPortableTensor(output_index).get();
+  auto input_alloc = _tensor_reg->getPortableTensor(input_index).get();
+  auto block_size_alloc = _tensor_reg->getPortableTensor(block_size_index).get();
 
   auto fn = std::make_unique<ops::BatchToSpaceNDLayer>();
 
@@ -354,7 +384,7 @@ void KernelGenerator::visit(const ir::operation::BatchToSpaceND &node)
   if (node.getInputs().size() != NNApiInputs)
   {
     const auto crops_data_index{node.getInputs().at(ir::operation::BatchToSpaceND::CROPS_DATA)};
-    crops_alloc = _tensor_builder->portableAt(crops_data_index).get();
+    crops_alloc = _tensor_reg->getPortableTensor(crops_data_index).get();
   }
 
   fn->configure(input_alloc, output_alloc, block_size_alloc, crops_alloc);
@@ -368,9 +398,9 @@ void KernelGenerator::visit(const ir::operation::Fill &node)
   const auto input_index{node.getInputs().at(ir::operation::Fill::Input::INPUT)};
   const auto value_index{node.getInputs().at(ir::operation::Fill::Input::VALUE)};
 
-  auto output_tensor = _tensor_builder->portableAt(output_index).get();
-  auto input_tensor = _tensor_builder->portableAt(input_index).get();
-  auto value_tensor = _tensor_builder->portableAt(value_index).get();
+  auto output_tensor = _tensor_reg->getPortableTensor(output_index).get();
+  auto input_tensor = _tensor_reg->getPortableTensor(input_index).get();
+  auto value_tensor = _tensor_reg->getPortableTensor(value_index).get();
 
   auto fn = std::make_unique<ops::FillLayer>();
 
@@ -389,11 +419,11 @@ void KernelGenerator::visit(const ir::operation::FullyConnected &node)
   const auto bias_index{node.getInputs().at(FullyConnected::Input::BIAS)};
   const auto activation = node.param().activation;
 
-  auto output_tensor = _tensor_builder->portableAt(output_index).get();
-  auto input_tensor = _tensor_builder->portableAt(input_index).get();
-  auto weight_tensor = _tensor_builder->portableAt(weight_index).get();
-  auto bias_tensor =
-      bias_index.undefined() ? nullptr : _tensor_builder->portableAt(bias_index).get();
+  auto output_tensor = _tensor_reg->getPortableTensor(output_index).get();
+  auto input_tensor = _tensor_reg->getPortableTensor(input_index).get();
+  auto weight_tensor = _tensor_reg->getPortableTensor(weight_index).get();
+  auto bias_tensor =
+      bias_index.undefined() ? nullptr : _tensor_reg->getPortableTensor(bias_index).get();
 
   auto fn = std::make_unique<ops::FullyConnectedLayer>();
 
@@ -408,8 +438,8 @@ void KernelGenerator::visit(const ir::operation::Reshape &node)
   const auto output_index{node.getOutputs().at(0)};
   const auto input_index{node.getInputs().at(ir::operation::Reshape::Input::INPUT)};
 
-  auto output_tensor = _tensor_builder->portableAt(output_index).get();
-  auto input_tensor = _tensor_builder->portableAt(input_index).get();
+  auto output_tensor = _tensor_reg->getPortableTensor(output_index).get();
+  auto input_tensor = _tensor_reg->getPortableTensor(input_index).get();
 
   // optional 2nd input
   IPortableTensor *shape_tensor = nullptr;
 
   if (node.getInputs().size() == 2)
   {
     const auto shape_index{node.getInputs().at(ir::operation::Reshape::Input::SHAPE)};
-    shape_tensor = _tensor_builder->portableAt(shape_index).get();
+    shape_tensor = _tensor_reg->getPortableTensor(shape_index).get();
   }
 
   auto fn = std::make_unique<ops::ReshapeLayer>();
@@ -431,8 +461,8 @@ void KernelGenerator::visit(const ir::operation::Squeeze &node)
   const auto output_index{node.getOutputs().at(0)};
   const auto input_index{node.getInputs().at(ir::operation::Squeeze::Input::INPUT)};
 
-  auto output_tensor = _tensor_builder->portableAt(output_index).get();
-  auto input_tensor = _tensor_builder->portableAt(input_index).get();
+  auto output_tensor = _tensor_reg->getPortableTensor(output_index).get();
+  auto input_tensor = _tensor_reg->getPortableTensor(input_index).get();
 
   // Squeeze can share the same kernel with Reshape
   auto fn = std::make_unique<ops::ReshapeLayer>();
@@ -449,8 +479,8 @@ void KernelGenerator::visit(const ir::operation::Softmax &node)
 
   const auto beta = node.param().beta;
 
-  auto output_tensor = _tensor_builder->portableAt(output_index).get();
-  auto input_tensor = _tensor_builder->portableAt(input_index).get();
+  auto output_tensor = _tensor_reg->getPortableTensor(output_index).get();
+  auto input_tensor = _tensor_reg->getPortableTensor(input_index).get();
 
   auto fn = std::make_unique<ops::SoftMaxLayer>();
 
@@ -459,21 +489,22 @@ void KernelGenerator::visit(const ir::operation::Softmax &node)
   _return_fn = std::move(fn);
 }
 
-void KernelGenerator::visit(const ir::operation::Add &node)
+void KernelGenerator::visit(const ir::operation::BinaryArithmetic &node)
 {
   const auto ofm_index{node.getOutputs().at(0)};
-  const auto lhs_index{node.getInputs().at(ir::operation::Add::Input::LHS)};
-  const auto rhs_index{node.getInputs().at(ir::operation::Add::Input::RHS)};
+  const auto lhs_index{node.getInputs().at(ir::operation::BinaryArithmetic::Input::LHS)};
+  const auto rhs_index{node.getInputs().at(ir::operation::BinaryArithmetic::Input::RHS)};
 
   const auto activation = node.param().activation;
 
-  auto ofm_tensor = _tensor_builder->portableAt(ofm_index).get();
-  auto lhs_tensor = _tensor_builder->portableAt(lhs_index).get();
-  auto rhs_tensor = _tensor_builder->portableAt(rhs_index).get();
+  auto ofm_tensor = _tensor_reg->getPortableTensor(ofm_index).get();
+  auto lhs_tensor = _tensor_reg->getPortableTensor(lhs_index).get();
+  auto rhs_tensor = _tensor_reg->getPortableTensor(rhs_index).get();
 
-  auto fn = std::make_unique<ops::AddLayer>();
+  auto fn = std::make_unique<ops::BinaryArithmeticLayer>();
 
-  fn->configure(lhs_tensor, rhs_tensor, activation, ofm_tensor);
+  fn->configure(lhs_tensor, rhs_tensor, ofm_tensor, activation,
+                convertArithmeticType(node.param().arithmetic_type));
 
   _return_fn = std::move(fn);
 }
 
+515,9 @@ void KernelGenerator::visit(const ir::operation::Comparison &node) const auto lhs_index{node.getInputs().at(ir::operation::Comparison::Input::INPUT0)}; const auto rhs_index{node.getInputs().at(ir::operation::Comparison::Input::INPUT1)}; - auto ofm_tensor = _tensor_builder->portableAt(ofm_index).get(); - auto lhs_tensor = _tensor_builder->portableAt(lhs_index).get(); - auto rhs_tensor = _tensor_builder->portableAt(rhs_index).get(); + auto ofm_tensor = _tensor_reg->getPortableTensor(ofm_index).get(); + auto lhs_tensor = _tensor_reg->getPortableTensor(lhs_index).get(); + auto rhs_tensor = _tensor_reg->getPortableTensor(rhs_index).get(); auto comparison_type = node.param().comparison_type; @@ -503,9 +534,9 @@ void KernelGenerator::visit(const ir::operation::Gather &node) const auto input_index{node.getInputs().at(ir::operation::Gather::Input::INPUT)}; const auto indices_index{node.getInputs().at(ir::operation::Gather::Input::INDICES)}; - auto output_tensor = _tensor_builder->portableAt(output_index).get(); - auto input_tensor = _tensor_builder->portableAt(input_index).get(); - auto indices_tensor = _tensor_builder->portableAt(indices_index).get(); + auto output_tensor = _tensor_reg->getPortableTensor(output_index).get(); + auto input_tensor = _tensor_reg->getPortableTensor(input_index).get(); + auto indices_tensor = _tensor_reg->getPortableTensor(indices_index).get(); const auto backend_layout = output_tensor->layout(); UNUSED_RELEASE(backend_layout); @@ -534,46 +565,6 @@ void KernelGenerator::visit(const ir::operation::Gather &node) _return_fn = std::move(fn); } -void KernelGenerator::visit(const ir::operation::Sub &node) -{ - // The same as Add - const auto ofm_index{node.getOutputs().at(0)}; - const auto lhs_index{node.getInputs().at(ir::operation::Sub::Input::LHS)}; - const auto rhs_index{node.getInputs().at(ir::operation::Sub::Input::RHS)}; - - const auto activation = node.param().activation; - - auto ofm_tensor = _tensor_builder->portableAt(ofm_index).get(); - auto lhs_tensor = _tensor_builder->portableAt(lhs_index).get(); - auto rhs_tensor = _tensor_builder->portableAt(rhs_index).get(); - - auto fn = std::make_unique<ops::SubLayer>(); - - fn->configure(lhs_tensor, rhs_tensor, activation, ofm_tensor); - - _return_fn = std::move(fn); -} - -void KernelGenerator::visit(const ir::operation::Mul &node) -{ - // The same as Add - const auto ofm_index{node.getOutputs().at(0)}; - const auto lhs_index{node.getInputs().at(ir::operation::Mul::Input::LHS)}; - const auto rhs_index{node.getInputs().at(ir::operation::Mul::Input::RHS)}; - - const auto activation = node.param().activation; - - auto ofm_tensor = _tensor_builder->portableAt(ofm_index).get(); - auto lhs_tensor = _tensor_builder->portableAt(lhs_index).get(); - auto rhs_tensor = _tensor_builder->portableAt(rhs_index).get(); - - auto fn = std::make_unique<ops::MulLayer>(); - - fn->configure(lhs_tensor, rhs_tensor, activation, ofm_tensor); - - _return_fn = std::move(fn); -} - void KernelGenerator::visit(const ir::operation::OneHot &node) { const auto output_index{node.getOutputs().at(0)}; @@ -584,11 +575,11 @@ void KernelGenerator::visit(const ir::operation::OneHot &node) const auto axis = node.param().axis; - auto output_tensor = _tensor_builder->portableAt(output_index).get(); - auto indices_tensor = _tensor_builder->portableAt(indices_index).get(); - auto depth_tensor = _tensor_builder->portableAt(depth_index).get(); - auto onvalue_tensor = _tensor_builder->portableAt(onvalue_index).get(); - auto offvalue_tensor = 
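Every visitor in this file repeats one substitution: tensors are looked up through the shared registry (getPortableTensor) instead of through the TensorBuilder (portableAt). A minimal stand-in for the registry's role as the single lookup point shared by the builder, the constant initializer and the kernel generator (the registration method name here is hypothetical):

#include <memory>
#include <unordered_map>

struct IPortableTensor { virtual ~IPortableTensor() = default; }; // stand-in type
using OperandIndex = unsigned;                                    // stand-in type

class ToyTensorRegistry
{
public:
  // Mirrors the accessor used throughout these hunks; returns nullptr when absent.
  std::shared_ptr<IPortableTensor> getPortableTensor(OperandIndex ind) const
  {
    auto it = _tensors.find(ind);
    return it == _tensors.end() ? nullptr : it->second;
  }
  // Hypothetical registration entry point, for illustration only.
  void registerTensor(OperandIndex ind, std::shared_ptr<IPortableTensor> tensor)
  {
    _tensors[ind] = std::move(tensor);
  }

private:
  std::unordered_map<OperandIndex, std::shared_ptr<IPortableTensor>> _tensors;
};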
_tensor_builder->portableAt(offvalue_index).get(); + auto output_tensor = _tensor_reg->getPortableTensor(output_index).get(); + auto indices_tensor = _tensor_reg->getPortableTensor(indices_index).get(); + auto depth_tensor = _tensor_reg->getPortableTensor(depth_index).get(); + auto onvalue_tensor = _tensor_reg->getPortableTensor(onvalue_index).get(); + auto offvalue_tensor = _tensor_reg->getPortableTensor(offvalue_index).get(); assert(indices_tensor->data_type() == OperandType::INT32); assert(axis <= static_cast<int>(indices_tensor->num_dimensions())); @@ -600,34 +591,14 @@ void KernelGenerator::visit(const ir::operation::OneHot &node) _return_fn = std::move(fn); } -void KernelGenerator::visit(const ir::operation::Div &node) -{ - // The same as Add - const auto ofm_index{node.getOutputs().at(0)}; - const auto lhs_index{node.getInputs().at(ir::operation::Div::Input::LHS)}; - const auto rhs_index{node.getInputs().at(ir::operation::Div::Input::RHS)}; - - const auto activation = node.param().activation; - - auto ofm_tensor = _tensor_builder->portableAt(ofm_index).get(); - auto lhs_tensor = _tensor_builder->portableAt(lhs_index).get(); - auto rhs_tensor = _tensor_builder->portableAt(rhs_index).get(); - - auto fn = std::make_unique<ops::DivLayer>(); - - fn->configure(lhs_tensor, rhs_tensor, activation, ofm_tensor); - - _return_fn = std::move(fn); -} - void KernelGenerator::visit(const ir::operation::Einsum &node) { const auto ofm_index{node.getOutputs().at(0)}; - auto output_tensor = _tensor_builder->portableAt(ofm_index).get(); + auto output_tensor = _tensor_reg->getPortableTensor(ofm_index).get(); std::vector<const IPortableTensor *> input_tensors; for (auto &ifm_idx : node.getInputs()) - input_tensors.emplace_back(_tensor_builder->portableAt(ifm_idx).get()); + input_tensors.emplace_back(_tensor_reg->getPortableTensor(ifm_idx).get()); const auto equation = node.param().equation; @@ -648,7 +619,7 @@ void KernelGenerator::visit(const ir::operation::Custom &node) const auto &operand = _ctx.at(idx); // TODO make sure using `_current_op_seq_layout` is correct for custom operations types.emplace_back(custom::TypeInfo{operand.shape(), operand.typeInfo().type()}); - auto in_tensor = _tensor_builder->portableAt(idx); + auto in_tensor = _tensor_reg->getPortableTensor(idx); tensors.emplace_back(in_tensor); } }; @@ -666,64 +637,68 @@ void KernelGenerator::visit(const ir::operation::Custom &node) _return_fn = std::move(fn); } -void KernelGenerator::visit(const ir::operation::Exp &node) +void KernelGenerator::visit(const ir::operation::ElementwiseActivation &node) { const auto output_index{node.getOutputs().at(0)}; - const auto input_index{node.getInputs().at(ir::operation::Exp::Input::INPUT)}; + const auto input_index{node.getInputs().at(ir::operation::ElementwiseActivation::Input::INPUT)}; - auto output_tensor = _tensor_builder->portableAt(output_index).get(); - auto input_tensor = _tensor_builder->portableAt(input_index).get(); + auto output_tensor = _tensor_reg->getPortableTensor(output_index).get(); + auto input_tensor = _tensor_reg->getPortableTensor(input_index).get(); - auto fn = std::make_unique<ops::ExpLayer>(); + auto fn = std::make_unique<ops::ElementwiseActivationLayer>(); - fn->configure(input_tensor, output_tensor); + fn->configure(input_tensor, output_tensor, node.param().alpha, node.param().beta, + convertElementwiseActivationType(node.param().op_type)); _return_fn = std::move(fn); } -void KernelGenerator::visit(const ir::operation::ExpandDims &node) +void KernelGenerator::visit(const 
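The deleted Exp and Logistic visitors (and Tanh just below) come back as one ElementwiseActivation visitor that forwards alpha, beta and op_type into a single layer. A sketch of that binding step with stand-in types; treating alpha/beta as the clamp bounds of the ReLU family is an assumption here, suggested by ReLU6 disappearing as a separate op:

#include <algorithm>
#include <cmath>
#include <functional>

enum class ActivationType { kReLU, kTanh, kLogistic }; // assumed variants

std::function<float(float)> makeActivation(ActivationType op, float alpha, float beta)
{
  switch (op)
  {
    case ActivationType::kReLU:
      // Assumption: alpha = upper bound (e.g. 6 for the former ReLU6), beta = lower bound.
      return [alpha, beta](float x) { return std::min(std::max(x, beta), alpha); };
    case ActivationType::kTanh:
      return [](float x) { return std::tanh(x); };
    case ActivationType::kLogistic:
      return [](float x) { return 1.f / (1.f + std::exp(-x)); };
  }
  return {};
}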
ir::operation::ElementwiseBinary &node) { const auto output_index{node.getOutputs().at(0)}; - const auto input_index{node.getInputs().at(ir::operation::ExpandDims::Input::INPUT)}; - const auto axis_index{node.getInputs().at(ir::operation::ExpandDims::Input::AXIS)}; + const auto lhs_index{node.getInputs().at(ir::operation::ElementwiseBinary::Input::LHS)}; + const auto rhs_index{node.getInputs().at(ir::operation::ElementwiseBinary::Input::RHS)}; - auto output_tensor = _tensor_builder->portableAt(output_index).get(); - auto input_tensor = _tensor_builder->portableAt(input_index).get(); - auto axis_tensor = _tensor_builder->portableAt(axis_index).get(); + auto output_tensor = _tensor_reg->getPortableTensor(output_index).get(); + auto lhs_tensor = _tensor_reg->getPortableTensor(lhs_index).get(); + auto rhs_tensor = _tensor_reg->getPortableTensor(rhs_index).get(); - auto fn = std::make_unique<ops::ExpandDimsLayer>(); + auto fn = std::make_unique<ops::ElementwiseBinaryLayer>(); - fn->configure(input_tensor, axis_tensor, output_tensor); + fn->configure(lhs_tensor, rhs_tensor, output_tensor, + convertElementwiseBinaryType(node.param().op_type)); _return_fn = std::move(fn); } -void KernelGenerator::visit(const ir::operation::Logistic &node) +void KernelGenerator::visit(const ir::operation::ElementwiseUnary &node) { const auto output_index{node.getOutputs().at(0)}; - const auto input_index{node.getInputs().at(ir::operation::Logistic::Input::INPUT)}; + const auto input_index{node.getInputs().at(ir::operation::ElementwiseUnary::Input::INPUT)}; - auto output_tensor = _tensor_builder->portableAt(output_index).get(); - auto input_tensor = _tensor_builder->portableAt(input_index).get(); + auto output_tensor = _tensor_reg->getPortableTensor(output_index).get(); + auto input_tensor = _tensor_reg->getPortableTensor(input_index).get(); - auto fn = std::make_unique<ops::LogisticLayer>(); + auto fn = std::make_unique<ops::ElementwiseUnaryLayer>(); - fn->configure(input_tensor, output_tensor); + fn->configure(input_tensor, output_tensor, convertElementwiseUnaryType(node.param().op_type)); _return_fn = std::move(fn); } -void KernelGenerator::visit(const ir::operation::Tanh &node) +void KernelGenerator::visit(const ir::operation::ExpandDims &node) { const auto output_index{node.getOutputs().at(0)}; - const auto input_index{node.getInputs().at(ir::operation::Tanh::Input::INPUT)}; + const auto input_index{node.getInputs().at(ir::operation::ExpandDims::Input::INPUT)}; + const auto axis_index{node.getInputs().at(ir::operation::ExpandDims::Input::AXIS)}; - auto output_tensor = _tensor_builder->portableAt(output_index).get(); - auto input_tensor = _tensor_builder->portableAt(input_index).get(); + auto output_tensor = _tensor_reg->getPortableTensor(output_index).get(); + auto input_tensor = _tensor_reg->getPortableTensor(input_index).get(); + auto axis_tensor = _tensor_reg->getPortableTensor(axis_index).get(); - auto fn = std::make_unique<ops::TanhLayer>(); + auto fn = std::make_unique<ops::ExpandDimsLayer>(); - fn->configure(input_tensor, output_tensor); + fn->configure(input_tensor, axis_tensor, output_tensor); _return_fn = std::move(fn); } @@ -737,11 +712,11 @@ void KernelGenerator::visit(const ir::operation::Pack &node) assert(-rank <= axis && axis < rank); - auto output_tensor = _tensor_builder->portableAt(ofm_index).get(); + auto output_tensor = _tensor_reg->getPortableTensor(ofm_index).get(); std::vector<const IPortableTensor *> input_tensors; for (auto &ifm_idx : node.getInputs()) - 
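ElementwiseBinary, configured here through convertElementwiseBinaryType, plays the same consolidating role for the unfused binary ops: the dedicated Max, Min and LogicalOr layers deleted later in this file all become variants of one layer. A stand-in sketch of the type-keyed kernel selection:

#include <algorithm>
#include <functional>

enum class ElementwiseBinaryType { kMax, kMin, kLogicalOr }; // assumed variants

std::function<float(float, float)> selectBinaryKernel(ElementwiseBinaryType op)
{
  switch (op)
  {
    case ElementwiseBinaryType::kMax:
      return [](float a, float b) { return std::max(a, b); };
    case ElementwiseBinaryType::kMin:
      return [](float a, float b) { return std::min(a, b); };
    case ElementwiseBinaryType::kLogicalOr: // booleans modeled as 0/1 floats here
      return [](float a, float b) { return (a != 0.f || b != 0.f) ? 1.f : 0.f; };
  }
  return {};
}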
input_tensors.emplace_back(_tensor_builder->portableAt(ifm_idx).get()); + input_tensors.emplace_back(_tensor_reg->getPortableTensor(ifm_idx).get()); auto fn = std::make_unique<ops::PackLayer>(); @@ -759,11 +734,11 @@ void KernelGenerator::visit(const ir::operation::Unpack &node) assert(rank == 0 || (-rank <= axis && axis < rank)); - auto input_tensor = _tensor_builder->portableAt(input_index).get(); + auto input_tensor = _tensor_reg->getPortableTensor(input_index).get(); std::vector<IPortableTensor *> output_tensors; for (auto &output_idx : node.getOutputs()) - output_tensors.emplace_back(_tensor_builder->portableAt(output_idx).get()); + output_tensors.emplace_back(_tensor_reg->getPortableTensor(output_idx).get()); auto fn = std::make_unique<ops::UnpackLayer>(); @@ -781,8 +756,8 @@ void KernelGenerator::visit(const ir::operation::Pad &node) const auto output_index{node.getOutputs().at(0)}; assert(_ctx.at(pad_index).data()); - auto input = _tensor_builder->portableAt(input_index).get(); - auto output = _tensor_builder->portableAt(output_index).get(); + auto input = _tensor_reg->getPortableTensor(input_index).get(); + auto output = _tensor_reg->getPortableTensor(output_index).get(); auto pad_rank = _ctx.at(pad_index).shape().dim(0); auto pad_base = reinterpret_cast<const int32_t *>(_ctx.at(pad_index).data()->base()); @@ -801,62 +776,13 @@ void KernelGenerator::visit(const ir::operation::Pad &node) _return_fn = std::move(fn); } -void KernelGenerator::visit(const ir::operation::Max &node) -{ - const auto ofm_index{node.getOutputs().at(0)}; - const auto lhs_index{node.getInputs().at(ir::operation::Max::Input::LHS)}; - const auto rhs_index{node.getInputs().at(ir::operation::Max::Input::RHS)}; - - auto ofm_tensor = _tensor_builder->portableAt(ofm_index).get(); - auto lhs_tensor = _tensor_builder->portableAt(lhs_index).get(); - auto rhs_tensor = _tensor_builder->portableAt(rhs_index).get(); - - auto fn = std::make_unique<ops::MaxLayer>(); - - fn->configure(lhs_tensor, rhs_tensor, ofm_tensor); - - _return_fn = std::move(fn); -} - -void KernelGenerator::visit(const ir::operation::Min &node) -{ - const auto ofm_index{node.getOutputs().at(0)}; - const auto lhs_index{node.getInputs().at(ir::operation::Min::Input::LHS)}; - const auto rhs_index{node.getInputs().at(ir::operation::Min::Input::RHS)}; - - auto ofm_tensor = _tensor_builder->portableAt(ofm_index).get(); - auto lhs_tensor = _tensor_builder->portableAt(lhs_index).get(); - auto rhs_tensor = _tensor_builder->portableAt(rhs_index).get(); - - auto fn = std::make_unique<ops::MinLayer>(); - - fn->configure(lhs_tensor, rhs_tensor, ofm_tensor); - - _return_fn = std::move(fn); -} - -void KernelGenerator::visit(const ir::operation::Cast &node) -{ - const auto ofm_index{node.getOutputs().at(0)}; - const auto ifm_index{node.getInputs().at(ir::operation::Cast::Input::INPUT)}; - - auto ofm_tensor = _tensor_builder->portableAt(ofm_index).get(); - auto ifm_tensor = _tensor_builder->portableAt(ifm_index).get(); - - auto fn = std::make_unique<ops::CastLayer>(); - - fn->configure(ifm_tensor, ofm_tensor); - - _return_fn = std::move(fn); -} - void KernelGenerator::visit(const ir::operation::Transpose &node) { const auto output_index{node.getOutputs().at(0)}; const auto input_index{node.getInputs().at(ir::operation::Transpose::Input::INPUT)}; - auto output_tensor = _tensor_builder->portableAt(output_index).get(); - auto input_tensor = _tensor_builder->portableAt(input_index).get(); + auto output_tensor = _tensor_reg->getPortableTensor(output_index).get(); + auto 
input_tensor = _tensor_reg->getPortableTensor(input_index).get(); auto fn = std::make_unique<ops::TransposeLayer>(); @@ -872,9 +798,9 @@ void KernelGenerator::visit(const ir::operation::Reduce &node) const auto axes_index{node.getInputs().at(ir::operation::Reduce::Input::AXES)}; const auto keep_dims = node.param().keep_dims; - auto output_tensor = _tensor_builder->portableAt(output_index).get(); - auto input_tensor = _tensor_builder->portableAt(input_index).get(); - auto axes_tensor = _tensor_builder->portableAt(axes_index).get(); + auto output_tensor = _tensor_reg->getPortableTensor(output_index).get(); + auto input_tensor = _tensor_reg->getPortableTensor(input_index).get(); + auto axes_tensor = _tensor_reg->getPortableTensor(axes_index).get(); if (node.param().reduce_type == ir::operation::Reduce::ReduceType::MEAN) { @@ -895,36 +821,6 @@ void KernelGenerator::visit(const ir::operation::Reduce &node) } } -void KernelGenerator::visit(const ir::operation::ReLU &node) -{ - const auto output_index{node.getOutputs().at(0)}; - const auto input_index{node.getInputs().at(0)}; - - auto output_tensor = _tensor_builder->portableAt(output_index).get(); - auto input_tensor = _tensor_builder->portableAt(input_index).get(); - - auto fn = std::make_unique<ops::ReLULayer>(); - - fn->configure(input_tensor, output_tensor); - - _return_fn = std::move(fn); -} - -void KernelGenerator::visit(const ir::operation::ReLU6 &node) -{ - const auto output_index{node.getOutputs().at(0)}; - const auto input_index{node.getInputs().at(0)}; - - auto output_tensor = _tensor_builder->portableAt(output_index).get(); - auto input_tensor = _tensor_builder->portableAt(input_index).get(); - - auto fn = std::make_unique<ops::ReLU6Layer>(); - - fn->configure(input_tensor, output_tensor); - - _return_fn = std::move(fn); -} - void KernelGenerator::visit(const ir::operation::Select &node) { const auto output_index{node.getOutputs().at(0)}; @@ -932,10 +828,10 @@ void KernelGenerator::visit(const ir::operation::Select &node) const auto true_index{node.getInputs().at(ir::operation::Select::Input::INPUT_TRUE)}; const auto false_index{node.getInputs().at(ir::operation::Select::Input::INPUT_FALSE)}; - auto output_tensor = _tensor_builder->portableAt(output_index).get(); - auto condition_tensor = _tensor_builder->portableAt(condition_index).get(); - auto true_tensor = _tensor_builder->portableAt(true_index).get(); - auto false_tensor = _tensor_builder->portableAt(false_index).get(); + auto output_tensor = _tensor_reg->getPortableTensor(output_index).get(); + auto condition_tensor = _tensor_reg->getPortableTensor(condition_index).get(); + auto true_tensor = _tensor_reg->getPortableTensor(true_index).get(); + auto false_tensor = _tensor_reg->getPortableTensor(false_index).get(); auto fn = std::make_unique<ops::SelectLayer>(); @@ -951,10 +847,10 @@ void KernelGenerator::visit(const ir::operation::Slice &node) const auto begins_index{node.getInputs().at(ir::operation::Slice::Input::BEGINS)}; const auto sizes_index{node.getInputs().at(ir::operation::Slice::Input::SIZES)}; - auto output_tensor = _tensor_builder->portableAt(output_index).get(); - auto input_tensor = _tensor_builder->portableAt(input_index).get(); - auto begins_tensor = _tensor_builder->portableAt(begins_index).get(); - auto sizes_tensor = _tensor_builder->portableAt(sizes_index).get(); + auto output_tensor = _tensor_reg->getPortableTensor(output_index).get(); + auto input_tensor = _tensor_reg->getPortableTensor(input_index).get(); + auto begins_tensor = 
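The Reduce visitor keeps a dedicated path for ReduceType::MEAN and sends every other variant through one generic kernel selected by reduce_type. A toy, single-axis illustration of that split (assumed subset of reduce variants):

#include <algorithm>
#include <numeric>
#include <stdexcept>
#include <vector>

enum class ReduceType { kMean, kSum, kMax }; // assumed subset

float reduceAll(ReduceType type, const std::vector<float> &data)
{
  if (data.empty()) throw std::runtime_error{"Reduce: empty input"};
  switch (type)
  {
    case ReduceType::kMean: // dedicated path, as in the visitor above
      return std::accumulate(data.begin(), data.end(), 0.f) / data.size();
    case ReduceType::kSum:
      return std::accumulate(data.begin(), data.end(), 0.f);
    case ReduceType::kMax:
      return *std::max_element(data.begin(), data.end());
  }
  throw std::runtime_error{"Reduce: unsupported type"};
}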
_tensor_reg->getPortableTensor(begins_index).get(); + auto sizes_tensor = _tensor_reg->getPortableTensor(sizes_index).get(); auto fn = std::make_unique<ops::SliceLayer>(); @@ -971,11 +867,11 @@ void KernelGenerator::visit(const ir::operation::StridedSlice &node) const auto ends_index{node.getInputs().at(ir::operation::StridedSlice::Input::ENDS)}; const auto strides_index{node.getInputs().at(ir::operation::StridedSlice::Input::STRIDES)}; - auto output_tensor = _tensor_builder->portableAt(output_index).get(); - auto input_tensor = _tensor_builder->portableAt(input_index).get(); - auto starts_tensor = _tensor_builder->portableAt(starts_index).get(); - auto ends_tensor = _tensor_builder->portableAt(ends_index).get(); - auto strides_tensor = _tensor_builder->portableAt(strides_index).get(); + auto output_tensor = _tensor_reg->getPortableTensor(output_index).get(); + auto input_tensor = _tensor_reg->getPortableTensor(input_index).get(); + auto starts_tensor = _tensor_reg->getPortableTensor(starts_index).get(); + auto ends_tensor = _tensor_reg->getPortableTensor(ends_index).get(); + auto strides_tensor = _tensor_reg->getPortableTensor(strides_index).get(); auto begin_mask = node.param().begin_mask; auto end_mask = node.param().end_mask; @@ -999,11 +895,11 @@ void KernelGenerator::visit(const ir::operation::Split &node) const auto axis = ops::getAxis(rank, node.param().axis, _current_op_seq_layout); auto axis_resolved = axis < 0 ? axis + rank : axis; - auto in_tensor = _tensor_builder->portableAt(input_idx).get(); + auto in_tensor = _tensor_reg->getPortableTensor(input_idx).get(); std::vector<IPortableTensor *> out_tensors; for (auto &output_idx : node.getOutputs()) - out_tensors.emplace_back(_tensor_builder->portableAt(output_idx).get()); + out_tensors.emplace_back(_tensor_reg->getPortableTensor(output_idx).get()); auto fn = std::make_unique<ops::SplitLayer>(); @@ -1012,73 +908,13 @@ void KernelGenerator::visit(const ir::operation::Split &node) _return_fn = std::move(fn); } -void KernelGenerator::visit(const ir::operation::Abs &node) -{ - const auto ofm_index{node.getOutputs().at(0)}; - const auto ifm_index{node.getInputs().at(ir::operation::Abs::Input::INPUT)}; - - auto ofm_tensor = _tensor_builder->portableAt(ofm_index).get(); - auto ifm_tensor = _tensor_builder->portableAt(ifm_index).get(); - - auto fn = std::make_unique<ops::AbsLayer>(); - - fn->configure(ifm_tensor, ofm_tensor); - - _return_fn = std::move(fn); -} - -void KernelGenerator::visit(const ir::operation::Sin &node) -{ - const auto ofm_index{node.getOutputs().at(0)}; - const auto ifm_index{node.getInputs().at(ir::operation::Sin::Input::INPUT)}; - - auto ofm_tensor = _tensor_builder->portableAt(ofm_index).get(); - auto ifm_tensor = _tensor_builder->portableAt(ifm_index).get(); - - auto fn = std::make_unique<ops::SinLayer>(); - - fn->configure(ifm_tensor, ofm_tensor); - - _return_fn = std::move(fn); -} - -void KernelGenerator::visit(const ir::operation::Cos &node) -{ - const auto ofm_index{node.getOutputs().at(0)}; - const auto ifm_index{node.getInputs().at(ir::operation::Cos::Input::INPUT)}; - - auto ofm_tensor = _tensor_builder->portableAt(ofm_index).get(); - auto ifm_tensor = _tensor_builder->portableAt(ifm_index).get(); - - auto fn = std::make_unique<ops::CosLayer>(); - - fn->configure(ifm_tensor, ofm_tensor); - - _return_fn = std::move(fn); -} - -void KernelGenerator::visit(const ir::operation::RSQRT &node) -{ - const auto ofm_index{node.getOutputs().at(0)}; - const auto 
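Abs, Sin, Cos and RSQRT lose their one-off layer classes in this stretch; under the new scheme they are all instances of ElementwiseUnary (see the visitor earlier in the file). A compact table-driven sketch of that consolidation, with stand-in types:

#include <cmath>
#include <functional>
#include <map>

enum class ElementwiseUnaryType { kAbs, kSin, kCos, kRSqrt }; // assumed variants

// One lookup table replaces four layer classes; in the real code the dispatch
// happens in ops::ElementwiseUnaryLayer::configure via convertElementwiseUnaryType.
const std::map<ElementwiseUnaryType, std::function<float(float)>> kUnaryKernels = {
    {ElementwiseUnaryType::kAbs, [](float x) { return std::fabs(x); }},
    {ElementwiseUnaryType::kSin, [](float x) { return std::sin(x); }},
    {ElementwiseUnaryType::kCos, [](float x) { return std::cos(x); }},
    {ElementwiseUnaryType::kRSqrt, [](float x) { return 1.f / std::sqrt(x); }},
};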
ifm_index{node.getInputs().at(ir::operation::RSQRT::Input::INPUT)}; - - auto ofm_tensor = _tensor_builder->portableAt(ofm_index).get(); - auto ifm_tensor = _tensor_builder->portableAt(ifm_index).get(); - - auto fn = std::make_unique<ops::RsqrtLayer>(); - - fn->configure(ifm_tensor, ofm_tensor); - - _return_fn = std::move(fn); -} - void KernelGenerator::visit(const ir::operation::Shape &node) { const auto ofm_index{node.getOutputs().at(0)}; const auto ifm_index{node.getInputs().at(ir::operation::Shape::Input::INPUT)}; - auto ofm_tensor = _tensor_builder->portableAt(ofm_index).get(); - auto ifm_tensor = _tensor_builder->portableAt(ifm_index).get(); + auto ofm_tensor = _tensor_reg->getPortableTensor(ofm_index).get(); + auto ifm_tensor = _tensor_reg->getPortableTensor(ifm_index).get(); auto fn = std::make_unique<ops::ShapeLayer>(); @@ -1097,8 +933,8 @@ void KernelGenerator::visit(const ir::operation::ResizeBilinear &node) auto align_corners = node.param().align_corners; auto half_pixel_centers = node.param().half_pixel_centers; - auto output_tensor = _tensor_builder->portableAt(output_index).get(); - auto input_tensor = _tensor_builder->portableAt(input_index).get(); + auto output_tensor = _tensor_reg->getPortableTensor(output_index).get(); + auto input_tensor = _tensor_reg->getPortableTensor(input_index).get(); auto fn = std::make_unique<ops::ResizeBilinearLayer>(); @@ -1114,9 +950,9 @@ void KernelGenerator::visit(const ir::operation::Reverse &node) const auto input_index{node.getInputs().at(ir::operation::Reverse::INPUT)}; const auto axis_index{node.getInputs().at(ir::operation::Reverse::AXIS)}; - auto output_tensor = _tensor_builder->portableAt(output_index).get(); - auto input_tensor = _tensor_builder->portableAt(input_index).get(); - auto axis_tensor = _tensor_builder->portableAt(axis_index).get(); + auto output_tensor = _tensor_reg->getPortableTensor(output_index).get(); + auto input_tensor = _tensor_reg->getPortableTensor(input_index).get(); + auto axis_tensor = _tensor_reg->getPortableTensor(axis_index).get(); auto fn = std::make_unique<ops::ReverseLayer>(); @@ -1125,21 +961,6 @@ void KernelGenerator::visit(const ir::operation::Reverse &node) _return_fn = std::move(fn); } -void KernelGenerator::visit(const ir::operation::Neg &node) -{ - const auto ofm_index{node.getOutputs().at(0)}; - const auto ifm_index{node.getInputs().at(ir::operation::Neg::Input::INPUT)}; - - auto ofm_tensor = _tensor_builder->portableAt(ofm_index).get(); - auto ifm_tensor = _tensor_builder->portableAt(ifm_index).get(); - - auto fn = std::make_unique<ops::NegLayer>(); - - fn->configure(ifm_tensor, ofm_tensor); - - _return_fn = std::move(fn); -} - void KernelGenerator::visit(const ir::operation::ArgMax &node) { const auto output_index{node.getOutputs().at(0)}; @@ -1147,8 +968,8 @@ void KernelGenerator::visit(const ir::operation::ArgMax &node) const auto axis = node.param().axis; - auto output_tensor = _tensor_builder->portableAt(output_index).get(); - auto input_tensor = _tensor_builder->portableAt(input_index).get(); + auto output_tensor = _tensor_reg->getPortableTensor(output_index).get(); + auto input_tensor = _tensor_reg->getPortableTensor(input_index).get(); auto fn = std::make_unique<ops::ArgMinMaxLayer>(); @@ -1157,81 +978,45 @@ void KernelGenerator::visit(const ir::operation::ArgMax &node) _return_fn = std::move(fn); } -void KernelGenerator::visit(const ir::operation::Pow &node) -{ - const auto output_index{node.getOutputs().at(0)}; - const auto lhs_index{node.getInputs().at(ir::operation::Pow::LHS)}; - 
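The ResizeBilinear visitor forwards align_corners and half_pixel_centers straight to the layer; these flags select between the standard source-coordinate mappings for bilinear resize. A sketch of those transforms (the common definitions, not the cker implementation):

#include <cstdint>

// Maps an output coordinate to a fractional input coordinate for bilinear resize.
float sourceCoord(int32_t out_x, int32_t in_size, int32_t out_size,
                  bool align_corners, bool half_pixel_centers)
{
  if (half_pixel_centers) // sample at pixel centers
    return (out_x + 0.5f) * in_size / out_size - 0.5f;
  if (align_corners && out_size > 1) // corners of input and output coincide
    return out_x * static_cast<float>(in_size - 1) / (out_size - 1);
  return out_x * static_cast<float>(in_size) / out_size; // default scaling
}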
const auto rhs_index{node.getInputs().at(ir::operation::Pow::RHS)}; - - auto output_tensor = _tensor_builder->portableAt(output_index).get(); - auto lhs_tensor = _tensor_builder->portableAt(lhs_index).get(); - auto rhs_tensor = _tensor_builder->portableAt(rhs_index).get(); - - auto fn = std::make_unique<ops::PowLayer>(); - - fn->configure(lhs_tensor, rhs_tensor, ir::Activation::NONE, output_tensor); - - _return_fn = std::move(fn); -} - -void KernelGenerator::visit(const ir::operation::Log &node) +void KernelGenerator::visit(const ir::operation::Pool2D &node) { const auto ofm_index{node.getOutputs().at(0)}; - const auto ifm_index{node.getInputs().at(ir::operation::Log::Input::INPUT)}; - - auto ofm_tensor = _tensor_builder->portableAt(ofm_index).get(); - auto ifm_tensor = _tensor_builder->portableAt(ifm_index).get(); - - auto fn = std::make_unique<ops::LogLayer>(); - - fn->configure(ifm_tensor, ofm_tensor); - - _return_fn = std::move(fn); -} + const auto ifm_index{node.getInputs().at(ir::operation::Pool2D::Input::INPUT)}; -void KernelGenerator::visit(const ir::operation::Round &node) -{ - const auto output_index{node.getOutputs().at(0)}; - const auto input_index{node.getInputs().at(ir::operation::Round::INPUT)}; + const auto kh = node.param().kh; + const auto kw = node.param().kw; + const auto stride = node.param().stride; + const auto ifm_shape = _ctx.at(ifm_index).shape().asFeature(_current_op_seq_layout); + const auto ofm_shape = _ctx.at(ofm_index).shape().asFeature(_current_op_seq_layout); + const auto padding = + ir::calculatePadding(node.param().padding, ifm_shape, ofm_shape, stride, kw, kh); + const auto activation = node.param().activation; - auto output_tensor = _tensor_builder->portableAt(output_index).get(); - auto input_tensor = _tensor_builder->portableAt(input_index).get(); + auto ofm_tensor = _tensor_reg->getPortableTensor(ofm_index).get(); + auto ifm_tensor = _tensor_reg->getPortableTensor(ifm_index).get(); - auto fn = std::make_unique<ops::RoundLayer>(); + auto fn = std::make_unique<ops::PoolLayer>(); - fn->configure(input_tensor, output_tensor); + fn->configure(ifm_tensor, padding.left, padding.right, padding.top, padding.bottom, + stride.horizontal, stride.vertical, kw, kh, activation, ofm_tensor, + convertPoolType(node.param().op_type)); _return_fn = std::move(fn); } -void KernelGenerator::visit(const ir::operation::LogicalNot &node) +void KernelGenerator::visit(const ir::operation::Pow &node) { const auto output_index{node.getOutputs().at(0)}; - const auto input_index{node.getInputs().at(ir::operation::LogicalNot::INPUT)}; - - auto output_tensor = _tensor_builder->portableAt(output_index).get(); - auto input_tensor = _tensor_builder->portableAt(input_index).get(); - - auto fn = std::make_unique<ops::LogicalNotLayer>(); - - fn->configure(input_tensor, output_tensor); - - _return_fn = std::move(fn); -} - -void KernelGenerator::visit(const ir::operation::LogicalOr &node) -{ - const auto ofm_index{node.getOutputs().at(0)}; - const auto lhs_index{node.getInputs().at(0)}; - const auto rhs_index{node.getInputs().at(1)}; + const auto lhs_index{node.getInputs().at(ir::operation::Pow::LHS)}; + const auto rhs_index{node.getInputs().at(ir::operation::Pow::RHS)}; - auto ofm_tensor = _tensor_builder->portableAt(ofm_index).get(); - auto lhs_tensor = _tensor_builder->portableAt(lhs_index).get(); - auto rhs_tensor = _tensor_builder->portableAt(rhs_index).get(); + auto output_tensor = _tensor_reg->getPortableTensor(output_index).get(); + auto lhs_tensor = 
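The new Pool2D visitor derives explicit left/right/top/bottom padding from the padding mode before configuring PoolLayer. A simplified sketch of the usual SAME-padding arithmetic behind a helper like ir::calculatePadding (the real helper also handles VALID and explicit padding):

#include <algorithm>
#include <cstdint>

struct ExplicitPadding { int32_t left, right, top, bottom; };

// SAME padding: the output spans ceil(in / stride); any padding needed to make
// the window fit is split with the extra pixel on the right/bottom edge.
ExplicitPadding samePadding(int32_t in_h, int32_t in_w, int32_t stride_h, int32_t stride_w,
                            int32_t kh, int32_t kw)
{
  const int32_t out_h = (in_h + stride_h - 1) / stride_h;
  const int32_t out_w = (in_w + stride_w - 1) / stride_w;
  const int32_t pad_h = std::max(0, (out_h - 1) * stride_h + kh - in_h);
  const int32_t pad_w = std::max(0, (out_w - 1) * stride_w + kw - in_w);
  return {pad_w / 2, pad_w - pad_w / 2, pad_h / 2, pad_h - pad_h / 2};
}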
_tensor_reg->getPortableTensor(lhs_index).get(); + auto rhs_tensor = _tensor_reg->getPortableTensor(rhs_index).get(); - auto fn = std::make_unique<ops::LogicalOrLayer>(); + auto fn = std::make_unique<ops::PowLayer>(); - fn->configure(lhs_tensor, rhs_tensor, ofm_tensor); + fn->configure(lhs_tensor, rhs_tensor, ir::Activation::NONE, output_tensor); _return_fn = std::move(fn); } @@ -1241,8 +1026,8 @@ void KernelGenerator::visit(const ir::operation::L2Normalization &node) const auto output_index{node.getOutputs().at(0)}; const auto input_index{node.getInputs().at(0)}; - auto output_alloc = _tensor_builder->portableAt(output_index).get(); - auto input_alloc = _tensor_builder->portableAt(input_index).get(); + auto output_alloc = _tensor_reg->getPortableTensor(output_index).get(); + auto input_alloc = _tensor_reg->getPortableTensor(input_index).get(); auto fn = std::make_unique<ops::L2NormLayer>(); @@ -1251,35 +1036,36 @@ void KernelGenerator::visit(const ir::operation::L2Normalization &node) _return_fn = std::move(fn); } -void KernelGenerator::visit(const ir::operation::ZerosLike &node) +void KernelGenerator::visit(const ir::operation::Range &node) { const auto output_index{node.getOutputs().at(0)}; - const auto input_index{node.getInputs().at(ir::operation::ZerosLike::INPUT)}; + const auto start_index{node.getInputs().at(ir::operation::Range::START)}; + const auto limit_index{node.getInputs().at(ir::operation::Range::LIMIT)}; + const auto delta_index{node.getInputs().at(ir::operation::Range::DELTA)}; - auto output_tensor = _tensor_builder->portableAt(output_index).get(); - auto input_tensor = _tensor_builder->portableAt(input_index).get(); + auto output_tensor = _tensor_reg->getPortableTensor(output_index).get(); + auto start_tensor = _tensor_reg->getPortableTensor(start_index).get(); + auto limit_tensor = _tensor_reg->getPortableTensor(limit_index).get(); + auto delta_tensor = _tensor_reg->getPortableTensor(delta_index).get(); - auto fn = std::make_unique<ops::ZerosLikeLayer>(); + auto fn = std::make_unique<ops::RangeLayer>(); - fn->configure(input_tensor, output_tensor); + fn->configure(start_tensor, limit_tensor, delta_tensor, output_tensor); _return_fn = std::move(fn); } -void KernelGenerator::visit(const ir::operation::Range &node) +void KernelGenerator::visit(const ir::operation::Rank &node) { - const auto output_index{node.getOutputs().at(0)}; - const auto start_index{node.getInputs().at(ir::operation::Range::START)}; - const auto limit_index{node.getInputs().at(ir::operation::Range::LIMIT)}; - const auto delta_index{node.getInputs().at(ir::operation::Range::DELTA)}; + const auto ofm_index{node.getOutputs().at(0)}; + const auto ifm_index{node.getInputs().at(ir::operation::Shape::Input::INPUT)}; - auto output_tensor = _tensor_builder->portableAt(output_index).get(); - auto start_tensor = _tensor_builder->portableAt(start_index).get(); - auto limit_tensor = _tensor_builder->portableAt(limit_index).get(); - auto delta_tensor = _tensor_builder->portableAt(delta_index).get(); + auto ofm_tensor = _tensor_reg->getPortableTensor(ofm_index).get(); + auto ifm_tensor = _tensor_reg->getPortableTensor(ifm_index).get(); - auto fn = std::make_unique<ops::RangeLayer>(); + auto fn = std::make_unique<ops::RankLayer>(); + + fn->configure(ifm_tensor, ofm_tensor); - fn->configure(start_tensor, limit_tensor, delta_tensor, output_tensor); _return_fn = std::move(fn); } @@ -1289,9 +1075,9 @@ void KernelGenerator::visit(const ir::operation::SquaredDifference &node) const auto 
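Range and Rank gain visitors in this stretch. RangeLayer takes START, LIMIT and DELTA as tensors; semantically the op produces the usual half-open range. A minimal float sketch of that contract:

#include <stdexcept>
#include <vector>

std::vector<float> rangeOp(float start, float limit, float delta)
{
  if (delta == 0.f) throw std::runtime_error{"Range: delta must be non-zero"};
  std::vector<float> out;
  // Walk toward limit without ever including it, in either direction.
  for (float v = start; (delta > 0.f) ? (v < limit) : (v > limit); v += delta)
    out.push_back(v);
  return out;
}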
lhs_index{node.getInputs().at(ir::operation::SquaredDifference::Input::LHS)}; const auto rhs_index{node.getInputs().at(ir::operation::SquaredDifference::Input::RHS)}; - auto ofm_tensor = _tensor_builder->portableAt(ofm_index).get(); - auto lhs_tensor = _tensor_builder->portableAt(lhs_index).get(); - auto rhs_tensor = _tensor_builder->portableAt(rhs_index).get(); + auto ofm_tensor = _tensor_reg->getPortableTensor(ofm_index).get(); + auto lhs_tensor = _tensor_reg->getPortableTensor(lhs_index).get(); + auto rhs_tensor = _tensor_reg->getPortableTensor(rhs_index).get(); auto fn = std::make_unique<ops::SqDiffLayer>(); @@ -1305,9 +1091,9 @@ void KernelGenerator::visit(const ir::operation::Tile &node) const auto input_index{node.getInputs().at(ir::operation::Tile::INPUT)}; const auto multiples_index{node.getInputs().at(ir::operation::Tile::MULTIPLES)}; - auto output_tensor = _tensor_builder->portableAt(output_index).get(); - auto input_tensor = _tensor_builder->portableAt(input_index).get(); - auto multiples_tensor = _tensor_builder->portableAt(multiples_index).get(); + auto output_tensor = _tensor_reg->getPortableTensor(output_index).get(); + auto input_tensor = _tensor_reg->getPortableTensor(input_index).get(); + auto multiples_tensor = _tensor_reg->getPortableTensor(multiples_index).get(); auto fn = std::make_unique<ops::TileLayer>(); @@ -1322,10 +1108,10 @@ void KernelGenerator::visit(const ir::operation::MatrixBandPart &node) const auto num_lower_index{node.getInputs().at(ir::operation::MatrixBandPart::NUM_LOWER_DIAG)}; const auto num_upper_index{node.getInputs().at(ir::operation::MatrixBandPart::NUM_UPPER_DIAG)}; - auto output_tensor = _tensor_builder->portableAt(output_index).get(); - auto input_tensor = _tensor_builder->portableAt(input_index).get(); - auto num_lower_tensor = _tensor_builder->portableAt(num_lower_index).get(); - auto num_upper_tensor = _tensor_builder->portableAt(num_upper_index).get(); + auto output_tensor = _tensor_reg->getPortableTensor(output_index).get(); + auto input_tensor = _tensor_reg->getPortableTensor(input_index).get(); + auto num_lower_tensor = _tensor_reg->getPortableTensor(num_lower_index).get(); + auto num_upper_tensor = _tensor_reg->getPortableTensor(num_upper_index).get(); auto fn = std::make_unique<ops::MatrixBandPartLayer>(); @@ -1339,9 +1125,9 @@ void KernelGenerator::visit(const ir::operation::BatchMatMul &node) const auto lhs_index{node.getInputs().at(ir::operation::BatchMatMul::LHS)}; const auto rhs_index{node.getInputs().at(ir::operation::BatchMatMul::RHS)}; - auto output_tensor = _tensor_builder->portableAt(output_index).get(); - auto lhs_tensor = _tensor_builder->portableAt(lhs_index).get(); - auto rhs_tensor = _tensor_builder->portableAt(rhs_index).get(); + auto output_tensor = _tensor_reg->getPortableTensor(output_index).get(); + auto lhs_tensor = _tensor_reg->getPortableTensor(lhs_index).get(); + auto rhs_tensor = _tensor_reg->getPortableTensor(rhs_index).get(); const auto adj_x = node.param().adj_x; const auto adj_y = node.param().adj_y; @@ -1358,9 +1144,9 @@ void KernelGenerator::visit(const ir::operation::BroadcastTo &node) const auto input_index{node.getInputs().at(ir::operation::BroadcastTo::INPUT)}; const auto shape_index{node.getInputs().at(ir::operation::BroadcastTo::SHAPE)}; - auto output_tensor = _tensor_builder->portableAt(output_index).get(); - auto input_tensor = _tensor_builder->portableAt(input_index).get(); - auto shape_tensor = _tensor_builder->portableAt(shape_index).get(); + auto output_tensor = 
_tensor_reg->getPortableTensor(output_index).get(); + auto input_tensor = _tensor_reg->getPortableTensor(input_index).get(); + auto shape_tensor = _tensor_reg->getPortableTensor(shape_index).get(); auto fn = std::make_unique<ops::BroadcastToLayer>(); @@ -1373,10 +1159,10 @@ void KernelGenerator::visit(const ir::operation::FusedBatchNorm &node) { const auto ofm_index{node.getOutputs().at(0)}; - auto output_tensor = _tensor_builder->portableAt(ofm_index).get(); + auto output_tensor = _tensor_reg->getPortableTensor(ofm_index).get(); std::vector<const IPortableTensor *> input_tensors; for (auto &ifm_idx : node.getInputs()) - input_tensors.emplace_back(_tensor_builder->portableAt(ifm_idx).get()); + input_tensors.emplace_back(_tensor_reg->getPortableTensor(ifm_idx).get()); const auto epsilon = node.param().epsilon; const auto is_training = node.param().is_training; @@ -1397,8 +1183,8 @@ void KernelGenerator::visit(const ir::operation::LogSoftmax &node) const auto beta = node.param().beta; const auto axis = node.param().axis; - auto output_tensor = _tensor_builder->portableAt(output_index).get(); - auto input_tensor = _tensor_builder->portableAt(input_index).get(); + auto output_tensor = _tensor_reg->getPortableTensor(output_index).get(); + auto input_tensor = _tensor_reg->getPortableTensor(input_index).get(); auto fn = std::make_unique<ops::LogSoftMaxLayer>(); @@ -1414,10 +1200,10 @@ void KernelGenerator::visit(const ir::operation::SpaceToBatchND &node) const auto block_shape_index{node.getInputs().at(ir::operation::SpaceToBatchND::BLOCK_SIZE)}; const auto padding_index{node.getInputs().at(ir::operation::SpaceToBatchND::PADDINGS)}; - auto output_tensor = _tensor_builder->portableAt(output_index).get(); - auto input_tensor = _tensor_builder->portableAt(input_index).get(); - auto block_shape_tensor = _tensor_builder->portableAt(block_shape_index).get(); - auto padding_tensor = _tensor_builder->portableAt(padding_index).get(); + auto output_tensor = _tensor_reg->getPortableTensor(output_index).get(); + auto input_tensor = _tensor_reg->getPortableTensor(input_index).get(); + auto block_shape_tensor = _tensor_reg->getPortableTensor(block_shape_index).get(); + auto padding_tensor = _tensor_reg->getPortableTensor(padding_index).get(); auto fn = std::make_unique<ops::SpaceToBatchNDLayer>(); @@ -1426,29 +1212,14 @@ void KernelGenerator::visit(const ir::operation::SpaceToBatchND &node) _return_fn = std::move(fn); } -void KernelGenerator::visit(const ir::operation::Quantize &node) -{ - const auto input_index{node.getInputs().at(ir::operation::Quantize::Input::INPUT)}; - const auto output_index{node.getOutputs().at(0)}; - - auto input_tensor = _tensor_builder->portableAt(input_index).get(); - auto output_tensor = _tensor_builder->portableAt(output_index).get(); - - auto fn = std::make_unique<ops::QuantizeLayer>(); - - fn->configure(input_tensor, output_tensor); - - _return_fn = std::move(fn); -} - void KernelGenerator::visit(const ir::operation::SpaceToDepth &node) { const auto input_index{node.getInputs().at(ir::operation::SpaceToDepth::Input::INPUT)}; const auto output_index{node.getOutputs().at(0)}; auto block_size = node.param().block_size; - auto input_tensor = _tensor_builder->portableAt(input_index).get(); - auto output_tensor = _tensor_builder->portableAt(output_index).get(); + auto input_tensor = _tensor_reg->getPortableTensor(input_index).get(); + auto output_tensor = _tensor_reg->getPortableTensor(output_index).get(); auto fn = std::make_unique<ops::SpaceToDepthLayer>(); @@ -1462,9 +1233,9 @@ 
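LogSoftmax passes beta and axis through to LogSoftMaxLayer. Numerically the op is log_softmax(x)_i = beta*x_i - log(sum_j exp(beta*x_j)), computed stably by shifting by the maximum first. A single-axis float sketch (assumes a non-empty input):

#include <algorithm>
#include <cmath>
#include <cstddef>
#include <vector>

std::vector<float> logSoftmax(const std::vector<float> &x, float beta)
{
  const float max_in = *std::max_element(x.begin(), x.end());
  float sum = 0.f;
  for (float v : x) sum += std::exp((v - max_in) * beta); // shifted for stability
  const float log_sum = std::log(sum);
  std::vector<float> out(x.size());
  for (std::size_t i = 0; i < x.size(); ++i)
    out[i] = (x[i] - max_in) * beta - log_sum;
  return out;
}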
void KernelGenerator::visit(const ir::operation::StatelessRandomUniform &node) const auto shape_index{node.getInputs().at(ir::operation::StatelessRandomUniform::SHAPE)}; const auto seed_index{node.getInputs().at(ir::operation::StatelessRandomUniform::SEED)}; - auto output_alloc = _tensor_builder->portableAt(output_index).get(); - auto shape_alloc = _tensor_builder->portableAt(shape_index).get(); - auto seed_alloc = _tensor_builder->portableAt(seed_index).get(); + auto output_alloc = _tensor_reg->getPortableTensor(output_index).get(); + auto shape_alloc = _tensor_reg->getPortableTensor(shape_index).get(); + auto seed_alloc = _tensor_reg->getPortableTensor(seed_index).get(); auto fn = std::make_unique<ops::StatelessRandomUniformLayer>(); @@ -1481,13 +1252,13 @@ void KernelGenerator::visit(const ir::operation::SplitV &node) const auto size_splits{node.getInputs().at(ir::operation::SplitV::Input::SIZE_SPLITS)}; const auto split_dim{node.getInputs().at(ir::operation::SplitV::Input::SPLIT_DIM)}; - auto in_tensor = _tensor_builder->portableAt(input_idx).get(); - auto in_size_splits = _tensor_builder->portableAt(size_splits).get(); - auto in_split_dim = _tensor_builder->portableAt(split_dim).get(); + auto in_tensor = _tensor_reg->getPortableTensor(input_idx).get(); + auto in_size_splits = _tensor_reg->getPortableTensor(size_splits).get(); + auto in_split_dim = _tensor_reg->getPortableTensor(split_dim).get(); std::vector<IPortableTensor *> out_tensors; for (auto &output_idx : node.getOutputs()) - out_tensors.emplace_back(_tensor_builder->portableAt(output_idx).get()); + out_tensors.emplace_back(_tensor_reg->getPortableTensor(output_idx).get()); auto fn = std::make_unique<ops::SplitVLayer>(); diff --git a/runtime/onert/backend/cpu/KernelGenerator.h b/runtime/onert/backend/cpu/KernelGenerator.h index 40c056a96..786e68ee0 100644 --- a/runtime/onert/backend/cpu/KernelGenerator.h +++ b/runtime/onert/backend/cpu/KernelGenerator.h @@ -19,6 +19,7 @@ #include "ExternalContext.h" #include "TensorBuilder.h" +#include "backend/cpu_common/TensorRegistry.h" #include "Tensor.h" #include <backend/CustomKernelBuilder.h> @@ -38,6 +39,7 @@ class KernelGenerator : public IKernelGenerator public: KernelGenerator(const ir::Operands &operands_ctx, const ir::Operations &operations_ctx, const std::shared_ptr<TensorBuilder> &tensor_builder, + const std::shared_ptr<cpu_common::TensorRegistry> &tensor_reg, const std::shared_ptr<custom::IKernelBuilder> &kernel_builder, const std::shared_ptr<ExternalContext> &external_context); @@ -46,8 +48,6 @@ public: void visit(const ir::OpSequence &) override; void visit(const ir::operation::Conv2D &) override; void visit(const ir::operation::DepthwiseConv2D &) override; - void visit(const ir::operation::MaxPool2D &) override; - void visit(const ir::operation::AvgPool2D &) override; void visit(const ir::operation::Concat &) override; void visit(const ir::operation::Fill &) override; void visit(const ir::operation::FullyConnected &) override; @@ -55,51 +55,35 @@ public: void visit(const ir::operation::Squeeze &) override; void visit(const ir::operation::Softmax &) override; void visit(const ir::operation::Comparison &) override; - void visit(const ir::operation::Add &) override; - void visit(const ir::operation::Sub &) override; - void visit(const ir::operation::Mul &) override; - void visit(const ir::operation::Div &) override; + void visit(const ir::operation::BinaryArithmetic &) override; void visit(const ir::operation::Einsum &) override; void visit(const ir::operation::Gather &) 
override; void visit(const ir::operation::Custom &node) override; - void visit(const ir::operation::Exp &) override; + void visit(const ir::operation::ElementwiseActivation &) override; + void visit(const ir::operation::ElementwiseBinary &) override; + void visit(const ir::operation::ElementwiseUnary &) override; void visit(const ir::operation::ExpandDims &) override; - void visit(const ir::operation::Logistic &) override; void visit(const ir::operation::Pad &) override; - void visit(const ir::operation::Max &) override; - void visit(const ir::operation::Min &) override; - void visit(const ir::operation::Tanh &) override; void visit(const ir::operation::Pack &) override; void visit(const ir::operation::Unpack &) override; void visit(const ir::operation::OneHot &) override; - void visit(const ir::operation::Cast &) override; void visit(const ir::operation::Transpose &) override; void visit(const ir::operation::Reduce &) override; - void visit(const ir::operation::ReLU &) override; - void visit(const ir::operation::ReLU6 &) override; void visit(const ir::operation::Select &) override; void visit(const ir::operation::Slice &) override; void visit(const ir::operation::StridedSlice &) override; void visit(const ir::operation::Split &) override; - void visit(const ir::operation::Abs &) override; - void visit(const ir::operation::Cos &) override; - void visit(const ir::operation::Sin &) override; - void visit(const ir::operation::RSQRT &) override; void visit(const ir::operation::Shape &) override; void visit(const ir::operation::ResizeBilinear &node) override; void visit(const ir::operation::Reverse &) override; - void visit(const ir::operation::Neg &) override; void visit(const ir::operation::ArgMax &) override; - void visit(const ir::operation::Log &) override; - void visit(const ir::operation::Round &) override; + void visit(const ir::operation::Pool2D &) override; void visit(const ir::operation::Pow &) override; - void visit(const ir::operation::LogicalNot &) override; - void visit(const ir::operation::ZerosLike &) override; void visit(const ir::operation::SquaredDifference &) override; void visit(const ir::operation::Tile &) override; - void visit(const ir::operation::LogicalOr &) override; void visit(const ir::operation::L2Normalization &) override; void visit(const ir::operation::Range &) override; + void visit(const ir::operation::Rank &) override; void visit(const ir::operation::MatrixBandPart &) override; void visit(const ir::operation::BatchMatMul &) override; void visit(const ir::operation::BatchToSpaceND &) override; @@ -107,7 +91,6 @@ public: void visit(const ir::operation::FusedBatchNorm &) override; void visit(const ir::operation::LogSoftmax &) override; void visit(const ir::operation::SpaceToBatchND &) override; - void visit(const ir::operation::Quantize &) override; void visit(const ir::operation::SpaceToDepth &) override; void visit(const ir::operation::StatelessRandomUniform &) override; void visit(const ir::operation::SplitV &) override; @@ -116,6 +99,7 @@ private: const ir::Operands &_ctx; const ir::Operations &_operations_ctx; std::shared_ptr<TensorBuilder> _tensor_builder; + std::shared_ptr<cpu_common::TensorRegistry> _tensor_reg; std::shared_ptr<backend::custom::IKernelBuilder> _kernel_builder; ir::Layout _current_op_seq_layout; const std::shared_ptr<ExternalContext> _external_context; diff --git a/runtime/onert/backend/cpu/TensorBuilder.cc b/runtime/onert/backend/cpu/TensorBuilder.cc index ab8ba5756..828d52f7c 100644 --- a/runtime/onert/backend/cpu/TensorBuilder.cc +++ 
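Taken together, the header changes mean a cpu backend now builds its pieces around one shared cpu_common::TensorRegistry. A wiring sketch matching the new constructor signatures (a fragment, not a complete program; operands, operations, kernel_builder and external_context are assumed to be in scope):

// Shared registry first, then the builder and kernel generator that consume it.
auto tensor_reg = std::make_shared<cpu_common::TensorRegistry>();
auto tensor_builder = std::make_shared<TensorBuilder>(tensor_reg);
auto kernel_gen = std::make_shared<KernelGenerator>(operands, operations, tensor_builder,
                                                    tensor_reg, kernel_builder, external_context);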
b/runtime/onert/backend/cpu/TensorBuilder.cc @@ -27,8 +27,8 @@ namespace backend namespace cpu { -TensorBuilder::TensorBuilder() - : _tensor_reg{new cpu_common::TensorRegistry()}, +TensorBuilder::TensorBuilder(const std::shared_ptr<cpu_common::TensorRegistry> &tensor_reg) + : _tensor_reg{tensor_reg}, _dynamic_tensor_mgr{new cpu_common::DynamicTensorManager(_tensor_reg)}, _static_tensor_mgr{new StaticTensorManager(_tensor_reg, _dynamic_tensor_mgr.get())} { @@ -57,7 +57,7 @@ void TensorBuilder::notifyFirstUse(const ir::OperandIndex &ind) assert(_tensor_info_map.find(ind) != _tensor_info_map.end()); const auto tensor_info = _tensor_info_map.at(ind); - if (!at(ind)->is_dynamic()) + if (!_tensor_reg->getNativeTensor(ind)->is_dynamic()) { const auto size = tensor_info.total_size(); _static_tensor_mgr->claimPlan(ind, size); @@ -66,7 +66,7 @@ void TensorBuilder::notifyFirstUse(const ir::OperandIndex &ind) void TensorBuilder::notifyLastUse(const ir::OperandIndex &ind) { - if (!at(ind)->is_dynamic()) + if (!_tensor_reg->getNativeTensor(ind)->is_dynamic()) { _static_tensor_mgr->releasePlan(ind); } @@ -85,29 +85,6 @@ void TensorBuilder::allocate() // This is because CPU kernels require `ITensor`s to be allocated before Kernel Generation. } -std::shared_ptr<ITensor> TensorBuilder::tensorAt(const ir::OperandIndex &ind) -{ - return _tensor_reg->getITensor(ind); -} - -std::shared_ptr<IPortableTensor> TensorBuilder::portableAt(const ir::OperandIndex &ind) -{ - return _tensor_reg->getPortableTensor(ind); -} - -bool TensorBuilder::setMigrantTensor(const ir::OperandIndex &ind, - const std::shared_ptr<IPortableTensor> &tensor) -{ - return _tensor_reg->setMigrantTensor(ind, tensor); -} - -void TensorBuilder::iterate(const IterateFunction &fn) { _static_tensor_mgr->iterate(fn); } - -std::shared_ptr<Tensor> TensorBuilder::at(const ir::OperandIndex &ind) -{ - return _tensor_reg->getNativeTensor(ind); -} - std::unique_ptr<ITensorManager> TensorBuilder::releaseStaticTensorManager(void) { return std::move(_static_tensor_mgr); diff --git a/runtime/onert/backend/cpu/TensorBuilder.h b/runtime/onert/backend/cpu/TensorBuilder.h index 617136514..b6d5f09cc 100644 --- a/runtime/onert/backend/cpu/TensorBuilder.h +++ b/runtime/onert/backend/cpu/TensorBuilder.h @@ -38,9 +38,7 @@ namespace cpu class TensorBuilder : public ITensorBuilder { public: - TensorBuilder(); - - bool supportDynamicTensor() override { return true; } + TensorBuilder(const std::shared_ptr<cpu_common::TensorRegistry> &tensor_reg); /** * @brief Register tensor information to allocate on CPU backend @@ -60,34 +58,12 @@ public: void allocate() override; void postFunctionPrepare() override { /* DO NOTHING */} - /** - * @brief Get tensor with a specific OperandIndex - * - * @return shared_ptr<ITensor> if a tensor with given OperandIndex exists. nullptr otherwise. - */ - std::shared_ptr<ITensor> tensorAt(const ir::OperandIndex &ind) override; - - void iterate(const IterateFunction &fn) override; - std::unique_ptr<ITensorManager> releaseStaticTensorManager(void) override; IDynamicTensorManager *dynamicTensorManager(void) override { return _dynamic_tensor_mgr.get(); } std::unique_ptr<ITensorManager> releaseDynamicTensorManager(void) override; - /** - * @brief Get tensor with a specific OperandIndex. - * @param ind OperandIndex for the tensor. There must exist a tensor with this ind. - * If not, program will crash with assert or exception. 
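notifyFirstUse and notifyLastUse now consult the registry directly, but the protocol is unchanged: a static (non-dynamic) tensor claims a memory plan at its first use and releases it at its last use, which lets the static planner overlap non-conflicting lifetimes. A toy planner showing the bookkeeping this enables (a stand-in for the StaticTensorManager side, not the real interface):

#include <algorithm>
#include <cstddef>
#include <unordered_map>

class ToyStaticPlanner
{
public:
  void claimPlan(unsigned operand, std::size_t size) // first use
  {
    _live[operand] = size;
    _peak = std::max(_peak, liveBytes());
  }
  void releasePlan(unsigned operand) { _live.erase(operand); } // last use

  std::size_t liveBytes() const
  {
    std::size_t total = 0;
    for (const auto &entry : _live) total += entry.second;
    return total;
  }
  std::size_t peakBytes() const { return _peak; } // what a packing planner must fit

private:
  std::unordered_map<unsigned, std::size_t> _live;
  std::size_t _peak = 0;
};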
- * @return shared_ptr<Tensor> - */ - std::shared_ptr<Tensor> at(const ir::OperandIndex &ind); - std::shared_ptr<IPortableTensor> portableAt(const ir::OperandIndex &ind); - bool setMigrantTensor(const ir::OperandIndex &ind, - const std::shared_ptr<IPortableTensor> &tensor) override; - - std::shared_ptr<ITensorRegistry> tensorRegistry() override { return _tensor_reg; } - private: const std::shared_ptr<cpu_common::TensorRegistry> _tensor_reg; std::unique_ptr<cpu_common::DynamicTensorManager> _dynamic_tensor_mgr; diff --git a/runtime/onert/backend/cpu/ops/AbsLayer.cc b/runtime/onert/backend/cpu/ops/AbsLayer.cc deleted file mode 100644 index 322785aeb..000000000 --- a/runtime/onert/backend/cpu/ops/AbsLayer.cc +++ /dev/null @@ -1,70 +0,0 @@ -/* - * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#include "AbsLayer.h" - -#include "OperationUtils.h" - -#include <cker/operation/Elementwise.h> - -namespace onert -{ -namespace backend -{ -namespace cpu -{ -namespace ops -{ - -AbsLayer::AbsLayer() : _input(nullptr), _output(nullptr) -{ - // DO NOTHING -} - -void AbsLayer::absFloat32() -{ - nnfw::cker::Abs(getTensorShape(_input), reinterpret_cast<const float *>(_input->buffer()), - getTensorShape(_output), reinterpret_cast<float *>(_output->buffer())); -} - -void AbsLayer::absQuant8() { throw std::runtime_error{"NYI"}; } - -void AbsLayer::configure(const IPortableTensor *input, IPortableTensor *output) -{ - _input = input; - _output = output; -} - -void AbsLayer::run() -{ - if (_input->data_type() == OperandType::FLOAT32) - { - absFloat32(); - } - else if (_input->data_type() == OperandType::QUANT_UINT8_ASYMM) - { - absQuant8(); - } - else - { - throw std::runtime_error{"Abs: unsupported data type"}; - } -} - -} // namespace ops -} // namespace cpu -} // namespace backend -} // namespace onert diff --git a/runtime/onert/backend/cpu/ops/AbsLayer.h b/runtime/onert/backend/cpu/ops/AbsLayer.h deleted file mode 100644 index feb5f35ae..000000000 --- a/runtime/onert/backend/cpu/ops/AbsLayer.h +++ /dev/null @@ -1,57 +0,0 @@ -/* - * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in riting, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ - -#ifndef __ONERT_BACKEND_CPU_OPS_ABSLAYER_H__ -#define __ONERT_BACKEND_CPU_OPS_ABSLAYER_H__ - -#include "backend/IPortableTensor.h" - -#include <exec/IFunction.h> - -namespace onert -{ -namespace backend -{ -namespace cpu -{ -namespace ops -{ - -class AbsLayer : public ::onert::exec::IFunction -{ -public: - AbsLayer(); - -public: - void absFloat32(); - - void absQuant8(); - - void configure(const IPortableTensor *input, IPortableTensor *output); - - void run() override; - -private: - const IPortableTensor *_input; - IPortableTensor *_output; -}; - -} // namespace ops -} // namespace cpu -} // namespace backend -} // namespace onert - -#endif // __ONERT_BACKEND_CPU_OPS_ABSLAYER_H__ diff --git a/runtime/onert/backend/cpu/ops/AddLayer.cc b/runtime/onert/backend/cpu/ops/AddLayer.cc deleted file mode 100644 index 379215303..000000000 --- a/runtime/onert/backend/cpu/ops/AddLayer.cc +++ /dev/null @@ -1,166 +0,0 @@ -/* - * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#include "AddLayer.h" - -#include <cker/operation/BinaryArithmeticOps.h> - -namespace onert -{ -namespace backend -{ -namespace cpu -{ -namespace ops -{ - -void AddLayer::addFloat32() -{ - float output_activation_min = 0, output_activation_max = 0; - CalculateActivationRange(_activation, &output_activation_min, &output_activation_max); - nnfw::cker::BinaryArithmeticOpParam op_params; - op_params.float_activation_max = output_activation_max; - op_params.float_activation_min = output_activation_min; - - const bool need_broadcast = - nnfw::cker::ProcessBroadcastShapes(getTensorShape(_lhs), getTensorShape(_rhs), &op_params); - if (need_broadcast) - { - nnfw::cker::BroadcastBinaryArithmeticOp<nnfw::cker::BinaryArithmeticOpType::ADD>( - op_params, getTensorShape(_lhs), reinterpret_cast<const float *>(_lhs->buffer()), - getTensorShape(_rhs), reinterpret_cast<const float *>(_rhs->buffer()), - getTensorShape(_output), reinterpret_cast<float *>(_output->buffer())); - return; - } - - nnfw::cker::BinaryArithmeticOp<nnfw::cker::BinaryArithmeticOpType::ADD>( - op_params, getTensorShape(_lhs), reinterpret_cast<const float *>(_lhs->buffer()), - getTensorShape(_rhs), reinterpret_cast<const float *>(_rhs->buffer()), - getTensorShape(_output), reinterpret_cast<float *>(_output->buffer())); -} - -void AddLayer::addInt32() -{ - int32_t output_activation_min = 0, output_activation_max = 0; - CalculateActivationRange(_activation, &output_activation_min, &output_activation_max); - nnfw::cker::BinaryArithmeticOpParam op_params; - op_params.quantized_activation_max = output_activation_max; - op_params.quantized_activation_min = output_activation_min; - - const bool need_broadcast = - nnfw::cker::ProcessBroadcastShapes(getTensorShape(_lhs), getTensorShape(_rhs), &op_params); - if (need_broadcast) - { - nnfw::cker::BroadcastBinaryArithmeticOp<nnfw::cker::BinaryArithmeticOpType::ADD>( - op_params, getTensorShape(_lhs), reinterpret_cast<const int32_t *>(_lhs->buffer()), 
- getTensorShape(_rhs), reinterpret_cast<const int32_t *>(_rhs->buffer()), - getTensorShape(_output), reinterpret_cast<int32_t *>(_output->buffer())); - return; - } - - nnfw::cker::BinaryArithmeticOp<nnfw::cker::BinaryArithmeticOpType::ADD>( - op_params, getTensorShape(_lhs), reinterpret_cast<const int32_t *>(_lhs->buffer()), - getTensorShape(_rhs), reinterpret_cast<const int32_t *>(_rhs->buffer()), - getTensorShape(_output), reinterpret_cast<int32_t *>(_output->buffer())); -} - -void AddLayer::addQuant8() -{ - int32_t output_activation_min, output_activation_max; - CalculateActivationRangeUint8(_activation, _output, &output_activation_min, - &output_activation_max); - nnfw::cker::BinaryArithmeticOpParam op_params; - op_params.quantized_activation_max = output_activation_max; - op_params.quantized_activation_min = output_activation_min; - // Parameters for scaled quantized computation - op_params.left_shift = 20; - // Zero-points of input and output tensors - op_params.input1_offset = -_lhs->data_offset(); - op_params.input2_offset = -_rhs->data_offset(); - op_params.output_offset = _output->data_offset(); - assert((op_params.input1_offset >= 0) && (op_params.input1_offset <= 255)); - assert((op_params.input2_offset >= 0) && (op_params.input2_offset <= 255)); - assert((op_params.output_offset >= 0) && (op_params.output_offset <= 255)); - - // Compute normalized scale for _lhs and _rhs values, - // and represent in 32-bit fixed point - const double norm_max_scale = 2 * std::max(_lhs->data_scale(), _rhs->data_scale()); - const double real_lhs_scale = _lhs->data_scale() / norm_max_scale; - const double real_rhs_scale = _rhs->data_scale() / norm_max_scale; - // output scale is used to normalize final result, so we invert the scale here - const double real_output_scale = - norm_max_scale / (_output->data_scale() * (1 << op_params.left_shift)); - - // Represent the scales as fixed int32_t multipliers, and int32_t shifts - QuantizeMultiplier(real_lhs_scale, &op_params.input1_multiplier, &op_params.input1_shift); - QuantizeMultiplier(real_rhs_scale, &op_params.input2_multiplier, &op_params.input2_shift); - QuantizeMultiplier(real_output_scale, &op_params.output_multiplier, &op_params.output_shift); - - // cker quant8 add is not implemented yet - const bool need_broadcast = - nnfw::cker::ProcessBroadcastShapes(getTensorShape(_lhs), getTensorShape(_rhs), &op_params); - if (need_broadcast) - { - nnfw::cker::BroadcastBinaryArithmeticOp<nnfw::cker::BinaryArithmeticOpType::ADD>( - op_params, getTensorShape(_lhs), reinterpret_cast<const uint8_t *>(_lhs->buffer()), - getTensorShape(_rhs), reinterpret_cast<const uint8_t *>(_rhs->buffer()), - getTensorShape(_output), reinterpret_cast<uint8_t *>(_output->buffer())); - return; - } - - nnfw::cker::BinaryArithmeticOp<nnfw::cker::BinaryArithmeticOpType::ADD>( - op_params, getTensorShape(_lhs), reinterpret_cast<const uint8_t *>(_lhs->buffer()), - getTensorShape(_rhs), reinterpret_cast<const uint8_t *>(_rhs->buffer()), - getTensorShape(_output), reinterpret_cast<uint8_t *>(_output->buffer())); -} - -void AddLayer::configure(const IPortableTensor *lhs, const IPortableTensor *rhs, - const ir::Activation activation, IPortableTensor *output) -{ - assert(lhs != nullptr); - assert(rhs != nullptr); - assert(output != nullptr); - - _lhs = lhs; - _rhs = rhs; - _activation = activation; - _output = output; -} - -void AddLayer::run() -{ - if (_lhs->data_type() == OperandType::FLOAT32) - { - addFloat32(); - } - else if (_lhs->data_type() == OperandType::QUANT_UINT8_ASYMM) - { 
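The deleted addQuant8 is the standard scaled-integer formulation of quantized addition: both inputs are normalized by twice the larger input scale, given 20 bits of left-shift headroom, and the output scale folds the normalization back out before QuantizeMultiplier turns each real scale into a fixed-point multiplier/shift pair. A sketch of just the real-valued scale preparation (the fixed-point conversion stays in cker):

#include <algorithm>

struct QuantizedAddScales { double lhs, rhs, output; };

QuantizedAddScales prepareAddScales(double lhs_scale, double rhs_scale, double output_scale,
                                    int left_shift /* 20 in the deleted code */)
{
  // Normalize both inputs against twice the larger input scale...
  const double norm_max_scale = 2 * std::max(lhs_scale, rhs_scale);
  // ...and invert that normalization (plus the headroom shift) through the output scale.
  return {lhs_scale / norm_max_scale, rhs_scale / norm_max_scale,
          norm_max_scale / (output_scale * (1 << left_shift))};
}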
- addQuant8(); - } - else if (_output->data_type() == OperandType::INT32) - { - addInt32(); - } - else - { - throw std::runtime_error{"Add: unsupported data type"}; - } -} - -} // namespace ops -} // namespace cpu -} // namespace backend -} // namespace onert diff --git a/runtime/onert/backend/cpu/ops/AddLayer.h b/runtime/onert/backend/cpu/ops/AddLayer.h deleted file mode 100644 index 91030d93a..000000000 --- a/runtime/onert/backend/cpu/ops/AddLayer.h +++ /dev/null @@ -1,67 +0,0 @@ -/* - * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#ifndef __ONERT_BACKEND_CPU_OPS_ADDLAYER_H__ -#define __ONERT_BACKEND_CPU_OPS_ADDLAYER_H__ - -#include <backend/IPortableTensor.h> -#include "OperationUtils.h" - -#include <exec/IFunction.h> - -namespace onert -{ -namespace backend -{ -namespace cpu -{ -namespace ops -{ - -class AddLayer : public ::onert::exec::IFunction -{ -public: - AddLayer() : _lhs(nullptr), _rhs(nullptr), _output(nullptr) - { - // DO NOTHING - } - -public: - void addFloat32(); - - void addQuant8(); - - void addInt32(); - - void configure(const IPortableTensor *lhs, const IPortableTensor *rhs, - const ir::Activation activation, IPortableTensor *output); - - void run() override; - -private: - const IPortableTensor *_lhs; - const IPortableTensor *_rhs; - IPortableTensor *_output; - - ir::Activation _activation{ir::Activation::NONE}; -}; - -} // namespace ops -} // namespace cpu -} // namespace backend -} // namespace onert - -#endif // __ONERT_BACKEND_CPU_OPS_ADDLAYER_H__ diff --git a/runtime/onert/backend/cpu/ops/AvgPoolLayer.cc b/runtime/onert/backend/cpu/ops/AvgPoolLayer.cc deleted file mode 100644 index 9c22c1c86..000000000 --- a/runtime/onert/backend/cpu/ops/AvgPoolLayer.cc +++ /dev/null @@ -1,118 +0,0 @@ -/* - * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ - -#include "AvgPoolLayer.h" - -#include <cker/operation/AveragePool.h> - -namespace onert -{ -namespace backend -{ -namespace cpu -{ -namespace ops -{ - -#define AVGPOOLING_PARAMETERS \ - nnfw::cker::PoolParams op_params; \ - op_params.stride_height = _strideHeight; \ - op_params.stride_width = _strideWidth; \ - op_params.filter_height = _kernelHeight; \ - op_params.filter_width = _kernelWidth; \ - op_params.padding_values.height = (int8_t)_paddingTop; \ - op_params.padding_values.width = (int8_t)_paddingLeft; - -AvgPoolLayer::AvgPoolLayer() - : _input(nullptr), _output(nullptr), _paddingLeft(0), _paddingTop(0), _paddingRight(0), - _paddingBottom(0), _strideWidth(0), _strideHeight(0), _kernelWidth(0), _kernelHeight(0), - _activation(ir::Activation::NONE) -{ - // DO NOTHING -} - -void AvgPoolLayer::averagePoolFloat32() -{ - AVGPOOLING_PARAMETERS - float output_activation_min = 0, output_activation_max = 0; - CalculateActivationRange(_activation, &output_activation_min, &output_activation_max); - op_params.float_activation_min = output_activation_min; - op_params.float_activation_max = output_activation_max; - - nnfw::cker::AveragePool(op_params, getTensorShape(_input), - reinterpret_cast<const float *>(_input->buffer()), - getTensorShape(_output), reinterpret_cast<float *>(_output->buffer())); -} -void AvgPoolLayer::averagePoolQuant8() -{ - AVGPOOLING_PARAMETERS - int32_t output_activation_min = 0; - int32_t output_activation_max = 0; - CalculateActivationRangeUint8(_activation, _output, &output_activation_min, - &output_activation_max); - op_params.quantized_activation_min = output_activation_min; - op_params.quantized_activation_max = output_activation_max; - - nnfw::cker::AveragePool(op_params, getTensorShape(_input), - reinterpret_cast<const uint8_t *>(_input->buffer()), - getTensorShape(_output), reinterpret_cast<uint8_t *>(_output->buffer())); -} - -void AvgPoolLayer::configure(const IPortableTensor *input, const uint32_t paddingLeft, - const uint32_t paddingRight, const uint32_t paddingTop, - const uint32_t paddingBottom, const uint32_t strideWidth, - const uint32_t strideHeight, const uint32_t kernelWidth, - const uint32_t kernelHeight, const ir::Activation activation, - IPortableTensor *output) -{ - assert(input != nullptr); - assert(output != nullptr); - - _input = input; - _paddingLeft = paddingLeft; - _paddingRight = paddingRight; - _paddingTop = paddingTop; - _paddingBottom = paddingBottom; - _strideWidth = strideWidth; - _strideHeight = strideHeight; - _kernelWidth = kernelWidth; - _kernelHeight = kernelHeight; - _activation = activation; - _output = output; -} - -void AvgPoolLayer::run() -{ - if (_input->data_type() == OperandType::FLOAT32) - { - averagePoolFloat32(); - } - else if (_input->data_type() == OperandType::QUANT_UINT8_ASYMM) - { - averagePoolQuant8(); - } - else - { - throw std::runtime_error{"AvgPool: unsupported data type"}; - } -} - -#undef AVGPOOLING_PARAMETERS - -} // namespace ops -} // namespace cpu -} // namespace backend -} // namespace onert diff --git a/runtime/onert/backend/cpu/ops/AvgPoolLayer.h b/runtime/onert/backend/cpu/ops/AvgPoolLayer.h deleted file mode 100644 index d4e8f79e7..000000000 --- a/runtime/onert/backend/cpu/ops/AvgPoolLayer.h +++ /dev/null @@ -1,75 +0,0 @@ -/* - * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. 
- * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#ifndef __ONERT_BACKEND_CPU_OPS_AVGPOOLLAYER_H__ -#define __ONERT_BACKEND_CPU_OPS_AVGPOOLLAYER_H__ - -#include <backend/IPortableTensor.h> -#include "OperationUtils.h" - -#include <exec/IFunction.h> - -namespace onert -{ -namespace backend -{ -namespace cpu -{ -namespace ops -{ - -class AvgPoolLayer : public ::onert::exec::IFunction -{ -public: - AvgPoolLayer(); - -public: - void averagePoolFloat32(); - - void averagePoolQuant8(); - - void configure(const IPortableTensor *input, const uint32_t paddingLeft, - const uint32_t paddingRight, const uint32_t paddingTop, - const uint32_t paddingBottom, const uint32_t strideWidth, - const uint32_t strideHeight, const uint32_t kernelWidth, - const uint32_t kernelHeight, const ir::Activation activation, - IPortableTensor *output); - - void run() override; - -private: - const IPortableTensor *_input; - IPortableTensor *_output; - - uint32_t _paddingLeft; - uint32_t _paddingTop; - uint32_t _paddingRight; - uint32_t _paddingBottom; - - uint32_t _strideWidth; - uint32_t _strideHeight; - uint32_t _kernelWidth; - uint32_t _kernelHeight; - - ir::Activation _activation; -}; - -} // namespace ops -} // namespace cpu -} // namespace backend -} // namespace onert - -#endif // __ONERT_BACKEND_CPU_OPS_AVGPOOLLAYER_H__ diff --git a/runtime/onert/backend/cpu/ops/BinaryArithmeticLayer.cc b/runtime/onert/backend/cpu/ops/BinaryArithmeticLayer.cc new file mode 100644 index 000000000..f50c63375 --- /dev/null +++ b/runtime/onert/backend/cpu/ops/BinaryArithmeticLayer.cc @@ -0,0 +1,225 @@ +/* + * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */
+
+#include "BinaryArithmeticLayer.h"
+
+#include <cker/operation/BinaryArithmeticOps.h>
+
+namespace onert
+{
+namespace backend
+{
+namespace cpu
+{
+namespace ops
+{
+
+namespace
+{
+
+template <nnfw::cker::BinaryArithmeticOpType arithmetic_type, typename T>
+void eval(const IPortableTensor *lhs, const IPortableTensor *rhs, IPortableTensor *output,
+          nnfw::cker::BinaryArithmeticOpParam op_params)
+{
+  const bool need_broadcast =
+      nnfw::cker::ProcessBroadcastShapes(getTensorShape(lhs), getTensorShape(rhs), &op_params);
+  if (need_broadcast)
+  {
+    nnfw::cker::BroadcastBinaryArithmeticOp<arithmetic_type>(
+        op_params, getTensorShape(lhs), reinterpret_cast<const T *>(lhs->buffer()),
+        getTensorShape(rhs), reinterpret_cast<const T *>(rhs->buffer()), getTensorShape(output),
+        reinterpret_cast<T *>(output->buffer()));
+    return;
+  }
+
+  nnfw::cker::BinaryArithmeticOp<arithmetic_type>(
+      op_params, getTensorShape(lhs), reinterpret_cast<const T *>(lhs->buffer()),
+      getTensorShape(rhs), reinterpret_cast<const T *>(rhs->buffer()), getTensorShape(output),
+      reinterpret_cast<T *>(output->buffer()));
+}
+
+template <nnfw::cker::BinaryArithmeticOpType arithmetic_type>
+std::function<void(const IPortableTensor *, const IPortableTensor *, IPortableTensor *)>
+generateKernelGeneric(const IPortableTensor *lhs, const ir::Activation activation,
+                      nnfw::cker::BinaryArithmeticOpParam op_params)
+{
+  switch (lhs->data_type())
+  {
+    case OperandType::FLOAT32:
+    {
+      float output_activation_min = 0, output_activation_max = 0;
+      CalculateActivationRange(activation, &output_activation_min, &output_activation_max);
+      op_params.float_activation_max = output_activation_max;
+      op_params.float_activation_min = output_activation_min;
+      return std::bind(&eval<arithmetic_type, float>, std::placeholders::_1, std::placeholders::_2,
+                       std::placeholders::_3, op_params);
+    }
+    case OperandType::INT32:
+    {
+      int32_t output_activation_min = 0, output_activation_max = 0;
+      CalculateActivationRange(activation, &output_activation_min, &output_activation_max);
+      op_params.quantized_activation_max = output_activation_max;
+      op_params.quantized_activation_min = output_activation_min;
+      return std::bind(&eval<arithmetic_type, int32_t>, std::placeholders::_1,
+                       std::placeholders::_2, std::placeholders::_3, op_params);
+    }
+    default:
+      throw std::runtime_error{"BinaryArithmetic(generic): Unsupported data type"};
+  }
+}
+
+void setAddOrSubQuant8Params(const IPortableTensor *lhs, const IPortableTensor *rhs,
+                             IPortableTensor *output, ir::Activation activation,
+                             nnfw::cker::BinaryArithmeticOpParam *params)
+{
+  int32_t output_activation_min, output_activation_max;
+  CalculateActivationRangeUint8(activation, output, &output_activation_min,
+                                &output_activation_max);
+  nnfw::cker::BinaryArithmeticOpParam &op_params = *params;
+  op_params.quantized_activation_max = output_activation_max;
+  op_params.quantized_activation_min = output_activation_min;
+  // Parameters for scaled quantized computation
+  op_params.left_shift = 20;
+  // Zero-points of input and output tensors
+  op_params.input1_offset = -lhs->data_offset();
+  op_params.input2_offset = -rhs->data_offset();
+  op_params.output_offset = output->data_offset();
+  assert((op_params.input1_offset >= 0) && (op_params.input1_offset <= 255));
+  assert((op_params.input2_offset >= 0) && (op_params.input2_offset <= 255));
+  assert((op_params.output_offset >= 0) && (op_params.output_offset <= 255));
+
+  // Compute normalized scale for lhs and rhs values,
+  // and represent in
+  // and represent in 32-bit fixed point
+  const double norm_max_scale = 2 * std::max(lhs->data_scale(), rhs->data_scale());
+  const double real_lhs_scale = lhs->data_scale() / norm_max_scale;
+  const double real_rhs_scale = rhs->data_scale() / norm_max_scale;
+  // output scale is used to normalize final result, so we invert the scale here
+  const double real_output_scale =
+      norm_max_scale / (output->data_scale() * (1 << op_params.left_shift));
+
+  // Represent the scales as fixed int32_t multipliers, and int32_t shifts
+  QuantizeMultiplier(real_lhs_scale, &op_params.input1_multiplier, &op_params.input1_shift);
+  QuantizeMultiplier(real_rhs_scale, &op_params.input2_multiplier, &op_params.input2_shift);
+  QuantizeMultiplier(real_output_scale, &op_params.output_multiplier, &op_params.output_shift);
+}
+
+void setMulQuant8Params(const IPortableTensor *lhs, const IPortableTensor *rhs,
+                        IPortableTensor *output, ir::Activation activation,
+                        nnfw::cker::BinaryArithmeticOpParam *params)
+{
+  int32_t output_activation_min, output_activation_max;
+  CalculateActivationRangeUint8(activation, output, &output_activation_min,
+                                &output_activation_max);
+  nnfw::cker::BinaryArithmeticOpParam &op_params = *params;
+
+  op_params.quantized_activation_max = output_activation_max;
+  op_params.quantized_activation_min = output_activation_min;
+  op_params.input1_offset = -lhs->data_offset();
+  op_params.input2_offset = -rhs->data_offset();
+  op_params.output_offset = output->data_offset();
+
+  double real_multiplier = lhs->data_scale() * rhs->data_scale() / output->data_scale();
+  QuantizeMultiplier(real_multiplier, &op_params.output_multiplier, &op_params.output_shift);
+}
+
+} // namespace
+
+void BinaryArithmeticLayer::configure(const IPortableTensor *lhs, const IPortableTensor *rhs,
+                                      IPortableTensor *output, const ir::Activation activation,
+                                      const ArithmeticType arithmetic_type)
+{
+  assert(lhs != nullptr);
+  assert(rhs != nullptr);
+  assert(output != nullptr);
+
+  _lhs = lhs;
+  _rhs = rhs;
+  _output = output;
+
+  nnfw::cker::BinaryArithmeticOpParam op_params;
+  switch (arithmetic_type)
+  {
+    case ArithmeticType::kAdd:
+      if (_lhs->data_type() == OperandType::QUANT_UINT8_ASYMM)
+      {
+        setAddOrSubQuant8Params(_lhs, _rhs, _output, activation, &op_params);
+        _kernel = std::bind(&eval<nnfw::cker::BinaryArithmeticOpType::ADD, uint8_t>,
+                            std::placeholders::_1, std::placeholders::_2, std::placeholders::_3,
+                            op_params);
+      }
+      else
+      {
+        _kernel = generateKernelGeneric<nnfw::cker::BinaryArithmeticOpType::ADD>(_lhs, activation,
+                                                                                 op_params);
+      }
+      break;
+    case ArithmeticType::kSub:
+      if (_lhs->data_type() == OperandType::QUANT_UINT8_ASYMM)
+      {
+        setAddOrSubQuant8Params(_lhs, _rhs, _output, activation, &op_params);
+        op_params.input2_multiplier *= -1;
+        _kernel = std::bind(&eval<nnfw::cker::BinaryArithmeticOpType::SUB, uint8_t>,
+                            std::placeholders::_1, std::placeholders::_2, std::placeholders::_3,
+                            op_params);
+      }
+      else
+      {
+        _kernel = generateKernelGeneric<nnfw::cker::BinaryArithmeticOpType::SUB>(_lhs, activation,
+                                                                                 op_params);
+      }
+      break;
+    case ArithmeticType::kMul:
+      if (_lhs->data_type() == OperandType::QUANT_UINT8_ASYMM)
+      {
+        setMulQuant8Params(_lhs, _rhs, _output, activation, &op_params);
+        _kernel = std::bind(&eval<nnfw::cker::BinaryArithmeticOpType::MUL, uint8_t>,
+                            std::placeholders::_1, std::placeholders::_2, std::placeholders::_3,
+                            op_params);
+      }
+      else
+      {
+        _kernel = generateKernelGeneric<nnfw::cker::BinaryArithmeticOpType::MUL>(_lhs, activation,
+                                                                                 op_params);
+      }
+      break;
+    case ArithmeticType::kDiv:
+      if (_lhs->data_type() == OperandType::QUANT_UINT8_ASYMM)
+      {
+        throw std::runtime_error{
+            "BinaryArithmetic(Div): Div operation does not support quantization"};
+      }
+      else if (_lhs->data_type() == OperandType::INT32)
+      {
+        throw std::runtime_error{"BinaryArithmetic(Div): Unsupported data type"};
+      }
+      else
+      {
+        _kernel = generateKernelGeneric<nnfw::cker::BinaryArithmeticOpType::DIV>(_lhs, activation,
+                                                                                 op_params);
+      }
+      break;
+    default:
+      throw std::runtime_error{"BinaryArithmetic: Unsupported BinaryArithmetic type"};
+  }
+}
+
+void BinaryArithmeticLayer::run() { _kernel(_lhs, _rhs, _output); }
+
+} // namespace ops
+} // namespace cpu
+} // namespace backend
+} // namespace onert
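The uint8 Add/Sub path configured above follows the usual fixed-point rescaling scheme: offset-corrected inputs are widened by left_shift = 20, scaled by per-input multipliers normalized against norm_max_scale, summed, and rescaled into the output's quantized domain. The sketch below redoes that arithmetic with plain doubles in place of the QuantizeMultiplier multiplier/shift pairs, to make the factoring visible; the function name and final clamp are illustrative, not part of onert.

#include <algorithm>
#include <cmath>
#include <cstdint>

// Plain-double model of the uint8 Add parameters above (illustration only;
// the runtime turns each scale ratio into an int32 multiplier/shift pair).
uint8_t quantizedAdd(uint8_t lhs, uint8_t rhs, int32_t lhs_zero_point, double lhs_scale,
                     int32_t rhs_zero_point, double rhs_scale, int32_t out_zero_point,
                     double out_scale)
{
  const int left_shift = 20; // widen inputs so the scale ratios keep precision
  const double norm_max_scale = 2 * std::max(lhs_scale, rhs_scale);
  // Offset-corrected inputs, widened into the high-precision domain
  const int32_t shifted_lhs = (static_cast<int32_t>(lhs) - lhs_zero_point) * (1 << left_shift);
  const int32_t shifted_rhs = (static_cast<int32_t>(rhs) - rhs_zero_point) * (1 << left_shift);
  // input1/input2 multipliers: each input's scale normalized by norm_max_scale
  const double sum = shifted_lhs * (lhs_scale / norm_max_scale) +
                     shifted_rhs * (rhs_scale / norm_max_scale);
  // output multiplier: undo the normalization and the left shift, land on out_scale
  const double rescaled = sum * (norm_max_scale / (out_scale * (1 << left_shift)));
  const int32_t quantized = static_cast<int32_t>(std::round(rescaled)) + out_zero_point;
  return static_cast<uint8_t>(std::min(255, std::max(0, quantized)));
}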
diff --git a/runtime/onert/backend/cpu/ops/DivLayer.h b/runtime/onert/backend/cpu/ops/BinaryArithmeticLayer.h
index 9411be76e..d6b33ad07 100644
--- a/runtime/onert/backend/cpu/ops/DivLayer.h
+++ b/runtime/onert/backend/cpu/ops/BinaryArithmeticLayer.h
@@ -14,8 +14,8 @@
  * limitations under the License.
  */
 
-#ifndef __ONERT_BACKEND_CPU_OPS_DIVLAYER_H__
-#define __ONERT_BACKEND_CPU_OPS_DIVLAYER_H__
+#ifndef __ONERT_BACKEND_CPU_OPS_BINARYARITHMETICLAYER_H__
+#define __ONERT_BACKEND_CPU_OPS_BINARYARITHMETICLAYER_H__
 
 #include <backend/IPortableTensor.h>
 #include "OperationUtils.h"
 
@@ -31,21 +31,25 @@ namespace cpu
 namespace ops
 {
 
-class DivLayer : public ::onert::exec::IFunction
+enum class ArithmeticType
+{
+  kAdd,
+  kSub,
+  kMul,
+  kDiv,
+};
+
+class BinaryArithmeticLayer : public ::onert::exec::IFunction
 {
 public:
-  DivLayer() : _lhs(nullptr), _rhs(nullptr), _output(nullptr)
+  BinaryArithmeticLayer() : _lhs(nullptr), _rhs(nullptr), _output(nullptr)
   {
     // DO NOTHING
   }
 
 public:
-  void divFloat32();
-
-  void divQuant8();
-
-  void configure(const IPortableTensor *lhs, const IPortableTensor *rhs,
-                 const ir::Activation activation, IPortableTensor *output);
+  void configure(const IPortableTensor *lhs, const IPortableTensor *rhs, IPortableTensor *output,
+                 const ir::Activation activation, const ArithmeticType arithmetic_type);
 
   void run() override;
 
@@ -54,7 +58,7 @@ private:
   const IPortableTensor *_rhs;
   IPortableTensor *_output;
 
-  ir::Activation _activation{ir::Activation::NONE};
+  std::function<void(const IPortableTensor *, const IPortableTensor *, IPortableTensor *)> _kernel;
 };
 
 } // namespace ops
@@ -62,4 +66,4 @@
 } // namespace backend
 } // namespace onert
 
-#endif // __ONERT_BACKEND_CPU_OPS_DIVLAYER_H__
+#endif // __ONERT_BACKEND_CPU_OPS_BINARYARITHMETICLAYER_H__
diff --git a/runtime/onert/backend/cpu/ops/CastLayer.cc b/runtime/onert/backend/cpu/ops/CastLayer.cc
deleted file mode 100644
index 497515606..000000000
--- a/runtime/onert/backend/cpu/ops/CastLayer.cc
+++ /dev/null
@@ -1,112 +0,0 @@
-/*
- * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- *      http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */ - -#include "CastLayer.h" - -namespace onert -{ -namespace backend -{ -namespace cpu -{ -namespace ops -{ - -CastLayer::CastLayer() : _input(nullptr), _output(nullptr) -{ - // DO NOTHING -} - -void CastLayer::configure(const IPortableTensor *input, IPortableTensor *output) -{ - _input = input; - _output = output; -} - -template <typename FromT, typename ToT> void CastLayer::castTensor(const FromT *in, ToT *out) -{ - auto input_shape = getTensorShape(_input); - auto output_shape = getTensorShape(_output); - const auto num_elements = MatchingFlatSize(input_shape, output_shape); - - std::transform(in, in + num_elements, out, [](FromT a) { return static_cast<ToT>(a); }); -} - -template <typename FromT> void CastLayer::castPtr(const FromT *in, DataPtr out) -{ - switch (_output->data_type()) - { - case ir::DataType::FLOAT32: - castTensor(in, out.f); - return; - case ir::DataType::INT32: - castTensor(in, out.i32); - return; - case ir::DataType::UINT32: - castTensor(in, out.u32); - return; - case ir::DataType::UINT8: - castTensor(in, out.u8); - return; - case ir::DataType::BOOL8: - castTensor(in, out.b); - return; - case ir::DataType::INT64: - castTensor(in, out.i64); - return; - default: - throw std::runtime_error("Not supported output type" + - std::to_string((int)_output->data_type())); - } -} - -void CastLayer::run() -{ - auto input_buf = _input->buffer(); - auto output_buf = _output->buffer(); - const auto in = *reinterpret_cast<const DataPtr *>(&input_buf); - auto out = *reinterpret_cast<DataPtr *>(&output_buf); - - switch (_input->data_type()) - { - case ir::DataType::FLOAT32: - castPtr(in.f, out); - return; - case ir::DataType::INT32: - castPtr(in.i32, out); - return; - case ir::DataType::UINT32: - castPtr(in.u32, out); - return; - case ir::DataType::UINT8: - castPtr(in.u8, out); - return; - case ir::DataType::BOOL8: - castPtr(in.b, out); - return; - case ir::DataType::INT64: - castPtr(in.i64, out); - return; - default: - throw std::runtime_error("Cast: unsupported data type" + - std::to_string((int)_input->data_type())); - } -} - -} // namespace ops -} // namespace cpu -} // namespace backend -} // namespace onert diff --git a/runtime/onert/backend/cpu/ops/CastLayer.h b/runtime/onert/backend/cpu/ops/CastLayer.h deleted file mode 100644 index 290c722e2..000000000 --- a/runtime/onert/backend/cpu/ops/CastLayer.h +++ /dev/null @@ -1,57 +0,0 @@ -/* - * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */
-
-#ifndef __ONERT_BACKEND_CPU_OPS_CASTLAYER_H__
-#define __ONERT_BACKEND_CPU_OPS_CASTLAYER_H__
-
-#include <backend/IPortableTensor.h>
-#include "OperationUtils.h"
-
-#include <exec/IFunction.h>
-
-namespace onert
-{
-namespace backend
-{
-namespace cpu
-{
-namespace ops
-{
-
-class CastLayer : public ::onert::exec::IFunction
-{
-public:
-  CastLayer();
-
-public:
-  template <typename FromT, typename ToT> void castTensor(const FromT *in, ToT *out);
-  template <typename FromT> void castPtr(const FromT *in, DataPtr out);
-
-  void configure(const IPortableTensor *input, IPortableTensor *output);
-
-  void run() override;
-
-private:
-  const IPortableTensor *_input;
-  IPortableTensor *_output;
-};
-
-} // namespace ops
-} // namespace cpu
-} // namespace backend
-} // namespace onert
-
-#endif // __ONERT_BACKEND_CPU_OPS_CASTLAYER_H__
diff --git a/runtime/onert/backend/cpu/ops/ConvolutionLayer.cc b/runtime/onert/backend/cpu/ops/ConvolutionLayer.cc
index 2d5bbef1e..c057267d3 100644
--- a/runtime/onert/backend/cpu/ops/ConvolutionLayer.cc
+++ b/runtime/onert/backend/cpu/ops/ConvolutionLayer.cc
@@ -31,7 +31,8 @@ namespace ops
 ConvolutionLayer::ConvolutionLayer()
     : _input(nullptr), _kernel(nullptr), _bias(nullptr), _output(nullptr),
       _paddingType(ir::PaddingType::EXPLICIT), _paddingLeft(0), _paddingTop(0), _paddingRight(0),
-      _paddingBottom(0), _strideWidth(0), _strideHeight(0), _activation(ir::Activation::NONE),
+      _paddingBottom(0), _strideWidth(0), _strideHeight(0), _dilationWidthFactor(1),
+      _dilationHeightFactor(1), _activation(ir::Activation::NONE),
       _conv_kernel(new nnfw::cker::Conv()), _prepare(false)
 {
   // DO NOTHING
@@ -50,8 +51,8 @@ void ConvolutionLayer::convFloat32()
   op_params.padding_values.height = _paddingTop;
   op_params.stride_width = _strideWidth;
   op_params.stride_height = _strideHeight;
-  op_params.dilation_width_factor = 1;
-  op_params.dilation_height_factor = 1;
+  op_params.dilation_width_factor = _dilationWidthFactor;
+  op_params.dilation_height_factor = _dilationHeightFactor;
   op_params.float_activation_min = output_activation_min;
   op_params.float_activation_max = output_activation_max;
 
@@ -78,8 +79,8 @@ void ConvolutionLayer::convQuant8()
   nnfw::cker::ConvParams op_params;
   op_params.stride_width = _strideWidth;
   op_params.stride_height = _strideHeight;
-  op_params.dilation_width_factor = 1;
-  op_params.dilation_height_factor = 1;
+  op_params.dilation_width_factor = _dilationWidthFactor;
+  op_params.dilation_height_factor = _dilationHeightFactor;
   op_params.padding_type = getPaddingType(_paddingType);
   op_params.padding_values.width = _paddingLeft;
   op_params.padding_values.height = _paddingTop;
@@ -104,6 +105,8 @@ void ConvolutionLayer::configure(const IPortableTensor *input, const IPortableTe
                                  const uint32_t paddingLeft, const uint32_t paddingRight,
                                  const uint32_t paddingTop, const uint32_t paddingBottom,
                                  const uint32_t strideWidth, const uint32_t strideHeight,
+                                 const uint32_t dilationWidthFactor,
+                                 const uint32_t dilationHeightFactor,
                                  const ir::Activation activation, IPortableTensor *output)
 {
   _input = input;
@@ -116,6 +119,8 @@ void ConvolutionLayer::configure(const IPortableTensor *input, const IPortableTe
   _paddingBottom = paddingBottom;
   _strideWidth = strideWidth;
   _strideHeight = strideHeight;
+  _dilationWidthFactor = dilationWidthFactor;
+  _dilationHeightFactor = dilationHeightFactor;
   _activation = activation;
   _output = output;
 }
@@ -145,7 +150,8 @@ void ConvolutionLayer::run()
   param_padding.param.bottom = _paddingBottom;
 
   const auto padding =
-      ir::calculatePadding(param_padding, ifm_shape, ofm_shape, stride, ker_width, ker_height);
+      ir::calculatePadding(param_padding, ifm_shape, ofm_shape, stride, ker_width, ker_height,
+                           _dilationWidthFactor, _dilationHeightFactor);
 
   _paddingLeft = padding.left;
   _paddingRight = padding.right;
@@ -176,7 +182,8 @@ void ConvolutionLayer::prepare()
   {
     bool is_transposed = false;
     kernel.prepare(getTensorShape(_kernel), reinterpret_cast<const float *>(_kernel->buffer()),
-                   getPaddingType(_paddingType), is_transposed);
+                   getPaddingType(_paddingType), is_transposed, _dilationWidthFactor,
+                   _dilationHeightFactor);
 
     // Decrease reference of _kernel(weights) only when _kernel is constant
     if (is_transposed)
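With dilation in play, calculatePadding has to work from the kernel's effective footprint rather than its nominal size, which is why run() above now threads the dilation factors through. A minimal sketch of the usual SAME-padding arithmetic under the standard effective-kernel formula; the helper name is hypothetical, not onert's ir API.

#include <algorithm>
#include <cstdint>

// Total SAME padding along one axis for a dilated convolution (illustration only).
uint32_t samePaddingTotal(uint32_t in_size, uint32_t out_size, uint32_t stride,
                          uint32_t kernel_size, uint32_t dilation)
{
  // A dilated kernel touches a window of (kernel_size - 1) * dilation + 1 inputs
  const int64_t effective_kernel = static_cast<int64_t>(kernel_size - 1) * dilation + 1;
  const int64_t needed =
      (static_cast<int64_t>(out_size) - 1) * stride + effective_kernel - in_size;
  return static_cast<uint32_t>(std::max<int64_t>(needed, 0));
}
// e.g. a 3-wide kernel with dilation 2 pads like a 5-wide kernel: (3 - 1) * 2 + 1 = 5.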
diff --git a/runtime/onert/backend/cpu/ops/ConvolutionLayer.h b/runtime/onert/backend/cpu/ops/ConvolutionLayer.h
index 2833387c4..398892e65 100644
--- a/runtime/onert/backend/cpu/ops/ConvolutionLayer.h
+++ b/runtime/onert/backend/cpu/ops/ConvolutionLayer.h
@@ -56,7 +56,8 @@ public:
                  const IPortableTensor *bias, ir::PaddingType _paddingType,
                  const uint32_t paddingLeft, const uint32_t paddingRight, const uint32_t paddingTop,
                  const uint32_t paddingBottom, const uint32_t strideWidth,
-                 const uint32_t strideHeight, const ir::Activation activation,
+                 const uint32_t strideHeight, const uint32_t dilationWidthFactor,
+                 const uint32_t dilationHeightFactor, const ir::Activation activation,
                  IPortableTensor *output);
 
   void run() override;
@@ -77,6 +78,8 @@ private:
 
   uint32_t _strideWidth;
   uint32_t _strideHeight;
+  uint32_t _dilationWidthFactor;
+  uint32_t _dilationHeightFactor;
 
   ir::Activation _activation;
 
diff --git a/runtime/onert/backend/cpu/ops/CosLayer.cc b/runtime/onert/backend/cpu/ops/CosLayer.cc
deleted file mode 100644
index 9417019d5..000000000
--- a/runtime/onert/backend/cpu/ops/CosLayer.cc
+++ /dev/null
@@ -1,68 +0,0 @@
-/*
- * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- *      http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */ - -#include "CosLayer.h" -#include "OperationUtils.h" - -#include <cker/operation/Elementwise.h> - -namespace onert -{ -namespace backend -{ -namespace cpu -{ -namespace ops -{ -CosLayer::CosLayer() : _input(nullptr), _output(nullptr) -{ - // DO NOTHING -} - -void CosLayer::cosFloat32() -{ - nnfw::cker::Cos(getTensorShape(_input), reinterpret_cast<const float *>(_input->buffer()), - getTensorShape(_output), reinterpret_cast<float *>(_output->buffer())); -} - -void CosLayer::cosQuant8() { throw std::runtime_error{"NYI"}; } - -void CosLayer::configure(const IPortableTensor *input, IPortableTensor *output) -{ - _input = input; - _output = output; -} - -void CosLayer::run() -{ - if (_input->data_type() == OperandType::FLOAT32) - { - cosFloat32(); - } - else if (_input->data_type() == OperandType::QUANT_UINT8_ASYMM) - { - cosQuant8(); - } - else - { - throw std::runtime_error{"Cos: unsupported data type"}; - } -} - -} // namespace ops -} // namespace cpu -} // namespace backend -} // namespace onert diff --git a/runtime/onert/backend/cpu/ops/CosLayer.h b/runtime/onert/backend/cpu/ops/CosLayer.h deleted file mode 100644 index 1fadef718..000000000 --- a/runtime/onert/backend/cpu/ops/CosLayer.h +++ /dev/null @@ -1,54 +0,0 @@ -/* - * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in riting, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#ifndef __ONERT_BACKEND_CPU_OPS_COSLAYER_H__ -#define __ONERT_BACKEND_CPU_OPS_COSLAYER_H__ - -#include <backend/IPortableTensor.h> - -#include <exec/IFunction.h> - -namespace onert -{ -namespace backend -{ -namespace cpu -{ -namespace ops -{ -class CosLayer : public ::onert::exec::IFunction -{ -public: - CosLayer(); - - void configure(const IPortableTensor *input, IPortableTensor *output); - - void run() override; - -private: - void cosFloat32(); - void cosQuant8(); - - const IPortableTensor *_input; - IPortableTensor *_output; -}; - -} // namespace ops -} // namespace cpu -} // namespace backend -} // namespace onert - -#endif // __ONERT_BACKEND_CPU_OPS_COSLAYER_H__ diff --git a/runtime/onert/backend/cpu/ops/DivLayer.cc b/runtime/onert/backend/cpu/ops/DivLayer.cc deleted file mode 100644 index 556c55e33..000000000 --- a/runtime/onert/backend/cpu/ops/DivLayer.cc +++ /dev/null @@ -1,95 +0,0 @@ -/* - * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ - -#include "DivLayer.h" - -#include <cker/operation/BinaryArithmeticOps.h> - -namespace onert -{ -namespace backend -{ -namespace cpu -{ -namespace ops -{ - -void DivLayer::divFloat32() -{ - float output_activation_min = 0, output_activation_max = 0; - CalculateActivationRange(_activation, &output_activation_min, &output_activation_max); - nnfw::cker::BinaryArithmeticOpParam op_params; - op_params.float_activation_max = output_activation_max; - op_params.float_activation_min = output_activation_min; - - const bool requires_broadcast = !HaveSameShapes(_lhs, _rhs); - if (requires_broadcast) - { - nnfw::cker::BroadcastBinaryArithmeticOp<nnfw::cker::BinaryArithmeticOpType::DIV>( - op_params, getTensorShape(_lhs), reinterpret_cast<const float *>(_lhs->buffer()), - getTensorShape(_rhs), reinterpret_cast<const float *>(_rhs->buffer()), - getTensorShape(_output), reinterpret_cast<float *>(_output->buffer())); - } - else - { - nnfw::cker::BinaryArithmeticOp<nnfw::cker::BinaryArithmeticOpType::DIV>( - op_params, getTensorShape(_lhs), reinterpret_cast<const float *>(_lhs->buffer()), - getTensorShape(_rhs), reinterpret_cast<const float *>(_rhs->buffer()), - getTensorShape(_output), reinterpret_cast<float *>(_output->buffer())); - } -} - -void DivLayer::divQuant8() -{ - int32_t output_activation_min, output_activation_max; - CalculateActivationRangeUint8(_activation, _output, &output_activation_min, - &output_activation_max); - // op_params.quantized_activation_max = output_activation_max; - // op_params.quantized_activation_min = output_activation_min; - - // cker quant8 div is not implemented yet - throw std::runtime_error{"Div NYI for quantized"}; -} - -void DivLayer::configure(const IPortableTensor *lhs, const IPortableTensor *rhs, - const ir::Activation activation, IPortableTensor *output) -{ - _lhs = lhs; - _rhs = rhs; - _activation = activation; - _output = output; -} - -void DivLayer::run() -{ - if (_output->data_type() == OperandType::FLOAT32) - { - divFloat32(); - } - else if (_output->data_type() == OperandType::QUANT_UINT8_ASYMM) - { - divQuant8(); - } - else - { - throw std::runtime_error{"Div: unsupported data type"}; - } -} - -} // namespace ops -} // namespace cpu -} // namespace backend -} // namespace onert diff --git a/runtime/onert/backend/cpu/ops/ElementwiseActivationLayer.cc b/runtime/onert/backend/cpu/ops/ElementwiseActivationLayer.cc new file mode 100644 index 000000000..c1d63172b --- /dev/null +++ b/runtime/onert/backend/cpu/ops/ElementwiseActivationLayer.cc @@ -0,0 +1,173 @@ +/* + * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include "ElementwiseActivationLayer.h" + +#include "OperationUtils.h" + +#include <cker/operation/Logistic.h> +#include <cker/operation/ReLU.h> +#include <cker/operation/ReLU6.h> +#include <cker/operation/Tanh.h> + +namespace onert +{ +namespace backend +{ +namespace cpu +{ +namespace ops +{ + +ElementwiseActivationLayer::ElementwiseActivationLayer() + : _input(nullptr), _output(nullptr), _kernel() +{ + // DO NOTHING +} + +void ElementwiseActivationLayer::PopulateLookupTable(const ElementwiseActivationType op_type) +{ + const auto input_scale = static_cast<double>(_input->data_scale()); + const auto input_zero_point = static_cast<int32_t>(_input->data_offset()); + const auto output_scale = static_cast<double>(_output->data_scale()); + const auto output_zero_point = static_cast<int32_t>(_output->data_offset()); + const float inverse_scale = 1 / output_scale; + int32_t maxval = std::numeric_limits<uint8_t>::max(); + int32_t minval = std::numeric_limits<uint8_t>::min(); + for (int32_t val = minval; val <= maxval; ++val) + { + const float dequantized = input_scale * (val - input_zero_point); + float transformed = 0.f; + if (op_type == ElementwiseActivationType::kTanh) + { + transformed = std::tanh(dequantized); + } + else if (op_type == ElementwiseActivationType::kLogistic) + { + transformed = 1.0f / (1.0f + std::exp(-dequantized)); + } + else + { + throw std::runtime_error("ElementwiseActivationLayer : unsupported activation type"); + } + const float rescaled = std::round(transformed * inverse_scale); + const int32_t quantized = static_cast<int32_t>(rescaled + output_zero_point); + _table[val] = static_cast<uint8_t>(std::max(std::min(maxval, quantized), minval)); + } +} + +void ElementwiseActivationLayer::EvalUsingLookupTable(const IPortableTensor *input, + IPortableTensor *output) +{ + const int size = MatchingFlatSize(getTensorShape(input), getTensorShape(output)); + const uint8_t *input_data = reinterpret_cast<const uint8_t *>(input->buffer()); + uint8_t *output_data = reinterpret_cast<uint8_t *>(output->buffer()); + + for (int i = 0; i < size; ++i) + { + output_data[i] = _table[input_data[i]]; + } +} + +void ElementwiseActivationLayer::configure(const IPortableTensor *input, IPortableTensor *output, + float alpha, float beta, + ElementwiseActivationType op_type) +{ + _input = input; + _output = output; + + switch (op_type) + { + case ElementwiseActivationType::kLogistic: + if (_input->data_type() == OperandType::QUANT_UINT8_ASYMM) + { + PopulateLookupTable(op_type); + _kernel = std::bind(&ElementwiseActivationLayer::EvalUsingLookupTable, this, + std::placeholders::_1, std::placeholders::_2); + } + else if (_input->data_type() == OperandType::FLOAT32) + { + _kernel = [](const IPortableTensor *input, IPortableTensor *output) { + nnfw::cker::Logistic(getTensorShape(input), + reinterpret_cast<const float *>(input->buffer()), + getTensorShape(output), reinterpret_cast<float *>(output->buffer())); + }; + } + else + { + throw std::runtime_error{"ElementwiseActivationLayer(Logistic): unsupported data type"}; + } + break; + case ElementwiseActivationType::kReLU: + if (_input->data_type() == OperandType::FLOAT32) + { + if (alpha == std::numeric_limits<float>::infinity() && beta == 0.f) + { + _kernel = [](const IPortableTensor *input, IPortableTensor *output) { + nnfw::cker::ReLU(getTensorShape(input), + reinterpret_cast<const float *>(input->buffer()), + getTensorShape(output), reinterpret_cast<float *>(output->buffer())); + }; + } + else if (alpha == 6.f && beta == 0.f) + { + _kernel = 
[](const IPortableTensor *input, IPortableTensor *output) {
+          nnfw::cker::ReLU6(getTensorShape(input),
+                            reinterpret_cast<const float *>(input->buffer()),
+                            reinterpret_cast<float *>(output->buffer()));
+        };
+      }
+      else
+      {
+        throw std::runtime_error(
+            "ElementwiseActivationLayer : This layer supports only ReLU(0-inf) and ReLU6(0-6)");
+      }
+    }
+    else
+    {
+      throw std::runtime_error{"ElementwiseActivationLayer(ReLU): unsupported data type"};
+    }
+    break;
+    case ElementwiseActivationType::kTanh:
+      if (_input->data_type() == OperandType::QUANT_UINT8_ASYMM)
+      {
+        PopulateLookupTable(op_type);
+        _kernel = std::bind(&ElementwiseActivationLayer::EvalUsingLookupTable, this,
+                            std::placeholders::_1, std::placeholders::_2);
+      }
+      else if (_input->data_type() == OperandType::FLOAT32)
+      {
+        _kernel = [](const IPortableTensor *input, IPortableTensor *output) {
+          nnfw::cker::Tanh(getTensorShape(input), reinterpret_cast<const float *>(input->buffer()),
+                           getTensorShape(output), reinterpret_cast<float *>(output->buffer()));
+        };
+      }
+      else
+      {
+        throw std::runtime_error{"ElementwiseActivationLayer(Tanh): unsupported data type"};
+      }
+      break;
+    default:
+      throw std::runtime_error("ElementwiseActivationLayer: unsupported op type");
+  }
+}
+
+void ElementwiseActivationLayer::run() { _kernel(_input, _output); }
+
+} // namespace ops
+} // namespace cpu
+} // namespace backend
+} // namespace onert
diff --git a/runtime/onert/backend/cpu/ops/TanhLayer.h b/runtime/onert/backend/cpu/ops/ElementwiseActivationLayer.h
index 35a184074..3ef580041 100644
--- a/runtime/onert/backend/cpu/ops/TanhLayer.h
+++ b/runtime/onert/backend/cpu/ops/ElementwiseActivationLayer.h
@@ -14,8 +14,8 @@
  * limitations under the License.
  */
 
-#ifndef __ONERT_BACKEND_CPU_OPS_TANHLAYER_H__
-#define __ONERT_BACKEND_CPU_OPS_TANHLAYER_H__
+#ifndef __ONERT_BACKEND_CPU_OPS_ElementwiseActivationLAYER_H__
+#define __ONERT_BACKEND_CPU_OPS_ElementwiseActivationLAYER_H__
 
 #include <backend/IPortableTensor.h>
 
@@ -30,26 +30,33 @@ namespace cpu
 namespace ops
 {
 
-class TanhLayer : public ::onert::exec::IFunction
+enum class ElementwiseActivationType
 {
-public:
-  TanhLayer();
+  kLogistic,
+  kReLU,
+  kTanh
+};
 
+class ElementwiseActivationLayer : public ::onert::exec::IFunction
+{
 public:
-  void tanhFloat32();
+  ElementwiseActivationLayer();
 
-  void tanhQuant8();
-
-  void configure(const IPortableTensor *input, IPortableTensor *output);
+public:
+  void configure(const IPortableTensor *input, IPortableTensor *output, float alpha, float beta,
+                 const ElementwiseActivationType op_type);
 
   void run() override;
 
-  void PopulateLookupTable();
+  void PopulateLookupTable(const ElementwiseActivationType op_type);
+
+  void EvalUsingLookupTable(const IPortableTensor *input, IPortableTensor *output);
 
 private:
   const IPortableTensor *_input;
   IPortableTensor *_output;
   uint8_t _table[256];
+  std::function<void(const IPortableTensor *input, IPortableTensor *output)> _kernel;
 };
 
 } // namespace ops
@@ -57,4 +64,4 @@
 } // namespace backend
 } // namespace onert
 
-#endif // __ONERT_BACKEND_CPU_OPS_TANHLAYER_H__
+#endif // __ONERT_BACKEND_CPU_OPS_ElementwiseActivationLAYER_H__
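For the quantized Logistic/Tanh paths above, PopulateLookupTable exploits the fact that a uint8 input can take only 256 values: the float activation is evaluated once per possible input at configure time, and run() reduces to a table lookup per element. A minimal standalone sketch of that idea; the names are illustrative, not the onert API.

#include <algorithm>
#include <cmath>
#include <cstdint>

// Build a 256-entry table mapping every possible uint8 input to its activated,
// re-quantized output (sketch of the PopulateLookupTable idea shown above).
void buildActivationTable(uint8_t table[256], float (*fn)(float), double in_scale,
                          int32_t in_zero_point, double out_scale, int32_t out_zero_point)
{
  for (int32_t val = 0; val <= 255; ++val)
  {
    const float dequantized = static_cast<float>(in_scale * (val - in_zero_point));
    const float transformed = fn(dequantized); // e.g. tanh or the logistic function
    const int32_t quantized =
        static_cast<int32_t>(std::round(transformed / out_scale)) + out_zero_point;
    table[val] = static_cast<uint8_t>(std::min(255, std::max(0, quantized)));
  }
}
// Usage: buildActivationTable(table, [](float x) { return std::tanh(x); }, ...);
// after which each element is just output[i] = table[input[i]].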
diff --git a/runtime/onert/backend/cpu/ops/ElementwiseBinaryLayer.cc b/runtime/onert/backend/cpu/ops/ElementwiseBinaryLayer.cc
new file mode 100644
index 000000000..ea3c1e7cd
--- /dev/null
+++ b/runtime/onert/backend/cpu/ops/ElementwiseBinaryLayer.cc
@@ -0,0 +1,151 @@
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "ElementwiseBinaryLayer.h"
+
+#include "OperationUtils.h"
+
+#include <cker/operation/LogicalOr.h>
+#include <cker/operation/MaxMin.h>
+
+namespace onert
+{
+namespace backend
+{
+namespace cpu
+{
+namespace ops
+{
+
+namespace
+{
+template <typename T>
+void logicalOrGeneric(const IPortableTensor *lhs, const IPortableTensor *rhs,
+                      IPortableTensor *output)
+{
+  if (!HaveSameShapes(lhs, rhs))
+  {
+    nnfw::cker::LogicalOrBroadcast<T>(
+        getTensorShape(lhs), reinterpret_cast<const T *>(lhs->buffer()), getTensorShape(rhs),
+        reinterpret_cast<const T *>(rhs->buffer()), getTensorShape(output),
+        reinterpret_cast<T *>(output->buffer()));
+  }
+  else
+  {
+    nnfw::cker::LogicalOrElementwise<T>(
+        getTensorShape(lhs), reinterpret_cast<const T *>(lhs->buffer()),
+        reinterpret_cast<const T *>(rhs->buffer()), reinterpret_cast<T *>(output->buffer()));
+  }
+}
+
+template <typename T>
+void maximumGeneric(const IPortableTensor *lhs, const IPortableTensor *rhs,
+                    IPortableTensor *output)
+{
+  nnfw::cker::Max<T>(getTensorShape(lhs), reinterpret_cast<const T *>(lhs->buffer()),
+                     getTensorShape(rhs), reinterpret_cast<const T *>(rhs->buffer()),
+                     getTensorShape(output), reinterpret_cast<T *>(output->buffer()));
+}
+
+template <typename T>
+void minimumGeneric(const IPortableTensor *lhs, const IPortableTensor *rhs,
+                    IPortableTensor *output)
+{
+  nnfw::cker::Min<T>(getTensorShape(lhs), reinterpret_cast<const T *>(lhs->buffer()),
+                     getTensorShape(rhs), reinterpret_cast<const T *>(rhs->buffer()),
+                     getTensorShape(output), reinterpret_cast<T *>(output->buffer()));
+}
+
+bool haveSameQuantInfo(const IPortableTensor *lhs, const IPortableTensor *rhs,
+                       const IPortableTensor *output)
+{
+  return (lhs->data_scale() == rhs->data_scale() && lhs->data_scale() == output->data_scale()) &&
+         (lhs->data_offset() == rhs->data_offset() && lhs->data_offset() == output->data_offset());
+}
+} // namespace
+
+void ElementwiseBinaryLayer::configure(const IPortableTensor *lhs, const IPortableTensor *rhs,
+                                       IPortableTensor *output, const ElementwiseBinaryType op_type)
+{
+  assert(lhs != nullptr);
+  assert(rhs != nullptr);
+  assert(output != nullptr);
+
+  _lhs = lhs;
+  _rhs = rhs;
+  _output = output;
+
+  switch (op_type)
+  {
+    case ElementwiseBinaryType::kLogicalOr:
+      if ((_lhs->data_type() == OperandType::BOOL8) && (_rhs->data_type() == OperandType::BOOL8))
+      {
+        _kernel = logicalOrGeneric<bool>;
+      }
+      else
+      {
+        throw std::runtime_error{"LogicalOr: Unsupported data type"};
+      }
+      break;
+    case ElementwiseBinaryType::kMax:
+      if (_lhs->data_type() == OperandType::QUANT_UINT8_ASYMM)
+      {
+        if (!haveSameQuantInfo(_lhs, _rhs, _output))
+        {
+          throw std::runtime_error("Max NYI for quantized");
+        }
+        _kernel = maximumGeneric<uint8_t>;
+      }
+      else if (_lhs->data_type() == OperandType::FLOAT32)
+      {
+        _kernel = maximumGeneric<float>;
+      }
+      else
+      {
+        throw std::runtime_error{"Max: unsupported data type"};
+      }
+      break;
+    case ElementwiseBinaryType::kMin:
+      if (_lhs->data_type() == OperandType::QUANT_UINT8_ASYMM)
+      {
+        if (!haveSameQuantInfo(_lhs, _rhs, _output))
+        {
+          throw std::runtime_error("Min NYI for quantized");
+        }
+        _kernel = minimumGeneric<uint8_t>;
+      }
+      else if (_lhs->data_type() == OperandType::INT32)
+      {
+        _kernel = minimumGeneric<int32_t>;
+      }
+      else if (_lhs->data_type() == OperandType::FLOAT32)
+      {
+        _kernel = minimumGeneric<float>;
+      }
+      else
+      {
+        throw std::runtime_error{"Min: unsupported data type"};
+      }
+      break;
+    default:
+      throw std::runtime_error{"ElementwiseBinary: Unsupported ElementwiseBinary type"};
+  }
+}
+
+void ElementwiseBinaryLayer::run() { _kernel(_lhs, _rhs, _output); }
+
+} // namespace ops
+} // namespace cpu
+} // namespace backend
+} // namespace onert
diff --git a/runtime/onert/backend/cpu/ops/MaxLayer.h b/runtime/onert/backend/cpu/ops/ElementwiseBinaryLayer.h
index ed8dc5b0f..052747a4c 100644
--- a/runtime/onert/backend/cpu/ops/MaxLayer.h
+++ b/runtime/onert/backend/cpu/ops/ElementwiseBinaryLayer.h
@@ -14,8 +14,8 @@
  * limitations under the License.
  */
 
-#ifndef __ONERT_BACKEND_CPU_OPS_MAXLAYER_H__
-#define __ONERT_BACKEND_CPU_OPS_MAXLAYER_H__
+#ifndef __ONERT_BACKEND_CPU_OPS_ELEMENTWISEBINARYLAYER_H__
+#define __ONERT_BACKEND_CPU_OPS_ELEMENTWISEBINARYLAYER_H__
 
 #include <backend/IPortableTensor.h>
 
@@ -30,20 +30,25 @@ namespace cpu
 namespace ops
 {
 
-class MaxLayer : public ::onert::exec::IFunction
+enum class ElementwiseBinaryType
+{
+  kLogicalAnd,
+  kLogicalOr,
+  kMax,
+  kMin,
+};
+
+class ElementwiseBinaryLayer : public ::onert::exec::IFunction
 {
 public:
-  MaxLayer() : _lhs(nullptr), _rhs(nullptr), _output(nullptr)
+  ElementwiseBinaryLayer() : _lhs(nullptr), _rhs(nullptr), _output(nullptr)
   {
     // DO NOTHING
   }
 
 public:
-  template <typename T> void maximum();
-
-  void maxQuant8();
-
-  void configure(const IPortableTensor *lhs, const IPortableTensor *rhs, IPortableTensor *output);
+  void configure(const IPortableTensor *lhs, const IPortableTensor *rhs, IPortableTensor *output,
+                 const ElementwiseBinaryType op_type);
 
   void run() override;
 
@@ -51,6 +56,7 @@ private:
   const IPortableTensor *_lhs;
   const IPortableTensor *_rhs;
   IPortableTensor *_output;
+  std::function<void(const IPortableTensor *, const IPortableTensor *, IPortableTensor *)> _kernel;
 };
 
 } // namespace ops
@@ -58,4 +64,4 @@
 } // namespace backend
 } // namespace onert
 
-#endif // __ONERT_BACKEND_CPU_OPS_MAXLAYER_H__
+#endif // __ONERT_BACKEND_CPU_OPS_ELEMENTWISEBINARYLAYER_H__
diff --git a/runtime/onert/backend/cpu/ops/ElementwiseUnaryLayer.cc b/runtime/onert/backend/cpu/ops/ElementwiseUnaryLayer.cc
new file mode 100644
index 000000000..f8f89ab15
--- /dev/null
+++ b/runtime/onert/backend/cpu/ops/ElementwiseUnaryLayer.cc
@@ -0,0 +1,336 @@
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */ + +#include "ElementwiseUnaryLayer.h" + +#include "OperationUtils.h" + +#include <cker/operation/Elementwise.h> +#include <cker/operation/Erf.h> +#include <cker/operation/Exp.h> +#include <cker/operation/LogicalNot.h> +#include <cker/operation/Quantize.h> +#include <cker/operation/Round.h> + +namespace onert +{ +namespace backend +{ +namespace cpu +{ +namespace ops +{ + +namespace +{ +void absFloat32(const IPortableTensor *input, IPortableTensor *output) +{ + nnfw::cker::Abs(getTensorShape(input), reinterpret_cast<const float *>(input->buffer()), + getTensorShape(output), reinterpret_cast<float *>(output->buffer())); +} + +template <typename FromT> +void castPtr(const FromT *in, DataPtr out, int num_elements, ir::DataType data_type_out) +{ + switch (data_type_out) + { + case ir::DataType::FLOAT32: + std::transform(in, in + num_elements, out.f, [](FromT a) { return static_cast<float>(a); }); + return; + case ir::DataType::INT32: + std::transform(in, in + num_elements, out.i32, + [](FromT a) { return static_cast<int32_t>(a); }); + return; + case ir::DataType::UINT32: + std::transform(in, in + num_elements, out.u32, + [](FromT a) { return static_cast<uint32_t>(a); }); + return; + case ir::DataType::UINT8: + std::transform(in, in + num_elements, out.u8, + [](FromT a) { return static_cast<uint8_t>(a); }); + return; + case ir::DataType::BOOL8: + std::transform(in, in + num_elements, out.b, [](FromT a) { return static_cast<bool>(a); }); + return; + case ir::DataType::INT64: + std::transform(in, in + num_elements, out.i64, + [](FromT a) { return static_cast<int64_t>(a); }); + return; + default: + throw std::runtime_error("Cast: Not supported output type" + + std::to_string((int)data_type_out)); + } +} + +void cast(const IPortableTensor *input, IPortableTensor *output) +{ + auto input_buf = input->buffer(); + auto output_buf = output->buffer(); + const auto in = *reinterpret_cast<const DataPtr *>(&input_buf); + auto out = *reinterpret_cast<DataPtr *>(&output_buf); + + auto input_shape = getTensorShape(input); + auto output_shape = getTensorShape(output); + const auto num_elements = MatchingFlatSize(input_shape, output_shape); + + switch (input->data_type()) + { + case ir::DataType::FLOAT32: + castPtr(in.f, out, num_elements, output->data_type()); + return; + case ir::DataType::INT32: + castPtr(in.i32, out, num_elements, output->data_type()); + return; + case ir::DataType::UINT32: + castPtr(in.u32, out, num_elements, output->data_type()); + return; + case ir::DataType::UINT8: + castPtr(in.u8, out, num_elements, output->data_type()); + return; + case ir::DataType::BOOL8: + castPtr(in.b, out, num_elements, output->data_type()); + return; + case ir::DataType::INT64: + castPtr(in.i64, out, num_elements, output->data_type()); + return; + default: + throw std::runtime_error("Cast: unsupported data type" + + std::to_string((int)input->data_type())); + } +} + +void cosFloat32(const IPortableTensor *input, IPortableTensor *output) +{ + nnfw::cker::Cos(getTensorShape(input), reinterpret_cast<const float *>(input->buffer()), + getTensorShape(output), reinterpret_cast<float *>(output->buffer())); +} + +void expFloat32(const IPortableTensor *input, IPortableTensor *output) +{ + nnfw::cker::Exp(getTensorShape(input), reinterpret_cast<const float *>(input->buffer()), + getTensorShape(output), reinterpret_cast<float *>(output->buffer())); +} + +void erfFloat32(const IPortableTensor *input, IPortableTensor *output) +{ + nnfw::cker::Erf(getTensorShape(input), reinterpret_cast<const float *>(input->buffer()), + 
getTensorShape(output), reinterpret_cast<float *>(output->buffer()));
+}
+
+void logFloat32(const IPortableTensor *input, IPortableTensor *output)
+{
+  nnfw::cker::Log(getTensorShape(input), reinterpret_cast<const float *>(input->buffer()),
+                  getTensorShape(output), reinterpret_cast<float *>(output->buffer()));
+}
+
+void logicalNot(const IPortableTensor *input, IPortableTensor *output)
+{
+  nnfw::cker::LogicalNot(getTensorShape(input), reinterpret_cast<const bool *>(input->buffer()),
+                         getTensorShape(output), reinterpret_cast<bool *>(output->buffer()));
+}
+
+void negFloat32(const IPortableTensor *input, IPortableTensor *output)
+{
+  nnfw::cker::Neg(getTensorShape(input), reinterpret_cast<const float *>(input->buffer()),
+                  getTensorShape(output), reinterpret_cast<float *>(output->buffer()));
+}
+
+template <typename InputT, typename OutputT>
+void affineQuantize(const IPortableTensor *input, IPortableTensor *output)
+{
+  nnfw::cker::Quantize(getTensorShape(input), reinterpret_cast<const InputT *>(input->buffer()),
+                       getTensorShape(output), reinterpret_cast<OutputT *>(output->buffer()),
+                       output->data_scale(), output->data_offset());
+}
+
+void roundFloat32(const IPortableTensor *input, IPortableTensor *output)
+{
+  nnfw::cker::Round(getTensorShape(input), reinterpret_cast<const float *>(input->buffer()),
+                    getTensorShape(output), reinterpret_cast<float *>(output->buffer()));
+}
+
+void rsqrtFloat32(const IPortableTensor *input, IPortableTensor *output)
+{
+  nnfw::cker::Rsqrt(getTensorShape(input), reinterpret_cast<const float *>(input->buffer()),
+                    getTensorShape(output), reinterpret_cast<float *>(output->buffer()));
+}
+
+void sinFloat32(const IPortableTensor *input, IPortableTensor *output)
+{
+  nnfw::cker::Sin(getTensorShape(input), reinterpret_cast<const float *>(input->buffer()),
+                  getTensorShape(output), reinterpret_cast<float *>(output->buffer()));
+}
+
+template <typename T> void zerosLikeFloat32(const IPortableTensor *input, IPortableTensor *output)
+{
+  if (!HaveSameShapes(input, output))
+    throw std::runtime_error{"ZerosLike: input and output shape don't match."};
+
+  auto element_size = getTensorShape(input).FlatSize();
+
+  memset(reinterpret_cast<T *>(output->buffer()), 0, element_size * sizeof(T));
+}
+} // namespace
+
+void ElementwiseUnaryLayer::configure(const IPortableTensor *input, IPortableTensor *output,
+                                      const ElementwiseUnaryType op_type)
+{
+  assert(input != nullptr);
+  assert(output != nullptr);
+
+  _input = input;
+  _output = output;
+
+  switch (op_type)
+  {
+    case ElementwiseUnaryType::kAbs:
+      if ((input->data_type() == OperandType::FLOAT32))
+      {
+        _kernel = absFloat32;
+      }
+      else
+      {
+        throw std::runtime_error{"Abs: Unsupported data type"};
+      }
+      break;
+    case ElementwiseUnaryType::kCast:
+      _kernel = cast;
+      break;
+    case ElementwiseUnaryType::kCos:
+      if ((input->data_type() == OperandType::FLOAT32))
+      {
+        _kernel = cosFloat32;
+      }
+      else
+      {
+        throw std::runtime_error{"Cos: Unsupported data type"};
+      }
+      break;
+    case ElementwiseUnaryType::kExp:
+      if ((input->data_type() == OperandType::FLOAT32))
+      {
+        _kernel = expFloat32;
+      }
+      else
+      {
+        throw std::runtime_error{"Exp: Unsupported data type"};
+      }
+      break;
+    case ElementwiseUnaryType::kErf:
+      if ((input->data_type() == OperandType::FLOAT32))
+      {
+        _kernel = erfFloat32;
+      }
+      else
+      {
+        throw std::runtime_error{"Erf: Unsupported data type"};
+      }
+      break;
+    case ElementwiseUnaryType::kLog:
+      if ((input->data_type() == OperandType::FLOAT32))
+      {
+        _kernel = logFloat32;
+      }
+      else
+      {
+        throw std::runtime_error{"Log: Unsupported data type"};
+      }
+      break;
+    case ElementwiseUnaryType::kLogicalNot:
+      if ((input->data_type() == OperandType::BOOL8))
+      {
+        _kernel = logicalNot;
+      }
+      else
+      {
+        throw std::runtime_error{"LogicalNot: Unsupported data type"};
+      }
+      break;
+    case ElementwiseUnaryType::kNeg:
+      if ((input->data_type() == OperandType::FLOAT32))
+      {
+        _kernel = negFloat32;
+      }
+      else
+      {
+        throw std::runtime_error{"Neg: Unsupported data type"};
+      }
+      break;
+    case ElementwiseUnaryType::kQuantize:
+      if ((input->data_type() == OperandType::FLOAT32))
+      {
+        _kernel = affineQuantize<float, uint8_t>;
+      }
+      else
+      {
+        throw std::runtime_error{"Quantize: Unsupported data type"};
+      }
+      break;
+    case ElementwiseUnaryType::kRound:
+      if ((input->data_type() == OperandType::FLOAT32))
+      {
+        _kernel = roundFloat32;
+      }
+      else
+      {
+        throw std::runtime_error{"Round: Unsupported data type"};
+      }
+      break;
+    case ElementwiseUnaryType::kRSqrt:
+      if ((input->data_type() == OperandType::FLOAT32))
+      {
+        _kernel = rsqrtFloat32;
+      }
+      else
+      {
+        throw std::runtime_error{"RSqrt: Unsupported data type"};
+      }
+      break;
+    case ElementwiseUnaryType::kSin:
+      if ((input->data_type() == OperandType::FLOAT32))
+      {
+        _kernel = sinFloat32;
+      }
+      else
+      {
+        throw std::runtime_error{"Sin: Unsupported data type"};
+      }
+      break;
+    case ElementwiseUnaryType::kZerosLike:
+      if (input->data_type() == OperandType::FLOAT32)
+      {
+        _kernel = zerosLikeFloat32<float>;
+      }
+      else if (input->data_type() == OperandType::INT32)
+      {
+        _kernel = zerosLikeFloat32<int32_t>;
+      }
+      else
+      {
+        throw std::runtime_error{"ZerosLike: Unsupported data type"};
+      }
+      break;
+    default:
+      throw std::runtime_error{"ElementwiseUnary: Unsupported ElementwiseUnary type"};
+  }
+}
+
+void ElementwiseUnaryLayer::run() { _kernel(_input, _output); }
+
+} // namespace ops
+} // namespace cpu
+} // namespace backend
+} // namespace onert
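ElementwiseUnaryLayer, like ElementwiseBinaryLayer and BinaryArithmeticLayer above, replaces the per-op run()-time switches of the deleted layers with configure-time kernel selection: the op-type/data-type combination is resolved to a std::function once, and run() is a single indirect call. A reduced sketch of the pattern, using hypothetical stand-in types rather than the real IPortableTensor/IFunction interfaces:

#include <functional>
#include <stdexcept>
#include <utility>

struct Tensor; // stand-in for IPortableTensor

// Core of the configure-time dispatch: the switch over op and data type happens
// once, outside the hot path; run() only forwards to the chosen kernel.
class UnaryLayer
{
public:
  void configure(const Tensor *input, Tensor *output,
                 std::function<void(const Tensor *, Tensor *)> kernel)
  {
    if (!kernel)
      throw std::runtime_error{"UnaryLayer: no kernel for this op/type combination"};
    _input = input;
    _output = output;
    _kernel = std::move(kernel);
  }

  void run() { _kernel(_input, _output); }

private:
  const Tensor *_input = nullptr;
  Tensor *_output = nullptr;
  std::function<void(const Tensor *, Tensor *)> _kernel;
};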
diff --git a/runtime/onert/backend/cpu/ops/ReLU6Layer.h b/runtime/onert/backend/cpu/ops/ElementwiseUnaryLayer.h
index 994d17a30..74968386d 100644
--- a/runtime/onert/backend/cpu/ops/ReLU6Layer.h
+++ b/runtime/onert/backend/cpu/ops/ElementwiseUnaryLayer.h
@@ -14,8 +14,8 @@
  * limitations under the License.
*/ -#ifndef __ONERT_BACKEND_CPU_OPS_RELU6LAYER_H__ -#define __ONERT_BACKEND_CPU_OPS_RELU6LAYER_H__ +#ifndef __ONERT_BACKEND_CPU_OPS_ELEMENTWISEUNARYLAYER_H__ +#define __ONERT_BACKEND_CPU_OPS_ELEMENTWISEUNARYLAYER_H__ #include <backend/IPortableTensor.h> @@ -30,23 +30,41 @@ namespace cpu namespace ops { -class ReLU6Layer : public ::onert::exec::IFunction +enum class ElementwiseUnaryType { -public: - ReLU6Layer(); + kAbs, + kCast, + kCos, + kErf, + kExp, + kLog, + kLogicalNot, + kNeg, + kQuantize, + kRound, + kRSqrt, + kSin, + kZerosLike +}; +class ElementwiseUnaryLayer : public ::onert::exec::IFunction +{ public: - void relu6Float32(); + ElementwiseUnaryLayer() : _input(nullptr), _output(nullptr), _kernel() + { + // DO NOTHING + } - void relu6Quant8(); - - void configure(const IPortableTensor *input, IPortableTensor *output); +public: + void configure(const IPortableTensor *input, IPortableTensor *output, + const ElementwiseUnaryType op_type); void run() override; private: const IPortableTensor *_input; IPortableTensor *_output; + std::function<void(const IPortableTensor *, IPortableTensor *)> _kernel; }; } // namespace ops @@ -54,4 +72,4 @@ private: } // namespace backend } // namespace onert -#endif // __ONERT_BACKEND_CPU_OPS_RELU6LAYER_H__ +#endif // __ONERT_BACKEND_CPU_OPS_ELEMENTWISEUNARYLAYER_H__ diff --git a/runtime/onert/backend/cpu/ops/ExpLayer.cc b/runtime/onert/backend/cpu/ops/ExpLayer.cc deleted file mode 100644 index 4dbec9cd5..000000000 --- a/runtime/onert/backend/cpu/ops/ExpLayer.cc +++ /dev/null @@ -1,74 +0,0 @@ -/* - * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#include "ExpLayer.h" - -#include "OperationUtils.h" - -#include <cker/operation/Exp.h> - -namespace onert -{ -namespace backend -{ -namespace cpu -{ -namespace ops -{ - -ExpLayer::ExpLayer() : _input(nullptr), _output(nullptr) -{ - // DO NOTHING -} - -void ExpLayer::expFloat32() -{ - nnfw::cker::Exp(getTensorShape(_input), reinterpret_cast<const float *>(_input->buffer()), - getTensorShape(_output), reinterpret_cast<float *>(_output->buffer())); -} - -void ExpLayer::expQuant8() -{ - // cker quant8 exp is not implemented yet - throw std::runtime_error{"NYI"}; -} - -void ExpLayer::configure(const IPortableTensor *input, IPortableTensor *output) -{ - _input = input; - _output = output; -} - -void ExpLayer::run() -{ - if (_input->data_type() == OperandType::FLOAT32) - { - expFloat32(); - } - else if (_input->data_type() == OperandType::QUANT_UINT8_ASYMM) - { - expQuant8(); - } - else - { - throw std::runtime_error{"Exp: unsupported data type"}; - } -} - -} // namespace ops -} // namespace cpu -} // namespace backend -} // namespace onert diff --git a/runtime/onert/backend/cpu/ops/ExpLayer.h b/runtime/onert/backend/cpu/ops/ExpLayer.h deleted file mode 100644 index cd27b0e40..000000000 --- a/runtime/onert/backend/cpu/ops/ExpLayer.h +++ /dev/null @@ -1,57 +0,0 @@ -/* - * Copyright (c) 2020 Samsung Electronics Co., Ltd. 
All Rights Reserved - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#ifndef __ONERT_BACKEND_CPU_OPS_EXPLAYER_H__ -#define __ONERT_BACKEND_CPU_OPS_EXPLAYER_H__ - -#include <backend/IPortableTensor.h> - -#include <exec/IFunction.h> - -namespace onert -{ -namespace backend -{ -namespace cpu -{ -namespace ops -{ - -class ExpLayer : public ::onert::exec::IFunction -{ -public: - ExpLayer(); - -public: - void expFloat32(); - - void expQuant8(); - - void configure(const IPortableTensor *input, IPortableTensor *output); - - void run() override; - -private: - const IPortableTensor *_input; - IPortableTensor *_output; -}; - -} // namespace ops -} // namespace cpu -} // namespace backend -} // namespace onert - -#endif // __ONERT_BACKEND_CPU_OPS_EXPLAYER_H__ diff --git a/runtime/onert/backend/cpu/ops/LogLayer.cc b/runtime/onert/backend/cpu/ops/LogLayer.cc deleted file mode 100644 index 307c15bc4..000000000 --- a/runtime/onert/backend/cpu/ops/LogLayer.cc +++ /dev/null @@ -1,70 +0,0 @@ -/* - * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#include "LogLayer.h" - -#include "OperationUtils.h" - -#include <cker/operation/Elementwise.h> - -namespace onert -{ -namespace backend -{ -namespace cpu -{ -namespace ops -{ - -LogLayer::LogLayer() : _input(nullptr), _output(nullptr) -{ - // DO NOTHING -} - -void LogLayer::logFloat32() -{ - nnfw::cker::Log(getTensorShape(_input), reinterpret_cast<const float *>(_input->buffer()), - getTensorShape(_output), reinterpret_cast<float *>(_output->buffer())); -} - -void LogLayer::logQuant8() { throw std::runtime_error{"NYI"}; } - -void LogLayer::configure(const IPortableTensor *input, IPortableTensor *output) -{ - _input = input; - _output = output; -} - -void LogLayer::run() -{ - if (_input->data_type() == OperandType::FLOAT32) - { - logFloat32(); - } - else if (_input->data_type() == OperandType::QUANT_UINT8_ASYMM) - { - logQuant8(); - } - else - { - throw std::runtime_error{"Log: unsupported data type"}; - } -} - -} // namespace ops -} // namespace cpu -} // namespace backend -} // namespace onert diff --git a/runtime/onert/backend/cpu/ops/LogLayer.h b/runtime/onert/backend/cpu/ops/LogLayer.h deleted file mode 100644 index 2f6b4b570..000000000 --- a/runtime/onert/backend/cpu/ops/LogLayer.h +++ /dev/null @@ -1,57 +0,0 @@ -/* - * Copyright (c) 2020 Samsung Electronics Co., Ltd. 
All Rights Reserved - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#ifndef __ONERT_BACKEND_CPU_OPS_LOGLAYER_H__ -#define __ONERT_BACKEND_CPU_OPS_LOGLAYER_H__ - -#include <backend/IPortableTensor.h> - -#include <exec/IFunction.h> - -namespace onert -{ -namespace backend -{ -namespace cpu -{ -namespace ops -{ - -class LogLayer : public ::onert::exec::IFunction -{ -public: - LogLayer(); - -public: - void logFloat32(); - - void logQuant8(); - - void configure(const IPortableTensor *input, IPortableTensor *output); - - void run() override; - -private: - const IPortableTensor *_input; - IPortableTensor *_output; -}; - -} // namespace ops -} // namespace cpu -} // namespace backend -} // namespace onert - -#endif // __ONERT_BACKEND_CPU_OPS_LOGLAYER_H__ diff --git a/runtime/onert/backend/cpu/ops/LogSoftMaxLayer.cc b/runtime/onert/backend/cpu/ops/LogSoftMaxLayer.cc index 06dde4fc4..1d7ee6caa 100644 --- a/runtime/onert/backend/cpu/ops/LogSoftMaxLayer.cc +++ b/runtime/onert/backend/cpu/ops/LogSoftMaxLayer.cc @@ -34,6 +34,16 @@ LogSoftMaxLayer::LogSoftMaxLayer() : _input(nullptr), _output(nullptr), _beta(0. // DO NOTHING } +void LogSoftMaxLayer::PopulateLookupTable(const float kBeta) +{ + const float scale = -_input->data_scale() * kBeta; + const int32_t max_uint8 = std::numeric_limits<uint8_t>::max(); + for (int32_t val = 0; val <= max_uint8; ++val) + { + _table[max_uint8 - val] = expf(scale * val); + } +} + void LogSoftMaxLayer::logsoftmaxFloat32() { nnfw::cker::SoftmaxParams op_params; @@ -46,7 +56,15 @@ void LogSoftMaxLayer::logsoftmaxFloat32() void LogSoftMaxLayer::logsoftmaxQuant8() { - // NYI + nnfw::cker::SoftmaxParams op_params; + op_params.beta = _beta; + op_params.axis = _axis; + op_params.table = _table; + op_params.zero_point = _output->data_offset(); + op_params.scale = _output->data_scale(); + nnfw::cker::LogSoftmax(op_params, _input->data_scale(), getTensorShape(_input), + reinterpret_cast<const uint8_t *>(_input->buffer()), + getTensorShape(_output), reinterpret_cast<uint8_t *>(_output->buffer())); } void LogSoftMaxLayer::configure(const IPortableTensor *input, const float beta, const int axis, @@ -56,6 +74,10 @@ void LogSoftMaxLayer::configure(const IPortableTensor *input, const float beta, _output = output; _beta = beta; _axis = axis; + if (_input->data_type() == OperandType::QUANT_UINT8_ASYMM) + { + PopulateLookupTable(_beta); + } } void LogSoftMaxLayer::run() { @@ -66,7 +88,7 @@ logsoftmaxFloat32(); } else if (_input->data_type() == OperandType::QUANT_UINT8_ASYMM) { - throw std::runtime_error{"LogSoftmax : NYI"}; + logsoftmaxQuant8(); } else { diff --git a/runtime/onert/backend/cpu/ops/LogSoftMaxLayer.h b/runtime/onert/backend/cpu/ops/LogSoftMaxLayer.h index ba9deca17..1533f3361 100644 --- a/runtime/onert/backend/cpu/ops/LogSoftMaxLayer.h +++ b/runtime/onert/backend/cpu/ops/LogSoftMaxLayer.h @@ -45,12 +45,15 @@ public: void run(); + void PopulateLookupTable(const float kBeta); + private: const IPortableTensor *_input; IPortableTensor *_output; float _beta; int
_axis; + float _table[256]; }; } // namespace ops diff --git a/runtime/onert/backend/cpu/ops/LogicalNotLayer.cc b/runtime/onert/backend/cpu/ops/LogicalNotLayer.cc deleted file mode 100644 index f2192c148..000000000 --- a/runtime/onert/backend/cpu/ops/LogicalNotLayer.cc +++ /dev/null @@ -1,64 +0,0 @@ -/* - * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#include "LogicalNotLayer.h" - -#include "OperationUtils.h" - -#include <cker/operation/LogicalNot.h> - -namespace onert -{ -namespace backend -{ -namespace cpu -{ -namespace ops -{ - -LogicalNotLayer::LogicalNotLayer() : _input(nullptr), _output(nullptr) -{ - // DO NOTHING -} - -void LogicalNotLayer::logicalNotBool8() -{ - nnfw::cker::LogicalNot(getTensorShape(_input), reinterpret_cast<const bool *>(_input->buffer()), - getTensorShape(_output), reinterpret_cast<bool *>(_output->buffer())); -} - -void LogicalNotLayer::configure(const IPortableTensor *input, IPortableTensor *output) -{ - _input = input; - _output = output; -} - -void LogicalNotLayer::run() -{ - if (_input->data_type() == OperandType::BOOL8) - { - logicalNotBool8(); - } - else - { - throw std::runtime_error{"LogicalNot: unsupported data type"}; - } -} - -} // namespace ops -} // namespace cpu -} // namespace backend -} // namespace onert diff --git a/runtime/onert/backend/cpu/ops/LogicalNotLayer.h b/runtime/onert/backend/cpu/ops/LogicalNotLayer.h deleted file mode 100644 index 5543cca3d..000000000 --- a/runtime/onert/backend/cpu/ops/LogicalNotLayer.h +++ /dev/null @@ -1,56 +0,0 @@ -/* - * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License.
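On the LogSoftMaxLayer change above: the quantized path becomes cheap because, once the row maximum is subtracted, an input byte can only sit 0..255 steps below it, so exp(-input_scale * beta * distance) takes at most 256 distinct values, and PopulateLookupTable precomputes all of them into the new _table member. For reference, a sketch of the float identity the table encodes (standard log-softmax; this is not the cker kernel itself):

#include <algorithm>
#include <cmath>
#include <cstddef>
#include <vector>

// Reference log-softmax over one row, showing the max-subtraction trick:
// log_softmax(x_i) = beta * (x_i - max) - log(sum_j exp(beta * (x_j - max))).
// In the quantized kernel, each exp(beta * (x_j - max)) term is a table lookup.
std::vector<float> logSoftmax(const std::vector<float> &x, float beta)
{
  const float max = *std::max_element(x.begin(), x.end());

  float sum = 0.0f;
  for (float v : x)
    sum += std::exp(beta * (v - max));

  const float log_sum = std::log(sum);
  std::vector<float> out(x.size());
  for (std::size_t i = 0; i < x.size(); ++i)
    out[i] = beta * (x[i] - max) - log_sum;
  return out;
}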
- */ - -#ifndef __ONERT_BACKEND_CPU_OPS_LOGICALNOTLAYER_H__ -#define __ONERT_BACKEND_CPU_OPS_LOGICALNOTLAYER_H__ - -#include <backend/IPortableTensor.h> - -#include <exec/IFunction.h> - -namespace onert -{ -namespace backend -{ -namespace cpu -{ -namespace ops -{ - -class LogicalNotLayer : public ::onert::exec::IFunction -{ -public: - LogicalNotLayer(); - -public: - void configure(const IPortableTensor *input, IPortableTensor *output); - - void run() override; - -private: - void logicalNotBool8(); - -private: - const IPortableTensor *_input; - IPortableTensor *_output; -}; - -} // namespace ops -} // namespace cpu -} // namespace backend -} // namespace onert - -#endif // __ONERT_BACKEND_CPU_OPS_LOGICALNOTLAYER_H__ diff --git a/runtime/onert/backend/cpu/ops/LogicalOrLayer.cc b/runtime/onert/backend/cpu/ops/LogicalOrLayer.cc deleted file mode 100644 index 5b7c9f6f0..000000000 --- a/runtime/onert/backend/cpu/ops/LogicalOrLayer.cc +++ /dev/null @@ -1,76 +0,0 @@ -/* - * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#include "LogicalOrLayer.h" - -#include "OperationUtils.h" - -#include <cker/operation/LogicalOr.h> - -namespace onert -{ -namespace backend -{ -namespace cpu -{ -namespace ops -{ -void LogicalOrLayer::lorBool8() -{ - if (!HaveSameShapes(_lhs, _rhs)) - { - nnfw::cker::LogicalOrBroadcast<bool>( - getTensorShape(_lhs), reinterpret_cast<const bool *>(_lhs->buffer()), getTensorShape(_rhs), - reinterpret_cast<const bool *>(_rhs->buffer()), getTensorShape(_output), - reinterpret_cast<bool *>(_output->buffer())); - } - else - { - nnfw::cker::LogicalOrElementwise<bool>(getTensorShape(_lhs), - reinterpret_cast<const bool *>(_lhs->buffer()), - reinterpret_cast<const bool *>(_rhs->buffer()), - reinterpret_cast<bool *>(_output->buffer())); - } -} - -void LogicalOrLayer::configure(const IPortableTensor *lhs, const IPortableTensor *rhs, - IPortableTensor *output) -{ - assert(lhs != nullptr); - assert(rhs != nullptr); - assert(output != nullptr); - - _lhs = lhs; - _rhs = rhs; - _output = output; -} - -void LogicalOrLayer::run() -{ - if ((_lhs->data_type() == OperandType::BOOL8) && (_rhs->data_type() == OperandType::BOOL8)) - { - lorBool8(); - } - else - { - throw std::runtime_error{"LogicalOr: Unsupported data type"}; - } -} - -} // namespace ops -} // namespace cpu -} // namespace backend -} // namespace onert diff --git a/runtime/onert/backend/cpu/ops/LogicalOrLayer.h b/runtime/onert/backend/cpu/ops/LogicalOrLayer.h deleted file mode 100644 index efaf396e8..000000000 --- a/runtime/onert/backend/cpu/ops/LogicalOrLayer.h +++ /dev/null @@ -1,59 +0,0 @@ -/* - * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. 
- * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#ifndef __ONERT_BACKEND_CPU_OPS_LOGICAL_OR_LAYER_H__ -#define __ONERT_BACKEND_CPU_OPS_LOGICAL_OR_LAYER_H__ - -#include <backend/IPortableTensor.h> - -#include <exec/IFunction.h> - -namespace onert -{ -namespace backend -{ -namespace cpu -{ -namespace ops -{ -class LogicalOrLayer : public ::onert::exec::IFunction -{ -public: - LogicalOrLayer() : _lhs(nullptr), _rhs(nullptr), _output(nullptr) - { - // Nothing - } - -public: - void configure(const IPortableTensor *_lhs, const IPortableTensor *_rhs, IPortableTensor *output); - - void run() override; - -private: - void lorBool8(); - -private: - const IPortableTensor *_lhs; - const IPortableTensor *_rhs; - IPortableTensor *_output; -}; - -} // namespace ops -} // namespace cpu -} // namespace backend -} // namespace onert - -#endif // __ONERT_BACKEND_CPU_OPS_LOGICAL_OR_LAYER_H__ diff --git a/runtime/onert/backend/cpu/ops/LogisticLayer.cc b/runtime/onert/backend/cpu/ops/LogisticLayer.cc deleted file mode 100644 index 140ab4d2c..000000000 --- a/runtime/onert/backend/cpu/ops/LogisticLayer.cc +++ /dev/null @@ -1,108 +0,0 @@ -/* - * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License.
- */ - -#include "LogisticLayer.h" - -#include "OperationUtils.h" - -#include <cker/operation/Logistic.h> - -namespace onert -{ -namespace backend -{ -namespace cpu -{ -namespace ops -{ - -LogisticLayer::LogisticLayer() : _input(nullptr), _output(nullptr) -{ - // DO NOTHING -} - -void LogisticLayer::populateLookupTable() -{ - const auto input_scale = static_cast<double>(_input->data_scale()); - const auto input_zero_point = static_cast<int32_t>(_input->data_offset()); - const auto output_scale = static_cast<double>(_output->data_scale()); - const auto output_zero_point = static_cast<int32_t>(_output->data_offset()); - const float inverse_scale = 1 / output_scale; - int32_t maxval = std::numeric_limits<uint8_t>::max(); - int32_t minval = std::numeric_limits<uint8_t>::min(); - for (int32_t val = minval; val <= maxval; ++val) - { - const float dequantized = input_scale * (val - input_zero_point); - const float transformed = 1.0f / (1.0f + std::exp(-dequantized)); - const float rescaled = std::round(transformed * inverse_scale); - const int32_t quantized = static_cast<int32_t>(rescaled + output_zero_point); - _table[val] = static_cast<uint8_t>(std::max(std::min(maxval, quantized), minval)); - } -} - -void LogisticLayer::logisticFloat32() -{ - nnfw::cker::Logistic(getTensorShape(_input), reinterpret_cast<const float *>(_input->buffer()), - getTensorShape(_output), reinterpret_cast<float *>(_output->buffer())); -} - -void LogisticLayer::logisticQuant8() -{ - const int size = MatchingFlatSize(getTensorShape(_input), getTensorShape(_output)); - const uint8_t *input_data = reinterpret_cast<const uint8_t *>(_input->buffer()); - uint8_t *output_data = reinterpret_cast<uint8_t *>(_output->buffer()); - - for (int i = 0; i < size; ++i) - { - output_data[i] = _table[input_data[i]]; - } -} - -void LogisticLayer::configure(const IPortableTensor *input, IPortableTensor *output) -{ - _input = input; - _output = output; - - if (_input->data_type() == OperandType::QUANT_UINT8_ASYMM) - { - if (_output->data_scale() != 1.f / 256) - { - throw std::runtime_error{"incorrect scale for output"}; - } - populateLookupTable(); - } -} - -void LogisticLayer::run() -{ - if (_input->data_type() == OperandType::FLOAT32) - { - logisticFloat32(); - } - else if (_input->data_type() == OperandType::QUANT_UINT8_ASYMM) - { - logisticQuant8(); - } - else - { - throw std::runtime_error{"Logistic: unsupported data type"}; - } -} - -} // namespace ops -} // namespace cpu -} // namespace backend -} // namespace onert diff --git a/runtime/onert/backend/cpu/ops/LogisticLayer.h b/runtime/onert/backend/cpu/ops/LogisticLayer.h deleted file mode 100644 index cac77939d..000000000 --- a/runtime/onert/backend/cpu/ops/LogisticLayer.h +++ /dev/null @@ -1,60 +0,0 @@ -/* - * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
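The deleted LogisticLayer above shows the same lookup-table idea for sigmoid: configure() walks all 256 possible input bytes, dequantizes each one, applies 1/(1 + exp(-x)), and requantizes against the fixed output scale of 1/256, so run() on uint8 data is a pure table lookup per element. A worked example with assumed quantization parameters input_scale = 0.1 and input_zero_point = 128: input byte 148 dequantizes to 0.1 * (148 - 128) = 2.0, sigmoid(2.0) ≈ 0.8808, and round(0.8808 * 256) = 225, so _table[148] = 225. The guard rejecting _output->data_scale() != 1.f / 256 exists precisely because the table bakes that output scale in.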
- */ - -#ifndef __ONERT_BACKEND_CPU_OPS_LOGISTICLAYER_H__ -#define __ONERT_BACKEND_CPU_OPS_LOGISTICLAYER_H__ - -#include <backend/IPortableTensor.h> - -#include <exec/IFunction.h> - -namespace onert -{ -namespace backend -{ -namespace cpu -{ -namespace ops -{ - -class LogisticLayer : public ::onert::exec::IFunction -{ -public: - LogisticLayer(); - -public: - void logisticFloat32(); - - void logisticQuant8(); - - void configure(const IPortableTensor *input, IPortableTensor *output); - void populateLookupTable(); - - void run() override; - -private: - const IPortableTensor *_input; - IPortableTensor *_output; - - uint8_t _table[256]; -}; - -} // namespace ops -} // namespace cpu -} // namespace backend -} // namespace onert - -#endif // __ONERT_BACKEND_CPU_OPS_LOGISTICLAYER_H__ diff --git a/runtime/onert/backend/cpu/ops/MaxLayer.cc b/runtime/onert/backend/cpu/ops/MaxLayer.cc deleted file mode 100644 index 9631983be..000000000 --- a/runtime/onert/backend/cpu/ops/MaxLayer.cc +++ /dev/null @@ -1,85 +0,0 @@ -/* - * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#include "MaxLayer.h" - -#include "OperationUtils.h" - -#include <cker/operation/MaxMin.h> - -namespace onert -{ -namespace backend -{ -namespace cpu -{ -namespace ops -{ - -template <typename T> void MaxLayer::maximum() -{ - nnfw::cker::Max<T>(getTensorShape(_lhs), reinterpret_cast<const T *>(_lhs->buffer()), - getTensorShape(_rhs), reinterpret_cast<const T *>(_rhs->buffer()), - getTensorShape(_output), reinterpret_cast<T *>(_output->buffer())); -} - -void MaxLayer::maxQuant8() -{ - if (_lhs->data_scale() == _rhs->data_scale() && _lhs->data_scale() == _output->data_scale()) - { - if (_lhs->data_offset() == _rhs->data_offset() && _lhs->data_offset() == _output->data_offset()) - { - return nnfw::cker::Max<uint8_t>( - getTensorShape(_lhs), reinterpret_cast<const uint8_t *>(_lhs->buffer()), - getTensorShape(_rhs), reinterpret_cast<const uint8_t *>(_rhs->buffer()), - getTensorShape(_output), reinterpret_cast<uint8_t *>(_output->buffer())); - } - } - throw std::runtime_error("Max NYI for quantized"); -} - -void MaxLayer::configure(const IPortableTensor *lhs, const IPortableTensor *rhs, - IPortableTensor *output) -{ - assert(lhs != nullptr); - assert(rhs != nullptr); - assert(output != nullptr); - - _lhs = lhs; - _rhs = rhs; - _output = output; -} - -void MaxLayer::run() -{ - if (_lhs->data_type() == OperandType::FLOAT32) - { - maximum<float>(); - } - else if (_lhs->data_type() == OperandType::QUANT_UINT8_ASYMM) - { - maxQuant8(); - } - else - { - throw std::runtime_error{"Max: unsupported data type"}; - } -} - -} // namespace ops -} // namespace cpu -} // namespace backend -} // namespace onert diff --git a/runtime/onert/backend/cpu/ops/MaxPoolLayer.cc b/runtime/onert/backend/cpu/ops/MaxPoolLayer.cc deleted file mode 100644 index 1e983b408..000000000 --- a/runtime/onert/backend/cpu/ops/MaxPoolLayer.cc +++ /dev/null @@ -1,115 +0,0 @@ -/* - * Copyright (c) 2018 
Samsung Electronics Co., Ltd. All Rights Reserved - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#include "MaxPoolLayer.h" - -#include <cker/operation/MaxPool.h> - -namespace onert -{ -namespace backend -{ -namespace cpu -{ -namespace ops -{ - -#define MAXPOOLING_PARAMETERS \ - nnfw::cker::PoolParams op_params; \ - op_params.stride_height = _strideHeight; \ - op_params.stride_width = _strideWidth; \ - op_params.filter_height = _kernelHeight; \ - op_params.filter_width = _kernelWidth; \ - op_params.padding_values.height = (int8_t)_paddingTop; \ - op_params.padding_values.width = (int8_t)_paddingLeft; - -MaxPoolLayer::MaxPoolLayer() - : _input(nullptr), _output(nullptr), _paddingLeft(0), _paddingTop(0), _paddingRight(0), - _paddingBottom(0), _strideWidth(0), _strideHeight(0), _kernelWidth(0), _kernelHeight(0), - _activation(ir::Activation::NONE) -{ - // DO NOTHING -} - -void MaxPoolLayer::maxPoolFloat32() -{ - MAXPOOLING_PARAMETERS - float output_activation_min = 0, output_activation_max = 0; - CalculateActivationRange(_activation, &output_activation_min, &output_activation_max); - op_params.float_activation_min = output_activation_min; - op_params.float_activation_max = output_activation_max; - - nnfw::cker::MaxPool(op_params, getTensorShape(_input), - reinterpret_cast<const float *>(_input->buffer()), getTensorShape(_output), - reinterpret_cast<float *>(_output->buffer())); -} -void MaxPoolLayer::maxPoolQuant8() -{ - MAXPOOLING_PARAMETERS - int32_t output_activation_min = 0; - int32_t output_activation_max = 0; - CalculateActivationRangeUint8(_activation, _output, &output_activation_min, - &output_activation_max); - op_params.quantized_activation_min = output_activation_min; - op_params.quantized_activation_max = output_activation_max; - - nnfw::cker::MaxPool(op_params, getTensorShape(_input), - reinterpret_cast<const uint8_t *>(_input->buffer()), getTensorShape(_output), - reinterpret_cast<uint8_t *>(_output->buffer())); -} - -void MaxPoolLayer::configure(const IPortableTensor *input, const uint32_t paddingLeft, - const uint32_t paddingRight, const uint32_t paddingTop, - const uint32_t paddingBottom, const uint32_t strideWidth, - const uint32_t strideHeight, const uint32_t kernelWidth, - const uint32_t kernelHeight, const ir::Activation activation, - IPortableTensor *output) -{ - _input = input; - _paddingLeft = paddingLeft; - _paddingRight = paddingRight; - _paddingTop = paddingTop; - _paddingBottom = paddingBottom; - _strideWidth = strideWidth; - _strideHeight = strideHeight; - _kernelWidth = kernelWidth; - _kernelHeight = kernelHeight; - _activation = activation; - _output = output; -} - -void MaxPoolLayer::run() -{ - if (_input->data_type() == OperandType::FLOAT32) - { - maxPoolFloat32(); - } - else if (_input->data_type() == OperandType::QUANT_UINT8_ASYMM) - { - maxPoolQuant8(); - } - else - { - throw std::runtime_error{"MaxPool: unsupported data type"}; - } -} - -#undef MAXPOOLING_PARAMETERS - -} // namespace ops -} // namespace cpu -} // namespace backend -} // 
namespace onert diff --git a/runtime/onert/backend/cpu/ops/MinLayer.cc b/runtime/onert/backend/cpu/ops/MinLayer.cc deleted file mode 100644 index 20859673b..000000000 --- a/runtime/onert/backend/cpu/ops/MinLayer.cc +++ /dev/null @@ -1,89 +0,0 @@ -/* - * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#include "MinLayer.h" - -#include "OperationUtils.h" - -#include <cker/operation/MaxMin.h> - -namespace onert -{ -namespace backend -{ -namespace cpu -{ -namespace ops -{ - -template <typename T> void MinLayer::minimum() -{ - nnfw::cker::Min<T>(getTensorShape(_lhs), reinterpret_cast<const T *>(_lhs->buffer()), - getTensorShape(_rhs), reinterpret_cast<const T *>(_rhs->buffer()), - getTensorShape(_output), reinterpret_cast<T *>(_output->buffer())); -} - -void MinLayer::minQuant8() -{ - if (_lhs->data_scale() == _rhs->data_scale() && _lhs->data_scale() == _output->data_scale()) - { - if (_lhs->data_offset() == _rhs->data_offset() && _lhs->data_offset() == _output->data_offset()) - { - return nnfw::cker::Min<uint8_t>( - getTensorShape(_lhs), reinterpret_cast<const uint8_t *>(_lhs->buffer()), - getTensorShape(_rhs), reinterpret_cast<const uint8_t *>(_rhs->buffer()), - getTensorShape(_output), reinterpret_cast<uint8_t *>(_output->buffer())); - } - } - throw std::runtime_error("Min NYI for quantized"); -} - -void MinLayer::configure(const IPortableTensor *lhs, const IPortableTensor *rhs, - IPortableTensor *output) -{ - assert(lhs != nullptr); - assert(rhs != nullptr); - assert(output != nullptr); - - _lhs = lhs; - _rhs = rhs; - _output = output; -} - -void MinLayer::run() -{ - if (_lhs->data_type() == OperandType::FLOAT32) - { - minimum<float>(); - } - else if (_lhs->data_type() == OperandType::QUANT_UINT8_ASYMM) - { - minQuant8(); - } - else if (_lhs->data_type() == OperandType::INT32) - { - minimum<int32_t>(); - } - else - { - throw std::runtime_error{"Min: unsupported data type"}; - } -} - -} // namespace ops -} // namespace cpu -} // namespace backend -} // namespace onert diff --git a/runtime/onert/backend/cpu/ops/MinLayer.h b/runtime/onert/backend/cpu/ops/MinLayer.h deleted file mode 100644 index 9bd114e54..000000000 --- a/runtime/onert/backend/cpu/ops/MinLayer.h +++ /dev/null @@ -1,61 +0,0 @@ -/* - * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ - -#ifndef __ONERT_BACKEND_CPU_OPS_MINLAYER_H__ -#define __ONERT_BACKEND_CPU_OPS_MINLAYER_H__ - -#include <backend/IPortableTensor.h> - -#include <exec/IFunction.h> - -namespace onert -{ -namespace backend -{ -namespace cpu -{ -namespace ops -{ - -class MinLayer : public ::onert::exec::IFunction -{ -public: - MinLayer() : _lhs(nullptr), _rhs(nullptr), _output(nullptr) - { - // DO NOTHING - } - -public: - template <typename T> void minimum(); - - void minQuant8(); - - void configure(const IPortableTensor *lhs, const IPortableTensor *rhs, IPortableTensor *output); - - void run() override; - -private: - const IPortableTensor *_lhs; - const IPortableTensor *_rhs; - IPortableTensor *_output; -}; - -} // namespace ops -} // namespace cpu -} // namespace backend -} // namespace onert - -#endif // __ONERT_BACKEND_CPU_OPS_MINLAYER_H__ diff --git a/runtime/onert/backend/cpu/ops/MulLayer.cc b/runtime/onert/backend/cpu/ops/MulLayer.cc deleted file mode 100644 index eef73edf3..000000000 --- a/runtime/onert/backend/cpu/ops/MulLayer.cc +++ /dev/null @@ -1,116 +0,0 @@ -/* - * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#include "MulLayer.h" - -#include <cker/operation/BinaryArithmeticOps.h> - -namespace onert -{ -namespace backend -{ -namespace cpu -{ -namespace ops -{ - -void MulLayer::mulFloat32() -{ - float output_activation_min = 0, output_activation_max = 0; - CalculateActivationRange(_activation, &output_activation_min, &output_activation_max); - nnfw::cker::BinaryArithmeticOpParam op_params; - op_params.float_activation_max = output_activation_max; - op_params.float_activation_min = output_activation_min; - - const bool need_broadcast = - nnfw::cker::ProcessBroadcastShapes(getTensorShape(_lhs), getTensorShape(_rhs), &op_params); - if (need_broadcast) - { - nnfw::cker::BroadcastBinaryArithmeticOp<nnfw::cker::BinaryArithmeticOpType::MUL>( - op_params, getTensorShape(_lhs), reinterpret_cast<const float *>(_lhs->buffer()), - getTensorShape(_rhs), reinterpret_cast<const float *>(_rhs->buffer()), - getTensorShape(_output), reinterpret_cast<float *>(_output->buffer())); - return; - } - - nnfw::cker::BinaryArithmeticOp<nnfw::cker::BinaryArithmeticOpType::MUL>( - op_params, getTensorShape(_lhs), reinterpret_cast<const float *>(_lhs->buffer()), - getTensorShape(_rhs), reinterpret_cast<const float *>(_rhs->buffer()), - getTensorShape(_output), reinterpret_cast<float *>(_output->buffer())); -} - -void MulLayer::mulQuant8() -{ - int32_t output_activation_min, output_activation_max; - CalculateActivationRangeUint8(_activation, _output, &output_activation_min, - &output_activation_max); - nnfw::cker::BinaryArithmeticOpParam op_params; - - op_params.quantized_activation_max = output_activation_max; - op_params.quantized_activation_min = output_activation_min; - op_params.input1_offset = -_lhs->data_offset(); - op_params.input2_offset = -_rhs->data_offset(); - op_params.output_offset = _output->data_offset(); - - double 
real_multiplier = _lhs->data_scale() * _rhs->data_scale() / _output->data_scale(); - QuantizeMultiplier(real_multiplier, &op_params.output_multiplier, &op_params.output_shift); - - const bool need_broadcast = - nnfw::cker::ProcessBroadcastShapes(getTensorShape(_lhs), getTensorShape(_rhs), &op_params); - if (need_broadcast) - { - nnfw::cker::BroadcastBinaryArithmeticOp<nnfw::cker::BinaryArithmeticOpType::MUL>( - op_params, getTensorShape(_lhs), reinterpret_cast<const uint8_t *>(_lhs->buffer()), - getTensorShape(_rhs), reinterpret_cast<const uint8_t *>(_rhs->buffer()), - getTensorShape(_output), reinterpret_cast<uint8_t *>(_output->buffer())); - return; - } - - nnfw::cker::BinaryArithmeticOp<nnfw::cker::BinaryArithmeticOpType::MUL>( - op_params, getTensorShape(_lhs), reinterpret_cast<const uint8_t *>(_lhs->buffer()), - getTensorShape(_rhs), reinterpret_cast<const uint8_t *>(_rhs->buffer()), - getTensorShape(_output), reinterpret_cast<uint8_t *>(_output->buffer())); -} - -void MulLayer::configure(const IPortableTensor *lhs, const IPortableTensor *rhs, - const ir::Activation activation, IPortableTensor *output) -{ - _lhs = lhs; - _rhs = rhs; - _activation = activation; - _output = output; -} - -void MulLayer::run() -{ - if (_output->data_type() == OperandType::FLOAT32) - { - mulFloat32(); - } - else if (_output->data_type() == OperandType::QUANT_UINT8_ASYMM) - { - mulQuant8(); - } - else - { - throw std::runtime_error{"Mul: unsupported data type"}; - } -} - -} // namespace ops -} // namespace cpu -} // namespace backend -} // namespace onert diff --git a/runtime/onert/backend/cpu/ops/MulLayer.h b/runtime/onert/backend/cpu/ops/MulLayer.h deleted file mode 100644 index 2c4a98875..000000000 --- a/runtime/onert/backend/cpu/ops/MulLayer.h +++ /dev/null @@ -1,65 +0,0 @@ -/* - * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
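A note on the real_multiplier line in the deleted mulQuant8() above: with affine quantization each real value is r = scale * (q - zero_point), so the product of two quantized operands carries scale s_lhs * s_rhs, and mapping it onto the output grid requires multiplying by s_lhs * s_rhs / s_out. Concretely, s_out * (q_out - z_out) = s_lhs * (q_lhs - z_lhs) * s_rhs * (q_rhs - z_rhs) rearranges to q_out = z_out + (s_lhs * s_rhs / s_out) * (q_lhs - z_lhs) * (q_rhs - z_rhs), which is why the zero points are negated into input1_offset/input2_offset and why QuantizeMultiplier folds the real multiplier into an int32 fixed-point multiplier plus a shift for integer-only arithmetic.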
- */ - -#ifndef __ONERT_BACKEND_CPU_OPS_MULLAYER_H__ -#define __ONERT_BACKEND_CPU_OPS_MULLAYER_H__ - -#include <backend/IPortableTensor.h> -#include "OperationUtils.h" - -#include <exec/IFunction.h> - -namespace onert -{ -namespace backend -{ -namespace cpu -{ -namespace ops -{ - -class MulLayer : public ::onert::exec::IFunction -{ -public: - MulLayer() : _lhs(nullptr), _rhs(nullptr), _output(nullptr) - { - // DO NOTHING - } - -public: - void mulFloat32(); - - void mulQuant8(); - - void configure(const IPortableTensor *lhs, const IPortableTensor *rhs, - const ir::Activation activation, IPortableTensor *output); - - void run() override; - -private: - const IPortableTensor *_lhs; - const IPortableTensor *_rhs; - IPortableTensor *_output; - - ir::Activation _activation{ir::Activation::NONE}; -}; - -} // namespace ops -} // namespace cpu -} // namespace backend -} // namespace onert - -#endif // __ONERT_BACKEND_CPU_OPS_MULLAYER_H__ diff --git a/runtime/onert/backend/cpu/ops/NegLayer.cc b/runtime/onert/backend/cpu/ops/NegLayer.cc deleted file mode 100644 index 2cb95b771..000000000 --- a/runtime/onert/backend/cpu/ops/NegLayer.cc +++ /dev/null @@ -1,70 +0,0 @@ -/* - * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#include "NegLayer.h" - -#include "OperationUtils.h" - -#include <cker/operation/Elementwise.h> - -namespace onert -{ -namespace backend -{ -namespace cpu -{ -namespace ops -{ - -NegLayer::NegLayer() : _input(nullptr), _output(nullptr) -{ - // DO NOTHING -} - -void NegLayer::negFloat32() -{ - nnfw::cker::Neg(getTensorShape(_input), reinterpret_cast<const float *>(_input->buffer()), - getTensorShape(_output), reinterpret_cast<float *>(_output->buffer())); -} - -void NegLayer::negQuant8() { throw std::runtime_error{"NYI"}; } - -void NegLayer::configure(const IPortableTensor *input, IPortableTensor *output) -{ - _input = input; - _output = output; -} - -void NegLayer::run() -{ - if (_input->data_type() == OperandType::FLOAT32) - { - negFloat32(); - } - else if (_input->data_type() == OperandType::QUANT_UINT8_ASYMM) - { - negQuant8(); - } - else - { - throw std::runtime_error{"Neg: unsupported data type"}; - } -} - -} // namespace ops -} // namespace cpu -} // namespace backend -} // namespace onert diff --git a/runtime/onert/backend/cpu/ops/NegLayer.h b/runtime/onert/backend/cpu/ops/NegLayer.h deleted file mode 100644 index addf84ec2..000000000 --- a/runtime/onert/backend/cpu/ops/NegLayer.h +++ /dev/null @@ -1,57 +0,0 @@ -/* - * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. 
- * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#ifndef __ONERT_BACKEND_CPU_OPS_NEGLAYER_H__ -#define __ONERT_BACKEND_CPU_OPS_NEGLAYER_H__ - -#include <backend/IPortableTensor.h> - -#include <exec/IFunction.h> - -namespace onert -{ -namespace backend -{ -namespace cpu -{ -namespace ops -{ - -class NegLayer : public ::onert::exec::IFunction -{ -public: - NegLayer(); - -public: - void negFloat32(); - - void negQuant8(); - - void configure(const IPortableTensor *input, IPortableTensor *output); - - void run() override; - -private: - const IPortableTensor *_input; - IPortableTensor *_output; -}; - -} // namespace ops -} // namespace cpu -} // namespace backend -} // namespace onert - -#endif // __ONERT_BACKEND_CPU_OPS_NEGLAYER_H__ diff --git a/runtime/onert/backend/cpu/ops/PoolLayer.cc b/runtime/onert/backend/cpu/ops/PoolLayer.cc new file mode 100644 index 000000000..85d02a751 --- /dev/null +++ b/runtime/onert/backend/cpu/ops/PoolLayer.cc @@ -0,0 +1,132 @@ +/* + * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License.
+ */ + +#include "PoolLayer.h" + +#include <cker/operation/AveragePool.h> +#include <cker/operation/MaxPool.h> + +#include <unordered_map> + +namespace onert +{ +namespace backend +{ +namespace cpu +{ +namespace ops +{ + +namespace +{ +template <typename T> +void avgPool2D(const nnfw::cker::PoolParams ¶ms, const IPortableTensor *input, + IPortableTensor *output) +{ + nnfw::cker::AveragePool<T>(params, getTensorShape(input), + reinterpret_cast<const T *>(input->buffer()), getTensorShape(output), + reinterpret_cast<T *>(output->buffer())); +} + +template <typename T> +void maxPool2D(const nnfw::cker::PoolParams ¶ms, const IPortableTensor *input, + IPortableTensor *output) +{ + nnfw::cker::MaxPool<T>(params, getTensorShape(input), + reinterpret_cast<const T *>(input->buffer()), getTensorShape(output), + reinterpret_cast<T *>(output->buffer())); +} + +template <typename T> +std::function<void(const IPortableTensor *, IPortableTensor *)> +generateKernelGeneric(const nnfw::cker::PoolParams ¶ms, PoolType op_type) +{ + if (op_type == PoolType::kAvg) + { + return std::bind(&avgPool2D<T>, params, std::placeholders::_1, std::placeholders::_2); + } + else if (op_type == PoolType::kMax) + { + return std::bind(&maxPool2D<T>, params, std::placeholders::_1, std::placeholders::_2); + } + else + { + throw std::runtime_error{"Pool: unsupported pool type"}; + } +} +} // namespace + +PoolLayer::PoolLayer() : _input(nullptr), _output(nullptr), _kernel() +{ + // DO NOTHING +} + +#define POOLING_PARAMETERS \ + nnfw::cker::PoolParams op_params; \ + op_params.stride_height = strideHeight; \ + op_params.stride_width = strideWidth; \ + op_params.filter_height = kernelHeight; \ + op_params.filter_width = kernelWidth; \ + op_params.padding_values.height = (int8_t)paddingTop; \ + op_params.padding_values.width = (int8_t)paddingLeft; + +void PoolLayer::configure(const IPortableTensor *input, const uint32_t paddingLeft, const uint32_t, + const uint32_t paddingTop, const uint32_t, const uint32_t strideWidth, + const uint32_t strideHeight, const uint32_t kernelWidth, + const uint32_t kernelHeight, const ir::Activation activation, + IPortableTensor *output, const PoolType op_type) +{ + assert(input != nullptr); + assert(output != nullptr); + + _input = input; + _output = output; + + POOLING_PARAMETERS + if (_input->data_type() == OperandType::FLOAT32) + { + float output_activation_min = 0; + float output_activation_max = 0; + CalculateActivationRange<float>(activation, &output_activation_min, &output_activation_max); + op_params.float_activation_min = output_activation_min; + op_params.float_activation_max = output_activation_max; + + _kernel = generateKernelGeneric<float>(op_params, op_type); + } + else if (_input->data_type() == OperandType::QUANT_UINT8_ASYMM) + { + int32_t output_activation_min = 0; + int32_t output_activation_max = 0; + CalculateActivationRangeUint8(activation, _output, &output_activation_min, + &output_activation_max); + op_params.quantized_activation_min = output_activation_min; + op_params.quantized_activation_max = output_activation_max; + _kernel = generateKernelGeneric<uint8_t>(op_params, op_type); + } + else + { + throw std::runtime_error{"Pool: unsupported data type"}; + } +} + +void PoolLayer::run() { _kernel(_input, _output); } + +#undef AVGPOOLING_PARAMETERS + +} // namespace ops +} // namespace cpu +} // namespace backend +} // namespace onert diff --git a/runtime/onert/backend/cpu/ops/MaxPoolLayer.h b/runtime/onert/backend/cpu/ops/PoolLayer.h index 4c5109f64..b37835946 100644 --- 
a/runtime/onert/backend/cpu/ops/MaxPoolLayer.h +++ b/runtime/onert/backend/cpu/ops/PoolLayer.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved + * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -14,8 +14,8 @@ * limitations under the License. */ -#ifndef __ONERT_BACKEND_CPU_OPS_MAXPOOLLAYER_H__ -#define __ONERT_BACKEND_CPU_OPS_MAXPOOLLAYER_H__ +#ifndef __ONERT_BACKEND_CPU_OPS_POOLLAYER_H__ +#define __ONERT_BACKEND_CPU_OPS_POOLLAYER_H__ #include <backend/IPortableTensor.h> #include "OperationUtils.h" @@ -31,22 +31,25 @@ namespace cpu namespace ops { -class MaxPoolLayer : public ::onert::exec::IFunction +enum class PoolType { -public: - MaxPoolLayer(); + kAvg, + kL2, + kMax, +}; +class PoolLayer : public ::onert::exec::IFunction +{ public: - void maxPoolFloat32(); - - void maxPoolQuant8(); + PoolLayer(); +public: void configure(const IPortableTensor *input, const uint32_t paddingLeft, const uint32_t paddingRight, const uint32_t paddingTop, const uint32_t paddingBottom, const uint32_t strideWidth, const uint32_t strideHeight, const uint32_t kernelWidth, const uint32_t kernelHeight, const ir::Activation activation, - IPortableTensor *output); + IPortableTensor *output, const PoolType op_type); void run() override; @@ -54,17 +57,7 @@ private: const IPortableTensor *_input; IPortableTensor *_output; - uint32_t _paddingLeft; - uint32_t _paddingTop; - uint32_t _paddingRight; - uint32_t _paddingBottom; - - uint32_t _strideWidth; - uint32_t _strideHeight; - uint32_t _kernelWidth; - uint32_t _kernelHeight; - - ir::Activation _activation; + std::function<void(const IPortableTensor *, IPortableTensor *)> _kernel; }; } // namespace ops @@ -72,4 +65,4 @@ private: } // namespace backend } // namespace onert -#endif // __ONERT_BACKEND_CPU_OPS_MAXPOOLLAYER_H__ +#endif // __ONERT_BACKEND_CPU_OPS_POOLLAYER_H__ diff --git a/runtime/onert/backend/cpu/ops/QuantizeLayer.cc b/runtime/onert/backend/cpu/ops/QuantizeLayer.cc deleted file mode 100644 index 45fc148bf..000000000 --- a/runtime/onert/backend/cpu/ops/QuantizeLayer.cc +++ /dev/null @@ -1,63 +0,0 @@ -/* - * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
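The MaxPoolLayer → PoolLayer rename above completes the consolidation seen in PoolLayer.cc: configure() curries the pooling parameters into a std::function once, so run() needs no switch and the header drops all the padding/stride/kernel members. A self-contained sketch of that configure-time currying, using a simplified 1-D pool in place of the real nnfw::cker kernels (PoolParams, Tensor, and the function names here are stand-ins):

#include <algorithm>
#include <cstddef>
#include <functional>
#include <stdexcept>
#include <vector>

// Simplified stand-ins for the sketch (not the onert/cker types).
struct PoolParams
{
  int window;
  int stride;
};
using Tensor = std::vector<float>;
using Kernel = std::function<void(const Tensor *, Tensor *)>;

// 1-D average pooling: one output per fully covered window.
void avgPool(const PoolParams &p, const Tensor *in, Tensor *out)
{
  out->clear();
  for (std::size_t i = 0; i + p.window <= in->size(); i += p.stride)
  {
    float sum = 0.0f;
    for (int k = 0; k < p.window; ++k)
      sum += (*in)[i + k];
    out->push_back(sum / p.window);
  }
}

// 1-D max pooling over the same windows.
void maxPool(const PoolParams &p, const Tensor *in, Tensor *out)
{
  out->clear();
  for (std::size_t i = 0; i + p.window <= in->size(); i += p.stride)
  {
    float best = (*in)[i];
    for (int k = 1; k < p.window; ++k)
      best = std::max(best, (*in)[i + k]);
    out->push_back(best);
  }
}

enum class PoolKind
{
  kAvg,
  kMax
};

// configure()-time currying: bind the parameters now so the stored callable
// only needs (input, output), matching the layer's generic _kernel signature.
Kernel makePoolKernel(const PoolParams &p, PoolKind kind)
{
  if (kind == PoolKind::kAvg)
    return std::bind(&avgPool, p, std::placeholders::_1, std::placeholders::_2);
  if (kind == PoolKind::kMax)
    return std::bind(&maxPool, p, std::placeholders::_1, std::placeholders::_2);
  throw std::runtime_error{"Pool: unsupported pool type"};
}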
- */ - -#include "QuantizeLayer.h" - -#include <cker/operation/Quantize.h> - -namespace onert -{ -namespace backend -{ -namespace cpu -{ -namespace ops -{ - -QuantizeLayer::QuantizeLayer() : _input(nullptr), _output(nullptr) -{ - // DO NOTHING -} - -template <typename InputT, typename OutputT> void QuantizeLayer::affineQuantize() -{ - nnfw::cker::Quantize(getTensorShape(_input), reinterpret_cast<const InputT *>(_input->buffer()), - getTensorShape(_output), reinterpret_cast<OutputT *>(_output->buffer()), - _output->data_scale(), _output->data_offset()); -} - -void QuantizeLayer::configure(const IPortableTensor *input, IPortableTensor *output) -{ - _input = input; - _output = output; -} - -void QuantizeLayer::run() -{ - if (_input->data_type() == OperandType::FLOAT32) - { - affineQuantize<float, uint8_t>(); - } - else - { - throw std::runtime_error{"Quantize: unsupported data type"}; - } -} - -} // namespace ops -} // namespace cpu -} // namespace backend -} // namespace onert diff --git a/runtime/onert/backend/cpu/ops/QuantizeLayer.h b/runtime/onert/backend/cpu/ops/QuantizeLayer.h deleted file mode 100644 index b4e7aca40..000000000 --- a/runtime/onert/backend/cpu/ops/QuantizeLayer.h +++ /dev/null @@ -1,56 +0,0 @@ -/* - * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#ifndef __ONERT_BACKEND_CPU_OPS_QUANTIZELAYER_H__ -#define __ONERT_BACKEND_CPU_OPS_QUANTIZELAYER_H__ - -#include <backend/IPortableTensor.h> -#include "OperationUtils.h" - -#include <exec/IFunction.h> - -namespace onert -{ -namespace backend -{ -namespace cpu -{ -namespace ops -{ - -class QuantizeLayer : public ::onert::exec::IFunction -{ -public: - QuantizeLayer(); - -public: - template <typename InputT, typename OutputT> void affineQuantize(); - - void configure(const IPortableTensor *input, IPortableTensor *output); - - void run() override; - -private: - const IPortableTensor *_input; - IPortableTensor *_output; -}; - -} // namespace ops -} // namespace cpu -} // namespace backend -} // namespace onert - -#endif // __ONERT_BACKEND_CPU_OPS_QUANTIZELAYER_H__ diff --git a/runtime/onert/backend/cpu/ops/RoundLayer.cc b/runtime/onert/backend/cpu/ops/RankLayer.cc index 185d7554e..4690bdf72 100644 --- a/runtime/onert/backend/cpu/ops/RoundLayer.cc +++ b/runtime/onert/backend/cpu/ops/RankLayer.cc @@ -14,12 +14,10 @@ * limitations under the License. 
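The deleted QuantizeLayer above is the code that moved into ElementwiseUnaryLayer's affineQuantize template earlier in this diff; the underlying transform is essentially q = clamp(round(x / output_scale) + output_zero_point) to the output type's range. A worked example with assumed parameters output_scale = 0.5 and output_zero_point = 10: x = 2.3 maps to round(2.3 / 0.5) + 10 = 5 + 10 = 15, while x = -7.0 maps to round(-14.0) + 10 = -4, which clamps to 0 for uint8.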
*/ -#include "RoundLayer.h" +#include "RankLayer.h" #include "OperationUtils.h" -#include <cker/operation/Round.h> - namespace onert { namespace backend @@ -28,32 +26,28 @@ namespace cpu { namespace ops { -RoundLayer::RoundLayer() : _input(nullptr), _output(nullptr) -{ - // DO NOTHING -} -void RoundLayer::roundFloat32() +RankLayer::RankLayer() : _input(nullptr), _output(nullptr) { - nnfw::cker::Round(getTensorShape(_input), reinterpret_cast<const float *>(_input->buffer()), - getTensorShape(_output), reinterpret_cast<float *>(_output->buffer())); + // DO NOTHING } -void RoundLayer::configure(const IPortableTensor *input, IPortableTensor *output) +void RankLayer::configure(const IPortableTensor *input, IPortableTensor *output) { _input = input; _output = output; } -void RoundLayer::run() +void RankLayer::run() { - if (_input->data_type() == OperandType::FLOAT32) + if (_input->data_type() == OperandType::FLOAT32 || _input->data_type() == OperandType::INT32) { - roundFloat32(); + int32_t *output_data = reinterpret_cast<int32_t *>(_output->buffer()); + output_data[0] = _input->num_dimensions(); } else { - throw std::runtime_error{"Round: unsupported data type"}; + throw std::runtime_error{"Rank : unsupported data type"}; } } diff --git a/runtime/onert/backend/cpu/ops/ZerosLikeLayer.h b/runtime/onert/backend/cpu/ops/RankLayer.h index 054894203..6282ceb07 100644 --- a/runtime/onert/backend/cpu/ops/ZerosLikeLayer.h +++ b/runtime/onert/backend/cpu/ops/RankLayer.h @@ -14,8 +14,8 @@ * limitations under the License. */ -#ifndef __ONERT_BACKEND_CPU_OPS_ZEROS_LIKE_LAYER_H__ -#define __ONERT_BACKEND_CPU_OPS_ZEROS_LIKE_LAYER_H__ +#ifndef __ONERT_BACKEND_CPU_OPS_RANKLAYER_H__ +#define __ONERT_BACKEND_CPU_OPS_RANKLAYER_H__ #include <backend/IPortableTensor.h> @@ -29,11 +29,13 @@ namespace cpu { namespace ops { -class ZerosLikeLayer : public ::onert::exec::IFunction + +class RankLayer : public ::onert::exec::IFunction { public: - ZerosLikeLayer(); + RankLayer(); +public: void configure(const IPortableTensor *input, IPortableTensor *output); void run() override; @@ -48,4 +50,4 @@ private: } // namespace backend } // namespace onert -#endif // __ONERT_BACKEND_CPU_OPS_ZEROS_LIKE_LAYER_H__ +#endif // __ONERT_BACKEND_CPU_OPS_RANKLAYER_H__ diff --git a/runtime/onert/backend/cpu/ops/ReLU6Layer.cc b/runtime/onert/backend/cpu/ops/ReLU6Layer.cc deleted file mode 100644 index 26eb35e0d..000000000 --- a/runtime/onert/backend/cpu/ops/ReLU6Layer.cc +++ /dev/null @@ -1,74 +0,0 @@ -/* - * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ - -#include "ReLU6Layer.h" - -#include "OperationUtils.h" - -#include <cker/operation/ReLU6.h> - -namespace onert -{ -namespace backend -{ -namespace cpu -{ -namespace ops -{ - -ReLU6Layer::ReLU6Layer() : _input(nullptr), _output(nullptr) -{ - // DO NOTHING -} - -void ReLU6Layer::relu6Float32() -{ - nnfw::cker::ReLU6(getTensorShape(_input), reinterpret_cast<const float *>(_input->buffer()), - reinterpret_cast<float *>(_output->buffer())); -} - -void ReLU6Layer::relu6Quant8() -{ - // cker quant8 relu is not implemented yet - throw std::runtime_error{"NYI"}; -} - -void ReLU6Layer::configure(const IPortableTensor *input, IPortableTensor *output) -{ - _input = input; - _output = output; -} - -void ReLU6Layer::run() -{ - if (_input->data_type() == OperandType::FLOAT32) - { - relu6Float32(); - } - else if (_input->data_type() == OperandType::QUANT_UINT8_ASYMM) - { - relu6Quant8(); - } - else - { - throw std::runtime_error{"ReLU6: unsupported data type"}; - } -} - -} // namespace ops -} // namespace cpu -} // namespace backend -} // namespace onert diff --git a/runtime/onert/backend/cpu/ops/ReLULayer.cc b/runtime/onert/backend/cpu/ops/ReLULayer.cc deleted file mode 100644 index cb4529feb..000000000 --- a/runtime/onert/backend/cpu/ops/ReLULayer.cc +++ /dev/null @@ -1,74 +0,0 @@ -/* - * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#include "ReLULayer.h" - -#include "OperationUtils.h" - -#include <cker/operation/ReLU.h> - -namespace onert -{ -namespace backend -{ -namespace cpu -{ -namespace ops -{ - -ReLULayer::ReLULayer() : _input(nullptr), _output(nullptr) -{ - // DO NOTHING -} - -void ReLULayer::reluFloat32() -{ - nnfw::cker::ReLU(getTensorShape(_input), reinterpret_cast<const float *>(_input->buffer()), - getTensorShape(_output), reinterpret_cast<float *>(_output->buffer())); -} - -void ReLULayer::reluQuant8() -{ - // cker quant8 relu is not implemented yet - throw std::runtime_error{"NYI"}; -} - -void ReLULayer::configure(const IPortableTensor *input, IPortableTensor *output) -{ - _input = input; - _output = output; -} - -void ReLULayer::run() -{ - if (_input->data_type() == OperandType::FLOAT32) - { - reluFloat32(); - } - else if (_input->data_type() == OperandType::QUANT_UINT8_ASYMM) - { - reluQuant8(); - } - else - { - throw std::runtime_error{"ReLU: unsupported data type"}; - } -} - -} // namespace ops -} // namespace cpu -} // namespace backend -} // namespace onert diff --git a/runtime/onert/backend/cpu/ops/ReLULayer.h b/runtime/onert/backend/cpu/ops/ReLULayer.h deleted file mode 100644 index 4ba2be772..000000000 --- a/runtime/onert/backend/cpu/ops/ReLULayer.h +++ /dev/null @@ -1,57 +0,0 @@ -/* - * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. 
- * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#ifndef __ONERT_BACKEND_CPU_OPS_RELULAYER_H__ -#define __ONERT_BACKEND_CPU_OPS_RELULAYER_H__ - -#include <backend/IPortableTensor.h> - -#include <exec/IFunction.h> - -namespace onert -{ -namespace backend -{ -namespace cpu -{ -namespace ops -{ - -class ReLULayer : public ::onert::exec::IFunction -{ -public: - ReLULayer(); - -public: - void reluFloat32(); - - void reluQuant8(); - - void configure(const IPortableTensor *input, IPortableTensor *output); - - void run() override; - -private: - const IPortableTensor *_input; - IPortableTensor *_output; -}; - -} // namespace ops -} // namespace cpu -} // namespace backend -} // namespace onert - -#endif // __ONERT_BACKEND_CPU_OPS_RELULAYER_H__ diff --git a/runtime/onert/backend/cpu/ops/ReduceLayer.cc b/runtime/onert/backend/cpu/ops/ReduceLayer.cc index fe22dbed7..bb5f85d60 100644 --- a/runtime/onert/backend/cpu/ops/ReduceLayer.cc +++ b/runtime/onert/backend/cpu/ops/ReduceLayer.cc @@ -49,27 +49,31 @@ void evalLogic(const IPortableTensor *input, IPortableTensor *output, const std: } template <typename T> -void evalType(const IPortableTensor *input, IPortableTensor *output, const std::vector<int> &axes, - bool keep_dims, nnfw::cker::Reduce &reduce_kernel, ReduceType reduce_type) +std::function<void(const IPortableTensor *, IPortableTensor *, const std::vector<int> &)> +evalType(bool keep_dims, nnfw::cker::Reduce &reduce_kernel, ReduceType reduce_type) { switch (reduce_type) { case ReduceType::kSum: - return evalLogic<T>(input, output, axes, keep_dims, static_cast<T>(0), reduce_kernel, - [](const T current, const T in) -> T { return in + current; }); + return std::bind(&evalLogic<T>, std::placeholders::_1, std::placeholders::_2, + std::placeholders::_3, keep_dims, static_cast<T>(0), reduce_kernel, + [](const T current, const T in) -> T { return in + current; }); break; case ReduceType::kProd: - return evalLogic<T>(input, output, axes, keep_dims, static_cast<T>(1), reduce_kernel, - [](const T current, const T in) -> T { return in * current; }); + return std::bind(&evalLogic<T>, std::placeholders::_1, std::placeholders::_2, + std::placeholders::_3, keep_dims, static_cast<T>(1), reduce_kernel, + [](const T current, const T in) -> T { return in * current; }); break; case ReduceType::kMax: - return evalLogic<T>( - input, output, axes, keep_dims, std::numeric_limits<T>::lowest(), reduce_kernel, + return std::bind( + &evalLogic<T>, std::placeholders::_1, std::placeholders::_2, std::placeholders::_3, + keep_dims, std::numeric_limits<T>::lowest(), reduce_kernel, [](const T current, const T in) -> T { return (in > current) ? in : current; }); break; case ReduceType::kMin: - return evalLogic<T>( - input, output, axes, keep_dims, std::numeric_limits<T>::max(), reduce_kernel, + return std::bind( + &evalLogic<T>, std::placeholders::_1, std::placeholders::_2, std::placeholders::_3, + keep_dims, std::numeric_limits<T>::max(), reduce_kernel, [](const T current, const T in) -> T { return (in < current) ? 
in : current; }); break; default: @@ -79,44 +83,44 @@ void evalType(const IPortableTensor *input, IPortableTensor *output, const std:: // Template specialization for bool type template <> -void evalType<bool>(const IPortableTensor *input, IPortableTensor *output, - const std::vector<int> &axes, bool keep_dims, nnfw::cker::Reduce &reduce_kernel, - ReduceType reduce_type) +std::function<void(const IPortableTensor *, IPortableTensor *, const std::vector<int> &)> +evalType<bool>(bool keep_dims, nnfw::cker::Reduce &reduce_kernel, ReduceType reduce_type) { switch (reduce_type) { case ReduceType::kAny: - return evalLogic<bool>( - input, output, axes, keep_dims, false, reduce_kernel, - [](const bool current, const bool in) -> bool { return in || current; }); + return std::bind(&evalLogic<bool>, std::placeholders::_1, std::placeholders::_2, + std::placeholders::_3, keep_dims, false, reduce_kernel, + [](const bool current, const bool in) -> bool { return in || current; }); break; case ReduceType::kAll: - return evalLogic<bool>( - input, output, axes, keep_dims, true, reduce_kernel, - [](const bool current, const bool in) -> bool { return in && current; }); + return std::bind(&evalLogic<bool>, std::placeholders::_1, std::placeholders::_2, + std::placeholders::_3, keep_dims, true, reduce_kernel, + [](const bool current, const bool in) -> bool { return in && current; }); break; default: throw std::runtime_error{"Reduce: Unsupported reduce type"}; } } -template <ReduceType reduce_type> -void evalGeneric(const IPortableTensor *input, IPortableTensor *output, - const std::vector<int> &axes, bool keep_dims, nnfw::cker::Reduce &reduce_kernel) +std::function<void(const IPortableTensor *, IPortableTensor *, const std::vector<int> &)> +generateKernelGeneric(const IPortableTensor *input, bool keep_dims, + nnfw::cker::Reduce &reduce_kernel, ReduceType reduce_type) { switch (input->data_type()) { case OperandType::FLOAT32: - return evalType<float>(input, output, axes, keep_dims, reduce_kernel, reduce_type); + return evalType<float>(keep_dims, reduce_kernel, reduce_type); case OperandType::INT32: - return evalType<int32_t>(input, output, axes, keep_dims, reduce_kernel, reduce_type); + return evalType<int32_t>(keep_dims, reduce_kernel, reduce_type); case OperandType::BOOL8: - return evalType<bool>(input, output, axes, keep_dims, reduce_kernel, reduce_type); + return evalType<bool>(keep_dims, reduce_kernel, reduce_type); default: throw std::runtime_error{"Reduce(generic): unsupported data type"}; } } +// TODO Refine this function void evalSumQuantized(const IPortableTensor *input, IPortableTensor *output, const std::vector<int> &axes, bool keep_dims, nnfw::cker::Reduce &reduce_kernel) @@ -146,14 +150,15 @@ void evalSumQuantized(const IPortableTensor *input, IPortableTensor *output, return; } - evalGeneric<ReduceType::kSum>(input, output, axes, keep_dims, reduce_kernel); + const auto kernel = generateKernelGeneric(input, keep_dims, reduce_kernel, ReduceType::kSum); + kernel(input, output, axes); } } // namespace ReduceLayer::ReduceLayer() - : _input(nullptr), _axes(nullptr), _output(nullptr), _reduceType(ReduceType::kAny), - _keep_dims(false), _reduce_kernel(new nnfw::cker::Reduce()) + : _input(nullptr), _axes(nullptr), _output(nullptr), _reduce_kernel(new nnfw::cker::Reduce()), + _kernel() { // DO NOTHING } @@ -166,43 +171,44 @@ void ReduceLayer::configure(const IPortableTensor *input, const IPortableTensor _input = input; _axes = axes; _output = output; - _reduceType = reduceType; - _keep_dims = keep_dims; -} -void 
ReduceLayer::run() -{ - const auto axes = getReducerAxes(_axes); - switch (_reduceType) + switch (reduceType) { case ReduceType::kSum: if (_input->data_type() == OperandType::QUANT_UINT8_ASYMM) { - evalSumQuantized(_input, _output, axes, _keep_dims, *_reduce_kernel); + _kernel = std::bind(&evalSumQuantized, std::placeholders::_1, std::placeholders::_2, + std::placeholders::_3, keep_dims, *_reduce_kernel); return; } - evalGeneric<ReduceType::kSum>(_input, _output, axes, _keep_dims, *_reduce_kernel); + _kernel = generateKernelGeneric(_input, keep_dims, *_reduce_kernel, ReduceType::kSum); break; case ReduceType::kProd: - evalGeneric<ReduceType::kProd>(_input, _output, axes, _keep_dims, *_reduce_kernel); + _kernel = generateKernelGeneric(_input, keep_dims, *_reduce_kernel, ReduceType::kProd); break; case ReduceType::kMax: - evalGeneric<ReduceType::kMax>(_input, _output, axes, _keep_dims, *_reduce_kernel); + _kernel = generateKernelGeneric(_input, keep_dims, *_reduce_kernel, ReduceType::kMax); break; case ReduceType::kMin: - evalGeneric<ReduceType::kMin>(_input, _output, axes, _keep_dims, *_reduce_kernel); + _kernel = generateKernelGeneric(_input, keep_dims, *_reduce_kernel, ReduceType::kMin); break; case ReduceType::kAny: - evalGeneric<ReduceType::kAny>(_input, _output, axes, _keep_dims, *_reduce_kernel); + _kernel = generateKernelGeneric(_input, keep_dims, *_reduce_kernel, ReduceType::kAny); break; case ReduceType::kAll: - evalGeneric<ReduceType::kAll>(_input, _output, axes, _keep_dims, *_reduce_kernel); + _kernel = generateKernelGeneric(_input, keep_dims, *_reduce_kernel, ReduceType::kAll); break; default: throw std::runtime_error{"ReduceSum: Unsupported reduce type"}; } } +void ReduceLayer::run() +{ + const auto axes = getReducerAxes(_axes); + _kernel(_input, _output, axes); +} + } // namespace ops } // namespace cpu } // namespace backend diff --git a/runtime/onert/backend/cpu/ops/ReduceLayer.h b/runtime/onert/backend/cpu/ops/ReduceLayer.h index 8e7bcdb07..332d399bd 100644 --- a/runtime/onert/backend/cpu/ops/ReduceLayer.h +++ b/runtime/onert/backend/cpu/ops/ReduceLayer.h @@ -65,10 +65,11 @@ private: const IPortableTensor *_input; const IPortableTensor *_axes; IPortableTensor *_output; - ReduceType _reduceType; - bool _keep_dims; std::unique_ptr<nnfw::cker::Reduce> _reduce_kernel; + std::function<void(const IPortableTensor *input, IPortableTensor *output, + const std::vector<int> &axes)> + _kernel; }; } // namespace ops diff --git a/runtime/onert/backend/cpu/ops/RoundLayer.h b/runtime/onert/backend/cpu/ops/RoundLayer.h deleted file mode 100644 index fc6a46c0d..000000000 --- a/runtime/onert/backend/cpu/ops/RoundLayer.h +++ /dev/null @@ -1,54 +0,0 @@ -/* - * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License.
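
The ReduceLayer rewrite above moves all type and reduce-type dispatch out of run() and into configure(), caching the chosen kernel in a std::function member so run() is a single indirect call. A minimal sketch of that dispatch-hoisting pattern, with illustrative names rather than the onert API:

#include <functional>
#include <stdexcept>
#include <vector>

enum class ReduceType
{
  kSum,
  kProd
};

// Resolve the reduce type once and return a reusable kernel; the
// per-invocation data stays a parameter of the returned function.
template <typename T> std::function<T(const std::vector<T> &)> makeKernel(ReduceType type)
{
  switch (type)
  {
    case ReduceType::kSum:
      return [](const std::vector<T> &v) {
        T acc = static_cast<T>(0);
        for (const T &x : v)
          acc = acc + x;
        return acc;
      };
    case ReduceType::kProd:
      return [](const std::vector<T> &v) {
        T acc = static_cast<T>(1);
        for (const T &x : v)
          acc = acc * x;
        return acc;
      };
    default:
      throw std::runtime_error{"unsupported reduce type"};
  }
}

A layer following this pattern calls makeKernel once in configure() and stores the result; run() then only invokes the stored function, which is what the _kernel member above does with input, output, and axes as the remaining bound-at-call parameters.
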
- */ - -#ifndef __ONERT_BACKEND_CPU_OPS_ROUNDLAYER_H__ -#define __ONERT_BACKEND_CPU_OPS_ROUNDLAYER_H__ - -#include <backend/IPortableTensor.h> - -#include <exec/IFunction.h> - -namespace onert -{ -namespace backend -{ -namespace cpu -{ -namespace ops -{ -class RoundLayer : public ::onert::exec::IFunction -{ -public: - RoundLayer(); - - void configure(const IPortableTensor *input, IPortableTensor *output); - - void run() override; - -private: - void roundFloat32(); - -private: - const IPortableTensor *_input; - IPortableTensor *_output; -}; - -} // namespace ops -} // namespace cpu -} // namespace backend -} // namespace onert - -#endif // __ONERT_BACKEND_CPU_OPS_ROUNDLAYER_H__ diff --git a/runtime/onert/backend/cpu/ops/RsqrtLayer.cc b/runtime/onert/backend/cpu/ops/RsqrtLayer.cc deleted file mode 100644 index 0bd468f96..000000000 --- a/runtime/onert/backend/cpu/ops/RsqrtLayer.cc +++ /dev/null @@ -1,69 +0,0 @@ -/* - * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#include "RsqrtLayer.h" - -#include "OperationUtils.h" - -#include <cker/operation/Elementwise.h> - -namespace onert -{ -namespace backend -{ -namespace cpu -{ -namespace ops -{ -RsqrtLayer::RsqrtLayer() : _input(nullptr), _output(nullptr) -{ - // DO NOTHING -} - -void RsqrtLayer::rsqrtFloat32() -{ - nnfw::cker::Rsqrt(getTensorShape(_input), reinterpret_cast<const float *>(_input->buffer()), - getTensorShape(_output), reinterpret_cast<float *>(_output->buffer())); -} - -void RsqrtLayer::rsqrtQuant8() { throw std::runtime_error{"NYI : QASYMM8 not supported"}; } - -void RsqrtLayer::configure(const IPortableTensor *input, IPortableTensor *output) -{ - _input = input; - _output = output; -} - -void RsqrtLayer::run() -{ - if (_input->data_type() == OperandType::FLOAT32) - { - rsqrtFloat32(); - } - else if (_input->data_type() == OperandType::QUANT_UINT8_ASYMM) - { - rsqrtQuant8(); - } - else - { - throw std::runtime_error{"Rsqrt: unsupported data type"}; - } -} - -} // namespace ops -} // namespace cpu -} // namespace backend -} // namespace onert diff --git a/runtime/onert/backend/cpu/ops/RsqrtLayer.h b/runtime/onert/backend/cpu/ops/RsqrtLayer.h deleted file mode 100644 index 49abbb08d..000000000 --- a/runtime/onert/backend/cpu/ops/RsqrtLayer.h +++ /dev/null @@ -1,53 +0,0 @@ -/* - * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License.
- */ - -#ifndef __ONERT_BACKEND_CPU_OPS_RSQRTLAYER_H__ -#define __ONERT_BACKEND_CPU_OPS_RSQRTLAYER_H__ - -#include <backend/IPortableTensor.h> - -#include <exec/IFunction.h> - -namespace onert -{ -namespace backend -{ -namespace cpu -{ -namespace ops -{ -class RsqrtLayer : public ::onert::exec::IFunction -{ -public: - RsqrtLayer(); - - void configure(const IPortableTensor *input, IPortableTensor *output); - - void run() override; - -private: - void rsqrtFloat32(); - void rsqrtQuant8(); - const IPortableTensor *_input; - IPortableTensor *_output; -}; - -} // namespace ops -} // namespace cpu -} // namespace backend -} // namespace onert - -#endif // __ONERT_BACKEND_CPU_OPS_RSQRTLAYER_H__ diff --git a/runtime/onert/backend/cpu/ops/SinLayer.cc b/runtime/onert/backend/cpu/ops/SinLayer.cc deleted file mode 100644 index 2a6b11753..000000000 --- a/runtime/onert/backend/cpu/ops/SinLayer.cc +++ /dev/null @@ -1,68 +0,0 @@ -/* - * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#include "SinLayer.h" -#include "OperationUtils.h" - -#include <cker/operation/Elementwise.h> - -namespace onert -{ -namespace backend -{ -namespace cpu -{ -namespace ops -{ -SinLayer::SinLayer() : _input(nullptr), _output(nullptr) -{ - // DO NOTHING -} - -void SinLayer::sinFloat32() -{ - nnfw::cker::Sin(getTensorShape(_input), reinterpret_cast<const float *>(_input->buffer()), - getTensorShape(_output), reinterpret_cast<float *>(_output->buffer())); -} - -void SinLayer::sinQuant8() { throw std::runtime_error{"NYI"}; } - -void SinLayer::configure(const IPortableTensor *input, IPortableTensor *output) -{ - _input = input; - _output = output; -} - -void SinLayer::run() -{ - if (_input->data_type() == OperandType::FLOAT32) - { - sinFloat32(); - } - else if (_input->data_type() == OperandType::QUANT_UINT8_ASYMM) - { - sinQuant8(); - } - else - { - throw std::runtime_error{"Sin: unsupported data type"}; - } -} - -} // namespace ops -} // namespace cpu -} // namespace backend -} // namespace onert diff --git a/runtime/onert/backend/cpu/ops/SinLayer.h b/runtime/onert/backend/cpu/ops/SinLayer.h deleted file mode 100644 index 348350f41..000000000 --- a/runtime/onert/backend/cpu/ops/SinLayer.h +++ /dev/null @@ -1,54 +0,0 @@ -/* - * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License.
- */ - -#ifndef __ONERT_BACKEND_CPU_OPS_SINLAYER_H__ -#define __ONERT_BACKEND_CPU_OPS_SINLAYER_H__ - -#include <backend/IPortableTensor.h> - -#include <exec/IFunction.h> - -namespace onert -{ -namespace backend -{ -namespace cpu -{ -namespace ops -{ -class SinLayer : public ::onert::exec::IFunction -{ -public: - SinLayer(); - - void configure(const IPortableTensor *input, IPortableTensor *output); - - void run() override; - -private: - void sinFloat32(); - void sinQuant8(); - - const IPortableTensor *_input; - IPortableTensor *_output; -}; - -} // namespace ops -} // namespace cpu -} // namespace backend -} // namespace onert - -#endif // __ONERT_BACKEND_CPU_OPS_SINLAYER_H__ diff --git a/runtime/onert/backend/cpu/ops/SoftMaxLayer.cc b/runtime/onert/backend/cpu/ops/SoftMaxLayer.cc index 6e2bb584a..095e67abc 100644 --- a/runtime/onert/backend/cpu/ops/SoftMaxLayer.cc +++ b/runtime/onert/backend/cpu/ops/SoftMaxLayer.cc @@ -34,55 +34,23 @@ SoftMaxLayer::SoftMaxLayer() : _input(nullptr), _output(nullptr), _beta(0.0) // DO NOTHING } -// Performs softmax along the input of size (input_size * batch_size). -void Softmax(const float *in, const int input_size, const int batch_size, const float beta, - float *out) +void SoftMaxLayer::softmaxFloat32() { - assert(input_size > 0); - - // For each batch - for (int b = 0; b < batch_size; b++) + if (getNumberOfDimensions(_input) == 1) { - // Find the max coeff. - float max_coeff = in[0]; - for (int i = 1; i < input_size; i++) - { - if (in[i] > max_coeff) - max_coeff = in[i]; - } - - // Compute the normalized sum of exps. - float exp_sum = 0.0; - for (int i = 0; i < input_size; i++) - { - out[i] = std::exp((in[i] - max_coeff) * beta); - exp_sum += out[i]; - } - - // Divide by the sum of exps. - float reciprocal_sum_exp = 1.f / exp_sum; - for (int i = 0; i < input_size; i++) - { - out[i] *= reciprocal_sum_exp; - } - - // Advance in and out pointers for the next batch. - in += input_size; - out += input_size; + uint32_t input_size = getNumberOfElements(_input); + nnfw::cker::Softmax(reinterpret_cast<const float *>(_input->buffer()), input_size, 1, _beta, + reinterpret_cast<float *>(_output->buffer())); } -} - -void SoftMaxLayer::softmaxFloat32() -{ - if (getNumberOfDimensions(_input) == 2) + else if (getNumberOfDimensions(_input) == 2) { uint32_t batch_size = getSizeOfDimension(_input, 0); if (batch_size == 0) throw std::runtime_error("batch_size should not be 0"); uint32_t input_size = getNumberOfElements(_input) / batch_size; - Softmax(reinterpret_cast<const float *>(_input->buffer()), input_size, batch_size, _beta, - reinterpret_cast<float *>(_output->buffer())); + nnfw::cker::Softmax(reinterpret_cast<const float *>(_input->buffer()), input_size, batch_size, + _beta, reinterpret_cast<float *>(_output->buffer())); } else if (getNumberOfDimensions(_input) == 4) { @@ -94,7 +62,7 @@ void SoftMaxLayer::softmaxFloat32() } else { - throw std::runtime_error{"only 2D and 4D tensors supported"}; + throw std::runtime_error{"only 1D, 2D and 4D tensors supported"}; } } diff --git a/runtime/onert/backend/cpu/ops/SubLayer.cc b/runtime/onert/backend/cpu/ops/SubLayer.cc deleted file mode 100644 index 597d52952..000000000 --- a/runtime/onert/backend/cpu/ops/SubLayer.cc +++ /dev/null @@ -1,162 +0,0 @@ -/* - * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. 
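
The hand-rolled Softmax deleted above implements the usual numerically stable formulation, now delegated to nnfw::cker::Softmax with the 1-D case handled as a batch of one. For reference, a self-contained version of the same math (plain C++, not cker's exact signature):

#include <algorithm>
#include <cassert>
#include <cmath>

// Softmax over one row of `size` values. Subtracting the row maximum
// before exponentiating keeps std::exp within range (stability trick).
void softmax(const float *in, int size, float beta, float *out)
{
  assert(size > 0);
  const float max_coeff = *std::max_element(in, in + size);

  float exp_sum = 0.0f;
  for (int i = 0; i < size; ++i)
  {
    out[i] = std::exp((in[i] - max_coeff) * beta);
    exp_sum += out[i];
  }

  const float reciprocal = 1.0f / exp_sum;
  for (int i = 0; i < size; ++i)
    out[i] *= reciprocal;
}
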
- * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#include "SubLayer.h" - -#include <cker/operation/BinaryArithmeticOps.h> - -namespace onert -{ -namespace backend -{ -namespace cpu -{ -namespace ops -{ - -void SubLayer::subFloat32() -{ - float output_activation_min = 0, output_activation_max = 0; - CalculateActivationRange(_activation, &output_activation_min, &output_activation_max); - nnfw::cker::BinaryArithmeticOpParam op_params; - op_params.float_activation_max = output_activation_max; - op_params.float_activation_min = output_activation_min; - - const bool need_broadcast = - nnfw::cker::ProcessBroadcastShapes(getTensorShape(_lhs), getTensorShape(_rhs), &op_params); - if (need_broadcast) - { - nnfw::cker::BroadcastBinaryArithmeticOp<nnfw::cker::BinaryArithmeticOpType::SUB>( - op_params, getTensorShape(_lhs), reinterpret_cast<const float *>(_lhs->buffer()), - getTensorShape(_rhs), reinterpret_cast<const float *>(_rhs->buffer()), - getTensorShape(_output), reinterpret_cast<float *>(_output->buffer())); - return; - } - - nnfw::cker::BinaryArithmeticOp<nnfw::cker::BinaryArithmeticOpType::SUB>( - op_params, getTensorShape(_lhs), reinterpret_cast<const float *>(_lhs->buffer()), - getTensorShape(_rhs), reinterpret_cast<const float *>(_rhs->buffer()), - getTensorShape(_output), reinterpret_cast<float *>(_output->buffer())); -} - -void SubLayer::subInt32() -{ - int32_t output_activation_min = 0, output_activation_max = 0; - CalculateActivationRange(_activation, &output_activation_min, &output_activation_max); - nnfw::cker::BinaryArithmeticOpParam op_params; - op_params.quantized_activation_max = output_activation_max; - op_params.quantized_activation_min = output_activation_min; - - const bool need_broadcast = - nnfw::cker::ProcessBroadcastShapes(getTensorShape(_lhs), getTensorShape(_rhs), &op_params); - if (need_broadcast) - { - nnfw::cker::BroadcastBinaryArithmeticOp<nnfw::cker::BinaryArithmeticOpType::SUB>( - op_params, getTensorShape(_lhs), reinterpret_cast<const int32_t *>(_lhs->buffer()), - getTensorShape(_rhs), reinterpret_cast<const int32_t *>(_rhs->buffer()), - getTensorShape(_output), reinterpret_cast<int32_t *>(_output->buffer())); - return; - } - - nnfw::cker::BinaryArithmeticOp<nnfw::cker::BinaryArithmeticOpType::SUB>( - op_params, getTensorShape(_lhs), reinterpret_cast<const int32_t *>(_lhs->buffer()), - getTensorShape(_rhs), reinterpret_cast<const int32_t *>(_rhs->buffer()), - getTensorShape(_output), reinterpret_cast<int32_t *>(_output->buffer())); -} - -void SubLayer::subQuant8() -{ - int32_t output_activation_min, output_activation_max; - CalculateActivationRangeUint8(_activation, _output, &output_activation_min, - &output_activation_max); - nnfw::cker::BinaryArithmeticOpParam op_params; - op_params.quantized_activation_max = output_activation_max; - op_params.quantized_activation_min = output_activation_min; - // Parameters for scaled quantized computation - op_params.left_shift = 20; - // Zero-points of input and output tensors - op_params.input1_offset = -_lhs->data_offset(); - op_params.input2_offset = -_rhs->data_offset(); - op_params.output_offset = _output->data_offset(); - 
assert((op_params.input1_offset >= 0) && (op_params.input1_offset <= 255)); - assert((op_params.input2_offset >= 0) && (op_params.input2_offset <= 255)); - assert((op_params.output_offset >= 0) && (op_params.output_offset <= 255)); - - // Compute normalized scale for _lhs and _rhs values, - // and represent in 32-bit fixed point - const double norm_max_scale = 2 * std::max(_lhs->data_scale(), _rhs->data_scale()); - const double real_lhs_scale = _lhs->data_scale() / norm_max_scale; - const double real_rhs_scale = _rhs->data_scale() / norm_max_scale; - // output scale is used to normalize final result, so we invert the scale here - const double real_output_scale = - norm_max_scale / (_output->data_scale() * (1 << op_params.left_shift)); - - // Represent the scales as fixed int32_t multipliers, and int32_t shifts - QuantizeMultiplier(real_lhs_scale, &op_params.input1_multiplier, &op_params.input1_shift); - QuantizeMultiplier(real_rhs_scale, &op_params.input2_multiplier, &op_params.input2_shift); - op_params.input2_multiplier *= -1; - QuantizeMultiplier(real_output_scale, &op_params.output_multiplier, &op_params.output_shift); - - const bool need_broadcast = - nnfw::cker::ProcessBroadcastShapes(getTensorShape(_lhs), getTensorShape(_rhs), &op_params); - if (need_broadcast) - { - nnfw::cker::BroadcastBinaryArithmeticOp<nnfw::cker::BinaryArithmeticOpType::SUB>( - op_params, getTensorShape(_lhs), reinterpret_cast<const uint8_t *>(_lhs->buffer()), - getTensorShape(_rhs), reinterpret_cast<const uint8_t *>(_rhs->buffer()), - getTensorShape(_output), reinterpret_cast<uint8_t *>(_output->buffer())); - return; - } - - nnfw::cker::BinaryArithmeticOp<nnfw::cker::BinaryArithmeticOpType::SUB>( - op_params, getTensorShape(_lhs), reinterpret_cast<const uint8_t *>(_lhs->buffer()), - getTensorShape(_rhs), reinterpret_cast<const uint8_t *>(_rhs->buffer()), - getTensorShape(_output), reinterpret_cast<uint8_t *>(_output->buffer())); -} - -void SubLayer::configure(const IPortableTensor *lhs, const IPortableTensor *rhs, - const ir::Activation activation, IPortableTensor *output) -{ - _lhs = lhs; - _rhs = rhs; - _activation = activation; - _output = output; -} - -void SubLayer::run() -{ - if (_output->data_type() == OperandType::FLOAT32) - { - subFloat32(); - } - else if (_output->data_type() == OperandType::QUANT_UINT8_ASYMM) - { - subQuant8(); - } - else if (_output->data_type() == OperandType::INT32) - { - subInt32(); - } - else - { - throw std::runtime_error{"Sub: unsupported data type"}; - } -} - -} // namespace ops -} // namespace cpu -} // namespace backend -} // namespace onert diff --git a/runtime/onert/backend/cpu/ops/SubLayer.h b/runtime/onert/backend/cpu/ops/SubLayer.h deleted file mode 100644 index 86f32ca6d..000000000 --- a/runtime/onert/backend/cpu/ops/SubLayer.h +++ /dev/null @@ -1,67 +0,0 @@ -/* - * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
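
The deleted subQuant8() above follows the standard asymmetric-quantization recipe: each uint8 value q represents scale * (q - zero_point), so both inputs are rescaled into a shared domain, subtracted, and requantized; the production kernel does this with int32 fixed-point multipliers derived by QuantizeMultiplier. A float-domain sketch of the same arithmetic, deliberately ignoring the fixed-point machinery and activation clamping:

#include <algorithm>
#include <cmath>
#include <cstdint>

// Per-tensor asymmetric quantization parameters.
struct QuantParams
{
  float scale;
  int32_t zero_point;
};

// Reference uint8 subtraction: dequantize, subtract, requantize.
uint8_t subQuant8Ref(uint8_t lhs, const QuantParams &lq, uint8_t rhs, const QuantParams &rq,
                     const QuantParams &oq)
{
  const float real = lq.scale * (lhs - lq.zero_point) - rq.scale * (rhs - rq.zero_point);
  const int32_t q = static_cast<int32_t>(std::round(real / oq.scale)) + oq.zero_point;
  return static_cast<uint8_t>(std::min(255, std::max(0, q)));
}
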
- */ - -#ifndef __ONERT_BACKEND_CPU_OPS_SUBLAYER_H__ -#define __ONERT_BACKEND_CPU_OPS_SUBLAYER_H__ - -#include <backend/IPortableTensor.h> -#include "OperationUtils.h" - -#include <exec/IFunction.h> - -namespace onert -{ -namespace backend -{ -namespace cpu -{ -namespace ops -{ - -class SubLayer : public ::onert::exec::IFunction -{ -public: - SubLayer() : _lhs(nullptr), _rhs(nullptr), _output(nullptr) - { - // DO NOTHING - } - -public: - void subFloat32(); - - void subQuant8(); - - void subInt32(); - - void configure(const IPortableTensor *lhs, const IPortableTensor *rhs, - const ir::Activation activation, IPortableTensor *output); - - void run() override; - -private: - const IPortableTensor *_lhs; - const IPortableTensor *_rhs; - IPortableTensor *_output; - - ir::Activation _activation{ir::Activation::NONE}; -}; - -} // namespace ops -} // namespace cpu -} // namespace backend -} // namespace onert - -#endif // __ONERT_BACKEND_CPU_OPS_SUBLAYER_H__ diff --git a/runtime/onert/backend/cpu/ops/TanhLayer.cc b/runtime/onert/backend/cpu/ops/TanhLayer.cc deleted file mode 100644 index 910ac1f41..000000000 --- a/runtime/onert/backend/cpu/ops/TanhLayer.cc +++ /dev/null @@ -1,103 +0,0 @@ -/* - * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ - -#include "TanhLayer.h" - -#include "OperationUtils.h" - -#include <cker/operation/Tanh.h> - -namespace onert -{ -namespace backend -{ -namespace cpu -{ -namespace ops -{ - -TanhLayer::TanhLayer() : _input(nullptr), _output(nullptr) -{ - // DO NOTHING -} - -void TanhLayer::PopulateLookupTable() -{ - const auto input_scale = static_cast<double>(_input->data_scale()); - const auto input_zero_point = static_cast<int32_t>(_input->data_offset()); - const auto output_scale = static_cast<double>(_output->data_scale()); - const auto output_zero_point = static_cast<int32_t>(_output->data_offset()); - const float inverse_scale = 1 / output_scale; - int32_t maxval = std::numeric_limits<uint8_t>::max(); - int32_t minval = std::numeric_limits<uint8_t>::min(); - for (int32_t val = minval; val <= maxval; ++val) - { - const float dequantized = input_scale * (val - input_zero_point); - const float transformed = std::tanh(dequantized); - const float rescaled = std::round(transformed * inverse_scale); - const int32_t quantized = static_cast<int32_t>(rescaled + output_zero_point); - _table[val] = static_cast<uint8_t>(std::max(std::min(maxval, quantized), minval)); - } -} - -void TanhLayer::tanhFloat32() -{ - nnfw::cker::Tanh(getTensorShape(_input), reinterpret_cast<const float *>(_input->buffer()), - getTensorShape(_output), reinterpret_cast<float *>(_output->buffer())); -} - -void TanhLayer::tanhQuant8() -{ - const int size = MatchingFlatSize(getTensorShape(_input), getTensorShape(_output)); - const uint8_t *input_data = reinterpret_cast<const uint8_t *>(_input->buffer()); - uint8_t *output_data = reinterpret_cast<uint8_t *>(_output->buffer()); - - for (int i = 0; i < size; ++i) - { - output_data[i] = _table[input_data[i]]; - } -} - -void TanhLayer::configure(const IPortableTensor *input, IPortableTensor *output) -{ - _input = input; - _output = output; - if (_input->data_type() == OperandType::QUANT_UINT8_ASYMM) - { - PopulateLookupTable(); - } -} - -void TanhLayer::run() -{ - if (_input->data_type() == OperandType::FLOAT32) - { - tanhFloat32(); - } - else if (_input->data_type() == OperandType::QUANT_UINT8_ASYMM) - { - tanhQuant8(); - } - else - { - throw std::runtime_error{"Tanh: unsupported data type"}; - } -} - -} // namespace ops -} // namespace cpu -} // namespace backend -} // namespace onert diff --git a/runtime/onert/backend/cpu/ops/ZerosLikeLayer.cc b/runtime/onert/backend/cpu/ops/ZerosLikeLayer.cc deleted file mode 100644 index ae8084518..000000000 --- a/runtime/onert/backend/cpu/ops/ZerosLikeLayer.cc +++ /dev/null @@ -1,63 +0,0 @@ -/* - * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
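
The deleted TanhLayer above uses the standard lookup-table trick for uint8 activations: a quantized input takes only 256 distinct values, so the whole transfer function can be precomputed once in configure() and run() becomes one table load per element. A generic sketch of building such a table (hypothetical helper, not onert code):

#include <algorithm>
#include <array>
#include <cmath>
#include <cstdint>
#include <functional>

// Maps every possible uint8 input through `f` in the real-valued domain,
// then requantizes to the output parameters, clamping to [0, 255].
std::array<uint8_t, 256> buildLut(const std::function<float(float)> &f, float in_scale,
                                  int32_t in_zero_point, float out_scale, int32_t out_zero_point)
{
  std::array<uint8_t, 256> table{};
  for (int32_t val = 0; val <= 255; ++val)
  {
    const float dequantized = in_scale * (val - in_zero_point);
    const float transformed = f(dequantized);
    const int32_t quantized =
        static_cast<int32_t>(std::round(transformed / out_scale)) + out_zero_point;
    table[val] = static_cast<uint8_t>(std::max(0, std::min(255, quantized)));
  }
  return table;
}

With f = [](float x) { return std::tanh(x); } this reproduces what PopulateLookupTable above computes inline.
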
- */ - -#include "ZerosLikeLayer.h" - -#include "OperationUtils.h" - -namespace onert -{ -namespace backend -{ -namespace cpu -{ -namespace ops -{ -ZerosLikeLayer::ZerosLikeLayer() : _input(nullptr), _output(nullptr) -{ - // DO NOTHING -} - -void ZerosLikeLayer::configure(const IPortableTensor *input, IPortableTensor *output) -{ - _input = input; - _output = output; -} - -void ZerosLikeLayer::run() -{ - if (!HaveSameShapes(_input, _output)) - throw std::runtime_error{"ZerosLike: input and output shape don't match."}; - - auto element_size = getTensorShape(_input).FlatSize(); - - switch (_input->data_type()) - { - case OperandType::FLOAT32: - memset(reinterpret_cast<float *>(_output->buffer()), 0, element_size * sizeof(float)); - break; - case OperandType::INT32: - memset(reinterpret_cast<int32_t *>(_output->buffer()), 0, element_size * sizeof(int32_t)); - break; - default: - throw std::runtime_error{"ZerosLike: unsupported data type"}; - } -} - -} // namespace ops -} // namespace cpu -} // namespace backend -} // namespace onert
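
The deleted ZerosLikeLayer closing this section relies on the all-zero byte pattern encoding the value zero for both IEEE-754 FLOAT32 and two's-complement INT32, which is why one memset per buffer suffices. A minimal sketch of that reasoning:

#include <cstring>
#include <vector>

// memset is type-agnostic here because 0x00...00 is the value zero for
// IEEE-754 floats and for two's-complement integers alike.
template <typename T> void zerosLike(const std::vector<T> &input, std::vector<T> &output)
{
  output.resize(input.size());
  std::memset(output.data(), 0, output.size() * sizeof(T));
}
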