Diffstat (limited to 'runtime/onert/backend')
43 files changed, 2986 insertions, 1622 deletions
diff --git a/runtime/onert/backend/acl_cl/KernelGenerator.cc b/runtime/onert/backend/acl_cl/KernelGenerator.cc index 3ca405899..a84f983b4 100644 --- a/runtime/onert/backend/acl_cl/KernelGenerator.cc +++ b/runtime/onert/backend/acl_cl/KernelGenerator.cc @@ -31,6 +31,7 @@ #include "exec/FunctionSequence.h" #include "util/logging.h" #include "util/Utils.h" +#include "AclKernelGen.h" namespace onert { @@ -76,15 +77,15 @@ void KernelGenerator::visit(const ir::operation::BatchToSpaceND &node) const auto block_size_index{ node.getInputs().at(ir::operation::BatchToSpaceND::Input::BLOCK_SIZE)}; - auto ofm_alloc = _tensor_builder->at(ofm_index).get(); - auto ifm_alloc = _tensor_builder->at(ifm_index).get(); - auto block_size_alloc = _tensor_builder->at(block_size_index).get(); + auto ofm_tensor = _tensor_builder->at(ofm_index).get(); + auto ifm_tensor = _tensor_builder->at(ifm_index).get(); + auto block_size_tensor = _tensor_builder->at(block_size_index).get(); assert(_ctx.at(block_size_index).data()); auto fn = std::make_unique<::arm_compute::CLBatchToSpaceLayer>(); - fn->configure(ifm_alloc->handle(), block_size_alloc->handle(), ofm_alloc->handle()); + fn->configure(ifm_tensor->handle(), block_size_tensor->handle(), ofm_tensor->handle()); auto acl_fn = asAclClFunction(std::move(fn)); @@ -96,15 +97,27 @@ void KernelGenerator::visit(const ir::operation::Cast &node) const auto ofm_index{node.getOutputs().at(0)}; const auto ifm_index{node.getInputs().at(ir::operation::Cast::Input::INPUT)}; - auto ofm_alloc = _tensor_builder->at(ofm_index).get(); - auto ifm_alloc = _tensor_builder->at(ifm_index).get(); - const auto input_sub_type = _ctx.at(ifm_index).typeInfo().type() == ir::DataType::BOOL8 - ? arm_compute::SubDataType::BOOL - : arm_compute::SubDataType::NONE; + auto ofm_tensor = _tensor_builder->at(ofm_index).get(); + auto ifm_tensor = _tensor_builder->at(ifm_index).get(); - auto fn = std::make_unique<::arm_compute::CLCast>(); + std::unique_ptr<::arm_compute::IFunction> fn; + if (ifm_tensor->data_type() == ofm_tensor->data_type()) + { + auto l = std::make_unique<::arm_compute::CLCopy>(); + + l->configure(ifm_tensor->handle(), ofm_tensor->handle()); + + fn = std::move(l); + } + else + { + auto l = std::make_unique<::arm_compute::CLCast>(); - fn->configure(ifm_alloc->handle(), ofm_alloc->handle(), input_sub_type); + // TODO Support converting float to int32 as round down + l->configure(ifm_tensor->handle(), ofm_tensor->handle(), arm_compute::ConvertPolicy::SATURATE); + + fn = std::move(l); + } auto acl_fn = asAclClFunction(std::move(fn)); @@ -132,10 +145,10 @@ void KernelGenerator::visit(const ir::operation::Conv2D &node) ker_width, ker_height); const auto activation = node.param().activation; - auto ofm_alloc = _tensor_builder->at(ofm_index).get(); - auto ifm_alloc = _tensor_builder->at(ifm_index).get(); - auto ker_alloc = _tensor_builder->at(ker_index).get(); - auto bias_alloc = _tensor_builder->at(bias_index).get(); + auto ofm_tensor = _tensor_builder->at(ofm_index).get(); + auto ifm_tensor = _tensor_builder->at(ifm_index).get(); + auto ker_tensor = _tensor_builder->at(ker_index).get(); + auto bias_tensor = _tensor_builder->at(bias_index).get(); const auto conv_info = acl_common::asPadStrideInfo(padding, stride); const auto act_info = acl_common::asActivationLayerInfo(activation); @@ -143,8 +156,9 @@ void KernelGenerator::visit(const ir::operation::Conv2D &node) auto fn = std::make_unique<::arm_compute::CLConvolutionLayer>( _tensor_builder->acl_tensor_manager()->internal_buffer_manager()); - 
fn->configure(ifm_alloc->handle(), ker_alloc->handle(), bias_alloc->handle(), ofm_alloc->handle(), - conv_info, ::arm_compute::WeightsInfo(), ::arm_compute::Size2D(1U, 1U), act_info); + fn->configure(ifm_tensor->handle(), ker_tensor->handle(), bias_tensor->handle(), + ofm_tensor->handle(), conv_info, ::arm_compute::WeightsInfo(), + ::arm_compute::Size2D(1U, 1U), act_info); _return_fn = asAclClFunction(std::move(fn)); } @@ -171,10 +185,10 @@ void KernelGenerator::visit(const ir::operation::DepthwiseConv2D &node) const auto multiplier = node.param().multiplier; const auto activation = node.param().activation; - auto ofm_alloc = _tensor_builder->at(ofm_index).get(); - auto ifm_alloc = _tensor_builder->at(ifm_index).get(); - auto ker_alloc = _tensor_builder->at(ker_index).get(); - auto bias_alloc = _tensor_builder->at(bias_index).get(); + auto ofm_tensor = _tensor_builder->at(ofm_index).get(); + auto ifm_tensor = _tensor_builder->at(ifm_index).get(); + auto ker_tensor = _tensor_builder->at(ker_index).get(); + auto bias_tensor = _tensor_builder->at(bias_index).get(); const auto conv_info = acl_common::asPadStrideInfo(padding, stride); const auto act_info = acl_common::asActivationLayerInfo(activation); @@ -182,8 +196,8 @@ void KernelGenerator::visit(const ir::operation::DepthwiseConv2D &node) { auto fn = std::make_unique<::arm_compute::CLDepthwiseConvolutionLayer>(); - fn->configure(ifm_alloc->handle(), ker_alloc->handle(), bias_alloc->handle(), - ofm_alloc->handle(), conv_info, multiplier, act_info); + fn->configure(ifm_tensor->handle(), ker_tensor->handle(), bias_tensor->handle(), + ofm_tensor->handle(), conv_info, multiplier, act_info); _return_fn = asAclClFunction(std::move(fn)); } @@ -191,88 +205,28 @@ void KernelGenerator::visit(const ir::operation::DepthwiseConv2D &node) void KernelGenerator::visit(const ir::operation::MaxPool2D &node) { - const auto ofm_index{node.getOutputs().at(0)}; - const auto ifm_index{node.getInputs().at(ir::operation::MaxPool2D::Input::INPUT)}; - - const auto ofm_shape = _ctx.at(ofm_index).shape().asFeature(_current_op_seq_layout); - const auto ifm_shape = _ctx.at(ifm_index).shape().asFeature(_current_op_seq_layout); + auto raw_fn = acl_common::kernelGenPool2D<::arm_compute::CLPoolingLayer>( + node, _ctx, _tensor_builder, _current_op_seq_layout, ::arm_compute::PoolingType::MAX); - const auto kh = node.param().kh; - const auto kw = node.param().kw; - const auto stride = node.param().stride; - const auto padding = - ir::calculatePadding(node.param().padding, ifm_shape, ofm_shape, stride, kw, kh); + const auto ofm_index{node.getOutputs().at(0)}; + auto ofm_tensor = _tensor_builder->at(ofm_index).get(); const auto activation = node.param().activation; - - VERBOSE(MaxPool2D) << "IFM_H: " << ifm_shape.H << std::endl; - VERBOSE(MaxPool2D) << "IFM_W: " << ifm_shape.W << std::endl; - VERBOSE(MaxPool2D) << "OFM_H: " << ofm_shape.H << std::endl; - VERBOSE(MaxPool2D) << "OFM_W: " << ofm_shape.W << std::endl; - VERBOSE(MaxPool2D) << "KER_H: " << kh << std::endl; - VERBOSE(MaxPool2D) << "KER_W: " << kw << std::endl; - VERBOSE(MaxPool2D) << "STRIDE_H: " << stride.vertical << std::endl; - VERBOSE(MaxPool2D) << "STRIDE_W: " << stride.horizontal << std::endl; - VERBOSE(MaxPool2D) << "PAD(T): " << padding.top << std::endl; - VERBOSE(MaxPool2D) << "PAD(B): " << padding.bottom << std::endl; - VERBOSE(MaxPool2D) << "PAD(L): " << padding.left << std::endl; - VERBOSE(MaxPool2D) << "PAD(R): " << padding.right << std::endl; - - auto ofm_alloc = _tensor_builder->at(ofm_index).get(); - auto 
ifm_alloc = _tensor_builder->at(ifm_index).get(); - - ::arm_compute::PoolingLayerInfo info{::arm_compute::PoolingType::MAX, - ::arm_compute::Size2D{kw, kh}, - acl_common::asPadStrideInfo(padding, stride)}; - - auto fn = std::make_unique<::arm_compute::CLPoolingLayer>(); - - fn->configure(ifm_alloc->handle(), ofm_alloc->handle(), info); - _return_fn = std::make_unique<exec::FunctionSequence>( - asAclClFunction(std::move(fn)), ActivationBuilder::generate(activation, ofm_alloc->handle())); + asAclClFunction(std::move(raw_fn)), + ActivationBuilder::generate(activation, ofm_tensor->handle())); } void KernelGenerator::visit(const ir::operation::AvgPool2D &node) { - const auto ofm_index{node.getOutputs().at(0)}; - const auto ifm_index{node.getInputs().at(ir::operation::AvgPool2D::Input::INPUT)}; - - const auto ofm_shape = _ctx.at(ofm_index).shape().asFeature(_current_op_seq_layout); - const auto ifm_shape = _ctx.at(ifm_index).shape().asFeature(_current_op_seq_layout); + auto raw_fn = acl_common::kernelGenPool2D<::arm_compute::CLPoolingLayer>( + node, _ctx, _tensor_builder, _current_op_seq_layout, ::arm_compute::PoolingType::AVG); - const auto kh = node.param().kh; - const auto kw = node.param().kw; - const auto stride = node.param().stride; - const auto padding = - ir::calculatePadding(node.param().padding, ifm_shape, ofm_shape, stride, kw, kh); + const auto ofm_index{node.getOutputs().at(0)}; + auto ofm_tensor = _tensor_builder->at(ofm_index).get(); const auto activation = node.param().activation; - - VERBOSE(AvgPool2D) << "IFM_H: " << ifm_shape.H << std::endl; - VERBOSE(AvgPool2D) << "IFM_W: " << ifm_shape.W << std::endl; - VERBOSE(AvgPool2D) << "OFM_H: " << ofm_shape.H << std::endl; - VERBOSE(AvgPool2D) << "OFM_W: " << ofm_shape.W << std::endl; - VERBOSE(AvgPool2D) << "KER_H: " << kh << std::endl; - VERBOSE(AvgPool2D) << "KER_W: " << kw << std::endl; - VERBOSE(AvgPool2D) << "STRIDE_H: " << stride.vertical << std::endl; - VERBOSE(AvgPool2D) << "STRIDE_W: " << stride.horizontal << std::endl; - VERBOSE(AvgPool2D) << "PAD(T): " << padding.top << std::endl; - VERBOSE(AvgPool2D) << "PAD(B): " << padding.bottom << std::endl; - VERBOSE(AvgPool2D) << "PAD(L): " << padding.left << std::endl; - VERBOSE(AvgPool2D) << "PAD(R): " << padding.right << std::endl; - - auto ofm_alloc = _tensor_builder->at(ofm_index).get(); - auto ifm_alloc = _tensor_builder->at(ifm_index).get(); - - ::arm_compute::PoolingLayerInfo info{ - ::arm_compute::PoolingType::AVG, ::arm_compute::Size2D{kw, kh}, - acl_common::asPadStrideInfo(padding, stride), true /* exclude_padding */}; - - auto fn = std::make_unique<::arm_compute::CLPoolingLayer>(); - - fn->configure(ifm_alloc->handle(), ofm_alloc->handle(), info); - _return_fn = std::make_unique<exec::FunctionSequence>( - asAclClFunction(std::move(fn)), ActivationBuilder::generate(activation, ofm_alloc->handle())); + asAclClFunction(std::move(raw_fn)), + ActivationBuilder::generate(activation, ofm_tensor->handle())); } void KernelGenerator::visit(const ir::operation::Concat &node) @@ -296,7 +250,7 @@ void KernelGenerator::visit(const ir::operation::Concat &node) return; } - auto output_alloc = _tensor_builder->at(ofm_index).get(); + auto output_tensor = _tensor_builder->at(ofm_index).get(); std::vector<::arm_compute::ICLTensor *> input_tensors; for (auto &ifm_ind : input_indexes) input_tensors.emplace_back(_tensor_builder->at(ifm_ind)->handle()); @@ -305,7 +259,7 @@ void KernelGenerator::visit(const ir::operation::Concat &node) if (input_indexes.size() < 2) { auto l = 
std::make_unique<::arm_compute::CLCopy>(); - l->configure(input_tensors.at(0), output_alloc->handle()); + l->configure(input_tensors.at(0), output_tensor->handle()); fn = std::move(l); } else @@ -313,10 +267,10 @@ void KernelGenerator::visit(const ir::operation::Concat &node) auto l = std::make_unique<::arm_compute::CLConcatenateLayer>(); const auto rank = _ctx.at(ofm_index).shape().rank(); const auto frontend_layout = _current_op_seq_layout; - const auto backend_layout = output_alloc->layout(); + const auto backend_layout = output_tensor->layout(); const auto fixed_axis = acl_common::ToARMComputeAxis(rank, axis, frontend_layout, backend_layout).value(); - l->configure(input_tensors, output_alloc->handle(), fixed_axis); + l->configure(input_tensors, output_tensor->handle(), fixed_axis); fn = std::move(l); } @@ -327,75 +281,15 @@ void KernelGenerator::visit(const ir::operation::Concat &node) void KernelGenerator::visit(const ir::operation::FullyConnected &node) { - using ir::operation::FullyConnected; - const auto output_index{node.getOutputs().at(0)}; - const auto input_index{node.getInputs().at(FullyConnected::Input::INPUT)}; - const auto weight_index{node.getInputs().at(FullyConnected::Input::WEIGHT)}; - const auto bias_index{node.getInputs().at(FullyConnected::Input::BIAS)}; - - const auto input_rank = _ctx.at(input_index).shape().rank(); - - const auto output_size = - _ctx.at(output_index).shape().dim(_ctx.at(output_index).shape().rank() - 1); - UNUSED_RELEASE(output_size); - assert(_ctx.at(bias_index).shape().dim(0) == output_size); - assert(_ctx.at(weight_index).shape().dim(0) == output_size); - const auto batch_size = - _ctx.at(output_index).shape().dim(_ctx.at(output_index).shape().rank() - 2); - const auto input_size = - _ctx.at(weight_index).shape().dim(_ctx.at(weight_index).shape().rank() - 1); - - // Check for reshaping input's shape into rank-2 - bool needs_reshape = false; - ir::Shape reshape(2); - if (input_rank == 3 || input_rank == 4) - { - const auto &ifm_shape = _ctx.at(input_index).shape(); - auto feature_size = 1; - for (int i = 0; i < ifm_shape.rank(); ++i) - { - feature_size *= ifm_shape.dim(i); - } - - UNUSED_RELEASE(feature_size); - assert(feature_size == batch_size * input_size); - - // for reshaping - needs_reshape = true; - reshape.dim(0) = batch_size; /* H */ - reshape.dim(1) = input_size; /* W */ - } - + auto output_tensor = _tensor_builder->at(output_index).get(); const auto activation = node.param().activation; - auto output_alloc = _tensor_builder->at(output_index).get(); - const auto input_alloc = _tensor_builder->at(input_index).get(); - const auto weight_alloc = _tensor_builder->at(weight_index).get(); - const auto bias_alloc = _tensor_builder->at(bias_index).get(); - const auto frontend_layout = _current_op_seq_layout; - const auto acl_layout = output_alloc->handle()->info()->data_layout(); - - auto fn = std::make_unique<arm_compute::CLFullyConnectedReshapingLayer>( - _tensor_builder->acl_tensor_manager()->internal_buffer_manager()); - - arm_compute::CLFullyConnectedReshapingLayer::KernelType kernel_type = - arm_compute::CLFullyConnectedReshapingLayer::KernelType::GENERAL; - if (_ctx.at(weight_index).isConstant()) - { - kernel_type = arm_compute::CLFullyConnectedReshapingLayer::KernelType::PREPROCESSED_WEIGHTS; - assert(_ctx.at(weight_index).data()); - } - fn->configure( - input_alloc->handle(), weight_alloc->handle(), bias_alloc->handle(), output_alloc->handle(), - needs_reshape, - ::onert::backend::acl_common::asTensorShape( - reshape, 
frontend_layout, ::onert::backend::acl_common::asRuntimeLayout(acl_layout)), - kernel_type); - + auto fn = acl_common::kernelGenFullyConnected<acl_common::AclClFunction, ::arm_compute::ICLTensor, + ::arm_compute::CLFullyConnectedReshapingLayer>( + node, _ctx, _tensor_builder, _current_op_seq_layout); _return_fn = std::make_unique<exec::FunctionSequence>( - asAclClFunction(std::move(fn)), - ActivationBuilder::generate(activation, output_alloc->handle())); + std::move(fn), ActivationBuilder::generate(activation, output_tensor->handle())); } void KernelGenerator::visit(const ir::operation::Mul &node) @@ -406,17 +300,18 @@ void KernelGenerator::visit(const ir::operation::Mul &node) const auto activation = node.param().activation; - auto ofm_alloc = _tensor_builder->at(ofm_index).get(); - auto lhs_alloc = _tensor_builder->at(lhs_index).get(); - auto rhs_alloc = _tensor_builder->at(rhs_index).get(); + auto ofm_tensor = _tensor_builder->at(ofm_index).get(); + auto lhs_tensor = _tensor_builder->at(lhs_index).get(); + auto rhs_tensor = _tensor_builder->at(rhs_index).get(); auto fn = std::make_unique<::arm_compute::CLPixelWiseMultiplication>(); - fn->configure(lhs_alloc->handle(), rhs_alloc->handle(), ofm_alloc->handle(), 1.0, // scale + fn->configure(lhs_tensor->handle(), rhs_tensor->handle(), ofm_tensor->handle(), 1.0, // scale arm_compute::ConvertPolicy::SATURATE, arm_compute::RoundingPolicy::TO_NEAREST_EVEN); _return_fn = std::make_unique<exec::FunctionSequence>( - asAclClFunction(std::move(fn)), ActivationBuilder::generate(activation, ofm_alloc->handle())); + asAclClFunction(std::move(fn)), + ActivationBuilder::generate(activation, ofm_tensor->handle())); } void KernelGenerator::visit(const ir::operation::Reduce &node) @@ -427,14 +322,14 @@ void KernelGenerator::visit(const ir::operation::Reduce &node) const auto keep_dims{node.param().keep_dims}; const auto reduce_type = node.param().reduce_type; - auto output_alloc = _tensor_builder->at(output_index).get(); - auto input_alloc = _tensor_builder->at(input_index).get(); + auto output_tensor = _tensor_builder->at(output_index).get(); + auto input_tensor = _tensor_builder->at(input_index).get(); // Convert to ACL axes taking into account negative values and possible duplicates. 
const auto &axes = _ctx.at(axes_index); const auto input_rank = _ctx.at(input_index).shape().rank(); const auto frontend_layout = _current_op_seq_layout; - const auto backend_layout = input_alloc->layout(); + const auto backend_layout = input_tensor->layout(); std::unique_ptr<arm_compute::IFunction> fn; if (reduce_type == ir::operation::Reduce::ReduceType::MEAN) @@ -443,7 +338,7 @@ void KernelGenerator::visit(const ir::operation::Reduce &node) const auto acl_axes = acl_common::asCoordinates(axes, input_rank, frontend_layout, backend_layout); - l->configure(input_alloc->handle(), acl_axes, keep_dims, output_alloc->handle()); + l->configure(input_tensor->handle(), acl_axes, keep_dims, output_tensor->handle()); fn = std::move(l); } @@ -453,7 +348,7 @@ void KernelGenerator::visit(const ir::operation::Reduce &node) _tensor_builder->acl_tensor_manager()->internal_buffer_manager()); const auto acl_axes = acl_common::asSet(axes, input_rank, frontend_layout, backend_layout); - l->configure(input_alloc->handle(), output_alloc->handle(), acl_axes, keep_dims, + l->configure(input_tensor->handle(), output_tensor->handle(), acl_axes, keep_dims, acl_common::convertReduceType(reduce_type)); fn = std::move(l); @@ -469,13 +364,13 @@ void KernelGenerator::visit(const ir::operation::Reshape &node) const auto output_index{node.getOutputs().at(0)}; const auto input_index{node.getInputs().at(ir::operation::Reshape::Input::INPUT)}; - auto output_alloc = _tensor_builder->at(output_index).get(); - auto input_alloc = _tensor_builder->at(input_index).get(); + auto output_tensor = _tensor_builder->at(output_index).get(); + auto input_tensor = _tensor_builder->at(input_index).get(); // NOTE This operation must not be changed the layout from frontend to backend // So, PermutationOperationPass makes layouts of frontend and backend the same. 
const auto frontend_layout = _current_op_seq_layout; - const auto backend_layout = output_alloc->layout(); + const auto backend_layout = output_tensor->layout(); assert((_ctx.at(input_index).shape().rank() < 4 && _ctx.at(output_index).shape().rank() < 4) || frontend_layout == backend_layout); UNUSED_RELEASE(frontend_layout); @@ -483,7 +378,7 @@ void KernelGenerator::visit(const ir::operation::Reshape &node) auto fn = std::make_unique<::arm_compute::CLReshapeLayer>(); - fn->configure(input_alloc->handle(), output_alloc->handle()); + fn->configure(input_tensor->handle(), output_tensor->handle()); auto acl_fn = asAclClFunction(std::move(fn)); @@ -503,10 +398,10 @@ void KernelGenerator::visit(const ir::operation::Squeeze &node) (void)dims; (void)ndim; - auto output_alloc = _tensor_builder->at(output_index).get(); - auto input_alloc = _tensor_builder->at(input_index).get(); + auto output_tensor = _tensor_builder->at(output_index).get(); + auto input_tensor = _tensor_builder->at(input_index).get(); auto fn = std::make_unique<arm_compute::CLReshapeLayer>(); - fn->configure(input_alloc->handle(), output_alloc->handle()); + fn->configure(input_tensor->handle(), output_tensor->handle()); auto acl_fn = asAclClFunction(std::move(fn)); _return_fn = std::move(acl_fn); } @@ -516,15 +411,15 @@ void KernelGenerator::visit(const ir::operation::Tanh &node) const auto output_index{node.getOutputs().at(0)}; const auto input_index{node.getInputs().at(ir::operation::Tanh::Input::INPUT)}; - auto output_alloc = _tensor_builder->at(output_index).get(); - auto input_alloc = _tensor_builder->at(input_index).get(); + auto output_tensor = _tensor_builder->at(output_index).get(); + auto input_tensor = _tensor_builder->at(input_index).get(); auto fn = std::make_unique<arm_compute::CLActivationLayer>(); const ::arm_compute::ActivationLayerInfo act_info{ ::arm_compute::ActivationLayerInfo::ActivationFunction::TANH, 1.0f, 1.0f}; - fn->configure(input_alloc->handle(), output_alloc->handle(), act_info); + fn->configure(input_tensor->handle(), output_tensor->handle(), act_info); auto acl_fn = asAclClFunction(std::move(fn)); @@ -538,13 +433,13 @@ void KernelGenerator::visit(const ir::operation::Softmax &node) const auto beta = node.param().beta; - auto output_alloc = _tensor_builder->at(output_index).get(); - auto input_alloc = _tensor_builder->at(input_index).get(); + auto output_tensor = _tensor_builder->at(output_index).get(); + auto input_tensor = _tensor_builder->at(input_index).get(); auto fn = std::make_unique<::arm_compute::CLSoftmaxLayer>( _tensor_builder->acl_tensor_manager()->internal_buffer_manager()); - fn->configure(input_alloc->handle(), output_alloc->handle(), beta); + fn->configure(input_tensor->handle(), output_tensor->handle(), beta); auto acl_fn = asAclClFunction(std::move(fn)); @@ -558,10 +453,10 @@ void KernelGenerator::visit(const ir::operation::Slice &node) const auto begins_index{node.getInputs().at(ir::operation::Slice::Input::BEGINS)}; const auto sizes_index{node.getInputs().at(ir::operation::Slice::Input::SIZES)}; - auto outputData_alloc = _tensor_builder->at(output_index).get(); - auto inputData_alloc = _tensor_builder->at(input_index).get(); + auto outputData_tensor = _tensor_builder->at(output_index).get(); + auto inputData_tensor = _tensor_builder->at(input_index).get(); const auto frontend_layout = _current_op_seq_layout; - const auto backend_layout = inputData_alloc->layout(); + const auto backend_layout = inputData_tensor->layout(); // Set initializers for indices data such as order of 
inputData int input_rank = _ctx.at(input_index).shape().rank(); @@ -613,7 +508,7 @@ void KernelGenerator::visit(const ir::operation::Slice &node) auto fn = std::make_unique<::arm_compute::CLSlice>(); - fn->configure(inputData_alloc->handle(), outputData_alloc->handle(), starts_set, ends_set); + fn->configure(inputData_tensor->handle(), outputData_tensor->handle(), starts_set, ends_set); auto acl_fn = asAclClFunction(std::move(fn)); @@ -628,10 +523,10 @@ void KernelGenerator::visit(const ir::operation::StridedSlice &node) const auto ends_index{node.getInputs().at(ir::operation::StridedSlice::Input::ENDS)}; const auto strides_index{node.getInputs().at(ir::operation::StridedSlice::Input::STRIDES)}; - auto outputData_alloc = _tensor_builder->at(output_index).get(); - auto inputData_alloc = _tensor_builder->at(input_index).get(); + auto outputData_tensor = _tensor_builder->at(output_index).get(); + auto inputData_tensor = _tensor_builder->at(input_index).get(); const auto frontend_layout = _current_op_seq_layout; - const auto backend_layout = inputData_alloc->layout(); + const auto backend_layout = inputData_tensor->layout(); // Set initializers for indices data such as order of inputData int input_rank = _ctx.at(input_index).shape().rank(); @@ -704,7 +599,7 @@ void KernelGenerator::visit(const ir::operation::StridedSlice &node) auto fn = std::make_unique<::arm_compute::CLStridedSlice>(); - fn->configure(inputData_alloc->handle(), outputData_alloc->handle(), starts_set, ends_set, + fn->configure(inputData_tensor->handle(), outputData_tensor->handle(), starts_set, ends_set, strides_set, begin_mask, end_mask, shrink_axis_mask); auto acl_fn = asAclClFunction(std::move(fn)); @@ -720,10 +615,10 @@ void KernelGenerator::visit(const ir::operation::Transpose &node) const auto rank = _ctx.at(ifm_idx).shape().rank(); - auto ofm_alloc = _tensor_builder->at(ofm_idx).get(); - auto ifm_alloc = _tensor_builder->at(ifm_idx).get(); + auto ofm_tensor = _tensor_builder->at(ofm_idx).get(); + auto ifm_tensor = _tensor_builder->at(ifm_idx).get(); const auto frontend_layout = _current_op_seq_layout; - const auto backend_layout = ifm_alloc->layout(); + const auto backend_layout = ifm_tensor->layout(); std::vector<std::int32_t> pv(perm.cbegin(), perm.cend()); // Reversed @@ -732,7 +627,7 @@ void KernelGenerator::visit(const ir::operation::Transpose &node) auto fn = std::make_unique<::arm_compute::CLPermute>(); - fn->configure(ifm_alloc->handle(), ofm_alloc->handle(), backend_pv); + fn->configure(ifm_tensor->handle(), ofm_tensor->handle(), backend_pv); auto acl_fn = asAclClFunction(std::move(fn)); @@ -747,17 +642,18 @@ void KernelGenerator::visit(const ir::operation::Add &node) const auto activation = node.param().activation; - auto ofm_alloc = _tensor_builder->at(ofm_index).get(); - auto lhs_alloc = _tensor_builder->at(lhs_index).get(); - auto rhs_alloc = _tensor_builder->at(rhs_index).get(); + auto ofm_tensor = _tensor_builder->at(ofm_index).get(); + auto lhs_tensor = _tensor_builder->at(lhs_index).get(); + auto rhs_tensor = _tensor_builder->at(rhs_index).get(); auto fn = std::make_unique<::arm_compute::CLArithmeticAddition>(); - fn->configure(lhs_alloc->handle(), rhs_alloc->handle(), ofm_alloc->handle(), + fn->configure(lhs_tensor->handle(), rhs_tensor->handle(), ofm_tensor->handle(), arm_compute::ConvertPolicy::SATURATE); _return_fn = std::make_unique<exec::FunctionSequence>( - asAclClFunction(std::move(fn)), ActivationBuilder::generate(activation, ofm_alloc->handle())); + asAclClFunction(std::move(fn)), + 
ActivationBuilder::generate(activation, ofm_tensor->handle())); } void KernelGenerator::visit(const ir::operation::Sub &node) @@ -768,17 +664,18 @@ void KernelGenerator::visit(const ir::operation::Sub &node) const auto activation = node.param().activation; - auto ofm_alloc = _tensor_builder->at(ofm_index).get(); - auto lhs_alloc = _tensor_builder->at(lhs_index).get(); - auto rhs_alloc = _tensor_builder->at(rhs_index).get(); + auto ofm_tensor = _tensor_builder->at(ofm_index).get(); + auto lhs_tensor = _tensor_builder->at(lhs_index).get(); + auto rhs_tensor = _tensor_builder->at(rhs_index).get(); auto fn = std::make_unique<::arm_compute::CLArithmeticSubtraction>(); - fn->configure(lhs_alloc->handle(), rhs_alloc->handle(), ofm_alloc->handle(), + fn->configure(lhs_tensor->handle(), rhs_tensor->handle(), ofm_tensor->handle(), arm_compute::ConvertPolicy::SATURATE); _return_fn = std::make_unique<exec::FunctionSequence>( - asAclClFunction(std::move(fn)), ActivationBuilder::generate(activation, ofm_alloc->handle())); + asAclClFunction(std::move(fn)), + ActivationBuilder::generate(activation, ofm_tensor->handle())); } void KernelGenerator::visit(const ir::operation::Div &node) @@ -789,16 +686,17 @@ void KernelGenerator::visit(const ir::operation::Div &node) const auto activation = node.param().activation; - auto ofm_alloc = _tensor_builder->at(ofm_index).get(); - auto lhs_alloc = _tensor_builder->at(lhs_index).get(); - auto rhs_alloc = _tensor_builder->at(rhs_index).get(); + auto ofm_tensor = _tensor_builder->at(ofm_index).get(); + auto lhs_tensor = _tensor_builder->at(lhs_index).get(); + auto rhs_tensor = _tensor_builder->at(rhs_index).get(); auto fn = std::make_unique<::arm_compute::CLArithmeticDivision>(); - fn->configure(lhs_alloc->handle(), rhs_alloc->handle(), ofm_alloc->handle()); + fn->configure(lhs_tensor->handle(), rhs_tensor->handle(), ofm_tensor->handle()); _return_fn = std::make_unique<exec::FunctionSequence>( - asAclClFunction(std::move(fn)), ActivationBuilder::generate(activation, ofm_alloc->handle())); + asAclClFunction(std::move(fn)), + ActivationBuilder::generate(activation, ofm_tensor->handle())); } void KernelGenerator::visit(const ir::operation::Exp &node) @@ -806,12 +704,12 @@ void KernelGenerator::visit(const ir::operation::Exp &node) const auto output_index{node.getOutputs().at(0)}; const auto input_index{node.getInputs().at(ir::operation::Exp::Input::INPUT)}; - auto output_alloc = _tensor_builder->at(output_index).get(); - auto input_alloc = _tensor_builder->at(input_index).get(); + auto output_tensor = _tensor_builder->at(output_index).get(); + auto input_tensor = _tensor_builder->at(input_index).get(); auto fn = std::make_unique<::arm_compute::CLExpLayer>(); - fn->configure(input_alloc->handle(), output_alloc->handle()); + fn->configure(input_tensor->handle(), output_tensor->handle()); auto acl_fn = asAclClFunction(std::move(fn)); @@ -823,12 +721,12 @@ void KernelGenerator::visit(const ir::operation::ExpandDims &node) const auto output_index{node.getOutputs().at(0)}; const auto input_index{node.getInputs().at(ir::operation::ExpandDims::Input::INPUT)}; - auto output_alloc = _tensor_builder->at(output_index).get(); - auto input_alloc = _tensor_builder->at(input_index).get(); + auto output_tensor = _tensor_builder->at(output_index).get(); + auto input_tensor = _tensor_builder->at(input_index).get(); auto fn = std::make_unique<::arm_compute::CLReshapeLayer>(); - fn->configure(input_alloc->handle(), output_alloc->handle()); + fn->configure(input_tensor->handle(), 
output_tensor->handle()); auto acl_fn = asAclClFunction(std::move(fn)); @@ -842,20 +740,21 @@ void KernelGenerator::visit(const ir::operation::InstanceNorm &node) const auto gamma_index{node.getInputs().at(ir::operation::InstanceNorm::Input::GAMMA)}; const auto beta_index{node.getInputs().at(ir::operation::InstanceNorm::Input::BETA)}; - auto ofm_alloc = _tensor_builder->at(ofm_index).get(); - auto ifm_alloc = _tensor_builder->at(ifm_index).get(); - auto gamma_alloc = _tensor_builder->at(gamma_index).get(); - auto beta_alloc = _tensor_builder->at(beta_index).get(); + auto ofm_tensor = _tensor_builder->at(ofm_index).get(); + auto ifm_tensor = _tensor_builder->at(ifm_index).get(); + auto gamma_tensor = _tensor_builder->at(gamma_index).get(); + auto beta_tensor = _tensor_builder->at(beta_index).get(); auto epsilon = node.param().epsilon; auto activation = node.param().activation; auto fn = std::make_unique<::arm_compute::CLInstanceNormalizationLayerEx>(); - fn->configure(ifm_alloc->handle(), ofm_alloc->handle(), gamma_alloc->handle(), - beta_alloc->handle(), epsilon); + fn->configure(ifm_tensor->handle(), ofm_tensor->handle(), gamma_tensor->handle(), + beta_tensor->handle(), epsilon); _return_fn = std::make_unique<exec::FunctionSequence>( - asAclClFunction(std::move(fn)), ActivationBuilder::generate(activation, ofm_alloc->handle())); + asAclClFunction(std::move(fn)), + ActivationBuilder::generate(activation, ofm_tensor->handle())); } void KernelGenerator::visit(const ir::operation::Logistic &node) @@ -863,15 +762,15 @@ void KernelGenerator::visit(const ir::operation::Logistic &node) const auto ofm_index{node.getOutputs().at(0)}; const auto ifm_index{node.getInputs().at(ir::operation::Logistic::Input::INPUT)}; - auto ofm_alloc = _tensor_builder->at(ofm_index).get(); - auto ifm_alloc = _tensor_builder->at(ifm_index).get(); + auto ofm_tensor = _tensor_builder->at(ofm_index).get(); + auto ifm_tensor = _tensor_builder->at(ifm_index).get(); const ::arm_compute::ActivationLayerInfo act_info{ ::arm_compute::ActivationLayerInfo::ActivationFunction::LOGISTIC}; auto fn = std::make_unique<::arm_compute::CLActivationLayer>(); - fn->configure(ifm_alloc->handle(), ofm_alloc->handle(), act_info); + fn->configure(ifm_tensor->handle(), ofm_tensor->handle(), act_info); auto acl_fn = asAclClFunction(std::move(fn)); @@ -884,13 +783,13 @@ void KernelGenerator::visit(const ir::operation::LogicalAnd &node) const auto input0_index{node.getInputs().at(ir::operation::LogicalAnd::Input::INPUT0)}; const auto input1_index{node.getInputs().at(ir::operation::LogicalAnd::Input::INPUT1)}; - auto output_alloc = _tensor_builder->at(output_index).get(); - auto input0_alloc = _tensor_builder->at(input0_index).get(); - auto input1_alloc = _tensor_builder->at(input1_index).get(); + auto output_tensor = _tensor_builder->at(output_index).get(); + auto input0_tensor = _tensor_builder->at(input0_index).get(); + auto input1_tensor = _tensor_builder->at(input1_index).get(); auto fn = std::make_unique<::arm_compute::CLBinaryLogicalOp>(); - fn->configure(input0_alloc->handle(), input1_alloc->handle(), output_alloc->handle(), + fn->configure(input0_tensor->handle(), input1_tensor->handle(), output_tensor->handle(), ::arm_compute::BinaryLogicalOperation::AND); auto acl_fn = asAclClFunction(std::move(fn)); @@ -900,159 +799,8 @@ void KernelGenerator::visit(const ir::operation::LogicalAnd &node) void KernelGenerator::visit(const ir::operation::LSTM &node) { - // TODO Support dynamic rnn - // TODO Fix subtle error in the case of non-CIFG, 
non-peephole and No Projection. - const auto scratch_buffer_index{ - node.getOutputs().at(ir::operation::LSTM::Output::SCRATCH_BUFFER)}; - const auto output_state_out_index{ - node.getOutputs().at(ir::operation::LSTM::Output::OUTPUT_STATE_OUT)}; - const auto cell_state_out_index{ - node.getOutputs().at(ir::operation::LSTM::Output::CELL_STATE_OUT)}; - const auto output_index{node.getOutputs().at(ir::operation::LSTM::Output::OUTPUT)}; - - const auto input_index{node.getInputs().at(ir::operation::LSTM::Input::INPUT)}; - const auto input_to_input_weights_index{ - node.getInputs().at(ir::operation::LSTM::Input::INPUT_TO_INPUT_WEIGHTS)}; // optional - const auto input_to_forget_weights_index{ - node.getInputs().at(ir::operation::LSTM::Input::INPUT_TO_FORGET_WEIGHTS)}; - const auto input_to_cell_weights_index{ - node.getInputs().at(ir::operation::LSTM::Input::INPUT_TO_CELL_WEIGHTS)}; - const auto input_to_output_weights_index{ - node.getInputs().at(ir::operation::LSTM::Input::INPUT_TO_OUTPUT_WEIGHTS)}; - const auto recurrent_to_input_weights_index{ - node.getInputs().at(ir::operation::LSTM::Input::RECURRENT_TO_INPUT_WEIGHTS)}; // optional - const auto recurrent_to_forget_weights_index{ - node.getInputs().at(ir::operation::LSTM::Input::RECURRENT_TO_FORGET_WEIGHTS)}; - const auto recurrent_to_cell_weights_index{ - node.getInputs().at(ir::operation::LSTM::Input::RECURRENT_TO_CELL_WEIGHTS)}; - const auto recurrent_to_output_weights_index{ - node.getInputs().at(ir::operation::LSTM::Input::RECURRENT_TO_OUTPUT_WEIGHTS)}; - const auto cell_to_input_weights_index{ - node.getInputs().at(ir::operation::LSTM::Input::CELL_TO_INPUT_WEIGHTS)}; // optional - const auto cell_to_forget_weights_index{ - node.getInputs().at(ir::operation::LSTM::Input::CELL_TO_FORGET_WEIGHTS)}; // optional - const auto cell_to_output_weights_index{ - node.getInputs().at(ir::operation::LSTM::Input::CELL_TO_OUTPUT_WEIGHTS)}; // optional - const auto input_gate_bias_index{ - node.getInputs().at(ir::operation::LSTM::Input::INPUT_GATE_BIAS)}; - const auto forget_gate_bias_index{ - node.getInputs().at(ir::operation::LSTM::Input::FORGET_GATE_BIAS)}; - const auto cell_bias_index{node.getInputs().at(ir::operation::LSTM::Input::CELL_BIAS)}; - const auto output_gate_bias_index{ - node.getInputs().at(ir::operation::LSTM::Input::OUTPUT_GATE_BIAS)}; - const auto projection_weights_index{ - node.getInputs().at(ir::operation::LSTM::Input::PROJECTION_WEIGHTS)}; // optional - const auto projection_bias_index{ - node.getInputs().at(ir::operation::LSTM::Input::PROJECTION_BIAS)}; // optional - const auto output_state_in_index{ - node.getInputs().at(ir::operation::LSTM::Input::OUTPUT_STATE_IN)}; - const auto cell_state_in_index{node.getInputs().at(ir::operation::LSTM::Input::CELL_STATE_IN)}; - const auto cell_threshold = node.param().cell_threshold; - const auto projection_threshold = node.param().projection_threshold; - - bool has_input_to_input_weights = _ctx.at(input_to_input_weights_index).shape().dim(0) != 0 && - _ctx.at(input_to_input_weights_index).shape().dim(1) != 0; - bool has_recurrent_to_input_weights = - _ctx.at(recurrent_to_input_weights_index).shape().dim(0) != 0 && - _ctx.at(recurrent_to_input_weights_index).shape().dim(1) != 0; - bool has_cell_to_forget_weights = _ctx.at(cell_to_forget_weights_index).shape().dim(0) != 0; - bool has_cell_to_output_weights = _ctx.at(cell_to_output_weights_index).shape().dim(0) != 0; - bool has_projection_weights = _ctx.at(projection_weights_index).shape().dim(0) != 0 && - 
_ctx.at(projection_weights_index).shape().dim(1) != 0; - bool has_projection_bias = _ctx.at(projection_bias_index).shape().dim(0); - - // NOTE The input_to_input_weights and the recurrent_to_input_weights do not exist in CIFG. - // true: no CIFG - // false: CIFG - // NOTE The cell_to_input_weights does not exist in non-peephole although regular LSTM(non-CIFG). - bool has_cifg_param = has_input_to_input_weights && has_recurrent_to_input_weights; - - // NOTE The cell_to_forget_weights and the cell_to_output_weights exist in peephole. - // But the cell_to_input_weights does not exist in regular CIFG although peephole. - // true: peephole - // false: no peephole - bool has_peephole_param = has_cell_to_forget_weights && has_cell_to_output_weights; - - // NOTE Although the projection weights has data the projection bias may not have data. - bool has_projection_param = has_projection_weights; - - const auto activation = node.param().activation; - const auto cell_clip = cell_threshold; - const auto projection_clip = projection_threshold; - assert(cell_clip >= 0.f && projection_clip >= 0.f); - - auto scratch_buffer_alloc = _tensor_builder->at(scratch_buffer_index).get(); - auto output_state_out_alloc = _tensor_builder->at(output_state_out_index).get(); - auto cell_state_out_alloc = _tensor_builder->at(cell_state_out_index).get(); - auto output_alloc = _tensor_builder->at(output_index).get(); - - auto input_alloc = _tensor_builder->at(input_index).get(); - - auto input_to_forget_weights_alloc = _tensor_builder->at(input_to_forget_weights_index).get(); - auto input_to_cell_weights_alloc = _tensor_builder->at(input_to_cell_weights_index).get(); - auto input_to_output_weights_alloc = _tensor_builder->at(input_to_output_weights_index).get(); - auto recurrent_to_forget_weights_alloc = - _tensor_builder->at(recurrent_to_forget_weights_index).get(); - auto recurrent_to_cell_weights_alloc = _tensor_builder->at(recurrent_to_cell_weights_index).get(); - auto recurrent_to_output_weights_alloc = - _tensor_builder->at(recurrent_to_output_weights_index).get(); - - auto forget_gate_bias_alloc = _tensor_builder->at(forget_gate_bias_index).get(); - auto cell_bias_alloc = _tensor_builder->at(cell_bias_index).get(); - auto output_gate_bias_alloc = _tensor_builder->at(output_gate_bias_index).get(); - auto output_state_in_alloc = _tensor_builder->at(output_state_in_index).get(); - auto cell_state_in_alloc = _tensor_builder->at(cell_state_in_index).get(); - - auto act_info = ::onert::backend::acl_common::asActivationLayerInfo(activation); - - auto fn = std::make_unique<::arm_compute::CLLSTMLayer>(); - - ::arm_compute::LSTMParams<::arm_compute::ICLTensor> lstm_params{}; - if (has_cifg_param) - { - auto input_to_input_weights_alloc = - _tensor_builder->at(input_to_input_weights_index).get(); // optional - auto recurrent_to_input_weights_alloc = - _tensor_builder->at(recurrent_to_input_weights_index).get(); // optional - auto cell_to_input_weights_handle = - has_peephole_param ? 
_tensor_builder->at(cell_to_input_weights_index).get()->handle() - : nullptr; // optional (non-cifg && peephole) - auto input_gate_bias_alloc = _tensor_builder->at(input_gate_bias_index).get(); // optional - lstm_params.set_cifg_params(input_to_input_weights_alloc->handle(), - recurrent_to_input_weights_alloc->handle(), - cell_to_input_weights_handle, input_gate_bias_alloc->handle()); - } - if (has_peephole_param) - { - auto cell_to_forget_weights_alloc = - _tensor_builder->at(cell_to_forget_weights_index).get(); // optional - auto cell_to_output_weights_alloc = - _tensor_builder->at(cell_to_output_weights_index).get(); // optional - lstm_params.set_peephole_params(cell_to_forget_weights_alloc->handle(), - cell_to_output_weights_alloc->handle()); - } - if (has_projection_param) - { - auto projection_weights_alloc = _tensor_builder->at(projection_weights_index).get(); // optional - auto projection_bias_handle = has_projection_bias - ? _tensor_builder->at(projection_bias_index).get()->handle() - : nullptr; // optional - lstm_params.set_projection_params(projection_weights_alloc->handle(), projection_bias_handle); - } - - fn->configure( - input_alloc->handle(), input_to_forget_weights_alloc->handle(), - input_to_cell_weights_alloc->handle(), input_to_output_weights_alloc->handle(), - recurrent_to_forget_weights_alloc->handle(), recurrent_to_cell_weights_alloc->handle(), - recurrent_to_output_weights_alloc->handle(), forget_gate_bias_alloc->handle(), - cell_bias_alloc->handle(), output_gate_bias_alloc->handle(), output_state_in_alloc->handle(), - cell_state_in_alloc->handle(), scratch_buffer_alloc->handle(), - output_state_out_alloc->handle(), cell_state_out_alloc->handle(), output_alloc->handle(), - lstm_params, act_info, cell_clip, projection_clip); - - auto acl_fn = asAclClFunction(std::move(fn)); - - _return_fn = std::move(acl_fn); + _return_fn = acl_common::kernelGenLSTM<acl_common::AclClFunction, ::arm_compute::ICLTensor, + ::arm_compute::CLLSTMLayer>(node, _ctx, _tensor_builder); } void KernelGenerator::visit(const ir::operation::Comparison &node) @@ -1063,13 +811,13 @@ void KernelGenerator::visit(const ir::operation::Comparison &node) const auto comparison_type = node.param().comparison_type; - auto output_alloc = _tensor_builder->at(output_index).get(); - auto input0_alloc = _tensor_builder->at(input0_index).get(); - auto input1_alloc = _tensor_builder->at(input1_index).get(); + auto output_tensor = _tensor_builder->at(output_index).get(); + auto input0_tensor = _tensor_builder->at(input0_index).get(); + auto input1_tensor = _tensor_builder->at(input1_index).get(); auto fn = std::make_unique<::arm_compute::CLComparison>(); - fn->configure(input0_alloc->handle(), input1_alloc->handle(), output_alloc->handle(), + fn->configure(input0_tensor->handle(), input1_tensor->handle(), output_tensor->handle(), (arm_compute::ComparisonOperation)comparison_type); auto acl_fn = asAclClFunction(std::move(fn)); @@ -1107,13 +855,13 @@ void KernelGenerator::visit(const ir::operation::Pack &node) for (const auto &input_index : input_indexes) { size_t input_rank = _ctx.at(input_index).shape().rank(); - const auto &input_alloc = _tensor_builder->at(input_index); - orig_inputs_acl_tensor_shapes.emplace_back(input_alloc->info()->tensor_shape()); - assert(input_rank == input_alloc->num_dimensions()); - if (input_rank != input_alloc->info()->num_dimensions()) + const auto &input_tensor = _tensor_builder->at(input_index); + orig_inputs_acl_tensor_shapes.emplace_back(input_tensor->info()->tensor_shape()); + 
assert(input_rank == input_tensor->num_dimensions()); + if (input_rank != input_tensor->info()->num_dimensions()) { // This means that high dimension's value is 1 and ifm tensor is applied dim_correction - input_alloc->info()->set_tensor_shape(acl_common::asTensorShape( + input_tensor->info()->set_tensor_shape(acl_common::asTensorShape( _ctx.at(input_index).shape(), _current_op_seq_layout, backend_layout, false)); } } @@ -1135,8 +883,8 @@ void KernelGenerator::visit(const ir::operation::Permute &node) const auto ofm_idx{node.getOutputs().at(0)}; const auto ifm_idx{node.getInputs().at(0)}; const auto permute_type = node.getPermuteType(); - auto ofm_alloc = _tensor_builder->at(ofm_idx).get(); - auto ifm_alloc = _tensor_builder->at(ifm_idx).get(); + auto ofm_tensor = _tensor_builder->at(ofm_idx).get(); + auto ifm_tensor = _tensor_builder->at(ifm_idx).get(); const auto rank = _ctx.at(ofm_idx).shape().rank(); assert(_ctx.at(ifm_idx).shape().rank() == _ctx.at(ofm_idx).shape().rank()); @@ -1149,7 +897,7 @@ void KernelGenerator::visit(const ir::operation::Permute &node) auto l = std::make_unique<::arm_compute::CLPermute>(); - l->configure(ifm_alloc->handle(), ofm_alloc->handle(), pv); + l->configure(ifm_tensor->handle(), ofm_tensor->handle(), pv); fn = std::move(l); } @@ -1160,7 +908,7 @@ void KernelGenerator::visit(const ir::operation::Permute &node) auto l = std::make_unique<::arm_compute::CLPermute>(); - l->configure(ifm_alloc->handle(), ofm_alloc->handle(), pv); + l->configure(ifm_tensor->handle(), ofm_tensor->handle(), pv); fn = std::move(l); } @@ -1168,7 +916,7 @@ void KernelGenerator::visit(const ir::operation::Permute &node) { auto l = std::make_unique<::arm_compute::CLCopy>(); - l->configure(ifm_alloc->handle(), ofm_alloc->handle()); + l->configure(ifm_tensor->handle(), ofm_tensor->handle()); fn = std::move(l); } @@ -1183,12 +931,12 @@ void KernelGenerator::visit(const ir::operation::RSQRT &node) const auto ofm_index{node.getOutputs().at(0)}; const auto ifm_index{node.getInputs().at(ir::operation::RSQRT::Input::INPUT)}; - auto ofm_alloc = _tensor_builder->at(ofm_index).get(); - auto ifm_alloc = _tensor_builder->at(ifm_index).get(); + auto ofm_tensor = _tensor_builder->at(ofm_index).get(); + auto ifm_tensor = _tensor_builder->at(ifm_index).get(); auto fn = std::make_unique<::arm_compute::CLRsqrtLayer>(); - fn->configure(ifm_alloc->handle(), ofm_alloc->handle()); + fn->configure(ifm_tensor->handle(), ofm_tensor->handle()); _return_fn = asAclClFunction(std::move(fn)); } @@ -1198,15 +946,15 @@ void KernelGenerator::visit(const ir::operation::ReLU &node) const auto output_index{node.getOutputs().at(0)}; const auto input_index{node.getInputs().at(ir::operation::ReLU::Input::INPUT)}; - auto output_alloc = _tensor_builder->at(output_index).get(); - auto input_alloc = _tensor_builder->at(input_index).get(); + auto output_tensor = _tensor_builder->at(output_index).get(); + auto input_tensor = _tensor_builder->at(input_index).get(); auto fn = std::make_unique<arm_compute::CLActivationLayer>(); const ::arm_compute::ActivationLayerInfo act_info{ ::arm_compute::ActivationLayerInfo::ActivationFunction::RELU}; - fn->configure(input_alloc->handle(), output_alloc->handle(), act_info); + fn->configure(input_tensor->handle(), output_tensor->handle(), act_info); auto acl_fn = asAclClFunction(std::move(fn)); @@ -1219,12 +967,12 @@ void KernelGenerator::visit(const ir::operation::ResizeBilinear &node) const auto ifm_index{node.getInputs().at(ir::operation::ResizeBilinear::Input::INPUT)}; - auto ofm_alloc = 
_tensor_builder->at(ofm_index).get(); - auto ifm_alloc = _tensor_builder->at(ifm_index).get(); + auto ofm_tensor = _tensor_builder->at(ofm_index).get(); + auto ifm_tensor = _tensor_builder->at(ifm_index).get(); auto fn = std::make_unique<::arm_compute::CLScale>(); - fn->configure(ifm_alloc->handle(), ofm_alloc->handle(), + fn->configure(ifm_tensor->handle(), ofm_tensor->handle(), ::arm_compute::InterpolationPolicy::BILINEAR, ::arm_compute::BorderMode::REPLICATE, ::arm_compute::PixelValue(0.f), ::arm_compute::SamplingPolicy::TOP_LEFT); @@ -1238,15 +986,15 @@ void KernelGenerator::visit(const ir::operation::ReLU1 &node) const auto ofm_index{node.getOutputs().at(0)}; const auto ifm_index{node.getInputs().at(ir::operation::ReLU1::Input::INPUT)}; - auto ofm_alloc = _tensor_builder->at(ofm_index).get(); - auto ifm_alloc = _tensor_builder->at(ifm_index).get(); + auto ofm_tensor = _tensor_builder->at(ofm_index).get(); + auto ifm_tensor = _tensor_builder->at(ifm_index).get(); const ::arm_compute::ActivationLayerInfo act_info{ ::arm_compute::ActivationLayerInfo::ActivationFunction::LU_BOUNDED_RELU, 1.0f, -1.0f}; auto fn = std::make_unique<::arm_compute::CLActivationLayer>(); - fn->configure(ifm_alloc->handle(), ofm_alloc->handle(), act_info); + fn->configure(ifm_tensor->handle(), ofm_tensor->handle(), act_info); auto acl_fn = asAclClFunction(std::move(fn)); @@ -1258,15 +1006,15 @@ void KernelGenerator::visit(const ir::operation::ReLU6 &node) const auto ofm_index{node.getOutputs().at(0)}; const auto ifm_index{node.getInputs().at(ir::operation::ReLU6::Input::INPUT)}; - auto ofm_alloc = _tensor_builder->at(ofm_index).get(); - auto ifm_alloc = _tensor_builder->at(ifm_index).get(); + auto ofm_tensor = _tensor_builder->at(ofm_index).get(); + auto ifm_tensor = _tensor_builder->at(ifm_index).get(); const ::arm_compute::ActivationLayerInfo act_info{ ::arm_compute::ActivationLayerInfo::ActivationFunction::BOUNDED_RELU, 6.0f}; auto fn = std::make_unique<::arm_compute::CLActivationLayer>(); - fn->configure(ifm_alloc->handle(), ofm_alloc->handle(), act_info); + fn->configure(ifm_tensor->handle(), ofm_tensor->handle(), act_info); auto acl_fn = asAclClFunction(std::move(fn)); @@ -1288,25 +1036,25 @@ void KernelGenerator::visit(const ir::operation::RNN &node) const auto activation = node.param().activation; - auto output_alloc = _tensor_builder->at(output_index).get(); - auto hidden_state_out_alloc = _tensor_builder->at(hidden_state_out_index).get(); + auto output_tensor = _tensor_builder->at(output_index).get(); + auto hidden_state_out_tensor = _tensor_builder->at(hidden_state_out_index).get(); - auto input_alloc = _tensor_builder->at(input_index).get(); - auto weights_alloc = _tensor_builder->at(weights_index).get(); - auto recurrent_weights_alloc = _tensor_builder->at(recurrent_weights_index).get(); - auto bias_alloc = _tensor_builder->at(bias_index).get(); - auto hidden_state_in_alloc = _tensor_builder->at(hidden_state_in_index).get(); + auto input_tensor = _tensor_builder->at(input_index).get(); + auto weights_tensor = _tensor_builder->at(weights_index).get(); + auto recurrent_weights_tensor = _tensor_builder->at(recurrent_weights_index).get(); + auto bias_tensor = _tensor_builder->at(bias_index).get(); + auto hidden_state_in_tensor = _tensor_builder->at(hidden_state_in_index).get(); auto act_info = ::onert::backend::acl_common::asActivationLayerInfo(activation); auto copy_layer = std::make_unique<::arm_compute::CLCopy>(); - copy_layer->configure(hidden_state_in_alloc->handle(), 
hidden_state_out_alloc->handle()); + copy_layer->configure(hidden_state_in_tensor->handle(), hidden_state_out_tensor->handle()); _return_fn = asAclClFunction(std::move(copy_layer)); - auto fn = std::make_unique<::arm_compute::CLRNNLayerEx>( + auto fn = std::make_unique<::arm_compute::CLRNNLayer>( _tensor_builder->acl_tensor_manager()->internal_buffer_manager()); - fn->configure(input_alloc->handle(), weights_alloc->handle(), recurrent_weights_alloc->handle(), - bias_alloc->handle(), hidden_state_out_alloc->handle(), output_alloc->handle(), - act_info); + fn->configure(input_tensor->handle(), weights_tensor->handle(), + recurrent_weights_tensor->handle(), bias_tensor->handle(), + hidden_state_out_tensor->handle(), output_tensor->handle(), act_info); _return_fn = asAclClFunction(std::move(fn)); } @@ -1315,12 +1063,12 @@ void KernelGenerator::visit(const ir::operation::Floor &node) const auto ofm_index{node.getOutputs().at(0)}; const auto ifm_index{node.getInputs().at(ir::operation::Floor::Input::INPUT)}; - auto ofm_alloc = _tensor_builder->at(ofm_index).get(); - auto ifm_alloc = _tensor_builder->at(ifm_index).get(); + auto ofm_tensor = _tensor_builder->at(ofm_index).get(); + auto ifm_tensor = _tensor_builder->at(ifm_index).get(); auto fn = std::make_unique<::arm_compute::CLFloor>(); - fn->configure(ifm_alloc->handle(), ofm_alloc->handle()); + fn->configure(ifm_tensor->handle(), ofm_tensor->handle()); auto acl_fn = asAclClFunction(std::move(fn)); @@ -1335,10 +1083,10 @@ void KernelGenerator::visit(const ir::operation::SpaceToBatchND &node) node.getInputs().at(ir::operation::SpaceToBatchND::Input::BLOCK_SIZE)}; const auto paddings_index{node.getInputs().at(ir::operation::SpaceToBatchND::Input::PADDINGS)}; - auto ofm_alloc = _tensor_builder->at(ofm_index).get(); - auto ifm_alloc = _tensor_builder->at(ifm_index).get(); - auto block_size_alloc = _tensor_builder->at(block_size_index).get(); - auto paddings_alloc = _tensor_builder->at(paddings_index).get(); + auto ofm_tensor = _tensor_builder->at(ofm_index).get(); + auto ifm_tensor = _tensor_builder->at(ifm_index).get(); + auto block_size_tensor = _tensor_builder->at(block_size_index).get(); + auto paddings_tensor = _tensor_builder->at(paddings_index).get(); assert(_ctx.at(block_size_index).data()); assert(_ctx.at(paddings_index).data()); @@ -1346,8 +1094,8 @@ void KernelGenerator::visit(const ir::operation::SpaceToBatchND &node) std::unique_ptr<::arm_compute::IFunction> fn; auto l = std::make_unique<::arm_compute::CLSpaceToBatchLayer>(); - l->configure(ifm_alloc->handle(), block_size_alloc->handle(), paddings_alloc->handle(), - ofm_alloc->handle()); + l->configure(ifm_tensor->handle(), block_size_tensor->handle(), paddings_tensor->handle(), + ofm_tensor->handle()); fn = std::move(l); auto acl_fn = asAclClFunction(std::move(fn)); @@ -1362,12 +1110,12 @@ void KernelGenerator::visit(const ir::operation::SpaceToDepth &node) auto block_size = node.param().block_size; - auto ofm_alloc = _tensor_builder->at(ofm_index).get(); - auto ifm_alloc = _tensor_builder->at(ifm_index).get(); + auto ofm_tensor = _tensor_builder->at(ofm_index).get(); + auto ifm_tensor = _tensor_builder->at(ifm_index).get(); - auto fn = std::make_unique<::arm_compute::CLSpaceToDepth>(); + auto fn = std::make_unique<::arm_compute::CLSpaceToDepthLayer>(); - fn->configure(ifm_alloc->handle(), ofm_alloc->handle(), block_size); + fn->configure(ifm_tensor->handle(), ofm_tensor->handle(), block_size); auto acl_fn = asAclClFunction(std::move(fn)); @@ -1376,32 +1124,15 @@ void 
KernelGenerator::visit(const ir::operation::SpaceToDepth &node) void KernelGenerator::visit(const ir::operation::L2Pool2D &node) { - const auto ofm_index{node.getOutputs().at(0)}; - const auto ifm_index{node.getInputs().at(ir::operation::L2Pool2D::Input::INPUT)}; + auto raw_fn = acl_common::kernelGenPool2D<::arm_compute::CLPoolingLayer>( + node, _ctx, _tensor_builder, _current_op_seq_layout, ::arm_compute::PoolingType::L2); - const auto ifm_shape = _ctx.at(ifm_index).shape().asFeature(_current_op_seq_layout); - const auto ofm_shape = _ctx.at(ofm_index).shape().asFeature(_current_op_seq_layout); - - uint32_t kw = node.param().kw; - uint32_t kh = node.param().kh; - const auto stride = node.param().stride; - const auto padding = - ir::calculatePadding(node.param().padding, ifm_shape, ofm_shape, stride, kw, kh); + const auto ofm_index{node.getOutputs().at(0)}; + auto ofm_tensor = _tensor_builder->at(ofm_index).get(); const auto activation = node.param().activation; - - auto ofm_alloc = _tensor_builder->at(ofm_index).get(); - auto ifm_alloc = _tensor_builder->at(ifm_index).get(); - - ::arm_compute::PoolingLayerInfo info{ - ::arm_compute::PoolingType::L2, ::arm_compute::Size2D{kw, kh}, - ::onert::backend::acl_common::asPadStrideInfo(padding, stride)}; - - auto fn = std::make_unique<::arm_compute::CLPoolingLayer>(); - - fn->configure(ifm_alloc->handle(), ofm_alloc->handle(), info); - _return_fn = std::make_unique<exec::FunctionSequence>( - asAclClFunction(std::move(fn)), ActivationBuilder::generate(activation, ofm_alloc->handle())); + asAclClFunction(std::move(raw_fn)), + ActivationBuilder::generate(activation, ofm_tensor->handle())); } void KernelGenerator::visit(const ir::operation::EmbeddingLookup &node) @@ -1410,13 +1141,13 @@ void KernelGenerator::visit(const ir::operation::EmbeddingLookup &node) const auto lookups_index{node.getInputs().at(ir::operation::EmbeddingLookup::Input::LOOKUPS)}; const auto values_index{node.getInputs().at(ir::operation::EmbeddingLookup::Input::VALUES)}; - auto output_alloc = _tensor_builder->at(output_index).get(); - auto lookups_alloc = _tensor_builder->at(lookups_index).get(); - auto values_alloc = _tensor_builder->at(values_index).get(); + auto output_tensor = _tensor_builder->at(output_index).get(); + auto lookups_tensor = _tensor_builder->at(lookups_index).get(); + auto values_tensor = _tensor_builder->at(values_index).get(); auto fn = std::make_unique<::arm_compute::CLEmbeddingLookup>(); - fn->configure(values_alloc->handle(), output_alloc->handle(), lookups_alloc->handle()); + fn->configure(values_tensor->handle(), output_tensor->handle(), lookups_tensor->handle()); auto acl_fn = asAclClFunction(std::move(fn)); @@ -1442,15 +1173,15 @@ void KernelGenerator::visit(const ir::operation::L2Normalization &node) float beta = 0.5f; // pow(reduction, -0.5) = 1 / sqrt(reduction) float bias = 0.0f; // Don't offset the reduction. 
- auto ofm_alloc = _tensor_builder->at(ofm_index).get(); - auto ifm_alloc = _tensor_builder->at(ifm_index).get(); + auto ofm_tensor = _tensor_builder->at(ofm_index).get(); + auto ifm_tensor = _tensor_builder->at(ifm_index).get(); const auto norm_info = ::arm_compute::NormalizationLayerInfo(::arm_compute::NormType::CROSS_MAP, radius, alpha, beta, bias, false); auto fn = std::make_unique<::arm_compute::CLNormalizationLayer>(); - fn->configure(ifm_alloc->handle(), ofm_alloc->handle(), norm_info); + fn->configure(ifm_tensor->handle(), ofm_tensor->handle(), norm_info); auto acl_fn = asAclClFunction(std::move(fn)); @@ -1466,17 +1197,17 @@ void KernelGenerator::visit(const ir::operation::HashtableLookup &node) const auto keys_index{node.getInputs().at(ir::operation::HashtableLookup::Input::KEYS)}; const auto values_index{node.getInputs().at(ir::operation::HashtableLookup::Input::VALUES)}; - auto output_alloc = _tensor_builder->at(output_index).get(); - auto hits_alloc = _tensor_builder->at(hits_index).get(); + auto output_tensor = _tensor_builder->at(output_index).get(); + auto hits_tensor = _tensor_builder->at(hits_index).get(); - auto lookups_alloc = _tensor_builder->at(lookups_index).get(); - auto keys_alloc = _tensor_builder->at(keys_index).get(); - auto values_alloc = _tensor_builder->at(values_index).get(); + auto lookups_tensor = _tensor_builder->at(lookups_index).get(); + auto keys_tensor = _tensor_builder->at(keys_index).get(); + auto values_tensor = _tensor_builder->at(values_index).get(); auto fn = std::make_unique<::arm_compute::CLHashtableLookup>(); - fn->configure(lookups_alloc->handle(), keys_alloc->handle(), values_alloc->handle(), - output_alloc->handle(), hits_alloc->handle()); + fn->configure(lookups_tensor->handle(), keys_tensor->handle(), values_tensor->handle(), + output_tensor->handle(), hits_tensor->handle()); auto acl_fn = asAclClFunction(std::move(fn)); @@ -1489,13 +1220,13 @@ void KernelGenerator::visit(const ir::operation::PReLU &node) const auto ifm_index{node.getInputs().at(ir::operation::PReLU::Input::INPUT)}; const auto alpha_index{node.getInputs().at(ir::operation::PReLU::Input::ALPHA)}; - auto ofm_alloc = _tensor_builder->at(ofm_index).get(); - auto ifm_alloc = _tensor_builder->at(ifm_index).get(); - auto alpha_alloc = _tensor_builder->at(alpha_index).get(); + auto ofm_tensor = _tensor_builder->at(ofm_index).get(); + auto ifm_tensor = _tensor_builder->at(ifm_index).get(); + auto alpha_tensor = _tensor_builder->at(alpha_index).get(); - auto fn = std::make_unique<::arm_compute::CLPReLU>(); + auto fn = std::make_unique<::arm_compute::CLPReluLayer>(); - fn->configure(ifm_alloc->handle(), alpha_alloc->handle(), ofm_alloc->handle()); + fn->configure(ifm_tensor->handle(), alpha_tensor->handle(), ofm_tensor->handle()); auto acl_fn = asAclClFunction(std::move(fn)); @@ -1518,7 +1249,6 @@ void KernelGenerator::visit(const ir::operation::TransposeConv &node) (node.param().padding.type == ir::PaddingType::VALID)); auto padding = ir::calculatePadding(node.param().padding, ofm_shape, ifm_shape, stride, ker_shape.W, ker_shape.H); - uint32_t invalid_horizontal = 0; uint32_t invalid_vertical = 0; if (node.param().padding.type == ir::PaddingType::VALID) @@ -1528,17 +1258,17 @@ void KernelGenerator::visit(const ir::operation::TransposeConv &node) invalid_vertical = ofm_shape.H - (1 + (ifm_shape.H - 1) * stride.vertical) - (ker_shape.H - 1); } - auto ofm_alloc = _tensor_builder->at(ofm_index).get(); - auto ifm_alloc = _tensor_builder->at(ifm_index).get(); - auto ker_alloc = 
_tensor_builder->at(ker_index).get(); + auto ofm_tensor = _tensor_builder->at(ofm_index).get(); + auto ifm_tensor = _tensor_builder->at(ifm_index).get(); + auto ker_tensor = _tensor_builder->at(ker_index).get(); const auto tconv_info = acl_common::asPadStrideInfo(padding, stride); auto fn = std::make_unique<::arm_compute::CLTransposeConvLayer>( _tensor_builder->acl_tensor_manager()->internal_buffer_manager()); - fn->configure(ifm_alloc->handle(), ker_alloc->handle(), nullptr, ofm_alloc->handle(), tconv_info, - invalid_horizontal, invalid_vertical); + fn->configure(ifm_tensor->handle(), ker_tensor->handle(), nullptr, ofm_tensor->handle(), + tconv_info, invalid_horizontal, invalid_vertical); auto acl_fn = asAclClFunction(std::move(fn)); @@ -1550,15 +1280,15 @@ void KernelGenerator::visit(const ir::operation::SQRT &node) const auto output_index{node.getOutputs().at(0)}; const auto input_index{node.getInputs().at(ir::operation::SQRT::Input::INPUT)}; - auto output_alloc = _tensor_builder->at(output_index).get(); - auto input_alloc = _tensor_builder->at(input_index).get(); + auto output_tensor = _tensor_builder->at(output_index).get(); + auto input_tensor = _tensor_builder->at(input_index).get(); const ::arm_compute::ActivationLayerInfo act_info{ ::arm_compute::ActivationLayerInfo::ActivationFunction::SQRT}; auto fn = std::make_unique<::arm_compute::CLActivationLayer>(); - fn->configure(input_alloc->handle(), output_alloc->handle(), act_info); + fn->configure(input_tensor->handle(), output_tensor->handle(), act_info); auto acl_fn = asAclClFunction(std::move(fn)); @@ -1571,13 +1301,13 @@ void KernelGenerator::visit(const ir::operation::LogicalOr &node) const auto input0_index{node.getInputs().at(ir::operation::LogicalOr::Input::INPUT0)}; const auto input1_index{node.getInputs().at(ir::operation::LogicalOr::Input::INPUT1)}; - auto output_alloc = _tensor_builder->at(output_index).get(); - auto input0_alloc = _tensor_builder->at(input0_index).get(); - auto input1_alloc = _tensor_builder->at(input1_index).get(); + auto output_tensor = _tensor_builder->at(output_index).get(); + auto input0_tensor = _tensor_builder->at(input0_index).get(); + auto input1_tensor = _tensor_builder->at(input1_index).get(); auto fn = std::make_unique<::arm_compute::CLBitwiseOr>(); - fn->configure(input0_alloc->handle(), input1_alloc->handle(), output_alloc->handle()); + fn->configure(input0_tensor->handle(), input1_tensor->handle(), output_tensor->handle()); auto acl_fn = asAclClFunction(std::move(fn)); @@ -1589,12 +1319,12 @@ void KernelGenerator::visit(const ir::operation::LogicalNot &node) const auto output_index{node.getOutputs().at(0)}; const auto input_index{node.getInputs().at(ir::operation::LogicalNot::Input::INPUT)}; - auto output_alloc = _tensor_builder->at(output_index).get(); - auto input_alloc = _tensor_builder->at(input_index).get(); + auto output_tensor = _tensor_builder->at(output_index).get(); + auto input_tensor = _tensor_builder->at(input_index).get(); auto fn = std::make_unique<::arm_compute::CLBitwiseNot>(); - fn->configure(input_alloc->handle(), output_alloc->handle()); + fn->configure(input_tensor->handle(), output_tensor->handle()); auto acl_fn = asAclClFunction(std::move(fn)); @@ -1607,13 +1337,13 @@ void KernelGenerator::visit(const ir::operation::SquaredDifference &node) const auto lhs_index{node.getInputs().at(ir::operation::SquaredDifference::Input::LHS)}; const auto rhs_index{node.getInputs().at(ir::operation::SquaredDifference::Input::RHS)}; - auto ofm_alloc = 
_tensor_builder->at(ofm_index).get(); - auto lhs_alloc = _tensor_builder->at(lhs_index).get(); - auto rhs_alloc = _tensor_builder->at(rhs_index).get(); + auto ofm_tensor = _tensor_builder->at(ofm_index).get(); + auto lhs_tensor = _tensor_builder->at(lhs_index).get(); + auto rhs_tensor = _tensor_builder->at(rhs_index).get(); auto fn = std::make_unique<::arm_compute::CLElementwiseSquaredDiff>(); - fn->configure(lhs_alloc->handle(), rhs_alloc->handle(), ofm_alloc->handle()); + fn->configure(lhs_tensor->handle(), rhs_tensor->handle(), ofm_tensor->handle()); auto acl_fn = asAclClFunction(std::move(fn)); @@ -1634,13 +1364,13 @@ void KernelGenerator::visit(const ir::operation::TopKV2 &node) const auto k = node.param().k; - auto values_alloc = _tensor_builder->at(outputValues_index).get(); - auto indices_alloc = _tensor_builder->at(outputIndices_index).get(); - auto input_alloc = _tensor_builder->at(inputData_index).get(); + auto values_tensor = _tensor_builder->at(outputValues_index).get(); + auto indices_tensor = _tensor_builder->at(outputIndices_index).get(); + auto input_tensor = _tensor_builder->at(inputData_index).get(); auto fn = std::make_unique<::arm_compute::CLTopKV2>(); - fn->configure(input_alloc->handle(), k, values_alloc->handle(), indices_alloc->handle()); + fn->configure(input_tensor->handle(), k, values_tensor->handle(), indices_tensor->handle()); auto acl_fn = asAclClFunction(std::move(fn)); @@ -1659,9 +1389,9 @@ void KernelGenerator::visit(const ir::operation::Gather &node) const auto axis_value = (axis_raw < 0 ? (ifm_rank + axis_raw) : axis_raw); const int axis = ::onert::backend::acl_common::ToARMComputeAxis(ifm_rank, axis_value).value(); - auto ofm_alloc = _tensor_builder->at(ofm_index).get(); - auto ifm_alloc = _tensor_builder->at(ifm_index).get(); - auto indices_alloc = _tensor_builder->at(indices_index).get(); + auto ofm_tensor = _tensor_builder->at(ofm_index).get(); + auto ifm_tensor = _tensor_builder->at(ifm_index).get(); + auto indices_tensor = _tensor_builder->at(indices_index).get(); // NOTE The frontend layout and backend layout must be the same for this operation. // If not the same, we have to add a stage(?) to perform permutation of output tensor. It @@ -1671,43 +1401,43 @@ void KernelGenerator::visit(const ir::operation::Gather &node) // a model. For example, if a model in NHWC has this operation as output rank == 4, indices // rank == 2 and axis == 2, this operation should work as the axis W and C, but the axis W // and C are not sequential in NCHW. So the backend in NCHW cannot handle this case. 
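The Gather visitor above first wraps a negative frontend axis into [0, ifm_rank) and then converts it into ACL's dimension order, which counts axes from the innermost dimension outward. A rough sketch of that conversion for the case where frontend and backend layouts agree; to_acl_axis is a hypothetical stand-in for acl_common::ToARMComputeAxis:

    #include <cassert>

    // When the frontend and backend layouts are identical, the frontend axis
    // simply maps to the mirrored index, because ACL numbers dimensions from
    // the fastest-moving one.
    int to_acl_axis(int rank, int frontend_axis)
    {
      // Normalize a negative axis first, as the visitor does with axis_raw.
      const int axis = frontend_axis < 0 ? frontend_axis + rank : frontend_axis;
      assert(axis >= 0 && axis < rank);
      return rank - axis - 1;
    }
    // e.g. rank == 4, axis == -1 (last frontend axis) -> to_acl_axis(4, -1) == 0.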
- const auto backend_layout = ofm_alloc->layout(); + const auto backend_layout = ofm_tensor->layout(); UNUSED_RELEASE(backend_layout); - assert(backend_layout == ifm_alloc->layout()); - assert(backend_layout == indices_alloc->layout()); + assert(backend_layout == ifm_tensor->layout()); + assert(backend_layout == indices_tensor->layout()); assert(ifm_rank < 4 || _current_op_seq_layout == backend_layout); auto fn = std::make_unique<::arm_compute::CLGatherEx>(); // input is n-D, indices k-D, output is (n + k - 1)-D size_t n = ifm_rank; - assert(n == ifm_alloc->num_dimensions()); + assert(n == ifm_tensor->num_dimensions()); size_t k = _ctx.at(indices_index).shape().rank(); - assert(k == indices_alloc->num_dimensions()); + assert(k == indices_tensor->num_dimensions()); // Disable applied dim_correction - const auto orig_ifm_acl_tensor_shape = ifm_alloc->info()->tensor_shape(); - if (n != ifm_alloc->info()->num_dimensions()) + const auto orig_ifm_acl_tensor_shape = ifm_tensor->info()->tensor_shape(); + if (n != ifm_tensor->info()->num_dimensions()) { // This means that high dimension's value is 1 and ifm tensor is applied dim_correction const auto ifm = _ctx.at(ifm_index); - ifm_alloc->info()->set_tensor_shape( + ifm_tensor->info()->set_tensor_shape( acl_common::asTensorShape(ifm.shape(), _current_op_seq_layout, backend_layout, false)); } - const auto orig_indice_acl_tensor_shape = indices_alloc->info()->tensor_shape(); - if (k != indices_alloc->info()->num_dimensions()) + const auto orig_indice_acl_tensor_shape = indices_tensor->info()->tensor_shape(); + if (k != indices_tensor->info()->num_dimensions()) { // This means that high dimension's value is 1 and indices tensor is applied dim_correction const auto indices = _ctx.at(indices_index); - indices_alloc->info()->set_tensor_shape( + indices_tensor->info()->set_tensor_shape( acl_common::asTensorShape(indices.shape(), _current_op_seq_layout, backend_layout, false)); } - fn->configure(ifm_alloc->handle(), indices_alloc->handle(), ofm_alloc->handle(), axis); + fn->configure(ifm_tensor->handle(), indices_tensor->handle(), ofm_tensor->handle(), axis); // Revert disabling applied dim_correction - ifm_alloc->info()->set_tensor_shape(orig_ifm_acl_tensor_shape); - indices_alloc->info()->set_tensor_shape(orig_indice_acl_tensor_shape); + ifm_tensor->info()->set_tensor_shape(orig_ifm_acl_tensor_shape); + indices_tensor->info()->set_tensor_shape(orig_indice_acl_tensor_shape); auto acl_fn = asAclClFunction(std::move(fn)); @@ -1719,12 +1449,12 @@ void KernelGenerator::visit(const ir::operation::Neg &node) const auto ofm_index{node.getOutputs().at(0)}; const auto ifm_index{node.getInputs().at(ir::operation::Neg::Input::INPUT)}; - auto ofm_alloc = _tensor_builder->at(ofm_index).get(); - auto ifm_alloc = _tensor_builder->at(ifm_index).get(); + auto ofm_tensor = _tensor_builder->at(ofm_index).get(); + auto ifm_tensor = _tensor_builder->at(ifm_index).get(); auto fn = std::make_unique<::arm_compute::CLNeg>(); - fn->configure(ifm_alloc->handle(), ofm_alloc->handle()); + fn->configure(ifm_tensor->handle(), ofm_tensor->handle()); auto acl_fn = asAclClFunction(std::move(fn)); @@ -1736,15 +1466,15 @@ void KernelGenerator::visit(const ir::operation::Abs &node) const auto output_index{node.getOutputs().at(0)}; const auto input_index{node.getInputs().at(ir::operation::Abs::Input::INPUT)}; - auto output_alloc = _tensor_builder->at(output_index).get(); - auto input_alloc = _tensor_builder->at(input_index).get(); + auto output_tensor = 
_tensor_builder->at(output_index).get(); + auto input_tensor = _tensor_builder->at(input_index).get(); const ::arm_compute::ActivationLayerInfo act_info{ ::arm_compute::ActivationLayerInfo::ActivationFunction::ABS}; auto fn = std::make_unique<::arm_compute::CLActivationLayer>(); - fn->configure(input_alloc->handle(), output_alloc->handle(), act_info); + fn->configure(input_tensor->handle(), output_tensor->handle(), act_info); auto acl_fn = asAclClFunction(std::move(fn)); @@ -1761,11 +1491,11 @@ void KernelGenerator::visit(const ir::operation::ArgMax &node) assert((ifm_shape.rank() - 1) == ofm_shape.rank()); - auto ofm_alloc = _tensor_builder->at(ofm_index).get(); - auto ifm_alloc = _tensor_builder->at(ifm_index).get(); + auto ofm_tensor = _tensor_builder->at(ofm_index).get(); + auto ifm_tensor = _tensor_builder->at(ifm_index).get(); const auto ifm_rank = _ctx.at(ifm_index).shape().rank(); auto frontend_layout = _current_op_seq_layout; - auto backend_layout = ifm_alloc->layout(); + auto backend_layout = ifm_tensor->layout(); int axis_value = node.param().axis; if (axis_value < 0) @@ -1776,10 +1506,10 @@ void KernelGenerator::visit(const ir::operation::ArgMax &node) auto acl_axis = acl_common::ToARMComputeAxis(ifm_rank, axis_value, frontend_layout, backend_layout).value(); - auto fn = std::make_unique<::arm_compute::CLArgOperation>(); + auto fn = std::make_unique<::arm_compute::CLArgMinMaxLayer>(); - fn->configure(ifm_alloc->handle(), ofm_alloc->handle(), {acl_axis}, - ::arm_compute::ArgOperation::MAX); + fn->configure(ifm_tensor->handle(), acl_axis, ofm_tensor->handle(), + ::arm_compute::ReductionOperation::ARG_IDX_MAX); auto acl_fn = asAclClFunction(std::move(fn)); @@ -1791,12 +1521,12 @@ void KernelGenerator::visit(const ir::operation::Dequantize &node) const auto output_index{node.getOutputs().at(0)}; const auto input_index{node.getInputs().at(ir::operation::Dequantize::Input::INPUT)}; - auto output_alloc = _tensor_builder->at(output_index).get(); - auto input_alloc = _tensor_builder->at(input_index).get(); + auto output_tensor = _tensor_builder->at(output_index).get(); + auto input_tensor = _tensor_builder->at(input_index).get(); - auto fn = std::make_unique<::arm_compute::CLCast>(); + auto fn = std::make_unique<::arm_compute::CLDequantizationLayer>(); - fn->configure(input_alloc->handle(), output_alloc->handle(), arm_compute::SubDataType::NONE); + fn->configure(input_tensor->handle(), output_tensor->handle()); auto acl_fn = asAclClFunction(std::move(fn)); @@ -1814,15 +1544,15 @@ void KernelGenerator::visit(const ir::operation::LocalResponseNormalization &nod auto beta = node.param().beta; auto bias = node.param().bias; - auto ofm_alloc = _tensor_builder->at(ofm_index).get(); - auto ifm_alloc = _tensor_builder->at(ifm_index).get(); + auto ofm_tensor = _tensor_builder->at(ofm_index).get(); + auto ifm_tensor = _tensor_builder->at(ifm_index).get(); const auto norm_info = ::arm_compute::NormalizationLayerInfo( ::arm_compute::NormType::CROSS_MAP, radius * 2 + 1, alpha, beta, bias, false); auto fn = std::make_unique<::arm_compute::CLNormalizationLayer>(); - fn->configure(ifm_alloc->handle(), ofm_alloc->handle(), norm_info); + fn->configure(ifm_tensor->handle(), ofm_tensor->handle(), norm_info); auto acl_fn = asAclClFunction(std::move(fn)); @@ -1837,12 +1567,12 @@ void KernelGenerator::visit(const ir::operation::DepthToSpace &node) auto block_size = node.param().block_size; assert(block_size > 0); - auto output_alloc = _tensor_builder->at(output_index).get(); - auto input_alloc = 
_tensor_builder->at(input_index).get(); + auto output_tensor = _tensor_builder->at(output_index).get(); + auto input_tensor = _tensor_builder->at(input_index).get(); - auto fn = std::make_unique<::arm_compute::CLDepthToSpace>(); + auto fn = std::make_unique<::arm_compute::CLDepthToSpaceLayer>(); - fn->configure(input_alloc->handle(), output_alloc->handle(), block_size); + fn->configure(input_tensor->handle(), output_tensor->handle(), block_size); auto acl_fn = asAclClFunction(std::move(fn)); @@ -1860,13 +1590,13 @@ void KernelGenerator::visit(const ir::operation::Split &node) for (const auto &output : node.getOutputs()) output_indexes.emplace_back(output); - auto ifm_alloc = _tensor_builder->at(ifm_index).get(); - std::vector<arm_compute::ICLTensor *> output_allocs; + auto ifm_tensor = _tensor_builder->at(ifm_index).get(); + std::vector<arm_compute::ICLTensor *> output_tensors; for (const auto &ofm_ind : output_indexes) - output_allocs.emplace_back(_tensor_builder->at(ofm_ind).get()->handle()); + output_tensors.emplace_back(_tensor_builder->at(ofm_ind).get()->handle()); const auto frontend_layout = _current_op_seq_layout; - const auto backend_layout = ifm_alloc->layout(); + const auto backend_layout = ifm_tensor->layout(); auto axis = node.param().axis; if (axis < 0) axis += ifm_rank; @@ -1874,7 +1604,7 @@ void KernelGenerator::visit(const ir::operation::Split &node) auto fn = std::make_unique<::arm_compute::CLSplit>(); - fn->configure(ifm_alloc->handle(), output_allocs, axis); + fn->configure(ifm_tensor->handle(), output_tensors, axis); _return_fn = asAclClFunction(std::move(fn)); } @@ -1906,13 +1636,13 @@ void KernelGenerator::visit(const ir::operation::Unpack &node) for (const auto &output_index : output_indexes) { size_t output_rank = _ctx.at(output_index).shape().rank(); - const auto &output_alloc = _tensor_builder->at(output_index); - orig_outputs_acl_tensor_shapes.emplace_back(output_alloc->info()->tensor_shape()); - assert(output_rank == output_alloc->num_dimensions()); - if (output_rank != output_alloc->info()->num_dimensions()) + const auto &output_tensor = _tensor_builder->at(output_index); + orig_outputs_acl_tensor_shapes.emplace_back(output_tensor->info()->tensor_shape()); + assert(output_rank == output_tensor->num_dimensions()); + if (output_rank != output_tensor->info()->num_dimensions()) { // This means that high dimension's value is 1 and ifm tensor is applied dim_correction - output_alloc->info()->set_tensor_shape(acl_common::asTensorShape( + output_tensor->info()->set_tensor_shape(acl_common::asTensorShape( _ctx.at(output_index).shape(), _current_op_seq_layout, backend_layout, false)); } } @@ -1959,12 +1689,12 @@ void KernelGenerator::visit(const ir::operation::Pad &node) // Disable applied dim_correction size_t input_rank = _ctx.at(input_index).shape().rank(); - const auto &input_alloc = _tensor_builder->at(input_index); - assert(input_rank == input_alloc->num_dimensions()); - if (input_rank != input_alloc->info()->num_dimensions()) + const auto &input_tensor = _tensor_builder->at(input_index); + assert(input_rank == input_tensor->num_dimensions()); + if (input_rank != input_tensor->info()->num_dimensions()) { // This means that high dimension's value is 1 and ifm tensor is applied dim_correction - input_alloc->info()->set_tensor_shape(acl_common::asTensorShape( + input_tensor->info()->set_tensor_shape(acl_common::asTensorShape( _ctx.at(input_index).shape(), frontend_layout, backend_layout, false)); } @@ -1982,13 +1712,13 @@ void KernelGenerator::visit(const 
ir::operation::Min &node) const auto lhs_index{node.getInputs().at(ir::operation::Min::Input::LHS)}; const auto rhs_index{node.getInputs().at(ir::operation::Min::Input::RHS)}; - auto ofm_alloc = _tensor_builder->at(ofm_index).get(); - auto lhs_alloc = _tensor_builder->at(lhs_index).get(); - auto rhs_alloc = _tensor_builder->at(rhs_index).get(); + auto ofm_tensor = _tensor_builder->at(ofm_index).get(); + auto lhs_tensor = _tensor_builder->at(lhs_index).get(); + auto rhs_tensor = _tensor_builder->at(rhs_index).get(); auto fn = std::make_unique<::arm_compute::CLElementwiseMin>(); - fn->configure(lhs_alloc->handle(), rhs_alloc->handle(), ofm_alloc->handle()); + fn->configure(lhs_tensor->handle(), rhs_tensor->handle(), ofm_tensor->handle()); auto acl_fn = asAclClFunction(std::move(fn)); @@ -2001,13 +1731,13 @@ void KernelGenerator::visit(const ir::operation::Max &node) const auto lhs_index{node.getInputs().at(ir::operation::Max::Input::LHS)}; const auto rhs_index{node.getInputs().at(ir::operation::Max::Input::RHS)}; - auto ofm_alloc = _tensor_builder->at(ofm_index).get(); - auto lhs_alloc = _tensor_builder->at(lhs_index).get(); - auto rhs_alloc = _tensor_builder->at(rhs_index).get(); + auto ofm_tensor = _tensor_builder->at(ofm_index).get(); + auto lhs_tensor = _tensor_builder->at(lhs_index).get(); + auto rhs_tensor = _tensor_builder->at(rhs_index).get(); auto fn = std::make_unique<::arm_compute::CLElementwiseMax>(); - fn->configure(lhs_alloc->handle(), rhs_alloc->handle(), ofm_alloc->handle()); + fn->configure(lhs_tensor->handle(), rhs_tensor->handle(), ofm_tensor->handle()); auto acl_fn = asAclClFunction(std::move(fn)); @@ -2019,12 +1749,12 @@ void KernelGenerator::visit(const ir::operation::ConvertFp32ToFp16 &node) const auto ofm_index{node.getOutputs().at(0)}; const auto ifm_index{node.getInputs().at(ir::operation::ConvertFp32ToFp16::Input::INPUT)}; - auto ofm_alloc = _tensor_builder->at(ofm_index).get(); - auto ifm_alloc = _tensor_builder->at(ifm_index).get(); + auto ofm_tensor = _tensor_builder->at(ofm_index).get(); + auto ifm_tensor = _tensor_builder->at(ifm_index).get(); auto fn = std::make_unique<::arm_compute::CLDepthConvertLayer>(); - fn->configure(ifm_alloc->handle(), ofm_alloc->handle(), ::arm_compute::ConvertPolicy::SATURATE, + fn->configure(ifm_tensor->handle(), ofm_tensor->handle(), ::arm_compute::ConvertPolicy::SATURATE, 0); auto acl_fn = asAclClFunction(std::move(fn)); @@ -2037,12 +1767,12 @@ void KernelGenerator::visit(const ir::operation::ConvertFp16ToFp32 &node) const auto ofm_index{node.getOutputs().at(0)}; const auto ifm_index{node.getInputs().at(ir::operation::ConvertFp16ToFp32::Input::INPUT)}; - auto ofm_alloc = _tensor_builder->at(ofm_index).get(); - auto ifm_alloc = _tensor_builder->at(ifm_index).get(); + auto ofm_tensor = _tensor_builder->at(ofm_index).get(); + auto ifm_tensor = _tensor_builder->at(ifm_index).get(); auto fn = std::make_unique<::arm_compute::CLDepthConvertLayer>(); - fn->configure(ifm_alloc->handle(), ofm_alloc->handle(), ::arm_compute::ConvertPolicy::SATURATE, + fn->configure(ifm_tensor->handle(), ofm_tensor->handle(), ::arm_compute::ConvertPolicy::SATURATE, 0); auto acl_fn = asAclClFunction(std::move(fn)); diff --git a/runtime/onert/backend/acl_common/AclKernelGen.h b/runtime/onert/backend/acl_common/AclKernelGen.h new file mode 100644 index 000000000..9f7ce3764 --- /dev/null +++ b/runtime/onert/backend/acl_common/AclKernelGen.h @@ -0,0 +1,315 @@ +/* + * Copyright (c) 2020 Samsung Electronics Co., Ltd. 
All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef __ONERT_BACKEND_ACL_COMMON_ACL_KERNEL_GEN_H_ +#define __ONERT_BACKEND_ACL_COMMON_ACL_KERNEL_GEN_H_ + +#include <exec/IFunction.h> +#include <ir/Operands.h> + +#include <ir/operation/LSTM.h> +#include <arm_compute/runtime/CL/CLFunctions.h> + +namespace onert +{ +namespace backend +{ +namespace acl_common +{ + +template <typename T_FunctionWrapper, typename T_Tensor, typename T_ACLLayer, + typename T_TensorBuilder> +std::unique_ptr<exec::IFunction> +kernelGenLSTM(const ir::operation::LSTM &node, const ir::Operands &operands, + const std::shared_ptr<T_TensorBuilder> &tensor_builder) +{ + // TODO Support dynamic rnn + // TODO Fix subtle error in the case of non-CIFG, non-peephole and No Projection. + const auto scratch_buffer_index{ + node.getOutputs().at(ir::operation::LSTM::Output::SCRATCH_BUFFER)}; + const auto output_state_out_index{ + node.getOutputs().at(ir::operation::LSTM::Output::OUTPUT_STATE_OUT)}; + const auto cell_state_out_index{ + node.getOutputs().at(ir::operation::LSTM::Output::CELL_STATE_OUT)}; + const auto output_index{node.getOutputs().at(ir::operation::LSTM::Output::OUTPUT)}; + + const auto input_index{node.getInputs().at(ir::operation::LSTM::Input::INPUT)}; + const auto input_to_input_weights_index{ + node.getInputs().at(ir::operation::LSTM::Input::INPUT_TO_INPUT_WEIGHTS)}; // optional + const auto input_to_forget_weights_index{ + node.getInputs().at(ir::operation::LSTM::Input::INPUT_TO_FORGET_WEIGHTS)}; + const auto input_to_cell_weights_index{ + node.getInputs().at(ir::operation::LSTM::Input::INPUT_TO_CELL_WEIGHTS)}; + const auto input_to_output_weights_index{ + node.getInputs().at(ir::operation::LSTM::Input::INPUT_TO_OUTPUT_WEIGHTS)}; + const auto recurrent_to_input_weights_index{ + node.getInputs().at(ir::operation::LSTM::Input::RECURRENT_TO_INPUT_WEIGHTS)}; // optional + const auto recurrent_to_forget_weights_index{ + node.getInputs().at(ir::operation::LSTM::Input::RECURRENT_TO_FORGET_WEIGHTS)}; + const auto recurrent_to_cell_weights_index{ + node.getInputs().at(ir::operation::LSTM::Input::RECURRENT_TO_CELL_WEIGHTS)}; + const auto recurrent_to_output_weights_index{ + node.getInputs().at(ir::operation::LSTM::Input::RECURRENT_TO_OUTPUT_WEIGHTS)}; + const auto cell_to_input_weights_index{ + node.getInputs().at(ir::operation::LSTM::Input::CELL_TO_INPUT_WEIGHTS)}; // optional + const auto cell_to_forget_weights_index{ + node.getInputs().at(ir::operation::LSTM::Input::CELL_TO_FORGET_WEIGHTS)}; // optional + const auto cell_to_output_weights_index{ + node.getInputs().at(ir::operation::LSTM::Input::CELL_TO_OUTPUT_WEIGHTS)}; // optional + const auto input_gate_bias_index{ + node.getInputs().at(ir::operation::LSTM::Input::INPUT_GATE_BIAS)}; + const auto forget_gate_bias_index{ + node.getInputs().at(ir::operation::LSTM::Input::FORGET_GATE_BIAS)}; + const auto cell_bias_index{node.getInputs().at(ir::operation::LSTM::Input::CELL_BIAS)}; + const auto output_gate_bias_index{ + 
node.getInputs().at(ir::operation::LSTM::Input::OUTPUT_GATE_BIAS)}; + const auto projection_weights_index{ + node.getInputs().at(ir::operation::LSTM::Input::PROJECTION_WEIGHTS)}; // optional + const auto projection_bias_index{ + node.getInputs().at(ir::operation::LSTM::Input::PROJECTION_BIAS)}; // optional + const auto output_state_in_index{ + node.getInputs().at(ir::operation::LSTM::Input::OUTPUT_STATE_IN)}; + const auto cell_state_in_index{node.getInputs().at(ir::operation::LSTM::Input::CELL_STATE_IN)}; + const auto cell_threshold = node.param().cell_threshold; + const auto projection_threshold = node.param().projection_threshold; + + bool has_input_to_input_weights = operands.at(input_to_input_weights_index).shape().dim(0) != 0 && + operands.at(input_to_input_weights_index).shape().dim(1) != 0; + bool has_recurrent_to_input_weights = + operands.at(recurrent_to_input_weights_index).shape().dim(0) != 0 && + operands.at(recurrent_to_input_weights_index).shape().dim(1) != 0; + bool has_cell_to_forget_weights = operands.at(cell_to_forget_weights_index).shape().dim(0) != 0; + bool has_cell_to_output_weights = operands.at(cell_to_output_weights_index).shape().dim(0) != 0; + bool has_projection_weights = operands.at(projection_weights_index).shape().dim(0) != 0 && + operands.at(projection_weights_index).shape().dim(1) != 0; + bool has_projection_bias = operands.at(projection_bias_index).shape().dim(0); + + // NOTE The input_to_input_weights and the recurrent_to_input_weights do not exist in CIFG. + // true: no CIFG + // false: CIFG + // NOTE The cell_to_input_weights does not exist in non-peephole mode, even for a regular (non-CIFG) LSTM. + bool has_cifg_param = has_input_to_input_weights && has_recurrent_to_input_weights; + + // NOTE The cell_to_forget_weights and the cell_to_output_weights exist in peephole. + // But the cell_to_input_weights does not exist in CIFG mode, even when peephole is used. + // true: peephole + // false: no peephole + bool has_peephole_param = has_cell_to_forget_weights && has_cell_to_output_weights; + + // NOTE Although the projection weights have data, the projection bias may not. 
+ bool has_projection_param = has_projection_weights; + + const auto activation = node.param().activation; + const auto cell_clip = cell_threshold; + const auto projection_clip = projection_threshold; + assert(cell_clip >= 0.f && projection_clip >= 0.f); + + auto scratch_buffer_tensor = tensor_builder->at(scratch_buffer_index).get(); + auto output_state_out_tensor = tensor_builder->at(output_state_out_index).get(); + auto cell_state_out_tensor = tensor_builder->at(cell_state_out_index).get(); + auto output_tensor = tensor_builder->at(output_index).get(); + + auto input_tensor = tensor_builder->at(input_index).get(); + + auto input_to_forget_weights_tensor = tensor_builder->at(input_to_forget_weights_index).get(); + auto input_to_cell_weights_tensor = tensor_builder->at(input_to_cell_weights_index).get(); + auto input_to_output_weights_tensor = tensor_builder->at(input_to_output_weights_index).get(); + auto recurrent_to_forget_weights_tensor = + tensor_builder->at(recurrent_to_forget_weights_index).get(); + auto recurrent_to_cell_weights_tensor = tensor_builder->at(recurrent_to_cell_weights_index).get(); + auto recurrent_to_output_weights_tensor = + tensor_builder->at(recurrent_to_output_weights_index).get(); + + auto forget_gate_bias_tensor = tensor_builder->at(forget_gate_bias_index).get(); + auto cell_bias_tensor = tensor_builder->at(cell_bias_index).get(); + auto output_gate_bias_tensor = tensor_builder->at(output_gate_bias_index).get(); + auto output_state_in_tensor = tensor_builder->at(output_state_in_index).get(); + auto cell_state_in_tensor = tensor_builder->at(cell_state_in_index).get(); + + auto act_info = ::onert::backend::acl_common::asActivationLayerInfo(activation); + + auto fn = std::make_unique<T_ACLLayer>(); + + ::arm_compute::LSTMParams<T_Tensor> lstm_params{}; + if (has_cifg_param) + { + auto input_to_input_weights_tensor = + tensor_builder->at(input_to_input_weights_index).get(); // optional + auto recurrent_to_input_weights_tensor = + tensor_builder->at(recurrent_to_input_weights_index).get(); // optional + auto cell_to_input_weights_handle = + has_peephole_param ? tensor_builder->at(cell_to_input_weights_index).get()->handle() + : nullptr; // optional (non-cifg && peephole) + auto input_gate_bias_tensor = tensor_builder->at(input_gate_bias_index).get(); // optional + lstm_params.set_cifg_params(input_to_input_weights_tensor->handle(), + recurrent_to_input_weights_tensor->handle(), + cell_to_input_weights_handle, input_gate_bias_tensor->handle()); + } + if (has_peephole_param) + { + auto cell_to_forget_weights_tensor = + tensor_builder->at(cell_to_forget_weights_index).get(); // optional + auto cell_to_output_weights_tensor = + tensor_builder->at(cell_to_output_weights_index).get(); // optional + lstm_params.set_peephole_params(cell_to_forget_weights_tensor->handle(), + cell_to_output_weights_tensor->handle()); + } + if (has_projection_param) + { + auto projection_weights_tensor = tensor_builder->at(projection_weights_index).get(); // optional + auto projection_bias_handle = has_projection_bias + ? 
tensor_builder->at(projection_bias_index).get()->handle() + : nullptr; // optional + lstm_params.set_projection_params(projection_weights_tensor->handle(), projection_bias_handle); + } + + fn->configure(input_tensor->handle(), input_to_forget_weights_tensor->handle(), + input_to_cell_weights_tensor->handle(), input_to_output_weights_tensor->handle(), + recurrent_to_forget_weights_tensor->handle(), + recurrent_to_cell_weights_tensor->handle(), + recurrent_to_output_weights_tensor->handle(), forget_gate_bias_tensor->handle(), + cell_bias_tensor->handle(), output_gate_bias_tensor->handle(), + output_state_in_tensor->handle(), cell_state_in_tensor->handle(), + scratch_buffer_tensor->handle(), output_state_out_tensor->handle(), + cell_state_out_tensor->handle(), output_tensor->handle(), lstm_params, act_info, + cell_clip, projection_clip); + + return std::make_unique<T_FunctionWrapper>(std::move(fn)); +} + +template <typename T_FunctionWrapper, typename T_Tensor, typename T_ACLLayer, + typename T_TensorBuilder> +std::unique_ptr<exec::IFunction> +kernelGenFullyConnected(const ir::operation::FullyConnected &node, const ir::Operands &operands, + const std::shared_ptr<T_TensorBuilder> &tensor_builder, ir::Layout layout) +{ + using ir::operation::FullyConnected; + + const auto output_index{node.getOutputs().at(0)}; + const auto input_index{node.getInputs().at(FullyConnected::Input::INPUT)}; + const auto weight_index{node.getInputs().at(FullyConnected::Input::WEIGHT)}; + const auto bias_index{node.getInputs().at(FullyConnected::Input::BIAS)}; + + const auto input_rank = operands.at(input_index).shape().rank(); + + const auto output_size = + operands.at(output_index).shape().dim(operands.at(output_index).shape().rank() - 1); + UNUSED_RELEASE(output_size); + assert(operands.at(bias_index).shape().dim(0) == output_size); + assert(operands.at(weight_index).shape().dim(0) == output_size); + const auto batch_size = + operands.at(output_index).shape().dim(operands.at(output_index).shape().rank() - 2); + const auto input_size = + operands.at(weight_index).shape().dim(operands.at(weight_index).shape().rank() - 1); + + // Check for reshaping input's shape into rank-2 + bool needs_reshape = false; + ir::Shape reshape(2); + if (input_rank == 3 || input_rank == 4) + { + const auto &ifm_shape = operands.at(input_index).shape(); + auto feature_size = 1; + for (int i = 0; i < ifm_shape.rank(); ++i) + { + feature_size *= ifm_shape.dim(i); + } + + UNUSED_RELEASE(feature_size); + assert(feature_size == batch_size * input_size); + + // for reshaping + needs_reshape = true; + reshape.dim(0) = batch_size; /* H */ + reshape.dim(1) = input_size; /* W */ + } + + auto output_tensor = tensor_builder->at(output_index).get(); + const auto input_tensor = tensor_builder->at(input_index).get(); + const auto weight_tensor = tensor_builder->at(weight_index).get(); + const auto bias_tensor = tensor_builder->at(bias_index).get(); + const auto frontend_layout = layout; + const auto acl_layout = output_tensor->handle()->info()->data_layout(); + + auto fn = + std::make_unique<T_ACLLayer>(tensor_builder->acl_tensor_manager()->internal_buffer_manager()); + + typename T_ACLLayer::KernelType kernel_type = T_ACLLayer::KernelType::GENERAL; + if (operands.at(weight_index).isConstant()) + { + kernel_type = T_ACLLayer::KernelType::PREPROCESSED_WEIGHTS; + assert(operands.at(weight_index).data()); + } + + fn->configure( + input_tensor->handle(), weight_tensor->handle(), bias_tensor->handle(), + output_tensor->handle(), needs_reshape, + 
::onert::backend::acl_common::asTensorShape( + reshape, frontend_layout, ::onert::backend::acl_common::asRuntimeLayout(acl_layout)), + kernel_type); + + return std::make_unique<T_FunctionWrapper>(std::move(fn)); +} + +template <typename T_ACLLayer, typename T_PoolOp, typename T_TensorBuilder> +std::unique_ptr<::arm_compute::IFunction> +kernelGenPool2D(const T_PoolOp &node, const ir::Operands &operands, + const std::shared_ptr<T_TensorBuilder> &tensor_builder, ir::Layout layout, + ::arm_compute::PoolingType pooling_type) +{ + const auto ofm_index{node.getOutputs().at(0)}; + const auto ifm_index{node.getInputs().at(0)}; + + const auto ofm_shape = operands.at(ofm_index).shape().asFeature(layout); + const auto ifm_shape = operands.at(ifm_index).shape().asFeature(layout); + + const auto kh = node.param().kh; + const auto kw = node.param().kw; + const auto stride = node.param().stride; + const auto padding = + ir::calculatePadding(node.param().padding, ifm_shape, ofm_shape, stride, kw, kh); + + VERBOSE(Pool2DParam) << "IFM_H: " << ifm_shape.H << std::endl; + VERBOSE(Pool2DParam) << "IFM_W: " << ifm_shape.W << std::endl; + VERBOSE(Pool2DParam) << "OFM_H: " << ofm_shape.H << std::endl; + VERBOSE(Pool2DParam) << "OFM_W: " << ofm_shape.W << std::endl; + VERBOSE(Pool2DParam) << "KER_H: " << kh << std::endl; + VERBOSE(Pool2DParam) << "KER_W: " << kw << std::endl; + VERBOSE(Pool2DParam) << "STRIDE_H: " << stride.vertical << std::endl; + VERBOSE(Pool2DParam) << "STRIDE_W: " << stride.horizontal << std::endl; + VERBOSE(Pool2DParam) << "PAD(T): " << padding.top << std::endl; + VERBOSE(Pool2DParam) << "PAD(B): " << padding.bottom << std::endl; + VERBOSE(Pool2DParam) << "PAD(L): " << padding.left << std::endl; + VERBOSE(Pool2DParam) << "PAD(R): " << padding.right << std::endl; + + auto ofm_tensor = tensor_builder->at(ofm_index).get(); + auto ifm_tensor = tensor_builder->at(ifm_index).get(); + + ::arm_compute::PoolingLayerInfo info{ + pooling_type, ::arm_compute::Size2D{kw, kh}, ifm_tensor->info()->data_layout(), + acl_common::asPadStrideInfo(padding, stride), true /* exclude_padding */}; + + auto fn = std::make_unique<T_ACLLayer>(); + + fn->configure(ifm_tensor->handle(), ofm_tensor->handle(), info); + + return fn; +} + +} // namespace acl_common +} // namespace backend +} // namespace onert + +#endif // __ONERT_BACKEND_ACL_COMMON_ACL_KERNEL_GEN_H_ diff --git a/runtime/onert/backend/acl_neon/KernelGenerator.cc b/runtime/onert/backend/acl_neon/KernelGenerator.cc index e47186754..1195b83cc 100644 --- a/runtime/onert/backend/acl_neon/KernelGenerator.cc +++ b/runtime/onert/backend/acl_neon/KernelGenerator.cc @@ -31,6 +31,7 @@ #include "exec/NopFunction.h" #include "util/logging.h" #include "util/Utils.h" +#include "AclKernelGen.h" namespace onert { @@ -74,15 +75,15 @@ void KernelGenerator::visit(const ir::operation::Abs &node) const auto output_index{node.getOutputs().at(0)}; const auto input_index{node.getInputs().at(ir::operation::Abs::Input::INPUT)}; - auto output_alloc = _tensor_builder->at(output_index).get(); - auto input_alloc = _tensor_builder->at(input_index).get(); + auto output_tensor = _tensor_builder->at(output_index).get(); + auto input_tensor = _tensor_builder->at(input_index).get(); const ::arm_compute::ActivationLayerInfo act_info{ ::arm_compute::ActivationLayerInfo::ActivationFunction::ABS}; auto fn = std::make_unique<::arm_compute::NEActivationLayer>(); - fn->configure(input_alloc->handle(), output_alloc->handle(), act_info); + fn->configure(input_tensor->handle(), output_tensor->handle(), 
act_info); auto acl_fn = asAclFunction(std::move(fn)); @@ -96,10 +97,10 @@ void KernelGenerator::visit(const ir::operation::ArgMax &node) const auto ifm_rank = _ctx.at(ifm_index).shape().rank(); - auto ofm_alloc = _tensor_builder->at(ofm_index).get(); - auto ifm_alloc = _tensor_builder->at(ifm_index).get(); + auto ofm_tensor = _tensor_builder->at(ofm_index).get(); + auto ifm_tensor = _tensor_builder->at(ifm_index).get(); auto frontend_layout = _current_op_seq_layout; - auto backend_layout = ifm_alloc->layout(); + auto backend_layout = ifm_tensor->layout(); int axis_value = node.param().axis; if (axis_value < 0) @@ -112,7 +113,7 @@ void KernelGenerator::visit(const ir::operation::ArgMax &node) auto fn = std::make_unique<::arm_compute::NEArgMinMaxLayer>(); - fn->configure(ifm_alloc->handle(), fixed_axis, ofm_alloc->handle(), + fn->configure(ifm_tensor->handle(), fixed_axis, ofm_tensor->handle(), arm_compute::ReductionOperation::ARG_IDX_MAX); auto acl_fn = asAclFunction(std::move(fn)); @@ -127,15 +128,15 @@ void KernelGenerator::visit(const ir::operation::BatchToSpaceND &node) const auto block_size_index{ node.getInputs().at(ir::operation::BatchToSpaceND::Input::BLOCK_SIZE)}; - auto ofm_alloc = _tensor_builder->at(ofm_index).get(); - auto ifm_alloc = _tensor_builder->at(ifm_index).get(); - auto block_size_alloc = _tensor_builder->at(block_size_index).get(); + auto ofm_tensor = _tensor_builder->at(ofm_index).get(); + auto ifm_tensor = _tensor_builder->at(ifm_index).get(); + auto block_size_tensor = _tensor_builder->at(block_size_index).get(); assert(_ctx.at(block_size_index).data()); auto fn = std::make_unique<::arm_compute::NEBatchToSpaceLayer>(); - fn->configure(ifm_alloc->handle(), block_size_alloc->handle(), ofm_alloc->handle()); + fn->configure(ifm_tensor->handle(), block_size_tensor->handle(), ofm_tensor->handle()); auto acl_fn = asAclFunction(std::move(fn)); @@ -147,15 +148,26 @@ void KernelGenerator::visit(const ir::operation::Cast &node) const auto ofm_index{node.getOutputs().at(0)}; const auto ifm_index{node.getInputs().at(ir::operation::Cast::Input::INPUT)}; - auto ofm_alloc = _tensor_builder->at(ofm_index).get(); - auto ifm_alloc = _tensor_builder->at(ifm_index).get(); + auto ofm_tensor = _tensor_builder->at(ofm_index).get(); + auto ifm_tensor = _tensor_builder->at(ifm_index).get(); - auto fn = std::make_unique<::arm_compute::NECast>(); + std::unique_ptr<::arm_compute::IFunction> fn; + if (ifm_tensor->data_type() == ofm_tensor->data_type()) + { + auto l = std::make_unique<::arm_compute::NECopy>(); - auto input_sub_type = _ctx.at(ifm_index).typeInfo().type() == ir::DataType::BOOL8 - ? 
arm_compute::SubDataType::BOOL - : arm_compute::SubDataType::NONE; - fn->configure(ifm_alloc->handle(), ofm_alloc->handle(), input_sub_type); + l->configure(ifm_tensor->handle(), ofm_tensor->handle()); + + fn = std::move(l); + } + else + { + auto l = std::make_unique<::arm_compute::NECast>(); + + l->configure(ifm_tensor->handle(), ofm_tensor->handle(), arm_compute::ConvertPolicy::SATURATE); + + fn = std::move(l); + } auto acl_fn = asAclFunction(std::move(fn)); @@ -183,10 +195,10 @@ void KernelGenerator::visit(const ir::operation::Conv2D &node) ker_width, ker_height); const auto activation = node.param().activation; - auto ofm_alloc = _tensor_builder->at(ofm_index).get(); - auto ifm_alloc = _tensor_builder->at(ifm_index).get(); - auto ker_alloc = _tensor_builder->at(ker_index).get(); - auto bias_alloc = _tensor_builder->at(bias_index).get(); + auto ofm_tensor = _tensor_builder->at(ofm_index).get(); + auto ifm_tensor = _tensor_builder->at(ifm_index).get(); + auto ker_tensor = _tensor_builder->at(ker_index).get(); + auto bias_tensor = _tensor_builder->at(bias_index).get(); const auto conv_info = acl_common::asPadStrideInfo(padding, stride); const auto act_info = acl_common::asActivationLayerInfo(activation); @@ -194,8 +206,9 @@ void KernelGenerator::visit(const ir::operation::Conv2D &node) auto fn = std::make_unique<::arm_compute::NEConvolutionLayer>( _tensor_builder->acl_tensor_manager()->internal_buffer_manager()); - fn->configure(ifm_alloc->handle(), ker_alloc->handle(), bias_alloc->handle(), ofm_alloc->handle(), - conv_info, ::arm_compute::WeightsInfo(), ::arm_compute::Size2D(1U, 1U), act_info); + fn->configure(ifm_tensor->handle(), ker_tensor->handle(), bias_tensor->handle(), + ofm_tensor->handle(), conv_info, ::arm_compute::WeightsInfo(), + ::arm_compute::Size2D(1U, 1U), act_info); _return_fn = asAclFunction(std::move(fn)); } @@ -208,12 +221,12 @@ void KernelGenerator::visit(const ir::operation::DepthToSpace &node) auto block_size = node.param().block_size; assert(block_size > 0); - auto output_alloc = _tensor_builder->at(output_index).get(); - auto input_alloc = _tensor_builder->at(input_index).get(); + auto output_tensor = _tensor_builder->at(output_index).get(); + auto input_tensor = _tensor_builder->at(input_index).get(); - auto fn = std::make_unique<::arm_compute::NEDepthToSpaceLayerEx>(); + auto fn = std::make_unique<::arm_compute::NEDepthToSpaceLayer>(); - fn->configure(input_alloc->handle(), output_alloc->handle(), block_size); + fn->configure(input_tensor->handle(), output_tensor->handle(), block_size); auto acl_fn = asAclFunction(std::move(fn)); @@ -242,10 +255,10 @@ void KernelGenerator::visit(const ir::operation::DepthwiseConv2D &node) const auto multiplier = node.param().multiplier; const auto activation = node.param().activation; - auto ofm_alloc = _tensor_builder->at(ofm_index).get(); - auto ifm_alloc = _tensor_builder->at(ifm_index).get(); - auto ker_alloc = _tensor_builder->at(ker_index).get(); - auto bias_alloc = _tensor_builder->at(bias_index).get(); + auto ofm_tensor = _tensor_builder->at(ofm_index).get(); + auto ifm_tensor = _tensor_builder->at(ifm_index).get(); + auto ker_tensor = _tensor_builder->at(ker_index).get(); + auto bias_tensor = _tensor_builder->at(bias_index).get(); const auto conv_info = acl_common::asPadStrideInfo(padding, stride); const auto act_info = acl_common::asActivationLayerInfo(activation); @@ -253,8 +266,8 @@ void KernelGenerator::visit(const ir::operation::DepthwiseConv2D &node) { auto fn = 
std::make_unique<::arm_compute::NEDepthwiseConvolutionLayer>(); - fn->configure(ifm_alloc->handle(), ker_alloc->handle(), bias_alloc->handle(), - ofm_alloc->handle(), conv_info, multiplier, act_info); + fn->configure(ifm_tensor->handle(), ker_tensor->handle(), bias_tensor->handle(), + ofm_tensor->handle(), conv_info, multiplier, act_info); _return_fn = asAclFunction(std::move(fn)); } @@ -265,12 +278,12 @@ void KernelGenerator::visit(const ir::operation::Dequantize &node) const auto output_index{node.getOutputs().at(0)}; const auto input_index{node.getInputs().at(ir::operation::Dequantize::Input::INPUT)}; - auto output_alloc = _tensor_builder->at(output_index).get(); - auto input_alloc = _tensor_builder->at(input_index).get(); + auto output_tensor = _tensor_builder->at(output_index).get(); + auto input_tensor = _tensor_builder->at(input_index).get(); auto fn = std::make_unique<::arm_compute::NEDequantizationLayer>(); - fn->configure(input_alloc->handle(), output_alloc->handle()); + fn->configure(input_tensor->handle(), output_tensor->handle()); auto acl_fn = asAclFunction(std::move(fn)); @@ -279,88 +292,28 @@ void KernelGenerator::visit(const ir::operation::Dequantize &node) void KernelGenerator::visit(const ir::operation::MaxPool2D &node) { - const auto ofm_index{node.getOutputs().at(0)}; - const auto ifm_index{node.getInputs().at(ir::operation::MaxPool2D::Input::INPUT)}; - - const auto ofm_shape = _ctx.at(ofm_index).shape().asFeature(_current_op_seq_layout); - const auto ifm_shape = _ctx.at(ifm_index).shape().asFeature(_current_op_seq_layout); + auto raw_fn = acl_common::kernelGenPool2D<::arm_compute::NEPoolingLayer>( + node, _ctx, _tensor_builder, _current_op_seq_layout, ::arm_compute::PoolingType::MAX); - const auto kh = node.param().kh; - const auto kw = node.param().kw; - const auto stride = node.param().stride; - const auto padding = - ir::calculatePadding(node.param().padding, ifm_shape, ofm_shape, stride, kw, kh); + const auto ofm_index{node.getOutputs().at(0)}; + auto ofm_tensor = _tensor_builder->at(ofm_index).get(); const auto activation = node.param().activation; - - VERBOSE(MaxPool2D) << "IFM_H: " << ifm_shape.H << std::endl; - VERBOSE(MaxPool2D) << "IFM_W: " << ifm_shape.W << std::endl; - VERBOSE(MaxPool2D) << "OFM_H: " << ofm_shape.H << std::endl; - VERBOSE(MaxPool2D) << "OFM_W: " << ofm_shape.W << std::endl; - VERBOSE(MaxPool2D) << "KER_H: " << kh << std::endl; - VERBOSE(MaxPool2D) << "KER_W: " << kw << std::endl; - VERBOSE(MaxPool2D) << "STRIDE_H: " << stride.vertical << std::endl; - VERBOSE(MaxPool2D) << "STRIDE_W: " << stride.horizontal << std::endl; - VERBOSE(MaxPool2D) << "PAD(T): " << padding.top << std::endl; - VERBOSE(MaxPool2D) << "PAD(B): " << padding.bottom << std::endl; - VERBOSE(MaxPool2D) << "PAD(L): " << padding.left << std::endl; - VERBOSE(MaxPool2D) << "PAD(R): " << padding.right << std::endl; - - auto ofm_alloc = _tensor_builder->at(ofm_index).get(); - auto ifm_alloc = _tensor_builder->at(ifm_index).get(); - - ::arm_compute::PoolingLayerInfo info{::arm_compute::PoolingType::MAX, - ::arm_compute::Size2D{kw, kh}, - acl_common::asPadStrideInfo(padding, stride)}; - - auto fn = std::make_unique<::arm_compute::NEPoolingLayer>(); - - fn->configure(ifm_alloc->handle(), ofm_alloc->handle(), info); - _return_fn = std::make_unique<exec::FunctionSequence>( - asAclFunction(std::move(fn)), ActivationBuilder::generate(activation, ofm_alloc->handle())); + asAclFunction(std::move(raw_fn)), + ActivationBuilder::generate(activation, ofm_tensor->handle())); } void 
KernelGenerator::visit(const ir::operation::AvgPool2D &node) { - const auto ofm_index{node.getOutputs().at(0)}; - const auto ifm_index{node.getInputs().at(ir::operation::AvgPool2D::Input::INPUT)}; - - const auto ofm_shape = _ctx.at(ofm_index).shape().asFeature(_current_op_seq_layout); - const auto ifm_shape = _ctx.at(ifm_index).shape().asFeature(_current_op_seq_layout); + auto raw_fn = acl_common::kernelGenPool2D<::arm_compute::NEPoolingLayer>( + node, _ctx, _tensor_builder, _current_op_seq_layout, ::arm_compute::PoolingType::AVG); - const auto kh = node.param().kh; - const auto kw = node.param().kw; - const auto stride = node.param().stride; - const auto padding = - ir::calculatePadding(node.param().padding, ifm_shape, ofm_shape, stride, kw, kh); + const auto ofm_index{node.getOutputs().at(0)}; + auto ofm_tensor = _tensor_builder->at(ofm_index).get(); const auto activation = node.param().activation; - - VERBOSE(AvgPool2D) << "IFM_H: " << ifm_shape.H << std::endl; - VERBOSE(AvgPool2D) << "IFM_W: " << ifm_shape.W << std::endl; - VERBOSE(AvgPool2D) << "OFM_H: " << ofm_shape.H << std::endl; - VERBOSE(AvgPool2D) << "OFM_W: " << ofm_shape.W << std::endl; - VERBOSE(AvgPool2D) << "KER_H: " << kh << std::endl; - VERBOSE(AvgPool2D) << "KER_W: " << kw << std::endl; - VERBOSE(AvgPool2D) << "STRIDE_H: " << stride.vertical << std::endl; - VERBOSE(AvgPool2D) << "STRIDE_W: " << stride.horizontal << std::endl; - VERBOSE(AvgPool2D) << "PAD(T): " << padding.top << std::endl; - VERBOSE(AvgPool2D) << "PAD(B): " << padding.bottom << std::endl; - VERBOSE(AvgPool2D) << "PAD(L): " << padding.left << std::endl; - VERBOSE(AvgPool2D) << "PAD(R): " << padding.right << std::endl; - - auto ofm_alloc = _tensor_builder->at(ofm_index).get(); - auto ifm_alloc = _tensor_builder->at(ifm_index).get(); - - ::arm_compute::PoolingLayerInfo info{ - ::arm_compute::PoolingType::AVG, ::arm_compute::Size2D{kw, kh}, - acl_common::asPadStrideInfo(padding, stride), true /* exclude_padding */}; - - auto fn = std::make_unique<::arm_compute::NEPoolingLayer>(); - - fn->configure(ifm_alloc->handle(), ofm_alloc->handle(), info); - _return_fn = std::make_unique<exec::FunctionSequence>( - asAclFunction(std::move(fn)), ActivationBuilder::generate(activation, ofm_alloc->handle())); + asAclFunction(std::move(raw_fn)), + ActivationBuilder::generate(activation, ofm_tensor->handle())); } void KernelGenerator::visit(const ir::operation::Concat &node) @@ -383,7 +336,7 @@ void KernelGenerator::visit(const ir::operation::Concat &node) return; } - auto output_alloc = _tensor_builder->at(ofm_index).get(); + auto output_tensor = _tensor_builder->at(ofm_index).get(); std::vector<::arm_compute::ITensor *> input_tensors; for (const auto &ifm_ind : input_indexes) input_tensors.emplace_back(_tensor_builder->at(ifm_ind)->handle()); @@ -392,7 +345,7 @@ void KernelGenerator::visit(const ir::operation::Concat &node) if (input_indexes.size() < 2) { auto l = std::make_unique<::arm_compute::NECopy>(); - l->configure(input_tensors.at(0), output_alloc->handle()); + l->configure(input_tensors.at(0), output_tensor->handle()); fn = std::move(l); } else @@ -400,10 +353,10 @@ void KernelGenerator::visit(const ir::operation::Concat &node) auto l = std::make_unique<::arm_compute::NEConcatenateLayer>(); const auto rank = _ctx.at(ofm_index).shape().rank(); const auto frontend_layout = _current_op_seq_layout; - const auto backend_layout = output_alloc->layout(); + const auto backend_layout = output_tensor->layout(); const auto fixed_axis = acl_common::ToARMComputeAxis(rank, axis, 
frontend_layout, backend_layout).value(); - l->configure(input_tensors, output_alloc->handle(), fixed_axis); + l->configure(input_tensors, output_tensor->handle(), fixed_axis); fn = std::move(l); } @@ -418,13 +371,13 @@ void KernelGenerator::visit(const ir::operation::EmbeddingLookup &node) const auto lookups_index{node.getInputs().at(ir::operation::EmbeddingLookup::Input::LOOKUPS)}; const auto values_index{node.getInputs().at(ir::operation::EmbeddingLookup::Input::VALUES)}; - auto output_alloc = _tensor_builder->at(output_index).get(); - auto lookups_alloc = _tensor_builder->at(lookups_index).get(); - auto values_alloc = _tensor_builder->at(values_index).get(); + auto output_tensor = _tensor_builder->at(output_index).get(); + auto lookups_tensor = _tensor_builder->at(lookups_index).get(); + auto values_tensor = _tensor_builder->at(values_index).get(); auto fn = std::make_unique<::arm_compute::NEEmbeddingLookup>(); - fn->configure(values_alloc->handle(), output_alloc->handle(), lookups_alloc->handle()); + fn->configure(values_tensor->handle(), output_tensor->handle(), lookups_tensor->handle()); auto acl_fn = asAclFunction(std::move(fn)); @@ -436,12 +389,12 @@ void KernelGenerator::visit(const ir::operation::Floor &node) const auto ofm_index{node.getOutputs().at(0)}; const auto ifm_index{node.getInputs().at(ir::operation::Floor::Input::INPUT)}; - auto ofm_alloc = _tensor_builder->at(ofm_index).get(); - auto ifm_alloc = _tensor_builder->at(ifm_index).get(); + auto ofm_tensor = _tensor_builder->at(ofm_index).get(); + auto ifm_tensor = _tensor_builder->at(ifm_index).get(); auto fn = std::make_unique<::arm_compute::NEFloor>(); - fn->configure(ifm_alloc->handle(), ofm_alloc->handle()); + fn->configure(ifm_tensor->handle(), ofm_tensor->handle()); auto acl_fn = asAclFunction(std::move(fn)); @@ -450,76 +403,15 @@ void KernelGenerator::visit(const ir::operation::Floor &node) void KernelGenerator::visit(const ir::operation::FullyConnected &node) { - using ir::operation::FullyConnected; - const auto output_index{node.getOutputs().at(0)}; - const auto input_index{node.getInputs().at(FullyConnected::Input::INPUT)}; - const auto weight_index{node.getInputs().at(FullyConnected::Input::WEIGHT)}; - const auto bias_index{node.getInputs().at(FullyConnected::Input::BIAS)}; - - const auto input_rank = _ctx.at(input_index).shape().rank(); - - const auto output_size = - _ctx.at(output_index).shape().dim(_ctx.at(output_index).shape().rank() - 1); - UNUSED_RELEASE(output_size); - assert(_ctx.at(bias_index).shape().dim(0) == output_size); - assert(_ctx.at(weight_index).shape().dim(0) == output_size); - const auto batch_size = - _ctx.at(output_index).shape().dim(_ctx.at(output_index).shape().rank() - 2); - const auto input_size = - _ctx.at(weight_index).shape().dim(_ctx.at(weight_index).shape().rank() - 1); - - // Check for reshaping input's shape into rank-2 - bool needs_reshape = false; - ir::Shape reshape(2); - if (input_rank == 3 || input_rank == 4) - { - const auto &ifm_shape = _ctx.at(input_index).shape(); - auto feature_size = 1; - for (int i = 0; i < ifm_shape.rank(); ++i) - { - feature_size *= ifm_shape.dim(i); - } - - UNUSED_RELEASE(feature_size); - assert(feature_size == batch_size * input_size); - - // for reshaping - needs_reshape = true; - reshape.dim(0) = batch_size; /* H */ - reshape.dim(1) = input_size; /* W */ - } - + auto output_tensor = _tensor_builder->at(output_index).get(); const auto activation = node.param().activation; - auto output_alloc = _tensor_builder->at(output_index).get(); - const 
auto input_alloc = _tensor_builder->at(input_index).get(); - const auto weight_alloc = _tensor_builder->at(weight_index).get(); - const auto bias_alloc = _tensor_builder->at(bias_index).get(); - const auto frontend_layout = _current_op_seq_layout; - const auto acl_layout = output_alloc->handle()->info()->data_layout(); - - auto fn = std::make_unique<arm_compute::NEFullyConnectedReshapingLayer>( - _tensor_builder->acl_tensor_manager()->internal_buffer_manager()); - - arm_compute::NEFullyConnectedReshapingLayer::KernelType kernel_type = - arm_compute::NEFullyConnectedReshapingLayer::KernelType::GENERAL; - if (_ctx.at(weight_index).isConstant()) - { - kernel_type = arm_compute::NEFullyConnectedReshapingLayer::KernelType::PREPROCESSED_WEIGHTS; - assert(_ctx.at(weight_index).data()); - } - - fn->configure( - input_alloc->handle(), weight_alloc->handle(), bias_alloc->handle(), output_alloc->handle(), - needs_reshape, - ::onert::backend::acl_common::asTensorShape( - reshape, frontend_layout, ::onert::backend::acl_common::asRuntimeLayout(acl_layout)), - kernel_type); - + auto fn = acl_common::kernelGenFullyConnected<acl_common::AclFunction, ::arm_compute::ITensor, + ::arm_compute::NEFullyConnectedReshapingLayer>( + node, _ctx, _tensor_builder, _current_op_seq_layout); _return_fn = std::make_unique<exec::FunctionSequence>( - asAclFunction(std::move(fn)), - ActivationBuilder::generate(activation, output_alloc->handle())); + std::move(fn), ActivationBuilder::generate(activation, output_tensor->handle())); } void KernelGenerator::visit(const ir::operation::HashtableLookup &node) @@ -531,17 +423,17 @@ void KernelGenerator::visit(const ir::operation::HashtableLookup &node) const auto keys_index{node.getInputs().at(ir::operation::HashtableLookup::Input::KEYS)}; const auto values_index{node.getInputs().at(ir::operation::HashtableLookup::Input::VALUES)}; - auto output_alloc = _tensor_builder->at(output_index).get(); - auto hits_alloc = _tensor_builder->at(hits_index).get(); + auto output_tensor = _tensor_builder->at(output_index).get(); + auto hits_tensor = _tensor_builder->at(hits_index).get(); - auto lookups_alloc = _tensor_builder->at(lookups_index).get(); - auto keys_alloc = _tensor_builder->at(keys_index).get(); - auto values_alloc = _tensor_builder->at(values_index).get(); + auto lookups_tensor = _tensor_builder->at(lookups_index).get(); + auto keys_tensor = _tensor_builder->at(keys_index).get(); + auto values_tensor = _tensor_builder->at(values_index).get(); auto fn = std::make_unique<::arm_compute::NEHashtableLookup>(); - fn->configure(lookups_alloc->handle(), keys_alloc->handle(), values_alloc->handle(), - output_alloc->handle(), hits_alloc->handle()); + fn->configure(lookups_tensor->handle(), keys_tensor->handle(), values_tensor->handle(), + output_tensor->handle(), hits_tensor->handle()); auto acl_fn = asAclFunction(std::move(fn)); @@ -561,10 +453,10 @@ void KernelGenerator::visit(const ir::operation::Gather &node) // Converting in reverse order const int axis = ::onert::backend::acl_common::ToARMComputeAxis(ifm_rank, axis_value).value(); - auto ofm_alloc = _tensor_builder->at(ofm_index).get(); - auto ifm_alloc = _tensor_builder->at(ifm_index).get(); - auto indices_alloc = _tensor_builder->at(indices_index).get(); - const auto backend_layout = ofm_alloc->layout(); + auto ofm_tensor = _tensor_builder->at(ofm_index).get(); + auto ifm_tensor = _tensor_builder->at(ifm_index).get(); + auto indices_tensor = _tensor_builder->at(indices_index).get(); + const auto backend_layout = ofm_tensor->layout(); 
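The asserts that follow encode Gather's rank rule: an n-D input gathered with k-D indices along one axis yields an (n + k - 1)-D output, because the indexed axis is replaced by the indices' shape. A small illustrative sketch (hypothetical helper, frontend dimension order):

    #include <vector>

    // Rank rule asserted by the Gather visitors: the gathered axis of the
    // n-D input is replaced by the k-D shape of the indices, so the output
    // rank is n + k - 1.
    std::vector<int> gather_output_shape(const std::vector<int> &input_shape,
                                         const std::vector<int> &indices_shape,
                                         int axis)
    {
      std::vector<int> out(input_shape.begin(), input_shape.begin() + axis);
      out.insert(out.end(), indices_shape.begin(), indices_shape.end());
      out.insert(out.end(), input_shape.begin() + axis + 1, input_shape.end());
      return out; // e.g. n == 3, k == 2 -> rank-4 output
    }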
UNUSED_RELEASE(backend_layout); // NOTE The frontend layout and backend layout must be the same for this operation. @@ -575,35 +467,35 @@ void KernelGenerator::visit(const ir::operation::Gather &node) // a model. For example, if a model in NHWC has this operation as output rank == 4, indices // rank == 2 and axis == 2, this operation should work as the axis W and C, but the axis W // and C are not sequential in NCHW. So the backend in NCHW cannot handle this case. - assert(backend_layout == ifm_alloc->layout()); - assert(backend_layout == indices_alloc->layout()); + assert(backend_layout == ifm_tensor->layout()); + assert(backend_layout == indices_tensor->layout()); assert(ifm_rank < 4 || _current_op_seq_layout == backend_layout); auto fn = std::make_unique<::arm_compute::NEGatherEx>(); // input is n-D, indices k-D, output is (n + k - 1)-D size_t n = ifm_rank; - assert(n == ifm_alloc->num_dimensions()); + assert(n == ifm_tensor->num_dimensions()); size_t k = _ctx.at(indices_index).shape().rank(); - assert(k == indices_alloc->num_dimensions()); + assert(k == indices_tensor->num_dimensions()); // Disable applied dim_correction - if (n != ifm_alloc->info()->num_dimensions()) + if (n != ifm_tensor->info()->num_dimensions()) { // This means that high dimension's value is 1 and ifm tensor is applied dim_correction const auto ifm = _ctx.at(ifm_index); - ifm_alloc->info()->set_tensor_shape( + ifm_tensor->info()->set_tensor_shape( acl_common::asTensorShape(ifm.shape(), _current_op_seq_layout, backend_layout, false)); } - if (k != indices_alloc->info()->num_dimensions()) + if (k != indices_tensor->info()->num_dimensions()) { // This means that high dimension's value is 1 and indices tensor is applied dim_correction const auto indices = _ctx.at(indices_index); - indices_alloc->info()->set_tensor_shape( + indices_tensor->info()->set_tensor_shape( acl_common::asTensorShape(indices.shape(), _current_op_seq_layout, backend_layout, false)); } - fn->configure(ifm_alloc->handle(), indices_alloc->handle(), ofm_alloc->handle(), axis); + fn->configure(ifm_tensor->handle(), indices_tensor->handle(), ofm_tensor->handle(), axis); // acl_neon does not revert disabling applied dim_correction because acl_neon's kernels would // use arm_compute::TensorInfo::offset_element_in_bytes() @@ -621,20 +513,20 @@ void KernelGenerator::visit(const ir::operation::InstanceNorm &node) const auto gamma_index{node.getInputs().at(ir::operation::InstanceNorm::Input::GAMMA)}; const auto beta_index{node.getInputs().at(ir::operation::InstanceNorm::Input::BETA)}; - auto ofm_alloc = _tensor_builder->at(ofm_index).get(); - auto ifm_alloc = _tensor_builder->at(ifm_index).get(); - auto gamma_alloc = _tensor_builder->at(gamma_index).get(); - auto beta_alloc = _tensor_builder->at(beta_index).get(); + auto ofm_tensor = _tensor_builder->at(ofm_index).get(); + auto ifm_tensor = _tensor_builder->at(ifm_index).get(); + auto gamma_tensor = _tensor_builder->at(gamma_index).get(); + auto beta_tensor = _tensor_builder->at(beta_index).get(); auto epsilon = node.param().epsilon; auto activation = node.param().activation; auto fn = std::make_unique<::arm_compute::NEInstanceNormalizationLayerEx>(); - fn->configure(ifm_alloc->handle(), ofm_alloc->handle(), gamma_alloc->handle(), - beta_alloc->handle(), epsilon); + fn->configure(ifm_tensor->handle(), ofm_tensor->handle(), gamma_tensor->handle(), + beta_tensor->handle(), epsilon); _return_fn = std::make_unique<exec::FunctionSequence>( - asAclFunction(std::move(fn)), ActivationBuilder::generate(activation, 
ofm_alloc->handle())); + asAclFunction(std::move(fn)), ActivationBuilder::generate(activation, ofm_tensor->handle())); } void KernelGenerator::visit(const ir::operation::L2Normalization &node) @@ -656,15 +548,15 @@ void KernelGenerator::visit(const ir::operation::L2Normalization &node) float beta = 0.5f; // pow(reduction, -0.5) = 1 / sqrt(reduction) float bias = 0.0f; // Don't offset the reduction. - auto ofm_alloc = _tensor_builder->at(ofm_index).get(); - auto ifm_alloc = _tensor_builder->at(ifm_index).get(); + auto ofm_tensor = _tensor_builder->at(ofm_index).get(); + auto ifm_tensor = _tensor_builder->at(ifm_index).get(); const auto norm_info = ::arm_compute::NormalizationLayerInfo(::arm_compute::NormType::CROSS_MAP, radius, alpha, beta, bias, false); auto fn = std::make_unique<::arm_compute::NENormalizationLayer>(); - fn->configure(ifm_alloc->handle(), ofm_alloc->handle(), norm_info); + fn->configure(ifm_tensor->handle(), ofm_tensor->handle(), norm_info); auto acl_fn = asAclFunction(std::move(fn)); @@ -673,32 +565,15 @@ void KernelGenerator::visit(const ir::operation::L2Normalization &node) void KernelGenerator::visit(const ir::operation::L2Pool2D &node) { - const auto ofm_index{node.getOutputs().at(0)}; - const auto ifm_index{node.getInputs().at(ir::operation::L2Pool2D::Input::INPUT)}; - - const auto ifm_shape = _ctx.at(ifm_index).shape().asFeature(_current_op_seq_layout); - const auto ofm_shape = _ctx.at(ofm_index).shape().asFeature(_current_op_seq_layout); + auto raw_fn = acl_common::kernelGenPool2D<::arm_compute::NEPoolingLayer>( + node, _ctx, _tensor_builder, _current_op_seq_layout, ::arm_compute::PoolingType::L2); - uint32_t kw = node.param().kw; - uint32_t kh = node.param().kh; - const auto stride = node.param().stride; - const auto padding = - ir::calculatePadding(node.param().padding, ifm_shape, ofm_shape, stride, kw, kh); + const auto ofm_index{node.getOutputs().at(0)}; + auto ofm_tensor = _tensor_builder->at(ofm_index).get(); const auto activation = node.param().activation; - - auto ofm_alloc = _tensor_builder->at(ofm_index).get(); - auto ifm_alloc = _tensor_builder->at(ifm_index).get(); - - ::arm_compute::PoolingLayerInfo info{ - ::arm_compute::PoolingType::L2, ::arm_compute::Size2D{kw, kh}, - ::onert::backend::acl_common::asPadStrideInfo(padding, stride)}; - - auto fn = std::make_unique<::arm_compute::NEPoolingLayer>(); - - fn->configure(ifm_alloc->handle(), ofm_alloc->handle(), info); - _return_fn = std::make_unique<exec::FunctionSequence>( - asAclFunction(std::move(fn)), ActivationBuilder::generate(activation, ofm_alloc->handle())); + asAclFunction(std::move(raw_fn)), + ActivationBuilder::generate(activation, ofm_tensor->handle())); } void KernelGenerator::visit(const ir::operation::LocalResponseNormalization &node) @@ -712,15 +587,15 @@ void KernelGenerator::visit(const ir::operation::LocalResponseNormalization &nod auto beta = node.param().beta; auto bias = node.param().bias; - auto ofm_alloc = _tensor_builder->at(ofm_index).get(); - auto ifm_alloc = _tensor_builder->at(ifm_index).get(); + auto ofm_tensor = _tensor_builder->at(ofm_index).get(); + auto ifm_tensor = _tensor_builder->at(ifm_index).get(); const auto norm_info = ::arm_compute::NormalizationLayerInfo( ::arm_compute::NormType::CROSS_MAP, radius * 2 + 1, alpha, beta, bias, false); auto fn = std::make_unique<::arm_compute::NENormalizationLayer>(); - fn->configure(ifm_alloc->handle(), ofm_alloc->handle(), norm_info); + fn->configure(ifm_tensor->handle(), ofm_tensor->handle(), norm_info); auto acl_fn = 
asAclFunction(std::move(fn)); @@ -733,13 +608,13 @@ void KernelGenerator::visit(const ir::operation::LogicalAnd &node) const auto input0_index{node.getInputs().at(ir::operation::LogicalAnd::Input::INPUT0)}; const auto input1_index{node.getInputs().at(ir::operation::LogicalAnd::Input::INPUT1)}; - auto output_alloc = _tensor_builder->at(output_index).get(); - auto input0_alloc = _tensor_builder->at(input0_index).get(); - auto input1_alloc = _tensor_builder->at(input1_index).get(); + auto output_tensor = _tensor_builder->at(output_index).get(); + auto input0_tensor = _tensor_builder->at(input0_index).get(); + auto input1_tensor = _tensor_builder->at(input1_index).get(); auto fn = std::make_unique<::arm_compute::NELogicalAnd>(); - fn->configure(input0_alloc->handle(), input1_alloc->handle(), output_alloc->handle()); + fn->configure(input0_tensor->handle(), input1_tensor->handle(), output_tensor->handle()); auto acl_fn = asAclFunction(std::move(fn)); @@ -751,12 +626,12 @@ void KernelGenerator::visit(const ir::operation::LogicalNot &node) const auto output_index{node.getOutputs().at(0)}; const auto input_index{node.getInputs().at(ir::operation::LogicalNot::Input::INPUT)}; - auto output_alloc = _tensor_builder->at(output_index).get(); - auto input_alloc = _tensor_builder->at(input_index).get(); + auto output_tensor = _tensor_builder->at(output_index).get(); + auto input_tensor = _tensor_builder->at(input_index).get(); auto fn = std::make_unique<::arm_compute::NEBitwiseNot>(); - fn->configure(input_alloc->handle(), output_alloc->handle()); + fn->configure(input_tensor->handle(), output_tensor->handle()); auto acl_fn = asAclFunction(std::move(fn)); @@ -769,13 +644,13 @@ void KernelGenerator::visit(const ir::operation::LogicalOr &node) const auto input0_index{node.getInputs().at(ir::operation::LogicalOr::Input::INPUT0)}; const auto input1_index{node.getInputs().at(ir::operation::LogicalOr::Input::INPUT1)}; - auto output_alloc = _tensor_builder->at(output_index).get(); - auto input0_alloc = _tensor_builder->at(input0_index).get(); - auto input1_alloc = _tensor_builder->at(input1_index).get(); + auto output_tensor = _tensor_builder->at(output_index).get(); + auto input0_tensor = _tensor_builder->at(input0_index).get(); + auto input1_tensor = _tensor_builder->at(input1_index).get(); auto fn = std::make_unique<::arm_compute::NELogicalOr>(); - fn->configure(input0_alloc->handle(), input1_alloc->handle(), output_alloc->handle()); + fn->configure(input0_tensor->handle(), input1_tensor->handle(), output_tensor->handle()); auto acl_fn = asAclFunction(std::move(fn)); @@ -787,8 +662,8 @@ void KernelGenerator::visit(const ir::operation::Logistic &node) const auto ofm_index{node.getOutputs().at(0)}; const auto ifm_index{node.getInputs().at(ir::operation::Logistic::Input::INPUT)}; - auto ofm_alloc = _tensor_builder->at(ofm_index).get(); - auto ifm_alloc = _tensor_builder->at(ifm_index).get(); + auto ofm_tensor = _tensor_builder->at(ofm_index).get(); + auto ifm_tensor = _tensor_builder->at(ifm_index).get(); const ::arm_compute::ActivationLayerInfo act_info{ ::arm_compute::ActivationLayerInfo::ActivationFunction::LOGISTIC}; @@ -798,7 +673,7 @@ void KernelGenerator::visit(const ir::operation::Logistic &node) // instead of 'INF', and then the result of this op will be errors due to the 'NaN'. 
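The NaN caveat noted above is presumably why acl_neon reaches for NEActivationLayerEx here rather than the stock NEActivationLayer used for the other activations. As a rough illustration of the saturating behavior such a kernel needs, here is a standalone C++ sketch; it is not the ACL implementation, and the clamp threshold kCap is an assumption chosen only so that exp() can never overflow:

#include <cmath>

// Logistic(x) = 1 / (1 + e^(-x)). A quantized QASYMM8 path can hit its
// input-range boundary and propagate NaN through intermediate values, so a
// robust kernel saturates first. kCap is illustrative: beyond roughly 30, a
// float sigmoid is already 0 or 1 to within representable precision.
float logistic_saturating(float x)
{
  const float kCap = 30.0f;
  if (x >= kCap)
    return 1.0f;
  if (x <= -kCap)
    return 0.0f;
  return 1.0f / (1.0f + std::exp(-x));
}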
auto fn = std::make_unique<::arm_compute::NEActivationLayerEx>(); - fn->configure(ifm_alloc->handle(), ofm_alloc->handle(), act_info); + fn->configure(ifm_tensor->handle(), ofm_tensor->handle(), act_info); auto acl_fn = asAclFunction(std::move(fn)); @@ -807,159 +682,8 @@ void KernelGenerator::visit(const ir::operation::Logistic &node) void KernelGenerator::visit(const ir::operation::LSTM &node) { - // TODO Support dynamic rnn - // TODO Fix subtle error in the case of non-CIFG, non-peephole and No Projection. - const auto scratch_buffer_index{ - node.getOutputs().at(ir::operation::LSTM::Output::SCRATCH_BUFFER)}; - const auto output_state_out_index{ - node.getOutputs().at(ir::operation::LSTM::Output::OUTPUT_STATE_OUT)}; - const auto cell_state_out_index{ - node.getOutputs().at(ir::operation::LSTM::Output::CELL_STATE_OUT)}; - const auto output_index{node.getOutputs().at(ir::operation::LSTM::Output::OUTPUT)}; - - const auto input_index{node.getInputs().at(ir::operation::LSTM::Input::INPUT)}; - const auto input_to_input_weights_index{ - node.getInputs().at(ir::operation::LSTM::Input::INPUT_TO_INPUT_WEIGHTS)}; // optional - const auto input_to_forget_weights_index{ - node.getInputs().at(ir::operation::LSTM::Input::INPUT_TO_FORGET_WEIGHTS)}; - const auto input_to_cell_weights_index{ - node.getInputs().at(ir::operation::LSTM::Input::INPUT_TO_CELL_WEIGHTS)}; - const auto input_to_output_weights_index{ - node.getInputs().at(ir::operation::LSTM::Input::INPUT_TO_OUTPUT_WEIGHTS)}; - const auto recurrent_to_input_weights_index{ - node.getInputs().at(ir::operation::LSTM::Input::RECURRENT_TO_INPUT_WEIGHTS)}; // optional - const auto recurrent_to_forget_weights_index{ - node.getInputs().at(ir::operation::LSTM::Input::RECURRENT_TO_FORGET_WEIGHTS)}; - const auto recurrent_to_cell_weights_index{ - node.getInputs().at(ir::operation::LSTM::Input::RECURRENT_TO_CELL_WEIGHTS)}; - const auto recurrent_to_output_weights_index{ - node.getInputs().at(ir::operation::LSTM::Input::RECURRENT_TO_OUTPUT_WEIGHTS)}; - const auto cell_to_input_weights_index{ - node.getInputs().at(ir::operation::LSTM::Input::CELL_TO_INPUT_WEIGHTS)}; // optional - const auto cell_to_forget_weights_index{ - node.getInputs().at(ir::operation::LSTM::Input::CELL_TO_FORGET_WEIGHTS)}; // optional - const auto cell_to_output_weights_index{ - node.getInputs().at(ir::operation::LSTM::Input::CELL_TO_OUTPUT_WEIGHTS)}; // optional - const auto input_gate_bias_index{ - node.getInputs().at(ir::operation::LSTM::Input::INPUT_GATE_BIAS)}; - const auto forget_gate_bias_index{ - node.getInputs().at(ir::operation::LSTM::Input::FORGET_GATE_BIAS)}; - const auto cell_bias_index{node.getInputs().at(ir::operation::LSTM::Input::CELL_BIAS)}; - const auto output_gate_bias_index{ - node.getInputs().at(ir::operation::LSTM::Input::OUTPUT_GATE_BIAS)}; - const auto projection_weights_index{ - node.getInputs().at(ir::operation::LSTM::Input::PROJECTION_WEIGHTS)}; // optional - const auto projection_bias_index{ - node.getInputs().at(ir::operation::LSTM::Input::PROJECTION_BIAS)}; // optional - const auto output_state_in_index{ - node.getInputs().at(ir::operation::LSTM::Input::OUTPUT_STATE_IN)}; - const auto cell_state_in_index{node.getInputs().at(ir::operation::LSTM::Input::CELL_STATE_IN)}; - const auto cell_threshold = node.param().cell_threshold; - const auto projection_threshold = node.param().projection_threshold; - - bool has_input_to_input_weights = _ctx.at(input_to_input_weights_index).shape().dim(0) != 0 && - _ctx.at(input_to_input_weights_index).shape().dim(1) != 0; - bool 
has_recurrent_to_input_weights = - _ctx.at(recurrent_to_input_weights_index).shape().dim(0) != 0 && - _ctx.at(recurrent_to_input_weights_index).shape().dim(1) != 0; - bool has_cell_to_forget_weights = _ctx.at(cell_to_forget_weights_index).shape().dim(0) != 0; - bool has_cell_to_output_weights = _ctx.at(cell_to_output_weights_index).shape().dim(0) != 0; - bool has_projection_weights = _ctx.at(projection_weights_index).shape().dim(0) != 0 && - _ctx.at(projection_weights_index).shape().dim(1) != 0; - bool has_projection_bias = _ctx.at(projection_bias_index).shape().dim(0); - - // NOTE The input_to_input_weights and the recurrent_to_input_weights do not exist in CIFG. - // true: no CIFG - // false: CIFG - // NOTE The cell_to_input_weights does not exist in non-peephole although regular LSTM(non-CIFG). - bool has_cifg_param = has_input_to_input_weights && has_recurrent_to_input_weights; - - // NOTE The cell_to_forget_weights and the cell_to_output_weights exist in peephole. - // But the cell_to_input_weights does not exist in regular CIFG although peephole. - // true: peephole - // false: no peephole - bool has_peephole_param = has_cell_to_forget_weights && has_cell_to_output_weights; - - // NOTE Although the projection weights has data the projection bias may not have data. - bool has_projection_param = has_projection_weights; - - const auto activation = node.param().activation; - const auto cell_clip = cell_threshold; - const auto projection_clip = projection_threshold; - assert(cell_clip >= 0.f && projection_clip >= 0.f); - - auto scratch_buffer_alloc = _tensor_builder->at(scratch_buffer_index).get(); - auto output_state_out_alloc = _tensor_builder->at(output_state_out_index).get(); - auto cell_state_out_alloc = _tensor_builder->at(cell_state_out_index).get(); - auto output_alloc = _tensor_builder->at(output_index).get(); - - auto input_alloc = _tensor_builder->at(input_index).get(); - - auto input_to_forget_weights_alloc = _tensor_builder->at(input_to_forget_weights_index).get(); - auto input_to_cell_weights_alloc = _tensor_builder->at(input_to_cell_weights_index).get(); - auto input_to_output_weights_alloc = _tensor_builder->at(input_to_output_weights_index).get(); - auto recurrent_to_forget_weights_alloc = - _tensor_builder->at(recurrent_to_forget_weights_index).get(); - auto recurrent_to_cell_weights_alloc = _tensor_builder->at(recurrent_to_cell_weights_index).get(); - auto recurrent_to_output_weights_alloc = - _tensor_builder->at(recurrent_to_output_weights_index).get(); - - auto forget_gate_bias_alloc = _tensor_builder->at(forget_gate_bias_index).get(); - auto cell_bias_alloc = _tensor_builder->at(cell_bias_index).get(); - auto output_gate_bias_alloc = _tensor_builder->at(output_gate_bias_index).get(); - auto output_state_in_alloc = _tensor_builder->at(output_state_in_index).get(); - auto cell_state_in_alloc = _tensor_builder->at(cell_state_in_index).get(); - - auto act_info = ::onert::backend::acl_common::asActivationLayerInfo(activation); - - auto fn = std::make_unique<::arm_compute::NELSTMLayer>(); - - ::arm_compute::LSTMParams<::arm_compute::ITensor> lstm_params{}; - if (has_cifg_param) - { - auto input_to_input_weights_alloc = - _tensor_builder->at(input_to_input_weights_index).get(); // optional - auto recurrent_to_input_weights_alloc = - _tensor_builder->at(recurrent_to_input_weights_index).get(); // optional - auto cell_to_input_weights_handle = - has_peephole_param ? 
_tensor_builder->at(cell_to_input_weights_index).get()->handle() - : nullptr; // optional (non-cifg && peephole) - auto input_gate_bias_alloc = _tensor_builder->at(input_gate_bias_index).get(); // optional - lstm_params.set_cifg_params(input_to_input_weights_alloc->handle(), - recurrent_to_input_weights_alloc->handle(), - cell_to_input_weights_handle, input_gate_bias_alloc->handle()); - } - if (has_peephole_param) - { - auto cell_to_forget_weights_alloc = - _tensor_builder->at(cell_to_forget_weights_index).get(); // optional - auto cell_to_output_weights_alloc = - _tensor_builder->at(cell_to_output_weights_index).get(); // optional - lstm_params.set_peephole_params(cell_to_forget_weights_alloc->handle(), - cell_to_output_weights_alloc->handle()); - } - if (has_projection_param) - { - auto projection_weights_alloc = _tensor_builder->at(projection_weights_index).get(); // optional - auto projection_bias_handle = has_projection_bias - ? _tensor_builder->at(projection_bias_index).get()->handle() - : nullptr; // optional - lstm_params.set_projection_params(projection_weights_alloc->handle(), projection_bias_handle); - } - - fn->configure( - input_alloc->handle(), input_to_forget_weights_alloc->handle(), - input_to_cell_weights_alloc->handle(), input_to_output_weights_alloc->handle(), - recurrent_to_forget_weights_alloc->handle(), recurrent_to_cell_weights_alloc->handle(), - recurrent_to_output_weights_alloc->handle(), forget_gate_bias_alloc->handle(), - cell_bias_alloc->handle(), output_gate_bias_alloc->handle(), output_state_in_alloc->handle(), - cell_state_in_alloc->handle(), scratch_buffer_alloc->handle(), - output_state_out_alloc->handle(), cell_state_out_alloc->handle(), output_alloc->handle(), - lstm_params, act_info, cell_clip, projection_clip); - - auto acl_fn = asAclFunction(std::move(fn)); - - _return_fn = std::move(acl_fn); + _return_fn = acl_common::kernelGenLSTM<acl_common::AclFunction, ::arm_compute::ITensor, + ::arm_compute::NELSTMLayer>(node, _ctx, _tensor_builder); } void KernelGenerator::visit(const ir::operation::Mul &node) @@ -970,18 +694,18 @@ void KernelGenerator::visit(const ir::operation::Mul &node) const auto activation = node.param().activation; - auto ofm_alloc = _tensor_builder->at(ofm_index).get(); - auto lhs_alloc = _tensor_builder->at(lhs_index).get(); - auto rhs_alloc = _tensor_builder->at(rhs_index).get(); + auto ofm_tensor = _tensor_builder->at(ofm_index).get(); + auto lhs_tensor = _tensor_builder->at(lhs_index).get(); + auto rhs_tensor = _tensor_builder->at(rhs_index).get(); auto fn = std::make_unique<::arm_compute::NEPixelWiseMultiplication>(); // RoundingPolicy for scale:1.0 is only allowed RoundingPolicy::TO_ZERO - fn->configure(lhs_alloc->handle(), rhs_alloc->handle(), ofm_alloc->handle(), 1.0, // scale + fn->configure(lhs_tensor->handle(), rhs_tensor->handle(), ofm_tensor->handle(), 1.0, // scale arm_compute::ConvertPolicy::SATURATE, arm_compute::RoundingPolicy::TO_ZERO); _return_fn = std::make_unique<exec::FunctionSequence>( - asAclFunction(std::move(fn)), ActivationBuilder::generate(activation, ofm_alloc->handle())); + asAclFunction(std::move(fn)), ActivationBuilder::generate(activation, ofm_tensor->handle())); } void KernelGenerator::visit(const ir::operation::Neg &node) @@ -989,12 +713,12 @@ void KernelGenerator::visit(const ir::operation::Neg &node) const auto ofm_index{node.getOutputs().at(0)}; const auto ifm_index{node.getInputs().at(ir::operation::Neg::Input::INPUT)}; - auto ofm_alloc = _tensor_builder->at(ofm_index).get(); - auto ifm_alloc = 
_tensor_builder->at(ifm_index).get(); + auto ofm_tensor = _tensor_builder->at(ofm_index).get(); + auto ifm_tensor = _tensor_builder->at(ifm_index).get(); auto fn = std::make_unique<::arm_compute::NENegLayer>(); - fn->configure(ifm_alloc->handle(), ofm_alloc->handle()); + fn->configure(ifm_tensor->handle(), ofm_tensor->handle()); auto acl_fn = asAclFunction(std::move(fn)); @@ -1030,12 +754,12 @@ void KernelGenerator::visit(const ir::operation::Pack &node) for (const auto &input_index : input_indexes) { size_t input_rank = _ctx.at(input_index).shape().rank(); - const auto &input_alloc = _tensor_builder->at(input_index); - assert(input_rank == input_alloc->num_dimensions()); - if (input_rank != input_alloc->info()->num_dimensions()) + const auto &input_tensor = _tensor_builder->at(input_index); + assert(input_rank == input_tensor->num_dimensions()); + if (input_rank != input_tensor->info()->num_dimensions()) { // This means that high dimension's value is 1 and ifm tensor is applied dim_correction - input_alloc->info()->set_tensor_shape(acl_common::asTensorShape( + input_tensor->info()->set_tensor_shape(acl_common::asTensorShape( _ctx.at(input_index).shape(), _current_op_seq_layout, backend_layout, false)); } } @@ -1094,8 +818,8 @@ void KernelGenerator::visit(const ir::operation::Permute &node) const auto ofm_idx{node.getOutputs().at(0)}; const auto ifm_idx{node.getInputs().at(0)}; const auto permute_type = node.getPermuteType(); - auto ofm_alloc = _tensor_builder->at(ofm_idx).get(); - auto ifm_alloc = _tensor_builder->at(ifm_idx).get(); + auto ofm_tensor = _tensor_builder->at(ofm_idx).get(); + auto ifm_tensor = _tensor_builder->at(ifm_idx).get(); const auto rank = _ctx.at(ofm_idx).shape().rank(); assert(_ctx.at(ifm_idx).shape().rank() == _ctx.at(ofm_idx).shape().rank()); @@ -1108,7 +832,7 @@ void KernelGenerator::visit(const ir::operation::Permute &node) auto l = std::make_unique<::arm_compute::NEPermute>(); - l->configure(ifm_alloc->handle(), ofm_alloc->handle(), pv); + l->configure(ifm_tensor->handle(), ofm_tensor->handle(), pv); fn = std::move(l); } @@ -1119,7 +843,7 @@ void KernelGenerator::visit(const ir::operation::Permute &node) auto l = std::make_unique<::arm_compute::NEPermute>(); - l->configure(ifm_alloc->handle(), ofm_alloc->handle(), pv); + l->configure(ifm_tensor->handle(), ofm_tensor->handle(), pv); fn = std::move(l); } @@ -1127,7 +851,7 @@ void KernelGenerator::visit(const ir::operation::Permute &node) { auto l = std::make_unique<::arm_compute::NECopy>(); - l->configure(ifm_alloc->handle(), ofm_alloc->handle()); + l->configure(ifm_tensor->handle(), ofm_tensor->handle()); fn = std::move(l); } @@ -1143,15 +867,15 @@ void KernelGenerator::visit(const ir::operation::PReLU &node) const auto ifm_index{node.getInputs().at(ir::operation::PReLU::Input::INPUT)}; const auto alpha_index{node.getInputs().at(ir::operation::PReLU::Input::ALPHA)}; - auto ofm_alloc = _tensor_builder->at(ofm_index).get(); - auto ifm_alloc = _tensor_builder->at(ifm_index).get(); - auto alpha_alloc = _tensor_builder->at(alpha_index).get(); + auto ofm_tensor = _tensor_builder->at(ofm_index).get(); + auto ifm_tensor = _tensor_builder->at(ifm_index).get(); + auto alpha_tensor = _tensor_builder->at(alpha_index).get(); std::unique_ptr<::arm_compute::IFunction> fn; - auto l = std::make_unique<::arm_compute::NEPReLU>(); + auto l = std::make_unique<::arm_compute::NEPReluLayer>(); - l->configure(ifm_alloc->handle(), alpha_alloc->handle(), ofm_alloc->handle()); + l->configure(ifm_tensor->handle(), alpha_tensor->handle(), 
ofm_tensor->handle()); fn = std::move(l); @@ -1166,14 +890,14 @@ void KernelGenerator::visit(const ir::operation::Reduce &node) const auto input_index{node.getInputs().at(ir::operation::Reduce::Input::INPUT)}; const auto axes_index{node.getInputs().at(ir::operation::Reduce::Input::AXES)}; - auto output_alloc = _tensor_builder->at(output_index).get(); - auto input_alloc = _tensor_builder->at(input_index).get(); + auto output_tensor = _tensor_builder->at(output_index).get(); + auto input_tensor = _tensor_builder->at(input_index).get(); // Convert to ACL axes taking into account negative values and possible duplicates. const auto &axes = _ctx.at(axes_index); const auto input_rank = _ctx.at(input_index).shape().rank(); const auto frontend_layout = _current_op_seq_layout; - const auto backend_layout = input_alloc->layout(); + const auto backend_layout = input_tensor->layout(); const auto reduce_axes = acl_common::asCoordinates(axes, input_rank, frontend_layout, backend_layout); const auto reduce_type = node.param().reduce_type; @@ -1182,11 +906,9 @@ void KernelGenerator::visit(const ir::operation::Reduce &node) std::unique_ptr<::arm_compute::IFunction> fn; if (reduce_type == ir::operation::Reduce::ReduceType::MEAN) { - // NOTE NEReduceMean has a bug that does not support NHWC layout - // NEReduceMean intermediate tensors are always NCHW layout - auto l = std::make_unique<::arm_compute::NEReduceMeanEx>(); + auto l = std::make_unique<::arm_compute::NEReduceMean>(); - l->configure(input_alloc->handle(), reduce_axes, keep_dims, output_alloc->handle()); + l->configure(input_tensor->handle(), reduce_axes, keep_dims, output_tensor->handle()); fn = std::move(l); } @@ -1194,7 +916,7 @@ void KernelGenerator::visit(const ir::operation::Reduce &node) { auto l = std::make_unique<::arm_compute::NEReduceSum>(); - l->configure(input_alloc->handle(), reduce_axes, keep_dims, output_alloc->handle()); + l->configure(input_tensor->handle(), reduce_axes, keep_dims, output_tensor->handle()); fn = std::move(l); } @@ -1202,7 +924,7 @@ void KernelGenerator::visit(const ir::operation::Reduce &node) { auto l = std::make_unique<::arm_compute::NEReduceOperation>(); - l->configure(input_alloc->handle(), reduce_axes, keep_dims, output_alloc->handle(), + l->configure(input_tensor->handle(), reduce_axes, keep_dims, output_tensor->handle(), acl_common::convertReduceType(reduce_type)); fn = std::move(l); @@ -1218,15 +940,15 @@ void KernelGenerator::visit(const ir::operation::ReLU &node) const auto output_index{node.getOutputs().at(0)}; const auto input_index{node.getInputs().at(ir::operation::ReLU::Input::INPUT)}; - auto output_alloc = _tensor_builder->at(output_index).get(); - auto input_alloc = _tensor_builder->at(input_index).get(); + auto output_tensor = _tensor_builder->at(output_index).get(); + auto input_tensor = _tensor_builder->at(input_index).get(); auto fn = std::make_unique<arm_compute::NEActivationLayer>(); const ::arm_compute::ActivationLayerInfo act_info{ ::arm_compute::ActivationLayerInfo::ActivationFunction::RELU}; - fn->configure(input_alloc->handle(), output_alloc->handle(), act_info); + fn->configure(input_tensor->handle(), output_tensor->handle(), act_info); auto acl_fn = asAclFunction(std::move(fn)); @@ -1238,15 +960,15 @@ void KernelGenerator::visit(const ir::operation::ReLU1 &node) const auto ofm_index{node.getOutputs().at(0)}; const auto ifm_index{node.getInputs().at(ir::operation::ReLU1::Input::INPUT)}; - auto ofm_alloc = _tensor_builder->at(ofm_index).get(); - auto ifm_alloc = 
_tensor_builder->at(ifm_index).get(); + auto ofm_tensor = _tensor_builder->at(ofm_index).get(); + auto ifm_tensor = _tensor_builder->at(ifm_index).get(); const ::arm_compute::ActivationLayerInfo act_info{ ::arm_compute::ActivationLayerInfo::ActivationFunction::LU_BOUNDED_RELU, 1.0f, -1.0f}; auto fn = std::make_unique<::arm_compute::NEActivationLayer>(); - fn->configure(ifm_alloc->handle(), ofm_alloc->handle(), act_info); + fn->configure(ifm_tensor->handle(), ofm_tensor->handle(), act_info); auto acl_fn = asAclFunction(std::move(fn)); @@ -1258,15 +980,15 @@ void KernelGenerator::visit(const ir::operation::ReLU6 &node) const auto ofm_index{node.getOutputs().at(0)}; const auto ifm_index{node.getInputs().at(ir::operation::ReLU6::Input::INPUT)}; - auto ofm_alloc = _tensor_builder->at(ofm_index).get(); - auto ifm_alloc = _tensor_builder->at(ifm_index).get(); + auto ofm_tensor = _tensor_builder->at(ofm_index).get(); + auto ifm_tensor = _tensor_builder->at(ifm_index).get(); const ::arm_compute::ActivationLayerInfo act_info{ ::arm_compute::ActivationLayerInfo::ActivationFunction::BOUNDED_RELU, 6.0f}; auto fn = std::make_unique<::arm_compute::NEActivationLayer>(); - fn->configure(ifm_alloc->handle(), ofm_alloc->handle(), act_info); + fn->configure(ifm_tensor->handle(), ofm_tensor->handle(), act_info); auto acl_fn = asAclFunction(std::move(fn)); @@ -1278,13 +1000,13 @@ void KernelGenerator::visit(const ir::operation::Reshape &node) const auto output_index{node.getOutputs().at(0)}; const auto input_index{node.getInputs().at(ir::operation::Reshape::Input::INPUT)}; - auto output_alloc = _tensor_builder->at(output_index).get(); - auto input_alloc = _tensor_builder->at(input_index).get(); + auto output_tensor = _tensor_builder->at(output_index).get(); + auto input_tensor = _tensor_builder->at(input_index).get(); // NOTE This operation must not change the layout from frontend to backend // So, PermutationOperationPass makes the frontend and backend layouts the same.
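Stepping back from the diff for a moment: the ReLU, ReLU1, and ReLU6 hunks above differ only in their ActivationLayerInfo parameters, so the whole family reduces to a single clamp. A minimal plain-C++ reference sketch (not ACL code) of the mapping:

#include <algorithm>

// RELU            -> max(0, x)
// LU_BOUNDED_RELU -> clamp(x, b, a)  (ReLU1: a = 1.0f, b = -1.0f)
// BOUNDED_RELU    -> clamp(x, 0, a)  (ReLU6: a = 6.0f)
float bounded_relu(float x, float lo, float hi)
{
  return std::min(hi, std::max(lo, x));
}

On the NEON path, fn->configure(...) then applies the chosen variant element-wise inside NEActivationLayer.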
const auto frontend_layout = _current_op_seq_layout; - const auto backend_layout = output_alloc->layout(); + const auto backend_layout = output_tensor->layout(); assert((_ctx.at(input_index).shape().rank() < 4 && _ctx.at(output_index).shape().rank() < 4) || frontend_layout == backend_layout); UNUSED_RELEASE(frontend_layout); @@ -1292,7 +1014,7 @@ void KernelGenerator::visit(const ir::operation::Reshape &node) auto fn = std::make_unique<arm_compute::NEReshapeLayer>(); - fn->configure(input_alloc->handle(), output_alloc->handle()); + fn->configure(input_tensor->handle(), output_tensor->handle()); auto acl_fn = asAclFunction(std::move(fn)); @@ -1305,12 +1027,12 @@ void KernelGenerator::visit(const ir::operation::ResizeBilinear &node) const auto ifm_index{node.getInputs().at(ir::operation::ResizeBilinear::Input::INPUT)}; - auto ofm_alloc = _tensor_builder->at(ofm_index).get(); - auto ifm_alloc = _tensor_builder->at(ifm_index).get(); + auto ofm_tensor = _tensor_builder->at(ofm_index).get(); + auto ifm_tensor = _tensor_builder->at(ifm_index).get(); auto fn = std::make_unique<::arm_compute::NEScale>(); - fn->configure(ifm_alloc->handle(), ofm_alloc->handle(), + fn->configure(ifm_tensor->handle(), ofm_tensor->handle(), ::arm_compute::InterpolationPolicy::BILINEAR, ::arm_compute::BorderMode::REPLICATE, ::arm_compute::PixelValue(0.f), ::arm_compute::SamplingPolicy::TOP_LEFT); @@ -1334,25 +1056,25 @@ void KernelGenerator::visit(const ir::operation::RNN &node) const auto activation = node.param().activation; - auto output_alloc = _tensor_builder->at(output_index).get(); - auto hidden_state_out_alloc = _tensor_builder->at(hidden_state_out_index).get(); + auto output_tensor = _tensor_builder->at(output_index).get(); + auto hidden_state_out_tensor = _tensor_builder->at(hidden_state_out_index).get(); - auto input_alloc = _tensor_builder->at(input_index).get(); - auto weights_alloc = _tensor_builder->at(weights_index).get(); - auto recurrent_weights_alloc = _tensor_builder->at(recurrent_weights_index).get(); - auto bias_alloc = _tensor_builder->at(bias_index).get(); - auto hidden_state_in_alloc = _tensor_builder->at(hidden_state_in_index).get(); + auto input_tensor = _tensor_builder->at(input_index).get(); + auto weights_tensor = _tensor_builder->at(weights_index).get(); + auto recurrent_weights_tensor = _tensor_builder->at(recurrent_weights_index).get(); + auto bias_tensor = _tensor_builder->at(bias_index).get(); + auto hidden_state_in_tensor = _tensor_builder->at(hidden_state_in_index).get(); auto act_info = ::onert::backend::acl_common::asActivationLayerInfo(activation); auto copy_layer = std::make_unique<::arm_compute::NECopy>(); - copy_layer->configure(hidden_state_in_alloc->handle(), hidden_state_out_alloc->handle()); + copy_layer->configure(hidden_state_in_tensor->handle(), hidden_state_out_tensor->handle()); _return_fn = asAclFunction(std::move(copy_layer)); - auto fn = std::make_unique<::arm_compute::NERNNLayerEx>( + auto fn = std::make_unique<::arm_compute::NERNNLayer>( _tensor_builder->acl_tensor_manager()->internal_buffer_manager()); - fn->configure(input_alloc->handle(), weights_alloc->handle(), recurrent_weights_alloc->handle(), - bias_alloc->handle(), hidden_state_out_alloc->handle(), output_alloc->handle(), - act_info); + fn->configure(input_tensor->handle(), weights_tensor->handle(), + recurrent_weights_tensor->handle(), bias_tensor->handle(), + hidden_state_out_tensor->handle(), output_tensor->handle(), act_info); _return_fn = asAclFunction(std::move(fn)); } @@ -1361,12 +1083,12 @@ void 
KernelGenerator::visit(const ir::operation::RSQRT &node) const auto ofm_index{node.getOutputs().at(0)}; const auto ifm_index{node.getInputs().at(ir::operation::RSQRT::Input::INPUT)}; - auto ofm_alloc = _tensor_builder->at(ofm_index).get(); - auto ifm_alloc = _tensor_builder->at(ifm_index).get(); + auto ofm_tensor = _tensor_builder->at(ofm_index).get(); + auto ifm_tensor = _tensor_builder->at(ifm_index).get(); auto fn = std::make_unique<::arm_compute::NERsqrtLayer>(); - fn->configure(ifm_alloc->handle(), ofm_alloc->handle()); + fn->configure(ifm_tensor->handle(), ofm_tensor->handle()); _return_fn = asAclFunction(std::move(fn)); } @@ -1383,10 +1105,10 @@ void KernelGenerator::visit(const ir::operation::Squeeze &node) (void)dims; (void)ndim; - auto output_alloc = _tensor_builder->at(output_index).get(); - auto input_alloc = _tensor_builder->at(input_index).get(); + auto output_tensor = _tensor_builder->at(output_index).get(); + auto input_tensor = _tensor_builder->at(input_index).get(); auto fn = std::make_unique<arm_compute::NEReshapeLayer>(); - fn->configure(input_alloc->handle(), output_alloc->handle()); + fn->configure(input_tensor->handle(), output_tensor->handle()); auto acl_fn = asAclFunction(std::move(fn)); _return_fn = std::move(acl_fn); } @@ -1396,15 +1118,15 @@ void KernelGenerator::visit(const ir::operation::Tanh &node) const auto output_index{node.getOutputs().at(0)}; const auto input_index{node.getInputs().at(ir::operation::Tanh::Input::INPUT)}; - auto output_alloc = _tensor_builder->at(output_index).get(); - auto input_alloc = _tensor_builder->at(input_index).get(); + auto output_tensor = _tensor_builder->at(output_index).get(); + auto input_tensor = _tensor_builder->at(input_index).get(); auto fn = std::make_unique<arm_compute::NEActivationLayer>(); const ::arm_compute::ActivationLayerInfo act_info{ ::arm_compute::ActivationLayerInfo::ActivationFunction::TANH, 1.0f, 1.0f}; - fn->configure(input_alloc->handle(), output_alloc->handle(), act_info); + fn->configure(input_tensor->handle(), output_tensor->handle(), act_info); auto acl_fn = asAclFunction(std::move(fn)); @@ -1417,13 +1139,25 @@ void KernelGenerator::visit(const ir::operation::Softmax &node) const auto input_index{node.getInputs().at(ir::operation::Softmax::Input::INPUT)}; const auto beta = node.param().beta; - auto output_alloc = _tensor_builder->at(output_index).get(); - auto input_alloc = _tensor_builder->at(input_index).get(); + auto output_tensor = _tensor_builder->at(output_index).get(); + auto input_tensor = _tensor_builder->at(input_index).get(); + const auto frontend_layout = _current_op_seq_layout; + const auto backend_layout = input_tensor->layout(); + + // Disable applied dim_correction + const size_t input_rank = _ctx.at(input_index).shape().rank(); + if (input_rank != input_tensor->info()->num_dimensions()) + { + // This means that high dimension's value is 1 and input tensor is applied dim_correction + const auto input = _ctx.at(input_index); + input_tensor->info()->set_tensor_shape( + acl_common::asTensorShape(input.shape(), frontend_layout, backend_layout, false)); + } auto fn = std::make_unique<::arm_compute::NESoftmaxLayer>( _tensor_builder->acl_tensor_manager()->internal_buffer_manager()); - fn->configure(input_alloc->handle(), output_alloc->handle(), beta); + fn->configure(input_tensor->handle(), output_tensor->handle(), beta); auto acl_fn = asAclFunction(std::move(fn)); @@ -1438,20 +1172,18 @@ void KernelGenerator::visit(const ir::operation::SpaceToBatchND &node) 
node.getInputs().at(ir::operation::SpaceToBatchND::Input::BLOCK_SIZE)}; const auto paddings_index{node.getInputs().at(ir::operation::SpaceToBatchND::Input::PADDINGS)}; - auto ofm_alloc = _tensor_builder->at(ofm_index).get(); - auto ifm_alloc = _tensor_builder->at(ifm_index).get(); - auto block_size_alloc = _tensor_builder->at(block_size_index).get(); - auto paddings_alloc = _tensor_builder->at(paddings_index).get(); + auto ofm_tensor = _tensor_builder->at(ofm_index).get(); + auto ifm_tensor = _tensor_builder->at(ifm_index).get(); + auto block_size_tensor = _tensor_builder->at(block_size_index).get(); + auto paddings_tensor = _tensor_builder->at(paddings_index).get(); assert(_ctx.at(block_size_index).data()); assert(_ctx.at(paddings_index).data()); - // NESpaceToBatchLayer has a bug that padding's values are 0 even when zero point of QASYMM8 is - // not 0. - auto fn = std::make_unique<::arm_compute::NESpaceToBatchLayerEx>(); + auto fn = std::make_unique<::arm_compute::NESpaceToBatchLayer>(); - fn->configure(ifm_alloc->handle(), block_size_alloc->handle(), paddings_alloc->handle(), - ofm_alloc->handle()); + fn->configure(ifm_tensor->handle(), block_size_tensor->handle(), paddings_tensor->handle(), + ofm_tensor->handle()); auto acl_fn = asAclFunction(std::move(fn)); @@ -1465,12 +1197,12 @@ void KernelGenerator::visit(const ir::operation::SpaceToDepth &node) auto block_size = node.param().block_size; - auto ofm_alloc = _tensor_builder->at(ofm_index).get(); - auto ifm_alloc = _tensor_builder->at(ifm_index).get(); + auto ofm_tensor = _tensor_builder->at(ofm_index).get(); + auto ifm_tensor = _tensor_builder->at(ifm_index).get(); - auto fn = std::make_unique<::arm_compute::NESpaceToDepthLayerEx>(); + auto fn = std::make_unique<::arm_compute::NESpaceToDepthLayer>(); - fn->configure(ifm_alloc->handle(), ofm_alloc->handle(), block_size); + fn->configure(ifm_tensor->handle(), ofm_tensor->handle(), block_size); auto acl_fn = asAclFunction(std::move(fn)); @@ -1489,13 +1221,13 @@ void KernelGenerator::visit(const ir::operation::Split &node) for (const auto &output : node.getOutputs()) output_indexes.emplace_back(output); - auto ifm_alloc = _tensor_builder->at(ifm_index).get(); - std::vector<arm_compute::ITensor *> output_allocs; + auto ifm_tensor = _tensor_builder->at(ifm_index).get(); + std::vector<arm_compute::ITensor *> output_tensors; for (const auto &ofm_ind : output_indexes) - output_allocs.emplace_back(_tensor_builder->at(ofm_ind).get()->handle()); + output_tensors.emplace_back(_tensor_builder->at(ofm_ind).get()->handle()); const auto frontend_layout = _current_op_seq_layout; - const auto backend_layout = ifm_alloc->layout(); + const auto backend_layout = ifm_tensor->layout(); auto axis = node.param().axis; if (axis < 0) axis += ifm_rank; @@ -1503,7 +1235,7 @@ void KernelGenerator::visit(const ir::operation::Split &node) auto fn = std::make_unique<::arm_compute::NESplit>(); - fn->configure(ifm_alloc->handle(), output_allocs, axis); + fn->configure(ifm_tensor->handle(), output_tensors, axis); _return_fn = asAclFunction(std::move(fn)); } @@ -1513,15 +1245,15 @@ void KernelGenerator::visit(const ir::operation::SQRT &node) const auto output_index{node.getOutputs().at(0)}; const auto input_index{node.getInputs().at(ir::operation::SQRT::Input::INPUT)}; - auto output_alloc = _tensor_builder->at(output_index).get(); - auto input_alloc = _tensor_builder->at(input_index).get(); + auto output_tensor = _tensor_builder->at(output_index).get(); + auto input_tensor = _tensor_builder->at(input_index).get(); const 
::arm_compute::ActivationLayerInfo act_info{ ::arm_compute::ActivationLayerInfo::ActivationFunction::SQRT}; auto fn = std::make_unique<::arm_compute::NEActivationLayer>(); - fn->configure(input_alloc->handle(), output_alloc->handle(), act_info); + fn->configure(input_tensor->handle(), output_tensor->handle(), act_info); auto acl_fn = asAclFunction(std::move(fn)); @@ -1534,13 +1266,13 @@ void KernelGenerator::visit(const ir::operation::SquaredDifference &node) const auto lhs_index{node.getInputs().at(ir::operation::SquaredDifference::Input::LHS)}; const auto rhs_index{node.getInputs().at(ir::operation::SquaredDifference::Input::RHS)}; - auto ofm_alloc = _tensor_builder->at(ofm_index).get(); - auto lhs_alloc = _tensor_builder->at(lhs_index).get(); - auto rhs_alloc = _tensor_builder->at(rhs_index).get(); + auto ofm_tensor = _tensor_builder->at(ofm_index).get(); + auto lhs_tensor = _tensor_builder->at(lhs_index).get(); + auto rhs_tensor = _tensor_builder->at(rhs_index).get(); auto fn = std::make_unique<::arm_compute::NEElementwiseSquaredDiff>(); - fn->configure(lhs_alloc->handle(), rhs_alloc->handle(), ofm_alloc->handle()); + fn->configure(lhs_tensor->handle(), rhs_tensor->handle(), ofm_tensor->handle()); auto acl_fn = asAclFunction(std::move(fn)); @@ -1555,17 +1287,17 @@ void KernelGenerator::visit(const ir::operation::Sub &node) const auto activation = node.param().activation; - auto ofm_alloc = _tensor_builder->at(ofm_index).get(); - auto lhs_alloc = _tensor_builder->at(lhs_index).get(); - auto rhs_alloc = _tensor_builder->at(rhs_index).get(); + auto ofm_tensor = _tensor_builder->at(ofm_index).get(); + auto lhs_tensor = _tensor_builder->at(lhs_index).get(); + auto rhs_tensor = _tensor_builder->at(rhs_index).get(); auto fn = std::make_unique<::arm_compute::NEArithmeticSubtraction>(); - fn->configure(lhs_alloc->handle(), rhs_alloc->handle(), ofm_alloc->handle(), + fn->configure(lhs_tensor->handle(), rhs_tensor->handle(), ofm_tensor->handle(), arm_compute::ConvertPolicy::SATURATE); _return_fn = std::make_unique<exec::FunctionSequence>( - asAclFunction(std::move(fn)), ActivationBuilder::generate(activation, ofm_alloc->handle())); + asAclFunction(std::move(fn)), ActivationBuilder::generate(activation, ofm_tensor->handle())); } void KernelGenerator::visit(const ir::operation::Slice &node) @@ -1575,10 +1307,10 @@ void KernelGenerator::visit(const ir::operation::Slice &node) const auto begins_index{node.getInputs().at(ir::operation::Slice::Input::BEGINS)}; const auto sizes_index{node.getInputs().at(ir::operation::Slice::Input::SIZES)}; - auto outputData_alloc = _tensor_builder->at(output_index).get(); - auto inputData_alloc = _tensor_builder->at(input_index).get(); + auto outputData_tensor = _tensor_builder->at(output_index).get(); + auto inputData_tensor = _tensor_builder->at(input_index).get(); const auto frontend_layout = _current_op_seq_layout; - const auto backend_layout = inputData_alloc->layout(); + const auto backend_layout = inputData_tensor->layout(); // Set initializers for indices data such as order of inputData int input_rank = _ctx.at(input_index).shape().rank(); @@ -1628,7 +1360,7 @@ void KernelGenerator::visit(const ir::operation::Slice &node) auto fn = std::make_unique<::arm_compute::NESlice>(); - fn->configure(inputData_alloc->handle(), outputData_alloc->handle(), starts_set, ends_set); + fn->configure(inputData_tensor->handle(), outputData_tensor->handle(), starts_set, ends_set); auto acl_fn = asAclFunction(std::move(fn)); @@ -1643,10 +1375,10 @@ void KernelGenerator::visit(const 
ir::operation::StridedSlice &node) const auto ends_index{node.getInputs().at(ir::operation::StridedSlice::Input::ENDS)}; const auto strides_index{node.getInputs().at(ir::operation::StridedSlice::Input::STRIDES)}; - auto outputData_alloc = _tensor_builder->at(output_index).get(); - auto inputData_alloc = _tensor_builder->at(input_index).get(); + auto outputData_tensor = _tensor_builder->at(output_index).get(); + auto inputData_tensor = _tensor_builder->at(input_index).get(); const auto frontend_layout = _current_op_seq_layout; - const auto backend_layout = inputData_alloc->layout(); + const auto backend_layout = inputData_tensor->layout(); // Set initializers for indices data such as order of inputData int input_rank = _ctx.at(input_index).shape().rank(); @@ -1715,7 +1447,7 @@ void KernelGenerator::visit(const ir::operation::StridedSlice &node) auto fn = std::make_unique<::arm_compute::NEStridedSlice>(); - fn->configure(inputData_alloc->handle(), outputData_alloc->handle(), starts_set, ends_set, + fn->configure(inputData_tensor->handle(), outputData_tensor->handle(), starts_set, ends_set, strides_set, begin_mask, end_mask, shrink_axis_mask); auto acl_fn = asAclFunction(std::move(fn)); @@ -1749,16 +1481,16 @@ void KernelGenerator::visit(const ir::operation::TransposeConv &node) invalid_vertical = ofm_shape.H - (1 + (ifm_shape.H - 1) * stride.vertical) - (ker_shape.H - 1); } - auto ofm_alloc = _tensor_builder->at(ofm_index).get(); - auto ifm_alloc = _tensor_builder->at(ifm_index).get(); - auto ker_alloc = _tensor_builder->at(ker_index).get(); + auto ofm_tensor = _tensor_builder->at(ofm_index).get(); + auto ifm_tensor = _tensor_builder->at(ifm_index).get(); + auto ker_tensor = _tensor_builder->at(ker_index).get(); const auto tconv_info = acl_common::asPadStrideInfo(padding, stride); auto fn = std::make_unique<::arm_compute::NETransposeConvLayer>(); - fn->configure(ifm_alloc->handle(), ker_alloc->handle(), nullptr, ofm_alloc->handle(), tconv_info, - invalid_horizontal, invalid_vertical); + fn->configure(ifm_tensor->handle(), ker_tensor->handle(), nullptr, ofm_tensor->handle(), + tconv_info, invalid_horizontal, invalid_vertical); auto acl_fn = asAclFunction(std::move(fn)); @@ -1771,10 +1503,10 @@ void KernelGenerator::visit(const ir::operation::Transpose &node) const auto ifm_idx{node.getInputs().at(ir::operation::Transpose::Input::INPUT)}; const auto &perm{node.param().perm}; - auto ofm_alloc = _tensor_builder->at(ofm_idx).get(); - const auto ifm_alloc = _tensor_builder->at(ifm_idx).get(); + auto ofm_tensor = _tensor_builder->at(ofm_idx).get(); + const auto ifm_tensor = _tensor_builder->at(ifm_idx).get(); const auto frontend_layout = _current_op_seq_layout; - const auto backend_layout = ifm_alloc->layout(); + const auto backend_layout = ifm_tensor->layout(); const auto rank = _ctx.at(ifm_idx).shape().rank(); std::vector<std::int32_t> pv(perm.cbegin(), perm.cend()); @@ -1783,11 +1515,11 @@ void KernelGenerator::visit(const ir::operation::Transpose &node) std::unique_ptr<::arm_compute::IFunction> fn; - if (ifm_alloc->num_dimensions() <= 2 && ofm_alloc->num_dimensions() <= 2) + if (ifm_tensor->num_dimensions() <= 2 && ofm_tensor->num_dimensions() <= 2) { auto l = std::make_unique<::arm_compute::NETranspose>(); - l->configure(ifm_alloc->handle(), ofm_alloc->handle()); + l->configure(ifm_tensor->handle(), ofm_tensor->handle()); fn = std::move(l); } @@ -1795,7 +1527,7 @@ void KernelGenerator::visit(const ir::operation::Transpose &node) { auto l = std::make_unique<::arm_compute::NEPermute>(); - 
l->configure(ifm_alloc->handle(), ofm_alloc->handle(), backend_pv); + l->configure(ifm_tensor->handle(), ofm_tensor->handle(), backend_pv); fn = std::move(l); } @@ -1834,13 +1566,13 @@ void KernelGenerator::visit(const ir::operation::Unpack &node) for (const auto &output_index : output_indexes) { size_t output_rank = _ctx.at(output_index).shape().rank(); - const auto &output_alloc = _tensor_builder->at(output_index); - orig_outputs_acl_tensor_shapes.emplace_back(output_alloc->info()->tensor_shape()); - assert(output_rank == output_alloc->num_dimensions()); - if (output_rank != output_alloc->info()->num_dimensions()) + const auto &output_tensor = _tensor_builder->at(output_index); + orig_outputs_acl_tensor_shapes.emplace_back(output_tensor->info()->tensor_shape()); + assert(output_rank == output_tensor->num_dimensions()); + if (output_rank != output_tensor->info()->num_dimensions()) { // This means that high dimension's value is 1 and ifm tensor is applied dim_correction - output_alloc->info()->set_tensor_shape(acl_common::asTensorShape( + output_tensor->info()->set_tensor_shape(acl_common::asTensorShape( _ctx.at(output_index).shape(), _current_op_seq_layout, backend_layout, false)); } } @@ -1858,17 +1590,17 @@ void KernelGenerator::visit(const ir::operation::Add &node) const auto activation = node.param().activation; - auto ofm_alloc = _tensor_builder->at(ofm_index).get(); - auto lhs_alloc = _tensor_builder->at(lhs_index).get(); - auto rhs_alloc = _tensor_builder->at(rhs_index).get(); + auto ofm_tensor = _tensor_builder->at(ofm_index).get(); + auto lhs_tensor = _tensor_builder->at(lhs_index).get(); + auto rhs_tensor = _tensor_builder->at(rhs_index).get(); auto fn = std::make_unique<::arm_compute::NEArithmeticAddition>(); - fn->configure(lhs_alloc->handle(), rhs_alloc->handle(), ofm_alloc->handle(), + fn->configure(lhs_tensor->handle(), rhs_tensor->handle(), ofm_tensor->handle(), arm_compute::ConvertPolicy::SATURATE); _return_fn = std::make_unique<exec::FunctionSequence>( - asAclFunction(std::move(fn)), ActivationBuilder::generate(activation, ofm_alloc->handle())); + asAclFunction(std::move(fn)), ActivationBuilder::generate(activation, ofm_tensor->handle())); } void KernelGenerator::visit(const ir::operation::Div &node) @@ -1879,16 +1611,16 @@ void KernelGenerator::visit(const ir::operation::Div &node) const auto activation = node.param().activation; - auto ofm_alloc = _tensor_builder->at(ofm_index).get(); - auto lhs_alloc = _tensor_builder->at(lhs_index).get(); - auto rhs_alloc = _tensor_builder->at(rhs_index).get(); + auto ofm_tensor = _tensor_builder->at(ofm_index).get(); + auto lhs_tensor = _tensor_builder->at(lhs_index).get(); + auto rhs_tensor = _tensor_builder->at(rhs_index).get(); auto fn = std::make_unique<::arm_compute::NEElementwiseDivision>(); - fn->configure(lhs_alloc->handle(), rhs_alloc->handle(), ofm_alloc->handle()); + fn->configure(lhs_tensor->handle(), rhs_tensor->handle(), ofm_tensor->handle()); _return_fn = std::make_unique<exec::FunctionSequence>( - asAclFunction(std::move(fn)), ActivationBuilder::generate(activation, ofm_alloc->handle())); + asAclFunction(std::move(fn)), ActivationBuilder::generate(activation, ofm_tensor->handle())); } void KernelGenerator::visit(const ir::operation::Exp &node) @@ -1896,12 +1628,12 @@ void KernelGenerator::visit(const ir::operation::Exp &node) const auto output_index{node.getOutputs().at(0)}; const auto input_index{node.getInputs().at(ir::operation::Exp::Input::INPUT)}; - auto output_alloc = _tensor_builder->at(output_index).get(); - 
auto input_alloc = _tensor_builder->at(input_index).get(); + auto output_tensor = _tensor_builder->at(output_index).get(); + auto input_tensor = _tensor_builder->at(input_index).get(); auto fn = std::make_unique<::arm_compute::NEExpLayer>(); - fn->configure(input_alloc->handle(), output_alloc->handle()); + fn->configure(input_tensor->handle(), output_tensor->handle()); auto acl_fn = asAclFunction(std::move(fn)); @@ -1913,12 +1645,12 @@ void KernelGenerator::visit(const ir::operation::ExpandDims &node) const auto output_index{node.getOutputs().at(0)}; const auto input_index{node.getInputs().at(ir::operation::ExpandDims::Input::INPUT)}; - auto output_alloc = _tensor_builder->at(output_index).get(); - auto input_alloc = _tensor_builder->at(input_index).get(); + auto output_tensor = _tensor_builder->at(output_index).get(); + auto input_tensor = _tensor_builder->at(input_index).get(); auto fn = std::make_unique<::arm_compute::NEReshapeLayer>(); - fn->configure(input_alloc->handle(), output_alloc->handle()); + fn->configure(input_tensor->handle(), output_tensor->handle()); auto acl_fn = asAclFunction(std::move(fn)); @@ -1933,13 +1665,13 @@ void KernelGenerator::visit(const ir::operation::Comparison &node) const auto comparison_type = node.param().comparison_type; - auto output_alloc = _tensor_builder->at(output_index).get(); - auto input0_alloc = _tensor_builder->at(input0_index).get(); - auto input1_alloc = _tensor_builder->at(input1_index).get(); + auto output_tensor = _tensor_builder->at(output_index).get(); + auto input0_tensor = _tensor_builder->at(input0_index).get(); + auto input1_tensor = _tensor_builder->at(input1_index).get(); auto fn = std::make_unique<::arm_compute::NEElementwiseComparison>(); - fn->configure(input0_alloc->handle(), input1_alloc->handle(), output_alloc->handle(), + fn->configure(input0_tensor->handle(), input1_tensor->handle(), output_tensor->handle(), (arm_compute::ComparisonOperation)comparison_type); auto acl_fn = asAclFunction(std::move(fn)); @@ -1953,13 +1685,13 @@ void KernelGenerator::visit(const ir::operation::Min &node) const auto lhs_index{node.getInputs().at(ir::operation::Min::Input::LHS)}; const auto rhs_index{node.getInputs().at(ir::operation::Min::Input::RHS)}; - auto ofm_alloc = _tensor_builder->at(ofm_index).get(); - auto lhs_alloc = _tensor_builder->at(lhs_index).get(); - auto rhs_alloc = _tensor_builder->at(rhs_index).get(); + auto ofm_tensor = _tensor_builder->at(ofm_index).get(); + auto lhs_tensor = _tensor_builder->at(lhs_index).get(); + auto rhs_tensor = _tensor_builder->at(rhs_index).get(); auto fn = std::make_unique<::arm_compute::NEElementwiseMin>(); - fn->configure(lhs_alloc->handle(), rhs_alloc->handle(), ofm_alloc->handle()); + fn->configure(lhs_tensor->handle(), rhs_tensor->handle(), ofm_tensor->handle()); auto acl_fn = asAclFunction(std::move(fn)); @@ -1972,13 +1704,13 @@ void KernelGenerator::visit(const ir::operation::Max &node) const auto lhs_index{node.getInputs().at(ir::operation::Max::Input::LHS)}; const auto rhs_index{node.getInputs().at(ir::operation::Max::Input::RHS)}; - auto ofm_alloc = _tensor_builder->at(ofm_index).get(); - auto lhs_alloc = _tensor_builder->at(lhs_index).get(); - auto rhs_alloc = _tensor_builder->at(rhs_index).get(); + auto ofm_tensor = _tensor_builder->at(ofm_index).get(); + auto lhs_tensor = _tensor_builder->at(lhs_index).get(); + auto rhs_tensor = _tensor_builder->at(rhs_index).get(); auto fn = std::make_unique<::arm_compute::NEElementwiseMax>(); - fn->configure(lhs_alloc->handle(), rhs_alloc->handle(), 
ofm_alloc->handle()); + fn->configure(lhs_tensor->handle(), rhs_tensor->handle(), ofm_tensor->handle()); auto acl_fn = asAclFunction(std::move(fn)); diff --git a/runtime/onert/backend/cpu/Backend.h b/runtime/onert/backend/cpu/Backend.h index 2daf06aca..56bd352e0 100644 --- a/runtime/onert/backend/cpu/Backend.h +++ b/runtime/onert/backend/cpu/Backend.h @@ -17,6 +17,7 @@ #ifndef __ONERT_BACKEND_CPU_BACKEND_H__ #define __ONERT_BACKEND_CPU_BACKEND_H__ +#include "BackendContext.h" #include "Config.h" #include "ConstantInitializer.h" #include "KernelGenerator.h" @@ -39,9 +40,9 @@ public: std::shared_ptr<IConfig> config() const override { return _config; } - std::unique_ptr<BackendContext> newContext(const ir::Graph &graph, - const std::shared_ptr<custom::IKernelBuilder> &kb, - bool) const override + std::unique_ptr<onert::backend::BackendContext> + newContext(const ir::Graph &graph, const std::shared_ptr<custom::IKernelBuilder> &kb, + bool) const override { const auto &operands = graph.operands(); const auto &operations = graph.operations(); @@ -49,7 +50,8 @@ public: auto tb = std::make_shared<TensorBuilder>(); context->tensor_builder = tb; context->constant_initializer = std::make_shared<ConstantInitializer>(operands, tb); - context->kernel_gen = std::make_shared<KernelGenerator>(operands, operations, tb, kb); + context->kernel_gen = std::make_shared<KernelGenerator>(operands, operations, tb, kb, + context->external_context()); context->tensor_register = nullptr; context->optimizer = nullptr; return context; diff --git a/runtime/onert/backend/cpu/BackendContext.h b/runtime/onert/backend/cpu/BackendContext.h new file mode 100644 index 000000000..f314a8e39 --- /dev/null +++ b/runtime/onert/backend/cpu/BackendContext.h @@ -0,0 +1,58 @@ +/* + * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#ifndef __ONERT_BACKEND_CPU_BACKEND_CONTEXT_H__ +#define __ONERT_BACKEND_CPU_BACKEND_CONTEXT_H__ + +#include <backend/BackendContext.h> +#include "ExternalContext.h" + +namespace onert +{ +namespace backend +{ +namespace cpu +{ + +class BackendContext : public onert::backend::BackendContext +{ +public: + BackendContext(const Backend *backend, const ir::Graph *graph, + std::shared_ptr<ITensorBuilder> tensor_builder = nullptr, + std::shared_ptr<IConstantInitializer> constant_initializer = nullptr, + std::shared_ptr<IKernelGenerator> kernel_gen = nullptr, + std::shared_ptr<ITensorRegister> tensor_register = nullptr, + std::shared_ptr<IOptimizer> optimizer = nullptr) + : onert::backend::BackendContext(backend, graph, tensor_builder, constant_initializer, + kernel_gen, tensor_register, optimizer), + _external_context(new ExternalContext) + { + } + + std::shared_ptr<ExternalContext> external_context() { return _external_context; } + +private: + // NOTE ruy context has a thread pool, and when multiple ruy contexts are created, + // the thread pool is also created in duplicate + // TODO Create one ruy context for session + std::shared_ptr<ExternalContext> _external_context; +}; + +} // namespace cpu +} // namespace backend +} // namespace onert + +#endif // __ONERT_BACKEND_CPU_BACKEND_CONTEXT_H__ diff --git a/runtime/onert/backend/cpu/CMakeLists.txt b/runtime/onert/backend/cpu/CMakeLists.txt index e997a2291..01a3cd178 100644 --- a/runtime/onert/backend/cpu/CMakeLists.txt +++ b/runtime/onert/backend/cpu/CMakeLists.txt @@ -1,5 +1,7 @@ set(LIB_ONERT_BACKEND_CPU onert_backend_cpu) +nnfw_find_package(Ruy REQUIRED) + file(GLOB_RECURSE SOURCES "*.cc") add_library(${LIB_ONERT_BACKEND_CPU} SHARED ${SOURCES}) @@ -8,6 +10,8 @@ target_link_libraries(${LIB_ONERT_BACKEND_CPU} PRIVATE nnfw_lib_cker) target_link_libraries(${LIB_ONERT_BACKEND_CPU} PRIVATE onert_core) target_link_libraries(${LIB_ONERT_BACKEND_CPU} PRIVATE nnfw_common) target_link_libraries(${LIB_ONERT_BACKEND_CPU} PRIVATE nnfw_coverage) +target_link_libraries(${LIB_ONERT_BACKEND_CPU} PRIVATE ruy) +target_link_libraries(${LIB_ONERT_BACKEND_CPU} INTERFACE ruy_instrumentation) set_target_properties(${LIB_ONERT_BACKEND_CPU} PROPERTIES OUTPUT_NAME backend_cpu) diff --git a/runtime/onert/backend/cpu/ConstantInitializer.cc b/runtime/onert/backend/cpu/ConstantInitializer.cc index 71e313628..deb27f0fe 100644 --- a/runtime/onert/backend/cpu/ConstantInitializer.cc +++ b/runtime/onert/backend/cpu/ConstantInitializer.cc @@ -15,6 +15,7 @@ */ #include "ConstantInitializer.h" +#include "Tensor.h" namespace onert { @@ -30,39 +31,61 @@ ConstantInitializer::ConstantInitializer(const ir::Operands &operands, // DO NOTHING } +void ConstantInitializer::registerDefaultInitializer(const ir::OperandIndex &index, + const ir::Operand &obj) +{ + registerExternalInitializer(index, obj); +} + +void ConstantInitializer::registerExternalInitializer(const ir::OperandIndex &index, + const ir::Operand &obj) +{ + // For only CONSTANTS + // TODO Add to check if tensor has been allocated + if (!obj.isConstant()) + return; + + _init_map[index] = [](const onert::ir::Operand &model_obj, onert::backend::ITensor &itensor) { + auto data = model_obj.shareData(); + assert(data && data->base()); + ExternalTensor &tensor = dynamic_cast<ExternalTensor &>(itensor); + tensor.setData(data); + }; +} + void ConstantInitializer::visit(const ir::operation::Conv2D &node) { const auto &kernel_index = node.getInputs().at(ir::operation::Conv2D::KERNEL); const auto &kernel_obj = 
_operands.at(kernel_index); - registerCopyInitializer(kernel_index, kernel_obj); + registerExternalInitializer(kernel_index, kernel_obj); const auto &bias_index = node.getInputs().at(ir::operation::Conv2D::BIAS); const auto &bias_obj = _operands.at(bias_index); - registerCopyInitializer(bias_index, bias_obj); + registerExternalInitializer(bias_index, bias_obj); } void ConstantInitializer::visit(const ir::operation::DepthwiseConv2D &node) { const auto &kernel_index = node.getInputs().at(ir::operation::DepthwiseConv2D::KERNEL); const auto &kernel_obj = _operands.at(kernel_index); - registerCopyInitializer(kernel_index, kernel_obj); + registerExternalInitializer(kernel_index, kernel_obj); const auto &bias_index = node.getInputs().at(ir::operation::DepthwiseConv2D::BIAS); const auto &bias_obj = _operands.at(bias_index); - registerCopyInitializer(bias_index, bias_obj); + registerExternalInitializer(bias_index, bias_obj); } void ConstantInitializer::visit(const ir::operation::FullyConnected &node) { const auto &weight_index = node.getInputs().at(ir::operation::FullyConnected::WEIGHT); const auto &weight_obj = _operands.at(weight_index); - registerCopyInitializer(weight_index, weight_obj); + registerExternalInitializer(weight_index, weight_obj); const auto &bias_index = node.getInputs().at(ir::operation::FullyConnected::BIAS); if (!bias_index.undefined()) { const auto &bias_obj = _operands.at(bias_index); - registerCopyInitializer(bias_index, bias_obj); + registerExternalInitializer(bias_index, bias_obj); } } diff --git a/runtime/onert/backend/cpu/ConstantInitializer.h b/runtime/onert/backend/cpu/ConstantInitializer.h index bd06c64d1..de03a693a 100644 --- a/runtime/onert/backend/cpu/ConstantInitializer.h +++ b/runtime/onert/backend/cpu/ConstantInitializer.h @@ -36,6 +36,15 @@ public: const std::shared_ptr<TensorBuilder> &tensor_builder); public: + void registerDefaultInitializer(const ir::OperandIndex &index, const ir::Operand &obj) override; + + // TODO For now, only the cpu backend supports constant tensors that use external data. + // If other backends come to support this (ExternalTensor may need an abstraction + // such as IExternalTensor for that), this can become an interface of + // IConstantInitializer + void registerExternalInitializer(const ir::OperandIndex &, const ir::Operand &); + +public: void visit(const ir::operation::Conv2D &) override; void visit(const ir::operation::DepthwiseConv2D &) override; void visit(const ir::operation::FullyConnected &) override; diff --git a/runtime/onert/backend/cpu/ExternalContext.h b/runtime/onert/backend/cpu/ExternalContext.h new file mode 100644 index 000000000..6627412d2 --- /dev/null +++ b/runtime/onert/backend/cpu/ExternalContext.h @@ -0,0 +1,64 @@ +/* + * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License.
+ */ + +#ifndef __ONERT_BACKEND_CPU_EXTERNAL_CONTEXT_H__ +#define __ONERT_BACKEND_CPU_EXTERNAL_CONTEXT_H__ + +#include <backend/IExternalContext.h> +#include <util/ConfigSource.h> +#include <ruy/context.h> + +namespace +{ +const int kDefaultNumThreadpoolThreads = 1; +} + +namespace onert +{ +namespace backend +{ +namespace cpu +{ + +class ExternalContext : public IExternalContext +{ +public: + ExternalContext() : _ruy_context(new ruy::Context) + { + setMaxNumThreads(onert::util::getConfigInt(onert::util::config::RUY_THREADS)); +#ifdef USE_RUY_GEMV + _ruy_context->cache_policy = ruy::kCacheLHSOnNarrowMul; +#endif + } + + void setMaxNumThreads(int max_num_threads) + { + const int target_num_threads = + max_num_threads > -1 ? max_num_threads : kDefaultNumThreadpoolThreads; + _ruy_context->max_num_threads = target_num_threads; + } + + ruy::Context *ruy_context() const { return _ruy_context.get(); } + +private: + const std::unique_ptr<ruy::Context> _ruy_context; +}; + +} // namespace cpu +} // namespace backend +} // namespace onert + +#endif // __ONERT_BACKEND_CPU_EXTERNAL_CONTEXT_H__ diff --git a/runtime/onert/backend/cpu/KernelGenerator.cc b/runtime/onert/backend/cpu/KernelGenerator.cc index 72f960675..7939fe894 100644 --- a/runtime/onert/backend/cpu/KernelGenerator.cc +++ b/runtime/onert/backend/cpu/KernelGenerator.cc @@ -20,6 +20,7 @@ #include "ops/AddLayer.h" #include "ops/ArgMinMaxLayer.h" #include "ops/AvgPoolLayer.h" +#include "ops/BatchToSpaceNDLayer.h" #include "ops/CastLayer.h" #include "ops/CompareLayer.h" #include "ops/ConcatLayer.h" @@ -49,7 +50,9 @@ #include "ops/RangeLayer.h" #include "ops/ReduceLayer.h" #include "ops/ReLULayer.h" +#include "ops/ReLU6Layer.h" #include "ops/ReshapeLayer.h" +#include "ops/ResizeBilinearLayer.h" #include "ops/ReverseLayer.h" #include "ops/RoundLayer.h" #include "ops/RsqrtLayer.h" @@ -60,7 +63,9 @@ #include "ops/SoftMaxLayer.h" #include "ops/StridedSliceLayer.h" #include "ops/SpaceToBatchNDLayer.h" +#include "ops/SpaceToDepthLayer.h" #include "ops/SplitLayer.h" +#include "ops/SplitVLayer.h" #include "ops/SubLayer.h" #include "ops/TanhLayer.h" #include "ops/TileLayer.h" @@ -70,11 +75,14 @@ #include "ops/ZerosLikeLayer.h" #include "ops/SquaredDiffLayer.h" #include "ops/LogicalOrLayer.h" +#include "ops/L2NormLayer.h" #include "ops/MatrixBandPartLayer.h" #include "ops/BatchMatMulLayer.h" #include "ops/BroadcastToLayer.h" #include "ops/FusedBatchNormLayer.h" #include "ops/LogSoftMaxLayer.h" +#include "ops/QuantizeLayer.h" +#include "ops/StatelessRandomUniformLayer.h" #include <backend/Backend.h> #include <backend/IConfig.h> @@ -119,9 +127,11 @@ ops::ReduceType convertReduceType(ir::operation::Reduce::ReduceType reduce_type_ KernelGenerator::KernelGenerator( const ir::Operands &operands_ctx, const ir::Operations &operations_ctx, const std::shared_ptr<TensorBuilder> &tensor_builder, - const std::shared_ptr<backend::custom::IKernelBuilder> &kernel_builder) + const std::shared_ptr<backend::custom::IKernelBuilder> &kernel_builder, + const std::shared_ptr<ExternalContext> &external_context) : _ctx(operands_ctx), _operations_ctx{operations_ctx}, _tensor_builder(tensor_builder), - _kernel_builder(kernel_builder), _current_op_seq_layout(ir::Layout::UNKNOWN) + _kernel_builder(kernel_builder), _current_op_seq_layout(ir::Layout::UNKNOWN), + _external_context(external_context) { // DO NOTHING } @@ -184,10 +194,10 @@ void KernelGenerator::visit(const ir::operation::Conv2D &node) const auto ker_index{node.getInputs().at(Conv2D::Input::KERNEL)}; const auto 
bias_index{node.getInputs().at(Conv2D::Input::BIAS)}; - auto ofm_alloc = _tensor_builder->portableAt(ofm_index).get(); - auto ifm_alloc = _tensor_builder->portableAt(ifm_index).get(); - auto ker_alloc = _tensor_builder->portableAt(ker_index).get(); - auto bias_alloc = _tensor_builder->portableAt(bias_index).get(); + auto ofm_tensor = _tensor_builder->portableAt(ofm_index).get(); + auto ifm_tensor = _tensor_builder->portableAt(ifm_index).get(); + auto ker_tensor = _tensor_builder->portableAt(ker_index).get(); + auto bias_tensor = _tensor_builder->portableAt(bias_index).get(); const auto stride = node.param().stride; const auto activation = node.param().activation; @@ -196,9 +206,9 @@ void KernelGenerator::visit(const ir::operation::Conv2D &node) if (_ctx.at(ifm_index).info().isDynamic() || _ctx.at(ker_index).info().isDynamic()) { - fn->configure(ifm_alloc, ker_alloc, bias_alloc, param_padding.type, param_padding.param.left, + fn->configure(ifm_tensor, ker_tensor, bias_tensor, param_padding.type, param_padding.param.left, param_padding.param.right, param_padding.param.top, param_padding.param.bottom, - stride.horizontal, stride.vertical, activation, ofm_alloc); + stride.horizontal, stride.vertical, activation, ofm_tensor); _return_fn = std::move(fn); return; @@ -213,9 +223,9 @@ void KernelGenerator::visit(const ir::operation::Conv2D &node) const auto padding = ir::calculatePadding(param_padding, ifm_shape, ofm_shape, stride, ker_width, ker_height); - fn->configure(ifm_alloc, ker_alloc, bias_alloc, param_padding.type, padding.left, padding.right, - padding.top, padding.bottom, stride.horizontal, stride.vertical, activation, - ofm_alloc); + fn->configure(ifm_tensor, ker_tensor, bias_tensor, param_padding.type, padding.left, + padding.right, padding.top, padding.bottom, stride.horizontal, stride.vertical, + activation, ofm_tensor); _return_fn = std::move(fn); } @@ -241,16 +251,16 @@ void KernelGenerator::visit(const ir::operation::DepthwiseConv2D &node) const auto multiplier = node.param().multiplier; const auto activation = node.param().activation; - auto ofm_alloc = _tensor_builder->portableAt(ofm_index).get(); - auto ifm_alloc = _tensor_builder->portableAt(ifm_index).get(); - auto ker_alloc = _tensor_builder->portableAt(ker_index).get(); - auto bias_alloc = _tensor_builder->portableAt(bias_index).get(); + auto ofm_tensor = _tensor_builder->portableAt(ofm_index).get(); + auto ifm_tensor = _tensor_builder->portableAt(ifm_index).get(); + auto ker_tensor = _tensor_builder->portableAt(ker_index).get(); + auto bias_tensor = _tensor_builder->portableAt(bias_index).get(); auto fn = std::make_unique<ops::DepthwiseConvolutionLayer>(); - fn->configure(ifm_alloc, ker_alloc, bias_alloc, padding.left, padding.right, padding.top, + fn->configure(ifm_tensor, ker_tensor, bias_tensor, padding.left, padding.right, padding.top, padding.bottom, stride.horizontal, stride.vertical, multiplier, activation, - ofm_alloc); + ofm_tensor); _return_fn = std::move(fn); } @@ -270,13 +280,13 @@ void KernelGenerator::visit(const ir::operation::MaxPool2D &node) ir::calculatePadding(node.param().padding, ifm_shape, ofm_shape, stride, kw, kh); const auto activation = node.param().activation; - auto ofm_alloc = _tensor_builder->portableAt(ofm_index).get(); - auto ifm_alloc = _tensor_builder->portableAt(ifm_index).get(); + auto ofm_tensor = _tensor_builder->portableAt(ofm_index).get(); + auto ifm_tensor = _tensor_builder->portableAt(ifm_index).get(); auto fn = std::make_unique<ops::MaxPoolLayer>(); - fn->configure(ifm_alloc, 
padding.left, padding.right, padding.top, padding.bottom, - stride.horizontal, stride.vertical, kw, kh, activation, ofm_alloc); + fn->configure(ifm_tensor, padding.left, padding.right, padding.top, padding.bottom, + stride.horizontal, stride.vertical, kw, kh, activation, ofm_tensor); _return_fn = std::move(fn); } @@ -295,13 +305,13 @@ void KernelGenerator::visit(const ir::operation::AvgPool2D &node) ir::calculatePadding(node.param().padding, ifm_shape, ofm_shape, stride, kw, kh); const auto activation = node.param().activation; - auto ofm_alloc = _tensor_builder->portableAt(ofm_index).get(); - auto ifm_alloc = _tensor_builder->portableAt(ifm_index).get(); + auto ofm_tensor = _tensor_builder->portableAt(ofm_index).get(); + auto ifm_tensor = _tensor_builder->portableAt(ifm_index).get(); auto fn = std::make_unique<ops::AvgPoolLayer>(); - fn->configure(ifm_alloc, padding.left, padding.right, padding.top, padding.bottom, - stride.horizontal, stride.vertical, kw, kh, activation, ofm_alloc); + fn->configure(ifm_tensor, padding.left, padding.right, padding.top, padding.bottom, + stride.horizontal, stride.vertical, kw, kh, activation, ofm_tensor); _return_fn = std::move(fn); } @@ -313,7 +323,7 @@ void KernelGenerator::visit(const ir::operation::Concat &node) const auto rank = _ctx.at(ofm_index).shape().rank(); const auto axis = ops::getAxis(rank, node.param().axis, _current_op_seq_layout); - auto output_alloc = _tensor_builder->portableAt(ofm_index).get(); + auto output_tensor = _tensor_builder->portableAt(ofm_index).get(); std::vector<const IPortableTensor *> input_tensors; for (auto &ifm_idx : node.getInputs()) @@ -321,7 +331,33 @@ void KernelGenerator::visit(const ir::operation::Concat &node) auto fn = std::make_unique<ops::ConcatLayer>(); - fn->configure(input_tensors, axis, output_alloc); + fn->configure(input_tensors, axis, output_tensor); + + _return_fn = std::move(fn); +} + +void KernelGenerator::visit(const ir::operation::BatchToSpaceND &node) +{ + const auto output_index{node.getOutputs().at(0)}; + const auto input_index{node.getInputs().at(ir::operation::BatchToSpaceND::INPUT)}; + const auto block_size_index{node.getInputs().at(ir::operation::BatchToSpaceND::BLOCK_SIZE)}; + + auto output_tensor = _tensor_builder->portableAt(output_index).get(); + auto input_tensor = _tensor_builder->portableAt(input_index).get(); + auto block_size_tensor = _tensor_builder->portableAt(block_size_index).get(); + + auto fn = std::make_unique<ops::BatchToSpaceNDLayer>(); + + IPortableTensor *crops_tensor = nullptr; + const auto NNApiInputs = 2; + + if (node.getInputs().size() != NNApiInputs) + { + const auto crops_data_index{node.getInputs().at(ir::operation::BatchToSpaceND::CROPS_DATA)}; + crops_tensor = _tensor_builder->portableAt(crops_data_index).get(); + } + + fn->configure(input_tensor, output_tensor, block_size_tensor, crops_tensor); _return_fn = std::move(fn); } @@ -332,13 +368,13 @@ void KernelGenerator::visit(const ir::operation::Fill &node) const auto input_index{node.getInputs().at(ir::operation::Fill::Input::INPUT)}; const auto value_index{node.getInputs().at(ir::operation::Fill::Input::VALUE)}; - auto output_alloc = _tensor_builder->portableAt(output_index).get(); - auto input_alloc = _tensor_builder->portableAt(input_index).get(); - auto value_alloc = _tensor_builder->portableAt(value_index).get(); + auto output_tensor = _tensor_builder->portableAt(output_index).get(); + auto input_tensor = _tensor_builder->portableAt(input_index).get(); + auto value_tensor =
_tensor_builder->portableAt(value_index).get(); auto fn = std::make_unique<ops::FillLayer>(); - fn->configure(input_alloc, value_alloc, output_alloc); + fn->configure(input_tensor, value_tensor, output_tensor); _return_fn = std::move(fn); } @@ -353,15 +389,16 @@ void KernelGenerator::visit(const ir::operation::FullyConnected &node) const auto bias_index{node.getInputs().at(FullyConnected::Input::BIAS)}; const auto activation = node.param().activation; - auto output_alloc = _tensor_builder->portableAt(output_index).get(); - auto input_alloc = _tensor_builder->portableAt(input_index).get(); - auto weight_alloc = _tensor_builder->portableAt(weight_index).get(); - auto bias_alloc = + auto output_tensor = _tensor_builder->portableAt(output_index).get(); + auto input_tensor = _tensor_builder->portableAt(input_index).get(); + auto weight_tensor = _tensor_builder->portableAt(weight_index).get(); + auto bias_tensor = bias_index.undefined() ? nullptr : _tensor_builder->portableAt(bias_index).get(); auto fn = std::make_unique<ops::FullyConnectedLayer>(); - fn->configure(input_alloc, weight_alloc, bias_alloc, activation, output_alloc); + fn->configure(input_tensor, weight_tensor, bias_tensor, activation, output_tensor, + _external_context); _return_fn = std::move(fn); } @@ -371,21 +408,21 @@ void KernelGenerator::visit(const ir::operation::Reshape &node) const auto output_index{node.getOutputs().at(0)}; const auto input_index{node.getInputs().at(ir::operation::Reshape::Input::INPUT)}; - auto output_alloc = _tensor_builder->portableAt(output_index).get(); - auto input_alloc = _tensor_builder->portableAt(input_index).get(); + auto output_tensor = _tensor_builder->portableAt(output_index).get(); + auto input_tensor = _tensor_builder->portableAt(input_index).get(); // optional 2nd input - IPortableTensor *shape_alloc = nullptr; + IPortableTensor *shape_tensor = nullptr; if (node.getInputs().size() == 2) { const auto shape_index{node.getInputs().at(ir::operation::Reshape::Input::SHAPE)}; - shape_alloc = _tensor_builder->portableAt(shape_index).get(); + shape_tensor = _tensor_builder->portableAt(shape_index).get(); } auto fn = std::make_unique<ops::ReshapeLayer>(); - fn->configure(input_alloc, shape_alloc, output_alloc); + fn->configure(input_tensor, shape_tensor, output_tensor); _return_fn = std::move(fn); } @@ -394,13 +431,13 @@ void KernelGenerator::visit(const ir::operation::Squeeze &node) const auto output_index{node.getOutputs().at(0)}; const auto input_index{node.getInputs().at(ir::operation::Squeeze::Input::INPUT)}; - auto output_alloc = _tensor_builder->portableAt(output_index).get(); - auto input_alloc = _tensor_builder->portableAt(input_index).get(); + auto output_tensor = _tensor_builder->portableAt(output_index).get(); + auto input_tensor = _tensor_builder->portableAt(input_index).get(); // Squeeze can share same kernel with reshape auto fn = std::make_unique<ops::ReshapeLayer>(); - fn->configure(input_alloc, nullptr, output_alloc); + fn->configure(input_tensor, nullptr, output_tensor); _return_fn = std::move(fn); } @@ -412,12 +449,12 @@ void KernelGenerator::visit(const ir::operation::Softmax &node) const auto beta = node.param().beta; - auto output_alloc = _tensor_builder->portableAt(output_index).get(); - auto input_alloc = _tensor_builder->portableAt(input_index).get(); + auto output_tensor = _tensor_builder->portableAt(output_index).get(); + auto input_tensor = _tensor_builder->portableAt(input_index).get(); auto fn = std::make_unique<ops::SoftMaxLayer>(); - fn->configure(input_alloc, 
beta, output_alloc); + fn->configure(input_tensor, beta, output_tensor); _return_fn = std::move(fn); } @@ -430,13 +467,13 @@ void KernelGenerator::visit(const ir::operation::Add &node) const auto activation = node.param().activation; - auto ofm_alloc = _tensor_builder->portableAt(ofm_index).get(); - auto lhs_alloc = _tensor_builder->portableAt(lhs_index).get(); - auto rhs_alloc = _tensor_builder->portableAt(rhs_index).get(); + auto ofm_tensor = _tensor_builder->portableAt(ofm_index).get(); + auto lhs_tensor = _tensor_builder->portableAt(lhs_index).get(); + auto rhs_tensor = _tensor_builder->portableAt(rhs_index).get(); auto fn = std::make_unique<ops::AddLayer>(); - fn->configure(lhs_alloc, rhs_alloc, activation, ofm_alloc); + fn->configure(lhs_tensor, rhs_tensor, activation, ofm_tensor); _return_fn = std::move(fn); } @@ -447,15 +484,15 @@ void KernelGenerator::visit(const ir::operation::Comparison &node) const auto lhs_index{node.getInputs().at(ir::operation::Comparison::Input::INPUT0)}; const auto rhs_index{node.getInputs().at(ir::operation::Comparison::Input::INPUT1)}; - auto ofm_alloc = _tensor_builder->portableAt(ofm_index).get(); - auto lhs_alloc = _tensor_builder->portableAt(lhs_index).get(); - auto rhs_alloc = _tensor_builder->portableAt(rhs_index).get(); + auto ofm_tensor = _tensor_builder->portableAt(ofm_index).get(); + auto lhs_tensor = _tensor_builder->portableAt(lhs_index).get(); + auto rhs_tensor = _tensor_builder->portableAt(rhs_index).get(); auto comparison_type = node.param().comparison_type; auto fn = std::make_unique<ops::CompareLayer>(); - fn->configure(lhs_alloc, rhs_alloc, comparison_type, ofm_alloc); + fn->configure(lhs_tensor, rhs_tensor, comparison_type, ofm_tensor); _return_fn = std::move(fn); } @@ -466,11 +503,11 @@ void KernelGenerator::visit(const ir::operation::Gather &node) const auto input_index{node.getInputs().at(ir::operation::Gather::Input::INPUT)}; const auto indices_index{node.getInputs().at(ir::operation::Gather::Input::INDICES)}; - auto output_alloc = _tensor_builder->portableAt(output_index).get(); - auto input_alloc = _tensor_builder->portableAt(input_index).get(); - auto indices_alloc = _tensor_builder->portableAt(indices_index).get(); + auto output_tensor = _tensor_builder->portableAt(output_index).get(); + auto input_tensor = _tensor_builder->portableAt(input_index).get(); + auto indices_tensor = _tensor_builder->portableAt(indices_index).get(); - const auto backend_layout = output_alloc->layout(); + const auto backend_layout = output_tensor->layout(); UNUSED_RELEASE(backend_layout); // NOTE The frontend layout and backend layout must be the same for this operation. @@ -481,8 +518,8 @@ void KernelGenerator::visit(const ir::operation::Gather &node) // a model. For example, if a model in NHWC has this operation as output rank == 4, indices // rank == 2 and axis == 2, this operation should work as the axis W and C, but the axis W // and C are not sequential in NCHW. So the backend in NCHW cannot handle this case. 
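The layout NOTE above is worth unpacking: the gather axis is computed against the frontend layout, so the kernel can only reuse it when the backend tensors keep that same layout. A minimal sketch of why a simple axis remap would not be enough, assuming rank-4 tensors and the usual NHWC/NCHW permutation (the helper name is illustrative, not part of this patch):

```cpp
// Illustrative only, not part of the patch: remapping one NHWC axis to NCHW
// is a fixed permutation for rank-4 tensors.
//   NHWC: 0=N, 1=H, 2=W, 3=C   ->   NCHW: 0=N, 1=C, 2=H, 3=W
inline int nhwc_axis_to_nchw(int nhwc_axis)
{
  constexpr int permutation[4] = {0, 2, 3, 1}; // N->0, H->2, W->3, C->1
  return permutation[nhwc_axis];
}
// Gather with indices of rank >= 2 replaces the gathered axis with all of the
// index axes, which are consecutive in the frontend output. After the
// permutation above those axes need not be consecutive in NCHW (gathering on
// W lands results between W and C), so no single remapped axis value can
// describe the output, and the asserts below require identical layouts instead.
```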
- assert(backend_layout == input_alloc->layout()); - assert(backend_layout == indices_alloc->layout()); + assert(backend_layout == input_tensor->layout()); + assert(backend_layout == indices_tensor->layout()); const auto &input_shape = _ctx.at(input_index).shape(); UNUSED_RELEASE(input_shape); assert(input_shape.rank() < 4 || _current_op_seq_layout == backend_layout); @@ -492,7 +529,7 @@ void KernelGenerator::visit(const ir::operation::Gather &node) auto fn = std::make_unique<ops::GatherLayer>(); - fn->configure(input_alloc, indices_alloc, output_alloc, axis_value); + fn->configure(input_tensor, indices_tensor, output_tensor, axis_value); _return_fn = std::move(fn); } @@ -506,13 +543,13 @@ void KernelGenerator::visit(const ir::operation::Sub &node) const auto activation = node.param().activation; - auto ofm_alloc = _tensor_builder->portableAt(ofm_index).get(); - auto lhs_alloc = _tensor_builder->portableAt(lhs_index).get(); - auto rhs_alloc = _tensor_builder->portableAt(rhs_index).get(); + auto ofm_tensor = _tensor_builder->portableAt(ofm_index).get(); + auto lhs_tensor = _tensor_builder->portableAt(lhs_index).get(); + auto rhs_tensor = _tensor_builder->portableAt(rhs_index).get(); auto fn = std::make_unique<ops::SubLayer>(); - fn->configure(lhs_alloc, rhs_alloc, activation, ofm_alloc); + fn->configure(lhs_tensor, rhs_tensor, activation, ofm_tensor); _return_fn = std::move(fn); } @@ -526,13 +563,13 @@ void KernelGenerator::visit(const ir::operation::Mul &node) const auto activation = node.param().activation; - auto ofm_alloc = _tensor_builder->portableAt(ofm_index).get(); - auto lhs_alloc = _tensor_builder->portableAt(lhs_index).get(); - auto rhs_alloc = _tensor_builder->portableAt(rhs_index).get(); + auto ofm_tensor = _tensor_builder->portableAt(ofm_index).get(); + auto lhs_tensor = _tensor_builder->portableAt(lhs_index).get(); + auto rhs_tensor = _tensor_builder->portableAt(rhs_index).get(); auto fn = std::make_unique<ops::MulLayer>(); - fn->configure(lhs_alloc, rhs_alloc, activation, ofm_alloc); + fn->configure(lhs_tensor, rhs_tensor, activation, ofm_tensor); _return_fn = std::move(fn); } @@ -547,18 +584,18 @@ void KernelGenerator::visit(const ir::operation::OneHot &node) const auto axis = node.param().axis; - auto output_alloc = _tensor_builder->portableAt(output_index).get(); - auto indices_alloc = _tensor_builder->portableAt(indices_index).get(); - auto depth_alloc = _tensor_builder->portableAt(depth_index).get(); - auto onvalue_alloc = _tensor_builder->portableAt(onvalue_index).get(); - auto offvalue_alloc = _tensor_builder->portableAt(offvalue_index).get(); + auto output_tensor = _tensor_builder->portableAt(output_index).get(); + auto indices_tensor = _tensor_builder->portableAt(indices_index).get(); + auto depth_tensor = _tensor_builder->portableAt(depth_index).get(); + auto onvalue_tensor = _tensor_builder->portableAt(onvalue_index).get(); + auto offvalue_tensor = _tensor_builder->portableAt(offvalue_index).get(); - assert(indices_alloc->data_type() == OperandType::INT32); - assert(axis <= static_cast<int>(indices_alloc->num_dimensions())); + assert(indices_tensor->data_type() == OperandType::INT32); + assert(axis <= static_cast<int>(indices_tensor->num_dimensions())); auto fn = std::make_unique<ops::OneHotLayer>(); - fn->configure(indices_alloc, depth_alloc, onvalue_alloc, offvalue_alloc, output_alloc, axis); + fn->configure(indices_tensor, depth_tensor, onvalue_tensor, offvalue_tensor, output_tensor, axis); _return_fn = std::move(fn); } @@ -572,13 +609,13 @@ void 
KernelGenerator::visit(const ir::operation::Div &node) const auto activation = node.param().activation; - auto ofm_alloc = _tensor_builder->portableAt(ofm_index).get(); - auto lhs_alloc = _tensor_builder->portableAt(lhs_index).get(); - auto rhs_alloc = _tensor_builder->portableAt(rhs_index).get(); + auto ofm_tensor = _tensor_builder->portableAt(ofm_index).get(); + auto lhs_tensor = _tensor_builder->portableAt(lhs_index).get(); + auto rhs_tensor = _tensor_builder->portableAt(rhs_index).get(); auto fn = std::make_unique<ops::DivLayer>(); - fn->configure(lhs_alloc, rhs_alloc, activation, ofm_alloc); + fn->configure(lhs_tensor, rhs_tensor, activation, ofm_tensor); _return_fn = std::move(fn); } @@ -587,16 +624,16 @@ void KernelGenerator::visit(const ir::operation::Einsum &node) { const auto ofm_index{node.getOutputs().at(0)}; - auto output_alloc = _tensor_builder->portableAt(ofm_index).get(); - std::vector<const IPortableTensor *> input_allocs; + auto output_tensor = _tensor_builder->portableAt(ofm_index).get(); + std::vector<const IPortableTensor *> input_tensors; for (auto &ifm_idx : node.getInputs()) - input_allocs.emplace_back(_tensor_builder->portableAt(ifm_idx).get()); + input_tensors.emplace_back(_tensor_builder->portableAt(ifm_idx).get()); const auto equation = node.param().equation; auto fn = std::make_unique<ops::EinsumLayer>(); - fn->configure(input_allocs, equation, output_alloc); + fn->configure(input_tensors, equation, output_tensor); _return_fn = std::move(fn); } @@ -605,14 +642,14 @@ void KernelGenerator::visit(const ir::operation::Custom &node) { auto fill_op_info = [&](const ir::OperandIndexSequence &opSeq, std::vector<custom::TypeInfo> &types, - std::vector<std::shared_ptr<IPortableTensor>> &allocs) { + std::vector<std::shared_ptr<IPortableTensor>> &tensors) { for (auto &idx : opSeq) { const auto &operand = _ctx.at(idx); // TODO make sure using `_current_op_seq_layout` is correct for custom operations types.emplace_back(custom::TypeInfo{operand.shape(), operand.typeInfo().type()}); - auto in_alloc = _tensor_builder->portableAt(idx); - allocs.emplace_back(in_alloc); + auto in_tensor = _tensor_builder->portableAt(idx); + tensors.emplace_back(in_tensor); } }; @@ -634,12 +671,12 @@ void KernelGenerator::visit(const ir::operation::Exp &node) const auto output_index{node.getOutputs().at(0)}; const auto input_index{node.getInputs().at(ir::operation::Exp::Input::INPUT)}; - auto output_alloc = _tensor_builder->portableAt(output_index).get(); - auto input_alloc = _tensor_builder->portableAt(input_index).get(); + auto output_tensor = _tensor_builder->portableAt(output_index).get(); + auto input_tensor = _tensor_builder->portableAt(input_index).get(); auto fn = std::make_unique<ops::ExpLayer>(); - fn->configure(input_alloc, output_alloc); + fn->configure(input_tensor, output_tensor); _return_fn = std::move(fn); } @@ -650,13 +687,13 @@ void KernelGenerator::visit(const ir::operation::ExpandDims &node) const auto input_index{node.getInputs().at(ir::operation::ExpandDims::Input::INPUT)}; const auto axis_index{node.getInputs().at(ir::operation::ExpandDims::Input::AXIS)}; - auto output_alloc = _tensor_builder->portableAt(output_index).get(); - auto input_alloc = _tensor_builder->portableAt(input_index).get(); - auto axis_alloc = _tensor_builder->portableAt(axis_index).get(); + auto output_tensor = _tensor_builder->portableAt(output_index).get(); + auto input_tensor = _tensor_builder->portableAt(input_index).get(); + auto axis_tensor = _tensor_builder->portableAt(axis_index).get(); auto fn = 
std::make_unique<ops::ExpandDimsLayer>(); - fn->configure(input_alloc, axis_alloc, output_alloc); + fn->configure(input_tensor, axis_tensor, output_tensor); _return_fn = std::move(fn); } @@ -666,12 +703,12 @@ void KernelGenerator::visit(const ir::operation::Logistic &node) const auto output_index{node.getOutputs().at(0)}; const auto input_index{node.getInputs().at(ir::operation::Logistic::Input::INPUT)}; - auto output_alloc = _tensor_builder->portableAt(output_index).get(); - auto input_alloc = _tensor_builder->portableAt(input_index).get(); + auto output_tensor = _tensor_builder->portableAt(output_index).get(); + auto input_tensor = _tensor_builder->portableAt(input_index).get(); auto fn = std::make_unique<ops::LogisticLayer>(); - fn->configure(input_alloc, output_alloc); + fn->configure(input_tensor, output_tensor); _return_fn = std::move(fn); } @@ -681,12 +718,12 @@ void KernelGenerator::visit(const ir::operation::Tanh &node) const auto output_index{node.getOutputs().at(0)}; const auto input_index{node.getInputs().at(ir::operation::Tanh::Input::INPUT)}; - auto output_alloc = _tensor_builder->portableAt(output_index).get(); - auto input_alloc = _tensor_builder->portableAt(input_index).get(); + auto output_tensor = _tensor_builder->portableAt(output_index).get(); + auto input_tensor = _tensor_builder->portableAt(input_index).get(); auto fn = std::make_unique<ops::TanhLayer>(); - fn->configure(input_alloc, output_alloc); + fn->configure(input_tensor, output_tensor); _return_fn = std::move(fn); } @@ -700,7 +737,7 @@ void KernelGenerator::visit(const ir::operation::Pack &node) assert(-rank <= axis && axis < rank); - auto output_alloc = _tensor_builder->portableAt(ofm_index).get(); + auto output_tensor = _tensor_builder->portableAt(ofm_index).get(); std::vector<const IPortableTensor *> input_tensors; for (auto &ifm_idx : node.getInputs()) @@ -708,7 +745,7 @@ void KernelGenerator::visit(const ir::operation::Pack &node) auto fn = std::make_unique<ops::PackLayer>(); - fn->configure(input_tensors, axis, output_alloc); + fn->configure(input_tensors, axis, output_tensor); _return_fn = std::move(fn); } @@ -722,7 +759,7 @@ void KernelGenerator::visit(const ir::operation::Unpack &node) assert(rank == 0 || (-rank <= axis && axis < rank)); - auto input_alloc = _tensor_builder->portableAt(input_index).get(); + auto input_tensor = _tensor_builder->portableAt(input_index).get(); std::vector<IPortableTensor *> output_tensors; for (auto &output_idx : node.getOutputs()) @@ -732,7 +769,7 @@ void KernelGenerator::visit(const ir::operation::Unpack &node) uint32_t axis_resolved = (axis < 0 ? axis + rank : axis); - fn->configure(input_alloc, axis_resolved, node.param().num, output_tensors); + fn->configure(input_tensor, axis_resolved, node.param().num, output_tensors); _return_fn = std::move(fn); } @@ -751,8 +788,16 @@ void KernelGenerator::visit(const ir::operation::Pad &node) auto fn = std::make_unique<ops::PadLayer>(); - fn->configure(input, output, pad_base, pad_rank); + bool isPadV2 = node.getInputs().size() == 3; + const void *value = nullptr; + + if (isPadV2) + { + const auto value_index{node.getInputs().at(ir::operation::Pad::Input::VALUE)}; + value = reinterpret_cast<const void *>(_ctx.at(value_index).data()->base()); + } + fn->configure(input, output, pad_base, pad_rank, value); _return_fn = std::move(fn); } @@ -762,13 +807,13 @@ void KernelGenerator::visit(const ir::operation::Max &node) const auto lhs_index{node.getInputs().at(ir::operation::Max::Input::LHS)}; const auto rhs_index{node.getInputs().at(ir::operation::Max::Input::RHS)}; - auto ofm_alloc = _tensor_builder->portableAt(ofm_index).get(); - auto lhs_alloc = _tensor_builder->portableAt(lhs_index).get(); - auto rhs_alloc = _tensor_builder->portableAt(rhs_index).get(); + auto ofm_tensor = _tensor_builder->portableAt(ofm_index).get(); + auto lhs_tensor = _tensor_builder->portableAt(lhs_index).get(); + auto rhs_tensor = _tensor_builder->portableAt(rhs_index).get(); auto fn = std::make_unique<ops::MaxLayer>(); - fn->configure(lhs_alloc, rhs_alloc, ofm_alloc); + fn->configure(lhs_tensor, rhs_tensor, ofm_tensor); _return_fn = std::move(fn); } @@ -779,13 +824,13 @@ void KernelGenerator::visit(const ir::operation::Min &node) const auto lhs_index{node.getInputs().at(ir::operation::Min::Input::LHS)}; const auto rhs_index{node.getInputs().at(ir::operation::Min::Input::RHS)}; - auto ofm_alloc = _tensor_builder->portableAt(ofm_index).get(); - auto lhs_alloc = _tensor_builder->portableAt(lhs_index).get(); - auto rhs_alloc = _tensor_builder->portableAt(rhs_index).get(); + auto ofm_tensor = _tensor_builder->portableAt(ofm_index).get(); + auto lhs_tensor = _tensor_builder->portableAt(lhs_index).get(); + auto rhs_tensor = _tensor_builder->portableAt(rhs_index).get(); auto fn = std::make_unique<ops::MinLayer>(); - fn->configure(lhs_alloc, rhs_alloc, ofm_alloc); + fn->configure(lhs_tensor, rhs_tensor, ofm_tensor); _return_fn = std::move(fn); } @@ -795,12 +840,12 @@ void KernelGenerator::visit(const ir::operation::Cast &node) const auto ofm_index{node.getOutputs().at(0)}; const auto ifm_index{node.getInputs().at(ir::operation::Cast::Input::INPUT)}; - auto ofm_alloc = _tensor_builder->portableAt(ofm_index).get(); - auto ifm_alloc = _tensor_builder->portableAt(ifm_index).get(); + auto ofm_tensor = _tensor_builder->portableAt(ofm_index).get(); + auto ifm_tensor = _tensor_builder->portableAt(ifm_index).get(); auto fn = std::make_unique<ops::CastLayer>(); - fn->configure(ifm_alloc, ofm_alloc); + fn->configure(ifm_tensor, ofm_tensor); _return_fn = std::move(fn); } @@ -810,12 +855,12 @@ void KernelGenerator::visit(const ir::operation::Transpose &node) const auto output_index{node.getOutputs().at(0)}; const auto input_index{node.getInputs().at(ir::operation::Transpose::Input::INPUT)}; - auto output_alloc = _tensor_builder->portableAt(output_index).get(); - auto input_alloc = _tensor_builder->portableAt(input_index).get(); + auto output_tensor = _tensor_builder->portableAt(output_index).get(); + auto input_tensor = _tensor_builder->portableAt(input_index).get(); auto fn = std::make_unique<ops::TransposeLayer>(); - fn->configure(input_alloc, output_alloc, node.param().perm); + fn->configure(input_tensor, output_tensor, node.param().perm); _return_fn = std::move(fn); } @@ -827,15 +872,15 @@ void KernelGenerator::visit(const ir::operation::Reduce &node) const auto axes_index{node.getInputs().at(ir::operation::Reduce::Input::AXES)}; const auto keep_dims = node.param().keep_dims; - auto output_alloc = _tensor_builder->portableAt(output_index).get(); - auto
input_alloc = _tensor_builder->portableAt(input_index).get(); - auto axes_alloc = _tensor_builder->portableAt(axes_index).get(); + auto output_tensor = _tensor_builder->portableAt(output_index).get(); + auto input_tensor = _tensor_builder->portableAt(input_index).get(); + auto axes_tensor = _tensor_builder->portableAt(axes_index).get(); if (node.param().reduce_type == ir::operation::Reduce::ReduceType::MEAN) { auto fn = std::make_unique<ops::MeanLayer>(); - fn->configure(input_alloc, axes_alloc, output_alloc, keep_dims); + fn->configure(input_tensor, axes_tensor, output_tensor, keep_dims); _return_fn = std::move(fn); } @@ -844,7 +889,7 @@ void KernelGenerator::visit(const ir::operation::Reduce &node) auto fn = std::make_unique<ops::ReduceLayer>(); const auto reduce_type = convertReduceType(node.param().reduce_type); - fn->configure(input_alloc, axes_alloc, output_alloc, reduce_type, keep_dims); + fn->configure(input_tensor, axes_tensor, output_tensor, reduce_type, keep_dims); _return_fn = std::move(fn); } @@ -855,12 +900,27 @@ void KernelGenerator::visit(const ir::operation::ReLU &node) const auto output_index{node.getOutputs().at(0)}; const auto input_index{node.getInputs().at(0)}; - auto output_alloc = _tensor_builder->portableAt(output_index).get(); - auto input_alloc = _tensor_builder->portableAt(input_index).get(); + auto output_tensor = _tensor_builder->portableAt(output_index).get(); + auto input_tensor = _tensor_builder->portableAt(input_index).get(); auto fn = std::make_unique<ops::ReLULayer>(); - fn->configure(input_alloc, output_alloc); + fn->configure(input_tensor, output_tensor); + + _return_fn = std::move(fn); +} + +void KernelGenerator::visit(const ir::operation::ReLU6 &node) +{ + const auto output_index{node.getOutputs().at(0)}; + const auto input_index{node.getInputs().at(0)}; + + auto output_tensor = _tensor_builder->portableAt(output_index).get(); + auto input_tensor = _tensor_builder->portableAt(input_index).get(); + + auto fn = std::make_unique<ops::ReLU6Layer>(); + + fn->configure(input_tensor, output_tensor); _return_fn = std::move(fn); } @@ -872,14 +932,14 @@ void KernelGenerator::visit(const ir::operation::Select &node) const auto true_index{node.getInputs().at(ir::operation::Select::Input::INPUT_TRUE)}; const auto false_index{node.getInputs().at(ir::operation::Select::Input::INPUT_FALSE)}; - auto output_alloc = _tensor_builder->portableAt(output_index).get(); - auto condition_alloc = _tensor_builder->portableAt(condition_index).get(); - auto true_alloc = _tensor_builder->portableAt(true_index).get(); - auto false_alloc = _tensor_builder->portableAt(false_index).get(); + auto output_tensor = _tensor_builder->portableAt(output_index).get(); + auto condition_tensor = _tensor_builder->portableAt(condition_index).get(); + auto true_tensor = _tensor_builder->portableAt(true_index).get(); + auto false_tensor = _tensor_builder->portableAt(false_index).get(); auto fn = std::make_unique<ops::SelectLayer>(); - fn->configure(condition_alloc, true_alloc, false_alloc, output_alloc); + fn->configure(condition_tensor, true_tensor, false_tensor, output_tensor); _return_fn = std::move(fn); } @@ -891,14 +951,14 @@ void KernelGenerator::visit(const ir::operation::Slice &node) const auto begins_index{node.getInputs().at(ir::operation::Slice::Input::BEGINS)}; const auto sizes_index{node.getInputs().at(ir::operation::Slice::Input::SIZES)}; - auto output_alloc = _tensor_builder->portableAt(output_index).get(); - auto input_alloc = _tensor_builder->portableAt(input_index).get(); - auto 
begins_alloc = _tensor_builder->portableAt(begins_index).get(); - auto sizes_alloc = _tensor_builder->portableAt(sizes_index).get(); + auto output_tensor = _tensor_builder->portableAt(output_index).get(); + auto input_tensor = _tensor_builder->portableAt(input_index).get(); + auto begins_tensor = _tensor_builder->portableAt(begins_index).get(); + auto sizes_tensor = _tensor_builder->portableAt(sizes_index).get(); auto fn = std::make_unique<ops::SliceLayer>(); - fn->configure(input_alloc, begins_alloc, sizes_alloc, output_alloc); + fn->configure(input_tensor, begins_tensor, sizes_tensor, output_tensor); _return_fn = std::move(fn); } @@ -911,11 +971,11 @@ void KernelGenerator::visit(const ir::operation::StridedSlice &node) const auto ends_index{node.getInputs().at(ir::operation::StridedSlice::Input::ENDS)}; const auto strides_index{node.getInputs().at(ir::operation::StridedSlice::Input::STRIDES)}; - auto output_alloc = _tensor_builder->portableAt(output_index).get(); - auto input_alloc = _tensor_builder->portableAt(input_index).get(); - auto starts_alloc = _tensor_builder->portableAt(starts_index).get(); - auto ends_alloc = _tensor_builder->portableAt(ends_index).get(); - auto strides_alloc = _tensor_builder->portableAt(strides_index).get(); + auto output_tensor = _tensor_builder->portableAt(output_index).get(); + auto input_tensor = _tensor_builder->portableAt(input_index).get(); + auto starts_tensor = _tensor_builder->portableAt(starts_index).get(); + auto ends_tensor = _tensor_builder->portableAt(ends_index).get(); + auto strides_tensor = _tensor_builder->portableAt(strides_index).get(); auto begin_mask = node.param().begin_mask; auto end_mask = node.param().end_mask; @@ -923,7 +983,7 @@ void KernelGenerator::visit(const ir::operation::StridedSlice &node) auto fn = std::make_unique<ops::StridedSliceLayer>(); - fn->configure(input_alloc, starts_alloc, ends_alloc, strides_alloc, output_alloc, begin_mask, + fn->configure(input_tensor, starts_tensor, ends_tensor, strides_tensor, output_tensor, begin_mask, end_mask, shrink_axis_mask); _return_fn = std::move(fn); @@ -957,12 +1017,12 @@ void KernelGenerator::visit(const ir::operation::Abs &node) const auto ofm_index{node.getOutputs().at(0)}; const auto ifm_index{node.getInputs().at(ir::operation::Abs::Input::INPUT)}; - auto ofm_alloc = _tensor_builder->portableAt(ofm_index).get(); - auto ifm_alloc = _tensor_builder->portableAt(ifm_index).get(); + auto ofm_tensor = _tensor_builder->portableAt(ofm_index).get(); + auto ifm_tensor = _tensor_builder->portableAt(ifm_index).get(); auto fn = std::make_unique<ops::AbsLayer>(); - fn->configure(ifm_alloc, ofm_alloc); + fn->configure(ifm_tensor, ofm_tensor); _return_fn = std::move(fn); } @@ -972,12 +1032,12 @@ void KernelGenerator::visit(const ir::operation::Sin &node) const auto ofm_index{node.getOutputs().at(0)}; const auto ifm_index{node.getInputs().at(ir::operation::Sin::Input::INPUT)}; - auto ofm_alloc = _tensor_builder->portableAt(ofm_index).get(); - auto ifm_alloc = _tensor_builder->portableAt(ifm_index).get(); + auto ofm_tensor = _tensor_builder->portableAt(ofm_index).get(); + auto ifm_tensor = _tensor_builder->portableAt(ifm_index).get(); auto fn = std::make_unique<ops::SinLayer>(); - fn->configure(ifm_alloc, ofm_alloc); + fn->configure(ifm_tensor, ofm_tensor); _return_fn = std::move(fn); } @@ -987,12 +1047,12 @@ void KernelGenerator::visit(const ir::operation::Cos &node) const auto ofm_index{node.getOutputs().at(0)}; const auto ifm_index{node.getInputs().at(ir::operation::Cos::Input::INPUT)}; - 
auto ofm_alloc = _tensor_builder->portableAt(ofm_index).get(); - auto ifm_alloc = _tensor_builder->portableAt(ifm_index).get(); + auto ofm_tensor = _tensor_builder->portableAt(ofm_index).get(); + auto ifm_tensor = _tensor_builder->portableAt(ifm_index).get(); auto fn = std::make_unique<ops::CosLayer>(); - fn->configure(ifm_alloc, ofm_alloc); + fn->configure(ifm_tensor, ofm_tensor); _return_fn = std::move(fn); } @@ -1002,12 +1062,12 @@ void KernelGenerator::visit(const ir::operation::RSQRT &node) const auto ofm_index{node.getOutputs().at(0)}; const auto ifm_index{node.getInputs().at(ir::operation::RSQRT::Input::INPUT)}; - auto ofm_alloc = _tensor_builder->portableAt(ofm_index).get(); - auto ifm_alloc = _tensor_builder->portableAt(ifm_index).get(); + auto ofm_tensor = _tensor_builder->portableAt(ofm_index).get(); + auto ifm_tensor = _tensor_builder->portableAt(ifm_index).get(); auto fn = std::make_unique<ops::RsqrtLayer>(); - fn->configure(ifm_alloc, ofm_alloc); + fn->configure(ifm_tensor, ofm_tensor); _return_fn = std::move(fn); } @@ -1017,12 +1077,33 @@ void KernelGenerator::visit(const ir::operation::Shape &node) const auto ofm_index{node.getOutputs().at(0)}; const auto ifm_index{node.getInputs().at(ir::operation::Shape::Input::INPUT)}; - auto ofm_alloc = _tensor_builder->portableAt(ofm_index).get(); - auto ifm_alloc = _tensor_builder->portableAt(ifm_index).get(); + auto ofm_tensor = _tensor_builder->portableAt(ofm_index).get(); + auto ifm_tensor = _tensor_builder->portableAt(ifm_index).get(); auto fn = std::make_unique<ops::ShapeLayer>(); - fn->configure(ifm_alloc, ofm_alloc); + fn->configure(ifm_tensor, ofm_tensor); + + _return_fn = std::move(fn); +} + +void KernelGenerator::visit(const ir::operation::ResizeBilinear &node) +{ + const auto output_index{node.getOutputs().at(0)}; + const auto input_index{node.getInputs().at(ir::operation::ResizeBilinear::INPUT)}; + + auto output_height = node.param().height_out; + auto output_width = node.param().width_out; + auto align_corners = node.param().align_corners; + auto half_pixel_centers = node.param().half_pixel_centers; + + auto output_tensor = _tensor_builder->portableAt(output_index).get(); + auto input_tensor = _tensor_builder->portableAt(input_index).get(); + + auto fn = std::make_unique<ops::ResizeBilinearLayer>(); + + fn->configure(input_tensor, output_tensor, output_height, output_width, align_corners, + half_pixel_centers); _return_fn = std::move(fn); } @@ -1033,13 +1114,13 @@ void KernelGenerator::visit(const ir::operation::Reverse &node) const auto input_index{node.getInputs().at(ir::operation::Reverse::INPUT)}; const auto axis_index{node.getInputs().at(ir::operation::Reverse::AXIS)}; - auto output_alloc = _tensor_builder->portableAt(output_index).get(); - auto input_alloc = _tensor_builder->portableAt(input_index).get(); - auto axis_alloc = _tensor_builder->portableAt(axis_index).get(); + auto output_tensor = _tensor_builder->portableAt(output_index).get(); + auto input_tensor = _tensor_builder->portableAt(input_index).get(); + auto axis_tensor = _tensor_builder->portableAt(axis_index).get(); auto fn = std::make_unique<ops::ReverseLayer>(); - fn->configure(input_alloc, axis_alloc, output_alloc); + fn->configure(input_tensor, axis_tensor, output_tensor); _return_fn = std::move(fn); } @@ -1049,12 +1130,12 @@ void KernelGenerator::visit(const ir::operation::Neg &node) const auto ofm_index{node.getOutputs().at(0)}; const auto ifm_index{node.getInputs().at(ir::operation::Neg::Input::INPUT)}; - auto ofm_alloc = 
_tensor_builder->portableAt(ofm_index).get(); - auto ifm_alloc = _tensor_builder->portableAt(ifm_index).get(); + auto ofm_tensor = _tensor_builder->portableAt(ofm_index).get(); + auto ifm_tensor = _tensor_builder->portableAt(ifm_index).get(); auto fn = std::make_unique<ops::NegLayer>(); - fn->configure(ifm_alloc, ofm_alloc); + fn->configure(ifm_tensor, ofm_tensor); _return_fn = std::move(fn); } @@ -1066,12 +1147,12 @@ void KernelGenerator::visit(const ir::operation::ArgMax &node) const auto axis = node.param().axis; - auto output_alloc = _tensor_builder->portableAt(output_index).get(); - auto input_alloc = _tensor_builder->portableAt(input_index).get(); + auto output_tensor = _tensor_builder->portableAt(output_index).get(); + auto input_tensor = _tensor_builder->portableAt(input_index).get(); auto fn = std::make_unique<ops::ArgMinMaxLayer>(); - fn->configure(input_alloc, output_alloc, axis, /* is_arg_max */ true); + fn->configure(input_tensor, output_tensor, axis, /* is_arg_max */ true); _return_fn = std::move(fn); } @@ -1082,13 +1163,13 @@ void KernelGenerator::visit(const ir::operation::Pow &node) const auto lhs_index{node.getInputs().at(ir::operation::Pow::LHS)}; const auto rhs_index{node.getInputs().at(ir::operation::Pow::RHS)}; - auto output_alloc = _tensor_builder->portableAt(output_index).get(); - auto lhs_alloc = _tensor_builder->portableAt(lhs_index).get(); - auto rhs_alloc = _tensor_builder->portableAt(rhs_index).get(); + auto output_tensor = _tensor_builder->portableAt(output_index).get(); + auto lhs_tensor = _tensor_builder->portableAt(lhs_index).get(); + auto rhs_tensor = _tensor_builder->portableAt(rhs_index).get(); auto fn = std::make_unique<ops::PowLayer>(); - fn->configure(lhs_alloc, rhs_alloc, ir::Activation::NONE, output_alloc); + fn->configure(lhs_tensor, rhs_tensor, ir::Activation::NONE, output_tensor); _return_fn = std::move(fn); } @@ -1098,12 +1179,12 @@ void KernelGenerator::visit(const ir::operation::Log &node) const auto ofm_index{node.getOutputs().at(0)}; const auto ifm_index{node.getInputs().at(ir::operation::Log::Input::INPUT)}; - auto ofm_alloc = _tensor_builder->portableAt(ofm_index).get(); - auto ifm_alloc = _tensor_builder->portableAt(ifm_index).get(); + auto ofm_tensor = _tensor_builder->portableAt(ofm_index).get(); + auto ifm_tensor = _tensor_builder->portableAt(ifm_index).get(); auto fn = std::make_unique<ops::LogLayer>(); - fn->configure(ifm_alloc, ofm_alloc); + fn->configure(ifm_tensor, ofm_tensor); _return_fn = std::move(fn); } @@ -1113,12 +1194,12 @@ void KernelGenerator::visit(const ir::operation::Round &node) const auto output_index{node.getOutputs().at(0)}; const auto input_index{node.getInputs().at(ir::operation::Round::INPUT)}; - auto output_alloc = _tensor_builder->portableAt(output_index).get(); - auto input_alloc = _tensor_builder->portableAt(input_index).get(); + auto output_tensor = _tensor_builder->portableAt(output_index).get(); + auto input_tensor = _tensor_builder->portableAt(input_index).get(); auto fn = std::make_unique<ops::RoundLayer>(); - fn->configure(input_alloc, output_alloc); + fn->configure(input_tensor, output_tensor); _return_fn = std::move(fn); } @@ -1128,12 +1209,12 @@ void KernelGenerator::visit(const ir::operation::LogicalNot &node) const auto output_index{node.getOutputs().at(0)}; const auto input_index{node.getInputs().at(ir::operation::LogicalNot::INPUT)}; - auto output_alloc = _tensor_builder->portableAt(output_index).get(); - auto input_alloc = _tensor_builder->portableAt(input_index).get(); + auto output_tensor = 
_tensor_builder->portableAt(output_index).get(); + auto input_tensor = _tensor_builder->portableAt(input_index).get(); auto fn = std::make_unique<ops::LogicalNotLayer>(); - fn->configure(input_alloc, output_alloc); + fn->configure(input_tensor, output_tensor); _return_fn = std::move(fn); } @@ -1144,28 +1225,43 @@ void KernelGenerator::visit(const ir::operation::LogicalOr &node) const auto lhs_index{node.getInputs().at(0)}; const auto rhs_index{node.getInputs().at(1)}; - auto ofm_alloc = _tensor_builder->portableAt(ofm_index).get(); - auto lhs_alloc = _tensor_builder->portableAt(lhs_index).get(); - auto rhs_alloc = _tensor_builder->portableAt(rhs_index).get(); + auto ofm_tensor = _tensor_builder->portableAt(ofm_index).get(); + auto lhs_tensor = _tensor_builder->portableAt(lhs_index).get(); + auto rhs_tensor = _tensor_builder->portableAt(rhs_index).get(); auto fn = std::make_unique<ops::LogicalOrLayer>(); - fn->configure(lhs_alloc, rhs_alloc, ofm_alloc); + fn->configure(lhs_tensor, rhs_tensor, ofm_tensor); _return_fn = std::move(fn); } -void KernelGenerator::visit(const ir::operation::ZerosLike &node) +void KernelGenerator::visit(const ir::operation::L2Normalization &node) { const auto output_index{node.getOutputs().at(0)}; - const auto input_index{node.getInputs().at(ir::operation::ZerosLike::INPUT)}; + const auto input_index{node.getInputs().at(0)}; auto output_alloc = _tensor_builder->portableAt(output_index).get(); auto input_alloc = _tensor_builder->portableAt(input_index).get(); - auto fn = std::make_unique<ops::ZerosLikeLayer>(); + auto fn = std::make_unique<ops::L2NormLayer>(); fn->configure(input_alloc, output_alloc); + + _return_fn = std::move(fn); +} + +void KernelGenerator::visit(const ir::operation::ZerosLike &node) +{ + const auto output_index{node.getOutputs().at(0)}; + const auto input_index{node.getInputs().at(ir::operation::ZerosLike::INPUT)}; + + auto output_tensor = _tensor_builder->portableAt(output_index).get(); + auto input_tensor = _tensor_builder->portableAt(input_index).get(); + + auto fn = std::make_unique<ops::ZerosLikeLayer>(); + + fn->configure(input_tensor, output_tensor); _return_fn = std::move(fn); } @@ -1176,14 +1272,14 @@ void KernelGenerator::visit(const ir::operation::Range &node) const auto limit_index{node.getInputs().at(ir::operation::Range::LIMIT)}; const auto delta_index{node.getInputs().at(ir::operation::Range::DELTA)}; - auto output_alloc = _tensor_builder->portableAt(output_index).get(); - auto start_alloc = _tensor_builder->portableAt(start_index).get(); - auto limit_alloc = _tensor_builder->portableAt(limit_index).get(); - auto delta_alloc = _tensor_builder->portableAt(delta_index).get(); + auto output_tensor = _tensor_builder->portableAt(output_index).get(); + auto start_tensor = _tensor_builder->portableAt(start_index).get(); + auto limit_tensor = _tensor_builder->portableAt(limit_index).get(); + auto delta_tensor = _tensor_builder->portableAt(delta_index).get(); auto fn = std::make_unique<ops::RangeLayer>(); - fn->configure(start_alloc, limit_alloc, delta_alloc, output_alloc); + fn->configure(start_tensor, limit_tensor, delta_tensor, output_tensor); _return_fn = std::move(fn); } @@ -1193,13 +1289,13 @@ void KernelGenerator::visit(const ir::operation::SquaredDifference &node) const auto lhs_index{node.getInputs().at(ir::operation::SquaredDifference::Input::LHS)}; const auto rhs_index{node.getInputs().at(ir::operation::SquaredDifference::Input::RHS)}; - auto ofm_alloc = _tensor_builder->portableAt(ofm_index).get(); - auto lhs_alloc = 
_tensor_builder->portableAt(lhs_index).get(); - auto rhs_alloc = _tensor_builder->portableAt(rhs_index).get(); + auto ofm_tensor = _tensor_builder->portableAt(ofm_index).get(); + auto lhs_tensor = _tensor_builder->portableAt(lhs_index).get(); + auto rhs_tensor = _tensor_builder->portableAt(rhs_index).get(); auto fn = std::make_unique<ops::SqDiffLayer>(); - fn->configure(lhs_alloc, rhs_alloc, ofm_alloc); + fn->configure(lhs_tensor, rhs_tensor, ofm_tensor); _return_fn = std::move(fn); } @@ -1209,13 +1305,13 @@ void KernelGenerator::visit(const ir::operation::Tile &node) const auto input_index{node.getInputs().at(ir::operation::Tile::INPUT)}; const auto multiples_index{node.getInputs().at(ir::operation::Tile::MULTIPLES)}; - auto output_alloc = _tensor_builder->portableAt(output_index).get(); - auto input_alloc = _tensor_builder->portableAt(input_index).get(); - auto multiples_alloc = _tensor_builder->portableAt(multiples_index).get(); + auto output_tensor = _tensor_builder->portableAt(output_index).get(); + auto input_tensor = _tensor_builder->portableAt(input_index).get(); + auto multiples_tensor = _tensor_builder->portableAt(multiples_index).get(); auto fn = std::make_unique<ops::TileLayer>(); - fn->configure(input_alloc, multiples_alloc, output_alloc); + fn->configure(input_tensor, multiples_tensor, output_tensor); _return_fn = std::move(fn); } @@ -1226,14 +1322,14 @@ void KernelGenerator::visit(const ir::operation::MatrixBandPart &node) const auto num_lower_index{node.getInputs().at(ir::operation::MatrixBandPart::NUM_LOWER_DIAG)}; const auto num_upper_index{node.getInputs().at(ir::operation::MatrixBandPart::NUM_UPPER_DIAG)}; - auto output_alloc = _tensor_builder->portableAt(output_index).get(); - auto input_alloc = _tensor_builder->portableAt(input_index).get(); - auto num_lower_alloc = _tensor_builder->portableAt(num_lower_index).get(); - auto num_upper_alloc = _tensor_builder->portableAt(num_upper_index).get(); + auto output_tensor = _tensor_builder->portableAt(output_index).get(); + auto input_tensor = _tensor_builder->portableAt(input_index).get(); + auto num_lower_tensor = _tensor_builder->portableAt(num_lower_index).get(); + auto num_upper_tensor = _tensor_builder->portableAt(num_upper_index).get(); auto fn = std::make_unique<ops::MatrixBandPartLayer>(); - fn->configure(input_alloc, num_lower_alloc, num_upper_alloc, output_alloc); + fn->configure(input_tensor, num_lower_tensor, num_upper_tensor, output_tensor); _return_fn = std::move(fn); } @@ -1243,16 +1339,16 @@ void KernelGenerator::visit(const ir::operation::BatchMatMul &node) const auto lhs_index{node.getInputs().at(ir::operation::BatchMatMul::LHS)}; const auto rhs_index{node.getInputs().at(ir::operation::BatchMatMul::RHS)}; - auto output_alloc = _tensor_builder->portableAt(output_index).get(); - auto lhs_alloc = _tensor_builder->portableAt(lhs_index).get(); - auto rhs_alloc = _tensor_builder->portableAt(rhs_index).get(); + auto output_tensor = _tensor_builder->portableAt(output_index).get(); + auto lhs_tensor = _tensor_builder->portableAt(lhs_index).get(); + auto rhs_tensor = _tensor_builder->portableAt(rhs_index).get(); const auto adj_x = node.param().adj_x; const auto adj_y = node.param().adj_y; auto fn = std::make_unique<ops::BatchMatMulLayer>(); - fn->configure(lhs_alloc, rhs_alloc, adj_x, adj_y, output_alloc); + fn->configure(lhs_tensor, rhs_tensor, adj_x, adj_y, output_tensor); _return_fn = std::move(fn); } @@ -1262,13 +1358,13 @@ void KernelGenerator::visit(const ir::operation::BroadcastTo &node) const auto 
input_index{node.getInputs().at(ir::operation::BroadcastTo::INPUT)}; const auto shape_index{node.getInputs().at(ir::operation::BroadcastTo::SHAPE)}; - auto output_alloc = _tensor_builder->portableAt(output_index).get(); - auto input_alloc = _tensor_builder->portableAt(input_index).get(); - auto shape_alloc = _tensor_builder->portableAt(shape_index).get(); + auto output_tensor = _tensor_builder->portableAt(output_index).get(); + auto input_tensor = _tensor_builder->portableAt(input_index).get(); + auto shape_tensor = _tensor_builder->portableAt(shape_index).get(); auto fn = std::make_unique<ops::BroadcastToLayer>(); - fn->configure(input_alloc, shape_alloc, output_alloc); + fn->configure(input_tensor, shape_tensor, output_tensor); _return_fn = std::move(fn); } @@ -1277,10 +1373,10 @@ void KernelGenerator::visit(const ir::operation::FusedBatchNorm &node) { const auto ofm_index{node.getOutputs().at(0)}; - auto output_alloc = _tensor_builder->portableAt(ofm_index).get(); - std::vector<const IPortableTensor *> input_allocs; + auto output_tensor = _tensor_builder->portableAt(ofm_index).get(); + std::vector<const IPortableTensor *> input_tensors; for (auto &ifm_idx : node.getInputs()) - input_allocs.emplace_back(_tensor_builder->portableAt(ifm_idx).get()); + input_tensors.emplace_back(_tensor_builder->portableAt(ifm_idx).get()); const auto epsilon = node.param().epsilon; const auto is_training = node.param().is_training; @@ -1288,7 +1384,7 @@ void KernelGenerator::visit(const ir::operation::FusedBatchNorm &node) auto fn = std::make_unique<ops::FusedBatchNormLayer>(); - fn->configure(input_allocs, epsilon, is_training, data_format, output_alloc); + fn->configure(input_tensors, epsilon, is_training, data_format, output_tensor); _return_fn = std::move(fn); } @@ -1301,12 +1397,12 @@ void KernelGenerator::visit(const ir::operation::LogSoftmax &node) const auto beta = node.param().beta; const auto axis = node.param().axis; - auto output_alloc = _tensor_builder->at(output_index).get(); - auto input_alloc = _tensor_builder->at(input_index).get(); + auto output_tensor = _tensor_builder->portableAt(output_index).get(); + auto input_tensor = _tensor_builder->portableAt(input_index).get(); auto fn = std::make_unique<ops::LogSoftMaxLayer>(); - fn->configure(input_alloc, beta, axis, output_alloc); + fn->configure(input_tensor, beta, axis, output_tensor); _return_fn = std::move(fn); } @@ -1318,14 +1414,84 @@ void KernelGenerator::visit(const ir::operation::SpaceToBatchND &node) const auto block_shape_index{node.getInputs().at(ir::operation::SpaceToBatchND::BLOCK_SIZE)}; const auto padding_index{node.getInputs().at(ir::operation::SpaceToBatchND::PADDINGS)}; - auto output_alloc = _tensor_builder->portableAt(output_index).get(); - auto input_alloc = _tensor_builder->portableAt(input_index).get(); - auto block_shape_alloc = _tensor_builder->portableAt(block_shape_index).get(); - auto padding_alloc = _tensor_builder->portableAt(padding_index).get(); + auto output_tensor = _tensor_builder->portableAt(output_index).get(); + auto input_tensor = _tensor_builder->portableAt(input_index).get(); + auto block_shape_tensor = _tensor_builder->portableAt(block_shape_index).get(); + auto padding_tensor = _tensor_builder->portableAt(padding_index).get(); auto fn = std::make_unique<ops::SpaceToBatchNDLayer>(); - fn->configure(input_alloc, block_shape_alloc, padding_alloc, output_alloc); + fn->configure(input_tensor, block_shape_tensor, padding_tensor, output_tensor); + + _return_fn = std::move(fn); +} + +void 
KernelGenerator::visit(const ir::operation::Quantize &node) +{ + const auto input_index{node.getInputs().at(ir::operation::Quantize::Input::INPUT)}; + const auto output_index{node.getOutputs().at(0)}; + + auto input_tensor = _tensor_builder->portableAt(input_index).get(); + auto output_tensor = _tensor_builder->portableAt(output_index).get(); + + auto fn = std::make_unique<ops::QuantizeLayer>(); + + fn->configure(input_tensor, output_tensor); + + _return_fn = std::move(fn); +} + +void KernelGenerator::visit(const ir::operation::SpaceToDepth &node) +{ + const auto input_index{node.getInputs().at(ir::operation::SpaceToDepth::Input::INPUT)}; + const auto output_index{node.getOutputs().at(0)}; + auto block_size = node.param().block_size; + + auto input_tensor = _tensor_builder->portableAt(input_index).get(); + auto output_tensor = _tensor_builder->portableAt(output_index).get(); + + auto fn = std::make_unique<ops::SpaceToDepthLayer>(); + + fn->configure(input_tensor, block_size, output_tensor); + _return_fn = std::move(fn); +} + +void KernelGenerator::visit(const ir::operation::StatelessRandomUniform &node) +{ + const auto output_index{node.getOutputs().at(0)}; + const auto shape_index{node.getInputs().at(ir::operation::StatelessRandomUniform::SHAPE)}; + const auto seed_index{node.getInputs().at(ir::operation::StatelessRandomUniform::SEED)}; + + auto output_tensor = _tensor_builder->portableAt(output_index).get(); + auto shape_tensor = _tensor_builder->portableAt(shape_index).get(); + auto seed_tensor = _tensor_builder->portableAt(seed_index).get(); + + auto fn = std::make_unique<ops::StatelessRandomUniformLayer>(); + + fn->configure(shape_tensor, seed_tensor, output_tensor); + _return_fn = std::move(fn); +} + +void KernelGenerator::visit(const ir::operation::SplitV &node) +{ + const auto num_splits = node.param().num_splits; + assert(num_splits == static_cast<int>(node.getOutputs().size())); + + const auto input_idx{node.getInputs().at(ir::operation::SplitV::Input::INPUT)}; + const auto size_splits{node.getInputs().at(ir::operation::SplitV::Input::SIZE_SPLITS)}; + const auto split_dim{node.getInputs().at(ir::operation::SplitV::Input::SPLIT_DIM)}; + + auto in_tensor = _tensor_builder->portableAt(input_idx).get(); + auto in_size_splits = _tensor_builder->portableAt(size_splits).get(); + auto in_split_dim = _tensor_builder->portableAt(split_dim).get(); + + std::vector<IPortableTensor *> out_tensors; + for (auto &output_idx : node.getOutputs()) + out_tensors.emplace_back(_tensor_builder->portableAt(output_idx).get()); + + auto fn = std::make_unique<ops::SplitVLayer>(); + + fn->configure(in_tensor, in_size_splits, in_split_dim, num_splits, out_tensors); _return_fn = std::move(fn); } diff --git a/runtime/onert/backend/cpu/KernelGenerator.h b/runtime/onert/backend/cpu/KernelGenerator.h index d6f4c2825..40c056a96 100644 --- a/runtime/onert/backend/cpu/KernelGenerator.h +++ b/runtime/onert/backend/cpu/KernelGenerator.h @@ -17,6 +17,7 @@ #ifndef __ONERT_BACKEND_CPU_KERNEL_GENERATOR_H__ #define __ONERT_BACKEND_CPU_KERNEL_GENERATOR_H__ +#include "ExternalContext.h" #include "TensorBuilder.h" #include "Tensor.h" @@ -37,7 +38,8 @@ class KernelGenerator : public IKernelGenerator public: KernelGenerator(const ir::Operands &operands_ctx, const ir::Operations &operations_ctx, const std::shared_ptr<TensorBuilder> &tensor_builder, - const std::shared_ptr<custom::IKernelBuilder> &kernel_builder); + const std::shared_ptr<custom::IKernelBuilder> &kernel_builder, + const std::shared_ptr<ExternalContext>
&external_context); using IKernelGenerator::visit; @@ -74,6 +76,7 @@ public: void visit(const ir::operation::Transpose &) override; void visit(const ir::operation::Reduce &) override; void visit(const ir::operation::ReLU &) override; + void visit(const ir::operation::ReLU6 &) override; void visit(const ir::operation::Select &) override; void visit(const ir::operation::Slice &) override; void visit(const ir::operation::StridedSlice &) override; @@ -83,6 +86,7 @@ public: void visit(const ir::operation::Sin &) override; void visit(const ir::operation::RSQRT &) override; void visit(const ir::operation::Shape &) override; + void visit(const ir::operation::ResizeBilinear &node) override; void visit(const ir::operation::Reverse &) override; void visit(const ir::operation::Neg &) override; void visit(const ir::operation::ArgMax &) override; @@ -94,13 +98,19 @@ public: void visit(const ir::operation::SquaredDifference &) override; void visit(const ir::operation::Tile &) override; void visit(const ir::operation::LogicalOr &) override; + void visit(const ir::operation::L2Normalization &) override; void visit(const ir::operation::Range &) override; void visit(const ir::operation::MatrixBandPart &) override; void visit(const ir::operation::BatchMatMul &) override; + void visit(const ir::operation::BatchToSpaceND &) override; void visit(const ir::operation::BroadcastTo &) override; void visit(const ir::operation::FusedBatchNorm &) override; void visit(const ir::operation::LogSoftmax &) override; void visit(const ir::operation::SpaceToBatchND &) override; + void visit(const ir::operation::Quantize &) override; + void visit(const ir::operation::SpaceToDepth &) override; + void visit(const ir::operation::StatelessRandomUniform &) override; + void visit(const ir::operation::SplitV &) override; private: const ir::Operands &_ctx; @@ -108,6 +118,7 @@ private: std::shared_ptr<TensorBuilder> _tensor_builder; std::shared_ptr<backend::custom::IKernelBuilder> _kernel_builder; ir::Layout _current_op_seq_layout; + const std::shared_ptr<ExternalContext> _external_context; }; } // namespace cpu diff --git a/runtime/onert/backend/cpu/StaticTensorManager.cc b/runtime/onert/backend/cpu/StaticTensorManager.cc new file mode 100644 index 000000000..78c98dabf --- /dev/null +++ b/runtime/onert/backend/cpu/StaticTensorManager.cc @@ -0,0 +1,106 @@ +/* + * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include "StaticTensorManager.h" +#include "Tensor.h" + +#include <util/logging.h> + +namespace onert +{ +namespace backend +{ +namespace cpu +{ + +StaticTensorManager::StaticTensorManager(const std::shared_ptr<cpu_common::TensorRegistry> ®, + cpu_common::DynamicTensorManager *dynamic_tensor_manager) + : _nonconst_mgr{new cpu_common::MemoryManager()}, _tensors{reg}, + _dynamic_tensor_manager{dynamic_tensor_manager} +{ + // DO NOTHING +} + +void StaticTensorManager::allocateNonconsts(void) +{ + _nonconst_mgr->allocate(); + + for (auto &pair : _tensors->native_tensors()) + { + const auto &ind = pair.first; + auto tensor = pair.second; + if (!_as_constants[ind] && !tensor->is_dynamic()) + { + auto *buffer = _nonconst_mgr->getBuffer(ind); + tensor->setBuffer(buffer); + + VERBOSE(CPU_StaticTensorManager) << "TENSOR(#" << ind.value() + << "): " << static_cast<void *>(buffer) << std::endl; + } + } +} + +void StaticTensorManager::deallocateNonconsts(void) { _nonconst_mgr->deallocate(); } + +void StaticTensorManager::buildTensor(const ir::OperandIndex &ind, + const ir::OperandInfo &tensor_info, ir::Layout backend_layout, + bool as_const) +{ + assert(!_tensors->getITensor(ind)); + if (as_const) + { + auto tensor = std::make_shared<ExternalTensor>(tensor_info, backend_layout); + _tensors->setNativeTensor(ind, tensor); + } + else + { + auto tensor = std::make_shared<Tensor>(tensor_info, backend_layout, _dynamic_tensor_manager); + _tensors->setNativeTensor(ind, tensor); + } + _as_constants[ind] = as_const; +} + +void StaticTensorManager::claimPlan(const ir::OperandIndex &ind, uint32_t size) +{ + assert(_tensors->getITensor(ind)); + + // This method is called only when a tensor has proper shape + assert(!_tensors->getITensor(ind)->is_dynamic()); + + if (!_as_constants[ind]) + _nonconst_mgr->claimPlan(ind, size); +} + +void StaticTensorManager::releasePlan(const ir::OperandIndex &ind) +{ + assert(_tensors->getITensor(ind)); + + // This method is called only when a tensor has proper shape + assert(!_tensors->getITensor(ind)->is_dynamic()); + + if (!_as_constants[ind]) + _nonconst_mgr->releasePlan(ind); +} + +void StaticTensorManager::iterate(const std::function<void(const ir::OperandIndex &)> &fn) +{ + for (const auto &it : _tensors->native_tensors()) + fn(it.first); +} + +} // namespace cpu +} // namespace backend +} // namespace onert diff --git a/runtime/onert/backend/cpu/StaticTensorManager.h b/runtime/onert/backend/cpu/StaticTensorManager.h new file mode 100644 index 000000000..2af61e4e7 --- /dev/null +++ b/runtime/onert/backend/cpu/StaticTensorManager.h @@ -0,0 +1,64 @@ +/* + * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
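For context on the flow above: the new StaticTensorManager plans non-constant tensors via claimPlan()/releasePlan(), materializes a single arena in allocateNonconsts(), and then hands each tensor its base pointer through setBuffer(). An illustrative sketch of that two-phase idea, with hypothetical names and a deliberately naive bump planner (the real cpu_common::MemoryManager reuses regions released by releasePlan()):

    #include <cstdint>
    #include <unordered_map>
    #include <vector>

    class MiniStaticManager
    {
    public:
      // Planning phase: record where each tensor will live in the arena.
      void claimPlan(int ind, uint32_t size)
      {
        _offsets[ind] = _total; // toy bump planner: no reuse of released regions
        _total += size;
      }
      // Allocation phase: one arena backs every planned tensor.
      void allocateNonconsts() { _arena.resize(_total); }
      uint8_t *getBuffer(int ind) { return _arena.data() + _offsets.at(ind); }

    private:
      std::unordered_map<int, uint32_t> _offsets;
      std::vector<uint8_t> _arena;
      uint32_t _total = 0;
    };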
+ */ + +#ifndef __ONERT_BACKEND_CPU_STATICTENSOR_MANAGER_H__ +#define __ONERT_BACKEND_CPU_STATICTENSOR_MANAGER_H__ + +#include "backend/IStaticTensorManager.h" +#include "backend/cpu_common/DynamicTensorManager.h" +#include "backend/cpu_common/MemoryManager.h" +#include "backend/cpu_common/TensorRegistry.h" +#include "backend/ITensorManager.h" +#include "ir/OperandIndexMap.h" +#include "ir/OperandInfo.h" + +namespace onert +{ +namespace backend +{ +namespace cpu +{ + +class StaticTensorManager : public backend::IStaticTensorManager +{ +public: + StaticTensorManager(const std::shared_ptr<cpu_common::TensorRegistry> &reg, + cpu_common::DynamicTensorManager *dynamic_tensor_manager); + virtual ~StaticTensorManager() = default; + + void allocateNonconsts(void); + void deallocateNonconsts(void); + + void buildTensor(const ir::OperandIndex &ind, const ir::OperandInfo &tensor_info, + ir::Layout backend_layout, bool as_const); + + void claimPlan(const ir::OperandIndex &ind, uint32_t size); + void releasePlan(const ir::OperandIndex &ind); + + void iterate(const std::function<void(const ir::OperandIndex &)> &fn); + +private: + std::unique_ptr<cpu_common::MemoryManager> _nonconst_mgr; + const std::shared_ptr<cpu_common::TensorRegistry> _tensors; + ir::OperandIndexMap<bool> _as_constants; + cpu_common::DynamicTensorManager *_dynamic_tensor_manager; +}; + +} // namespace cpu +} // namespace backend +} // namespace onert + +#endif // __ONERT_BACKEND_CPU_STATICTENSOR_MANAGER_H__ diff --git a/runtime/onert/backend/cpu/Tensor.h b/runtime/onert/backend/cpu/Tensor.h index 4dd251bd3..20e60260c 100644 --- a/runtime/onert/backend/cpu/Tensor.h +++ b/runtime/onert/backend/cpu/Tensor.h @@ -29,15 +29,22 @@ namespace cpu using Tensor = cpu_common::Tensor; -// Tensor which has data from external. To support this, assume below things -// no padding, always NHWC layout, constant tensor and not dynamic +/** + * @brief Class that uses data from external memory that is not managed by a backend, + * instead of allocating and copying the data. ExternalTensor's data pointer points to + * an address of memory that is already allocated, such as an mmapped area. + * This means that ExternalTensor can take any type of ir::Data. + * To support this, the following is assumed: no padding, always NHWC layout, + * constant tensor, and not dynamic.
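To make the ExternalTensor idea above concrete: it is essentially a non-owning view over memory that already exists, such as a weight region inside an mmapped model file. A minimal sketch under that assumption (hypothetical type, not the actual class):

    #include <cstddef>
    #include <cstdint>

    // Non-owning view: no allocation and no copy; it only remembers where the
    // externally managed (e.g. mmapped) data lives.
    struct ExternalView
    {
      const uint8_t *base = nullptr;
      size_t size = 0;

      void set(const uint8_t *data, size_t len)
      {
        base = data;
        size = len;
      }
      const uint8_t *buffer() const { return base; }
    };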
+ */ class ExternalTensor : public Tensor { public: ExternalTensor() = delete; public: - ExternalTensor(const ir::OperandInfo &info, const ir::Layout layout) : Tensor(info, layout) + ExternalTensor(const ir::OperandInfo &info, const ir::Layout layout) + : Tensor(info, layout, nullptr) { assert(_layout == ir::Layout::NHWC); assert(_info.isConstant()); @@ -45,6 +52,11 @@ public: } public: + /** + * @brief set Data to be shared from external so that this ExternalTensor will not be + * allocated on CPU backend + * @param[in] data data of Operand to be set + */ void setData(const std::shared_ptr<ir::Data> data) { assert(data != nullptr); diff --git a/runtime/onert/backend/cpu/TensorBuilder.cc b/runtime/onert/backend/cpu/TensorBuilder.cc index 886e8d820..ab8ba5756 100644 --- a/runtime/onert/backend/cpu/TensorBuilder.cc +++ b/runtime/onert/backend/cpu/TensorBuilder.cc @@ -29,8 +29,8 @@ namespace cpu TensorBuilder::TensorBuilder() : _tensor_reg{new cpu_common::TensorRegistry()}, - _static_tensor_mgr{new cpu_common::StaticTensorManager(_tensor_reg)}, - _dynamic_tensor_mgr{new cpu_common::DynamicTensorManager(_tensor_reg)} + _dynamic_tensor_mgr{new cpu_common::DynamicTensorManager(_tensor_reg)}, + _static_tensor_mgr{new StaticTensorManager(_tensor_reg, _dynamic_tensor_mgr.get())} { /* empty */ } @@ -77,11 +77,7 @@ bool TensorBuilder::isRegistered(const ir::OperandIndex &ind) const return _tensor_info_map.find(ind) != _tensor_info_map.end(); } -void TensorBuilder::prepare(void) -{ - _static_tensor_mgr->allocateConsts(); - _static_tensor_mgr->allocateNonconsts(); -} +void TensorBuilder::prepare(void) { _static_tensor_mgr->allocateNonconsts(); } void TensorBuilder::allocate() { @@ -99,17 +95,17 @@ std::shared_ptr<IPortableTensor> TensorBuilder::portableAt(const ir::OperandInde return _tensor_reg->getPortableTensor(ind); } -bool TensorBuilder::setExternalTensor(const ir::OperandIndex &ind, - const std::shared_ptr<IPortableTensor> &tensor) +bool TensorBuilder::setMigrantTensor(const ir::OperandIndex &ind, + const std::shared_ptr<IPortableTensor> &tensor) { - return _tensor_reg->setExternalTensor(ind, tensor); + return _tensor_reg->setMigrantTensor(ind, tensor); } void TensorBuilder::iterate(const IterateFunction &fn) { _static_tensor_mgr->iterate(fn); } -std::shared_ptr<cpu_common::Tensor> TensorBuilder::at(const ir::OperandIndex &ind) +std::shared_ptr<Tensor> TensorBuilder::at(const ir::OperandIndex &ind) { - return _tensor_reg->getManagedTensor(ind); + return _tensor_reg->getNativeTensor(ind); } std::unique_ptr<ITensorManager> TensorBuilder::releaseStaticTensorManager(void) diff --git a/runtime/onert/backend/cpu/TensorBuilder.h b/runtime/onert/backend/cpu/TensorBuilder.h index ba25451ec..617136514 100644 --- a/runtime/onert/backend/cpu/TensorBuilder.h +++ b/runtime/onert/backend/cpu/TensorBuilder.h @@ -18,13 +18,14 @@ #define __ONERT_BACKEND_CPU_TENSOR_BUILDER_H__ #include <backend/cpu_common/DynamicTensorManager.h> -#include <backend/cpu_common/StaticTensorManager.h> #include <backend/cpu_common/TensorRegistry.h> -#include <backend/cpu_common/Tensor.h> #include <backend/ITensorBuilder.h> #include <ir/OperandIndexMap.h> +#include "StaticTensorManager.h" +#include "Tensor.h" + #include <unordered_map> namespace onert @@ -80,17 +81,17 @@ public: * If not, program will crash with assert or exception. 
* @return shared_ptr<Tensor> */ - std::shared_ptr<cpu_common::Tensor> at(const ir::OperandIndex &ind); + std::shared_ptr<Tensor> at(const ir::OperandIndex &ind); std::shared_ptr<IPortableTensor> portableAt(const ir::OperandIndex &ind); - bool setExternalTensor(const ir::OperandIndex &ind, - const std::shared_ptr<IPortableTensor> &tensor) override; + bool setMigrantTensor(const ir::OperandIndex &ind, + const std::shared_ptr<IPortableTensor> &tensor) override; std::shared_ptr<ITensorRegistry> tensorRegistry() override { return _tensor_reg; } private: const std::shared_ptr<cpu_common::TensorRegistry> _tensor_reg; - std::unique_ptr<cpu_common::StaticTensorManager> _static_tensor_mgr; std::unique_ptr<cpu_common::DynamicTensorManager> _dynamic_tensor_mgr; + std::unique_ptr<StaticTensorManager> _static_tensor_mgr; ir::OperandIndexMap<ir::OperandInfo> _tensor_info_map; }; diff --git a/runtime/onert/backend/cpu/ops/BatchToSpaceNDLayer.cc b/runtime/onert/backend/cpu/ops/BatchToSpaceNDLayer.cc new file mode 100644 index 000000000..f2f10eb9d --- /dev/null +++ b/runtime/onert/backend/cpu/ops/BatchToSpaceNDLayer.cc @@ -0,0 +1,83 @@ +/* + * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
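The setExternalTensor -> setMigrantTensor rename above mirrors the registry's split between tensors this backend owns ("native") and tensors handed over from another backend ("migrant"). A hypothetical minimal registry showing that split (illustrative only, not the cpu_common::TensorRegistry API):

    #include <memory>
    #include <unordered_map>

    template <typename TensorT> class MiniRegistry
    {
    public:
      void setNativeTensor(int ind, std::shared_ptr<TensorT> t) { _native[ind] = std::move(t); }

      // Refuse a migrant entry when a native tensor already owns the operand.
      bool setMigrantTensor(int ind, std::shared_ptr<TensorT> t)
      {
        if (_native.count(ind))
          return false;
        _migrant[ind] = std::move(t);
        return true;
      }

    private:
      std::unordered_map<int, std::shared_ptr<TensorT>> _native;  // owned here
      std::unordered_map<int, std::shared_ptr<TensorT>> _migrant; // borrowed
    };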
+ */ + +#include "BatchToSpaceNDLayer.h" + +#include <cker/operation/BatchToSpaceND.h> + +namespace onert +{ +namespace backend +{ +namespace cpu +{ +namespace ops +{ + +BatchToSpaceNDLayer::BatchToSpaceNDLayer() + : _input(nullptr), _output(nullptr), _block_shape(nullptr), _crops(nullptr) +{ + // DO NOTHING +} + +template <typename T> void BatchToSpaceNDLayer::batchToSpaceNDGeneric() +{ + const int32_t NNapiCrops[]{0, 0, 0, 0}; + const int32_t *_crops_buffer; + + if (_crops == nullptr) + { + _crops_buffer = NNapiCrops; + } + else + { + _crops_buffer = reinterpret_cast<const int32_t *>(_crops->buffer()); + } + nnfw::cker::BatchToSpaceND<T>( + getTensorShape(_input), reinterpret_cast<const T *>(_input->buffer()), + reinterpret_cast<const int32_t *>(_block_shape->buffer()), _crops_buffer, + getTensorShape(_output), reinterpret_cast<T *>(_output->buffer())); +} + +void BatchToSpaceNDLayer::configure(const IPortableTensor *input, IPortableTensor *output, + IPortableTensor *block_shape, IPortableTensor *crops) +{ + _output = output; + _input = input; + _block_shape = block_shape; + _crops = crops; +} + +void BatchToSpaceNDLayer::run() +{ + if (_output->data_type() == OperandType::FLOAT32) + { + batchToSpaceNDGeneric<float>(); + } + else if (_output->data_type() == OperandType::QUANT_UINT8_ASYMM) + { + batchToSpaceNDGeneric<uint8_t>(); + } + else + { + throw std::runtime_error{"NYI"}; + } +} + +} // namespace ops +} // namespace cpu +} // namespace backend +} // namespace onert diff --git a/runtime/onert/backend/cpu/ops/BatchToSpaceNDLayer.h b/runtime/onert/backend/cpu/ops/BatchToSpaceNDLayer.h new file mode 100644 index 000000000..6e25b241b --- /dev/null +++ b/runtime/onert/backend/cpu/ops/BatchToSpaceNDLayer.h @@ -0,0 +1,59 @@ +/* + * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#ifndef __ONERT_BACKEND_CPU_OPS_BATCHTOSPACEND_LAYER_H__ +#define __ONERT_BACKEND_CPU_OPS_BATCHTOSPACEND_LAYER_H__ + +#include <backend/IPortableTensor.h> +#include "OperationUtils.h" + +#include <exec/IFunction.h> + +namespace onert +{ +namespace backend +{ +namespace cpu +{ +namespace ops +{ + +class BatchToSpaceNDLayer : public ::onert::exec::IFunction +{ +public: + BatchToSpaceNDLayer(); + +public: + template <typename T> void batchToSpaceNDGeneric(); + + void configure(const IPortableTensor *input, IPortableTensor *output, + IPortableTensor *block_shape, IPortableTensor *crops); + + void run() override; + +private: + const IPortableTensor *_input; + IPortableTensor *_output; + IPortableTensor *_block_shape; + IPortableTensor *_crops; +}; + +} // namespace ops +} // namespace cpu +} // namespace backend +} // namespace onert + +#endif // __ONERT_BACKEND_CPU_OPS_BATCHTOSPACEND_LAYER_H__ diff --git a/runtime/onert/backend/cpu/ops/CompareLayer.cc b/runtime/onert/backend/cpu/ops/CompareLayer.cc index f557f3ade..adf902aaf 100644 --- a/runtime/onert/backend/cpu/ops/CompareLayer.cc +++ b/runtime/onert/backend/cpu/ops/CompareLayer.cc @@ -17,6 +17,7 @@ #include "OperationUtils.h" +#include <assert.h> #include <cker/operation/Comparison.h> using namespace nnfw::cker; namespace onert @@ -34,6 +35,14 @@ namespace using OpType = onert::ir::operation::Comparison::ComparisonType; using namespace onert::backend::cpu; +// Assumes these enum values to be in the order like this +static_assert(static_cast<int>(OpType::Equal) == 0, "An OpType value has changed!"); +static_assert(static_cast<int>(OpType::NotEqual) == 1, "An OpType value has changed!"); +static_assert(static_cast<int>(OpType::Greater) == 2, "An OpType value has changed!"); +static_assert(static_cast<int>(OpType::GreaterEqual) == 3, "An OpType value has changed!"); +static_assert(static_cast<int>(OpType::Less) == 4, "An OpType value has changed!"); +static_assert(static_cast<int>(OpType::LessEqual) == 5, "An OpType value has changed!"); + template <typename T> void compareQuant8(const IPortableTensor *lhs, const IPortableTensor *rhs, IPortableTensor *output, OpType op_type) @@ -52,95 +61,33 @@ void compareQuant8(const IPortableTensor *lhs, const IPortableTensor *rhs, IPort ¶ms.input2_shift); params.is_broadcast = !HaveSameShapes(lhs, rhs); - if (params.is_broadcast) - { - switch (op_type) - { - case OpType::Equal: - Broadcast4DSlowEqualWithScaling( - params, getExtendedTensorShape(lhs), reinterpret_cast<const T *>(lhs->buffer()), - getExtendedTensorShape(rhs), reinterpret_cast<const T *>(rhs->buffer()), - getExtendedTensorShape(output), reinterpret_cast<bool *>(output->buffer())); - break; - case OpType::NotEqual: - Broadcast4DSlowNotEqualWithScaling( - params, getExtendedTensorShape(lhs), reinterpret_cast<const T *>(lhs->buffer()), - getExtendedTensorShape(rhs), reinterpret_cast<const T *>(rhs->buffer()), - getExtendedTensorShape(output), reinterpret_cast<bool *>(output->buffer())); - break; - case OpType::Greater: - Broadcast4DSlowGreaterWithScaling( - params, getExtendedTensorShape(lhs), reinterpret_cast<const T *>(lhs->buffer()), - getExtendedTensorShape(rhs), reinterpret_cast<const T *>(rhs->buffer()), - getExtendedTensorShape(output), reinterpret_cast<bool *>(output->buffer())); - break; - case OpType::GreaterEqual: - Broadcast4DSlowGreaterEqualWithScaling( - params, getExtendedTensorShape(lhs), reinterpret_cast<const T *>(lhs->buffer()), - getExtendedTensorShape(rhs), reinterpret_cast<const T *>(rhs->buffer()), - 
getExtendedTensorShape(output), reinterpret_cast<bool *>(output->buffer())); - break; - case OpType::Less: - Broadcast4DSlowLessWithScaling( - params, getExtendedTensorShape(lhs), reinterpret_cast<const T *>(lhs->buffer()), - getExtendedTensorShape(rhs), reinterpret_cast<const T *>(rhs->buffer()), - getExtendedTensorShape(output), reinterpret_cast<bool *>(output->buffer())); - break; - case OpType::LessEqual: - Broadcast4DSlowLessEqualWithScaling( - params, getExtendedTensorShape(lhs), reinterpret_cast<const T *>(lhs->buffer()), - getExtendedTensorShape(rhs), reinterpret_cast<const T *>(rhs->buffer()), - getExtendedTensorShape(output), reinterpret_cast<bool *>(output->buffer())); - break; - default: - throw std::runtime_error{"Invalid OpType for CompareLayer"}; - } - } - else // if (requires_broadcast == false) - { - switch (op_type) - { - case OpType::Equal: - EqualWithScaling(params, getExtendedTensorShape(lhs), - reinterpret_cast<const T *>(lhs->buffer()), getExtendedTensorShape(rhs), - reinterpret_cast<const T *>(rhs->buffer()), getExtendedTensorShape(output), - reinterpret_cast<bool *>(output->buffer())); - break; - case OpType::NotEqual: - NotEqualWithScaling( - params, getExtendedTensorShape(lhs), reinterpret_cast<const T *>(lhs->buffer()), - getExtendedTensorShape(rhs), reinterpret_cast<const T *>(rhs->buffer()), - getExtendedTensorShape(output), reinterpret_cast<bool *>(output->buffer())); - break; - case OpType::Greater: - GreaterWithScaling( - params, getExtendedTensorShape(lhs), reinterpret_cast<const T *>(lhs->buffer()), - getExtendedTensorShape(rhs), reinterpret_cast<const T *>(rhs->buffer()), - getExtendedTensorShape(output), reinterpret_cast<bool *>(output->buffer())); - break; - case OpType::GreaterEqual: - GreaterEqualWithScaling( - params, getExtendedTensorShape(lhs), reinterpret_cast<const T *>(lhs->buffer()), - getExtendedTensorShape(rhs), reinterpret_cast<const T *>(rhs->buffer()), - getExtendedTensorShape(output), reinterpret_cast<bool *>(output->buffer())); - break; - case OpType::Less: - LessWithScaling(params, getExtendedTensorShape(lhs), - reinterpret_cast<const T *>(lhs->buffer()), getExtendedTensorShape(rhs), - reinterpret_cast<const T *>(rhs->buffer()), getExtendedTensorShape(output), - reinterpret_cast<bool *>(output->buffer())); - break; - case OpType::LessEqual: - LessEqualWithScaling( - params, getExtendedTensorShape(lhs), reinterpret_cast<const T *>(lhs->buffer()), - getExtendedTensorShape(rhs), reinterpret_cast<const T *>(rhs->buffer()), - getExtendedTensorShape(output), reinterpret_cast<bool *>(output->buffer())); - break; - default: - throw std::runtime_error{"Invalid OpType for CompareLayer"}; - } - } - return; + using CompareFunction = + void (*)(ComparisonParams & params, const Shape &input1_shape, const T *input1_data, + const Shape &input2_shape, const T *input2_data, const Shape &output_shape, + bool *output_data); + + static const CompareFunction broadcast_fns[] = { + Broadcast4DSlowEqualWithScaling, Broadcast4DSlowNotEqualWithScaling, + Broadcast4DSlowGreaterWithScaling, Broadcast4DSlowGreaterEqualWithScaling, + Broadcast4DSlowLessWithScaling, Broadcast4DSlowLessEqualWithScaling, + }; + static const CompareFunction non_broadcast_fns[] = { + EqualWithScaling, NotEqualWithScaling, GreaterWithScaling, + GreaterEqualWithScaling, LessWithScaling, LessEqualWithScaling, + }; + + static_assert(sizeof(broadcast_fns) == sizeof(non_broadcast_fns), + "Sizes of broadcast_fns and non_broadcast_fns must match!"); + + auto index = static_cast<int>(op_type); + 
if (index < 0 || index >= static_cast<int>(sizeof(broadcast_fns) / sizeof(broadcast_fns[0]))) + throw std::runtime_error{"Invalid OpType for CompareLayer"}; + + CompareFunction fn = (params.is_broadcast ? broadcast_fns[index] : non_broadcast_fns[index]); + + fn(params, getExtendedTensorShape(lhs), reinterpret_cast<const T *>(lhs->buffer()), + getExtendedTensorShape(rhs), reinterpret_cast<const T *>(rhs->buffer()), + getExtendedTensorShape(output), reinterpret_cast<bool *>(output->buffer())); } template <typename T> @@ -149,94 +96,33 @@ void compareScalar(const IPortableTensor *lhs, const IPortableTensor *rhs, IPort { bool requires_broadcast = !HaveSameShapes(lhs, rhs); - if (requires_broadcast) - { - switch (op_type) - { - case OpType::Equal: - Broadcast4DSlowEqual( - getExtendedTensorShape(lhs), reinterpret_cast<const T *>(lhs->buffer()), - getExtendedTensorShape(rhs), reinterpret_cast<const T *>(rhs->buffer()), - getExtendedTensorShape(output), reinterpret_cast<bool *>(output->buffer())); - break; - case OpType::NotEqual: - Broadcast4DSlowNotEqual( - getExtendedTensorShape(lhs), reinterpret_cast<const T *>(lhs->buffer()), - getExtendedTensorShape(rhs), reinterpret_cast<const T *>(rhs->buffer()), - getExtendedTensorShape(output), reinterpret_cast<bool *>(output->buffer())); - break; - case OpType::Greater: - Broadcast4DSlowGreater( - getExtendedTensorShape(lhs), reinterpret_cast<const T *>(lhs->buffer()), - getExtendedTensorShape(rhs), reinterpret_cast<const T *>(rhs->buffer()), - getExtendedTensorShape(output), reinterpret_cast<bool *>(output->buffer())); - break; - case OpType::GreaterEqual: - Broadcast4DSlowGreaterEqual( - getExtendedTensorShape(lhs), reinterpret_cast<const T *>(lhs->buffer()), - getExtendedTensorShape(rhs), reinterpret_cast<const T *>(rhs->buffer()), - getExtendedTensorShape(output), reinterpret_cast<bool *>(output->buffer())); - break; - case OpType::Less: - Broadcast4DSlowLess(getExtendedTensorShape(lhs), reinterpret_cast<const T *>(lhs->buffer()), - getExtendedTensorShape(rhs), reinterpret_cast<const T *>(rhs->buffer()), - getExtendedTensorShape(output), - reinterpret_cast<bool *>(output->buffer())); - break; - case OpType::LessEqual: - Broadcast4DSlowLessEqual( - getExtendedTensorShape(lhs), reinterpret_cast<const T *>(lhs->buffer()), - getExtendedTensorShape(rhs), reinterpret_cast<const T *>(rhs->buffer()), - getExtendedTensorShape(output), reinterpret_cast<bool *>(output->buffer())); - break; - default: - throw std::runtime_error{"Invalid OpType for CompareLayer"}; - } - } - else // if (requires_broadcast == false) - { - switch (op_type) - { - case OpType::Equal: - EqualNoScaling(getExtendedTensorShape(lhs), reinterpret_cast<const T *>(lhs->buffer()), - getExtendedTensorShape(rhs), reinterpret_cast<const T *>(rhs->buffer()), - getExtendedTensorShape(output), reinterpret_cast<bool *>(output->buffer())); - break; - case OpType::NotEqual: - NotEqualNoScaling(getExtendedTensorShape(lhs), reinterpret_cast<const T *>(lhs->buffer()), - getExtendedTensorShape(rhs), reinterpret_cast<const T *>(rhs->buffer()), - getExtendedTensorShape(output), - reinterpret_cast<bool *>(output->buffer())); - break; - case OpType::Greater: - GreaterNoScaling(getExtendedTensorShape(lhs), reinterpret_cast<const T *>(lhs->buffer()), - getExtendedTensorShape(rhs), reinterpret_cast<const T *>(rhs->buffer()), - getExtendedTensorShape(output), - reinterpret_cast<bool *>(output->buffer())); - break; - case OpType::GreaterEqual: - GreaterEqualNoScaling( - getExtendedTensorShape(lhs), 
reinterpret_cast<const T *>(lhs->buffer()), - getExtendedTensorShape(rhs), reinterpret_cast<const T *>(rhs->buffer()), - getExtendedTensorShape(output), reinterpret_cast<bool *>(output->buffer())); - break; - case OpType::Less: - LessNoScaling(getExtendedTensorShape(lhs), reinterpret_cast<const T *>(lhs->buffer()), - getExtendedTensorShape(rhs), reinterpret_cast<const T *>(rhs->buffer()), - getExtendedTensorShape(output), reinterpret_cast<bool *>(output->buffer())); - break; - case OpType::LessEqual: - LessEqualNoScaling(getExtendedTensorShape(lhs), reinterpret_cast<const T *>(lhs->buffer()), - getExtendedTensorShape(rhs), reinterpret_cast<const T *>(rhs->buffer()), - getExtendedTensorShape(output), - reinterpret_cast<bool *>(output->buffer())); - break; - default: - throw std::runtime_error{"Invalid OpType for CompareLayer"}; - } - } - return; + using CompareFunction = + void (*)(const Shape &input1_shape, const T *input1_data, const Shape &input2_shape, + const T *input2_data, const Shape &output_shape, bool *output_data); + + static const CompareFunction broadcast_fns[] = { + Broadcast4DSlowEqual, Broadcast4DSlowNotEqual, Broadcast4DSlowGreater, + Broadcast4DSlowGreaterEqual, Broadcast4DSlowLess, Broadcast4DSlowLessEqual, + }; + static const CompareFunction non_broadcast_fns[] = { + EqualNoScaling, NotEqualNoScaling, GreaterNoScaling, + GreaterEqualNoScaling, LessNoScaling, LessEqualNoScaling, + }; + + static_assert(sizeof(broadcast_fns) == sizeof(non_broadcast_fns), + "Sizes of broadcast_fns and non_broadcast_fns must match!"); + + auto index = static_cast<int>(op_type); + if (index < 0 || index >= static_cast<int>(sizeof(broadcast_fns) / sizeof(broadcast_fns[0]))) + throw std::runtime_error{"Invalid OpType for CompareLayer"}; + + CompareFunction fn = (requires_broadcast ? broadcast_fns[index] : non_broadcast_fns[index]); + + fn(getExtendedTensorShape(lhs), reinterpret_cast<const T *>(lhs->buffer()), + getExtendedTensorShape(rhs), reinterpret_cast<const T *>(rhs->buffer()), + getExtendedTensorShape(output), reinterpret_cast<bool *>(output->buffer())); } + } // namespace CompareLayer::CompareLayer() diff --git a/runtime/onert/backend/cpu/ops/FullyConnectedLayer.cc b/runtime/onert/backend/cpu/ops/FullyConnectedLayer.cc index c00be64e5..05da33abf 100644 --- a/runtime/onert/backend/cpu/ops/FullyConnectedLayer.cc +++ b/runtime/onert/backend/cpu/ops/FullyConnectedLayer.cc @@ -18,6 +18,8 @@ #include "../Tensor.h" #include <cker/operation/FullyConnected.h> +#include <cker/TensorUtils.h> +#include <misc/polymorphic_downcast.h> namespace onert { @@ -31,7 +33,7 @@ namespace ops FullyConnectedLayer::FullyConnectedLayer() : _input(nullptr), _weights(nullptr), _bias(nullptr), _output(nullptr), _activation(ir::Activation::NONE), _temp_arena(new nnfw::cker::FCTempArena()), - _is_hybrid(false) + _external_context(nullptr), _is_hybrid(false) { // DO NOTHING } @@ -102,7 +104,8 @@ void FullyConnectedLayer::fullyConnectedHybrid() op_params, getTensorShape(_input), reinterpret_cast<const float *>(_input->buffer()), getTensorShape(_weights), reinterpret_cast<const int8_t *>(_weights->buffer()), getTensorShape(_bias), reinterpret_cast<const float *>(_bias ? 
_bias->buffer() : nullptr), - getTensorShape(_output), reinterpret_cast<float *>(_output->buffer()), temp_arena); + getTensorShape(_output), reinterpret_cast<float *>(_output->buffer()), temp_arena, + _external_context->ruy_context()); #else nnfw::cker::FullyConnectedHybrid( op_params, getTensorShape(_input), reinterpret_cast<const float *>(_input->buffer()), @@ -110,31 +113,67 @@ void FullyConnectedLayer::fullyConnectedHybrid() (_cached_weights) ? reinterpret_cast<const int8_t *>(_cached_weights) : reinterpret_cast<const int8_t *>(_weights->buffer()), getTensorShape(_bias), reinterpret_cast<const float *>(_bias ? _bias->buffer() : nullptr), - getTensorShape(_output), reinterpret_cast<float *>(_output->buffer()), temp_arena); + getTensorShape(_output), reinterpret_cast<float *>(_output->buffer()), temp_arena, + _external_context->ruy_context()); -// TODO Enable calling decrease_ref -#if 0 if (_cached_weights == nullptr || _is_weights_freed) return; - auto weight_tensor = dynamic_cast<const Tensor *>(_weights); - if (weight_tensor) + // '_cached_weights is not nullptr and _is_weights_freed is false' means + // this weight shape satisfies the condition of the ruy kernel's prepack cache. + // After entering here, this code will not run again, except for the case below: + // the input is a zero vector. + + // If the input's elements are all zero, the ruy kernel path is bypassed, + // so handle that case here. + const int input_size = getTensorShape(_input).FlatSize(); + if (nnfw::cker::IsZeroVector(reinterpret_cast<float *>(_input->buffer()), input_size)) + return; + + auto weight_tensor = nnfw::misc::polymorphic_downcast<const Tensor *>(_weights); + + // This weight tensor could be another op's constant tensor. + // Therefore, its reference count should be checked as follows. + auto tensor = const_cast<Tensor *>(weight_tensor); + if (tensor->buffer() == nullptr) // ref is already 0? { - auto tensor = const_cast<Tensor *>(weight_tensor); + _is_weights_freed = true; + return; + } - tensor->decrease_ref(); - if (tensor->buffer() == nullptr) // ref == 0? - { - _is_weights_freed = true; - } + tensor->decrease_ref(); + if (tensor->buffer() == nullptr) // ref == 0? + { + _is_weights_freed = true; } -#endif // if 0 #endif } +void FullyConnectedLayer::fullyConnectedSparseWeight() +{ + float output_activation_min = 0, output_activation_max = 0; + CalculateActivationRange(_activation, &output_activation_min, &output_activation_max); + + nnfw::cker::FullyConnectedParams op_params; + op_params.float_activation_min = output_activation_min; + op_params.float_activation_max = output_activation_max; + op_params.activation = convertActivationType(_activation); + + int w0_size = getTensorShape(_weights).Dims(0); + const uint16_t *w1_segments = _weights->w1_segments(); + const uint16_t *w1_indices = _weights->w1_indices(); + + nnfw::cker::FullyConnectedSparseWeight( + op_params, getTensorShape(_input), reinterpret_cast<const float *>(_input->buffer()), + getTensorShape(_weights), reinterpret_cast<const float *>(_weights->buffer()), + getTensorShape(_bias), reinterpret_cast<const float *>(_bias ?
_bias->buffer() : nullptr), + getTensorShape(_output), reinterpret_cast<float *>(_output->buffer()), w0_size, w1_segments, + w1_indices); +} + void FullyConnectedLayer::configure(const IPortableTensor *input, const IPortableTensor *weights, const IPortableTensor *bias, ir::Activation activation, - IPortableTensor *output) + IPortableTensor *output, + const std::shared_ptr<ExternalContext> &external_context) { _input = input; _weights = weights; @@ -143,6 +182,7 @@ void FullyConnectedLayer::configure(const IPortableTensor *input, const IPortabl _output = output; _is_hybrid = input->data_type() == OperandType::FLOAT32 && weights->data_type() == OperandType::QUANT_INT8_SYMM; + _external_context = external_context; } void FullyConnectedLayer::run() @@ -151,6 +191,10 @@ void FullyConnectedLayer::run() { fullyConnectedHybrid(); } + else if (_weights->is_sparse()) + { + fullyConnectedSparseWeight(); + } else if (_input->data_type() == OperandType::FLOAT32) { fullyConnectedFloat32(); @@ -167,7 +211,16 @@ void FullyConnectedLayer::run() void FullyConnectedLayer::prepare() { -#ifdef USE_RUY_GEMV + if (_bias && _bias->is_constant()) + { + const int bias_size = getTensorShape(_bias).FlatSize(); + if (nnfw::cker::IsZeroVector(reinterpret_cast<float *>(_bias->buffer()), bias_size)) + { + _bias = nullptr; + } + } + +#if (defined(__ARM_NEON__) || defined(__ARM_NEON)) && defined(USE_RUY_GEMV) // TODO This is workaround // The only fc hybrid will use ruy kernel if (_input->data_type() != OperandType::FLOAT32 || diff --git a/runtime/onert/backend/cpu/ops/FullyConnectedLayer.h b/runtime/onert/backend/cpu/ops/FullyConnectedLayer.h index dd5ef2436..f1242677c 100644 --- a/runtime/onert/backend/cpu/ops/FullyConnectedLayer.h +++ b/runtime/onert/backend/cpu/ops/FullyConnectedLayer.h @@ -18,6 +18,7 @@ #define __ONERT_BACKEND_CPU_OPS_FULLYCONNECTEDLAYER_H__ #include <backend/IPortableTensor.h> +#include "../ExternalContext.h" #include "OperationUtils.h" #include <exec/IFunction.h> @@ -52,8 +53,11 @@ public: void fullyConnectedHybrid(); + void fullyConnectedSparseWeight(); + void configure(const IPortableTensor *input, const IPortableTensor *weights, - const IPortableTensor *bias, ir::Activation activation, IPortableTensor *output); + const IPortableTensor *bias, ir::Activation activation, IPortableTensor *output, + const std::shared_ptr<ExternalContext> &external_context); void run() override; @@ -68,10 +72,13 @@ private: ir::Activation _activation; std::unique_ptr<nnfw::cker::FCTempArena> _temp_arena; + std::shared_ptr<ExternalContext> _external_context; + bool _is_hybrid; #ifdef USE_RUY_GEMV uint8_t *_cached_weights = nullptr; // weights to be cached and a key + bool _is_weights_freed = false; // is weights freed? #endif }; diff --git a/runtime/onert/backend/cpu/ops/L2NormLayer.cc b/runtime/onert/backend/cpu/ops/L2NormLayer.cc new file mode 100644 index 000000000..0d99b0586 --- /dev/null +++ b/runtime/onert/backend/cpu/ops/L2NormLayer.cc @@ -0,0 +1,71 @@ +/* + * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
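The prepare() change above drops a constant bias that is entirely zero: adding a zero vector is the identity, so skipping it is safe and saves one pass over the output. nnfw::cker::IsZeroVector amounts to an all-zero scan; a rough equivalent (sketch only, not the cker signature):

    #include <algorithm>

    bool isZeroVector(const float *data, int size)
    {
      // True when every element is exactly 0.0f (the constant-bias case above).
      return std::all_of(data, data + size, [](float v) { return v == 0.0f; });
    }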
+ * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "L2NormLayer.h" + +#include "OperationUtils.h" + +#include <cker/operation/L2Normalize.h> +#include <cker/Types.h> + +namespace onert +{ +namespace backend +{ +namespace cpu +{ +namespace ops +{ + +void L2NormLayer::configure(const IPortableTensor *input, IPortableTensor *output) +{ + assert(input != nullptr); + assert(output != nullptr); + + _input = input; + _output = output; +} + +void L2NormLayer::run() +{ + switch (_input->data_type()) + { + case OperandType::FLOAT32: + nnfw::cker::L2NormalizeFloat32( + getTensorShape(_input), reinterpret_cast<const float *>(_input->buffer()), + getTensorShape(_output), reinterpret_cast<float *>(_output->buffer())); + break; + + case OperandType::QUANT_UINT8_ASYMM: + { + nnfw::cker::L2NormParams params; + assert(_input->data_offset() == 128); + params.input_zero_point = _input->data_offset(); + nnfw::cker::L2NormalizeQuant8( + params, getTensorShape(_input), reinterpret_cast<const uint8_t *>(_input->buffer()), + getTensorShape(_output), reinterpret_cast<uint8_t *>(_output->buffer())); + } + break; + + default: + throw std::runtime_error{"L2Norm: Unsupported data type"}; + } +} + +} // namespace ops +} // namespace cpu +} // namespace backend +} // namespace onert diff --git a/runtime/onert/backend/cpu/ops/L2NormLayer.h b/runtime/onert/backend/cpu/ops/L2NormLayer.h new file mode 100644 index 000000000..63f2d1133 --- /dev/null +++ b/runtime/onert/backend/cpu/ops/L2NormLayer.h @@ -0,0 +1,55 @@ +/* + * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License.
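For reference, the math behind L2NormalizeFloat32 above: each row is scaled by the inverse of its Euclidean norm, y_i = x_i / sqrt(sum_j x_j^2). A plain scalar sketch (the production kernel also guards against a zero norm, which this toy version omits):

    #include <cmath>
    #include <vector>

    std::vector<float> l2normalize(const std::vector<float> &x)
    {
      float sum_sq = 0.0f;
      for (float v : x)
        sum_sq += v * v;
      const float inv_norm = 1.0f / std::sqrt(sum_sq); // assumes a non-zero input row
      std::vector<float> y(x.size());
      for (size_t i = 0; i < x.size(); ++i)
        y[i] = x[i] * inv_norm;
      return y;
    }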
+ */ + +#ifndef __ONERT_BACKEND_CPU_OPS_L2NORM_LAYER_H__ +#define __ONERT_BACKEND_CPU_OPS_L2NORM_LAYER_H__ + +#include <backend/IPortableTensor.h> + +#include <exec/IFunction.h> + +namespace onert +{ +namespace backend +{ +namespace cpu +{ +namespace ops +{ +class L2NormLayer : public ::onert::exec::IFunction +{ +public: + L2NormLayer() : _input(nullptr), _output(nullptr) + { + // Nothing + } + +public: + void configure(const IPortableTensor *input, IPortableTensor *output); + + void run() override; + +private: + const IPortableTensor *_input; + IPortableTensor *_output; +}; + +} // namespace ops +} // namespace cpu +} // namespace backend +} // namespace onert + +#endif // __ONERT_BACKEND_CPU_OPS_L2NORM_LAYER_H__ diff --git a/runtime/onert/backend/cpu/ops/LogSoftMaxLayer.cc b/runtime/onert/backend/cpu/ops/LogSoftMaxLayer.cc index d71e325ac..06dde4fc4 100644 --- a/runtime/onert/backend/cpu/ops/LogSoftMaxLayer.cc +++ b/runtime/onert/backend/cpu/ops/LogSoftMaxLayer.cc @@ -49,8 +49,8 @@ void LogSoftMaxLayer::logsoftmaxQuant8() // NYI } -void LogSoftMaxLayer::configure(const Tensor *input, const float beta, const int axis, - Tensor *output) +void LogSoftMaxLayer::configure(const IPortableTensor *input, const float beta, const int axis, + IPortableTensor *output) { _input = input; _output = output; diff --git a/runtime/onert/backend/cpu/ops/LogSoftMaxLayer.h b/runtime/onert/backend/cpu/ops/LogSoftMaxLayer.h index bc145cea7..ba9deca17 100644 --- a/runtime/onert/backend/cpu/ops/LogSoftMaxLayer.h +++ b/runtime/onert/backend/cpu/ops/LogSoftMaxLayer.h @@ -40,13 +40,14 @@ public: void logsoftmaxQuant8(); - void configure(const Tensor *input, const float beta, const int axis, Tensor *output); + void configure(const IPortableTensor *input, const float beta, const int axis, + IPortableTensor *output); void run(); private: - const Tensor *_input; - Tensor *_output; + const IPortableTensor *_input; + IPortableTensor *_output; float _beta; int _axis; diff --git a/runtime/onert/backend/cpu/ops/OperationUtils.h b/runtime/onert/backend/cpu/ops/OperationUtils.h index 8d29374ff..98385521a 100644 --- a/runtime/onert/backend/cpu/ops/OperationUtils.h +++ b/runtime/onert/backend/cpu/ops/OperationUtils.h @@ -52,6 +52,17 @@ union DataPtr { void *v; }; +union ConstDataPtr { + const uint8_t *u8; + const int8_t *i8; + const uint32_t *u32; + const int32_t *i32; + const bool *b; + const float *f; + const int64_t *i64; + const void *v; +}; + uint32_t getNumberOfDimensions(const IPortableTensor *tensor); uint32_t getNumberOfElements(const IPortableTensor *tensor); diff --git a/runtime/onert/backend/cpu/ops/PadLayer.cc b/runtime/onert/backend/cpu/ops/PadLayer.cc index fcfcf7b5e..6a2bf9da0 100644 --- a/runtime/onert/backend/cpu/ops/PadLayer.cc +++ b/runtime/onert/backend/cpu/ops/PadLayer.cc @@ -33,33 +33,40 @@ PadLayer::PadLayer() // DO NOTHING } -void PadLayer::padFloat32() +template <typename T> void PadLayer::padImpl(const T *constant_value_data) { - nnfw::cker::Pad(_padData, _padRank, getTensorShape(_input), - reinterpret_cast<const float *>(_input->buffer()), getTensorShape(_output), - reinterpret_cast<float *>(_output->buffer()), _constantValueData.f); + nnfw::cker::Pad<T>(_padData, _padRank, getTensorShape(_input), + reinterpret_cast<const T *>(_input->buffer()), getTensorShape(_output), + reinterpret_cast<T *>(_output->buffer()), constant_value_data); } -void PadLayer::padQuant8() { throw std::runtime_error("Quantized Pad isn't supported NYI"); } void PadLayer::configure(const IPortableTensor *input, IPortableTensor
*output, - const int32_t *padData, int32_t padRank, uint8_t *constantValueData) + const int32_t *padData, int32_t padRank, const void *constantValueData) { _input = input; _output = output; memcpy(_padData, padData, sizeof(_padData)); _padRank = padRank; - _constantValueData.u8 = constantValueData; + _constantValueData.v = constantValueData; } void PadLayer::run() { if (_input->data_type() == OperandType::FLOAT32) { - padFloat32(); + padImpl<float>(_constantValueData.f); } else if (_input->data_type() == OperandType::QUANT_UINT8_ASYMM) { - padQuant8(); + if (_constantValueData.u8 == nullptr) + { + uint8_t pad_value = static_cast<uint8_t>(_output->data_offset()); + padImpl<uint8_t>(&pad_value); + } + else + { + padImpl<uint8_t>(_constantValueData.u8); + } } else { diff --git a/runtime/onert/backend/cpu/ops/PadLayer.h b/runtime/onert/backend/cpu/ops/PadLayer.h index 85bd2e6f0..efd73d5e5 100644 --- a/runtime/onert/backend/cpu/ops/PadLayer.h +++ b/runtime/onert/backend/cpu/ops/PadLayer.h @@ -39,12 +39,10 @@ public: PadLayer(); public: - void padFloat32(); - - void padQuant8(); + template <typename T> void padImpl(const T *constant_value_data); void configure(const IPortableTensor *input, IPortableTensor *output, const int32_t *padData, - int32_t padRank, uint8_t *constantValueData = nullptr); + int32_t padRank, const void *constantValueData = nullptr); void run() override; @@ -54,7 +52,7 @@ private: int32_t _padData[8]; int32_t _padRank; - DataPtr _constantValueData; + ConstDataPtr _constantValueData; }; } // namespace ops diff --git a/runtime/onert/backend/cpu/ops/QuantizeLayer.cc b/runtime/onert/backend/cpu/ops/QuantizeLayer.cc new file mode 100644 index 000000000..45fc148bf --- /dev/null +++ b/runtime/onert/backend/cpu/ops/QuantizeLayer.cc @@ -0,0 +1,63 @@ +/* + * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
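The quantized-pad default above follows from the affine scheme real = scale * (q - zero_point): the real value 0 quantizes exactly to the zero point, so padding with _output->data_offset() pads with real zeros. A sketch of that mapping (hypothetical helper, not the cker API):

    #include <algorithm>
    #include <cmath>
    #include <cstdint>

    uint8_t quantize(float x, float scale, int zero_point)
    {
      const int q = static_cast<int>(std::round(x / scale)) + zero_point;
      return static_cast<uint8_t>(std::min(255, std::max(0, q)));
    }
    // quantize(0.0f, scale, zp) == zp for any positive scale and zp in [0, 255],
    // which is exactly the pad_value chosen in PadLayer::run() above.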
+ */ + +#include "QuantizeLayer.h" + +#include <cker/operation/Quantize.h> + +namespace onert +{ +namespace backend +{ +namespace cpu +{ +namespace ops +{ + +QuantizeLayer::QuantizeLayer() : _input(nullptr), _output(nullptr) +{ + // DO NOTHING +} + +template <typename InputT, typename OutputT> void QuantizeLayer::affineQuantize() +{ + nnfw::cker::Quantize(getTensorShape(_input), reinterpret_cast<const InputT *>(_input->buffer()), + getTensorShape(_output), reinterpret_cast<OutputT *>(_output->buffer()), + _output->data_scale(), _output->data_offset()); +} + +void QuantizeLayer::configure(const IPortableTensor *input, IPortableTensor *output) +{ + _input = input; + _output = output; +} + +void QuantizeLayer::run() +{ + if (_input->data_type() == OperandType::FLOAT32) + { + affineQuantize<float, uint8_t>(); + } + else + { + throw std::runtime_error{"Quantize: unsupported data type"}; + } +} + +} // namespace ops +} // namespace cpu +} // namespace backend +} // namespace onert diff --git a/runtime/onert/backend/cpu/ops/QuantizeLayer.h b/runtime/onert/backend/cpu/ops/QuantizeLayer.h new file mode 100644 index 000000000..b4e7aca40 --- /dev/null +++ b/runtime/onert/backend/cpu/ops/QuantizeLayer.h @@ -0,0 +1,56 @@ +/* + * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef __ONERT_BACKEND_CPU_OPS_QUANTIZELAYER_H__ +#define __ONERT_BACKEND_CPU_OPS_QUANTIZELAYER_H__ + +#include <backend/IPortableTensor.h> +#include "OperationUtils.h" + +#include <exec/IFunction.h> + +namespace onert +{ +namespace backend +{ +namespace cpu +{ +namespace ops +{ + +class QuantizeLayer : public ::onert::exec::IFunction +{ +public: + QuantizeLayer(); + +public: + template <typename InputT, typename OutputT> void affineQuantize(); + + void configure(const IPortableTensor *input, IPortableTensor *output); + + void run() override; + +private: + const IPortableTensor *_input; + IPortableTensor *_output; +}; + +} // namespace ops +} // namespace cpu +} // namespace backend +} // namespace onert + +#endif // __ONERT_BACKEND_CPU_OPS_QUANTIZELAYER_H__ diff --git a/runtime/onert/backend/cpu/ops/ReLU6Layer.cc b/runtime/onert/backend/cpu/ops/ReLU6Layer.cc new file mode 100644 index 000000000..26eb35e0d --- /dev/null +++ b/runtime/onert/backend/cpu/ops/ReLU6Layer.cc @@ -0,0 +1,74 @@ +/* + * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
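Complementing affineQuantize() above, the inverse mapping shows why the scheme is lossy but bounded: dequantizing a rounded value recovers the original to within half a scale step, before saturation. A sketch under the same assumptions as the previous example:

    #include <cstdint>

    float dequantize(uint8_t q, float scale, int zero_point)
    {
      return scale * (static_cast<int>(q) - zero_point);
    }
    // For x inside the representable range, |dequantize(quantize(x)) - x| <= scale / 2.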
+ */ + +#include "ReLU6Layer.h" + +#include "OperationUtils.h" + +#include <cker/operation/ReLU6.h> + +namespace onert +{ +namespace backend +{ +namespace cpu +{ +namespace ops +{ + +ReLU6Layer::ReLU6Layer() : _input(nullptr), _output(nullptr) +{ + // DO NOTHING +} + +void ReLU6Layer::relu6Float32() +{ + nnfw::cker::ReLU6(getTensorShape(_input), reinterpret_cast<const float *>(_input->buffer()), + reinterpret_cast<float *>(_output->buffer())); +} + +void ReLU6Layer::relu6Quant8() +{ + // cker quant8 relu is not implemented yet + throw std::runtime_error{"NYI"}; +} + +void ReLU6Layer::configure(const IPortableTensor *input, IPortableTensor *output) +{ + _input = input; + _output = output; +} + +void ReLU6Layer::run() +{ + if (_input->data_type() == OperandType::FLOAT32) + { + relu6Float32(); + } + else if (_input->data_type() == OperandType::QUANT_UINT8_ASYMM) + { + relu6Quant8(); + } + else + { + throw std::runtime_error{"ReLU6: unsupported data type"}; + } +} + +} // namespace ops +} // namespace cpu +} // namespace backend +} // namespace onert diff --git a/runtime/onert/backend/cpu/ops/ReLU6Layer.h b/runtime/onert/backend/cpu/ops/ReLU6Layer.h new file mode 100644 index 000000000..994d17a30 --- /dev/null +++ b/runtime/onert/backend/cpu/ops/ReLU6Layer.h @@ -0,0 +1,57 @@ +/* + * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#ifndef __ONERT_BACKEND_CPU_OPS_RELU6LAYER_H__ +#define __ONERT_BACKEND_CPU_OPS_RELU6LAYER_H__ + +#include <backend/IPortableTensor.h> + +#include <exec/IFunction.h> + +namespace onert +{ +namespace backend +{ +namespace cpu +{ +namespace ops +{ + +class ReLU6Layer : public ::onert::exec::IFunction +{ +public: + ReLU6Layer(); + +public: + void relu6Float32(); + + void relu6Quant8(); + + void configure(const IPortableTensor *input, IPortableTensor *output); + + void run() override; + +private: + const IPortableTensor *_input; + IPortableTensor *_output; +}; + +} // namespace ops +} // namespace cpu +} // namespace backend +} // namespace onert + +#endif // __ONERT_BACKEND_CPU_OPS_RELU6LAYER_H__ diff --git a/runtime/onert/backend/cpu/ops/ReduceLayer.cc b/runtime/onert/backend/cpu/ops/ReduceLayer.cc index 1dad031aa..fe22dbed7 100644 --- a/runtime/onert/backend/cpu/ops/ReduceLayer.cc +++ b/runtime/onert/backend/cpu/ops/ReduceLayer.cc @@ -116,6 +116,39 @@ void evalGeneric(const IPortableTensor *input, IPortableTensor *output, throw std::runtime_error{"Reduce(generic): unsupported data type"}; } } + +void evalSumQuantized(const IPortableTensor *input, IPortableTensor *output, + const std::vector<int> &axes, bool keep_dims, + nnfw::cker::Reduce &reduce_kernel) +{ + const bool same_scale = (input->data_scale() == output->data_scale() && + input->data_offset() == output->data_offset()); + + reduce_kernel.prepare(input->num_dimensions(), axes.size()); + + if (!same_scale) + { + std::vector<int32_t> temp_sum(output->getShape().num_elements()); + bool result = reduce_kernel.QuantizedMeanOrSum<uint8_t, int32_t>( + reinterpret_cast<const uint8_t *>(input->buffer()), input->data_offset(), + input->data_scale(), getTensorShape(input), reinterpret_cast<uint8_t *>(output->buffer()), + output->data_offset(), output->data_scale(), getTensorShape(output), axes, keep_dims, + temp_sum.data(), true, [](const int32_t current, const uint8_t in) -> int32_t { + const int32_t actual_in = static_cast<int32_t>(in); + return current + actual_in; + }); + + if (!result) + { + throw std::runtime_error{"Reduce: Fail to run"}; + } + + return; + } + + evalGeneric<ReduceType::kSum>(input, output, axes, keep_dims, reduce_kernel); +} + } // namespace ReduceLayer::ReduceLayer() @@ -143,6 +176,11 @@ void ReduceLayer::run() switch (_reduceType) { case ReduceType::kSum: + if (_input->data_type() == OperandType::QUANT_UINT8_ASYMM) + { + evalSumQuantized(_input, _output, axes, _keep_dims, *_reduce_kernel); + return; + } evalGeneric<ReduceType::kSum>(_input, _output, axes, _keep_dims, *_reduce_kernel); break; case ReduceType::kProd: diff --git a/runtime/onert/backend/cpu/ops/ResizeBilinearLayer.cc b/runtime/onert/backend/cpu/ops/ResizeBilinearLayer.cc new file mode 100644 index 000000000..180094bb8 --- /dev/null +++ b/runtime/onert/backend/cpu/ops/ResizeBilinearLayer.cc @@ -0,0 +1,87 @@ +/* + * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
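On evalSumQuantized() above: summing many uint8 values cannot be done in uint8 without overflow, so QuantizedMeanOrSum accumulates into the int32 temp_sum buffer and only then rescales to the output's (scale, offset). A toy scalar version of that rescaling sum (illustrative, not the cker signature):

    #include <algorithm>
    #include <cmath>
    #include <cstdint>
    #include <vector>

    uint8_t quantizedSum(const std::vector<uint8_t> &in, float in_scale, int in_zp,
                         float out_scale, int out_zp)
    {
      int32_t acc = 0; // int32 accumulator, as in temp_sum above
      for (uint8_t v : in)
        acc += v;
      // Real-valued sum: in_scale * (acc - N * in_zp), then requantize for the output.
      const float real = in_scale * (acc - static_cast<int32_t>(in.size()) * in_zp);
      const int q = static_cast<int>(std::round(real / out_scale)) + out_zp;
      return static_cast<uint8_t>(std::min(255, std::max(0, q)));
    }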
+ */ +#include "OperationUtils.h" +#include "ResizeBilinearLayer.h" +#include "cker/operation/ResizeBilinear.h" +#include <cker/Types.h> + +namespace onert +{ +namespace backend +{ +namespace cpu +{ +namespace ops +{ + +ResizeBilinearLayer::ResizeBilinearLayer() + : _input(nullptr), _output(nullptr), _output_height(0), _output_width(0), _align_corners(false), + _half_pixel_centers(false) +{ + // DO NOTHING +} + +void ResizeBilinearLayer::configure(const IPortableTensor *input, IPortableTensor *output, + int32_t output_height, int32_t output_width, bool align_corners, + bool half_pixel_centers) +{ + _input = input; + _output = output; + _output_height = output_height; + _output_width = output_width; + _align_corners = align_corners; + _half_pixel_centers = half_pixel_centers; +} + +void ResizeBilinearLayer::run() +{ + nnfw::cker::ResizeBilinearParams params; + params.align_corners = _align_corners; + params.half_pixel_centers = _half_pixel_centers; + params.output_height = _output_height; + params.output_width = _output_width; + + switch (_input->data_type()) + { + case OperandType::FLOAT32: + nnfw::cker::ResizeBilinear( + params, getTensorShape(_input), reinterpret_cast<const float *>(_input->buffer()), + getTensorShape(_output), reinterpret_cast<float *>(_output->buffer())); + break; + + case OperandType::QUANT_UINT8_ASYMM: + nnfw::cker::ResizeBilinear( + params, getTensorShape(_input), reinterpret_cast<const uint8_t *>(_input->buffer()), + getTensorShape(_output), reinterpret_cast<uint8_t *>(_output->buffer())); + break; + + case OperandType::UINT8: + case OperandType::BOOL8: + case OperandType::FLOAT16: + case OperandType::INT32: + case OperandType::INT64: + case OperandType::QUANT_INT8_SYMM: + std::runtime_error("ResizeBilinear NYI"); + break; + default: + std::runtime_error("ResizeBilinear unsupported data type"); + } +} + +} // namespace ops +} // namespace cpu +} // namespace backend +} // namespace onert diff --git a/runtime/onert/backend/cpu/ops/ResizeBilinearLayer.h b/runtime/onert/backend/cpu/ops/ResizeBilinearLayer.h new file mode 100644 index 000000000..fc49b348e --- /dev/null +++ b/runtime/onert/backend/cpu/ops/ResizeBilinearLayer.h @@ -0,0 +1,58 @@ +/* + * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in riting, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#ifndef __ONERT_BACKEND_CPU_OPS_RESIZEBILINEAR_H__ +#define __ONERT_BACKEND_CPU_OPS_RESIZEBILINEAR_H__ + +#include <backend/IPortableTensor.h> + +#include <exec/IFunction.h> + +namespace onert +{ +namespace backend +{ +namespace cpu +{ +namespace ops +{ + +class ResizeBilinearLayer : public ::onert::exec::IFunction +{ +public: + ResizeBilinearLayer(); + +public: + void configure(const IPortableTensor *input, IPortableTensor *output, int32_t output_height, + int32_t output_width, bool align_corners, bool half_pixel_centers); + + void run() override; + +private: + const IPortableTensor *_input; + IPortableTensor *_output; + int32_t _output_height; + int32_t _output_width; + bool _align_corners; + bool _half_pixel_centers; +}; + +} // namespace ops +} // namespace cpu +} // namespace backend +} // namespace onert + +#endif // __ONERT_BACKEND_CPU_OPS_RESIZEBILINEAR_H__ diff --git a/runtime/onert/backend/cpu/ops/SliceLayer.cc b/runtime/onert/backend/cpu/ops/SliceLayer.cc index a9106c1a2..449c073e6 100644 --- a/runtime/onert/backend/cpu/ops/SliceLayer.cc +++ b/runtime/onert/backend/cpu/ops/SliceLayer.cc @@ -46,7 +46,7 @@ void SliceLayer::GetBeginAndSizeVectors(int dimensions, const IPortableTensor *b } } -void SliceLayer::sliceFloat32() +template <typename T> void SliceLayer::sliceImpl() { const int kMaxDim = nnfw::cker::Shape::kMaxSmallSize; @@ -74,14 +74,8 @@ void SliceLayer::sliceFloat32() } nnfw::cker::Slice(op_params, getExtendedTensorShape(_input), - reinterpret_cast<const float *>(_input->buffer()), - reinterpret_cast<float *>(_output->buffer())); -} - -void SliceLayer::sliceQuant8() -{ - // cker quant8 slice is not implemented yet - throw std::runtime_error{"NYI"}; + reinterpret_cast<const T *>(_input->buffer()), + reinterpret_cast<T *>(_output->buffer())); } void SliceLayer::configure(const IPortableTensor *input, const IPortableTensor *begin, @@ -97,11 +91,11 @@ void SliceLayer::run() { if (_input->data_type() == OperandType::FLOAT32) { - sliceFloat32(); + sliceImpl<float>(); } else if (_input->data_type() == OperandType::QUANT_UINT8_ASYMM) { - sliceQuant8(); + sliceImpl<uint8_t>(); } else { diff --git a/runtime/onert/backend/cpu/ops/SliceLayer.h b/runtime/onert/backend/cpu/ops/SliceLayer.h index 9945d7ee6..650e2c97a 100644 --- a/runtime/onert/backend/cpu/ops/SliceLayer.h +++ b/runtime/onert/backend/cpu/ops/SliceLayer.h @@ -42,8 +42,7 @@ public: void run() override; private: - void sliceFloat32(); - void sliceQuant8(); + template <typename T> void sliceImpl(); template <typename T> void GetBeginAndSizeVectors(int dimensions, const IPortableTensor *begin, diff --git a/runtime/onert/backend/cpu/ops/SpaceToDepthLayer.cc b/runtime/onert/backend/cpu/ops/SpaceToDepthLayer.cc new file mode 100644 index 000000000..a0869aed8 --- /dev/null +++ b/runtime/onert/backend/cpu/ops/SpaceToDepthLayer.cc @@ -0,0 +1,74 @@ +/* + * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include "SpaceToDepthLayer.h" + +#include "OperationUtils.h" + +#include <cker/operation/SpaceToDepth.h> + +namespace onert +{ +namespace backend +{ +namespace cpu +{ +namespace ops +{ +SpaceToDepthLayer::SpaceToDepthLayer() : _input(nullptr), _block_size(0), _output(nullptr) +{ + // DO NOTHING +} + +template <typename T> void SpaceToDepthLayer::spaceToDepth() +{ + + nnfw::cker::SpaceToDepthParams params; + params.block_size = _block_size; + + nnfw::cker::SpaceToDepth(params, getTensorShape(_input), + reinterpret_cast<const T *>(_input->buffer()), getTensorShape(_output), + reinterpret_cast<T *>(_output->buffer())); +} + +void SpaceToDepthLayer::configure(const IPortableTensor *input, const int32_t block_size, + IPortableTensor *output) +{ + _input = input; + _block_size = block_size; + _output = output; +} + +void SpaceToDepthLayer::run() +{ + if (_input->data_type() == OperandType::FLOAT32) + { + spaceToDepth<float>(); + } + else if (_input->data_type() == OperandType::QUANT_UINT8_ASYMM) + { + spaceToDepth<uint8_t>(); + } + else + { + throw std::runtime_error{"SpaceToDepth: unsupported data type"}; + } +} + +} // namespace ops +} // namespace cpu +} // namespace backend +} // namespace onert diff --git a/runtime/onert/backend/cpu/ops/SpaceToDepthLayer.h b/runtime/onert/backend/cpu/ops/SpaceToDepthLayer.h new file mode 100644 index 000000000..c11ef2b0a --- /dev/null +++ b/runtime/onert/backend/cpu/ops/SpaceToDepthLayer.h @@ -0,0 +1,54 @@ +/* + * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in riting, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef __ONERT_BACKEND_CPU_OPS_SPACE_TO_DEPTH_LAYER_H__ +#define __ONERT_BACKEND_CPU_OPS_SPACE_TO_DEPTH_LAYER_H__ + +#include <backend/IPortableTensor.h> + +#include <exec/IFunction.h> + +namespace onert +{ +namespace backend +{ +namespace cpu +{ +namespace ops +{ +class SpaceToDepthLayer : public ::onert::exec::IFunction +{ +public: + SpaceToDepthLayer(); + + void configure(const IPortableTensor *input, const int32_t block_size, IPortableTensor *output); + + void run() override; + +private: + template <typename T> void spaceToDepth(); + + const IPortableTensor *_input; + int32_t _block_size; + IPortableTensor *_output; +}; + +} // namespace ops +} // namespace cpu +} // namespace backend +} // namespace onert + +#endif // __ONERT_BACKEND_CPU_OPS_SPACE_TO_BATCH_ND_LAYER_H__ diff --git a/runtime/onert/backend/cpu/ops/SplitVLayer.cc b/runtime/onert/backend/cpu/ops/SplitVLayer.cc new file mode 100644 index 000000000..d6ca12442 --- /dev/null +++ b/runtime/onert/backend/cpu/ops/SplitVLayer.cc @@ -0,0 +1,99 @@ +/* + * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "SplitVLayer.h" + +#include "OperationUtils.h" + +#include <cker/operation/SplitV.h> + +namespace onert +{ +namespace backend +{ +namespace cpu +{ +namespace ops +{ + +SplitVLayer::SplitVLayer() + : _input(nullptr), _size_splits(nullptr), _split_dim(nullptr), _num_splits(0), _outputs() +{ + // DO NOTHING +} + +template <typename T> void SplitVLayer::splitV(void) +{ + nnfw::cker::SplitVParams op_params; + op_params.axis = *(reinterpret_cast<const int32_t *>(_split_dim->buffer())); + op_params.num_split = _num_splits; + + std::vector<T *> outputPtrs; + std::vector<nnfw::cker::Shape> outshape; + + for (const auto output : _outputs) + { + assert(output->total_size() == sizeOfData(output->data_type(), output->getShape().dims())); + outputPtrs.emplace_back(reinterpret_cast<T *>(output->buffer())); + outshape.emplace_back(getTensorShape(output)); + } + + assert(_input->total_size() == sizeOfData(_input->data_type(), _input->getShape().dims())); + nnfw::cker::SplitV<T>(op_params, getTensorShape(_input), reinterpret_cast<T *>(_input->buffer()), + outshape, outputPtrs.data()); +} + +void SplitVLayer::configure(const IPortableTensor *input, const IPortableTensor *size_splits, + const IPortableTensor *split_dim, uint16_t num_splits, + std::vector<IPortableTensor *> &outputs) +{ + assert(input != nullptr); + + _num_splits = num_splits; + _size_splits = size_splits; + _input = input; + _split_dim = split_dim; + _outputs = outputs; +} + +void SplitVLayer::run() +{ + if (_input->data_type() == OperandType::FLOAT32) + { + splitV<float>(); + } + else if (_input->data_type() == OperandType::QUANT_UINT8_ASYMM) + { + splitV<uint8_t>(); + } + else if (_input->data_type() == OperandType::INT32) + { + splitV<int32_t>(); + } + else if (_input->data_type() == OperandType::INT64) + { + splitV<int64_t>(); + } + else + { + throw std::runtime_error{"SplitV: unsupported input type"}; + } +} + +} // namespace ops +} // namespace cpu +} // namespace backend +} // namespace onert diff --git a/runtime/onert/backend/cpu/ops/SplitVLayer.h b/runtime/onert/backend/cpu/ops/SplitVLayer.h new file mode 100644 index 000000000..98f2f4406 --- /dev/null +++ b/runtime/onert/backend/cpu/ops/SplitVLayer.h @@ -0,0 +1,60 @@ +/* + * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#ifndef __ONERT_BACKEND_CPU_OPS_SPLIT_V_LAYER_H__ +#define __ONERT_BACKEND_CPU_OPS_SPLIT_V_LAYER_H__ + +#include <backend/IPortableTensor.h> + +#include <exec/IFunction.h> + +namespace onert +{ +namespace backend +{ +namespace cpu +{ +namespace ops +{ + +class SplitVLayer : public ::onert::exec::IFunction +{ +public: + SplitVLayer(); + +public: + template <typename T> void splitV(void); + + void configure(const IPortableTensor *input, const IPortableTensor *size_splits, + const IPortableTensor *split_dim, uint16_t num_splits, + std::vector<IPortableTensor *> &outputs); + + void run() override; + +private: + const IPortableTensor *_input; + const IPortableTensor *_size_splits; + const IPortableTensor *_split_dim; + uint16_t _num_splits; + std::vector<IPortableTensor *> _outputs; +}; + +} // namespace ops +} // namespace cpu +} // namespace backend +} // namespace onert + +#endif // __ONERT_BACKEND_CPU_OPS_SPLIT_V_LAYER_H__ diff --git a/runtime/onert/backend/cpu/ops/StatelessRandomUniformLayer.cc b/runtime/onert/backend/cpu/ops/StatelessRandomUniformLayer.cc new file mode 100644 index 000000000..b8dfcb4b5 --- /dev/null +++ b/runtime/onert/backend/cpu/ops/StatelessRandomUniformLayer.cc @@ -0,0 +1,68 @@ +/* + * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "StatelessRandomUniformLayer.h" + +#include <cker/operation/StatelessRandomUniform.h> + +namespace onert +{ +namespace backend +{ +namespace cpu +{ +namespace ops +{ + +StatelessRandomUniformLayer::StatelessRandomUniformLayer() + : _shape(nullptr), _seed(nullptr), _output(nullptr) +{ + // DO NOTHING +} + +void StatelessRandomUniformLayer::configure(const IPortableTensor *shape, + const IPortableTensor *seed, IPortableTensor *output) +{ + _shape = shape; + _seed = seed; + _output = output; +} + +void StatelessRandomUniformLayer::StatelessRandomUniformFloat32() +{ + nnfw::cker::StatelessRandomUniform( + getTensorShape(_shape), reinterpret_cast<const int *>(_shape->buffer()), + getTensorShape(_seed), reinterpret_cast<const int *>(_seed->buffer()), + getTensorShape(_output), reinterpret_cast<float *>(_output->buffer())); +} + +void StatelessRandomUniformLayer::run() +{ + switch (_output->data_type()) + { + // TODO: Support INT8 and UINT8 once quantization is applied. + case OperandType::FLOAT32: + StatelessRandomUniformFloat32(); + break; + default: + throw std::runtime_error{"StatelessRandomUniformLayer: unsupported data type"}; + } +} + +} // namespace ops +} // namespace cpu +} // namespace backend +} // namespace onert diff --git a/runtime/onert/backend/cpu/ops/StatelessRandomUniformLayer.h b/runtime/onert/backend/cpu/ops/StatelessRandomUniformLayer.h new file mode 100644 index 000000000..ef11d623d --- /dev/null +++ b/runtime/onert/backend/cpu/ops/StatelessRandomUniformLayer.h @@ -0,0 +1,59 @@ +/* + * Copyright (c) 2020 Samsung Electronics Co., Ltd. 
All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef __ONERT_BACKEND_CPU_OPS_STATELESS_RANDOM_UNIFORM_H__ +#define __ONERT_BACKEND_CPU_OPS_STATELESS_RANDOM_UNIFORM_H__ + +#include <backend/IPortableTensor.h> +#include "OperationUtils.h" + +#include <exec/IFunction.h> + +namespace onert +{ +namespace backend +{ +namespace cpu +{ +namespace ops +{ + +class StatelessRandomUniformLayer : public ::onert::exec::IFunction +{ +public: + StatelessRandomUniformLayer(); + +public: + void configure(const IPortableTensor *shape, const IPortableTensor *seed, + IPortableTensor *output); + + void StatelessRandomUniformFloat32(); + + void run() override; + +private: + const IPortableTensor *_shape; + const IPortableTensor *_seed; + + IPortableTensor *_output; +}; + +} // namespace ops +} // namespace cpu +} // namespace backend +} // namespace onert + +#endif // __ONERT_BACKEND_CPU_OPS_STATELESS_RANDOM_UNIFORM_H__
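For reference, a minimal wiring sketch for one of the new layers, assuming only the SplitVLayer API declared in the diff above. The helper name makeSplitV and the free-function form are hypothetical (not part of the diff); in the backend proper the equivalent wiring is done by the cpu KernelGenerator, which configures the layer and schedules it into a FunctionSequence.

#include "SplitVLayer.h" // as added under runtime/onert/backend/cpu/ops

#include <cstdint>
#include <memory>
#include <vector>

// Hypothetical helper: configure() only records the operand pointers; the
// split itself happens in run(), which dispatches on the input data type
// (FLOAT32, QUANT_UINT8_ASYMM, INT32, INT64) and throws for anything else.
std::unique_ptr<onert::exec::IFunction>
makeSplitV(const onert::backend::IPortableTensor *input,
           const onert::backend::IPortableTensor *size_splits,
           const onert::backend::IPortableTensor *split_dim, uint16_t num_splits,
           std::vector<onert::backend::IPortableTensor *> &outputs)
{
  auto fn = std::make_unique<onert::backend::cpu::ops::SplitVLayer>();
  fn->configure(input, size_splits, split_dim, num_splits, outputs);
  return fn; // executed later via IFunction::run()
}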